libvpx-1.8.2/.clang-format

---
Language:        Cpp
# BasedOnStyle:  Google
# Generated with clang-format 7.0.1
AccessModifierOffset: -1
AlignAfterOpenBracket: Align
AlignConsecutiveAssignments: false
AlignConsecutiveDeclarations: false
AlignEscapedNewlines: Left
AlignOperands:   true
AlignTrailingComments: true
AllowAllParametersOfDeclarationOnNextLine: true
AllowShortBlocksOnASingleLine: false
AllowShortCaseLabelsOnASingleLine: true
AllowShortFunctionsOnASingleLine: All
AllowShortIfStatementsOnASingleLine: true
AllowShortLoopsOnASingleLine: true
AlwaysBreakAfterDefinitionReturnType: None
AlwaysBreakAfterReturnType: None
AlwaysBreakBeforeMultilineStrings: true
AlwaysBreakTemplateDeclarations: true
BinPackArguments: true
BinPackParameters: true
BraceWrapping:
  AfterClass:      false
  AfterControlStatement: false
  AfterEnum:       false
  AfterFunction:   false
  AfterNamespace:  false
  AfterObjCDeclaration: false
  AfterStruct:     false
  AfterUnion:      false
  AfterExternBlock: false
  BeforeCatch:     false
  BeforeElse:      false
  IndentBraces:    false
  SplitEmptyFunction: true
  SplitEmptyRecord: true
  SplitEmptyNamespace: true
BreakBeforeBinaryOperators: None
BreakBeforeBraces: Attach
BreakBeforeInheritanceComma: false
BreakInheritanceList: BeforeColon
BreakBeforeTernaryOperators: true
BreakConstructorInitializersBeforeComma: false
BreakConstructorInitializers: BeforeColon
BreakAfterJavaFieldAnnotations: false
BreakStringLiterals: true
ColumnLimit:     80
CommentPragmas:  '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerAllOnOneLineOrOnePerLine: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat:   false
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
ForEachMacros:
  - foreach
  - Q_FOREACH
  - BOOST_FOREACH
IncludeBlocks:   Preserve
IncludeCategories:
  - Regex:           '^<ext/.*\.h>'
    Priority:        2
  - Regex:           '^<.*\.h>'
    Priority:        1
  - Regex:           '^<.*'
    Priority:        2
  - Regex:           '.*'
    Priority:        3
IncludeIsMainRegex: '([-_](test|unittest))?$'
IndentCaseLabels: true
IndentPPDirectives: None
IndentWidth:     2
IndentWrappedFunctionNames: false
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
MacroBlockBegin: ''
MacroBlockEnd:   ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Never
ObjCBlockIndentWidth: 2
ObjCSpaceAfterProperty: false
ObjCSpaceBeforeProtocolList: false
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakTemplateDeclaration: 10
PenaltyBreakString: 1000
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Right
RawStringFormats:
  - Language:        Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
    BasedOnStyle:    google
  - Language:        TextProto
    Delimiters:
      - pb
      - PB
      - proto
      - PROTO
    EnclosingFunctions:
      - EqualsProto
      - EquivToProto
      - PARSE_PARTIAL_TEXT_PROTO
      - PARSE_TEST_PROTO
      - PARSE_TEXT_PROTO
      - ParseTextOrDie
      - ParseTextProtoOrDie
    CanonicalDelimiter: ''
    BasedOnStyle:    google
ReflowComments:  true
SortIncludes:    false
SortUsingDeclarations: true
SpaceAfterCStyleCast: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles:  false
SpacesInContainerLiterals: true
SpacesInCStyleCastParentheses: false
SpacesInParentheses: false
SpacesInSquareBrackets: false
Standard:        Auto
TabWidth:        8
UseTab:          Never
...

libvpx-1.8.2/.gitattributes

configure eol=lf
*.sh eol=lf

libvpx-1.8.2/.gitignore

*.S
*.a
*.asm.s
*.d
*.gcda
*.gcno
*.o
*~
.cproject
.project
.settings
/*-*.mk
/*.asm
/*.doxy
/*.ivf
/*.ivf.md5
/.bins
/.deps
/.docs
/.install-*
/.libs
/Makefile
/arm_neon.h
/config.log
/config.mk
/docs/
/doxyfile
/examples/*.dox
/examples/decode_to_md5
/examples/decode_with_drops
/examples/decode_with_partial_drops
/examples/example_xma
/examples/postproc
/examples/resize_util
/examples/set_maps
/examples/simple_decoder
/examples/simple_encoder
/examples/twopass_encoder
/examples/vp8_multi_resolution_encoder
/examples/vp8cx_set_ref
/examples/vp9cx_set_ref
/examples/vp9_lossless_encoder
/examples/vp9_spatial_svc_encoder
/examples/vpx_temporal_svc_encoder
/ivfdec
/ivfdec.dox
/ivfenc
/ivfenc.dox
/libvpx.so*
/libvpx.ver
/samples.dox
/test_intra_pred_speed
/test_libvpx
/tools.dox
/tools/*.dox
/tools/tiny_ssim
/vp8_api1_migration.dox
/vp[89x]_rtcd.h
/vpx.pc
/vpx_config.c
/vpx_config.h
/vpx_dsp_rtcd.h
/vpx_scale_rtcd.h
/vpx_version.h
/vpxdec
/vpxdec.dox
/vpxenc
/vpxenc.dox
TAGS

libvpx-1.8.2/.mailmap

Adrian Grange Aℓex Converse Aℓex Converse Aℓex Converse Alexis Ballier Alpha Lam
Angie Chiang Chris Cunningham Chi Yo Tsai Daniele Castagna Deb Mukherjee
Elliott Karpilovsky Erik Niemeyer Fyodor Kyslov Guillaume Martres Hangyu Kuang
Hui Su Jacky Chen Jim Bankoski Johann Koenig Johann Koenig Johann Koenig
Johann Koenig John Koleszar Joshua Litt Marco Paniconi Marco Paniconi
Martin Storsjö Michael Horowitz Pascal Massimino Paul Wilkins Peter Boström
Peter de Rivaz Peter de Rivaz Ralph Giles Ralph Giles Ronald S. Bultje Sai Deng
Sami Pietilä Shiyou Yin Tamar Levy Tamar Levy Tero Rintaluoma
Timothy B. Terriberry Tom Finegan Tom Finegan Urvang Joshi Yaowu Xu Yaowu Xu
Yaowu Xu Venkatarama NG. Avadhani Vitaly Buka xiwei gu

libvpx-1.8.2/AUTHORS

# This file is automatically generated from the git commit history
# by tools/gen_authors.sh.
Aaron Watry Abo Talib Mahfoodh Adrian Grange Ahmad Sharif Aidan Welch
Aleksey Vasenev Alexander Potapenko Alexander Voronov Alexandra Hájková
Aℓex Converse Alexis Ballier Alok Ahuja Alpha Lam A.Mahfoodh Ami Fischman
Andoni Morales Alastruey Andres Mejia Andrew Lewis Andrew Russell Angie Chen
Angie Chiang Aron Rosenberg Attila Nagy Birk Magnussen Brion Vibber
changjun.yang Charles 'Buck' Krasic Cheng Chen Chi Yo Tsai chm
Chris Cunningham Christian Duvivier Daniele Castagna Daniel Kang Dan Zhu
Deb Mukherjee Deepa K G Dim Temp Dmitry Kovalev Dragan Mrdjan Ed Baker
Ehsan Akhgari Elliott Karpilovsky Erik Niemeyer Fabio Pedretti Frank Galligan
Fredrik Söderquist Fritz Koenig Fyodor Kyslov Gabriel Marin Gaute Strokkenes
Geza Lore Ghislain MARY Giuseppe Scrivano Gordana Cmiljanovic Gregor Jasny
Guillaume Martres Guillermo Ballester Valor Hangyu Kuang Hanno Böck Han Shen
Harish Mahendrakar Henrik Lundin Hien Ho Hui Su Ivan Krasin Ivan Maltz
Jacek Caban Jacky Chen James Berry James Yu James Zern Jan Gerber
Jan Kratochvil Janne Salonen Jean-Yves Avenard Jeff Faust Jeff Muizelaar
Jeff Petkau Jerome Jiang Jia Jia Jian Zhou Jim Bankoski Jingning Han
Joey Parrish Johann Koenig John Koleszar Johnny Klonaris John Stark Jon Kunkee
Joshua Bleecher Snyder Joshua Litt Julia Robson Justin Clift Justin Lebar
Kaustubh Raste KO Myung-Hun Kyle Siefring Lawrence Velázquez Linfeng Zhang
Liu Peng Lou Quillio Luca Barbato Luc Trudeau Makoto Kato Mans Rullgard
Marco Paniconi Mark Mentovai Martin Ettl Martin Storsjö Matthew Heaney
Matthias Räncker Michael Horowitz Michael Kohler Mike Frysinger Mike Hommey
Mikhal Shemer Min Chen Minghai Shang Min Ye Mirko Bonadei Moriyoshi Koizumi
Morton Jonuschat Nathan E. Egge Nico Weber Niveditha Rau Parag Salasakar
Pascal Massimino Patrik Westin Paul Wilkins Pavol Rusnak Paweł Hajdan
Pengchong Jin Peter Boström Peter Collingbourne Peter de Rivaz
Philip Jägenstedt Priit Laes Rafael Ávila de Espíndola Rafaël Carré
Rafael de Lucena Valle Rahul Chaudhry Ralph Giles Ranjit Kumar Tulabandu
Raphael Kubo da Costa Ravi Chaudhary Ritu Baldwa Rob Bradford
Ronald S. Bultje Rui Ueyama Sai Deng Sami Pietilä Sarah Parker Sasi Inguva
Scott Graham Scott LaVarnway Sean McGovern Sergey Kolomenkin Sergey Silkin
Sergey Ulanov Shimon Doodkin Shiyou Yin Shubham Tandle Shunyao Li
Stefan Holmer Suman Sunkara Supradeep T R Sylvestre Ledru Taekhyun Kim
Takanori MATSUURA Tamar Levy Tao Bai Tero Rintaluoma Thijs Vermeir Tim Kopp
Timothy B. Terriberry Tom Finegan Tristan Matthews Urvang Joshi
Venkatarama NG. Avadhani Vignesh Venkatasubramanian Vitaly Buka
Vlad Tsyrklevich Wan-Teh Chang xiwei gu Yaowu Xu Yi Luo Yongzhe Wang Yue Chen
Yun Liu Yunqing Wang Yury Gitman Zoe Liu
Google Inc.
The Mozilla Foundation
The Xiph.Org Foundation

libvpx-1.8.2/CHANGELOG

2019-12-09 v1.8.2 "Pekin Duck"
  This release collects incremental improvements to many aspects of the
  library.

  - Upgrading:
    ARCH_* defines have been removed in favor of VPX_ARCH_*.

2019-07-15 v1.8.1 "Orpington Duck"
  This release collects incremental improvements to many aspects of the
  library.

  - Upgrading:
    VP8E_SET_CPUUSED now accepts values up to 9 for vp9.
    VPX_CTRL_VP9E_SET_MAX_INTER_BITRATE_PCT had a spelling fix (was VP8E).
    The --sdk-path option has been removed. If you were using it to build for
    Android please read build/make/Android.mk for alternatives.
    All PPC optimizations have been disabled:
    https://bugs.chromium.org/p/webm/issues/detail?id=1522.
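    As an illustrative aside (not part of the original notes), the raised
    speed limit is selected through the usual control interface; `encoder'
    is assumed here to be a pointer to an initialized VP9 vpx_codec_ctx_t:

      #include "vpx/vp8cx.h"
      /* As of v1.8.1, VP8E_SET_CPUUSED accepts values up to 9 for vp9. */
      vpx_codec_control(encoder, VP8E_SET_CPUUSED, 9);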
  - Enhancements:
    Various changes to improve encoder rate control, quality and speed
    for practically every use case.

  - Bug fixes:
    vp9-rtc: Fix color artifacts for speed >= 8.

2019-01-31 v1.8.0 "Northern Shoveler Duck"
  This release focused on encoding performance for realtime and VOD use cases.

  - Upgrading:
    This adds and improves several vp9 controls. Most are related to SVC:
    VP9E_SET_SVC_FRAME_DROP_LAYER:
      - Frame dropping in SVC.
    VP9E_SET_SVC_INTER_LAYER_PRED:
      - Inter-layer prediction in SVC.
    VP9E_SET_SVC_GF_TEMPORAL_REF:
      - Enable long term temporal reference in SVC.
    VP9E_SET_SVC_REF_FRAME_CONFIG/VP9E_GET_SVC_REF_FRAME_CONFIG:
      - Extend and improve this control for better flexibility in setting the
        SVC pattern dynamically.
    VP9E_SET_POSTENCODE_DROP:
      - Allow for post-encode frame dropping (applies to non-SVC too).
    VP9E_SET_SVC_SPATIAL_LAYER_SYNC:
      - Enable spatial layer sync frames.
    VP9E_SET_SVC_LAYER_ID:
      - Extend api to specify temporal id for each spatial layer.
    VP9E_SET_ROI_MAP:
      - Extend Region of Interest functionality to VP9.

  - Enhancements:
    2 pass vp9 encoding has improved substantially. When using --auto-alt-ref=6,
    we see approximately 8% for VBR and 10% for CQ. When using --auto-alt-ref=1,
    the gains are approximately 4% for VBR and 5% for CQ.

    For real-time encoding, speed 7 has improved by ~5-10%. Encodes targeted at
    screen sharing have improved when the content changes significantly (slide
    sharing) or scrolls. There is a new speed 9 setting for mobile devices
    which is about 10-20% faster than speed 8.

  - Bug fixes:
    VP9 denoiser issue.
    VP9 partition issue for 1080p.
    VP9 rate control improvements.
    Postprocessing Multi Frame Quality Enhancement (MFQE) issue.
    VP8 multithread decoder issues.
    A variety of fuzzing issues.

2018-01-04 v1.7.0 "Mandarin Duck"
  This release focused on high bit depth performance (10/12 bit) and vp9
  encoding improvements.

  - Upgrading:
    This release is ABI incompatible due to new vp9 encoder features.
    Frame parallel decoding for vp9 has been removed.

  - Enhancements:
    vp9 encoding supports additional threads with --row-mt. This can be greater
    than the number of tiles.
    Two new vp9 encoder options have been added:
      --corpus-complexity
      --tune-content=film
    Additional tooling for respecting the vp9 "level" profiles has been added.

  - Bug fixes:
    A variety of fuzzing issues.
    vp8 threading fix for ARM.
    Codec control VP9_SET_SKIP_LOOP_FILTER fixed.
    Reject invalid multi resolution configurations.

2017-01-09 v1.6.1 "Long Tailed Duck"
  This release improves upon the VP9 encoder and speeds up the encoding and
  decoding processes.

  - Upgrading:
    This release is ABI compatible with 1.6.0.

  - Enhancements:
    Faster VP9 encoding and decoding.
    High bit depth builds now provide similar speed for 8 bit encode and decode
    for x86 targets. Other platforms and higher bit depth improvements are in
    progress.

  - Bug Fixes:
    A variety of fuzzing issues.

2016-07-20 v1.6.0 "Khaki Campbell Duck"
  This release improves upon the VP9 encoder and speeds up the encoding and
  decoding processes.

  - Upgrading:
    This release is ABI incompatible with 1.5.0 due to a new 'color_range' enum
    in vpx_image and some minor changes to the VP8_COMP structure. The default
    key frame interval for VP9 has changed from 128 to 9999.

  - Enhancement:
    A core focus has been performance for low end Intel processors. SSSE3
    instructions such as 'pshufb' have been avoided and instructions have been
    reordered to better accommodate the more constrained pipelines. As a
    result, devices based on Celeron processors have seen substantial decoding
    improvements.
    From Indian Runner Duck to Javan Whistling Duck, decoding speed improved
    between 10 and 30%. Between Javan Whistling Duck and Khaki Campbell Duck,
    it improved another 10 to 15%.

    While Celeron benefited most, Core-i5 also improved 5% and 10% between the
    respective releases.

    Realtime performance for WebRTC for both speed and quality has received a
    lot of attention.

  - Bug Fixes:
    A number of fuzzing issues, found variously by Mozilla, Chromium and
    others, have been fixed and we strongly recommend updating.

2015-11-09 v1.5.0 "Javan Whistling Duck"
  This release improves upon the VP9 encoder and speeds up the encoding and
  decoding processes.

  - Upgrading:
    This release is ABI incompatible with 1.4.0. It drops deprecated VP8
    controls and adds a variety of VP9 controls for testing. The vpxenc
    utility now prefers VP9 by default.

  - Enhancements:
    Faster VP9 encoding and decoding
    Smaller library size by combining functions used by VP8 and VP9

  - Bug Fixes:
    A variety of fuzzing issues

2015-04-03 v1.4.0 "Indian Runner Duck"
  This release includes significant improvements to the VP9 codec.

  - Upgrading:
    This release is ABI incompatible with 1.3.0. It drops the compatibility
    layer, requiring VPX_IMG_FMT_* instead of IMG_FMT_*, and adds several
    codec controls for VP9.

  - Enhancements:
    Faster VP9 encoding and decoding
    Multithreaded VP9 decoding (tile and frame-based)
    Multithreaded VP9 encoding - on by default
    YUV 4:2:2 and 4:4:4 support in VP9
    10 and 12bit support in VP9
    64bit ARM support by replacing ARM assembly with intrinsics

  - Bug Fixes:
    Fixes a VP9 bitstream issue in Profile 1. This only affected non-YUV 4:2:0
    files.

  - Known Issues:
    Frame Parallel decoding fails for segmented and non-420 files.

2013-11-15 v1.3.0 "Forest"
  This release introduces the VP9 codec in a backward-compatible way.
  All existing users of VP8 can continue to use the library without
  modification. However, some VP8 options do not map to VP9 in the same
  manner.

  The VP9 encoder in this release is not feature complete. Users interested in
  the encoder are advised to use the git master branch and discuss issues on
  libvpx mailing lists.

  - Upgrading:
    This release is ABI and API compatible with Duclair (v1.0.0). Users of
    older releases should refer to the Upgrading notes in this document for
    that release.

  - Enhancements:
    Get rid of bashisms in the main build scripts
    Added usage info on command line options
    Add lossless compression mode
    Dll build of libvpx
    Add additional Mac OS X targets: 10.7, 10.8 and 10.9 (darwin11-13)
    Add option to disable documentation
    configure: add --enable-external-build support
    make: support V=1 as short form of verbose=yes
    configure: support mingw-w64
    configure: support hardfloat armv7 CHOSTS
    configure: add support for android x86
    Add estimated completion time to vpxenc
    Don't exit on decode errors in vpxenc
    vpxenc: support scaling prior to encoding
    vpxdec: support scaling output
    vpxenc: improve progress indicators with --skip
    msvs: Don't link to winmm.lib
    Add a new script for producing vcxproj files
    Produce Visual Studio 10 and 11 project files
    Produce Windows Phone project files
    msvs-build: use msbuild for vs >= 2005
    configure: default configure log to config.log
    Add encoding option --static-thresh

  - Speed:
    Miscellaneous speed optimizations for VP8 and VP9.

  - Quality:
    In general, quality is consistent with the Eider release.

  - Bug Fixes:
    This release represents approximately a year of engineering effort, and
    contains multiple bug fixes. Please refer to git history for details.
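    As an illustrative aside (file names hypothetical), the VP9 encoder
    introduced in this release can be selected from the vpxenc command line
    with the --codec option:

      $ vpxenc --codec=vp9 -o output.webm input.y4m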
2012-12-21 v1.2.0
  This release acts as a checkpoint for a large amount of internal refactoring
  and testing. It also contains a number of small bugfixes, so all users are
  encouraged to upgrade.

  - Upgrading:
    This release is ABI and API compatible with Duclair (v1.0.0). Users of
    older releases should refer to the Upgrading notes in this document for
    that release.

  - Enhancements:
    VP8 optimizations for MIPS dspr2
    vpxenc: add -quiet option

  - Speed:
    Encoder and decoder speed is consistent with the Eider release.

  - Quality:
    In general, quality is consistent with the Eider release.

    Minor tweaks to ARNR filtering
    Minor improvements to real time encoding with multiple temporal layers

  - Bug Fixes:
    Fixes multithreaded encoder race condition in loopfilter
    Fixes multi-resolution threaded encoding
    Fix potential encoder dead-lock after picture resize

2012-05-09 v1.1.0 "Eider"
  This introduces a number of enhancements, mostly focused on real-time
  encoding. In addition, it fixes a decoder bug (first introduced in Duclair)
  so all users of that release are encouraged to upgrade.

  - Upgrading:
    This release is ABI and API compatible with Duclair (v1.0.0). Users of
    older releases should refer to the Upgrading notes in this document for
    that release.

    This release introduces a new temporal denoiser, controlled by the
    VP8E_SET_NOISE_SENSITIVITY control. The temporal denoiser does not
    currently take a strength parameter, so the control is effectively a
    boolean - zero (off) or non-zero (on). For compatibility with existing
    applications, the values accepted are the same as those for the spatial
    denoiser (0-6). The temporal denoiser is enabled by default, and the older
    spatial denoiser may be restored by configuring with
    --disable-temporal-denoising. The temporal denoiser is more
    computationally intensive than the spatial one.

    This release removes support for a legacy, decode only API that was
    supported, but deprecated, at the initial release of libvpx (v0.9.0).
    This is not expected to have any impact. If you are impacted, you can
    apply a reversion to commit 2bf8fb58 locally. Please update to the latest
    libvpx API if you are affected.

  - Enhancements:
    Adds a motion compensated temporal denoiser to the encoder, which gives
    higher quality than the older spatial denoiser. (See above for notes on
    upgrading).

    In addition, support for new compilers and platforms was added, including:
      improved support for XCode
      Android x86 NDK build
      OS/2 support
      SunCC support

    Changing resolution with vpx_codec_enc_config_set() is now supported.
    Previously, reinitializing the codec was required to change the input
    resolution.

    The vpxenc application has initial support for producing multiple encodes
    from the same input in one call. Resizing is not yet supported, but
    varying other codec parameters is. Use -- to delineate output streams.
    Options persist from one stream to the next.

    Also, the vpxenc application will now use a keyframe interval of 5 seconds
    by default. Use the --kf-max-dist option to override.

  - Speed:
    Decoder performance improved 2.5% versus Duclair. Encoder speed is
    consistent with Duclair for most material. Two pass encoding of
    slideshow-like material will see significant improvements.

    Large realtime encoding speed gains at a small quality expense are
    possible by configuring the on-the-fly bitpacking experiment with
    --enable-onthefly-bitpacking. Realtime encoder can be up to 13% faster
    (ARM) depending on the number of threads and bitrate settings. This
    technique sees constant gain over the 5-16 speed range.
    For VC style input the loss seen is up to 0.2dB. See commit 52cf4dca for
    further details.

  - Quality:
    On the whole, quality is consistent with the Duclair release.

    Some tweaks:
      Reduced blockiness in easy sections by applying a penalty to intra
      modes.
      Improved quality of static sections (like slideshows) with two pass
      encoding.
      Improved keyframe sizing with multiple temporal layers

  - Bug Fixes:
    Corrected alt-ref contribution to frame rate for visible updates to the
    alt-ref buffer. This affected applications making manual usage of the
    frame reference flags, or temporal layers.

    Additional constraints were added to disable multi-frame quality
    enhancement (MFQE) in sections of the frame where there is motion. (#392)

    Fixed corruption issues when vpx_codec_enc_config_set() was called with
    spatial resampling enabled.

    Fixed a decoder error introduced in Duclair where the segmentation map
    was not being reinitialized on keyframes (#378)

2012-01-27 v1.0.0 "Duclair"
  Our fourth named release, focused on performance and features related to
  real-time encoding. It also fixes a decoder crash bug introduced in v0.9.7,
  so all users of that release are encouraged to upgrade.

  - Upgrading:
    This release is ABI incompatible with prior releases of libvpx, so the
    "major" version number has been bumped to 1. You must recompile your
    applications against the latest version of the libvpx headers. The API
    remains compatible, and this should not require code changes in most
    applications.

  - Enhancements:
    This release introduces several substantial new features to the encoder,
    of particular interest to real time streaming applications.

    Temporal scalability allows the encoder to produce a stream that can be
    decimated to different frame rates, with independent rate targeting for
    each substream.

    Multiframe quality enhancement postprocessing can make visual quality
    more consistent in the presence of frames that are substantially
    different quality than the surrounding frames, as in the temporal
    scalability case and in some forced keyframe scenarios.

    Multiple-resolution encoding support allows the encoding of the same
    content at different resolutions faster than encoding them separately.

  - Speed:
    Optimization targets for this release included the decoder and the real-
    time modes of the encoder. Decoder speed on x86 has improved 10.5% with
    this release. Encoder improvements followed a curve where speeds 1-3
    improved 4.0%-1.5%, speeds 4-8 improved <1%, and speeds 9-16 improved
    1.5% to 10.5%, respectively. "Best" mode speed is consistent with the
    Cayuga release.

  - Quality:
    Encoder quality in the single stream case is consistent with the Cayuga
    release.

  - Bug Fixes:
    This release fixes an OOB read decoder crash bug present in v0.9.7
    related to the clamping of motion vectors in SPLITMV blocks. This
    behavior could be triggered by corrupt input or by starting decoding from
    a P-frame.

2011-08-15 v0.9.7-p1 "Cayuga" patch 1
  This is an incremental bugfix release against Cayuga. All users of that
  release are strongly encouraged to upgrade.

  - Fix potential OOB reads (cdae03a)

    An unbounded out of bounds read was discovered when the decoder was
    requested to perform error concealment (new in Cayuga) given a frame
    with corrupt partition sizes.

    A bounded out of bounds read was discovered affecting all versions of
    libvpx. Given a multipartition input frame that is truncated between the
    mode/mv partition and the first residual partition (in the block of
    partition offsets), up to 3 extra bytes could have been read from the
    source buffer.
    The code will not take any action regardless of the contents of these
    undefined bytes, as the truncated buffer is detected immediately
    following the read based on the calculated starting position of the
    coefficient partition.

  - Fix potential error concealment crash when the very first frame is
    missing or corrupt (a609be5)

  - Fix significant artifacts in error concealment (a4c2211, 99d870a)

  - Revert 1-pass CBR rate control changes (e961317)

    Further testing showed this change produced undesirable visual artifacts,
    rolling back for now.

2011-08-02 v0.9.7 "Cayuga"
  Our third named release, focused on a faster, higher quality, encoder.

  - Upgrading:
    This release is backwards compatible with Aylesbury (v0.9.5) and Bali
    (v0.9.6). Users of older releases should refer to the Upgrading notes in
    this document for that release.

  - Enhancements:
    Stereo 3D format support for vpxenc
    Runtime detection of available processor cores.
    Allow specifying --end-usage by enum name
    vpxdec: test for frame corruption
    vpxenc: add quantizer histogram display
    vpxenc: add rate histogram display
    Set VPX_FRAME_IS_DROPPABLE
    update configure for ios sdk 4.3
    Avoid text relocations in ARM vp8 decoder
    Generate a vpx.pc file for pkg-config.
    New ways of passing encoded data between encoder and decoder.

  - Speed:
    This release includes across-the-board speed improvements to the encoder.
    On x86, these measure at approximately 11.5% in Best mode, 21.5% in Good
    mode (speed 0), and 22.5% in Realtime mode (speed 6). On ARM Cortex A9
    with Neon extensions, real-time encoding of video telephony content is
    35% faster than Bali on single core and 48% faster on multi-core. On the
    NVidia Tegra2 platform, real time encoding is 40% faster than Bali.

    Decoder speed was not a priority for this release, but improved
    approximately 8.4% on x86.

    Reduce motion vector search on alt-ref frame.
    Encoder loopfilter running in its own thread
    Reworked loopfilter to precalculate more parameters
    SSE2/SSSE3 optimizations for build_predictors_mbuv{,_s}().
    Make hor UV predict ~2x faster (73 vs 132 cycles) using SSSE3.
    Removed redundant checks
    Reduced structure sizes
    utilize preload in ARMv6 MC/LPF/Copy routines
    ARM optimized quantization, dfct, variance, subtract
    Increase chroma row alignment to 16 bytes.
    disable trellis optimization for first pass
    Write SSSE3 sub-pixel filter function
    Improve SSE2 half-pixel filter functions
    Add vp8_sub_pixel_variance16x8_ssse3 function
    Reduce unnecessary distortion computation
    Use diamond search to replace full search
    Preload reference area in sub-pixel motion search (real-time mode)

  - Quality:
    This release focused primarily on one-pass use cases, including video
    conferencing. Low latency data rate control was significantly improved,
    improving streamability over bandwidth constrained links. Added support
    for error concealment, allowing frames to maintain visual quality in the
    presence of substantial packet loss.

    Add rc_max_intra_bitrate_pct control
    Limit size of initial keyframe in one-pass.
    Improve framerate adaptation
    Improved 1-pass CBR rate control
    Improved KF insertion after fades to still.
    Improved key frame detection.
    Improved activity masking (lower PSNR impact for same SSIM boost)
    Improved interaction between GF and ARFs
    Adding error-concealment to the decoder.
    Adding support for independent partitions
    Adjusted rate-distortion constants

  - Bug Fixes:
    Removed firstpass motion map
    Fix parallel make install
    Fix multithreaded encoding for 1 MB wide frame
    Fixed iwalsh_neon build problems with RVDS4.1
    Fix semaphore emulation, spin-wait intrinsics on Windows
    Fix build with xcode4 and simplify GLOBAL.
    Mark ARM asm objects as allowing a non-executable stack.
    Fix vpxenc encoding incorrect webm file header on big endian

2011-03-07 v0.9.6 "Bali"
  Our second named release, focused on a faster, higher quality, encoder.

  - Upgrading:
    This release is backwards compatible with Aylesbury (v0.9.5). Users of
    older releases should refer to the Upgrading notes in this document for
    that release.

  - Enhancements:
    vpxenc --psnr shows a summary when encode completes
    --tune=ssim option to enable activity masking
    improved postproc visualizations for development
    updated support for Apple iOS to SDK 4.2
    query decoder to determine which reference frames were updated
    implemented error tracking in the decoder
    fix pipe support on windows

  - Speed:
    Primary focus was on good quality mode, speed 0. Average improvement on
    x86 about 40%, up to 100% on user-generated content at that speed. Best
    quality mode speed improved 35%, and realtime speed 10-20%. This release
    also saw significant improvement in realtime encoding speed on ARM
    platforms.

    Improved encoder threading
    Don't pick encoder filter level when loopfilter is disabled.
    Avoid double copying of key frames into alt and golden buffer
    FDCT optimizations.
    x86 sse2 temporal filter
    SSSE3 version of fast quantizer
    vp8_rd_pick_best_mbsegmentation code restructure
    Adjusted breakout RD for SPLITMV
    Changed segmentation check order
    Improved rd_pick_intra4x4block
    Adds armv6 optimized variance calculation
    ARMv6 optimized sad16x16
    ARMv6 optimized half pixel variance calculations
    Full search SAD function optimization in SSE4.1
    Improve MV prediction accuracy to achieve performance gain
    Improve MV prediction in vp8_pick_inter_mode() for speed>3

  - Quality:
    Best quality mode improved PSNR 6.3%, and SSIM 6.1%. This release also
    includes support for "activity masking," which greatly improves SSIM at
    the expense of PSNR. For now, this feature is available with the
    --tune=ssim option. Further experimentation in this area is ongoing.

    This release also introduces a new rate control mode called "CQ," which
    changes the allocation of bits within a clip to the sections where they
    will have the most visual impact.

    Tuning for the more exact quantizer.
    Relax rate control for last few frames
    CQ Mode
    Limit key frame quantizer for forced key frames.
    KF/GF Pulsing
    Add simple version of activity masking.
    make rdmult adaptive for intra in quantizer RDO
    cap the best quantizer for 2nd order DC
    change the threshold of DC check for encode breakout

  - Bug Fixes:
    Fix crash on Sparc Solaris.
    Fix counter of fixed keyframe distance
    ARNR filter pointer update bug fix
    Fixed use of motion percentage in KF/GF group calc
    Changed condition for using RD in Intra Mode
    Fix encoder real-time only configuration.
    Fix ARM encoder crash with multiple token partitions
    Fixed bug where the first cluster timecode of a webm file was wrong.
    Fixed various encoder bugs with odd-sized images
    vp8e_get_preview fixed when spatial resampling enabled
    quantizer: fix assertion in fast quantizer path
    Allocate source buffers to be multiples of 16
    Fix for manual Golden frame frequency
    Fix drastic undershoot in long form content

2010-10-28 v0.9.5 "Aylesbury"
  Our first named release, focused on a faster decoder, and a better encoder.

  - Upgrading:
    This release incorporates backwards-incompatible changes to the ivfenc
    and ivfdec tools. These tools are now called vpxenc and vpxdec.

    vpxdec
      * the -q (quiet) option has been removed, and replaced with -v
        (verbose). the output is quiet by default. Use -v to see the version
        number of the binary.

      * The default behavior is now to write output to a single file instead
        of individual frames. The -y option has been removed. Y4M output is
        the default.

      * For raw I420/YV12 output instead of Y4M, the --i420 or --yv12 options
        must be specified.
          $ ivfdec -o OUTPUT INPUT
          $ vpxdec --i420 -o OUTPUT INPUT

      * If an output file is not specified, the default is to write Y4M to
        stdout. This makes piping more natural.
          $ ivfdec -y -o - INPUT | ...
          $ vpxdec INPUT | ...

      * The output file has additional flexibility for formatting the
        filename. It supports escape characters for constructing a filename
        from the width, height, and sequence number. This replaces the -p
        option. To get the equivalent:
          $ ivfdec -p frame INPUT
          $ vpxdec --i420 -o frame-%wx%h-%4.i420 INPUT

    vpxenc
      * The output file must be specified with -o, rather than as the last
        argument.
          $ ivfenc INPUT OUTPUT
          $ vpxenc -o OUTPUT INPUT

      * The output defaults to webm. To get IVF output, use the --ivf option.
          $ ivfenc INPUT OUTPUT.ivf
          $ vpxenc -o OUTPUT.ivf --ivf INPUT

  - Enhancements:
    ivfenc and ivfdec have been renamed to vpxenc, vpxdec.
    vpxdec supports .webm input
    vpxdec writes .y4m by default
    vpxenc writes .webm output by default
    vpxenc --psnr now shows the average/overall PSNR at the end
    ARM platforms now support runtime cpu detection
    vpxdec visualizations added for motion vectors, block modes, references
    vpxdec now silent by default
    vpxdec --progress shows frame-by-frame timing information
    vpxenc supports the distinction between --fps and --timebase
    NASM is now a supported assembler
    configure: enable PIC for shared libs by default
    configure: add --enable-small
    configure: support for ppc32-linux-gcc
    configure: support for sparc-solaris-gcc

  - Bugs:
    Improve handling of invalid frames
    Fix valgrind errors in the NEON loop filters.
    Fix loopfilter delta zero transitions
    Fix valgrind errors in vp8_sixtap_predict8x4_armv6().
    Build fixes for darwin-icc

  - Speed:
    20-40% (average 28%) improvement in libvpx decoder speed, including:
    Rewrite vp8_short_walsh4x4_sse2()
    Optimizations on the loopfilters.
    Miscellaneous improvements for Atom
    Add 4-tap version of 2nd-pass ARMv6 MC filter.
    Improved multithread utilization
    Better instruction choices on x86
    reorder data to use wider instructions
    Update NEON wide idcts
    Make block access to frame buffer sequential
    Improved subset block search
    Bilinear subpixel optimizations for ssse3.
    Decrease memory footprint

    Encoder speed improvements (percentage gain not measured):
    Skip unnecessary search of identical frames
    Add SSE2 subtract functions
    Improve bounds checking in vp8_diamond_search_sadx4()
    Added vp8_fast_quantize_b_sse2

  - Quality:
    Over 7% overall PSNR improvement (6.3% SSIM) in "best" quality encoding
    mode, and up to 60% improvement on very noisy, still or slow moving
    source video

    Motion compensated temporal filter for Alt-Ref Noise Reduction
    Improved use of trellis quantization on 2nd order Y blocks
    Tune effect of motion on KF/GF boost in two pass
    Allow coefficient optimization for good quality speed 0.
    Improved control of active min quantizer for two pass.
    Enable ARFs for non-lagged compress

2010-09-02 v0.9.2
  - Enhancements:
    Disable frame dropping by default
    Improved multithreaded performance
    Improved Force Key Frame Behaviour
    Increased rate control buffer level precision
    Fix bug in 1st pass motion compensation
    ivfenc: correct fixed kf interval, --disable-kf

  - Speed:
    Changed above and left context data layout
    Rework idct calling structure.
    Removed unnecessary MB_MODE_INFO copies
    x86: SSSE3 sixtap prediction
    Reworked IDCT to include reconstruction (add) step
    Swap alt/gold/new/last frame buffer ptrs instead of copying.
    Improve SSE2 loopfilter functions
    Change bitreader to use a larger window.
    Avoid loopfilter reinitialization when possible

  - Quality:
    Normalize quantizer's zero bin and rounding factors
    Add trellis quantization.
    Make the quantizer exact.
    Updates to ARNR filtering algorithm
    Fix breakout thresh computation for golden & AltRef frames
    Redo the forward 4x4 dct
    Improve the accuracy of forward walsh-hadamard transform
    Further adjustment of RD behaviour with Q and Zbin.

  - Build System:
    Allow linking of libs built with MinGW to MSVC
    Fix target auto-detection on mingw32
    Allow --cpu= to work for x86.
    configure: pass original arguments through to make dist
    Fix builds without runtime CPU detection
    msvs: fix install of codec sources
    msvs: Change devenv.com command line for better msys support
    msvs: Add vs9 targets.
    Add x86_64-linux-icc target

  - Bugs:
    Potential crashes on older MinGW builds
    Fix two-pass framerate for Y4M input.
    Fixed simple loop filter, other crashes on ARM v6
    arm: fix missing dependency with --enable-shared
    configure: support directories containing .o
    Replace pinsrw (SSE) with MMX instructions
    apple: include proper mach primitives
    Fixed rate control bug with long key frame interval.
    Fix DSO link errors on x86-64 when not using a version script
    Fixed buffer selection for UV in AltRef filtering

2010-06-17 v0.9.1
  - Enhancements:
    * ivfenc/ivfdec now support YUV4MPEG2 input and pipe I/O
    * Speed optimizations
  - Bugfixes:
    * Rate control
    * Prevent out-of-bounds accesses on invalid data
  - Build system updates:
    * Detect toolchain to be used automatically for native builds
    * Support building shared libraries
    * Better autotools emulation (--prefix, --libdir, DESTDIR)
  - Updated LICENSE
    * http://webmproject.blogspot.com/2010/06/changes-to-webm-open-source-license.html

2010-05-18 v0.9.0
  - Initial open source release. Welcome to WebM and VP8!

libvpx-1.8.2/LICENSE

Copyright (c) 2010, The WebM Project authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:

  * Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.

  * Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in
    the documentation and/or other materials provided with the
    distribution.

  * Neither the name of Google, nor the WebM Project, nor the names
    of its contributors may be used to endorse or promote products
    derived from this software without specific prior written
    permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

libvpx-1.8.2/PATENTS

Additional IP Rights Grant (Patents)
------------------------------------

"These implementations" means the copyrightable works that implement the WebM
codecs distributed by Google as part of the WebM Project.

Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge,
royalty-free, irrevocable (except as stated in this section) patent license to
make, have made, use, offer to sell, sell, import, transfer, and otherwise
run, modify and propagate the contents of these implementations of WebM, where
such license applies only to those patent claims, both currently owned by
Google and acquired in the future, licensable by Google that are necessarily
infringed by these implementations of WebM. This grant does not include claims
that would be infringed only as a consequence of further modification of these
implementations. If you or your agent or exclusive licensee institute or order
or agree to the institution of patent litigation or any other patent
enforcement activity against any entity (including a cross-claim or
counterclaim in a lawsuit) alleging that any of these implementations of WebM
or any code incorporated within any of these implementations of WebM
constitute direct or contributory patent infringement, or inducement of patent
infringement, then any patent rights granted to you under this License for
these implementations of WebM shall terminate as of the date such litigation
is filed.

libvpx-1.8.2/README

README - 9 December 2019

Welcome to the WebM VP8/VP9 Codec SDK!

COMPILING THE APPLICATIONS/LIBRARIES:
  The build system used is similar to autotools. Building generally consists
  of "configuring" with your desired build options, then using GNU make to
  build the application.

  1. Prerequisites
    * All x86 targets require the Yasm[1] assembler be installed[2].
    * All Windows builds require that Cygwin[3] be installed.
    * Building the documentation requires Doxygen[4].
      If you do not have this package, the install-docs option will be
      disabled.
    * Downloading the data for the unit tests requires curl[5] and sha1sum.
      sha1sum is provided via the GNU coreutils, installed by default on
      many *nix platforms, as well as MinGW and Cygwin. If coreutils is not
      available, a compatible version of sha1sum can be built from source[6].
      These requirements are optional if not running the unit tests.

    [1]: http://www.tortall.net/projects/yasm
    [2]: For Visual Studio the base yasm binary (not vsyasm) should be in the
         PATH for Visual Studio. For VS2017 it is sufficient to rename
         yasm-<version>-<arch>.exe to yasm.exe and place it in:
         Program Files (x86)/Microsoft Visual Studio/2017/<edition>/Common7/Tools/
    [3]: http://www.cygwin.com
    [4]: http://www.doxygen.org
    [5]: http://curl.haxx.se
    [6]: http://www.microbrew.org/tools/md5sha1sum/

  2. Out-of-tree builds
    Out of tree builds are a supported method of building the application.
    For an out of tree build, the source tree is kept separate from the
    object files produced during compilation. For instance:

      $ mkdir build
      $ cd build
      $ ../libvpx/configure <options>
      $ make

  3. Configuration options
    The 'configure' script supports a number of options. The --help option
    can be used to get a list of supported options:

      $ ../libvpx/configure --help

  4. Compiler analyzers
    Compilers have added sanitizers which instrument binaries with
    information about address calculation, memory usage, threading, undefined
    behavior, and other common errors. To simplify building libvpx with some
    of these features use tools/set_analyzer_env.sh before running configure.
    It will set the compiler and necessary flags for building as well as
    environment variables read by the analyzer when testing the binaries.

      $ source ../libvpx/tools/set_analyzer_env.sh address

  5. Cross development
    For cross development, the most notable option is the --target option.
    The most up-to-date list of supported targets can be found at the bottom
    of the --help output of the configure script. As of this writing, the
    list of available targets is:

      arm64-android-gcc arm64-darwin-gcc arm64-linux-gcc arm64-win64-gcc
      arm64-win64-vs15 armv7-android-gcc armv7-darwin-gcc armv7-linux-rvct
      armv7-linux-gcc armv7-none-rvct armv7-win32-gcc armv7-win32-vs14
      armv7-win32-vs15 armv7s-darwin-gcc armv8-linux-gcc mips32-linux-gcc
      mips64-linux-gcc ppc64le-linux-gcc sparc-solaris-gcc x86-android-gcc
      x86-darwin8-gcc x86-darwin8-icc x86-darwin9-gcc x86-darwin9-icc
      x86-darwin10-gcc x86-darwin11-gcc x86-darwin12-gcc x86-darwin13-gcc
      x86-darwin14-gcc x86-darwin15-gcc x86-darwin16-gcc x86-darwin17-gcc
      x86-iphonesimulator-gcc x86-linux-gcc x86-linux-icc x86-os2-gcc
      x86-solaris-gcc x86-win32-gcc x86-win32-vs14 x86-win32-vs15
      x86-win32-vs16 x86_64-android-gcc x86_64-darwin9-gcc x86_64-darwin10-gcc
      x86_64-darwin11-gcc x86_64-darwin12-gcc x86_64-darwin13-gcc
      x86_64-darwin14-gcc x86_64-darwin15-gcc x86_64-darwin16-gcc
      x86_64-darwin17-gcc x86_64-darwin18-gcc x86_64-iphonesimulator-gcc
      x86_64-linux-gcc x86_64-linux-icc x86_64-solaris-gcc x86_64-win64-gcc
      x86_64-win64-vs14 x86_64-win64-vs15 x86_64-win64-vs16 generic-gnu

    The generic-gnu target, in conjunction with the CROSS environment
    variable, can be used to cross compile architectures that aren't
    explicitly listed, if the toolchain is a cross GNU (gcc/binutils)
    toolchain. Other POSIX toolchains will likely work as well.
    For instance, to build using the mipsel-linux-uclibc toolchain, the
    following command could be used (note, POSIX SH syntax, adapt to your
    shell as necessary):

      $ CROSS=mipsel-linux-uclibc- ../libvpx/configure

    In addition, the executables to be invoked can be overridden by
    specifying the environment variables: CC, AR, LD, AS, STRIP, NM.
    Additional flags can be passed to these executables with CFLAGS, LDFLAGS,
    and ASFLAGS.

  6. Configuration errors
    If the configuration step fails, the first step is to look in the error
    log. This defaults to config.log. This should give a good indication of
    what went wrong. If not, contact us for support.

VP8/VP9 TEST VECTORS:
  The test vectors can be downloaded and verified using the build system
  after running configure. To specify an alternate directory the
  LIBVPX_TEST_DATA_PATH environment variable can be used.

    $ ./configure --enable-unit-tests
    $ LIBVPX_TEST_DATA_PATH=../libvpx-test-data make testdata

CODE STYLE:
  The coding style used by this project is enforced with clang-format using
  the configuration contained in the .clang-format file in the root of the
  repository.

  Before pushing changes for review you can format your code with:
    # Apply clang-format to modified .c, .h and .cc files
    $ clang-format -i --style=file \
      $(git diff --name-only --diff-filter=ACMR '*.[hc]' '*.cc')

  Check the .clang-format file for the version used to generate it if there
  is any difference between your local formatting and the review system.

  See also: http://clang.llvm.org/docs/ClangFormat.html

SUPPORT
  This library is an open source project supported by its community. Please
  email webm-discuss@webmproject.org for help.

libvpx-1.8.2/args.c

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdlib.h>
#include <string.h>
#include <limits.h>

#include "args.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/msvc.h"

#if defined(__GNUC__) && __GNUC__
extern void die(const char *fmt, ...) __attribute__((noreturn));
#else
extern void die(const char *fmt, ...);
#endif

struct arg arg_init(char **argv) {
  struct arg a;

  a.argv = argv;
  a.argv_step = 1;
  a.name = NULL;
  a.val = NULL;
  a.def = NULL;
  return a;
}

int arg_match(struct arg *arg_, const struct arg_def *def, char **argv) {
  struct arg arg;

  if (!argv[0] || argv[0][0] != '-') return 0;

  arg = arg_init(argv);

  if (def->short_name && strlen(arg.argv[0]) == strlen(def->short_name) + 1 &&
      !strcmp(arg.argv[0] + 1, def->short_name)) {
    arg.name = arg.argv[0] + 1;
    arg.val = def->has_val ? arg.argv[1] : NULL;
    arg.argv_step = def->has_val ? 2 : 1;
  } else if (def->long_name) {
    const size_t name_len = strlen(def->long_name);

    if (strlen(arg.argv[0]) >= name_len + 2 && arg.argv[0][1] == '-' &&
        !strncmp(arg.argv[0] + 2, def->long_name, name_len) &&
        (arg.argv[0][name_len + 2] == '=' ||
         arg.argv[0][name_len + 2] == '\0')) {
      arg.name = arg.argv[0] + 2;
      arg.val = arg.name[name_len] == '=' ?
                arg.name + name_len + 1 : NULL;
      arg.argv_step = 1;
    }
  }

  if (arg.name && !arg.val && def->has_val)
    die("Error: option %s requires argument.\n", arg.name);

  if (arg.name && arg.val && !def->has_val)
    die("Error: option %s requires no argument.\n", arg.name);

  if (arg.name && (arg.val || !def->has_val)) {
    arg.def = def;
    *arg_ = arg;
    return 1;
  }

  return 0;
}

const char *arg_next(struct arg *arg) {
  if (arg->argv[0]) arg->argv += arg->argv_step;
  return *arg->argv;
}

char **argv_dup(int argc, const char **argv) {
  char **new_argv = malloc((argc + 1) * sizeof(*argv));

  memcpy(new_argv, argv, argc * sizeof(*argv));
  new_argv[argc] = NULL;
  return new_argv;
}

void arg_show_usage(FILE *fp, const struct arg_def *const *defs) {
  char option_text[40] = { 0 };

  for (; *defs; defs++) {
    const struct arg_def *def = *defs;
    char *short_val = def->has_val ? " <arg>" : "";
    char *long_val = def->has_val ? "=<arg>" : "";

    if (def->short_name && def->long_name) {
      char *comma = def->has_val ? "," : ", ";

      snprintf(option_text, 37, "-%s%s%s --%s%6s", def->short_name, short_val,
               comma, def->long_name, long_val);
    } else if (def->short_name)
      snprintf(option_text, 37, "-%s%s", def->short_name, short_val);
    else if (def->long_name)
      snprintf(option_text, 37, " --%s%s", def->long_name, long_val);

    fprintf(fp, " %-37s\t%s\n", option_text, def->desc);

    if (def->enums) {
      const struct arg_enum_list *listptr;

      fprintf(fp, " %-37s\t ", "");

      for (listptr = def->enums; listptr->name; listptr++)
        fprintf(fp, "%s%s", listptr->name, listptr[1].name ? ", " : "\n");
    }
  }
}

unsigned int arg_parse_uint(const struct arg *arg) {
  uint32_t rawval;
  char *endptr;

  rawval = (uint32_t)strtoul(arg->val, &endptr, 10);

  if (arg->val[0] != '\0' && endptr[0] == '\0') {
    if (rawval <= UINT_MAX) return rawval;

    die("Option %s: Value %ld out of range for unsigned int\n", arg->name,
        rawval);
  }

  die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
  return 0;
}

int arg_parse_int(const struct arg *arg) {
  int32_t rawval;
  char *endptr;

  rawval = (int32_t)strtol(arg->val, &endptr, 10);

  if (arg->val[0] != '\0' && endptr[0] == '\0') {
    if (rawval >= INT_MIN && rawval <= INT_MAX) return (int)rawval;

    die("Option %s: Value %ld out of range for signed int\n", arg->name,
        rawval);
  }

  die("Option %s: Invalid character '%c'\n", arg->name, *endptr);
  return 0;
}

struct vpx_rational {
  int num; /**< fraction numerator */
  int den; /**< fraction denominator */
};

struct vpx_rational arg_parse_rational(const struct arg *arg) {
  long int rawval;
  char *endptr;
  struct vpx_rational rat;

  /* parse numerator */
  rawval = strtol(arg->val, &endptr, 10);

  if (arg->val[0] != '\0' && endptr[0] == '/') {
    if (rawval >= INT_MIN && rawval <= INT_MAX)
      rat.num = (int)rawval;
    else
      die("Option %s: Value %ld out of range for signed int\n", arg->name,
          rawval);
  } else
    die("Option %s: Expected / at '%c'\n", arg->name, *endptr);

  /* parse denominator */
  rawval = strtol(endptr + 1, &endptr, 10);

  if (arg->val[0] != '\0' && endptr[0] == '\0') {
    if (rawval >= INT_MIN && rawval <= INT_MAX)
      rat.den = (int)rawval;
    else
      die("Option %s: Value %ld out of range for signed int\n", arg->name,
          rawval);
  } else
    die("Option %s: Invalid character '%c'\n", arg->name, *endptr);

  return rat;
}

int arg_parse_enum(const struct arg *arg) {
  const struct arg_enum_list *listptr;
  long int rawval;
  char *endptr;

  /* First see if the value can be parsed as a raw value */
  rawval = strtol(arg->val, &endptr, 10);
  if (arg->val[0] != '\0' && endptr[0] == '\0') {
    /* Got a raw value, make sure it's valid */
    for (listptr = arg->def->enums; listptr->name; listptr++)
      if
          (listptr->val == rawval) return (int)rawval;
  }

  /* Next see if it can be parsed as a string */
  for (listptr = arg->def->enums; listptr->name; listptr++)
    if (!strcmp(arg->val, listptr->name)) return listptr->val;

  die("Option %s: Invalid value '%s'\n", arg->name, arg->val);
  return 0;
}

int arg_parse_enum_or_int(const struct arg *arg) {
  if (arg->def->enums) return arg_parse_enum(arg);
  return arg_parse_int(arg);
}

libvpx-1.8.2/args.h

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_ARGS_H_
#define VPX_ARGS_H_

#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

struct arg {
  char **argv;
  const char *name;
  const char *val;
  unsigned int argv_step;
  const struct arg_def *def;
};

struct arg_enum_list {
  const char *name;
  int val;
};
#define ARG_ENUM_LIST_END \
  { 0 }

typedef struct arg_def {
  const char *short_name;
  const char *long_name;
  int has_val;
  const char *desc;
  const struct arg_enum_list *enums;
} arg_def_t;
#define ARG_DEF(s, l, v, d) \
  { s, l, v, d, NULL }
#define ARG_DEF_ENUM(s, l, v, d, e) \
  { s, l, v, d, e }
#define ARG_DEF_LIST_END \
  { 0 }

struct arg arg_init(char **argv);
int arg_match(struct arg *arg_, const struct arg_def *def, char **argv);
const char *arg_next(struct arg *arg);
void arg_show_usage(FILE *fp, const struct arg_def *const *defs);
char **argv_dup(int argc, const char **argv);

unsigned int arg_parse_uint(const struct arg *arg);
int arg_parse_int(const struct arg *arg);
struct vpx_rational arg_parse_rational(const struct arg *arg);
int arg_parse_enum(const struct arg *arg);
int arg_parse_enum_or_int(const struct arg *arg);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_ARGS_H_

libvpx-1.8.2/build/make/Android.mk

##
##  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
##
##  Use of this source code is governed by a BSD-style license
##  that can be found in the LICENSE file in the root of the source
##  tree. An additional intellectual property rights grant can be found
##  in the file PATENTS.  All contributing project authors may
##  be found in the AUTHORS file in the root of the source tree.
##

#
# This file is to be used for compiling libvpx for Android using the NDK.
# In an Android project place a libvpx checkout in the jni directory.
# Run the configure script from the jni directory.  Base libvpx
# encoder/decoder configuration will look similar to:
# ./libvpx/configure --target=armv7-android-gcc --disable-examples \
#                    --enable-external-build
#
# When targeting Android, realtime-only is enabled by default.  This can
# be overridden by adding the command line flag:
#  --disable-realtime-only
#
# This will create .mk files that contain variables that contain the
# source files to compile.
#
# Place an Android.mk file in the jni directory that references the
# Android.mk file in the libvpx directory:
# LOCAL_PATH := $(call my-dir)
# include $(CLEAR_VARS)
# include jni/libvpx/build/make/Android.mk
#
# By default libvpx will use the 'cpufeatures' module from the NDK. This
# allows the library to be built with all available optimizations
# (SSE2->AVX512 for x86, NEON for arm, DSPr2 for mips). This can be disabled
# with
#  --disable-runtime-cpu-detect
# but the resulting library *must* be run on devices supporting all of the
# enabled extensions. They can be disabled individually with
#  --disable-{sse2, sse3, ssse3, sse4_1, avx, avx2, avx512}
#  --disable-neon[-asm]
#  --disable-{dspr2, msa}
#
# Running ndk-build will build libvpx and include it in your project.
#

CONFIG_DIR := $(LOCAL_PATH)/
LIBVPX_PATH := $(LOCAL_PATH)/libvpx
ASM_CNV_PATH_LOCAL := $(TARGET_ARCH_ABI)/ads2gas
ASM_CNV_PATH := $(LOCAL_PATH)/$(ASM_CNV_PATH_LOCAL)
ifneq ($(V),1)
  qexec := @
endif

# Use the makefiles generated by upstream configure to determine which files
# to build. Also set any architecture-specific flags.
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
  include $(CONFIG_DIR)libs-armv7-android-gcc.mk
  LOCAL_ARM_MODE := arm
else ifeq ($(TARGET_ARCH_ABI),arm64-v8a)
  include $(CONFIG_DIR)libs-arm64-android-gcc.mk
  LOCAL_ARM_MODE := arm
else ifeq ($(TARGET_ARCH_ABI),x86)
  include $(CONFIG_DIR)libs-x86-android-gcc.mk
else ifeq ($(TARGET_ARCH_ABI),x86_64)
  include $(CONFIG_DIR)libs-x86_64-android-gcc.mk
else ifeq ($(TARGET_ARCH_ABI),mips)
  include $(CONFIG_DIR)libs-mips-android-gcc.mk
else
  $(error Not a supported TARGET_ARCH_ABI: $(TARGET_ARCH_ABI))
endif

# Rule that is normally in Makefile created by libvpx
# configure.  Used to filter out source files based on configuration.
enabled=$(filter-out $($(1)-no),$($(1)-yes))

# Override the relative path that is defined by the libvpx
# configure process
SRC_PATH_BARE := $(LIBVPX_PATH)

# Include the list of files to be built
include $(LIBVPX_PATH)/libs.mk

# Optimise the code. May want to revisit this setting in the future.
LOCAL_CFLAGS := -O3

# For x86, include the source code in the search path so it will find files
# like x86inc.asm and x86_abi_support.asm
LOCAL_ASMFLAGS := -I$(LIBVPX_PATH)

.PRECIOUS: %.asm.S
$(ASM_CNV_PATH)/libvpx/%.asm.S: $(LIBVPX_PATH)/%.asm
	$(qexec)mkdir -p $(dir $@)
	$(qexec)$(CONFIG_DIR)$(ASM_CONVERSION) <$< > $@

# For building *_rtcd.h, which have rules in libs.mk
TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN)))
target := libs

LOCAL_SRC_FILES += vpx_config.c

# Remove duplicate entries
CODEC_SRCS_UNIQUE = $(sort $(CODEC_SRCS))

# Pull out C files.  vpx_config.c is in the immediate directory and
# so it does not need libvpx/ prefixed like the rest of the source files.
# The neon files with intrinsics need to have .neon appended so the proper
# flags are applied.
CODEC_SRCS_C = $(filter %.c, $(CODEC_SRCS_UNIQUE))
LOCAL_NEON_SRCS_C = $(filter %_neon.c, $(CODEC_SRCS_C))
LOCAL_CODEC_SRCS_C = $(filter-out vpx_config.c %_neon.c, $(CODEC_SRCS_C))

LOCAL_SRC_FILES += $(foreach file, $(LOCAL_CODEC_SRCS_C), libvpx/$(file))
ifeq ($(TARGET_ARCH_ABI),armeabi-v7a)
  LOCAL_SRC_FILES += $(foreach file, $(LOCAL_NEON_SRCS_C), libvpx/$(file).neon)
else
  # If there are neon sources then we are building for arm64 and do not need
  # to specify .neon
  LOCAL_SRC_FILES += $(foreach file, $(LOCAL_NEON_SRCS_C), libvpx/$(file))
endif

# Pull out assembly files, splitting NEON from the rest.  This is
# done to specify that the NEON assembly files use NEON assembler flags.
# x86 assembly matches %.asm, arm matches %.asm.S # x86: CODEC_SRCS_ASM_X86 = $(filter %.asm, $(CODEC_SRCS_UNIQUE)) LOCAL_SRC_FILES += $(foreach file, $(CODEC_SRCS_ASM_X86), libvpx/$(file)) # arm: CODEC_SRCS_ASM_ARM_ALL = $(filter %.asm.S, $(CODEC_SRCS_UNIQUE)) CODEC_SRCS_ASM_ARM = $(foreach v, \ $(CODEC_SRCS_ASM_ARM_ALL), \ $(if $(findstring neon,$(v)),,$(v))) CODEC_SRCS_ASM_ADS2GAS = $(patsubst %.S, \ $(ASM_CNV_PATH_LOCAL)/libvpx/%.S, \ $(CODEC_SRCS_ASM_ARM)) LOCAL_SRC_FILES += $(CODEC_SRCS_ASM_ADS2GAS) ifeq ($(TARGET_ARCH_ABI),armeabi-v7a) ASM_INCLUDES := vpx_dsp/arm/idct_neon.asm.S CODEC_SRCS_ASM_NEON = $(foreach v, \ $(CODEC_SRCS_ASM_ARM_ALL),\ $(if $(findstring neon,$(v)),$(v),)) CODEC_SRCS_ASM_NEON := $(filter-out $(addprefix %, $(ASM_INCLUDES)), \ $(CODEC_SRCS_ASM_NEON)) CODEC_SRCS_ASM_NEON_ADS2GAS = $(patsubst %.S, \ $(ASM_CNV_PATH_LOCAL)/libvpx/%.S, \ $(CODEC_SRCS_ASM_NEON)) LOCAL_SRC_FILES += $(patsubst %.S, \ %.S.neon, \ $(CODEC_SRCS_ASM_NEON_ADS2GAS)) NEON_ASM_TARGETS = $(patsubst %.S, \ $(ASM_CNV_PATH)/libvpx/%.S, \ $(CODEC_SRCS_ASM_NEON)) # add a dependency to the full path to the ads2gas output to ensure the # includes are converted first. ifneq ($(strip $(NEON_ASM_TARGETS)),) $(NEON_ASM_TARGETS): $(addprefix $(ASM_CNV_PATH)/libvpx/, $(ASM_INCLUDES)) endif endif LOCAL_CFLAGS += \ -DHAVE_CONFIG_H=vpx_config.h \ -I$(LIBVPX_PATH) \ -I$(ASM_CNV_PATH) \ -I$(ASM_CNV_PATH)/libvpx LOCAL_MODULE := libvpx ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) LOCAL_STATIC_LIBRARIES := cpufeatures endif # Add a dependency to force generation of the RTCD files. define rtcd_dep_template rtcd_dep_template_SRCS := $(addprefix $(LOCAL_PATH)/, $(LOCAL_SRC_FILES)) rtcd_dep_template_SRCS := $$(rtcd_dep_template_SRCS:.neon=) ifeq ($(CONFIG_VP8), yes) $$(rtcd_dep_template_SRCS): vp8_rtcd.h endif ifeq ($(CONFIG_VP9), yes) $$(rtcd_dep_template_SRCS): vp9_rtcd.h endif $$(rtcd_dep_template_SRCS): vpx_scale_rtcd.h $$(rtcd_dep_template_SRCS): vpx_dsp_rtcd.h rtcd_dep_template_CONFIG_ASM_ABIS := x86 x86_64 armeabi-v7a ifneq ($$(findstring $(TARGET_ARCH_ABI),$$(rtcd_dep_template_CONFIG_ASM_ABIS)),) $$(rtcd_dep_template_SRCS): vpx_config.asm endif endef $(eval $(call rtcd_dep_template)) .PHONY: clean clean: @echo "Clean: ads2gas files [$(TARGET_ARCH_ABI)]" $(qexec)$(RM) $(CODEC_SRCS_ASM_ADS2GAS) $(CODEC_SRCS_ASM_NEON_ADS2GAS) $(qexec)$(RM) -r $(ASM_CNV_PATH) $(qexec)$(RM) $(CLEAN-OBJS) ifeq ($(ENABLE_SHARED),1) LOCAL_CFLAGS += -fPIC include $(BUILD_SHARED_LIBRARY) else include $(BUILD_STATIC_LIBRARY) endif ifeq ($(CONFIG_RUNTIME_CPU_DETECT),yes) $(call import-module,android/cpufeatures) endif libvpx-1.8.2/build/make/Makefile000066400000000000000000000345021357355204000165120ustar00rootroot00000000000000## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## include config.mk quiet?=true ifeq ($(target),) # If a target wasn't specified, invoke for all enabled targets. 
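# For example, assuming config.mk defines "ALL_TARGETS = libs examples", a
# bare `make` here reinvokes itself as `make target=libs ...` and then
# `make target=examples ...`. The actual list is written by configure and
# depends on the options selected.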
.DEFAULT: @for t in $(ALL_TARGETS); do \ $(MAKE) --no-print-directory target=$$t $(MAKECMDGOALS) || exit $$?;\ done all: .DEFAULT clean:: .DEFAULT exampletest: .DEFAULT install:: .DEFAULT test:: .DEFAULT test-no-data-check:: .DEFAULT testdata:: .DEFAULT utiltest: .DEFAULT exampletest-no-data-check utiltest-no-data-check: .DEFAULT test_%: .DEFAULT ; # Note: md5sum is not installed on OS X, but openssl is. Openssl may not be # installed on cygwin, so we need to autodetect here. md5sum := $(firstword $(wildcard \ $(foreach e,md5sum openssl,\ $(foreach p,$(subst :, ,$(PATH)),$(p)/$(e)*))\ )) md5sum := $(if $(filter %openssl,$(md5sum)),$(md5sum) dgst -md5,$(md5sum)) TGT_CC:=$(word 3, $(subst -, ,$(TOOLCHAIN))) dist: @for t in $(ALL_TARGETS); do \ $(MAKE) --no-print-directory target=$$t $(MAKECMDGOALS) || exit $$?;\ done # Run configure for the user with the current toolchain. @if [ -d "$(DIST_DIR)/src" ]; then \ mkdir -p "$(DIST_DIR)/build"; \ cd "$(DIST_DIR)/build"; \ echo "Rerunning configure $(CONFIGURE_ARGS)"; \ ../src/configure $(CONFIGURE_ARGS); \ $(if $(filter vs%,$(TGT_CC)),make NO_LAUNCH_DEVENV=1;) \ fi @if [ -d "$(DIST_DIR)" ]; then \ echo " [MD5SUM] $(DIST_DIR)"; \ cd $(DIST_DIR) && \ $(md5sum) `find . -name md5sums.txt -prune -o -type f -print` \ | sed -e 's/MD5(\(.*\))= \([0-9a-f]\{32\}\)/\2 \1/' \ > md5sums.txt;\ fi endif # Since we invoke make recursively for multiple targets we need to include the # .mk file for the correct target, but only when $(target) is non-empty. ifneq ($(target),) include $(target)-$(TOOLCHAIN).mk endif BUILD_ROOT?=. VPATH=$(SRC_PATH_BARE) CFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH) CXXFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT) -I$(SRC_PATH) ASFLAGS+=-I$(BUILD_PFX)$(BUILD_ROOT)/ -I$(SRC_PATH)/ DIST_DIR?=dist HOSTCC?=gcc TGT_ISA:=$(word 1, $(subst -, ,$(TOOLCHAIN))) TGT_OS:=$(word 2, $(subst -, ,$(TOOLCHAIN))) TGT_CC:=$(word 3, $(subst -, ,$(TOOLCHAIN))) quiet:=$(if $(or $(verbose), $(V)),, yes) qexec=$(if $(quiet),@) # Cancel built-in implicit rules %: %.o %.asm: %.a: %: %.cc # # Common rules" # .PHONY: all all: .PHONY: clean clean:: rm -f $(OBJS-yes) $(OBJS-yes:.o=.d) $(OBJS-yes:.asm.S.o=.asm.S) rm -f $(CLEAN-OBJS) .PHONY: clean distclean: clean if [ -z "$(target)" ]; then \ rm -f Makefile; \ rm -f config.log config.mk; \ rm -f vpx_config.[hc] vpx_config.asm; \ rm -f arm_neon.h; \ else \ rm -f $(target)-$(TOOLCHAIN).mk; \ fi .PHONY: dist dist: .PHONY: exampletest exampletest: .PHONY: install install:: .PHONY: test test:: .PHONY: testdata testdata:: .PHONY: utiltest utiltest: .PHONY: test-no-data-check exampletest-no-data-check utiltest-no-data-check test-no-data-check:: exampletest-no-data-check utiltest-no-data-check: # Force to realign stack always on OS/2 ifeq ($(TOOLCHAIN), x86-os2-gcc) CFLAGS += -mstackrealign endif # x86[_64] $(BUILD_PFX)%_mmx.c.d: CFLAGS += -mmmx $(BUILD_PFX)%_mmx.c.o: CFLAGS += -mmmx $(BUILD_PFX)%_sse2.c.d: CFLAGS += -msse2 $(BUILD_PFX)%_sse2.c.o: CFLAGS += -msse2 $(BUILD_PFX)%_sse3.c.d: CFLAGS += -msse3 $(BUILD_PFX)%_sse3.c.o: CFLAGS += -msse3 $(BUILD_PFX)%_ssse3.c.d: CFLAGS += -mssse3 $(BUILD_PFX)%_ssse3.c.o: CFLAGS += -mssse3 $(BUILD_PFX)%_sse4.c.d: CFLAGS += -msse4.1 $(BUILD_PFX)%_sse4.c.o: CFLAGS += -msse4.1 $(BUILD_PFX)%_avx.c.d: CFLAGS += -mavx $(BUILD_PFX)%_avx.c.o: CFLAGS += -mavx $(BUILD_PFX)%_avx2.c.d: CFLAGS += -mavx2 $(BUILD_PFX)%_avx2.c.o: CFLAGS += -mavx2 $(BUILD_PFX)%_avx512.c.d: CFLAGS += -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl $(BUILD_PFX)%_avx512.c.o: CFLAGS += -mavx512f -mavx512cd -mavx512bw 
-mavx512dq -mavx512vl # POWER $(BUILD_PFX)%_vsx.c.d: CFLAGS += -maltivec -mvsx $(BUILD_PFX)%_vsx.c.o: CFLAGS += -maltivec -mvsx $(BUILD_PFX)%.c.d: %.c $(if $(quiet),@echo " [DEP] $@") $(qexec)mkdir -p $(dir $@) $(qexec)$(CC) $(INTERNAL_CFLAGS) $(CFLAGS) -M $< | $(fmt_deps) > $@ $(BUILD_PFX)%.c.o: %.c $(if $(quiet),@echo " [CC] $@") $(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@)) $(qexec)$(CC) $(INTERNAL_CFLAGS) $(CFLAGS) -c -o $@ $< $(BUILD_PFX)%.cc.d: %.cc $(if $(quiet),@echo " [DEP] $@") $(qexec)mkdir -p $(dir $@) $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -M $< | $(fmt_deps) > $@ $(BUILD_PFX)%.cc.o: %.cc $(if $(quiet),@echo " [CXX] $@") $(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@)) $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $< $(BUILD_PFX)%.cpp.d: %.cpp $(if $(quiet),@echo " [DEP] $@") $(qexec)mkdir -p $(dir $@) $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -M $< | $(fmt_deps) > $@ $(BUILD_PFX)%.cpp.o: %.cpp $(if $(quiet),@echo " [CXX] $@") $(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@)) $(qexec)$(CXX) $(INTERNAL_CFLAGS) $(CXXFLAGS) -c -o $@ $< $(BUILD_PFX)%.asm.d: %.asm $(if $(quiet),@echo " [DEP] $@") $(qexec)mkdir -p $(dir $@) $(qexec)$(SRC_PATH_BARE)/build/make/gen_asm_deps.sh \ --build-pfx=$(BUILD_PFX) --depfile=$@ $(ASFLAGS) $< > $@ $(BUILD_PFX)%.asm.o: %.asm $(if $(quiet),@echo " [AS] $@") $(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@)) $(qexec)$(AS) $(ASFLAGS) -o $@ $< $(BUILD_PFX)%.S.d: %.S $(if $(quiet),@echo " [DEP] $@") $(qexec)mkdir -p $(dir $@) $(qexec)$(SRC_PATH_BARE)/build/make/gen_asm_deps.sh \ --build-pfx=$(BUILD_PFX) --depfile=$@ $(ASFLAGS) $< > $@ $(BUILD_PFX)%.S.o: %.S $(if $(quiet),@echo " [AS] $@") $(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@)) $(qexec)$(AS) $(ASFLAGS) -o $@ $< .PRECIOUS: %.c.S %.c.S: CFLAGS += -DINLINE_ASM $(BUILD_PFX)%.c.S: %.c $(if $(quiet),@echo " [GEN] $@") $(qexec)$(if $(CONFIG_DEPENDENCY_TRACKING),,mkdir -p $(dir $@)) $(qexec)$(CC) -S $(CFLAGS) -o $@ $< .PRECIOUS: %.asm.S $(BUILD_PFX)%.asm.S: %.asm $(if $(quiet),@echo " [ASM CONVERSION] $@") $(qexec)mkdir -p $(dir $@) $(qexec)$(ASM_CONVERSION) <$< >$@ # If we're in debug mode, pretend we don't have GNU strip, to fall back to # the copy implementation HAVE_GNU_STRIP := $(if $(CONFIG_DEBUG),,$(HAVE_GNU_STRIP)) ifeq ($(HAVE_GNU_STRIP),yes) # Older binutils strip global symbols not needed for relocation processing # when given --strip-unneeded. Using nm and awk to identify globals and # keep them caused command line length issues under mingw and segfaults in # test_libvpx were observed under OS/2: simply use --strip-debug. 
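# The rules below map the debug archive to the stripped one, e.g. libvpx.a
# is produced from the intermediate libvpx_g.a.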
%.a: %_g.a $(if $(quiet),@echo " [STRIP] $@ < $<") $(qexec)$(STRIP) --strip-debug \ -o $@ $< else %.a: %_g.a $(if $(quiet),@echo " [CP] $@ < $<") $(qexec)cp $< $@ endif # # Utility functions # pairmap=$(if $(strip $(2)),\ $(call $(1),$(word 1,$(2)),$(word 2,$(2)))\ $(call pairmap,$(1),$(wordlist 3,$(words $(2)),$(2)))\ ) enabled=$(filter-out $($(1)-no),$($(1)-yes)) cond_enabled=$(if $(filter yes,$($(1))), $(call enabled,$(2))) find_file1=$(word 1,$(wildcard $(subst //,/,$(addsuffix /$(1),$(2))))) find_file=$(foreach f,$(1),$(call find_file1,$(strip $(f)),$(strip $(2))) ) obj_pats=.c=.c.o $(AS_SFX)=$(AS_SFX).o .cc=.cc.o .cpp=.cpp.o objs=$(addprefix $(BUILD_PFX),$(foreach p,$(obj_pats),$(filter %.o,$(1:$(p))) )) install_map_templates=$(eval $(call install_map_template,$(1),$(2))) not=$(subst yes,no,$(1)) ifeq ($(CONFIG_MSVS),yes) lib_file_name=$(1).lib else lib_file_name=lib$(1).a endif # # Rule Templates # define linker_template $(1): $(filter-out -%,$(2)) $(1): $(if $(quiet),@echo " [LD] $$@") $(qexec)$$(LD) $$(strip $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -o $$@ $(2) $(3) $$(extralibs)) endef define linkerxx_template $(1): $(filter-out -%,$(2)) $(1): $(if $(quiet),@echo " [LD] $$@") $(qexec)$$(CXX) $$(strip $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -o $$@ $(2) $(3) $$(extralibs)) endef # make-3.80 has a bug with expanding large input strings to the eval function, # which was triggered in some cases by the following component of # linker_template: # $(1): $$(call find_file, $(patsubst -l%,lib%.a,$(filter -l%,$(2))),\ # $$(patsubst -L%,%,$$(filter -L%,$$(LDFLAGS) $(2)))) # This may be useful to revisit in the future (it tries to locate libraries # in a search path and add them as prerequisites define install_map_template $(DIST_DIR)/$(1): $(2) $(if $(quiet),@echo " [INSTALL] $$@") $(qexec)mkdir -p $$(dir $$@) $(qexec)cp -p $$< $$@ endef define archive_template # Not using a pattern rule here because we don't want to generate empty # archives when they are listed as a dependency in files not responsible # for creating them. $(1): $(if $(quiet),@echo " [AR] $$@") $(qexec)$$(AR) $$(ARFLAGS) $$@ $$^ endef define so_template # Not using a pattern rule here because we don't want to generate empty # archives when they are listed as a dependency in files not responsible # for creating them. # # This needs further abstraction for dealing with non-GNU linkers. $(1): $(if $(quiet),@echo " [LD] $$@") $(qexec)$$(LD) -shared $$(LDFLAGS) \ -Wl,--no-undefined -Wl,-soname,$$(SONAME) \ -Wl,--version-script,$$(EXPORTS_FILE) -o $$@ \ $$(filter %.o,$$^) $$(extralibs) endef define dl_template # Not using a pattern rule here because we don't want to generate empty # archives when they are listed as a dependency in files not responsible # for creating them. $(1): $(if $(quiet),@echo " [LD] $$@") $(qexec)$$(LD) -dynamiclib $$(LDFLAGS) \ -exported_symbols_list $$(EXPORTS_FILE) \ -Wl,-headerpad_max_install_names,-compatibility_version,1.0,-current_version,$$(VERSION_MAJOR) \ -o $$@ \ $$(filter %.o,$$^) $$(extralibs) endef define dll_template # Not using a pattern rule here because we don't want to generate empty # archives when they are listed as a dependency in files not responsible # for creating them. 
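# $(1) is the DLL file name supplied when this template is $(eval)d from the
# $(LIBS) filter loop further below.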
$(1): $(if $(quiet),@echo " [LD] $$@") $(qexec)$$(LD) -Zdll $$(LDFLAGS) \ -o $$@ \ $$(filter %.o,$$^) $$(extralibs) $$(EXPORTS_FILE) endef # # Get current configuration # ifneq ($(target),) include $(SRC_PATH_BARE)/$(target:-$(TOOLCHAIN)=).mk endif skip_deps := $(filter %clean,$(MAKECMDGOALS)) skip_deps += $(findstring testdata,$(MAKECMDGOALS)) ifeq ($(strip $(skip_deps)),) ifeq ($(CONFIG_DEPENDENCY_TRACKING),yes) # Older versions of make don't like -include directives with no arguments ifneq ($(filter %.d,$(OBJS-yes:.o=.d)),) -include $(filter %.d,$(OBJS-yes:.o=.d)) endif endif endif # # Configuration dependent rules # $(call pairmap,install_map_templates,$(INSTALL_MAPS)) DOCS=$(call cond_enabled,CONFIG_INSTALL_DOCS,DOCS) .docs: $(DOCS) @touch $@ INSTALL-DOCS=$(call cond_enabled,CONFIG_INSTALL_DOCS,INSTALL-DOCS) ifeq ($(MAKECMDGOALS),dist) INSTALL-DOCS+=$(call cond_enabled,CONFIG_INSTALL_DOCS,DIST-DOCS) endif .install-docs: .docs $(addprefix $(DIST_DIR)/,$(INSTALL-DOCS)) @touch $@ clean:: rm -f .docs .install-docs $(DOCS) BINS=$(call enabled,BINS) .bins: $(BINS) @touch $@ INSTALL-BINS=$(call cond_enabled,CONFIG_INSTALL_BINS,INSTALL-BINS) ifeq ($(MAKECMDGOALS),dist) INSTALL-BINS+=$(call cond_enabled,CONFIG_INSTALL_BINS,DIST-BINS) endif .install-bins: .bins $(addprefix $(DIST_DIR)/,$(INSTALL-BINS)) @touch $@ clean:: rm -f .bins .install-bins $(BINS) LIBS=$(call enabled,LIBS) .libs: $(LIBS) @touch $@ $(foreach lib,$(filter %_g.a,$(LIBS)),$(eval $(call archive_template,$(lib)))) $(foreach lib,$(filter %so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH),$(LIBS)),$(eval $(call so_template,$(lib)))) $(foreach lib,$(filter %$(SO_VERSION_MAJOR).dylib,$(LIBS)),$(eval $(call dl_template,$(lib)))) $(foreach lib,$(filter %$(SO_VERSION_MAJOR).dll,$(LIBS)),$(eval $(call dll_template,$(lib)))) INSTALL-LIBS=$(call cond_enabled,CONFIG_INSTALL_LIBS,INSTALL-LIBS) ifeq ($(MAKECMDGOALS),dist) INSTALL-LIBS+=$(call cond_enabled,CONFIG_INSTALL_LIBS,DIST-LIBS) endif .install-libs: .libs $(addprefix $(DIST_DIR)/,$(INSTALL-LIBS)) @touch $@ clean:: rm -f .libs .install-libs $(LIBS) ifeq ($(CONFIG_EXTERNAL_BUILD),yes) PROJECTS=$(call enabled,PROJECTS) .projects: $(PROJECTS) @touch $@ INSTALL-PROJECTS=$(call cond_enabled,CONFIG_INSTALL_PROJECTS,INSTALL-PROJECTS) ifeq ($(MAKECMDGOALS),dist) INSTALL-PROJECTS+=$(call cond_enabled,CONFIG_INSTALL_PROJECTS,DIST-PROJECTS) endif .install-projects: .projects $(addprefix $(DIST_DIR)/,$(INSTALL-PROJECTS)) @touch $@ clean:: rm -f .projects .install-projects $(PROJECTS) endif # If there are any source files to be distributed, then include the build # system too. 
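# For example, when the goal is `dist`, .install-srcs below depends on
# $(DIST_DIR)/src/<file> for each entry listed here (configure, this
# Makefile, and the helper scripts), so they are staged into the
# distribution tree.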
ifneq ($(call enabled,DIST-SRCS),) DIST-SRCS-yes += configure DIST-SRCS-yes += build/make/configure.sh DIST-SRCS-yes += build/make/gen_asm_deps.sh DIST-SRCS-yes += build/make/Makefile DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_def.sh DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_sln.sh DIST-SRCS-$(CONFIG_MSVS) += build/make/gen_msvs_vcxproj.sh DIST-SRCS-$(CONFIG_MSVS) += build/make/msvs_common.sh DIST-SRCS-$(CONFIG_RVCT) += build/make/armlink_adapter.sh DIST-SRCS-$(VPX_ARCH_ARM) += build/make/ads2gas.pl DIST-SRCS-$(VPX_ARCH_ARM) += build/make/ads2gas_apple.pl DIST-SRCS-$(VPX_ARCH_ARM) += build/make/ads2armasm_ms.pl DIST-SRCS-$(VPX_ARCH_ARM) += build/make/thumb.pm DIST-SRCS-yes += $(target:-$(TOOLCHAIN)=).mk endif INSTALL-SRCS := $(call cond_enabled,CONFIG_INSTALL_SRCS,INSTALL-SRCS) ifeq ($(MAKECMDGOALS),dist) INSTALL-SRCS += $(call cond_enabled,CONFIG_INSTALL_SRCS,DIST-SRCS) endif .install-srcs: $(addprefix $(DIST_DIR)/src/,$(INSTALL-SRCS)) @touch $@ clean:: rm -f .install-srcs ifeq ($(CONFIG_EXTERNAL_BUILD),yes) BUILD_TARGETS += .projects INSTALL_TARGETS += .install-projects endif BUILD_TARGETS += .docs .libs .bins INSTALL_TARGETS += .install-docs .install-srcs .install-libs .install-bins all: $(BUILD_TARGETS) install:: $(INSTALL_TARGETS) dist: $(INSTALL_TARGETS) test:: .SUFFIXES: # Delete default suffix rules libvpx-1.8.2/build/make/ads2armasm_ms.pl000077500000000000000000000016251357355204000201430ustar00rootroot00000000000000#!/usr/bin/env perl ## ## Copyright (c) 2013 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## use FindBin; use lib $FindBin::Bin; use thumb; print "; This file was created from a .asm file\n"; print "; using the ads2armasm_ms.pl script.\n"; while () { undef $comment; undef $line; s/REQUIRE8//; s/PRESERVE8//; s/^\s*ARM\s*$//; s/AREA\s+\|\|(.*)\|\|/AREA |$1|/; s/qsubaddx/qsax/i; s/qaddsubx/qasx/i; thumb::FixThumbInstructions($_, 1); s/ldrneb/ldrbne/i; s/ldrneh/ldrhne/i; s/^(\s*)ENDP.*/$&\n$1ALIGN 4/; print; } libvpx-1.8.2/build/make/ads2gas.pl000077500000000000000000000133021357355204000167310ustar00rootroot00000000000000#!/usr/bin/env perl ## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## # ads2gas.pl # Author: Eric Fung (efung (at) acm.org) # # Convert ARM Developer Suite 1.0.1 syntax assembly source to GNU as format # # Usage: cat inputfile | perl ads2gas.pl > outputfile # use FindBin; use lib $FindBin::Bin; use thumb; my $thumb = 0; my $elf = 1; foreach my $arg (@ARGV) { $thumb = 1 if ($arg eq "-thumb"); $elf = 0 if ($arg eq "-noelf"); } print "@ This file was created from a .asm file\n"; print "@ using the ads2gas.pl script.\n"; print "\t.syntax unified\n"; if ($thumb) { print "\t.thumb\n"; } # Stack of procedure names. @proc_stack = (); while () { undef $comment; undef $line; $comment_char = ";"; $comment_sub = "@"; # Handle comments. 
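    # e.g. "add r0, r0, r1    ; accumulate" has its comment split off here
    # and re-emitted after the converted line at the bottom of this loop,
    # prefixed with the GNU as comment character '@'.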
if (/$comment_char/) { $comment = ""; ($line, $comment) = /(.*?)$comment_char(.*)/; $_ = $line; } # Load and store alignment s/@/,:/g; # Hexadecimal constants prefaced by 0x s/#&/#0x/g; # Convert :OR: to | s/:OR:/ | /g; # Convert :AND: to & s/:AND:/ & /g; # Convert :NOT: to ~ s/:NOT:/ ~ /g; # Convert :SHL: to << s/:SHL:/ << /g; # Convert :SHR: to >> s/:SHR:/ >> /g; # Convert ELSE to .else s/\bELSE\b/.else/g; # Convert ENDIF to .endif s/\bENDIF\b/.endif/g; # Convert ELSEIF to .elseif s/\bELSEIF\b/.elseif/g; # Convert LTORG to .ltorg s/\bLTORG\b/.ltorg/g; # Convert endfunc to nothing. s/\bendfunc\b//ig; # Convert FUNCTION to nothing. s/\bFUNCTION\b//g; s/\bfunction\b//g; s/\bENTRY\b//g; s/\bMSARMASM\b/0/g; s/^\s+end\s+$//g; # Convert IF :DEF:to .if # gcc doesn't have the ability to do a conditional # if defined variable that is set by IF :DEF: on # armasm, so convert it to a normal .if and then # make sure to define a value elesewhere if (s/\bIF :DEF:\b/.if /g) { s/=/==/g; } # Convert IF to .if if (s/\bIF\b/.if/g) { s/=+/==/g; } # Convert INCLUDE to .INCLUDE "file" s/INCLUDE(\s*)(.*)$/.include $1\"$2\"/; # Code directive (ARM vs Thumb) s/CODE([0-9][0-9])/.code $1/; # No AREA required # But ALIGNs in AREA must be obeyed s/^\s*AREA.*ALIGN=([0-9])$/.text\n.p2align $1/; # If no ALIGN, strip the AREA and align to 4 bytes s/^\s*AREA.*$/.text\n.p2align 2/; # DCD to .word # This one is for incoming symbols s/DCD\s+\|(\w*)\|/.long $1/; # DCW to .short s/DCW\s+\|(\w*)\|/.short $1/; s/DCW(.*)/.short $1/; # Constants defined in scope s/DCD(.*)/.long $1/; s/DCB(.*)/.byte $1/; # Make function visible to linker, and make additional symbol with # prepended underscore if ($elf) { s/EXPORT\s+\|([\$\w]*)\|/.global $1 \n\t.type $1, function/; } else { s/EXPORT\s+\|([\$\w]*)\|/.global $1/; } s/IMPORT\s+\|([\$\w]*)\|/.global $1/; s/EXPORT\s+([\$\w]*)/.global $1/; s/export\s+([\$\w]*)/.global $1/; # No vertical bars required; make additional symbol with prepended # underscore s/^\|(\$?\w+)\|/_$1\n\t$1:/g; # Labels need trailing colon # s/^(\w+)/$1:/ if !/EQU/; # put the colon at the end of the line in the macro s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/; # ALIGN directive s/\bALIGN\b/.balign/g; if ($thumb) { # ARM code - we force everything to thumb with the declaration in the header s/\sARM//g; } else { # ARM code s/\sARM/.arm/g; } # push/pop s/(push\s+)(r\d+)/stmdb sp\!, \{$2\}/g; s/(pop\s+)(r\d+)/ldmia sp\!, \{$2\}/g; # NEON code s/(vld1.\d+\s+)(q\d+)/$1\{$2\}/g; s/(vtbl.\d+\s+[^,]+),([^,]+)/$1,\{$2\}/g; if ($thumb) { thumb::FixThumbInstructions($_, 0); } # eabi_attributes numerical equivalents can be found in the # "ARM IHI 0045C" document. if ($elf) { # REQUIRE8 Stack is required to be 8-byte aligned s/\sREQUIRE8/.eabi_attribute 24, 1 \@Tag_ABI_align_needed/g; # PRESERVE8 Stack 8-byte align is preserved s/\sPRESERVE8/.eabi_attribute 25, 1 \@Tag_ABI_align_preserved/g; } else { s/\sREQUIRE8//; s/\sPRESERVE8//; } # Use PROC and ENDP to give the symbols a .size directive. # This makes them show up properly in debugging tools like gdb and valgrind. 
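    # e.g. a (hypothetical) "|vpx_foo| PROC ... ENDP" pair, after the label
    # rewriting above, emits "\t.size vpx_foo, .-vpx_foo" at ENDP when
    # targeting ELF.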
if (/\bPROC\b/) { my $proc; /^_([\.0-9A-Z_a-z]\w+)\b/; $proc = $1; push(@proc_stack, $proc) if ($proc); s/\bPROC\b/@ $&/; } if (/\bENDP\b/) { my $proc; s/\bENDP\b/@ $&/; $proc = pop(@proc_stack); $_ = "\t.size $proc, .-$proc".$_ if ($proc and $elf); } # EQU directive s/(\S+\s+)EQU(\s+\S+)/.equ $1, $2/; # Begin macro definition if (/\bMACRO\b/) { $_ = ; s/^/.macro/; s/\$//g; # remove formal param reference s/;/@/g; # change comment characters } # For macros, use \ to reference formal params s/\$/\\/g; # End macro definition s/\bMEND\b/.endm/; # No need to tell it where to stop assembling next if /^\s*END\s*$/; print; print "$comment_sub$comment\n" if defined $comment; } # Mark that this object doesn't need an executable stack. printf ("\t.section\t.note.GNU-stack,\"\",\%\%progbits\n") if $elf; libvpx-1.8.2/build/make/ads2gas_apple.pl000077500000000000000000000111441357355204000201140ustar00rootroot00000000000000#!/usr/bin/env perl ## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## # ads2gas_apple.pl # Author: Eric Fung (efung (at) acm.org) # # Convert ARM Developer Suite 1.0.1 syntax assembly source to GNU as format # # Usage: cat inputfile | perl ads2gas_apple.pl > outputfile # print "@ This file was created from a .asm file\n"; print "@ using the ads2gas_apple.pl script.\n\n"; print "\t.syntax unified\n"; my %register_aliases; my %macro_aliases; my @mapping_list = ("\$0", "\$1", "\$2", "\$3", "\$4", "\$5", "\$6", "\$7", "\$8", "\$9"); my @incoming_array; my @imported_functions; # Perl trim function to remove whitespace from the start and end of the string sub trim($) { my $string = shift; $string =~ s/^\s+//; $string =~ s/\s+$//; return $string; } while () { # Load and store alignment s/@/,:/g; # Comment character s/;/ @/g; # Hexadecimal constants prefaced by 0x s/#&/#0x/g; # Convert :OR: to | s/:OR:/ | /g; # Convert :AND: to & s/:AND:/ & /g; # Convert :NOT: to ~ s/:NOT:/ ~ /g; # Convert :SHL: to << s/:SHL:/ << /g; # Convert :SHR: to >> s/:SHR:/ >> /g; # Convert ELSE to .else s/\bELSE\b/.else/g; # Convert ENDIF to .endif s/\bENDIF\b/.endif/g; # Convert ELSEIF to .elseif s/\bELSEIF\b/.elseif/g; # Convert LTORG to .ltorg s/\bLTORG\b/.ltorg/g; # Convert IF :DEF:to .if # gcc doesn't have the ability to do a conditional # if defined variable that is set by IF :DEF: on # armasm, so convert it to a normal .if and then # make sure to define a value elesewhere if (s/\bIF :DEF:\b/.if /g) { s/=/==/g; } # Convert IF to .if if (s/\bIF\b/.if/g) { s/=/==/g; } # Convert INCLUDE to .INCLUDE "file" s/INCLUDE(\s*)(.*)$/.include $1\"$2\"/; # Code directive (ARM vs Thumb) s/CODE([0-9][0-9])/.code $1/; # No AREA required # But ALIGNs in AREA must be obeyed s/^\s*AREA.*ALIGN=([0-9])$/.text\n.p2align $1/; # If no ALIGN, strip the AREA and align to 4 bytes s/^\s*AREA.*$/.text\n.p2align 2/; # DCD to .word # This one is for incoming symbols s/DCD\s+\|(\w*)\|/.long $1/; # DCW to .short s/DCW\s+\|(\w*)\|/.short $1/; s/DCW(.*)/.short $1/; # Constants defined in scope s/DCD(.*)/.long $1/; s/DCB(.*)/.byte $1/; # Make function visible to linker, and make additional symbol with # prepended underscore s/EXPORT\s+\|([\$\w]*)\|/.globl _$1\n\t.globl $1/; # Prepend imported 
functions with _ if (s/IMPORT\s+\|([\$\w]*)\|/.globl $1/) { $function = trim($1); push(@imported_functions, $function); } foreach $function (@imported_functions) { s/$function/_$function/; } # No vertical bars required; make additional symbol with prepended # underscore s/^\|(\$?\w+)\|/_$1\n\t$1:/g; # Labels need trailing colon # s/^(\w+)/$1:/ if !/EQU/; # put the colon at the end of the line in the macro s/^([a-zA-Z_0-9\$]+)/$1:/ if !/EQU/; # ALIGN directive s/\bALIGN\b/.balign/g; # Strip ARM s/\sARM/@ ARM/g; # Strip REQUIRE8 #s/\sREQUIRE8/@ REQUIRE8/g; s/\sREQUIRE8/@ /g; # Strip PRESERVE8 s/\sPRESERVE8/@ PRESERVE8/g; # Strip PROC and ENDPROC s/\bPROC\b/@/g; s/\bENDP\b/@/g; # EQU directive s/(.*)EQU(.*)/.set $1, $2/; # Begin macro definition if (/\bMACRO\b/) { # Process next line down, which will be the macro definition $_ = ; $trimmed = trim($_); # remove commas that are separating list $trimmed =~ s/,//g; # string to array @incoming_array = split(/\s+/, $trimmed); print ".macro @incoming_array[0]\n"; # remove the first element, as that is the name of the macro shift (@incoming_array); @macro_aliases{@incoming_array} = @mapping_list; next; } while (($key, $value) = each(%macro_aliases)) { $key =~ s/\$/\\\$/; s/$key\b/$value/g; } # For macros, use \ to reference formal params # s/\$/\\/g; # End macro definition s/\bMEND\b/.endm/; # No need to tell it where to stop assembling next if /^\s*END\s*$/; print; } libvpx-1.8.2/build/make/armlink_adapter.sh000077500000000000000000000030471357355204000205460ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## verbose=0 set -- $* for i; do if [ "$i" = "-o" ]; then on_of=1 elif [ "$i" = "-v" ]; then verbose=1 elif [ "$i" = "-g" ]; then args="${args} --debug" elif [ "$on_of" = "1" ]; then outfile=$i on_of=0 elif [ -f "$i" ]; then infiles="$infiles $i" elif [ "${i#-l}" != "$i" ]; then libs="$libs ${i#-l}" elif [ "${i#-L}" != "$i" ]; then libpaths="${libpaths} ${i#-L}" else args="${args} ${i}" fi shift done # Absolutize library file names for f in $libs; do found=0 for d in $libpaths; do [ -f "$d/$f" ] && infiles="$infiles $d/$f" && found=1 && break [ -f "$d/lib${f}.so" ] && infiles="$infiles $d/lib${f}.so" && found=1 && break [ -f "$d/lib${f}.a" ] && infiles="$infiles $d/lib${f}.a" && found=1 && break done [ $found -eq 0 ] && infiles="$infiles $f" done for d in $libpaths; do [ -n "$libsearchpath" ] && libsearchpath="${libsearchpath}," libsearchpath="${libsearchpath}$d" done cmd="armlink $args --userlibpath=$libsearchpath --output=$outfile $infiles" [ $verbose -eq 1 ] && echo $cmd $cmd libvpx-1.8.2/build/make/configure.sh000066400000000000000000001263271357355204000173760ustar00rootroot00000000000000#!/bin/sh ## ## configure.sh ## ## This script is sourced by the main configure script and contains ## utility functions and other common bits that aren't strictly libvpx ## related. ## ## This build system is based in part on the FFmpeg configure script. ## # # Logging / Output Functions # die_unknown(){ echo "Unknown option \"$1\"." echo "See $0 --help for available options." clean_temp_files exit 1 } die() { echo "$@" echo echo "Configuration failed. 
This could reflect a misconfiguration of your" echo "toolchains, improper options selected, or another problem. If you" echo "don't see any useful error messages above, the next step is to look" echo "at the configure error log file ($logfile) to determine what" echo "configure was trying to do when it died." clean_temp_files exit 1 } log(){ echo "$@" >>$logfile } log_file(){ log BEGIN $1 cat -n $1 >>$logfile log END $1 } log_echo() { echo "$@" log "$@" } fwrite () { outfile=$1 shift echo "$@" >> ${outfile} } show_help_pre(){ for opt in ${CMDLINE_SELECT}; do opt2=`echo $opt | sed -e 's;_;-;g'` if enabled $opt; then eval "toggle_${opt}=\"--disable-${opt2}\"" else eval "toggle_${opt}=\"--enable-${opt2} \"" fi done cat <>${logfile} 2>&1 } check_cc() { log check_cc "$@" cat >${TMP_C} log_file ${TMP_C} check_cmd ${CC} ${CFLAGS} "$@" -c -o ${TMP_O} ${TMP_C} } check_cxx() { log check_cxx "$@" cat >${TMP_CC} log_file ${TMP_CC} check_cmd ${CXX} ${CXXFLAGS} "$@" -c -o ${TMP_O} ${TMP_CC} } check_cpp() { log check_cpp "$@" cat > ${TMP_C} log_file ${TMP_C} check_cmd ${CC} ${CFLAGS} "$@" -E -o ${TMP_O} ${TMP_C} } check_ld() { log check_ld "$@" check_cc $@ \ && check_cmd ${LD} ${LDFLAGS} "$@" -o ${TMP_X} ${TMP_O} ${extralibs} } check_lib() { log check_lib "$@" check_cc $@ \ && check_cmd ${LD} ${LDFLAGS} -o ${TMP_X} ${TMP_O} "$@" ${extralibs} } check_header(){ log check_header "$@" header=$1 shift var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'` disable_feature $var check_cpp "$@" <${TMP_ASM} <${TMP_X} log_file ${TMP_X} if ! grep -q '\.rodata .* 16$' ${TMP_X}; then die "${AS} ${ASFLAGS} does not support section alignment (nasm <=2.08?)" fi } # tests for -m$1 toggling the feature given in $2. If $2 is empty $1 is used. check_gcc_machine_option() { opt="$1" feature="$2" [ -n "$feature" ] || feature="$opt" if enabled gcc && ! disabled "$feature" && ! check_cflags "-m$opt"; then RTCD_OPTIONS="${RTCD_OPTIONS}--disable-$feature " else soft_enable "$feature" fi } # tests for -m$2, -m$3, -m$4... toggling the feature given in $1. check_gcc_machine_options() { feature="$1" shift flags="-m$1" shift for opt in $*; do flags="$flags -m$opt" done if enabled gcc && ! disabled "$feature" && ! check_cflags $flags; then RTCD_OPTIONS="${RTCD_OPTIONS}--disable-$feature " else soft_enable "$feature" fi } check_gcc_avx512_compiles() { if disabled gcc; then return fi check_cc -mavx512f < void f(void) { __m512i x = _mm512_set1_epi16(0); (void)x; } EOF compile_result=$? if [ ${compile_result} -ne 0 ]; then log_echo " disabling avx512: not supported by compiler" disable_feature avx512 RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx512 " fi } write_common_config_banner() { print_webm_license config.mk "##" "" echo '# This file automatically generated by configure. Do not edit!' >> config.mk echo "TOOLCHAIN := ${toolchain}" >> config.mk case ${toolchain} in *-linux-rvct) echo "ALT_LIBC := ${alt_libc}" >> config.mk ;; esac } write_common_config_targets() { for t in ${all_targets}; do if enabled ${t}; then if enabled child; then fwrite config.mk "ALL_TARGETS += ${t}-${toolchain}" else fwrite config.mk "ALL_TARGETS += ${t}" fi fi true; done true } write_common_target_config_mk() { saved_CC="${CC}" saved_CXX="${CXX}" enabled ccache && CC="ccache ${CC}" enabled ccache && CXX="ccache ${CXX}" print_webm_license $1 "##" "" cat >> $1 << EOF # This file automatically generated by configure. Do not edit! 
SRC_PATH="$source_path" SRC_PATH_BARE=$source_path BUILD_PFX=${BUILD_PFX} TOOLCHAIN=${toolchain} ASM_CONVERSION=${asm_conversion_cmd:-${source_path}/build/make/ads2gas.pl} GEN_VCPROJ=${gen_vcproj_cmd} MSVS_ARCH_DIR=${msvs_arch_dir} CC=${CC} CXX=${CXX} AR=${AR} LD=${LD} AS=${AS} STRIP=${STRIP} NM=${NM} CFLAGS = ${CFLAGS} CXXFLAGS = ${CXXFLAGS} ARFLAGS = -crs\$(if \$(quiet),,v) LDFLAGS = ${LDFLAGS} ASFLAGS = ${ASFLAGS} extralibs = ${extralibs} AS_SFX = ${AS_SFX:-.asm} EXE_SFX = ${EXE_SFX} VCPROJ_SFX = ${VCPROJ_SFX} RTCD_OPTIONS = ${RTCD_OPTIONS} LIBYUV_CXXFLAGS = ${LIBYUV_CXXFLAGS} EOF if enabled rvct; then cat >> $1 << EOF fmt_deps = sed -e 's;^__image.axf;\${@:.d=.o} \$@;' #hide EOF else cat >> $1 << EOF fmt_deps = sed -e 's;^\([a-zA-Z0-9_]*\)\.o;\${@:.d=.o} \$@;' EOF fi print_config_mk VPX_ARCH "${1}" ${ARCH_LIST} print_config_mk HAVE "${1}" ${HAVE_LIST} print_config_mk CONFIG "${1}" ${CONFIG_LIST} print_config_mk HAVE "${1}" gnu_strip enabled msvs && echo "CONFIG_VS_VERSION=${vs_version}" >> "${1}" CC="${saved_CC}" CXX="${saved_CXX}" } write_common_target_config_h() { print_webm_license ${TMP_H} "/*" " */" cat >> ${TMP_H} << EOF /* This file automatically generated by configure. Do not edit! */ #ifndef VPX_CONFIG_H #define VPX_CONFIG_H #define RESTRICT ${RESTRICT} #define INLINE ${INLINE} EOF print_config_h VPX_ARCH "${TMP_H}" ${ARCH_LIST} print_config_h HAVE "${TMP_H}" ${HAVE_LIST} print_config_h CONFIG "${TMP_H}" ${CONFIG_LIST} print_config_vars_h "${TMP_H}" ${VAR_LIST} echo "#endif /* VPX_CONFIG_H */" >> ${TMP_H} mkdir -p `dirname "$1"` cmp "$1" ${TMP_H} >/dev/null 2>&1 || mv ${TMP_H} "$1" } write_win_arm64_neon_h_workaround() { print_webm_license ${TMP_H} "/*" " */" cat >> ${TMP_H} << EOF /* This file automatically generated by configure. Do not edit! */ #ifndef VPX_WIN_ARM_NEON_H_WORKAROUND #define VPX_WIN_ARM_NEON_H_WORKAROUND /* The Windows SDK has arm_neon.h, but unlike on other platforms it is * ARM32-only. ARM64 NEON support is provided by arm64_neon.h, a proper * superset of arm_neon.h. Work around this by providing a more local * arm_neon.h that simply #includes arm64_neon.h. */ #include #endif /* VPX_WIN_ARM_NEON_H_WORKAROUND */ EOF mkdir -p `dirname "$1"` cmp "$1" ${TMP_H} >/dev/null 2>&1 || mv ${TMP_H} "$1" } process_common_cmdline() { for opt in "$@"; do optval="${opt#*=}" case "$opt" in --child) enable_feature child ;; --log*) logging="$optval" if ! disabled logging ; then enabled logging || logfile="$logging" else logfile=/dev/null fi ;; --target=*) toolchain="${toolchain:-${optval}}" ;; --force-target=*) toolchain="${toolchain:-${optval}}" enable_feature force_toolchain ;; --cpu=*) tune_cpu="$optval" ;; --extra-cflags=*) extra_cflags="${optval}" ;; --extra-cxxflags=*) extra_cxxflags="${optval}" ;; --enable-?*|--disable-?*) eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'` if is_in ${option} ${ARCH_EXT_LIST}; then [ $action = "disable" ] && RTCD_OPTIONS="${RTCD_OPTIONS}--disable-${option} " elif [ $action = "disable" ] && ! disabled $option ; then is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt log_echo " disabling $option" elif [ $action = "enable" ] && ! 
enabled $option ; then is_in ${option} ${CMDLINE_SELECT} || die_unknown $opt log_echo " enabling $option" fi ${action}_feature $option ;; --require-?*) eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'` if is_in ${option} ${ARCH_EXT_LIST}; then RTCD_OPTIONS="${RTCD_OPTIONS}${opt} " else die_unknown $opt fi ;; --force-enable-?*|--force-disable-?*) eval `echo "$opt" | sed 's/--force-/action=/;s/-/ option=/;s/-/_/g'` ${action}_feature $option ;; --libc=*) [ -d "${optval}" ] || die "Not a directory: ${optval}" disable_feature builtin_libc alt_libc="${optval}" ;; --as=*) [ "${optval}" = yasm ] || [ "${optval}" = nasm ] \ || [ "${optval}" = auto ] \ || die "Must be yasm, nasm or auto: ${optval}" alt_as="${optval}" ;; --size-limit=*) w="${optval%%x*}" h="${optval##*x}" VAR_LIST="DECODE_WIDTH_LIMIT ${w} DECODE_HEIGHT_LIMIT ${h}" [ ${w} -gt 0 ] && [ ${h} -gt 0 ] || die "Invalid size-limit: too small." [ ${w} -lt 65536 ] && [ ${h} -lt 65536 ] \ || die "Invalid size-limit: too big." enable_feature size_limit ;; --prefix=*) prefix="${optval}" ;; --libdir=*) libdir="${optval}" ;; --libc|--as|--prefix|--libdir) die "Option ${opt} requires argument" ;; --help|-h) show_help ;; *) die_unknown $opt ;; esac done } process_cmdline() { for opt do optval="${opt#*=}" case "$opt" in *) process_common_cmdline $opt ;; esac done } post_process_common_cmdline() { prefix="${prefix:-/usr/local}" prefix="${prefix%/}" libdir="${libdir:-${prefix}/lib}" libdir="${libdir%/}" if [ "${libdir#${prefix}}" = "${libdir}" ]; then die "Libdir ${libdir} must be a subdirectory of ${prefix}" fi } post_process_cmdline() { true; } setup_gnu_toolchain() { CC=${CC:-${CROSS}gcc} CXX=${CXX:-${CROSS}g++} AR=${AR:-${CROSS}ar} LD=${LD:-${CROSS}${link_with_cc:-ld}} AS=${AS:-${CROSS}as} STRIP=${STRIP:-${CROSS}strip} NM=${NM:-${CROSS}nm} AS_SFX=.S EXE_SFX= } # Reliably find the newest available Darwin SDKs. (Older versions of # xcrun don't support --show-sdk-path.) show_darwin_sdk_path() { xcrun --sdk $1 --show-sdk-path 2>/dev/null || xcodebuild -sdk $1 -version Path 2>/dev/null } # Print the major version number of the Darwin SDK specified by $1. show_darwin_sdk_major_version() { xcrun --sdk $1 --show-sdk-version 2>/dev/null | cut -d. -f1 } # Print the Xcode version. show_xcode_version() { xcodebuild -version | head -n1 | cut -d' ' -f2 } # Fails when Xcode version is less than 6.3. check_xcode_minimum_version() { xcode_major=$(show_xcode_version | cut -f1 -d.) xcode_minor=$(show_xcode_version | cut -f2 -d.) 
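  # e.g. an Xcode version of "7.2" yields xcode_major=7 and xcode_minor=2,
  # which satisfies the 6.3 minimum enforced below.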
xcode_min_major=6 xcode_min_minor=3 if [ ${xcode_major} -lt ${xcode_min_major} ]; then return 1 fi if [ ${xcode_major} -eq ${xcode_min_major} ] \ && [ ${xcode_minor} -lt ${xcode_min_minor} ]; then return 1 fi } process_common_toolchain() { if [ -z "$toolchain" ]; then gcctarget="${CHOST:-$(gcc -dumpmachine 2> /dev/null)}" # detect tgt_isa case "$gcctarget" in aarch64*) tgt_isa=arm64 ;; armv7*-hardfloat* | armv7*-gnueabihf | arm-*-gnueabihf) tgt_isa=armv7 float_abi=hard ;; armv7*) tgt_isa=armv7 float_abi=softfp ;; *x86_64*|*amd64*) tgt_isa=x86_64 ;; *i[3456]86*) tgt_isa=x86 ;; *sparc*) tgt_isa=sparc ;; power*64le*-*) tgt_isa=ppc64le ;; *mips64el*) tgt_isa=mips64 ;; *mips32el*) tgt_isa=mips32 ;; esac # detect tgt_os case "$gcctarget" in *darwin1[0-8]*) tgt_isa=x86_64 tgt_os=`echo $gcctarget | sed 's/.*\(darwin1[0-8]\).*/\1/'` ;; x86_64*mingw32*) tgt_os=win64 ;; x86_64*cygwin*) tgt_os=win64 ;; *mingw32*|*cygwin*) [ -z "$tgt_isa" ] && tgt_isa=x86 tgt_os=win32 ;; *linux*|*bsd*) tgt_os=linux ;; *solaris2.10) tgt_os=solaris ;; *os2*) tgt_os=os2 ;; esac if [ -n "$tgt_isa" ] && [ -n "$tgt_os" ]; then toolchain=${tgt_isa}-${tgt_os}-gcc fi fi toolchain=${toolchain:-generic-gnu} is_in ${toolchain} ${all_platforms} || enabled force_toolchain \ || die "Unrecognized toolchain '${toolchain}'" enabled child || log_echo "Configuring for target '${toolchain}'" # # Set up toolchain variables # tgt_isa=$(echo ${toolchain} | awk 'BEGIN{FS="-"}{print $1}') tgt_os=$(echo ${toolchain} | awk 'BEGIN{FS="-"}{print $2}') tgt_cc=$(echo ${toolchain} | awk 'BEGIN{FS="-"}{print $3}') # Mark the specific ISA requested as enabled soft_enable ${tgt_isa} enable_feature ${tgt_os} enable_feature ${tgt_cc} # Enable the architecture family case ${tgt_isa} in arm*) enable_feature arm ;; mips*) enable_feature mips ;; ppc*) enable_feature ppc ;; esac # PIC is probably what we want when building shared libs enabled shared && soft_enable pic # Minimum iOS version for all target platforms (darwin and iphonesimulator). # Shared library framework builds are only possible on iOS 8 and later. if enabled shared; then IOS_VERSION_OPTIONS="--enable-shared" IOS_VERSION_MIN="8.0" else IOS_VERSION_OPTIONS="" IOS_VERSION_MIN="7.0" fi # Handle darwin variants. Newer SDKs allow targeting older # platforms, so use the newest one available. 
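# e.g. an arm*-darwin* target builds against the newest installed iphoneos
# SDK found via show_darwin_sdk_path above, rather than requiring an SDK
# that exactly matches the targeted OS version.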
case ${toolchain} in arm*-darwin*) add_cflags "-miphoneos-version-min=${IOS_VERSION_MIN}" iphoneos_sdk_dir="$(show_darwin_sdk_path iphoneos)" if [ -d "${iphoneos_sdk_dir}" ]; then add_cflags "-isysroot ${iphoneos_sdk_dir}" add_ldflags "-isysroot ${iphoneos_sdk_dir}" fi ;; x86*-darwin*) osx_sdk_dir="$(show_darwin_sdk_path macosx)" if [ -d "${osx_sdk_dir}" ]; then add_cflags "-isysroot ${osx_sdk_dir}" add_ldflags "-isysroot ${osx_sdk_dir}" fi ;; esac case ${toolchain} in *-darwin8-*) add_cflags "-mmacosx-version-min=10.4" add_ldflags "-mmacosx-version-min=10.4" ;; *-darwin9-*) add_cflags "-mmacosx-version-min=10.5" add_ldflags "-mmacosx-version-min=10.5" ;; *-darwin10-*) add_cflags "-mmacosx-version-min=10.6" add_ldflags "-mmacosx-version-min=10.6" ;; *-darwin11-*) add_cflags "-mmacosx-version-min=10.7" add_ldflags "-mmacosx-version-min=10.7" ;; *-darwin12-*) add_cflags "-mmacosx-version-min=10.8" add_ldflags "-mmacosx-version-min=10.8" ;; *-darwin13-*) add_cflags "-mmacosx-version-min=10.9" add_ldflags "-mmacosx-version-min=10.9" ;; *-darwin14-*) add_cflags "-mmacosx-version-min=10.10" add_ldflags "-mmacosx-version-min=10.10" ;; *-darwin15-*) add_cflags "-mmacosx-version-min=10.11" add_ldflags "-mmacosx-version-min=10.11" ;; *-darwin16-*) add_cflags "-mmacosx-version-min=10.12" add_ldflags "-mmacosx-version-min=10.12" ;; *-darwin17-*) add_cflags "-mmacosx-version-min=10.13" add_ldflags "-mmacosx-version-min=10.13" ;; *-darwin18-*) add_cflags "-mmacosx-version-min=10.14" add_ldflags "-mmacosx-version-min=10.14" ;; *-iphonesimulator-*) add_cflags "-miphoneos-version-min=${IOS_VERSION_MIN}" add_ldflags "-miphoneos-version-min=${IOS_VERSION_MIN}" iossim_sdk_dir="$(show_darwin_sdk_path iphonesimulator)" if [ -d "${iossim_sdk_dir}" ]; then add_cflags "-isysroot ${iossim_sdk_dir}" add_ldflags "-isysroot ${iossim_sdk_dir}" fi ;; esac # Handle Solaris variants. Solaris 10 needs -lposix4 case ${toolchain} in sparc-solaris-*) add_extralibs -lposix4 ;; *-solaris-*) add_extralibs -lposix4 ;; esac # Process ARM architecture variants case ${toolchain} in arm*) # on arm, isa versions are supersets case ${tgt_isa} in arm64|armv8) soft_enable neon ;; armv7|armv7s) soft_enable neon # Only enable neon_asm when neon is also enabled. enabled neon && soft_enable neon_asm # If someone tries to force it through, die. if disabled neon && enabled neon_asm; then die "Disabling neon while keeping neon-asm is not supported" fi ;; esac asm_conversion_cmd="cat" case ${tgt_cc} in gcc) link_with_cc=gcc setup_gnu_toolchain arch_int=${tgt_isa##armv} arch_int=${arch_int%%te} tune_cflags="-mtune=" if [ ${tgt_isa} = "armv7" ] || [ ${tgt_isa} = "armv7s" ]; then if [ -z "${float_abi}" ]; then check_cpp <&- || \ die "Couldn't find CodeSourcery GCC from PATH" # Use armcc as a linker to enable translation of # some gcc specific options such as -lm and -lpthread. 
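        # e.g. `armcc --translate_gcc ... -lm -lpthread` is accepted here,
        # where plain armlink would not understand the gcc-style -l flags.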
LD="armcc --translate_gcc" # create configuration file (uses path to CodeSourcery GCC) armcc --arm_linux_configure --arm_linux_config_file=arm_linux.cfg add_cflags --arm_linux_paths --arm_linux_config_file=arm_linux.cfg add_asflags --no_hide_all --apcs=/interwork add_ldflags --arm_linux_paths --arm_linux_config_file=arm_linux.cfg enabled pic && add_cflags --apcs=/fpic enabled pic && add_asflags --apcs=/fpic enabled shared && add_cflags --shared fi ;; esac ;; mips*) link_with_cc=gcc setup_gnu_toolchain tune_cflags="-mtune=" if enabled dspr2; then check_add_cflags -mips32r2 -mdspr2 fi if enabled runtime_cpu_detect; then disable_feature runtime_cpu_detect fi if [ -n "${tune_cpu}" ]; then case ${tune_cpu} in p5600) check_add_cflags -mips32r5 -mload-store-pairs check_add_cflags -msched-weight -mhard-float -mfp64 check_add_asflags -mips32r5 -mhard-float -mfp64 check_add_ldflags -mfp64 ;; i6400|p6600) check_add_cflags -mips64r6 -mabi=64 -msched-weight check_add_cflags -mload-store-pairs -mhard-float -mfp64 check_add_asflags -mips64r6 -mabi=64 -mhard-float -mfp64 check_add_ldflags -mips64r6 -mabi=64 -mfp64 ;; esac if enabled msa; then # TODO(libyuv:793) # The new mips functions in libyuv do not build # with the toolchains we currently use for testing. soft_disable libyuv add_cflags -mmsa add_asflags -mmsa add_ldflags -mmsa fi fi if enabled mmi; then tgt_isa=loongson3a check_add_ldflags -march=loongson3a fi check_add_cflags -march=${tgt_isa} check_add_asflags -march=${tgt_isa} check_add_asflags -KPIC ;; ppc64le*) link_with_cc=gcc setup_gnu_toolchain # Do not enable vsx by default. # https://bugs.chromium.org/p/webm/issues/detail?id=1522 enabled vsx || RTCD_OPTIONS="${RTCD_OPTIONS}--disable-vsx " if [ -n "${tune_cpu}" ]; then case ${tune_cpu} in power?) tune_cflags="-mcpu=" ;; esac fi ;; x86*) case ${tgt_os} in android) soft_enable realtime_only ;; win*) enabled gcc && add_cflags -fno-common ;; solaris*) CC=${CC:-${CROSS}gcc} CXX=${CXX:-${CROSS}g++} LD=${LD:-${CROSS}gcc} CROSS=${CROSS-g} ;; os2) disable_feature pic AS=${AS:-nasm} add_ldflags -Zhigh-mem ;; esac AS="${alt_as:-${AS:-auto}}" case ${tgt_cc} in icc*) CC=${CC:-icc} LD=${LD:-icc} setup_gnu_toolchain add_cflags -use-msasm # remove -use-msasm too? # add -no-intel-extensions to suppress warning #10237 # refer to http://software.intel.com/en-us/forums/topic/280199 add_ldflags -i-static -no-intel-extensions enabled x86_64 && add_cflags -ipo -static -O3 -no-prec-div enabled x86_64 && AR=xiar case ${tune_cpu} in atom*) tune_cflags="-x" tune_cpu="SSE3_ATOM" ;; *) tune_cflags="-march=" ;; esac ;; gcc*) link_with_cc=gcc tune_cflags="-march=" setup_gnu_toolchain #for 32 bit x86 builds, -O3 did not turn on this flag enabled optimizations && disabled gprof && check_add_cflags -fomit-frame-pointer ;; vs*) # When building with Microsoft Visual Studio the assembler is # invoked directly. Checking at configure time is unnecessary. # Skip the check by setting AS arbitrarily AS=msvs msvs_arch_dir=x86-msvs case ${tgt_cc##vs} in 14) echo "${tgt_cc} does not support avx512, disabling....." RTCD_OPTIONS="${RTCD_OPTIONS}--disable-avx512 " soft_disable avx512 ;; esac ;; esac bits=32 enabled x86_64 && bits=64 check_cpp < sse4 check_gcc_machine_option ${ext%_*} $ext fi fi done if enabled external_build; then log_echo " skipping assembler detection" else case "${AS}" in auto|"") which nasm >/dev/null 2>&1 && AS=nasm which yasm >/dev/null 2>&1 && AS=yasm if [ "${AS}" = nasm ] ; then # Apple ships version 0.98 of nasm through at least Xcode 6. 
Revisit # this check if they start shipping a compatible version. apple=`nasm -v | grep "Apple"` [ -n "${apple}" ] \ && echo "Unsupported version of nasm: ${apple}" \ && AS="" fi [ "${AS}" = auto ] || [ -z "${AS}" ] \ && die "Neither yasm nor nasm have been found." \ "See the prerequisites section in the README for more info." ;; esac log_echo " using $AS" fi AS_SFX=.asm case ${tgt_os} in win32) add_asflags -f win32 enabled debug && add_asflags -g cv8 EXE_SFX=.exe ;; win64) add_asflags -f win64 enabled debug && add_asflags -g cv8 EXE_SFX=.exe ;; linux*|solaris*|android*) add_asflags -f elf${bits} enabled debug && [ "${AS}" = yasm ] && add_asflags -g dwarf2 enabled debug && [ "${AS}" = nasm ] && add_asflags -g [ "${AS##*/}" = nasm ] && check_asm_align ;; darwin*) add_asflags -f macho${bits} enabled x86 && darwin_arch="-arch i386" || darwin_arch="-arch x86_64" add_cflags ${darwin_arch} add_ldflags ${darwin_arch} # -mdynamic-no-pic is still a bit of voodoo -- it was required at # one time, but does not seem to be now, and it breaks some of the # code that still relies on inline assembly. # enabled icc && ! enabled pic && add_cflags -fno-pic -mdynamic-no-pic enabled icc && ! enabled pic && add_cflags -fno-pic ;; iphonesimulator) add_asflags -f macho${bits} enabled x86 && sim_arch="-arch i386" || sim_arch="-arch x86_64" add_cflags ${sim_arch} add_ldflags ${sim_arch} if [ "$(disabled external_build)" ] && [ "$(show_darwin_sdk_major_version iphonesimulator)" -gt 8 ]; then # yasm v1.3.0 doesn't know what -fembed-bitcode means, so turning it # on is pointless (unless building a C-only lib). Warn the user, but # do nothing here. log "Warning: Bitcode embed disabled for simulator targets." fi ;; os2) add_asflags -f aout enabled debug && add_asflags -g EXE_SFX=.exe ;; *) log "Warning: Unknown os $tgt_os while setting up $AS flags" ;; esac ;; *-gcc|generic-gnu) link_with_cc=gcc enable_feature gcc setup_gnu_toolchain ;; esac # Try to enable CPU specific tuning if [ -n "${tune_cpu}" ]; then if [ -n "${tune_cflags}" ]; then check_add_cflags ${tune_cflags}${tune_cpu} || \ die "Requested CPU '${tune_cpu}' not supported by compiler" fi if [ -n "${tune_asflags}" ]; then check_add_asflags ${tune_asflags}${tune_cpu} || \ die "Requested CPU '${tune_cpu}' not supported by assembler" fi if [ -z "${tune_cflags}${tune_asflags}" ]; then log_echo "Warning: CPU tuning not supported by this toolchain" fi fi if enabled debug; then check_add_cflags -g && check_add_ldflags -g else check_add_cflags -DNDEBUG fi enabled gprof && check_add_cflags -pg && check_add_ldflags -pg enabled gcov && check_add_cflags -fprofile-arcs -ftest-coverage && check_add_ldflags -fprofile-arcs -ftest-coverage if enabled optimizations; then if enabled rvct; then enabled small && check_add_cflags -Ospace || check_add_cflags -Otime else enabled small && check_add_cflags -O2 || check_add_cflags -O3 fi fi # Position Independent Code (PIC) support, for building relocatable # shared objects enabled gcc && enabled pic && check_add_cflags -fPIC # Work around longjmp interception on glibc >= 2.11, to improve binary # compatibility. 
See http://code.google.com/p/webm/issues/detail?id=166 enabled linux && check_add_cflags -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=0 # Check for strip utility variant ${STRIP} -V 2>/dev/null | grep GNU >/dev/null && enable_feature gnu_strip # Try to determine target endianness check_cc </dev/null 2>&1 && enable_feature big_endian # Try to find which inline keywords are supported check_cc < #include int main(void) { return pthread_create(NULL, NULL, NULL, NULL); } EOF ;; esac fi # only for MIPS platforms case ${toolchain} in mips*) if enabled big_endian; then if enabled dspr2; then echo "dspr2 optimizations are available only for little endian platforms" disable_feature dspr2 fi if enabled msa; then echo "msa optimizations are available only for little endian platforms" disable_feature msa fi if enabled mmi; then echo "mmi optimizations are available only for little endian platforms" disable_feature mmi fi fi ;; esac # glibc needs these if enabled linux; then add_cflags -D_LARGEFILE_SOURCE add_cflags -D_FILE_OFFSET_BITS=64 fi } process_toolchain() { process_common_toolchain } print_config_mk() { saved_prefix="${prefix}" prefix=$1 makefile=$2 shift 2 for cfg; do if enabled $cfg; then upname="`toupper $cfg`" echo "${prefix}_${upname}=yes" >> $makefile fi done prefix="${saved_prefix}" } print_config_h() { saved_prefix="${prefix}" prefix=$1 header=$2 shift 2 for cfg; do upname="`toupper $cfg`" if enabled $cfg; then echo "#define ${prefix}_${upname} 1" >> $header else echo "#define ${prefix}_${upname} 0" >> $header fi done prefix="${saved_prefix}" } print_config_vars_h() { header=$1 shift while [ $# -gt 0 ]; do upname="`toupper $1`" echo "#define ${upname} $2" >> $header shift 2 done } print_webm_license() { saved_prefix="${prefix}" destination=$1 prefix="$2" suffix="$3" shift 3 cat < ${destination} ${prefix} Copyright (c) 2011 The WebM project authors. All Rights Reserved.${suffix} ${prefix} ${suffix} ${prefix} Use of this source code is governed by a BSD-style license${suffix} ${prefix} that can be found in the LICENSE file in the root of the source${suffix} ${prefix} tree. An additional intellectual property rights grant can be found${suffix} ${prefix} in the file PATENTS. All contributing project authors may${suffix} ${prefix} be found in the AUTHORS file in the root of the source tree.${suffix} EOF prefix="${saved_prefix}" } process_targets() { true; } process_detect() { true; } enable_feature logging logfile="config.log" self=$0 process() { cmdline_args="$@" process_cmdline "$@" if enabled child; then echo "# ${self} $@" >> ${logfile} else echo "# ${self} $@" > ${logfile} fi post_process_common_cmdline post_process_cmdline process_toolchain process_detect process_targets OOT_INSTALLS="${OOT_INSTALLS}" if enabled source_path_used; then # Prepare the PWD for building. for f in ${OOT_INSTALLS}; do install -D "${source_path}/$f" "$f" done fi cp "${source_path}/build/make/Makefile" . clean_temp_files true } libvpx-1.8.2/build/make/gen_asm_deps.sh000077500000000000000000000036361357355204000200410ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. 
## self=$0 show_help() { echo "usage: $self [options] " echo echo "Generate Makefile dependency information from assembly code source" echo exit 1 } die_unknown(){ echo "Unknown option \"$1\"." echo "See $0 --help for available options." exit 1 } for opt do optval="${opt#*=}" case "$opt" in --build-pfx=*) pfx="${optval}" ;; --depfile=*) out="${optval}" ;; -I*) raw_inc_paths="${raw_inc_paths} ${opt}" inc_path="${inc_path} ${opt#-I}" ;; -h|--help) show_help ;; *) [ -f "$opt" ] && srcfile="$opt" ;; esac done [ -n "$srcfile" ] || show_help sfx=${sfx:-asm} includes=$(LC_ALL=C egrep -i "include +\"?[a-z0-9_/]+\.${sfx}" $srcfile | perl -p -e "s;.*?([a-z0-9_/]+.${sfx}).*;\1;") #" restore editor state for inc in ${includes}; do found_inc_path= for idir in ${inc_path}; do [ -f "${idir}/${inc}" ] && found_inc_path="${idir}" && break done if [ -f `dirname $srcfile`/$inc ]; then # Handle include files in the same directory as the source $self --build-pfx=$pfx --depfile=$out ${raw_inc_paths} `dirname $srcfile`/$inc elif [ -n "${found_inc_path}" ]; then # Handle include files on the include path $self --build-pfx=$pfx --depfile=$out ${raw_inc_paths} "${found_inc_path}/$inc" else # Handle generated includes in the build root (which may not exist yet) echo ${out} ${out%d}o: "${pfx}${inc}" fi done echo ${out} ${out%d}o: $srcfile libvpx-1.8.2/build/make/gen_msvs_def.sh000077500000000000000000000036001357355204000200430ustar00rootroot00000000000000#!/bin/bash ## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## self=$0 self_basename=${self##*/} EOL=$'\n' show_help() { cat < symbol1 [symbol2, symbol3, ...] where is either 'text' or 'data' Options: --help Print this message --out=filename Write output to a file [stdout] --name=project_name Name of the library (required) EOF exit 1 } die() { echo "${self_basename}: $@" exit 1 } die_unknown(){ echo "Unknown option \"$1\"." echo "See ${self_basename} --help for available options." exit 1 } text() { for sym in "$@"; do echo " $sym" >> ${outfile} done } data() { for sym in "$@"; do printf " %-40s DATA\n" "$sym" >> ${outfile} done } # Process command line for opt in "$@"; do optval="${opt#*=}" case "$opt" in --help|-h) show_help ;; --out=*) outfile="$optval" ;; --name=*) name="${optval}" ;; -*) die_unknown $opt ;; *) file_list[${#file_list[@]}]="$opt" esac done outfile=${outfile:-/dev/stdout} [ -n "$name" ] || die "Library name (--name) must be specified!" echo "LIBRARY ${name}" > ${outfile} echo "EXPORTS" >> ${outfile} for f in "${file_list[@]}"; do . $f done libvpx-1.8.2/build/make/gen_msvs_sln.sh000077500000000000000000000155511357355204000201110ustar00rootroot00000000000000#!/bin/bash ## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. 
## self=$0 self_basename=${self##*/} EOL=$'\n' EOLDOS=$'\r' show_help() { cat <&2 [ -f "${outfile}" ] && rm -f ${outfile}{,.mk} exit 1 } die_unknown(){ echo "Unknown option \"$1\"." >&2 echo "See ${self_basename} --help for available options." >&2 [ -f "${outfile}" ] && rm -f ${outfile}{,.mk} exit 1 } indent1=$'\t' indent="" indent_push() { indent="${indent}${indent1}" } indent_pop() { indent="${indent%${indent1}}" } parse_project() { local file=$1 local name=`grep RootNamespace "$file" | sed 's,.*<.*>\(.*\).*,\1,'` local guid=`grep ProjectGuid "$file" | sed 's,.*<.*>\(.*\).*,\1,'` # save the project GUID to a varaible, normalizing to the basename of the # vcxproj file without the extension local var var=${file##*/} var=${var%%.${sfx}} eval "${var}_file=\"$1\"" eval "${var}_name=$name" eval "${var}_guid=$guid" cur_config_list=`grep -B1 'Label="Configuration"' $file | grep Condition | cut -d\' -f4` new_config_list=$(for i in $config_list $cur_config_list; do echo $i done | sort | uniq) if [ "$config_list" != "" ] && [ "$config_list" != "$new_config_list" ]; then mixed_platforms=1 fi config_list="$new_config_list" eval "${var}_config_list=\"$cur_config_list\"" proj_list="${proj_list} ${var}" } process_project() { eval "local file=\${$1_file}" eval "local name=\${$1_name}" eval "local guid=\${$1_guid}" # save the project GUID to a varaible, normalizing to the basename of the # vcproj file without the extension local var var=${file##*/} var=${var%%.${sfx}} eval "${var}_guid=$guid" echo "Project(\"{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}\") = \"$name\", \"$file\", \"$guid\"" echo "EndProject" } process_global() { echo "Global" indent_push # # Solution Configuration Platforms # echo "${indent}GlobalSection(SolutionConfigurationPlatforms) = preSolution" indent_push IFS_bak=${IFS} IFS=$'\r'$'\n' if [ "$mixed_platforms" != "" ]; then config_list=" Release|Mixed Platforms Debug|Mixed Platforms" fi for config in ${config_list}; do echo "${indent}$config = $config" done IFS=${IFS_bak} indent_pop echo "${indent}EndGlobalSection" # # Project Configuration Platforms # echo "${indent}GlobalSection(ProjectConfigurationPlatforms) = postSolution" indent_push for proj in ${proj_list}; do eval "local proj_guid=\${${proj}_guid}" eval "local proj_config_list=\${${proj}_config_list}" IFS=$'\r'$'\n' for config in ${proj_config_list}; do if [ "$mixed_platforms" != "" ]; then local c=${config%%|*} echo "${indent}${proj_guid}.${c}|Mixed Platforms.ActiveCfg = ${config}" echo "${indent}${proj_guid}.${c}|Mixed Platforms.Build.0 = ${config}" else echo "${indent}${proj_guid}.${config}.ActiveCfg = ${config}" echo "${indent}${proj_guid}.${config}.Build.0 = ${config}" fi done IFS=${IFS_bak} done indent_pop echo "${indent}EndGlobalSection" # # Solution Properties # echo "${indent}GlobalSection(SolutionProperties) = preSolution" indent_push echo "${indent}HideSolutionNode = FALSE" indent_pop echo "${indent}EndGlobalSection" indent_pop echo "EndGlobal" } process_makefile() { IFS_bak=${IFS} IFS=$'\r'$'\n' local TAB=$'\t' cat </dev/null 2>&1 && echo yes) .nodevenv.once: ${TAB}@echo " * \$(MSBUILD_TOOL) not found in path." ${TAB}@echo " * " ${TAB}@echo " * You will have to build all configurations manually using the" ${TAB}@echo " * Visual Studio IDE. 
To allow make to build them automatically," ${TAB}@echo " * add the Common7/IDE directory of your Visual Studio" ${TAB}@echo " * installation to your path, eg:" ${TAB}@echo " * C:\Program Files\Microsoft Visual Studio 10.0\Common7\IDE" ${TAB}@echo " * " ${TAB}@touch \$@ CLEAN-OBJS += \$(if \$(found_devenv),,.nodevenv.once) EOF for sln_config in ${config_list}; do local config=${sln_config%%|*} local platform=${sln_config##*|} local nows_sln_config=`echo $sln_config | sed -e 's/[^a-zA-Z0-9]/_/g'` cat <${outfile} <>${outfile} done process_global >>${outfile} process_makefile >${mkoutfile} libvpx-1.8.2/build/make/gen_msvs_vcxproj.sh000077500000000000000000000416231357355204000210070ustar00rootroot00000000000000#!/bin/bash ## ## Copyright (c) 2013 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## self=$0 self_basename=${self##*/} self_dirname=$(dirname "$0") . "$self_dirname/msvs_common.sh"|| exit 127 show_help() { cat <${content}" indent_pop else echo "${indent}<${tag}>${content}" fi } generate_filter() { local name=$1 local pats=$2 local file_list_sz local i local f local saveIFS="$IFS" local pack echo "generating filter '$name' from ${#file_list[@]} files" >&2 IFS=* file_list_sz=${#file_list[@]} for i in ${!file_list[@]}; do f=${file_list[i]} for pat in ${pats//;/$IFS}; do if [ "${f##*.}" == "$pat" ]; then unset file_list[i] objf=$(echo ${f%.*}.obj \ | sed -e "s,$src_path_bare,," \ -e 's/^[\./]\+//g' -e 's,[:/ ],_,g') if ([ "$pat" == "asm" ] || [ "$pat" == "s" ] || [ "$pat" == "S" ]) && $uses_asm; then # Avoid object file name collisions, i.e. vpx_config.c and # vpx_config.asm produce the same object file without # this additional suffix. objf=${objf%.obj}_asm.obj open_tag CustomBuild \ Include="$f" for plat in "${platforms[@]}"; do for cfg in Debug Release; do tag_content Message "Assembling %(Filename)%(Extension)" \ Condition="'\$(Configuration)|\$(Platform)'=='$cfg|$plat'" tag_content Command "$(eval echo \$asm_${cfg}_cmdline) -o \$(IntDir)$objf" \ Condition="'\$(Configuration)|\$(Platform)'=='$cfg|$plat'" tag_content Outputs "\$(IntDir)$objf" \ Condition="'\$(Configuration)|\$(Platform)'=='$cfg|$plat'" done done close_tag CustomBuild elif [ "$pat" == "c" ] || \ [ "$pat" == "cc" ] || [ "$pat" == "cpp" ]; then open_tag ClCompile \ Include="$f" # Separate file names with Condition? tag_content ObjectFileName "\$(IntDir)$objf" # Check for AVX and turn it on to avoid warnings. 
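          # (Without /arch:AVX, MSVC warns -- C4752 -- when AVX intrinsics
          # appear in a translation unit; keying off the *_avx.c/*_avx2.c
          # file-name convention keeps the flag away from SSE-only objects.)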
if [[ $f =~ avx.?\.c$ ]]; then tag_content AdditionalOptions "/arch:AVX" fi close_tag ClCompile elif [ "$pat" == "h" ] ; then tag ClInclude \ Include="$f" elif [ "$pat" == "vcxproj" ] ; then open_tag ProjectReference \ Include="$f" depguid=`grep ProjectGuid "$f" | sed 's,.*<.*>\(.*\).*,\1,'` tag_content Project "$depguid" tag_content ReferenceOutputAssembly false close_tag ProjectReference else tag None \ Include="$f" fi break fi done done IFS="$saveIFS" } # Process command line unset target for opt in "$@"; do optval="${opt#*=}" case "$opt" in --help|-h) show_help ;; --target=*) target="${optval}" ;; --out=*) outfile="$optval" ;; --name=*) name="${optval}" ;; --proj-guid=*) guid="${optval}" ;; --module-def=*) module_def="${optval}" ;; --exe) proj_kind="exe" ;; --dll) proj_kind="dll" ;; --lib) proj_kind="lib" ;; --src-path-bare=*) src_path_bare=$(fix_path "$optval") src_path_bare=${src_path_bare%/} ;; --static-crt) use_static_runtime=true ;; --enable-werror) werror=true ;; --ver=*) vs_ver="$optval" case "$optval" in 1[4-6]) ;; *) die Unrecognized Visual Studio Version in $opt ;; esac ;; -I*) opt=${opt##-I} opt=$(fix_path "$opt") opt="${opt%/}" incs="${incs}${incs:+;}"${opt}"" yasmincs="${yasmincs} -I"${opt}"" ;; -D*) defines="${defines}${defines:+;}${opt##-D}" ;; -L*) # fudge . to $(OutDir) if [ "${opt##-L}" == "." ]; then libdirs="${libdirs}${libdirs:+;}"\$(OutDir)"" else # Also try directories for this platform/configuration opt=${opt##-L} opt=$(fix_path "$opt") libdirs="${libdirs}${libdirs:+;}"${opt}"" libdirs="${libdirs}${libdirs:+;}"${opt}/\$(PlatformName)/\$(Configuration)"" libdirs="${libdirs}${libdirs:+;}"${opt}/\$(PlatformName)"" fi ;; -l*) libs="${libs}${libs:+ }${opt##-l}.lib" ;; -*) die_unknown $opt ;; *) # The paths in file_list are fixed outside of the loop. file_list[${#file_list[@]}]="$opt" case "$opt" in *.asm|*.[Ss]) uses_asm=true ;; esac ;; esac done # Make one call to fix_path for file_list to improve performance. fix_file_list file_list outfile=${outfile:-/dev/stdout} guid=${guid:-`generate_uuid`} uses_asm=${uses_asm:-false} [ -n "$name" ] || die "Project name (--name) must be specified!" [ -n "$target" ] || die "Target (--target) must be specified!" if ${use_static_runtime:-false}; then release_runtime=MultiThreaded debug_runtime=MultiThreadedDebug lib_sfx=mt else release_runtime=MultiThreadedDLL debug_runtime=MultiThreadedDebugDLL lib_sfx=md fi # Calculate debug lib names: If a lib ends in ${lib_sfx}.lib, then rename # it to ${lib_sfx}d.lib. This precludes linking to release libs from a # debug exe, so this may need to be refactored later. for lib in ${libs}; do if [ "$lib" != "${lib%${lib_sfx}.lib}" ]; then lib=${lib%.lib}d.lib fi debug_libs="${debug_libs}${debug_libs:+ }${lib}" done debug_libs=${debug_libs// /;} libs=${libs// /;} # List of all platforms supported for this target case "$target" in x86_64*) platforms[0]="x64" asm_Debug_cmdline="yasm -Xvc -g cv8 -f win64 ${yasmincs} "%(FullPath)"" asm_Release_cmdline="yasm -Xvc -f win64 ${yasmincs} "%(FullPath)"" ;; x86*) platforms[0]="Win32" asm_Debug_cmdline="yasm -Xvc -g cv8 -f win32 ${yasmincs} "%(FullPath)"" asm_Release_cmdline="yasm -Xvc -f win32 ${yasmincs} "%(FullPath)"" ;; arm64*) platforms[0]="ARM64" asm_Debug_cmdline="armasm64 -nologo -oldit "%(FullPath)"" asm_Release_cmdline="armasm64 -nologo -oldit "%(FullPath)"" ;; arm*) platforms[0]="ARM" asm_Debug_cmdline="armasm -nologo -oldit "%(FullPath)"" asm_Release_cmdline="armasm -nologo -oldit "%(FullPath)"" ;; *) die "Unsupported target $target!" 
;; esac generate_vcxproj() { echo "" open_tag Project \ DefaultTargets="Build" \ ToolsVersion="4.0" \ xmlns="http://schemas.microsoft.com/developer/msbuild/2003" \ open_tag ItemGroup \ Label="ProjectConfigurations" for plat in "${platforms[@]}"; do for config in Debug Release; do open_tag ProjectConfiguration \ Include="$config|$plat" tag_content Configuration $config tag_content Platform $plat close_tag ProjectConfiguration done done close_tag ItemGroup open_tag PropertyGroup \ Label="Globals" tag_content ProjectGuid "{${guid}}" tag_content RootNamespace ${name} tag_content Keyword ManagedCProj if [ $vs_ver -ge 12 ] && [ "${platforms[0]}" = "ARM" ]; then tag_content AppContainerApplication true # The application type can be one of "Windows Store", # "Windows Phone" or "Windows Phone Silverlight". The # actual value doesn't matter from the libvpx point of view, # since a static library built for one works on the others. # The PlatformToolset field needs to be set in sync with this; # for Windows Store and Windows Phone Silverlight it should be # v120 while it should be v120_wp81 if the type is Windows Phone. tag_content ApplicationType "Windows Store" tag_content ApplicationTypeRevision 8.1 fi if [ "${platforms[0]}" = "ARM64" ]; then # Require the first Visual Studio version to have ARM64 support. tag_content MinimumVisualStudioVersion 15.9 fi if [ $vs_ver -eq 15 ] && [ "${platforms[0]}" = "ARM64" ]; then # Since VS 15 does not have a 'use latest SDK version' facility, # specifically require the contemporaneous SDK with official ARM64 # support. tag_content WindowsTargetPlatformVersion 10.0.17763.0 fi close_tag PropertyGroup tag Import \ Project="\$(VCTargetsPath)\\Microsoft.Cpp.Default.props" for plat in "${platforms[@]}"; do for config in Release Debug; do open_tag PropertyGroup \ Condition="'\$(Configuration)|\$(Platform)'=='$config|$plat'" \ Label="Configuration" if [ "$proj_kind" = "exe" ]; then tag_content ConfigurationType Application elif [ "$proj_kind" = "dll" ]; then tag_content ConfigurationType DynamicLibrary else tag_content ConfigurationType StaticLibrary fi if [ "$vs_ver" = "14" ]; then tag_content PlatformToolset v140 fi if [ "$vs_ver" = "15" ]; then tag_content PlatformToolset v141 fi if [ "$vs_ver" = "16" ]; then tag_content PlatformToolset v142 fi tag_content CharacterSet Unicode if [ "$config" = "Release" ]; then tag_content WholeProgramOptimization true fi close_tag PropertyGroup done done tag Import \ Project="\$(VCTargetsPath)\\Microsoft.Cpp.props" open_tag ImportGroup \ Label="PropertySheets" tag Import \ Project="\$(UserRootDir)\\Microsoft.Cpp.\$(Platform).user.props" \ Condition="exists('\$(UserRootDir)\\Microsoft.Cpp.\$(Platform).user.props')" \ Label="LocalAppDataPlatform" close_tag ImportGroup tag PropertyGroup \ Label="UserMacros" for plat in "${platforms[@]}"; do plat_no_ws=`echo $plat | sed 's/[^A-Za-z0-9_]/_/g'` for config in Debug Release; do open_tag PropertyGroup \ Condition="'\$(Configuration)|\$(Platform)'=='$config|$plat'" tag_content OutDir "\$(SolutionDir)$plat_no_ws\\\$(Configuration)\\" tag_content IntDir "$plat_no_ws\\\$(Configuration)\\${name}\\" if [ "$proj_kind" == "lib" ]; then if [ "$config" == "Debug" ]; then config_suffix=d else config_suffix="" fi tag_content TargetName "${name}${lib_sfx}${config_suffix}" fi close_tag PropertyGroup done done for plat in "${platforms[@]}"; do for config in Debug Release; do open_tag ItemDefinitionGroup \ Condition="'\$(Configuration)|\$(Platform)'=='$config|$plat'" if [ "$name" == "vpx" ]; then 
hostplat=$plat if [ "$hostplat" == "ARM" ]; then hostplat=Win32 fi fi open_tag ClCompile if [ "$config" = "Debug" ]; then opt=Disabled runtime=$debug_runtime curlibs=$debug_libs debug=_DEBUG else opt=MaxSpeed runtime=$release_runtime curlibs=$libs tag_content FavorSizeOrSpeed Speed debug=NDEBUG fi extradefines=";$defines" tag_content Optimization $opt tag_content AdditionalIncludeDirectories "$incs;%(AdditionalIncludeDirectories)" tag_content PreprocessorDefinitions "WIN32;$debug;_CRT_SECURE_NO_WARNINGS;_CRT_SECURE_NO_DEPRECATE$extradefines;%(PreprocessorDefinitions)" tag_content RuntimeLibrary $runtime tag_content WarningLevel Level3 if ${werror:-false}; then tag_content TreatWarningAsError true fi if [ $vs_ver -ge 11 ]; then # We need to override the defaults for these settings # if AppContainerApplication is set. tag_content CompileAsWinRT false tag_content PrecompiledHeader NotUsing tag_content SDLCheck false fi close_tag ClCompile case "$proj_kind" in exe) open_tag Link tag_content GenerateDebugInformation true # Console is the default normally, but if # AppContainerApplication is set, we need to override it. tag_content SubSystem Console close_tag Link ;; dll) open_tag Link tag_content GenerateDebugInformation true tag_content ModuleDefinitionFile $module_def close_tag Link ;; lib) ;; esac close_tag ItemDefinitionGroup done done open_tag ItemGroup generate_filter "Source Files" "c;cc;cpp;def;odl;idl;hpj;bat;asm;asmx;s;S" close_tag ItemGroup open_tag ItemGroup generate_filter "Header Files" "h;hm;inl;inc;xsd" close_tag ItemGroup open_tag ItemGroup generate_filter "Build Files" "mk" close_tag ItemGroup open_tag ItemGroup generate_filter "References" "vcxproj" close_tag ItemGroup tag Import \ Project="\$(VCTargetsPath)\\Microsoft.Cpp.targets" open_tag ImportGroup \ Label="ExtensionTargets" close_tag ImportGroup close_tag Project # This must be done from within the {} subshell echo "Ignored files list (${#file_list[@]} items) is:" >&2 for f in "${file_list[@]}"; do echo " $f" >&2 done } # This regexp doesn't catch most of the strings in the vcxproj format, # since they're like path instead of # as previously. It still seems to work ok despite this. generate_vcxproj | sed -e '/"/s;\([^ "]\)/;\1\\;g' | sed -e '/xmlns/s;\\;/;g' > ${outfile} exit libvpx-1.8.2/build/make/ios-Info.plist000066400000000000000000000017711357355204000176140ustar00rootroot00000000000000 CFBundleDevelopmentRegion en CFBundleExecutable VPX CFBundleIdentifier org.webmproject.VPX CFBundleInfoDictionaryVersion 6.0 CFBundleName VPX CFBundlePackageType FMWK CFBundleShortVersionString ${VERSION} CFBundleSignature ???? CFBundleSupportedPlatforms iPhoneOS CFBundleVersion ${VERSION} MinimumOSVersion ${IOS_VERSION_MIN} UIDeviceFamily 1 2 VPXFullVersion ${FULLVERSION} libvpx-1.8.2/build/make/iosbuild.sh000077500000000000000000000250131357355204000172200ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## ## This script generates 'VPX.framework'. An iOS app can encode and decode VPx ## video by including 'VPX.framework'. ## ## Run iosbuild.sh to create 'VPX.framework' in the current directory. 
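##
## Example invocations (flag spellings match the option parsing below;
## the target list shown is illustrative):
##   ./build/make/iosbuild.sh                 # static VPX.framework
##   ./build/make/iosbuild.sh --enable-shared # dynamic framework (iOS 8+)
##   ./build/make/iosbuild.sh --targets "arm64-darwin-gcc x86_64-iphonesimulator-gcc"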
## set -e devnull='> /dev/null 2>&1' BUILD_ROOT="_iosbuild" CONFIGURE_ARGS="--disable-docs --disable-examples --disable-libyuv --disable-unit-tests" DIST_DIR="_dist" FRAMEWORK_DIR="VPX.framework" FRAMEWORK_LIB="VPX.framework/VPX" HEADER_DIR="${FRAMEWORK_DIR}/Headers/vpx" SCRIPT_DIR=$(dirname "$0") LIBVPX_SOURCE_DIR=$(cd ${SCRIPT_DIR}/../..; pwd) LIPO=$(xcrun -sdk iphoneos${SDK} -find lipo) ORIG_PWD="$(pwd)" ARM_TARGETS="arm64-darwin-gcc armv7-darwin-gcc armv7s-darwin-gcc" SIM_TARGETS="x86-iphonesimulator-gcc x86_64-iphonesimulator-gcc" OSX_TARGETS="x86-darwin16-gcc x86_64-darwin16-gcc" TARGETS="${ARM_TARGETS} ${SIM_TARGETS}" # Configures for the target specified by $1, and invokes make with the dist # target using $DIST_DIR as the distribution output directory. build_target() { local target="$1" local old_pwd="$(pwd)" local target_specific_flags="" vlog "***Building target: ${target}***" case "${target}" in x86-*) target_specific_flags="--enable-pic" vlog "Enabled PIC for ${target}" ;; esac mkdir "${target}" cd "${target}" eval "${LIBVPX_SOURCE_DIR}/configure" --target="${target}" \ ${CONFIGURE_ARGS} ${EXTRA_CONFIGURE_ARGS} ${target_specific_flags} \ ${devnull} export DIST_DIR eval make dist ${devnull} cd "${old_pwd}" vlog "***Done building target: ${target}***" } # Returns the preprocessor symbol for the target specified by $1. target_to_preproc_symbol() { target="$1" case "${target}" in arm64-*) echo "__aarch64__" ;; armv7-*) echo "__ARM_ARCH_7A__" ;; armv7s-*) echo "__ARM_ARCH_7S__" ;; x86-*) echo "__i386__" ;; x86_64-*) echo "__x86_64__" ;; *) echo "#error ${target} unknown/unsupported" return 1 ;; esac } # Create a vpx_config.h shim that, based on preprocessor settings for the # current target CPU, includes the real vpx_config.h for the current target. # $1 is the list of targets. create_vpx_framework_config_shim() { local targets="$1" local config_file="${HEADER_DIR}/vpx_config.h" local preproc_symbol="" local target="" local include_guard="VPX_FRAMEWORK_HEADERS_VPX_VPX_CONFIG_H_" local file_header="/* * Copyright (c) $(date +%Y) The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /* GENERATED FILE: DO NOT EDIT! */ #ifndef ${include_guard} #define ${include_guard} #if defined" printf "%s" "${file_header}" > "${config_file}" for target in ${targets}; do preproc_symbol=$(target_to_preproc_symbol "${target}") printf " ${preproc_symbol}\n" >> "${config_file}" printf "#define VPX_FRAMEWORK_TARGET \"${target}\"\n" >> "${config_file}" printf "#include \"VPX/vpx/${target}/vpx_config.h\"\n" >> "${config_file}" printf "#elif defined" >> "${config_file}" mkdir "${HEADER_DIR}/${target}" cp -p "${BUILD_ROOT}/${target}/vpx_config.h" "${HEADER_DIR}/${target}" done # Consume the last line of output from the loop: We don't want it. sed -i.bak -e '$d' "${config_file}" rm "${config_file}.bak" printf "#endif\n\n" >> "${config_file}" printf "#endif // ${include_guard}" >> "${config_file}" } # Verifies that $FRAMEWORK_LIB fat library contains requested builds. verify_framework_targets() { local requested_cpus="" local cpu="" # Extract CPU from full target name. for target; do cpu="${target%%-*}" if [ "${cpu}" = "x86" ]; then # lipo -info outputs i386 for libvpx x86 targets. 
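      # Translate it here so the sorted string comparison against the
      # `lipo -info` output further down matches what lipo actually prints.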
cpu="i386" fi requested_cpus="${requested_cpus}${cpu} " done # Get target CPUs present in framework library. local targets_built=$(${LIPO} -info ${FRAMEWORK_LIB}) # $LIPO -info outputs a string like the following: # Architectures in the fat file: $FRAMEWORK_LIB # Capture only the architecture strings. targets_built=${targets_built##*: } # Sort CPU strings to make the next step a simple string compare. local actual=$(echo ${targets_built} | tr " " "\n" | sort | tr "\n" " ") local requested=$(echo ${requested_cpus} | tr " " "\n" | sort | tr "\n" " ") vlog "Requested ${FRAMEWORK_LIB} CPUs: ${requested}" vlog "Actual ${FRAMEWORK_LIB} CPUs: ${actual}" if [ "${requested}" != "${actual}" ]; then elog "Actual ${FRAMEWORK_LIB} targets do not match requested target list." elog " Requested target CPUs: ${requested}" elog " Actual target CPUs: ${actual}" return 1 fi } # Configures and builds each target specified by $1, and then builds # VPX.framework. build_framework() { local lib_list="" local targets="$1" local target="" local target_dist_dir="" # Clean up from previous build(s). rm -rf "${BUILD_ROOT}" "${FRAMEWORK_DIR}" # Create output dirs. mkdir -p "${BUILD_ROOT}" mkdir -p "${HEADER_DIR}" cd "${BUILD_ROOT}" for target in ${targets}; do build_target "${target}" target_dist_dir="${BUILD_ROOT}/${target}/${DIST_DIR}" if [ "${ENABLE_SHARED}" = "yes" ]; then local suffix="dylib" else local suffix="a" fi lib_list="${lib_list} ${target_dist_dir}/lib/libvpx.${suffix}" done cd "${ORIG_PWD}" # The basic libvpx API includes are all the same; just grab the most recent # set. cp -p "${target_dist_dir}"/include/vpx/* "${HEADER_DIR}" # Build the fat library. ${LIPO} -create ${lib_list} -output ${FRAMEWORK_DIR}/VPX # Create the vpx_config.h shim that allows usage of vpx_config.h from # within VPX.framework. create_vpx_framework_config_shim "${targets}" # Copy in vpx_version.h. cp -p "${BUILD_ROOT}/${target}/vpx_version.h" "${HEADER_DIR}" if [ "${ENABLE_SHARED}" = "yes" ]; then # Adjust the dylib's name so dynamic linking in apps works as expected. install_name_tool -id '@rpath/VPX.framework/VPX' ${FRAMEWORK_DIR}/VPX # Copy in Info.plist. cat "${SCRIPT_DIR}/ios-Info.plist" \ | sed "s/\${FULLVERSION}/${FULLVERSION}/g" \ | sed "s/\${VERSION}/${VERSION}/g" \ | sed "s/\${IOS_VERSION_MIN}/${IOS_VERSION_MIN}/g" \ > "${FRAMEWORK_DIR}/Info.plist" fi # Confirm VPX.framework/VPX contains the targets requested. verify_framework_targets ${targets} vlog "Created fat library ${FRAMEWORK_LIB} containing:" for lib in ${lib_list}; do vlog " $(echo ${lib} | awk -F / '{print $2, $NF}')" done } # Trap function. Cleans up the subtree used to build all targets contained in # $TARGETS. cleanup() { local res=$? cd "${ORIG_PWD}" if [ $res -ne 0 ]; then elog "build exited with error ($res)" fi if [ "${PRESERVE_BUILD_OUTPUT}" != "yes" ]; then rm -rf "${BUILD_ROOT}" fi } print_list() { local indent="$1" shift local list="$@" for entry in ${list}; do echo "${indent}${entry}" done } iosbuild_usage() { cat << EOF Usage: ${0##*/} [arguments] --help: Display this message and exit. --enable-shared: Build a dynamic framework for use on iOS 8 or later. --extra-configure-args : Extra args to pass when configuring libvpx. --macosx: Uses darwin16 targets instead of iphonesimulator targets for x86 and x86_64. Allows linking to framework when builds target MacOSX instead of iOS. --preserve-build-output: Do not delete the build directory. --show-build-output: Show output from each library build. --targets : Override default target list. 
Defaults: $(print_list " " ${TARGETS}) --test-link: Confirms all targets can be linked. Functionally identical to passing --enable-examples via --extra-configure-args. --verbose: Output information about the environment and each stage of the build. EOF } elog() { echo "${0##*/} failed because: $@" 1>&2 } vlog() { if [ "${VERBOSE}" = "yes" ]; then echo "$@" fi } trap cleanup EXIT # Parse the command line. while [ -n "$1" ]; do case "$1" in --extra-configure-args) EXTRA_CONFIGURE_ARGS="$2" shift ;; --help) iosbuild_usage exit ;; --enable-shared) ENABLE_SHARED=yes ;; --preserve-build-output) PRESERVE_BUILD_OUTPUT=yes ;; --show-build-output) devnull= ;; --test-link) EXTRA_CONFIGURE_ARGS="${EXTRA_CONFIGURE_ARGS} --enable-examples" ;; --targets) TARGETS="$2" shift ;; --macosx) TARGETS="${ARM_TARGETS} ${OSX_TARGETS}" ;; --verbose) VERBOSE=yes ;; *) iosbuild_usage exit 1 ;; esac shift done if [ "${ENABLE_SHARED}" = "yes" ]; then CONFIGURE_ARGS="--enable-shared ${CONFIGURE_ARGS}" fi FULLVERSION=$("${SCRIPT_DIR}"/version.sh --bare "${LIBVPX_SOURCE_DIR}") VERSION=$(echo "${FULLVERSION}" | sed -E 's/^v([0-9]+\.[0-9]+\.[0-9]+).*$/\1/') if [ "$ENABLE_SHARED" = "yes" ]; then IOS_VERSION_OPTIONS="--enable-shared" IOS_VERSION_MIN="8.0" else IOS_VERSION_OPTIONS="" IOS_VERSION_MIN="7.0" fi if [ "${VERBOSE}" = "yes" ]; then cat << EOF BUILD_ROOT=${BUILD_ROOT} DIST_DIR=${DIST_DIR} CONFIGURE_ARGS=${CONFIGURE_ARGS} EXTRA_CONFIGURE_ARGS=${EXTRA_CONFIGURE_ARGS} FRAMEWORK_DIR=${FRAMEWORK_DIR} FRAMEWORK_LIB=${FRAMEWORK_LIB} HEADER_DIR=${HEADER_DIR} LIBVPX_SOURCE_DIR=${LIBVPX_SOURCE_DIR} LIPO=${LIPO} MAKEFLAGS=${MAKEFLAGS} ORIG_PWD=${ORIG_PWD} PRESERVE_BUILD_OUTPUT=${PRESERVE_BUILD_OUTPUT} TARGETS="$(print_list "" ${TARGETS})" ENABLE_SHARED=${ENABLE_SHARED} OSX_TARGETS="${OSX_TARGETS}" SIM_TARGETS="${SIM_TARGETS}" SCRIPT_DIR="${SCRIPT_DIR}" FULLVERSION="${FULLVERSION}" VERSION="${VERSION}" IOS_VERSION_MIN="${IOS_VERSION_MIN}" EOF fi build_framework "${TARGETS}" echo "Successfully built '${FRAMEWORK_DIR}' for:" print_list "" ${TARGETS} libvpx-1.8.2/build/make/msvs_common.sh000066400000000000000000000054061357355204000177470ustar00rootroot00000000000000#!/bin/bash ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## if [ "$(uname -o 2>/dev/null)" = "Cygwin" ] \ && cygpath --help >/dev/null 2>&1; then FIXPATH='cygpath -m' else FIXPATH='echo_path' fi die() { echo "${self_basename}: $@" >&2 exit 1 } die_unknown(){ echo "Unknown option \"$1\"." >&2 echo "See ${self_basename} --help for available options." >&2 exit 1 } echo_path() { for path; do echo "$path" done } # Output one, possibly changed based on the system, path per line. fix_path() { $FIXPATH "$@" } # Corrects the paths in file_list in one pass for efficiency. # $1 is the name of the array to be modified. fix_file_list() { if [ "${FIXPATH}" = "echo_path" ] ; then # When used with echo_path, fix_file_list is a no-op. Avoid warning about # unsupported 'declare -n' when it is not important. return 0 elif [ "${BASH_VERSINFO}" -lt 4 ] ; then echo "Cygwin path conversion has failed. 
Please use a version of bash" echo "which supports nameref (-n), introduced in bash 4.3" return 1 fi declare -n array_ref=$1 files=$(fix_path "${array_ref[@]}") local IFS=$'\n' array_ref=($files) } generate_uuid() { local hex="0123456789ABCDEF" local i local uuid="" local j #93995380-89BD-4b04-88EB-625FBE52EBFB for ((i=0; i<32; i++)); do (( j = $RANDOM % 16 )) uuid="${uuid}${hex:$j:1}" done echo "${uuid:0:8}-${uuid:8:4}-${uuid:12:4}-${uuid:16:4}-${uuid:20:12}" } indent1=" " indent="" indent_push() { indent="${indent}${indent1}" } indent_pop() { indent="${indent%${indent1}}" } tag_attributes() { for opt in "$@"; do optval="${opt#*=}" [ -n "${optval}" ] || die "Missing attribute value in '$opt' while generating $tag tag" echo "${indent}${opt%%=*}=\"${optval}\"" done } open_tag() { local tag=$1 shift if [ $# -ne 0 ]; then echo "${indent}<${tag}" indent_push tag_attributes "$@" echo "${indent}>" else echo "${indent}<${tag}>" indent_push fi } close_tag() { local tag=$1 indent_pop echo "${indent}" } tag() { local tag=$1 shift if [ $# -ne 0 ]; then echo "${indent}<${tag}" indent_push tag_attributes "$@" indent_pop echo "${indent}/>" else echo "${indent}<${tag}/>" fi } libvpx-1.8.2/build/make/rtcd.pl000077500000000000000000000222601357355204000163440ustar00rootroot00000000000000#!/usr/bin/env perl ## ## Copyright (c) 2017 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## no strict 'refs'; use warnings; use Getopt::Long; Getopt::Long::Configure("auto_help") if $Getopt::Long::VERSION > 2.32; my %ALL_FUNCS = (); my @ALL_ARCHS; my @ALL_FORWARD_DECLS; my @REQUIRES; my %opts = (); my %disabled = (); my %required = (); my @argv; foreach (@ARGV) { $disabled{$1} = 1, next if /--disable-(.*)/; $required{$1} = 1, next if /--require-(.*)/; push @argv, $_; } # NB: use GetOptions() instead of GetOptionsFromArray() for compatibility. @ARGV = @argv; GetOptions( \%opts, 'arch=s', 'sym=s', 'config=s', ); foreach my $opt (qw/arch config/) { if (!defined($opts{$opt})) { warn "--$opt is required!\n"; Getopt::Long::HelpMessage('-exit' => 1); } } foreach my $defs_file (@ARGV) { if (!-f $defs_file) { warn "$defs_file: $!\n"; Getopt::Long::HelpMessage('-exit' => 1); } } open CONFIG_FILE, $opts{config} or die "Error opening config file '$opts{config}': $!\n"; my %config = (); while () { next if !/^(?:CONFIG_|HAVE_)/; chomp; my @pair = split /=/; $config{$pair[0]} = $pair[1]; } close CONFIG_FILE; # # Routines for the RTCD DSL to call # sub vpx_config($) { return (defined $config{$_[0]}) ? $config{$_[0]} : ""; } sub specialize { my $fn=$_[0]; shift; foreach my $opt (@_) { eval "\$${fn}_${opt}=${fn}_${opt}"; } } sub add_proto { my $fn = splice(@_, -2, 1); $ALL_FUNCS{$fn} = \@_; specialize $fn, "c"; } sub require { foreach my $fn (keys %ALL_FUNCS) { foreach my $opt (@_) { my $ofn = eval "\$${fn}_${opt}"; next if !$ofn; # if we already have a default, then we can disable it, as we know # we can do better. 
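      # (The extensions arrive in the order listed by the caller, so when
      # more than one is enabled the last one processed ends up as the
      # default and the superseded implementation's link flag is cleared.)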
my $best = eval "\$${fn}_default"; if ($best) { my $best_ofn = eval "\$${best}"; if ($best_ofn && "$best_ofn" ne "$ofn") { eval "\$${best}_link = 'false'"; } } eval "\$${fn}_default=${fn}_${opt}"; eval "\$${fn}_${opt}_link='true'"; } } } sub forward_decls { push @ALL_FORWARD_DECLS, @_; } # # Include the user's directives # foreach my $f (@ARGV) { open FILE, "<", $f or die "cannot open $f: $!\n"; my $contents = join('', ); close FILE; eval $contents or warn "eval failed: $@\n"; } # # Process the directives according to the command line # sub process_forward_decls() { foreach (@ALL_FORWARD_DECLS) { $_->(); } } sub determine_indirection { vpx_config("CONFIG_RUNTIME_CPU_DETECT") eq "yes" or &require(@ALL_ARCHS); foreach my $fn (keys %ALL_FUNCS) { my $n = ""; my @val = @{$ALL_FUNCS{$fn}}; my $args = pop @val; my $rtyp = "@val"; my $dfn = eval "\$${fn}_default"; $dfn = eval "\$${dfn}"; foreach my $opt (@_) { my $ofn = eval "\$${fn}_${opt}"; next if !$ofn; my $link = eval "\$${fn}_${opt}_link"; next if $link && $link eq "false"; $n .= "x"; } if ($n eq "x") { eval "\$${fn}_indirect = 'false'"; } else { eval "\$${fn}_indirect = 'true'"; } } } sub declare_function_pointers { foreach my $fn (sort keys %ALL_FUNCS) { my @val = @{$ALL_FUNCS{$fn}}; my $args = pop @val; my $rtyp = "@val"; my $dfn = eval "\$${fn}_default"; $dfn = eval "\$${dfn}"; foreach my $opt (@_) { my $ofn = eval "\$${fn}_${opt}"; next if !$ofn; print "$rtyp ${ofn}($args);\n"; } if (eval "\$${fn}_indirect" eq "false") { print "#define ${fn} ${dfn}\n"; } else { print "RTCD_EXTERN $rtyp (*${fn})($args);\n"; } print "\n"; } } sub set_function_pointers { foreach my $fn (sort keys %ALL_FUNCS) { my @val = @{$ALL_FUNCS{$fn}}; my $args = pop @val; my $rtyp = "@val"; my $dfn = eval "\$${fn}_default"; $dfn = eval "\$${dfn}"; if (eval "\$${fn}_indirect" eq "true") { print " $fn = $dfn;\n"; foreach my $opt (@_) { my $ofn = eval "\$${fn}_${opt}"; next if !$ofn; next if "$ofn" eq "$dfn"; my $link = eval "\$${fn}_${opt}_link"; next if $link && $link eq "false"; my $cond = eval "\$have_${opt}"; print " if (${cond}) $fn = $ofn;\n" } } } } sub filter { my @filtered; foreach (@_) { push @filtered, $_ unless $disabled{$_}; } return @filtered; } # # Helper functions for generating the arch specific RTCD files # sub common_top() { my $include_guard = uc($opts{sym})."_H_"; print <) { if (/HAVE_DSPR2=yes/) { @ALL_ARCHS = filter("$opts{arch}", qw/dspr2/); last; } if (/HAVE_MSA=yes/) { @ALL_ARCHS = filter("$opts{arch}", qw/msa/); last; } if (/HAVE_MMI=yes/) { @ALL_ARCHS = filter("$opts{arch}", qw/mmi/); last; } } close CONFIG_FILE; mips; } elsif ($opts{arch} =~ /armv7\w?/) { @ALL_ARCHS = filter(qw/neon_asm neon/); arm; } elsif ($opts{arch} eq 'armv8' || $opts{arch} eq 'arm64' ) { @ALL_ARCHS = filter(qw/neon/); &require("neon"); arm; } elsif ($opts{arch} =~ /^ppc/ ) { @ALL_ARCHS = filter(qw/vsx/); ppc; } else { unoptimized; } __END__ =head1 NAME rtcd - =head1 SYNOPSIS Usage: rtcd.pl [options] FILE See 'perldoc rtcd.pl' for more details. =head1 DESCRIPTION Reads the Run Time CPU Detections definitions from FILE and generates a C header file on stdout. 
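A representative invocation (paths and names are illustrative only):

  rtcd.pl --arch=x86_64 --sym=vpx_dsp_rtcd --config=config.mk \
    vpx_dsp/vpx_dsp_rtcd_defs.pl > vpx_dsp_rtcd.h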
=head1 OPTIONS Options: --arch=ARCH Architecture to generate defs for (required) --disable-EXT Disable support for EXT extensions --require-EXT Require support for EXT extensions --sym=SYMBOL Unique symbol to use for RTCD initialization function --config=FILE File with CONFIG_FOO=yes lines to parse libvpx-1.8.2/build/make/thumb.pm000066400000000000000000000056761357355204000165410ustar00rootroot00000000000000#!/usr/bin/env perl ## ## Copyright (c) 2013 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## package thumb; sub FixThumbInstructions($$) { my $short_branches = $_[1]; my $branch_shift_offset = $short_branches ? 1 : 0; # Write additions with shifts, such as "add r10, r11, lsl #8", # in three operand form, "add r10, r10, r11, lsl #8". s/(add\s+)(r\d+),\s*(r\d+),\s*(lsl #\d+)/$1$2, $2, $3, $4/g; # Convert additions with a non-constant shift into a sequence # with left shift, addition and a right shift (to restore the # register to the original value). Currently the right shift # isn't necessary in the code base since the values in these # registers aren't used, but doing the shift for consistency. # This converts instructions such as "add r12, r12, r5, lsl r4" # into the sequence "lsl r5, r4", "add r12, r12, r5", "lsr r5, r4". s/^(\s*)(add)(\s+)(r\d+),\s*(r\d+),\s*(r\d+),\s*lsl (r\d+)/$1lsl$3$6, $7\n$1$2$3$4, $5, $6\n$1lsr$3$6, $7/g; # Convert loads with right shifts in the indexing into a # sequence of an add, load and sub. This converts # "ldrb r4, [r9, lr, asr #1]" into "add r9, r9, lr, asr #1", # "ldrb r9, [r9]", "sub r9, r9, lr, asr #1". s/^(\s*)(ldrb)(\s+)(r\d+),\s*\[(\w+),\s*(\w+),\s*(asr #\d+)\]/$1add $3$5, $5, $6, $7\n$1$2$3$4, [$5]\n$1sub $3$5, $5, $6, $7/g; # Convert register indexing with writeback into a separate add # instruction. This converts "ldrb r12, [r1, r2]!" into # "ldrb r12, [r1, r2]", "add r1, r1, r2". s/^(\s*)(ldrb)(\s+)(r\d+),\s*\[(\w+),\s*(\w+)\]!/$1$2$3$4, [$5, $6]\n$1add $3$5, $6/g; # Convert negative register indexing into separate sub/add instructions. # This converts "ldrne r4, [src, -pstep, lsl #1]" into # "subne src, src, pstep, lsl #1", "ldrne r4, [src]", # "addne src, src, pstep, lsl #1". In a couple of cases where # this is used, it's used for two subsequent load instructions, # where a hand-written version of it could merge two subsequent # add and sub instructions. s/^(\s*)((ldr|str|pld)(ne)?)(\s+)(r\d+,\s*)?\[(\w+), -([^\]]+)\]/$1sub$4$5$7, $7, $8\n$1$2$5$6\[$7\]\n$1add$4$5$7, $7, $8/g; # Convert register post indexing to a separate add instruction. # This converts "ldrneb r9, [r0], r2" into "ldrneb r9, [r0]", # "addne r0, r0, r2". s/^(\s*)((ldr|str)(ne)?[bhd]?)(\s+)(\w+),(\s*\w+,)?\s*\[(\w+)\],\s*(\w+)/$1$2$5$6,$7 [$8]\n$1add$4$5$8, $8, $9/g; # Convert "mov pc, lr" into "bx lr", since the former only works # for switching from arm to thumb (and only in armv7), but not # from thumb to arm. s/mov(\s*)pc\s*,\s*lr/bx$1lr/g; } 1; libvpx-1.8.2/build/make/version.sh000077500000000000000000000044231357355204000170750ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## for opt in "$@"; do optval="${opt#*=}" case "$opt" in --bare) bare=true ;; *) break ;; esac shift done source_path=${1:-.} out_file=${2} id=${3:-VERSION_STRING} git_version_id="" if [ -e "${source_path}/.git" ]; then # Source Path is a git working copy. Check for local modifications. # Note that git submodules may have a file as .git, not a directory. export GIT_DIR="${source_path}/.git" git_version_id=`git describe --match=v[0-9]* 2>/dev/null` fi changelog_version="" for p in "${source_path}" "${source_path}/.."; do if [ -z "$git_version_id" -a -f "${p}/CHANGELOG" ]; then changelog_version=`head -n1 "${p}/CHANGELOG" | awk '{print $2}'` changelog_version="${changelog_version}" break fi done version_str="${changelog_version}${git_version_id}" bare_version=${version_str#v} major_version=${bare_version%%.*} bare_version=${bare_version#*.} minor_version=${bare_version%%.*} bare_version=${bare_version#*.} patch_version=${bare_version%%-*} bare_version=${bare_version#${patch_version}} extra_version=${bare_version##-} #since they'll be used as integers below make sure they are or force to 0 for v in major_version minor_version patch_version; do if eval echo \$$v |grep -E -q '[^[:digit:]]'; then eval $v=0 fi done if [ ${bare} ]; then echo "${changelog_version}${git_version_id}" > $$.tmp else cat<$$.tmp // This file is generated. Do not edit. #define VERSION_MAJOR $major_version #define VERSION_MINOR $minor_version #define VERSION_PATCH $patch_version #define VERSION_EXTRA "$extra_version" #define VERSION_PACKED ((VERSION_MAJOR<<16)|(VERSION_MINOR<<8)|(VERSION_PATCH)) #define ${id}_NOSP "${version_str}" #define ${id} " ${version_str}" EOF fi if [ -n "$out_file" ]; then diff $$.tmp ${out_file} >/dev/null 2>&1 || cat $$.tmp > ${out_file} else cat $$.tmp fi rm $$.tmp libvpx-1.8.2/build_debug/000077500000000000000000000000001357355204000152775ustar00rootroot00000000000000libvpx-1.8.2/build_debug/non_greedy_mv_test_files/000077500000000000000000000000001357355204000223535ustar00rootroot00000000000000libvpx-1.8.2/build_debug/non_greedy_mv_test_files/cur_frame_16x16.txt000066400000000000000000045521511357355204000257410ustar00rootroot00000000000000486,720 
[486x720 grid of comma-separated 8-bit sample values omitted -- machine-generated test input for the non_greedy_mv tests]
216,204,197,223,213,222,230,203,207,210,220,196,230,200,187,204,213,202,186,196,188,215,213,216,215,117,7,1,3,27,12,3,16,10,8,33,10,17,11,6,6,20,26,8,26,9,0,7,6,21,241,213,213,199,204,228,213,211,239,226,208,210,224,224,219,234,217,214,214,228,216,204,200,199,210,210,236,224,231,211,198,223,223,239,222,220,220,220,213,236,239,231,192,233,223,241,217,237,217,227,220,233,211,219,246,225,236,210,231,226,217,207,222,220,206,231,212,230,239,211,232,198,225,215,207,189,221,206,229,219,207,239,204,223,244,214,227,214,217,205,206,232,234,231,222,235,220,215,219,241,238,228,233,213,242,232,231,195,237,218,222,219,229,227,233,220,218,221,230,204,226,209,236,231,236,232,231,202,238,243,225,238,218,219,230,237,229,227,231,224,236,219,233,245,235,231,251,212,225,243,244,237,241,224,238,230,232,242,240,227,227,229,242,243,225,225,238,225,252,204,248,254,253,234,236,241,243,249,234,252,238,247,226,255,248,253,249,236,248,240,220,227,231,229,230,227,247,223,245,247,246,243,233,247,216,239,252,255,253,252,233,253,242,244,249,253,249,237,252,245,232,241,246,245,237,240,255,245,236,234,243,243,250,236,235,236,245,228,247,255,254,240,254,235,253,234,244,199,240,248,217,234,248,240,253,241,247,252,249,247,248,246,229,239,234,237,250,244,243,236,246,249,252,244,233,243,251,245,233,247,253,249,255,243,252,253,254,228,247,240,241,248,244,255,249,241,239,238,242,236,245,248,234,238,235,235,247,242,223,230,249,247,235,224,245,249,249,234,250,241,255,252,249,251,238,230,244,242,238,236,239,250,236,241,242,226,209,251,208,194,227,254,226,247,252,194,169,164,128,86,54,168,247,247,252,223,248,243,247,248,154,198,245,245,254,255,252,240,229,231,249,243,231,255,220,173,229,230,247,241,249,247,236,246,247,223,238,250,251,234,255,255,254,252,236,227,218,159,149,116,84,95,20,138,226,252,246,255,249,245,240,251,248,255,238,244,249,243,247,253,251,226,227,253,245,251,246,254,237,250,241,180,178,228,246,253,242,249,229,230,225,246,250,216,225,221,235,251,172,225,189,102,138,99,97,47,63,53,59,195,162,84,46,155,136,58,22,59,116,102,70,43,36,14,35,38,38,50,101,104,90,62,80,34,29,221,243,245,221,240,235,244,249,240,231,230,253,241,239,255,244,237,231,240,245,244,213,181,234,250,196,169,185,220,210,230,237,217,180,192,226,249,247,241,252,249,255,240,245,243,245,251,246,228,251,236,250,251,254,222,140,108,40,3,177,247,238,255,245,240,239,206,226,206,212,218,211,229,247,215,128,72,149,230,179,239,226,217,228,241,235,232,224,228,235,214,228,222,197,228,211,218,218,208,230,204,225,212,232,219,213,209,231,213,214,205,230,209,217,234,219,226,225,216,208,227,193,209,219,203,229,222,207,229,208,210,221,217,207,227,199,213,209,232,204,158,152,95,103,71,61,43,81,71,82,96,98,127,111,65,62,73,45,48,82,81,109,79,137,113,52,58,74,31,25,40,134,171,192,212,217,209,209,186,226,226,213,207,224,224,195,205,186,218,208,193,204,182,234,216,202,232,205,217,213,220,213,224,196,200,204,213,190,217,197,119,17,12,9,19,4,7,12,13,4,29,8,7,1,14,14,17,2,6,16,14,20,11,40,14,227,224,232,199,213,228,223,233,200,234,211,189,208,223,216,225,220,206,224,194,220,223,195,210,207,214,235,205,202,222,222,231,206,220,219,211,226,227,210,221,223,203,192,223,217,242,231,207,218,212,211,215,218,208,236,215,211,225,208,209,225,225,222,211,201,219,213,198,244,227,209,208,201,232,226,231,211,224,221,211,221,191,216,223,221,222,207,235,238,218,236,241,222,235,227,208,217,218,230,200,216,221,242,245,210,243,226,237,220,216,221,222,246,252,215,246,230,239,208,225,251,207,248,229,242,230,232,246,238,212,242,219,223,236,247,245,216,229,215,249,230,206,240,221,232,228,233,239,236,227,
239,216,215,247,224,246,215,245,229,235,234,232,238,235,245,238,224,244,231,246,241,241,236,229,235,244,249,228,232,249,242,226,245,234,252,249,248,229,251,246,255,240,246,242,215,242,252,220,242,250,247,233,247,249,237,236,255,245,255,244,247,249,241,245,237,242,243,253,238,253,245,240,215,232,236,223,255,240,245,252,251,243,228,241,255,243,246,243,246,250,247,248,255,240,247,243,255,254,230,251,248,255,254,253,250,222,251,226,229,248,250,254,234,255,212,251,236,251,254,254,231,244,234,251,232,248,246,246,238,237,253,253,254,249,247,234,255,255,239,254,240,230,240,238,241,252,234,228,248,253,251,243,251,231,253,180,159,185,222,243,240,243,240,234,247,238,243,253,232,245,255,230,240,242,233,248,241,245,252,252,247,255,233,246,246,190,208,206,236,201,245,225,243,239,253,199,151,145,122,81,44,190,248,242,224,230,232,246,254,233,210,173,224,255,252,238,246,255,231,249,194,225,250,227,221,227,251,245,252,252,232,253,236,250,241,236,248,245,236,227,204,237,241,239,251,230,142,106,109,131,108,116,57,134,238,248,243,251,244,253,243,255,244,245,211,236,243,243,235,252,251,248,250,255,246,238,244,254,244,239,247,182,135,254,253,249,242,247,247,242,244,251,243,226,255,251,237,248,211,255,194,96,68,36,41,11,34,33,48,223,159,68,43,158,117,27,51,27,108,85,76,44,38,69,51,38,34,62,74,90,59,56,91,30,24,220,253,255,249,246,253,243,252,251,241,244,254,249,248,250,254,254,221,251,253,243,250,194,205,244,231,187,181,134,203,240,220,191,199,192,237,234,254,232,233,246,242,249,236,249,247,241,246,250,254,246,246,252,234,189,104,71,26,44,185,236,205,218,200,241,232,229,234,204,176,210,238,216,227,249,116,50,132,207,216,198,212,208,242,228,249,233,234,214,220,234,226,242,241,219,205,218,202,200,198,223,215,218,213,214,225,207,222,238,215,233,214,216,216,212,206,220,207,210,200,211,215,212,200,221,220,221,201,210,192,212,168,192,235,232,212,210,226,186,181,102,119,110,138,91,90,68,85,80,129,161,162,126,100,121,95,81,57,64,128,136,133,116,131,119,103,77,74,37,53,144,223,225,224,214,210,188,200,202,211,198,227,217,212,205,207,240,193,200,223,193,226,231,200,197,214,188,218,219,220,207,217,186,199,211,181,206,200,211,216,110,0,13,5,8,17,10,4,23,9,27,1,5,2,7,4,4,4,24,8,0,27,15,11,27,210,206,210,229,216,215,188,212,226,227,192,217,213,218,219,215,237,224,229,219,211,234,189,198,226,225,217,216,223,208,246,233,234,218,204,209,232,213,215,231,221,231,226,201,232,220,211,225,203,218,226,188,241,225,221,189,210,226,234,225,201,232,200,220,224,205,217,220,225,216,203,233,212,236,223,215,227,220,209,227,225,203,198,225,215,233,218,214,205,225,237,243,229,220,199,222,209,204,241,207,203,211,245,233,213,241,213,210,224,241,228,219,210,221,238,198,211,239,236,212,237,249,222,227,225,219,227,226,252,241,235,223,203,240,223,226,241,223,243,212,234,225,227,236,230,229,226,238,236,228,231,214,229,249,231,244,226,245,231,238,240,243,242,222,243,240,221,247,240,240,243,255,242,249,221,240,244,247,242,237,243,248,251,251,246,245,243,236,229,243,238,237,238,247,255,248,247,248,251,226,252,254,239,248,238,224,235,238,242,250,244,251,255,243,234,250,251,243,242,251,242,255,237,238,230,250,251,228,245,255,245,253,255,253,253,244,248,250,238,248,246,248,251,253,236,248,224,247,251,250,245,246,253,234,232,242,239,255,242,240,228,215,254,226,234,252,236,252,253,252,241,225,232,237,244,236,249,229,248,252,248,238,244,255,240,250,241,255,245,243,253,254,243,245,240,252,247,238,236,249,238,243,251,249,197,189,193,242,253,242,247,247,243,219,238,238,249,226,246,245,254,251,225,234,255,251,253,230,244,255,245,238,243,250,219,172,190,232,236,233,24
8,250,254,213,239,206,164,139,139,116,47,215,235,249,233,235,247,244,238,249,185,183,226,247,245,230,247,255,237,178,159,207,246,247,231,222,253,246,229,249,249,238,249,253,250,237,248,255,251,248,210,156,89,85,113,72,99,143,117,158,153,168,123,136,207,188,198,186,188,199,181,153,178,138,183,230,255,255,252,249,246,242,234,218,235,250,255,238,255,225,236,220,182,221,222,234,250,253,246,230,247,255,238,246,248,253,253,199,206,229,135,40,41,18,43,40,70,41,42,196,184,79,51,128,118,61,73,27,124,94,60,81,51,31,75,26,62,45,52,57,74,49,74,18,61,202,250,252,248,249,251,229,232,228,254,240,238,238,246,254,254,255,232,219,231,250,252,204,192,240,228,134,130,144,190,234,192,183,252,248,225,250,237,237,244,238,237,218,245,246,233,255,243,191,128,91,80,110,114,83,65,38,49,52,61,141,145,157,194,232,225,231,203,217,204,214,222,205,220,236,132,41,111,241,202,214,250,225,244,216,246,212,220,228,212,226,236,210,243,220,223,207,217,242,214,221,227,226,215,212,231,219,222,221,216,237,217,215,245,237,210,213,202,190,203,210,228,216,215,237,211,216,200,223,212,227,238,222,214,209,215,225,217,218,194,131,104,92,116,104,92,81,64,56,76,150,127,145,125,115,85,74,51,49,99,137,116,81,129,101,99,86,85,47,15,170,217,227,213,202,225,191,224,224,213,222,209,197,207,220,204,219,205,231,196,209,231,201,220,210,217,227,213,211,206,207,217,203,210,202,210,193,225,191,216,111,0,8,17,15,35,13,6,14,8,8,14,27,9,5,1,0,27,0,13,0,3,4,0,15,236,216,213,217,210,209,232,207,215,229,202,219,217,236,220,222,193,210,236,209,214,207,228,209,207,212,207,245,225,206,213,234,238,201,211,190,205,194,230,203,221,222,213,240,219,234,226,196,216,211,236,214,231,230,217,225,243,221,191,220,221,206,205,213,205,203,200,217,216,206,215,220,215,218,211,210,223,196,208,190,228,231,198,200,204,215,226,210,219,234,222,220,223,217,214,233,202,217,222,233,208,237,221,217,229,200,207,248,230,229,249,225,231,226,249,220,247,246,219,218,226,224,238,229,246,245,197,234,235,235,242,242,229,208,216,231,228,245,233,228,232,214,227,222,240,232,233,221,236,211,240,225,237,245,247,244,235,243,241,242,223,236,233,233,226,241,241,207,245,246,247,245,214,224,250,252,245,253,243,243,247,230,236,248,234,244,254,245,243,239,228,254,236,232,244,244,230,248,234,241,253,245,252,242,248,250,236,252,237,255,249,235,237,245,234,241,253,241,247,246,244,237,244,244,227,243,251,254,250,248,246,252,235,253,248,234,245,255,245,250,233,220,250,243,237,235,247,251,241,254,241,230,238,232,246,246,245,238,254,238,237,249,252,251,252,250,249,233,250,236,255,249,250,227,239,249,215,225,254,247,249,253,247,249,252,248,255,242,252,244,246,244,248,255,217,246,228,252,247,231,236,238,246,249,237,224,249,240,242,233,247,247,253,242,244,237,239,250,232,249,252,245,253,255,235,244,228,253,243,254,247,245,231,250,239,188,246,251,243,190,218,245,252,225,241,218,163,185,152,83,76,189,250,250,241,250,231,243,252,243,184,198,218,230,229,255,253,234,219,191,211,245,250,255,242,249,255,157,143,232,227,252,247,232,255,206,244,244,255,246,181,84,44,48,72,109,109,133,124,170,127,147,146,112,103,89,98,114,90,59,100,100,67,40,158,242,255,239,235,252,252,239,254,246,242,240,229,247,244,249,247,223,157,217,254,245,252,237,242,228,240,247,242,249,239,239,212,125,186,177,95,70,62,61,40,12,83,49,78,176,166,75,26,149,102,62,74,48,102,65,57,64,30,29,55,44,38,19,35,73,65,55,74,37,41,239,244,244,230,236,255,254,245,243,246,250,249,253,231,244,250,255,244,228,242,249,230,224,175,241,241,154,108,159,206,243,202,214,234,227,216,216,248,254,255,243,254,250,241,236,255,247,162,86,62,32,2,7,15,20,25,59,46,44,62,31,52,46,15
9,203,198,209,200,221,206,220,222,232,251,216,149,65,141,228,211,210,230,224,232,218,228,220,231,220,217,235,242,229,231,218,231,223,208,227,242,212,234,231,205,228,234,204,218,231,211,228,217,205,210,218,200,220,216,200,213,222,233,233,217,205,216,215,211,209,206,228,205,209,200,225,182,196,206,221,225,171,120,100,107,144,86,70,61,33,68,89,132,156,119,109,98,43,32,47,129,121,98,83,102,98,86,97,41,28,50,159,206,184,223,230,216,193,215,218,214,206,220,179,207,229,209,204,205,221,219,207,219,233,199,208,213,205,219,213,212,191,198,186,214,230,197,201,205,207,221,99,11,2,13,14,4,7,7,24,32,4,0,15,24,20,16,0,10,14,17,10,3,6,30,13,201,220,220,202,214,195,227,238,213,236,200,218,227,201,209,229,230,188,222,216,230,233,220,202,235,209,244,209,213,210,225,219,207,209,230,216,201,205,201,235,214,222,190,222,229,236,214,225,226,230,199,208,210,226,201,221,224,236,222,217,211,200,223,232,227,241,207,230,204,193,229,218,228,204,228,240,232,199,193,224,235,213,209,234,224,232,244,229,228,198,246,222,236,198,231,229,229,229,223,216,221,219,225,210,201,214,219,227,232,227,217,239,253,229,200,221,242,236,218,230,241,231,231,229,227,211,213,225,211,239,247,233,252,243,246,217,234,245,227,240,207,229,245,240,214,220,242,223,221,224,248,230,234,232,247,220,236,246,214,238,243,226,246,221,249,243,229,240,249,234,233,237,255,250,231,223,250,242,241,236,241,250,255,250,232,239,234,232,249,246,248,222,255,248,246,236,255,239,255,216,235,228,230,243,255,252,243,252,251,245,245,254,249,248,239,247,251,252,255,245,222,246,247,253,246,255,217,247,236,253,255,229,242,234,243,253,235,243,237,236,236,255,247,242,234,236,233,235,249,255,249,254,232,231,234,245,242,251,252,232,235,243,244,223,239,245,252,255,255,254,247,252,255,243,255,224,247,215,253,250,253,248,244,248,249,247,247,255,242,237,222,249,249,233,241,232,243,248,243,247,253,240,252,243,249,239,254,224,210,255,251,252,255,244,252,249,250,236,249,248,255,254,246,244,246,254,250,231,245,230,252,225,233,255,243,212,231,185,168,199,239,244,255,242,222,205,195,148,130,82,34,203,255,252,241,243,236,234,242,232,198,189,218,241,252,230,254,231,231,208,228,234,237,243,209,195,239,241,207,252,219,247,247,234,236,247,245,254,240,242,202,103,99,83,70,102,69,76,72,69,85,46,49,88,91,73,76,101,78,78,80,76,66,62,173,243,239,239,249,254,248,236,225,232,252,240,231,237,253,255,253,249,152,207,244,244,248,232,236,241,255,224,240,247,238,236,154,112,212,236,164,69,63,97,62,86,85,50,87,171,153,68,33,134,84,52,56,90,128,82,54,64,34,59,40,38,55,23,40,61,72,35,50,22,34,216,255,240,235,219,245,240,250,229,229,240,255,246,252,235,245,252,242,253,234,234,243,215,168,215,233,241,179,217,236,248,249,217,249,239,193,207,241,249,251,242,253,235,213,247,228,203,124,41,76,54,50,24,42,52,46,58,53,77,47,41,15,0,116,198,197,214,207,198,216,204,216,238,250,234,137,71,165,228,219,239,249,235,239,235,239,230,218,231,229,202,235,228,236,242,215,223,221,223,239,226,228,222,213,237,233,244,212,229,219,233,231,206,208,227,204,223,208,225,198,210,223,204,223,196,236,206,192,205,225,231,202,192,193,226,209,198,216,228,185,173,126,109,87,108,82,80,26,53,52,99,122,72,106,83,89,64,37,50,130,87,99,94,95,77,82,54,67,35,67,171,184,185,236,225,201,199,205,194,216,228,216,226,211,210,223,202,201,210,210,206,211,197,221,216,223,239,241,222,206,219,206,210,204,226,192,200,196,203,221,92,2,3,2,24,0,17,3,28,18,6,21,28,9,7,4,3,10,6,20,6,17,21,26,1,209,203,237,192,221,214,214,221,211,237,223,199,220,210,229,199,216,203,226,219,220,212,202,234,198,207,221,197,223,234,237,208,209,230,191,210,220,217,238,234,215,224,221,
235,224,233,197,193,217,227,217,216,215,212,212,215,221,230,216,214,208,198,231,224,214,216,226,209,193,228,221,222,203,203,209,218,191,206,226,201,236,203,212,216,219,226,222,237,219,229,197,211,243,229,215,225,210,223,236,221,242,239,242,211,238,222,218,222,227,247,232,234,244,222,235,224,207,233,230,233,227,215,217,207,247,240,227,235,240,230,237,238,220,238,236,227,222,212,241,236,246,242,233,220,228,220,225,251,223,245,225,218,246,241,216,233,222,229,244,235,228,219,224,223,235,255,243,232,234,235,226,245,240,246,247,251,237,231,245,221,241,237,238,250,215,235,235,228,232,242,232,245,223,255,240,233,254,245,253,244,240,246,243,247,253,249,241,242,245,232,243,249,243,237,250,247,229,243,252,235,251,231,227,246,247,247,250,248,227,254,236,240,252,248,250,247,243,241,252,228,246,250,224,250,253,235,234,254,245,245,245,244,255,251,235,236,246,230,238,251,251,227,246,247,255,255,229,255,236,245,251,253,235,244,229,253,233,232,241,238,230,254,232,220,241,224,240,241,251,251,224,240,250,255,223,251,247,252,238,237,253,239,240,238,232,242,251,247,245,249,247,249,243,247,233,231,241,235,238,247,254,250,223,243,244,242,236,255,239,247,248,244,245,225,210,179,202,211,230,221,255,244,241,225,245,200,160,170,136,72,33,214,254,241,253,232,252,239,245,213,154,194,229,231,231,252,251,253,232,191,193,234,226,237,205,234,238,220,235,246,230,249,245,234,255,234,238,242,247,242,190,122,104,73,57,36,72,63,49,57,66,20,46,29,33,44,34,40,81,33,64,57,46,51,66,116,231,243,223,242,254,249,243,232,240,228,253,246,237,249,231,237,130,157,253,251,255,237,238,242,226,204,226,178,144,200,172,226,251,243,166,109,69,93,54,29,83,60,66,172,127,47,36,130,63,36,60,55,120,84,55,39,37,49,60,43,42,39,29,70,60,59,41,29,64,211,249,252,243,239,250,232,252,236,252,248,255,244,242,233,242,251,255,230,244,231,239,253,161,216,251,247,164,184,231,240,254,195,217,234,163,183,187,240,255,235,254,255,253,243,247,149,82,54,76,68,63,72,72,69,58,76,64,53,70,73,32,38,160,218,239,218,181,168,176,191,228,234,255,222,150,50,155,229,202,235,246,228,248,238,231,238,236,213,249,226,221,231,225,220,214,221,250,200,218,231,216,222,220,237,227,225,210,226,217,217,230,231,215,233,201,213,198,207,222,219,207,220,204,200,215,207,205,201,212,231,222,229,209,189,200,191,204,218,211,149,120,60,67,41,42,45,71,56,71,131,119,113,90,68,31,66,39,58,123,118,101,89,71,86,51,42,49,14,88,196,220,207,205,200,225,215,215,225,206,207,209,207,207,223,201,216,214,202,215,225,210,213,216,239,205,208,199,197,202,217,202,199,196,199,213,192,196,196,220,97,12,7,6,6,8,30,19,2,8,26,27,9,4,22,3,6,11,21,16,11,18,11,28,10,213,220,198,189,220,211,214,213,201,223,217,189,205,228,234,218,199,210,227,207,206,223,228,199,228,229,188,219,219,208,212,237,218,211,219,213,224,212,226,245,227,224,235,238,237,228,199,244,216,223,230,191,241,230,221,223,214,210,217,205,201,210,232,211,224,232,230,224,236,199,223,215,212,221,235,240,244,227,216,219,211,224,218,236,222,200,234,207,228,238,235,213,219,213,213,228,240,220,238,233,192,229,209,229,229,207,205,243,218,210,226,237,218,233,238,217,245,203,219,238,222,234,231,224,252,231,238,233,232,212,223,205,246,240,223,205,226,219,232,216,207,229,223,227,244,242,222,241,215,236,212,236,240,235,223,204,232,229,243,233,247,238,228,240,226,232,229,232,224,242,241,236,233,223,242,245,242,236,249,243,235,231,254,246,239,248,223,242,228,251,250,253,241,252,248,240,212,249,240,227,232,248,243,248,243,253,244,226,247,218,248,243,242,241,245,252,245,253,252,245,243,253,251,235,252,221,254,249,229,255,225,243,255,242,248,236,255,252,254,247,255,252,239,24
9,234,249,229,252,232,247,245,245,247,242,226,237,238,235,245,221,246,254,252,253,253,255,243,243,240,240,233,239,246,250,231,239,248,232,240,236,226,252,242,255,255,247,243,235,239,240,233,237,243,229,236,252,243,242,237,233,232,249,226,241,247,240,251,247,247,251,229,238,245,246,242,254,231,237,245,240,255,251,255,240,235,242,250,240,247,240,221,236,245,244,176,195,247,237,213,211,229,253,255,240,255,173,150,154,136,71,50,218,234,230,237,214,249,244,249,242,205,190,234,223,240,249,246,235,230,185,155,249,253,255,245,232,248,244,225,236,238,237,238,232,254,248,255,239,244,254,154,108,111,64,65,54,66,60,64,76,65,36,66,39,43,54,49,50,66,49,74,55,72,12,45,152,241,239,245,241,232,246,246,242,255,250,233,248,252,243,253,220,154,194,253,254,232,246,235,242,231,180,230,214,222,217,216,232,255,166,154,152,119,140,53,59,102,101,66,154,158,97,46,109,98,77,56,49,105,58,62,56,53,58,58,65,40,55,51,63,63,36,64,21,61,219,254,239,244,224,234,239,221,251,243,242,243,249,253,233,235,252,246,233,239,212,241,252,201,188,249,237,184,163,232,215,243,193,229,201,198,189,199,253,254,227,250,222,240,250,225,140,99,67,81,84,80,99,62,58,53,61,41,63,58,58,38,42,225,216,215,230,201,182,199,198,198,191,237,255,158,67,168,205,200,232,236,225,209,207,231,235,227,225,222,212,225,239,202,227,210,231,231,217,201,223,235,220,208,221,225,234,214,215,219,216,230,227,213,229,225,222,215,229,233,191,222,219,215,237,209,215,212,202,228,214,192,219,206,192,219,218,240,231,203,125,111,68,84,66,60,78,29,34,104,155,104,114,74,59,30,44,48,77,104,89,126,93,68,75,64,46,28,18,97,201,198,218,221,199,206,227,219,201,207,217,197,210,228,215,222,229,217,212,212,207,216,221,219,217,209,190,209,209,213,227,230,233,221,199,205,205,211,206,202,85,21,6,7,14,5,24,3,1,7,27,5,21,3,13,14,3,12,26,35,5,1,24,17,7,207,236,219,230,191,216,214,216,220,221,212,225,225,228,209,233,200,219,223,226,226,207,249,231,232,227,212,230,219,214,234,234,232,187,222,230,216,238,238,211,217,235,209,215,240,235,211,230,219,209,222,223,225,217,200,218,236,209,208,212,220,228,227,239,218,233,218,243,234,247,200,216,234,210,223,239,224,234,215,206,223,219,238,234,213,240,205,214,214,230,186,188,232,218,234,217,218,222,197,245,225,230,193,231,200,217,205,228,206,219,247,219,218,229,206,221,234,219,210,241,246,210,216,230,234,209,250,235,230,234,213,233,208,222,231,218,218,214,234,230,237,247,244,227,232,252,230,250,242,241,239,248,225,227,237,235,240,224,240,241,247,233,235,247,231,237,230,241,224,246,223,229,236,249,221,249,239,232,227,243,233,247,253,255,243,239,243,223,230,238,244,250,242,243,215,247,252,250,239,249,233,224,228,255,249,240,253,245,247,230,251,250,236,245,244,251,247,248,241,243,236,231,248,253,232,234,255,255,250,242,249,255,240,245,242,235,248,252,238,241,228,247,252,251,248,253,251,240,250,255,230,225,251,234,252,248,248,240,235,253,247,218,240,250,248,235,255,225,226,247,225,240,228,241,243,233,238,247,247,213,246,229,249,250,247,252,222,243,240,232,249,247,254,253,233,250,245,255,251,238,229,253,249,251,241,223,248,225,244,234,234,254,233,253,223,255,254,236,247,251,239,240,243,234,253,244,230,242,242,254,248,218,245,247,242,237,253,246,239,183,231,239,227,224,244,189,182,182,142,87,72,243,229,251,247,222,237,243,232,247,224,183,204,245,253,246,252,217,210,205,193,238,250,244,196,243,251,242,229,243,246,251,247,243,245,247,245,244,244,249,153,128,103,69,48,61,94,74,70,49,66,98,84,71,46,79,30,91,91,47,107,88,73,33,122,220,239,243,241,230,244,249,248,225,242,231,241,245,253,247,211,220,156,144,235,250,247,250,218,253,236,234,252,220,253,247,249,214,149,
112,166,243,186,134,100,73,123,78,96,145,183,96,128,130,132,122,68,68,105,49,49,75,45,55,43,51,42,19,61,64,76,69,74,14,48,226,247,246,250,244,252,252,252,233,227,225,245,230,254,234,253,251,227,246,230,243,237,251,203,179,242,250,174,164,221,208,196,231,209,232,234,207,213,229,249,254,231,250,242,246,230,120,92,97,66,89,89,70,29,37,59,46,65,49,51,64,31,47,180,230,241,218,230,222,211,209,192,245,244,222,122,84,145,213,218,245,206,244,220,212,230,219,228,215,217,218,230,217,235,222,210,213,214,206,221,231,218,231,202,224,224,233,188,202,238,210,243,223,224,215,234,223,239,192,190,215,201,217,211,233,219,212,221,224,208,182,242,235,199,197,248,208,218,226,190,148,96,76,91,98,45,38,60,50,134,162,112,121,66,66,35,44,18,102,130,107,85,83,61,54,52,54,56,38,169,205,208,199,217,197,217,222,209,209,207,224,213,234,212,220,228,217,216,202,203,225,222,223,206,203,220,213,206,216,215,226,212,200,209,205,202,225,207,198,189,109,2,0,7,2,7,22,14,18,4,7,18,29,0,2,19,17,17,4,10,21,10,40,5,1,209,216,198,201,191,212,205,198,209,226,218,208,197,217,212,236,204,208,196,220,201,214,215,218,228,198,181,196,208,214,233,214,193,215,235,222,230,225,229,201,242,192,208,215,221,224,216,228,216,209,215,215,219,220,212,211,187,219,225,221,226,221,244,225,225,245,217,219,242,242,219,222,213,221,229,225,242,210,222,227,214,235,240,219,219,207,238,202,237,238,229,228,246,204,206,221,219,226,216,213,237,224,200,223,226,234,238,225,240,218,235,230,221,221,214,250,232,247,220,245,230,226,237,218,240,206,220,227,211,207,243,228,215,226,242,249,227,236,200,222,230,238,247,222,228,238,234,250,231,236,233,247,219,236,233,232,226,245,214,248,212,210,246,225,227,231,232,226,249,252,231,238,216,237,246,220,255,224,247,225,227,229,241,240,235,237,247,244,227,234,246,253,244,242,245,244,238,255,249,248,249,251,250,241,252,242,254,233,247,255,249,255,251,252,234,255,218,238,218,247,250,238,243,245,252,249,244,247,250,237,244,252,229,248,241,252,223,254,252,252,255,244,234,245,249,252,250,250,241,242,243,238,252,249,227,253,246,244,247,253,255,251,242,240,238,242,242,231,250,255,220,244,248,228,243,240,244,243,235,234,231,246,237,247,233,235,240,246,253,243,236,254,254,255,245,239,251,250,252,233,248,253,237,249,237,228,251,241,230,238,232,249,238,239,250,251,235,251,247,233,245,246,249,246,245,240,244,229,233,251,242,222,230,230,199,243,240,229,253,206,234,251,236,242,246,192,169,155,111,49,86,236,253,250,232,245,240,239,253,244,246,179,190,242,234,253,242,237,215,185,235,247,255,236,246,251,229,186,192,238,238,251,237,233,244,238,229,241,230,253,170,155,126,51,65,52,40,36,30,48,71,60,71,35,52,51,63,85,97,66,44,49,71,15,176,240,255,253,238,242,241,247,239,252,236,255,248,231,236,255,232,241,176,121,233,245,239,242,221,244,250,252,229,255,232,248,208,181,101,92,202,246,218,190,131,96,101,109,49,139,188,125,83,85,130,122,72,107,137,106,102,84,31,34,48,52,58,7,54,79,79,47,60,27,51,200,246,244,254,243,231,253,242,235,246,228,254,240,244,244,238,251,235,248,225,239,246,248,217,179,240,249,232,183,195,155,167,223,235,205,230,225,174,230,240,253,249,254,235,245,206,106,62,87,75,100,76,46,16,53,49,32,28,60,45,52,24,43,198,223,233,250,229,252,225,240,243,240,248,228,113,70,176,222,185,237,216,255,217,235,231,214,222,218,204,231,223,238,219,238,217,219,217,220,214,223,214,217,237,237,217,236,200,208,212,230,212,237,197,219,234,194,214,187,209,225,211,222,232,228,225,193,234,212,237,219,206,222,222,202,210,191,217,212,222,143,73,71,76,93,81,50,94,74,136,145,109,116,72,68,54,41,70,128,120,95,136,104,102,57,51,13,32,113,187,234,226,223,208,214,222,2
19,238,209,227,232,210,202,214,205,204,231,220,220,197,192,184,221,224,189,201,206,224,211,170,196,192,202,222,216,216,194,227,189,205,106,8,13,4,27,2,6,21,3,11,12,10,17,13,17,15,30,25,2,7,20,7,38,13,15,217,241,220,199,217,209,217,197,214,205,219,227,204,225,211,211,219,210,205,217,214,225,219,234,179,225,210,234,225,201,223,223,217,224,222,228,220,229,231,202,217,223,242,206,223,214,217,237,233,214,225,212,219,235,219,215,200,221,240,243,229,225,243,205,215,235,209,212,201,212,216,212,236,212,202,230,210,220,234,221,222,231,209,206,210,235,226,206,191,241,228,233,228,217,229,222,228,208,209,232,231,188,211,215,209,234,211,221,227,200,228,211,234,235,205,249,235,222,232,235,243,233,221,202,231,245,231,233,245,237,214,228,221,229,211,244,244,230,238,243,237,234,243,239,225,210,233,248,235,240,236,229,218,235,219,236,252,253,237,228,204,238,232,236,245,242,224,245,237,243,250,235,239,239,245,235,236,211,241,251,226,245,237,249,253,242,242,241,247,250,229,251,230,255,253,225,254,232,232,236,255,223,245,228,244,245,237,203,253,220,251,252,246,226,248,224,254,234,253,240,243,242,245,247,239,217,232,252,237,255,252,249,252,246,227,249,221,230,252,237,249,255,250,255,227,251,255,255,231,235,217,239,241,226,249,250,255,248,237,238,233,237,242,242,241,245,239,250,236,240,255,242,253,247,224,242,235,230,251,243,253,231,251,252,219,247,255,245,249,230,242,233,220,248,252,241,228,253,235,224,246,226,227,240,243,255,226,229,237,235,247,254,240,242,236,239,254,241,243,248,246,234,246,248,236,232,243,238,252,250,237,248,238,232,184,202,234,229,228,232,237,235,203,223,247,174,158,147,125,62,83,235,251,250,255,232,237,224,255,246,219,199,200,242,221,255,244,221,214,222,242,255,238,221,227,224,195,176,220,233,229,246,250,253,227,249,228,225,242,245,172,127,125,112,49,66,67,63,70,43,44,61,46,51,42,48,45,95,77,55,75,53,48,31,165,236,254,241,236,255,221,255,246,234,238,236,240,242,251,252,226,248,201,113,244,240,250,233,223,241,225,238,249,234,223,178,172,177,149,144,199,248,228,117,87,69,78,87,62,138,159,89,80,88,95,111,70,108,174,145,92,62,50,64,47,74,44,33,67,89,65,66,55,37,57,186,236,243,239,250,252,234,224,236,247,234,242,243,221,255,231,244,250,250,213,245,247,248,215,190,217,248,217,104,103,167,223,239,230,164,209,224,213,229,238,248,255,250,224,124,115,84,105,82,86,103,79,74,49,53,36,34,69,35,29,53,31,46,189,223,214,241,240,245,231,240,251,251,239,207,102,93,191,214,211,239,205,210,230,226,222,198,221,218,190,197,200,237,219,214,214,241,231,211,191,223,218,216,239,204,217,204,208,231,219,206,217,222,191,220,235,202,220,233,216,211,213,230,197,231,218,217,206,207,201,198,185,214,212,217,200,204,191,235,190,177,140,78,75,72,83,36,69,65,105,157,156,135,109,66,48,48,73,146,134,136,133,87,68,36,54,14,93,201,240,226,215,194,208,186,202,205,202,215,220,211,214,208,215,213,220,189,197,197,217,231,227,201,189,193,213,208,208,198,224,226,216,201,187,217,216,232,217,208,227,82,1,1,32,7,21,14,8,6,13,7,11,13,2,0,26,7,3,16,7,5,3,7,13,14,205,220,213,216,211,226,221,230,207,203,228,224,225,238,204,223,225,185,208,212,199,212,240,214,228,226,211,228,230,246,231,219,207,214,216,211,226,206,232,225,217,242,228,219,194,214,218,209,204,222,228,225,229,220,233,212,197,227,216,232,226,218,203,211,225,225,234,220,226,234,204,214,230,226,194,219,233,203,212,243,214,213,232,227,224,223,201,229,202,197,209,222,221,225,221,243,237,224,217,242,233,235,215,225,212,225,215,228,200,215,235,226,237,216,206,222,227,217,225,231,230,231,229,220,215,222,232,226,218,217,222,234,236,236,224,221,246,233,210,223,207,233,233,240,254,240,240,242
,250,228,224,247,232,232,253,230,245,249,223,228,218,236,249,229,244,231,212,239,234,235,235,215,251,241,249,255,247,233,251,252,234,238,223,236,253,243,216,242,246,245,237,234,245,245,220,235,250,249,255,245,245,235,243,228,253,254,252,238,241,246,247,228,233,249,250,248,251,253,227,247,245,254,255,240,253,248,237,250,252,250,249,251,243,234,241,252,247,243,252,248,252,253,253,242,250,254,242,240,236,251,249,255,249,242,251,234,249,236,252,236,234,247,243,248,247,235,250,248,253,255,232,249,246,237,248,251,245,236,219,251,244,251,224,254,250,242,241,245,231,251,241,251,239,250,251,253,253,243,231,237,250,243,238,247,249,253,224,244,246,240,249,250,230,252,218,231,246,233,252,254,253,231,253,252,225,246,250,249,252,255,249,208,234,234,199,241,249,253,242,214,228,235,203,206,241,212,149,161,117,54,74,237,255,252,232,231,255,250,246,241,225,194,207,246,246,245,237,203,210,201,208,210,232,190,185,223,219,224,227,237,236,237,240,250,248,235,246,235,245,180,122,115,119,91,33,52,49,27,27,38,40,49,61,31,39,48,55,74,55,57,63,65,36,52,170,232,244,255,245,253,252,245,237,255,232,249,224,252,234,250,241,222,233,155,224,243,235,247,242,249,249,238,212,187,111,149,204,230,173,174,238,245,118,94,64,52,58,85,67,141,145,102,98,76,119,79,93,133,145,118,62,45,55,91,88,82,46,49,74,57,73,71,57,32,37,234,243,249,246,240,255,242,239,246,235,246,249,255,248,253,224,251,248,234,217,210,241,248,230,172,193,235,217,174,152,205,240,249,238,173,194,246,209,208,234,228,244,238,121,82,82,51,80,80,91,89,78,60,50,51,36,55,46,24,31,37,15,47,226,236,235,224,226,201,216,228,233,246,239,210,90,119,192,202,207,212,219,231,233,225,239,230,224,210,225,218,237,212,213,214,210,223,227,234,241,211,239,212,203,230,201,241,211,220,214,191,240,195,223,209,207,235,226,200,214,210,198,225,215,214,213,210,229,214,226,225,199,194,216,214,241,206,224,195,219,209,136,100,116,75,43,71,93,56,139,178,149,125,85,67,56,37,107,125,117,133,109,69,52,62,31,51,167,215,227,201,213,218,205,196,197,173,209,216,221,201,203,211,225,210,196,222,197,209,210,215,209,223,223,221,233,228,207,219,191,213,215,206,227,207,205,211,199,200,212,109,14,6,7,0,7,18,24,3,37,10,5,48,7,0,8,9,7,2,13,24,27,20,17,3,197,223,234,233,227,204,223,223,214,225,220,203,215,198,209,216,196,199,207,204,236,201,203,217,213,215,213,218,219,224,218,245,192,213,204,222,233,226,197,217,222,234,211,211,220,217,226,216,227,212,213,214,226,222,242,227,225,210,204,221,222,200,212,201,205,207,242,237,207,201,217,231,223,223,239,222,202,209,224,233,227,215,227,248,225,226,218,205,224,205,218,208,227,226,216,202,215,211,231,219,216,223,216,223,203,230,216,208,241,232,218,228,224,214,233,217,204,201,244,221,199,227,227,234,224,233,223,207,227,223,235,201,216,219,226,237,212,227,226,239,217,234,222,221,241,233,234,234,216,235,243,242,254,225,230,249,242,238,227,233,235,212,208,215,249,233,229,221,235,238,228,223,234,207,246,217,243,227,235,234,245,216,224,245,240,232,250,241,232,236,249,252,245,237,237,241,250,255,237,233,243,231,250,218,249,236,242,254,245,239,246,245,236,250,225,228,243,250,242,249,200,230,245,241,240,240,249,251,230,252,255,235,255,240,245,230,246,247,252,239,249,251,241,255,253,229,244,251,239,249,249,255,244,237,253,246,242,230,255,211,242,255,225,219,228,254,239,230,250,247,239,235,250,221,248,213,242,255,247,246,246,252,253,254,227,241,233,249,245,247,253,252,214,225,239,242,237,226,255,250,245,239,240,240,248,243,239,230,241,242,238,254,242,241,229,242,226,254,248,251,231,231,236,255,226,249,244,243,229,246,233,221,248,233,217,250,219,198,217,222,252,228,219,199,243,197,1
79,169,137,79,100,235,242,251,251,242,252,251,233,253,235,219,208,244,226,249,244,211,193,145,205,222,226,222,234,243,236,223,233,242,231,246,236,246,231,226,249,243,190,144,98,110,78,24,16,29,27,28,40,21,19,9,29,37,18,35,51,81,67,60,72,82,40,110,244,244,227,241,224,219,253,244,247,254,250,250,247,245,235,219,246,230,211,138,215,250,255,250,208,234,241,186,160,194,193,231,200,193,198,183,243,229,169,134,126,77,63,77,81,132,196,135,81,70,130,105,107,130,138,115,63,55,66,62,88,65,50,28,64,89,48,45,61,40,34,222,245,242,246,245,243,223,231,226,239,219,243,255,244,253,242,237,222,224,251,224,230,253,252,230,187,239,249,233,150,220,217,227,245,218,205,239,201,184,217,233,252,236,124,52,57,41,105,94,94,82,80,64,36,30,35,40,31,54,28,74,48,75,237,233,236,248,226,220,217,213,238,224,221,182,86,132,208,207,202,228,233,243,238,222,225,244,216,222,225,228,227,226,220,253,222,222,237,216,228,219,193,229,210,207,210,227,201,217,231,214,199,176,227,180,193,214,216,224,206,205,201,194,200,222,210,228,204,222,216,212,202,204,215,213,182,222,207,219,243,220,148,106,116,119,73,79,66,46,123,161,117,124,89,54,38,35,124,151,94,113,99,70,64,33,24,138,212,218,213,199,214,187,234,223,227,191,215,236,207,207,212,227,226,216,230,211,195,224,200,229,207,212,210,213,238,202,212,222,227,206,213,216,213,225,189,200,236,187,198,102,18,1,26,23,31,3,14,18,7,28,22,2,18,15,5,6,5,0,20,5,15,13,5,17,215,214,209,209,212,223,230,228,211,209,197,192,206,224,240,244,203,218,230,238,221,217,212,215,215,209,202,215,194,233,228,203,245,229,227,235,208,232,229,239,208,198,243,247,214,228,217,210,216,241,221,213,238,203,207,217,219,233,210,214,220,215,213,226,233,217,225,236,223,237,206,215,207,234,249,226,233,229,222,224,246,235,222,204,228,213,223,209,236,210,201,204,236,222,227,221,201,219,229,233,217,214,215,208,224,201,225,232,220,233,231,228,216,238,219,235,223,252,233,226,209,210,238,221,246,237,247,230,226,244,237,219,216,241,225,202,244,211,216,211,218,238,224,245,243,247,227,223,228,235,245,244,230,235,239,226,224,249,243,240,236,255,224,231,223,218,232,234,230,243,228,231,229,212,251,226,231,239,249,220,244,219,201,240,216,240,240,226,223,249,244,246,245,225,235,250,251,225,243,245,249,240,240,236,242,249,238,239,249,248,241,238,255,246,237,251,235,237,255,250,239,253,233,245,234,255,227,228,250,243,240,225,253,221,238,250,238,253,239,250,243,251,238,252,246,246,243,235,248,240,242,247,253,241,235,252,252,253,255,231,233,254,245,247,252,243,255,250,255,242,237,226,233,246,234,213,251,242,248,236,241,249,255,255,253,234,235,231,245,245,253,250,237,235,232,228,236,239,248,221,252,254,254,253,246,223,242,218,241,244,251,255,239,247,252,248,227,251,249,228,249,254,255,237,248,251,215,219,224,247,219,228,253,224,203,215,203,236,230,248,224,233,211,227,255,192,168,155,126,90,85,229,247,242,230,234,244,211,222,241,226,198,173,219,216,255,247,203,149,173,251,249,247,211,241,253,206,203,225,242,247,249,237,255,244,219,222,233,188,145,104,94,57,18,23,34,56,30,26,26,14,44,56,43,23,26,51,76,75,56,83,60,91,214,254,250,254,245,244,232,213,255,220,240,255,225,253,251,236,248,215,199,173,116,181,245,237,225,201,207,210,167,220,234,242,233,205,207,219,223,254,249,199,115,107,74,71,65,58,106,174,144,93,107,153,131,106,140,134,108,79,64,64,75,73,75,71,68,40,39,48,67,63,6,78,233,241,240,219,252,243,219,222,228,242,239,247,244,246,241,241,243,252,254,249,233,251,243,239,182,174,231,255,226,174,173,232,198,247,196,206,234,208,172,212,235,248,239,132,101,57,63,87,106,119,115,55,46,53,41,39,43,48,22,17,68,33,81,251,240,250,252,212,221,230,239,234,250,
239,163,68,154,198,191,229,224,225,227,206,226,208,221,215,242,207,226,236,225,230,222,208,206,235,192,221,222,200,203,212,213,222,233,222,225,204,210,210,214,217,213,213,203,203,197,208,200,214,215,210,213,234,209,215,212,204,203,216,192,213,214,202,201,183,239,246,197,160,110,88,112,94,91,73,69,145,136,130,132,91,51,63,61,123,129,143,134,89,90,57,46,41,192,230,222,225,217,222,223,197,205,193,209,188,230,207,216,217,213,220,218,232,193,217,214,202,214,210,219,195,201,213,211,232,199,217,204,206,216,233,216,221,216,206,210,203,105,7,7,23,14,11,13,8,42,8,9,18,36,9,7,0,37,19,16,24,0,16,22,9,15,203,220,205,211,230,207,241,207,207,196,214,205,223,213,224,219,229,211,206,216,207,230,205,217,199,221,210,218,202,211,235,240,238,227,236,223,209,229,201,203,229,221,210,216,225,225,219,216,229,220,211,214,216,230,229,215,219,231,232,223,217,221,218,202,235,216,203,218,231,212,198,231,206,214,202,220,236,221,243,235,227,219,220,223,229,232,235,203,218,202,214,213,231,208,226,230,244,220,234,215,226,236,242,212,208,217,230,242,229,223,233,249,221,229,217,223,227,220,221,231,224,209,216,233,218,230,228,233,224,239,226,235,241,243,225,240,230,233,206,208,202,222,215,237,245,222,239,235,211,230,226,241,240,233,219,214,233,227,219,238,221,237,236,249,238,239,215,229,228,239,221,245,241,220,245,233,236,224,230,228,214,248,234,240,234,247,240,226,246,246,212,249,214,248,247,248,251,241,243,243,228,252,246,248,240,228,249,242,239,237,228,224,244,253,227,251,251,237,252,216,241,250,245,238,245,244,232,243,246,230,233,251,221,204,251,245,244,251,242,241,255,251,253,242,242,230,253,245,247,233,255,255,249,244,226,249,209,244,234,246,214,252,252,253,247,220,245,245,240,231,244,244,242,254,219,253,234,244,250,254,250,234,254,255,251,238,248,245,222,250,254,252,241,243,254,242,244,224,230,235,236,235,224,241,242,241,255,223,225,244,233,234,240,232,238,235,214,249,249,237,239,244,253,245,255,235,254,232,224,251,232,234,248,202,208,234,222,254,239,202,237,221,211,215,242,146,132,132,120,85,98,230,252,246,236,227,244,240,226,241,220,196,184,224,251,234,215,168,195,201,251,247,245,206,237,255,208,212,233,250,239,253,243,241,252,233,233,234,194,131,101,56,24,10,30,30,17,20,48,21,27,32,33,14,7,24,58,81,85,75,43,38,155,240,250,255,224,242,236,247,251,243,233,255,231,254,218,237,242,252,246,227,214,116,178,238,255,234,204,198,244,216,252,251,253,224,182,225,208,202,236,250,188,158,134,111,51,92,58,114,202,172,90,82,131,109,74,80,147,102,108,64,49,63,60,79,91,82,68,78,87,54,81,36,52,224,253,240,254,225,245,239,225,243,246,247,250,252,239,250,246,217,235,234,242,208,231,234,252,228,157,206,219,224,188,143,201,230,209,233,183,209,224,187,198,225,252,215,123,113,71,108,106,111,108,67,60,67,59,41,19,37,46,31,15,52,11,79,230,236,238,242,228,212,236,231,226,225,206,146,64,145,221,197,219,222,235,245,229,241,233,240,227,210,232,218,201,208,205,226,216,206,238,196,209,212,227,215,224,220,225,219,195,209,225,216,202,221,180,193,219,203,198,196,205,221,195,217,194,176,224,191,216,205,204,225,216,183,198,217,205,238,192,223,223,207,188,147,125,90,99,105,89,81,135,144,130,141,73,55,78,99,129,103,138,122,105,104,77,29,105,225,199,203,203,210,214,221,214,192,207,239,203,229,206,220,186,218,198,218,214,197,215,201,225,213,235,238,198,210,225,201,215,200,226,226,185,203,208,215,204,198,194,222,221,116,7,2,10,19,10,11,10,4,17,23,25,19,21,2,0,5,36,28,14,8,0,2,13,4,233,231,207,200,226,193,215,229,209,218,215,195,215,221,211,205,218,220,219,212,215,202,225,227,233,224,214,236,235,208,237,211,211,213,232,210,241,193,223,201,226,216,228,218,208,
225,220,204,225,214,238,233,226,242,226,219,225,231,214,220,208,231,217,210,229,218,237,206,227,231,203,229,236,208,223,215,241,241,190,208,227,235,222,238,209,227,246,234,217,222,238,229,235,199,229,226,219,235,217,225,219,215,219,217,231,221,218,189,240,221,226,224,230,232,225,216,243,238,238,239,229,229,224,206,212,215,221,236,229,215,237,246,228,233,216,222,224,228,218,236,237,225,225,224,221,245,246,230,245,244,243,221,245,246,231,246,243,219,238,251,236,226,218,230,246,239,232,246,235,249,230,253,239,237,245,249,245,253,244,254,245,239,242,249,237,245,252,246,236,252,243,242,251,246,246,245,251,246,253,219,244,245,243,254,245,236,248,247,252,253,234,255,254,248,246,255,240,240,249,252,239,250,255,240,252,228,254,241,246,237,232,234,245,250,254,248,223,228,242,223,252,254,232,251,250,235,244,216,242,235,232,245,229,228,243,250,255,239,220,243,244,246,247,214,239,237,253,250,233,239,252,255,248,229,214,246,239,244,243,226,231,248,254,239,252,216,240,246,255,242,255,241,254,238,243,252,246,242,237,216,252,243,233,254,227,244,247,238,236,247,250,243,253,249,239,245,249,254,236,234,237,243,236,244,238,237,248,241,236,245,243,219,236,220,221,236,243,244,205,216,238,252,213,207,237,178,137,153,139,81,120,217,247,230,248,230,243,244,232,253,242,210,170,235,254,245,203,180,212,234,241,254,243,200,240,249,225,177,249,249,226,249,248,255,228,246,219,248,170,99,67,32,8,26,17,37,30,20,25,31,26,48,47,54,45,49,49,67,51,42,61,33,175,235,247,232,223,241,236,251,246,228,235,239,234,236,255,245,237,231,255,244,235,175,152,252,251,201,202,249,243,245,255,249,242,163,74,151,170,220,240,254,202,118,129,91,52,89,122,149,146,112,57,47,70,72,36,97,119,146,99,68,46,80,65,62,83,86,62,89,93,35,52,3,60,217,237,238,248,251,245,254,242,213,243,250,243,253,241,242,243,247,205,233,241,247,234,241,255,220,184,198,242,242,208,169,182,219,168,219,210,218,200,217,187,210,252,245,99,123,55,66,72,71,43,55,10,42,35,34,26,37,30,18,29,47,15,87,230,235,235,254,227,242,254,238,237,236,254,147,118,179,223,188,229,212,236,241,251,229,227,218,201,206,239,230,227,231,224,232,232,216,203,202,198,226,216,186,214,219,215,208,222,234,220,219,223,212,213,235,221,215,232,199,234,219,212,206,194,233,200,196,219,203,203,203,208,186,218,224,183,194,212,222,235,213,218,119,124,106,99,92,74,75,135,124,123,107,65,69,97,101,128,128,124,129,120,122,95,22,51,160,180,228,226,209,238,199,221,228,214,206,220,219,221,196,201,201,200,201,210,217,220,201,239,235,196,189,207,196,198,194,199,202,196,220,213,200,223,229,239,221,206,195,195,108,13,5,22,3,2,7,16,3,15,3,0,0,1,19,27,2,0,14,20,3,12,9,22,11,206,232,204,235,218,218,217,184,209,227,220,217,208,218,232,232,215,204,205,206,224,207,201,220,224,218,206,214,234,235,238,213,230,220,205,217,248,206,215,232,229,212,219,215,234,211,227,217,211,204,222,212,221,231,213,205,208,234,223,233,219,220,213,202,219,221,227,232,241,213,221,210,208,214,243,239,228,209,196,237,208,231,196,215,240,215,211,221,198,232,225,248,229,230,230,218,199,233,210,227,237,218,216,218,206,231,247,213,222,224,225,227,213,225,243,229,223,215,233,223,229,217,216,225,225,238,228,214,221,223,227,241,231,223,238,241,210,225,234,224,231,238,231,221,244,245,234,235,237,216,241,238,242,238,219,226,226,219,250,241,227,246,219,234,234,249,227,223,242,250,228,230,230,227,244,216,245,218,237,225,230,250,244,235,241,242,235,239,250,218,254,244,232,230,231,239,240,236,246,236,235,245,230,243,238,231,241,239,244,226,240,255,255,225,246,239,223,251,235,218,234,248,240,242,241,226,255,237,247,253,220,236,252,255,239,255,243,247,227,242,255,236,255,
234,243,250,223,247,239,242,255,252,255,252,236,237,251,245,240,249,254,251,246,229,245,248,241,255,233,249,242,221,247,230,226,251,253,253,232,250,246,245,239,241,253,252,254,255,255,242,221,220,247,247,236,250,252,238,238,240,238,238,243,224,246,248,235,249,233,253,222,236,247,251,222,243,253,250,231,241,255,228,241,243,252,255,251,237,237,246,204,253,244,219,235,231,179,223,220,207,250,237,224,214,249,206,150,152,134,83,88,221,230,250,241,255,255,225,240,251,227,223,209,221,249,237,215,198,206,241,252,255,229,188,231,249,238,234,255,254,243,251,247,201,236,251,217,249,167,121,74,68,66,11,3,3,40,25,14,36,41,74,71,49,27,44,45,10,23,55,100,134,239,237,254,242,250,251,232,245,221,223,237,255,235,255,249,248,245,252,243,226,255,174,185,227,228,198,230,247,255,230,252,250,233,155,96,158,184,221,253,246,147,113,57,51,21,105,145,167,147,69,31,34,28,68,66,68,120,98,59,68,72,48,75,68,91,49,82,117,102,53,91,16,56,226,240,240,230,244,250,240,252,222,251,255,214,252,246,234,241,249,246,219,253,236,204,241,251,239,184,192,235,231,201,181,187,232,184,189,252,182,212,249,234,213,246,193,93,56,11,3,13,8,12,9,5,28,63,29,46,33,42,34,30,54,28,92,237,225,254,212,244,242,255,226,247,234,235,140,115,213,197,198,239,221,223,206,243,198,223,231,224,230,227,202,233,234,208,216,227,233,233,180,223,217,219,225,224,227,222,231,227,208,197,225,186,204,190,229,215,214,230,204,199,214,192,224,199,226,216,218,241,187,204,220,219,196,205,206,207,198,199,194,204,207,137,161,99,103,79,90,78,77,121,117,142,114,75,61,86,116,144,98,116,131,121,99,77,44,40,106,163,224,220,211,209,211,232,231,212,213,210,226,204,215,222,236,226,207,219,228,226,194,231,211,238,225,199,207,193,208,232,185,206,212,232,207,220,203,222,214,205,214,203,113,10,0,12,42,7,5,17,2,15,27,16,16,10,0,8,4,6,6,13,27,18,26,21,2,237,228,231,188,236,221,242,227,217,202,232,234,217,235,223,183,204,194,218,217,223,196,201,220,199,205,232,196,230,227,200,219,217,225,231,223,218,228,214,228,221,241,214,213,216,221,242,239,213,198,189,227,212,223,226,213,225,232,225,229,195,209,220,234,209,223,210,210,233,214,216,216,211,233,212,201,225,233,204,225,215,207,227,218,196,201,222,231,227,225,236,210,218,249,230,238,195,214,230,225,216,218,220,222,217,242,214,219,230,226,220,236,205,208,216,227,216,226,231,212,232,228,222,222,212,229,214,210,195,224,216,217,228,217,232,219,216,209,242,232,228,218,249,227,243,232,234,208,230,237,230,237,236,243,231,239,236,214,214,246,217,245,219,242,225,224,236,233,240,229,245,242,233,248,230,239,247,252,252,237,233,238,240,232,241,239,246,246,250,252,235,215,249,244,247,237,242,238,253,255,234,240,233,247,248,252,248,233,237,246,235,252,222,254,248,247,245,246,228,228,244,246,236,228,253,254,249,255,242,251,248,233,255,242,227,249,246,245,241,248,253,241,231,251,251,251,253,236,238,245,242,234,239,236,244,250,249,246,238,255,251,252,236,255,235,230,231,246,240,255,253,255,248,235,229,255,230,249,252,251,252,247,251,251,251,247,249,253,254,241,246,238,243,246,231,240,235,248,235,248,254,230,244,239,237,250,228,252,229,247,231,211,255,255,251,240,255,239,234,228,225,240,252,233,233,222,237,252,233,252,239,231,241,207,213,215,241,245,251,240,255,231,231,202,243,192,176,145,146,66,108,235,246,249,255,246,238,243,233,246,219,199,169,184,245,251,232,205,186,236,237,250,217,208,227,243,244,208,226,255,250,238,255,240,246,237,242,209,220,189,168,169,216,220,232,179,87,30,52,40,45,55,92,41,40,75,85,115,81,182,205,186,246,251,241,238,230,255,232,235,249,233,235,242,244,250,248,243,246,245,232,223,248,184,140,217,184,192,243,242,223,245,245,248,179,
81,176,216,188,245,253,254,181,133,128,92,99,165,199,200,212,140,51,75,107,108,108,119,126,70,76,73,51,66,49,76,92,38,86,92,49,69,65,7,70,225,252,229,255,248,243,234,252,235,240,237,250,246,246,246,255,222,219,250,254,244,240,228,252,254,179,164,251,225,238,203,162,152,148,210,232,189,160,232,222,199,229,198,102,71,72,74,66,60,46,38,37,53,38,32,33,52,41,50,28,42,8,127,239,243,253,232,229,245,232,255,247,243,254,148,122,221,209,202,224,222,234,225,209,219,224,226,234,209,230,222,217,236,226,216,205,243,229,210,207,232,228,219,229,199,230,204,198,234,214,226,232,231,211,213,205,213,200,212,226,205,186,209,220,192,194,199,211,192,206,199,211,195,211,204,221,205,223,201,216,180,146,130,143,121,86,86,76,85,122,120,128,120,81,98,95,113,115,99,94,122,99,90,57,28,82,211,190,208,219,219,218,219,223,191,204,222,206,198,204,200,215,212,213,228,215,204,224,224,237,204,202,214,201,194,215,200,219,218,194,215,205,202,227,192,186,203,215,198,179,115,10,16,11,13,18,34,4,5,10,10,25,4,11,14,7,24,4,8,10,43,17,15,7,39,205,219,234,211,223,220,210,235,204,193,224,214,228,214,212,244,225,211,221,218,203,226,228,214,230,219,202,229,241,219,197,204,212,228,240,249,241,228,232,227,215,217,224,216,201,219,231,232,213,189,224,212,206,212,230,200,208,216,220,230,210,212,179,204,208,229,211,221,214,202,222,215,208,217,235,212,238,200,218,227,232,220,241,189,228,227,216,219,234,217,228,216,231,223,208,203,226,212,223,243,228,203,206,216,210,251,208,232,207,236,228,236,221,251,205,200,211,230,219,242,228,207,242,217,235,205,225,226,212,242,212,210,243,215,217,235,241,226,232,212,234,245,220,242,230,231,227,231,211,255,223,245,220,218,221,243,227,231,227,234,244,233,235,233,219,238,224,230,235,231,254,238,242,216,242,228,237,245,251,228,228,220,242,236,241,245,239,244,252,233,237,239,228,219,246,233,243,232,225,232,245,249,233,240,242,255,236,232,232,232,249,240,232,225,240,231,224,239,242,235,225,242,243,235,242,246,225,234,239,248,250,250,255,252,245,247,249,249,255,237,241,230,228,255,253,246,251,252,220,249,250,253,235,238,255,250,245,239,252,231,255,254,254,244,241,237,238,235,231,238,224,249,243,244,233,249,253,228,240,232,245,253,216,237,235,232,236,254,245,212,231,218,251,229,227,234,253,231,246,244,239,246,232,253,231,254,254,254,247,229,241,213,223,239,249,247,244,255,253,236,249,252,228,243,243,248,255,232,241,244,231,208,252,215,238,239,255,253,194,218,221,229,216,237,245,174,134,122,118,84,94,223,254,253,243,242,253,226,249,243,253,217,170,209,223,248,201,184,229,244,242,236,228,238,241,255,227,134,172,239,213,238,248,253,253,255,255,248,247,218,254,237,252,255,254,199,111,11,84,108,58,51,74,74,65,197,238,224,144,221,164,146,250,245,229,224,222,246,246,250,254,241,250,251,247,241,242,233,247,242,251,233,252,196,156,183,159,197,238,245,254,226,169,218,173,128,225,212,212,230,255,247,251,194,142,139,170,191,217,202,226,136,96,114,149,159,148,150,151,120,95,71,67,73,13,69,72,65,47,99,87,40,41,19,61,227,231,240,245,253,242,235,250,246,228,244,232,224,234,255,246,244,245,235,240,249,238,200,255,246,179,168,198,225,215,191,80,144,202,196,219,171,180,248,215,130,197,206,242,219,245,244,233,252,246,255,253,234,145,96,99,85,54,31,22,11,34,132,247,212,239,232,251,250,239,254,231,209,222,109,165,240,202,215,233,228,223,231,243,245,199,226,211,199,215,230,205,230,224,246,229,223,211,214,228,215,205,198,195,218,195,198,218,213,201,205,197,229,188,193,209,223,217,197,208,230,193,201,240,209,213,179,183,209,200,228,195,213,187,208,189,192,202,204,229,214,137,121,120,116,89,72,99,80,118,144,126,130,102,121,146,135,130,99,133,
209,219,213,215,231,218,224,219,214,210,208,205,224,227,202,200,231,198,234,234,200,214,206,206,204,208,196,110,13,7,4,9,21,17,7,9,27,4,25,2,3,21,0,20,6,7,9,3,26,28,32,22,237,239,237,238,228,223,246,205,244,193,244,216,207,213,242,234,238,251,223,246,242,226,243,238,252,223,233,238,223,221,231,232,241,235,228,224,240,221,243,227,240,237,227,234,213,242,226,249,252,245,228,229,223,243,233,206,237,236,230,236,249,228,226,228,235,244,215,237,249,255,228,252,210,241,242,218,229,208,247,232,239,254,230,220,215,233,227,227,246,221,230,209,222,246,239,240,240,219,196,223,252,252,208,249,237,233,217,245,224,226,245,243,225,251,216,250,235,234,235,251,208,229,232,238,235,235,245,244,241,237,213,246,249,254,243,209,243,226,243,253,234,251,248,237,255,244,235,250,229,239,223,238,245,251,240,252,243,249,251,249,233,235,234,254,241,224,245,232,229,225,244,252,214,231,255,236,218,250,242,240,236,221,238,228,232,229,236,217,238,227,253,240,249,221,255,241,248,235,229,238,225,237,251,243,244,234,248,239,238,233,228,244,252,224,232,216,228,243,223,232,241,242,251,236,242,246,233,244,241,232,238,241,227,212,235,232,245,214,205,232,243,236,250,241,245,253,251,244,244,245,252,247,237,246,242,239,244,243,244,242,249,218,228,239,235,238,230,205,233,235,253,199,222,238,241,226,211,221,247,244,229,232,244,232,250,247,225,220,223,196,219,237,250,251,220,238,247,236,220,232,211,248,216,242,238,243,234,226,248,246,234,230,251,227,226,237,245,239,226,244,244,235,247,227,252,239,236,198,236,227,193,236,212,171,225,228,215,246,229,247,188,231,255,250,239,178,145,139,119,46,152,223,248,249,246,212,234,245,222,117,72,36,44,62,101,170,182,125,71,27,25,118,182,168,113,119,52,56,52,100,135,137,96,11,22,10,34,40,75,34,12,18,23,51,48,43,18,12,5,4,16,33,26,12,7,13,23,22,27,22,25,17,0,31,28,47,35,19,44,46,18,23,42,36,25,29,62,138,52,52,78,162,132,84,164,205,155,107,91,81,135,204,205,251,228,235,236,216,242,245,233,198,207,252,227,254,242,222,245,246,181,160,51,8,114,56,21,115,31,32,40,35,60,38,35,12,23,60,36,29,45,86,22,48,30,73,235,243,252,247,230,216,213,238,239,244,218,251,245,238,207,211,245,222,235,234,252,208,232,230,224,235,206,234,242,204,176,179,203,209,211,224,186,127,101,129,189,111,32,24,32,157,236,142,85,102,228,252,246,222,244,252,233,253,250,228,236,234,253,217,171,155,165,161,152,157,118,44,129,122,83,65,78,204,229,255,217,191,209,221,201,193,221,206,207,218,188,200,199,220,223,235,205,230,227,197,242,188,236,217,193,216,233,187,245,218,195,202,193,210,184,218,217,220,223,235,226,203,180,193,234,203,231,242,218,206,219,222,226,184,176,212,230,242,237,245,223,185,107,90,30,11,94,203,246,241,246,225,216,206,228,217,206,232,218,199,220,219,199,217,211,214,212,223,216,218,229,202,204,220,214,216,207,223,219,232,201,201,222,226,220,237,220,234,219,191,211,192,206,199,211,211,217,191,210,219,208,191,212,216,211,184,211,226,214,91,4,20,0,23,11,27,6,35,21,24,26,27,0,16,5,15,16,20,20,4,33,37,50,25,219,220,217,248,228,219,224,233,228,221,211,213,236,234,221,199,235,238,230,241,234,221,238,234,232,227,231,220,219,215,216,220,236,246,255,240,234,248,255,225,239,247,250,221,248,249,232,233,236,237,229,243,251,234,227,247,213,239,230,230,233,232,243,241,230,240,230,239,233,238,235,224,220,211,235,249,245,224,229,243,231,236,212,249,231,233,249,223,226,240,225,228,236,243,228,244,222,224,244,243,251,234,234,243,215,244,237,229,239,240,240,209,237,235,222,233,231,242,240,231,250,231,223,253,235,235,241,244,232,249,239,247,248,250,249,243,250,246,253,239,247,233,244,227,253,246,244,226,232,247,247,252,255,231,246,247,225,231,245,
250,227,237,226,229,245,243,235,229,253,253,249,234,240,248,242,243,253,254,234,246,237,242,241,250,234,239,228,211,216,242,216,227,246,246,230,251,249,239,245,231,249,228,254,243,239,229,231,238,239,245,217,235,245,244,244,240,222,241,238,229,232,245,246,232,239,229,227,242,231,236,228,247,244,244,239,237,206,224,244,234,235,239,234,249,247,251,245,250,239,243,236,249,246,238,238,232,229,240,228,211,240,220,245,207,226,253,203,227,253,241,245,204,233,246,240,239,233,237,246,237,244,236,247,235,217,244,240,233,223,238,200,240,236,241,247,244,227,241,203,221,223,245,221,244,243,245,223,243,222,251,248,229,235,232,245,239,242,197,228,245,232,216,241,222,255,246,245,193,248,202,189,199,204,180,238,248,245,250,246,247,158,216,236,255,252,153,121,98,101,60,147,231,241,252,240,223,245,245,178,101,65,52,33,78,166,234,228,190,165,113,52,52,15,46,9,12,52,24,38,55,2,79,124,88,54,29,25,28,98,31,32,45,21,16,62,69,29,27,8,42,5,28,40,11,25,29,25,34,26,11,10,12,14,7,8,23,21,14,40,18,20,29,14,8,34,24,42,15,12,34,48,105,75,92,178,243,190,96,65,38,82,152,209,229,246,243,235,239,250,252,242,156,223,230,239,246,249,241,237,250,173,153,36,2,125,54,51,120,32,60,38,34,25,60,47,26,57,37,26,41,73,104,30,46,25,45,230,248,255,233,245,243,231,220,242,243,241,234,243,229,232,224,226,225,226,237,249,228,196,241,232,227,201,215,223,226,178,168,190,213,217,218,170,103,38,11,106,144,107,77,68,92,171,140,101,192,255,255,240,242,253,249,245,236,217,245,242,245,249,196,142,144,154,137,157,181,128,70,170,183,109,77,31,101,183,214,208,221,201,217,222,213,212,225,225,213,214,211,229,231,207,217,207,206,224,228,210,189,221,202,201,212,196,215,200,217,242,205,219,218,210,227,214,216,217,214,235,191,204,188,202,202,237,202,216,198,207,207,188,170,220,230,243,241,242,189,138,105,51,14,43,167,235,230,232,240,235,222,233,229,230,231,216,201,216,230,210,233,226,240,223,238,201,231,196,212,220,235,233,217,225,209,210,198,227,238,200,196,203,214,229,220,203,214,229,225,209,230,210,218,220,212,184,230,228,217,201,225,215,213,206,203,213,217,181,132,0,14,13,5,4,14,24,7,11,1,31,0,2,7,9,15,25,4,22,14,9,34,18,5,236,226,229,240,207,208,219,231,230,234,231,217,222,247,240,221,250,225,242,242,225,247,242,241,206,235,241,247,223,228,227,250,220,228,243,215,231,222,250,232,236,242,230,219,232,220,234,227,253,231,246,225,217,242,255,252,249,248,244,237,232,239,242,253,243,234,225,243,231,240,243,240,243,242,246,251,231,250,242,247,245,223,243,249,222,242,223,225,230,235,243,194,245,226,228,237,236,211,250,211,246,235,251,216,229,232,245,246,240,234,236,243,234,225,242,244,231,242,247,218,251,239,232,244,243,249,244,231,245,229,240,244,245,227,232,251,224,244,247,236,236,252,255,238,243,252,238,247,231,245,230,244,253,242,242,231,243,246,249,247,248,224,240,231,228,218,224,234,233,223,254,241,229,247,237,249,219,251,231,218,236,238,230,234,231,249,228,214,216,218,243,253,254,239,239,242,224,242,237,236,225,231,231,251,219,245,245,247,251,240,249,252,232,226,238,246,234,200,246,236,207,223,236,245,226,241,223,204,222,219,247,242,244,252,245,225,247,232,238,253,232,244,220,230,252,251,255,231,243,253,243,223,246,246,235,236,240,250,230,214,240,243,248,240,238,228,244,226,215,236,234,244,226,238,233,242,221,234,239,247,246,218,250,249,251,229,230,244,241,243,249,228,239,230,242,246,249,234,222,239,226,229,230,245,224,205,218,248,233,239,207,220,254,228,199,247,224,215,232,242,238,254,231,220,236,249,204,192,234,183,204,239,245,244,251,250,229,240,243,225,186,231,239,250,228,123,106,106,69,21,124,243,229,237,217,225,247,217,194,74,59,19,5,22,89,185,167,
141,159,137,62,21,37,59,60,22,60,23,16,33,11,12,40,135,120,92,54,43,87,55,24,4,19,48,67,47,26,19,36,3,6,33,25,25,34,34,2,20,16,24,35,38,16,18,12,2,23,7,8,45,16,26,26,26,18,21,44,29,26,19,78,107,85,36,40,81,80,88,92,13,67,189,242,243,249,239,222,239,249,236,217,184,253,248,247,231,249,241,253,252,124,102,50,15,144,47,46,111,36,62,7,26,24,20,63,45,49,56,58,114,134,122,50,48,16,56,206,233,247,248,220,244,250,221,239,239,242,235,239,242,229,194,205,229,244,243,236,239,210,244,242,201,221,209,255,213,162,180,173,228,226,204,227,198,148,44,36,82,155,137,139,94,121,86,148,220,254,249,252,229,251,255,241,212,227,235,227,226,239,159,159,134,109,127,125,149,96,89,176,165,116,64,11,2,67,179,193,221,233,228,198,222,213,201,217,214,215,208,236,225,231,215,205,230,193,233,213,219,203,218,230,221,219,216,233,206,238,212,217,207,217,208,243,229,208,237,202,226,210,218,217,210,229,233,203,218,234,219,204,203,213,229,253,244,149,104,94,2,30,42,146,230,229,237,235,207,233,238,196,225,218,212,228,195,215,215,219,217,230,228,200,220,208,224,231,221,199,224,212,220,222,228,209,199,228,199,204,202,238,213,223,216,214,208,209,215,236,210,229,196,230,224,210,214,214,198,207,216,209,211,200,225,228,211,221,111,21,19,13,9,4,10,8,17,18,36,19,17,7,0,24,21,18,0,28,24,10,14,11,29,231,221,225,230,215,236,219,244,229,220,233,228,229,239,239,250,235,214,248,217,242,220,226,228,243,239,247,234,247,246,245,211,233,232,236,217,210,240,252,240,237,241,233,231,243,240,245,244,237,240,253,255,246,252,233,222,230,236,242,236,223,254,246,241,243,241,255,219,234,254,234,250,233,248,248,224,230,240,224,228,245,230,246,242,223,249,252,237,230,233,216,231,238,233,233,206,198,237,240,239,246,236,223,226,246,213,244,233,224,240,242,245,248,234,252,225,226,244,221,232,250,248,236,233,248,255,246,246,231,233,230,236,244,244,244,226,239,248,255,242,244,253,225,237,251,236,244,218,234,230,240,247,223,254,246,239,228,253,230,247,245,239,226,247,225,240,250,242,240,227,237,249,244,236,244,245,207,216,240,221,225,228,212,228,227,214,230,220,230,213,219,227,235,234,233,238,239,244,248,240,240,206,238,234,230,237,242,255,218,236,232,249,244,236,250,224,233,236,222,241,245,248,250,232,218,250,242,236,252,234,240,250,230,222,242,238,246,252,214,229,252,234,232,243,242,255,252,239,243,225,253,238,236,236,203,245,249,247,243,237,233,221,233,229,246,239,220,232,246,248,252,236,237,238,237,245,211,237,247,246,215,246,219,211,226,247,235,226,216,255,248,232,245,244,235,225,242,219,239,233,242,242,244,246,234,239,240,237,238,251,242,231,232,243,253,238,223,231,246,227,255,236,224,242,249,255,216,214,224,189,252,245,246,226,227,217,231,241,255,229,164,238,236,251,227,101,113,98,83,12,95,240,238,255,248,212,254,255,208,88,27,24,36,80,74,43,32,54,136,160,107,33,20,47,36,34,79,13,33,14,39,48,16,30,88,143,117,105,113,38,15,2,34,2,34,67,36,12,19,11,37,23,35,17,8,6,18,19,14,10,30,22,24,14,13,35,7,22,36,17,13,10,35,35,24,4,28,17,21,59,58,68,57,63,53,33,35,60,97,30,22,189,244,249,253,217,243,241,233,248,193,194,252,238,238,228,243,223,247,231,93,114,35,8,140,43,68,65,11,23,24,33,40,37,43,52,62,86,147,170,164,148,98,35,32,45,188,241,250,228,232,215,245,238,241,227,217,235,229,225,239,195,218,247,222,245,213,241,251,224,229,248,200,221,239,219,196,158,143,234,198,237,224,216,200,132,102,154,154,149,116,52,15,50,192,239,250,254,253,242,249,242,235,227,227,244,240,253,204,140,152,105,82,134,112,147,95,97,192,148,68,61,3,45,173,203,193,223,229,215,196,218,201,196,214,226,213,199,208,219,239,232,198,210,183,232,216,194,234,229,215,213,226,229,225,213,204,203,211,212,23
7,215,201,234,243,200,227,219,225,216,236,207,197,224,192,244,204,225,179,226,252,242,157,127,71,13,10,3,82,174,238,239,238,213,242,204,221,244,216,218,210,223,249,237,208,238,235,223,227,230,233,215,212,228,207,243,224,221,231,238,201,206,229,233,224,204,210,219,215,214,217,236,222,213,217,190,210,217,190,205,210,237,235,233,209,228,211,218,224,222,194,199,234,206,223,133,1,14,1,19,14,6,18,4,7,12,20,13,5,0,3,15,6,7,23,12,3,7,4,15,219,218,239,228,237,211,218,251,236,253,227,227,225,210,223,239,227,237,193,230,232,243,231,245,229,249,238,246,246,245,237,220,243,236,234,238,229,236,238,224,244,232,250,244,226,226,226,228,236,234,252,236,239,240,246,251,236,239,230,229,223,246,222,254,240,238,233,239,240,228,254,231,231,239,231,245,235,251,240,255,225,255,207,236,228,239,246,228,246,228,233,234,237,235,221,227,235,225,251,225,239,210,253,224,243,228,241,236,240,228,248,237,242,243,247,239,251,224,238,210,239,238,241,221,235,247,206,239,237,241,247,238,244,233,233,248,229,243,237,242,251,247,245,248,227,224,251,233,232,249,245,249,245,233,241,220,226,243,223,249,255,255,233,223,246,238,238,246,211,221,226,232,228,254,234,243,230,239,231,233,236,248,231,243,231,235,243,196,242,251,234,238,241,233,237,255,249,241,231,214,222,249,238,217,248,217,252,227,247,239,224,209,241,225,227,218,229,244,241,233,245,241,234,228,236,230,236,246,239,249,239,230,241,252,226,232,236,233,246,238,234,238,221,244,239,254,242,245,223,238,239,246,231,237,237,229,216,236,254,239,248,232,246,247,236,243,213,238,237,222,246,231,243,246,235,208,234,251,250,233,236,238,239,247,203,229,215,247,217,245,224,224,231,255,217,226,226,228,234,233,250,230,252,220,233,239,243,249,238,240,237,250,233,232,245,232,245,208,253,227,227,226,211,250,231,246,208,213,249,210,239,206,182,181,208,222,235,246,250,255,173,239,232,255,200,80,94,76,68,10,97,237,249,247,233,229,253,252,184,84,53,81,120,99,46,49,12,42,66,153,181,128,63,76,43,64,76,56,53,31,43,46,29,31,7,33,134,169,152,48,5,21,14,12,42,81,23,21,31,29,19,21,12,25,7,7,19,14,16,24,8,12,29,21,27,39,13,8,34,20,22,39,15,34,37,13,30,18,40,63,98,85,64,40,17,32,42,51,61,27,51,210,238,255,250,205,234,236,248,254,185,223,239,219,227,243,226,246,247,179,89,129,47,35,142,8,86,76,18,15,35,16,26,24,18,53,81,119,128,133,102,95,60,50,4,28,200,225,255,214,210,250,222,255,247,238,246,233,207,252,233,202,201,255,245,254,247,250,255,245,243,233,191,214,205,210,201,151,177,176,206,208,227,245,224,134,137,184,169,144,131,61,52,138,241,245,242,249,249,234,242,230,212,238,248,237,236,228,183,155,132,129,129,129,107,147,119,139,217,125,71,64,4,114,222,242,249,204,216,217,210,217,216,221,213,214,220,224,243,217,231,232,232,223,223,239,216,239,212,232,219,220,212,226,221,209,206,224,204,220,239,206,210,208,216,236,235,211,231,220,232,226,226,222,222,212,228,217,200,209,208,135,123,39,48,55,136,164,197,227,241,240,219,212,208,206,206,221,226,224,231,223,232,239,204,220,219,241,217,225,200,221,200,213,214,216,211,208,218,232,212,237,204,202,216,213,208,201,223,216,234,214,209,223,207,222,215,194,233,217,221,214,206,222,183,211,194,206,203,217,200,240,196,211,205,117,20,3,18,13,7,2,15,0,30,20,10,25,0,5,5,2,6,24,9,0,23,10,9,17,211,229,219,228,241,220,238,238,232,241,246,242,228,255,209,225,215,240,222,232,240,236,245,231,237,237,241,245,244,243,225,202,246,255,224,223,232,243,232,240,243,217,228,234,238,225,253,223,245,253,243,246,226,228,227,249,233,233,229,244,241,252,244,231,235,229,229,237,224,249,244,235,223,229,234,241,234,235,221,228,239,230,250,238,219,233,201,254,201,240,237,237,242,253,228,232,253,220,22
5,226,227,237,222,231,241,222,207,232,227,220,224,240,251,240,219,236,243,212,226,217,215,220,234,253,217,227,249,245,247,254,234,248,236,253,237,249,241,220,239,252,255,250,221,237,245,228,243,237,240,232,232,253,238,229,246,232,229,255,247,250,228,236,222,249,253,231,242,240,229,244,240,222,216,247,213,237,251,240,241,225,237,237,214,218,237,251,228,218,239,217,222,238,242,243,222,236,231,230,229,241,232,219,243,243,230,233,215,241,255,253,233,222,247,229,200,194,236,244,241,245,230,239,229,242,229,222,250,242,231,230,226,236,228,241,247,238,236,254,242,239,231,213,232,252,233,231,226,232,229,235,250,243,246,232,230,233,247,247,222,220,248,237,239,247,227,214,230,242,243,241,219,236,228,251,216,221,255,242,221,225,237,229,230,246,244,233,227,250,250,247,218,231,206,242,246,235,237,239,253,223,236,237,226,203,227,220,243,236,225,243,243,229,237,207,240,228,244,247,219,242,235,213,239,239,234,247,211,244,210,162,203,222,229,247,241,250,245,245,248,246,184,253,245,250,188,94,69,66,54,12,109,230,237,247,234,242,232,255,224,80,65,39,65,44,44,67,44,46,55,73,189,171,131,111,41,55,69,68,58,38,43,50,27,48,42,40,14,65,142,90,85,29,14,18,52,76,32,26,32,3,22,1,25,13,2,23,5,27,42,17,9,33,30,39,18,28,12,46,11,18,25,24,6,40,1,6,12,31,58,93,93,53,29,56,34,35,73,48,55,23,80,219,236,254,255,240,235,234,243,231,192,231,251,243,249,235,238,253,244,189,77,97,69,68,165,13,61,73,25,35,9,30,29,9,76,50,66,101,114,88,80,43,52,92,20,100,207,230,248,255,229,233,230,223,246,244,238,240,253,230,251,243,218,243,252,249,255,254,254,244,233,253,210,190,225,210,215,172,168,207,224,223,191,201,187,185,184,199,157,141,136,82,124,211,242,225,255,233,240,218,237,210,250,243,235,228,249,243,162,161,159,145,163,127,145,133,166,169,133,68,60,34,12,144,244,229,252,214,222,222,212,224,221,186,232,236,200,210,224,225,252,255,241,231,229,239,252,203,236,239,198,225,233,216,221,231,213,213,202,222,223,236,228,241,239,241,251,237,250,250,239,224,234,229,232,252,248,215,196,162,132,80,30,19,88,191,234,216,232,226,238,222,221,216,226,196,215,229,224,209,210,216,209,222,210,237,193,224,220,197,214,232,228,217,227,239,237,241,198,219,197,205,203,221,211,222,222,206,210,243,199,213,231,224,192,206,191,220,243,193,212,215,206,211,209,205,220,212,202,232,200,203,220,186,226,120,7,15,3,10,5,13,14,13,16,20,3,13,3,11,11,3,19,9,32,25,18,14,13,1,241,226,211,238,204,203,208,225,235,218,226,229,244,224,236,239,243,218,237,236,227,242,237,253,219,238,244,237,228,236,250,235,225,232,222,254,220,234,243,236,246,224,232,246,226,246,228,243,231,234,243,235,232,224,248,230,254,248,243,238,213,228,238,239,247,246,238,235,250,244,251,237,224,219,243,249,231,249,215,240,240,244,252,232,235,238,222,224,227,235,240,227,243,235,227,212,241,236,234,223,231,246,243,240,222,250,228,254,233,226,231,209,242,246,231,223,237,232,233,240,251,207,246,226,241,235,228,247,235,240,236,249,247,215,225,219,225,240,218,241,229,231,240,234,247,243,230,241,221,224,215,243,237,205,242,239,242,252,225,245,245,248,248,243,249,235,244,241,232,245,241,237,224,223,240,214,241,227,230,239,238,254,251,227,252,223,243,233,228,249,209,239,207,210,242,243,243,242,222,231,231,247,234,250,230,232,254,232,239,230,245,220,240,229,226,227,238,245,237,232,238,227,231,239,239,239,234,232,234,249,235,242,239,254,244,246,215,255,217,240,246,230,223,250,255,218,250,203,233,255,246,244,244,235,254,247,218,253,236,249,217,241,231,233,249,233,216,223,245,216,233,242,238,229,220,247,219,230,231,221,236,246,236,235,231,232,227,208,233,226,231,234,230,231,242,249,225,254,237,243,228,234,243,252,242,239,
254,225,215,241,249,239,232,222,247,252,241,224,237,230,234,228,247,246,250,244,210,193,185,181,233,233,242,229,249,221,204,212,251,227,198,229,245,240,216,76,82,106,89,3,97,231,233,248,210,228,229,241,234,84,17,23,5,40,68,55,31,35,34,68,113,171,167,115,14,22,45,47,53,41,64,56,14,15,18,47,38,25,58,110,167,114,30,29,16,78,45,11,23,16,4,32,14,22,6,40,18,11,7,0,17,20,39,54,22,25,17,42,16,42,12,25,43,47,39,44,9,36,80,83,99,60,65,72,35,55,111,74,68,15,103,227,253,242,253,243,252,220,250,237,166,235,247,234,228,231,234,224,252,186,81,83,38,50,107,12,102,64,21,18,1,19,7,48,15,27,30,58,92,59,42,37,47,28,18,117,246,248,241,236,224,249,237,242,251,227,255,242,246,243,254,232,198,216,217,245,138,171,162,153,240,254,233,215,203,246,216,186,174,190,225,181,100,88,86,162,135,119,120,91,66,111,205,227,243,249,250,255,255,250,239,225,241,228,254,231,222,176,157,151,180,143,144,142,153,193,148,114,84,41,69,9,39,213,249,228,252,236,226,206,212,226,206,190,211,215,209,243,228,246,250,239,255,249,245,243,250,254,231,235,207,204,209,225,223,235,218,225,234,220,230,214,231,236,246,233,236,235,229,227,231,231,235,231,252,245,236,146,156,96,43,27,37,124,246,241,234,209,218,242,242,238,213,202,219,211,206,213,209,216,225,184,226,222,227,239,209,233,206,208,198,217,230,220,214,216,210,209,209,231,233,219,208,217,217,205,233,228,224,204,225,216,212,199,207,229,214,221,229,183,234,246,226,212,220,221,219,230,208,195,190,202,205,213,208,109,15,3,7,23,18,22,11,10,17,2,13,8,18,11,9,19,12,14,19,1,18,9,9,9,233,231,217,229,216,195,229,246,219,230,235,236,220,217,248,225,232,245,249,236,235,237,243,231,240,246,236,224,248,241,242,252,248,226,234,230,249,247,227,235,239,229,238,225,248,246,226,241,240,223,242,251,225,240,207,222,246,249,246,249,241,250,239,238,239,217,240,239,250,239,217,235,227,242,232,234,226,231,210,255,236,251,251,240,241,247,234,218,228,246,244,223,235,214,233,234,232,222,233,214,232,243,255,243,246,242,228,213,235,244,240,242,245,243,225,248,239,247,238,230,242,230,248,206,224,237,232,236,251,240,224,240,247,236,243,237,232,229,254,237,249,237,253,211,224,247,250,238,224,216,222,236,211,237,241,240,255,236,254,251,224,229,240,229,229,236,254,234,222,247,238,234,247,230,237,230,238,233,237,241,211,226,207,239,213,238,234,252,216,235,211,226,241,232,241,246,252,248,250,233,246,215,233,237,248,241,237,231,242,215,246,219,240,238,223,239,234,228,230,252,232,225,211,235,226,248,244,242,231,229,229,234,233,205,223,253,230,241,238,248,254,246,247,244,230,253,241,247,224,255,243,248,229,251,229,240,239,249,222,241,234,246,224,252,212,218,232,236,235,241,220,224,240,212,208,222,243,244,232,228,244,240,237,245,225,239,230,214,248,211,222,246,228,222,224,235,246,240,242,247,247,223,243,223,246,246,229,247,245,252,237,236,225,245,239,212,250,207,238,237,243,244,232,242,241,239,209,218,214,216,246,241,208,251,215,193,233,246,233,201,211,242,234,251,192,161,148,128,130,53,160,234,234,238,243,227,247,249,242,132,46,1,1,49,75,33,47,46,37,17,50,93,193,184,60,40,13,33,53,44,30,9,5,23,24,17,2,44,209,231,242,238,207,136,135,130,23,16,2,0,29,14,9,19,24,33,19,32,14,16,21,49,24,36,43,44,25,35,35,35,24,26,48,62,47,32,60,54,89,89,89,61,55,50,53,61,79,110,40,58,162,245,248,239,237,229,237,228,254,190,173,234,244,213,217,249,239,248,242,164,69,68,73,83,88,37,83,47,8,38,6,20,28,10,31,25,45,46,84,102,77,50,92,42,33,140,221,236,250,251,237,247,254,253,252,250,240,235,230,239,255,192,128,61,14,85,30,31,89,81,189,244,250,247,244,252,242,175,184,181,185,225,97,18,24,17,58,44,33,38,31,107,239,232,255,249,252,248,255,246,233,234,245,255,
238,247,231,168,139,169,171,183,113,152,136,157,145,49,25,35,39,17,63,233,249,227,239,221,230,226,217,211,230,212,217,243,212,218,204,154,144,155,207,231,245,252,242,255,215,219,242,228,212,214,226,205,211,235,228,223,206,214,218,248,229,189,103,97,104,199,223,214,249,248,252,236,179,119,107,41,2,70,220,241,252,208,173,222,238,243,225,244,217,217,200,214,205,215,218,230,233,221,234,206,220,230,218,236,211,229,233,235,241,220,210,207,231,219,209,231,212,202,207,215,213,229,204,194,211,202,206,226,234,203,207,218,218,248,231,202,215,220,222,189,209,212,218,204,229,214,200,203,212,214,215,130,26,9,9,0,10,25,16,20,35,37,17,22,11,5,1,26,18,7,5,8,13,23,28,9,236,238,224,239,224,243,240,224,232,239,242,234,244,213,222,246,225,229,246,246,236,208,248,242,242,238,236,237,246,234,223,237,225,231,241,245,228,233,241,249,224,253,238,247,226,226,230,229,251,234,220,236,248,232,220,255,231,228,225,215,224,242,238,224,240,247,243,214,234,235,242,217,230,224,227,229,218,233,240,241,240,237,227,240,251,237,227,209,221,238,215,245,240,239,217,233,244,238,230,238,222,244,227,218,226,224,219,226,245,245,225,225,235,217,250,244,224,253,229,242,233,249,247,255,228,252,232,253,225,242,235,245,221,243,242,250,234,243,236,237,212,241,242,209,235,215,242,233,247,232,222,219,245,240,215,238,252,250,241,255,249,252,229,254,252,245,247,236,242,243,244,241,219,237,202,233,236,229,246,202,209,239,216,233,234,229,240,213,222,237,222,236,238,215,241,229,242,248,243,237,229,254,239,253,230,235,230,253,242,241,213,240,228,213,238,237,224,241,214,239,224,211,223,219,228,215,226,241,249,240,238,218,239,240,225,245,232,238,239,230,231,233,252,251,253,240,245,234,232,219,234,231,250,233,237,245,246,230,241,242,251,230,238,239,228,242,216,227,230,243,248,245,218,204,228,237,211,237,221,243,233,241,223,241,242,225,201,227,230,246,249,232,234,246,215,240,241,242,247,253,233,235,230,244,245,233,250,248,233,243,222,219,250,223,238,245,236,230,234,229,228,223,247,247,242,210,211,226,231,223,217,213,188,212,199,197,230,253,232,231,188,252,251,247,230,149,161,138,104,50,168,240,224,249,226,229,252,250,239,122,48,4,17,70,75,66,50,50,37,22,53,51,119,185,114,62,27,45,39,44,39,16,13,27,14,38,28,187,240,246,239,150,132,151,164,184,63,14,19,5,22,21,34,8,35,13,16,5,25,28,41,47,42,63,87,68,64,66,60,61,53,48,43,45,46,15,77,60,62,93,75,80,89,75,45,58,90,53,18,67,162,248,237,255,237,250,229,229,250,219,173,228,232,250,241,231,229,255,231,170,78,58,52,41,81,25,85,35,9,24,8,46,36,20,11,36,30,89,173,182,141,75,91,88,30,128,208,233,247,244,240,236,213,233,246,254,247,222,237,242,228,185,99,25,0,4,9,73,92,6,139,253,247,238,250,250,243,198,178,153,216,230,162,66,56,47,44,45,50,30,20,72,149,229,242,251,255,223,255,226,253,234,254,249,223,228,201,153,156,156,172,123,135,146,146,146,96,27,27,9,25,22,106,224,243,234,241,229,229,220,227,209,207,219,211,208,225,246,173,86,80,43,11,122,109,144,209,229,219,230,226,219,232,247,218,205,216,236,219,220,207,204,217,234,189,93,45,22,0,75,198,238,253,239,215,171,92,78,30,16,153,236,250,252,232,207,208,226,224,240,212,215,208,206,231,212,207,205,209,225,224,193,233,199,220,215,204,193,221,220,223,216,235,205,189,216,225,226,223,230,206,201,215,230,227,219,229,230,224,211,217,221,228,233,198,205,231,209,205,209,208,208,233,235,204,212,228,224,208,214,230,207,210,235,239,105,22,19,9,4,3,8,23,7,16,23,4,3,8,6,0,9,5,10,13,19,16,23,7,5,231,236,232,232,230,244,219,221,240,242,231,250,239,215,238,225,241,236,232,229,237,240,214,247,213,232,253,246,223,226,233,220,233,235,246,235,223,233,236,244,250,233,247,247,221,239,240,234,
219,228,228,232,239,234,243,254,249,237,229,242,218,233,240,228,237,226,248,217,222,235,236,214,223,230,225,229,210,238,217,251,237,242,216,238,230,244,220,230,219,245,243,237,243,229,202,236,230,227,230,240,243,247,229,215,234,238,230,245,228,229,223,223,238,237,249,241,233,247,247,238,246,255,253,252,242,231,251,233,244,231,252,244,243,241,222,245,253,248,243,235,232,234,238,243,235,237,222,227,240,242,208,240,227,223,226,238,244,238,241,251,231,234,224,226,232,246,248,231,229,250,249,233,241,245,240,241,246,233,215,242,240,246,206,233,233,241,233,243,239,238,238,222,239,247,247,231,243,220,238,247,245,249,249,255,235,246,245,255,233,248,233,227,236,210,215,226,248,232,235,224,232,239,242,220,226,218,237,246,213,237,234,232,228,232,235,219,210,246,243,242,251,244,227,198,224,245,246,245,237,255,245,231,235,230,214,241,254,245,235,250,239,242,255,238,225,215,227,236,234,243,226,240,252,207,227,252,222,251,226,225,241,217,229,238,255,247,225,248,252,229,252,250,247,253,229,230,250,226,233,243,243,243,252,234,248,207,245,234,215,242,247,245,237,240,233,233,254,211,232,221,254,251,219,244,242,248,234,228,189,176,232,216,217,224,248,231,251,244,250,182,171,247,229,247,204,171,120,111,90,30,186,236,239,245,215,196,242,243,243,172,131,46,15,25,57,47,45,44,44,21,52,25,77,116,164,144,45,48,38,32,27,11,20,34,25,168,255,253,233,237,224,92,26,5,131,197,129,75,29,14,23,5,14,26,5,32,10,26,17,63,134,90,22,9,57,60,91,81,65,51,39,29,27,25,33,57,54,60,57,91,61,68,67,46,67,93,82,64,11,49,223,238,241,251,221,238,214,236,246,173,182,229,248,254,245,246,247,234,254,244,86,49,69,86,60,54,72,29,15,16,19,25,36,41,28,25,45,95,161,147,119,50,92,122,110,138,193,225,250,255,204,131,65,33,44,166,235,246,249,249,245,156,65,11,1,55,16,31,22,10,127,184,156,139,170,231,254,255,188,165,218,229,203,100,69,54,52,47,32,23,32,6,90,229,251,235,244,255,251,222,247,253,238,252,230,252,146,107,143,161,151,159,149,139,140,127,60,11,5,45,28,24,5,12,119,221,216,248,232,200,222,215,207,223,225,227,244,253,161,69,99,54,5,21,21,5,39,165,180,210,215,202,215,223,223,232,243,208,217,192,234,239,241,250,163,89,81,26,4,78,219,225,237,152,113,69,32,25,92,228,241,232,250,214,173,179,228,222,213,232,242,215,220,226,242,223,211,218,211,203,216,214,211,241,200,207,228,229,224,211,216,211,218,220,210,198,211,223,221,216,226,219,222,209,208,219,189,241,212,216,227,223,208,207,210,220,215,216,211,216,214,235,211,204,215,231,217,219,227,219,218,206,232,186,211,79,15,9,0,19,1,30,18,0,16,28,26,17,7,0,5,2,7,1,1,12,7,7,21,22,235,222,249,229,224,250,230,228,218,236,229,198,236,229,228,228,234,230,204,234,245,232,244,242,233,239,235,224,224,237,218,226,229,235,242,238,241,249,235,242,232,230,248,250,224,238,220,234,223,246,219,241,226,230,236,217,243,240,245,240,235,232,244,228,239,247,227,217,228,222,253,239,230,236,225,216,230,245,219,236,227,230,239,246,236,252,229,213,239,226,237,243,230,244,229,228,222,245,217,247,238,245,212,240,226,217,237,215,251,249,242,237,243,231,216,233,219,233,238,249,209,227,224,240,237,239,240,229,233,233,231,239,239,240,225,242,249,231,216,236,232,225,216,228,230,242,235,243,230,236,218,218,232,237,218,242,238,234,241,253,235,225,233,223,213,230,222,237,229,238,255,226,239,235,255,231,216,234,206,219,222,213,249,224,229,225,242,236,208,223,228,250,218,253,241,247,236,242,234,245,238,242,240,245,249,252,227,245,221,245,235,236,239,216,220,234,232,248,238,231,228,202,231,232,246,226,242,236,226,237,225,219,214,221,253,227,226,253,253,222,252,243,238,233,246,252,227,235,247,233,242,237,228,238,249,232,230,239,232,228,244,224,2
49,243,235,208,242,231,246,235,225,225,245,209,236,245,227,238,234,219,241,252,225,253,235,249,240,246,253,240,236,243,242,242,235,221,234,237,227,252,241,247,242,232,211,214,226,243,229,246,236,222,239,207,240,255,244,242,234,240,248,245,247,250,243,222,223,220,162,220,226,189,224,241,233,213,244,216,233,183,173,231,240,246,215,135,154,135,114,37,182,226,219,243,236,226,246,243,237,255,221,122,45,3,4,31,18,31,26,25,34,49,49,34,112,215,122,102,47,17,2,10,8,10,95,238,248,227,209,148,97,43,34,25,25,98,141,81,129,45,41,8,29,19,27,15,17,8,29,151,249,139,36,10,68,99,95,60,51,33,21,13,27,12,47,56,64,72,90,87,68,65,29,38,19,72,57,13,94,152,185,237,240,245,246,234,235,236,242,177,244,255,245,252,250,246,243,252,246,197,119,16,58,108,15,30,72,28,22,23,102,133,64,29,4,24,88,126,166,95,73,29,33,124,133,141,234,242,238,237,174,68,23,11,23,128,241,250,254,255,163,68,24,8,24,2,10,116,156,86,56,37,1,39,2,70,186,234,218,149,199,232,212,120,72,71,23,16,34,36,108,72,125,234,247,245,252,238,252,247,252,247,237,222,239,181,152,150,158,146,171,169,137,134,121,122,94,100,36,51,46,39,28,36,10,105,218,231,255,221,230,212,207,241,229,251,255,250,160,57,29,54,225,161,56,20,27,154,242,237,234,223,212,235,252,237,244,250,232,249,222,222,222,244,225,176,137,68,62,200,254,202,174,107,71,28,38,158,232,234,250,255,221,170,170,243,217,227,206,210,212,227,212,223,212,227,234,226,217,236,220,186,220,228,211,227,216,235,208,222,202,243,231,202,207,220,209,226,213,228,214,198,203,230,215,223,244,224,209,222,222,235,233,233,211,201,223,227,216,216,209,205,203,225,209,214,228,207,239,210,222,196,233,223,206,119,10,10,14,19,12,20,0,14,15,9,8,17,0,5,4,13,11,6,22,32,0,16,29,14,234,226,232,229,250,224,247,222,211,219,239,233,223,224,204,247,232,231,245,253,240,219,227,219,237,243,245,216,222,227,221,243,252,244,235,236,225,218,232,255,239,233,247,244,233,230,244,245,230,245,223,216,245,234,243,244,245,254,244,247,223,227,231,227,242,247,237,229,237,237,223,219,229,245,247,253,239,218,249,224,239,241,232,235,236,242,232,239,227,229,231,239,243,215,222,238,226,234,235,236,215,221,225,231,241,216,223,235,227,241,216,237,238,222,227,205,218,221,236,230,236,240,241,232,235,231,244,250,254,242,236,222,230,227,241,247,253,244,225,249,233,237,240,229,250,224,235,247,241,234,229,247,231,240,217,247,223,235,222,246,228,234,201,216,220,230,241,233,241,223,241,224,213,243,245,234,253,241,233,245,237,212,228,254,245,229,252,243,215,237,239,211,247,245,231,243,242,238,244,234,243,244,233,219,230,246,227,235,239,215,217,243,224,218,220,217,219,240,222,226,217,233,246,234,218,223,233,223,237,236,220,245,234,244,207,238,230,240,255,253,251,252,235,228,246,248,250,253,228,247,243,239,234,245,236,225,237,230,231,248,242,240,236,236,231,232,245,232,230,249,236,235,229,232,255,249,242,240,225,251,238,221,245,246,255,233,247,253,227,225,237,228,231,253,228,249,243,250,234,248,224,246,230,228,225,249,253,230,243,254,244,242,224,233,237,243,249,249,238,244,242,248,249,255,255,183,194,220,224,230,244,241,231,242,206,198,225,234,251,176,174,253,230,251,207,134,140,122,118,64,188,244,245,240,245,236,240,242,237,240,253,155,143,72,15,16,18,3,12,6,9,35,52,45,28,127,192,139,60,17,11,29,15,58,20,48,74,36,76,55,36,46,25,36,15,61,81,64,166,136,105,41,16,10,8,7,1,17,10,131,228,108,33,7,56,103,73,30,30,0,21,40,25,32,21,36,64,88,102,88,52,49,23,20,36,53,99,119,166,121,161,220,224,236,232,225,250,254,252,237,251,240,254,226,156,142,163,232,253,189,109,43,77,97,0,43,53,41,27,14,187,191,46,25,3,39,97,167,213,136,108,84,65,105,94,116,237,240,229,245,178,67,52,6,21,190,229
,253,249,192,62,25,3,38,3,41,210,245,219,80,14,5,8,13,26,29,63,169,223,173,210,243,237,163,148,73,7,23,12,86,222,174,138,253,222,251,250,229,244,234,238,233,215,231,224,161,125,135,152,182,160,151,138,121,137,180,176,237,180,63,36,31,32,30,41,11,48,201,249,236,232,234,237,236,233,237,199,122,92,53,31,160,231,233,193,43,35,159,219,246,239,250,238,234,224,237,217,249,246,240,226,241,245,247,243,223,150,104,3,104,174,144,112,49,34,84,175,228,240,250,252,235,195,165,173,206,241,212,220,231,218,231,218,202,233,246,211,215,237,218,208,240,227,215,227,250,229,228,249,197,207,217,202,199,221,215,231,213,208,218,207,216,217,217,224,230,232,214,211,197,207,220,197,220,209,225,221,211,227,212,207,214,235,199,215,214,222,223,233,209,214,228,191,198,209,108,11,30,8,8,7,3,15,25,11,23,10,23,20,0,5,5,4,10,24,3,4,24,34,15,238,249,240,211,247,226,212,238,230,229,230,208,226,228,229,228,231,233,239,219,234,220,242,229,231,241,221,230,233,226,237,241,237,198,208,236,209,252,219,218,229,229,236,228,242,250,226,242,251,232,248,230,224,230,230,246,229,226,246,232,235,243,226,246,232,226,242,241,248,225,237,223,227,224,239,230,231,202,243,222,223,216,228,235,241,234,236,241,246,219,242,223,204,247,219,223,230,240,236,221,229,217,213,227,230,209,243,242,215,241,240,224,231,217,231,210,234,239,220,238,228,229,235,224,244,239,247,233,243,230,227,249,242,223,238,222,231,242,228,223,242,227,228,238,212,221,234,232,247,218,224,218,238,219,222,254,229,235,211,242,233,220,227,233,248,226,245,219,225,246,232,213,234,228,215,214,226,238,227,239,235,246,225,221,225,222,239,204,238,233,235,242,225,214,245,249,229,223,218,236,230,254,251,233,231,236,237,246,218,218,244,238,230,240,236,219,214,237,223,215,240,206,223,229,226,216,250,229,218,246,241,210,232,230,241,240,249,245,231,222,254,239,232,226,249,244,246,243,220,252,236,240,236,255,221,227,232,243,243,249,252,254,221,244,236,254,243,237,230,236,249,218,200,216,233,235,233,229,226,239,248,238,255,244,243,249,252,255,242,238,231,226,232,238,240,241,229,240,247,233,243,222,255,255,242,242,217,247,240,230,254,255,218,245,221,250,255,245,217,250,236,224,231,245,244,225,232,237,226,221,220,190,196,205,222,208,246,223,253,215,201,234,229,222,188,155,130,128,102,70,183,252,237,250,237,233,212,224,209,254,236,192,226,155,145,93,7,9,15,11,18,47,75,81,11,73,140,185,81,18,6,3,58,49,29,33,50,42,69,60,34,33,29,52,12,67,63,23,42,111,203,130,89,60,45,37,13,11,45,35,52,46,26,36,16,78,63,26,25,10,16,23,9,17,19,44,66,72,106,34,48,19,26,49,105,213,205,168,116,136,219,228,251,238,239,226,248,255,250,214,247,238,250,128,100,97,63,131,148,154,110,17,90,94,16,64,48,7,52,11,52,115,50,34,12,64,149,204,155,105,87,83,31,59,52,124,242,252,239,219,99,20,12,8,95,219,237,241,160,80,15,1,38,69,106,234,239,251,144,43,39,79,80,74,10,4,9,133,200,184,205,225,219,155,137,53,31,25,39,188,215,237,159,198,255,239,241,253,244,254,233,228,234,222,187,156,151,138,178,165,177,146,99,133,160,213,220,219,217,98,88,139,85,51,34,15,23,91,229,238,223,221,238,249,188,116,26,13,9,14,7,35,159,227,229,107,6,59,193,228,229,242,223,139,106,90,108,199,238,237,240,221,214,232,207,197,186,118,55,48,30,34,51,21,78,180,245,249,240,241,247,208,181,164,239,224,219,216,201,209,216,209,238,235,218,225,223,213,224,236,249,224,203,202,208,208,212,200,226,220,235,204,228,222,216,221,191,207,218,221,219,216,219,213,217,202,195,226,219,212,207,233,204,205,215,220,237,231,215,201,221,206,236,209,210,177,234,208,199,216,215,206,229,206,209,121,9,11,3,4,1,29,17,5,14,36,9,15,5,2,9,4,1,13,9,7,7,23,27,52,230,217,236,239,225,232,222,206,224,227,2
13,248,223,245,224,208,207,242,221,216,240,246,219,240,203,229,215,242,236,232,241,232,203,223,221,226,240,239,208,249,231,232,239,220,230,243,248,247,214,228,255,235,232,232,243,242,218,210,248,216,223,232,240,234,245,243,238,245,242,246,246,232,243,223,244,237,224,232,244,231,215,245,238,231,231,234,210,228,211,227,240,213,240,215,232,233,234,211,241,219,216,220,217,245,226,215,237,221,228,233,237,215,222,234,241,242,215,248,249,217,241,244,224,240,248,224,228,231,219,236,227,236,239,230,210,219,215,244,234,230,239,224,231,234,254,234,224,253,252,226,244,237,243,235,212,213,224,224,234,244,247,207,230,229,208,235,230,233,233,228,229,251,235,240,239,212,216,243,220,225,229,220,240,219,237,233,230,236,242,249,224,241,234,238,232,241,246,218,227,225,223,247,237,238,249,216,235,204,227,220,228,228,242,224,252,216,245,234,232,224,233,237,247,233,237,232,234,220,245,222,229,235,248,201,253,230,231,227,241,235,253,227,219,252,232,240,240,242,240,248,251,220,240,255,224,239,245,232,252,248,237,238,250,229,250,250,255,238,212,250,234,217,231,231,240,224,207,228,244,238,247,226,226,244,247,252,248,245,253,251,232,248,236,247,222,255,247,231,237,229,230,245,252,251,222,246,247,234,237,243,248,245,244,251,242,229,224,241,220,245,236,238,243,255,220,210,219,236,204,195,204,166,213,248,244,233,254,230,252,191,189,233,244,239,160,151,86,107,91,32,194,236,246,250,234,239,231,241,247,239,237,202,228,252,243,151,74,22,0,18,19,48,65,39,49,11,17,90,129,60,19,25,64,52,31,14,48,36,114,106,23,25,13,37,29,51,95,34,12,27,93,135,155,124,72,63,63,48,21,38,39,20,64,11,19,39,24,35,21,8,16,11,38,42,32,57,40,32,69,74,142,141,162,157,153,215,176,116,109,155,210,248,216,233,246,244,208,180,104,56,133,206,145,71,139,120,15,21,9,37,38,11,84,106,69,74,48,42,24,4,65,107,44,37,29,75,171,202,122,64,39,133,131,60,39,53,224,225,199,109,29,9,15,10,44,188,192,161,91,8,14,88,252,240,231,252,253,98,60,116,240,235,226,211,101,13,28,145,194,207,211,236,178,95,36,28,1,25,103,223,220,240,124,183,254,241,252,235,251,232,230,235,216,209,133,158,156,155,153,180,153,138,135,123,188,238,179,123,112,81,226,222,207,105,54,29,5,56,158,240,251,229,243,214,116,43,22,0,9,6,32,14,52,158,211,148,54,22,91,204,245,246,185,111,51,23,14,72,188,176,244,140,75,129,115,112,127,125,123,99,52,17,26,42,145,217,255,250,246,244,219,186,154,203,212,214,209,226,210,202,228,237,215,232,184,216,229,249,245,238,245,248,231,239,210,210,237,195,230,233,226,225,213,213,214,228,216,219,229,230,210,213,217,222,215,223,226,204,206,220,237,240,223,183,226,211,234,221,214,215,206,226,209,209,211,218,212,231,212,213,217,223,193,203,203,119,34,6,4,28,4,5,15,30,15,15,28,19,19,3,6,6,13,4,17,29,12,8,0,8,240,239,234,249,238,245,235,226,226,218,229,218,231,242,208,216,239,241,230,242,231,236,215,240,251,241,232,230,238,245,228,208,233,212,233,221,220,225,239,234,238,247,227,235,223,238,229,253,253,240,231,245,239,236,232,230,227,246,220,241,225,235,236,241,232,244,242,232,228,230,218,215,236,249,232,215,239,240,216,235,226,240,224,248,237,215,215,239,230,231,221,226,237,227,224,222,243,224,223,225,226,228,227,239,228,231,223,238,245,220,239,230,225,240,221,224,240,192,252,222,243,217,214,216,206,244,220,245,226,208,242,217,227,221,236,225,248,230,242,233,234,238,230,236,223,242,233,237,231,231,228,230,240,222,239,239,237,216,217,246,208,250,232,218,206,252,232,245,231,236,223,228,248,233,236,247,233,236,236,244,235,229,246,227,234,248,243,237,250,248,226,226,246,227,243,247,245,238,242,235,249,249,252,231,224,243,227,231,239,200,238,235,223,226,239,227,218,235,230,200,227,247,237,
248,213,204,234,212,220,227,230,211,230,231,248,252,191,248,241,255,229,214,208,240,231,230,224,232,247,248,239,245,231,252,242,252,238,254,246,209,240,254,251,219,252,230,232,225,213,249,251,222,236,239,218,222,247,250,251,244,235,226,237,234,231,251,237,228,255,241,249,247,250,235,249,224,249,248,234,223,242,248,246,239,222,234,231,237,247,251,252,238,249,221,247,219,240,247,251,236,254,237,249,222,208,208,216,175,188,202,233,216,240,252,231,212,242,237,221,185,195,247,238,254,169,164,128,109,121,70,179,239,243,247,219,218,239,245,225,223,239,221,251,247,237,216,149,150,99,25,9,13,7,8,16,0,26,13,96,126,27,20,13,31,44,20,29,34,71,105,21,21,18,23,39,66,95,45,42,29,71,18,64,155,176,112,46,53,29,37,34,26,14,35,20,40,25,21,32,16,9,24,30,18,27,33,10,29,109,124,184,252,243,238,203,246,160,122,99,173,236,224,254,243,145,86,19,2,7,18,21,15,32,15,27,46,35,47,19,27,59,28,54,99,60,69,37,27,14,0,182,217,61,20,54,97,128,110,107,20,34,183,129,51,29,5,45,69,76,33,11,111,148,54,8,10,20,41,23,30,167,240,247,253,255,253,117,24,81,243,230,239,247,195,91,32,22,165,214,220,235,174,95,63,51,44,2,123,217,200,247,171,95,112,225,235,232,243,238,255,208,232,194,190,107,128,139,156,185,168,170,135,138,146,201,245,165,44,28,55,235,234,242,171,86,24,17,3,91,221,239,245,246,161,76,32,16,23,160,210,139,80,19,12,89,123,68,27,29,128,235,224,212,136,109,43,11,22,119,206,240,187,61,27,0,32,58,49,82,97,106,138,101,134,173,248,237,243,235,210,183,173,184,222,237,216,214,220,198,230,209,223,226,252,236,239,236,250,252,250,222,242,251,241,207,234,224,212,250,201,237,237,205,210,210,221,225,227,215,216,234,208,225,211,230,203,227,206,201,210,233,199,215,217,209,189,221,232,208,231,208,221,220,226,221,203,211,203,210,213,213,224,227,210,219,104,8,14,18,22,12,9,7,6,12,11,25,12,0,11,5,4,25,2,4,7,15,2,12,8,238,225,228,237,209,244,209,215,246,231,222,218,220,242,249,209,231,241,214,237,229,219,232,240,207,231,243,244,221,211,220,217,242,218,216,203,230,239,226,234,232,239,227,230,238,213,248,229,231,242,227,243,223,248,219,246,255,243,229,231,233,203,222,234,236,222,219,231,241,241,232,236,227,246,223,249,223,204,238,238,244,240,216,222,245,239,250,248,234,231,225,246,233,232,239,214,241,217,249,226,222,248,220,232,231,234,255,211,231,245,233,224,231,250,238,252,239,216,209,237,216,226,234,223,245,240,230,239,241,251,233,228,213,246,246,244,242,226,233,234,201,241,246,219,236,229,224,237,248,221,248,234,239,247,232,234,250,246,240,221,242,205,229,245,243,226,233,233,234,203,222,230,247,240,218,219,236,227,237,221,212,227,236,227,240,209,229,242,238,227,233,242,253,203,222,241,216,224,235,241,227,252,251,240,226,243,232,232,217,217,228,229,217,215,228,231,232,226,230,229,235,227,247,252,210,224,225,247,221,215,238,218,225,242,244,236,238,250,232,255,253,244,252,234,229,253,234,237,245,224,222,251,230,254,233,227,236,246,231,249,241,255,237,245,235,252,247,249,247,254,252,243,244,238,216,249,239,242,255,251,235,232,250,249,247,249,252,247,231,228,242,220,247,255,249,233,240,254,248,235,243,232,225,246,239,253,235,239,237,244,253,229,232,237,251,234,239,245,255,219,236,247,236,221,234,190,199,208,206,237,228,211,207,222,203,218,226,243,231,187,219,255,254,247,191,162,123,120,106,47,196,239,222,231,236,224,250,230,215,238,240,227,201,215,232,230,239,254,216,234,248,207,212,199,208,184,141,147,176,190,162,99,65,55,19,25,29,10,77,69,34,46,18,77,59,76,77,46,47,44,40,25,29,38,96,184,147,132,97,68,8,12,31,28,38,17,12,26,41,32,13,19,18,33,6,92,151,179,228,167,236,255,192,169,230,226,150,97,79,209,239,255,252,165,57,1,0,26,6,30,25,19,48,0,156,1
99,144,43,18,21,37,54,83,151,111,85,35,9,22,56,236,217,92,32,25,103,135,82,66,68,54,72,80,41,60,17,17,22,35,90,209,244,242,178,88,29,14,25,25,153,247,244,246,252,207,128,20,55,233,236,246,192,65,45,9,1,79,249,252,237,181,119,54,14,46,28,48,196,252,193,142,62,24,61,227,246,254,238,242,243,227,237,182,143,136,134,162,154,173,155,167,129,157,127,191,234,173,124,9,19,111,221,229,187,103,50,11,12,33,136,233,236,229,171,88,42,40,167,238,242,250,176,101,14,8,97,104,64,3,38,192,234,249,145,20,22,6,61,145,211,241,203,132,58,45,34,33,22,35,132,165,196,207,173,115,153,234,226,232,198,144,169,233,220,206,221,229,210,214,224,215,227,204,241,244,239,231,186,139,130,133,192,217,214,226,216,221,204,238,222,209,223,229,248,239,225,219,242,228,228,223,198,215,212,214,218,218,233,221,218,214,210,213,217,223,225,224,225,225,208,243,194,223,226,216,210,208,217,215,229,219,231,222,217,190,96,19,0,1,4,20,12,9,20,26,17,32,25,19,13,7,6,24,24,5,10,27,2,23,47,242,215,231,217,236,201,235,214,230,245,241,225,243,232,246,224,214,224,251,236,213,232,231,244,240,246,214,224,204,221,244,220,231,241,234,211,222,225,234,234,218,223,211,240,223,243,228,206,223,229,215,238,233,238,199,236,227,234,208,228,228,211,231,219,206,247,240,230,214,216,226,227,240,200,226,247,249,230,217,235,238,226,224,228,240,235,227,241,224,237,242,208,221,240,227,247,240,228,208,228,244,227,225,212,223,219,237,213,241,238,239,237,253,235,245,222,228,212,239,216,242,252,228,242,239,241,245,244,229,221,240,215,217,229,241,219,230,228,231,253,230,240,229,230,226,233,207,190,247,236,235,226,252,210,235,212,201,211,232,243,218,241,221,241,250,218,212,232,232,243,221,230,214,234,241,227,218,231,231,228,219,230,215,218,236,211,246,224,217,240,240,231,239,238,234,222,241,238,245,234,216,228,232,234,250,242,229,253,215,242,199,225,227,207,212,227,219,234,231,214,221,222,204,231,243,243,235,222,210,223,231,237,232,248,224,248,239,254,246,255,246,252,242,250,253,243,246,231,255,250,238,239,236,246,247,233,254,246,241,222,242,238,250,234,242,250,238,233,231,233,224,244,249,233,241,254,228,233,241,228,242,253,244,246,240,244,234,227,232,250,235,220,255,243,221,249,231,232,246,215,237,251,228,242,246,252,231,250,216,248,236,242,252,238,243,224,252,212,245,247,234,251,222,248,236,198,238,215,219,243,220,170,181,233,232,209,245,237,218,163,217,239,234,242,173,177,98,120,93,42,192,237,247,246,246,233,250,238,251,179,210,231,228,232,216,229,230,240,238,244,226,255,254,247,251,241,250,253,253,252,241,238,255,249,216,135,83,22,67,46,26,49,86,91,61,68,95,55,46,27,50,35,29,42,13,45,90,162,133,98,18,10,50,43,48,42,14,28,24,22,60,8,22,83,216,249,244,235,188,174,246,237,102,91,222,237,157,68,107,233,241,255,172,47,13,21,15,53,23,32,31,25,41,11,86,227,224,141,51,50,83,62,98,151,127,107,29,15,37,97,250,218,56,32,13,69,49,52,31,66,44,96,69,41,31,22,42,20,32,164,245,254,252,220,206,67,67,28,58,194,234,244,220,152,74,26,13,137,239,212,232,188,82,42,66,135,241,238,244,180,96,13,26,36,57,4,113,237,236,165,106,47,12,23,151,241,225,243,254,232,211,193,152,162,130,157,141,149,162,155,152,138,125,159,159,132,153,76,9,76,147,221,255,224,91,37,40,23,36,25,99,213,255,253,120,42,4,18,99,177,225,237,164,80,19,108,165,125,64,25,49,163,127,108,50,26,13,112,221,230,234,225,156,120,70,62,31,51,67,139,123,127,117,65,12,20,171,228,250,183,162,187,234,223,234,226,225,239,224,229,239,236,230,208,142,137,115,112,94,105,66,110,235,226,255,243,237,247,229,218,223,201,198,239,222,218,216,220,229,214,222,223,225,203,210,220,228,227,214,210,234,180,207,224,235,221,214,212,221,224,233,237,217,214,197,22
5,215,238,213,231,197,229,202,236,231,119,13,11,19,8,23,19,45,19,14,22,25,30,27,16,0,11,14,0,7,26,12,19,1,19,214,234,216,235,225,234,226,227,231,234,210,224,255,243,237,222,245,232,241,229,244,227,214,244,222,227,239,225,224,230,233,249,227,226,205,247,244,245,213,227,235,237,239,218,243,242,231,245,215,242,238,221,205,214,231,220,222,232,239,232,234,217,236,232,228,219,227,221,237,233,247,222,209,236,230,202,236,222,220,231,236,237,235,236,208,249,226,250,238,238,219,212,221,239,232,226,232,228,229,216,229,246,238,222,229,237,229,223,230,201,236,237,213,233,235,222,243,231,245,213,213,234,234,228,219,244,238,252,215,238,232,243,235,234,217,229,215,222,250,236,224,232,237,238,218,224,243,214,228,240,238,244,221,222,239,237,225,219,223,224,233,211,243,234,226,225,230,242,243,228,242,224,233,235,239,212,238,227,230,234,248,205,214,226,229,242,234,222,226,231,231,226,242,241,231,244,235,236,245,232,244,217,238,240,221,228,247,236,220,225,245,216,228,231,221,235,199,237,213,234,217,223,218,222,234,253,226,252,229,243,239,220,237,228,235,226,246,240,230,255,238,255,239,247,229,252,247,253,236,232,239,248,234,247,237,243,252,248,251,236,253,238,215,234,229,251,250,238,255,246,246,242,254,235,240,236,234,252,250,254,250,251,255,229,250,237,229,249,255,232,238,247,252,249,238,241,239,233,245,244,236,238,240,237,237,252,248,249,242,247,250,236,239,243,251,240,239,230,235,245,222,235,237,228,208,211,222,196,189,220,209,201,207,239,242,188,237,248,200,188,217,231,252,248,190,141,127,100,66,36,206,247,249,249,227,229,239,241,220,186,244,243,242,253,219,239,206,228,228,240,249,244,234,253,246,252,252,244,249,252,253,247,255,253,242,201,108,31,46,119,155,137,127,109,73,61,111,56,38,28,51,37,53,69,39,41,57,15,147,239,208,234,219,194,74,6,86,179,191,209,198,210,234,253,249,219,186,145,163,223,243,195,51,88,193,223,160,77,126,232,255,216,94,8,23,5,21,47,23,32,30,43,38,39,25,189,255,238,158,61,94,76,89,120,139,89,21,100,81,73,169,107,58,28,37,34,22,54,22,62,107,80,46,67,50,62,66,15,26,175,229,231,248,242,233,121,35,31,33,98,112,57,19,26,42,10,3,131,249,254,254,241,252,251,252,251,218,197,99,48,0,22,46,51,34,33,208,230,241,208,148,85,56,2,129,218,246,231,220,238,225,182,149,146,139,148,147,149,186,152,142,143,119,150,117,23,4,18,96,223,248,249,243,239,140,56,30,14,15,39,19,62,192,216,202,115,3,17,34,172,199,233,249,120,57,2,70,125,83,61,43,57,47,48,15,41,164,232,251,248,242,251,234,180,104,77,27,44,46,43,47,68,82,85,29,26,121,213,199,173,197,231,232,248,248,237,222,223,225,236,223,251,212,126,96,74,22,47,36,75,75,124,226,230,246,254,242,246,242,235,234,231,209,205,210,220,216,241,215,222,207,221,221,230,228,215,217,193,213,227,218,228,232,196,194,232,215,243,210,220,234,217,209,248,204,226,219,216,212,206,182,209,243,221,234,109,13,23,4,25,0,22,22,6,14,5,10,7,15,15,12,7,25,6,2,17,22,5,10,11,225,236,231,220,232,237,222,241,197,222,199,253,245,228,223,215,227,233,249,214,231,236,226,227,210,224,220,189,214,219,202,216,220,243,218,229,223,234,227,208,240,231,234,212,240,233,238,222,242,220,216,225,230,227,232,214,236,241,216,206,226,245,209,228,210,218,231,245,240,253,224,210,240,238,233,205,235,222,242,233,239,226,226,233,230,245,237,232,190,243,232,228,239,238,244,237,229,236,252,223,222,223,216,220,203,223,236,229,206,230,231,250,225,250,245,234,249,232,245,231,207,236,218,231,221,235,241,240,234,231,239,228,216,238,210,254,226,235,232,239,228,238,227,233,205,204,235,214,243,240,235,235,222,243,238,228,227,243,220,228,228,213,203,216,215,230,214,216,225,229,243,235,213,235,217,236,248,223,238,225,238,206,245,
8,202,203,203,215,202,211,205,213,235,216,222,213,211,205,209,236,208,233,195,244,219,213,215,202,203,219,224,198,218,198,216,200,216,201,218,213,218,213,203,183,214,192,224,209,225,217,219,215,208,213,222,190,210,223,221,232,191,196,209,203,224,208,200,232,233,205,202,219,213,219,175,193,221,222,194,203,204,196,209,229,210,206,230,204,197,223,233,222,227,198,212,202,213,210,219,231,225,213,192,197,210,192,197,188,217,208,193,204,210,220,227,207,193,201,224,218,224,213,215,211,217,213,248,195,208,198,219,196,238,214,214,208,210,208,205,228,205,207,216,213,220,219,243,219,200,225,217,236,220,208,231,199,200,225,230,212,210,237,240,224,223,235,238,203,233,213,235,225,250,229,239,206,206,251,236,241,229,205,231,241,207,197,228,222,237,240,248,236,231,212,240,237,215,219,226,242,250,249,235,249,228,229,227,249,234,235,241,249,248,219,240,229,252,245,247,238,246,229,220,249,238,211,235,229,242,236,240,246,215,249,255,248,233,235,249,218,102,75,77,127,177,188,236,143,90,176,236,225,239,222,229,239,218,209,225,202,219,244,239,230,151,164,115,113,61,58,221,236,225,236,206,189,227,229,196,224,186,237,226,217,224,243,230,209,178,188,230,182,193,221,237,207,128,229,202,154,138,136,185,186,211,164,206,193,234,181,135,95,85,236,253,250,150,84,122,105,98,38,70,151,166,168,206,254,175,69,187,184,227,236,204,197,169,214,233,167,144,173,196,66,44,187,240,242,178,202,161,109,110,219,219,230,247,230,208,205,220,255,186,151,224,249,220,247,243,237,133,98,111,107,115,46,18,23,105,127,82,74,95,99,90,70,32,30,47,112,121,99,115,132,116,118,115,150,171,169,122,105,87,85,63,38,20,13,24,10,27,14,39,69,56,12,13,33,80,83,82,12,43,113,108,147,93,63,95,106,97,41,98,127,73,42,13,88,120,112,52,40,33,42,40,26,30,49,15,66,220,238,251,227,149,141,152,132,152,146,153,151,119,133,144,128,128,165,118,65,18,15,31,46,28,23,48,19,11,36,20,18,24,32,30,33,37,38,20,29,0,33,15,22,26,42,33,27,14,5,56,53,51,41,32,0,5,35,12,41,36,24,67,75,71,93,73,99,106,82,79,50,68,77,60,85,65,74,47,69,39,28,10,14,33,72,63,112,109,122,102,89,80,131,94,58,50,72,98,102,107,135,147,153,98,140,62,56,29,31,61,147,221,243,238,243,228,230,230,230,242,233,222,242,239,234,234,237,237,232,220,252,199,249,255,225,237,222,235,233,217,241,232,225,212,219,236,212,239,223,231,222,245,234,203,248,243,215,213,240,226,218,234,232,220,124,7,2,10,1,3,8,15,2,51,3,4,1,19,7,4,10,2,11,16,8,13,48,5,16,196,210,192,196,228,214,205,210,195,205,212,183,229,204,211,218,209,222,197,217,185,189,217,209,206,212,185,206,206,210,229,184,207,201,214,201,178,212,205,198,223,203,227,208,204,213,203,191,207,209,195,214,200,196,195,192,198,203,214,187,213,190,197,228,203,204,188,193,199,193,208,222,162,211,218,223,197,216,209,215,197,211,195,181,179,217,207,212,215,234,188,227,225,211,220,209,210,239,224,217,203,204,216,223,216,204,203,193,219,213,213,196,197,227,215,206,226,212,197,214,230,219,210,211,221,212,212,218,213,233,219,223,215,198,201,223,217,198,236,207,223,201,210,211,221,213,203,197,239,241,212,206,208,215,199,235,222,201,194,214,208,200,220,210,224,219,207,212,220,192,177,202,229,210,209,209,213,227,214,195,225,208,200,212,242,193,236,203,223,212,212,231,225,209,239,242,207,212,192,226,206,246,217,234,206,206,217,221,205,206,231,223,202,221,206,219,216,217,225,226,227,210,221,190,228,238,219,245,215,228,213,197,217,210,215,202,235,182,220,225,226,221,240,226,221,234,198,212,211,219,214,235,247,234,211,207,215,232,217,212,227,232,219,220,221,228,247,199,239,246,228,229,237,237,206,243,212,224,219,226,246,216,225,236,221,230,239,237,236,243,241,227,241,239,225,237,239,238,249,234,
240,236,239,227,238,233,227,228,220,234,226,236,232,234,241,239,215,215,166,167,99,58,44,150,236,198,223,132,111,235,219,228,239,205,213,214,205,207,218,193,206,234,244,251,173,130,135,95,50,84,232,225,246,225,217,230,243,255,239,200,229,230,234,214,234,250,180,191,184,194,223,184,193,189,236,195,104,191,141,153,175,160,160,209,227,175,209,192,231,180,187,102,83,229,209,253,169,59,82,113,81,47,4,79,117,196,241,222,190,67,204,203,236,209,178,189,183,189,183,150,141,240,186,66,119,154,234,223,210,214,159,93,167,224,244,254,231,245,214,202,202,228,175,189,243,240,219,234,246,252,172,78,83,94,140,63,41,22,69,106,113,61,18,47,62,39,47,71,127,150,133,73,69,117,97,116,90,86,63,19,42,12,18,22,24,63,96,53,37,15,13,20,72,95,82,99,87,56,122,110,93,9,73,103,123,109,118,159,112,105,128,36,45,140,90,63,36,98,120,66,36,19,18,25,42,29,20,47,39,82,239,255,247,252,191,155,149,168,176,148,154,155,126,133,171,175,173,175,105,44,51,25,24,31,36,33,5,30,13,51,35,17,27,25,40,31,22,12,15,10,30,30,37,39,13,8,29,20,16,9,45,61,61,38,23,43,41,73,82,48,67,64,51,38,14,15,24,58,60,77,96,105,83,76,82,75,79,80,70,69,96,57,43,72,49,75,41,91,120,130,101,89,110,92,76,58,48,102,98,115,113,83,71,48,107,145,86,61,54,37,68,107,217,235,244,249,247,244,213,237,239,229,235,227,229,210,241,240,235,249,251,230,223,251,242,233,230,220,236,226,239,253,215,226,235,225,226,243,215,230,200,200,226,226,221,237,233,233,243,231,227,232,231,209,214,112,8,25,6,19,12,22,13,17,22,3,34,12,16,7,15,10,4,12,2,9,3,30,19,2,218,206,211,181,234,194,211,206,209,207,202,208,199,209,196,197,210,212,198,210,192,193,199,206,194,205,204,221,213,230,227,191,205,172,223,210,190,184,207,197,213,219,181,184,180,221,209,211,228,215,180,195,182,218,223,193,189,209,222,200,198,192,218,189,201,203,186,197,226,227,199,213,213,206,192,228,209,214,178,207,205,213,215,193,217,202,214,216,178,203,220,212,222,197,183,203,227,218,209,214,193,210,215,241,200,204,191,215,233,218,228,219,201,207,211,207,227,203,216,205,216,195,212,207,210,212,209,221,213,212,223,203,200,225,210,223,214,206,214,228,212,212,208,241,221,215,244,223,218,186,207,194,208,189,213,207,191,210,216,237,214,230,209,220,205,207,213,208,215,223,221,234,215,205,215,231,233,207,229,234,230,197,241,221,219,210,236,208,208,215,210,197,203,201,200,235,214,231,212,211,214,221,194,220,204,220,231,230,222,216,231,215,202,222,241,216,228,216,223,212,249,231,214,217,218,221,216,214,214,216,236,223,197,239,237,213,246,236,226,219,209,204,204,225,236,243,208,203,211,213,249,247,231,224,229,174,222,246,209,208,217,228,229,191,228,213,225,230,238,250,214,224,232,220,228,211,238,213,220,241,221,235,245,226,242,236,235,247,232,241,215,246,236,239,209,236,250,237,235,237,236,242,222,246,227,223,251,236,234,255,242,240,244,235,213,193,176,182,205,219,161,98,118,224,203,187,223,129,125,212,249,223,205,204,228,223,193,181,236,164,206,251,230,240,202,162,137,104,49,79,218,232,226,228,178,222,255,243,233,214,202,216,193,223,218,228,204,190,132,148,214,191,176,177,189,149,149,177,190,185,228,182,187,192,240,185,182,200,222,181,174,107,70,242,227,255,188,51,87,132,127,38,24,63,48,144,223,236,143,84,199,205,238,217,142,182,205,160,118,158,156,226,156,78,112,151,239,217,206,249,174,88,151,254,225,219,229,214,207,216,236,216,185,185,224,215,253,238,250,255,205,97,99,96,136,118,84,13,43,92,132,80,65,34,42,64,92,108,83,141,56,28,125,123,109,114,51,31,19,24,23,21,34,4,49,96,84,65,55,20,27,16,74,126,114,112,124,101,105,99,32,35,70,117,99,82,89,122,135,129,104,49,29,63,91,58,103,108,85,75,47,47,17,51,47,42,52,44,25,164,250,245,248,233,180,
149,173,152,163,185,188,150,169,157,153,158,170,154,92,15,36,37,17,36,30,17,13,45,31,27,32,30,19,17,9,27,32,2,8,16,36,21,41,34,43,30,26,24,19,25,67,81,41,56,27,26,74,73,48,54,54,54,37,4,55,29,22,45,36,42,20,45,41,48,62,92,64,83,79,57,41,62,93,87,99,89,85,123,132,121,113,45,85,134,89,62,42,74,149,135,90,56,55,25,46,113,128,98,43,76,52,79,219,230,226,240,205,229,252,245,242,241,248,236,227,233,215,219,223,239,214,240,229,226,225,236,224,233,241,233,245,233,234,220,218,229,217,211,226,234,220,222,229,248,248,223,207,227,218,228,234,233,234,231,203,100,1,3,0,10,22,16,38,18,7,12,23,17,4,1,0,16,10,14,7,2,13,24,3,11,201,209,179,211,199,183,197,177,209,233,200,204,182,195,207,217,212,198,179,201,201,198,199,216,217,202,204,204,212,216,200,208,211,214,184,189,203,214,227,191,205,206,188,207,174,200,195,219,202,227,215,233,195,197,212,198,215,227,207,221,207,210,178,185,219,201,209,219,212,185,208,203,187,214,220,196,208,230,191,175,212,204,216,200,211,207,208,189,212,209,203,194,209,203,224,194,206,209,204,195,218,176,214,213,200,205,208,232,209,220,199,217,222,229,206,215,196,212,201,196,209,206,223,210,205,232,223,226,230,220,247,208,212,217,204,222,211,224,211,215,212,221,199,221,242,193,196,215,203,212,177,200,213,207,206,222,205,196,215,181,216,212,213,226,224,229,205,217,227,201,180,202,189,216,211,213,222,225,228,214,218,209,232,196,233,238,216,220,213,204,214,205,211,231,224,204,218,205,220,193,233,177,212,236,199,191,230,214,192,221,212,207,236,233,211,237,221,222,224,237,217,222,213,228,199,197,225,221,218,180,227,217,226,224,245,231,212,223,238,249,216,213,223,207,198,230,225,220,218,230,218,222,214,203,193,218,225,216,215,234,193,226,244,215,219,231,238,222,242,223,226,237,226,231,244,230,240,228,232,225,247,250,232,210,245,234,247,234,238,248,224,224,238,233,239,239,235,230,234,241,222,250,212,245,239,245,239,244,240,241,234,196,176,170,176,204,206,211,244,249,204,108,175,232,207,247,221,116,107,212,228,238,193,187,236,253,225,186,228,193,222,237,198,249,208,187,145,112,57,57,200,228,247,207,147,165,248,238,231,203,219,229,210,214,222,233,220,218,173,131,172,174,190,179,151,156,195,193,182,207,209,194,178,229,219,177,190,181,197,163,183,83,89,234,220,242,163,49,71,126,99,53,41,39,11,102,196,245,137,54,191,201,243,175,111,176,184,84,133,187,209,192,100,93,122,127,213,179,215,230,167,128,168,228,241,239,241,239,234,219,209,240,169,221,222,239,229,242,254,238,188,74,46,108,135,132,84,46,24,51,114,148,144,152,119,120,152,161,152,119,34,31,111,122,105,131,51,9,15,19,6,18,49,35,106,133,121,137,67,41,25,22,49,106,101,102,89,126,88,108,78,37,63,122,101,36,44,59,87,102,80,74,36,18,89,129,115,98,75,64,34,48,44,77,68,48,94,49,88,250,249,231,235,247,202,153,139,154,171,220,199,151,127,149,166,160,152,119,84,36,44,43,23,38,36,26,18,22,36,38,11,11,35,14,46,46,29,15,16,4,21,26,14,19,35,23,16,14,9,37,84,80,46,6,30,38,27,60,87,45,73,49,36,23,44,19,53,79,107,65,70,54,46,29,17,47,47,60,60,76,82,73,58,91,91,81,108,101,105,147,96,95,91,122,80,45,57,48,111,123,115,72,64,113,95,102,140,90,52,52,40,77,200,235,223,239,233,237,234,213,239,246,249,241,219,237,230,240,233,208,227,219,249,225,245,234,217,224,218,206,230,232,227,208,239,206,229,210,226,224,228,227,199,220,222,244,232,212,234,203,222,226,238,236,204,118,5,18,26,22,16,11,6,11,36,7,19,21,1,6,31,1,8,21,1,6,7,1,27,26,198,195,214,187,223,206,192,201,201,202,198,207,210,207,210,214,209,195,217,231,225,221,195,194,189,185,209,205,224,213,205,217,190,185,216,199,223,217,205,200,200,201,220,198,222,204,203,216,201,206,211,208,205,203,212,212,194,190,192,207,1
84,197,201,209,203,184,206,199,229,214,224,207,198,204,190,214,222,227,194,217,194,199,194,203,236,215,204,214,215,232,200,212,207,215,214,207,207,210,194,209,203,211,215,220,206,208,211,188,202,201,221,210,195,211,214,215,196,214,223,209,212,187,219,185,204,218,225,217,209,202,229,214,205,199,221,199,229,192,215,208,201,222,210,187,233,220,195,211,214,198,188,209,196,190,198,196,200,215,218,206,186,222,208,221,203,222,198,226,203,208,204,228,241,221,222,213,211,221,224,214,184,206,194,189,212,222,229,196,209,205,227,242,224,210,225,205,190,227,208,175,205,214,209,213,222,199,192,206,200,245,215,226,218,222,230,226,222,226,195,200,242,230,220,226,229,207,225,236,204,211,212,217,239,196,216,218,208,239,238,200,221,242,211,225,210,200,207,199,224,202,196,197,215,220,195,210,239,211,215,228,235,224,219,208,245,224,216,243,215,225,221,225,224,222,225,244,225,236,230,241,245,225,229,231,209,225,221,235,233,232,228,209,232,232,247,226,246,240,238,237,230,231,239,248,243,247,230,246,224,200,196,196,187,197,193,218,237,251,230,247,177,85,159,237,217,233,183,89,115,235,222,240,208,205,245,246,206,205,243,186,227,248,253,254,182,142,122,102,62,73,216,237,251,122,34,59,135,243,229,213,235,224,206,232,227,219,226,184,153,148,230,175,176,225,162,167,210,180,192,211,195,150,110,219,186,190,168,211,226,166,196,94,81,240,243,254,174,48,79,106,111,51,57,39,17,47,164,229,153,117,201,205,243,160,146,236,175,171,179,206,220,138,98,177,191,129,147,177,230,217,137,99,167,210,238,245,229,216,218,217,240,213,187,222,227,221,247,253,251,254,116,55,35,47,127,153,144,77,41,36,47,109,137,148,138,119,122,143,120,76,5,37,118,92,136,86,33,38,18,20,24,27,43,78,114,101,100,120,102,89,32,35,75,100,73,38,1,53,90,95,64,49,35,106,85,73,30,15,55,120,129,112,62,59,122,140,129,95,105,96,104,96,153,156,136,145,138,66,150,224,241,253,251,228,187,138,151,203,212,245,205,155,135,132,160,190,170,116,96,77,78,93,63,84,49,59,47,66,66,63,45,54,76,70,47,23,40,43,26,4,29,49,17,33,27,37,47,17,18,29,56,66,31,30,33,54,75,43,28,74,58,53,49,34,35,49,78,152,103,98,95,69,46,32,12,34,99,55,61,55,42,37,77,77,102,76,98,81,108,150,101,83,98,79,101,74,49,24,86,126,180,159,148,92,78,132,121,104,60,34,53,91,205,215,220,236,233,233,242,226,245,221,235,242,216,237,231,219,211,239,244,239,229,244,218,236,237,203,226,219,228,243,221,235,218,226,225,243,234,228,236,238,215,211,248,205,212,227,227,202,212,228,220,223,228,122,26,12,0,17,6,9,16,17,7,30,12,14,12,0,0,18,21,0,7,12,15,30,17,3,192,189,210,199,209,176,211,210,215,194,213,230,203,212,216,180,196,194,205,185,187,229,200,200,220,197,223,204,198,192,181,215,218,206,198,206,197,191,205,208,220,210,196,193,204,203,164,205,209,207,192,214,217,213,236,209,213,194,211,187,219,184,214,224,210,209,187,214,213,183,189,217,190,185,214,189,211,190,194,191,208,196,183,195,194,185,195,189,211,214,231,198,201,206,185,211,193,227,227,191,203,207,190,207,228,218,201,231,223,181,224,185,225,217,197,201,227,194,217,213,194,210,214,234,217,188,226,209,212,222,190,218,208,221,180,197,202,224,205,200,213,203,214,206,214,216,195,198,207,212,212,197,226,207,207,208,226,211,217,189,230,205,204,194,194,190,188,219,203,221,211,209,239,219,211,199,203,218,197,218,206,224,215,211,188,199,233,221,216,182,189,213,212,211,209,225,240,220,199,236,214,224,180,231,198,193,204,216,214,229,203,215,204,232,233,213,205,185,201,205,214,206,207,221,242,205,216,190,222,239,244,220,199,227,210,215,217,225,218,225,237,220,215,213,209,240,227,217,231,215,209,232,214,214,177,217,228,219,219,202,238,209,223,235,225,227,244,231,212,208,217,242,230,2
34,218,213,221,231,228,211,224,223,223,217,227,249,205,221,237,207,252,246,223,238,235,225,244,235,211,237,216,249,233,255,247,191,185,165,155,185,183,207,225,243,238,240,228,241,224,250,159,73,175,227,176,167,146,87,151,239,244,244,248,218,248,251,204,225,246,203,213,204,196,189,142,155,154,128,85,115,205,237,197,44,5,4,132,224,225,210,223,209,218,228,238,230,192,167,155,177,182,203,190,197,198,187,216,156,167,193,186,158,140,228,183,172,204,238,238,194,188,88,125,244,239,235,187,67,70,124,132,41,47,84,22,65,206,237,176,157,223,215,240,189,160,246,229,173,141,147,209,147,92,228,235,147,126,181,253,197,83,110,197,240,219,253,226,248,238,228,243,195,164,224,243,250,244,249,239,128,41,1,9,38,79,113,166,139,69,65,35,33,80,140,100,53,54,136,138,86,27,70,120,110,140,100,35,24,15,23,57,33,87,113,99,49,75,105,128,97,43,23,64,103,64,59,0,30,64,83,69,84,110,138,156,120,134,123,152,143,177,138,133,165,132,126,102,126,131,113,125,117,127,140,116,88,112,78,182,249,245,252,251,221,179,150,163,200,253,226,177,104,110,131,171,224,152,128,92,96,65,31,58,55,59,41,52,58,68,57,52,64,67,58,67,46,34,67,47,42,72,69,34,64,59,35,38,29,47,71,61,69,37,11,40,82,67,65,80,50,58,52,6,53,67,54,72,104,92,77,40,20,24,41,35,79,105,84,43,20,17,15,34,20,59,67,64,92,104,112,93,84,51,109,146,120,69,58,21,69,70,84,85,51,95,139,129,67,74,49,47,102,208,227,218,248,238,238,235,247,237,245,219,219,248,201,238,231,238,218,233,221,221,229,222,235,247,232,243,226,244,220,244,214,223,241,202,217,248,231,208,211,223,227,217,211,227,214,213,218,205,215,225,232,223,131,4,9,26,0,1,11,19,16,0,10,16,11,9,7,14,30,8,30,0,2,4,36,0,15,209,185,200,185,219,193,222,198,191,195,195,182,205,197,188,195,202,221,222,201,230,213,183,191,224,200,207,201,197,206,179,199,208,216,204,221,199,185,197,198,218,180,214,202,215,230,234,170,193,200,222,213,204,206,208,199,215,197,223,197,228,193,213,182,195,198,216,211,191,229,189,212,211,185,218,221,205,200,193,193,226,182,211,190,173,224,192,202,189,214,219,198,198,188,206,183,191,229,208,189,221,204,200,169,211,199,194,227,210,232,183,205,189,201,199,200,201,214,220,224,217,193,223,211,185,235,209,207,197,176,205,207,212,195,218,214,205,209,206,206,218,207,172,232,212,204,213,213,210,215,197,205,200,186,170,186,229,198,200,215,207,207,205,231,190,198,206,197,215,194,218,215,186,203,211,197,193,199,208,213,219,230,213,196,229,203,194,210,191,201,212,208,201,216,205,218,210,235,180,226,208,200,211,207,225,224,215,205,204,194,196,220,219,183,210,220,217,197,225,201,214,213,233,214,233,211,192,201,217,234,221,204,213,204,208,210,217,221,210,220,217,220,202,198,209,198,224,196,212,204,204,212,202,194,207,217,228,210,203,232,232,209,219,211,224,207,230,230,224,228,221,230,233,214,213,228,231,213,226,228,233,223,226,211,230,198,239,235,245,227,211,243,228,228,233,245,228,238,231,227,234,219,197,207,183,178,182,199,194,216,221,238,248,242,230,238,208,250,243,213,183,84,159,225,177,160,98,90,131,207,200,239,182,172,223,208,193,207,226,191,132,97,141,121,139,154,161,187,147,148,170,160,139,65,12,14,106,232,254,213,239,228,244,232,230,205,224,197,181,178,182,202,214,204,147,147,162,108,188,197,208,190,188,251,210,228,243,248,240,202,183,59,94,235,247,253,176,59,105,122,108,67,41,43,43,100,200,252,201,156,226,231,254,192,202,247,231,179,85,170,255,120,49,198,255,188,128,159,248,198,53,137,214,241,243,241,210,229,239,246,229,171,190,247,240,246,252,242,160,44,5,28,13,35,32,70,116,109,151,104,87,22,13,76,107,46,9,41,92,80,18,80,131,127,160,75,20,41,24,49,43,55,103,112,120,73,51,83,38,115,88,92,115,97,73,57,91,128,133,145,154
,157,172,180,150,141,148,145,153,123,108,141,115,131,112,96,137,117,114,102,104,106,101,95,87,93,63,80,213,241,244,245,250,213,144,188,210,237,244,201,164,113,133,120,184,233,147,81,81,75,65,77,54,79,46,77,61,78,73,85,54,78,69,59,68,73,61,61,48,51,48,65,69,60,53,53,77,59,67,76,80,75,62,41,31,45,85,68,109,71,53,24,7,49,69,79,56,98,55,31,7,27,15,31,28,65,70,72,63,25,37,1,12,45,40,10,25,34,43,96,109,71,74,48,106,140,115,71,61,37,50,60,49,110,107,147,117,67,45,68,78,119,234,234,244,223,241,238,238,232,211,247,240,220,187,223,220,241,234,224,228,216,234,236,233,244,208,245,223,233,217,212,216,240,223,238,222,229,228,240,225,230,230,204,207,225,216,217,225,229,247,221,227,229,244,112,11,14,41,2,13,7,17,6,6,10,24,22,14,7,6,0,7,8,9,5,14,0,8,1,209,192,199,194,199,213,224,207,179,183,211,216,191,212,226,185,222,198,207,190,197,205,177,194,192,209,190,210,202,212,204,194,220,223,205,209,225,197,217,204,199,186,177,197,203,196,205,203,220,229,206,211,201,186,184,215,207,201,205,195,195,193,193,212,194,214,189,212,205,183,222,194,204,188,189,198,225,185,225,212,213,209,211,229,211,202,190,203,205,214,198,180,204,234,191,186,200,211,219,197,219,204,229,208,201,210,206,205,186,205,209,215,208,190,194,227,198,217,198,201,211,198,225,207,231,207,203,207,190,203,209,215,185,196,193,227,209,184,222,225,196,209,201,228,207,212,203,227,211,204,184,196,196,215,231,177,189,203,199,204,210,187,200,191,200,184,195,194,214,203,210,188,202,210,206,188,212,199,210,211,220,180,201,200,224,214,227,203,206,201,217,211,215,220,207,196,210,223,228,224,223,223,210,194,208,212,211,239,203,204,233,231,217,204,222,215,215,220,217,205,238,211,223,234,213,222,207,208,227,202,235,213,222,213,210,200,204,231,216,204,196,218,213,215,182,233,194,225,234,195,217,218,219,205,197,223,205,201,224,215,212,208,202,223,236,225,238,221,241,234,220,222,228,219,214,223,235,205,237,227,231,237,228,251,233,200,240,218,248,232,242,209,218,246,207,227,234,227,232,214,208,164,170,186,204,234,213,229,255,236,248,215,223,229,238,240,209,220,222,230,174,93,148,235,202,170,127,95,111,146,129,149,129,125,164,143,151,196,200,128,89,97,131,124,154,172,158,172,169,164,147,175,149,93,34,46,159,241,255,220,255,210,234,221,238,222,225,211,178,218,176,177,149,157,156,134,194,153,206,253,225,247,199,235,194,197,167,154,127,90,107,19,53,194,255,234,146,69,70,128,89,72,42,74,10,77,238,241,240,161,181,253,226,185,191,248,216,131,142,232,238,129,91,236,235,233,136,97,153,172,91,140,233,225,238,249,233,230,232,248,218,136,182,228,243,239,254,187,61,0,9,6,19,68,57,20,40,105,118,158,94,41,45,55,104,102,34,27,41,54,45,112,134,109,127,49,41,48,19,27,58,57,108,97,91,52,38,49,77,134,124,157,120,120,165,159,162,152,139,148,132,102,164,140,122,119,111,103,104,53,83,33,56,81,70,66,53,46,62,71,33,19,62,55,46,47,20,50,240,248,247,248,251,223,168,192,243,255,247,165,138,140,145,163,219,211,109,68,15,23,19,52,20,24,37,21,45,54,64,58,48,55,56,58,82,74,95,42,78,67,62,50,67,31,63,76,50,53,83,72,66,82,52,95,67,65,82,80,67,58,30,26,31,46,93,70,108,98,78,53,51,21,27,35,46,92,83,62,41,28,28,35,16,15,5,17,38,4,61,88,105,88,49,34,68,127,136,122,110,108,76,72,116,140,104,98,56,32,48,62,92,207,230,241,240,225,207,226,229,213,223,236,236,248,209,239,228,249,229,247,247,235,222,211,226,225,226,224,230,239,208,238,240,221,231,239,229,219,204,209,225,224,226,214,233,226,215,211,220,235,213,199,208,231,228,123,18,18,1,11,28,3,5,32,17,8,24,9,15,3,2,0,0,18,5,11,10,27,8,16,177,208,201,164,186,172,195,202,196,211,203,214,171,173,186,176,200,190,202,208,196,218,194,190,205,204,187,204,213,157,201,1
82,192,207,196,202,179,192,207,211,205,199,197,208,221,198,180,223,189,176,196,185,218,204,197,170,202,202,192,198,197,180,206,205,196,210,214,183,208,205,199,195,188,178,210,210,192,202,216,198,204,204,190,207,196,192,226,204,193,188,203,160,216,215,216,185,186,196,199,229,208,199,194,189,219,215,201,194,196,219,207,187,179,215,181,193,199,204,214,204,198,196,233,232,200,212,237,212,221,216,211,203,199,216,236,197,225,201,205,220,226,205,215,211,215,209,208,192,204,202,198,217,220,207,193,227,207,199,232,217,187,200,214,191,202,199,183,206,221,164,199,221,213,219,194,213,231,213,213,197,199,211,203,193,192,210,238,215,205,197,200,205,226,231,193,224,201,207,214,223,210,209,242,215,184,229,208,223,203,215,207,239,231,221,214,221,193,198,210,229,198,213,198,203,225,210,222,208,197,218,228,220,214,235,208,198,198,224,248,213,214,219,210,198,239,224,205,215,197,202,217,219,189,214,215,237,193,204,210,234,225,215,197,219,218,207,229,209,226,228,222,228,213,233,219,226,239,229,235,210,202,229,249,221,236,237,235,231,208,215,232,237,248,253,239,214,210,207,194,171,183,196,217,205,231,231,243,243,226,249,227,237,216,215,195,231,230,221,234,223,180,69,160,233,220,199,63,91,115,176,155,170,134,120,133,156,119,137,164,86,118,103,99,92,131,127,134,162,124,154,161,121,145,88,95,94,131,197,223,214,246,246,229,218,212,191,232,153,137,212,158,218,156,219,226,180,222,234,238,252,225,238,152,147,98,84,41,17,3,18,39,36,24,89,128,124,75,52,36,105,97,79,47,44,25,73,157,223,136,67,127,206,178,173,142,134,110,68,125,224,197,70,94,195,225,192,156,43,83,143,123,231,241,219,241,220,225,226,241,231,243,158,229,246,241,252,219,124,1,0,19,9,40,46,33,19,23,49,95,101,137,104,40,18,94,99,89,38,55,42,11,123,125,108,142,53,23,9,24,26,53,109,138,125,108,112,116,117,153,134,139,128,151,118,159,143,114,147,118,105,121,121,109,114,76,79,53,64,53,28,35,26,21,19,56,49,42,31,43,12,16,53,29,49,48,55,8,92,212,250,255,253,252,165,144,211,238,202,192,137,114,120,122,156,223,168,74,6,11,22,44,11,51,38,17,42,47,37,34,16,4,38,19,22,14,40,45,15,43,59,42,56,39,52,44,50,46,66,36,57,55,54,71,53,82,83,61,80,69,74,67,122,44,83,121,104,112,77,38,44,12,37,16,37,64,108,96,55,8,43,33,29,41,19,50,18,40,17,33,74,102,89,73,65,41,41,112,122,180,167,166,152,147,116,96,82,37,36,72,67,162,234,243,247,234,228,235,239,237,233,214,205,229,213,242,233,230,225,215,244,221,223,221,239,237,231,232,238,217,226,233,231,212,221,248,236,245,230,238,222,239,212,224,234,212,235,239,215,192,244,224,222,238,202,216,132,0,16,6,0,7,15,16,15,5,41,29,19,5,0,8,34,1,20,38,13,22,0,6,12,197,192,189,183,208,166,202,189,180,187,192,201,184,190,194,196,202,204,194,212,192,204,198,190,193,190,188,182,207,188,200,191,209,199,196,209,212,202,185,191,217,195,170,207,231,204,205,191,155,189,198,226,187,210,212,202,191,199,199,237,201,195,168,204,180,201,211,209,200,189,184,217,188,233,204,214,203,210,201,200,223,205,209,183,188,230,174,220,206,214,206,202,209,197,195,171,217,208,208,189,220,194,209,216,217,212,204,216,208,224,219,210,192,195,206,186,186,193,188,220,180,194,212,189,199,217,213,191,194,198,221,195,188,203,216,205,214,187,223,212,208,204,216,214,199,193,225,222,228,227,206,213,206,206,205,215,202,180,205,215,210,220,193,208,203,190,218,235,211,205,217,194,218,202,217,204,178,214,187,226,181,205,213,229,196,226,197,226,212,203,220,192,222,238,217,184,230,216,207,221,216,203,192,224,226,201,226,207,213,198,211,198,194,218,219,206,182,209,224,224,204,200,220,226,212,198,192,200,212,224,212,209,210,195,226,208,209,211,226,196,231,216,204,233,219,227,208,228,206,221,206,214,
202,200,196,205,200,219,240,225,177,216,186,231,237,222,208,181,224,215,228,242,209,222,235,210,213,233,227,231,226,206,219,217,205,212,222,234,230,217,231,225,234,217,195,188,156,199,199,167,223,229,246,228,222,224,226,220,232,233,213,225,230,206,217,220,214,227,222,235,200,96,109,224,190,172,36,65,101,162,181,194,162,162,163,146,125,161,147,104,94,96,64,70,54,85,75,93,90,92,101,93,78,81,84,71,97,188,160,200,220,213,251,240,226,187,237,163,163,196,144,212,200,248,247,203,191,150,122,116,93,93,47,25,26,52,108,131,109,147,119,86,143,162,169,183,103,57,52,91,97,62,70,50,22,21,110,155,146,83,82,148,151,126,123,30,76,62,130,159,115,93,126,137,179,183,110,48,39,69,126,228,246,216,240,223,243,226,239,247,206,153,237,252,234,240,140,32,5,25,26,32,54,34,24,13,35,65,62,91,73,114,34,40,36,91,152,120,103,72,83,170,118,117,124,30,59,27,85,127,147,159,124,118,98,112,128,129,119,116,135,153,129,155,143,124,96,88,83,40,64,36,22,8,55,35,37,69,14,46,24,22,58,43,62,53,37,40,15,38,20,63,44,60,92,89,7,182,245,237,241,225,154,104,156,233,249,229,134,130,104,109,122,205,209,125,39,21,29,15,54,34,13,17,39,47,51,59,49,26,22,51,41,35,32,51,67,25,29,24,34,25,30,24,26,44,25,39,26,43,60,63,36,84,62,79,63,56,62,80,94,122,104,107,104,102,90,72,56,48,52,32,41,36,77,84,84,76,27,38,34,45,44,46,31,32,18,24,64,97,93,127,89,31,32,120,141,101,95,81,137,142,62,75,55,64,37,73,56,156,234,237,247,247,220,234,238,209,221,228,223,246,235,234,231,221,236,214,219,245,220,234,216,234,244,246,234,240,219,217,247,241,229,222,215,200,233,246,221,225,217,208,229,196,225,230,212,231,212,209,237,231,223,217,228,115,15,8,26,41,3,25,37,28,2,22,23,6,9,0,3,5,27,21,21,12,29,16,8,40,193,177,172,192,178,210,185,170,191,206,192,182,217,210,198,216,204,198,200,194,189,198,221,203,197,192,195,178,202,192,196,187,204,185,221,182,200,189,190,204,191,202,209,187,213,190,202,205,191,193,202,193,194,197,181,209,198,185,212,188,182,198,199,193,233,197,198,212,202,192,185,198,194,210,192,222,199,199,184,205,204,210,209,212,191,195,187,207,188,190,221,202,206,202,206,212,188,198,210,185,212,191,208,194,203,197,182,187,223,207,197,187,222,217,190,201,218,189,196,201,237,208,186,207,204,197,190,177,197,232,200,202,188,225,195,206,184,221,205,196,209,190,218,172,169,215,173,216,197,219,206,193,191,219,188,198,202,213,211,193,214,169,215,193,214,199,210,192,189,203,228,204,200,197,213,191,216,235,207,208,215,211,201,204,190,232,197,214,203,206,209,206,191,190,228,186,210,197,196,202,216,176,214,209,198,223,187,201,199,206,231,217,206,189,216,190,212,203,211,210,224,186,223,225,205,200,207,201,211,200,225,204,207,241,202,232,202,218,210,196,221,219,188,224,196,243,224,204,204,230,187,213,188,210,225,206,193,224,205,222,211,228,229,223,212,214,218,198,222,221,215,224,229,233,220,234,207,243,227,215,211,210,218,238,237,226,213,207,224,209,192,184,182,184,172,193,214,201,220,227,242,236,224,229,229,238,237,209,222,232,224,206,216,237,222,224,214,215,226,233,200,58,123,184,233,154,54,102,109,190,163,174,166,156,201,180,144,168,172,91,120,96,84,86,71,55,34,63,69,48,59,55,89,76,75,40,72,151,176,137,185,196,226,219,218,219,242,172,189,235,143,151,168,209,175,57,40,30,28,20,4,38,44,55,67,76,118,135,190,170,143,156,151,179,223,211,111,58,62,107,98,40,28,34,28,80,204,245,181,123,188,206,174,197,177,149,170,162,218,185,125,192,211,204,202,192,129,74,67,53,99,231,248,213,247,242,228,235,230,251,198,147,243,248,241,171,22,12,37,25,16,54,35,36,33,25,53,34,94,97,97,109,98,32,41,33,86,114,156,158,157,160,107,113,135,121,171,178,171,194,137,130,112,134,140,146,137,120,151,108,90,9
1,67,59,38,30,26,42,17,51,35,47,35,30,17,45,62,92,66,65,74,29,41,59,89,27,28,49,24,41,41,55,60,33,69,70,99,252,246,240,242,194,149,156,208,250,247,201,141,136,130,116,149,230,206,98,13,9,30,30,37,44,35,30,17,31,43,14,42,34,25,70,15,30,47,75,22,28,33,31,52,17,3,19,21,10,12,20,24,35,29,26,31,14,53,34,63,61,80,69,69,84,64,83,97,92,93,93,103,105,81,78,90,142,110,107,87,62,42,15,20,42,37,35,25,32,29,27,69,76,99,107,70,24,82,135,77,63,18,44,76,99,77,42,47,58,11,53,148,235,227,251,231,233,203,239,239,237,238,223,242,228,203,221,216,238,232,230,245,255,218,223,237,226,229,219,222,223,229,212,215,220,218,209,240,215,229,232,197,238,211,237,239,240,178,253,240,229,211,223,212,213,213,209,208,131,4,3,17,7,12,23,5,7,28,19,5,7,2,0,14,15,32,37,21,0,23,0,21,28,204,210,166,195,207,192,188,185,200,193,202,190,194,193,211,221,227,199,188,180,200,192,201,212,201,193,205,220,173,211,220,205,199,208,215,192,228,189,228,201,201,226,203,177,211,180,198,197,191,190,212,194,184,201,211,187,216,183,205,218,191,205,220,213,222,204,200,214,181,192,184,187,205,213,192,200,208,197,207,194,203,166,196,213,195,224,213,196,224,191,191,187,197,177,217,201,208,201,215,201,217,210,187,186,208,199,216,175,217,227,222,206,185,186,202,189,187,224,207,215,199,226,216,225,214,188,217,202,174,201,224,194,207,216,199,236,206,201,231,197,207,179,195,214,182,198,205,196,203,201,221,196,218,209,213,196,211,207,203,197,196,190,204,173,210,208,208,180,202,227,205,228,221,206,203,203,201,229,213,205,202,197,217,211,203,203,210,216,210,218,226,221,206,199,208,202,232,207,190,209,209,202,192,196,193,202,218,216,221,195,220,200,201,217,193,188,208,211,199,195,208,221,204,201,191,206,223,224,207,198,201,201,207,199,221,199,242,200,197,214,222,194,220,214,220,200,206,212,215,205,234,203,218,207,203,209,199,214,220,195,206,238,224,193,184,226,218,209,218,232,217,206,232,221,216,209,221,211,234,229,218,238,219,228,218,247,224,176,189,204,153,180,203,186,210,206,230,235,234,228,225,210,224,218,224,225,213,208,226,236,206,231,220,224,195,222,221,244,210,220,209,40,109,228,225,218,122,156,149,160,160,150,132,192,199,153,145,122,128,116,122,96,73,63,59,66,52,63,69,66,63,62,75,97,85,51,85,161,141,83,82,121,182,224,215,229,243,166,204,241,125,69,14,31,55,0,24,55,118,124,146,161,212,203,203,170,159,118,130,133,113,110,87,118,150,169,117,58,49,131,147,61,47,61,41,67,138,194,111,50,134,148,133,168,128,160,170,116,126,123,89,133,102,117,150,117,153,115,66,66,146,234,239,251,247,253,214,243,245,254,189,178,236,246,209,39,34,1,9,16,59,40,5,25,20,45,59,45,81,122,99,106,93,57,65,34,27,45,53,73,110,154,87,143,153,119,161,145,142,139,116,158,118,120,97,73,49,46,64,37,18,37,57,48,25,17,17,12,24,53,34,57,83,63,54,42,74,113,46,78,79,83,58,72,66,49,25,33,41,64,66,51,63,58,64,48,154,237,247,254,241,188,163,149,241,246,254,133,123,96,131,129,169,235,151,86,91,67,82,64,48,56,27,44,39,14,59,68,55,39,68,57,30,42,42,42,32,50,69,62,45,13,18,27,15,17,10,46,36,55,62,22,28,9,26,26,37,23,42,18,33,54,54,57,97,97,91,58,58,103,86,93,100,111,116,93,84,68,43,59,57,26,10,41,51,33,31,30,63,81,78,105,59,55,41,71,60,41,50,96,91,64,66,79,60,21,112,234,251,245,250,247,238,230,227,230,224,226,216,235,235,246,229,236,225,237,221,239,245,233,218,234,224,234,228,216,230,224,229,201,237,209,209,251,201,209,229,229,216,233,233,220,233,219,234,232,234,238,222,240,226,222,217,221,222,129,6,1,8,3,12,7,0,4,27,5,19,11,9,0,25,34,2,19,15,22,14,5,19,23,199,182,207,200,180,177,155,197,185,187,186,208,216,205,203,188,199,222,189,198,189,216,206,179,192,197,208,215,199,201,177,204,193,189,205,206,207,18
9,205,220,179,191,176,226,195,216,183,198,215,199,198,208,231,199,184,205,196,198,216,200,178,217,199,227,205,208,198,200,186,197,189,201,196,219,203,195,205,193,183,207,208,209,204,204,215,218,217,217,205,205,204,198,193,214,178,178,219,198,195,176,211,195,189,194,211,176,199,218,198,214,213,204,198,197,204,201,215,172,199,207,198,201,182,197,204,204,201,206,195,207,193,202,205,194,209,167,176,194,194,182,188,208,219,187,205,229,187,217,196,215,222,207,217,208,226,206,210,207,217,198,236,203,203,213,212,177,218,188,215,174,202,186,192,193,211,215,206,213,192,231,201,214,214,199,198,190,214,199,216,200,231,209,229,220,202,202,189,215,189,211,209,227,204,194,193,175,217,191,186,222,206,209,208,208,197,187,208,187,216,230,225,227,212,197,215,209,212,204,207,217,207,187,202,211,230,199,205,184,241,186,212,178,198,223,200,227,217,219,220,221,228,205,182,246,202,203,196,230,214,209,197,208,235,214,210,208,208,213,220,223,200,206,222,215,224,214,201,207,218,232,237,246,235,245,222,222,187,203,152,196,221,213,239,224,226,240,233,228,223,210,217,199,212,224,238,231,233,226,229,227,226,228,220,223,211,221,219,227,226,231,180,50,95,200,238,224,151,148,152,170,166,183,165,161,169,113,102,126,151,95,138,94,90,84,53,61,63,62,49,73,67,69,62,45,72,51,56,107,104,123,143,199,216,212,215,207,238,146,207,243,160,87,118,108,67,96,202,219,216,253,226,255,228,233,240,197,233,226,201,98,99,148,117,184,185,200,165,26,48,72,117,53,32,50,34,58,104,153,132,84,70,67,125,171,146,112,99,93,104,61,53,130,111,115,133,121,167,165,84,30,118,238,223,231,218,239,234,218,226,229,190,234,253,174,78,5,10,18,3,45,41,40,5,13,44,57,50,34,34,62,130,98,88,95,100,63,56,67,62,22,50,94,90,148,111,126,135,116,111,103,66,41,48,53,46,46,17,50,21,25,18,55,48,52,32,39,28,31,52,40,40,46,102,71,54,54,88,87,58,40,54,112,151,151,90,79,107,96,123,130,129,97,115,109,144,122,84,190,248,252,241,212,173,116,188,233,209,112,123,130,135,145,226,235,137,57,126,131,142,50,101,74,64,72,62,60,54,46,64,78,67,51,23,38,79,60,63,46,48,24,18,33,12,25,37,24,28,26,44,50,62,37,11,24,57,47,52,57,56,39,17,2,15,37,48,44,43,75,71,96,82,90,74,50,71,70,107,103,94,104,68,66,45,55,36,14,21,20,47,92,121,102,71,49,39,33,46,85,97,92,79,64,49,52,70,194,228,240,255,232,238,243,224,223,213,218,234,221,225,214,225,240,245,234,214,238,240,227,222,226,235,220,230,242,243,220,224,216,225,224,209,233,220,231,235,230,232,220,207,254,233,230,223,211,228,240,226,238,217,216,240,236,216,207,228,101,19,3,4,25,0,9,20,11,9,1,21,15,5,1,14,2,10,14,7,17,3,28,14,17,200,226,205,198,194,161,187,180,197,192,179,198,194,187,181,173,186,193,194,200,194,179,186,200,197,204,189,208,181,194,208,193,166,197,209,207,200,188,198,202,189,211,188,216,197,200,214,214,206,218,228,201,195,208,226,174,206,219,180,182,192,182,184,201,193,217,204,199,193,179,214,187,186,204,182,164,198,208,195,176,195,204,179,179,188,179,205,199,202,187,215,194,208,204,219,227,207,207,202,200,194,195,203,221,238,197,163,203,192,177,201,205,223,207,201,188,184,198,218,198,191,192,187,201,235,185,208,203,193,192,201,201,209,210,204,199,215,197,206,186,199,223,225,224,204,213,187,191,188,197,234,199,196,202,205,210,199,193,231,201,211,200,205,225,204,200,192,198,209,192,219,220,207,195,217,204,207,203,212,194,200,188,218,197,182,214,187,219,220,204,200,194,200,220,179,214,191,190,211,183,195,191,214,214,188,208,214,213,207,206,208,227,198,192,197,215,188,241,222,200,216,211,184,204,219,205,230,202,203,200,188,179,208,215,222,215,241,178,215,169,195,212,212,188,217,184,193,183,222,232,227,218,217,249,211,199,231,218,213,201,211,211,
222,221,227,199,215,216,221,226,219,208,208,202,220,233,233,222,224,234,248,233,182,171,153,178,187,209,192,224,222,244,223,237,219,231,218,224,248,222,217,209,205,206,225,231,241,223,209,232,210,219,223,213,219,236,222,226,203,222,171,53,89,172,248,213,156,164,143,190,176,165,157,147,162,130,136,169,134,116,82,9,24,31,56,26,11,42,48,78,27,50,82,87,62,28,26,54,106,191,221,247,244,234,210,216,226,144,206,230,196,193,229,234,213,186,243,239,239,255,243,238,240,226,215,213,180,216,189,102,115,170,182,240,242,252,198,83,63,90,113,41,29,47,42,91,213,244,228,147,112,199,252,254,200,160,139,148,221,101,88,230,227,239,217,203,237,190,154,75,102,227,225,235,230,214,229,237,221,225,196,228,228,106,42,5,21,5,29,58,18,23,36,65,48,50,54,21,78,113,119,110,102,120,123,124,98,75,63,26,83,128,87,152,121,70,53,44,6,57,40,23,11,48,67,63,49,75,46,40,55,43,69,59,29,30,37,55,49,60,36,46,68,119,154,197,168,147,135,146,176,145,196,164,131,150,121,141,141,120,104,61,99,45,59,39,66,20,133,210,255,219,155,88,172,249,188,142,147,134,122,165,226,196,123,35,11,35,29,31,60,16,52,61,18,27,53,34,42,70,68,68,85,71,82,67,58,67,68,47,48,46,20,46,65,11,34,34,34,74,44,38,28,59,54,42,52,67,35,19,4,35,62,33,25,28,36,9,23,37,27,39,69,49,87,72,100,76,86,101,105,101,107,94,63,75,68,38,89,65,105,106,102,44,49,83,117,125,115,80,78,21,42,83,208,251,231,255,241,221,240,219,217,220,219,230,234,215,238,221,249,229,224,243,238,219,216,226,207,207,251,238,207,219,225,226,235,224,227,213,214,244,210,220,234,241,226,232,216,235,232,244,225,223,211,235,233,222,231,215,241,220,209,236,231,139,2,14,2,2,20,16,5,15,25,17,19,10,11,5,0,8,14,7,18,0,6,14,27,5,207,186,179,189,225,221,172,223,223,208,186,208,161,193,187,185,185,162,179,191,187,199,205,213,161,188,209,186,201,206,203,220,197,195,186,192,185,215,196,200,187,201,183,212,186,201,181,213,184,204,188,194,192,180,203,214,224,189,202,199,186,208,216,219,213,187,189,202,187,208,206,189,195,207,175,180,203,189,197,206,199,186,196,197,197,191,205,220,200,197,214,211,203,195,194,199,192,194,208,200,196,202,206,216,202,196,212,192,193,194,212,188,194,181,197,193,208,208,190,207,175,200,212,187,171,186,214,213,206,205,190,214,190,202,169,203,199,187,180,184,195,222,199,202,188,200,201,200,215,215,195,198,221,194,203,225,231,235,213,194,217,200,189,213,214,209,197,216,219,228,227,219,203,213,197,206,226,207,197,206,206,176,199,224,192,220,188,196,189,194,206,185,211,206,213,207,201,198,197,231,191,210,185,209,188,207,194,201,206,182,200,205,211,218,215,189,188,194,187,197,177,215,202,212,207,218,211,204,201,200,202,192,203,203,214,215,203,195,187,208,206,201,211,211,190,185,190,215,218,184,203,198,142,191,175,191,238,214,205,196,212,212,213,211,205,204,223,214,207,224,208,190,232,215,227,224,242,235,226,218,188,173,164,160,185,220,205,215,225,233,201,228,207,240,212,219,233,226,246,234,210,203,219,231,218,210,229,201,214,214,238,209,230,238,202,218,200,230,212,231,141,51,68,178,238,217,174,131,168,195,193,177,153,152,187,167,154,177,137,85,5,5,10,19,17,16,10,34,34,19,6,45,52,59,12,15,110,117,179,178,234,233,227,205,191,219,239,148,218,214,172,215,250,203,196,215,249,232,227,225,218,185,190,195,140,143,153,214,155,112,158,160,178,206,232,247,156,73,66,92,122,48,28,26,18,78,232,246,246,165,133,225,254,233,172,176,199,224,235,143,148,242,233,222,188,197,241,224,189,96,63,199,199,238,233,227,244,206,244,213,192,237,123,12,34,1,28,27,56,19,30,28,26,40,48,69,22,83,202,166,137,81,51,89,105,109,107,130,110,77,116,125,102,151,85,36,28,43,15,32,54,49,24,58,72,76,56,60,58,63,45,75,74,62,30,38,58,95,140,158,162,1
96,131,197,194,208,198,174,149,128,126,106,69,61,20,52,46,55,23,41,17,12,48,17,28,35,35,31,7,94,146,183,139,109,176,241,142,130,150,133,139,181,241,171,60,20,1,22,24,19,19,32,7,17,21,26,22,23,5,38,28,27,42,15,37,61,49,45,57,74,67,50,67,66,78,60,69,64,41,60,53,75,67,29,43,28,60,73,29,39,55,98,38,37,49,40,15,34,26,7,18,25,50,56,41,48,61,84,69,63,73,96,90,88,101,91,86,103,92,90,103,94,91,133,145,178,151,125,45,26,54,34,89,220,241,252,242,251,236,228,233,209,240,205,219,240,218,234,230,237,241,233,234,240,241,236,243,222,231,241,198,209,203,226,222,213,239,246,215,216,217,214,227,239,214,223,231,233,222,233,227,230,219,249,220,228,207,230,223,238,234,219,213,241,243,131,3,0,8,14,2,15,17,11,2,5,7,14,7,5,10,0,12,8,13,9,14,23,15,27,219,205,167,196,200,171,195,181,201,187,202,191,214,200,212,187,190,199,189,191,206,198,210,178,192,209,191,177,197,175,194,196,193,192,177,197,191,182,190,211,194,208,206,210,192,186,174,171,201,168,172,209,205,201,193,198,210,205,218,177,197,206,185,186,192,198,209,190,191,200,204,189,201,188,187,173,192,203,175,201,209,193,173,190,194,212,172,201,199,188,212,218,197,197,197,199,180,203,217,207,196,231,190,210,188,195,196,208,205,190,196,196,211,199,212,202,217,208,225,191,206,202,224,194,194,210,193,209,169,218,210,212,180,196,196,211,187,207,219,203,217,194,188,180,199,220,194,208,207,193,189,214,209,201,180,208,215,207,209,204,209,202,177,209,199,217,229,209,211,196,189,225,214,193,210,208,206,174,216,186,199,209,180,184,169,212,216,197,220,193,181,206,225,170,190,199,203,202,201,195,209,187,221,213,192,183,202,195,229,236,196,203,203,186,195,205,183,192,182,203,199,200,196,212,186,183,204,194,190,194,199,206,198,190,196,198,183,226,205,212,208,225,234,232,186,211,226,211,218,198,208,128,41,71,151,212,197,218,184,224,212,222,204,205,204,199,197,226,219,231,243,207,229,226,237,205,193,203,173,173,205,189,216,210,216,241,217,240,239,247,221,215,215,195,229,225,236,200,221,199,206,211,222,227,241,220,224,199,215,212,211,215,238,214,230,237,216,225,220,215,136,49,141,227,249,230,166,148,160,178,162,177,172,174,205,175,174,173,141,48,15,8,28,33,47,18,20,29,57,27,27,63,76,55,28,53,130,224,221,208,228,250,237,216,209,234,243,148,197,208,206,218,191,156,138,203,214,197,169,166,127,151,174,162,128,153,211,247,114,120,165,159,174,192,229,247,171,68,42,83,111,53,25,47,7,72,199,234,210,152,118,228,244,183,99,187,200,174,162,164,204,242,225,240,161,198,228,211,199,129,88,210,214,221,251,239,230,228,244,212,196,124,35,31,24,18,42,62,18,11,23,38,48,60,54,21,66,196,254,233,169,83,57,66,52,80,72,81,81,74,122,130,141,94,54,2,10,3,12,50,75,76,16,51,87,97,21,22,58,87,133,161,128,160,180,185,191,195,203,210,174,186,140,120,121,61,39,49,35,24,6,19,33,36,14,17,15,35,21,24,19,20,30,9,29,20,7,21,35,0,43,143,143,126,191,209,127,133,140,123,150,200,232,129,28,19,2,29,18,4,32,27,10,24,44,12,33,24,28,32,7,3,17,11,21,1,39,4,12,44,21,44,60,47,34,52,44,39,51,92,78,76,62,66,68,53,68,92,57,57,61,64,25,44,26,51,15,43,49,33,43,66,79,66,36,16,21,28,41,44,47,86,70,85,93,85,97,97,108,99,84,105,105,96,114,84,67,58,65,29,59,77,191,246,252,235,221,229,217,243,232,239,214,221,236,244,235,231,238,246,239,234,233,231,232,207,235,208,216,232,199,224,239,240,223,228,239,229,222,235,250,239,247,240,245,219,226,229,204,218,233,219,222,233,234,235,223,217,217,215,223,235,239,220,233,119,3,9,5,22,9,4,0,2,5,0,0,19,18,2,3,6,0,7,2,26,15,5,5,0,195,181,194,186,207,195,214,180,190,192,195,189,178,189,201,176,232,199,207,193,184,171,185,207,196,175,195,209,198,216,199,196,190,195,195,192,218,209,184,151,217,182,190,202,18
7,207,198,195,169,215,192,195,190,203,199,187,197,191,196,204,203,198,181,194,181,199,199,195,211,175,194,217,198,188,197,162,216,203,174,190,196,184,195,194,190,229,196,186,205,198,225,218,187,222,200,199,177,212,196,193,200,185,188,229,202,184,216,195,198,209,172,200,183,178,185,218,181,202,193,189,203,186,226,182,198,212,200,180,184,182,213,181,213,184,181,242,184,222,199,178,210,195,207,203,199,195,196,198,203,189,193,210,220,184,191,212,216,203,196,215,210,201,216,193,189,185,193,191,190,200,186,193,191,191,214,196,218,203,186,186,215,193,199,201,199,189,200,216,209,207,190,210,206,200,191,177,207,230,203,210,198,183,172,191,191,199,188,193,204,199,197,199,208,214,190,190,205,204,207,201,182,223,199,196,198,228,209,179,200,213,195,214,204,210,213,196,204,191,229,208,209,196,179,191,201,196,162,205,195,213,183,111,30,74,182,210,204,199,179,223,217,226,189,217,211,219,224,216,216,218,217,217,191,212,174,202,182,187,200,202,216,202,240,213,202,228,223,208,214,245,234,196,245,245,196,206,228,212,246,222,199,210,215,235,217,212,234,221,208,198,211,236,236,216,227,207,226,198,215,215,112,60,141,225,254,248,155,157,158,172,161,149,184,184,181,129,139,156,165,159,98,127,154,99,29,20,52,43,46,58,32,87,51,60,83,79,189,236,234,215,227,233,193,203,199,209,228,132,182,180,176,211,178,96,101,191,157,166,171,167,199,195,225,220,242,214,245,227,39,178,182,180,172,198,223,242,199,65,37,76,113,80,28,48,35,78,234,228,212,163,90,212,202,129,117,179,187,153,154,152,227,249,229,235,174,189,210,219,197,186,66,117,222,215,209,220,219,214,251,233,120,80,0,9,34,35,66,36,21,13,38,46,44,72,20,23,188,243,255,177,132,93,59,91,72,76,95,70,30,57,126,136,137,106,31,24,1,27,15,29,66,99,94,119,100,66,57,99,149,171,196,183,204,197,205,169,127,103,81,57,23,24,64,25,4,25,4,0,34,9,9,9,20,30,55,14,16,24,27,34,15,27,23,33,36,2,12,29,17,20,18,62,119,168,234,171,125,151,145,144,144,208,222,76,77,14,18,13,18,32,7,10,22,32,26,42,13,19,11,1,27,23,20,29,9,24,12,10,41,5,9,5,29,5,24,17,8,25,44,52,37,35,58,60,44,89,79,69,100,73,78,39,26,16,53,36,36,33,41,21,78,78,87,51,32,26,82,67,72,53,82,52,34,47,66,72,110,107,92,113,92,92,64,54,63,37,51,52,52,96,71,117,199,247,241,233,243,235,235,234,252,210,228,217,216,242,225,225,226,228,248,225,218,229,233,222,206,242,242,242,227,243,233,222,223,205,213,231,220,213,250,233,238,233,225,240,250,237,240,205,233,229,220,244,239,235,230,218,201,231,238,207,243,205,225,113,4,17,3,6,2,21,6,22,7,28,8,1,11,2,12,6,9,19,16,19,20,3,29,16,179,182,171,174,182,193,189,182,207,194,195,184,202,210,195,192,199,194,199,200,189,165,180,173,220,190,195,197,195,201,185,204,179,177,175,204,201,194,204,212,171,182,188,189,216,204,228,185,197,177,186,192,184,193,196,171,177,205,203,172,178,162,186,186,185,187,184,211,180,183,181,190,193,191,199,184,173,188,190,185,187,192,175,194,202,200,188,178,195,188,199,212,186,189,189,211,205,204,199,201,179,205,182,187,208,198,208,206,217,227,189,186,179,206,173,206,192,222,198,218,199,167,189,204,200,208,198,224,207,206,204,232,197,189,205,184,209,189,176,224,196,190,185,202,179,183,211,181,199,225,188,214,221,218,194,202,195,205,206,198,191,185,212,196,231,196,216,202,184,203,208,203,199,199,186,207,200,220,206,192,200,197,204,192,208,198,208,232,185,209,204,177,187,215,209,190,194,206,201,206,231,199,203,183,218,223,207,195,194,194,193,209,203,200,182,212,214,194,221,192,201,203,210,213,189,206,188,216,194,178,194,215,193,174,190,195,202,207,204,190,233,191,211,234,205,179,214,235,223,187,210,161,136,180,215,247,194,195,222,195,202,218,214,223,205,216,201,209,215,193,219,184,18
3,204,179,170,179,202,207,233,190,218,208,228,221,216,208,221,218,211,223,220,221,225,227,208,226,231,206,216,217,227,234,243,194,216,221,210,233,197,211,235,223,222,220,202,229,232,199,179,123,48,146,225,231,221,200,185,152,189,154,147,193,121,172,121,152,178,173,205,203,209,226,146,63,36,60,41,39,29,45,99,91,132,109,137,207,249,231,190,197,240,211,203,210,240,220,155,193,192,199,173,153,144,121,171,174,186,229,206,248,226,241,200,196,179,216,137,38,189,198,176,161,199,237,242,170,54,20,63,109,67,42,19,3,74,223,239,212,159,138,203,200,165,168,174,202,160,127,160,199,225,246,224,149,184,208,203,224,243,134,57,133,162,197,225,242,244,252,188,56,27,18,12,11,23,29,11,47,11,23,45,69,59,25,167,252,225,225,212,160,113,52,70,54,103,73,67,75,84,126,116,144,109,42,32,34,70,46,58,73,62,143,150,169,134,136,155,150,120,102,80,57,51,59,33,22,18,25,17,11,16,25,15,22,11,28,19,38,35,33,17,25,40,49,18,41,58,23,32,38,25,13,20,3,20,18,23,32,13,26,26,71,171,251,223,129,135,142,171,167,250,200,73,38,12,21,7,10,32,39,8,16,23,30,5,16,31,20,10,13,9,14,24,28,17,42,20,17,37,22,10,23,9,53,18,31,39,10,19,4,28,2,34,51,41,44,47,53,76,67,73,75,78,72,96,68,53,42,51,55,82,70,43,58,67,72,71,111,83,68,65,37,21,57,42,43,38,70,92,101,121,73,40,60,49,72,93,120,113,91,152,234,250,244,255,247,217,240,223,219,224,239,254,227,228,224,249,229,225,227,247,227,218,233,232,205,227,241,237,214,239,230,223,235,225,251,229,235,216,212,237,237,195,234,239,233,225,228,243,234,239,223,243,218,212,223,216,228,237,226,243,251,241,214,125,21,3,3,2,17,19,10,11,32,9,27,21,19,8,26,9,21,19,8,10,12,28,2,17,194,181,183,184,188,185,179,218,194,166,183,191,207,199,171,200,210,183,182,179,189,199,181,181,183,173,193,195,204,179,188,211,216,192,207,190,189,226,202,195,207,197,187,204,190,206,187,168,196,167,198,169,199,200,174,173,175,195,178,166,212,164,167,197,207,210,207,201,192,157,164,218,197,205,218,191,199,212,184,194,209,189,197,180,216,172,206,196,207,204,200,205,205,205,186,198,212,192,204,200,210,199,202,188,192,181,180,201,201,195,209,183,211,215,186,207,185,185,200,194,189,210,177,213,204,187,229,202,189,194,204,210,198,186,200,198,217,204,189,200,221,199,197,207,212,197,195,188,195,197,194,204,200,222,196,188,209,180,182,166,198,207,206,219,199,194,204,205,203,201,227,180,192,193,199,204,196,186,216,210,213,210,184,189,186,213,188,218,231,192,198,176,201,210,220,192,235,190,201,203,201,199,184,215,208,194,209,187,207,203,202,189,188,178,203,219,204,197,210,191,207,205,201,197,212,213,222,176,197,206,209,217,226,199,210,212,241,210,216,210,195,217,207,189,195,215,204,177,210,205,214,192,210,229,213,232,215,193,211,220,208,225,224,230,243,215,188,200,182,171,182,164,178,207,203,199,201,232,215,190,218,227,230,204,208,218,225,225,209,197,207,211,224,223,226,200,204,238,222,229,206,233,235,202,231,214,204,204,227,212,207,228,187,236,223,231,223,219,219,186,119,97,152,234,247,209,162,167,156,206,172,135,97,84,140,158,155,170,196,200,163,186,182,162,30,61,74,24,44,36,71,113,40,158,190,178,232,246,240,197,209,226,197,193,231,215,233,165,147,204,249,147,208,144,149,212,195,228,240,201,239,235,175,192,161,162,252,161,98,239,207,199,176,217,250,237,175,40,52,36,71,96,22,4,24,58,227,251,214,146,166,255,221,146,176,205,239,163,95,196,253,241,246,221,179,237,197,202,253,254,211,83,57,126,197,231,219,229,238,91,34,0,1,25,55,46,25,17,9,23,53,54,44,37,138,233,244,251,255,234,214,136,27,69,26,75,37,45,42,80,125,124,161,66,41,64,85,161,169,192,180,142,140,133,126,76,45,24,48,25,23,27,11,31,25,11,37,8,10,25,21,11,32,31,0,20,10,5,24,37,18,72,54,56,42,70,53,52,
2,216,195,194,202,216,215,218,206,207,225,224,228,222,195,240,203,208,215,210,211,226,218,214,210,209,206,216,219,181,217,215,222,216,223,193,220,199,210,208,198,216,203,195,228,229,201,206,221,216,194,201,236,218,215,233,234,206,211,210,194,215,198,222,222,218,192,218,206,224,204,207,190,217,130,195,187,74,15,20,28,12,12,56,35,31,19,13,66,45,79,60,45,37,37,34,92,27,39,35,13,24,23,31,28,28,22,30,24,39,32,29,22,20,19,58,91,48,71,71,79,50,13,87,89,56,22,27,75,84,108,182,173,121,219,203,162,224,180,144,230,201,226,247,208,221,201,214,197,186,194,207,229,175,47,121,58,63,91,101,124,61,55,173,247,238,144,37,169,227,231,227,169,236,185,201,220,53,21,21,12,23,26,33,13,16,53,27,56,35,77,220,240,225,185,142,218,240,231,234,230,170,135,190,227,235,249,237,235,237,144,57,43,96,55,50,100,82,92,107,101,49,107,143,129,119,25,15,7,26,42,18,36,41,6,31,38,106,135,54,74,28,19,34,38,14,41,17,7,25,17,8,31,31,15,35,78,55,78,79,107,124,75,69,27,16,45,32,33,31,37,39,36,54,42,27,30,38,38,50,33,13,11,8,21,17,30,13,19,31,23,41,18,32,23,24,44,58,22,33,83,56,75,63,61,35,45,78,53,35,15,1,2,20,10,53,48,31,21,33,19,56,28,37,32,21,30,33,21,56,52,26,21,26,40,13,24,22,26,25,31,33,42,35,44,97,73,49,61,72,62,65,77,51,22,52,8,33,55,36,63,41,31,42,35,39,20,45,83,74,54,60,65,79,71,41,52,102,140,135,105,109,110,100,76,74,89,99,71,71,187,235,229,252,242,238,245,238,223,224,255,244,242,235,224,196,239,223,249,231,249,236,245,255,235,225,253,239,244,239,250,225,243,211,211,243,249,237,241,212,223,248,219,222,238,237,247,229,230,223,246,232,232,226,209,241,241,237,240,227,239,215,122,15,9,4,4,50,3,14,10,2,45,14,1,8,0,13,10,8,24,13,2,8,2,15,0,156,185,198,187,207,190,176,183,190,201,175,202,178,168,198,196,204,194,164,177,155,198,217,184,215,188,190,200,197,182,207,187,180,192,197,168,204,183,197,181,178,177,198,188,190,160,186,214,187,192,202,221,180,178,202,180,209,193,215,188,180,191,194,195,189,177,178,176,188,204,168,175,159,224,187,221,200,179,206,190,194,202,210,204,219,188,164,179,185,205,191,169,218,199,212,180,195,185,195,207,200,220,218,187,209,187,181,190,189,215,201,214,208,203,189,189,211,203,201,196,191,222,176,200,212,194,200,180,184,180,217,184,199,187,195,190,208,199,179,178,199,198,167,184,185,196,168,204,185,192,204,194,206,193,191,185,184,207,206,189,190,216,215,158,197,194,211,193,192,205,206,196,197,210,195,198,188,192,229,192,204,199,202,223,213,193,206,185,220,199,182,178,179,216,201,196,179,222,211,200,205,189,202,210,194,203,225,199,176,191,179,197,166,176,200,184,210,172,218,202,198,200,203,212,174,186,203,203,201,192,218,241,199,191,220,224,224,205,179,222,213,210,209,192,216,202,229,229,204,206,205,208,214,203,221,208,197,191,202,203,227,217,198,207,206,194,188,178,198,202,190,198,208,216,227,192,231,204,206,223,210,196,203,199,231,234,210,223,208,208,208,208,197,215,222,213,211,224,198,223,216,214,222,206,220,210,238,226,217,218,204,209,207,194,199,193,181,137,212,254,127,76,33,11,18,11,50,37,47,40,5,36,36,31,50,19,41,12,7,11,49,41,24,32,39,32,28,7,15,46,6,43,18,36,48,18,41,19,48,96,46,42,62,52,40,51,62,51,35,59,133,244,222,210,232,212,111,135,148,172,235,150,156,245,214,214,229,216,226,233,229,204,194,195,231,240,185,77,157,92,75,56,49,98,72,66,209,239,224,64,40,198,207,243,223,213,240,196,160,116,13,38,0,7,37,48,44,55,45,71,77,47,65,211,251,246,179,142,187,237,247,239,236,170,98,152,244,236,234,219,241,198,134,97,42,82,97,38,69,96,67,39,42,97,115,112,120,159,92,19,24,10,7,33,53,50,51,16,35,96,115,76,33,40,16,38,13,39,0,8,1,7,12,5,32,27,0,34,79,76,119,93,83,92,114,89,85,35,56,62,65,25,40,13,44,31,37,16,7
,7,60,14,21,36,45,31,44,31,45,38,12,14,40,14,13,22,4,13,23,27,44,44,31,29,61,67,64,67,59,62,26,56,7,14,27,13,24,33,40,33,30,23,33,39,42,32,23,29,32,31,4,25,46,26,36,31,25,32,44,15,54,33,43,60,44,35,39,57,69,59,45,25,30,51,51,68,54,62,64,34,44,17,35,32,26,22,31,8,32,36,48,86,76,90,53,53,68,74,13,71,93,114,131,129,126,81,90,111,97,79,50,60,56,159,228,238,247,233,233,239,246,244,245,234,239,247,231,226,239,221,221,246,241,234,249,233,220,221,249,223,222,241,233,241,237,233,231,212,241,238,234,230,226,214,248,237,246,224,247,222,224,222,232,241,249,249,247,221,222,237,235,237,236,249,221,105,0,2,13,3,25,40,2,0,8,20,22,11,5,12,5,10,5,15,8,2,13,26,5,17,187,188,149,187,164,181,181,192,195,178,168,192,183,176,173,187,180,191,185,204,166,182,201,181,185,178,188,203,197,181,206,158,218,194,172,221,171,176,214,159,179,175,201,217,203,179,188,194,175,157,196,194,217,186,181,204,181,188,172,200,210,203,204,205,196,213,195,185,199,211,198,198,192,205,219,174,186,205,169,219,224,211,205,201,177,201,187,207,191,194,194,206,198,164,193,207,199,192,194,195,171,204,176,189,154,202,191,185,202,190,189,195,186,211,174,200,200,194,178,204,172,189,184,206,197,208,182,221,177,210,198,193,207,183,190,188,192,180,189,172,191,187,191,199,189,198,212,179,178,188,191,201,189,174,192,176,215,196,193,189,188,178,209,180,189,228,188,176,186,183,211,235,174,184,207,196,200,196,207,182,219,199,221,198,201,215,184,186,199,206,204,166,219,195,205,217,232,184,201,202,206,187,201,196,197,195,178,203,216,208,209,198,183,193,174,203,184,199,198,211,232,190,204,193,206,203,209,185,203,197,203,205,211,228,206,224,223,195,228,179,205,227,199,216,205,202,199,213,210,205,200,213,224,211,226,205,209,213,227,212,213,202,186,225,232,209,231,200,209,200,211,195,211,207,217,232,223,194,203,215,220,223,195,211,224,213,235,227,235,194,212,228,224,221,190,215,213,239,209,206,233,197,216,184,223,221,217,219,210,203,229,226,214,207,201,204,220,174,177,232,132,114,11,31,95,22,49,28,49,46,65,58,43,51,31,10,29,46,34,40,41,39,23,47,22,17,36,25,37,25,25,27,43,27,25,20,32,14,46,81,49,60,33,36,53,15,23,38,133,150,221,244,232,209,235,125,76,116,163,233,220,135,210,244,205,205,224,229,218,203,230,199,164,185,202,249,157,76,158,86,73,13,21,87,55,44,208,250,155,40,95,241,128,134,202,169,235,135,115,26,5,19,9,52,47,40,16,30,47,62,56,55,204,249,247,216,124,193,217,246,216,218,182,106,116,213,220,242,241,239,187,149,218,126,52,69,100,70,76,107,73,90,32,72,95,101,150,130,77,1,53,28,30,43,41,32,49,36,7,58,60,30,16,30,32,15,40,26,5,12,17,34,19,55,1,23,17,52,71,57,90,83,50,107,117,117,105,83,68,37,49,29,50,42,21,47,25,27,19,23,26,41,32,49,24,29,39,19,15,29,24,21,25,15,18,21,25,44,35,49,23,52,57,5,15,31,65,77,70,60,21,51,11,18,20,4,42,73,63,73,40,16,24,13,36,25,22,15,16,39,28,21,41,25,14,37,42,53,44,8,30,118,140,114,116,110,63,70,62,57,91,52,50,9,60,86,86,88,92,56,89,74,48,43,35,59,37,41,20,36,42,67,46,70,64,81,57,66,56,64,89,111,111,96,112,90,66,64,79,95,36,41,36,120,244,221,246,233,223,209,247,248,218,235,240,229,241,249,240,241,247,229,221,229,244,224,223,228,229,229,234,237,252,242,249,245,230,243,239,252,230,213,224,233,226,245,242,240,233,223,225,245,242,238,233,249,217,234,218,244,223,230,231,231,240,108,3,18,0,18,15,15,45,22,8,29,36,7,15,4,13,1,18,1,2,7,31,9,24,26,168,186,207,195,191,200,155,189,194,185,199,165,192,168,195,174,189,190,187,172,197,184,189,191,194,166,206,173,180,180,179,193,201,190,172,194,179,196,202,167,204,175,196,211,186,195,185,184,197,213,182,189,207,186,197,215,195,192,182,184,205,179,213,218,199,181,176,186,216,223,186,195,196,193,
208,192,181,185,196,220,169,179,186,191,214,206,196,202,182,189,206,202,189,196,186,201,188,181,207,194,208,196,208,205,205,180,213,199,187,173,215,181,198,205,193,190,173,201,207,209,226,191,193,180,204,190,205,189,189,188,181,190,183,200,210,185,213,197,178,189,201,170,189,195,188,178,189,189,185,178,196,196,174,194,197,195,195,189,179,204,217,196,180,199,204,187,184,192,171,202,198,219,193,183,212,177,211,203,182,191,213,183,187,194,209,218,188,207,206,167,198,208,204,210,196,175,215,209,227,178,213,211,188,200,191,200,178,216,167,208,184,196,179,165,193,198,206,190,201,207,214,196,218,195,219,190,211,202,220,195,222,219,209,221,208,210,208,208,226,223,204,203,211,230,205,201,204,197,229,221,195,215,206,189,206,221,227,229,212,179,213,233,198,213,194,203,191,175,209,203,199,198,220,230,219,222,229,211,213,218,221,221,219,198,196,218,208,233,224,225,213,228,220,224,218,214,212,219,230,204,215,185,185,197,228,242,198,218,199,223,232,213,211,213,166,174,236,174,220,198,107,91,59,171,220,183,212,251,250,255,222,229,202,179,171,159,228,137,63,33,12,48,33,39,17,20,26,35,29,24,26,37,12,32,59,76,92,48,18,42,38,50,35,42,49,73,162,175,239,217,242,239,155,135,188,113,121,224,184,246,231,135,224,242,206,206,219,214,213,171,233,228,181,181,204,246,154,61,159,73,58,48,16,81,50,66,220,238,115,40,86,118,58,160,215,207,206,91,50,13,11,22,38,35,33,24,36,74,54,49,26,142,255,239,253,176,202,222,234,242,201,185,142,154,187,247,249,220,189,175,124,189,249,132,36,32,70,80,69,161,132,143,31,63,77,87,143,130,56,25,26,15,35,61,56,24,26,14,17,32,38,18,17,1,17,32,43,18,16,41,20,13,36,34,7,35,33,93,94,96,122,91,81,83,132,134,43,57,72,51,81,24,35,29,31,33,40,25,38,32,30,41,45,21,57,39,11,50,3,12,18,16,39,41,34,10,25,18,48,29,28,47,28,53,40,40,46,59,69,53,53,26,28,35,5,18,40,24,32,48,31,42,2,33,38,31,19,12,33,42,40,31,2,27,8,28,26,23,19,29,71,154,177,179,177,165,147,115,56,56,89,44,7,11,61,52,62,38,42,71,65,86,123,117,88,66,17,13,42,27,64,92,83,49,77,60,47,74,47,48,88,111,109,103,98,107,59,26,45,86,74,79,14,61,219,220,240,247,227,222,239,249,249,209,243,239,232,250,246,232,230,211,228,233,228,232,230,228,227,240,228,230,204,217,228,239,224,235,213,237,189,238,195,238,251,236,240,223,237,236,240,205,236,228,237,248,246,214,224,245,240,236,242,224,229,112,7,11,11,6,14,2,13,11,32,11,29,20,3,0,35,13,3,2,14,0,33,14,7,0,179,182,167,174,191,191,201,186,169,181,184,193,223,169,201,195,192,203,179,177,199,173,177,187,207,177,195,182,197,165,199,191,194,218,212,191,197,195,200,195,169,169,190,201,172,194,211,167,207,192,209,197,197,186,192,204,204,196,172,225,218,212,185,184,174,202,205,214,199,217,185,180,205,185,179,181,192,211,181,228,218,205,192,166,182,208,222,187,200,196,194,212,191,179,192,200,204,171,178,184,216,196,216,188,194,185,184,189,191,165,199,191,212,194,204,198,191,197,195,195,198,199,192,201,182,195,183,191,203,186,183,182,199,203,184,203,198,218,216,178,216,200,188,184,188,192,212,195,198,201,204,210,184,217,223,213,195,198,174,219,195,183,197,214,185,196,181,186,209,212,191,197,201,221,219,192,183,184,218,211,203,195,206,214,201,206,198,212,195,181,214,199,219,182,212,209,195,196,219,180,206,205,191,191,199,205,233,195,203,187,211,205,207,217,195,206,193,192,209,223,206,219,209,220,203,219,210,189,224,222,202,205,224,190,222,208,192,202,218,199,225,230,193,204,184,212,220,205,218,240,218,227,211,212,204,228,212,222,220,215,198,206,214,185,225,209,208,218,199,207,211,207,188,214,233,193,209,209,228,220,218,221,212,195,220,202,210,208,195,234,204,220,194,209,236,201,212,234,209,214,212,197,212,188,236,216,212
,224,224,228,198,217,221,188,198,204,215,189,195,182,117,114,41,161,242,250,246,241,242,242,251,244,243,214,249,247,244,178,51,42,128,133,80,24,26,17,20,55,34,34,21,40,25,98,197,253,250,87,131,170,146,159,177,171,176,241,253,248,237,228,231,181,89,135,201,150,189,177,208,255,186,149,221,204,236,201,230,222,219,207,229,238,144,208,208,252,142,66,101,72,47,37,46,111,42,52,201,201,104,60,34,3,57,214,245,211,192,37,21,28,9,31,51,54,25,24,70,43,64,46,110,238,240,244,177,206,251,248,253,206,176,97,144,197,225,230,245,225,151,143,184,210,240,153,6,58,87,88,37,78,120,104,48,72,72,115,137,125,62,23,42,33,34,22,33,36,36,0,26,8,26,23,20,20,12,31,37,22,34,38,41,31,27,46,46,25,39,96,113,119,116,89,112,128,85,65,64,89,66,52,55,64,36,26,38,30,45,31,49,32,28,52,50,35,17,29,29,20,35,24,3,16,30,11,39,4,3,23,18,50,39,20,37,47,46,64,35,70,54,45,57,32,36,34,29,59,39,10,13,37,10,32,28,8,20,15,23,5,36,19,67,12,30,12,31,12,11,37,28,20,105,151,203,156,145,172,132,156,102,69,89,49,33,25,33,13,37,33,38,40,53,60,86,78,123,72,31,18,65,21,61,70,59,70,85,85,60,74,41,37,88,98,125,99,126,92,49,81,112,129,124,114,63,26,204,229,242,226,247,246,244,238,230,232,222,234,215,225,240,222,244,234,224,241,226,236,245,213,232,228,244,248,246,219,237,231,224,237,231,242,238,242,226,227,225,226,232,238,234,231,243,243,244,249,232,228,238,248,214,233,228,244,247,242,220,114,3,0,5,27,31,8,26,9,10,15,12,31,37,0,16,6,13,13,10,18,9,10,15,40,182,204,187,208,188,193,199,188,184,155,186,188,184,188,201,185,224,161,222,175,183,162,193,187,201,187,197,169,204,174,184,183,172,197,197,183,168,186,207,172,180,207,192,205,194,206,193,182,220,192,211,194,186,206,180,182,211,198,210,202,199,207,209,201,186,205,182,208,196,200,184,187,211,190,197,206,210,205,173,200,214,187,192,201,201,173,182,196,183,180,175,186,187,191,186,209,177,191,196,168,189,189,202,188,204,182,183,192,207,189,192,175,209,192,207,208,199,212,167,172,172,205,192,196,190,184,194,183,179,176,198,196,180,203,189,207,221,171,202,177,209,204,204,205,190,203,203,185,197,191,188,196,190,197,180,199,189,185,176,217,219,194,213,226,199,206,192,217,197,189,197,187,201,185,212,212,203,189,193,203,190,221,192,211,200,206,207,224,210,208,210,221,213,236,199,190,186,206,207,190,215,196,211,207,215,177,234,224,209,198,204,213,191,209,202,200,194,205,211,205,235,201,191,181,204,195,215,235,211,210,209,222,228,203,210,209,215,195,215,206,228,223,216,199,188,221,224,213,237,203,209,204,216,204,187,205,194,236,214,224,215,199,223,202,229,210,230,198,234,214,208,208,180,224,218,206,227,212,206,207,180,216,233,196,188,227,209,215,228,211,216,207,210,215,222,213,218,207,223,203,197,216,220,217,208,232,212,196,196,216,213,193,218,182,191,182,255,221,207,201,107,105,34,180,248,230,244,243,248,240,228,226,220,216,208,234,235,98,56,50,159,155,85,40,26,41,31,41,23,60,26,39,53,29,139,241,255,148,85,185,196,251,237,239,221,255,248,237,200,147,225,208,150,193,185,118,126,134,187,253,146,168,235,242,196,194,210,219,213,202,231,227,154,200,211,255,149,54,117,70,68,44,27,91,76,44,209,204,74,65,8,5,130,244,246,232,72,14,12,8,9,48,44,45,28,39,40,76,34,80,225,249,249,202,224,231,217,250,254,228,126,102,208,245,241,223,242,169,96,178,232,253,253,159,41,47,78,87,48,23,36,57,32,110,108,93,135,79,19,49,32,36,47,68,10,43,54,57,18,29,18,17,49,32,38,37,36,42,45,44,20,29,27,21,29,33,29,68,100,51,86,103,97,82,71,35,65,86,56,48,66,36,34,34,50,41,26,41,42,42,32,42,64,34,37,13,40,31,32,27,7,23,41,34,39,35,16,30,31,27,32,60,25,55,58,69,52,50,54,68,41,65,27,31,39,65,5,17,2,20,8,21,21,45,3,24,28,30,30,35,26,32,32,23,35,18,36,51,
74,77,128,153,142,129,125,153,166,145,101,74,89,51,30,44,19,16,20,15,12,33,19,27,64,84,93,61,11,45,34,17,60,44,50,71,89,93,36,60,38,48,71,93,120,101,106,95,93,137,99,71,40,88,96,55,176,231,227,226,239,244,242,234,208,236,218,218,239,233,240,234,234,242,231,230,243,242,221,242,233,233,230,242,221,224,196,221,235,235,243,226,239,241,230,233,230,248,226,246,247,214,232,241,215,207,244,250,231,229,221,250,244,222,249,227,223,123,17,2,5,7,23,19,36,6,16,24,11,16,5,0,4,1,22,28,20,13,8,3,17,6,170,198,184,212,189,164,191,199,200,179,169,188,197,197,189,183,206,216,197,213,193,206,192,186,185,168,195,200,202,203,196,186,199,193,207,162,185,183,205,200,200,183,177,192,187,182,167,185,192,176,192,210,190,191,179,192,185,187,191,215,202,196,171,182,200,181,207,233,191,212,193,189,182,188,213,191,205,180,201,199,180,204,191,195,191,209,183,203,200,195,203,188,190,184,182,209,223,198,184,182,221,206,167,204,194,185,207,194,199,193,199,197,220,169,205,181,217,199,210,206,204,201,183,192,205,166,183,206,191,189,195,211,183,200,195,166,216,181,202,194,210,180,186,206,192,194,186,188,201,211,193,203,194,190,192,218,210,206,179,201,210,199,193,186,198,206,209,187,202,205,216,235,190,204,206,202,206,224,225,230,201,205,215,203,215,209,234,193,191,202,215,220,196,179,218,210,209,216,224,197,206,221,217,221,172,204,216,200,197,209,196,207,215,192,220,184,231,224,196,195,202,220,210,209,225,207,219,208,195,222,227,215,218,217,220,234,215,230,214,209,229,229,226,205,224,223,205,223,196,207,211,213,223,213,224,223,216,211,213,199,199,202,210,222,217,196,212,214,214,186,225,230,182,235,235,221,200,225,231,179,233,218,196,208,223,203,208,209,221,193,217,220,190,215,212,198,209,214,207,206,205,184,224,200,216,209,223,199,222,234,227,211,222,196,197,198,241,232,205,124,65,85,35,121,176,203,220,233,217,211,215,183,151,173,131,184,188,86,48,101,179,153,107,57,14,10,47,36,43,54,10,30,57,31,99,214,211,117,27,60,178,248,232,204,166,212,165,184,170,138,227,218,201,225,187,111,132,166,237,247,161,189,245,206,228,209,224,249,232,189,218,215,191,201,188,246,165,17,86,88,88,67,73,73,85,61,198,188,81,35,9,36,207,231,255,143,45,13,7,7,43,52,34,4,48,60,87,6,79,207,242,234,203,215,237,254,244,241,240,143,153,238,243,240,211,230,225,113,169,226,239,249,240,213,93,39,18,65,89,72,52,67,130,87,107,126,117,85,9,18,33,28,68,41,31,35,24,21,21,18,19,39,23,45,33,67,29,16,38,47,60,19,51,10,18,30,43,73,57,81,92,114,77,69,56,46,70,36,36,59,39,75,23,3,38,23,24,54,40,24,44,33,9,47,33,21,26,29,34,35,28,31,47,34,28,32,34,42,32,12,40,60,37,53,59,33,68,62,70,45,59,48,23,25,30,30,25,20,18,30,33,40,17,14,47,59,27,26,13,34,13,23,17,35,42,39,62,121,142,156,164,153,139,147,135,112,151,147,78,61,76,30,28,19,27,41,35,16,6,25,31,35,53,58,80,100,43,28,36,25,44,81,87,77,99,95,33,66,59,29,70,94,109,104,106,124,121,126,77,74,46,100,127,15,133,233,229,243,242,251,232,227,225,217,234,224,239,246,221,241,230,240,233,235,236,240,226,237,240,247,242,218,213,225,240,253,237,238,237,236,216,227,209,243,231,255,223,232,241,237,241,227,250,242,236,232,252,245,255,249,239,234,222,225,249,117,6,0,13,12,8,23,12,19,2,9,9,20,17,0,3,18,26,19,37,19,14,17,26,29,178,196,198,188,192,180,209,165,185,176,181,196,196,186,198,192,176,181,202,218,189,206,195,216,187,163,176,177,189,204,181,190,173,183,186,196,197,201,188,175,218,167,175,183,221,198,197,209,204,179,196,215,184,214,201,197,207,177,209,198,185,203,205,203,208,206,180,169,206,185,213,207,189,195,224,183,191,194,175,198,198,176,210,194,218,169,198,202,189,195,189,201,202,191,165,200,201,189,198,200,175,195,186,193,182,193,18
0,218,187,200,200,190,199,189,211,193,187,201,201,183,192,189,175,200,192,194,194,190,219,199,179,189,211,192,214,167,198,205,205,200,197,179,178,208,203,187,184,213,193,191,221,204,216,185,198,220,176,189,192,192,209,208,197,191,240,239,209,193,180,190,205,198,216,215,191,184,195,208,206,196,206,212,172,191,201,199,232,214,204,206,223,215,207,216,184,194,221,224,213,196,217,194,236,212,210,194,190,194,203,201,185,211,223,230,221,229,211,217,202,229,200,195,182,217,220,214,190,194,183,206,216,209,241,181,239,217,231,213,204,204,209,224,192,164,199,219,204,221,223,215,196,222,223,201,216,200,205,218,209,221,212,231,195,221,208,220,201,206,198,198,195,224,229,194,209,187,219,234,219,234,220,192,238,213,213,213,221,197,229,215,218,233,216,237,223,233,217,204,221,221,216,207,215,213,218,226,193,201,218,229,219,214,207,181,194,214,209,147,113,50,20,39,8,44,46,130,203,172,187,184,173,151,115,176,188,204,205,127,113,120,162,174,119,61,13,24,56,50,49,43,33,38,81,146,203,250,230,163,52,7,84,209,236,146,116,195,158,221,204,188,195,183,182,148,171,155,190,228,249,215,128,228,227,246,212,211,226,225,217,197,205,221,145,186,202,255,154,77,65,63,58,50,72,77,81,71,141,91,23,53,5,66,243,250,211,79,8,2,1,39,67,52,6,34,22,61,39,68,190,234,252,183,197,240,239,249,253,251,194,146,220,226,230,215,193,201,155,180,240,227,249,253,240,208,97,27,1,31,77,116,58,111,60,60,134,106,119,74,21,46,27,25,27,31,23,42,45,41,16,26,26,16,20,32,67,32,40,38,35,60,35,36,43,24,41,20,23,82,48,96,127,109,74,92,119,75,59,56,33,33,41,21,15,41,25,12,27,62,37,20,27,31,30,34,50,21,27,50,18,38,44,22,34,23,58,23,25,38,44,49,54,44,63,49,66,32,69,32,42,44,46,27,19,19,27,14,19,2,11,27,31,18,37,33,5,40,28,19,37,18,27,18,18,32,31,99,141,154,165,162,147,149,113,118,110,122,157,137,55,37,51,43,15,41,32,15,14,41,16,40,3,39,27,53,77,91,59,21,44,22,30,89,74,72,95,67,81,52,38,13,97,108,111,76,85,84,108,107,64,96,143,169,142,31,152,242,229,237,242,245,215,211,239,231,250,223,241,222,239,239,242,236,230,248,235,216,236,231,239,239,238,243,234,245,228,237,235,248,231,209,235,251,223,237,219,239,240,251,233,237,249,237,244,245,240,251,208,253,245,220,244,215,237,234,233,130,22,9,7,7,18,7,21,9,14,17,12,11,7,5,9,3,13,16,3,11,25,13,17,6,190,169,171,184,203,185,196,203,173,188,189,179,189,196,181,169,189,182,166,181,190,191,184,184,168,190,204,174,183,173,174,179,195,205,186,214,178,182,193,186,210,192,187,204,180,197,175,181,193,202,188,207,180,186,214,192,195,209,200,207,187,172,198,176,207,196,183,188,210,188,193,193,196,165,173,189,194,201,195,198,191,201,201,198,212,193,183,187,190,213,198,200,175,187,199,206,198,181,199,164,191,188,193,193,194,181,178,183,191,181,191,188,205,198,189,182,178,191,181,203,201,204,222,200,183,205,207,198,190,184,211,196,196,189,178,212,180,186,191,189,168,189,198,182,179,193,195,179,199,193,193,212,222,205,210,171,200,175,185,204,203,216,208,178,202,205,203,191,195,219,197,209,194,207,213,209,191,213,204,219,210,219,195,233,221,227,219,206,200,181,186,213,217,210,213,211,205,225,184,218,210,217,208,200,203,208,203,194,206,206,199,189,215,228,201,212,196,207,220,212,220,206,217,216,218,185,216,225,221,226,211,211,218,213,220,219,235,221,197,211,216,212,205,201,211,209,206,233,202,213,209,229,215,224,246,224,214,241,221,214,213,211,212,219,211,211,200,210,209,226,228,221,199,238,205,210,219,234,227,203,197,202,210,215,222,218,208,216,216,209,230,206,211,216,242,222,236,209,221,214,229,230,213,204,232,198,231,216,226,206,220,224,205,201,182,204,188,70,18,8,19,33,45,50,27,82,105,160,192,176,213,166,149,207,242,248,199,88,152,1
07,194,194,154,85,15,32,38,59,35,37,41,30,100,182,230,244,239,207,149,30,57,128,146,111,139,181,191,222,186,193,180,134,156,165,180,168,208,196,253,195,149,240,237,235,199,209,235,212,220,203,233,229,169,207,202,255,146,26,106,89,27,73,55,81,106,42,78,39,88,48,26,129,254,217,90,20,32,7,12,31,19,0,24,49,89,30,55,144,249,239,193,163,228,249,247,250,252,193,155,213,251,231,207,239,159,94,157,241,253,224,250,247,224,136,160,112,51,58,48,71,54,66,62,51,153,143,111,83,19,27,5,46,33,55,26,24,47,20,13,10,51,15,15,47,23,34,13,31,52,47,23,29,36,39,35,17,66,99,58,71,88,74,49,41,62,35,53,58,36,78,53,8,31,10,25,39,26,11,37,28,42,52,52,42,30,18,21,27,28,29,26,36,53,27,31,29,27,67,85,65,105,78,44,80,35,52,27,34,46,51,20,17,25,25,39,17,32,18,35,48,12,17,37,37,11,9,24,30,29,13,20,16,49,10,80,139,125,134,108,126,122,123,91,106,89,124,144,103,83,39,31,21,34,38,34,40,39,51,30,45,12,27,38,31,66,55,37,9,13,16,39,71,72,77,76,42,62,57,35,34,68,98,99,102,77,91,122,98,29,78,115,90,90,11,141,219,243,250,250,246,237,249,231,239,247,240,247,241,248,229,239,238,248,242,233,247,216,246,243,253,249,240,225,236,251,248,235,213,215,239,240,236,240,234,239,215,235,237,253,249,245,245,248,233,238,234,242,254,228,240,240,247,246,242,241,113,19,7,10,31,22,8,7,24,16,14,2,15,16,8,3,5,10,36,17,12,6,29,5,20,205,163,162,173,148,189,184,192,206,182,201,207,212,194,189,185,210,187,184,194,191,197,214,184,212,188,181,189,209,171,190,201,184,187,205,206,208,215,190,166,200,202,195,186,175,194,197,198,196,193,207,194,188,187,191,191,175,181,213,200,200,191,160,205,194,211,178,211,195,173,209,186,203,195,192,209,224,210,180,207,186,194,198,190,195,174,186,206,208,205,200,185,171,182,199,201,168,185,195,204,206,191,192,196,188,182,188,179,197,208,201,181,184,196,174,217,208,191,189,162,203,174,196,207,201,173,182,190,179,188,186,185,204,200,186,213,215,185,172,206,209,181,194,157,189,226,199,203,211,204,209,196,205,199,189,212,187,185,218,186,201,191,211,212,192,197,213,181,179,188,232,233,192,201,220,217,191,227,212,199,225,207,218,207,243,212,219,203,203,198,208,209,237,198,199,225,217,208,202,234,195,223,218,189,222,216,205,228,202,239,203,228,235,209,210,247,197,219,220,216,200,221,203,218,190,207,211,221,205,202,200,210,238,214,195,188,217,220,222,216,215,223,198,223,197,192,226,197,212,204,244,198,227,220,214,196,215,189,241,196,238,247,215,182,194,214,202,213,242,231,190,213,213,220,213,215,205,210,218,229,218,223,230,230,213,200,226,213,206,228,214,206,218,207,188,212,242,193,211,219,227,206,194,212,198,192,209,222,191,232,207,217,211,162,213,211,150,100,19,24,27,43,77,82,40,59,164,212,209,231,225,191,142,206,218,229,125,108,122,100,181,176,132,69,5,31,29,45,50,40,52,30,63,140,200,234,164,205,164,19,56,145,241,168,192,181,182,160,157,116,139,195,137,190,163,184,172,193,253,146,169,236,230,224,218,200,222,206,217,176,234,183,156,149,213,252,119,33,116,85,53,75,60,62,92,53,66,130,218,79,13,187,231,140,35,24,12,15,52,22,0,10,43,48,56,50,147,214,243,180,130,205,241,254,229,228,185,139,191,253,238,215,200,222,72,83,210,228,252,246,254,220,103,68,133,158,85,32,43,45,25,71,26,83,161,134,124,41,10,27,24,22,43,30,17,50,21,32,32,44,15,16,49,33,49,28,20,15,10,25,55,36,11,40,16,61,46,85,67,65,57,60,37,18,46,38,24,12,75,60,14,26,20,23,4,15,32,19,23,9,28,15,18,50,27,25,37,18,30,32,35,42,26,28,8,39,14,159,159,156,91,134,87,42,32,5,27,49,42,33,42,25,29,12,13,38,25,38,70,28,4,30,27,25,31,50,59,27,8,33,30,17,28,3,108,135,128,107,112,112,91,90,102,115,107,108,118,96,26,31,35,23,25,29,29,41,18,51,54,27,45,14,37,29,45,32,34,24,59,40,39,70,72,44,68,47,8
0,18,63,10,34,115,117,91,104,65,82,102,35,60,56,31,44,11,139,213,236,252,221,254,230,244,238,235,222,238,247,254,255,240,239,240,248,238,235,237,244,242,229,255,224,235,247,253,231,251,238,246,242,234,243,243,250,240,243,245,246,252,253,245,241,238,248,222,253,239,241,241,242,237,206,247,250,240,227,100,7,18,0,1,2,5,33,6,20,17,9,14,2,0,20,22,20,7,9,7,30,10,19,5,190,191,206,208,186,177,165,201,181,202,188,164,206,198,189,172,189,179,199,189,191,196,185,186,204,185,189,185,196,188,188,208,167,203,191,194,196,186,184,197,176,171,194,198,199,209,194,192,193,211,221,170,193,201,191,190,224,210,194,187,186,192,202,192,189,187,205,191,185,191,185,174,195,202,196,191,174,193,174,211,212,198,190,200,202,190,189,212,205,168,202,210,194,165,172,197,181,198,194,195,188,192,184,200,191,182,202,177,197,184,211,183,188,203,179,200,204,226,191,198,177,193,185,192,174,187,177,207,207,181,192,179,198,178,200,187,203,204,169,175,210,200,176,185,202,205,220,200,204,210,211,190,153,200,180,188,189,198,229,207,208,222,174,199,171,205,202,221,203,183,219,195,183,208,206,207,200,228,200,187,200,211,210,208,216,194,218,201,220,200,224,194,206,205,209,211,203,212,212,190,234,211,218,216,202,216,237,195,199,216,210,205,229,206,219,215,226,222,225,213,232,236,246,225,220,226,214,200,220,227,224,220,207,221,203,197,213,231,206,223,195,187,197,211,216,209,202,230,232,232,214,211,192,196,204,222,202,214,207,196,231,200,216,210,201,198,238,210,207,202,219,209,230,230,194,222,207,170,183,211,219,202,212,239,188,185,232,227,213,204,226,231,214,220,226,241,231,229,209,214,202,202,183,202,205,206,216,212,227,216,208,223,206,192,220,222,209,120,96,95,95,134,99,79,59,101,222,189,193,211,242,177,154,226,224,182,110,109,174,108,172,163,42,52,39,31,43,31,65,39,60,13,66,131,199,225,150,211,193,78,51,211,237,222,230,197,162,136,147,124,164,186,131,124,131,138,214,224,250,147,202,238,225,239,217,201,227,212,229,193,231,229,181,170,203,242,149,69,80,98,75,59,38,68,97,94,132,178,221,152,84,209,119,47,11,2,26,24,39,31,45,39,67,67,21,120,233,223,220,135,196,249,253,246,206,177,157,211,217,253,242,192,172,111,87,198,232,229,246,253,233,66,8,64,106,95,93,23,40,51,46,57,38,86,172,139,111,14,32,34,33,38,36,22,26,27,39,30,10,26,22,19,28,43,42,43,40,39,47,37,53,39,21,68,23,54,43,57,71,40,54,74,39,46,28,33,31,37,26,53,11,37,12,26,29,41,43,44,32,49,41,40,38,38,16,27,33,18,38,7,37,48,61,35,51,42,66,140,130,103,86,91,76,44,40,10,37,15,28,22,18,40,30,45,55,50,61,62,16,30,30,18,27,3,17,18,26,42,24,38,29,28,7,34,74,149,126,113,100,98,115,118,124,147,114,78,120,84,52,29,19,42,30,58,40,14,25,45,48,28,19,29,7,15,52,18,19,21,15,14,40,52,78,50,75,70,86,47,58,36,52,150,108,120,106,105,74,85,114,84,43,40,22,108,248,253,242,251,248,251,233,223,223,252,238,242,233,252,243,242,245,242,243,227,253,234,244,237,254,243,248,241,224,237,254,244,250,249,255,251,227,247,214,240,255,240,226,242,236,255,241,232,247,251,243,255,252,236,232,248,248,248,243,236,249,123,11,1,2,18,13,15,0,10,3,8,8,1,11,0,5,8,16,18,30,18,25,4,28,28,150,188,209,172,159,180,187,186,204,171,209,176,180,212,195,228,198,187,177,185,174,164,197,183,185,200,192,199,178,183,202,198,196,185,216,193,191,195,218,170,179,175,209,196,203,167,169,179,199,186,198,215,185,197,192,211,205,200,218,210,212,221,186,165,190,179,165,190,201,199,175,198,198,201,191,218,183,176,193,218,193,187,188,205,193,187,197,190,210,202,204,176,198,199,182,208,176,206,202,177,206,164,208,185,197,204,191,196,192,221,215,186,191,177,174,192,193,193,183,187,204,191,176,193,197,164,203,182,195,192,174,191,207,202,204,211,208,185,2
14,183,198,203,216,205,206,212,225,202,210,208,183,228,182,193,178,195,187,219,199,222,220,211,203,192,198,194,182,200,211,176,185,204,201,205,211,198,205,237,210,206,212,227,196,226,205,213,212,223,181,216,214,206,192,207,203,202,234,203,216,184,216,209,227,236,197,211,228,240,212,209,201,218,227,224,224,208,220,204,225,219,221,231,221,199,217,206,218,214,234,234,206,219,211,222,179,210,213,210,221,212,223,196,199,213,217,210,217,219,203,224,199,227,222,211,234,201,220,201,199,201,211,211,210,195,199,205,229,222,209,218,223,198,221,232,215,199,227,207,198,210,221,237,240,213,221,236,206,221,218,225,232,222,217,205,238,208,230,190,235,230,237,210,192,187,172,219,229,218,235,232,206,241,198,239,178,223,236,178,193,179,141,132,111,120,118,115,213,211,227,223,243,184,162,226,201,130,103,110,158,125,187,211,40,16,8,19,62,36,79,27,36,41,119,162,197,217,176,221,190,103,90,196,219,167,152,153,167,211,217,199,207,168,64,117,109,149,156,247,183,149,216,249,222,247,200,225,220,217,229,191,228,220,191,190,209,254,157,59,135,88,53,45,36,45,52,104,131,175,253,191,182,139,44,32,5,7,18,28,43,20,58,45,59,24,103,203,241,214,149,194,189,236,252,226,197,163,210,252,251,220,232,166,89,139,161,237,243,239,242,221,95,26,57,89,108,120,141,91,30,45,35,30,73,142,124,127,103,19,44,38,17,6,34,58,38,39,10,29,39,26,45,37,31,36,39,27,19,24,59,50,15,30,30,14,36,44,38,40,28,11,50,48,40,43,54,32,9,4,51,41,11,24,6,22,25,46,53,40,15,33,13,28,46,27,65,19,18,43,30,38,29,27,27,45,17,38,68,111,127,74,98,65,78,55,51,24,35,3,5,6,33,11,31,36,45,36,24,52,51,28,45,15,13,6,30,18,39,47,30,35,43,13,22,26,76,138,113,117,96,93,100,97,87,72,102,106,99,107,43,24,19,42,41,34,62,15,55,39,19,52,45,36,51,20,14,36,53,21,43,6,34,91,89,84,69,50,71,38,56,55,56,129,102,90,90,98,90,49,90,92,113,104,152,235,236,248,232,241,255,241,235,234,243,225,224,254,254,241,241,245,241,247,234,223,234,225,255,248,247,244,236,235,241,255,242,252,251,238,255,247,253,238,247,251,249,253,249,240,245,245,217,230,232,247,249,234,242,250,247,241,247,239,243,247,238,105,10,0,2,11,26,15,26,18,20,12,13,12,12,27,13,12,17,7,5,14,13,14,1,13,188,189,191,175,184,184,192,172,182,188,199,225,210,178,214,186,185,173,195,212,192,187,178,155,205,190,193,212,182,194,215,211,178,179,195,164,181,197,191,190,209,184,183,209,183,176,197,170,190,188,199,191,196,213,187,194,174,209,198,197,204,196,203,185,209,193,207,190,219,214,185,187,206,179,185,205,185,211,204,194,205,186,199,194,198,209,199,198,193,203,217,188,193,186,189,183,188,204,216,175,184,200,191,172,206,182,183,215,215,186,199,182,185,200,198,224,195,192,203,202,192,185,216,186,202,190,210,218,203,190,219,195,219,186,191,220,186,218,185,205,188,199,217,213,223,197,224,165,198,205,209,209,198,201,203,232,196,203,202,195,201,190,163,217,188,206,225,194,212,196,208,213,229,216,194,225,213,200,204,217,204,232,214,201,194,218,232,197,198,218,203,207,218,191,198,214,221,224,212,222,206,230,213,217,209,190,209,198,232,221,208,217,186,206,199,207,241,207,192,223,206,223,203,220,205,217,199,190,202,219,222,239,239,205,217,202,228,200,216,202,220,205,220,223,213,228,212,212,238,213,210,197,221,230,225,216,195,217,208,202,216,206,221,210,210,214,205,213,217,202,230,230,228,217,213,223,224,214,235,204,208,241,249,252,243,223,229,213,213,207,231,215,231,208,225,213,221,210,208,227,243,206,209,216,195,223,216,236,238,232,225,210,171,141,168,157,145,190,225,185,165,116,105,187,186,127,204,188,238,229,244,153,159,219,179,103,96,117,173,117,198,209,15,13,16,97,82,100,49,15,30,89,216,200,220,204,210,235,210,148,64,89,95,117,192,203,194,245,225,
208,219,127,108,146,147,163,176,238,145,134,237,213,246,208,220,205,234,215,212,199,222,205,159,188,196,252,141,79,105,84,83,90,57,67,55,29,91,186,247,254,195,91,28,32,34,8,37,32,7,45,39,61,47,63,177,251,232,152,166,229,252,229,250,198,150,212,244,250,233,244,224,105,76,209,190,238,243,242,218,65,25,38,87,103,123,162,211,144,74,87,89,79,130,129,135,140,39,32,17,54,53,61,64,23,26,31,79,11,15,22,30,36,30,46,37,51,51,29,46,52,24,31,18,36,31,31,29,28,35,28,20,41,24,58,41,61,38,25,7,25,22,20,32,19,43,28,42,23,39,40,75,39,32,17,27,26,25,12,15,40,38,24,45,37,24,28,28,94,143,100,58,99,52,28,22,31,21,32,20,15,39,14,28,38,54,46,26,8,36,41,19,3,35,19,11,30,34,53,10,57,25,22,28,29,66,96,116,119,69,74,62,78,38,18,60,104,108,95,61,47,14,29,29,7,34,16,11,27,38,39,42,16,53,31,35,41,59,15,26,25,22,95,86,64,52,53,74,41,57,68,52,118,83,116,108,133,80,87,77,98,95,142,196,219,226,248,255,224,250,234,239,245,249,236,248,244,250,246,248,252,232,252,249,242,248,223,243,255,253,254,250,254,238,243,238,236,240,255,251,253,237,234,251,241,255,238,239,255,250,255,240,249,255,242,237,255,255,239,243,248,244,250,233,246,250,125,0,10,16,21,37,17,7,0,1,17,6,20,12,12,0,9,0,17,5,12,21,16,30,11,172,178,191,176,196,190,197,200,221,176,206,226,195,210,194,190,180,174,207,191,199,199,199,188,181,178,199,196,201,192,195,189,220,164,196,195,182,212,186,177,166,193,187,204,189,175,165,212,192,179,180,205,176,225,221,208,206,220,196,232,203,177,194,200,181,184,205,197,204,174,187,208,188,200,183,188,187,197,184,198,211,200,213,191,196,191,183,182,174,180,200,173,195,185,212,194,187,182,196,181,192,188,214,217,163,195,170,210,192,195,215,208,186,193,201,166,178,196,195,207,194,195,194,190,214,205,217,197,213,201,209,188,212,182,173,176,214,200,224,214,182,188,190,210,205,210,211,193,210,211,200,212,214,215,201,200,205,196,205,221,208,205,218,213,201,211,195,217,198,205,215,194,205,199,200,204,211,201,225,203,213,220,204,205,200,235,198,198,216,230,210,200,222,211,222,193,228,224,224,230,217,187,210,219,201,222,205,219,196,195,207,218,213,211,190,215,197,223,209,215,203,212,207,211,243,219,236,228,218,204,219,224,213,233,219,227,205,203,235,183,204,216,205,206,209,198,205,231,217,205,204,191,203,204,216,208,221,219,201,207,228,223,204,187,191,202,237,209,210,193,230,205,214,212,233,209,213,225,244,226,198,198,167,223,214,246,204,228,235,226,213,201,223,198,237,241,234,208,213,213,239,222,201,214,200,187,212,195,185,187,187,150,177,159,151,110,112,178,225,185,175,111,74,237,227,82,149,191,180,201,191,162,141,186,172,136,129,137,194,150,193,222,50,27,64,142,140,125,83,40,28,132,229,214,185,226,220,178,173,151,48,22,58,154,225,198,224,229,164,153,191,150,92,162,194,224,230,241,132,130,200,151,204,219,181,200,239,228,223,201,239,237,200,171,200,242,106,78,137,116,78,72,40,38,28,86,133,234,247,231,156,38,24,36,47,38,40,54,16,51,65,48,56,178,251,248,150,163,219,241,248,246,225,142,215,249,242,242,244,240,183,135,184,253,208,250,240,249,112,6,42,82,99,97,120,155,188,129,33,22,83,81,92,138,121,109,48,5,23,42,55,47,42,25,20,27,21,33,25,24,38,24,28,26,34,40,38,41,30,19,39,14,60,39,41,15,22,46,47,25,25,1,54,50,43,71,36,10,28,53,29,25,39,17,42,18,34,20,22,24,15,51,12,17,37,34,23,24,26,18,31,49,28,41,33,51,35,72,107,98,75,32,70,29,6,37,35,32,29,13,8,41,58,47,54,46,22,24,24,11,13,36,16,33,34,54,22,7,27,42,27,31,18,30,34,52,118,119,89,56,75,74,68,23,15,43,98,121,110,63,29,32,30,41,51,34,38,9,61,28,51,36,63,30,27,35,51,55,8,40,40,95,73,68,63,70,83,54,49,53,28,91,110,114,123,125,84,79,62,95,70,25,11,80,216,242,232,253,255,250,255,245,248,244,243,247,
239,239,239,235,242,238,251,255,231,239,236,243,236,247,242,254,236,251,249,254,249,249,235,253,245,248,253,248,248,238,245,224,250,250,228,226,246,250,244,251,231,254,239,228,245,249,243,224,220,114,25,31,10,34,7,23,28,6,14,3,5,20,2,3,26,21,31,27,13,12,22,15,5,19,202,180,196,185,168,203,212,188,228,179,174,184,197,176,204,183,213,185,195,178,199,215,176,199,196,195,198,184,211,208,209,200,196,225,202,170,172,188,205,198,183,187,190,188,196,189,175,218,220,181,208,182,204,202,185,172,204,171,222,179,193,188,205,194,203,227,204,200,180,195,211,209,199,197,197,190,197,200,202,201,202,204,180,189,209,187,207,213,191,219,204,211,207,194,202,207,180,203,214,198,209,182,197,181,198,168,183,201,195,221,204,193,184,188,203,187,176,198,212,183,212,191,194,174,205,203,187,192,205,205,206,197,196,199,193,195,204,211,212,195,203,207,225,224,220,205,186,226,213,205,200,193,194,220,208,202,196,195,209,190,185,221,215,196,204,228,209,205,212,204,227,194,233,210,231,200,200,225,230,203,216,218,209,203,218,207,207,202,235,219,239,217,202,216,204,229,205,203,205,235,199,200,208,211,217,223,215,218,209,201,201,226,180,224,219,203,219,201,215,190,217,209,227,217,228,210,206,219,208,211,209,235,227,206,198,205,208,211,205,200,208,206,221,201,215,210,217,235,232,199,197,206,205,214,219,206,212,199,226,202,222,216,223,211,214,221,214,220,209,215,230,219,227,202,207,216,199,217,213,241,191,89,85,128,202,218,223,224,243,215,220,242,236,212,226,194,227,220,219,227,247,221,197,192,201,208,171,171,178,180,176,182,170,200,218,194,205,197,212,127,173,102,52,208,198,110,176,146,146,200,221,148,166,162,129,138,152,166,220,142,191,237,123,56,95,170,127,137,140,52,14,173,244,186,136,212,216,152,155,158,91,69,113,175,231,192,209,193,142,136,208,144,116,122,142,175,244,234,138,213,202,213,202,188,198,196,220,199,209,190,239,230,184,163,223,213,79,44,113,111,76,44,50,36,64,126,205,245,241,162,65,1,35,42,40,28,23,24,22,43,74,48,127,231,243,165,175,213,236,233,250,228,159,159,243,221,247,245,250,176,165,196,238,242,221,233,245,101,38,33,76,102,120,111,93,69,128,103,15,47,36,66,103,119,98,115,27,18,48,45,38,53,50,24,48,29,29,42,10,9,20,40,52,38,28,30,28,53,7,28,56,33,38,54,25,16,40,24,19,26,51,19,42,62,46,65,41,20,14,28,20,30,34,33,32,48,39,43,36,41,28,26,33,41,38,38,47,42,3,51,46,31,33,27,26,84,51,56,126,137,95,62,58,18,43,18,34,44,28,28,28,29,59,42,78,53,17,11,23,17,35,16,28,20,43,16,8,25,33,35,36,44,36,14,38,23,70,113,102,82,107,110,45,79,49,61,94,139,144,108,59,24,36,24,34,40,36,48,41,20,45,58,34,53,53,42,34,43,50,32,35,91,53,42,68,78,72,49,62,70,35,92,114,126,101,115,87,88,45,53,49,18,50,200,255,246,253,215,250,244,224,219,254,233,252,245,249,249,240,248,235,251,243,234,253,242,255,252,253,230,254,251,228,229,246,255,223,243,252,255,215,239,254,216,226,225,242,243,242,228,243,241,238,237,246,252,239,255,236,239,249,244,250,248,201,97,14,0,0,4,9,2,18,18,24,28,11,7,4,0,1,9,21,26,15,5,10,14,37,13,176,200,206,181,182,199,183,192,215,197,203,175,197,225,191,181,174,182,213,206,191,185,191,191,193,203,197,220,194,219,193,180,210,180,219,215,195,209,184,194,177,214,195,194,220,179,202,210,197,193,188,200,207,206,188,205,181,206,217,181,192,172,179,172,204,208,199,174,192,174,197,203,228,208,192,184,201,184,201,196,205,202,197,195,177,171,181,187,187,170,196,223,183,194,195,209,184,197,201,217,203,189,189,180,190,204,196,201,182,214,203,195,192,197,194,198,184,184,209,200,178,221,182,217,189,196,204,202,201,203,201,196,192,199,200,214,201,199,204,200,195,221,197,197,234,170,193,214,203,200,189,200,198,213,206,182,185,215,203,198,192
,211,193,199,208,212,181,228,189,215,218,227,180,174,203,216,222,235,222,209,197,214,212,189,213,190,202,214,222,219,200,206,201,236,189,195,217,212,191,188,206,235,181,218,237,196,209,222,197,219,204,192,212,185,208,216,203,203,221,218,212,217,209,210,228,207,240,199,191,188,212,222,223,214,199,177,219,211,195,222,202,218,206,210,211,215,198,217,214,219,225,222,208,219,224,194,200,211,211,196,210,212,212,219,199,211,201,230,202,214,202,225,217,216,228,205,211,215,221,244,183,133,117,124,181,216,245,222,221,223,225,224,220,220,224,191,228,209,209,223,236,199,173,175,149,170,180,205,211,205,214,230,188,220,222,252,210,177,214,135,172,122,57,180,169,85,214,206,171,217,229,172,111,152,142,136,160,179,232,113,191,241,159,50,88,180,152,144,134,81,29,195,246,157,141,233,170,138,199,215,222,77,69,182,187,159,192,228,189,179,160,138,120,164,181,215,246,193,160,214,236,245,236,233,203,178,210,195,210,182,236,206,174,194,218,208,71,74,94,71,79,23,10,16,40,115,139,246,191,56,33,29,25,15,70,24,2,43,32,57,21,53,173,219,181,183,232,241,234,251,203,168,132,222,241,226,223,243,164,158,219,221,245,222,207,254,62,33,58,95,128,123,110,76,41,49,138,79,21,36,124,175,143,121,114,113,18,23,54,38,32,57,58,20,32,41,36,23,42,33,18,24,30,46,43,30,66,41,36,16,56,42,25,16,20,5,25,50,47,31,46,44,29,50,93,82,77,34,15,39,29,46,30,60,34,27,15,18,26,33,45,32,49,33,49,38,13,36,8,33,34,58,15,37,84,90,55,48,59,101,92,69,54,46,40,31,23,14,7,28,30,42,60,81,14,41,34,17,22,26,24,36,19,32,17,68,32,27,35,30,24,28,47,44,20,26,34,51,110,135,107,139,68,62,34,41,94,117,149,158,104,52,18,23,36,44,42,64,43,42,38,51,47,56,53,34,59,33,18,16,42,85,61,79,70,84,58,51,74,45,26,78,122,83,98,113,83,42,81,62,31,106,249,240,254,230,241,255,238,240,247,241,249,249,246,230,253,254,249,244,253,235,240,250,251,239,255,250,251,245,253,244,249,224,254,254,251,239,250,255,248,248,238,255,247,241,246,243,240,232,254,250,244,248,229,255,229,241,253,244,235,229,254,243,227,101,6,0,5,0,8,21,31,16,10,14,11,8,20,14,30,12,9,14,8,5,19,1,13,35,175,197,180,193,199,202,192,206,182,189,203,188,213,226,204,195,198,175,198,195,225,194,201,176,187,205,221,204,205,184,225,185,189,226,196,206,194,179,172,200,175,187,209,201,176,212,190,228,196,210,195,208,176,201,214,180,169,194,204,192,201,190,176,175,216,195,209,203,201,202,204,193,196,204,208,191,226,193,197,188,209,215,185,207,212,220,204,191,197,193,193,197,198,194,205,171,190,196,179,174,200,205,186,194,182,201,201,205,195,207,206,201,192,173,191,202,181,214,171,210,217,195,194,201,196,196,196,215,208,205,207,235,199,202,177,206,213,202,176,211,210,208,188,177,210,215,191,211,208,203,203,194,192,205,222,204,187,212,176,184,214,194,205,190,212,222,202,201,207,222,210,196,197,193,218,234,218,207,217,205,219,225,209,219,199,198,207,224,193,226,193,202,201,213,204,226,203,216,207,219,205,196,230,204,212,209,206,219,206,210,221,202,208,204,221,207,205,202,207,232,215,197,236,188,216,221,198,235,223,216,213,214,191,212,223,198,201,200,210,195,183,231,220,202,225,244,238,226,219,184,213,188,201,236,235,205,222,212,212,223,205,212,191,230,186,215,218,245,237,198,200,225,212,220,218,214,229,220,208,195,225,221,187,164,205,187,215,208,234,209,214,198,237,221,237,235,203,221,202,193,174,194,136,202,173,186,219,225,244,217,243,240,160,208,211,231,195,150,205,172,185,98,98,229,183,118,195,234,197,224,220,142,139,154,130,155,119,214,200,90,177,248,176,34,63,147,150,131,76,42,10,172,243,119,175,244,188,201,225,237,225,112,66,156,200,198,224,233,219,202,145,143,132,177,226,235,246,151,144,221,247,246,240,254,230,210,238,220,220,191
,234,198,162,173,172,187,65,73,45,51,40,19,30,22,84,88,114,164,65,14,13,25,11,37,64,47,40,37,78,52,66,84,213,198,149,193,219,251,238,234,161,157,216,237,238,213,209,156,155,227,245,192,241,214,166,134,31,48,80,86,106,125,70,57,45,64,141,92,26,43,146,175,134,135,137,107,15,22,22,57,50,17,58,30,40,38,10,25,32,29,18,45,22,26,34,25,22,47,20,15,36,30,19,48,22,66,19,39,3,36,27,21,30,21,72,86,62,80,20,31,27,33,53,35,43,50,33,40,41,40,3,21,33,36,10,38,38,13,37,33,46,56,47,71,134,147,87,59,74,64,89,89,71,51,41,42,30,5,13,16,17,45,40,53,33,28,23,13,24,22,13,45,29,22,18,15,37,16,41,42,32,18,41,33,32,17,38,30,50,102,80,94,74,45,61,42,69,95,157,198,146,63,24,14,42,46,33,33,43,32,33,45,50,29,26,42,30,23,37,43,62,84,76,36,57,58,73,47,65,71,10,103,107,104,112,92,131,83,92,47,75,234,255,251,237,236,244,254,246,242,255,220,248,248,235,248,235,243,252,253,240,241,246,251,224,246,249,251,248,229,242,235,245,242,239,240,233,252,238,255,244,241,254,245,243,253,237,239,237,255,247,240,229,238,247,246,247,254,231,237,250,248,251,248,212,106,8,18,0,15,15,27,9,10,11,20,22,5,22,14,1,0,22,34,0,10,15,13,34,8,201,202,193,197,195,229,169,188,205,177,191,199,195,180,198,203,219,180,189,186,204,208,190,174,176,226,183,189,178,229,207,182,207,198,184,160,217,180,218,220,182,194,219,212,203,208,178,210,195,183,198,214,195,205,209,238,192,178,191,208,194,208,201,212,209,190,199,196,191,185,205,201,180,193,183,187,206,196,230,180,203,193,215,191,203,197,213,187,201,212,197,196,160,207,188,204,201,197,192,182,204,205,205,200,199,208,179,196,200,189,202,194,196,188,194,209,197,201,200,192,208,196,201,191,166,180,198,204,205,166,211,201,203,208,219,212,211,178,234,201,219,216,212,207,230,206,217,218,209,199,187,213,226,179,222,197,193,214,188,209,207,222,200,188,204,227,208,216,232,218,204,182,217,177,230,213,216,190,229,212,201,215,184,217,202,207,210,213,191,222,205,219,237,197,209,214,218,203,226,226,210,209,217,193,196,188,202,226,198,213,206,207,217,210,213,204,222,197,196,206,210,221,189,194,193,176,222,223,214,191,210,215,202,197,233,203,226,193,183,194,211,205,187,198,216,220,206,227,204,201,226,222,210,214,202,209,212,225,206,238,195,214,220,205,205,216,207,197,211,196,216,215,222,226,210,213,223,226,220,209,220,244,242,231,204,183,202,242,228,214,216,215,202,223,220,213,208,204,185,206,204,227,194,215,220,212,241,234,226,228,227,202,179,226,214,225,140,164,228,201,186,95,121,246,201,96,202,217,198,184,226,133,133,175,160,174,137,223,193,108,164,220,221,85,98,155,144,108,87,47,40,200,246,98,202,242,181,177,203,238,132,94,43,98,171,209,245,205,201,143,164,162,183,203,221,252,229,144,161,248,250,225,231,238,229,204,227,237,208,207,239,232,164,149,176,174,107,35,59,42,74,83,103,165,146,157,111,117,42,7,12,9,46,40,28,14,20,58,49,61,129,195,165,172,190,201,237,240,241,169,154,225,243,239,240,226,199,148,201,246,237,213,244,212,26,31,34,96,128,101,123,83,27,86,52,72,127,77,51,35,22,65,111,139,134,76,31,32,48,29,63,49,27,43,39,20,15,15,31,21,20,27,37,29,57,60,23,35,36,34,28,36,28,20,42,30,19,24,38,50,45,30,25,31,55,48,87,61,24,25,26,48,30,6,36,33,28,40,28,28,60,30,6,26,29,41,23,36,31,52,50,99,89,85,125,187,75,49,38,71,79,110,75,38,53,20,29,47,24,54,20,28,28,39,39,3,19,27,3,34,33,20,12,30,76,24,35,29,46,64,26,28,39,36,30,12,40,21,52,44,56,22,68,43,49,57,47,47,93,184,133,38,36,15,26,30,38,49,58,52,23,14,50,53,46,40,45,22,23,31,32,74,72,71,56,55,98,63,63,96,33,91,145,104,120,101,83,107,77,84,83,214,227,223,252,250,249,244,248,255,240,250,219,233,252,250,238,253,252,250,240,244,247,252,241,226,252,243,246,225,250,222,255,236,251,24
9,243,232,246,250,242,253,231,225,237,251,236,236,241,252,246,222,247,241,248,248,243,255,252,238,237,239,248,250,242,103,15,12,1,31,27,2,12,1,24,38,8,15,6,0,11,9,10,7,11,18,21,5,19,10,198,213,201,206,212,195,221,206,198,209,197,197,180,190,199,229,210,193,196,191,182,181,192,217,203,197,179,208,187,188,183,151,184,208,191,186,181,219,203,203,197,195,170,209,212,209,212,197,185,185,197,188,170,170,190,190,206,187,189,199,202,210,193,180,215,215,211,212,209,187,207,180,192,191,226,190,193,198,183,178,186,180,219,167,199,192,187,213,184,183,194,177,170,201,218,211,227,219,188,200,199,194,198,202,188,202,185,208,199,196,204,193,211,201,217,212,190,205,190,205,191,198,217,197,209,208,181,201,200,211,193,212,224,187,197,202,221,225,208,203,192,175,204,224,201,178,193,194,195,228,230,224,202,183,197,202,216,217,201,206,222,197,200,205,214,212,203,185,223,209,231,196,204,215,201,218,199,205,184,186,200,196,205,203,225,203,196,200,209,217,220,202,205,220,222,206,208,218,210,215,197,217,225,217,217,220,198,213,209,189,213,208,193,214,227,223,211,209,189,211,205,224,217,198,216,204,219,224,204,211,192,234,218,213,211,205,236,184,192,210,236,214,211,217,201,196,205,215,212,231,222,210,210,198,208,211,187,224,210,221,210,191,225,219,223,219,214,211,207,200,219,229,222,217,226,219,208,213,233,226,222,207,232,242,215,213,216,183,194,211,232,211,192,204,194,210,194,203,212,245,217,237,189,220,208,225,199,191,201,195,202,219,172,212,211,216,141,166,237,190,181,92,112,238,215,100,130,189,182,194,216,172,152,134,179,172,142,247,201,103,188,238,233,130,72,118,125,115,72,42,30,174,232,89,215,250,168,137,180,175,177,138,101,129,160,235,209,166,225,166,160,128,166,176,197,231,226,139,188,232,221,249,223,207,207,205,237,220,231,214,229,248,201,171,150,225,109,14,30,81,105,67,146,168,212,190,48,41,33,5,26,42,46,14,4,42,34,61,24,120,202,181,176,182,237,241,255,252,175,152,194,244,238,213,247,170,143,203,250,241,241,201,246,108,26,64,78,131,106,129,74,29,61,111,57,92,146,57,27,58,105,123,125,82,108,64,21,25,5,29,46,41,40,16,46,28,45,39,21,24,33,33,14,26,26,17,49,23,25,44,32,27,31,32,33,38,19,36,39,34,33,52,23,56,25,37,66,76,28,20,28,45,50,4,28,18,45,37,33,35,12,46,44,37,21,24,35,27,42,71,102,128,112,150,171,129,83,35,23,34,57,57,34,60,26,28,33,22,35,7,38,32,33,11,21,14,6,16,19,11,29,43,31,24,31,17,25,37,18,24,15,43,28,28,35,20,53,37,7,22,41,6,43,70,15,51,7,31,49,29,50,40,53,23,38,45,50,39,49,43,51,40,46,24,31,35,5,38,39,30,50,63,69,67,54,69,85,57,68,78,25,46,124,132,137,110,102,113,58,29,25,122,237,241,220,249,254,246,211,235,251,254,247,226,255,252,234,251,254,249,254,240,243,240,252,245,254,253,249,243,252,237,240,254,248,247,211,255,237,248,252,242,242,248,249,255,244,255,228,253,233,246,244,250,252,241,253,249,242,241,226,250,253,252,242,97,2,12,12,13,14,13,0,36,22,12,18,17,4,3,27,7,23,7,33,1,16,1,27,11,172,193,205,215,189,205,186,178,204,201,202,228,171,190,177,190,197,194,193,202,189,199,213,187,197,211,190,188,184,190,187,193,185,200,192,184,181,213,184,193,210,201,196,189,191,187,177,194,177,195,207,214,197,203,197,198,188,198,190,192,181,186,191,192,207,213,196,196,212,187,185,196,196,181,211,196,234,188,197,194,187,212,186,205,201,205,195,200,226,205,200,181,194,216,201,210,194,201,194,199,204,233,193,201,204,223,210,184,208,198,222,195,205,196,185,208,194,203,217,216,197,222,219,207,211,208,209,214,190,215,209,189,217,198,212,194,218,174,208,224,210,210,208,233,189,189,184,187,207,196,200,170,210,199,222,197,201,206,228,202,183,210,217,192,200,203,207,183,200,204,210,218,211,225,195,214,202,219,209,211,21
1,40,22,88,21,33,47,20,14,33,21,14,78,30,12,38,37,32,35,115,155,58,32,17,18,27,63,78,53,32,154,138,67,82,125,115,178,147,125,141,62,85,64,66,46,24,49,36,10,31,13,29,16,64,23,25,39,28,29,38,7,37,24,43,37,28,36,18,30,40,45,29,38,35,21,36,43,57,133,132,135,108,95,121,84,23,67,236,232,254,249,234,229,227,242,254,224,241,217,244,240,212,242,233,242,253,247,240,239,255,243,236,241,234,225,234,238,250,244,230,243,239,208,242,234,250,239,244,242,227,221,216,238,230,228,220,231,238,251,222,235,248,250,238,231,245,239,250,230,116,2,15,9,9,7,29,10,11,24,34,18,9,3,6,9,4,8,18,1,24,37,19,4,23,186,191,212,221,192,177,183,194,201,204,191,203,211,182,221,195,188,193,198,190,216,210,212,206,180,205,179,194,183,205,208,197,199,185,208,200,207,205,195,204,184,201,187,195,170,201,182,207,192,179,208,178,190,206,178,214,206,203,174,209,208,202,190,196,145,171,200,193,183,189,175,203,191,186,213,201,178,188,221,180,197,177,202,195,213,192,206,172,207,195,192,186,196,200,194,171,194,186,188,195,199,192,191,191,219,194,193,220,205,204,199,220,197,194,200,207,211,214,192,186,193,199,210,209,203,205,196,223,202,201,208,224,188,230,190,198,193,190,223,202,194,196,198,178,197,192,169,214,218,212,196,217,191,226,204,211,226,222,228,223,215,215,220,198,186,197,217,210,210,207,200,197,177,200,215,208,208,192,190,216,228,178,202,231,193,194,177,222,208,206,218,196,217,205,206,235,222,198,219,212,206,219,226,215,176,216,230,210,224,224,193,215,190,206,214,221,202,206,214,227,208,191,205,202,224,199,212,212,225,218,227,229,203,203,221,200,207,204,212,210,188,193,202,234,190,206,197,196,215,176,204,196,196,203,230,212,188,226,215,225,214,220,163,190,190,208,210,204,220,224,200,218,246,198,203,189,238,186,145,167,220,207,207,217,214,193,204,195,205,207,191,202,207,210,206,195,182,188,207,223,193,220,226,180,187,195,189,165,170,209,213,209,152,173,192,220,190,224,226,156,146,119,161,169,137,184,151,172,147,172,208,183,214,229,196,203,212,206,156,194,211,217,210,128,89,80,102,68,70,77,49,167,187,183,184,169,195,165,223,170,210,177,132,157,189,220,239,206,234,249,116,76,140,112,182,141,158,205,206,227,215,233,210,241,161,135,111,146,143,49,139,179,109,157,111,51,9,35,46,116,171,106,156,144,149,186,128,56,28,30,28,58,53,73,66,37,41,199,188,43,37,36,115,99,106,96,13,30,121,131,115,33,51,33,2,11,48,38,44,96,73,122,94,117,114,105,71,118,109,103,91,91,110,121,11,14,136,93,100,99,66,28,30,37,21,38,11,18,32,18,49,48,101,121,128,116,120,84,75,95,69,48,48,46,55,55,39,57,124,143,87,124,130,94,100,46,50,56,61,40,68,66,60,112,20,25,48,50,80,119,73,47,16,16,5,12,3,41,82,98,57,50,58,59,65,64,52,39,60,44,56,66,44,95,197,105,53,42,92,74,44,137,147,145,92,57,41,37,31,66,42,43,13,14,34,28,22,29,36,26,14,20,41,29,50,116,112,57,30,32,23,51,58,90,68,106,180,121,96,113,123,100,123,151,137,162,74,91,70,47,38,5,42,28,22,41,49,15,36,42,20,44,25,27,20,21,35,73,17,42,24,52,54,35,32,34,31,20,35,32,27,49,37,29,144,140,127,109,107,97,96,22,58,222,244,243,236,238,248,254,247,227,255,245,248,255,242,242,242,213,246,238,231,231,255,239,234,230,222,232,236,244,204,241,252,227,244,230,244,222,244,244,242,238,249,231,222,233,245,238,249,253,237,234,224,250,211,233,233,249,227,241,247,220,221,139,5,5,0,36,16,23,1,14,34,19,11,20,12,7,5,8,25,39,16,15,4,24,4,7,188,218,189,168,203,196,192,190,190,192,199,190,202,198,227,195,192,211,205,189,189,208,179,211,206,207,207,204,167,188,202,205,180,176,188,189,207,190,187,193,190,180,200,195,206,186,202,206,198,160,198,209,211,193,195,188,198,201,204,169,183,193,198,181,180,184,201,178,201,199,208,187,191,210,188,180,181,182,18
6,190,209,199,197,170,201,190,203,164,192,195,214,205,223,201,197,190,181,201,217,195,187,186,191,200,201,208,221,209,208,206,192,174,195,204,204,217,231,230,205,196,214,215,222,205,217,190,204,190,203,197,184,176,206,195,204,193,197,209,213,189,206,197,191,191,215,199,195,202,205,201,216,197,217,191,190,208,198,212,192,197,194,182,198,212,220,201,205,206,182,196,202,187,216,214,204,215,184,206,221,220,206,230,194,207,209,212,205,221,200,224,220,199,220,211,227,195,210,179,204,209,233,205,216,207,199,213,219,220,203,221,207,238,191,225,225,247,241,221,194,211,201,205,218,222,216,205,219,213,216,221,202,227,230,203,209,225,209,207,219,206,189,184,201,192,223,205,220,209,214,192,193,195,227,191,201,216,202,211,192,210,215,196,207,174,228,198,204,193,202,224,187,163,210,205,206,207,201,167,56,83,190,215,215,193,192,194,204,239,198,198,206,197,213,197,209,198,205,213,220,187,191,211,200,187,199,203,162,153,205,209,207,199,142,178,194,213,217,214,224,148,118,95,185,147,200,200,189,185,159,176,208,207,208,217,207,203,232,205,169,205,231,238,189,132,95,49,103,77,90,91,8,117,175,147,164,218,212,194,193,128,209,206,186,178,184,232,205,205,228,255,131,109,170,140,173,118,147,187,202,241,229,246,227,194,107,118,134,169,86,85,208,127,1,149,156,59,71,69,22,58,110,68,76,96,64,86,40,24,43,24,69,88,87,55,70,90,67,198,117,26,76,84,98,111,71,42,55,131,139,113,59,42,64,27,8,22,34,52,95,92,116,106,102,125,103,97,94,88,105,67,74,130,145,93,48,82,140,126,86,137,22,17,9,6,21,18,3,35,26,26,59,109,139,162,114,107,99,81,106,100,100,102,107,83,59,80,92,114,123,147,58,132,148,180,131,45,122,50,55,70,64,80,61,87,18,46,47,46,47,56,62,53,31,12,12,24,40,14,41,31,55,87,98,109,81,88,111,90,74,119,74,78,94,85,52,34,12,20,58,65,33,86,103,128,120,69,46,11,11,44,31,41,35,42,33,51,22,83,56,34,45,57,84,16,80,97,69,14,37,19,6,39,35,69,67,103,203,88,71,110,100,69,60,69,123,110,57,59,85,68,56,23,63,44,28,47,48,38,9,21,23,41,19,17,25,68,32,59,75,39,63,23,52,42,26,19,39,24,32,20,48,56,58,9,107,136,140,113,104,128,112,39,23,195,255,251,254,242,252,241,226,253,230,234,251,227,244,235,237,244,220,239,238,238,217,249,246,236,228,244,229,222,253,248,219,239,238,250,237,238,238,241,236,251,228,255,239,234,226,240,231,233,241,253,227,245,241,244,233,238,247,227,249,255,233,119,2,0,10,2,9,17,1,3,17,18,4,25,10,13,1,6,1,30,14,16,4,39,6,8,173,215,181,205,199,198,201,200,196,185,183,196,183,222,199,197,176,188,205,202,199,208,177,191,176,174,181,178,201,198,175,194,192,201,185,209,199,200,201,201,202,207,183,169,176,207,198,202,185,172,185,199,205,200,181,196,211,182,208,185,178,180,196,187,203,197,187,194,170,180,208,178,189,212,198,189,219,197,194,208,211,199,188,195,172,209,196,177,191,186,182,172,177,207,170,203,200,200,218,191,180,232,182,213,209,178,215,187,187,208,195,201,188,229,209,218,218,206,211,210,204,200,225,224,207,206,197,185,195,192,213,201,218,162,210,209,204,227,213,199,206,203,215,168,214,211,184,185,189,196,197,196,196,181,204,207,220,223,233,171,211,205,215,213,210,204,190,193,206,182,197,207,232,204,236,198,203,196,206,196,193,224,217,183,212,190,206,214,208,202,216,206,199,199,207,218,229,215,217,202,227,211,206,233,196,223,231,196,189,206,188,219,214,207,198,198,234,204,187,183,211,235,223,223,203,201,192,210,218,239,234,221,224,206,217,180,190,182,203,198,221,202,231,238,171,211,201,211,231,180,212,210,188,181,227,206,225,188,186,219,183,189,195,215,208,218,185,181,161,201,186,134,222,224,217,227,192,195,85,98,151,195,217,180,213,221,203,203,212,197,211,215,188,181,216,198,193,225,217,177,199,190,225,203,173,214,165,167,
189,218,224,170,148,190,204,216,197,202,186,136,119,114,173,188,177,147,141,182,172,149,186,196,209,208,216,240,206,206,164,224,225,228,166,135,84,77,124,91,130,77,25,119,154,173,231,226,192,199,151,117,216,215,199,180,209,225,238,192,222,237,136,200,203,185,166,129,149,167,184,221,240,252,214,76,80,142,183,172,64,121,221,84,88,195,115,57,44,77,81,54,58,70,65,50,43,50,45,55,84,43,31,50,24,63,41,32,26,64,54,50,84,82,108,67,45,48,100,147,119,57,80,39,1,26,22,23,71,64,95,101,116,119,90,127,98,109,108,95,95,34,150,168,158,123,90,128,173,125,124,101,44,12,6,17,36,31,41,56,16,50,88,121,132,111,117,114,88,87,127,109,149,134,122,83,94,123,103,99,117,91,120,135,98,152,83,50,79,18,45,79,76,114,50,64,39,26,30,18,39,40,20,35,16,55,22,30,36,15,14,14,85,100,87,74,78,133,176,127,96,125,146,138,119,50,7,16,31,37,47,60,64,147,146,142,120,58,44,25,26,35,28,41,51,63,63,61,53,66,60,61,55,81,69,55,73,83,37,53,30,33,40,41,12,31,25,67,116,58,51,74,98,76,53,80,75,104,69,119,116,66,40,85,50,67,67,65,54,10,16,9,23,27,52,50,40,62,73,97,113,90,89,75,30,36,52,30,40,53,49,27,13,37,58,19,109,162,131,144,86,86,118,40,30,186,239,232,243,243,246,237,224,235,222,230,240,249,242,230,238,235,213,235,223,246,245,241,249,236,231,243,231,251,226,239,207,216,232,246,247,235,231,235,233,225,229,240,242,241,247,216,236,228,219,246,235,245,222,249,204,240,209,244,244,249,239,135,5,10,1,0,21,4,16,18,0,5,14,16,2,13,10,19,2,8,17,1,0,2,9,20,191,189,184,217,195,184,206,215,205,213,180,196,200,192,221,195,192,233,191,198,198,173,205,191,199,190,215,176,189,198,196,206,194,193,198,215,175,192,192,193,190,176,206,198,180,199,196,196,193,174,199,192,194,182,202,192,213,196,197,211,204,195,224,189,183,176,185,204,221,188,189,182,178,193,195,182,194,203,210,181,202,188,210,196,190,187,198,202,176,185,183,208,202,201,202,196,211,190,201,179,212,194,210,189,204,202,212,188,209,176,187,222,196,205,202,200,201,172,205,182,199,212,211,203,207,197,183,185,209,210,215,193,209,202,191,203,192,203,184,217,199,198,206,194,216,193,205,207,198,195,179,213,193,199,209,207,210,200,192,208,204,190,207,198,200,197,209,195,223,188,191,193,206,213,200,224,202,227,213,231,208,212,207,217,217,203,219,218,207,217,187,217,210,205,222,228,221,200,199,193,239,193,205,197,202,192,221,212,193,215,219,219,212,189,210,241,203,228,194,212,199,223,179,212,218,213,194,198,230,206,238,201,227,210,193,211,205,171,219,212,195,191,183,191,218,222,201,181,182,218,193,211,203,202,211,220,214,199,218,170,188,198,197,220,210,217,196,196,194,204,207,163,200,214,185,197,214,216,204,214,197,192,209,196,206,183,177,220,216,190,196,224,214,195,210,222,192,205,229,203,187,183,173,207,188,178,196,170,201,214,233,176,147,191,186,212,199,229,181,167,111,44,118,180,159,174,162,206,171,155,203,195,198,189,211,228,241,208,181,201,213,228,127,131,110,68,100,80,99,74,60,164,220,193,241,173,150,211,153,175,222,192,150,172,218,219,236,198,247,221,154,239,239,162,165,185,157,129,145,210,223,233,146,14,95,162,152,123,142,177,169,68,93,162,79,64,67,65,57,53,55,66,63,73,69,48,82,25,44,24,5,13,21,0,28,2,4,43,41,99,80,71,75,31,43,85,116,107,58,92,33,18,1,8,17,39,112,113,103,135,99,121,93,109,101,105,115,82,91,116,216,215,170,128,145,117,157,84,111,99,24,33,5,31,7,36,49,12,59,99,99,140,124,89,84,101,88,90,76,80,76,99,74,40,42,61,95,85,74,102,82,72,45,72,41,37,78,35,49,77,99,84,54,51,25,16,36,28,20,39,18,1,21,21,31,36,19,14,41,57,69,77,37,58,17,25,41,47,35,32,19,41,49,19,14,27,12,61,77,49,119,202,183,110,74,33,28,40,45,35,25,33,58,40,54,40,21,43,43,68,75,83,124,119,56,66,50,33,20,19,28,36,17,42,21,58,88,55
,17,46,121,79,90,94,95,112,116,125,127,61,28,86,74,72,16,14,79,33,46,46,46,40,66,40,78,73,89,92,107,113,81,93,69,56,13,52,36,23,45,38,38,25,25,28,73,113,129,130,96,85,120,78,21,154,239,235,255,227,254,254,228,215,227,226,228,222,225,242,227,229,237,229,235,205,236,225,242,235,222,249,251,245,224,217,250,220,238,220,243,226,237,234,207,233,247,239,248,249,248,244,243,239,228,244,242,224,234,233,226,241,212,241,240,245,222,117,16,23,11,26,8,14,24,6,4,23,13,26,25,7,15,0,5,6,9,14,10,27,15,16,199,207,206,171,208,209,183,189,211,201,187,202,211,182,205,201,202,195,201,167,202,181,214,199,196,197,185,188,198,181,203,190,202,214,180,185,187,165,204,201,194,197,196,214,191,202,173,211,187,193,187,166,217,208,190,196,211,200,187,188,204,188,181,199,192,191,184,187,192,200,213,187,212,184,177,194,186,194,181,212,197,201,211,202,182,186,176,197,189,179,201,199,181,195,185,172,204,191,186,175,200,187,189,222,210,204,202,182,220,191,176,234,194,196,188,195,221,205,210,202,227,199,199,207,205,205,194,193,195,203,202,202,204,201,217,182,220,212,193,180,192,203,174,183,200,202,199,206,218,208,200,187,217,198,211,186,223,191,204,206,210,206,220,195,210,213,199,222,193,209,188,211,191,178,205,214,209,205,178,218,192,216,228,210,210,214,205,195,194,222,239,207,199,199,191,215,191,227,196,212,235,206,205,219,210,221,216,221,198,211,201,191,204,215,210,215,209,192,192,208,221,210,200,223,216,237,215,189,212,207,202,223,230,192,202,225,194,190,202,200,206,200,206,225,227,206,188,205,225,213,193,201,208,219,210,207,191,180,201,192,206,225,206,212,208,218,178,179,171,195,186,202,240,212,199,188,196,205,173,217,218,183,225,201,192,209,191,207,207,179,232,216,190,181,202,209,197,201,195,204,217,218,192,212,175,215,170,173,201,206,199,171,127,211,191,199,201,224,172,121,84,55,155,180,164,158,156,196,165,134,195,185,207,195,196,205,203,195,164,212,179,188,135,167,100,55,108,77,92,52,68,185,243,213,200,110,178,204,198,179,144,154,153,201,233,221,220,237,232,185,160,238,238,186,141,181,174,157,133,182,234,230,102,28,134,143,154,186,224,195,79,75,75,88,41,6,25,17,33,20,32,32,45,50,50,33,39,26,1,15,13,12,3,50,10,11,17,62,88,109,100,51,61,33,77,109,114,65,46,52,4,11,10,34,25,119,106,123,94,86,87,56,120,99,84,76,77,74,112,176,229,192,113,113,141,124,130,97,128,70,14,37,6,33,28,31,31,56,95,192,148,117,92,65,92,108,93,87,33,43,50,63,42,80,45,43,78,77,91,64,79,55,29,48,12,34,42,10,48,70,40,43,51,50,41,26,27,38,9,9,5,24,30,26,28,29,60,49,68,97,56,84,74,83,32,37,41,17,27,20,58,28,39,49,40,33,20,72,89,91,163,189,138,70,75,56,36,52,36,32,25,38,27,37,21,12,28,45,30,23,24,70,104,140,114,87,53,69,28,38,29,38,30,49,26,108,80,51,31,73,118,100,106,81,113,168,101,60,57,35,59,49,49,42,39,60,53,62,81,55,70,66,72,74,83,100,115,114,97,92,80,99,63,42,34,18,24,41,71,20,52,41,40,43,64,126,154,138,101,114,76,56,37,130,245,233,254,230,228,237,229,216,236,232,241,229,242,211,230,235,216,238,248,250,224,221,254,240,251,231,238,236,242,249,243,249,243,226,246,245,255,253,238,243,247,234,214,254,245,228,239,250,243,247,228,242,239,219,233,231,238,235,241,246,220,116,5,15,12,34,5,13,16,9,7,17,22,2,34,4,9,2,33,3,6,4,15,2,8,37,193,198,179,208,201,203,173,191,197,213,167,198,195,211,206,174,190,214,175,223,213,195,196,187,200,209,213,203,192,174,233,217,179,196,188,210,193,205,176,176,170,201,182,194,197,185,202,193,197,198,184,193,204,186,196,199,183,173,192,198,189,185,191,193,202,194,215,220,181,180,208,194,209,217,214,200,199,187,190,216,178,202,204,185,212,202,197,197,245,195,213,212,200,208,206,218,189,220,199,176,183,183,172,194,193,215,201,204,195
,193,182,201,213,200,215,209,218,202,205,212,214,198,211,222,184,208,216,212,216,210,240,205,204,196,200,205,206,219,195,189,205,192,203,190,194,207,191,213,191,210,204,198,183,236,216,204,202,231,218,207,214,229,197,200,189,209,182,185,219,202,213,211,212,206,226,193,211,213,191,212,184,186,209,207,175,210,226,208,207,232,232,209,204,214,208,211,208,202,220,175,208,223,188,197,201,210,201,184,231,196,191,193,201,199,215,203,208,211,226,226,233,207,210,215,222,217,238,199,204,189,207,227,212,219,204,192,210,208,215,214,211,222,194,196,235,199,208,223,213,190,199,207,205,223,189,207,219,203,223,197,179,207,190,214,217,208,199,206,186,186,222,212,201,227,215,195,186,202,181,194,213,167,200,197,226,211,209,208,217,202,209,206,215,218,187,208,206,188,216,212,189,207,181,200,190,197,188,170,210,210,206,143,166,225,215,205,199,215,170,119,103,118,190,205,173,164,190,200,150,128,186,173,202,240,189,190,216,195,186,212,225,199,159,189,118,77,80,74,69,17,59,184,169,170,213,163,207,246,176,172,145,171,188,216,224,223,224,237,226,118,170,233,238,184,159,160,200,165,138,191,208,215,87,101,143,123,154,200,244,146,44,49,77,72,19,35,23,25,28,21,20,25,33,16,16,1,13,6,20,5,12,12,12,11,14,20,32,48,85,106,101,35,51,75,98,111,63,53,23,14,5,12,23,54,52,103,106,116,108,123,91,94,116,107,86,110,90,89,162,197,210,125,71,115,106,98,138,97,109,75,33,21,19,11,26,20,22,51,156,205,159,72,120,40,30,39,66,45,34,52,18,56,43,31,47,53,56,44,61,87,67,53,41,57,33,30,5,53,46,81,65,38,64,66,62,78,79,37,24,25,19,0,38,69,87,74,83,113,121,106,69,88,85,46,46,31,14,40,32,44,49,58,80,72,93,105,140,129,162,141,121,142,113,146,113,81,96,92,101,79,75,81,48,45,16,14,17,7,26,17,64,76,80,104,78,119,94,54,67,30,49,38,37,10,73,131,84,52,13,94,133,98,94,80,126,139,60,22,83,57,2,33,69,10,41,33,64,62,80,65,60,88,60,86,102,89,78,121,92,79,65,80,82,82,69,22,45,29,23,20,19,35,38,17,71,143,141,117,97,82,119,56,17,159,239,254,246,229,232,212,232,224,241,241,234,209,236,239,231,238,238,231,244,207,242,229,249,218,236,244,229,249,245,255,231,232,245,237,246,225,222,237,226,243,248,235,231,250,223,239,241,243,239,213,234,245,242,223,241,240,248,233,237,250,241,110,11,8,0,23,18,1,27,9,27,20,12,9,16,5,1,12,19,13,15,22,18,29,28,35,208,207,218,217,204,188,188,197,175,213,180,183,203,199,195,194,206,215,191,206,194,201,200,176,172,189,189,199,180,192,199,188,187,169,197,192,203,182,196,213,176,204,203,195,205,184,207,195,190,190,192,207,197,189,193,179,181,214,192,198,164,213,184,226,179,190,221,214,202,195,205,208,203,196,184,193,204,197,183,210,176,174,182,187,186,192,174,163,192,160,160,206,225,197,182,199,198,187,208,186,185,211,185,222,191,202,198,209,197,197,214,193,184,167,196,215,185,193,217,204,208,190,193,199,205,204,209,196,195,199,218,203,191,194,171,216,219,181,184,200,213,196,199,193,185,178,232,182,180,224,193,219,195,197,206,197,217,179,201,194,190,204,207,196,199,207,217,222,205,204,201,206,191,193,189,198,221,183,202,208,197,206,192,209,218,206,198,233,192,208,206,216,206,202,225,226,212,237,222,213,202,188,217,233,202,222,224,213,216,224,234,209,186,212,214,222,244,217,199,201,212,207,207,210,199,198,215,193,214,205,194,227,215,200,208,211,209,227,207,215,198,232,202,230,210,198,197,188,202,168,198,196,223,224,182,201,194,215,195,204,210,224,201,214,222,199,200,206,210,205,210,199,183,198,214,182,199,211,182,204,198,194,187,200,194,196,211,188,216,212,203,194,195,187,215,184,207,204,179,193,192,219,210,219,210,189,138,157,205,196,189,164,167,191,215,229,197,216,155,120,95,109,171,170,135,174,197,238,159,108,183,226,173,215,204,197,207,164
,167,205,184,212,157,172,117,71,103,104,84,60,68,64,153,236,193,176,199,181,169,163,190,197,199,218,223,247,227,232,214,86,169,244,245,210,169,181,180,116,126,190,225,220,159,106,157,173,176,227,226,93,33,69,33,30,5,21,13,32,23,22,39,23,12,16,9,24,11,19,16,4,19,28,46,22,34,5,87,81,84,89,56,49,68,102,66,77,61,57,27,38,24,44,54,87,101,142,126,94,90,96,89,130,75,99,84,78,120,181,202,136,154,114,71,105,72,92,118,79,107,62,24,38,26,14,28,48,23,70,156,166,99,85,50,50,42,28,29,44,66,46,37,49,41,16,35,21,61,56,53,52,77,42,43,51,56,58,38,38,50,52,56,69,66,58,73,77,73,60,48,33,37,27,76,87,103,75,96,80,77,47,23,43,34,49,45,34,56,54,100,118,151,190,213,197,220,173,197,168,148,142,67,54,63,76,103,116,119,131,140,120,134,141,134,131,98,87,69,31,53,25,45,47,66,75,38,42,67,48,53,82,63,54,68,55,97,115,73,50,66,156,111,112,108,63,95,102,52,45,64,33,54,43,33,67,78,47,25,42,59,58,95,121,96,22,36,79,63,73,51,64,69,95,85,43,48,76,32,18,39,33,38,31,42,39,53,154,145,134,124,86,103,86,69,150,247,222,234,252,234,251,248,255,242,250,220,223,238,242,238,207,225,228,223,230,247,243,243,248,236,245,252,238,239,243,251,205,250,238,248,222,247,246,249,223,237,251,250,251,236,236,218,234,251,243,237,247,242,226,238,224,236,241,234,249,226,106,12,21,16,5,2,5,3,23,14,15,18,32,20,3,10,12,15,19,15,4,3,7,10,27,200,186,199,194,211,197,188,207,203,203,212,171,193,170,206,207,199,186,192,191,211,216,170,192,187,188,181,193,192,181,189,180,212,174,202,196,215,204,198,186,184,204,207,200,186,210,210,205,217,188,214,196,193,187,196,200,210,166,194,188,183,181,198,196,194,188,215,193,193,193,208,181,203,179,189,210,181,183,205,212,201,207,202,204,213,189,201,201,200,197,212,200,211,218,226,195,183,202,201,181,220,192,171,204,182,188,198,231,179,191,188,195,209,198,191,213,196,201,202,173,206,218,217,196,197,210,210,208,205,199,203,195,193,232,188,197,197,187,205,214,188,221,197,186,201,212,186,212,196,197,196,210,186,214,193,197,172,177,176,220,191,205,221,195,201,207,192,197,207,197,193,202,178,192,229,199,193,213,208,195,201,201,197,209,204,213,199,211,205,210,212,197,203,197,227,219,199,209,214,211,200,205,205,210,227,210,201,202,191,180,201,191,200,187,219,178,199,218,224,214,215,228,211,214,214,192,193,185,230,219,221,199,219,211,222,211,201,204,203,202,195,218,216,186,197,182,206,207,193,208,184,192,218,210,207,174,175,172,204,196,186,200,185,204,225,221,213,224,190,181,224,177,196,200,205,189,204,208,202,191,200,202,185,189,240,213,181,217,195,189,210,205,205,175,165,185,210,207,207,200,186,193,205,214,212,199,164,176,212,232,199,148,171,209,198,212,207,198,169,95,98,83,154,151,176,205,217,237,205,115,181,192,180,223,212,193,191,178,170,190,165,197,138,87,80,63,137,116,84,88,70,137,207,227,207,181,131,184,189,153,198,181,218,215,201,237,213,238,194,100,184,225,229,225,202,168,118,152,163,189,225,239,197,146,183,244,192,221,189,39,59,49,4,30,22,31,10,32,27,11,30,13,8,18,39,39,19,44,22,18,15,10,14,3,27,45,85,90,51,47,53,64,124,101,67,83,38,32,11,91,69,52,66,96,115,143,118,125,91,108,82,72,107,57,98,93,161,190,119,72,92,62,101,108,89,89,142,121,108,54,10,16,26,46,46,29,21,87,153,135,81,70,40,49,46,49,52,27,53,57,40,55,50,28,54,47,44,42,52,50,63,50,75,41,38,41,16,52,37,27,40,58,51,66,69,56,66,47,80,57,66,102,121,93,62,54,50,79,84,35,25,25,10,42,49,79,122,204,205,221,223,214,155,140,133,53,68,60,74,83,54,23,16,21,56,44,80,91,39,36,51,59,75,110,122,180,153,120,120,100,46,50,31,38,32,39,38,3,33,55,78,55,79,59,68,87,69,118,93,154,104,103,97,37,72,112,33,13,22,23,48,26,35,59,58,57,47,78,64,58,56,90,43,53,28,28,60,73,52,51,45,55,79,54,72
,42,43,45,48,44,36,2,28,23,55,139,160,125,121,74,97,89,41,167,204,220,250,254,252,253,219,215,214,227,218,230,243,220,206,221,216,243,228,255,242,254,248,252,236,249,253,219,251,240,242,251,229,244,248,253,236,234,238,205,244,233,243,252,237,253,241,222,255,255,236,214,250,234,222,246,227,224,244,231,231,122,29,6,38,6,12,7,30,7,11,30,8,6,1,1,0,18,8,6,29,1,8,53,23,25,204,194,189,235,188,207,213,196,197,208,175,188,172,182,206,175,194,188,196,210,179,216,200,202,182,190,191,220,209,196,183,218,198,198,207,216,216,185,213,176,188,185,203,199,180,203,177,213,188,198,204,198,189,227,193,186,222,168,188,190,187,189,179,185,208,195,200,208,190,190,180,189,202,191,199,189,162,173,203,195,204,188,188,187,184,203,211,208,223,178,200,178,193,193,194,206,204,178,197,183,212,214,199,214,189,185,188,210,208,190,197,200,204,191,215,182,203,189,177,177,189,191,196,198,204,201,216,206,214,194,191,197,212,209,191,169,209,183,227,214,194,201,196,182,189,227,190,178,194,226,218,192,206,196,175,196,212,173,223,184,226,220,199,219,210,197,200,217,202,194,178,200,192,206,202,206,204,199,199,216,234,229,197,213,217,196,197,215,221,209,193,202,211,196,209,217,204,229,213,235,218,229,194,213,201,205,225,199,208,213,212,232,203,207,178,188,211,215,229,193,193,221,230,193,206,195,217,198,195,193,221,203,205,186,209,190,181,219,197,214,193,194,222,192,208,208,219,233,204,210,209,218,202,218,201,196,203,204,204,183,235,203,200,217,200,187,179,228,182,208,212,210,179,213,200,204,193,207,188,228,223,202,199,181,217,179,214,194,202,184,205,186,231,202,211,194,216,198,194,180,211,187,198,208,223,160,161,183,204,212,182,161,175,192,203,226,192,179,134,93,96,77,136,168,192,198,230,214,165,102,158,203,215,185,195,234,233,182,217,178,161,170,120,99,67,58,115,81,73,72,68,154,243,212,177,159,132,210,176,115,173,162,201,220,230,220,212,242,190,88,210,237,236,251,212,146,133,152,161,178,175,204,183,165,197,233,198,214,170,59,71,41,9,29,35,18,28,39,10,25,9,19,22,28,8,10,20,37,33,2,23,7,56,40,26,43,104,65,23,36,49,88,70,71,78,49,20,37,74,139,177,141,121,129,104,113,79,96,106,118,93,98,95,115,105,88,105,114,72,61,90,107,98,114,98,99,91,139,86,45,28,26,36,52,21,34,2,23,116,69,59,50,48,33,57,21,50,59,62,52,50,35,29,20,19,63,25,37,70,21,38,26,14,28,38,22,26,43,67,62,51,67,62,48,67,50,47,63,59,78,73,104,74,49,38,28,21,52,38,43,25,31,53,128,159,195,162,165,156,94,75,24,46,66,48,77,19,76,68,52,55,66,54,44,66,58,39,56,59,33,49,48,36,30,51,75,85,128,154,130,119,104,75,69,29,46,16,23,22,8,34,55,35,82,68,63,93,95,102,125,99,101,63,52,85,68,61,71,54,27,49,63,85,56,50,60,61,58,27,36,42,46,31,30,16,61,33,33,57,50,52,28,44,60,61,91,45,84,45,35,41,50,26,29,72,147,133,149,89,99,116,111,72,84,119,185,245,244,252,217,234,235,224,224,232,228,228,224,218,236,224,232,236,249,235,240,245,240,220,246,233,255,246,252,227,221,244,242,228,251,255,227,248,244,220,242,249,228,240,217,241,253,224,243,246,230,220,243,239,239,218,241,232,231,241,117,6,29,5,8,10,1,14,25,2,26,35,16,17,3,14,0,20,3,9,15,36,8,6,11,210,189,173,187,191,181,185,213,202,186,178,202,193,192,182,192,183,195,210,212,186,194,190,190,184,209,190,174,192,191,186,197,175,195,211,197,188,203,195,205,211,209,207,213,197,202,215,220,191,181,177,195,174,178,189,219,192,203,214,183,188,182,188,186,192,194,205,198,179,184,183,190,174,190,211,205,186,175,196,204,184,190,210,193,198,197,187,193,178,222,212,190,210,187,211,207,202,185,194,213,198,202,201,207,195,204,214,214,208,208,196,207,203,198,184,195,202,203,199,195,206,194,170,208,216,181,197,206,179,204,214,229,190,184,185,176,169,195,194,219,188,20
3,207,196,217,222,196,206,186,200,204,199,190,202,210,216,210,191,199,206,211,194,191,174,208,204,193,196,195,205,211,184,212,203,207,191,188,205,211,188,190,213,239,197,215,215,217,222,212,207,184,231,217,188,192,193,211,198,205,176,213,208,203,238,187,226,212,211,226,229,209,218,198,230,208,210,212,207,193,208,207,210,195,190,215,199,213,221,208,226,221,192,198,190,172,222,198,229,203,217,196,218,214,168,180,196,218,211,215,180,194,196,222,210,197,209,197,202,208,184,213,208,218,176,199,210,210,195,182,188,194,205,189,216,217,217,209,204,196,223,208,215,191,201,189,226,199,169,200,190,185,196,216,220,203,172,198,196,216,200,194,198,221,215,213,144,179,187,193,215,205,152,208,208,215,216,212,182,138,105,83,96,180,167,191,220,217,206,154,107,137,225,239,190,210,179,210,178,160,206,188,206,160,162,113,60,108,80,90,92,53,190,240,147,154,149,167,243,147,145,150,181,242,216,233,227,212,215,141,124,240,213,252,239,205,171,189,179,204,187,160,178,167,159,197,193,209,191,157,87,73,7,17,8,17,25,16,12,19,16,15,19,4,16,5,14,18,25,36,29,17,21,7,11,76,82,71,60,33,61,85,104,70,60,32,44,1,43,145,203,216,108,76,85,113,115,122,90,115,84,98,93,99,88,108,90,72,72,83,128,135,80,88,84,77,134,93,163,90,21,30,15,43,46,20,22,26,56,35,65,52,23,20,49,51,59,47,75,56,50,26,13,67,48,35,64,49,60,37,50,36,25,7,28,43,32,43,49,62,29,42,53,68,59,64,51,56,68,67,64,71,58,26,42,42,8,26,32,24,53,93,129,213,211,158,143,70,41,49,49,32,57,35,53,56,55,53,51,40,34,66,60,59,53,50,57,44,67,42,71,38,14,39,26,62,60,59,53,47,81,100,128,146,75,91,50,56,31,43,8,7,17,52,80,64,68,57,69,90,74,68,56,49,41,63,63,53,31,35,36,68,68,58,66,52,47,46,57,39,21,46,45,23,47,24,53,63,57,59,55,35,46,62,56,71,69,42,33,70,11,25,39,59,35,30,107,140,129,99,84,107,91,77,149,68,100,185,218,223,243,233,241,245,228,235,230,229,234,218,244,219,254,247,237,246,236,227,232,237,240,231,228,241,231,238,248,249,241,231,231,222,238,242,230,223,254,233,250,246,240,211,239,249,247,228,227,246,228,227,215,224,234,231,251,224,91,19,0,12,21,10,14,19,6,7,13,22,2,24,1,12,2,3,9,1,22,14,1,30,29,198,189,194,215,215,175,204,197,182,208,197,192,209,207,205,186,197,217,214,215,186,209,216,189,204,215,178,188,166,192,200,196,197,189,195,212,197,184,190,202,188,185,190,190,207,157,189,206,189,185,190,199,215,195,201,202,202,195,208,202,205,207,219,187,193,175,185,174,210,207,206,165,206,201,187,183,189,183,212,193,188,197,225,205,192,204,181,172,204,205,194,194,181,194,212,227,171,230,197,176,181,194,234,199,217,181,206,195,227,221,189,191,166,211,178,213,193,207,190,218,209,196,196,197,213,174,231,194,206,212,198,187,179,199,177,188,185,209,218,199,237,203,206,197,203,191,223,203,221,177,209,186,216,198,200,208,220,194,198,202,188,207,195,180,205,211,215,195,210,213,220,189,213,207,216,203,209,229,212,225,213,217,195,211,175,192,198,218,203,198,222,230,219,194,238,196,195,190,190,208,216,214,206,216,215,206,209,196,209,209,197,250,215,228,201,204,179,221,203,188,207,199,202,182,207,221,207,204,195,179,204,203,203,211,220,196,210,218,206,207,191,207,196,203,189,205,208,216,204,195,187,190,196,191,172,197,199,207,199,209,207,209,198,219,193,196,219,201,188,212,209,210,197,194,208,187,198,220,196,188,197,206,191,173,200,208,195,205,189,205,206,180,193,215,208,205,197,211,203,188,221,217,176,191,214,144,158,179,212,221,160,160,193,201,209,207,195,189,133,117,71,66,141,169,188,180,227,223,160,167,175,206,203,224,201,188,194,152,176,174,184,212,201,181,119,150,171,99,80,56,31,147,205,190,226,166,193,207,148,199,195,240,228,201,232,215,233,209,151,169,214,213,236,229,242,198,172,197,185,17
1,183,192,184,166,209,238,162,140,87,32,24,9,6,27,14,15,8,3,57,38,25,32,14,14,6,37,10,34,27,24,16,18,21,47,60,76,53,55,62,84,104,64,56,71,44,11,33,44,142,140,173,75,46,105,90,95,102,114,121,108,88,116,102,78,128,91,72,125,195,152,125,102,111,121,104,99,127,120,86,40,31,35,6,25,5,25,10,28,26,27,41,52,51,47,103,44,70,65,25,37,20,49,79,65,66,68,66,34,27,51,29,22,27,62,46,34,44,45,30,22,34,66,39,38,44,39,27,46,56,55,49,28,43,14,17,35,44,57,64,165,216,208,142,115,47,61,30,38,44,32,44,33,54,36,41,26,17,20,8,40,44,79,71,28,63,70,22,43,65,47,52,27,37,26,37,38,47,52,74,49,96,57,82,97,109,89,83,61,28,24,10,28,19,29,45,52,61,57,30,50,55,61,58,55,26,42,48,29,28,47,46,30,52,55,58,77,36,29,55,47,43,28,40,51,47,47,32,63,37,55,27,49,45,36,36,37,47,44,48,29,24,39,29,30,35,110,169,123,103,108,118,82,107,111,107,50,85,116,132,177,195,216,228,243,249,224,244,229,212,203,255,240,240,237,236,248,249,230,251,227,233,243,239,241,250,239,230,250,238,245,233,240,229,229,225,246,225,236,245,222,235,230,252,215,230,225,227,230,237,214,215,230,237,219,224,128,12,0,1,31,7,4,12,7,6,15,7,27,15,13,4,8,9,9,22,3,3,5,12,22,178,191,198,205,199,202,191,172,175,170,198,177,192,208,188,224,180,218,174,195,193,189,194,174,184,181,204,210,195,175,195,194,179,205,215,193,207,197,188,184,207,187,189,208,197,178,187,188,203,190,185,194,201,184,181,199,183,174,193,190,216,191,187,195,203,181,177,210,198,196,219,157,187,195,212,187,199,190,210,192,176,200,195,199,189,191,191,211,202,187,179,197,197,197,180,205,183,213,197,185,195,200,203,206,187,177,193,191,212,216,183,192,192,225,192,217,204,210,185,225,209,217,182,206,212,193,207,208,223,208,195,203,231,218,225,191,214,197,195,198,200,206,212,203,196,218,202,207,197,215,214,181,194,181,209,199,201,207,204,195,215,200,199,185,193,223,216,209,195,198,201,215,188,215,180,210,211,195,192,204,168,207,203,212,227,193,221,213,183,215,196,203,183,208,230,200,221,193,214,224,224,186,206,208,202,219,188,198,206,196,223,206,216,211,205,205,207,210,197,203,199,204,187,217,201,212,196,204,182,199,208,193,231,215,223,210,196,200,178,195,202,223,183,199,216,195,202,186,198,208,217,186,201,204,177,186,188,196,191,199,210,202,199,209,216,204,199,186,206,194,193,223,180,205,197,195,223,188,186,193,211,199,189,186,203,201,211,207,201,198,235,199,184,193,206,205,194,190,202,193,207,206,209,179,214,171,174,214,196,197,159,180,197,215,216,205,187,188,87,125,95,71,160,187,225,217,218,204,173,163,126,197,239,218,219,196,220,177,157,168,240,219,178,126,100,145,164,103,102,71,26,130,198,207,236,156,207,210,156,190,191,209,222,209,222,184,254,188,154,185,202,225,205,239,222,207,217,158,159,153,167,175,164,204,208,232,177,75,46,5,0,11,8,26,22,17,15,56,19,27,18,31,16,29,5,14,13,2,9,27,11,13,39,28,93,56,58,59,80,98,61,51,41,33,35,21,46,99,69,47,107,63,80,125,108,127,149,102,107,95,91,93,79,92,85,65,86,150,181,126,128,105,105,97,89,101,117,134,55,3,13,7,13,38,11,26,15,39,21,43,53,98,58,91,64,58,41,32,28,33,59,117,107,102,82,88,88,29,32,24,41,41,14,30,15,31,15,11,29,12,54,39,48,34,26,24,31,51,35,36,36,29,16,26,17,50,97,215,234,192,132,55,48,43,93,50,47,27,35,4,17,5,12,14,2,22,5,28,22,47,61,45,79,75,103,78,95,47,66,38,40,25,35,16,22,36,26,40,48,55,55,58,107,71,49,69,116,106,129,71,53,32,30,19,26,33,48,43,19,23,34,39,58,29,24,37,31,28,29,39,40,34,58,38,76,40,39,76,45,27,21,31,31,39,51,35,30,64,70,40,51,39,36,59,54,56,28,46,43,54,37,48,53,25,33,100,163,140,116,112,109,75,89,117,112,83,85,56,84,169,214,220,230,231,212,213,219,238,237,235,245,240,223,244,237,215,235,245,232,224,237,241,222,233,237,218,235,227,251,239,251
,253,214,227,222,223,234,212,247,235,223,237,211,241,222,235,240,212,241,215,237,221,217,232,222,121,20,11,0,21,4,5,23,16,25,12,6,10,15,7,23,11,5,13,15,25,33,12,28,3,180,177,188,224,196,211,188,214,217,207,193,196,213,196,206,216,220,213,181,176,203,228,202,178,203,193,190,230,209,202,183,198,197,186,215,179,186,178,180,195,204,190,198,189,172,186,184,184,185,196,174,181,189,197,204,198,196,203,173,190,207,198,198,186,181,194,210,179,216,189,179,165,201,180,190,189,194,200,220,208,210,211,208,190,199,187,171,201,200,198,205,197,217,207,214,179,201,222,189,196,194,206,197,219,200,214,223,216,194,208,194,185,187,237,175,211,210,201,199,216,181,225,171,196,214,200,213,198,190,202,217,193,191,209,206,180,201,193,202,208,217,183,188,205,200,186,199,202,207,200,217,213,191,171,187,221,201,204,222,200,226,197,194,204,213,207,212,210,196,188,197,221,189,179,198,212,216,197,206,194,219,205,201,207,202,202,203,207,208,192,221,208,212,205,227,198,213,208,223,200,201,199,224,201,202,213,209,210,198,201,198,215,207,199,220,206,225,203,197,227,207,211,219,199,223,198,159,212,192,188,200,224,202,202,196,200,207,215,206,204,195,222,190,211,199,190,204,187,192,207,193,190,196,195,223,198,206,200,217,191,211,203,200,200,194,205,175,197,214,179,182,206,192,181,200,214,204,193,215,193,200,210,215,187,180,213,194,193,156,185,220,183,240,192,192,191,229,216,204,209,190,197,216,189,209,148,192,205,220,198,156,193,181,190,195,188,139,132,103,128,118,88,147,194,237,172,242,194,162,146,133,191,191,220,218,190,230,181,150,211,226,136,103,126,96,93,142,134,130,76,42,132,212,219,168,158,225,214,120,135,173,221,216,209,199,226,234,157,140,199,178,243,222,221,240,209,221,136,122,108,168,193,185,148,143,169,155,42,27,14,15,5,13,10,21,17,22,39,22,7,18,25,10,8,8,18,36,29,29,13,39,50,53,52,67,54,79,113,76,61,55,48,29,25,20,1,75,119,42,60,99,100,138,145,153,134,128,88,111,99,99,111,114,102,127,118,135,143,127,106,101,89,98,83,79,117,114,105,43,15,37,3,21,22,5,40,27,36,10,38,115,106,78,72,72,53,46,46,70,88,105,135,135,104,48,49,44,26,34,14,45,64,34,27,22,19,9,4,16,24,15,43,33,18,21,55,50,39,48,38,40,49,83,53,118,198,238,197,111,65,40,57,41,31,39,14,29,5,5,5,3,9,21,24,8,35,11,15,28,26,57,42,92,95,120,76,62,55,29,8,6,14,4,18,30,22,13,14,39,39,51,39,74,77,77,108,58,73,92,135,95,65,44,34,40,53,26,42,30,34,27,55,53,26,24,18,33,15,8,43,41,45,49,67,49,56,64,55,55,83,105,79,105,61,36,40,37,37,4,38,80,35,29,30,36,40,21,74,26,38,32,77,36,41,18,76,153,132,126,130,84,91,103,114,118,97,84,197,214,243,255,239,209,236,227,227,202,236,229,245,226,246,241,251,232,239,242,247,250,231,231,232,226,225,224,221,221,238,231,241,249,242,233,238,250,238,209,231,220,207,253,223,225,222,216,238,208,238,208,206,224,218,242,214,219,124,20,10,5,0,10,6,22,0,25,17,17,21,13,15,22,5,22,0,10,25,11,12,11,28,185,211,188,211,199,197,211,202,200,185,187,182,187,202,200,217,182,183,200,194,201,188,178,200,188,178,175,207,186,180,206,189,201,171,194,192,185,191,200,208,202,204,185,203,174,175,192,211,195,206,194,187,197,193,180,183,191,171,205,181,161,168,182,206,187,177,201,202,219,190,164,200,201,189,192,207,210,167,220,195,184,200,194,176,190,188,191,194,190,200,209,217,190,201,208,202,212,187,205,221,205,175,184,194,201,190,213,193,175,189,177,195,191,185,209,208,196,200,212,197,202,194,191,199,196,184,194,219,192,207,196,194,200,193,212,188,199,200,181,167,199,203,205,222,195,208,205,185,188,206,183,190,207,198,197,215,232,203,208,205,220,210,190,217,216,224,213,212,207,207,178,220,205,182,213,187,215,208,219,246,205,185,223,213,213,239,213,207,207,200,226,225,2
26,193,226,191,211,209,189,211,201,182,196,207,196,189,222,213,216,215,194,201,196,228,179,228,209,213,201,226,214,218,186,209,220,212,210,205,191,182,217,203,212,199,223,205,206,200,209,205,215,199,189,192,183,204,209,224,194,196,207,206,221,189,204,182,204,212,221,201,198,189,197,217,177,176,218,202,202,197,198,177,187,187,202,197,209,197,207,186,224,210,202,187,202,195,205,199,206,204,172,192,207,198,209,217,193,217,216,208,212,212,182,227,197,118,167,211,204,187,151,194,178,232,196,184,168,156,136,144,131,96,188,242,227,210,237,199,168,167,142,195,212,203,192,187,212,200,161,192,172,123,138,124,80,125,129,110,159,108,71,131,207,193,177,189,237,128,83,156,202,224,190,202,218,230,237,141,186,189,224,227,209,206,192,225,224,179,112,136,196,169,121,100,43,62,64,28,24,32,15,19,38,17,22,42,23,21,10,29,24,26,7,11,18,6,36,21,13,25,17,23,51,83,64,81,127,116,76,60,53,32,28,9,12,27,114,104,21,77,108,99,101,108,111,94,74,73,98,134,116,98,85,100,176,185,148,101,98,98,114,111,95,98,83,120,167,112,23,5,11,22,22,27,44,52,47,31,16,33,81,112,85,52,61,51,56,46,60,76,93,65,35,63,44,43,4,19,43,17,53,22,20,54,32,38,20,42,33,74,68,58,71,84,94,120,163,162,154,198,203,161,151,168,175,208,130,121,30,54,56,46,11,18,28,20,18,11,10,17,6,1,19,14,15,18,26,43,10,42,39,61,103,59,93,51,50,28,17,20,21,23,2,13,8,12,6,18,27,28,9,28,21,53,52,44,69,62,63,76,140,117,65,79,75,59,96,31,69,54,53,38,27,68,37,41,10,7,15,26,26,35,48,79,51,44,29,45,86,123,177,135,112,99,49,102,30,25,29,42,38,61,30,34,45,49,44,40,17,7,29,32,63,40,75,68,165,156,148,118,105,76,93,116,105,109,132,240,240,255,249,208,233,219,224,243,233,240,224,245,233,233,201,221,241,225,241,225,239,227,221,231,241,222,223,219,217,225,226,215,214,230,238,227,232,232,247,240,216,230,226,220,214,200,217,234,226,223,221,225,244,243,233,217,211,100,12,2,0,4,3,29,12,0,6,28,10,17,0,6,12,11,11,44,19,13,24,3,10,9,195,215,195,194,207,191,198,201,199,201,214,182,173,210,205,185,199,190,184,194,187,201,219,195,202,184,196,185,173,174,206,201,185,208,186,212,161,185,192,209,186,203,191,183,225,182,220,202,174,185,175,198,200,200,232,199,168,186,211,188,180,192,169,208,182,160,191,175,202,198,217,229,187,175,193,171,180,227,179,192,191,202,195,202,208,198,181,173,190,203,210,181,199,194,201,219,208,211,206,205,201,189,202,182,194,201,180,217,230,201,210,193,187,205,216,206,213,194,215,211,226,211,209,219,197,220,198,229,219,204,191,209,187,195,207,190,213,198,214,211,220,193,174,196,227,198,231,179,171,183,200,210,211,191,197,206,205,205,217,207,213,191,220,186,204,197,202,208,218,188,187,199,215,218,210,205,223,224,215,202,198,204,209,231,230,198,214,220,208,207,209,181,205,228,209,230,197,211,208,206,217,201,221,212,214,215,202,199,196,206,185,212,213,201,219,205,188,219,192,219,199,189,210,210,181,191,203,189,219,208,210,210,226,204,194,188,197,221,197,204,171,204,194,211,188,192,195,187,203,200,201,202,194,215,174,190,196,226,166,178,169,180,228,174,205,193,209,181,201,199,180,214,198,173,178,196,189,188,187,169,171,171,201,190,201,184,209,220,199,195,211,204,205,198,197,218,213,213,201,197,183,188,195,204,167,141,208,197,194,195,163,158,179,208,217,198,164,143,130,139,112,118,233,223,237,210,237,186,186,190,153,186,197,200,221,232,231,177,164,203,239,160,149,165,104,80,98,121,123,73,43,128,181,215,183,221,178,98,103,199,223,227,203,222,238,226,186,140,206,163,223,208,218,219,241,239,226,189,180,161,218,194,81,48,55,19,30,11,20,3,37,23,32,2,21,22,21,27,21,35,39,17,20,14,8,12,21,20,22,19,41,36,60,57,81,78,89,69,79,41,10,33,19,20,26,61,114,70,55,121,113,43,42,34,34,23,36,42,74,102
,138,102,73,86,148,163,90,88,80,67,59,91,74,77,99,126,111,90,23,29,3,25,20,51,31,37,22,15,30,10,52,36,15,22,8,39,32,33,47,42,37,54,43,59,53,81,41,54,59,59,74,89,95,146,154,161,193,171,192,224,193,154,167,194,172,197,198,167,168,176,126,123,114,77,63,56,39,40,40,49,21,34,7,14,7,22,2,11,20,2,31,14,33,37,21,31,10,34,13,48,43,75,32,20,62,53,37,23,12,14,22,27,7,15,41,30,18,31,2,28,14,6,21,18,12,35,46,55,93,94,63,117,128,142,145,143,156,136,120,145,113,126,90,76,82,59,50,29,27,27,41,30,22,32,53,23,41,25,62,69,105,116,102,62,59,110,66,67,69,67,45,75,41,34,45,18,9,42,14,8,24,37,42,33,40,66,122,154,103,123,100,94,92,116,103,85,115,142,215,239,223,246,244,225,209,207,242,216,227,226,235,236,250,255,225,250,239,227,229,231,238,245,242,226,244,212,239,254,229,204,221,230,224,216,227,215,226,229,242,216,225,243,223,231,233,209,235,227,221,229,239,232,234,231,220,125,13,29,15,0,24,15,7,21,19,5,22,14,12,7,5,2,8,28,16,15,8,15,3,25,180,209,191,183,223,189,182,213,199,185,202,178,201,200,182,196,193,218,192,197,210,217,191,193,197,171,182,172,200,189,190,199,202,182,184,204,164,217,197,198,216,192,194,216,189,190,181,194,192,174,208,181,221,170,204,187,199,201,189,177,206,198,207,201,199,205,181,165,187,207,218,188,189,186,187,178,181,192,198,182,192,207,176,181,186,213,190,192,195,206,199,177,188,178,200,199,202,223,193,205,199,191,203,208,189,203,222,201,206,201,208,206,219,218,199,193,182,220,199,214,189,198,199,201,195,200,214,196,197,200,192,219,207,213,214,209,215,209,178,208,229,201,185,224,198,196,209,202,183,223,194,182,209,207,207,203,198,186,203,208,193,197,182,205,228,189,203,221,194,211,199,205,187,217,203,227,196,198,208,203,205,219,194,216,191,220,206,214,222,230,207,203,205,229,210,227,217,197,202,225,195,211,211,197,195,223,204,214,215,215,202,207,197,197,202,217,191,211,210,203,201,199,210,202,208,221,209,231,202,203,219,210,222,178,211,190,201,207,224,188,200,191,216,200,208,202,192,197,215,183,216,170,188,199,211,178,176,209,189,200,196,198,206,199,201,181,205,201,203,182,170,172,203,212,194,183,192,212,178,180,191,209,206,193,180,161,217,202,206,219,176,202,177,222,203,207,204,193,170,172,187,217,178,220,195,162,207,191,201,178,148,171,169,181,181,160,122,107,105,131,124,148,190,203,201,213,243,204,204,181,125,189,196,228,208,179,217,167,211,217,210,182,180,178,134,141,106,121,87,75,84,135,198,204,192,188,185,121,164,227,221,218,222,221,216,243,161,148,224,146,207,215,214,207,216,213,219,184,187,194,209,161,52,41,103,42,35,24,19,8,35,11,33,22,16,8,6,15,42,19,9,10,51,17,49,22,40,33,3,40,39,28,63,83,100,102,67,73,49,26,14,0,24,22,50,109,129,71,92,139,66,61,80,61,67,60,40,35,31,36,75,112,97,107,121,109,96,108,73,68,78,75,93,66,70,147,138,60,45,39,33,39,23,39,16,24,47,26,40,60,57,53,61,65,65,64,54,85,92,84,110,118,147,143,135,173,175,184,183,168,98,162,179,181,163,166,100,117,140,130,84,50,66,58,83,39,36,35,81,62,57,97,27,44,46,49,38,46,36,2,20,27,11,12,16,15,19,18,5,12,14,24,17,17,23,4,8,23,14,36,66,54,48,30,46,48,29,3,25,33,10,26,11,13,1,50,45,16,15,18,28,39,10,38,24,33,27,53,72,68,78,61,51,81,56,94,126,92,114,158,116,157,168,158,171,157,148,146,110,102,82,69,68,39,38,37,65,33,26,38,22,88,63,9,41,64,37,41,70,71,53,48,57,83,59,37,27,43,22,28,13,34,37,36,31,43,141,155,146,109,101,97,87,111,79,78,85,97,180,233,249,237,234,235,232,219,251,250,242,249,255,247,252,248,241,248,247,240,244,232,220,242,247,230,199,219,224,231,230,242,242,235,237,223,233,240,246,233,245,237,247,206,221,243,212,218,231,199,241,238,228,218,235,242,217,126,9,7,8,15,14,0,18,20,4,30,42,7,10,5,3,23,29,0,21,17,22,3,23,14,1
91,196,188,202,203,190,193,202,197,200,218,196,218,184,197,181,204,200,154,199,180,211,195,195,181,209,181,185,196,210,197,195,196,164,177,196,185,200,198,188,190,186,195,212,184,187,188,174,218,182,193,188,210,202,193,199,209,198,199,195,174,185,215,203,189,179,184,186,200,199,205,190,208,192,198,195,207,183,187,204,187,186,191,191,206,180,189,203,200,161,171,181,179,184,213,218,193,198,187,190,205,213,207,222,203,216,219,191,189,187,176,198,183,200,212,214,208,204,221,194,212,195,229,204,215,211,192,190,214,178,191,191,200,200,195,175,212,206,193,190,217,201,213,202,219,208,213,185,200,194,210,217,197,193,183,209,193,207,189,201,211,204,214,221,215,213,207,227,210,211,237,201,214,185,177,230,229,215,214,212,190,219,219,215,226,239,216,201,217,213,226,206,201,215,205,213,207,211,216,212,220,210,213,209,241,203,219,213,211,215,225,206,216,209,190,216,219,206,207,206,179,193,211,184,203,226,244,209,199,183,204,209,182,207,213,208,188,205,214,205,199,212,192,202,199,186,203,207,199,205,200,203,214,186,179,211,205,183,191,182,200,209,205,187,192,209,220,190,197,169,186,209,214,172,211,199,204,199,205,191,201,189,183,206,167,180,220,205,203,196,202,175,209,195,220,176,181,178,216,185,209,204,228,208,185,124,193,205,202,158,190,186,192,189,178,142,101,133,149,118,136,111,137,117,126,139,245,198,175,179,141,183,201,200,219,193,192,149,202,206,174,114,173,214,112,125,129,143,94,36,87,167,228,197,191,169,168,176,191,237,213,245,203,212,198,232,147,170,214,131,209,245,210,222,192,239,221,215,220,166,149,103,49,57,65,15,23,9,28,25,10,28,14,22,20,20,17,29,10,8,40,27,33,29,14,30,14,60,7,50,60,41,46,103,98,71,41,74,38,9,34,13,27,49,97,119,152,87,119,113,49,99,90,40,53,21,41,46,37,47,83,116,154,147,143,141,144,141,118,154,114,125,135,115,97,119,111,66,101,126,120,168,131,167,134,147,168,176,185,151,169,166,171,165,144,163,166,150,149,141,153,151,127,139,136,125,145,134,112,86,87,77,52,77,67,67,54,68,46,67,60,64,70,60,37,61,53,70,60,58,58,50,25,59,45,39,54,29,26,20,12,22,8,1,28,15,16,1,49,16,14,9,17,26,18,21,44,2,27,40,87,87,75,51,54,72,47,13,31,31,27,19,10,16,28,34,31,31,13,39,17,21,9,10,14,18,10,13,38,55,89,67,69,78,85,72,57,69,69,61,61,44,75,53,66,104,131,143,157,134,173,136,159,141,97,102,84,80,71,49,65,52,49,49,27,52,30,39,35,34,60,55,52,74,90,67,33,20,29,39,18,43,44,50,54,16,97,141,123,119,92,120,107,93,79,117,100,120,159,235,244,248,221,248,244,233,251,254,250,255,253,244,251,226,253,255,245,246,218,244,224,219,217,207,215,229,217,208,213,246,214,223,213,212,201,218,244,229,211,210,208,217,209,242,232,234,217,234,212,232,210,220,226,242,249,139,0,2,5,8,19,11,0,11,28,29,14,12,2,16,23,23,7,4,31,0,5,20,11,32,203,195,212,200,180,198,189,193,190,182,207,196,192,197,193,181,190,203,174,187,175,193,199,170,182,179,160,214,189,196,211,199,168,199,201,178,208,195,199,192,195,190,210,182,191,169,185,192,199,214,213,196,211,210,170,169,196,188,181,190,202,200,193,204,172,198,202,190,176,195,197,187,196,182,185,211,207,196,215,199,195,186,202,208,193,214,171,187,213,188,184,196,197,196,201,201,166,194,223,216,211,196,198,208,218,209,181,204,194,198,201,200,206,204,190,216,191,206,235,198,206,194,198,236,204,203,210,209,191,199,195,209,203,190,226,202,211,211,198,203,194,219,212,173,211,200,215,192,208,195,186,195,211,212,185,206,220,224,223,219,215,192,206,200,194,201,215,222,216,187,201,195,203,206,219,222,221,190,214,216,195,221,216,207,219,231,207,209,203,187,228,202,204,208,227,217,209,241,209,227,210,218,194,213,225,212,190,204,204,212,223,210,198,215,214,208,207,212,229,202,218,201,199,193,199,215,218,22
2,205,218,208,214,226,203,212,194,221,226,212,198,211,188,180,189,211,203,210,192,212,187,226,192,215,225,182,197,199,194,205,184,200,200,199,199,189,209,202,204,184,188,199,186,177,174,187,213,202,209,217,159,187,192,177,195,209,180,201,211,191,200,198,179,204,195,194,184,189,209,179,186,202,200,197,201,171,167,203,213,183,173,160,189,210,177,186,145,107,128,123,116,127,100,112,87,73,153,241,183,183,187,151,154,200,192,217,216,168,179,176,142,155,89,153,202,111,116,89,143,137,94,45,152,181,153,193,171,190,169,168,240,209,238,218,234,221,205,131,190,223,119,238,202,225,236,212,250,195,216,208,86,92,74,84,50,0,40,16,4,23,12,18,21,20,7,38,42,37,31,17,10,32,35,6,25,16,5,3,43,31,61,67,42,86,90,99,91,55,50,17,13,30,49,40,86,143,103,98,95,97,108,71,91,75,89,94,80,37,56,72,53,70,82,132,128,120,132,117,132,118,137,137,136,132,103,117,106,89,88,135,165,148,145,155,122,146,131,152,117,142,164,128,116,141,117,120,99,83,97,43,62,36,73,59,60,56,76,45,54,63,54,53,58,58,67,77,68,70,62,80,74,72,52,57,47,44,57,37,33,36,27,28,44,27,24,25,40,52,15,36,48,59,63,38,10,6,36,22,26,17,25,19,6,32,18,26,27,3,22,42,67,142,142,101,69,66,74,39,26,32,3,32,27,6,6,39,26,6,7,14,38,19,15,28,37,27,75,72,24,43,62,33,43,56,48,37,40,71,43,80,76,87,98,74,70,63,87,49,50,54,55,72,93,91,125,136,99,118,138,138,120,128,99,108,88,79,73,58,45,49,36,34,62,54,83,36,50,26,27,24,38,40,69,90,47,41,17,52,127,113,115,103,84,82,98,77,114,123,118,134,135,132,190,206,217,249,251,249,240,219,212,242,246,254,234,204,225,236,240,218,220,206,223,218,201,200,213,211,216,228,238,216,223,245,207,243,235,221,216,236,230,235,223,219,215,244,248,243,242,228,223,206,250,227,246,236,124,3,3,3,7,13,9,17,0,15,20,10,30,7,14,20,22,14,9,10,3,16,22,31,23,203,183,203,212,187,200,210,193,186,207,203,198,173,196,207,197,178,187,188,192,196,157,191,194,218,215,171,193,187,184,185,194,207,180,190,199,201,200,188,209,224,212,196,180,165,172,172,183,183,176,180,213,169,179,199,210,188,188,198,197,179,187,210,199,200,174,191,199,191,177,177,207,204,173,179,191,183,199,196,209,192,201,172,181,188,188,204,187,192,190,204,195,182,215,191,187,212,196,181,211,202,236,173,210,192,204,204,190,192,187,201,204,211,202,206,214,193,213,210,203,212,212,214,208,210,206,194,218,204,210,210,221,206,226,213,202,190,204,197,210,188,209,195,209,196,206,207,212,206,224,210,230,201,187,207,203,190,199,201,215,196,209,211,207,229,194,215,186,215,176,210,217,188,221,203,226,220,210,220,203,230,229,205,215,230,228,212,223,210,189,210,236,214,205,216,183,223,213,233,204,218,186,194,221,210,207,205,221,203,222,224,184,182,224,208,226,218,197,214,197,206,208,205,210,206,215,227,201,216,220,211,219,209,225,199,212,196,190,204,211,214,203,214,215,210,214,202,193,199,206,214,229,203,204,232,204,191,195,224,189,208,191,209,185,171,187,200,191,181,192,172,177,213,191,208,202,215,218,205,179,207,162,191,190,186,189,187,192,200,211,195,213,226,195,226,212,183,214,194,178,174,197,199,206,134,150,202,196,168,148,185,173,193,192,175,122,66,107,110,101,100,78,49,72,57,129,205,184,183,219,131,177,212,211,229,181,158,149,160,140,177,122,184,167,66,82,77,86,78,110,126,198,192,206,183,126,142,120,165,235,194,228,215,234,209,218,130,213,222,135,210,225,220,212,214,235,226,219,119,55,60,71,53,20,10,3,51,24,1,7,9,15,29,23,10,2,24,41,28,19,16,22,22,13,2,22,31,33,83,44,43,75,124,79,69,31,30,5,12,6,44,78,92,87,135,97,66,58,93,110,123,130,113,124,172,114,101,135,102,129,141,114,85,63,48,49,56,72,63,60,42,67,63,44,74,54,59,47,77,76,53,66,65,54,51,55,48,60,42,60,66,38,79,73,79,72,79,85,45,106,76,57,55,102,71,79,59,42,51,
,223,239,127,61,138,24,20,29,125,230,155,106,39,97,214,242,246,165,179,218,245,252,175,120,47,72,146,89,119,84,61,72,134,88,47,49,83,90,72,70,63,70,100,85,83,95,75,72,77,70,48,34,35,23,25,21,34,11,18,60,135,154,159,107,25,25,7,33,15,41,38,34,49,60,58,80,72,49,63,73,63,86,89,80,71,29,47,23,31,18,42,142,167,105,103,139,63,34,37,40,64,20,16,55,9,19,21,30,30,45,19,13,15,24,6,31,23,51,39,39,36,33,131,135,142,108,65,58,54,56,20,29,0,10,19,28,33,40,23,53,24,35,12,29,37,32,30,5,59,61,57,37,66,80,83,170,108,34,41,48,7,4,37,28,19,35,31,35,6,15,31,54,28,49,4,26,19,33,92,52,40,59,59,53,42,68,52,13,47,14,19,33,1,3,51,58,45,4,24,15,42,29,32,1,20,11,31,59,81,37,91,141,116,107,93,81,96,72,23,7,12,23,48,15,31,2,5,40,34,44,29,23,22,33,17,25,36,69,28,15,38,70,50,75,68,49,66,68,36,47,44,16,29,24,29,25,17,27,24,20,39,12,30,39,33,7,20,22,7,29,30,11,27,31,84,125,112,106,118,49,28,9,16,15,49,60,68,83,59,65,65,24,41,22,31,36,28,0,18,6,38,46,53,47,79,75,46,23,27,28,66,70,49,54,64,50,50,22,40,47,44,73,49,78,151,159,224,223,221,228,226,224,236,226,228,233,210,200,111,16,0,19,18,20,29,23,15,19,10,15,12,15,5,0,18,23,26,17,8,24,21,2,5,214,214,202,218,211,205,227,210,211,210,207,194,192,194,208,186,197,213,215,195,193,214,193,195,195,201,206,224,226,208,205,181,222,195,182,197,216,187,192,219,207,194,180,208,212,216,205,204,208,221,209,206,201,219,208,202,205,227,196,213,200,213,227,214,214,201,190,225,201,198,206,194,181,186,192,205,198,194,181,197,216,224,199,224,234,199,203,212,185,224,217,204,193,178,208,187,223,192,208,207,192,191,206,218,180,192,213,191,199,217,204,221,218,207,210,222,189,191,207,180,203,205,172,202,201,185,205,190,188,207,215,196,221,204,186,182,207,209,205,198,204,193,215,198,194,179,197,178,200,215,185,211,198,199,205,194,213,208,234,187,205,189,188,187,193,182,204,194,223,199,198,199,185,211,208,200,197,208,192,187,208,179,192,222,189,197,208,192,202,182,221,203,203,198,229,202,206,203,195,202,205,190,198,196,215,178,222,202,212,187,174,179,199,209,196,194,219,203,185,217,194,218,199,190,181,194,202,185,163,194,209,206,189,197,216,215,189,203,209,208,165,174,204,203,205,204,202,202,198,193,220,209,202,204,193,196,198,188,192,192,203,165,196,170,202,193,183,175,189,175,196,187,195,210,202,192,171,178,195,201,188,186,187,187,214,185,214,175,196,196,187,195,176,193,212,207,186,199,212,159,188,193,170,160,155,176,187,180,175,147,191,208,149,85,141,241,210,131,134,36,115,136,43,170,172,159,180,211,119,107,123,158,166,182,150,123,174,171,195,170,107,189,172,126,101,101,73,110,58,64,3,143,217,215,176,179,253,190,196,157,50,37,16,8,47,157,132,77,76,62,191,253,246,207,160,218,254,249,246,116,29,60,118,131,138,68,35,71,121,153,77,88,95,99,98,34,117,124,101,76,94,87,117,104,96,78,68,58,52,39,51,28,23,11,13,50,107,157,134,86,46,45,8,35,22,13,9,37,65,77,79,53,62,52,64,69,7,26,66,60,77,93,75,30,30,6,22,39,160,139,84,129,102,35,33,34,97,71,47,43,18,43,25,11,33,5,40,22,53,23,5,11,5,19,43,35,44,38,47,96,146,125,133,82,100,59,36,40,1,18,15,26,31,16,30,19,12,22,26,36,22,16,6,17,28,52,48,32,30,61,75,74,56,62,53,36,39,41,28,25,48,45,7,45,31,43,20,41,17,37,13,16,20,47,48,84,74,46,60,57,35,34,33,30,36,40,35,50,25,12,42,10,34,20,12,21,30,66,26,8,24,21,14,18,64,122,37,92,102,116,73,60,147,121,43,36,29,26,28,7,8,20,44,36,22,22,18,12,0,53,49,15,35,53,53,42,37,46,89,105,122,103,166,91,34,45,16,42,38,8,36,11,19,17,41,10,44,33,38,17,24,52,21,19,46,27,10,44,18,48,9,90,104,99,110,108,65,42,33,22,33,53,96,94,95,81,79,83,61,45,28,20,43,25,36,29,15,22,51,27,26,53,83,61,73,18,27,44,57,31,46,55,33,40,58,56,67,75,45,43,91,195,218,20
1,205,228,238,213,224,230,227,231,232,201,223,95,16,7,8,1,1,20,0,27,2,11,12,26,8,2,14,2,2,1,12,5,26,7,5,16,195,201,196,197,170,182,193,197,225,216,208,205,210,202,199,201,220,187,181,200,181,203,223,210,195,180,195,191,192,185,213,201,196,195,200,201,194,192,199,211,199,193,189,199,216,194,196,209,196,215,217,190,181,190,209,199,219,204,208,215,226,199,217,222,219,212,187,206,215,208,208,196,220,201,203,196,219,224,219,210,193,189,207,214,208,209,208,210,218,197,201,228,189,176,202,218,216,211,192,210,189,164,216,196,202,210,184,199,208,223,183,195,202,203,209,207,199,186,201,203,203,206,180,202,182,216,191,208,200,203,204,197,216,207,206,205,207,172,209,188,187,215,210,196,184,192,184,202,169,202,203,217,211,211,184,212,203,187,190,183,203,198,184,235,192,179,206,193,209,204,210,199,200,223,201,209,167,184,187,184,196,183,185,205,199,188,193,199,202,173,225,195,201,183,212,222,234,197,193,185,193,189,192,193,198,198,198,169,175,175,187,197,202,185,204,217,203,191,194,179,200,184,199,200,209,223,212,173,196,183,216,211,188,212,194,185,214,207,184,195,207,197,201,185,179,212,170,202,191,176,187,180,202,183,232,186,179,177,206,186,204,220,212,198,220,192,236,221,181,199,186,190,195,191,209,209,169,182,212,170,177,172,188,189,174,179,192,188,207,203,189,195,210,189,210,177,180,196,193,176,188,186,181,209,187,192,164,149,165,151,195,228,212,122,73,168,166,136,94,67,139,87,10,103,187,130,137,126,92,148,201,190,140,131,143,163,176,165,210,106,168,208,194,132,72,91,80,95,56,45,36,160,224,200,188,138,183,99,133,104,56,11,4,21,36,47,37,52,92,176,239,212,218,180,205,248,241,228,189,42,41,75,149,165,90,13,67,115,128,92,46,70,70,157,77,122,165,170,128,112,104,109,108,99,107,95,102,89,63,56,43,49,14,34,30,92,159,134,67,21,20,47,16,23,20,22,27,44,75,96,66,86,58,74,86,93,54,26,30,38,39,40,111,78,28,45,5,60,164,127,113,141,79,52,10,99,107,40,59,38,41,12,25,24,40,5,23,15,32,29,30,32,31,32,40,45,41,36,49,151,137,123,123,94,107,49,49,39,32,25,13,7,46,39,25,8,17,28,16,22,55,38,12,18,23,67,95,36,35,70,31,44,68,26,12,29,47,46,22,27,25,35,8,16,40,28,29,23,11,4,19,36,45,34,58,102,54,17,73,36,45,63,8,28,36,22,53,9,42,26,26,24,30,9,11,39,12,22,44,18,30,24,29,38,83,99,29,58,78,104,89,137,187,91,29,58,26,9,14,20,35,31,18,51,43,17,17,18,47,21,31,38,44,49,57,46,43,74,100,158,130,102,119,85,39,29,38,13,21,21,38,27,20,21,47,53,1,18,48,8,22,46,18,24,34,24,17,11,38,61,21,93,123,112,125,108,92,32,12,10,21,29,51,74,111,87,88,80,81,82,80,42,38,47,19,34,11,14,20,38,51,44,84,83,67,49,37,42,31,22,30,42,62,61,81,93,44,49,62,49,156,232,211,233,219,214,212,223,238,232,227,232,231,198,232,105,12,0,7,2,9,11,28,18,10,31,21,41,36,4,5,15,13,14,3,20,13,24,9,12,212,189,218,195,218,212,177,207,205,199,221,179,211,207,177,218,206,200,216,205,184,193,213,213,201,196,208,217,197,206,220,204,190,210,191,209,205,197,175,206,190,193,223,188,212,205,207,197,209,210,198,195,204,185,231,181,199,187,217,173,180,208,220,208,225,230,208,197,233,195,216,197,215,182,184,209,192,219,163,221,195,211,199,191,205,204,215,199,197,183,191,185,217,201,206,186,204,215,202,202,194,201,194,201,187,197,210,198,182,210,209,201,207,212,193,193,198,197,185,199,199,203,187,208,214,203,200,195,190,202,190,194,181,185,219,210,203,182,189,213,198,197,187,182,200,210,173,195,196,205,192,198,188,202,200,211,200,207,211,189,235,195,192,215,199,219,212,195,218,212,201,190,210,214,213,207,205,197,221,189,221,183,171,211,209,210,181,180,199,178,181,200,200,186,195,188,208,178,204,196,162,206,218,194,177,200,199,181,192,188,204,201,199,185,185,204,191,185,185,186,189,198,206,184
,213,243,196,195,212,208,186,207,191,172,211,185,204,185,187,207,190,208,206,199,210,200,198,180,184,196,194,197,191,204,212,196,199,214,185,183,197,224,190,184,170,217,199,204,201,177,182,201,193,214,201,205,189,194,195,199,177,158,199,197,189,165,194,177,195,200,211,209,198,194,193,197,198,202,206,178,192,181,127,114,145,150,147,147,155,150,212,220,195,190,132,34,100,118,102,73,99,82,8,86,162,138,80,80,159,170,215,174,128,174,169,182,157,138,191,144,161,213,193,162,81,103,76,78,57,23,27,145,209,197,178,87,140,115,55,29,8,30,34,62,72,57,35,54,206,243,227,189,179,196,253,238,226,230,65,19,44,123,128,118,54,49,88,109,89,51,49,25,71,117,150,171,222,158,116,119,113,111,105,82,64,82,83,94,56,35,34,16,52,27,73,145,142,101,44,5,34,6,1,45,54,4,35,39,52,90,64,55,58,81,106,56,39,19,30,25,52,59,64,119,54,33,30,98,164,111,96,119,45,42,45,90,87,65,64,32,22,12,7,26,34,37,52,2,10,0,28,21,43,41,14,12,67,63,58,110,108,78,122,95,75,30,55,26,30,16,7,27,20,28,23,24,18,19,35,20,11,46,49,15,11,60,82,55,30,53,18,41,14,44,19,48,46,48,41,33,44,27,17,13,40,66,53,46,30,8,25,47,50,54,83,115,82,19,39,46,41,20,18,28,22,43,34,26,37,39,15,37,21,17,31,29,30,13,4,31,22,78,61,39,115,114,38,57,96,89,89,68,82,43,43,42,31,10,7,20,37,51,23,25,22,20,10,40,29,19,16,7,35,40,85,60,39,94,141,162,169,85,108,104,51,27,39,20,14,26,32,6,31,27,26,20,32,32,31,17,34,41,19,12,27,58,39,25,50,73,26,116,127,101,108,108,96,48,16,10,37,37,25,64,91,111,107,103,65,78,82,73,77,51,12,17,42,17,3,15,25,30,25,65,61,72,62,41,66,19,62,56,58,64,57,69,84,39,52,139,229,242,218,243,247,228,208,228,223,223,234,208,233,226,226,97,4,7,13,19,5,17,24,31,19,20,12,14,5,3,10,11,27,4,3,7,2,10,24,5,208,213,197,197,200,184,208,222,184,204,211,212,224,189,166,195,224,204,197,183,201,207,224,201,226,207,197,210,205,193,198,173,191,196,208,205,201,207,216,190,198,215,202,185,221,224,212,210,220,208,192,228,210,199,212,206,205,222,200,202,227,199,204,205,191,204,227,229,240,201,228,224,233,192,203,209,208,202,208,223,206,222,208,177,191,191,206,177,221,223,188,197,195,191,211,198,201,199,221,192,201,200,191,175,201,197,201,192,187,206,194,184,226,187,184,175,217,199,197,165,216,200,191,177,216,202,191,219,195,199,197,203,203,191,212,195,174,207,185,197,213,179,200,188,211,189,214,218,210,200,231,188,187,184,183,193,204,202,206,219,196,208,201,183,236,190,195,195,192,195,185,192,197,187,191,184,192,191,197,191,195,204,193,174,200,188,201,183,204,207,213,184,219,198,217,190,196,209,191,175,191,200,205,186,198,185,198,210,201,167,187,186,172,199,188,164,206,200,183,202,188,194,185,211,184,192,199,206,196,204,191,196,198,215,204,198,195,194,215,176,188,200,188,205,218,235,173,192,194,211,204,202,179,185,183,187,215,207,192,171,210,194,188,211,200,184,201,190,200,194,195,192,177,197,192,185,196,186,183,185,192,160,188,225,193,215,202,211,228,196,185,206,202,200,189,207,205,176,185,163,195,169,115,103,130,131,127,152,143,195,200,197,233,220,159,99,5,79,100,77,77,79,9,76,102,88,63,99,205,205,196,136,156,202,161,122,170,171,149,114,161,161,191,149,62,119,90,76,46,22,16,164,222,210,185,95,120,119,39,35,0,104,192,72,75,90,61,105,241,233,203,162,212,250,245,246,220,116,42,50,111,86,90,52,84,106,131,73,66,60,50,8,42,97,152,217,216,123,118,108,92,104,73,76,33,72,79,53,66,59,31,23,14,36,73,162,130,85,6,17,32,0,17,47,78,31,39,57,71,67,62,101,101,109,66,25,19,29,11,3,37,34,87,124,59,37,5,123,143,93,115,91,38,37,96,125,101,106,68,27,36,34,17,5,12,10,6,9,5,12,2,26,10,1,14,15,53,68,36,78,71,65,143,111,59,25,35,14,33,3,18,18,22,44,20,7,42,36,15,24,16,39,52,37,21,67,72,39,47,42,22,54,19,16,18,
33,53,44,37,18,37,35,15,31,37,35,37,70,43,0,64,46,44,37,76,141,46,30,60,30,34,53,40,40,36,45,28,16,14,4,22,26,14,23,49,20,29,46,31,31,42,51,61,26,94,93,56,65,84,44,23,19,3,28,39,39,17,9,28,19,38,47,29,17,2,17,36,51,13,25,15,27,32,55,80,68,84,124,136,130,128,100,109,114,51,32,31,15,33,11,32,26,63,32,20,37,6,19,32,37,15,38,42,33,44,54,75,81,131,120,93,144,149,107,98,116,111,42,57,48,28,20,43,87,117,121,135,120,81,90,104,80,75,47,69,38,1,27,11,35,42,33,21,32,46,77,61,67,70,41,77,58,77,67,32,43,66,43,70,212,238,234,220,231,233,219,222,225,200,215,218,207,215,217,222,117,22,4,8,32,18,12,41,17,12,18,9,38,3,8,0,16,13,1,24,4,26,17,15,17,200,215,184,191,180,188,205,199,198,177,185,192,218,201,186,218,202,187,204,200,188,207,222,220,202,185,197,216,193,194,198,195,211,215,223,186,212,208,190,182,190,222,230,210,190,193,215,214,209,210,198,234,199,213,212,215,224,201,208,201,194,189,194,207,208,188,208,212,218,193,212,190,195,198,222,209,191,200,223,202,205,201,197,208,219,227,183,178,199,206,191,215,229,200,182,199,187,212,212,206,189,197,214,208,208,198,216,214,192,188,196,223,220,189,221,206,175,200,183,207,214,201,220,200,197,229,207,209,200,196,192,198,169,205,207,200,203,208,207,226,190,175,198,187,210,207,189,201,193,188,217,197,194,212,201,209,184,173,201,191,203,198,202,179,214,180,203,200,210,194,199,194,213,197,196,208,214,196,213,200,207,183,207,202,181,199,184,201,198,212,187,208,182,185,202,202,191,205,186,193,180,198,172,196,186,200,196,182,180,185,170,191,194,217,215,197,188,201,207,180,194,184,196,201,204,180,184,184,193,191,193,198,207,199,208,187,177,200,212,189,206,217,188,200,181,181,220,165,193,202,206,191,218,202,186,197,209,202,200,192,195,182,200,181,205,210,182,188,172,196,205,217,194,186,195,184,165,166,193,193,188,180,193,212,187,191,193,172,196,181,198,194,187,184,165,198,217,202,187,164,168,150,159,160,157,188,169,156,176,197,212,174,216,234,239,135,34,15,90,77,101,82,60,59,74,117,126,148,148,169,208,146,192,184,124,126,164,216,152,106,144,111,191,149,80,108,85,72,54,44,48,167,182,176,146,94,105,88,42,48,8,183,137,84,72,45,150,136,246,205,126,200,241,248,250,219,106,49,91,86,107,47,53,88,129,154,114,80,79,26,36,19,43,101,131,140,141,70,90,109,82,92,88,68,62,67,54,69,68,53,45,37,42,34,119,158,121,45,10,20,4,33,21,67,78,34,28,72,80,108,106,144,147,135,54,31,21,21,12,40,26,70,99,103,48,19,43,121,146,103,100,75,45,45,80,124,76,49,55,40,29,28,24,21,2,44,38,15,7,38,40,9,18,34,18,10,80,75,18,68,42,66,155,105,68,50,51,21,19,38,24,16,39,11,34,51,44,47,12,22,24,28,57,35,22,77,101,29,31,51,34,54,36,26,19,34,43,26,13,52,6,44,52,23,26,5,10,42,29,75,48,23,19,44,100,118,52,38,88,83,83,61,79,61,85,78,63,22,23,5,49,57,47,34,10,6,24,44,22,48,26,30,53,50,120,94,35,49,63,31,19,44,29,29,46,32,34,14,33,60,25,31,20,16,33,35,34,20,29,48,15,43,60,86,111,48,94,144,119,100,78,100,118,78,42,33,16,15,27,22,10,37,33,24,25,14,20,33,36,57,34,34,27,59,77,88,102,129,186,154,130,135,112,131,120,132,121,52,12,14,39,50,50,71,71,113,92,86,95,105,73,90,107,69,69,41,13,39,34,38,31,44,36,19,47,37,69,79,40,71,79,57,47,46,47,23,42,23,100,198,245,235,238,214,243,226,237,224,222,229,245,220,232,221,243,108,7,11,10,21,7,2,12,6,11,10,7,13,6,3,15,19,2,0,16,12,0,8,10,0,190,221,201,216,215,202,195,229,194,207,186,200,195,190,203,201,195,221,189,177,190,197,189,228,196,213,209,217,203,197,182,190,201,211,208,198,204,229,209,192,216,203,204,192,207,193,196,204,194,202,199,202,215,214,195,203,204,201,217,197,212,166,207,222,227,215,218,207,229,203,207,220,196,201,197,194,213,202,230,219,192,181,206,198,175,188,225,200,198,1
76,212,231,206,221,208,204,188,168,211,181,195,190,211,194,200,223,199,201,219,201,195,208,181,174,194,212,199,198,205,193,193,206,192,193,201,203,189,174,193,178,179,195,181,197,187,201,197,193,184,192,178,180,211,224,193,188,195,209,170,202,192,219,216,200,210,201,209,211,183,175,208,189,216,204,205,201,204,190,189,201,194,190,196,213,210,213,189,192,203,204,188,202,210,191,200,200,177,196,201,209,190,205,196,176,169,185,186,192,194,186,181,192,178,211,204,202,198,204,180,180,200,212,184,187,190,177,193,196,185,206,196,190,167,212,171,190,201,208,164,208,180,175,196,190,190,204,195,217,190,204,189,207,198,182,189,184,179,203,182,209,206,217,193,196,199,174,186,199,192,201,171,193,198,191,182,212,181,207,196,210,199,197,186,198,206,197,196,181,184,204,197,174,176,197,179,191,194,181,211,195,201,183,167,179,194,201,196,210,187,146,159,158,167,161,168,146,177,151,206,187,176,215,182,219,207,212,119,34,46,60,86,112,60,100,116,153,130,138,143,189,165,158,146,147,131,151,184,196,130,97,168,161,214,128,100,131,74,91,70,16,29,133,117,72,149,91,69,50,13,23,20,83,81,79,62,138,217,185,201,202,167,248,242,248,219,90,57,71,117,116,48,49,57,117,147,124,96,97,22,9,3,40,63,89,72,43,35,34,27,78,111,129,121,121,108,66,63,70,83,40,26,22,35,85,129,119,64,23,24,9,25,38,69,41,66,76,56,59,99,117,125,114,84,76,45,14,43,44,41,26,11,75,147,115,16,14,23,142,144,84,121,60,23,28,61,112,82,39,15,29,16,23,15,27,8,57,26,34,41,35,19,10,40,31,26,46,77,86,21,63,50,52,72,25,13,32,32,47,27,6,21,25,28,28,31,14,26,45,35,10,21,55,30,28,38,61,105,38,77,83,40,63,65,12,70,74,75,11,22,25,33,35,35,37,47,28,17,49,51,61,51,27,24,29,113,188,44,36,125,118,167,178,120,190,130,76,38,25,21,45,19,50,60,44,24,34,7,64,32,47,34,14,36,70,131,95,66,51,67,12,22,26,31,13,28,36,30,26,58,8,19,8,20,14,41,46,31,12,21,47,17,64,27,52,91,59,40,76,82,89,82,108,86,66,46,19,9,16,48,53,31,31,24,40,34,14,26,42,43,11,27,39,75,54,99,148,157,144,125,107,65,59,97,110,105,104,109,75,18,24,33,56,38,60,12,67,70,125,123,122,110,118,98,109,70,47,16,13,16,32,17,18,17,28,21,56,56,87,116,84,61,51,17,20,17,33,48,3,141,218,252,253,246,250,235,218,238,248,213,227,221,218,229,215,235,117,11,1,6,18,24,9,45,20,42,30,36,14,18,0,13,10,17,11,24,19,25,3,0,12,217,171,190,202,186,220,215,219,206,208,191,204,189,224,202,225,217,190,212,189,234,200,182,195,191,203,222,184,206,209,198,206,192,200,176,185,213,206,184,186,198,192,207,192,202,171,209,226,187,238,182,187,204,224,211,206,213,209,223,208,203,205,201,232,189,234,226,217,215,221,222,219,227,190,193,219,192,226,197,207,183,206,216,217,218,212,206,203,191,186,188,208,187,218,210,213,208,215,181,192,204,204,218,191,204,192,199,201,194,170,195,212,202,226,210,202,192,178,197,220,200,194,195,213,187,207,175,182,207,165,173,194,177,206,211,203,197,201,196,186,212,206,175,180,185,206,210,183,159,182,182,185,199,174,197,187,210,186,196,197,194,187,186,162,195,194,207,200,191,201,204,190,193,220,199,192,182,208,193,196,204,196,202,179,188,179,206,195,193,196,189,204,186,199,217,224,177,206,188,190,207,198,187,223,189,213,191,195,194,197,192,190,198,170,174,197,195,196,183,197,176,189,187,187,200,217,206,206,172,194,210,185,196,210,207,217,196,208,184,200,211,203,185,225,179,195,186,178,158,206,187,192,194,205,199,198,204,192,186,215,191,203,198,202,187,179,156,181,196,180,181,211,202,184,175,189,194,168,196,196,199,187,162,197,183,188,194,183,184,216,195,193,176,215,192,198,194,204,164,154,159,124,109,125,123,149,126,156,208,214,173,184,181,173,198,215,179,116,119,66,68,75,34,128,139,106,134,125,198,201,186,146,142,179,173,169,211,154,
109,121,174,202,224,89,82,122,64,89,55,51,41,117,119,78,103,69,38,30,4,50,73,69,44,24,110,212,255,168,161,179,218,244,251,225,114,65,82,111,117,70,47,82,77,131,111,62,58,52,31,15,18,41,88,122,58,59,60,31,21,59,97,129,115,129,128,102,97,83,46,21,24,29,48,115,146,117,56,10,16,16,35,72,39,48,47,68,127,131,127,129,67,47,33,37,11,21,2,15,34,45,38,121,148,63,27,18,61,170,135,101,115,79,44,27,28,105,107,45,23,43,39,32,45,27,1,30,14,47,64,26,5,26,18,29,81,24,86,80,5,43,22,40,18,25,52,23,44,18,22,12,25,22,16,43,11,38,16,25,38,51,66,37,27,50,29,121,126,31,91,134,121,186,140,114,179,132,50,48,43,11,35,32,34,31,39,25,12,63,42,40,48,47,36,51,170,159,39,26,71,83,155,130,104,148,98,60,33,20,7,46,40,54,28,43,36,23,30,19,49,42,39,3,19,52,139,82,33,68,66,39,39,29,9,48,37,19,29,51,41,20,22,23,36,49,56,19,3,54,14,39,73,35,40,50,82,67,29,59,90,61,161,170,85,30,44,7,13,43,17,64,56,51,6,18,35,46,33,38,35,48,16,64,115,64,132,154,154,139,107,112,59,57,26,112,111,103,92,84,26,52,49,43,27,34,38,37,61,117,131,126,99,93,95,112,78,90,29,45,28,25,27,10,11,20,20,16,43,64,81,87,66,16,10,8,50,60,28,36,180,218,247,252,237,240,243,232,239,212,234,243,206,230,218,235,219,135,12,13,5,18,33,7,14,4,10,11,18,16,2,7,1,0,7,13,15,20,27,5,13,17,207,228,203,216,203,203,211,194,221,212,201,224,204,219,192,201,194,193,198,214,185,175,189,201,207,219,205,172,207,200,209,206,226,208,193,188,183,190,224,202,201,210,209,171,187,221,235,181,194,184,227,206,207,193,191,203,207,218,204,206,204,188,202,213,198,203,216,203,205,218,212,221,201,197,202,216,203,195,198,209,227,211,202,195,208,227,189,202,188,200,215,211,184,204,203,201,189,206,212,210,196,207,183,192,197,226,211,208,198,200,206,186,197,199,205,183,162,197,200,196,184,195,200,207,195,194,209,189,189,191,190,195,201,191,177,205,203,196,204,210,207,193,183,203,190,171,210,202,178,204,192,200,178,188,189,190,216,196,217,199,187,208,217,197,204,203,206,193,202,192,207,162,193,192,182,203,182,178,224,204,201,189,196,189,179,196,187,200,181,177,199,202,198,190,218,179,204,202,177,198,203,191,197,187,192,203,210,187,206,214,188,177,204,194,201,173,186,188,203,187,212,200,192,204,184,193,197,195,180,204,193,195,192,182,205,192,170,209,170,186,197,185,199,195,201,182,203,202,201,175,191,189,186,186,188,183,188,203,195,195,192,214,196,220,200,185,204,190,195,159,188,190,199,175,197,190,184,164,204,190,183,200,184,192,185,200,203,197,178,197,175,193,201,191,171,202,184,179,136,137,148,145,142,164,178,149,153,154,197,191,171,170,193,176,214,203,184,168,159,80,78,102,84,171,173,137,143,122,178,173,141,136,167,184,142,164,155,154,135,130,159,191,203,95,94,105,61,99,66,30,74,140,93,66,94,34,16,7,22,41,54,88,40,103,200,237,243,133,185,249,246,247,197,103,53,60,104,109,66,54,69,125,105,81,104,64,59,13,4,38,58,96,112,157,130,75,54,59,16,46,39,61,94,120,114,121,123,91,101,68,42,29,56,131,176,58,28,11,0,52,78,63,33,20,22,10,68,141,138,95,53,19,20,27,32,37,25,27,19,51,40,76,61,26,56,21,83,179,129,103,132,64,37,20,28,59,39,12,13,34,36,14,6,16,16,4,47,38,35,45,12,16,19,64,32,36,118,85,41,67,28,21,28,19,35,24,52,28,18,39,7,12,69,43,48,14,27,4,34,57,39,15,21,24,33,127,105,48,70,117,109,176,113,134,127,90,18,9,24,55,50,42,21,52,9,30,42,23,33,7,38,61,97,29,104,143,52,41,43,42,17,34,44,43,26,51,41,32,24,40,39,42,0,10,36,29,51,50,39,33,26,26,27,66,146,75,17,83,72,39,63,51,53,38,58,44,27,26,17,31,17,28,27,34,37,40,43,27,60,17,15,45,30,73,116,47,15,46,50,102,105,92,43,51,48,45,23,31,34,14,31,61,44,33,39,42,21,49,30,20,60,118,76,109,123,88,118,92,114,107,92,50,53,117,108,102,90,38,34,42,29,32,31,20,46,35,32,78,122,
106,79,90,100,127,121,74,29,49,75,67,40,18,21,15,6,42,38,47,65,81,37,71,41,28,11,29,30,53,202,222,254,210,182,218,231,242,240,221,196,231,222,237,222,214,206,110,34,13,28,30,7,17,6,7,32,1,3,30,5,5,0,49,9,10,25,14,36,31,19,21,183,200,196,184,188,200,179,207,193,201,205,194,196,183,199,187,196,204,207,213,217,204,209,179,203,217,198,173,206,206,212,180,212,220,218,180,206,208,196,208,205,199,171,197,197,217,188,209,195,224,209,234,214,218,189,194,209,200,231,216,225,225,234,194,204,219,216,201,213,216,212,217,177,202,218,216,196,229,188,187,213,222,190,199,195,198,213,207,193,187,194,203,195,216,187,211,198,190,211,202,209,207,196,204,184,189,177,186,191,174,211,216,206,209,213,206,204,192,175,204,194,207,203,200,210,155,192,203,189,191,230,190,198,188,184,207,196,212,195,202,185,207,205,188,199,225,221,175,197,180,171,213,196,204,182,180,206,207,195,214,168,192,188,209,176,197,214,209,211,204,195,202,199,178,199,201,188,179,226,193,161,200,203,207,181,202,199,206,166,211,197,202,197,200,186,221,190,181,197,211,182,183,194,194,206,165,196,184,195,203,200,203,175,219,170,187,206,197,190,226,214,198,190,209,182,195,205,229,197,205,197,183,201,201,193,193,199,193,214,195,175,179,199,198,173,215,189,203,191,193,214,209,205,190,194,209,190,206,196,176,205,193,218,195,196,216,183,208,226,197,191,184,198,205,191,214,199,169,198,215,213,192,167,194,187,180,166,202,195,188,183,203,202,208,180,191,178,210,134,159,173,132,185,199,223,161,197,161,185,183,169,170,196,187,189,199,131,176,164,104,87,109,130,245,150,128,184,161,175,152,120,195,146,151,136,167,152,174,156,118,163,189,161,110,83,107,81,85,52,40,134,152,117,78,69,42,22,41,33,70,49,67,103,196,245,232,182,147,252,255,240,195,77,72,58,88,171,119,52,56,101,128,112,82,72,41,34,12,53,58,101,135,152,160,121,110,103,66,45,35,54,22,48,106,131,142,137,139,154,114,55,62,62,132,122,43,19,4,9,50,59,45,40,23,17,27,38,72,125,108,82,30,31,89,109,54,22,35,28,30,27,11,27,16,30,14,93,181,111,103,126,53,11,36,15,47,42,19,34,34,43,7,24,25,19,19,29,3,39,27,30,54,68,31,17,51,138,72,19,51,28,37,25,24,36,43,37,34,37,11,9,13,17,33,17,20,26,25,35,50,26,22,27,33,69,98,88,53,59,43,35,39,34,12,48,29,45,29,69,58,23,34,31,12,44,62,58,3,24,40,89,76,106,48,53,114,48,48,41,53,57,52,22,29,34,44,41,25,38,16,29,42,29,35,25,69,22,26,4,28,47,63,43,51,127,74,45,164,132,136,175,91,119,156,82,46,14,34,14,33,7,4,10,24,7,25,29,56,56,35,28,19,43,114,89,58,36,74,59,38,28,26,39,38,39,41,54,29,12,22,25,47,68,54,27,25,13,45,29,63,81,119,71,55,129,104,90,129,119,128,84,45,33,65,102,108,128,72,28,61,37,33,29,13,26,51,46,60,98,146,111,95,97,120,105,96,19,71,140,64,45,51,26,8,11,12,43,22,52,80,62,66,22,48,46,48,32,16,117,143,138,123,58,142,164,147,157,142,152,183,228,228,213,214,212,128,0,18,14,20,17,15,19,21,16,8,35,13,17,1,3,27,1,15,3,20,23,14,18,12,213,214,202,205,200,171,206,191,200,218,222,190,208,191,200,194,206,219,202,229,185,203,198,210,200,214,199,218,206,224,196,193,196,205,203,224,197,221,204,207,188,204,218,195,196,195,177,199,218,191,195,189,172,205,189,205,214,208,202,219,242,186,193,204,198,211,199,202,172,208,184,197,192,215,194,204,189,182,205,203,193,209,208,219,199,191,213,198,214,204,209,198,208,190,189,190,183,212,202,193,204,192,178,210,205,198,210,185,200,202,212,197,208,158,195,181,193,203,182,215,186,182,180,185,187,195,211,196,179,204,193,222,194,189,187,172,204,211,187,192,196,182,180,194,204,184,207,227,200,190,202,192,194,205,185,189,194,191,207,195,209,198,201,187,214,184,195,206,186,193,192,215,192,215,183,200,204,189,205,200,197,204,194,197,223,190,195,202,174,220,
172,216,187,159,219,187,189,182,179,168,188,184,209,196,200,182,227,204,177,213,204,167,186,210,191,231,193,202,170,195,189,203,196,212,198,179,179,194,211,188,174,198,212,198,193,184,186,206,179,195,202,180,214,216,196,170,172,209,206,192,189,218,196,191,178,208,198,186,179,185,194,182,194,162,181,185,189,182,184,195,196,175,183,192,188,186,182,193,191,214,189,173,189,201,193,203,190,208,195,207,200,172,192,175,180,197,200,204,137,174,187,166,176,180,200,129,215,188,180,207,189,214,195,174,199,177,149,165,177,92,72,73,146,237,137,122,143,93,160,196,176,169,138,138,138,176,166,169,135,116,193,186,214,124,117,107,110,99,82,68,139,83,64,105,37,18,44,71,67,78,56,77,204,255,255,164,199,162,244,240,210,98,42,85,106,113,129,93,67,48,63,118,91,61,27,25,34,15,36,91,106,129,106,125,119,123,124,101,90,66,22,30,43,19,59,139,114,157,162,155,166,113,56,20,26,46,29,15,29,32,56,38,35,40,14,31,6,29,76,128,77,31,118,195,132,64,28,15,38,34,54,18,41,32,30,42,127,148,79,110,130,49,21,22,3,3,8,22,48,49,13,3,6,9,31,36,12,10,16,23,45,59,27,22,12,26,97,81,43,82,28,57,69,42,41,31,39,51,26,41,48,24,10,7,30,47,26,23,28,24,24,63,65,63,77,75,123,23,13,12,65,37,14,33,50,56,85,58,31,23,39,52,32,76,81,50,79,75,75,75,133,122,116,103,112,128,43,104,68,54,61,47,70,77,55,66,81,30,54,78,50,53,35,36,46,46,31,26,35,17,50,44,12,29,129,63,10,135,93,141,164,135,154,159,78,57,7,10,31,0,47,24,25,13,12,1,40,51,49,18,39,36,51,94,91,40,55,67,26,38,37,29,29,30,29,61,23,26,35,19,30,36,68,58,38,34,30,26,44,89,54,117,76,49,101,73,122,136,151,149,53,54,44,93,129,83,114,74,29,54,65,46,18,5,17,42,29,36,126,160,145,136,114,113,109,79,12,117,199,84,49,19,29,23,34,34,22,11,57,56,48,74,41,27,23,62,35,52,41,40,34,49,15,76,133,138,143,113,94,88,155,196,200,186,222,110,6,12,4,10,22,5,17,2,21,5,6,30,12,38,2,24,12,7,34,22,11,15,24,0,229,186,199,180,200,208,213,205,225,194,212,205,237,212,207,209,206,196,192,195,200,201,177,186,210,169,198,214,183,212,201,216,211,181,182,194,195,201,230,201,193,191,196,208,204,211,201,194,198,182,194,201,203,202,208,194,205,207,232,213,204,190,214,200,223,182,195,196,205,200,202,184,175,205,189,171,205,198,191,188,211,222,200,200,195,188,213,208,203,205,204,177,188,193,195,194,183,215,200,178,206,196,171,195,175,207,208,180,194,199,168,202,211,201,189,183,178,193,174,227,217,181,186,200,213,188,169,183,184,214,177,184,218,193,192,186,220,224,193,220,174,199,205,193,187,177,212,206,182,166,192,194,212,153,200,195,202,189,184,221,199,202,206,191,188,195,199,193,205,189,200,206,206,197,209,176,185,200,204,208,188,184,197,196,168,186,193,196,193,166,191,181,217,200,194,193,185,191,201,200,190,197,205,207,159,197,218,206,200,181,187,195,207,199,180,177,214,202,212,171,191,183,193,171,200,175,196,190,215,194,195,195,189,190,186,182,199,179,190,196,177,195,203,205,184,196,176,214,221,187,205,195,184,204,188,171,213,180,198,194,189,201,211,182,192,211,187,188,182,183,176,199,201,165,187,212,193,197,163,154,217,172,189,186,199,188,197,210,174,160,154,162,180,160,209,203,181,195,115,143,129,124,143,152,142,129,206,208,199,187,179,184,182,196,214,195,156,181,182,46,45,65,115,157,79,62,117,147,148,200,170,146,156,142,207,184,145,190,139,171,232,182,178,131,93,145,95,116,57,88,104,40,82,78,59,19,57,76,78,90,63,184,247,242,206,189,246,199,255,228,108,36,63,110,127,107,74,54,60,88,68,74,70,37,25,25,12,40,89,97,115,116,107,106,113,101,124,116,140,102,74,50,48,44,19,51,94,157,182,133,145,149,131,72,43,7,11,36,34,48,47,30,46,21,12,19,6,7,28,121,120,119,161,153,46,31,25,61,88,71,39,34,30,56,48,47,152,136,116,129,127,60,12,7,32,35,7,35,22,
39,36,23,17,32,33,38,31,26,16,40,48,24,27,21,9,32,140,84,66,93,57,77,72,64,112,74,60,38,34,54,35,44,10,13,19,59,51,47,24,11,12,37,56,144,189,141,113,55,20,23,37,62,47,35,37,56,65,26,56,52,68,128,99,119,104,97,118,149,182,186,172,199,166,178,198,169,166,207,182,175,175,163,146,155,152,133,132,130,150,162,158,135,112,97,81,47,67,60,40,58,51,41,38,68,140,40,64,42,58,43,38,54,47,55,60,100,37,35,25,21,61,23,30,21,14,49,32,28,36,54,19,28,58,87,99,61,67,50,14,31,59,16,18,14,28,36,20,11,25,18,36,14,23,48,33,4,30,37,56,46,77,141,81,36,59,55,59,116,111,89,39,22,32,83,122,88,120,87,30,43,54,32,32,24,38,17,48,59,70,60,55,96,102,92,88,44,83,155,166,115,71,50,53,22,41,17,55,51,79,70,77,45,73,18,53,44,44,65,83,53,17,28,65,95,101,79,134,79,81,20,73,171,206,220,221,125,1,16,17,46,13,19,12,31,15,29,8,23,15,5,15,21,12,23,12,17,10,0,25,1,190,207,221,211,216,202,225,206,186,203,203,214,193,224,213,197,197,179,191,206,211,185,219,185,187,212,197,196,212,225,217,193,175,195,209,198,194,183,197,188,212,193,198,218,160,189,209,205,194,189,206,223,210,196,187,190,226,202,197,209,188,202,201,190,214,208,197,223,194,204,181,186,198,202,204,203,214,181,214,165,209,209,182,195,175,203,232,201,209,197,214,203,209,192,203,199,201,225,190,233,196,174,186,197,210,174,218,205,179,205,215,194,181,172,187,202,210,201,183,182,176,199,191,182,188,196,200,205,189,201,207,188,164,169,201,200,196,195,177,184,193,173,199,195,215,187,204,188,209,214,197,201,200,195,192,189,200,206,181,210,203,189,209,207,208,191,198,185,185,196,170,205,189,186,213,189,194,177,220,196,214,192,188,164,186,180,213,198,199,226,209,186,187,202,212,206,189,173,170,186,188,201,198,224,154,184,197,192,194,170,203,206,189,208,196,174,216,205,177,192,211,192,205,179,204,182,225,179,202,198,190,204,188,221,181,205,215,208,170,204,196,186,185,195,189,188,185,191,184,183,188,193,201,187,199,178,173,212,177,202,197,191,182,210,197,181,217,188,218,186,184,210,173,188,170,201,157,197,169,195,209,221,170,187,216,172,196,182,177,182,181,182,209,185,189,200,218,149,93,109,153,140,178,149,144,172,207,175,188,178,191,186,213,191,180,216,195,206,183,135,92,126,80,97,17,74,195,180,168,155,184,164,154,185,185,126,176,191,125,155,190,115,175,127,101,137,58,73,66,61,81,6,37,58,42,33,65,56,51,83,152,250,250,215,137,220,252,224,213,117,41,84,96,134,130,77,82,54,110,66,60,65,67,21,6,29,42,83,92,137,131,95,96,122,113,98,94,95,95,125,118,139,65,35,61,57,33,89,115,159,137,142,128,136,106,56,59,23,25,10,18,26,23,37,17,13,41,20,27,65,120,117,178,77,23,6,41,76,115,102,65,40,44,26,22,52,178,131,110,132,98,43,30,15,26,34,29,12,23,61,25,4,38,6,14,10,38,25,43,32,27,10,51,22,57,99,175,53,104,156,131,232,116,161,185,99,68,22,22,27,35,20,6,8,27,42,23,41,14,8,23,56,46,167,105,84,120,47,31,48,35,36,43,52,63,58,128,113,115,129,173,185,217,218,168,206,210,211,206,201,200,204,193,192,187,180,160,197,195,167,181,208,234,223,158,235,211,199,201,209,197,202,152,158,171,147,165,164,145,132,93,43,28,117,139,66,32,54,60,38,37,54,27,40,63,34,15,47,22,14,11,20,26,40,61,36,40,8,13,49,42,70,40,83,112,35,27,61,18,31,44,23,34,30,40,52,32,20,51,35,31,39,23,9,46,68,54,43,36,4,84,137,74,29,55,36,56,87,86,41,44,49,23,95,120,100,124,125,55,13,25,52,33,16,35,12,11,16,19,59,46,78,87,99,102,83,143,167,174,112,101,90,62,32,11,27,42,22,64,58,94,71,63,38,22,43,27,51,66,53,60,84,97,120,119,99,64,74,87,119,169,187,213,201,213,120,15,9,25,26,13,11,2,30,10,8,24,0,19,8,3,1,19,29,25,17,2,3,22,12,165,191,198,212,206,184,203,193,198,222,213,199,188,195,207,211,190,200,192,180,207,197,198,186,197,183,203,207,197,203,225,222,224
,209,208,206,187,189,219,207,197,185,207,186,174,188,222,219,183,193,212,183,208,199,208,217,218,196,213,210,194,179,202,204,208,217,207,196,177,215,198,196,182,211,210,189,189,201,178,196,207,198,194,213,195,202,198,208,190,207,202,202,198,209,187,201,184,211,212,220,202,181,211,194,183,205,168,194,201,196,199,213,197,188,195,212,177,199,187,199,189,198,190,187,180,202,206,176,183,214,201,199,195,174,175,177,191,197,200,192,177,202,206,188,197,201,177,184,200,200,197,218,206,219,215,210,171,185,172,191,176,222,190,199,211,180,191,203,213,208,184,205,200,204,231,191,194,196,186,183,186,208,233,209,191,189,208,186,194,185,194,195,197,203,190,174,179,175,204,211,211,210,206,188,183,189,189,214,205,179,189,183,204,206,177,200,182,226,201,191,182,178,174,185,160,181,199,182,191,184,187,190,181,192,194,205,210,175,216,206,193,185,182,181,177,180,170,213,186,188,193,184,177,221,199,199,194,189,196,207,182,181,177,204,176,176,212,192,222,194,180,206,186,191,196,166,188,167,183,178,188,222,177,189,159,177,166,186,177,182,181,169,204,177,198,219,203,144,144,167,171,185,175,177,177,179,214,193,197,166,185,174,190,188,182,215,184,199,213,131,125,146,134,73,38,141,233,199,145,165,167,185,185,187,170,108,183,189,62,167,168,134,199,114,101,108,62,85,61,71,37,15,21,71,63,41,71,56,56,146,233,252,209,173,195,225,239,186,99,57,50,95,119,107,48,34,114,102,80,71,85,33,42,5,33,47,63,101,109,126,118,83,99,132,109,98,110,87,87,84,107,154,60,27,40,36,54,45,53,108,130,179,169,142,130,134,156,91,52,37,12,10,1,23,33,31,1,28,51,42,98,122,142,78,31,7,73,165,139,151,115,64,62,36,43,75,157,103,116,135,111,21,15,20,24,7,29,27,28,38,39,17,24,6,11,8,25,21,59,27,25,6,31,63,65,64,122,55,59,75,55,152,75,62,95,46,35,40,8,27,9,35,21,52,29,27,6,36,18,54,64,51,41,78,60,127,93,29,49,38,47,56,77,134,142,180,208,188,233,215,239,207,199,185,182,190,181,164,173,147,147,124,110,100,131,120,120,118,135,113,150,164,157,154,168,166,174,171,181,150,148,174,186,159,166,176,176,181,197,180,175,161,151,162,152,71,78,51,38,52,36,57,53,15,43,48,13,51,29,35,47,24,28,48,51,29,21,13,18,44,78,65,39,84,100,43,96,74,39,109,56,51,70,52,49,21,14,13,28,43,58,19,18,2,37,49,76,20,15,22,29,131,62,43,50,55,27,39,41,53,17,15,32,84,147,127,117,115,53,23,27,32,18,25,39,25,57,64,47,68,26,64,119,143,132,96,69,46,52,66,68,94,81,45,28,21,18,78,62,66,85,95,60,54,15,43,85,58,41,89,154,134,125,109,108,103,63,115,161,187,217,239,219,225,211,154,8,2,25,14,31,37,23,14,3,12,12,10,17,2,4,10,14,0,7,5,17,6,19,21,199,189,224,203,168,184,209,203,202,206,224,197,205,177,219,205,181,177,203,190,185,216,164,180,197,191,207,213,201,217,203,215,182,197,218,188,200,222,193,203,212,210,214,179,200,198,213,189,190,203,201,214,215,201,212,191,210,178,184,181,208,218,205,209,205,199,207,199,218,192,206,215,199,175,182,205,210,208,189,216,186,210,177,207,203,200,199,189,214,215,186,177,210,177,182,196,194,186,200,220,219,205,206,208,203,183,169,200,205,207,219,202,181,197,206,175,193,192,167,187,190,197,189,188,180,181,189,196,176,175,189,202,215,179,203,209,188,185,194,191,187,176,172,174,204,182,182,197,201,205,187,191,182,186,195,192,185,204,173,171,188,183,173,183,192,220,199,211,196,204,187,185,175,210,176,200,165,205,188,192,196,174,198,200,216,197,177,169,188,182,189,193,191,206,192,192,194,190,205,207,197,195,195,184,206,183,199,199,178,208,183,183,177,191,183,215,188,214,193,216,196,190,200,208,197,195,192,189,201,202,167,178,207,185,180,202,187,193,203,170,204,192,175,189,183,169,191,218,188,178,197,187,191,201,195,205,195,204,194,187,183,185,170,183,190,202,177,209,194,163,185
,190,190,183,196,171,177,186,214,192,172,172,167,164,153,184,172,187,197,180,161,179,190,188,187,203,138,96,159,165,158,168,171,157,176,216,207,185,178,197,164,177,189,192,195,163,170,203,198,132,138,213,153,123,94,165,226,170,145,160,176,169,123,191,173,160,215,146,66,154,171,190,226,71,121,123,107,107,87,73,49,8,28,93,88,58,70,38,154,240,251,253,155,161,241,241,239,136,29,47,86,145,109,62,64,86,125,101,53,69,47,41,17,52,39,82,107,102,135,106,109,83,119,148,113,117,96,84,114,102,110,125,70,100,66,29,53,43,25,35,40,96,149,164,129,121,149,143,140,101,54,20,32,17,30,0,23,57,44,20,54,102,147,71,44,22,14,68,2,31,104,64,31,17,29,118,164,114,126,147,82,31,29,17,23,40,10,42,41,70,38,15,55,41,36,18,28,36,38,31,19,28,52,62,66,102,115,65,23,24,26,44,39,51,46,22,87,48,15,20,30,32,74,49,19,10,34,64,71,95,116,93,88,160,87,128,128,146,181,146,195,202,196,209,208,212,224,168,201,170,153,159,141,128,117,139,127,147,135,124,122,152,144,122,148,149,157,171,169,155,146,156,147,163,190,162,204,193,168,164,174,178,148,136,153,123,120,112,158,153,174,160,169,176,174,150,198,164,134,137,76,62,36,31,49,39,41,33,17,40,29,21,37,25,32,29,17,8,33,61,18,58,59,96,102,14,93,108,125,242,108,131,155,62,50,29,38,30,8,36,8,32,25,10,37,60,55,32,32,26,81,131,88,36,64,51,20,38,36,22,25,22,55,88,122,95,117,124,59,49,16,39,40,34,13,47,77,105,61,55,46,112,159,145,81,55,36,34,32,25,36,120,99,63,47,11,41,35,53,23,56,42,34,106,171,144,126,127,121,152,149,98,113,83,72,89,129,211,231,237,224,242,237,211,233,132,11,4,23,8,17,13,17,33,19,16,22,15,22,0,20,29,12,10,10,7,18,25,7,2,221,206,204,211,197,185,200,180,183,214,210,224,219,209,201,205,180,222,199,218,214,185,194,205,198,205,218,198,226,205,171,174,194,214,192,205,207,185,191,181,217,196,208,199,184,198,164,224,183,220,205,210,200,205,195,198,188,184,196,165,205,208,203,199,217,208,221,189,211,223,188,209,216,194,199,209,203,222,194,178,203,214,174,194,218,202,192,196,194,221,203,206,200,188,173,190,175,209,193,205,195,176,194,195,218,222,190,187,218,186,201,179,210,186,180,184,186,202,196,191,209,188,180,193,195,195,192,186,198,178,191,194,186,204,200,189,183,175,218,204,202,218,198,187,160,195,199,187,209,188,214,185,184,180,214,190,166,214,196,213,202,178,202,205,218,193,196,191,191,211,194,203,187,215,193,200,191,201,189,178,204,176,180,162,190,182,187,187,192,178,192,216,195,204,197,189,204,188,179,187,193,209,180,188,188,194,181,180,203,200,189,178,184,169,202,193,177,202,198,194,182,217,205,193,183,207,176,177,191,187,179,200,195,193,196,168,190,204,206,188,191,189,188,183,187,197,200,190,183,187,220,200,190,195,193,175,221,185,183,191,206,187,178,197,195,195,198,164,196,204,187,185,188,186,181,197,190,218,185,179,188,156,162,182,163,193,168,217,190,180,214,192,153,172,212,218,176,107,126,123,136,148,154,150,179,176,198,188,184,197,183,181,197,195,189,179,207,189,221,132,161,148,121,91,85,147,205,197,171,155,173,151,174,177,181,170,172,120,47,164,155,179,169,80,149,150,110,114,101,68,19,35,140,210,129,88,75,83,239,231,236,188,131,231,249,238,170,46,62,88,109,109,67,83,107,122,117,64,73,71,24,0,9,30,81,105,111,123,137,86,103,84,113,156,170,101,99,75,67,89,112,115,86,103,84,60,56,17,39,40,36,22,40,113,110,156,129,171,169,179,158,99,92,48,29,16,12,31,18,4,30,38,89,111,78,49,15,47,21,73,126,74,46,17,43,139,153,92,128,107,64,33,41,73,42,49,92,88,50,53,50,19,30,31,36,18,20,7,36,37,22,70,50,20,29,87,134,49,38,50,54,41,30,58,51,29,40,51,33,13,19,58,39,92,67,82,104,97,131,139,160,162,185,201,182,189,182,216,224,187,192,176,152,180,136,132,140,158,152,140,131,159,148,147,159,144,157,14
8,141,133,145,139,122,135,109,129,98,124,107,103,89,73,87,95,118,111,114,160,127,150,142,153,110,176,142,134,181,168,171,154,151,144,133,149,146,172,177,185,178,194,156,167,173,120,114,102,70,86,21,32,68,81,60,18,10,41,33,45,27,26,21,35,62,133,93,66,76,87,144,195,102,145,131,65,58,45,28,27,6,6,14,32,22,57,17,8,32,45,70,52,107,175,79,77,70,34,25,27,27,42,26,29,37,86,141,95,133,153,84,18,23,58,58,45,60,49,71,79,140,89,84,164,148,84,33,8,42,15,35,35,79,111,131,85,48,15,37,11,49,60,63,113,156,207,194,179,151,156,141,126,87,80,51,99,134,188,236,236,210,236,219,208,217,223,217,126,11,11,5,4,24,24,8,35,21,9,21,26,7,4,5,15,23,10,17,18,12,3,27,7,206,217,226,201,185,207,213,195,220,217,201,197,244,235,207,206,214,199,208,224,226,220,199,220,190,202,211,217,216,209,179,198,184,214,219,187,186,210,192,204,206,180,195,212,172,208,218,200,188,196,216,222,199,211,186,218,179,186,196,219,198,201,215,207,191,196,183,226,210,202,220,190,203,192,184,201,224,191,190,217,187,188,205,171,203,198,209,182,199,175,190,198,188,217,198,198,202,198,192,206,196,186,203,201,195,218,193,198,207,154,188,178,198,216,208,180,204,193,187,206,193,216,202,210,204,195,207,197,181,192,190,197,177,200,210,180,206,202,196,195,199,189,177,177,191,189,188,202,217,184,200,198,197,176,200,193,179,197,201,200,193,183,192,204,189,194,202,199,198,179,212,190,198,185,186,184,176,211,220,180,192,200,177,181,187,201,197,208,203,202,218,191,192,181,197,193,205,187,199,197,204,214,178,187,195,194,176,192,152,195,195,200,199,176,195,182,195,173,197,185,188,187,203,175,198,209,203,198,195,199,168,179,173,196,199,191,194,181,200,196,190,189,210,193,180,183,189,173,194,192,176,176,180,207,204,181,187,200,182,203,209,201,170,189,190,186,178,214,199,199,192,192,191,201,200,209,187,179,168,156,148,188,186,196,184,191,201,190,205,187,180,186,180,205,191,194,146,102,139,157,152,163,138,138,211,197,224,203,169,202,175,182,193,207,175,178,167,182,198,147,110,86,62,46,45,69,124,162,126,128,137,182,203,216,137,192,165,140,64,99,163,173,143,102,128,104,114,124,99,21,14,61,193,188,131,59,118,200,228,193,138,174,217,240,234,145,52,29,132,115,106,88,58,138,122,131,65,73,40,12,13,33,22,91,119,112,115,81,107,87,99,98,75,136,111,104,109,66,128,131,127,157,79,76,72,39,46,21,34,12,37,45,43,37,61,107,150,171,145,171,154,165,157,120,75,32,17,37,8,14,15,0,28,73,119,79,63,81,98,188,145,69,29,21,52,129,154,95,127,113,59,4,37,60,42,30,47,32,49,47,20,21,21,13,47,38,40,15,10,12,28,55,25,46,1,45,130,24,48,45,80,68,23,57,48,51,52,54,36,55,74,83,119,122,142,157,187,202,190,200,185,211,199,185,185,185,148,152,167,124,120,115,119,132,131,144,129,129,110,140,147,132,144,97,95,74,51,58,36,43,33,30,55,48,34,54,15,36,29,18,36,33,29,28,22,25,52,20,49,24,41,33,61,86,88,110,90,138,113,173,167,145,135,143,146,131,123,136,149,161,147,172,179,190,163,143,162,163,170,148,132,113,104,71,65,21,52,69,31,39,23,27,61,125,109,61,58,35,58,50,48,41,37,9,43,59,51,32,25,16,20,36,70,19,29,11,25,14,91,73,101,155,80,36,95,38,62,69,22,17,37,33,55,110,120,87,95,101,69,39,17,33,56,81,59,48,21,57,115,98,173,188,114,64,21,36,42,24,25,52,144,187,96,47,42,32,17,87,238,200,212,189,160,152,150,147,131,111,101,75,58,94,133,202,237,218,244,234,228,228,235,234,237,223,221,117,0,0,1,24,14,14,10,16,7,18,30,36,8,4,11,8,9,13,24,13,21,10,25,29,211,186,203,188,192,210,200,197,206,171,207,223,203,202,216,221,186,199,221,202,207,189,207,221,202,205,202,189,217,202,192,188,171,201,182,214,213,201,203,205,194,216,206,199,203,204,217,209,189,191,208,201,209,222,194,219,228,222,182,211,188,216,207,190,199,191,219,203,176,21
0,216,202,203,195,234,200,200,206,202,204,194,191,169,196,201,201,182,187,208,184,223,190,216,195,187,199,215,239,197,178,199,217,206,195,176,217,196,214,179,195,192,204,227,204,204,195,173,196,199,188,209,202,191,213,185,179,196,210,177,181,211,200,195,174,178,197,199,208,194,202,183,189,193,176,168,199,194,197,200,198,174,209,185,204,213,194,187,192,201,189,197,169,195,197,201,192,182,181,173,197,215,198,179,197,199,194,175,191,187,186,212,208,195,189,202,191,195,196,205,218,187,201,185,195,204,176,173,196,172,190,182,221,194,209,209,194,198,202,193,210,183,172,161,193,184,196,202,163,188,220,168,208,197,190,194,186,169,200,193,208,207,206,186,178,177,183,191,210,171,174,189,195,196,168,202,194,189,177,199,188,182,179,188,204,171,191,185,197,211,209,207,202,182,205,186,198,200,187,195,202,190,187,221,202,196,187,143,155,173,159,185,197,184,203,213,185,208,199,188,212,176,180,182,216,189,178,143,152,182,174,191,166,150,209,205,214,203,218,170,223,191,181,185,186,207,206,183,161,175,120,87,67,97,70,61,61,99,127,135,121,159,165,204,146,119,185,185,167,58,71,140,161,198,119,132,112,90,111,99,39,24,21,92,103,50,114,240,220,220,141,177,229,228,245,150,60,62,49,120,133,58,64,88,124,110,95,79,51,4,21,46,39,57,105,128,100,116,85,82,88,135,127,88,71,103,81,86,95,141,137,151,118,81,21,35,64,57,65,44,12,11,13,42,39,37,44,58,102,133,143,166,154,144,160,177,160,116,49,19,34,28,13,24,47,70,90,113,132,176,156,59,40,38,27,66,162,140,110,122,92,35,19,23,23,21,20,11,25,49,37,49,38,27,8,29,31,32,14,20,54,44,34,34,28,43,54,121,78,28,20,24,23,33,27,35,13,78,72,93,106,152,205,203,191,208,210,227,207,213,188,164,156,144,133,134,106,103,90,138,129,133,137,129,146,124,131,116,70,58,39,29,41,59,5,25,29,20,1,2,22,15,29,34,18,12,40,15,27,18,11,13,22,31,32,30,34,32,39,40,12,5,22,44,2,43,24,50,25,56,27,62,81,103,87,126,125,113,130,135,164,160,175,149,144,152,161,161,172,166,151,200,168,179,139,150,120,123,99,71,24,17,42,66,125,144,37,41,39,76,49,24,35,28,32,53,50,35,43,37,13,50,50,7,22,21,32,14,32,61,63,133,157,57,67,94,65,110,91,25,69,76,27,75,119,134,123,104,111,98,22,56,55,60,62,34,60,10,56,154,134,158,114,74,77,11,20,25,38,41,69,131,104,87,16,32,42,27,140,251,242,220,173,130,114,139,106,110,88,75,73,118,195,251,249,227,235,240,237,200,201,234,241,242,226,217,114,10,0,2,9,38,18,26,0,26,11,0,0,20,4,13,14,11,31,17,23,2,15,13,3,203,204,199,197,185,219,207,172,204,184,223,217,196,207,198,233,225,222,218,199,196,227,195,204,219,195,211,205,206,215,214,210,191,197,198,210,218,193,216,187,227,168,183,195,222,184,194,198,207,202,208,212,192,218,215,215,200,203,212,205,208,231,221,192,198,185,221,205,191,199,205,198,210,201,192,195,202,203,202,202,206,222,222,222,210,196,209,188,209,186,200,190,217,204,194,200,183,208,192,197,210,197,184,197,200,200,206,188,202,199,213,204,189,193,191,224,180,184,193,174,207,193,181,177,209,182,189,197,181,194,206,214,183,216,172,173,177,195,191,181,188,207,215,190,172,216,221,187,172,226,150,209,199,192,181,187,193,171,191,200,214,177,176,176,183,182,184,215,199,184,222,198,182,185,205,179,173,194,193,193,200,187,194,214,205,213,202,193,178,197,213,188,202,207,193,196,181,229,181,210,199,196,184,182,193,220,188,196,195,175,189,201,197,211,171,210,177,194,157,192,178,177,201,195,209,195,197,204,205,176,203,201,210,196,190,200,197,184,203,191,197,202,194,182,210,182,204,186,184,211,187,194,172,210,195,174,190,197,172,212,211,180,196,216,182,200,207,190,201,183,163,209,188,174,168,176,186,177,210,209,182,174,198,181,205,188,191,190,208,197,194,195,214,195,176,192,143,165,208,195,212,186,188,21
5,189,230,214,186,164,201,192,193,206,182,199,209,182,181,159,91,45,91,122,84,87,69,89,154,170,128,186,175,153,139,198,187,211,156,70,116,211,178,177,130,90,131,97,119,84,61,64,90,107,80,59,148,248,227,156,159,230,249,247,151,19,54,101,72,118,80,63,104,141,119,60,62,37,17,15,22,53,55,75,126,135,111,121,80,125,132,133,118,33,39,73,72,106,99,110,94,113,96,60,31,71,69,81,114,46,31,18,22,32,26,33,33,38,40,75,96,169,173,142,136,157,184,178,163,123,99,51,45,22,37,13,38,46,36,64,61,22,25,29,25,84,157,123,106,142,86,10,20,27,32,60,50,39,62,70,53,22,47,38,15,33,15,32,47,60,4,21,23,28,28,26,124,125,57,49,36,39,49,55,63,77,97,115,167,192,203,196,162,200,193,158,161,174,160,128,135,116,132,154,135,138,134,189,151,119,109,108,78,71,33,15,31,22,18,26,40,30,33,11,12,8,21,22,20,34,28,13,22,22,14,36,5,37,44,12,28,22,37,53,20,33,16,54,32,13,13,11,5,40,31,41,26,15,7,15,18,29,17,48,8,25,70,71,85,78,83,122,133,120,157,153,175,156,153,129,154,159,133,147,164,169,177,184,148,148,110,116,124,91,120,57,35,51,64,64,86,89,57,55,43,62,14,42,32,26,60,22,18,22,15,22,38,54,51,17,9,120,163,51,112,168,116,216,147,137,179,65,23,76,163,153,112,116,116,113,32,59,58,61,35,47,51,13,128,175,165,135,95,45,35,11,22,12,35,21,21,38,60,13,15,13,38,40,145,210,161,174,128,111,123,98,126,111,130,154,209,246,248,252,255,230,235,225,250,236,231,225,241,199,211,221,130,0,9,9,6,2,17,17,4,44,27,18,41,15,12,8,43,15,23,3,18,1,25,16,7,216,212,209,181,234,234,213,208,183,207,217,210,190,213,189,200,217,182,177,212,206,203,217,225,225,220,230,193,224,226,208,234,232,210,198,180,188,199,207,231,186,189,205,189,179,186,201,180,208,192,211,181,178,205,234,216,219,200,205,215,207,228,208,200,220,223,209,213,222,217,180,214,215,207,206,208,180,195,194,191,200,219,188,174,192,182,197,215,209,226,210,215,223,192,217,193,187,191,203,200,183,181,174,194,213,201,200,205,197,217,190,206,217,187,195,188,204,205,203,205,189,207,178,164,200,178,189,191,190,185,206,187,208,209,181,209,226,204,212,199,199,200,213,197,187,194,203,219,203,187,179,186,189,207,195,195,204,197,213,170,197,191,206,177,198,202,193,179,205,183,225,185,192,191,205,190,169,201,194,203,179,178,208,203,221,173,180,196,175,179,201,187,198,184,184,195,191,196,190,197,193,200,184,181,196,170,200,204,207,186,180,186,189,208,200,190,179,201,185,186,205,209,178,176,175,199,207,181,210,203,205,201,208,194,168,195,181,190,200,195,193,197,159,179,190,178,183,205,173,193,183,209,191,173,185,227,183,194,209,214,196,184,202,182,190,202,182,189,185,186,191,168,181,181,199,190,196,182,225,206,209,197,160,185,209,197,221,186,174,204,181,188,203,207,154,153,96,133,164,205,160,150,168,206,194,212,210,207,194,210,186,183,204,214,190,202,193,180,143,138,68,78,92,88,71,57,117,156,209,171,158,177,152,199,210,165,170,162,60,207,220,179,181,124,109,138,64,93,46,148,150,198,226,91,70,121,202,143,169,213,245,240,164,82,79,117,106,76,62,58,72,144,114,91,83,39,20,16,38,66,74,79,129,131,112,125,112,110,100,141,145,96,24,52,100,82,99,75,122,121,151,125,61,92,116,108,112,100,31,23,9,13,42,53,52,54,39,16,19,25,70,98,113,146,148,131,140,164,199,152,131,121,67,68,37,27,16,41,3,19,11,39,31,29,86,157,132,103,147,77,33,8,24,38,21,75,105,104,81,43,54,38,13,5,21,38,37,49,63,27,31,43,68,89,111,139,116,43,74,104,107,146,155,175,196,206,200,179,194,152,181,139,137,133,132,129,127,137,116,124,144,126,138,132,120,82,53,53,47,22,20,2,24,50,18,54,31,26,27,23,27,21,11,25,21,29,28,16,35,13,7,1,21,12,22,30,31,14,34,20,30,8,32,33,47,30,49,46,38,18,38,7,6,25,27,28,23,4,8,21,30,47,41,20,18,34,13,22,29,37,34,59,66,69,103,146,126,154,151,161,
18,44,12,41,33,60,28,27,40,47,25,38,24,30,14,37,23,20,40,33,33,68,194,223,146,107,69,66,86,57,96,128,172,143,127,62,112,136,137,209,209,218,236,213,222,235,232,231,240,221,223,239,188,219,211,215,222,215,222,227,120,15,10,0,11,24,1,19,22,16,19,19,29,2,3,4,0,13,4,14,21,9,17,20,29,202,194,207,173,190,198,210,197,207,216,212,188,194,197,203,187,234,230,110,154,194,168,206,195,195,216,169,182,203,184,185,213,197,209,203,158,213,192,187,191,204,212,200,196,200,178,183,193,197,201,183,219,211,195,228,194,201,190,190,196,221,187,205,216,227,175,169,210,219,178,222,190,199,194,199,189,231,220,206,207,188,206,206,222,206,204,191,217,192,194,231,205,183,197,211,201,193,200,217,195,198,204,209,191,173,219,188,192,183,202,211,201,192,198,199,189,186,186,183,198,202,185,185,167,211,187,194,198,181,208,197,212,180,199,192,183,211,187,199,189,216,182,211,202,191,209,201,191,187,177,223,212,177,195,198,190,170,190,178,205,202,207,194,177,204,207,176,201,182,205,186,200,201,200,210,198,166,233,189,206,213,181,183,202,185,199,186,176,183,197,191,202,190,169,214,194,202,194,184,199,215,184,190,199,180,191,211,183,211,198,200,218,174,191,188,181,181,184,204,196,192,189,229,217,194,191,160,185,190,197,180,177,197,210,198,212,190,191,177,202,194,215,161,186,171,182,188,192,178,174,173,187,186,197,217,169,236,179,187,193,201,202,181,193,201,191,182,192,192,196,183,185,187,190,201,209,189,201,220,174,195,208,201,166,118,116,132,161,150,142,81,94,219,225,171,139,153,222,193,186,198,193,178,190,196,209,191,159,119,148,77,63,171,107,30,61,53,24,64,158,223,160,9,10,14,45,9,67,110,77,83,153,121,195,248,243,177,75,80,83,85,42,62,124,152,145,82,70,67,82,6,19,74,102,124,139,114,120,135,141,122,118,108,106,89,110,103,122,113,83,80,72,128,136,107,109,79,44,38,77,136,94,128,124,34,89,65,92,90,121,140,56,23,31,123,145,40,18,12,50,44,41,40,14,9,30,30,18,16,24,24,28,8,36,8,31,20,20,15,19,29,53,58,72,61,40,83,58,63,154,162,196,210,161,180,195,174,180,144,167,164,176,189,152,161,171,169,172,180,196,187,207,176,165,160,139,108,134,160,142,123,94,135,179,188,186,140,160,139,79,44,28,60,37,46,93,78,60,67,84,84,88,91,86,65,79,75,74,55,77,95,53,72,59,80,39,68,50,65,58,60,64,64,44,85,66,99,170,151,42,39,55,35,55,67,35,44,40,22,9,28,6,20,34,73,91,64,69,70,106,93,103,90,62,65,38,57,117,60,32,14,44,110,100,23,71,72,120,117,97,91,64,54,73,29,96,150,43,17,11,33,26,55,194,190,174,167,166,215,177,163,166,187,137,16,29,47,12,31,10,13,14,41,47,20,25,30,27,28,41,20,33,68,23,41,32,44,26,19,30,23,36,26,10,35,23,29,52,27,24,41,36,50,57,44,26,41,50,21,32,9,46,12,37,23,43,10,33,32,24,67,30,37,53,15,38,10,16,59,21,30,36,41,14,37,11,50,20,27,35,62,28,16,22,50,24,40,163,184,150,155,110,72,94,110,65,50,88,44,40,47,117,195,193,199,152,141,163,171,183,142,170,209,205,217,246,238,222,231,232,202,198,214,225,214,96,8,8,19,0,1,1,40,35,17,34,10,23,33,34,6,24,22,41,12,22,13,31,28,12,186,186,215,198,181,177,206,180,211,196,192,201,218,177,209,202,240,217,93,161,165,199,189,183,201,221,210,196,169,197,217,198,191,188,194,197,176,212,199,205,187,187,192,204,187,208,219,222,194,197,180,198,187,176,211,199,213,197,186,170,208,207,198,218,200,193,184,198,183,191,185,191,203,183,191,194,188,201,228,198,181,186,185,213,215,207,201,190,206,207,186,201,184,182,206,188,199,210,214,183,208,194,221,180,209,208,195,192,193,194,195,206,204,204,210,171,211,211,181,200,204,180,210,197,179,193,210,185,191,217,200,201,207,209,201,187,188,193,223,212,212,171,199,181,188,183,189,201,182,191,174,193,201,180,197,209,210,203,186,186,188,196,204,196,198,194,192,203,209,206,204,214,191,
181,202,187,200,205,168,195,169,204,204,208,196,192,211,167,185,181,193,182,182,175,190,196,196,213,214,188,188,176,195,189,193,189,175,199,181,186,190,191,153,204,195,201,182,197,189,181,205,184,198,185,221,182,214,190,184,200,194,190,176,190,211,197,202,185,192,186,188,202,178,214,209,190,201,172,167,200,187,204,195,211,201,185,199,198,206,197,198,190,203,177,183,203,202,194,199,201,191,173,201,197,208,210,232,221,213,198,202,229,181,144,134,87,99,140,136,116,78,90,147,233,203,135,109,194,162,224,182,204,172,190,181,210,192,94,46,42,51,184,212,127,29,75,107,66,97,146,184,82,21,10,100,139,64,189,122,97,180,219,249,252,242,141,99,82,85,102,55,84,150,140,103,69,56,47,48,22,18,59,110,130,119,123,98,107,100,119,105,84,100,103,119,124,120,90,98,112,115,111,125,65,91,76,12,88,140,142,185,95,63,61,44,102,92,47,109,60,87,46,45,78,161,148,84,15,14,7,15,8,27,12,26,5,41,63,44,36,48,30,16,31,30,34,40,18,36,37,27,60,53,92,99,98,62,81,69,124,135,150,145,157,131,155,173,175,150,178,164,222,145,156,159,128,162,142,173,179,174,168,123,172,130,153,119,176,153,158,163,104,148,168,185,169,168,142,118,111,90,95,85,89,82,102,107,105,185,138,164,196,161,174,150,182,200,187,175,165,199,198,174,157,181,171,155,144,157,152,179,184,172,167,195,196,217,166,63,32,37,48,55,61,27,24,58,34,33,14,11,24,35,9,52,82,112,118,137,133,95,109,118,117,110,78,70,36,25,35,18,30,72,74,62,62,92,139,174,163,152,120,99,90,84,163,134,44,15,11,36,6,93,187,157,107,52,60,55,39,55,69,61,46,52,30,25,25,35,19,59,28,39,43,28,50,35,39,43,40,50,39,32,48,33,60,33,61,40,39,59,55,37,32,35,30,43,40,11,11,19,44,62,76,99,57,89,34,51,32,32,29,13,8,21,22,8,26,35,35,33,50,38,53,43,37,17,30,32,38,46,20,22,36,41,54,47,31,32,32,32,42,49,31,11,52,34,155,111,114,90,49,52,112,103,50,23,38,44,32,27,154,137,146,141,114,129,132,132,146,119,126,125,144,176,198,199,226,243,234,236,244,236,207,205,102,7,10,32,1,8,5,29,19,16,37,3,26,11,4,8,36,13,17,1,15,14,3,41,5,163,194,201,214,198,199,208,188,188,202,230,209,209,195,199,194,253,225,122,169,210,200,197,199,200,216,210,188,202,211,191,204,194,194,181,204,201,187,190,217,190,175,179,186,207,194,215,215,195,205,198,201,200,188,169,227,193,197,184,193,206,181,200,205,192,196,201,211,209,206,208,189,190,197,196,189,189,216,194,212,195,192,183,192,200,207,187,190,207,219,211,199,169,187,204,173,190,197,172,202,207,206,200,232,186,207,206,206,185,221,182,203,215,209,195,189,195,190,187,172,208,205,219,217,200,199,192,179,183,210,193,190,176,176,205,195,199,205,199,181,185,200,190,203,211,204,180,187,202,187,205,188,204,165,203,193,199,206,208,197,200,197,197,161,212,216,177,183,206,189,217,196,206,179,196,186,179,207,194,205,188,200,212,183,207,195,222,203,216,187,183,211,170,202,193,183,189,176,177,192,186,199,190,173,195,188,161,175,197,158,208,198,190,183,180,187,193,164,186,200,195,193,189,174,182,197,207,198,180,185,203,212,218,174,227,177,205,223,198,166,191,195,205,188,180,186,170,173,199,179,188,168,180,195,211,181,179,204,170,192,225,213,193,215,190,204,185,199,192,184,202,196,207,235,237,242,160,170,200,192,227,166,149,130,115,113,114,146,146,97,101,62,109,194,230,148,121,213,201,209,207,209,180,195,220,210,110,89,34,29,110,212,220,121,55,102,163,151,89,151,159,153,158,108,234,125,111,191,128,97,104,146,234,193,105,117,81,89,60,96,103,117,141,94,67,48,27,18,22,57,81,101,126,140,111,108,120,117,119,100,124,94,85,97,110,92,103,96,97,85,107,101,93,90,64,4,75,185,173,133,122,94,65,41,36,41,48,71,79,51,22,53,130,159,168,107,33,44,3,26,14,9,28,19,19,20,28,60,51,38,22,16,39,13,33,24,16,1,41,36,32,27,56,69,63,97,46,86,57
,43,29,32,35,33,33,68,68,39,39,54,52,61,62,40,60,23,38,45,37,44,35,16,38,28,35,48,33,25,77,53,25,29,93,110,145,140,101,116,87,41,73,32,33,47,24,47,81,64,62,95,76,63,72,82,75,101,122,111,113,104,116,126,92,130,101,135,111,107,101,138,153,141,147,150,156,158,169,97,87,113,64,60,51,70,63,70,57,46,36,21,18,22,17,5,18,101,112,120,93,102,152,165,187,181,168,59,79,104,46,55,49,60,84,42,77,139,157,196,185,167,150,87,121,156,146,149,41,5,40,22,11,38,45,94,87,65,58,27,47,40,42,51,34,61,64,60,45,67,53,66,118,116,137,133,135,142,139,131,114,118,112,89,97,89,105,109,78,87,78,100,75,91,65,75,58,53,70,60,69,72,61,88,146,145,162,171,178,159,49,48,21,33,16,22,30,24,28,37,32,80,98,51,67,50,27,33,43,58,41,48,40,46,46,30,40,55,43,48,22,65,45,49,30,24,52,97,141,155,152,75,53,13,20,89,107,51,17,22,71,66,63,139,140,127,113,131,111,125,120,118,102,119,111,132,150,147,177,171,202,223,184,247,226,187,204,120,0,2,22,12,0,15,32,4,28,17,38,11,8,0,19,3,4,0,10,6,38,39,12,0,204,194,203,180,213,230,203,194,196,185,209,219,195,210,195,218,243,216,123,204,192,195,204,181,191,210,186,211,189,184,199,209,203,191,179,212,201,198,199,186,167,215,198,211,215,188,205,177,192,192,191,204,173,191,217,235,201,182,199,195,211,179,191,217,193,212,197,219,180,217,169,226,183,191,201,166,173,172,204,206,190,215,181,202,218,197,203,192,198,219,202,190,194,201,189,201,186,208,178,197,204,213,197,209,200,202,182,213,192,198,198,186,180,200,179,208,197,190,187,189,198,184,192,197,200,176,190,169,206,201,172,216,194,187,187,193,221,194,187,208,208,200,195,203,181,233,193,202,190,194,174,177,182,196,202,210,197,190,168,171,179,190,193,213,183,209,199,192,193,170,187,173,199,204,172,194,195,190,190,177,209,205,180,180,217,185,190,188,195,199,204,190,224,213,200,176,189,179,200,201,175,171,199,184,205,187,172,167,195,203,180,193,225,164,191,185,186,186,189,201,205,194,209,203,208,185,211,196,195,161,190,187,186,183,205,203,207,219,196,174,182,197,187,191,202,189,202,186,205,213,198,178,210,207,187,178,200,182,179,196,207,194,187,207,192,196,235,196,208,196,206,187,199,223,235,200,109,182,207,171,218,162,122,93,74,102,182,137,97,58,93,44,76,115,134,129,147,211,247,224,224,224,219,247,225,154,149,163,80,103,93,188,221,90,61,26,73,87,75,110,212,238,243,161,125,111,89,112,68,89,68,66,160,114,83,85,93,70,67,100,121,131,120,89,46,42,43,36,55,79,93,176,135,136,107,103,109,107,95,104,108,96,108,80,95,113,78,65,81,88,102,127,106,73,22,6,78,109,129,117,59,37,50,47,56,62,64,36,62,51,56,102,146,138,124,58,32,23,34,21,0,10,21,26,6,34,30,59,38,21,57,30,42,33,34,41,34,13,21,23,45,34,45,50,38,84,77,83,86,59,22,33,19,50,27,50,60,60,39,52,39,59,33,69,77,116,86,82,18,12,33,73,62,43,65,36,73,10,31,37,15,32,88,113,123,64,82,55,73,67,28,23,12,38,67,55,76,79,59,74,58,89,63,75,32,14,65,90,55,9,54,50,76,62,29,28,74,99,42,32,44,55,61,58,38,32,58,37,64,135,109,107,110,122,123,90,53,13,13,19,30,37,37,26,85,143,113,83,127,178,165,125,86,102,134,143,86,72,73,94,87,104,117,151,130,196,237,211,167,100,85,74,70,82,122,130,51,19,12,3,8,23,30,34,92,75,73,70,49,40,54,62,41,48,77,51,48,92,122,126,93,140,126,154,156,154,165,149,134,120,132,152,156,173,121,125,162,164,138,130,153,103,137,146,135,140,172,79,148,175,159,156,168,213,158,167,160,187,108,70,49,55,41,58,49,55,59,88,89,161,129,137,128,126,107,123,124,111,122,123,122,89,104,84,78,133,84,99,90,102,110,60,49,52,116,169,194,146,159,119,72,67,95,136,132,110,106,127,155,157,160,153,140,131,138,144,131,119,114,115,121,124,109,129,120,123,136,124,159,168,191,210,234,235,231,100,12,3,26,6,2,3,22,0,12,8,17,17,0,1,15,5,4,2,29,24,24,25
,14,18,200,190,189,183,213,210,215,217,186,202,179,210,179,209,184,197,240,212,159,200,196,205,170,231,200,207,200,176,184,225,196,208,180,183,185,211,200,203,188,198,212,186,202,211,194,189,188,164,197,207,187,189,181,215,179,195,184,190,203,180,200,218,204,200,185,180,195,193,167,182,197,189,216,214,179,175,203,190,201,211,180,212,204,196,193,217,201,204,194,189,200,188,181,194,192,204,192,179,209,198,194,207,193,192,216,219,166,202,213,204,213,195,191,180,194,209,202,189,199,167,191,220,216,188,205,164,188,187,189,206,196,189,176,194,183,202,212,194,203,185,188,189,214,181,209,184,203,176,161,172,198,179,204,177,188,193,188,178,208,165,199,183,216,184,180,174,206,186,212,229,193,196,200,189,190,218,210,195,192,185,176,201,213,212,193,185,188,173,182,194,195,165,201,182,190,208,186,205,178,173,182,182,198,189,170,191,198,190,171,171,159,188,194,197,203,221,197,215,183,170,210,191,201,181,182,184,192,198,173,201,179,214,191,180,192,211,203,200,184,212,198,186,217,189,214,185,185,198,211,178,202,167,184,201,180,202,217,179,178,189,201,170,206,185,198,185,190,186,190,201,195,162,132,193,164,109,137,241,243,193,217,183,113,98,68,120,197,173,77,42,64,55,28,24,55,78,136,203,206,193,159,218,214,206,188,104,169,217,126,94,85,138,131,86,68,36,31,13,6,82,215,244,237,144,72,81,202,157,72,60,7,49,83,93,115,112,37,69,142,149,136,82,58,62,26,31,15,36,84,88,137,133,126,95,136,115,122,101,126,116,118,86,113,114,94,77,88,80,84,127,137,132,73,32,35,22,82,101,124,92,82,106,66,29,25,51,39,38,46,27,41,103,114,94,107,56,37,62,51,23,23,34,6,8,19,23,25,34,47,20,31,30,8,26,16,17,25,47,28,14,38,16,25,42,49,47,84,100,86,90,61,62,22,27,54,16,38,47,64,56,46,43,35,67,122,149,114,72,33,67,118,112,111,77,42,69,53,81,47,50,24,42,88,73,104,100,76,80,46,62,50,17,7,43,54,97,112,141,60,62,117,104,112,139,60,38,91,122,96,13,29,103,109,105,10,46,127,149,57,79,110,89,129,96,44,14,54,45,106,145,114,159,141,125,165,110,20,34,35,9,43,4,22,57,87,145,86,102,156,93,62,43,74,24,26,110,80,74,103,124,169,174,209,154,144,200,117,42,15,3,60,53,97,46,86,156,70,18,21,37,26,19,43,43,103,116,138,90,119,93,116,111,59,26,19,72,31,27,23,32,75,72,61,58,99,45,43,73,50,45,37,65,60,51,57,91,75,75,76,54,74,60,67,56,62,64,88,87,123,124,117,120,143,154,174,159,152,128,87,92,75,79,50,55,56,52,74,112,126,106,166,117,130,146,136,142,173,120,138,143,150,147,157,172,174,171,189,173,156,176,145,86,43,113,139,155,154,128,145,157,159,174,180,187,167,155,160,185,193,163,153,138,126,142,137,138,127,125,132,111,147,130,120,133,139,118,112,118,128,136,145,178,217,228,216,101,19,10,26,10,0,3,0,4,16,50,15,5,16,13,2,7,6,17,7,15,27,11,16,6,214,172,188,190,210,191,198,196,204,193,174,179,183,189,204,211,253,205,191,201,192,219,202,191,184,186,212,201,184,189,201,196,203,215,200,193,207,195,201,191,203,211,220,179,202,190,180,199,174,198,185,205,193,195,199,184,194,178,194,201,178,198,205,214,194,234,196,195,205,202,194,192,192,186,192,211,184,193,208,179,233,190,193,179,189,186,206,224,181,203,166,191,167,193,198,207,181,197,207,187,205,178,194,225,195,187,183,197,190,194,187,205,195,192,219,183,188,200,184,189,174,181,225,198,168,176,196,208,191,185,205,191,172,202,188,186,206,198,201,211,178,206,200,202,184,165,207,206,207,180,210,196,205,202,206,187,208,223,189,187,180,165,175,168,185,193,183,180,192,206,201,196,206,189,170,210,201,180,214,179,188,176,185,180,178,185,175,198,199,170,181,188,196,182,181,176,193,180,185,184,198,159,185,189,211,178,187,188,204,193,198,203,207,207,187,211,167,223,189,188,211,197,186,174,202,181,196,202,207,198,187,169,190,189,210,168,19
1,194,188,208,214,190,199,194,188,180,186,181,181,217,184,179,194,180,192,210,204,213,177,185,208,186,207,210,196,206,200,178,214,188,191,58,53,95,21,26,68,147,160,120,133,131,123,103,86,111,145,146,98,89,91,109,67,68,59,18,62,84,172,179,168,173,131,150,133,75,119,122,53,66,50,85,123,100,88,25,58,33,26,98,182,244,247,183,115,151,248,157,96,51,0,57,110,101,76,62,79,141,108,119,57,55,18,23,17,12,42,75,108,112,157,103,116,89,90,148,80,105,145,122,107,102,86,102,85,83,99,107,132,112,124,96,23,3,19,12,118,85,71,83,90,133,82,33,54,43,54,55,39,46,48,89,93,62,56,50,38,42,72,34,20,15,19,13,15,29,44,29,26,12,39,24,4,38,36,16,58,11,34,32,30,38,32,56,69,87,103,76,132,90,88,51,63,56,43,28,47,59,46,55,47,40,62,77,98,104,108,33,21,117,106,128,103,85,35,36,47,42,38,25,26,18,44,92,93,73,45,35,64,18,51,33,15,1,54,100,95,128,45,67,125,109,117,152,87,2,95,143,108,49,37,106,100,125,57,62,113,165,61,128,173,134,156,95,48,35,34,14,119,142,127,113,94,128,129,81,12,25,6,19,19,47,14,6,68,155,98,152,158,68,55,50,107,48,26,94,75,67,54,111,172,191,163,79,104,185,63,13,76,58,92,86,100,46,52,145,91,37,40,8,1,19,12,34,127,150,141,132,144,129,126,98,29,9,57,42,82,27,70,68,58,78,100,83,64,60,26,82,98,70,20,45,62,62,94,25,45,69,81,47,67,73,70,72,48,54,85,38,29,32,24,84,97,132,89,118,141,93,86,54,42,36,41,45,45,48,14,66,39,63,53,52,49,78,55,62,70,67,66,103,97,63,66,108,102,93,82,119,106,95,91,52,113,161,142,121,132,124,127,150,198,167,190,180,179,166,128,145,131,101,126,136,121,147,130,126,120,106,106,119,119,126,124,126,133,143,130,127,117,108,110,132,179,218,213,123,12,0,17,27,4,13,19,22,21,16,15,4,12,2,16,4,24,6,15,23,11,12,13,8,180,180,201,189,198,200,212,202,201,185,175,189,208,203,161,179,227,235,208,206,184,192,200,200,198,178,218,201,191,187,172,202,168,181,195,204,195,185,195,215,204,182,195,191,163,211,200,199,168,212,197,195,185,189,204,189,170,188,187,181,197,179,203,222,173,204,196,205,195,184,182,196,177,161,210,199,195,191,201,214,181,190,188,215,201,189,211,197,202,221,205,192,213,183,205,207,186,203,176,208,203,185,193,192,196,217,193,188,178,196,206,209,181,189,181,193,198,211,197,198,192,199,201,190,204,182,176,208,179,185,189,184,172,176,200,174,187,173,173,182,207,192,198,197,203,187,199,219,186,194,194,196,189,196,196,174,199,214,203,177,202,186,174,173,162,171,215,195,179,231,207,195,208,166,198,192,181,168,183,197,187,193,184,180,174,194,190,180,204,195,186,197,184,200,202,199,181,178,180,202,184,195,196,190,172,181,183,211,197,189,170,188,165,212,194,193,182,210,182,192,188,179,219,197,183,185,194,199,214,177,172,176,185,193,190,206,195,200,202,187,201,185,191,183,202,192,192,205,206,195,190,201,189,187,200,176,216,206,197,194,182,187,180,186,211,179,198,215,180,210,191,83,32,86,60,4,5,9,16,42,86,68,104,80,80,94,76,121,116,89,123,92,70,71,35,0,11,73,107,74,97,121,132,158,146,94,90,84,33,33,44,64,118,117,54,19,59,10,91,212,181,137,138,81,86,164,230,71,60,35,47,132,108,53,67,102,129,137,116,77,51,74,62,28,38,53,120,126,104,107,100,122,115,111,86,115,93,118,88,107,121,122,73,89,78,93,107,92,140,109,62,34,11,45,45,93,143,102,81,101,122,103,82,44,49,38,41,64,51,64,38,50,55,66,79,52,41,26,13,49,11,7,13,31,13,33,37,10,33,25,13,24,37,48,12,45,20,29,27,2,3,17,25,30,64,102,85,113,113,79,69,87,68,54,32,28,52,52,44,40,15,60,5,59,102,118,92,30,43,129,144,141,147,132,60,35,48,23,62,24,29,31,120,36,59,78,48,51,38,49,74,27,55,33,39,101,108,90,45,34,118,84,135,133,38,2,88,147,93,41,44,122,126,165,114,119,111,101,5,61,81,84,119,33,37,49,18,29,174,156,82,119,65,118,111,72,9,15,21,15,35,34,36,30,39,137,46,146,148,67,55,6
3,159,81,23,121,110,73,31,48,62,56,10,63,163,187,24,51,116,42,120,104,107,74,51,130,144,65,19,30,36,24,25,7,106,160,130,117,109,136,132,81,33,61,59,22,71,71,73,84,81,130,140,123,118,39,69,154,131,81,36,34,84,107,60,42,51,126,128,73,64,107,124,73,90,112,113,55,18,41,48,68,101,89,135,93,106,66,84,73,44,44,20,34,35,65,70,71,102,75,46,66,76,60,76,52,78,78,84,74,43,60,37,48,59,43,24,34,42,47,38,100,149,155,137,156,118,144,162,174,188,160,180,178,146,136,154,120,125,130,119,131,125,144,117,119,109,121,127,125,128,133,113,105,140,119,119,123,131,134,121,130,151,172,192,121,7,33,8,1,3,13,15,11,24,20,6,0,26,2,4,5,23,8,17,7,30,27,33,30,209,175,210,180,196,199,210,181,181,206,193,201,224,207,184,176,251,206,192,219,194,218,201,203,202,187,168,164,178,196,203,191,202,211,191,195,198,199,206,211,192,210,189,186,180,182,212,184,209,192,180,213,184,215,198,192,218,185,210,214,180,188,179,203,212,179,193,194,197,179,193,185,188,220,202,192,194,176,201,184,194,187,204,177,207,197,208,202,204,201,214,194,186,196,186,193,194,193,207,204,205,189,190,196,193,221,172,206,193,214,203,183,175,197,205,200,199,208,194,199,202,191,178,201,172,174,189,181,196,180,199,203,204,203,203,187,187,203,189,196,191,203,196,193,195,179,187,199,188,207,181,208,185,200,186,180,178,226,176,179,176,194,185,197,197,224,205,181,182,187,165,197,177,219,174,190,208,203,180,178,191,191,182,188,174,210,204,185,170,186,193,219,200,177,186,193,170,211,187,202,168,177,185,186,188,185,196,187,186,184,201,213,207,186,192,184,200,191,213,204,201,200,212,191,180,205,190,189,190,179,189,189,197,194,185,189,177,178,214,194,201,182,207,190,188,213,196,155,190,193,187,197,192,205,166,188,185,206,194,212,207,190,195,199,222,204,200,199,200,216,232,137,59,119,118,60,27,26,33,17,100,70,84,58,87,112,65,82,157,153,104,53,50,71,53,26,65,66,94,136,119,150,154,147,110,165,147,124,79,89,92,80,139,103,43,56,35,13,72,118,68,109,82,50,30,161,211,32,42,16,31,101,72,117,127,128,133,83,46,79,62,163,83,50,77,121,148,133,101,112,100,117,103,116,75,101,92,113,107,138,121,80,67,81,111,92,113,62,129,54,24,13,16,62,112,112,166,173,113,89,61,123,94,45,14,7,42,46,24,35,36,46,73,44,57,49,45,43,31,51,12,45,28,13,32,16,23,34,10,26,27,26,7,19,40,46,40,32,10,22,38,45,36,38,42,61,90,82,88,71,40,57,58,54,35,20,19,46,24,38,25,51,46,63,105,116,81,36,17,117,97,88,110,136,54,26,47,39,50,47,52,111,145,157,182,140,122,132,138,145,150,115,34,26,21,93,75,44,9,68,132,105,141,93,57,8,95,147,97,29,45,115,143,121,119,131,131,143,15,25,60,85,137,67,7,36,13,42,169,168,91,82,84,87,93,49,14,15,2,26,41,21,12,21,75,112,107,130,107,43,56,156,232,92,2,95,155,75,45,34,86,46,67,192,242,154,0,62,114,41,142,120,98,80,130,184,106,46,34,34,7,39,8,25,126,152,116,76,86,117,101,72,48,39,48,59,74,74,109,48,72,91,88,91,99,47,66,145,97,47,32,24,97,103,80,28,23,116,126,40,63,116,124,123,170,156,94,54,15,42,74,82,59,87,67,40,52,18,44,59,14,31,14,47,31,53,27,65,96,130,111,118,132,59,50,82,105,116,117,48,71,37,51,72,48,30,29,39,15,32,108,131,168,148,118,125,138,159,156,153,156,160,150,151,138,142,152,143,166,160,136,153,152,124,143,124,136,152,147,146,133,127,124,113,136,102,126,125,140,124,130,127,135,151,149,105,7,2,16,22,36,14,15,20,21,18,0,39,3,4,2,29,7,3,17,21,0,17,6,20,195,176,170,184,210,213,179,216,212,189,202,177,206,193,188,151,207,160,182,206,203,201,172,209,206,194,175,172,198,199,192,169,182,182,192,189,190,185,199,226,199,183,191,177,190,185,197,193,207,193,229,200,204,187,185,204,193,204,208,205,216,212,203,210,176,180,196,196,193,208,192,197,211,188,184,191,212,205,206,191,193,202,208,195,169,184,
187,197,187,199,176,200,201,188,208,177,182,178,191,196,178,180,181,205,188,189,189,174,191,178,206,188,176,188,204,206,193,202,201,180,187,191,195,190,208,209,218,203,183,192,180,209,210,204,189,188,178,194,202,185,185,173,194,187,192,188,177,178,181,193,179,187,192,191,188,199,192,194,212,145,206,204,177,197,202,221,207,175,189,193,177,185,188,186,198,203,197,203,190,155,193,192,169,179,208,173,206,184,204,181,186,202,208,178,198,188,191,168,192,208,209,185,188,198,188,199,180,211,180,201,193,196,195,196,203,198,196,194,194,184,200,161,203,215,205,210,190,178,160,189,179,190,184,185,190,205,184,210,196,191,185,191,185,207,182,196,192,168,189,197,199,204,187,206,173,169,184,188,203,194,181,198,195,205,184,192,205,195,183,223,252,157,149,205,215,193,162,105,64,34,85,69,95,59,81,154,112,103,113,139,109,28,40,29,21,19,17,50,109,106,97,101,96,86,162,224,196,175,134,148,167,134,167,88,106,214,81,37,79,46,68,150,132,24,69,214,168,4,14,6,102,134,107,128,132,105,96,99,67,132,153,166,84,54,80,98,133,109,97,102,121,102,98,80,69,76,80,111,121,113,84,90,106,85,95,77,102,122,40,8,23,34,71,131,130,129,140,139,116,80,112,105,44,53,14,17,19,32,47,29,47,36,45,33,35,56,61,43,35,69,39,39,17,20,9,8,17,16,22,30,32,28,30,29,38,21,39,29,18,45,44,19,31,51,39,55,54,55,60,51,57,53,28,23,27,51,39,41,28,47,38,30,50,65,95,98,107,34,19,31,59,30,84,49,38,54,39,36,46,67,65,118,232,175,188,224,182,227,223,226,204,150,43,15,16,80,99,40,21,58,111,100,105,119,85,20,79,132,87,16,57,99,147,68,91,140,160,147,15,57,103,93,136,62,10,34,36,65,189,149,92,84,75,88,74,94,4,15,21,30,11,24,30,13,46,90,69,118,86,85,116,230,209,42,13,103,160,153,111,99,139,143,218,244,187,112,7,52,112,113,111,63,54,134,182,201,98,16,36,29,5,33,26,62,155,145,88,90,79,52,82,63,31,14,42,63,80,94,92,69,90,62,31,91,104,51,57,140,62,52,10,24,124,90,69,32,37,102,80,31,75,117,65,105,102,117,57,32,8,17,42,80,103,55,73,72,54,37,44,35,39,25,26,19,22,40,44,78,37,119,113,93,66,24,17,89,108,112,85,33,36,40,55,53,47,9,35,52,68,139,167,158,174,156,137,151,139,124,142,148,129,130,149,144,165,134,165,133,151,142,140,155,149,150,156,144,149,158,175,158,123,130,128,122,139,101,122,130,148,138,116,113,118,122,149,111,37,4,18,16,12,40,11,30,12,13,19,17,22,3,27,14,10,12,29,8,28,4,11,15,187,211,179,213,200,214,200,200,214,208,207,204,199,226,240,170,163,150,152,185,167,194,209,206,197,213,204,191,211,203,178,198,220,208,206,196,199,211,188,197,194,194,200,201,198,191,216,197,192,190,216,205,187,195,198,197,192,168,207,186,196,178,205,186,206,208,194,173,179,184,172,198,205,202,230,191,186,208,202,189,217,189,202,206,209,196,194,196,204,177,207,186,176,210,185,164,178,201,180,171,174,169,193,171,200,221,201,200,207,187,206,200,207,174,178,210,186,214,193,183,200,197,197,206,196,200,186,188,194,174,216,186,168,178,207,178,194,217,213,196,186,194,190,181,209,203,196,191,200,187,213,203,203,201,183,189,189,190,174,195,198,180,200,184,172,187,180,217,191,187,177,177,179,193,206,194,194,183,198,181,203,205,186,193,204,188,178,206,214,195,203,187,188,213,215,216,195,181,197,198,174,195,217,187,191,201,160,187,188,186,203,192,192,216,207,201,183,202,220,178,174,193,178,191,166,173,167,200,205,193,197,205,206,184,179,199,199,176,210,197,209,208,184,199,205,163,177,208,168,190,199,192,192,179,192,183,192,192,189,194,200,191,168,193,161,188,203,180,192,253,235,149,171,218,222,243,251,250,199,147,107,77,103,40,57,133,124,77,48,98,114,72,51,66,27,35,18,23,104,25,22,11,1,66,131,186,129,113,96,106,106,67,74,16,106,180,97,175,185,53,125,209,141,64,144,219,106,27,17,119,217,172,156,148,127,84,8
4,43,93,124,173,154,68,58,82,78,134,87,99,93,113,99,90,107,57,82,106,95,113,61,85,94,110,82,103,117,66,38,30,14,20,72,118,120,131,134,129,108,80,92,110,72,45,62,34,30,34,42,52,39,23,3,13,35,35,59,39,47,46,85,78,59,63,28,51,28,13,13,12,3,27,31,43,33,16,16,30,24,28,34,33,14,31,72,83,90,80,102,80,106,117,97,81,60,65,88,128,53,76,56,78,87,62,104,80,104,90,58,51,34,76,95,87,58,73,40,29,84,66,63,92,150,165,110,111,65,98,128,93,120,106,81,75,55,50,69,120,57,24,114,121,81,108,99,73,85,101,131,106,25,81,116,115,49,24,109,133,148,58,22,102,99,84,20,24,54,54,119,186,62,57,55,52,95,66,68,42,39,36,19,28,25,15,19,12,47,45,77,94,116,107,176,103,63,20,16,78,166,154,191,127,199,171,168,126,46,0,31,44,87,104,48,101,109,169,126,40,24,8,23,54,26,47,90,165,118,51,56,34,52,57,57,44,27,52,47,117,94,127,101,93,23,13,91,117,11,71,162,68,67,60,78,131,95,65,34,60,130,96,9,99,120,41,31,49,82,65,28,26,22,73,127,128,123,126,105,64,62,92,89,91,56,29,37,49,82,85,84,67,104,80,53,63,33,56,112,101,107,34,28,39,45,80,73,50,25,7,71,157,167,148,101,75,135,139,159,115,168,131,136,143,137,145,180,155,210,173,142,138,138,113,162,136,156,170,141,158,153,152,157,127,135,152,159,129,110,123,128,142,126,129,97,107,134,128,99,16,3,4,9,28,16,18,12,15,4,6,16,2,2,16,22,7,39,10,25,26,8,15,8,183,211,221,223,180,201,205,167,222,193,197,182,222,220,211,149,136,121,143,204,187,230,186,184,194,185,192,191,171,202,204,185,180,191,194,205,171,208,185,196,201,203,176,204,199,197,210,216,201,195,185,214,185,184,193,226,191,195,204,217,177,196,201,214,196,197,194,181,202,190,204,203,199,192,175,207,201,207,214,209,207,188,186,192,185,192,200,179,203,187,181,185,184,191,189,203,193,190,172,204,216,191,205,189,201,201,207,190,196,189,189,186,208,181,190,194,184,178,189,203,193,178,201,186,191,178,219,190,183,186,188,190,208,197,176,194,167,202,184,165,202,200,194,198,187,199,192,210,210,202,207,184,204,181,189,181,186,179,202,219,184,172,175,171,196,191,191,195,198,193,186,159,178,218,175,186,183,205,200,215,195,188,219,181,204,201,190,194,192,199,189,225,174,173,204,208,205,181,190,206,174,165,191,191,200,192,186,191,190,216,207,187,202,208,196,187,186,182,212,199,183,213,170,187,196,202,197,207,193,185,170,192,191,192,176,220,208,213,176,205,178,173,194,186,174,186,177,193,186,200,196,177,198,188,202,201,179,156,194,194,209,202,214,180,199,184,202,215,214,249,217,124,157,182,227,232,243,251,240,117,94,67,81,94,95,115,142,128,59,39,104,130,153,105,34,56,74,113,145,107,101,85,75,103,149,116,105,49,44,93,83,60,90,67,64,110,102,207,117,52,146,171,62,78,137,195,131,66,96,203,201,142,123,84,87,68,18,4,49,51,135,171,91,70,90,82,99,103,91,101,94,118,104,89,90,101,100,83,108,85,109,126,104,104,101,79,28,22,9,31,71,123,121,107,95,118,124,98,130,85,58,19,10,107,95,46,26,9,19,26,39,22,36,40,41,40,27,36,63,77,58,68,40,38,45,51,4,10,7,15,5,56,18,23,20,30,40,55,14,14,43,30,54,116,175,173,173,149,136,165,180,184,150,168,173,202,203,162,166,180,163,170,180,163,146,153,147,140,181,159,172,160,194,196,129,164,167,144,174,165,193,184,143,86,64,67,82,95,91,89,93,131,130,141,172,183,171,158,182,158,149,145,160,162,161,162,169,150,132,141,158,113,135,128,90,115,106,156,114,132,153,123,121,102,128,113,128,196,166,133,130,111,105,108,109,130,62,65,81,64,80,67,86,60,64,70,52,108,137,120,113,69,42,43,66,41,47,97,130,142,140,131,107,114,45,12,53,26,39,43,97,86,84,109,77,53,51,28,35,47,46,49,50,143,176,60,77,43,61,72,75,87,44,29,19,95,70,41,126,96,28,23,54,125,112,66,121,116,105,105,91,91,129,97,112,73,84,124,92,27,82,106,43,39,57,98,62,56,20,34,122,155,181,134,174,158,159,152,
206,184,161,88,45,32,52,99,140,94,103,106,75,125,140,101,71,95,96,85,14,35,33,42,99,51,51,61,35,128,182,169,79,38,62,129,149,174,149,138,135,150,113,115,135,144,133,130,154,141,139,123,142,148,130,147,150,140,136,140,141,129,126,139,153,123,142,157,154,141,146,125,120,114,117,151,164,128,21,22,0,4,6,4,16,42,25,7,25,22,6,5,14,18,3,5,6,11,8,14,16,23,209,193,180,191,186,195,193,193,204,206,200,191,183,201,214,175,194,189,171,207,199,171,180,188,196,206,203,217,198,188,223,211,189,195,197,188,190,206,186,193,193,181,191,194,214,217,197,180,184,197,202,196,200,182,204,200,190,194,182,197,221,176,218,176,194,186,188,214,180,183,195,184,192,205,178,185,202,192,186,190,186,205,194,182,182,214,160,183,216,186,175,194,190,200,207,189,202,199,194,181,177,155,203,208,187,181,188,176,164,208,201,224,192,190,180,164,192,193,172,197,193,206,192,171,172,203,174,206,185,188,194,205,210,178,189,197,196,190,188,192,208,168,199,188,186,213,203,174,200,201,180,217,197,179,193,168,205,187,193,171,195,194,179,190,196,172,188,183,189,182,178,189,191,184,218,198,178,198,183,221,192,224,230,219,184,193,215,213,204,228,234,221,146,123,158,196,215,181,217,172,170,200,195,185,184,205,176,206,190,169,190,192,177,190,190,222,164,212,181,194,190,172,183,171,189,181,186,196,194,210,206,155,164,189,196,168,217,184,193,205,174,193,192,174,197,182,202,201,188,205,200,165,203,199,220,198,205,183,208,189,211,208,217,197,213,210,194,199,218,240,184,135,150,180,221,217,251,200,162,112,99,49,74,92,99,121,140,147,121,103,96,164,173,151,51,35,130,163,209,224,219,218,185,155,141,114,105,65,120,106,105,177,147,153,140,79,30,79,49,70,187,100,43,93,188,147,59,75,191,238,176,123,117,72,60,27,13,48,61,64,125,145,37,65,75,97,76,77,92,115,115,113,105,109,79,88,98,82,111,91,120,108,133,113,81,37,14,20,73,77,112,137,151,112,112,112,106,64,85,44,69,10,103,200,130,36,2,19,1,43,9,45,32,14,62,54,33,42,61,75,84,64,61,54,50,31,38,37,37,41,11,30,27,17,13,23,30,48,10,18,48,29,59,126,155,98,74,56,87,89,83,93,82,67,66,69,120,94,98,139,100,89,108,113,62,84,93,119,116,136,117,144,136,103,130,132,113,122,149,152,128,108,83,103,104,109,107,117,96,107,145,162,169,159,162,151,122,168,168,177,136,167,123,132,128,135,131,143,107,150,157,145,178,154,131,152,110,139,143,174,149,167,139,174,200,196,191,172,167,125,146,188,168,175,142,177,178,172,174,168,172,188,178,172,199,162,162,125,144,154,140,142,142,152,175,126,152,158,132,138,151,142,135,116,122,141,124,134,121,133,131,132,129,157,83,79,115,141,129,142,154,149,170,231,182,117,110,102,148,148,150,149,103,81,123,106,57,112,89,86,95,55,113,130,97,95,118,129,101,144,90,111,104,93,101,83,100,130,86,54,107,77,17,32,78,87,62,54,22,62,146,173,140,101,114,100,121,130,124,120,158,78,19,28,48,52,106,110,88,80,54,121,128,133,75,87,81,49,11,57,24,68,80,44,76,42,13,46,145,142,101,91,109,157,138,148,167,126,154,129,127,135,138,114,131,118,100,152,139,160,138,120,142,125,132,106,115,148,150,127,134,147,136,149,132,153,166,145,131,142,146,156,144,147,136,106,18,4,10,4,35,9,3,9,10,16,13,43,8,0,30,6,16,25,2,15,19,4,34,23,202,200,187,215,198,198,187,189,202,193,209,190,187,182,199,212,243,221,200,190,208,205,204,195,205,189,185,191,212,206,210,200,190,190,213,202,184,203,193,206,175,204,190,183,194,187,175,202,203,201,204,192,187,193,204,210,193,175,222,210,178,226,176,209,219,193,220,179,217,207,205,209,200,188,193,183,202,182,182,207,184,175,182,201,188,189,190,187,171,195,198,191,203,196,166,193,188,174,188,203,175,183,189,176,181,193,194,169,196,188,192,200,209,204,225,208,171,196,183,178,179,183,176,184,185,207,176,182,17
0,197,177,210,213,169,175,184,188,196,188,196,203,205,189,180,191,216,213,218,215,199,177,176,205,162,186,201,192,190,197,171,177,196,208,177,209,184,200,191,218,181,200,187,190,185,206,226,192,208,218,187,194,200,197,211,208,216,228,199,207,208,214,198,127,111,162,195,202,190,190,183,184,192,201,205,185,189,187,173,199,192,190,190,187,175,203,200,183,181,183,183,190,204,181,188,198,185,213,198,220,182,191,198,219,184,200,204,184,160,194,192,194,182,190,187,195,210,186,180,180,180,213,202,216,217,198,176,203,193,203,179,189,217,195,201,190,214,197,174,241,229,134,117,129,140,183,199,199,187,111,77,71,65,80,113,124,98,108,103,137,112,156,161,139,149,69,10,8,12,68,142,177,190,170,174,151,150,121,74,159,147,134,195,151,243,160,66,105,101,48,100,217,84,20,148,176,138,51,145,219,189,110,61,76,37,14,42,19,94,76,70,160,91,56,56,58,109,128,80,120,109,97,112,87,101,82,77,73,54,74,103,140,80,148,91,62,18,21,62,102,110,127,126,139,116,109,97,62,49,82,68,99,150,216,203,80,28,49,9,23,19,25,19,24,30,51,69,34,65,96,102,83,76,42,43,29,48,24,23,10,54,33,3,57,8,59,40,21,16,27,9,14,36,52,112,111,112,91,46,87,55,76,61,86,59,63,84,55,53,81,68,65,61,59,64,39,63,84,59,56,49,53,67,55,75,74,79,70,88,31,62,37,75,43,55,63,43,80,78,63,63,36,61,72,60,58,69,56,43,36,57,59,63,51,39,49,21,73,17,55,52,55,63,37,46,55,69,80,57,39,84,49,78,63,82,66,47,67,68,100,76,59,120,96,94,80,119,92,109,78,99,87,116,118,103,118,107,121,97,92,98,119,119,100,152,155,133,133,163,112,117,107,102,104,113,115,137,149,144,156,133,135,108,98,130,113,105,176,162,159,182,181,162,161,154,168,137,142,151,150,175,172,151,171,182,173,176,149,126,147,113,145,166,167,172,118,167,154,132,158,119,128,127,142,121,153,103,142,168,113,128,177,162,143,138,195,180,118,150,138,161,197,163,123,74,85,65,73,111,77,80,106,102,65,44,58,105,122,143,72,57,78,105,129,96,97,80,27,51,80,62,75,82,84,77,64,58,55,23,70,134,124,128,141,134,128,159,141,148,139,147,128,132,141,148,128,116,124,121,147,132,131,111,134,126,95,138,146,157,157,150,148,154,145,166,136,138,136,128,123,147,188,199,159,143,155,112,30,11,17,6,8,7,35,11,6,8,4,7,3,5,14,11,8,5,5,20,19,28,7,7,195,184,198,203,218,182,191,186,189,212,174,181,190,188,213,207,245,231,206,203,197,212,197,205,202,191,206,205,185,216,209,211,203,214,215,210,200,212,221,210,206,195,208,194,183,191,180,187,203,187,201,167,200,183,197,204,208,187,209,171,217,194,169,208,202,208,202,196,161,189,192,188,204,171,197,202,199,196,184,172,194,207,195,202,207,199,198,201,210,200,194,203,182,179,206,196,195,191,218,210,201,159,218,155,211,186,217,203,200,184,199,209,195,201,185,217,198,181,191,198,200,197,191,180,185,185,181,189,203,180,178,178,194,197,181,214,174,194,198,213,203,184,235,224,214,193,171,203,189,172,193,183,181,204,194,180,188,186,166,186,233,212,202,188,208,209,199,196,221,199,193,222,231,214,196,184,226,216,213,215,184,127,116,161,175,190,195,167,129,105,142,144,112,94,143,183,228,211,204,174,209,217,197,208,182,186,180,201,196,195,188,174,198,200,187,181,204,181,200,189,194,193,199,205,192,187,209,206,182,200,181,182,221,191,187,191,180,182,204,219,203,177,202,190,202,203,210,220,209,217,189,167,188,201,167,183,153,140,156,152,165,152,140,127,139,130,136,197,237,162,131,166,99,107,90,142,141,165,137,59,87,55,74,113,131,131,130,142,84,106,182,132,143,157,106,21,20,48,43,4,2,59,100,107,109,117,103,107,223,155,92,93,98,158,74,114,229,155,90,152,186,94,94,159,190,184,152,153,146,90,85,61,50,15,18,23,73,119,87,87,120,56,54,85,64,111,140,112,101,82,85,94,94,65,95,60,59,113,100,119,135,94,41,24,3,46,30,84,126,134,141,143,121,103,1
09,60,100,78,126,128,123,155,172,131,53,45,44,14,30,14,18,38,27,24,20,75,69,113,97,98,92,44,21,27,23,18,21,26,25,17,23,22,43,29,0,34,38,32,31,40,40,33,25,117,89,190,168,176,160,159,141,127,124,154,149,150,123,128,146,100,135,116,126,88,92,121,109,111,121,132,128,116,137,135,76,155,136,155,96,150,150,123,135,129,144,149,150,170,131,107,139,118,133,90,92,125,90,95,94,107,91,118,106,98,69,127,83,98,106,77,103,76,98,81,72,80,92,104,75,78,77,74,74,51,113,85,65,82,94,55,43,36,57,55,63,52,73,43,57,63,74,54,72,78,86,70,73,60,54,57,42,42,89,47,76,47,60,48,47,52,42,61,47,44,63,68,95,69,81,54,62,33,50,59,65,56,57,43,27,81,64,53,34,53,23,40,58,64,42,69,75,65,78,111,97,115,113,118,78,96,98,102,77,102,118,103,114,117,80,109,104,103,110,113,110,114,125,126,92,105,137,139,138,172,160,131,134,161,162,167,137,132,118,77,88,77,92,89,113,98,102,98,147,137,106,131,127,123,128,99,130,138,115,113,106,116,108,139,127,131,114,153,103,136,169,113,141,107,115,136,154,124,124,148,145,157,176,189,168,157,126,124,129,109,137,116,117,106,113,119,132,140,117,127,125,117,129,126,142,137,150,149,151,146,145,145,135,156,144,159,188,195,175,157,168,125,0,23,7,42,4,32,29,3,13,10,26,13,8,0,3,25,44,23,2,30,19,14,8,4,189,195,206,184,193,208,198,223,179,188,193,196,181,193,185,231,252,221,193,219,203,224,209,229,219,231,233,233,221,236,211,212,227,193,186,212,206,179,189,185,186,195,200,189,220,201,204,209,216,186,194,216,215,221,183,188,176,182,197,176,199,198,209,195,197,209,216,211,204,199,191,223,183,176,194,200,180,179,198,196,192,210,190,194,192,210,185,203,212,208,195,193,203,212,175,193,187,196,219,173,198,195,176,198,155,205,190,195,200,185,179,182,184,162,166,143,199,195,193,204,214,201,205,208,205,221,195,191,192,195,209,214,212,182,231,192,182,228,214,195,202,195,171,145,147,168,159,172,189,187,203,219,190,174,213,183,217,229,217,213,223,192,220,198,226,232,227,223,227,224,225,197,231,222,196,156,163,182,206,211,158,122,84,81,134,124,113,93,81,66,61,90,94,133,123,175,192,208,239,202,250,216,215,215,198,201,186,187,211,197,182,187,192,211,207,191,203,208,181,219,202,212,182,201,198,211,207,190,180,194,204,193,170,185,200,177,199,187,177,198,192,203,217,181,207,180,184,201,198,200,130,102,79,161,152,139,129,109,123,113,104,108,63,85,54,88,88,176,209,89,127,139,93,108,105,125,131,116,165,104,100,50,74,111,146,158,121,157,99,46,116,142,126,130,118,59,12,31,37,25,34,14,32,4,36,52,125,129,218,169,80,55,62,43,25,183,244,101,133,164,125,153,169,201,151,194,202,140,71,57,56,38,8,15,65,96,140,137,66,83,140,39,54,41,102,128,55,77,128,70,100,78,58,61,73,86,75,103,104,135,89,39,43,18,23,55,89,103,98,127,104,107,119,97,54,119,106,119,118,133,113,96,96,79,42,30,63,40,55,14,21,18,28,26,33,47,67,89,58,54,55,31,31,38,33,27,21,36,41,25,4,51,35,23,17,20,26,22,52,37,33,85,35,57,74,85,53,48,60,68,83,87,66,77,78,45,49,82,88,76,64,80,84,72,77,65,79,84,130,91,96,107,102,133,109,93,103,141,117,117,134,137,108,101,122,135,141,99,102,86,114,112,120,134,131,97,121,128,114,121,93,122,135,156,120,109,114,141,148,126,136,131,90,124,130,127,119,121,126,130,129,177,140,137,109,116,140,110,133,122,111,122,108,104,98,109,122,130,122,125,105,138,137,135,115,127,112,143,147,126,111,101,110,91,103,115,100,72,115,108,110,115,106,106,90,126,99,133,117,87,102,114,115,94,92,114,76,75,59,92,66,66,83,91,47,92,83,56,58,66,65,64,70,66,53,53,81,71,71,64,55,74,78,84,68,49,67,73,52,70,47,57,58,52,56,69,70,56,43,52,59,61,69,80,38,49,54,72,58,54,69,63,69,83,78,50,86,36,52,76,83,83,70,84,65,87,76,84,68,92,85,92,74,92,90,84,99,121,128,117,103,119,124,109,142,133,132,140,1
34,108,99,107,103,97,113,104,118,141,136,129,102,109,98,100,113,107,102,108,124,104,162,123,127,142,123,139,121,128,137,134,130,138,145,142,164,165,170,168,158,139,163,177,173,151,142,106,14,4,27,25,3,22,10,1,14,24,17,20,0,2,16,20,26,8,16,5,6,12,11,3,186,192,181,210,208,174,204,183,212,189,189,204,213,183,175,215,255,223,153,189,183,181,237,219,228,237,225,231,237,229,227,221,231,199,174,180,183,223,191,197,201,195,213,218,193,218,202,220,162,186,191,203,191,199,193,195,210,180,216,199,192,182,201,206,197,189,195,183,178,198,212,208,201,203,212,208,187,169,190,200,190,174,204,208,207,195,211,219,194,201,198,214,182,201,178,198,162,193,201,209,186,191,159,209,211,189,205,188,182,205,183,137,91,104,87,128,183,208,217,218,213,241,239,213,198,182,208,242,206,206,208,205,190,212,200,203,230,220,205,185,220,203,127,152,112,127,98,149,210,214,211,179,193,191,234,190,170,199,149,166,173,184,172,185,208,194,176,155,173,179,166,169,134,171,144,76,97,114,128,104,124,122,104,91,101,86,92,114,98,97,95,111,144,101,98,141,92,180,184,193,183,186,193,180,193,223,191,204,195,207,191,179,193,174,218,199,200,193,198,191,217,227,203,204,216,183,190,218,213,209,178,201,194,187,206,228,199,193,218,179,189,194,180,197,198,143,137,149,126,133,129,96,85,129,146,132,142,120,106,97,116,89,76,60,102,62,76,194,201,90,89,100,113,112,77,163,153,119,168,152,106,55,85,114,122,146,117,91,95,57,106,113,74,105,142,111,112,63,111,48,67,30,35,19,37,75,66,103,161,194,125,102,164,73,50,161,166,60,110,130,136,160,187,187,164,170,202,138,97,57,49,35,63,83,70,97,129,128,45,76,100,25,64,91,100,104,57,60,86,66,25,68,51,33,57,65,79,103,99,122,57,4,27,41,80,109,123,122,114,116,114,96,97,103,102,132,119,94,114,98,116,88,53,52,69,56,31,49,14,20,14,20,0,36,44,24,21,53,38,41,29,33,30,20,38,20,65,31,14,13,12,49,33,46,43,33,32,34,14,52,52,29,19,51,19,50,53,47,34,61,55,45,24,29,41,23,22,40,40,31,41,25,62,48,37,40,51,45,67,44,44,34,60,43,59,27,15,33,63,67,29,35,35,18,16,29,64,41,55,63,45,58,69,44,29,40,41,27,29,58,72,28,66,56,53,46,33,51,81,37,46,33,47,55,33,42,39,43,58,55,40,37,52,42,60,43,77,54,49,57,36,71,59,65,53,73,54,62,103,67,56,81,98,76,83,100,83,88,72,96,106,89,90,80,96,62,102,80,119,108,108,111,108,129,117,117,111,118,117,127,128,135,151,114,78,137,136,114,133,111,119,132,138,139,133,113,117,103,119,86,136,127,111,90,114,115,135,119,129,130,123,127,117,110,125,103,111,112,67,123,122,122,119,131,90,108,104,116,55,77,113,113,98,114,95,128,118,90,75,114,80,113,88,92,94,102,78,112,102,88,90,67,74,53,110,57,69,56,66,75,60,76,74,96,73,100,107,88,72,88,80,84,78,68,121,82,75,74,42,34,75,85,52,53,47,48,58,59,71,64,68,114,121,105,97,88,76,84,85,98,92,119,140,149,155,143,139,135,119,111,148,136,120,135,132,150,177,171,155,158,171,143,173,153,113,117,26,17,13,2,27,9,7,14,13,5,9,7,0,0,11,7,13,21,10,29,0,25,4,5,172,192,197,193,204,161,207,186,218,191,214,196,219,204,197,244,251,223,160,155,139,95,112,131,119,127,144,124,129,131,153,192,182,211,202,206,201,178,183,211,206,184,205,176,210,207,184,178,195,191,176,183,201,203,204,192,171,210,212,182,185,184,204,189,201,206,184,191,188,211,195,191,186,186,193,180,186,184,194,177,195,184,211,182,177,206,196,198,200,178,207,190,204,190,189,192,198,179,212,197,199,197,209,185,198,196,209,186,206,232,203,121,96,100,93,109,149,197,200,201,221,192,160,166,190,195,193,199,153,151,232,218,169,131,135,140,152,218,199,227,212,211,138,111,130,136,133,149,198,226,205,201,190,216,215,120,82,85,61,123,169,146,112,114,122,103,120,80,78,75,103,95,66,92,99,60,53,68,51,73,103,122,108,99,97,100,105,104,141,134,128,120,118,
70,77,84,68,84,101,81,100,114,134,159,170,191,191,154,175,183,178,180,183,191,177,142,170,138,125,151,211,183,158,191,198,165,139,154,179,180,204,195,231,192,214,202,197,237,175,187,139,158,146,146,126,121,95,109,99,117,98,82,109,113,124,144,125,137,100,140,127,104,117,95,78,67,111,236,122,80,93,92,108,140,108,212,175,103,190,141,65,52,62,110,49,76,128,121,108,94,89,89,77,89,119,185,156,130,127,115,85,92,102,97,223,148,135,145,123,123,68,149,195,93,80,164,142,69,137,90,91,172,148,84,111,186,198,193,87,84,69,64,86,54,78,67,65,60,67,87,44,61,76,90,103,87,96,54,72,46,46,46,37,45,71,64,51,61,54,40,27,15,50,102,113,116,126,128,103,95,78,102,84,100,95,111,107,84,104,58,72,56,66,62,73,35,1,17,25,33,24,47,17,36,20,36,36,36,28,33,4,24,46,8,49,26,32,45,29,21,27,29,18,49,48,29,47,21,45,2,37,26,25,29,38,44,71,78,71,53,16,23,6,26,35,40,31,30,48,48,65,45,43,28,36,32,20,47,32,34,42,52,35,36,29,69,49,39,28,47,31,25,46,40,72,29,38,48,19,41,36,36,59,52,60,31,41,37,33,29,47,48,37,58,44,48,38,40,37,53,48,53,59,71,36,45,20,35,32,60,26,24,43,26,31,33,47,28,52,72,46,36,44,41,46,26,64,59,42,35,50,50,43,60,36,18,61,44,22,33,48,65,55,20,52,61,48,41,52,28,38,33,52,59,43,20,26,64,38,35,31,42,29,46,71,52,28,48,37,30,34,23,49,47,70,38,39,50,65,33,57,39,46,57,41,79,76,90,82,82,83,74,92,93,86,93,96,106,98,118,99,106,111,107,91,106,110,111,96,104,98,122,141,106,134,122,139,126,140,112,118,155,113,116,106,104,133,114,133,135,143,102,133,94,101,85,94,98,107,104,124,122,124,130,128,138,121,118,123,114,104,103,115,100,123,128,102,120,87,110,95,70,78,68,70,98,47,57,67,82,97,160,163,165,166,101,95,86,107,70,78,105,143,137,158,129,165,128,121,111,117,100,90,108,124,111,124,144,158,116,149,166,164,94,117,97,16,7,6,18,16,37,27,33,10,16,34,6,0,10,1,0,15,10,25,25,18,32,26,29,183,200,182,182,222,198,198,205,202,190,198,211,196,202,209,226,249,204,168,202,140,1,15,6,4,14,43,36,22,11,43,146,202,176,220,223,206,187,208,200,207,215,166,201,193,203,203,195,189,196,180,171,212,197,206,181,175,202,175,218,185,181,193,193,206,198,210,214,208,213,198,197,184,171,210,168,224,196,203,204,205,187,197,185,184,217,193,199,221,197,204,212,188,194,176,182,183,182,208,199,189,190,191,193,185,221,207,238,205,188,217,140,118,187,137,143,147,173,146,154,155,115,108,133,120,99,117,158,84,110,179,212,126,59,68,38,97,192,212,172,191,166,120,122,94,126,127,172,195,202,195,199,222,208,196,95,59,59,88,163,170,148,105,94,105,101,64,85,78,45,92,111,93,98,121,89,101,104,94,84,102,103,118,113,71,109,88,139,123,114,117,79,74,64,75,68,85,84,63,47,82,74,115,139,157,155,127,121,149,162,174,176,145,163,147,119,155,134,128,142,133,124,154,152,127,136,134,141,164,195,191,184,175,177,170,175,182,211,170,147,149,129,140,92,115,119,109,89,124,144,110,91,84,100,98,97,111,100,114,151,185,165,192,154,126,102,158,254,121,66,108,107,119,172,190,231,186,101,147,176,107,50,59,83,44,56,164,233,174,73,94,120,94,93,79,154,204,174,138,145,136,170,173,155,198,151,146,128,68,40,60,159,142,71,103,194,135,100,147,85,99,166,96,57,25,132,160,163,85,68,49,35,52,59,56,69,60,79,55,78,54,44,54,55,80,82,38,37,98,63,60,76,45,78,71,71,32,45,25,18,47,89,111,101,142,133,137,121,98,111,86,79,83,125,84,102,116,84,32,17,26,56,43,63,38,52,11,31,45,51,42,16,20,11,24,25,10,40,15,25,24,37,22,19,52,43,28,13,6,16,17,20,14,33,40,38,38,28,15,42,30,29,32,34,36,67,66,73,47,38,25,33,25,17,58,66,50,48,59,38,42,84,64,44,52,39,48,43,28,42,48,30,35,49,49,45,51,44,39,12,27,27,36,34,28,32,43,30,40,48,41,35,44,33,49,41,13,41,39,54,63,51,63,63,50,52,40,68,39,40,41,42,52,45,31,19,37,48,36,46,36,52,39,42,24,39,53,41,42,33,
39,69,21,55,45,38,37,51,57,30,50,45,21,39,17,52,61,57,60,57,51,55,42,56,48,42,40,26,45,44,45,77,42,53,35,44,37,37,48,45,35,37,32,25,31,44,67,52,15,14,37,47,29,59,7,50,31,7,45,36,21,36,50,40,17,43,48,37,34,51,77,43,41,30,18,59,43,54,46,25,38,43,15,43,58,30,48,57,68,58,25,53,46,49,85,45,47,50,44,38,47,49,57,41,51,49,78,45,72,88,47,60,59,56,75,64,48,108,78,68,88,89,86,90,73,73,83,100,117,101,115,110,79,118,68,113,107,122,111,74,111,108,109,101,104,122,104,94,106,133,115,150,158,151,117,113,116,122,107,90,119,139,154,140,156,144,148,155,121,124,119,109,88,106,100,112,115,123,117,113,124,124,127,86,91,99,27,26,9,10,15,19,24,20,15,19,39,11,4,18,16,29,17,24,6,10,5,33,21,28,206,211,202,210,192,188,203,205,204,189,198,181,182,207,204,254,252,232,176,191,126,22,11,5,12,43,9,15,40,43,40,173,220,238,230,219,215,209,215,184,226,201,216,209,215,223,176,213,213,192,210,195,211,185,211,222,190,218,201,209,183,185,191,219,211,213,206,196,240,208,210,229,202,193,193,213,199,181,171,196,215,220,199,199,196,198,205,189,208,202,243,230,197,228,205,235,207,188,155,185,196,196,218,202,207,209,209,203,162,127,127,120,132,152,95,100,109,109,86,76,72,84,87,78,90,52,85,100,61,78,172,178,53,44,34,22,84,119,146,134,156,121,89,89,79,79,102,163,171,156,147,136,137,183,158,115,77,94,169,196,156,107,95,124,120,104,114,99,105,107,124,116,117,132,101,116,119,122,122,125,166,131,125,123,147,147,118,120,101,114,90,84,120,103,110,121,106,86,94,133,162,130,133,156,155,139,151,180,154,144,176,153,124,158,141,149,171,147,154,167,175,186,162,190,186,168,184,148,165,163,117,98,114,78,79,98,134,146,156,153,162,149,115,102,117,143,89,126,143,214,180,177,109,78,64,80,63,74,166,189,198,191,210,175,115,101,237,224,100,111,87,116,188,228,248,254,199,84,109,149,122,67,91,136,113,111,157,208,161,88,111,103,88,79,32,80,210,227,238,221,166,192,228,103,130,129,97,145,115,43,82,155,82,68,170,149,103,158,189,216,181,131,56,18,0,69,80,69,52,29,26,7,19,63,57,19,58,63,34,91,51,71,82,76,107,79,96,57,74,80,57,58,60,51,64,56,40,16,15,42,70,105,133,139,110,140,111,81,106,73,96,65,81,114,98,67,72,31,19,9,26,32,40,51,59,48,33,35,60,41,31,30,31,1,25,18,20,16,21,35,24,34,28,45,10,26,40,50,36,48,46,32,40,12,57,33,28,43,26,17,33,33,34,66,59,73,77,12,39,28,41,39,21,35,29,43,72,39,87,59,61,65,42,73,55,43,46,56,62,64,59,34,45,63,58,69,49,49,8,37,38,1,4,9,31,19,18,13,30,23,35,58,48,31,61,67,37,69,54,48,62,49,53,66,63,77,66,50,70,62,55,41,44,40,43,42,38,50,49,38,45,30,34,19,25,24,56,25,50,15,2,49,22,34,45,18,44,32,56,28,37,41,47,28,39,51,50,59,53,49,68,34,48,62,53,28,47,52,46,54,49,48,42,58,37,44,46,59,26,36,46,30,43,49,37,39,32,69,43,19,60,29,58,31,32,47,45,32,40,36,42,39,47,49,35,44,47,27,29,45,36,15,53,30,51,29,53,38,52,50,32,44,27,50,23,31,30,53,43,42,27,38,59,52,51,20,39,59,45,49,45,24,10,20,30,52,42,35,48,38,49,48,56,50,35,55,66,37,21,62,52,65,58,61,41,57,74,51,49,52,28,31,20,33,33,26,38,47,74,27,73,99,111,104,67,76,119,111,102,97,101,110,124,133,99,107,134,148,117,125,159,174,155,158,142,174,148,132,142,154,136,102,136,122,102,76,91,90,111,136,99,91,109,74,117,96,15,28,41,2,20,24,16,28,15,26,22,30,2,4,22,14,9,32,18,28,15,25,5,24,166,192,186,191,203,189,175,191,182,174,183,187,187,198,208,253,247,206,166,199,114,36,27,25,43,40,40,35,51,38,73,184,223,232,240,232,200,208,198,224,201,220,231,200,224,205,200,205,210,212,217,195,214,195,209,237,212,208,211,177,217,194,240,244,197,207,213,203,222,230,191,216,211,186,198,211,215,205,206,236,214,216,214,240,240,205,172,212,210,200,170,226,207,202,180,223,202,175,207,182,192,177,204,161,136,126,134,141,75,55,70,68,1
59,30,20,51,64,40,19,20,1,10,20,21,29,38,29,43,30,18,31,75,38,41,63,57,53,44,40,42,44,27,52,30,32,36,16,62,24,57,44,38,34,34,51,19,12,37,76,134,122,123,127,123,147,115,129,122,107,106,114,139,155,119,107,81,141,112,87,95,125,63,91,90,117,108,103,89,99,113,116,126,136,100,56,49,70,79,46,44,52,74,63,71,44,66,107,120,122,87,76,96,74,154,165,166,161,110,25,1,9,18,24,24,12,14,2,11,13,34,45,16,19,21,20,13,26,42,11,6,16,9,86,84,35,40,70,55,88,80,81,94,101,93,73,179,239,247,119,56,100,37,36,24,19,39,35,38,32,41,65,83,92,101,78,61,66,81,105,83,85,92,90,89,84,63,48,59,48,47,73,81,89,88,94,90,70,113,97,113,84,107,100,104,80,99,70,100,69,91,93,103,83,109,75,92,107,78,79,82,74,94,111,91,108,85,51,88,56,111,73,64,115,100,97,103,106,117,80,89,60,68,70,77,98,100,99,97,113,132,119,129,102,88,98,79,87,109,97,103,91,87,100,66,74,91,98,111,75,41,58,94,91,72,72,97,76,115,66,86,81,79,81,107,123,62,67,102,102,134,138,125,173,137,106,93,107,94,82,104,142,135,145,166,159,116,159,157,144,106,76,79,127,94,79,97,63,62,56,76,59,80,73,63,89,59,45,70,81,82,98,93,69,81,53,31,49,55,60,88,113,95,77,81,44,20,35,28,45,31,71,97,90,187,119,66,93,91,152,121,102,125,83,80,109,102,127,93,120,99,93,83,90,63,78,72,66,71,113,55,46,60,99,84,53,39,42,47,54,25,35,70,105,96,52,88,62,88,122,142,214,238,158,94,167,246,226,169,16,78,130,61,24,23,23,83,110,29,75,59,93,91,81,103,111,34,39,35,23,27,38,72,63,66,51,56,34,56,67,43,60,54,80,43,28,17,57,57,37,20,32,44,36,27,15,26,40,48,43,31,26,35,20,31,34,21,49,36,34,69,114,78,69,36,42,31,86,128,140,121,146,140,72,29,21,34,55,50,97,103,102,134,98,86,108,98,108,110,99,97,87,109,86,60,97,101,98,88,77,86,89,94,108,87,94,121,99,88,81,83,67,89,187,179,92,86,92,107,150,123,75,15,37,2,17,12,53,44,91,108,106,91,115,120,124,146,84,74,99,74,81,75,101,104,88,79,83,27,22,9,16,17,40,55,46,43,57,65,56,38,30,47,31,44,53,40,35,30,45,43,54,36,38,51,30,44,41,36,48,9,68,49,45,40,30,30,33,25,36,13,37,54,47,43,62,19,29,27,55,11,34,37,48,22,25,23,28,40,37,60,52,67,30,36,65,32,40,39,20,12,20,21,41,53,53,40,28,9,25,21,13,42,22,31,22,38,24,26,34,47,50,45,48,31,36,51,40,14,57,39,28,22,26,65,49,36,42,41,53,44,22,21,30,26,81,105,37,17,30,28,38,23,47,31,47,31,52,30,20,20,28,23,25,27,10,23,15,31,30,9,7,8,36,17,33,43,25,29,40,28,22,12,60,43,26,34,38,26,42,44,40,38,35,26,9,27,35,23,23,23,24,59,50,46,8,17,21,18,36,13,39,34,24,24,34,65,25,20,34,26,40,74,43,70,69,73,30,46,42,24,49,58,33,53,51,28,17,63,21,12,27,59,15,10,21,12,31,20,33,83,88,98,106,110,88,81,89,99,109,120,95,105,110,95,101,88,82,105,70,68,114,100,65,119,108,104,94,72,115,102,108,118,113,62,71,45,73,73,55,44,27,64,55,75,86,85,103,123,128,118,122,106,98,134,187,161,139,71,19,4,15,2,45,30,23,7,30,27,0,14,13,1,23,0,54,16,7,12,10,28,2,27,78,59,84,70,94,93,96,94,102,116,77,112,98,189,241,251,117,67,71,13,34,33,50,28,48,44,26,75,51,81,100,107,101,60,55,82,91,90,55,71,64,81,113,73,95,67,57,52,69,65,68,90,65,87,107,106,116,98,93,114,121,92,103,79,57,69,67,70,118,96,131,155,85,69,94,74,102,106,86,90,117,104,75,77,93,118,105,85,108,104,102,101,85,99,100,99,95,75,77,74,86,81,41,86,107,112,135,150,131,135,132,140,143,93,90,102,75,82,71,84,117,81,52,70,95,111,59,71,53,72,81,115,90,97,128,103,67,59,68,101,109,102,102,75,99,102,122,112,90,101,111,124,91,74,91,63,54,65,93,136,139,129,136,117,156,166,144,107,77,76,102,125,78,89,75,63,44,75,76,70,76,65,51,47,104,113,70,56,85,75,51,45,100,75,68,34,50,94,112,109,138,92,114,63,82,91,68,113,113,112,73,135,118,71,58,112,124,102,107,78,72,85,92,97,103,96,87,77,66,80,95,57,63,69,44,91,53,92,73,60,94,72,53,49,68,72,66,44,41,19,64,54,45,62,71,9
0,158,229,245,130,80,102,171,226,190,76,20,25,136,91,96,101,125,121,54,14,6,23,4,34,22,17,12,29,34,29,35,47,74,39,69,45,27,15,31,47,49,81,46,17,83,32,41,30,54,33,26,55,32,6,40,58,11,41,45,51,36,57,42,17,17,40,60,40,58,46,49,69,96,90,70,41,36,44,51,102,172,149,96,62,15,23,50,57,78,96,110,94,93,82,89,78,103,125,103,101,80,93,70,118,98,105,108,92,95,112,90,105,95,92,104,75,105,139,94,91,113,119,95,107,133,207,128,86,114,97,91,54,22,19,23,21,43,36,83,97,88,99,93,86,126,91,112,98,115,92,84,79,76,79,90,66,52,69,50,31,21,8,28,40,29,36,43,61,68,38,30,57,56,63,45,48,42,23,82,29,47,51,26,59,59,20,63,48,22,38,26,50,55,60,52,37,18,39,24,31,13,10,19,40,40,55,45,56,22,36,20,60,18,25,38,33,29,45,13,51,54,57,22,34,37,39,2,62,32,15,10,30,28,16,36,28,20,56,16,9,25,25,13,8,23,6,0,19,25,38,20,35,7,14,28,50,41,52,12,55,58,32,53,28,22,27,32,28,13,29,19,26,47,14,27,11,68,78,69,62,41,40,6,32,26,40,55,42,35,35,23,24,26,46,14,6,15,8,21,26,39,22,33,60,35,48,20,34,38,14,46,11,23,29,31,24,13,12,60,54,54,38,40,26,17,23,15,3,25,6,37,67,45,51,36,37,50,29,0,32,29,20,30,29,21,30,15,37,65,34,54,14,38,33,18,35,57,68,69,50,27,43,57,41,29,44,38,24,16,22,31,44,24,40,16,20,27,10,50,30,70,76,72,93,58,92,74,67,80,82,79,104,90,111,131,68,90,64,95,83,83,91,120,126,98,100,116,77,113,85,58,74,57,104,71,85,74,26,71,35,38,30,9,96,82,98,74,128,98,121,134,106,124,119,108,113,151,157,129,64,29,0,3,21,21,10,19,10,7,28,6,4,6,6,18,1,1,3,11,2,31,15,11,22,71,54,85,71,97,98,87,127,117,125,121,115,107,187,230,243,123,45,67,5,40,14,45,34,23,35,28,81,70,106,82,102,94,44,71,61,90,88,79,42,59,62,61,69,69,71,65,81,62,76,93,92,112,97,93,110,115,125,112,129,94,75,81,72,79,79,78,98,86,121,158,133,110,98,122,100,118,141,108,125,129,67,74,59,124,120,110,100,110,94,137,120,98,76,81,88,89,66,108,105,72,114,71,80,89,121,147,142,124,128,155,138,146,150,132,123,152,117,135,112,87,149,94,78,110,99,96,107,102,85,124,95,90,114,93,163,74,67,87,84,104,75,81,78,65,103,84,100,113,86,113,102,81,88,96,78,100,82,94,93,121,153,133,107,120,123,109,88,79,81,129,140,136,93,66,74,60,52,43,61,88,77,61,80,116,113,122,79,92,109,80,117,129,127,99,83,65,107,127,113,126,135,145,131,93,134,127,130,134,123,85,102,89,105,133,117,132,138,105,89,49,87,74,53,60,54,88,71,84,85,98,79,90,71,78,127,95,92,82,70,53,67,82,123,99,90,54,82,58,54,62,27,77,49,58,131,249,229,144,34,31,43,89,143,124,45,1,38,78,115,120,107,100,117,85,45,78,89,48,55,27,65,94,48,58,63,78,77,48,39,67,44,36,51,42,36,36,43,41,47,58,65,47,47,54,45,49,34,44,27,46,24,43,36,43,45,42,51,43,20,72,55,55,69,39,52,70,84,93,88,78,63,65,31,38,38,97,74,49,29,48,88,74,105,97,83,122,97,101,110,94,88,109,114,97,105,121,76,73,70,98,101,93,74,111,108,106,88,102,86,106,104,124,148,97,90,105,137,142,48,58,176,159,113,62,16,8,19,24,21,18,36,70,81,106,90,125,106,129,85,105,119,107,103,108,49,67,74,78,60,79,111,107,77,68,15,3,12,10,30,43,41,43,37,27,55,28,19,37,21,60,53,2,48,38,95,49,57,35,22,34,18,16,15,48,47,35,35,47,30,15,44,58,66,2,18,14,37,16,46,21,29,27,25,34,32,38,41,51,32,29,41,45,32,53,62,36,68,31,49,29,39,27,13,53,19,28,24,36,37,35,27,42,41,23,1,31,20,15,14,39,16,27,17,27,52,26,33,32,21,32,13,41,60,12,18,14,37,12,38,28,47,18,34,28,32,44,43,25,49,45,49,25,86,80,66,26,31,43,14,6,22,23,48,26,45,19,48,32,36,21,26,30,25,36,33,22,10,26,19,19,17,42,46,13,15,21,36,44,38,51,41,36,49,56,66,48,52,24,30,5,18,46,27,18,13,14,54,33,46,37,63,43,24,24,28,5,9,11,15,23,13,21,46,43,41,33,42,53,43,25,34,44,28,42,59,15,55,50,45,43,63,45,29,52,40,37,57,36,38,31,5,29,21,51,63,52,52,64,58,58,41,56,89,83,49,81,112,129,101,93,58,62,96,92,89,91,125,118,144,110,98
,111,99,99,68,39,45,59,89,62,43,38,53,48,46,33,38,101,122,99,108,92,117,110,102,72,106,113,95,82,90,87,85,75,61,23,10,14,18,12,10,17,24,15,15,37,17,21,7,7,16,2,22,8,20,18,26,5,10,59,35,78,84,83,98,96,108,136,126,133,137,159,209,216,225,122,42,81,33,46,36,31,22,35,41,52,68,66,85,89,77,77,59,66,72,82,85,85,82,25,39,53,50,79,56,75,82,77,78,64,94,162,104,87,83,128,100,112,102,94,90,96,82,118,148,118,149,115,135,144,96,96,115,110,107,110,159,123,91,120,94,72,40,108,107,79,98,101,110,144,197,67,54,86,130,118,89,109,114,113,119,77,107,140,153,135,107,125,135,92,109,136,147,137,132,145,124,147,96,152,179,139,114,116,91,87,121,105,121,102,104,106,97,150,181,71,53,92,89,126,93,101,96,63,84,88,133,175,160,114,129,99,165,153,137,138,152,137,150,165,153,157,148,145,146,139,97,94,74,125,162,114,130,98,72,47,56,52,68,54,66,65,118,113,114,81,87,64,107,119,129,136,119,142,139,105,88,116,128,147,107,147,124,114,157,127,121,151,110,77,100,140,102,151,118,91,120,100,113,101,84,114,90,97,96,84,80,64,112,109,79,56,71,92,92,81,66,62,58,75,49,41,110,149,84,53,83,55,102,128,97,77,82,5,100,192,206,170,35,0,70,44,70,43,20,15,45,88,88,105,93,98,85,119,153,96,103,97,88,69,161,142,105,97,81,83,35,27,34,44,43,31,45,45,48,60,48,66,34,31,38,32,41,44,26,27,27,62,34,39,53,43,36,42,26,26,42,12,37,25,43,60,47,36,62,70,86,105,83,58,26,39,35,68,51,52,50,33,53,53,84,64,104,124,80,119,75,102,77,96,83,117,77,96,91,100,87,101,57,87,101,88,109,97,116,107,95,122,105,112,134,117,122,55,39,92,122,65,27,28,111,184,104,30,10,18,16,22,10,27,45,95,101,140,116,106,93,105,116,114,89,95,106,54,83,64,67,75,61,69,67,78,83,73,38,25,2,21,24,41,38,33,52,24,43,29,45,33,29,53,32,53,19,16,36,21,62,30,31,28,54,20,30,53,47,30,32,24,43,8,39,51,40,32,12,13,25,22,13,38,39,21,49,25,46,29,87,17,20,33,35,29,29,32,77,52,41,46,37,44,69,32,40,41,56,40,47,33,25,31,29,54,21,24,46,21,22,37,43,18,29,23,29,61,30,50,42,22,30,32,19,44,29,24,45,11,46,13,9,22,40,17,19,22,31,19,30,19,15,20,43,7,19,59,65,62,20,55,32,34,28,31,56,12,13,25,41,33,27,45,6,7,0,20,10,34,19,26,26,15,38,52,27,33,16,30,44,45,49,46,56,75,41,38,54,44,54,37,3,42,30,22,34,22,47,26,41,55,48,44,63,26,38,16,22,23,14,12,4,36,19,38,52,43,42,47,40,8,64,17,34,31,10,48,38,33,57,44,45,85,57,50,45,53,42,43,46,30,49,51,15,7,25,27,22,38,52,63,58,85,44,55,78,90,87,52,108,118,111,110,59,70,72,108,134,96,98,108,131,118,96,115,83,99,108,125,109,84,126,119,82,89,67,105,87,85,82,108,152,121,146,102,133,99,119,103,81,91,106,91,105,76,66,93,86,29,3,5,27,9,18,24,7,0,24,13,27,24,25,37,3,12,4,20,34,16,5,9,0,65,27,65,45,95,79,93,114,137,124,124,137,129,203,227,243,89,51,59,21,40,26,27,26,12,36,27,53,83,119,85,59,30,48,51,38,81,91,111,121,70,36,37,57,82,81,84,51,39,71,53,79,185,99,66,98,130,111,94,114,94,120,86,99,110,135,133,128,103,103,152,89,85,77,88,100,102,174,105,78,86,84,76,55,83,91,80,67,94,80,180,213,66,63,82,142,129,88,62,72,92,110,104,115,136,110,115,86,69,127,53,76,124,147,146,125,111,112,106,110,95,212,147,94,118,96,93,83,83,84,106,77,69,49,155,182,69,47,100,110,118,88,134,180,160,92,107,171,232,170,145,140,156,207,181,155,158,164,173,185,183,174,199,202,221,153,178,129,119,121,122,115,95,88,81,74,80,74,80,80,39,56,81,99,111,82,56,56,108,86,103,132,106,126,145,149,141,116,107,120,92,130,119,122,112,140,106,125,123,119,92,124,132,100,100,92,104,101,115,121,131,166,143,132,115,125,95,66,48,76,104,107,79,107,79,95,91,113,59,75,95,90,62,67,123,96,70,26,16,53,101,109,110,91,33,15,35,141,187,28,9,68,92,123,47,22,93,93,114,125,85,68,101,73,97,98,110,116,91,109,87,120,92,116,97,57,65,54,68,27,55,50,48,47,44,47,57,73,51,33,
23,46,30,44,37,57,37,29,46,44,56,41,31,29,50,42,37,19,48,32,53,55,57,82,59,77,86,98,116,87,67,49,45,56,35,54,51,46,89,98,99,112,110,107,128,92,107,123,100,91,92,87,112,100,93,87,88,73,101,99,99,99,89,105,97,126,78,61,101,115,131,120,79,61,27,58,53,83,41,11,6,55,140,105,58,44,18,24,35,35,66,82,108,111,74,98,131,114,88,81,99,99,73,47,64,54,51,73,73,86,64,72,83,69,53,37,27,15,24,35,59,56,30,36,48,29,35,46,46,25,35,21,8,34,29,43,20,63,27,39,19,49,18,52,62,21,53,12,56,34,34,42,59,47,13,40,4,24,33,39,49,30,44,58,46,29,42,33,17,37,44,23,67,51,36,24,19,47,65,59,61,46,41,49,48,54,43,46,32,45,53,37,31,32,16,13,57,26,30,27,31,24,43,21,22,49,11,10,26,22,36,34,39,7,10,40,44,8,43,23,23,38,8,28,40,8,38,14,33,7,12,45,46,17,54,84,67,49,29,39,28,37,13,28,30,30,25,17,22,11,8,34,25,31,6,43,21,10,18,27,16,18,21,23,17,25,55,52,36,24,42,25,33,38,36,20,60,30,26,25,17,28,57,35,38,34,18,79,40,13,51,45,3,15,28,12,6,22,14,20,10,30,44,55,46,31,38,53,38,31,27,3,3,16,38,35,32,39,14,60,43,44,54,54,44,49,45,23,29,39,0,40,29,32,37,45,43,87,32,40,61,67,76,103,104,82,78,94,94,80,90,56,63,53,91,97,86,71,62,87,97,116,127,160,148,165,183,176,158,141,132,145,164,163,138,149,142,142,146,163,131,129,145,169,124,153,137,149,144,113,131,127,114,128,146,130,5,5,39,18,33,11,8,0,7,15,32,3,28,0,11,1,9,6,42,18,1,32,26,17,94,72,33,34,58,80,81,139,140,120,97,122,139,226,245,238,83,43,44,4,47,16,29,39,27,32,27,63,84,127,130,93,56,41,64,53,70,95,148,142,92,68,35,69,75,88,74,47,34,33,89,99,182,133,84,125,129,140,86,83,75,106,92,91,125,82,74,76,70,118,100,62,57,60,64,63,117,162,113,59,111,102,91,115,111,108,89,82,96,92,157,198,63,50,89,161,127,72,85,71,84,106,92,97,119,100,117,92,102,174,58,64,99,107,99,95,109,98,98,102,115,170,81,98,90,92,98,102,71,107,105,83,114,93,176,193,72,78,89,105,109,73,181,226,215,112,95,174,165,141,146,135,184,156,102,140,130,157,148,166,137,168,169,141,107,88,103,111,81,101,75,82,68,51,34,58,59,68,54,54,52,43,33,54,90,75,66,81,73,64,45,68,90,94,119,91,125,119,116,101,106,116,108,112,125,132,135,101,130,94,98,112,140,147,149,150,163,140,140,136,144,135,129,124,92,125,121,105,97,79,77,84,62,91,92,69,81,73,80,61,71,83,52,122,144,118,116,79,56,81,134,144,95,75,78,88,47,43,122,47,21,99,155,152,93,115,129,118,114,117,104,114,119,120,113,130,125,126,132,105,123,113,90,74,35,44,30,59,73,48,66,48,52,59,45,42,70,46,44,57,65,68,43,32,32,35,58,47,44,19,38,41,37,23,49,37,56,49,54,51,47,56,92,57,64,69,74,77,83,121,114,73,33,33,58,50,41,49,68,124,139,131,119,94,61,69,84,113,67,78,100,123,89,110,99,89,96,117,102,93,99,91,82,95,115,93,83,106,109,92,81,58,33,20,18,68,25,41,51,30,25,78,153,121,109,36,28,44,27,73,78,107,119,88,85,94,69,79,110,109,92,73,87,68,66,69,60,63,71,45,94,93,83,85,37,36,15,31,40,52,38,47,37,25,29,38,25,16,51,48,44,22,23,44,48,43,32,30,31,26,35,36,26,42,19,33,21,5,13,40,35,44,41,23,12,18,21,33,37,55,43,28,70,49,44,14,29,46,50,37,31,53,30,46,27,41,39,46,47,37,59,50,45,43,60,37,52,45,23,31,43,50,39,10,24,20,15,21,8,11,9,27,43,43,5,33,24,41,14,18,41,31,62,8,53,41,27,51,43,52,19,34,19,51,31,15,39,12,30,29,18,25,31,26,6,44,107,60,39,31,33,11,26,30,48,37,12,10,47,23,27,26,4,22,38,21,20,33,31,31,43,16,37,30,28,15,38,32,65,37,32,33,41,44,59,47,31,30,35,18,31,28,17,38,32,37,41,32,47,23,33,40,21,27,23,44,2,12,10,22,14,34,29,36,47,27,34,29,58,56,41,38,37,10,17,40,15,25,41,29,65,46,49,11,71,53,36,48,52,21,46,17,28,24,51,59,52,50,50,44,48,61,62,73,53,70,69,76,55,80,84,60,51,33,52,34,98,13,36,36,53,116,151,162,168,160,199,176,178,208,201,222,173,199,199,180,186,188,180,174,161,183,168,171,168,174,185,177,156,147,168,145,180,156
,127,98,14,22,20,33,2,8,24,18,24,33,13,33,22,20,6,14,39,6,21,26,22,31,46,18,172,147,112,62,39,67,91,102,145,139,108,94,108,198,243,224,96,46,41,0,51,47,20,21,22,29,33,63,93,144,229,219,160,115,88,104,146,173,224,231,152,147,116,98,79,103,114,103,62,64,92,105,206,110,101,87,111,100,72,97,94,93,86,95,77,71,77,95,96,129,157,103,73,67,99,78,105,178,154,138,144,192,163,171,160,181,185,173,149,160,212,214,123,84,101,166,149,136,138,161,158,162,166,134,154,146,142,139,212,250,142,68,70,81,91,68,102,74,101,74,126,166,126,169,165,132,183,149,161,168,168,162,206,183,247,251,148,121,112,103,137,100,190,237,182,131,118,124,113,111,92,115,123,87,70,52,90,88,74,61,63,47,49,29,5,1,6,14,13,11,40,13,17,16,27,13,12,29,20,32,18,15,34,19,35,9,28,24,26,34,17,36,22,54,69,111,117,77,114,106,83,93,127,131,127,120,70,97,120,106,141,118,104,171,154,151,169,140,146,118,120,131,154,103,118,110,142,106,111,69,67,66,71,64,78,85,52,68,9,44,74,83,58,123,125,98,103,92,125,139,143,110,78,101,128,122,60,82,154,98,39,102,141,109,120,124,126,110,147,115,104,129,122,113,132,120,104,124,96,129,102,118,49,37,111,72,73,75,90,45,69,33,50,52,33,59,52,59,51,72,54,70,37,35,63,38,30,59,36,44,39,47,32,24,48,40,42,33,67,54,68,100,101,88,94,88,67,86,123,167,148,62,49,58,56,64,48,31,20,77,132,115,95,93,104,81,88,98,104,82,100,120,92,90,78,75,109,77,85,100,84,87,124,95,102,103,104,104,79,29,33,35,32,41,34,17,11,13,45,46,15,142,242,130,111,54,15,74,75,101,119,91,83,67,68,39,65,75,69,62,68,79,76,73,71,70,67,74,91,70,97,93,73,55,22,14,13,26,43,38,66,51,28,52,59,57,41,24,74,43,31,39,22,59,39,47,14,51,20,57,22,40,24,28,48,55,17,39,36,35,60,40,25,25,36,12,11,15,31,46,18,81,39,30,28,69,43,43,59,25,18,45,39,59,47,26,14,30,36,46,60,72,45,42,25,94,38,49,37,52,33,66,54,14,52,24,13,45,11,6,32,16,26,22,29,29,51,51,30,40,55,9,50,26,48,36,21,22,40,6,36,47,37,11,16,16,19,30,16,6,4,26,34,24,21,22,69,89,66,27,18,27,45,23,27,47,34,59,41,26,25,46,15,21,19,24,22,39,33,22,56,14,40,35,7,14,30,43,41,38,38,14,50,33,49,37,41,26,57,39,32,31,18,45,23,21,40,27,51,33,37,9,31,41,45,30,9,21,37,21,23,35,35,33,45,50,47,50,66,51,33,62,48,30,31,19,24,38,36,38,34,37,42,34,41,40,38,51,32,35,19,20,39,36,58,83,88,64,53,32,70,30,68,71,56,9,7,43,65,55,60,56,46,55,53,106,113,107,35,10,34,145,205,169,215,199,213,217,239,226,240,207,219,215,203,214,216,179,179,193,206,203,230,203,194,185,193,196,201,178,202,196,191,195,220,119,9,3,14,31,13,24,35,16,11,21,18,2,18,0,15,35,26,30,1,24,0,21,23,25,232,228,223,110,95,95,109,92,144,134,112,110,105,202,244,236,94,108,84,24,40,36,61,55,15,51,27,45,75,165,229,237,213,127,125,135,206,242,252,254,183,169,145,150,149,138,176,155,133,146,130,179,245,183,123,99,137,137,136,129,145,125,145,155,155,155,148,155,157,203,222,159,104,173,201,158,157,208,174,209,195,193,195,186,199,179,212,204,208,200,204,193,159,157,139,179,190,211,210,210,185,203,213,194,217,175,211,190,183,195,153,129,81,80,70,73,79,45,89,81,119,164,204,206,213,182,186,200,180,211,189,185,217,204,192,199,173,150,107,135,100,62,111,131,155,127,79,83,78,8,17,7,58,4,0,14,15,22,48,11,61,145,106,64,35,24,25,7,29,46,20,14,17,10,35,2,9,16,0,18,7,26,26,2,5,3,61,18,15,32,26,2,0,25,44,41,83,95,94,106,137,135,141,124,134,98,95,101,127,119,128,156,113,147,146,146,139,123,100,76,125,96,108,137,148,142,124,107,89,119,90,71,62,113,69,91,72,42,38,31,80,70,39,84,120,112,100,98,98,161,168,116,53,75,98,89,80,47,143,149,69,98,103,67,58,66,94,121,84,94,79,71,80,132,89,82,87,63,104,104,104,75,71,41,101,100,131,83,101,77,28,41,44,51,53,26,64,48,33,59,54,45,42,59,54,43,44,30,60,55,39,39,43,22,33,41,27,36,61,91,
138,166,119,123,81,127,116,119,135,96,77,13,30,46,46,54,83,40,52,70,44,107,110,90,69,75,85,79,108,75,91,113,95,107,100,93,97,81,101,92,101,117,89,90,70,109,116,65,98,19,31,24,15,34,39,19,25,36,33,61,44,150,185,122,158,52,60,75,92,132,99,53,76,80,70,69,65,77,79,56,65,84,72,89,71,50,110,107,94,89,120,65,55,23,6,20,11,5,51,16,52,31,36,49,35,52,41,37,42,31,19,28,30,57,14,24,57,31,26,12,16,29,23,31,26,19,32,30,34,27,35,39,24,3,3,10,9,18,36,30,34,20,40,27,28,31,42,28,35,29,47,54,32,26,59,28,38,30,51,50,48,36,59,48,33,46,46,67,24,60,56,4,26,27,9,27,8,23,27,27,25,44,52,54,38,23,40,25,55,50,61,17,37,33,34,50,51,32,32,19,31,23,37,41,30,37,20,10,29,44,13,41,7,8,14,21,54,105,68,56,15,14,7,39,32,45,34,29,29,2,34,41,31,20,31,14,43,34,33,50,30,20,8,48,27,28,29,20,50,43,41,54,40,34,31,46,53,26,50,16,20,33,35,60,29,49,16,41,48,38,42,37,20,20,41,24,32,39,30,21,37,27,44,32,37,48,30,54,23,50,45,12,22,32,49,17,3,37,52,34,41,30,51,73,29,45,60,67,55,44,9,24,31,47,81,91,74,80,36,74,35,61,47,30,22,39,45,21,45,52,50,62,104,91,107,80,143,122,47,60,46,186,213,193,210,197,215,225,205,240,213,173,215,200,238,228,219,192,201,207,205,201,201,202,213,225,193,197,252,219,212,208,205,217,218,118,32,9,12,16,3,12,10,25,26,29,26,16,14,0,13,26,26,6,5,9,27,25,22,11,234,239,237,156,113,150,103,120,146,156,163,166,136,188,250,218,89,155,152,47,32,22,22,42,52,30,47,30,89,122,141,249,219,142,169,156,229,236,225,223,174,204,179,190,187,198,190,193,192,186,181,211,217,160,160,140,170,200,186,196,196,194,180,214,202,193,197,187,208,224,240,195,219,232,243,231,220,213,189,186,203,189,200,239,219,201,204,210,177,129,84,104,117,199,184,224,179,210,223,207,180,181,204,184,182,189,151,158,116,77,133,176,107,79,76,93,71,50,89,95,165,181,209,212,203,224,194,178,189,179,209,202,204,154,72,97,138,172,111,107,71,71,106,122,132,104,82,83,32,24,40,30,34,98,98,146,172,248,243,229,236,249,244,240,232,238,212,247,215,226,224,199,238,212,192,213,235,192,224,206,195,193,187,175,169,148,151,142,174,176,171,176,120,134,104,51,86,71,109,140,126,117,128,137,99,115,77,101,76,81,133,147,145,122,132,113,127,95,105,86,103,67,122,151,109,115,133,126,116,115,106,112,93,85,70,73,54,70,81,80,90,76,68,52,70,122,95,46,72,101,110,78,104,81,66,52,56,61,133,160,73,61,64,61,42,39,49,57,70,81,91,78,65,75,91,61,36,71,50,60,35,58,17,58,33,67,100,74,89,126,103,97,103,93,90,109,78,72,70,69,52,60,49,36,29,55,45,16,56,24,45,21,53,52,49,56,55,67,55,97,140,144,103,104,104,129,131,102,44,21,22,23,65,73,66,66,55,78,45,40,54,60,73,73,76,102,91,82,98,88,74,90,68,111,101,83,96,110,85,105,133,88,105,91,97,87,68,93,116,87,42,39,27,46,36,18,33,34,27,30,34,79,80,57,133,91,45,66,61,89,62,53,50,24,61,68,90,67,82,51,86,86,76,81,78,75,94,87,94,97,39,14,41,24,1,32,25,20,42,41,22,44,22,50,27,39,33,10,29,46,44,22,52,36,25,31,28,16,40,58,78,53,31,12,44,45,39,46,21,49,25,24,13,44,31,4,8,20,38,24,40,46,31,24,19,22,32,23,36,29,32,37,40,63,39,34,32,41,54,35,53,40,64,53,53,16,23,31,59,31,43,63,26,43,1,7,1,28,24,24,43,27,35,60,40,39,41,39,59,34,22,41,45,26,32,47,55,29,43,50,20,23,42,54,17,27,28,24,44,23,26,39,11,5,22,46,40,44,65,36,44,20,45,38,37,18,49,46,8,4,21,13,44,26,6,26,33,24,47,33,25,33,9,19,10,26,33,49,32,35,34,30,40,72,27,32,29,25,44,56,46,40,34,34,42,41,20,58,43,30,35,43,16,11,21,40,38,20,30,34,19,49,35,47,42,30,36,41,23,35,35,43,45,37,52,41,4,12,13,34,26,7,10,30,9,49,43,51,65,35,41,17,21,60,123,93,95,99,70,113,96,92,73,76,89,108,88,60,36,50,26,49,64,66,103,119,132,104,99,65,131,239,204,215,229,206,224,227,191,206,198,193,219,205,192,205,215,192,200,181,215,213,225,208,211,229,188,227,229,205
,223,176,216,202,226,97,23,21,11,0,29,5,14,6,0,31,17,12,29,18,12,19,3,2,8,14,35,34,40,12,234,245,246,150,96,156,150,161,172,191,250,249,209,231,241,233,106,180,119,24,63,5,34,10,13,39,23,55,93,144,136,156,149,129,149,138,195,224,193,162,145,159,193,189,197,225,205,195,193,198,168,115,108,139,172,180,192,196,205,213,198,200,183,203,195,196,193,190,184,73,98,138,227,247,255,235,178,180,186,192,173,183,227,250,188,174,176,181,130,39,9,41,92,170,218,201,194,187,201,223,176,180,176,205,172,177,182,82,57,30,63,147,162,134,27,85,77,118,133,172,180,195,190,193,187,188,183,186,169,196,195,183,177,78,44,69,103,174,150,135,69,81,111,129,143,115,122,162,229,229,240,228,249,245,254,250,255,230,247,254,231,236,252,254,255,255,249,232,254,247,252,239,255,248,255,252,233,243,234,237,242,253,246,251,253,249,246,234,234,242,250,229,255,249,233,172,135,88,98,117,99,59,93,95,106,86,108,102,72,67,148,100,124,121,118,97,126,130,94,140,134,107,98,134,136,131,147,139,101,82,117,82,92,74,45,44,51,75,85,53,69,46,25,77,64,75,82,54,79,107,88,107,77,85,118,81,70,52,101,147,65,27,46,9,29,26,39,76,76,62,61,58,54,67,81,77,74,51,47,81,46,69,57,19,54,24,44,74,74,105,127,124,113,124,156,146,136,126,71,25,44,43,68,11,67,45,33,47,50,17,44,28,40,40,60,61,78,94,72,35,118,141,113,139,122,116,69,27,38,27,49,65,131,119,72,44,47,31,57,28,28,51,50,58,76,83,88,108,108,95,116,99,99,82,69,85,119,115,94,115,102,107,122,69,63,72,78,112,117,103,72,60,35,45,19,52,27,37,25,40,29,48,46,14,97,51,58,34,27,49,77,51,63,70,76,71,78,52,77,73,82,49,71,77,97,69,76,77,82,29,26,41,10,29,23,33,51,62,36,43,50,28,28,19,61,26,28,16,29,38,39,37,55,34,30,23,29,18,40,40,32,31,57,51,27,34,26,44,30,42,39,24,6,2,2,54,9,37,22,44,25,51,29,45,29,61,50,32,44,22,37,34,49,40,14,46,38,43,35,43,44,36,18,55,48,20,54,46,57,40,41,31,37,5,35,12,20,24,17,35,25,38,34,46,30,55,25,39,54,38,38,52,35,48,31,48,46,36,28,21,20,36,56,71,20,34,54,29,42,44,34,27,39,66,21,38,32,27,54,69,86,38,39,44,24,44,27,24,32,7,19,25,2,17,24,10,17,19,7,44,16,12,14,17,39,43,48,54,21,40,46,61,43,31,41,60,38,43,42,28,65,9,46,45,26,45,44,50,55,38,28,19,17,34,23,3,7,28,3,22,27,34,54,24,30,18,35,23,38,44,49,34,27,23,29,41,16,39,25,23,32,35,24,58,48,42,35,43,44,56,29,77,150,172,152,135,114,114,149,161,136,149,129,149,151,159,128,166,105,93,85,36,58,64,53,84,104,110,127,103,192,238,219,223,211,207,227,202,179,187,183,221,211,172,180,214,205,193,205,178,201,215,193,219,199,181,207,209,197,207,185,213,214,209,183,129,10,4,16,5,9,18,0,17,5,3,1,10,19,15,10,12,22,16,22,14,16,28,37,1,249,233,233,130,113,140,164,180,205,224,249,241,242,240,253,225,100,161,86,20,37,14,34,21,25,44,34,54,64,110,101,72,73,93,102,115,126,130,150,152,201,181,144,214,177,203,168,169,194,158,132,17,3,78,146,203,219,196,168,185,192,178,163,157,178,179,157,178,69,4,28,60,196,226,227,232,157,145,154,163,162,193,212,211,186,169,156,150,61,1,12,23,22,134,179,202,219,159,176,175,180,183,217,164,180,172,117,52,38,38,45,118,171,148,77,68,81,119,171,203,199,170,193,181,168,179,186,147,174,200,168,168,103,27,54,19,44,86,137,129,83,31,47,81,101,107,171,247,244,255,236,254,244,248,252,232,255,254,250,230,253,247,245,244,243,240,240,247,255,251,252,250,228,230,242,243,243,255,254,239,239,249,248,255,254,255,240,246,249,255,224,254,243,238,229,233,200,121,93,107,120,122,108,149,134,85,115,126,104,114,119,115,110,129,121,128,124,106,109,130,149,88,114,142,139,145,90,73,53,94,116,123,96,87,44,61,77,84,53,61,62,35,27,56,57,82,74,49,39,45,95,62,68,74,101,78,80,74,65,150,79,21,63,66,95,46,28,53,76,72,100,89,65,33,44,103,90,69,80,55,46,76,48,55,56,56,53,63,1
02,111,131,135,136,134,128,148,118,87,37,55,24,78,95,79,68,50,55,43,42,66,75,64,23,57,86,116,114,108,121,51,91,149,121,143,81,15,30,28,38,64,110,94,108,129,137,47,27,75,35,22,29,62,119,67,109,89,111,96,97,100,119,116,100,99,97,107,77,106,108,113,72,89,87,61,82,97,82,104,120,112,127,82,47,20,47,35,40,27,57,32,35,55,44,35,99,84,30,15,12,66,67,55,53,81,67,85,63,89,78,72,76,45,64,79,87,96,71,72,45,16,3,21,24,28,24,31,63,62,25,33,36,22,46,38,54,49,43,51,54,44,22,53,11,34,25,46,50,53,57,19,25,50,40,39,32,45,43,51,39,64,43,9,34,45,35,10,17,24,2,29,22,34,27,51,39,42,31,37,35,37,49,37,43,31,18,52,38,26,45,61,51,45,61,43,60,35,46,31,21,39,63,39,53,13,30,23,33,37,24,27,32,1,14,28,39,31,24,68,38,34,39,45,59,48,80,33,30,16,14,8,57,38,23,36,37,42,38,36,29,47,34,37,49,33,47,9,3,40,64,61,77,58,24,29,44,24,38,25,9,9,11,23,8,16,24,28,47,22,38,22,39,19,14,25,38,28,35,28,54,37,28,29,34,36,44,54,26,38,35,18,21,34,29,43,20,43,36,52,29,53,29,56,23,27,23,4,11,19,10,41,32,56,25,23,39,15,16,24,51,22,39,3,13,31,16,36,29,23,33,22,36,2,33,41,21,23,57,45,56,27,35,199,252,241,222,177,141,144,163,155,157,161,159,151,177,175,170,159,170,149,119,80,88,57,67,102,114,145,71,140,223,219,196,222,191,209,187,174,179,189,206,199,212,167,199,184,206,210,180,176,177,191,198,186,194,191,211,194,184,186,200,212,202,210,190,124,22,0,10,9,27,15,21,14,18,24,8,25,8,13,12,45,21,8,14,10,14,8,29,14,246,238,208,137,122,146,116,145,187,249,247,240,253,253,232,243,157,186,66,21,56,12,50,18,18,46,48,57,75,125,85,32,57,73,117,122,149,148,179,193,153,161,188,178,172,180,175,160,141,137,55,9,30,19,88,191,220,197,174,157,160,167,169,150,176,179,171,116,77,30,46,42,68,164,208,210,151,149,152,197,190,195,167,174,165,150,162,83,29,39,40,29,39,55,125,214,204,202,169,188,165,198,193,184,164,131,82,47,69,77,91,84,126,193,152,107,113,157,145,214,179,177,194,177,186,168,156,164,181,172,185,96,28,12,25,37,40,35,74,96,73,59,11,18,24,11,72,124,231,243,237,230,215,206,184,186,174,168,129,117,119,228,245,245,250,229,254,252,247,251,253,255,247,246,255,238,251,246,255,253,241,237,231,246,250,231,255,253,250,244,253,255,249,249,239,243,235,179,75,82,67,94,86,130,108,126,98,113,145,150,148,136,128,135,97,132,124,73,110,111,94,76,127,135,130,117,80,68,85,92,104,103,104,78,67,80,77,108,84,49,57,51,39,56,66,57,60,63,58,29,76,75,97,78,69,96,105,106,88,139,125,21,70,148,141,89,74,60,108,162,161,126,60,33,100,104,91,89,66,85,71,84,90,77,85,59,79,79,87,90,143,115,91,85,78,45,63,64,68,76,86,98,101,81,95,83,78,66,69,57,40,55,27,43,123,133,134,109,128,112,133,140,78,70,15,6,9,72,115,131,130,118,127,111,123,127,112,85,29,9,17,87,131,115,93,100,120,113,109,138,86,102,89,91,140,119,106,79,111,112,98,94,71,56,86,79,90,122,99,107,115,109,49,23,22,34,28,35,39,44,56,57,52,96,185,108,19,30,12,65,54,78,89,71,94,72,87,57,61,102,71,85,69,88,104,87,52,41,14,4,9,43,32,44,41,65,57,81,47,33,27,42,45,56,26,46,23,48,53,50,41,26,51,32,29,36,11,52,29,43,40,36,43,43,57,54,64,35,39,48,55,60,54,55,29,32,50,33,39,14,30,45,34,17,38,43,25,45,35,6,46,28,57,51,18,30,29,54,45,67,54,9,19,13,60,58,12,47,45,45,59,22,31,2,32,38,53,26,27,22,35,45,34,42,27,30,38,35,47,50,39,64,19,54,24,33,16,22,16,7,34,29,22,43,47,18,16,9,4,51,30,39,14,25,21,33,36,49,71,41,78,82,45,25,33,12,36,21,14,30,16,38,18,36,21,34,38,12,40,6,30,19,9,32,20,46,21,52,17,20,31,31,45,62,62,22,45,41,41,15,37,44,19,33,33,20,47,32,33,26,22,22,36,19,33,23,27,15,16,33,33,17,38,22,33,37,8,22,19,25,20,25,52,60,57,31,31,20,34,18,29,30,33,10,9,35,6,39,36,8,66,199,248,251,227,220,187,147,165,135,146,148,140,150,153,185,161,164,191,177,140,139,121,99
,103,127,90,81,104,187,243,231,224,213,203,214,189,181,182,192,188,207,212,202,204,209,217,218,192,219,191,218,178,183,189,212,199,194,201,221,205,202,216,217,197,124,7,0,5,23,33,1,23,7,2,23,1,27,28,2,14,26,22,18,9,23,6,20,23,9,248,231,234,212,140,152,103,127,160,166,194,244,244,238,216,219,136,184,71,7,31,3,54,15,54,49,60,66,88,145,134,101,103,105,141,158,176,192,166,187,160,205,167,168,155,157,180,177,133,32,71,60,58,65,49,113,205,199,180,184,197,178,157,160,183,183,145,84,113,135,132,57,31,119,198,177,173,184,209,208,153,160,125,169,174,174,63,38,60,79,125,88,44,18,79,139,191,201,179,192,211,187,174,163,150,81,86,103,134,116,102,91,68,128,168,135,157,157,160,189,182,178,186,202,191,180,171,183,184,171,109,128,119,106,109,129,107,107,165,237,248,243,217,136,72,38,16,6,96,151,138,161,181,241,242,246,241,154,126,102,178,254,232,239,222,252,250,242,236,246,255,242,236,254,249,237,251,226,252,233,255,252,243,254,242,254,252,241,247,237,202,123,164,238,240,236,229,169,84,72,91,89,82,138,122,95,67,104,124,148,155,118,112,115,102,134,137,122,94,82,97,91,135,160,139,95,100,111,105,90,78,90,124,120,101,117,134,97,113,103,93,91,51,56,68,53,54,53,39,16,58,87,106,82,83,112,106,123,105,176,139,62,109,148,126,122,87,111,168,170,151,107,47,44,70,87,68,95,104,106,71,144,128,144,93,126,76,82,104,95,71,52,56,68,72,84,85,131,134,160,122,134,82,65,86,111,114,91,89,87,118,108,73,71,108,111,106,125,117,141,109,90,11,21,12,45,68,109,132,143,136,124,127,131,137,115,132,92,11,17,42,147,95,91,146,112,79,100,97,118,98,100,99,120,89,76,100,110,118,107,83,73,55,74,87,84,108,116,112,114,84,24,36,36,10,21,30,26,34,53,22,43,74,96,230,148,34,25,47,72,75,90,56,80,63,65,71,54,77,68,67,101,72,93,51,41,21,4,31,25,8,29,61,60,75,69,72,46,48,11,22,48,68,29,48,59,25,51,41,34,33,51,27,38,50,50,39,48,21,80,36,59,65,33,45,42,38,32,33,86,66,72,48,58,74,59,18,14,28,36,13,23,25,22,43,52,46,56,32,15,73,53,43,30,49,34,24,51,27,55,55,45,49,32,56,40,43,55,28,74,39,15,29,33,19,44,36,51,26,25,34,35,31,43,19,38,43,37,47,36,39,35,72,46,44,31,32,4,4,50,10,19,21,14,23,13,46,34,5,30,23,27,19,32,3,0,40,16,19,49,66,85,83,58,29,7,36,21,45,30,41,22,33,36,31,40,26,34,5,27,16,25,34,23,56,22,27,31,52,22,47,14,54,34,39,18,37,21,40,43,15,31,23,33,42,38,43,12,18,33,54,33,20,29,15,25,19,43,39,47,36,53,40,28,19,35,43,11,53,31,34,70,22,44,46,39,36,36,34,15,15,29,24,26,29,14,29,18,26,131,190,228,188,165,205,220,218,207,204,161,140,133,103,108,103,127,145,140,156,139,145,159,150,140,132,147,103,113,158,229,235,240,245,240,230,199,233,212,218,245,218,212,232,217,228,242,200,228,219,237,237,218,235,195,229,247,185,231,205,215,225,218,202,223,195,113,14,4,1,21,31,10,1,7,16,20,35,15,15,2,0,8,15,1,7,25,29,3,18,4,252,242,222,215,93,83,80,102,70,48,47,99,139,175,227,181,156,162,62,49,27,6,35,22,32,66,55,74,130,210,149,132,143,113,155,185,158,207,193,187,192,197,172,185,177,142,171,141,49,54,122,120,125,108,68,70,102,204,225,182,170,164,168,186,171,112,51,110,140,161,181,141,74,116,181,155,170,212,178,181,135,137,143,123,146,90,27,30,85,187,168,172,118,32,53,84,165,198,201,170,185,189,180,135,114,61,73,106,71,119,145,91,53,101,124,138,160,198,204,165,195,170,182,158,190,200,193,192,179,116,95,138,154,140,183,218,246,254,243,233,235,239,227,238,243,220,175,118,172,208,252,248,248,229,253,234,250,242,230,241,230,224,250,245,255,244,243,230,247,247,255,238,253,252,226,251,235,235,255,254,243,250,251,244,221,244,238,244,231,103,40,25,93,145,169,161,166,92,82,67,100,105,117,144,145,128,91,112,114,97,116,100,102,67,83,83,128,93,80,99,130,121,142,101,107,108,140,130,95,100,1
12,99,103,105,122,124,127,126,102,120,49,87,79,111,92,60,85,63,46,16,71,100,93,88,96,101,104,119,110,155,177,70,66,81,76,85,91,170,193,138,157,95,33,20,67,90,59,94,64,73,104,144,163,156,157,113,117,77,77,85,115,66,76,100,118,119,118,190,180,123,124,122,93,71,86,93,132,136,100,131,137,124,91,83,58,73,126,148,117,111,65,58,80,79,110,123,142,144,161,138,144,133,130,118,136,101,111,100,60,27,76,115,147,89,114,113,102,129,68,103,79,117,117,93,63,92,92,79,97,75,88,87,87,86,95,91,108,107,97,42,70,19,23,17,20,65,27,28,35,41,37,44,42,66,131,166,77,32,64,109,86,66,96,86,78,73,73,71,90,82,73,93,78,35,40,25,4,29,13,0,32,37,52,68,64,63,52,49,13,25,49,14,48,25,56,36,42,21,34,15,5,36,7,49,26,12,34,57,25,40,75,51,33,41,48,40,46,45,71,78,97,88,73,72,78,66,54,22,22,32,13,17,35,32,25,26,31,42,32,30,38,42,36,36,13,47,56,47,73,48,30,47,54,20,54,43,48,21,41,36,34,21,60,50,39,32,2,6,21,30,25,51,38,48,55,27,59,42,33,35,37,54,30,47,21,46,21,6,40,30,16,16,35,15,23,38,17,26,46,25,22,26,46,12,25,28,20,44,43,25,26,53,70,77,56,22,14,14,36,51,29,50,41,38,25,26,33,20,18,30,14,24,26,16,23,37,22,28,31,47,39,38,21,38,38,24,54,49,40,24,43,32,17,30,38,42,26,25,62,15,18,11,29,41,42,62,44,17,38,14,29,35,30,56,33,63,28,47,28,35,42,46,21,41,40,46,26,25,40,22,8,14,23,31,14,30,34,40,175,248,247,251,204,207,181,182,178,199,216,202,188,167,134,115,110,115,120,134,148,124,130,161,159,156,169,135,115,148,202,210,203,214,232,231,228,231,230,223,251,222,233,247,239,238,234,252,247,216,226,235,229,223,228,232,223,221,216,220,206,191,223,217,212,211,217,107,6,2,4,18,7,15,15,9,32,48,6,0,26,6,10,2,18,12,9,5,31,12,13,22,173,201,211,136,23,47,65,51,79,60,51,44,52,135,208,134,118,150,63,36,20,32,19,36,53,28,29,51,106,181,181,149,147,165,176,208,195,202,184,182,185,192,164,151,169,167,155,51,31,47,138,167,146,118,59,61,55,173,187,152,164,154,163,129,145,75,15,97,168,180,177,202,178,157,161,135,147,173,166,145,104,147,166,117,98,34,13,28,118,177,153,183,119,61,60,68,75,187,180,124,122,155,145,123,82,32,65,121,103,82,110,77,78,85,85,106,123,172,220,177,211,181,205,199,182,228,175,176,117,66,48,89,179,238,241,246,249,255,245,232,243,246,242,255,243,239,254,247,250,235,254,235,250,251,249,239,255,253,234,237,247,234,234,244,253,249,247,255,234,243,249,245,247,234,255,252,255,247,240,235,243,246,249,242,255,251,249,248,90,39,32,19,93,85,87,127,151,65,93,115,129,177,120,120,126,115,105,104,85,69,84,84,68,37,61,76,96,103,111,136,124,98,146,130,94,136,140,134,91,82,66,117,97,88,133,137,89,94,101,86,82,39,67,85,73,61,63,87,137,127,126,95,98,102,135,138,144,146,143,148,183,124,68,107,144,123,145,173,147,105,138,84,19,44,66,108,64,71,76,78,116,147,155,158,146,132,164,147,104,109,103,67,86,108,106,116,131,154,138,95,118,119,93,43,79,139,131,137,80,79,84,99,70,101,63,83,129,104,60,8,12,67,139,128,117,144,154,151,134,136,120,111,133,95,89,76,91,78,106,95,106,124,90,109,87,114,91,97,114,106,100,81,105,85,59,97,104,96,99,111,94,105,105,106,87,94,103,70,50,41,29,26,13,34,52,82,41,47,14,14,22,49,42,18,68,172,150,75,47,77,87,83,70,87,60,60,57,107,84,82,65,57,16,22,4,10,27,12,25,77,62,76,68,66,53,81,50,57,34,15,37,16,34,11,26,19,28,24,41,52,27,26,23,56,36,24,49,46,43,37,21,54,27,54,24,39,27,52,30,67,71,86,61,71,94,72,67,55,35,9,25,19,7,31,21,36,20,39,30,7,57,43,37,39,27,44,40,45,40,45,27,49,37,50,46,28,50,32,38,44,27,42,14,36,28,50,50,33,47,42,44,41,15,48,36,36,50,48,60,35,30,52,17,27,62,18,34,10,18,12,4,15,7,11,13,7,25,27,31,47,48,46,27,23,12,36,26,18,32,23,41,50,54,88,55,28,18,21,16,33,50,64,36,41,44,21,46,13,20,31,17,5,28,22,20,50,42,32,45,28,13,26,59,45,28,8,25,
33,21,28,34,38,29,12,20,29,42,31,39,17,18,18,38,36,30,43,13,67,42,36,25,32,28,32,24,11,25,56,43,63,39,40,28,44,29,40,48,29,27,37,11,43,37,49,38,9,42,66,249,236,246,245,239,255,221,200,176,173,182,187,188,210,207,152,124,144,121,158,159,182,127,169,158,157,168,155,138,160,175,170,138,160,141,150,126,165,158,139,159,166,172,175,187,188,157,178,197,190,167,172,192,194,210,207,203,223,201,198,194,180,215,208,231,226,232,134,11,1,6,11,11,27,12,9,6,20,19,10,15,13,13,6,15,37,26,8,8,5,30,28,103,102,64,46,28,72,60,86,97,74,78,109,90,124,170,73,74,117,21,38,37,36,21,27,29,7,45,74,87,123,133,125,173,169,187,188,185,177,174,164,169,161,170,162,149,128,102,29,37,67,126,161,119,149,138,136,71,120,149,157,103,86,155,131,65,35,26,80,135,177,177,223,223,185,188,123,102,177,173,128,116,180,186,142,122,39,31,58,96,182,177,206,132,23,62,28,56,125,126,105,77,134,107,61,62,70,76,127,60,71,99,66,83,119,179,215,183,188,215,181,163,174,196,166,172,175,167,147,125,77,166,249,246,244,242,234,236,239,249,252,224,245,255,255,237,247,249,243,255,250,245,254,255,231,241,253,237,254,255,245,250,231,243,245,222,238,236,249,245,255,249,255,240,248,242,239,252,250,251,222,252,251,252,243,244,236,230,114,86,63,74,100,70,74,38,98,114,61,124,51,132,116,96,118,107,65,88,70,69,73,100,114,90,77,79,102,117,125,172,155,102,83,138,130,128,98,105,97,73,106,99,93,80,83,137,135,132,102,103,72,70,80,23,29,67,62,105,110,114,133,156,148,115,140,132,144,123,116,135,127,135,121,49,53,103,127,66,92,69,46,63,60,60,16,77,90,22,29,88,113,129,144,128,119,145,158,211,170,144,115,63,59,36,68,72,106,123,112,133,97,124,90,89,26,46,145,147,129,102,49,77,111,107,128,86,80,54,8,27,9,68,112,117,98,117,71,79,91,95,105,104,85,64,61,95,90,65,77,89,91,102,114,79,88,113,88,98,96,103,73,103,135,125,123,70,80,127,83,62,84,103,92,96,77,101,77,53,8,17,28,32,17,62,56,88,108,98,59,36,18,11,35,12,15,53,144,179,91,57,73,69,65,78,69,64,54,66,77,71,47,28,40,16,42,10,25,24,55,64,62,75,92,73,55,79,72,66,47,21,47,27,25,32,32,23,23,21,37,45,32,38,56,50,38,30,42,33,44,45,34,48,32,28,37,29,45,27,47,48,110,63,57,52,67,74,81,79,29,2,26,1,14,38,39,42,17,32,18,8,27,12,35,27,30,22,29,34,33,22,24,51,41,44,71,35,53,32,41,27,35,36,30,20,25,26,58,43,59,35,7,19,39,24,42,30,12,48,52,23,75,50,27,36,32,27,32,36,7,36,14,14,4,18,42,24,16,26,17,52,38,40,22,26,24,37,27,17,35,11,62,25,21,39,72,96,46,0,14,5,6,36,33,27,70,49,26,40,24,16,29,41,18,31,18,41,17,4,28,32,41,36,51,39,30,27,27,21,36,44,45,31,14,45,29,27,39,35,37,18,26,47,6,61,49,27,11,74,51,44,49,14,40,47,24,46,32,8,31,8,43,33,35,38,36,38,37,16,61,8,31,16,13,38,34,8,33,19,100,247,228,251,248,246,244,223,245,227,178,181,169,182,175,226,221,238,221,198,188,192,173,189,195,164,175,173,162,162,185,166,141,107,37,18,5,27,32,60,39,49,35,27,53,45,50,66,63,79,112,79,67,96,114,97,113,117,134,141,157,127,128,147,166,166,169,190,106,8,0,25,27,26,19,14,15,11,12,13,16,10,4,7,35,37,17,22,14,2,16,27,8,112,76,44,33,61,110,148,160,133,125,124,134,92,182,183,82,68,62,30,45,43,35,35,23,28,20,60,78,113,113,154,134,156,163,192,193,161,156,155,180,183,165,159,159,99,101,16,25,30,78,76,82,110,147,228,172,55,75,108,131,133,121,151,58,30,30,41,87,161,156,176,231,182,188,182,108,92,162,218,135,121,217,254,180,164,125,62,53,100,197,178,167,134,32,41,39,35,99,164,115,164,166,79,38,79,88,87,102,80,90,106,96,87,159,240,239,210,148,183,171,156,136,109,125,85,112,133,115,218,250,248,249,223,252,255,255,244,249,236,251,232,248,255,253,250,244,230,252,249,241,255,246,251,255,239,254,246,224,255,242,238,248,237,246,223,244,251,242,220,255,254,240,249,252,225,252,252,252
,249,255,255,243,243,228,160,158,84,59,71,76,71,70,78,69,56,81,106,119,120,110,110,103,86,65,91,67,80,94,87,85,129,142,132,95,83,102,129,100,128,147,133,131,130,141,115,66,84,67,77,107,110,85,82,103,92,108,124,111,107,77,88,74,75,41,37,105,104,88,97,98,88,64,41,81,81,29,56,36,31,58,90,146,60,10,33,34,36,48,19,46,57,64,51,36,62,56,39,42,106,137,151,145,133,103,133,136,186,194,156,150,139,115,105,87,62,70,88,101,86,116,159,105,95,19,38,54,92,123,107,61,49,94,144,115,58,36,32,8,69,95,115,82,103,76,63,42,45,56,69,78,59,65,49,54,83,50,75,58,63,74,86,97,101,108,93,117,107,87,68,124,129,150,133,97,53,46,61,67,21,39,49,77,116,75,39,38,31,21,25,16,18,47,71,107,118,121,146,117,51,23,30,27,51,82,82,111,143,138,50,30,53,56,69,91,78,45,91,76,70,39,39,25,10,21,9,39,34,64,52,64,60,47,96,56,79,69,69,38,31,78,25,41,57,25,38,16,50,37,65,46,32,32,23,27,31,31,20,30,53,49,42,46,66,31,16,44,45,23,65,64,52,49,76,52,15,23,48,21,6,33,30,39,31,42,36,6,31,25,51,34,61,19,26,15,8,28,22,19,9,57,26,11,36,27,41,30,29,23,33,40,31,34,18,57,40,41,62,43,28,58,37,40,15,47,11,33,28,25,30,21,24,18,20,22,23,33,8,39,55,23,27,35,4,21,23,20,2,30,13,31,16,25,36,52,6,26,17,25,17,36,28,42,28,65,87,97,31,21,26,23,32,35,30,37,38,43,22,47,27,19,35,20,18,20,15,32,28,40,33,38,7,26,26,17,36,4,40,21,42,40,49,18,38,38,18,7,17,51,20,13,20,23,26,54,26,39,48,57,11,37,25,8,54,39,33,28,3,19,23,41,35,37,43,23,21,33,50,46,23,19,16,29,26,26,31,49,4,134,234,246,252,246,234,253,239,239,214,209,225,203,190,168,186,220,253,248,174,149,182,175,191,161,148,152,162,153,166,163,130,122,79,27,7,10,6,34,30,0,9,22,31,16,18,31,10,12,36,16,21,41,25,10,28,7,11,3,33,42,41,51,57,76,89,90,119,91,30,4,31,8,9,17,25,17,11,22,21,36,38,15,27,11,7,14,32,23,23,11,16,14,139,99,53,89,95,101,151,160,155,137,155,170,124,193,218,76,50,59,27,55,21,53,34,26,37,43,31,80,125,146,134,139,202,186,190,165,172,162,163,158,166,162,147,151,96,57,21,31,67,56,53,50,55,88,205,126,42,81,96,138,156,106,106,31,15,28,88,149,189,193,197,217,159,161,178,129,94,174,219,160,112,218,247,202,172,180,106,67,115,193,123,142,60,43,76,41,86,63,143,174,167,133,57,54,91,96,80,132,109,116,111,122,124,142,202,193,140,61,74,88,87,75,58,64,81,104,142,171,244,244,254,248,254,254,247,236,240,254,249,246,244,253,237,239,255,246,253,245,240,255,240,246,250,245,252,243,252,252,245,249,245,249,254,226,238,243,241,251,242,245,254,250,244,247,248,247,252,245,230,243,252,244,253,241,155,85,34,48,54,34,41,71,74,65,73,99,109,135,169,141,115,102,103,101,106,120,116,99,100,117,159,103,94,93,76,79,127,97,113,122,115,119,129,125,112,60,23,49,81,98,120,78,107,61,101,112,107,110,82,73,106,117,89,61,34,57,74,56,40,35,28,35,19,52,49,19,35,15,25,7,42,153,79,27,25,40,51,57,57,55,54,73,46,63,76,64,40,60,112,158,178,148,158,168,163,161,165,155,181,134,138,124,104,113,80,84,123,123,104,122,91,114,90,37,15,45,19,51,83,79,85,130,97,44,4,6,22,88,125,134,142,129,88,27,22,35,57,47,42,12,44,35,45,14,28,41,50,42,34,43,68,108,86,88,81,122,108,105,104,140,139,128,70,44,62,43,46,41,63,48,39,42,47,63,7,26,18,18,19,51,65,99,135,120,123,114,135,115,107,93,114,76,31,68,31,116,100,154,116,60,57,61,61,70,61,56,55,15,42,28,18,16,11,26,26,73,88,65,61,73,44,76,93,59,68,46,56,37,22,61,42,36,31,49,28,21,42,30,32,47,44,39,46,41,52,49,41,20,42,52,24,47,34,47,14,66,26,42,48,21,19,14,46,48,15,33,34,11,19,20,9,31,45,9,18,29,26,50,28,40,26,26,20,46,48,17,8,34,33,33,36,31,37,29,30,24,37,35,44,41,2,9,24,10,37,44,31,10,38,5,19,16,24,2,32,34,29,0,29,26,39,32,28,41,25,22,7,34,40,37,7,31,37,23,22,23,25,20,28,20,18,18,40,38,13,35,22,50,25,14,26,24,31,39,50,102,84,47,6,
18,31,9,10,43,34,35,40,2,11,38,23,20,21,33,14,24,19,16,27,10,38,25,24,20,41,45,22,31,32,21,35,29,50,10,23,12,29,23,38,38,31,9,38,10,64,49,59,45,35,37,8,35,41,25,60,56,31,29,42,33,41,31,40,52,23,23,32,34,24,28,40,36,36,23,9,50,34,171,234,251,238,252,236,249,246,246,246,223,248,209,250,227,219,163,220,188,79,57,84,86,74,98,83,74,83,51,85,69,88,116,99,144,143,137,142,124,79,105,131,90,69,56,47,75,61,66,56,42,36,18,17,49,62,15,24,25,10,25,16,28,20,25,33,32,23,55,11,15,7,20,13,17,21,12,3,23,2,27,14,29,2,13,26,5,13,27,18,8,26,23,82,100,92,157,115,115,85,89,106,75,117,175,130,215,199,101,97,53,23,25,38,18,30,23,26,54,69,68,126,208,222,215,220,221,225,216,220,203,208,205,205,216,231,145,78,118,125,95,102,97,121,97,67,70,109,99,74,126,151,103,89,82,89,45,54,58,118,196,210,185,225,236,156,154,204,152,133,174,228,171,98,187,244,211,160,173,159,114,142,176,63,53,44,74,94,78,60,63,63,103,85,77,53,54,126,117,73,107,104,117,90,119,115,99,105,110,51,26,26,58,52,75,104,145,231,237,245,255,237,254,232,249,255,255,243,240,255,249,233,251,243,255,250,254,248,245,253,248,229,238,236,255,243,246,235,225,248,255,250,250,252,231,255,252,254,254,234,230,246,254,234,236,254,246,251,252,242,253,231,252,254,243,243,245,128,93,56,44,110,103,83,81,86,46,38,105,119,134,184,148,133,128,119,120,101,70,70,104,101,109,102,88,97,86,90,62,80,105,95,109,93,106,104,149,81,72,33,30,44,39,76,59,44,47,101,143,127,102,99,78,118,92,111,54,39,54,45,44,46,59,29,32,43,56,79,68,45,33,82,44,71,163,114,44,95,89,91,59,44,58,70,109,69,51,64,118,161,164,171,175,116,109,162,153,166,126,150,96,57,64,4,11,20,66,102,58,81,65,36,38,39,57,80,43,12,15,11,50,116,82,86,54,10,4,15,78,113,135,132,138,161,113,55,23,22,19,27,34,27,34,37,45,32,24,13,43,40,43,13,46,43,75,74,94,81,110,93,107,116,94,55,10,43,61,42,22,43,38,26,38,24,28,12,25,25,38,12,20,64,77,104,110,110,156,99,110,122,107,159,182,171,72,27,61,74,73,79,129,159,70,48,39,77,53,95,53,17,31,16,17,29,19,41,70,67,84,86,85,71,72,92,29,62,47,43,70,60,35,39,34,46,73,15,56,23,25,47,62,35,33,68,39,35,49,52,71,15,40,32,30,58,33,21,52,45,25,34,38,53,67,40,23,27,9,36,35,9,39,18,23,20,5,26,42,2,11,38,30,23,16,41,12,34,34,20,20,16,21,39,50,21,15,22,33,34,16,15,21,28,28,30,33,17,19,9,37,28,13,21,16,21,32,18,36,24,54,19,42,17,16,27,12,7,5,33,4,22,41,29,19,37,38,6,30,22,23,13,14,23,29,24,35,11,17,16,24,31,27,50,33,20,25,12,28,23,59,94,79,35,16,17,30,19,41,21,27,24,55,13,27,10,26,42,44,7,22,19,37,41,47,14,38,33,18,34,17,16,31,38,36,20,39,30,18,51,18,45,28,18,33,15,32,23,39,39,31,9,40,44,21,26,17,44,32,59,23,12,30,11,20,5,36,32,17,38,39,42,34,26,35,21,44,49,13,17,31,46,201,251,255,243,247,243,233,233,232,246,230,240,230,246,218,216,118,70,54,29,0,15,3,12,18,25,25,32,18,22,23,71,154,192,228,235,233,236,241,224,238,223,214,232,207,175,194,189,205,170,169,174,195,162,168,198,166,151,115,134,154,120,117,100,88,71,52,39,54,32,10,6,4,8,6,18,3,26,17,8,25,0,7,13,5,9,13,19,5,18,26,11,0,154,133,161,173,164,134,100,111,96,88,93,121,132,149,183,67,67,38,50,41,23,31,48,35,42,29,26,60,159,220,253,238,243,251,243,242,231,188,244,222,227,233,234,129,111,168,130,134,148,135,131,109,157,122,136,125,133,167,249,130,49,64,105,122,117,126,187,205,240,211,218,220,164,137,131,117,129,186,127,85,72,73,165,107,108,155,156,148,147,176,94,73,68,88,125,81,87,77,76,53,44,63,83,93,93,128,104,130,97,116,114,81,82,108,140,109,129,65,50,72,98,117,165,200,224,238,252,246,250,253,229,235,247,238,255,249,250,246,226,255,246,254,248,240,236,243,238,253,226,254,251,219,254,250,247,237,242,245,255,255,255,240,245,240,233,152,144,175,151,172,154,154,137
,137,155,135,157,153,183,145,145,130,139,118,50,58,60,110,125,100,125,104,77,69,75,103,134,139,131,110,112,94,99,94,69,57,71,64,81,97,63,103,96,85,60,94,98,103,116,104,54,84,97,75,100,59,14,21,20,55,54,42,47,36,121,134,119,106,62,103,91,105,56,34,13,23,32,26,45,70,59,49,53,105,155,141,126,134,141,146,110,162,167,96,172,135,102,95,113,114,141,82,40,13,75,127,108,171,127,136,116,62,88,75,91,69,24,22,32,16,25,23,6,1,30,57,83,73,87,89,118,167,232,233,234,251,249,178,242,146,38,10,9,41,100,133,152,135,144,124,142,92,34,52,67,96,61,85,111,58,78,94,56,43,74,73,22,34,38,41,62,76,94,81,63,72,83,57,49,41,39,29,28,50,37,41,18,30,21,38,37,14,17,41,21,21,53,64,103,126,106,102,139,108,104,101,105,87,119,105,101,63,29,42,76,86,82,103,134,179,96,20,33,45,19,5,21,26,28,18,36,34,58,74,87,82,81,87,78,79,63,53,99,117,109,123,112,69,73,92,96,61,80,68,52,69,69,76,106,81,70,53,53,56,79,80,70,49,97,51,71,121,77,79,91,77,47,60,53,41,24,24,8,24,28,27,12,17,24,49,3,29,39,13,12,25,13,19,53,32,38,30,27,50,28,21,26,52,28,33,33,27,23,20,30,54,32,23,33,16,34,51,18,22,15,32,11,44,41,19,12,18,8,2,33,48,47,39,19,16,37,45,15,20,22,15,26,38,36,26,33,29,33,26,6,27,21,18,21,19,11,21,36,11,25,15,12,24,26,20,32,1,18,34,7,30,37,112,56,21,36,9,13,38,20,28,15,18,14,20,36,25,16,32,14,27,39,14,6,27,48,29,17,29,17,37,2,35,13,28,37,19,24,17,37,7,12,38,20,8,7,23,6,22,37,21,22,17,30,17,31,32,20,41,18,27,16,37,28,19,28,39,52,47,36,57,13,56,32,48,14,22,36,42,39,14,62,238,245,252,240,224,243,234,248,242,247,246,225,242,245,228,210,129,18,2,3,12,18,30,36,50,51,77,48,52,57,24,96,165,232,232,249,250,243,195,234,232,201,198,215,220,230,198,229,202,235,227,235,215,250,219,230,225,221,218,224,221,235,215,186,183,178,142,169,92,12,0,23,10,26,8,42,6,11,1,26,14,24,9,22,10,9,19,22,15,27,21,19,20,176,129,125,144,166,143,140,95,86,78,53,88,107,198,180,50,38,19,13,53,58,33,18,4,14,23,66,53,97,179,178,167,165,167,175,165,98,114,157,194,136,116,107,74,79,139,97,120,83,100,99,69,101,96,84,79,115,168,236,83,41,99,124,182,205,239,255,254,238,186,154,150,168,131,101,80,84,145,107,72,57,66,61,48,90,101,68,68,133,152,97,100,78,84,92,94,100,99,102,72,90,86,58,80,114,78,97,110,107,98,81,85,64,45,96,141,136,106,61,75,37,102,149,196,159,115,169,207,197,202,243,235,249,236,239,245,254,249,239,248,246,250,255,236,254,246,241,240,252,254,250,247,252,253,241,253,244,255,248,239,235,252,189,192,239,24,24,13,41,54,38,112,25,28,91,27,60,47,6,43,81,52,55,44,49,84,111,117,91,59,80,62,76,91,84,111,102,131,94,113,122,127,116,96,86,90,83,57,94,119,94,92,92,62,78,86,104,83,94,105,54,59,50,80,87,54,40,56,32,51,88,47,56,53,120,111,100,124,61,71,78,84,81,44,59,50,52,50,0,60,62,59,40,124,135,110,114,109,141,120,90,111,130,98,134,97,89,95,107,116,115,76,70,31,59,63,41,67,26,13,34,15,25,16,18,17,10,8,16,31,81,124,139,186,206,230,252,251,234,253,247,244,229,245,232,241,250,230,252,124,54,14,105,114,140,177,128,101,98,100,113,107,117,117,138,142,173,154,163,146,159,153,141,122,136,79,46,35,94,70,71,77,60,104,89,72,52,57,77,34,18,29,26,43,46,33,34,47,12,53,21,32,34,14,69,37,80,76,95,91,73,103,72,59,66,75,81,26,41,33,27,38,47,63,85,82,73,76,131,167,158,56,28,4,28,32,6,35,22,50,74,70,87,64,50,88,63,62,57,62,55,80,120,175,160,206,174,147,131,140,153,151,143,144,148,167,156,153,132,141,164,159,165,169,165,167,164,137,157,150,165,148,129,172,157,157,146,130,76,50,22,9,2,16,12,20,40,26,41,15,23,44,27,50,35,35,59,27,74,37,38,40,64,74,70,82,29,45,50,14,38,31,30,38,30,39,26,22,41,25,15,13,16,2,10,5,18,17,36,12,10,25,16,26,14,25,11,18,39,24,32,44,24,25,40,33,37,53,25,32,40,30,29,20,31,33,23,25,10,2
22,104,101,92,125,94,62,81,100,76,88,111,68,24,36,23,32,21,32,24,9,64,213,245,253,215,158,173,184,192,130,101,46,117,191,193,179,177,172,166,134,180,157,107,94,86,37,32,12,14,15,48,35,23,43,87,98,33,65,152,159,152,179,186,173,178,178,187,157,178,189,167,170,150,89,116,73,66,29,32,31,53,62,43,63,34,66,109,146,134,166,139,181,139,143,192,196,198,178,189,165,182,150,121,89,62,43,43,7,63,59,18,78,28,44,40,81,114,116,154,120,120,122,143,178,160,183,208,196,185,136,146,124,117,92,66,51,44,12,45,21,37,14,50,26,37,16,20,4,37,8,19,14,13,9,6,38,29,8,19,38,15,4,40,8,38,28,85,76,24,41,32,70,60,68,87,105,158,139,141,147,99,91,59,61,62,87,108,115,98,117,112,126,145,142,146,148,78,5,23,36,52,54,50,57,39,32,38,43,45,13,38,66,64,64,36,45,72,51,68,55,66,35,71,25,55,62,56,27,76,83,64,61,66,28,21,26,4,2,30,62,72,71,70,101,98,96,88,80,85,87,93,50,71,64,97,77,76,76,51,62,38,52,48,53,58,77,65,54,59,63,49,54,64,8,7,7,7,9,51,23,13,19,37,54,28,25,48,41,57,61,39,54,41,45,60,37,42,66,44,33,49,56,45,37,35,76,109,46,27,18,39,31,57,40,43,23,40,29,34,42,29,41,32,30,16,15,18,10,38,40,47,12,21,22,17,11,29,23,54,10,19,30,21,20,26,38,20,15,22,50,1,32,26,40,38,30,13,21,14,24,33,20,38,30,28,25,36,29,40,15,14,16,57,93,58,22,2,24,21,30,43,28,23,32,37,19,30,49,25,53,31,19,17,26,25,33,13,15,15,21,16,42,41,49,49,32,28,30,19,40,41,53,22,37,47,15,17,34,21,49,6,6,45,12,20,31,33,35,16,33,31,15,20,31,43,25,12,21,15,30,26,19,13,24,14,54,24,37,43,45,26,35,19,28,16,34,25,19,42,62,34,26,51,94,125,87,28,17,36,27,37,24,18,20,29,34,20,14,48,16,38,25,3,44,21,42,37,21,30,20,35,31,20,15,28,35,16,40,29,12,28,19,23,11,5,4,12,24,26,34,13,24,23,32,17,35,24,37,15,25,34,67,138,223,229,175,185,201,213,204,102,44,59,31,43,43,37,28,51,39,39,52,45,74,54,41,73,82,30,42,96,116,142,80,30,38,35,86,42,36,41,32,63,51,53,53,76,65,48,61,82,27,69,74,54,47,66,60,102,75,52,59,51,73,50,70,48,20,19,9,31,32,18,11,4,18,11,15,14,11,8,9,42,3,27,2,9,10,21,16,12,3,228,254,246,236,224,223,221,209,193,118,115,141,162,192,233,161,156,149,78,102,73,33,13,45,55,79,95,79,106,109,94,66,46,45,78,71,56,65,74,54,91,79,65,75,52,69,52,86,97,78,24,8,62,86,73,69,69,84,81,31,36,92,50,56,51,81,45,64,79,103,74,51,54,76,35,45,37,62,69,58,89,110,71,80,72,47,40,95,75,109,127,155,130,88,85,80,40,36,48,74,69,79,71,82,88,87,105,107,55,76,80,82,67,50,36,29,16,33,57,27,25,39,42,111,111,71,98,118,192,222,227,228,206,145,141,187,238,129,62,18,41,98,87,80,83,74,55,22,22,17,23,23,23,12,33,31,20,50,116,52,16,72,132,171,83,58,137,118,152,156,116,127,92,108,94,91,120,68,76,53,71,28,40,43,28,26,32,57,98,104,58,25,44,182,163,174,170,178,172,175,111,93,128,147,112,131,114,86,90,96,51,62,42,42,46,65,92,137,137,121,104,90,72,210,198,209,170,142,146,94,106,109,96,86,42,63,26,17,16,11,14,19,17,22,60,16,40,19,21,22,27,52,39,14,9,15,21,16,50,30,16,25,36,39,41,35,15,53,42,30,33,43,79,84,107,136,96,45,83,28,142,142,109,125,114,94,73,79,62,74,81,105,93,111,97,131,107,87,143,122,112,105,60,47,45,13,13,56,16,36,54,40,22,29,56,32,31,22,39,64,82,63,55,52,54,77,53,64,63,66,80,46,55,56,71,70,64,80,37,55,15,29,35,34,13,43,59,65,83,108,73,87,110,84,89,93,80,84,90,81,77,72,62,69,49,43,67,53,25,43,52,71,30,71,55,58,74,53,35,44,18,33,29,19,14,28,28,21,38,23,33,42,44,58,72,39,37,46,38,38,38,50,49,17,38,27,50,59,45,45,56,44,52,35,107,102,49,29,41,57,65,18,32,34,22,26,45,40,27,14,40,55,47,30,36,45,26,28,18,34,36,8,20,22,36,13,32,37,40,28,16,24,30,26,37,23,38,43,18,13,27,21,21,6,37,45,27,38,28,13,31,17,40,41,49,43,22,28,46,35,11,82,74,45,14,22,57,35,15,35,20,36,32,36,30,23,14,38,31,19,21,13,11,20,34,26,25,26,14,18,14,23,52,23,28,28,4
6,12,17,27,20,20,27,47,35,28,29,43,18,14,16,5,19,38,25,31,31,13,17,14,34,37,30,8,7,13,31,24,14,3,34,33,33,14,41,4,25,18,30,3,28,46,15,23,24,19,52,59,31,14,22,53,125,87,63,22,7,36,26,19,19,14,12,33,6,18,34,34,31,28,21,37,40,46,17,28,27,13,4,8,52,8,54,6,37,28,25,33,32,27,22,24,38,30,21,18,31,10,47,31,24,37,52,41,52,57,53,91,138,146,162,160,95,152,127,115,147,201,162,137,113,89,67,39,44,25,18,46,51,43,38,54,67,58,40,51,66,44,59,44,68,62,61,53,57,48,46,57,41,33,50,69,44,39,50,37,52,59,42,62,82,54,60,42,60,52,68,82,48,51,70,62,40,42,29,34,22,29,7,21,12,22,26,36,5,31,7,15,2,5,25,33,1,9,5,9,23,32,13,12,247,240,224,235,244,228,216,208,153,125,132,123,127,167,218,156,116,118,57,101,104,38,66,70,84,134,111,152,158,166,131,156,152,121,140,157,112,116,94,140,137,138,113,139,118,130,101,128,118,123,128,35,89,141,126,103,86,114,100,97,71,101,111,128,144,110,111,105,96,110,88,55,60,34,74,64,45,57,55,69,63,72,95,134,110,127,93,150,147,185,246,245,202,120,82,43,39,35,28,44,61,80,58,54,53,80,84,56,53,58,40,34,42,27,20,52,25,51,100,106,153,151,191,222,241,238,220,198,225,197,179,101,61,26,35,76,111,74,53,5,47,40,75,105,80,121,88,113,114,154,146,137,180,167,182,138,121,146,150,86,3,33,102,69,42,49,75,57,24,26,29,48,40,30,38,57,62,85,91,75,85,122,118,134,130,134,120,143,125,130,87,17,31,90,105,86,80,35,27,11,11,36,30,57,87,49,71,59,89,78,149,162,172,148,137,154,148,140,136,136,105,42,38,102,113,83,59,23,55,44,43,65,21,29,23,2,18,26,38,23,42,36,13,9,26,25,32,33,47,31,49,72,41,32,47,29,49,72,62,59,70,96,114,84,47,59,91,115,107,76,50,10,103,169,160,114,91,67,63,77,64,83,63,52,40,59,76,70,88,81,93,101,106,105,110,101,107,112,79,82,52,49,21,18,28,43,70,116,54,32,46,33,34,61,59,50,16,35,49,65,51,82,70,62,68,57,84,100,77,102,61,83,47,82,45,48,36,16,40,20,38,14,47,60,82,84,93,85,94,94,115,85,83,87,67,66,70,83,75,103,72,61,38,59,75,81,48,37,57,45,58,51,48,77,65,60,42,63,23,20,6,16,22,25,34,30,50,63,36,57,61,39,29,53,51,62,19,53,37,38,67,56,46,59,30,69,23,42,45,27,36,58,19,49,31,78,99,53,31,22,37,20,25,2,22,29,12,18,18,25,29,21,39,30,25,32,43,29,22,16,17,28,32,25,10,53,3,33,6,23,51,27,54,35,32,30,25,18,25,48,20,32,21,19,19,20,37,15,30,31,9,18,45,33,33,19,30,37,15,24,24,55,74,35,19,26,30,39,8,20,30,37,20,42,15,40,19,5,12,14,20,27,53,38,17,10,27,28,10,40,28,21,34,29,39,42,24,13,43,42,17,31,18,54,30,29,16,40,33,24,41,51,35,28,40,34,34,40,23,4,33,35,24,4,29,16,16,27,16,5,27,14,16,3,14,4,29,31,41,27,13,46,5,15,24,19,71,70,23,11,42,28,65,83,114,67,28,30,0,25,26,20,17,5,29,25,14,37,23,5,29,44,32,3,38,14,29,17,33,18,14,10,4,5,33,33,31,19,26,30,14,39,14,12,26,35,20,36,28,17,31,60,89,105,126,119,114,156,168,135,186,112,45,85,37,55,94,140,167,168,164,120,111,42,28,55,28,39,52,62,62,38,66,70,68,52,53,52,43,57,57,56,41,62,36,45,64,68,50,39,44,33,34,24,37,47,55,52,27,27,50,34,54,49,54,46,72,43,44,25,57,38,54,51,45,36,16,32,9,25,6,13,42,31,22,12,14,21,6,6,0,10,5,14,17,9,18,3,13,28,249,242,244,242,250,251,233,197,205,129,119,112,145,110,94,76,126,133,73,143,106,100,79,108,128,153,174,175,185,179,176,168,190,174,176,165,184,166,191,184,164,165,182,147,180,185,173,159,132,136,127,95,116,160,169,133,145,129,169,123,154,160,151,169,142,173,165,143,151,106,94,53,86,60,108,78,68,80,56,72,81,42,53,82,89,83,74,83,86,137,157,148,112,81,66,71,78,2,35,17,47,40,60,93,54,36,58,14,34,65,31,57,78,117,97,121,171,202,252,247,236,229,228,218,225,208,176,111,78,52,28,3,6,34,28,29,62,93,98,96,133,156,172,199,208,226,183,222,195,188,236,219,134,174,175,145,119,99,109,51,32,13,47,38,31,34,76,108,112,118,148,140,181,159,179,115,145,208,223,230,139,220,182,187
,152,138,112,118,83,81,38,38,59,48,46,74,90,93,111,98,132,162,150,184,163,146,114,152,144,141,156,142,156,90,77,86,46,38,47,33,24,15,13,19,42,21,6,22,18,35,41,39,30,10,39,19,21,14,12,8,33,19,0,29,20,20,37,55,52,73,117,136,150,180,165,150,178,185,217,167,154,194,198,183,92,84,169,209,185,158,94,75,204,203,134,117,81,42,39,52,34,42,34,91,69,73,98,115,84,118,83,120,104,119,105,105,72,70,67,15,41,20,65,48,97,132,155,175,192,117,50,25,42,63,30,38,41,57,49,82,78,53,76,52,61,84,75,85,69,81,70,53,84,82,42,68,25,25,17,3,24,30,44,43,66,81,96,104,68,86,98,93,90,83,78,64,96,98,75,48,60,56,52,54,47,49,77,78,48,59,84,47,77,57,50,36,29,20,23,13,17,38,33,43,48,60,53,74,64,52,36,18,56,45,28,47,53,56,39,48,58,28,52,77,42,41,30,26,50,63,53,17,18,42,35,32,106,106,53,19,15,11,39,36,8,27,30,47,11,33,26,25,25,40,26,14,10,42,47,28,8,58,24,8,28,39,34,35,9,7,11,10,22,37,0,17,32,22,33,18,15,15,45,20,10,18,32,24,38,22,53,30,10,30,31,21,47,22,35,30,37,44,140,36,22,14,13,42,31,16,40,12,27,37,28,22,54,21,17,20,29,7,15,47,27,33,22,17,17,21,44,36,15,20,24,31,21,29,42,6,55,15,30,5,30,24,24,7,35,61,21,43,28,31,32,10,32,24,26,20,24,15,2,1,14,36,29,20,5,28,9,6,22,24,23,50,13,20,22,38,14,19,29,33,13,39,65,65,45,35,46,17,25,48,95,99,93,20,18,28,30,12,23,32,31,6,15,17,15,16,18,23,50,5,30,20,5,12,36,45,25,17,24,12,17,18,48,29,28,33,20,31,8,20,21,29,27,21,28,37,46,102,130,115,97,102,120,139,129,107,119,99,106,92,77,56,81,95,146,133,103,126,84,51,41,21,29,32,41,45,71,51,52,34,33,79,45,64,55,56,65,50,44,36,38,47,60,73,60,41,59,50,44,32,46,59,47,68,51,33,53,67,29,47,44,40,41,25,84,41,42,70,26,56,57,48,8,6,31,19,33,31,22,10,53,6,4,19,2,35,22,11,5,18,6,21,5,22,14,45,255,224,242,254,251,245,209,230,189,123,103,107,94,86,55,77,143,180,133,162,175,131,90,130,160,163,169,174,182,183,181,176,180,183,189,173,199,188,193,210,208,174,182,210,203,154,184,162,124,174,119,41,86,123,204,179,126,178,171,157,171,188,153,154,139,153,148,166,171,133,122,141,100,86,110,73,92,46,42,58,54,57,32,6,27,38,41,56,18,14,41,50,91,84,83,115,44,32,2,30,53,39,94,97,77,89,119,81,128,111,134,186,178,211,213,230,242,214,233,201,176,136,111,98,68,52,65,23,18,33,11,32,86,112,71,59,67,91,158,155,184,200,193,171,167,116,126,127,123,134,156,106,105,117,122,87,131,140,127,104,59,19,79,127,77,116,177,210,190,192,200,190,158,182,171,148,149,121,114,122,125,88,101,102,94,71,102,110,110,64,81,25,36,117,148,193,203,185,190,151,177,150,147,140,110,120,112,73,60,34,34,53,31,31,21,15,17,33,18,26,27,46,42,43,18,13,43,16,31,22,26,39,20,51,38,38,57,66,57,49,69,60,61,72,65,72,141,163,168,191,209,191,207,220,184,199,214,214,234,164,162,217,212,175,108,126,165,188,202,170,94,75,184,146,70,73,52,44,41,37,115,118,110,125,137,133,153,123,139,79,116,122,106,74,75,64,59,10,38,20,35,86,105,131,148,129,132,172,173,120,33,42,87,115,69,50,13,64,71,77,40,72,84,55,72,88,83,107,102,119,92,61,59,55,14,0,8,29,36,22,38,79,65,87,62,92,77,83,62,76,86,55,43,44,92,73,55,66,51,68,50,69,83,35,86,79,60,71,47,88,94,67,39,33,42,26,14,46,21,10,27,7,23,53,25,41,36,39,45,23,51,59,68,70,42,49,22,53,65,55,58,62,52,39,46,41,36,40,41,35,48,25,66,18,13,32,53,82,112,43,12,30,16,4,43,27,45,43,22,33,13,19,44,31,13,45,43,42,24,35,21,40,1,23,49,25,34,19,39,38,29,34,22,14,49,41,34,54,38,21,24,22,31,26,38,28,23,5,39,44,34,44,37,18,32,30,43,33,31,30,24,65,95,79,9,18,5,27,24,30,20,26,36,18,19,6,23,36,5,42,18,29,26,13,8,20,11,23,76,45,29,11,39,20,49,26,17,49,6,44,30,20,36,22,32,35,29,21,28,41,29,11,10,61,57,20,33,38,6,33,32,21,27,29,19,2,14,51,29,22,29,37,26,36,21,10,27,29,6,20,31,8,13,21,22,25,63,90,22,42,2,25,8,46,88,104,87,78
,44,22,1,14,19,23,34,16,15,26,21,36,32,33,43,50,55,7,18,2,15,7,19,20,24,51,7,28,76,11,46,8,65,35,17,21,14,22,30,28,36,24,27,88,92,107,56,83,90,99,86,50,74,104,120,95,77,81,87,83,91,57,77,94,88,98,60,70,81,70,123,97,110,56,110,113,105,82,88,59,82,105,98,58,81,71,83,74,104,88,61,62,73,74,52,21,59,79,94,90,96,105,96,73,64,65,84,89,84,119,89,83,86,62,47,74,66,65,48,16,14,2,47,5,18,30,8,6,8,18,14,24,7,4,6,10,14,30,11,20,15,43,241,252,250,244,244,244,221,212,170,108,126,132,145,104,86,102,138,128,110,114,132,70,73,71,115,127,137,120,123,97,135,113,127,115,145,153,139,140,134,131,125,101,148,122,157,132,160,138,43,72,28,0,3,74,96,101,65,88,106,123,125,114,148,116,92,121,120,131,102,112,105,118,97,87,81,89,61,59,28,37,47,41,54,51,46,82,104,105,75,80,94,60,136,102,110,144,92,19,39,54,132,129,170,161,160,174,221,195,189,207,205,210,220,213,141,212,129,109,65,41,42,32,37,34,7,8,49,65,55,41,36,59,162,187,95,41,19,54,85,155,147,126,128,81,25,32,52,24,43,77,66,75,118,122,127,121,181,159,199,171,67,63,167,171,123,140,190,169,148,107,119,85,63,56,22,2,0,7,13,28,125,72,90,135,145,175,200,217,192,163,103,55,70,114,111,140,84,97,49,67,47,27,25,25,13,14,57,10,31,23,12,14,28,34,13,3,48,1,23,31,11,43,45,46,32,19,14,26,25,57,28,38,35,56,112,152,162,198,245,228,227,248,205,217,123,150,231,232,240,222,209,179,207,220,197,192,215,198,206,133,121,188,164,187,176,183,186,205,153,136,71,23,90,117,98,91,97,90,95,108,111,128,134,143,120,135,134,125,111,95,100,53,25,35,36,8,52,49,62,78,130,130,140,119,126,134,127,127,83,32,5,9,94,94,71,72,53,54,78,52,81,78,64,89,87,84,58,90,122,126,58,36,34,32,26,24,26,30,33,61,59,69,69,43,76,78,54,54,79,42,74,78,79,72,45,45,58,52,47,48,67,43,50,69,60,54,46,45,56,55,59,23,15,13,32,21,2,47,5,30,47,36,51,33,58,24,43,32,51,66,51,34,52,58,58,49,57,43,18,59,32,76,54,50,22,46,27,47,50,48,57,54,31,27,45,43,38,45,91,95,31,43,46,11,5,43,34,33,28,26,21,33,27,39,10,30,25,26,10,37,33,46,10,26,27,12,18,56,20,30,37,27,19,28,12,44,45,40,39,23,44,21,32,38,14,29,12,31,55,16,31,6,31,42,19,26,23,18,38,36,62,32,75,76,35,49,39,16,31,18,54,44,58,28,30,8,19,17,18,17,35,20,23,41,18,18,30,29,46,35,17,35,7,21,33,32,5,38,28,50,40,20,40,38,18,19,35,39,26,38,38,29,28,8,28,7,26,36,0,34,25,39,31,27,36,36,24,27,34,26,15,25,27,45,17,20,26,39,8,53,35,16,21,45,25,47,97,82,13,17,4,20,17,28,25,60,112,122,72,41,0,13,5,14,29,12,10,27,44,15,17,8,30,11,27,13,20,40,12,15,22,30,58,35,44,42,24,10,37,22,16,42,19,24,24,8,13,34,21,27,38,55,139,132,110,104,123,141,73,91,105,102,118,142,101,99,79,101,90,73,84,83,83,109,127,122,130,138,132,141,151,148,150,149,129,169,134,103,117,129,115,104,100,95,117,108,120,105,95,56,94,66,81,103,84,112,127,139,113,169,134,133,146,137,140,161,144,159,153,155,163,120,110,125,137,100,12,3,10,2,19,19,19,20,5,32,12,9,7,17,27,7,13,32,20,3,9,20,22,1,250,249,247,230,238,238,223,221,170,120,144,118,162,146,162,206,132,50,15,4,22,51,17,20,31,20,25,37,58,36,23,23,55,51,21,18,58,37,52,55,41,40,28,57,26,76,123,106,48,41,41,42,45,33,23,23,37,35,21,31,55,33,44,42,54,52,32,44,67,47,51,44,55,53,68,44,25,24,13,34,118,158,175,168,160,195,195,191,175,160,198,171,169,155,151,157,174,168,207,233,220,239,230,220,209,236,210,188,149,120,106,114,82,66,103,67,36,48,25,7,10,43,74,57,29,25,54,62,53,48,42,110,229,251,212,120,25,10,28,51,32,25,40,36,27,34,42,46,60,40,14,64,74,90,75,80,69,63,80,74,18,31,85,83,49,53,49,52,48,31,30,21,38,46,85,82,119,129,107,152,134,151,159,185,139,184,153,126,119,64,66,60,29,34,12,28,8,30,40,11,22,24,28,55,18,43,28,29,29,15,45,10,37,32,11,24,13,62,28,23,53,43,28,50,9,7,17,32,36,65,69,67,109,132,150,
174,165,199,195,196,191,175,203,146,106,123,165,156,197,143,159,158,169,149,121,164,168,153,154,101,92,136,140,154,160,156,160,139,96,62,54,38,108,122,150,65,65,68,67,75,115,87,84,98,70,61,99,82,62,40,46,30,30,54,54,48,68,86,115,133,149,141,139,132,144,134,132,79,19,22,62,92,63,64,74,89,81,77,50,54,68,44,40,69,106,108,86,121,69,60,36,30,18,39,28,36,67,80,79,66,67,62,57,71,57,82,67,83,58,62,81,66,72,74,50,43,74,35,56,37,51,42,56,94,79,63,67,25,66,20,11,43,15,11,15,9,47,31,65,34,58,32,41,53,64,15,57,50,73,57,46,51,41,40,61,43,30,47,44,32,46,59,55,65,65,36,30,45,44,27,82,42,55,42,50,37,57,38,113,110,66,31,21,35,36,38,40,22,18,14,25,34,14,23,23,35,37,37,22,20,29,13,22,24,26,47,18,36,26,13,22,12,47,25,20,50,17,12,23,11,37,16,57,43,10,8,12,18,16,37,52,11,3,39,51,31,18,51,47,23,21,31,56,82,88,34,2,33,34,25,26,44,39,51,7,12,42,51,27,6,21,37,33,32,19,48,32,6,29,23,30,17,16,33,20,15,26,61,38,33,48,20,24,41,17,22,23,25,23,28,48,23,31,22,23,23,31,14,36,12,12,23,38,29,24,32,13,31,25,20,24,36,13,6,30,11,51,2,22,18,28,41,11,10,44,32,101,24,27,23,12,20,40,15,21,14,40,112,105,90,40,32,14,15,42,25,22,11,33,23,5,9,36,15,20,0,13,49,24,27,42,38,17,14,41,31,17,18,14,28,50,15,20,16,40,34,30,14,18,24,48,59,85,126,110,142,151,162,170,142,162,130,146,120,141,131,129,139,110,124,148,118,154,155,128,115,137,150,177,150,136,120,146,167,183,152,141,124,111,98,109,116,109,124,98,134,112,120,113,114,104,138,134,122,109,140,156,174,176,162,192,164,150,177,191,160,166,208,193,190,165,150,148,172,191,93,28,3,22,12,14,3,0,5,9,3,23,30,5,0,6,18,23,22,12,8,20,30,7,3,253,248,248,225,230,249,226,215,159,98,168,122,147,112,156,167,91,27,15,41,34,61,32,23,29,13,21,32,43,20,22,22,27,44,21,29,28,17,19,27,42,35,19,32,80,80,77,45,35,59,33,45,34,29,41,46,20,16,8,25,25,41,51,20,32,68,22,33,45,44,24,45,52,69,88,58,69,70,105,109,175,253,223,242,224,224,203,195,177,187,210,207,203,185,191,229,219,218,228,201,211,175,180,168,92,111,78,92,122,117,85,41,12,25,32,20,7,88,64,45,74,52,55,44,46,43,23,45,44,24,63,148,242,252,233,197,40,4,32,2,47,17,34,19,19,36,35,50,47,42,32,28,51,34,39,19,32,51,47,53,47,48,27,57,54,101,63,88,89,102,128,101,144,123,144,135,68,141,155,129,95,72,43,26,18,45,45,25,25,13,27,44,53,26,28,59,32,38,40,23,16,39,30,17,23,33,25,27,48,21,23,26,18,40,19,27,9,50,34,29,19,18,31,38,27,16,21,28,39,25,28,38,23,19,40,27,26,30,52,24,20,43,27,50,27,57,10,41,46,43,17,55,55,33,36,64,32,50,34,41,54,63,52,84,46,30,19,41,23,38,47,57,21,47,44,46,41,49,35,47,24,56,26,43,17,71,24,49,33,60,66,35,61,77,103,131,133,157,140,110,108,124,111,141,136,138,119,108,81,79,131,92,67,78,59,69,61,82,59,69,73,71,89,93,86,112,97,23,64,36,27,14,24,35,76,101,73,86,83,70,96,49,64,44,39,51,81,79,56,49,52,72,59,65,59,47,50,69,43,63,82,66,52,77,55,88,55,32,17,22,7,24,36,19,30,31,36,31,33,42,61,65,33,52,58,33,47,51,38,40,47,51,27,65,35,71,43,27,46,36,41,29,27,68,43,77,51,31,58,45,33,17,63,36,46,32,36,10,23,72,86,89,60,5,19,26,31,21,37,32,18,32,26,15,26,10,10,47,53,19,46,26,14,24,24,42,11,34,32,39,23,36,5,32,6,33,6,4,25,13,52,21,32,19,37,38,44,43,40,8,30,44,10,21,37,28,62,35,34,17,41,55,46,102,93,25,5,43,37,21,29,31,28,2,32,43,38,17,21,28,20,17,26,46,46,43,23,20,29,53,30,35,10,18,35,26,37,22,46,43,26,35,17,55,20,33,35,35,57,25,28,36,31,35,24,49,28,28,24,36,16,11,27,9,21,44,29,19,29,20,7,48,20,13,30,25,14,30,28,10,28,41,34,33,22,56,70,20,31,16,29,34,13,27,20,9,39,52,104,101,59,54,18,10,10,6,27,33,53,27,27,26,3,13,25,24,18,25,15,18,27,16,22,27,25,31,35,7,18,41,30,28,8,16,37,18,9,25,45,34,27,85,82,70,62,94,162,179,178,200,197,189,150,198,170,155,175,150,161,158,178,198,1
72,162,194,157,167,172,154,166,153,142,171,160,140,132,132,134,107,124,124,119,106,129,129,127,128,156,142,123,130,136,138,130,153,128,152,167,158,172,158,174,180,158,170,191,187,207,156,177,201,166,169,169,190,125,7,10,0,10,20,3,12,2,20,0,16,10,20,12,8,8,5,10,4,0,37,8,11,34,250,249,248,249,241,235,225,213,146,115,175,140,151,76,56,56,24,21,22,22,48,39,46,31,33,40,56,45,23,14,33,12,31,18,25,38,32,43,23,48,41,9,41,39,69,85,40,45,23,27,54,43,46,53,41,66,59,60,38,39,46,57,69,57,49,46,32,58,103,110,86,102,165,131,147,157,162,175,222,218,229,237,230,217,177,156,150,131,147,160,167,142,138,153,156,145,129,129,106,77,64,47,46,25,25,12,40,74,144,124,77,79,35,30,24,41,34,56,41,46,30,30,48,54,36,40,32,66,68,25,83,153,243,233,236,225,92,16,2,24,40,31,44,46,67,61,48,64,56,49,65,60,63,53,64,75,110,85,116,136,92,64,99,123,123,142,140,151,166,147,142,106,89,63,74,19,58,19,31,26,27,20,11,27,29,23,29,5,40,28,42,5,48,43,52,24,46,55,36,30,29,29,23,53,59,50,38,29,31,20,42,31,27,21,51,13,18,23,53,29,23,18,21,26,18,29,23,30,18,17,29,31,45,4,14,24,36,56,31,28,23,24,19,24,50,60,22,28,14,3,19,12,21,27,24,37,24,32,32,28,55,45,58,34,29,27,7,31,43,41,62,46,29,33,14,27,29,47,40,31,29,45,32,54,56,68,68,68,71,89,114,106,106,132,113,146,111,122,137,126,109,98,117,114,119,131,136,146,121,92,78,52,58,95,84,91,82,62,85,70,112,112,93,107,62,46,31,9,17,10,12,27,60,75,99,86,108,118,102,96,93,78,63,85,50,55,80,67,63,83,73,51,56,58,47,70,61,58,48,44,69,70,58,55,37,25,37,22,30,42,27,19,54,15,51,33,43,50,61,51,65,52,69,54,57,54,65,50,47,50,29,36,32,58,40,28,31,55,49,53,42,37,35,66,28,33,30,54,36,33,48,66,63,14,31,47,67,58,45,49,83,140,75,25,23,5,21,25,8,27,19,4,18,27,27,13,15,41,67,15,36,29,11,34,27,26,43,31,26,25,30,45,47,24,2,29,39,19,42,34,29,31,26,25,52,39,27,24,20,9,9,19,45,14,14,15,25,16,23,26,27,44,32,73,63,54,34,29,19,20,63,18,21,44,35,37,33,6,14,17,41,24,14,29,23,10,2,43,26,36,48,25,19,26,9,36,14,38,21,15,31,13,33,31,55,19,36,29,47,41,49,28,52,14,56,15,33,32,8,30,24,13,22,33,19,26,36,45,18,11,14,12,40,38,30,9,6,35,43,17,12,37,15,7,11,71,61,7,36,19,10,16,38,46,25,7,32,27,69,82,93,74,39,17,34,40,30,23,18,17,23,22,8,21,13,32,39,47,17,29,33,41,16,13,42,11,35,31,13,14,37,22,37,12,42,14,29,26,13,44,71,74,92,124,96,69,128,202,178,209,191,196,203,181,177,197,170,173,149,176,198,197,199,195,180,179,173,180,179,170,168,176,164,175,160,180,169,169,171,146,146,136,168,157,152,155,163,170,168,152,176,170,163,160,177,141,152,170,166,184,170,202,182,183,169,161,164,167,189,170,183,154,186,185,188,105,12,1,13,3,21,6,7,13,5,2,15,15,2,17,27,13,44,32,22,7,18,10,11,40,250,255,250,229,224,238,224,214,141,147,154,194,205,84,21,24,33,32,44,31,54,32,50,37,21,41,23,28,50,30,24,41,26,25,24,44,46,64,51,49,36,50,27,57,60,72,85,51,51,41,38,37,38,48,34,82,85,112,105,166,135,142,162,147,196,182,209,196,219,191,226,181,205,227,204,221,222,229,203,198,163,169,168,164,145,148,171,138,99,137,118,120,116,84,13,85,11,30,8,33,26,24,32,41,34,19,61,63,75,52,60,47,42,86,141,129,91,65,30,18,35,39,63,61,56,30,55,80,28,62,132,152,178,162,182,159,99,64,31,35,52,52,55,47,50,112,87,120,97,120,164,170,163,144,143,124,146,149,139,119,65,32,48,71,89,50,37,59,39,32,34,44,15,11,38,17,27,22,15,53,3,33,17,24,38,20,32,37,45,28,41,29,7,33,10,30,7,50,45,30,15,23,4,23,14,28,12,17,16,19,17,36,36,19,14,36,31,19,59,36,24,28,37,40,18,19,14,22,40,6,24,29,31,24,6,27,0,30,7,37,27,24,35,27,45,51,14,14,0,17,5,56,38,26,30,52,32,47,46,37,24,13,37,47,51,12,8,13,31,7,58,35,15,31,44,36,40,56,33,58,60,43,97,98,108,115,77,115,137,126,82,110,123,148,118,95,98,102,120,127,102,121,100,133,123,119,1
23,99,59,67,45,48,72,55,62,71,70,107,89,116,103,97,57,50,29,32,17,31,19,58,94,104,97,75,109,131,82,97,86,78,79,70,92,89,79,81,77,71,64,70,89,61,46,40,67,58,31,94,35,69,83,44,66,26,18,18,12,30,11,23,30,22,46,60,48,34,63,69,56,41,42,75,63,41,59,70,49,38,30,35,43,45,31,29,51,63,35,40,29,49,55,42,41,22,57,42,58,43,37,30,62,39,35,42,44,35,39,31,54,37,36,72,99,73,28,18,28,25,26,30,40,9,38,26,19,32,21,15,37,54,37,31,34,21,23,40,36,32,33,15,58,8,19,26,23,26,18,62,36,20,39,16,47,44,34,18,39,31,36,26,34,10,57,37,23,47,20,40,31,22,28,36,18,61,98,42,32,21,25,36,27,31,18,38,26,45,24,12,36,22,40,27,25,21,30,13,26,36,40,12,20,32,23,42,35,38,44,34,14,45,44,36,35,23,28,23,23,41,9,31,41,27,26,28,35,46,12,9,31,21,54,53,5,39,33,20,25,11,20,31,15,15,36,27,33,21,12,7,37,15,23,23,23,15,48,83,35,11,43,28,18,19,25,17,18,38,28,20,33,51,97,84,76,27,44,31,6,22,10,15,29,33,31,6,31,11,24,38,8,25,15,47,30,46,17,26,26,24,29,22,18,34,18,25,23,21,24,28,36,95,83,105,135,186,154,87,64,108,163,176,207,186,195,190,179,167,205,210,205,183,187,172,183,168,182,184,198,183,186,172,202,169,178,209,187,182,176,157,175,199,194,188,185,185,197,204,146,170,169,194,188,190,186,186,181,190,176,195,165,170,178,179,173,175,180,210,181,187,163,160,166,145,166,180,166,114,17,7,6,31,13,13,25,2,18,0,22,4,8,7,16,9,24,22,7,41,18,35,16,37,244,254,248,244,237,229,215,191,98,130,156,212,210,61,24,33,29,55,29,46,31,32,52,51,10,46,33,48,29,59,53,42,34,25,44,33,59,40,75,58,70,56,66,39,51,128,109,85,82,100,140,131,156,156,180,215,235,206,200,216,234,244,235,203,231,234,229,221,229,221,200,170,167,154,128,135,103,109,71,38,79,33,41,74,144,169,196,169,106,95,93,124,80,45,0,25,31,34,58,38,90,39,45,37,36,62,60,59,57,39,37,71,84,188,255,246,204,129,26,34,63,99,34,37,39,34,53,58,79,168,191,141,86,61,71,91,57,92,90,120,162,112,145,135,140,171,163,130,153,138,112,111,126,100,72,45,26,53,22,48,18,34,48,6,69,19,19,50,29,34,44,40,47,43,41,34,49,19,39,35,25,40,62,51,46,38,45,84,36,51,50,45,31,43,51,50,43,57,48,36,45,66,52,38,42,19,46,61,53,52,61,68,50,51,50,53,22,37,58,42,32,49,43,58,47,33,31,47,37,27,62,44,41,34,66,45,54,32,55,42,58,52,38,66,61,55,8,35,19,56,35,39,40,70,50,38,36,42,33,42,47,25,40,16,14,33,20,45,16,53,75,36,36,25,50,47,77,50,33,53,42,68,98,96,109,89,86,99,83,106,102,106,111,111,108,108,108,90,73,113,97,98,136,115,110,103,63,69,46,50,37,52,76,84,57,58,78,91,70,55,58,26,34,24,26,27,7,73,74,88,118,103,111,115,112,84,91,67,75,101,94,62,77,95,71,68,78,63,53,68,69,83,79,67,58,66,73,49,63,54,57,35,21,15,26,24,26,22,51,47,41,34,51,37,56,62,60,41,57,55,45,44,73,39,62,72,43,36,56,53,59,38,35,35,76,25,51,41,19,37,18,48,39,50,42,23,23,47,32,70,34,45,33,60,25,48,58,24,39,44,31,30,87,100,66,33,8,34,39,53,68,31,19,5,39,21,29,35,7,0,26,21,43,44,28,29,38,42,31,54,46,16,35,21,19,46,18,26,8,46,38,30,21,35,31,47,20,5,28,60,49,22,26,20,8,53,18,41,51,52,7,35,11,42,91,63,57,7,20,27,12,33,37,38,25,54,11,35,29,34,38,42,26,34,16,27,38,27,36,30,20,10,37,47,42,16,37,9,31,44,23,18,55,30,23,33,14,46,20,42,32,30,44,20,63,21,33,29,17,14,41,25,18,31,32,33,49,24,13,19,6,17,46,38,14,8,32,2,40,33,35,4,34,12,57,102,37,26,52,17,35,31,22,34,16,1,34,22,15,12,88,98,116,34,9,15,26,25,23,17,30,49,9,53,25,48,30,20,7,0,36,10,6,5,34,32,20,29,31,20,11,56,27,19,22,31,25,27,142,159,98,72,59,123,138,62,20,44,82,146,178,197,200,197,155,148,220,249,229,203,177,179,179,190,168,174,169,178,167,170,170,173,173,194,193,177,188,211,207,188,204,195,200,189,202,200,197,175,186,190,199,181,167,194,213,171,184,179,174,187,188,175,183,173,211,204,188,177,207,197,212,171,175,189,184,96,10,3,14,13,9,0,7,4,1,11,
9,21,27,15,28,19,23,23,13,15,9,13,12,10,252,251,229,244,230,221,210,166,41,49,133,197,196,63,6,11,25,44,36,54,30,56,31,50,48,30,45,51,61,52,60,75,61,106,69,93,83,98,113,123,147,166,165,203,175,205,222,227,221,233,215,235,233,244,225,235,225,235,219,221,212,211,209,177,162,175,127,98,119,107,117,108,100,80,52,71,59,46,20,22,9,7,8,62,101,147,137,124,109,77,98,92,97,77,8,11,54,34,16,41,58,45,37,42,22,73,101,115,134,91,86,86,148,250,252,246,241,148,30,30,94,100,93,58,33,46,70,79,112,157,169,88,55,49,42,41,47,130,135,84,145,93,130,82,89,66,49,46,32,38,69,23,42,16,34,5,13,38,29,38,4,48,37,46,15,27,22,17,19,31,39,63,19,22,48,47,62,31,30,57,54,92,86,137,146,175,198,181,224,184,191,192,171,176,193,192,193,191,174,163,200,199,194,188,206,194,184,159,190,188,202,217,201,208,212,200,193,138,180,210,209,190,204,120,78,73,88,128,176,175,190,184,176,185,199,190,158,177,178,214,212,213,195,187,176,86,36,14,44,88,139,160,134,160,156,157,141,149,152,159,153,155,163,146,130,97,108,111,121,133,132,100,81,92,89,126,131,63,51,37,32,53,92,101,93,78,61,72,76,121,58,73,110,95,81,72,101,96,117,114,107,56,76,47,55,49,35,37,31,43,45,49,74,63,66,45,65,56,55,52,38,11,48,21,33,32,60,93,92,84,77,108,89,120,91,102,82,97,87,66,91,87,91,78,61,63,67,90,103,57,86,78,61,74,63,66,62,52,49,39,22,11,44,5,14,18,37,73,59,31,32,42,61,55,48,59,44,64,12,58,67,70,25,66,31,39,45,50,37,47,58,17,40,30,41,18,39,38,54,34,63,50,45,49,29,37,55,47,29,53,60,19,46,27,17,48,38,82,22,7,38,26,12,86,73,74,9,13,45,45,61,13,16,23,7,22,19,12,36,42,19,7,48,23,48,25,29,39,27,18,19,61,10,15,22,34,9,29,29,32,1,18,36,23,83,37,11,31,23,52,43,28,20,38,41,28,31,28,25,24,14,41,23,15,97,82,45,30,15,30,20,47,22,25,22,10,23,7,32,36,30,19,27,9,25,6,22,19,37,40,21,13,23,48,19,33,10,13,24,34,42,8,19,40,34,55,37,39,53,40,52,63,35,34,42,21,31,40,22,11,20,28,50,26,24,50,5,24,33,37,28,10,6,18,23,0,21,38,31,22,16,28,23,31,82,82,36,24,14,19,20,30,12,4,26,15,14,10,22,29,30,59,90,94,56,47,19,28,8,31,26,14,30,16,28,37,39,5,14,31,41,31,13,13,53,41,26,12,34,33,4,19,10,12,8,36,24,69,169,172,112,106,47,38,38,8,42,4,37,117,124,148,156,208,192,209,244,255,204,186,208,171,207,186,181,173,182,179,162,186,186,157,162,188,183,174,159,179,191,193,190,188,167,146,172,165,175,186,155,190,174,171,195,181,200,177,189,203,191,183,174,196,165,180,180,176,193,189,184,183,193,157,196,192,168,108,32,5,19,32,7,20,21,10,2,19,15,0,21,8,13,8,19,29,20,0,8,9,9,19,251,255,243,221,223,236,220,186,43,81,138,210,212,38,20,17,21,39,57,73,52,35,49,45,45,60,61,91,111,129,132,149,160,162,182,164,179,222,218,226,217,231,219,217,233,233,218,226,207,211,211,184,201,182,139,150,97,89,71,73,50,63,43,39,26,33,12,32,64,66,118,159,163,87,63,84,52,80,50,70,44,11,31,28,87,155,131,116,82,116,112,144,106,44,22,47,73,68,24,45,59,67,60,43,31,81,110,103,110,82,85,135,191,238,238,238,231,160,43,14,53,148,139,123,48,64,149,145,121,63,48,81,68,66,43,23,37,54,30,22,25,28,41,37,37,14,14,15,21,7,16,12,4,52,35,30,38,33,46,28,35,34,46,52,17,34,68,45,41,38,71,44,56,101,108,117,167,178,197,187,203,222,209,223,245,240,239,249,242,218,221,230,202,237,255,237,215,214,217,236,247,250,229,247,239,231,237,225,223,243,250,244,246,230,237,231,255,249,242,221,207,204,167,78,119,139,219,227,238,247,209,238,240,237,242,242,249,248,238,227,228,199,173,122,78,94,131,196,206,219,233,240,225,239,226,245,233,242,231,216,237,202,212,229,227,178,173,177,194,223,233,194,143,141,140,99,119,145,60,75,55,77,101,74,79,72,73,50,65,61,67,56,76,98,88,81,86,100,117,87,50,49,53,39,64,19,36,39,13,40,59,52,79,43,67,57,58,49,19,26,16,18,27,54,74,91,90,107,98,107,75
,84,102,74,95,72,98,72,75,88,70,67,60,45,45,68,94,80,76,69,46,66,61,72,77,49,102,46,32,31,38,8,9,44,39,40,47,54,67,55,57,47,57,54,55,42,54,61,50,56,64,48,71,55,45,42,48,53,30,36,52,59,32,58,41,28,17,49,50,46,55,40,55,40,45,43,34,58,44,38,58,50,52,48,29,37,52,33,34,54,75,13,46,15,82,122,63,53,25,25,63,22,13,9,11,4,13,2,13,20,19,9,26,15,0,22,37,13,25,23,8,17,22,30,45,9,45,26,6,45,18,23,12,26,59,26,27,18,48,30,20,16,32,35,12,15,43,47,36,42,18,46,20,12,69,95,24,25,34,29,17,19,33,26,20,33,27,26,39,44,27,22,25,41,22,14,19,47,34,5,15,50,19,32,32,38,32,42,54,45,32,37,18,37,55,36,33,30,30,23,35,40,12,46,32,25,53,11,26,33,39,14,32,28,18,38,39,16,26,49,31,16,31,18,2,22,23,28,15,23,9,22,9,26,56,83,15,37,28,19,40,9,31,25,31,64,24,20,17,8,31,15,77,102,68,78,23,20,25,39,31,33,9,17,19,30,34,24,33,23,28,13,14,10,49,7,23,22,20,22,23,32,21,11,28,28,15,91,184,146,157,176,144,120,79,63,41,43,82,147,125,119,161,208,156,177,207,180,113,145,146,165,164,137,165,151,167,160,182,181,152,146,138,169,160,139,173,169,164,167,148,163,131,142,149,171,143,153,189,160,180,183,179,188,171,185,174,189,162,171,175,182,172,181,180,178,166,194,160,198,162,153,194,183,167,113,13,19,2,2,33,13,3,42,12,9,4,6,21,3,21,19,17,16,15,14,14,7,34,14,235,255,237,248,233,248,223,169,77,86,136,197,247,76,82,77,82,130,121,161,151,147,184,171,175,212,217,202,230,244,240,209,215,162,226,216,210,194,203,180,165,139,141,144,158,93,38,108,110,103,66,38,26,11,23,6,23,31,9,60,18,33,43,22,34,50,33,35,25,58,104,137,102,57,62,83,63,95,108,133,158,127,113,63,106,157,145,122,97,122,107,124,114,32,0,61,133,157,146,137,173,151,96,36,9,23,48,45,25,43,117,135,153,181,219,196,193,126,15,5,70,144,119,108,35,70,121,113,53,37,25,44,80,18,38,20,4,41,4,38,40,33,16,19,25,19,26,40,20,44,11,55,47,56,30,49,74,64,50,44,67,68,64,53,48,90,108,134,162,171,181,208,221,212,225,221,231,235,233,228,198,209,204,175,150,158,163,155,186,193,179,192,184,185,180,164,165,158,185,205,195,169,165,183,198,201,205,197,203,171,190,187,201,213,215,214,202,203,197,75,68,98,120,157,179,224,212,193,195,216,209,215,237,250,227,199,181,157,114,74,58,26,90,110,174,168,208,226,216,246,213,211,193,211,180,193,205,197,171,190,196,217,182,205,175,166,120,119,147,181,169,185,190,142,123,84,93,106,82,78,97,121,119,78,81,74,59,74,72,71,81,91,104,78,98,91,115,89,81,84,54,26,45,53,37,69,54,67,68,113,52,42,47,84,47,48,46,17,30,10,31,70,68,74,110,102,109,133,108,74,101,76,77,60,69,88,84,63,91,59,101,49,61,59,86,81,84,94,55,61,86,43,76,69,53,39,48,21,20,24,57,10,31,53,57,29,61,51,42,60,50,47,58,37,45,38,66,50,55,40,56,53,58,61,51,65,52,75,47,56,36,43,64,29,37,40,35,48,32,32,40,21,44,36,55,44,31,37,56,26,28,38,48,43,44,33,46,58,40,49,13,33,32,10,48,85,120,91,36,9,16,29,9,15,27,26,9,32,27,22,25,31,23,35,27,33,23,40,7,35,31,37,27,36,26,28,4,9,30,16,35,41,35,13,27,28,14,30,14,16,29,13,40,35,30,31,32,23,10,24,32,42,31,29,75,72,55,32,25,14,26,33,17,3,20,32,52,49,30,23,25,28,24,14,34,24,30,9,16,28,27,23,38,25,13,7,47,8,58,31,30,90,17,22,55,19,52,47,41,37,38,45,45,26,23,38,30,41,44,43,36,31,13,10,8,11,11,31,48,18,19,29,27,7,19,28,36,27,18,22,37,15,1,10,95,60,23,27,25,27,14,30,21,24,20,24,19,35,38,20,22,20,42,39,109,98,26,41,26,16,21,8,18,14,39,27,21,27,13,38,30,22,36,36,14,20,19,34,36,14,22,29,31,21,45,1,43,179,188,192,201,214,203,225,204,161,131,138,96,100,82,53,93,142,150,142,177,161,118,157,136,156,148,157,169,159,146,145,143,136,128,115,121,154,132,131,127,122,137,119,166,131,160,127,116,143,146,143,157,142,125,154,137,134,143,152,150,163,172,193,169,163,200,189,178,158,160,171,168,161,176,187,206,175,174,117,2
,1,4,17,23,13,22,17,13,15,28,28,33,9,22,8,24,15,36,18,8,33,9,26,244,240,252,251,241,246,217,206,118,129,182,247,243,150,181,188,184,224,195,214,235,242,233,212,225,237,197,226,194,217,199,192,170,133,109,120,113,92,95,64,79,70,99,65,58,43,2,31,10,7,39,24,58,25,47,60,31,45,33,50,81,55,55,34,24,32,33,32,56,74,30,29,26,53,21,68,42,61,97,143,198,240,219,130,187,175,200,220,187,169,204,226,230,145,18,86,178,223,240,216,203,205,152,83,31,68,74,85,53,87,133,142,93,81,122,136,109,63,29,19,24,60,61,54,33,41,75,47,70,37,61,75,33,16,5,14,20,26,31,28,25,37,41,49,59,48,61,54,76,80,49,62,131,136,92,90,75,114,147,166,180,180,204,228,203,222,213,240,245,236,251,239,233,212,212,207,213,192,199,183,184,175,175,175,170,150,146,163,181,144,155,156,148,136,167,151,168,177,147,146,150,131,137,146,177,162,168,171,164,153,161,165,194,198,171,107,111,104,57,38,68,145,186,198,190,184,181,173,203,199,181,190,172,136,100,64,23,38,76,99,131,199,167,197,182,209,217,201,178,191,161,153,170,131,170,173,150,157,180,180,179,158,156,201,169,141,104,103,82,107,99,115,102,103,80,92,110,105,79,99,101,173,115,109,97,102,116,103,89,72,77,70,76,89,79,96,64,85,44,42,52,53,76,53,40,26,32,64,133,93,55,52,44,46,3,18,10,26,48,47,68,94,108,80,103,108,97,98,78,91,80,74,86,70,50,70,69,65,58,85,73,58,59,106,69,104,55,52,93,92,59,98,81,57,21,12,19,23,35,36,19,38,35,53,36,58,59,54,53,24,69,38,42,62,26,43,43,71,64,46,58,41,45,35,45,42,44,40,51,67,25,58,66,50,23,57,74,47,36,28,25,48,27,41,37,32,39,37,42,38,43,38,51,34,16,36,34,51,46,35,53,38,48,38,13,42,83,102,74,65,19,12,15,28,21,20,34,40,19,26,42,34,6,31,51,11,31,11,45,29,23,25,25,29,23,37,2,11,36,32,18,20,30,38,34,29,21,32,48,61,36,35,25,37,41,39,13,41,21,5,18,38,7,12,55,83,81,33,27,27,21,11,25,29,30,37,35,32,22,39,5,20,36,15,29,18,32,62,35,8,29,21,20,49,39,18,32,32,35,35,54,73,58,28,25,41,44,19,21,28,28,19,36,26,23,17,17,5,16,10,21,32,22,31,5,54,15,12,31,30,15,18,9,12,11,19,16,20,42,27,37,7,34,30,87,42,25,41,15,17,26,5,23,16,49,35,13,7,30,30,14,32,28,40,90,117,86,39,18,12,22,20,49,10,18,48,26,10,27,26,41,17,23,20,22,46,33,45,14,33,45,30,28,31,61,27,139,192,241,250,212,187,174,202,190,185,197,164,122,98,65,26,12,45,69,94,170,192,213,228,226,175,159,179,162,182,149,162,170,170,143,178,148,148,165,156,159,155,145,151,145,169,144,135,159,134,102,117,123,129,154,134,122,114,114,129,152,121,130,135,133,150,156,142,155,153,175,159,149,166,192,167,175,159,145,98,23,4,0,13,7,8,25,1,10,13,8,1,21,12,37,6,33,9,60,31,5,12,22,27,233,230,247,235,238,242,242,205,161,196,180,198,208,148,180,205,187,198,184,160,151,122,163,125,131,131,97,90,69,97,77,85,124,112,111,105,109,120,117,105,144,128,127,139,159,122,62,25,1,31,24,83,91,101,90,97,102,78,96,75,65,59,70,55,39,31,28,43,97,73,94,77,45,77,26,35,19,36,50,152,217,224,242,250,233,244,243,228,238,249,248,244,241,185,20,25,149,177,161,119,103,94,69,34,29,50,156,182,168,153,119,84,50,20,13,10,3,13,27,18,34,48,32,56,24,38,45,53,53,37,80,37,32,49,36,46,62,47,79,68,89,112,139,127,163,176,202,218,245,208,95,107,229,209,216,234,240,219,242,225,238,225,245,225,221,203,227,198,199,194,175,171,173,192,193,174,172,179,187,178,202,197,204,172,152,154,169,174,156,152,186,189,168,164,164,151,170,200,166,178,153,165,150,170,157,161,180,143,193,175,196,187,181,152,78,31,50,92,165,141,180,202,234,226,197,190,178,156,101,124,112,64,46,36,70,90,135,165,160,178,205,208,226,190,175,183,175,177,167,169,163,177,180,188,156,151,144,160,166,172,170,154,163,179,161,145,103,121,92,79,62,33,5,56,68,36,55,57,47,106,124,141,162,130,126,135,121,122,101,105,100,101,66,101,97,101,103,88,112,
78,74,76,70,84,64,78,49,54,64,59,37,30,47,38,26,19,42,47,71,80,105,119,101,67,95,91,110,79,74,85,80,56,86,94,77,91,79,72,59,61,65,66,94,66,63,60,60,80,60,105,62,42,18,34,19,6,5,25,26,60,53,49,55,37,50,41,84,57,37,50,69,47,33,74,53,51,38,44,50,63,49,29,68,54,50,25,26,49,60,25,33,37,45,20,51,30,36,71,31,53,36,34,38,43,26,33,43,61,70,37,34,29,44,62,56,55,66,32,25,43,46,29,44,24,30,34,37,95,99,71,14,46,18,16,34,34,19,24,57,40,23,25,28,5,35,36,16,27,22,57,35,20,31,16,33,18,23,26,26,13,38,38,31,37,29,22,18,46,52,20,24,31,27,48,28,31,38,44,29,30,3,20,20,8,2,85,94,44,28,17,40,20,24,24,33,58,24,26,30,11,12,34,57,52,27,37,29,19,30,28,27,25,21,42,35,46,16,13,38,56,21,41,68,48,15,15,37,23,14,45,40,35,26,39,18,6,35,8,10,38,20,22,24,6,22,39,35,9,13,16,28,21,18,20,33,13,45,39,24,28,30,27,44,72,79,51,57,2,29,30,7,12,45,31,21,29,25,37,13,22,51,22,30,32,49,92,104,88,58,43,21,28,26,26,2,20,14,39,15,27,35,24,27,3,24,33,20,32,10,39,32,22,15,17,22,62,184,222,208,161,145,88,104,152,191,175,202,200,146,149,87,7,20,14,10,44,97,207,253,233,245,227,212,250,244,217,225,209,216,213,224,233,250,220,220,232,233,212,204,234,195,217,234,208,204,206,142,201,185,154,184,174,169,155,172,168,139,162,155,136,147,141,170,142,148,160,160,151,147,161,150,149,169,142,111,103,11,3,4,13,24,38,20,8,21,6,23,27,11,0,25,1,16,17,26,30,21,20,20,10,197,183,201,188,195,177,161,144,137,113,118,120,114,103,138,123,120,133,129,119,129,91,111,95,113,83,120,38,3,22,64,102,153,128,165,151,136,173,197,187,190,192,203,192,202,190,118,129,60,19,44,115,142,160,151,151,114,145,133,139,112,111,124,129,108,56,44,89,110,202,187,186,158,109,81,68,58,78,100,171,241,247,224,229,215,235,255,255,246,232,212,224,171,100,3,13,68,70,43,35,52,9,38,55,20,102,169,236,184,117,54,61,30,23,19,13,5,21,28,30,92,101,74,44,47,61,69,67,75,66,78,69,117,120,163,142,178,192,204,215,216,230,225,223,217,220,238,235,227,156,86,180,218,240,214,209,226,253,234,212,202,183,181,190,182,169,167,180,180,165,172,184,189,170,166,153,162,176,159,167,203,193,157,190,178,170,145,159,158,169,177,167,173,171,157,166,159,163,167,152,174,149,187,170,166,156,175,168,182,177,123,79,102,57,101,113,137,189,210,231,221,203,205,177,163,108,62,53,39,9,78,79,100,136,137,156,165,198,197,197,181,188,212,149,171,200,187,182,172,143,158,190,185,164,166,181,150,176,141,157,164,171,164,170,170,131,112,117,88,84,72,8,7,22,48,18,37,29,46,81,149,168,151,110,142,156,141,131,119,97,93,93,101,106,107,78,112,108,101,111,109,95,53,82,96,73,60,60,33,6,32,35,6,34,52,75,61,95,93,81,92,59,112,92,97,91,96,82,68,88,78,71,100,67,98,75,88,26,36,58,67,56,81,93,63,59,94,102,78,98,45,28,31,24,8,21,39,31,53,59,77,69,28,66,52,58,60,53,65,39,70,51,49,41,79,71,51,19,65,23,45,39,26,50,37,52,50,48,46,56,57,39,39,27,42,32,41,35,17,21,34,28,54,35,41,37,37,33,65,36,39,67,43,42,28,50,57,46,43,53,27,42,19,30,12,37,27,38,94,111,82,27,22,28,19,17,26,29,11,23,25,25,41,25,28,39,50,44,20,41,53,41,12,11,17,39,42,36,19,29,21,20,24,21,10,8,29,23,47,26,22,52,32,39,15,15,26,26,36,27,11,41,28,24,37,34,118,64,41,42,3,29,52,28,31,50,23,27,8,49,18,9,41,45,34,22,23,50,30,9,23,37,36,40,16,44,24,14,44,40,29,33,42,41,29,46,12,47,16,13,40,39,41,39,29,26,14,19,20,16,44,40,28,7,22,32,25,33,5,34,18,44,25,22,23,19,22,12,24,17,34,10,22,66,94,22,19,19,7,8,25,22,17,42,22,41,20,49,16,17,4,31,28,17,30,29,49,116,93,56,43,23,2,16,26,30,21,23,18,12,17,17,7,14,24,27,38,20,13,19,26,26,21,35,33,67,186,213,218,180,154,124,130,167,169,161,173,195,177,200,146,119,111,115,56,42,109,195,252,244,244,252,252,226,249,249,247,255,250,255,250,246,235,251,254,226,222,254,252,255,25
2,252,239,253,247,252,253,248,224,254,219,235,237,226,252,234,222,255,216,211,214,190,188,187,171,185,181,164,175,138,156,154,161,140,135,114,15,8,22,0,12,21,12,6,3,9,16,17,4,20,21,19,38,5,16,8,3,32,1,23,80,84,44,59,58,55,72,92,107,138,138,155,154,130,135,134,138,146,171,130,157,156,166,148,161,142,152,67,13,34,102,140,193,187,199,166,166,168,167,173,130,144,151,161,127,106,122,117,25,41,46,75,131,147,132,131,155,133,166,134,151,165,161,204,198,70,23,60,157,205,229,211,193,140,103,89,112,131,151,171,215,208,195,144,139,150,180,166,142,106,68,94,49,21,29,60,113,138,103,109,111,127,75,20,15,64,150,131,74,75,33,88,99,92,79,76,105,134,49,19,130,186,124,89,62,97,101,127,56,21,38,110,151,246,227,229,255,230,228,220,203,215,177,181,192,192,186,199,184,74,73,154,187,208,181,176,160,159,174,138,144,163,161,159,138,178,162,138,160,192,186,183,155,188,148,155,175,160,166,160,158,155,177,150,176,164,170,158,178,174,159,139,135,141,140,141,142,129,156,165,148,189,167,159,198,159,155,108,109,60,38,75,55,157,188,184,211,204,203,189,161,119,87,48,9,53,32,100,113,145,122,160,149,136,161,151,160,164,173,161,191,193,158,148,153,171,155,128,137,129,151,157,139,170,170,186,156,153,160,158,147,162,164,166,166,159,129,104,64,60,112,51,23,39,44,60,59,46,81,94,113,129,115,132,107,118,117,100,66,83,87,104,82,105,102,93,127,89,110,115,102,93,68,83,80,85,51,23,9,42,39,47,51,51,73,108,99,75,76,85,88,63,94,81,102,75,91,48,79,76,90,91,51,86,89,67,65,51,32,88,87,80,88,87,79,73,64,77,41,32,32,17,16,21,21,37,42,50,76,80,59,48,37,22,44,66,34,44,34,52,16,46,45,41,52,28,30,32,53,74,29,55,41,52,53,64,47,24,43,58,37,35,41,26,52,28,67,20,43,52,51,55,45,54,62,29,37,43,53,47,23,20,38,47,48,48,38,30,33,21,18,47,17,30,26,34,6,15,51,49,96,77,34,28,22,32,23,24,6,32,42,9,24,14,17,18,25,52,52,0,36,31,19,44,37,15,40,16,43,33,19,13,31,8,47,26,45,35,38,37,44,31,48,17,49,16,44,38,37,27,33,28,34,18,20,36,74,100,27,42,19,21,34,37,19,31,36,30,27,16,21,49,17,39,14,22,20,5,37,38,20,36,25,19,31,14,26,11,29,38,29,17,21,42,31,33,7,42,17,47,37,31,32,33,36,39,39,12,3,9,10,33,38,28,16,38,58,18,23,28,21,19,14,32,10,6,30,13,13,30,8,31,39,87,67,30,23,11,0,29,29,26,30,17,11,52,22,34,27,7,40,16,5,24,38,20,49,85,149,115,61,30,0,36,13,25,37,30,16,20,32,10,34,23,23,27,44,33,26,25,26,19,20,54,27,130,202,230,174,207,232,175,174,171,175,148,199,182,164,200,196,175,189,150,126,143,169,176,155,147,166,189,225,252,250,247,246,255,250,250,239,243,231,252,248,252,242,242,251,244,253,252,253,243,248,255,247,251,238,254,253,237,249,247,255,255,253,255,255,253,255,252,231,229,228,228,232,246,228,200,201,187,199,138,135,77,6,0,0,9,29,21,8,30,3,22,12,12,11,6,11,5,20,1,29,36,11,12,15,4,74,44,49,28,21,16,80,112,143,157,215,245,200,182,163,169,160,184,171,176,171,185,165,161,179,160,163,65,24,18,51,116,126,137,99,103,62,90,45,80,87,66,85,94,45,78,83,61,17,19,79,120,180,150,173,181,193,214,205,186,201,222,223,210,236,132,20,82,143,237,234,237,220,114,81,84,92,161,140,114,108,122,100,85,21,63,82,104,135,137,165,161,177,49,28,108,193,201,200,172,164,151,115,45,30,63,93,81,48,66,47,107,83,116,179,238,229,244,111,36,114,165,109,42,73,161,135,136,108,22,25,117,215,188,244,211,208,215,190,170,168,168,155,162,154,177,162,157,143,32,80,165,142,173,140,113,112,121,122,95,172,142,133,167,174,188,146,150,142,141,160,151,134,157,150,162,151,133,152,159,133,172,136,130,156,167,175,212,197,187,189,157,164,155,133,144,166,162,119,188,187,172,203,174,190,128,27,14,59,103,139,181,207,233,224,155,131,61,53,11,22,25,44,89,102,148,150,183,169,200,165,207,170,146,188,207,185,193,179,191,133,155,16
1,145,165,156,159,152,159,160,176,168,166,160,148,156,156,172,173,149,150,141,173,160,167,142,152,98,79,107,102,100,51,65,72,69,80,124,94,95,103,65,77,63,91,59,92,96,74,91,104,103,122,120,100,108,114,122,116,110,87,89,59,42,28,7,47,40,37,50,45,40,75,89,98,97,66,73,63,67,83,87,51,89,90,36,54,74,92,97,55,94,56,60,72,59,70,73,91,90,73,54,81,86,80,51,60,35,14,25,3,31,16,54,55,36,66,69,30,54,60,40,54,48,66,41,49,56,28,48,28,45,36,40,44,50,44,44,37,61,44,38,32,48,63,43,36,51,53,57,16,48,46,49,45,55,36,42,76,55,52,53,41,61,45,9,51,47,22,48,31,13,50,46,28,41,41,34,38,23,52,55,57,36,35,34,42,45,47,32,91,117,74,19,34,5,43,15,19,46,28,12,29,18,35,18,16,54,36,13,39,27,25,25,43,23,24,31,21,12,30,29,25,21,30,37,7,13,44,26,20,30,29,35,10,28,29,16,22,21,11,20,26,45,27,34,94,105,50,27,15,27,54,20,29,20,11,20,22,21,46,39,27,29,13,17,9,20,30,24,32,40,24,15,28,34,38,26,36,18,46,37,29,41,45,43,31,26,20,22,60,24,35,36,44,29,35,14,49,13,28,8,23,29,42,8,10,19,24,5,24,11,32,29,9,14,21,16,32,21,21,4,62,67,42,47,39,8,32,26,32,19,23,27,8,24,14,36,26,24,41,26,38,10,40,33,38,22,88,120,107,73,35,30,28,34,23,28,35,15,22,30,16,12,34,11,42,26,10,42,31,37,37,33,101,124,139,132,99,189,187,176,164,143,173,136,171,186,184,201,183,174,197,208,213,242,243,168,107,0,51,83,134,172,200,205,182,172,227,247,254,253,236,255,248,251,255,249,236,255,249,250,255,237,246,255,250,255,255,249,245,248,244,244,250,255,251,247,241,254,254,247,250,245,240,242,253,227,236,242,232,240,219,202,194,105,3,6,5,17,3,15,10,17,14,8,12,12,10,0,23,12,22,27,18,8,12,5,15,8,142,144,70,29,40,33,80,176,185,202,241,234,234,159,167,166,144,181,146,149,117,176,137,107,99,112,74,27,13,31,62,119,112,89,101,81,67,70,92,96,87,120,113,112,112,133,153,135,21,19,84,174,207,197,243,215,217,230,227,224,227,209,189,187,203,67,1,46,95,153,133,114,75,50,58,43,77,114,80,50,31,19,34,26,14,56,132,212,255,251,247,240,229,144,24,101,187,185,145,120,115,104,51,25,20,77,147,160,137,122,150,147,73,125,223,252,248,217,75,28,108,80,68,33,116,207,179,181,129,50,12,122,200,205,218,187,194,185,171,175,189,179,164,171,196,179,185,155,113,45,124,149,156,163,174,163,154,145,181,170,175,160,138,163,171,172,142,168,161,156,143,141,179,182,160,158,162,164,178,151,144,176,183,177,172,189,196,246,230,186,156,155,182,245,197,218,189,237,236,203,185,206,146,105,93,124,160,148,179,201,215,247,174,123,101,65,49,33,61,121,138,142,173,217,249,232,197,215,211,197,177,197,247,208,192,183,231,218,178,155,138,187,221,214,194,174,214,218,201,167,204,206,170,189,178,215,199,188,189,218,216,201,170,159,161,166,122,130,92,100,96,85,54,98,97,97,100,112,95,95,88,91,93,79,52,91,117,71,82,77,92,82,116,106,117,113,103,129,114,85,52,33,25,13,44,34,36,37,37,85,57,122,83,66,97,83,71,83,88,91,68,61,89,113,82,87,96,110,103,86,72,44,57,83,51,80,46,67,102,56,88,80,95,81,35,29,31,23,47,2,30,34,41,50,40,69,33,32,69,40,36,45,43,33,70,81,40,50,34,56,38,58,58,76,56,43,66,49,90,39,52,26,40,46,40,47,51,35,29,72,27,35,45,53,37,38,31,51,32,34,34,41,48,42,37,48,49,36,44,66,47,79,34,19,41,23,47,43,24,20,24,38,37,37,18,9,34,11,10,41,59,64,116,120,48,11,21,16,19,27,0,21,18,24,25,24,27,67,21,27,29,2,24,26,34,5,14,20,2,37,7,3,48,30,47,17,44,20,15,31,33,25,33,12,50,40,26,41,0,18,48,38,33,29,22,51,63,71,46,49,16,17,11,10,5,17,44,34,39,7,57,34,20,14,33,23,31,46,29,15,32,49,36,28,38,50,6,19,33,31,25,15,45,41,32,36,51,47,27,36,16,27,39,34,31,16,50,27,37,43,25,13,13,20,40,51,46,23,28,16,10,26,2,22,37,25,25,31,9,21,8,22,53,99,71,34,14,42,30,7,20,34,26,21,19,25,28,22,7,18,10,22,3,28,28,13,14,14,47,50,121,107,83,22,3,31,7,24,42,8,55,21,34,23,36,
12,15,27,21,13,41,28,39,91,101,32,61,74,55,131,125,109,124,104,127,127,163,174,202,197,200,177,204,231,239,250,253,202,146,8,18,48,45,97,140,161,133,139,169,236,237,230,198,198,235,237,227,228,235,246,255,252,228,248,252,239,255,249,239,244,242,237,251,250,245,253,247,226,236,251,246,250,241,255,255,239,225,254,211,249,236,219,227,241,211,101,28,7,14,4,8,11,5,28,17,12,7,7,13,3,28,29,5,21,26,10,28,8,30,14,168,192,128,57,37,60,101,149,151,145,193,217,158,87,100,93,100,61,75,80,43,61,66,68,69,66,56,31,23,69,116,125,142,169,156,178,187,198,204,202,212,199,208,209,203,221,214,161,81,36,97,207,220,224,217,231,218,213,206,206,126,152,103,99,57,22,33,29,21,23,13,37,18,25,17,90,77,85,86,73,54,38,36,72,105,142,189,236,238,245,236,244,219,83,18,33,97,87,76,55,50,30,38,31,15,45,142,205,184,199,212,168,90,61,148,164,159,101,2,24,84,73,63,26,131,236,165,170,118,16,6,104,164,179,185,164,123,109,144,189,181,233,253,198,175,186,222,223,111,95,227,245,225,180,181,205,222,186,165,188,228,237,184,188,211,232,187,165,194,251,244,154,176,242,247,198,187,213,242,233,162,208,242,247,175,145,158,197,124,104,128,133,251,245,195,163,246,244,234,147,137,91,41,50,112,243,236,175,133,133,85,32,16,72,187,217,150,128,240,220,223,190,235,244,254,192,179,227,248,171,168,235,234,201,147,173,217,179,84,118,173,245,239,222,166,205,219,221,145,170,248,237,150,161,228,251,233,152,201,248,252,201,152,161,162,133,128,98,87,113,87,47,43,108,102,119,112,109,130,117,106,98,127,114,108,81,131,98,97,112,77,111,102,114,136,123,107,70,13,23,40,8,19,59,61,52,85,92,84,79,88,70,57,86,105,63,72,72,70,84,66,58,88,91,86,73,114,57,83,52,42,74,55,70,80,69,116,78,127,124,102,75,54,39,29,22,23,8,30,17,52,80,42,44,61,81,65,53,52,63,47,47,50,27,47,69,52,42,23,46,45,41,36,63,75,57,40,49,24,40,48,60,39,42,57,30,19,21,29,42,30,30,50,53,41,43,42,45,23,45,34,40,52,63,47,46,39,30,25,30,43,33,52,54,31,46,63,35,42,30,35,54,53,30,10,25,22,39,21,35,23,0,85,126,85,64,35,6,38,10,21,18,29,24,50,15,15,19,31,10,18,43,38,35,24,32,39,25,26,38,42,41,29,22,14,37,31,19,33,24,36,10,30,33,42,23,33,25,20,12,27,46,20,55,19,8,30,91,92,33,19,20,20,24,29,18,15,39,37,30,28,14,14,26,33,26,11,29,27,12,39,24,34,38,22,29,19,24,32,29,34,55,31,38,29,41,42,56,33,51,25,45,24,45,18,26,14,28,8,26,7,18,22,27,35,8,18,26,38,26,0,5,7,16,35,20,21,20,15,20,39,14,54,85,44,27,9,12,25,21,29,29,17,18,17,30,26,36,29,24,33,25,32,11,27,20,17,42,27,24,88,103,97,52,40,11,19,11,36,28,27,16,17,13,23,27,42,24,16,23,48,47,91,138,87,38,87,62,52,86,121,136,133,102,137,172,153,181,207,208,194,175,183,202,208,203,243,243,206,121,100,68,46,21,47,70,127,167,154,147,144,147,133,148,139,144,172,219,242,217,151,201,191,195,199,206,228,234,232,244,243,252,229,255,255,245,253,243,248,226,255,243,235,251,241,211,222,233,246,255,241,241,254,232,255,115,2,16,15,9,29,7,19,22,16,28,14,20,12,0,0,2,11,7,16,8,8,26,19,18,146,134,124,26,47,45,73,113,118,115,112,140,75,49,66,58,91,105,87,109,110,119,135,133,152,162,170,39,15,78,151,215,249,249,245,248,243,245,248,247,239,229,244,243,228,217,236,155,57,20,66,109,201,179,159,140,121,83,73,59,50,65,57,61,63,32,42,28,48,40,51,25,69,79,108,101,51,38,50,100,83,57,92,140,202,237,197,190,228,179,178,153,128,19,11,84,59,75,93,91,97,77,36,61,31,73,124,156,131,123,177,171,28,25,83,89,78,63,20,29,124,150,115,63,121,202,214,211,143,28,12,105,154,201,158,82,69,75,95,128,142,226,245,176,161,200,241,210,86,149,236,250,202,167,220,234,181,150,159,212,252,206,147,174,191,205,183,135,183,200,212,137,181,245,241,181,151,223,246,228,160,167,236,240,133,116,107,101,85,116,146,165,202,228,149,153,16
,49,26,33,18,7,8,24,21,30,25,6,28,37,5,23,11,16,21,27,25,34,64,79,129,113,63,58,34,38,45,45,29,32,23,34,74,118,105,113,97,107,179,178,175,138,129,188,175,160,180,169,124,134,160,181,172,129,176,166,168,167,162,165,171,171,150,158,115,77,96,119,173,197,215,156,167,177,185,167,141,169,159,158,180,148,161,155,142,163,164,170,165,153,144,154,147,142,169,160,160,177,141,171,155,167,194,167,180,173,165,162,167,180,174,143,163,149,188,188,151,166,145,157,191,161,187,182,176,213,206,201,189,199,188,154,118,51,72,44,50,57,112,111,29,2,20,16,27,23,27,26,31,5,12,8,35,4,27,5,19,32,11,18,57,22,21,14,46,41,66,63,116,111,109,110,82,88,60,71,75,73,71,91,87,94,92,85,80,44,42,62,28,49,23,14,20,35,19,30,21,13,17,27,17,9,17,24,5,25,29,26,29,20,29,28,25,11,18,18,35,6,21,11,22,40,20,26,20,28,43,19,38,38,57,85,74,102,115,163,144,135,122,96,20,22,23,31,43,38,61,23,41,29,0,26,8,31,26,7,38,33,40,12,20,47,30,66,41,70,63,137,166,168,208,209,208,184,167,160,163,185,152,166,126,158,188,176,174,161,147,169,140,129,145,124,155,144,146,139,144,130,124,125,153,107,130,121,118,146,168,135,133,97,113,147,114,119,118,135,136,121,112,116,108,97,74,67,123,109,102,131,97,101,107,105,122,98,89,96,97,106,81,108,109,118,89,97,112,123,106,96,121,114,138,113,113,121,128,149,133,130,139,116,81,130,108,136,122,143,136,104,128,89,115,99,98,119,103,104,104,122,93,93,132,97,94,102,135,128,110,116,127,118,149,136,137,101,90,134,139,110,96,129,118,89,68,72,82,133,66,74,105,109,91,93,94,124,118,133,145,105,148,128,108,105,144,122,121,120,99,106,93,92,73,86,49,93,74,61,72,81,78,38,68,91,123,155,159,151,137,137,184,227,254,247,252,250,246,246,182,111,38,4,12,28,30,47,32,48,38,35,55,43,61,64,60,71,81,89,61,55,71,69,37,44,45,43,67,54,67,42,64,49,20,45,61,72,37,60,60,69,48,48,57,60,52,52,5,48,34,51,24,39,64,36,48,55,32,36,58,58,75,54,31,52,24,55,16,21,48,51,30,25,14,40,11,44,42,27,31,48,29,43,41,46,20,52,37,30,20,20,23,26,20,47,28,34,46,32,31,32,51,40,21,33,32,28,12,31,29,41,21,34,27,34,38,31,67,32,40,18,64,16,16,38,16,59,52,23,26,28,35,31,51,33,10,16,8,26,32,2,12,24,1,15,30,53,27,13,22,14,34,36,28,31,29,39,23,51,22,47,23,33,40,28,24,24,46,37,28,54,5,22,25,19,20,27,19,46,57,47,25,28,32,48,42,67,116,186,174,153,148,143,109,68,33,36,45,41,32,41,19,24,35,23,48,19,28,25,75,58,68,31,41,20,15,8,31,29,37,40,19,33,14,35,54,53,55,41,49,46,19,30,28,47,17,19,24,24,33,48,24,53,30,17,28,12,29,29,21,17,21,9,15,40,12,20,39,54,19,3,34,15,21,21,48,8,19,45,40,31,44,31,25,30,24,53,36,84,96,126,153,110,62,46,48,28,3,48,26,16,35,50,36,67,93,123,130,109,92,150,199,182,165,159,149,155,171,182,170,180,164,158,190,186,136,155,183,162,158,160,173,159,168,200,180,192,159,135,128,88,103,134,182,163,161,170,153,130,159,173,185,156,161,178,160,153,163,168,177,172,199,160,151,152,145,129,125,134,162,160,184,145,145,177,151,150,166,171,140,165,158,150,133,141,157,166,159,156,135,167,137,162,175,163,149,173,169,194,188,213,195,191,183,195,170,161,106,52,39,61,74,79,3,11,10,9,18,18,19,27,21,4,5,38,27,11,3,7,23,23,11,4,34,18,13,23,39,28,39,69,89,102,91,92,97,99,104,112,100,106,105,63,62,44,47,21,39,2,26,17,38,32,12,35,53,31,17,22,30,51,3,21,26,38,31,32,27,18,19,47,42,41,45,19,16,37,20,34,45,22,11,27,5,22,56,13,16,7,21,11,26,60,85,117,127,143,132,159,136,113,110,77,46,32,30,11,28,19,19,51,52,48,13,20,29,18,33,18,26,19,30,16,34,94,106,143,166,178,177,217,196,204,181,195,174,184,173,173,160,176,177,163,157,139,166,161,151,143,142,126,127,138,148,171,139,140,116,115,113,144,110,103,139,120,152,127,151,138,148,149,139,104,151,111,120,146,93,93,114,125,138,98,131,95,107,135,107,114,147,1
11,128,113,124,115,115,100,122,103,112,91,92,119,81,73,65,62,119,70,78,111,96,137,97,122,118,134,119,139,126,86,117,73,98,93,122,120,110,105,124,107,146,97,90,85,62,90,92,97,103,88,86,66,91,101,93,121,129,118,101,116,111,90,77,120,112,119,98,93,121,97,64,100,111,70,82,99,93,113,96,108,146,125,130,147,154,151,126,124,129,110,89,91,108,92,102,118,101,86,102,114,101,106,71,103,116,79,79,56,78,72,54,45,53,86,61,116,162,139,129,110,106,131,165,186,249,252,255,250,234,230,198,132,69,3,15,53,23,35,49,63,66,56,56,66,69,62,74,53,90,46,59,62,89,75,30,51,70,77,34,57,90,60,57,47,40,47,67,41,37,38,86,48,66,44,33,71,57,70,46,55,60,47,51,30,55,16,28,76,49,13,55,48,53,27,35,41,68,37,50,58,24,38,18,41,39,35,33,40,39,18,51,39,17,33,31,33,14,55,43,30,15,58,28,41,11,28,11,47,15,56,39,13,23,58,29,42,33,25,59,43,55,18,26,28,46,19,46,41,14,41,37,33,24,51,31,27,15,0,12,43,30,45,6,4,34,24,29,27,19,2,29,17,16,32,48,34,20,41,5,36,33,6,7,21,20,26,28,37,35,5,63,52,49,43,44,7,18,8,27,26,39,48,42,31,5,26,21,32,50,36,27,20,43,45,35,21,44,69,60,102,103,151,171,162,165,118,131,113,109,58,35,47,26,29,24,28,24,17,26,41,74,61,58,32,15,33,32,52,50,14,40,71,113,120,86,48,47,50,63,25,22,34,60,29,45,48,42,33,27,38,18,59,34,21,25,6,9,5,39,50,22,17,8,12,31,34,46,44,20,14,20,40,17,15,7,8,17,31,27,28,11,37,40,30,50,108,148,191,129,104,56,60,33,15,29,14,39,27,19,49,18,60,62,106,110,110,100,126,106,176,177,187,182,158,137,170,154,147,176,159,146,165,147,144,166,157,160,163,156,182,171,166,138,172,146,187,181,159,143,87,76,126,118,175,177,176,169,173,159,161,181,153,170,154,201,168,176,158,174,174,162,166,150,126,123,152,146,162,164,144,178,155,144,177,168,159,186,179,180,184,148,152,135,152,168,175,192,154,150,158,170,147,187,180,147,161,163,184,233,224,216,213,172,177,222,145,149,115,120,90,72,106,11,10,18,29,4,7,22,19,14,9,14,16,3,3,17,17,1,28,17,5,3,9,26,3,41,39,41,50,97,135,100,93,103,76,37,63,27,57,25,5,25,28,19,31,44,27,2,4,18,28,16,33,31,18,13,19,30,14,40,29,36,22,13,41,38,21,9,23,33,30,36,36,21,8,40,38,12,11,44,15,11,17,25,21,30,17,33,38,42,43,59,99,112,97,82,57,40,54,18,34,43,11,7,15,56,43,25,29,52,41,7,42,17,12,6,17,26,5,41,50,87,146,199,232,199,197,206,165,159,157,136,162,146,161,138,150,136,127,129,141,133,146,141,148,129,132,113,80,107,99,130,129,139,144,151,114,127,111,109,108,97,143,151,144,137,136,121,127,117,112,93,127,136,140,121,144,140,149,155,152,132,136,117,134,136,147,170,162,162,123,142,151,144,141,154,152,132,119,134,130,120,141,121,128,119,135,174,150,120,131,149,136,175,126,121,130,119,122,113,117,137,127,110,130,104,87,94,111,108,105,92,103,131,130,111,98,109,130,110,118,144,103,152,134,140,159,143,151,163,131,115,148,116,108,100,120,108,105,97,114,121,112,127,137,120,122,133,112,75,84,106,109,88,114,101,108,62,76,84,77,92,98,94,80,86,61,117,91,90,110,63,81,71,69,63,50,76,47,54,66,64,53,30,27,65,52,76,109,160,116,72,106,142,181,231,242,255,253,246,255,235,170,87,42,20,4,21,43,61,58,57,61,71,54,50,64,75,51,59,54,73,77,66,57,46,60,73,24,86,65,31,59,36,39,61,61,48,54,38,45,63,54,45,68,61,50,55,26,57,58,66,51,37,27,44,65,59,60,35,44,51,51,63,69,6,27,72,23,72,26,46,43,18,45,12,19,36,43,25,23,27,40,33,12,40,28,28,35,39,54,29,64,37,30,15,47,45,3,56,5,35,22,27,36,21,32,7,47,21,24,23,38,65,43,35,37,20,14,51,37,28,29,11,14,34,33,40,37,12,21,31,30,5,7,39,30,41,10,47,21,41,24,32,16,44,48,45,7,41,18,24,24,36,1,27,24,12,11,26,35,15,30,19,26,17,39,21,41,30,45,19,29,10,2,50,29,51,39,23,19,24,30,17,38,38,31,16,46,28,24,63,68,131,151,155,161,176,144,153,127,163,131,121,105,50,68,61,58,105,106,109,84,55,76,114,134,147,169,186,2
15,243,196,127,59,33,47,73,23,25,37,22,3,11,42,21,21,15,53,26,27,61,38,50,20,20,19,24,43,17,29,38,0,20,71,44,18,3,15,19,8,33,43,8,22,16,41,47,45,63,79,145,116,172,125,100,72,28,56,64,11,34,9,32,12,42,32,32,62,59,84,113,116,89,83,117,99,143,181,166,143,166,161,159,161,133,184,169,190,160,170,180,161,184,177,169,141,152,145,156,161,157,150,186,158,172,196,160,147,130,110,135,128,145,172,200,177,186,175,196,170,185,158,173,184,160,166,173,177,160,177,156,143,145,181,156,141,165,178,166,184,184,169,177,169,187,192,153,160,167,172,159,159,176,160,175,152,173,169,172,147,168,151,145,175,180,208,193,219,208,206,173,206,206,165,156,156,160,154,134,115,16,0,29,20,6,7,9,2,13,12,2,15,7,12,17,11,24,15,17,2,31,3,1,14,11,18,47,46,29,34,57,42,21,41,6,24,15,30,7,21,23,10,30,22,33,29,0,16,21,32,15,22,35,24,32,12,28,30,42,11,19,31,44,14,35,25,25,9,11,24,25,10,19,18,10,11,30,25,25,32,29,29,61,33,29,53,29,33,66,37,63,48,57,29,28,13,35,36,20,25,47,23,11,22,13,35,33,30,40,26,8,9,47,29,7,40,44,30,50,22,30,137,177,203,150,137,138,113,107,87,102,126,131,131,124,116,111,106,101,122,105,90,139,124,107,134,125,129,131,130,147,127,164,136,125,121,126,125,141,114,98,134,133,114,127,105,110,135,96,86,114,113,125,129,91,130,121,101,80,76,106,112,89,120,114,109,99,99,54,86,97,106,102,106,97,92,91,99,102,103,123,104,116,123,124,123,107,112,116,79,144,116,99,82,100,69,74,71,129,117,112,135,141,121,107,96,89,108,71,71,139,145,148,115,98,115,119,86,118,115,140,124,148,154,136,150,124,127,133,141,141,134,118,111,103,116,119,96,151,143,134,133,139,134,136,128,142,93,99,111,94,88,72,74,89,87,66,91,96,72,120,108,88,94,103,110,67,81,88,86,66,88,71,36,54,63,56,42,59,66,50,64,25,44,41,36,76,120,154,187,125,127,104,141,151,195,241,234,227,237,248,251,229,174,114,39,1,15,13,32,45,62,39,58,76,46,66,44,28,48,72,55,45,56,59,60,64,44,43,69,52,57,62,55,47,45,56,42,44,73,69,42,34,49,60,56,55,52,53,53,38,71,20,33,46,60,52,60,26,25,46,30,38,57,43,62,28,57,56,51,19,38,42,53,49,8,42,40,9,22,33,12,60,27,48,57,24,52,29,35,57,38,34,26,33,16,32,18,28,28,42,2,8,32,28,37,41,33,27,16,28,36,26,20,31,32,33,34,39,41,36,23,22,11,28,24,36,25,23,6,14,19,15,31,23,25,23,18,34,18,40,31,19,41,15,52,8,43,44,16,26,41,18,26,26,16,25,17,17,31,12,1,44,19,19,17,5,41,37,22,28,37,46,39,24,36,38,17,28,26,34,29,32,24,17,14,17,17,3,25,57,30,29,40,47,57,86,96,134,169,172,181,195,165,185,174,174,152,151,177,222,169,179,221,196,193,193,193,196,139,156,112,69,14,32,40,45,54,60,25,14,51,30,31,33,16,20,32,33,25,51,29,61,57,34,48,36,29,41,17,33,55,55,27,47,59,66,31,31,23,40,65,84,75,95,69,144,113,167,119,94,97,46,109,29,22,33,14,19,7,31,21,7,25,25,53,61,85,85,84,102,95,106,120,102,159,169,171,158,170,174,164,172,172,144,178,171,170,153,176,184,147,169,182,160,155,154,164,157,150,168,166,145,167,146,157,171,180,165,134,85,92,117,161,194,191,166,195,178,155,171,158,138,173,164,159,189,167,179,167,181,182,165,151,165,195,176,158,177,178,169,195,173,159,188,165,170,134,155,172,178,184,196,162,146,157,158,147,155,174,162,133,150,162,185,208,221,212,201,189,179,182,199,172,153,167,165,161,151,103,3,10,9,30,11,17,25,7,11,31,41,14,15,11,29,10,11,23,57,23,17,11,22,36,42,15,26,15,45,36,44,38,38,34,19,17,20,38,25,34,33,33,21,11,27,25,24,29,43,25,9,20,22,3,20,24,18,13,18,44,25,24,18,20,21,24,20,16,37,37,22,31,43,36,21,33,19,27,30,21,68,39,46,42,37,35,7,53,56,32,32,34,43,20,27,13,41,15,28,26,38,28,25,18,40,42,30,35,57,28,36,45,27,42,21,27,49,22,25,27,38,63,90,118,145,118,118,94,71,97,99,116,115,125,96,89,108,88,110,107,132,84,103,120,137,135,161,131,134,149,155,165,179,136,154,129,127,140,116
,137,97,105,85,66,65,64,67,97,105,86,95,101,113,123,107,69,91,86,77,82,85,69,78,83,67,63,68,39,44,64,75,35,47,73,63,59,65,81,86,68,68,79,87,70,83,101,75,69,97,39,33,27,50,57,60,47,81,97,88,92,81,72,113,95,80,63,85,88,81,107,83,113,121,91,90,95,48,100,87,124,135,88,113,75,86,70,104,82,69,36,79,112,89,55,95,120,109,114,66,102,116,101,101,112,104,118,108,114,130,114,106,90,75,65,58,60,30,67,74,70,60,57,93,81,75,75,67,106,68,53,76,88,62,51,65,64,76,47,66,41,61,62,66,67,58,20,66,69,142,172,161,194,178,137,102,113,133,140,203,245,252,250,250,254,207,134,74,68,33,32,26,26,38,27,62,39,41,60,69,51,40,38,78,56,44,52,33,56,37,53,51,34,62,53,46,58,37,50,42,67,62,19,55,60,41,50,71,44,32,54,49,43,25,54,27,40,36,48,48,24,58,30,50,43,43,23,56,31,59,35,40,33,35,33,15,24,26,36,32,38,38,44,47,38,19,38,45,29,35,32,35,28,29,23,17,46,59,45,39,45,31,37,28,33,32,30,27,25,22,29,16,28,37,19,37,37,38,16,38,51,41,16,39,45,31,30,8,33,8,20,40,36,52,39,20,6,16,4,18,10,30,2,9,14,15,12,28,19,14,31,16,25,35,13,43,22,26,35,8,27,22,32,39,11,7,18,12,56,22,15,0,47,27,22,44,11,15,39,40,26,59,34,10,30,6,32,19,40,41,22,40,13,19,19,29,44,30,29,35,49,60,57,56,90,41,83,92,113,116,135,129,139,161,143,160,91,64,51,19,42,35,47,60,39,49,10,28,53,8,43,32,45,28,22,20,37,11,16,40,37,28,18,47,91,38,61,55,84,80,72,90,135,120,64,88,76,93,111,128,155,129,113,183,145,165,114,63,55,47,39,63,22,32,46,37,14,30,29,7,13,25,34,15,71,45,80,77,87,94,66,70,96,115,106,133,167,193,162,159,191,166,165,194,155,186,132,169,173,167,181,135,155,141,145,159,178,166,161,170,143,177,193,137,146,160,173,188,188,167,151,119,91,117,125,150,182,183,199,175,193,147,159,162,173,174,179,154,154,164,172,174,169,177,192,173,174,147,185,189,196,172,180,165,187,189,145,157,139,151,187,169,168,173,180,162,182,171,178,160,164,174,169,159,172,171,202,199,204,200,191,176,169,173,191,157,170,158,172,156,100,12,9,43,15,16,22,12,7,8,2,30,13,10,3,5,34,10,17,16,35,14,13,7,21,20,18,10,15,8,36,42,17,23,43,21,33,19,26,51,17,33,12,27,15,12,21,18,51,8,23,28,27,13,30,26,44,44,47,6,43,14,23,37,33,26,18,33,8,41,16,45,41,44,43,22,65,41,25,23,56,9,27,33,44,20,12,41,11,18,18,32,14,45,34,32,27,26,23,33,20,53,20,9,23,64,54,66,51,127,79,66,71,54,73,54,64,55,122,84,97,103,97,130,145,147,102,133,73,99,77,106,93,101,89,62,69,106,93,95,96,84,90,74,97,104,121,95,101,117,124,110,117,145,139,139,151,102,96,137,107,91,111,131,101,112,96,87,97,111,112,104,99,80,107,108,139,131,144,136,113,136,121,124,126,75,102,132,130,153,133,102,57,71,62,67,83,92,100,97,95,102,89,87,92,111,81,134,56,75,75,49,88,96,100,100,102,116,108,81,113,84,75,76,94,81,108,110,108,127,109,97,81,118,84,84,77,90,118,83,121,111,96,110,90,84,91,80,87,81,75,87,80,86,69,93,123,102,95,75,76,102,107,95,111,77,89,101,91,83,105,98,92,74,72,70,46,74,50,71,78,69,101,97,72,79,64,60,103,88,53,40,76,78,64,60,80,58,70,66,81,86,68,81,38,63,71,62,38,84,90,133,173,211,164,172,103,97,104,121,169,213,244,254,250,234,204,180,148,83,76,45,24,6,15,27,7,37,48,42,32,53,69,63,26,41,66,43,40,50,57,42,60,51,42,53,58,37,51,53,36,58,48,52,40,59,27,50,66,40,61,61,44,46,61,48,51,53,43,35,27,26,48,31,35,47,44,37,20,29,52,49,11,31,38,64,21,19,55,51,33,24,58,45,46,58,24,53,17,6,48,50,24,46,36,32,36,48,37,22,36,25,35,55,17,48,41,21,38,26,36,23,30,31,15,20,25,15,37,41,51,27,17,9,27,24,17,26,25,36,22,22,55,29,13,10,24,42,17,22,15,35,18,33,12,28,18,21,15,15,24,59,21,41,26,33,30,12,0,28,20,32,28,24,21,23,22,12,27,11,9,36,16,35,19,34,19,22,41,31,64,58,60,43,18,42,19,14,12,50,46,15,27,30,70,37,24,21,37,16,38,16,55,22,22,21,19,23,32,37,35,33,50,43,73,86,71,53,50,52,3,42,151,138
,61,28,27,30,20,28,36,34,52,34,38,27,64,41,19,43,23,16,27,31,15,43,37,52,71,58,70,72,78,95,92,95,79,89,85,106,63,79,78,61,45,25,70,39,61,44,28,3,15,38,16,4,24,36,48,24,30,25,18,48,81,67,69,73,89,105,95,109,89,128,143,181,206,175,151,150,171,159,173,150,145,155,174,151,131,130,160,150,156,146,147,150,188,169,159,127,157,174,176,150,172,150,155,175,160,182,169,126,105,96,78,132,158,144,175,182,154,145,180,190,173,168,133,164,163,170,197,175,174,159,170,154,158,144,166,169,165,139,151,172,174,153,173,178,172,152,189,184,152,179,183,178,166,182,166,171,150,177,163,158,163,173,182,186,193,205,180,184,174,191,166,164,163,144,159,156,116,21,1,24,4,17,28,12,16,8,34,11,25,0,0,4,14,15,7,20,9,11,14,8,11,33,45,38,17,36,23,31,31,24,29,17,29,34,27,50,43,27,43,26,14,39,53,25,30,20,25,23,12,28,47,20,26,9,30,35,18,23,35,38,10,13,14,37,42,36,49,42,40,49,13,32,24,45,63,23,43,31,1,28,3,2,12,25,25,33,41,32,56,40,23,6,21,11,39,23,34,54,24,42,48,78,121,156,215,193,216,214,206,215,171,202,199,194,196,187,153,134,149,106,136,105,94,107,121,124,108,138,130,160,127,129,105,98,92,95,79,80,103,92,105,99,90,56,102,113,123,80,47,75,85,82,73,73,71,75,111,101,92,127,119,125,129,123,118,149,138,134,109,119,121,110,124,166,139,183,134,143,156,138,136,124,106,147,127,179,152,93,108,88,103,114,125,107,170,137,110,115,144,164,150,129,136,119,94,94,110,141,156,154,144,163,147,128,149,157,114,108,106,96,84,108,111,117,84,105,103,110,72,103,78,75,84,85,94,97,84,104,135,130,117,94,100,91,107,111,97,99,99,131,121,107,107,114,104,87,60,93,62,81,102,110,103,99,67,92,94,59,109,115,79,114,81,81,90,107,95,98,94,118,111,91,97,66,89,59,68,95,76,67,61,52,78,64,73,40,64,77,89,47,65,71,60,42,60,40,62,67,78,111,142,159,146,182,156,129,113,130,121,192,191,172,178,215,193,188,162,141,152,151,100,59,39,21,9,11,19,31,30,52,70,53,63,75,53,70,46,42,43,48,58,37,43,71,65,44,74,69,33,64,51,35,65,59,61,46,39,71,37,38,22,57,42,40,26,40,51,61,26,38,52,26,17,30,61,55,38,49,39,18,39,29,18,36,27,33,58,55,22,35,55,33,47,44,51,11,27,27,34,33,33,45,31,41,61,38,41,45,51,30,52,41,31,41,14,68,37,30,11,42,28,31,38,27,39,33,6,46,37,26,33,25,13,33,32,38,33,37,36,35,36,3,15,13,30,13,21,14,28,18,24,33,21,25,0,0,33,46,27,41,16,48,17,14,20,11,10,17,20,28,38,9,51,37,20,36,28,11,57,4,42,32,46,38,10,15,11,33,21,19,5,16,44,34,38,36,13,48,31,0,35,20,34,29,32,16,40,33,31,27,41,23,27,23,34,11,7,42,22,21,46,77,67,102,86,112,118,164,243,154,71,11,54,38,31,47,46,58,44,39,51,52,41,41,50,58,40,32,3,16,36,18,18,32,37,32,35,14,36,38,56,20,30,27,8,46,41,56,32,44,17,16,11,46,1,32,18,2,25,16,24,36,34,15,37,25,53,71,57,55,72,83,94,74,79,77,68,96,96,121,161,184,168,175,142,140,150,154,125,149,147,137,153,140,138,163,169,169,171,155,159,187,152,154,137,171,178,158,162,156,167,158,192,171,153,182,138,178,140,127,96,115,107,137,148,192,179,164,155,158,145,141,144,157,196,146,158,179,193,176,145,138,133,131,144,139,158,147,153,153,177,177,148,160,162,166,158,161,129,177,161,169,178,144,142,168,147,158,150,144,159,176,187,195,190,204,196,195,182,184,183,180,171,160,150,183,112,31,1,31,5,0,22,0,35,24,28,19,13,5,15,14,20,11,28,24,10,15,10,4,10,40,24,20,27,20,28,35,21,32,35,34,51,35,62,72,69,62,21,50,28,6,4,20,35,25,16,8,23,35,27,30,18,22,26,31,23,24,39,37,27,30,28,64,48,37,55,38,47,22,19,38,21,2,33,32,27,40,30,21,23,38,14,35,16,31,45,24,32,29,25,14,18,14,24,28,39,30,57,61,142,102,158,108,157,148,165,154,168,217,141,176,153,153,147,121,111,111,104,91,75,105,94,126,151,145,145,90,112,126,101,125,128,110,90,122,127,104,92,86,93,114,99,77,99,103,115,94,91,66,88,106,92,97,119,68,110,118,141,113,90,149
,129,143,116,138,127,141,119,119,135,120,129,132,120,113,93,90,85,79,89,109,79,67,101,111,125,112,121,128,143,132,141,116,92,129,120,109,130,142,116,121,134,122,115,105,115,121,118,116,111,101,115,115,111,95,95,108,86,87,98,84,65,119,67,68,93,97,69,91,99,94,144,93,80,106,94,104,113,123,83,103,82,112,89,123,124,108,102,121,108,102,101,99,105,97,66,102,93,96,107,103,76,85,80,95,108,77,96,98,72,99,81,72,98,122,78,96,77,89,78,87,59,90,78,74,78,82,84,64,79,81,66,51,61,55,37,58,77,78,43,82,62,51,63,71,63,36,63,52,88,144,176,190,168,193,139,102,94,135,107,104,133,152,179,202,182,187,249,250,243,227,175,140,74,41,29,6,16,14,15,43,55,50,48,40,70,59,52,53,56,42,67,44,70,30,55,46,42,51,56,34,40,61,21,28,64,28,51,61,49,64,49,42,48,44,44,41,30,64,41,32,39,29,33,33,40,43,19,49,42,15,52,25,34,36,59,47,26,33,22,26,16,34,29,28,23,39,32,22,28,52,32,40,13,17,20,33,46,34,44,28,26,32,37,32,19,27,37,26,10,38,27,9,28,38,34,27,9,9,6,30,39,23,7,17,20,20,48,27,30,18,25,35,47,33,20,19,7,31,13,20,20,27,44,4,44,30,34,20,29,24,24,18,26,32,28,24,26,32,12,22,17,8,31,9,33,2,20,39,24,24,46,31,20,52,36,16,17,42,25,31,43,16,19,38,17,18,19,30,31,18,14,40,23,43,25,31,46,47,37,16,37,34,26,30,18,21,31,56,39,28,62,58,74,67,73,109,126,71,39,43,42,32,90,104,88,86,115,117,83,67,95,100,86,61,30,20,37,48,29,67,55,51,23,34,26,7,13,29,27,31,20,23,13,12,3,22,28,20,28,9,15,28,56,31,38,26,18,22,22,25,44,23,27,68,68,75,71,82,79,97,83,72,75,101,65,99,97,179,168,183,161,176,179,138,162,137,168,170,132,131,150,166,148,160,159,164,149,190,175,147,130,149,150,160,139,162,118,158,160,148,149,162,166,152,158,185,164,153,156,100,104,102,152,168,159,178,144,153,154,161,160,166,148,169,166,154,139,127,143,141,138,131,117,132,146,123,158,159,150,146,157,116,147,166,152,183,142,159,154,166,179,150,125,149,162,165,189,154,180,163,185,159,199,194,208,213,194,214,190,154,149,196,157,144,123,11,0,15,44,6,8,8,31,1,16,18,13,3,0,11,10,1,3,14,16,43,12,20,11,40,21,27,14,27,6,42,12,25,35,33,60,65,40,62,79,62,31,28,19,7,11,33,38,7,52,28,58,25,7,40,45,32,19,78,11,34,51,44,50,18,34,48,15,47,25,0,40,28,37,25,19,40,32,31,22,19,31,26,20,11,31,26,30,21,24,7,34,10,14,25,23,44,39,38,34,56,95,47,67,70,62,51,81,37,39,72,76,54,71,62,29,70,72,97,101,102,108,88,108,92,94,94,84,94,81,81,76,62,96,109,104,92,112,118,120,105,105,64,83,139,112,125,77,111,116,140,113,109,123,125,95,119,93,86,105,97,77,56,60,91,111,91,60,81,91,96,101,98,88,52,80,78,82,76,72,71,83,92,75,81,59,81,86,115,105,92,96,118,107,105,129,87,61,89,76,74,69,68,102,111,104,78,74,80,58,69,70,63,86,54,58,84,67,81,80,90,76,100,73,87,106,74,69,85,97,67,112,87,109,109,130,116,129,114,119,90,85,100,77,100,112,87,94,113,123,106,88,114,118,108,97,118,97,131,125,128,89,86,98,119,79,87,80,89,112,82,75,92,86,112,92,74,122,107,91,96,109,88,101,109,72,93,73,83,82,74,95,69,67,77,82,65,56,85,45,75,67,56,58,82,62,48,84,60,65,53,54,54,67,71,118,128,135,128,98,122,154,157,145,97,49,89,116,163,149,163,227,249,255,254,254,243,247,210,160,122,51,16,1,19,24,10,31,43,56,53,43,54,62,86,68,48,53,53,41,38,57,49,69,18,57,38,39,48,42,32,50,54,52,50,60,27,42,34,20,26,41,49,38,27,22,38,41,27,38,54,27,21,29,45,30,35,44,35,54,33,38,32,23,37,11,27,20,24,51,44,19,9,38,31,21,24,39,18,18,33,51,28,37,45,42,37,38,33,10,21,51,25,4,34,31,23,37,5,8,19,53,43,33,38,30,27,48,47,22,47,2,21,21,17,40,25,21,24,19,21,19,11,39,21,43,34,46,9,14,28,14,32,10,24,21,35,18,37,33,33,21,20,19,30,29,15,19,10,36,25,33,4,39,5,18,42,25,5,0,27,28,26,14,2,1,26,53,39,57,39,51,22,31,40,30,51,31,26,26,37,22,20,42,13,33,3,32,1,25,39,15,47,25,44,51,25,54,10,48,41,57,42,12,27,26,74,
139,132,146,146,166,156,141,108,126,167,144,138,87,47,15,40,54,39,35,14,21,18,23,32,46,13,33,32,19,30,16,6,12,33,13,8,15,15,19,10,18,19,15,19,13,6,69,27,61,30,41,57,24,77,52,74,99,63,77,113,112,70,97,110,184,199,191,160,158,148,157,148,171,183,165,167,172,172,177,158,160,171,165,166,151,164,154,170,133,153,165,169,158,145,166,146,147,168,156,189,156,161,154,169,148,148,144,122,124,68,88,147,182,177,180,152,155,166,158,148,182,164,152,159,161,133,153,153,149,147,147,129,158,169,151,156,147,150,164,156,156,164,152,170,157,177,160,153,151,151,138,149,156,167,178,162,178,196,168,168,195,186,180,209,200,191,181,193,151,172,183,192,105,16,7,5,13,16,2,32,20,2,2,10,21,25,16,18,2,25,17,15,7,6,14,18,22,18,28,20,33,28,23,4,45,18,30,31,41,52,87,59,58,59,30,57,49,25,13,12,15,19,31,45,44,38,42,25,58,54,29,32,33,14,20,28,28,35,10,13,44,21,19,32,28,20,27,25,30,35,20,24,10,32,12,27,10,17,23,17,15,55,12,33,40,25,24,35,22,49,51,37,40,41,34,44,52,68,62,72,37,41,46,27,68,85,68,74,106,98,124,105,128,111,91,138,115,93,101,94,61,87,61,70,67,110,87,122,128,152,165,148,151,152,139,125,84,97,116,154,111,112,140,110,98,134,99,82,62,75,79,85,54,66,80,80,59,63,63,51,52,75,57,75,112,76,54,42,78,66,92,81,86,74,105,92,85,57,81,87,80,88,109,85,68,81,98,75,97,127,97,85,67,68,57,91,86,65,76,85,93,64,82,88,88,71,106,100,82,77,73,98,102,117,112,105,95,87,99,95,116,116,127,102,79,108,97,119,130,109,98,116,113,91,77,90,126,94,74,105,101,95,130,138,151,135,99,90,98,94,104,90,90,87,97,120,110,131,96,117,114,113,116,104,106,104,100,97,88,88,102,106,123,97,88,106,100,88,81,76,85,77,74,101,75,87,99,57,49,53,82,81,45,65,68,87,76,87,77,89,53,54,66,57,61,42,52,24,39,39,28,34,83,99,155,199,115,75,62,93,84,84,95,95,142,236,252,254,250,243,226,253,251,245,214,168,134,116,43,3,18,27,11,11,36,59,53,48,44,55,44,71,38,56,50,51,58,54,54,46,56,26,57,32,61,44,60,24,24,45,20,50,30,53,36,32,55,29,33,45,43,32,45,10,52,41,32,69,5,33,48,20,36,31,56,10,34,53,37,42,26,48,11,15,24,56,30,38,50,52,33,22,49,22,17,49,11,41,20,39,48,26,27,53,43,49,20,15,7,7,47,37,58,27,19,43,19,33,11,2,34,13,40,41,48,23,22,43,24,32,33,16,15,6,24,26,34,5,22,31,18,51,17,14,49,11,2,43,14,23,7,39,6,24,17,8,37,31,15,37,29,16,34,28,31,28,16,11,24,25,22,45,32,42,24,29,14,26,26,21,27,12,47,36,30,32,42,23,41,25,28,42,17,22,39,40,41,47,8,23,40,14,6,34,28,17,14,53,26,32,40,37,15,38,9,46,22,24,56,61,88,159,135,133,163,129,146,137,125,147,151,148,109,17,16,33,7,39,11,53,24,1,13,15,3,6,15,25,18,12,36,33,26,30,15,6,64,28,9,27,41,46,26,39,27,42,50,41,34,47,39,52,60,67,95,41,70,81,86,70,77,79,120,177,151,181,173,171,168,161,138,173,171,161,176,161,164,156,163,158,171,178,151,141,165,161,161,159,159,166,162,165,161,169,196,166,177,172,170,165,159,171,170,183,138,154,154,144,117,94,75,116,125,180,184,190,175,143,161,158,166,133,143,184,135,134,176,157,170,172,162,179,159,158,172,159,132,141,162,161,159,163,170,146,189,168,155,180,181,182,163,131,184,151,170,182,152,180,180,188,182,162,199,202,177,184,182,174,154,189,157,177,107,2,1,18,13,13,3,31,11,35,28,5,11,11,14,1,12,10,38,9,40,0,28,3,22,32,2,11,27,29,23,31,10,11,32,56,34,37,50,45,65,31,35,29,16,26,27,44,11,31,51,32,34,34,22,39,43,34,6,34,39,16,22,23,36,30,15,34,24,41,30,26,47,36,36,39,33,54,32,17,8,24,29,21,21,20,9,22,9,17,35,18,8,32,26,22,30,16,45,52,38,42,46,37,41,78,35,53,49,52,34,77,96,72,79,93,111,122,123,80,98,101,98,98,103,102,69,75,66,74,90,102,92,90,99,96,101,110,97,101,68,63,83,86,53,70,104,98,119,93,120,112,113,82,71,81,79,95,75,64,78,77,96,109,79,82,102,81,77,84,96,67,81,84,95,113,117,107,100,112,104,112,106,119,81,88,82,104,100,
94,117,125,78,115,108,92,92,105,117,107,92,118,106,106,103,84,113,82,104,106,99,117,84,90,105,132,87,102,116,118,103,142,104,94,95,109,100,110,111,108,110,118,111,132,86,121,116,121,108,94,122,126,115,118,97,97,131,106,127,141,125,117,135,123,142,132,95,127,111,80,106,127,108,112,135,116,116,122,122,99,136,95,120,102,112,91,105,104,119,96,99,95,95,102,86,116,115,100,112,100,92,96,115,86,79,84,86,77,51,54,60,95,77,66,69,80,71,68,79,50,38,35,29,59,64,50,43,40,35,41,18,52,68,93,107,85,108,87,70,46,51,38,39,114,150,176,220,239,246,250,251,236,237,218,249,244,210,167,127,76,79,37,23,3,8,42,59,55,36,63,29,50,46,73,55,67,39,33,51,55,50,39,45,9,44,60,49,15,33,59,57,31,35,40,52,27,44,47,39,42,48,47,37,38,48,44,30,31,47,46,31,11,52,37,39,21,35,1,48,43,51,53,20,49,41,14,45,36,42,18,14,44,36,43,34,27,32,25,40,13,37,7,35,18,20,28,54,27,20,16,26,15,8,25,27,37,33,25,30,33,22,21,24,25,19,19,41,13,12,31,10,40,11,32,18,18,16,9,37,30,21,13,17,29,35,17,27,32,17,9,33,31,25,34,18,7,17,22,28,13,60,8,22,49,30,35,39,1,49,55,62,28,26,20,42,42,48,40,36,39,34,28,27,9,19,36,25,40,21,28,31,27,59,16,27,23,24,13,23,13,28,30,32,37,23,30,26,36,48,45,26,11,67,51,35,23,33,51,68,127,108,134,131,127,129,117,101,117,149,167,120,30,9,20,28,20,40,37,30,21,38,19,13,1,17,42,8,26,16,35,9,7,15,7,18,3,7,21,14,27,12,18,22,26,58,35,24,52,50,61,71,72,43,73,94,70,91,85,73,116,142,183,179,162,142,162,143,136,140,139,168,177,165,216,179,162,179,164,162,176,155,173,134,138,153,150,140,176,178,166,198,167,147,157,146,174,171,153,168,163,141,151,172,151,175,194,154,140,97,88,85,131,152,183,180,202,178,163,171,170,144,169,164,148,159,143,141,151,155,145,131,159,171,186,156,172,163,139,158,163,160,152,155,178,154,153,157,157,163,163,143,166,125,162,163,165,195,147,199,160,209,169,169,165,152,156,180,157,150,167,122,20,12,25,20,13,8,40,23,11,16,18,15,29,35,0,32,43,13,7,23,9,5,26,5,39,14,25,39,14,18,19,39,17,25,44,41,27,62,55,46,45,75,43,23,16,26,29,17,34,25,4,10,23,25,37,25,31,52,34,4,10,20,26,21,45,38,31,22,11,47,38,26,43,31,9,42,25,21,26,35,10,21,6,14,24,10,15,9,22,9,19,24,19,50,11,41,36,28,41,35,45,41,19,49,23,37,39,35,45,46,29,30,55,44,45,36,51,41,48,37,26,32,34,56,52,25,26,15,44,27,43,67,62,59,37,45,21,27,31,34,54,29,70,39,68,62,54,81,93,95,78,80,95,86,78,102,105,78,102,110,99,121,140,137,114,117,122,72,118,94,108,112,131,101,118,117,118,129,103,108,123,101,131,91,92,127,119,112,85,113,104,116,98,87,87,105,119,119,113,102,124,123,120,134,117,122,111,112,119,126,103,104,122,125,97,107,97,124,95,124,120,92,103,93,101,101,83,113,124,132,98,143,111,126,120,104,153,126,101,111,127,107,130,123,111,111,118,140,104,107,107,132,123,117,126,117,111,114,118,105,127,104,109,113,117,102,122,110,111,109,115,96,120,97,91,115,103,104,117,95,109,112,96,119,87,110,132,116,109,94,102,82,75,99,66,65,89,67,54,65,65,65,58,89,87,64,81,61,59,65,62,52,59,51,34,45,63,49,70,40,52,54,76,80,87,107,105,73,63,80,44,62,77,110,115,121,147,190,207,233,244,250,249,247,255,243,255,255,238,190,145,135,83,45,11,4,57,3,18,26,24,39,33,29,55,21,40,39,28,29,53,64,43,60,48,47,34,37,28,25,39,40,51,58,30,47,55,37,46,37,47,39,37,31,51,42,36,30,38,28,30,36,22,17,53,23,19,17,22,64,52,20,18,35,25,22,14,36,21,33,23,25,25,15,14,19,25,29,44,16,23,35,20,17,30,33,32,10,15,33,33,24,31,9,15,27,39,40,19,18,36,40,33,19,6,18,0,19,31,23,48,18,35,30,6,5,14,30,5,59,26,25,35,11,32,17,26,18,25,25,33,11,30,31,14,16,4,12,40,32,12,33,9,24,21,28,5,29,69,34,25,21,22,8,18,46,26,27,11,10,29,27,52,22,46,32,14,23,28,13,26,30,18,32,10,43,27,29,38,36,10,32,24,40,56,54,20,42,10,25,26,47,49,25,45,30,47,78,77,82,106,10
4,122,127,116,101,117,135,154,119,24,11,31,18,33,37,17,24,14,13,9,15,19,39,24,11,28,13,13,35,34,33,7,24,26,51,28,39,15,32,30,50,61,33,35,58,54,66,66,52,84,49,75,83,101,99,103,121,151,168,172,177,157,137,146,136,134,168,154,162,177,150,192,197,157,175,167,167,134,152,139,157,171,159,169,158,156,168,168,153,155,150,157,178,137,179,176,162,188,157,165,145,172,162,176,161,150,138,110,64,75,117,178,190,164,188,164,157,164,163,160,161,135,156,134,144,153,184,162,162,173,133,164,114,145,159,168,140,140,152,169,151,159,133,151,150,187,187,153,196,155,171,123,169,140,164,167,149,170,173,143,165,164,157,169,162,167,149,195,138,7,19,4,11,39,20,10,3,0,5,10,22,41,0,11,22,18,27,0,35,19,8,37,19,24,32,13,28,33,33,30,50,44,21,39,30,41,27,30,40,31,35,24,20,22,19,24,26,22,15,23,24,11,12,5,18,31,21,26,22,36,23,45,15,24,14,15,16,30,21,22,14,15,42,13,18,8,27,10,7,11,26,6,9,16,18,15,17,26,3,16,3,20,37,42,58,49,44,54,74,58,45,50,81,72,39,58,29,60,51,29,22,45,54,33,20,44,54,50,20,43,37,30,62,43,43,32,40,34,55,78,51,67,61,117,73,80,99,81,79,73,80,92,64,81,104,87,90,94,101,98,110,81,121,101,94,76,96,119,91,119,112,116,131,114,122,125,129,102,132,135,104,107,144,131,111,115,90,84,96,132,136,126,126,116,109,115,122,86,148,130,110,82,115,114,111,110,106,138,124,110,107,114,124,120,147,94,131,103,139,126,101,115,126,131,105,116,115,124,121,123,117,114,142,135,105,109,114,100,112,130,98,126,110,115,122,132,138,122,130,99,80,104,106,93,113,132,110,120,109,117,110,96,144,141,122,131,144,106,126,107,104,109,103,112,142,125,115,115,114,140,108,132,116,78,100,88,103,99,106,104,102,125,71,101,107,90,89,78,88,118,82,70,55,90,94,94,104,100,57,97,87,92,58,68,57,41,81,57,63,60,60,67,58,58,50,83,96,89,58,68,77,76,66,47,30,91,86,117,105,114,102,142,99,112,102,96,81,117,110,160,162,214,234,254,248,243,252,254,252,254,255,251,214,156,146,115,83,51,35,14,20,35,10,42,32,53,44,48,64,27,60,38,33,62,27,53,43,19,14,15,37,22,45,18,53,82,42,29,15,25,41,36,14,31,26,37,30,34,22,40,53,34,43,32,30,25,56,48,19,27,57,21,11,25,37,33,22,53,44,18,17,29,34,36,42,4,10,8,5,27,7,43,29,12,29,43,11,24,24,7,49,4,11,20,32,22,28,33,55,27,17,17,39,52,29,7,12,30,42,39,16,28,11,32,11,34,11,23,18,11,15,18,23,29,13,30,29,13,10,7,23,25,27,30,32,22,6,9,20,10,16,8,24,29,16,19,23,15,31,37,35,32,20,34,49,31,25,9,22,18,38,26,12,47,32,31,13,31,39,38,34,20,31,50,10,14,27,63,8,33,14,56,30,62,44,29,28,37,25,26,35,46,40,23,41,40,61,78,82,98,67,103,87,92,99,110,101,107,95,30,27,13,10,51,23,19,19,28,19,43,27,21,16,22,25,26,35,23,1,17,10,34,10,0,17,30,9,29,45,33,34,21,53,48,45,39,47,82,73,45,53,93,88,82,119,99,165,161,161,161,162,166,177,159,152,149,160,146,169,149,170,176,167,149,163,170,154,131,156,154,148,156,183,154,141,169,183,171,147,163,139,150,162,154,156,142,153,160,151,143,155,149,157,173,192,191,188,137,129,82,100,118,158,161,172,175,196,160,166,175,179,136,153,165,152,171,182,174,162,153,162,163,178,185,175,158,169,146,144,145,141,159,176,153,146,153,169,161,151,140,154,141,178,149,145,159,163,137,167,150,158,167,172,161,142,170,178,176,109,29,0,37,2,13,27,9,9,4,6,12,8,14,9,14,46,25,9,9,25,10,28,19,4,35,25,32,32,37,45,14,69,25,41,29,51,49,46,21,47,20,58,31,39,42,40,32,29,29,28,19,19,11,42,25,13,38,33,48,25,41,15,31,40,32,18,33,28,25,14,27,12,31,15,13,39,28,25,10,0,14,15,12,8,27,11,15,15,25,9,23,14,9,21,36,44,69,72,59,104,46,62,72,84,60,51,70,60,67,64,58,64,44,60,56,47,32,16,39,39,38,32,64,40,76,58,69,47,56,42,45,25,29,88,82,132,96,103,123,96,131,127,151,121,132,115,129,95,107,112,128,123,104,100,108,94,137,117,104,108,78,109,109,108,106,124,115,109,105,109,97,119,109,
113,115,155,123,127,125,125,134,125,105,103,112,121,114,117,125,112,125,93,85,121,119,96,110,115,124,146,119,117,107,111,138,136,126,142,143,135,104,127,108,103,127,128,141,98,137,124,111,122,103,116,150,136,121,108,110,138,121,125,95,132,99,119,149,144,144,115,121,127,108,100,94,125,132,131,129,123,111,116,109,121,98,131,102,114,104,113,126,133,107,112,125,105,127,128,121,131,107,86,71,112,95,125,96,96,96,122,113,83,71,101,86,86,97,73,71,77,83,111,97,107,97,87,119,65,60,41,50,99,57,67,85,93,90,78,71,58,46,55,51,45,72,43,66,18,73,61,81,78,96,43,30,77,105,100,81,97,100,118,116,106,138,106,114,103,82,79,75,82,131,138,163,210,211,234,251,255,248,240,249,254,254,239,205,222,143,128,104,56,38,16,1,26,13,28,18,32,34,51,53,49,47,34,55,46,53,28,50,33,26,59,15,49,54,38,47,48,3,47,47,15,30,20,34,32,16,48,29,26,38,13,42,15,13,33,7,40,40,61,33,15,4,35,26,44,42,47,19,37,31,11,48,18,28,32,49,8,20,12,35,16,22,18,40,28,42,21,40,41,27,15,4,12,30,14,9,15,15,35,35,46,29,46,2,26,29,8,20,15,39,34,30,14,14,16,20,6,19,34,17,14,9,18,24,29,1,20,37,24,44,28,15,4,41,39,28,38,31,15,27,26,11,34,37,49,6,30,16,16,56,40,24,10,22,13,24,35,50,27,45,27,22,25,17,16,45,63,52,43,21,44,11,36,23,31,16,25,13,20,22,62,48,26,40,21,30,32,42,63,51,38,45,12,43,47,66,118,105,96,105,115,101,98,121,84,138,139,39,36,30,31,36,27,21,1,25,52,37,29,29,32,20,14,21,18,31,31,30,23,10,35,31,6,21,40,44,37,23,34,49,55,64,57,42,97,81,86,76,82,100,81,95,103,155,170,163,174,153,142,168,169,192,160,181,178,135,156,154,177,145,150,144,165,143,186,176,159,160,179,162,161,166,154,180,156,158,162,173,154,168,164,131,164,168,162,155,148,151,146,153,150,163,165,188,200,167,157,138,99,113,109,138,154,184,185,175,164,175,160,141,186,146,180,168,167,168,178,168,143,188,183,189,175,197,168,163,175,129,160,132,158,175,168,151,157,144,154,148,157,149,139,160,148,164,164,160,147,169,165,149,168,162,163,175,155,164,102,4,23,26,21,13,11,27,24,15,8,28,25,26,0,2,11,9,2,4,26,9,10,22,7,70,39,26,34,14,19,11,19,31,28,13,13,5,14,6,21,24,40,15,36,8,28,6,14,13,41,31,39,38,25,53,19,27,27,31,16,25,13,37,36,18,22,14,3,18,5,38,27,20,30,20,21,11,23,12,14,23,10,11,26,19,25,7,22,25,17,13,18,4,38,46,48,44,72,45,54,43,49,42,59,48,40,56,57,70,51,64,58,46,67,62,53,56,36,40,54,35,56,61,85,89,81,75,60,69,78,47,60,43,57,46,95,103,113,81,78,87,116,116,118,128,131,135,117,116,111,126,106,103,114,111,106,98,136,103,107,108,107,118,120,94,115,103,98,102,103,124,128,111,133,150,121,139,136,101,99,132,117,145,137,118,108,126,121,84,118,116,125,146,145,122,128,125,115,120,97,99,124,92,113,101,132,142,105,108,102,110,96,122,93,112,112,124,105,122,107,112,125,128,123,142,148,132,100,133,139,121,116,122,100,133,125,156,134,125,121,139,104,129,143,122,139,135,148,123,104,119,104,144,94,85,106,125,142,101,129,133,127,111,115,140,106,116,132,101,145,132,138,119,116,134,128,124,121,118,105,106,80,76,61,81,101,85,76,82,77,73,100,91,121,100,85,81,72,68,60,62,55,62,68,74,55,57,59,56,87,97,67,57,62,71,54,44,59,86,49,75,83,75,72,57,41,76,61,65,82,62,84,96,73,74,129,161,113,132,113,105,37,55,42,64,81,132,138,172,162,194,234,243,245,253,249,248,243,215,239,252,225,188,150,173,112,51,31,9,8,7,42,22,45,35,46,49,45,46,59,42,23,32,36,39,36,37,40,50,52,41,23,24,47,30,39,38,41,45,21,33,56,48,41,14,28,24,37,28,27,43,31,39,29,28,29,18,17,24,34,44,34,14,10,19,6,26,36,20,13,60,34,20,29,31,17,39,22,19,30,47,19,21,14,51,8,19,26,11,35,26,13,12,15,32,34,37,40,22,9,9,17,31,7,7,18,27,7,30,21,29,18,11,22,37,22,15,35,29,10,1,9,11,22,27,17,21,2,17,9,44,22,25,5,20,8,13,45,12,5,9,16,22,25,49,25,41,33,55,21,25,35,33,22,36,43,26,30,6
6,76,59,33,18,28,26,40,41,28,34,54,48,41,27,29,38,35,53,40,49,25,43,33,49,28,31,32,39,38,78,111,157,129,158,138,100,102,140,144,154,143,70,41,6,35,20,23,15,10,8,40,21,8,37,44,12,27,19,20,38,34,31,10,9,32,30,26,20,16,36,25,38,47,49,54,59,75,60,62,102,86,93,74,95,95,80,149,150,176,154,143,156,155,182,164,153,153,155,174,185,167,161,158,182,132,160,155,167,179,155,170,152,200,155,151,177,150,196,162,123,161,175,185,171,146,152,155,152,132,142,146,162,160,154,150,173,147,175,179,181,181,192,140,112,78,109,108,160,190,184,185,178,142,186,174,178,146,157,167,170,166,161,191,132,168,155,145,176,175,161,154,159,145,166,140,152,156,163,157,159,150,164,158,152,178,160,155,158,161,152,179,158,170,169,171,182,170,141,159,142,111,8,6,10,10,31,17,31,4,24,4,29,30,14,1,22,28,17,15,2,9,10,22,10,34,31,47,17,24,51,21,7,32,18,21,33,45,22,9,18,17,6,29,22,27,8,3,33,21,0,12,9,28,44,20,18,6,16,35,28,12,35,28,12,32,30,8,4,29,11,10,23,12,6,12,36,31,13,30,20,23,28,34,48,21,14,16,20,16,13,38,28,19,27,45,6,65,62,57,66,76,49,59,61,55,78,66,83,70,55,53,85,95,104,84,77,65,82,61,58,44,69,58,53,99,93,74,121,81,106,106,89,75,51,89,71,70,90,70,87,65,73,110,69,99,103,87,129,109,128,140,140,114,125,111,126,140,123,115,116,116,119,84,128,130,119,124,101,151,111,130,120,129,145,116,150,110,113,127,119,107,121,164,120,105,117,108,124,126,118,132,104,146,112,110,108,136,117,103,134,118,135,104,119,118,123,124,64,95,105,127,136,135,122,106,122,124,102,102,131,136,123,148,130,122,103,131,137,123,117,148,128,112,117,136,141,109,92,114,123,107,139,119,133,111,117,135,119,103,116,113,111,114,86,94,105,119,123,112,122,94,117,116,116,110,105,121,115,119,127,149,128,118,131,121,134,148,124,159,145,139,133,85,100,82,84,112,87,84,82,74,76,58,79,120,93,101,78,64,94,69,111,85,62,68,89,96,106,88,74,99,74,74,62,68,39,48,69,55,80,68,62,52,74,56,50,54,46,49,58,53,64,79,60,65,100,125,150,150,179,168,154,150,124,87,97,111,82,115,99,89,125,124,177,166,195,196,215,201,228,248,242,251,247,238,252,241,243,194,166,102,67,114,5,19,24,25,30,33,35,29,15,41,18,31,11,48,35,49,13,39,50,23,31,33,39,36,30,35,31,24,13,24,42,34,9,17,50,49,24,50,30,30,33,30,33,25,16,16,12,28,33,33,42,33,10,51,9,27,8,47,32,23,43,45,12,25,22,23,20,49,19,14,41,44,23,25,14,12,23,23,23,22,14,11,19,12,14,33,23,29,38,28,8,5,26,31,24,10,22,15,23,3,18,20,5,22,28,17,11,25,15,28,10,13,23,17,14,42,33,0,43,31,30,9,33,14,26,7,29,8,22,33,31,14,21,23,33,43,14,14,28,20,23,48,61,31,43,35,23,47,29,17,41,14,22,30,10,51,35,22,29,33,28,42,39,61,45,44,42,52,24,36,49,44,44,35,34,42,80,118,140,134,122,95,85,112,115,122,146,139,63,36,3,12,26,27,8,17,29,31,40,11,45,41,15,31,26,25,28,12,11,11,15,23,39,20,39,31,60,49,29,50,56,42,81,60,51,52,67,78,76,109,104,107,144,183,152,167,166,156,154,180,174,179,163,164,183,167,159,170,147,172,148,166,147,153,159,145,149,154,155,163,157,160,164,155,167,151,148,135,153,151,176,161,146,168,150,165,162,174,194,164,151,149,167,143,152,146,181,162,160,146,138,102,87,91,127,164,185,206,147,167,155,168,150,156,145,175,147,157,156,166,146,150,138,150,150,153,148,155,179,161,161,174,180,171,167,160,185,135,167,180,140,173,174,167,187,200,199,146,178,145,156,141,178,171,185,174,170,124,1,8,6,3,13,15,13,21,25,28,12,16,9,8,3,18,5,4,10,13,7,6,3,7,27,14,15,18,25,32,17,9,9,30,16,24,26,45,10,33,1,7,18,27,15,33,32,31,25,20,15,16,30,8,46,21,11,0,25,27,38,40,7,11,31,31,4,35,19,21,24,23,3,26,15,26,24,22,29,9,15,1,27,29,18,10,36,12,46,26,35,10,2,37,35,49,23,55,72,71,68,87,63,102,71,66,80,102,109,150,100,79,110,102,107,70,74,38,67,39,48,64,65,117,113,105,97,106,117,121,104,97,106,84,101,95,85,
131,133,94,91,95,87,88,130,113,100,111,134,137,137,126,135,143,132,116,129,120,132,136,153,121,128,114,117,149,132,140,156,131,122,139,128,135,130,144,146,140,144,134,128,97,141,126,132,128,121,109,91,102,117,118,134,121,132,128,117,135,133,128,95,97,129,133,126,118,124,139,138,111,128,104,130,121,147,157,104,130,132,127,112,125,118,102,112,129,149,129,137,114,104,132,120,119,145,111,132,121,131,129,126,114,140,144,133,127,107,108,99,118,144,133,130,132,154,145,131,132,128,135,171,130,148,141,119,112,140,118,104,104,105,123,116,140,144,153,126,140,150,109,119,121,110,133,122,122,58,82,89,86,95,92,102,116,98,100,126,96,89,110,107,100,92,107,84,98,101,111,102,70,65,63,73,78,46,39,68,66,74,60,47,34,65,48,34,40,60,26,37,53,55,102,64,81,97,122,137,154,172,162,173,194,195,184,164,135,125,129,92,101,73,73,92,63,105,132,154,157,179,226,225,244,235,243,252,255,246,237,233,234,241,233,210,144,116,70,66,19,5,5,41,34,45,35,34,33,33,30,47,28,28,33,27,35,38,31,36,57,32,38,52,26,22,30,15,40,35,31,11,7,42,36,19,25,22,27,7,44,35,37,16,27,20,32,12,47,29,34,32,38,39,22,28,7,27,33,39,13,21,23,10,15,15,18,33,26,13,23,18,26,39,29,12,41,32,34,11,4,18,37,14,20,13,17,12,29,30,52,8,6,29,15,27,45,44,12,31,34,7,36,26,15,41,37,2,24,6,11,29,45,41,9,21,31,24,25,18,17,3,11,30,42,32,4,24,24,25,41,22,22,23,30,13,40,37,27,13,23,37,39,23,26,33,17,20,50,18,21,43,51,10,31,6,50,24,17,57,47,40,33,36,44,32,17,50,16,10,56,57,71,61,59,55,37,36,41,55,66,55,76,28,40,9,33,21,15,12,8,26,34,21,10,41,29,20,7,17,45,6,47,24,9,20,25,49,21,36,26,42,17,50,37,63,40,51,70,62,72,80,96,94,122,108,141,173,149,162,151,174,171,165,155,163,176,163,141,157,153,148,165,147,140,149,126,112,165,175,166,182,161,154,148,157,168,173,169,158,145,160,138,164,154,160,168,162,175,179,167,198,173,169,155,136,117,117,151,150,158,158,174,154,179,163,134,132,92,85,102,169,178,174,162,172,160,159,154,159,168,118,163,153,152,157,154,139,165,157,175,176,167,162,173,169,145,147,173,183,154,153,165,139,152,181,184,140,158,164,166,167,141,148,171,163,157,149,171,170,168,159,102,22,10,3,20,3,11,31,10,7,3,13,7,20,11,14,10,10,12,4,5,19,5,1,3,10,43,14,15,12,4,38,24,19,16,42,5,24,22,24,2,22,49,8,18,26,10,17,10,24,22,20,33,8,40,11,15,15,9,13,11,22,34,16,13,42,14,24,16,9,10,27,12,8,9,17,19,26,12,18,10,18,46,36,11,20,22,25,11,28,19,10,20,32,41,24,25,37,31,50,36,33,36,31,45,53,65,55,76,57,82,85,68,81,99,58,40,70,12,43,45,35,66,52,47,82,60,47,75,42,63,61,80,53,65,72,51,57,42,76,63,56,32,27,45,82,85,90,50,64,78,73,113,93,97,98,115,83,112,125,118,100,123,116,128,125,138,125,136,113,125,152,125,121,112,118,121,161,123,125,119,122,98,98,135,97,111,137,139,124,123,101,137,137,134,138,138,128,134,136,113,126,133,135,118,122,116,129,144,140,147,127,135,139,128,123,136,140,131,144,128,119,150,128,128,128,141,137,157,128,117,120,128,145,131,127,114,126,96,117,132,128,126,141,115,121,146,129,148,145,146,147,125,128,146,103,132,131,135,135,121,136,126,127,124,124,136,112,101,115,125,123,127,108,129,121,121,115,128,113,73,108,87,132,117,91,96,104,120,109,127,125,117,99,116,140,88,114,128,91,85,96,74,61,98,88,76,61,77,73,49,35,57,78,101,77,75,79,66,92,45,67,58,26,51,30,45,62,72,60,36,29,60,113,88,95,109,112,70,102,116,69,115,91,80,71,68,132,111,92,103,92,91,116,43,51,69,89,58,123,132,131,153,149,180,194,228,237,238,247,239,252,249,249,244,240,216,179,190,131,98,113,48,13,20,10,20,24,29,20,24,16,40,49,33,18,27,16,38,40,40,21,51,16,20,43,9,37,15,29,9,11,7,21,35,33,29,18,6,43,16,47,38,38,15,31,27,53,39,18,25,35,19,11,23,29,17,22,20,32,31,8,44,22,11,18,24,29,26,44,32,17,20,6,30,20,23,15,40,30,23,13
,25,27,31,23,38,19,19,16,30,14,16,40,18,12,26,29,22,15,16,37,55,17,13,65,17,41,52,35,9,30,25,19,29,3,27,46,16,27,33,18,42,31,22,22,7,20,23,2,2,26,25,25,30,32,44,36,17,49,47,51,19,46,34,23,37,42,33,35,36,34,31,24,20,22,20,48,46,39,24,21,41,30,32,59,36,21,45,31,54,41,38,18,47,34,32,28,38,72,69,30,41,32,30,17,12,16,29,28,16,40,21,22,11,44,31,26,34,10,23,7,12,34,22,37,24,14,26,54,25,25,57,31,41,71,76,50,79,79,69,112,93,155,194,169,173,151,163,180,163,148,144,162,161,168,138,158,156,149,141,178,150,172,163,173,144,180,166,153,158,175,166,149,147,156,152,173,153,157,137,178,173,151,169,149,163,142,186,168,178,158,166,162,146,151,152,162,165,197,144,167,166,142,171,150,106,66,66,131,145,152,152,178,159,152,138,172,158,164,144,152,172,165,180,168,165,160,157,158,183,154,169,148,167,133,162,167,164,166,149,174,154,187,170,166,172,160,166,173,138,155,164,170,165,183,152,153,184,190,116,3,7,11,3,18,6,18,18,2,26,19,2,27,1,17,4,28,22,14,24,27,23,4,4,30,31,16,43,14,27,43,15,19,14,20,20,10,29,9,16,45,14,25,12,23,13,20,12,11,16,8,45,45,36,19,17,13,45,10,10,31,28,35,13,31,20,2,32,32,36,11,12,2,37,4,4,43,23,17,21,7,38,30,19,14,27,35,29,20,19,31,8,33,28,22,16,52,3,4,22,20,44,15,30,22,17,47,53,33,36,21,33,35,22,35,32,32,47,40,69,36,35,42,48,67,53,57,66,27,36,61,48,47,63,31,60,61,65,34,23,36,46,45,58,56,75,68,42,75,45,67,78,46,76,68,112,103,125,130,129,112,111,114,103,107,119,127,138,114,129,124,128,128,119,134,118,116,108,105,126,100,92,109,118,139,108,147,132,117,136,144,124,151,127,151,129,128,119,132,129,123,130,121,122,138,166,154,116,129,135,132,102,126,133,124,129,145,145,136,129,139,138,132,128,123,133,148,116,144,125,121,122,141,152,118,134,129,134,125,132,119,136,144,130,135,146,126,159,149,130,145,130,147,112,134,133,127,118,128,110,137,111,129,130,117,94,136,108,133,139,140,118,124,123,109,102,115,103,118,100,114,113,116,94,107,125,115,126,115,102,128,118,99,109,66,107,86,88,102,82,68,75,67,90,113,68,77,91,74,64,56,59,43,87,40,93,60,89,62,64,54,67,83,84,63,52,56,42,20,24,53,70,77,73,61,36,52,58,56,48,40,49,62,53,55,61,86,89,108,87,128,158,129,112,88,66,46,22,48,14,30,72,81,85,85,129,122,169,200,185,239,253,254,244,251,234,255,242,250,252,243,195,175,148,110,88,65,44,25,19,28,66,30,39,26,57,23,28,47,26,34,11,21,34,33,42,44,44,32,53,32,32,27,33,29,47,13,34,28,27,31,54,22,2,35,20,31,24,19,50,13,9,7,30,30,24,14,29,41,17,5,15,7,38,4,38,16,1,24,19,14,28,11,1,8,6,27,19,27,23,37,7,43,12,21,31,5,45,51,24,17,32,28,5,11,24,29,6,5,2,7,5,9,35,39,31,38,8,21,27,9,10,9,8,24,13,13,3,26,22,26,9,24,1,11,8,16,30,22,10,24,9,30,20,18,25,29,45,42,67,67,18,39,53,20,7,41,25,49,21,22,46,45,42,27,49,17,57,30,24,49,43,62,25,45,43,39,56,44,59,53,68,21,19,22,60,17,44,28,26,42,37,46,7,29,31,14,52,20,41,8,16,14,21,12,30,33,3,14,35,3,23,34,12,44,46,21,18,34,30,66,55,48,62,72,82,95,85,87,90,115,171,183,186,163,170,150,182,162,152,161,143,174,154,163,159,174,176,158,168,144,146,177,177,174,165,147,159,159,178,148,186,158,163,168,157,177,173,163,178,163,162,158,164,152,163,141,152,142,177,159,165,137,155,153,137,153,187,157,180,172,162,181,210,173,163,106,90,77,106,130,168,162,171,168,161,173,148,165,187,163,174,151,174,164,167,162,183,167,159,172,165,160,177,155,165,166,158,155,158,163,151,161,152,163,170,146,159,176,180,162,153,161,152,156,147,189,143,180,106,9,6,3,0,24,1,11,13,29,26,29,29,12,17,20,5,7,11,18,21,3,20,16,23,60,14,25,23,41,15,37,11,39,15,26,31,10,4,24,20,20,11,27,13,0,48,7,24,42,29,21,29,8,34,14,46,23,9,28,15,7,48,29,10,13,40,10,11,17,44,21,41,33,18,24,20,52,51,34,4,33,5,23,15,41,27,21,38,34,10,19,29,28,27,15,33,19,17,
9,4,33,11,19,37,40,51,36,36,38,31,30,34,56,32,54,67,53,24,46,49,68,49,45,64,48,21,58,58,64,31,59,61,34,67,72,62,64,69,74,53,44,54,54,44,105,77,94,82,84,102,89,89,88,136,134,147,129,114,133,142,106,129,152,136,108,112,115,110,108,120,102,105,148,103,139,135,137,116,144,134,138,114,126,154,153,116,141,138,115,135,115,122,119,117,116,120,130,115,101,118,122,125,121,134,104,134,144,133,132,130,132,133,111,120,124,139,124,144,138,128,150,130,150,114,141,148,136,137,140,135,119,136,140,121,138,130,152,130,130,129,123,166,121,157,130,135,120,143,173,136,130,138,119,150,126,119,138,143,132,152,137,168,122,118,146,146,134,127,130,148,143,140,123,126,131,116,139,109,141,120,137,124,140,106,115,125,123,121,92,100,115,104,111,116,86,82,85,79,84,109,79,88,85,83,93,89,75,95,81,101,85,59,75,68,64,52,67,61,79,61,88,67,76,79,68,41,58,71,66,74,60,60,30,48,66,90,55,59,50,64,40,51,72,35,60,57,45,77,95,113,129,121,146,141,141,132,80,59,84,85,91,75,57,57,53,70,72,98,112,104,146,159,219,213,223,230,247,213,242,239,226,243,235,220,229,211,196,190,145,121,105,94,57,29,21,25,11,61,37,45,39,43,32,13,36,44,19,30,11,30,28,17,15,47,13,38,33,23,21,9,42,33,20,15,14,25,42,25,31,19,17,14,56,43,20,37,25,9,2,22,22,18,32,24,3,29,2,46,20,2,54,20,51,43,26,14,17,13,11,6,33,38,21,37,12,0,29,11,18,33,28,34,29,29,38,20,29,16,20,39,35,22,39,21,36,9,34,18,12,8,19,35,31,27,36,14,19,16,3,18,20,42,44,22,17,13,8,15,7,24,22,3,6,30,17,46,37,52,42,32,36,54,39,40,52,55,53,19,32,23,27,42,9,26,48,40,57,30,10,18,49,37,40,28,36,29,39,50,38,35,49,34,44,29,25,15,17,33,31,29,72,25,40,51,29,12,15,8,39,20,23,20,13,6,50,33,16,44,24,13,30,22,36,5,53,19,44,33,40,40,55,52,59,63,94,91,91,86,75,108,157,179,178,147,169,169,166,134,161,156,170,147,152,153,186,140,173,191,178,180,172,155,157,183,137,151,154,169,132,164,143,168,169,169,186,184,163,160,160,154,164,170,162,166,162,159,151,144,161,157,160,162,163,176,150,137,127,154,164,167,171,161,162,153,155,176,148,142,98,84,92,134,147,183,196,174,163,170,172,132,167,153,176,188,175,155,165,193,166,137,164,133,185,158,168,157,162,159,162,159,150,176,195,161,151,204,190,172,178,191,174,168,154,171,155,149,166,172,166,92,12,5,4,16,15,2,9,4,11,8,20,5,28,8,15,30,5,4,9,18,24,40,23,35,22,30,23,27,31,19,23,13,38,18,10,14,16,19,12,14,9,18,13,21,23,50,22,22,23,20,13,10,9,15,6,33,4,18,36,13,12,13,10,15,37,10,33,21,30,12,47,27,44,2,8,31,36,8,18,36,17,12,39,19,29,18,34,16,18,13,8,19,19,16,30,27,23,26,25,36,21,37,38,28,33,58,24,39,45,63,54,59,40,35,34,46,56,61,60,83,85,72,45,68,61,79,65,50,49,37,72,46,35,62,59,88,52,66,66,82,73,92,112,95,80,100,98,92,59,90,79,105,126,108,124,122,130,124,122,94,115,127,145,130,136,122,113,112,136,148,134,127,160,149,168,136,138,110,140,120,144,167,115,104,149,154,137,128,116,124,121,107,143,126,141,117,111,125,116,117,141,140,106,102,133,138,154,128,102,107,123,121,131,125,118,150,122,91,127,154,97,128,118,131,95,135,129,157,175,148,137,156,122,126,151,173,153,138,143,147,129,143,142,148,156,141,136,146,136,152,115,145,138,140,154,173,157,136,153,151,126,159,148,140,171,134,146,154,140,148,130,167,151,133,153,156,126,154,163,146,127,135,144,147,142,151,128,115,131,114,97,126,95,121,83,94,124,122,92,78,76,62,70,62,81,64,61,60,88,93,68,76,84,74,107,89,89,96,100,77,76,64,63,41,78,56,69,70,44,58,38,49,62,60,56,15,58,54,65,50,64,7,78,60,45,49,53,34,62,75,81,94,101,122,147,152,129,124,150,161,160,153,132,90,87,87,63,53,60,38,50,56,95,98,132,158,168,206,190,212,238,233,230,240,209,253,244,230,239,247,246,223,211,200,181,182,141,99,62,60,44,24,14,29,22,36,10,42,29,15,28,8,23,43,33,27,27,36,37,9,30,1
,5,34,5,25,23,28,32,12,31,20,27,23,19,28,20,21,32,23,35,32,14,25,15,25,10,41,37,21,39,14,42,21,50,55,37,25,31,31,35,20,35,7,15,20,17,23,26,31,4,42,17,43,34,43,35,25,14,30,22,34,23,14,41,18,24,31,5,39,44,42,16,28,26,45,34,57,46,44,43,41,59,56,63,57,76,87,70,57,13,38,72,58,95,92,122,120,106,121,101,100,130,101,85,102,144,140,141,126,132,129,127,105,149,125,149,133,163,133,104,152,147,120,101,119,135,94,118,128,109,124,134,136,139,117,122,114,129,108,134,126,110,121,133,113,139,116,126,118,117,146,133,119,115,124,116,124,85,151,124,127,121,109,136,117,115,147,113,127,133,128,131,137,137,163,152,138,139,134,103,101,94,152,125,122,136,134,123,150,121,129,99,108,139,105,123,121,104,84,136,132,122,120,114,139,160,118,100,132,115,107,99,108,124,95,117,127,96,125,131,113,100,126,124,112,107,129,105,109,113,131,126,124,126,142,123,93,138,105,106,121,136,110,124,104,83,119,124,121,93,103,79,83,98,121,80,101,91,47,63,76,90,78,34,77,82,64,69,25,57,43,38,39,78,37,16,42,66,73,65,72,14,59,67,26,68,42,43,62,58,49,57,60,67,81,66,60,68,60,52,72,60,71,64,55,45,61,55,18,45,71,60,57,49,63,61,44,23,83,81,57,56,56,58,47,46,60,54,52,50,47,65,65,56,55,57,24,37,46,56,50,40,53,52,73,37,31,47,70,31,62,58,69,24,40,38,54,36,48,61,29,38,35,25,40,63,50,34,55,60,49,37,38,57,25,57,64,64,47,42,44,36,49,15,50,44,34,39,55,71,23,55,49,69,48,26,35,23,46,11,26,61,51,29,37,51,37,31,36,43,41,42,7,22,52,45,32,33,8,38,67,27,24,35,48,32,54,43,51,28,32,34,15,56,29,11,33,29,37,35,42,44,29,37,22,40,50,28,35,23,14,46,42,40,57,47,51,58,53,49,73,32,50,72,42,65,66,57,59,49,68,89,101,85,126,115,110,107,120,195,190,200,170,176,164,188,171,162,123,107,114,140,140,164,140,138,126,130,101,54,4,32,3,17,15,30,41,23,14,18,38,22,37,37,7,8,15,12,35,2,17,30,18,20,11,18,35,56,29,52,53,61,29,33,42,54,45,33,49,27,76,55,79,83,63,71,63,78,50,102,78,100,104,158,119,133,148,135,123,141,133,161,141,158,137,160,151,128,164,159,154,137,160,157,167,133,129,156,142,197,110,145,180,168,169,166,137,155,160,172,168,142,165,158,178,147,170,162,127,161,180,159,139,166,160,156,173,139,156,170,189,146,166,163,171,142,141,164,139,144,143,151,172,168,148,156,146,143,157,139,159,154,144,150,159,125,159,169,159,149,119,118,112,97,92,133,169,190,166,144,186,176,140,124,142,149,160,166,113,25,4,1,19,5,2,16,5,5,35,7,18,4,7,25,20,14,37,20,3,6,2,15,16,13,29,13,15,15,16,20,18,31,33,17,20,14,23,40,19,11,25,15,32,16,12,31,10,1,14,34,42,6,8,14,24,26,23,31,14,39,15,10,30,7,18,24,15,29,37,10,16,30,21,17,31,19,29,8,16,26,2,32,3,36,18,20,36,33,36,1,5,48,21,51,17,18,33,20,40,18,41,43,56,42,29,37,21,24,22,19,44,4,42,32,29,29,33,7,32,28,27,20,42,10,17,55,34,32,38,48,37,28,30,43,39,47,37,29,37,53,46,53,38,68,66,48,75,45,65,60,60,25,31,62,106,57,91,51,59,87,104,116,110,105,141,127,139,135,133,119,130,149,125,144,142,123,102,101,119,114,105,138,127,133,107,120,123,108,131,128,150,135,136,130,119,132,114,102,125,133,102,152,119,151,139,128,114,123,119,132,147,113,155,127,132,120,135,131,121,109,119,138,134,134,142,118,135,131,149,127,135,117,137,133,139,151,140,137,113,111,146,115,128,128,140,150,105,129,117,129,119,120,140,131,154,157,153,162,129,113,118,129,128,120,146,126,143,154,136,128,111,115,137,140,116,115,132,115,133,122,149,143,105,111,117,101,108,106,115,99,111,112,83,110,99,97,118,115,115,148,144,137,133,130,97,90,119,137,121,128,106,77,100,114,128,101,121,103,111,125,115,118,102,115,89,83,72,92,64,57,77,87,92,111,82,76,88,98,71,80,49,73,50,57,61,60,60,70,48,44,65,45,42,57,52,58,52,97,83,70,37,49,27,47,46,36,19,78,47,43,47,48,67,82,49,35,45,64,32,33,66,40,57,47,41,57,44,43,67,11,52,5
1,41,48,46,65,41,44,60,45,42,42,59,26,37,54,34,66,31,42,22,73,49,61,59,31,22,42,43,48,45,59,55,56,33,46,62,41,48,49,59,29,49,34,44,34,45,60,24,47,55,40,43,48,41,33,34,51,51,72,42,30,20,41,43,36,23,36,26,44,40,59,46,61,17,30,41,27,25,56,69,21,54,45,14,34,49,33,51,27,25,33,39,63,34,43,40,26,26,35,42,29,30,44,41,39,27,32,63,44,58,46,29,60,64,45,36,30,47,39,78,70,26,47,40,46,81,63,46,66,58,68,40,54,73,67,64,74,63,61,81,108,107,111,68,63,79,88,92,129,162,132,155,127,141,113,87,112,114,129,178,165,182,166,198,203,210,193,145,74,39,24,8,23,3,27,13,38,34,29,33,1,26,23,14,32,12,25,32,42,33,41,59,31,30,22,22,32,43,17,39,45,33,55,32,59,46,67,61,60,83,58,57,53,80,74,63,50,71,98,125,127,121,137,140,151,135,168,152,166,149,151,144,114,121,124,127,111,121,131,148,119,143,107,137,147,148,174,156,164,159,141,159,172,166,132,167,160,167,165,186,158,171,167,147,139,153,156,136,176,152,169,135,166,154,145,144,160,170,150,132,159,158,167,162,158,174,176,158,171,165,183,159,157,147,147,150,147,125,145,152,134,177,134,144,144,142,161,129,183,162,133,122,97,85,102,119,152,180,167,150,173,133,151,135,156,153,147,102,15,3,18,14,13,4,10,25,31,26,12,7,11,22,11,4,9,8,7,5,14,3,16,33,34,7,19,24,36,24,27,31,9,12,32,23,45,12,0,30,21,32,9,12,18,38,30,28,7,22,24,9,15,43,12,15,16,18,9,25,31,9,13,16,11,12,29,24,22,44,13,5,14,28,20,2,19,18,14,13,39,4,10,18,32,34,4,27,28,20,5,34,12,13,43,11,49,54,34,55,38,48,23,44,38,18,48,20,49,29,27,44,30,48,23,49,20,35,25,41,55,8,42,38,41,26,38,45,18,40,51,51,38,25,29,28,69,74,72,52,32,57,60,65,87,65,119,121,72,111,69,70,72,85,104,106,108,110,99,98,106,124,125,107,115,129,117,124,111,102,94,132,146,144,129,126,102,119,136,120,135,129,125,128,121,133,112,122,149,141,137,134,106,140,112,139,152,122,151,131,132,106,142,127,113,141,141,124,139,115,104,116,168,137,145,147,137,165,132,130,158,143,129,141,143,141,171,113,141,110,118,129,114,126,139,134,110,109,143,145,117,121,140,124,131,133,131,125,143,119,136,134,146,147,141,135,150,122,132,115,144,120,120,141,141,152,142,149,122,141,120,145,145,127,173,146,141,150,148,128,171,126,132,127,131,111,129,122,127,123,127,107,84,102,113,85,74,89,92,92,99,109,137,97,105,86,80,85,95,101,102,128,111,93,128,126,81,82,91,132,115,110,127,92,86,98,106,106,85,79,101,81,99,66,95,104,109,78,82,55,107,66,105,72,68,54,83,65,53,37,29,60,55,61,75,80,68,39,44,44,62,70,57,47,37,29,61,45,50,50,61,51,54,39,60,35,66,49,67,57,59,54,29,59,59,23,44,50,76,53,50,49,44,43,46,58,49,42,49,27,40,49,77,47,44,62,40,74,68,30,60,40,54,47,43,35,59,50,71,50,31,57,45,48,50,55,52,43,41,38,32,52,47,78,42,40,52,60,28,46,58,40,56,73,49,58,36,21,44,46,33,39,34,43,68,31,33,27,50,28,36,46,30,43,48,38,33,51,43,36,30,40,16,67,27,48,20,47,58,58,35,56,25,33,30,49,46,50,20,39,62,45,48,60,18,55,37,53,24,34,39,23,39,32,32,36,56,33,47,27,37,28,18,41,42,48,66,34,40,70,65,44,37,63,42,63,43,55,50,47,73,56,49,63,63,80,64,51,66,61,95,78,103,92,84,65,98,102,121,93,117,109,139,149,124,132,148,183,178,164,110,50,40,31,27,42,38,38,23,31,10,46,14,38,34,45,51,25,12,35,26,33,47,18,29,43,55,41,45,17,53,37,32,42,41,62,42,50,57,45,69,61,73,46,60,64,55,67,54,70,120,104,117,126,125,112,119,145,140,105,143,127,152,121,166,142,153,144,168,159,138,155,153,113,141,135,176,148,165,161,135,175,174,172,152,179,164,127,149,171,160,160,155,157,157,193,140,126,161,143,145,180,140,132,161,159,136,138,150,145,168,153,116,131,151,178,169,170,189,137,149,164,154,148,153,150,143,154,157,148,145,125,154,105,141,139,146,128,121,141,156,140,153,128,119,83,103,73,97,139,146,149,156,169,158,148,163,144,147,116,26,4,17,18,23,15,19,5,21,17,
8,21,0,16,28,0,19,6,3,7,42,18,12,9,21,12,32,10,39,27,15,31,11,36,27,21,37,28,39,17,30,34,28,38,19,11,43,22,34,22,8,1,21,6,50,6,18,25,20,22,14,24,32,5,20,14,18,22,34,25,17,16,28,40,6,1,33,5,37,44,24,16,23,32,26,18,19,28,14,6,30,53,48,35,27,31,32,48,28,49,49,52,29,40,45,62,29,49,18,25,33,34,36,37,36,44,16,55,44,52,58,34,37,50,35,29,33,15,42,28,25,48,48,67,48,45,46,61,61,28,59,58,42,68,75,80,91,98,86,82,89,105,131,110,115,136,143,152,126,130,116,118,118,115,113,136,108,95,92,93,106,131,128,101,121,107,95,106,104,113,115,130,130,121,139,117,157,146,137,142,155,183,162,116,125,123,125,125,129,107,134,116,135,158,114,112,146,135,123,146,155,133,131,118,126,120,132,149,130,131,137,146,133,136,138,135,121,133,129,144,152,134,125,139,130,143,129,145,153,122,142,127,117,143,151,140,127,152,139,127,153,146,179,127,151,134,126,127,127,115,170,150,151,141,144,136,120,135,134,128,129,124,117,133,114,136,150,159,162,116,161,173,149,120,147,146,127,122,116,141,123,92,93,100,91,115,108,114,83,111,96,75,90,99,83,68,89,86,92,86,93,124,118,129,120,101,82,55,95,22,56,78,83,85,81,99,100,117,84,103,101,89,95,51,63,84,74,54,75,58,55,25,27,61,32,57,22,64,32,52,69,34,85,80,56,63,52,57,66,55,81,83,70,40,71,97,73,48,64,52,61,54,69,63,63,59,52,59,39,51,56,46,49,54,50,52,37,37,69,47,34,50,40,59,50,45,44,35,49,45,66,46,22,49,73,51,70,55,59,62,36,58,55,48,56,40,22,50,50,55,41,50,46,20,34,55,57,37,28,26,53,34,47,50,29,54,49,48,50,50,45,45,39,48,68,36,16,52,26,54,23,25,40,48,41,52,57,45,48,59,44,56,28,57,25,44,33,47,47,29,44,61,53,34,44,57,32,53,54,53,39,55,36,48,61,32,22,40,46,39,46,39,40,60,40,48,29,30,36,24,33,40,28,63,27,49,74,32,50,51,44,46,64,33,62,56,58,34,62,53,31,45,49,43,68,62,70,56,64,54,57,57,56,57,57,61,61,64,61,29,35,78,77,48,49,80,94,115,95,72,70,74,106,130,71,78,65,113,142,152,116,39,13,4,2,25,37,32,26,26,16,24,29,51,9,38,56,49,41,3,9,37,14,25,47,26,48,48,31,64,33,45,31,48,28,59,55,37,64,53,38,56,89,51,58,63,40,45,64,66,82,104,106,105,119,83,92,123,110,128,124,132,124,139,156,192,144,199,173,118,172,162,155,157,148,157,203,166,151,183,173,158,147,145,161,166,156,133,157,156,150,163,143,143,135,154,133,169,147,170,146,164,167,165,136,155,154,138,149,133,137,134,123,141,141,146,152,148,140,156,162,151,152,159,145,134,164,146,146,150,145,138,155,170,155,156,155,138,116,146,146,143,139,146,143,166,107,93,119,119,104,125,152,162,161,145,164,132,156,94,7,11,30,0,5,18,14,9,15,23,8,0,20,28,28,15,23,13,42,19,6,14,21,21,38,28,17,35,28,13,34,19,52,22,38,14,4,22,21,18,23,3,16,11,18,53,33,27,38,22,5,13,36,18,34,23,34,32,25,9,24,22,12,20,35,27,30,13,0,13,40,6,8,29,39,20,3,2,24,36,19,38,23,39,10,22,16,11,21,31,13,14,25,36,62,24,35,35,6,20,18,34,3,29,22,30,34,16,22,28,51,17,39,24,42,8,39,32,65,44,31,47,45,62,31,18,14,32,44,44,27,53,30,61,15,32,51,41,32,48,50,37,51,54,73,78,74,90,65,85,58,69,123,115,124,98,109,118,135,129,159,114,107,136,135,157,129,121,111,127,128,150,135,133,132,117,111,126,138,122,115,124,100,97,131,111,115,133,129,102,119,136,135,119,126,124,118,137,144,130,112,141,126,100,124,125,124,138,137,135,119,119,114,114,112,137,137,116,137,122,113,138,151,125,120,144,136,140,120,126,122,123,130,144,142,148,136,142,127,127,112,140,114,115,142,130,125,134,120,127,141,139,167,152,154,159,141,149,124,158,138,140,142,141,153,118,136,141,127,132,134,139,133,164,146,134,148,158,139,138,155,121,133,95,121,124,137,130,143,107,121,91,92,103,118,123,112,88,86,91,113,99,131,143,103,81,108,112,92,110,106,101,106,127,108,98,119,114,94,94,106,62,105,88,69,91,110,108,77,101,67,86,40,66,82,81,41,31,58,84,71,30,53,56,89,64,71,93,
54,48,85,87,76,87,65,90,69,53,71,96,91,66,94,86,68,75,80,83,102,80,59,59,51,47,53,55,70,50,65,78,69,39,41,61,32,47,99,58,62,62,31,48,68,72,41,57,54,50,37,75,57,53,52,43,49,67,69,46,68,54,52,55,32,35,40,68,66,35,29,49,44,55,53,44,83,51,49,47,55,49,8,28,82,33,48,42,47,34,32,52,58,49,50,41,38,35,31,62,22,54,35,40,20,41,42,30,24,40,59,47,8,59,54,41,49,24,12,36,37,36,28,36,42,15,27,42,52,54,41,32,54,25,44,31,58,24,22,36,39,46,16,66,38,46,54,54,22,32,32,11,42,17,20,50,11,34,18,40,36,37,29,30,28,33,49,30,57,68,69,57,54,50,61,76,61,53,54,69,55,39,58,25,54,57,74,45,72,61,74,73,63,69,84,75,61,60,101,98,104,103,105,102,103,87,92,71,58,77,115,116,68,45,30,30,35,33,51,30,13,23,49,29,29,41,16,19,37,41,24,23,22,39,66,22,34,46,24,28,17,15,19,53,47,47,67,41,38,58,29,35,59,45,31,67,35,67,36,57,67,71,82,90,113,124,133,121,126,147,121,129,96,107,130,114,164,158,168,172,162,168,149,130,138,138,142,168,149,144,169,173,170,150,162,159,147,139,160,151,157,160,163,180,185,154,147,150,152,145,155,145,146,183,161,157,152,152,136,159,126,136,158,154,159,147,148,148,156,127,147,139,145,159,146,131,139,141,156,160,154,158,166,144,139,137,147,136,169,128,140,143,151,157,132,114,157,151,148,124,122,104,98,136,127,162,150,148,164,148,152,115,0,3,17,10,35,20,35,1,29,23,3,6,16,1,27,11,19,13,37,28,0,4,36,22,33,31,30,33,23,9,18,36,27,14,33,25,23,25,54,48,7,26,43,23,31,28,12,18,13,13,12,29,15,17,13,25,18,27,22,41,22,17,24,41,4,32,35,19,32,18,10,4,15,26,46,29,18,6,49,25,36,12,11,23,28,33,42,18,19,33,36,19,29,30,9,29,21,11,33,30,32,9,11,32,21,34,19,32,40,52,52,51,33,50,32,52,53,33,23,17,29,63,26,37,46,64,56,35,64,52,46,22,35,30,48,70,30,43,55,65,54,48,41,79,62,54,58,69,73,102,91,82,95,83,94,84,85,87,99,102,153,159,148,128,102,120,152,148,121,159,136,139,114,142,127,116,145,132,138,142,153,129,141,147,137,123,122,137,129,118,127,121,124,130,141,114,168,131,133,140,162,122,137,147,148,133,129,117,134,104,141,115,130,117,133,144,131,116,122,111,125,132,120,125,110,150,146,146,148,98,130,142,131,144,134,121,142,140,144,148,142,137,143,131,130,153,144,137,130,148,149,143,161,141,133,149,148,158,143,160,151,127,113,114,118,132,155,130,114,135,144,124,137,147,152,122,124,117,124,136,136,127,129,137,116,140,107,137,106,143,171,147,159,136,122,150,114,159,123,130,138,139,143,150,129,144,131,113,133,127,104,116,105,103,74,91,116,83,100,108,117,106,99,111,85,119,82,65,109,97,95,108,96,106,79,86,83,101,80,65,52,61,62,55,94,96,116,89,90,111,95,67,81,86,88,49,69,109,78,80,75,72,78,86,94,67,65,104,98,83,101,76,48,71,68,67,64,56,80,37,63,28,56,42,83,48,69,49,62,46,46,32,65,59,58,48,45,48,34,58,75,42,60,48,72,38,63,46,60,56,30,26,37,15,52,44,44,23,40,36,52,29,54,38,58,53,61,46,48,43,52,44,31,52,64,26,39,62,31,25,49,53,39,55,50,63,51,46,23,59,15,39,52,37,51,42,45,25,50,44,47,32,35,41,53,47,66,50,32,47,36,24,28,50,42,48,45,26,40,26,47,42,38,57,29,33,23,33,38,28,43,44,61,50,45,31,66,35,29,41,24,43,47,48,40,55,60,37,24,42,45,67,20,70,57,41,36,48,44,39,64,51,54,62,55,73,48,54,60,58,65,49,58,44,37,104,50,94,99,94,86,52,105,127,105,85,88,89,97,99,100,68,87,79,88,83,66,65,52,72,70,55,19,2,51,21,20,27,36,4,15,31,27,47,51,40,38,30,27,20,31,30,25,41,42,55,15,38,22,61,17,20,36,31,48,43,29,50,36,35,57,62,65,70,39,56,52,37,60,80,85,73,91,120,125,121,139,147,123,137,150,143,105,135,135,173,148,144,140,171,139,158,175,166,148,164,154,151,140,171,155,152,174,163,173,160,179,147,141,167,165,167,167,143,168,140,160,183,168,153,164,166,157,179,164,170,161,164,178,165,162,165,163,136,140,156,136,158,138,185,188,164,159,143,129,155,143,126,170,128,137,148,149,154,165,
168,160,174,138,147,123,155,159,150,129,153,140,148,153,73,68,91,116,120,158,163,189,193,157,114,12,10,11,13,0,14,7,19,31,11,11,5,4,0,15,5,9,0,33,10,12,22,14,24,30,23,15,14,31,33,27,16,22,24,12,20,8,25,9,22,30,39,22,16,19,22,4,24,3,7,30,28,24,50,31,14,36,22,37,17,15,14,36,15,13,30,22,13,30,26,25,35,23,18,30,35,14,14,37,34,9,5,35,15,43,10,12,28,26,48,26,23,4,20,8,45,7,18,46,37,21,20,47,29,10,42,25,51,42,23,23,30,27,10,43,53,47,45,61,31,23,36,30,26,32,43,47,63,58,55,53,41,29,63,59,67,64,37,36,64,30,56,70,49,84,71,84,83,69,66,99,139,102,92,100,77,80,81,81,107,90,121,117,137,126,146,125,107,158,95,124,128,129,115,130,140,125,141,152,175,123,143,138,125,122,95,112,156,96,134,117,140,131,116,107,123,137,138,141,128,157,148,142,158,124,147,134,150,134,127,130,95,136,133,168,169,142,140,143,165,130,126,111,111,143,143,134,148,119,136,139,132,145,145,140,155,139,129,132,160,160,113,141,146,156,137,115,135,134,118,125,135,131,109,119,139,150,137,130,126,115,90,92,82,124,115,122,134,125,114,121,125,116,126,116,116,100,111,165,123,129,114,126,126,145,126,98,105,129,113,140,152,141,151,117,108,137,111,125,116,116,124,155,110,120,131,110,150,93,77,90,114,81,72,80,73,48,87,94,93,72,115,73,100,75,76,115,74,123,90,103,115,135,89,50,73,90,58,106,74,72,57,48,77,61,102,90,80,56,87,94,28,69,44,57,62,42,61,63,50,61,56,68,66,84,69,70,80,75,82,39,66,73,61,76,72,81,58,45,65,93,75,48,55,31,47,33,50,43,45,73,68,46,30,74,62,49,48,33,45,53,45,42,67,22,60,46,31,49,56,34,55,51,35,50,58,80,39,42,58,62,45,40,46,13,29,41,36,37,14,54,24,68,49,55,50,39,53,32,45,46,40,36,42,66,41,59,20,31,37,50,59,31,51,32,34,53,36,40,33,38,45,19,37,23,45,21,25,33,22,36,40,53,16,34,48,27,44,29,46,43,47,26,61,22,45,40,40,60,50,26,58,65,62,15,34,31,31,36,47,32,57,29,32,63,43,32,30,40,58,32,42,52,37,58,33,48,31,62,51,30,34,64,53,55,34,46,85,73,48,62,30,58,81,71,61,80,69,87,76,59,55,80,67,77,69,50,78,56,82,71,69,57,66,62,71,51,47,54,69,68,64,51,30,35,30,12,23,8,16,31,52,32,30,40,18,20,41,20,59,29,37,22,44,41,24,59,24,55,32,45,56,46,33,38,57,48,47,52,46,45,54,64,63,82,36,75,57,47,91,70,62,88,84,113,113,84,95,121,143,144,124,133,139,145,123,146,165,135,117,134,138,166,175,150,136,150,144,156,159,130,164,150,151,178,167,136,156,149,116,154,160,139,154,165,136,145,159,147,158,144,154,134,178,153,156,150,170,165,144,128,147,121,134,160,140,171,149,172,143,155,143,169,165,153,195,171,157,132,145,158,148,139,162,135,151,159,158,137,170,153,160,130,148,152,171,165,132,123,105,93,67,77,111,141,170,154,143,160,110,11,15,34,7,21,11,26,5,19,0,21,24,12,5,19,5,13,4,10,23,3,19,9,8,9,39,50,38,23,30,15,16,21,26,27,33,24,28,24,27,14,9,26,43,4,11,22,28,23,17,5,9,23,38,24,33,23,15,16,22,36,0,11,31,11,31,48,10,12,13,13,18,17,42,26,16,14,47,26,4,28,4,34,39,14,31,19,13,19,19,31,22,26,39,48,3,20,20,43,46,46,48,11,9,39,35,27,12,28,38,14,12,22,40,49,58,35,18,62,45,27,42,80,44,60,55,72,73,24,61,46,67,64,38,41,43,37,57,97,57,75,90,95,100,106,100,99,124,64,82,74,120,79,89,98,72,83,71,87,61,121,111,136,120,142,141,143,120,133,124,127,124,140,113,127,135,128,113,140,131,123,100,126,115,124,113,106,128,124,148,122,154,123,114,136,141,122,125,118,100,140,119,131,152,134,129,152,134,136,156,132,117,146,134,146,124,139,133,121,131,121,123,117,150,132,119,114,120,146,113,168,132,139,132,157,142,155,142,144,144,122,124,134,135,117,125,131,134,122,130,112,122,121,137,114,102,133,125,138,118,134,105,137,108,123,128,109,109,121,110,132,140,152,147,137,135,129,98,112,103,111,102,114,89,136,109,105,112,125,124,147,125,123,85,120,103,125,134,105,127,147,136,125,129,132,96,85,95,121,106,83,
99,116,80,79,82,97,90,104,72,78,89,86,81,74,77,74,36,65,32,54,78,75,104,65,95,62,60,53,46,52,61,62,63,56,63,69,71,90,55,51,87,70,43,81,65,59,54,52,71,65,49,80,39,51,50,62,45,72,68,63,50,69,49,53,43,48,41,64,59,82,61,50,69,37,66,46,58,71,32,51,43,59,34,67,55,68,65,59,20,35,46,50,48,47,34,29,57,57,55,64,57,53,49,48,61,57,66,32,49,40,62,46,60,47,63,42,49,51,76,56,54,51,41,44,75,37,42,57,64,34,50,32,43,59,35,54,51,59,38,40,60,44,32,34,57,35,48,51,41,64,60,51,54,52,1,58,44,66,38,30,43,36,74,73,48,56,30,38,25,49,37,47,57,38,57,50,58,52,74,44,47,14,10,29,29,33,64,36,37,47,44,42,53,48,30,34,41,34,35,47,32,54,68,34,36,34,32,45,51,65,45,72,71,64,70,68,60,70,54,59,48,40,48,72,74,70,44,64,43,72,69,63,71,89,75,81,55,66,63,56,53,62,42,69,74,62,66,53,62,32,27,37,38,44,28,33,51,44,29,40,26,54,29,24,5,31,34,26,56,52,34,29,22,28,39,7,69,31,32,47,52,60,54,48,54,68,30,58,58,57,38,52,70,66,68,59,52,77,72,95,101,65,51,71,84,90,89,95,116,137,106,125,149,132,149,134,144,153,144,199,155,133,181,151,144,143,150,128,127,129,121,153,132,145,144,126,166,152,153,139,160,161,146,161,167,156,162,170,154,158,145,147,178,150,179,158,153,143,159,157,133,145,138,173,180,141,140,143,155,147,168,164,137,160,170,170,148,153,137,151,153,142,165,149,180,141,145,135,132,144,145,137,140,127,141,121,155,175,137,136,103,89,76,95,129,145,162,163,106,26,0,9,37,16,10,2,12,16,2,14,32,19,5,7,15,13,12,42,12,20,0,3,9,17,30,48,1,28,26,18,7,11,52,36,22,31,9,29,27,7,32,5,11,10,28,13,36,23,23,11,34,30,18,23,11,39,26,2,11,12,17,38,5,15,17,11,11,4,10,9,8,15,27,24,35,35,37,5,37,18,26,2,19,23,19,40,32,12,26,33,39,46,30,24,45,13,49,26,18,25,42,35,27,36,11,33,36,36,35,27,28,5,35,19,57,53,35,28,52,46,39,29,35,65,59,58,72,56,35,54,33,77,49,53,49,35,72,68,74,79,94,66,77,88,96,71,79,80,56,99,80,67,88,64,88,94,110,117,105,108,131,123,118,152,134,126,148,128,131,132,107,126,138,140,101,120,128,119,117,132,125,155,136,104,153,133,130,119,136,164,126,160,144,143,124,101,125,144,113,120,144,143,143,101,111,105,129,124,162,127,133,133,127,134,126,125,87,132,127,131,126,97,134,132,133,133,162,127,134,142,142,135,143,119,131,135,124,144,139,126,130,131,143,129,118,112,152,153,139,112,132,165,135,155,139,115,135,151,150,167,134,134,130,155,121,132,120,127,128,117,125,133,129,156,115,123,134,120,116,121,107,147,134,121,119,130,137,130,130,156,117,128,115,104,129,141,141,122,99,137,115,139,138,144,107,89,125,129,118,123,105,130,153,115,123,154,126,114,84,101,80,97,103,87,59,79,89,63,75,63,95,86,86,83,64,73,83,74,100,100,102,118,97,81,86,96,67,56,102,85,96,54,31,73,61,67,65,81,73,84,56,51,76,52,79,40,53,46,54,56,59,41,78,57,63,29,59,47,66,60,59,51,61,46,58,69,55,45,66,46,49,56,56,77,43,42,74,48,24,24,55,49,46,54,27,55,59,56,32,62,41,52,58,58,40,52,61,42,35,48,61,39,49,26,44,36,45,32,25,69,47,57,16,52,42,44,41,66,46,46,44,34,28,59,52,63,39,42,48,58,51,36,53,49,58,55,66,50,57,26,26,43,58,25,24,52,30,73,21,54,30,52,48,46,57,31,16,48,66,18,48,40,32,41,29,49,56,48,30,26,59,29,44,36,33,39,47,38,58,23,47,54,37,40,24,38,41,78,43,61,15,49,26,43,37,52,40,37,39,33,53,57,62,50,45,47,81,70,48,37,56,52,39,49,51,69,86,46,43,64,79,88,45,86,88,55,95,97,107,108,93,79,58,45,75,59,54,24,75,28,23,45,47,9,24,12,39,31,26,48,20,21,61,46,52,31,35,60,53,52,43,29,19,54,41,36,40,32,42,21,34,33,60,40,52,45,63,38,37,68,59,61,49,62,22,46,53,77,69,88,77,52,56,51,92,82,92,97,125,136,144,132,162,121,132,162,171,151,157,148,171,146,181,173,177,133,156,166,170,186,140,151,149,157,148,150,126,157,115,167,183,162,116,125,148,171,162,157,151,165,148,158,163,131,154,159,134,109,147,169,168,150,166,166,137,
144,142,149,174,145,149,139,142,145,138,158,160,158,156,159,139,173,165,161,154,142,146,131,155,144,130,137,160,146,133,136,142,142,148,144,120,78,84,78,106,123,127,164,102,16,18,15,11,12,11,11,18,12,35,20,21,13,4,20,33,15,9,10,20,3,25,11,25,38,32,34,5,48,16,40,18,11,36,11,30,7,41,18,41,30,11,23,15,31,32,25,33,9,29,36,6,10,28,32,38,35,14,6,27,10,19,7,11,6,17,20,14,11,9,14,27,10,18,38,36,12,9,29,17,16,17,44,23,34,44,38,14,41,22,19,36,30,22,64,33,17,33,20,49,34,35,25,26,28,28,12,55,15,9,39,61,3,39,44,29,16,33,51,40,58,24,24,51,45,46,77,34,76,84,71,71,82,84,73,91,73,87,90,76,90,79,95,103,66,90,57,70,55,95,71,82,92,107,135,104,134,126,111,113,132,137,132,132,122,129,118,98,125,132,134,127,110,135,122,117,148,153,167,152,144,170,152,139,152,153,147,147,125,107,156,129,125,133,127,136,141,129,145,134,143,136,162,142,151,134,118,123,147,142,126,141,149,115,124,144,104,137,124,166,149,128,158,159,145,149,153,170,127,120,154,131,127,114,103,142,101,129,134,160,124,146,120,109,147,118,126,87,110,124,137,134,145,108,118,123,137,128,150,149,164,163,133,130,137,146,123,143,146,146,133,128,138,122,144,138,121,104,128,134,124,120,138,125,123,136,143,181,118,116,137,138,123,149,138,130,160,101,110,107,105,113,140,143,138,140,144,170,148,165,147,142,151,129,125,143,151,128,134,112,117,83,132,111,133,105,119,110,98,64,70,104,86,113,86,69,76,70,96,95,121,103,70,93,76,67,53,88,47,86,37,61,75,69,55,85,83,64,35,60,74,80,69,78,44,59,95,51,50,58,65,39,53,42,55,62,56,31,62,38,63,34,61,55,56,64,43,34,64,46,26,60,53,43,59,49,48,23,40,30,36,38,53,43,29,74,48,61,67,35,35,44,44,48,49,29,53,42,52,55,59,53,73,57,42,71,59,45,53,36,43,46,42,67,51,55,47,36,13,54,26,28,46,53,30,30,43,35,37,37,37,43,70,49,42,56,38,31,50,47,28,29,33,36,37,13,35,37,58,46,25,54,56,53,44,36,79,45,36,56,35,39,66,46,75,46,62,40,33,45,34,33,37,40,49,30,59,51,25,31,40,60,55,50,29,41,14,17,50,62,46,65,66,35,37,33,49,43,82,67,38,45,50,29,42,33,46,38,41,38,54,44,43,60,38,46,42,69,52,65,75,72,55,72,87,76,86,93,72,106,113,89,63,58,30,62,41,49,46,74,60,53,52,25,32,39,44,44,25,38,43,38,37,49,30,37,13,29,32,23,35,4,35,26,36,56,40,52,38,32,44,30,40,59,50,47,42,57,47,51,70,35,62,51,44,66,59,86,67,50,52,60,50,55,55,107,105,104,118,126,151,141,155,184,168,121,181,139,181,168,165,144,170,171,168,159,163,169,160,180,152,191,163,178,148,138,166,147,164,191,157,176,168,131,132,155,170,169,182,156,150,156,178,173,160,189,153,176,162,153,148,172,152,159,148,142,145,165,171,150,156,113,123,141,139,157,173,163,149,148,139,181,119,176,140,168,138,181,161,155,166,170,138,140,139,134,120,177,150,162,166,172,159,121,75,81,114,82,126,119,38,19,14,16,7,23,23,2,25,44,13,20,27,12,4,31,13,14,33,7,16,15,41,25,4,9,46,29,33,14,14,18,49,34,24,34,19,22,32,18,5,16,21,14,28,20,9,30,5,11,31,14,22,35,17,31,11,16,17,4,8,26,18,22,47,24,16,15,20,9,42,25,30,46,13,7,18,19,34,33,17,10,33,29,31,11,39,43,25,34,25,29,17,30,20,5,22,36,30,38,45,64,45,33,33,28,33,33,48,30,51,66,27,35,40,29,44,54,55,48,50,29,50,24,56,45,73,67,94,103,93,99,107,122,88,112,71,101,76,81,71,57,115,68,133,94,96,84,104,123,115,118,137,138,109,116,121,132,138,131,109,138,127,102,130,107,118,128,93,134,128,137,121,114,126,117,157,167,126,154,137,143,123,137,116,117,129,125,136,140,132,132,119,146,131,164,128,133,141,137,137,125,161,161,159,162,167,148,155,139,141,138,144,140,140,141,142,158,157,123,162,152,155,157,169,151,137,132,145,137,129,157,122,146,114,113,109,96,123,115,122,131,140,157,129,113,137,103,158,110,138,150,149,118,142,150,142,147,137,155,140,155,128,141,153,148,129,148,141,126,152,165,147,159,133,114,148,131,1
48,159,131,112,118,127,99,156,134,126,131,112,105,101,140,130,131,116,148,127,125,114,154,155,137,115,106,139,153,159,114,118,132,113,107,140,135,85,88,118,125,79,88,98,66,62,79,114,93,69,112,99,59,56,92,59,74,94,82,89,79,76,49,72,61,28,50,43,46,60,51,65,49,63,44,51,84,68,60,66,58,57,60,71,64,71,65,72,73,44,65,74,80,71,57,55,66,61,19,64,70,87,50,51,54,65,22,50,28,64,65,44,49,56,32,39,57,43,20,55,33,45,50,47,46,91,50,28,62,67,68,67,39,32,49,47,56,34,38,52,87,22,50,57,47,29,50,33,38,51,46,38,63,43,55,42,41,48,42,58,32,43,52,57,36,57,52,67,50,36,34,38,24,49,41,56,42,49,45,35,46,34,46,46,31,26,46,55,53,47,43,20,33,52,60,55,29,51,21,41,74,52,59,39,33,56,63,54,59,56,13,36,44,19,37,31,22,39,43,12,62,12,16,51,46,28,29,29,37,40,47,48,52,41,52,46,43,40,44,44,39,38,57,34,46,34,33,33,49,50,68,46,37,69,39,62,58,67,57,71,60,60,52,67,70,69,75,76,59,100,32,76,42,78,58,67,81,48,52,63,35,54,39,57,34,51,49,18,25,42,55,45,31,45,33,24,14,7,60,50,28,32,51,31,39,45,17,46,32,24,23,82,40,19,25,41,61,8,29,32,62,32,53,68,51,48,67,60,43,63,40,41,77,32,51,44,53,69,83,70,88,124,149,135,154,173,150,145,140,144,173,168,162,140,165,185,187,164,159,167,170,190,185,176,166,155,153,153,160,148,143,177,165,156,168,144,149,140,157,158,157,152,161,154,135,145,186,141,176,145,140,153,130,172,164,156,160,160,164,163,143,153,166,122,151,152,130,137,147,156,148,140,143,159,141,129,137,157,152,163,158,163,164,154,153,159,126,158,122,124,160,111,149,168,168,132,117,79,99,102,119,103,29,14,17,13,35,12,1,35,18,15,13,2,20,20,19,0,1,17,9,7,19,13,3,22,26,36,33,22,35,15,28,19,24,26,32,29,51,24,23,37,12,28,6,13,15,6,31,32,35,15,9,15,23,8,21,20,26,45,20,18,56,48,19,18,13,24,20,36,25,2,12,20,46,46,34,20,23,37,7,27,21,33,33,39,32,39,5,28,45,26,16,21,12,19,36,44,41,38,30,33,7,42,25,51,36,38,31,13,58,19,11,23,23,44,43,53,50,52,50,31,39,75,39,52,50,58,55,80,71,72,71,79,86,80,65,97,73,74,84,48,62,34,62,98,91,94,98,143,105,95,120,157,138,141,115,143,128,125,124,124,138,128,116,141,148,123,134,145,151,136,108,154,126,149,156,164,130,112,135,130,127,105,104,142,109,113,122,128,141,126,130,135,119,141,114,111,132,141,140,139,127,133,162,134,138,122,152,132,157,147,150,138,122,136,109,156,137,138,122,154,154,148,152,155,168,153,125,130,142,94,109,126,134,135,133,133,114,147,120,150,130,160,136,110,163,161,139,159,159,132,135,131,155,166,158,150,125,107,137,134,137,154,165,110,123,140,131,119,144,147,137,140,159,117,142,137,168,190,161,121,131,114,137,140,136,137,156,157,160,168,130,160,133,101,107,102,91,98,140,125,115,108,118,74,94,132,120,104,67,94,122,96,111,99,89,89,74,81,86,93,83,91,70,44,78,91,120,100,114,76,80,89,68,61,85,56,74,80,94,92,74,63,81,60,61,40,53,31,60,59,45,53,47,38,64,90,54,50,45,86,26,72,70,58,43,41,37,71,58,38,85,58,49,79,56,71,51,75,64,56,48,58,54,47,56,49,58,63,66,60,65,65,40,66,43,74,54,46,56,59,47,53,50,33,59,56,47,74,46,56,59,49,64,18,65,55,52,28,59,42,56,54,64,68,48,71,47,24,47,67,44,62,26,57,60,23,69,28,32,43,46,51,31,26,48,16,42,49,34,69,51,47,48,22,26,42,64,29,40,41,47,53,42,47,30,15,38,19,27,54,48,28,26,37,29,45,54,27,41,27,36,36,40,35,13,32,48,24,40,49,39,40,25,68,41,51,27,46,22,51,53,28,55,34,46,43,62,36,54,40,44,45,55,52,37,43,55,46,53,58,61,39,56,31,37,47,39,61,73,62,58,60,43,54,46,54,59,70,73,51,92,70,76,102,106,74,58,42,48,56,55,64,38,68,68,75,40,61,42,37,56,52,65,46,49,29,30,50,56,37,27,50,29,44,44,21,11,40,51,50,26,38,25,50,40,14,53,53,28,52,48,41,42,65,70,47,38,45,54,22,63,70,60,47,27,62,52,46,61,50,89,52,83,77,64,63,62,49,64,91,112,82,108,122,145,137,136,119,161,160,129,128,145,148,175,147,123,160,134,188,166,16
1,167,169,164,191,151,148,158,141,157,172,145,159,142,124,150,157,160,159,148,145,166,168,136,140,143,114,139,130,168,160,149,181,175,156,168,160,145,163,171,155,164,163,140,160,141,178,160,165,154,170,156,163,138,130,147,153,144,126,163,148,155,147,136,137,143,143,155,156,153,143,173,144,130,122,116,78,93,81,16,21,32,0,0,9,7,26,6,12,17,18,22,6,18,18,7,8,19,6,14,17,23,14,27,31,11,39,48,25,20,15,38,29,23,31,11,51,31,32,13,30,20,18,11,21,13,12,6,24,34,9,43,13,27,18,43,3,29,36,35,20,29,16,26,34,25,29,30,12,28,29,21,25,40,31,20,26,25,39,23,25,11,34,30,22,20,40,49,49,32,11,38,40,17,40,24,30,16,38,52,24,29,40,45,35,35,48,40,48,32,40,22,25,29,34,47,57,47,67,41,32,63,24,40,52,57,47,46,54,48,28,36,40,79,53,85,47,28,55,15,48,54,71,58,90,104,86,109,137,130,118,138,126,139,144,167,164,148,162,141,169,154,149,129,124,138,141,126,124,152,145,158,154,141,138,149,126,134,131,122,98,113,112,141,142,121,135,131,160,161,150,164,167,140,149,140,144,114,141,128,108,124,125,128,109,159,142,123,131,150,131,143,125,151,126,113,130,101,140,141,108,141,141,113,144,141,134,145,171,134,142,143,140,157,134,114,158,147,154,141,141,122,134,132,143,141,167,158,143,138,130,102,124,119,132,139,136,146,151,145,161,134,134,107,115,110,135,135,129,178,120,142,125,114,134,143,163,148,151,162,164,151,144,146,112,156,132,138,118,149,158,130,129,126,101,117,110,113,112,83,112,126,115,92,92,84,84,60,73,104,69,118,129,99,103,89,92,95,59,107,99,121,97,92,90,90,86,111,95,101,97,93,13,56,49,68,53,86,80,69,65,73,82,120,45,92,78,94,77,73,65,73,61,46,69,39,58,68,54,68,57,53,41,43,31,66,51,76,37,60,57,41,46,57,55,47,70,59,60,55,56,52,56,32,39,39,56,46,54,70,52,39,62,46,40,65,54,25,57,48,65,44,62,69,42,28,49,67,44,66,61,49,47,38,75,66,57,62,68,64,54,40,47,38,56,36,30,43,19,50,39,41,36,46,43,60,52,51,31,40,53,45,55,51,17,53,18,35,47,53,43,54,54,45,27,29,18,19,49,50,13,36,29,55,46,34,59,47,49,45,47,45,47,41,58,28,58,44,23,55,33,45,64,53,37,23,45,28,54,56,20,58,52,54,25,42,51,66,31,62,53,32,34,29,38,67,33,52,23,44,37,50,43,48,44,50,45,57,52,58,42,52,44,50,49,58,58,58,67,52,21,37,46,60,46,43,44,49,51,38,69,53,59,68,60,59,47,73,41,73,70,88,69,71,53,55,44,44,39,33,53,47,29,29,25,17,56,46,20,34,31,30,61,51,56,25,62,39,31,45,44,42,31,25,31,31,55,29,41,37,42,50,46,47,51,53,57,47,55,48,28,58,39,71,52,39,61,60,54,84,60,90,62,75,74,52,73,92,107,140,106,125,105,126,130,121,130,159,178,150,138,127,138,169,216,132,150,177,167,145,151,164,161,155,156,138,151,168,161,161,144,147,133,143,156,167,140,129,156,132,140,156,157,122,121,134,156,149,172,193,179,158,150,153,132,173,161,154,178,170,173,130,136,138,163,121,165,132,147,152,186,164,182,126,156,144,154,172,157,156,166,158,127,161,131,135,141,153,157,156,133,163,145,146,143,118,100,76,65,25,11,40,0,11,37,46,12,2,24,7,27,22,31,20,12,24,5,17,8,27,26,11,9,17,16,15,20,43,8,17,11,30,31,28,43,33,37,15,21,28,21,29,48,19,7,9,14,29,30,10,22,25,21,18,31,43,19,31,11,38,38,32,43,22,32,17,43,22,23,37,27,32,8,18,25,23,32,28,10,21,20,36,23,22,33,7,50,24,45,30,47,42,31,34,45,38,30,27,34,22,36,51,24,48,50,35,33,49,69,16,18,31,25,50,42,54,55,58,65,35,34,75,41,50,47,60,44,62,58,44,27,42,40,59,76,70,55,57,41,75,65,41,80,45,85,94,130,136,108,102,99,135,95,120,137,156,147,191,155,148,113,144,117,98,133,142,143,124,143,143,134,125,143,156,117,121,123,139,150,127,155,159,121,145,152,134,136,103,132,131,127,133,154,156,145,138,143,150,139,142,120,131,130,135,146,127,123,128,131,131,135,138,132,111,134,153,111,118,126,130,121,133,131,129,158,151,128,171,152,135,135,148,157,149,153,151,157,139,133,128,140,134,126,145,122,158,165,14
4,133,113,132,142,143,151,180,140,111,127,131,137,137,123,117,135,120,126,118,115,117,126,168,126,137,138,146,103,81,129,105,115,120,100,115,102,92,100,94,94,103,80,101,100,142,131,129,117,129,137,124,131,149,141,143,122,139,113,106,106,92,120,121,137,99,101,94,122,146,111,100,110,136,129,130,114,80,74,75,49,44,76,91,71,57,93,60,65,74,62,54,69,62,67,57,98,104,88,89,93,58,72,75,63,76,72,84,77,78,76,44,28,68,76,65,57,72,68,72,52,55,74,40,37,71,60,56,45,44,51,21,38,42,36,41,72,35,44,55,30,42,43,65,29,42,53,38,54,81,56,65,61,48,36,67,56,45,62,37,70,49,52,65,54,58,48,44,57,34,22,47,45,54,44,63,33,34,59,56,59,51,25,56,29,55,53,62,57,42,50,18,51,40,23,35,31,48,48,26,53,47,45,35,30,39,22,44,36,40,40,64,40,73,48,62,36,43,27,33,39,56,50,66,40,41,47,44,38,50,35,49,47,48,36,45,28,27,12,46,38,38,29,35,38,62,55,43,5,45,44,47,74,41,42,40,47,47,42,21,42,66,35,72,28,32,54,60,65,51,66,58,38,41,52,67,72,41,63,50,41,32,34,35,43,56,23,56,48,51,57,54,40,49,66,44,43,43,59,58,54,36,87,57,49,77,78,64,67,59,30,55,66,21,28,37,26,53,32,37,30,30,25,34,37,30,27,36,40,30,52,36,64,26,47,38,44,32,50,46,11,21,57,43,43,40,31,43,64,53,37,33,54,19,58,69,50,55,66,51,57,77,87,86,70,94,95,89,102,104,109,142,133,119,132,128,164,135,158,156,160,172,182,167,153,163,161,195,151,171,155,150,155,162,138,144,150,163,150,130,167,155,164,129,165,127,171,138,173,142,164,133,180,165,162,164,161,167,164,173,168,155,166,162,176,169,163,163,138,166,166,167,144,153,160,167,141,134,164,150,145,153,164,154,121,136,141,129,139,153,140,155,149,137,136,120,153,134,147,134,143,134,149,151,123,134,152,136,146,116,82,80,30,5,27,6,26,17,10,1,11,33,14,9,33,0,22,13,1,14,39,5,33,32,22,21,34,19,22,21,45,37,8,18,31,19,8,34,32,6,31,28,22,21,17,40,41,15,16,14,32,14,22,14,13,19,29,26,22,9,17,26,33,26,40,18,43,24,3,15,33,40,29,15,48,21,32,30,50,31,41,23,43,31,23,21,29,29,9,20,29,9,31,11,22,42,51,36,47,41,47,57,26,26,18,54,58,15,32,78,57,54,65,55,87,57,52,54,67,91,99,83,74,85,102,118,94,111,157,133,138,126,135,140,113,107,113,107,82,63,91,63,66,97,110,95,98,87,88,97,74,86,79,90,99,108,93,83,75,97,110,97,108,113,127,143,123,119,127,146,131,125,138,151,159,155,156,161,122,113,145,150,137,139,130,137,141,173,156,132,146,145,141,153,120,155,142,160,159,158,154,161,121,126,138,126,125,142,143,138,136,169,166,134,152,126,146,163,170,158,146,164,154,140,155,154,143,152,192,152,142,156,170,132,130,128,119,120,89,123,111,125,141,137,122,144,130,132,162,150,136,116,111,145,165,150,142,163,158,163,147,141,117,96,107,156,138,191,156,155,175,165,115,108,122,129,147,163,165,129,161,179,154,170,131,151,145,176,149,145,139,149,148,148,135,107,132,122,133,128,132,149,127,120,120,112,130,136,143,98,93,127,158,123,117,145,151,114,101,151,144,143,122,98,97,126,70,97,118,115,170,130,94,58,57,91,105,68,98,112,113,90,85,104,100,88,82,61,78,50,44,61,46,65,78,80,72,76,73,64,71,47,92,93,85,90,82,52,96,75,85,51,98,64,58,63,55,79,56,84,42,27,73,51,44,46,56,39,54,62,58,26,65,62,22,42,38,30,45,70,18,32,17,37,78,32,61,58,48,60,45,25,54,37,41,55,46,75,57,27,54,45,42,64,43,43,59,69,46,28,60,77,30,40,33,59,32,36,32,54,50,50,41,32,49,34,32,65,50,53,29,41,39,37,64,49,21,39,47,49,55,53,58,25,64,44,51,46,50,53,42,38,37,41,45,14,64,70,43,40,27,30,46,52,66,29,46,37,40,59,44,36,28,46,45,32,36,25,10,51,70,31,36,55,46,36,20,36,40,29,52,41,65,29,44,59,57,57,37,35,76,32,25,54,62,52,59,62,39,42,45,58,54,44,31,52,56,30,37,29,29,49,42,32,50,52,44,32,75,45,48,44,46,68,47,53,66,58,48,41,78,43,85,50,32,67,47,40,66,42,43,36,27,39,57,25,51,31,20,23,49,50,53,48,35,39,40,29,76,41,66,18,33,28,58,47,56,11,29,42,48,42,5
2,40,16,55,57,46,53,72,67,75,70,56,49,60,92,75,97,124,116,109,100,132,135,134,135,147,141,166,176,172,175,156,157,149,134,152,175,173,177,170,158,166,156,163,181,161,162,151,164,178,166,156,178,155,163,153,161,163,142,153,134,170,150,138,151,144,153,148,160,129,158,141,166,163,139,182,145,157,173,151,124,154,151,142,156,143,134,161,144,182,143,161,153,144,158,146,149,144,150,155,148,125,157,142,148,149,129,148,121,129,113,135,141,176,159,160,159,161,130,132,29,2,28,21,8,33,7,21,25,31,12,11,19,8,10,9,3,9,31,15,23,13,46,13,7,23,31,26,14,15,33,33,23,13,32,20,33,20,23,33,25,21,19,28,5,14,24,31,28,36,22,14,20,32,28,42,16,7,20,24,16,32,53,36,11,29,15,10,11,42,16,26,40,11,26,4,12,24,38,6,26,44,54,12,20,22,24,24,31,52,45,36,47,24,48,48,41,41,27,24,61,38,14,45,57,61,51,46,31,43,72,62,58,82,29,64,61,60,75,73,81,57,91,104,99,121,123,129,134,114,122,115,120,116,102,115,85,80,63,78,75,109,96,118,94,90,96,83,106,89,101,105,106,100,107,70,101,90,91,113,119,108,117,162,154,135,149,136,140,130,159,121,141,157,126,150,142,117,115,148,138,139,156,142,134,136,164,121,142,102,118,130,165,132,149,137,143,149,143,134,126,117,135,114,144,144,150,119,128,152,167,136,146,156,155,158,156,141,139,155,151,142,150,141,146,155,178,161,171,157,152,139,146,122,138,128,113,137,126,144,130,151,125,148,128,146,145,149,104,104,146,140,137,182,147,140,170,174,140,152,122,117,133,142,145,151,141,159,123,174,126,122,139,147,144,166,135,130,137,177,163,133,148,121,146,158,136,139,159,137,158,153,128,115,123,123,138,145,148,113,105,100,92,118,121,121,124,119,111,131,112,142,138,135,94,129,110,104,120,134,142,127,112,94,95,99,127,124,136,163,111,77,84,80,96,87,132,106,103,94,102,113,93,77,88,84,79,36,54,65,74,72,65,55,51,65,48,47,59,72,77,86,86,62,93,55,82,83,95,79,64,89,73,48,70,78,20,47,37,37,55,54,38,72,46,42,48,45,48,44,59,47,62,53,42,34,58,50,62,58,40,42,78,67,54,39,35,55,47,35,42,36,37,43,45,57,45,67,67,60,30,40,61,47,65,48,55,46,51,77,49,58,48,70,28,49,52,34,48,54,45,44,46,31,46,56,28,35,56,54,42,39,57,66,38,45,32,32,60,69,23,45,50,47,36,57,47,40,67,30,39,38,41,53,52,49,40,46,17,26,41,49,50,42,84,47,26,48,44,34,46,44,70,38,39,56,44,54,61,62,40,72,44,35,42,36,53,32,19,26,13,34,65,79,37,45,37,20,40,38,49,55,46,19,50,41,36,30,46,43,49,57,41,54,24,62,48,27,52,32,65,49,53,46,81,46,40,39,46,53,53,56,69,31,66,51,44,27,52,53,67,49,30,42,46,42,36,20,39,29,50,38,50,21,29,44,20,46,35,24,38,54,34,51,43,38,27,15,37,19,51,57,52,48,57,30,48,81,42,35,70,41,43,62,25,42,58,76,52,77,79,58,84,82,73,84,88,128,82,123,105,130,164,131,149,161,161,158,170,153,166,156,190,149,162,158,168,148,164,163,140,160,151,156,168,158,133,152,171,180,169,169,154,159,157,149,143,175,141,149,134,182,156,155,154,148,146,173,172,160,141,125,129,147,139,160,150,160,174,161,153,151,137,125,151,169,158,130,160,142,127,136,136,145,130,154,137,144,154,154,143,125,111,121,136,156,145,145,115,137,138,174,137,138,132,144,138,146,165,100,25,5,1,19,8,1,5,16,17,17,39,18,25,31,19,27,13,19,21,31,17,25,19,10,8,32,27,1,26,29,29,21,31,24,16,31,17,41,27,15,22,37,23,43,23,13,26,33,16,18,14,30,13,10,4,30,16,42,21,42,21,4,4,24,14,11,31,18,43,27,21,50,38,27,58,23,43,39,67,48,39,35,25,48,22,34,27,22,40,35,19,18,21,16,14,50,36,46,34,42,22,35,43,34,73,56,49,57,57,65,57,71,70,72,57,56,76,100,89,89,91,92,118,128,98,109,121,92,135,115,109,107,123,99,114,94,158,104,124,102,150,106,110,121,124,135,131,143,149,132,139,151,139,108,126,114,125,133,120,163,169,139,125,151,152,148,158,151,162,149,165,165,151,139,132,137,139,132,158,136,123,138,153,151,145,137,143,144,126,142,138,151,128,144,130,132,1
54,113,141,139,147,148,130,143,160,157,118,143,142,144,140,131,127,127,134,152,142,101,148,125,136,139,137,153,153,130,144,127,134,152,135,126,151,161,158,121,139,140,149,168,173,159,144,150,172,176,132,149,131,146,157,146,148,150,150,128,139,151,152,155,146,159,143,119,143,158,141,113,139,135,134,133,126,108,85,116,119,124,121,112,122,131,124,132,126,112,124,130,127,135,124,27,4,20,12,23,20,7,5,17,4,28,8,19,4,0,31,20,16,5,5,25,28,18,1,15,35,40,25,31,8,33,4,1,22,10,16,33,11,3,3,9,7,28,14,1,11,14,15,12,16,28,23,6,11,21,7,22,24,20,5,14,5,9,6,11,10,24,15,37,21,21,26,35,18,33,12,0,25,21,3,26,26,34,11,22,4,29,27,13,11,17,29,7,8,15,20,11,19,36,21,26,15,41,38,27,46,5,14,23,18,14,11,7,24,12,13,26,18,40,34,11,24,22,20,4,31,10,23,15,18,9,9,25,18,14,31,26,11,14,16,11,17,17,9,14,10,10,1,16,11,2,16,4,29,3,3,16,29,45,14,15,36,13,27,26,29,11,3,15,13,22,11,39,1,14,3,16,6,17,7,43,18,25,0,20,24,17,7,18,5,25,2,4,11,14,30,15,8,38,28,23,7,16,42,42,7,10,23,25,6,11,16,30,14,47,36,17,28,18,15,41,21,26,16,24,34,23,23,14,15,4,15,37,13,10,23,10,0,9,25,7,17,5,7,27,15,5,15,6,4,27,6,13,36,8,12,18,2,1,14,15,13,46,8,31,35,21,23,19,10,1,32,23,17,3,18,42,28,11,3,23,18,10,45,0,15,12,37,11,3,5,31,34,26,12,30,3,4,13,4,11,10,27,1,15,10,10,8,13,4,10,10,4,24,16,5,22,16,10,45,46,45,26,11,25,49,19,19,3,10,12,13,16,24,25,6,4,17,19,3,14,15,45,40,10,16,28,12,11,10,19,14,6,51,16,29,42,13,29,1,25,26,13,10,31,5,19,6,22,30,20,18,37,11,18,25,12,20,14,12,2,15,26,11,1,24,24,21,15,9,38,38,46,8,7,23,12,15,19,18,24,2,28,5,2,16,25,30,3,16,58,19,15,31,15,26,20,11,43,17,31,26,21,3,12,8,29,8,2,24,38,24,18,10,1,20,16,32,15,7,19,1,0,7,0,26,4,2,34,8,9,32,32,26,42,26,39,37,14,51,23,25,8,43,14,14,33,33,18,6,21,11,20,29,23,13,21,18,21,26,11,35,17,20,21,48,25,16,16,33,21,28,19,32,33,37,26,48,39,27,19,12,20,45,19,21,35,49,25,53,56,20,40,6,10,21,28,56,24,31,48,36,28,21,22,70,25,24,34,36,47,29,82,58,49,63,58,64,71,40,68,53,42,46,59,74,94,99,79,97,103,128,109,78,100,112,117,100,129,104,115,92,106,91,82,144,140,131,126,127,131,128,148,141,115,138,138,137,143,124,139,114,151,124,150,147,126,159,145,155,170,139,158,136,158,126,146,157,134,164,174,137,143,98,149,144,132,129,157,149,123,110,134,126,112,145,139,161,139,137,138,145,146,131,109,127,108,125,138,148,121,130,112,131,154,123,170,155,150,137,130,126,153,142,122,131,129,117,134,148,126,162,117,136,157,143,151,103,143,156,120,150,154,157,159,122,159,130,140,144,145,152,117,164,162,143,151,160,129,147,142,151,116,144,140,142,148,130,159,152,156,124,156,120,160,118,118,99,148,186,132,125,111,113,122,122,125,123,132,124,129,134,149,132,130,118,123,128,145,125,41,0,24,35,37,7,18,31,15,20,13,18,12,23,8,39,16,23,16,30,2,23,21,17,16,21,8,12,21,7,22,17,28,27,4,3,35,4,20,30,8,3,15,19,42,29,12,31,18,18,13,11,19,24,14,25,51,11,41,8,11,10,1,19,14,2,11,16,25,19,11,11,16,12,52,19,8,18,12,18,42,23,8,12,24,19,12,17,27,28,13,42,5,24,19,12,35,7,15,15,17,12,19,21,12,35,13,35,11,7,0,22,21,28,18,13,31,21,1,5,15,14,41,8,21,41,30,0,26,16,6,17,22,13,28,8,20,30,9,26,30,6,12,34,45,42,58,23,4,4,14,3,41,7,25,1,7,10,2,20,37,24,22,31,4,29,6,5,29,23,11,13,22,31,30,33,13,13,24,1,4,6,10,21,3,13,4,30,22,17,26,19,13,30,47,32,17,18,0,18,12,18,12,10,9,5,8,18,3,35,12,41,17,13,7,10,24,28,34,24,6,39,9,25,30,13,27,10,4,18,10,31,32,8,23,39,26,14,13,35,28,18,10,25,23,4,20,6,25,40,18,38,15,11,25,22,30,12,32,20,25,2,20,16,23,9,31,20,5,22,13,15,14,23,23,21,18,12,11,16,5,1,15,4,40,18,20,18,7,21,8,22,1,15,28,30,17,38,20,8,33,17,23,14,10,1,22,45,20,19,19,12,10,25,3,3,13,12,29,7,0,23,13,1,10,18,29,54,5,10,22,12,13,20,16,21,6,12,3,5,23,19,17,10,13,16,12,24,
31,20,36,27,23,1,32,30,18,8,24,27,12,24,50,28,5,6,5,1,24,30,13,33,27,18,23,5,2,31,15,1,8,32,17,21,13,8,15,34,30,7,20,22,15,4,7,10,35,14,14,41,14,42,13,34,16,20,14,22,29,26,26,8,12,16,30,24,22,10,4,0,9,21,13,8,11,20,10,17,29,12,15,14,17,20,6,18,11,19,6,11,24,22,3,26,31,0,3,12,32,8,21,5,18,20,26,9,5,33,19,25,31,38,20,16,14,5,17,10,34,12,26,8,12,19,38,28,30,17,33,32,6,21,38,19,15,3,31,22,35,7,21,3,30,43,17,14,10,40,28,34,16,30,26,40,18,6,29,9,18,2,36,25,34,16,30,3,30,14,13,28,36,38,11,38,38,51,25,8,36,21,22,17,37,17,33,38,24,50,31,28,25,20,18,30,38,40,9,31,22,14,42,33,35,40,36,53,10,47,44,34,60,57,35,49,45,70,56,39,33,31,48,53,25,40,53,35,52,49,32,60,55,38,50,65,65,56,71,101,91,91,77,98,116,112,110,84,118,110,104,102,117,128,141,112,114,152,125,142,130,128,147,142,144,134,112,156,130,139,119,147,130,131,140,131,130,119,145,106,112,103,128,112,129,123,107,135,133,110,131,154,148,130,149,148,138,129,162,120,112,148,138,126,141,124,122,139,134,126,137,121,140,103,134,147,154,115,117,147,104,110,127,153,157,147,163,172,152,137,159,152,144,128,123,144,155,136,134,131,142,115,136,126,128,123,101,120,107,113,136,138,120,113,97,85,111,111,133,111,105,143,102,89,141,106,118,149,135,122,115,106,96,149,119,123,153,90,136,138,115,123,130,148,150,122,124,144,135,124,127,116,126,118,151,138,138,144,96,136,121,138,99,98,84,79,103,94,62,71,85,66,78,81,97,101,84,88,85,121,59,102,107,100,100,132,89,64,81,82,74,94,59,63,43,62,53,50,71,93,56,98,73,68,54,53,46,36,61,70,70,50,51,54,86,42,63,62,46,89,67,67,45,77,54,68,74,72,69,71,47,67,71,51,63,58,56,70,63,52,83,57,39,40,21,23,61,69,49,46,35,39,23,47,44,62,43,36,40,49,52,46,39,37,54,62,25,14,39,45,26,67,43,25,52,65,40,35,54,51,39,49,38,37,43,34,56,43,35,30,53,51,35,47,32,28,40,30,48,40,32,23,18,44,48,48,30,30,37,34,29,36,46,44,45,37,52,42,58,45,29,30,34,33,51,42,49,35,50,46,22,44,41,50,66,57,36,15,31,40,53,52,27,43,62,33,40,37,36,33,36,22,54,46,22,42,52,40,32,58,44,22,42,36,32,55,31,42,60,43,59,73,54,41,60,24,51,51,35,64,65,46,54,68,60,55,62,69,81,68,80,90,62,48,78,71,82,68,74,42,82,95,84,82,28,48,61,86,73,42,70,60,56,67,31,27,30,33,64,29,57,43,20,48,13,28,29,10,40,32,51,31,46,29,32,31,28,64,28,11,39,56,27,39,32,42,20,22,54,47,45,74,79,46,63,43,59,52,34,51,44,85,82,77,66,68,84,70,119,126,94,111,131,114,145,148,146,141,140,128,134,156,128,147,184,156,139,158,142,122,146,153,152,150,156,149,107,166,136,172,149,171,146,164,162,150,146,166,163,163,142,141,158,181,149,143,121,162,149,147,133,168,147,150,157,127,159,161,169,160,166,149,168,141,169,146,164,168,147,152,177,174,156,144,148,133,171,138,154,153,146,152,153,118,162,146,127,137,142,174,137,160,102,85,85,86,117,152,141,174,135,168,102,9,17,25,0,15,1,9,20,13,12,23,0,23,10,30,24,18,18,14,35,6,43,28,16,11,8,22,3,30,41,21,12,9,10,20,33,35,17,33,8,34,24,46,23,49,38,30,7,27,11,26,13,12,34,11,13,49,24,9,44,10,10,30,35,27,14,20,37,37,7,20,14,31,29,26,37,16,19,21,6,43,45,30,33,28,37,35,11,31,41,8,16,32,30,24,38,13,61,40,27,50,40,11,38,16,51,33,22,12,30,28,28,12,8,36,56,63,64,47,49,35,24,40,48,56,50,71,61,61,68,58,31,64,51,48,76,82,57,59,47,62,89,111,89,89,89,97,75,81,61,57,70,91,97,101,71,100,72,84,89,110,110,115,151,132,123,121,133,137,125,161,157,99,119,117,126,134,108,121,112,120,126,146,121,134,94,144,128,122,136,134,142,149,107,141,134,151,131,130,142,124,146,134,153,136,113,107,117,123,142,165,138,128,129,156,135,143,117,116,127,123,118,129,130,86,138,95,122,129,145,123,121,147,123,140,145,176,136,114,140,110,129,115,117,108,159,118,122,130,127,126,123,116,114,139,122,152,123,136,135,94,120,133,115,138,131,113,124,1
06,117,97,125,153,147,130,116,112,122,113,79,130,131,99,124,97,93,124,116,104,125,115,114,106,79,99,118,104,129,117,116,123,133,147,127,102,72,90,108,77,98,105,109,94,65,41,79,92,99,98,49,83,78,72,96,60,63,77,58,58,48,81,67,111,80,23,51,48,87,65,56,51,68,85,52,49,102,55,50,69,50,38,69,64,57,33,68,52,71,49,57,50,50,54,63,46,62,52,77,77,52,57,76,39,37,71,48,63,60,64,67,46,62,59,68,40,82,32,64,72,50,58,70,40,58,39,63,51,51,38,53,66,50,59,42,42,38,43,28,35,14,28,49,60,63,50,45,32,27,65,47,60,39,52,40,46,46,24,74,53,57,37,60,50,37,49,43,43,45,49,50,48,37,43,52,29,57,52,49,39,23,67,43,49,55,36,33,25,55,45,35,59,34,36,43,40,48,39,58,26,31,40,50,40,32,26,32,28,51,60,45,36,62,70,56,26,71,50,54,49,35,54,31,50,19,54,41,37,23,35,54,29,11,40,34,37,34,47,56,29,40,35,33,54,65,34,61,65,37,63,42,75,48,52,64,52,41,23,55,53,46,68,60,68,51,23,58,45,49,49,47,90,73,69,67,75,91,56,62,29,66,77,48,44,32,64,63,42,68,70,63,53,54,40,49,23,40,63,44,40,38,39,50,39,7,40,38,31,27,32,37,22,36,4,31,42,33,16,32,30,22,49,50,20,54,30,30,38,44,47,43,46,57,60,46,76,84,40,100,62,98,97,56,49,72,77,94,65,90,74,154,139,122,137,124,124,114,153,134,152,152,152,158,179,181,138,169,119,148,162,124,145,142,156,161,138,139,156,146,153,140,168,174,160,152,178,147,161,170,166,162,152,156,154,160,130,157,120,138,166,149,149,136,145,122,160,161,132,156,155,144,161,163,173,157,169,166,150,170,143,151,127,154,166,146,153,181,161,134,138,128,182,153,143,146,141,123,170,139,128,126,113,88,77,68,78,104,154,170,171,114,14,5,7,6,23,19,18,10,13,38,33,15,28,1,15,25,44,13,19,29,44,10,21,13,24,13,28,51,16,31,28,22,15,17,44,9,14,28,9,37,9,38,24,5,10,20,4,28,18,32,13,19,15,14,28,32,25,28,21,17,5,26,15,41,27,16,12,23,19,27,14,10,28,49,3,42,60,14,4,41,26,29,24,11,28,40,38,21,17,12,32,42,20,20,33,45,30,23,29,30,39,48,26,36,12,18,23,39,24,41,10,28,11,32,30,52,25,11,82,41,48,40,51,34,79,31,54,28,37,31,36,20,69,47,39,67,49,66,67,93,77,89,83,89,72,90,84,70,68,76,79,100,112,106,80,107,95,108,111,91,131,123,131,123,132,137,146,126,135,112,108,155,119,106,146,126,138,127,127,103,96,145,138,134,121,109,126,150,126,149,136,124,147,159,128,128,113,123,100,102,113,135,104,129,144,93,81,96,127,126,124,124,123,114,126,128,148,132,146,138,143,111,141,151,145,153,145,133,142,146,139,130,122,111,110,136,131,99,134,111,121,137,121,132,122,134,112,133,128,143,100,125,142,137,122,135,136,130,117,153,133,137,131,127,99,115,101,125,133,106,119,143,147,135,112,143,129,114,105,122,132,121,134,117,90,114,148,129,140,151,133,139,99,112,134,113,130,132,144,110,116,130,134,146,124,98,120,105,120,121,135,92,135,118,131,142,114,144,121,83,79,106,112,121,86,73,70,86,74,85,76,102,123,76,118,92,58,70,71,81,71,111,117,109,100,77,67,75,78,86,59,96,47,38,97,65,62,100,67,71,40,34,66,42,59,26,68,64,27,32,31,52,45,50,47,60,66,73,62,65,42,77,47,56,37,21,70,67,70,47,55,38,56,47,55,73,44,55,41,65,48,44,40,39,46,54,62,43,14,68,53,78,70,53,63,46,50,51,79,62,31,42,47,68,60,46,73,25,78,38,53,34,42,40,24,48,23,47,21,48,52,55,44,48,66,13,41,46,53,30,61,36,30,47,37,36,79,68,59,49,41,42,47,46,55,44,46,18,8,57,52,28,26,33,39,76,64,26,30,48,35,41,56,33,37,29,51,42,30,54,38,22,44,49,47,25,40,38,23,56,37,43,42,18,46,32,33,37,37,59,20,25,49,18,21,47,40,47,65,46,41,43,50,64,55,66,57,55,63,50,34,37,34,67,63,60,31,65,58,75,71,88,61,77,92,88,84,90,75,84,54,87,106,48,58,58,57,71,41,30,52,36,31,36,43,19,40,13,50,35,26,35,25,41,18,58,19,20,56,36,41,22,21,24,55,40,28,51,25,23,27,22,21,44,47,33,42,41,25,52,68,51,64,70,53,62,37,77,59,51,75,61,51,48,72,99,78,78,127,134,141,127,141,142,124,146,154,158,134,166,156,170,171,1
75,158,173,158,149,160,164,159,159,175,164,126,161,145,146,158,133,169,174,135,152,138,158,167,172,186,156,165,162,160,160,172,165,149,143,152,156,136,149,163,148,154,158,138,145,161,151,153,154,138,134,147,144,133,150,152,165,189,158,156,152,167,150,159,122,143,166,137,136,131,141,143,142,165,134,147,146,157,143,94,86,93,87,114,122,167,123,20,3,20,23,12,27,1,22,15,20,9,16,1,26,6,17,10,34,32,12,23,19,53,33,27,37,12,19,38,3,41,8,14,9,14,26,6,32,6,25,45,26,7,6,14,15,13,40,31,8,21,7,27,51,20,29,4,35,3,15,16,38,13,18,10,8,41,29,32,9,2,19,16,13,46,14,18,16,19,14,44,7,16,8,20,27,4,10,2,17,28,30,62,25,10,18,35,14,33,17,31,50,39,11,30,7,9,14,33,44,34,54,34,41,13,26,21,36,39,28,52,48,45,54,70,53,65,61,69,42,52,64,83,75,84,71,103,83,91,79,66,82,84,75,68,78,87,71,75,84,69,109,108,116,133,139,109,120,126,129,141,131,123,121,137,112,96,137,120,117,132,139,137,136,103,142,151,138,162,142,166,128,165,137,154,145,141,156,149,133,125,139,119,106,131,133,138,151,133,138,135,107,144,145,142,125,126,113,121,110,106,125,128,115,130,140,106,125,143,146,150,166,149,151,148,140,136,145,137,147,143,153,145,106,122,132,103,132,132,138,128,153,121,120,119,128,113,116,95,119,147,126,121,141,133,129,122,142,162,161,158,159,114,142,131,162,146,150,126,119,117,148,124,133,110,127,131,115,122,140,171,139,156,103,117,124,148,155,126,128,105,118,137,145,137,125,132,116,129,128,123,133,138,110,140,123,147,148,168,146,139,132,134,158,152,144,156,134,138,113,87,100,113,106,117,80,91,95,96,108,97,97,84,80,94,52,95,79,101,112,98,110,88,80,59,88,46,68,102,77,69,71,53,47,44,77,70,33,56,86,45,85,66,55,48,54,65,74,69,43,54,61,43,59,80,59,70,38,62,61,64,67,72,66,43,76,53,43,64,51,48,46,71,63,40,57,67,54,67,61,55,27,28,34,60,60,54,36,53,53,74,53,23,51,54,47,59,43,22,44,38,49,58,70,47,37,47,65,61,15,30,26,52,47,55,62,58,65,45,26,53,30,55,64,34,56,38,59,39,34,28,27,52,36,51,28,22,36,59,51,42,49,45,45,51,45,40,32,27,53,42,31,47,46,3,46,39,44,40,42,55,58,42,42,20,29,42,42,35,66,31,59,53,52,47,39,28,38,22,55,22,48,38,20,41,35,31,32,20,39,37,44,54,34,24,47,40,36,62,32,35,55,57,23,72,30,67,42,58,42,23,21,28,58,49,77,44,73,83,58,92,62,63,91,72,79,80,112,92,86,104,68,65,38,43,66,55,47,50,43,59,33,27,33,14,6,66,52,46,31,37,31,44,26,31,66,28,36,44,14,38,19,45,33,48,24,44,51,31,40,38,30,35,16,54,42,37,29,44,77,53,67,64,58,46,61,63,46,69,35,62,33,94,69,38,107,69,76,94,120,137,141,151,162,148,128,121,148,167,195,150,163,158,180,171,156,159,150,173,154,182,171,163,164,197,165,154,179,182,156,182,153,147,135,118,154,134,187,171,164,153,153,172,171,143,159,165,151,149,158,145,148,167,160,160,150,140,160,156,145,154,151,141,160,135,129,131,161,157,143,184,150,159,149,159,156,158,163,147,171,158,171,135,147,131,146,112,156,146,135,174,150,132,112,103,81,98,113,139,128,5,5,0,15,1,21,7,22,17,11,15,45,17,14,53,19,15,30,18,6,39,14,16,39,34,29,5,19,47,33,19,5,14,29,27,8,25,22,34,27,29,25,12,14,8,15,31,20,29,7,18,15,2,33,36,19,21,24,9,20,8,13,18,11,8,11,30,14,20,36,44,42,11,11,12,36,28,3,29,4,36,39,36,22,42,25,30,15,17,8,28,36,27,40,31,41,18,44,28,28,26,35,41,20,23,64,40,36,10,36,40,43,26,19,33,40,29,40,45,61,48,33,41,37,76,57,75,83,122,102,70,102,113,91,100,120,56,91,78,96,59,54,104,67,131,67,92,108,96,112,91,123,128,115,149,129,124,127,130,129,106,121,117,119,98,130,134,105,117,117,132,140,143,131,130,146,151,149,139,147,156,138,97,109,133,132,141,129,133,119,131,127,130,97,121,121,116,133,134,123,136,150,159,156,116,178,166,159,130,150,118,148,157,161,130,144,151,152,154,169,153,161,130,137,124,154,132,146,137,163,166,142,155,145,165,144,108,122,126,132,12
0,109,134,143,162,96,134,122,125,102,151,147,120,162,131,137,142,160,165,122,153,147,119,136,117,148,163,139,126,146,140,156,141,148,118,134,126,121,126,159,120,102,123,147,101,108,150,140,139,138,107,109,142,125,118,121,112,107,122,122,136,118,108,122,124,142,128,125,111,137,132,93,116,119,108,122,108,90,114,89,112,86,89,106,92,86,97,93,92,87,79,80,62,78,60,65,69,82,72,72,79,75,67,49,53,39,54,29,46,66,49,57,41,63,34,22,66,34,54,51,50,64,66,47,64,61,62,54,65,90,52,76,67,50,39,57,57,37,53,57,50,69,67,42,67,53,40,45,63,64,55,40,45,34,25,55,50,35,46,50,27,47,50,43,54,37,42,50,69,60,46,49,48,44,39,28,47,42,53,32,29,31,23,56,47,15,71,28,42,44,46,46,30,49,36,45,39,62,29,48,34,55,58,20,41,37,51,52,46,61,40,20,47,49,39,38,37,32,53,45,39,27,51,41,37,47,49,31,42,31,39,20,65,45,47,46,51,49,52,46,21,39,44,38,54,38,34,44,26,35,46,27,49,36,32,25,27,37,35,25,57,56,35,42,33,48,44,21,49,19,24,33,59,47,51,62,70,57,45,23,37,45,67,36,27,46,20,36,44,57,43,43,43,61,47,79,49,64,49,65,56,55,78,72,88,98,78,74,57,59,79,53,60,53,52,64,81,84,29,43,44,28,48,32,18,46,50,40,22,43,48,44,9,38,43,33,24,50,21,60,35,54,34,56,30,31,40,56,37,29,35,46,49,58,48,50,51,43,29,46,55,72,65,49,56,70,58,46,73,39,56,38,78,56,72,112,97,65,66,101,128,112,132,157,153,113,135,141,175,149,137,137,132,161,177,198,188,184,175,191,158,171,166,166,212,156,139,150,141,168,161,150,162,147,157,159,169,166,158,121,156,174,151,171,162,166,150,152,133,163,146,142,142,154,125,152,145,159,157,154,137,161,148,150,162,143,142,148,141,140,134,138,142,143,152,132,148,153,138,146,156,149,153,148,141,131,142,138,124,177,158,165,145,129,119,97,80,77,111,100,24,0,14,9,26,6,23,16,0,21,28,17,9,14,54,41,22,3,18,21,32,30,18,30,11,42,20,43,16,15,8,24,14,25,41,9,5,27,25,45,28,32,16,0,35,12,12,22,10,19,41,10,37,0,9,18,21,29,37,23,16,13,32,44,17,6,24,25,34,19,37,7,38,33,11,6,5,30,17,19,7,37,21,13,54,12,4,25,26,0,18,30,31,24,38,25,0,27,38,20,21,51,30,40,40,34,34,7,41,17,20,12,28,46,45,29,52,55,40,42,55,70,24,42,49,82,72,61,70,51,112,50,62,64,71,67,82,66,62,53,33,59,70,111,95,81,99,102,111,146,157,143,130,133,145,144,148,132,138,123,126,154,129,127,110,110,123,147,158,132,125,128,147,132,124,163,129,104,113,138,120,88,134,117,108,123,140,117,130,142,120,121,141,136,136,88,154,131,126,127,140,120,111,120,110,151,137,117,153,166,134,143,140,111,106,131,130,150,147,141,142,160,169,151,142,155,136,127,137,130,123,136,126,150,98,148,148,137,127,136,125,135,154,117,148,124,136,138,163,129,125,139,137,137,147,139,140,141,115,143,117,148,151,146,136,147,143,134,156,156,167,155,161,131,169,134,157,169,145,131,136,125,152,125,156,140,138,138,147,148,160,131,121,87,92,85,115,94,117,126,112,93,75,92,115,101,100,96,81,68,114,142,75,94,105,91,105,77,78,70,89,70,88,70,73,79,85,94,95,104,75,80,42,40,78,82,93,71,78,78,63,73,66,60,64,59,32,64,62,58,53,46,69,53,66,52,44,70,36,41,47,80,64,71,64,62,58,57,66,68,67,56,38,61,63,55,51,63,69,42,63,53,58,67,72,42,56,32,46,61,50,40,55,92,41,55,48,54,43,42,50,53,73,45,63,61,26,45,48,70,46,66,48,34,53,59,66,44,50,55,69,26,40,45,36,43,51,61,59,23,36,52,67,28,53,42,51,46,59,40,31,37,60,49,24,54,49,41,63,37,46,39,58,27,58,30,55,44,57,48,40,21,36,67,33,40,45,43,68,50,32,59,42,41,40,45,53,28,16,25,61,54,19,37,41,28,32,40,39,58,31,32,60,68,59,42,29,45,42,40,59,75,42,65,29,36,35,43,51,57,31,36,61,46,44,51,79,52,29,37,79,68,56,65,39,72,56,77,67,73,40,54,64,45,69,67,60,55,43,24,54,80,96,78,95,72,45,63,41,60,57,40,41,76,55,65,69,57,61,39,76,57,29,46,56,19,43,28,42,37,28,43,30,56,23,33,13,56,26,41,59,39,37,46,54,16,47,41,25,36,36,49,28,53,60,29,51,43,44,52,60,44
,49,50,32,56,67,72,68,42,61,42,67,44,71,52,60,48,72,113,121,104,109,141,131,150,135,137,170,129,143,141,120,154,167,164,162,165,127,153,138,151,166,141,151,146,158,156,188,155,169,170,178,166,142,169,164,163,129,180,152,165,150,136,154,141,139,153,170,175,176,156,166,174,162,157,178,138,158,172,135,167,170,174,169,149,171,158,163,170,139,165,143,140,162,160,136,122,157,154,162,135,150,142,163,151,153,120,113,116,135,137,159,146,140,126,91,59,88,80,8,7,41,9,31,13,5,4,15,31,10,8,18,2,23,10,45,12,19,26,4,7,34,52,12,14,59,30,22,8,22,13,19,37,25,32,11,18,12,28,19,23,33,24,26,18,24,16,28,14,13,2,19,22,52,24,28,19,8,40,25,11,40,17,24,21,16,44,19,27,6,22,35,43,16,3,18,11,20,32,47,20,10,21,4,40,7,28,4,52,44,22,19,19,23,15,33,52,35,62,44,25,18,18,39,31,9,14,39,30,27,39,48,34,24,45,28,56,41,61,56,43,54,63,72,68,61,54,74,73,29,39,57,46,52,31,74,73,45,28,24,64,28,49,48,89,79,95,125,128,146,135,136,136,140,152,181,165,142,151,153,165,150,159,149,126,148,152,132,120,151,131,128,143,120,134,125,114,140,153,119,117,126,133,138,143,143,119,130,132,151,152,130,149,128,144,156,147,133,124,110,118,121,140,140,122,122,134,123,140,137,154,133,100,125,119,121,98,95,139,136,131,150,148,143,147,152,173,148,136,144,138,137,140,145,151,152,155,143,122,150,124,156,139,151,128,121,141,139,142,123,148,115,107,126,114,123,150,139,150,135,169,146,138,129,120,117,149,135,122,151,145,139,111,132,132,114,153,142,139,146,139,140,151,127,112,157,164,143,136,112,127,121,119,142,118,115,100,121,137,105,106,119,113,122,97,120,107,94,75,92,97,89,94,107,110,86,104,97,83,110,107,108,118,63,85,89,72,101,75,117,112,80,63,50,56,65,63,70,67,82,66,70,90,89,72,37,83,42,82,70,86,70,79,70,48,64,48,65,85,53,54,38,43,80,54,68,58,34,40,53,48,55,53,55,49,69,42,61,49,49,52,53,48,61,39,57,71,69,52,74,56,23,51,68,41,61,72,69,50,30,40,33,65,38,42,51,56,40,39,53,58,46,36,51,23,63,50,39,52,60,48,47,44,34,36,54,31,22,37,55,60,56,44,63,46,56,30,50,36,34,11,59,40,68,32,45,58,23,26,41,36,51,30,27,46,44,47,43,37,34,53,63,11,42,34,43,61,34,34,43,36,55,43,39,63,38,57,47,72,39,36,53,43,28,45,47,43,22,33,15,45,48,41,41,32,32,57,36,34,62,42,48,42,24,61,58,31,42,45,57,32,25,54,36,49,63,63,66,41,29,39,38,40,36,63,47,67,57,38,57,27,61,52,62,33,12,50,53,69,59,41,57,79,86,69,66,29,52,37,83,74,79,48,47,45,35,43,73,48,38,43,47,47,53,39,20,14,26,29,46,41,20,49,40,20,21,12,55,62,53,47,38,21,26,55,27,44,53,52,36,53,44,33,61,52,51,48,48,44,37,74,39,41,86,36,47,63,43,57,92,47,94,64,90,98,77,66,94,119,139,111,126,139,128,120,140,144,162,182,159,139,120,137,169,149,141,129,146,137,136,139,146,176,161,129,129,148,164,157,127,171,140,152,152,146,123,148,148,137,152,131,161,161,134,155,138,131,174,172,144,155,159,152,163,141,154,162,159,157,155,147,122,143,167,120,135,160,158,162,150,143,128,172,156,130,139,155,167,141,153,159,162,137,118,164,146,170,137,157,161,181,134,126,162,136,131,76,70,74,33,18,20,8,9,29,2,17,12,5,15,26,6,32,25,8,18,18,16,25,31,43,26,5,24,15,39,8,8,43,21,40,28,16,30,26,39,46,26,2,24,7,9,16,8,47,23,22,22,32,17,53,20,9,5,6,39,20,30,30,13,46,28,23,12,20,21,15,3,34,17,32,26,27,21,19,31,12,34,22,33,27,16,4,6,36,43,14,40,21,4,30,38,35,12,49,56,42,48,40,22,35,10,46,49,23,32,21,61,24,41,41,45,35,42,52,54,33,38,47,56,47,42,53,63,40,27,32,29,64,26,31,43,31,52,53,71,68,52,74,45,65,45,54,79,83,104,110,116,77,123,119,107,122,149,137,157,149,144,145,120,137,119,120,138,122,124,148,125,134,142,141,124,120,123,102,114,141,123,125,115,124,163,160,122,131,99,134,140,120,152,140,144,132,131,152,141,148,133,130,157,152,129,121,109,131,137,133,130,126,123,124,120,128,103,130,
124,109,120,109,121,115,148,150,159,131,143,142,163,160,163,127,140,154,142,148,120,151,133,149,156,131,144,127,139,122,152,138,136,136,119,135,124,140,150,137,144,127,135,152,127,127,149,125,122,134,112,114,107,124,145,145,139,118,112,113,119,88,117,136,105,120,99,80,102,91,84,80,103,94,55,76,107,118,140,134,151,94,120,125,124,150,166,147,134,117,100,101,100,90,104,119,103,98,83,97,108,120,106,110,121,133,130,109,121,108,88,44,46,60,89,93,94,89,76,52,62,69,69,66,88,63,80,64,104,95,84,71,64,76,67,28,52,75,58,63,84,110,80,65,44,63,46,72,53,55,65,81,54,47,60,50,45,54,64,67,62,74,36,54,60,49,43,37,52,47,81,58,43,51,73,56,41,76,48,51,55,67,44,65,58,56,46,59,71,43,72,61,77,67,79,64,39,34,40,69,57,63,59,48,27,47,65,28,41,23,83,44,54,44,71,63,51,47,58,14,60,38,41,43,33,36,45,39,48,22,47,16,28,40,40,43,41,31,37,60,50,11,23,35,55,52,52,57,51,39,51,16,49,56,32,40,52,49,43,48,72,36,56,63,29,25,48,64,52,44,43,42,63,42,31,63,50,59,41,55,49,18,33,35,29,55,57,66,77,40,44,23,62,52,51,52,66,59,60,44,55,64,42,63,31,53,30,44,29,46,62,24,69,68,60,40,45,17,29,57,61,33,42,30,57,59,41,61,37,43,68,43,53,74,53,61,40,74,63,54,57,63,68,47,26,29,56,29,41,59,57,35,29,59,46,41,42,57,36,23,20,50,40,29,27,13,37,19,35,47,54,52,42,37,30,29,51,51,15,63,63,18,37,43,33,60,67,22,54,80,36,52,49,70,87,75,81,46,92,78,108,111,130,137,129,143,135,125,157,141,154,134,167,168,191,157,153,158,161,172,184,157,155,133,139,149,141,142,146,162,147,125,162,172,175,163,161,156,142,154,141,145,165,144,147,135,191,160,182,191,161,152,168,160,163,199,177,163,142,195,153,153,144,167,146,150,166,184,150,170,157,170,155,147,177,163,145,164,151,138,158,145,159,140,167,166,127,145,162,133,146,134,154,146,122,154,139,158,143,145,143,130,117,69,23,16,12,21,26,12,15,14,8,25,12,29,36,26,32,26,35,18,27,25,8,28,3,14,38,50,9,9,29,37,31,34,19,8,22,17,46,40,5,37,17,55,31,21,40,27,20,16,30,35,4,16,37,18,14,27,16,22,40,35,20,36,8,20,20,13,31,20,16,17,44,40,47,12,16,22,11,33,21,34,25,34,19,35,33,11,23,26,38,29,24,23,23,59,29,27,24,62,22,55,42,38,58,55,31,41,54,68,66,37,56,63,29,46,38,64,80,55,82,77,80,70,109,118,119,130,122,124,139,122,99,123,104,118,103,98,85,62,86,91,85,105,114,114,100,109,103,87,56,67,101,106,117,99,92,123,114,101,87,109,77,108,132,128,154,120,157,163,123,141,146,133,164,147,156,116,136,135,140,164,151,145,131,154,145,150,135,128,119,140,136,112,113,151,137,151,155,158,176,135,126,136,135,136,150,157,122,127,144,153,143,155,140,151,175,177,167,151,162,165,166,153,138,141,147,161,164,157,145,155,141,116,131,103,128,131,113,128,133,117,128,134,138,133,159,122,121,123,140,148,143,154,149,136,140,156,143,162,152,133,128,104,113,159,144,165,128,143,142,141,139,121,136,131,156,138,153,150,155,175,163,136,125,133,149,174,130,118,152,145,123,131,132,129,119,130,125,138,127,137,127,92,116,129,106,140,126,112,118,151,124,123,130,142,103,140,131,120,142,125,134,116,110,96,90,116,106,151,152,113,105,92,57,96,99,69,93,102,97,108,98,120,116,93,84,67,77,62,80,71,72,72,85,62,35,81,62,58,69,74,99,99,58,71,62,59,65,56,102,76,72,67,73,42,76,78,45,43,56,51,42,60,70,63,38,51,53,60,56,56,41,35,66,49,49,77,47,31,61,61,31,59,43,75,57,38,58,72,50,50,51,83,44,56,64,47,56,49,64,68,45,72,47,58,56,19,34,59,79,72,25,18,34,40,42,38,52,51,52,56,37,56,55,27,53,51,29,56,15,54,20,30,49,45,37,51,41,51,34,49,50,45,51,61,45,48,22,64,15,49,27,27,56,62,42,59,42,55,52,58,40,36,23,52,36,50,71,43,52,69,55,44,68,41,46,44,53,73,55,29,54,35,26,69,37,45,39,38,50,58,34,61,39,48,30,44,40,30,50,54,52,53,45,39,22,50,41,54,49,61,60,62,33,49,25,33,52,46,44,48,51,71,61,51,53,40,33,35,49,49,38,4
5,59,67,54,58,72,53,63,39,62,52,30,64,63,50,48,44,54,45,47,41,61,49,45,51,41,43,68,21,26,16,39,37,34,53,39,61,48,34,56,61,70,44,66,55,56,50,49,50,61,31,43,66,52,57,72,65,48,74,49,40,74,80,89,97,100,122,113,105,81,134,126,142,144,151,163,174,187,169,150,179,166,160,133,154,168,152,173,172,183,146,153,161,163,141,157,160,175,165,178,156,160,150,147,130,132,152,168,138,140,146,155,142,158,143,172,172,135,148,158,133,171,146,169,138,136,161,152,132,146,153,140,140,133,154,157,147,138,151,132,135,149,160,147,165,143,142,138,171,128,118,119,175,133,127,125,105,132,136,130,123,163,145,162,151,132,125,133,101,21,9,19,3,16,17,26,1,24,6,34,17,32,14,41,21,30,14,51,13,28,4,12,18,24,25,34,22,16,19,22,28,41,27,39,42,30,24,4,24,14,15,23,10,6,47,21,19,38,37,38,6,19,35,31,35,11,6,37,23,27,38,13,36,10,14,21,38,38,9,9,11,28,22,30,49,15,52,53,28,22,9,66,27,4,27,21,25,16,32,22,9,40,34,47,38,27,31,46,37,36,39,34,68,41,35,60,49,55,58,25,57,47,61,46,62,99,98,115,73,83,61,84,116,116,113,127,129,113,139,133,128,90,128,86,72,93,66,101,93,98,99,111,113,105,91,100,98,98,72,94,92,100,114,100,103,97,90,99,109,141,115,155,144,146,144,142,117,103,118,151,157,145,125,146,187,117,143,135,145,137,118,137,146,154,161,141,146,142,120,127,130,138,126,134,138,121,134,161,125,128,131,143,123,120,110,146,142,136,153,170,134,142,166,165,157,133,130,160,169,154,136,148,136,148,160,165,148,142,160,160,143,130,143,110,121,110,136,157,109,116,115,142,137,144,137,115,121,140,135,121,123,141,131,142,152,152,163,149,130,131,140,137,147,143,175,169,155,141,144,145,106,113,131,141,153,150,147,153,147,137,148,122,122,147,169,147,163,131,151,158,126,126,138,113,120,124,130,121,129,137,112,133,127,121,113,132,102,126,120,121,138,125,120,127,117,115,122,123,133,130,113,128,99,106,117,115,111,147,131,113,73,62,80,91,100,102,92,112,110,67,95,105,81,104,55,61,65,67,75,50,89,72,79,64,58,87,56,59,76,52,92,102,104,95,61,79,72,83,74,94,80,79,74,92,84,35,69,47,54,42,66,53,39,57,57,38,51,60,27,41,51,51,52,65,38,42,35,29,51,40,48,51,21,64,54,43,33,38,48,52,31,59,67,30,47,49,69,58,57,35,49,62,29,77,44,64,72,42,32,44,51,56,53,46,67,37,48,48,55,56,57,60,36,32,55,48,22,39,39,57,40,28,11,32,38,29,57,64,30,30,30,51,60,62,57,43,67,45,63,40,23,26,49,40,55,31,27,44,50,51,54,47,33,54,47,58,42,37,25,30,45,52,53,39,52,53,60,53,31,40,72,34,52,68,53,24,49,34,47,45,52,55,40,35,32,51,54,57,21,48,53,69,64,26,48,53,64,17,41,28,42,32,50,54,47,55,47,31,33,38,19,58,64,44,71,55,68,56,41,66,50,39,65,56,61,49,32,55,44,68,52,43,40,53,54,24,37,72,51,23,28,74,50,34,64,39,39,42,27,56,47,48,40,38,45,73,14,53,23,61,76,58,37,42,43,45,45,40,42,67,35,47,87,53,44,52,78,36,71,67,82,55,67,104,86,85,122,118,87,75,103,124,142,131,145,153,168,160,171,168,179,193,167,166,163,163,169,170,150,163,172,182,157,161,168,163,164,174,183,160,172,143,155,142,140,149,165,162,160,174,156,180,147,169,143,164,145,159,138,131,147,158,166,147,155,147,131,164,152,136,142,125,119,126,160,146,160,167,125,139,133,145,146,155,166,143,130,142,155,165,123,127,130,130,139,149,130,122,121,126,139,147,114,169,114,125,137,138,109,15,4,21,15,0,15,13,30,7,35,19,0,47,27,16,12,17,42,33,19,4,37,20,26,16,26,39,30,22,30,19,27,25,23,28,32,23,21,30,21,27,54,8,29,12,27,7,41,17,37,31,37,18,14,38,23,32,42,1,28,48,20,20,39,54,24,23,29,41,23,25,12,45,41,51,28,27,27,18,37,24,14,18,23,38,37,21,11,36,40,18,28,33,24,17,17,46,42,25,26,37,29,21,54,53,48,67,42,57,67,55,60,62,83,62,83,56,86,111,70,108,108,110,135,77,122,118,87,120,100,121,136,108,106,109,103,118,103,132,116,115,130,114,143,112,121,123,138,149,142,124,128,136,119,137,123,152,
124,148,169,156,135,139,153,158,150,149,139,180,137,144,152,154,130,146,131,137,143,133,141,124,140,157,129,132,138,163,136,159,109,132,163,137,143,171,136,116,115,134,155,140,152,115,122,138,154,157,164,144,151,143,123,144,125,131,148,114,129,113,141,135,102,149,154,159,162,152,86,135,134,149,135,162,139,136,156,141,124,163,141,150,108,157,152,162,162,134,144,144,131,151,149,170,151,144,149,142,162,149,145,155,140,139,152,145,117,126,131,131,141,139,151,147,105,145,122,125,114,119,113,125,143,155,150,146,142,111,105,137,152,97,13,13,5,23,10,25,16,2,0,7,24,49,31,16,54,45,0,30,12,13,10,14,7,30,24,19,24,38,22,12,2,16,39,20,22,24,7,27,7,32,28,20,29,13,16,22,28,5,2,43,17,8,28,17,13,7,5,12,15,30,15,32,30,5,23,24,40,5,40,0,7,5,5,21,34,7,18,25,20,28,41,53,18,14,30,13,12,14,2,11,9,31,15,1,26,21,11,24,20,23,21,20,5,10,8,12,21,18,20,33,50,23,7,15,28,26,18,32,16,33,8,21,25,13,5,25,33,27,5,3,10,34,8,37,13,26,14,19,23,12,4,14,20,10,21,4,19,1,29,8,26,30,26,28,36,29,26,9,9,15,21,17,18,25,24,44,2,27,17,26,9,36,23,33,4,7,11,6,8,21,28,27,11,27,15,17,31,41,9,38,20,24,31,25,14,16,12,9,16,16,16,23,21,30,22,8,28,29,10,1,17,11,20,16,6,21,18,11,35,3,11,5,9,20,19,16,25,7,34,24,9,32,22,17,13,16,27,12,19,24,34,18,0,13,30,5,8,10,26,10,26,19,17,12,12,38,20,2,13,12,35,16,29,3,42,35,15,40,15,11,5,19,17,8,45,39,20,10,40,12,17,27,6,13,42,9,17,7,18,22,14,14,20,18,22,1,5,15,24,18,33,40,33,12,11,11,11,36,8,45,19,29,40,2,20,9,6,28,9,15,14,25,29,13,25,30,32,19,16,8,24,21,45,30,27,45,15,28,29,5,29,18,11,20,9,32,14,18,47,2,10,17,15,19,29,5,13,14,17,35,21,18,5,16,12,11,5,6,29,30,21,28,40,10,31,3,13,38,14,2,21,22,15,29,4,9,6,3,16,23,13,33,13,28,22,17,22,19,20,29,43,8,3,20,17,15,21,21,21,13,33,6,2,12,2,21,16,19,31,21,14,19,21,31,26,25,37,3,15,17,14,20,27,29,25,2,16,8,14,22,12,36,50,33,38,11,26,28,20,23,23,15,34,21,26,39,29,50,34,24,18,34,25,30,15,16,39,14,17,3,24,21,41,19,55,41,9,13,28,22,19,28,24,5,29,51,19,14,35,16,27,28,0,39,35,36,34,39,35,35,31,41,35,40,46,47,13,25,26,13,50,32,23,48,37,34,34,34,25,11,38,28,27,41,27,36,47,25,43,16,9,30,49,41,54,74,47,58,64,52,66,41,57,52,39,72,106,88,87,97,118,119,129,82,102,112,89,103,129,97,132,118,108,126,103,111,91,100,128,123,135,115,136,112,130,99,139,157,120,144,125,131,117,153,138,101,155,129,147,162,160,136,139,149,160,134,132,169,142,160,112,164,133,120,121,139,137,146,160,136,110,157,147,160,143,119,140,157,157,132,151,155,146,134,120,132,114,134,144,166,166,174,129,139,144,165,125,119,159,134,120,116,132,140,144,116,123,116,127,128,105,144,143,153,155,144,149,127,139,129,150,123,169,138,151,137,128,126,169,136,152,136,158,146,164,149,148,147,140,139,165,128,131,132,154,125,115,166,137,143,141,120,145,147,145,136,118,108,130,171,132,126,135,93,107,115,157,147,132,112,121,124,123,128,118,118,108,110,147,103,17,0,3,9,15,36,5,37,5,26,35,17,37,15,1,27,21,33,1,16,7,1,22,21,10,7,12,6,0,22,9,12,13,18,6,19,27,17,19,24,16,22,9,32,18,7,29,27,18,18,1,17,47,9,25,13,22,21,13,10,13,15,23,21,23,10,24,18,34,8,14,24,33,6,10,0,33,1,13,37,2,12,37,9,35,34,5,21,37,10,28,13,12,43,15,23,5,31,10,18,13,18,25,26,45,4,20,10,20,11,16,7,14,10,25,16,17,3,9,9,10,9,12,0,8,6,11,20,20,17,17,3,23,12,22,11,7,17,10,7,16,22,14,34,25,16,14,18,6,23,3,16,31,16,11,21,37,12,12,2,19,28,30,19,12,25,18,35,14,29,26,23,8,39,16,20,2,31,14,42,16,8,29,22,18,5,5,15,26,16,1,18,20,27,23,5,16,17,21,17,24,28,33,10,25,16,9,19,23,27,19,28,33,18,30,21,12,16,12,19,7,29,20,12,10,4,32,19,21,43,18,32,28,22,31,29,13,12,16,36,19,13,30,32,28,22,24,14,22,38,26,2,31,16,13,16,11,24,15,12,34,8,26,10,33,13,5,8,8,6,27,19,12,7,27,17,48,24,10,1,5,17,25,11,6,
38,13,15,20,14,21,14,23,26,22,0,23,36,20,32,16,8,12,19,8,11,24,1,13,12,26,29,23,2,21,8,31,20,25,3,19,35,6,13,5,16,26,19,3,14,20,16,3,20,10,42,15,18,13,27,19,3,48,4,8,19,14,22,26,11,27,18,12,9,13,28,37,1,14,23,16,23,21,17,30,13,26,20,15,23,21,9,29,37,16,17,24,21,25,4,8,6,20,34,12,17,10,19,0,23,26,0,14,24,4,15,31,4,3,8,35,30,22,1,13,29,21,15,8,42,6,14,13,41,2,17,43,17,7,7,30,4,17,38,20,8,8,15,35,19,11,13,24,31,16,23,36,26,8,29,27,libvpx-1.8.2/build_debug/non_greedy_mv_test_files/estimation_16x16.txt000066400000000000000000000171531357355204000261440ustar00rootroot0000000000000030,45 12,8;12,8;12,8;12,8;12,8;12,9;12,9;12,9;12,9;12,9;12,9;11,10;11,10;11,10;11,10;11,11;11,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,11;9,4;10,12;11,12;11,12;11,12;11,5;10,11;10,-1;12,8;12,8;12,8;12,8;12,8;12,9;12,9;12,9;12,9;12,9;12,10;11,10;11,10;11,10;11,10;11,11;11,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,11;11,13;11,11;11,12;11,12;11,12;10,9;10,11;10,-1;11,8;12,8;12,8;12,8;12,8;12,9;12,9;12,9;12,9;11,9;11,10;11,10;11,10;11,10;11,11;11,11;11,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;12,12;11,12;11,12;11,12;11,12;11,11;10,11;10,11;10,-1;12,8;12,8;12,8;12,8;12,9;11,9;11,9;11,9;11,9;11,9;11,10;11,10;11,10;11,11;11,11;11,11;11,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,14;10,13;10,8;10,11;10,0;12,8;12,8;12,8;12,8;11,9;11,9;11,9;11,9;11,9;11,10;11,10;11,10;12,11;11,11;11,11;11,11;12,11;12,12;12,12;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,13;9,13;10,24;10,12;10,11;10,0;11,8;11,8;12,8;12,9;11,9;10,9;11,9;11,9;11,10;11,10;11,10;11,10;11,11;11,11;9,10;11,11;12,12;13,12;12,12;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,13;10,9;10,7;9,11;9,0;11,8;11,9;10,9;11,9;11,9;10,9;10,9;10,9;11,10;11,10;11,10;11,11;11,11;11,11;11,11;11,12;11,12;11,12;11,12;11,12;11,12;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,10;9,11;9,11;9,0;11,9;11,9;10,9;10,9;10,9;10,9;9,9;10,10;10,10;11,10;11,11;11,11;11,11;11,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,11;9,9;9,10;9,11;9,0;11,9;11,9;11,9;10,9;10,9;9,9;7,9;9,10;10,10;11,10;11,11;11,11;12,12;12,12;11,12;11,12;12,12;19,14;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,12;10,12;8,-2;8,12;8,12;8,0;11,9;11,9;11,9;11,9;10,9;10,9;10,10;10,10;10,10;11,11;11,11;11,13;12,12;13,12;10,12;3,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,12;9,11;9,25;8,12;7,11;7,0;11,9;11,9;11,9;11,9;11,9;10,9;10,10;10,10;11,10;11,11;11,11;11,11;12,11;11,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,12;9,12;8,15;7,13;6,11;5,0;11,9;11,9;11,9;11,9;11,9;11,10;11,10;11,10;11,10;11,11;11,11;12,11;12,10;12,11;12,11;12,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11
,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;9,13;9,14;8,14;6,11;4,11;3,0;12,9;12,9;12,9;12,9;12,10;11,10;11,10;11,10;11,10;11,10;12,11;12,11;12,11;12,11;12,11;12,11;12,12;12,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,12;9,12;9,14;9,15;8,16;5,15;1,12;0,0;12,9;12,9;12,9;12,9;12,10;12,10;12,10;11,10;11,10;12,10;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,13;9,13;9,14;8,16;7,17;3,16;-2,12;-5,7;12,10;12,10;13,9;14,10;13,10;12,10;12,10;12,10;12,10;12,10;12,11;13,11;13,11;13,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,13;10,13;9,14;9,15;7,16;6,18;0,18;-8,12;-19,3;12,10;12,10;13,10;13,10;13,10;12,10;12,10;12,10;12,10;12,10;13,11;13,11;14,11;13,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,13;10,13;9,14;9,15;7,16;4,17;-1,17;-13,12;-30,-2;12,11;12,10;11,10;16,10;14,10;12,10;12,10;12,10;12,10;12,10;13,10;14,10;18,10;13,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,14;10,14;10,16;9,17;5,18;0,19;-12,14;-39,-11;12,11;12,11;12,10;12,10;12,10;12,10;12,11;12,11;12,11;12,11;12,10;12,10;12,10;12,10;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,13;11,13;10,14;9,15;8,16;7,18;3,19;-4,17;-13,6;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,13;11,13;10,13;9,14;8,15;7,17;5,17;1,15;-3,2;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,13;10,13;10,13;11,14;9,14;9,16;8,16;5,13;0,-2;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,12;10,13;9,13;9,14;9,14;8,13;3,-5;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;12,11;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,12;10,12;10,13;10,13;9,13;9,13;9,12;4,-7;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;10,12;10,12;10,13;10,13;10,13;10,13;10,13;10,12;6,3;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,13;10,13;10,13;10,12;10,12;10,13;10,13;11,14;10,14;8,12;2,-1;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,13;10,13;10,13;11,13;10,13;10,13;10,13;10,13;10,13;9,13
;8,13;6,12;0,-6;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;10,13;10,13;10,13;10,13;10,13;10,13;10,13;10,14;11,14;10,14;10,14;8,14;5,13;-1,-10;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,13;10,13;11,13;11,13;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,12;11,13;11,13;11,13;11,14;10,14;9,13;9,13;9,13;9,14;9,14;9,14;9,14;9,14;9,14;7,13;5,12;2,0;10,13;10,12;9,12;9,12;10,12;11,12;11,13;10,13;10,12;10,12;10,12;11,12;11,12;11,13;10,13;10,13;10,13;10,13;10,13;11,13;11,12;11,13;11,13;11,13;11,12;11,12;11,12;11,13;11,13;10,14;10,14;9,15;9,15;10,15;8,13;8,13;8,15;7,14;6,13;7,13;8,14;8,14;5,13;4,12;2,-1;10,13;7,12;7,12;7,12;8,12;8,11;8,12;8,13;8,13;9,12;9,11;9,12;9,12;9,12;9,12;9,12;7,13;7,13;8,13;8,12;8,12;8,13;9,13;10,13;11,13;11,13;10,13;10,13;10,14;10,15;10,16;9,17;7,18;6,16;5,12;8,15;8,15;1,13;1,10;4,10;6,12;7,15;4,14;3,12;2,1;-1,13;5,12;6,12;4,12;6,11;5,9;2,5;3,6;0,1;6,9;6,10;3,10;6,11;4,11;3,10;3,10;1,8;-7,3;-6,-1;3,8;4,10;2,12;-11,13;1,13;-4,13;-6,13;-6,13;0,13;-3,14;0,15;0,15;1,16;-11,13;1,14;3,13;-2,5;-8,2;-17,-8;-3,0;0,6;1,10;2,11;-1,10;2,11;1,0;libvpx-1.8.2/build_debug/non_greedy_mv_test_files/exhaust_16x16.txt000066400000000000000000000175261357355204000254550ustar00rootroot0000000000000030,45 6,4;12,14;6,16;24,9;30,2;7,11;11,12;13,10;12,12;30,11;9,13;10,11;4,11;1,-7;7,-13;9,-32;1,-12;22,9;29,5;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;28,-9;12,12;12,12;12,12;12,12;10,5;12,12;0,-1;9,29;14,3;4,31;11,24;-10,7;5,23;-15,-32;13,6;13,6;0,6;27,3;10,9;12,11;14,3;2,-19;-4,14;16,-13;12,12;17,10;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;6,18;10,17;9,12;12,12;12,12;12,12;3,9;26,12;-16,-1;8,31;11,-10;17,-17;16,-22;13,14;10,18;12,12;12,11;20,-23;3,9;7,4;12,13;10,13;12,12;15,24;11,-6;12,12;12,12;11,11;12,11;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;14,9;12,12;12,12;12,12;12,12;24,17;4,11;17,12;-32,-1;12,8;13,11;13,-4;13,25;14,-26;12,12;20,-8;12,12;12,12;13,12;13,12;12,12;12,12;14,11;11,13;3,8;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;14,16;5,6;5,12;-32,-1;12,11;13,14;12,13;12,12;8,-2;12,20;11,13;12,14;12,12;12,12;10,7;11,-10;13,14;11,13;12,12;-16,-10;12,12;12,13;9,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;16,25;12,12;18,12;-32,-1;12,13;12,12;16,14;16,30;11,7;10,-4;11,15;12,12;9,22;13,15;11,-12;12,12;12,12;12,6;4,-22;12,12;12,12;15,24;12,9;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;15,9;19,6;27,12;-32,-1;11,5;13,26;6,-20;13,8;13,-2;8,-14;12,22;10,1;11,-14;13,24;15,10;14,17;12,-8;12,12;11,-9;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;5,12;-32,-1;12,12;9,-8;12,12;30,1;25,-3;12,12;12,17;8,14;8,24;8,-2;7,20;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;8,16;8,10;12,12;-32,-1;9,11;5,6;14,1;-4,27;12,12
;19,7;-18,-32;22,-20;11,20;11,16;10,20;12,12;12,12;12,12;12,12;12,12;12,12;31,-22;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;7,-4;12,12;13,12;-32,-1;12,12;12,12;11,4;12,12;12,20;12,7;12,12;10,20;13,8;19,-1;12,11;24,19;13,8;14,6;11,9;2,16;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;11,11;14,26;17,12;12,12;-32,-1;20,18;10,10;15,-17;14,-2;12,12;11,26;13,27;10,12;13,14;23,9;8,20;17,0;12,12;12,11;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;10,13;12,1;13,12;-32,-1;16,11;12,12;9,13;22,0;-32,-5;9,8;15,6;-8,-2;12,12;10,-21;12,3;12,12;8,3;12,12;12,12;12,23;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;11,11;7,12;31,-1;8,16;11,7;11,18;11,9;12,3;13,-4;5,24;8,17;12,12;12,14;12,12;-9,14;7,-2;12,16;15,6;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,14;11,12;21,-1;13,20;27,-9;11,6;14,8;7,22;19,7;4,1;-24,21;12,19;11,18;9,-24;14,-20;12,7;13,-28;10,15;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;13,12;24,-32;14,5;9,18;25,-26;24,-27;12,11;10,16;7,26;12,12;17,17;13,4;6,18;12,20;30,3;16,17;12,16;12,12;12,-21;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;6,-32;12,12;12,12;-16,11;12,12;10,11;25,-9;27,5;22,28;8,13;13,3;12,30;-5,-8;18,-31;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-9,-32;13,12;12,12;12,3;18,29;15,-9;1,19;-9,14;-3,-3;-21,8;13,6;-12,12;-2,-13;20,-32;-19,-31;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-24,-30;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;27,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;11,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-5,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-21,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12
;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,15;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-29,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;17,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;1,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-15,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-31,-1;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,9;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;3,-1;12,12;12,12;17,20;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;-13,-1;12,12;12,12;-8,25;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;21,-1;-2,5;-3,25;-18,27;1,-31;0,17;-9,26;-23,27;-23,20;-27,18;1,30;3,15;0,0;3,22;3,5;0,0;0,0;0,0;-17,-18;-15,-16;-1,15;-4,9;2,11;-13,-2;2,13;-5,16;-7,-2;-7,7;0,18;-5,-11;0,-10;0,0;2,-5;-5,-32;4,-27;2,21;-8,7;-8,1;-17,-10;-4,-1;-13,0;-8,5;1,10;0,-1;4,12;5,-1;libvpx-1.8.2/build_debug/non_greedy_mv_test_files/ground_truth_16x16.txt000066400000000000000000000176521357355204000265200ustar00rootroot0000000000000030,45 
12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,
12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;
12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;12,12;libvpx-1.8.2/build_debug/non_greedy_mv_test_files/localVar_16x16.txt000066400000000000000000000473741357355204000255430ustar00rootroot0000000000000030,45 0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,1;0,0,0,0;0,0,0,0;0,0,0,1;0,0,0,0;1,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,2,2,4;25,115,115,529;196,644,644,2116;225,420,420,784;576,696,696,841;4900,3920,3920,3136;9604,6664,6664,4624;9,120,120,1600;1024,2208,2208,4761;3249,5016,5016,7744;484,1870,1870,7225;9,288,288,9216;64,384,384,2304;324,882,882,2401;10404,8058,8058,6241;1936,2332,2332,2809;196,322,322,529;1,10,10,100;4,16,16,64;4,100,100,2500;1156,2482,2482,5329;3600,5160,5160,7396;81,594,594,4356;0,0,0,1521;0,0,0,10000;1,134,134,17956;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;1,0,0,0;1,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,4;25,110,110,484;196,560,560,1600;529,713,713,961;1296,2196,2196,3721;4096,6784,6784,11236;6889,9960,9960,14400;441,1659,1659,6241;1764,2520,2520,3600;4225,4875,4875,5625;529,2208,2208,9216;36,558,558,8649;36,276,276,2116;529,1564,1564,4624;9801,11088,11088,12544;3600,4620,4620,5929;225,360,360,576;4,2,2,1;1,9,9,81;1,44,44,1936;2025,3060,3060,4624;4624,4760,4760,4900;16,168,168,1764;0,0,0,1681;0,0,0,10609;1,133,133,17689;9,0,0,0;1,0,0,0;4,0,0,0;9,0,0,0;9,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;1,1,1,1;0,0,0,0;0,0,0,0;0,0,0,0;1,-1,-1,1;0,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;0,0,0,0;0,0,0,9;81,198,198,484;225,540,540,1296;2209,2162,2162,2116;6400,5760,5760,5184;6241,7347,7347,8649;7396,7998,7998,8649;8464,7084,7084,5929;2916,4104,4104,5776;3249,6498,6498,12996;784,3724,3724,17689;400,1820,1820,8281;784,1288,1288,2116;1849,1935,1935,2025;8649,4650,4650,2500;4624,3060,3060,2025;1089,891,891,729;625,175,175,49;81,0,0,0;441,693,693,1089;3969,4347,4347,4761;2809,2862,2862,2916;0,0,0,0;0,0,0,2116;0,0,0,10816;1,134,134,17956;1,1,1,1;16,0,0,0;25,0,0,0;9,0,0,0;25,0,0,0;1,0,0,0;1,0,0,0;4,0,0,0;9,0,0,0;9,0,0,0;9,3,3,1;9,3,3,1;1,0,0,0;4,2,2,1;1,0,0,0;0,0,0,0;4,2,2,1;4,4,4,4;1,0,0,0;1,4,4,16;196,378,378,729;256,784,784,2401;4624,4692,4692,4761;16384,8960,8960,4900;6889,4067,4067,2401;8649,3534,3534,1444;17689,8246,8246,3844;6889,8798,8798,11236;4761,8832,8832,16384;1024,4032,4032,15876;3136,5656,5656,10201;6400,5920,5920,5476;5776,5700,5700,5625;6724,7626,7626,8649;2916,5130,5130,9025;7744,6688,6688,5776;5184,3528,3528,2401;4225,2665,2665,1681;4356,3564,3564,2916;7056,3528,3528,1764;2500,600,600,144;1,0,0,0;0,0,0,225;0,0,0,10404;1,-135,-135,18225;16,0,0,0;25,5,5,1;25,0,0,0
;36,0,0,0;16,0,0,0;9,0,0,0;9,0,0,0;9,0,0,0;9,3,3,1;16,0,0,0;25,0,0,0;25,0,0,0;25,10,10,4;4,4,4,4;4,6,6,9;1,0,0,0;4,2,2,1;1,0,0,0;0,0,0,0;1,9,9,81;576,720,720,900;100,450,450,2025;1849,1763,1763,1681;8464,5796,5796,3969;3969,5733,5733,8281;5041,6177,6177,7569;7921,7476,7476,7056;8100,7740,7740,7396;6561,6075,6075,5625;1296,2088,2088,3364;4489,3618,3618,2916;7056,4368,4368,2704;5041,4331,4331,3721;4900,5530,5530,6241;1849,3311,3311,5929;7225,4930,4930,3364;6084,3744,3744,2304;7396,5934,5934,4761;6241,7110,7110,8100;8100,8730,8730,9409;5184,5688,5688,6241;1,35,35,1225;1,54,54,2916;0,0,0,11881;1,-137,-137,18769;64,0,0,0;49,0,0,0;16,-4,-4,1;25,0,0,0;25,0,0,0;36,0,0,0;25,0,0,0;36,0,0,0;16,0,0,0;9,0,0,0;16,0,0,0;9,0,0,0;25,0,0,0;25,0,0,0;16,4,4,1;100,10,10,1;49,14,14,4;25,10,10,4;36,0,0,0;36,96,96,256;1764,1680,1680,1600;1600,1880,1880,2209;1024,1184,1184,1369;1681,1517,1517,1369;2704,3484,3484,4489;2025,3330,3330,5476;2809,2915,2915,3025;4489,5293,5293,6241;5329,6132,6132,7056;1764,2100,2100,2500;3025,2090,2090,1444;3249,1596,1596,784;1764,2310,2310,3025;4489,6164,6164,8464;1521,2808,2808,5184;729,810,810,900;900,330,330,121;1764,882,882,441;2025,1530,1530,1156;5041,5467,5467,5929;3136,4144,4144,5476;0,0,0,676;0,0,0,289;0,0,0,12321;1,-139,-139,19321;9,0,0,0;9,0,0,0;36,0,0,0;9,0,0,0;4,0,0,0;9,0,0,0;9,0,0,0;9,0,0,0;9,0,0,0;9,0,0,0;4,0,0,0;9,0,0,0;9,0,0,0;121,0,0,0;36,0,0,0;1089,231,231,49;1225,210,210,36;1369,185,185,25;1225,245,245,49;1764,1218,1218,841;3364,3654,3654,3969;3969,5292,5292,7056;4761,5313,5313,5929;2704,2964,2964,3249;2209,3290,3290,4900;1681,2952,2952,5184;3721,4453,4453,5329;12321,12432,12432,12544;9025,9405,9405,9801;2116,2392,2392,2704;2704,2704,2704,2704;2304,2256,2256,2209;1089,2244,2244,4624;4761,7176,7176,10816;900,1890,1890,3969;100,150,150,225;324,342,342,361;576,600,600,625;441,945,945,2025;3600,4860,4860,6561;441,1092,1092,2704;1,2,2,4;0,0,0,3025;0,0,0,12100;0,0,0,19881;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;4,0,0,0;9,0,0,0;9,18,18,36;256,320,320,400;1600,720,720,324;2209,611,611,169;3249,798,798,196;3364,522,522,81;3600,600,600,100;4761,1725,1725,625;2916,2376,2376,1936;2809,2703,2703,2601;6084,3900,3900,2500;3844,2914,2914,2209;3249,2850,2850,2500;3600,3480,3480,3364;9025,7505,7505,6241;19321,15846,15846,12996;6889,9545,9545,13225;1225,2485,2485,5041;1600,1640,1640,1681;961,1147,1147,1369;729,1431,1431,2809;4489,5025,5025,5625;400,920,920,2116;81,108,108,144;576,336,336,196;576,552,552,529;289,1139,1139,4489;2304,3984,3984,6889;4,58,58,841;0,0,0,0;0,0,0,1849;0,0,0,12544;1,-144,-144,20736;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;9,0,0,0;0,0,0,0;4,0,0,0;4,0,0,0;4,0,0,0;16,0,0,0;9,0,0,0;121,11,11,1;484,22,22,1;576,24,24,1;1024,32,32,1;529,138,138,36;1681,451,451,121;2304,2928,2928,3721;3969,5355,5355,7225;4096,3776,3776,3481;6889,4980,4980,3600;3249,4161,4161,5329;1296,3276,3276,8281;4900,7280,7280,10816;17424,15972,15972,14641;8100,11070,11070,15129;3600,5100,5100,7225;576,1272,1272,2809;841,1102,1102,1444;400,620,620,961;144,228,228,361;1936,924,924,441;225,315,315,441;25,125,125,625;1444,1292,1292,1156;256,480,480,900;169,728,728,3136;1156,2754,2754,6561;9,117,117,1521;4,68,68,1156;9,174,174,3364;1,114,114,12996;1,-149,-149,22201;4,0,0,0;1,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;4,0,0,0;4,0,0,0;1,0,0,0;1,0,0,0;0,0,0,25;4,0,0,0;196,0,0,0;400,0,0,0;400,0,0,0;1225,315,315,81;2116,414,414,81;1936,616,616,196;2601,1530,1530,900;5929,4774,4774,3844;4624,4964,4964,5329;7225,4760,4760,3136;3969,3276,3276,2704;1024,2560,2560,
6400;9216,9600,9600,10000;12321,10989,10989,9801;5476,7104,7104,9216;3364,4292,4292,5476;289,578,578,1156;676,286,286,121;256,96,96,36;49,105,105,225;784,672,672,576;144,180,180,225;64,136,136,289;1444,988,988,676;144,252,252,441;144,696,696,3364;1296,3132,3132,7569;1,46,46,2116;4,94,94,2209;1,61,61,3721;0,0,0,14400;1,-150,-150,22500;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;9,48,48,256;900,600,600,400;1521,507,507,169;1936,440,440,100;3364,580,580,100;4225,585,585,81;3969,693,693,121;4225,1755,1755,729;4356,3234,3234,2401;3136,2912,2912,2704;5776,3192,3192,1764;5041,3550,3550,2500;4624,5236,5236,5929;13456,10672,10672,8464;7744,8096,8096,8464;4225,4290,4290,4356;2601,2499,2499,2401;324,720,720,1600;676,728,728,784;169,286,286,484;256,288,288,324;1369,851,851,529;289,374,374,484;225,300,300,400;961,651,651,441;196,280,280,400;256,768,768,2304;1681,3772,3772,8464;1,53,53,2809;1,0,0,0;0,0,0,0;0,0,0,14161;0,0,0,22801;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,9;256,0,0,0;441,0,0,0;625,50,50,4;784,56,56,4;1089,132,132,16;841,232,232,64;2025,945,945,441;2116,2162,2162,2209;1296,2268,2268,3969;3025,2585,2585,2209;6084,4992,4992,4096;5476,5550,5550,5625;6084,3900,3900,2500;5041,2911,2911,1681;1225,1365,1365,1521;2025,1980,1980,1936;784,980,980,1225;961,682,682,484;529,552,552,576;1156,1258,1258,1369;2209,2021,2021,1849;576,744,744,961;625,650,650,676;961,930,930,900;576,432,432,324;289,884,884,2704;3025,4895,4895,7921;2209,1974,1974,1764;16,0,0,0;16,204,204,2601;1,118,118,13924;1,141,141,19881;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;0,0,0,9;0,0,0,0;1,0,0,0;1,0,0,0;9,3,3,1;9,6,6,4;16,8,8,4;16,68,68,289;1681,1681,1681,1681;2401,2842,2842,3364;576,1560,1560,4225;1296,2556,2556,5041;8836,8272,8272,7744;2116,2668,2668,3364;1600,1120,1120,784;2025,1530,1530,1156;1444,1330,1330,1225;1849,1677,1677,1521;400,740,740,1369;1444,1596,1596,1764;676,1222,1222,2209;1849,2107,2107,2401;1521,1677,1677,1849;676,1066,1066,1681;1521,1716,1716,1936;441,882,882,1764;900,1230,1230,1681;256,784,784,2401;3136,3472,3472,3844;7396,6450,6450,5625;3136,2352,2352,1764;256,1056,1056,4356;1,107,107,11449;4,278,278,19321;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;1,0,0,0;4,0,0,0;4,0,0,0;4,0,0,0;4,0,0,0;4,0,0,0;9,6,6,4;100,390,390,1521;3136,3304,3304,3481;3844,3100,3100,2500;484,1254,1254,3249;4624,5916,5916,7569;10201,8585,8585,7225;1024,1536,1536,2304;1369,1591,1591,1849;2025,1710,1710,1444;1600,1400,1400,1225;1369,1369,1369,1369;400,680,680,1156;1369,1147,1147,961;1521,1131,1131,841;2025,1080,1080,576;1521,897,897,529;1521,1014,1014,676;2209,1598,1598,1156;441,609,609,841;1024,832,832,676;361,627,627,1089;1521,1326,1326,1156;3136,2632,2632,2209;8100,5940,5940,4356;3136,5488,5488,9604;9,393,393,17161;7056,5376,5376,4096;0,0,0,0;0,0,0,0;0,0,0,1;9,0,0,0;0,0,0,0;0,0,0,0;0,0,0,1;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;1,0,0,0;0,0,0,0;9,0,0,0;144,0,0,0;256,0,0,0;100,0,0,0;256,112,112,49;576,408,408,289;3025,1925,1925,1225;2704,2808,2808,2916;1764,2814,2814,4489;7744,6864,6864,6084;5329,5256,5256,5184;1369,1850,1850,2500;961,1302,1302,1764;1521,1560,1560,1600;1936,1628,1628,1369;1600,1400,1400,1225;1600,1280,1280,1024;1681,1517,1517,1369;3249,2451,2451,1849;2401,1960,1960,1600;2209,1504,1504,1024;2209,1598,1598,1156;2209,2021,2021,1849;1089,1221,1221,1369;1369,1258,1258,1156;1156,1190,1190,1225;1521,1287,1287,1089;1156,1088,1088,1024;3969,3
528,3528,3136;4096,6720,6720,11025;25,670,670,17956;8836,6298,6298,4489;0,0,0,1;9,3,3,1;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;1,0,0,0;0,0,0,0;0,0,0,81;400,280,280,196;900,210,210,49;784,140,140,25;961,589,589,361;1369,1221,1221,1089;2401,2107,2107,1849;2704,3068,3068,3481;4624,4556,4556,4489;4489,3819,3819,3249;2025,1845,1845,1681;1521,1287,1287,1089;961,1116,1116,1296;1225,1365,1365,1521;3136,2072,2072,1369;2704,1976,1976,1444;2401,1960,1960,1600;2704,1820,1820,1225;2601,1530,1530,900;1225,1015,1015,841;1764,1218,1218,841;2209,1645,1645,1225;1444,1406,1406,1369;1444,1406,1406,1369;1681,1722,1722,1764;2025,1980,1980,1936;2809,1802,1802,1156;1849,1849,1849,1849;2601,3672,3672,5184;2916,6912,6912,16384;16,660,660,27225;7921,5785,5785,4225;1,6,6,36;400,120,120,36;144,36,36,9;225,0,0,0;64,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;0,0,0,0;64,0,0,0;0,0,0,0;0,0,0,0;361,0,0,0;1,0,0,0;256,16,16,1;576,24,24,1;961,217,217,49;2304,1248,1248,676;3136,2856,2856,2601;2916,3402,3402,3969;4225,3835,3835,3481;3025,2970,2970,2916;1681,1804,1804,1936;1156,1088,1088,1024;1600,1320,1320,1089;625,825,825,1089;1024,768,768,576;2401,686,686,196;3025,935,935,289;2209,1081,1081,529;3136,1344,1344,576;2401,1519,1519,961;729,999,999,1369;1600,1600,1600,1600;2025,1845,1845,1681;1225,1400,1400,1600;1849,946,946,484;2209,658,658,196;2025,1170,1170,676;2601,765,765,225;2601,1224,1224,576;3136,3360,3360,3600;3025,5500,5500,10000;225,1905,1905,16129;7569,6438,6438,5476;676,910,910,1225;4225,2210,2210,1156;1444,684,684,324;1521,39,39,1;1444,38,38,1;1444,152,152,16;1296,468,468,169;1296,576,576,256;1024,480,480,225;961,496,496,256;1156,544,544,256;1369,666,666,324;1600,680,680,289;625,275,275,121;841,348,348,144;1600,920,920,529;2209,2021,2021,1849;3481,3304,3304,3136;5041,3550,3550,2500;3844,3162,3162,2601;4225,3120,3120,2304;1369,1295,1295,1225;625,800,800,1024;841,1218,1218,1764;1521,1092,1092,784;225,255,255,289;784,588,588,441;961,558,558,324;1849,817,817,361;1521,741,741,361;1764,756,756,324;2116,828,828,324;900,660,660,484;1444,950,950,625;1444,874,874,529;1296,792,792,484;1369,703,703,361;1936,968,968,484;1089,660,660,400;1156,714,714,441;2209,1504,1504,1024;1444,1064,1064,784;1444,1558,1558,1681;529,1426,1426,3844;144,756,756,3969;1521,3315,3315,7225;3600,4320,4320,5184;2401,1666,1666,1156;2116,460,460,100;2025,405,405,81;2401,490,490,100;2025,540,540,144;1296,612,612,289;1936,1364,1364,961;1936,1672,1672,1444;1444,1064,1064,784;1089,528,528,256;1681,1066,1066,676;1936,1496,1496,1156;1849,1419,1419,1089;2116,2070,2070,2025;4096,4096,4096,4096;2809,3445,3445,4225;5776,5624,5624,5476;2916,3348,3348,3844;1521,1053,1053,729;1156,1020,1020,900;289,578,578,1156;676,702,702,729;1089,1089,1089,1089;400,500,500,625;400,220,220,121;169,156,156,144;400,240,240,144;361,133,133,49;289,187,187,121;529,322,322,196;324,180,180,100;576,216,216,81;441,252,252,144;400,240,240,144;256,144,144,81;529,276,276,144;400,320,320,256;484,594,594,729;1156,1088,1088,1024;625,525,525,441;729,945,945,1225;225,750,750,2500;144,732,732,3721;1225,2450,2450,4900;576,1296,1296,2916;2209,1175,1175,625;676,364,364,196;1296,576,576,256;1296,756,756,441;900,630,630,441;1600,840,840,441;5041,1562,1562,484;5041,1917,1917,729;5041,2059,2059,841;3721,1952,1952,1024;676,962,962,1369;1444,1292,1292,1156;784,840,840,900;1024,1440,1440,2025;4624,3876,3876,3249;3136,2912,2912,2704;5476,2738,2738,1369;1849,731,731,289;961,620,620,400;676,572,572,484;441,399,399,361;1369,999,999,729;1024,960,960,900;729,810,810,900;324,324,324,324;144,72,72,36;36
[raw payload omitted: tail of a non_greedy_mv_test_files data file, stored as semicolon-delimited records of four comma-separated integers; the numeric dump is not reproduced here]
libvpx-1.8.2/build_debug/non_greedy_mv_test_files/raw_1.png000066400000000000000000024134371357355204000241070ustar00rootroot00000000000000
[binary payload omitted: raw_1.png is a binary PNG image and its contents are not representable as text]
b<9TTj"]T,BQ֚H}8CTq#"usь=p"vTMm*`Igb"&_0r6YUI Fn :FdD28zF a:X" ,sDe#X89 E"\?rMY F,*" {BhOXl c !U22#*aB0tyg5fYI2PNdI U𓪌rk Gngv~nɌx8 aTkïU-m B;BȓܢMz#'WF A?觞|weLT)$H%lݺukezM/`wvϿ}~eܙnl>7=evrS*0s]+3G33'q';xww>cdE$;9Ok3ܼyswÃdNr`/6F;?7|spN9xx%/?܇n|'$p`Џj_;qı#gW jw>x^/{ݛvaq_byy ӎCcHFgGh|)ivU@QLfijuxk RZjs bD]dM`sKBYNm q縬4D A#)}G9jY?zGnE DlwvGcg)5eE]W1y}e$a<Ζ`!gzu-Y 0=YBR|:3(EY:0Mkwg`ukK դahv_M#:3•ιW(XWUekkPk5''ƚ )EQ4;x2} :V|!k>ugN d-LSb#Dcisl|gXfL<)婼@dXyEfSW*L8Uo6٘A"qsI<5`` J*D""23 2i2 GDC2dń4[< c0`0T4.X*!F u $CA0*ͨI D5M6 s"F1Rf"1!"h5ySLAT+H`gQ%j"*z/ lP"Q5xT5(FȪD pSU*2ߟ@<Հ*jЋk2M$ؘ D 0b7!@LЈѦ/%na& H29NVP"RԈ"zb%R` eChmx9$B¦b沬KIܛ 1&myi/9g1JjhþfH D* ֠5 d28<~f4 5[֊ݤo;6IvJsx\hB  IGY,0w&Kzד$Yz^n :>h9[7?>i%*7^3?3yE(WkH(qB\ݻw CU/Aޚڎ_-kW/g ]v{4؝fi|+0+]ꕭ|w0x>'q~{cgpԩ^;}^kWnܹw _힘[#_ʫ7?/#gln_g?wls^w0˜yW_|Vv[9Y'>;c7F(wQ[30ܒriiIJ/\!ԾbQU/& ZY ZS)Cث]Ҡ-H!0EMCU"-_c Yk xos_ZZ7~ԩS7ys8{g}e J&cJ6ӚFBaߗ]UUBUH$uY$ Z"RI`u-4MYG_gD[ NsIrڂw4OSWi5 6lYb%Uܠ$efUA@xMJhbjNj_$s>L20Ͻ72>&8['o:T38ciXUy~NJMUk4Jh%z;H4~&%dD%@wA+zFLDV*Xk#\9?U4f\0hUa1Ybf(9+؀ DԠl,+ *4:)^5EeӴeU&x'vL}$OhAAT@snODf&$RD7 ~7`5fNQcFDb1}:md:`\ਪ~A)^ VEfEcXG`P cSUYQ3} ^TALJT5T%[F'SUDTX)Hd!KQQx[kEXNG.-ccI+xW-ZU5Vthm5%F$h4,'pXF\v{{5rױI8HCADQ%LpdB쏵HD$Ps"U"XreÌLVٛ&6ke]`ߞk/$f֖smm;C^OœfZy8*۷{'N<çϜsvo޼پuS/p&ڸjgg_.$N֭[ȰH0&x¤P[!%xk3u]kg?yW7.ߺu떛 ,tnaZ/ڭV>{:G~7]vL}~w޸yÏ?z\ 8[v~k=rv0OݻfY q+|vy?|<ބGJ @p0,кkb.4vF3sZ3!ck܌4:2]6c X ty1(KM[̕6R&ć` yQ0@" 6\EBFKxjm`r YT1LD$1jgz1 4bhU On)S(h@s/}t0!4IB,,Q bZXB0.,3\ Xmj5J:¢A! €Ȟƪ*Es'!bVUQy@hTj0庢1l:4ZKDؠADedϖ 2"j`AQ cPPE 35.AUؠ*P N;_DĠ'B婋H,)rh`q{ !MwW`|OvmUD$(!2j|cLFę6*Ez0XLӴnVn˲DUXD&ƔJ@&"MSZ[fgg<y>ۛvZ&YVښ&Ξ[>~SBpwg/vs{skП[veYIY_^'LR2P7NeƦIuz߿rq|{suY8[ޞouǝ?۸vl-?_=sk',[\Y$E% Dt=@`4qh8>ޑ7{{i.߸qciX݇']n>?;wz?/='gOt~ 3ݛ?|)07}=o ~W~7R'׍R?>zCkkkM65I)ɇoߟL&ڝ xnaN V=KKKPZ#Ҵ{`f㠖(qy]׉($I\w}5$X_SD.qR{JBk<Ԯ:5 l]fUm-!:C,*$qK+~/e{ާx<]8e2Bbx79X6JBl."V *>1i)OHʈRUL :[(ªE !1&^Um۠r b( L5 "F4E4)ATc r<qU՘pFB &`$Se4q"DULr4HqNYd@Dh 7VUd(UQsMD٩,j8v6-}UK jH`EF(, EbeYdh& "@>C1Ϩą Kh̆ D!0ƦCu Vw` DVaJk| h[ 9ӄkcDѰ2D4 10EISGV JJ3y#*.I"$޷ (hZ xǧ5k@&RQ? 1aTU15X׵"b$q} jD& &Is=c%@DA 1`/,ڙ?~<˲Lbmc3<gJ_&M(wefPu $b{y~~6VKs=o`P~x{c6kc+sigt({a:ݹndCg[ř^0cgvvݗ& 3=K'1e%/朜Y[ pcw~R/'ϟ8u~ C)O\8۲Z.K9RfF![r^d~iiayA8;!Mӣ,߮ z-^-e=.rD4|W}#+ Q=޼r;?/ܹ?|:g-~wΑδz'_G}re=Fg Bvooo80sUd-Ygggni<(L񤕦^s))^R-G@I5Zkj1:TZkjbsr]cOΡj+se(@ED-IfKKuz"K {5K[N]`Auը\Zѥ(-4%I1SURV2)rQ2TC$y!TE:&lRyAC1sڞQUQ:[NM!Gc @8S4e֎;j U%lH{^ed/I ɪ=:yLo?Ls ц`r6J @3F$y Dw1+F$$`e@BDHB4H4b(d ) JWQ4l Y {qRa&1 *Iȳ[fFD(窦l=1ư7FU6 c Bc,T@IAU*YD0sQ("]l{B|`DH q|" 5ORU`8iDLڤAPA#,ƨ6pMH4M4zUfvFD@Ցcf@fzW蹑"속 tFy? 
0@c@U±Ox:¸QCXP!fIT5MS>| f\]E(Abp͞`0 "@7ͽwn*BDgx<y8pȑV 0-o/w͎V4=q[wz䑇.];yJ%ׯte=ssfR#Z_|۷ozQ%HXkgvUU%Pl{fp U35#Xu]༭Єazu]l L&W[`(zDw'hculA$ULBL<1P1YUjիW{yWe5 *A8VJ'D6#Y`@ֆgYuM 5vhA 2jX*+$65N6w&燓>Csd+@P CpX!I5vR ]rhl&-E2kLDI a0)m7&RYL/`ػ]kvyv@F "^߹V=T63@AAo6"i.`c#&c  ظgU7'1M8%q碊D\LFXP8IR5'^)DDQ,M3i*@E$,smrY1&W Y%k!16b0ED8s6Z~Q0x?jNiS] qU@VR4£ӅDE`ưFw)*6c FXaGDN+!05137:p"D[_"/U5_ %87m21iMCE9*DR "Z&= 1Cl(TUIM\|H 0GW"19ܰX1&=6tZ(D6$UAJ)/fm0!LD(6޼?nQ,"P"rC, W6"3d2qUU1]jϓ% j-ID&h[\\\HRH-pp0 q6&6!fn1tFι6R_`~>{܉'s  CDͽⷵ.r΋s.ucտgƘp|λ94k7/3Y~gD?_zwڵ .ʽ뉝WG}W޽:t:Wtҧ~W^?d 9C,X4X uIIQi&hwD3F% B%Z-Bi1X|BS|6u3bR&q-%݇97 U)?Irz'x4+Bh 1er7IPJz'@Du 3C,b4i{[.-26k-x8C\ovuuui)=~f\y{?y雷zsaKΊNVWPeUUդmQUaoG 1҇t^ݻW%p;}:L4U$Fws8uV/ɷ_y .*#J[~v;{zK_,,Ĩh,lmX-v?$ 8y߸A+>wFs:Gz#N|7|?%ĀBXl?vdMUfggG)ʹoܼ>u<|,Ɇu(rIszp jJk۩"̌ujA8BPEUhT5~aFuYPMXUko1*#JL&Q%p g@ )*2*vƁ LҒ E+Y>A=X* >O$lӺ[y>B f4K( !]D}U)9BRCk4ՒKCɜډ1.fKsK8@g.IډSk&<ƣqFo|p- r3{cN=a_xSf`9⛜g0f߆A]#h!>5ߤT% ~Zλx\QΦ[kc,Q;h㈍܋ƞHB"QU4z߶$6tY LRl@ɐ"ĵQJvlXH@,+IDS%{Z|@@hTM [}eC `*(*T*r*EU9IUb鍊(ZLu * UUPyADHq637ؼ50tͬD|PQdV$"A?+"ڳۂ85"f%XgnF/}GHú)؛z?+FbX)O^(py?$$ 3Af6)q!r19眜:/ұu}gfUU'4/zY8\|QdYb#1X uke`.a4.T5>.kzPu'ᤷJ \s.ԛ[o;}zuf8ܸvp3џ omfY_W^zwʊmusoz駏2//>|~qqn~G㐤5> +/_Ó'OV}浛 .=t/?/_6ǿps+`N˻GΝc_nz[P 3AqXWjHJؾ}f ݃.ޜI[y̏ςoƷ~[}w T*y'E9?㟺wlXZ=^zG}\;~P~[d׿1 ^x_;w.pP*WW1GZkgnx?s־[km;o{8,lU)>Ck#bs.}NUG]4%5O}_di2lG}wy6qpPfMRVC|v>s/N&> y]?ԥ0<4omR<U]$;yݽ}w~~_NJz\; Iԓ~?~gFJO8Ee3?>7k얖^|iu,o?qp{yܠc666<\K֏XM޾{'O p>=z($.~3Geh,iL=;;hvg"gp"FA uZXh;s;{G.JzwlnFa/Nfn]Ro %f`0NReP;MEI  Z@fGgwyUUjc:yDƘ _pHX4I؀# uN; LiSb 0ငjϾvIIbk@3R%)}ruifFDڋxKBS0mUU5,fÅ!A f itj,ܖt)[)ťh Xru1o_rWۯpcCUN_UM+'S7nܸ?˶=X7?{իWO/9}kzÙţ-8upwR~fnܹ>3S`nWN=QLBPő/ VAGhߛ,9ꫯO,8{_O5V%sI|$Ϭ?G^ϬԠϯ?sa P/ʝ;w𕯭:jBx_'_e=fuW_?2&zg֍rd;k⠾纘c޶u/[,{}M\9&I۝o42DF^ҖHmZHBh$ɀwoJ6gƩK"$ e]3Njwr}w&7hH3-}TsjUʪ,%ԫ1vɪ6-k]$ׄ{y+w<,ʲ i KZ|i S{xPHADmJSSj !hUUӪ>IUU!+e̤*ZI31ué2Ͳ#3V_Ũ`2kCMD?f:#+'I2ͮckz C,& CHSQ0I˦mOzy?L -iQì|ܭo>'S Dt UQ^LNN A&Nѥ +C1R(JW1iADhzrs ",D FmCU)C8;3kD^))LlB"eFR{28єlicP(P9G0BD?MUWCEeBD^5ma"5וga si1$YuU ιqUhʱWBC3pÃyW^gfsZ6Oi(Wd2<ܔ7_YY7߼rg㰞k'. 
?+=oN\papL5ۛ_|8yz7<'Z  RZWN<"v>M5ƴȇs|lmlxV,YOzk_׾GW_}Q12[Kp8R~r08~\+7CQQݽruͧ>}4w߇E /p/~񋏜;q}wcرc.|q{{w_/A3gΜo_>,odWsݵ-Rpwpgu;}.Q 0C`0y:\W( YQȼ0;yd0y굲,>888t4yogHTRM&mNf:̝m%nH MjH1Zk\L1& bf1'uQqA* .'k3!1&jj 2ޟ̂đRR-S l%  uz{3#bYd\H\%6]Xη'1<\e+˯ڛƣg=r|oa`j ;-[U]ٹ2 $pX,ZNꬅZE]uڽNr9Y!>ȓU:~__|ocĉ?YtG,'f_tine!(^~#O?K^933͏nܸg<~N zܼk{< ֫?z17f=^ lݾylN XNgdl6-/R=qv|f~;;!A;VRԥu!H4~¤f}̳ kYFbw^+Bf:v\~n1 (wn۷]IH &EQ LtBVj[MPP5NS:KVdkiVI򕧎t׶CϚ}~Ν;/FįѧMp$"ٛ,,,d ZU8rD/\dz:U|ppPj/N9[c,䇣DL$T*hRZ-!e\fq5*[rmq\$xߛi)H[,&FF̎a>f?g>OjF07ͮM$8R?Ԑ  lӑxG `myQoCCA+Lq/"Q&ۘXP"d5ă !"bfB4H LB֨XӥATvΑ0"rb/F9"ٮ&i !6)B-&(^FGj>x_+ws{{`i5K8ͫ:{(ess?sٓ'Onigspxo;;-,vCؽsoտvVWWO{YbΩ<ǾrtMkzk Nc\eJl]{'ΝZsXOi  %\ @U/l ({իWoeA'Vb쎮\_[pqV$ 爨y]׮j --( F h`V̼\VGn~}ȇKz "Ou׻?휻£;N22ŨO=Ԛ;/?PU:$,< Yjc[s'f8wco|Gݗ^zV/| =,/K_\h@i']V(lfv~^֢z|<5׻#]Z[[h/LG8???R !%:B  ]l/7E]%y$Z[Yn3=If -zÃaIfUc_:ܺshE?˲ܩ‘6KK_ )ܽwx7NX$^vׯ_}ʿ.ʲx/sfyUWvuհ0o߽O|,{7\(kO?w&{W4IlߏoeɳoPxݻwc'̯گ Cqҹo'Wxd'c?o>>rt}]&#~HEQ}so~GO[h` po7^?z)7^{I?u֑3Cl}wvvA rﶅ sϷ?wJou;Efb 2Vqb,aBI`2^*%1Vrh;ףh4*G|/96+f4u$`sX&Jg@'[uU0Iƣu Mx0;L4"aonngt)!``!K*J\pq$Q H3j\SQTQ$0@Pa j,RlIeUa $"2ѵ A SPAF#^(R2Q Ŧ)0DBJ(ށ%aH!rd ĪhHL,(,~8y5,"b눸cll'*(Q$72BLļ6$U%Ĺm?>4t&M,kcltdf~x _-.*XbrR9.LyڴʀD/MP(ڈoEC`3e=9#bM60vJĨ{"MSV){oN噙*\וsPUU$w8cjɲl7 }~;;;׮]}kDdwwwts_y~K~tkooo/UuNg~{uev<[z.\X?BرcgڧwvvGô:_W^ec/N?6=z>j˲JrN'yy|5Lb^oǘN3Guo?[O=c*w>9T/-n8rl &'0sd{{rٟyr/QýyOgZ\ӿsk~~ӝgCg2/wW.^F,33UOԩSwؿ2skp8vv{΍ƈ5FŸ 7ܚY%!vbTC5y,YU!bICmrf… GN._޲ ~s3K׮]syNI2m=`9P1I5ySnv޽w] s/[ c!fk'~صk0zq;Z+$\ : %69DRXY̯Ibc'ڽ4M9$$IR^٥RDXD1F#U].]:4ukAU5RUHlfmvqaed̊r}iL$Sv.}?;PՕ0PDpͺ,Fk8I眣:gbR]t~o&=A0MR"V@vR*/&Uv6_K?~{V6@`4"_DF]4^ޯA6%_n"iWLS0Sј(~?iLI B@4q&Ԍ1{gADፂ$`"ZūM1""3G"wX| D3 h@ݳ,cTTQBUE@hkŨ# p[?):!E Pj5dkL!: c6i(XkPi>jFL0zD"@Q璺cJzQ` YcKHsWi@HQ5%T b՜6i=AH8fs4FDuMjQ#Q n;϶Q4ߟ5z(d2kQyCieilv:L"˯~cj?LR:ZSC휫LRD2H{H1}(!GxF޽$I)2;;ke{t;IJI`fJD#"C]UUibvvv0fffW[:9$I$IʺLӱټͶEZM~usp5k;>6n3ŵy۫,z!x<8[tX.7o_&FJ?'xD{Y坽z޽{wߺ{6e$mw]J3+G?;;;~HjvyᇺPg^'OJ'R3_#yΕi6fs gP~SGY >^|スu#sʏ]KaH31 4gbB/P ( Ѱ@ږwY|{k-}e8 I/*2*s^{e5\6ֆާꕢ+PW YŜRDBTų[GЏ$.N+? [lB&6>xptttBE3 <ϊKY˸?|LTl#)v| 3G"r\5*F#uV(X/dܰ^_Y)N}nJT䃥k#jafQ%/cFXGÇ xGȅ^v\1ݛƪ,NuvV7뺾7oew=lVJ|3j"QOӼ (FS&$n:4~f1+u ,ƸZ1U~- N'u)n8 >Z/EE_z?@ JD  *ħ@3 ykX=Ó'O}5dx:yzeĮT.Zgӣt>Rj(hhv@%f8Qς1USrJJDnM-&EdꝨEbj̀cm83C 3\v b)HA)"DcJOCɩh9Ǧ !&H!.-_ĩPrŴ(ꙈH[͇rdȘ4@#1[TfV65-SkZ4VX GLRö`(@dkK I(HԄM5`ڹ39uDDTO=<@D0zfnߛiX"r?!u܏®_~qJ|ZNG* h17`5je3!r@ o߹wvxeŴ ۛU><%f2/criiiQ6^3WC+''Psk<&~V˽^"Lu].:|׫ŲJV7Ktue{7W࠼wid85TUlPW&AM?b(]V|t|/EGֽ;W{ncgn?,^y3;;;A݇ս7/zܕڷ{;Dt|4>9b~[[>---cw [l2np87dټTﳮKdwD%*iLDk%"s Wnpxii8}饗Fӛ7?3u=^VVV"a/n@8.9&u 2^iF;I7[c yuMXiE9'Yr(NG!=ML jXB =$<&6.f#HyɫjEQ v=^ l|> ro6Ѷ.?;X[JWp:Opmkp:O o?^o8&vҤpsYTVӹÂ+3?3 +"kPiTz5v颩k7o|ܭ99Ozu{/Zr@"tigק%HT0:EpkmMZDHi]x&H#?=uY匁&*5s3=)%4s&" 1 j!vԢ*Ӛ"&s'0hcĔ',00Jl0Đ4 w"b@N R]ž! ͡E$DEIKM#b#ƷoR5W_97>F=:>,W]V7:W36+\TU5JmvWK L麢(z&SѢytŔ%F˲qU=nxMi-Dҟ1ɻ?>5³O=KthM @EfΨ+̀b[HHGb  2"o>|4|kڸ}u&=jlfU`ULF9UfE9i@sױ*J=Y[s%>:#rrاI] `Kح*̖`:.73nRocZw."M(E돫aF\نe;eIve{t3'>o'?r1{W:^O~ݿwR,p]b$ܺws{ CÓ?:?X,\'O4g}իBwk\x[|1'G,Z./xnk|Ve͔*Y>Lk-V`uyk2~%8/``w܀SUb DnД9Pa@ب8rM IyH~`Yw'w'L0ܩh#e1,B="d0{!RŪ93/DZ6K0̜p  Xs٢ZT]ވRG΋r4*e^U*&yIei!)3+*8$ji]9ѭg d8c0SB`(m2B4@2f1kbZ<;M6d*Odl`L1$d;2% 9u&(Q5LLĢꁓ y9dWYTALY r/9ZY]twcIPz!MHt~-XQ4R,rVz=$6ITUUkMPժCIt3q"jNdfFlQ-qec蓞9Ǫ!Z~vnŋ: 1GG lft|Lx9ȲL"f8 ~<wݸEx4_vΝ߿3.^Ho~w֭[[/_v}/Y-ug}o|__3_1DK9|P˲nKlfuY*׺K^$diCڰ5 uf8l>7ƌWzge٨2>F0$"^U+9Rr! 
Ij} p6N"cbluPt|QQfA+b4/Bl^]]~!Zۉ1¬B:W% ^ H)9W`yW&rA 15g䝎9+P&@CԾ /rnOXRRC"wA!9L!zҪ:l g1=S~|"2է5D[yR8]=ĥݕD63g_С3 *xA7L $#bP!iC2m$& 5!0)xƈ@THЩz73@,)4@ҵLmEUKŁzAMS;C,ahD;v1f6K)4H!@_4B0MҁrN5ES#$40K*s$ `޻!dQM2 H46D28 HcU 13 905D@A>9K9fQ@:XU1Y)8LR01K?|loGw?Vats>{Eԋ(( ~)rS?;=dܹtV_}[ox_]K=7ZɗY,h+e󝥢oJ|~?{wݗ_Ӿ ~ewwwbSPw_ =dȻF_h3.dI=pt2 O{/p}^:d{Wz{?k?_[0a)l9׵+:߹oݢz˗/_5;/߸|dP܎1V#;|tJag~k_9,YR'NI]u{s"B?oyj̯|'޼oNn΢Y>9Wƞ+B Fe1'f։o 22^7nfMT5!g^ܳۛQ]_]::::f ˫<{kfM˲DpEQn0-8zU @ "t"ҐDc]]/*! fh>m3 n?hWѕ zePDgOrFHuG/gY[jTR!%jRYC[&vbrWy:^o]*gݎ4sxG=2Sd02M9(Y^4ZQBjCD C2U"J$E`v œBU51Zk 齷E`HQħ ʌDbuQr(H)Esi,F($Z*9UU 4`54#0!"]1Yi1Ql1 Hjf 1- Lr:""[-AἪ:V$ಖ9脈@Lg݌-2F"BiYͶz4Ո ÔslJ3%|Y{nSbYq ` f%*@O}yOW) )E5ή[< h#4 f(j`nu ^\+1BC]l}9FFGst)norVWWP9@z"pFQtwθZcI/&`aqxjY0n7<,l> %,ܦ7V}[{7nܸJw2yX_:P{i6ŇJSOt}};ި6=zV>ޟTnzoί/@;877ſWr0]9ʳ7y͏/Y;,FEĽn/rVq{96,w&3@ر Mhfj!9{nF(\nD Z*(`vz+gr (1P0i˲'}6}"H**!wN?ٹvg'p岙Wߚ_w>"ʛ9_lАf: 7WYqYbJ4;YN{ F\lne 8Us/_^q+os/l^\,Ű?-uzۏ=-f|:Y'uϛJJ\lZD=DDӲq1 2N{UŠ-hInw"*."4 s !ueo$h9 @_;ST[!"DY+EΤp0pE&SqX3SOZ(P2 Ib&Q{gNc L4Udك !8D$v&Z sghx`gfh`YI$x b#DDG3PoefYs)`1 #:MHWS3CLL[G s$ QZ`%uwۚSKSQ@ #:|" fY5aRST!e1ԢO^IGz)Ԧ[%z*) a#UEL,ywj̷Mѿ_eg"D :V @4 \4ub!TQD""sO:[bIC1Ӫ @[IDQ Ό)G9urM`%2,l"yKۓ0XbjU \w.Y <3i\<HU"`T3<\4{h|;E4 "VGB&zyq>|qt:\LGG:_7ny;G+jk:"5"F%# h/b䴺\G^6/vw;XE+d.={W_b:wfQsbhf^1O7¼}zTs$kEyֻo~[2__UmF2ڋɆ/zu:9>{׮qx{|'O{Glܹܺs7og(>~y˜w~W_};>xp"G(\e*bmmpƍseb,eU3:拣 `mWc'Q%f y]Pup]B̋BYl1XeE4'6ccUufz^z~x?7Z"J3]cAUsXj邨yt hME8Y᱄9:ȢG{+x|A[n4j5MY,e^t0)Yd<1`7jn5$B4,3fp!*/ժv1F{Q$,#,zݦf(dEep$d2<-ze ̉'Ig/UHA Ri|~,>Y#"`gB$$DX>ʙD3? U<%B ( a "DGX AL=zlaVQS8Em_B"YҊF FL%23y'"(D0F_)UU #@4 ffbLq@n' zhiM30kcE,KTcvDQKM3FZULD ْ6 TU͘@Z (BP333lр)EJ8V!]hMҩcdiaIsj:zڬݴYdmLTS8)EΪ4-A&Xb[j%X:T RӲ Q[-aꝤahDĴGѤG1 @0MLOj1='3 ͔TI-\˲Yk h`Ԍ˝jPp1FU(BѹL93v(fHL!T$x*`!y "F#Q|QMfN~x\-[t06\֍qTsVOi6Ea{ 2j _{ͻ|ϼX,xjo`meim= !;: 4ֹh ?r7o~wߢJoh}glg?~o{{; a%߽sm1om^r= ~~|!>Lɘ⤮k*o}wr<}w>xg7^ݹ8ZZѭ[?<~,Nx<9 cS=̳k;jѢiY̲{k:gf'whyc:s_W򍞈4Fl6TRG<(-g'666`uG~ ?xNAw:!^=t&p]`$k6w(gj5E6,`k{r-ŤEtMKǜwew W|鋟裏~7%,*ojheSQ9*|^~(lnuѦӮ S~`P"Wy2b G%?ι<̋/,'G#څNtq}ccjIÇ[};zu56 E!"l3{dXN+Bz"9WGWbAMNQ`fr9ߔue˞@_. 
;+Y&kk f@#D6DL VT h aي`: p6Foluf2 5Nׂ*д"Ek'%(*`4hL?I#2Hn< !09G)*665#ldt3h6%DU=K)6LL5"HUwjbhbNZC07D"$ODlH̢1clZؓXMF"<aHҜJ0;h λt~pPMk009DcO!"+`dWFnC|Sm 8'12 Y;P0hHNv("aL 3H pכ$ MҒt2(:vo )& IDkIE4TJc""T#D%jΐm{g ўR2H4UTCB% AK)1Fޝ瑜(_7n?K?)@,L(FLA6M/} @,#H1JYV.K\]\m-Up"Z(A5oDd_[]ْw.NCEn`h:`&@sO+WLd}k}f|>~Mc{wf`3Ww^}JQ7t2r-olKݐ\l+u#Kc9~xe~^<lo/7kA޿wxq}\l]On/7nsyf4q4̓eRo࿚>|9'o|?~fgٗ~ 6ݽhY]-˗7sHu::NU(B5=>EX]N沬PY(,Aolm,,˺(U:0̘ϊ#X^iH&{2́z{ED`xIQet@X [tj#,cHv+reⓠ9.G&3frッ=(,nl=ߝwY!կkh}{zpą{l B r'5*!rADV|>Bמy|{8nNXTEw^?2~s[fxQ5c,p?c C =I^yP˜멚Zfc>=k&P8ƅ <@fflAjMXGxVǾAD(98U60sFhեE` *Dc1#-̟R4US)aH_ wq p>=ҵs Ih"D$ `b)XU!}@ ̬"Svbf261QZS)KLDR)Dɍkf)n0@*0Q# B1EfE4pփUK3J5t(S]CT2""3 1Qh^6v4*-F,)XE]LYQ˞+TyL/eYf5O\}-ZFbHQEVk*FlAM@(zY&X̧MY" :4eMj 7Wn1lr܅b1Nw4.uA.J!9ΐ}y9y]]MG'[{Gi|:ZseVebtIaXL&Nt2ڽ|1F3w>ga0\|6&{[_/~3׿1>X\gl^~ڋDƸ(*u:efp"c}wo3*Օ^Zv>FhZPV}r_εY:ni*4osa]{/ƳOFW^m|}zz*>=oX^^;[^ܫs@1 'e]0XTs疛jݮΙsý޺ÉPe3;)>yR?޽'6Tѝ痹05% /b쭮t,N u,;>*dfjF֢8D}ڟɏS5f @CHĢ,O*1}Sl=5"Pjv"6`IF*h)v\L!3<Nxf,``j6Zf- ph[/p"j"g J.AAX @OIDQS]zLgAD$A@,H,K@kB=32L-NjWaU4IKR5ʬxr)fjJhiZ ff*CDILQ3:#Yx1K&ЪLS%$Us(biEb)R LT, QSZPH4Dh&""B4Bz?BHk H4Y UMI4vs國 E3k?RQaO +֠fsuYAddh$ @;PWM*C35uT<,]#{O<>x6-/{H])8 e#բi 33Ra=3#`Ó㣇˟ eYq>җ|n>?7xk= O~{y$uD1<[ϾRh?J޽wa]r|;կetQ_g~ag^߻G{h3 zRp^ɯfY1e<> 4˲&9lHc -у[\/\޿қGuݜLy}ynwA4&X̼ѳ"ɂFӸ2?~K]vG?w 䗖$6LbdqL!Vh#MSnK Qd2Yh8{ι4Bg>?ӥc|{;:ݻwP,VM\Tk@#U8$qI5r"Pl _,m?^UUG_mVUԈJBȴcQ;Nq%;t:N:=DX{jƸ@&̴s)n0/ˎ'D,$QKJB( 8 zPPQ4@c B)8$E0cRrQ {Z b@m y'`[}EΤsSS33✓$ 6rulI I~ȉXI,sDUk3Ziy›4DEd͌=]P 2#*SGH*HT HDĞSqx}G""Q@i3_ }'(oj` db JHDzg ѤXLH5k&.}_WBg*H HK"3|B§ŦLt6㗑4$FCb+Kw&%&D+ff>Da.VP%鏪"JIi䩑EE" ըld4#8öo 1AUhJl~LLXLQ1!9TN90u?kE."QTBԜ16̚1   ~*0(lM p2;ʚ!u9'Tf.l}vε7xZtUWbȂ2UZ':.Ω3gQ6p_^YF7ϽT ]Dz+lH y{XW?xrݕզ!<|`bf{ift;_qzt?tZ4t:h,t3H^x}i\ NQy5G议nwz+5~FU=[[[Oʛ?2䤪8_[[[Y۽rΖڄn\6,j̫׿|EQuHa<&";:ԥxy/f%___֭[09ϊئj;#Uciy;p'E1}x:r8t}8[TcU%~41d ռ^[[[UղQkSl|rAGZSaZgG+S͇w+Fx %f]~}?ǁ:/nW7Dz9W6MsmqX*~5uT7^o~'77R擑8PWݼu:`l^/EAιٸҫ:SiPD)e9tcX55A0`!"DX@!B'BQ`O T Ė(QMԓbHE$CKO_d(iCk#-`i};NhΕsH " Lm;!!$"E *ĉɋa4#fNh46iUUm`MUMp@`:+(ƴEcIҴT0S˜(lfIjU1MU9R{ ͐$p#me!NL!CDB.:Q5 ~GH)H %F*"=3jhIBݐ)JL%EҊ2!bij#9n?@@I)E@" ԏg>Hr IaFDl`0\ U&`* "((U1Y DK^ffUDCh. dyg$73M b|6zQHZ5:,4j!18HDUI92BAbXH#S9@KKai5wҗ56PR.\«{xkR7VW<g5+Cs.#"s"rճ1moV6wVݝ9 yxvօ)r/mVϧ=D>"@}w+n>~)M9._N%U;w;n߿`g.g4IU1Ωop1A@he;YDT?϶za_xIӼM#aoo_u+K/۹t:]'U Z<{K_Z=ݻwo=:9U0*!{Yɝ)nl^GWuygo訲EUUw媪`4ef Ba\1ppT` Y6̶+!街eY:Qw (yS .\gDl3 <8=ep 3,s*o,~ozWֺb6}pt;rmoosͨOǀeբ.E,LN# 7/نgIFbp[[я.6~ׯd}}࠹3c?h䦧/~gI3n4ʝ`0C4N3,K(Y4 c`^L(q&JQsZ.7_>~|_] u~SͶ^KNg^β,{>_f&sI7^4/߿|~b]oӭl6 'HQUX@D oMaEJ|gJoc>;?Zg766>{њO3>˺O&;s/?~NY@yY263:-B擛7oNnzw\17X^9RvCL(bx8.ܺEQtin:͍a=aęaU^9/LN '냐6 Pu:nJBAbȐ[+"iv&@(-Bt#EQPuFi.1Y:սF6 J`ygD1-< D%CDtLyv;<ZK%&j{_DhȃA o`!Fӊ 430 !%I+w@$ i%FR(\Cږ23mMRqalNE$>}j:h@[a_䬭~h"IhofiwXU= "$[dthihr҆ڱ9Drm(03#I "zϪ`-:cj29MSUJ[m+2!6JISn"Y铌ז@DvH>Tۚ1:IKv%n"1[J?PlZp,3C*"x# $tEfܼL bI8X&)FQnړɥW*2MD?URBrU?H2v"†`1+hIA-teVĦ( qR.3eUDb4G>`x iEi(f71UkGaJ+=Cxv$T5u̳z^ѱ(L^D|>;UEhzڠww3OgWt:jD5/˗G;~ei5qgIvϮu:,o\z3Fy ^}Un߇w?x4>m 򕭭>k=СߨcٸxYuG}|³E}4M; ByK3aDuΐPy*bTPיs> ykua69X'gOϝ;[ܿSISjoo꧞_,k,,' WѪýtW{/_͏V,;? 
baQ˚x8mImwV8Y^~ͻOFj.g# 9ϖR*qlj㴚ghel S!).S6 Q;˙Qe[[[ˇd>Lz=b64MBv+w֗+|_ͮ^ɨ,s݃ΐ|8Wwpcc:œ .7az;9vJ_~vt?qONNnAUFQ"z,FE=[K[ ;SWnBwkc{0xzx:<se%刽C(xfSQȲ.TR{D?:qZp6w Wf*g:(ҙ&3U#`>V*5d ʇzku ne$*g.m˽FCC<<3]ഞ e2شA@yju9+h,nb>k9@ >-+0s U!јQ IrQ$"LkacB4@ B'NT)Lլu0#"I@%ERW ϏĠL )B>5jι\ ,©xF`N2SS$h B"f.-DDX  \(S+>1wZ2ZI?A1f184KbRѧ{v֛rquM43Y$2A9Eӵ(2qyf,2>[] yĚA+E%UHi&T)l^S|sFsD|R-=*LHijɤiɑpy,˖8_^Z3rhꦬs@9Iy@uz7<i9vgՋ;7=cu^=Ͽw}ipas~en.+!5fs;&~w<>8^Y^^_[ ciHFQA3Cic%EQы7^f~~}F'6[W?c8`Veq&<Efz{ߜLxwyJU@4pLJf?8t\^1Xmf0>yD'u]E4-}<~]GqZZS1_F TsVe̚rlRUe3}`"@#4UPuM>[YG&z*6|pscs_/]T{7ߺu\E395"ޞd3Jqɺl~r̠?":oO}[MQvsڵK_Y? _G{w)4XG4؝}w666&{CD6[/'d4揇{ɓ'Y#^7MY? uyCziyޛvjY"p9bA箋d]jit]YmQ%5ڊrP,K~+_Y~K]nAaVDu\Y`HdTwA+:Q  BM?ɜj#ϤLf $9="+$BFMFP`[Z 2hfNCH f|& x$E4=(@4Y7옐BgLL L1M4H`@`IV-T$23)Cd3fFiV04pEJ:XQRҬ&FZ\`3-]"oQB-8eUcDQ$1Hƶ٣ f 9ʂF$LFJ*C@mY)6 $=MO$ %Y|ȖD&q ɞY.QB\:Ұ&CBQs |c$l]i F̩@`Dd` &f9Jdzf `m D5uzg,7L$k-,a6ACNU"J'1El*[h3Bt0SB0Cb"9Q4)9{ߔe]Lb@L04.9w 3Qut6NnHGEtBr$Jh2YZZ"s`*Y2?j4(L7 qYl}}}0/l_|pt2l,Su{o_DzW^}|g{y}oo 1SװwxsB YĐC\_KS?967H5YHȜ1ЙP^19Sc\Bk V#:P8UəX1 %d+TDSkB݈)E4,l&*HĊ31#&RFH"D6vVbnb;t0HE@5#1Ff"ə@3ABc9L0TAB&5"t HĔe@DOƧ%d I!̌Ul*ڇ!AHP3qLȡIP{:dq"zS%ɇKK-gc34֌([._D4@$s-Uf @$d$yVjZ ;Hl hm-14"5%>d*aHD)ƥd"P6Es ~QD͗i#B.7*%L3dGc9QU{cmoMQҪ2ߤ y h/3yh1n]Zdp 8ZU`6wwL"VL&a^bZcQ9En/QƂp2jrw/|ak;1v @4ẮÇ<"hUE7f异NW4M75ߍ1~twzgQ]%CDG5"QE$fceyHDk\mn]4t/]ҽ{4NC'źww77zkpN( X烼׫+d ,'o/_Eau]g(l^vX1cΣofeѲYn1ܔe/"hz2)7?YYy噫Df5gMa9iU1NS%S n4Tup"‚M)i4+.׾';~UU/r uYU4MSVBDӦbf #Z-'Q'\zfUCD9`d:ߨB_BNz%H&kim 'xߟj<.Z'T{UcdbZqLeB]ÓFueqb8XN{gn>e5HtʉB&YNu]6f!SE7\g+"%#"ضI ~B\x{u|ltsw>}q/w:Rh=):猄Ok!9ibҦhi6($ b@T,扒Y+ P[<#˲,[gsDx9UPYTFjzg=@z)Mu du9g1zxh;sk-=s= n#99Yp􃎦кy44$$}cAMT|TTŎ&e*{33L."-L .Eb@ڍV)..,njKF)Sm{J[^n!osRl#&V "8椦1e)),e`!i*u ("31@H$|tOVt*22scHӁMԹ#%%"vQ&kSZ;@Ch$]F@1 vSC $ "VZj'2ltHbĈ=REZ ̡{ |b6!fY)7$3$ M<ٟ9s%5#Ú(#XL1QEH4>ŶY2; "j д WW4gɳ q^=t6g O_{u#/ク.j<g{=:},˞,/_>xy׏of M~/lBm37q^䮉1;}47M=yOti/ʲ/ysŏ<{b @#oE]%z֘hwjj6;q9{s_N>|nU8_8[v2o'Eyo4Ԍ;"-5@idDO7WW䃳]? 
YYB-ySo +LKbh:c7n4ݹsg>8t1?Ym{sF^ztzA'K~I5˝0 mB̑@ch~\N&jR(r7n?◟={WG"yնjՊ-"61LQT YMohk;;;.ڶ]Mbż^gs fl(_d|֏~q9gNG/+?~|^^^~|AώoFj.M˿f7_r^.lcDjS 3fY7ue6lNeǽ̣lnjEzpǩ*L'|sѐ4|0ybQ]W^D\kォAop.LW%Fer3Pڠ_oOGǛ7o&Œ-LQc&mSjt Jр"(!qWnQE$30-Y O882#& d1PuTNwl= /|RzEFbi @B'CwC2"!$2y0ddi @5(ٖ)WWNW)esnZmtslmi ɈbY=DlڶZf69LF.2A~ɲzKLJeYB/ϲ |Z&敝K_^^^ٟ1jw\Nq6nI9/AsC ̲l#A@^mr9[`SKZޢz&SuHDʵ)Tg7F2iQ1_ziGz=o!{ lc3Xn "dM硕yh{dz=N_~wWެYv%6*Ed,|,4qڛ|S:op0,Y0J(m4d 5i%nUsѠ4cD 'U0mtˣ3@r(yPBcđ]ǨbFL芨#&tȉɈʉN`H?[)sbTe,) 1&7Ctu,jZh Rvю%*c|\n[xFɈ855@l/iK)ɚ f?T5c]^JE@̈Eڒ9uj wUe瓈.G3 0B::gS&5+@bD%"h)H J)yq#BtaD[IGW!]y"aS+CHU( X j (*8$MsnbζTc$NH~p:(*y lf|e"51 F,ځF$UYAMҊQCTq1&ҋ fF"J ̈]^۶KD*N 3*)hc9:R0|!N0r"A#8NJsh|>b"fEބ6Cql Y>@ֳd:wΫoz|dV({#fvO+#OE E< ;7w?zA1zܜ,N>rw[~?8<~K/K'f1>j>`o|d jm^><ĸ_.Ƿƚd8xppfwtgwtg=~|9\4MS˶m?>{4ʹ,~zw?| 5ιTDze1 v"Bِ>CebW&C.D!`fђDDjPq:VG36@p%" *I apذl6Ht؊1&Ѷ|C.),J IN"Zmz)Hm4ٯuO>}wx}Օ]/'~{Ԫdf٬f+zݨ/)eep|'4cukr ]~U8ܽĬ  GhO/カl=jyplF{t jv5zdmkQ;"4BT%V!3&9o _nTAQd1)gg7Ql3tC·rw_"q/?^;}\vtY]?|zr|xO>傳Xfl9MLYnf͓y{Tq9 Wy=US\6vα+l#p|޽Nx<$"pZџoԃ~UU YNBxǽz*V+*X{c3UE1Z0350N(h@D#Oޭ 1@80(jh՛wbm4]VCgQ$ D\jhdI$hfL #"] ͩ F5"LHTNDh F1sƍ ̐ɶ{l.t5HGx:S;@;kP2h 50T3R[o]:jgbl9С+MHȪFD-̜#`h̒gc"AK͌("804J &`&̔l L'F4jAX0eQ@; I&"!q<i!d4ԨiDp>LgH7a3=#"$抂1#C o"&/tJ |DPeUE2f^кx8km~4` )x]L t̬M͌ I̥45&mX#uߴ3HqeDt&L".Z<`A-" 2#!HPA$UcRfFrF/@)Zob\4tGQ]h7by[o;z[ mQԭ6 Q1^^"C^\~z~ȗ㻯y{߽ޏ.~_{cg)_)s99ɧ7n(|1BC&AcTTeVOOO?IiGl}4c6W8G4Ĉ-m$XHߠYLU*hpDVTYul[gFi3Fy8ݺu|޽x_7_M ;<p$1MYm"0r#ꪼ%" Ĭo4.z_YV̀z jр03KI2nԄ"ڭu̧M[/;qɊЭ9i_oN|V{푙p8szp`:>;L&g0lhZM|Y׫c_,˲JD9uʌ<#"`l*rzrZvGCCps.cڃd^Evz @oq?/<?7jtL(dwhZP5``i#gB@3CW 8m#bFB) E~FM[aRDtQiJ$}/6[:BL%c\`SJhPbAc}Fu]7hg/4p9r30sw 1"q> J)!b1C(R#MI`VǽHw!%1&Ib y^P8}uXŢ:D@3.Iۛ*0I*HSKZ9Dp9:U; b+ws_~M\@ѡK"uH|W٧mfnhIl=CojC@و(tVcDVbL%604_"*%*(*"cz- ϙi,33&5\7-}tLL@0RX2+HLEqґcȱ!ֈF̬`T"yAHHT lQD(>c&4s!u1=y/տMF>yk7^zFh\?_u<7iVm,29T^^-/})]l .]pg?4"׫?yǜgUhLjaY<uŘeY֨J^ܩjRQ6,N8pɦaq:/on?~p`{G_.lޮmCMZ6*ldiնlu &f:,K6\.qDDs.t5p"[_eYcm $9Auq{_Bomzl&g"tn6M""r-JZQDbvv0Ю6\1M2(ê pLqymy/oK[,<5f!V IDATȀXŵϖS>"b[b 2Q1cFj001Zi_a5*Nx1oVmdA4fK2Go봘,q,D%sH!1#BiefhJ^ @D,JމslE%LNTeD$$iztw[V4y1#h`&=! cWj4%h:e'rmg:҇KJDv4Idv>3Jw%"2խ²,Sfp1F4|:_f3+^0RҢcQ`GO fΠ`jJ]*kQ5p< Q@ȓn#Rikh@3٘$b8FQЭKQ5m_T9S5)(c Fu(1 d1$T08B2 9s63iD!rHS6}0YtPAR=9% LF21"6 (TY|WhD4dUɑ9<] FL;{]4&;gWWW1QW>6fq}Qbl\lf51^ۘY];ՀYp}}}>v:v٧pl4qȞ߿F9{w~<k|W|:3eY>}tӄw_ZhHuU4( ڬY7g]4E Yc -'0+hW15΅w_}C;y?teuΉV@ (5D47_}Bo^'e=N8čyβCSec\堌"Nxrv*("z/yZhtpS^]]jXy~i ru!z.fiڋ n(#ʺd؟MSO7o?>mɭt}?"OwFBި*cpmڨD w,ʑ9hDZ U(cԴ٥Cpl2L{' h,UQ! 
9ti^65$wGZ+5"P bP"-03lK__Խ$G8Q-RR-%ZF, bBA%?Q$cBAQӹ :a (7AX5]T2C.ӁȀ^ iÀ)ۓatODNԅ]s3ILqH݁I0BEV5(E`PQL;3)s蜇DCB´IѨ*^_B4H5Jd:K.萢ńSU%uΑiR@bKŦ ZV$3$vMC@=JZG0Sb !0#ִ3!B5PhFgfIM(x5*2MЊދa11ObcDDI!iy\AM0b8&ںNgg\F>{pJ-?}M9WlaSՌ{{{ (˅sn׻jdc:>>լm[ HYf@mk䂙u^!`kAU]6P̛ԪjP8cUŶmЩVu@ňf!8s1c/~v9q3 $w'jJdY&1D!T^`=0rTX U9T>Y H"Yٶ5dTy丹"Y?Ͻy(1r_]]ɼL&zU}|v0sN$(yHD'9gJYVdkpyk:hfe`}7mp(]gWF,[{_*7n˼G3rj47lyuun|^m9ڻqo<f)xYPʌݽ^{|q~Sv?6_?d~'7}g_Ʒ X,CJjR8|C)8$qC 'huݐjP?pϓF uvQc0FsGvf (Uè]8;19dVPIHߚ 2ޫAC$WV bj=ĻdP#SD 'Dd^0U+.qhEkHP) Ay=$$4?(MT3tf%DA[I1DSQL:Ci[,҉(Ni 1UL ؑ1 4!RnʴCD@Hҝ .^G^5XAQ'.65a9 qJ;|aIm/3":hd S Z*̢XBt"yĮߙDbY$@#PL.1ۋZ Ȑpy;Z}S 5B2Oc b۴ι)F2_ ef@4J3!'д8M"reӠݲi>3KgYZ[V')-wvv^_VYh!Ht6MjY/ћoy5Tgvho/߼yX]̮zggg~뽯7v~e_fyKUDQψ*ˋr{6;G| q\O.O{WirQhe@@1+dE!:393:kWdw/9ֳ9\owgtx<ٳ\+ ~GNOO~;~74g?6A|>0-(/r8?>wJw}Sܭ/ŇG*q۶-JUbe1FI۶nr;Bİ,؅R8`նMӏ-23E:kj޳s._u] ]~fD\>TFUetvyYfͰU@Ik@$f#`,9f8qgOoX18 + ̐/8l:f;=ݯ0fXt墓q윘V3ShK 踜-#N蕈F82!^B3Um\dS&Y[EVDdFQx??>{yNڱ߸r1)L ;=fgTޯ_|߸>O>?=β̟ _k p= #0~ XE#)n(I|Ffxj'wZV@DNEPⰻu;)=1!&di""CC"J%@DO\83% .9%4t697"pAj`ZR̨-P83V,Z՘aQ9f v7gǧ¶7Wg҆p0h94Tc,">prppk;;j6X=eLMӖe\ yQv{ 5E߇<Ъx|McS5}`5z MhJUuUəs"&` 3R 0TgY@,deq\<~^{G*UUs{>jlf>1TK"y &bw32icBe"$C r=jzmCTCCǜ_mBs;A A\ #wDĮ0)#ڒ唐~,ѽ@U#U%QZ3s-lj@ܪHAhk"Ui4f#9#b@mchcu0DDT"pAͺ^AbވۯR%3gϟ+U[~K2U ,HlBi7n"'~ݻw͛7[naq~jY=zAE=#AQH-uez_~,O~eWz??zlV N/חeQ8PF(Z 397"نI<"@Bؽ^ݺugEd1YAX˓ӗ&հ=q+?eťv4 ;U \7?wެdPG7x>_gzk~C&ok}XtXc!nq!g1vPs讎=e+{{0pJ ZR ع!"uԙCŜ(PjEBDIb [6kmXZ^+cG2@3IHLfs,B3mӶ%蜓hde+&Eh(==JĖ/Df%; hjً*P;NʒMNU 6b„VUPF10O{U 8k7&UN {3nU5Zfb@4/ރUQBc>u1WnpA$!5љc"8 ,GTɴ(U萩u~dthTeY2`a[Ϟd"#׊r^$zg"5,}^/#?]=?߁ݼ9xq F* )S4q" -"fN3E H]۲mC\ᶬلQ :f!$\,M11]:1u""R bfn`̼ =-fF=l§!F$$k?%bZU$CXKHdNĪZ.˃b+|ݏ>}sܼ߼ypԂ>y^岹f^=j8 6}/z,..NF;;?Ϗnŧ:i&K3mk+Y?wpW,~5͢,0~~yӵM ry>z/rwmf|l6Yy+4!Y굉DP'%ePXR֪Y24 t:.eޣJBk.ɯO[[?z>fƽn܍/~w/o;Gg{\m?sуm '?ݿ?~}v/ w'͗ONNbS9Μs[ϟ?_L81TbD`k2Bڶe7M eUvGM{X:99)x|م'ZyfB(BZl Rs.ӝqPNϞ_|N㦉=,Y-)JH l{KF0Bj69yb)acIv>ܘ0W̲6aC۶UEeVzO߻O&503{K[fڌmw ( T>oۖKZ@9m̬N|_~+l|5뚧?4CD CPQ&$"s<=^Fu"lcY13yW˵xx8˃`g6_GO㼸c[nk;2# YHh"sٳA>k=eK z"=*xHÈ)HBAdƁH<{*/ WZO>L~fqc{Gc*3э-.뉢CYQӆl3_ԹG-~K/_?͢2weu~ex&\__{Y˙"ۇGY&wݻ/tB{|>Kw~Çwkԥ>9}9kMXY =># ism2UP رZ zRIX,O-g/W_;>ZVW$*A-ԧUXֵr*_g޽;Ϟ=[7T/?^vp㥋oov|6XܯW5|ǿvwww^y{{0HD-$>#drJa0^{x~~>c]?ٷYY%6iY$si<%/I|bͺ646>( cV&"uҏ1n%F瓧.w.g 3?_>\]jIldJcw,\{]MN/mrELEtW4M32h K7fiS\3B921s08ɘw~gUڝ~vg"m^QB"YQPUfc,Յ:~UhR{n/U|81[/ iݓG;U7ʲ\[KGO>xg>o܁p (3*SЀ!e [iȑi1y@f""H ~4|årZE4Cb4DTBjdGY!dvPA@ 5 `I֑M55Ym%= 1wSU4 Ie;uB\D3bWk D|Z$r< +EF["([t `.m(]>Ȩ+˄vˍ[6hlɬc>-@(3&؅[Z43Hj$ " ܶQ iVbȨȦV)Z0#fM#&|;\,t4a w$]gŎVBy88p%2sLVr :fS+E% L"ޙQn?-^R3E.NS0e!"V@-=A۶4i{IqZqQ ȉHQ* ;V6c$љyPV Cpٓнs|>3eU q*ru+___%wsA5xyG/?Omcd^Fp-"{i⠢^qgOe;ؽ[Ӈ5no8Q/%8y/9`ιk@ob! ([B`b9XHFy\e.۫eYƈXC?JQxb?Fw^}U_|^mjceyB2.AY93 ke QCV!åUu;l+/ZC W=^~駗jR2-]]PUh- 6UzrvU?Y CoMmJX} )BbTVoG3QPS0TG y!Q~G;EQ{w^z{oeIuM"b\r5sF C\!ObXBʗ˥Ae((Ft#Tj%U&E>33EOptt4G;$ZjjDœe4*1sM3)~?l6kW9::7///'|魗~~ƴM_]>MN׃9Gpg|y5weD_6@ Y2 "B;#! 
P"`no:  IU@bDöH..4DL*Ʈ+r)梪((bH#nK@H>u w$]m)CuIL $")D4c%gfUS{r]@ QDIU-י(`b,l=5EQ3B D!D$m.9 Q ML]Bm27sR zI /`d&EvY^jhq*9KhI 3#L_K,6J`if˷G)(;P@"'7E>2HɈ"B~3bUAz] jT0W$ZQh1풲\˜G h&Uub&9:9GC 1 BBafmA$ah,S ؑ%8Pe̱A$e ]42U@ mz~㕻O&+fwOe)ϲxyrfM *yUnD"pܘUQVw///?pk4_ݺV h@@n8O/'?;o?39?k-6Gi`jfѧ;G;Cko'<?;?Y3Ыʖt!'0cVfndRH@j26tH mF|a駮ZHq2LRxWÇߤvބ'Aly]y,@+pۯ=~hWfe[>hׂf)nTr4//ʽ;o <|:,ή~zO/ByD^赆LT ++,FseYM EqK]f7^ttޅY%B\z_u*˲ePUU×xoΝ;m,+B0,[QdfjIFRe!|U |I#MHfε-Tr\2c/s>{7iwwwW0F;;;{\4",Ebpxns{6k>.~tMؔe)ִC7uej*T\麾^N|Q|TUp؎9_f>/YH$.ZRks"qp@I۬0~݅!m?sPU\4LO7~?'Փ9RϾoy!|(5=Q<cx|~ = \ ȚY4MqŘD 5I1_*/4R8R :G23yDR/1RLҟy$% 9D0D"H@ 0C]GRq$lkRSHAqQU!}t(jw"uU1팇F]RB5xBDfYU"2"`%Tjf@b/f] C9J0ZHc(hkvR E.1 m?SԶ K1^ (Ё9 03@E{$A#bצ߃n ƋKDJ16̬ M^> 4DB$K*!Җ]7%.*C3ob{Qmֱ&) b (^\v{46E6cn$J"lu[@i@DKo qyLS`@IM#M:<<~eeuʱy9Ui\<[.o{ ű-V"#cۺiQk@(w꺞N?st |>_zaVqHf0S4-}GIe[ۨ*|yv>Loٹ@\ C||q>7O?|B.c,c6%J4)TɩjtT1GM#*VUSq˸s@<_l痏fʊbRb|螫vwvaUt`eY!?}^zOOl`/]wMQ_g?xB@vMլlI RmXDցg3ZD, l(@gȡ(sr;/z޽{F{"v㣣WU+ U+W!ڄ ׫bqdٳ>*bia"c`*1=8-%1:<ٳوo͔t5vՄj, }1pMDԪey 8(c.M:[FĬœ-jYi[{ ˲=}(?o5faAs ޕ+lK\/Zw֟|/6Ms;(_Ƚ ] $u*1jZCލ63DžHڢc= .ISF{*#tJ$JKH*;n`fBIIT7D$Umۢ(BND@\\B_+3D5 1RS!"Sa"Ho$ߥ^HK$˧\ ,63 "B|Et>}MZdՔOҽL Ϣ1PN- 6@D$A҃ۧ*,mlBʤ/>ϟcZEi nC`(?=twt5c8&O30D92$=)Z1HA0a]]J("j ELgd`*"iv#h'v$?[> tY:m[b@y3m&)TAHBhcAYՂG_uՍ_߸wlwwX7:gOQ=Iaq?ɓWg/W|}ZemM]i+Қ3hc-:ش6YUÝ^i$r2`~i7O?_ٷCǟz>ãչ>:Ofr6of_U+Ba0,N,is;2mόb(QVf7Ե~w{tyӭ h^zd`kjKODl6[<y~_O&Jsf6j^8"BЖe4 챏9Wyny|̼<{o}&2T5jƹD樯9t0oznC;1jMY(Μ姠o]}M6X8&:LLMQ)` QR|\U`Qb@ӵ"}7%F E y!MU3H9TfC$'_XG-Sc&[wƥ}:)="B u$B Ε" !$cTU$CPU(*C1$3Q*ޛ"tJAg!992R %Vr$'PD!) H@UMΧ4@Oi HJ@KH?!T%3UQ4Rӗ5DT }p!SH@CRLMHR3 >]1""fvWU:BD\Ou]Q@ ӴTac(ue)ƐEfyDAo(ОN꺆{ׯ.͛v2Np8tUeU ,qmѽOc^7l𾸶uigÃ'ܾ1sWmʺSUg:t gM}pc<썖VVݝ>9'Sml\xz=]#8Q*. asO뺆5;R?k׮]*7y sgy؂n,n/ݻ;UU<5Mk=7Ż O~xiO3%fqmC<9믩zя TA7`-?8?hKU#94T L(k!PLwyI4 ʠ , mǑft1ȹ#8%ۤ1#`f21ZKIHkHfMgg$a=Si>٥[{$ݧ#׌KWkDʫ@T7IU5yIP ӁƔؠ.IT#DM-ZC|]^sBv, huRy!3 ͡@@)k Q&`)) G" n331j=6FJ'1xĝd"ncIL1PDHlFOa\u=$̢E$cjǨc|KڡP3`j#ưf RSpZ= 77e3W]a< ~F1b+2 4.\E+v7w/^X2?<ݻWxƭWFsy||{_Un [*gI,BnˣHTAU.jyrxr-cj%Ve;? !S_jnekR7t@q摙se TZ@{h_+f<7ׯ9" 4*55$qb@jE i:yyW1D i%ue0PXC~.ޣ1Ƅ5M6Ef'*z"+& =RŠtNc´GR#LT QDdj[u=O!b$SJHHD rLUDp&{L.&DNU9y]zj "RS =eYa(2DS5⤣rS0ԋ TU kmFޠs*xc`9ItΥWg>48+!S| _@0pXFI]phfiqf&f>!5 hޣe`fĜ#ub"@$"4'"U@U)D%4\@&lݭ 1 p`qΡj J4m#@&!ː\^A,UJ]KDPHD 0$veA:3wO{ 0 E.f91c2؆Ph++ݾ?9K7nd[.#_d7f~,L/>//}?2ݻwyfeYևo}Ͻ~'~ڵuݞL9d=>sr6]7U[fEFW/n6bū8hraݹquqgڥ|,OveD^cKk}A$0c^ZГ,Aȶ6ڶu~gkdYvΦb!Fu[[h4Z,=ˎW7`pZ+ţ{W[L@9wB5YgND ^KUŌ,s.mo)33|t{߯fX7\^Xܳ'ѣl2ҥKo7Mc_EVU :dtl\ZzO(*}h6Q`Z#nf캎0!8Ev̊bM#hdtsjF!"QD.CԦdM$<2bgUX&\@b.- LDED]^U"3w%EW Q^ Uڵ+{`iaA[麮KMk4hh'2Q8/0єdATLi;d?9N DUT;35D!M7TtQM*B޹SWK+! \YBig 9bH>,I5ӢEsܓR>{G0@h{kӔ9$$Q L hQcꛍ(1-SFW5]'+B׭Hbbj k ,J䕸dkgh ;Ф:<BⒺW,?*d4a!;,Hj꜓2,j L!B)EH䣪a +TC)aRJy-D3K^"AD@$9%2,{!A-Mb̒Eg0uYE t{nCKk:$b,& JD 21'Y2RcEq"{%ܝ÷&ӝOߞ:G~ͫn9xrr!|Ňq&_+^?rJDϙu]ߟ~噫jE`m Q^?|U mF8]=9|lrlueu9F8ϻzucLg0WnCˈcˁKnm^8K>߾ZU[0[<覓ݻwW"<~;Fyexv̬ 3IrV/)"UUΰ-aUU;*.zcg5+(twpp@"M4O3c Bʉ}'q.o"EƩVI+]t M$*UQp+u f>v]ڈ+bHdjj@N! 3AgтSPa͠2βjy],_qo360=+60'۶mAEE\C1*X&"ժ,#u2#3u]e׎`K_]:G'x_g#bEQ[cD9a}s|%˲ Tt-"nZS<>>5!c]׭9YֶZn ; C4fB^nmg^s ~I'uF2' IqJ3. ָҠSȹCȪlW}YXz!OFQF; qӔu,U-BDKH'3Kh@LjiiF(8$Ե±8;c)hQL&C"9u= cBD_H!*%% 1͌Ҽh'sTY b}(3[Z0%; Zڨa΃A$w6eBHCO:&5R*8Bu ! 
8R3K3o>a,v0-@5}15xM%O]j\_Ua)VekQ~?&QI(}C)*̠^Cp1*PSu 3j$+H@ .1BL(6 5ou6W0Tsf{hrlνÈle?>!*bSPq&,hLMM붉1{Rg^r7lGe韻Iwt0޶?>,FZGˁ=ۮ<}]9QU\؛ryï믿Ͽe٩/>J{zO/WXn6abי@>tC\IokDkTA[7bYpޒڎۂfw._f@DN../ʶ)|F9N .=ˋŵcO{^ﴩ9=yp˥WtQU fVGD3Ҭ:~s׾˥z?{X̖2rf"-0FY<[nmnVp8x?'ߢ(aa&]堠j.ά[B DUsm-.yEj@sgNs.2BhY LJQsnlDeYMk,CS<$ogF`Ccn8mmmz["?e4kz] ǤcY+' (‚LU"[jppl;"lVQ&m,Zh"46OlR FZ}"DAhH⺭ qh13!f0`RY$"ȤoL,&I3nb¨dsX9 XJ O?"Bh*1fYf"KEwb&"jb U'tT^O& "[.)g.`{V̵kעpcyvv軸=:;;C/ǺZ6ɷYY_*L.?9b<..nۨlv~ x :,8N;_̫,:}Z'göm=O?_׮\/^3 JԸʭV> #k8[xR5$1l* ɢ+JXR F(IVrjo1FiT&Rs+կqב4$ &#l@T҉fr\8ς(' EfN0@2sID@3K{YERE:呦uX3 3K,%usUD@bHɭoh,_'A0Kq s(`j53BHDK5E!F,r.%aUM[CJ{7u0z :YJ'(ʈ/E G !)$b:UXcA{4L D*"]DD ك Qc4cU2TTך%]EF]gfb Bz´aQUYkjmkf4 ̢QKmƀKok1ˍ1&D_޽dRo &ι!elr\v˥<xU*n0.(NWw^/%iÇ~{kkö֦l.!A$<PX8+EVROmlcg|'_w6?ڿ[onî:hZEu+E0ܞ2ƅ+}'ꫯuP6hڏYGrpC"rvMdu:b ȅ[^~ ~Oo߾}n8.Q,nv0 *6y!|Z%g$puo;ZV'<ލ1)="P^R|*m<؉H/ @8r1KP{?cR06p&Yfffh;Xgf9n<^^2b8F^߹z[[[ժ:vynb\Z7;o4k6=iq&jvJl6Wo9su ecBUzy`3KM_MuvdxeA㽷\̬,r)XQP*xRbҴAU#>n+[۶/}s.Tg`tRcr p?4-w.RI3KQUPИaDHG$"$lFjDĦ* T;1~I0[jj3y4$RyIX!=UOƻ}uLO.ն13)º]- R颪2:S(:2 Z)慈Bf&*؛j&.k^ jטhHHxJ^73ψA#"hR̀@3<"D41I,1k_%ckМjL1XpiNNnZxt?|o|xx|;wl_ptɵk6U}ûx}y0>ԶWa6"))2Hۅk:ol;9%䵟r '>櫏~QTU% lk#`L m;;O䝟q۷NGͭQlѬ rTL'D4`.bШhs/6?3 ?y~h2;>g(z<fw9ifE$Ռ<3E$`k.B=y9+Y`=PV\Be0 :RX"!gDlsc?== y/;q7xT~o@4Ba7xj&ɪNHAD7̘~4y/*PA2q!![ȅbpko> qS/Εo=δQ=[6u UnK\(Aacf9pж[[[ȊAf9|/EX,FPw#7pj܊h WEiG !UUlsM\$Ʈ( Nz^27QꅩGoC E/q5yȻ<\7f(fUYelQ%+ ;w]Vo7˓l6{< _:// {s~_vojt:-fYv'"Q'ND^s'~͟~vww7ſw,utE(Tvv!>vA%BS0i>DdfF(jfQ.CFj&Qb,LgPLĔEެY9Ī*q_cfbf$k!"CЧuEeDkfccc7_w>w/V4_L&}< H~5캮˙͏Ӧi>`˻_^uϜܽVgユ^;??k]ۛ;} vx<\2ryݓq6?AD42T?kc}KU"ez q+_\ozyts.뼙|6r. 4uj9,jՌ[}5!=i gD1fYV9 %?.T5&˲:ka1Sy̼..z={m[W Y/ԵH]#b,8v%˲¸~YE!fUE#2iô6C9 XUU̺ ]=]{?8|xW^y׮N79OD 1'goamM' p8rh$.ݪ!q IEPمy :BTG,;4SAt;玓T> "IG@Đ D`"H90Hq55_[R위:v@r!FvhD] 1"1Gl2*F&4\S:elI6ՠ&Gaj?tDA` 5D DBff(E!y* % $Q >gkA5LHgm#A]'4S6CD$)bQQASJzLCɇ.;Dh ĤDRI*3X($ZcA(,H  +,4%Y4ѱhGD`$k @!q9Vh6+r#TR5+(DM[YٮWL#e@5r{1BJ7ƵiKtgg]a0(/=s`uC"jgܗ׻;rуdÍ!S]qus?^,9p /'qkkkژъҬ (6AEE!ݕ׿߻_ob5Q}N F_ÿ6w7go˟kV?31kk6v6&ɇoyln|IIN'_riL@m(] Urڨ,2rZZ[6gӾ Uؙ^߸zOo^]yUl>GSYa RdG GY\.b#9Hn^*<_ nEK6#"0*sJ[%Ȋ G4eWLL+T==,b ֮myꥪ2=zH?w3F]Uv mgfFWzU>'+6Ytû?ʷpILJ Ҧ.FF՝Ys(ΎZq*maWUzؗ-feBf;˵7Fy[UUi mQwѲuhZ!0Q4YKۃypxeՙGGVmooHr$"okm6둩sQ #b3jZ$"G3QH%grJGDUmۮĀxXSެcBcE̼=elqb1VgѠ !Tq8>S/[ fn\2D -zUeOl4 VŢPӉdecJ_.NG?xoKKӿFm$M29H崞*ĞV[d+9Bo *k"Q:MD"BY\!5UUTşLO !s$ѺL-U%\4@5bmf֨+Dt1vLDbR5ea;zuoCs *;BV9Dc2OcbƘ\ f)ckNDT)$äR۪7UM嫠FL031C@D<t@S,shjЭ rQCF33bQ>?ԱR<֏<*cru!APaf:D$a DP ,%JXT% $L h Q4ZDNA3N>4eTPĀL@X"2;`fe!0 :%Z4q㬈DhO$ -Jt.%ϊrڙPG'̞L{pA^o>owyg;.\plb /^ꭟu~Çgq?Sbfϼpt)Mq58R/FE~uq=eudƼW-f;,~xvޑo.]/n~z<;N{ݽsa^ מ{䓃ۮpreQmXTUA-ƾ*8]r/c"S> mדax^6R'Gu$/wvʭD\;n̐hiX4rUxnFqhe1pC1XPU+tWufȮk[β|XzW񭛣/U"r4?]eMNj*|9UKIӯ_}bUvɓcUZLul%24g=lfeCn^w]?Mb{Q8hls97vшb݂#g\33[18Z:u1bF~Y2TF( 3/"<⴪* meţٻXy^~=z޲<֕uG"A\5 zmݺ!Hͬm{2<'mى[6O`8T2bEaZ*[llld eU^4M\FVDPusە}?}8=y콯WEo4ӇeŧVRu$8JSqnvΝKJI1Ui@l(,J1fȀd ̄"Ҷ})) ORfjM1S1!e9 . 
#̌ 5h]a3j0[֫,sfRغ|ͪED9 ʢ3޹7=A_ZW?xp: l j<"2?x2)4:z$g^U7ѷ-8ꍿs[@/޼~e#7ۗ^|oゟ4s|K { ϻʍ'|)f1[AV!vpU]Dư=L{<ⱗ/8'T5cz{Gq/Ogܢ^zic8O?/Y?Om߯H2Ёy,@M{y1Jcr jdN۶s *ml7wNڲ;%ei[GŊbԦ .GE7N9Z9@.I#xO[?ݳb4!4{&zvv'O.pa|巷9''6g]GKݩW]#V硧c8Ոy_ag~12RX;Btgcgs8 Ooe6Ɵ+ѣGof/ږ4AXIyỠ"C$c۬HAm1_]߸q#7DW\>,0.E90 @]VU̢bTl۪iql8!J I\X |!,gX析 ulZ9GEEQ(3VY&n:N.e‚ضm{oVVxR-;E_}+r P2$ԢO?~K UC" `@j MЙBn1ӑB9'"Fh.JSQҋq7wm;@./^'aUO7 s/++woqFz=|o?6ċ/]Ǐ?sݾ>|,W|ѻwwW f <#YP{럻V?ݻw!bDdQ@se(":oe4~\^lz-߻rvҥ+ KQu=/dχEvv(20;sLJ۟]VrmӓU9_,cd9VwyΙٻ*!h}w6;:9L6V[\F캌 tr /zYshլFUÏ4e=@GNhiu eYmQ^ "} lo}U[[mmo[ڝm:Ȭ\\>kspg` &'9/MrZq=Sg6ͥs=|?|d8琴^-2<#"9y1撏Fv:cc$ n<{g'iݐXT(}4䔙\e>{4VU iJ_Z֖Z:_$A[xC&}69*sj$:vp<'-HST.ˌ\¥&i31@KQSCιsb*KD;0(cTg fiC;h]̌T4U4*ιySkoVp8R+n.lQ/Vo<7:v cY޹KY~Jo裏L37CsVUrDϚy>=KӲ lta;Ldd!x-(|}sn:ӝd9??>᧷?y7۷]jT;]3鯿olpٟu#mo=yuwTvndG1nlҭv!=N7whҭʜZ"R2Ū@wqOA 1͋''}⢵dKo^ϫx8 F*N"J\,@Un!tӕ,ȟA۶d?m9!gEu61kQ/)7p:s-2gwpS-1V'6#Qjh*^,\ig } dhg!_a!|>"Fij*A(rtsnn.mf J].^wppP *κч߸wӢh+EseUu\.:"D7eꖙAzs^ Ln3k!v *wjb܁I,|̎gK:}噇Aw cspYFWu;AE5mNn 7yă l6|QXMoZ+f4rrau`I'uݣ,6rN9Z5ɵvG͓_Wևݞj%"vZ3F$ESԴqn}G;\;|gYB`"TSS@A*7[3Ah("{USRJDVܚA12{HPjPE0 9Wefbhk)*1ۚahTլө+HLJVD֏D01 9*;03Pbt!J8!OӶ%FIvJ TTJ " SH D`署?iwTPdg,s^dRbj5ÌTK0 k`S3(Ձu!G程{ `,`1! Fv V<2q lV([NxcrL3쌕"t*MރcCBdԈ46LB]Ras6@]ԐM9z ;UG23-Z̙NcD~< +wnE]'W7vO~yս{<]0<<ڶ=ϼL{p>9{O߻};gz7Oe={_㸚^Z# ŕ6] V+z:i.tjD@_ ?g~sʆx{޽{C {|tcgwyq:'?zwvK/~Sy|N>ssۏ6]BŤG9 U,5 g1LN_/ ~?]k[[a~o'蘹Gnn6>|Mr\Gs.èa6b:fxΪOONO^~y89O@ ,󭝺[1E̺< 6luzY]է^{̦ A}಩DBglE /]lf1[¬ a6gg\@/"*Ve#9yʟ埾_W+).e L1:TR=?h%]V?s߈0QUV1 ޢӒ4e3Fb "Ȩ>`fOT dGBI0MD"`DԔMU%;oReNq'8tkCj\/r,!Q:֨(0|"䍀vPvnHV;,J:IU3D&pFx1YC," rI!"ijEc1"@LNCKÍ"$KzE``4"dMqDD<8ӈ= ތ  ""53SFiPDsN" ;GVbR D"h"rI#C$T:B "&i4VT -3r44 :b4dg^48@ow! -*#!9 ? :N㜫I:2Tvc jk1 @T!Oιh4vP[3p8,Rnf "jnfX~O|[xᅫ;zb&,şrʣ>y9aG8YlxoOOoX,?CC^Έb*燺?z>|(5⅍œ'O>9yJ gtr33;::Ó7/ i->=8/}… G'_{ŋ?}{6~"@0j;}avCo[.j w޽>(kqu4o;m+硄`fq6WUs<+ a8+H}Ρ{ΰey+7 '''#rvVϦB8fB蹡snJiDzK|gG "ogs_+;/=GD}lG-Tgc\M/./ha{݃k^{_'4M7|G?haGJ8i?Ou "\|7VUUV0?~|ԀNyho\ux)Sv'=Bl]p7q"փyO>E BEdԯ^ڗxw.쎳iW\4 UcQ7F[_L^(W; 5cU9AuTE(b`0m!RQ{rw?8<<,5+68Zi?Gǃ_ܭ%LI˥Oѕ-. &6K.^y"٣gT]4f8;C, 3iDdWL&s#@x-W.~}{cp௽eך?[,y_<:99)uDGl4V0+ r97az! eFXm,˶]Hƫ3WGvl<p8 Gq[xwbK+|oEۢ[YuA떋cUXfeyתfxAi? 
/n[>0WW\ #&|omUa7óуoQYa#)mw~UYo.g%Z/(rQ8[-vrOcDyUVsfS( GeN YPCj"8^-q1~6-X8֥/f ƌ efZ޷\.#VQC`^ע3Tq1d*LJ"ϑSzγ:]􎟺BL ѐ c6KARƄT9@4IK Lh1ԧjQR*#@EHj8a֓Ԁ2 ϙYO՟ie}9<ϻoge־tuW3ӳ0$($M Ҷd)p~C/aYiBM iK)齫֬ܗw{9pSWugVoH7?z`˺x`jPXْ3B Vi͟z["y"En&J%-S+m4WOOOkQll3?&l6ky5UUUNfQ,JךZﰜU6Dbm]ǵZBZkYj4ꢠ,qFR9BsA(ҶЍյh=3G~Q'O2*lKSL}R~?wάRجoxJ Zz~F8b3luɸcYorFd4.Դ(JlE:IgZXosf.; @ 1VibәYʟ"ղf[c Ь83F,0Hȭ,$a5zg V̋ N($ Ł ^ gA‹̈un=):O*ifH30y3;2 9ú$N}@B'3eXsPJXZ&/h-+k-BD!J{f…(gUs / &萊uR%A彀!@̄=3," ^X#gFb$Gr9!<${;Ϡ1xx*xV8@ VAF,yZ-t΃sN (qJk m(%axa aϞEEX 3yH%xֈjgZlI%hw7f4RAs{ι )lm)%IM.~Ǐ'7ןW9)fY{>^ZZ&~lor:.m77Eaq1֑mFJ'TL^}NGF'ݨszx㲘:Tjgu׾kW._3͇x7xi` pvkn2to~|zkt>'~t|td`es|d2YUC'7~ks֞vY7yoUy^208GtfZ&NNوwPW;=t{I[z ɼ)8NPV4dVu]d^aivhQ܈*êguqz:W))Yjt j>޷[5;)փ$>9W-_;wݕf*iV!*Vi+{X0I2y mӃ___->yi h,͆Ex"6;kw?yl"fw+I(Ӝq&l'WyAZ˲Q_N k'oxHGsQ1uIÏ2zhZ*ji+.i1%jFVqyR?uKYeOW{'ET[ua/#y===Eȹ"vcqV( Fx{w(Yk$ܢ"yr;~գ$蚳+(إZV6ni:gq\vZ֟sƖ+><}aِoH*+gPZwU֝k4ɹ (8/aQ3 Af*JF!1Q`cféV/N"~{ .ʞ!k " HzD "Eر ]H3@"Yoq읏uBRJ,[>vnmR݉knsh47o>row w'ׯ_O?Up7C_FNDuVonC7ljtv6:<[tjZv뻣տ[{_템g?O߿R^~}l|9~޹sk}Xk$7[tۏ&IQgT/kE25.͓iɮԊfYgeMy/Qn/F׿fw|TL3N(OٳZ.1K}㤘'pw[qn-.{́(Z="eYZᥥdBVk G͵[Ҍg`{VⲌ(0]^3k:iH\Yql6ϲ" |u&TC 1ӴhIߙv:8=~~ TUAqMP;ih.kD\5E/ͭzEAOMaxLǃ㢔8́(Z뾁Ods~y6U3T*5ҫiee1q^N\}}ə=ݙՉd^RWzG\~{_(`oo@S^DQ2 ʴ[owzepSc2:A95Z#:^|>֖`Ոj 1~[jv1Lw35i'm3,E~ਞeUV'a5oߞ?m{~ó%{:+:P)ЪUCy@Qlz7o>j6I^C]wRoդWcK/Nlx\WΕu3C,*QjIV 7˭n?~|}_zZʓpx`0_ziiiI+XHP@:A0GG5x`a.Dk a([)B "ADU?g\RlCkD\h,L)ιP[oZ/cUV 33yD^e~[1HV$03RιPπ||uP!yDp{m(fLLRN@4=30s؝(DŮʻNZu1kQ +ÔB$AZQ0/\$"5]i,0G>=i<UݝanxEGBsed3k+#5d> x gsm RF)+eH#|>LC7(ڹ,ˊwogzOTjM3`5Ff<C, 'v-uUfՆֺ/YI6ߠkԍ+7F߿ơK؏ui}:[v'.sKRN彗jc{8_z;'o[YQ]~@DϿoV+KJ4K\HNKZ<ﴧ"X.b@a65͉8pkgt96Ӈgy:BEQ5SMj9[@U^-TNVjk-Thٱc{C)Tye{iM]ZVxz[qEA[#kQilOa>oj$xq}>єn.1^y{'FUv*5d:1E朎ef+DT*nՒԑ:u7>QpƅMz\U\eYF#@,#[AҖAިj4"]?G?la|Ff[nmȟN Q9@yVhQT !"jA($SJs &?CC(ck n`ZXYiAv~_@1 "M" z"PTWxft""(GYZ DV!$4Zqȋp +X@Pdfp~EP\T+y6b ", ڇȢ9g""E<{`ыbUH da4Q2gE ^Ї.9MD8c"Ҩs^{Bn_|{ _ w {^"RN.f | ѱzZ +kYNwF'_~}*~zW~,{7NJ QU@3b32'WtMϖ/Ͽ|;_w0/??]oܸx_N+wΛxlh5j>>9F|>+eT/Eʴ*ed7y"Һ~8;;̜3\~{EɴZ^^{?*{gFg2rlVTl>G냫7o>~Q/^˲l%i?"G:v{{cm6:$J$ HVzpKZ_|>wEA|ޱf霣Vy`W^n\)ܹtԫ[Wwvv9>ͳTFYreY "j4[1x!:9-+o?MM\ v0fcD"67yQ֭HёME6P.~/cH+E·o][:88غxuuuKi(̈́y'G{Y%dMxvtXUGqVJp8Z4QǏnܯ?ձfma3!K$9$J+z\N۵qh05Y咸ʺYos^Iұc9[eY5.gd`DA4UD^EKQGuO;Z=kݻ,D`*{UlGȈ5[LڂTvf6~v3z݌*g'g/W~}[eR4:Y)%=q+Ged!Mnf'iRIG7IԊW7wGQW m ^+ւ"(/C / y>ġ@/ QR &"`+^H-D<0JȪpvbhtTR[$ f`P~uJ)ED4i€2A'""v_ 'AH84c"_@} "jbK#D6ѫ01GZKWcLӑa HIwH-6F"Œ B&@ :g1zP1ds  . %%6P."aƱ,A9)RȀ,`hJ;!j "J0V≈,aC=Y , `R{\4 Ђҋ)LB';$SX E  e,.abxJk-+BEZ+ΫyTNV dTJY[>{N W{}8v$PH4Ȩ^5n^eU$Ir{;On߾/ᣇE6 ռz޾qnO\>ۿ/BSM^W2xϦIVVn~*;ųf__% ;>zt_I|_/^X~zd5I+L4se|>/RvkkSޕvTJZ7WZ[?~*z`;+y9g=ټy~|_p81%pcRf3i9ްDEx8>lٵ/ >GǙu-IuyҠqzz*Gg9ۭM&Uf|6F,q#Ҏ2]om,Qs8Vql[Khn-mQ7WNyx2V_|>~??+go=;sr:D͜p Cm$<_n_F _ݽKwj<ޜ8ߔjC^wkFJ%_ygϞ\Y] ƴ8޹sQQyWUItxrR4=TbI˻j`!m4iצ֚V~|ҙ,lGx|abn77SlL|rXSèrIi@D ;ellv򍄑/WUUyDƓVNV$ɞ[&=yGV׾*Z.<*ZoPUՙ &|(Mw6'SY\RUgd[ JtXp8o5Vp|||hJG1:'V [Ɏ@x>./nO׾{m+Nf4KK^"^~T4'-V&Q5}-Pt6z>L`܅NcNy/myrqBDCH,p+`%T(uvH#0FFCچ0C 0{U8A@WjAH{D{$ !^(ÙKhGD!wU"̀Ɓ%<8BT (xqrA! 9O>!WnydJÚt#!,H=;RXX b"ш( '[ FQ+tJ\q W 38"̌ $͊E! >&Ddo  {iD "a$+D$QAGEPU,D`t 57!fVڟ[w @18 ӄ`$bjA0Y/]̄1{(fWyVڄp 1"MyŅ\ WR9faENPk핱ւ ?({o&c 5)MUFڥK*W_ZZ VWW5&GVuQi]GDu͕6˗>w6TO|[W_IzojV/cLQTd4nxf{{gԭo\Rf!"} U̇y~ӞO_SG?|o`&ÿWoxN^Z{?yVQoFM|r6N۝ͭzpΥibn$ei/Oؕ ۍ81HJ|O{w\~ҥs?}}zvw]h4Ny=VJMUDDKtii{V<\4t|UMs[7?>8/k~]}Xőwv洪y5x"Qjڦ7R|(VIYk磗^zuՓN?ٿ{Hmq~-DOgmDx"&ccO&3t^/i%em^[7/A/"m[$Yi\~s 36GK&Qk:?+ө/R*n@1FI@jmE jU6&yE".}CMc0ҦRn/ZkJ|V17-}gZV] F @$%rnP;a^%Ey@^{A1BP{{OJ1 Ƽ_<=pB!m6 ] ,#:$ J5ITDc4"d!jQl/eRKk-XM:"^@ *(1@ Gf^`œwLĨ-7XDТ8 a$@E;8C`y‚HJaBBD@Š8gu_Zܛ<. 
*q!@I8Աg@D|>g檪UZ-iԵ"^p|\m *%ezk眦y!A3H2y^F2pE.1ll^DkIZݍv7n`]?8xȧ_y "N5DT~hhk#E*˿?2ou{K7neI~_7ZolFg~=+~q6TJ읞gi^_kk ( Iojq秧iRAn-5~xwG+ͥ|Qz} `γlw}]+#$Pđ:7y6ԓV 6:<-3i<\H瞓"Oval63yzebU ιڮή~d/nzm׿>N.w8b1ʇɳ4MռpzvvFvI{_Zˑ) #jt#H$NZq]y;&5QQĈ8Oq-zjf^7m|x=MM՛W;Zpl|>窨x&q^?g~_wujs`g;fOW7Woף8Pd(B zb9iTD40Rl%%|x;Jug qp:(C"'boE9p_$6`Q ^iapHH^<^5Qd !R@ .ŅD9ʇ[2ip054}p%P؂Rc8σԺ  aB4"$"/E Xh! 0z{8LCT0IDН㝷88o >7EDPlၙYȁ Kp)|X? EQ;!"ezECw+<)sxcXaR yE`X1Rl#x" @~y"v h\XcOYQ("Ѧ<;f5_pn1("Z-KL'{[nF$(bfPąDDh|k8>>^;zKڱU <2Ȣ4&4J[kuL/";<>Oٓ{?Zaydoo+^jj'|q 4T?\z#˽f>L_ [ݫɷǾޟgN^y+GFϮ]vwjϽ`mm)=9gRkmj>.ԒvZ),ц_m%EL$"HV}G+/p0ai5EAE="4'ҝ~Yk|uu$I̍ZUUW^5M3 $Iꃕ8/`V.Bx{4M13J1rS6m@< ?U9fք7!@2x:NiRs<'Ϧ?9>-'o_\hw;ˏz +ͥN|cI)ell2|6C?M)__^_^{뵵]Zճgk?JG޺vÝO+gǣ,/]o\]mO+o&Q|ܞ޼.Mx0h c<?;a4 CfX>:o_~oκ1xvo_ywテqTD, Y Я7񚵶o|?{#v, ltZ\VrfIL+o1KaWi<գج%gggpǼrtmmP=>4iOܨ筸& |D ѷ l.-N(frwU#DcLfi:<˝g$IyqDsZ^c'MOlzzz?dYs&SFQĹG|,DTF3sTxuCqUU%IbYIH{vQtYu…Z*&o*6t;$IJ+WPD13Tӧs88I4eb&$n5KILH/f7_n^QnT`::qC崎K٬ü,+9xF?<ZҭH=F*^*+"2d"H5Zk("0Ixx ֋xugA:.Ί9]ZZƟ+W޼(7vujz$㳏3<4M{{{Ӄ,OFq㻿?öi2V@j"h(a7:g_Oyab9Q{EspEiA>U@;ffO q.LxD$r((!cQ !GDqH@Bj bqHAD!cB Xp8QJ; [0j*p@ĞăMD 8Ϡ^aZQ@8P:H)MJ<+̌F#";z>9E VCBa3xd@"r" I퀙 Vd92( +HB FL&:g (!eBjI *ı"}Ck 6Ta>K'YuP۪+fȄ9Ut~39խ^~1VٳguNT!FTF15|ϏvAߞNyn{ݶiz˛?Iԯ/o^0UKw?kQsCW+ (Vݷs$Iwou;i^"_?GKi^Zͬ(x.xyÓWTv6.ln\uWJs\V,g÷^kr1h_`c˲ll6*ya#54" Xb"I6䱉*[9""6Љ`TQڨJeV%<{w JjZjFq Q~jinӊcTs+c媪\=}qz-}sxd?~,99ckig-Mq^Wj .}Tn=,2*k(f6խxɋFV.w:ڶD-6f3qFj:K՗_ֺ`0{lT;MTL]ۘM޷OJ3lGԊ }ở>┍(o4)G>l\YN߸t7w?9y:~'M즓*.e(J҃6aӠI9/DIJ'!"EܪDDT0҆C,R,~?00c5r|BFZ;u(. CK+U$BNEue(:2ie*)rT;H,,%真3g^|Ck=8G6Gv^'ŻhDY/}_o/5&9i-,"*D!ZP.08\EֲNk> D<A<}A :xbd$D.Ŀ u(DXEk q%EA$,3(T́G6 "}"0H<0*pGah*AfQT+m!c\07*GzD9f-\N` WFk0ӼA@DfQEySk 8l8 0f@r02A%DB9_*| ZM{/âVfs923VAY0…RR&d\2ι&R8%V(G 8q R^wRfAyQdm)"YZE4mcLDDD$Q1h+8fm<'O5N^l6qM$qމ" B'f%h]2,}6]yѽ4y+e7W5֏=e.&J[sdӟ4^hDw3}^g}Jڔ8βt8;{7~ڗ#}o֊J /~ÿtP]^]^z,qDtv?̯7[[:k+++2Fٴ* bUI#tgF-ϞV8LR/Y:˽eס7n /kx0[KKK{IlnnrfAltMl2ɲYyKZW H)ϊ9k.&AD+#E~E`jTJ1)3FQd " Fy`UTQ\bJGSDuVmoolZ^Eugm>Zk0Ƥ5FhҭK$IV(vӵʬ8MGOKc4x*'0<2zsjJ4@AMZ|>n6Fo}ޒ6 ;ݓ0q+MjjF(yӺL촖27֝>dRj &{Z Na[k7/z/nz9?:ٙ7^;qۿ+[ɟVpAVK[EkHG1A$ 2޳RE8I9k)t B<"K@  ,&P޳ADA!:G|]"#֑^S!t0РB12޳"%cȢ#<LZYoeAʤ@ A^!aˤ{YPJ1[DQ1,,RC eCh PvE)㜋($O@,8BBD3ƄaHtlP `ED nQhh{/*"F>""A.ΖHT>KQEHZQMGQh -!Q'd"h ,Rc5K]V<<}:  U. Y>kF,:/}Çeۋ+{p_Z[v>__qK7?NW_~LO>gJaٿt… zqX;gE&zV޻)W:ڳgϠ2U\L%xAYg4Z-V}_Dr^"wyӛ7orw3,_0s啗ztZ;~ك>eOlaOIɜg^Wvso]:q;C^qfo3dK53j˃vs[qZZ71 vڕ(( ;*+ӳgGɄ0uifa6ؼC|zzɨ,g'ONN`$n  UJW^ofMRtz*h@QI=*Z*$UF+ zX4֖;"z@Jn&drޗBĈ8ҤÅ xU*u%XCEZbSY8I\EY);ku [IH(EK.0OR o\4hD?{۝02)c U 砪8&hTZtDdcc<~;{ZB@0QC@Dbm fe^eUq:ı1&&>EP\ʲd o",EHD@H`T6+fdMrΝfIT8[Z Tg优HVRjKK[OOSңh.OYg$}{ ';޵_~1hfs۝AQ̠k>w8V]1uǟN\ri3ݻoK/-//?QzoooO&g{φU}vZN-mnVEJ8r_7׶}X &XʺcgK/tt|rhgEQj0kVrT]bgI||l9̶Q䓃rqڹ8\^^~i'I"=dfi[Wny'Ǐ'gϞ=p} :ҍ^kZ-ٸ<635ݪf'=ճ(^])Z[m\vE˃BDJ(iVJ\k}!Zkŧιy|>7 MSPN=#eΪ9;f4e,">BcLgVcZIL 4ci<ϥVϬUs:"th4cEq>cVݮ⽏E2L&KVpl%hT)[g-XΑCDro^mlllǃIl4{_LO3&%y('kEݺ֝ψQ5Jyw9lva\in.UU59i:6(76.y,qYL3y4O+tO+KN漜-ezM w୓a>G䬎K*ed?헚J1,.?ʇ""K+k+?{GO~zzzуI;w?-=:cd:Y\8=/,QAmG:c5[Gw gFEf)Dv=8@Pi@`hGFBr («5g8,&`w ǘZBFJk&ڐs.!;BEX, lSֲe%%af<(PHF  ,`5!s!DB 58DL Uh!"!LL# F`س(@D/Hޱ(}\~;+ *T+fFE!Z{a!!&B k0y&@rж 4 8&_;O E%`_۔ $*! 
^S* :Xe( QPT qnbfH"PZ*wtTc8VB*Z̗Q-", s*{1eNC#YQnF H@EQ+*؇ bz:N|FީZ#ltv _~ᑴkIAn~nZ/8mIDATͿ*N:چ+\D8G"/=~<:~v2=,<=u_Y}+|_-ol^Eco 4~k_ݻ^ij;-Z{&\NZXw5rl6S$V׻ZAMji7yިu0=v\EK986Պ^K/ngY prDk]FkՆB57qvqyt4{=1'g?kϏۿeN#VV(JA >/8 0KX":\i֚9<#Lv(,P8E?_# "-REQ:ċCD,(_Ur:" WOju`Qh.B B!& l.J)"AGI$,!K!6?7xB Y@hR:X{h vQ^Xh11)%HxrŖ֋-,8z=|kٲx@}qAa}4i{FB @0 z" Xـ\ S@: I ELD9ÓAP3{MD(P^VH!1@BBkHh$RQ\raR#彏щֺw,6Rh"x ~8r?"" {F8i"E4EdΞ<|\,_tzttQQB2L5]#YWH 0h:>5QvkO&i /jnyw~o|'c;_^5af_"=^%x AO|[?xWUlt7 ^%~dNsQoWJUbZi Ohoo8MS?Zk,3[+KƶȩPaX: e@^c&ayfֽhpIruÎJSDVՒgϮqess9>8_ՏR~kHzKCuJZl*n'|wFZƍ{Gnʍ/o]/=w…7ɓG.yfXJ;-F/6.i(@ G4oċ%E $$SJ;KOF>t:Jݾ%g3q'NS$~TVݬ]7J6:=jן{Ƨg[j1͊"F:3 ~jAGĴvA餷kcYqhTs=K[q%I;\|irݲ2γ]E4UU٧Oo~;GeT g1YX8_rQ[9KܢRDk (_2CUJ_, @Ev+J" 29UHZ EH(򖈼 M"aLQQU=_BDE>5!H '"7Q&SQH8t09=! ^HDC"AHxrDdP133]Hlf̳Vy l,DBG{ XxuXȈJ)XX*B9!@ /S2"AS)+DT"?=>992ϴMwQJYxA.mpZ1TRkR{ɞQ<(|HKàQ*l[kQ"hQ~?|]Xݺyc$IP+4Fb[MCV1\3\FEj[E9J8s"P?}WER;15>>z}o|cid4q(esP"z7 #3PD. ǧon= kkko$ISIǧifƣ#QQ<ʞ͕ZQ>X~m2Q35bqt2~ovvvc:_*Yu\{$,d% ;&q  f&@#g&2.SWV-J4e-מJ괽rsÛ:Dt9sa2Lҍ@?~'N믿^%*g?PJYQ?t}Օ;o\Wō7|sާ/_~RGG,k;ZR^ziY W\QQi3^@4((Z)OЀ]ΕjFh T:^^^^nvm,bOgQdLz}/=t:n.]'"oǍ۟~ +/Fg=qd-<_UCf5"Uy6V}Yz/sY#T/בWiU[nhccּlxx'MSOYSZhp25z۫şk[їP'UՒ$Jth͜+D#R!E$# . ~ B&ćXH`]#)pC&0,4rk 3k'( h΅ޫ^6H렪x{@xD^DGEp\i/γ .0X1 6a|>aB"MHfQF9$J;AQx`YGF)i= 3c@9Lrrk !9=0} ]9"b "QSNi] ذ,H'̉E)dXYPb@$Cwfb 69; *DH$_" p d` ߨ@ VN|H{5pwݒZaqoڟqC JVGD#,rXUUu"D ՒX?`$p !VU`tZllTi1የR@fUج_},מ68s)O[0XŅiؓV zq('aS1S[jƝf2vܭ;R;׶0J,--mm䣏>zfht'_o}gsnJӢ(G]YO`0P3^6ikrNyvµ+׷60ۻKcc:Qֈfŗ_t2kιE}{lo,;$tx`ϜJV8k?Z?ɣ7n<?wj}~6 F͔2?LV)KuW{~cM2眳1)cO$h1Ӎ^j}iڴQ]^O~bwng3|$lf=WU%s#2N;+:9o ޔɲ@ A 5E9JTk8̬FKkAߺ{ZvkFjuM[ W@̪J>spnx(u+[}<It:ݮz|ǻc6JBD2U!ϰc^"Ɔ-:t1y1FT!EW`Ni rΔ"!=dkh4x7'.d!΃jş|~|Ͽ_;8w|n۽x\;މէF2~!W.Wݸ_zXG`MѠХ 'n=)H klYSM:I ,F_Xxβlm}Tmg]{vׯvˡD kqCGUvޕV_iFqƳU]AO/Փ볇"PԾE' NMD :'~ys~e+K+ON^9u/]+++` 빥Xc[.Ql.>yރX3EqyeCDLU8(2Y`v=~ , m+hD5"FVReff*z,wPwT eY!]Q+LDLX03N9~ 'fFh`9 q b Hdb$2?;QLR%ej, AMeN@DK\0罊CK-@TLGMaj8FeblmSw,@EPMIB+ιDAJ~f# ) S@ScC@ꥵD33pF``Ly654G2ТDbM (1@۶̡vD0s4#FW֥[U3sc= |5CQ5"U`DyUP%ڒ.}Ξ:{ ˋ"t:BhfB f3D.cp" @/cKe"bm0N٨蔪Z H.Ug(L$3/^9::Z[]9{ѽ{O]pRu`˻e)˼,'^^t,WιK0OB}p8~TUUݍ7j|~ڤG .•lyYDmP+PM:yv$/UUY:٩a=|fՕL&V4@,U5 9i{߈%F~3'[7k3 ݩ7=؜j>(в(q,+Ŷm _ h2DZ͞X=ЅNx8Uu]ﵝI^NiYY7xVFKKK+Fi..UU ]^r|cf8 m`zcc#wpjA '.>uԩ3fG`o:9zM36|#nw=}p6Gf}j/E [~ 6CNnAH^8ĂN2O1DΩXTK?*B$"$")3"91XB|qJK:7CB 0!$; %LʭY$Οa JDs1ѧ);sfx`~B.p8 M)"LB~؞#L4mZ0&@<79 :D-*38%N`GjѭOgG[OWWwɝ6߹ҙ5L:JvEOLOdY7N,ozSN|߻Vo}`ᓻϝ;q}? +ӿWBo[~?_wlllکş[w@ v{rݯ/©'ϟo'[W.,nf{;O~po[;}3wr {[7\:ģGqCx޼gt&3Oϳ,@(E`pZQURS-&3?nXbO:'FDo_`0'. 
NeyV0AbԱEf;[̝H˲&YGdRάuA^_.?`7ޭdvk/cG7x7s'"Rm[UKٻ~_g~qo7VÃE9UYmE!i>#5$sQ 獡O3vl5&9%AM!Hlမ,2C4cdx]Q٘'k"ƈ̜o53`TS#f:?)5k*fȜb cUrQ5cB^tTE1j2@b"2 @znK) " @[&I~lTI5AD8I$""@SUHƏ y 6I=Yь̢ϕP%P!Tc,1!Aȑ%ۊ$:$@Tmc rWR9`le ) .@$OfTӰDM |"PvCL"GGwݲ, L9%{ŜDiQ'N o}n>'>ҙXꚁ`uMs77u] "1WUͲl8D 6Otl'˲.sGJLHY9k&vqssriI{g)e۪Ij"eؙt;zR^|k+6`=ی1tpu?Z{ai4u7Xz{݅wo_ۿn%|Cȋ1V6o\/_yDrxq3W;%9|葟ޝX}>k0qQEu}gNKWL0s8lv'G{ZܩSܽ؛^}+On\LvS=Ay:4qNrjQHy9ӆJm'9 "7fFGfE},V8,q%qWs,k: 2blRSf3$lȂH*ٷCR;qehXfL\lj!54.2)(7Sm[Jp~|bG١u%N{om?jB`ZɺY{O[ʇ7upk^:n/BxIteS_徫Tuzk W}8\laMɤI|,+ Ni"xwAh5.xU jt9Nj~G1nb sm~+B0CH8$sMy*̈j#c%(Ju_=eij`{q[R-\c}a_zkb]_yr})nV:޽r^s1~tyomq1@$  lAs Q(d31Xϋ>* @^ScĶ@|*T=Df^Ėg,ڠ]E(0˲`MK5G/;?ޭΞ<`d;͚Q;ĻlVT&" 3G֢(Π5Jee#k;O|CR넾.˲ƒ`ι.1kլq jL|rя "&t^Ft((DE#c@f8I@m,MDO.m!x`Sh$ziBВbvm))_2&hhBTT9UHr)M¶9TPU%jJtpYLDѥ]K p!Qs4F6O6 gNRԿ<::&n`v, 0A%8ǺVIp!($=CDrQvs.SH\T & ֲc9 n)&=U3C LL!U#d" Ԕ AXUC0O d|>"ΪZUE$hucfm40\1܆__k`UY۶|{?q1Fhccһ*-;3g8I5/Ƭ}уg[~rݣۯ!bǁd=*n~щriik&Xr($O@- BTe"u9l&9#OIo̺@:3\%l[T,5\4Ie\PK$:sy ɨuˋeVf4ԍ:Q*Jf<|B+"Tf!{O 1~JULu}]jAac*[o|g^DZm-aafcM~/ݫUWK\t;NGj|mS… ꫯ~V9^Alqf9tnCYa7tڴ4A%@DAtʉKE֭[l8^5mDTȌض,9 f:cDF"{Nr1`r1e @ji<ϗPyfozmnȌ~q=3~'GDN3v0p^0BuQu]w;ag?y0---=~JY1n16:fn(Z`mk͘y*9GPmCp^l"@YeX Mců;_Y:Ju V  2**DF0C 0DFt̡)LƈDC#SsT""16F @LP%8ﰝW'Y| ` %F@Ԛe1(,U bqa$N :&P[q5]Jز Q\Z͠!aFh3 RDf fhƙ8sQ 00"OkA 1&"$SUDDɩ:A,rt3#'(#%(D+]$2QjIJFT@L}JȄQ@ђ; 0EMs88¤v%o RZ)JD1`DdQt |ad]v+wWDd]U{{OnsP=[?⟚ED Qߕu]7m[U&Op0}H^̹HD*j, ӆGn6W.]]޿׶mLZKKeVnS' ]ifhx8;,O o޺w޸uh5]F?mnC'Ac2޷9eR¶}6LrwڽSN-fk@e6pLJ#9:Aod 6Խu^.۟)G677怈aEU9;oN/\9g\?xpNNWpacm˗/\}fh1ZC{^֓"k+e.^`wvdyyŸ;wܗe {>8jO_޹[xh`{v8F;qYvǣ~یڪ!{WU5cfvXDZCpcR+wMhfִ}։hAklU\ 1GD,˂w)(f9[^/ϻѬ:ꕲ aZ y46Cs`^ Cu LbD X1bSޡza{߾s ;;;իNVfރ;owg?~{ttttp~ͻBQ(BH^/݇]p =7=0,>z磧O.˻wÇTU?Oo Z׭@tqlQ]o ": ҙ?~|_ֵC>8|ޞ̙Vmy3W,'ݮUU2kq\8r D3sA(Zl۶p}3rǞk0 U@ a!"ѵ֐9fR@nmUyʞ:)sWߟ=#A傈@ޣ !1ıͫjk/  2: p""-z3s9 cf6q*f"]i6.*bneŽe]9!3i/%B@6 u:Us=jt8C fp̭.6gç\ kGǽ3ڞ]?QJc}x8bbJeyTgwE$e:^ECU'TE+̦өe1FQ YL@щCJYDsŅ/L7m+UϪ^pAdUmɗb2˲̅TuBXQuM2Qc*$uX-JDe$|1X>{ŧ>33i=+JN;5 b?@vM]Y[PFEfbdLC15s ?whBBD(Ƙ *.2s YjF ЪƅVc39$(J:TT)c5hh+i:"#&̄(~1IseK98Vb0&rI3P BQIlL9]d*B"I(y7J˜jLp]qI9`/"Hyl:nR_hʕ"bLPj:Qq=@$%ЊgVKNZ}PyfآĦy65#&fvR&Tɱ%SFPS h-1 X4XI:"C$=fDD% yd:i﬿?ǫ+:k3/mllt/Mze?|gReSwK;*~|/˗~ёn LA߿/b8p~2s+'O^x&www<&^ye}|qԹi%;crɘaygyt:eL>\{׽i͛zo<7nnSgN>7NgÝ,`MۖMcQFtP Z9^d>  Bqb 3_pu 풄PxEF"0YSE cjL- ƂЁ9*d Y_X5&pNsBLiƈ31:JY -Srh[9kls Fe>æ"b;D!"n%[3k#sEY;YJhHH1z@Hs&YX:w&4> ?/fMwaVQ(ܑ0ig,p QTcLGd垉̉JIMc~CƖH8&ƃAD3A3Aa8Mfeo9h` yA٥63h|Lcs$%ySh8b8( 8F,"Ȉ(E@Ӥpf0N+85ш0KֈЄnՠDQ\ d.N0$R%IV(Y 8U|A2ss3L 11UQ̡"͔E9əh$D0R&9$ep$NE@y>(7\"W2 i& i)|}ѣGIuw8?sfh<W$smfX~׻Kpگ+/"yC|=N--Z\Tƭ7q{KupFaOgk߸yhe+O?'޽; } uxk'W%B5k]&2Ţ(dv3Zy:wy'>?JwxxONeoޭo>){-Ni<Cb shqjThO|ieeŅ޽{74CeLBFqY#d.oD$S$dsyY [iZ]3s #Uu.DYAG@h`A.Қ#COJ=weUUmn1K=;:, <e ,D8fN/~˃nerww#_a=&@Pu:aK[ըX[zOm :'ׯ_CD~<|~q]^KN|cٚSnF@kNN'?xṕli1A0f'm[yKL.AYT5wvvy?qj(0kCeQ9ʪq&"5P!12seQ3T5]U5mM󣥥wO.'>N]~]ߙf8?ni59ms,0KJUh"wiṳ9_0 i(h<"f8[/7?L&#[Sxucgg0 o=~ƵΛ!bbɆt{t)g  m>?~RKgZCj)ؚ`262S !WKK'M}*NCdcTRM{ü4sי|a6/Bl43pND)mQu>Z3Cا )ϛH A!,u9݉PNp)8Uyͣ.)5 hp5$INKfOĩTdU-6G7PjPƌd sYOyk]jLF&:ޭp 5硥qRuCDM+DDoDdҎha)%QST,)IHdv;0sCd&{WqsJ"Ra$σ7&Qqs.F2sXk653U%$UۍYS(9גjثj$`jb8%?Z=zrȖ&*x_oxnz2u%*1FGMU?3b0X/l>oԡV_}fB[ɕ-n~CwoizzV+}gWʏϲo[hAQ}v™3;;7wNWu]8XZ]_̥͹Lw/-9p*˗TD#o~pϞxwߝl)b;ZWYyw{o8q`R48o)2{|pis+[.,pf.Eԅ޹sw-ve.2^YYѾSX1uo V: ˾xvfYV%Weu]3A@ZbEBgTyB!"!O>0@ 4Hh3:jA·fQNOah g⍝:CpSPc W/o}[;/?xoToºoNBfG#DT!ZhjM123S pHXQB LHqAhbMYTL*A"bML$SΪLg.map LMhnӃ'LG92d , bN9O $"1&uMol1ib` 843kcj3DyRZ5i/:/?)&DOd"~d%" @5# h'+09zlA`2UFJf`D$Pt (09mp\DRjf!m" ܫ!$2DBw PՈR4)F*d4 g2=Z(,y.)@ 8S$bP3OfH%"1ڪr7;."u B0 dzY#jf EDN^"҈ < gx`BW}ޓ{;},B?߸| &S= 
*u[7r8a/nߺ{?{j{rTxSO_l/lL֩SdPʃ^Os7{1Zye@'z߸w/fuh'G5ƉՅ;jD̋|{{;)dê>#'q&F>mnM'>W>`/_m/L6'[wCخNmgi5s@(*~iaquu3>ZwSO=/(: 1ʹenqw{@ c %eSE#`g!-G8{z5m8"B f1J`hvPt:}O//,_G[^<{מ|;߹>wCNDZxvӋ;+wLUbyN j#:K˗igAږ[{3vYk1Cmb6PSVaNjÃ'h3uӶ-qKDX( ga}+B {,3ViɲЉa!i-gpSǁ?ǃzvhӓwle3nFH9q=3dUɁNX/* u婶mۺafa惶住ACLsPC;:::}0Noꫯ~勯^rᣱ޽1݇cDd,*F2pF+UU= Nf>e/;+p2f*V`{׆0W,g.}E"0rμYƗҌNV7Q|k6Q@`Fv""&DuĘ7GY2/.> @o3Co7oSk]f94(gC*XTHCYҴYʕtZ[b ͢9S[(99џΟ\a1:-qskܭB9E&*d]~ۯ5}\HQ!fp{kwv_Y9s[ G/y/b͇`yx|wiҩWJ![U&Gݺo~lrɓia=Jbneub̳aL1y|;MԴ d)˒!33jCaໝiUKÉ'.,oýŦ=+Vix,.Zf$)Ѵe`fHl$BcfJ&2R00Sƹjx Y藳Ta`0^0S/"}"`}ܵKDTRT4D\B0@DDSSU2UMQREA$JCb  5l{}8d pl7"r"#iNvthĹ79#J%lzu8AjG"O M#!2sq,h̒$& G %Ј,Dlc vX4F[w %̻5Ic̽牱w/;8bPFta *6\2+|!Ȉc`c4dCT2gsnyÎAn5(&/1e$IO#69(jYɊ@POl {d:KjB&ΉȜ1Fy^"!;JՖH L !ITa--'8[ysy6=l#aDs h-v:vt4\\Z|?~Wkݭǻ{G~~%G_߼}5QծqhhtF=l_|OkwQfV߻sϝ[7|޽X~;;<OZ? ҳF*ZUʸ @ K+\,B鬩f4aSϭw^|O&7}3nݺboqq6VN=rsX7JNGz |4twûөP+*M"2v z$!+#"m¹y2  -YЁ&6yNt:ʞ&#&33#D||o~Jɞsׯ̇kۖ,+4J8pB< B* "J@J+٘:mqox۵koŋ;rOoᔹkn\io_zgzUNގv2i0b0!(9݌g`:9ir!x̬dtfZ@Jʫ+Lu_5[0ôS6ڂ,ˎ|?<ϝ6ycD XbfvEhd9ͦy m4*9{ݞ_|ٝػ5h}>AD`v[ [#?[42c0.M K(*Elљc ,MTܶ-2sbA0UEu.92IǑPn{En5#"T1n$OiP#cP!ir3@=`Զ$Dq8"Y X +̭XcJ@LUҡ9/r;@T,رz(*!6;E@GdbBB d@L$T$!!-,0aU}d'fh`jLU!$?Gg l`1DlT)6w1FCFrjf|qv_w3h>ҙK/~qtx2mvg랋|Nnaiֽ{''>w_jat~K_w+Μ*7vww÷`2]\x^zk0iMnYZ(køx|zDⴊYO޸=ַ{K_zn3~~mmor"Ar; <:6(qlf<3sV+楗^3棭]u>ӣ]g`vt:u(bɎNUմ% )2ZUYk#FH6Y;m X8R.f=fVQ]%_{[ڶiiC,rPm5j.8mkH0FH$(̉ b0Đ3f=/e8tI}w_}_{=Bq̬/ {8;~;޻PFS^>s;zX(};?sٓ'O[Ã3+Չ2ʢ?=WNߺSݾyotGY"wY;e`V?^[鬓񸪪,b䡉"bchhY5MUUOF-ڟ/g.t^`ܧ0nYfYwppP-˲x|g'Gnm} ._91/~}e쩽h|r}cpk~etZqxxg}#G̬cЎ;)C~ͽ{SMZ"U_̛ ^ O=;,.>{;o"b|h'w8^̃w^w^ﮯדJ 9C֭,t3$Y4ι!\1 Q ]($Jclfg:y^{&ff6v*4;sO3!gTwx}׎Y-0*kAfcТ3HsK_!pA1 G2󮐦 P6Y1ny Ϟ=>ʚg]xwt`rVp6+i @z@E@#ssC(eM{b{c[`fncs1FJ&?y1E@-f$"Fb$Ua0Xqlf4xÓVDDViѓ D" biH P><`S;'ij69ttud%9f2e:̌H'"F΃(DDFO"|s^$!1jDR59iPQQK)O)M.@Fmf h.0!aP!Ǥ`r`(*"{D43cCJX@$VdSFKlYC60`@FrAvdH&1" ̕4"9Qj#I-|#`bX5 Uh8}FɄR"QX[GX9g)">Wi0+DE$c"b"V@I#3wVw_,\tyb8<+|bs 0#0s,}z.- N.{tލ\z~coo~Y=A|{Pj"e4n{*P:Y1+*1cBt_|nՏ?B 2h;:t-,HgIGǥ dW 5I6$2jHP1kĬg,h3kGΠoo۶u9:( 6]v5GysME\K3cq!:bVQ=( =8c5YBE6m^ "bfh;m[gٳsm nܸ?Sd֍n?]kLFEqx? [x4͖1<0N>CmJGaA}4s-_{C%/QgD/]x>LfwgW[;;]c-s٩S~ [ָo]}9`.bdJ[E{UIm;V,@H!FY!@2hF9S$)(pNqo] F5ŨFY-x<B`T`!8p*;fnPM׈Bs@/wH`rluƂ$@Z]~GDL"yrYT D#LU ŤLjegj <-Q4FE3mEPSV@7L*CW,,ST#cX@2B4 hJP7uRӥ^7$Jx2cSsf@2кf5%T521B򵘙q^ܑsHZ"HĘjslH) X42 3pm>Dft]GϿs/[ϟW_/`ỿkw7Q7֮]vƽϿRK,ްI=z /=?@D0u>*uP:,&I@?ORGզZ\o>~W޿g7?}L[x;xn|%ŶMӌ;Lm[oPȵR{GV53ى1QV4+U""j{7uqa0LULi嘙5|o6g IXm QXÚ;"VUμ1$@ä*, !D'\d̈H)NY!:oo۶/?'/=4Zy‰"O7u}L+gY6faaO_yW>|驣2x@nAUV^յ3Xw|Z '?2+oݺUoͯƹ慍wyRUn>0._عw/Nӽ7|zc3g6MsЋGGGLU5M:df?{y>s)3kͱ Dllu ]|kfβLr,"9o͉Fڂ|8fMVy﹎Fa.f)!BAMy.|QB4MKu-2``ۊӲ(Z5UׯB ("gl]C੟WGQuݍJN2߹dBMVW~`q@}(! V C 6,$4*d?rJ$UbR2Uc)S‚9Ϩ,"Ą8ᓇj$3cb s.%G0HģDeC35i?Ff`ޣEY!v_@E"K4?FDaFäz$|uW4J{Pcb1g)1 `S¼ 6D:܉3!3"BL <;6s=fn4 4A5 AB#(31 <P!R"1 hU*ϛ739w{?Y>C8{} !s1l)Ue@`넼 t,&T5҂^f}@ȡ^ `TR$RY)'hp49"" CNdXe. 0,IYQ L! Kp{GA"km$|fI#EQA7Ȁ;ϵ8O_, = Fs N!zYhWXC⑀X9/CG[[[kZ^^Qnwz+撗t]nB"GMܸ/u/\}?zō?9]W'l6c]8}rwUU}ppWwiYQg_ůL |4V'_7^?rȍQotfYƩg> /D\Vɨ݈g3[km#~<$jiw=L&,o$b?[~p~읝n9h]~V^>F((2ϳRk5BDմgq;A_xJe.U#iaeB#_jώ`zdNTR KOTVZSQI)+C̦q<%N1e!Ul< GP>lӇQ'#f6dYTi.D,RJ#c<HYQ\Bm2{D\23)3[DĻ@G9X`n%m}m&бNьwC_^dyS%h ̷f7y{{Lq[nw0 ^9~)S[MZJN1{CGqxku`kkх ^}aJ%RUYܸwj ":,wy'2FD(@!D*ȶ1K +mył̕X9h:hADy,/:i ȄܠυB" %щV٠,ge&R_Wqޗ:%x-! 
xc i@3_]חի N(8~wnGzɓ'?tѼ :^aRR*]RQQEA RQ@(Zp8Kv^Ѥ!3K +Y܉C XDR4zou0L%A$xavݏ5۫<{A< J+N@ jx"Eu!'QHB$# P\ FM$H/u*^α\Y)j -E3 XM\Z\9#E^YkX)UWY>q=VG: W|7AL{Np믿~3 PLYNn~?q;t}~o{&_}kp8l/u[?^{^q}ꫯ~GGGVCٍKnΛq?L(R#'}OAdɐ2·c*l"I4<ϫ2ˊ9!<Ϸo_vY9R:Oy|_!U1$Ua棸G}}"_v?K_< XʲZ [BrDPҪis(],ޱ8jD[ʯj/fvJ(BAE** ΅ pcFd2)Ѱ?{Rj7_}駟D`tN(.^ӖRgE'-YNCU 5`P!8bAkH>{o !, aF"*o4~?÷/?nۢ&Q l<tihTqՙ3gNM+pͬ#lzت9xCݻ:ra#}ʜ<{ԾZ ^zqsMJ.|<—Ww12ÜΩ/57PQqn˕e͠J"*p`$ :νP@T*SX{%t>cr6QU RdY*.)1Zkih)㽿DBZEщ0GLHzj JP2 QIB%D&6'L@aFٝj;{&;tXy/o;xw=_UJ\ȗ[1(2":NIӴIJi13bfFm8>'T7 t-eRי׈K!DV H$.I-PX0jiC/$t!| .(" @kZC)R VQ@ץ!'!FHDB < bU&B]DIDj^Ȣc!Axf֤DlN!̵Oɹ("< @dERW~)k{medȳYUkkB:L <(ǂn%!Ѫ^%«XhP׸@ZDAHHA+$HD} bLuK^HBs,BQL VPF3f R])D^Rʐf$lp>iA7RkcYPQy;Ƅ jlK (rh8k;E>T(2<;;m:{_pEIMG  biݛy?so_{חW7KGѭj0/^kKgſOs)oڑӧ?Y_|OeZ_>[g8֭[;;;;+ղ,?}Μ9fYṟIaJF5 td\YJT%.5ߺ^/~_zʤWˏ=r/ꠛyM{⽫~+_yWFvE613"JY$ bJDC  !X+"N3,~W6;1&K,{zY/ (JԴRnyy>$5mC"TOn:uJ`+ϽR)vcGdá˗ M=щd9Z6Ơ*(F-"ÇWv;sP{e>C\Qlڈ3Ϋ(|."J1"BhsmtΝ?9NH]AyT =N@?|Wʁv}.}Iҗ>?+63ӡ3lťұX8COH=nU{ΘFean4Odw?yT{{.^m%FTZr*9jW}km}:m"J媠f "+"{ fZ~ 7t(bMe))Is -40G% H:8 OG#DVC|gw^~xoySO.x  WU57ߚ[ՎC@ףرcNQh%: ҂JY3@}D@H p].#}PPq J3; 7Zi,{y`fZkIJE<)0zQ$5{=aq,6,RC!"+dIj "k@JQX@^+\ +TB=g @")"""#aQN(aLjH5m`@P("h0&$"U8#R DjΩ+eD% "k킅`]8@\%R-eQCZ < hf`@AE?VJXkBDπ< OAa .[D]@DWUdދk݊CMyZ/EۈH=!(Ah"@Z gbRJt|&jUIāu;d怂10Ѷ 2hql'^|:̏nl}?éZ22nǝfݻ߾xzaGQ[oC 5<B褣o]U{̋_zk:FF4z$2Jyt_|W}޺uw/M7yK6gvW?& \Y^E/r_87޼1ټ5"k=s-:BD;#œ<@i. kJIQʲmͦ"4̪_·~^~v{yRWo|x7UU;لvwnw /O}ֵAϱ)iBNC*xk+软*",ƩdDd2bN>GHot[(UU=JVji?#"#\Q5{HBņ`YkO\>*emْJ?I,ں5j"7.M&;,MH)_YޗPVUv"r竫K"Mӽ,啃KZkZVrS>xp;GWeY8jH{JkP 8ȑq̑3î˷(:t:JJ=}e"{eY/C_/tc׺tR? r,?}x,74YHW޿0ͲlaN3]iz<됈l///_kZ-CI$(fʲTc6U2xkzU,믗*I9(]KJ)q|ggֽ7R9$Lqf)"JtF,+"qGf<"TK2 QZd(0M&W&cǞ~}|Tݺ~g&&>F1c%X=:z^?rjêZgYcJ̬dg`hsl6:l6ۜHY!P hчJR5*,,R|hXiЂY6[X@9 |U4{lA 2a@@*/:I5[$Դųxu^:, _Zp|zh5櫖B  4:!:w:3/|t6r+H"JԻ,>â(4ZP;n^q3v`pG?pέZKFenfrޟ'VϞ8q"˲wëfrF O:AWE$#&y҈Y@M"0MgSl)8c(Ibՙ)%眧*s%DU#qׯ]R~fͫQ !5٠}d2Fuh *w|<st 5;ӁUU=Y[gY* xzjlflmmmo$I[^^~A/oBh6oGt֦!EQfL~glD$<$Ï*Y)A$v#js_zGwhޫo|;7}T Qǒf^ 4M:_~~>[7nn~xlB'M'%D,lB~}IT2J#"$VD9>"vysw;xv\}bH)獤+vFH[ELf6u6wq+Dů~3K";weW4;!Sf1*j!Q>}̑Vs;uy"bR*q͌%vi5dB*jFQDut:75biLkkk!o|B Ram'w5C筵urD@': Sd8ĞFPسԊ= *ݿsO䡱]T[Λ[Fge%;QOnۘO~_1} bJ'߽{/<|Hm*'𤱺z|Uya28uD9g!BK.O6t3 .+4<]0o,oll\ +1o94(d4E`Bi7v=888)<ڻO"24G Uh"p f_P1ctJD@DD D9w." ֍()K<&SR^p^AʦaҀ ^{ ͵מ]|h{o<`lrf8bfi}YnLٙջwΕvҒtLoV5OO=s؄RרHSrw 'Ak;d׾<Υ[ܗs{y(av}üY9;wwʰ,KNyVn&G%G/]DŋI,"SEUo^~c,6|_e;WWϟ?hPJ7oݺ7ZտWv7(ZY|o~oqD2΋¸$L1>>"ݣ,Ui}tq $m߻u! ?4kwgY櫫IY]lӮ3ZgYVbh,jFvR뗒$);r3eV' ͳ|~k=G9)#rvVFCҕ,W(ZZYdž\@kY)"W֕y?ӵd7C(MG=\{I>]=}nJ@'2)h6vMc`ًVFb4ʰd~p̙9h6}ԛ'J-"ڒ:އOG “?(EDjk£XY2J@rh&Af ,<#*dE[_DV_o/nz[kRJJ-mucD6]"Whb]o(k{Hf5ѼqثEefRaBOM 4ԕ ~ }ŋEyG"r އ1C rC`Br,\ LrGT$"9TDD^DU:ǫxB a#W `]/\3\YcQUǁ}IDdPu@ubh!^p]xPR셅!8DA4y溶ޣVGD ^e 598.C5ӥvbMC* %IBĹ?榘xwww 7ƛoG' ;2=bIZ顳+;wһ߽|@`s.?VvsO<wѫ~+_~vsGoܸa +n{>ԳkkkN~kŏ,Kޟ398XFEdAڢ-'I!Totf3mD^>+7PmtVzf4 kҡheӷ-gPUU,PUѣXms|A:N& Aˆ`39*J J0T~%^%VLjtExDQ怌tƒhHē0ՈddPDFm֓S"Eq…ƜH0ʃ .ܾk\j.cai|@PW#^Z8|x\{5f.mv3br%n6m`4U!j> !(@P ZkMbD!qY1+`'h>9AE#<*H5aABD"Z)SM@D+""@`( 0;-O"i3+Z׾zQG2 pڢ]@#! 
Kx2L28؞犅 O,XipICNaBȫsq'&!:#/`/7S`w4CB"0'.L wJ #պ@)LC.fFႂM " 5"w@@#(ԃ݃=2zh`B1'"rWDD 3%PtD"= eAɁvvg8#Iv fiQG&(-@]3uLk$UmyiZ13 Vz0%0|q,63˚79!ٶ 58{_<TnnnVz33-ٺ,Om޼yEYcfXjAa8OOOc)ò\Qmc)tWmYh[y'jfQL YV+#R2Bӊ~oݏwwD,{w8Hen:wwı>tXQ»վAf zĉ#1vUݰ\.nRVM+.{<]4?/O-"dDQ,vy;O'ǷO?x 괯ýߏ˃n֖m@m[jN8t^{w77*"[Bʠ՜aݮy%,8>=.iśm-!O"9-;]k;cD ejNxCu¾/Cwxwz~|<=x/^<>$" no~qOOOu}w]}o4g`8Ǜg}~绗@/>9TJYit_ʜ?<<߾۷O޿?oꑧ{vp/^?<<8(Vbi As""NamV":'涯H $6`X$Yaa@Iܽ":S!; 5ȒHW"©\יu`B$mCJXYT D<"L:V1B8u+.` l5I nh˕[m;Rʥ-0L]W)n\_81vDRffRiZk0gw\7EDILD=KBDZJ˲QJ)eL)Ebu]bkm."rN)5s:X8v<֤Ypw؍*\>q14驙眭ح{(YDD`*".K)t:n۶lpD[sw.y&zֺF}$z\>|u-z>W3󺮶Z[>Ss̳(2\"^ID1NS7?#$.,"ԬƼ,q>N߾?6f~|6x{W_rax>K;???cl<+R_N_'_<<ݼOs񢎈a4 %9?懷ox~zzzw>|De=\{{."Zۛ,DnN;ܺtTdTPՒ hA5Km6!"'w@*"&!Cg'"2K6 Î`U lfL@D "[e8FDAD@0ܟ{#tϭӰu}Cbx_ǧs QqDd!";: "W"RF ֚HfYhDv23!xa NI@?FHqWjsSCDSM.-(wwF LHD#3eMBdf 3xk;fw_]ys$ iZIf" QH^^֜2刈R_DwEʘ>Ǻw7+&Tsm^[n6g?owO.իM>$" @y~x4Gpximky>:zoSxX 77GcÈڟ#jD엚Rm+1%˶!bITkr·|/"g0n yʤFfIU[2/ADP۶}8O"l]۶q9 y:{[Uum6#E#2Z#ܭ(a,E)k.OkW}݋q.ǧZkM77Eⱴ6+T^Qjs` Ft)Fm] jV]uՔҵm~xx_Jamo?Z?M`hޢON՛8Lo>{w,Or.QE/??}Rnw9|ɧ~zoK)7708p9o8^pHGy=<}{,RJ˗~{G"0أ38vfwQӘ|M"#@ hxFιnK.\4 jl.\1*Dcx;^JfA2ԱЦoI`$AYR9hPab$rNDRvLPUDtRJFSEu )J⺄Y_!ȭ: CET "[ >H%iZ`LmBd sg 4Ƙa ]P}"-82tXRJxH8?_|80뺖ӭ&ajf AEZa bsGmw$|x0 7Sk-EZ姫*'><<[f8~SDúKDJT$aK ѡgѮCFgiVU R͌ t8GXOާqӄzr\.zny~^~znۖYy]W??R sk-1bQCcf؝)%g̼)U!l؂1.Э7"^JRqwRk""i!"D|,e ޤ ,jkJ[JI{gfpH%][""S`fG3cl"bA=Nšg_4M-0 Vm/XO^IwmX4MҹcI)%9".}I)2m#sADZݑ v!Z-qBD23eC RF~oꫛOwJffw$/\lfmu]{; ZK#{BkLw߿>R>ծ8%beYimu`6"nKkmq3RJ@[|YkQ.<ު]. e_.ݩ03!M}זRZZ; ÇzS9^f֗PZu}3<@XgfEU ҏ?HOOOYDhwQk]."q)RZ_X P" D<*\맟~U,4@ږsRj{1z!0 ̼֮/_cJ7}ef7Gfnoo7|sp^C֪HZnk;SYUx͛WtRnXU_D,"qYuj"AyVkEa,˼FDxk݉df#G ;6f6Jg 3KFDǾFDِ$"z7f=r"bf;EC7"RZUUQ!\K'PfMۆN<&)mfA̵)3NfD"dH">Zvdf6v^Uq|yiofRB6*ژy7QVU55i9 )%0AwGA"ܝʤQ1"="dL̜@?28goѽ-7ВQ8zJj]U3 jco"b5RE욄iVmc"&>23ȶ@23/|陈?,Y;޷=3*ki]Wŋ<fwG*"2MSYlPI(Tա#"8D/}[T&3s"N'']a 0t mk {}ssVl-"EDdHVJSVUkPkCaZt3˲8t0XmmPN)XR:pP0"8뵶'ȶm0 yڶr^Ed'p$qkhvyzzJ!3n 曹b#^mra)er\|<}PkZ sBe1R.9k[} %44Xڶy?fn0ﯴQdgDDY4M۶'33m܊HjDD "iDR"qm۸ cDn"Ͻև޶־;"U55a|Y<ϔ^zu9N{CaFĘ ,,"F=T)N<*6"RUwbJIcSbf[J)b',tw @Dj8f&}cf!VU&8f 2:$RRuї/_T/Ka]-_H3?E`DeXź@ f`^F#O Bh' /4|GɟfBD9`53CDew_taRCUM?ֺ('){K]{Sqǂ␶m R xں SJVLD⤪kws~FDv} aT~|w}͛7/O_~=㺮U3#"Du˺v&*H7̩ve?7k/̸=?ptSՃ C]p @6 :Gd27o`HHDV`wXPyDp]U nr'@<}#" Nh(   -#9vq,DS@8aB J!$(\335Ė6UY=R܊u _z9/>WFZg/~v^e SjɑMW+ZCltOD sV5E-q^$4)OOݵȶTD׏`)e$m<FCmr=}̄K)tO_)36;Ϗ׺ڲ@s[\SJݳr0A!"1gy[FT@Y«^Tc78O\i|kU}>_糄]k6wT5|wŋ777LAӏHyGg$\UW?'߿_yzc7_./y}Ulwsn>⋷?T{1+2wX;""cfMT Ѭ$ɝ[HY=m')'uP3#*MI5 ݝ~~-ĺskmctIUaK\J#XkKCˆWI{:paԚ+9WR  Dd]qZk -ygZЀ(dqK!u]yTETD,"Ie,Ánx4Hݼ??<*|s[Ikafep{y]`< 8bܻ203tv{3l5G_34M[)c&[׵>#ӳNb̰xTu]=Q\.}D3LbfaZJ{_/SRʜoyVTռgq "bm#Pw5`ūCJɛR-Q.֜KDꮨ޻!"T|֚{DDeq+7`LRUUU5ҡL)fz\><<;tL)a(bVt^-> 0 U3{xq۶|8Q9g E="ehD$nqlTupwڦm6B"* 梪;2""#wpeIRJZJAD{X*f )%qzU9kZ ,ie^EK)Z]OUurr~Z׺TRDl{D5OPmF8}Zyy<9tB#"8TDr ~{y>_y][]s+FV|~~TհMӴZ8߽N3]NF. 
PSJ$4pDt]9Pd03T@Dg5k)0O)P#"k!"c"!bN="pwbfSESJs>?;"r)""+3}ADDR?a J)a~||M#b@4'eǹY" <cOEkͫDK)Q┒ ɰl"ǡAN~8z]Z[Z%is6뺢B)e=uM)Yu8ݺ{ƧZ+Zvwuu]wW0 OR m6O)\޿qeT租msFfn۶ QݏqyL)!we :KAiPڶmF3+֘yr3Q?ȭ: e}tK,K,")3_ZۂSJC_@ޯՔaRz5330 s|ssJ)%xm.RN?믿o޼Y.mk]1.K;$g"nZ<O/""a230O<#扈^|pϏح=??5:<#nfl<۷ LDWQՈpmĕ@%X,`Rʥ+3Dڛ(DHGl<3 ֚Z9U T< a,x|4`WDd;}o̼׫Y %rw SJ+t"rJDĀ0 :SJsJ"BRd681Q578䜇a;oqop8=3}"Edl a3~~~ZoooGUK=;OPq0w\!;33{31n[(3~/LD{{r23J!"ޟ|Çaη_|IrDl̬-MU߀lfRNf֣F;2*GN &wZkr9__=HΜT|}sm[<"m}SD,}fH:"6YTz:c-"eΪc.uw>v7Tfݾ"23$GDr)"7"nĥpܨ0 EXfAQk[sE 3C!H]7ؽDdEDdR]mwA& iY?RJo 3FtwwVj\.nzZJ_v#*$(aH> @ֺV="ZkQDRQնUw+*SٶMQD"P"bV3 ux|Ad'pL)aaDD[ "&ZH)mڗeKww,r8Uneo"b̠7P3K`}\k"rmjfS"FZ+Rkx< df"n9gkݻ #b>hϭ+ׯo Q>NbfuZpS齻_(Qڶm/2r""@ue: `߳IwV30#fhcw-j4i)@#"i]2Ȟ(w g?*vnooo3#w^rVUJI)imeH>ן ckY]eٚneYj\͌\,ůJ)d+pZktwwŋesΧömߨjN'K<1ϭjqsssy|d(Q㴮~,n˲Ȫ>ͷ9g?yy3w?<ߜ3RVww _u5"dL8s6Wf1"ՔRZ˶m>,4r2P9KSՔ Ky8I}7Fzوh7)%{N͘{ "vM?e޿~CM/^u[ba ݽy?#3* #-d*u]O_?kHP8#E@{ [%r A w z"y"$(*R8"DUUIgDFJDpDHAPaQP1"p" CUcwo)2GUfw<- Ð2#@bgˉ4V lЄ#7!"U㐁Gk&asKWyc*-گCR-" U I):;x~qe(ctu]}5+tJa\`{[`ݶqJ9z׫9@H 1e({Kw|<1ҶmjP&8HHvY5< U-<.> i; ^R۶odrDwSOS4P&8?9J)LDkoXot^< C>]e6VD'eCmQqG]6WR]3;l̑kv8YaHp<^. f[ՇK) z~~~ˊ 2 BNܽ P8wޕCRJEO=v!@sL:2ٮNTUSx82B#"ZZJI]運˕- '뮭aPUJ0 3 1p DA7eaJbӐx1fn7w~< qΥ\#B7Oaw%<|S-s‰S)w_?Ş?=} 뺦yXU%ovYϥa5}ɋ7wHY)oNt.t:30sP՗w:qsqO/$ms>ܜ'|wW_PU){eY&o߾}[Y$o۶]Z, 1{`o0RsѤSDX¢n`fFBovY<"NIBUL A]#u¢o33`FfUi@ k2'qAvQWfN[ +sxJÏ9g)#S@mF .9:t;^Ny_WY䎝"0ALDrYa45"{2MS{q%Сwc h,1qT5GO\$@D@'$vmsV ؽvs_jt{{WX~_ۖ7_~! DQ9ppBN S,P @)-@4Hwh羭&ѓOwO`~ B#"c1Q} vѲ w(F՘w"nHbD{'aw@D{.(Hh?6e{" }ǻEZ 2e-kJ)e5Ue?|;gSJn{dpضmح&֦iZ}`CD}}oj<b YU 9AD8x@p|{zQJ|tFzrHMf]3wD=7ilHW "jf)VsΆ̴&R)"y@Dpw7vGN5HnaIKJiu&+"@Dknfko˅)_~=ޕD˲Pgw{[0 Wz||qDm֫{dfS}(xrnֻIJ)9q쵛Y~>??K#kuהұdwlmO۶]{/3_/UD^|q\6㛛4ɧmm[.sNCo竪n5̌ʢ횟MV_UdJ)U=;u&-:"(̜uD Fj6#b5Bl{FsDRᅫӓx<7GU-2#Ͼ;<8;S5ܸլN Pʥr'/ˏOn^Zf"yu]ߝ}q ;"NCٶrm(#" 3S)1";\63tWUɢ7b:{M煙]?K)<̼^~xzda$D,ǣ,mE."[ƱM km;pEztavrSJEGĽlbͼ'֚d,63GG3e3BS.k78wߞq*_~7?{{{_$fEf۶wDTL)"0LULC.- %3 O%g"(`6wHhO33Z"Bj,D1F`Kb$"8:0}n AD "bD2j;~TU."[Sl;;ckO饔ZMcYU@(ݝ36Qfı("cegp4p8pf>.cJw#""ɷ(8Nx"?a,7#>3":%Vт#t{,48l۶j뺪{Jɽ3s "_7DZkl9)<yضdfҟ mm\szp/Oöm˳Sݶmcǂ-˒z[H|;l"Rm ۶EIJDWD,:5Z]y䂈׸)*")?Lv8= yqFGX׫ٞ*4MQ-DRJ-R)@Dp6|)Li޶M{}zzmiK{F=0mE{󠪃"RzP,"zM8Ƕm&aI{=9[;N{<{d$|dfZ x{>+i逈ߞaAFAU͛7ܗeyarwwW^~*L/77oտWw~mRihwww۔RZ[?Wl˺AdsukާNr) #CK"PG-   !@4l"lh!(TB:}]{5#3{/4rmGc5fg9Gf{=9r=LD蛻{J)f\<|U=ʌ633;ys03RnmN%j9/%R'DԵ*ƕRJZ>Ӈx^kĽ]#aϕR>qoӱe)%"q<%qwHXJ!DOtp~#oe5dDć桪pRUMDn@Duw< K#<1\vwFDNwcw"*YbC/NcvĆ23H)! H,f8H "{)D{1F Cv3<Q)C},\h9缊3a>v+|0Ƹ/T;oOc <"6W3;~h"J9"l@yr]|ԉ*03\k=ӫ?Y|0_,sD3v/^h:3sJ) [|\.y>{vfPp{`#.__?o_n{qRFՋp:Rry9Qc8cҙiʰ0Xff#8"z-3JBhfػT}ZޝUY>^ɋ/p@̊VJ!<ׯ_F]UGι)l͌,8Xk͚rLQqm;8)3zbPc3eޱbs·3׻19 3GwDT@U'D!sFϽwH˗_~7˗/_}/^䮪fffzJE!GmiAP;1Bʶmw7_ ߃0"oWGY =RJd,"-34@NyDdOI978?#"no_ޮk13^avcp&"Rh*\T=Dh`CHGtgfVX߃v|03RڅThmrSTp"Bſz֡ݑνzd69#bfVQ}W_yv'Rw~1RϵֈZk2˲Сl*qvwGPuR)"·v뽻KW033sq|\ʳg}֭ DTv#$gf@vwclֶ Ly^""|۶-IeE$|Qm1nukC9@Dֶm3Zs!3zO}DDM|FlVz?LxF0`DzHf].c]WtY*|:ˏ?حg? 8ͷPtNw}ww/sz Z""DĔ̬0\.$'hSzO,~`TUU)DtA;4,wL)DDsDI"R9*A="un!}| qWm ".֐="fjs5#YU'E豷֘PQ=RRDdH9?"b twe۶I;,5"1E=t3G"j%"*93/˲k}n<_OӄDp5CV"<CDf~e9/r?3]w0a8!c}???Ϫ>?{_n<q <:\|+N#ݡV(ȈxTn)m=qME'f#p y^tz)>/_>|~8u]),曻Ry>f۸0jIq蝦,Zim}o7(c@Ḋ9Egj2-"b3p?Q F}/~?W>裻>âkc6?$"rl9hvE"֑gZ6_@t_/1GTzZ,KN,@ ߮ohƈ"›gK="bZ'9g93Yvm.$ UPi0* _.`t0EU_W9"J͌ 2QRJ\23 \[k =@QGDqlUoafZXUGߘs4DG\v]1C)!GY#ݩPJI[kDpp2m;imySDOP߽{G$a)󬪕^ GԹmv~8n<m=Aq%6RJ*Ǝ6MZ UfCekw*s_"XT1U=G~K#"213ko (;g3K/"cCH0 @U?80qMD&ewF\i+@oZgc̼wDTUr=d۶, '"B/9| 1\F b@)%H m̴mw߉z !"i83mݡ"FOfs-nDsj}DhZS޿z>/_~XeD$eTJ968 }@ K???~??&)w9iy9q[kWx"9֤icrd3#n! 
$"LRJ) 0\Y^H#5ls3HDŋ{q-Gi$"$AD.1R ]S1!1="iFDؐRJJe{ cRKPG)D ӈ@R(~HS!:1|DDe_믿߾'?z|#XacY%TDhI.9 ya=ռmvH)d3{ݓn[y{IUҾ,BD q}eJF <>{,?>>&έ5OE""ڡ3DuׯH|*|ZJi;-u籾9[m50҈; Ge{k`D<;L c#PUkG"|LoXZk)8"FVDF"&{yQ5Q %)vuwd1rhx:&@{|<˘Y\Ew9$c z( avָ R7cf<%>OSb1̬RJhf%0)2FDԉ~iD4|;%gnd"6p)dfjJDLD|Q)G0D)偖YrJ)'fG4ȋ/Ne{˗s( t</w?b*M "QU} ="}i}h8n*."4"hO/'UDDL̺rq^Kʈd284MjшH̦G`"rW{Wwd1AD"2ؾ'_}vk?~իپ諸z[CѺ:1NMO?߾Z8C_F w#"vn"C߼ys===G\.c3Kf̬7l B &d'R ;nN:PFH@X#( ""INbfǖE u $^.)toh5@Pq\pfDmXƉƦ fɠTLR푑hjE_16 %ʀa{)gp'"5aN ^>OR zqwggf@hM-S9q@,y+0;X.=+218ܝ0$N9/mT S>*KL> 1"nrΥrDr݌)I2MK:zj})n1^kJ;#e cP"9?,>t7w?@$4Dl!e[ZN}+%{p/I Qt nnSs8aKًpښ>;ۺ1tkf}}ߣ/.P&3Cd}73[fmYS>DO̤:J2$p9ѩ1v۟J) @~q]7Msuo#4Cv9stbid^ L `p3$sD]##$ n 1N#Ca(]6aY]Onm!D884M)X z #$D4tC&pQKP$89ю %6ѠS*vsOI쾸FKHfKQ%3٧2S bN}p ר?Mt" 7)V[DZ b`ZIF/I"X{lqa'wi0w73({,65 ","nĪV@w&"7ŀ.׋ly'7(N@i4oL'N"iED04t-@L<0̻wޕuef lfu+@kD4S,۾@J)XKf"R9LB%䩵&9p`7ՂRhx fr+~G-?z朁Ny:kF)B(N)hW  N2 (c^ՈQӔ ow'wO}Wmۦ}ffiK)j)%c >ekΙ12>@|T_1GD "WD1RJaff@{2IDб O)K)-+W8W޽36Mӭ8V'!bᓈF K)I5,ji]gqv20(mrNDDݝ`fܕr)`|ݩ-b}IfGkC9c7w<1sNG3#Xh2"8݇&" e`fVJ<ƸDZ6wP#li1ʳIUTUp'H 8I)7()~4XΙ3;"]QDhMCݼݙu1c1(DK)ٓ4MLsE *vzT3""@B36 fX9p)د׫i=GS9w/_˥]$͛7 қ7o|Vk>MӶSJɠ:9l&fv맨D|^וdDt[)D4;#{{?H'D\4c \Nc kJ\T5%ff7Djv/)GD >"){<U9]N;"nU5]D&UI=QD63*C@f汻zS}U bf#bm,raU] RJt:h]JVUdc`ǜsd:?CDvGă|H{s D| "2IcDryyJW^g%o.VzlV(7|[G~O`r_&o!dhHxsjyH[i`K1GDnf 1FHSJ ffLw۞s$y$eqi -?4M~*v{zzJ)=ceY8$-(7ysZRս{DH`D3d )+3$ "{1wiI)˾sM.ED[lRJ""wD,t43wcg?yuO>G}4x{Ei/~ ?ݑݹέ9Rѣ%ݻw{]U1h{AAőPGwgfFtpt2TG=ʳN&`f3%w@"2mD$Aܠ@ G#" 1ʱADQJ9ьsn~Oz9[k޷E`-|Ӆ]DD@&c`̽}gt`w=sZ:̌bW#DLD> "$DsƓ;<<csAA9`f@LeJFDપyOOOe'#s|DĽ1dfnm1F_?<|/׾~o c} )?1뺦)'|E[o?n])E@;ڐ%dva݉g3D"9Iu]ffQD4Q!1Nt:[kϖ"bٳϦ7^x,"rٯ@KEPJ}?G8>"ctGDpqp]W{ H4Apc ?}3KU5TU*ZC-vp}Wן}wߝd~c6py3~'"@ウ}߹@DgU]ݿw~=V߯T~vJM2YJ9NDʩo^ŎZm(*y1 $ @BfxBf(L!@Ă{P{8aINtP D7#b;~X=2"F0׶+\SmyƉͬהe|u$yks'>G"gxb^̬Ӿ:{*Qi( }tjK\8vYNgb,De.$"2k}I7afNhf4EZ/""ڶTD21tHeՑ @ 5={֛14M5˪mY:E%"GGĈZkP$.9VK)0"0)A}zݺ3'%Ä+[^QT*3m۞#rqcgkdcֶs{:4vE/:Mku9A|wk{|=h<"zZ7ۗe֬\U{JEPJշm)2H)3ɔ4OӴ,;i)|5W, 1$"b BDH}PDmNDf3RUՊ"<%:aSpVKl hp̭5ploG $+ ]_ai9[(0$ClzHKV)*b&CASJB96g,Dژ;(FVҶ7zk0{1ݽLd0$;%]7Ro.)zk-QbzRU5(OM_*刘"酘,{_˅7n~Nݻw?_7_~kw7: N36|&9ԿB o_/_*"3t \q{k)%VὢʺNԶ='wo=7##Q(`A'5Yp}?|1)?9E{Ɯ6pl"<޻nkQU5 UU>HNJXȺ T\ :"bd^kT#G`cXf3318j,q{eoFG4J)N}_W痟IN,ͺ5wn~"Z!4:EǟW}2`gpSo̬ kęCbs1"ho$߉t;D؉(v,$4#`q2ƣR!`gD434,hmxJ%V0B|p"3]{k>t褡iJsp曈dƀ3" "sskH)u[E5"^.KDh}ImQkLjN4O/DzADbn^/-tlFČYU=1Ö Xr@8pϨY< !u-"2`h$VUic}꽯o.R=RJtZI 1*4ja2I"4ۓ ! 4BLfHX$=ncY&1\c m5""YUCİ>DŒ$C´QWuNR""Cm)z AShDCF{πRJ}%D`)aAfBAhU'ԀDiHщ9yI!c@#ҌD@-MckDDF{Εz' Bjc,x;GbdL ! 
8)IIJH ȅF+K:zxJlp;\_D@\EZ3f" >ޟ'GChNuy ëwW^~?~_n?OO>֫_uw/jYl/>޽xF- "6 Efvs cʹ'8QclT<1RJHѶ]"өg!6xӿ_n'zYFޒ37am K);#"&cm mHwZ321`1)aR UQB1P!HSI)LzmmD[{|0R꽟=?ڇ~X·n=5Ј(ycc8 r1 F}@4<"(C3 PhBI( DtfktE"17gʇ)pc@9uClAySUV@q݀] dw#T4C: 3.RN #?"aqf-FjV33` y1TU'oOPUCϽZk6%3SPDL־61Yə<ٵ?jÝGIoe=r} )xAf&;O]^%agX,UPݽsYthWFѺ'NRDI&i@Da:gHi9 'б 0w4Zy\W"2p)uTl"53DH)#$Y&]n #&H{oD2[GjEDz74%T'TG GB P0qYkp>Q8([۶Mraf!BPnԜ3cRjfy1vɧ|kr7X7"z|iǥ>y*Si\YY$"'DI4@Bbnd t3 dq!$Gm+1ZDI3u: )q C[DpSrݗeAhw@ ڎ+":֐DD1uIwBf!{lAQtpŝW̞zaX83eU]"b%,Nf$"{@9HN3][A"a=aꮛRJ6Hgdq01{#"c[zSU}HDѷ?VD!MrO۷˅~O=uo|\@:??_?//!'p$~w̐14`&`o ⁺J6 ( 1pxezWJ)50df`D֚F0v.An{}Xi.ezafoc 1ޣmX.oBfXPD¤`޻ZRdyGG WGkR}nݑBL!29#$tB{Ba\&"ϑFb[<jL}fN9zLs1l< 1!£B=ֈh$jaHq t>2sb2ft>"Sh`X"B!4V LJJ |a%b{H08܅ |fp$lf Ǩ> HB1 dkRND!Zg`G /"p'ě&3<4Yozjfrp^)C l}jQ©Tޥxm{SJsB>$\͗y?ZWoZ\D1pD@)ff~D"r0IcXP!YDU#2*I#5c&uw4sޅ · R8Kʽw>r2@4-f]U2i(!0%%Bc`I9y63|n2/KueY}ǒܼuUњQ=>H ܮlY@ZJ*R?>}ߏ$Vó?1"zcy~Ul\ǥ_y\5BrI°ظC}0ݮv2;#Ij1"HVՔI{rd#u!f61b(9lTJ%D.զi>cDJU8" "ɴoʂɔR>mp]VȒpWs44MH2B%Dh]iFDuGD5>|bafN)=jDXXtߘGTO@իg߽޺~GPkwjл)(8ºQ.Cu0A jf;2"D~ʈl4#X\ w F$~ChUF6D$juqwaLHJHIqJiC>Bt IzΜؚ13snp@[\af 1si+ڮPn=Qc$J``>$Q)FkM("؁(IKּrέEDn.;Zk0~ƛ5}mU澕,|w5猕l[׽i/#W>oO/4i>:31S.e7*EMmLnж` 6RJ)aBi$J DD#Er.[_ie/*eot:i{*k 2' 1vѢD35,֛9wSր(CUuOœK Nff{ۖRN`ʈA #f&<ȣMD\0uD<}O)u@aUM1N̜sڶ o6r䜷mI#v3+̈TM4H5" "syHʈEM)M 8u?s-%Z}%aF$4-#wDt FpF&CY;J&w0mLG:/l 5뺮 nڶycC;q m[|wh# a+H{C.M̼I'D:UG`P3ZZ&*"VD{Tʹ}jfw`T%" z4w4<{"|%Dv:'BveWS~޻oﮠܝmB$,ciHfnWm[4my &o}\n7^'ԗAw-[ \rY?Yui=wZzD97gP&Fe1 #bz_7$h\ &%Wo[.Kfwf}0$@Y a&蠪X{DDn)YPȳ;ݪ`iű ܙZ@+@ fQEͪnӔmOS R-(q>j)i U~Luff-u8I%"RBPmT}c0c޻ T9'@Yl8 ><h0 ؓt]5 M(;3OёPwPD 41ʈfӷ1_?p:m鳔99,s,{;;UZzΙ{"fơ$H` }$liI䢪9ODDHr- @"";c{ bRUtk"2RJURP"v[E)׺ Gǯ_P}G˲IU}Zn50}?vD2W{ϥT iSj);!`pPlYHH.aց -Ђ1BF@JȮ{J#!؍;"G杠B"G6fԍ""12߲`P4(,dfwǨ)R$݇SgCh"Mjn W0Pq9\vHcHB^؍F;X)|:13,"fUMvݯG7<iQE6ӴLtJ 'wbf,l@{j,nMHGs=qve P[`׶," r} Ӷm(Dh 3|Үlkn&r^)Os* aG/IH;ȗe;x!xYݷm#1R0$m}dD݉2;{1FyYuɜsI J>n7ܟ:Y 1 3VӒr|j{>`:?==ުw\Ne/m+]9|3۷yhI雯>裟я~c1fD\J^hǾ$2Y"*1fѰhPEn\}23`w-ӭQsq$Yhs&qnR|۶DZk=;0ƈ2@!M>q_dI1"ΞmAI4܆EcH0}* E D)"3kbWPM5Ed2w("f3}R*n[0iNKpuƇXJPШ1ff<̬֭DD)8b`dђ0Zs;I2KwD)AމHQBHt0}'1"({Ji<Ӝp]피j^~[Rޡ7wıG w^ۏՏ~P|>i޼xqf}nshDKfBSa&,i۽"!tmZͥJifR]UMAR$m!IRJ S,9\د,4 "}pcxur[p|7G3:hY "F ԩA )e2 ;bBDp>@zΙ)"=ZًH(afv1c~Jy~N7ÜFWM{`mWHBf-眈{tx Zѧ X;\7D(@!pa=EX1ƉfS<9fwىsqл;I"t7'!DF9 d=襤AyWYdǣX`g.c؍j`Liَ5"R8HKiCAhSш@&$('RBOFdm7Xߟ#^SKq6ŔµRٳgi}{b_Y̬?Ŷmc[kx'"[>4/m/_/?{s%pJ[;^ZD!XL Hs5btO,su.NKI}8`/L"L "" sZ.Ysr$,? fEJ)ͰY׵-tY\#!)> :)J)J-Kp:eUEܽ Js.p!CefOHqc 00S9 }OS>؉ S*ϟ8/'߾Mˏ?HԀ#F/k%?"" "d 3cq9]{J9sLh"xS-`rx#RJw 8>44 ! )oՍT=shvB؅y*ݓ"*a FL2J(Ζ:x :1Ys-ry18>׼͹)z^!~1En̼7|r$(@V K]%zśH,Cs~9}Nr}0"J-"]$'$@<SE"<@y݁+s:>ۚ%;wyD\JD*""b{sD0ga!"$7ռtnF.HP(g" "<xΗpkݝmVSi̧eA23{Q;!"".s6mƭވ8׽=۱\vu7ok(F17Ooy^4in߽j8Zp]n͇L%:lp~Pfyvyқ֚aɆ<+m[w%k%gt≈2&|n`ԓgְ8e%IiĚ< wp>GD%r70$]C!`NĝFuGg7%<6mY)Q%> 1 1pV@|dk 1;G'8}o}+:#H)Tavv j wwS"⦒n>j2Dq2"N=D'3K=TIi]gUۮn@8#"aVUXax2`A`wۧq-QT`` RD04Fi_T6<ϰ>sR߷8}G@QՁLjP13Mi9KDu<2i?ԵT+3"29_r n>"~[2C:D#G&SNZ{) pЪL ه'}{wM#DDL^ Ē$)Z9E;203찮mXDtRJʶ,Kc!S$jݤ_'r"igKZ5)"priM;"o巏oFDTy)w3~پl7chfR={4sor9n{;CY >mv̜Em"v]W„VJy*\mwww/Jk-"u 'k14/"lB2QhPx)h7qf4bmP39i{JU;!avDp% iMv&8H0{/D4uƄG]R@t̶$zؚr9h #(_SP dgBHH><;=TUfAF]5)CZ{]*+3k >BE1y@Gff1QhÝd"Y[ŵHEMh`>ND/JG8bUAJ%Z fv[۾2䓪N% ="*mfDD²)!}O?qv~XweWxys}hf&~o)w3/RVH#1q6Ց"8@.;@W򕪎)Kt/c@C2 A z)ow'Amݧ%q%=>ѯ޼ys,ˢW23Tk`f=pI)TBCTDP <"e<<81 !"̀A 4P2n}c蚃8x_RJ5"0٭ qwa^y|~~x~~O)}ߛ;U@8bCL1@ 1`7B827OU$"`f84hg`JIH$6<1! !" 
C BBJ{{֚$4 9 Rkp=Piߺ;0W)qnq3,8 Gĵ)%R-Sǔ(D$.\ߘc\#"$"$eY8of~YN۶pJc_ҮRbDO"B =H1sD"ؓ@)lh;"g@b0nl+p8"2m[RRrfB!'H5@ Ȝ(l99g!CƧ7չ"nRk(Ti{چҖRdc qnW"ʒ,~ﷱm|>!9+3m_;/o&Dqyx%jH֬~7˅y7duçO%^k_~ U~_ٛoB~Ωix @DRRy)~^~ۧAfiK:-Bz~x<,ccںTQJw%")nAITT DTu24Qe>xDqJJ\W{>vNꭗT# f9p˲(K1019;3fLF"ª u| t>mmdlk* 03zP\F/##C{U1fQP rf`Q/8PM7 G//遏ǃ!8'DQyFKuI$we6R j gu]j2微~W=Ϗo0;dwwCCW!8'NA<"q2½[J)oLIoQx ?gVJjW/s J%=L"zZXyĥq/dׇ0p[. 5Ss5' wA( ,X!E3 G* #dsrcU1b"QH3p*#RDAnD߼y[2~A~ſ^2:kz;Á!%@ w9.)l7;k:֚$13}u4@@+ 3#`D4w"t؁2q# |D11sFTfR0 ;zyw #R}+ў` wf&I`{[$u')g=,F`@DscyNވe)g wt Q#(ECtӎ8Zк6n3he@u7wXZW|zzSh7T;!2BPu<ψu]c朁;"j?`G TRR7{$cb߻)_{C.L,, 5UݭZet'\u]Gs9Q0sFDd⾅mIp׏㫊);uoxSEdCtÓܘ #1sbs Y-\zywHxzzJHc ~67CϚo.^__?"]7U_/ L-70_eWoD>o7zc\)ZG0Dߵ֐34Mgέ5;"2`*6p|W|~S o*%j\ח{n# bqcaapPJy)B m[)VDzou.Kkc'cRj8YG)}(0L"6!"᳙rT0# U2 Fn :3GJ)){OE}J|SolQ^e&[ k"vu'L)%X.e[?3%"r6w9gs&b3 + g~hsw6GF1Dh+rxm&k !K"r]2-ŢV_N)/ ]e~{9^mm_^R45o޼ImYMҾf!;dG=X=Q"1 mUW AѴ& B8 PwM"ƹds'UR_Ds9 ۣMOT|Z׈'MRٕ|7Pos&}DMR$;Xx SJEyPS)c E CŘy U^וKGDc# )|$ i":'91QBi%t7}~w0ΩbD"@qpi 92o1 ѽdiM4C@Sa;'F@DdJc123[g] `x "RFĭc BH!#Y[!ugD5!v"9 Rn) ޒ`D4<(18NySB眵qmYhDQs=h}TY=D|.z/l;RLeVE yqSfSt◢䷠Q^__ͬ 6aoS9LDf@ 땈vE#b? s".Խ>4M=FOL'0wCiz9ߖe1"aS)Q`2%DLM)XKNanf5ϻT3m1ϗ*SDKGNdUO$߾DjqmiE6DwUey5[{ u}<̬n2q(!m.2@'K.mO0m43xLn 뵛gw~ӟ梪ӝ8r??_}7h]3ӧ^c)JFD ok40 OJ +zxkm*[vO))myo.ou:|= 7<Fc֌qm%vwL5"֮]=dfi|̽OCt$$Ա人SjM,`9cN s=-xEDNut>sVA@7XLm[4|skh%5չ*}ߗdӜ_1H^,\2H9 4`"bRkЬ{$,t"b=a$}]2 )4MCIY$;}ca/9gKblC9\GFd}y=TqH"B$Np߻>Nwؙza4a ""ԖcDDwzIw߿p~,G>{o~x===3y9/9go} 70wwEb bهS8'cUs 3f6&FMg樈U™8~0" XHD9P# ۶d^Zk{[SJ B#)"aJVaf]H G!"6c铻ǬDw9z9MT2;`K)h9>|kwӒRZjJ)M3yaZt(F,"""zafi LRJi\-,"^ȍ9A}{1\@.U0ٶ圥`DEDn\ GDD̜~۶m")%003INqVQ5TՒ/Txm0 ̌|\<)]WX}S-h눗^xYlú[3]K)4Mu|)%ȅB?=ňˬKkDu&_ӧO";3۔oۯ]|릪_%sE~ DUE #{7sgwlf3@H1/t-"} M÷;h|0u9oRu] 3Y`FD 1!Gn)%03J 4\D#bVEڛ_R5jRݪ "JD ETs~K)&"qGLNYp6 !b 3T 6]D7DfWgML3Lff+ 3)ERZ{DJ)`-VJ)y3L)e66}}ϟ?]i.wZV:dfmdw'vDt֙5"0R#̣)J {حژ#"EJD."tH̳9"O>|8 tOK:N~y<|<,itK)p&"9"ﯭ+ p(\Tx8T`fV= AZ#`B*off"S 43e=vi3 Q;%f6L)m۸uf)% =>fƪڶfF{WDn۶7]'_7 3<˿_ݺR`FwgNxPDiDD#4QwIN.a̔' 23Pw'Ja)Q}t G%""( ^%w !ƎEĐ,LCH 1+AH0!mn1 &wS!NF+R$7fv;1'.#P5"梦k¥νflFͧijRM]20ZGf-Bc7u}bî {)zX[ՈpzV/h6DC]Gq{}OCY\&DQt^.]qizQmm|N]>^הR hإ4sf{c޷*g\Y {;:2HjDvs}r23qdvxQN)]U-Z/qbY91ضmoqqe?6rwOv9GӢK̃q|Vʲ_w1[iȭL_ѾTP8gD,nf;-̴Yb9RhK H(1ܘF3OnSD ;TFD$9,:T.,0.jmBB`J8UcGFqHDf6mf#4.(`` JDt߀ᐼ(\i8O{# ! DpdD6Hnban,cxnf7D{ AGD(z@aD:MӲ3}~ӧO߿o7oތdDG_HRG A!G3<1J5"D6tw+KS=. BD !xX͆޻Tf%gD Cx;"A90JL}UZ R2wME:Iң)sSwdNh0<#@#Oӛ}{-e?HЙK9Ӳ3`f"0Hzיo)fIiywYߞx!n("$8zuE)U`DtT1FJI*d nf`*@fq0l^EwW/&5eD 1\82υRUpO)A_zTzr{WL#6E4dBD-47a]aǿj0o<"t9=D,]%s{s.¥zz['r3℧ľmRc)\9X@}շ˦?vf6א{///KEeyWtbf>^T4Mkח{(sd۶RJj[zz4i1콕๜Ed~dmۭZkV,߿?|C u9 NmB"AǦA6:pN=jX8|)"p7Ze R@9swܻF[yzxJRryX5-<."g9} ACwwBwb*J"H)Dƪ'w3Ḧ́Zc^?쩎)8S5w@@i*f9n]NZJM(4rDtځRWL]!&t [Nks[n@DBuWPwu6#_Z6/\\*-i/\ƼT̠tιYUYD|gDD89ᤪ;;9ǀ0‰!!`OĈGg8 0('vtR@m3wO(&@Z;*_=<Є"._\?30$Q -{-0"hLc #DC]ȈzE 5mC#"t0 1̜0"$ED8CD9-/ wvHDnZaR=̧i2~j(q#{眈IyfFDDRDlJ"a~~Y?\3oo~VN~O麮~Y _^y:! 
uzJi3"*aD8," J)rTXBGD-D{eC3DKc1mۉDL,3OYrίs3qNR;j b=DǶՒݶVkl53SP}LD IU;&vw1$׿<^/BD!"YJD G wwR=1Ot K 1743I("C[A|>_߯=|-`ѹpO)2]\Jn6yܙ̶2<ƘL Dħ;D̒Rl@okYnw?{}}eU%̼ 9}$R\̴xkMRt8u]\wf -f33{̂@2-݆q*p!C>Fbfs[JIza(Dđծffz:3[y^.}QdוY&n%"61Kqw 4QȾ,:E@̜ #jMe6fvmrM"Dt$t4bcclu0M)8l㗿%~էO27|qbfVRUUC̬G/Dzz@xDTZ " D;]xb>*i9cD='@DU9.uQ{/,{u&W"2$D3"8v؈ȬJ)Lycxxu]T}!XJiaUv&e3HTRJ;EH9sE@63|8zli+3 wO9[CD@LJQ P swÇçOe߿zq> ߾,p03[eQU&90wEcW8,{9oc݅0"+/ܽ8BPROc C""{y]W0MDhf3DlDԔKNS23Sd%GDsGEQ2UC91ZJ9V0f$LDUL#,LPߙٛϟu;H8yL X2x?qRx)jDth)Sn*#Ր̫pNa$E#ȻR>2ugl,x6M<,0F 33+ sJ)Sڶml)!"3 9O"7)#{9' Uy21bo}GI `"u] pdzg B{ 7mrËT5Y\Y@aCDR}߉#[X*`sDjC"C;1g3sC8Ҕ>oN<!b@t˫4g3$ <&Џ(kH2)aJfayNyrw#Su}$覵1FG`I)e3%R{FDi("^!sp" ;P Jq9[DN{92-2` @Rcˆ8)L'}5"+ uCDLj}Wl)7 FG$n[DJ3a9Gv'rPc~k XA#3ӑJv E$AH4lU\JuNL9z7i4ꫯ_z m۲"(sqzNU!5$"B!$l4U0%Pw Rs$B2$`ic乊aC=@Iff1FxIYUK"fc~pfxnh;8p8ks""\yJ˲nc<ϳ|][kөN},zir9gpeq}}""'Zk\^^^N0nGlu3y >|EKJi9? _|˛Z+8#1(zV۶ۇƸ\.S)%)1~]%1'DRR#\}rJ>p;"^)""FUh%ֈ`)fA):p"rH) /I"҈(hRU\vN &{O(0'pNZC9a RZ[ RJW∈#^C92^Kbfs;&;qJI]@Jn(Ҙ6# 3v .1 b-֚#(:fك+]T5tڙy "l` FD0zUUf2YMt?vxR"*Zk̀1FD TUp:9%NcaEDmx<Zk)e3;#)"R@V3ʭ5J'SJS)e8紭vAĩE$էomo߾j14MmBD)dzfz}}۞s$hC `S^ZkX۫6Zkp@?|I'9فqID E3"QR@VJA}PݻR{T3W9B{Oi?\kFā]Y,Dpiiiiiiiigd =vD לsHDc\h(fE#"c9"澥V~9x/U-"3̌#z9?ּܽW"Edl1MӣW6DfFSBD̵ڡ]z9[[ ]4M_//뺖<J)~9BUCrJX{RU>'x)$"M/@;%3/R8w3$DxUUG AD?,Mcfu$vw)+3[ 8TUx<es۵{H)1DDW۶ZArk|M)Af>GJimm[G:GeY)XD@cވhëp"*U"۰ISJ~ZxzzxؕD\O 3o|/ӣ֪D#xvD?.LmvX{[?bw׻OH&"ѷ;r?:?:c|9_~aW OCa#wc}?^,pT|E:>c˩???m}|{?:0"0'qpϞHDЁ.x̏I8{9q_~4sܣ?=hYsF_"iiiiiiiXu Z!ޓIENDB`libvpx-1.8.2/build_debug/non_greedy_mv_test_files/raw_1_12_12.png000066400000000000000000034027611357355204000247130ustar00rootroot00000000000000PNG  IHDR.IDATx$wޜ{r:LO 0 .65-Sf-ʦKrd"-Wv ,bɤh1+.$.6 ` ӹoߜѹ~s&j!(sJ [|'T'PMJa" ܉6+vs>ϳ#/'E\g%Sw_#bHؙ~O/%NBiVsq~^,dg ydr 2-0XK;PMG $r[G*%řGKR qxtJ⤽xPh$y2 WFͣZ3-;p՜Ӝa'z]j(2 _MljrqΔֳVHfM' uhyfroғ1hE'h_]KBdWپ]")Țz(b9P0OUB4X)=5X:!C 툨fOJA#(ËG!݁ y<{@-[OKaNJWE4;3|/cY&k}CIW/h~ny1<<+A&ۏI~~䟽lU@s4[$pՐ=2bldUdRЯa DltL ԖTS3LPsV(h*5>yg}!qW*K,m'<{2}@3,fYD9G) 2+'dN|8%Oam#^lA܆sCkRÿub.OaE7OmE޷{o:[^Ob([wU֘vx&_Re.(*eO7ڈ:߼_ܲCB6nM٫FA@KɡD݈&/ ;>[_ݿ#h闟${OQ \Qd^t봔[/wP C\{T|?ꛂZ7CVE[/F5m/B7Bn"uǺ L3Z.蔺8}q{fC ZN1dNDߍ:tS?IKm"tyZNWgEͰ|p͚:ՔNodIZlj&ZbعI^RCF<23D&S*NFKaYhN@dZKkfԷ,ǙN7s𪍻Z͑ |ādN VvTN1]9(Q6*> AzStf.>ۤb!c@!\'%Ejt ucMZNա=bvևq>ժ!`:cHD '<^π8s5]P7s-Ҏ=z{1WQ&n rY,3r:Ka!=92!H I)eف;䞐 h&*l;؂( rڼ#u4_obݸ d#AVވqmz3]H|ʶ?GDNm$WEo!PKY(u+tLQsXi^#6AzH12nWaghYٶg@T8 %fEA'X0j \hyqv sFLl~9\E̕a疰D rXL^a,_c̪>I0ĮXr^əxO (X5ͲTEd bO\yRr ;bϮ4#̥">Űv:l(l6`%lbꦿQKirdzֲ~qGcJ)tDzB+Ÿ;d~MX%j#Jo䏊5Z8|s~/+]OJwrr%oNr0wI0'??V~K'Bjy=OWs8~a:ʡ7L5DA39'WEm-nڛ.'i;~̖ H T~GkzGSC!b.$#hsbWmQn^ Z%l e~BY&y) m[/ ) +Z"ץ[(GCgNܲ8vsUVd}s~mXɣӟiI/Ե_jgv4-2~Sr/׃{9%G9rG K[Ki<;B=P9S*΅,Q<{X_:iPg" Cy`7i 5<J-"{}EktjD]?J%bQuIl,l!Oj F{&D]Y7Ssx,(>XM&6F6(=kuTyGS pLez*5<!Ng p"7۱sc(_AbiX1([eؤ8eJv(XѨѓ- T'jܰT@m&gp'f G;NaޖMz+ 8? 
ЕNV/ {se%+xՇ)pK"*iJ>/u]02]\0+)<^"E b ,."ش~$%(Dr D4@ ]HX(aOD 8'"+c2:s-Ǫ dNnH>Z 9^ 1U6b$ԌV`!QS蟻25s ) 0M$f0@"O mhHW&h"ߘ}~1x-Śnf9K5M 'B"mN,,:.E > -(E i>㑈lB#593XV3[vb3QI{UjU<|aUoU}j/r$y5 *HiGdcEh&Iή,x;E%eLNY9]A]6& oBE{ KyO|-Iw#sxp f`$;]7%"-]Դ$ `#d򢒿;i٧(H&F-g=Z|6ߺ-s4#7\%~o&~ow~7޼y'lĜUO(o(,ׄU{ {#C3SwwhF*H$ HolíFT,B~ Z63$oDOb~&{ǫj ޵:bCp L খ Xk U*V'@4$tЗj OUչuY\Biɝl*_rZ]&5%  BzOHA11qQT c{m_F=XX;KYTs '욜0w]js"$p~ lva >?dQY3I+uhݰ_^+̋;$r{84ۥ\4}gʭX{xR~4u*ߕmCu2RjQ?(Iݓ?/N?g,-3_.ۧ?G#^c6/7ש!iVVv,L+څUqTcTa3]vbCRv9к>_~F{tC.xOT̯IkX]'9P|,IK^XM[]\L"rqkm#mabMڂ墐U¢4l7oģtZ&=֠nUT쑩$|b%hl*8;IwwQD*pe͋Sb3z̫5%M#έ_%^lg3>#s+ƴDҿ5VggxTeM:F\~+] 80Flhl0s]]xԖa,>=6BWD\+p%X [ &LE [.`v\ڜIZp,I6EJI8\#6=yi p{-elrM4 Nn*̶Pɜ# w㸸%8,^ί7r_ڿܨ8L$;+kg5GNw8{k|qQjҭTh~'>:?< FkngnlCe;J X^dOX02;RȞ FRL2 L+wFXh"m5;wFֽ@DR.v58%tvSF."\)w 8?,c0$웣 9㋌bGJmS|~d&Yʋӻb6g7|Б3΋aS0;rE"U[}É?Y~߬-Ƀ-V\ڪ~ܚx,$@UZK"7nެ6iN{5;)?݀YMP\뷵`pD݌KHˣiSB\҄鐚 Tx>dY)A񜤒3*/&"ۜ/q!S;SݒOĨj28ǯ_C!^,b6Gs=zlX9eqUf?\RNyp)]+Rq|#+6_՗vɫ$K^/Y}h8A>yCP !ӉP)ʢK _u#\ϲ'ɡd 5KD sM1(4;7CЧ wrהc3Aۯ`77ص^be-Elg%ȕv$A9Y+U^)D|ې%25S"9`1NևdQoeI7T*.?O֝c,PnȉbMJ@P VUp:lF)FV}^"[E3ΜNf}bM zC@UGkYe =K֝A_2u # lm拱OyRz"K lR؋`L,rŤ=;vRBfp*@9Xv -lg9#@E*hIrVoaw ċ?Ue93sYP "y jk5d?4e wo0iRش |EYB83rEtv3*(φ 2Ȫ<); ljg噥vV)B v4;wJFna6<\z؍^ڕv @-(@Ey/TAșQ/uFR91C}Ţ徣ߊb+ )T7|x_zBtv6I!=V'D__!>ޚ%7\Tcy+c/PuļIra&+- /CZV`x$$a}d0 Q{ȇ1տW4}M:-NLSh+=|N"p2D7sdxWGNCOMfAD1{9*9D,K<GR :p }Bj ;[+)$(C"P~O>O.#2xj3Φ2kor 7wƮQɗTRޑn%" ü1N'˧=F4).));q9}8L f9q*%Mt6aJH40}?.'JM'oof,YuPV鸿Ӣ=Xe91%w>HlXeg#QqtH[41??YѤoOnYv.`z4v)<>J}ӹa-;\}rv@Y F|dsN4R1lǃԭ zG9޽2~Gו$Xk9i<^4F|1^bɭVgTAjɒ= Ų6HJ]8WYaødX ]p|n ;M;K|}c)>3l؏>شN{sL~ؗI"nO ;F.vώ{'{/ eɹ]O _],^L:}ӻFU!Dld^{q}|Xm1ik1Z0p<Y܌~9 J0$*Q 8/7HvM7|,۫YeČrJ"5++f[.!z.Rʝ#SwVXw-P|Se+zbEQ8Yܫ uv+:( 9*q0޹6뜝옝]䍞y`M,A ګejpo Ǣ}Rk~Ӗ]%~EN3vk4VС+]%&쐰o2Y{Q{-ڕU~GKl#\l#+.;"VZ3syB'FF|IƎ3aFkb$vE8!^m@>g^mR&WQ=Zx9LOwLQ8!+l1,?^DUMrXl.ǢX| LhRIYֈ|cʮT(^$arExs ۹tn&&3BW\~ P|xGۉA·>4myM6. #2p#0[q:K't tIH(4ÓY5ddDa?Bϳɲp \`,j7_c8cbJqPJZa9n B\0XP>Yf&Ok>^p:*àBA!6* ߍFJsLD`.H‹ϒFEJ ¯7t%8C/3|M!B\<nhx|jlJmB7V`2uXE~L?LJ]Pȿ ,ex`'w ,:>bgÊpP1ӵ~{!a KIN.W?l;mmb ,jRSdt&XP=[DFjVejq^Kvf^n70y~ʺܗ@HI:šd.#afu 0{"<]ZHM?Ћ|0VoN_Ϊix璻w&d_7N]wذĺkc۰;;b% YW< MHUtՎg;aڞ`xpȪ:׆ג S@r 3ߏ姿ODZhf)Ϣ(kL&xs:l_ш +i/L\,څh;Do`'@" EgՔ*Nϖ/&=~v;&rΙ^973~?3ܒ44h鮱b1}Kpl9FJ9ޏ9OcATMn܄yZj)e 8{RKv.$J)?^4IA%~a6z{z!7WљTyxNOR;k~v r#_~/8{?>OtzB, l.O#݊z&k^Y R[̛O~{m_&f8x>*џeVܯ};l.pੑK )G n6{þݐQMl'7#zKG+ lgC㋏7[J_ 5,2cl3' ǻ? M0gOy@!gъh}÷fT$1VQɽY{d|t"/vV+U8;SȺE"1kF ISʏqOz%2Դ#!˘RRb}?q}yJSL}QZg@v56Kg^ܡ4+ݦWl]'hP 5 .P.+K^§8YVy$C4pAVz!L=Ӛ/ƻ@x@IZ/26.mr.p:X4XDTǘ r K%p$:$$#7]C+D( OZYGQv'a $>L9:0?f #wXE9BzilHc8h[3~'(+#cp;,q`R[zI\-Wb1Э .kdulb'tMNH)iDX^Qn. 
xIfE>k "U8qW0i-(Jqgɕbc#'l:",\ϱG廖y|]pp @aF: Yyc^y'q֘\S䗧'%[2a 8,ٕrWZ|t oTJ L&NS&2ree"8u~HeHU,9qUy \501]CZubMaİA@/Jv եd6һݧ/oc4ȋtR)^4Yd|^eݑƙ_|xwyWnؿ}w[}Un< 7Q?$0;GW[^-[&sr|Y}]Sd0gHssRy{Cx="GFu9f3Џ[ޜS0%GN^o]/^>WvG٠fU&f| 㘆t^-)ÈL4a}!D=XAyɳ6_4ܽ( ՛/\9J`d٘dvv3|f۳OUvc9W}{5?x j;0uߵF7|?h竏RDr[n$;"bnۢKϨIJ,&a`fwHEJz#0(ɉ nPƴj#؈I|E|DJV0gnfX:731L[QtX4Gl2 ]S& lF7\]mbؓ뉋n_?yfMHyɒ+6Kxw᭻[۟]xo^p|R#_exy[yɎf1zw&e-c/pfI{t(ˬoe0⭩hBo5H92AQ* TB1\Wi @~N ّwZK8@gl Y}?Ndjqw29,Ҕ;؛hH$dB.003P;;y6ޛ"MEH}4{￾|Ѿe ͠#jz3pb2tU2bIT-jfWKHW?;Z|coӥvmIۡͨw նFb2We6{4{8 %-XU<ΖswBE.ك-͒I*;r_l5crUG&yM Lu+1hne-0`%|RO󷺄ZQJfyg?dS:04Lw϶֜I.ݘFYI\hNƴhm퍼NN ps FFc24fĂ Wn*UB@VVi|> hP[BLnHǺU-~cGPh@Nsݴ֎ŽDlXMGX\u>I D f2\@Dʱi 8Ic@`ft'pA]Nы06'&*?Đ}= o'9faaYKR1h] Q,ͬ}u{vCDKIN/t1-d;+*Ҿ@3 ](Cy4h?2j)<%0(dFSZ TO6iZ̢ѥ- )hK}%4"O2!^>\9ĄN,p"kiғ ?׳r/8dMj)ΩfbWM񵭰(㋵^H`d)8W )LbWK85ĔΫ?}8(PK.Hkjg-zu$Ow4m !D-e׀cf!?`Q=:]Vkwףj]/Ҟ&,PhL8m8 ƺtH!XYc[8ٵ92X1XvHxO[\d/نvj˹踝uvJΟĬ/_~>?_oGܗe;v9lNh+z7gSk{qO[#_xk?J*%7]lV6e.GPd l8&.4y솀-)LHc܁2 @V?I&&Fִ7v`tOs3N<"11l Xǐ0ə8 ?09VfSz!wٕ l#yݲ#{t4Y/:rn|St;9sH6rocx&~x|Qurct ;)%į>޿}^?M6aK$v+eGCw[^|M;2st?M X㓫$S3\JܾuW7-[|wIپ_]17X*n ֨ؼ~=I$WP `xKuNdף7i$N4mrS@S!Z3ϋݓB썿YVhIWކH{D]悐5эFr\}npUJgsZv(V-'_YIZ אT\ "ﵮ0Z`R>6LtXMGDYBOX 8-L:t\ꛌJ$T-MV_JBfk)JrFv}r_Ϭ]KTr'Wjّƪ5^|=DK≂>epU`2;^?uG%p7\W7X<,ϻ +;|2 z)3s'ׂy"Ojxl^&luG;wd,y'"r\%9e56Q@_>j€1Ewt9Hm"&2]Vm$n)9_Ҽe7oĜvx-g\e-fvCu(<M6潭>Q܁iM$FnWJPtAk%{*THQ@Et[Ob}zh/kC#"wx x(:ъbF6]>HHG [Nh1OlnfF Ic]6;} (9)S'dejt)IEp]axplSRܖx!%'eKkĢGk(.LjlnƸE5i%Ų2@M@8LœCW^ö U%Ф?@\!Xr1[!M9@\]hU4uĢ>ttŐ[[1 O[wC Eu{-PNq1YQ$Lσ+ K*"c|c;Bcrܛy>0.89{0sy܉c"/VD<@bd!}7j2Dž ifG ˴%uM)]}ߚ*?xuSq|7mr[R|{YM-ɏ|yMNLeMm>ݧVywygm|z|"-'RA/^WPXy9xLl)Lt b*8C 0*:coVIwo[.=,丒-۟49>S z*ڮY3'nNv[7%!J0c\٢o-)MG\۳ _8$lFPmAۏT٨C"r[$}}ؾN5bA *1\F+eWP>LXXCv+ d]xwjӖ3r\#ק Ӛdl\q ,E Uc԰ҎuAӔuu4箦o?b>x?5kOm^~[_+lef˷w~v՝7KWGgɲO74{L@woŤmTZ;7+'0K.C~ Yf,%H9τ!*ïE]@=X ۾}OoxLXMQdl >+ҳfo5ǥBxv:$q?j22r}iԗp Z0Z)lTw:F&zU\Уw퍴t&eӸ W9BagwYfL!<JZI:NZz/ԈxDj%?YuSy-sq΋i$'kBsbLcJynrVH}6?h"XF޾3ΐ[b`_,rla},}N~2$":FUqLFl-l ^,*Mdf/JC18AL?6 mB_|  ܭg,gZ.w"GK&z 66xvԭ5q[-Th `w+Bk^dƑ`HpG,tS#A$_ՔZ0Ckrv 9gYe9HKʂK (S]![2Xp̅y 2 oo)Qa%+̯mu&}+L3 lxUDs68e%ܤ6ޛIpҎ!nj^f_AXװ7_4r#ULX@tcٝXw7$x>VtӶ{ǘ( "dd`))(I؄-h]sD ב2?Rd_0e:F't$h+FNr=lNoFì5{ yLJRn_E"ǒJYkʉ6(uZ;IF? 0ecp"g.ex1@".K0OK!y>5oy ڃM>qXǢ&"3<"u8e׃iɠ$̞vz)bZ}U}4!ՕlV4zJoa@}WG7(7sZD{uc4bOt,w}~X䭞8/$[kQ˂ z\&mܭЋORu<^b.L;bLNl_qp3( 8MWcLLnv|c1C LتxĚ3%m5g$ݣv~:- KYzD"l$1VpQE{qВZ./Vlhf<şfgF e%Ki1qu"^pVg4K }c)~*H]:`OoώA bGneqcsY#✠~!.z h6uFr?*GBuDWOwv.WGfg};Ingd~ɣe"u15,[C] +.ӓF\ܾefko._zf[o_WQyR3ݺl,"恵8RULUt,4Wnu)Jߒfy2@ /AR5|%_atO 9J0#e5g0YL /EmgqN8q"@dؓ{s/+Qo.F C v-MD'rl_Y_{^VLuɋE*T̓r|y}ݣSJyh1ZU^ӑ-?R6NY3觠yobVշم,كT^*g'ӱƛRwy\.Vmhy,Dq'pyqSiui%>+?W=z9\]~)"k= iȪAև?֞>U_x+&7wVVoLwﭗzroe[_{#Շ Շ:QQpcۥ_?w77w3[x=ERWMKZI[?szfs{Ma|>C(^ k2)iUO%|01yZw3}PYu%'ܭȬGF@ c8!ďhuuGǧl8޷V_|{=.Wz7* '*|L̄nePk%"2~Q;^|q89HyJ xM$&卯cz<3GUUޟ&=3{ 1 ^_E +/ƋEm0̽pzaA7=We;)\WzݜF~|P]'7;xj0tD,-Vs{!Ny8:GÅ)+eCSb1 Vs{"+ܬ™CYn:j7bXP1EK&W̭KnW> /޻*<\ӌً3r1ݴozpR٩4H :H"Q3Xy |M9i0ZlA4 91y4U]>4'x(P[QgB$ffr6sm!b4]yk'qbhƔ.<"ZŻʒ< tFILDS)倫1ܔ3mId1NJ42V.a16T+yzfAh\@Nkyyރ-htUN˅vEi@jUxXDO87+2Ot*ym9V4⊚&K1?.A7~J {TDN*qfsk9A%MZVoGE,_Eb~T~]Ršš{vBBm }?쯆N~y_)}Gÿy_eJ֛>i VJy`Kϧĭ?<׌Ab-XPHZ >AX? 
WY)s,BdE>' p,ldn_=E9o=Y'#u:uó4i?sGOc+)z߭ WulW ]p~΍ode NL/y 0}˟.(Bo>Χ[Or|6YĿ%:T^/~c\p}hT|\r^yj}&ssq2HZ1C 5}6\b"sPƊ Aܰ.KpJߔ\IۉظVǽԐ$KζoD"?̝΍_IxbTnfz/Q~l0hZ&Bv/r7Tܕ qh<ۍ:.fa$ӖzėabBЪSUQRe#[|b ]#W9* ޒ8ʉ#Jx %[z;R&ZGQY7ī)6*~+Yj}5pxfO>/Ŀͧ#&2YqT`E:3Ge7:4D)YÚ7rY\i1k2Ǡp^ ʡoܪxe;m6WR)#FAĺKWRKp(YD NL Wg ER ^oz6ijHɤs+`q`gːS 1 T m Z%c6 7+rwd3[h}6'<]oAi"n !޷I:XJWO/ЪchHn2ݎ6:LI΃%>pg pƄaVKI4\%k*PACHH(/TML~HfVs4W^M:< f/.ljdle@O 2BN#NȱU O@-yT㹆{tyQ"]XP X|vv׼7?L3ƒ% ˊ\ђNe6}vS_Y], &L^=\?Ze=Bls^I\ԶQ ~6{ pԲ > 4t;yf|nwkJ7Aw%b}0)uo?xxW$ HVYq{%hex5_OL?wgڹ)G񑒲~eKɀ\(Ju)hu1y-R3>u^ۻIPtTnVhfa)V^HbX,W]u,~j1 ݏE?Z5:;wUʎ$\}Q^xV>5O_֜KcɄ {E? |5p^ٵXl<;|ia7n.r%-0,]X,vq\NNrƳ"V)hQ2 3nlGL #)9 w[};[at[?"`' J'֊IO8*s&7wtfe}b$S6Ɨc)-JbXJ<FD B4B$xf*tyx$HiM VZ')jQnX&SLGĖ>Kr? pPgљͅ#K12RoO?q+uݥ*4_{cJmp'HS$(=״YAuǞ)iuxR }.OYp6nqZʯ3jm"(ob >g~JK,ZSc#`̅ugPZSڭ2?b:^a6Jܵl\So6VYv*RctiZ^aFk{7%~G>[/])uZ`aBdo$ͱH\ $/ a-)=zIΆNE?Ӽwي9nr8L'TY73{s9NNӌPT&AMj1Ɲ/o!ho?ҟlͰΈ8Cl/ {BH׽vnU@SYnxy}|AMmۋubK*7à"bRjZx;ez}L}TeW9mWEUy9JԂd%몷A+K$N~ʄ#.Ibvn͵;g-f"S1F)1/~r(G8`^%IFr{$ 7]IX}N-Ck=|~DtpLzj;rgW rzI㗹H6 Z~ ƽ޼+~2-= l L5Ln-ʁ U@Ci?|)gq~vudSW($)$j͋fg򰗈]Ć~6V+`/$i<U0C@\Qp\A)v#IM )01`e%򍬄*i$apr)\iN^/>@ myI(?kB_ PP"++ʘ4CE &6iߣ fscf9D‰Vcd+EGZb!W]nQ)¹yZP1NW5a6X0 Jzmf\ K)be,!P`T4tcq -/D0EEWfc7B8Bi܈^d`j]w2'(w2%9 Y{/GUu5IŞNvyr̥mbĒ ~{meV PCn~ ``:?~~Wyޔd[dq.ӟZbW;1[9 /f Lo ×A [>K UD! wM peR'E44/e.fg{2s'E~e%Q7.SMָʯai:%{2Wow'יq##0> *Y7HɯNJ*gMǽubvu0{j+iaHN3c)қ#>iVn6w@LEKOW7*+߿=L6+0]3a{0קL| 2//̗gz}7;Z!"ʊ내ܨn_mM{a'عf15dd,FK3Ƃ#剱S|JL\MNۋVReL M%J7( (qyɌi|k\Ynqa*RoT%qqV43]aNk?;NfrѺO( )FYTp*k>Ay>S\0JOOGDb&Sk\i`.Tx_]0" 3oYlWӻ iy%j#[W/jkQ\Jyq9;)MZ4vv@712 nrpSn+oѭNd Vmv|Pp;ܰ+\.-=2Fgg|2?JijW[syqq:ZINC'dUlب'8?l"y#v$M2#^e 'w}bX}91Myi@\ݧó.kkz.[2es ={x]-Mq你ci6O8f|$Lwu9Ep+R7ܵ+ᫌ%URXg+Kв ωy)>K 4ߌM g3ЇIi#JD|i!$03-F* Qւ~ :v 1-6h/nԨ"-&x` =ř!34Ï|L8Ϊ~z3 qa` }9clHXs АKHKQ5ąADGn9dh&Lv&H} arKsTrg\W42qf:LVPZIa_p:ˑd.G|9 +bҞծ͟a?ؤ<+HevW1tx=yɀA<+7ի֧?74xTS |+QҰO>8p;9|B9柷q8J?!7Jj駍[Lp;}Ϊ &\rncO~u:|NЅ mF.G0=0ulkrmAڱ^v'>9fN5F,r Yt :<$HU$r!m[5L/M3R.?Jt)ALb0NVJ3MN+tfa P|f%q|Ғn%=&1"'36WV0-C;҅}Ǡ6Kq$olзf/=Yz)p"I;4aKU)9lyC # 拓g n7w^vPu3Oђq-o'帼5KߡՊ9W ߓv674ӭ+{6Xd4Waj6k-֎7 R XrA}< ;!R. c2_d!u1z8<66ؚ=XO4ۧ?Y? ~'noǙ;KL-eZ*.,UJR%ҶLS6d6NtoNo/"Htu_1Y1lS ,cV b_X&Ki3aD%:rB&_EQK|KF)e^(nU%V oO{g]u,LoJ%s#IϐAje!z<KMAIޛi*=RN7o儂v%?8YlޮXsʹխ)3wnua纯/}u-65)q4F>Q/2X5@ۿSS983d%Ýc"b}=9[1 K,,#8-Nψ4Ԟœ.;ٵi֗del#DG@Lu,_zƄkM)F GWZbjfJ!"Eِ2Cҥ,j|_BSwxCxovʨ%.VEff%=l\6O?CȔ$%ڋXY1mob0[&2ocbs^hwB9i73tN"$fkmַ/ [&"w2kDWղ?R7zPImaCW,`yQAH5(/lfv٬c\WZQyLϔt1wkߟvϒ5.Omt05g貴ma `<׎)mV&׸uoPlNd7ky9$Ylg5>(þliw,Z(v`V=¢|n:QxJ," b5{dH͡H#Zi cJ &ɈŊ0 Wel3Ȕ:U wy)sN=ʑ':w7zL_C<H']V z-] >-r„1-REGʎS6Zåħ(DpOG=^"Dq&cY#tFIpFVȳ,}tQh@7f\Ó%bA`6gi~=y#>hҔ7<PLi'(]fB`\qpk. );XNVx4>P2 "@eeKɪ%jRﯥe=\V!q:eIe f\9`9"V6?CykЦ)K@3If7A$ W^i &0X~7l@:4X0s9i0[kv˶HxcGҗϕD5QPUvjmQ b3:B@[瀙K$\d"x;q@g= 4!TڡP'sW v! \K1C\;w0n59,8X$/7)vީe&y5&_PY t\R{(ߵ~o7Nlct(t.kpxX$E , QNLeJxMzUHlMA! 
SI{IF_LbjV瓀S/F\oǻh^0{J&.IY _=U{0α!;zL8|h t%ȋݯ,)xJܹLi/sm}g4SOץ^{,gd 1_]%BjۀvLRe.vG) "E[[cchb=*b h{?Hvf_;ZiWșPhzk=SkQ05%*8 OB8kTQϯcA xu֊|^ˮNd5^#Adeسјg ζ_-PpPjYQi%-d9X iku!(*׎8M60#TH%T~ wZ|*BK<'ġkٷTTeWI<[\>Obex+wLCB͌MzްjTA ^q.@Е\C+rr'd r5:SðGU凅'yc-6_S "wH +Yzm".r,G18q'ϕU sv\+']{N̲0^\fbӝSz.fsg99.|W>y׀ 2]de6TqE-mf8عLWMʒ{*d3J}qF0>;eK۷cALpK J}ϖ)E M6_4v7$I^" pڨIg7aϯ Wyrie+ fd1i@ngwui ifH7p Ձʎ }I&r,M (!G8) s9G6H癌`tqX</wD$ѳ @@N0wK\h*{2W >_iz$s | "U`o$hnH$PdP";eyaȓ@B&hy nRX]gάkmҳ ,kp+4&\ f Sc$ @a@vAw>Am1g'Ljpd]RTk 6x}Lt1VW`iwɣTzumC B-@큛.GxSpHZ1Ar>ب؀#9d7pn~ItU XY9ddċ6P5xfbmgz/`=D#S03^~o),57xPnG8#@SG9yd_j•E[S)ݿ(u/bfy)]441Ƥ$ ӝp+" _h ׋t-~lUXdLiΤ?)E; (3eM0h>e,'[= ?j4ʝN~pFM|@b'Cc`#r7K[@A>qs;?/?ya(ޣEuܬ<0t,q:Ҹ!"WvEljuMBMUR'IE ̀U ;PQ1Z$H'+&h,@2Ǫ{)^ieEW\H}8ZB,賒ue/[^H/$cScw{x[w2N dc_$=U}-E 3ːW%2yg[W P`-Hp]@屸=oX6ߕsdRMLcC{u!h&>^hQs3 m* MfKJ#E[723 nq7+lKaZY.R.)J~0Tj o#$,䪀lbg,zrB쑡ևt'e-K'tԖDKB->Y{K]8;_͍4' ~$4{pf69BJ7p@R񕏰5Nɿ**=CfUDH*՞]"_7?}ץ THT<:V)5D= zq%}^ - J|O54:GSC,<;~4*7Qcj-^fI[^w{ }~y1]U4Vu;G+ (՞myu>qIaS$6MW@FHu у&򚩾AM|BP!(퉪8iƲrj~nw'rw59#M3%"9-zVlrø1|(n{蹠$v{-wNneq2iP^z&ZPu$B#=Z3H6\ `Z6S+被>-?Öi"=A䐺c Ia HBn͉gtF+^Ҁ%$ " MG`*)rA,D`VՒKgi7/pVhyI]! a[B5sĖS Nxs`G M_ڟ N|3Wڍ;pr?}FlK8ՎƒvOE #C8T6S,C}'~/lL|; .^E0ȔĴe k]I@e >kS/IGG2 -Q|2W!XLBJ Cw.\<7pD^/?}_HIO5K%0Hn uM&7wIZ)QDU#gTr4~/=3.,8{ ;Ιq3M")L nbյl'w/B >;&$LL鳌/HPN2.Jh8ݖa 9u ~@c$f|-k|R랒4d :[j3('0oE~++[o'@J:y7}RWis"MХ溜 s'wWQ/pb1đNy1;)F ^Gx ~i1h D}|}O& ߑM]Ff"OTui;]W_^Le0p}EV³ 6rP=_v4J~;4T5*ndeKa$HzL)d0q CԳi?<κSLC5Q1-KHcRkU{!Bz3OIJ@(^@Zɍ+F&H. u1_u(,aQ!KJUO7N@:w:}d9ׄ %|yCx 䀡րs *ε# (PDȤn@CF\\QhuSlHA0Hh@J$(%)lHH(m56${9> %>"d3.$4)c D)Au,W6YGBg0'&0--3-$p!ϮuV\j">PqP W q<{. I|Eh::1$“WIo/ M|thDQ\u#\D-JsJ}PcO6scԍXyΥ;`x\ oWf/"/?Kou֐IXS҃ip^牰€ n韘?{%5Q||V[]m,.gJDK>E.qiiчpTJOYM `r1 V(\ !\&/$7qebmj&PTȸ?)qG~S2N7޹OLo>7ڋ I{8×-Lؘ-b؂9JP@2) %1 ϡ6_Ij&߹.R O!eY690W c]Ea!Zy03 O 7Kۗ;Ņ_ƻ齍]!yehAsj 9&xC}qe6J.VrJgy\-hR}ʱl}LE?]Ģ(ă9ܭTȆ gT/W]gؿ4Ȉj̞]œ'c}7X # dn0On/wSR-2_7䓆3e-`tUbO@ZwR e݊^ ~s_fϋ7×e^683/27ƭ"q8..6qm}3?4cC=y!UJSOto5#J\=v?-lS`h; HCb: IgֽC}uY=⽕;MɢSQgڏ-/mgȀ56DbšM0B^,A+SYSu9[͸)|o]]ۼ(7diZgiDPe+ P}TArf{IRꚚ~Gܟ͵[5w֮Vu wA;LF4`yTpiȅ^S\82MSm9x] kMTOrSzQN+|G8NRh#o`9n/enئ|f/F/.ӐhRmKSs"XFuޛz-iט5:}j GPdTó{ZdfhuW6öRk+x 7`q;?q5btOt+iy5C\3s}1H$Z+g7G7ss:_h:k{HigȫDo¯Jq*j8Ȋ˥Et>;zb` L@ZnN>DɃK6/W&Xn> 孌gdF q`EG ˁf EU!^rQ a (@Qհd( (W BtY7FgN CNk#.[^RKch&| E@D9wDW0Y 5(g-1sx#l["cC˗8b>xQSȡܱ`/8;KfJC 6A^@ Pز"CςN`nCqs4f9i14 NH\YnG~a Gy 2m(ɿ튺 ~@B=8z ܱUӼI9?a$p~*s)j?ϼŸL,G|Wu!e{K@/#phx$Y2!E(%PRYA@%-wġ(, XOϯ~|71z~̝؅M+_ɲ!U 4T;M!/ p%WĊ䶍 i8&"W{Tc,jD)Vg4u6V;VZ GeFW!3_f87D "W%|>ߩ$o}7e[J[)錿śh_~0S 50 Gu+HFfzYuYu.]O#%*n^wl+ U|vH͎rlLҶ=[eUe2F[O/m+ԸpoӁЇ?DnFI gC1r}7&Vƚ? EY5E+r1h>Gt1v }^"}-㨦%B#n sIj5>/ܐMHrp82&>G6B*Ifi -쫞Eck6PO3xq*[bWc.H n;5 .8͚3: >`>LCKV_uT/+ϨP1=N.FTTgoTA0&!C ^! 
n&d暳f3T[ak1AGH)- i9'OI0`="V`/~sn:;#X @L@`:K9OUNR|K4ܷX3TTR2X~ >”3f} yTk$ ˹@40i*cKfX!Q)!i :* P` <g@pS1 j2d/ q$N6TN9}2IK98Dd/$!fT5FƧ,& Jw7Xt]m_~-,BBBɬ~2iwfT7 nzsf؁i-a~ucV@wܦՕ#7}9O_xik "EW.{ݿ7f6n"d6|UuXaݸVg{Kw(!\fIi{Ь?Cȴ=1O3IƱ7捵cr#r`3IY7샋h-$3}$[&ԡ{c+S:{МS[ށLZ̈́[}_θ_ ;Z_e:yM2`GYق`Ry(Y=K?Ț꽨$-+H"4VѾ"σ | "ӀɁv)z=] 5 X>i犜z,p)%Lgi94擁6@,smY,}lcBLV،B\ c2βt-0tRK%Hx𺗔ݯ2&31_,NT0ȪT;c"XԖ'_9O"7f$]`Я(`+Mۥ,YK4B؁n.@3gPQf3 d6W"} 걖F,:9x H ΍.o(fImB=haX F< l04!1+:F ȅ @%@Y^%[:&]9%uGLŃBҗcun5&MWu2b N$<Y 3% E0lڃm $OMUi$]R$,%0#f陌n3ܩI`Cp*癄2uii0}yڽ²%<>|,txqb81Rˁ5VhI>2?eoPei\~vfWxUi񾙕M!UP'!P|'@遾-g!Xtщ1%sQ$qahEȒŞ 0f̘Q<\٣8Ɩ207 ( X;H;kˊG&tڳ",nws13edi}wӾ1_7['QZs[OU$Ee0}cMznþf!2ī {]N% #޺A>'sU\ѯ?xE[wey5āv_HzmI̐Pg67\*i)2zcys\{칥ΪapFK@l50~#b_.hzU%zq$ٲؒ#6 [f&v(r KSO.tź͂sV 㥙#a]NHrx3o#z2F>•ϝM1eb1U,WkY._轷^S'ƙlɻ23q߂벘y2SS=)Dg_0,S E/f^_u1BɀFl殞7p9,m(1H3*o>ioѭ؛+#!PP>F\-sxfU%VvIUXp'XUr6qry/TA SzmnaN"+`!#]e#.@<l& 6)Xirypg"-> WX24r C~F3owm-^97+$bf 's_y4.80p 3/_Ԇ 7߽x8̃5%r+ B`\-"4hB7.0.*@KT q2g<(3,-8/m;({y2 C݀h[tZ/EK2u\  }"-G@.T? (A݂+Hy+$I:;+\6ֶ)BAX{;K蓳bs7lRY9[ A8(BGkm jb2J; gnԠ,viW _It~]1.|f.DQA'vz±An&/5)WUUF;R %8N;IXjtH{9U ?xfS ج\Λ eVqE޺%aF ϟO?ҧ!Hw/ޙݪ?W?҆[ 2Ʌsf iU V6`!/{90Nw͝QW58.R T0p 3a! f^_ǻ5^L*5 Va1fk y7[Yz?jQF*R<"nU)o#NpU'Ӄu/!:O+Xzk~$gI^ [)[R1C,"2|s|kSf̕`E)qZ}zgi}\?rH,˒nKq,\ڔ秽+ܲ߻ZLv7?`˦y m.=[r|^#$ έ {Yվ'krAW ~IV.>C,!6/lcWU1ÛØyuU2oR+1NyE87 .?W&5;sz+^$ӇuQP;lN5_~@yt˭>Q{Z&@F]68(\~Y2V'4[U<g[ހU`@z{;哾q6X*4]'4:@/0gy"Հyoms<` qٺ?tޭ@IL?ZsH-y^K,NWND,) k3 ^FDiE_v驫s/CrLm&03,ԓek같mv!]4\'Œr f_9%>a?raPɞtrjqiF[yˤVw hiLiB> %O5\3ۗ} \yTh;nS(pw-Xq gcj3{0MyVG=ԩRl>)NV;ToL^a.m [;]nTX@P&׶!=HP-nVf6#RsY-p dm=ՑڙTtcSCfaWP,+*-,jNY70>ق ΏU4؍-` ǸQu(Gnlΐel-k!,` \ u5n)9 ?;sއ!P-~lV10?LV,̋f6h(_B*0!),T9@W[@ p2Y :Z#"m&Y=4]E#x!R`eF@"eɹSZ#ʁ ^2e{ % +) * @1gx9P*GoZ[d NN;@ˣ:R!U2VwV 8a&/kJ uN-McIQJ Yj4YD1-}&\VrJd&VL\V0 [^e9:hpV< ~܃4PAKk_ʀTk&(3DpDAq F<ݍoW';ZI[e7zvՒ~bU;Yw:gQ];_}N)Ԁ>IMBa/K$Rb ,2Ew@37R~'u0HǣajS-RA s~ՋwwdXDv0g.G=MR7gGy=}~hEyt'1|ne_pA+7]i;ךۀ.OVV:ؠRT|qk6B.)p3E 26qHEI;\WjLZ.Z̎=GJQ{ל+`p'?]*>{'99blzBRVpvWGnOoS99|/d:CRq27H}I(/+s[ȫ]m99q56/q0jR_ĕYϺY+TݑI\#Zi4c >Rs mxmifL/rޛl HW>Bz}nRAFă@IxV8y@~s6MHv4O%@6JDLpXώDt") i$M~%ǡ1)EUSκ_*[Rъ^V.%rM+H?Wȡk;72S@hJ.7Js$(TۂSkohA1u-ly=X]3To+.U4[ew.Ym54Fe`>06mn.@YBr{c~PIN,_%$HfO2KqOƂy^y8DGZEaPSRF``)[`rRUYbϲ">WA:f,gDꥸKqg6헡 /,S $'K ldz%Z=D-$N~e8~x.dzAvy @d @dCJ$Ȝ44qEFuhy[a9`+&󛀘 9wTqé0bU6@,"@s l:`9@P`8 n$M}ӃqSsnQqb's؞FM;K ұ^H&8J<80zIK9;@~~) d,z.ppi]g\ZO,EN1`O"5v0̀5Xq!4}^*zJ-@Au@}y Sѧ8,snpxnY}>YSK$I^o/E :_ \7׋oQ8*{Uj~RQK rtEJamXhւ0PrVMJ['҉l.,ތ|k:v`f٭%I+w;_;=?Go}M Y"11zz4yN_o־u{_ &z4gsֶ_@}0- # ZƠB*)a4/^豖2',q;_i[@9rzm˿W׿Z?Dw/n_/o%ν/#0X[ܤg6Uo*ThQEl,Hml_j:h"ps*?RFM9YB8v-y 60׆'˼y8$+U3s}GyaѢM61s÷J{Wһxp`;*Ne1S[΋7 Z`)?ʿAv&Ζw`Lu[(Faxg1c\faPj(ϡ,W:1HL)}(p멃r^MNE3))Ebg?΢:q~gON#_;-_Yatw#-8bwՅ*nX2{f򇙺" ' ^FWŶku4d%va|Hp 4!qYMFѦA)Ѓ!_ZDG "M>"GC`0R<HNiXA`W< `xd' 'c:GB4⅂ SX4Vp r) `P 2Ýk*OK ᆗ,Qa&Iy]H4BV4#KX&A2"[/IqbfdBld$QX.?Ie&M4GK_!S4!ֻ6_i3D𤍘/U參E˸UalBKP峗Uk~ոXb,.C4>ә"+d1N8Z5Qmw?mw[k0~rz6#'f|טsxXu%:\c}:R~k}vCǽ:c`ar5t߰ce3%.K2_/.< oW@*&SonuK~oWc3(x/e6/ws$" j_ލY͏zN.s#MD#DL- ɑE4H,H0Jw8 `q4éGCz{g._O<\3+HAn}no 6Utow7{֮pϮ췱~IwW[#Y^_w'\prOlvlyInV) 1 s:W922o@7 2\grJb7zg 㒃I'/7v:2a2 }t^!?>џt};&7<ٓrY'z'2'^%Cnˆj _؟ߗI|UmR0pE@ۅy1Ӛ%/E SBעU<S r9yl|.!܋\^̓KYJ)\yڰBJէ i{CJ5, aof [?mqΧ'mW[2-8ڛyl` RŦY_wy!Ï o|ïϾr_|1fKkiYgKD.IĊUIi2dksVő ;Fo{"QsR~Sfȗ gMZjYWdrVD]; ?Zx>7/+-%MR҈J`=p0ԊJ@vHcsΤ󤩂ƀvU131t2g0!]DžƋA 6u~;Gq{XQo^I:Io?~rsԴGOyϨqvAUK;fu-\Am#jօg_<*oƉ5ڬY F[qpu"Ǩ#8,`dl]TߚSE;:^ŒvdW&mH?2Kx Iv.Z&7Lԧf8YՊǑ󛗷M{ahR6ע Wcߥ0k -;vhuR aF$K+퐷/[;FPuoz3X80ն|%7,us/QsK睊Xi憧('ri[tkeB- O#lybOٯSu'=HTMzUh;Z + -bh;d]؀TX6īu+ ީ'S)! 
!JAWMtj \E *hd͒Tb,f#ڠe ) }a؍Dm:2c!LCv(Esw04'\ۓ@قm:=>a{7۴T)oF-K9鄮k&?(hePhmb(bYe{}&U߀rf\Po ?Lי!r 0^tsH]ՏTZyu͜g f|H-%5)6[b77f0'tүnk[0 [G8Fy9 ث>uӛ*,DUSt hՁP ߙJ V)ri7.`*\UVipGo1YyLu7iYLZbn V'ߺ^$0a=ʀr8K[$8 ˜#jy{3䍚;[W|Q+vI5K5jp§]ukD12Tg>"f#R_hyQ}z~wZGmK6ڳA__x8F޹L%V@ bpZ\ |ߙW^+[ _WWYHNlto |rg0.Z:_t8!{4)}LZ ][;b6Zx 3=EȯI [u wF+T?9U4 : M-FCUrcvp_&^0&G92%A%f xW`܂$jD:Cv{)t&͏4]vjV;kAwS{6DgWlmEOH/ň0G,caa_x1åJϱRH}쯿%*>dF~*NI|1 0Se:;2&DvD_ .s,J&?d۹η+{ET.ϗZxM+y%u0E^\ :{EXr ;BmV2{I4%r)-E, E ?iPm M5}r#p=7ZYq糺7"V٘헄ѥ!D뵱(M oL4EdI{{q"t\t96(|+풪Ʀ+^R1Tϳc_A݈yJ 1BR iQd򓙥rc HNY@̱S^abnށi@7Wـ@E{ȦIG&JW+cw,@ڎ%_ HAо3XF@wzY.b6(0DLilׄŘ PsD*% e k8VQE,JDabwT_m[Dwі{{}XRtDaQ&, 1IKwH{J "0M#Uviyc@}q{uw$g劉nKU i mݣiǢiy-)8"ϡ5L^ p⩢ɧOPYL%}iQbֵg$J<]=882 [a} k!boj_P> uNMw< XU XCUe?88jxmnV5>'Cm+ЇlR9C"<\_k'Ne C 樸 -Mȭ6D#D!7>(iTi '^mmcj? ‹2d9uM#fzƬ2F= Y˦|eC$񌯧XLڧ]aZ4͟i,>i7zau^\kS4usSLH # v%}8}+1QkE{Zʒ҈Yq(\W3cuc\8-{ђVIz0I&n=YCPI>?Tô q~oۿe.2hܽv7پwomYIGM ZoCq-|д4{ 6 K>Ē'i ƕ 1t7_j;9LH9q ŔX272ޒa{BC]slV_8 ]@Wv1+Yzw̫wxdմx]G Gt|R a7-NYX1SjZ )r S1ѺX({P@2$ bnnaJ\P50sXY6 2]Z߅u04XFoݱqZcVDfَ+  G$>8 i^Nōgg;BԥUk\NEs & 6O,A批PXi<6T,- l54 q\ ؜?8y> W2Jxɟ>WRdnئ◛=p'*?5-:Vx "Ƹ[?rӱ>;:]zOAydYa!IU{m͇"'(T8 `+ɿi-N.\Yƍo}ks7WxQ2s b^s-Gߑ^m8\W/ޟ0fw$wZh_fqISqG}̮eX 0Fme&"i^hixgEQ|8 ƪU\" ~ZI"74|jܦ'd2 iUQ6Vt6.`'G{`\I ,@t )i,4m'nR"S,нy}GdV4[σQ_j p-h,WPSv4E[uZ-(Di=xz!_1Ո'n{xݬ X `naV]guy঎Hh3Qb!wӐ K gVi*X( w[HQ<|Q2\.?s?Q~ufɣ 7?3Z~ΫssH)L^x}j>|x2ԌG؋ӢtNV:)z#W [Tx) 2z)QuTU$&+uIH&[P:܌6_:B̲=.D5]LnDŽU-MYRH>7ipմH{ba kM%ZB",uPjl'{0cWVJ=L!sZlc絩_+UlSγ:+#| N g6 +o͆tx>"¹2H-uOTQ6g'ʉSLDplb3Y;(PFu$W8OK8\Suws8ksg #ib០#\ ^'D31" ? S!<>j;ƃ歡,{U7?~(v(*0.ߧ̑Μ= uzH$(db%BҨC҅1VWe**&T{?_pC*N7h&AC}f@58Qr_T&4}  #CƼ~L'TXO Tf`Bz1xN 6`i9bsQLHlLiH( L-{HD-' L%ʦaAT@QUކqǙGÇk)KzI,pA;8 m yq[xE}#?eI0_9Wuuss;yhWa%! ~<2lAO<!PXi%mݙts:9w]9d3#6L%ilVR)6mW Ypc)IN %>b,Rv+pTԛ@P?P%Y# V|!У 'lc*@"FV6 <}b K&0Ž,$Sd CIapn @ Up,xOe7a`PgH׆NmB6̵qbARk5E* s/#'DJ'˃S γI ZsǏÃ`0d'!%)6c㝕9;﷾?-r~d2i'hͲoz浟l|\u#`=Tߞ0z4}= v^+),hgC/|ʩwō~?Sj7@ @MZiF^i] UP3?~k|C_|oc_]Z| 4&G'͜h@JLodYtO.u<TDg iϟsgX*IzR ֨^^:){6Nhvɗaߴ\; //+}TpYz1B'fa2^[~T 0ႂY @` mG,< ~:^xWmdɰ{g <9.*6j1dDhb):ʈl)[BvA·K,3 _ ׮ߋ3梳϶g))>X>̲X߿MgR< Ziw+t} d!ޅ5K8+ך|>=QŏX`vb352ra¯%9zrHmrlvL&4|2f h[*?ܛH4VOn!f6*n [zDyM"381)'/2IamsNGY' oxZo-`h>] tvP3>E=dM"#.3h禖(1A3T: C_4ǶC b:R!7`~F  p9<Nr[sڥe`Ǫh޺b"lD:s^kBZ~^7 h0+1Z!c:@T(Xl iFiPc-7{ hF͢%cgp>8@Ow¤H0N4JxPe07yL轢' sδ| +CBzl./貖JCZclM&L+ϬL!iD9}vVC_'h\|cX|R38I F+,bi:NeasPH[+-1e%~|\|hx}}'[d. MX+ޓuyj\ZR~uO;,$6˥vw?͟hL(Nıo+{-x"oV2\c?B}*avf۩9Aj;3'”+-ӈaE]eN$};3G>݅d8d7Ҧ\ ;bԻP㼎Ġ]rH_{̮oW߳@M2W)0- 963Ӕ `JO,8ӉlvFe?c#ZHZV3zBY[F-Ot'_M&\Ni.2!NgO؍ HȯͦuZMt'b1̡xR"DkHeVWS&R4ܢ2Cuz kes2m'KPZ0?D/kqL9^;9t'ŀn';b=~9%sٿ$YlZQu<3kKR1K$)~=fLᣌ4kl=:_TfCS/R ODkOs#n-YQcD,Ӊuy2\lԷIX坔RHgZbT^T^<^u欵5SIG_uwK4X$M|1CD2L/Q=Ӹ@Boxy6%3S0ɹ?8BRLyn3)sz08*,7Q0guK=JnF3˗80ly}(hG|( (i™C`隗 C`8LDWD@. 
5 /2C }5 RFp暀6HE-@?t8|(E6t24j]Ght/3$(p]f:j/tH5B{Z7@#Y8GE4k VS}fN[ _$i:ر+엷VCҞ;3M_/3(iqd`Qy@wc:JOFэ_y`2#HP* p&3/$j^3]&2@GY^O s )뚹0WY[@w]>!##+ wkh0cȨSC,J6w/azLS$ ҕL]19U%[,m?drsZ^bd0Q;*KyFN%Wl`g6fi-?XܛDfP[J**n Ƨ̥Ӏ"k'Jq0Pl& F:=JXd]Q[K.]ˠQkȕ|J9{c]LnׯJ~qӟ縜}ho̅SOYԹ_,[>ApchXGcۋ؋~ծ Us*-&'c/S(tl[8CY Ӝk]pe bQO[kj8 6ҶbЋ'Y7pZ4\f1<$[A[3 s5 V ڟ"[{՞M!p2` * 6̔]Fn;l A:C2f(\եrdJQ3ȻO 3 pBLٴ~#J5 zD=b@Xtnѣ#&ה\`ĈuϒrގXD_M[O7R|rSϏU'gC༜.`&X_HAnB<n-p$.aH0BA!7B%8=AE2քmhx, *UBq+6AC@'T%}/uMr^@LT,8-ώCw_SpyGJ [|<P,պ |(N4ӯ&ib^b8O8x A?sRH]An*F\[oϽ0Ho )\Pĸ*fEZ*rD/8?Etԭl~,_Km0Y Rds2~x}|\#F.7t(~ijZhj6r>wbn+xRfsgӃ鄚 EKW1fӅ)=`ђh[傧:^{ 'uSk`) & l ?.hk8% 9}E`dڞ|#оּUdƅTysNL/vD~~p?fRC (<77Id#>kFڶ}Ntdzgg",AAt'>A]eG&VfW/ܶJ{;F$T qJ)cR?'9|6/?{>4_ln,f4_߳^~/H$)i)I/)& 3P/4ݳ;/m=*(Hlá6P/ urL8%b/'@]9(Zex:&Y -Oc+kKY,dt6:.@@q=<#c-KFԎ' !;1\yvٕ=(T%;n?Dm{@nbDJ8tgBFI.ysD۳ß^1#S?-jd6`*y}rfNn f,ߩFQ97S9hY0odIø4 !J]vWfcj(̔i,GmV|YPȕ˟ħB9;}.a~(b02x8ʯ5Z" iˎYwխ]$ig8kd}Q`v?w3%1ӊVAR]P/ӫ:K&sxZ9O9N˽b#\2~I?qt#$| z[.8@8$Xv\5׍m{y6@(gPB p p莮{`Z rn<^̏T7ZFuV@*M$C e[Am5q9žV * ±9-AYeBk32w d2NBM>h l8*s :_!tvZ֚DXΈ*AC ;ݧE .aE< Df? W,C۳ѴYk!R mcLX(Ɋe AϚ(-ȓEĒ\#<~!l'd ęK ( D0HTA1=l )9 HBy$lS࿈cLRkhmi ǪbdaʩhQךoQa#I͌y_/~^phSX_??^~seᕟlSsʩ.] װU;l: wDkKׇeЙWh_.䥵'Cߡ{Rl^l`r6`s`gx̦N:uQW\=X@^S¬BXoཽf/D!Ww?+tvOH \f!W>|1.ފ( ;K#g$jHػ߳tȱT-Mb>W{vy<&>b69.Eb!?KLk! PW{YOQv[y֯>O:O-d#}D2"H Dv_TBjp^KW,x0 _OV>ssa$˚nurB#P%9/?6"Yjڷ%έdF(9YvJ{qBͤ猬kL6Ո9,H [U\ϖ>uqɦYi}26|{! ǎ5,^'d@Pڞ)]ɗ9ȼ{kMOsäh|B2+w>MWڒJË;doXIb <# WO4Я\s1|sK^GxZ}ܴ:ɶ F[ueסyI^b6vbQuY)2Bԙn䬋6RU: г 9K4 G neid&[+rIZr=ҤOaaջL4Lřܱz тbLn&i?l'0:lCDHF&8!4QX#V}o2A@T9(aaTU1W,޺`Z ZT`G!LC ϛY]ixL JVe3 bNw /@W Y:攜 Ha& iHx}@>Hu$[P%`n%hq=Ás`OA/nd)6e.\ qD: @QLNiiQ$U]X`*rT6}Z>*wm?0F5cd"AQm 9#K!ѡRbwɬޙj#hTf}>W)RɥWޮ\ݬfjK s۵UD]nc MV57~hITx>ܰ42hJ9O`1Y )J,*"Ht>;V{hZ!lyWTP!jC?<$C 3px1|A>ʓlZ)*F5"lnMOAl\.8U7Cp|:01Vc`ٍ} ?Nu|WbufcY~HV׳? z^e̮o!̘m$41E5p?9h9y{rI)&&@|ΎrYzhBz +/&7O;oDNX giY9wLVOùXkalu[da "׆^j{L#? #k:c4` 4b0. KDp2OJ6%j^Ι,J>#D#ڛ#&3ϼue%qs_! JãƀK8n\vNdHbM݉z?h?,|Pz2w[|&>;_N1 \¬JWMwR1Y-Ůd/=^.|.A3g/q&n:y"/Xt|' HwZJc.ǃbH؄Yy;-7JbA~>:͛N)cP-h9K4wOĞF tO-pdjlr&,)0E%=6 ŕ+?RNn~ki$]:ԪnPD O}F2<(WkiaH$r2q P4;)v%dajOT߆D(@n#E|4P>O$aB3PˉofZ׊AI8ek[h:,׾ڧUNrP1σyQ4OWe^egwv89WX zc38I9`j=rDΖ*9bcso Vngv-g>?L_z㱟' MT$Jm% VSd$b5{Z$sʛ흻xprys&04,Vl2C:G+kȆ_.ې>$o~?F^=~:Wor2eJ- d'oe+{O^W{_0I`؄ \7aI 9+ :Qʒ'ED1dlo${jTul%`WfИ ;dA iXFʁn Ŋl}Q&&ϖ*XB{69JH*+i_; A{ lЖ ĕA ڥx@Տ4H-? raR[rČJ CՈ@>r<$?^$XD-[GzdWMIj *lwYq>eR!s~Zv* pFgtU}/9@B> Aʯa)AG- 4oX`ԕ}/ p8SR$1ۂԂ! NPPA_20jzDC(B%mz(Szl[o$NC@Hk $uYPٯT.*PHj-\YgbiA %47qc`@5pX'8a,guuIv܆T Z]ǡElsML= `i;HxK {I-rԆx0 :o,bW#AĜb!u^%\18- |"WV(:I nvy٣Kt'P32 @F#Wשyga̚:k @W_~w3wv߻-8FX8{7:/'׾LxQ6۽!nHhW`}H#ӻ,~NX(o  In9KuWnọ1{Lx|:UUsqW>6S M-˩Hh3%tcT owi"{d5 _mTO.A'q{%秣?m-.){s7F웈H(_ec' yc({]LU/[ rpHba[Y&ʚ1B^8=8x$l-r˥)T6~ۉ CZDʮ ԰Ϛ%oT9~ZPX&U0=)|4ʥ,Loі;?[(5I;)ccw3n:uJ(lmiWJGqcJG}~QW~YNUN=GFUmr ٧эI6^xlپ3(GBY+sri6cŲۻVyc*d gp,q.h91^˾%Ξ_Z` b^|7\6nw!0ΊEe aS!{GzP%d1HseVl{qhfOD@:̅;PaNefLxb3ȉ|O0Ia2Jb@q&xoHIYVW{%p2ıG9&2 Nsfhi:PS$$ ;]G]i с a9@5 :yﳋ}+{m'W-ʪ.~ލ kbfJ< 3 g vcp"M|3e0h3>`c~"n߼w(EgFZ@P4s{9w;*Lt՞ZB} MXv,Sb> b!4'VoW{+1c/h;q4U_ڿ|r{ 2wzad2~&[V=,dѠl3)TZ,wݑ2m.5~v߿.;Gcm=c|"Q 1??*C[fSj,MH Tll?Vr)Ry_yce5ި?^V7 W+ȧ GKHؖ!9_wⓆ2!b,vF_gy +ɡosMh+[0DdEF8Le×ï/'OtZ,D gWMOr;ھО$?43:~78rya5UM$eY WzLH7#]GWNZJAsh:,p {k߼?x=CF PB___EX}j!zX01b4;qcV=SmrX/Uknd8&'o1+]X})l&[3F(]n nx`>NP%x)Ƌ$9 lx2Sl8xDWBVZ`2 T"r!@}Ks~?|+3B]DtQAWFXÍz ĝjMzn'WFo;Ŀ\val.u#e>_)`qVcf<'E%*pDL`vNTJx4<B$;|Q`a;s1cƄ|W +٘ד?#{aPDة7lqA]wq=&3 ypqs - %0TΏj,.U`Lu:\*V19-xP# W4"#ұ@ЦD7`ȝtl=dmF= YL\h#-@(g\FLr|p# B:Hp^QjC pNfdMm Nq< xXk?l2Y*4}-i_'aݓa03v/:g$3+ׄ1xӸ,W6 {7! 
?#Uhd16GF2aTcVb7<1և&=ExTv̓VA ҳ֚+scm Mq)]4ȡv0R4+6TU"ワ\-\ډycj;Uُ\&iYasX!f%7ڮi)g|Bq#FZFK|/Ogf;@g'Ōz}lFi?ʀoJ2ly.dS[' qCr1)햃4& HU Q k`U!g/0'UIcGkeMPOC<3eh[Bp|Dn۱Lڵr\h{y0r0bVN1ٺp# %XLc1 L Zl$i$:FoRdypvfc=Оf}SqSa<]<μL\:=nn;=)#)W8>X)gQWOfJ#/v8HKL;Г5dIDVqJB\8q-S 2(l *mnOQ<`X,ӳ٭eɭƬ1BQyy;`VIg%4MmË,?15J!2z2p]O:3GD٣b1-DÊWu7Y4wA؍Kb`ΛwU2^nֱEIviMȖrpL +XǷMS>krkXCcNst8"֎jشNJD,$XTO Vml Db0fĘHص W3ƬM8dEkd[|N3rXݸ̣O]NyZrs\xR^ڼ^RmE=\>>~U|olxuIڨ,_E4?x^0ݛoA?:U]^cb+ӇhE咑czThZ;T;bI+|>3Ó &X{ZgH{(:UƲ-X.VAbwCq˞̌⢔ĉOOk[5$EiH!!̯OXƚ}ג]u^Fjp_{?vl.w._Wx3bB,S"Bx}C&[!Fyf)cQnOd֚i485N &VWȦ$6)a vƌLe8 XE 45d_EFyFMBƧtaLaMKI).d(T$czB{hBh80W%O(f^Ԯ6OӧnǷ%s\L=ZpJo'4 +˩eK{vS+t_37N2]DG O 'j-6)JM qms Z}\~]&x,&LX|A v;ܝʺ9_kV?[ܠQS;O]]F-^0'HHXH5>eKf\H# Β) kh& P'|9ᘋ\?xBZg>}E^l6k+ L8%mn2f['@$;>.ÌYL`t-Vda~KZ01X 6=F;hJ:)oj`o[cL.=L A[<F=LMzF^qPnQKiY &s"cؤ#kͦN)w ~R@?!lGaUaM"v@R%Ś}%UޠS'Y.l=,˶׬q&`Z}n|Й0HTdکQ)96CBk`bK>'$FRA P`iڜvF\Px,Ez,2> h\Y ɥ3fT+b"5:"hP4dp b̅)wP+IP03 .fy K5D)xHlLݰ,-h)Bs9bg\軳x~qXݲӫ\S97ޓUe a Y\<? UlOP2^<׳naЋqN.'l636G7?AǙ'3Ͼz?ㇽ鞩kx?_3_pi&.!Ӌǡll|/^ͱcjk>2H/y *rxD0D?ͯܒeA+rQb$vq6 vKaIPufe!md~dow`>g_}imMBI.V DOB.PA6!k1*\L+6{:kG8/ZѝW=8|yѝ w2;I9YN^n=vkg2t" pIC2ܳ (pc;|w|ȘxCCLnz 't*L_}mٟZA#n0,{A;ٽa+cijV0wfϾ%R4?W/_ +@*aQ4҄q$>z]J7ʕ2HUi)gUDSfiT&ke*7lCucRZ"T/#an!bBfbs<MI_+l j@%>[3~v cnj_8y$= j&wq3$„5!h!8!Ok&n"lgs2Bbn\+k@Akg>3vts-6N1iPujuec*hE00O|I,w@k!HǔKP$!W6yv3$^:5@`nm&(g Əfn0@C Cɪ-ƘN$DK{fK%dėd?[ȳ7ZC׏>f^">Ea}q^!Bq}c8>ȏwJJx#TD|z)m~H9%8K[wS)O"gMvMG(m'R8:hy+STd{Z1 {5›lX^6梓<;q;9"&IRsٕùrXMp5W+]b %!p SS` ViNK &>CvKH}|7:^oӂA3H5q xQ܅,G`ӯWnl"56OT fPzm GI!Ղ&cy!ak{v& SzԠ` R޼kCB5(`|^k= &i,!%{E:''?yo};HV"MT@ $kDK?<<ww^_;v>K^/wLҎKͯo,~}?Wﺿ.;h\(`&G\[~Y֠MҸjp@?<2|ʧ>E'y!H?xj~p>'0 EAaID/ iG=4RJh91Nqq8]>y ]$GȘ uTV ?Or7gѠV}z j|!t͋qA~v!i+89tg~lix|$A "<^#\RgEu:5W}IWě0ukk۷U-JxJL,A{ ٲE'tsa>S |zΡ\;x[PAB#e8aR{Qc;&T-~ns;*ہS_xG C@J!p>l(kFuK''Zk,~@ց< =o:6wcEL(q$ǐ AFzL) ȷ Vɦʍ)JE]Qy $Hjbieb<% tT=nGtpmI*Sˡe/0Ld#-ĀFyיf"KAc̼P44}_YP1`S@L[qEA`zoHA␼e>ٕg_pf{H^ xHT㥷T)؜ f-kUyoJƻOw}<}Y _[>HW Oa Mn"׿W>7dhwa5-bcwyTL!{A D&W|KAU][ۃ/@ `RP' IpU"lqGK O\#v9n /"Q†|~{ĸ/|7s[Etr_sy=8Ipn9q䩉,7ȍb3〉rf$>(2/|SO#.DK/jcz[:CbP@Wif6#^&3+],3@₂d=4g,H.z&)š֖"f'% :V.dCĪWG`Gr:C|7n(!_ZoϊXb0I8_m=OɲqhDKg߾\t4{.07_uӋL9w=Pŭogo?YtJFg9 NJޤrm4T0enSq%#!/{>LWg'@h=NE3Ztpiv)Sy訐WɕN1]!LsERNhmu1XaIkt!+oF\f 5nn[=M^b&ꋐŭ) yI}֩&d+ǧJ ׈V$WԑBnMBW"#,͡P\@[9g#h5n8:Wuk#8X*d㜇9r"{|Zijg@l6"^G 6x>Tՙ=7뗹Cg\gS /vٚ+ Qxo0೼LRVԖ+)noG/B]*d*p9ſCJ$% y8%4i89u: wlՏ'CA'2+;DNNRM.2T43c ,fB^KdĜBL"LSaytMF)y}P6S*4Gyn 0 SMPLۈh e'vcrE3 L.XOǠI*oII5XlnAڌ@8GTSMwa2# &0 VnYz_łPZ0,kaa0+_-ɆnH S*^8w]LAbx0GwPWhQZ(: HFT8̺N|P1Cm%RPսQǟѼuYlyW.,)c>,>pߓ}#o_{.Blj'"vGn/Kqkd 7 ,\ɧ v􇓁h|GlƤC"wp?],ĸT}8t:wK O=s59WwSY.lt^}{%f ~} n@j}Kk"Q%͍-_bQ&T${%:?yuvq/d)+m0"dτwu Rlh4.IVX[/T ii{tN2YS:1εÏk+`edyR૒YBĵ1q|; qGyy=hCȤظߟǯ:%89"֌~wHZ2/uəh\[gooGq7Xje[|F_S2{a-+W_L fQ6#$ !.c],JUqnql U3w忩<+S|0$aKmIWCg7[mvQ6@,9W(j1 G ˮE st260=B`' :** (35dWۘ[eFI2AU<`I][s, TYRet2&4G6\S!H&JԉeRpGuBHL] `B@Y\ЫvrHrttLh8byyF05:9~y%N 7йg+ LcK- <$MTGN;8jĜqTY%/P, uI8+OzWɡ~ń3`bv5S(~2t|.8p~@1-`2ߣ}eY//ܾ?]j_wY^>n3g43#!.PZX[*^."-mb!BG/~ro] ,pkzrYu?k3tOzrMZW/bZRv{ GkB>ZWkW.`1#q=}c? ٝWYasc7uZ?GS1m^{|#V[b4Bkr+]$}'$CG'ǗUJ>D+6|gęF/?kUHd(Ÿ5w^V􃳷*6j-nRDu'ޛCXT"8WYO'"8.?ju)-W%=dQX->E hHS//Qq3Mv ˑI.}qs tXtYEϞIrDσVuC98&TƭDL~ }R$GT6+$Rĭ6kMF-M5lq,#S 7gRô߃[P6Ȑ%X%X)/h@3f}^ءe:1U!3 ؎ZbALVy(??eEX &YTOrʜnGrrQd^`tdp@wAp Ӗi^JPlKI$OEb8)&ApZ 0EL~Kw,9;k܂?ϮڈZ%v}\!8+ wjRSASa LΞ|Ԉf5c3WO'T'qWX=pKi8Jf-MvUo1hvnA$ Z0?K4p~S݉3v;,1U4$aAϛ\ݼzb aGoY][:vv* !mƸ:&Ra\f,u_bbi*"BtA b381lǭYhcqp=Z:(Bv3iO"P@fT@ &XUl#HqSE/iGn\f+q1GG5~"5g'^B[yZE5+!D5<[,a'm)X_#-?e71Ht *_(XcUdcr|CMDHZ21i:5+8XpĜv`4e΅$eqlC NePcG aA ,*7D\*YЧ zjl kGT>ud! I}4,V]cs"1. 
!i C,`,C* ]#U&-k.(GN0Oy)S)b3IGk=8=RG$d *9bIi$Ye ԀEs,WPIb$ ]MSɃh#x UQwfY}TWKjۃ{whx 9d}rދ{5wms2 .)];a_6>Wd>̈́8wSOjvߎjȣ;#lOA'@|!B`yXqKV :ta)r9P;duHs{Q}WIk̊j䮻g?ŭ;;D n6]vCMD詐f@r0aNpkwvw;G9 իҌ;.؇f ?73kei=WK攩gRUجuĻ9]fpEJ B8%QGiG^'"^E=:N{7. b4}*Fѓ6д~ZK\n\Xq ]yDuQ9' rRn3Mۗ X-LnJMd}.&_ Q=LVǫު9~ >[~Ǎ?*?#k䜂'3+F~wzf?4 ϳ6uDX ˼|r =}wzތ.OiUQh!T>m4c}xڞP'I[KhJ$/`!̣BMU໣''m+r{Y3'%z:F!J^iؚ^O?ˣ7'|>^p$$ˆH aA<̧yWXIBQeAGCZ|It,sn`K9 m(_3=d5mA}6uzȗY؞[m/Yʮ)>j`kd<+!jYeBs!U Yfs] ?M$ꂅv^jKuPqr=sS_4KQ檞3fwWIneC>KLԤ#~+ 9H2%1g:X+uRKqwJwr?qIi%3]XbTdnw,W)z\\FڜSo=d0vӦQ^?aDy)`njf[ݕ:zBTKAq9ݲ;J33ƍͺĜQ217a~OӘ%8OA_s [COaOoTTɝz:ݿ$:uIa E;Rvt!@sTF48 Ƒy}kg}He8X7 : "sܾ 0 KP@ R;jԛ0R+H:9cS m<ɗm'-|qb! gt޷(PYa/~? l$|TduBb%>*䴿GbO(MO6 ^@RuW^ѩp1M G]=5<.Pz<@ J #eUQafgJt.UU3[d&\&VŊ8B9HpX0 X(?ahL2YfkJ1˖gZ$`h'1(C<TP⠖ {& $ D@x`D?JA Gj`'UUzQڅ bk[?I9p"rn%B:.Dt!vaޡ]Ƅ ȲaP=#?<%' 6*rx!-2njlG=cݡ$۹ s|^9G˱^Jt5|+ԍ}97Wmwx'?UK?~ϽX{n'{I^/FV `X~]s!ILWL\qui,җ_ye粖l8Z{8 z:7/¤p{c9]ѻ$DNN R֯+ ?_/\yicywY{JK52Q8i E!/>44F@P]{k'OA=R0F_Ef奄~,S.[}Go*K)7 ɨΦurtA]pS-J qř> LL:d)ףqyF|oi(D|8Dsr(&^UX]pst;SS!cEMںʟw׿?ߩ7+73wt҇B3R1]0,hg9^2\l}.`2}ӱ3GtLv2x61{ X]LSj-'-|TgVW6a"&<(≬_+o?ƞJl}h![R՗MrUe]OvCsF3`';F9FwŽ"b:\‘d^͖w?S/(BT H0å5S3}>h zJA%q3Ҍk9ǭܼ0sc$rᔅ(+#/YWmP)/Ul'x 8 ӅzrC;d><4:oPM6hcr8>I`'eا'Uf+`5 ]GuيL'%ubz,!uӮƋ2tFX]5|1c?;?xaOR; q{Ufmka2Xn%RG˒x 5nF{)L)`^B3<@ȡ,q\bG.l*ћi JiKTa_Ua3*)+~|{j91 FM P߯Be-!qzSm΄ 4a:$/KCI4s='I")ƹGltv"GPG_|2qZrN/_ywmLMkfZ^$SZQ͐_Ģ܅[gפּItN~@g^L-dRJ%m~oYq啭LSoG쇳>'.h~T*ue=N ~TI{ӕ͑hɋo[>jF3O{܋7}ngwO}Nʟ׿FcA{1u;.4[ߗ-s ?&qE :8!WVr{C6N%uD ѤaۡE"!*/nqҦ0`V1[-rTu~W. ZrDsxT$~NTA΢[[@K-gJ'g2QIkMvT`FY6xsۡ"5-\H_g;[F-D_>y"?֑`W3L0& 1Bg]*;ڍxPyzb0~?Kgq#ߠNJ/^n+wV @i0څzn+|umsgtm&MkYry7bbnԸR~P0h$U3ҽ.&O#TD. Ӏk@S!`#8ڎwgiKu,,l(70ߺw5%{}L5 ! K̖a$#{db3{T w\;"O^LZ4dFZQ(̱6H AunKhwqucNʙё).⛁?K {Ir_oW>Bxp1(>z\?CgMT,;iz:8.n _ #ѦL}%Y9T"xS+Rm J##I3OJѡ yO(j<>[GoR[7)'P#x] ivC nS02b,@7ȧd|Q\4E9 C`@SУ% 既jK-K0ԓQ: ۝ĕ}>m.w"?Ҁ1!{>*_b>VRm0ǐD_SS:k`]Ly^CT{bB-EϪ iF .{UqlvKYm/_ hTz( 1"s`@,Ŏ|JkhؐъYce|K,I׺6aw4I'F7܅3{”nQ52rdI~hR %8 Cg}Ygħ{h9z=|$J4=Yv"a>-BR9aw\eܘ;ɧaP+8 '#w(av(}CHW:+'-n FD+\3c#|bRk 7D$t`163:)~jʣB=wP䱒GC;BҖ@UH`ԆaNf ~ :j-ܩ>`qhǭ K52nNGn5!>⠉"d1=GY/=@A\Cx׾ѫB} v`7^#C%}ơv]zIlǎYv='o|~g^[Vd~5T džZЭIa5z*ܠO-<~xv'lZf4҃`7U"g٘4Cd{q;t-|5t}.{K~ 9w]"-區Ys0xӓwBMB,pӣxT^k&\qF 7_ɭ}AwK!{# BcWڕէ VuX{XNj.?g4 $㻑YC›1t/(+b֭L+:VZdq#-VFѠ7.}p$-bJ1Vb Dv0# 3s;vWNwfvQ+P ΃7*ɫO4FI=uL2R8T1ťn ^hto+)}}4Y"z>jgN|wӍϭCrֲD= pp= 2RpiD&oUk[Dbu ud0?Kn$A6U*-% ^7[ӰQ̗9TɺaoQZf.nSCM`XlGAڍf@s?.4d>e,|Ä621.sPcZ1yn) :$2UM (()Pr[(fVP5=}i [C:>'B|܄0pSkxH<wT0&p0/f2'mKP|P>=zs]Ab[ft<)j)#;'h90{3UŖ~"q`}HN)aBBzABo0J5ᩣӣig~o&Uc3o0*jk#E| eq=}^J20V≉\fɒF> 2އ Øj=u38D^f|ׂSA-2=YÇ@h NSV+ ,#w\Uy10:20 i&֔%W_ 58צ@f@j$Q0\S)]ѿ 4T ,~f7O.L4[ky.@z<#xX)`x='/ Xe'G:Qэhy<38ܟg\zw]sGf b[u+wo Oԅ_?E?͟Gg٣ӯ=DFl4\q,adWy2BbL- 6ØO6 ;h`+FB7o͹*=u_ʔEuO{k/_؅w^,_g<94ܵ//~f/N~[<)64d~|OE| iWxpe{0!ʅ3:l9clm{؇E2?PXDJk8E'fZCd;iWI"@M c rOkXm o2"s{lE}Ik2zb:RYzqU5GC(m͆sO~wL|_?d5u;NpQ>[+?t̀ UȄ0.D)U/.0*{`gw1a'sGIɣ\hr|zi3u9.9aYcp봴 -L\[zqxm.MiSh MɗS9QX.'6|"n;oM:BzlF䱣= Xr{top2$~:YIwk"\zPFjwb>H~ vo5>??xן/ֽ>jFVrj ߲R vqbϝۙ/J!7f&Cd<ϔgOI 0`%NFyɉu5}9d&X)P] ̰V ʳZC@cIe ʈ«bd}k f S&7C<_LXC:>dPI)S1 }Kr Z&åNA-5X+u+vɹpiD#"M8E!Kc/#T2mɲmrD{IV p;/qOp!W<ϡ?cl D@2\˽zf͆w~Wĸ&iz UR䢝 X Z,*enٱPYH|ɦݢ@A]LeN5!vrbouCv@t4=+ꃁOP:%d:Hr7m3Y?\oM}Gy:a>k"Qg/:ş"qkCA<̹v# :iǻ6dIa"@I8<7lk,RQ;cXy*L50Ì̗xR]؜!P[X[h@~od%:Uk^ǜ&:iWh^ 9@Lٱ҅+)sؼA[D=7Yo /ϡ0/&d f2֮ğ9q< >n4uU֛y'%hW˚ Af(R:}t^Ra,783 (k<)B ($qB< Aaj2#5 *̯V"5v$YGɧ 0.!c) XY0]'v2|$qF|F Ct3%H̉4bZǻduC4 ݓ)>bgʧw>=W]Z ˈm׿wΊ *8ٓaeyxDž /;pHZ52|]+Ĵ$&֏SMSn3'}@W'{˚Ks ]?.x=)cy2~bc>1]> &@;3X.?9rBšw.Pu퓯zg 7_E~I<gŇ%=05(a2I9̀WeZBPytlcebg }˰sȱsznJ-]@۱t}d>>G +աZQkX5t>0@Kς @( ~= bpaX ,-esw鵛c~p8WK7q+9nkr?zVcO}oS?&ӕz"W^e 
tv,x E3]P.RylO4 =sh@yi#CZ➹kRX!E h"bGYëPTCn15pTZ,֜QKr4cƢ0M'O U!LoezOIx&MG?FZ rNKrz1STVDF`4qowW wEch&(ɐA+ћt/'K2=V=I`=a:XE\94 i0FS9^j ~p24#WC!=,[35AwnMuVz/:UK[QK9/ODwO{5nؗ 1]=JNOj26W7MޕQ"bLG&S/ɇp^Rh}Kۍ-OD]K]A.3D8\Am1>s=M[MchYӞPBkܚ".WJ@{*Ey0޷]CW[픒eU9:i3wvs4=s8퓻ʮm?V̓61CQ΃qA1Ro~4<~hZ\w#Tvqr4V]wR˦sjs^[9g"G]P8;7t՛|2Οޫ~9ZT9Qg]dv+?ʡNe8Bg,mWj3.X OO{j0NKx֕Ʃ!kd#żC*[}3`" C9CU=I_~^[ h w }w>dwPip;㡯*O剻Ƕ''cR k;eUg];teinb/Dm~UDs5r:v>V~+è2p;s~xj44w*gyKTކ1 iKG´Bz?b6ω+|#& It}ʛO}GWoO/ʏf{`^~Z< Pa\5Eonk&d0n+T}ŴcdE"Е0;_ nmDK)>IP=t6V~םh"Ϟ M?`VG\𮵤ph8jhKeы~}5qߑ  ח > (gGk)?;,K(wOk1{#3IӲ!vo4}PZnP0{hXΜֈ24.FX]CE1l;}"l0{5TlL{(.)7:n'E08Kg<93Î/)bHבh@+j M c@@׫ieEo[Cy>zC^hH P ~: aRc\jlC*&-Ag;AÌ c2}o(% J%׭q_?,cGL&+)r3y>FSv]SmGd?bgp\ k!7nqcE.C.c|\(ըݎܣ.ZF &`< {.m!}G{#=<^e@'mh8W*Pq67IپXQ}Vm0!}jFO3d8W\Tb$AfGk^.4;5=O ;8ًɂG#^Gxn8aӊPښw/aJܓD'3žFm28}Fj g2B燏 k CVW W#5XKb lL%0bSN}N򾜇L8Јge6n3?}:ڑFeqX0( :`va oioD2z+y^ڥ2>/)+ P:bJL+2^9>ru6(PGz`4Qnb|I ~!RAk>|; ˗ʸMɤ 6Ż;0VuJ*U[4"`!~Pp][aʻj+KhZ+3iUT߭>% G.vì#$5n4lG'BXk>E>do Qˊf !^~^JQ Y:veZޅ+woqTJ|.~~&20yx9Y~HM].`'lGl}Π47YMOÁ-g|ܹL`!'֗z1:C+iQɝ(?BVVPyy?[>N!a͂tRPm<æpLR.ZF^v矎=Uę0gw|tB|g' G]l ; K\5y:~Y4d%_qk>K+w!Id5K}Q7!ńIuw.DƿP!~MzSV{JsV*dSY<3[-܅{͉i3%n$v7PpxtH ->xrfm_'16> ϰ,AOth.F,o xG"mMrvRMonW])[ ju`h+RP#49\G4d4]  aa& 2\2ݗ&0Gi)1<-X>r@Dă h9}qs4f>h-Q¹@ ǯVĞ2}=C<^XXcOIΈ3m iÈ( 5ϔ)ۍ4$``q։|nYs3"J1}[>y)29Id}UZ&_Ӿt2zv>'}6LQ<t8;apLakV>~r~RtT_Ꝥ+1;BʘͺFrc%l`W/88q=`Ն( =%J)JNևWCW1.|bw.Ɓ⮅@BP|MѰ9h((+\q ge_ aZ`<^5/١[{H*7HzOn:yYX9=8/7v뱅+.{c;GŢϑ DjȮ'U,2!Ns0 szѨTΝ~mô9JH4V^#>(q I ْ> D=à  ՓvY ,1kb a.H@@rN|cHӋ RiBx3RS7Kr 7FV5ǢgFtΑljh DMI@)Ex3&PnAM]N=Z3Լ(L,/.%QܰF'jPjSwvcwNſ] `+#oc[ŃpPB;tayOjhx nCΉRX}LvXL G6g{[Ïda~k3V"9K,53N :L\w@u.QxQ|qINCd5nغh(^E#1J}8d>)JVk_ ciO WZOڣN,sͼM%Vʓ_'Wߨ~;+jСJ6?}bn"}:ßaVYn4{1Qi;mw ?nYA#tvi}rG/r@4jaEɽIvChc~G%[١l1Ndy]\Lzm)6Vd !=ķxd$T㍹ 82_zaBF 48J׎Tt4rjIKO|y퇽hoP>ML[HjK7"zFmWh5!Ų]5Mt&Ӷ_,)}y3S%oEi`ʁb(4Y,lތ3Qm`xO7![ ܺ]󗷢H33A?Z~ՙC|`P.W^|6=ku"3RȂ]984}+UiٸDMSxz¢tP[-H~iP^펄M\tH(Yg|NQdHu-γ%:MȧVi^v@4Wy1/lׇJt;߉q^x|SqH~%(wgڴOd0'rvK/,w{4L~%-07}J}6!˶Yt6lo4~jסBӾ+Nxg]b6dí(t3l!;3Ji5͇ZdQX RX9)uxcdK ;2?`'}ps$imG謉 cJ \;Oi͊Utp#!q.N;q;=,=>,^Kf>jƿ \tl3FM1. 
?G'znm\nM7^}D!@?(>G 8VMHڡnc6,1w,W"]5]~IH7TL Lk.$̽g ~@6#B_Eo 64~4贯^*U նG݊[4Y'@QDλѧc+<ۙ;w|96^}g BaZ]4p1 &%`˯wمv6/ԞsYPR<;V|{%סw'+//llqI/5-*g 󟠵B2?D.D_pzNA,XhXD#z߾6K_^RJr*}"dSz}3?7OMdaLLxfP-2g=:݇-e1)uKp`[\,\Lsl uP""v$"OgKcx8!H8 O5|v3gƨRprmZexk zsFkiLCq?P]Б]/0zͷm| .=%W/k.< nit:K-<CG`zխ?L'2_E|-t)(J[qȴ|u.k )k^f#GwkTFA#kRU.?f-zɍʃo;˞ mgӧZuclym-BĀ2\ӷ 0^7}4"-nKFFNUbΨ,6҃QKzZc5ts+F%Ԧ ,| ;e &0㩡wAFh%h{fd*%<= CYK'ޔ&A3QR(NmIr_37:d&qڌ-grQ 1w~/8[1?IidrhMHCPOԜ h]`*Ad2pA5{ 9:@wt i x *jևsrӉC ]jHpRpV<;ٙTM44&ZE򙛺odzi">ƝW; # WQda Mٔ@~ WXyNUڗZ`7>E) -9+M0?~IԀkfp7"ƲG 4z1]ƧU~HB3]ظƴƦ*"`ؽ?iu":W#.s$dTg?Yݱ4k A&u%cRf\j&07+.= ,R_ttGAO: %K81yVMZ!/g}]:㌌xOmnK`xBsɪYJiT8|{< _hB~ZFO_ַrfd8N[E4b{UGsRNG@:9knnfq"!>ÓУ[݋uL{[ } 3vɊGP}#7{#^Yy+ꟙD8yS˨lHŠ4B' $a·1: EqH%2Sn(L;2ZvL>;+ߵjpܲn 0Η/\^aXfDO)miT7*V#o uϷC[$\1%.j[_yÇ9;Vgz;8:g+(4=As3}e^PO>avp2V!n7F'Rg?yx?յ'ӱӛ-RLp# cJ||ఉG^} }u@Z^JS퐩=Ĕh'>%Yz )(]?PgZ.>u3ތ{72bp̆bޅ 0sB4`+\N g8ъ~u(-a7E͞8nv*5|&64p?0d% Jߜ ڇH hJ.(ڵAye} bP!+#=' Z<-+# \hL-OD@cCt^*[]kbtN{A>#p O҂J[# 9E 5Qs؝cxTR.Oyz%ɤb8'!f!`u)]s.% sŞ|r%Z0{&05SVD'㭏YuMt>y@Hx"埆Y{i*Wn}{>zrL:đqgZIݺ.ܫst#YD|V׻Wyj\L vޙ\R)x߉TG1lk`abc(L٥I//FmѺٹXgTT|ztRY3 (fuSu!sXOBOx`.O3P70lm0AԳ[e=4`8"fHXks">\y-C55\Gp'gb*e6&pysk gX>hZ=]eņ;_d3#7sYlԅ P3M `#ɰ5דּ¤W$q-͠j^DӋq-['y<Ʉv"Ȋ%cxnPͮ!vt:1#^3d\.[=z\ykb2M7ǛNpt-pK}qv<96t._g|T)S!#潠)1|Tkz_eӎElCIoS.p{qwPv]Ǟ 9#}O^b5 AK\w“!tVDdQקO}@`Dk@4Fl/ӂϼ1cfڵhMTSda '{Ҳ/55%B噀X;S Ec=< 4lNqFxG`R@Pt<n ?lib;]UrSUN7ǾghH $y&,eeX2" $Fi4ӓ;9ϩs ;}̀ Xxm2GQ3FO " NѳAS&2U^܉-"KƱr`i19I؈}]MNC)È/ ?4yX7yͩEwf/HQTP :@j%t I[5q=GbHzVC zigM|B).g}GS4ΣNc{M?:Zc,rO$n x_0@#>P#}y%",8K l*nƌ-u#~"و̧! (O=0E^no9N'hQnq\Y=x8ccEh}Vt{R -gebJ9 UՌx9 z+Å&2˛vu 'sUSeߕԞUfĐ5˱wgn3~o'Y0z7 2̒&5!isYɀ[!=,37S,uWK!%,8mJ^QejzXEa&~vJAՊѲK% a~ؓ`Hi亮LSoҪEPJ)m<]ub,jջSn[ϡFh0FҠ$enb]1wcxyK0Lj;b* []Ohw ++ "޼~t3C`ˋI&4nP(^?A#k!J;Bk7MJvId`nr\OLJ[77qQ&exM}QExgc&?CZi 3.ꡦ)8edYo+ rEŨs6%ޟI) dO\Pl+GOEW7@R$pNfgSDl4sn&zo2׾z`ӇO1Iacoq<\rtttkW[WoGfܦo 4&l1AvI@hR`~xu:w{tݕ%3^%'J'Ö7fΓw5y2l@85y\;Ľdi~w:yȧ]ahڝt_} :͖utrڜUFSiɡT@YCiiyse[8Kه\ϫGdX@uf<[F装&^9YVjM+,N_U1_wW٪qfETs]q3~.J_qxqq?Yw?$+nQʪx~JoW7=k;u+DVn=_#08/Vv,;ϩmMhɵ{ރ|62cg:12xHm͏5ygT&YPV08On`]Zy=ßzsg0m!겉GxμW е9׷6`@.gVv(!ê-\;x`P_/3[7|>MLѼ4:Rh_=~H9u`sI /Z\LE@p!Y˫cjioJjo'q*bd"Y報oϟۆtk˼E8ʿb^J'ؤ4~, Mc4M/PRiM%ٴٞ#> /$5ܰCP䴙 %a24B=Q`5 $Z<<:/$蔧`|.0LY5*5nxb 7 >΁CRb^*Itoi]E7Z.4Gd^ ZB>{\- c-)txwbv0p:ۓ < oq#[XÚ ?QoIk޻z`^¦jE]hRJ-̸Ѳ/p Ysꉖv]j ,V}m~dV S +@c|\^|%]$Tjֈ{u:-42nHz΁|Vޗ@:SWnwJ{)XKο TROy>ەܯ:I|4/s ;-a lr8QU,="D5>k׮MӹH~+yRIqKW ĵQ~;G7>l~a_&2>Twз6 ^ Isґ̨bp6u mԩy/%CoE>xUF`l9#ج~0(U4zybH{=o}D 5A/?jBw?R~l)_EPD+AC\w&lh)O^rLϷ5Onn_ku2rNc,h03[%>B Y˳E2L=Cm;Rӵn<6_$&VmLgr/< 6?Ci6Z?]3GXt >Nj8\4̓0W<#7:(F2t k5EFۋ!O!$ƥi7.j9qx=FۢQs>F3͗A%A4=XΈ,#N?^HNS5$R ZSvH6(nvf&XvDB(#8 'mdS"Yz0I8,ek}syyh,IQ%\G`2%2{;=f!m64͠<$,ħnov둨 {8Wic0y+B]TN:SPC KY;^E1~&u_!RcIB(~nVnIG]ɲ|SJp!\IՄFWwf -ĮjM۵"^S>_C[#}j.s(L=CRk_GiX#澼#V\yʈrO$Q𶰦g{H hƽ]pTb`zfчZp)2Ld1QsswCHn pe":kԜ<ÿWSC*l(V2 |<,-,7%AY+ َ_{j=7ˁQ_܀\mB6mcO '!ZD-3 }l)]p>TCf 7m#Ep _g TtW `-X4Ax5S%m&B1f1TG^o[ߎF<<> qC۾WA'3jk %͆?UhڴSx;,Aޯ-8R}nh* RTֹ!VCtd|e=?UޖSo ʳmG { QI?m#:!Ul =>^S|C.[Þ9O$:@n'tr9+(L+ [ nx3ߌ}d_2 3aJnM=X tET@qASw&Y^#QŽe$^rY"hng pq$wba]/G 0_ ]i?ܿ\#suE]K'ϻgU^F k{"9GtRFL,Yi/t}˪V[9n4(GKtN~9Xfϥj<%UfNr8pl 7Q{ >#}W~v'MDeo$ hjғϸ^Չ {](4]{So4}NcG=̀f!;KBi)B!Ba̫_ULJӽ[)~ї<!\)5vc^KF#ל*ض=_A<1Σ®9'/;urLzI{ bl{F#I1Xëc==꬀3@̃$>H&wUTM#))dGqU1/|BCIɹTD'`ɋ?j&&,DY31I>Yx wvO }v<&/N]+(lpD}&)O  )NR>Ðj'#>)C\Goxª++p2b̮&ϳؗԧFN?qUC+!SιFQw:|t1G%&*Y ~?~3_ޡ!pCVG'{+sFcr'Z+,e\:UD9uGI] >MoLį?:6ɮL;?):n/3ODDa#Dq<ƲL[-2(Dm.̆n$"Ջ.%/-Az׃~%E9# _<4Zёo5Sc,O96H4퀣Cf{zx^83X)F|oW?Odr^ĝB@[U+*[sgOM yor1&m b|ו}$.uӀ _5NbPvM͵%} q2wmfii aiEnVM3KId[lꚾRޱ vԯ܂tTWY+qt&-q\0ZTtr3!דyQܸDib} CҾCF-iwz=ˍb𯑬rKY0Lrg^7{{2[<{!#s< |^۔yvy xq{gp 
vߍFWn~*g8޷ցo2174ZJٺ}eުG/yKs^$ ywW֧j8g{N9"lsw:ܫ}i!F(fR`ÌpH8ϹBܴ ą^G=ܺ6⾛ @ARiW頗$!Ce'h.uxI.ˈbL=G["6`cpUg J XqD$"=ɘY} 19 OS|`5|:Z}q9bNqaSRԥԗ /0 u$c+9bz^p^v$]zH=aQZ+=۹9Nzi9򜲍 Mmj5L>ӓNVZBLdY+]/)(;6??OX2$RΙwȚ8+\{tp!f_}{Q#bz{OWNoAgpdrK@*My=hNSfp-A%$OML'0=ȰtO7+Z"3=E-#r2K&oalG%U_sC0ȟKt@ b͏7jo +pJy<|܎%ПZ,ʞ }9(̕0˟VaE@[F.{&SyI96A2"1K[5MoD!6V.‡J9|.l|z(^Ti`@T\rTYffׇ!.6>lϛbf^ɧ4?<1޸j˞sʾ[>U6;p4ά|+7sW,;9}t-zOm!a"Fd۾TOO+erclJC }Nz<pFrf՝rˈMRw1Q?v|˓K{eܑb3|UqEtn͑Lrܻʻx n栿l(}(a+KhuYtGʯvx+URӏoG%NYo{U&bߘ;Qut\rHufPCޅgumb$="doeU۷4Og بZռISߜS{ɃjKdw_3 JNjcmE$_?׺gT8 v3Ⴘ۶~120jp OnvBpUS2ʙ@8E;r{ϭ՗Du0ǵ J2|u8"ezj:k8|P4 N^;bEyκU:{R?ծٌ=bOɝi xyj\Vw5v>+[Y[ᛇ1 ^BSȿދ.j[$7gx7J ?XEBUk$smk.6OvH\\Mo g.;V_1֨@`*FܿP.}?*f̕ZwU)֜ tgZ h,rN. }]q3s.|CIKͲ(!gWG^oOfg:7g4qYkdj9\ȩt=:K)_TbpEo: N,TO5vRu%ܥJ/y4Gf*I5p+y?1+Q|$S%c$"lTs8Džl6 kB Tfg.إDws=XhL7+#k>eu䣣?6MH\\9[۾'|*̗"r gÍnpԪ9oc[f۞8 V2vBO]e KЙ~3M2̌Mj͎}f,t[ %YT)g\J-DB\.jVD>틮ϑҫk핦2׿q@5ky!QO"9 DyΫ7|oƊXٵm3UyrɳL up$; 4CԐck {/O[ţbү/OoKN}~v M %hG`\Y;{=Hԓܒޖ򊉼?Hxon2\1.J5}uDzy- fWAa+T-aųE(eg6S5``@B(p9p@4`y2C {O:z2r s fe乓 " L!E>K F CŨ(N{ rtLƄϢdzKѻG1É{uMe' 72_{?w\VwK$pTf)P^Zvcp_i?r%'yۂ3WX{"F+DܛfN]_Jp'[5/>ퟶ蓖xSnjv`Z:hdXjFeMwvD-!Aj-wRUj5?.sa.f܃ۮbqw փt tQnQɭ2s)TZ݅˫ dz}r#zdwquaƧdHY&2nc8oi fzߙy19ܗnEФ`mӸ lߑ[Zqs3 \kA5 Q|Zlr!h )(kcCiTC-71ն|>ѥˬҢ, V3i%Vr"Y u09sI!Z5^pڝP@UahU0j)~2,3|=R0 M]8 zʺ4\r|* RViuOxK@Ю cXuj6ǝ?;x"-v̬/Q2A\|}^E'°կ"BK Wq76d7tMp{z ~svb<vѠWvgldYe?E{^͠ z.78\s#8s YK6oW}3Bxusؗ'om̪Ybz5-y&W{i܀zqbQ)SI$4`Puo@@򻍀83 1t2rǘY|k+KTZÈy0 0Lyzzft|<w9~[ m$,dfUT`#:Rϼ qD,Fؒ n^`>`,כ(I|7 z,$н+6:;y c+T8$"\Zڅ!Bl-9Nhc>F\CS~A*: y }ȨKޒmv:s23f"ʟF<6mgxR\2-RZPh|,odiyx)j/aٍ="nQr| *MxVSo͉X>7H,!;G{'b!JMaOy*# ߣ*>ɫ%M62Pg.)vm> ?:B{nyxR*|xn~taỌWPVB=Bx~-쩕姧sleoF3? T^Oce9  a>P_QNsbKڳ$n|rپϪqꧼI =#2ǽ454ڟymf1?+σZv*Tfwna~B9^Vbž{y6~tοП JC_S3r}io5\b,a9^cpa SGd,-ct;L" pc>ґH3CwJ;llD  Kj!:Ȍsq> cͺiTsl4|Eżg,Na]S~>=E}%f%A+{j"ףOwPcdf6Zd/X#Vj*`>}_' w".\^pcFÿy,e&zF./6!N#/E]hx 7izMspZg(g5<W&@zN0aG 8Zhք^L$` L$Zy O j4bT}a8fmL)g+!RQiN n\l) 3;9zܽ13m!퀣75VuŠSudU&G%J:ԘʷI+gάiGOv6w>R$u*yx<2\ 2 9ӡ-mLdnqns`wfsrIzӟ<)B.5W10b3 G(-ȘrIvܜO]nc+㢏(>GܵI'Rnng`k]O'^7ކS2!i@ߨNX̙\~ qt`ɰ ҼX~m{Xr#%ւ/Lٮ..Gٜ!'Jh|;X-}&8^kzifӡ5yQ_C |H6ϸDZM`&Q Q/Agm^抡0(.K$ZNj]pBw"xҽ_?BEcget::E'X5ODmH[o \I[&N'{ޡOz]*e?9Y_n]ZZcTGYsr =I'GDe8[ ٌ0A'JxνgcFcjQB6L@{B=*mz!IuߑJ_)]]> / i#65׆T##&b{#MkC^cX!4;k؋;o]sog6{'c1:'ߛ ?H5mAi <^V,8hYf%ݮx7@+!P@r%RP&<eKq{н0E EӤZ+L݋dQ QhIr* 3l;5%>O".gDL"#ykI`_C|נa? galPD[/$=\VWJ_\Ȭ,X%\:o[Ckӕ2U@OSy~Wmf$M^sŌ l0vwLE@p}di>,< N$OM\"!_0[~g)3;b˨b ع#8NDu/t&*5{gU&'i;G*K ؙ/<^y?fm-O#H]x"D%~#~\^N=`#K!>e']X-R7 ״ xJ*vmNw]QY)lw0ucg.6TXm~$0Zi 8`4% ^[.挧06܊Yryrdщq|x<6suY{=ؠuxۥuClTdʩQٳҳr)JrBW5Ėt !M1y&t<\db a5:^x9楀GIJ g΃pE60vF#10ǘT,!M'TE='oPaaڂF_mދ;i;y۸c̽xlm:q6^|gqCu~?fzp/|2'c/9P)6Qz?>n;~/3١J9 $PgNE! :OGBh6ܶ0q͘]H̥ ?:~to~(\g_ڟy7~UyĎC8N@Je;I9~< ցSͭ~g-Gۋޱ;UEx{4. !<&G`0-tWz̻???/;[UG<ӥw/iM ;OFVm>ȓ}gzF^Av7/scrmZJ-\oDGֈ zl`e]ǣHKp-ybוkqa,P @vC՘z§L ;R]=W#uOB>Tnf ]a/@aI5ܡ^:G7 7WBꌪ7nK.J:QlfȖ$9f$쵂ѻ4|\l L{rݙ')NVeB׎nUOUIqkھ>bV^n@]7 &_NͪLxeiy]yLiT4,ktl/Ujv|Dz ςekz.UXָm'Tncz]D|hg2bA? 
8\4b.I~Kq _""]MOW[Zy'܏s'W6ex%K~ݰJUdzXq3jTH[k"p_d$ w_04sJ;y4 iDx~raSסO_0L&eKT Gў[1mNDJxgX@d̕.FQG9jazSjitM2hE+kḟ"#rRAq'MIxU5I%=É\]?+\CvVW2]Ԥe#HpQ+q|Wñ;K{8\2f-GC:h]U)H_$0%՟ ^:) S^{,DV[G㸿3Xc!nȨg[qerg٢U35.-ӟΊBG/;(2jJuVAce# + qgIHRRϱo֟]Iؓҁ_𡚻da N#lcܬc!$x fby޵G&.4:xܜMw/zY5b?T>Z^GTP  D!CspR3|CZFpM_DZzn7rbOn!>Elpc-/9*0 0}*Oxoԍާ%륾sB~mEo!k".uBU5(0[Guvy֗֞5Z6Y]Ί3Z >%Wގ:7ZWꥴc7t/i^cNvڐoZjmzjdsǐRԮɞ,@[A%:WO_7+$(ufhϣ\Vzjm;GR4 (s]4(RG`ZR@"EA#9 |(F+z _*vh3i~.Ϥt#ͽj*$oYpUpz<1?h| @Sڑ^ju1#46 u-OHSpȐb\ -au$eSAfD6 l+=Lw͆[}s'Q^M:=nW(5^jCp.\ѲmvGaɬ9eΥɹ-pl gUZ?5+uk!P(Fz Gv:x]a1ft<(eNUX5AQs0^ st䦟;~D2z'/n()OżUv 5$R75!<>O͡/-^+&>=yQQ0՟(ݭp-*5̢Oj*$ ȋy,3Z[ԪPXQ K 'uЯK fK/]f@JD<jҷ=Sff6 s9_̌;hFv'jbɡ~U\DzԺT\>v]/͝֐~@µLruu"䞗 X`zs O+~G= 1\Rز#Kݎ2-r{En_PCf~96Nn@GK 뾰vt>rN'vuu*zYg+犞UJcQf˭ 4 cщTWe%`g;\|3+nbDpP:rMĎt#K< E^%ԕHyg-ZhTvȟ߽G\UiqEeYg5!r1~pV!P ׯz, :J QÌو0YK;t́mf;$sNW{msy_?O?ysٷ;HAΥ5qSfw$-p)bij'gJrMZ8 =E^ɏQsCH-%tWHe|N W͊}8_6y!Dc| qVXSUTXǙQ€ttpּst_jx}살Jw8H5Bs|3|K y&A 䫆D|~IÉG~&I^t4ӅIOhHCzl&Hѥ c|o._;}En#qI͎=N2M_;kIR~oZzsWiq_ṳ9wY/C!9;%Kݙ>zJg2]{ 1e?"hqvдn) !]`HBeb*cN}vD9Q{}EHn5uyCv\^ B(Ex ;czzb}γo ge&h?k4m96nC>w9{`$*fr@p{>4\V`a'k7GlݾNo20|4ox<Ghy@?#;NNg.e8(X,៶@wi&{s=] wWiv`_Ǭ |j#Ic` xˠ?^r/$ów QSx PRIC>0T~4Tn7gS^o\ z)vi:9udtGxù8Ip%]Yƈ;!FKF͖B-S#WnO܄RokȞ,ϐ`֤bxm|a Kɣ4 1R;_iZX><=7Gܱ\+2{1v~J{4dLE4TĀO?n?߮OфJ@-᬴!Npu 3ۥiEXY&똞pt;P}~gȕً%Zh<%-&cZeLbn3h͙˱[ٳKm۽V}^Λ7O:OUF9Kya쳙qY}YZKךlQB~TQyoTɣy9;k`!V[]7f{+? ql]p~fƧ`9h:lpYk\'3U`v(jgMa@ȼ{]`E"^5O& PJtoCHևWc`Y3pNj}ixο_iD#Dٰ~_]g=H8NNQi!B\ _d:=^zb  miIZAӈ/}퐮’e%d}xvW3|"aS"aޝqKC AΫ9ZXӡI/H۪>j9T'}''+tXÜ~aC9j|.:t?$ay_ԇR?q1H屗3異oZlf$`(wfb;0 ~-&/gq o.+DqY;J[Dd ?^pGD{Z-,x Ӗh{ S\iLp.kq%·N7}m>f>>l>XI?~tyfT =}Lya%sn}nԎ2>p4mzɌ1^[}zd/*%_A*M6m%f+edQq%kuƩ2P6c0KJ,-49_js>ߟoyS W/|oԒc^n{Ƭ nA[0 z<טּ(dxCx"ȢE*/ ަ&]I~]0Mp Jƀ ޾_Yn΢,"qf)Y‰J8 ,P&J"ݷfvχGh?; x{X¦k_NɆ rNTK:)j/Kϣ "mv!݉&ںYm[-5XLis]8HWKA ^vvԜ_E>X_\Nx*2(΢(F{uPߋ&/N߰R9&ƿƏaMރt!I2N/!EXlnѭ(vr*TlQ+,m98%MZnKBUSfp8KMB'-$=.R\&=I2^5LsVm)dzxowu'la:a;-]z9 0˭GGhm< E'|$'ep'|>pi_erQJ YZ]lsru uW]ƣbLo(ج[86'8HTl3P$B1fv5'Cwţ,~x22,UЎTЦG$ku0^nsJ"+O6yfE\+9ʃJޡ{Bq3û^N;t3G`b _wUN26.2*v#ݿۉJTx5 L$BI,ɴޯR^yq|~,-×^GotP5[{bCڸqkH7"?}ze$䤧;T-B!,:Per1$NkyҒ.A0X~J:2|,:f䚥,SF^#}`{i%Hno0U &Y> w7ݡ|ZwgC6f:?TgW3z>e%u}r߉/ʻw8DH:٬.ph *D+^F<{AͻO7!3r雑9Uc1أx~,kȾSD9*8C3N=SEh@ C"%Ay[C?uru/*E2N.(hL{˧]TJY}%U(˴1ڟR|fz2Rt=UG+ty"j{R쎣aL@&t A$axR'P1 cEWA5 h;Ig_O1k:m-[{ܓW3!vի噞V_ w5AO?~D(~clwOmBm8<  #ꕕK[Ðmy__C@Żgj[7@WB "$,L%0$ЕI5#hp2ӖM:m{bro-Kl7* hkq1g@骛8DB^󣷋#2HxN ^ü[C.m:?&c.fK7IM&m4 };19b,NQe<_9~~<dž$M9?OF[xii_ {kRQ=,uu0K-ך \nj SRon!fsr0ͼVb$Lq *{O5%P넶On鬀`&@@8cvŁJweA{H@ȳ5rL"(# aFACf3R? wl&6[x+ `hr@XtAa"F#e(R4t|8xⶅ(P^kdUd`{Z)WrA <D'nw;VV| ;)%YI(ݺ Uy\$UhgFN Np=Ljh8475%t|VoƎZe 9 !0Bߘ`5ޓ&_1؆P?cJ (+¤E'g_.<%7$*##=^'En"3Ōu3@@_|p N_"_ɸS }K}Y-tpWe0%`&Ġ/j_P7\~tf ̙ 5/C0<\7ߎ k?'oeW;'yK&/ƸpC@?zui<&SpHKj٨h4N9:֍zz? 
D=/ⵏ)u.ߝɨ0r }_@G2\m o9ɏ"/6ӏW gz)a턼^Tb1fϻt}W*uedK94Csש0W-S 9הٙ鎧|˽&`ro5+Ct!U6AD[7WƌV+4\Y5K"*phk1)G| ѴќEm,qb1A| 51cC=_g6 ACH8g{fp] :(Q3:J#a^ ^xO+Nz *u#dTcni5bP܃0nFAϭVl $Z >#Oۦ<`m~e4WV ^rbJbq6rOVr 4+wo-j|&&tlD H ,u$x0*k\@蛚EwZZ# z{8>L3 p܀/u'`jkf]DqG+;>Wؤ͘IY&dzzG܇PU(cԇĜ8FA] *cF t5dM6lP55 I(OXMGXN6I=0.Χ-5N7 !ͳLg\|3J/yֲ֩|adG_0$@90Ljޥ띈tBD!2?OUMŤ/"Ed O -7$IK68Ym Kd%bCc fИ# revu<.tehmm*]U 歡t>cב샓q/0E3N")aᖃu~a4~~XGN ^vo<8殑[ @\SO:zVS}fx Wk yvǘ-j۟PvkBso;dVnE*+t%}/ݵPneW&8iG0UFݯA}Z*Xm^XզZo/C$}^›[/$C [VS#@rW_: EJX(>#)טv@tH1L, %Obr+NsǝL~!]J-~/ʍ#}]PNL;d%U_vF+',6X9_mX2LNt E"V3q2KW:%)2:o3 ]Fڍ\OX?l{N2r&2#iM'E˝R FIA?ŻM;>˨AůSbq|O?}4Z縊)Z$.?v>!Doځ.Jse-dy)Un!7.h -mv<ozo)_$;¼@lNd% JU^]w%TT>"|a d(.\̄F#Qi~7R I`1v]\ƀ8rR cSḦ"p:@Z[F@p`mv^5u !ŀC"?Vȶ%̳͂Sng`8h   `D Abuを/ksp~6(וO{g(3վjLv_Du3AhVpՑCvp}qCݽ%~߲ܲZՁz|ќZR7(xY}'{iO-Ykz|ͱZ% !hZ8q\j5s!v)5bNj\0 3+sx/79&  ʖNN'wYmN$*[H;3;9y9uι++R)q xhm5"R\ ZU:' کmGWai]Hd_YiѦB4K8#Oq)3T7E-3)-=8 .Zmg?yg hf{{(s&^[1GS9f"-LG=TYcV&V5-4<^+AKl0ŚXÉ9˖Yo: =z z B0^NZ1qNξ_="+}))^Q d'os~?J*|od346'Tq<tO\n[լ5 z/qS@p b4hcBW!WJ_Iޒ{8`9 Rƍ>@;$&Gw.xZD&5}`sF2{HP^nV\#qYeڎrW96-$HzuیtԠ)_{+" Nw0.C:> uM -iA݋wxXkQ5zsH Agƾh9i5jz/v5/YMq_=寜FpIMO :n xwwD!DžO}۪רh66CE`>/=ڷX\2][GST?WjLtAx Y`q$C_SԖteG zUI赾Ζ_sï.5mIC޸fk#2BN@[sYğiQT|(B@Nr\ٵk=B8?9OF/k}&Ke 6:ިeggo*^sum\?3~Т.r̽h\3ޤyOY96 nңlPOj48x!aUv69G=SՉ; x1|/fK Q{f}"ñ,haE$c D[ȓQkCgyo3yg58+U?hokC#0Ŕmc~d/R>ڱ<{.1v#uwA#;gwn$/7;@k l Z^#DnUz⿉G?/q*HxvۉV~vKiٚW VҙW8қ3+Ls̉ɼ >"!eftִ.0,txGO兣JМu.ʵ0bSO!̭9! xVЀ`3]^˚ؑ@| 44kj5~PN# ZrZ]z Η6WAE p|XwD=Gy'9\G8_y24MRj; rg|t ]aƏp'txit^yTsǜq44=!7ih2}`}|0ӛj90Iv ̉<ׯöԢx5oձYOQT=4X|)r N?NiΝr54ܡ3i@#w'-s_u(x{<"E{>IΓ"hv31<8Ldia~<9VG̻#! T\YX9k2wk9/Yσia#wɓY'߉ A 0*lu|}0űʥ#p'J0AڰC~4{E](o+m!'j^\^`ǀE=_yktYNE Z@E\q~P5G.g_)wkηp W[yfb!f{W`a_ 1S0kY_P ڛq+c+X[/.aS^N7k7G8ZX~fIJM tN#ϓvOќ-E%Yl둔\:}ƞtjTˊ5n/ Q_4b~7Nˊhqzii%`)uUo:rxRok4 D.ViFOe D ʇt8gxR9]$QBvLքqn AHCD1R "r;(m)S4!'ԉÜb'fםF,^*=vSP&C,z]_a܍`5"1GM5o[ b?龃ݭX]9cU\5ozɞd@8=`=rվe菥\qہyM-`}%O;fMz1A_7ۮAME7B}..yzzoToHE>^({Z:-6{ >Ն 1_?n_AX :~͛@jt:>gt#'ȃ尧b{d/Gs8kDS!)#D>np*_&j&݉ÏgF%k`Au܅Nh( DΜ}a@nƕ=[ s+(Lh ǫgGΞv* %/Cg9铻6b\Ϝ*YSljtKld 4Rl"{mA>wPQ[;?XW!u8+B!jXs3iW>uxT#癯5yg#bcR:ԢK`o}R '=1*f;Slbj c4G2*(~/ }GgMSlB/ &nv" D>fow)[ U,;#[M=FtOaݸOɱ1_s;N`-"c=ЍR仱/OԚlkQgMTІO'ar"JA|S4"~j<3c([%4qq(5d#F ScHZQ4u}p46sb5 xG^Gnmntʊf7h7,^N2q| G1ǎkЁϣÕO;ۗai/gpv$]x4܏,|,shU3~(7-p!R! 
Ԁ8+VYRjx.t#B-.Vjټhlyan /˘[ ?e֓c=S& jP5SXJZu }moxh[>봐y!SUu%}^A[XGÉ֨ar:muwڌ=c\ԂP4iw@:ETq'N'RsR8o8x0MB:]2 vA" B"5 9#9fkņ٥8pf鮌>Pi|ކ1Fc^<Q=2mcқƌ+LѷAbq./gƓf;=Gt[_lY߰NȱɡoTUkd\ĉݚ8J2tm.o/炜كI&;1"ۃv9EPFJU:ITm~Z^qo^ ;<=r^fT =F.w=RR{&owrprׯ1L q^a#b~BS {P8{"7+-gdz`~P5a@Ǩ숍wtݾYB_((8!gGH҆T8i8nQ:OG}"Z!)aqnP6(9♢c5>Գx3_:Px 68vΑS ;8&2=BAG]؝u{4BIOmN I={% .2<_kQ_g)^\0S˙D*aVRYXo;1'.[;Z4N~+0Jl[w| omfaH ii>Rv6뵗X:ҿ}'ym)wzSL#brehv>z24'ʬK"BEڢR/]5睰ͣ kRǧއ_>8MI&Ց9'bRG %U}%nn`e2fgmntH5f%@Bp |P=:טWwpqo"  3c%N-D5ܺ_'GN뭹xѥcʖ7S:>8Kv@[qM_Z +by~HC ?DC7]_9)8|KҸg;迸 |ÈH $sNVXTu\+ ~ѧg6:pk;5\:0Nq:wO{ܓ"SMG|oE`>LL3Y s.q'}|?VË֏/gM~GX/i{Ak r]+p92;߹{]5KS)'=/Nv\3$siBbu+?NOhEOut!:3h A: 9zyʜ"RryYǨ2sTXC$toOL[Z_u5LEҜ%1r(doiJTDHkFu ^<gK2&Wx9T9xJ,d,;zq4HFtԅT v0]IL[}Bv- =MU'w j3:6ö8%N$RY 8P:ȴ5aѣcO6B=*}DXG;Y &>.W\5seGH$Y !k.M nY@PWݻda7`H1˔`iC5'9Ē{r,@*?s4{`t&,Q{D>RN;bUFgpx $aBTPA:sAשf|0ߤ=NFgmOonr‘=}op왌KLwXiw?-WBI1k!Y9>E6[cvlDQ,G N6|08MxPʋU>BnOGc  |{UZyχ* Q[V\%MpeS.h[-w/r GH^@B/;sΉ HW[~/{Rv:TQÖz|93łZKib1<`ݩ)\x/ x$zP1S^{[{#u/DD@U f [UMz醮=ND3y?3 x u%>.Ui5|}nWπ>\v) }}NV;' —/u -OŽ.ֹ]n??w~g4hҀ3艉Oc9w !iݗx.׆slDZ1/嗶3̜:0oƒCNoa]**wp\_[(p !(L7Ӯ?{1.mݡ(6jϚlB'v1wR\Ϝε|kxv_[s+aʙ;ysWg FGڪ!; yř5@d8.KrtW]g6k$4=gsNy$V1 g $XeKd3 xW\7`uz|#]'_c~}Y)$D@䴫=[+b ͪK%aTc jܦM??I_Q8H h2X42_]νX'K6nkLn`/r4|9ILx|NY?2 .!fQ5lV7kAb)q7/eaԬ Mƿ>Y<^6dgX*+׵qZ&q/q ZMTPoAI\'xL3FFWthO@['.V g0q|s+"һ \2<BJM΁gACVTOF;=8caeprFI  l2o^ *#C]PiНU3;'5@8+w]ݗDK]8k~w3DA\m2/cل#j)D4 ;mcq(lÎ0Z.KJuRe.Kp0phvtizF1 83]¶@Z?=wBIBfv]SAa,MwmrOf$ͷi2hkڊK/aouQO,cWZbTokA̯.AQrM) 4C&{I:0g3N]mTR&.UV섁$Lvkgci6q6 [Q-6 Y 0^sta@à $z4QKXC ސV~LtVq=2ރ!; xH0AIYm˝nʹ ܫv>S2Guxt)T }q$GA`t..oۣIUGsL.-c!⬰@ϣ#t:H`0&!<&˛+v3xE^~Nx1X,'WWc>l]j&>GLe~'_^^њ6#:hKsc▆EXe?7N^Q =-+F&Z te=>x2ium\֌- ?uq$3:iQBx@؎Wȗg_#s1ujo7޹}oٷA4v6Xr:и0rvD>:N1cVeN͵  D~2Or?O싻,Bi:@ O9aP9kC[] n~%?l D!64K.6:X=sΡwi3 :gwX0vtΙaͿ K'EGn˧OI=Nv_fD#!Y[;22 y¬TWՅH?I.wVzZϻ ο;nSI`Z9ӫcǴӜ {_))cnbOۋB ѿlH?ZV8x49Ͽ>eb9%6uw:/mj?br.ȳ FcEP謭.4mخz·ڐ/?WWW@m1րT x ( U! ϟNCZ͜1 n6eAm ,0`^~!RT-T!9aw$9%_oV|2B 9q1,H8|,N_0.e#_S,5{4 ^YsJZ:k^8[gc9T`g&nB5*L$hY}K(]:ׁR=pDZ"Zu>N҅$_U64OSߒ~+a$Ы CBuI;rܮtH(Ry3.Lmwׁ!jݠO~tͱ/7NIH^XrB2C[V3}q~ʌ`h˶Zk"9D*0i5j`/1]d傔-y} {trE4Uxxl =zxuQ aG6O.yB=Ye&بȗke;BӶя29"&n]'1 Xħ@u}G&Wb{5[!& mi3t#:;KtY*5!͖ߧЎׂr9@[|= v ۤYVZ%A88"bkE{bwBgi5Bf皼'~z76RǓ$6W]!W:ݰ$`,t<_6gmfJ\^zK0?$f\z`*c.xO -&1:n>5L~<#v 3Kz!jɬ4Lٕͭ1VcZٌ-RL$E-6䁋+#zl2Nk~f1DSkD=قy=:=') ^nBxg9z8BVT2g}wp06B}6KsuEaCzlo4{UIįnA[gם?xD!Efv'"4T Kd'>MB>Or >BPhc"{X~|(v}%4tܚ>i(e7ΙC9 "S<c#ɬAݟE>!̰>Fb0dCy>lѵ,7n]+,0w^A<),G]!D?D[7펩bt_?`n/tIt y6x9}!kr9_۔/^;WsBk7}Unf71gpp1rL`O#U o3 If(Mˑ%"qʁu<(>kq}==*r-Xkf ƅd @edu%\19GP|.o,hseޚ2Mu'%WH,M~^ >*O,eϱeQgZ}u ]!;DxO;g0' b z:]_D𫟣<>o>飇okc our3+O7T:ei dX;Kw_n޹[7~,!2 n¼c7& *d7&ņSd^iP'%?}ȼgb_}YEaa@s//{?A_m GN&4*Km=5-oMmiᛗ/֠ @!9JЄAbFD?<08sz K:;ѷ &u/@;9G{(Aǔ0/ 1.@Mj -Ueҿ`Н!YE? Q O,Wt[<eMeE\ zn|3y. 
>n .D*x+urꨆK8oAy }h4661Jmjm/VÂ{FoPN'JE<ZtvVtd_qGkR72?M猑opBd CZ ]T7U)9 SFn Dz`q71'Z)͋pi/鯹gd{cGT$Nt~u` ˢH=;`#f|}h;) ynN#G" 瓡W0Tj(hkieCc4 ѹϨ\Q^Jץ>g^#$äuSc*ۯ=FN/.Ƕˋn5'jݖ|=5w{ځ6_GlZ gk9O/KhOiƒXh|N}Љ#w3zhBORs닗n<m?=ͧο2Hǯ7cULJ(@h3&-8\%7u_]y˚Ϳ/a|aWzI-1 0l?/ye?ݱ]`Nq_ߎ#` =(_džPD$IBM/iAO₄Sg3KN\X anQ8uP\3ΧS˫;y:{f_+@wy'2; LL#u36=K86ˌ} ı 2*N3 ̀m[rQPs x]ztr,}^*Suwˡ v"*)$M.Ϧqt@ * en@Rsͧ#pµ3q:07]C=|W5,6mrw ޼|?7yF­U+J :r\mtX]_\g|m"9ll;W@*PeXe>%U0UxX;dyQ0ꛕc?fN,6HHYr3q'KQ݇Si${2 I@%@'& pW9FHf-a ־`AܧH‚2L!|D 1pׂ+`:{$諒~El-g )s90p, 1'h g?⚪lRnݡG}4P=k|.6>]hvݩ.3uZW1̢W=`V)I~o!:׿4hOs3c<PnDBL,`133|468-tvudV䈲7 lZH\{D;D 6,,k ?\K O !̦tr5阀n|<nFm]BW:f8dps**p7Q" fZ[[U`<1CwXL`rMc ."}ϣ/άP1W=Ww-r&fgӭږW}X׎Għ2%@HbEJsI lG` |/Yoxj]U"3 `C%`몴0q~8qz&8h㡾Af٨!Y % ʖ18_tYHFDϒie VΆdc]t@!,Ѧ-e&O}O%AHxرrHC2+ޛx Kl!Bk3)gk 􁤾y_ӽ?P#/"v!BTI[AG=5uR< K|#ݠ|;_mx_-qsscg2 "qRN$C`>$\{w蓽)H: ]tù꥓i ф+7B RBo./n*Z7f˪zd0SKwY(;6G|۽֧S팺Q5=y׵ ݚQbp&w.̐7e)w ߛj+~ ܴC`3T"@s%cF~.dX5뵦Y ~Na٥: sTwXkI=VSYK9J88<+5FtbO& 8wݽ326#pҿe=]ӯ6Hsv,e#SjPVCGK{>|<(kp :ݾ*4_<3+7 obU֏> ms;\kl_h'{?гz J'pX;O 6*`XІ8?RݾvKwm;I{m2vؑW{G @'jkm<$m=iȠ'W? Cy*Nh3+2Ԁ~@2KfdZ0FXULeTUx8DHf7(h@/V6Aga&wu'Ti4TL,[L{Ry( oԿv6{_yp}ad?Z]==ԌlևFuKu6&䎺q;)^mѵT$A kYq9@C@ap9B&$85LD};p<WyM'AeBh؉& j*2&u8WE yٍ߸^\)6J름*‹gL+nV SC 8|닧iN'2/ zGMTw\zgpBa BuCk蹞IH aBU4툮IY0)#@yG,)҆5 '%~SxRgs.̢{l{յQHv }vH4ir YT0'K>Wdڜwgz;C"fb!\\T\eNR좶9e%dWQL}b2lǃ(݁/'wA")v : XԶ*JWQ)9\o$uԛ0ӊ ,X SyPM*j=Oř3vvޜ귔rg%ܕV'7.0x`7Lt4de/5cK3؛ W0*HkX}҉4 )d?(_Lj34 Ir ևt`|:/EPM 4|BC'm#ߕ|K8vGB7n|i\2&@X}YW ݏ`))zɍ@3]jsNƛC\i^ n{l>Ͻ9V;,̑#=7B{,{ՍݦW#P Faܺt`Z]"CjvLLS|T}9~ ڈX:C|6 \̈\xO.p$ztnYEw זTFSL`?.01Ur1>途͏y'5MhUtaTtᰕL۳o /Ͽ- HM"X䛽o- Qg@u)ky7a|gɮf` =X+S=`(>ͧzdĭ8؉yLQc eH>x>ٺauz4ǿ27kĸዕflHIx`?+F)~7Do\q>8>Zm(o*LF%OSR3Ѐu*a>ժfTJ6bIoE6E v_eV_uU}~])y_ن{iy,$vW lr&{ ]ww35[ ٱ/쾀 i_G'iqJaiz̈Ύ5?͊^ EY(BjUP`li[m2@\!~a 6P5&eI#CQ' ywP (O# Hҹ_Pbm)|J2RgD#^_T lstӐ I32,)\揢 Xk k\TCCѵ== XeODuQ<ױ{ٝ l Z:W44Y|WNb}{ъ⩺NwrnTU9Ǒ۟B Gpk4b$N K5ƥI^)WESOF-[9@QLRE>:awT5\H#/TwЧh[˓O̜P{zݻ|qv#tp+TЫĪ0GRKz6oa^: 4umE̘>ɕ[ʆ+^R-{)=+J"ۢJ#i^~ƒݼ wzb5!Kud/g긝MI}wWHzߞu![A B/rȾ>M.蔔5ddͰ{N`, r`Zu?{AR`s&ǻhy6"6<}hq*5yťKiDܣ+Hrɐ/\R(,,~50?|}j^JzVE#:u@XLH0l RY2>ZfM= e %s(/*3 U}TQs'I5+$,C _!2xU`[bŜvg%Ug:u̚?I>E0*8 ۋN w/GrXmuďV47kAB(6i{Xߡ/}׸D%Q_K'LSDm= %GDzEu:GJ/l8<|#4CFxڷ__}ՄW?Uk& [ۿYieE e{geI粯8_Պ5RѥQ9*s=$o/_4dof_)CM‡ .DR:/ J>a\ hb$A:/ gK۞s\t{&ewɏWr=3yV|KA2_7f4o[lIfg=(2KѺzpi˸73*no8w 2(=0~~V$ anX]Gث\[Twi}prtmJ28=lnf=ēK^=ٚVV`>hE/yU ] NH T__h6mwd%-ͯy3xFr9s]["}z}beS l㜳ECӿ@νOSHpkDn`O쵐K&# V6RdyR˘EF,Ů9?w!J/W@I0_ 1݋҂;Qx9KnWU#uk]E7BndVΒn%ӘXꃀ{`;oeU??s/TO3'Li?qcjkۿ嵝ꔉni z6YG=mӔ smO)wW"p|lΤ;avT}p6 q~CJ{q&.bui ^7ϦqJwu|n׸Ebr8X _:]ݒmvQ,x_ԗv0yVQR2JޚZ2ٽ4<=>~ 1#%V,E3޺P%'jAfӞ I3i1}3iM}3h{$]WG06iD 1.RH-$}Od4}y-6@ C=G=E.,|wc1N_,v·cbրNˈ 3RuJ!1hslQ`խA"-hg /~#fCpuχБz8&MFj0ړ _Ž99@"7#=E[V D*ձKhR9\՚':9˷eMbh+nJ9U(OSNenլ :aXo%dGzj2)=zz`Lmԏ:jcށ^䊌GИRKShMf:7v86Ff:mfBQHP!:k5I8Cѣ4yEzީgVEKy{__<$j.[96OXK',/ڻ婙cR>$9˼7O<)f1zX,c-yٕ C0;F|Dtcׯ?%a'('5`J{y?Au5t-2CPݤi=AKt~[&nb'2d?ǹ WSḮ~Mj_PadEIe竭}!x6;3DG['75z|_nS0?>_-Bv(;ц+Nu oVо{0yuߺ~ww)/o Id@~kG>Ζ"`"S3@X5Gt?¸hJSVI*A[ۢ`'$fX1񓌩FVgYՍ4>ijVη.zx$ݔ0ޫ1|0>a;pҰ(bE5w)1==YRrPXszfi  {3f/u ~r~WDT8>PNɺ33̀ m0+N 7iurd"QX( =fU񪀷L@K+,4x@TQMmcy D2B HH::Ԛ;@Ysv%hK5@7(]PB`;DpNl})f |k?wHg ~ϼqi|Sp#?տMҫ<(\HNp,JJ. km{EGjO5bO1z.4U|WIvi?\ #֘/$qUdב^dlHRvUe!\*b%7FpHǰi3aK_++@P/Ĝn'nΖUxկSTu F:*T5o_:{Ia4᛿+q՚xb,y@-OI&$3tC/[L~wa_WNzSx1QqT<߮u4\v@~O֑j$mOI°غ\ };t,m+U!J~ E6։th:¾~杽Y9u;>GGZPeKC0xB,K}g9z::'#=BO6mwp.n@ZO?Ge螰pql1-?Mf&([qN/ƾ1ɞS}S-KN uD|)/F6cfn*Н.Gm(D她~~¼z~wU'{ y+k[Up:ÖtH3v@AۿQK6&3QFuѷjmnǝRXVzTyΖ}ݗӿ?˘k;-,`א;;&M-uT̥*W< ~bDz7ZȆnKG$ 3-cW«$ۿsY43Jްp([H=b:9ddO:~V?Tgb!Tox M.I20NRbZ__'~L;M&5HSs?kܼXgP|QtRm0oD_K)055o y|yc tm~޿5w9ϪfdM+u盗4K%Zj] Y$H:@,z!m. 
̊"K &T4VO5u*s2~yῡ6쌓uRbsL`rnJ!x T | )}^xљ{9 :tTՅA!ްa8>zO>JO6Β @I/|.M6ז"#BIDATLntM>- Ǒhc3Lܸ#FG'>l%MBEpko`XHgykY{&%&~K h0!鰗.V qYY.ۣi_.g@lցjCDOr"Sӕ=ӏܻ=`4q[1 :nK6@ A]b1fAY(ʙsgdS^#}+r%v]ި҅е&j>?aL/x_xњTӉti au.^ܯw(~uh. H88QsgVjuUʇ:3Eg`3RoID,WOz YłYh M>[381iM ;[Vsv骭ա^SjgO;IG9%D.Hϔ?=h1\Tx|qVf2tf6H፬YHL.`]뙃,/DZKITB@D ߮߰'P31Tgs:|tnI+F9 ʌ05K+X[P<%E D/1@> U[7z>-kL̀(Q:߃n<(`BoZIJx:_wjOx̏ê{mT|)Qb{7>^ewoew%BTzң] '+|n-zj=obzccNDy IRFV/=Ǭ!]c{͹'j3]Ɏ+OXoL_(B`Xa,8'b ƥƜf1en)j |):hq}QZK67_B;LW9@zOyм=2i ǿ%s/aln:+2܇|淋vF xlIKnWپ)>C.@~vo]$6ڐxxic0xOA'KY'^^gýrND_?5YB8\ ytɓ*y7F˸r(^$s&V kJB"#cE<2rO.0?Zloܼe}jQ3J:f.N ӳbӯb7&MÏ H~ -ogы.|dy6T5k/8iÉP҅7{3bb[VI_XJBێj682o Rsjj>N6KV`j6*@‘888S#ngmgDzo/A^jY x?fȁ'݈c1__D2ɄȠ'@=Q́t&u({P?b x9cd ~C=n7J,x:k /pfTli.uf#zj̛>+QbXe ]p\~ "^w'׼WJ"Ӏ*T񦣼<(˃.RW)|҉#&{qq%',kD^_]+zSS& $DDZ 䭭OS}wz z7={*kAd*yۅxk TwJ@J|f #vdXv3!XŎ()9o58^L@,>Fa;m2\p#'{8\{_0N"l'Ѿ1f$_1`6!N1FǷπngG]uP 0 ʞb$ج^ˆf%2vw=L჻" \7 )YFXn1oiG կOeO `1u E2hڑ Fs0\̮D2-i"t .{򯻙 R1OUBpV6u!:7cOk-mDA5{ʹcEu3We|x&״:ցxd*NbY45''UM$t8EtX΃J~s":9(PVs$ v'Ap;zmbl<'E9E4AAxAX:ɎfbcZ{n9a J& MMZԉ~֝ #ǵA:»ǟEnQx^jB'#XZ17Zh1ߠҌޢ뗲O??W_|dL+q_Ara8ire=f'o?v4p^{׿RT `K/7@gEݽ_ģs9 3$|˵.d;=|:>kY@ 6 EfM`Ko~) qPK4AQ2[mi2$\Ңgyt{zseiaF*Uw+^=+3xqA/oIfwҫJ-ǿzy6BOkg̓Y4wA*{~?؛J^(_Jrll_ɟȈ.IJ眥3bz~w+\Q*Sk )EniU >98:M`qpA`|6BN-;)W Čqz[Zg<~^oUى-5üjL:5,ȸ<͓o"xyqv$ wIYD=} njT3} G/MwyH@.u||k6P}/-{aʚ!SCx>RyƔ.Pfze@DA[ -dv}"9yXN=*k#Uc!G39MuwB e;.ggm!+z <0%vM&LD`ʷ'GL{S2oɋ/?@iHwga؇8m0"NC)N}#XImEq |k?oxKOIv2TM=o Uw8TW du+ Jkr ƍQbі90t;ޑ? L pjpA|~>fծ;N3l`Ю&:i';` ƦfMkDhɄh*<45ocmʹЂԝb IRAPFg.T64IgaA1.,ࣇry-ضbvArΓqޥ[:^K{ -"O`"*޵ب޼5Jӕ@*%"!Gl|]NRݔkH_ ;B4ሌZp'u2NQ|4-wD6u@\r*k(6E#rIʟ4xg$>xn5jc;'ݱ@}B )i=tgDQ0 BZ \ߊDz>I\2@ ''U+UHmIn5Na<>,Cϰ z,4} BbV7aQݘ{*dpFO'"B Xt37PP?#@J.cZaf2XB<<*G3 . 8,!2~؝yi{)w3|(5 HªVg&$A]U]n}=gy͋'B" ,?Yv%,Q s7r纞sK{wUuwAƏ.&ৡC:'9lMbH._ q}<7kI@.ړtjTzySG1LSP& MDt 4B!24C "CfpgI gqA.(wYX6[CИ6io^R/A2ڛ>;bI;\44 5)*%85t><;+cb";~&o_~?Iڥ/,]|p-F N,u~I/xڽU eE߸Tщ??+J]ʟ?^'v+Ȩú~;wW׹rI4sϽ]%#LwLq}M:dj^ W~>MjF~쀶^^d!J#IyM5.A+N!kuTSm+8g'Ah$rH"K&z~mT`=Rfwrj23E~xcpR_0To:ӱv~鏫ϼX]l:^Jǰx hs<+p+42/=.&b4,8JDdF_[^eGVs5%>oO ,?+9wH{S׼;$Yf'7lgg(^S` /4ܨI:V|^jt}ĹbC_mQ0Bਗt.S9 ]QUi#K ]pc[d b4>S$pp32z>֔Mnv6}y|fٙR+c3Be(L'V *"w)r|b-5xH$ռjDm0Aq%2h_ C9p& fi wz4jӤ=qB4jD],@Hi0wqH,E',75 &Dۏa'Њ[h`!33rsr݃+ֺigjgX$=L.T-Gy͏._ ߍ>jL {oi]D/ɝ~s(^jEAZ26#2G/g)J^1Ś<)puOW;ɳ%Z2?28z!"s 2C%e)g=n̎yŊb*šTaZ9+Z;K!gpefD@u4i}J%0&µaH3 %n7KH=KwڢKd94S /gfЗK'4]n ?۟5LO.0닾9J? sETW83X#)CRLq&XNK^& 䏴u:@{SYBԒ:NQ>sk^$[ȺldFZ' =g‚5p D?:}9[K 02TC1Kҗtf^Y\O —8Lwz=AYɑ 5\9 e{LA2  Sj>DGNP_5OȻdzf#Y,6Gv]*WVs;F)%gLs$COis@w3_<~3\V.DY.JXId8Mx$}֑:꼿>?؆aq. WF>\ [ļO-fƜ<$#n6̼:Zw?!d?֗3H=xbAƚPn)@\ 9a1dG4ʷ߿B$g:/b?K eD@Rzإ*8rU Z⹿nW;O3N4hJ)ߞz^#mo0;ds/9sf) ڽCr'̝:` Aoi.uk|lOY&u<8P]4闭ls`C܋J;bMG eXFjcE7/r6EiZ?%~KH*/cd(׸@L!1z/  __͉*\ϴߛ+7?FK6zS 惘7p3u-w>o77/nmڍ;Y~Wcu@pgiffS [xD8Yz MƜh^ɽv|G<(v@PsA\ gMx.9g$`HxungoN;n5Wxڊݧ֡G8cΌ'\[xu 79rvk׬OOĚz@;dSJG&2IX?P"]BodTaRZ ѵvӕgNTAF(&^=+.>RsT{#0 f>N3C5 1;%h%A8Js2|m>t+o5qPz̖CiĐ.LŒH5/#?ޠ]^Otib2c)?E.2<EH \͛= ֜:p@_X}z~vYb {`xmO2NC+)}nƹCf7J_\$.%#**0@/ޯ+!#*3xA]+,Qvxn0G|d'aIP9/0q;G/ rIF)6({ I5K#]SL|/[ZiqW{XA< ?LL]'2,t&,>KV7HzU3'rp1UQ]w{1blel{;u}?tx{ܪo֧6'YО,՗j(9;>>\g0?7^dPcR9|q=!3ʫn? ,n`}HدzuW^!׊?rsAioNd|vFiY?5!f@ዝnBxEf[Y2ɡr%0?X]-t8)3Цgޒ%H4nPļ|bO/N~\Pj%Z+5>GVo},twY7N$A]T!/eRo46, ¸Q\{~^[QV$?1׹;74Y\0j2h75~f#8yG@ u\ d!9=-@dH];sHџ" y2ب14ZU^ِظ=*>_=ζޛO-J"jZ":/&ߊMa^xoHTW ?ID70._mͱbg>ɾxK"_p F'J9:Q<Z_(|FZxGoPX '|ҟzz/>_ҊFm'~_G0l}ixuȨv #=9ic qЈȩqSK]tе{?d; N_XP@ 84, NZu)tSp7k5<Ȭˬ̿ۺxΉݹcYn6\m~vrd'! 
0>yGcT#.;!hӿN6 82dqpبe!`P@q+ۗAje E $CB€K`9EL.L/]\t2}F_f54Iz̮ 0j22Ə<Vj.-!}\+K S^v[X槕9o<gNFp '>Ȧԓ 6/hO&u)8RݥjG?^~y~(zFJ <轻xvBRw{Odۼ-4Q0FwރcYCc/0!- '; ë+Q)vr-Rfȝh["^YҾ'Z 82fف:3`W(Jʰ}hjXf2ҙ'ܓŵX E =߹K86 ~gPmE刺.|`Vk8_l|@{2ѣqU{skkRjLm L8fȎ<>LY)wKKqOnHQ'j~2:4F% f &<-L p> mMoV xS IS) YXv"'nf,Lzkd|˼x2~5بї^2adW$ЕuD\bwY V[K2#ɇEwͥge f7?6vnQ§†X ]N5N8EtւDNN[LSko~72ۼޔ|*8)"v/1 dˀ}JW%Sڞx, 2oa M3T9tA7|; RьqYof/Zi4{7 .Pbŀ$}ޛU&1Gh=LB0ٛԯNJu0Q|v00yX<8NW#0n6i؟ž@a6m/7 ۞XOfZ_Âܤw Ts葫E8+=ȎN0iqH2yT_$ҽEqcq*{n7ϛ61(;u=Y{&k=/7SίTh~u-ϑٮT@?no#2_xȁB0Ԧ(&Kȃ^ }|b!{s kKv=BmCy>&m +1~G3r3an&*̍q3w<8d//vb7rum\HoMC3ܜ=e>cUoVqBOF9A_snq赸>>Qs]_w2x*(K-׼y+l8GA%G ae*Y*PTtvRf>`ؿU~ "=\z#Zx2~m P3]>/gp^{hͮ a|3֙M%8u HÞTntJvQ 25)>vx~s0`B/Z,k $=H@g?RhM:_\1PdgGϩ{3:ȻxK*R"8˵0#'B1G)/?*- RSi@_W?1qSB4Eޡ8T 17u1;^k^dRf 0?\ghBlr&(% &R@6D$#O6#37cX =oLβja0I3hqR!xn'ڼ)Tp.b`zeJڡ['$O=>5G+Ͽ*}/3>3Vvv=Lf1+#Pt鱨yUf&{hk;G З؊q29‡!GǍڍN9Pk7ʵT-= l;`z Ѻ*j{xP:2 T, NmoLX @@T>| @Rš"@JObr=, U n@ "-ثjľ 6L\,/BVYs)v|[P_. ʡcBGq)rz ?!+Q:Byikx>mang%CTEw={d1?!}^񺆏FW$V_Qt(՗e 1'SL{TSԖԴ>S?dAx\Kek`%lJ:LJI,ޙU>@:V0"M8 @{,ǾӶRƗZ널VbΰDoYT0.F JļŜ7E>Uzz #H=VJۭ`v,͈n7:> -+㔌lN7ܳu:׮U!:Bgu6 : fg^mT=D.u#yG 8Pl,uõcX_1tgƎuc`U=1W&l~=*\ '~xggCGRG}HDQOxh<"q'D Uar2 `f.7YR.ḱ׍aάҵ| T]݅Uaj^4 Ӈ.HĞ uc9ICKb#[M(kk*)Nd6/exEөSZ0ȣAYZ>uvE.s=b]H-4&J (d 0vPtaqOjٕ/ryg~W'=.{8܍̃N [ʑBuc.BVnVG8xEJ+2ge}&4 6.>?3R/Z@'YYw.xبSd@JF8=LӘh<%, _?Aέg{xיZl_MY =3Z0dfP9d,˜L`Dd ʏy9.3m[iM)SG9:gQP%%%u|;TG'^]9ttKfϮLZ?$N& UMOFNO}G8k9|~!W C7#\Хi`l_NhS#Gmꔚm'yc,K3r)T]yLXw͵"♺zHT>GY)H`6Dp㣥aKRǽH@Db#])% g*\0k FȃS|_r}_0>x{Rfow!D<thG*o}zy}X&G4fLINk%=nUg70"mŘy/ /E}DzOA)h i.vV&:(C x΍>mꪢ`<6'Ǩ/LA(h,KqSD }Z{ӯ®QՉL(CX[ TB17ӺtJjroxp||)oJZ͎F&| ؙ.5hn6Taќ1CBRS$0#4_nE܄= bm![@MgWSl&fٙŐ^US2%O}'t=4lX$ִD=oKs2lD/؍{)S]jnڵ=h3NGWƟD7 ˿qmaeߝ^w~:V?mQ:BԘtOCX_ٟ/ޜ5'ܑdu|o/d=hN&:$"$C=dvX˜tP|un6.447m ]!b!xi-yiqޏ|#5^mOa(Y,)q׺4f VPJ? JǜYp;vj,6ˊ<W@՚ :K|#Y<O'(,5i P Mr2vD+U`+., (}%X$,J_KP\{U;;OSlJ,iABG3 rU(([\?&d.޺:;5@o]P٘0%}|/McPf#ZǛnC݀Ci`VԒsl?d}c6qwnۻtщ8%3qI Q炃NInt5gRά`y9!cK%D\`Q2g.sAn[up}DTd6'QWyİ{ num!34.n; rG³%1Cqc=9- Sp!r2@ 2:9Ц/"M5xC=#R~]3tVQ.@2ez[0?7)/ [l޵bzk$˃mM͟x$:e Uri n8vEu|_,32p q ,p O:x0 7*!QQިO؋ 4k0ծ'9 -M~D8/k+utRch3A&ˬKk I p(QD\4-Iz` ť*X\sl/3Gi *f,M4謞6*㩖2Ԭ:6C 1D9"3&6ü43t1Е$`͕gX/pMR͈V̤ܼ߮&4#sô(8O8gKj#>CRz\psEC IgCFR=(K``ߘ8j?= fiSl>&'4G7aWVP˃St9@FD7:$w2@`8'0(u]xsI汫=Va9,f-`y@bW&`\5f"BokH hSE}_iW=A(%©Sbnzu1jWT,ÄfCgIh=a3?hs)J\6BC'c>嚌{auݓT}صO}E?Q9_7nW9t7H||ퟤb*&^vg:oʑpMs%bλ joUN%byɛgۮi2lKm܂ݾ1oes3-Go(rW0y(zOdѰA$γ>^@&Q*)1Bx4R,ȻA>3Quk+aC (pEO 堛5{U E''_t_!]b|iy3 ȝ^^61\di\6ЙFhPi+ @ jKڋ>q7T{Σ#3^3p[; p/L9BbP4Zp+b٤Ʊw4iˆŷ"uOv+r-vi: vtPwG}~QYJ'O<;o sK_^΍`p=Y異/:ns|ρ*y&P[ƺ/iW|vN@+c/3-L`_X3024Y1Mv3R645kjzsG_t~{hN;-D|<>=ќ9QwNJ\#Ogo `4C%N|;e0S/ z߸J (=`7~E0d9_hԌO4pĕ'/vARɠ?GGS踘 GYʐ<1s-0DR~?;'wWD"Ec ;lF$[ڈt Í \qOPgD۾S@ &x#>hUV#g"v~>9.ӣJȺE^|^|?Ԣ؟E@@<XX_SJ5^rU]c~uyO73tֽR`w\"bKsE2=.6#p6-a>Y#[?g/\Kf|!cbgb?S1,#ce3abQ mrFޥE^ݩ]\=<)\iz(7KkRԖ8xvs z?øGk8ݤ̄Q drk> ;tw&SH.[?j* tyd4w oj[qy<8E۵hX ;.bGNgl3iR(a"%Hѱj >HγqOj!j6huuu ?OFIt_'_n_NjY1'Y0%67ޘh;͓zce ͞O5k1. ;>uo_qi6e0i]Tf!}h.*jLv/mKH4]FGY'^R򉿯][kbgvRNv ?};*~& nt^BJffΗU5 W͖[}"&0Bϐmuew:\-[eu8ds8P"ޙ6=2LeI0 K3M I#Xp=-ĭ$kaWC F%Lhe6!tUPƌf&S.0TCdžuLM "LyM@dxoU cK$_vH#ǽ8D nWl3APnsOR;tb֑Y}Vr+^ M1N`4Y*#z;Ty_<<=ty׽zBN!g-L~^x⹖zNE}9f՞>t9z'?ћ҂gR:0T +U2t M*&CA>焃90h.Ym =v<:ږ<.`@:(uEE6: -6-0dA)mJȨWU֩eϥi$!JHBs*O|X,  :#wTs%|Qz1TE[zF5Q3xT43hխ]I!2)ٚ\Qu&Oz",<|*ew`|8c@f<[KQGAgR}T~ךC`șvlqhXS-!#RbYتF*G|=^x]Lᮻ ǰ\b,S6~{&؊@91'_)"Z+MeH y owaGGTU.ڰXSCm΂t mġKֹYV7GҚ%6t8SFoXt)JziW1`Zu][ N{6(km' Ȉ]^VD˞! 
؁BeƓ\┽_<X"~+g8bj/uWLF1^hBpx NE1b,FZ/d@.osywwQh#nLH,7h>iL؋+!?|y+F熜QaGzN4w|_zf{p+~JnYf^q5xlqLXݪ 5w cr٤׏}L]t5f~Fƭ_r_s73o+ɷq f-M#+vܟS_yC4}WX ۰y~/%0Inb`?cЗy]5m ] {wח2}eyzx4w_YhK?~?'ȤvS UN7(z9ϊ ࣏fC'Ä8܎PDuO n{q8R'Uh+챩j)@/g zھsn@bf} Zqn(H~\ce +|6Fsk4)h<ܫz,b;ry\e|(eT[J mQj"GѦG1+\QLXҖI]Ly3es.=8ް.u+WE=nEٞ= ˞\ϪPKmTIIz R.K[%(vc*}gu SaiW#tkNp)<. ЀlU9X"PXwOGS,҅YK@~C,aXg~ш[1^"⽤,CX';®3$`F0%7zotIxxT!ѱkİʄ:MGOhj&dmW k4[ѷ?($BQ:&/;{h ua8?Cp@gueꀋʷ2s oׁG5C F|d35<[싡е3B> r"V)wE&`{$S?f1.`{s=qk15pJ}z{:W렽EIjAMp.rDrOv&^&o-6hO9X٩ `6$s0K^˗< Oߦr= 43PWg: D*skn%ص:D,AnZHr@|^' _ec}Y*܊oAr.}fM ,=ǟd[ea.JK0mX,S/(q=`&M)I2 NYaO4I\&\R/ 2bkaYv,"q=W!"{O9x~j6=ll$m_C\,^Rg'1HM>f%8^Uп ׈L?I% X\p2683톢Pw%aiP(wQ87n4(kn  QJv p~Yl<:M< bD CH ĥsJHCRta@``̋;i0xj +&F/d,큍u{u~f_IP{)1k$GBT mv5_"-?֔>/ڐNNsIJ4@wbh" N TRi3rg@MTԧ;G!mSGbx*lnח[]M7??n άo/|e-igֵ/__7fzw >{~ݤD<9l~0<~>ӌZI ec5`u8 Dx_[R6.~ ~O]RVRr$m1 o7?޿z'QgٝƇ/.Е,uC|="O{G]tNQݮ;+PKq#w] X[6R٥frS筹-;1:x5:U{HtjZn bdepmY/1+V v N1n҆Ij +O.XHQ1M?KLfn xQv6jAsb.潤«Hm{qк t /ˊ=$-sˈ@boz'ٮw#_:A3,:!NO)wjlJ-pJ 'wpE,jm|  G5f>טI4F&VF4EVNjrz2u7[xwTL{bO1s{9c5ͣxʮeo#rM5tQ>jGd/-fɰxV` BF˗̊>g+`Lb&&1_=VIxiN(X&bL&bj%B.3B"2uu(:"8G$cʵwm2Z>]sǩI(pMB,'Tw TGs]]m)6T]Tysuc޴%@V)Iӹfjbϔi8Ff̦9Ӣ_4W Dgaҿ{g) 3]:.h` q'aVC6k(t_ŁtVS>`fN?<ସ[a@\y\H0UBW*A9zJ6 "#9ON`)KcX«E|-ؓ\4sJJ/֍DS4\ݱdjˣu"`a'f>4cљSკcɑQdVr1\s2k*(.ގXF:n)9bëw?_[N#6k~78D?9J*FG`6 Y6Vn0| uitD DĹՒD(XH;ܠvz Ə"vs _J-;9tYi3wW}i^ICWۅWC[q3oXHU}o^|"%L@Hy'Z^3v FX}24Ŏ}/ڍb [__'=vrqv0v-Hgoh^}CKR'|h>|ћ` i0W`_X]HiݎoZj,9SWhcN(mts&byJWwr^C%]p sRp>7Kz7p@@}av/>urҝ~_zu!<&}I1VZs㒤 u93}&44/e^$;ז7F CaŜej쏠)E0~r1ϵ>xN3}O+U%1__H=5;E6~W}auz^S@qWeTtg~8[LL)Z=OBt v?;bj$yO IBCN;ꗑTELό*bq^ &t41hGOԎE͚g^ y"}%OSdʴāS2pZy hx `&f:O1?^t$Tr (?g3 $Ņ%蕵jFk1}'F\pOoLѸ$al~ ~ >u-\ǯ+.hUPj,O0EʋL$"S @$-5ޥϽq=JHO⋇z@1>@%:,v`˱)gqpەEƺt'#LMlew}"]4-R c4dl: Y,AJ |#}[0wi҇@+yg["aOlU-J`ܹMS3 +d= jb~h}p]^;՟0+7+0$H JD(/zٔq!+D*`7#{$$`1]zc@1-v[MjpUj2hi OSdϬd# >i:]l9?̡Lf!W/w bx|x !-IK$Z>/c ?](%|hG{^؊cKԶՍx +gN"݉N2/ nĀ)[CAL_S ݤ ~9Y*=vRgp9oO;ω@wk:@ ¯}?![ym۷jk bQAWԑ"eh0 ;/ʲ`ډ?A kaա/ >It,`gJ{%Оb ʅt?)*Lfbp֌r^:8jv]*Jb@;\2*棨 glsjyvoCuc,`L2ùKo巜Xb|QWEfQZٿ3s.iKdY <3ёu t>m%:䆛&錷.AV#fs3˿t Xt9/s;ЂlFjf3%HU?xT;"\O1e/рP9]BvIⱲEZ!6(^Z]$hP>2NN;gP4Թ ;kbN _ncwfI,Xw;js먜JNDfHT:1^\pWS]E)j$>::6θ%sZcTZpy>gRgCܜWCI䂶RJ'7>òtł!$n(ˣ3&֧6M:1s6c+1Qw8qءt=Y* Uɗ &Oܽ%NݲM$N6c]p/΂ˊ$:*=jm$C⽔Y ou6ጅNA%hMR1!< h$&?2^ݪ܆7<\_.[n9.EVpLuՒ|9 Os -)"Eǃ@E2]@0x<4UJ35Uc?lfd'1>nГP 2v8,cCv)s eQW. MNpѓy\bzx p6/]fyz_%d9Xs5K1q)Ɏƃ6 a7]IWcO$@oB\|Ah'i~yn#X\&YUTǬltĂ>A?2T]{<OPh ?ǢR)N`ٸ "sdۀ[::B 29iǕؾh~.렛$jЀ:Z_O= ւ{ ZYaE9(qb!2>w^ TG=Z'N0R4Yju A+)LK~yԽ`H,rI 04EՀ $ 9@Ww}Fh ~ɤ +epV״)\zd2m`F9k˄H&1=Jyv_K9 ;fU]}7;;]}qR( 8w-굛&!c@RyS$Hy@v4z0Bx~MoD90{C{3(y yB<Ӣ&ITF#-uQ)BIzA \Ir(E}or8\q/2Q4刾0v|nW7Vp~F#.o{r(B#za?>*0'?HTP) x[ /GMC0~,vߢ>L:-Y@쌴+氻^^j{AGFQIg [aRAia=4RA_%fwcyj4Ё @!fVZAq2H (ФY}diQr[tv"5MϨaWJ̐sQ\[lYEƳ8#0!iqgLiTB棵&K-B^pb`ғ iMzewˎT:'&]4xoYYK.R.kQ_]ǣ9_ӷ@JBwh1,(XO3H`)Î][#00n[Srn~ GRԚy|/֙m#5W/}ע yKZY[w'FJ8 S:8ЕZڒd:JݓyMo_t{&oUEen9erJ=QkDA2*Iv6T?s6w>LIJgiUAnի3BDFjlCp=,bX4KR4 ^$H2Qhr$%tQXotn&"Bf{04WU2ӆ @Fk$q-+̃h 0Z+7*:{<bHN, S$`Xi!FNx A(NEYib.nV[vPjueEr۞ u]\xAJz!z7w΋ ΩM>xo#ޏ\`7jW[Dzjػ?߽э,KRA=4W2K"ʹOeȴ &i`ڀB9O3&_+PKŨv'@m,\6o]t}<%Vstq?'V^>ڗzgI32@}mSƴr,I%wNgz~4ݎ_rBZ鉙ꉎ!W}s`P'[s. ;N N*+Y#:|pY.( Cd";z٧nEksŐϦ6z~:`TFI|(NSY= x{drQXR>axF>SWvOy:\XYcVs}{"aW  MUddFaY Z]V8mns 5!'Njiզ K=`Q nͯ2u6??QXmoACSkщ"ڧ"Sp6M2hgjxLq9K @_0֩#[|mFFo>oO#bh,MGFhXCXkj%"nl8g54+2\v"z1ab9V%gmBp͗؂* st(M@#yVr?=q41{^ D,;bݵe[ KLljgDT}/o64WhCر`ktL󸽭" n=Z U?1 Hh`yPÅvYX?}v% (XJ \DC wO/{1<0^a.,`@}~P#b{CBOXK]늖HE^IHGV3% El $xG9r~:F"XTD<  )|j|-Q0oeM<}:7V} v[t:|u@>Bb !qfN*B-5{4+dO0RN,*)%5 y .":KF;"@eR. FCANAWNz 9 vm ,*dNZ~P=нr 3$t57NjXv><[LgLX> G Ҽɐ!'a. 
zc/6 } ʤ[c\-Dct$bŹ/7W[wc4 J,(楝3 J\3U7 zۦB52럽ڞYJ WEG״PH~"Q+34..̿^Qz#o R-q蒮x\&!W’cܙ?$,4J'-?$ao95vU0vݠ:<.qeWx\F58ĭGhʡAoLjiŸ9ZkaK3Gdk =b8.jVVTѨbq[6uz>Cc6ˆٰt}r%Ǩ cHIiS_bS_4b5.ΨQ$30r: 7|[qH\(E!&dMR%p/aj_ N/=`aẌ16L48`6=ܟOi0b:`EnJۊo~ɿW OwZnjٱ=Y7׾Zmj._C(\yu^ hY0-jf>pIq$Sd9y+ދV؉)q%5߉KzDv[xY-=(P. ;r`_RÄ$\҃W|FSQ-U[hh/LݓLxҔk1pnR}h2AG_/ 7_ 1kEzr*#mDO9l>5OZm8Val_)*mk#LΘ@XҡbxqV7YXM?*1hE zuC P!.s~TpwOƙrߐny4Qft8";l ԇs%BCH>M{HI PȬ8N.Ps ~u>~5{L☐>͙:\ݫC5TVE~Bx>Fvtk' ̓bX`* IXKE{R AO\դyA̽QDZݮ{ Kmajruo867H@p R :Uzʠnl:x+}shxV}Qw `lj1 q&dd `gDCO07 K<4`Tt֭:z1(eطv԰kzN{~m'>q slPXH(Q:g+`UR˳4'" ʽ#&@llqd,s=k\(zڒ&Ik}Ql.91dr/Ҙth޶r r'Ö\0T+>>^Em b a$8NL`kt{\Pɣ>һ}KULE͡Z@,ĉƮdȡ T}2RY}Zz$cL7",-8U` ³e)̐4eq0 D,򫬎zޜڮFi(Aep9o.%A3Pq8o4vkBX6qUy+}N4\D4>V >/*>F4iL--2 DE1lU0xL}[1M;/Y]f;DYl/GJ^_`ϼ{cCϙmɿ 1,ԪL=wc_ٛFjZC7-"1Qaw{\qؗ}Xǿ'pu0uM}|vodW=ӿ??=,·Se)1n^fI_< ?:Ѭ?WA4#ؑhkA% %6qD\ӌ>S  2c"18*m֥9ohN{.(yYO_&a8,VA,1-a{`~߬8.^ m >LGg'2j,.w!8ANǟLe,(JtJUj٩:,nZf5adĜ aL{u>[|!11 Ng1_H [Dž̇nT[S)kg+Z_8w55-|W2ط Ȫצ䥈|sz)3FVZrɇÚZ ٶȋA{ɇ÷vP]Z3yR)@ed&ǞֶA&~ ?6bjb;4)bThp^:Psٺ6=±gȟt*muws`xR| )n6JXhjZ x($҇#5ߘ 1 Njr댒Ӊ~?77<$hiӾؖ\hW_pxНۣ:sLE 2BaTɹ0L퉑r p}2_ַnۄѷқ @$I%Y|b:Zy((t27GKsu%Eiy(8\e" _Gc2E`G-܄$u-φa{ۤWF''bDiᤊރXT%cKV, j8N]U+(DO2:??8a#O")vxZ'MG޾Y csg@Ư?~v@9Hl=ᢥIp3ۺ蘜ur[] b=u~ܯ;T4"GUXZF0ڨ~^>*)U{+y|Ҫl[7С;p; hXm7 xhp|`?ynD( &7KsZVhDrOVxT[nMYB92 T+(~<ᵲ2C"s _wD>ZBTmq?:vSW?w3t4ŵ]?Q+R(vpCА7yXjإJk $J6OkJ/d8W x;ڃ@Vk~hq{y6ܯM?, ϑ=9q0 ÎC2d6ACr6vb׹/:%r(Ywqvq]}v'Ǖ*SoA2h b$ڮ=1Y˚82'),sU}0n+jؙQGf2>8L>Y ƞYQ&38X]ևq/ù.'J0f:G.̑y&̤5" 7bv+;H"PXt~V*7 c,af:bjHpĔڮѡz˱~IKGaOŕk ]Ki*K&>{x@Oi_*N>pöʏi `_] 7.sFmnS=vUЊ&E,>j K `6q 9X:Kv&`ҀSE(nc*cm `(s8]}ce]C؁.J,uGgnj l<(P9X 4F,YZiu8w/=S.Nꇐ5 RY]5dt43΋2|IoǾ(; ~3M=Zv>#e[n$t:^Za/tjS}R*vxy'v4 $YnoWgl.!1]sў=B{J]M؅$,.e$ igv+ fx洠0؂9K _(M%adi@J&Ry-ǘ/f%m (p-yn`۹轞`amﰷ^Swͳ6XE˳OEbyMei;sVL.a2jv% :5!; 7IjlmYAY; EЫdۚY1a1ӓxBj9,^ h>X &-R=$L"Oʼf10ğo-6K8f>"jAکz08hvhO}tḆ-G-t9;?x 1B @g,Keq)L;^WUyy1g>:{|}B4ڍb63@Phԍ9,϶FrOsy9Y':5MTbWE-uPo}|)pMhln2;V^FO?uegCtLBS?+ҍ"3{X)v%<_hp~=æ50sd;@J^SiPdK?]N֮~zj㋥gnUuK7ž^AמO] 9;}ǟ t>HCыJo{\8$M<|Q}/< n'IA`'Mbנ {̃A]ڇ+[@B, /caThmO4}^g= _򁲛{fӂ;x?3He̸=,f}|"!"#:K~W6GuqK(*uA?M}QatvRzdE;;rIx6Fe2k] Q[Cž&PvA9dwp5vK"Pیn6eav);rGqȨ㕩wSg fҾ>;oF=ܕוEN2[KN'oo>!vrYBn|߷ Nފ::JmIcגQD~r h\P I|g'ϼ̻RR E8˔9; eMj0<0=ea;9geѹPAXxk;%+HMR5:%|_K+k v!S7j9mG Z,C *  TILyxzG\"yzp&m-m|3A 8F9-MC@%~ jӖÁ/'e)gѬ=9 c}RVՋfZ=o )+?$ꣾYkдåV?R.6BDE2x$|:;a'C{ HƔhb$o8ƵTP@,^ݳ4<"j`5YNl`}_}"mž38<5S0Pq |L FӡCPa.kYnj%`Ke2H63% +\rO@(,rfS=bZrwyR^Fmr:$T6vAD(&44PԣrZ=9 Lϩk;` UMPʭ狑 9b#&^Ϧ{}tN-S藱hZE[C_ 'J+F h-bXU?& ;@Yĕ2WZf@樺{&0QTh;! 
ڛP%´WK$v}vrbeA0 MbӢ$Snط1gZuҙ,{:nM6[/5= ;8s?CAΠbJۺJ8lHY-BqP=;513DxI--QCa ^&(tiмk6c#c ' KӖ~ ap9\]A9Zٳpp9Q5RlQe\PAHQp縍nFxhb0NhLhtߪc`vWAbh'd'+6m6j=<8Z\,ݨuiTͼ%AsAF+H0]˚ˎvDK->UXYABJȩh{L%3աreqi_k*8-|#Q]vqt+J=7v:|*MG,娖yApP O47kW wo1Cgmɋ3vó1:PW'8ىȜcU.c2 )f[ঠXBOڲ?8"i4?evk[)~U/ 揵َi>~[fT~>vŁw`/* Q̹Nm#'}(m}Ǟ':!H> uvΠ-sX V=jdr :PW >/IXʏM`S:gXٹ>=5%u>YVFAc* *@\h6"sy0V3o1Wم௢/T]ϙ+uAAB2<dsFvFY'0ǒOCblYѕ98씩q}ϿIG7|[R Yyֳ>-SZ+ R p(4+uXLʿzTZ`Bcu"9 JQ@߈gk=dX |uຘ5Z+͖totsB2#LggKŏ~\{kpdhDy̻ϼ3d 1/ؠ=|6]G{VIrU /BkgGo^ כ[M_yr=)WDCD-Y3TB2ۜ$Tk& ȑ %`54Xu 6 pW15866u0/+6چůѦ#hQ `&%i&n05 +Zq oخgYⷚd7'#3UM0==u;۬g{LQOD9ܤ`RW'2P8*qjc` #ZH@1gcl eSK~˚㱇aeXoUכeB1lQ5bD1+QY 7 =gHQt]j/sM}hMu(ǍFg~1]7&|-| y@!E54gUcv /;UX?nʎ w9PO)Y2Ofwf5ʤ,y> |¦51k?7f# Ӝ-z3!:ϫi{D%VjsBNϋVeƲT4T%7Aik_v  p{0\``m2z~?16jp|䞢X+>c?+S-2'?G)?ր_ko$o5 O|.}8Y{bAlo+~]ID4jM̥-$Ay}v`cnȨ'-{b !${ ^2q͢ǀlKB{xݹBf*kI2bYZ\60iJSg 70 xzJqίFh.xmvu񄒐.Vaa~{-¾1t/f3S0{9^3h=,ާ=&u1"qUq}sџN̕j*33/$CT{E)\N^ M0ҳk/B,R&l4_@ ]ije -%9X-vx8rav7.)U An ugWKˮ |s% J(ne&޻6"C:lT(©U P8HmbFED8ҙ3{p\uh۾lå3qؘRo&;sH&nq:Iog֡@imR=~^O;,1Af/;Įd!Օ)(؄0=ŏm Be6Z4f;x sp}4ӵ8#{f'KXQ)ZT괐_?}?@RT4^P6Nu mR14O֒BhQ ]j\bDQ1~i0;G_-̓r+бH-u6[e#[uz&,mAnK6j$'vrQ_NK O&@ >w('E'',$Es3[;>5 t}íf♿gk 6$l%Ι MLx?0s4 v ״'U?AMQ5ݓ3ٵcwOS%د!AgHbtV1.zת♹p/#k:Dg}==۬ ?8w̐!B B:U1!ZOGqU=]0AIJdqSwF^g ܘc(N)JcN:>[)؏Zz)V,5i_'u$/qmը#lY8hfOLu)K2P#ߝvg\jH/} sh)}z:B[}w+< ZPh^i#3JD^U;ePM!*wmowpw3|ޗUuWLFl7zو [ it0MpEfroZ?#OhY'А "5R~兙 Y lǂ-` Ϥ|J_pfܼp|N.@W7 _+;%M`I;D}qR:f۶"JY"x@'S+RCb7(w&Ք ;?p~9^DS7T_ f%I'~j3kXƙ %>RfWxTn}jTaʮb[_xs*r\@+_žmN,t{&džнpꋏ{M ɘmݥM? ;͒qđ1sYUv0yg)GL:zc#GnVpA ȼ0ԛ8ΪǢzlb#xF7=TBAlJ`8jGǗzT3Rid{gߓn*s suz2B(.W5?J:1+7nȞ'/€P-֋R׾vV34aDUy{8%=ɹ0-2𹫑שlu?S_oOBbG{f?"WԑrN&:;”BqELXTdQ}ijr\s$j]Ug aY 1fxA-ѤK JDb*nuo6-a{yԥcK.tLs|؛u-#=8ѓ'RWbȐDƀ!dO+íM.)X6|-y2Z{'<u?._CzcЫb~ DfVǨ=r(4"=Z3Wg.\&M]5l^1A@{tX;7Fټ5*ԣbz5qbxK;w2.}.9 |,nĚz֕PP1XcOj[I)foݱm/20i R8'؈zA3T]y?ى)H<\Ǹs74&7D&É⼈~@Z_d3\I[]}nLMۇee|RMvLJ ȦQ~,زAMvttbOC9?u3Q 7Tlvzi,rڪ_~Qt6,z__{6pԹ@ `훋eP( &2DCȎPi_Ywz+y:U"Mrrc[]j/_msx %,Eӳ$pW6&  v͂C˱S#2Y̭ɎVHf*@z$Drn/XLJZVK}@* "hKΉV*%ɂ.gQewnG1Olrm)E>U#>ާy4 )7װ9p/c3ȺތP[Xf N|;vh59Z 煰}eNd;Uݖ.hoJ,=@Wڞ?K(Muȃ>k.@`غ)^u|csJ+Ac촰CҔ;b]$"7&e;ے{cb/Pv yfwz1fUJ1ix.XdP)iČ duKHBKҩkfju8mɳF-/x/u屃wN'_oWكޖl:w+ }Og'6aǀL+Xl84(mc"N:^к0|Ml?$3|g?w 7 㧜VXȗ~瓏n3 VZ|wدSȲ4izaY7"^|XUtɞGjʌ̤nw6~1|brv2=]yT~RiReoZӽ\TUe BL߿Tz&L8bơZׅGrWji;4ˉO6{r[C<Ecid|{~tUaXbXr-b=)yf4+lbK9So}:^R?74xkdM~Oԙw CKFo y̫Pp~>F:H!bk<44+V}urU#v⛼Ń6fgR\lMNF_a S 'fvkTVs3 K$f"0רWjwv9f> nVp/>_Ft#Sg&\8ץyCiP| 3nA _a ſe 7 5m0v05FZ2f0LJLNg^gk W@/ՠD`$xg3]`E pa_^籏;dΣsg@b@BUfA0~Oj祩5[|՝m}vl(|??A޸>j\,y_pd^,Hzphn$e D@6P~eK05 R/by` zX& #fEt[]+Eϊ]ޤRn{8g+kiuO m@h ǮO}WРCj7E\ PIT RG[`vcaTTzk^\w^XxAca$9ȭSd` 2{;4321t^ р/8LdD;et0O}E@k@yl/I! qpPs!!}3f93{0/=fQ(& 1^^;̶lrvhjTtPZܣ*2gI\CGJA.%1qU$$M/~۾tUڣ"}:wzPlr&'lشZBq,?ž Khbt/μ>X;ee?@ъҐt+!I`նT&ב[Vy9ϻ+fۻ[Kq-qqJ8w/ :Hr9 rq҃ -жߖztq1jcȟ NW㺮6FO~CϸqgŚ%6ޭM<79y_pf?| kR(Z,MX~SBL%-X@`6ótOУhC5Wy8P-O‹UӤۓLGC'ä`Sh̕鯛vvt%mJ,|x-uPۣv>0z YTTykXQ@i?Ys.0/ύ1GAӓ[=s`|Sҝ#9/T{T(l`k%wڼxe%K+cTF5W9w@|Ad_ ;]6lyE/؉yֻ4v/Qpy!2x>VsSN̂n>emX_Xpq`/g7ucIA7,__B-q03?z5-8ZÊ帗^C_\`Hlsd#Sqj0ZSmp֜ UTeàxvbD5cLW?|cB3AB6Ejx>-Nvy ˮ&bAOJP|L咕$\ mJ`$ClkāFQ5n1^ ,2֦&/੆ldR-Ǹe))At 7\6SHϗ3I i(_{&„8E 4 $_}_P_St4錰bP̘|&L b9?J܇g3IIRzVTgeZsn!fnRtD!} c׃>eUNʝRUwLh1 Л7 }~1\4k?6d3/FZ``>AT>ZѴ[Z;OQG\F.Iu&r玦Q@V}iυBˁs]#{e( _ 㔛3;&= O>HΪ_^@kM7PaVq>OxI5n[*DQG~i/m1n(hٙ`-/vWݞ|lGe|ט[%ZD{QBn_v3([· > \X IJf9p)RjZKc۝йȡjPߘJTyy)uZ2KqX,puԠ_h2Y35Uk'ܩ2 G.*v I/3c=1UbJclIs OkL!_i4ꟾ}u ׿+Q!VщmY#lBfPŗ? 
R\9  dFUge 4eHZf"S~ ;ЌԙY6Om\>/t" '} _A;i3j$ [8'H\l& ھF[U447}кyt@_M?$pϵp^nn^U )*{>s;wթ5h(`~kRMVkm2C}͆:q:<ص>xZy s w=24x W3xC,l"W)BoDJwVt[51S\}Tg,ү9;ʹL3?j!WHSPP⽐$ z\C~ p؏Wܷy1OZ=XC L(G;Uۋ\AVZ,OG7*[l̙+'>YA'@tzirWBaVW2̥6p#R* Xxh;n?.'\d^"UJÒ7_qB dHS]M?$#GG|ׁT^^t%O?]A7i,Le 8c IJFKMӿR=qv#3brt/>IzѦ6G PG/m><gtd.cµ̃X- X]>:}P4q x\ONAd `{n;ƣ}cyk5;!vy8'RG=vsA!Q(fko_n-ekm|ןoo&+(˳RΥyrAׂ2ì56oNeH統-Lq!x}ym^SmZGuI!sz4>LҼgKOj$Rʢ# pFˇ!")>eʻf5ީrAݣѐU26&T?8 8ބ^n'JAgf=ӭ?> @u/S]3VotߧiׇɫU˩kܚXq1`)avӹRwgc{GNazg-x/ ǻ533.wlXfv*0:Ҟdwcy6\H(ljk$txZYez{cѿZ/anmNQ{`I*!w9ݺ9g,ILea8?kxك /Bu(Z3 kP *F3-Hu$fJ08U2%zI➡Ŧt УWW[Oar5\QtESb"||e\+e ^"Cc؋'0Ŷ+:$F TH* *CϘp{(W%ıԐl&S[B0@.o).F_֧W: u`ST|łd i A4=e,K1I#8zE=ѷ V~ؗN'H0̺b b }YpcdR_u˺yܛ!KZ7scL#"(6f=8\G=Yr%:Z|[ZEulE c/c̹ {Oj[GWmoaYUWşSkYi TOGzr{gvbIM8Vǿe7;.Wjx\GkHqׯ1IA~&?h.qk>drҨ Zo5j~~}"#@<4ݻ^򋛗əyǟ+)w7W zz\ӊs;U?P K-: ¤^.Cfe;2f|m<>:^irAkO6sY-ݵBg^4v F2烝5z+$բ1gjx`-6L TV޸VhBg =u8.;Q7mOw`]R;mh 0Iph?ƛ۬V>Y+c4rKgAᬜ}ٿT 4BHeri 1 $z, (s>އDɚÞqp>&pV'T"+OmۻRV̸ B_~lK;XF{4$la1AwEp~0zѵ-_wo^:qSilȐon@Q╩cx˫}&pTy'(͘=S7r|s |C1 ~ aim[rQ&P_;>M + z5UJT?CV3"_VCT^gCu\DJЅE56ĥ4$kE#wpiAY! '`K(&g  :BV MMU,90TWTy>Q;OrsW:e'渧Gu+jps*`8tuZZ1nh,VS3[i;{ך={pڷV>{:U(@a">\cObU;tw9R9r8KVD"}Бz]qS9{e1_ g"Ud]F‹gcj?N`ywO}*^ͮEg2+_Pg X,5Y]/.I]UVwkP]j}/_"7&ж0rVӀdzi=j BuΏy0ܛNj.mx:eѝ[$wF `ZkeF>ANO_9 •zw*.*ŵyˏ>TON7'c8g ~.>KMA=l.GI_]' RO)4Ve9; l7 K*bodU™>ÚRXF9T}6^;mVl>赂K{dCfئiUۜr?A}"QR@ÓnA<3zx(_ʻ >ɫ4jK#4-%/kL^V#'dǟ؟rU \o0͢6!zhZt3+ډ 6U* 5Vx6"U_fP;.QYZM14d3P_ 2Ȩ8Gȁq43շLWhw9^™U=eP!IǛ4+:+>чF\}d#sPo`nj$0ڛ:Mv(ڪI(|l?Au4Z!ǫ3Icҳ]ޔt _dFiݞ`EĀž>FEYl1 c'y@Q{Q7`Ny @;OAT1nQל$оiF}xYu9mT~ +bRW'Ŧ$F=RqeCJ?;@xUYPuwJ˪=csv|]py[\: x۔jK)SqZuwVIK]3:}ڙu:3mnugqmt'S}R\-K+>q3J#0I|Hk &!q .֣ g)(.VO SIHagͨ1Uvf\7KЮ#bCA2Ωa6T8NdgI%sB/QgZ4ҋuћ}-~9(k8AIt2N7 S;J;C:CU=xY,ʻ%~θ,?l]0NOLTo$]t6vѵP&'w:e>&B2'oыkY9PrAqp}WҫTnP89юpu/Jצ6'봭^˧#?qc_[{D!K ~rdlgpqc)(鈖@Atn+_k _,\}{~ >iO> Kp5/ޥ;?t=rvGa΄k^iXb6MՄ,VIf] F*iO BvC~|T AQohlWz:9"9͌Dd*"ǭ|y U }k uώ=F-q>R'EH`lFL_?ځ=N6*ﵰ@!i9<=G"F%!# cLha yLtZRZNa^۴BwfsVCD JE:HONu2*B-|`uX@Ŗh1ӿ0uetr6iijE%|*L(uCB+g鋣hϺjTGROgs}xO <̨ڪDxN7g>:Hp^?FM’ `/V؟CŚ(rdՔl_-j/ϷPYa 5<Њpl)H Im}1VuՈ raH!h G;ӒXOMyr+8]ޫ+V9C3%K mC~]?L,:~zн6>bMB{kݣi"1{ӆW*KC/˫e *G5lt064k#ssm_"U,˜}՘'\3>+1=gڝonZj@wX>Aa7D!ê%,bڂPyW? Ŝ6S:zfLÑLy̕\ j@ӡVH/c`9 gOj75े 䕦Eer_l:`nXQwT)\۫yXqT{U|&$^`; "zÂ(5;CKI#pl XMpsw"z01Uk'RtjVۧS>PkZWm>Ro> ؗOh]9r-h(;S@_\ĭ1r Tr[KkBiVgyvcf~Nň[׼.jh3D/}]Sxb6 ]f^^Y腵EϏ;3Nfyu 2!Og_|x)5~g1o"7פQ978_jƳa7g:ݬ;7_&Z͠ %.: D!ֲиVnV,փef:\%\{, 48B+cފvo n{͡I@=Q >\'LÁk7$EQ7 ;2tp[ŢKAoqэ\(/h72Bߧ+V#K k:ܶHֺ:tk7@)'lXT@ƪ䐥)F1?" l)%oNs9-,euW4̺pr~RhLYq xhTXP7^!䘝0a@.j^.׶:\Ȣ_ $1_|ۦuFSOL9⨿ԋZ TjZl&m;09Јo{Zdg:H'MMhV$J*m6%40)1]hzX` Psh+{&Z+x]u9 ֹϫޒ4LgPu~+/∠[%-;NMQ=:\8ATKENqjGzfJ^/y_LNv3=5?ǬW 0zwF#+dQDUdϿ md[?ņy$ 8ݾXWqBBH ֪.%oywhKy5nKjqNv ?DtZ?{5hя%3BgP kᘎ=l œ\y/7J^7Czgk:}fBOԩ~vxp)U:8N̊'?mK 0w{{9;]#iTd Ix`x$}1Slcd ˒63~{9{z߽z}g’Lc僿p^?*#u0m~hj2[/p͠:_ L{dI.A[2_R͠ԐX?3BΞ1}[WsV;NŻe?D:$Dt 8GcH^۸_V^a7|67`v22(+1o|2o+ ٙsHx&h}ON:6'z2yI,S=lob,wλa=m1$<}گP?Yx#WXs,|@"ʠSi*!UztбtѵR\Њx~Ү5_+Ϣ|ᾄ/x 2}JT75gDn!9 [bܒh1M" Z%eҙ}9:/)0FҐi~~>2#׸;# Mh4 uMf xMz '^>.X8 ]j2е$=5ԑcmaqT(e+Y=A-{/k{ Ԉa0*9ys#bRU= , )Nl rn Mn8NI9p87'u(Y/aA zAd2W-w^Ey1`{(HkaGW|)ܪյ\/W$ZS:oo*)-µMn Ҧ -2F'K?1mN2fe'*3o. 
/:B^}و_hTύUqzt529)(AMǧm'-~ѥ?DŽW:RJ‡Pq5{ ^>bS+p앶N+ڄSqN5Y iUt5PXbV[an*QsBѢhijac"D%\hVedkRXJ͏δ\Oh (X!F`v>;׈́G3޹l5ILtcm,s ;CiG*]Cad$ևFϐ5:z}+/?j_3 xy)k츧N Wg_r ax\{v5I(f.szH‘)*7/Mpǟ?Tb"V}ʍvj*Ԟ^ TGdNq_(L7qVVs{C {a$chKLp(~qLF[R᳴L}RBBښ{ݪAϓ=rBe6E5[{؜^W\eG:f*’rS䉓zgA{^DX x~yg " +yLf˛/پNƑ-ל*5rB\AaIސT*_kBPg`̠+c6Gq=DqU\zpSGznRl8qI/f!e;?7#A,eG 8ytT8nnsa9DA$ensNz,-vV'wԥ]gNL FN~kovz#D ^+DaByrĺ~bVe];aOx"Lm{ր`.uh9A'Aлh$zwi~*Y3z> ͫF<Eҭxf7j& 5gw, ov/9aq% Od_7{Nް{qw]?jh`(&R<?[NF_в:ḌW`{2 o{^#?;#氢:k[{N +{TuǗ3TU>JR;E6~I@c:8yp' ~ b2XOLpC7cѭߚܾ ^\>ǻg젺z!G6}D_${|yo#\ 6sej$1墓*yZ;D$vy)Ƨ5>F 踫*B0X$qϫD6AbMS3rnwFhtxi=MDJJu xbèq0꙳bf.N%[@ :zi̖MYmZ(#V'u (E8sk$xK5}}1-`:{ $IkNuN"\Zr^;2WO.:7-ARBO׺kMDf(~X=]6so߫^F#-mℿ*rx@T_<6]ldҏ"ടDŽ!;R*zIDATxZx֓ߌLL6߶Bg ^zK;OR(e:t@_kqʮ+EawT#״DǼEOAɔ +R>?`<3!7|oaʢ[?ɗEjSvjU62#r,[@uUn=]#B 0dBF\Npc:3H4ݟTEJlhU.s H/ܟ 2 p^ĪtngrJ`ϴ[q0!O yX橾 gdPzPåз :6^6[Q; nC ZqXPr zd2aOG A^:TXF @U:zaMǐ9򄆡աN$>O=gymTXwja#W{n+=ɳoAoicx~/wlt;"ZEFĩ;ٍ=lﻚQ+Ẁ޿}Q <1;. U?Bލ?zk_c}'\K>_5MJU \ ?S'i°r˿Ş ?5yuVfG|O&8WUN|R䦻P}?StYJUd w:T_îԧٝ.̥2BE6e4dKZt)=ReZ &eLjo/Wu0Jء̣tZ;j}0[ԵW? Y;.D¡As9tZ@Zg#_D4 jۿ] p&< ,da}~%н6i~Ib+[}Q5_Ƚ-* S3E9&A|0Iɨ\?kZFR dFSH#+᧛An).Ev!VzC˓$ަ]p⻶mGc+^ɵ7c@W9RX)hᱍcϋaz:n?tktߜ֏Q m-9*~,T̈́jwM-i[͟!O8/smQ ifrJ:=ڜlLWxG?aq@JA:j^/Q>vJgIR%ޞi]YvUG*"BF{~$W ݮ/ҵx1IYNo2|_tU>/3!g S*.AL_Z5NQk14,CY1 LbYn|\NziX }X$LHs;m*ׂR1hEG7qsd^U%ľIPv~)x֩ZPɥ'HFVkn1w<_k3kIG Vћ-D_n22#v[ȋOoبjBy`9MnOO+oũoyú&uv WϩX]DBlS {*~{yNzNl'U'}9F2O4+ |e^O+DσMHMbZۈP@YeMrio,0"o5ыjGE vS;=ZxEk}!n^Ƣ1f&hC:keމ^QoyHƵ%Aw̹>;\ﶟUvȏP m+I)NNNO6p:+7.OA-BR;kh3ae)X$!.2Tknu˙$yb0|W2OQ]y]% 0Jp/аEDŽŗgNy(2~2m_3/a-So{U‚1֢5V_\#~/AmΎ :H6%.duor8~N ccNT"۱})uHoѥ~oO}z;԰ϡ9/e>p-z|/O!g8z<+!&?i"| 5yEB5}Έ 0A4kY,MZn ~'ֽ_Z)iٯ!rU++3>XMqhI୞H 3a/߷Kyld\MnpBC8o9CAh S4s.n#IײU3zljÇc <OX&idL?qCK\,CH ?^zoɜL˅q3U<3*veW_9_Hz@XXO6Id.)NxkAAsɦcd_st߫hZyVژ,fq>|W0osvqdNjlua%IBkk!Â|IQogL1e[Ad\ݴzYz{Ͽ 90 v49G'Gm`%q `%ܵh{n1 ;( cBA:a lƌ+͋)`ckAXÚ#H.M#@5nNdB|Λx#Zхi=pþc"%? ]G:[w*I'qœ,kSW)7b^t%fxQpa-H{Wsk}V`&@7nyb1pUtV%̐W/ܼ$ KyY{ & ~i/bp:p52ͼ}=ΩvJ_xHsthY{y_Á;A\C:j`qEL&o=6^8̬kRy_~,sHt n3qVsp43}_VTwh;c~rz-[9SOosFSh:! *%'uvQ p+l:L"lpWfb6`WOSk]Mk5ѬlH0xϛXcsE3z=SgQ lʺBh>[s#8#NB]4&|lQFjvyGUQ8' Hb[R! JAYv$4>Q4dSʷ.!=S/us6>?--V[?[pM([IQ-ⵃ2m Bt ?Ag L==d^D2c~<`+|bu/4@uqM-]v@e 8'neu<؏,MĢԮ0[NkP|6j@}ucqUPnq-E#ˍX7^$9ˬ9*s ~=shj0Ro8yHŰ1z]>c`^sTd EV5kb]-?iȈ,~26t{+Gɹ%GƜ/|},yG;_'5zZq֮̇D΁(jFQ`܉KMľteQg::6fl1t-%ļ̇cA_= ڣ!vyt Y|^2ߚ`osDWI(DN\;$;b m~w`.J*`2֡2 PևDLkd'QGܓ ] a_ez '0xR\b& |E` 2VoxKx6Jocps&0w9E~yk,875Gn\.jНZH(7'?|ws=-m_ n[TsetNM@ l~lq3ә:pGl 0ruZZ Ěu [D3{ctou&ȀAYBO7flk_a>we0_^]v(7 le[ػ @_WrΓol,z2O#YΌիs] PlKļ.ى^  Cbdsë.i/ıLȯ v7^w4pP!]V*}͗%r8B4'vo֥u}]!ɽu$߭@%hPeG͌̑j|Ҡ3'Ok(AxHj,lhD!hh2萑4 kF]!:e,9anԇOST13T/ҽ٦;=ij FX7ԔM8bK'PغI^l\AgbZ[嗱fcNbM)0ǚy2WVn_4dk}dz">[G4nsVsoȁ WkI5O/_>KqJBy77h`a(K;ӟT:"DeMؙ ygX|~a>Mt+!kugqIU`!ܓq<.1@&y걞˽:XTSLR)&X*u躱](Dp˱>xi -sUރ$lt2{rқ#g0{d}/QZ!r"M6</UܖNA9} ^ol1Ն+g nƒW^{.|Iw^[nB(K {>h! 
(?{ow6&gͅf!?x"lyZgrsBbt%@G9R'2hߚ~( GG^14zqn*P'O?]5QFHiKQLhBㄚN])_o)2@XzjخmrNZ@AW/H*Ј)q/z*p4ٹ¹%>p;缫DB)C_v@B{݈d(p>M#fgI` §[_LJL#Y\F?QZpbG/m[Cn33Tk]~y}l2RxTRad&4̮<5A- .pw,{ ힶExm)a_ XmۗFdlqkZoHϧt^ ϯ|iZ?=MwAln*v,sh?:<;CտWXxJ X#kGcደz .!m: 1zLw| 2S.bX9"]ZVBat+/mf|iHF;_Nć-p_ l]9ऻg p\o^ ˳Oz9z۰'WFMzՆG11l|5rb]b-#.lɭm|khJPh9/u >2wJـk\*^RǔOw ne$yuoF;{,uL՚{K+ɟ27DBUg 9uRBw0y,Vqwՠ.:5tX퍑6 `:n0x ǀ L)mG&v>n,Nd6J/yaU@_J JglĐ+$CqL6Pz.Q+~K6<_x<9"Y(A DaJ{<2wRܻWl>vٱpC<`kev=p)J8>cx #Q<>rY#mY>HZ-OǨkrn7{$RA5RȾX씎b7j#Kgq k)<;w±|syװC[r$m^Azw4x4 ^ᠨL$6am\* +-OQ;6KUY7,՟b̾οS, J;ΡG}d.ʑ Yz7VS&YfqbiWDE }CJyR|v 73E{DS &cIQo؆P@ +c*+q3f)a՗Md07ihjkR}ړf%ssNi3VtxEkE)KU~< .L ֻ]M!șT?b<\{'kq1uވ{jl>,V75;Z0A|J?N(ԁw⎿&n;4vv$*W5SH#dJ&EO(3][܁KH ;R' :["KXH@ʱ!`Z^њ,y!Vl_PDN9/dNGߊE$ \Vvflm]Mpa .(kFT,ч4 \yYQ믄_`-␢1'?;.ouZ_co\  v2ws.ǟO~4_}6"~y\y.5mꦗ$vzg,9IJXްh~Hks{ ߷7#?vͥV[j-Z|KP{Gu;O tjÏoc9t! >aih^.f{d=>)&.6&lcoWn XoR?WuˌՂu2W@qur0zt}?g'|>cv>,Yx՗=',\3*u؝ً K,P sJ1qhoOjQAlk?-'gh}HoJ5 mDuˆF0];v  XpW&V(k,v'\:i^-@#atqՇM3;W睲Pj1Z|=2g2-"2f9r땬z{iWyu_=j>8jP/'1zҟK/Ղb;k;Q^?,-={loq"/olr]Q*<6ޔ(6;VdDy@ { _9PsK&G(<s>;mqrY!ir +LB.k <MY0F-߫DySa7]q?p)Wk2r!3G.tFf̓X4kߴLD;`v4XF=Ⱦp]oWF<eK-ҡRH,{:J)k ry2ZK N殉KQoiV7#vTVj-yw Hc06ϱ@= _cZRnc -B\l[ , #gCmZc i ׭zPwyvQ3[i65>Hxb[FO[{宛_[yt&}%#^sDKS\6'C?u3 mx&af- <)Hҙ6fkxٕgprufpgaSdO虇d^8 hdGh]VxUb)My8"{Qkav]l6겑d"$AXRͨ )KXu7~,FܡruG3ÙRj3VG8 cGhi¶/>s @ኌӅwT^o~"AϒeלӚtח?%۹kY[M#,pW {Ve\t'uYq?]}5?>Lx, gσZ%+Df{2+ӆz'ꃿاm}#u7l{ٮ 0x =/-uyr&6!TlYǓ6Tqc.H3fRgur\#3 &TuATm<{~IBn9_\Dt;Ӈ'Qo[8_urFiM*`׹†Wv^2%T`|GqcaY.\: 1^90c~4"|驚ߤ<5t9/)yN'F[$<|Yr)bOȵ'lqb(KuL9o+<5[w|j?{tc׼,1rr%bJOO[5> l|2wA-89W!;VSpSըGxLV#MR ať:.`OP*[ %hZ1]vY= cF w*`aQcWï~5w[ [KFf F-&#q_GPdBvvxhGپ{f1keto'AaKfcֿ>:z2'~i?ԚLl}~0虆wW3g邸{wWdhL"j]Y?`,R8Ia-=IzT'*h1i2Iq- _ gmaDG|ÔCƦ*8Qp9`e=| YI@(t'|ZޮFciZglYAk57 AcbȻ8@"S6eq晡wSS:t}D= ` 0ISzi>aEdDM}2CZ@m56?0\'RN [3 #R>7w;,Uud䆉[cFgyUuXham8+Kн&Ei2-:}d$= KN,P@X#h}i)%w#_GtYn i%3+h!:s׮r؛ gDI$q'q殮g+/k#CjfX?r.wO/NZ;HfAw鰼6C*'Ě^>^w9)k,L6tLpR=+8e{Y0N0vq8> ,)D9Vq0t|‘Gئ=h|5J fi3龉.kZFr;Oyh295F }=\;ï*n'uTrt)lV%S;TBXQ*"օdϩjv 3z3!oZz{3^荦>q,s55 ,qM\WӨ!scݺ T?脱شxܠcE,OW]}$܀'zl1L^`֤h gtrؙ8i-Υ3 gNaga44!Nl 7lG5IɽHo^ϻ6 Xw, u_vύrm=ֵ\VUtBQUr j;swlU[x#Q2<3RO 'Iiɰh/4g4 `4k3{[g9%WB Mt`(ta"}괿-{M0[8 Z~cn4^pیQ p@vǞ<;-Lj:Q˕Ryeݦ ,Ό5э\󆀩5(ה/\2!۪q@pW^`D̨i:Vˢ\>?:A2BMSbcgxCsiҠWL[ }ed'cp)9' ia;U`м~0csГ} 1>dij%yf0,r|v֪bK8c{"&ZF#a:k9~ ?YQm11u mAIvZ8, b@_G:4րٮxnt\.A~8m>A3!|8HeqyN_g)uG8ΈdjD04Uf0\Y4KR{03boSXs<4r}p(/c8≁U.R 6,G{ KqXfS -+>P~rF.Aʒf"^0@s5Pӡ)BcIO{F/t*{;*w&xpN,5 sp֐Qe}9"\b"Hkc|.X* BuzqJl|S#$ZG/t,,ĩ[a=?sviD3{̀]`OoAW,ˮW&}{?:C%g"VW^{k0\JB@W)`.6JL7tjd8OyVN՗y%'J/n>{v zյ^O)TߺS̭jйcDEz3Uli646)p~+}tݔl- O05Jt&1a`I Kԋm}녙jnh2 M}NMc]!dtjfЫ3mc/*0 " n)Z &OxXf!%Y |:Zh,E/nKJhf75Nb X./`oÀ3O }/!G&>G*l"[Czy[rJ'w4ӏkwxTL<l~R/K&l|KWOno+Oj ׮~Tf<1`GN^M_,{/M=H=pq$1m]S8Cn5[iU/eHą}U]6j\L;E'<7W07Iz*@m˙A:CiB_ ӌ hJ%iIf6VsCq]O`g$%C;SwT|iF#<'=gf@*=_DAe}ޞ[π¡tRZcN0`qryȞ1.(05+2Ttmo>z ƕIdkq(\ϻկikHk`^r=蕼*(!%jwF H V6€!o5` $@%ht"rNhk6{{f:t׸Nׅ.2^<: _@΄4h}9B3+{Q @4f=S(-=Ƭm"lDrΒyE TD`v醍[8z>q X>>;/A%6tO/eѷE8Fڜ70lL sas'Ŭkm?UՅѬ!_kوw 4@R(_ߙ(9|ŪPYQ;H *].s`uTsCSʰܰ.B?y\Na^<~ %e D\'םۻJU4i zs,NL3\l@ڐŖGܴӳfV6Z [wXsq1O 0yB0105QD`3Tw/aB@+h͸Px1=X⌭W^ױ߱4<ֻ '"DL ҷ~zju>ɄzFw5J 09fzic@}\˹=Dyfjrx _+%8aAvwo~pi$O{Ua$zctR1\W)43om,rMhn: =1\$L\htbASN4GL+=\8 Ig|@<!|Fy,*gmĐC&2`,'P ݁k12xhz~g19%;1Λy-J4v*167Fnv:7/ABGE@o4-69FJ?&. ~ߪHWEbXm"Ft [ͳI2t.㋧lqs\H]J;Z`_'AWsȣAV``"0 ܅^85pm|:2aѫ1ޅ}گ^[Y|N_}<ԿʟTz0+8mi}pXc4iJZ4||F6cݮ#V mJpmqF'8[ޜ &D [/~8@tB1O2_qVH< =C i 3В7nfG-7z YW:h՗spW6qirD sY{tKYKz苏ۜ.v^pޞQ|x"zX=:`};O]#qi\v ~Ș2b{]=P%8`mY[gUp ގCf~o#\~m$)嫡5!k_ڥ Oy-M;[jKK+?V=g`@cHN f.mzO{9C>v;yT b`cq1uWCjaٮfB^ RL@m:56yQ*{0'y`ќ\9;tG~_!G݋Y7-<ȶ,M\r5gWO`>: PFTwh p@/Z f]zb! 
S&.$v|cW[$}Ww^6-λƨ8S mj"*\=BoS <"}3Uu*yE 9_8Y`} ,of0T2,8C?779략 R ª: ([M- 9-Fn6E`9dK5RilS, M!)vG80$ڄU;إA;08B1ҌD*> fju(Zw  lηCC%a&:I[&M6,IIuZ?;׊GZt FEЈޏ)0~k˿-Q)e]$+ysnz }+i "!'MP XEEZ\KE%s,ݭ҈iYE3\|fb"ժպm.FK&Ww{ǥw}?n5_l'?%I @rszr~lVI d1qw>g,}P^jN:rWWSu.|.ބQei пm8<4UsÙe{Wid SYl0z{[JCW7WIXJŸo^~CKolT"Jt=Av2O7zO~IUwO]R;\GG}ԴK U3BM;ŗ  o+f{s}${ϴΪκtqsu^#_G|Iګ'U'<&þU BCYk:Кz[=:%4oBZM2x6M۷~LߣG/1ֿqswlqr-LZ"7~{M)_}ʩܔw] zr?d5ι-F_hjzYw6"Ľ1E67#3s[j[ڛhSЗ.sGI4gӽ!p=ʷB|^-Cvi`=EG&vQu ^OC FޞF@#mm==Yi8Hdx? <ɴG'}Jxr'}̠񼊢 UdEtXj-M[yae peW!lT2Ê[!wc> G*I|D g™IUtC|TĎhb>;CS/N[O)yIC^]LKQK~|XO LW¯ՐRŬ8XN[)wOZø"ғhs&_{sOVQAmɪ&P"da%˻> } M} ܭΐv:N,i&"Q;15dHrjgNv2!}BY>`g7MSj>;@(ۣoA7s&m0I{̃5)CUOئF\s06 &6!W1PC1XKidgɑ]29Gf=y *yJ'NA!!-cTN_$3o?<@fspg-'s 3.q&}oeCaǕo N'ޒZ!p'Ey 1mS0I|*M݂iEBJL\B:Y|",+qIC;IȶِFU~K籜jrPoa k <5SF>gu˝N[5)nW<0o㏍O{;_^`^h)ӟ0Wnϛ4R%97Z1E!@ypqz\?t|-V W7_6Ł/X= 4=#:?+5nkЧt`@fssH, @I`(,af`dM rrنVSc#t6a $ma$ԅtl,)mR,m( cn_GvnG4!FW ڬ}5^D.<= %M,{cL߳H,eCOͿ?Dy*O[KY۶)+s'{i2pbMU&>#ܗՈr)+_LwE^j<.BO/NJȀ=xPP7bO؇ܽvҠ{RA@vm|g{z1:(桘SHđc 󇿙|w@ ؍ii-~.T> ;KtϾ՗֗0cM߭.FK^{C;`g/aq"=ĭ5=w FDAܺ0AvAQօ)B rZ\c}ז;&s1/ںegp^֍EG -}Җ|W`d/}䁱db$37&BCJ}Je|䂜^Ӧh[5+QmN VjttBa!|ˏ,FE)2URGѸ(V}p;s6-=貿T6t͐(,bMro\u-AmOpV9l.]|m !egB[g[n Q9{8,LVa LHY'g5Ι-|M"]]t>WRƫCp 0VFBYiR a>*-uIDAT'n൴5c`tꩶoޣ\Ou]WƖ%mtv Nv ˱ h%u豀zIxe&[V+ y, !ʈ]{6x号:I%~D̩V78f$<ȼR-z'!1=le$Ȁ;zvPK[yERy hi36R c|_djѲ mx0T ٲRz,sK'QDZDkX]kBlvpkdOUy׭+J !fYaaBlp¾_=(7a{zNAnV,n;+/O Lˠ -X^߬/~_9Ơqau2;ipG(>==Zrz9D/OID ~ `U˿pֲ>oV.4(x>`d&?wJ]6?/Sg57=y@{9W~CwPEoyfSJ &&_#}ݛ5L>rldrNG^R+2?BVqo]:VEV¦~o۟}x7?MN̦a,t9?yIn:^wY5+T%.=zDީ/Aw?1 qa/X2v3Zt\ ")=^D`\z~VkiY.n=,O`RN87\Ea! ~28~Rk`bwT<=jV?E}?03Q+c"&Q93 dLCm;~ՠ<,75V fujc"%&N| "V+gY6$iqFOVTӫdo Ê%C4UU>ylǻ^.0b[U97y/]xf5]WpgX3q4<5uYЉzD#ҫ!'Y({B gMw / h25y~8I={ԩH"3Dmg-9b-`.L#}C7XoDt:ky1и̧~+cG@IJ9E*cպJ?nˆJJ?21ocSbpV mQyt0YC-e5vu*,Y#,Q,Sfke#]\癠4qԩwcqFQ~LDk5`!Y$c\^t4yqOraPԬ~I˵9_7rb(6AJYdBa=[)ӡLܒ6^$ Uq-ć٬y`,`>и)#r-b# fXwg{Tw@'`ѲWeh+W1m,~7/z7RDf(.pP| > 8@몆ܓx8y59&:OLaP0}ӝɬqCap 0$ֱAپk zX(~8>~)mdyQqHa#9_%rBDE6; znQL}T -nmVb_\u#EhGHמݥ^S f6;|Z䓏-Ùn"ژ QTk"NGA^8A4l4{JV5`@/yZ8 e0M~ǜfþ?;MB?bf@XHAѹI.'u {"?Sͦ~iw0.̇}?**'?j4u(qfX.ؽ]+r+D@Ym09+ 8m~E{|l&KTtѱ-YWB=^`6lܪ@_ޏ_2gywQc6H p0)K8Òۚ=6WSh3 ɼ3?y "XEiot$rߘ}WcxuO`y{1~mƂ6d[?C'`1Pr{YWՁDM. 
UHUŸ훺U_=|R߱7w:jXJ/5&ӏ[)!R bow P+48~vb"4vKګg}pU%Q~gsz齿]G]mDFz1;K_\'?tm Ëa,'v=Z\ߡSR;[2+$ӝӽURk^q~bԘֻM|S;:qK÷mMYIdUS^&wJzݡ6u>Y0z9(t -x딤$x97<\VtvS݀ǹer[5<Vt..e㣘q:jHO ,Uq.wF%Y98cw\Dޜ}܅ã6 4?nb9-`I.ka69-M{ʿIڔv[m9!uE -EO=q:f:dme6+1|bBkBu25=p2a∞ YJ *ZE5ݱg nϛO <J8 !r,G݅|z7(qp(&YeDϠR)*9&f-y?M6ms۲</3ݺ="340cs1D׭̸Ciʍb'|;H9"quEsMs|$eg\(]bYUOUy,0>@1JfZ[43'o թC$DDu\W햽dv4 "Y9f-&*%ћ}7O~v|sEC,mӂ%dFg8޳O[{}~pԚ=8tYsÎ6q:zN?`mnnMoze7$73gz;:??}Üj]у`VR_]SN\yb^s@&e?3w<5o$F1& 3T,><4;uٗ&nPn,&MY"7y@}ePc]~4H^ O2Y]ɻW?6@BQf+Q#¥L-ݿm/[Yv0ͧӼy84Kkö=Lu- {!^S8܀H_0 [~L+bM ~\~ VZ%_מ/4ҭq"<hxߍͮ2SȁS>ؿų__?#8UWV\;-ObR}).KUjW?ӉFRPigH.Ðϕfy{eZJg6{#s[skk'o}=Uܸ1RrCԓ5ȓ'3gtnEl:DGYZzڟ{о_cZ|ST'bz]Xi3)_]2xW[ye Cv;traW>/TUck[l!3q|MaOo&ѫZ+M*&'SIw}4tB+SBS׻,ߞHzsuw{WGiUjL31<`0={RȸL"acE%,c(ˎ 9L6,YCԕPjI43/U#:Ȧ;;3 n4?RYf)AD"VUgX-h熙B+Fݤٵ 3Ė4cdB`PBj"ѧùf8-tԯ1"5xFOl]e*;h^6[H>׈'Kl>胳[Uo{P?cTSG0rK+ʈ| bE-=ϱ_02 y:EkZv E5Fv8b"߆~_`a-$٢[hIm F('{݆DKg6 s hkH&8/jv;N٠`fYm|$~ N0!- \/,'] DF@(SKAwB ۮx飍m#m iEf0!S:g*9R+; Hk,pͥ#P0Yz>┋M{#dM\Z9w=PK*i;ޠ>ok}r' 3B+Ayì>ZckϾ5#ZXw &h?-߮?kܛ|Tp/?yx* kLq >/F?>3~-\ ?:+cwZ'p D T`-fp ,d̿@ #w mXjDf7㯾1c=K^#=>!3Z9e"EoA͔$jaM%nY,DfomuC]%6񑲡u483= qAަ.)[9v>7&W6:7m-d5>{駸.'dqXs /ǻُ>v8^On`-Ϳn;48&9zR"g+#˽ar oZ%Tdd jCCW_`7~ u?޸r;#@{3??Hţ&grn:>YlԍMĿ}K }}w?2`G"95 SGRV/}}ugd6V2=a[H13%k6عCstObuS5<}Zm5 }V0:̧PpLVkLA~/kO'QX^ƻk$_/ob6WyzbF02/']osua y4ʐl|QK@3_bv0b$P p.P\QSFx؝}(Kny}<^ O T!ܷx,(^]~ wmad@#f0'x|PwrIz yE 걻H: 'h* aą~G{2]wN]/Eԕ :AM8O.. C&kMhԚ7Ɠ+j*OvBR;"EyXX=y8-Y'b&a@Ӕ1JsBFQfAJ7s 0cJhFWw-N5"X`utXX;~%va.VF7]Q}dʾ CՇ#9{j<#>n^5Y3٠Hv6J ( @h} %k-EmfXvlgp@l<;.NQ:8+![ ;j'N1>2&8Fc3BʗttdLKo)e$0lFBVL*S`H "R3Ig =Le Ʃls@6GRQ:\rT&z"b'5Mk6-F 9Tę1%t‰QgyY̢ta'aYDxm!A^U7ܨbpuL,͍cVr =aWD?=JܾfMSAb֘lrj|tɁgГ7b3-&X^>1 75v޵ KS\* !;o:jq5,#|$RSx8@UB>ZyRRhK/i[%7Ul:I Q W-~]dbʳ↟?'xI0׽׫Qg/ۺEˣT|:5! m23<ՙ 8*J{?h'kT" a2َ<'9ѵ(FV6O{Ck7XMt16%COWgB7fۯ;8mތrVhOWz 5 C:.8;h?mK#?ԓ]+ +DÓ0GAuJ1@M|K:LQixUG8{"=>b t6F0B+F(l"ɎF ;"{iv r_ixqS&ʨ.t;Бט M 0gFs_&_w omȗ&^?20^wpO>՞ ZknGcؽ 11[F*tP=KĚ{;o.u+s_ oJNIIT2c@M!|vrzdV"Zog1?4NyWC~ !}pۯ.8Gc@8Yѹ'\te&b.?9:-ir=JZujt^?x_Hoj.D IAlқjG̬wDbtHJ>Ղ&D6A\()nv{A\Q4z` FNO-ޡ:ӳNT'tj@R$#@s]r7:F#S7|1ZwjF'%Z2@k:WY`aoh;u: ,t \ڕ@'c+eh@pj7wGOSF:J\~W,x8-M[ſktLIhL|\/ֶh!4_m%|;MzX{eSX:=Cooӓ]Sxvk1/ "-UMdI!7XC\0T#`ɽ}`k9*@ Ǝgi19{JۣPR* RٺՉIL_heTy<2 d{~ؒBˡɘJH^LES3--prT{IV,ãfnVZ[&Dbdrܪ j;_# G1rd'5+q rOob/C#xv888mD8_|ZeGod=] µ덃Vaګ* u4gyo&|N %?-cW8}nSzKLp6t{g~ϡ'fǠ@ub3ak>e2{%19]fF ݸ '1L>6 JT%E{B׆H>FX[RR|2ڪqLsV3x^k,Y%[.Q?wRI.,ޓr~LJ5/jjz.Y*F?5.tduOo'噹Hc+w 󙅥W-7cMFK8gBZ g g ܺHOOG _v=)N)8 br xc/w H,jϼ9dwVu%Y.i4ڕZXqmLj_NE=Hh`u{B|2ɺ:;f= Gѣtcz'|hmaAVȆL|ZfϖO f:A . xα@Mb϶IgdX*mbd*zK.lкXh{h a*Z vv鄉'E. yJc%Ts[5~.:,Kv=3-DaTSݍ Ix,[azc:|֢/jRֆlxmB^xR.pWS xaU9+BC;~u! YBa8 S*vc5wA8 Ԗlsz-+v=9pL\qP[uw-_~_r"҄r`m0\m cvKnn)ՠe$ƂQ&G!/e(و]Ieq0h !k:SQ=qxa܍k'Lį^|ay I!~Q'еY %ا%%G)t5 uF}vCڀEzbY~ 6bztkDž\f$nʭK <'#fmAfђ6~ׂI'Η;V eٻImq5NIC'_Kp(R%b»CxPJMR6{YCKh*$ mp .FMVMt,Y֤W̎-ҍ2>&ɣ-}}-[S{YAL#FTq1`Nх'Zg7y(t~鼣/|73EzxcZ.S/SIO;ޝ D#I1Xx5$>ݏBgnd;8<" 4`'00 5O?tWSk,5DA{87CDKX;v.2CqmؖpQlNQqa#2tAT3'B^.w6Ls`[`eǷq^JL顓XЕCy'd6<.@]iXy7E /)sHE"OՍo BG@><| ·K? 
İW8exkbC,}vtcQw8vo(3}xDLLc%h'} yysX>)9s߀y2r"nFyޢފkvYOjR׫m+\0cX0{8`ǪlwL/$Y;u/>fUq甜hFm\]~޻"_/踢Z`JQ Ǥi*ö/b lQj Y Ի`y95dGild8b`շvw}lƯ̌}VtUL{P^QY"M8or$6 왢4AQ"OYap dD'ϛ#G[v`Nی;# urw~q1Z4göX Mp- g9![(G7=OMEey X$}#[1a̱Fz mHሶ kll7| WLgDsAUOؐPx虄{.tB`ӀGS( g7{iqgD Y~ *KI=^ʕ##L@-Jh,myQ.!9nϨ}?(O>)Y2j%L-ќ?'nr`-` }31aϪ2 Ka9=\$!B;y^3&nҚ5lHH[VҴháTZ ়R&@62 :CVh;DPε91>Ut6X9ofpiOKo|8rRm|,@Y Wڥ )t'Fv| I@hIx2𢾚&lv Vcj%ѭrl{G"^K^<ܷځ ?<|n8v)PowjzgY)*"geSŧ=So(nB:ӣoovsbBX@Wah( 'N}Gn= Ej?0.Q<,gv0B{].nMrtV3՞HeP*Tv* wV=9I Gi:ۅ8 $P=X҃'K>=r0F] ݇j (8mo#,˚F~΄]3eb в* 8´\{P{G3yP8z~cr͈]гD5`ysRvPB=P+NDi/hOH[>Qm{𥷅MӇ?\zf2MCݩ݇#" h@ x8c3 *>;j(qSNJ몏p>;j'M0;IkIe'ɏ'enzM:O11bVwK!υN]Ft~A>qӒFUǐ *^;yˮ#L\ʿ__wRay$Mayΐl),:[Vɭy c,-*r zmhb#pB=ϟ }tF.kh-a."6>lf 1ݥ[}E癁ҙx{'+B?g+s.}]nkpx:{@aCWHWPGwʘbv _W:ƦϭZcᏞ>~Bř65vC"~t(*?.漐?JSB- Z cif2:AZ<;Л_\av*KL}l>mO@n ѶL]wZߴc^j{ ˺1(Y[Yƞr^n48`02am{k?žU8Z*tRVm2Gv't9. @s-rt h[ h&Ýw5M8i9ݱ ֎fe/цFwnKUR~'i`d4N>3v:zJ _-`Ż?́zD\EpXz#a֨'gsGrGH+{Ro<*7i$)ߧc ˩ R!'C?. iRib( $2 ueo1j:ʘc~v )Cn1bh=h ӻu̐ V?i1МEmi99tth^t1tୡ2u9\>؋3qo};D.qR`P?Q)ƍy&ʏs'c@NbB/1%r |:+FJz4ꟼ6qKӢLOBS`m._hڷi1 }Yh͒E2a KUk)KpY<؍zn0 bgDr:dFQi(a٬ ^Ѕd4ɂh鎧գ5*&i$s!LW PPK 3GL!ms/amrh~GdN*Rw9:^8AJ N<ƉmP%drc[W膖,Wlz:ϨJQat j(=vF?J/CvٿR/(l,xOfh8+ICNw݃am;?u_7ڋag^D=Yh# 89Oz<)>m/CNQʴ.HSs "Z.r F`pʮ5[yrnxƴ:N42gu>J3ugPL4V<c S jԋ2@B<-l{s ].4E`p5.Y<}shӇJ*'8|"{'*!^.zqד-hUM:F+@ gUw,7C=wP{T41EQzyHI8t+5Z .?ƹ鋛5'fȋ\כ+%}ѭ1+wy*S9* a:|v ǁ 2# vۦ^-0UCaAAՕT{/܁kjhW 5 ުj̰پ06^"0v|6\N2w;>};ìxh Dja~4y^L}h?k&3)[eVCϼH+/S7&Gm='ki%^-Z&SڸU?, 箄%n#4d`/O˱= iy|~ur*z~aj{Aus?iygS&OOr})tWqW̴HJ*ax~׸f> i_{;%c@?ڡ6~ CMT؋Ls횿\ѝ~z7Q|1?Ujeh{>Zˀ!GqYݮ3yQoS1,}?zP5"YiHx;Sr%Mμ.,Nt2@6t'퓻'څSi'S'UPw<2=.7R0{լ+v("[YZ*AyºQ٠vM^woQ=qqڡNoiDˣ))+}aDr iGVAEhY|܋1PWU Psm2[k%ѶnTϗ\P.cb`IxlOo[\ū/T,Verz{p9:֭x4(0۝G$ܵSb:U峙5#"+v2p8sw.<#ڄ]2MmAx73tm3Z~Ґ (v=b RAfF` ꈑN:zV ޱ9=4m"%D ca5k$HuF+iŰEfhҭqmzޖLc ǦuL;U':@*EHB=aFp{=k`祻1R##mJeggqXa]p xj<OzT D-v/,Z (X˜kNfJxv IoN2=_^^kLC䲷lxiqR3&L29cqMw{N=80襶3%_[DžLM<58JgzeJ7O &K./8Vl=L[+ߟ{uϡeϏ j=}= j*}hqwQ?[ԿPgP/ƺK;;A9\B I3 8MЇEByšr"5 ji:sⰚ6Rj&@imZGq)|E@>y$:N!Y=@qRQ`FnW'"Fy/XuU#-2-Y rV@7h֏2xC}Fb7#}j/?D+{sz7OX,sAܑ,g+woQUt9M$D;Ƒ4aߑFwJJ[?1ϏH~psxy֨;ы52>?R ik㱃طPN玃[χD kUN .ɋkw6[c~u,5qժz~w|념KOf #'[|v񼿺%HO܂`eMjπ=k,ᓗS#3BXm= jSc6va9˚tn,1:lfJ_F?~[SMn l!i9F 0W'\[QYAN%5X_ ٦ 1RM8^.p;X#9ܝ9Sc,@mH A4J-C+.`r9[*Hb,67 .GN\'usrrUih8e=ly?Hr$DqYq&j_ fT&tؿȐmt_wp;32r+ղв"U|pl !Lŵ}Q:͇Q-9X+sy.`fBD7.rMWƊY?!EGAAQ N ^щBtݖlcKÚm=%NohvT<9aܭŝS՞ؐ·Rxb3{- 'J^Y};vov[F&\.bB' ̦=)g|)Z21 t'`D_SZo>+2aV7i$LIG@Xd* aVDwE+mIWo0ղ <>GVq 5-=$6) iM= V㬲&TV_GW/[?3 .HzW AxMPk^}Ru,'9<띂ƙGyoh?v&ۯ >90nLme:͈z]m[uQ `e#Ǽ13u M:I==t1F_h1(ӏA!8Q03Vfʝs\T#d@|wƥG6uKk+P[|D\ '{y9`Dڡ}i+nɉ0fZTLdE!{S'Eo>?Y~`Fт ZJ,FE}`Z*qIWvOՉ{+nl濌u~mk|0 /ģY4)w3(o0\XBf_>pYppi#DU[&OSxY3‚sqƢأBhGm鵋̕9!@/ĸMム|1ᱣ<_k6s~=H.G9ӬBڱ j{ FQMG1Zm-RD3?ue㦔ʋ"؅a2KuQiWFfͿ=0o"W.QjbV^bۤFQ3~/T'> V#9m:OW#LY>y>uEuu •op\y0}NB]|þ 0ſJ: o5-;t6qpw*wg>Ka7g W{xT񅚬ݯ.Tw&'u OC?m4=o feCߖk7|G7[ʑc:i tYqXP"v^J][?9pvSߞGg[|1^qBmjx"p=d[K6KGRb!y";]ΐ>]XBs2aofkdW[粞XҋUH kك@̯17gWȕ,x\S9a?!\8}/ *9)4qҫN8#"w'\T T! ySN:gDViyPrlҒwgi: 3 1ftPֲ:Y#ᮕw7ɴ1M{\孋8綈Mܨqyv:8H j{+.JtNWϫ2^}cHn 3rEz.Nv3! 
M/K{R9HɉY;YX1\eL0,S~W;U=Ww7wÐ愙U:62DŜ@=g{Kv]G׫xFAP8vzHQ4caT3Kw*.n9K4H(>:KkS?=q5SFZР20Cf/}S7);x"Ch0K{@^Bi0-e51*\iX^SG#' pK,ছ4j5}{6>u\όFZ.Vw pAyDt(rGkצ7Ŷ?dVwC9򓈜ص,Wu+`MTĞd o$'z`*~w[qi_s7Ǯ+m iC]#3@Ӆ 5u~30U[4/j?Y+]1uGDRjDeкTO>[ :H:+L3Jk֭w9RPXD(+ ml ZH:*nT>FrΧBUP$jY>$_C%8{?{p9󘮵R·6ʲ͍vEvKmh{;kf0?[=m]E ֝[} 9 sLXܓ'x>kNi36M'OlBK&M1ll~pTQ-ۨH<4/~p=٪b_:53 +&8KIEp\imׄ K͝`+dWޥk͐$?E;c|IWȄ^(QsL tbB^.qKj U Z{p[{z שoP<]5qTFsǟ)^ =l6W1TN߇zw M.AMX4[ϫBtu:`GHKx)%@bm͵$L0ֆֲ31'+_"U)򒞨@>DZycXh/&rg:#a۽ syXHM JmHГcQq3g'ƈT@|V2}(KPk4tLLWj6yTk8 Ӯsul$3 c)M,8ׂ.uǜpRr>QTz=nv< jC9k5q[0 5H`88& [@G0 W+7)VM¸*LR9シ=;}D`dWD-EJucmžS>W> t/|d6C[yEs|ӏnX>gɴV h<= ?7W P> W-4j4op{<;z4 -Cde8yxFIM#٠Sx5'y/QMr׳޶s/gA5P "ʓCSڒcVA?6KgujlSנ1/d z` lX^ ^Zy,ZL!-7~dVw8 Ț)IAw͈M&8%; cQ^#щIYpYX]yᶨas*l{k%%Q5qChu#+ ~ߥn!qNfFItiX:BΛ4TLul߉w?~Hnzu[D)V7+<@KoY#Bk#̌-(@Zm@yۙ[gbٿKBϴ\8_Hz~Vg͹Ozy>nq>]}#o`35Rszu}s3rc|Ax1 NTzM@n{3ٙd|%aWoeFgekjO&0PNh{piq!+DjpyPliNHꯞ;#JHW!ot nWwNotMMq]dVw{aoMvQ%lv|AF?fb=7DeLzjy1lwh]6:qa=2iEV> Bfޖ т'NˆG.vW|H%N@(}]~|lDԦ nB4wqZh1O=kzlGAG¤, zxGY^|J QP±/t|uRcj^2KZS-puorkI4@u~t*4ХPGK>zNL.a!{dݔbd$ iyN {+/ٿPk] h|.Eh㉕^;QN~|Lq`(ɺPM0K3o־\'wi{Vk)'<t+:W6В XGڴT(Yoi_ MUgDKTgyՠ$ CuTk'@;iyJ.ኢ %!> NV[nHש8n8r* Er] ~0Ǝ_KarPvFXض%u88pcHcErtE|}1-%|BuP3fT OUd~;"y }6)'d,!ߘTL]A7hN@āRJl,w-d}5SaqGL%҄.& b^0bg;ބZ@VYsiFbGzm*6G tgndOp!Òn74Ն@VbXf6eLbL+#/V6x=y"XE3߇sA>:^_ȍ^f:Sv=ЯeZm&$\$b',W/f4=ַ-ǭI$r֋x? /n_4mt,+t6Ձ{ fG °`9!4N,cd,.gh#mtLSx`iXaܗTU?b>[llQ-@r=rGV3YOіڼbO=T4GPs+fn@lErE7V\3ޔe +zmNۥjۏ9QLb֤H_3Bw2 /^sw;~Kj_)u><#\"~v#ıۮ°8O\Ou6P=cE)@I3PYW'd \U܀PtZ2 |p][4tmZ +X:|F1BXykp 4w!N l &$Fź{ю\nÖiI'&`k:*KI:H,ӭ LkˆLԠ9bJͬ3j]Ir gȞ0YTUijc??}<1񕟾R z8np^_}w5HXWh]ΞH\&= ya4s_HB1ˡʪG8ĶIlޯH5OTHlee*e|Ps:|Q=ޯdgJ[\;ᷞ><dGy[!ޗߞѪCG_р)#I/'SCsrWYYzGinGj#ߛ?ue2Xr+믶 }(jlB1#Kg3 X̳O~T#f$KGxv=,ʒ+ҦςI(KiR~{lVX+:+6oP-kWн4Jo(/?_֞1Ҩ]5Z?A(Iٕr/igg^Nw   2cA %$Iafw'ϼy9vιr}:xw9l% C2lLmR>@-2&3qwE 5P]V?Gm%iQyv$l5-e8nE2p˞jZB5"mt|#.{}9 ?Pb Z{,ԣwSV#AL<y>p'X=$Q?6'1i;=3X*:A/_ݙ0]b[7'P\3j ic0Ę;Y/Aܖ]8Xx8&k{t3dBW?hXm+{QZtv\ꛗ[.S5iwEx7^ .ϭ"B^xn/υ lIq^qt p^H{MqSU^oil 6 MX5-kq[79 -7GC{.!{y /I7Kj!ݸ*Ⱥ֙ld© rǷ.td;z6Rg>xT}` #F?-v<ӜUPujr*\ >Mmb,N,dxEL+ s_4h@~G< K=m zX5YJ,:GQZsSZ5\++/u۔ "253aW}mw ǧmw-­{3ceO?dBWO?:-.=jәqia='Wpc3QcvV ځHn,֓M?@c1&YR:k^~{> _"Y?"r axn9uBiYv0 *y$.4dmyHYVx08"+nlD'qm WGY [%{NZA:Bݛ筨d?{z ΍gy6??w+K_g_Eh,_Ӗ W,B ٪oK] R)FmMJQwZ D\QMl:Bļn9]kd"j͈ǻ7?]O-^N%,k1[[O&xz1,90\׻pX>w?Gy:培s+_`9ڨ7d0|c1_U01͘@fwW\뱄%u|ѫ~ *|`j {Ru.`x;zn-6p1uE6=_\[-~/zBEEt0XXp bE8kfO۰?/fC_#SxT/:hݱ[tqNn?ҽ3}%`lsWP`P\2 +o.Ata֜4;@M>3IwCaE6C,O$UN5Zw5}h5:YGc~=?;vaҥХUŝ#n;xRÃ?_~g.Wmw|%4=45[40T`l?AΌmg*G>kqc .#tTG.ȸUZ iۀy)Ĝ+@9 ʩ$S\ؔEnR8 ܼ(*Aiͳ%gKgUmVaaL(Qz6Y{8J]F dyWr@Y$֡o:(8pb! Dd=>+ :?jves28Y;{abooC? C氦)-2Ҹ>lѭ(@K;%y8>0ŷmtQ( c{%s+&j='8H?}ߧ]YgI|?~l#ˢxM׳%uzp[@Zi|vd'GJtq fx#^ nj& Uh>T#j1_ټ(^گl`Ҍ\/ s5]$};U5nwF/+x'gWiugur+&I~tݓ,4c@ f0ӄO VƗյ_^z`0 ׀cX p:IE*̼}"wO82?! 
ʽF!=K˷OPWT%2xPlz&#AI#2Uz6"2 Dsݏ=ct"y=q8oeOig&#\0ev|@x@yo>{ڏ>ȝ>[LA X=ԗجٟ%-ٹܘA ?>!wMz؝ŀ;Yvp) q5\k@aY'>ɵF${5f/H~NH0*w>u2׉HI/xQ._#)x"4پ]=(qgn> "쓥uNpȜ\70 !fΗ~Eûk~ n Y8Ŷ nڈu%?Zb=ovg>x:$\.8n )d/8AGĴgC-P~4\JpP(pbMӫ7%S[=ළ:]9Nx* Էڇ,@a r9:,8v#p T8ŤxG2;:R< \/O F@w+"`LBGd̋87ֽr:geKknƱK 7z;ny _H3h|}V KTnOXDWtQ}VҮ9?hIL9)bD}`k3^X"# +`Y[1'Z:𿞞U>ۿq@a1aξvW'C_:po#ۛLnIxnA ٹg؃0M h̸{F'l}K:; nig6^#sYKI 64͗3ؕk;a͞<+voTK0HgјϏC-ap :M:}O̮5zDpgc ZNРP^bοƳ`o"-ٝjb5NZbCo=Ϙ$#M=A(?Tfj,_<MוxtRN3HmCFǥUv%p| uyt̫o|X-__jM LMS6yLဌV2$`0}r~~z70 DLgLȈ!`% ̊TVEynmՎuO_gZ;o!XBJ@`0A1f[TΨUH~Oz4 nyOgpw~.i_}~5nN㩫|W= </hbt< 3h䗷Ph_u/|x ܰCK]̆:~2d Vòɤ gvd_،C77Vٲ4:#z6ȭ^PҞ@ H~6qZdu+áUfu`D3ݳҮ|ŋ㾂?c I+F|W̨3Q&yO ݎ1E7/Ͽ8W #Dʈ-}-) 1mdT+He4 /̠yj(`|7~IT(\7r0_[m-81"/>BJ$ۆi~+z]ֱi̇-}D6ls.4=;)#X@<3!Gdhd.q vjF V7˞*H?ܗrP@'iAZ񴒡M{Ԟt%T̯|To'm< 6zOf@ii1a~^/%p & 8LuS5<,RZՅr7m1-X ԏOwqKO0n^|4{)<ˏ>o]_pU+=cz,7L$"Ƕ { ) 6#HgnYmܱ_n}Sh1Rz~I{=uD F%;#02YP ZDot3҂*av2/qBߓdpy>Y~)FdrU-z[hrXf$Ӥj8QM$ C;v;|y^&Ӌ't;n(W!v %^m Ԃ2S dhaXUHk3t/Y;`_)1Sr宆*GTUq[:a7QuVuZS}hSo-#ݼ2+D{_K_jOS0ct^#G% H8aTdܮf 6m@,QmRM{o^Ka6ե)j2π_b.$ȑ;8:Y ɵqxiCTc͏N؍uT=틮̙gMS^vlHݞ: RT6@ЎY9SAf|D^joE!|!'9{:jlXa z˯\xl*\:ke[}`YP1Yk痢&&6l_5% =㒜kNayU* [nwZ3nlo`MaAuw3y{KqEjf˅Ζ`87(58Ob_<ȗQas:2GW̜YR^W~)O. yVN~"rkj1Z-c4D>۫gtqa+ :j{טM} ;4rpJd8=2ʌڼGшnlPZ@7@0xg֓~黉1|&x_g]^X_ZNE@ p_ZT082F:/XGc(1Q䓧߁"s]G""+#` cNp֫{cQ  Q4Q@q /~'gwܱW$O5$SELTgQ+c31< LC F{WP_G}/[.v.s`p1;-pZl<%\ 3Ƕ|;|zCx_KL tkDXLNr/,͏ ,[_z)0ǃa RP~t2b;c_:E*ּucj"kD8,ʱ 1[/޼^Fz6Z~>ZLK`x3BC ы4 NL*/}/n?"w^VϊC a<0w zE>6}-)ib?R*SOsaG$_h9oRb~4~>ui]񓃊`U r7Roڮ}KuφMmՕynd)a-ENh?iQZR'vk8/-Y19;M3]W %;jj~vuX$<15>Y %˨ Êͱg[16: v3CsrRBtb6'tv%m@{4};jw-W{`Nn3܏B\Z^]xX_{̏:e_kOz6uX/pX`رB$:l/yPƲپ$AzL;skz9\H .taJ:J<%q| Cֺ͝ ]1>i[C6(ULFYA Vd(ӄ⵶3?Yg@ƪPڊO{Mn|EW#]MOw7h'cP=C4MVD4ย%ka+d/d卦2ݥH?e-RFr*qt/ͻN}0[<`Qn)%K.Z>~9B&,OVf[W{ZڝVw3o!2`KܰIEvk [.@7 feɨLTP9kAmVi7pB aRoS=ҽ֒v\usN C[D8:{.g3f>bvܭI^ܓ5~3w+;o?p8b9X6c,v|h \zniahPa]{|(|Ĥ!YPKFAIژ>7ڙKm >x9Q >W2ьUV| c]- H+4Y^9w{͙FA ԰:)Z+s1M࡜Jf3GIX۶?_U|gXRF77W: R+R,/d6å25P |68抠vZNDYy|IEsDi3GFVc6jN 0k_<6{ׂ&bЩQ5:͔䢒TBDWx@_i SCUy : Utw|C=f_^:xR58Z/nj7+QϾBKwG1vLt-pk?y뢿b/?pz\ӺD%,3 7TM|O=/N^֏,ՍYտQqT۲gAoI%0?.|B3Pa[$Ϣtot҉eT-%I詎yrӆ[7Y-i !#@_ Ok|\y/:tRϷE1MS+I5o'²^ 2 %}>R_$侣'ڝ +?}F"}1;i_3|,7K+ߊ O /a/F/}?p}k MEI!8B;FPblٜr-eo>d/x7#& &qWڥfhzT,/u)Sb8E51@ߺZN]}gK 1~m{_~cjρ!o'x)hה_QŠu2:?@cnpR:y됰 )e2E+x{R{x`(Dou4; r~׵ FCþK0ςNkgB>אO{haP/ K([a[ ^*ΌY헿nPf]TA's?,#ʇJ}Z* sy=XB)wiR*=ZbB3gR};xU(CCh #UV c@^nulTo$4k 9)#h.Mam7KsZ#El.e@p=|82̐g4)dd>ޣɋ UM=d Tl¿Vk'ڄt ʾwdxdN̺&rTA0C'^]bg2NҠEyj}ҧf50)o03;zzƇd6(O4<<`V7PO9 hDSÿߏֺ=r:#FZ vD4OA3o)q( `j$8N!HQdua4Q?m6lK}pZjiTAII6|$Nu]ao%xD6HeyJ }k ܍k> y`B?d Eœ]Ocbʈ6mt=;;9Ir>}NS57I9j#9;~Q 5*)t, s= dv YS 4}U< Q_DUΙV ֠r$VD>$ۥ9RwGq̦ 6=WlT=\Z +nQp٢qU* <nXmֵsxOYSL*a5k$tR Z샫ssha:&\wqɰI&t8]AMg=NOz4;,NMgGv"`w] OY!0Lt55jėt[jmmD9stO]a:D//cXD;&(Tj08̅<Ϛ7k;w:ǽ r7- laRNmUg]mŏZFJ}ov1Rvt*B? `-٠6fG֣ؔXH+5Yi@-BN6θ1rsgzp:z#'!!1F/k3Gr\a(1Ţzg5 qΣ*JufysGͽR"Uot%}3TFF:KC)ox'!fԟ}(8R<鳪V୿ ot05>*hoWߘι{~Yzdyw?[x!i>r^{#‹Y${- (4P)ɱqQ̣McyCt(t7ã?8v\*왳s kdSFAloTde[3Y HD?o_yRd ϶{8LitLJ{<}zw+d\k=joW)x#$j4Vj9e}5i)J#DYjс9D ~Ϟ|mf6q_HmG&ǜ'b<>eq[Wnt% ޗ>ew{)OC1z\E}|,&z0tfN=x@ucb k_|.w#MDZ|=!k?yo~=ǡSWU__\^W]7칗Uf/_X7Ջ|BA c؞BZ#tD/O<7CB%v.Jj za-:[ V'rDIe.UFEг;m[&:de2x/uOg'q'Ot`=ALLa'Of(K4_Ĺ5˺w{E7:]bꌑ/?.#1 7I-%B=π>S{ϮTz[Ğɀ dbwd4Y0Wл=@B^j/^y| A&{_7DC|9~=&_B \}57>v8(AWVa5V5;q)m>0h:buD+D+vLp]DzY{_n&N!ݕ1].s8@`9k ӵkKr1<'ʡX]UBGu>G7C \oZɅ\՛w(ޢ]i#Hl! 
Epۭ5aѱ&L l5E:``S5~iv`9Бsk>m\P- -CtrQ9Qf MJX095}c \(—i%6 ~w!%-;^Y>l ZYثOgGX?"6}?uחorZȱֻcL ]5=ؒd7(5&K/%qnOZ Mnű$lY+S4Lm?//\D]!Ӄ|RCY=D&(CJqwS\-faH[x_9w\"ˑnuh5W+ 闃 k'ɇ?hȏ".o.WA_|ڻ9~_dh R{#eOOƹzYѾT?& YBvO-Mmi2 ˙ tY&d `6/v'$KE)^'x2A,{1#y_p5KAC=ro!qϥ$hv`Zh"kp2֓$("wn},$.{:!}u4JOLI"]=4񥭕x"@q+~jf”!%СphN,$O8Z* 41}Rg+6k= HS'awEVD~z :NHI֯D!ܬ~J)*q-tzXB~q5zb3Xۑ1l!՜K;[ 6@,gYtA .ȅ*Xl.?fU} A|`[h+3}336='},|-TIS :hh.kur|+" b M([Brsߔޢv_`O" oE<Ȍz"[@Pɸ )/NE-l>byzR?5>yt: 0iWL 5W~ƣ᩶pqJ6@o>)Wh8C.g\4]{t1c3s_F'C#Wp͊Bnt+z0)TCCnB{ eq#6llyўBZ ދ眿zp ՞PQ;#Tp>طj$wa]ݎ[,jRBRj`81x.͸ g5jۘ{Uqm%V3mwszkmz>y4>H,6NE=|ub59Ab;(]d'@a KVhz 7}eQylq!ۘkuf_՜Obi0frfQ8!Φúl/E[g5/@M|5bAu`F0>W):Hxbiw<`E8Xxs60?GW*Dc~@hh JGo=>;=~ t5ozF#^`fn:3k ph)ݝ\`SF\x6r 9_x~.gH}/3ASSL<MT3Ռr΅ErYoó)LoQJ_g Jhgt<'(($ݩ CNlD7<h̜4dw#=}n2R91]r]2V dIq80^k⹒.<OΧb|nP vq0ԉ~K'ZV2K-[K*.|hPTKt`rr7wB<[']-]zg<=5d ztԜHMp5)b^UrI`_dEɩ L!/['{Ǣƺήi\h{đ#=qH$T&r4 QˆMpk ( gx"?yRC2i.{Y:5*~jSM$\0e{R@x^ Xhg3,ڀdIɘJ p+3yxxtO![;0=\g֋U}zy.u2n;wncI4Oݬ: /ZJȣM,X9m} 6 TK2Кi{ tgh`/# ܫ³m<(-9  3d3Iv&ysTwzNв-Tf3C0:CʳtGI# vy\B-c?Z].ZhsOvZ](2 eqY[]޵U󌳱Ym]~έ{@=z7\QϦ gaz2 sK`w ?&nZ +eg,#;NRJL]=08#2.pCzɥʼnRbЫgF?S_:'ϞO(BNN)W0x iP,7C|2 놯XcT]#-Y{/Ŕ'64m2S$rq\xNg:w,YWmύڪr00&̣j̵GŻ<dy{|kwv%juN%ܭ9#>p\n%/=;NK Y+*|.xڪ/YpiMyd-13m@9yj=mR9 UpZ^v .>j촞=À˜`xO9 lWyW{O0;LxU[>oS´{jbh^•E3BwC,1( c*jz Mz<AvK*!9UYħ6drӇ"paHAG#qK v4c=qz#9֨= ǧ胵qGw2EMpuKZt2d{h,SU7#尡\{d0)h jZ bdׅ$%+>k%P=M.'@BP&/= .  EPK_řGad1Q<)37$l+}(d~u9Vs2zC3X p䮽xZ%ӛUAy@0@AB#< DGB)aå+2?Z?:<1^t?QuvYx ~-;Y!C}NgmzmA w/CIqɄ%};vZrNp|$c,~bogޚۓ8Rq]ybUG8p=k5:ŬA|P" ;`L+nIJsd-lUW 2fo\hNo^U{$7UQԍtD}& |2$o,A^SQiTmgh%|fr8qw.S>/!x;YiA Y p]$Kqeͮ'kKPh&l߫xvRr~2c@p ZڳoaVw;? 􊍷0ISFuE}rsVmq[ b|,v# Z /I湍.-Tퟂ=m4!X`e2HRLɘ {ޔ}] ϣ#K^s ß:!iSff| {I7ȵixj&}DL= $/K}(m ݄,x$ :'l|t|#4>e< 'QRvQ,l+6= Bfq:Mod{rl//!(|9n&Bpg PN* ~kH/avQ$3I$CX3^7#c\v t=x+OI6d'8N[?X6\ ?{8嚘T  ?'Zme# `Ԡ:}*/eaub^f]X|C3ZZ;}Z|i]2p@4z./*5 &iIU b.!.^ X-Q3VRӤ)nϐq1{+ 6K]f!>?BTQR;( E2L-{8/lY7gA u:cFh"yԎ BЍlr/a4qa<@uOl`TgxijJ~!ؿMeU%c -D+,0n)7Dx;]cpƿ|#N&Hg t2gO+ xĚ^w~[+e4^Mf5?sX>}l6<Q @JHrBk]g"/2`7Y8*D;Oai~COyu} ,VEmHh'qIf$ʳ N0. I}c'bK,./r/CF>r~i1]> _VY?rrOfTȫI{n#`e$$ OFM9`ThGű-4TY|.(slY==g~u=dHi/K>*l[5g1_lkD.({07@]7'10{3enO+TŁJW+՝ޭiW^r뼪>9y.W^dzI|vB3XY/,51kKfKz[13V`?7AH}Zt~i{BUlاAn#k{msI F}&JIKl1^:wVB[%3 QFf\u8G72]QsNѡcIǃ'$Z!.6IIˋcS Z EEN죍W>Z6E|x桉mqũr{UY-P{ Fk?D, Ԏ(]flNAhad+G=z#i v%cY.[.C0}ߺEE4<2Ѐ.t_N?<\]1'u|&u%/oCErD_[ueʟn[PGaWyDP: ~Kk#m%-Ói P-8QXVӭL w{7>j\h~,ׯ 63[SWn7t }+`l]|ZCL+.Scn}F!3z1- ~GIW|ռ4 pdMEpI@}ARNK+v%շM|m7֧rΰ5(هV<.w]ܜmS8 Kq_z>Dm0%r=KъBYt"2V];Aǝb)m|fX@D =XNX vvoˆoʼn3xm=[Ũ t: xJ<@P=Lkd2St##geHt=M xɄtO"QB3I5"j̏)55C ñvH׬#aurz=.TF~r_uӷDK";Dt;Qx_n^n<ꁂGM(qO70purmkSY:6)3oRm`L@і"|时6duD0P>:YOK}C# ;:U8A:Y#θQ6𠪌7.Վҥoڪe?cVGBsBr!F;چ/[RfaDeHfO%G;t Ngq"7ۂ+G]mVV?{9|,aMw:3N뵕KZ*M':S Giמ6Yϛ8䩘_1ͫ' 59Vx,EJdѭ9;rO{sgfc#,Z@!μvR^-@""w؈a8S.4r2 ~2X\H~ W H ,˔ilB61MwF<Oܰ]\8YkjŰ5ҩnv\9/c +V"yiUԕR][73) 5NoYh L!~0U;skܗ؉'g"NSOc@>6 M(+47ֈ1~_ Ҵ=ӃQCc)bRJ yEIfqq?ʳ (Xh+qrkaQ(LuP W. Gp:<%0pJP+@ٸ63]^&hu:JXVTƢ9 tCd> 35} Ƥh-1Mq'^q2~P/8|x7qR݉Ԗ~ BKRߢIn(Dd4I/ʩٴ olT$9r\=Wo`O'yk,ѽ+䗚Vt;mzW^x0SG L'~c`}BeJK'l4O’}qV tZBfxq}2j 8GHa9L6g (NQ}/:U#o^ܰB˨Nd9Y{z=v`0 &0`r<LB U-W2lNZI5)-Z&#Z(qtԁw !76Mӕ ~Kn4 Bo֦bۺ&^>;COA_* AL:Y$D1b!֌{vew`1N/֦O6J>{cVfms]:%خGe@ݺ^_^BvBMwEp"5 2Wn{ P T]A&UUr>T ln7xaoHApƣ9׻ -_\4_OqMYh&>! @ňK ^c Βd(qrh^"X^:iFMrDb: oBH-A՞Fܕ[2mQgv<2tJ>%.=xw1 &0o ޹kGӊKO/tDzpȔM_ ߆Moӷk{(m~.0gSA"O? ②=k.]o ЫPr܀ćBhѹ^x g"WF8G acn+~'P5ϒh  vSC.WŞf$#T^voj =sͥ -3mdG:K6pvBu}/:k¼Mo}֨%[}ȢfW5i*_,ێڻS&::B~ֿ<3a  `݀~zW8.QB?kJ*p7- <%P7/l.:wntpN8Sw$hDs!IE$a{:/8Gg^sO{ڀ,3 16\SvdGZ avd7YoMQZabfDJ4V'VPBx(0bG/Saճ.z+5`|:p}~&tIڭ,\I%O£?K&}UYD\VFx:Q D0U7Ɓ3E8" ϑ>՟[X؍2%NVEiN8j? 
Y{" fڶ|^&tOd: Zl⑝&xcpj:w8]H> \{~r0GrPPd`w},“tL16 .GSF#U<)}eZ*Yp)o-.Ob*\Y`i$Xɟ&;JhmlC=`h|\= T,''F56=հbL59ڞFKIۦ  qOD9(1Raړ<%U*-/u~Gdbz,xvݗ9(z~\\7Zִ)sK~N /l=x ϭ\JlMQ_-qL`Shm^E-K]wԕ0zySQ@T_A bXw7޻U]f̚bw"@R `" * 0$Zl̴U{79_/~WZ Rg1W0}hsr+ ӢZf`ӫ󼯶90l0 |% Kԗb%2 /"TDD't@@ޚpxm%=O->ŤHejv'KSZ}a.,;"qAf]Y\qiaҭ tJe;f܏Sn#5sݝZ qSO=I87egkx| έ :1V&#^(m)wam#XP' /NBJ;vm"'Ke[~lHn%<9|f@a{Gykc?ҞGWvD0xAH![ud‘#w+J jm770ښ`3gg(WPNjA0[s5rf1NMS{~!zښ%muntW}EgO-Ze;Rp6S659. h'vOb1vԷΦOe]JlTAƮEz[i oF{Kk\{[{ S_Gџ7)0!^{+9r7ȻRE!o1v{b#sD\<1K&9tˤOV[pB9ȅi `<(wy~c} AF4ײpsnm0S˭ǰ+l 3%]#/OhUѤZ/Hi2|G}TǞ]2 CDVŐ!uA Vj"S9j+^'3UMZN5@WY ]B3!P%m:?]/b4xXpϻ0".s4'ZSOFӲ!QE:;:\<[5kR<%װٳW)> nhq-2>AqA{t[¿%~@1^Mi9|f#I *Sۑ3+K@g$d_u@Q_d\]= \ƯlsݭwE,9DMS-G^E.)'&lcl5C^o=(~μau!U,l) /.BKOcjkPw{۫h5w[50V[ Ǽꚓu'Z^p|-[BlwL:RjƀwF~ $9qh%yfMktM-BNgnw2K&&EǂDTEjFzd!>K`wKSb(#E%X;]jNl}MA_e0›|ak_{oL8&7Wspb\ͪϞ}oQxyrJ8_1b D#Z :zVl]~Z "-_:"/zty^AA4Hr0i9a aF;8P"> Z#M_]⇣f𴎿WqۣؖYqקʹ~qEaB^B듘*&;7/-|vH=7tu:'XF\`kvr>*KN_]a/mѐx޸l&H~D1l H n[[vty-|95N k.; ֠XMGA~lkz@,8"ɵKhچD.yT6Y E4]KGb~5Ee L1}e8eM_X2F-|TL-{*I1 gHh_se9Q7╆%7~u-y4'C8tiПܴQ&4>IDN篕$y{֚w%B^ ?oȦ[T"nz}^ #v?>M҉GFWQ;K)O`MXM[zA 69Z"}=Y/YwBkx-7k=$] @ۧVFzC3KsBHLxooSqt_@n܈^`Pn4zve; ?df胁tߕq|SNdK ˷,8@ЌoUwzXtX!UOixdyz{dƔq NucK hh}0ҷ"gW+.K):(ixh!\_ݵDn2́%h2}L1.8' |ޟm%^y(Jr%QH?B](Sy}۰^?C_4 Y$Y8 >R˪Jl͂[܏GTb㎘dCQfTiw}!0PEWkX"|HKY/ 'mpo;N-o~{q!bvn=QZG,I}@?z|_̇8+='0ؚ 3".ۓi.7,h3Zl?wi8>{FoϟC0Za43ĊhW35cxQSZP.<48kd_Ű܆ÑRHp MeS6ЮClR¡-Ɵ~v댜;^WڋOx4qEpTL9Zxgxz%0ȂR#vxzhm|:YcŸdHoBbee.i֒tк9 mLݜQ bwh4 b^7x4QI%"n_ʮi5Bx^-"=u,h+R`5fϜH8N@c/N ypj 6S;zARSTCc*gkY;[D]`#)F\{ht/ Q7^Ì*m ԕP'#חnO/쏤dH&SOKI U)qk}"~ewҴt_K?ͤǭ elE.J_O_Kbt~1$B?/s#̥7 Vù/J:x8F2YgMGɭ5oh2H8Gl )'=gؚ!5V<h /ݕ?L9o=[r.%jS'X2\D5"qW@iW#]-ۨR1ȍȏ?8daslOu"Vh5b[L͕ qʞaN(4b04[a` 'xjuR%>|ez%|&cTU -W¾_|zPSaDٙ[plPqNK`!DM[ 9(:ݴ ]|/JG뺻ۍ~p6u,8>t]{.,G`:{Zbޑ dp쒐v E|m}kf?y1vs* bH) 9%]0 q!S/Sɺ<#>EX QBTʺ E)g/Gl&ՊZrOΑ`Dc*`&r(9+i>`؎Xpַ=KzԘ$ :nD%.+9^a*ݱ S6:b3;k2h gҝ5sW 9K#,:6qؼh4raޢ=&f7窔pxZo#86 #KSƁû [/ɥd->)Y!'oC\>ET{zup1??I$\Mp~qa`I8l ݏaD\T撁5sڥ\f %/UP$!\rKOvzllV VdvzR"ao%;qa֨L==Xқ;snRpFGKoMnI_}~5v@T N.뉵>L\/)}oFW^{ xl=)BhB% `LyNNSݎU_|~|t7itŝ`1IqW|<˦Q7?..s]m N$4MҊ cqưPEћ{\ 9DK'/X~Yw-]+#IZk 37Pޚ2ZD-9f: (/|ҹ?>XLKc\| ˋZP1;'h`ir9ߢu X%k\#G0xE;D1  3|,hYfI eED @Makrn9IEYLI=IdͮqJ= r[I>bpyrlxV&Ťqc+Q#ݐgJ$'1TPyk;IhPfIҜf ~ ^BD(bia>A JxHE$ʤ%Z̊Le2@֣QnHQw8yAkgfbyNߏ"~Pǔ@*|%yh4yQxc8;dN\;vX mqJ1Vp5S~72qBJha ó@lm\M,E +7SVRAg1UDZ]'q ]-\.9y7"_IaE`tFT銹0F-D/˹4C7uG.~ ӹQM/wގaNJ׿Kv`#x73g|1ݨ^Uofye8t J `F_Me_7{ .95 qGye]ui|=Q_Yk7ׯPSok:C<,.6E##^'&$:Q2^fWè ʽ;AWc,ȡ@/MIBGd˷3bG沯^F| GL{%Zon1 .[F/Ҙͅ1֞M_&ە(5h%X3y(a7wV}i$dgL\oQZOzYx+D줴YK`H8K|24C,-(recU{a -x&'I䤀Eˆ6k6lމ.Z񪜜~ /:tu,Q m5ӃP.~ɧp$:&3NLK0g$7e,ߩ K5W@|Fzq1= ꆱ;GQdGH$l-$XitZ2]-C'T h/*蓱C U]u6[f=բV◳}a%;% K*N$#nXJ>ԝJqF q/yroR9 'pȅ l0Aar/@7{~uә7E?䏅m*I{Z'[LؖƤeTIòI~w%L;n{.wLeٍ"~$5hFGx7[jלRͿD5Cͽk 9kr% i[c89X$>~fgFX4ɧ+&1R*Cmj=X RbJ`j{ϗfb|}U>ۼؚd+ @'P/g9qNV>nBJ1b6>xq+ÝĭM'ӱ*x!D99F]$٨ Q}g<9xGq!>I| `r y3s)u`:%!տ@6ݮ.QCjzZK lXfϑӢUg߻A|]/֥:  ]߉~y|ZIq4eЫ^wrt2II 21۳J\%u$+7&t rr_La:iz,UkGjlѾ';`he +P*9`䛙5( FAZ#-ϮPm򫓃?}@rtb*qC9rUT/b7>3av10ӌ<ܝbGt1"AX m mir6"9dB[=+ta7[1LWZ&Ngyd@/1˥uf 9NwԮq*J&V<#6~o%JƹP?cW6wO-*n\vLi|JTb9Zh[ۍҁ썹vwչiVmM ":gA\oNBGq>=$2 %!-n$ߺ.Nc[3;ؿOO~e aAznnÍ=$+j'Čz?E_<.@0a@8Qdo@I67Ha6KC1s՝)Liι2/< l䜾?凒Aq9ԯ qW+RR3Yu8[b:)t-+8s&\]d: /b8y!:_?몀a?OjιksmXLDًwKП4 zwNyԝۓLUz 0-֫zȽP= P]e9Oqg?8rLJHZs]Bz_%FW}E9$D>o gSI&N _/<}1Z=GP< .W ~q>MB363H;aHu%'z}#uI@OgTXҌdc~2{MbRDv^5:^`ww'ikt|͹oe{s|^XM:#Ҳ1LQXk5iESk)*Oos|*H˞\ˈu9iDA9Tzș,e"1'2GN?,쮾NB27HMXgY,c%auyl8@Ff tۧzn#x5gH:w2M U &~x &%8娣P7+1c%KYME;s_ř1ux;oOGx{_e΂4E[[qh[O廝+h2\ogL,R%aĩ~$d;#1oFO-qA'lG^~_{morT_T9)RDiyzcY#@ {sZhΆȗR͞cr}r_$UϽv/z1óMC΅EY Ƃ=K^F;~4jGhx0bg}8K!\]5*BD<`1;EU: 
űTji/4-8@+G<z+Y;%;Y /կ|  :;o#ԓ E)mzu> &TZソHj]Onb ꃌĴ\{ +8qS8%h51O ?K9R$X7 )l:\[i+"]~vJS# Å?0ؕB4J@-<nËAJAl̻N]7V{G 9k8Ni:_7, .WA-ڱ&"7Ѓ۾FGl:2^1x<,7"3iNi*mxIopt6 ԜY_)X\M 7?`08ka"Mc87[x8}/oj8l Ap%xE͒l*z׽3/2t ī`8:X[oIsJi`*ܗRhU\>_)XэN޹ރ6~?__,omD 0Dh,jxx$=kېr%lzzdw2P|]]/+" - ]N!z Tʣ1'@gtx O 櫙RՏƳ_3t|uer} 7ÕT:A0r" bul.{">[%iYjb{u6{73o:K)-?]b 4eLm>O܋|"w1/Y!cҏm"Et:?qC<=?t>s|&t' |Èo5ucO {-=W>"gźpŠA1zƶɗf"4SYhpBɀ),uk_IS}WFk٭Ew&o;m uh ށvlm8 W*ὸ%2eGYC.36f Vu//x3V XRZ K*G-+z0 sDd,^;!=qCL+Վ%d |&QlQ J^@װ@ѩ5'B af_ zb[d+7' Y. sF9pU"^Qg,4F Jxi9VzV Zqg3qRťѢ*xԲ/( Dګeb}$@ǥHX]4 BJcV2!ԈPNȾd 85Lb)^h w./"Mrm9)JU`0e8)3qi:QH&Cg EP\\X{_;riHDJs"j~qk)͉Wp P;xEgiS7.٨HQO}-BWMɸi{!.N<]pU:y-XpuVA$.fqMҌ,/\%japý.dc,!r$@*׆i6fr+ !=m?)X"sЄVCVfFSC5Kn`&,6sKU%8}=8;<JLDثFp0?]Nl{W#hſ7g>wԁF֢1<ޝ;\}JMsA2ӛCILI]?E P7KhݏӀD'\{Eck?FZz`VOn=u(1=u;ͧ8S K뗼Q-<[}o=s9!KaVt d[X˨&"\kt 2HP|L!q`ⓥLZ!(u/7 .BGQ ǔ8i*E]3켠E<|}8rcŠiS+T3i*r ]@bW s@c9xҔSa]%0f䎈ZĆ:SgKV IF-*S9?4GH\XX芡wc^Em+#.M 9![J? ~Y4m^Q*/y + h"aMuTI̘6ZH1YE{#ү|\;!t2l>g~-؁F#k=.)tUz'OGXzOJMQ6jdm:A#8A@g4=u@ꤥfC޸Jjt.P|-FH޺[?R\9<9Ln$6{ @BLRds>/ Ȍ\/b_GrH(pb>d81 `0Ǯ@iq-oYMk篘5|z-<|{ȃS Y87x U.a^0G$29uH|Hå˄N 94bD:aurR߿ogj'P #-h۟ى7~\^y7嘛g3@"tZy6H@ael,fy>;/]kʹA|n#p2w ^Owv7F .-A`?X7Ij}:bH̔8 8S<=jIfwA]gx }X~)ʸ+Ga T!%=&2I>Ո :]qn4 ǹHK3䝰<{3P0вޯ{ϡItuH'jn SaыL3ɻx"\;(&"smk[;]kɻ]Um||`Z+ώkcMxBG3ײd{ˢKNS9غ1>~DlÆfq'[W~X6&؝]%B 6MgmO^7܀<l.i ynjmE$[Qމe9 f F?qVA|z: O) Q :]xg3 3>5Id<x# 4qSlt5 Īs#n|!B,Kv0BҊIVu!Z1EaZ2w@L85&}2 BS,eQabu Z`+Q|V3M惁i9p\OضX0 IiBpޝfRS5Et̛3 _8Z a!ny2i:4rƑR\sPwcV&+EcJ6NG#uqc>NgTÒ[SQ f9C])4ȥE!%1 R*UB@ 2d!JZ(Dmj@A1s2`).j7I9~uLg׭5¶SuW 3ѱ^)H\8oV75ϺP^`d.|#>2{piKL'$~Nj2d k~`$ⶭ:&_"߯lMqQ,;!؛wb}p!LQwE!>z_ Ɓ8KKS?.HZM'Ys}TͫpчP f4B9^Ḫ-#s%Ixk}E ?};LR& -74r58t\&˽R,'=8n}#{?(O@#b> Dh(\x!eLHחdjEeZV Kp/YyFwF/L·O3Z > 7I܂{++e_ N\~WpZ(,E\% Tzw27Q.-0ycZxb"H01g}GK^n8IBP&Dt˜3RۓA GS"Lˢl _nO SHn@gyjw >K+R1Bt{#92h|-w<~r4vt/~->m|Umr](p3(?egaw!G8-p1}[Vsf;z<[4 mgu&?~ MB7 {aIAkMX8zbZrr,]Q3oCwd[T3%}}K,4|?P[ @ HķK cd&bTwܾotXjvHssGnopJn]AϼlބpQ*ͩ:AV_..'.\2A!_C^۳/-z/,vdܝ6d< z2inBeC樮ePVFd  )]rWGvxM<1~>SvPi o[~dbkߍnTNi'aye:VSod$ Dr:X0̠bT{M⩗ಘt!qhNZ!s&"%DUnz Wnٌcm șJYZ~o8,=Z$߭_4cî:9`0"9fMeI Яp,,$|R¼'^u`{׋N͏e#Ŋcc@UQ':EɃfp6UVpIՖD:@ lQѸG1 3ʰ#H7\x|c<_aCӒ|КځdS/NxTKmE\!y‚a 1g. 
XubW-7hMg[ۘo|XL&l u;w>Ov"RRX9 _)iKZx le̓iYB;t4]$>^W/c=b5lu8r9@ 7D_ꛇ 9*cx,VM"c8`K}bi 3m1ߖO98y>NpVl/=^~:/6c) \3Ro;2Q3l3Q6N^MG1}N+5x$PAAOHڂ]Kk _QĜu +&~x¯{-R.mA[UwL9ض0#!HC %Mu*FC\VH,MC E*֩,tdI#D $هhdv|`z7e_1=\Kqs ]-K'RGļy3VRd][)D<-/umіǻ&9ܴ?ā&C_aa7WFg ă]ߵFvTV0%,f\^3*$gzB]=H!OO*‹ezD@?b>J(KUHB^tk 7#a)ef#GP$Ga^zHJXU2B7'X69"2;j@:b =V|6u憎KK.f2O2(W(r 7 aX0=Is_xƟ?XHtZ!;nFQ3"E꠪xxxԧù3 eN5_rU*s5X2sk^[`RJ);q|A6WJ< ~)$K`ONU^o=^(YHʫ2+ðv41'/Z|:1ຕ^G_>ƖRtOe S~;ŵUaI#z}%:ݠN )3.cd~" C:W>g$b.1 yY ;`Ρ7NQp B@۽zvX]~On_޲>= #3~ͿŅj?<<2=Զ k6E6E 2٥J||փ5so`^fPFe#UZkfV Yަ F147SZYM&B^ JJ1@^סXaQ~8c.Ll+.Q?[̵, + |25`р` x 4GǙ_ ?@~ -^B\Ul F턊s[ J[Tp'fߣ@(O0׮%97ŎI> zds5d4QBNl'}rY x`\Ofby%Nzz8ief,J@$GȝtS!)<޵Gg5/f̱U- { &١o0ac{y3jJhH7ҏM$\-XѵIWbW] ‘JOs;`(=XĂ 1kވ=G4^*9{Tsg6z_qY'Wsfw!y'5PRi3^qoD'pP>eogvEB {ysb`kćT76ZYx1wG1WoJ7Go6 yNa<;Rِ>_͐78m_id%̎89>&B9wBF"/nJ~Xf1DxJ܁Z[#[2߱B)S>ĩ F/apPjv cAx,`$q@-p _4:_Ϧ빻x_8~Vx~ؽwȡc 4[V+wEǺ"c͔K5S h lڌ/Kcm\ ME)Au0.P"FC{'D[\,.-ǻt+tMmB̌e>nDJ /a VN)u.9%|2{f֪ ͌ i/n:M *B؉nTÿcr#Ʋ&}RtW=s&ɜ0reչ: ڸ9ޜ l7 Ic# ;xR׃ɷq)wIif cD-Mvn)!q͐0lS3SDd.WoݪT;_Qs&n x𶖐"y,w Q 9M&֙~r9v% &FPF.½e*ك"$ fXa`b'̮>6~16S@1h-]3qQ0{&SfE 8+2֘,N}.DZē ؘ>>oqw%\kfMc \->f ,F]fZB oGϽ9";/C#дZ ۥG^!hTLHՏ)l%IRꤷn,"1r >[ZӕDN >l/l?N@dcGrK߉kߠp8 @jb$sbBi, PO*E %MkF^G͡I[gIQFX_"Vg Ldž'i,zy"kZBk4QW_QtTpmN[[3Z^βF!.}0sb} Nн0-Ct<' 7.kv5ɐLtPBq$İLV>)q(?ɨY_s &0}iGzt9IU qZ \ Tp6d- &."] L8a5>z{Vmffq ;ΚB"|K#ɐMG ymy,0a=W{K0Yurb"/L5$߆Q抜0܋̱Z'8MhQ mL9B97Dx( KFs kb C$Nt3&VNiReU=R&%nzHڗt)&µ 4DyدP9v 1 73Yvb^"$)85|(4Rd0y3|<2Bp;A|?h4o- )a^8DuΚ;3GY%qݪ]2ɜ .#[ꯢ^ut0kz\Cu>%N0vҟGE[۵1 8Cե)sޕV' χr _. ~#d%0~0@_ryZc!jTVe<攛 i 25s|dىϗ;` N&/ƕ4V$=<=*@BGG;>j4ǏF9ؾ+hNgGs&tƟ25N.äJr1C7SiE.hbnx#k<*D;Et#wҕA}ocVx3L=LzFϣKib#6 ug˚t]HI]*~$mR)~g~ߥ#§/쓰0oY_w38S]zbs<} |t0U Io^ {/>u!ay0[m2fӘg%x.S͎xZ}Zɉ4jT$ظ=1W'[|釽Z;-#7k\Gȕԉdim?SX_c#p ѤŧTXx3NGme?s']ԯ>}|4xu-h:iOlv|ӝѝY18tSV'|\̂~*[6pt-+NoU(}].y|t+ ꥣ%^R @,2()68X݂+\Փ wdQUG >(f˗tY݌6z&d-oz*^rZAZ6V !꾍)O1t*%oΩ\&1P Sfu@ 6t_}QC?ZtH[©(-JZt)9گmWYl7?g#Wi~ㄾLHRte.uL20$"P.Ťa/0_@Hf7#.ed0 L\,1KYHuc`\4&42^I]z (Р{:R.!FT7a[2E;tHIDCqb4U;ĎQA`_=E8dї$`0DI3H$zbTIº!EYeaT_>3j{pN!~6y_[wGuZtCIqp2 W.ɴGVsĴ?yGdhCϕ"*ˌ7#5t' &oǛ xE9s%CԿ`7hh  Rh}(cpr"4Vg` -2yhQ,2 ʦ^KODleVd.gmSlB'0Z78L+Tp=Kܚ݈*Sljudi=5 RLN` 3d/ x޸m%t!K ! ,3 g b?>~LWot1\@]Eb{x7߇:N2$`+]O^Ky<5BNgb]}><$zAJ1L*+H).At $7V&+?uoNWISŃ qڿY|pJY:[4+oŶ_BN0u~Z0~0<#׎4"K4ڰ_ x>/fr4*GLn"\ . ^`˒{<f%=hb7P0gvxx܀9ZMh|ܛ&nдG4Vh??xJZ{x;k5P`FxEPQ|xC#MqM[&'&+.6Kut'}?ۙd}Ϩd5y K#`2y=XNxܼ8 @DH*٧z_Lt޶!1wB ǎ෸DN *W陵\r(ON~CG܉G,c{fVz޻yuQ48/쓞_O@ll4Bz|F"(ÝA)τ*Uuy'j3>f)?h@,Y?:g{o2t ?gRN>1Z]}1s~?C2iWÝ Oxi!l YmxAGP0& 2WGK0_2M޻rQc߭e$>^Ӽrբ=`D/VO&֣a)On슟F]t뷕xu5tQsF5W|K!o.|6T!"#p3WRR{x0?)uv/&9ۣ-etUK n:f}}uzC6cwݽߚ|a|m<`>P_>wMwrIOk ^\'db2iM0*+Ѝ<%A#>Kz!WA/{ #*&X)paK~Μ\1`T;ױ9cH0׭0Bf"85n陆.z[缃xa1i8ńz\ONƗ, \//a+e ^V;i YOZXBz vXԏcARpܐAMZW%[䀣cf 8Z*Lj'T}N׌_ؚؖǥ N}duMlsn^m6덭\'mt!&q#tSSN'{/~m"> 6ag1z5aK,#oE% N)-AN,"B^<کy=~;l}?0ŝm;ƊȂb- CDoEGA8+m+w<>`vCқwS#I+BFj!o˼Jy}\o7OAK퀆N #k^Xz׾y/\5 9*3RVc$ES0lZ_6R6y1[isOGOe%_$1{[Ivqģ>PBV8cDp+K;N;7p0q,H# =81Vr\b gc'2i. LeHoE{o:_/>~hq8#~]uk5 5n>$jQdT~W߮dV4w)'TE( uCeЦd^c8eU7謉ɣ0;#I[.kUD pLU0uR]:.h&N/ߥ@G?ńGR&Y fك'kv0`% 8vR~ cְa.2mPv͵Drs;8MdgK[bΚIq9 0 z'jnHwwܶ3Xk,Kb ֥#?zl\7 x@>>GEne{Y-Jpj&c=Ea@q6d ~3xe'o$%fZI*Ҡԓ0V( !<zbUsuq f>xjq=kv3 Lbd4xy{^K]7pP\qpU2"uvgMp!`7&ŝb)g@k/jc|9)!z4ۗx!JNjMyu߇ۣo$k-J7lRqb`T͇zPxYUUT>AdͦZJK*է#Dx5Q-Vq&2`N_*-(]W ]!>W])2Ecҍ︝ǹ>f(lJ"@F},=T)W]C YV+c5E|rV沓#ŏk#ƎIFXg2ɕ.֢D7c 71,zoIT ɡp*#<”^?7n; }`ԃ";Ba[SZCu%ཇp\a>? 
Hm e~P߼}3`&L+F.|Y$n0r93q3'#uSPg|m?0A^(I]HһKcvz?%5@75|\VS*x`nA/]y2@On}+e<3qaz0C 80 KcyA{_2'iuDΞM߉BZa W^ۺYg/~̌W%D=v~uQnƁ QX/LaV-Ʌqf<'eQP{!3T&@AOգj4J#glwMK>Ze'FdPol%k0fφ*<`o(wXIc n%{`]ǰck`ysfnx"Nhq+\WŘ!nح'vJC\7vf1WԦľ"41B֙.?d%3M|̶ 8e٨!, !/AC7F@NeEsF j5wᇇ"Gέ\~Ckjcw}1Es3~ʫM,pf~֒vANsK>ߚo;(ӎ(';vaݩ#RY\S7ż{NC'\='ޛ/41cD́$kVs 9apz5;wcm &c!f!KJaYtWCT!ɤOVRԶ7VMޒ$1J>An"egϯ24黶 `D:^ݤenE8U0cWu*wc RI I+mD9זa"pPT\a1I7zi֬4/"7zb0)/c4Zpd;US^譗$6ɔP?ɿ4 EYjm{g'PY7V·lbbl5o'-r;6Ї~5B4to)qjBΘs^fqlyل6ŃkŚi[_Ok23 0>7?yQބPl s]±jؠ([)rta^j>w>GdMi#m|RI}ŕv=;3s~zk˯#̽ Úӝ|-*י}_ki?bZx梘/ 0Z!+N{{Η&-MnH$Cĩ7-OEARRMpqV}}oH~ntF`K7U^9 ,,1]|WE 3s6w'_=󼕾*B;hR觲,14,qzO)Q/~l} v3` |DFs ;@*|VU p$P*r-sv{pwFC.hI Ä YJK5IQb#ɛDW@ c)+=НoٹؼGҷF GuZ z8Vgmڼ?s7$Ѕ9]hJwy˅ghW#j6ٽ2-˜6_*@#v˹ '#5=Yعƀ6i:߮ {_֬Y +W:[=ȖL)l ץ]w wke)^ɝL9, P1K.FkP|.nU'؁;etлLX׵Rr;ץ(yHMd8m3C*W!#@q~>BypUGa^) CUS–d?_[Xn!b1/qzq-O!脎#C>fZMcHw((W288I%N Hz Ǹ93FoEh^̲ d-i5=UݘxE˳((cjKȌD)k?P=Fb/RbpLڠ:UZR ٮ#NS]3R|_녁ӳ&^Dx+!GZ8-6J v'7gEk9&>L)$M>>Ujw))RX ;ׄclV] *sԴ[[ Lg髝״Xf6/-U!PMڗ &'.M/RS n7‘`YhoWO5EMJ-JZ67gK Z1- uG„XOPHbOe]VeqC"5cj~?Ѯ\8PSɳy?,gOW,Es`4`FKPu=z`q7귽NAp>X ~יp`^ _z &ĩ\v?F}k1;di>@C`i3]FٻLgzU|^v@ud8J3BZ&౵ZiߝƊ,T}K~% ǥ}j@>+c"Ӽ~ǯ!?*#pnc1=9w4scUm|#u.Βilf],rR03H֗Zsݪ\O>E_acj E$ x9fw k %k7>2}NxDQ \kKl_ö'SWu^Ni`@0oj_S6h/8cmzA?- wbm1q9A-5{;^n>F1jTAoGv MrG^[M4OQ}q{dA6c81#O{i6h_` '_) .ᣘxGPҲaw3o,m'jBo_~dI_re)g֯ G?kX9 6E祩qi\̳5+|g`Gd;C?^d ͹YZo|GZk2}NsD q\ڍb2f* q߃/ < (ff Eg`J]6e̤o[d6n.~h SjS27zF+m`7 ɛFf}tvDq5EKpW: 72VQ2xEF^# rh*PP8 7E}^mw)ȕBNX ڞڎͭ &!KG*8l3q]7ߴ=ޓ"6rsp9,x._wIړ~^HiN ځ2 sR[AKKreGDLj]0D<ʔN8*Ja&qtg=G,pL""Ⲋv~2RΔ5f""*Q44>1Xɷ@ռl\ovkk(< YU'Kζ`Y.`Mj+_%QQLI6\LbrBuomX[ßy<&nh'«\1AĝޛeZ`n 7W\W3j2Q؎x8lc vY9dVP+,oa,݉Õjg'·X#V~%9ӝ޼O!OyJ2ܓG*8=7=lM]~YgtVoǗYSAXz7ˠ !*w{|8]O_̍? Y&)j•uWwA]lYo_}Wr~DB,kzP ڜb89=9t6CӔoЁU8y<UFquۏ,lHkoby `%J5=m&mvQZqM){I캅"2Vw굀%K]ռHs[a4s ?s᥍ӆgmiD-! FfMKMl1-@9E37蚢Ƙ8[&tBCgp;(M`ߺt2N@ڶeF~mG(;1`bwJ65]R۹:#ɲ?jƶ2 ;R~|: .(7V)6K݉N'IMA*m\MΈ8LS7Jae|VR%ev\i_c`@`jIf;|6ۺ]߾T;6E5}42ѥki7b5bBJ=-W7$| Y"(.TDzFD6{R\Xۍ53f5[Ze(45;~>X:9Ɯ"cj#oiM3#4#^Qd;hFj'0m3+\>_aФy02? SD'(RIcʱ%j! 
e(xʄͽ1r8-ܟL8ۣ궚~vǁ'sHҡlWxkڟ"nDr9sAA/ o~Խ_ewܷucpzƃwa_qR"ş|`7h.o|N,WxpL~Fln-SɏRo(+)׌ek>H)1=AqXPLu+\;h9_V{sQBN/d^詈:bF<I?2Z'lQF|V~̆ݾOpjkuS@w\LOUak9㺭nDn$T%xdRXDZl; BV<s@KL%޾{e:lAt/M掳gh.l.'(={?:y}$6.nk0lvM#+eCО⭤$۳s#r%\9An ^,kIKf%)*>Ym2>H`b#ou0Sf#H΋wU\ٖZs/D%v9s7ɮȥppאhBGcF(ӘRrgY#k"[Ͱ&B_tp-No-t`:M6e<Zݿј孆\i!Ԇ.XwY -2qdR;9]184qW(|ɡXw1uV_h!_^SkvZ W3J`C:- `\Æ@zԡ ^jݚrҽ$^;g1_2x`dQr'Blmj-&4HvUY2,'mL m̥1FUF FCد:٧ .7v -ENnvH9{ʉTR*6Ƨdp"uudn f߿f(BXäewXbfo*(RL{CAEl~rj'~]j;D a+7ua%Lܣr30t9kvs"h5>c_?csCXn.Y}wN&酣Uf LGOiK|}z*B[hJ㫢ù'rfEh96[c_ 7Hb?(*{Py\\ō^?bٚN{] K Rt挛t҅0Jݩݧx`zAHhR %9;0x ɝ8pRO%`A➲IbF`d: _/A`[g@ @ O{SPE|zv u̷%0]\_^I2-D`zA4)Фu{Mc\|8|L&2&Sڛ7J">S!3WN7Rxkz8괿l&|D6ptPrlP؜CSKΞ# MYIg Mk" ނ53cp6's 6'#{)ImK1߾z{X0ru )q-z*emG[:dzQ1ugx|Q񉂮&'aYgV2'렼 ߫ &.uB_kƅ|J&qgcpٙ׹ZusY>Trk^=U~ydc'<_}}nlY]_&g '[l$.?ܙWLz/߿ב 0Ětqi~u  to+zע=_0%z~9{$S𞘖fsȞ\NC]ѩץd}eU^S{wVN~uzTe_Gl)?$^R}Бa UxIB0?Ys+;$7:uiⵇ#IO| Y1T3svd 1~js `Luj{gXTD>H4_<׻|?*Nb}#/zB}g̃zlC^ hT)ĩNR3oR^ez}ɯ7LS]zX@f@Ʒâ+Cwt N7B>Ó[ )/T`Q7ڍ񡘿tN0/o27J;1Ȳ3H* @ZՕ=,(w#HkW 2i8OԦO;Q_ʖXHwo`\:H)ktЉ/!l3]RqTGҰNx\4<b@e`VMAM#~&h_',=Ht5 n${[TvCXn=IM$՝5\пt m4高V؝\KӘ.4 N-t.q\ҭj6x;t0kl_F:&fڏX}AA{ԫsi̋A{Z jϫpC &ù,x9mn~Ln%dzv}th 9>]3\~͈3$u#gBH~vT,?8,(hzCw7]xP2F-" }!W}D/fKy]f W +VK(߭ 1ψG7]&YVqXjx9Ww?2[+HܚB[:g3\pb,y謄&nq W3LXv/LH>^Mʼ|^gK>6 ߙL.p Paw}PD‘C˞p\B oc8"Ϭ?7g~H8ٻ\ٻ|^(q\^ sٕ4X-Q\yPD"#.F:O'ӡٻ녶?H}zSrdsx }*5@)$ =vHٌehp@{C7"8S[Qʸ99v!Qi(mC㦵|9;99 ́z+Q[84&2pu5K_-֣|?rV(U {') Иkb8_[n@"0 IYáTI2 #/ ֫J\}8s] 12\V4>y.\ĤWLQ8%Si* sa1N~rFq0j#N&$ۅeNi*)&o{uzGڮw{(Ct0TU؏NGf^~޿5L\NחuߞoQg@^dl Ykz~f]H$oɗl'Ux?)Y^CD]B[m511tM#uK^!71 /ŘaW٧(YNާxУjDn#t3U4={~ш=-5zտ'ډn"?j^%odBg<_Mr@جXdnJa֘-쵝/mv˟p7| _w!d@.#ds\Cvk"/rRG ].̅GVv{g c>W+t zac[MKJj.7LI4n3zTJA4 Gզx_\$%Kl--eGkn~jOŜS65G@<  `qA> ?8˒ O%Ƌ}[ 6xj=OVQ:'7!o&x"đKrMw$iJްtSՄ"gЀm>if[\ճz>΂Tx@[<^/ X1O]]Ǝȃ$XGm,4G+*#h$}2\{ ~aط]N[PWqn2 \-,N QZ P;}Cxw ek١|¦P;p=O3o*O]I7rO7X`@r8! b58>KjlcBd7DFcۋK5&?Bz'ל]XqW&zq2zw˕]+RI'31Jaw) iŽ4|[=:9Yq mZk[-k)'k#H9ЙM pgOs 2s*A%୉)ߞ6~8h0 ʍN3̖O.tMh A)8~;-Z8VYFy.v_K*@?!J_Q`Y~=:T%-^&zQ1`6CgId3FWf{Cka-ph^.Y8'\~Pk.f4kmh @<_QFD.(4n:VJRAEDBtH.|Jb9K|b,!bDO&AӳD@UaA&7y/U)55G>gYI.26bOsgO@&Wލ~~ᄲRSܙnE\zzbRxqt9fo{T&EG()nF{d:#>UUӣ.3oAewX4u7ȧVf@jB%f#%'R j mdXa-g|J艁mۋ访M\qמ6tlrp`_*]4ճI{^ )!žP?eNs cŽn+un9Fe1Dn:A5TƓ)OVOg#kp8R φBbCk6e/LmYIAI>au C뭁^N{hj,kEog'T_aM''mʱ*[Ċg |JPҍ4"t}"_SEQ˽Y؀?O&[{.|le,~:0/WQy`C;ɻۈn mZ3nz̅7кc 5H1JQNov%T1/eymY]N+)na<[CXN =\hz#wqs:=ѷ{wmOQ{nA`1> _{EЮAogƂ=,Grܜ}•I\-fmqbpİdz5PFM/6jWTls1js$70DJ Δ8z&G-.И#wI[+ޠ-@ g2E&4 )L_6j52ˋV}-m FPEEu}IuIH% ӱ}^RaÇ[i헴qݘ8#4\/n sț]pKU#&7$è~gOnyMe|xU㟧0o aZ4@8$AWV<a^ - ,2lvMKCQ;2IvhDߍSuSvt7w(MI %`A|Eԃ-\޹.d G{uRFo{,, ~am7wt." 
XK2 3<'V*{ѠGxT{cӰW m/W 4Oj3 vb[\7b2-L;KW+͡O|X6u#zVH IʛxV`*VkLyuW^mh >-d8A>)2 cn`p;R^0ّǩ叩!dvz7] W'W"UfBn} /,Y}at#<8tl.@t%* zÍDf4O-5\"X#k%;ݟ=`7WտrBtrCГݵ0JBOd=K$}<7&92,(S9ud8, 4N-H !3rs6'4ˎiP-ư>0޹sz~ ,Pr5l3v0q]j8.brWj '@%\r ,5.Ѹ<]qVi&RPOXqRW'tipˏ_ ͍bвiuR}{؝@;+F/V^o=#t~uBhrbV4*Esmo녶'zTo6+^(s~=1zYjUF>S5^6abDT=<@QIੂ) !=RJR,s2le-$>QS[G HpBX9&6?5wzoaBrFog@Hc?2{L^SIlyWG-0鯧}Z(1{"y?Nz}'Akbeؽ~X&~ar3?껴@4*|訫RyKQElo縣+0b~u0߁3Ue*p~?/XTHL|F-!l>@_o&(N[|UT>+}mO@= 8+#w*gd4d5@lycU/RZ~?O>\mgHҡ~BFi 8e Z*cj-Dnl#/(靈*im l5XN7;Ʊl>C?O)Ō0ټ;Y'hzwuijݯOƷ_})m1#5n]JBHOGW:'{jW 6vJgz]T_&NI!JƋ++3`6 [o)A ^LUo" Hj0#f+m(k˰e[bmi#-mBX5CL$felfda&ȹ$6xmvSup8[Hs(KrS=c43-o* kt#ESb$%8pmj3!@%Ņ=gY2y;~(zQw-ϗYu hn6Ÿɏ0pͺشwӳ.аCNFB1,;YwO}XG}̸༙fTdmX܍E˸OfKk/}̠͝}Ig# ݤj ~*\M䓄)ߩyyKg)&TlZb B08Y(1ɡ(fMy/Au18J2o7z7No_9{Vxa@,0 5AKAU0`JSaZw-aռDӆ;%Db4xw3zM^HRN4>˿ I<" + &ydy' nNfa <h14(@1 F?_=->7`wzw[f7iDS+.Es2Eqm=-X7l+1VX,N9URtrZAT0S6ьހDYGS}j(,[cdgȜҺ_U/E&ydYiplgx)y36c?kZ֝eiHw4~!é)4F,/4d1zY;7jRFY♲8w^FyGm "6N!Cxs{ʩY4A m(%ce$O!%:Z;x7{r[o_ Ow:&WI#AjbqΈVv؋|Ĭ%z|jt0ෑė|/~ 8_MNL]˟c^;uM2_^lz,xb帺r8&R⍽lwp%:ho'A/uXbE +%ĤQ76Oް} *su esڛkǣ/s;߰| J[[H7dµz}ggRi2XRjj `Sm,0ʆ߼1Ym}9`Y4{k_x "RC$suHzfo*r<6b|$Kk<,mv5$mF;B.1%cxM<.6ߕ>g *%#ZhdSmtAbk;uDAH>N{[}ٸR/XxF As띲dDiOpz &B/b;fgx=MJܿ\E${j!#du]l[Jͬ8=.n\.l|Rv:ʄ|`ilwڝ @T s c< %J,l<%כ W܉2{Q;%|?͐Jk䎨8{Ԏ:8H= $vE Q8,ėbz ܯ$c[6EpC3GI(Y -t{JC"Y_:Ey1 Ǯa{õ:8$F=zXs(_.dLW":BءI:m60dTƒ,dN̶w -65W.bPk@xdoLgI.- &8cAaOt]261R .#GhY5CIM_!y69W?~|bsI]`s?G85) :AWP72tyϦ/ ۛAЅ, •ŝr'k4P_q ~7l95lj Vnfp.;ɶ ~j 9 .zө&,_vbc=ݏ;)&5d<ǃ:b\l(J=<5:,N p>~RzR`Խx2\,BK|K-.-i؞D~" %;j6]|N.dAzIŀUC^ޢcds]9&[xSE]Cqv-5Vya0s'O3_@4P}S7* bXh_|nAnRՏtYnݮ9~OOb:W\oz?U{FW Y<".at Ekj:]V"!E$-W,OÌZsL) u?znXkaqtk$C{g^$.#%a"P,ֵK[Rs&qxd~~b93ZDs苯2%ք .ɐǽ8}˾X,/p6}Y> oLUee1"*U@彴a&MPp#| U٥܄/.Y;/Q בUA#+v%s W$Bo~9pWx h%*6|1Nڷ YjR^U!G֭OF }e,-nB$H˻W4{y,cHAxvk$$HdYfeUgs /w{Q tFmgڤ,t&1)u*i[7%!0?|5b/у2'ge?N+UAQuzhј`D6\0U%bQl!p'u[vwš$yNM_XtRctv~օrڦZnDF;qe,SY=eW:DC +լobrUzn]x"Ѿ?dJXD9 u"k;;^~"[bYe7N.5 BFUpY[*\j_q l۠r[L;nN-!p'oW&ckd=A̤~)\wlnZs`qq]'I4jՔvIƉ7JhLMl|( 6蜝֯n6w:va Pso5ܚ (Cjq*DȆ7缾9a5%;;ZJ}XX-1׹poOq+`΃5] 5͛Lr}Ȃ#!|._S?KMc``k-ߘx=|ƭ1#r҈8R#q-McU+mufjJۤȭ'~p6jm?agJRU HȜNoGUk(\$8a-/wWԃErc ;Ex4ji?V+u012jD;!<7;B6rbfZ78^^^̃N|K*QD7;!Bt/V7@|LĻ43HHG(~+| .s5?ߠ{{`ol5S>dwT٥KDyЩj+c}^Ϯ_$҉u{[qcz[ ;L޼; Dhk%-Aq ~zAӟ?wu iLL'elAm.тTߔ˩ vEjA 8c aq=\q7F,HbhADzS6u,1mT`yY 1C< Qo^8ma @~w*6֠X{GN"~\4Tq2xq! X1WWC[؋96qM߳{Y+o6/\j=kp[1%\\ĥ32LTM; b[ܱܾ4L5w;ya^+4Pz@~xYo;l>4݃]|.Hyzbck:mҁGP$QN$#)糅yȆsV'wQc OYRol~}Ϩoe9#2+׫^q[8sw>YkТF5A0~i$Ip|T+bo-t5st& !2dWRo P -&W~;;6@= L(Wl,vؙ05],g\_Aɱʆ{*1r;ܘ\oXɮA*[*VboAPQع"ht:E!}J#H(8 5犕駗r=yo# skr3&E2ݟ `62^[! a iK&pݾO/@Aۇy|slޑ܎Vu<2Ӷ}zż{7(j?^7#rU}߹Jm)(a,"硽$)wR"d R>4W`֧FЎ%ƐM+~"Q3y<\JܽX<lPd,/w;b7B- "::Ӗ3|ok߅f1MdEGUu_j>T ߃nG|a z36ǰ~ܟ5V'Sx6vڭ5[ǧAwcKb'(?b޵,pbO:ttwsϧ7M,G6X 7e+ bZW+N_+dl[k~. 
\Hx}!!WB:br 4:Miן u"G|]--Iҋ=F2roKEcOQƄe467㈬n-3f.ҼZxR\hHo# y@lo]H=;,(xhUbsaBN!0Q$_󭅥D֤xJb20n_m_Mf_5Xu?@l`chaz !Zf' VX7x|w[-~nXq"ʄ{Zȵl,U;V V~J.ZK>F5e ="ݔ[|s1r;){:b.x׀!=2٭'XA.4g5E@/oVNS3Mu|^|O6[d^(XTGH1= w lnK4g|x9eߝc2]%~W=!Ä7g|+͈E |HLv|Hr4*)Q7p91o9Hg)툀&%%zњ5A2gi)8 vxcv%]gBH%ۑ(B%RCgvL?[&ql=}rFGFv f-})(7Y -Aüssխ˩sOؙeD#lpla`҂Z^¢ͳ;{g\9s|}w;#"ʋa?X ^ET:%h?8(8;;Fӕ)+VhIFujoX7ߞB򪭬nU\a ǻmz-6"FȘ؄dVh~iu)r25ʈt8PLOMj|^֏+bށP|i'[z)G4:<8)>慾7'3@gFW]R( L+r1܃B4 ;LZd11e ]fd3|ղNXalkzSk/=o&r(޵ev 0,bKQV?;@ۣ J!lA7ƑUA)HV{810F"2=hm}9B}\f(K']c\x'(|v 3,Qߏ!1xPv#<4E+erM DBR2<*;{E<կ;p!#-=B洟ņKL\$M=zҍ@ȑ$8M79^IIo'Kc*NM<%6_YWߌ<(^)\$7Zc5DipQ]|4j0FBnNu;Dh r9Hjttvt>Wxm׬ϑHS![Myxb\v#4&Y֓co|…;bCw3cZL4ZXD׷]js ^:ICu\d[r5+kv& pSFv6XMN޾r )~{1ڿXJкB6ͫ8/06ӭUR9+GBk|/1[1ay8 DdZƎ-B΂-]uXZNH#厱A/j~͔T$v!u[zX@ӣk}o//j;4.J'qXӱ.r5ّ^V p;w?Q vd6F[SY/y+r?}q;ל";t$,_"돖#%jG:D?E(!TrlKbA7*qZdHArt'/>*?#W \Yozwf$UKԾk}M(py=`/bM$ȵ ҧL* eCSߗc36"3RmE3I>.n6t/]=TYSq}8&|^rxRb\n@:P-c2_ΆU֮[ow`>B_zaZNQZQA;煹8Nc'Vж(-9DHeM$9 ^'YUfTPK#t}XiN`C-q+oN!S*fW46H<8@65t]$m`Cc>$uYwPi,Dsgy}ằOm+:h(M Cg%ALyt;U&N,_+ be3%dՓRNjJډDص{;ASjfda2E9rG9Ӗ͆h4P/§x8$C\Y]!J^a魅2=mI9/ƺK0!/|fI|Ub(5`0FCA TuXŠd`{X{GAp2P\Kt:cA(ck,F,*k0a,hj<3YD`0at/ 8ZMܝxM)υ;|.ܣ-UPrcDxBZ.#W)4ۑ QfqqK lf,K+{ >[S| T5 ):T8HKAf"I]lC gBwu?34Ƀ,?>v6],')Ra^/ jm"ҭOTI{fG*wǺnMpj- /l&cHT2z^Z@_d1 }gd^)nIV@H7CZ .) eᎃjz~!)doN}pڕ6l;C;nɲ/ `,hmc籉~s'܊[L5:!b[˃7joNr!fعpaɹ S%؇5VMZLr 8b Hb:^]hf|tf {F`gٷvrnRkph ٥z;c$&$N\d޶RH fkwu lp/X-ixq"`pșmzL}r/<`.D< kֿu#,rpa3@{ |w%|L^vc%]ICJ/-cqRz-ǰjY#B@Jۦ?2MhsY^[!9MyX8/I6~,gu 7NvliL%Evg/˕蛵VcnΑ=,z{+jӲg]ji-`O >%'Eј]ln_RҭDJahԏpT9RQS2ʸ,k"zېur0D'v~?C*ѩIbwCRcڢ[ 51Y7%=SuC"7P.fH~G.#xt*~SrN"8>j1QBbw} 1wR bl P Uѕ#(ũQSŘ='{tΫIqxpu_/UGhfa^nXGw cLz\JA?,^`ZRi+O;m,e0^I@ǁjE@} lCOD`D;}f,f kذn#2zʃ3aՙ1X a D)ie6ItN<9ń,usVO7O Qw!T;.qkG+E~INk TAy{LaN PvW˞R}g&`E 6%8xb@P{FsFث,kN/v)6L"Bbs0]$ќB9db8[22 Nl_*Z HKd& kHϝӀ\0<Ը~)Lv?d/@r׼ZKc鼭,4QUCO:JU'Ŭb;kdeu׳!V GID9Qvye'SwC ;+R^,Rwv Z5eWH毕ζ C{JKmqɤR{x6I l$4wޤH;>&Fא-ׅ*r@I_X^mGy a;tDۊv*aux?pEd 3Գܰ&y jMw̎J,Z I=^=DX*<3O򮧩3oFʾF+T2⎍I*,R6By]f攗kO2vF{nM֭O\\~nfsujpeS+Wb{pM"1ᘴyz=|*V}.޿pW~ k2!(ߙ(8[Y~tON$.jX yܷfڬHiҔ rɌzeqnr'i[L7cycYŲЃ7},~HHFX~p9l_nseA]=wZcx_й(.|8 )g3L]pfF&Q(€y[w{IN|o6 yq5N>?::exH1A&mBv fMXc<(KMn*qr:_^9 Mt{ȓW q IHмqyO{ r7gԫR1 3)bvzIg#= .qM 7o- z2Y[ pård^~o bwWZ{ޤ%9pp{6 rRNW* /|@B\|;xϤIKѮwtl #lqXx?v' =rfpln3TZ+e7>Q٥ {2dC`7ť%p鶙NSS\>4jLsn޽X?\8~ P@}+! QN";lLࢼ?W& 02@d6g], ؗ Pm|Dδ沏%Ln:8y;}oudE~d̻U  2Ƹϔٹx rڔPDsG" ƀ{%t7Sُђ%(CsG11@ 9ɥˡ/p4u@ɢZ]'PU©% X=BH *㌹(ܘ*l5Ƒj-clDҬ`&ךlwD׫Y1Ӥq3<0j3b ñ*.CLKaaUSfMY\i]bgB峬Q:&D}(nV8Bk$-}=%fnh0cĎ%3,|?<(Kv!`Ob-y$ ~ '25<(fԅ҇u2jdcyOR͟g:>)P~n@[,gd~zidN0>Z`uum8zߦi[YXK̀:|_K`P-}ص1@,R^aeprF@"&1VLl33S\ WC0`@:P"`Ӕhnк|N,'Y)wY(I_\E59A *%{c]Bx:t/N0Pi!^D_D4h&2Z7.wu_$xGEx::WU~r7﫩:,`|F !E,}odb_זimTY#U˾HύE8Fؓ!h $o1us UBrF zi\C'm&Sb0 Pp N[@ezHĆV޼6P]վlN[f.qod#Yӑ[=}8_QBG򞼐\Ё^H׋.>@-bnTw :Zۍ/Fj>]jnwos_H)OZثl!?pTW&Mo(f_:D˿ankG 7Bc&Vx5*\]Xޭ6=4_oJ2uMSI=@[njZpc^g\Ŋ-/ .?\_S-6ܦʹ*f E-1(F$.6pᎷ{ ]IԂR X$!/PqrMQn}m#QAcfZw 34rDaGMY*\ifArqSka)0ľa y^)C}1&9oa5u?c$iG= l&fˣ~vwt5EYf(`5 h;, Jc\|q^`{kE:wH$z @t跒R&bTr;/ Ƚ%Z ]38%Ux&?jsD?dOND~q} 1SہwMNyȱﴲov/zlDRw;v+'y z\$OlJk33}x'6cɶ"MWc2Hd#]\vL? 
^љo+bnK&v?܆n?G&f 9z[US󃍻7%/ܭFy1wan_W`IPI,vy>{*m) jƍ\u(rqDNk- $ႈNifo`}^׭HsfX&)Cd59Uq3}RMW5/-(fj 9Ϩ= V % (mwd$~K!>k0jpb@~NGT;̠ɺ[4[.R1 eY󩘭HqaC fÔd90F,x֬]1$]lk ,.5+]o+aLIƮCn65&_'6"HLd%<&@,h]3Ecvj5nyb򼿍GAN:T'E@\;f 8|Y)~F~vj|kj,\ƭhDSciˑQ%|'p9{?p0 zC#Eо酵?PoVPRB"a[qA$S;hhVE)M O؉0Y8ip.|E_6 %X?ޖt-<]Jʆ6M[#>_~llkQP0!/Zh[Fn}0ע$,9ƒ)p| jMO39 ;S?LWseڮ Fs (n7'1 X+B8PpOrԺrk}o/ɡqa͝s-~XZ5y@Z!d9hI @7/EkdPEopq!~v,f+QQ`|C)x/r_Niݷ e)l[_M)W`_Dᾂ-c:U-F^zp;nwcY3=E,48|Ta۶")ҡ"CX~_[rÅ~抆V^DR.v8I&EL` ~.w92(]ψz87yy>3oJ)eKTq#D"v|/O~G/Nݘg9?o%^c@|)gz7fIɵp+uߵwIU'; DC+.vuzlhJF;-Vekd+颇7LBU~J5exY\K=fMyj`s a:ZO2d,^ R%/)_}Q@?r9~(qczI=59nɴY33-`!BNz, *= (y$/B&52+KDaZtQeF%3:2R"LRA8V=:1Yi:Cl76ai1`޽W9rP5Y5"9Q}DՍ˔{( v_уC-[V'Au59rNl'U>ɔI۝讀`Z,tG O#a۫rn?^J́f%q D?=qk}ǞJR`x =̑',t~"6GehB& *\eZ.kY,?}n!d8xؤrQ Kr U *.1ThF{+~)`,ZسۨNo`?ԥj Ȁ!rՄg*/>`'9V#3HfS);K^PǗ&$[,IsC5DB*\R5O43d:潨j^Vpqle9+z{1ڬPWO?>8nzlᧁڠ] 5QbΗ5R模.5Uqh_@dzaInɽ.yΡxg/,_G|t 3ACCi}i[\1?]ߙ? íqLU_w9&'Ϥ^5ИŠ¸@bQ 0P߼}eQ7{ Cc{H®_Jtxh gR4W?wNUF mÚ,H\b \OvsZɵPIbxo: CS0 b g51ݭn?X"OȹQ%p ^I}S|- FI|}L{Ytjqɲѿbb8>8ʠѢb8W{q@aQ:ZGQQZo q#ZMQڲc&7\q#?4ow Msn:9'm+66ɝad?4ʐZj43!Y^s~ui@+oIqQCC _iJhe eyx 'Ck ᵡQ}({W0.K/>4UzGb%4߲&~a'Np4ȅBy9/l2{ҦvC {$CC]1yPOcA]Z"C nÏs5ڋaCO$n)"Ip{p-Ń2uX4 A2խ[]L_RFp *"fТ[w4dub)$ӕH|j@{M:GV0n31Y<` rY*J fU⹄Dcfr!RXmŊ\VA**lpB7D@6t)l.Xn4+l]5hp-gv۟^,bHi)M4MEuF =DbM?-`8jUyd}>p@6/{&@ؤgEz.٨-y~#pѨ0 O aRm+WNbA`Ҟ/{y'_mkSr(3o<(Yba,ޟx@P- Qo픟댺.66hl,y_ʃEҖdy(g:rdv҃Tn.Nf99G7K)hyճ"4E2AnE<О}du" G[lh eMg(H[٪8z7@Oy LhE {u*RDlC;)W$E{D؏vƄxC;Q\e m9j;{T̠$uztF7@(arɾjR0vi -8shCXlͽ2]̿*k, #?:XX@H<הạaLqQ(J̼hwN`~FI#?ZhUSchl7c}4W`fb(4+A WA]tx%`~<>E-k(M{L>sj¡_bS,if0[ 1KT[:IFu`?3ȵQcjgTD:#e٩*9z]n #K,%@B;5tgj!cGiܗ3ͥ ipgQ:?+?͵IL;妖[ N ,B Eڮ˿|}fA4rʼnOdh]6PN4 vMy{b Y=d2@¸y8ī}uOhamvYiɜodW|BʅO=kx2K-x'or=2"w=NbN@0+}˄{=F 39/LV:F!"roʻ7K0b=\*|ǐGBou/w7ǣGLbf>d`R[~Ҽp#@_dAB#A(1пZuWv[rHMB!#>TO(?&"H@!>ys͍\$4EtQ+[=n( l㒎K-C@m0 l@8dnQzsA Erbsbg"rtV ?#5 13 VSn=ݗ*df "AER\hPr!O 6[н'f@<8)@T|N@0%>E<{?:~1,2q~4c׺mF2u")>]nxuj + -=NO8d8Ix6pd^i~9 Uu+:e)+1=sewǵZ<[7i9#J?glC.gX\3;A{{F,xO3}:Ѯ.6TAK9/8=nZg?\F'V%baTջ? _ oaYblטʎu,b3YL01Xlfe,HPf"菤8pTXBL^` lf\Cv5s2ӷ(Q,H4zcD]AY BRw=aG@G`Z|\ ӚAYLGފiet۞?Yz ċd'Xn[6@PsbRr-@.6aEE5[SlުY$]kinӜ%p,'{5vxA`X&amCCz݃$|)Qt^\v 5vTr,Ü OcJFBqeqX#3]jw728*7l%]:frܰzW P8l4Z%Q֘ptZ9\9{ fq9KM=C Ծ{l B&qEI (p&rƈ`~l֌[V鋅g皺5]wmJ3H"W s[SIAoq9Ɍ| Bxj7;A1ŘܾDi=v+hrV%餐͈՟c:'zB}*xf0!xL?⬎Gu]pz}ɝ`o&{dN 5$2rz"D-_>d];OQFC5Ze뺩F^(zsG_D0+or0;+ dׅµ$1] 1դy*7hL/2vGÕ؝+) }/ó&r[n0Ѭ%\N`Cr|:7F QzSX`.dA4^`4 TjTU.ώ, $b9wμqBa/Y841@%U hq _%YIRS&sqV;L<}w7))WJiŤzN,)ϡ+I0„@Dk^ }@TSz+~ Eלg~tNA!a Q2`%.ݔR嘏_4uTHph0`zWg㮻+d^SGY2k,yX_Mڛ zMtƪcዬ^_oF j@uc+<RU<(9]7 Q\ ͊ȥ?s?Dבtw͝⳹Qݠ!NTs(ol=ȕ`h0c'r]\] 4si}MspݷGo jb['oGbGbK|j.aK ]3R|Hjۭ`cU=>WhGޑPtQ56+ ؊n%C_jgd#4v&9;PwfړI ddǙ @/Ȃn@̎ߌ8g 58u ||{mi \tdq|X4CZiFaCiQgt@,5Du3A̵=Mt8dh+XPAW}`6Z]ɡyxʢ_ K*!g yE}3ɍa@+ϝ+czrt{7Wp$O3^?r䛣\ Pፂ-O9|<$.۴`8H|Ίd Z~{4zSj3/n0.ƫ}sâPrT=g0`i]vu<6?"4Uόm-drj-% ! &r Eۼ̇l_%;DOm. ^VGI M|庮mu&u؞ sMYWoƣLsf Э/o?{+&Ve ApGC0dp LBKVdbnkm>Sد-y)p~UɓzŒ2_{֯@AdesLA)OcJw\`'%K,Q`>\+R~@L##;8]nx 0c-Y4?h$pj(xUq>uOߙv]!v9b]gjS tӥmɷM q<RvCĠ _Ho }-Mv$1xޝx Dh;ˤb}}:6 ^&g9dev5ehWM|rOlES@p3@oլ9㄁, ~JwJh—.ЭR9) Q~7I^t"U<:FI\&KN& 9bC5ib Q&)v# 9T}(Oxe"z!`9;Tz跾#roff̥a0zQk-i/(fp#x5~"ٲ}E6*$j;`@Nm% Hƒn.r ;d AͪzqI/>;7x:.3qSZK " Zql"p HЗ.q`>w^$O`[ \dS[&p>R(/0FDur!m$ "a HtD|Eзs\N"%6Gd\]W|9s(fjEUkr4Jf>z@%]'楧K׈;,+ҏ]ZEi<ΣRܐL#kyNٵ/ fI#1ӌܲ$ !:bHTC[? 
pvj8={Մt boO b]u{w!tvacpNqwt?D Blz 'o+ɝ nR؄4!ӱ y@v邑.%[x=cΎ{5תR3LSW>B[0(xh2^@:GkLk .A R e,Sb;=sA:ap9ɝ9r/ը#[#8+",3oà+hE?AzUՀUPyRQ[m"R?̼RgKỴB0} Zvjh)֝\(0Kv9p,k8NJ%?zy' 뿉+"5:uSZ[%r=DN2Cb;|~7*.,w\mZA1C4-ws:,h;gZ_"36'J,l:EG8-4˰%0(rH|"t|RC}x倫^\<:D72r߼mh [MkHx|ɏVcnk|~U@0K[`x?E ɕcj!Z` \pKNSo*dXYvC.m*|ΈG| Zrsdz2MbwHAF'q1,k;t-u]qjYۖ_>6Y$kyU$Y:1OZ;ZuL?Z,~eޠ0❢Ud?.^<]mtX'Gv,R0sFU,c-LEE_j';6H.sC N5 }QBoq3͞d+0\C@Kwn^khj~^]/--*pIZ8q8m1h؋$QXBKv{QC)D <`HAGn'1w5;k@.=zv]t^E/_e81o7 ߮ق8?SdRFL)Q|4VjJ0ټt R zZxn ?]ٸ $y:Oa2Wddպ<&D9KH-" V"3Y4gKR;A? WRa Ӎrp'>dFw%K>ow֦I}2lm †J 굹U܋u)lҏl΍io];jwVcO\>!vc^`+0H-ңe* CLaU QSq)1Dyv~;\=% $(%|#N<ngSo10fg  +hy7$:w` oG/0Ǻ?JFʴ0!fSV ctYa .T#j7Ũٕڒ/ϿdoEV^wk@p uu?bM'~fggXc[Jήh i br]?x@|GzFtbXۉ*F7{A_RõO.=&%KfK"Una4DUR{K>{wme,/yue/auX<(쐳p1F6ž*4Ih}tIO/b@x08KVZ=oS0bVO?qW'OԿzDt vm>j noh .^ NBa?QoafB||鉈C-d[F8y&붏aj;?E^ &k /S8 dڸc> 9ì~Ej1ζsɜ^0ki:]Xm0]ɳΙ8c}`'IKp*TkNz(LpNGo,pd#ʣl(fQ2f+X&?; hWǏأ>ڊ_ԻWɮ0 yÉ;cafi#tG1Mt<[80fI@yZER{yygh/ϝnVFvX]z|\G{YR_h^ [dxve$U\Dq^_۷qDbdM 6L@?xu] RG}h1_crey VٝȨ64LmTYjc7cצ)7lOz&Fb({W'3tsv|/wQ? *Pc!bQ~h@Y%?pa2qE>1WB~SB!~q&=knRC@r4̄^xW13|p(Gbs?Z!cr N%RY%cT-@oqq)XlV^UlƱν[)}oz*r镭qEySqފCX%ZJwNj ms˘R7'/isIj|?vIi/s22P4o4? Ii V1~^EFh?3EL+#*G԰mA*4kuz'E n4[9bV@.] %󵝟.motvMjq/]9hnoz"-=mW5w)цias+[KX 9cDqp)lGCqpf_o"5s0%Xo+H%Ɉثx=cS>2.#BZQ5 \9ߧW Tph+`W|=ٽ>5aO,E߻msQfRQ{\"͂ML,%qQSˤpI꒽iNu(졷3+kL 3#Ťc;M哷Th_r{}pW G}0U_ڏ`ڱ[GՒU9cv: WP:&z|CMK|x?)v=fv wnr+6fۃo`e}˼N懟ETQkfhC醑Yn‹IU'q}Shk'tf]i ;YӫItS)Q^+˜⩷$Φtl\`:D5h^>O>[= M.!vum>ܻ'WAepedRBjq:*G+%Tɝ.DΚ 6:¶.RhՐiY(5277 OL~泑`u9m"*&Wq`: 믘tl6 !3b@-#$$HeK"B MY4\5tVثB-笎s,5b Ozb6ڱFj]V .\w0")Эwγc6֪/Ǟ=梗!G܈ C Ι݃![nqVca06+*%G۵Dx)ػCX2t*ΰovVkstyWdxl"inl|F{ `hm>w >qs &&~ G9*: \9<q8(JO>\bܛ^䪅n La>|5[N1mUB͢Xw/ѐ}G[h?9Eu/yg2 hPiQ5|7WoQ׾<#d@ >g`^OqȈ@2| fY]^v?Н,fz4N}h/9][pFO$xg$$:`k PaVp %[˔+t1n}A)g>QMbxH-f4Ϲ@=}6:fLxmEwݫf^r\ wW~<^ڱ]ڕw1&gv"2s^&IhJ*]5~dOpȃϠ3 cЃdvP URtMzKMM7*e?X,6{]49-;G0YB;O/~cw86 ̏W{~pH|prF.Ҙߐ@s{)ADLҺ8h j mh9Z aM-t9 , B|kO)UF@!@gXdsn4[q_!ZONKCVr|z.f zܨ[DėAk٦+h\LS1lA4!Nr̸8"7ym:&  ԑXuHfଯ+䳮 v#j8z Ucr8-É|F1#Ai~Est)Q?蹑 vIn'ӾxO5 S R|:Ck<;G۝IU0GޢEABi<\奯w([U&VO旄`NH #gY1 'k` G.~Q=*uT!FZjtFh gp +j {1rst/=K&!+ G+E۞e!t:i3xqGދZ(9 D)O.8$d!pdʒ4<5~ALAOm3a2Q!.&ƒ̊LqD{Witgͻi#GzVtV>A3G=0 GD(_?<=>sM0O1NoΎ9E9p9D)aˋGt9^YkD]JG| Y&3bNGj@H3:c&_=g[F2P*:EJ$v)W kmrJZ^yxqd[D$0,F͍-=" 8v?\eXLC{s19. /j49€I^PL*z.S*׀W o<¢PKo{dxe};CGLg$oVjwe}ꧾُ1)!]2 l2Y/6ˣIMeOht)D6 FMr]D]a2#HU;p-ZwӐ֢u˗~ a\|(g`P!J?0K}=+gl>yp^<}?|:X'ƕdr W {n]f[XZ qV-<ڽ;𴛥w)m`̑RqS8'-K@o3}a{eUUZji#i OŞA5,F-h #".󽇙73~Z~|nr%g̤l^oxIk ~G7XHې@4# M>'eטe4z1W_@f!9TD>qB-ݼXI;*FՋ0C?!657ODIuzTW'mO#@)u_ Xdi V(W,U\*z.bHǩ F%oAG)?M5B/B@V_gB(PE@HGC@+,Zjڹ۽Yh~棭7Hr}AeYsR"*1J1NkZx0 P虍{܂9eYW *ds-ph-7S 5vN~?sfHȝznw=|1r1JJP6 s{Jn )rbbj_ 0ȅot.D"u|EC"˾G@9r/&yŸF[lt )P Ym7B97kT,i$ ׺:@WjbbD]a^[;!iN\U܇:Ҥ!@C £răΊ+0j տC]퍉;UnUoD^ Bn-,;jI×I,FYD|PzOS6S aUy7 ܲ?c\s6X<^{mg>,7NXML q1Rg-OQoQCHbJ0,B)fJ?B<Q*F; p9OG^ӕ{3=։U [hc;K\:V6O$Gb^Z&WsG|_H|TE|[TH\]4ޟkxVt=e^d </o G 1sOefܧ;ӗ}G?=B~^e݈6.0[2sGav7)pLwE(.Sm7(Xx͞NwF(` }@V"C(4H@{I.JBQzQ?޿&Uu G=fk2^G?EQ*;x$xK~{<Jzw%Fol5w?=@%**s򒀇F):l ]¡^k_p&fg;kF֚;Ȗs% ))"#Dnv5cӮQE"=c"gDX2n#jq(GO,)QG_3Yua:F/+&4+H Mf7Šv艶zy<~]81r+Z#{{ViÐ?>1veg5h7pzC}r/ol:m{ @"ݶ΂I$:k!K_!z_?3{R0} 6@NWc${ YD?ڏ\aI"4˕pl:-7NK@PiNAtq'9. aSI݌%ᰮ{gX4 }l{ i/;QU/fZ)a)cS!Qp:⢿$ gmxZ8{ tzvh,cz>0G.Zb( 9Z^!b{:v.FP5] -,IQg$a ̀Dm\ɀ Lҩ0\6J AXu8sJ paMmhfT$H舽>}e`c`"-np2kRN2!H|qV;rlMFqýe0Zw2ŀIZh!' 
RxgH2~n]{f6qz!@!*M~̼^5-ZmoB!l~)9RAe1e)r9`SEmgyzq`L%VԷ?dAY_dnϬhf¿T]{cxBcWby)[`~@ƶDŽ< =v9i4LLQkP xI`|I0[`X/PruǝE,Ħδx{1^,{%Slr .!t3?+EQV ƕ~%TD:5NSlHD#B\z[*oj7^&6njHFIՀ#(d{d($|tWB2=A ~޼}#ѿmƯ= $4\6L_X/Vp FRhTym]V^aSazrNl㛖ET&$r-αԺ˭SuDE*-gWM]rOۡ=p9]r KEk`!a`(h- [[8Mɚbj(ȅoZyDP̀'$>8#Qk0 |'[%;2CuR$Heڮ\ zq9 p!\w.D|NF]2= ĭ笑G.AR(o˝x|V? qc#ª7K%LΓőXyw 24R>o_Ƨ(ow{ltm8a]۵%w|{'Vd[JL}ey,,["2tB-RxH$CS w@}㭬2=n f.Rn5M_1 wXRdqiE` m]@Xӓl>wF]s6\1 O\F;i 0>/NMTޝ2*S)lP=U;4@R!zF}̫e܏n#ul8&_dTAq4ެqhhmً \:7G: 2Fd#8y (l._~B/ sHG}uݪx9bbN[j 0ޙjK?qg#BʤY5"CN 4 äwl p;zwu0Q x 9`,n /Uno޶q )x<=!FY|@<%w7b)/3'Iׁ2͵R9^ۿ% -Y9aVXsj$Fboà[eҦX$*|m 9v#yOyZl;pX؃.ky?t<)] Z\t<=xjܸ<:&r>xt )%AӰN#6 7A_vh@ẍtzqJ_ 3+u~#97Qi< mĽi$;'gQ.]gY*艅^ JX17UkcPus܊5FoC`+l'〾*2z$xcQné/rw#Mr JU ݺ,W$%>CpW \k3k[uDM+ʿ?~ @譈<@KAb9 ?5ɚz 2|"xfQ&祘6~%)4ɧZfs=MLw8B 镾~rZz%} 5sN6:SZz_RH7_o.n3+e GuaU'{c+ZaS\Ұ}ּ3-+31t~6[yM4a. 1aW`?K"@86Ї?@ 4ǕMyL72@~VHX)d?*`ҒMSc<S`nTf@f<-`l#Tudu竬JEM\CSwg!rC Djθv`iuءj " /4\ZʒtՅ A,0ia8u|7k O.\HB<XBW:Sѽp2\$]Hm]]ޮ^-\,:f/JkGx<=XcS5O܍7^]%B AĬoyv\oavSJ`o MI;{& cFTqhv~|/AeB^em< 2$,H+[;py]X4T7VǐHii${ ̺9@zm*wn̻gМʜjN3w$p)"^j$su#ŭM3? b+b7U˶2t*2;3C_ϯ ˾_m"|zsW'ƪ:`$֫6PN+<ſ?=."^ ҺM@zC$*̌d#4x9[>4%̚&D#MlOp1{2<}Z\,q獉.a{G gt7>fWݳ)kB$AvNjIrG[za Oņ7٘4VjtdBLC{=nt3(B-Mji~*˼KŬ{ q*jh/׎}9;Y80zuv,K0Pŀ6fZMCh K_zES3t,%G |yI;YiWCdh}0lƣM}(k'=}8npvXCQn0x1RCpFR69lH2uyPcWɬM%HѕZ[&SW4|8O2 a8fzKE1dְgx^/[fK5v^z1R靌'F!\Ēx*ڲOgQ^i=i [ރb܅ݎZ/ce1=|ɒ[N.6jBȼ-4o)SVc2^`}Hh+V;&bc;z?F cGVˇrr5%ON96 $5$Z`lW\5.z"S *deP#BZG-R~J$ﯪχT*8- eҎC)q.fakX/PgOСʗ#tS#ϧ繶 F-#>Cox*wy tzEr 3BT4.w׵-~+1/tPD8Vyz+n]V ~D vG$l-KZJ~7?O^ZD&7;Euԍ_-z./ :7srf|=~LkKs- L116$a@ Iz永HhL|i>ļhL+ыv S$37?_WG\vgrߤ#1 #Ggmnn}YѫhV5m{Oo/U*/| c3bH耑d)$)|1X73cG`2=br= 1ν ΥG[K9Azp؞_N6`"4S|iErh,d.rIzm{LxrpPq/krt@wk٘$v 3M%qj4Q/yyp^^\?摢˾c T!Ǐ=poBzS ݺzǰP \?l~ ~ ]'1=處s-GQ#M jGt/|?L1(%5'D`*V_YvCā s+-ۨC2].Pͤk#UBZ_ 7.M|Jwkl>\92_$M?-sofX=R=/!:赊Ke1NW;l _ /XeD솎ބׁx4CL0d>hI^(0LJ0C=‹!-_M6x"9):M 7'f}Ɠ`խ0xuB6a1wr A?Jg *4.m69vMnj:w(><}ߪvqpmNC(1تN(Dm2lڏۻ{=> }vbڥk`ʮDN'RI߸I:Qs 2r]XhK l+GENd5R@ѝ,WvuSi7.!=1˥pMV&XLJH8]rv$h%[&-wt=g\lKtXqT]Y1 ܒk[房۝nnX"=EmVAEFħZO/MwT9:= Ƴd7W-Ɓ4W}8WfED+r,Co=Jwʝmec?8~^/שּׂDa˸GT=zo4̞?2o^tZ82YH-|{| >Ʋfkx.#{ (IGW444om?`|'+{ۗ|TDw$d~(ZJtv˃'X*xvxR_[7>$C ܴnUwtj&/<Uc ҭ=qL2ns:Uyu&6HP_sw94)'of3m W26,<AwF2$Yfȏ,n&dbvLLFS.d:LF)!Pa2]pmSs7ZR::G Cෟ1bH&w|+;M.G0o3߅k._] u 3Re(֐"Jؘ9uwz4e^-| Ǔ%4!EM<:B!+tmW>wy~U=Ryuϓ||e:?JNfQMd2n0Vf<&.z8 yZ= xfV\mCQ&*c['MqvL\3@mnR|VYc0*[ N-)EQ\J;@{{r>uYC 9E`vZ_#ױHy(Su;޾mNDV׈ƃ0H4YfSG$'Y&ģNM)"boO_EUv;<X'eeح9GݣN-L+6O,^n?e<Hƪ`=ωsO:{{X5=tqTN OucܸqaPbC*9g0@6ah"ߙBhǰ8P78={e}eK7-lv)> bux[@R[ m)Z[Ma1#B#7t^ ]b۾:Z{.2tj\=*{Ђ=|BF-7#F\&9|; X^NԨm0;i&q7rR| `zQi`(8ŷX "$aFNVaPO{&C1! 
G> C*aú|s B*uhNDqU¹6 0o7k3ω),QZ삧a6Ve/Yc#[4H,_)t˘[EllEDZϤ]ŨI {"ФIBM⚗/9I#ھr[X[_4XՓ(;Yc{QOԝ5+,4&C$b +UD{+82s6Mh4r6/?ɋ( ﱨ2MմI vI~k:IENDB`libvpx-1.8.2/build_debug/non_greedy_mv_test_files/ref_frame_16x16.txt000066400000000000000000046160371357355204000257300ustar00rootroot00000000000000486,720 214,214,214,213,214,212,213,215,214,214,215,214,214,214,215,214,214,214,214,216,214,214,214,214,216,215,215,214,214,216,217,216,215,214,217,217,218,217,217,215,216,217,215,218,216,216,218,217,218,219,218,216,219,218,218,220,219,219,219,220,222,219,222,222,222,223,222,223,224,223,222,225,223,224,224,223,225,224,224,224,224,224,225,224,226,226,223,224,226,226,226,226,224,224,227,225,226,227,227,226,225,225,224,224,227,227,227,228,227,229,230,228,230,230,228,229,230,230,230,230,229,229,230,230,230,232,232,233,230,230,231,232,233,232,233,233,232,233,233,231,233,233,233,235,234,235,237,238,236,235,236,236,237,237,236,237,236,236,238,239,239,241,241,240,241,240,240,239,240,240,239,241,240,241,242,241,242,242,242,242,242,243,244,244,245,244,244,244,243,243,244,244,244,243,243,244,245,245,243,244,245,246,246,244,244,245,247,247,245,245,244,247,245,245,246,244,245,245,245,245,245,245,245,245,244,245,246,246,247,247,247,246,247,247,247,247,247,247,247,247,245,245,247,247,247,247,246,247,247,247,247,246,246,247,247,247,246,246,247,247,247,247,247,246,246,246,246,246,247,247,247,247,248,247,247,247,247,247,247,247,247,248,248,247,248,246,247,246,247,247,247,247,247,247,249,241,234,247,247,247,250,249,249,252,252,252,251,251,252,252,252,252,252,252,253,253,252,252,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,252,252,253,253,252,252,253,253,250,250,244,251,252,252,253,253,251,251,230,211,171,147,80,131,251,251,252,252,249,242,206,236,252,252,251,251,252,252,252,252,249,243,215,212,212,184,169,186,193,186,179,152,136,129,129,132,141,118,71,35,27,44,76,82,53,44,33,36,68,94,117,144,123,107,77,36,45,21,18,34,48,55,47,57,66,66,62,61,68,68,74,73,71,65,120,214,249,252,249,248,223,184,205,244,251,251,250,250,251,249,249,250,251,252,250,250,238,240,252,251,248,230,195,218,217,183,229,249,245,229,136,177,243,234,143,105,108,67,101,177,193,223,171,92,55,21,48,55,45,91,71,36,36,57,76,63,17,159,250,250,250,250,253,253,251,251,252,252,252,252,252,252,252,252,194,110,29,128,248,226,206,165,176,235,252,252,250,242,247,247,249,249,247,248,247,247,246,247,247,246,247,246,247,247,246,247,246,250,243,226,233,252,240,127,96,63,7,118,238,246,250,250,250,248,219,210,225,212,198,213,226,237,251,246,242,236,182,170,229,246,250,250,248,247,246,244,244,242,243,243,242,244,244,244,244,243,242,241,242,241,241,241,241,241,241,241,239,240,239,239,238,238,239,238,238,237,236,237,238,235,234,235,233,234,233,231,231,230,227,228,229,229,227,228,227,227,227,229,229,227,225,226,223,223,224,222,224,222,223,222,222,230,233,239,237,217,212,225,230,224,223,222,218,217,218,216,214,219,217,216,217,217,215,214,215,217,217,213,214,214,213,214,214,212,210,211,210,208,210,208,208,210,206,209,207,202,205,204,201,201,200,199,200,197,199,195,199,114,5,1,4,7,9,8,10,10,11,11,11,11,214,214,214,213,214,212,213,215,214,214,215,214,214,214,215,214,214,214,214,216,214,214,214,214,216,215,215,214,214,216,217,216,215,214,217,217,218,217,217,215,216,217,215,218,216,216,218,217,218,219,218,216,219,218,218,220,219,219,219,220,222,219,222,222,222,223,222,223,224,223,222,225,223,224,224,223,225,224,224,224,224,224,225,224,226,226,223,224,226,226,226,226,224,224,227,225,226,227,227,226,225,225,224,224,227,227,227,228,227,229,230,228,230,230,
228,229,230,230,230,230,229,229,230,230,230,232,232,233,230,230,231,232,233,232,233,233,232,233,233,231,233,233,233,235,234,235,237,238,236,235,236,236,237,237,236,237,236,236,238,239,239,241,241,240,241,240,240,239,240,240,239,241,240,241,242,241,242,242,242,242,242,243,244,244,245,244,244,244,243,243,244,244,244,243,243,244,245,245,243,244,245,246,246,244,244,245,247,247,245,245,244,247,245,245,246,244,245,245,245,245,245,245,245,245,244,245,246,246,247,247,247,246,247,247,247,247,247,247,247,247,245,245,247,247,247,247,246,247,247,247,247,246,246,247,247,247,246,246,247,247,247,247,247,246,246,246,246,246,247,247,247,247,248,247,247,247,247,247,247,247,247,248,248,247,248,246,247,246,247,247,247,247,247,247,249,241,234,247,247,247,250,249,249,252,252,252,251,251,252,252,252,252,252,252,253,253,252,252,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,252,252,253,253,252,252,253,253,250,250,244,251,252,252,253,253,251,251,230,211,171,147,80,131,251,251,252,252,249,242,206,236,252,252,251,251,252,252,252,252,249,243,215,212,212,184,169,186,193,186,179,152,136,129,129,132,141,118,71,35,27,44,76,82,53,44,33,36,68,94,117,144,123,107,77,36,45,21,18,34,48,55,47,57,66,66,62,61,68,68,74,73,71,65,120,214,249,252,249,248,223,184,205,244,251,251,250,250,251,249,249,250,251,252,250,250,238,240,252,251,248,230,195,218,217,183,229,249,245,229,136,177,243,234,143,105,108,67,101,177,193,223,171,92,55,21,48,55,45,91,71,36,36,57,76,63,17,159,250,250,250,250,253,253,251,251,252,252,252,252,252,252,252,252,194,110,29,128,248,226,206,165,176,235,252,252,250,242,247,247,249,249,247,248,247,247,246,247,247,246,247,246,247,247,246,247,246,250,243,226,233,252,240,127,96,63,7,118,238,246,250,250,250,248,219,210,225,212,198,213,226,237,251,246,242,236,182,170,229,246,250,250,248,247,246,244,244,242,243,243,242,244,244,244,244,243,242,241,242,241,241,241,241,241,241,241,239,240,239,239,238,238,239,238,238,237,236,237,238,235,234,235,233,234,233,231,231,230,227,228,229,229,227,228,227,227,227,229,229,227,225,226,223,223,224,222,224,222,223,222,222,230,233,239,237,217,212,225,230,224,223,222,218,217,218,216,214,219,217,216,217,217,215,214,215,217,217,213,214,214,213,214,214,212,210,211,210,208,210,208,208,210,206,209,207,202,205,204,201,201,200,199,200,197,199,195,199,114,5,1,4,7,9,8,10,10,11,11,11,11,214,218,218,215,218,217,215,215,216,218,217,217,219,218,216,215,219,217,216,217,218,221,221,221,220,218,219,220,220,218,217,219,218,220,220,217,218,217,220,223,220,221,219,220,219,217,219,217,218,219,221,222,220,220,220,221,221,221,221,221,223,224,223,225,224,223,224,223,224,226,226,228,227,227,227,225,228,229,229,227,226,228,229,229,229,229,229,230,229,226,226,228,227,227,230,229,229,227,228,229,227,230,230,230,230,231,230,230,232,228,230,230,229,231,230,232,231,230,231,232,233,230,233,233,231,234,235,236,235,236,234,232,234,233,236,237,236,237,237,236,239,237,236,238,238,237,237,238,240,238,239,241,240,240,240,241,241,240,240,240,241,240,241,242,243,240,242,243,244,244,242,244,244,244,245,244,244,244,244,246,245,246,246,245,245,245,247,247,247,247,246,245,246,245,246,246,246,247,248,248,249,248,248,247,248,249,248,249,248,249,248,249,247,249,250,248,249,248,248,248,248,249,249,248,248,249,249,249,249,249,250,251,250,249,250,249,249,249,249,249,248,249,249,248,249,250,248,249,250,250,251,251,250,250,250,249,249,250,250,250,250,250,249,250,249,249,250,250,249,250,250,251,251,251,251,249,251,251,251,252,251,250,250,249,251,250,250,251,247,249,250,250,250,252,248,233,242,250,249,250,24
9,249,250,251,251,251,251,251,250,252,250,245,248,251,252,247,249,252,252,251,250,249,250,250,250,248,250,251,249,248,242,243,249,252,251,251,251,251,251,250,251,251,251,252,250,251,245,237,252,240,207,180,212,229,246,251,247,228,171,179,151,123,49,149,250,250,252,252,250,238,178,234,251,251,250,250,253,253,252,252,252,252,252,252,252,252,252,252,252,249,253,253,252,252,253,253,249,249,209,216,252,252,252,252,252,252,253,253,245,190,135,134,118,107,49,100,248,248,250,250,253,253,252,252,253,253,252,252,252,252,252,252,252,252,252,252,252,252,249,242,205,167,207,246,251,251,251,251,252,252,251,250,252,252,250,250,235,252,252,252,252,245,252,252,218,167,225,251,172,125,116,104,100,85,87,96,91,76,69,62,55,77,95,93,65,34,41,49,68,90,52,32,41,65,69,59,17,150,247,247,249,249,252,252,250,248,245,247,249,249,237,245,250,252,223,173,118,103,162,210,240,188,165,207,234,248,251,247,245,249,249,249,249,249,249,250,249,250,248,248,250,248,248,249,249,247,249,250,251,239,225,252,222,119,104,47,8,136,242,246,250,250,252,251,223,209,223,229,214,202,218,235,252,242,192,113,42,15,89,181,225,246,248,248,248,247,247,245,244,244,244,244,244,244,245,242,242,242,242,244,241,240,241,241,240,240,239,239,239,239,238,237,238,238,238,237,236,234,234,236,236,232,232,233,232,234,231,230,230,228,229,229,227,226,226,228,227,226,225,226,224,225,227,222,223,224,222,222,220,224,231,229,203,153,120,83,73,129,183,212,225,227,226,222,222,221,216,216,214,213,215,218,217,217,215,214,214,212,213,214,214,211,212,212,212,212,211,213,213,210,210,209,207,208,208,207,206,205,202,203,205,201,200,198,201,196,200,115,3,1,6,8,9,7,10,10,10,11,10,10,214,218,218,215,218,217,215,215,216,218,217,217,219,218,216,215,219,217,216,217,218,221,221,221,220,218,219,220,220,218,217,219,218,220,220,217,218,217,220,223,220,221,219,220,219,217,219,217,218,219,221,222,220,220,220,221,221,221,221,221,223,224,223,225,224,223,224,223,224,226,226,228,227,227,227,225,228,229,229,227,226,228,229,229,229,229,229,230,229,226,226,228,227,227,230,229,229,227,228,229,227,230,230,230,230,231,230,230,232,228,230,230,229,231,230,232,231,230,231,232,233,230,233,233,231,234,235,236,235,236,234,232,234,233,236,237,236,237,237,236,239,237,236,238,238,237,237,238,240,238,239,241,240,240,240,241,241,240,240,240,241,240,241,242,243,240,242,243,244,244,242,244,244,244,245,244,244,244,244,246,245,246,246,245,245,245,247,247,247,247,246,245,246,245,246,246,246,247,248,248,249,248,248,247,248,249,248,249,248,249,248,249,247,249,250,248,249,248,248,248,248,249,249,248,248,249,249,249,249,249,250,251,250,249,250,249,249,249,249,249,248,249,249,248,249,250,248,249,250,250,251,251,250,250,250,249,249,250,250,250,250,250,249,250,249,249,250,250,249,250,250,251,251,251,251,249,251,251,251,252,251,250,250,249,251,250,250,251,247,249,250,250,250,252,248,233,242,250,249,250,249,249,250,251,251,251,251,251,250,252,250,245,248,251,252,247,249,252,252,251,250,249,250,250,250,248,250,251,249,248,242,243,249,252,251,251,251,251,251,250,251,251,251,252,250,251,245,237,252,240,207,180,212,229,246,251,247,228,171,179,151,123,49,149,250,250,252,252,250,238,178,234,251,251,250,250,253,253,252,252,252,252,252,252,252,252,252,252,252,249,253,253,252,252,253,253,249,249,209,216,252,252,252,252,252,252,253,253,245,190,135,134,118,107,49,100,248,248,250,250,253,253,252,252,253,253,252,252,252,252,252,252,252,252,252,252,252,252,249,242,205,167,207,246,251,251,251,251,252,252,251,250,252,252,250,250,235,252,252,252,252,245,252,252,218,167,225,251,172,125,116,104,100,85,87,96,91,76,69,62,55,77,95,93,65,34,
41,49,68,90,52,32,41,65,69,59,17,150,247,247,249,249,252,252,250,248,245,247,249,249,237,245,250,252,223,173,118,103,162,210,240,188,165,207,234,248,251,247,245,249,249,249,249,249,249,250,249,250,248,248,250,248,248,249,249,247,249,250,251,239,225,252,222,119,104,47,8,136,242,246,250,250,252,251,223,209,223,229,214,202,218,235,252,242,192,113,42,15,89,181,225,246,248,248,248,247,247,245,244,244,244,244,244,244,245,242,242,242,242,244,241,240,241,241,240,240,239,239,239,239,238,237,238,238,238,237,236,234,234,236,236,232,232,233,232,234,231,230,230,228,229,229,227,226,226,228,227,226,225,226,224,225,227,222,223,224,222,222,220,224,231,229,203,153,120,83,73,129,183,212,225,227,226,222,222,221,216,216,214,213,215,218,217,217,215,214,214,212,213,214,214,211,212,212,212,212,211,213,213,210,210,209,207,208,208,207,206,205,202,203,205,201,200,198,201,196,200,115,3,1,6,8,9,7,10,10,10,11,10,10,215,216,216,214,217,215,216,216,218,216,216,219,217,218,215,216,217,216,218,217,218,215,215,215,217,218,217,219,217,219,217,218,218,218,219,217,217,215,215,217,217,218,218,217,219,217,220,219,220,221,218,220,220,218,220,218,220,222,218,222,221,222,222,222,224,224,224,227,224,223,223,225,226,226,226,224,226,229,228,226,226,224,227,225,226,226,226,227,227,229,227,227,227,227,226,229,227,225,226,225,227,229,228,227,229,229,230,229,230,232,230,229,229,231,229,231,231,230,231,232,230,230,232,230,232,233,233,233,234,233,234,232,234,235,233,234,233,234,234,233,233,235,237,236,235,236,236,239,238,238,238,239,237,237,239,239,237,239,240,239,239,239,240,241,241,240,241,243,240,242,242,241,242,243,242,244,244,244,244,244,245,245,245,245,246,246,245,244,246,246,245,246,245,245,245,246,247,245,246,248,248,248,247,247,246,247,248,249,249,248,247,248,249,249,248,247,247,247,249,248,249,249,249,249,248,248,247,248,248,247,249,248,249,250,249,248,248,248,248,249,248,249,249,249,249,249,249,249,248,248,248,249,249,249,250,249,248,250,249,248,249,250,249,247,249,249,249,249,249,250,249,249,250,250,248,249,249,249,249,248,248,248,249,250,249,249,248,248,247,248,248,248,248,252,244,232,244,249,248,249,247,247,248,249,250,249,249,249,248,250,248,243,249,251,252,250,252,252,252,251,247,248,249,247,247,247,248,248,248,249,246,242,242,247,250,249,250,249,248,247,248,248,247,250,247,250,237,230,245,188,181,208,243,244,252,252,248,216,170,179,157,134,49,149,250,250,252,244,250,229,141,191,247,247,248,243,248,252,250,251,251,252,241,252,252,208,240,251,241,249,252,252,252,252,252,252,252,252,252,252,252,252,252,252,253,253,252,252,239,165,127,125,110,104,36,85,248,248,249,249,252,252,251,251,252,252,252,252,252,252,251,248,252,252,251,251,251,252,250,240,230,202,204,249,251,251,251,251,252,250,250,249,251,250,250,248,235,252,251,251,242,242,252,241,149,112,205,242,193,130,101,104,102,115,109,96,97,81,81,64,66,68,72,117,110,65,50,47,82,90,52,29,37,70,65,57,12,141,246,246,249,249,252,252,250,250,244,244,247,252,240,244,250,252,245,219,178,95,113,186,242,204,186,220,225,248,251,250,246,247,248,248,248,246,249,249,248,249,247,248,247,247,248,247,247,247,247,248,248,242,222,252,210,116,104,35,11,142,243,247,251,248,252,243,224,207,219,229,222,203,209,230,234,234,196,146,60,66,145,208,231,243,245,245,244,244,244,243,242,244,244,241,243,241,242,243,240,239,240,239,239,239,239,239,238,239,238,236,237,237,234,237,235,236,237,235,233,234,232,231,230,229,230,228,229,229,229,228,226,227,224,225,226,226,225,224,224,224,222,224,224,222,222,222,221,222,222,224,230,231,241,225,173,131,103,77,48,94,171,217,233,230,227,222,219,219,217,215,215,214,215,217
,215,214,212,212,213,214,213,214,215,214,212,212,208,212,210,208,212,210,212,210,207,208,206,204,204,205,205,202,200,199,200,196,200,197,200,114,4,0,4,7,9,8,10,10,11,11,12,10,217,217,217,215,215,213,214,216,215,214,215,215,213,215,215,214,215,214,216,217,214,217,217,216,217,214,215,214,214,217,215,214,214,215,217,217,216,214,214,216,213,215,216,214,215,218,217,217,217,217,217,217,217,218,220,215,217,218,219,220,217,218,217,221,222,218,222,221,221,221,221,222,222,222,222,225,224,224,223,223,224,224,225,227,225,225,225,224,223,225,226,226,226,225,227,227,228,230,226,226,226,226,229,226,225,228,228,227,228,228,231,231,231,231,230,232,229,229,229,230,232,229,230,231,232,233,231,231,230,231,232,232,232,232,232,232,230,233,232,232,233,232,233,236,236,235,237,236,236,235,234,237,234,235,236,236,238,238,239,240,239,239,239,237,239,241,241,240,239,241,239,241,242,241,241,241,242,244,243,244,244,244,245,245,244,244,244,244,244,244,244,244,244,244,244,244,245,246,245,245,247,246,246,245,246,247,247,248,247,248,248,247,248,248,247,247,248,248,249,248,248,247,247,247,247,248,247,247,247,248,248,247,246,247,247,247,247,248,248,247,250,248,248,249,248,249,248,247,248,248,249,249,248,248,248,247,247,247,247,249,248,248,247,248,247,247,248,247,248,248,249,249,248,248,249,248,248,248,249,248,247,248,247,247,248,248,246,247,247,248,249,247,248,251,236,228,245,249,250,250,249,248,247,248,249,249,248,248,249,252,246,243,251,250,251,236,215,238,242,248,250,247,249,248,248,247,247,247,248,248,247,244,241,243,248,248,249,249,248,248,248,249,248,248,248,251,230,222,248,215,214,237,251,240,239,252,247,206,159,167,146,125,43,152,250,249,251,243,252,239,171,208,249,249,248,244,246,251,249,251,250,248,233,252,218,174,238,252,250,251,252,229,225,239,252,252,251,251,252,252,250,246,235,252,248,252,252,252,234,156,132,135,119,116,32,77,248,248,247,247,249,252,250,250,244,247,252,250,252,252,245,247,252,251,251,251,252,252,251,245,241,214,202,248,252,252,250,250,252,251,251,250,252,252,250,245,239,252,250,250,233,243,238,143,95,145,249,249,200,137,106,101,118,126,115,98,87,83,74,69,65,62,67,92,123,102,60,56,99,85,40,27,41,73,57,57,12,122,246,246,249,249,252,252,249,250,243,241,246,251,241,242,247,252,247,226,212,129,108,152,222,219,195,227,224,248,252,252,247,244,249,249,249,247,248,248,247,249,247,247,247,246,247,247,247,247,247,247,248,248,229,252,199,110,97,29,14,152,243,248,250,249,249,250,225,206,217,229,228,206,204,226,249,232,207,200,188,190,220,240,244,245,244,245,244,242,242,241,243,242,242,242,243,241,240,240,240,239,239,239,239,239,239,239,238,237,240,237,235,236,233,234,236,235,234,234,232,231,232,231,229,229,230,229,226,229,229,227,228,226,225,225,225,227,225,224,224,223,222,223,221,219,218,218,220,220,226,231,240,246,241,198,150,122,106,87,69,110,166,209,238,239,236,227,223,222,217,219,217,215,217,217,213,214,215,215,213,211,214,214,211,210,213,211,210,212,211,208,210,208,210,208,207,210,206,206,206,207,204,202,198,199,201,198,201,198,200,113,4,1,4,7,9,8,10,9,10,10,10,10,217,220,218,217,218,215,215,215,215,214,214,216,214,214,213,214,216,213,215,215,216,218,219,218,215,214,216,217,214,216,216,214,214,213,215,217,216,214,215,215,216,217,218,215,216,213,215,216,215,215,215,218,218,218,219,218,218,218,218,218,217,217,219,219,221,220,218,222,220,222,223,222,222,222,224,222,223,224,222,224,226,225,225,224,224,224,224,224,224,226,227,226,224,228,226,228,226,225,229,227,229,226,227,227,228,229,227,230,229,229,231,231,229,230,230,230,229,231,233,231,230,230,232,232,231,232,230,232,230,230,233,231,231,232,234,234,232,
236,237,235,235,236,235,236,237,237,236,235,235,236,236,236,237,237,236,236,237,238,237,239,241,238,238,241,239,239,240,240,240,239,241,241,241,242,241,241,243,244,243,244,244,243,244,244,244,244,244,244,242,244,244,244,245,246,244,243,244,244,247,247,246,247,247,247,247,247,247,246,248,249,247,248,248,247,248,247,249,247,247,248,246,248,248,248,248,247,247,247,247,247,247,247,247,247,247,247,248,248,247,248,248,248,248,248,249,249,247,247,247,248,248,247,248,247,248,248,248,249,248,248,248,247,249,249,248,247,247,247,248,248,248,249,249,249,250,249,247,249,249,249,249,248,247,247,247,249,249,247,247,247,247,247,249,248,228,230,246,248,249,248,249,250,248,247,248,249,250,249,249,248,242,245,252,249,243,171,119,187,232,249,249,246,248,248,249,249,247,247,247,247,247,247,240,240,244,247,250,247,248,247,247,248,248,247,247,250,231,245,252,227,221,214,201,201,237,249,247,200,156,162,139,118,37,157,249,248,252,242,252,249,184,202,247,248,248,245,243,250,249,251,252,251,225,241,225,222,252,252,246,250,227,172,211,241,252,252,248,246,252,251,250,241,230,252,244,251,252,252,233,150,134,140,120,123,33,77,248,248,247,247,249,252,250,247,234,246,252,249,250,250,241,251,252,249,252,252,252,252,252,241,250,221,189,239,252,252,250,250,252,250,250,250,252,251,250,238,245,252,251,247,228,232,183,161,148,212,253,253,169,107,93,89,98,101,87,76,74,59,62,53,50,49,46,54,79,100,63,65,100,76,39,30,48,71,59,60,12,134,246,246,249,249,252,252,249,250,244,240,244,251,244,240,249,252,246,223,236,174,105,130,200,209,179,216,208,227,251,251,248,245,250,248,248,247,247,247,247,248,247,247,247,246,247,246,246,247,246,247,248,251,236,245,182,105,91,19,18,163,243,248,251,247,252,242,226,204,217,229,223,216,205,223,234,217,206,227,252,252,251,251,247,247,245,244,243,241,241,240,240,241,240,239,239,239,239,239,239,238,240,238,236,237,236,236,236,234,236,233,235,236,233,234,232,232,231,231,231,229,228,227,229,227,229,229,228,225,225,225,224,224,225,223,222,224,221,222,224,220,220,219,220,218,219,220,219,228,231,242,238,200,171,146,126,114,107,95,80,78,102,127,181,221,238,231,223,220,221,216,215,214,212,212,214,213,212,214,214,212,210,211,212,211,209,210,208,209,210,210,211,207,207,208,205,207,205,206,204,201,203,202,201,199,202,198,200,196,201,114,3,1,4,7,8,7,10,10,10,11,10,10,218,220,220,220,220,216,216,217,215,214,217,217,216,217,213,213,217,216,215,217,216,217,217,217,216,215,217,217,217,216,215,217,217,215,215,215,215,216,215,216,214,213,216,216,216,217,216,217,216,216,216,218,217,216,218,217,217,218,218,219,218,220,218,220,220,218,220,220,223,226,225,222,223,224,223,225,224,225,224,223,225,222,224,224,221,225,222,224,224,223,225,224,227,225,226,224,226,226,223,226,226,227,228,226,225,228,230,229,227,229,229,229,231,228,230,229,231,232,229,230,229,231,233,232,232,232,231,232,232,232,236,235,233,232,232,232,234,234,233,234,234,236,236,236,236,237,237,234,238,237,235,239,236,238,236,235,237,237,237,238,237,237,239,240,241,239,241,241,239,241,239,240,240,240,242,243,242,242,242,244,243,243,244,243,242,244,244,244,242,243,243,244,244,245,248,246,245,246,246,246,247,247,247,247,248,249,249,249,248,247,249,249,248,248,247,247,247,245,248,248,248,249,247,247,246,246,247,247,248,249,248,246,248,249,247,247,248,248,248,247,247,247,247,248,248,249,248,247,249,248,248,247,247,248,247,248,249,248,247,249,249,247,247,247,247,249,248,248,248,248,248,248,249,247,249,249,247,249,247,248,248,249,249,249,249,248,249,249,249,248,248,247,250,247,228,239,251,248,249,248,249,248,248,248,246,248,248,250,250,247,243,246,252,249,240,182,169,231,24
8,252,249,246,247,247,247,247,247,247,247,247,247,248,247,241,240,243,248,249,248,248,247,247,246,247,249,250,230,245,250,186,171,199,220,229,250,252,247,201,169,177,155,125,42,168,249,249,252,242,252,252,175,174,239,246,249,247,243,247,252,250,252,249,242,250,229,237,252,251,212,229,243,232,252,248,252,252,248,246,252,247,250,241,234,252,247,252,252,252,226,146,125,128,118,111,29,83,247,247,247,247,249,252,249,246,231,243,252,250,250,250,241,252,252,250,250,250,252,251,248,246,243,212,173,226,252,252,250,250,252,251,250,251,250,252,250,237,249,252,252,237,218,210,220,219,222,252,250,229,129,99,92,92,96,86,85,81,81,78,71,73,69,60,57,52,63,83,67,81,107,63,40,32,53,74,59,62,12,141,246,246,249,249,252,252,250,250,245,241,247,251,248,241,248,252,249,217,237,207,129,139,195,229,208,207,182,200,244,251,251,243,248,247,247,249,248,247,247,247,247,247,247,247,245,246,247,247,247,247,245,249,245,234,161,92,83,17,25,174,246,248,251,250,250,250,227,204,217,228,230,227,209,224,245,201,201,237,252,252,246,252,247,244,245,242,241,239,241,240,239,239,239,239,239,239,238,238,237,239,237,237,236,237,236,234,232,233,233,233,235,234,232,232,231,229,229,230,230,227,224,226,225,225,226,224,225,224,222,222,223,223,220,223,223,222,223,221,221,220,220,220,219,219,218,221,228,231,240,209,159,130,110,96,96,95,88,84,80,75,49,34,67,135,200,225,224,222,219,218,218,216,212,210,214,215,213,214,213,211,210,212,212,210,213,210,211,213,212,211,209,207,210,208,209,209,206,206,203,204,202,203,201,198,200,196,198,194,201,114,3,1,4,7,9,8,10,9,10,11,10,10,219,222,221,219,219,214,217,218,215,215,215,217,217,215,214,214,215,212,216,217,213,215,216,217,218,215,217,215,213,216,217,215,214,214,215,214,213,213,217,216,214,215,215,215,217,216,217,215,216,215,217,215,215,216,214,219,219,220,220,219,219,219,221,219,221,223,223,224,223,223,224,223,223,222,223,224,225,224,221,224,220,222,225,221,223,224,225,224,223,221,223,225,223,225,222,226,227,225,226,226,228,225,227,226,226,227,225,227,228,229,230,228,226,230,229,229,230,231,232,227,231,232,231,233,231,234,234,234,233,232,232,232,234,233,235,233,231,232,232,234,233,233,235,235,234,235,237,237,236,237,236,236,236,237,239,236,237,237,235,237,238,236,236,237,237,238,237,237,239,239,240,241,242,243,244,242,243,243,242,243,243,244,242,243,242,244,244,243,243,245,245,243,243,244,245,246,245,245,246,245,246,246,247,248,246,248,249,249,249,249,247,247,249,248,248,248,248,247,247,247,247,248,247,248,247,244,247,247,248,247,246,247,248,248,249,249,247,248,247,247,246,248,247,247,248,247,247,247,247,247,248,247,247,248,246,247,248,248,248,247,248,247,248,248,247,247,247,247,247,246,247,247,247,247,247,248,248,249,249,247,247,248,249,248,248,247,248,247,248,249,249,249,250,242,234,247,248,248,249,248,248,247,248,246,247,246,247,248,248,245,244,250,249,249,252,247,252,252,251,249,248,248,248,246,246,246,246,246,245,247,248,248,247,247,240,239,244,245,250,248,247,248,247,247,248,248,224,237,215,182,215,230,243,250,252,252,248,204,174,169,157,127,39,173,249,245,251,241,251,251,198,187,240,248,249,248,242,245,250,249,252,251,245,251,224,229,244,243,242,251,252,236,252,252,252,252,250,244,252,249,249,241,235,252,229,245,253,253,226,128,107,116,103,118,33,83,248,248,247,247,249,252,250,247,235,246,252,248,250,248,245,252,252,250,252,252,251,248,247,247,241,220,181,229,252,252,249,247,250,252,249,250,250,252,249,236,252,252,250,226,229,252,253,253,250,252,247,186,119,114,110,121,117,114,113,116,124,110,112,109,108,75,96,86,91,105,80,118,124,61,35,27,62,77,62,62,9,136,247,247,249,249,252,252,248,250,245,2
39,247,251,250,240,244,249,252,219,230,234,148,141,179,217,214,187,165,187,227,248,252,242,245,246,247,247,247,247,247,248,247,247,247,247,247,246,246,246,245,245,246,247,252,225,139,94,74,10,29,176,243,248,252,245,249,243,220,201,216,228,230,225,215,236,234,180,189,223,241,239,230,245,243,242,241,240,240,239,239,241,239,239,239,239,238,237,238,239,238,236,237,237,236,235,235,232,233,233,233,232,233,231,228,229,229,228,226,227,226,224,227,226,226,223,225,224,222,224,224,224,223,221,223,220,221,223,219,217,218,217,219,220,219,218,219,226,231,231,188,139,110,98,95,92,97,95,95,91,87,73,63,52,35,57,129,200,219,218,219,216,216,216,215,214,212,211,212,212,212,212,212,213,210,212,212,211,212,210,212,209,210,210,209,210,207,211,208,206,208,204,204,203,200,199,198,197,200,195,202,113,4,1,4,7,9,8,10,10,10,11,11,10,222,223,221,220,220,218,219,218,218,217,217,217,217,218,217,215,217,214,215,219,217,217,218,218,216,217,217,215,215,213,215,216,212,216,215,215,217,214,213,214,214,214,218,216,216,218,215,217,217,217,216,218,218,218,219,218,220,220,218,218,219,221,219,221,225,224,223,222,222,223,222,222,224,224,224,224,223,223,224,225,225,221,223,224,223,226,224,224,224,226,225,223,223,224,226,224,224,225,224,227,225,227,229,226,227,227,228,227,227,229,228,226,230,229,228,229,231,232,232,234,231,233,232,231,234,233,233,233,233,231,233,232,235,234,234,235,234,235,235,237,235,235,237,236,234,236,237,236,238,235,237,238,236,237,239,239,237,239,239,239,239,238,239,240,238,239,240,240,239,242,241,241,242,241,241,241,242,242,242,243,244,243,242,243,243,244,244,243,245,245,243,244,243,244,245,246,246,245,246,246,247,246,247,248,248,249,247,247,248,248,249,249,247,248,249,249,249,247,246,247,249,249,248,247,249,249,247,248,247,247,247,245,248,247,248,249,248,247,248,247,249,249,246,248,248,247,248,247,247,248,248,249,248,248,247,247,249,249,249,248,248,247,247,248,247,248,247,247,247,247,248,248,248,247,249,249,247,248,248,248,248,248,250,251,249,248,248,248,249,249,248,249,251,240,240,250,250,249,248,249,250,249,249,249,248,248,247,249,247,243,248,251,250,249,252,252,252,252,249,247,246,247,247,247,247,245,247,247,247,247,245,247,247,248,244,240,242,246,249,248,247,247,247,249,249,246,226,248,242,211,232,244,234,218,241,250,249,201,162,160,139,113,41,176,249,243,251,239,250,252,212,200,246,248,249,248,246,245,251,250,252,247,242,247,178,202,252,252,249,250,246,227,246,242,251,251,250,245,251,251,249,243,235,250,163,158,234,249,225,137,119,113,107,110,29,90,247,247,247,247,250,252,250,249,240,246,252,249,251,245,248,252,251,250,252,252,251,246,248,252,238,245,198,208,252,250,250,250,251,252,249,251,250,252,244,240,252,252,248,241,252,252,252,252,222,224,169,122,113,118,128,122,126,119,93,91,95,101,104,94,86,79,85,83,84,95,99,136,115,51,34,25,61,72,60,60,12,140,247,247,249,249,252,252,249,251,248,238,244,249,252,244,241,248,252,226,218,244,163,100,132,167,184,171,168,198,215,243,252,243,247,247,247,247,248,247,246,246,247,246,245,246,246,247,248,245,247,244,248,247,252,217,118,87,68,11,30,179,243,249,251,247,246,240,214,198,216,229,231,234,220,233,237,160,178,225,233,232,229,244,240,240,243,239,239,237,238,238,237,237,237,237,239,237,237,236,237,237,236,235,235,234,233,232,231,231,232,230,229,229,227,229,227,227,228,225,224,227,225,226,224,222,225,224,224,225,223,222,222,224,223,222,220,220,221,216,216,217,218,216,219,220,222,229,227,184,136,105,92,101,125,132,120,109,106,98,91,85,74,60,50,36,57,150,201,211,219,217,219,216,213,213,212,210,212,213,213,214,214,212,212,212,210,210,211,211,210,209,207,210,210,209,208,20
7,208,205,205,202,201,203,199,200,199,196,202,196,200,113,4,1,4,8,9,7,10,10,10,11,10,10,220,222,221,220,223,220,220,222,220,218,217,218,217,219,219,215,219,218,217,218,217,218,215,216,216,214,217,216,217,215,213,214,216,216,215,214,214,212,214,215,213,215,214,212,212,214,217,216,216,215,219,217,218,221,219,220,218,220,219,218,221,220,221,219,220,221,222,224,221,222,220,220,223,223,221,220,220,221,223,225,222,222,222,220,222,224,222,221,224,222,222,222,222,226,223,221,224,222,222,226,227,224,227,226,227,226,226,228,227,228,227,229,227,229,231,230,231,231,233,233,233,232,233,232,229,232,232,233,231,230,232,232,232,234,235,231,235,236,234,237,237,236,234,237,236,235,236,233,235,237,236,237,238,236,236,236,237,238,236,237,237,237,238,239,238,238,239,240,240,240,241,241,241,241,241,241,240,240,242,242,241,243,242,242,243,242,244,243,244,244,243,244,244,246,246,246,246,246,247,247,247,247,245,247,247,247,247,248,248,248,247,247,248,246,247,247,249,249,249,249,248,249,246,247,248,247,249,247,247,247,247,246,247,247,246,246,247,248,249,249,249,247,248,249,247,247,247,248,247,247,248,249,248,247,247,247,248,248,247,248,249,247,247,247,247,248,247,248,247,248,248,249,248,248,248,249,249,247,247,248,248,249,250,248,248,249,249,248,248,248,248,251,247,238,245,251,248,250,249,248,249,249,250,247,248,247,247,248,245,245,248,250,247,245,248,252,247,246,247,247,248,246,247,247,247,247,246,247,246,247,247,245,247,248,248,244,240,240,247,248,247,247,247,246,250,242,236,252,240,224,234,203,189,214,244,250,248,201,167,153,141,109,37,187,250,245,250,236,251,252,228,178,208,247,250,250,247,242,251,249,252,245,242,205,172,241,252,252,249,250,237,190,225,241,252,252,249,240,252,252,250,239,237,249,192,212,251,251,227,142,130,137,113,110,28,95,247,247,247,246,248,252,249,247,237,240,250,252,251,243,252,252,251,250,250,252,250,245,251,252,245,248,204,215,252,252,250,250,251,252,249,250,250,252,241,244,252,252,244,252,252,252,252,174,95,78,64,58,84,77,73,81,83,92,64,45,48,43,49,46,49,52,44,46,51,57,57,80,80,59,47,50,72,69,58,53,11,139,247,247,249,249,252,252,250,251,248,238,242,246,251,245,239,245,252,232,206,229,188,144,144,162,220,194,172,221,219,239,252,248,248,247,247,247,245,246,246,245,246,245,244,244,245,245,245,244,245,243,247,245,250,223,115,89,63,7,42,194,245,249,252,247,247,229,215,197,215,230,230,234,223,236,234,146,179,228,236,231,225,241,237,241,240,238,239,236,236,237,237,236,235,236,236,236,235,235,236,234,235,233,232,231,231,229,229,229,228,228,227,226,229,225,225,225,222,224,227,224,226,224,222,221,224,223,220,222,220,222,220,219,222,219,220,219,219,219,218,216,216,217,217,220,228,225,188,139,106,92,83,110,134,135,122,105,94,80,72,61,61,53,48,36,22,101,177,204,223,221,218,214,216,214,209,212,211,211,212,212,211,211,212,211,208,206,211,210,208,208,208,208,207,209,206,206,205,206,204,200,203,203,200,198,199,198,198,192,201,114,3,1,4,8,8,8,10,10,11,11,10,10,216,220,220,219,221,216,219,218,221,219,218,219,219,219,216,215,217,215,215,217,214,216,215,214,218,216,214,218,218,214,216,216,213,214,214,215,214,212,214,214,214,210,214,214,213,214,214,217,215,215,217,218,217,215,218,217,218,221,220,223,223,221,221,221,220,221,222,224,218,222,222,219,219,221,221,217,219,221,222,219,221,222,222,221,221,221,221,221,221,222,219,224,222,220,222,223,225,224,224,225,222,223,225,226,225,224,225,224,227,226,228,229,227,228,226,227,229,229,230,230,229,232,232,230,231,232,231,231,233,233,234,230,232,232,232,234,233,234,234,234,236,234,234,235,233,234,233,234,237,235,236,235,235,236,237,236,235,238,236,235,237,235,236,238,2
36,236,237,238,238,240,237,240,240,240,242,241,242,241,241,241,242,242,241,241,241,242,244,244,243,243,243,245,244,243,244,244,245,247,247,246,247,247,246,247,246,247,248,248,248,247,246,248,247,246,247,248,249,248,248,248,248,247,249,247,247,247,246,248,248,248,249,247,247,248,247,248,247,247,249,248,248,248,247,248,248,248,248,247,249,249,247,247,248,248,249,249,249,248,248,248,247,248,247,249,248,247,247,247,249,249,248,248,250,249,249,249,249,248,249,249,249,249,248,249,249,248,248,249,249,249,247,250,246,237,247,250,249,248,249,249,248,249,249,249,249,248,247,247,242,246,251,249,246,244,247,249,247,245,246,247,247,247,248,247,247,247,246,247,247,247,247,246,247,247,247,247,242,240,242,246,247,247,247,247,250,241,234,252,230,201,184,187,222,238,252,252,249,206,175,173,153,111,44,190,249,247,249,236,249,252,223,167,205,246,250,251,249,245,247,250,251,239,244,227,218,251,252,250,238,249,215,211,242,244,252,252,250,243,252,252,248,238,237,252,251,252,253,253,224,145,134,132,117,111,27,95,247,247,247,247,248,252,250,247,242,242,249,252,249,246,252,252,252,252,250,252,250,243,252,252,243,250,222,227,252,252,250,250,251,249,248,250,251,252,240,248,252,249,246,252,253,196,123,75,50,28,2,9,28,33,45,39,44,98,84,46,35,32,32,30,36,33,29,37,62,76,60,48,46,53,55,68,76,62,46,49,12,106,240,242,248,248,252,252,249,251,248,239,241,245,250,247,238,244,252,246,200,227,238,187,204,215,250,232,193,244,231,240,252,249,247,247,247,248,247,246,247,246,245,245,244,245,244,244,244,244,247,244,247,244,251,241,141,95,62,6,53,210,245,250,252,247,246,240,217,199,216,230,231,233,225,250,230,133,180,232,231,227,224,242,237,239,241,237,237,236,236,235,235,236,237,236,235,234,232,232,232,230,230,231,232,232,229,227,228,226,227,228,224,224,224,224,223,224,224,224,225,224,223,223,223,223,223,221,222,221,220,222,218,217,218,221,218,219,215,215,218,217,218,214,218,223,229,210,155,112,89,77,75,90,105,105,102,95,85,73,65,60,62,60,57,47,24,95,186,221,239,228,221,217,218,217,214,215,210,209,213,213,213,212,212,212,209,211,210,208,210,209,208,210,206,208,207,205,206,204,205,200,200,201,198,197,197,197,198,192,202,114,3,1,5,8,8,8,10,10,10,11,11,10,218,222,219,217,220,218,217,220,221,221,222,222,217,216,218,216,216,214,215,215,217,216,217,216,215,217,215,216,216,214,217,217,216,217,214,216,214,213,215,214,214,214,215,213,215,215,214,214,215,217,217,216,216,216,218,218,215,219,220,219,222,220,221,221,221,221,221,223,221,221,219,221,222,219,222,220,221,219,219,221,219,220,220,223,219,222,223,221,223,219,221,224,221,221,223,221,224,222,222,224,222,223,224,222,223,224,223,225,226,227,225,226,225,226,229,226,228,229,229,231,230,229,232,232,231,232,231,232,232,232,232,232,232,235,235,234,236,234,234,235,236,235,234,236,233,235,235,235,237,236,236,236,236,237,239,237,237,237,238,237,237,239,239,236,236,239,240,239,238,237,237,239,241,241,241,241,242,242,241,240,241,242,240,240,240,241,242,241,244,242,243,244,243,244,244,244,244,245,245,245,247,246,247,248,248,247,246,247,247,247,246,245,248,248,249,248,249,249,248,249,248,248,246,247,249,249,250,247,247,247,247,247,247,248,249,249,249,247,247,247,249,249,248,248,249,249,249,248,249,249,249,250,249,249,249,249,249,249,248,247,248,247,248,249,247,247,247,247,249,248,247,248,248,248,249,248,248,248,248,250,249,249,250,249,249,247,248,248,247,249,250,250,240,241,250,249,249,248,249,249,247,248,247,247,249,249,250,243,241,245,248,249,245,246,249,248,247,244,244,247,247,247,247,244,247,247,247,247,246,247,245,245,247,247,247,247,247,241,240,243,247,247,248,247,249,236,231,243,191,196,228,225,2
39,237,249,251,249,207,170,164,153,115,43,192,249,248,249,236,251,252,239,193,211,247,250,250,250,245,246,250,252,244,252,235,225,251,249,226,211,249,240,240,250,247,252,252,250,241,252,252,247,236,232,252,252,252,252,252,222,140,125,130,113,109,29,99,247,247,247,247,249,252,250,248,244,246,248,249,245,248,252,252,252,250,250,252,246,248,252,252,241,243,201,190,252,252,250,250,248,251,250,251,251,247,237,250,252,247,241,253,195,91,28,20,57,26,12,27,33,42,54,51,56,81,68,51,39,31,34,36,37,34,36,55,89,84,58,44,28,35,40,66,78,61,50,59,10,93,227,240,250,250,252,252,249,252,248,242,241,244,250,249,236,241,253,253,207,206,247,210,202,224,251,249,201,237,231,214,247,246,244,244,247,246,246,247,246,246,245,244,245,246,248,245,244,242,246,244,244,243,253,251,148,102,69,6,59,212,242,250,252,242,243,231,219,201,213,227,227,225,222,253,217,114,173,231,232,226,226,243,236,241,240,239,237,236,236,235,233,235,236,235,234,233,233,232,233,230,231,229,230,229,231,232,227,227,224,225,225,222,226,222,222,225,223,224,222,223,223,224,223,219,221,219,221,222,219,222,222,220,218,215,217,219,217,216,215,217,217,222,229,236,240,198,134,101,91,85,78,91,87,93,97,92,99,94,93,89,91,86,81,86,65,117,194,219,243,243,245,233,227,224,214,213,213,213,213,211,210,209,210,211,212,212,211,208,206,208,209,206,207,207,204,203,202,204,203,203,203,200,198,198,198,198,200,193,202,113,4,1,4,8,9,8,10,10,11,11,10,11,218,223,222,220,222,220,220,219,222,220,219,219,218,216,216,218,214,215,217,216,216,216,218,217,217,217,218,217,215,217,215,216,217,218,217,215,214,215,214,212,215,213,215,215,213,215,212,214,213,215,214,215,220,215,216,215,220,219,218,222,220,222,222,222,223,223,221,223,221,222,222,222,221,220,219,220,222,218,220,221,220,218,220,221,219,219,221,220,220,222,221,223,222,222,223,221,221,222,223,223,222,224,225,224,225,221,223,224,226,224,224,227,227,228,229,230,230,232,230,229,232,232,232,231,231,234,232,233,234,231,234,235,234,234,233,237,236,235,236,237,238,235,237,237,235,236,234,235,237,237,238,237,237,236,236,238,237,237,237,237,240,238,239,239,236,238,239,238,239,240,237,239,239,240,241,239,240,241,241,240,242,242,240,241,243,243,243,243,244,245,244,244,244,244,244,244,245,246,246,247,247,247,247,248,248,247,247,247,248,247,247,248,247,246,247,247,247,247,247,248,249,249,250,248,248,249,248,249,249,249,248,247,247,248,248,249,249,249,249,248,248,247,249,249,249,249,249,249,247,249,248,248,248,248,249,247,248,247,248,248,249,249,248,248,248,248,248,248,247,248,247,248,248,247,248,248,249,249,249,249,247,250,249,249,249,247,249,249,248,249,249,249,240,244,251,248,250,249,248,248,248,248,248,249,249,250,249,244,242,248,247,247,245,247,249,247,248,244,246,247,245,248,247,247,248,246,245,247,245,245,247,245,247,245,247,248,247,246,241,240,244,248,249,247,251,237,242,247,221,236,252,236,208,225,249,250,248,200,160,152,138,97,44,198,249,247,250,239,252,252,244,190,208,246,251,251,250,249,246,249,252,243,250,212,190,243,241,242,251,252,243,252,252,245,252,252,250,242,252,252,247,232,226,250,251,251,252,252,215,127,105,109,103,101,29,107,247,247,247,246,250,252,249,247,244,243,245,248,242,252,252,252,251,249,250,252,244,249,252,252,238,235,189,168,244,249,249,249,249,251,250,251,251,246,241,252,252,234,230,219,156,98,33,11,44,39,34,49,46,44,51,44,55,87,73,55,44,30,32,37,32,28,46,77,89,71,57,44,32,34,45,77,80,61,59,55,19,145,242,242,250,250,252,252,248,252,247,244,242,244,247,251,239,237,250,252,220,192,240,220,201,217,231,240,194,214,193,171,221,240,244,244,249,250,247,248,249,245,246,247,246,246,245,244,244,243,247,243,246,241,25
3,245,139,101,67,5,67,218,240,250,252,245,239,232,222,203,210,224,225,229,224,251,202,95,168,231,227,227,227,241,236,239,239,236,234,233,234,233,232,235,234,232,232,231,232,230,231,230,232,230,226,227,227,229,227,224,225,225,224,226,226,222,221,222,223,223,222,222,222,220,222,223,221,220,220,222,220,221,219,218,220,218,217,217,217,217,218,223,228,235,245,245,221,167,129,117,108,111,113,118,114,107,93,85,92,93,92,87,82,77,79,75,70,72,73,119,197,225,246,246,249,226,212,214,214,215,212,210,210,211,210,208,208,210,210,206,209,208,207,207,204,204,202,205,206,201,202,203,202,202,201,200,200,198,205,199,201,113,3,1,4,7,9,8,10,10,11,11,10,12,218,223,222,221,221,219,222,222,220,219,216,219,218,220,219,217,215,217,217,217,220,214,215,214,214,218,213,214,214,214,214,215,216,214,214,216,214,214,216,214,214,214,216,213,214,214,213,215,213,214,216,214,217,216,216,218,217,218,221,219,222,220,220,220,222,224,221,221,221,220,220,220,222,220,221,218,220,221,220,220,220,220,217,218,218,220,219,218,222,219,218,220,221,221,218,220,222,218,221,223,222,221,223,224,224,224,221,223,221,223,224,226,225,226,228,227,228,229,230,231,231,230,232,232,230,232,234,232,231,233,233,232,232,233,231,233,235,232,234,234,235,234,234,233,234,237,233,236,235,234,235,234,233,234,234,236,237,235,236,236,235,235,235,236,237,236,235,237,237,238,237,237,239,237,239,239,237,238,239,239,240,240,240,242,242,242,242,242,243,240,243,244,243,244,244,245,246,246,246,245,246,247,246,248,247,247,247,247,248,247,248,248,246,247,248,247,248,246,248,249,247,247,248,249,248,248,248,248,247,247,248,247,249,249,247,247,249,249,248,249,249,249,247,249,249,249,249,247,247,248,248,247,247,248,247,248,249,249,248,248,248,247,248,248,247,248,249,249,248,248,248,249,249,248,249,248,249,248,248,249,248,248,249,249,248,248,249,248,249,249,251,246,237,247,250,247,249,248,248,248,248,248,248,249,248,249,246,241,246,247,247,245,244,249,248,248,249,244,244,247,247,247,247,247,247,247,246,245,245,246,246,247,245,246,246,246,247,249,245,242,241,246,248,248,248,237,251,252,236,250,231,190,209,236,252,252,249,209,166,152,131,91,47,199,250,250,249,235,252,252,247,191,174,238,251,251,250,247,242,249,252,243,250,191,191,247,252,249,251,251,237,246,232,232,252,252,250,240,252,252,249,231,225,252,252,252,252,252,207,120,102,112,94,98,24,101,248,248,247,246,249,252,250,246,245,246,246,242,244,252,252,252,252,249,250,251,241,251,252,252,241,245,227,178,237,249,250,250,248,251,250,249,251,241,243,252,247,217,220,239,227,190,121,91,93,83,79,84,62,46,49,47,53,89,80,54,49,35,36,35,30,36,52,89,92,61,44,34,39,44,57,83,79,60,63,50,37,191,246,242,251,250,252,251,249,252,244,245,242,241,246,250,242,236,244,252,236,193,230,237,202,219,221,222,201,197,180,168,208,238,246,243,246,246,246,247,246,247,246,246,245,244,245,242,244,242,244,242,245,242,253,240,128,94,57,4,80,224,243,250,252,241,237,228,222,204,211,223,226,227,235,250,186,80,159,228,232,223,229,240,234,236,234,233,233,233,233,234,232,230,230,230,230,229,229,229,227,228,226,226,228,224,223,226,225,226,225,225,224,222,225,223,223,224,222,222,221,222,221,220,221,218,220,222,218,217,216,219,217,216,218,216,217,216,214,219,226,233,246,246,221,185,159,143,135,126,118,122,126,127,113,110,77,59,69,49,49,50,46,51,47,45,47,42,43,20,25,106,163,199,224,209,208,214,215,212,210,212,212,210,210,209,206,208,208,206,206,205,205,205,205,206,205,204,202,203,202,201,203,200,200,201,202,203,203,197,205,113,4,1,5,9,9,9,10,10,10,11,12,12,220,222,219,220,223,221,221,219,222,221,219,219,220,219,219,219,217,217,216,217,216,216,216,214,215,214,214,213
,213,214,214,216,217,214,214,214,216,215,217,216,216,215,215,215,214,216,214,217,215,217,216,215,217,216,218,218,218,216,217,221,221,221,221,220,220,220,222,223,221,221,222,219,222,222,219,219,222,222,217,221,219,217,220,221,219,217,218,219,221,219,218,220,219,218,221,221,221,220,219,221,222,221,221,222,224,222,224,225,222,222,225,227,227,226,229,228,227,230,229,232,230,229,232,230,232,232,230,233,232,234,235,234,235,235,234,234,234,235,235,235,233,233,234,234,237,237,236,236,237,237,236,236,236,237,236,239,236,236,237,237,237,235,237,237,236,238,238,238,238,238,239,239,239,239,240,240,239,239,239,239,240,240,241,239,241,241,242,244,242,244,244,244,245,245,245,245,245,246,246,245,246,246,246,246,247,247,247,248,247,247,246,247,248,245,247,247,246,247,245,247,249,248,249,248,247,249,249,249,249,249,247,247,249,248,248,249,249,249,249,249,248,248,249,249,249,249,249,249,248,249,248,249,248,247,249,248,249,248,249,248,249,249,248,250,248,248,249,249,249,248,247,249,249,248,249,248,248,248,249,249,248,248,248,248,248,249,249,249,249,249,250,242,239,249,248,249,249,247,248,248,248,248,248,247,249,250,244,243,249,249,245,243,247,249,247,247,248,247,245,247,246,247,247,246,245,245,247,245,247,246,247,247,247,246,246,247,247,248,247,243,240,242,247,249,247,235,248,239,205,199,198,212,232,249,252,246,249,208,174,169,153,96,53,209,249,249,249,234,252,250,247,190,151,222,250,252,249,247,241,247,249,231,223,201,227,251,252,252,250,250,203,198,222,239,252,252,251,241,252,252,250,229,234,252,251,251,252,252,210,131,122,126,111,99,21,113,248,248,247,245,249,252,250,246,244,248,246,235,238,252,252,252,252,250,250,247,245,251,249,252,248,249,240,181,228,249,250,250,249,252,249,251,250,238,247,252,243,238,252,252,252,252,193,151,132,81,71,102,91,66,71,69,74,65,46,54,46,41,36,33,38,46,66,82,71,53,35,38,47,46,68,81,79,61,62,39,41,207,247,239,252,250,252,250,249,251,244,247,244,239,246,249,246,231,241,252,251,210,218,247,213,195,174,180,223,205,188,214,217,234,246,242,249,247,246,247,247,246,245,246,245,246,245,244,245,243,246,241,246,241,253,232,122,97,48,3,90,226,241,250,252,242,237,230,223,205,207,224,230,230,244,253,169,68,151,226,221,221,232,238,237,237,234,232,231,232,233,232,229,230,231,229,229,225,229,226,225,227,225,226,225,226,226,225,225,224,226,225,223,223,222,222,223,223,221,221,221,220,220,220,221,218,218,221,217,219,218,218,216,214,217,216,217,215,218,226,229,241,226,177,146,116,102,94,91,85,69,74,71,63,66,94,89,65,52,39,36,31,34,33,31,31,32,41,56,62,48,39,10,21,113,174,202,214,211,214,210,211,209,209,210,208,207,210,210,205,206,207,207,208,207,208,207,204,205,206,204,202,203,205,201,202,205,203,205,199,204,113,3,1,5,8,9,9,12,10,10,11,12,12,219,222,220,221,222,219,218,220,220,219,219,221,218,217,218,218,216,217,218,214,217,214,215,216,216,222,216,213,214,215,217,215,216,217,216,216,215,215,214,216,214,213,214,212,215,214,214,217,217,217,217,217,217,217,216,218,219,220,219,217,219,219,222,222,220,221,220,221,222,220,220,222,220,216,219,219,218,219,218,217,221,222,222,218,217,219,220,221,219,217,218,219,219,220,219,220,222,224,223,223,222,222,224,220,221,223,223,225,225,225,225,227,229,229,228,228,227,227,230,229,229,229,229,231,231,229,234,232,229,234,232,234,234,233,234,234,234,233,235,233,236,235,235,233,233,236,234,235,233,234,237,235,234,236,236,237,236,236,238,235,237,238,237,239,237,237,237,238,238,237,238,237,239,238,238,239,238,240,240,239,240,240,240,241,241,241,240,243,244,243,243,244,245,246,246,245,245,245,246,245,247,247,246,248,245,246,245,245,247,245,248,247,245,247,248,249,247,246,
[Non-text payload omitted: a long run of comma-separated 8-bit sample values (raw image/test data embedded in the libvpx-1.8.2 archive); no caption, labels, or other recoverable structure.]
,238,238,237,238,239,240,240,241,240,241,242,242,242,241,241,241,241,242,241,242,242,243,242,243,244,243,243,243,244,244,246,245,245,245,245,246,246,245,244,245,245,245,245,246,244,245,246,246,246,246,247,246,246,245,246,247,248,247,247,247,247,247,248,247,248,248,248,248,247,247,247,247,247,247,247,249,247,247,248,248,248,247,248,248,248,249,247,247,247,247,248,248,247,248,248,248,248,248,248,246,247,247,247,247,247,248,248,248,247,249,247,236,246,249,246,248,248,247,247,248,248,249,246,240,245,249,250,245,244,248,247,247,246,247,245,246,248,246,248,247,246,247,247,246,247,247,246,249,245,243,247,246,247,247,246,247,246,247,249,248,247,246,244,242,246,246,250,247,221,245,252,237,252,231,211,201,184,223,234,225,211,241,249,176,152,146,144,85,92,242,252,252,249,237,245,239,244,252,247,236,206,203,252,243,199,198,214,247,250,250,228,177,253,237,194,90,65,191,244,252,250,246,246,251,243,251,253,237,224,224,249,251,246,183,101,5,72,105,68,58,52,37,50,223,253,253,152,97,40,88,234,240,248,214,214,247,248,252,245,242,240,244,248,247,247,244,247,245,252,193,222,203,163,195,154,216,253,240,225,146,181,246,124,147,231,201,239,252,253,253,247,184,101,103,104,108,97,74,93,99,71,53,44,49,57,61,66,60,63,66,78,71,41,60,78,63,58,102,89,52,61,16,70,218,249,249,249,249,252,250,249,224,226,243,246,248,246,247,247,246,237,237,245,251,232,224,244,252,212,160,213,243,227,205,172,178,226,224,244,234,198,249,225,162,199,236,252,252,252,252,252,252,253,253,252,252,203,134,121,99,60,35,27,29,13,143,234,227,247,238,245,252,251,253,218,185,191,125,176,229,212,216,222,227,226,224,224,221,220,220,220,219,219,218,217,217,217,217,217,217,214,214,214,213,215,212,213,214,214,212,211,210,208,210,210,207,206,211,207,208,208,208,210,206,208,208,206,207,207,205,206,206,207,207,205,206,207,206,207,207,206,216,215,181,154,141,136,123,98,82,74,82,120,155,159,147,109,110,113,107,116,103,122,150,132,112,44,54,197,220,212,222,220,222,215,214,216,216,217,219,213,216,218,215,217,216,215,215,215,215,215,213,214,214,211,211,209,210,212,211,210,207,205,208,208,207,207,208,210,209,212,207,208,112,2,1,4,7,7,7,10,9,10,10,10,10,217,222,219,217,221,216,218,217,216,215,215,217,218,216,215,214,216,217,216,217,215,214,219,216,217,219,217,216,217,217,217,219,217,219,218,218,217,219,222,221,219,217,220,221,220,221,219,220,221,223,220,220,222,220,220,223,220,220,219,219,219,219,218,218,220,220,221,220,219,218,217,221,219,217,219,219,222,218,220,220,217,220,218,218,219,217,220,221,217,219,220,218,220,217,218,220,219,221,219,218,220,218,221,221,218,220,220,219,219,220,222,221,219,219,221,222,220,220,220,223,223,223,223,225,224,225,226,223,223,224,225,226,227,224,224,226,226,225,225,227,229,227,227,227,227,226,227,227,227,229,230,230,230,229,230,230,230,233,235,232,232,233,232,232,234,236,237,235,235,235,236,237,240,239,237,238,239,240,239,239,240,240,239,242,243,243,242,241,241,241,242,241,240,241,241,242,242,243,243,243,243,242,244,244,242,242,244,244,244,244,244,244,244,245,244,244,245,245,246,246,245,246,245,244,245,245,246,248,246,245,246,246,246,249,246,245,247,247,248,247,246,247,246,248,247,247,247,247,248,246,247,247,247,247,247,247,247,247,247,247,247,248,247,247,247,248,247,247,246,246,248,248,247,247,247,247,247,247,247,247,247,247,248,250,240,235,247,249,248,248,247,247,247,247,246,247,244,241,247,248,246,244,247,250,247,247,246,245,245,244,246,246,247,247,246,245,245,245,245,246,246,247,246,242,245,247,247,246,246,247,247,247,247,247,248,247,244,242,242,244,250,247,226,252,252,241,230,175,207,225,211,235,240,223,212,246,250,184,159,151,14
6,109,124,246,252,252,250,243,246,240,242,250,249,234,215,195,242,239,211,216,232,248,248,248,172,100,212,216,171,80,35,142,232,250,250,246,242,252,236,234,251,219,202,186,222,250,222,150,92,2,60,92,62,56,51,29,36,212,251,213,71,21,11,71,215,239,250,207,224,250,248,252,245,241,239,242,247,246,247,246,247,248,251,215,208,220,204,178,143,219,214,147,164,175,253,253,141,198,234,199,248,252,252,252,248,170,108,84,17,6,9,2,57,110,68,36,20,27,27,46,74,50,37,30,57,56,38,69,78,68,57,100,93,52,61,19,61,206,249,249,249,249,252,251,249,231,226,240,248,247,246,247,246,249,242,237,241,250,237,222,239,252,229,162,195,240,234,233,214,194,234,224,237,224,174,231,234,199,210,215,250,252,252,252,252,252,253,253,252,252,174,124,118,88,55,27,30,25,22,173,234,230,234,234,252,252,253,252,204,188,189,135,195,228,213,212,218,227,223,224,223,221,218,218,219,220,219,218,217,215,215,216,216,216,214,211,213,214,214,211,211,212,211,212,211,210,209,209,210,208,208,210,207,207,209,207,205,209,206,206,207,207,207,205,207,206,205,205,206,205,206,206,206,209,205,210,222,199,152,123,118,115,93,79,96,119,141,137,128,120,88,82,69,66,71,87,123,129,113,91,39,36,163,215,216,227,217,217,219,217,217,215,215,217,217,217,214,215,217,217,214,215,216,214,214,216,214,214,210,213,212,210,212,211,208,207,207,207,208,206,208,208,211,208,210,209,208,112,2,1,3,7,9,7,10,10,10,11,10,10,217,218,217,218,217,214,214,216,217,218,217,215,218,216,217,217,217,217,216,217,215,217,218,219,218,219,219,216,214,217,217,216,216,216,218,217,217,220,218,217,220,218,217,220,218,221,217,218,220,220,222,219,222,220,225,222,219,220,219,220,220,220,219,219,218,219,218,221,221,220,218,218,218,217,221,221,219,220,218,217,220,218,217,221,221,221,221,221,218,218,220,220,219,216,221,220,221,222,219,222,221,220,219,218,219,219,219,218,220,220,219,221,220,220,222,221,222,221,223,222,222,224,224,223,223,224,223,226,224,222,225,224,226,225,224,227,228,228,227,226,226,227,226,227,228,227,229,230,230,229,229,230,229,229,230,231,230,231,232,234,231,231,233,234,234,235,234,234,236,237,237,237,238,239,239,240,240,239,239,239,241,242,240,241,243,242,243,243,244,242,241,243,242,243,243,242,243,243,243,244,244,243,244,245,245,243,244,244,244,244,245,245,245,246,244,245,245,246,245,245,245,244,245,247,245,244,248,246,246,246,245,245,247,247,246,247,247,247,247,246,247,247,247,248,248,247,247,247,247,248,247,248,248,247,247,248,247,247,247,246,247,247,247,246,247,247,247,247,247,248,247,248,248,247,248,247,248,248,248,247,247,246,248,249,237,236,248,247,249,249,247,247,248,249,249,249,241,242,249,248,245,243,247,247,247,247,246,247,246,246,246,246,247,245,246,247,245,244,245,244,246,247,247,242,244,247,246,247,246,246,246,246,247,248,247,247,247,244,244,241,249,238,215,247,224,191,215,217,246,247,232,241,235,221,210,245,250,179,150,137,132,117,147,247,252,252,250,249,251,243,245,251,247,225,206,197,234,236,228,204,219,249,248,248,144,38,156,202,188,113,9,106,226,248,249,244,243,252,232,221,243,213,201,177,213,251,186,137,88,1,74,86,62,61,50,36,30,204,248,132,22,6,26,147,237,245,251,214,241,252,247,250,245,245,239,242,247,245,248,244,250,244,249,197,149,166,188,194,137,201,190,174,226,229,248,219,84,187,217,198,251,248,251,250,249,218,103,78,23,8,14,11,37,72,62,41,36,36,33,60,87,55,37,34,63,51,29,74,80,66,58,104,97,51,62,18,62,206,246,249,249,249,252,249,250,240,226,234,247,247,247,248,247,249,245,237,239,249,244,223,231,252,248,171,179,235,240,235,222,174,214,235,223,221,177,202,218,203,218,205,230,247,248,248,244,241,242,239,253,252,146,108,112,79,50,24,36,19,33,208,243,234,250,24
6,253,253,235,223,195,200,186,148,211,228,213,211,217,224,219,222,222,220,218,218,218,218,215,216,215,214,218,215,213,214,215,215,214,214,212,211,210,212,211,210,210,210,211,209,210,207,206,211,206,206,207,205,208,207,209,206,204,207,205,205,206,206,206,206,207,208,206,204,206,206,206,210,214,182,139,118,100,73,60,64,101,136,149,141,106,78,49,49,48,47,62,83,105,102,78,60,22,21,133,196,220,231,218,219,219,217,220,219,217,217,219,217,217,216,218,216,215,215,215,215,214,215,214,215,214,214,215,212,212,211,209,209,209,211,208,211,211,210,209,206,211,208,210,112,3,1,3,8,9,8,10,10,11,11,10,11,215,216,218,216,216,215,217,217,216,218,218,218,214,217,216,216,218,217,214,217,218,216,218,217,217,217,217,217,216,217,217,219,218,218,216,218,217,217,217,217,218,219,220,219,219,220,219,216,215,218,217,220,220,218,218,219,220,219,220,220,216,221,220,220,220,218,219,219,219,219,220,220,218,220,222,219,222,217,219,221,219,220,218,219,219,219,221,220,218,221,220,219,219,216,220,218,218,220,217,220,218,217,218,217,219,219,221,220,219,220,220,221,221,222,220,219,221,223,223,223,222,221,222,221,221,222,223,224,224,227,223,224,225,224,226,225,224,226,227,226,224,224,226,227,227,229,227,225,228,229,229,231,231,229,231,231,231,231,229,232,232,234,235,234,235,234,235,235,237,237,237,239,239,238,239,240,239,239,238,239,239,239,241,243,240,242,243,242,243,244,245,244,244,242,242,242,243,243,244,243,242,242,242,244,245,244,244,245,244,244,245,245,244,245,244,245,245,244,244,244,244,244,244,244,247,246,245,246,246,247,248,246,246,246,246,247,247,247,247,246,247,247,247,246,246,246,246,247,247,247,248,246,246,246,246,247,247,247,247,248,247,247,247,247,246,247,247,247,247,246,247,247,247,247,247,247,247,247,247,246,245,245,249,247,234,241,248,247,248,247,248,247,247,248,249,244,241,247,248,247,242,244,247,246,247,247,247,247,246,247,245,244,245,245,246,245,244,244,244,244,246,246,247,243,243,246,246,246,246,246,245,247,246,247,247,245,247,246,246,241,248,226,202,234,213,224,244,232,252,237,205,221,229,213,207,246,249,172,141,136,120,98,97,152,201,245,249,249,250,243,244,249,250,226,211,207,228,203,160,122,181,244,246,233,125,54,135,190,197,138,73,159,243,250,249,241,245,251,237,222,237,211,198,178,218,252,175,138,77,1,75,85,67,61,56,38,34,206,242,147,39,71,192,241,251,250,242,196,236,252,244,250,245,246,236,240,245,244,244,245,246,249,246,184,69,68,130,147,206,250,241,240,232,227,247,158,87,193,193,212,252,249,252,251,249,225,141,87,55,57,38,25,74,71,51,54,37,35,50,74,70,46,45,42,58,46,35,74,83,67,56,106,97,53,61,24,61,196,247,249,249,249,252,247,246,246,228,229,245,245,247,248,247,248,245,236,237,247,247,222,227,251,250,183,167,222,244,242,237,178,191,245,222,208,211,196,188,211,229,207,224,247,244,244,243,236,239,234,252,244,131,106,100,76,49,22,38,15,56,226,234,234,244,252,253,237,213,222,202,205,176,163,220,225,217,211,220,221,219,222,219,217,219,217,217,218,215,216,216,217,215,214,214,214,214,213,213,213,213,211,210,209,208,210,210,211,209,208,210,209,206,207,206,206,207,207,207,205,205,207,206,206,205,205,206,207,205,207,208,208,208,205,208,209,207,210,213,170,128,112,99,98,75,71,124,152,162,163,143,104,71,63,71,73,87,110,120,105,83,36,44,154,217,216,215,219,219,219,218,217,218,217,218,219,216,216,217,217,214,215,215,215,215,218,218,216,213,214,214,215,215,213,212,209,211,212,210,208,208,208,208,206,208,208,209,208,209,112,2,1,5,7,8,8,10,9,10,10,10,10,218,218,218,217,218,217,217,216,214,217,216,217,217,215,217,217,217,217,216,217,218,218,217,217,217,219,218,217,219,217,217,217,217,218,219,218,216,217,218,217,217,21
6,219,219,217,219,219,219,218,218,219,219,218,217,221,218,219,219,218,218,218,219,219,219,221,219,219,219,216,217,217,217,220,220,221,220,220,223,221,220,222,220,220,219,217,220,218,218,222,219,219,220,219,220,218,217,218,221,220,220,219,219,219,218,221,220,218,220,222,221,218,221,223,221,221,220,223,220,222,224,221,221,223,222,223,224,222,223,223,223,225,224,225,225,225,227,225,225,227,225,227,228,226,229,229,230,229,227,228,227,228,230,229,227,228,226,229,230,230,233,231,232,234,233,236,237,235,235,236,234,236,238,239,239,239,240,239,239,240,239,240,241,242,241,243,242,242,244,242,244,244,244,243,243,242,242,242,242,242,242,244,244,244,244,244,244,244,245,244,242,245,243,245,245,242,245,245,247,246,244,245,244,245,246,245,245,247,247,246,247,247,247,247,247,247,247,246,247,247,247,247,247,247,246,246,246,247,246,246,247,247,247,248,247,246,247,247,246,247,247,247,247,246,246,248,247,247,247,247,247,247,247,247,247,247,247,248,247,247,247,249,249,249,243,234,245,249,247,248,248,247,247,246,246,246,242,241,248,249,246,242,245,248,247,248,247,247,247,246,246,245,244,245,246,246,244,245,245,246,245,246,247,247,246,242,247,245,245,246,245,245,244,247,247,246,246,247,246,248,246,249,231,226,252,232,242,252,228,219,183,193,229,232,212,207,249,248,182,153,140,127,92,60,53,59,175,249,247,249,240,242,249,251,240,224,214,154,117,126,38,99,232,249,244,155,52,88,175,232,202,141,204,251,251,249,242,247,251,238,226,231,206,196,179,223,252,169,141,75,0,75,84,73,62,54,33,41,221,247,197,116,195,249,249,249,249,230,200,240,249,247,250,246,247,235,237,244,244,245,243,246,245,248,189,41,7,32,115,237,249,247,229,151,198,223,139,183,242,202,231,252,249,252,247,251,251,212,162,89,76,57,86,142,105,66,50,46,45,54,69,49,43,56,51,42,24,41,81,81,69,51,107,106,50,64,22,56,195,244,249,249,249,252,248,245,248,234,226,242,246,247,246,247,246,247,236,232,245,250,229,220,244,250,198,163,208,242,246,238,188,168,239,222,188,217,179,161,208,236,210,205,230,238,239,240,237,240,232,253,237,130,100,97,71,48,28,37,0,84,239,235,246,253,251,245,203,203,229,212,206,165,180,226,225,214,206,220,221,219,220,217,217,216,217,216,217,215,214,217,214,217,214,214,214,212,213,211,214,212,211,213,212,210,209,210,208,210,206,209,210,206,208,206,208,207,207,208,203,206,206,205,209,206,206,207,205,207,207,205,206,207,209,208,206,209,215,228,211,161,136,117,115,132,135,154,169,162,165,151,116,94,96,93,99,100,107,111,101,77,54,136,217,239,233,216,219,218,220,218,217,217,214,219,219,219,216,217,218,214,218,216,215,217,217,217,214,214,215,214,214,215,212,211,210,209,209,208,209,207,209,209,208,210,210,212,208,212,113,2,1,4,7,7,8,10,9,10,11,10,10,217,217,218,216,218,217,214,215,216,214,215,214,217,218,217,217,216,214,216,219,219,218,218,217,218,217,216,215,214,217,217,216,214,216,217,218,218,220,219,218,218,217,218,220,221,220,220,220,221,222,218,219,219,217,218,219,220,220,220,220,219,219,219,219,220,217,219,221,219,220,219,219,218,218,220,219,220,219,218,220,221,220,220,221,219,218,220,219,217,219,219,218,221,222,221,219,221,221,218,220,219,220,220,219,222,219,220,220,222,221,219,222,220,220,222,221,222,220,222,226,222,223,225,224,225,224,224,223,223,223,221,224,226,225,228,226,227,228,225,228,228,227,229,227,225,228,228,227,229,226,225,228,229,228,228,229,230,230,232,232,230,233,232,234,233,236,235,230,235,234,235,234,236,235,235,237,238,240,238,240,240,240,240,240,240,241,241,241,241,242,243,241,241,242,242,243,242,242,242,241,241,243,243,244,242,243,244,243,243,243,244,245,244,245,245,245,246,245,246,245,248,247,247,248,246,245,245,245,247,247,2
47,246,247,246,246,247,245,246,246,246,247,245,246,246,246,247,246,246,246,246,247,247,248,247,246,248,246,247,247,246,247,247,247,247,246,247,247,247,246,246,246,246,247,246,246,245,247,247,247,247,247,247,248,241,236,245,247,247,247,247,247,247,247,248,246,238,243,248,246,244,242,247,247,248,247,246,245,245,245,245,245,244,244,244,245,245,244,244,244,246,245,244,247,244,241,245,246,245,246,245,244,245,246,246,246,246,246,246,247,247,252,229,230,252,222,240,213,178,221,220,225,237,236,211,208,251,250,181,161,155,131,86,55,53,6,59,185,227,249,243,242,249,252,242,231,186,87,99,120,18,62,203,252,252,186,124,125,182,243,222,179,210,252,252,249,245,247,250,244,229,230,204,196,182,226,252,166,142,71,1,75,87,71,55,56,32,41,224,249,191,143,236,252,230,237,243,230,202,240,251,244,248,245,248,236,237,244,244,244,244,244,249,249,173,54,61,55,71,220,246,247,192,166,249,226,163,232,248,207,247,252,250,250,245,252,252,249,204,108,57,7,49,118,92,55,44,53,47,61,79,43,50,75,50,32,13,46,84,83,68,48,106,108,53,62,22,50,182,244,248,249,249,252,247,244,249,239,224,239,245,244,245,246,245,247,237,228,239,250,232,222,240,251,220,163,191,236,238,235,184,119,188,183,148,225,196,132,190,226,183,162,205,231,236,242,235,237,231,253,231,127,108,92,76,43,34,34,15,112,233,234,234,253,253,226,187,223,234,218,185,152,193,224,226,213,203,218,219,221,220,218,215,217,216,217,216,214,215,214,215,213,216,216,211,213,213,212,211,211,212,212,212,211,211,211,210,209,208,208,206,206,206,205,206,205,205,207,205,205,205,206,207,206,205,206,208,205,204,206,208,207,207,208,208,209,217,232,206,167,144,119,111,111,113,115,105,100,89,91,91,72,60,50,52,51,55,62,60,58,45,108,215,226,227,226,218,216,215,220,218,218,221,219,219,220,220,218,217,218,217,218,217,216,215,214,215,214,217,214,215,215,212,211,211,211,210,210,208,210,210,210,208,210,211,212,210,210,112,2,1,2,7,9,7,10,9,10,10,10,10,217,218,218,216,218,216,217,219,215,217,217,218,215,215,218,216,217,219,217,220,219,218,218,216,219,218,220,219,218,217,219,219,218,218,218,220,217,220,220,217,222,218,219,218,219,219,219,221,219,220,217,219,219,221,221,219,221,218,222,222,220,220,218,219,223,219,219,222,218,219,221,220,219,217,219,218,218,221,219,221,221,220,221,220,222,224,222,219,220,221,222,222,221,222,222,221,221,221,219,217,222,223,221,222,223,222,223,224,222,222,220,222,221,222,223,222,225,222,224,225,223,225,225,225,225,224,224,225,226,227,227,225,226,226,226,226,226,227,229,227,228,229,227,227,227,227,227,229,229,231,230,229,231,231,234,233,234,234,231,233,232,232,233,232,232,233,235,236,236,235,235,234,234,237,237,238,240,239,240,238,239,241,241,239,241,241,239,242,242,242,242,243,244,243,244,244,244,244,244,243,244,243,241,242,241,241,244,242,244,242,244,244,244,244,244,246,245,245,245,246,246,245,247,245,246,246,246,247,247,247,247,247,247,246,246,247,247,246,245,245,246,248,247,247,248,248,246,247,247,245,246,246,246,245,246,246,247,247,247,247,246,247,247,247,247,247,247,247,246,247,246,246,245,245,246,247,247,246,247,247,247,247,249,237,238,248,247,246,246,247,245,246,246,248,244,239,246,247,246,242,244,247,246,246,246,245,246,245,245,245,246,246,245,245,244,244,245,244,244,245,245,245,247,246,241,243,245,245,247,245,244,244,244,245,245,247,246,244,249,248,250,224,229,230,187,204,213,222,250,233,226,231,231,205,212,250,247,178,150,150,132,81,56,98,78,111,203,235,250,237,244,249,252,229,218,183,50,115,175,25,60,205,248,253,224,191,186,201,241,221,189,204,245,251,246,244,247,250,244,234,224,197,194,184,232,250,154,149,71,1,80,84,70,51,56,33,47,234,248,170,158,250,251,215,232,2
35,210,199,238,246,245,248,245,249,240,236,244,243,244,244,244,247,251,198,181,205,125,67,137,191,190,195,226,252,218,168,251,228,210,252,252,249,249,249,252,252,249,201,134,89,28,6,14,9,29,39,42,44,55,63,49,59,83,52,23,19,48,92,86,69,46,105,112,56,63,19,46,180,239,248,250,249,252,247,245,249,246,225,231,244,245,245,245,243,248,242,229,236,248,240,221,231,252,236,169,181,228,244,221,196,96,92,166,183,237,236,159,186,225,171,161,190,224,240,238,236,238,230,252,225,119,105,94,73,45,33,29,4,148,245,250,250,253,241,199,206,251,241,213,156,146,207,222,230,209,207,223,217,221,220,218,216,213,213,214,217,218,216,216,213,214,212,210,212,212,215,213,212,212,208,210,210,209,211,210,211,212,210,209,207,208,207,206,209,205,204,207,207,207,206,205,207,207,207,208,207,206,205,206,207,208,210,209,210,211,216,212,163,113,97,68,59,69,50,41,44,36,35,71,84,69,42,28,33,20,37,50,62,30,42,167,212,217,222,221,222,216,218,218,218,222,222,221,220,222,219,218,220,217,220,219,217,219,217,219,218,219,219,216,217,216,213,212,212,212,212,213,214,212,213,214,211,210,211,213,211,210,112,3,1,3,7,9,7,10,10,10,10,10,10,214,217,218,215,216,215,214,216,218,217,218,215,217,217,215,218,217,217,218,218,217,217,217,215,219,218,217,219,218,220,218,219,217,219,219,218,218,217,217,218,218,220,220,218,217,216,219,218,216,219,220,220,220,218,220,220,219,220,217,217,218,218,221,219,220,219,220,219,218,218,218,218,219,218,220,219,222,222,221,222,221,222,221,222,222,221,221,221,221,219,221,222,221,222,222,221,222,221,221,222,219,220,220,222,223,221,221,221,221,219,222,222,222,222,223,221,222,225,225,223,222,224,224,222,225,224,224,226,224,226,225,225,226,222,224,225,225,224,222,224,226,225,226,227,226,226,226,228,226,226,227,229,231,231,231,232,232,231,232,231,231,230,231,232,231,232,233,232,235,233,233,234,235,236,235,237,237,237,237,237,238,240,240,240,240,240,240,242,240,241,242,241,243,243,243,244,243,242,242,241,244,242,241,243,241,242,243,242,244,243,245,244,245,245,245,246,245,246,248,246,248,247,245,246,246,246,248,248,248,248,247,247,247,247,246,248,246,248,245,245,246,245,244,244,247,246,244,245,244,244,245,245,245,244,244,245,245,246,246,246,246,245,246,246,245,246,246,246,246,247,246,245,246,245,246,248,247,246,245,246,245,249,244,234,240,247,247,247,246,246,245,245,247,245,240,242,247,247,244,241,245,246,245,245,244,245,245,244,244,243,245,246,245,245,245,245,245,245,244,244,244,244,245,246,241,241,245,245,245,243,244,244,244,246,246,246,245,244,247,249,249,212,229,233,201,236,226,227,230,184,209,231,231,205,214,250,244,164,138,139,128,67,110,238,224,227,232,223,242,230,234,242,253,237,223,168,86,174,168,66,127,232,253,253,228,206,203,214,244,214,197,198,231,251,239,242,244,247,244,233,225,198,195,186,232,233,144,143,61,3,82,86,68,47,51,29,46,235,248,134,165,250,245,218,226,220,206,200,233,246,243,245,242,246,239,231,240,244,243,243,241,249,240,238,252,252,169,100,58,69,202,222,250,250,153,177,252,207,222,252,247,250,245,250,249,252,243,221,223,212,147,127,115,71,42,12,18,54,62,63,50,93,90,37,29,19,54,92,86,68,44,108,107,55,61,24,50,179,243,248,250,249,252,245,243,247,247,231,225,240,246,247,246,243,246,242,226,230,247,242,218,224,253,246,176,168,215,241,226,206,140,140,203,212,234,251,198,212,224,185,199,198,217,236,237,235,235,228,253,212,116,103,88,77,34,34,16,31,194,233,251,251,244,208,202,229,252,201,159,111,163,218,223,229,201,207,221,217,218,216,215,214,215,212,215,213,213,215,212,211,210,211,210,210,213,211,210,211,210,208,208,210,209,208,209,208,208,210,210,207,207,207,207,206,205,205,205,205,206,207,206,208,206,205,
206,206,206,208,207,206,206,210,206,208,209,214,195,127,91,69,36,53,57,43,49,37,35,39,53,72,66,55,41,30,28,44,59,63,18,73,198,221,225,220,216,218,218,218,218,220,218,221,220,219,220,218,220,217,217,217,217,215,215,216,215,215,215,216,215,214,213,214,212,210,212,214,213,210,211,213,212,210,212,214,212,207,211,112,1,1,5,7,8,7,10,10,9,11,10,10,217,220,217,217,217,214,216,218,218,217,216,217,217,217,216,215,218,217,219,218,217,218,218,218,221,217,219,217,216,219,220,219,221,220,219,220,218,219,218,218,222,217,218,220,219,219,219,219,218,220,219,219,218,220,221,219,218,217,218,219,221,220,218,218,221,220,221,220,218,218,220,221,222,222,222,223,222,225,222,221,224,224,224,223,223,223,222,221,221,223,221,222,221,222,224,221,223,225,223,222,224,222,222,223,222,224,223,221,222,222,223,224,222,224,223,227,226,226,229,225,226,225,225,225,224,226,226,225,226,225,227,226,225,225,225,226,226,227,226,225,226,227,229,226,226,226,225,228,229,229,229,228,231,230,228,230,232,230,232,231,230,233,233,234,233,232,232,232,234,234,234,234,236,236,236,237,237,239,239,239,241,241,241,240,239,242,241,241,242,241,244,242,242,244,244,244,242,244,244,244,244,243,243,244,244,244,246,244,245,245,245,246,247,247,248,249,248,249,248,248,247,247,249,248,249,247,247,247,247,247,247,247,246,247,246,246,245,244,245,245,246,244,244,244,245,246,245,245,248,246,245,244,244,245,247,246,245,245,246,248,246,246,246,247,247,246,246,247,246,247,249,248,247,246,246,247,245,246,246,245,247,249,241,230,242,247,247,248,246,247,246,246,247,244,238,245,248,247,242,241,246,247,247,247,246,245,246,246,245,244,246,245,245,245,244,245,245,245,244,244,245,245,244,244,242,241,243,245,246,245,246,246,244,245,245,247,245,246,245,249,246,222,251,244,226,245,200,193,217,206,222,238,233,201,217,249,243,172,146,142,123,59,127,244,250,249,209,202,240,229,229,235,246,240,247,202,93,165,204,153,217,251,253,253,208,208,202,211,236,210,218,211,237,252,239,247,242,248,243,236,230,207,191,181,237,218,136,145,57,2,83,91,71,46,49,36,36,235,242,121,183,244,245,217,223,212,206,213,236,246,242,245,243,245,239,227,236,244,243,244,243,248,246,241,252,252,167,129,69,60,209,238,245,232,101,184,245,196,238,252,248,251,247,250,249,252,229,236,252,252,253,253,249,186,84,14,57,150,71,41,31,76,94,31,31,24,55,100,85,67,42,98,113,57,61,20,56,189,243,248,250,249,252,245,244,247,249,239,222,238,245,244,245,244,244,244,232,232,245,247,224,219,248,252,193,165,203,239,233,222,168,152,222,236,225,245,199,192,212,199,231,208,211,235,237,237,235,230,252,204,110,105,81,75,41,23,8,73,242,249,251,251,211,199,215,252,243,156,80,70,181,223,225,233,199,211,218,215,219,214,214,216,216,213,214,214,212,212,213,212,211,213,212,214,211,209,210,212,212,209,210,210,208,209,210,209,208,208,210,209,207,208,208,208,207,205,207,205,205,207,207,206,207,206,205,205,206,206,205,205,207,208,208,210,212,220,195,146,113,78,60,65,63,56,60,54,39,57,69,55,48,43,39,30,38,55,58,42,19,123,224,224,229,221,220,219,217,219,218,222,222,217,220,220,219,219,219,218,217,218,218,219,217,215,217,215,215,214,214,215,213,214,213,213,214,214,212,211,212,212,212,210,212,210,210,208,212,113,1,1,4,7,8,8,10,9,10,11,10,10,217,219,217,220,220,216,216,218,217,217,217,217,218,215,216,217,216,219,220,219,218,220,218,216,219,219,218,217,217,219,220,221,218,218,220,220,220,219,218,218,219,219,220,218,220,220,219,220,219,221,220,218,222,222,222,218,219,221,217,220,220,218,222,219,219,220,220,219,221,222,221,221,221,221,222,224,224,224,223,224,226,226,224,225,226,224,224,224,224,224,224,225,221,222,226,226,225,224,223,224,224,225,225,22
3,223,223,226,225,224,224,227,224,224,224,224,226,226,225,227,226,228,227,224,224,228,226,225,227,227,227,224,226,226,225,224,227,227,229,228,228,230,227,228,227,230,229,229,232,227,232,232,229,232,232,233,232,231,232,232,233,233,234,236,235,236,235,236,235,234,234,235,235,234,235,236,237,238,238,238,239,239,239,239,240,241,240,241,242,241,241,243,243,243,243,243,243,243,243,243,245,244,243,245,244,243,245,244,244,245,243,244,245,245,246,249,248,247,247,247,246,247,247,247,247,247,247,247,247,247,246,247,247,246,246,246,246,245,244,244,244,244,244,245,244,245,244,244,245,245,244,244,245,246,246,244,245,246,245,244,244,246,245,246,246,245,247,246,247,246,246,246,246,247,246,246,246,246,246,245,244,246,247,236,235,245,247,247,247,247,248,246,247,249,241,240,246,247,246,242,244,246,246,247,246,244,245,245,245,247,245,245,245,244,244,244,244,244,245,244,244,245,244,243,244,242,237,240,245,244,243,245,245,244,246,244,245,245,246,245,249,241,220,252,231,192,211,205,231,238,226,242,240,229,198,220,250,246,178,153,151,131,53,112,243,251,251,218,176,225,240,237,228,240,229,228,208,150,184,189,205,252,252,251,251,202,212,202,212,242,229,245,229,242,251,239,249,244,247,241,234,232,198,177,184,239,198,134,141,53,3,78,81,64,49,50,41,51,239,249,111,160,237,243,221,215,217,226,215,234,249,241,244,242,245,241,228,231,241,242,243,244,248,246,234,253,221,134,156,90,62,195,234,245,177,85,216,232,203,248,249,249,247,247,249,250,250,215,238,252,252,252,252,249,208,124,33,84,180,71,30,15,69,84,32,34,19,67,104,81,65,36,98,109,57,60,28,54,196,249,249,249,249,252,247,243,248,248,244,224,232,245,244,246,244,244,246,231,226,239,249,231,218,241,252,208,163,193,234,235,221,185,145,186,236,224,234,204,167,173,199,246,221,203,236,239,235,237,230,252,198,113,107,88,70,29,8,27,156,234,251,252,230,204,213,234,234,202,125,52,60,188,219,227,228,198,211,216,217,218,213,218,216,215,214,215,214,212,215,215,213,213,212,211,212,212,210,212,213,212,210,208,208,210,210,208,209,208,208,209,209,210,209,208,209,211,208,207,208,208,207,205,210,207,206,208,205,206,206,205,207,208,207,206,210,214,219,204,172,173,155,118,108,81,69,73,58,94,113,84,67,51,45,39,37,46,58,27,40,147,224,243,221,221,225,221,222,217,216,218,218,218,219,217,218,218,218,217,217,217,216,216,216,216,216,215,215,218,215,216,215,213,215,214,215,214,213,215,212,212,212,211,211,210,210,212,210,210,110,3,1,3,7,9,7,9,9,10,10,10,10,219,221,218,219,222,218,218,219,219,219,220,221,218,218,221,218,221,219,219,221,218,222,218,219,220,216,221,219,217,218,219,218,219,220,219,218,219,220,219,219,222,221,222,220,220,222,221,223,223,221,222,220,219,219,219,222,222,222,220,221,221,220,222,223,223,221,223,222,223,223,222,221,221,224,225,225,224,225,226,227,228,229,228,229,227,226,227,226,226,228,226,228,227,226,227,225,226,229,226,225,229,227,227,227,226,228,228,228,227,227,229,226,226,228,223,226,224,227,227,224,229,225,226,227,226,229,228,227,226,223,226,225,225,226,226,229,228,225,227,229,228,229,229,229,229,230,230,231,231,232,234,231,232,233,234,236,233,234,234,235,236,236,236,235,235,236,236,234,233,236,235,235,236,235,237,237,237,239,238,239,237,238,238,238,240,241,241,239,242,242,243,243,242,242,242,242,243,243,244,244,245,245,245,244,242,243,245,245,247,246,246,245,246,246,247,247,246,247,246,247,247,247,247,247,249,247,247,247,247,247,247,247,246,246,246,246,245,246,245,244,244,245,247,247,245,244,245,246,247,246,244,244,244,245,246,246,245,244,245,244,245,246,246,247,247,246,246,247,247,247,245,245,246,246,248,246,247,247,246,246,249,247,235,237,248,245,246,247,246,247,247,2
47,245,237,242,247,247,244,241,246,246,244,247,245,245,246,245,245,245,244,245,244,244,244,244,244,244,244,244,244,245,245,245,244,242,235,239,243,244,244,241,244,243,246,244,246,246,246,245,249,233,212,239,191,194,238,229,252,252,226,220,228,219,193,221,250,247,174,144,143,130,54,108,244,252,252,249,202,194,203,235,246,237,220,214,187,180,203,202,221,252,252,250,247,185,219,206,215,232,187,230,229,239,242,236,250,241,249,241,235,229,200,181,187,245,181,118,136,49,8,66,73,62,49,57,41,46,212,168,87,156,206,238,212,213,226,232,213,230,248,242,247,242,244,241,232,234,239,241,242,244,247,247,217,220,182,150,212,118,41,150,206,227,156,158,249,222,210,251,248,250,247,248,246,251,243,210,241,252,252,249,248,249,177,117,39,84,168,57,31,19,75,73,29,32,20,65,93,83,69,42,79,111,62,51,27,45,192,249,249,249,247,252,246,242,248,246,248,224,224,244,245,245,244,244,247,237,226,236,247,236,215,232,252,225,169,181,223,239,223,199,160,165,228,231,204,198,166,134,175,240,235,201,218,231,229,233,237,252,191,120,111,88,50,16,4,83,243,252,252,248,210,208,231,252,252,158,104,36,66,199,212,231,223,193,214,213,218,217,212,214,214,214,213,214,213,211,211,213,211,209,212,211,210,211,212,213,212,211,210,211,210,210,210,209,206,210,209,209,208,207,211,208,209,209,208,210,211,210,209,209,208,208,208,208,209,208,206,208,208,207,205,207,211,213,213,189,193,226,225,194,146,103,72,47,34,80,126,107,81,60,45,43,34,51,29,36,154,230,235,245,222,225,221,220,218,214,216,216,220,220,218,220,218,220,219,216,217,218,217,215,217,215,216,215,214,216,214,217,214,212,213,213,213,214,214,211,214,211,213,213,212,211,208,214,212,211,111,2,1,3,7,9,7,10,10,10,10,10,10,220,224,219,221,222,218,219,220,220,219,220,220,220,219,219,222,218,220,222,220,221,219,222,222,221,221,218,219,221,219,221,221,219,220,219,219,220,220,222,221,224,220,223,220,220,222,222,225,220,224,222,220,222,219,223,220,220,222,222,224,221,224,224,221,221,222,224,220,222,222,220,224,222,223,225,225,225,227,227,226,228,227,228,230,229,229,228,228,229,229,227,228,229,227,226,229,229,229,229,230,230,228,230,230,230,230,228,229,228,226,231,227,226,229,229,229,227,226,227,224,227,226,225,226,226,228,229,226,226,228,228,229,230,228,227,229,226,228,229,227,229,230,229,227,229,230,232,233,230,232,232,232,234,234,235,232,235,236,236,235,234,235,234,236,235,232,234,234,236,236,235,236,233,237,238,237,239,238,239,239,239,240,238,240,239,240,240,240,240,241,243,242,242,242,244,243,244,244,244,243,244,245,243,243,244,244,245,245,245,245,245,245,245,244,246,246,247,246,246,247,247,247,247,248,246,247,247,246,247,247,247,247,246,246,246,246,246,246,245,246,244,244,245,244,244,245,245,246,245,245,244,244,244,244,243,244,244,244,245,244,245,245,245,245,246,246,246,246,245,246,247,247,246,245,246,246,246,245,246,246,249,244,233,241,246,246,249,245,245,244,245,249,244,238,244,248,247,242,242,246,245,245,246,246,245,246,245,245,245,244,244,244,244,244,246,244,244,245,244,243,244,244,242,242,242,237,237,241,241,242,243,243,242,243,243,244,246,247,244,251,227,211,239,207,226,250,240,252,224,199,226,230,216,196,225,251,243,158,127,136,115,44,111,244,252,252,249,214,187,181,214,246,251,240,204,174,198,205,203,221,247,252,249,237,152,203,203,190,153,94,173,204,230,228,231,248,241,249,243,233,231,214,179,190,242,163,119,132,41,20,65,56,55,57,50,57,31,49,96,138,177,208,227,201,197,218,234,209,230,247,241,245,243,245,240,233,229,237,242,243,243,248,245,228,231,171,175,224,125,45,108,222,250,160,208,251,207,224,251,248,250,245,248,244,251,233,209,248,249,250,243,242,248,156,101,26,87,155,37,28,25,83,66,
26,30,22,61,84,82,78,50,92,116,62,51,39,25,128,244,244,247,247,251,246,242,248,246,250,232,220,238,248,247,247,245,246,243,227,234,248,240,216,226,251,237,174,175,217,236,231,210,171,163,217,223,161,197,198,114,150,223,227,162,173,220,226,235,241,252,181,110,104,66,41,34,115,183,248,250,250,231,215,229,234,254,220,135,98,30,103,211,215,234,216,195,208,212,219,215,210,214,211,213,212,213,212,211,211,210,210,210,210,209,210,210,208,210,212,210,212,212,211,211,208,212,209,207,207,209,210,206,208,208,207,208,208,208,208,207,208,209,209,207,208,208,206,208,207,206,205,207,206,206,208,213,198,179,197,223,235,213,157,103,57,25,13,23,43,65,65,51,46,33,34,22,8,110,223,244,241,228,223,226,220,215,217,215,217,219,219,218,217,219,219,218,219,218,217,217,217,217,214,214,217,215,215,214,213,213,212,212,215,214,213,212,212,213,210,212,211,208,210,212,212,212,211,212,113,1,1,5,8,8,7,10,9,10,11,10,10,220,223,221,221,219,217,218,219,221,220,217,218,221,222,219,219,221,219,220,219,220,223,219,219,220,217,219,218,218,220,220,221,219,220,221,221,222,220,219,217,221,221,219,220,220,220,220,221,218,218,223,223,222,222,220,221,223,222,222,222,223,222,221,221,220,222,222,221,222,225,224,223,225,225,226,226,226,227,229,229,229,230,229,229,231,230,231,230,231,230,228,227,230,229,229,230,230,230,230,232,231,230,231,230,232,232,228,230,228,229,231,229,230,231,228,229,228,229,227,226,229,226,226,226,224,226,227,227,229,229,229,227,228,228,228,229,231,229,229,230,229,229,229,231,229,230,230,232,230,230,232,228,230,232,232,232,232,236,234,234,234,231,232,235,235,235,236,236,236,237,237,238,239,236,237,237,237,237,237,239,240,239,237,237,239,240,241,241,240,240,240,240,241,241,244,242,243,245,245,243,244,243,243,242,242,242,244,245,245,245,244,244,245,244,245,245,244,247,246,247,245,245,247,248,247,246,244,245,246,248,247,246,246,246,247,247,245,244,245,244,244,245,245,244,246,246,245,246,244,244,244,244,245,245,245,244,244,244,245,244,245,245,244,244,246,245,244,246,244,245,245,245,244,244,246,245,246,246,245,246,248,240,230,241,246,245,246,246,246,245,246,247,239,239,247,247,245,241,245,247,246,245,245,244,245,245,244,245,245,244,244,244,244,244,244,245,246,245,244,243,243,243,242,240,243,239,237,241,242,242,244,245,243,244,242,244,245,247,244,251,225,232,252,214,236,245,204,208,220,229,245,248,224,203,224,246,237,161,135,128,113,41,104,245,247,251,251,228,218,179,188,231,245,252,224,184,228,199,204,224,240,252,239,169,92,206,196,152,103,17,83,174,233,222,232,247,240,251,245,232,226,210,165,182,245,179,149,141,60,49,63,65,59,43,52,51,40,35,103,206,208,229,243,193,192,217,233,212,231,246,242,244,242,244,240,233,229,234,240,244,241,248,244,224,184,119,160,210,125,97,214,249,244,181,240,251,196,232,252,247,249,245,248,247,251,222,217,251,247,250,242,244,247,155,99,24,94,155,38,27,33,90,55,23,32,25,64,76,82,77,53,89,116,74,45,46,10,72,226,243,249,249,251,247,241,248,244,248,237,217,235,246,247,247,245,247,245,230,234,244,246,221,220,249,247,189,169,207,238,230,213,182,119,162,194,151,204,231,162,158,218,217,151,160,203,223,235,251,236,146,100,81,66,25,84,237,252,252,250,229,221,226,249,252,252,183,124,91,32,150,223,214,236,209,199,214,212,217,214,212,213,213,214,213,214,211,212,210,210,211,210,211,211,212,210,211,211,212,210,210,211,211,210,210,211,209,208,208,209,207,208,208,208,210,208,208,207,208,206,207,210,209,208,207,207,206,207,207,205,206,205,208,210,212,213,188,174,198,216,222,220,189,155,140,110,71,58,49,31,33,24,6,49,93,153,209,237,241,235,230,228,221,219,216,216,218,217,217,220,219,217,218,218,216,218,217,218,218
,218,218,217,217,214,217,213,214,215,213,214,212,213,214,214,212,212,213,212,212,211,214,213,210,213,211,213,211,211,113,1,1,4,7,8,7,10,9,9,10,10,10,221,224,220,221,222,217,219,220,219,220,221,220,220,224,221,221,221,220,224,218,221,220,219,218,217,217,217,219,220,219,219,220,220,223,221,220,222,221,220,218,222,218,219,220,219,222,220,221,221,221,221,223,223,221,224,221,221,223,221,223,220,222,223,220,224,223,224,224,223,225,224,225,225,224,227,226,225,227,229,229,230,230,230,230,231,232,232,233,231,231,231,231,231,229,231,232,229,230,230,229,232,229,229,231,229,231,228,227,230,229,230,228,229,229,229,228,227,229,231,227,229,228,228,228,227,227,227,226,229,228,229,227,226,229,229,231,228,227,229,227,227,229,229,230,230,231,230,231,232,231,232,231,231,231,231,233,233,232,234,233,234,235,234,236,233,233,237,236,237,237,235,239,237,236,236,236,238,237,236,235,237,237,237,240,238,239,239,240,240,239,241,241,240,241,241,242,242,241,244,244,241,241,241,243,244,243,244,245,243,245,245,245,245,245,246,244,245,244,245,245,244,244,244,245,245,245,244,244,245,246,247,246,246,245,246,245,245,246,244,244,245,245,245,245,245,245,244,244,244,244,245,245,245,245,242,245,244,243,243,243,243,245,244,244,244,243,244,244,244,245,244,244,245,245,246,246,244,244,246,247,248,234,232,245,245,247,245,243,246,245,247,244,235,240,246,246,241,241,245,245,245,245,246,246,244,244,245,244,245,243,244,244,241,245,243,243,244,243,243,243,242,242,242,242,243,238,235,238,241,243,243,244,243,244,244,244,243,245,245,252,220,229,243,191,208,196,190,240,242,248,249,243,223,205,225,241,234,164,151,146,115,40,99,240,248,251,248,232,243,208,170,207,238,252,227,203,248,200,221,224,232,253,182,109,62,154,157,87,37,3,95,201,246,229,241,251,252,253,252,249,246,234,176,199,252,223,199,162,71,52,71,72,63,49,47,54,42,49,200,248,231,252,252,211,207,243,252,227,247,252,252,252,250,249,244,239,233,234,241,243,243,248,243,205,118,66,131,136,101,174,251,252,202,174,251,236,200,244,248,249,247,247,249,249,252,212,223,251,245,250,243,244,248,150,101,29,98,161,42,22,41,91,49,24,29,27,63,84,81,71,50,76,107,74,48,48,11,92,242,245,248,248,250,244,238,247,242,246,245,222,227,244,246,246,245,245,248,232,229,240,246,229,217,244,251,200,165,194,229,234,219,174,106,80,144,185,219,248,191,182,221,217,185,184,197,219,242,252,205,117,89,90,64,92,180,234,253,253,234,232,233,234,252,234,216,142,136,74,60,182,220,224,230,205,202,212,215,218,213,213,215,212,213,211,214,210,212,213,211,211,210,211,210,211,213,212,211,210,210,212,211,208,210,210,209,208,212,211,208,211,209,209,208,209,210,206,209,210,209,210,208,208,211,210,206,207,210,206,208,210,207,211,211,215,211,187,189,207,211,221,229,233,238,234,221,226,201,137,77,16,92,211,239,242,251,251,249,249,232,225,222,217,217,217,218,217,217,220,218,217,218,218,219,216,216,217,218,217,218,215,215,216,218,218,215,216,214,215,215,215,215,214,212,216,214,212,213,212,211,212,213,210,211,210,214,212,210,112,2,1,2,6,8,6,9,9,9,9,9,9,221,224,222,223,225,222,222,220,222,222,222,223,223,220,220,223,222,220,224,224,221,222,220,223,221,220,223,220,221,222,220,220,221,221,219,218,221,221,221,222,221,221,223,223,221,221,223,224,223,228,221,221,224,222,226,225,222,222,223,224,226,226,226,227,228,226,226,224,223,226,225,226,224,228,226,225,227,228,229,227,229,231,230,231,231,231,232,230,231,231,232,232,232,232,232,232,231,232,231,232,231,230,232,228,231,230,229,229,227,230,228,227,229,229,230,229,231,232,229,229,231,227,229,229,229,229,230,232,230,230,232,230,230,230,228,231,229,230,230,229,229,230,231,230,231,230,232,234,232,232,
233,231,232,232,232,234,233,235,233,234,237,237,236,235,234,235,237,236,237,235,236,237,237,239,238,239,240,239,239,237,237,237,239,238,241,240,239,241,241,242,240,241,242,242,243,242,243,242,242,243,243,243,242,242,243,243,243,244,243,244,244,244,246,244,245,246,245,246,245,246,245,244,246,244,244,245,245,245,244,244,244,245,245,244,245,246,247,245,245,244,245,246,245,245,245,243,244,244,245,245,244,244,244,244,243,244,244,244,245,244,244,245,244,243,245,245,245,246,244,245,245,246,245,244,246,246,245,245,244,245,245,232,235,244,245,246,245,244,245,245,245,240,236,244,245,244,240,241,246,245,244,244,245,245,245,245,245,245,245,244,243,245,243,244,242,243,244,242,242,241,241,240,243,243,244,241,234,240,243,243,244,241,243,245,245,246,242,244,246,248,209,224,212,170,215,227,233,252,237,214,227,237,221,212,231,245,239,182,175,155,126,41,93,238,242,251,243,230,249,230,197,207,222,252,225,171,236,156,179,214,217,227,118,32,10,131,127,64,33,89,227,249,252,252,252,252,252,252,252,252,252,252,220,226,252,250,202,146,63,51,69,76,58,54,56,53,49,85,233,248,252,252,252,239,248,252,253,253,253,253,253,253,253,253,252,252,252,248,249,251,248,251,247,214,132,66,71,43,69,181,250,227,105,173,251,214,207,245,247,248,246,249,245,252,245,202,232,251,244,250,243,249,248,146,73,11,84,132,40,20,66,92,41,28,23,35,64,81,83,70,55,71,107,77,44,49,12,83,243,244,249,249,249,245,238,246,243,246,247,225,220,239,246,246,246,244,247,236,225,235,249,232,214,236,252,216,166,184,225,231,223,203,150,127,169,216,219,235,193,162,207,208,208,212,194,218,247,242,164,126,117,65,86,193,252,252,251,250,235,240,244,248,252,252,168,106,132,73,93,203,220,223,231,204,200,214,213,219,213,213,214,212,211,209,213,211,211,213,212,210,211,210,211,211,211,212,210,211,212,212,209,211,211,210,209,208,209,208,211,212,212,210,208,210,209,208,211,212,209,209,210,210,211,211,209,210,209,209,210,212,210,211,215,217,201,184,199,214,213,218,226,231,244,249,251,251,233,178,114,80,184,234,252,252,251,251,238,231,225,220,219,217,217,218,217,216,217,217,219,219,217,219,218,219,218,215,216,217,216,215,218,217,216,217,214,216,216,214,217,216,216,215,213,215,213,213,213,213,212,212,214,213,212,211,213,212,212,112,2,1,2,6,8,7,9,9,9,10,10,10,221,224,223,223,223,220,222,223,222,220,220,220,222,222,218,223,222,223,225,221,225,222,222,222,222,225,222,222,221,220,219,219,220,220,222,222,223,223,222,222,223,223,223,225,225,224,224,224,224,225,226,224,224,224,226,223,222,225,226,226,225,228,227,227,227,227,227,223,226,227,226,228,225,227,226,226,229,227,230,228,228,230,230,234,232,231,232,230,229,229,229,231,232,232,234,234,234,233,232,231,232,234,230,232,231,232,231,230,230,229,230,229,231,229,230,231,231,230,230,229,230,230,231,229,230,231,230,231,234,232,233,236,233,232,231,231,231,232,230,230,231,231,231,230,229,231,230,230,232,230,232,230,232,234,233,235,234,233,234,234,234,235,233,236,236,235,236,236,235,237,237,237,238,237,240,241,240,240,240,239,239,238,239,239,238,240,241,241,240,240,241,242,241,241,242,242,243,242,242,242,241,241,241,241,242,242,241,243,244,244,244,244,244,245,245,245,246,245,246,244,244,244,241,243,244,244,245,246,246,246,245,244,245,245,247,245,244,245,246,245,244,245,245,245,244,244,243,244,243,244,244,244,244,243,242,244,244,243,245,245,244,245,244,244,244,244,243,244,245,244,245,244,245,245,243,244,243,244,243,246,242,225,237,245,244,245,243,244,245,244,245,235,236,245,245,242,239,243,246,245,244,242,242,244,244,244,243,241,243,243,243,241,244,243,241,242,241,241,241,241,240,241,241,241,242,240,237,239,241,242,242,242,244,243,244,243,
243,242,246,246,208,234,224,208,247,240,243,230,194,214,226,236,217,212,236,248,242,177,171,162,141,66,103,233,243,250,241,226,248,237,214,210,203,251,192,117,179,82,129,206,208,204,85,21,106,220,225,214,205,243,253,253,253,253,253,253,253,253,231,208,193,193,141,148,192,150,122,78,32,45,44,48,49,47,44,51,34,54,178,163,157,213,181,163,204,240,247,229,252,252,252,252,253,253,253,253,252,252,252,252,253,253,250,244,162,91,64,4,43,198,247,177,67,177,250,200,220,246,244,248,244,247,242,252,234,200,238,249,244,248,245,249,241,144,77,9,18,67,45,30,74,92,35,27,24,36,62,76,81,71,54,70,114,81,42,55,12,82,241,243,247,247,248,242,237,245,243,245,249,233,217,236,245,244,244,243,246,238,224,231,246,237,211,227,252,226,173,173,215,232,227,218,206,167,188,236,221,208,194,174,174,187,212,224,200,216,249,212,145,117,97,80,146,245,252,252,251,241,234,239,242,247,253,218,111,85,115,74,119,204,211,226,225,205,200,211,212,215,213,214,212,210,212,209,211,211,210,210,209,210,211,208,209,211,212,209,209,209,210,210,211,212,212,209,210,208,209,210,209,210,210,210,208,208,210,209,209,208,208,210,210,208,208,208,210,210,211,211,209,211,209,212,216,208,185,184,203,214,218,219,217,220,226,234,240,246,210,154,106,71,198,233,228,245,235,234,225,221,220,218,216,216,219,215,218,218,215,217,218,218,217,217,218,215,216,216,217,217,216,217,216,217,215,214,215,214,214,217,214,212,215,214,212,212,212,214,213,213,212,213,210,208,213,212,212,210,212,113,1,1,3,6,7,7,10,8,9,10,9,9,220,224,225,224,225,221,223,222,222,224,221,221,224,221,222,223,223,223,223,222,222,223,220,223,224,221,224,221,222,222,221,222,224,223,225,224,223,225,223,227,225,225,225,226,225,225,226,226,227,227,226,226,227,225,226,227,229,228,227,229,226,226,227,227,227,226,226,226,228,227,226,227,228,229,228,229,228,229,229,229,232,232,231,231,234,234,232,233,232,231,233,233,233,233,233,234,234,234,232,233,234,231,233,230,232,230,230,232,228,231,231,231,231,230,232,231,228,231,230,231,234,229,231,233,232,232,232,234,233,234,235,234,235,237,235,234,233,233,231,230,232,231,232,232,233,232,230,231,231,232,231,231,232,232,233,232,233,235,234,234,235,236,235,236,236,235,237,236,237,235,236,239,237,237,239,238,240,238,239,238,239,242,240,241,240,241,242,241,240,240,240,242,242,241,242,241,242,242,243,243,241,241,242,241,243,242,243,242,245,246,244,244,245,245,245,245,245,244,244,243,242,241,241,242,244,245,245,247,244,245,245,245,244,244,244,244,246,245,245,244,245,245,244,244,245,245,244,243,244,243,244,243,242,244,244,244,244,244,244,242,244,245,245,245,245,245,245,244,244,244,244,245,245,244,244,244,243,245,245,246,235,225,240,244,244,246,243,243,244,245,243,234,240,244,244,240,239,244,245,244,245,244,243,242,245,244,243,242,242,245,243,243,242,242,242,241,243,241,240,241,241,241,241,241,241,242,236,235,240,242,242,240,243,242,244,242,241,241,248,240,211,250,229,217,252,213,183,223,224,237,242,235,214,214,237,250,236,151,143,165,176,117,145,247,249,251,244,224,242,233,222,226,198,229,144,53,122,33,88,198,206,245,190,204,252,252,252,252,252,252,243,229,206,155,155,137,124,115,99,88,65,42,24,15,13,54,56,21,31,44,42,46,47,43,42,42,38,42,45,27,12,9,8,10,10,17,34,34,69,105,131,163,186,220,231,247,253,253,252,252,252,252,252,252,226,144,94,25,43,207,229,157,131,231,248,199,235,250,248,249,244,248,245,252,222,202,246,246,247,249,248,252,231,208,134,18,1,14,45,63,98,78,29,30,20,44,63,73,84,65,53,62,107,88,46,54,11,82,242,244,249,249,246,243,240,245,243,245,246,241,217,227,244,244,244,243,246,245,228,229,244,241,216,222,249,238,184,166,203,229,224,224,220,187,169,223,220,174,1
90,198,155,166,226,238,220,220,241,180,102,89,61,108,226,252,250,250,246,245,242,244,239,248,249,181,68,87,118,81,142,204,214,229,233,214,218,227,221,221,213,213,214,211,213,211,212,212,212,210,210,210,211,214,211,211,210,210,210,211,213,208,209,210,209,210,209,212,211,210,213,212,210,211,211,212,212,209,209,211,212,211,210,210,208,209,213,213,213,214,215,213,213,216,218,198,177,191,212,217,222,221,219,219,220,225,231,243,195,149,90,65,189,223,218,231,224,225,218,219,220,218,217,218,220,220,222,218,217,218,219,218,217,218,220,217,215,217,220,218,216,214,216,217,214,215,216,217,216,216,215,214,214,214,214,214,214,215,215,211,212,213,212,211,211,214,212,209,213,112,1,1,4,6,7,7,10,8,9,10,9,9,223,224,224,222,225,225,224,224,224,222,226,224,225,222,221,225,221,223,222,222,222,220,225,222,223,225,221,224,224,222,224,223,225,222,223,224,222,223,223,223,225,225,224,227,227,226,226,225,226,227,228,227,227,230,230,229,229,228,227,229,229,230,227,226,228,227,228,227,228,229,228,229,227,230,231,230,232,230,231,231,231,232,232,233,232,231,235,235,233,236,235,236,235,237,235,234,236,235,232,231,231,231,231,231,231,230,229,231,233,230,232,229,231,234,231,230,229,229,230,231,230,231,232,233,235,234,237,238,237,237,236,235,235,234,234,234,234,232,231,234,232,232,232,233,234,232,234,234,234,233,233,232,232,232,233,235,234,235,236,235,236,236,236,236,237,236,237,235,235,237,238,238,238,237,239,240,238,240,238,238,241,240,240,240,241,242,241,242,242,241,241,240,239,240,242,243,242,240,241,242,242,242,243,243,243,242,243,243,243,244,244,244,244,244,245,245,244,242,244,242,241,241,242,242,242,243,243,243,244,245,246,246,245,245,245,244,245,246,246,246,245,245,243,244,245,244,244,245,244,244,243,243,244,243,242,243,243,244,244,243,242,242,242,242,243,243,242,242,242,243,243,242,241,244,244,243,244,244,243,247,232,226,244,243,244,244,242,244,245,245,237,232,243,246,242,237,241,243,242,243,243,244,243,244,243,242,244,243,244,242,243,243,243,242,240,242,242,242,242,239,240,241,241,241,242,242,236,234,239,242,242,241,242,240,243,240,242,239,248,232,203,234,203,205,214,187,220,247,240,241,231,236,212,217,240,246,239,156,134,145,160,143,187,250,248,252,243,223,241,230,221,240,224,224,128,62,94,3,83,220,252,252,253,253,253,252,210,173,138,137,104,96,99,73,77,68,69,74,59,50,29,12,6,14,10,48,76,29,43,53,50,54,53,51,49,45,42,39,42,36,38,30,8,8,9,10,10,10,10,10,10,11,11,11,27,53,101,130,171,213,243,249,252,245,248,210,132,69,44,115,142,142,181,250,240,200,243,249,249,248,245,246,247,252,209,208,249,245,250,247,252,252,219,221,163,42,11,7,22,83,114,73,29,31,21,41,67,78,80,63,50,50,89,85,45,54,14,71,238,244,249,249,247,242,236,246,241,243,246,247,223,220,241,244,244,243,243,246,229,224,240,244,218,215,243,243,193,163,190,225,222,220,224,199,163,210,222,146,196,231,156,172,231,249,240,211,185,118,82,56,85,176,252,252,250,249,244,244,242,243,239,252,234,155,63,84,111,95,153,209,233,242,251,248,236,249,234,220,216,217,214,212,213,209,212,212,212,213,212,210,210,212,212,211,212,210,213,211,210,214,210,210,211,210,211,213,211,211,214,212,213,211,210,212,212,212,213,211,212,212,211,211,213,213,214,215,213,214,214,216,217,218,220,195,181,203,216,220,221,222,222,221,222,230,240,246,193,144,79,65,194,216,219,230,217,224,217,219,218,217,216,215,219,220,219,217,218,220,220,220,218,220,217,214,219,217,219,217,217,217,214,217,217,217,216,216,216,215,215,214,216,213,214,215,214,213,211,214,213,214,213,212,212,209,213,212,210,112,2,1,3,7,8,6,9,9,10,10,9,9,223,226,225,224,226,223,225,226,225,224,225,223,225,225,226,225,222,223,223,226,225,22
7,248,246,246,246,246,245,245,244,244,245,245,245,246,245,246,246,244,245,245,244,235,229,240,244,244,244,244,242,231,239,246,243,238,239,245,247,247,247,247,246,248,246,246,246,245,245,244,245,247,246,246,245,245,245,244,244,244,245,246,244,244,243,242,244,245,246,245,244,244,244,244,245,245,246,244,243,242,243,237,231,235,240,242,215,205,200,185,211,232,226,226,242,240,188,205,242,234,218,197,214,242,241,240,191,161,120,102,81,44,194,243,250,250,223,242,241,248,225,192,231,251,240,230,232,237,206,212,200,224,245,142,165,230,221,252,246,191,247,250,251,250,250,251,236,186,75,2,13,120,203,169,148,100,46,59,101,61,30,41,51,47,42,44,39,30,44,10,90,241,252,252,248,245,165,37,130,242,252,252,252,252,252,242,249,222,133,157,161,239,246,146,106,87,169,220,149,96,181,244,251,174,46,6,8,17,29,37,38,39,44,46,37,36,15,116,249,249,174,83,65,41,72,148,135,49,36,148,92,65,68,36,70,38,19,28,41,54,38,89,117,69,61,53,50,60,64,15,43,209,250,250,252,202,133,80,34,17,21,32,29,38,41,45,33,37,18,92,238,239,248,248,253,253,252,249,139,43,4,1,24,32,42,25,38,197,248,248,247,245,178,153,165,86,141,234,248,252,234,225,202,174,153,139,134,142,155,155,159,155,136,137,138,157,176,141,208,228,229,249,251,247,251,251,209,123,53,21,17,20,27,27,22,116,215,232,237,229,241,249,250,250,240,173,90,27,7,17,36,37,25,35,27,37,199,242,250,250,251,251,248,248,224,163,108,63,34,29,24,24,11,46,137,226,148,43,57,125,146,188,224,237,240,245,249,249,248,246,229,226,237,243,210,131,76,36,15,24,7,25,34,95,168,166,198,227,243,243,251,251,230,226,227,222,223,222,220,220,220,223,220,220,221,220,218,220,222,220,219,221,221,221,220,221,220,217,219,221,220,219,218,220,218,218,221,220,217,216,216,215,215,216,218,217,216,114,3,1,3,7,9,8,10,10,10,11,10,10,227,230,230,229,227,227,227,227,229,229,229,229,227,229,229,226,229,229,232,230,230,232,229,230,231,232,231,230,229,227,228,227,226,229,230,230,231,228,228,226,225,222,222,222,223,225,225,227,226,226,227,226,224,224,226,229,229,229,229,227,227,228,227,228,228,226,226,227,226,227,226,225,226,226,225,225,227,226,226,227,227,227,226,226,227,229,230,231,233,232,232,231,229,231,230,230,229,227,228,229,233,229,230,232,229,229,230,231,229,227,227,228,228,227,229,229,230,232,234,236,234,236,237,234,236,237,237,238,236,235,233,234,233,233,234,232,233,235,236,233,230,229,226,227,227,225,230,227,229,230,231,232,229,232,231,226,225,221,225,228,229,229,226,224,228,226,224,224,224,224,224,222,222,224,223,224,224,224,225,224,229,228,226,225,225,224,225,225,226,225,224,225,226,225,227,225,225,227,226,227,225,229,232,230,231,231,231,233,234,233,237,236,237,238,239,240,238,235,235,233,231,230,228,229,228,233,232,229,229,228,225,226,226,226,228,226,229,230,231,232,232,235,237,240,241,242,242,242,244,246,246,246,249,246,247,247,247,247,246,248,246,246,247,245,247,248,246,247,245,245,245,247,245,233,232,244,246,246,247,245,237,234,244,248,243,238,242,244,246,248,247,247,247,247,247,246,246,246,246,245,244,245,246,245,245,246,247,244,245,245,244,246,245,243,245,245,245,246,246,245,245,244,245,245,245,245,244,243,241,241,242,238,231,233,243,240,214,206,215,214,243,252,221,193,207,231,211,224,247,237,213,204,221,244,239,242,201,166,138,121,85,48,198,243,249,249,220,241,241,252,219,184,236,248,244,230,198,228,200,148,130,188,240,168,199,243,242,252,248,205,248,249,237,222,192,190,168,148,123,73,78,160,207,141,137,108,53,53,92,65,31,42,55,44,42,45,43,35,42,16,80,221,179,199,253,232,182,51,96,240,252,252,252,252,251,190,208,191,165,183,194,247,193,151,158,175,204,206,160,139,215,247,248,147,30,9,6,24,29,22,129,207,202,179,
105,53,7,97,247,247,182,95,20,15,57,105,95,32,24,79,63,72,79,42,55,34,16,47,49,37,34,33,48,57,61,66,34,41,28,29,195,249,251,190,88,33,5,24,29,23,24,33,38,39,42,36,39,35,38,14,93,201,239,241,238,212,136,66,22,3,19,43,41,49,46,74,207,246,251,251,251,251,196,199,252,155,151,242,252,252,242,216,187,160,145,141,137,143,157,149,158,153,135,141,141,185,245,246,251,251,252,252,252,252,253,253,246,210,117,50,15,12,23,23,30,8,68,215,245,245,248,248,251,251,246,174,110,37,10,18,17,34,27,27,25,118,232,234,247,247,250,250,250,207,138,98,47,27,18,14,21,25,24,14,146,226,240,202,96,89,153,214,248,245,245,245,246,246,238,241,225,229,244,242,192,129,74,27,18,12,75,107,59,42,26,8,15,18,89,183,231,238,249,231,228,226,228,223,223,222,221,223,222,221,222,223,223,220,221,220,221,222,221,222,222,219,221,221,220,220,220,221,219,220,220,218,219,219,220,217,217,220,218,218,219,218,218,113,3,1,3,7,9,8,10,9,10,11,10,11,224,226,227,225,226,227,227,229,229,228,230,227,230,229,225,227,226,227,230,231,230,230,230,229,228,230,229,228,226,222,220,222,224,227,227,226,225,223,224,223,225,222,223,224,222,225,224,224,224,223,224,224,224,223,226,224,224,225,226,225,224,225,223,225,226,221,225,223,222,224,223,222,223,225,225,226,226,224,225,225,226,226,226,227,225,229,228,230,231,229,230,230,230,229,229,227,227,227,226,227,229,230,230,230,227,227,227,229,227,225,227,224,225,227,227,227,227,231,231,230,232,234,234,235,235,234,236,232,234,234,233,234,233,234,234,233,231,232,234,232,231,231,229,231,230,229,229,229,230,230,231,232,232,230,229,227,226,225,225,224,226,226,224,224,220,220,222,221,223,221,222,222,222,222,222,221,221,220,224,224,222,224,222,224,225,226,225,223,224,223,224,225,222,224,223,223,225,225,227,226,227,226,227,226,227,230,231,229,230,232,233,234,234,236,237,236,236,236,232,232,230,229,229,230,230,229,232,229,228,226,224,225,225,227,226,226,226,227,230,228,230,232,235,239,241,242,243,244,243,245,245,246,246,247,247,245,245,244,245,245,244,245,246,246,245,246,245,245,244,244,247,247,242,233,239,245,244,244,245,245,235,238,246,246,240,238,245,246,247,246,246,247,245,247,246,247,247,245,246,245,245,245,244,244,244,245,245,244,245,244,246,245,246,246,244,242,245,245,245,246,245,245,244,244,244,245,243,241,243,242,241,238,229,230,244,235,214,219,223,218,237,215,182,191,230,249,229,243,250,235,212,207,219,244,237,239,202,167,138,115,86,57,203,244,247,248,220,239,241,252,208,184,241,244,246,213,178,202,177,158,148,203,252,199,220,246,252,252,249,203,224,198,186,194,146,171,166,200,213,143,154,177,203,140,162,117,63,68,89,73,30,46,54,41,38,46,42,36,41,14,76,190,139,154,208,170,183,122,95,190,243,247,249,245,205,196,251,210,189,199,194,224,183,176,203,190,213,200,156,167,230,248,240,120,24,6,11,26,39,150,251,251,251,251,147,53,8,86,247,232,162,98,21,10,42,61,74,49,55,87,57,71,66,40,35,18,43,49,33,35,37,29,28,43,57,51,43,16,72,225,253,253,133,44,6,7,32,25,27,25,34,35,36,44,37,43,42,45,45,48,37,15,68,101,91,60,38,33,48,68,74,83,81,101,103,131,190,213,169,154,163,146,113,95,128,93,76,172,234,234,234,196,165,151,146,140,135,142,151,151,162,148,136,142,144,163,166,141,226,234,200,188,225,237,246,246,250,240,149,60,18,25,30,25,27,35,16,26,166,222,251,251,252,252,212,130,68,22,14,15,22,30,29,31,33,28,139,198,228,235,251,251,246,159,93,51,25,24,20,46,77,59,44,19,81,163,235,215,179,178,206,235,248,247,217,150,145,134,145,223,226,235,248,212,149,97,64,28,5,45,93,89,67,51,39,35,39,35,35,9,82,208,243,243,225,220,226,226,223,222,220,222,223,222,221,223,222,221,222,221,223,222,219,221,222,222,221,221,219,218,218,219,219,217,216,218,217,2
17,220,219,218,216,219,220,220,216,217,115,2,1,5,8,8,8,10,10,11,11,12,10,224,228,225,224,226,226,227,225,228,227,228,229,228,227,229,229,229,230,231,231,230,231,231,231,231,229,228,230,229,229,225,223,224,225,224,221,222,222,222,223,224,221,224,223,222,221,222,222,220,222,224,224,220,222,224,222,220,221,222,223,223,224,223,222,222,224,223,224,226,224,224,224,225,225,223,225,228,225,225,224,225,227,227,226,228,229,229,229,229,230,230,230,230,231,229,228,228,228,229,230,230,229,229,231,229,229,229,226,229,227,227,228,225,226,227,228,228,227,230,231,231,233,233,233,235,234,233,232,230,232,234,236,235,236,234,232,234,234,235,233,233,235,235,237,234,232,232,227,229,232,230,232,230,230,229,227,226,226,227,224,224,223,224,223,222,222,222,222,223,223,222,222,224,221,220,221,218,220,219,218,222,222,223,224,226,224,224,225,222,224,223,222,224,221,222,224,223,227,229,229,228,229,229,226,227,230,229,229,232,230,233,232,232,235,234,237,239,236,234,232,233,233,232,230,229,232,231,232,230,230,230,231,231,227,229,227,227,226,229,229,227,230,231,236,236,240,243,243,246,246,245,245,245,247,248,246,245,244,244,245,244,246,247,245,247,246,245,246,246,247,248,249,241,235,244,246,244,245,248,240,236,243,247,244,239,240,246,246,246,246,244,247,247,247,246,247,246,246,247,245,245,245,244,244,245,244,245,244,245,247,247,246,246,246,245,244,245,245,245,245,244,244,244,243,242,241,243,242,241,240,240,241,230,227,243,231,211,214,211,181,200,210,198,224,252,252,219,218,245,233,208,212,223,245,231,244,204,163,139,113,84,50,200,244,248,250,222,244,242,252,206,194,249,244,249,238,194,211,211,195,178,218,252,220,234,247,252,252,208,149,145,150,180,198,211,248,229,218,188,155,158,162,189,128,174,124,81,86,86,79,30,49,54,43,37,39,44,36,42,12,121,244,170,164,201,170,171,127,104,150,225,248,250,240,194,218,252,210,205,160,175,244,143,170,212,176,217,168,139,184,229,250,237,120,21,5,10,22,124,242,253,253,237,153,67,21,5,84,246,226,154,86,15,14,70,91,66,59,81,91,76,59,44,39,32,33,44,38,32,33,34,32,35,41,44,55,35,117,243,247,242,103,15,1,19,31,29,34,37,44,45,53,57,62,69,72,78,81,85,75,80,53,26,32,41,50,42,56,64,60,67,69,69,66,63,60,48,27,12,14,15,27,36,48,48,39,13,100,237,243,224,174,145,145,138,141,137,137,145,148,155,143,134,141,144,140,69,10,6,8,12,14,11,19,53,92,133,147,128,81,47,51,51,53,50,46,41,43,16,41,146,198,201,160,96,46,27,28,20,16,24,30,27,32,33,35,33,29,18,73,199,217,243,179,83,38,21,12,89,214,211,132,57,37,39,15,24,45,95,218,244,235,249,222,141,83,55,14,38,153,217,239,243,171,95,70,31,49,105,89,84,57,47,54,39,38,34,30,35,38,21,77,211,225,231,221,223,226,225,225,223,223,221,224,223,222,224,222,223,223,224,222,220,221,225,224,221,222,221,222,220,221,221,220,221,220,220,217,220,222,217,217,220,220,220,217,220,116,3,1,5,9,9,9,12,10,11,12,13,12,224,225,227,224,226,222,222,226,226,223,227,227,225,226,227,229,227,228,231,229,229,232,229,229,229,230,231,229,231,230,228,227,223,223,222,221,222,220,220,221,223,222,222,221,221,222,222,222,222,222,220,218,221,219,220,221,220,220,222,225,221,220,220,219,222,218,222,223,219,223,223,223,225,224,223,224,225,223,223,225,226,225,226,228,226,228,229,229,229,231,230,229,229,229,232,230,229,229,229,227,229,230,228,227,229,231,227,226,226,226,227,224,227,227,226,229,226,229,229,227,231,230,234,235,233,235,232,230,231,230,234,235,233,233,232,234,231,232,235,234,235,235,237,239,235,232,230,229,230,229,227,226,227,227,229,227,226,225,224,226,226,225,223,223,221,220,221,220,222,224,223,221,222,222,220,222,222,220,220,219,223,223,223,225,224,224,225,222,225,224,222,223,221,221,222,223,222,22
4,222,223,225,224,226,227,227,227,227,229,231,230,232,233,230,234,235,234,236,235,234,236,234,232,230,231,232,230,232,232,232,234,234,232,230,230,227,225,228,229,228,226,226,226,227,229,232,237,239,241,241,242,242,241,242,243,245,245,244,244,245,246,246,245,246,245,244,245,246,246,245,247,248,246,238,236,246,245,245,247,245,237,236,246,247,242,237,242,245,245,247,245,244,244,245,246,246,247,246,246,246,246,245,244,244,244,245,245,244,244,244,245,246,246,247,245,246,245,245,245,245,244,244,244,243,243,240,241,240,242,240,238,237,240,232,224,241,224,204,197,192,197,239,226,208,229,244,223,185,220,246,230,207,213,226,245,231,245,199,151,124,99,73,55,205,244,249,247,222,244,243,252,203,208,251,241,250,244,208,237,232,212,187,227,252,227,224,193,200,209,162,128,148,184,236,251,251,252,241,198,163,160,133,135,180,111,192,132,88,81,78,84,25,53,56,46,33,43,44,38,34,23,158,243,206,222,247,192,199,160,61,94,184,233,242,203,175,244,249,211,162,130,222,203,91,174,188,160,205,139,127,186,233,249,237,127,26,2,10,19,152,244,250,214,64,3,1,10,6,133,251,224,168,86,22,26,32,66,64,52,61,61,55,51,39,33,43,38,36,37,40,39,35,33,34,28,39,55,47,126,196,198,77,9,6,38,42,50,63,73,94,105,121,133,132,130,128,109,95,78,69,69,59,55,56,43,47,60,58,58,58,63,65,69,69,70,69,76,75,66,59,61,66,66,71,81,87,69,44,102,229,226,187,152,137,141,138,142,142,141,149,146,153,141,134,145,149,141,71,17,15,30,40,42,39,34,40,39,39,43,42,42,38,35,39,52,57,55,60,54,55,42,36,27,15,39,47,55,43,44,27,24,34,29,32,30,26,31,33,29,30,33,27,125,222,195,155,47,32,26,42,126,231,194,128,49,34,29,27,12,43,169,230,236,242,184,120,72,49,16,28,170,241,241,215,115,79,29,44,145,162,139,77,35,45,38,31,32,33,43,37,35,27,12,127,217,226,232,224,225,225,224,224,225,222,221,224,223,223,222,223,223,223,224,223,222,223,223,224,222,221,223,223,223,223,224,222,223,221,223,224,219,220,220,219,221,221,219,218,115,4,1,4,8,10,9,10,10,11,12,11,11,222,227,225,224,227,223,225,223,225,226,225,226,226,226,226,227,226,225,227,226,228,230,231,227,228,229,229,231,229,229,228,227,228,226,225,223,222,221,221,218,220,220,220,222,220,222,224,223,223,219,218,219,218,220,222,219,220,220,220,223,220,221,222,220,221,221,221,218,219,219,220,222,221,223,221,224,224,222,225,224,225,227,227,227,229,228,226,230,229,230,230,229,229,229,229,229,230,228,227,227,229,227,227,229,229,229,229,227,227,228,227,227,226,225,224,224,224,227,227,227,228,228,229,230,231,230,231,231,229,230,231,232,231,230,233,232,232,233,232,234,235,234,232,234,233,231,232,229,227,227,224,225,223,222,225,226,227,226,228,226,226,226,226,224,221,222,221,220,224,224,224,222,223,222,220,223,221,222,221,219,221,221,221,221,222,222,222,223,222,220,219,221,223,220,220,222,221,221,221,224,222,223,226,225,227,228,226,226,227,227,229,229,230,232,232,236,235,236,237,235,234,237,237,236,235,237,237,236,236,236,234,235,234,230,230,230,231,229,229,225,227,229,226,228,230,234,236,236,238,237,238,240,241,241,242,241,242,245,244,245,245,246,244,244,244,244,246,245,244,245,246,243,234,238,247,248,246,247,243,234,241,247,246,241,241,245,244,244,246,247,246,246,246,246,246,248,247,247,246,247,246,246,244,244,245,244,244,244,245,245,246,246,246,247,244,244,245,245,246,245,246,243,243,244,241,242,242,242,240,238,235,239,232,224,237,222,205,206,212,228,252,221,187,189,219,229,214,235,250,227,204,217,229,246,227,247,200,150,124,94,66,48,204,245,250,249,223,244,244,252,203,215,250,238,247,249,211,223,252,248,190,195,220,181,139,113,160,206,196,214,201,217,252,252,252,252,240,163,137,169,139,160,190,130,190,106,78,73,83,105,22,45,57,41,41,45,38,43
,33,26,163,243,195,223,249,232,237,225,112,38,125,196,202,171,174,233,208,172,135,162,252,203,75,171,187,151,199,119,116,198,231,248,247,151,45,3,8,10,99,240,247,202,66,4,8,1,111,250,253,251,173,82,30,19,50,109,84,49,57,57,48,44,39,40,35,36,36,38,39,38,39,34,33,33,53,56,53,113,108,83,74,72,93,97,112,116,108,109,104,92,86,86,72,60,62,59,56,57,66,69,72,76,77,82,98,114,121,127,131,136,142,133,146,149,139,147,146,149,136,123,122,120,113,111,97,94,78,92,200,199,155,139,134,144,139,145,147,150,153,154,152,138,135,144,159,144,83,51,43,46,54,53,64,60,59,59,58,59,50,54,50,43,41,44,39,39,40,40,42,47,45,42,50,53,60,62,58,64,67,56,53,50,42,43,37,35,37,28,33,30,37,9,67,171,179,139,57,35,19,26,178,223,151,86,42,29,39,13,47,205,239,240,249,224,143,83,49,14,98,226,247,218,150,83,49,122,201,186,148,79,49,40,25,29,28,36,39,35,38,33,49,16,78,196,218,235,228,222,224,221,224,223,222,222,225,224,222,224,225,223,223,222,222,222,225,225,222,222,222,225,223,222,222,224,225,223,222,221,222,223,220,220,221,220,222,221,220,114,4,1,5,9,10,9,10,10,12,12,11,11,223,225,226,221,226,223,222,224,224,223,225,227,224,226,226,226,226,224,226,227,228,230,227,229,231,229,231,230,231,230,231,232,229,229,229,227,226,222,218,222,217,220,219,218,220,219,220,217,217,219,219,218,220,220,221,219,218,220,217,221,220,219,220,221,222,220,222,221,217,222,220,219,222,220,221,222,222,222,224,223,225,228,225,226,226,225,226,228,229,231,229,231,231,230,232,230,232,230,230,230,227,228,229,229,230,231,229,230,233,231,232,228,227,225,223,224,223,226,225,225,227,226,229,226,224,227,227,227,228,227,229,229,228,232,229,232,232,233,231,230,233,233,234,231,230,232,230,228,227,224,221,222,224,223,225,224,226,226,225,227,226,225,226,226,222,222,222,224,224,224,222,220,221,220,219,222,220,219,220,218,220,219,220,221,218,219,220,217,221,220,219,222,219,221,221,221,219,223,222,223,225,224,225,223,226,226,227,223,227,227,227,230,227,232,232,231,234,232,234,235,237,239,240,238,240,240,239,239,237,237,237,238,237,236,233,230,231,229,230,229,229,227,229,231,233,234,232,235,236,237,237,239,240,241,242,241,242,242,242,242,242,242,242,243,245,246,246,246,245,247,247,240,233,242,246,245,246,246,237,236,245,246,243,237,242,245,245,246,246,245,245,246,245,245,245,246,246,246,246,246,245,246,246,245,244,244,244,244,243,244,245,246,247,246,245,244,245,245,244,245,244,244,244,244,242,242,239,240,237,237,235,238,235,226,232,220,213,216,215,200,220,196,187,221,247,247,229,240,247,222,203,219,230,243,220,249,203,164,142,108,71,49,208,244,249,246,220,241,245,251,190,214,247,236,248,251,211,195,227,232,139,101,134,152,168,168,229,250,230,238,222,238,251,249,251,252,197,109,129,174,171,186,200,139,165,69,48,55,69,97,19,46,57,46,42,40,45,44,21,31,168,234,189,215,243,222,240,248,131,49,110,188,226,161,144,160,108,130,148,220,252,150,122,213,189,194,183,106,162,218,240,244,249,188,70,3,4,2,43,210,252,252,201,133,182,244,253,253,252,179,91,36,12,33,27,74,76,42,41,45,42,39,39,33,35,37,36,38,35,36,42,44,51,63,80,95,99,85,67,82,100,92,73,74,65,59,57,56,113,60,62,73,85,99,112,123,135,139,141,145,138,138,133,130,133,119,107,95,77,67,65,58,56,55,49,46,49,43,42,45,39,36,35,24,24,24,31,40,103,140,145,142,139,139,142,147,156,154,154,155,152,136,136,152,162,134,69,27,38,15,24,27,35,36,38,44,50,46,54,57,50,56,58,64,59,56,57,53,52,51,49,48,46,42,41,49,57,63,63,72,73,73,75,73,66,62,59,57,44,50,36,36,26,30,153,134,77,34,32,178,236,212,160,98,62,27,33,19,119,227,250,250,249,227,152,99,56,18,35,152,157,128,105,43,99,204,234,185,107,69,33,24,101,146,124,82,51,39,36,34,44,9,77,193,205,231,231,225,
226,222,225,223,223,224,222,223,225,225,223,220,225,224,221,224,224,222,222,223,225,226,224,226,224,225,223,226,224,221,224,220,221,220,219,223,221,220,221,116,3,1,5,8,9,9,12,10,11,12,12,12,221,222,221,223,225,221,223,221,220,222,222,223,225,223,224,226,228,226,226,228,226,226,229,229,229,229,229,229,230,231,231,234,228,231,232,229,227,221,221,219,220,218,218,218,215,218,217,215,218,215,218,219,217,219,218,217,217,217,221,219,218,219,217,216,218,219,221,219,217,217,217,218,220,220,218,220,219,220,222,222,225,224,223,224,225,224,226,229,229,228,229,230,229,226,227,229,231,229,229,226,229,229,228,232,230,231,231,231,230,232,229,230,230,227,227,225,222,223,224,223,225,224,224,224,223,223,225,225,224,227,227,227,230,230,232,232,231,232,231,231,232,232,230,231,231,229,229,228,225,222,223,223,223,226,226,224,224,224,226,226,227,226,223,226,225,225,222,221,223,220,223,218,217,218,217,220,219,218,219,220,221,219,218,218,217,218,218,218,221,221,220,220,220,218,220,220,221,223,221,222,220,222,224,223,224,225,225,227,225,225,228,227,230,229,229,231,231,234,233,235,236,239,239,238,240,240,237,238,237,236,237,239,237,234,235,232,230,229,229,229,228,228,227,231,231,232,234,234,237,235,237,237,240,243,242,244,242,241,239,238,240,242,243,242,244,245,246,246,244,244,246,235,229,241,244,245,248,244,234,238,247,244,240,239,245,246,246,247,246,247,246,246,246,246,246,246,245,246,246,245,245,244,246,246,244,242,244,244,244,244,246,245,246,246,245,244,243,244,244,244,244,244,244,245,243,243,240,239,237,237,237,239,239,232,229,217,210,207,180,195,238,214,241,250,252,236,192,225,249,217,203,219,229,240,215,252,202,160,149,117,74,55,214,244,250,246,221,240,247,243,177,214,248,245,252,252,218,147,159,161,122,114,156,211,227,223,252,252,234,240,212,234,251,239,252,247,177,120,142,169,175,188,203,150,148,72,63,24,32,90,27,69,57,36,34,35,46,44,23,93,220,234,191,212,237,213,234,242,128,24,90,198,225,148,139,136,140,159,165,217,174,112,155,243,209,240,192,85,187,238,239,241,247,224,127,28,2,9,12,88,223,240,249,249,253,253,253,210,112,45,1,12,24,27,34,30,39,51,41,39,45,43,45,42,44,51,57,62,69,74,87,103,111,110,98,77,63,60,45,50,57,50,53,61,76,86,100,118,129,128,125,139,146,138,128,117,93,70,61,55,46,39,35,34,33,30,31,29,31,27,24,25,17,23,19,17,24,30,35,33,34,36,30,23,21,13,22,11,32,96,141,152,143,146,148,154,155,155,157,160,151,136,138,153,166,121,53,32,33,28,23,18,16,14,13,16,18,22,23,23,26,26,33,32,35,42,48,56,58,61,62,64,69,63,63,61,59,62,55,53,47,48,54,58,66,71,77,78,72,73,66,57,55,48,34,72,70,27,157,239,248,232,150,111,63,31,34,18,70,210,241,241,229,144,104,83,69,33,21,15,14,10,19,9,92,234,227,143,101,45,58,196,235,235,241,184,118,75,51,31,39,8,72,203,209,229,234,225,228,222,226,223,224,224,222,225,225,224,221,222,227,225,226,225,224,224,225,227,224,224,222,228,225,222,226,223,224,222,222,223,219,221,221,221,223,220,221,116,3,1,5,8,9,9,11,9,11,12,11,11,219,220,222,218,223,220,221,221,223,222,220,222,222,221,221,222,222,223,224,227,226,229,229,226,229,227,230,229,229,231,229,229,226,228,229,225,226,224,221,221,218,216,216,216,213,214,216,214,217,218,217,216,218,217,215,214,217,219,217,219,217,216,216,215,216,214,216,218,215,218,219,216,220,216,216,217,218,219,219,220,219,221,220,223,225,222,224,225,225,226,228,226,226,225,224,224,225,227,226,229,228,229,230,227,229,229,228,227,229,229,230,230,231,229,226,224,225,223,221,221,222,222,222,220,220,221,222,223,224,226,227,226,227,231,230,232,229,230,231,229,230,229,230,232,231,229,226,224,224,223,223,223,221,221,222,222,222,221,222,225,222,222,223,221,222,224,225,222,221,2
19,219,221,219,218,218,220,217,219,221,222,222,221,221,217,217,216,218,217,220,218,215,219,220,221,220,220,219,218,219,219,221,221,220,220,225,224,224,223,223,225,227,227,226,229,228,229,230,229,232,235,236,236,235,236,237,236,237,236,235,236,235,235,232,232,231,229,229,229,227,227,228,226,229,228,230,232,232,235,234,234,235,239,241,240,240,241,240,240,238,239,240,241,243,243,244,243,243,244,244,244,241,227,225,238,241,241,246,239,229,240,246,243,238,241,245,245,244,244,245,246,247,247,245,245,247,247,247,246,245,246,245,244,244,244,244,243,244,244,244,243,244,245,245,245,244,244,245,245,245,245,245,244,245,243,242,242,240,239,236,236,235,235,237,235,224,214,199,196,211,240,252,203,218,230,222,206,196,229,248,214,201,218,223,237,214,252,194,149,134,108,70,53,210,244,250,242,219,238,246,238,181,224,250,250,248,232,187,144,167,182,187,160,181,237,247,225,246,252,220,228,187,197,239,238,252,239,179,146,141,163,192,171,206,146,151,136,101,37,37,111,58,75,61,36,36,35,38,46,140,221,236,224,186,210,240,211,233,234,122,22,41,139,196,187,217,188,167,201,188,195,159,99,182,232,194,252,163,83,198,235,242,236,243,250,203,127,53,3,3,25,57,145,198,216,203,163,110,60,37,15,41,60,53,64,61,66,65,69,75,76,80,84,86,91,99,91,91,93,86,76,63,63,57,50,48,53,55,64,73,85,103,117,137,141,139,134,127,107,80,67,53,44,39,28,29,28,31,27,20,23,25,25,19,14,19,26,25,30,28,31,35,19,17,21,23,36,34,35,25,18,25,33,39,38,19,21,14,47,111,139,155,158,143,150,148,150,145,148,160,155,141,132,141,157,158,101,40,23,29,46,49,29,21,13,15,16,16,24,24,20,20,24,22,21,20,16,19,19,22,27,31,43,44,46,60,69,80,91,94,89,83,76,67,60,57,53,48,49,54,60,68,75,69,74,67,54,59,56,180,237,248,211,130,75,39,34,32,38,23,22,142,198,148,99,79,70,67,66,55,37,11,15,12,125,234,241,227,135,95,55,19,100,198,228,248,234,165,104,64,30,46,10,77,206,208,232,235,224,228,222,227,224,224,224,225,226,225,226,225,224,226,225,224,227,227,225,226,225,225,224,226,226,225,225,224,223,222,223,221,218,220,222,219,220,222,217,218,115,4,1,4,8,10,9,10,10,11,11,11,12,220,224,220,220,221,216,218,220,220,220,221,220,219,218,222,222,222,224,224,228,227,226,229,227,228,229,228,227,227,226,227,226,222,224,223,220,221,221,222,222,219,219,215,216,219,217,214,214,217,215,217,217,215,216,213,217,217,215,218,216,215,214,215,215,214,215,216,215,217,216,217,219,217,218,221,220,218,218,218,219,222,221,221,223,222,224,224,227,227,224,223,225,225,225,226,224,229,228,229,227,228,230,228,229,229,227,229,230,229,231,227,230,230,228,228,227,224,221,222,220,223,224,223,222,220,219,221,220,220,224,223,223,227,227,228,231,229,233,232,233,230,230,231,230,230,230,226,224,222,220,223,219,218,222,219,220,222,224,224,223,224,223,223,222,222,224,222,220,223,222,223,221,222,221,221,220,220,223,222,221,224,222,219,220,218,218,217,218,219,217,220,218,219,218,219,220,217,219,218,221,223,223,222,222,225,224,224,224,225,225,226,225,224,226,227,228,229,230,231,232,234,232,234,234,234,236,236,235,235,234,234,232,230,229,229,227,227,229,227,227,227,227,230,230,230,234,234,236,237,235,237,237,240,239,240,240,237,239,237,238,241,240,243,243,243,242,243,245,245,244,240,225,229,242,241,244,245,233,232,242,245,241,238,244,245,244,246,245,246,247,246,246,246,246,247,245,245,247,246,246,246,245,244,244,245,242,244,244,243,245,245,245,246,246,245,245,245,245,245,245,244,244,244,245,243,242,240,237,237,235,235,237,239,240,218,208,208,217,228,253,194,78,131,198,236,232,223,243,247,209,203,219,222,232,215,252,188,146,128,101,63,52,214,244,250,242,220,246,252,239,194,229,230,205,189,189,200,185,238,248,241,173,159,232,247,226,2
38,251,223,196,145,187,241,247,252,226,176,165,160,181,206,154,193,133,165,185,156,76,55,125,66,89,65,45,39,41,30,61,216,246,219,198,177,210,239,208,234,237,130,66,54,103,165,219,252,210,177,130,173,237,167,165,200,184,153,210,122,69,200,228,244,240,239,249,252,243,210,134,73,113,54,7,14,5,10,8,10,17,45,67,74,92,105,113,120,113,115,110,97,103,100,82,67,61,59,55,51,53,52,53,62,65,80,90,108,129,136,144,137,131,110,94,75,52,43,35,32,27,23,29,28,19,15,12,15,25,25,23,18,25,29,29,26,14,24,40,35,37,37,33,35,26,19,30,41,44,35,19,19,15,16,22,31,53,33,38,51,97,196,181,161,161,155,158,151,150,148,158,158,149,135,133,143,164,155,82,29,12,16,40,60,54,36,27,14,19,31,27,25,18,13,19,23,23,28,22,16,13,17,24,19,22,23,22,24,27,28,35,47,55,69,78,81,87,90,83,80,76,66,64,57,55,51,57,61,69,59,50,171,208,176,159,99,70,43,31,34,33,36,39,16,31,76,126,137,71,74,100,96,153,196,214,245,246,253,253,236,173,104,36,5,12,15,55,198,228,199,128,71,45,46,8,82,206,211,233,234,225,226,226,226,225,226,223,227,227,226,227,227,228,229,226,229,228,228,227,227,228,226,229,225,225,223,226,227,221,225,222,222,219,219,223,221,222,225,220,218,115,4,1,4,9,10,9,10,10,12,12,11,11,214,220,218,218,219,217,216,217,220,219,218,217,218,221,220,221,220,222,225,224,224,225,226,226,227,224,226,223,223,227,223,224,222,221,222,219,220,216,218,219,218,214,215,218,214,215,215,213,215,212,215,214,213,215,212,213,213,213,213,215,210,212,215,212,213,213,213,214,213,212,215,213,215,215,218,216,217,217,218,220,219,223,222,223,224,222,224,224,225,223,223,223,223,224,225,228,226,227,227,228,227,227,228,226,226,226,226,229,228,227,227,227,227,224,225,224,222,219,220,221,222,224,223,221,218,221,222,219,220,222,223,224,224,224,225,226,226,228,228,227,228,228,227,227,228,229,227,222,222,219,216,218,220,220,221,222,220,223,224,223,223,222,223,221,225,222,220,219,219,221,222,224,221,220,222,221,220,216,218,220,218,220,219,217,217,217,219,217,217,218,217,217,215,215,215,214,217,217,220,218,220,222,221,221,222,222,222,220,221,222,222,222,223,225,223,223,226,226,226,226,228,227,228,231,232,233,234,231,232,235,234,232,229,229,228,226,227,226,227,226,226,226,226,229,233,233,234,234,233,235,236,234,236,236,236,238,236,237,235,236,238,239,241,240,243,242,243,245,244,244,235,221,231,243,241,245,240,226,236,244,244,237,237,245,244,246,245,246,246,246,246,246,246,245,246,244,244,244,244,244,244,244,244,244,245,243,243,242,242,242,244,246,246,246,244,245,245,246,245,245,243,243,244,242,241,241,238,236,235,234,232,235,237,240,214,208,213,222,217,202,125,13,103,220,251,244,220,235,244,203,206,218,219,229,215,252,193,160,141,110,61,46,214,244,251,248,228,250,252,219,165,179,181,187,210,227,247,214,229,241,250,182,140,228,249,230,230,239,167,179,197,205,245,250,236,197,165,178,160,192,210,155,201,134,182,187,148,83,42,138,81,87,81,50,42,49,24,70,231,246,197,176,177,212,240,208,232,246,169,160,111,88,149,228,250,178,112,103,212,198,110,152,207,177,154,190,71,76,201,214,226,217,229,242,252,252,251,227,177,183,110,54,29,18,19,29,45,46,46,46,50,46,49,53,53,53,52,53,49,49,93,47,53,58,63,67,77,88,103,120,122,129,139,131,110,90,69,55,42,35,28,19,22,14,14,12,14,22,26,29,26,24,17,12,22,27,23,19,25,37,30,32,29,27,39,43,42,45,43,33,25,22,39,46,46,48,34,26,21,16,24,24,41,52,60,87,105,170,210,171,165,158,159,163,158,158,154,163,165,149,134,135,146,164,140,65,24,24,23,44,64,62,60,37,15,28,33,29,18,14,15,13,16,19,30,27,22,18,17,29,27,28,27,29,24,24,22,15,16,21,21,24,34,35,46,59,65,78,83,90,98,93,90,79,69,59,57,51,50,36,50,79,77,76,65,60,54,50,48,41,33,37,22,54,101,75,53,42,136,225,243,244,249,
249,247,247,244,222,130,47,6,11,14,76,220,235,202,143,91,47,37,6,102,211,219,238,229,225,227,222,226,223,225,226,226,227,227,228,226,228,229,229,226,227,228,225,229,225,226,226,224,225,224,224,222,223,222,222,222,223,219,220,222,222,224,220,220,116,3,1,5,8,9,9,12,10,11,12,12,12,214,217,214,216,219,215,215,216,218,218,219,218,218,221,221,219,221,220,221,223,224,224,224,224,225,227,225,224,225,224,225,227,224,225,224,221,223,218,217,214,213,217,215,212,213,213,213,214,215,214,212,215,214,213,213,214,212,211,214,212,214,212,212,212,212,214,214,213,214,211,213,213,213,214,216,216,217,218,219,217,221,220,221,224,222,224,224,223,225,223,223,224,223,224,226,226,230,228,229,229,227,227,224,227,226,226,224,227,226,224,223,225,222,221,221,221,223,219,222,222,223,223,222,220,219,222,222,220,220,222,220,222,226,223,225,224,222,227,228,227,227,228,227,229,230,227,224,222,223,220,221,224,224,223,221,221,223,225,222,223,226,222,224,222,221,224,223,224,225,225,226,224,224,221,221,221,220,220,219,218,219,218,217,217,217,218,218,219,217,217,219,215,216,217,216,215,215,219,218,216,218,220,220,221,222,221,221,222,220,221,222,221,223,224,224,224,225,226,226,227,229,227,228,229,232,233,232,231,231,232,233,235,232,230,227,224,226,224,226,228,227,225,225,227,228,231,232,233,234,235,238,238,239,237,237,237,240,240,237,237,238,240,240,240,241,243,244,244,245,243,230,221,236,241,240,244,231,225,239,244,241,235,240,244,244,245,245,245,244,245,244,245,245,245,247,245,245,244,244,244,245,245,245,244,244,243,243,245,244,244,245,244,244,244,245,245,245,245,244,245,244,244,242,243,242,239,239,236,236,234,236,236,237,242,214,210,210,198,176,223,142,16,149,237,248,208,190,232,242,203,206,217,220,225,217,252,191,163,145,124,71,45,215,243,250,250,223,228,193,154,151,189,221,234,245,248,252,210,189,201,232,178,149,228,252,210,163,202,182,208,233,223,251,252,201,163,159,162,153,184,197,182,226,142,186,172,133,72,35,132,83,95,68,53,49,47,24,74,243,243,207,180,184,220,244,212,226,249,179,222,179,79,147,230,217,156,111,142,245,168,59,159,236,202,184,172,92,139,212,188,196,196,208,219,228,245,253,229,187,184,92,58,59,57,62,66,84,84,86,83,88,85,79,77,74,74,76,85,86,91,93,118,135,127,134,122,113,108,90,78,68,47,40,37,27,27,24,22,22,23,23,21,19,12,12,10,14,23,27,30,33,32,18,12,16,19,15,15,37,42,27,27,38,46,50,52,51,45,43,34,23,38,51,52,54,54,45,29,33,48,53,48,51,55,97,131,155,239,216,160,165,159,162,162,160,160,158,170,164,147,140,138,152,169,123,51,38,39,26,36,54,64,71,55,36,33,31,24,24,22,13,17,11,21,32,30,30,19,20,28,24,33,33,30,30,27,24,19,23,24,22,21,20,20,23,18,20,21,27,39,53,70,83,98,114,118,105,86,74,53,49,55,51,57,68,76,75,73,72,68,63,55,48,40,43,44,39,39,27,69,101,100,94,78,113,167,214,227,198,172,160,180,238,242,248,240,177,141,99,40,17,9,139,222,229,237,227,229,226,226,227,224,227,229,227,227,231,230,227,229,230,229,229,230,231,229,227,228,226,227,226,227,225,225,224,225,224,225,229,224,223,223,224,225,226,223,222,116,3,1,5,8,9,9,12,10,11,12,12,12,212,217,213,214,215,213,217,214,217,214,216,217,218,218,217,221,220,220,220,221,222,223,224,222,223,222,225,223,222,225,224,226,224,225,224,223,224,219,219,216,216,216,213,215,212,214,215,213,216,212,214,214,213,218,214,216,214,213,214,215,213,211,213,212,213,214,212,214,214,212,215,213,214,213,215,215,215,218,218,218,216,217,219,220,222,220,223,222,223,222,226,226,224,227,224,226,225,226,228,225,225,227,225,225,226,226,227,227,224,223,224,222,221,222,224,222,224,222,226,224,222,222,220,220,217,217,217,217,218,219,219,222,222,222,224,224,225,226,229,229,228,227,226,228,227,224,223
,217,221,222,222,226,222,224,223,222,223,224,223,224,224,224,224,222,224,224,225,224,223,225,224,223,222,220,220,220,221,221,218,220,217,218,220,219,222,218,218,218,217,219,216,215,216,217,218,217,217,215,217,217,218,216,217,219,218,220,220,221,222,223,223,223,223,223,223,222,224,225,227,227,227,225,225,227,226,229,229,230,230,228,231,232,231,231,226,223,226,225,225,227,227,227,227,227,230,230,231,233,234,235,236,236,237,237,235,237,239,237,238,237,237,237,237,238,240,241,240,242,245,236,223,224,237,241,241,243,224,227,243,242,238,234,244,244,243,244,244,245,244,244,244,244,245,244,244,244,244,244,244,243,243,244,243,243,243,244,244,246,245,244,244,244,244,244,244,245,243,243,243,244,242,243,245,243,242,240,238,237,237,237,237,237,239,244,218,199,192,204,219,252,143,49,169,221,232,215,207,237,244,198,209,218,216,222,219,252,183,151,125,105,71,46,211,243,250,208,152,188,187,183,202,225,249,249,252,241,252,204,177,188,207,164,136,221,223,184,189,231,199,229,248,217,252,240,173,160,170,158,154,186,188,216,245,132,180,163,148,84,19,118,76,90,71,44,44,48,22,76,235,245,221,177,188,221,239,213,221,245,171,230,216,101,146,218,186,130,132,195,252,185,118,199,244,189,218,217,131,188,209,199,211,192,184,185,199,219,232,183,128,78,10,2,6,8,8,10,21,36,48,65,79,78,79,77,77,83,83,78,80,78,61,69,61,59,54,48,46,45,57,77,71,26,15,25,18,20,14,12,15,18,20,32,32,18,15,12,12,16,23,45,44,40,34,22,24,15,15,20,36,46,29,19,39,47,46,52,48,40,28,27,44,54,57,58,51,50,46,33,34,49,58,57,57,56,119,132,181,234,198,160,159,147,165,161,162,165,166,178,165,148,142,145,160,164,103,48,53,49,37,23,25,56,70,68,54,36,34,30,38,33,24,19,17,27,35,33,32,29,16,17,25,32,37,36,29,32,32,27,29,27,34,30,19,17,25,22,15,16,15,24,24,23,30,34,43,60,68,83,91,97,93,82,71,63,59,53,53,58,66,74,78,79,77,76,69,61,62,54,45,38,39,42,42,36,38,27,19,96,199,230,250,250,252,252,243,195,142,107,41,26,14,64,223,232,236,236,227,230,228,226,228,226,227,227,231,232,229,229,230,232,229,229,226,230,230,226,227,226,228,230,228,227,226,226,228,228,228,227,225,226,225,226,226,224,225,223,224,115,3,0,4,9,10,9,10,10,12,11,12,12,212,214,213,212,213,214,213,212,215,214,214,215,214,214,217,214,216,218,220,222,220,221,223,222,220,221,220,221,222,224,225,223,222,225,222,222,225,220,220,219,219,222,219,220,220,218,216,213,214,212,215,214,214,213,214,214,213,215,215,215,214,214,214,212,214,212,213,213,214,211,211,216,213,214,216,214,217,216,215,217,219,217,221,222,217,221,222,222,222,221,222,222,225,226,224,225,224,224,224,221,224,223,224,226,224,224,224,225,224,224,222,222,222,223,223,222,225,223,225,224,223,221,219,219,219,216,215,217,217,220,219,222,225,222,222,225,227,227,225,224,226,226,224,225,224,223,221,219,221,223,225,224,224,224,222,222,222,224,225,225,224,222,225,224,225,226,224,226,225,223,221,218,218,220,220,221,220,220,219,217,218,218,221,221,222,221,221,219,219,219,219,219,218,220,217,218,215,216,219,218,221,217,217,218,218,217,222,223,222,221,223,222,222,223,221,223,222,224,224,221,224,225,225,225,226,226,227,228,229,229,231,230,231,230,227,223,225,225,224,226,224,225,228,228,230,229,232,231,232,233,234,233,235,236,236,237,237,237,237,236,236,237,236,239,239,239,238,240,241,231,215,223,239,237,243,236,222,236,245,241,238,238,244,245,244,244,245,245,244,245,244,245,245,244,245,244,244,244,245,244,244,243,244,244,243,244,243,245,245,245,244,244,244,244,243,241,243,244,243,243,243,242,242,242,241,239,239,237,239,238,238,238,239,243,211,197,205,224,237,252,114,55,195,234,251,232,229,249,243,199,211,217,212,223,223,252,188,154,125,108,71,55,203,204,181,184,183,239
,226,214,230,227,246,242,240,235,252,212,174,200,226,177,133,212,244,237,226,248,193,207,238,182,218,213,159,174,189,178,196,208,200,229,231,116,164,188,163,105,33,91,72,89,68,47,47,52,20,74,233,241,217,152,171,218,236,214,216,243,173,214,230,113,134,178,131,162,167,221,241,132,162,240,236,175,221,212,147,205,216,214,214,196,201,199,190,193,194,132,95,87,70,81,69,57,46,30,20,16,32,39,32,29,27,23,29,28,22,25,17,25,24,18,36,36,38,32,36,71,119,142,81,24,24,18,15,14,11,14,19,28,24,24,35,34,33,18,13,16,27,45,41,49,50,44,41,31,20,15,24,27,25,24,34,46,45,39,41,35,19,39,55,54,56,58,47,33,30,26,25,46,59,68,54,68,136,134,211,252,177,156,150,150,163,163,165,165,174,186,165,145,146,150,170,164,84,56,76,66,60,36,40,62,67,73,63,42,37,35,34,34,24,19,23,36,30,30,34,33,30,17,24,31,30,36,35,37,36,32,29,29,41,37,22,16,16,22,18,16,27,36,31,29,27,19,18,14,18,25,33,45,61,69,79,95,93,90,82,72,65,59,61,59,61,72,76,81,77,72,71,69,59,56,61,51,57,47,46,37,20,104,206,227,239,212,172,132,110,67,35,9,74,221,245,245,235,236,232,231,231,230,229,230,231,231,233,233,228,229,232,230,229,229,229,227,229,231,228,226,229,227,229,228,228,229,229,231,229,230,228,226,228,226,228,226,225,225,222,116,5,1,4,9,10,9,10,10,12,12,13,12,210,213,211,211,214,212,213,209,213,213,214,212,213,215,213,214,213,216,218,219,221,219,220,219,221,220,220,221,220,223,222,221,221,224,222,220,222,220,220,220,222,221,219,221,219,219,216,213,214,214,214,214,212,214,214,214,214,213,214,214,212,211,214,211,209,211,211,208,210,210,213,212,211,213,214,212,214,217,216,216,217,217,219,217,220,218,217,219,221,222,224,222,221,221,221,225,223,222,220,219,218,221,220,219,222,221,223,223,220,221,222,220,221,219,220,221,221,220,222,219,217,218,217,218,217,218,217,220,217,219,221,221,222,221,221,220,222,222,223,222,222,224,220,221,221,222,222,219,224,223,222,224,222,222,221,220,221,223,222,221,221,223,224,224,222,223,222,225,224,220,220,217,220,220,223,223,222,221,219,221,218,221,221,220,223,222,223,222,217,221,221,221,219,216,216,214,216,215,215,216,217,217,217,217,216,218,218,219,220,222,222,220,223,222,222,222,224,223,223,225,223,222,223,226,225,225,228,229,230,228,228,228,229,229,228,227,224,222,224,226,223,224,225,225,227,226,228,229,232,234,234,233,233,235,234,235,236,236,235,234,233,235,236,236,237,237,236,236,236,222,211,224,234,237,240,226,222,240,244,238,234,241,245,244,245,243,244,243,243,243,244,242,244,242,242,243,242,243,242,243,244,242,243,243,244,242,242,243,244,243,244,242,242,242,241,242,242,242,242,243,242,243,241,240,241,238,237,238,237,235,237,235,241,239,215,193,209,223,199,228,96,97,239,246,252,233,219,242,238,196,212,215,209,219,231,252,198,178,146,120,95,66,176,209,224,227,216,250,226,212,225,219,240,231,238,228,250,205,169,203,243,218,152,235,251,251,221,178,150,192,209,177,206,187,156,187,204,181,208,218,183,200,204,113,182,174,159,147,74,95,68,95,70,48,53,50,18,71,230,245,207,124,171,223,236,215,211,244,165,184,205,127,112,78,98,165,200,217,146,135,199,248,219,150,198,167,126,198,206,214,221,214,216,206,200,214,178,152,183,198,228,241,241,247,245,244,239,217,149,73,55,51,46,50,42,37,35,27,31,36,34,40,60,53,39,16,53,127,161,162,70,23,26,14,11,11,9,21,43,46,39,26,21,37,44,41,28,15,19,19,40,51,47,53,46,44,39,29,23,21,13,24,49,54,46,32,23,22,44,53,55,47,45,56,44,30,12,24,44,56,56,60,51,47,108,147,234,234,159,152,145,150,167,160,161,161,179,185,156,140,149,159,170,142,61,39,56,76,69,38,42,74,71,76,66,37,34,30,36,27,22,16,17,24,23,36,37,35,32,22,21,17,25,40,27,33,35,22,19,29,35,31,25,17,16,12,19,30,41,44,39,32,23,18,14,15,16,17,19,20,25,26,36,42,
53,69,81,95,104,103,93,85,83,77,68,66,60,62,63,66,69,69,68,71,77,74,63,63,42,16,24,24,38,53,50,53,64,27,56,189,243,243,250,236,235,235,231,229,228,231,231,231,235,233,232,232,229,231,228,231,231,228,228,228,230,229,230,230,230,229,228,230,229,229,230,228,229,229,227,229,227,224,227,228,226,225,224,116,3,1,5,9,9,9,11,9,12,12,12,12,210,212,210,212,211,210,212,208,212,211,211,211,213,213,214,212,215,216,217,218,220,219,218,219,218,219,218,221,219,218,223,220,220,221,219,219,221,220,218,220,219,218,218,220,216,215,216,214,215,213,215,215,215,215,217,215,214,217,214,215,213,213,214,211,212,208,208,211,211,211,211,212,207,211,213,211,212,213,214,214,215,214,213,216,216,217,218,218,221,220,223,220,219,222,219,222,222,220,218,218,220,218,219,221,220,220,222,220,221,219,220,220,218,218,216,218,217,216,216,216,217,214,218,218,217,219,220,220,218,220,220,220,221,219,220,221,220,222,223,222,224,226,222,222,220,219,219,220,222,221,222,222,223,224,223,223,223,221,219,219,219,219,221,222,222,221,221,222,222,219,218,219,221,223,223,227,223,221,222,223,222,221,223,222,223,224,223,221,220,220,221,219,215,217,214,214,216,217,218,215,219,218,217,219,220,218,219,218,219,217,220,221,220,222,221,223,222,223,223,220,224,223,219,224,225,226,228,226,227,224,227,227,226,227,227,228,227,224,225,224,224,225,224,225,230,228,228,229,230,233,233,233,232,233,234,232,232,234,233,232,234,234,235,235,234,235,235,237,231,215,210,227,234,236,235,218,227,242,241,234,236,243,242,242,241,242,242,242,244,243,244,242,242,243,242,244,243,242,243,243,244,242,243,242,242,242,241,242,241,243,244,242,241,243,242,241,242,241,243,241,242,243,242,241,242,240,239,238,237,235,237,234,241,235,217,194,193,195,210,249,95,118,247,247,251,189,198,238,233,197,214,218,208,225,237,252,186,146,118,108,75,68,220,245,251,244,223,251,207,212,226,213,239,229,235,231,246,225,181,185,220,200,143,217,251,214,170,160,178,224,244,213,191,183,166,200,199,151,203,203,160,176,193,120,171,182,126,174,157,131,94,90,69,47,47,56,18,67,233,247,210,131,177,229,239,216,209,245,175,162,181,158,117,58,88,181,194,192,151,145,224,246,209,149,174,115,117,218,214,225,225,210,213,215,220,230,179,158,225,241,242,243,251,252,253,253,250,210,162,132,131,129,125,118,111,95,82,80,93,90,85,96,89,53,31,21,94,136,147,144,55,27,24,12,15,12,10,33,54,45,54,42,18,23,40,50,47,36,27,28,32,49,52,49,48,46,46,45,41,36,27,46,62,46,51,45,39,49,51,57,50,38,23,30,38,19,15,35,58,60,55,49,38,19,55,164,251,217,145,148,141,157,160,159,162,167,191,177,146,146,157,165,174,124,40,21,31,58,67,35,36,62,79,77,48,23,25,31,33,34,34,19,14,15,22,32,32,34,37,31,21,18,20,23,27,30,30,22,12,23,27,19,20,23,30,40,43,51,55,49,46,32,20,14,17,12,23,33,29,33,26,29,23,23,27,22,28,39,57,73,78,96,123,146,152,152,134,126,109,93,82,66,63,59,61,59,60,57,55,61,47,42,44,46,47,52,41,11,116,223,250,250,243,235,238,234,233,229,230,231,235,234,235,236,233,231,232,232,231,232,231,230,230,231,230,229,229,229,230,227,231,232,230,228,229,229,227,228,223,228,227,224,226,226,226,224,224,116,4,1,6,9,9,9,12,10,11,12,12,12,206,214,212,210,210,208,209,209,211,210,210,210,212,211,212,214,213,216,218,215,217,218,218,215,217,220,219,220,218,220,220,219,220,220,218,219,221,219,221,220,220,220,218,218,218,216,213,216,214,215,216,214,216,216,213,213,214,215,213,216,216,215,216,214,213,214,214,212,212,211,212,214,212,210,212,212,213,213,212,214,214,214,215,214,215,216,220,221,219,218,218,217,216,216,217,217,216,218,219,220,218,218,219,218,219,220,219,218,217,218,219,217,216,217,215,218,216,212,215,215,215,216,215,217,217,217,218,218,220,220,219,220,
217,217,219,222,222,218,221,221,221,223,221,223,221,217,219,220,221,222,220,219,221,223,222,223,224,223,221,222,220,218,221,221,220,221,219,222,221,220,222,221,221,221,223,222,222,222,224,225,222,222,220,219,221,221,222,224,222,223,221,218,218,217,214,215,219,217,218,219,220,220,220,220,221,220,218,218,217,219,218,218,220,218,221,222,222,225,222,222,223,222,223,223,221,224,225,222,224,224,225,224,226,224,224,226,225,226,225,222,221,222,224,226,228,229,230,229,231,233,232,230,231,233,232,232,232,232,232,234,234,232,234,231,233,233,235,237,229,214,215,232,235,237,226,216,235,242,237,232,236,244,243,241,242,241,241,242,241,241,242,241,243,242,241,242,241,242,242,243,242,241,241,242,242,240,241,242,241,243,241,240,240,240,240,241,241,241,241,242,242,241,240,241,242,240,240,239,239,235,237,235,244,231,214,183,194,227,240,249,104,115,233,221,227,196,209,243,235,203,226,222,211,217,216,209,131,126,104,93,68,57,216,245,252,236,216,249,199,215,228,210,238,230,236,229,243,234,198,184,183,164,111,155,201,220,222,178,198,246,252,230,193,191,172,208,185,146,210,208,179,174,196,136,187,138,71,207,198,161,125,97,61,36,44,57,19,64,226,248,193,101,160,216,235,213,210,243,190,156,181,218,159,78,121,145,185,213,98,159,237,239,201,185,190,97,170,246,222,223,225,228,224,207,213,224,169,200,239,224,224,222,232,227,211,154,106,89,90,108,133,128,113,113,110,133,146,139,128,105,114,150,125,68,29,24,114,139,145,130,42,25,29,25,17,12,13,19,49,55,58,39,22,13,23,46,50,54,48,39,42,42,48,52,47,50,48,47,53,45,46,50,49,48,54,50,54,53,50,52,40,34,27,43,47,35,45,55,71,77,76,81,77,50,94,206,234,202,145,141,145,156,165,159,166,180,191,162,137,147,160,174,169,110,47,41,41,48,57,44,33,49,72,60,39,15,16,22,30,39,34,26,17,14,21,27,20,26,35,36,29,22,17,21,32,36,32,25,17,12,19,33,43,44,49,53,53,56,54,47,41,28,21,16,22,35,42,44,40,31,27,21,15,19,18,25,24,22,26,51,77,67,61,76,98,134,145,145,156,159,157,148,141,136,129,118,103,100,97,90,89,74,60,46,27,16,22,7,99,222,247,248,238,232,235,231,233,231,233,235,235,234,236,235,233,235,233,233,231,232,232,231,232,232,230,231,231,229,229,230,230,231,229,230,230,227,229,227,225,226,226,225,225,226,226,226,223,115,3,0,5,9,10,9,10,10,12,12,11,11,205,210,208,209,210,208,210,207,212,212,213,210,211,210,210,212,213,212,212,216,216,217,217,217,217,217,220,219,219,217,221,219,218,220,220,219,219,220,218,217,217,218,216,215,215,215,217,216,215,214,215,216,215,213,212,212,211,213,214,212,213,214,215,212,216,215,215,216,212,212,212,212,211,212,211,213,213,210,212,213,214,214,215,216,214,215,214,216,218,215,215,215,214,215,214,214,216,216,216,220,217,220,219,220,221,217,220,218,220,217,220,217,216,219,215,215,213,214,214,214,216,214,216,215,216,219,217,217,217,218,218,217,219,217,215,218,218,218,218,219,220,221,221,220,220,218,222,223,221,222,218,221,221,221,223,219,220,220,222,222,220,221,219,219,220,221,222,219,219,221,222,223,223,223,222,221,223,222,220,222,222,219,219,219,218,219,223,223,222,222,223,222,217,218,218,218,218,220,222,223,222,221,221,221,220,218,219,219,218,216,218,217,217,222,220,221,221,222,223,220,224,224,222,223,223,222,222,222,223,223,227,225,227,232,227,227,225,226,226,222,224,224,222,223,228,230,230,231,229,230,231,232,231,231,234,231,233,234,231,232,234,233,232,232,232,232,235,234,224,211,220,234,236,238,219,224,240,241,234,228,242,243,242,243,240,241,242,242,240,240,242,242,243,244,242,242,241,241,242,241,241,242,242,242,242,242,241,241,243,245,243,242,240,241,241,242,243,241,243,241,242,240,240,241,240,241,240,241,240,238,239,237,244,227,220,194,215,235,235,250,89,96,203,220,247
,219,237,249,239,213,227,209,174,177,188,215,161,149,125,101,63,60,216,243,249,231,218,243,191,222,226,212,236,227,234,230,239,235,196,180,199,178,113,147,220,243,250,177,188,246,252,229,182,179,171,214,189,184,226,208,202,159,184,151,184,106,21,221,199,160,167,113,69,21,33,56,17,66,227,248,182,83,132,194,233,218,210,244,191,151,185,247,199,94,76,132,210,156,72,171,240,233,199,196,194,142,199,239,221,219,221,223,229,226,229,213,163,200,231,227,239,237,232,197,127,79,49,62,77,93,87,48,39,35,45,66,98,114,85,39,32,103,116,89,39,41,135,127,148,120,29,18,28,42,34,18,16,30,54,61,61,32,13,13,20,48,56,43,32,32,34,42,50,47,52,48,50,49,50,54,48,56,61,68,70,69,77,77,83,92,97,94,101,111,110,111,121,122,119,124,127,135,117,84,152,248,249,190,147,149,144,160,160,159,171,186,188,150,143,153,166,179,159,96,59,65,57,65,66,66,70,61,61,60,55,51,47,48,48,47,45,34,27,27,31,20,15,19,28,38,36,35,31,33,36,36,37,32,24,27,37,50,54,53,54,57,55,52,49,36,25,22,25,40,42,43,39,35,37,38,34,19,16,15,17,16,29,28,38,93,114,98,68,43,27,25,33,43,47,53,61,66,75,88,104,107,113,112,110,95,80,69,39,27,22,16,36,111,221,241,242,243,238,235,231,231,233,233,231,233,235,236,235,234,235,236,233,233,234,234,232,233,232,231,234,233,232,230,232,229,230,229,227,232,231,229,228,230,227,226,226,227,228,227,227,225,223,116,4,1,5,10,10,9,10,10,11,12,11,12,205,208,205,207,206,207,207,206,209,210,207,207,210,209,210,210,210,211,212,214,216,214,217,214,215,216,212,216,217,214,214,215,215,216,214,215,215,214,216,215,214,212,212,212,215,215,215,218,214,212,214,214,216,214,212,213,210,210,210,208,209,209,211,212,211,213,212,211,214,212,211,209,210,212,211,211,210,212,211,211,212,210,212,211,214,214,214,213,211,214,214,215,214,211,214,214,214,216,213,215,215,217,220,217,219,218,217,219,217,218,216,215,218,215,214,216,215,213,214,214,211,215,215,216,216,215,215,214,218,216,215,214,215,217,213,216,217,217,223,220,222,222,219,221,220,220,221,220,218,217,215,218,220,222,220,219,219,220,218,219,218,215,217,214,217,219,218,218,217,220,219,222,220,218,219,218,218,218,219,217,216,217,218,219,217,219,219,219,219,218,219,221,218,218,217,215,217,216,218,219,220,221,219,220,220,216,217,218,216,219,218,217,220,219,223,222,219,222,222,223,222,220,221,222,223,224,223,222,220,223,224,224,227,226,230,229,224,225,223,222,223,220,222,225,226,227,226,225,226,229,231,231,230,231,232,230,230,231,232,232,229,232,233,232,230,229,233,229,217,208,224,235,239,232,217,232,244,239,229,232,240,240,240,240,241,240,239,239,241,241,241,240,241,241,241,242,240,241,241,241,242,241,243,242,242,242,242,243,242,242,243,242,240,242,240,240,241,240,240,240,240,241,240,240,240,237,239,238,239,239,239,238,241,225,217,200,209,187,207,234,89,123,232,243,252,221,223,241,224,190,198,179,170,202,230,251,187,172,142,125,81,62,220,243,244,227,218,232,185,226,225,213,235,224,231,229,234,232,186,184,198,187,165,174,229,248,250,164,185,243,235,190,131,156,187,217,184,194,220,204,193,128,171,149,164,88,24,225,191,136,174,144,92,20,19,51,17,72,230,247,177,93,157,200,239,220,206,244,189,139,195,246,207,102,81,170,219,141,74,178,234,220,193,190,163,129,203,228,219,214,215,222,225,222,240,211,158,206,209,212,222,236,239,148,105,72,56,77,79,50,29,33,42,56,55,39,54,96,92,58,16,57,109,109,52,64,138,119,147,102,23,14,27,58,54,50,49,51,60,57,48,23,11,9,20,46,46,30,22,25,30,46,55,60,68,76,84,93,112,117,125,136,134,137,129,117,118,107,113,116,109,109,100,101,100,104,118,113,117,109,117,128,99,93,195,250,234,176,144,150,150,156,161,166,181,186,169,136,141,156,165,180,140,78,55,47,43,46,51,54,56,59,57,56,51,57,63,57
,201,179,146,113,79,138,145,158,167,173,184,163,177,175,136,48,46,68,31,40,28,69,97,56,158,193,185,234,234,234,198,224,236,214,213,221,242,229,170,170,213,235,179,199,166,151,215,221,239,233,222,213,225,179,183,158,166,234,133,112,236,220,191,178,212,251,239,162,66,39,38,87,78,24,31,25,46,218,246,210,160,174,248,239,174,157,200,240,163,106,194,246,230,248,221,170,209,208,212,246,252,199,94,76,132,213,239,252,252,250,121,19,8,4,25,42,42,24,16,22,45,56,59,44,22,140,241,251,251,251,241,196,122,30,38,34,54,54,59,40,83,135,112,136,66,32,67,100,155,169,173,171,150,143,136,107,71,39,34,33,24,23,19,12,18,19,14,19,12,12,18,15,17,26,22,20,22,22,21,22,24,31,40,47,43,40,51,53,47,41,28,28,20,18,25,18,22,22,15,31,24,26,24,32,177,234,180,146,144,147,161,196,222,134,34,22,15,21,22,27,20,23,23,19,26,21,16,14,19,19,15,15,17,18,24,22,19,19,18,22,19,16,18,14,19,20,17,17,16,24,21,16,19,16,17,17,16,22,19,23,27,34,39,48,68,79,88,101,102,89,80,75,92,99,89,80,76,61,48,68,77,61,38,45,73,65,40,23,24,19,45,83,97,101,83,110,127,129,142,146,123,91,181,247,247,243,239,235,235,231,231,233,233,231,231,232,232,231,231,231,231,231,230,229,230,230,232,229,228,230,228,230,231,231,231,231,233,232,233,236,234,233,234,234,234,233,235,232,232,233,232,233,231,233,235,229,233,231,230,231,228,230,230,231,231,229,118,2,1,5,9,9,9,10,10,11,12,12,12,190,193,193,190,191,186,190,191,189,190,190,191,188,192,188,190,191,189,191,188,193,192,191,193,192,191,192,193,191,192,191,191,195,189,190,191,191,189,190,191,191,192,190,192,191,189,192,190,191,194,191,193,191,189,195,190,191,191,191,192,191,194,191,191,194,193,194,191,193,193,190,194,192,193,193,193,192,191,193,192,195,194,194,192,194,194,193,194,195,196,197,193,196,194,193,195,193,193,193,194,192,193,192,194,195,193,193,194,194,192,195,195,195,195,194,196,195,197,196,196,197,196,197,196,195,198,196,195,196,196,199,199,198,198,200,199,200,198,201,201,200,203,200,203,202,200,202,199,200,198,198,200,199,202,200,200,199,199,198,198,200,199,201,201,199,199,199,200,200,199,199,199,199,201,200,199,200,199,198,199,200,199,199,199,199,199,199,200,199,200,198,198,203,199,198,199,198,201,202,200,199,199,198,198,198,199,198,198,198,198,197,195,199,196,194,195,196,196,198,195,196,198,194,199,198,196,198,198,198,199,197,198,200,199,200,199,198,199,200,200,200,198,200,201,200,202,201,201,203,200,202,201,201,200,202,203,203,204,205,208,198,209,209,191,216,230,218,209,197,207,224,228,248,249,244,224,198,193,185,179,178,183,190,200,212,216,220,218,219,218,215,215,214,213,213,212,212,214,213,214,214,214,217,217,216,220,223,222,222,222,223,222,222,223,222,224,222,224,225,223,226,226,228,229,227,229,229,226,229,219,213,185,145,128,182,239,238,184,143,121,157,205,174,166,149,92,130,162,175,176,178,162,120,128,142,133,55,53,71,41,43,32,52,46,31,192,251,243,253,253,251,230,249,252,236,241,249,252,238,119,125,224,252,210,214,184,167,229,250,250,236,193,215,230,188,245,207,222,252,174,208,250,252,229,196,235,252,252,190,65,46,53,100,85,18,35,26,51,200,245,237,147,198,248,252,186,185,250,245,131,120,246,252,252,252,227,211,225,215,234,251,251,245,149,83,137,230,246,253,253,146,31,6,9,12,38,44,25,18,13,39,55,61,47,23,121,240,247,251,251,252,252,207,112,46,50,45,55,55,44,44,111,136,120,146,141,154,167,162,175,156,128,93,55,36,25,33,18,15,16,19,22,14,18,14,18,21,19,27,24,28,27,24,23,24,28,22,22,30,26,30,37,45,50,42,47,48,39,46,42,36,24,14,17,23,20,22,17,23,22,32,27,29,27,25,184,235,160,148,143,154,171,200,190,82,11,15,17,22,27,25,27,21,28,31,29,30,20,19,18,17,16,19,19,16,18,22,27,23,22,21,17,20,22,21,27,31,31,27,31,32
,29,32,25,21,18,16,16,15,15,16,18,18,20,19,25,34,35,42,57,69,82,102,119,125,123,101,77,57,45,68,72,61,56,78,89,62,34,28,31,18,56,96,106,102,90,121,132,142,132,133,104,103,237,248,243,245,234,238,234,230,236,234,232,231,232,232,233,233,232,233,233,233,232,231,231,229,229,229,228,228,228,230,230,231,231,232,231,230,232,235,233,233,232,235,233,232,234,233,234,233,234,232,231,229,232,231,229,231,229,230,230,231,233,231,231,229,118,3,1,5,9,9,9,10,10,11,12,12,12,190,193,192,190,193,192,190,193,191,190,193,191,189,188,193,190,188,192,193,190,191,190,191,191,190,192,189,187,189,191,191,190,188,190,189,190,190,191,191,189,189,189,188,190,191,191,188,187,193,190,191,190,191,190,187,192,189,191,191,191,191,191,193,191,192,190,189,193,190,194,191,190,193,189,191,194,189,190,194,194,195,193,193,195,193,194,198,194,196,194,195,194,191,196,195,193,193,196,195,196,194,194,195,193,194,196,195,193,196,197,195,195,197,194,194,195,196,195,198,195,194,196,195,198,197,197,196,197,199,198,198,200,200,199,200,199,200,202,201,200,199,200,199,200,199,199,199,198,197,200,201,199,200,198,201,198,197,198,199,200,198,198,199,199,199,199,198,199,200,199,201,201,199,200,200,199,198,199,201,200,201,200,199,199,201,200,200,198,198,198,196,199,198,198,199,198,198,199,199,199,198,196,199,198,198,198,196,197,196,196,195,196,198,196,198,198,195,197,197,196,198,198,196,196,198,198,198,196,196,199,197,198,196,196,200,200,199,198,199,198,200,199,200,200,200,200,199,200,203,200,200,202,201,201,202,204,203,205,206,204,197,207,193,180,208,211,213,204,199,217,224,214,177,164,170,174,179,178,184,196,204,214,221,223,222,222,221,219,218,220,217,218,218,218,223,219,221,224,223,226,225,229,229,230,232,236,237,237,240,239,241,241,241,243,239,242,244,243,245,244,249,250,251,252,250,252,252,252,252,243,231,211,191,151,189,248,250,199,165,105,139,213,202,236,215,155,177,172,196,193,170,139,101,124,139,137,64,52,69,38,52,39,46,24,20,188,247,247,252,252,250,237,250,250,245,251,252,252,230,99,92,207,237,186,192,177,162,195,214,181,202,179,224,230,206,249,201,233,214,150,208,243,227,182,139,200,246,235,160,76,48,57,96,91,28,29,39,39,172,230,140,83,152,211,151,131,169,201,160,81,112,202,222,205,198,193,186,181,155,163,200,235,218,117,66,108,186,198,200,147,47,14,10,10,32,45,33,18,14,34,53,54,56,18,93,222,252,252,252,252,250,217,171,101,50,70,60,78,72,57,69,125,137,127,167,143,134,100,52,42,29,27,22,18,19,16,15,19,20,13,18,26,29,33,31,33,34,33,41,45,41,44,31,32,24,24,34,28,30,26,38,46,43,48,62,53,67,79,49,30,29,18,21,24,19,22,25,21,22,28,35,30,40,23,51,217,212,144,146,141,165,180,185,163,80,33,22,28,32,22,28,29,26,22,33,34,28,28,18,18,15,17,15,15,22,17,16,18,27,26,30,27,22,26,24,32,36,39,50,37,37,37,35,33,24,21,17,19,18,21,16,14,15,14,17,15,17,17,15,24,30,33,44,54,63,75,89,103,97,90,102,111,103,83,77,60,41,34,27,32,29,74,106,108,112,86,105,125,135,126,119,87,131,240,246,241,243,233,233,233,236,229,232,234,233,236,236,235,235,235,234,234,232,231,232,232,231,231,232,232,233,233,233,233,236,235,231,233,234,232,234,235,233,234,234,235,236,236,233,235,234,232,232,231,232,230,228,230,230,230,230,230,231,230,235,232,226,118,4,1,3,8,10,9,10,10,11,12,11,11,192,195,194,193,194,191,193,193,193,193,191,196,192,191,191,191,192,192,194,191,193,189,190,191,189,190,192,192,189,193,188,188,193,190,192,190,192,191,193,190,190,195,189,191,193,190,192,192,191,191,189,192,191,191,194,191,192,190,189,192,190,193,191,192,194,189,192,190,189,194,190,193,192,188,190,193,192,193,193,191,193,190,194,195,193,194,194,193,193,194,193,192,196,193,194,196,194,196,195,194,193,196,1
93,195,196,195,195,195,196,196,196,197,196,195,195,195,196,196,196,198,198,199,200,199,200,200,199,203,202,200,202,200,203,202,204,202,200,201,199,200,198,200,199,198,199,196,200,197,200,202,201,203,199,200,199,199,200,202,198,199,200,198,202,200,200,199,199,200,200,198,199,200,198,199,198,198,201,200,200,201,201,199,199,200,199,199,196,198,200,200,197,198,199,198,199,199,197,199,198,198,198,198,198,196,198,198,197,198,197,196,198,197,198,197,199,195,197,199,198,197,198,198,195,197,198,197,199,196,195,199,195,197,197,196,198,198,200,198,198,198,198,201,199,200,199,199,200,202,203,200,202,201,205,205,205,205,205,206,208,204,201,206,188,200,217,220,222,216,217,225,208,103,23,11,85,179,208,223,229,236,242,246,245,243,241,240,241,240,240,240,244,245,244,247,246,248,251,251,252,252,251,251,252,252,252,252,252,252,252,252,252,252,252,252,252,252,253,253,253,253,252,252,252,252,250,250,252,252,252,252,245,218,210,147,159,244,253,225,205,114,141,211,225,245,242,204,190,170,176,168,158,120,101,124,133,139,91,93,89,81,97,83,95,80,84,181,187,171,185,169,160,141,153,154,158,183,174,180,149,74,78,127,127,111,104,106,93,75,84,70,105,97,129,110,95,120,95,124,99,66,96,100,116,105,32,42,73,78,102,66,46,51,78,88,37,31,33,46,50,47,48,9,35,43,29,24,49,71,50,53,61,32,12,6,56,107,122,128,80,48,21,40,61,47,68,72,76,88,69,51,30,19,12,23,45,35,28,21,23,47,60,63,24,72,208,245,252,252,254,254,211,170,138,85,89,100,93,98,99,84,99,137,123,135,125,55,29,20,15,16,16,16,18,17,20,21,17,19,25,23,18,24,37,44,38,38,36,36,37,41,45,49,50,44,41,33,33,31,30,29,37,54,55,71,60,91,154,117,66,41,17,19,21,20,25,22,24,24,33,35,41,34,53,18,82,245,186,132,144,140,182,189,169,144,90,70,77,57,45,33,24,29,27,27,28,34,31,31,24,19,21,16,18,19,19,21,18,15,16,18,29,25,29,30,28,33,32,39,43,46,39,38,41,34,31,22,21,23,18,21,22,19,22,22,19,18,18,21,18,16,17,20,17,20,29,32,36,49,61,73,86,94,101,101,83,63,50,45,42,40,33,70,101,108,108,99,103,107,115,106,112,59,93,222,234,243,247,230,233,232,235,233,234,234,233,234,235,235,231,231,234,234,234,233,231,233,235,236,235,234,234,234,233,233,233,235,233,233,234,233,233,233,234,234,237,236,233,232,232,233,233,232,232,231,230,233,231,232,233,231,232,230,231,232,231,232,229,117,4,1,4,8,10,9,10,10,12,12,10,12,192,195,193,193,195,191,189,191,192,192,190,191,192,192,192,191,192,191,191,189,190,192,191,192,190,190,194,193,191,193,190,190,192,191,191,190,190,189,190,193,192,192,189,193,191,190,192,189,192,189,191,194,190,190,190,193,191,191,190,191,189,191,191,189,193,191,190,193,189,191,189,190,195,191,193,190,192,194,193,194,195,193,194,194,191,194,194,193,196,193,195,193,191,195,192,193,192,194,194,191,193,195,194,193,194,191,194,194,194,196,192,194,197,194,197,197,195,194,197,196,196,199,197,198,199,200,203,200,200,200,200,201,200,200,200,199,201,201,199,199,197,199,200,201,198,197,198,199,200,200,201,199,201,201,202,202,202,201,201,201,198,200,202,202,201,200,199,201,201,200,201,200,200,201,201,200,201,200,200,199,199,199,198,199,200,197,200,198,198,200,199,198,198,196,199,197,198,198,198,198,197,197,197,195,197,197,197,197,197,198,197,196,197,197,195,194,193,195,195,196,195,196,196,198,197,193,196,196,198,197,195,201,200,198,200,201,200,200,202,202,203,202,207,207,207,210,210,212,214,214,217,217,219,218,220,223,223,226,228,218,223,224,210,224,236,245,242,229,226,223,201,123,59,95,191,246,252,252,251,251,251,251,251,251,251,251,251,251,250,250,251,251,251,251,251,251,250,250,251,251,249,250,246,248,249,242,244,242,241,238,239,225,214,228,209,214,222,214,214,199,184,170,166,167,159,163,171,187,192,179,153,157,1
72,104,104,172,170,171,163,95,122,147,150,189,160,141,142,116,117,107,120,107,98,114,107,117,114,120,125,115,130,129,144,134,121,116,79,74,68,60,59,48,55,41,31,60,50,62,69,69,88,58,40,53,50,57,48,24,29,42,60,38,56,36,37,27,19,68,41,43,57,28,53,85,36,19,18,4,46,63,52,53,59,88,54,35,40,42,47,36,65,49,57,44,20,26,41,57,32,49,74,49,12,10,26,78,110,126,137,71,20,9,16,37,64,67,84,72,47,45,23,16,23,42,43,27,25,30,51,53,63,32,50,185,248,248,252,252,252,181,142,128,94,95,122,142,142,142,137,120,130,141,118,130,92,21,14,17,14,19,13,14,20,19,26,28,27,23,27,28,31,30,26,36,43,41,41,50,56,48,45,52,46,50,53,50,46,30,37,35,36,47,50,50,44,99,159,116,69,43,16,21,21,24,23,24,29,21,33,44,46,45,51,17,97,226,144,129,147,175,217,178,154,141,103,92,112,106,64,36,36,26,27,33,32,30,27,24,27,24,18,24,24,23,24,19,23,20,18,16,16,23,26,27,25,30,27,33,49,56,46,37,44,41,35,28,24,29,24,20,22,20,22,24,22,27,24,21,22,16,17,19,18,19,21,16,17,23,24,29,33,45,63,76,89,96,93,87,77,67,53,81,103,108,119,94,76,72,73,66,61,18,96,233,238,247,242,232,238,231,236,233,232,234,233,233,233,234,235,234,233,233,235,234,232,235,233,234,236,234,234,233,233,232,232,233,233,233,232,233,233,232,232,233,232,234,232,232,231,231,234,230,231,232,230,232,229,229,230,230,231,231,231,230,231,230,227,118,3,0,6,10,9,9,11,9,11,12,12,11,192,193,191,192,191,190,191,191,193,192,191,192,191,188,191,193,187,188,189,191,195,190,192,191,191,191,189,189,190,189,189,191,190,188,191,191,188,188,190,189,191,189,187,190,189,191,190,188,190,188,189,192,189,188,187,186,191,191,190,191,190,190,189,189,193,190,191,192,189,191,189,191,192,191,192,191,190,191,192,192,195,193,194,194,194,196,192,192,193,193,195,193,195,193,194,194,193,194,192,195,195,194,194,194,195,193,194,196,194,195,195,196,195,195,196,195,196,194,196,198,196,197,198,196,198,201,200,201,202,203,202,199,199,198,200,200,199,200,196,196,197,197,198,196,196,196,199,198,200,201,199,200,200,201,200,201,200,201,200,205,201,199,204,199,199,198,200,200,199,201,201,203,202,201,200,202,199,196,199,199,199,198,199,199,199,200,198,196,198,197,198,198,196,197,198,198,197,197,198,198,197,199,198,197,198,199,195,195,198,198,198,196,195,194,196,195,196,196,193,196,196,196,195,194,196,194,195,194,196,202,201,210,217,217,220,220,222,224,228,227,226,231,230,235,236,235,237,237,241,242,245,245,243,244,245,246,247,248,246,233,236,225,222,228,219,229,203,193,203,219,234,203,200,227,246,242,227,218,210,207,194,190,184,184,183,178,177,177,173,169,167,166,168,168,159,157,152,150,146,145,146,136,137,134,130,125,121,117,113,111,107,93,100,137,101,85,100,95,96,89,81,71,66,64,64,73,66,61,67,65,67,68,81,70,73,70,62,80,79,69,87,71,64,69,59,79,77,69,74,69,83,84,87,93,89,98,104,119,107,112,119,109,116,113,104,96,71,63,65,63,61,57,56,50,45,55,52,61,79,99,103,78,59,68,58,66,61,47,64,69,83,65,69,49,71,45,31,67,53,51,53,54,80,105,60,65,60,24,61,83,72,59,62,97,63,40,46,42,39,49,83,74,87,71,55,54,70,62,61,103,78,56,50,16,69,63,63,133,142,160,85,31,20,20,53,81,96,75,42,34,17,23,45,42,31,21,29,51,50,62,39,39,165,243,252,252,252,252,160,90,81,65,44,87,139,148,162,166,173,155,156,139,110,126,67,16,18,17,19,20,23,29,29,29,34,34,23,17,18,21,29,29,26,25,37,55,61,64,60,49,49,54,57,54,49,55,70,67,60,48,34,32,37,63,79,97,92,61,45,27,21,22,24,26,26,26,23,27,38,44,50,33,49,19,110,184,100,132,156,223,250,162,144,141,110,100,95,74,64,39,23,29,36,35,35,34,29,33,31,31,36,34,41,41,34,29,21,21,21,19,19,17,19,22,16,25,23,39,56,49,56,46,45,42,39,35,24,27,28,27,24,26,29,21,28,29,22,26,25,24,27,18,18,25,17,19,21,17,19,22,20,19,23,27,40,50,62,84,103,109,104,124,112,1
02,112,85,59,56,57,57,50,24,159,239,249,249,238,233,236,231,233,232,232,233,234,233,232,235,235,233,235,234,235,236,235,235,233,233,235,234,233,233,235,236,235,234,234,232,233,233,234,233,232,233,231,230,229,232,233,231,232,229,232,231,229,230,228,229,228,229,230,226,229,232,231,228,227,119,4,1,5,8,9,9,12,10,11,12,12,11,191,194,194,195,194,189,191,193,192,191,192,193,190,187,192,192,190,189,190,190,188,188,189,190,188,187,189,188,188,190,188,187,191,190,187,187,188,188,188,189,189,188,186,188,188,188,190,188,191,189,189,189,190,190,189,189,189,190,190,189,187,187,189,189,191,190,188,190,188,189,189,190,190,190,190,190,192,189,189,191,192,193,193,194,192,192,193,193,195,192,193,191,193,194,189,195,193,194,196,193,196,196,196,194,196,196,196,195,196,196,192,194,196,194,194,196,199,198,197,199,198,197,199,201,200,199,199,202,203,201,200,202,200,200,199,199,198,197,195,199,196,196,195,196,196,196,200,200,199,200,202,200,201,200,199,200,201,202,201,200,200,201,201,200,201,200,201,201,201,200,200,200,200,200,200,200,200,199,198,199,199,199,198,198,199,198,197,195,198,198,193,195,197,196,198,198,198,194,194,196,196,194,197,195,198,196,195,196,196,198,197,199,194,198,197,195,196,195,197,195,194,196,194,193,194,193,195,192,200,207,214,239,240,241,242,244,247,243,245,242,244,243,241,237,237,235,232,231,226,223,222,217,217,210,204,204,201,200,197,185,188,196,199,164,127,139,127,132,141,146,158,150,157,155,144,129,116,107,102,97,87,81,84,76,77,79,71,78,73,70,66,61,64,62,61,93,51,49,51,50,54,55,48,48,51,41,49,48,46,46,46,38,49,118,82,51,60,58,70,63,86,84,90,93,91,99,85,60,55,63,57,66,66,53,71,66,56,60,63,57,72,61,47,62,47,61,63,55,74,63,68,81,68,83,78,88,85,83,91,90,84,74,84,83,87,88,77,75,69,74,77,70,78,75,57,64,51,77,81,62,86,87,79,81,53,88,74,72,71,65,65,57,75,45,64,54,49,82,66,45,42,50,72,107,72,56,41,15,73,102,75,81,71,99,68,45,53,42,39,44,50,27,64,72,63,45,65,77,73,112,63,44,32,20,69,72,30,70,127,127,162,94,36,23,12,56,76,55,34,31,22,41,53,33,21,27,47,49,61,45,27,137,244,246,252,252,253,170,131,124,135,115,59,65,80,108,105,134,130,131,150,124,109,122,60,14,22,24,19,20,29,35,31,34,37,42,35,24,19,16,19,18,22,24,16,32,48,53,57,60,61,60,48,57,54,55,91,118,113,92,70,66,86,103,104,73,43,33,27,25,16,32,48,46,37,31,35,38,46,47,50,49,47,30,109,146,96,161,193,234,234,138,144,130,79,60,33,43,49,33,23,30,43,37,43,39,49,53,38,42,41,39,30,31,44,39,35,29,26,23,18,22,15,18,17,19,36,55,61,62,78,63,48,47,43,36,31,34,35,33,34,30,32,33,31,31,32,40,40,35,27,27,26,21,20,23,23,23,20,18,15,16,18,17,21,24,25,31,52,63,72,88,91,96,102,85,61,56,59,70,60,59,193,240,249,249,239,236,236,234,236,231,234,235,233,234,236,236,235,235,233,234,236,234,233,236,234,236,236,234,237,237,235,234,236,234,234,236,233,235,235,235,234,233,231,231,232,233,231,231,233,229,230,231,230,231,231,232,230,231,230,227,229,230,231,231,227,117,4,1,4,9,10,9,11,10,12,12,11,11,193,196,193,194,193,192,190,194,191,191,192,193,192,189,191,190,189,190,189,190,190,189,193,190,191,190,188,190,188,189,190,189,190,190,189,189,189,188,191,187,189,188,186,190,189,189,189,188,191,190,189,191,190,193,190,189,191,189,190,191,187,188,189,189,193,189,189,190,190,191,188,193,189,191,191,190,193,189,191,193,193,192,194,194,191,193,193,194,194,194,194,193,194,192,193,193,194,195,196,196,194,197,197,197,198,196,196,198,198,196,198,196,196,197,196,198,198,197,199,198,198,202,201,200,202,201,200,199,200,202,201,204,204,200,200,200,198,198,199,199,197,198,202,199,200,198,200,200,199,199,200,201,200,201,199,201,203,202,201,200,198,199,201,199,203,203,201,202,198,201,199,196,197,
199,199,199,198,200,200,197,199,196,196,198,197,198,196,197,199,195,198,198,195,196,195,196,194,192,196,194,196,195,194,194,194,196,194,195,193,193,196,194,196,196,195,197,196,196,195,194,194,193,197,193,192,193,193,192,206,205,190,198,198,193,191,188,186,182,179,173,171,168,157,154,153,147,141,132,124,121,118,115,114,111,103,100,98,101,107,94,101,141,172,107,40,58,67,81,71,65,72,65,70,69,65,62,64,61,61,63,56,63,55,60,67,62,68,68,69,62,60,57,52,55,50,55,51,49,49,50,57,50,56,56,50,55,54,54,53,53,53,46,61,119,91,52,55,55,62,69,89,102,107,107,106,113,99,70,61,57,67,59,66,65,57,66,61,59,56,56,61,58,52,57,53,54,59,53,59,55,59,55,53,54,54,51,52,54,51,53,51,50,53,53,52,58,58,55,55,59,51,59,57,52,46,45,46,51,73,53,39,62,69,68,59,73,76,61,54,54,48,37,57,44,59,36,61,103,61,33,24,44,49,87,70,48,41,8,54,92,82,96,77,98,73,42,52,42,41,49,32,19,56,75,55,22,66,57,62,98,46,41,23,14,81,45,36,25,18,72,78,127,93,31,19,42,60,41,31,35,45,55,37,24,25,39,56,55,50,25,113,235,250,250,252,252,174,155,211,246,251,216,69,9,8,15,41,49,51,68,113,111,110,114,46,19,25,15,27,26,19,24,26,29,31,43,52,38,24,22,22,17,23,21,19,18,20,38,42,50,54,49,56,59,69,80,93,93,117,123,103,111,113,115,98,61,37,18,22,18,27,69,71,66,77,90,74,54,63,66,59,46,48,46,116,123,108,200,235,253,207,117,147,117,51,31,48,70,53,35,24,22,36,48,57,51,47,58,54,51,48,43,38,36,42,41,35,44,37,25,26,23,24,22,21,23,36,56,63,69,71,51,33,32,29,29,42,48,42,41,41,42,42,42,44,46,43,40,44,44,42,36,31,34,30,26,27,20,21,22,18,24,22,22,18,18,22,18,23,25,33,47,59,84,101,89,65,69,87,103,102,84,181,239,249,249,242,234,236,236,234,234,234,236,235,235,234,234,234,234,234,233,234,236,233,235,236,236,238,236,234,235,234,235,234,234,236,235,236,236,232,233,233,234,232,233,234,230,232,233,233,232,232,231,230,231,230,231,228,229,232,229,230,232,231,230,227,118,4,1,4,8,10,9,10,10,12,12,12,12,191,194,194,193,192,190,189,191,191,192,189,192,193,189,191,191,191,190,188,189,191,191,190,191,193,192,191,191,192,190,188,189,190,189,188,190,189,188,189,189,187,187,190,191,190,186,189,189,186,189,188,189,188,187,190,189,189,188,188,191,191,191,193,191,189,191,190,190,191,191,191,191,191,192,190,190,190,190,191,193,193,191,192,193,191,193,195,194,195,194,195,196,195,196,195,198,194,195,195,196,196,197,201,197,198,199,198,198,197,199,199,199,198,198,200,196,200,199,199,202,199,200,199,200,202,202,201,201,201,203,203,202,200,200,199,199,199,198,200,200,199,200,198,199,200,200,200,199,199,199,199,199,199,200,200,201,200,201,199,201,200,199,201,200,201,199,200,199,200,198,198,198,198,198,199,200,198,198,199,199,196,196,200,197,195,196,196,196,194,194,195,196,193,194,195,193,195,193,194,196,194,194,197,195,195,194,194,194,194,196,194,194,190,194,191,194,195,193,195,193,196,193,191,192,196,195,194,191,198,153,71,62,57,51,49,42,44,35,39,37,36,35,31,31,31,26,24,25,24,25,26,27,24,20,25,28,33,35,37,34,37,108,154,84,16,30,43,49,49,43,44,43,48,48,52,48,52,48,50,52,51,55,53,49,50,54,51,57,50,57,53,44,54,48,51,52,53,49,50,52,49,53,48,50,53,48,49,50,50,45,48,43,34,77,65,44,43,36,48,41,42,68,53,50,47,55,53,47,40,39,45,46,39,45,48,40,48,39,42,44,36,40,38,37,37,43,39,34,43,37,36,42,36,38,30,36,36,36,37,33,36,35,36,33,32,37,43,38,39,39,40,35,33,36,32,33,24,45,81,43,37,57,57,48,36,51,55,51,42,51,30,39,42,31,53,14,36,70,54,21,17,27,22,47,36,55,42,3,27,60,78,105,70,97,81,44,57,39,54,46,19,24,60,73,37,10,36,35,56,71,16,35,19,18,53,21,9,23,19,14,40,49,108,75,36,59,33,33,34,42,53,43,28,17,37,55,53,54,23,89,222,244,252,252,252,185,164,207,249,249,250,204,68,19,6,16,33,62,43,22,95,106,118,110,34,23,23,23,34,19,20,20,19
,22,30,39,46,53,47,32,24,20,23,26,21,24,19,21,19,29,35,38,55,56,59,72,88,76,66,80,93,117,125,125,141,104,55,23,17,18,68,93,42,95,155,141,92,48,45,49,36,41,72,61,130,159,168,230,234,252,176,115,153,100,48,79,138,100,86,41,37,22,18,24,36,50,53,56,59,59,57,59,49,40,46,43,34,36,42,38,25,27,21,22,21,17,31,35,44,49,50,44,39,30,30,33,31,38,37,38,38,46,57,60,60,55,47,47,49,49,49,50,46,41,44,40,37,38,28,27,31,27,18,25,21,21,24,17,16,19,30,33,68,94,98,98,83,110,139,149,141,117,139,191,241,245,236,232,237,233,235,233,233,233,233,233,233,233,233,234,233,233,235,231,233,235,234,236,235,236,236,236,236,235,237,235,235,236,234,236,233,234,234,233,235,231,232,233,231,233,231,230,230,229,229,230,229,229,229,229,228,228,231,232,231,228,227,119,3,0,5,9,9,9,11,9,11,12,11,11,187,192,188,193,191,190,192,192,191,190,191,190,193,191,193,191,192,190,191,193,192,192,191,191,191,192,190,191,189,188,190,188,189,190,186,188,189,186,190,189,187,190,188,191,191,189,188,189,191,188,188,189,188,191,190,190,188,189,191,191,189,192,192,190,191,190,191,189,190,192,188,193,191,190,193,192,193,192,193,195,195,195,194,195,197,194,194,194,193,194,193,194,196,195,197,196,199,196,195,198,197,200,199,199,201,201,202,200,199,199,200,201,200,200,202,201,200,200,201,200,200,200,200,201,203,203,205,203,205,205,204,201,200,200,200,201,200,203,203,201,200,200,200,199,200,198,201,201,200,200,203,200,199,201,199,201,200,200,202,201,200,201,200,198,199,198,198,199,200,200,196,199,198,198,198,198,198,198,199,195,199,196,198,197,194,195,195,196,196,192,196,195,194,196,194,197,194,195,197,192,194,194,194,193,193,196,190,193,196,192,196,190,195,197,193,196,194,194,193,194,193,192,194,194,195,194,195,193,198,117,6,3,5,6,10,9,10,8,10,11,10,11,10,10,10,10,10,10,10,10,11,11,11,11,10,11,12,12,10,12,10,32,77,46,6,11,9,21,25,29,32,29,34,32,35,32,36,35,35,36,32,37,35,32,38,33,34,35,35,34,31,37,33,32,33,35,36,33,33,34,35,35,30,30,34,36,33,32,32,32,35,31,24,45,53,34,25,27,29,28,32,36,33,31,33,28,32,35,26,26,27,27,33,33,31,34,30,29,35,29,29,31,30,26,26,29,31,31,28,28,30,25,30,32,27,28,25,26,27,30,29,31,31,34,31,25,29,29,29,29,27,29,30,30,28,35,19,35,75,36,41,46,56,36,27,42,53,32,16,24,10,29,23,26,29,24,65,43,21,14,27,54,58,73,31,31,28,30,35,37,73,104,72,91,85,45,55,47,64,78,43,35,84,96,87,74,78,86,120,128,95,123,114,118,132,103,131,120,127,108,84,174,159,155,101,27,31,28,41,50,44,24,21,33,49,54,64,30,73,207,247,247,252,252,186,164,216,251,250,250,245,143,73,61,47,53,75,90,32,41,119,105,131,99,27,31,20,25,36,26,24,17,20,24,21,29,46,50,56,49,31,33,39,36,29,27,23,24,21,21,26,22,29,26,46,71,88,91,87,91,112,146,138,157,160,131,78,37,21,26,120,88,45,113,120,105,69,36,25,15,19,57,97,71,138,203,237,252,236,234,150,121,163,83,53,131,122,119,95,47,30,22,20,17,22,39,47,59,63,57,58,47,56,45,39,43,39,40,34,46,45,38,27,21,22,19,22,20,29,35,51,62,60,67,67,57,49,48,46,46,41,57,71,64,60,53,55,54,47,45,51,53,50,55,55,52,51,52,46,32,22,21,19,27,23,29,25,17,22,18,30,39,77,99,106,103,107,159,164,163,155,124,101,82,144,206,228,239,236,234,233,236,236,234,235,233,232,231,233,235,234,236,235,235,234,235,235,235,237,236,233,236,236,234,235,236,236,235,234,233,235,234,232,233,232,231,232,232,232,230,231,230,228,231,229,230,231,228,229,229,229,228,230,230,230,229,226,119,3,1,5,8,9,9,11,10,11,12,12,11,188,191,191,189,191,190,189,190,191,191,190,192,191,190,190,190,190,190,190,192,192,188,192,192,191,191,190,189,191,190,189,189,188,189,188,187,188,189,189,191,188,190,189,187,190,188,191,191,191,190,188,191,189,189,192,188,193,191,189,191,191,191,188,193,189,190,193,190,194,192,192,193
,192,192,190,194,194,194,194,193,193,196,194,193,196,195,197,196,196,193,192,193,195,197,198,197,198,198,199,199,200,200,200,201,201,201,202,202,201,203,200,202,203,198,200,200,202,201,200,203,203,203,204,203,204,205,203,202,205,205,202,205,203,203,202,202,205,202,201,201,202,200,201,203,199,198,200,202,201,200,202,200,202,200,200,199,197,200,198,199,197,199,200,196,198,198,198,196,198,198,196,196,198,196,198,198,197,198,198,197,196,196,196,195,196,195,195,195,198,196,192,194,192,192,194,194,196,193,194,194,193,193,193,192,193,195,193,192,194,192,193,197,193,195,194,195,194,192,196,193,193,192,192,193,193,194,193,196,202,145,75,63,46,54,53,48,49,39,43,38,36,33,31,29,23,21,21,21,17,14,16,16,12,12,12,12,10,11,11,12,10,15,51,42,10,11,10,12,12,17,24,24,30,29,29,29,31,31,28,33,29,31,30,34,31,31,29,29,26,24,27,99,19,14,16,13,12,13,13,13,13,14,13,14,13,13,14,15,15,14,13,14,15,15,14,15,13,15,17,15,14,13,14,14,15,14,14,15,19,22,32,56,50,36,45,55,56,46,32,30,35,39,57,50,22,29,28,30,36,39,51,47,39,44,58,60,41,35,39,33,33,44,34,62,83,53,36,122,41,130,57,129,54,134,123,145,144,134,144,122,81,104,124,122,108,126,160,132,102,140,168,191,191,206,224,210,185,110,177,236,246,238,236,188,146,213,232,249,189,89,57,78,59,92,86,46,43,57,212,251,214,171,177,243,243,201,151,143,223,247,245,251,252,226,246,243,250,252,252,252,252,252,250,153,49,38,15,31,43,47,25,12,23,48,48,66,36,47,194,245,249,249,252,188,155,203,246,249,253,253,208,99,82,81,82,91,86,121,53,87,140,106,136,84,20,22,24,28,31,27,25,18,22,26,23,29,34,46,58,54,51,54,59,59,55,51,43,49,62,55,56,60,56,54,113,153,134,123,98,105,130,118,91,92,86,71,63,61,43,69,157,108,83,106,55,42,42,32,18,14,38,94,95,72,128,234,234,234,236,234,164,146,168,71,67,153,107,114,105,52,25,9,21,15,15,20,18,30,43,45,46,54,52,51,44,34,37,39,39,44,51,51,39,29,35,45,45,30,40,55,56,66,76,84,75,59,59,61,58,56,55,59,61,54,50,49,56,62,63,45,33,45,60,62,51,39,36,38,30,26,20,15,28,29,23,29,31,24,17,18,24,44,83,94,104,103,108,146,152,150,127,103,75,20,73,191,235,246,235,231,237,234,235,236,233,233,235,233,234,234,233,234,233,234,236,234,236,236,234,236,235,236,234,234,235,233,233,236,235,235,235,234,234,232,233,233,232,231,231,234,231,229,230,230,229,230,228,230,229,229,230,228,229,227,230,231,226,118,4,1,4,9,10,9,10,10,12,12,12,12,189,193,190,192,191,189,190,191,193,191,191,191,191,188,191,190,190,191,193,192,190,191,194,195,193,193,191,191,192,191,191,191,191,193,188,187,191,189,190,188,189,192,188,191,190,191,191,191,194,191,194,191,190,192,189,191,190,192,191,193,193,190,191,193,194,194,193,193,191,193,193,193,194,191,194,194,194,198,193,196,194,194,198,197,195,194,195,195,197,196,196,198,199,199,200,199,200,201,201,201,201,205,203,199,201,202,201,204,203,201,203,203,206,204,205,203,203,203,205,206,204,204,205,204,203,202,207,206,205,203,206,205,205,202,202,205,201,203,203,203,205,202,202,204,201,200,203,201,201,200,202,201,200,202,201,199,198,199,200,200,199,199,198,195,198,198,198,198,199,198,198,198,198,198,196,197,198,198,197,197,198,196,196,194,197,198,195,195,196,196,195,193,193,195,193,195,195,194,194,194,193,193,195,192,192,193,193,196,195,193,195,190,194,195,191,194,192,193,193,194,193,194,199,194,193,196,193,198,214,208,209,228,228,231,231,229,230,229,229,227,227,224,223,221,222,221,217,217,219,216,213,214,210,208,206,206,202,194,195,194,183,151,159,174,174,183,181,184,177,177,175,173,170,167,165,160,163,162,160,158,159,166,166,166,174,173,181,180,185,184,188,196,194,199,200,204,206,207,209,211,215,217,216,224,225,225,225,232,232,231,234,234,223,170,192,231,236,244,243,235,214
,213,213,209,212,208,219,235,243,241,246,235,122,106,211,250,175,83,61,139,222,248,239,173,165,206,217,238,247,250,250,250,240,242,241,163,78,75,64,33,44,50,59,208,248,235,249,241,252,252,249,248,249,252,252,252,252,238,246,217,125,112,164,173,122,128,225,232,217,252,253,253,253,253,252,237,176,153,251,251,252,252,252,238,227,252,252,252,220,117,69,65,35,72,89,48,35,70,230,245,246,226,218,228,251,226,140,186,249,250,252,252,253,224,242,250,246,252,252,252,252,252,210,92,28,2,22,37,48,27,15,22,35,52,55,54,19,84,239,243,249,236,168,167,210,239,251,251,250,252,143,27,69,71,71,78,80,101,53,115,147,107,138,70,15,27,24,33,29,24,27,21,19,27,18,20,31,36,40,44,54,64,71,67,83,86,109,139,127,122,128,142,146,163,184,162,145,121,100,113,99,72,45,36,36,42,39,56,73,100,131,87,74,60,28,55,45,20,22,26,93,115,93,49,108,240,250,248,252,246,180,170,160,52,72,150,108,106,80,42,23,12,19,17,17,21,24,18,18,18,19,43,56,50,47,43,46,47,40,43,46,43,44,38,46,68,70,61,59,62,64,66,73,68,72,76,63,54,47,51,53,51,63,65,66,62,57,57,53,38,21,16,22,32,32,22,15,18,19,17,19,18,24,27,28,27,29,32,24,23,22,46,89,89,97,95,84,120,127,118,86,88,137,164,222,247,242,243,236,239,235,235,236,235,232,234,237,234,233,235,235,234,234,234,233,234,235,236,236,236,237,238,236,233,234,235,234,236,235,233,235,234,233,231,232,233,235,232,230,231,231,229,229,231,228,230,228,229,230,228,230,230,230,230,231,231,225,117,4,0,4,8,10,9,10,10,11,12,12,12,188,192,190,186,189,188,188,189,191,190,189,191,190,192,191,187,192,193,192,193,193,191,190,189,190,192,190,190,192,190,190,191,191,191,192,193,191,190,189,191,190,191,192,191,192,191,192,192,189,191,193,191,191,190,197,190,190,190,191,192,188,193,188,191,192,192,195,191,192,191,190,191,191,192,193,193,195,193,193,194,194,196,195,195,196,192,194,193,196,198,196,197,198,198,199,201,202,203,202,203,205,203,202,203,201,199,203,202,203,204,202,205,203,203,206,204,205,206,204,205,203,205,205,204,204,205,205,203,206,205,201,203,201,204,205,203,205,203,204,203,202,200,201,201,199,200,202,200,199,201,199,200,202,198,200,200,200,201,196,201,199,198,200,199,200,195,196,198,198,196,198,199,195,198,198,198,195,196,197,195,196,194,196,195,195,195,195,195,193,195,193,193,193,193,195,193,192,193,193,192,193,193,195,193,194,194,192,194,194,193,193,191,195,194,192,195,194,195,195,192,193,194,193,194,193,193,193,196,205,208,223,241,243,248,249,251,251,251,251,251,252,252,252,252,252,252,252,252,252,252,253,253,252,252,252,252,252,252,252,252,252,252,252,248,253,253,252,252,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,252,252,252,252,252,242,252,252,253,253,253,253,253,253,252,252,252,252,252,252,252,252,249,243,113,118,243,246,237,203,203,247,253,253,252,252,252,252,253,253,252,252,252,252,252,252,249,182,85,81,64,29,37,41,61,217,252,252,252,252,253,253,252,252,252,252,250,250,250,241,248,230,103,92,168,173,60,47,232,247,224,252,252,252,220,229,186,176,141,155,251,251,251,239,252,198,195,250,246,252,184,99,75,75,43,61,87,46,41,53,207,247,198,212,122,162,241,177,165,207,234,234,223,251,216,163,208,213,214,227,252,252,243,206,97,13,4,5,25,50,32,19,16,40,50,57,52,38,63,145,244,244,239,160,152,219,247,252,250,243,248,181,83,60,89,75,53,77,90,74,55,133,127,113,132,53,20,25,29,34,33,29,21,18,21,21,19,26,21,28,29,39,54,61,76,83,138,173,157,169,170,157,155,147,141,160,158,118,87,73,68,68,53,45,39,28,19,21,33,46,53,63,64,62,67,62,73,74,32,23,32,89,161,113,71,41,48,186,234,252,232,187,122,101,86,26,38,91,68,77,67,39,21,
10,21,14,19,21,21,25,22,24,20,16,26,34,39,29,26,28,22,23,28,34,35,29,31,46,56,51,46,48,58,72,72,53,56,81,75,59,59,62,63,57,63,69,69,64,55,41,28,23,20,17,16,17,20,18,17,20,15,16,23,19,27,32,27,34,40,36,30,23,27,61,89,97,103,97,69,74,81,76,33,103,244,244,247,247,241,241,239,238,234,234,235,233,236,238,235,236,237,234,236,236,235,235,234,235,233,232,235,234,232,234,231,233,233,231,233,233,232,231,231,232,230,232,233,233,231,231,231,230,228,230,229,229,230,230,228,228,229,230,227,227,230,229,231,231,227,118,3,1,5,8,9,9,11,9,11,12,11,11,185,190,188,188,190,188,189,188,190,188,189,189,189,190,189,190,189,191,189,188,192,192,191,190,190,191,191,190,190,188,190,192,191,190,191,191,191,188,191,194,191,192,190,192,191,190,194,191,192,193,192,191,196,193,194,196,192,195,193,192,192,192,194,193,192,192,191,192,191,192,194,190,191,191,194,195,193,195,195,194,191,193,194,193,195,194,195,195,198,194,198,198,198,199,197,200,200,200,203,204,201,201,205,202,203,203,200,205,203,201,202,201,205,203,204,204,202,205,203,203,205,205,207,206,206,205,205,205,203,202,203,201,200,200,199,201,200,200,203,199,202,200,199,200,199,200,200,199,200,198,199,199,198,198,198,197,199,198,198,200,199,198,197,198,198,198,197,197,198,196,197,195,196,194,194,196,195,193,196,196,194,193,194,194,194,193,194,195,191,191,196,192,192,194,194,193,192,195,193,195,193,192,193,193,194,193,193,193,192,193,195,189,192,191,193,193,193,194,190,194,192,192,195,191,193,193,191,192,195,196,200,202,207,208,206,209,209,210,213,213,210,212,213,215,216,215,217,219,222,220,221,220,223,226,227,230,221,222,230,231,236,234,234,229,229,235,237,239,241,244,241,243,245,246,246,246,250,250,249,251,251,250,251,251,251,251,251,250,251,251,251,251,251,251,252,252,252,252,250,249,249,249,249,248,248,248,247,246,246,245,245,251,246,215,239,251,244,246,244,246,244,247,248,248,248,247,249,249,226,148,214,174,51,83,201,245,244,251,247,251,253,253,252,219,212,219,196,191,189,196,200,183,164,179,175,94,51,68,60,50,61,38,34,160,204,209,222,211,244,245,252,245,252,252,251,251,232,208,223,212,187,200,245,229,146,95,209,193,149,203,198,170,152,209,196,189,144,188,251,239,239,212,243,171,169,227,220,251,163,91,77,75,65,59,99,46,39,66,224,248,174,124,82,175,204,122,138,198,227,226,217,230,181,163,186,193,206,223,251,251,215,106,19,2,6,17,44,34,24,17,28,54,56,59,20,107,184,210,246,239,163,145,212,250,252,252,243,239,147,152,134,121,117,49,57,59,91,83,57,128,119,118,121,39,19,31,22,28,31,28,24,18,23,24,25,20,19,22,24,40,46,54,67,80,167,170,160,198,168,152,119,98,105,130,125,89,77,59,56,51,33,29,25,16,14,22,21,29,33,39,47,29,35,49,46,39,18,48,142,190,175,97,43,35,16,112,239,239,183,99,45,22,19,15,14,38,50,66,56,28,21,16,12,19,23,23,21,20,31,28,23,22,21,17,19,18,17,17,16,14,19,19,14,21,22,21,34,33,33,40,50,67,55,43,57,71,73,55,36,33,54,68,59,44,34,30,24,17,19,20,16,19,18,21,23,22,21,17,17,24,22,19,32,38,43,56,49,45,36,23,36,71,103,103,107,107,63,49,73,76,26,98,235,239,247,247,239,241,236,237,235,236,234,233,236,237,236,235,238,236,234,236,234,234,236,234,233,234,234,235,233,233,233,232,234,231,232,232,231,231,230,233,230,232,233,230,231,230,230,231,229,231,230,229,229,231,228,229,231,230,231,229,231,233,231,231,228,118,3,0,6,9,9,9,12,10,11,12,11,11,186,190,188,188,188,185,186,186,185,186,188,191,188,185,188,186,189,189,187,189,188,190,192,191,192,192,193,192,191,191,192,190,190,192,193,192,190,191,191,193,193,191,192,191,192,193,191,192,192,192,194,194,196,193,194,193,194,194,192,194,193,197,195,195,194,194,193,192,195,193,195,196,195,193,193,196,195,196,198,195,195,195,195,195
,195,192,194,196,197,195,196,199,197,199,199,198,199,200,201,200,201,201,202,201,203,204,203,201,202,203,201,203,203,202,203,203,203,202,205,204,204,204,203,205,203,204,204,204,204,203,202,201,202,201,200,199,199,198,199,200,199,197,201,201,198,199,200,199,200,200,198,198,197,196,198,198,199,200,199,198,194,196,198,194,196,194,196,196,196,196,195,196,195,195,194,197,195,195,194,193,193,193,194,193,193,193,192,193,193,194,194,194,194,191,194,193,194,193,193,194,194,193,192,192,191,191,190,191,190,191,194,192,192,192,191,193,191,191,193,190,191,193,194,193,190,190,192,193,194,194,191,195,194,195,195,194,193,193,196,194,197,196,194,198,196,197,197,198,199,198,198,198,200,199,201,203,194,198,203,206,207,206,200,200,207,206,205,206,207,208,209,212,211,210,214,216,213,214,216,215,216,217,218,218,220,220,218,220,219,218,221,221,222,221,221,221,220,222,222,221,219,220,220,220,221,221,221,219,220,222,218,221,224,203,217,227,220,222,217,219,218,219,221,220,216,218,223,222,178,74,152,174,92,83,165,229,186,186,177,165,156,157,153,124,130,128,115,100,89,108,116,92,84,102,108,90,110,140,129,139,145,127,75,57,55,39,42,36,48,54,56,65,99,150,168,149,128,137,156,186,207,244,252,252,247,127,132,122,65,122,180,174,200,228,207,215,134,198,251,234,241,205,244,171,168,219,220,251,160,88,70,73,62,53,97,52,33,67,232,247,170,142,120,201,156,59,117,174,229,225,221,218,161,190,189,179,208,238,251,251,145,44,6,1,11,29,42,30,21,32,48,56,62,29,70,217,230,219,249,167,138,207,252,252,252,248,241,146,132,245,211,141,112,62,36,34,64,36,59,131,95,124,107,29,28,24,29,36,34,27,22,22,19,26,24,21,20,21,21,32,41,46,59,45,108,113,138,203,148,114,92,78,95,123,111,74,83,76,66,50,32,31,21,18,24,21,19,18,21,23,23,26,22,20,21,21,26,131,211,199,145,63,36,27,9,56,152,106,100,38,35,72,63,33,45,83,75,60,52,50,40,21,21,20,22,24,26,29,22,31,31,25,27,26,24,24,21,22,22,16,17,21,22,19,17,17,19,31,50,38,47,58,41,35,48,69,74,56,33,15,23,35,34,29,22,19,20,19,18,23,22,20,23,24,25,19,25,42,45,33,21,23,43,58,58,57,54,48,39,25,46,88,101,105,106,105,69,60,86,98,53,62,205,232,243,249,238,239,231,237,236,235,238,235,234,237,236,234,236,235,236,236,234,235,234,234,234,234,235,233,235,234,232,233,236,235,233,233,231,231,234,233,233,233,231,231,231,230,230,230,230,231,231,230,231,228,232,229,229,231,227,231,232,230,231,230,228,118,4,0,4,8,10,9,10,10,12,11,12,11,185,188,187,188,187,185,188,187,188,186,186,186,185,188,188,188,187,187,189,189,189,189,189,191,191,191,191,189,192,191,191,191,191,191,191,190,192,191,191,191,191,191,192,193,193,194,196,191,191,194,191,192,196,191,193,194,191,195,193,193,194,194,195,194,194,193,193,194,197,194,196,195,194,194,195,197,194,197,199,197,196,198,197,195,197,196,197,198,198,195,196,198,198,199,198,201,200,200,201,199,200,200,201,199,202,202,201,202,202,202,201,202,200,202,201,202,204,203,204,203,204,203,205,203,201,203,203,203,202,202,201,203,203,200,199,199,200,199,200,198,198,196,198,199,198,198,196,197,197,198,198,198,196,198,200,198,199,198,195,197,196,196,195,194,193,196,196,194,196,196,193,194,194,194,195,195,193,194,196,193,195,194,193,194,195,193,192,193,193,191,194,194,192,192,193,192,191,191,191,191,191,192,193,193,193,191,192,193,190,189,192,191,193,190,192,193,193,192,193,191,189,192,190,190,192,192,191,191,191,193,194,192,193,194,193,192,193,196,195,194,194,197,195,194,195,196,198,195,196,195,196,198,198,199,200,197,188,198,202,200,205,198,193,200,203,204,202,200,204,203,200,206,208,206,205,205,207,206,206,207,208,209,210,210,211,212,211,212,212,212,211,211,214,215,216,215,216,216,215,215,216,215,217,214,215,
215,211,214,216,217,217,216,222,191,198,222,214,217,212,214,214,213,214,214,211,213,223,223,179,102,179,174,113,93,106,141,106,98,71,67,57,64,96,95,112,102,105,119,111,141,160,113,98,149,167,162,182,198,197,200,195,162,118,74,43,25,17,12,10,10,9,10,11,10,10,11,10,24,39,53,78,129,155,199,198,104,126,139,97,174,231,218,245,232,208,175,107,196,239,231,236,203,244,171,166,221,216,251,157,85,75,65,60,44,101,63,40,57,222,246,139,183,196,223,152,100,166,182,233,220,222,196,150,212,184,163,217,249,251,181,49,4,3,7,33,38,34,24,26,48,53,65,27,49,184,246,232,216,184,120,191,250,252,252,250,250,149,131,218,252,237,134,91,81,85,25,37,23,64,123,87,120,84,25,35,22,33,38,33,30,19,24,21,20,21,23,26,20,24,23,25,35,33,32,46,38,114,165,108,92,76,76,94,98,69,55,62,51,47,35,29,35,27,24,23,19,19,24,24,15,21,23,24,24,15,28,21,82,131,106,81,53,33,15,10,19,69,85,46,15,59,147,110,106,203,246,191,118,135,125,68,29,16,21,25,23,18,26,29,27,30,34,37,32,33,31,28,28,24,28,33,39,39,33,28,23,27,46,55,50,50,42,39,36,41,52,65,66,48,27,17,19,18,17,20,18,22,20,22,22,22,26,21,24,24,24,73,111,81,54,35,25,60,71,55,59,54,55,46,24,57,90,99,95,95,108,81,64,79,77,52,61,176,230,238,249,238,237,233,236,235,235,236,234,233,232,234,233,234,235,235,236,233,233,234,234,233,234,233,233,234,235,234,232,234,234,234,234,232,233,233,232,232,232,233,231,234,232,230,231,230,231,231,230,229,230,230,231,231,230,230,230,232,233,232,231,227,117,4,0,4,8,10,9,10,9,11,12,11,11,184,187,186,188,188,187,187,186,188,190,185,187,185,185,191,186,187,187,186,190,190,187,186,188,188,188,189,190,188,188,190,188,190,192,189,193,191,191,192,191,191,189,192,190,192,191,189,192,192,190,192,194,192,191,192,193,193,192,193,192,192,194,192,194,194,192,193,196,194,193,198,193,191,196,196,195,194,196,198,194,199,199,198,199,199,198,199,196,195,198,197,198,199,196,198,199,198,198,199,200,199,198,200,198,199,200,198,200,202,200,199,202,201,203,201,202,203,202,205,201,202,204,205,203,200,204,200,202,200,200,203,202,203,201,200,199,199,199,197,197,197,199,200,199,198,198,198,196,196,196,194,194,197,196,196,197,193,194,195,195,195,193,193,193,198,195,194,193,195,193,191,192,194,195,191,193,191,194,193,193,193,191,193,191,193,193,190,192,190,191,193,188,192,192,192,191,189,191,191,192,192,190,191,192,191,190,191,190,188,190,190,190,193,191,190,192,192,192,191,190,192,188,188,187,190,191,191,192,191,190,192,193,190,193,191,193,193,193,194,193,197,195,197,196,194,197,196,197,193,195,198,198,198,196,198,190,190,200,199,201,202,195,194,200,205,204,203,206,200,205,204,201,206,202,207,205,204,205,207,208,207,210,208,210,210,208,207,207,210,210,212,214,214,213,214,214,214,212,213,213,214,214,214,215,213,212,215,212,212,214,215,214,220,185,187,217,212,215,212,212,214,212,212,212,209,210,225,219,190,126,135,121,95,97,98,98,92,104,103,120,89,64,63,39,93,153,181,212,214,214,229,147,73,159,230,188,160,185,177,145,138,132,102,83,159,214,209,202,206,145,139,129,46,6,18,37,115,62,28,11,9,23,19,30,58,34,106,137,116,239,251,233,249,176,162,148,107,212,232,232,234,200,246,176,169,217,217,251,152,87,75,71,63,34,96,76,30,45,201,189,146,222,216,200,110,132,204,198,244,213,226,177,149,224,171,172,235,248,212,75,5,9,8,22,46,32,24,23,42,54,66,45,32,155,248,248,227,159,139,174,237,252,251,251,251,170,131,207,251,251,198,84,104,112,120,45,55,24,81,135,101,134,77,24,35,26,37,37,31,32,17,19,24,24,18,24,29,21,23,24,28,24,24,21,41,69,130,120,79,74,61,63,61,49,46,34,36,36,30,33,39,44,45,35,23,21,23,29,24,19,21,27,26,21,29,22,33,86,88,51,49,33,22,15,14,20,64,105,64,69,124,143,86,84,184,231,201,92,155,118,69,45
,33,33,39,38,23,25,24,28,29,34,31,31,37,24,24,29,31,53,68,63,65,53,38,27,31,38,36,47,50,35,36,38,45,52,65,80,67,53,46,34,27,26,19,22,21,26,19,21,27,21,24,28,22,80,177,163,101,97,72,59,71,71,60,61,55,54,43,32,57,84,90,90,95,112,89,65,78,80,42,45,191,235,239,249,235,238,234,235,236,233,235,232,233,235,233,236,235,234,235,233,233,233,234,233,232,231,233,233,233,233,232,233,232,233,233,235,233,233,232,231,231,233,234,232,232,232,231,231,231,231,230,231,232,229,232,230,231,234,229,232,232,232,233,233,229,118,3,0,5,9,9,9,12,10,11,12,12,12,186,187,184,184,185,186,190,186,187,189,186,188,186,186,189,186,188,188,186,187,185,186,186,188,186,186,189,188,190,189,189,188,191,191,188,191,190,190,191,193,190,189,190,188,193,190,192,192,190,193,189,191,194,191,193,193,193,193,194,192,192,194,191,191,190,191,193,193,194,191,194,198,195,196,196,196,196,196,194,195,198,199,198,196,200,197,195,197,198,198,198,196,196,195,196,196,197,196,198,201,199,199,199,197,199,196,198,199,200,202,202,202,201,203,203,200,201,200,201,200,202,204,205,203,202,203,201,201,200,201,202,201,199,202,202,198,198,197,198,198,200,198,198,194,196,199,196,198,196,196,195,195,196,196,196,194,197,196,192,194,195,193,194,194,194,195,193,193,193,190,191,193,191,194,196,195,193,192,193,193,194,192,194,190,190,193,190,190,191,191,192,191,190,191,191,191,193,194,193,193,191,193,191,190,191,190,190,193,190,192,190,190,193,191,191,191,191,189,193,191,191,193,189,191,192,191,193,193,193,193,192,193,191,190,193,193,190,192,194,192,192,196,196,196,195,195,196,194,196,199,197,196,194,198,199,192,192,201,200,200,201,191,196,203,201,205,203,204,205,202,203,206,206,205,206,204,205,204,206,207,209,211,210,209,210,209,208,211,210,210,211,210,211,214,212,212,212,212,214,213,214,212,215,213,213,215,212,214,212,212,213,211,226,196,199,230,221,218,213,213,212,211,212,210,211,207,226,194,131,108,96,83,84,71,60,62,84,153,211,251,193,99,161,205,252,252,252,252,250,214,228,143,73,149,198,156,109,126,121,104,90,84,95,100,211,241,245,248,248,248,246,188,118,93,119,215,243,200,174,101,62,67,23,28,27,14,38,22,68,190,212,225,177,126,185,141,139,225,226,236,229,201,244,179,168,214,214,251,151,70,77,65,67,36,101,106,66,60,117,151,180,248,199,123,65,153,227,211,249,211,224,163,170,225,191,200,250,235,99,16,6,18,17,36,40,26,19,38,56,62,50,27,128,243,250,250,170,113,188,229,253,253,244,245,170,142,209,251,252,242,124,108,108,82,74,51,84,72,120,144,119,151,69,21,37,27,38,34,33,32,19,23,19,26,24,21,24,24,35,30,30,33,27,32,51,88,110,74,54,60,51,51,34,34,34,33,32,19,17,39,49,44,51,46,24,24,25,27,29,24,26,25,27,25,23,28,53,107,91,47,43,22,17,16,24,31,72,135,124,106,145,121,67,48,18,67,80,47,76,62,61,61,60,67,59,57,42,26,21,23,21,27,28,27,33,27,29,29,53,61,50,34,34,47,50,50,50,36,26,39,51,68,75,73,65,64,80,86,82,79,77,69,63,50,40,34,33,29,29,27,24,28,32,45,77,136,137,94,99,152,135,89,79,63,63,61,51,48,49,30,55,86,92,89,84,107,92,75,70,83,40,44,200,236,242,249,234,238,234,235,234,235,233,233,234,233,236,236,236,234,233,234,233,235,233,233,232,233,233,233,231,230,233,233,233,231,233,233,232,232,234,231,232,232,233,233,231,231,231,231,231,231,231,230,233,233,229,230,230,232,233,232,234,234,233,233,230,118,3,0,4,9,9,9,12,10,11,12,12,12,186,188,184,185,188,185,184,186,185,186,186,187,188,187,188,188,186,185,188,185,186,187,189,189,187,188,187,189,188,188,189,188,189,191,188,189,188,189,187,190,190,188,192,190,192,192,190,191,191,190,191,192,190,193,191,192,192,190,194,191,193,193,191,193,193,194,195,194,193,194,195,195,196,195,196,194,194,196,198,193,194,196,196,198,196,195,198,196,
196,199,196,198,198,198,198,198,196,198,198,195,197,198,200,200,199,200,199,199,202,199,201,204,199,201,200,200,200,200,201,199,202,201,200,201,199,201,198,203,200,198,201,199,201,197,196,199,198,197,197,200,197,196,199,196,199,196,195,198,196,195,195,196,195,194,195,196,193,197,196,194,195,194,195,193,194,194,192,192,196,192,192,192,193,193,192,193,193,194,193,194,192,191,193,190,194,193,194,193,190,193,191,190,191,191,190,191,191,192,189,190,190,188,190,192,192,189,193,192,190,192,191,190,192,191,191,191,192,191,192,191,191,194,194,189,191,193,192,192,194,194,195,194,193,194,194,194,192,196,195,193,197,195,198,196,196,196,195,196,198,200,198,199,196,199,196,188,200,200,200,203,198,192,199,203,203,205,204,207,205,205,202,203,201,203,210,205,205,206,205,207,208,209,212,212,210,211,212,209,210,211,210,211,211,212,213,213,212,214,213,213,215,214,213,215,214,213,211,213,214,214,213,217,235,206,216,249,232,220,214,212,214,210,211,212,208,209,233,172,90,77,55,55,67,43,39,44,75,147,208,244,200,198,252,252,252,252,253,253,191,116,111,90,73,66,63,76,65,53,55,48,50,57,57,56,76,93,110,117,141,152,140,97,53,67,113,209,230,215,196,149,158,127,108,120,165,122,47,41,33,66,137,206,189,169,206,154,166,232,228,237,225,202,241,187,170,212,217,251,153,79,80,58,62,25,98,115,96,85,120,146,178,226,169,141,123,198,224,216,247,218,224,163,187,229,201,217,237,142,33,9,12,19,41,42,27,20,33,50,62,55,22,98,235,245,251,218,111,152,232,246,252,252,249,169,137,202,250,250,252,158,87,106,87,27,29,24,59,93,123,134,125,131,46,21,37,25,38,36,34,24,23,21,23,27,21,26,25,25,34,29,33,35,34,33,60,92,87,71,52,49,41,38,34,16,20,23,16,21,18,29,54,60,63,58,45,24,23,29,24,24,23,29,29,30,31,37,79,105,87,66,42,23,30,42,57,75,92,123,151,111,81,113,88,68,49,22,29,51,65,53,69,68,82,84,64,70,59,40,26,15,19,21,24,24,23,27,24,30,55,55,26,16,19,24,50,77,83,75,76,97,109,122,124,97,80,71,61,59,59,77,93,97,103,89,78,71,63,63,53,34,31,50,73,99,115,97,59,42,52,62,63,57,63,57,53,59,53,53,52,37,45,84,91,97,88,100,92,66,69,65,27,63,228,241,244,248,234,239,233,236,234,233,235,233,233,234,233,234,234,236,236,235,235,234,235,236,236,234,231,231,233,233,234,233,233,233,231,232,232,231,232,234,233,233,232,231,231,231,232,232,231,231,230,231,231,230,234,231,230,233,230,233,233,231,233,233,229,118,4,0,4,7,10,9,10,10,11,12,11,11,187,191,188,188,188,188,186,185,185,186,188,188,187,184,187,185,184,187,186,188,189,187,186,188,186,186,185,185,188,187,188,186,188,187,186,190,187,190,188,188,190,189,191,187,191,192,187,190,190,189,190,189,191,190,190,193,194,192,192,192,193,194,193,193,193,194,194,194,196,193,197,195,191,193,197,196,194,194,193,197,196,195,197,196,197,196,197,196,199,194,198,198,197,195,195,197,196,196,196,199,198,198,197,198,199,196,198,199,199,199,200,199,200,201,202,200,201,201,201,203,202,201,201,199,198,200,199,199,201,198,198,199,198,200,199,197,199,198,198,198,198,196,196,197,197,196,195,197,195,196,196,194,194,194,194,193,193,193,195,194,193,194,193,195,193,195,195,191,196,193,195,192,193,195,192,193,192,194,191,190,193,193,194,194,193,192,191,193,191,192,193,190,191,191,190,190,191,190,192,190,191,190,188,190,189,189,191,192,190,191,189,191,193,189,192,192,190,192,191,193,191,191,193,192,191,191,194,193,190,193,192,193,193,193,196,195,194,198,198,198,198,198,196,196,198,196,196,196,198,199,197,198,200,203,196,192,198,201,203,203,196,198,205,206,206,205,205,207,208,205,204,202,202,206,208,208,207,208,209,209,210,210,211,211,213,212,210,210,211,211,213,213,212,215,213,214,214,210,212,210,213,214,213,214,213,217,214,214,215,212,212,218,232,17
37,216,169,162,196,237,245,248,252,188,161,199,237,250,233,241,178,139,207,251,252,247,199,240,125,18,49,70,121,112,123,91,34,64,127,63,89,146,50,29,41,96,118,120,127,116,65,12,26,39,42,47,53,42,30,48,33,29,30,20,32,24,34,32,23,33,31,31,33,32,36,36,31,34,32,30,37,37,39,41,39,42,32,30,34,32,49,70,59,34,26,34,38,35,21,27,28,27,32,26,33,29,30,31,33,36,36,31,33,57,84,109,119,121,137,153,147,87,37,29,31,59,65,55,50,49,46,26,27,24,19,19,27,31,18,22,16,23,24,21,30,26,30,24,26,31,29,37,36,34,36,34,36,31,32,33,31,25,30,28,26,24,28,54,54,30,20,20,23,27,46,51,39,32,19,27,29,33,38,37,42,40,33,40,39,36,36,33,37,34,37,30,78,83,57,66,57,76,65,56,73,30,57,127,120,117,106,117,101,60,41,22,130,244,244,246,246,253,253,250,250,251,252,250,250,251,252,251,249,249,249,250,250,248,249,248,249,249,249,249,249,249,249,249,249,249,251,251,252,251,251,250,248,247,247,247,245,245,244,245,245,246,249,249,249,251,250,249,251,250,252,252,252,250,250,237,115,4,1,4,8,9,8,10,9,11,12,12,12,198,202,198,200,201,199,200,200,200,198,198,198,198,198,196,199,199,197,198,195,198,198,199,200,202,199,198,200,204,202,198,198,198,198,198,196,196,193,195,197,195,198,195,194,198,195,194,195,191,192,194,194,197,194,194,194,193,195,193,193,196,198,198,194,196,198,198,198,195,198,196,198,196,196,198,195,199,195,198,198,200,202,198,199,199,199,199,198,199,198,198,197,195,194,195,198,198,198,196,198,198,200,198,199,200,200,200,200,201,201,200,201,202,201,201,201,200,201,201,201,202,202,203,199,200,203,204,205,203,205,207,204,205,206,208,211,207,208,207,207,207,207,207,205,206,208,206,204,204,205,204,205,205,205,205,202,205,203,205,207,205,206,207,205,203,205,206,206,206,206,204,207,204,206,208,205,207,204,206,208,206,208,207,206,209,208,206,208,208,207,208,208,210,206,207,208,206,207,210,208,213,212,207,209,210,209,208,210,207,209,209,208,209,208,208,208,211,210,208,210,212,212,211,211,211,209,211,213,212,211,209,207,209,210,208,210,210,210,210,212,212,210,210,212,209,213,206,202,212,202,201,214,211,212,210,210,211,210,211,210,211,209,211,212,210,211,212,209,212,212,212,212,212,214,213,214,215,214,214,213,215,216,214,215,213,215,216,216,219,221,223,223,227,230,230,233,228,220,196,181,178,181,191,200,212,214,218,221,222,226,225,223,222,225,204,211,204,204,223,217,220,221,218,188,184,213,230,220,126,172,246,191,192,74,80,214,211,135,148,229,192,216,226,142,153,159,168,167,168,247,205,99,192,236,247,190,88,112,124,120,66,30,28,185,193,108,212,189,124,130,197,208,205,205,109,98,162,202,163,201,210,188,148,59,117,192,225,250,198,131,217,234,227,237,222,236,206,211,230,227,223,197,233,232,202,160,158,222,99,27,47,59,53,62,89,131,155,121,43,23,12,19,35,31,29,21,32,51,56,42,97,229,217,143,194,229,246,230,250,217,162,222,235,252,237,230,191,143,200,248,252,252,246,174,118,50,27,84,98,123,118,90,47,55,115,144,46,104,137,39,33,135,208,164,136,120,108,53,15,13,19,40,44,46,36,28,41,29,28,27,19,24,31,32,30,31,30,29,29,35,30,27,36,27,36,33,26,34,35,34,41,45,44,34,31,36,26,45,68,62,37,24,35,36,29,31,23,25,31,28,29,30,30,34,31,33,32,37,30,51,137,170,118,112,148,153,119,59,27,27,24,27,49,64,64,60,53,38,23,16,19,20,21,25,22,21,15,25,25,21,22,24,26,24,20,27,28,31,32,34,38,31,34,35,33,33,28,33,33,25,27,31,22,35,61,53,31,24,22,21,16,23,27,29,33,28,25,30,35,39,35,36,41,37,39,36,35,35,37,39,31,39,34,74,88,55,67,61,79,66,46,82,39,43,120,114,116,107,113,106,57,48,15,81,237,240,246,246,253,253,250,250,252,251,252,251,251,249,249,250,248,248,249,250,250,248,249,247,248,249,248,248,247,248,249,249,249,251,250,251,251,250,248,248,247,246,246,246,245,245,246,247,247,246,24
8,250,249,249,249,252,249,252,252,250,251,250,237,116,3,1,4,8,9,9,10,9,11,12,12,12,200,202,202,201,204,200,200,203,205,203,202,204,202,201,202,203,202,199,202,200,199,201,200,204,204,203,201,204,205,202,204,202,201,200,199,199,200,198,198,199,198,196,196,198,198,197,199,196,195,197,198,198,196,195,195,196,196,198,195,194,195,196,196,198,197,198,199,198,200,198,198,198,200,200,200,200,198,200,199,199,198,199,199,199,201,200,200,200,198,198,198,198,198,198,198,198,200,198,198,198,198,199,198,202,200,200,202,199,201,198,198,204,200,200,203,204,201,201,202,202,204,208,206,206,208,205,208,205,208,208,208,209,208,209,208,207,206,209,210,208,208,211,207,208,207,207,209,206,206,206,206,207,206,205,206,205,206,206,204,207,207,205,203,204,204,204,206,205,205,205,204,205,208,205,208,208,206,207,205,206,206,206,207,208,206,209,210,206,207,206,207,209,209,207,206,207,207,208,208,209,209,208,208,208,208,208,208,209,208,208,208,207,208,209,209,210,210,211,211,209,208,210,209,207,210,210,210,210,211,210,210,210,210,210,208,210,211,208,210,210,211,208,210,211,211,214,201,205,210,200,211,214,214,214,212,212,211,213,212,211,212,212,215,214,212,214,212,212,212,214,213,212,212,214,215,214,216,214,214,215,215,218,217,218,219,222,221,223,228,226,225,226,223,215,205,192,182,183,188,192,191,186,193,209,221,227,229,225,225,223,224,223,219,223,202,208,208,204,222,218,222,224,217,186,189,217,231,213,134,190,245,194,191,81,88,210,230,149,138,233,201,209,193,127,152,161,174,157,180,250,205,125,201,234,249,229,130,112,125,117,76,43,20,169,167,103,167,139,150,193,234,215,214,199,94,63,136,196,200,239,213,188,118,69,147,208,241,251,168,147,233,229,230,232,223,234,204,214,229,227,222,198,231,230,198,137,133,232,134,65,92,86,38,73,127,94,96,66,37,31,14,36,39,28,25,22,43,61,46,83,201,243,160,171,232,244,234,214,193,162,208,252,251,245,234,184,152,206,249,252,252,252,237,84,39,14,27,101,105,127,87,35,66,100,153,122,33,132,125,35,35,115,201,155,125,127,98,45,10,19,16,22,27,30,32,35,44,26,24,26,22,25,29,33,29,33,30,28,33,34,32,29,27,34,31,28,34,27,29,40,43,43,39,36,38,34,33,41,60,72,45,25,27,38,33,26,31,24,31,30,29,31,29,32,32,25,28,29,33,44,104,169,133,109,114,84,45,23,14,13,21,26,47,72,71,50,40,38,25,23,17,19,23,20,21,20,21,21,23,19,27,27,24,23,25,30,25,30,29,31,38,32,36,39,32,31,31,31,31,27,31,30,22,41,59,47,34,27,23,27,29,29,33,24,29,36,29,29,34,38,36,37,38,35,40,32,33,37,34,41,37,39,32,78,91,55,69,57,74,71,50,78,51,34,110,120,115,107,109,116,67,52,16,62,229,237,246,246,253,253,251,251,252,252,252,251,249,248,249,250,249,249,250,249,249,251,250,249,248,249,249,249,248,247,249,249,249,250,249,250,250,251,251,248,247,247,247,246,245,245,247,246,248,249,248,248,249,249,250,252,251,250,250,251,250,249,236,116,4,1,4,8,9,9,10,9,12,11,12,12,200,204,201,201,205,202,203,201,202,202,202,204,203,203,200,202,201,199,205,202,203,206,203,207,205,204,205,204,206,206,204,205,202,202,201,198,200,200,198,198,199,199,197,196,199,197,197,199,198,198,195,195,196,196,197,196,197,195,195,197,198,195,196,198,199,198,196,198,200,200,199,201,199,198,200,198,200,199,203,200,198,198,198,199,199,199,201,201,196,198,196,195,198,198,197,197,198,199,198,198,199,200,201,200,200,201,200,202,204,201,201,200,202,203,204,202,201,202,205,202,204,209,208,208,208,208,208,207,208,209,210,210,212,210,208,210,208,210,208,208,209,208,209,208,208,208,210,210,207,206,207,208,207,207,207,203,206,206,206,207,205,205,205,203,206,203,205,206,203,207,206,206,206,203,205,205,207,205,206,207,206,208,205,205,205,204,207,205,206,207,204,205,208,207,207,210,208,207,208,206,208,20
7,207,208,206,207,206,208,208,208,209,208,209,209,210,209,209,208,209,207,209,210,209,209,209,208,210,208,210,210,209,208,209,208,208,207,211,211,207,211,209,208,210,210,210,210,200,210,207,200,213,214,214,214,213,212,212,212,214,213,214,212,211,213,213,212,214,213,215,214,217,221,221,224,225,224,226,225,227,230,229,233,236,234,233,235,237,236,234,232,226,212,198,187,190,197,204,215,224,235,232,224,218,217,225,229,233,233,234,232,231,233,232,239,217,221,220,216,238,231,234,237,226,195,206,231,247,214,141,214,252,205,201,86,122,238,241,161,120,214,187,191,165,110,154,172,179,165,202,250,214,152,217,234,251,247,126,111,97,105,80,33,29,129,157,117,175,171,174,218,228,187,167,145,107,78,124,170,215,235,175,149,136,136,188,238,246,248,149,169,241,221,236,227,225,235,204,212,227,227,220,195,230,236,187,125,181,252,154,68,85,89,60,151,151,72,58,65,57,25,33,40,33,23,30,39,60,43,55,178,248,192,148,194,244,234,223,192,149,192,233,252,244,231,199,136,202,248,252,252,250,250,154,36,34,3,41,99,118,87,31,56,104,130,151,87,37,137,112,31,22,82,130,120,126,127,88,26,23,26,25,28,24,45,39,35,48,26,27,24,22,32,27,31,36,30,33,35,30,36,34,35,32,27,31,29,30,33,34,31,34,35,36,41,38,38,34,29,63,70,52,29,21,31,26,30,32,32,30,34,35,31,32,27,31,27,31,37,32,33,46,115,105,46,49,38,24,21,13,14,14,31,41,59,66,47,42,35,27,25,21,21,19,14,18,25,21,21,23,25,25,24,25,24,27,27,24,33,28,35,40,32,36,30,31,35,29,33,30,32,26,25,27,43,63,42,28,29,23,29,32,26,32,31,33,33,32,33,27,33,42,38,36,39,36,39,35,34,34,35,36,37,32,80,96,57,72,55,71,78,50,74,53,33,95,115,116,109,110,115,69,51,19,54,218,237,247,247,251,252,252,252,251,251,249,248,247,248,249,249,249,249,250,249,249,247,251,250,248,250,249,248,249,248,249,248,248,249,248,251,249,250,249,248,248,246,248,248,247,247,248,248,249,249,249,251,249,250,249,252,252,251,249,249,250,250,237,116,4,1,4,8,9,9,10,10,11,12,11,11,200,204,204,202,206,200,203,204,202,201,200,200,200,202,202,199,200,200,203,203,205,204,203,204,204,205,205,206,206,203,202,203,203,202,200,198,200,199,199,198,198,198,199,200,199,198,197,197,199,197,198,198,196,198,196,196,197,194,195,198,197,195,195,197,194,196,200,200,200,200,201,200,199,198,199,198,198,201,201,201,200,200,200,198,198,197,198,200,198,198,198,195,199,199,197,197,197,198,200,201,200,201,198,201,201,199,202,199,200,204,204,204,204,203,204,204,200,203,204,204,206,207,205,206,208,207,209,208,210,209,211,211,209,210,209,209,205,207,208,209,210,206,205,208,207,206,207,206,208,206,208,207,205,203,205,205,206,206,205,206,205,206,204,205,204,205,206,203,204,205,205,202,204,203,202,204,205,206,205,205,204,205,205,206,203,204,206,205,206,206,206,206,206,205,205,206,207,208,205,207,206,206,208,207,211,208,208,208,207,206,207,207,207,206,209,208,208,209,208,208,208,210,209,208,211,210,207,210,208,209,208,206,208,209,209,208,210,209,210,209,210,208,208,209,213,207,199,211,203,205,216,210,212,211,212,213,213,214,212,212,212,212,211,212,210,212,211,217,220,225,241,244,244,244,247,248,250,251,252,250,248,251,248,246,244,247,246,237,233,226,219,221,228,236,249,252,252,252,252,252,252,252,252,245,239,235,239,243,249,251,252,252,252,252,248,245,248,243,252,252,252,252,249,217,235,251,252,234,167,232,253,196,162,66,110,244,251,172,141,224,223,214,161,123,165,192,200,189,239,252,234,170,231,249,250,250,123,83,72,65,63,34,12,120,185,156,202,205,198,216,179,163,165,174,147,105,111,142,195,177,184,167,163,201,234,251,252,237,146,205,246,231,240,230,233,237,207,212,225,227,219,195,229,237,184,145,202,252,150,51,63,70,92,175,110,53,53,57,57,38,43,35,24,29,44,53,53,44,135,243,20
5,165,171,213,237,224,196,152,193,233,252,252,246,206,145,178,243,250,251,251,248,172,35,14,50,42,86,122,86,49,59,107,134,130,131,64,44,141,92,29,33,45,107,125,121,137,71,16,29,28,33,40,47,56,35,33,46,26,20,27,24,27,29,27,32,31,32,33,33,27,29,33,29,29,31,30,35,31,29,32,25,29,33,31,32,29,36,34,59,74,51,37,21,22,28,34,33,31,33,26,32,35,29,31,36,34,33,37,34,32,40,88,66,45,49,26,23,14,14,17,15,21,39,59,58,49,42,36,27,18,14,21,19,19,21,19,24,26,27,24,25,24,25,24,26,26,29,36,29,33,36,34,34,30,36,33,26,33,32,28,31,29,33,51,55,42,31,28,33,32,33,29,31,35,28,29,32,31,30,31,35,39,38,31,34,37,31,35,29,34,32,33,31,71,98,59,72,65,54,62,52,62,57,28,89,119,107,107,102,115,75,49,28,30,183,236,247,247,252,252,252,249,251,249,249,248,248,248,249,249,249,248,248,251,250,249,250,248,248,248,249,250,249,249,249,248,246,247,248,249,248,249,249,248,248,247,247,248,247,249,249,249,249,248,249,250,250,251,251,252,251,251,250,250,249,250,237,115,3,1,4,8,9,9,10,10,11,12,11,11,201,204,203,202,202,201,202,202,202,201,202,200,201,200,202,204,201,204,203,204,206,200,203,203,203,205,203,201,202,204,205,203,202,201,201,198,200,201,200,200,201,199,198,200,201,202,199,199,199,196,201,200,199,197,196,197,196,196,197,195,198,198,197,197,198,200,197,200,202,199,201,201,198,199,199,198,199,200,201,200,200,200,202,200,200,199,198,201,198,201,198,198,199,198,200,199,199,200,198,201,203,201,201,200,202,200,200,202,200,201,202,203,201,202,204,203,204,204,206,205,208,206,203,208,208,209,209,207,207,208,210,211,212,208,208,208,207,208,207,208,208,205,206,208,207,207,207,206,206,206,208,206,207,206,205,205,204,206,206,206,207,203,204,203,204,202,205,207,203,205,207,205,205,205,203,203,205,205,205,207,204,204,204,204,206,203,206,203,203,207,204,207,206,205,208,207,205,206,205,205,210,208,207,207,207,210,207,207,208,207,209,208,209,210,209,208,207,210,210,208,210,209,208,210,210,210,210,209,210,208,210,209,209,208,209,210,209,211,210,212,211,208,211,211,214,204,205,210,201,211,214,213,212,209,212,213,213,214,214,212,213,212,212,212,211,209,211,212,212,206,203,205,202,201,204,204,207,207,216,211,182,173,170,171,176,165,151,137,138,147,164,188,199,206,207,206,208,210,203,201,201,199,191,183,187,186,183,189,197,202,196,198,197,200,187,185,189,181,199,199,201,201,183,164,186,205,214,173,124,186,197,133,112,19,57,176,171,128,124,190,200,189,145,117,153,179,180,182,235,250,207,160,220,247,252,241,103,95,82,70,57,15,34,149,212,151,213,194,184,200,179,213,224,229,183,165,103,108,178,190,232,173,190,208,244,252,250,240,164,246,252,252,252,247,250,248,212,212,227,226,220,194,236,231,162,158,212,230,113,39,53,72,139,189,84,42,59,87,60,37,49,17,20,34,52,53,43,111,229,220,172,185,187,216,229,195,141,182,235,253,250,250,241,149,171,233,250,250,249,249,171,39,8,12,74,81,121,95,30,74,105,137,126,93,101,41,57,136,77,24,73,173,160,132,131,118,68,16,32,36,34,53,69,62,33,39,42,27,29,26,21,29,29,32,29,29,33,27,32,30,31,34,30,55,73,67,63,58,44,34,30,27,32,29,30,30,35,35,54,76,59,36,23,26,29,33,33,33,31,27,33,31,28,30,36,30,30,36,33,32,33,53,47,47,55,34,17,14,15,15,14,23,37,55,56,46,44,39,31,20,19,20,18,22,16,24,27,21,25,24,24,29,28,24,26,25,29,32,30,33,37,37,33,33,33,32,29,34,32,33,31,31,40,57,59,35,29,37,27,32,34,31,31,31,38,29,30,30,26,32,37,32,35,38,29,37,34,32,34,35,33,32,31,71,95,59,72,69,41,37,38,48,44,29,71,113,113,102,99,118,94,55,27,28,157,233,249,249,252,252,252,250,249,248,249,249,247,249,249,249,249,248,249,249,248,249,249,248,249,249,248,247,248,247,247,245,246,246,245,248,247,249,249,248,248,246,247,246,248,248,248,248,248,248,249,250,249,250,249
,252,250,250,250,249,250,250,236,117,4,1,4,8,9,9,10,9,11,11,12,12,201,204,201,200,205,199,202,202,201,203,201,200,202,203,202,200,203,203,203,204,203,203,204,202,203,203,203,202,201,200,199,204,203,202,203,200,200,201,200,201,199,199,200,198,198,199,201,200,200,200,201,200,199,201,200,200,198,198,198,198,200,201,200,201,202,200,200,200,201,200,201,201,201,200,200,198,200,201,200,198,201,200,198,201,201,203,200,199,197,200,203,202,200,199,200,199,202,200,200,202,201,203,202,204,203,202,205,203,202,204,202,204,205,203,205,205,203,207,205,205,206,207,207,208,209,210,208,207,208,209,211,209,209,208,207,208,207,209,207,210,210,207,208,208,207,208,208,207,207,207,209,207,206,205,207,207,207,205,205,205,205,205,203,203,203,202,204,206,203,204,206,204,205,202,203,206,205,204,203,203,204,204,206,206,205,204,203,205,205,205,204,206,206,207,210,208,206,207,207,208,209,208,207,204,210,210,208,207,208,212,210,208,210,209,210,209,210,210,210,210,208,208,208,210,213,212,210,210,208,207,207,210,211,209,213,213,213,212,211,214,212,212,213,213,215,205,210,208,203,213,214,214,212,209,212,211,212,213,212,214,210,212,211,211,211,213,211,212,188,130,108,99,98,96,92,90,78,74,102,88,56,60,57,54,44,34,26,19,19,21,38,56,63,57,62,63,54,53,49,51,50,48,43,45,59,79,89,89,93,84,69,55,50,51,51,52,57,54,51,50,50,53,54,50,48,53,57,53,24,29,46,89,132,48,5,19,13,14,35,71,78,70,72,77,83,86,89,92,103,106,98,73,103,108,119,128,64,119,122,84,79,36,37,129,127,77,128,94,100,139,137,159,165,145,125,114,53,98,138,158,175,101,99,111,185,227,243,157,127,226,218,227,226,220,224,232,210,210,229,231,220,193,243,223,163,177,198,200,114,57,49,83,203,208,68,36,75,107,69,40,35,15,29,49,56,36,89,215,239,196,207,219,198,210,200,140,172,225,252,252,249,249,173,165,227,246,250,250,249,177,35,33,51,82,99,88,92,44,64,118,136,129,77,73,89,33,77,133,71,21,100,201,182,136,126,122,41,18,40,33,40,51,57,43,27,39,48,24,30,28,20,29,33,31,33,30,32,27,31,35,46,57,73,153,164,150,158,153,95,40,42,36,23,28,33,27,39,35,57,77,61,41,23,27,27,36,34,27,31,30,35,33,27,32,34,35,30,32,32,31,28,26,36,45,34,21,15,13,15,15,14,28,33,40,52,49,46,42,39,23,18,16,19,26,21,25,20,22,26,27,23,22,27,21,24,28,29,32,28,36,35,31,32,36,34,27,35,32,29,36,31,29,42,62,52,33,25,22,27,33,29,27,34,33,29,29,27,31,29,29,33,35,33,36,35,32,33,40,32,36,33,36,32,65,106,57,68,84,51,43,39,38,45,26,59,113,106,107,103,116,103,55,32,27,120,232,251,251,250,249,250,249,249,246,247,247,248,248,249,249,248,249,249,247,247,247,248,249,249,248,247,248,249,247,246,246,245,246,245,246,248,248,247,247,247,246,245,247,245,245,247,249,248,248,249,247,249,249,250,252,249,249,248,251,250,249,237,115,4,1,4,8,9,9,10,9,11,11,12,12,200,205,204,203,204,203,202,200,201,202,201,201,202,201,203,201,200,201,200,200,204,203,204,200,201,204,202,202,200,201,203,201,201,202,202,200,200,201,200,201,200,200,199,200,199,199,198,198,202,198,200,198,200,200,200,201,199,201,199,200,202,200,200,198,200,201,198,203,201,199,201,200,200,199,200,201,199,200,201,199,196,199,202,199,201,199,201,201,198,201,199,200,203,200,202,198,199,202,201,202,203,203,203,201,202,203,204,204,203,204,202,201,205,204,206,205,203,205,205,206,211,207,205,208,205,207,205,208,207,207,208,205,207,207,209,210,207,207,206,206,208,208,205,207,208,206,207,206,206,204,205,206,206,205,205,203,206,204,203,204,203,205,205,203,204,205,204,205,203,202,203,201,202,205,203,203,204,204,201,202,203,205,205,203,205,204,205,206,206,208,205,206,206,207,210,207,208,207,205,208,208,208,208,208,208,208,208,210,211,208,210,208,210,209,210,212,209,209,210,210,211,208,210,212,210,211
,211,211,211,210,211,211,213,213,212,213,214,214,212,214,213,212,212,222,220,214,224,213,211,215,212,214,213,214,210,209,212,211,212,212,212,212,210,210,212,213,211,216,199,163,153,153,148,145,145,134,104,89,103,117,115,129,136,128,125,118,118,113,110,106,105,108,102,106,100,87,97,103,103,107,105,105,105,107,110,128,131,128,125,122,113,101,103,110,109,104,111,103,108,108,104,105,96,97,104,109,106,77,53,78,97,132,171,77,61,95,67,56,44,63,89,87,84,107,114,106,99,92,86,85,94,65,66,66,68,89,59,119,123,89,78,50,72,100,83,42,66,58,50,64,59,66,52,46,38,44,39,77,105,83,68,38,47,25,59,98,95,46,44,108,98,109,116,111,137,199,205,213,230,230,224,195,245,203,160,195,178,195,162,114,66,120,253,189,43,28,40,78,62,33,21,17,45,51,44,60,189,249,217,226,248,242,188,193,129,122,204,239,251,251,239,171,165,219,249,249,248,248,198,53,26,72,116,123,119,59,36,72,117,139,122,75,66,97,76,34,90,131,63,27,50,126,144,131,134,98,37,18,25,24,27,29,31,22,23,49,46,31,28,23,29,27,30,31,31,33,34,33,69,68,130,146,155,170,140,169,179,160,132,89,64,38,29,25,27,29,35,33,51,78,66,46,20,22,29,30,30,33,33,30,35,29,30,33,34,30,29,31,34,33,27,25,18,21,19,15,21,20,16,17,16,24,30,41,63,65,61,51,33,20,17,22,21,23,23,23,23,24,24,25,29,24,21,24,27,28,29,27,26,37,37,39,32,32,39,27,33,37,27,34,33,27,53,64,47,32,27,27,22,31,34,34,39,46,42,33,35,34,34,34,35,30,31,35,36,35,30,33,36,33,29,40,27,62,103,60,75,83,56,64,67,50,53,29,56,119,115,113,106,121,99,57,36,35,83,209,253,253,249,247,250,248,246,248,248,246,246,247,248,249,249,249,249,248,246,247,248,249,249,248,248,248,249,248,246,245,244,245,247,247,244,247,247,248,248,247,245,244,247,247,249,249,248,249,251,249,249,248,251,252,250,251,250,251,250,250,237,116,3,1,4,8,9,9,10,10,12,12,12,10,199,205,202,201,206,197,202,202,199,203,201,202,202,201,202,201,199,200,201,200,200,202,201,200,200,202,200,199,201,200,200,203,200,199,201,199,202,201,200,201,199,201,200,200,200,199,199,197,199,198,199,198,198,199,197,198,200,200,198,199,200,201,198,199,200,198,198,202,198,196,201,199,198,199,199,201,200,202,202,197,200,200,200,201,199,200,198,201,200,199,200,201,199,199,198,198,201,201,202,201,200,200,202,202,201,201,201,201,201,204,201,203,205,202,204,203,202,203,205,204,204,206,203,204,206,206,205,206,206,207,206,204,208,205,207,208,207,206,203,208,206,205,207,207,205,204,204,205,203,202,205,203,206,206,205,203,203,205,205,202,202,202,201,203,204,205,204,202,201,204,204,203,203,200,203,203,201,203,204,203,202,202,204,205,204,204,206,206,205,205,204,206,206,207,209,208,207,207,208,210,208,207,209,209,211,208,210,211,208,212,212,210,211,210,210,210,211,211,209,212,211,208,210,210,210,210,208,212,212,212,213,213,212,213,216,215,215,214,213,215,213,212,217,231,216,222,230,210,214,214,213,212,211,208,212,211,214,213,210,212,210,212,212,211,212,214,217,222,223,229,236,236,233,225,225,219,190,183,211,218,235,247,248,249,247,251,252,252,251,251,248,245,244,244,236,219,236,244,243,246,242,246,246,249,247,244,239,227,223,222,225,225,231,244,240,231,240,227,244,249,248,242,208,222,239,251,233,187,208,241,237,204,177,89,150,249,234,205,173,165,226,191,168,191,200,205,173,232,250,250,238,185,220,240,240,233,107,110,113,74,76,46,76,173,191,152,142,174,171,134,103,122,160,125,174,178,118,100,90,151,117,123,156,130,153,139,139,68,101,166,152,163,151,151,161,208,206,210,228,229,217,197,240,177,168,198,159,216,171,92,101,198,252,112,2,94,105,49,39,26,15,24,49,43,39,145,240,220,230,249,252,228,177,131,69,145,233,244,248,247,168,158,223,242,248,248,246,196,54,27,62,108,110,111,87,33,48,99,139,121,71,48,97,96,61,38,109,136
,53,26,42,108,138,132,139,97,26,9,15,16,30,27,31,40,34,44,51,25,28,23,20,31,26,33,32,27,37,39,116,183,190,180,124,99,100,149,104,62,74,75,59,28,32,24,27,32,36,27,46,83,67,41,24,21,28,29,30,30,30,34,33,30,30,29,30,34,32,33,29,26,30,26,19,19,18,19,21,16,17,18,26,36,29,35,64,73,69,44,27,29,13,17,24,23,24,17,23,29,24,24,21,22,24,26,29,24,30,32,27,35,33,34,36,34,40,38,34,34,33,32,30,33,63,65,39,26,22,24,33,38,52,65,64,60,59,62,71,75,61,40,28,33,33,34,38,29,31,34,30,34,31,38,27,57,104,59,64,83,63,103,109,65,61,33,55,125,105,116,104,112,107,57,30,48,64,159,249,249,249,249,249,249,245,247,247,247,246,248,247,249,249,249,250,250,247,247,248,248,249,248,246,245,245,246,246,247,245,245,247,246,246,246,247,247,245,245,247,247,246,246,249,249,248,250,249,249,248,249,251,252,250,251,251,251,250,251,237,115,3,1,4,8,9,9,10,9,11,12,12,12,198,200,200,202,203,200,199,199,199,201,202,201,202,202,200,202,200,201,203,200,201,201,201,199,200,201,198,200,200,198,198,199,201,199,198,199,200,198,200,201,199,200,198,200,198,198,198,197,199,197,199,198,200,199,199,199,198,200,196,200,198,197,202,199,200,199,195,198,199,199,198,199,201,200,200,198,198,198,200,200,199,200,199,198,200,200,200,200,200,199,199,200,199,198,200,200,200,200,199,201,199,201,201,199,200,200,202,203,201,201,202,201,202,200,203,203,202,205,204,202,203,201,203,207,204,204,204,206,203,206,208,205,206,205,205,206,208,207,208,204,206,206,205,206,204,204,203,206,205,204,202,202,205,203,202,202,205,205,205,204,202,202,199,201,202,202,202,203,201,200,204,205,201,202,201,201,203,203,202,200,201,204,202,203,203,205,205,204,206,204,206,207,206,208,208,208,210,209,209,207,210,208,207,208,207,210,210,210,212,211,211,210,212,210,212,214,212,212,210,211,212,210,212,212,210,212,211,211,213,213,213,214,214,213,215,216,216,215,213,214,213,212,214,224,171,146,185,201,214,215,213,212,210,212,212,210,212,214,211,211,214,214,215,216,217,217,216,214,212,209,210,212,208,211,217,225,202,197,235,241,242,246,242,240,243,245,248,247,249,251,251,252,250,252,248,236,249,251,251,251,251,252,250,249,249,248,245,241,231,230,227,231,233,244,245,231,246,232,244,251,252,245,212,229,245,252,236,211,248,249,248,214,167,77,150,246,249,249,202,190,242,192,142,140,122,103,139,230,252,252,253,227,249,252,251,251,155,124,129,85,81,53,57,184,248,218,233,252,210,203,209,247,252,212,252,252,221,129,128,204,164,201,221,211,252,252,209,139,195,249,251,251,245,245,236,242,210,212,225,226,214,194,220,167,197,192,164,227,111,30,71,198,226,34,47,205,131,41,29,17,27,33,39,24,116,219,208,218,246,253,250,206,114,91,130,202,248,229,235,180,152,218,247,249,245,246,200,56,28,57,107,110,113,84,33,35,87,129,124,79,47,97,105,64,50,61,135,128,45,30,100,159,142,130,142,82,23,16,15,31,31,42,48,49,45,50,42,25,25,21,28,27,35,28,29,29,39,74,88,157,171,127,97,108,122,128,81,66,83,77,57,45,33,20,24,32,34,25,56,81,66,47,23,21,26,31,34,32,33,33,31,29,27,31,31,29,31,31,43,50,38,30,27,26,29,22,18,23,20,23,38,39,33,38,48,56,56,41,30,17,15,21,21,27,27,22,24,26,27,27,24,27,29,29,28,29,32,31,25,36,32,36,39,34,33,34,37,34,28,35,29,39,66,61,37,23,19,27,35,33,49,61,79,77,92,131,128,102,74,58,44,35,36,29,33,34,31,32,32,35,29,38,28,57,106,65,57,74,68,118,128,70,66,36,57,128,108,108,97,115,101,64,29,57,62,100,243,244,249,249,245,250,244,246,247,248,247,247,248,248,249,249,250,247,247,248,246,245,246,248,245,245,246,246,246,246,245,245,244,245,246,248,247,247,247,245,245,246,246,247,246,248,248,249,249,248,248,248,250,250,249,251,250,252,250,250,237,116,4,1,4,8,9,9,10,9,11,12,11,11,197,202,200,200,200,196,201,200,198,203,198,19
9,201,200,199,201,200,200,200,199,202,202,200,198,198,199,200,199,199,199,198,199,196,200,198,196,203,197,199,201,199,200,197,199,197,196,198,197,199,200,197,200,201,203,203,200,201,200,200,200,198,200,199,199,200,199,200,199,198,199,200,200,199,198,199,200,197,198,198,196,199,199,199,199,200,200,198,201,200,198,200,201,200,199,199,201,203,200,200,201,202,200,200,204,200,201,200,200,202,200,201,201,200,200,201,204,201,203,204,203,205,203,203,203,204,207,202,204,205,206,207,203,204,203,206,205,203,205,205,207,205,206,206,204,201,203,205,203,204,204,207,205,203,203,202,204,203,204,202,202,203,201,203,203,205,204,203,201,203,203,201,203,202,199,202,202,203,202,202,203,203,203,204,202,204,205,204,206,207,207,206,206,205,207,209,207,206,207,207,207,207,207,209,207,210,208,210,211,209,211,211,210,212,214,213,214,215,213,212,213,215,211,212,212,213,215,213,213,213,215,215,215,216,216,217,214,217,213,215,214,211,214,214,207,92,52,150,201,218,217,213,214,214,214,216,210,213,214,214,216,217,219,216,214,208,203,199,194,189,192,194,197,203,208,217,225,205,199,224,219,223,223,216,219,217,218,218,219,221,221,223,224,225,225,222,212,226,230,221,226,225,224,222,223,220,222,222,222,222,213,210,208,208,213,207,202,218,207,221,228,229,214,180,212,222,236,199,186,232,234,234,193,153,66,142,244,241,232,138,102,117,66,42,41,28,27,20,50,87,110,106,74,133,162,179,157,59,105,108,94,111,59,55,148,194,159,185,219,174,188,199,249,225,176,208,168,212,135,104,159,95,150,208,212,252,252,189,158,224,241,251,247,241,244,237,243,206,210,224,225,218,197,204,181,220,169,134,184,87,53,63,176,159,1,144,240,85,39,22,23,39,45,30,52,194,202,203,244,252,252,230,143,66,150,192,242,244,223,177,150,214,249,249,247,246,209,63,32,59,101,116,113,89,31,35,96,130,132,96,63,102,113,69,51,44,61,141,111,41,32,53,148,151,134,141,60,26,28,28,37,40,58,57,60,50,48,36,18,28,22,24,28,36,32,27,33,41,48,50,81,114,112,105,102,117,137,104,81,66,47,42,44,37,19,20,29,34,26,57,84,68,51,25,19,23,27,36,28,31,33,30,33,39,37,33,35,33,39,89,82,39,33,28,37,35,33,30,22,25,26,33,43,39,42,41,44,42,33,21,16,19,19,24,23,25,25,23,26,24,33,50,43,35,28,31,27,27,32,27,35,37,36,34,36,37,35,33,36,33,35,30,46,83,60,36,22,16,29,33,35,37,63,90,87,117,123,102,98,97,91,61,42,37,34,33,33,34,34,34,31,31,40,26,59,111,71,54,51,45,70,76,50,51,33,44,122,109,110,93,107,117,72,30,51,65,51,201,240,249,249,247,252,247,246,247,245,245,245,248,248,249,250,249,249,249,248,248,247,246,246,246,245,244,244,244,244,244,245,244,244,244,244,245,246,245,244,248,248,247,247,246,246,247,246,248,249,248,249,250,252,248,248,250,251,250,250,237,116,3,1,4,8,9,9,10,9,11,12,10,12,198,202,200,198,202,199,200,199,199,200,200,199,198,199,200,200,197,199,200,199,200,198,199,198,198,199,198,199,198,198,198,197,198,199,199,196,199,197,199,198,198,198,196,195,198,200,197,198,199,198,200,200,200,200,199,201,200,200,199,200,201,199,198,199,199,201,200,199,200,199,198,199,199,196,199,199,200,200,199,201,199,198,198,196,199,200,199,199,200,198,199,199,199,199,200,200,200,202,199,201,199,200,200,199,202,201,199,199,200,200,200,199,200,200,199,200,202,204,202,202,205,204,203,205,205,206,206,205,204,205,205,201,203,203,203,205,205,203,202,206,205,204,204,204,201,204,204,205,201,204,206,203,204,203,204,204,202,202,201,202,202,204,203,203,201,200,202,201,203,203,204,203,201,203,203,202,202,205,204,202,203,205,203,203,204,204,205,204,207,205,206,208,205,208,205,207,208,204,207,206,208,207,209,209,209,210,208,209,212,209,212,212,212,212,210,214,213,214,213,212,214,214,214,214,213,214,216,214,217,216,214,214,213,213,214,21
4,213,213,212,211,212,211,218,206,115,118,191,211,220,212,216,215,217,224,221,218,216,217,214,214,210,203,198,194,192,188,194,195,199,208,210,216,217,220,221,228,213,202,222,214,216,219,213,215,214,215,216,216,218,219,218,220,216,226,225,210,232,231,219,217,216,219,218,217,219,218,218,220,219,222,214,208,203,206,205,188,207,197,212,228,225,203,175,209,220,232,171,176,224,222,233,185,151,67,145,241,236,216,83,31,44,49,79,105,110,84,88,79,68,74,58,27,27,45,66,64,12,32,42,41,81,63,43,94,93,48,71,110,83,98,107,122,105,66,70,49,107,92,53,54,29,144,217,203,250,248,154,175,229,228,235,227,228,229,226,234,203,213,226,229,226,191,193,207,236,151,122,162,115,89,96,192,90,17,212,203,38,26,24,34,54,41,30,116,188,186,232,253,251,241,197,114,121,208,222,229,240,187,145,206,252,252,248,248,217,71,34,60,101,112,113,91,35,36,107,147,134,96,55,90,117,78,52,40,36,72,146,105,45,22,31,130,139,134,123,47,31,29,33,40,39,45,45,47,42,28,21,24,23,25,29,29,34,31,28,29,29,61,87,77,77,69,59,42,66,87,63,45,22,29,33,23,31,29,25,27,36,29,63,91,73,50,24,21,21,30,32,33,33,28,29,57,85,58,33,50,60,112,135,98,62,52,43,39,56,76,49,26,23,23,37,43,52,43,39,43,32,25,29,19,20,24,24,29,25,28,24,26,23,69,98,50,42,34,25,33,34,31,32,36,37,38,34,36,36,33,33,32,33,31,29,66,88,57,30,21,18,30,42,46,60,77,73,61,77,87,88,103,112,106,81,66,52,36,36,33,33,35,32,36,31,38,26,65,122,79,45,36,28,32,39,32,33,24,38,107,111,107,94,100,106,72,39,46,69,28,136,237,252,252,250,248,249,249,247,246,247,247,249,248,248,248,249,249,249,248,245,244,245,245,245,246,244,244,244,244,244,244,246,245,246,245,244,244,244,245,247,247,246,246,248,247,245,247,248,249,248,249,250,250,250,250,250,251,250,250,237,116,4,1,4,8,9,9,10,9,12,12,13,13,200,203,200,201,200,196,199,200,200,200,198,199,198,196,199,200,198,199,199,198,198,198,198,197,196,197,198,197,198,199,199,198,198,199,199,198,200,194,197,199,195,199,194,197,198,196,198,196,198,198,196,200,196,198,197,196,200,198,197,199,201,199,198,198,200,198,199,198,199,199,196,200,200,200,198,197,199,199,200,199,200,198,199,199,198,198,198,199,199,198,198,198,198,197,199,199,199,198,199,200,200,200,198,201,199,199,203,198,198,199,199,199,198,200,199,199,200,201,202,200,201,201,200,203,203,202,202,203,203,202,201,202,203,202,204,203,201,203,203,204,201,201,205,203,201,202,203,202,203,201,203,200,202,205,202,203,200,202,204,203,203,200,203,200,202,202,202,201,202,204,200,202,205,200,203,203,203,203,202,204,203,206,205,203,206,205,203,203,204,207,206,207,208,206,207,205,208,206,206,207,206,208,206,205,207,206,208,210,209,208,210,210,212,212,212,213,214,212,212,214,213,214,213,212,212,214,213,214,214,215,215,212,214,214,214,213,213,211,214,213,210,213,221,223,189,215,237,222,220,222,228,231,242,241,227,215,211,207,199,195,192,191,189,193,198,206,211,214,215,216,217,218,216,217,217,224,206,198,220,210,213,216,213,218,215,217,217,217,217,219,218,219,215,227,231,204,231,236,219,223,219,219,217,216,216,220,220,221,221,220,225,220,211,212,204,194,201,194,210,225,229,198,182,218,226,232,172,193,229,232,240,191,158,65,152,245,244,222,136,130,149,167,190,212,211,220,244,248,243,235,210,153,190,211,221,200,59,31,11,17,77,66,41,100,153,125,141,152,129,118,113,110,99,110,130,138,158,108,58,47,67,183,234,213,252,244,153,211,239,232,251,240,248,245,244,249,222,232,240,244,241,184,196,244,237,170,149,164,160,108,128,195,68,76,246,123,6,37,22,49,50,39,96,128,184,226,253,253,250,203,161,168,184,230,199,214,192,155,199,238,252,252,247,213,72,29,57,105,114,108,99,37,47,117,143,144,86,66,59,63,83,55,48,52,52,89,153,94,44,19,63,157,115,122,101,33,
35,36,24,22,23,19,19,25,27,16,24,24,22,24,27,36,31,34,34,50,64,72,82,61,49,40,47,36,25,53,53,27,48,59,27,26,46,46,31,22,29,24,63,94,75,55,26,23,22,27,35,29,33,31,33,85,114,80,74,139,165,174,150,131,159,132,79,80,155,141,60,31,23,21,25,44,61,48,42,41,29,29,33,19,23,22,24,53,49,32,26,36,64,108,108,76,64,52,35,38,71,60,34,37,36,40,35,31,33,35,33,28,32,30,34,79,96,52,30,19,22,61,65,43,51,57,57,47,38,41,62,71,72,78,71,61,43,33,29,33,33,37,35,30,31,41,28,61,121,81,52,51,49,36,32,38,31,39,44,104,114,107,98,98,105,73,43,35,77,29,41,188,247,247,246,246,249,251,249,247,248,246,246,246,247,248,248,247,247,246,247,247,246,245,247,247,244,244,244,244,245,244,244,245,244,244,246,244,245,245,245,247,245,246,245,246,246,245,246,246,246,249,249,250,249,249,251,251,250,250,237,115,4,1,4,9,9,9,10,9,11,11,11,11,198,201,198,198,198,198,198,196,199,199,199,197,198,197,198,198,199,197,196,199,196,196,198,198,199,196,196,199,195,197,195,195,197,194,198,197,198,197,196,197,198,196,196,196,195,195,193,198,197,198,198,196,198,196,196,198,196,198,197,197,199,199,200,199,196,196,197,195,198,198,196,199,198,198,198,196,201,197,196,198,196,200,198,198,199,198,197,200,200,199,199,201,200,197,200,199,198,197,196,198,199,199,198,199,198,199,200,199,199,198,201,202,200,200,198,200,200,202,202,201,204,201,200,201,201,202,201,200,201,203,203,199,204,201,201,203,203,204,201,205,203,201,202,201,203,203,201,203,201,201,200,200,202,202,203,201,199,200,202,203,201,199,200,201,201,203,203,202,203,201,201,205,203,201,201,203,201,203,204,202,203,205,203,203,205,204,206,205,205,203,202,204,204,207,205,206,204,203,207,205,207,206,207,205,207,208,206,208,209,209,210,212,210,210,211,212,212,212,212,213,216,212,211,212,214,213,214,212,212,213,212,213,212,215,215,212,214,213,213,213,212,213,222,211,207,236,237,229,225,230,236,217,217,222,211,204,200,201,199,201,205,212,217,221,226,225,230,226,225,227,226,225,223,224,223,225,209,199,223,224,223,227,224,227,229,230,231,232,230,230,230,234,234,241,228,159,189,238,238,240,234,235,235,235,239,238,238,239,240,243,241,244,244,240,236,213,225,214,216,237,235,208,203,239,246,247,191,219,249,249,250,186,134,63,150,245,252,238,208,207,217,223,226,216,215,252,252,252,252,252,252,246,252,252,250,250,126,38,28,39,88,57,39,162,249,251,252,252,229,173,234,247,251,231,242,251,215,230,180,120,146,210,241,225,252,246,167,242,252,252,252,252,253,253,252,252,251,251,252,252,252,185,210,251,191,129,124,199,207,116,158,155,61,170,220,50,8,33,33,52,49,132,134,127,217,253,252,244,218,141,180,212,206,208,179,159,141,199,224,252,248,248,222,67,24,49,94,112,114,97,43,52,108,141,140,94,55,56,29,41,64,55,57,61,51,98,150,78,38,14,96,158,102,117,85,33,35,31,13,17,19,19,23,20,25,28,23,22,23,21,28,34,31,33,36,80,104,69,40,43,46,44,57,48,55,84,75,53,65,63,40,41,60,54,39,23,28,27,62,114,95,67,30,16,24,24,35,30,34,31,38,66,81,86,88,113,122,93,57,58,121,134,115,120,141,112,57,31,17,24,43,98,113,60,34,33,31,30,26,22,25,22,29,57,61,46,59,73,95,113,129,158,146,91,73,133,137,73,31,27,38,35,35,30,36,36,31,36,32,31,44,97,92,47,27,16,46,86,69,41,39,65,74,53,37,55,68,61,50,46,47,36,27,24,27,33,33,34,33,34,33,35,29,56,114,89,58,62,61,48,58,53,47,50,51,123,121,104,98,93,111,82,41,32,76,49,30,89,224,236,245,245,250,251,250,251,248,246,247,245,246,248,248,248,246,245,246,248,246,245,245,245,244,244,244,244,243,244,244,244,241,243,244,244,246,244,244,244,245,244,246,246,244,246,246,248,247,249,248,248,250,248,249,249,248,249,236,116,4,1,4,8,9,9,10,10,12,13,12,12,195,201,199,199,200,196,198,198,198,198,197,198,198,196,198,197,197,198,194,196
,199,196,197,199,195,195,196,196,195,193,195,195,196,200,197,194,197,193,195,196,196,198,193,197,197,194,198,197,195,197,198,201,196,200,199,197,198,198,200,198,199,196,198,199,199,199,199,196,197,199,198,199,198,199,198,199,200,198,198,196,198,198,199,202,202,201,200,200,199,198,199,199,198,200,199,200,200,198,201,200,201,201,198,202,199,199,200,198,199,201,201,199,198,200,199,199,200,199,201,198,199,202,199,200,203,201,203,200,200,203,202,202,201,201,203,203,203,203,200,202,200,201,202,201,203,201,203,201,201,203,203,202,200,200,201,204,201,202,204,200,199,200,201,200,202,199,200,200,199,203,200,200,203,199,203,203,200,202,202,205,201,202,203,202,203,202,203,204,203,205,205,205,203,200,206,205,205,205,205,204,205,207,206,207,207,207,210,209,209,213,211,211,215,213,214,212,213,212,214,215,215,218,216,217,217,220,217,219,221,219,222,222,225,224,224,225,224,225,229,230,230,230,230,214,216,244,245,244,240,240,203,110,97,165,216,227,227,235,241,245,248,250,251,249,251,252,251,251,248,248,247,248,247,248,249,247,235,233,248,249,250,251,251,251,252,252,251,251,252,252,251,251,252,252,217,106,150,233,250,250,252,252,251,251,252,252,251,251,252,252,252,252,252,252,252,236,235,233,229,225,229,200,208,236,242,234,179,224,234,232,219,140,113,59,143,240,240,217,187,182,165,150,139,147,175,234,246,246,247,246,222,167,217,240,252,228,89,69,69,82,64,18,37,133,199,199,194,202,170,129,190,215,236,190,212,199,171,245,186,139,145,183,194,185,246,188,149,227,227,233,232,217,218,216,214,212,205,226,239,240,234,135,184,215,99,74,103,195,191,123,173,92,54,209,150,2,28,35,26,43,122,204,133,153,246,252,241,215,152,142,236,227,181,186,136,122,189,229,243,252,244,236,91,22,48,90,113,111,103,46,51,102,129,139,84,63,56,33,14,19,55,53,54,55,49,76,120,78,33,26,128,153,96,118,69,21,23,14,18,21,16,17,21,24,23,29,23,26,27,22,27,25,30,31,32,58,106,97,65,53,40,67,83,78,108,134,118,105,112,94,67,53,50,46,33,19,26,26,67,113,107,74,36,22,17,22,33,34,31,36,83,95,45,33,24,27,24,27,29,19,27,29,33,31,26,43,77,53,31,33,112,201,145,64,35,31,29,27,27,24,21,19,27,34,47,45,47,48,51,41,33,83,113,102,104,116,94,60,34,29,33,31,35,34,34,31,34,37,41,31,60,108,75,44,29,22,69,76,71,56,54,107,101,74,74,97,99,64,39,35,31,24,33,29,23,29,27,34,31,36,37,37,26,52,115,90,75,70,67,63,76,71,57,62,50,121,131,107,95,88,111,90,45,22,163,177,63,46,135,227,250,250,251,247,251,252,251,246,248,247,247,245,247,247,246,244,244,245,244,244,247,245,244,244,243,242,244,242,242,244,243,242,243,244,244,244,244,243,244,244,245,245,246,247,246,248,246,248,248,247,247,247,247,248,247,247,236,116,3,1,4,8,9,9,10,9,12,12,12,12,200,203,200,200,199,199,196,196,200,199,198,200,202,198,199,199,196,196,195,196,196,196,196,195,198,196,197,198,196,195,196,195,196,196,196,196,197,195,196,197,197,195,194,197,198,198,196,198,197,198,196,195,198,198,198,200,196,198,197,197,199,194,198,200,197,199,198,199,199,198,198,197,198,202,199,197,200,197,201,200,200,200,199,200,200,199,201,199,198,200,200,202,200,199,200,200,202,202,201,202,202,200,199,198,196,199,200,198,201,200,201,199,201,203,199,202,199,201,200,198,201,200,199,200,200,200,200,200,200,200,201,201,205,203,202,202,200,200,200,202,200,203,203,202,200,200,202,204,203,202,201,203,202,200,201,202,202,200,202,202,200,200,201,200,200,200,199,201,200,202,201,200,201,201,202,200,202,200,201,203,203,202,201,203,203,201,204,206,205,205,205,206,206,203,204,208,208,206,206,206,208,206,208,208,209,208,208,213,213,213,212,212,211,212,214,212,212,215,219,226,234,237,240,239,242,243,246,246,246,247,247,249,249,251,251,251,252,252,252,252,251
,251,246,223,236,249,251,248,235,240,194,95,101,192,244,248,252,252,249,249,251,251,250,249,249,246,249,248,245,245,244,243,242,244,248,248,240,232,241,246,243,244,244,240,242,239,241,235,237,232,221,228,226,218,173,104,133,184,205,222,221,218,218,219,217,213,214,211,205,210,205,198,198,198,199,174,181,184,178,186,169,155,158,166,167,146,116,138,127,125,126,106,106,77,96,139,150,135,112,93,94,83,77,89,85,114,137,126,138,108,100,57,51,90,118,114,58,86,107,86,51,28,53,62,57,59,51,59,76,35,42,38,83,61,47,74,53,107,112,104,107,104,103,80,133,71,68,130,98,95,79,68,77,72,73,58,76,90,86,98,141,101,71,125,98,117,112,161,149,120,191,61,79,217,74,5,45,23,25,92,204,203,140,193,251,251,226,148,141,200,249,202,157,175,149,192,234,251,250,250,247,103,29,47,93,112,113,105,51,44,104,137,134,91,62,67,34,18,12,17,39,53,56,57,46,71,110,73,33,41,152,137,101,118,49,19,19,11,16,19,23,22,21,25,26,24,27,23,24,25,28,30,31,34,31,28,67,106,93,54,54,122,139,105,116,139,142,120,111,97,60,44,37,40,32,20,19,26,52,78,84,64,33,30,23,22,30,34,27,58,130,96,34,21,17,26,26,25,30,28,28,24,24,28,24,47,103,80,36,42,134,172,90,49,42,32,31,27,29,32,18,29,53,34,23,21,20,17,22,25,21,25,30,30,30,29,32,74,63,33,38,32,34,33,33,31,34,40,46,53,83,100,61,39,21,42,88,94,85,67,75,170,144,103,117,133,128,78,53,51,29,34,46,34,21,24,29,33,33,34,37,37,31,43,91,82,79,86,70,69,83,71,68,69,48,123,132,110,102,92,112,94,38,34,207,236,177,121,38,168,245,245,246,244,252,252,249,249,250,248,248,247,245,246,246,245,246,247,244,244,245,245,244,244,244,244,242,242,243,243,244,244,244,243,244,244,244,242,244,244,245,246,245,246,246,246,247,248,248,248,248,248,249,250,249,248,236,116,4,1,4,8,9,9,10,10,11,12,11,11,198,198,196,198,199,195,196,198,197,197,198,195,196,197,197,196,198,195,195,196,196,196,194,195,194,195,196,195,195,193,196,194,193,197,194,193,198,196,196,194,196,199,197,197,197,195,197,196,196,197,196,196,196,198,195,197,197,196,197,196,198,196,198,195,195,195,196,195,197,198,197,200,196,198,198,196,198,196,199,200,199,200,200,199,198,198,200,199,199,200,200,199,199,200,200,199,200,200,199,198,200,202,199,200,200,199,200,198,200,200,198,201,200,203,200,200,201,201,203,199,201,201,199,202,203,199,199,203,201,202,199,201,202,200,201,203,201,202,202,203,199,203,203,201,205,199,200,202,200,202,203,200,201,201,200,202,200,201,201,200,200,199,200,200,201,199,202,200,198,202,200,201,201,201,202,203,201,203,202,203,204,202,201,203,203,203,203,204,205,203,204,204,205,205,208,207,208,207,208,209,206,206,209,207,208,212,211,212,212,213,212,211,214,211,214,210,212,227,228,246,250,250,252,252,251,251,251,251,251,251,251,251,250,250,250,250,248,248,242,236,230,220,221,219,188,184,190,182,186,199,190,155,170,192,191,194,191,181,174,173,174,171,176,167,166,159,155,160,157,163,162,162,159,157,163,159,158,153,156,159,158,155,151,151,151,152,151,144,160,148,119,125,121,137,128,112,130,118,116,122,127,127,119,116,113,111,106,100,99,97,93,87,80,87,80,96,85,100,126,110,134,116,112,118,92,72,51,57,46,60,71,83,96,74,77,75,81,79,61,76,90,81,90,73,38,39,41,46,34,21,34,37,19,16,41,52,53,110,125,101,52,46,67,63,33,22,23,41,44,25,47,16,37,21,24,41,33,59,31,80,97,69,78,57,33,8,35,55,25,27,17,23,29,42,46,18,46,35,11,27,128,100,42,97,134,147,141,174,104,136,198,63,151,231,41,15,40,3,55,178,206,191,170,223,250,244,179,134,191,242,250,158,127,174,210,246,252,252,249,249,129,27,42,85,121,117,104,49,49,98,133,143,89,70,68,39,19,13,15,23,46,68,64,68,51,95,127,67,23,55,160,130,118,118,42,18,18,15,17,16,24,26,26,25,20,24,27,23,26,25,30,29,31,30,34,30,38,78,77,53,48,80,88,76,114,134,10
3,82,59,54,47,29,32,39,39,24,18,31,54,68,72,51,38,53,35,24,30,30,28,50,115,89,33,27,15,27,35,37,41,34,37,33,22,29,31,51,114,90,40,31,46,62,44,40,44,33,25,32,33,24,19,39,56,35,24,20,17,24,26,24,24,23,25,23,28,27,38,93,88,42,35,35,34,27,32,36,43,71,77,81,94,81,54,27,38,69,107,100,73,51,81,130,113,120,128,134,120,78,62,49,39,51,49,32,23,22,26,31,37,31,32,43,32,34,54,41,54,65,46,42,50,53,65,77,48,113,139,122,111,90,118,108,37,32,190,227,229,167,25,46,213,238,247,247,249,251,248,251,251,248,247,245,245,246,245,245,245,247,245,244,245,242,243,243,244,244,244,243,244,244,244,244,244,245,242,242,244,244,244,244,244,245,245,248,247,247,246,245,247,247,245,247,247,248,247,247,236,115,4,1,4,8,9,9,10,10,11,11,12,12,195,199,195,196,196,196,196,195,198,196,198,198,195,194,194,197,194,198,197,198,196,195,196,194,195,193,193,195,196,195,195,194,195,195,195,194,195,195,194,196,193,195,198,194,196,196,192,195,196,195,197,198,197,195,195,198,197,196,198,196,198,195,196,196,196,198,195,197,198,198,198,196,198,196,194,198,198,198,195,196,199,196,199,198,200,198,198,200,201,200,199,200,199,199,201,202,201,200,200,200,199,198,201,203,201,200,200,203,203,200,200,198,200,200,200,200,197,201,201,200,199,202,201,201,202,199,202,201,200,201,201,198,200,200,200,200,200,201,200,200,201,202,199,199,201,203,202,200,198,200,201,200,200,200,201,201,201,200,200,200,198,199,199,200,200,199,199,202,201,200,201,201,201,200,202,201,203,201,203,205,200,203,203,204,205,202,204,204,203,205,205,205,206,208,211,208,209,209,210,210,209,208,210,211,211,212,212,212,213,213,214,213,213,212,212,210,212,223,190,177,183,180,181,175,176,171,170,167,166,163,158,155,151,150,143,140,141,134,129,125,116,119,169,179,98,73,87,91,97,103,101,108,113,104,98,89,93,82,72,83,71,73,76,79,73,66,66,61,67,80,84,81,73,62,62,64,68,65,64,67,66,63,65,59,59,60,69,64,101,105,48,53,52,89,112,108,119,106,88,63,62,64,56,56,50,49,47,47,45,51,48,47,45,40,43,63,61,51,79,102,115,121,107,94,111,92,60,53,25,44,61,71,88,68,63,77,72,83,94,89,109,90,90,87,36,24,24,29,28,32,45,56,47,44,41,39,69,118,135,110,65,57,68,80,56,41,36,53,62,50,55,22,57,34,37,63,36,51,27,61,90,71,86,72,46,16,16,37,24,32,23,37,43,35,46,27,53,39,7,25,70,60,41,84,114,114,163,161,84,155,179,93,210,236,49,18,23,6,121,191,190,207,201,229,242,201,148,181,242,249,230,128,97,163,232,246,253,253,249,160,36,41,87,111,118,84,57,46,95,127,137,94,57,59,39,19,14,12,25,47,81,101,103,117,88,141,141,44,12,76,161,112,129,112,33,21,11,16,22,19,23,23,30,27,21,23,25,21,24,26,28,31,27,29,37,28,35,54,77,69,35,33,25,68,98,76,56,49,37,53,46,24,37,51,43,28,21,24,53,68,60,49,55,61,33,31,33,27,33,34,96,95,41,29,24,29,35,45,40,38,37,36,24,29,32,73,122,74,23,22,28,31,37,37,36,37,33,37,30,29,19,34,53,30,25,19,20,27,27,27,26,23,24,24,25,33,41,109,101,46,36,25,36,34,32,31,68,87,82,99,85,63,39,26,57,77,89,77,53,42,29,55,113,142,127,82,71,57,58,55,42,46,36,27,31,29,25,27,32,36,33,32,33,35,30,29,29,32,31,28,27,29,59,69,43,108,141,134,113,89,115,100,52,14,129,154,147,129,25,29,168,237,250,250,252,252,248,249,249,248,245,245,245,245,245,244,244,245,245,244,243,242,243,244,244,242,242,244,244,242,244,242,244,244,244,244,244,245,244,244,244,243,245,244,244,245,244,245,246,246,244,244,246,247,247,246,236,116,4,1,5,9,9,9,10,9,12,12,12,12,196,199,198,195,199,196,195,198,197,199,196,195,197,198,196,192,196,197,195,193,197,194,194,196,193,193,194,198,195,193,197,196,194,196,195,195,195,191,195,196,196,195,195,196,196,196,196,194,193,196,196,196,196,195,196,195,194,195,195,196,195,194,199,196,196,198,198,198,197,197,197,200,199,198,198,199,199,198
,198,196,196,195,200,199,198,199,198,197,198,200,200,201,200,199,202,198,199,199,199,200,199,200,199,200,202,200,201,201,202,201,200,202,196,200,200,200,200,199,200,200,201,200,200,198,200,198,200,202,200,201,201,202,200,201,202,201,200,201,200,200,200,202,204,200,203,201,199,202,200,201,200,200,202,201,200,201,202,202,200,200,201,198,199,202,200,201,201,200,199,198,200,201,201,198,199,201,200,204,203,205,203,203,205,203,205,202,205,206,206,207,208,210,210,210,211,212,213,212,212,214,212,213,214,212,214,214,213,214,213,214,214,211,212,211,212,212,210,207,104,19,17,13,12,12,11,11,9,10,11,10,10,10,10,10,10,10,10,10,12,10,12,9,61,91,27,5,10,9,12,10,13,12,10,12,12,12,13,12,13,13,12,12,12,12,12,12,12,12,13,13,12,12,13,13,13,14,13,13,14,13,14,14,13,14,13,12,14,12,21,23,12,13,12,14,15,17,23,18,14,14,13,14,14,15,14,13,14,14,14,14,15,15,13,15,13,18,17,15,18,15,60,76,60,66,53,72,89,58,21,10,11,17,30,22,35,67,67,85,90,84,84,66,66,26,8,26,26,27,30,31,33,41,40,31,32,22,43,93,133,114,74,65,68,86,46,32,24,25,25,44,34,12,58,28,33,28,12,30,11,27,38,39,77,95,48,27,21,20,27,32,33,29,36,27,29,11,33,39,12,13,24,12,39,83,66,86,174,105,95,194,189,171,248,190,21,38,15,46,150,174,210,238,212,233,201,144,184,240,249,249,199,141,133,174,227,248,249,249,156,44,38,81,109,109,112,46,53,102,130,138,89,66,55,35,19,10,18,27,46,86,117,124,123,129,101,156,134,30,18,105,156,102,122,87,29,20,9,20,19,22,24,24,25,24,27,24,24,25,23,24,29,27,27,36,34,32,33,55,93,59,27,22,21,60,77,67,39,30,27,35,36,24,46,57,42,32,21,19,40,55,61,45,49,53,32,29,27,29,37,21,66,96,45,30,28,30,38,39,37,38,37,38,31,28,28,74,113,48,19,24,29,41,30,40,41,39,38,36,31,24,16,25,57,36,24,21,19,29,23,28,28,22,32,24,29,28,65,122,81,39,31,29,30,33,36,44,73,85,81,93,81,50,35,23,54,74,67,59,41,37,33,45,88,98,71,56,49,36,47,54,45,39,34,32,33,27,28,29,34,35,27,37,33,33,33,27,39,33,37,36,27,29,37,48,27,96,139,131,119,83,106,101,58,30,45,57,24,42,26,28,182,244,250,250,246,252,249,250,249,249,246,244,245,245,244,245,245,245,245,244,244,244,244,244,244,242,244,243,244,244,243,244,242,244,244,244,244,243,245,243,244,244,243,244,243,244,244,244,244,244,243,243,244,246,243,245,237,116,4,1,5,9,9,9,11,9,12,12,13,12,196,201,197,199,198,195,197,196,198,194,195,196,193,197,198,194,194,193,194,196,195,197,194,196,196,196,198,195,196,194,195,195,196,198,196,195,196,194,197,196,194,196,197,193,195,198,196,197,194,194,197,193,195,196,195,196,194,194,195,196,196,195,198,196,198,198,196,197,196,195,195,198,197,197,196,198,198,197,196,198,200,199,199,195,200,200,199,200,198,199,200,200,199,199,199,198,198,199,200,198,198,199,201,198,198,199,198,200,199,198,200,199,200,200,199,202,200,202,199,199,200,200,199,198,202,200,199,200,199,200,199,200,201,200,202,201,200,203,200,201,201,199,200,203,201,200,201,198,200,201,201,200,200,203,201,202,201,201,203,200,201,200,200,199,201,203,201,200,199,199,201,201,200,200,199,200,203,200,204,205,203,207,204,204,205,203,204,207,209,208,212,213,214,214,214,212,214,214,215,214,216,217,213,215,216,214,214,213,214,213,213,212,214,213,212,213,216,211,115,48,41,28,32,28,25,22,19,19,19,18,16,17,16,14,12,12,13,10,11,10,13,8,24,37,12,9,12,10,11,10,12,11,11,12,12,13,12,12,12,12,12,12,13,13,13,13,13,14,12,12,12,12,13,12,13,13,14,14,13,13,13,13,13,13,14,14,14,14,13,14,14,15,13,14,14,14,15,14,14,14,17,15,14,14,15,16,15,17,15,15,15,14,16,15,14,14,14,15,14,15,13,34,46,28,37,41,59,79,49,21,13,12,27,14,48,76,62,91,81,62,82,64,48,14,10,14,13,15,20,24,33,32,24,42,38,10,44,95,120,105,80,61,72,81,49,23,15,30,19,36,23,26,61,39,57,32,36,54,30,39,66,63,71,92,63,64,49,42,129,3
10,83,90,88,73,89,75,61,64,52,49,71,100,132,133,134,120,116,122,113,119,119,117,126,111,110,107,92,91,119,147,148,162,158,147,141,151,151,146,147,141,128,128,125,110,98,86,87,75,66,63,57,61,62,65,63,63,62,62,68,65,61,70,63,69,67,63,56,56,61,55,54,49,46,44,44,42,39,39,32,37,36,24,26,27,29,35,35,39,41,45,46,47,36,16,16,13,20,19,14,18,16,22,20,17,18,21,19,21,20,72,132,119,97,84,78,77,47,15,14,13,16,21,21,17,21,17,19,18,18,19,18,18,23,42,57,55,48,32,31,39,37,41,47,50,54,55,60,63,66,66,72,71,66,66,63,59,63,57,63,65,62,83,94,115,125,89,135,130,127,121,121,106,89,86,71,62,56,53,54,48,48,53,59,61,53,50,34,18,26,29,39,52,54,47,33,21,84,127,118,107,102,95,89,110,106,120,119,99,130,134,146,211,229,238,248,248,246,246,231,226,233,240,247,247,230,235,247,224,226,227,225,224,221,222,220,222,222,224,224,224,229,227,229,228,226,229,226,225,223,223,223,224,226,226,227,228,228,230,230,229,230,233,232,230,228,117,4,1,5,9,10,10,10,10,12,12,13,12,202,206,203,202,201,198,197,199,200,200,199,199,200,199,198,195,197,198,198,199,196,194,198,195,194,195,198,194,194,194,193,195,196,194,193,193,193,193,193,190,191,190,190,191,189,191,192,190,190,192,189,193,192,192,193,191,190,189,191,188,189,191,192,189,191,191,191,192,191,192,190,193,191,190,194,190,191,193,190,193,191,192,195,192,196,196,196,196,195,197,196,198,199,198,198,198,196,199,198,196,197,199,198,197,199,198,199,199,202,200,200,199,200,200,198,200,199,200,201,198,202,202,202,201,201,200,200,202,202,202,201,203,202,200,202,202,202,203,202,205,203,206,206,202,207,207,206,208,206,208,207,206,206,206,204,203,203,203,205,203,205,206,203,201,204,206,206,205,203,205,204,202,204,205,206,207,207,206,206,208,209,208,208,210,210,211,212,213,212,212,212,210,211,212,212,214,213,213,213,212,213,210,207,208,209,208,211,208,207,210,208,208,210,209,208,209,207,208,210,210,210,208,210,208,206,208,209,209,209,212,208,208,209,209,210,206,206,208,211,210,208,210,207,208,208,208,209,208,207,210,208,209,208,207,208,207,208,203,206,210,206,205,209,208,205,206,204,203,196,199,200,198,199,196,199,200,198,198,199,196,196,196,187,185,198,200,194,195,195,198,198,188,197,198,197,201,199,197,190,197,200,199,200,198,201,199,199,198,199,200,194,190,194,199,205,199,156,164,203,197,163,159,171,185,192,182,182,133,105,106,91,105,93,77,66,62,71,139,215,178,190,204,148,175,203,207,212,185,153,165,159,154,173,111,194,171,76,91,88,95,73,108,120,184,199,199,204,147,160,134,186,221,205,218,214,219,235,207,121,223,211,131,226,218,224,224,218,214,220,205,105,56,46,60,53,36,12,15,18,15,23,19,16,21,19,18,25,13,19,20,14,19,18,19,23,19,16,20,20,31,46,41,44,74,99,94,65,56,42,18,12,19,35,65,92,100,110,84,41,43,98,107,111,148,113,131,177,120,103,125,102,124,142,119,100,71,61,52,52,52,55,63,55,57,53,55,65,63,60,58,61,62,67,68,59,63,60,58,63,63,66,60,59,62,65,63,70,66,69,65,69,71,67,65,63,75,67,67,65,55,56,54,54,53,47,47,41,41,31,29,36,27,27,27,21,20,23,19,18,21,17,21,20,16,17,19,22,43,57,65,66,75,82,80,61,23,12,18,19,19,26,19,20,23,23,24,19,23,19,24,32,112,177,146,133,128,139,135,64,18,12,16,17,18,20,17,19,19,17,19,20,21,22,19,33,70,94,89,77,69,56,41,36,21,18,19,24,27,27,32,36,42,42,51,54,50,57,55,55,64,63,64,65,65,66,63,62,59,62,65,60,69,87,106,112,127,133,128,128,109,107,94,88,95,83,83,81,69,58,53,42,39,42,45,46,30,36,27,69,127,127,118,104,102,100,103,103,118,114,88,82,47,59,164,232,245,249,223,165,139,115,109,118,138,157,141,98,154,215,217,227,220,220,218,217,221,219,221,222,218,219,221,221,223,225,226,229,227,225,226,226,222,222,225,225,226,226,228,228,229,230,228,229,229,229,228,225,117,4,1,5,9,11,10,12
,10,12,13,13,13,200,203,202,203,204,200,200,201,201,200,201,201,200,198,199,200,198,198,198,199,197,196,198,193,197,197,194,196,193,196,193,195,196,196,194,193,195,192,195,191,189,192,192,193,191,191,192,193,190,190,191,190,192,191,190,190,189,191,193,191,190,191,189,192,191,191,193,188,190,190,191,190,191,192,190,191,192,192,194,189,193,193,194,195,195,196,194,197,196,198,198,199,195,196,198,197,199,198,199,199,199,199,199,200,199,200,200,198,198,200,198,197,200,200,200,202,200,198,199,200,200,200,201,201,202,201,199,202,203,203,203,203,204,204,205,203,204,204,203,205,206,207,209,209,208,210,208,206,209,208,210,209,208,211,208,207,204,205,205,205,206,205,205,204,205,204,204,205,205,201,202,204,202,204,205,204,208,207,210,208,207,211,209,211,208,210,212,212,211,212,213,213,211,211,212,212,212,210,211,208,210,210,209,210,210,208,207,209,210,208,207,209,207,210,209,208,210,208,209,208,209,210,208,207,210,208,209,213,211,210,209,207,210,210,208,210,209,210,210,209,208,209,209,209,209,208,208,208,208,209,210,208,208,207,207,208,210,208,207,208,206,208,208,207,207,206,208,203,198,202,201,200,199,199,199,197,198,199,198,197,193,193,193,184,195,201,193,194,196,199,197,188,194,199,195,199,200,192,188,191,197,197,196,196,196,197,196,195,196,197,198,190,190,193,208,188,153,176,197,200,154,150,184,189,185,165,143,105,107,100,74,91,66,56,49,47,71,140,214,177,190,212,155,170,213,211,207,168,157,183,163,171,199,127,149,122,77,93,98,90,53,48,71,184,202,231,171,136,176,131,207,220,201,206,200,211,230,179,129,247,203,139,232,220,224,220,227,218,228,187,59,44,58,52,31,22,17,19,21,15,22,19,19,24,18,20,23,16,19,14,18,18,12,27,21,23,20,23,31,39,38,48,64,84,95,62,59,47,17,14,13,32,61,89,104,103,114,76,20,35,85,80,105,144,101,130,141,113,114,94,116,162,155,113,93,88,79,63,73,71,75,94,93,93,83,87,96,85,84,66,75,99,90,84,77,75,74,83,85,76,75,69,71,72,72,75,74,66,63,61,58,50,46,44,37,45,38,36,37,23,32,30,26,31,25,22,19,19,18,16,19,17,14,17,18,17,18,14,15,15,16,19,14,16,21,18,33,85,102,116,126,139,143,148,111,36,12,14,17,23,23,17,27,22,23,23,23,22,19,24,39,122,169,146,142,139,152,151,78,20,15,14,18,15,19,24,18,18,21,26,18,20,22,20,40,109,135,131,131,112,102,84,59,23,10,14,14,15,18,15,18,19,21,25,21,30,29,29,30,37,41,51,51,50,57,57,61,66,63,62,65,62,61,56,57,95,59,65,65,70,89,101,113,116,120,123,131,141,137,142,124,105,98,84,76,72,95,60,118,153,157,143,116,116,105,111,103,106,105,87,83,65,80,203,244,244,237,113,49,62,66,66,53,43,53,49,11,78,187,211,228,221,218,217,217,219,216,217,217,218,220,220,218,219,222,224,225,225,224,224,225,226,226,225,225,224,225,223,223,225,224,223,227,226,225,225,224,119,4,1,6,9,10,10,13,10,12,13,13,13,202,203,202,203,204,204,200,204,202,201,203,202,202,202,202,201,200,199,200,198,198,198,198,197,196,198,197,193,195,196,196,193,193,194,194,194,194,193,192,193,196,194,193,193,192,192,191,193,189,188,189,190,191,191,190,191,190,190,193,190,190,191,191,189,190,192,192,192,190,191,193,193,192,190,192,189,193,194,192,196,194,196,197,196,196,195,194,197,196,197,196,198,199,198,199,200,202,200,201,200,198,200,200,201,203,200,202,201,200,199,201,201,202,199,200,203,200,203,202,200,199,200,201,200,201,203,202,203,203,205,205,204,205,207,208,206,205,205,208,207,208,210,208,210,212,211,211,211,212,211,212,212,211,212,210,211,210,209,209,208,208,206,203,204,204,205,202,204,205,203,205,206,206,206,207,209,208,210,211,210,210,207,208,208,210,212,212,212,210,210,210,212,211,210,212,210,210,210,212,210,211,211,211,213,210,210,210,209,210,211,208,210,210,210,210,211,211,211,211,210,212,213,211,212,211,211,21
2,210,210,212,210,211,212,210,210,212,211,210,211,210,210,210,212,211,208,208,210,209,207,209,207,208,208,207,208,206,209,209,207,210,205,204,210,206,203,206,208,200,200,207,203,203,204,201,200,201,199,199,200,194,191,198,199,183,191,202,198,198,198,198,195,182,194,199,195,200,194,196,188,187,198,197,196,194,195,195,197,195,193,194,197,193,191,187,209,179,155,185,193,199,146,158,187,179,180,155,153,129,117,86,74,87,48,35,27,44,85,173,223,178,189,207,156,159,205,215,197,170,199,201,169,185,203,107,110,129,95,121,107,81,67,44,22,117,183,204,170,183,185,144,212,217,212,214,195,210,217,148,164,252,199,160,235,231,226,228,234,217,233,164,39,55,51,31,23,16,17,17,14,23,24,19,18,19,26,21,25,19,19,16,13,16,12,23,27,23,28,29,37,42,46,65,80,87,69,59,46,23,11,15,30,51,83,106,110,103,119,86,23,61,107,83,99,89,62,80,76,68,63,64,84,122,101,61,40,31,41,53,80,66,46,55,57,66,59,104,120,103,112,76,49,45,53,42,45,39,36,46,37,37,38,44,37,33,38,32,33,31,25,24,23,18,22,23,16,22,19,27,32,30,38,33,33,36,40,24,13,16,14,17,19,15,15,15,16,20,16,17,16,20,20,14,22,14,24,21,55,177,178,179,179,179,186,184,169,56,7,16,21,23,25,26,22,20,24,27,23,22,21,21,54,121,163,153,142,144,148,162,108,37,11,11,15,16,18,19,19,18,25,26,36,19,23,22,55,152,163,147,160,159,157,157,108,34,13,12,13,15,16,16,14,18,16,17,17,20,17,18,22,18,23,24,25,27,31,35,40,40,49,50,48,59,59,60,61,66,66,64,64,61,66,65,62,70,65,62,70,85,105,125,129,134,139,146,141,141,152,132,146,155,146,125,97,96,106,118,122,125,120,112,116,120,181,241,244,244,185,46,4,49,74,80,52,36,37,27,12,86,199,216,229,222,217,218,218,220,215,217,217,215,216,217,214,218,219,220,222,221,220,220,222,221,222,223,221,221,221,224,223,223,221,217,222,223,225,223,222,118,4,1,6,9,10,10,13,10,12,13,13,13,204,205,203,202,203,199,202,203,204,202,202,202,203,200,199,201,200,201,202,199,198,198,200,196,198,199,196,198,198,198,197,196,194,194,194,194,194,193,196,195,193,193,193,194,191,193,192,192,194,193,194,190,194,193,192,194,192,193,193,193,194,193,191,194,192,192,193,192,195,191,193,194,192,192,195,194,195,194,196,198,198,198,200,199,196,199,200,200,197,196,199,200,198,201,201,199,200,199,199,201,201,200,203,203,199,201,200,201,202,201,201,199,199,202,201,201,199,201,203,203,205,203,204,203,202,205,203,204,207,204,206,208,207,207,208,208,210,210,207,210,208,209,210,210,212,211,211,211,212,212,213,214,212,211,212,211,213,213,208,208,208,207,208,204,201,201,203,202,206,206,207,205,207,210,208,211,211,209,210,209,208,208,208,208,208,211,211,210,210,211,209,208,211,210,212,213,212,213,214,212,213,212,212,211,210,208,209,211,210,210,212,212,212,211,210,211,211,210,211,213,212,214,213,212,213,210,211,212,211,211,212,211,211,212,212,212,210,210,211,209,210,210,210,209,208,209,211,210,209,208,208,208,208,210,206,208,212,208,207,207,208,207,206,206,206,207,208,201,202,207,205,206,203,203,200,199,202,199,200,195,194,198,202,185,190,203,198,198,197,200,198,186,194,200,196,198,196,198,192,185,193,198,198,197,196,193,194,195,196,196,196,191,192,188,208,169,161,195,186,203,146,160,198,174,182,178,169,152,136,106,105,81,51,55,57,110,154,218,226,176,185,210,158,130,198,214,188,186,213,191,157,156,177,116,165,177,113,108,90,84,76,52,23,93,184,223,171,187,171,150,227,219,206,209,202,219,206,144,184,248,181,144,217,238,236,236,223,174,153,100,45,66,29,18,18,16,19,18,20,19,22,19,21,21,20,22,15,21,19,15,16,14,18,17,22,30,29,40,42,42,60,77,90,66,63,47,24,18,12,30,47,78,109,114,115,107,109,76,39,83,112,107,121,84,56,119,79,42,68,45,59,110,116,57,19,81,72,63,100,75,28,21,21,32,36,95,98,92,101,53,24,22,23,28,24,24,19,15,1
9,18,19,15,19,17,17,19,21,21,14,19,14,16,21,16,21,17,17,46,57,61,69,73,76,77,85,47,15,16,13,14,14,18,20,18,20,17,19,16,16,20,20,22,20,14,24,16,74,195,184,185,173,174,173,181,175,59,15,13,13,25,27,25,25,25,27,25,25,26,26,23,53,101,142,163,141,139,144,162,148,56,16,10,13,26,22,17,23,24,19,28,24,21,26,22,67,144,155,157,164,169,170,180,162,70,16,9,14,14,13,18,16,19,16,21,21,16,16,14,19,19,17,16,20,24,31,28,29,29,31,31,29,39,36,39,43,44,53,53,60,66,66,72,74,76,73,69,72,68,64,69,71,71,72,70,74,78,114,77,91,84,71,69,57,64,74,91,98,101,110,107,113,130,151,175,153,153,119,36,27,46,70,83,70,54,50,37,17,148,242,226,234,218,223,220,219,223,219,220,217,215,216,217,217,219,217,219,219,217,220,219,221,220,219,220,220,219,220,221,221,220,219,219,222,223,224,222,222,119,5,1,7,10,12,10,12,12,13,13,13,13,205,205,203,205,203,202,201,201,203,204,205,202,200,201,202,200,199,199,200,200,201,202,201,203,201,199,199,199,199,198,198,196,198,196,196,195,195,196,192,193,193,193,194,193,193,193,193,194,191,195,195,193,195,195,193,192,196,194,193,193,195,193,193,194,195,193,192,196,197,194,196,196,193,198,195,194,196,195,198,198,199,199,198,199,199,199,198,198,200,201,199,200,201,200,200,198,201,199,200,201,201,202,199,203,200,200,202,199,201,200,202,202,202,200,202,202,201,202,201,202,205,205,205,205,206,205,204,205,204,205,204,207,207,205,207,205,210,208,208,208,209,212,209,210,212,211,211,208,212,211,211,213,211,212,212,212,212,211,213,208,210,208,204,203,202,204,201,202,203,204,205,204,206,209,209,210,207,208,208,208,210,209,211,210,211,210,211,210,209,210,208,212,210,208,213,211,213,214,213,212,212,211,212,211,210,211,210,211,213,210,210,214,212,212,212,212,212,210,212,212,211,212,211,213,213,214,214,211,212,212,212,212,211,212,210,210,212,211,209,211,212,207,209,208,206,208,208,208,209,210,210,207,208,208,210,208,211,212,206,209,205,204,207,206,205,209,207,200,205,208,206,205,205,203,202,201,199,199,196,193,195,200,203,186,187,201,199,197,198,198,200,190,194,199,193,198,196,198,197,186,188,196,196,194,195,193,194,196,194,195,197,194,193,188,203,160,171,195,186,200,151,192,207,177,184,154,141,139,141,129,113,62,46,55,105,188,207,237,218,171,186,208,177,144,194,208,181,208,214,161,118,149,199,142,196,161,90,117,87,94,89,61,51,164,242,233,177,156,154,160,214,216,216,213,199,225,191,134,206,182,130,139,174,232,244,237,169,88,56,41,54,46,12,20,18,16,22,19,18,18,21,24,19,19,21,21,21,18,19,15,14,15,17,18,23,29,46,41,38,65,71,79,72,61,54,26,19,41,48,59,69,95,117,117,120,93,98,89,55,95,118,103,125,120,124,134,65,58,56,51,87,134,132,76,88,143,146,71,111,132,53,22,24,39,76,120,92,89,92,46,32,27,25,28,28,32,22,16,15,18,19,14,17,21,15,16,14,15,18,19,16,16,16,22,14,22,20,89,136,139,156,160,165,174,190,80,10,15,8,15,13,15,18,15,17,16,18,18,19,21,20,17,18,15,25,19,74,171,130,135,124,107,117,116,114,42,18,15,15,24,24,26,29,29,28,29,28,32,29,22,52,78,141,174,141,148,139,155,168,75,17,13,11,24,20,16,22,28,29,24,22,25,28,29,72,143,151,150,165,158,163,176,194,108,24,14,11,14,17,18,16,17,18,21,17,16,16,15,16,17,19,19,20,31,50,47,44,43,34,33,35,29,19,19,19,22,22,29,28,34,37,36,49,47,55,57,58,65,63,74,77,79,79,80,73,71,73,66,70,62,65,73,84,80,73,50,38,39,40,53,58,63,56,43,24,40,43,29,37,47,66,71,63,50,59,22,52,231,250,232,232,219,224,218,219,222,218,220,219,218,218,221,220,220,220,218,220,220,220,220,220,222,221,220,219,220,219,220,220,219,220,221,222,222,221,221,223,118,5,0,6,10,11,11,13,12,13,13,14,14,203,210,204,204,205,202,205,202,204,203,205,202,204,205,201,203,202,201,202,202,203,204,201,200,203,200,201,200,199,201,197,198,198,199,
198,198,198,194,196,198,197,196,194,194,196,195,194,196,194,191,193,196,196,193,195,196,195,196,195,195,196,197,195,198,194,196,199,194,197,197,198,198,197,199,198,194,198,196,198,200,199,199,200,200,200,200,199,199,199,200,199,201,200,200,200,200,201,199,198,200,200,200,203,203,201,203,203,203,200,201,205,203,202,202,204,204,205,206,203,204,204,205,205,207,206,203,205,203,207,206,205,203,204,206,206,204,206,206,208,208,208,211,208,207,209,212,210,211,211,211,210,211,210,209,214,208,211,212,209,212,209,208,206,202,202,203,205,206,206,203,204,204,207,209,209,212,209,207,209,208,207,208,210,210,210,210,211,210,209,208,210,212,211,209,210,210,210,211,211,210,211,211,213,211,211,212,212,212,210,212,211,212,212,210,212,212,210,211,208,211,212,211,210,212,213,211,211,212,211,210,211,211,212,210,210,211,210,208,208,209,209,207,211,211,207,209,208,208,208,207,210,211,209,210,213,210,214,222,215,208,208,208,209,207,203,207,202,199,206,207,207,206,204,206,203,205,204,204,198,193,201,199,202,191,183,198,196,200,201,200,202,190,197,201,198,200,196,199,198,194,187,194,200,194,196,195,194,195,192,193,194,193,196,189,193,148,175,199,182,204,163,210,220,170,153,123,129,155,161,135,110,43,41,46,103,213,214,232,209,168,190,210,189,145,201,205,175,222,177,137,143,187,222,141,165,109,81,113,89,99,75,46,61,199,251,203,141,160,173,174,217,205,202,212,209,230,166,150,188,87,75,84,117,205,243,217,92,56,57,41,49,27,12,25,21,19,19,15,22,16,22,22,19,21,15,20,21,18,16,18,14,15,23,23,21,39,45,36,61,70,74,66,60,54,31,12,63,111,103,98,97,103,112,120,103,90,118,135,123,118,93,81,110,123,121,119,69,73,93,88,87,90,124,117,88,147,167,87,94,149,103,45,42,63,132,147,93,92,87,43,44,42,43,49,57,57,21,12,15,13,17,16,18,15,19,17,17,19,17,16,17,19,19,16,21,23,29,160,208,192,208,197,199,194,227,120,7,11,11,15,18,19,16,18,17,17,19,20,23,16,27,26,19,22,27,19,81,134,81,62,54,51,53,52,60,36,24,14,21,24,23,27,28,28,28,31,34,28,28,31,59,87,142,173,131,149,140,147,174,86,22,15,12,22,23,23,29,27,29,30,17,28,26,29,87,134,132,122,123,130,130,148,173,112,32,13,10,16,24,17,19,18,15,18,15,20,21,19,18,17,19,17,20,40,84,92,81,73,67,63,59,43,21,16,15,15,18,18,18,19,21,23,22,24,27,29,32,33,39,38,44,53,49,57,63,64,66,69,74,89,99,108,128,128,104,72,42,24,32,42,61,77,60,46,40,46,44,44,57,75,75,65,69,71,54,22,143,238,248,231,227,221,223,216,217,217,217,220,217,217,221,220,218,220,216,218,217,217,218,215,217,219,219,217,220,221,220,219,219,220,220,217,220,218,223,217,218,120,5,1,6,10,11,10,12,11,12,14,13,13,206,205,205,205,203,202,203,203,205,206,203,204,202,203,204,200,203,203,200,200,200,200,200,201,199,198,199,201,202,200,200,198,200,199,198,200,199,199,198,199,198,196,197,199,198,198,198,198,194,198,197,197,199,198,198,197,197,199,200,199,199,199,201,199,196,197,196,198,197,198,201,199,198,201,198,199,200,199,200,200,199,200,198,200,201,201,202,201,200,201,201,202,203,200,201,201,204,203,200,202,204,200,201,203,202,203,204,204,204,200,203,202,201,203,203,203,203,203,205,204,203,204,205,205,204,208,206,205,206,207,208,207,204,203,207,204,206,207,205,207,206,207,208,206,209,208,209,210,208,212,208,208,205,207,210,210,211,208,212,211,208,209,208,208,205,203,204,204,205,205,206,205,206,207,208,209,208,209,208,208,208,209,210,210,209,208,211,209,207,210,210,211,210,211,212,209,209,207,211,210,211,211,210,211,212,213,213,212,214,214,212,212,212,214,212,211,213,212,210,212,212,213,212,212,208,211,213,213,213,210,211,210,211,211,210,209,211,209,208,208,208,207,210,210,210,210,209,205,209,208,207,209,205,208,216,204,203,222,218,208,206,206,207,206,205,206,201
,201,207,205,208,204,205,209,209,208,210,206,200,203,206,205,206,198,186,202,204,201,205,203,207,196,199,208,205,206,203,202,203,201,189,191,200,198,200,198,196,197,195,193,194,198,195,199,185,138,188,198,189,204,162,208,173,142,168,149,164,190,195,159,101,49,44,35,105,189,195,225,200,171,198,188,125,112,165,176,185,201,155,160,176,224,173,85,175,131,93,101,71,92,55,41,56,190,207,150,154,179,197,184,228,212,201,198,200,227,144,164,178,75,73,61,79,165,235,190,49,42,64,46,33,14,19,27,21,17,22,20,17,22,22,21,20,20,19,21,19,21,17,15,17,16,24,24,37,46,39,48,63,77,66,60,63,26,26,62,144,208,142,115,110,109,121,100,105,104,153,201,171,109,37,73,117,103,115,109,90,107,74,66,72,51,95,130,146,164,176,94,76,148,130,81,27,63,157,132,87,99,71,42,62,66,85,106,132,106,31,13,15,12,23,19,15,17,17,19,16,18,21,15,22,15,23,25,20,27,39,177,194,172,186,161,166,166,200,95,9,16,7,16,21,16,19,19,23,23,21,26,25,23,28,30,23,27,22,34,96,113,60,46,43,36,39,45,46,25,22,18,20,24,24,28,26,24,30,34,35,36,32,27,51,70,124,141,113,134,128,137,159,79,24,16,10,22,23,29,26,29,30,24,29,31,27,36,98,139,97,63,64,66,86,85,112,83,29,17,12,20,17,21,17,17,22,16,21,19,16,21,23,22,21,19,18,48,147,181,156,153,141,133,126,88,41,19,14,15,16,18,19,17,18,20,19,19,22,19,19,23,18,23,21,23,32,29,35,39,37,40,44,52,45,67,116,129,117,93,120,138,147,148,116,92,75,74,61,57,60,87,111,112,100,95,118,107,90,78,165,238,231,225,230,217,223,219,222,218,214,219,218,218,216,219,217,217,220,215,215,216,217,216,216,218,217,218,217,217,216,217,220,220,218,218,218,219,220,218,220,118,5,1,6,10,10,10,13,12,12,13,13,13,202,206,203,204,206,202,202,203,204,203,205,201,205,203,199,202,200,200,199,200,199,198,201,200,199,197,201,201,200,201,198,197,199,201,202,201,200,200,198,196,198,196,198,198,198,198,196,198,198,196,197,198,198,198,198,197,198,198,198,198,199,199,199,199,199,201,198,197,199,198,199,198,198,199,198,196,200,199,200,200,198,199,200,201,198,199,200,201,201,200,201,203,202,204,201,201,202,200,201,201,203,201,201,203,200,203,204,203,201,202,200,201,202,200,202,203,205,203,201,204,203,204,203,204,207,203,207,204,205,208,203,206,206,206,207,205,206,204,206,205,205,210,206,208,208,207,208,207,210,211,208,207,204,203,208,208,209,208,209,212,208,206,208,205,203,206,203,204,206,208,209,207,207,208,212,209,207,206,205,207,207,210,208,210,210,210,209,208,209,210,208,210,212,208,208,210,207,208,207,207,211,208,210,211,209,209,212,211,212,212,211,212,212,211,214,210,209,213,210,211,210,209,208,209,211,209,210,209,212,211,210,210,209,209,208,208,210,208,210,211,208,208,208,210,208,209,205,206,208,206,207,206,207,205,207,173,133,170,202,206,207,203,206,205,208,208,198,205,206,204,207,204,205,206,205,207,207,205,202,207,211,210,210,205,191,201,210,207,209,206,210,199,199,210,208,210,206,209,207,207,197,192,204,203,202,200,199,202,200,198,197,200,198,208,184,141,198,208,184,181,131,150,102,98,170,168,176,184,185,146,101,51,53,37,90,177,180,222,203,173,205,163,89,121,99,147,202,201,156,178,190,202,129,89,198,167,115,98,82,91,60,49,57,165,194,174,179,210,195,178,230,215,219,205,201,206,132,192,149,46,116,83,83,165,229,162,39,59,47,24,23,14,19,16,23,23,22,23,20,23,18,21,23,17,18,19,19,17,19,19,18,21,28,33,42,41,53,63,64,63,64,59,36,12,48,125,178,188,101,90,117,113,111,92,99,115,147,183,181,114,42,63,114,131,113,120,123,108,101,74,64,42,50,118,165,198,152,62,57,149,151,71,6,44,153,121,91,98,79,76,126,144,171,173,196,174,44,9,9,14,23,16,19,18,19,21,17,20,19,16,23,22,22,21,24,19,55,171,135,102,102,80,87,79,122,57,19,20,9,21,24,24,18,21,27,26,28,27,20,27,34,27,25,30,29,37,96,9
7,53,50,42,41,45,46,42,27,19,22,24,28,29,24,31,39,39,41,42,38,27,26,48,52,93,107,69,85,80,83,101,60,29,19,15,28,29,31,34,31,33,31,24,30,24,47,115,120,78,47,48,45,51,56,62,55,29,17,16,19,23,22,22,17,21,21,21,25,22,22,15,21,18,22,19,46,154,190,179,182,184,189,189,170,94,27,14,17,21,17,21,19,17,19,19,24,22,20,22,21,21,19,21,16,24,36,39,39,33,38,29,34,29,46,97,113,105,172,251,251,245,177,85,42,29,32,66,79,90,101,97,106,96,101,122,135,128,81,130,211,216,233,226,216,222,217,221,221,219,217,216,218,217,215,217,218,216,218,215,214,217,217,217,216,217,218,214,214,217,217,216,216,217,217,218,216,223,217,218,120,4,0,5,10,12,10,13,12,13,13,13,13,202,204,202,203,206,203,203,207,203,203,203,203,204,203,203,200,201,202,200,201,200,200,200,200,199,200,201,199,199,201,199,199,198,199,198,196,199,198,198,197,199,199,198,199,194,199,200,198,198,198,197,198,199,200,201,200,200,200,200,199,199,196,199,200,199,199,199,200,198,200,200,200,200,202,202,200,200,199,199,200,200,200,199,200,201,201,199,202,199,201,202,199,201,201,203,200,202,201,199,202,201,201,203,202,200,204,204,203,201,200,201,201,205,202,202,203,204,204,202,204,203,204,205,205,201,203,204,205,206,206,204,209,209,205,208,205,205,206,205,205,206,205,210,208,208,208,208,212,210,212,207,206,207,206,207,204,207,208,208,210,207,208,206,205,204,205,204,205,208,207,208,207,209,208,207,206,204,205,206,206,207,208,207,208,208,208,210,208,209,213,210,211,211,208,207,209,208,208,211,209,209,211,210,211,211,208,211,211,211,208,210,212,210,211,211,211,211,211,212,213,210,209,210,210,208,211,210,209,210,209,212,210,210,211,208,210,211,210,208,210,210,207,208,207,208,208,207,207,208,208,203,205,208,207,207,162,105,141,195,206,210,205,205,205,205,205,200,203,203,205,206,204,205,204,204,206,206,199,204,207,207,211,210,210,194,199,211,205,210,208,208,200,199,210,206,210,207,208,210,207,205,195,203,207,207,207,202,206,205,203,202,203,201,212,184,151,206,214,187,160,87,120,78,78,141,127,146,155,149,121,91,50,44,33,98,182,182,223,201,185,212,174,148,152,86,124,224,196,167,146,160,203,150,126,214,156,100,110,98,94,63,60,62,178,213,202,193,217,187,179,232,208,221,213,229,193,122,207,143,49,88,71,91,181,230,165,73,68,32,11,22,14,19,14,20,24,21,21,22,21,22,22,22,18,16,19,16,20,22,19,21,27,33,39,43,52,71,72,63,59,62,34,26,7,63,106,103,118,47,83,110,96,108,93,104,100,93,119,168,154,83,76,97,120,119,123,132,124,106,68,44,30,41,49,95,108,69,26,62,171,118,49,4,54,159,110,103,104,105,141,167,179,193,175,196,170,41,7,8,13,24,15,21,23,24,19,21,24,22,20,31,29,28,25,29,23,71,140,74,47,57,41,50,47,56,36,24,22,16,22,23,25,28,26,24,27,26,27,24,25,37,34,22,33,27,44,105,93,66,66,60,62,68,71,62,35,35,41,40,49,53,52,50,66,66,61,59,56,54,55,64,63,108,76,38,49,39,46,52,41,24,20,22,31,26,33,37,36,29,34,32,27,33,53,117,113,63,47,44,37,45,41,52,38,20,25,18,23,24,26,27,27,23,18,26,26,27,19,18,24,24,22,21,54,133,174,154,166,174,181,185,196,146,39,14,16,15,20,18,24,21,20,26,21,22,24,23,23,23,17,22,23,24,44,71,77,62,63,59,60,40,50,102,110,111,208,249,249,224,114,49,28,11,74,123,97,71,48,53,72,82,86,116,119,117,59,73,218,232,232,226,215,225,217,219,218,218,217,219,217,215,218,217,217,215,214,216,214,217,214,214,217,213,216,217,217,218,217,215,214,215,218,219,219,222,217,218,117,5,0,6,10,11,10,13,12,12,13,14,13,202,205,203,202,205,200,201,203,200,202,202,200,202,200,200,201,201,201,200,202,200,198,199,199,202,201,199,198,198,196,198,200,198,196,194,195,198,198,194,198,199,197,198,198,198,194,196,198,198,198,198,199,200,199,200,198,200,201,201,201,198,200,198,197,200,201,200,200,200,199,199,199,201,20
0,201,199,201,203,200,200,203,203,200,203,200,200,202,199,200,200,198,199,199,198,199,199,202,199,200,201,202,201,199,202,200,200,202,201,203,203,200,203,203,202,204,203,204,201,205,209,203,206,205,202,204,205,206,202,205,206,202,206,204,204,205,205,205,202,205,204,203,206,205,207,208,207,204,205,208,207,208,206,207,207,206,207,208,206,209,208,206,207,206,204,205,206,207,207,206,205,205,207,206,204,203,202,203,204,204,206,206,208,207,209,209,208,208,208,209,210,210,210,210,208,208,209,207,208,208,206,209,208,207,210,210,210,212,208,209,209,209,211,210,210,210,208,211,211,211,214,212,211,210,210,210,207,209,210,210,209,211,211,210,211,207,208,211,208,209,209,208,208,205,207,205,205,206,207,207,205,206,205,205,206,212,202,177,185,205,205,206,206,204,203,204,200,197,203,204,204,205,204,203,205,206,207,201,200,206,211,208,209,210,211,198,197,211,207,211,208,212,202,199,211,207,211,207,207,207,209,207,198,200,208,211,208,208,207,208,206,206,206,205,220,178,156,208,213,183,154,117,158,118,101,144,143,165,167,161,130,93,49,52,43,122,210,186,224,200,184,217,198,219,198,101,128,200,183,145,141,181,206,164,128,197,153,103,120,96,86,79,42,42,152,210,176,163,221,174,189,229,206,225,210,231,172,138,232,170,111,125,54,103,226,250,174,65,46,10,15,22,16,21,16,23,28,20,21,22,16,19,23,19,16,19,16,15,19,21,27,27,32,48,37,43,69,76,76,60,61,42,18,15,30,100,74,53,82,62,87,86,95,103,93,112,104,80,87,153,182,153,101,76,93,85,104,115,107,95,58,39,33,37,29,27,45,39,45,123,128,57,29,4,70,150,89,101,97,95,118,118,127,132,117,138,106,27,22,13,13,25,24,30,22,26,31,23,20,29,29,29,36,31,24,31,27,87,123,54,49,53,40,50,42,53,28,21,29,16,23,22,23,26,27,28,34,35,34,34,50,56,52,57,67,65,79,150,157,137,138,133,144,149,161,127,126,144,152,173,132,164,169,174,183,178,179,173,174,168,172,168,156,153,72,45,47,38,46,45,49,40,42,42,46,49,45,47,42,45,33,29,35,33,59,112,120,89,63,57,57,50,53,59,36,20,20,17,27,23,27,29,30,27,24,33,22,21,24,24,21,22,25,25,63,131,144,111,110,122,130,137,160,123,52,16,11,13,19,21,23,21,21,26,23,24,22,23,26,22,18,21,24,22,64,152,164,149,134,125,122,93,116,121,101,95,184,249,177,95,37,32,23,75,142,120,76,66,55,42,75,86,72,79,85,90,56,122,237,247,243,242,231,232,221,219,215,217,217,214,214,217,217,217,216,215,216,214,214,216,214,213,214,212,215,214,215,216,214,217,216,217,218,219,217,219,216,220,120,4,0,6,9,10,10,13,12,12,13,12,12,202,205,203,204,203,199,201,202,200,200,203,200,201,200,202,201,202,200,198,198,199,201,200,199,199,197,198,198,197,199,199,198,197,198,197,198,195,198,201,195,200,195,196,200,196,198,196,196,198,199,201,200,197,198,198,199,199,197,200,199,201,199,199,200,199,198,200,202,199,199,198,199,199,200,199,199,200,199,201,199,199,198,200,202,200,199,201,200,200,203,201,200,200,198,201,199,201,200,200,202,200,199,200,203,201,202,202,203,201,201,202,200,203,202,204,204,203,202,203,205,202,203,204,205,203,204,203,203,204,206,204,203,203,202,205,203,203,203,202,204,205,204,206,204,205,206,206,205,206,208,207,210,207,207,206,204,207,207,205,206,207,208,205,206,207,206,208,209,208,207,206,207,207,205,205,204,203,204,204,206,208,208,208,208,208,208,208,209,209,210,208,208,211,208,208,209,206,208,208,208,208,209,208,208,211,208,209,208,210,208,212,211,210,213,208,209,211,209,212,211,209,211,208,210,208,207,210,209,211,208,208,210,208,208,208,207,208,207,207,209,209,209,210,208,206,208,205,203,203,204,206,205,205,210,213,215,213,213,212,208,208,206,208,204,203,198,201,206,202,204,206,206,205,209,206,202,199,202,210,207,208,210,207,210,200,194,209,205,207,207,211,206,198,210,210,210,207,206,209,209,2
11,201,196,205,211,208,210,210,210,208,210,208,210,217,174,174,215,198,170,185,159,204,163,145,198,189,208,189,194,163,99,51,52,51,134,226,191,219,197,193,212,198,237,216,114,137,157,137,166,181,203,205,141,138,225,144,103,118,93,97,70,40,21,106,176,154,174,217,163,197,223,206,214,213,222,141,159,250,194,120,113,98,178,250,226,120,51,19,8,22,22,12,23,15,24,28,23,26,19,21,20,20,19,17,17,16,16,21,21,21,30,45,42,47,66,76,72,62,61,43,23,17,15,69,113,41,63,109,78,85,71,95,109,103,111,113,105,83,133,191,188,153,90,79,86,113,108,71,75,58,34,24,27,24,27,29,40,135,151,71,18,24,9,69,119,73,106,80,53,53,45,52,46,48,61,51,25,19,15,23,29,28,27,31,31,30,31,27,30,39,37,30,31,29,31,35,107,110,49,63,54,49,59,60,56,29,30,35,37,49,50,59,66,73,82,92,118,131,149,178,182,188,199,216,203,198,204,184,185,181,171,170,160,162,171,177,183,193,193,184,177,170,173,177,175,177,173,176,176,164,155,151,142,103,95,81,77,95,98,98,116,131,151,168,152,157,145,132,122,98,77,73,61,79,157,173,148,133,124,122,120,112,92,39,13,22,17,24,30,23,30,35,29,33,27,26,35,24,25,25,26,27,22,74,124,122,67,45,47,57,69,85,86,45,26,12,17,22,18,23,22,23,21,22,20,24,21,25,24,24,22,19,25,93,203,221,211,206,211,205,186,184,147,105,81,164,199,71,17,29,20,54,132,142,67,66,118,89,89,97,83,70,54,56,79,55,104,225,244,244,250,246,241,226,223,216,217,214,214,214,215,216,213,214,213,215,215,212,214,212,214,214,211,214,212,215,214,214,216,216,215,213,216,217,216,214,218,118,4,1,6,9,10,10,13,12,11,13,13,13,200,200,200,199,203,199,200,201,199,202,201,200,200,200,200,200,199,198,199,200,198,199,200,197,198,198,198,197,199,199,200,199,197,197,198,198,196,200,194,196,198,195,197,195,197,195,198,199,198,199,199,199,201,201,198,199,201,200,200,200,199,199,200,201,202,198,198,199,199,198,200,200,199,198,199,199,199,201,200,200,201,201,201,200,198,199,200,200,200,202,201,202,201,199,203,200,202,200,199,201,199,201,200,202,202,200,203,201,202,203,199,203,203,201,203,203,206,201,200,202,200,204,203,203,203,203,205,204,205,203,201,205,201,203,205,205,205,203,204,204,203,202,205,202,203,205,203,206,207,206,208,207,208,208,204,205,205,203,206,208,205,208,207,207,208,207,207,207,208,208,208,207,205,206,205,205,205,205,205,205,208,209,207,207,210,208,208,208,206,208,208,208,208,206,207,208,209,207,208,208,208,209,207,210,210,208,209,210,209,209,208,211,208,210,211,212,212,209,210,211,209,209,208,206,206,207,210,208,209,209,208,208,209,211,207,206,207,207,207,207,208,208,206,207,208,207,207,205,205,206,204,205,206,204,206,211,209,203,207,208,204,205,205,206,203,198,203,205,207,206,206,206,206,208,206,203,200,206,209,207,205,208,207,212,204,190,205,205,207,206,208,203,196,211,207,209,209,206,209,209,211,206,197,200,211,210,208,209,210,208,212,208,213,214,166,188,224,199,184,194,190,208,175,173,195,179,184,172,206,175,100,54,41,38,141,231,193,214,190,191,207,196,232,204,144,136,141,164,179,201,183,169,140,171,234,137,105,98,77,90,72,38,27,111,191,170,202,221,157,206,213,204,211,214,200,124,178,244,188,121,91,67,162,206,136,70,18,12,13,22,21,17,27,22,18,24,23,22,18,19,22,20,20,15,17,18,21,22,21,31,44,40,48,62,77,78,57,61,39,24,15,21,29,92,108,27,85,118,91,92,87,122,120,104,108,117,119,105,133,165,185,167,129,101,85,99,76,67,80,53,30,19,26,30,42,50,119,164,82,38,15,20,7,75,104,74,106,74,41,43,40,43,45,42,50,35,16,25,17,22,29,28,33,31,33,28,31,32,30,37,38,38,32,36,35,42,136,136,95,126,76,91,105,106,91,73,106,129,128,182,191,201,206,206,214,214,208,203,200,202,198,196,193,183,162,149,125,98,122,87,78,73,61,71,61,71,70,130,126,63,64,61,60,63,56,62,59,60,65,55,59,67,62,67,77,67,71,8
1,89,85,112,128,146,125,173,179,184,179,175,178,187,177,169,112,184,184,167,159,160,161,155,166,134,73,33,35,30,37,32,31,29,31,32,35,35,31,39,29,29,37,37,33,29,93,124,97,49,37,47,41,48,54,56,37,23,17,17,23,29,22,23,25,20,27,22,26,24,21,20,27,23,21,24,71,169,188,190,196,194,199,188,194,146,107,89,136,141,27,27,34,12,71,137,117,46,67,128,111,125,111,77,69,57,47,51,39,32,152,221,220,227,220,239,232,222,217,217,214,215,214,213,216,214,215,213,213,213,214,213,213,214,212,213,214,215,214,214,214,214,214,214,214,215,215,219,213,217,119,5,0,5,9,11,10,12,12,12,12,13,13,203,203,201,201,200,198,202,201,200,202,203,200,200,200,199,198,200,198,199,200,197,199,197,198,198,197,201,196,195,199,196,198,195,198,198,199,195,195,202,197,201,199,197,200,197,199,199,198,201,197,199,201,199,202,201,199,201,200,200,200,200,198,199,200,199,200,200,201,201,199,199,201,202,201,198,199,201,200,200,200,202,201,200,201,199,200,199,201,200,200,199,198,203,199,202,199,199,200,200,201,200,201,200,199,201,201,200,202,203,204,203,205,206,203,203,203,204,202,202,204,201,201,202,204,202,203,205,202,203,202,203,204,204,202,202,205,204,204,205,204,204,202,204,203,206,203,205,206,204,206,206,208,204,204,204,204,205,204,203,205,203,206,207,206,208,208,206,204,206,206,206,206,203,203,202,201,205,205,205,204,207,210,209,208,208,209,206,206,206,208,207,207,210,208,207,208,208,208,206,206,207,207,207,210,209,206,209,208,208,207,207,207,206,208,208,211,209,208,212,208,208,208,205,209,208,208,207,206,207,207,207,209,208,206,205,206,210,209,206,206,207,206,205,207,203,206,207,205,206,205,206,206,204,205,203,205,207,202,204,203,202,202,207,207,200,199,206,205,206,209,206,206,204,208,205,201,205,209,210,210,207,206,208,210,208,191,204,208,204,206,208,205,196,205,208,209,209,207,205,205,208,211,202,196,209,212,209,208,208,206,210,206,215,210,162,201,229,198,193,205,181,177,147,136,143,134,143,162,219,174,92,51,39,43,134,233,195,201,184,194,202,203,214,201,200,174,181,196,190,171,147,174,121,171,230,141,107,77,75,98,73,48,53,169,222,187,220,208,155,206,208,207,205,218,172,124,199,234,186,127,115,51,64,97,48,37,16,9,14,16,27,21,22,17,20,24,23,25,16,17,21,14,19,17,15,21,18,20,24,47,44,40,67,75,72,65,60,42,28,31,48,66,78,139,109,71,127,122,129,119,93,123,126,121,107,115,118,109,126,136,150,145,112,81,59,55,53,38,52,37,24,31,39,59,101,139,112,60,24,21,20,15,16,91,92,81,117,72,51,44,46,55,59,58,51,28,16,24,19,28,33,29,40,46,44,48,50,55,64,69,74,81,84,84,96,128,185,183,177,190,168,178,182,184,184,169,176,179,189,196,182,181,159,141,141,121,103,88,77,75,66,61,60,57,57,66,59,59,65,61,59,59,66,69,73,80,78,77,69,64,74,75,75,70,69,71,66,65,65,60,65,71,68,74,79,68,64,66,64,62,60,60,58,56,56,63,66,67,77,96,113,125,120,117,125,118,113,113,115,122,123,143,145,113,125,131,111,93,71,60,51,47,52,49,47,45,40,42,34,34,36,33,35,90,117,92,63,49,48,46,49,53,54,35,23,22,26,25,24,29,24,22,24,26,27,27,26,24,25,25,19,22,27,66,125,134,130,126,132,130,128,137,118,104,83,121,101,33,33,26,11,82,137,103,54,54,84,98,121,99,78,65,46,49,61,44,37,103,108,99,128,165,214,234,224,215,216,217,219,217,215,214,213,214,212,215,214,214,214,214,214,213,215,215,214,214,213,216,214,213,213,213,217,216,218,214,217,117,5,1,6,10,11,10,13,12,13,13,13,13,202,205,203,201,203,201,200,200,199,198,198,199,200,200,198,200,200,199,200,199,196,199,198,198,200,196,196,196,198,197,198,197,197,199,199,198,196,197,199,198,199,197,201,201,200,197,201,200,196,200,200,200,199,201,200,200,200,196,199,199,198,199,200,198,199,201,201,202,200,199,201,201,199,198,198,200,198,200,199,198,199,200,201,200,202,199,
200,200,198,198,199,200,199,199,200,200,199,200,201,201,199,200,202,197,199,201,203,201,204,203,202,205,201,202,205,203,204,201,203,203,203,205,204,205,204,202,202,203,202,202,204,204,203,205,201,203,203,200,204,203,205,201,206,203,201,206,203,205,202,203,206,205,205,205,203,203,206,203,203,206,206,206,205,207,205,205,205,203,205,204,206,205,205,204,203,204,202,205,207,205,206,204,207,207,207,206,206,207,204,207,207,206,207,207,208,207,206,205,207,207,205,208,208,206,205,206,208,207,208,208,205,208,207,209,206,208,210,207,210,210,208,209,208,206,209,208,207,207,207,205,205,207,205,206,205,203,206,207,206,206,206,206,204,206,204,202,206,204,205,205,202,204,203,202,203,202,205,205,206,206,203,202,205,203,199,200,203,205,205,204,203,206,209,207,200,201,206,206,208,208,206,207,204,206,207,190,198,205,206,206,207,205,195,208,208,207,208,207,208,204,207,209,204,195,202,211,208,209,210,205,209,207,217,201,162,213,227,181,163,154,150,118,95,127,147,163,164,198,240,179,97,44,22,36,142,240,208,207,189,210,218,220,193,189,225,179,199,184,170,175,170,183,107,146,196,139,108,84,91,98,67,46,63,177,218,193,223,190,159,210,202,205,206,214,145,140,211,221,179,90,81,65,83,54,22,33,10,17,16,22,28,21,23,24,18,22,26,17,22,21,15,14,19,16,16,22,20,30,42,42,42,52,69,68,57,71,63,45,68,110,133,146,141,158,139,117,130,106,108,95,81,103,119,130,108,110,92,67,81,75,100,102,81,53,38,43,37,35,27,25,40,55,83,134,124,73,39,18,20,15,16,11,31,110,90,92,121,66,56,54,61,89,91,100,76,35,42,54,63,72,77,87,125,133,156,162,180,190,200,208,198,194,188,186,162,168,181,152,156,132,138,128,116,115,103,83,74,71,127,72,65,65,62,57,67,64,69,70,70,76,65,70,71,70,70,71,77,72,63,56,62,60,55,61,61,59,57,51,45,43,47,49,48,49,41,50,38,44,49,45,48,50,53,53,58,55,61,58,53,59,58,57,51,53,57,59,57,56,57,54,63,61,59,57,55,63,65,57,59,67,72,85,99,96,156,167,168,172,155,142,139,139,137,133,113,95,83,67,62,53,53,45,40,96,141,146,109,93,80,66,74,74,65,37,23,19,25,29,31,32,29,29,23,30,28,24,27,27,24,24,24,25,29,70,107,87,62,53,54,53,53,55,66,96,95,111,92,41,27,19,20,71,139,113,76,45,41,51,68,68,61,53,46,61,71,69,59,73,55,34,61,87,154,213,219,220,218,217,217,213,216,214,214,214,213,212,210,211,212,213,214,212,215,212,212,213,212,214,213,213,213,214,216,214,217,215,217,120,6,1,7,10,11,10,13,12,12,14,13,13,200,200,201,200,200,199,200,198,198,200,199,200,199,199,199,198,201,198,195,196,197,198,196,198,197,194,197,195,198,198,196,200,197,198,198,195,197,196,198,195,198,200,198,200,198,199,200,199,199,199,197,197,196,199,201,198,200,196,198,201,200,200,199,199,200,200,200,200,200,199,199,200,198,199,198,197,199,200,200,200,199,199,199,199,199,200,199,199,199,199,200,198,200,200,201,200,202,200,199,204,200,200,201,202,202,200,200,201,201,201,202,200,202,203,202,202,202,203,203,203,203,205,202,203,201,202,206,204,204,202,206,204,204,205,202,203,204,203,204,205,205,203,203,204,204,201,204,202,203,204,203,203,205,204,203,206,203,203,205,205,206,205,207,205,205,205,202,204,207,206,205,207,204,205,205,203,205,206,205,205,205,205,208,206,206,207,206,206,205,205,207,203,205,205,203,205,205,207,207,205,205,208,205,207,208,205,209,207,206,208,204,207,208,208,208,207,206,207,208,204,207,208,205,208,207,207,207,206,207,206,206,205,206,205,206,207,208,206,205,205,204,204,203,204,203,203,203,202,206,203,202,206,203,201,203,203,205,205,203,203,201,203,207,203,195,203,206,205,207,205,205,203,205,204,198,203,207,204,204,206,205,206,207,205,208,191,194,205,205,206,204,203,195,204,206,206,207,206,207,207,206,206,207,194,198,206,206,206,206,205,208,207,216,193,166,227,217,141,11
6,129,153,111,102,157,190,206,204,237,252,192,103,57,40,48,135,246,241,222,216,241,246,243,163,139,158,126,141,160,202,185,191,195,87,100,177,153,108,89,86,80,59,41,57,129,177,192,221,182,163,208,200,202,212,203,124,162,213,217,193,96,75,51,44,39,16,29,18,17,25,16,27,22,21,24,23,23,21,26,25,19,20,15,18,18,23,23,23,41,39,42,61,66,59,63,115,145,131,133,145,150,156,149,137,137,128,113,100,72,71,71,72,79,87,107,110,108,63,52,50,24,55,67,76,59,32,32,22,25,27,40,63,71,79,67,42,27,18,19,19,10,16,12,53,116,87,105,116,77,101,119,155,179,181,193,167,160,189,202,210,213,203,203,197,190,186,185,184,179,169,151,136,117,103,88,66,70,68,58,53,57,59,54,56,59,63,61,65,72,77,74,76,87,87,86,81,78,67,64,60,57,55,48,47,40,40,36,31,36,31,26,28,21,27,25,25,22,20,28,21,19,24,20,18,19,19,24,16,16,21,18,23,24,23,23,19,24,28,26,29,31,29,34,33,35,37,38,42,44,49,55,58,61,56,62,67,66,71,67,63,63,62,60,64,65,64,72,80,92,105,120,128,142,151,149,159,157,163,158,148,143,136,120,122,159,170,178,163,151,155,154,150,140,97,36,27,24,29,34,27,37,38,36,32,24,30,23,31,27,24,28,23,28,33,80,99,68,56,43,51,45,47,41,35,76,92,112,92,52,29,15,20,55,98,113,93,58,31,24,33,42,59,49,53,63,67,65,66,67,55,38,48,48,85,200,235,236,232,219,216,214,216,215,214,212,212,210,212,214,212,211,212,211,213,212,209,212,211,212,213,212,210,211,215,215,217,211,216,119,4,1,6,10,11,10,13,12,13,13,14,14,196,201,199,198,202,198,198,200,200,198,201,199,199,198,197,200,199,197,197,197,196,198,198,196,198,196,198,198,197,197,196,196,196,196,196,196,197,198,198,196,198,198,199,197,198,197,199,200,199,200,198,199,198,199,198,198,201,198,199,200,200,199,200,200,198,199,198,200,200,199,199,198,200,198,199,200,199,200,199,201,198,199,199,198,199,199,201,199,199,203,201,201,199,200,201,199,199,201,200,200,201,199,201,200,201,200,200,199,201,200,201,205,202,201,203,204,205,203,204,202,202,203,202,201,202,205,204,204,202,201,204,202,204,204,202,205,204,202,204,202,203,201,206,202,201,205,201,205,203,204,204,203,203,204,204,203,204,203,204,203,202,205,205,205,202,205,206,206,206,205,206,204,203,203,204,203,205,207,206,207,205,205,205,205,206,205,205,204,205,204,204,206,205,206,205,204,204,204,205,206,205,204,205,206,209,208,207,208,207,206,206,205,204,207,206,208,208,205,208,205,205,203,205,204,205,209,205,207,205,205,205,205,205,205,207,203,205,204,203,205,205,205,203,203,203,202,203,201,201,205,203,202,201,202,205,203,203,204,202,202,200,201,206,199,199,205,201,206,206,204,204,204,205,199,199,205,204,205,204,205,203,204,206,206,210,195,191,204,206,206,206,206,196,203,207,205,208,204,206,204,205,206,208,199,194,205,209,205,205,208,206,206,216,185,181,236,211,110,89,145,173,123,124,162,178,183,177,203,191,148,93,89,105,65,98,192,192,160,162,193,202,211,117,69,71,45,103,193,232,193,178,156,86,107,206,164,103,97,87,87,65,68,56,96,182,204,221,172,161,209,200,201,220,187,122,186,213,220,214,122,83,39,17,17,15,30,17,28,29,23,23,23,25,23,23,23,22,25,22,19,19,19,21,17,28,24,36,42,45,65,78,71,63,127,174,180,175,160,134,122,128,122,98,94,91,86,79,72,86,74,66,63,62,72,84,84,54,47,56,41,79,58,69,46,23,23,23,27,50,64,57,42,29,26,19,16,17,15,15,15,23,11,74,116,87,107,112,137,180,185,195,194,187,141,152,156,186,174,167,140,126,112,93,119,61,53,57,53,47,55,53,51,53,51,45,48,47,47,59,48,54,63,56,61,59,56,58,52,54,50,47,45,39,41,32,31,30,24,26,21,22,22,18,21,21,18,15,19,19,16,15,14,16,16,17,21,19,21,23,24,21,21,23,20,19,21,17,15,16,16,21,21,20,18,20,19,18,16,19,19,15,19,19,20,19,21,25,26,31,29,30,33,31,39,39,39,48,50,53,53,54,62,63,64,63,69,68,69,71,68,68,68,71,71,77,88,106,120,124,134,14
2,145,145,146,132,127,127,150,160,154,162,164,136,84,69,62,54,57,53,50,52,52,46,44,37,36,37,34,36,31,30,29,36,83,101,78,66,57,57,55,61,44,40,101,100,113,111,50,28,18,15,30,64,98,104,77,53,33,27,41,49,52,63,57,60,61,57,64,67,50,64,68,111,227,244,244,250,236,232,228,223,217,214,212,212,212,213,212,211,213,214,211,212,212,211,213,212,214,215,212,213,214,215,217,218,214,215,118,6,1,6,10,12,10,13,13,13,13,14,14,200,200,200,199,200,196,200,199,198,199,196,199,198,195,196,197,198,197,195,198,196,194,198,198,197,195,198,198,197,197,196,195,196,198,196,196,198,195,198,198,199,198,196,198,198,199,199,198,196,200,199,199,201,200,200,198,199,199,198,199,198,198,200,199,200,197,198,200,199,199,196,201,199,200,202,198,200,198,200,200,197,199,201,201,199,199,200,200,198,198,201,201,200,200,200,200,199,198,201,202,200,201,198,198,198,201,198,199,200,199,201,199,201,200,201,204,201,201,201,204,205,204,202,203,205,201,200,201,203,203,201,201,202,203,204,200,201,203,201,203,203,202,202,203,203,203,203,202,202,201,202,202,203,203,203,203,202,203,204,203,205,205,205,203,201,204,204,204,205,206,206,205,205,206,204,202,205,207,205,206,206,203,206,203,204,205,203,203,203,203,204,204,206,205,203,207,203,204,204,203,206,205,204,204,206,203,204,206,206,206,204,206,207,205,205,206,206,205,206,206,205,206,204,205,205,204,203,203,202,203,203,202,206,204,203,204,203,204,203,205,203,205,205,202,202,203,203,203,206,202,200,203,202,201,203,202,204,205,203,204,201,202,203,197,197,203,201,202,201,202,205,204,200,196,202,205,204,204,204,207,204,204,206,202,209,200,189,200,204,205,203,205,193,202,208,206,207,205,204,204,203,206,208,202,194,198,208,206,205,205,200,205,213,181,193,217,162,92,101,132,130,106,120,131,133,129,125,124,95,101,66,92,139,84,41,87,87,39,36,33,60,98,64,37,22,17,77,171,203,177,146,148,105,155,244,146,88,90,94,97,76,67,36,123,226,216,222,166,166,210,198,209,222,158,131,210,208,230,213,108,60,30,29,18,12,24,34,49,50,37,28,22,28,28,19,22,23,19,18,22,22,18,20,23,25,36,42,44,61,76,93,69,87,141,162,175,160,143,102,92,115,89,54,32,39,45,50,66,63,62,56,55,52,55,59,71,61,56,47,45,134,132,69,43,21,29,35,45,44,38,31,18,19,17,18,17,20,16,15,20,17,22,98,111,90,113,102,113,118,95,93,85,67,66,59,55,56,51,50,50,51,53,53,55,52,48,51,54,49,48,55,48,44,43,41,38,34,34,34,36,31,39,35,29,28,27,29,23,19,21,21,19,16,18,17,15,16,16,19,15,17,17,19,23,21,21,19,18,19,15,18,17,19,25,27,17,23,24,19,26,22,27,29,28,24,19,19,15,18,18,24,23,30,41,46,45,33,26,17,17,17,17,17,21,27,23,25,22,26,23,20,29,21,22,23,20,24,20,27,26,28,32,33,39,43,48,48,49,60,65,69,70,67,67,66,62,65,69,68,68,67,70,71,72,70,68,68,76,92,93,99,118,120,118,139,145,153,151,131,139,139,129,122,102,92,87,82,73,71,68,59,49,50,96,141,149,132,110,110,102,101,96,111,130,110,121,112,63,26,20,15,21,33,66,101,98,80,61,48,46,43,47,54,60,66,68,62,64,76,64,103,103,129,217,217,231,246,246,249,245,241,234,229,227,223,220,218,213,212,212,214,210,212,215,211,213,214,214,214,214,212,213,215,214,216,214,217,117,5,0,6,10,12,10,13,12,13,13,14,13,198,203,200,200,199,199,200,200,201,198,199,198,199,199,197,198,198,197,197,198,199,197,197,198,199,197,194,196,198,196,194,196,196,197,199,200,198,197,196,197,198,198,200,199,198,197,196,199,198,199,200,201,199,199,202,200,200,199,200,199,199,198,197,198,196,200,199,197,199,198,200,200,200,199,201,200,200,200,199,199,199,200,200,201,200,199,199,199,200,200,199,201,200,201,202,200,201,203,202,203,202,202,200,199,203,200,201,201,201,201,199,202,200,201,201,199,200,200,203,202,202,202,203,204,200,202,204,202,203,201,202,203,202,202,200,201,201,202,203
,201,202,202,203,203,204,203,202,204,203,203,202,203,203,202,202,204,204,203,205,204,204,204,203,203,204,204,202,204,206,207,208,205,206,206,205,203,204,206,206,208,204,204,204,205,205,204,205,203,205,203,204,204,201,205,206,205,204,202,203,206,204,204,204,204,205,202,203,204,204,207,207,205,206,205,204,207,207,206,205,205,206,205,204,205,203,202,200,203,203,203,203,203,204,203,204,201,203,203,204,203,200,201,201,203,202,202,204,203,201,202,203,202,203,201,203,203,201,205,205,205,203,204,203,194,198,205,202,202,203,205,204,204,200,197,203,204,203,204,203,203,202,204,203,202,207,202,190,198,205,203,200,204,195,199,206,203,205,204,206,204,206,205,206,207,194,194,204,206,206,206,203,208,204,171,194,148,84,84,103,103,82,91,127,127,111,98,116,113,72,90,78,87,117,88,64,120,125,79,53,13,6,57,69,61,42,31,38,61,118,165,183,189,145,193,239,117,103,113,94,103,73,62,41,145,243,218,217,164,174,214,199,214,217,128,143,213,208,234,190,100,49,36,40,62,56,36,47,64,45,64,99,44,19,22,19,23,22,22,24,21,17,21,19,27,38,42,39,56,79,105,147,135,116,117,127,138,130,121,87,56,73,71,54,40,33,31,31,42,43,42,44,45,44,47,50,62,66,57,97,159,214,203,134,66,35,24,21,21,19,19,19,17,16,17,19,19,16,19,14,23,16,39,118,108,109,127,77,62,62,52,52,51,51,59,54,54,49,47,50,44,48,48,48,44,39,44,41,36,33,31,33,33,25,30,24,19,21,18,22,19,17,17,22,19,15,19,19,22,21,23,21,25,22,18,20,17,17,15,22,17,21,23,27,31,33,34,29,26,18,15,20,28,29,29,23,19,19,18,19,24,22,21,24,32,33,27,19,19,20,16,21,35,69,88,100,97,75,36,15,16,19,23,25,30,20,18,21,15,24,17,19,22,24,30,22,19,19,17,20,18,18,20,21,25,21,24,25,27,28,28,37,36,44,46,43,50,51,49,57,66,63,65,68,70,67,67,67,67,63,62,67,67,69,79,103,116,130,143,146,152,153,167,162,163,170,182,188,178,133,172,165,142,136,149,168,184,173,166,162,169,172,157,153,153,120,122,128,71,30,16,11,16,20,40,66,96,112,95,78,69,51,45,48,57,72,64,66,63,74,89,112,102,84,108,99,129,185,219,239,243,247,248,249,248,243,241,231,227,222,217,214,211,212,209,208,210,212,213,211,211,212,210,214,211,212,213,216,120,5,1,7,10,11,10,13,12,12,14,14,13,200,200,200,200,200,196,199,199,199,201,198,200,198,198,199,197,198,198,195,199,199,196,196,195,199,198,197,198,196,197,197,198,200,199,199,199,200,199,199,200,200,201,201,200,201,199,199,201,200,202,199,199,200,200,200,201,200,199,199,201,202,202,202,199,201,200,199,201,199,198,198,200,200,199,200,198,200,200,200,202,202,204,201,200,201,202,201,201,203,203,203,203,204,201,202,203,201,203,203,203,203,203,201,203,202,201,199,199,199,200,201,201,202,199,200,201,202,204,203,202,202,201,203,203,201,204,203,204,204,201,201,202,203,201,202,201,201,203,202,200,203,203,203,200,201,202,200,201,203,205,205,203,204,203,205,204,203,203,202,203,205,202,202,203,203,203,203,203,206,204,206,206,206,204,205,207,205,207,206,202,205,203,203,201,200,204,203,204,204,203,205,203,203,205,204,205,203,203,206,204,204,205,203,203,206,201,204,206,203,205,203,205,205,204,205,204,205,204,205,203,205,204,201,204,203,202,202,202,203,204,204,201,204,202,201,204,203,202,200,203,201,200,203,202,203,203,202,200,201,201,202,204,201,200,203,200,201,202,203,207,204,203,199,196,201,203,202,203,202,202,203,201,196,202,205,202,203,203,204,203,201,201,202,200,205,204,190,196,207,202,202,205,193,199,205,202,204,203,204,205,204,203,204,207,202,191,200,206,205,206,203,209,201,170,198,102,16,35,68,98,80,107,147,130,116,103,136,119,73,115,88,78,124,99,96,193,190,137,110,44,36,71,93,97,76,62,64,49,84,155,207,210,157,202,210,117,126,124,116,108,81,73,42,153,238,210,214,157,179,214,193,226,195,101,157,214,213,231,157,78,53,31,127,213,11
4,195,190,192,193,192,193,192,193,192,191,189,191,194,194,191,191,195,193,192,195,193,193,193,193,193,194,192,192,193,193,196,193,192,192,195,196,194,195,193,192,190,194,196,192,194,194,194,193,191,195,193,195,193,193,195,194,195,193,192,194,191,194,194,193,198,191,194,194,195,194,191,197,191,192,192,191,194,191,194,194,193,196,193,196,197,195,193,191,196,194,193,194,192,194,194,194,194,193,196,189,188,196,191,189,190,191,192,192,194,191,192,195,191,193,193,193,190,191,194,190,194,193,193,198,194,194,195,194,192,184,175,174,177,181,189,199,196,190,194,193,200,200,196,197,194,196,198,196,196,196,204,193,178,135,139,188,198,201,173,174,200,210,210,208,199,179,204,194,186,207,199,204,203,197,174,159,102,57,80,95,95,73,78,92,149,173,135,169,184,164,144,181,184,200,188,55,131,204,180,203,133,122,134,90,107,68,62,71,88,127,91,60,148,248,219,147,160,220,250,246,159,52,63,88,83,110,74,57,99,134,131,84,78,44,19,14,23,49,63,95,115,115,117,103,101,107,125,125,110,38,41,88,85,106,97,116,110,105,103,44,45,76,81,95,86,48,31,19,19,45,44,37,34,30,37,59,102,144,157,150,152,159,171,176,149,119,76,49,39,27,23,23,36,53,56,56,51,31,25,27,13,79,159,113,113,137,86,29,11,22,30,32,39,54,60,62,53,35,32,31,18,21,27,39,34,33,31,19,23,38,38,47,122,128,53,38,33,34,43,52,68,66,89,134,165,184,198,208,193,181,184,172,174,170,160,141,139,137,134,133,139,146,112,142,143,124,110,98,70,56,38,29,36,26,27,21,23,31,26,24,16,17,15,18,20,20,23,18,18,15,18,18,23,26,28,23,15,21,16,18,24,24,32,32,21,19,22,19,15,20,21,21,23,23,19,20,27,28,31,26,24,32,53,60,72,93,107,126,130,144,147,146,157,155,155,142,141,162,165,161,168,169,172,173,153,156,130,101,110,98,123,89,32,46,50,60,61,61,63,55,39,49,34,22,34,40,51,40,29,21,22,29,29,42,39,26,29,105,157,53,99,136,110,208,150,140,166,80,36,65,146,140,120,107,107,98,39,52,63,45,35,45,43,23,128,169,135,132,62,38,39,21,22,19,21,32,43,52,50,39,36,24,28,20,125,193,159,160,118,99,99,111,122,121,128,165,219,250,250,251,251,241,231,231,229,222,223,225,223,225,225,222,116,4,1,6,11,12,11,13,13,13,14,14,14,203,207,208,205,205,204,204,205,203,204,203,202,204,206,204,202,203,200,202,202,202,205,207,205,205,205,204,205,205,204,205,207,206,206,207,208,210,208,206,206,207,206,206,206,206,204,203,203,200,204,203,204,202,201,202,202,204,203,202,204,205,203,205,204,204,204,206,205,204,205,204,205,207,205,205,206,202,206,205,205,207,202,205,204,204,205,202,204,203,202,203,201,202,200,201,202,199,199,202,199,198,199,198,198,196,201,197,197,199,198,199,196,198,199,198,197,197,196,196,195,195,197,196,197,198,196,196,196,196,198,195,196,195,193,197,198,196,196,197,195,191,193,194,195,193,192,195,194,195,194,194,195,192,193,196,193,196,193,191,191,193,193,190,194,193,192,194,193,192,194,194,193,193,192,191,191,194,194,193,192,195,195,195,193,194,193,193,194,192,196,192,194,191,193,194,193,195,191,194,191,193,191,193,193,191,194,192,193,193,195,195,193,194,193,193,194,193,193,192,193,194,193,193,193,193,194,191,194,193,193,196,192,191,195,195,194,191,191,194,189,194,194,192,194,191,194,193,193,193,191,191,192,194,192,187,186,193,188,186,191,192,193,193,194,191,189,191,193,192,191,190,188,190,191,193,193,195,195,197,198,191,190,180,172,173,180,187,192,198,200,202,200,196,197,194,198,200,197,195,198,198,198,202,200,203,203,173,141,106,118,160,170,167,154,186,204,200,208,204,201,182,206,195,184,207,200,202,206,200,162,160,139,64,78,109,100,76,71,105,184,204,146,166,168,159,177,202,152,184,167,67,209,216,162,212,125,116,129,81,84,53,109,155,185,181,102,59,122,205,128,136,214,246,248,174,68,57,90,113,80,63,53,88,133,128,96,70,
44,21,11,27,56,78,100,117,122,116,110,105,111,117,135,139,95,24,54,92,89,104,87,103,121,131,107,66,89,108,97,106,89,60,44,19,21,51,62,56,32,21,28,32,33,59,98,129,142,143,146,153,168,173,161,140,110,75,49,34,24,24,18,19,23,19,23,23,22,92,150,107,122,128,72,25,14,26,34,32,50,98,97,68,52,43,41,27,16,17,18,43,59,49,36,38,59,68,106,118,139,115,50,77,97,125,146,158,185,193,205,205,192,186,178,165,151,137,127,137,128,133,137,137,136,139,141,132,124,107,92,60,51,41,25,21,23,37,33,33,31,16,20,27,25,27,18,16,19,16,21,15,21,21,16,22,19,18,19,17,19,19,25,30,20,19,17,14,22,24,23,27,21,21,18,20,19,16,22,19,21,21,20,19,21,25,34,35,24,27,32,32,27,28,30,35,45,59,89,111,124,131,141,147,142,150,145,134,139,132,142,147,154,160,159,155,163,146,152,99,36,46,35,43,48,41,45,39,36,51,35,21,26,51,61,48,29,21,21,27,49,46,26,23,11,64,130,63,80,86,84,147,103,112,123,70,35,81,155,139,126,107,108,102,45,51,59,39,24,28,26,68,165,155,107,73,37,31,34,29,29,24,21,27,27,29,30,31,35,51,75,106,160,137,107,114,95,98,115,141,137,153,226,249,249,251,251,249,241,233,229,227,225,223,222,224,227,228,227,224,115,4,1,6,11,12,11,12,11,14,14,15,14,204,206,204,203,206,203,201,202,202,204,206,201,203,203,201,201,202,204,202,202,204,204,207,206,204,205,205,204,205,205,206,205,206,204,207,205,206,207,206,206,205,204,203,207,202,205,204,205,206,203,205,204,201,201,204,205,205,205,206,202,203,205,202,204,202,202,207,204,202,205,205,203,203,204,203,202,203,203,203,203,203,205,202,201,201,199,202,202,202,202,200,201,201,199,198,198,198,200,199,199,199,197,200,197,197,197,196,199,197,198,198,196,198,193,198,196,194,197,196,197,198,198,197,197,195,194,197,197,196,194,196,196,194,197,195,194,197,193,196,195,194,194,192,194,195,194,193,198,196,194,196,192,193,193,192,193,193,192,191,193,191,193,193,191,194,193,193,194,191,191,194,195,193,191,194,193,191,191,194,192,192,193,193,193,192,193,193,191,191,192,192,190,191,192,193,190,189,192,192,193,191,191,192,192,192,190,193,192,192,193,191,192,190,192,194,191,194,191,189,195,191,192,192,191,194,189,192,193,191,193,192,193,191,191,192,193,193,191,192,191,190,190,191,191,190,193,193,190,191,191,193,190,190,191,183,189,193,187,190,192,190,189,190,189,190,190,192,188,190,190,189,189,188,194,194,194,195,194,191,188,182,174,177,181,189,195,200,200,201,202,203,201,199,199,193,200,199,196,199,199,199,199,200,199,204,196,171,130,101,122,152,155,131,150,203,205,195,201,203,202,180,204,196,181,205,199,202,205,189,143,163,194,141,106,110,116,98,89,110,200,204,119,141,160,160,205,178,120,202,162,84,190,168,166,226,122,109,127,83,84,48,151,206,220,186,83,50,79,115,93,175,239,246,203,73,58,79,117,112,57,59,89,130,133,93,80,50,26,15,27,59,87,111,118,125,121,106,106,109,124,118,139,133,53,17,93,110,81,100,80,120,130,116,121,83,87,77,70,85,66,49,36,15,24,62,84,76,50,39,32,36,32,25,32,47,86,103,103,130,131,150,148,163,167,157,152,113,77,53,37,28,23,25,23,26,24,110,145,97,128,115,59,22,8,19,19,35,92,115,82,63,53,31,34,36,32,45,54,76,85,84,88,95,119,131,135,146,151,148,152,184,190,206,212,205,199,191,181,154,139,139,135,125,125,136,137,121,147,145,125,100,93,71,50,40,35,34,53,29,32,21,15,18,32,51,48,39,38,15,19,24,19,16,17,19,18,22,16,17,17,15,18,16,19,20,20,23,34,50,49,46,36,25,22,18,26,41,46,45,36,29,22,15,16,18,18,16,20,18,19,19,17,21,26,32,31,33,25,19,39,41,31,28,20,24,31,36,39,46,63,84,90,123,129,134,137,131,136,130,122,129,128,148,153,148,163,123,108,106,67,56,48,35,35,33,39,60,40,22,39,42,49,52,42,28,28,39,38,40,29,25,20,103,143,60,51,51,31,47,37,43,48,39,29,52,127,137,122,99,106,100,47,54,60,52,43,35,61,129,155,91,
57,45,22,23,29,29,32,28,34,38,34,43,54,67,105,145,170,167,160,113,93,125,109,107,127,122,92,128,207,241,242,251,251,236,228,229,223,223,223,217,219,223,226,226,223,224,116,4,1,7,11,11,11,14,13,14,15,15,14,204,207,204,203,206,205,206,203,203,205,204,205,204,203,201,203,204,201,203,206,208,206,206,206,206,206,204,207,207,204,204,206,206,207,205,205,206,205,208,206,205,204,205,208,205,203,204,205,205,206,205,203,204,205,204,204,204,203,204,203,206,205,204,204,205,206,203,204,203,206,205,204,204,205,203,202,201,202,203,203,206,202,201,203,204,202,199,200,200,198,198,199,201,198,198,200,200,199,200,199,201,201,199,201,198,202,199,200,200,195,201,198,198,198,198,198,198,197,198,197,196,197,195,196,194,195,198,197,198,198,195,196,195,195,196,195,194,194,196,194,197,196,195,196,194,195,195,193,195,195,193,196,193,194,193,192,196,193,192,195,194,193,196,194,191,193,193,194,195,196,192,194,196,193,192,192,194,194,194,194,193,194,193,191,192,193,195,193,191,193,191,191,191,193,193,191,193,189,193,190,190,193,191,190,191,191,191,193,194,192,191,193,192,192,191,193,193,194,190,193,194,189,191,190,191,192,191,194,194,190,191,192,189,192,190,192,193,191,193,194,193,192,193,191,190,191,189,190,190,191,192,193,193,189,191,193,193,196,196,196,193,194,195,194,192,192,191,193,193,195,196,191,193,193,197,195,190,183,177,184,185,188,193,198,201,202,202,199,201,200,198,200,200,193,192,199,201,200,199,199,198,197,200,201,207,189,181,153,132,177,198,172,134,174,216,205,197,196,205,205,181,200,193,178,203,201,199,208,182,139,182,222,176,117,110,136,121,104,107,175,175,137,163,144,169,202,147,153,239,152,59,135,152,200,242,131,111,127,98,90,43,161,220,226,143,13,10,66,117,126,208,245,203,87,43,77,101,117,90,62,104,125,127,100,81,62,23,17,31,61,93,107,122,122,122,113,99,108,115,113,98,111,90,14,40,141,103,79,97,79,105,76,58,105,87,66,67,55,44,36,36,24,14,34,51,65,74,75,68,46,36,32,30,30,27,29,44,57,87,108,123,131,139,137,144,173,188,187,166,133,114,85,54,50,35,39,125,135,96,122,99,42,14,14,11,23,52,89,90,51,42,48,48,55,61,71,86,110,138,134,141,160,179,181,175,181,179,182,174,178,179,160,157,152,149,145,144,141,129,134,134,133,131,122,122,103,94,72,46,33,24,25,18,17,20,25,22,29,30,24,14,16,36,39,41,48,47,38,21,16,16,14,20,17,19,17,19,30,29,34,30,22,19,16,21,30,53,69,63,60,64,56,50,35,36,68,69,64,66,60,57,43,24,15,19,21,15,16,23,25,18,21,20,18,20,21,24,18,33,75,74,53,49,29,17,21,27,29,26,30,34,36,49,46,71,97,112,123,130,147,146,143,137,133,128,147,139,156,170,136,134,107,74,76,60,59,79,57,67,63,37,32,45,57,62,56,30,19,30,41,46,72,152,152,59,42,45,44,49,37,47,42,29,26,31,96,133,124,102,98,106,61,38,48,63,71,76,95,96,75,47,35,29,24,18,23,28,35,44,55,72,91,129,155,160,165,163,154,134,133,123,137,158,112,92,83,51,31,24,64,135,180,230,242,224,224,224,222,222,220,223,221,221,223,223,221,222,116,4,1,8,11,12,12,14,13,14,14,15,14,200,204,203,203,205,203,203,203,203,202,203,201,203,204,203,204,201,202,203,205,206,203,206,203,204,205,204,207,205,205,206,206,205,205,208,205,205,206,205,204,205,204,205,205,204,205,202,206,206,206,206,203,205,207,204,203,205,202,205,204,206,205,203,206,204,202,203,202,205,204,205,204,200,204,205,205,207,203,204,203,200,201,201,205,201,201,201,200,201,199,203,201,201,200,198,200,199,203,200,200,202,198,203,201,200,203,199,199,200,201,200,198,200,199,200,199,199,198,198,198,198,195,195,196,198,199,195,197,196,196,198,196,197,197,194,196,197,195,198,195,194,196,194,195,194,193,196,196,195,194,195,195,194,195,192,192,195,194,194,194,193,194,192,191,194,192,192,194,192,193,193,194,193,192,194,195,193,191,
193,192,194,195,193,195,194,195,194,193,192,191,196,193,193,192,192,194,193,193,191,193,193,190,191,190,192,190,193,194,190,190,191,191,190,191,191,193,193,190,192,194,191,191,192,190,192,191,190,190,189,191,193,192,190,193,191,191,192,192,193,194,193,192,190,190,192,193,192,192,194,193,192,194,197,193,199,199,193,196,200,200,200,200,198,201,200,198,198,199,199,198,199,196,193,188,187,184,184,187,188,193,196,200,204,199,199,200,200,199,199,198,199,198,199,189,184,200,200,198,199,195,196,196,198,203,199,174,173,148,159,202,208,181,152,200,219,206,198,189,201,204,179,200,196,175,198,198,203,212,179,156,196,207,165,115,118,136,110,84,66,125,163,175,174,125,153,186,146,194,230,93,61,141,168,224,235,134,110,121,112,79,22,144,239,239,127,5,36,149,173,167,241,209,95,56,67,107,107,83,53,66,131,123,88,87,59,32,18,23,53,78,113,124,121,122,111,106,106,109,109,94,66,62,59,15,56,155,101,82,98,78,115,126,118,113,83,105,95,57,37,22,23,21,30,49,43,33,33,49,55,36,29,21,24,32,29,27,21,26,33,49,74,101,127,123,123,139,152,169,176,177,184,177,162,147,118,120,155,128,95,115,83,37,27,20,42,59,89,78,71,41,44,59,77,123,127,146,143,181,189,192,189,195,190,181,178,171,156,144,140,122,112,112,124,138,145,148,149,142,133,113,93,82,66,51,33,24,16,18,22,21,16,17,17,16,16,16,23,19,19,21,17,31,38,34,44,62,54,29,21,18,15,15,16,18,21,21,40,59,69,65,56,45,23,21,29,69,77,78,96,98,91,78,78,58,85,100,107,112,92,74,83,81,44,19,14,19,34,79,53,49,35,24,19,20,16,15,17,33,76,87,69,71,54,26,20,16,27,34,30,38,29,24,22,18,21,25,37,41,69,106,124,129,129,133,139,137,127,141,139,133,150,148,160,160,133,127,131,122,133,105,69,58,31,90,100,60,37,24,26,39,85,70,125,146,57,48,51,53,59,50,55,46,42,29,26,96,145,133,98,105,115,65,29,22,45,60,55,57,47,35,34,34,26,19,33,40,56,69,107,154,165,177,174,164,150,135,129,133,142,146,126,123,152,116,61,38,33,24,35,31,19,47,145,216,224,223,223,221,225,225,222,224,225,223,223,223,222,116,4,1,6,11,13,12,12,11,15,15,14,14,200,203,201,203,204,201,201,199,202,201,199,203,202,203,201,203,201,201,204,203,204,200,206,203,203,204,203,205,204,205,203,206,206,205,205,207,206,205,206,204,207,205,203,204,203,206,206,204,205,204,202,203,203,205,203,202,203,203,203,201,205,205,205,205,203,203,202,204,203,202,200,202,202,202,203,202,204,204,204,201,202,202,201,206,203,200,203,202,202,202,203,200,200,202,203,203,203,202,202,199,202,202,200,201,200,203,200,200,203,198,204,201,200,200,198,199,200,198,198,198,199,201,199,199,198,198,199,198,200,197,196,198,199,198,197,197,196,196,196,195,197,194,194,197,195,197,196,195,197,196,198,196,192,196,194,194,196,195,195,195,194,193,192,193,193,196,192,191,193,193,194,194,195,194,193,191,195,193,189,193,194,191,193,193,194,195,193,191,193,192,192,194,193,194,191,192,193,192,195,193,194,194,194,194,192,191,193,194,196,192,192,191,191,194,191,193,192,190,191,192,193,192,191,193,196,193,195,191,192,195,194,193,189,193,194,195,194,195,193,194,194,193,193,193,196,195,193,194,194,196,195,206,202,201,221,201,196,203,199,204,199,199,203,200,206,204,201,205,200,199,199,195,191,185,188,190,192,195,199,202,199,201,203,198,195,198,198,196,196,197,198,195,200,185,177,196,197,199,198,196,196,197,199,199,182,139,127,106,133,184,176,153,152,205,218,208,203,184,194,203,182,199,193,177,201,198,202,208,162,169,188,152,159,128,115,119,92,84,67,91,154,213,147,90,176,193,164,206,181,68,53,98,158,218,231,141,115,123,93,61,12,113,234,252,169,118,185,252,212,206,225,101,60,73,100,114,76,67,76,87,117,87,83,63,25,21,22,47,77,109,131,127,125,116,105,116,116,113,113,95,48,21,43,15,60,154,92,84,96,84,141,143,147,12
2,98,145,100,66,49,29,53,55,57,51,44,39,23,24,31,20,16,17,19,32,40,33,19,25,30,28,30,39,55,81,110,125,129,137,140,134,145,159,175,192,189,179,165,117,104,120,86,65,84,118,149,156,144,154,164,161,174,170,179,185,182,179,179,181,186,174,160,149,139,134,130,115,113,116,114,127,134,138,151,151,130,110,86,59,45,34,26,30,34,24,19,12,14,17,13,19,16,16,17,15,17,13,17,20,20,17,27,36,24,39,81,93,51,25,22,16,15,17,15,19,19,35,79,82,51,52,77,83,59,30,82,109,91,165,145,107,100,102,102,108,134,146,147,104,139,87,98,136,62,30,22,55,120,148,124,86,71,39,14,24,17,16,23,51,67,66,88,77,44,24,17,16,19,28,33,23,22,19,19,16,19,23,22,25,19,26,35,46,61,76,89,103,111,122,135,123,131,138,146,161,146,154,150,150,165,156,162,146,141,135,131,89,60,49,49,90,71,41,118,143,57,38,25,26,38,36,45,45,34,32,30,98,152,130,99,92,117,80,33,17,16,27,28,33,25,29,38,39,48,63,85,118,148,169,182,177,160,148,136,136,135,141,147,141,125,94,50,64,135,106,50,33,24,29,35,34,48,12,64,195,214,227,226,220,227,221,221,222,223,223,225,223,223,115,4,1,6,11,12,11,14,13,14,14,14,15,200,202,198,199,202,199,199,200,202,200,200,201,202,198,198,199,200,200,201,203,202,202,204,202,201,202,201,204,202,201,203,204,204,204,204,203,204,203,206,204,204,204,204,200,202,203,202,206,202,203,203,202,202,201,201,203,205,202,203,203,203,203,203,202,201,204,201,200,202,202,201,202,202,203,204,203,202,201,201,203,205,205,200,202,202,205,205,200,202,201,201,200,203,201,204,204,200,202,199,201,202,201,202,202,200,200,200,199,201,200,198,199,200,200,200,201,199,199,200,199,200,198,199,198,198,198,197,200,198,199,199,195,197,198,196,198,198,198,198,197,197,199,196,196,195,195,197,193,196,196,196,195,194,194,195,195,194,193,192,194,194,194,194,195,195,192,194,195,195,193,193,193,193,195,195,196,191,193,194,192,194,193,192,193,192,194,192,193,193,190,194,189,192,192,193,193,191,194,192,192,194,194,192,192,194,193,193,192,195,196,194,192,193,196,195,194,195,193,193,196,195,194,195,195,195,196,195,198,198,196,195,197,196,195,195,194,195,196,195,196,196,197,196,195,199,196,195,198,196,198,195,205,183,162,189,193,200,202,199,200,198,201,200,203,204,203,200,199,196,191,191,188,191,194,199,196,197,200,197,200,198,198,200,194,196,196,196,198,197,196,196,195,199,186,171,195,200,195,199,197,197,198,201,198,170,124,116,103,131,164,131,125,143,194,220,209,202,186,187,203,180,195,199,176,198,196,203,189,139,158,154,148,169,130,98,87,94,99,97,115,174,206,140,131,194,183,158,202,162,73,41,55,153,221,231,155,110,96,73,41,10,125,233,252,202,206,252,252,205,185,123,57,81,100,105,76,56,101,110,89,86,76,55,30,12,23,47,87,118,126,134,130,118,119,99,130,130,112,114,98,60,27,38,17,50,141,98,88,95,93,81,49,101,93,110,152,80,49,57,87,110,82,61,46,45,32,23,22,23,23,19,17,20,34,50,46,37,35,33,31,23,19,22,26,37,56,84,108,124,132,141,136,148,159,165,162,138,113,111,112,85,117,175,202,222,214,167,204,192,198,189,173,165,153,149,132,147,128,126,122,111,113,117,113,124,127,123,125,126,122,105,88,61,42,33,28,24,27,26,30,36,32,25,15,15,14,13,13,13,18,19,17,16,15,16,18,19,20,16,21,30,29,26,49,89,80,37,22,18,14,14,21,16,20,19,54,108,73,47,49,63,108,84,75,137,122,107,174,124,46,53,105,147,119,141,136,89,92,128,98,74,125,98,43,85,160,164,109,72,105,125,71,27,14,17,16,39,47,52,99,112,90,41,19,22,10,23,21,20,17,17,25,18,17,21,24,20,20,23,19,19,31,34,36,33,36,45,66,81,95,116,116,136,144,147,142,135,145,148,163,180,184,167,168,153,155,164,122,153,136,94,84,146,125,59,39,29,23,27,23,32,30,29,31,32,90,139,133,91,84,114,84,34,22,16,22,39,48,57,73,84,110,134,152,178,175,173,157,143,132,131,148,150,152,150,133,111,9
6,58,23,17,108,153,75,50,36,24,25,31,39,55,19,57,193,214,225,231,219,223,220,220,219,220,222,225,223,222,115,4,1,7,10,12,12,13,13,14,15,14,14,200,204,199,199,200,199,198,201,202,199,201,201,199,200,198,199,200,199,201,198,203,201,201,200,200,204,200,204,202,201,202,203,203,200,204,206,203,202,203,203,204,201,201,203,200,202,201,203,204,201,200,201,201,201,203,202,206,202,202,203,200,201,200,200,201,201,201,200,199,204,203,201,202,201,203,201,203,201,203,201,200,202,201,203,200,201,202,200,201,199,202,201,198,201,201,199,199,201,202,201,200,199,200,200,199,198,200,200,204,199,200,198,197,200,196,200,199,197,199,199,196,197,199,198,198,200,199,196,198,196,200,198,196,199,196,197,198,199,197,196,196,195,196,194,196,195,195,195,194,193,193,195,195,197,194,193,196,192,195,196,195,195,192,196,193,197,194,193,196,194,194,195,193,194,194,193,194,192,193,195,194,191,195,196,194,195,190,191,195,195,192,192,194,194,192,195,194,194,197,193,191,193,193,194,195,194,193,194,194,193,195,193,195,197,193,194,196,196,196,196,197,199,198,201,200,197,201,198,200,200,195,198,196,199,197,196,195,197,196,196,198,195,196,198,197,196,196,196,194,199,197,203,136,79,153,191,199,203,198,208,203,204,203,196,200,194,190,191,189,191,193,198,198,197,199,198,199,198,198,194,195,194,196,199,196,196,196,197,197,196,198,197,202,190,170,194,200,198,200,196,196,200,207,196,178,162,155,148,184,179,122,129,143,169,212,215,201,191,185,200,182,192,198,173,196,199,202,171,145,159,155,168,182,143,93,96,109,105,92,107,177,226,166,168,175,130,177,214,142,76,50,98,218,243,243,160,74,66,55,20,45,178,235,232,160,179,248,248,142,76,60,64,110,110,72,53,94,132,108,55,42,47,34,16,18,50,98,130,137,133,128,120,115,103,82,91,97,98,107,106,83,41,39,12,45,154,111,99,112,52,59,38,58,96,90,103,29,29,95,108,87,44,38,36,35,32,25,27,24,24,21,26,19,28,39,36,43,37,26,22,18,16,17,17,21,21,24,35,51,77,97,110,130,142,146,156,147,141,149,122,107,152,184,197,194,175,167,156,150,137,133,127,120,111,115,120,119,134,141,128,133,140,125,114,101,92,73,50,33,30,25,24,16,17,18,18,21,13,24,27,24,24,19,19,16,12,20,17,15,18,17,17,16,15,17,16,14,16,19,17,22,24,30,44,62,55,27,16,15,15,15,20,16,19,16,56,118,108,93,79,49,104,110,105,156,102,90,144,105,103,90,148,125,63,106,117,124,101,155,87,71,147,122,97,148,150,86,66,27,82,161,97,33,14,10,24,43,42,43,101,126,78,32,21,17,13,18,21,21,17,19,20,16,18,21,19,20,18,24,21,16,24,30,36,34,23,31,35,39,41,44,55,76,100,111,122,123,137,141,154,168,162,163,160,165,170,182,179,168,171,143,135,162,125,78,80,63,60,60,52,54,51,55,57,47,103,156,143,107,86,117,87,42,43,46,73,102,136,153,172,174,169,171,163,156,136,129,141,152,148,153,166,145,121,93,63,56,44,41,11,79,212,148,53,43,33,30,26,42,45,57,22,72,214,210,222,230,217,223,220,223,222,220,222,222,217,220,116,3,0,7,11,12,12,13,11,14,15,14,14,206,207,205,203,205,201,200,202,201,202,199,200,202,200,201,199,199,198,201,202,198,199,203,200,199,201,200,203,199,200,201,199,202,203,202,202,202,202,202,203,202,201,207,202,202,202,200,200,200,201,200,200,202,202,199,201,201,200,201,201,199,201,202,202,202,201,202,201,203,202,201,200,200,201,199,203,200,201,203,199,201,199,200,204,201,201,203,200,200,200,203,200,203,199,201,201,200,202,200,200,199,200,201,200,199,203,200,199,200,199,200,198,199,198,196,199,199,200,198,198,198,199,197,195,198,196,198,198,196,196,195,196,198,198,196,196,194,195,197,194,195,193,194,196,191,195,194,194,195,193,196,195,193,195,194,195,196,194,195,194,194,194,192,196,194,193,193,191,193,192,194,196,193,193,191,193,193,190,195,194,194,196,193,192,195,193,196,196,194,195,19
5,194,195,192,192,193,195,195,191,196,194,193,195,195,196,194,195,195,198,196,195,195,193,196,198,196,197,196,195,198,198,198,198,198,198,199,198,200,200,198,198,198,196,198,196,196,196,197,196,195,196,193,195,196,198,198,198,198,195,198,198,200,142,120,180,200,217,214,216,226,209,204,196,191,192,191,193,196,199,201,203,204,205,205,206,203,200,201,201,202,200,200,201,201,204,200,200,202,202,203,202,203,206,204,179,194,209,207,210,207,206,210,209,185,170,165,158,165,208,192,143,172,169,142,189,221,216,209,195,205,191,201,210,185,204,210,199,175,181,173,159,153,171,192,128,119,123,84,77,82,163,214,162,160,134,160,220,191,108,63,50,147,249,249,239,112,39,43,35,20,97,231,227,206,126,109,205,167,51,38,61,95,117,73,59,95,124,134,81,47,46,24,19,25,35,88,129,141,138,129,121,118,108,86,42,33,53,62,93,103,93,57,46,39,66,161,127,96,66,48,67,15,77,109,72,30,8,66,131,98,49,39,60,53,37,39,29,24,28,26,25,27,24,26,24,19,24,19,16,16,16,15,18,21,19,19,24,20,22,28,41,40,50,57,79,97,110,148,156,151,153,171,188,192,201,184,166,165,153,151,143,141,150,128,127,118,81,119,96,78,69,53,45,33,32,31,20,22,15,15,21,18,19,17,15,14,16,16,17,21,16,21,17,14,22,18,16,22,17,14,19,19,19,18,17,16,17,16,21,15,27,36,43,60,59,56,40,35,24,14,18,17,19,22,16,31,89,123,110,91,48,83,92,98,126,75,89,100,103,123,118,88,49,22,53,77,113,149,131,81,134,159,142,106,155,98,79,143,95,162,199,101,21,9,13,14,33,48,86,94,78,48,27,15,14,16,16,17,17,23,21,15,21,22,19,20,21,19,18,21,18,19,21,36,38,32,44,36,33,24,21,23,34,34,47,56,65,91,111,130,136,153,162,162,151,154,157,150,156,147,151,152,159,154,130,181,166,166,156,141,143,137,141,141,143,169,179,171,127,91,113,92,61,108,135,160,192,182,166,155,140,138,130,129,142,139,159,170,155,128,113,98,69,55,38,39,32,39,36,30,194,241,115,41,41,26,29,33,42,37,60,29,71,214,210,220,230,216,222,215,218,218,219,222,223,220,219,115,4,0,6,10,13,12,13,12,13,14,15,14,205,210,207,206,206,203,204,205,205,200,201,203,201,203,199,201,200,198,201,197,202,200,200,202,200,205,201,200,200,200,194,198,202,200,199,199,201,200,201,201,200,200,201,202,201,202,201,200,201,202,203,204,203,202,201,200,201,199,200,201,200,198,200,201,202,199,200,200,200,202,199,201,201,198,199,200,199,198,201,200,200,202,199,199,199,200,201,200,199,199,200,200,197,199,202,200,200,199,198,198,198,199,199,199,196,198,199,196,199,196,197,198,197,199,198,198,198,196,198,196,196,198,196,198,195,197,193,195,199,193,195,195,195,198,194,196,195,193,195,194,194,193,194,194,193,194,194,194,193,195,195,194,193,192,193,193,195,190,191,190,191,193,190,191,191,192,192,192,193,193,193,191,192,192,194,193,193,194,193,195,194,193,196,193,193,195,196,198,195,194,192,195,195,195,194,194,196,194,195,194,193,198,198,194,195,195,195,195,196,194,196,193,194,196,194,196,194,195,197,199,197,198,197,199,198,198,201,196,199,198,196,198,198,198,197,198,199,201,201,203,203,205,205,206,207,208,210,208,212,211,213,221,193,207,235,231,233,218,211,218,214,210,210,211,217,217,224,226,228,227,225,228,227,227,227,226,223,222,227,225,226,226,226,225,226,226,223,226,227,227,227,228,229,227,207,211,226,226,229,225,221,231,210,166,137,128,127,151,197,167,160,217,199,138,159,225,239,236,225,225,212,223,232,208,229,235,205,198,212,177,158,158,214,239,139,96,103,93,86,73,119,142,112,142,154,219,248,133,65,61,39,107,235,239,151,46,6,32,31,43,166,246,213,205,115,92,163,74,27,71,93,124,79,54,97,123,133,87,79,63,28,16,17,48,87,124,128,122,127,124,122,121,113,81,69,77,60,44,89,104,66,76,96,63,48,130,102,46,60,48,45,17,105,165,72,36,63,139,162,84,44,89,104,69,54,53,37,22,24,24,23,27,19,25,21,23,2
3,19,20,13,17,17,16,16,18,18,19,16,19,26,30,34,27,22,26,33,37,44,59,51,66,81,92,114,121,121,122,116,101,93,83,73,63,50,43,40,33,25,24,30,23,21,26,27,26,19,17,21,17,14,17,17,17,17,22,20,16,18,17,21,22,16,18,15,17,19,19,18,15,21,16,19,21,18,19,18,24,21,25,19,34,85,101,108,105,85,86,75,49,24,17,15,17,22,16,17,34,40,71,103,48,67,74,90,104,64,110,101,65,56,41,44,40,38,44,38,64,63,89,139,165,144,122,89,131,68,81,182,111,169,157,38,23,13,18,23,33,71,141,114,63,55,30,26,19,15,19,22,21,18,21,21,17,23,20,20,20,20,21,20,23,21,20,28,29,31,38,25,21,22,15,19,22,24,21,27,27,32,44,58,76,97,118,127,134,141,146,142,139,147,141,142,146,148,167,180,183,182,185,181,184,193,189,188,181,169,175,165,123,103,125,101,101,138,145,161,150,142,126,131,141,133,139,134,141,137,127,114,79,48,34,38,41,38,33,27,22,33,23,94,248,233,78,45,44,19,29,29,36,39,56,27,69,202,210,225,229,214,224,220,219,220,217,221,224,222,223,113,4,1,6,11,13,12,13,13,14,14,14,14,206,205,206,205,205,205,206,205,202,203,199,201,200,198,201,199,202,201,200,200,200,203,201,202,203,203,203,200,204,203,195,201,202,200,204,200,202,204,202,204,201,200,203,202,201,204,201,200,202,203,206,206,205,203,204,202,201,202,200,201,200,202,200,203,200,199,202,200,202,200,203,198,198,202,198,202,199,201,201,200,203,201,198,199,198,197,198,200,200,199,198,198,200,196,197,198,200,199,197,198,198,198,199,196,195,200,198,198,196,195,197,196,198,197,196,198,196,197,196,196,195,198,198,195,196,194,195,198,195,195,196,194,194,194,193,194,195,193,195,195,195,194,193,195,192,195,194,193,194,196,193,190,191,193,190,192,193,190,191,192,191,193,191,194,194,191,193,194,195,190,192,194,193,196,194,193,196,196,195,196,194,194,194,195,195,193,196,192,193,194,194,197,197,195,195,193,192,196,193,195,196,194,197,194,195,195,197,195,193,196,194,196,196,194,194,194,196,194,194,196,193,193,196,199,198,199,198,199,195,195,196,198,197,199,203,208,220,229,230,229,233,234,233,237,237,234,240,237,242,239,234,244,222,243,249,241,230,160,134,193,229,246,245,244,248,248,249,249,248,249,249,249,248,248,243,250,237,229,242,243,246,242,242,240,239,245,229,229,238,240,241,240,238,232,224,209,209,215,222,226,212,225,200,160,133,127,129,136,159,131,170,229,198,139,123,188,226,235,226,218,212,215,226,212,214,225,188,197,201,136,159,176,229,226,108,96,110,101,113,95,85,61,80,139,141,234,208,70,65,72,31,30,122,139,66,11,14,38,29,92,214,246,217,235,139,103,121,59,67,88,112,88,59,92,128,128,91,80,57,30,17,21,45,89,127,132,128,121,120,128,132,124,88,79,102,84,69,43,93,72,48,67,74,48,27,63,77,69,62,25,29,62,144,177,85,100,133,165,157,73,50,121,108,78,74,63,47,21,23,21,24,27,22,27,22,24,24,19,21,20,15,17,21,15,15,19,16,13,15,16,25,36,37,23,27,31,25,31,19,19,24,20,19,23,25,25,28,29,34,33,32,22,26,29,31,19,18,24,29,39,29,30,24,18,27,19,16,18,14,17,17,17,19,17,15,19,19,19,19,18,17,16,20,19,17,18,19,17,19,19,17,19,17,20,21,19,25,17,29,16,94,196,201,187,170,169,171,160,90,25,11,15,21,15,19,25,15,33,84,96,38,57,71,90,118,75,101,72,40,44,36,45,37,35,44,47,53,52,69,111,147,124,120,84,106,55,55,131,53,52,42,17,27,14,16,27,74,129,162,129,91,90,57,63,35,19,23,16,22,22,22,22,22,21,21,23,19,20,15,19,19,22,22,15,18,23,20,17,22,20,16,16,27,29,24,24,26,22,26,34,37,41,39,46,61,84,95,106,113,120,132,141,150,147,145,149,148,147,142,139,147,141,141,145,136,128,135,145,131,117,144,134,112,140,139,143,141,126,132,133,134,124,109,94,76,52,49,47,41,37,28,36,39,36,25,25,23,23,28,115,248,181,66,55,35,25,27,31,48,46,62,26,73,206,211,224,226,214,220,218,222,218,218,222,222,221,222,116,4,1,8,12,12,11,14,13,14,14,15,15,204,204,203,205,205,
205,203,201,202,200,200,200,199,202,198,199,200,200,201,199,203,200,201,204,202,202,204,204,220,234,206,200,202,200,204,203,207,203,202,205,203,202,205,203,201,202,205,203,201,205,206,206,205,206,206,205,206,204,205,204,203,205,207,202,205,206,207,206,204,205,201,203,203,201,203,204,203,204,202,200,200,199,201,200,200,200,199,198,200,200,201,200,198,200,200,202,201,198,199,200,199,198,199,197,196,199,199,196,198,197,196,196,196,200,198,195,195,194,198,196,197,196,197,196,192,197,195,197,195,193,195,193,192,194,194,194,193,191,195,194,193,193,193,192,194,195,193,193,192,193,192,192,193,193,190,190,194,192,196,194,194,195,193,195,194,192,193,193,192,194,196,195,196,192,195,196,195,194,196,198,194,196,198,196,198,194,195,193,195,196,196,197,195,197,195,195,197,196,194,196,193,195,197,195,197,197,196,194,194,195,195,196,197,196,194,196,195,196,195,196,196,197,195,199,198,198,197,197,202,198,195,199,195,200,203,195,200,205,205,205,204,203,200,200,198,193,194,192,187,174,162,203,187,165,158,151,139,83,73,119,158,167,164,162,163,161,160,155,155,153,151,156,151,149,146,168,167,132,144,148,147,149,141,144,149,161,149,127,133,137,139,141,140,139,161,145,129,133,153,170,128,134,125,125,125,120,122,130,149,120,177,231,199,139,88,106,125,135,135,130,137,131,131,128,136,137,123,139,113,71,105,124,141,128,83,114,133,116,126,113,99,49,54,90,81,145,113,36,51,60,36,27,41,41,43,29,43,46,53,165,245,246,232,242,168,116,103,84,99,100,68,62,87,122,127,93,87,54,33,17,23,49,92,138,128,129,130,126,133,125,130,105,66,48,70,76,71,80,57,42,31,45,49,38,31,70,91,62,36,30,128,116,67,80,86,125,128,130,113,56,39,78,76,64,67,65,46,17,19,23,27,28,23,27,27,21,25,23,21,22,17,19,16,19,17,13,17,16,14,17,17,28,37,29,35,31,22,15,16,19,17,21,16,20,18,19,16,21,19,21,31,28,31,29,20,16,19,37,61,59,46,43,19,14,23,15,18,14,16,19,19,23,15,17,19,16,17,17,18,18,21,18,16,19,17,18,16,17,21,20,19,22,17,21,20,16,25,15,26,18,63,160,164,170,166,166,178,149,78,20,12,16,15,19,26,22,34,80,104,61,29,71,66,107,145,83,55,40,39,37,42,58,56,60,66,71,59,55,55,58,75,131,158,98,102,54,46,107,72,43,14,14,27,17,21,29,106,161,162,141,137,136,139,135,71,24,13,18,17,18,23,19,20,21,19,22,21,19,17,16,22,18,19,21,19,21,17,17,19,19,17,19,19,19,22,18,19,20,22,29,36,37,30,27,41,39,37,40,39,45,51,71,91,103,111,106,112,120,122,122,126,132,134,133,133,125,136,145,126,139,163,139,123,128,124,127,107,95,76,66,55,41,43,33,30,27,27,30,30,38,39,41,36,26,23,29,17,27,24,79,144,80,58,60,33,31,33,37,46,55,53,36,140,226,220,228,224,216,222,217,219,217,219,221,221,220,221,116,4,1,8,13,12,12,15,13,14,15,15,15,202,202,201,202,202,200,201,200,200,204,201,203,200,198,201,199,201,200,202,200,198,201,200,202,202,200,206,203,240,252,205,201,202,198,206,200,203,205,203,200,204,203,203,200,201,205,200,203,204,205,206,206,205,205,206,204,205,205,201,206,205,203,206,206,205,206,206,204,206,204,204,205,202,203,205,205,203,203,202,204,202,200,201,203,201,202,202,200,202,200,200,200,201,199,200,199,200,198,198,202,199,198,200,199,200,201,197,198,197,197,198,198,197,195,197,196,198,198,196,196,195,196,196,194,195,196,193,196,195,193,195,191,194,195,197,194,194,195,194,194,192,194,194,194,192,191,192,193,193,193,192,193,193,192,193,196,194,196,196,196,194,194,193,193,195,196,193,193,194,194,195,193,195,196,194,194,195,196,196,194,197,196,196,196,195,193,193,196,194,195,193,196,195,194,195,193,194,195,193,195,193,193,195,195,196,193,194,195,194,193,194,196,193,193,193,194,196,193,195,195,196,196,195,196,195,197,198,196,198,198,198,198,193,199,190,120,79,80,71,69,66,62,56,55,53,46,46,36,33,29,29,105
,81,14,12,10,18,12,10,16,10,15,14,14,14,11,12,11,12,12,11,12,10,14,8,34,52,15,9,12,10,14,11,15,9,37,40,10,13,12,13,11,15,10,49,62,25,19,34,76,33,35,57,60,65,71,78,111,153,149,199,212,180,143,85,51,15,6,12,12,28,23,10,15,12,32,34,32,36,16,44,36,29,21,30,110,132,121,129,119,107,70,52,49,15,42,35,18,65,64,59,57,43,25,31,44,66,48,78,184,241,249,221,234,180,134,122,94,108,68,61,80,110,127,93,89,57,29,21,26,46,96,139,131,125,128,126,133,129,109,100,71,41,35,63,83,91,55,45,25,32,73,54,35,38,74,69,33,50,127,224,137,22,35,75,105,75,76,81,58,40,30,39,56,56,48,34,23,24,24,33,31,23,28,26,27,27,22,26,23,25,29,19,19,21,19,16,16,18,17,21,19,18,22,26,20,17,19,19,18,18,18,16,20,19,15,17,18,17,19,23,27,20,20,20,16,30,53,69,59,53,45,25,16,15,18,18,18,19,18,16,19,21,16,16,21,22,17,22,19,21,21,16,20,19,18,24,16,19,24,17,19,19,22,19,20,24,16,21,28,35,63,66,63,62,56,53,44,38,21,15,16,19,22,27,34,69,117,86,40,46,63,65,114,141,65,43,34,24,24,42,77,95,127,123,65,36,40,57,54,47,113,165,106,92,53,41,87,97,57,20,13,20,20,17,27,81,142,162,150,160,124,173,158,72,23,12,14,20,18,21,22,19,22,21,21,19,20,22,22,19,17,20,19,19,23,16,21,23,19,22,18,16,15,20,21,19,19,16,19,34,41,40,41,34,33,33,27,28,30,24,27,37,36,41,42,51,61,65,79,85,88,98,102,107,99,101,89,88,104,100,86,59,76,50,49,52,39,33,29,29,25,21,24,25,23,22,22,26,30,30,33,25,27,29,27,21,24,29,27,40,45,55,50,28,34,42,41,41,44,21,71,210,235,226,223,221,220,220,220,218,217,220,221,223,220,221,116,6,2,8,12,13,12,15,14,16,16,16,15,204,205,201,202,204,203,205,200,202,202,201,200,200,202,201,201,203,201,201,201,201,199,201,201,202,201,207,198,241,252,200,204,203,197,205,199,205,202,201,205,201,201,202,200,202,201,202,203,203,205,204,204,205,206,206,206,204,203,205,205,204,204,204,203,206,203,205,205,202,204,205,205,202,202,206,205,203,205,201,204,204,202,202,201,203,202,202,201,203,199,203,201,202,202,198,200,199,201,199,200,199,200,200,199,201,198,200,201,199,199,201,201,197,200,196,198,199,197,198,196,199,195,197,197,196,198,196,196,194,197,198,198,196,195,196,196,198,195,195,197,196,194,196,198,195,196,196,197,196,196,196,193,195,197,194,198,198,194,198,195,196,198,198,196,195,195,194,194,194,197,196,194,196,194,194,196,198,195,194,195,195,196,196,194,195,195,192,193,193,194,194,194,193,194,192,196,195,191,193,194,193,194,195,194,194,193,195,194,194,195,194,195,194,194,193,194,192,192,193,196,195,195,196,195,196,199,197,196,198,198,195,196,196,199,185,115,79,76,72,70,68,68,63,68,65,61,62,62,59,63,61,77,87,71,56,57,59,61,68,60,54,45,46,45,42,47,46,46,46,46,46,46,45,45,37,59,103,69,43,40,38,44,35,38,35,49,85,61,35,29,29,32,31,23,69,78,23,12,17,67,59,58,67,66,56,46,65,95,153,184,207,179,130,94,62,51,22,11,15,9,27,27,12,16,13,14,15,20,25,17,30,35,24,12,27,71,85,110,130,123,103,67,51,34,3,35,43,26,74,88,75,55,28,19,32,50,70,35,65,181,205,177,168,170,149,146,128,96,59,46,88,120,121,92,84,54,29,18,28,54,94,135,132,128,120,119,121,131,117,68,76,51,48,62,72,102,67,41,15,33,118,148,69,39,61,55,22,48,104,141,198,78,36,69,65,79,60,67,55,59,51,25,18,33,38,33,42,41,33,24,28,31,24,31,31,28,28,23,25,29,23,30,27,22,24,21,22,15,20,19,15,20,19,18,17,15,21,17,16,18,15,21,20,17,17,17,19,17,17,17,16,19,18,18,20,27,40,42,61,70,55,37,20,15,15,17,19,16,20,21,19,19,19,17,17,21,17,17,19,21,19,17,20,24,21,17,19,18,16,24,23,18,20,22,23,18,18,24,27,30,35,51,51,41,29,29,30,22,27,21,12,21,23,21,33,53,104,121,65,52,61,57,66,90,84,45,39,23,21,25,28,61,118,131,72,32,19,26,32,61,68,95,128,112,93,41,40,66,103,88,28,18,17,19,22,23,45,70,83,81,91,84,58,49,37,19,17,16,23,25,18,26,27,19,20,19,21,22,17,21,20,17,21,
24,24,17,20,22,17,19,20,18,21,19,19,21,19,19,16,19,21,31,34,34,27,23,34,65,76,57,45,32,22,26,29,29,33,37,39,30,27,32,34,34,33,33,37,37,33,36,44,36,30,33,31,38,38,26,22,21,25,25,25,22,26,25,28,27,23,26,21,23,24,25,24,25,24,28,27,25,31,30,39,43,46,47,49,49,46,32,25,134,240,237,222,219,218,219,220,218,217,219,219,220,222,220,220,115,6,2,8,12,13,12,15,15,15,15,16,16,199,203,203,203,203,200,204,203,200,204,199,200,203,203,202,201,203,202,201,202,201,202,201,200,201,201,209,195,243,252,196,205,203,197,205,200,207,202,202,203,202,203,203,203,203,203,203,205,204,203,203,204,202,203,207,203,204,206,202,206,204,205,205,204,204,206,206,205,206,202,204,205,203,204,203,201,202,202,203,203,203,205,200,201,202,201,200,200,202,202,202,197,200,200,200,200,199,200,198,200,202,201,200,198,199,199,200,202,201,201,199,199,201,199,200,198,197,199,198,199,200,198,200,198,197,199,196,200,197,198,200,197,199,198,199,196,198,199,197,198,197,199,196,196,200,196,196,198,195,196,196,198,198,197,196,195,195,195,195,196,196,196,195,194,197,198,195,196,196,195,196,193,196,196,193,194,193,194,195,193,194,194,194,196,195,193,198,198,193,194,194,196,193,193,195,192,193,193,194,193,193,195,194,193,193,195,194,194,194,192,193,194,194,193,193,193,194,192,192,196,196,196,197,196,196,196,195,196,197,197,195,196,196,199,205,191,193,200,199,205,206,203,205,210,208,209,210,211,211,211,191,171,189,218,225,216,218,217,217,221,219,220,215,218,216,217,219,219,222,218,221,222,220,218,210,220,219,214,215,214,216,214,216,212,207,210,209,212,210,207,208,205,200,198,205,176,156,169,172,191,170,161,148,130,139,120,105,117,160,190,186,157,90,44,36,98,169,183,176,176,174,153,171,171,129,154,160,138,110,102,163,153,145,127,113,104,81,124,134,115,91,46,53,98,91,101,87,59,74,67,55,33,17,12,36,57,61,29,100,198,117,79,92,79,98,130,125,63,29,64,125,126,94,86,49,29,17,21,61,103,125,131,128,123,114,116,120,126,101,38,30,33,44,60,95,90,44,24,48,116,194,208,127,82,52,13,63,162,142,49,71,65,68,78,60,71,47,56,52,49,61,34,23,23,34,46,55,56,44,28,27,32,32,29,17,28,32,22,28,28,27,25,20,24,27,23,24,18,18,19,17,19,18,19,17,18,17,16,17,20,19,19,19,22,15,19,19,14,21,18,18,21,18,15,22,33,34,59,97,90,61,31,21,21,17,17,15,19,18,19,20,21,17,17,19,20,17,19,18,20,21,17,19,22,21,21,20,20,19,19,22,21,21,20,22,24,24,19,28,31,36,50,45,37,34,29,29,19,19,20,19,22,23,24,29,59,128,113,68,83,81,59,62,74,46,33,32,21,34,28,54,126,145,97,41,26,30,25,29,43,71,88,97,111,62,51,71,69,116,111,51,19,12,19,18,25,48,52,49,41,33,27,29,27,17,24,19,16,21,21,24,26,25,25,21,22,24,24,27,24,27,21,23,21,20,25,20,19,19,17,19,23,21,19,23,21,21,17,18,24,24,21,19,22,21,35,80,122,107,79,57,29,19,29,33,31,38,31,24,22,20,22,23,24,23,22,23,22,20,25,28,40,38,37,42,34,27,21,25,20,16,25,25,24,24,22,22,24,28,28,21,28,25,22,24,30,28,25,47,36,28,36,35,48,68,68,59,61,54,123,167,231,248,225,222,221,217,215,214,215,215,215,217,220,218,215,219,117,6,2,8,12,13,12,14,12,14,15,16,15,199,203,201,201,201,200,203,202,205,203,201,203,202,201,201,200,204,200,203,204,200,203,202,202,204,200,206,193,244,252,191,203,200,197,206,200,206,203,199,202,200,202,205,202,204,203,203,205,203,201,203,203,201,204,201,203,204,203,205,205,203,206,204,204,206,202,205,204,204,204,205,204,202,202,203,203,202,202,203,202,200,201,203,202,200,201,200,200,201,199,200,197,201,198,200,200,198,200,198,199,199,199,199,199,200,198,199,199,199,197,198,199,198,201,199,199,198,194,198,196,200,198,197,197,196,199,196,199,198,198,198,196,195,196,196,197,198,195,196,197,196,196,198,196,196,196,196,195,198,196,194,196,198,200,195,198,196,195,194,195,196,19
4,195,196,195,194,196,196,195,196,194,193,196,194,196,195,192,194,193,194,193,193,194,193,191,194,194,196,193,194,196,193,194,196,193,192,194,194,195,193,191,195,195,192,193,193,194,193,195,194,189,193,193,195,193,192,195,192,192,195,191,195,197,196,196,195,194,195,196,195,193,193,194,199,206,209,224,230,235,235,235,239,238,241,239,240,242,242,240,242,239,214,205,239,252,252,246,244,249,246,251,252,248,246,248,251,251,250,249,252,250,248,250,251,246,246,249,240,247,249,250,251,248,252,245,249,247,241,251,248,249,247,248,244,251,232,218,242,252,252,213,200,188,187,205,168,132,128,165,189,160,112,67,32,55,174,241,250,252,252,250,221,252,242,205,248,238,196,174,203,252,230,210,179,201,179,112,132,123,93,56,31,102,219,199,183,163,91,77,40,27,35,22,27,41,63,59,54,181,200,88,83,89,49,40,78,75,34,58,116,125,92,81,52,25,16,29,53,107,134,122,118,118,118,119,123,125,130,94,39,33,35,35,45,61,61,31,48,111,177,202,178,100,40,15,10,104,212,128,21,44,62,92,72,64,71,44,49,43,50,49,41,31,40,58,69,68,59,61,39,27,25,29,31,26,29,33,28,27,35,31,25,26,25,27,27,27,25,24,19,19,21,20,18,21,23,14,20,18,18,21,18,19,17,20,19,18,18,15,15,20,18,18,20,29,37,30,81,129,98,55,31,20,17,14,19,20,15,23,19,16,20,21,19,17,22,21,21,18,21,23,16,20,19,23,20,20,19,18,22,22,20,20,23,22,21,20,21,39,33,28,47,50,48,44,46,30,25,23,15,20,21,21,24,30,67,132,117,67,75,101,79,56,56,41,27,26,22,34,27,89,160,150,102,67,71,44,24,31,23,59,82,94,67,58,113,118,68,117,138,63,21,12,17,17,29,43,51,51,30,29,30,24,30,23,21,23,20,21,23,30,27,26,27,23,21,22,30,25,27,24,24,31,20,23,27,23,19,19,19,17,21,17,18,21,24,17,17,18,23,24,18,18,17,34,78,115,104,77,71,61,33,16,22,24,24,33,27,17,18,22,21,18,24,23,21,21,20,20,20,24,33,41,33,35,29,22,23,21,23,23,22,28,24,22,23,27,27,25,27,25,24,22,27,23,32,25,65,104,53,27,34,48,57,76,77,68,69,99,217,246,246,243,216,225,217,214,214,212,210,213,214,214,217,217,214,218,116,5,1,8,12,12,13,16,13,14,15,15,15,200,204,201,203,205,201,202,202,200,200,200,200,199,199,198,201,200,200,202,202,202,201,204,202,206,203,205,193,244,245,180,199,200,202,204,199,207,203,204,203,199,201,200,200,203,203,203,201,203,203,201,204,204,202,200,203,203,203,201,202,203,203,203,203,201,201,200,200,204,202,204,203,202,203,203,203,205,202,202,201,200,202,199,201,201,199,199,198,199,199,200,199,200,199,200,198,198,200,198,199,198,198,198,198,199,198,195,198,195,196,199,196,199,198,198,195,196,199,197,199,197,195,198,198,195,196,194,197,194,195,198,195,196,195,196,197,197,196,196,197,195,195,194,193,195,194,198,195,193,196,196,196,196,193,194,195,198,195,195,196,192,197,194,193,195,193,192,192,193,192,197,195,193,195,191,194,196,194,192,191,194,193,193,194,193,192,194,194,194,192,192,194,194,193,192,193,193,194,194,192,193,192,193,194,193,191,190,191,192,193,192,193,194,192,193,194,193,193,192,191,193,193,193,194,194,196,195,194,195,194,196,196,195,194,196,199,200,205,204,206,206,205,207,208,208,208,207,208,209,211,213,203,184,189,214,224,217,210,211,212,212,212,212,212,212,213,213,214,214,212,213,212,213,214,212,214,211,213,214,213,211,212,214,211,212,211,213,213,211,212,214,214,212,215,217,204,199,210,228,213,178,177,175,190,213,167,124,148,182,185,141,82,39,39,122,212,234,229,221,225,222,197,221,197,186,227,173,160,181,221,249,174,151,177,226,192,122,133,96,60,72,52,135,244,208,212,165,83,55,10,63,98,55,39,60,66,63,154,249,219,107,115,105,41,17,27,49,67,109,97,78,73,51,27,17,29,49,103,131,122,118,122,120,116,125,128,127,117,111,73,40,29,34,37,39,34,31,97,162,178,161,92,33,38,56,18,66,141,82,52,76,65,77,76,66,66,40,39,36,39,54,33,30,44,70,84,73,60,5
8,56,29,32,53,49,33,25,30,25,30,34,27,27,21,29,29,25,25,27,24,21,22,23,25,21,21,19,18,22,18,22,19,20,20,24,23,17,18,23,19,16,21,20,21,19,26,35,27,53,83,66,39,18,14,17,15,18,19,22,22,22,21,17,21,21,22,21,21,21,21,23,19,20,24,17,23,24,22,20,19,22,22,19,22,25,22,22,23,24,41,45,39,59,75,73,81,84,57,31,21,18,17,20,23,23,23,56,146,123,87,109,127,110,63,51,31,22,31,43,67,55,71,83,59,71,79,72,42,30,44,33,39,74,85,66,96,157,98,54,126,149,76,20,12,14,23,28,37,51,55,38,41,44,29,27,18,25,23,24,25,21,27,26,27,27,23,24,29,29,24,25,25,26,27,23,26,27,23,23,22,19,24,20,18,21,25,24,17,22,21,21,19,17,24,28,59,88,77,76,80,80,50,27,21,15,17,18,25,21,22,23,19,20,22,19,24,23,19,22,21,24,23,24,27,26,21,21,25,25,27,23,24,24,23,27,27,24,24,27,27,26,27,26,33,25,27,26,41,150,125,46,39,38,55,66,85,78,71,76,78,200,245,237,230,213,218,216,212,211,211,214,213,214,214,216,217,215,216,115,6,2,8,12,13,12,15,14,15,15,15,16,205,207,205,202,203,201,199,202,203,202,201,202,201,200,203,202,201,202,201,201,202,203,203,203,206,205,206,193,234,219,167,195,195,199,205,200,203,202,205,206,203,201,203,200,204,203,200,203,203,201,203,203,202,203,203,200,201,201,200,200,202,202,203,201,201,201,201,203,201,202,203,203,203,202,202,201,202,202,202,201,198,200,202,199,200,200,200,202,200,198,198,198,200,197,199,199,199,200,198,198,198,201,199,196,198,198,198,198,199,199,198,196,195,197,197,197,198,196,197,193,194,193,193,195,196,195,191,195,195,195,196,195,196,195,195,195,196,194,193,194,196,195,196,195,196,194,193,193,195,197,196,196,192,193,194,193,194,194,196,193,194,194,193,193,191,191,192,192,191,194,193,192,195,193,193,193,195,195,191,193,193,194,193,195,192,192,192,193,192,192,193,191,193,194,192,193,195,194,192,193,193,190,194,193,194,194,192,193,194,194,191,194,194,195,193,193,194,193,194,194,193,193,194,192,194,196,196,195,194,195,195,195,194,194,194,195,196,195,197,195,198,198,198,200,198,199,196,199,199,199,204,199,188,166,178,207,208,204,199,198,200,200,198,199,200,199,200,199,197,199,200,198,199,199,200,198,199,199,200,200,198,198,198,196,197,198,198,198,199,198,197,199,195,198,200,186,182,187,204,170,128,143,158,191,207,152,124,173,207,180,130,63,30,102,198,231,217,204,196,203,197,184,194,147,162,187,147,159,179,225,177,72,108,193,230,186,141,109,89,81,74,66,127,196,156,126,100,65,30,2,72,130,82,79,71,63,167,237,246,182,74,84,39,16,19,28,84,105,90,57,49,49,27,19,30,57,102,125,128,113,117,121,110,118,129,119,108,119,124,98,63,57,53,38,31,38,92,156,179,162,103,100,110,127,130,64,44,69,71,86,79,62,77,65,57,50,42,37,33,39,39,31,30,49,64,74,69,54,57,73,87,106,105,73,36,30,28,27,34,33,29,24,27,26,23,29,27,29,30,23,25,27,27,31,25,19,21,21,24,27,22,20,24,24,22,22,20,19,18,17,19,19,18,18,24,39,44,66,77,57,37,23,14,15,15,20,19,21,28,19,22,23,18,19,20,22,21,21,23,21,19,25,25,21,22,22,22,19,20,23,23,20,27,24,25,28,27,30,48,53,61,120,135,141,155,134,92,53,25,13,17,20,19,29,19,49,128,136,145,139,152,143,95,53,22,23,34,81,115,98,82,61,34,25,52,63,71,83,60,35,32,64,98,75,117,159,101,94,148,165,75,18,15,9,21,31,39,57,71,66,81,79,51,29,24,23,22,22,22,23,26,27,24,30,22,26,29,27,30,27,24,25,25,25,28,29,26,29,24,22,27,24,21,27,23,21,23,21,19,19,21,23,24,41,72,53,67,131,137,95,39,24,21,17,23,16,18,22,19,23,17,23,23,19,21,18,27,20,17,27,19,22,18,21,29,21,23,24,26,24,26,26,27,24,26,26,25,25,29,30,30,28,29,29,29,17,116,200,96,69,88,66,73,74,93,71,76,76,43,161,219,214,226,211,215,211,212,216,213,213,216,214,214,216,216,216,216,115,6,2,7,11,14,12,15,14,14,15,15,15,201,203,202,202,201,203,202,204,203,200,204,203,205,204,203,204,200,201,201,203,205,201,20
1,202,206,204,203,195,235,233,184,194,193,203,201,201,203,201,203,202,203,204,205,201,201,203,204,201,201,201,200,203,202,202,201,201,202,201,200,200,201,200,199,201,200,200,202,203,201,201,205,202,204,203,201,202,200,200,201,200,201,200,201,199,200,200,199,200,199,200,199,199,200,199,199,198,197,199,198,199,199,199,200,197,198,197,197,199,196,196,198,199,197,196,197,197,198,196,195,196,195,196,195,196,195,196,194,197,196,194,195,192,194,194,193,193,193,195,196,194,196,196,196,200,195,197,198,196,199,197,196,195,195,194,196,192,193,194,192,195,193,193,194,193,196,193,193,194,194,193,195,193,191,193,193,193,192,192,196,196,195,193,193,194,192,192,195,193,193,193,192,192,191,193,191,193,194,193,193,192,192,192,192,193,192,195,196,193,195,196,193,193,194,194,193,194,194,194,193,193,195,192,194,194,193,193,190,193,196,193,193,194,194,193,195,198,191,194,194,195,197,196,199,198,196,197,198,198,199,196,200,202,198,174,163,189,203,207,202,194,197,198,198,198,195,195,198,198,195,196,198,196,198,196,196,195,196,196,199,198,193,194,197,195,194,196,191,195,196,194,196,193,194,196,196,186,184,182,186,160,115,111,128,178,182,130,146,195,215,184,119,55,90,191,230,225,206,197,193,199,194,182,176,138,176,187,162,176,194,199,103,44,133,186,178,177,139,112,106,81,81,77,127,152,78,62,49,42,41,3,34,72,76,81,57,149,240,246,206,91,66,37,5,16,11,45,108,86,55,43,39,31,17,31,56,95,127,120,118,118,107,100,105,128,124,103,108,111,128,118,113,100,53,24,42,108,166,199,161,116,128,150,160,154,131,66,50,57,53,74,72,70,73,55,43,45,42,45,46,55,56,37,43,53,67,64,53,67,102,122,130,145,144,107,59,32,26,29,30,35,28,27,24,28,25,28,34,32,28,26,24,28,31,33,34,27,23,22,29,27,24,21,24,23,24,25,15,19,21,18,21,20,21,19,47,80,95,117,95,62,63,60,34,21,21,19,19,26,22,19,24,23,21,19,17,19,21,26,21,19,23,21,19,22,27,23,24,19,18,26,24,21,30,31,27,28,22,32,53,79,124,169,199,162,145,141,105,72,30,12,16,19,15,20,25,28,89,113,124,145,133,152,110,42,21,27,16,96,166,145,122,87,36,53,97,105,139,105,39,23,24,71,95,94,124,148,145,141,174,149,49,15,18,12,15,34,54,107,123,139,169,137,85,46,22,28,25,24,24,16,22,27,24,27,24,27,25,31,33,26,27,27,30,27,29,26,24,25,26,21,27,26,26,30,24,23,24,26,18,23,21,19,24,48,60,38,66,130,125,67,35,25,17,16,18,19,22,21,20,20,19,26,22,19,22,22,24,23,20,22,21,21,23,27,22,24,26,20,27,27,24,25,24,29,28,24,24,27,30,29,27,29,32,33,19,83,223,168,89,160,129,83,57,49,52,47,71,63,27,143,205,206,227,211,214,208,210,213,210,215,212,215,217,216,217,214,219,116,5,1,8,12,12,12,14,12,14,15,14,14,200,200,201,201,203,202,199,204,201,202,203,203,204,200,203,202,203,203,200,200,201,203,200,203,205,204,207,203,252,252,208,201,200,204,200,199,203,200,202,201,202,202,203,201,201,202,201,203,200,200,200,201,201,200,203,202,201,202,202,201,200,199,200,201,200,200,199,200,198,199,201,199,200,200,201,201,199,199,200,202,200,201,200,199,199,200,201,198,199,200,201,200,196,199,199,199,200,198,198,194,196,198,199,198,196,199,196,196,196,196,196,197,198,196,197,196,196,198,198,195,198,194,197,196,195,198,195,197,193,192,193,195,195,194,195,196,198,197,197,198,198,197,196,196,200,198,196,198,196,196,194,195,197,195,198,194,195,196,194,194,196,196,197,197,196,195,198,198,194,198,193,196,197,193,196,193,194,193,193,196,191,193,195,194,194,195,195,193,192,191,193,194,192,195,191,192,191,192,194,192,195,190,192,193,191,196,191,191,194,194,193,193,194,194,191,191,193,191,192,192,193,193,193,194,196,193,194,193,193,191,194,195,192,196,196,194,194,194,196,197,196,198,198,196,198,198,196,196,196,196,199,198,203,194,177,179,194,204,205,202,195,196,196,198,196
4,139,112,98,94,84,76,87,110,141,149,150,142,144,135,128,130,120,109,98,104,111,118,138,142,136,126,136,147,147,120,122,100,16,2,11,11,14,11,14,13,15,15,15,14,190,194,193,198,200,198,200,200,203,203,205,204,203,202,196,198,197,196,199,194,196,196,196,198,196,202,198,243,250,207,180,194,117,18,5,11,9,14,15,19,16,17,57,150,199,200,201,202,200,197,202,198,195,198,199,201,200,198,198,196,198,196,195,197,199,198,197,195,196,196,195,198,198,196,198,199,194,195,199,198,197,196,195,196,196,195,196,196,197,197,196,196,195,194,194,195,195,197,195,196,200,196,200,204,200,200,199,198,197,195,196,194,195,193,198,203,203,213,215,215,210,202,196,156,139,159,141,133,162,171,146,150,158,105,95,115,120,114,136,141,79,108,189,211,130,62,66,67,106,174,210,194,192,161,121,112,100,123,130,175,193,193,203,194,200,203,191,103,49,57,101,159,165,131,102,87,100,92,76,69,69,69,77,85,87,104,101,91,98,92,94,100,98,115,113,91,93,109,116,120,130,125,110,87,84,71,66,77,67,71,54,64,80,75,97,120,142,165,155,123,132,149,152,152,149,161,147,119,146,145,129,145,146,141,141,154,153,138,142,145,159,181,190,188,186,184,164,167,184,184,171,156,150,130,124,115,118,124,102,103,111,143,123,109,101,90,104,102,107,104,141,163,161,164,182,159,104,88,166,241,118,81,102,94,145,173,174,229,188,94,146,158,107,55,61,77,35,53,134,211,169,83,89,113,97,89,82,149,196,187,159,149,125,174,178,148,213,151,164,158,54,28,56,168,137,68,121,178,134,114,166,92,118,163,95,29,26,133,167,156,89,44,41,38,61,59,51,55,67,57,52,78,51,39,66,80,96,83,57,46,71,54,47,73,46,52,63,65,69,48,33,23,32,70,100,124,135,132,135,118,108,101,87,87,98,110,101,93,99,75,41,18,33,51,43,59,66,45,21,28,36,38,28,12,18,21,22,26,30,25,19,26,26,29,28,29,29,37,32,21,24,29,24,19,29,24,31,33,24,31,30,30,33,30,36,33,45,69,85,79,56,35,22,19,18,21,42,54,55,61,56,50,54,53,53,50,47,46,44,40,44,47,42,39,41,43,40,49,44,45,46,36,39,33,31,33,34,29,31,39,37,41,41,39,43,43,49,51,46,51,52,49,52,54,53,55,53,55,55,52,51,54,54,55,50,49,49,40,38,44,43,42,41,42,41,37,39,39,36,35,39,39,35,41,41,43,38,38,43,43,40,45,45,38,43,39,41,46,44,43,47,44,45,46,45,44,44,44,43,45,43,46,45,39,42,41,39,44,38,39,45,41,44,37,41,39,34,38,35,36,35,33,36,40,38,41,36,34,41,37,41,39,37,41,43,41,37,41,41,41,41,43,40,35,44,37,36,40,39,43,41,39,44,42,43,36,41,43,43,41,68,73,41,42,46,45,81,44,48,48,48,48,48,48,51,49,53,54,85,60,58,57,59,79,60,57,59,59,61,67,68,73,80,86,85,90,84,78,90,99,99,96,106,110,96,106,84,110,111,126,110,101,104,103,115,101,103,102,97,101,108,113,125,152,157,153,147,126,115,109,106,114,125,149,156,160,159,153,146,142,141,125,112,107,98,96,90,97,117,124,131,120,121,123,129,115,113,100,17,3,12,10,16,13,14,13,14,15,15,15,184,190,186,199,201,201,205,202,203,205,208,210,208,205,198,194,193,193,194,193,197,195,196,197,198,198,201,249,250,205,181,199,120,15,4,10,17,27,25,29,40,32,50,158,214,214,222,218,210,213,212,206,203,208,217,215,214,209,204,204,203,202,201,204,202,202,203,202,200,200,201,203,205,204,208,210,205,207,213,206,206,205,200,202,201,200,203,198,203,207,203,202,201,200,204,201,199,203,199,211,214,214,221,214,211,214,209,205,200,193,189,183,184,188,199,206,206,212,209,212,164,121,137,118,123,131,105,106,119,103,86,87,90,71,69,92,100,77,86,107,69,86,167,166,91,67,56,45,74,118,150,123,129,120,99,93,85,93,96,142,155,145,151,141,144,150,155,101,75,95,158,207,165,113,101,117,116,107,108,104,109,107,119,121,123,131,122,123,120,128,130,133,139,135,127,123,132,140,132,112,103,100,86,97,106,100,110,116,105,102,93,116,133,131,138,151,149,138,149,158,157,168,171,161,162,160,155,149,160,162,163,168,155,160,168,184,
178,165,170,169,162,163,131,103,98,91,81,98,138,151,165,162,163,151,129,112,119,126,112,120,164,205,185,151,119,88,75,73,75,88,159,198,198,196,225,184,119,117,223,227,88,120,108,110,182,224,242,244,191,84,119,163,131,69,64,125,108,101,163,212,162,87,105,105,109,86,32,100,188,229,221,230,177,212,217,106,140,120,103,127,114,46,72,165,98,81,141,137,130,162,184,196,175,124,82,22,4,53,51,69,49,30,32,17,39,45,44,35,42,38,61,94,55,56,81,92,94,75,63,55,74,61,55,75,54,47,58,55,45,27,23,34,74,118,121,122,123,128,116,99,95,92,96,95,104,117,93,83,72,37,19,15,42,46,38,57,65,47,22,39,53,54,49,21,17,18,22,26,26,28,23,25,24,28,26,27,29,37,38,34,38,37,43,23,19,29,28,31,28,34,28,29,25,33,39,54,76,86,82,48,25,18,18,18,22,41,56,55,51,59,57,55,57,51,52,56,54,51,48,47,46,49,50,44,44,46,48,48,47,45,25,24,21,15,23,21,18,16,25,33,33,35,36,42,46,48,52,56,51,46,54,55,55,53,53,55,55,58,53,54,54,55,54,54,55,53,53,45,46,42,43,44,39,46,36,32,26,22,28,26,26,26,26,35,36,37,38,40,40,39,43,43,37,43,46,50,51,53,55,53,57,50,55,57,53,56,52,52,49,54,57,53,56,50,50,45,44,45,40,43,38,39,41,39,43,37,37,35,36,39,39,37,38,36,36,34,32,34,32,36,42,41,42,41,42,46,40,43,41,42,42,41,39,39,38,37,42,41,40,36,43,43,38,41,44,41,38,41,41,40,42,40,39,39,42,42,41,35,39,42,37,41,45,42,44,49,52,54,44,48,49,48,53,49,50,44,43,45,46,49,48,46,53,52,57,56,54,55,59,61,57,51,38,38,39,35,37,39,38,49,56,57,87,102,113,98,83,98,103,112,108,108,99,105,110,116,127,119,120,125,132,141,149,153,154,154,160,155,145,143,143,136,122,119,132,118,105,95,93,108,112,111,106,105,110,97,114,99,18,4,12,11,15,13,15,13,15,15,15,15,191,188,181,188,187,188,190,183,187,177,180,192,196,200,195,188,189,189,188,187,192,191,190,192,192,193,195,253,249,188,168,213,128,22,33,30,27,31,33,36,46,46,88,195,227,220,222,225,219,207,214,208,203,205,210,212,212,214,214,212,210,217,222,217,211,213,214,215,208,211,215,216,214,214,222,220,208,216,218,217,221,220,215,215,217,214,213,212,221,220,218,222,214,219,218,217,215,215,203,211,214,192,208,214,215,220,214,213,196,184,184,187,194,187,191,179,137,132,144,147,97,66,83,78,90,95,80,88,92,88,76,72,77,82,95,99,100,101,108,105,89,97,119,96,67,75,79,72,88,96,107,117,132,107,97,118,147,152,134,151,145,133,114,100,101,109,126,115,125,148,188,200,173,134,136,134,128,122,138,141,126,117,118,124,130,122,120,131,133,130,133,133,131,144,148,152,153,148,125,98,94,99,97,119,159,149,152,149,120,128,130,151,164,135,128,143,131,115,147,171,168,171,172,170,170,177,178,168,181,184,188,199,178,169,181,194,178,150,141,151,162,132,94,65,69,81,83,137,168,182,189,179,184,162,148,131,127,134,123,138,163,207,219,220,206,189,175,166,160,157,187,194,181,202,220,184,95,121,226,171,98,155,115,122,184,235,236,197,177,108,115,146,149,94,69,118,161,180,130,120,129,111,106,85,81,24,4,58,90,160,200,196,151,166,190,125,132,102,69,178,160,78,154,178,78,108,142,108,174,172,163,224,178,108,65,12,12,35,32,63,44,28,29,17,31,27,32,27,31,40,74,87,57,66,65,71,66,55,64,59,50,38,39,55,46,39,42,35,28,31,54,66,98,116,105,97,101,105,103,88,85,89,95,101,110,117,105,75,35,16,24,41,59,70,51,54,60,35,26,49,63,61,57,32,13,17,18,21,21,24,24,20,24,28,24,21,32,37,39,51,48,42,47,34,24,24,24,29,34,31,29,30,35,44,58,74,77,70,44,24,21,17,16,15,35,54,54,57,57,54,57,56,55,53,53,60,50,51,56,53,54,57,51,48,48,50,48,50,48,35,21,16,21,17,16,15,18,19,17,25,36,39,38,45,49,51,54,55,51,50,51,55,56,56,54,58,57,54,54,56,57,54,53,55,54,55,53,47,44,40,44,44,40,38,23,21,17,19,17,17,19,18,23,22,32,33,39,36,39,43,40,37,30,46,56,57,59,59,61,60,61,62,56,55,63,55,55,56,51,57,55,54,60,49,45,49,44,41,35,42,44,39,41,44,39,39,
38,34,46,38,37,34,20,23,21,23,25,21,25,27,35,41,42,44,45,44,49,47,46,49,47,42,39,39,40,39,42,41,38,41,39,42,50,42,38,42,44,45,43,41,43,47,37,44,46,39,42,36,40,35,35,39,36,41,49,55,55,53,48,54,54,53,58,54,53,55,51,55,54,53,58,54,55,61,60,63,59,64,69,61,53,36,23,23,27,28,37,36,27,47,51,54,71,82,87,74,77,100,115,106,98,85,64,67,68,88,98,83,113,127,131,131,126,130,133,150,154,144,136,139,139,129,132,149,152,138,123,117,104,109,117,109,105,98,108,101,111,97,19,4,12,11,16,13,15,16,14,15,15,15,194,180,158,155,145,141,151,151,141,129,128,143,162,184,192,191,192,190,187,188,192,192,195,196,194,196,177,234,242,143,165,215,131,28,33,29,25,30,28,41,42,51,82,176,178,136,150,166,157,153,176,177,152,120,134,146,165,188,194,210,200,202,210,210,200,199,213,216,199,199,215,216,194,171,187,207,185,179,193,187,202,214,209,208,216,220,214,196,192,205,207,213,214,208,210,212,209,208,166,158,139,113,141,152,146,154,176,210,216,219,244,251,245,202,168,108,67,77,84,99,87,82,98,101,118,129,121,129,117,108,117,100,103,107,113,117,110,119,123,125,110,91,94,78,85,118,120,141,139,132,143,151,146,93,94,146,207,213,176,177,162,143,126,108,118,127,151,150,151,195,201,187,177,154,135,126,112,110,136,134,113,95,98,113,116,114,104,109,111,117,110,100,109,121,131,146,140,125,113,100,105,105,97,131,145,127,136,135,120,126,132,152,136,108,102,100,93,93,147,180,159,150,134,138,157,151,144,128,145,170,173,169,145,127,141,150,119,108,107,120,144,127,101,95,127,160,174,196,195,184,179,162,161,146,132,129,136,135,132,133,140,170,209,232,249,249,251,251,249,243,230,200,168,142,146,118,79,120,182,126,94,144,116,118,143,193,236,175,162,120,108,120,149,129,72,67,149,197,116,102,122,131,147,113,84,42,14,55,19,62,96,113,120,114,204,162,181,158,127,205,124,99,198,151,65,128,110,104,191,124,78,162,131,57,36,6,39,122,121,91,62,61,74,63,59,49,41,48,57,61,77,69,46,44,42,48,47,34,46,47,37,32,31,33,26,31,28,22,37,65,91,94,118,108,81,88,85,105,94,77,75,80,106,111,120,122,73,32,21,22,52,68,97,112,74,59,55,31,37,66,69,62,66,51,43,39,24,17,19,23,21,27,22,24,28,21,26,29,30,44,55,50,46,40,27,21,25,33,28,33,31,29,44,63,77,74,54,32,24,19,17,16,17,40,53,54,57,60,61,57,55,55,57,53,54,53,57,55,57,56,50,54,55,55,51,50,46,47,44,29,22,18,17,21,17,20,20,17,19,23,39,42,43,44,50,55,52,56,50,48,57,55,53,54,57,59,53,54,53,59,58,52,53,51,56,53,51,51,50,46,44,46,44,32,18,20,18,21,18,17,19,16,18,25,29,32,34,33,44,46,39,30,39,58,54,57,61,63,61,60,61,60,60,57,60,61,59,54,56,49,52,53,51,52,41,50,45,37,42,41,43,44,38,42,40,38,44,41,45,45,26,23,19,16,21,17,22,18,20,26,31,36,39,46,41,46,50,52,51,53,50,46,41,42,43,41,44,41,43,46,42,43,44,39,43,46,44,43,45,43,41,45,41,42,46,38,30,27,21,24,29,24,27,34,35,42,48,49,52,50,53,56,56,55,55,58,56,59,56,55,59,60,63,63,62,61,61,66,63,50,38,26,29,28,27,35,51,56,65,79,55,53,51,39,57,60,59,83,87,68,53,47,36,62,74,68,77,77,107,119,121,118,106,112,125,132,136,124,121,116,102,107,117,129,142,120,118,129,122,134,134,126,115,124,133,111,130,103,16,4,11,12,15,13,15,15,15,16,16,15,183,160,138,123,108,112,127,143,145,127,118,116,139,181,203,209,206,201,203,204,211,210,212,215,214,214,196,238,224,133,155,205,109,27,46,22,29,33,29,36,42,51,72,130,95,50,60,81,78,91,144,128,89,63,70,89,114,139,156,168,136,116,125,137,129,147,158,162,139,130,149,148,116,87,123,154,113,112,121,114,132,141,134,131,156,178,157,104,111,129,141,158,128,126,134,141,153,160,105,83,87,64,90,78,74,98,133,208,246,249,250,250,250,207,135,96,93,108,107,119,116,122,143,145,159,157,143,150,135,130,139,120,119,128,115,113,112,122,128,128,128,104,115,127,148,182,183,182,163,143,149,1
60,139,88,90,137,184,178,162,170,151,149,141,131,132,129,145,153,187,192,170,157,143,130,109,102,93,79,107,114,107,99,92,97,101,95,95,103,98,103,99,86,94,112,115,117,120,128,115,104,120,114,99,107,110,99,109,114,96,94,108,131,125,101,97,94,88,97,150,170,143,119,99,100,111,108,103,80,112,141,128,118,95,92,102,105,101,110,125,137,147,136,142,133,141,160,151,165,137,118,118,111,113,107,105,108,112,117,115,107,109,117,128,141,144,146,154,167,178,195,211,216,212,200,202,166,92,151,234,152,92,106,101,127,186,228,229,175,155,141,144,109,133,152,97,31,132,219,128,97,122,147,186,246,241,214,180,159,99,112,141,159,182,137,148,130,192,170,150,203,80,125,207,104,86,151,136,139,153,80,22,63,73,44,29,9,62,142,152,122,90,124,138,125,107,83,77,73,76,83,90,67,41,34,30,42,44,39,41,39,53,53,43,33,29,25,21,39,74,109,111,109,129,112,88,89,97,100,88,91,88,97,114,113,111,74,29,20,19,45,85,97,112,121,114,111,73,23,30,64,80,93,89,69,63,55,33,19,14,22,17,19,27,21,27,24,25,23,25,40,45,53,52,42,36,27,24,26,29,33,30,47,66,77,69,48,28,24,18,17,15,22,53,55,56,57,56,57,55,57,54,59,56,54,60,57,52,56,59,57,57,51,50,50,48,50,49,44,40,24,20,17,18,18,18,21,21,22,17,31,40,44,45,45,49,51,54,50,52,55,51,49,49,49,55,51,52,55,58,57,53,56,55,59,54,53,55,53,56,56,51,50,42,25,21,21,16,21,21,19,19,22,19,22,31,36,44,49,51,45,25,28,48,61,60,61,63,59,57,56,59,59,60,61,57,58,55,51,55,50,48,52,46,48,48,44,41,38,44,42,40,42,42,46,43,48,46,46,48,39,22,19,17,19,16,21,22,17,21,16,37,37,40,44,42,46,49,53,54,50,46,51,48,45,42,43,48,42,46,43,44,44,41,45,42,48,44,45,47,45,47,45,45,42,33,26,20,16,20,22,18,22,25,24,27,36,42,41,44,49,52,55,51,51,50,53,57,55,55,55,56,61,63,60,60,62,63,61,56,42,28,29,29,29,27,37,58,99,142,118,69,67,84,75,74,70,63,69,60,38,38,29,33,84,96,103,111,104,128,129,126,118,112,116,123,118,106,108,106,105,97,92,87,108,106,83,83,109,130,128,125,123,124,134,137,121,141,102,15,4,11,12,15,14,16,15,15,16,16,15,166,149,125,112,114,115,132,162,173,171,159,155,164,193,211,210,208,205,203,202,205,206,206,207,214,219,229,247,237,139,119,138,57,25,39,24,29,32,31,35,38,56,75,122,100,64,68,78,85,104,142,132,113,99,122,106,111,126,120,120,76,57,53,62,71,78,88,100,83,62,67,80,78,61,71,94,77,76,80,69,71,78,72,65,89,101,88,66,63,74,89,86,65,59,59,84,103,109,93,83,77,81,95,92,95,99,103,151,178,178,173,174,181,144,113,108,117,115,119,132,122,124,131,134,140,122,114,131,124,114,123,116,117,122,117,114,111,121,120,122,126,124,147,166,186,223,237,218,190,160,145,149,138,106,112,129,136,130,118,127,119,125,128,123,109,92,109,157,195,177,141,124,119,106,81,77,75,67,93,116,107,85,78,75,79,84,100,105,89,95,86,79,108,130,110,102,126,132,118,103,125,124,103,113,101,96,105,114,112,84,83,107,105,103,101,102,104,103,128,124,100,100,82,81,103,107,107,96,118,131,112,98,92,89,95,110,113,132,149,152,156,150,158,151,107,91,98,95,87,88,82,80,89,91,96,89,96,102,99,97,88,84,69,57,55,53,54,61,69,105,154,200,230,233,245,217,116,134,203,152,121,122,100,161,241,237,193,160,160,158,174,111,119,173,157,73,153,238,123,98,101,99,186,247,250,250,251,251,187,232,204,177,212,107,100,121,148,123,172,160,61,172,163,71,129,201,252,184,95,59,16,28,43,49,38,33,55,59,62,65,56,85,102,88,88,76,72,77,73,69,72,63,52,34,27,42,41,43,40,36,57,69,53,29,20,28,34,69,106,113,114,101,128,112,95,109,88,106,107,105,113,111,125,108,61,28,12,26,49,82,110,102,110,112,118,121,72,30,51,94,111,102,80,61,46,30,26,24,16,15,18,25,24,24,29,27,30,25,27,29,35,51,56,49,48,34,22,27,29,34,50,74,79,57,38,24,20,16,15,19,31,50,59,57,54,57,57,54,54,61,57,59,56,56,56,53,61,57,55,51,58,55,52,54,46,50,47,45,40,24,18,19,
17,27,21,19,24,18,19,30,42,40,46,49,52,54,53,56,54,47,51,45,41,49,49,51,54,55,53,57,55,57,59,51,54,58,57,57,57,55,57,51,44,30,21,17,19,25,19,19,18,22,19,30,48,45,52,53,43,35,20,32,55,57,61,61,59,60,50,57,56,55,60,55,58,59,53,53,48,47,54,47,55,53,47,47,39,42,41,43,44,44,47,56,49,49,47,48,40,27,21,18,22,17,21,20,26,21,18,23,29,41,41,42,45,46,48,53,51,54,52,44,45,45,45,46,43,45,48,44,44,43,46,50,48,50,51,51,47,50,52,49,50,42,26,22,21,21,22,21,21,24,23,22,25,33,37,38,47,47,48,50,49,54,54,50,57,56,54,49,53,62,57,60,56,59,57,54,48,31,29,33,33,33,26,38,87,125,149,137,104,126,129,123,134,123,98,98,84,86,100,90,101,120,139,148,152,135,128,131,132,128,121,125,137,134,122,117,119,118,111,101,102,111,100,87,89,116,110,101,107,93,87,105,116,109,128,96,17,5,11,12,14,14,16,14,15,16,15,16,144,122,116,117,120,109,110,130,143,155,171,175,180,174,166,167,164,155,141,126,127,120,116,117,142,154,183,245,236,136,67,62,9,18,36,21,31,29,30,40,41,52,99,149,128,110,116,137,133,141,173,167,165,141,148,141,127,131,122,115,101,102,93,83,77,92,102,103,86,70,69,76,81,85,92,89,80,88,84,87,94,89,89,81,84,95,91,92,96,96,103,107,92,83,81,95,113,120,112,110,110,101,125,122,115,112,89,84,86,85,76,75,86,84,100,109,105,104,109,118,112,109,111,107,111,97,101,113,107,109,115,116,116,121,118,123,118,116,111,102,127,145,151,159,199,226,234,234,233,213,197,190,165,122,111,117,129,122,100,95,95,100,110,113,108,94,141,184,191,152,105,119,119,105,87,88,102,108,127,140,122,98,83,92,107,108,117,122,99,100,101,98,133,153,124,118,142,146,122,108,127,128,126,124,112,114,127,142,136,113,99,103,108,99,103,112,109,104,104,96,82,84,100,110,123,135,126,119,127,119,111,110,100,96,110,116,123,152,156,154,153,152,157,155,108,67,81,101,92,93,95,93,97,97,95,95,101,101,103,94,94,85,67,71,81,84,73,67,57,86,108,131,147,133,165,172,125,81,91,126,139,131,113,115,160,143,88,76,74,73,86,63,93,151,172,88,98,119,103,119,79,38,82,222,252,252,251,251,232,186,148,110,166,137,124,145,108,126,188,91,83,188,81,87,173,200,249,160,84,38,2,40,54,51,45,44,44,36,39,43,41,41,53,59,57,61,55,46,42,41,47,45,38,32,28,36,35,29,33,40,55,46,33,28,27,31,55,95,100,105,110,100,101,87,100,98,98,112,106,118,116,124,112,56,24,14,29,60,87,115,111,100,101,113,111,115,100,77,109,116,99,71,44,30,26,24,22,18,15,17,24,24,22,27,33,24,28,30,26,28,29,42,51,51,56,46,27,22,32,55,76,73,54,33,21,19,20,17,21,41,56,60,60,53,57,55,56,55,55,56,56,57,54,52,52,54,57,56,57,55,53,53,55,55,49,51,45,43,37,20,22,18,17,24,20,17,19,24,21,31,39,41,46,47,47,57,59,53,55,53,38,31,37,47,52,50,60,55,53,56,54,56,51,56,56,59,56,53,58,54,53,53,39,19,21,18,19,19,22,22,18,19,24,38,42,50,49,46,31,22,20,29,57,60,61,58,56,57,54,57,60,51,59,59,53,54,51,53,53,50,52,53,50,47,40,38,44,44,41,41,45,50,48,50,51,50,52,45,41,28,21,22,19,19,21,20,20,19,21,20,34,43,42,46,49,49,49,54,57,49,46,53,46,46,45,44,45,41,47,46,47,52,51,48,50,51,51,58,55,59,57,55,51,36,27,20,25,21,20,21,21,22,21,23,22,35,39,39,47,48,43,47,48,50,52,49,52,48,50,54,57,55,55,59,56,56,59,53,40,30,29,36,33,27,26,66,112,127,131,119,120,126,128,136,144,139,116,119,130,138,148,144,149,153,146,145,138,124,124,120,123,125,125,136,153,155,138,129,125,125,125,123,127,137,122,121,117,116,114,96,97,75,64,86,84,85,110,88,20,5,12,12,15,13,15,15,15,16,15,15,117,109,117,117,117,106,93,87,79,90,107,129,133,114,102,110,119,110,85,64,53,44,46,46,69,75,131,239,239,128,55,58,12,13,33,34,31,25,36,44,43,52,93,154,153,149,149,165,165,164,174,168,163,115,137,143,128,128,121,126,129,140,139,145,141,146,149,146,127,92,91,104,114,113,112,116,102,105,106,108,117,109,107,111,116,105,114,126,12
2,125,135,132,125,130,120,126,129,127,119,116,118,128,141,123,108,102,102,98,90,77,69,84,94,95,105,105,108,110,110,112,106,110,114,109,110,110,107,111,109,112,119,113,115,116,116,122,125,126,124,114,122,140,137,165,203,214,214,227,252,252,246,236,200,149,120,113,128,129,106,97,94,98,118,139,147,153,195,215,180,136,132,144,142,143,150,156,170,170,163,178,168,163,166,168,169,141,136,152,141,151,155,137,139,144,129,134,152,147,134,127,139,141,139,136,125,127,125,137,142,127,127,112,99,103,111,121,120,109,110,103,98,119,125,123,145,146,126,112,109,107,106,112,104,117,146,133,125,156,159,154,151,149,153,161,130,74,87,107,114,112,106,106,105,103,101,98,109,110,104,108,118,151,134,93,106,107,100,109,99,98,92,91,84,45,93,141,151,107,45,51,66,72,48,37,66,69,53,46,35,33,21,11,26,71,146,101,41,11,84,158,83,35,9,28,127,205,219,244,177,131,139,126,203,156,148,154,95,167,143,46,133,166,133,183,129,73,139,103,45,22,20,61,53,41,32,29,30,30,32,29,27,29,34,33,37,34,33,30,23,29,27,27,25,20,27,27,21,27,27,31,37,41,30,40,42,47,69,90,104,108,112,94,95,93,87,99,100,110,115,116,125,99,50,21,15,32,67,99,98,106,109,93,95,107,116,112,101,94,111,76,40,36,26,26,17,19,15,21,23,21,22,21,24,27,26,29,28,25,33,28,31,31,46,57,55,62,40,29,52,75,71,44,29,21,21,15,18,29,42,55,64,61,57,55,53,58,58,56,55,55,55,60,51,55,57,55,54,57,59,55,59,55,56,51,51,50,47,37,24,26,23,17,21,22,16,21,19,19,21,34,42,39,43,48,52,55,54,54,55,44,35,34,37,53,57,59,61,57,57,55,56,57,57,57,55,57,57,55,55,59,57,47,25,19,25,21,18,21,19,19,19,18,24,35,44,49,51,34,22,24,17,44,58,59,63,55,59,59,57,60,52,56,56,51,61,53,58,60,54,59,53,52,51,47,46,43,42,41,43,42,48,53,50,49,47,48,50,49,36,21,22,23,20,19,24,25,18,24,22,24,36,45,45,45,51,51,52,50,53,55,50,54,47,48,44,45,49,46,51,50,53,57,53,45,49,49,56,57,57,57,52,55,46,34,20,24,28,17,20,27,22,23,26,24,34,36,38,44,48,47,50,50,49,50,54,51,46,49,54,53,55,60,57,62,60,58,61,52,33,34,38,39,36,21,39,104,129,113,117,110,107,99,94,107,99,107,92,101,124,134,141,131,146,149,128,111,119,119,122,124,119,122,133,141,148,142,125,131,134,127,122,121,143,150,128,121,114,122,119,108,111,87,84,83,78,81,106,94,19,5,13,12,16,13,15,15,15,16,15,15,95,110,121,122,122,110,103,80,67,63,72,83,92,97,96,127,164,165,144,130,110,95,96,92,88,80,141,245,246,142,102,126,50,19,29,31,33,26,32,34,39,50,93,157,170,180,180,178,178,175,162,145,139,110,134,150,129,122,118,127,136,141,150,160,166,172,170,174,159,128,111,116,121,118,114,112,118,113,107,114,111,107,116,117,113,101,105,126,123,130,135,136,137,143,134,126,122,114,113,112,119,132,141,121,98,95,105,102,103,95,86,105,111,107,107,101,103,112,115,117,112,115,114,111,116,117,110,111,119,111,118,121,110,110,116,128,126,126,136,139,131,129,129,136,151,150,144,152,179,199,216,221,213,197,182,146,149,125,109,112,114,114,124,136,171,226,226,215,179,161,171,176,183,186,192,209,205,194,171,173,164,198,228,222,179,138,140,168,184,182,169,132,111,111,103,105,128,139,135,130,134,125,122,127,131,124,113,113,110,112,127,115,100,104,117,128,124,117,112,118,125,125,131,130,143,148,112,100,98,102,122,118,120,159,215,190,139,155,152,157,153,146,158,161,147,96,73,98,118,125,109,106,108,131,137,111,126,127,125,126,173,251,234,123,97,105,110,140,135,108,101,113,91,108,186,200,207,173,65,18,10,20,10,21,120,169,139,133,132,140,110,45,21,27,122,151,154,115,156,188,117,96,63,23,20,78,95,152,172,153,198,190,212,146,160,139,108,180,97,102,184,192,198,171,92,18,25,46,45,44,49,57,38,26,23,23,27,29,25,24,27,27,27,27,30,29,26,31,24,22,22,21,26,19,22,21,20,25,28,26,29,44,51,61,53,53,68,95,108,106,105,91,98,90,98,112,105,119,1
17,118,96,45,21,12,39,71,108,109,87,101,92,90,71,80,101,107,103,90,96,55,33,38,51,35,17,24,36,48,34,19,18,18,27,29,29,29,28,29,28,31,24,27,38,51,60,66,59,50,59,61,39,24,19,17,18,22,30,51,58,67,64,57,59,58,55,55,57,55,59,53,57,56,55,60,60,57,55,57,53,54,53,52,56,51,52,46,42,33,19,28,26,21,18,23,24,23,25,18,29,43,39,46,50,49,55,54,54,52,44,38,30,32,42,56,63,59,57,59,63,59,57,59,59,55,53,56,56,61,59,57,53,33,22,27,21,22,23,23,21,19,26,20,32,42,47,48,43,27,21,21,22,49,63,60,64,61,60,61,59,59,57,60,57,53,59,60,60,61,57,59,55,57,57,49,48,44,45,42,48,47,48,56,51,51,54,53,48,38,28,19,19,23,16,18,20,22,18,19,20,25,41,43,43,49,50,52,50,51,51,50,55,54,50,50,51,51,52,48,56,58,55,54,48,49,51,53,53,54,55,55,55,53,38,25,21,25,23,18,25,23,18,27,27,24,30,40,41,43,49,45,52,54,48,54,53,55,55,55,56,59,59,60,63,65,60,63,61,43,39,38,44,39,38,22,69,142,131,120,128,117,98,68,66,71,71,90,67,78,115,128,128,122,132,136,123,121,117,115,134,140,130,130,146,143,140,133,116,129,122,114,118,122,149,151,123,121,120,122,130,126,133,114,110,109,95,106,117,91,19,4,13,12,15,14,15,14,15,15,15,15,92,106,112,116,122,114,108,96,78,75,73,79,89,86,94,144,206,218,204,196,181,162,150,132,129,124,181,248,249,159,156,211,104,37,33,30,34,29,36,28,37,45,79,133,139,167,169,165,164,153,139,116,120,118,139,139,120,115,120,129,141,132,125,134,137,148,139,155,159,128,117,110,103,93,99,111,102,105,106,106,109,108,108,111,111,89,93,113,125,131,128,124,129,132,118,114,116,115,118,120,127,129,113,107,111,108,105,107,123,112,104,112,108,107,111,111,121,126,131,142,131,121,115,108,104,113,112,111,115,104,120,114,107,112,117,131,121,107,119,143,149,136,101,91,96,84,84,89,108,136,164,183,198,224,237,222,182,148,125,113,111,111,104,100,163,241,230,181,159,160,174,180,186,191,184,191,199,178,139,140,125,160,202,195,142,103,118,156,181,161,135,108,98,93,86,110,120,117,118,120,120,94,101,121,121,119,90,82,84,84,112,114,125,133,131,135,129,112,116,122,113,112,113,110,129,118,89,90,106,135,152,150,152,190,247,226,150,154,152,155,159,145,153,157,159,116,71,83,105,120,118,113,145,181,177,137,135,134,122,126,162,239,226,126,94,101,132,167,170,117,78,96,92,118,176,233,231,161,131,88,77,63,55,57,127,208,177,182,158,196,192,96,81,24,94,172,179,172,206,201,126,116,105,89,83,57,57,156,157,158,165,150,199,153,169,114,139,182,125,196,222,221,189,118,68,10,12,30,63,71,51,34,23,24,23,27,29,25,29,30,27,28,27,29,28,28,25,24,24,18,23,22,26,20,23,25,24,24,25,29,29,47,67,78,56,53,83,109,105,91,97,77,81,98,103,115,108,117,120,77,42,16,15,39,72,114,125,110,98,96,99,104,71,71,96,103,113,102,107,88,86,107,103,68,26,25,49,51,29,22,24,22,25,24,31,28,28,29,24,26,27,29,31,46,61,66,76,61,46,27,15,19,18,19,26,39,55,57,63,68,61,59,62,57,56,60,57,59,59,60,66,61,59,61,57,63,63,57,56,53,53,55,57,51,45,41,45,30,23,21,22,25,19,21,24,27,23,27,39,43,42,50,54,53,50,51,54,39,39,36,25,35,40,51,58,60,60,60,61,57,61,62,57,55,56,55,55,55,56,57,45,27,23,24,25,27,18,19,23,21,20,30,39,42,46,46,42,28,31,24,37,63,63,64,65,61,57,57,57,61,55,56,57,58,65,63,58,54,58,57,56,60,57,54,50,45,51,51,55,53,50,54,51,52,48,51,51,37,26,19,23,22,19,23,19,17,25,24,20,35,43,43,48,49,50,53,57,55,57,55,51,53,60,62,54,56,58,57,54,48,53,46,49,55,52,53,52,55,56,57,51,42,31,22,25,30,21,22,24,19,24,22,21,23,31,46,47,51,52,56,54,53,60,60,60,56,64,68,68,66,64,65,66,65,60,59,53,44,41,45,38,46,44,44,125,160,141,140,148,133,115,76,68,85,84,105,77,96,128,127,136,127,137,136,131,135,131,132,145,148,141,135,132,113,125,130,108,123,118,125,134,137,151,148,143,147,141,134,132,137,142,127,125,124,121,110,109,84,19,6,12,12,14,13,15,15,15
,16,15,15,91,97,103,118,122,119,119,104,93,82,84,77,78,85,71,83,128,143,140,145,152,147,137,134,148,176,229,251,248,142,170,244,158,62,25,17,33,34,44,49,39,46,67,91,88,118,123,104,108,101,86,89,95,94,108,110,99,97,100,107,101,100,98,88,93,83,91,109,117,104,95,93,81,92,98,96,99,108,110,108,110,112,109,114,115,100,103,122,134,127,111,113,112,110,107,112,122,123,121,122,140,124,105,120,127,128,127,124,131,127,119,126,129,120,125,133,134,141,148,149,137,129,118,110,99,105,109,100,110,94,110,111,103,115,117,136,117,103,103,120,131,131,104,83,84,75,77,78,84,102,124,137,139,151,178,210,207,208,181,162,116,98,77,63,136,192,169,122,124,129,137,145,134,132,122,132,132,130,117,121,93,98,127,130,107,95,106,122,136,130,113,105,101,118,125,129,121,101,102,104,109,86,105,113,101,88,72,73,62,72,85,98,126,137,135,130,118,117,125,122,110,109,116,104,99,97,89,117,141,165,178,170,168,184,220,188,147,165,156,160,160,142,149,150,168,147,83,71,79,105,113,137,171,181,184,156,135,112,104,98,95,165,149,122,107,121,152,171,183,145,78,76,81,107,127,139,160,169,221,156,153,166,118,95,95,162,199,165,132,152,174,130,71,10,50,139,145,123,189,165,74,83,75,79,90,78,97,120,139,136,88,141,185,162,137,83,151,159,168,235,193,229,170,59,38,2,31,60,57,54,32,25,32,35,29,27,29,28,30,33,29,25,32,24,27,26,24,24,19,24,22,19,25,24,25,24,26,25,27,32,42,66,74,80,60,79,106,98,92,85,84,64,82,102,103,105,101,110,75,30,18,21,49,84,116,124,127,100,95,107,103,156,114,88,100,107,117,112,119,105,124,139,145,101,39,33,34,33,26,30,29,21,24,29,29,35,34,31,24,23,28,27,26,43,61,68,81,79,46,21,19,17,17,27,50,55,61,67,62,66,63,61,60,61,63,61,65,61,62,64,62,62,62,60,53,55,55,51,57,54,54,55,55,53,45,46,39,23,17,20,23,21,19,23,26,24,22,30,43,45,51,53,52,51,53,54,49,44,36,33,29,34,43,52,57,60,56,55,60,58,57,60,60,57,58,53,49,52,54,50,41,28,19,23,21,19,17,19,22,21,23,31,42,45,48,44,39,37,34,27,46,66,62,61,65,59,55,56,53,55,54,54,54,60,56,59,60,61,56,55,61,57,60,61,50,52,60,55,53,55,60,54,50,53,53,47,49,35,19,23,17,19,25,19,19,25,24,21,30,37,46,46,47,57,57,52,54,57,55,52,58,64,57,56,55,58,59,55,49,45,44,44,48,54,53,57,61,57,55,51,50,35,25,26,21,25,22,24,21,17,22,23,27,25,43,57,54,58,61,61,63,60,55,61,60,61,69,66,63,63,61,61,60,62,59,55,49,41,42,33,37,48,57,91,145,142,127,139,141,141,127,111,121,122,114,143,127,116,122,130,135,112,132,145,131,139,132,132,151,149,136,113,101,84,105,125,111,118,120,137,143,130,129,132,144,162,153,141,130,121,128,113,116,111,105,104,102,86,19,4,12,11,15,13,15,14,15,15,15,15,110,113,112,122,124,126,129,118,110,108,107,103,104,100,95,94,92,87,67,76,101,98,98,103,142,195,232,252,246,105,128,237,157,39,27,18,28,37,63,78,48,45,67,83,72,91,81,66,77,67,74,77,83,82,87,92,90,92,89,89,93,86,86,87,83,87,78,85,97,92,96,97,102,101,96,96,96,104,104,98,101,111,111,124,127,109,119,124,133,119,100,106,107,107,101,105,124,120,117,120,136,136,129,136,121,125,129,120,131,135,141,144,135,117,117,117,128,136,138,147,131,134,131,124,113,114,129,125,131,110,121,116,107,116,113,122,116,110,99,118,151,171,147,123,110,96,98,94,97,92,103,115,103,85,110,178,212,229,233,218,182,146,78,47,86,142,107,67,93,99,104,106,108,110,96,87,91,100,96,96,88,87,89,105,107,109,112,97,110,117,113,109,124,136,127,131,100,82,88,98,113,97,108,102,84,78,63,78,69,61,74,66,86,112,122,127,120,124,134,120,113,123,120,116,112,116,133,152,175,184,186,184,171,164,168,160,155,173,165,166,168,153,150,139,167,176,106,68,68,93,113,129,150,150,147,139,130,118,105,91,96,107,109,119,145,160,173,174,186,174,114,86,94,121,125,131,107,189,240,185,245,216,152,104,109,185,199,172,148,161,135,81,6
6,9,45,159,165,135,181,131,42,42,38,60,66,69,89,122,144,106,113,171,171,139,76,105,148,127,189,157,85,147,113,27,7,28,67,57,38,34,33,31,27,39,42,35,32,30,27,28,28,29,23,27,25,24,30,21,22,23,20,22,23,21,29,26,24,26,27,43,59,84,79,63,57,87,98,98,91,73,78,63,89,110,98,97,97,62,29,17,27,59,98,122,122,116,110,92,97,89,104,129,88,97,105,106,117,105,110,108,117,120,129,133,86,42,33,37,42,45,43,36,22,25,33,31,32,30,26,28,27,30,34,51,67,73,97,103,66,42,51,39,41,51,56,61,63,66,65,62,61,60,60,57,57,60,58,63,59,56,60,53,55,54,50,54,47,49,55,51,46,53,53,52,48,45,37,19,19,23,22,23,19,19,18,19,30,40,50,49,47,57,50,53,57,55,50,42,39,34,38,37,44,54,54,57,57,61,58,57,64,53,56,61,59,55,52,51,50,48,32,20,23,19,17,22,19,21,22,20,27,42,46,49,48,38,29,30,29,33,53,63,59,61,61,53,55,54,50,49,54,56,57,57,58,57,55,60,62,59,53,59,63,59,61,59,57,59,61,58,56,55,53,53,49,49,46,40,20,18,25,19,21,15,17,24,27,27,33,43,44,53,57,57,57,57,57,55,55,58,62,58,59,58,55,50,53,50,41,39,40,47,49,52,60,60,60,61,59,51,49,33,18,22,24,25,24,22,16,21,27,27,27,39,51,55,59,61,59,62,61,57,60,57,62,63,60,64,59,66,62,58,62,59,60,57,45,47,44,43,77,77,74,107,122,105,106,120,114,114,124,127,152,142,143,165,127,127,122,117,117,102,132,139,118,120,116,122,139,143,137,124,117,107,118,131,121,113,99,107,113,105,100,105,125,145,145,131,119,98,93,90,98,112,103,95,108,86,21,6,11,12,15,13,15,14,15,15,15,15,116,123,117,119,112,117,134,136,138,139,142,141,145,150,151,167,160,137,113,100,107,104,103,101,111,139,196,250,237,79,88,180,74,31,33,17,25,26,65,82,46,45,75,89,89,93,79,78,85,86,89,84,84,87,89,92,93,88,87,94,88,93,98,91,96,90,90,90,93,96,107,131,126,111,98,94,94,91,92,87,96,105,108,123,122,106,109,112,115,112,97,104,106,101,101,107,118,117,117,115,125,128,134,136,103,103,106,104,117,135,136,123,125,108,105,110,122,133,140,139,131,136,126,135,131,139,150,155,160,134,137,119,106,112,105,109,107,107,113,159,189,189,166,139,113,116,141,129,111,108,126,134,113,116,112,156,181,197,232,234,238,215,127,101,132,133,113,69,66,63,71,117,140,128,121,125,104,90,99,108,96,103,118,117,116,121,108,93,105,105,104,107,112,123,108,102,81,81,111,127,123,103,118,112,93,79,74,74,71,73,64,57,58,71,107,128,122,133,128,107,102,112,125,129,127,133,156,172,179,182,178,171,170,164,167,160,159,171,170,169,165,166,163,121,126,156,124,102,96,110,111,112,118,111,112,103,107,120,129,117,107,111,102,109,143,170,181,162,169,169,130,117,125,129,129,137,128,225,222,191,251,213,163,103,128,214,201,136,126,137,103,68,50,5,66,205,173,143,189,100,26,66,78,122,129,125,142,142,140,89,146,178,146,126,102,179,196,154,148,78,5,37,34,32,57,55,56,35,29,28,23,37,28,31,43,43,39,33,29,26,33,25,30,27,23,29,20,24,24,24,24,22,26,29,31,23,24,27,35,53,70,100,88,60,41,75,97,87,78,70,86,82,104,120,109,89,50,27,25,29,62,98,111,120,116,104,106,84,90,90,83,85,35,56,68,85,97,94,100,84,110,103,123,141,116,75,38,48,57,57,56,51,33,20,31,38,26,27,29,25,26,44,61,58,55,74,103,94,54,62,79,60,59,59,62,67,68,69,62,56,55,59,55,59,56,53,56,53,53,50,57,56,51,55,54,55,53,47,59,54,51,57,55,50,54,53,30,19,22,24,20,19,23,20,22,22,25,48,54,53,54,53,54,56,56,55,48,42,42,33,38,47,46,50,58,57,55,56,57,59,59,54,53,58,58,53,52,53,50,41,22,22,19,21,21,24,21,20,22,21,31,44,50,53,42,36,31,29,32,29,52,59,58,59,60,55,55,55,53,49,50,52,54,59,54,56,46,48,55,59,59,54,54,56,59,55,59,55,56,57,57,57,53,53,52,55,51,28,19,21,20,19,21,24,19,18,21,24,39,48,50,58,57,53,59,61,57,55,60,59,58,58,57,60,57,60,57,44,46,43,47,54,47,60,57,60,57,57,57,52,47,25,20,21,22,27,21,15,23,19,21,26,24,43,59,59,56,61,61,58,50,49,57,61,60,61,61,58,59,63,63,60,57,57,60
,54,42,58,101,140,162,140,124,116,102,83,96,100,78,84,95,120,132,131,134,132,105,97,92,92,113,111,127,111,97,113,112,122,139,152,150,145,146,135,127,119,129,122,82,78,97,103,99,101,108,125,117,111,108,92,83,81,92,102,101,95,107,95,19,5,13,11,14,13,14,14,14,15,15,15,116,118,105,112,104,110,130,130,136,141,144,137,143,146,168,205,204,191,153,129,123,118,119,113,120,116,171,250,233,78,69,84,11,37,35,16,29,26,39,45,46,57,89,113,103,111,98,92,100,101,101,89,93,94,98,105,95,92,89,92,96,88,96,94,91,92,93,100,96,91,107,133,125,115,110,100,89,88,101,105,98,106,115,116,101,90,111,111,113,108,101,116,113,115,111,111,117,111,110,102,106,106,110,120,97,100,108,107,110,111,111,111,113,105,116,121,128,137,126,127,113,103,101,110,116,113,118,124,127,115,116,103,100,103,100,104,98,104,112,141,157,150,128,110,106,128,163,137,110,115,132,134,112,106,122,143,159,169,176,178,191,242,218,208,167,159,189,139,131,81,57,112,151,157,155,159,137,125,128,137,133,122,129,132,117,120,114,85,89,94,80,81,90,92,83,78,77,97,124,130,108,108,119,100,98,91,98,99,84,81,75,62,51,60,95,99,106,120,126,107,101,121,113,112,120,142,165,171,177,172,168,170,169,168,167,168,167,166,169,167,164,166,178,114,75,123,131,130,125,138,116,103,102,91,94,77,78,101,123,106,108,107,98,106,113,132,143,139,133,125,120,129,133,120,127,118,158,252,201,215,251,221,166,123,201,221,136,79,110,184,137,49,42,1,78,184,131,152,198,101,42,95,132,166,159,155,146,177,141,104,170,145,154,170,172,230,245,158,92,42,1,19,36,65,71,46,35,24,24,29,26,31,34,33,34,40,44,37,30,29,26,26,26,24,24,23,23,23,23,26,24,29,29,25,28,27,25,27,43,54,82,108,100,77,54,76,77,78,77,81,94,89,119,128,102,51,17,17,37,66,105,116,114,108,106,108,95,85,92,78,118,103,17,14,30,69,87,90,69,53,88,98,119,131,102,80,59,53,54,62,67,63,47,27,22,32,33,33,33,26,46,57,54,41,31,55,61,55,39,59,81,68,63,59,62,71,70,61,56,56,55,56,57,53,58,55,49,54,53,55,53,50,56,60,57,59,59,57,55,54,59,54,55,57,51,42,28,20,17,21,23,21,21,16,24,19,32,51,52,52,50,54,53,50,54,53,49,39,32,29,35,54,54,55,50,53,57,55,56,52,54,51,56,57,47,52,55,53,48,36,21,18,24,26,21,21,24,23,23,24,34,50,52,51,39,35,34,37,29,36,52,55,52,59,59,53,53,50,52,45,48,49,51,48,50,45,30,38,47,53,58,53,53,51,54,57,50,54,54,58,55,53,57,52,50,52,42,25,17,22,20,21,24,22,20,15,24,32,46,52,55,63,53,57,57,59,56,55,62,59,61,65,65,65,59,61,55,45,46,45,50,49,54,59,59,62,62,60,58,53,37,19,19,22,23,19,17,22,25,19,20,26,37,56,60,59,57,65,62,50,48,51,58,58,59,59,56,59,60,60,60,63,63,55,61,49,43,116,162,184,187,160,148,113,85,79,81,87,73,77,90,102,121,109,106,100,88,77,53,71,99,113,117,100,89,107,125,134,141,153,148,131,136,133,114,108,147,148,106,101,108,114,114,113,105,112,113,108,113,108,106,96,89,80,81,84,101,89,19,5,13,12,16,13,15,15,15,16,15,16,155,141,112,120,116,110,111,107,111,113,118,109,108,112,120,158,169,166,135,122,122,118,116,122,139,133,196,250,228,71,72,64,4,49,28,26,31,23,36,41,44,55,103,122,113,119,112,111,110,116,116,112,108,110,116,123,124,117,115,110,113,115,115,107,107,110,119,125,113,113,107,117,117,116,111,95,96,103,113,103,107,112,109,110,89,95,117,120,114,114,118,126,125,121,126,125,122,112,90,91,94,80,92,110,99,106,117,113,101,107,104,109,116,107,117,114,108,107,115,111,92,90,92,95,82,83,84,91,105,104,108,98,102,98,97,99,92,86,88,111,110,111,108,111,97,114,147,124,108,99,92,89,69,69,86,99,149,193,190,139,149,234,201,157,88,98,202,230,225,179,149,150,145,117,131,139,141,146,148,137,128,118,122,124,105,110,106,90,102,98,95,91,95,102,82,77,80,84,111,116,91,93,97,84,79,99,135,131,120,108,88,79,68,81,98,95,95,124,137,120,125,118,99,116,143,158,1
71,171,163,170,171,170,169,164,168,168,168,170,168,166,163,166,173,141,94,112,127,122,128,136,131,117,109,111,103,78,67,78,96,93,95,97,102,105,96,84,94,107,97,99,102,117,128,112,129,123,202,252,212,242,251,243,174,160,225,192,85,64,169,211,177,62,32,7,93,204,143,181,210,125,80,101,153,155,136,162,163,179,120,144,171,150,181,196,185,165,188,136,71,19,5,49,61,64,43,31,33,29,29,27,32,33,31,33,32,33,43,44,43,26,28,33,24,24,23,24,23,26,23,29,27,27,30,21,26,26,29,39,51,61,82,105,103,86,66,71,75,87,91,99,98,97,129,93,41,19,16,42,78,106,119,114,101,107,104,105,99,83,96,99,140,146,40,14,31,69,95,84,78,74,93,98,124,114,67,70,96,97,57,57,67,68,61,39,26,23,33,33,35,51,60,48,30,22,20,35,59,55,50,81,83,65,59,62,63,69,69,61,53,50,53,56,55,52,55,49,55,57,55,51,50,54,54,58,53,61,59,55,60,56,53,53,53,54,53,44,26,19,21,18,19,17,17,20,22,28,39,50,50,49,49,47,53,51,57,57,47,38,29,35,45,50,50,56,53,56,55,51,56,59,55,53,53,56,53,54,53,50,44,30,18,19,19,16,21,19,18,21,18,29,48,51,50,50,40,35,39,36,31,41,55,49,51,53,51,45,35,36,39,39,37,36,32,33,28,26,25,25,34,43,56,53,50,56,53,53,51,53,57,49,56,57,50,52,54,46,37,24,15,19,16,24,24,19,19,22,20,31,54,57,54,59,57,57,56,57,59,60,58,56,62,61,61,63,57,61,50,43,46,42,53,56,54,59,57,61,62,59,57,45,23,21,17,19,21,19,25,21,20,21,23,34,50,60,59,60,64,63,60,47,41,54,57,57,57,58,59,54,60,62,57,59,66,60,63,43,58,126,159,156,146,131,105,81,56,44,84,107,78,77,99,129,122,115,110,105,113,91,83,79,93,107,105,95,98,122,130,141,145,150,136,118,110,106,110,118,165,172,138,124,109,104,105,105,108,122,121,119,125,128,116,99,93,74,71,59,69,75,21,7,12,12,15,13,14,14,14,14,15,15,198,162,136,132,130,120,119,111,101,120,135,117,121,119,100,114,117,120,115,122,118,115,116,127,151,150,214,249,229,71,69,49,13,52,21,35,31,26,43,40,45,54,104,120,105,113,115,117,115,125,126,115,118,110,115,128,132,137,123,118,120,123,128,115,110,109,119,128,127,125,115,109,105,105,115,112,104,109,115,112,110,117,116,109,100,104,125,129,130,123,121,132,130,129,130,121,125,113,100,95,81,80,93,104,97,98,103,103,90,85,89,95,104,98,113,97,88,92,103,122,105,113,116,115,110,105,112,119,128,128,120,118,113,108,114,102,103,101,104,112,109,122,122,111,100,104,115,111,110,85,62,77,78,63,55,51,97,157,177,151,143,151,127,97,24,28,121,181,237,237,221,201,156,109,84,78,84,113,121,107,102,90,102,106,91,98,112,112,119,127,105,99,101,95,111,101,77,85,101,107,84,98,103,77,81,84,121,134,127,117,96,88,77,91,126,121,145,159,146,131,123,129,126,147,163,161,158,156,148,152,160,160,159,160,160,159,163,163,162,157,153,157,173,158,126,114,103,118,127,135,128,113,106,102,89,72,84,92,106,91,77,84,86,87,68,60,62,80,91,87,92,107,105,112,118,139,242,252,228,227,244,240,156,157,220,155,47,32,97,191,175,69,36,21,145,253,185,159,180,143,107,115,182,158,151,179,182,139,96,172,174,168,180,164,110,42,86,83,34,19,36,66,56,38,29,27,32,36,35,35,34,33,33,32,32,31,28,46,46,35,32,21,31,25,23,32,25,25,26,31,23,24,28,24,23,27,38,50,57,80,101,95,81,63,69,91,94,94,93,117,112,88,70,30,18,24,50,87,111,117,109,100,105,98,105,104,88,97,106,94,160,177,90,66,52,74,87,88,93,97,107,106,115,96,78,92,113,136,94,49,63,67,71,54,31,21,24,43,52,51,39,23,21,22,16,24,41,60,67,91,84,63,67,61,69,70,61,54,55,49,55,55,56,53,48,55,55,52,53,55,55,55,55,58,63,55,56,59,53,55,53,53,53,54,49,39,24,17,20,17,18,18,19,19,32,36,39,49,44,48,41,39,42,42,46,43,38,29,35,31,36,48,44,49,55,58,57,56,57,55,54,50,53,56,53,51,49,47,44,27,19,19,16,19,19,21,19,16,27,41,46,53,50,45,43,34,37,32,32,54,59,57,54,55,37,19,18,16,17,15,20,21,17,19,17,20,20,15,32,50,55,53,50,52,48,52,53,50,51,51,53,54,55,47,51,46
,26,21,21,17,16,19,22,18,19,23,29,43,60,51,52,57,54,59,53,57,55,59,59,50,57,57,57,61,61,50,42,38,38,40,46,56,49,56,59,57,61,58,54,38,26,17,17,22,19,21,21,27,21,19,26,44,63,65,61,66,60,57,56,41,46,55,60,64,59,63,61,62,66,63,69,70,64,65,55,38,92,146,139,126,114,86,73,68,51,62,113,125,69,63,96,105,112,115,118,105,100,90,93,93,86,84,83,101,112,118,120,127,136,145,126,101,101,101,102,99,137,156,133,110,76,64,65,77,92,104,111,116,126,122,114,95,91,83,53,39,34,43,24,8,14,13,14,13,14,15,15,15,15,15,171,155,144,131,126,128,131,126,115,122,141,130,137,139,115,109,101,117,121,131,128,120,126,135,147,147,216,250,228,74,70,43,12,50,19,29,30,29,44,43,47,56,101,117,92,96,108,111,108,119,115,108,108,101,100,103,113,117,113,98,96,110,115,111,101,89,93,108,112,109,102,109,105,110,121,117,119,114,117,118,122,118,112,118,107,122,136,133,130,115,121,126,124,126,126,120,109,113,103,98,92,84,111,117,97,90,87,87,77,80,71,73,93,94,104,101,95,93,110,115,131,145,145,151,145,147,147,150,134,133,130,126,125,115,103,107,121,122,117,115,118,122,126,122,112,107,102,102,109,102,101,121,111,79,56,51,56,78,93,104,126,121,117,147,127,51,35,112,153,164,220,195,194,181,138,104,83,88,89,74,83,73,86,110,103,115,122,118,123,130,113,94,72,79,115,104,92,84,104,121,108,125,127,118,96,79,82,99,120,113,123,123,96,120,139,158,163,131,120,117,131,149,149,162,165,149,148,146,135,143,152,155,157,152,151,152,148,147,148,148,145,152,170,162,140,122,108,105,111,111,115,100,95,105,74,69,99,122,110,68,56,51,53,61,53,44,55,79,88,92,88,94,84,89,107,179,234,252,229,213,240,226,163,188,221,148,40,6,12,46,84,40,31,40,159,245,140,104,122,132,111,144,203,163,161,176,174,100,137,210,193,182,154,118,70,14,8,27,39,53,68,48,26,29,27,27,29,24,40,39,35,36,29,33,29,29,29,25,44,47,34,24,22,26,24,27,27,28,27,24,24,26,27,27,28,31,49,54,70,93,99,73,62,72,82,101,103,107,106,118,84,49,27,15,33,65,95,121,116,105,101,104,101,99,102,92,94,104,113,95,150,176,94,92,75,76,80,86,104,102,103,105,113,98,98,111,113,122,88,59,54,63,75,64,48,21,31,49,44,38,21,17,24,17,21,30,53,57,68,90,74,63,66,71,78,68,55,56,55,55,56,61,54,53,57,59,60,59,53,57,61,55,57,59,56,59,58,60,53,53,55,53,55,50,50,32,16,24,13,19,20,16,21,24,40,44,43,45,42,40,37,39,33,25,27,27,24,27,25,24,22,33,36,43,56,54,54,53,54,54,54,55,51,50,49,54,50,44,39,26,15,20,21,16,18,18,18,29,32,32,39,47,48,41,34,39,36,27,39,50,44,50,53,48,30,11,17,19,14,16,16,15,15,19,21,17,19,22,47,57,57,58,52,51,48,55,49,48,50,50,53,49,51,51,45,41,25,16,18,18,16,19,22,15,22,33,39,50,53,54,55,56,58,51,53,57,53,58,53,47,54,52,51,53,54,47,35,31,42,43,46,53,49,56,55,53,53,53,47,31,23,21,16,17,18,19,21,18,20,25,39,56,61,63,68,65,62,56,45,36,48,63,60,64,66,63,75,71,68,71,68,65,52,44,34,72,143,134,119,118,100,72,74,92,108,124,143,127,66,43,55,58,55,75,74,47,42,38,72,89,75,70,60,69,78,84,81,87,108,125,113,100,110,95,67,47,83,113,94,71,48,34,44,53,57,72,81,97,98,102,88,77,83,64,55,34,21,29,17,12,15,14,15,15,15,15,16,15,16,15,132,155,167,126,115,124,141,140,108,113,117,98,123,132,117,117,109,122,116,120,122,119,130,134,122,124,210,251,226,71,74,43,13,48,24,37,26,30,43,46,53,58,112,122,97,101,101,108,109,118,114,100,115,107,105,101,96,102,101,101,103,110,113,113,107,94,92,106,103,107,107,102,116,121,122,117,113,122,120,118,120,117,117,116,115,130,149,138,118,118,113,111,111,117,119,113,107,95,108,117,100,101,121,115,108,108,102,112,100,97,94,96,113,103,107,109,122,105,94,106,114,128,123,135,139,127,130,129,130,122,121,127,96,85,82,80,119,111,97,96,96,109,120,127,124,117,109,106,114,116,128,128,102,94,88,91,79,57,69,80,88,116,163,190,169,80,46,99,12
8,128,144,162,212,232,207,162,115,87,65,63,75,81,98,116,126,128,130,124,121,125,116,121,106,96,117,107,106,114,123,134,128,139,149,128,102,80,75,81,123,150,153,137,120,133,145,141,119,82,76,114,146,172,176,185,185,177,178,180,165,178,190,180,184,181,172,160,159,163,176,185,177,190,193,165,156,148,115,93,95,107,115,113,121,129,99,90,110,118,112,75,56,63,60,68,65,56,49,60,78,77,83,84,77,84,106,219,251,251,234,218,250,236,187,200,225,168,61,9,2,12,46,41,39,75,129,141,89,43,64,104,128,195,193,171,171,177,157,142,221,245,169,158,157,97,57,9,11,49,72,69,42,35,31,26,27,29,33,35,27,37,44,39,33,29,31,28,31,29,25,39,42,35,29,23,32,27,30,29,22,28,24,27,30,28,36,48,55,65,88,103,78,64,83,88,98,103,110,121,120,97,48,28,23,28,69,101,120,111,96,103,95,94,105,97,108,100,89,99,97,74,124,149,93,95,83,87,88,96,106,101,105,104,109,110,109,105,93,111,90,49,47,54,79,77,64,42,35,39,25,23,19,18,18,23,38,56,66,73,78,62,41,58,84,89,80,67,70,67,67,67,69,69,57,47,69,72,70,63,61,67,65,66,63,59,60,58,62,63,57,57,55,55,57,54,47,29,21,17,16,20,23,19,16,31,45,41,46,42,37,40,30,41,41,28,31,24,26,27,19,22,26,25,31,41,48,42,54,62,59,60,57,58,57,57,58,57,55,53,39,20,18,17,19,19,19,22,20,20,19,20,25,28,33,40,39,34,33,27,24,28,29,25,37,39,22,15,17,17,15,17,15,17,17,18,19,19,16,29,51,47,51,50,46,44,41,53,50,54,59,54,51,47,49,48,46,30,20,19,17,20,19,18,22,19,32,42,41,51,52,54,56,55,55,58,57,51,48,47,47,49,45,41,49,41,48,42,39,39,39,44,37,44,43,52,54,50,49,45,37,21,22,21,15,22,16,21,23,19,28,42,55,61,65,62,60,63,57,48,42,43,55,69,64,73,78,75,79,74,71,55,51,47,34,34,31,96,122,108,136,132,99,85,106,130,149,153,145,131,96,77,59,30,48,67,52,30,28,42,110,140,102,87,86,92,87,75,72,87,99,122,115,112,111,74,53,33,53,84,70,77,83,66,63,63,53,50,63,71,76,78,68,62,68,53,47,66,79,76,23,6,14,12,16,15,16,15,15,16,16,16,142,174,188,127,110,134,170,168,120,122,121,101,122,130,128,126,120,139,124,122,125,124,120,126,118,125,211,253,224,66,86,48,14,49,22,32,27,27,44,50,55,63,120,135,108,101,103,117,111,126,126,109,118,110,105,102,104,114,120,116,105,109,117,120,113,109,105,111,119,116,113,109,110,117,116,107,104,103,110,112,116,114,107,111,104,131,141,128,118,110,124,111,105,109,113,119,105,93,111,124,113,103,105,94,99,112,112,114,105,113,113,117,130,116,114,118,131,114,92,89,96,96,88,102,108,92,84,94,109,105,111,108,77,78,83,97,104,89,81,97,101,105,106,91,108,112,110,100,93,103,97,86,77,102,121,118,122,115,99,102,85,106,134,139,121,69,59,114,127,98,96,120,132,176,185,181,180,172,137,107,109,101,99,121,126,127,123,125,122,116,128,137,131,128,128,111,123,136,114,117,117,129,135,111,98,89,73,83,113,134,142,107,92,117,108,104,89,59,56,91,163,200,192,207,220,219,224,219,198,201,220,214,209,207,182,170,182,205,221,228,205,213,220,163,134,135,105,92,95,92,115,124,131,127,100,96,89,106,129,116,107,99,89,95,74,54,53,41,66,60,70,114,114,98,126,234,252,252,250,232,251,232,180,185,185,172,112,36,19,100,164,61,53,110,104,62,39,12,18,101,167,214,188,179,188,187,174,201,250,174,94,123,114,52,18,6,59,96,72,48,30,32,36,31,28,32,33,30,34,33,39,43,36,34,28,27,31,29,25,29,40,41,39,27,28,30,33,26,23,33,22,24,31,30,42,57,60,83,95,78,77,83,96,96,101,117,112,114,83,35,19,25,51,65,99,108,99,99,97,98,77,92,103,100,108,84,79,65,51,39,108,141,84,97,76,93,100,88,102,103,98,103,111,104,98,93,92,102,101,83,59,52,74,84,76,59,41,17,17,23,17,18,27,44,55,60,65,76,84,64,61,92,93,94,79,78,84,75,83,77,70,63,53,55,76,89,83,78,70,73,76,73,71,65,66,66,64,60,63,61,57,63,57,55,46,34,24,19,16,18,19,17,23,28,44,41,37,34,32,30,29,39,29,35,32,19,20,26,27,24,23,24,26,37,35,32,54,59,62,63,64,61,55,
62,62,56,55,50,37,20,19,16,16,21,18,22,20,21,20,27,22,20,36,41,37,39,33,23,21,20,20,30,41,31,21,16,16,17,14,19,19,19,15,17,19,19,23,23,31,34,31,37,32,28,29,38,54,55,54,56,54,52,48,52,46,31,18,16,19,21,21,17,21,24,34,42,45,55,59,53,56,53,53,55,53,53,49,47,41,41,43,44,44,38,43,40,40,34,32,38,41,47,43,44,50,49,46,44,35,19,18,18,19,22,14,22,24,30,47,49,58,61,60,57,62,60,51,45,39,49,66,74,72,81,78,77,84,65,44,36,37,33,32,28,36,89,109,112,145,132,91,104,146,146,147,133,116,129,124,110,109,116,133,143,121,107,112,133,186,184,139,141,152,164,156,129,123,125,137,148,139,136,123,95,76,46,83,125,98,99,107,90,88,83,77,57,49,67,77,89,71,65,66,45,72,108,157,113,12,5,10,13,14,12,15,15,15,15,16,16,122,169,180,132,126,149,188,184,147,160,150,128,148,148,153,149,146,165,149,148,151,147,141,146,130,145,224,253,218,69,105,56,15,51,21,36,24,29,44,51,54,59,122,131,117,113,105,118,113,119,120,112,115,103,106,115,120,128,120,110,103,107,113,115,109,109,110,121,119,119,121,112,120,118,117,103,89,101,103,113,112,104,103,91,98,111,130,134,129,141,135,124,112,121,123,123,124,112,122,123,110,96,85,78,90,99,89,88,75,83,88,94,112,102,107,105,118,116,97,100,107,111,103,104,90,79,80,88,94,90,96,95,81,93,100,105,104,85,85,101,102,97,84,70,77,95,103,89,77,71,77,69,65,92,97,107,123,126,120,114,94,78,76,93,101,74,74,91,91,102,114,94,92,113,139,167,219,245,216,174,166,132,105,124,121,111,109,107,103,113,125,118,116,129,136,130,136,127,91,84,88,110,135,103,92,97,74,60,77,98,122,118,85,91,96,87,66,70,57,67,143,183,178,208,231,230,232,218,170,174,210,211,200,193,184,171,193,213,214,214,172,201,217,131,90,103,101,104,105,98,97,110,124,97,66,66,61,83,125,139,136,105,79,85,70,57,40,44,85,80,99,128,139,108,149,253,253,253,235,214,240,203,171,147,178,183,150,132,134,227,201,71,54,79,54,7,26,28,28,110,192,200,189,194,211,201,179,199,187,119,57,38,45,18,26,67,85,63,41,36,30,31,35,39,37,28,27,33,36,35,32,39,44,43,28,23,27,24,31,26,27,40,46,41,29,29,24,24,25,28,26,29,39,41,56,59,83,90,73,88,96,92,93,94,107,116,108,66,28,19,28,57,83,97,112,106,100,99,100,104,92,92,98,100,105,90,84,67,61,51,112,141,84,92,80,99,88,85,100,91,97,92,94,93,89,94,93,105,107,112,100,64,64,75,83,73,45,20,14,18,13,33,50,57,61,63,77,89,101,97,96,103,110,101,74,63,69,64,66,67,55,46,43,48,62,69,83,81,74,67,72,66,67,73,64,69,66,68,66,65,63,59,59,58,42,29,22,14,18,17,21,23,16,22,33,34,28,24,29,33,34,29,24,22,30,27,19,26,23,29,25,22,27,29,38,37,42,48,53,59,57,54,44,41,52,42,37,39,28,20,19,16,21,20,21,21,18,20,24,29,26,29,33,39,39,34,31,27,22,24,31,33,31,25,18,14,19,21,19,21,21,22,20,23,19,16,21,25,27,29,23,25,24,21,25,30,48,64,56,55,58,54,51,46,46,27,19,21,17,22,18,22,22,21,45,51,51,56,56,58,57,56,58,59,56,50,51,45,43,48,50,51,47,43,44,35,38,39,40,40,43,53,48,50,51,45,48,41,27,25,23,17,22,25,19,19,26,40,50,54,57,60,62,64,66,63,48,45,46,54,69,79,72,76,78,61,60,45,35,26,24,31,31,29,48,120,112,92,111,95,92,120,139,127,115,103,89,103,99,101,153,182,194,193,181,171,170,193,201,179,145,141,173,184,179,156,136,141,141,156,153,155,154,129,112,77,121,171,117,89,92,88,113,124,106,90,91,98,114,122,103,99,103,94,112,152,187,115,10,3,10,12,14,12,15,14,14,14,15,15,92,124,141,118,128,151,166,162,149,162,150,145,155,150,160,152,150,163,149,151,163,161,152,157,142,161,235,253,206,64,117,54,13,46,17,37,16,30,42,46,58,64,113,122,115,111,101,113,95,96,107,108,123,118,116,115,119,125,116,110,102,106,113,111,108,107,110,113,113,117,116,121,128,124,118,108,112,115,120,113,103,101,104,111,108,127,143,148,147,148,148,135,140,152,142,149,147,141,149,137,133,128,119,108,110,109,104,98,81,71,69,83,92
7,35,29,27,29,32,29,38,23,168,248,246,249,243,243,245,240,245,245,245,245,238,230,211,211,185,210,182,86,83,90,90,90,87,84,79,81,83,82,75,93,101,101,133,139,131,121,102,88,89,111,96,87,73,75,63,60,61,53,44,39,38,48,49,43,37,26,16,19,25,24,22,20,26,29,31,34,43,24,10,15,14,16,15,15,16,16,16,16,16,152,166,148,162,133,108,89,101,103,96,74,50,82,83,103,155,144,103,81,93,110,100,134,157,148,194,196,89,77,65,30,46,30,31,33,24,35,34,46,83,141,205,215,216,224,218,220,217,213,208,209,209,211,208,226,174,91,116,109,102,101,92,104,90,94,98,122,114,83,135,159,110,94,85,64,37,45,65,96,167,190,183,234,237,164,164,196,142,137,195,213,158,118,188,246,191,155,172,159,117,138,166,66,62,58,69,97,74,79,57,73,92,87,81,71,76,98,123,97,87,84,96,100,125,119,84,107,95,54,26,36,53,76,92,103,153,205,242,250,250,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,252,253,253,250,250,247,244,144,76,46,79,111,93,86,92,81,51,59,106,134,155,159,150,139,114,108,107,103,89,83,98,103,114,117,92,81,91,76,78,79,96,125,119,103,92,124,136,101,61,39,37,38,51,86,60,42,56,105,122,110,114,74,93,114,105,94,38,35,50,39,46,43,51,45,42,37,64,84,49,51,52,69,53,53,143,126,67,86,98,78,66,50,54,72,78,63,40,73,112,132,164,156,162,146,134,158,160,160,147,141,87,53,31,11,10,17,57,69,71,79,81,44,38,47,56,76,41,14,17,14,57,104,92,95,57,15,10,18,66,107,131,132,132,141,110,48,16,24,24,29,32,31,33,32,33,27,28,31,29,28,25,31,50,63,74,73,84,89,88,94,102,111,84,52,38,31,36,38,36,34,32,36,35,34,34,35,36,27,19,14,33,63,79,105,122,126,131,124,118,113,119,146,162,152,73,29,58,57,60,74,125,162,106,49,37,66,86,64,42,22,17,21,15,25,24,45,64,65,74,74,75,73,75,74,62,64,61,52,59,55,36,38,35,39,41,39,45,46,44,42,46,46,41,40,39,40,40,43,39,43,46,43,43,40,42,43,42,44,42,37,43,53,56,35,20,25,21,26,24,27,21,20,22,21,18,17,27,20,21,25,21,24,27,28,23,32,24,26,27,24,24,29,27,26,24,23,25,24,23,26,28,27,23,20,20,20,23,21,24,22,20,22,23,26,18,19,19,20,22,24,28,24,19,19,25,21,23,21,21,18,28,38,17,15,21,19,23,20,20,21,20,23,19,23,23,22,24,21,23,23,27,24,27,25,24,24,22,29,62,86,66,36,17,22,28,29,27,29,26,23,24,22,24,21,22,27,28,24,22,22,21,26,30,21,26,27,25,29,27,27,25,27,28,23,26,27,23,28,27,26,21,22,26,19,18,26,34,34,36,35,34,34,31,27,31,24,30,37,32,30,34,33,26,35,31,31,33,32,32,31,36,32,30,36,26,28,30,32,30,35,208,251,251,248,246,247,242,238,240,239,244,246,249,248,233,223,128,61,39,6,11,12,16,24,29,35,28,23,29,21,19,65,141,191,230,244,239,235,225,217,216,216,224,216,202,207,195,193,196,182,179,171,178,181,181,177,156,139,131,138,140,122,110,94,81,69,56,50,45,21,7,11,12,15,14,14,15,14,15,15,15,198,206,190,209,203,185,137,120,108,92,80,103,151,135,148,182,160,128,103,96,99,82,93,118,127,184,185,84,71,44,25,46,27,33,32,28,33,33,50,71,145,234,247,247,248,245,240,243,223,208,242,249,235,233,232,133,109,187,120,135,128,111,130,122,131,121,129,125,118,181,229,120,51,69,101,125,131,151,173,214,234,229,234,208,177,141,142,120,123,177,134,87,75,84,152,118,103,141,146,136,160,186,113,88,77,97,115,89,106,80,73,59,63,73,92,85,105,133,104,98,83,99,96,88,88,105,141,112,79,59,58,68,104,131,155,205,232,242,250,250,252,252,252,252,252,252,253,253,252,252,252,252,252,252,252,252,252,252,253,253,253,253,252,252,253,253,252,252,252,252,252,252,253,253,251,251,244,159,165,175,170,163,160,158,123,152,164,156,146,160,157,150,152,145,148,132,55,65,79,104,124,92,94,89,78,67,69,116,139,146,131,120,120,120,101,82,66,57,91,92,92,98,89,98,94,92,73,69,88,95,120
,108,78,71,101,102,96,61,34,34,31,55,68,48,42,49,107,119,112,110,73,93,101,82,71,34,33,39,39,39,33,73,74,54,49,112,153,125,134,138,145,130,108,171,172,117,141,136,105,117,92,110,140,67,37,23,77,131,122,168,148,123,111,74,81,69,68,64,19,7,12,11,16,16,14,14,27,51,69,89,93,97,110,155,203,229,243,247,248,178,253,155,51,10,5,47,101,132,140,130,117,113,124,92,57,49,61,84,84,89,89,77,70,71,69,52,63,58,32,27,43,64,57,65,71,80,79,85,93,79,56,25,30,38,29,32,37,33,33,31,31,36,36,41,35,35,36,30,44,65,91,120,121,118,108,103,90,85,92,101,110,114,110,51,29,57,66,70,66,92,150,155,95,42,47,49,31,23,16,18,20,21,40,54,66,72,72,73,78,81,72,70,71,71,101,122,123,113,91,63,90,63,75,76,72,73,69,78,82,79,98,71,66,66,70,66,69,76,74,76,81,69,71,98,63,66,67,63,62,62,60,52,25,19,17,18,21,21,23,21,22,23,21,24,24,21,25,25,24,28,23,28,27,21,30,38,38,41,27,19,26,22,24,26,25,25,25,31,23,22,26,22,19,17,22,22,22,21,20,25,19,24,24,19,21,20,24,23,21,23,19,20,19,24,23,21,27,21,17,29,34,22,20,17,15,25,18,19,24,17,17,22,21,20,18,19,21,24,25,19,20,22,27,23,24,23,22,30,66,93,66,28,15,17,23,27,24,22,22,21,18,18,20,24,27,23,20,24,22,21,24,27,22,23,26,24,26,24,27,24,28,24,21,25,23,27,26,22,26,26,20,19,18,18,24,21,28,29,25,26,27,29,27,24,26,29,25,26,33,29,26,28,27,30,31,27,30,31,29,31,32,29,30,27,31,23,42,16,55,237,253,253,251,251,249,248,246,243,245,246,248,249,246,238,214,109,8,6,10,11,22,26,25,40,53,51,49,47,45,42,98,180,213,240,241,231,227,218,217,211,209,217,216,219,227,218,219,224,225,231,229,230,231,229,236,224,212,215,225,223,215,202,191,173,164,147,143,102,12,2,10,10,13,12,15,13,13,14,13,13,172,178,158,169,186,182,160,170,174,159,137,160,181,146,134,151,170,164,136,119,108,82,78,91,111,172,164,61,37,22,11,37,30,31,29,24,32,39,52,58,92,157,182,185,171,145,162,158,114,126,175,193,161,123,109,66,83,137,103,113,104,92,98,78,94,88,84,94,90,160,236,96,22,88,147,196,218,241,251,251,242,182,157,161,174,122,106,98,101,155,96,59,53,50,73,45,69,96,86,79,136,153,98,99,80,89,101,95,111,105,91,89,81,71,83,81,87,105,97,98,101,97,87,75,60,75,121,114,110,85,63,72,79,128,173,194,155,119,168,214,194,208,236,237,253,253,250,250,253,253,252,252,252,252,252,252,253,253,252,252,253,253,253,253,253,253,252,252,253,253,253,253,252,252,190,212,232,33,24,38,32,34,41,95,31,27,49,26,37,39,34,34,53,46,58,60,55,75,89,101,84,81,70,73,80,81,98,113,122,120,118,105,110,117,105,89,88,92,86,89,93,106,107,93,72,79,80,83,89,89,97,93,69,45,54,76,75,60,58,48,31,56,80,40,35,46,107,118,110,110,62,77,74,68,69,44,58,55,47,35,21,66,80,50,54,127,143,99,116,112,132,122,83,129,148,122,124,95,70,101,108,122,123,81,45,26,70,68,37,72,40,18,21,9,15,12,13,14,13,14,20,27,73,108,138,182,210,237,251,251,252,252,252,252,252,252,252,252,246,246,245,101,21,46,93,143,153,152,115,108,109,119,120,98,93,102,135,149,158,161,157,157,150,150,149,136,146,101,35,24,62,84,62,67,64,77,86,77,74,74,63,44,36,27,26,34,38,34,33,33,35,43,36,41,38,35,35,41,62,81,90,89,82,82,73,62,61,71,66,42,40,34,42,34,37,63,69,82,68,71,110,170,156,62,24,15,18,20,14,19,25,46,65,70,72,73,76,78,72,73,69,66,66,75,117,157,171,187,179,160,147,143,150,151,152,148,151,151,160,159,151,165,159,158,160,166,161,156,169,158,164,160,155,156,145,152,160,154,148,119,76,40,16,12,12,12,16,16,21,27,22,27,29,31,35,29,34,46,48,46,46,41,37,41,53,82,87,77,48,31,36,33,35,29,33,32,31,29,28,29,26,22,23,22,19,22,17,16,23,19,17,18,18,21,19,21,23,21,19,24,24,20,22,28,24,18,18,24,18,30,39,24,20,17,15,24,23,22,20,19,21,18,19,23,19,23,23,19,22,24,23,22,22,25,23,21,25,21,33,84,92,57,23,15,19,21,19,18,26,19,23,24,24,24,22,19,26,24,21,24,21,22,27,27,24,
24,23,21,21,28,27,22,27,26,21,24,24,23,24,19,19,22,20,19,23,19,19,24,21,25,24,25,21,21,25,23,27,22,24,29,29,28,27,25,27,29,29,29,24,31,32,29,31,27,38,26,33,15,88,240,252,252,252,252,252,252,249,249,249,249,249,249,249,216,206,163,116,73,16,10,11,11,18,45,68,77,87,87,83,74,98,130,154,193,203,198,186,173,175,171,172,171,168,180,191,188,188,193,200,207,203,208,201,203,209,200,202,202,208,214,211,208,204,201,203,198,199,112,5,2,8,10,12,11,13,12,14,15,14,14,130,134,97,95,119,110,104,141,171,150,113,143,141,89,73,98,125,131,126,120,123,107,92,99,119,170,155,57,50,35,16,31,30,48,42,37,42,45,57,56,71,73,61,63,66,80,86,82,43,53,81,73,37,21,35,27,49,61,65,87,78,76,78,53,59,39,51,61,44,123,215,87,24,92,153,205,226,243,235,184,106,65,81,137,219,214,213,179,175,207,167,155,134,101,103,97,86,85,76,57,49,56,62,74,74,77,79,79,97,98,94,86,78,66,86,81,79,84,89,98,81,86,70,62,45,46,76,74,73,67,67,52,53,81,115,139,62,7,11,12,17,27,44,43,63,87,116,118,92,93,98,89,85,109,113,76,69,84,82,53,77,79,40,47,53,48,60,61,93,178,90,110,52,27,6,128,210,27,13,15,16,10,96,207,48,19,43,19,27,15,20,30,62,50,57,86,80,110,104,104,88,57,95,77,54,65,64,76,98,124,130,130,116,105,103,117,125,115,119,94,97,105,93,83,55,73,80,55,68,69,73,76,61,45,48,50,61,63,57,61,35,64,88,43,19,51,100,117,127,104,65,91,72,68,79,81,105,113,128,111,104,121,117,73,76,134,116,60,54,47,79,85,47,47,66,75,88,84,63,62,39,61,59,51,56,35,43,30,12,35,28,27,29,25,57,125,130,167,145,240,252,252,252,252,253,253,253,253,252,252,248,248,243,243,228,200,139,172,116,93,79,43,71,118,139,162,151,117,100,107,104,120,116,100,96,106,130,136,135,140,132,132,129,132,132,139,139,69,24,15,57,94,86,92,70,77,85,91,94,91,101,84,60,32,31,37,29,33,36,37,36,41,38,34,40,37,35,45,51,63,60,62,63,60,56,68,71,64,65,37,28,27,35,40,37,63,75,83,74,74,84,130,175,106,23,9,15,18,19,37,58,61,72,74,75,75,73,73,67,73,60,65,75,64,77,97,108,129,112,126,96,108,117,118,118,122,125,121,127,126,100,125,115,124,128,124,110,115,102,111,119,108,126,104,117,99,122,109,96,75,50,34,21,14,12,16,18,21,23,24,32,28,34,37,34,37,54,69,72,75,65,65,66,74,120,158,171,148,85,53,57,72,67,55,58,50,37,30,35,34,26,33,26,21,24,18,15,19,19,15,17,23,21,27,23,23,27,23,27,22,25,27,25,25,24,21,26,21,24,26,24,21,21,17,19,24,19,20,21,21,21,24,20,19,23,24,22,19,23,28,24,23,26,23,23,21,23,21,17,44,84,82,45,18,17,15,20,20,19,23,24,25,26,29,26,28,28,24,26,27,24,26,35,38,30,28,23,27,26,24,25,21,23,21,24,23,23,27,24,22,19,16,21,21,23,22,22,19,24,24,18,24,28,26,31,26,29,31,22,29,27,30,24,25,34,30,33,26,29,31,32,31,30,28,30,31,39,14,97,226,172,206,229,245,251,251,249,249,249,249,249,249,241,200,223,237,242,229,191,151,109,49,56,33,52,34,44,50,45,42,38,48,68,109,143,151,153,154,165,171,176,171,160,172,183,178,173,179,182,181,176,181,174,176,171,162,171,168,174,176,178,178,177,181,184,184,192,113,7,1,8,11,13,11,14,13,15,15,15,14,120,118,88,83,108,75,62,97,132,102,71,123,119,71,37,57,67,68,106,143,181,177,162,165,170,207,196,139,149,134,121,131,122,122,107,84,60,62,74,78,92,87,89,98,101,97,117,109,63,59,53,33,27,44,53,47,61,56,49,75,59,80,89,75,77,55,81,87,61,124,208,117,80,123,137,163,164,148,92,60,41,32,63,143,253,253,253,253,242,240,248,249,180,107,118,134,113,84,86,80,72,57,39,57,66,78,86,81,84,77,78,81,69,74,97,92,82,84,88,89,87,103,110,119,92,62,47,54,87,87,90,91,63,65,56,53,42,66,59,13,22,19,19,17,20,24,72,83,24,18,25,23,20,36,50,24,16,24,32,19,53,86,34,13,22,18,28,28,59,165,76,8,16,26,13,141,243,34,24,29,21,31,131,253,83,33,76,42,53,32,30,46,87,70,81,112,120,110,81,69,35,44,60,46,43,32,45,78,105,112,115,103,99,92,97,112,115,122,101
,94,76,100,116,97,82,86,79,47,52,59,52,50,59,51,53,63,44,48,57,63,45,59,80,58,69,74,146,165,149,118,87,116,109,124,130,133,165,171,194,195,190,198,175,132,93,121,108,46,39,16,22,32,14,15,12,13,15,15,14,14,14,15,23,40,65,78,116,135,162,210,220,247,253,253,252,252,253,253,252,252,252,252,248,248,238,234,232,208,169,150,118,111,99,79,43,6,12,15,13,15,47,79,141,158,142,132,110,102,101,101,104,120,100,103,106,105,120,120,122,119,112,113,109,111,117,118,86,28,10,10,70,105,108,98,77,90,98,101,103,103,103,120,108,66,36,32,37,36,39,33,32,34,36,37,32,37,40,30,34,50,61,59,59,66,74,70,69,67,60,40,34,29,36,37,39,69,79,87,74,64,45,35,125,165,77,18,12,28,50,58,66,67,67,79,79,73,73,74,69,65,62,79,93,67,65,63,39,30,29,29,27,23,29,34,32,32,35,29,31,33,32,32,27,25,29,29,26,21,31,27,28,28,25,36,32,28,28,28,24,27,23,21,19,18,17,19,25,22,22,24,21,26,22,24,25,23,35,34,35,44,36,42,36,39,39,74,131,121,69,37,47,45,42,45,38,35,31,29,28,23,25,27,22,21,21,24,18,20,19,17,23,24,25,23,21,25,29,24,21,25,22,23,27,23,23,24,22,21,23,21,17,16,19,19,17,20,21,25,19,20,24,23,24,17,21,25,18,22,23,24,27,22,27,24,23,28,23,25,26,25,56,88,77,39,17,15,19,22,27,25,26,33,33,34,28,31,35,25,29,32,27,32,40,42,42,31,31,32,31,35,30,28,30,29,31,31,24,28,28,23,26,23,19,17,22,21,18,19,19,21,23,27,31,35,35,32,34,34,35,32,36,38,35,37,38,39,35,40,34,35,38,35,41,41,43,39,46,33,61,106,74,92,142,200,238,217,202,204,212,212,214,212,195,182,203,234,251,251,249,239,211,157,111,85,56,37,29,29,32,25,24,21,24,43,63,91,108,131,157,171,187,178,171,184,188,187,181,187,191,186,183,178,165,166,155,155,169,164,165,157,150,155,161,164,166,163,182,113,8,2,9,11,14,12,15,13,14,15,15,15,122,113,81,65,98,80,74,85,119,84,61,127,152,132,76,71,54,56,110,168,232,240,220,222,231,244,238,207,230,211,193,214,206,224,215,164,127,123,121,109,121,129,149,163,148,116,125,132,97,99,89,65,53,65,88,95,118,113,94,109,77,101,124,102,126,132,164,159,139,181,237,198,181,201,184,172,156,141,118,139,127,106,130,158,224,243,251,222,217,208,246,236,123,106,108,122,83,55,69,79,96,74,53,51,57,78,87,78,64,55,66,84,70,74,94,86,91,83,91,89,92,148,178,212,169,90,59,65,95,122,155,134,92,81,88,110,144,208,179,117,183,217,205,179,154,125,250,246,150,152,174,180,172,209,205,156,179,174,169,160,228,250,171,166,159,136,166,173,207,253,178,141,135,139,184,253,243,57,97,132,151,119,190,253,110,115,128,104,120,94,99,103,129,103,103,133,103,75,48,42,59,48,48,33,26,64,90,97,99,95,100,118,119,111,107,92,84,90,100,92,98,121,117,101,105,134,110,53,46,63,66,61,54,46,59,57,47,43,43,49,77,109,107,130,145,149,186,165,142,93,81,128,142,167,164,171,180,181,198,198,196,192,174,144,109,79,27,3,12,11,13,13,16,17,20,23,33,56,126,132,160,199,222,241,252,252,253,253,252,252,253,253,252,252,246,246,245,245,220,193,184,168,148,109,119,21,27,18,18,22,12,16,23,27,20,8,20,39,20,14,91,141,160,153,125,105,86,90,108,113,118,129,96,89,107,106,115,112,121,114,107,113,108,114,115,118,77,9,27,66,111,103,91,98,91,107,96,98,95,100,109,115,115,99,49,30,32,44,36,28,37,33,37,43,44,50,47,28,34,57,56,63,72,76,74,64,64,67,63,41,31,31,31,40,39,62,76,73,62,33,26,17,29,129,143,60,31,51,62,74,76,70,75,70,73,77,70,67,66,78,86,88,85,61,62,58,33,16,19,20,21,20,20,22,22,23,21,19,22,19,18,21,24,21,20,22,19,21,20,20,22,17,22,22,18,20,19,22,22,17,23,18,21,21,17,20,18,22,25,17,17,16,22,23,18,20,20,26,21,22,23,21,18,22,20,26,80,100,71,27,12,14,21,21,24,24,19,21,20,19,20,23,19,18,20,23,19,17,18,19,21,19,16,19,19,16,23,18,16,20,18,21,19,18,20,19,17,21,16,21,19,16,22,18,18,23,18,24,24,24,25,21,27,27,20,23,22,19,24,19,21,21,21,23,23,20,21,21,21,21,24,64,91,7
6,42,14,17,22,27,29,22,26,25,24,29,29,27,27,24,33,24,32,38,42,40,32,30,26,34,29,29,34,31,28,30,30,29,28,25,23,26,25,20,24,24,15,21,21,21,21,29,35,33,34,34,33,35,33,35,40,39,41,42,38,41,42,43,42,40,44,41,47,44,44,51,50,52,51,57,57,44,52,57,98,148,154,143,148,151,154,156,152,152,141,148,162,197,232,222,229,225,185,171,172,141,127,117,123,120,116,110,115,101,89,92,98,115,131,153,170,172,156,152,162,171,177,187,202,201,198,195,175,154,160,158,165,175,165,163,151,145,152,162,159,159,161,181,113,9,4,10,10,13,11,15,13,14,15,15,14,151,116,78,49,81,74,102,117,152,122,85,163,230,218,135,146,124,104,116,142,217,214,179,205,223,217,199,175,193,177,138,143,155,207,232,153,95,101,112,122,130,136,155,160,167,160,159,145,140,142,128,128,100,89,119,143,189,182,164,194,156,165,184,184,225,238,247,247,250,250,252,252,251,251,251,251,239,240,252,252,250,250,253,250,223,194,210,216,227,197,245,224,84,95,131,139,96,52,57,66,90,78,60,69,74,75,71,60,55,58,79,89,75,75,85,82,83,82,83,74,82,134,149,179,127,80,69,70,88,106,122,101,83,94,123,159,160,222,191,129,210,233,245,226,200,214,253,253,218,214,225,214,218,237,234,196,219,240,222,229,249,249,206,206,236,248,236,223,204,248,214,208,242,233,210,247,198,65,141,186,188,121,173,248,118,118,122,103,119,98,101,92,127,121,110,121,96,85,97,116,126,106,66,44,83,122,126,91,77,115,135,140,118,95,89,64,40,57,79,126,121,104,98,76,109,148,122,66,36,46,76,76,57,41,51,56,46,55,66,84,126,150,155,170,171,157,166,130,82,72,85,107,110,127,147,146,149,169,165,140,149,141,116,127,125,104,57,47,102,115,136,167,200,235,250,250,252,252,253,253,252,252,252,252,250,250,243,241,230,224,196,187,171,112,94,72,58,66,25,3,14,27,42,44,17,8,13,15,21,30,19,21,19,24,51,46,42,65,44,31,73,104,146,116,93,81,74,100,116,126,150,154,84,72,83,103,112,112,116,105,112,114,115,113,116,112,85,90,116,117,113,95,104,101,97,107,96,98,99,108,111,118,104,53,30,28,44,42,34,36,34,34,34,38,43,53,51,37,45,59,69,84,73,71,69,65,75,66,64,47,29,29,35,38,36,60,61,44,27,22,15,18,14,41,153,146,74,57,63,83,80,75,64,62,64,72,70,72,92,88,90,80,69,54,41,37,25,19,13,17,19,15,27,20,21,21,18,22,16,21,18,18,24,21,19,19,16,22,22,17,21,17,20,20,17,20,16,18,21,19,21,19,19,18,21,17,16,21,17,20,20,21,20,21,19,20,19,17,23,18,23,20,15,23,19,21,63,96,80,30,11,17,16,20,18,18,21,21,21,19,20,23,19,22,20,19,19,20,19,16,23,20,19,18,21,23,17,19,19,19,15,18,21,19,16,17,20,17,23,19,16,18,18,19,20,22,18,27,24,24,25,20,24,19,22,25,18,21,19,19,21,19,18,18,21,21,21,20,21,18,18,30,68,96,69,29,15,18,22,18,17,24,21,21,20,20,21,23,17,23,18,23,45,45,40,21,21,23,19,22,22,24,24,21,22,24,26,27,25,18,21,27,21,22,18,18,21,18,20,23,24,27,27,23,24,27,33,27,28,30,27,33,31,33,24,34,33,30,33,32,35,36,34,35,39,39,39,47,40,41,46,51,30,20,109,151,148,159,156,160,156,149,152,147,142,142,155,172,177,183,183,181,193,212,213,206,211,212,206,197,205,206,196,188,181,179,170,174,183,176,165,146,141,140,146,163,179,195,196,201,199,175,157,165,165,173,176,165,167,164,153,161,172,167,169,176,191,113,8,3,10,12,12,12,14,13,14,15,15,14,192,149,135,124,128,98,132,162,236,189,118,205,251,250,200,239,238,220,179,159,209,211,200,238,244,206,195,174,208,190,137,132,115,191,235,139,107,120,134,151,138,144,141,142,191,200,178,173,170,171,163,157,145,141,160,167,218,206,186,251,221,226,252,252,253,253,252,252,252,252,252,252,253,253,252,252,252,252,252,252,252,252,253,251,221,232,250,244,253,184,250,193,32,100,117,144,118,87,89,86,116,92,69,71,83,77,67,72,74,80,84,93,90,101,101,73,73,78,86,74,59,79,63,60,63,56,51,71,76,66,74,72,85,116,119,98,74,181,130,49,74,91,110,105,137,172,225,247,153,18
0,152,92,66,130,154,66,131,158,173,226,246,174,121,148,201,246,194,163,131,180,230,206,199,172,126,204,123,69,120,95,113,59,124,167,67,97,63,70,90,66,64,61,110,112,112,126,105,122,142,139,119,83,93,102,105,127,117,93,110,130,140,118,81,78,57,47,42,21,37,92,99,83,61,40,76,98,74,55,33,40,46,57,58,43,45,55,33,52,129,141,160,174,150,161,147,130,138,100,73,72,95,92,76,92,116,133,151,167,146,134,152,152,178,216,237,248,250,250,252,252,253,253,253,253,252,252,251,251,239,239,240,245,210,158,128,112,104,60,33,19,3,16,31,14,10,12,12,27,18,9,18,25,29,45,55,43,31,39,46,44,42,45,42,100,134,52,41,81,56,40,39,71,95,75,78,77,96,123,138,141,152,147,80,68,86,102,110,112,111,107,109,113,112,109,109,110,108,116,105,97,113,103,111,107,100,107,93,111,112,117,103,59,59,23,32,38,32,38,33,31,35,36,36,37,42,48,46,43,64,79,73,75,71,68,75,70,78,77,64,45,32,33,34,35,37,52,46,33,35,30,23,24,42,56,104,154,134,80,51,66,79,73,66,61,65,70,85,92,89,76,78,72,60,46,26,14,18,21,15,16,20,17,18,18,19,20,16,19,19,20,23,18,22,22,21,21,17,20,17,17,19,19,19,19,21,24,17,21,18,18,20,19,21,17,17,21,16,17,15,22,22,16,20,17,21,21,17,21,19,18,18,17,21,20,21,14,48,101,87,39,13,12,18,21,17,16,19,18,18,21,19,19,15,18,18,15,18,17,20,22,17,20,18,18,17,20,18,17,21,19,20,19,19,20,18,17,17,18,20,19,17,17,21,18,19,24,19,26,23,27,27,24,28,24,22,21,24,24,19,19,17,21,23,15,19,19,20,16,17,19,23,22,38,77,87,63,23,14,19,16,21,22,18,17,24,19,22,19,20,23,16,29,40,46,29,17,19,20,19,21,22,18,23,21,23,20,20,22,19,24,18,18,20,19,22,19,19,22,20,19,23,18,21,20,19,29,19,19,21,18,24,22,23,24,21,21,23,25,21,23,22,27,22,24,27,23,29,26,23,31,28,32,23,27,131,190,190,199,190,197,191,183,183,179,178,164,162,167,156,164,176,179,191,185,192,206,214,220,203,199,212,222,220,213,215,218,211,204,201,191,186,182,171,165,165,171,178,175,177,193,202,188,184,192,184,184,183,183,183,179,180,189,195,186,194,196,200,114,7,2,7,11,13,12,15,13,14,15,15,15,226,212,231,233,227,165,181,199,248,220,119,203,244,242,220,252,243,243,218,178,222,244,253,253,252,252,251,251,252,252,243,237,181,223,241,190,174,183,174,186,146,134,138,145,193,174,170,175,179,160,119,146,163,184,187,153,171,143,143,217,159,175,238,225,230,206,220,201,226,250,250,251,252,252,252,252,252,252,252,252,252,244,240,178,157,192,208,230,218,171,247,184,45,78,72,103,104,107,141,122,128,103,64,65,70,64,83,96,94,84,77,92,107,121,109,77,70,74,84,79,68,83,71,77,61,61,48,63,88,75,92,77,110,131,146,127,74,155,125,91,101,72,75,90,123,132,217,250,207,219,214,113,63,132,174,115,144,158,155,218,240,203,172,148,199,248,214,215,169,205,247,227,221,208,137,169,133,90,98,93,104,69,101,93,72,96,70,101,103,87,88,86,127,106,104,131,106,117,132,92,68,51,75,71,73,99,107,131,129,122,113,89,64,73,95,91,57,24,13,44,58,55,57,38,48,48,41,42,39,46,37,61,77,76,71,33,17,39,118,152,149,141,117,125,113,117,135,123,119,126,141,154,157,174,213,232,242,250,250,242,252,252,252,252,252,252,252,252,250,250,241,237,216,194,161,137,145,115,65,38,19,105,129,30,4,8,10,14,13,25,10,19,42,17,16,39,40,56,59,47,41,54,47,71,118,91,46,37,42,61,67,89,128,153,168,86,48,83,51,40,40,31,52,59,101,113,128,147,144,145,135,108,66,80,98,114,107,109,119,107,110,107,118,114,106,106,101,98,74,89,110,105,108,99,100,111,113,117,116,84,38,21,30,42,45,45,33,22,37,34,36,39,33,37,34,45,57,69,74,73,66,62,67,67,69,75,77,81,73,41,31,33,31,38,33,45,54,45,55,56,56,63,70,77,80,107,162,136,66,56,60,70,68,65,73,85,89,80,75,63,69,72,61,38,21,16,16,19,17,24,21,19,23,17,20,22,19,21,18,15,21,20,17,18,16,24,18,17,21,15,23,21,21,15,22,26,15,22,21,19,18,20,20,17,20,15,17,20,17,21,18,19,20,19,21,21,1
7,16,23,18,20,19,19,19,20,19,35,88,91,46,18,15,19,18,17,19,18,13,21,19,21,24,19,22,17,23,18,21,23,19,21,16,18,17,17,20,20,20,19,19,18,16,16,17,17,18,20,17,17,19,17,17,19,20,22,26,23,28,31,27,29,30,26,23,26,25,25,26,22,19,18,20,20,15,18,19,17,20,19,18,17,21,19,41,80,88,59,20,14,17,17,20,15,22,20,17,21,20,24,20,18,26,42,45,25,17,22,18,22,19,21,21,20,20,21,21,18,21,18,18,18,18,19,19,20,20,21,19,17,23,22,19,20,19,21,21,19,23,21,23,18,19,23,20,22,22,19,21,22,24,22,21,22,25,22,23,21,26,24,22,22,31,21,65,183,200,190,190,181,191,182,178,166,165,179,181,171,150,141,149,150,163,181,178,179,174,174,175,158,154,187,204,194,186,187,198,199,192,190,187,194,197,199,200,199,193,181,173,157,177,203,204,217,223,208,201,205,212,209,206,205,212,218,208,208,201,208,114,5,2,6,11,13,12,14,13,14,15,15,14,204,223,234,233,224,158,165,165,244,181,83,169,222,159,134,197,202,194,121,92,121,178,221,250,244,210,250,239,251,251,240,227,131,197,236,134,119,127,136,171,93,101,120,130,170,131,131,172,176,113,76,112,151,177,188,122,97,92,96,160,81,70,100,101,139,84,98,92,150,203,197,198,164,243,254,254,252,252,252,252,252,231,236,141,75,120,116,148,191,178,251,181,40,81,60,85,76,96,133,112,124,101,83,75,66,60,77,104,88,78,73,85,83,89,92,72,57,61,73,67,56,98,111,91,84,78,62,55,71,99,111,83,79,84,93,96,61,145,90,92,117,65,85,88,125,108,162,217,148,176,160,65,58,105,155,103,110,98,94,156,160,144,131,118,176,208,184,204,159,201,244,184,194,183,147,179,114,90,99,91,112,74,102,86,74,96,68,101,97,95,103,110,128,96,104,126,100,102,101,74,62,54,57,44,42,71,106,118,114,77,70,79,75,99,114,111,105,86,49,31,26,44,47,38,58,41,47,56,42,46,20,54,84,67,69,53,30,71,132,134,142,145,145,168,173,198,230,237,250,250,252,252,252,252,252,252,252,252,245,244,239,242,252,240,229,212,204,203,177,128,105,85,39,13,8,9,14,18,9,16,12,67,98,20,8,10,26,41,38,43,34,46,67,51,71,112,87,119,95,53,55,63,49,76,139,131,66,32,48,89,86,87,95,95,93,53,40,49,43,39,23,41,69,96,137,132,128,144,143,136,131,105,68,85,106,115,112,116,113,114,110,111,113,95,93,84,92,103,87,98,107,101,105,94,97,117,119,99,45,26,21,23,60,56,69,54,23,21,24,40,35,37,39,37,49,66,74,74,71,73,60,64,76,71,69,67,77,74,66,50,31,34,36,37,35,48,57,45,63,79,88,93,86,78,72,81,131,172,117,53,46,57,78,87,88,83,75,72,69,60,71,68,54,41,21,17,16,21,18,19,24,17,22,18,16,21,19,19,19,21,19,19,21,22,18,19,17,21,22,17,20,16,19,21,21,19,19,19,22,19,19,19,18,21,19,20,18,18,17,16,21,19,21,18,22,20,17,19,21,21,16,19,24,19,19,21,25,75,104,49,19,16,15,17,19,22,17,16,17,19,19,20,24,20,17,19,19,18,18,20,18,20,21,18,21,19,17,22,18,17,19,18,17,17,24,17,19,21,17,18,19,18,19,23,23,30,28,29,30,33,33,26,31,29,27,27,27,28,21,15,19,19,18,19,23,19,17,19,21,19,21,20,20,22,48,91,90,46,17,17,17,21,20,19,21,19,22,21,18,17,21,27,48,46,26,21,17,17,25,21,16,18,17,18,21,21,19,20,18,18,21,19,20,20,19,19,19,21,19,18,19,17,22,20,24,19,16,19,17,22,21,19,23,22,18,22,19,22,20,18,23,23,24,19,21,23,21,24,19,23,31,23,40,131,190,168,157,146,140,155,145,131,127,159,205,205,188,174,164,155,141,170,219,229,227,198,165,151,128,113,133,148,151,131,128,139,136,141,143,144,152,163,170,181,178,162,150,128,131,148,181,198,216,224,205,203,212,226,224,215,216,212,215,196,185,184,193,114,6,1,9,12,12,12,14,12,15,15,15,15,97,124,137,147,119,72,91,97,199,162,94,169,189,108,42,60,57,58,46,55,78,117,156,219,193,145,193,172,218,211,167,166,70,151,221,64,39,61,88,137,65,66,87,106,134,88,111,155,152,110,81,107,113,127,150,111,103,92,118,204,124,59,49,77,111,60,75,63,123,147,112,148,134,182,253,253,252,252,252,252,249,249,244,128,50,80,63,98,132,163,240,141,28,78,67,87,
59,60,96,92,109,118,112,95,79,68,76,87,79,76,77,69,61,56,73,67,61,61,56,64,58,81,93,84,64,73,76,53,48,58,86,64,53,41,67,69,59,85,39,39,55,61,71,78,97,72,69,78,51,36,34,27,31,46,57,33,43,40,38,71,50,33,41,47,71,93,98,128,107,122,146,110,134,142,105,101,66,57,56,57,70,56,63,51,62,59,42,64,46,58,100,119,126,96,96,96,64,68,87,103,103,65,48,42,45,50,59,87,104,95,81,76,76,73,81,111,118,97,81,48,13,9,15,15,25,42,61,72,72,63,57,74,87,109,134,148,184,230,249,249,252,252,252,252,252,252,253,253,253,253,253,253,250,244,240,229,201,222,165,88,54,116,188,180,154,131,147,155,131,79,61,44,23,27,12,10,12,16,18,12,16,57,95,94,58,24,45,57,34,50,35,90,132,84,73,83,115,136,137,84,37,46,53,46,71,86,58,44,41,56,59,53,53,53,76,57,36,65,62,41,33,107,146,143,159,134,128,134,129,137,141,125,111,105,109,120,113,112,115,111,114,98,89,73,75,74,73,98,97,105,101,104,111,95,105,98,59,36,17,22,16,49,85,95,102,39,15,23,30,39,44,36,41,64,73,77,71,73,77,75,69,72,75,69,79,82,83,79,69,47,32,32,37,37,29,51,54,47,66,81,96,88,79,80,72,66,90,137,163,114,48,57,80,93,81,71,76,70,68,63,66,64,56,36,22,18,18,22,20,24,19,19,21,18,21,21,21,18,18,19,20,18,17,18,18,19,21,19,18,23,19,18,22,19,21,19,20,21,19,20,21,22,19,17,22,24,16,20,20,20,23,19,18,20,20,19,20,17,21,19,17,21,19,23,17,21,19,57,114,66,21,15,14,18,15,20,24,21,20,18,18,21,21,19,20,22,15,17,21,17,21,21,21,24,18,19,17,20,23,17,19,17,18,17,19,19,22,24,18,18,18,19,22,23,23,29,23,28,32,25,28,29,29,28,26,24,29,30,23,20,18,16,21,18,19,20,18,21,21,18,15,17,21,20,28,55,91,82,41,17,15,17,18,20,22,16,19,22,18,18,20,25,48,41,19,22,17,19,24,18,18,22,20,20,20,24,21,21,20,16,20,19,21,21,19,18,22,22,16,21,20,23,21,19,19,21,21,19,21,17,20,19,21,23,20,20,21,23,24,20,19,19,19,18,26,23,21,19,22,18,26,21,61,150,164,155,145,125,125,132,124,147,169,217,252,241,244,244,235,220,185,222,252,252,250,241,203,186,119,72,72,83,119,118,93,101,101,108,125,117,106,92,94,111,100,82,73,77,65,84,114,127,152,153,141,141,152,159,164,167,172,167,168,151,151,149,162,113,10,4,10,11,15,13,15,14,15,16,16,15,54,55,60,72,69,66,111,130,229,190,125,212,239,160,91,102,96,104,118,143,151,180,183,228,181,135,196,163,217,200,163,166,75,179,222,55,59,59,82,134,65,70,76,93,98,66,88,118,141,127,103,99,92,110,162,163,141,118,153,250,160,85,69,105,144,100,125,83,54,39,67,146,186,210,242,251,250,250,251,251,251,251,246,125,78,124,100,96,71,92,160,42,36,73,65,86,49,64,63,72,101,105,115,96,97,79,66,89,69,77,92,101,66,48,44,56,62,55,64,74,74,85,85,81,78,66,69,57,67,78,69,83,91,64,108,159,130,168,154,77,69,110,102,71,84,69,79,90,97,138,170,143,181,176,208,168,144,113,141,184,116,107,71,42,72,53,56,83,68,80,77,61,85,88,72,52,16,36,41,35,46,42,46,46,41,46,42,43,31,61,121,135,115,92,77,60,45,76,107,106,89,54,51,49,49,52,30,60,110,109,93,64,36,23,43,63,49,51,63,49,15,15,16,36,87,113,155,178,195,231,249,249,252,252,253,253,253,253,252,252,252,252,252,252,246,240,237,230,214,203,200,208,195,156,160,181,150,172,108,8,4,52,157,133,80,65,67,72,55,40,41,37,41,77,89,101,130,147,115,44,65,129,147,152,84,24,71,72,42,60,41,86,132,76,39,42,71,83,80,64,45,39,49,53,47,56,77,71,39,45,52,67,97,97,123,89,39,87,88,41,90,170,174,156,151,134,131,137,112,91,124,145,113,112,110,112,113,115,115,96,78,74,78,74,72,69,81,77,84,98,96,109,122,103,59,31,19,19,19,27,40,90,94,70,77,35,20,19,29,54,52,49,58,71,79,80,73,70,77,76,77,76,74,80,83,77,84,64,42,25,19,27,29,35,33,49,57,48,67,79,86,82,76,78,74,65,51,83,152,166,108,62,57,74,75,70,69,66,66,66,66,63,52,35,19,19,18,19,22,22,24,22,23,19,21,21,16,19,17,21,17,17,20,16,21,21,19,22,20,21,21,21,18,24,20,18,19,17,21,18,21,19,1
8,21,18,23,18,16,19,18,21,19,21,21,24,21,21,21,16,21,17,20,22,22,22,18,17,54,112,72,29,17,12,18,17,19,16,23,18,21,19,20,19,22,23,23,23,16,23,19,19,23,17,19,21,20,19,17,22,18,17,20,20,15,19,16,26,23,17,24,23,24,25,24,33,33,29,27,26,28,29,33,33,27,27,33,28,27,26,17,17,19,19,22,16,19,22,18,21,16,21,19,20,22,19,31,62,91,76,33,16,18,18,19,19,21,23,19,17,23,19,22,53,38,21,22,18,24,19,19,22,17,16,18,19,21,17,23,21,20,19,21,25,17,18,21,21,21,18,21,23,21,21,22,24,16,25,25,17,22,18,22,20,21,19,19,24,19,19,23,23,21,21,18,22,23,20,20,26,22,27,22,48,110,122,111,116,91,105,109,117,159,182,204,216,212,225,226,227,207,166,217,245,245,249,208,206,171,88,51,47,73,140,140,127,129,118,139,149,139,114,88,49,37,31,23,37,55,39,34,41,47,52,54,33,32,33,36,44,58,75,66,121,165,184,173,135,97,17,3,12,11,15,12,14,14,16,16,16,16,106,113,118,141,141,136,166,166,250,206,131,217,235,191,125,154,148,152,170,185,198,221,221,250,177,134,200,186,237,214,189,193,95,197,221,57,66,60,85,132,71,53,39,36,51,34,48,78,115,144,120,95,98,150,225,217,167,117,155,247,168,81,71,123,150,141,152,105,39,1,26,131,250,237,229,253,245,245,197,181,172,168,204,123,134,180,130,105,69,87,136,70,58,54,43,78,66,69,62,57,74,82,103,98,90,86,83,103,87,67,112,118,77,51,31,35,50,53,65,81,87,86,89,89,75,72,61,50,92,120,86,113,139,84,144,174,202,252,253,192,107,145,137,86,72,72,125,234,252,252,252,252,253,253,252,252,252,250,249,252,250,250,225,193,151,45,24,39,29,40,42,29,39,42,43,43,39,37,41,41,38,41,39,37,40,34,32,43,25,59,116,130,109,75,81,68,54,76,71,57,42,22,22,26,28,20,12,12,19,44,50,51,53,55,69,86,95,106,153,177,190,221,246,249,253,253,252,252,253,253,253,253,252,252,247,247,243,241,223,206,196,182,162,171,164,134,145,174,179,154,147,149,145,110,118,122,80,112,65,5,6,18,67,66,57,60,59,83,77,94,151,147,145,159,182,212,217,231,214,95,39,93,127,129,74,32,63,69,46,55,37,41,61,51,49,44,46,36,71,102,46,57,90,97,114,104,108,83,50,46,42,68,100,98,103,82,42,63,62,31,91,169,160,145,134,129,136,141,89,38,99,125,99,106,105,105,108,100,87,65,59,62,69,71,78,81,83,91,89,92,100,97,68,39,27,17,17,23,33,78,118,99,53,33,36,27,23,17,32,55,60,63,60,72,80,80,75,74,77,79,69,80,88,85,89,63,49,29,19,19,16,22,34,41,40,59,66,77,87,85,87,78,78,76,66,67,60,53,98,157,163,99,39,48,61,69,71,65,64,76,92,72,51,26,18,22,14,22,24,21,23,23,22,20,23,21,16,19,20,21,19,19,22,20,19,19,20,19,18,20,21,18,21,20,22,21,21,25,19,21,22,21,18,19,22,18,22,21,19,20,19,24,19,21,21,21,24,18,23,17,17,19,17,23,21,22,18,29,98,87,34,19,14,16,21,18,22,19,19,21,20,20,18,21,20,17,20,23,23,21,22,17,23,22,19,24,17,21,20,18,20,18,19,18,19,21,22,22,18,18,23,20,19,26,38,36,25,26,26,32,29,29,34,28,29,29,27,24,23,22,16,18,18,22,24,19,17,22,25,21,22,17,19,21,21,22,32,77,99,68,29,15,20,16,17,21,21,19,19,23,18,23,49,36,19,20,16,24,20,19,23,20,19,23,21,20,24,18,21,22,19,20,23,21,24,16,19,21,21,25,21,19,20,20,21,18,21,20,21,21,19,19,22,22,19,19,23,20,19,23,21,23,26,20,24,23,20,21,17,23,27,21,41,56,64,66,63,57,57,56,56,86,79,88,89,88,98,88,103,91,62,107,131,124,123,104,128,96,52,53,54,85,139,156,129,128,113,121,137,131,135,105,44,29,35,33,41,46,41,39,40,43,34,19,13,17,21,21,24,18,17,13,122,227,237,225,147,71,16,3,14,12,16,14,13,16,17,16,16,16,122,124,132,154,156,149,163,168,250,188,107,195,232,162,92,111,88,108,150,169,200,231,233,252,195,163,218,200,245,244,227,199,96,206,218,66,75,51,85,130,63,48,29,24,25,25,39,35,89,163,132,94,91,117,197,175,103,69,95,173,101,51,47,91,130,133,158,121,77,29,51,118,205,213,210,244,214,212,170,79,21,44,104,97,139,149,104,91,95,162,234,124,79,66,65,79,66,72,56,61,71,81,120,97,76,77,86,132
,137,85,106,145,93,93,72,51,49,55,77,69,84,79,80,88,61,68,62,54,108,90,105,118,122,75,97,141,177,246,252,213,70,114,128,96,95,94,129,198,247,247,250,250,249,249,252,252,252,244,235,231,203,241,249,222,128,31,6,10,21,35,39,39,38,31,41,43,37,39,36,43,42,38,37,41,53,59,55,41,34,23,30,59,61,75,107,87,31,8,11,12,13,14,15,19,20,24,24,35,123,105,148,196,219,236,252,252,253,253,252,252,253,253,252,252,252,252,246,246,236,222,184,155,144,133,112,95,86,102,121,125,145,138,116,120,120,94,85,118,124,104,71,52,60,53,71,62,37,76,56,22,23,11,67,137,179,191,191,194,167,178,214,200,173,188,181,169,156,168,151,56,14,21,49,63,43,42,44,51,50,46,41,43,69,59,57,61,43,20,91,151,76,64,90,126,149,112,96,75,54,52,44,43,53,51,54,56,42,53,50,32,94,158,153,139,132,131,133,141,83,50,110,125,97,108,112,109,89,69,63,54,63,73,78,80,80,82,96,97,100,93,63,37,24,23,19,20,33,46,54,69,77,67,43,28,22,16,25,32,39,51,57,61,64,72,79,84,75,76,77,75,83,88,88,75,48,32,22,22,19,17,27,45,58,66,68,84,90,93,99,79,75,74,69,71,65,69,73,64,61,92,152,148,96,53,37,53,69,73,72,102,95,68,56,27,19,16,22,24,19,24,20,24,21,22,20,25,24,18,24,17,19,21,21,21,18,18,19,21,20,21,24,15,20,23,19,21,23,17,21,21,20,24,21,21,19,25,19,20,23,17,24,20,17,20,21,21,23,20,22,21,19,19,17,21,19,18,25,25,77,101,51,20,12,16,22,18,21,22,19,23,20,22,25,20,18,21,21,18,22,23,25,22,19,23,20,18,21,19,21,21,19,17,17,22,17,23,24,19,19,23,20,22,21,19,30,35,31,31,30,26,31,31,30,29,29,32,28,29,21,17,20,18,20,19,20,24,23,21,19,20,19,20,24,23,18,19,20,35,79,88,61,29,15,19,17,19,18,25,18,21,23,29,47,31,22,23,17,21,23,19,21,21,21,19,22,23,21,22,21,23,21,21,21,24,21,21,23,22,23,21,21,24,24,18,25,22,19,23,17,24,20,21,19,21,19,21,24,21,21,21,21,25,24,21,25,23,23,23,23,22,24,25,31,50,53,59,57,53,56,48,36,35,26,16,18,15,18,15,50,45,16,46,43,34,39,38,63,47,46,66,65,77,105,96,77,69,46,57,69,71,91,84,50,45,44,43,46,50,48,44,39,37,20,23,66,115,141,145,129,93,47,29,171,232,241,240,158,69,12,5,14,13,14,14,15,15,16,17,16,15,131,117,80,76,111,125,158,169,238,164,105,208,241,172,100,84,29,74,130,164,197,192,205,248,206,174,202,213,237,236,194,130,86,202,205,86,85,52,90,128,81,87,84,64,63,54,56,62,92,130,117,88,49,49,72,43,27,17,19,33,18,21,17,42,57,90,118,116,110,122,146,148,173,151,155,169,120,146,166,114,48,27,48,73,120,87,57,76,87,160,197,96,68,83,85,104,110,95,59,60,81,96,106,87,72,60,76,146,148,110,142,167,133,141,115,92,74,65,84,75,105,102,87,102,76,71,69,88,134,107,80,106,83,52,95,106,127,183,236,131,19,71,101,139,163,154,144,99,61,131,169,159,153,177,253,253,252,240,235,228,174,141,142,162,90,15,9,12,30,38,39,44,38,39,40,35,39,37,42,49,40,37,38,33,83,120,93,47,12,13,11,24,67,89,114,91,41,36,37,42,74,115,151,194,219,239,251,251,253,253,252,252,252,252,252,252,250,250,246,246,227,197,180,190,174,147,139,107,98,86,27,4,10,30,56,41,28,53,76,90,93,78,60,42,51,42,23,17,28,44,48,66,87,114,153,131,125,170,118,60,43,36,136,215,240,237,203,175,111,98,121,88,83,85,67,60,58,68,87,91,109,110,115,129,136,123,106,110,44,50,49,54,110,74,72,66,62,42,60,124,73,37,35,66,81,59,60,55,63,64,63,73,64,66,76,71,44,50,67,46,99,149,149,145,133,141,139,137,61,60,128,111,101,108,98,80,69,55,67,61,74,84,83,89,83,88,97,92,67,39,23,21,18,28,35,38,51,63,74,56,79,106,72,28,15,26,56,62,43,39,38,51,49,63,69,70,79,79,84,90,91,78,59,35,25,18,16,17,23,39,65,81,92,96,102,105,84,91,84,72,75,66,76,77,75,74,68,67,59,52,93,163,173,113,43,34,67,78,83,79,71,56,46,29,16,23,22,24,24,22,26,19,23,27,21,20,23,18,18,23,21,22,18,19,20,19,24,20,19,21,24,19,20,24,21,21,21,20,22,23,19,22,23,17,21,19,18,20,18,21,24,21,21,19,19,24,20,19,23,20,25,
19,21,19,19,19,24,24,60,106,66,24,12,15,19,22,21,19,18,19,24,19,22,23,19,21,21,22,17,22,21,19,26,19,21,21,19,20,22,21,19,20,19,19,19,19,22,21,18,17,19,19,22,22,32,29,28,32,27,28,32,27,30,30,30,30,24,24,22,20,22,19,18,23,21,19,26,21,20,21,21,17,20,20,22,20,20,23,41,83,91,60,26,15,18,23,21,23,19,22,26,32,49,30,20,26,16,21,23,21,26,21,22,23,17,26,21,23,26,19,23,18,23,25,21,21,19,24,21,21,23,23,22,16,22,24,21,23,25,23,23,24,21,21,23,23,24,23,24,21,21,23,26,25,24,22,24,26,22,23,21,21,36,53,63,62,61,51,38,31,17,15,15,16,16,16,16,19,65,51,32,57,31,17,25,28,57,43,58,81,72,77,56,50,49,54,37,39,56,48,56,69,55,51,73,84,91,103,98,71,50,39,36,81,139,206,227,226,218,191,132,109,228,238,241,234,178,99,9,4,13,11,14,13,16,14,16,17,15,15,214,175,89,50,101,142,191,214,249,191,156,241,251,233,188,155,76,90,141,171,165,127,159,221,162,120,160,185,229,188,111,97,92,194,174,101,119,81,116,126,104,138,133,123,123,120,130,125,134,141,100,89,71,57,68,54,54,56,62,72,54,66,70,64,69,77,86,97,129,159,181,163,155,132,111,120,83,84,107,108,93,69,53,53,69,57,53,61,61,98,114,46,63,90,98,129,153,122,59,62,71,77,94,68,67,73,86,132,134,103,131,178,122,137,136,123,113,85,86,87,129,124,92,111,63,51,55,110,161,98,72,29,39,65,55,56,45,54,27,8,13,18,31,77,106,96,77,31,4,23,34,28,25,26,77,105,110,112,159,177,131,91,90,148,95,10,6,10,14,21,36,39,32,34,32,32,38,36,50,59,41,36,29,12,83,139,134,109,81,87,92,122,158,174,192,205,220,245,250,250,252,252,253,253,252,252,253,253,249,249,243,237,212,177,151,150,141,105,89,91,42,7,8,17,19,29,68,60,74,73,20,5,31,77,72,38,21,27,42,47,49,50,54,51,46,57,28,17,12,63,106,119,140,160,182,141,130,167,128,71,46,26,93,139,119,97,75,67,57,59,69,61,66,108,105,81,88,117,141,164,179,184,188,183,120,156,125,125,69,73,65,42,73,55,65,56,55,59,39,50,61,63,36,55,75,61,59,62,79,98,97,98,99,95,101,92,57,48,69,45,67,112,125,129,126,131,143,144,84,87,113,79,82,78,72,72,57,60,64,71,81,78,89,85,87,87,64,43,31,26,29,37,33,38,56,80,108,124,130,136,134,145,124,57,50,75,81,60,38,33,40,48,47,59,62,70,74,78,85,80,59,37,24,16,20,16,22,41,54,77,90,99,108,101,96,86,84,81,76,73,69,75,77,77,70,64,69,66,63,63,55,95,170,179,119,58,48,74,68,39,29,32,31,22,22,23,22,24,23,30,19,23,30,23,23,17,25,19,17,25,18,21,23,23,24,20,21,22,26,22,17,23,23,19,19,20,23,19,18,23,20,24,22,21,20,22,19,20,20,22,21,19,21,21,24,18,23,18,22,21,17,21,22,21,22,23,18,25,38,83,69,26,22,14,19,19,20,17,19,27,19,23,19,19,22,20,24,18,20,23,22,22,20,20,22,20,23,24,21,18,19,18,18,20,18,17,24,20,19,20,18,22,17,24,31,29,29,26,29,27,28,28,32,29,29,35,30,27,17,19,22,16,22,18,18,24,21,24,24,21,22,21,23,20,22,22,20,22,22,52,90,89,53,22,18,17,21,23,18,22,21,39,53,25,21,25,19,19,21,21,24,21,24,22,20,24,18,22,22,25,21,19,24,18,24,21,21,22,22,28,22,22,22,21,25,20,18,26,23,24,22,21,26,22,24,19,22,24,24,28,23,21,24,26,24,21,23,22,21,26,24,23,37,61,69,71,57,31,22,22,26,32,27,25,25,19,18,27,84,70,57,66,32,30,36,36,50,39,58,83,80,72,60,56,57,52,34,42,55,49,62,64,49,98,165,200,222,236,230,208,182,167,154,172,201,234,229,227,205,187,154,147,241,245,238,229,205,111,6,3,9,12,14,12,14,14,15,15,15,16,246,245,174,122,167,205,252,252,251,244,232,249,253,253,246,244,159,139,152,165,160,127,143,173,127,116,135,139,160,133,109,117,126,166,150,130,147,122,146,139,114,154,165,172,203,216,235,235,223,212,195,195,198,208,216,204,213,214,219,225,220,223,219,224,212,201,196,192,202,212,216,203,181,169,184,186,135,81,54,55,79,80,47,44,72,63,88,92,78,127,143,89,103,132,141,155,146,146,131,90,54,41,61,66,72,71,73,116,98,87,129,137,111,116,123,154,133,105,112,92,97,82,55,79,49,32,34,62,101,48,42,44,5
7,75,52,60,58,42,33,22,22,31,30,36,30,24,30,28,27,26,28,27,26,24,27,24,29,41,61,74,57,55,81,121,67,14,20,14,16,17,20,22,17,20,25,24,29,19,73,83,32,37,41,92,187,213,219,232,245,247,250,250,252,248,252,252,248,248,246,245,233,199,223,215,174,160,134,122,118,107,92,71,53,29,10,18,24,15,13,16,15,13,13,13,14,14,15,14,14,14,14,12,18,25,29,55,78,88,92,102,97,93,94,71,73,57,35,15,11,27,21,25,37,44,36,14,17,27,24,15,20,14,49,73,61,55,53,101,130,151,159,134,135,145,106,52,50,61,80,85,78,78,77,71,66,64,52,59,42,47,45,39,48,55,67,69,103,80,19,76,143,89,45,104,104,53,47,46,53,51,56,61,48,53,52,47,45,44,42,41,42,62,95,98,108,125,140,148,110,91,83,76,94,89,82,71,65,72,80,73,85,89,89,91,66,48,30,31,42,41,42,50,65,88,104,127,143,148,149,140,143,139,124,105,85,88,77,59,46,29,36,41,46,73,76,74,77,69,60,42,25,18,21,16,24,33,53,69,76,96,101,99,98,89,87,84,66,77,77,78,85,75,73,64,64,64,67,70,66,65,60,49,95,170,182,144,81,41,40,34,23,22,24,21,24,21,24,30,24,24,22,24,28,25,23,23,25,22,21,22,20,22,21,21,26,19,19,22,21,26,20,24,22,17,19,21,19,20,24,23,18,22,24,19,21,18,20,22,23,20,17,23,24,19,23,24,20,21,23,19,23,20,22,26,19,24,18,21,26,71,77,33,22,15,19,21,19,23,22,19,18,18,19,19,21,27,20,19,21,19,24,19,22,19,18,25,21,17,21,19,19,18,18,21,16,19,24,16,23,23,17,21,19,24,33,27,27,26,27,29,30,29,31,34,26,29,32,23,19,17,19,20,20,21,20,21,24,21,21,19,19,22,25,22,17,22,19,24,24,21,56,95,87,52,23,19,21,16,23,21,22,38,36,21,24,17,18,22,22,26,24,19,19,19,20,23,19,26,23,23,22,18,24,26,18,22,21,21,23,22,20,21,23,23,22,23,21,20,21,21,26,20,23,18,20,25,23,25,21,22,20,24,29,20,21,24,20,21,27,29,21,23,44,69,84,89,96,116,150,171,181,189,184,184,176,154,136,161,192,157,157,161,126,107,121,141,126,72,66,89,84,94,78,67,61,55,41,41,56,51,65,61,55,120,184,216,246,246,247,247,241,230,218,222,241,245,215,198,172,155,129,135,220,238,228,233,224,111,4,1,7,11,14,11,13,14,15,15,15,15,247,247,246,197,236,245,249,249,252,240,218,250,252,252,248,248,188,138,159,151,148,136,147,152,131,142,149,137,143,129,120,142,150,169,168,184,198,192,209,184,177,206,230,248,252,252,252,252,253,253,253,253,252,252,252,252,252,252,252,252,253,253,252,252,252,252,253,253,252,252,252,252,246,251,253,253,224,124,81,69,76,100,94,90,118,120,153,165,152,176,186,124,146,200,209,217,193,190,140,115,59,37,43,57,74,70,63,92,99,132,152,166,127,107,118,140,156,124,104,83,70,66,72,122,92,76,68,66,77,60,122,127,162,194,159,214,247,247,247,230,225,228,239,239,224,216,236,237,239,243,230,229,228,220,217,142,181,181,193,188,184,185,193,210,196,208,227,230,232,228,235,235,229,230,151,223,217,147,247,247,245,247,252,252,253,253,252,252,253,253,253,253,252,241,223,213,194,191,181,184,156,49,65,128,139,135,101,72,48,27,14,10,14,13,14,19,26,36,51,68,71,66,63,68,68,125,73,76,71,84,47,13,29,53,93,132,134,113,97,82,66,42,35,37,30,33,27,23,28,29,26,25,24,34,33,26,36,67,87,41,20,18,101,153,130,103,65,66,75,85,89,73,67,54,15,8,13,11,14,12,13,13,13,14,75,26,55,81,46,34,41,38,54,92,141,144,152,102,36,116,160,78,61,93,65,42,46,39,45,49,47,52,42,44,46,37,43,34,34,38,49,70,80,87,95,103,120,129,94,76,66,67,98,98,90,78,71,77,84,91,98,98,71,47,42,38,40,36,37,51,71,88,110,122,129,137,137,134,126,124,117,99,90,76,73,72,72,66,42,38,41,42,48,66,79,79,65,41,27,22,21,22,19,29,56,67,80,80,80,94,90,97,95,81,75,72,78,78,87,92,68,74,67,63,71,73,81,66,71,77,60,66,60,96,180,210,163,62,26,33,24,20,19,24,22,24,27,23,31,29,27,29,27,23,28,23,21,26,21,20,21,24,27,19,21,24,21,18,21,27,29,29,24,21,22,19,23,27,22,22,22,19,17,22,24,21,24,21,21,19,24,24,22,25,23,19,23,18,22,21,19,23,20,24,19,18,20,22,16,66,81,33,25,16,
17,23,17,23,21,16,22,21,21,23,21,21,22,17,21,21,21,26,19,19,23,16,22,22,19,20,20,22,16,18,19,19,26,19,23,20,16,17,23,23,32,34,22,26,30,32,30,29,29,33,28,27,29,25,22,18,19,18,20,24,18,19,21,21,18,18,22,19,21,19,19,21,20,24,19,21,31,62,99,84,45,24,18,18,24,21,19,35,34,24,19,21,22,18,22,23,25,22,24,22,20,25,22,23,19,19,21,21,29,21,20,21,22,26,23,22,23,27,21,22,23,22,23,23,22,19,17,17,24,27,23,24,24,24,24,17,25,25,22,28,19,24,23,22,25,21,27,31,44,64,70,113,188,245,252,252,252,252,253,253,252,252,252,252,251,251,250,250,236,228,248,248,239,155,94,108,89,88,76,50,56,58,47,38,53,56,71,65,77,134,149,152,189,242,245,243,234,235,237,241,245,241,204,196,183,170,137,155,226,228,227,243,237,108,1,1,8,11,10,12,13,12,14,14,15,14,246,246,249,218,244,246,240,240,240,129,118,197,207,237,237,236,144,108,143,134,139,132,144,155,147,159,159,155,160,161,165,184,212,243,250,250,252,252,252,252,253,253,252,252,253,253,253,253,253,253,253,253,253,253,252,252,253,253,253,253,253,253,253,253,252,252,253,253,252,252,252,252,252,252,253,253,247,192,144,119,125,157,162,155,165,167,210,229,183,171,136,100,156,202,223,237,210,185,118,76,80,79,74,54,79,86,93,124,105,106,133,147,134,134,94,111,137,110,82,71,70,77,101,192,174,134,136,123,142,111,155,164,207,242,215,252,252,252,253,253,252,252,253,253,252,252,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,252,252,253,253,253,253,252,252,252,252,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,252,252,253,253,252,252,253,253,252,252,253,253,251,76,100,237,233,218,158,121,69,48,78,99,141,173,204,237,251,251,252,252,253,253,252,252,252,252,252,252,252,250,196,61,96,223,216,177,97,63,67,57,61,74,101,141,181,213,249,249,252,252,253,253,252,252,253,253,252,252,251,209,76,23,110,165,110,62,11,17,24,22,46,59,83,113,140,162,177,167,168,164,148,165,138,121,125,136,120,106,64,30,49,37,39,97,124,103,103,79,46,69,78,49,50,56,51,60,65,71,80,85,105,112,117,120,101,92,71,30,63,66,36,69,75,71,77,85,98,90,81,61,55,66,80,82,81,74,69,81,95,97,76,51,41,36,33,29,33,50,73,83,97,112,116,122,127,125,121,120,119,102,85,66,69,66,59,73,75,63,48,36,45,45,42,66,63,49,34,17,21,19,18,31,51,66,80,83,90,95,81,91,81,78,77,67,71,73,78,84,79,76,72,64,63,68,76,70,74,75,74,77,73,74,78,68,74,162,177,63,27,27,20,26,18,21,20,20,25,24,24,26,25,25,29,26,25,24,24,24,25,22,19,24,22,23,20,23,23,18,28,34,19,32,36,21,21,24,29,24,20,24,21,21,26,26,28,19,21,24,19,24,23,21,24,21,25,21,21,26,21,23,19,18,21,21,21,22,19,21,24,46,85,47,18,21,17,20,20,24,19,21,27,21,18,20,22,21,18,23,25,20,24,23,21,20,19,21,22,21,19,17,20,19,21,21,19,21,22,18,23,19,21,23,21,31,30,30,28,25,30,26,31,35,31,27,23,30,29,27,25,18,19,19,20,19,20,19,20,22,24,21,18,21,20,21,21,21,21,22,19,25,25,23,67,100,80,43,20,19,18,17,27,45,33,19,22,21,19,23,23,25,23,21,28,23,23,24,21,22,22,24,26,20,18,25,20,24,24,23,23,21,22,23,22,18,21,23,21,24,21,21,24,22,23,23,24,24,25,22,17,22,22,25,23,23,23,19,27,23,25,20,23,25,29,45,33,112,234,246,250,250,253,253,253,253,253,253,253,253,253,253,253,253,251,251,253,253,247,190,128,116,60,41,21,14,39,57,52,48,51,67,85,87,112,141,120,83,91,140,181,191,213,237,249,249,248,248,238,242,238,230,217,231,248,244,241,244,236,105,1,1,7,11,10,12,13,12,14,14,13,13,247,247,242,191,207,244,227,193,111,37,68,150,159,163,173,174,109,105,140,136,153,144,150,165,165,169,163,165,183,218,242,251,238,236,230,210,236,244,241,241,231,230,217,213,213,209,215,213,209,208,206,207,210,216,151,217,211,211,210,200,200,198,181,179,156,173,201,199,202,211,215,200,189,208,208,194,160,148,131,112,126,158,144,128,146,124,167,156,124
,104,74,67,101,114,122,134,118,88,68,37,78,106,103,64,74,98,109,137,90,71,87,90,108,97,51,82,94,67,74,71,71,70,81,149,136,113,113,85,130,109,116,121,136,144,129,229,237,236,207,174,232,196,138,211,227,184,234,217,235,239,234,241,242,250,252,252,252,252,253,253,252,252,253,253,252,252,253,253,252,252,250,250,253,253,252,252,253,253,252,252,252,252,253,253,252,252,252,252,252,252,252,252,252,252,252,252,246,246,231,229,204,42,71,176,195,214,199,216,239,252,253,253,252,252,252,252,253,253,253,253,252,252,252,252,252,252,249,249,231,226,146,17,86,193,204,189,174,200,237,249,253,253,252,252,253,253,253,253,253,253,252,252,253,253,252,252,246,245,251,150,35,12,81,132,110,146,178,213,239,247,252,252,253,253,253,253,253,253,252,252,252,252,249,194,158,147,98,85,46,41,52,33,41,49,55,54,53,57,60,78,77,81,95,88,86,104,102,99,110,118,133,144,151,141,79,104,68,32,71,53,32,59,68,87,99,100,90,79,83,78,83,95,97,92,81,86,86,85,75,55,40,29,23,23,34,51,68,84,95,105,113,108,115,114,114,112,107,99,78,63,44,49,66,61,66,72,77,67,45,42,43,37,41,44,46,34,24,30,22,32,48,64,81,89,90,92,93,87,80,88,78,66,69,73,77,75,66,66,57,59,53,63,73,64,69,69,70,69,76,80,78,81,61,44,23,23,58,50,33,29,23,22,24,26,20,21,21,25,33,22,26,30,24,22,25,27,28,26,23,23,21,19,19,25,22,22,22,29,33,23,23,26,20,20,26,24,23,24,22,20,19,24,26,28,20,21,26,18,23,23,31,19,25,25,20,25,19,26,23,21,22,17,20,23,21,23,20,24,27,37,84,62,26,21,21,24,21,22,24,20,21,26,21,24,23,24,22,21,27,22,21,26,27,24,23,21,19,17,19,24,16,16,22,21,17,20,22,19,19,20,17,17,22,24,34,32,29,31,27,30,30,32,32,27,29,31,29,28,24,22,19,21,19,20,20,20,18,19,21,23,22,20,20,19,23,21,19,24,25,22,22,19,36,76,100,76,34,20,19,17,29,45,27,19,24,20,21,19,25,19,23,24,19,24,21,24,24,23,21,24,20,22,23,22,27,25,25,17,23,25,17,23,21,22,21,20,21,18,22,23,21,22,19,24,26,21,23,24,21,21,27,23,23,27,24,21,23,25,23,24,23,29,25,30,19,111,217,230,242,229,218,217,223,225,219,226,237,235,222,240,246,224,227,249,252,252,226,179,156,112,59,53,34,16,39,57,63,55,69,91,102,104,112,125,98,66,50,79,152,174,221,243,251,251,251,251,253,253,252,252,253,253,250,250,244,244,236,105,2,1,7,11,12,11,12,12,14,14,15,14,244,244,243,147,100,125,113,133,87,17,79,170,224,237,215,200,152,134,151,160,191,172,153,165,163,168,178,199,223,252,246,246,219,150,101,35,65,151,122,99,71,35,21,7,14,12,14,21,16,12,15,15,15,23,31,29,36,39,39,26,23,29,36,50,23,12,14,19,45,79,81,29,6,33,71,64,65,78,85,87,83,77,54,53,74,74,71,55,30,51,52,45,53,41,43,45,31,45,38,49,68,78,93,81,76,81,102,108,89,88,98,94,80,76,53,66,71,79,86,76,70,56,50,65,46,45,37,37,78,61,69,60,48,51,37,83,81,30,12,19,40,23,24,68,89,112,140,147,169,165,172,203,208,235,250,250,249,249,248,248,242,238,222,208,199,192,184,183,179,173,118,67,107,162,178,193,174,136,169,208,215,209,203,202,189,176,165,151,137,131,128,117,110,103,85,75,64,66,66,81,82,15,30,74,117,196,228,243,245,245,246,246,245,245,241,238,228,216,200,189,182,169,152,131,116,105,100,70,46,58,48,4,44,177,236,250,251,251,252,252,251,251,250,250,248,248,244,244,234,227,214,191,167,160,136,105,69,38,90,63,15,1,91,238,253,253,252,252,252,252,252,252,252,252,253,253,251,251,244,242,229,227,173,81,48,46,61,71,49,32,40,37,90,125,88,51,47,63,71,85,84,82,89,76,77,77,56,53,48,46,51,47,52,53,50,63,61,46,71,59,44,86,129,143,131,122,104,84,84,80,110,128,130,116,94,79,54,42,32,23,27,24,35,51,66,84,94,108,120,112,108,112,111,104,100,96,77,69,57,45,55,51,74,77,63,74,74,68,49,44,46,39,33,39,51,33,44,64,66,82,95,104,108,101,97,99,84,87,79,79,74,77,81,84,81,61,57,52,57,53,52,58,63,66,66,70,75,73,81,81,64,47,29,18,18,16,18,33,42,30,16,
3,20,29,31,36,31,24,33,29,26,24,18,37,49,29,18,25,28,24,27,27,27,29,28,29,32,29,24,24,28,34,37,34,35,33,31,35,27,30,29,30,32,27,28,31,31,28,24,19,21,23,23,24,26,23,20,21,21,23,29,21,19,23,23,20,23,23,22,22,23,22,22,24,26,78,71,18,27,22,17,22,23,26,24,16,23,28,20,23,21,22,23,23,25,26,28,22,33,80,114,97,66,29,15,23,18,23,23,24,25,23,24,23,25,21,30,29,20,24,19,24,23,24,27,24,40,126,218,213,185,215,225,184,166,184,182,171,178,182,177,196,189,166,191,172,130,147,171,178,162,159,179,170,215,250,251,251,249,249,253,253,252,252,253,253,252,252,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,252,252,250,247,243,239,234,226,219,217,213,199,194,186,183,157,148,107,10,1,8,8,11,10,12,10,11,13,13,12,105,109,101,98,87,90,71,65,93,103,106,96,82,86,56,18,13,24,69,117,152,181,208,249,227,173,178,170,176,176,179,181,173,177,173,176,171,169,163,83,16,28,73,127,145,134,118,102,90,81,74,69,64,62,62,53,61,60,69,59,12,27,73,130,168,168,171,179,188,193,198,204,207,213,217,228,220,113,28,69,159,236,244,239,211,123,93,89,110,155,127,103,112,127,120,77,36,38,86,118,127,126,133,150,182,74,9,112,190,205,183,168,161,152,104,41,19,58,127,87,52,66,64,108,106,129,186,237,249,249,111,38,129,144,125,72,97,158,145,152,120,36,21,113,212,220,212,209,205,208,200,181,170,169,164,156,163,167,159,165,136,40,92,162,152,163,145,137,122,127,135,126,132,126,142,154,160,164,144,149,152,151,143,144,152,151,154,141,150,151,144,136,131,149,150,134,137,162,175,193,185,177,176,153,155,147,131,147,162,159,147,165,196,184,193,185,165,134,51,12,46,119,155,179,207,212,206,174,140,111,61,31,14,28,62,92,105,120,156,191,185,184,199,193,184,154,161,179,199,203,184,178,157,160,162,140,156,168,158,145,156,164,167,160,159,170,174,167,140,152,163,167,152,157,176,162,162,156,143,108,78,92,117,81,48,76,96,86,106,118,93,90,82,80,79,78,80,84,92,86,83,97,97,102,102,97,108,108,107,116,121,120,105,90,62,39,34,22,26,31,30,42,55,67,77,87,89,92,79,80,82,78,79,71,78,81,75,74,75,86,87,88,78,76,66,60,61,56,60,66,81,81,79,80,77,87,85,66,42,27,20,21,24,24,27,42,49,50,51,50,49,49,51,51,49,49,46,48,49,50,48,45,48,49,41,48,48,49,50,47,52,47,42,41,44,46,44,46,44,43,47,50,43,42,42,45,39,41,41,42,41,44,46,43,39,37,38,39,40,36,37,40,37,38,45,42,36,39,36,41,36,33,38,37,38,31,26,22,26,28,25,29,79,108,75,42,21,22,24,24,26,29,26,26,23,27,31,25,25,29,28,23,27,28,22,27,30,28,24,27,27,24,27,31,29,22,32,26,23,26,33,32,29,46,43,25,25,30,24,24,23,22,25,24,27,27,29,25,68,91,48,26,21,30,33,32,29,27,28,27,28,24,26,28,32,24,21,24,20,39,39,23,27,36,29,27,35,29,25,21,27,32,33,31,37,35,23,31,32,30,34,30,33,29,29,24,26,27,19,21,20,22,21,24,24,20,27,23,23,20,27,28,22,23,21,22,23,22,23,23,27,22,25,17,34,87,54,23,26,15,24,21,22,23,18,21,23,25,18,25,23,25,24,20,27,21,23,21,24,32,71,113,99,62,32,21,22,24,23,23,21,19,27,25,18,25,23,22,24,23,21,24,24,24,27,35,92,119,153,142,109,179,208,169,154,152,162,163,167,183,188,203,197,182,206,216,219,240,237,170,76,24,43,77,133,175,207,202,174,175,219,244,252,252,252,252,252,252,253,253,252,252,252,252,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,253,252,252,252,252,252,249,244,242,241,234,233,226,226,207,198,114,4,1,6,9,10,9,12,10,13,13,13,13,106,113,113,118,109,118,63,39,91,132,152,147,141,141,91,22,16,34,91,148,173,191,227,245,231,170,163,152,154,152,145,144,129,122,119,113,108,94,79,29,13,39,61,95,96,84,79,71,78,79,87,94,96,105,116,117,124,126,149,128,27,30,94,168,215,216,221,225,227,230,228,224,226,216,203,195,188,65,1,35,89,145,142,126,89,59,58,50,82,122,98,59,42,35,38,24,14,43,126,210,253,25
3,250,250,246,141,23,110,181,191,150,118,108,97,56,31,26,51,136,150,128,132,146,123,66,137,219,253,242,237,86,16,102,93,56,42,116,199,167,170,129,18,16,113,192,200,191,194,191,189,188,179,171,178,166,152,167,167,167,160,100,50,127,168,155,167,163,163,145,155,165,160,165,152,158,166,180,182,161,165,174,176,162,158,165,177,169,155,164,169,170,153,157,170,181,186,171,183,199,229,229,177,168,165,199,223,199,185,200,227,222,206,200,190,143,106,96,127,174,156,175,211,214,207,168,137,108,66,40,40,78,127,143,160,183,218,241,222,207,218,228,201,190,199,229,207,179,199,234,234,178,159,157,196,229,210,177,181,212,212,190,179,202,216,193,178,188,217,212,186,184,217,224,191,183,163,165,160,139,128,98,99,112,82,73,95,96,109,120,115,103,93,86,84,93,76,95,100,101,106,97,105,99,104,102,103,113,106,110,125,117,82,50,33,30,24,27,29,35,51,54,65,76,86,91,88,88,80,80,78,81,78,78,79,70,82,80,79,84,86,87,78,74,64,57,59,57,67,74,76,96,89,89,89,85,78,42,29,21,17,17,21,29,38,51,51,54,58,51,50,53,51,50,51,48,45,50,52,49,49,44,47,44,41,46,44,49,49,47,47,47,44,44,42,43,46,46,44,43,46,47,44,41,41,40,40,41,39,45,47,45,43,37,40,39,39,42,35,41,37,41,44,39,42,34,33,38,36,38,36,30,35,37,35,39,32,29,25,27,24,21,27,31,84,114,84,52,24,19,26,26,27,25,26,26,24,28,33,32,25,26,29,30,22,26,29,25,23,25,27,25,28,25,26,26,30,29,24,28,31,32,25,29,35,33,22,28,28,19,27,22,25,29,28,39,29,26,25,50,93,66,37,26,24,29,28,29,29,32,22,28,24,22,33,22,21,26,22,29,36,29,22,29,35,35,32,29,33,24,22,25,29,32,31,37,32,31,28,32,28,35,31,28,28,31,27,29,26,22,22,17,21,26,23,22,21,27,27,19,21,22,26,21,23,22,20,26,18,25,24,23,21,27,19,50,94,43,23,28,16,25,19,21,26,18,24,22,19,22,25,23,24,25,25,22,23,23,24,21,21,33,75,111,95,63,29,16,22,19,23,23,21,24,23,24,21,24,24,19,26,24,24,27,25,30,89,92,44,80,72,55,122,149,130,116,99,116,141,161,185,200,220,211,184,211,229,229,246,231,200,122,14,9,27,48,83,130,142,114,122,179,233,238,236,224,217,236,241,248,248,252,252,252,252,252,252,253,253,252,252,252,252,252,252,253,253,252,252,252,252,251,251,250,250,252,252,248,240,236,243,245,250,253,248,250,247,227,112,3,1,5,9,10,10,12,12,13,13,13,13,150,160,160,165,154,155,87,56,116,151,172,168,162,166,122,45,33,53,100,151,161,170,188,226,169,97,102,93,95,89,88,79,71,71,66,66,66,65,58,23,18,62,97,141,157,159,168,174,187,192,200,206,208,214,219,215,218,210,218,175,60,39,99,188,238,245,239,235,227,216,194,180,159,134,110,90,72,17,18,35,25,23,15,16,11,16,39,69,74,73,71,79,55,21,35,64,107,146,188,249,252,252,246,246,240,98,2,56,98,96,61,47,40,30,28,34,25,53,152,209,187,197,206,208,84,46,157,179,159,107,8,19,81,83,73,44,132,237,194,165,120,16,13,106,183,188,184,164,119,115,145,164,184,233,249,200,176,188,228,228,103,87,214,248,215,190,185,224,210,178,179,207,249,225,181,181,227,251,193,170,204,249,236,176,181,240,251,205,179,210,251,244,177,197,250,250,189,157,165,203,135,85,125,149,243,250,205,186,242,248,222,141,148,93,45,56,133,248,250,190,141,126,92,50,21,65,184,220,147,152,234,250,247,203,235,253,252,199,185,252,251,193,168,224,250,204,149,165,210,170,92,122,160,245,250,213,162,208,250,225,153,175,249,248,164,161,230,251,216,166,211,251,248,181,162,140,158,155,138,131,122,118,103,66,55,91,103,107,118,118,117,114,111,109,105,106,106,106,112,110,100,108,99,100,109,118,129,114,105,82,33,23,30,24,33,42,51,62,70,76,74,83,88,81,82,82,82,82,77,81,82,79,78,74,83,88,84,86,80,69,71,62,50,64,58,67,69,72,82,89,105,103,98,79,46,33,19,19,19,18,29,37,51,59,51,59,53,55,53,46,51,49,46,49,53,46,44,49,47,46,43,45,48,45,46,52,46,44,44,49,40,41,46,40,43,44,53,45,41,42,42,41,40,45,39,41,40,47,45,41,
43,41,39,39,42,38,39,41,38,39,39,38,41,34,40,38,35,40,32,34,36,39,38,37,38,32,33,27,23,28,29,27,29,27,73,117,86,51,21,23,31,27,23,24,23,29,33,32,27,26,30,26,28,28,26,24,29,28,24,33,28,21,29,28,27,24,25,30,31,34,28,25,24,26,22,23,27,24,27,24,27,25,39,49,28,29,24,24,35,81,93,42,24,19,27,32,28,25,22,29,25,27,29,23,22,26,26,25,26,30,20,24,30,30,31,29,32,27,27,22,23,29,35,34,32,33,33,27,27,35,33,29,31,29,33,28,30,29,23,19,20,21,19,24,20,21,26,26,23,17,24,19,22,28,19,27,21,22,23,18,27,20,27,23,67,86,28,24,24,19,22,16,25,23,20,24,22,21,24,23,17,26,21,21,23,23,26,21,24,25,21,33,71,114,98,60,36,18,19,21,23,24,19,24,26,23,24,22,21,28,26,25,26,31,95,138,82,27,78,85,48,91,111,125,136,105,141,168,165,189,203,214,191,165,192,208,208,227,232,240,201,104,116,94,43,28,48,82,118,155,178,174,150,156,140,127,149,155,166,216,249,229,160,186,207,195,198,208,218,242,251,251,249,249,252,252,252,252,252,252,250,250,250,250,251,250,237,221,222,236,250,252,252,252,252,252,237,109,2,0,6,9,12,12,11,11,13,14,14,13,182,181,178,175,169,165,91,74,129,162,171,160,149,153,109,36,42,54,92,120,112,108,123,136,75,46,70,71,83,87,89,103,110,118,130,139,155,160,167,78,28,84,135,205,229,237,244,240,248,245,248,248,248,247,247,242,238,227,219,162,44,25,65,108,191,178,159,135,120,97,74,62,56,56,59,69,67,23,28,43,44,50,33,29,48,78,90,99,73,51,64,89,92,59,84,156,206,227,184,188,221,202,168,138,107,20,14,53,76,89,83,94,98,95,49,61,57,87,128,140,114,134,183,191,35,14,87,92,90,67,9,34,115,128,99,60,118,196,190,204,163,33,7,93,169,177,153,100,43,54,103,131,166,245,251,165,158,199,250,210,72,137,249,250,201,165,188,235,191,149,157,204,250,204,155,165,197,213,165,152,188,208,190,156,171,237,253,175,159,200,251,217,163,184,236,231,147,120,123,102,74,100,138,158,217,227,163,146,162,169,95,60,63,72,34,60,76,115,117,61,36,54,125,84,107,160,247,250,200,193,237,249,208,165,177,214,215,163,154,198,205,172,163,187,207,166,141,144,192,110,88,144,148,206,226,170,139,165,197,170,148,158,188,177,150,156,198,218,173,152,174,216,196,157,151,133,146,147,132,135,135,136,109,70,61,54,57,97,106,103,110,107,108,111,109,108,115,108,110,118,114,117,102,115,118,103,86,56,30,25,25,35,39,47,67,81,93,93,88,83,81,79,85,83,79,80,83,83,88,84,86,83,84,90,81,84,76,77,66,63,63,51,56,56,64,73,71,92,96,100,108,85,55,32,19,21,21,19,24,35,51,60,59,58,55,53,53,53,53,49,46,49,50,51,47,45,45,44,45,44,44,47,45,47,44,43,51,46,44,45,43,46,43,45,46,44,39,44,46,40,45,45,45,43,43,45,42,42,43,42,39,42,37,41,35,36,38,44,41,37,38,34,39,36,35,36,33,36,40,39,41,38,31,36,33,35,33,29,23,35,58,31,21,27,23,70,109,86,57,27,20,24,20,26,24,37,35,24,30,24,20,24,24,28,34,21,28,24,26,27,23,26,31,23,24,27,25,36,31,28,26,24,30,23,27,28,26,47,35,24,22,29,44,30,27,23,22,24,34,73,92,61,27,23,24,29,27,26,28,22,27,29,27,26,30,30,26,24,27,29,19,24,26,28,29,29,33,29,27,21,34,31,34,33,33,33,30,26,29,33,30,31,29,31,29,30,33,29,26,19,16,22,24,22,27,19,22,23,21,27,23,18,19,19,24,24,21,23,23,21,18,25,22,23,84,69,19,26,20,19,23,18,24,21,16,25,23,22,24,19,23,24,21,22,24,27,22,24,22,24,21,23,37,72,106,98,62,32,19,19,23,26,22,19,27,21,25,22,22,28,20,24,25,87,162,171,109,89,149,134,97,121,141,177,193,170,199,191,179,205,205,200,181,172,194,194,199,222,226,242,217,175,211,173,84,29,9,36,104,141,154,137,133,167,143,128,137,131,127,176,234,151,52,74,115,117,120,140,150,204,243,216,183,154,182,206,213,224,202,234,253,253,249,249,251,250,237,213,220,245,252,252,251,251,252,252,231,111,4,1,6,10,11,11,13,12,14,13,14,14,160,159,148,137,128,127,66,59,108,117,121,100,92,89,37,10,41,57,88,112,107,114,120,134,110,110,142,150,171,
184,195,200,208,218,222,224,225,227,222,104,21,71,137,208,229,232,233,223,221,214,205,198,193,181,171,157,148,128,108,62,16,21,17,23,23,15,14,12,13,15,14,31,64,91,123,150,147,55,38,110,192,238,235,229,222,185,95,59,70,64,71,71,37,39,68,110,126,103,70,62,85,101,107,117,112,27,24,118,178,186,155,151,148,127,95,95,127,122,104,81,41,57,97,77,9,55,146,180,208,190,46,29,118,125,100,38,83,146,142,208,206,49,3,83,166,188,176,153,126,130,146,141,144,155,146,135,142,165,192,134,50,120,190,178,162,152,152,168,153,147,155,171,194,154,147,150,160,162,139,148,150,152,139,143,148,155,155,141,144,142,159,147,143,154,174,165,139,153,158,178,163,182,212,194,213,181,109,98,73,52,47,55,92,74,38,25,15,51,78,102,154,174,188,188,189,198,206,188,159,160,173,177,147,145,152,160,151,142,151,165,159,152,163,167,167,145,153,165,180,159,155,173,146,159,162,137,148,161,171,157,160,169,169,162,159,171,177,171,153,161,157,160,142,150,161,124,145,145,139,138,139,141,131,111,83,66,66,94,99,97,100,93,95,95,89,104,110,103,105,111,108,101,88,66,45,22,19,27,30,36,39,73,104,107,111,99,91,88,84,82,82,89,88,83,82,82,83,83,85,84,81,86,86,78,70,61,66,66,57,62,66,64,64,63,64,83,83,93,111,96,63,34,30,21,18,18,24,39,45,56,60,61,59,55,51,52,53,54,50,47,53,50,47,46,47,47,49,44,42,47,48,49,48,47,39,41,45,45,41,43,45,46,47,43,41,41,41,41,42,43,44,45,42,46,44,37,39,37,41,39,42,37,35,44,40,38,41,42,39,36,39,38,37,37,33,39,43,36,35,39,34,33,34,32,35,34,33,27,23,29,39,25,21,24,24,23,60,108,93,57,26,16,30,26,32,34,22,23,27,24,24,31,27,24,24,29,29,24,24,23,26,27,27,26,24,29,39,30,23,26,21,26,28,22,32,25,36,38,24,26,25,29,22,26,26,25,25,32,34,45,95,75,33,27,24,24,26,23,27,29,25,25,30,28,27,25,24,28,25,29,25,16,26,30,31,31,32,32,27,22,30,34,30,32,36,36,34,24,35,34,26,33,29,26,33,36,34,25,24,19,21,19,17,22,22,23,22,24,22,24,24,23,26,23,20,22,24,25,21,20,20,27,27,42,91,53,20,26,17,19,21,26,21,26,22,19,23,22,23,18,25,23,16,26,21,22,24,21,27,24,21,23,24,33,69,105,99,57,32,19,20,22,18,29,29,23,26,23,27,23,24,25,62,120,137,120,101,129,150,125,106,130,150,171,181,170,181,178,179,195,198,200,194,185,194,184,172,183,178,188,172,152,195,155,119,123,54,20,38,57,89,97,133,171,163,160,163,154,140,174,241,152,29,77,134,132,134,143,143,184,199,154,113,61,100,141,149,162,117,167,245,246,247,247,252,252,242,221,221,242,249,249,249,234,231,206,182,107,9,1,9,10,12,11,13,12,13,14,14,13,88,89,84,70,68,63,26,39,78,92,95,87,82,89,41,17,53,83,146,176,190,205,229,245,201,185,199,201,220,225,226,227,225,226,222,218,213,212,196,80,23,54,92,162,173,164,146,127,118,100,94,83,73,70,59,46,31,16,11,15,28,39,33,42,49,42,37,22,18,13,25,44,69,83,93,105,87,44,47,116,204,251,245,243,232,146,39,32,75,98,96,40,12,22,45,73,57,49,38,44,79,139,174,194,172,32,20,103,158,150,103,93,80,63,30,70,137,147,157,144,117,127,198,167,45,107,228,252,242,228,76,30,95,88,56,33,108,169,146,165,165,49,1,41,110,162,203,218,205,196,190,175,162,163,150,144,170,166,157,67,50,150,174,168,154,156,163,162,150,163,163,160,161,139,152,155,160,158,150,165,167,165,150,155,158,159,150,145,158,154,155,144,151,159,159,159,159,186,208,218,217,207,178,158,127,54,25,15,41,45,19,33,55,88,116,136,155,174,184,191,205,213,205,193,180,177,175,157,152,165,167,162,151,160,164,166,166,158,158,155,138,130,134,143,146,147,157,165,182,182,178,168,156,162,155,150,154,161,161,156,160,162,170,165,159,144,163,162,159,164,158,158,156,162,163,134,139,151,143,131,133,142,130,128,129,110,105,111,100,93,96,89,84,78,77,77,78,76,71,49,41,35,19,24,19,27,33,68,71,104,121,134,124,112,101,93,91,87,86,80,85,89,88,81,78,84,85,78,80,77,83,80,73,65,5
7,59,60,66,63,66,70,66,76,75,89,95,91,102,66,35,31,18,20,25,23,31,43,58,57,58,62,57,57,52,53,49,47,55,50,52,51,42,43,47,49,44,44,45,47,47,46,44,43,45,40,43,41,46,45,47,46,39,43,39,42,44,39,38,46,43,46,42,41,45,38,39,40,38,37,40,41,42,41,44,42,39,37,37,34,35,41,36,39,34,33,43,40,37,33,34,37,39,28,29,33,29,39,32,25,21,23,22,24,26,19,33,25,52,108,95,62,35,20,31,25,23,28,22,24,23,29,31,24,31,25,28,21,19,32,26,31,27,24,27,30,39,25,19,25,29,27,23,27,27,27,24,21,28,24,29,34,21,26,24,24,29,24,34,36,36,80,90,46,23,20,25,29,27,26,26,30,26,27,30,24,23,27,24,29,27,25,25,21,30,33,27,31,34,23,23,26,30,32,36,34,31,34,33,29,32,33,27,28,35,29,30,38,29,18,21,25,19,19,20,19,20,23,23,21,24,21,24,24,23,21,21,26,24,19,24,17,28,19,57,89,31,21,25,18,24,21,23,21,19,22,22,25,24,22,23,24,24,22,25,24,24,19,24,23,18,26,25,27,22,33,63,104,98,67,37,21,23,20,21,23,24,21,20,26,25,27,61,92,83,62,48,64,87,63,56,63,74,95,83,95,89,110,120,117,137,142,147,143,133,130,118,124,137,148,162,149,141,163,142,168,191,116,96,101,46,46,74,97,135,154,173,187,185,175,208,248,168,52,113,163,163,160,166,155,193,213,162,110,66,120,153,156,158,91,146,242,243,247,247,252,252,245,228,213,214,225,220,201,185,179,160,144,97,15,2,10,10,14,12,14,14,14,15,14,15,88,94,101,107,107,111,57,72,135,162,177,170,169,177,99,36,63,100,163,212,219,236,245,245,226,190,188,186,193,186,184,177,166,162,151,145,131,127,111,29,13,30,63,103,91,76,59,49,39,35,39,46,53,63,73,79,76,66,49,20,17,39,72,98,96,101,98,103,110,101,99,84,74,68,54,40,22,13,34,67,125,146,125,119,109,105,88,80,90,110,150,106,16,24,89,132,131,120,110,100,110,138,132,102,59,15,14,29,46,47,33,34,27,24,15,46,138,153,170,212,191,228,245,245,102,96,208,232,218,130,14,37,98,109,85,44,142,230,212,190,131,19,12,25,26,57,121,190,212,197,193,184,179,182,171,185,181,183,122,34,98,176,181,170,164,165,163,170,166,166,166,163,167,157,164,161,163,162,168,172,166,165,165,170,166,168,165,167,166,165,170,163,162,160,178,188,184,201,202,198,151,83,39,7,6,10,12,16,42,94,130,152,178,188,198,208,211,212,202,191,179,176,173,163,163,162,167,168,167,162,165,172,171,171,173,170,166,166,157,155,151,143,138,143,155,155,159,163,167,165,164,162,162,168,171,170,171,165,155,155,157,149,141,139,128,118,129,135,139,151,149,154,153,163,166,133,132,140,139,130,125,128,130,127,126,124,117,123,110,109,107,95,86,75,69,66,63,53,46,39,37,46,55,61,83,93,99,121,125,132,129,121,107,91,94,86,83,77,80,77,81,81,77,80,80,89,88,79,86,80,73,74,61,62,66,61,76,72,66,75,72,71,84,95,109,103,72,39,23,27,24,26,26,30,44,50,59,59,55,59,57,57,56,55,46,50,53,53,51,48,53,43,42,47,44,50,47,45,45,43,45,42,46,45,42,45,46,46,42,43,40,46,43,43,41,37,42,44,44,42,44,43,43,38,42,40,33,39,38,38,46,37,39,39,34,42,35,40,40,36,37,36,41,38,34,37,35,36,37,33,36,30,33,34,33,36,37,32,28,28,23,25,26,22,27,27,30,31,48,96,107,81,44,22,23,25,23,27,22,27,30,26,24,23,28,27,25,30,30,24,25,26,26,31,36,28,21,23,23,27,22,25,29,26,28,23,25,30,27,33,24,30,33,24,27,29,30,28,36,36,54,92,67,34,21,21,30,22,26,34,32,27,26,26,27,28,24,29,32,29,28,24,23,28,34,33,29,29,27,22,29,36,33,30,33,29,31,30,34,29,33,33,26,33,31,29,36,27,16,23,23,19,22,18,21,21,19,21,20,29,23,22,20,24,23,19,24,24,25,21,26,23,23,77,77,27,23,22,17,22,21,24,23,20,21,21,23,22,24,24,22,28,22,24,27,22,26,26,22,24,23,21,25,21,25,32,60,104,102,68,40,24,17,22,23,21,27,19,23,29,66,101,77,59,50,49,53,60,48,35,48,50,49,46,48,39,41,51,45,43,49,53,52,41,78,146,168,194,214,222,224,221,238,197,198,203,132,188,191,105,78,55,81,93,118,177,208,216,200,225,247,169,60,116,147,153,159,168,164,202,226,170,112,86,148,173,167,170,96,148,244,244
,247,247,252,252,245,237,202,179,182,176,159,145,144,139,144,100,16,2,10,10,14,12,15,13,14,15,15,15,170,178,183,184,190,174,98,110,167,197,210,204,198,195,103,42,55,93,162,193,206,221,236,239,185,141,135,121,115,105,93,84,78,73,66,58,49,42,24,9,29,48,78,116,112,113,119,122,123,128,137,140,146,149,155,156,164,169,167,108,29,51,124,194,215,203,198,180,172,155,134,113,99,86,73,55,28,10,30,83,117,131,130,139,162,184,205,108,107,147,195,173,30,27,111,165,165,150,127,104,107,111,82,27,7,11,33,63,81,76,90,115,130,151,89,88,172,158,139,166,144,160,228,168,19,37,88,75,35,10,8,33,78,83,61,50,137,221,233,234,175,31,9,104,131,52,31,90,155,196,207,202,198,195,200,196,190,133,41,70,156,188,178,168,168,162,169,173,168,170,164,166,171,168,172,167,166,168,166,166,161,160,163,167,166,169,170,171,176,175,184,184,185,200,208,210,177,116,79,57,16,6,11,14,39,88,137,163,188,198,203,216,212,208,201,190,186,181,171,160,158,159,171,173,167,166,164,168,166,165,166,168,170,168,163,160,163,168,173,177,180,179,174,176,175,174,170,163,167,164,165,169,168,169,167,169,167,162,162,160,159,150,161,160,153,145,152,155,139,149,149,147,156,168,170,135,109,123,134,137,138,125,119,120,117,108,103,112,118,123,125,120,117,103,98,107,99,92,96,95,107,109,114,130,124,134,126,121,120,105,109,102,95,93,87,76,68,71,71,76,77,76,77,85,84,81,79,69,73,66,63,68,61,63,62,65,67,78,70,73,73,79,102,91,71,47,25,27,23,24,29,34,47,53,53,60,63,59,53,52,55,55,51,48,50,53,53,55,47,48,46,45,47,44,50,45,46,43,37,47,46,45,47,46,45,48,44,39,45,43,40,44,47,40,44,44,46,46,36,45,38,35,41,36,42,41,38,43,39,41,36,38,34,37,39,35,36,41,36,36,43,38,33,37,36,38,33,33,34,33,38,35,35,35,37,35,33,30,35,30,24,24,24,24,21,26,24,24,24,31,83,106,75,50,24,22,19,21,30,25,26,27,24,27,29,22,27,28,24,27,27,24,28,34,27,27,24,28,22,27,28,21,28,23,25,27,27,34,29,22,35,30,24,27,27,26,29,27,29,31,37,81,84,45,17,18,27,23,27,32,33,26,28,26,25,31,30,23,31,45,24,21,20,27,34,31,30,28,24,26,26,33,36,32,33,29,30,32,31,31,29,32,32,29,33,41,34,22,22,24,18,22,24,21,21,24,21,20,25,21,21,21,22,19,22,24,26,23,23,24,22,16,34,89,56,19,27,16,19,21,22,21,25,24,21,21,22,24,21,24,24,23,24,18,25,25,24,24,20,21,23,23,23,24,21,21,33,65,115,113,71,46,18,24,26,19,25,25,24,66,108,82,55,50,53,54,53,59,42,36,46,48,53,51,54,47,51,63,48,38,41,29,22,87,185,227,244,251,253,253,252,252,253,244,244,199,106,168,181,118,84,65,56,33,61,131,172,191,172,203,247,144,53,98,104,114,128,150,150,193,220,152,93,90,158,172,169,168,100,160,244,244,247,247,252,252,247,245,217,178,176,171,154,141,141,138,154,109,14,2,10,10,14,12,14,14,14,15,14,14,198,197,196,193,194,174,97,107,156,183,194,178,174,170,83,27,45,71,124,148,148,154,170,156,89,74,72,61,63,56,57,61,67,76,80,92,98,108,79,16,37,87,143,193,197,205,206,208,209,211,212,214,200,189,192,184,190,189,202,126,22,37,103,152,164,150,131,125,117,110,105,113,116,129,141,155,114,31,59,128,196,237,246,247,227,212,156,109,135,183,218,155,35,22,78,115,111,86,72,65,77,91,75,64,26,9,69,143,193,203,208,227,233,245,162,121,202,202,174,156,74,45,48,12,8,19,21,26,29,39,31,34,46,59,54,33,103,158,166,224,211,53,15,125,203,174,66,16,53,107,155,184,189,186,174,154,80,24,66,144,192,184,171,170,170,170,169,168,170,171,167,164,168,167,171,165,171,169,163,171,170,168,167,166,171,178,182,188,199,212,215,212,202,166,134,110,39,1,11,23,65,105,143,171,192,214,221,226,216,207,194,184,184,178,174,174,174,174,166,160,155,159,162,166,174,169,169,171,167,171,171,175,174,166,166,160,165,171,169,173,177,177,174,170,169,166,165,167,170,173,177,180,172,154,144,137,135,137,137,150,168,179,184,185,181,176,181,181,
174,171,166,170,170,168,169,133,104,108,131,149,138,121,118,116,118,114,105,110,109,124,125,130,133,116,127,129,130,126,123,124,126,121,122,118,113,113,108,107,100,102,101,92,87,86,80,74,69,64,78,72,78,81,82,85,71,69,65,60,67,63,66,66,66,68,73,64,78,86,81,82,87,82,59,39,28,26,25,21,32,44,44,55,59,61,59,56,64,54,54,52,52,59,53,49,47,53,47,50,50,45,45,44,50,47,42,47,47,49,46,43,45,46,43,49,42,40,43,44,43,41,40,42,46,49,45,40,38,40,41,38,38,39,39,36,42,39,43,38,41,39,39,38,34,39,38,39,33,39,36,35,34,34,37,33,36,34,32,34,39,33,32,36,34,33,32,34,30,35,36,28,21,25,25,23,25,21,26,28,23,39,41,78,114,90,61,31,25,29,25,24,28,23,26,31,25,27,23,25,27,29,29,29,30,23,27,21,25,24,27,34,26,28,23,21,29,29,29,27,23,23,24,29,21,25,29,32,31,31,32,28,30,63,88,61,32,21,21,31,31,28,33,27,30,31,27,32,28,21,29,29,21,23,26,27,29,31,34,24,29,24,21,36,34,31,33,35,33,28,29,29,33,35,34,30,29,35,37,27,21,20,19,22,17,21,17,22,21,22,23,19,24,21,23,22,21,21,23,21,22,23,23,16,54,87,39,24,25,19,17,22,27,18,16,24,27,22,24,21,22,20,23,25,20,25,24,25,24,23,24,23,23,22,24,22,24,21,21,34,60,111,110,72,44,24,22,21,25,23,69,105,77,55,44,50,53,49,53,60,52,38,46,55,52,55,57,47,56,60,49,37,42,37,103,205,227,237,230,249,249,249,250,250,253,253,249,215,91,101,109,97,115,94,34,6,12,20,51,78,66,125,200,89,46,85,64,86,96,106,107,157,211,136,68,94,151,169,164,164,105,160,245,245,247,247,252,252,249,249,239,195,182,181,173,160,152,148,163,110,12,2,10,12,14,13,15,13,14,15,15,14,181,180,178,168,165,121,81,85,125,136,136,114,105,93,30,29,39,55,90,90,86,80,87,78,70,86,102,117,129,143,153,162,171,178,183,191,194,207,152,45,42,85,154,209,216,215,208,206,206,200,192,184,162,149,141,123,115,104,97,44,13,41,65,97,103,97,107,112,131,146,155,170,185,203,206,218,142,50,55,139,203,251,236,220,139,68,63,46,103,134,131,82,5,19,44,65,51,46,48,57,91,131,152,170,99,18,84,177,224,232,226,222,211,207,112,89,179,211,222,211,114,65,81,23,7,76,139,162,172,137,40,49,88,74,60,42,117,163,142,165,170,42,15,121,190,197,173,98,22,12,38,64,74,73,55,15,27,86,145,194,191,171,167,164,162,160,159,165,162,166,167,164,164,165,165,164,171,172,171,173,177,181,186,198,205,211,218,217,211,191,159,117,61,83,8,25,63,100,141,173,190,208,217,217,217,205,195,186,181,177,172,174,170,170,175,172,172,171,168,168,166,160,166,167,167,171,166,169,171,171,171,171,170,166,162,165,168,166,161,154,150,155,156,158,160,162,157,152,164,169,171,178,173,165,154,151,148,146,148,160,170,175,184,181,177,173,180,184,182,181,178,175,165,154,159,141,116,105,109,121,118,108,109,112,114,113,114,112,116,124,125,123,122,122,124,125,120,118,120,116,114,110,108,109,100,106,105,107,103,97,104,87,86,80,75,74,65,71,73,79,84,81,74,71,65,67,65,59,67,67,71,68,66,73,76,78,78,85,92,80,60,39,23,22,24,24,29,40,47,57,62,57,57,56,55,57,57,53,50,51,54,54,50,49,49,50,47,49,49,47,54,50,48,43,44,46,47,45,41,46,46,52,43,44,44,44,43,41,42,40,45,48,49,41,37,39,40,42,40,41,38,36,43,41,43,40,35,41,37,36,40,33,36,38,40,39,35,34,32,35,33,39,32,28,39,35,33,35,37,33,33,35,33,28,33,31,31,32,33,34,24,24,25,24,26,26,26,24,29,34,32,47,82,134,111,68,41,21,21,20,22,24,26,22,27,25,23,27,27,31,24,31,24,23,27,26,25,31,37,29,22,27,28,22,24,27,27,27,23,23,25,28,33,29,35,39,28,28,30,28,24,45,87,84,39,20,28,34,29,29,29,26,29,26,31,27,19,20,24,19,25,36,27,24,30,32,24,33,25,24,33,28,33,33,32,34,29,32,32,26,36,36,31,30,35,33,37,29,22,20,19,24,24,17,24,21,22,21,22,24,23,24,28,19,24,24,21,27,21,20,21,23,71,78,28,25,20,16,24,18,22,29,19,24,24,19,24,22,23,24,20,24,23,22,26,22,21,22,22,22,22,24,25,25,21,25,27,22,32,61,108,112,68,40,24,19,23,66,107,78,49,48,5
0,51,46,46,59,61,49,40,41,48,53,55,52,50,53,62,46,35,43,47,139,189,195,216,206,208,200,194,210,234,250,250,249,214,123,107,88,100,149,156,86,32,11,12,12,17,11,54,147,70,42,77,75,93,77,63,63,141,203,121,56,74,125,143,142,147,96,160,247,247,248,248,253,253,252,252,247,210,189,189,184,177,170,160,173,111,11,3,10,12,14,13,15,13,15,15,15,15,124,114,96,90,89,61,26,59,78,79,77,56,53,36,10,36,61,86,122,139,141,144,151,156,159,182,195,200,212,211,216,219,216,215,212,213,211,215,164,47,32,72,125,170,166,155,135,124,118,105,95,88,79,71,70,61,60,59,57,20,16,60,108,158,169,172,182,189,198,202,204,213,208,213,208,208,142,34,39,97,160,196,170,79,44,57,51,59,66,64,57,25,8,35,77,98,103,102,103,119,155,206,215,232,132,27,82,145,187,170,142,125,95,68,12,49,139,164,200,236,181,164,190,89,42,132,205,225,205,135,49,68,107,84,47,52,150,225,199,177,122,10,21,121,187,191,186,185,157,105,44,36,44,50,71,102,135,170,199,193,179,171,166,160,153,152,150,150,154,159,162,163,167,166,170,171,180,184,186,198,197,201,202,191,178,156,131,101,64,19,5,11,24,47,98,153,178,201,214,210,205,196,187,183,176,172,167,165,162,165,167,164,169,167,169,168,163,163,157,162,165,163,170,171,171,166,166,172,170,173,170,163,152,146,158,162,169,173,163,155,154,157,160,160,163,165,157,148,145,149,153,164,168,169,168,171,177,175,179,176,165,155,155,162,163,164,166,173,174,172,171,174,175,173,177,163,134,105,83,86,96,96,96,103,106,99,105,116,118,127,124,120,122,118,122,122,120,118,118,112,114,109,111,114,106,106,105,103,99,99,97,88,83,84,81,77,76,67,82,81,78,72,60,66,61,69,66,65,66,69,69,77,84,73,81,87,92,73,56,38,27,19,21,29,34,38,50,55,59,61,56,60,59,59,54,53,58,44,51,54,49,55,53,49,45,48,49,48,52,53,49,46,46,44,43,43,48,47,45,47,43,46,44,43,47,43,43,40,43,46,46,41,41,44,43,39,39,35,42,42,44,44,35,43,37,36,37,38,39,34,39,41,34,41,37,32,38,33,34,35,31,36,32,29,39,33,41,37,25,34,31,32,31,31,34,34,33,35,33,31,29,25,22,25,24,22,29,26,27,27,25,37,26,42,99,107,80,51,26,19,29,28,26,20,27,32,21,25,27,29,27,23,27,24,27,28,28,33,28,24,22,26,29,25,28,28,27,27,23,24,25,27,32,33,48,39,20,28,22,24,26,30,70,92,57,28,28,24,30,27,29,29,23,31,27,21,22,24,22,25,37,34,26,29,27,30,31,28,26,22,31,31,33,31,33,36,34,36,27,28,37,30,28,29,34,35,33,31,16,20,19,22,20,20,23,19,23,18,21,21,20,21,19,26,24,27,22,24,24,21,19,32,92,63,16,27,23,16,21,24,19,24,23,25,23,24,26,18,24,24,29,26,20,22,22,26,23,19,23,25,27,24,27,23,23,26,26,27,24,29,59,108,107,71,47,21,75,111,76,47,33,50,48,45,50,49,53,61,47,33,45,44,50,55,55,50,54,63,53,40,39,41,82,122,163,215,212,208,200,185,186,202,209,211,219,171,144,136,104,111,191,235,207,197,188,167,125,80,59,124,172,51,33,92,105,122,84,53,62,143,203,121,50,52,68,84,87,100,73,149,247,247,249,249,252,252,250,253,248,224,195,190,193,182,179,176,176,110,10,1,10,12,14,12,15,13,15,15,15,15,60,63,53,52,42,19,22,53,94,114,128,128,132,100,25,43,81,115,187,200,208,204,212,210,208,210,209,210,206,204,199,191,188,184,174,163,153,151,87,16,26,47,90,108,89,76,66,60,64,64,66,82,89,105,117,128,145,155,150,54,16,73,138,201,208,206,210,205,202,193,189,181,160,146,129,113,50,4,32,74,110,115,100,75,71,108,122,107,136,146,144,85,2,55,126,169,158,143,132,130,165,200,203,198,93,2,50,103,115,88,67,61,67,75,29,71,154,160,152,191,170,171,201,89,33,90,150,137,95,55,22,35,35,39,27,37,138,220,231,226,159,21,21,121,183,190,187,191,196,199,182,169,165,174,184,190,202,198,182,176,168,167,168,157,156,160,159,160,160,163,170,174,178,186,192,195,198,191,177,157,136,114,85,56,24,5,11,10,14,11,30,111,166,190,205,211,207,196,188,179,172,172,171,164,163,165,169,162,159,162,162,165,163
,161,162,159,160,158,156,159,159,161,167,171,171,171,167,170,170,165,165,162,161,160,162,169,170,167,163,164,168,170,168,168,166,168,171,165,165,165,166,171,169,170,168,169,172,174,174,174,165,160,155,159,169,173,180,175,162,151,159,173,179,176,180,174,145,104,80,81,87,94,93,88,94,87,96,111,118,126,123,125,119,115,122,122,121,119,115,111,119,112,110,111,108,107,101,102,96,103,104,93,96,88,93,77,79,83,77,76,69,64,66,63,61,72,71,65,75,69,73,80,87,92,71,78,49,38,24,22,26,27,34,39,52,57,55,60,60,61,60,51,54,55,48,51,57,57,52,50,49,49,50,51,49,47,51,55,50,47,43,45,45,47,42,42,46,47,43,45,46,43,42,42,44,38,44,45,42,41,38,36,43,41,39,40,41,41,49,39,36,45,37,36,39,38,37,39,35,37,38,40,39,31,36,36,34,36,31,38,33,29,39,36,36,30,34,33,32,33,34,36,33,33,34,35,35,38,29,35,33,27,28,21,26,22,26,27,23,24,24,26,27,30,36,81,110,84,58,37,21,21,24,20,30,25,25,29,26,31,29,23,24,28,27,33,31,23,25,28,24,29,27,31,28,19,27,28,25,31,27,29,30,28,33,26,24,23,25,32,25,26,49,88,77,37,19,26,31,25,24,27,30,27,24,24,23,22,25,31,31,32,28,32,24,27,34,27,27,26,29,29,30,36,33,33,32,28,31,31,34,36,31,27,27,31,35,33,19,19,23,19,23,22,21,24,21,20,21,20,17,22,24,23,28,24,21,28,22,20,17,59,92,40,19,27,16,23,26,22,22,21,24,26,21,22,24,21,18,26,26,21,24,24,25,21,23,22,21,26,17,23,26,21,27,25,23,28,25,23,31,57,97,108,79,77,118,67,39,37,42,49,46,44,49,48,50,57,42,35,43,48,51,53,54,51,52,61,50,34,37,37,57,61,100,156,172,201,212,212,212,216,210,196,182,133,121,115,97,118,199,246,251,251,253,253,248,247,230,246,223,88,83,123,124,141,112,91,110,188,232,157,83,52,50,51,42,63,60,149,247,247,249,249,251,251,216,237,248,227,205,191,193,180,179,179,179,111,10,1,10,12,14,12,14,13,14,15,15,14,124,135,133,139,128,60,47,101,146,187,200,188,197,146,46,42,78,134,183,201,203,194,192,185,177,171,161,149,145,136,121,111,103,97,90,81,63,46,17,12,39,57,90,110,104,114,121,136,147,157,168,178,187,193,201,204,209,215,195,78,19,67,133,186,189,172,163,147,131,117,102,91,68,61,49,35,17,15,50,100,152,167,168,127,116,142,147,147,187,226,236,134,23,59,135,164,152,123,103,89,104,121,101,84,22,10,58,99,127,117,137,157,176,179,103,128,206,203,169,143,100,112,122,46,23,44,59,69,48,32,27,21,29,30,31,28,78,150,169,227,193,33,17,118,184,184,174,175,182,188,196,203,201,199,193,190,183,177,169,163,165,163,162,164,164,170,172,177,185,190,198,198,191,181,169,146,127,97,64,42,17,23,29,36,47,46,41,31,39,82,146,191,214,199,194,189,176,178,169,167,171,173,174,176,172,174,173,169,168,170,169,167,168,165,167,165,163,166,166,169,170,169,170,168,167,168,167,166,165,163,165,169,169,168,170,169,167,168,167,170,171,167,168,169,165,172,173,177,179,177,175,171,165,165,159,159,158,155,164,167,169,170,172,174,174,179,174,168,159,157,162,165,165,165,169,174,148,110,92,81,84,95,96,81,77,69,79,99,115,126,120,117,119,116,115,118,114,116,116,110,109,106,108,105,101,101,101,102,103,111,111,100,89,79,67,73,80,76,81,73,74,79,73,74,71,72,67,76,79,78,79,92,93,66,52,34,22,22,29,33,34,43,52,56,60,60,59,57,56,57,54,58,59,54,50,53,55,55,54,47,53,49,45,51,54,52,45,48,49,49,48,45,49,46,47,44,48,46,45,46,42,42,40,43,47,43,43,41,39,39,43,43,36,43,43,41,42,41,40,37,34,40,36,41,38,35,45,34,43,39,37,38,35,41,33,38,39,31,36,33,33,39,33,33,33,33,35,33,36,31,28,33,34,35,36,38,29,33,33,29,35,29,27,22,25,22,30,23,20,27,24,28,29,29,27,34,69,113,104,73,48,24,22,25,28,22,23,31,24,28,23,28,28,27,29,21,27,29,29,24,27,32,29,26,28,21,20,24,26,33,30,26,31,32,23,24,30,29,28,30,28,29,33,76,92,47,27,25,25,27,27,26,22,27,25,25,27,23,30,24,28,32,26,34,31,34,29,26,23,25,34,33,31,33,37,34,30,33,34,31,33,35,28,29,28,29,30,25,24,21
,22,24,23,22,25,23,22,22,24,18,20,24,19,22,22,23,22,21,23,20,28,81,80,27,22,23,15,18,23,23,25,23,19,24,22,20,23,22,22,22,24,21,26,22,19,24,24,24,25,24,18,27,27,21,29,25,28,29,19,26,32,31,53,99,133,130,83,33,39,42,38,43,46,54,55,60,60,52,49,51,48,48,53,49,49,51,52,54,42,35,38,43,46,47,39,58,79,114,149,162,183,202,209,213,185,125,100,88,83,89,141,193,219,244,246,250,253,253,253,253,244,181,198,188,136,134,126,134,181,242,246,226,171,140,130,127,112,120,110,179,250,250,250,250,246,229,168,208,246,235,216,188,196,180,175,183,172,108,11,0,9,10,13,12,14,13,15,15,15,14,190,199,186,190,172,82,64,108,161,194,198,189,185,136,41,33,59,105,144,157,146,127,121,109,96,87,81,72,64,60,60,57,55,61,66,70,78,84,33,14,55,97,159,186,187,195,201,203,210,208,207,210,209,210,206,201,193,191,170,56,12,43,85,131,117,101,84,75,66,56,62,70,79,94,113,132,63,18,89,162,235,253,249,191,137,147,141,137,177,215,227,130,16,33,88,113,94,71,57,50,71,81,85,87,26,17,101,167,200,199,207,210,213,196,108,129,205,232,207,178,87,45,61,18,25,29,33,34,30,36,22,33,36,38,48,34,78,139,132,169,158,23,19,109,173,181,167,165,163,169,171,172,171,170,166,165,166,164,166,169,168,171,174,173,179,191,193,192,190,176,160,135,103,77,49,27,23,46,70,92,115,136,156,169,183,190,190,178,122,158,158,153,148,146,148,152,155,155,155,157,153,156,160,160,163,163,169,169,170,168,170,171,165,163,165,163,161,160,163,168,167,165,165,164,160,163,163,162,165,165,167,166,166,170,170,169,171,172,170,174,173,168,162,162,164,164,165,159,159,161,158,158,155,148,153,160,165,163,165,171,169,171,171,174,173,169,167,162,168,168,165,165,159,162,164,168,165,132,104,94,74,92,102,82,77,55,61,73,97,114,115,119,112,116,117,114,113,114,107,107,104,102,107,103,106,114,110,108,105,103,97,77,66,55,73,72,69,83,77,77,77,79,75,74,77,78,83,76,87,89,61,58,56,29,37,24,25,37,38,51,55,53,62,60,60,58,58,56,54,57,54,55,55,57,53,51,55,52,51,50,47,52,53,51,50,48,50,48,46,47,48,47,50,49,47,42,45,46,42,49,44,44,45,43,48,43,44,46,40,39,34,39,40,37,45,42,36,39,40,38,36,39,39,39,39,37,37,37,39,32,37,38,32,36,32,33,34,29,35,36,34,34,33,31,33,35,35,32,34,34,35,39,31,29,33,30,29,30,29,33,27,29,29,25,23,19,24,26,22,21,29,26,31,27,47,52,24,44,101,115,84,55,33,26,23,20,27,30,23,21,29,25,29,28,28,29,24,27,27,26,23,27,29,23,23,26,27,28,29,33,30,27,29,27,29,27,24,31,38,33,22,31,33,50,89,67,32,21,21,26,24,27,25,24,27,24,26,24,29,35,29,28,27,34,36,33,33,29,26,24,31,32,36,32,30,38,35,29,29,33,29,35,28,29,34,28,27,25,21,19,21,21,19,22,20,17,22,18,21,21,21,20,22,23,23,22,23,23,27,19,40,96,57,21,24,21,18,20,24,24,22,24,20,22,24,24,21,19,27,26,22,22,22,21,23,21,26,27,21,26,22,25,20,23,34,16,24,23,26,24,24,26,50,118,124,108,79,29,54,39,36,34,44,49,54,64,57,62,64,61,50,53,54,55,53,48,53,51,41,42,42,40,43,46,44,48,35,32,86,62,89,127,147,165,158,122,115,101,83,73,118,171,188,187,164,162,174,190,201,211,203,193,220,185,102,63,57,94,182,242,251,251,248,243,242,240,230,229,207,241,252,252,252,252,246,235,186,229,252,250,230,195,201,184,175,185,172,105,12,0,10,10,13,12,14,13,14,15,15,14,186,188,179,181,146,63,62,94,136,173,163,152,142,84,31,31,46,78,98,95,77,65,63,59,61,68,73,81,98,109,121,136,145,151,162,172,175,182,93,25,63,116,182,212,212,213,205,205,195,188,178,164,156,146,132,117,96,90,63,14,18,32,71,98,89,92,103,116,130,142,156,174,182,200,206,214,127,39,94,168,250,251,248,208,137,139,130,113,113,122,119,45,13,29,47,64,52,53,69,88,139,177,193,190,68,31,112,182,218,196,186,167,147,112,49,90,157,183,213,222,130,47,27,13,26,26,19,23,23,22,34,43,65,75,69,99,164,213,197,175,108,7,25,119,178,176,162,165,165,164,164,162,164
,166,167,167,167,176,177,180,190,191,198,196,177,161,139,116,90,55,36,25,33,56,83,111,138,160,172,185,193,199,195,192,193,188,189,183,184,176,159,148,144,138,144,143,139,143,143,143,145,141,142,149,145,145,151,153,152,154,155,158,151,150,155,154,158,159,159,160,158,161,165,159,158,159,160,160,163,165,160,162,167,169,169,170,171,173,168,170,168,164,166,169,166,167,162,160,161,163,166,167,163,167,171,178,177,172,174,171,169,164,162,165,165,165,168,165,162,169,174,174,174,169,171,180,183,168,122,88,78,87,87,83,90,77,65,59,78,108,111,113,117,115,116,116,116,114,112,108,107,113,114,111,113,117,107,97,82,67,63,65,69,73,78,78,81,81,85,75,73,81,70,76,86,83,94,97,69,54,35,27,26,27,36,36,48,53,59,61,60,61,60,62,64,61,53,53,55,56,52,55,55,49,49,52,52,53,55,49,51,53,50,46,48,53,44,47,49,53,51,46,47,46,46,41,38,46,46,47,40,45,47,41,44,42,40,39,36,41,39,41,44,42,42,39,38,40,33,39,40,34,37,39,39,37,38,37,33,31,32,36,35,36,32,37,34,33,39,32,34,34,33,35,32,32,35,36,34,35,33,33,29,30,31,27,33,33,29,32,30,30,34,26,25,25,28,27,22,27,23,24,31,27,30,28,21,27,28,65,110,100,72,45,24,24,27,25,24,23,25,26,27,25,24,29,27,19,23,31,32,29,26,25,27,31,25,32,29,29,26,29,29,25,33,24,25,33,29,27,26,26,28,33,76,87,52,26,16,25,24,24,23,29,23,26,31,25,37,33,27,24,30,39,32,31,35,35,24,25,31,33,34,26,38,33,31,33,28,33,31,33,29,33,29,29,31,29,17,21,22,19,19,21,21,22,23,20,21,24,23,25,19,19,23,17,24,21,23,18,61,94,36,24,30,17,15,19,26,21,22,28,23,19,25,23,20,21,24,21,23,21,20,27,22,27,22,23,26,21,22,21,28,33,34,25,25,27,22,27,25,52,110,98,61,74,78,78,69,42,39,38,53,48,45,56,52,57,57,54,55,50,54,55,56,54,51,55,52,50,49,44,46,49,50,52,50,46,45,33,28,35,53,89,110,124,141,126,101,103,141,180,199,188,164,150,139,146,150,151,148,150,161,130,52,15,9,27,118,193,240,250,252,252,252,252,252,252,253,253,252,252,252,252,248,246,232,251,250,250,242,199,210,192,180,190,167,102,13,1,10,10,13,11,13,13,14,15,15,14,152,149,136,113,83,46,53,73,99,125,98,81,63,27,18,41,62,92,106,111,116,122,132,143,153,165,172,179,190,197,208,208,211,212,211,214,213,203,102,29,53,99,162,188,184,169,157,143,128,113,99,92,83,73,66,59,49,61,45,10,29,73,133,168,167,178,187,204,208,213,219,219,222,221,219,213,108,19,55,114,183,210,191,137,118,132,124,89,27,5,10,18,32,38,49,51,41,41,71,121,174,196,200,185,71,28,89,143,165,136,113,84,57,33,24,91,135,142,159,213,132,10,12,15,28,33,30,24,21,28,29,60,118,162,165,152,175,212,215,211,127,4,34,125,176,175,166,168,166,166,164,165,168,167,177,186,192,198,192,182,167,147,130,105,74,51,27,28,48,66,97,125,147,168,184,198,200,199,196,188,181,174,167,161,160,159,162,165,169,173,166,159,156,156,155,153,155,154,155,154,154,157,153,153,154,151,152,149,149,149,149,148,151,151,150,151,155,158,155,157,159,158,156,156,155,156,156,157,158,160,162,167,169,166,168,166,168,169,169,171,167,168,167,167,167,168,168,171,177,174,174,176,173,174,177,173,172,170,168,166,162,162,161,160,160,163,165,165,162,152,157,171,172,173,169,168,169,169,146,122,89,78,64,63,106,96,96,98,118,134,128,133,127,120,120,118,122,122,117,121,112,117,121,108,100,90,83,74,72,61,68,79,85,86,90,81,85,90,83,75,79,85,86,93,86,76,55,40,29,29,29,35,38,43,52,54,64,63,61,66,60,59,57,53,59,55,55,53,55,56,51,51,51,53,51,50,53,55,54,53,48,50,52,50,45,49,49,49,51,49,53,44,46,46,42,42,39,40,49,49,42,40,43,45,45,43,43,41,37,40,42,39,43,38,39,40,39,33,37,41,42,41,38,33,34,36,33,37,41,34,32,37,39,37,39,39,37,33,38,32,29,37,33,34,36,33,38,35,33,31,29,37,29,34,30,21,35,30,30,37,28,25,27,31,32,21,22,26,22,27,29,24,27,29,28,28,27,27,24,28,53,101,111,88,62,31,20,17,27,26,29,32,24,26,27,30,21,27,32,
27,31,23,22,28,26,30,33,33,35,26,22,32,30,24,26,32,24,24,28,24,34,27,26,29,48,89,71,33,24,23,24,28,25,28,21,28,29,22,27,25,25,25,28,34,30,33,35,29,25,29,32,31,35,36,35,37,31,29,30,34,35,34,31,31,29,32,31,24,23,24,18,21,22,20,23,18,20,21,22,19,16,25,19,23,22,18,23,23,26,27,85,74,22,30,19,19,21,19,23,18,22,24,24,24,22,21,19,22,22,21,26,26,23,22,23,24,25,20,27,25,19,26,24,34,29,21,31,25,27,28,67,110,86,54,12,61,128,125,100,76,100,100,107,86,66,57,37,74,72,41,64,45,44,52,51,49,43,55,51,54,46,46,48,51,56,54,55,51,55,47,46,42,27,53,87,131,168,165,130,120,134,181,223,232,219,204,195,189,188,182,171,167,160,144,108,74,48,49,79,86,130,185,224,248,252,252,252,252,252,252,253,253,252,252,251,251,252,252,252,252,247,208,216,203,187,198,168,96,15,1,10,10,12,12,13,13,14,15,15,14,78,69,58,43,23,21,44,55,72,94,91,89,93,43,13,53,97,148,181,196,201,203,210,214,212,213,212,212,213,211,213,207,201,198,191,182,181,163,63,20,39,73,118,118,108,91,79,77,70,72,81,95,105,120,134,148,155,169,139,33,38,103,170,216,218,222,220,215,213,201,192,179,163,143,129,112,30,1,33,49,76,75,93,101,109,132,123,105,59,20,14,21,34,46,60,76,71,79,91,100,136,131,122,91,27,33,61,92,94,81,71,56,48,29,32,123,188,178,145,156,90,9,15,15,27,34,34,27,28,26,28,51,86,105,100,99,104,128,160,208,149,16,21,105,167,179,174,176,174,181,189,192,193,187,181,170,150,128,105,79,54,27,21,41,63,92,116,141,159,178,194,200,200,199,194,185,179,175,168,167,166,163,162,160,162,166,170,166,168,169,168,164,162,160,158,160,167,165,161,158,158,160,160,160,155,157,160,156,155,154,155,157,158,158,159,157,159,159,154,157,157,157,158,154,157,159,156,155,158,160,163,165,166,165,162,163,164,160,162,168,165,166,169,167,165,168,166,167,170,165,162,162,158,161,161,165,167,170,169,164,163,167,171,171,167,167,171,174,167,159,159,159,158,152,153,152,155,166,194,219,192,122,54,65,96,95,118,124,126,134,128,132,132,125,123,119,116,115,110,106,104,98,89,74,69,75,84,92,83,75,83,94,92,96,93,86,93,91,87,75,95,105,84,63,43,28,26,29,30,41,45,54,55,56,61,61,62,60,65,60,56,60,59,55,57,55,54,57,54,54,53,50,51,53,53,51,55,54,53,50,50,50,50,47,46,50,51,49,47,48,46,46,48,44,46,45,46,46,43,46,42,40,44,44,46,43,43,46,38,40,38,35,41,35,39,39,34,42,42,39,40,42,36,33,36,35,35,34,33,34,37,37,38,39,35,32,34,38,33,36,34,33,33,34,38,30,31,31,30,33,28,33,32,24,34,34,34,35,32,29,24,31,30,31,30,26,25,24,26,22,22,24,24,27,30,32,23,40,69,36,21,30,78,123,100,76,46,28,24,29,33,29,29,27,24,27,24,36,29,24,26,22,27,30,32,31,27,33,27,27,27,31,33,22,27,32,25,27,31,30,27,36,40,27,31,72,87,55,27,17,22,25,23,27,27,27,26,26,31,25,26,25,29,32,29,33,34,27,23,29,31,33,36,33,36,33,29,29,29,33,34,36,36,32,29,34,33,29,19,19,16,21,27,19,23,21,22,22,23,23,20,22,22,23,22,22,17,25,23,51,99,53,23,27,15,19,26,19,25,23,23,29,19,28,23,17,26,23,21,21,26,24,23,20,22,24,22,21,26,24,23,24,23,26,25,27,25,24,33,80,113,77,55,16,42,164,185,160,127,107,126,149,155,119,91,69,82,106,106,101,83,54,48,50,46,35,43,42,44,45,39,42,37,44,52,47,49,53,58,55,51,47,43,54,71,128,180,171,152,147,146,189,250,251,250,250,246,243,242,242,238,226,222,219,208,204,186,162,117,60,77,96,115,144,184,229,242,246,246,248,246,246,251,251,253,253,252,252,253,253,249,209,214,208,197,207,178,100,12,1,9,9,13,12,13,13,14,14,14,14,57,59,61,67,23,27,50,70,116,138,171,178,169,99,41,65,106,161,198,208,211,206,210,203,198,194,188,186,177,168,158,148,139,126,116,102,85,53,11,18,42,68,93,93,98,104,107,122,141,154,168,180,189,196,205,211,208,228,169,57,41,87,161,198,194,178,156,145,126,113,103,89,79,59,56,44,12,16,53,93,116,110,125,134,136,138,139,135,128,146,121,51,34,77,137
,157,146,133,121,109,95,78,62,49,27,31,47,57,60,50,45,35,33,30,49,145,217,232,190,155,61,6,25,9,26,34,26,27,29,32,24,46,69,75,83,92,103,136,146,186,160,32,56,136,178,195,191,197,194,191,182,162,141,112,89,64,35,21,31,51,72,101,129,147,165,183,195,200,202,196,190,181,175,171,165,165,164,163,162,159,156,158,159,157,157,160,160,160,162,163,159,159,156,152,155,152,159,160,158,155,151,150,153,158,155,153,160,158,156,159,157,155,158,157,155,156,154,151,147,151,151,156,157,157,160,159,159,155,155,156,153,154,156,158,156,157,159,155,158,161,157,159,160,159,159,156,150,150,147,142,148,149,148,151,155,160,166,165,170,165,165,163,165,173,166,168,168,170,174,165,165,160,151,145,153,155,152,152,195,252,252,209,89,59,94,105,123,113,99,97,92,101,108,106,102,91,84,83,81,83,86,77,78,69,76,91,98,92,96,88,92,97,87,93,93,90,101,101,103,89,69,50,33,33,24,29,40,46,54,55,58,61,63,60,61,64,60,58,57,61,56,57,55,55,57,55,53,53,56,51,49,53,53,48,54,54,52,53,51,49,46,49,50,47,50,47,45,49,46,44,49,46,46,44,47,46,39,42,40,42,45,41,44,44,45,45,35,45,37,34,40,36,42,39,35,37,41,41,35,38,37,36,36,37,35,34,38,33,35,43,36,33,34,29,35,37,34,34,34,36,34,35,35,36,31,30,36,31,29,29,31,26,31,36,33,35,27,29,31,26,27,28,28,33,33,28,23,24,26,26,24,22,26,31,24,26,25,53,60,25,21,30,25,50,71,115,96,65,44,29,20,24,20,23,28,24,31,27,24,27,24,22,29,33,32,27,34,32,28,33,26,27,29,29,27,24,28,32,36,25,29,36,29,27,23,50,88,71,39,22,20,26,26,27,23,24,24,30,31,26,28,21,25,31,34,35,32,29,22,27,33,35,33,33,36,33,32,29,27,31,32,35,33,28,34,30,35,31,17,20,20,24,22,21,23,16,25,26,24,20,19,20,19,26,19,21,22,23,18,72,94,29,25,27,15,22,18,27,22,19,21,20,25,22,26,24,22,23,19,24,22,18,24,28,22,23,23,27,24,22,24,26,26,24,26,23,30,45,94,111,71,48,21,28,137,219,201,168,151,139,116,116,133,125,115,100,115,117,119,123,113,103,99,99,101,89,94,100,111,114,102,86,60,44,55,49,35,33,36,50,47,42,41,47,41,68,113,132,147,161,160,206,252,252,252,252,253,253,253,253,252,252,250,250,250,250,246,239,204,165,159,116,59,33,55,89,113,132,158,185,198,206,212,220,231,239,244,246,250,250,232,171,188,191,183,200,180,103,12,1,10,10,13,12,12,12,13,14,14,14,145,151,154,108,57,53,46,78,132,178,198,199,195,105,38,61,98,147,179,184,171,161,153,147,134,123,115,107,99,86,78,69,62,59,58,49,58,47,10,29,66,112,162,175,177,181,186,189,196,197,194,196,193,196,194,191,187,190,129,23,22,57,101,127,102,84,72,65,67,73,84,95,107,120,141,136,37,25,102,159,208,200,207,191,149,145,138,137,157,184,163,73,39,79,135,153,125,109,99,98,92,77,62,38,23,25,23,30,34,28,22,19,21,29,37,100,173,207,221,210,80,7,22,11,26,27,27,26,24,31,26,69,131,149,151,150,150,148,138,146,100,57,135,200,204,185,164,143,121,97,68,42,24,27,44,69,96,117,148,164,179,187,198,199,190,187,178,181,175,170,163,160,165,163,161,159,163,166,158,152,152,155,160,159,159,159,157,156,157,150,152,157,158,154,149,153,159,159,160,154,150,149,157,159,159,158,156,157,156,159,157,158,158,152,147,152,150,144,144,145,150,151,155,152,153,153,151,147,150,150,145,144,142,151,149,151,152,143,148,150,144,143,139,139,138,131,131,132,139,141,141,147,146,144,144,137,133,132,131,132,127,123,128,134,139,143,146,147,145,148,148,145,143,139,151,155,147,136,163,232,253,253,135,66,67,93,121,104,99,95,89,86,91,97,88,74,77,85,76,69,75,88,96,93,98,103,103,103,96,98,100,101,84,86,101,93,94,80,54,36,32,30,33,39,45,51,55,64,67,65,60,62,64,59,57,58,60,54,50,54,60,54,54,57,55,53,56,51,47,56,50,57,55,49,51,51,49,52,53,49,50,49,49,48,49,45,47,46,46,53,47,46,45,48,43,37,46,44,39,44,42,44,41,46,44,39,42,36,35,41,35,36,39,40,39,42,41,36,33,36,40,36,34,31,37,41,39,37,37,33,34,38
,34,34,33,30,32,35,33,36,37,26,34,33,31,31,29,34,31,32,31,33,35,30,29,24,29,31,31,25,27,32,32,33,33,30,29,27,26,23,25,25,39,40,22,28,27,27,27,26,28,28,28,24,31,73,113,118,93,56,36,27,21,22,27,25,24,26,26,27,27,25,31,32,32,33,27,29,27,34,36,28,26,26,31,25,26,34,29,21,31,29,23,21,28,31,69,91,57,27,15,28,28,27,27,24,30,26,27,23,25,21,25,36,30,29,33,31,25,29,34,32,38,35,37,31,29,33,26,34,30,30,29,32,30,33,33,29,22,20,20,22,23,19,22,20,25,19,20,24,21,22,24,21,20,25,20,22,40,97,62,20,30,14,20,22,21,23,22,23,22,27,24,22,23,23,28,20,23,26,21,22,22,25,21,24,21,20,28,24,24,24,24,25,26,31,68,115,104,62,41,20,35,118,197,210,183,176,170,150,117,100,109,124,129,117,121,118,120,135,139,139,134,132,137,139,149,159,165,160,158,147,133,139,119,95,73,51,48,36,46,40,35,44,26,28,47,65,96,120,132,182,248,249,251,251,253,253,253,253,253,253,252,252,253,253,252,252,251,251,246,210,141,68,25,26,19,46,108,158,175,174,171,165,171,180,177,185,195,191,157,116,142,147,141,158,152,99,15,1,11,10,14,12,13,12,13,14,15,15,185,186,175,102,60,49,42,71,109,152,173,174,160,71,24,43,69,112,128,116,104,87,82,72,66,63,59,61,70,81,85,98,110,118,137,143,159,128,42,35,86,144,197,210,208,200,198,201,193,184,176,165,160,147,132,118,92,91,45,6,22,42,86,98,97,105,114,134,147,161,173,182,193,198,210,186,78,39,98,162,208,195,199,168,130,132,133,120,101,103,75,35,32,45,78,89,83,78,61,54,50,37,29,27,21,23,32,29,27,26,24,28,27,38,33,73,124,146,192,227,101,7,17,10,25,29,24,25,16,19,27,40,69,74,72,71,63,63,45,47,30,44,138,155,127,92,50,30,28,34,57,81,107,130,155,179,186,193,201,200,191,185,178,171,167,155,154,151,157,155,158,160,164,158,151,162,160,163,162,157,154,156,159,159,158,159,162,160,154,153,148,151,158,160,161,153,157,155,154,152,151,149,150,153,150,151,153,152,154,156,157,153,158,156,152,154,150,156,154,150,150,148,146,142,149,150,152,153,147,149,152,150,150,149,149,148,146,146,146,147,143,139,142,136,138,139,136,145,145,137,139,137,138,136,135,125,106,109,111,107,109,109,115,122,122,124,118,112,113,121,124,129,135,135,147,139,126,122,152,199,249,250,158,77,52,79,107,99,104,98,100,104,105,108,98,96,101,95,87,89,86,98,100,101,106,108,107,98,104,106,107,107,87,78,60,38,38,30,27,36,43,45,52,53,57,60,63,68,63,64,57,62,60,53,58,56,56,57,55,52,59,57,50,57,50,55,53,51,53,49,54,50,50,53,50,53,47,53,51,48,57,47,49,46,48,49,41,47,48,48,53,50,49,39,43,44,43,42,43,44,43,45,43,44,45,34,37,42,36,38,40,40,39,39,35,38,38,34,41,37,38,37,35,39,39,35,33,33,38,36,31,35,33,34,37,35,34,32,29,34,37,33,31,34,29,33,33,27,29,31,35,27,29,34,29,28,29,30,26,29,30,33,28,31,27,27,26,25,21,23,25,29,46,33,23,22,25,27,27,33,25,41,53,27,24,26,46,102,134,113,75,50,27,24,30,25,25,22,25,24,25,31,30,34,30,30,26,25,36,31,27,28,26,27,26,29,32,21,24,25,31,29,27,26,29,53,61,88,77,33,27,23,23,26,19,22,27,23,26,32,24,24,32,30,38,30,34,28,25,29,31,39,27,37,35,31,35,29,28,35,33,31,33,28,36,34,33,27,22,18,17,21,21,24,18,19,22,20,19,24,21,24,21,21,28,17,24,19,61,92,35,25,28,16,23,23,19,22,20,24,24,21,22,24,23,29,23,18,27,23,23,22,23,22,22,23,24,27,21,23,29,26,21,25,33,80,124,83,54,38,19,33,107,183,200,179,172,181,177,160,139,131,116,115,126,131,142,130,145,146,152,155,145,142,148,156,161,160,163,158,161,161,169,165,155,136,130,120,108,108,98,92,82,79,67,53,36,35,45,53,67,108,170,214,235,245,250,250,252,252,252,252,253,253,253,253,252,252,253,253,252,252,238,178,143,93,53,61,96,153,189,202,203,190,175,165,157,156,157,149,130,118,139,134,120,122,117,87,20,2,10,9,14,13,14,13,13,15,14,14,176,169,146,72,37,46,46,66,86,114,110,101,72,17,27,47,64,90,86,81,80,84,93,104,113,125,141,151,164,17
8,104,87,84,95,93,101,89,90,98,100,110,105,103,109,101,97,95,85,92,91,87,92,83,75,77,75,74,74,73,61,59,59,61,67,57,53,52,50,51,46,46,48,47,54,51,49,67,81,80,95,106,126,146,143,163,185,186,182,173,164,147,130,122,113,92,83,75,69,72,90,130,157,148,184,234,241,251,239,242,252,252,252,252,252,249,238,226,198,147,113,78,49,19,6,15,22,27,36,31,31,36,33,34,36,34,31,29,33,30,32,30,29,38,26,30,29,28,32,27,29,27,26,25,26,24,26,24,27,23,25,25,22,28,25,29,27,24,25,24,22,24,24,29,24,25,27,24,27,22,26,24,22,25,24,24,24,26,24,22,28,25,22,22,20,23,27,25,23,23,19,21,24,23,21,18,22,23,20,21,20,22,25,24,21,20,22,21,16,23,23,16,22,20,17,26,17,21,24,20,21,21,23,20,23,18,20,22,17,20,24,20,18,22,23,19,19,19,19,19,22,21,25,33,25,27,32,25,29,33,29,29,31,33,31,29,31,38,37,48,44,27,31,31,34,33,29,28,34,43,42,35,40,39,27,24,26,41,44,35,42,30,34,46,48,61,67,67,61,54,52,58,54,56,74,73,55,26,15,17,18,25,21,19,24,26,22,22,23,21,25,21,20,22,21,22,24,22,23,24,28,28,28,26,26,39,43,52,54,53,65,65,65,74,83,85,96,98,106,147,173,177,166,159,165,163,162,160,162,156,152,153,149,153,155,154,153,155,157,155,148,151,157,155,153,157,151,158,158,154,163,160,157,155,153,156,165,163,165,161,163,171,163,165,169,167,164,153,147,141,139,137,139,143,147,161,166,169,163,139,113,89,89,125,157,173,173,169,171,165,160,163,160,159,155,155,152,152,155,154,150,155,155,162,167,164,168,165,161,162,158,161,163,153,156,153,156,164,165,164,163,168,169,170,168,165,163,157,156,161,164,155,163,158,167,114,11,2,8,10,13,11,13,12,13,13,13,13,36,29,30,29,28,27,24,22,16,19,23,19,23,19,20,22,21,27,20,18,21,21,20,17,25,24,21,22,24,24,17,23,20,22,19,18,24,19,19,17,20,20,17,18,18,18,18,19,17,19,19,16,19,17,17,20,19,19,22,20,17,19,16,18,18,19,22,17,19,20,21,18,17,22,18,17,18,19,20,18,21,27,27,33,31,33,42,42,39,48,47,57,57,60,62,59,57,64,69,71,70,67,61,49,48,38,38,44,39,39,44,55,64,65,67,71,66,82,63,61,50,69,68,49,45,50,64,55,48,43,47,51,63,89,89,81,75,80,88,98,96,101,104,110,109,117,128,128,128,120,118,111,119,128,128,129,121,123,129,133,129,124,125,124,119,118,116,117,113,108,117,115,115,122,125,126,128,122,126,135,128,127,124,125,128,133,138,134,136,129,124,128,123,127,131,134,139,130,129,125,126,124,128,138,136,134,129,127,125,133,129,124,128,130,134,134,137,126,122,128,127,131,131,129,131,124,122,123,124,129,131,128,128,137,134,132,138,136,142,139,134,129,125,131,131,125,124,132,133,129,133,132,128,124,114,117,122,117,123,127,127,126,123,118,110,110,110,110,108,113,113,111,103,108,108,110,116,115,117,106,106,110,107,104,103,107,99,88,81,83,83,92,92,75,75,75,74,65,57,66,67,67,71,66,68,69,73,60,61,57,54,53,48,50,51,60,62,53,55,73,85,81,108,87,81,85,105,100,102,102,98,95,89,100,109,110,113,106,97,89,93,52,51,66,64,57,107,131,138,159,169,179,194,218,234,245,252,252,252,252,253,253,242,215,201,171,136,114,117,55,22,15,14,25,28,25,29,26,31,27,29,34,29,29,29,26,27,31,31,28,29,27,29,30,29,28,24,27,24,28,27,26,27,29,27,32,25,27,24,24,29,21,26,20,28,25,25,27,24,24,20,26,24,21,28,24,20,27,24,21,24,24,19,25,24,23,28,23,20,24,24,22,23,23,17,23,20,22,22,19,24,23,21,19,23,23,20,18,18,20,20,22,19,19,19,19,22,20,23,21,18,22,23,21,21,22,22,15,24,27,19,23,22,19,22,21,19,19,22,24,20,17,20,19,22,23,22,25,26,28,26,28,29,34,34,30,36,36,35,39,40,49,39,28,29,30,43,42,27,31,33,39,44,43,40,39,28,26,33,39,43,37,42,40,41,41,42,51,46,51,41,33,41,36,38,38,48,49,41,27,17,18,18,21,24,21,21,22,26,24,19,18,21,22,21,26,22,19,23,19,24,22,23,27,26,29,35,35,40,56,55,57,70,67,68,86,86,86,97,113,150,174,175,163,156,158,164,158,155,156,160,153,157,156,151,150,147,151,157,153,155,161,156,159,160,158,162,
165,162,163,157,153,162,155,154,161,160,163,168,162,160,157,157,162,154,156,159,162,160,160,152,153,158,151,153,156,163,160,164,165,166,162,153,128,84,83,112,140,156,165,165,159,159,160,162,164,164,162,157,160,163,159,155,159,162,168,169,160,160,158,158,160,159,158,159,156,153,155,158,160,163,159,159,160,163,165,161,162,160,158,160,160,163,162,167,161,171,115,11,2,10,10,12,11,14,12,13,14,14,13,19,25,22,17,25,20,16,17,17,20,22,26,21,21,22,21,23,20,21,20,22,22,21,22,19,21,20,23,24,19,19,18,21,23,16,17,17,16,19,17,20,21,15,23,22,16,21,18,19,18,17,19,16,21,19,17,18,20,18,17,19,17,18,18,18,18,17,21,20,20,19,16,17,17,20,19,16,21,21,18,19,21,20,19,21,19,19,21,22,28,26,32,35,39,35,33,30,31,30,34,35,36,36,33,44,40,42,44,36,38,38,34,41,39,45,46,42,43,39,48,51,53,50,49,46,47,43,37,37,38,43,46,54,60,59,62,66,54,59,74,79,87,93,101,108,114,125,124,124,121,116,113,112,128,127,122,128,129,128,127,125,120,125,116,104,105,111,122,122,113,116,120,119,124,131,130,129,128,134,134,135,132,130,132,130,131,127,122,121,126,126,125,128,129,126,129,129,134,137,125,128,128,131,141,146,140,137,136,134,132,132,133,135,139,133,123,128,130,122,122,131,138,134,133,135,133,128,124,127,129,136,134,132,142,141,143,139,137,139,134,135,131,128,130,123,120,121,128,135,130,129,131,125,128,130,135,140,142,141,133,129,125,117,109,110,107,105,111,113,110,116,113,108,117,112,119,114,99,110,106,110,107,89,89,89,92,86,74,79,73,74,88,88,75,81,84,72,73,63,63,67,65,69,74,74,73,67,59,70,69,64,66,62,58,51,54,53,51,60,65,80,70,69,69,59,65,51,47,48,43,57,60,63,71,80,88,103,119,130,136,132,121,91,87,52,25,50,38,36,59,64,81,99,129,151,163,186,207,220,241,252,252,252,252,252,252,251,247,223,200,186,157,127,95,59,32,25,15,26,28,26,28,24,31,24,23,24,25,26,27,29,30,28,25,30,26,27,26,29,29,27,29,32,32,23,28,25,25,25,23,26,23,25,28,23,24,25,27,23,20,29,23,21,23,23,26,24,24,23,22,25,24,18,22,24,22,21,23,25,24,23,19,20,21,22,21,22,24,24,21,22,22,16,20,18,21,23,18,21,21,20,19,22,18,23,19,18,23,23,21,22,22,19,21,18,21,24,20,20,23,18,19,19,22,25,20,16,20,21,19,21,17,22,21,18,19,22,24,23,27,23,26,32,29,36,46,45,38,39,35,34,32,31,35,27,29,52,38,19,32,38,39,45,42,44,36,29,28,34,47,42,39,41,35,40,41,46,50,45,46,34,29,32,35,33,39,40,39,40,30,22,17,19,18,22,22,18,22,24,21,20,21,21,19,24,22,17,23,19,19,25,25,24,30,27,27,36,35,46,56,59,64,74,77,86,95,91,92,115,159,181,179,172,162,163,162,155,150,153,156,158,159,163,163,163,164,165,162,160,160,168,168,167,165,164,163,166,165,155,163,157,161,168,164,169,171,165,160,162,159,160,152,152,155,150,152,151,155,159,160,161,164,167,164,165,163,166,166,165,163,162,169,173,156,117,83,77,97,131,158,168,168,166,166,165,170,168,168,167,166,165,166,161,162,162,163,166,162,162,159,160,165,160,164,166,164,166,165,163,161,162,162,161,166,165,164,165,162,162,162,159,158,163,162,167,157,166,115,11,0,9,10,12,11,13,11,12,13,14,13,19,21,20,16,17,17,18,19,22,16,21,23,19,21,23,20,15,23,21,22,21,16,19,19,18,17,19,19,17,17,19,18,17,19,18,17,19,17,16,20,19,17,17,16,19,19,18,17,16,17,18,18,20,19,17,15,21,19,18,21,16,19,16,19,21,19,22,22,23,20,19,18,19,19,18,21,19,18,19,22,19,17,21,18,19,22,19,22,20,19,21,21,26,24,27,24,23,29,29,31,32,36,44,43,49,54,52,57,52,50,46,39,43,42,56,52,47,47,45,52,61,66,70,69,61,59,50,54,55,52,65,66,82,75,78,84,85,92,77,106,99,128,128,123,130,130,128,131,128,132,128,125,125,124,128,130,128,131,126,124,129,129,130,130,129,126,127,133,134,125,127,130,131,130,128,124,124,125,128,130,131,129,131,132,120,118,114,111,126,128,127,130,124,125,126,128,135,131,128,126,126,128,131,133,137,137,136,131,126,129,123,124,130,133,130
,126,132,131,131,139,134,133,135,141,141,136,133,138,139,135,135,132,133,141,146,139,136,132,132,135,140,138,135,133,135,139,134,142,141,144,133,131,134,133,130,133,139,136,132,137,122,123,123,125,115,114,125,127,123,120,119,122,118,120,111,107,101,93,107,113,110,104,90,82,80,89,84,82,80,77,79,90,84,81,92,77,85,87,75,68,69,74,66,65,73,72,74,68,70,78,74,75,65,69,61,55,56,59,60,56,64,56,60,56,61,52,50,44,44,40,71,49,52,74,71,67,99,103,127,128,127,143,146,141,86,57,87,86,90,68,58,55,48,53,69,87,99,122,147,168,197,215,226,232,236,245,246,248,251,251,252,249,236,218,205,200,155,123,117,87,66,43,27,24,26,25,24,28,28,25,29,24,27,24,28,28,24,28,28,30,27,33,26,26,30,24,29,27,29,24,27,21,26,29,21,23,21,24,26,25,24,25,26,26,24,23,23,24,21,21,23,19,23,28,21,29,20,22,21,19,24,20,21,20,21,29,24,20,21,19,19,23,17,23,21,18,24,19,18,17,24,26,19,23,22,17,18,21,18,18,21,21,25,22,21,23,21,22,24,20,24,21,19,22,19,22,17,21,19,18,20,20,21,17,24,22,15,21,22,19,23,26,28,30,44,50,36,35,39,32,36,40,33,33,26,29,35,26,27,34,34,43,45,40,49,43,24,29,41,47,39,43,41,37,45,42,45,46,43,42,39,35,33,34,33,36,39,45,42,26,25,19,20,21,21,24,16,20,19,21,22,18,23,19,22,20,23,26,21,21,22,25,24,24,30,28,36,40,46,52,53,69,79,84,100,91,89,123,167,183,182,174,164,166,161,159,160,152,154,159,162,158,162,163,171,171,163,165,163,160,159,159,155,155,160,160,157,154,156,160,159,163,168,165,169,168,158,159,165,165,164,157,162,167,161,156,153,155,152,151,157,162,165,160,155,157,158,163,165,159,159,160,176,172,147,124,95,77,90,129,159,175,179,174,171,164,164,166,167,167,167,168,166,163,163,166,164,166,165,160,163,163,165,162,164,169,169,165,162,165,168,167,167,171,171,169,167,169,170,165,161,156,164,161,166,159,166,114,12,2,8,10,13,11,14,13,14,14,12,14,22,23,19,20,18,21,21,18,19,18,22,19,20,21,19,22,19,21,18,17,20,17,17,18,19,19,20,17,16,17,17,19,17,19,19,18,17,16,17,16,17,16,19,21,16,18,19,17,20,18,17,17,18,19,17,19,19,17,19,18,16,18,18,22,19,21,23,22,20,20,20,16,18,17,18,19,17,19,18,19,22,26,28,24,23,28,26,24,27,23,23,30,32,37,43,42,45,47,50,52,50,49,54,55,61,65,71,81,74,67,65,70,72,73,71,62,60,55,50,55,50,47,57,69,74,65,63,68,80,88,91,93,96,92,87,89,84,83,89,101,112,117,122,124,127,128,128,126,127,136,136,132,125,126,122,123,127,125,128,130,130,134,147,142,140,139,137,141,143,137,131,130,132,132,131,128,129,125,125,125,127,127,127,121,121,123,120,127,132,136,121,120,131,128,136,135,128,130,122,123,131,128,132,129,122,118,119,126,120,123,127,131,131,135,141,135,144,146,143,144,141,141,141,139,141,141,143,150,145,144,146,148,154,153,144,134,136,139,146,142,143,149,148,154,153,153,153,153,155,155,152,155,157,155,148,140,143,147,147,147,141,139,139,136,128,141,153,148,145,133,126,130,132,134,127,124,116,121,128,121,120,107,97,92,84,91,85,74,78,81,85,77,80,67,59,66,72,77,69,79,83,76,85,79,78,89,84,76,67,54,64,67,70,68,53,62,55,54,49,51,55,54,48,48,51,48,46,48,50,44,49,43,45,46,48,51,56,86,105,110,112,124,149,159,117,117,158,165,167,162,130,101,71,69,76,64,58,47,48,67,89,113,136,152,162,183,196,206,224,231,235,236,235,250,251,237,229,227,234,227,215,197,183,179,129,87,66,57,36,20,30,25,26,21,20,29,27,26,25,22,22,27,26,28,29,29,24,24,28,22,24,21,24,27,30,23,21,26,25,29,27,21,23,23,21,22,25,22,23,25,21,28,22,24,23,17,24,18,19,21,21,22,21,21,24,20,18,21,24,17,19,19,19,23,21,21,21,23,19,21,19,19,22,20,23,20,22,24,16,21,24,19,20,22,19,23,21,19,18,19,18,20,21,19,21,18,24,18,19,21,21,20,17,22,23,23,19,18,18,20,23,22,24,25,34,36,32,33,34,39,38,37,32,29,29,31,31,25,34,34,30,45,47,44,45,41,33,26,40,46,44,42,39,44,44,42,43,46,42,39,45,35,35,34,29,34,38,44,36
,24,23,23,21,22,23,23,19,18,21,21,21,22,22,22,23,24,23,23,23,22,21,20,29,28,28,29,30,41,43,53,62,78,84,84,90,82,105,155,173,171,165,162,167,160,160,166,167,166,164,160,155,149,155,158,163,162,156,152,153,152,151,149,150,147,148,156,157,161,161,159,155,153,155,156,160,159,154,158,160,162,162,159,163,167,165,160,161,160,151,150,157,162,165,155,147,147,152,154,158,155,150,159,168,166,162,160,135,102,86,93,125,153,174,174,164,160,148,149,155,156,160,160,160,160,158,161,160,164,171,166,164,166,164,166,163,160,160,155,158,158,163,165,159,166,168,169,174,171,171,166,164,161,166,163,163,159,165,113,12,2,8,10,13,10,12,12,13,13,14,15,23,20,18,19,18,17,20,20,18,19,19,17,19,18,20,19,17,21,16,18,17,18,20,17,17,17,17,18,18,18,16,16,17,16,17,17,19,17,17,20,16,18,20,17,18,19,19,19,18,18,18,17,19,19,17,19,18,22,17,19,18,20,24,17,21,22,19,19,21,20,21,17,16,18,15,18,21,18,22,18,23,33,29,32,37,33,32,32,34,33,35,34,35,43,39,48,49,39,56,56,49,45,51,57,58,49,55,66,77,83,55,57,73,65,59,59,72,66,65,59,52,47,48,46,50,47,40,55,66,68,73,76,87,89,84,77,63,55,80,92,106,107,114,103,98,104,107,108,111,118,119,127,133,131,130,115,117,124,116,117,116,108,110,117,131,127,118,120,131,137,141,141,132,129,133,135,130,119,121,128,132,130,126,118,117,135,139,138,140,138,137,139,141,142,137,134,139,143,139,143,145,142,133,118,114,125,132,133,131,137,140,145,146,145,141,137,140,135,139,142,144,147,141,145,146,145,146,151,155,151,149,151,148,136,134,139,139,135,136,136,138,132,141,136,134,133,140,142,145,142,143,146,161,174,160,160,142,164,145,144,142,142,141,136,145,146,145,137,130,122,119,117,112,115,112,108,113,117,120,109,106,100,85,100,83,95,83,73,79,79,63,61,57,59,67,67,65,71,67,83,99,84,81,84,75,83,72,78,64,72,81,68,53,57,57,57,54,53,54,57,51,55,59,54,58,57,56,55,50,52,49,54,65,58,61,58,60,83,81,78,94,97,110,129,99,118,142,146,162,162,134,116,113,113,120,105,95,69,50,38,72,46,43,52,66,92,110,121,141,149,158,174,194,205,210,217,214,224,232,239,242,247,251,234,218,202,195,193,171,149,139,134,101,52,25,24,30,28,28,23,43,26,24,24,30,23,27,29,23,23,23,23,24,26,23,25,24,24,28,25,23,21,24,23,21,21,24,27,23,27,21,23,23,21,23,22,22,22,24,24,21,21,21,19,21,19,19,21,22,19,20,20,15,24,19,21,28,17,22,19,21,21,19,21,18,23,19,22,19,17,22,21,17,19,25,18,23,25,22,21,18,19,22,17,22,19,21,21,18,21,20,24,21,21,22,24,20,22,22,19,24,19,19,26,26,27,28,34,36,34,40,38,34,33,29,33,29,33,34,32,40,45,49,46,45,37,27,29,44,46,48,44,44,43,42,46,39,48,47,39,40,35,36,36,28,34,33,33,35,25,19,17,21,19,17,20,20,19,22,23,21,24,20,21,19,19,20,22,23,21,21,24,25,27,32,30,33,39,44,47,67,81,87,89,87,120,145,155,159,150,148,156,154,155,162,161,162,164,155,153,156,145,155,158,164,157,151,151,150,146,147,151,160,149,148,151,156,159,160,160,155,153,149,145,147,151,152,152,153,152,152,151,155,155,155,158,162,163,160,162,166,170,163,154,150,156,155,154,149,149,154,153,160,160,156,160,159,141,117,92,83,112,144,159,168,160,146,139,145,148,150,151,155,158,152,153,155,163,159,161,168,160,161,162,158,158,156,155,157,160,160,158,155,158,161,167,171,170,167,165,165,161,167,164,165,156,168,116,11,2,9,10,12,11,13,11,12,14,14,14,20,20,20,21,18,17,17,19,19,16,17,18,19,18,16,17,17,18,17,18,17,18,18,16,17,18,17,16,17,16,17,20,18,17,16,18,17,16,16,16,17,16,18,18,21,19,17,17,17,17,17,21,17,19,20,16,17,18,19,18,19,18,19,19,17,21,19,20,18,18,19,19,19,17,17,20,20,17,20,20,19,25,21,26,25,25,29,30,33,24,24,28,29,31,32,28,30,29,30,29,27,22,30,33,30,34,30,28,27,31,37,36,38,37,44,54,61,57,49,49,50,44,44,38,32,32,31,25,37,47,50,61,70,81,82,82,81,77,82,92,104,95,91,89,83,87,94,104,101,103,110,122,
128,128,132,133,129,136,134,129,116,95,97,108,123,113,104,113,117,118,124,127,126,126,126,136,128,122,133,131,136,137,132,133,137,139,141,142,147,148,146,153,151,141,139,144,149,154,157,157,157,148,144,138,135,139,129,134,134,128,124,122,130,128,127,121,120,129,127,121,118,104,102,111,126,129,126,129,129,129,114,108,114,125,136,137,135,132,128,113,109,115,110,111,107,105,103,100,105,108,110,113,125,127,125,129,132,134,132,131,125,113,120,134,130,122,116,107,107,107,108,107,102,102,96,94,100,110,104,93,101,94,90,110,115,103,86,77,90,86,77,64,79,90,84,77,68,68,88,104,96,86,73,63,64,65,64,90,92,83,75,59,63,57,60,60,53,59,57,57,55,58,61,60,65,70,67,66,58,62,70,84,91,68,71,78,72,76,73,68,75,65,77,91,69,82,77,80,102,89,86,99,116,148,147,135,137,131,118,105,88,65,57,43,42,53,43,34,32,39,57,79,97,113,136,148,157,160,174,192,198,202,205,214,208,209,221,229,229,237,241,228,200,178,162,147,126,108,87,89,84,29,23,34,24,27,22,25,23,27,27,21,23,24,28,25,22,21,25,25,21,25,22,22,26,24,21,25,26,22,24,23,21,17,24,22,21,24,24,23,22,16,18,21,21,22,19,21,20,21,21,19,25,22,21,21,16,20,21,22,17,19,21,16,22,21,18,20,22,23,16,19,21,16,19,22,17,25,22,17,22,19,22,19,22,18,22,20,20,21,20,22,21,25,18,22,22,19,21,21,21,20,22,21,21,23,28,35,33,37,39,38,40,34,36,35,29,37,43,40,41,54,49,46,44,39,27,31,48,46,42,45,45,43,42,39,44,51,47,45,36,38,39,35,32,33,36,34,34,29,19,20,17,20,22,19,24,20,19,22,23,20,21,23,19,25,17,22,26,20,21,23,26,23,27,33,31,46,48,48,66,77,95,113,141,164,165,165,162,159,162,161,155,154,154,151,151,159,162,157,159,159,160,157,165,172,166,161,156,148,146,156,160,145,147,155,148,148,151,154,160,160,160,154,153,159,155,159,155,150,147,151,158,155,150,150,155,165,166,165,168,161,161,160,162,166,163,158,153,154,155,155,157,156,155,160,162,169,154,123,97,83,105,131,158,171,156,149,153,155,157,155,159,160,158,160,154,155,154,152,158,156,156,158,157,157,157,155,157,161,162,160,155,157,156,162,169,169,170,164,163,160,162,162,165,164,169,113,11,2,9,10,12,11,14,12,12,14,13,13,20,16,17,20,18,16,18,17,16,18,17,16,17,16,17,17,17,17,18,19,17,16,17,16,16,16,17,17,17,17,16,17,17,17,16,17,17,16,18,17,16,22,18,18,20,16,18,19,16,17,17,18,18,16,17,17,18,17,17,18,20,16,21,20,17,22,17,21,17,20,21,18,19,17,20,17,21,20,18,21,19,19,24,27,32,30,28,31,30,28,26,29,26,26,28,33,26,19,25,27,26,26,23,24,24,26,28,24,30,33,33,39,37,48,60,61,64,63,55,53,52,52,46,45,42,41,38,44,44,49,57,53,54,60,73,65,71,100,99,76,80,63,54,75,89,102,114,118,117,117,117,124,113,110,112,120,138,146,139,136,139,132,128,127,120,116,118,118,117,108,105,108,120,113,129,130,128,137,137,136,131,130,130,123,130,131,125,130,123,121,111,103,106,110,113,117,108,109,114,107,110,116,125,129,132,136,123,122,122,122,118,120,131,122,135,128,122,112,97,76,71,73,87,109,123,130,130,117,107,106,109,119,122,130,133,130,134,128,123,123,121,116,118,122,116,108,77,86,99,100,102,106,105,94,84,88,91,83,91,97,99,104,118,120,114,110,104,113,112,114,122,115,99,98,89,96,102,107,102,100,116,112,109,117,104,99,96,88,115,101,95,82,83,94,92,101,101,116,98,101,86,73,55,48,60,74,82,96,81,62,59,61,57,62,55,56,59,53,52,57,50,52,55,54,54,50,53,53,54,51,53,63,67,65,70,63,55,55,55,60,61,53,46,53,51,47,53,39,42,35,61,60,94,115,111,104,129,143,117,132,117,109,131,110,101,91,74,62,53,60,63,63,31,69,46,53,67,73,87,111,123,130,145,153,165,174,183,189,203,212,200,212,210,205,210,211,209,214,218,202,144,107,110,97,88,61,39,40,45,32,27,26,21,23,23,22,19,21,22,20,22,26,25,23,22,22,23,21,23,22,21,21,22,26,23,22,25,20,20,26,17,23,21,22,26,15,23,22,18,23,20,19,24,16,20,21,19,19,21,20,18,18,19,16,17,24,21,19,2
0,19,21,18,21,23,18,21,18,19,21,22,21,16,20,19,22,19,21,24,15,21,20,19,19,19,22,21,21,18,21,19,24,22,19,21,19,22,27,26,27,37,35,36,41,34,32,37,34,44,39,30,45,49,50,53,45,36,24,26,45,46,48,46,42,42,43,42,43,55,46,43,40,35,35,35,37,32,35,37,35,24,23,16,19,22,18,20,18,21,20,21,21,19,21,19,22,23,22,25,22,22,24,24,26,23,25,30,37,43,43,48,74,85,112,165,158,170,176,167,175,172,168,167,166,163,163,159,158,165,159,165,163,162,164,158,166,164,166,160,161,160,152,161,159,151,159,157,146,144,153,159,161,160,166,168,163,171,168,167,166,157,160,162,165,160,156,151,156,162,156,155,152,151,154,154,160,160,160,159,153,157,158,155,157,162,159,160,169,174,173,155,127,98,83,98,133,160,169,167,162,160,161,157,157,157,158,160,149,149,145,146,150,146,153,154,152,155,160,159,159,159,163,159,155,159,162,168,171,170,168,165,160,155,156,155,165,157,168,115,11,1,8,10,12,11,13,12,14,13,13,13,18,17,17,17,16,17,16,17,17,17,17,16,17,16,17,16,17,17,17,16,16,16,17,18,16,17,18,16,18,18,16,19,16,17,17,16,17,16,19,19,16,18,20,18,17,18,19,17,18,18,17,16,19,20,17,19,19,17,17,17,20,20,17,19,17,17,18,18,20,19,18,18,17,18,18,18,22,18,20,20,19,24,23,29,30,33,33,30,32,26,28,22,24,26,27,26,25,27,29,29,36,32,34,31,30,33,26,34,39,39,44,42,40,45,51,61,81,97,87,62,50,49,51,49,45,44,40,35,39,38,37,40,37,40,38,44,53,50,51,50,52,63,75,87,103,118,137,141,134,136,137,132,123,122,125,128,127,131,127,133,146,150,149,137,134,130,134,141,133,126,117,117,118,119,128,134,137,141,143,131,123,119,110,108,115,123,122,118,117,98,81,78,84,91,93,87,71,66,71,60,68,88,104,113,120,128,115,128,133,118,118,125,137,142,143,137,137,132,113,101,115,129,145,153,153,148,132,124,130,139,139,136,132,131,130,126,122,130,142,142,143,142,141,150,143,131,125,130,137,127,125,133,138,129,119,117,110,112,120,118,117,122,139,146,138,125,119,120,127,128,125,122,116,116,103,103,104,104,101,105,112,107,107,103,85,81,87,95,110,104,106,96,83,95,100,123,128,111,100,74,65,63,59,62,63,76,91,87,60,59,62,53,58,57,61,60,51,55,57,56,54,54,51,48,53,53,54,55,54,66,61,62,63,52,56,51,48,51,53,50,53,49,46,46,44,49,44,41,44,41,50,62,65,79,69,64,73,88,95,96,121,129,128,116,117,120,110,104,98,109,98,78,67,59,62,45,46,37,27,34,39,50,61,71,86,99,112,126,128,142,151,160,173,184,197,197,203,214,199,200,191,179,190,186,179,178,183,173,132,100,91,83,70,54,36,31,40,40,26,23,24,22,21,21,21,18,24,20,21,22,19,24,26,20,22,20,18,23,20,20,24,21,19,19,20,22,16,21,23,22,22,19,25,17,23,25,19,24,19,20,20,19,19,21,19,18,23,21,19,22,16,27,21,16,24,18,22,19,19,21,20,23,22,21,17,22,22,18,19,22,19,23,22,22,21,19,19,22,19,21,21,18,27,20,15,21,18,27,25,27,34,39,39,39,35,32,37,35,30,34,43,50,57,55,53,37,23,30,45,45,43,45,42,46,46,45,46,45,42,41,33,33,32,32,37,28,36,32,32,22,18,23,17,22,17,20,21,19,22,19,23,21,20,22,19,22,22,20,24,21,22,26,26,25,23,34,39,41,39,53,68,75,122,152,157,167,168,176,175,169,165,163,168,166,159,159,159,157,155,147,150,160,160,160,159,154,151,154,165,165,165,170,166,163,166,165,157,162,169,166,162,160,159,159,161,167,166,168,168,162,165,165,163,163,158,159,159,160,151,150,154,147,152,152,149,155,153,153,156,157,159,156,156,161,162,163,163,170,172,170,158,134,108,83,99,131,152,170,171,168,162,153,152,152,154,159,147,143,140,145,151,148,153,152,151,156,158,159,157,161,162,158,159,163,162,167,170,167,170,163,163,159,159,158,160,158,165,112,13,2,9,10,13,11,13,12,13,13,13,13,17,17,18,17,18,18,17,20,16,17,18,16,17,17,17,16,19,18,18,17,16,17,20,19,16,19,16,19,17,16,17,18,17,19,21,17,17,16,19,20,17,21,17,17,19,20,19,16,19,19,16,19,19,16,17,17,18,18,18,18,16,17,17,19,19,18,19,18,17,18,19,16,18,19,17,17,1
9,24,19,21,23,25,27,22,21,30,23,23,23,21,22,20,20,21,19,23,25,23,27,21,28,28,22,26,25,23,24,23,24,31,34,34,35,35,39,37,44,49,53,58,61,56,52,43,46,36,42,41,32,37,40,42,42,47,47,40,45,47,47,46,47,59,81,100,106,119,134,139,143,142,139,130,136,136,130,127,125,127,127,139,147,141,141,146,148,146,147,143,143,143,143,141,128,131,140,137,135,145,136,131,120,118,124,118,125,123,125,119,131,112,109,116,139,135,130,121,114,103,113,112,116,123,119,109,111,119,113,107,106,121,123,126,139,145,149,148,139,133,143,152,155,162,161,151,139,132,135,131,125,123,127,143,139,139,137,125,131,132,130,135,147,147,153,149,149,154,145,148,137,135,140,143,144,144,139,137,141,142,145,145,139,144,153,147,139,133,127,135,133,134,116,111,116,122,119,119,117,114,106,105,109,102,104,95,80,81,81,97,79,97,106,106,103,106,105,101,104,81,81,66,70,71,71,75,71,66,63,56,61,65,58,54,56,63,65,59,51,53,59,59,57,57,57,59,58,59,71,71,59,83,75,71,59,49,56,56,53,59,52,57,56,51,53,49,48,49,53,48,51,53,53,61,55,58,50,50,55,75,76,76,87,92,47,60,87,84,99,102,107,123,122,92,107,116,103,92,89,74,67,63,63,57,55,46,41,40,42,46,64,60,70,95,115,130,125,143,153,158,171,178,174,170,185,189,195,199,200,190,184,184,187,180,136,168,171,175,154,100,72,76,89,44,43,36,39,47,37,27,25,24,23,17,15,17,23,20,19,19,19,19,16,21,20,20,18,15,21,22,21,18,24,23,19,19,22,23,20,19,20,17,23,22,17,20,20,21,19,19,23,20,22,21,17,23,19,21,19,17,22,18,17,19,21,19,19,21,19,22,17,24,18,19,19,19,26,18,21,20,24,21,21,24,18,21,21,20,22,22,23,27,27,33,37,37,36,25,31,32,25,36,46,49,49,54,51,40,31,40,56,50,49,46,45,44,47,49,44,35,33,36,34,29,34,31,33,29,29,34,29,23,23,20,19,21,18,23,20,22,20,22,19,17,20,22,21,19,25,21,24,27,25,24,25,24,31,34,36,36,42,53,66,73,83,105,133,158,171,178,159,160,168,165,165,162,154,150,146,144,146,147,155,157,152,159,160,155,156,153,162,163,164,166,165,163,169,170,169,177,177,169,161,163,155,147,149,161,160,166,170,161,160,157,155,155,153,160,163,161,159,163,159,155,158,152,153,153,156,157,153,156,155,155,156,159,159,157,156,158,163,161,164,162,143,113,91,98,124,152,171,174,170,156,152,157,155,157,152,148,144,147,152,152,158,154,151,152,154,158,157,157,160,157,159,162,161,164,165,163,165,164,165,164,163,162,165,151,160,113,12,1,9,9,12,11,14,12,12,14,13,13,19,17,17,18,16,17,18,16,17,17,17,17,16,16,16,16,17,17,18,17,17,19,17,17,16,18,19,17,18,16,17,21,17,17,18,17,19,19,16,20,17,18,20,20,21,15,17,17,19,17,17,19,17,19,16,17,19,17,20,17,19,19,18,19,20,22,18,17,22,18,20,17,19,19,16,22,19,21,19,19,23,25,26,31,31,36,30,26,30,26,30,24,24,23,18,26,21,24,22,22,29,24,28,26,24,34,27,30,34,26,28,29,34,37,46,46,45,45,50,53,50,54,53,55,58,58,55,57,51,46,48,51,59,57,50,55,54,60,56,47,44,63,87,98,110,107,101,99,101,105,115,121,121,118,111,113,118,119,120,123,121,117,116,116,120,121,132,141,134,136,141,147,139,129,139,136,133,134,127,116,120,134,141,139,127,117,116,124,128,128,139,153,156,148,144,151,152,147,164,170,169,163,140,115,114,120,119,124,130,132,133,137,136,143,149,145,141,134,141,144,151,148,139,139,136,133,131,121,122,136,134,142,140,132,133,125,127,132,142,144,143,143,133,135,141,137,141,141,140,136,134,138,139,142,137,141,140,149,154,143,137,134,135,127,126,134,135,131,139,131,119,123,118,125,129,133,130,125,121,117,108,93,101,99,107,110,104,104,100,100,113,120,115,116,107,99,80,68,71,78,84,72,83,96,76,69,66,59,55,55,63,66,65,55,55,59,56,62,54,52,57,59,55,60,61,53,56,60,59,59,55,52,52,50,47,55,54,52,50,48,52,50,55,50,53,54,52,53,52,62,54,48,51,48,44,51,72,76,74,76,76,77,69,79,95,97,94,101,104,113,101,93,99,110,116,114,123,122,137,136,125,108,108,103,84,77,55,
49,27,29,34,30,28,36,51,54,72,83,89,110,124,138,154,160,154,149,157,165,177,187,185,175,181,202,208,205,187,177,174,174,163,148,162,176,169,127,89,81,72,58,47,43,49,67,53,27,29,25,27,23,19,22,18,21,18,23,20,15,17,21,19,16,21,22,18,19,19,18,24,19,21,18,17,21,21,26,18,21,19,22,23,18,21,17,17,22,17,19,19,15,22,20,17,22,19,19,16,19,23,21,17,22,21,18,19,19,24,22,20,21,17,20,20,20,24,21,18,21,24,20,27,27,34,38,33,31,29,32,34,41,43,45,48,44,51,48,48,68,69,57,54,45,47,45,46,48,44,41,30,33,28,27,30,30,36,29,32,31,32,22,19,19,19,21,18,20,16,24,22,20,22,20,22,23,21,22,22,22,25,26,21,22,27,26,28,35,36,37,46,69,81,66,73,84,110,146,163,160,145,153,160,162,164,159,156,153,153,153,158,157,157,155,146,160,167,160,157,153,158,158,159,158,150,155,160,164,167,172,167,160,165,165,158,151,154,162,164,167,164,160,165,157,157,164,163,168,160,159,160,161,166,159,163,165,163,163,163,159,151,157,160,158,156,156,157,158,152,152,152,154,154,162,168,144,120,94,87,112,143,165,174,165,160,157,159,160,159,161,151,156,155,154,153,149,151,155,156,156,155,156,157,156,160,160,160,160,157,156,155,155,162,165,171,167,164,154,159,112,12,1,9,9,12,10,14,12,12,14,13,13,15,16,16,16,16,16,17,17,17,17,17,17,16,16,18,16,17,16,17,18,15,18,17,17,18,16,16,17,18,18,17,17,17,17,16,23,19,18,21,21,20,22,22,17,17,17,17,17,17,17,17,16,19,18,20,18,17,17,17,20,17,19,19,18,22,19,21,20,17,21,18,16,17,17,20,20,17,19,17,23,24,29,25,34,35,37,39,35,36,34,34,28,29,30,29,28,26,26,33,32,31,30,34,36,38,41,40,43,35,32,34,38,49,45,49,50,48,55,53,50,46,51,51,65,83,82,70,70,63,60,57,50,76,65,63,67,76,70,75,67,70,91,101,122,124,118,107,91,96,94,96,95,106,108,110,107,105,111,111,115,107,96,94,99,121,106,118,133,131,127,131,132,124,128,127,131,128,135,122,126,128,135,144,136,142,137,136,141,141,133,130,138,134,131,135,142,141,146,153,156,158,143,126,114,136,149,147,145,157,162,159,146,133,132,137,137,131,137,144,139,137,142,141,139,139,138,139,140,148,147,146,152,131,132,131,127,122,132,136,132,131,124,115,123,129,134,131,147,140,141,139,141,141,139,131,137,141,137,137,137,134,129,128,132,134,130,129,130,135,142,136,129,137,129,126,120,117,110,111,113,110,100,100,98,105,111,110,111,108,113,117,119,120,105,98,88,79,76,85,79,83,85,86,69,69,65,69,62,64,63,57,62,63,61,60,57,62,67,59,64,63,61,63,63,62,59,53,54,61,53,59,53,49,49,53,61,52,50,56,51,48,49,51,59,57,55,57,47,50,58,46,51,44,46,51,54,67,68,72,75,107,100,71,96,98,97,99,94,99,83,75,77,83,98,100,97,107,116,128,131,101,114,116,119,92,94,103,89,71,67,54,41,43,36,47,47,25,21,25,29,30,42,84,70,77,77,98,106,125,140,143,146,149,164,166,175,176,177,177,173,162,185,208,189,170,146,141,152,148,137,144,153,175,163,111,85,90,98,84,58,57,65,39,25,29,57,32,28,23,24,22,18,19,21,21,19,18,22,19,20,19,15,21,18,19,19,15,18,18,19,22,20,21,21,16,17,19,22,21,21,21,17,17,20,17,18,17,17,22,18,19,18,20,21,17,22,19,17,19,19,27,23,17,24,19,22,26,20,22,20,30,26,30,37,34,34,31,29,34,34,40,48,47,44,47,51,48,65,73,66,61,46,46,42,44,50,44,41,33,30,35,31,32,36,29,38,34,28,32,22,20,18,17,17,20,21,20,21,19,22,18,20,18,25,26,17,27,23,23,27,22,26,30,29,33,37,44,56,66,81,92,85,78,80,94,125,131,137,147,159,157,151,150,147,156,161,170,165,168,166,162,157,153,163,161,158,143,134,140,151,146,155,150,146,150,153,149,150,156,159,161,160,155,165,173,170,166,163,158,159,161,160,165,169,168,166,163,161,162,162,160,164,169,170,171,169,168,164,155,156,161,161,162,162,159,160,157,157,162,156,162,163,166,163,150,126,89,87,103,134,162,171,174,169,166,166,163,165,162,160,162,156,157,152,154,159,162,160,154,156,156,158,159,162,163,158,151,149,148,146,153,159,163,16
3,166,151,159,113,12,1,8,10,13,11,12,12,13,13,14,13,17,17,17,17,17,18,17,17,18,17,16,19,18,17,18,19,18,18,17,16,17,16,16,17,17,19,16,17,18,18,18,19,19,19,17,23,22,21,21,19,24,19,20,17,17,19,17,17,19,19,19,18,16,16,17,17,16,20,18,17,19,18,19,19,20,18,19,23,19,17,18,18,18,18,19,19,17,19,18,18,21,26,23,25,26,26,26,24,31,31,34,40,41,39,34,35,33,36,38,37,49,47,49,49,46,52,49,56,50,45,48,46,56,57,51,51,53,57,65,63,57,55,53,71,86,88,79,79,83,75,74,76,92,103,106,116,124,111,97,103,113,123,126,133,141,139,129,127,128,119,108,105,114,126,125,124,110,106,115,118,118,115,120,133,129,120,120,125,123,111,106,105,106,109,116,113,111,118,118,120,119,114,104,107,112,119,128,139,138,134,131,118,105,104,112,114,116,108,108,106,103,97,99,110,129,134,130,137,135,139,136,138,130,123,129,134,137,136,134,130,129,129,141,136,133,134,139,138,136,146,132,132,124,118,119,117,118,120,129,130,125,116,114,117,124,123,123,130,131,132,134,131,139,138,139,140,129,132,134,130,128,124,122,116,114,117,115,111,114,119,128,133,129,117,100,98,95,100,106,114,118,111,109,98,100,97,87,95,114,122,117,114,98,95,81,73,83,85,80,73,76,82,78,71,71,71,71,75,77,75,74,64,62,64,63,61,63,72,67,63,65,63,63,63,65,57,56,58,61,61,60,61,53,54,53,53,60,56,53,54,53,57,61,61,61,54,59,49,50,55,42,49,50,51,54,52,57,54,56,61,61,69,61,50,45,51,53,46,51,51,52,55,52,62,69,59,59,66,77,70,63,77,91,94,90,101,113,119,110,103,95,93,76,59,67,66,62,46,41,42,37,35,33,34,35,36,41,33,30,39,59,70,89,100,105,119,121,120,128,137,135,139,141,139,136,131,137,152,139,150,168,150,142,144,133,139,145,154,153,139,155,130,100,96,95,105,93,87,86,81,60,55,59,46,39,37,31,27,25,24,27,23,20,17,16,21,18,19,17,17,21,21,19,20,21,19,16,19,19,19,22,19,17,18,19,16,18,16,17,22,19,24,21,20,20,22,21,19,21,19,19,20,24,23,22,22,26,23,26,25,25,29,31,36,34,40,28,29,34,34,43,47,42,45,54,47,36,46,59,63,60,54,49,44,48,50,46,42,36,29,35,31,29,35,29,35,31,33,30,18,22,19,19,23,21,24,21,21,21,17,24,22,20,23,18,22,25,20,25,30,27,30,37,42,53,54,57,73,76,86,82,77,82,70,84,97,110,135,147,161,150,142,139,137,157,170,176,169,164,162,154,158,160,162,157,152,153,148,151,150,154,162,153,151,154,148,139,146,155,158,159,157,153,152,157,155,155,151,148,155,159,162,167,168,162,165,167,164,164,160,161,157,159,163,160,163,162,159,151,153,158,157,162,166,168,169,165,163,166,168,163,164,170,166,165,156,133,103,92,98,127,156,173,179,172,167,169,171,160,161,158,163,160,152,159,160,162,156,153,159,158,159,159,159,161,157,156,153,151,151,153,160,164,158,159,153,159,111,14,0,9,10,13,11,13,12,13,14,14,13,19,18,16,17,20,18,19,18,17,17,18,18,19,19,18,19,20,17,16,17,19,16,16,17,19,18,16,19,17,19,21,17,21,24,20,20,21,21,23,17,18,19,20,21,20,17,17,18,16,19,17,17,18,17,18,17,17,17,18,21,17,18,20,17,19,18,19,18,19,19,18,20,15,19,19,17,17,17,17,22,30,30,38,40,34,31,26,31,42,49,53,55,51,44,43,37,38,43,44,50,61,64,57,59,61,54,54,52,45,36,38,42,38,36,42,44,41,47,50,53,52,47,43,50,51,51,58,53,44,41,41,46,62,57,54,57,87,112,94,128,117,115,99,90,90,112,117,116,123,114,106,102,105,105,115,111,108,110,105,119,125,122,124,130,131,131,119,121,119,110,107,107,112,112,120,103,92,100,117,109,104,118,105,103,104,119,122,132,139,131,122,109,107,104,103,105,104,109,113,109,113,107,110,117,128,112,106,105,103,104,110,108,108,112,121,128,132,131,116,116,125,131,132,127,125,123,121,118,122,116,112,121,124,126,118,123,127,132,139,140,139,133,122,126,134,128,126,125,128,126,126,126,128,131,132,141,139,128,119,113,107,100,105,115,105,111,108,99,97,104,118,120,116,121,116,105,117,115,116,109,108,106,104,97,110,101,99,102,101,100,95,103,89,96,93,85,95,101
,89,70,69,70,75,83,87,83,68,60,61,64,60,52,52,53,60,58,64,61,49,54,57,55,62,55,56,58,49,57,56,55,53,45,54,51,51,54,51,55,54,51,57,54,56,57,55,63,52,45,51,51,54,44,49,53,51,54,51,53,53,51,53,47,44,49,48,48,48,43,39,46,42,47,47,51,57,50,56,59,61,69,53,59,60,75,73,78,85,93,92,84,84,98,82,68,110,78,74,99,87,76,69,64,63,51,45,45,38,36,39,38,39,39,41,42,39,42,36,39,45,49,63,62,73,85,93,96,102,112,112,123,124,110,113,115,134,138,123,137,142,133,142,137,127,135,139,133,145,141,145,112,136,149,139,120,111,93,84,84,70,71,67,53,47,41,38,39,34,31,29,33,29,40,46,24,35,18,24,22,19,21,17,21,17,16,19,21,19,18,18,20,20,22,20,17,20,22,24,19,21,21,22,22,19,26,19,22,26,22,27,29,28,29,29,29,36,39,42,36,33,31,41,47,42,49,49,45,30,38,48,56,60,57,57,50,54,51,52,51,32,33,35,30,34,33,32,35,31,31,35,24,18,18,17,17,22,20,21,21,18,22,20,25,22,24,25,27,28,28,39,39,40,45,52,56,63,68,74,71,75,78,74,76,72,69,79,85,93,110,123,132,129,128,135,147,156,159,160,158,157,158,151,155,155,151,148,159,157,158,166,161,164,172,167,166,173,159,154,170,168,166,162,158,150,137,137,139,151,150,148,157,160,155,157,159,159,157,155,158,163,158,154,150,156,159,152,154,152,150,148,151,150,152,157,158,164,166,160,159,164,163,161,165,168,165,168,172,155,142,115,97,106,122,153,171,175,174,168,165,157,153,158,158,162,159,159,164,162,159,156,155,153,154,155,155,160,163,160,159,161,161,163,161,164,158,158,146,157,113,12,1,9,10,12,10,13,12,12,14,13,13,17,17,18,18,16,17,19,23,16,18,19,18,19,20,16,18,20,17,19,17,18,17,18,17,17,18,17,19,19,16,19,20,19,23,16,21,21,21,25,20,20,17,19,19,16,17,17,19,16,18,22,18,17,18,16,17,17,17,18,17,18,18,17,20,19,18,17,17,19,20,18,18,20,17,19,20,19,19,20,19,33,34,33,40,39,37,36,43,49,57,55,56,43,40,42,29,30,29,24,30,32,33,32,27,29,26,29,29,28,26,21,26,28,30,34,38,46,41,37,48,44,45,44,46,39,40,39,29,34,33,30,37,31,34,39,39,43,60,77,93,91,79,77,66,69,91,106,108,109,107,102,98,101,108,108,112,112,120,124,117,116,117,122,127,120,119,120,115,119,117,122,116,116,117,126,130,122,122,120,119,118,131,136,132,125,123,118,119,121,122,126,118,116,117,121,120,122,121,130,130,133,129,125,131,130,125,125,126,122,122,103,107,107,106,117,122,127,128,125,125,133,134,136,130,128,133,128,118,110,108,113,125,125,125,125,130,136,139,136,135,137,137,138,137,145,142,139,137,135,132,133,128,129,117,116,119,113,117,110,107,114,118,122,123,120,113,119,112,104,117,116,113,123,120,117,119,116,122,117,96,93,98,96,95,107,109,101,95,85,78,74,96,107,108,106,101,110,107,90,79,73,67,76,74,76,71,55,53,50,52,55,47,53,53,53,57,52,51,55,55,56,48,53,54,48,56,56,57,55,52,53,51,53,54,48,51,55,53,53,54,50,53,51,51,57,55,50,43,54,48,46,50,42,49,49,48,49,47,50,48,53,57,55,58,58,57,58,56,58,53,54,48,49,50,54,50,49,65,74,75,57,55,69,63,59,57,61,56,72,68,69,91,74,62,66,71,65,51,59,61,78,100,113,107,102,92,84,80,69,59,48,42,39,42,42,39,39,36,36,39,40,47,45,45,50,44,44,61,44,63,72,78,92,84,104,100,99,113,116,114,123,127,136,130,126,141,126,129,126,124,124,130,127,128,131,122,121,120,128,135,137,132,129,119,113,117,112,110,92,91,91,74,87,76,64,66,57,49,40,42,43,39,36,29,29,27,29,23,16,25,19,20,19,17,19,19,21,17,20,24,23,19,19,17,23,21,21,26,24,26,26,28,28,27,33,40,37,34,30,26,40,43,47,52,53,47,33,40,51,54,55,56,63,59,57,55,46,45,40,39,35,31,34,27,32,37,29,34,35,24,19,17,20,19,19,24,21,27,25,27,32,29,33,37,41,43,41,45,50,53,55,60,59,56,65,61,65,63,64,68,70,83,80,71,77,77,83,89,101,107,109,142,162,160,157,144,138,144,153,159,153,151,151,142,135,134,141,150,152,151,152,158,155,158,167,166,163,160,160,165,166,160,149,141,144,154,161,158,156,162,153,149,154,152,156,151,148,15
3,157,152,152,151,154,160,150,152,155,152,149,154,156,151,148,146,151,152,151,154,158,156,154,156,156,156,158,165,165,155,143,124,104,97,114,145,162,173,172,161,152,143,147,155,159,164,162,160,160,158,159,162,161,159,160,158,162,162,161,165,161,165,168,164,165,158,158,149,159,111,12,1,9,10,13,12,13,12,13,14,14,14,16,16,18,17,17,17,18,18,17,16,17,17,17,17,18,17,16,18,17,17,19,19,17,16,17,17,18,20,19,18,18,18,18,21,23,19,21,23,23,24,20,18,17,16,20,21,16,18,17,19,19,15,19,18,16,18,17,20,18,16,16,18,17,20,20,16,18,16,19,19,19,22,17,18,20,16,19,17,19,29,30,38,42,45,43,43,48,51,57,59,55,48,44,42,46,46,36,36,33,32,29,31,27,26,31,26,27,26,27,28,31,37,37,37,42,41,45,47,36,37,42,46,38,37,34,35,41,35,35,36,36,33,30,36,38,42,48,48,57,68,78,81,87,92,90,106,113,117,108,106,110,104,114,114,110,121,116,116,122,126,122,126,128,135,115,132,125,125,128,123,118,118,113,115,122,127,124,117,120,125,126,128,128,141,132,124,115,108,109,118,109,111,116,118,118,119,120,114,110,116,123,119,124,119,114,122,119,131,131,128,123,116,120,112,130,141,142,130,132,128,130,125,133,135,138,140,144,129,123,130,131,133,127,128,127,128,130,120,119,127,137,154,152,144,144,142,141,140,132,124,127,125,122,114,110,114,124,131,133,136,134,137,138,127,123,131,129,128,127,115,114,123,121,118,111,94,93,97,94,92,100,107,97,100,98,84,75,76,79,80,78,95,95,102,102,97,97,105,100,90,101,80,77,57,57,64,66,70,63,60,53,56,58,55,48,55,49,55,57,53,59,52,56,55,54,63,54,54,57,53,53,53,54,54,55,57,49,56,56,54,54,51,56,53,47,50,52,48,53,46,49,49,51,47,46,53,44,52,59,46,50,56,56,59,56,55,58,59,61,71,63,54,44,48,51,48,57,56,57,60,56,71,67,54,57,63,54,61,58,57,51,66,50,49,43,42,53,50,53,64,73,92,102,98,103,96,106,113,76,99,90,86,83,79,74,69,53,55,51,42,52,48,53,45,37,44,46,47,39,45,47,45,66,55,50,51,56,58,60,66,80,85,90,88,89,103,93,107,107,93,97,108,105,101,118,107,114,122,132,131,131,131,137,120,133,151,156,148,112,118,132,100,116,139,103,141,107,139,123,119,126,123,96,91,100,90,96,74,53,57,57,49,38,31,54,44,46,25,22,26,27,20,18,26,20,21,22,22,19,18,25,24,24,26,28,33,33,29,27,21,29,40,43,48,56,51,27,38,55,57,57,54,56,57,64,53,50,46,31,34,35,33,29,25,33,29,26,35,36,25,21,26,27,27,31,36,38,38,39,42,43,45,51,45,52,53,50,53,51,53,51,56,57,49,51,49,53,59,65,74,77,82,76,74,71,71,78,90,102,106,122,158,172,172,155,133,129,135,145,153,155,158,159,149,139,141,144,156,145,153,143,144,153,152,151,150,147,142,141,151,156,154,151,154,159,157,167,162,160,163,152,152,153,155,158,157,158,155,156,154,153,149,154,159,152,160,165,163,164,165,164,157,154,147,146,152,151,151,157,156,155,157,153,152,154,158,151,151,147,140,128,94,84,104,133,157,164,165,156,146,148,153,163,165,162,161,160,163,160,162,162,159,159,158,160,160,155,157,160,162,166,164,162,158,163,153,162,113,12,1,8,10,13,11,12,12,13,14,13,13,17,15,19,19,16,17,16,17,17,19,18,18,20,17,17,17,17,18,16,17,19,17,16,18,18,19,18,20,17,19,19,19,21,19,22,24,22,22,18,21,21,18,18,17,17,16,18,20,17,15,19,17,18,19,16,18,17,17,17,16,19,19,16,19,20,20,19,17,20,22,18,21,17,20,20,19,21,15,22,24,27,27,29,34,39,36,31,37,36,40,44,41,31,29,34,39,36,35,34,32,29,27,29,21,27,27,28,27,24,34,29,27,31,32,30,29,28,32,27,23,23,24,28,21,30,30,30,34,33,29,34,32,31,37,36,40,40,53,59,59,63,78,92,95,110,117,119,117,111,117,117,112,115,112,114,109,103,111,113,123,122,119,115,102,106,105,109,110,111,110,114,108,107,113,111,115,111,113,113,110,117,122,114,120,120,123,119,110,112,117,127,126,121,114,111,112,113,111,109,107,105,106,103,97,98,94,105,114,115,120,113,123,121,116,116,117,126,127,122,119,113,101,103,103,102,124,127,122,123,127,132,134,133,12
3,121,119,116,117,118,124,141,149,146,130,127,136,137,129,120,118,123,120,125,120,128,141,139,145,136,128,123,114,118,125,128,125,125,134,128,125,119,118,118,104,111,110,105,103,98,103,112,110,103,100,92,84,72,67,83,89,82,81,74,78,98,86,82,97,100,97,100,92,82,74,80,86,93,88,77,73,66,63,67,63,57,58,55,65,66,59,62,53,58,63,57,55,53,55,51,52,56,53,55,52,53,54,56,57,59,58,50,55,55,48,55,54,52,55,55,55,53,52,55,52,51,54,56,49,51,46,48,50,44,50,50,50,50,50,49,54,56,47,46,47,53,49,49,50,43,46,44,51,55,45,46,46,48,39,49,49,47,50,46,56,46,46,52,69,63,59,72,64,66,60,60,59,56,66,69,67,69,75,81,95,104,99,98,79,76,78,80,89,80,77,73,61,55,59,45,54,48,42,50,37,48,41,46,48,37,41,41,47,46,46,53,52,50,56,55,61,58,66,61,74,93,82,91,102,107,101,108,112,103,106,123,125,126,132,114,125,120,108,128,127,128,131,141,152,145,135,141,151,151,149,145,149,161,142,130,141,133,131,117,116,104,87,89,83,90,77,52,51,42,27,28,41,59,55,61,62,21,8,15,24,24,23,31,29,24,19,23,31,36,43,53,41,23,40,59,59,61,55,51,54,59,57,53,44,29,32,33,29,28,29,37,29,29,35,36,37,41,42,43,44,50,48,50,57,50,53,58,51,50,49,47,47,45,45,37,48,45,41,46,42,50,53,59,57,66,81,82,82,70,73,72,66,81,87,113,118,132,167,176,179,170,146,129,131,139,147,149,150,161,165,158,159,159,163,158,157,166,159,155,152,151,144,135,133,143,155,150,145,153,161,157,157,154,147,154,159,156,157,160,162,160,157,158,158,161,163,160,153,158,165,161,165,169,167,168,171,168,165,160,157,158,158,158,155,158,162,160,161,160,156,159,162,157,152,148,153,153,125,101,88,97,127,151,170,174,169,165,164,165,168,165,165,163,166,162,162,162,161,165,162,162,154,151,152,151,158,161,160,160,156,161,155,162,112,13,1,8,9,13,11,12,12,14,14,13,13,20,17,18,18,17,19,17,17,20,21,21,21,19,19,21,19,19,18,19,21,20,20,22,19,18,17,19,21,18,17,18,20,18,19,25,19,22,21,21,22,16,19,18,16,18,17,19,17,17,18,18,17,17,19,16,16,17,18,19,19,20,18,19,21,16,18,22,20,19,17,20,19,19,19,18,21,17,21,23,19,18,22,22,27,23,19,22,18,20,24,29,27,24,24,27,25,23,30,23,20,29,20,21,23,22,20,24,29,24,23,24,25,29,24,26,30,34,35,35,30,30,29,27,39,43,46,48,42,43,41,48,45,44,49,48,46,44,46,50,66,76,85,90,92,86,86,96,107,102,118,115,113,105,103,101,97,105,103,103,102,99,102,93,84,83,89,96,107,104,104,110,105,106,105,117,123,120,122,118,113,112,118,112,113,110,114,116,116,124,114,127,118,107,109,115,115,119,119,119,127,113,103,116,114,116,120,109,114,107,112,116,113,118,111,116,115,108,108,101,103,100,95,92,88,95,97,99,95,102,109,110,112,107,110,111,117,117,118,113,110,112,115,116,111,121,122,123,123,118,119,118,117,132,122,131,140,137,129,120,111,108,113,111,114,118,120,113,114,112,106,106,104,110,115,122,122,116,105,108,100,105,102,105,112,101,97,95,87,91,86,79,72,66,74,81,87,95,84,85,97,95,93,108,112,112,104,91,89,95,87,96,94,93,96,95,85,87,82,83,61,59,58,57,63,61,53,53,50,51,49,53,57,52,51,52,52,56,54,53,50,52,50,51,55,50,51,57,49,46,46,50,53,53,55,49,50,50,51,52,49,49,45,39,42,47,48,43,45,41,43,42,43,43,42,43,44,44,43,46,43,47,45,45,41,40,41,40,42,46,40,50,59,56,55,60,60,51,48,46,46,48,47,46,48,47,43,43,53,48,53,51,56,57,64,74,75,68,68,64,73,86,93,100,92,87,72,65,73,68,68,62,57,59,52,48,51,46,42,47,45,43,38,43,39,43,44,35,42,42,38,45,43,43,46,50,49,44,53,55,47,57,63,61,71,66,79,86,87,83,97,92,96,104,113,118,105,117,138,116,113,116,122,138,125,124,134,144,145,125,133,138,150,157,157,141,137,138,163,153,143,137,145,154,160,177,198,218,224,223,207,184,184,170,158,139,136,91,39,30,46,66,39,9,11,16,26,22,39,53,58,63,57,50,47,51,54,56,43,27,30,31,30,35,37,35,33,31,36,42,53,57,49,57,54,53,53,46,51,47,43,45,44,39,36,38,36,34,32,36,39,39,46,38,
46,53,49,61,55,63,70,66,67,59,55,53,56,59,64,92,100,112,139,145,153,156,150,149,144,147,151,154,154,159,163,159,161,159,163,165,165,177,165,153,151,147,140,135,138,150,159,159,153,156,165,162,154,154,150,150,155,154,161,164,163,156,153,155,160,171,174,170,162,160,163,161,160,160,160,158,163,170,165,163,165,164,165,157,154,162,163,164,159,159,161,161,164,160,159,157,162,166,157,142,118,95,94,119,149,170,172,171,162,159,160,159,152,153,163,160,159,165,165,166,160,160,160,151,150,156,159,162,158,156,155,162,152,160,112,12,1,9,10,12,10,13,12,13,13,13,12,24,28,31,29,29,29,31,34,29,34,31,33,38,38,42,36,40,41,38,39,36,39,36,39,40,32,28,21,18,19,18,19,17,21,22,17,19,24,22,19,20,17,17,17,17,17,15,18,18,17,17,17,18,16,20,20,16,19,17,17,21,17,17,17,18,17,19,20,21,17,19,21,15,19,21,18,21,23,21,22,28,27,26,24,23,26,22,24,27,31,31,31,39,28,29,34,32,33,31,33,32,29,30,30,30,29,25,27,23,23,27,25,29,27,33,38,35,42,40,33,32,26,34,33,34,44,47,51,48,53,61,52,57,55,53,56,51,55,65,77,89,96,92,84,80,82,90,96,91,101,110,108,106,104,116,107,110,108,97,106,103,105,109,105,108,108,109,113,112,107,108,97,92,101,111,119,115,120,122,119,121,119,117,124,122,117,108,104,108,110,109,105,108,115,118,120,118,111,114,117,119,121,122,128,136,129,120,117,117,117,112,115,116,120,119,118,118,111,105,103,107,102,102,101,87,85,77,85,100,101,100,96,98,101,106,109,114,114,112,106,101,99,103,104,115,118,111,121,127,130,130,133,141,128,126,127,114,114,120,122,126,122,115,113,108,112,103,96,93,92,97,95,103,105,108,112,108,110,101,103,108,105,105,102,103,112,109,101,93,89,97,91,84,70,75,76,72,72,72,83,89,88,89,91,89,84,80,84,90,93,99,97,100,107,104,106,76,68,81,64,62,60,59,59,55,55,54,56,55,56,59,57,55,51,58,54,59,58,53,55,50,48,53,53,57,54,53,54,45,53,51,49,53,48,49,47,47,49,49,43,44,44,44,46,48,46,42,45,52,49,52,47,44,45,43,46,50,44,43,48,49,49,43,43,47,44,45,43,47,51,60,61,59,66,71,63,54,49,46,47,41,38,43,45,41,44,49,47,44,51,48,40,46,52,51,47,61,64,62,62,67,71,66,73,64,59,68,70,66,68,77,83,70,64,67,66,66,66,66,59,55,53,56,55,51,48,51,49,47,47,42,40,40,41,40,39,39,36,40,41,41,39,43,41,36,49,48,50,50,53,57,65,74,74,77,63,87,98,82,95,93,99,113,112,106,109,136,125,122,141,136,149,152,157,159,166,167,154,153,163,188,217,232,234,237,249,251,251,250,250,250,250,253,253,251,251,243,229,233,245,249,237,185,141,94,18,2,11,9,17,37,53,53,53,55,54,57,42,29,35,27,30,37,39,38,33,34,30,39,48,51,51,46,41,44,44,42,39,37,36,33,31,34,31,33,36,34,33,36,40,39,43,44,44,52,50,50,50,54,60,55,49,51,48,46,49,56,52,67,81,92,108,110,125,138,157,168,158,155,157,160,160,161,159,157,160,159,166,168,174,175,155,150,149,145,141,141,147,154,160,156,153,157,168,171,169,164,157,152,144,149,158,159,160,152,152,152,155,163,166,164,159,160,163,165,163,154,148,153,159,158,164,161,160,168,163,159,157,163,166,162,163,162,158,158,159,158,157,159,165,165,159,162,145,123,101,88,108,134,153,160,160,158,154,151,148,144,149,152,156,163,166,166,166,168,163,160,160,160,165,165,163,160,155,158,149,160,113,12,0,10,10,12,10,13,12,12,14,13,12,59,58,62,59,58,63,62,66,63,66,71,71,76,79,87,89,93,105,107,113,111,110,109,108,92,69,54,42,31,28,19,16,18,16,23,18,19,21,17,22,20,16,20,16,16,16,16,17,17,17,16,17,16,17,17,18,19,17,17,17,17,18,21,18,18,19,17,15,22,20,20,21,19,16,20,19,20,21,21,26,26,22,25,29,31,22,28,32,29,39,46,47,42,41,39,36,43,42,36,34,30,30,31,27,24,24,22,18,23,21,24,18,21,29,28,28,23,29,26,25,30,27,31,29,24,35,38,37,47,45,56,45,38,38,41,42,45,50,62,69,75,75,46,49,67,72,77,84,83,89,87,78,83,96,104,103,108,101,105,107,105,116,118,118,119,125,122,124,121,117,120,118,118,114,110,112
,112,117,121,114,119,118,115,127,120,114,112,107,111,114,117,118,113,110,113,110,108,113,115,125,123,119,125,122,125,128,120,128,120,121,118,116,125,114,117,117,122,125,126,124,120,115,109,102,89,89,103,113,123,127,122,113,108,113,116,127,122,126,124,126,131,129,123,118,113,116,109,117,131,137,140,142,130,121,114,108,109,114,121,121,114,122,128,125,120,114,106,105,110,106,107,96,93,101,100,98,103,104,91,92,90,93,97,105,99,101,99,92,92,79,83,73,66,77,81,66,71,83,59,84,81,51,49,50,58,58,65,54,61,68,80,61,60,63,77,87,59,53,59,59,71,59,56,60,61,61,64,59,55,57,65,69,57,59,62,59,63,50,55,55,52,57,54,57,55,53,57,54,50,46,51,53,52,50,51,49,49,49,48,42,43,48,42,44,44,43,49,48,51,57,55,51,53,54,50,54,48,48,52,46,55,54,44,50,46,36,48,43,40,50,46,40,46,45,55,46,43,41,44,46,42,41,44,42,37,45,46,45,48,50,44,39,40,46,56,57,61,57,57,57,54,47,49,63,51,51,57,59,63,63,65,66,69,65,68,69,71,77,77,76,69,70,67,65,72,63,63,63,60,61,53,55,49,48,52,51,45,46,47,44,43,42,41,37,34,35,39,39,37,36,39,42,53,43,53,45,45,55,48,57,59,59,68,73,75,77,91,93,91,101,124,125,136,139,127,134,145,156,179,209,235,247,251,245,248,251,252,252,252,252,252,252,252,252,252,252,253,253,253,253,252,252,252,252,244,187,75,26,5,5,13,9,21,37,47,57,54,48,38,34,35,29,39,35,31,35,35,38,34,39,41,38,34,32,37,37,32,32,34,33,30,32,31,29,35,35,38,39,39,39,41,37,37,51,45,42,48,45,56,58,54,57,49,49,49,53,53,56,66,78,89,81,93,113,137,160,165,155,156,147,151,157,167,160,155,153,156,163,174,167,173,158,151,154,155,151,157,153,153,162,159,154,161,169,179,174,166,158,146,147,150,152,155,154,155,156,160,157,158,157,155,153,155,164,168,164,154,152,153,155,158,158,158,160,163,160,155,155,158,160,161,158,160,157,155,153,153,156,157,155,153,156,152,156,152,124,96,81,103,127,148,162,165,162,159,150,149,149,150,154,157,161,160,165,164,161,159,158,158,158,162,160,162,155,159,152,157,112,12,1,9,9,13,11,13,12,13,13,13,13,131,126,125,124,131,134,134,139,136,144,145,156,160,162,171,171,177,174,174,171,162,162,148,150,139,98,66,45,33,25,22,18,13,17,17,15,19,18,17,22,16,18,18,14,17,18,15,15,15,16,16,15,18,16,17,17,17,18,20,19,16,17,17,18,17,19,18,17,18,16,18,17,17,18,19,18,19,18,18,21,18,19,23,24,21,22,20,27,36,35,42,40,34,31,28,27,29,31,26,25,20,24,29,23,22,23,26,24,23,22,22,31,27,27,29,22,32,33,29,33,41,41,37,40,38,44,47,52,52,53,57,59,55,47,44,38,33,40,54,57,64,62,64,66,65,71,72,83,87,86,81,78,80,87,101,95,94,93,102,110,111,110,113,119,120,126,122,120,118,118,124,126,121,112,107,105,106,113,115,111,111,113,110,110,113,116,114,122,126,122,125,112,113,114,111,112,107,110,117,113,117,114,109,115,115,117,121,118,118,118,121,123,119,113,107,112,118,125,128,131,130,128,131,124,120,127,129,128,125,125,120,120,124,125,125,127,125,124,123,120,125,123,117,114,117,113,107,110,109,109,109,115,117,111,109,99,94,110,117,113,114,116,113,115,109,107,112,117,118,111,113,106,106,97,92,95,97,95,84,84,78,88,101,100,99,99,105,98,91,75,81,83,87,94,99,95,99,102,94,92,82,77,78,76,75,75,62,58,59,50,65,54,49,59,57,73,60,54,56,54,66,59,54,61,66,66,60,54,51,60,57,56,57,57,57,53,57,54,50,61,52,50,54,50,53,52,52,55,53,53,56,53,52,49,51,49,51,47,52,50,49,50,49,47,46,45,48,51,46,47,47,48,46,49,43,45,45,42,46,43,47,51,44,44,45,46,42,38,46,42,43,42,37,41,42,40,44,43,36,45,45,39,41,49,42,35,45,40,42,45,39,42,43,45,47,48,46,42,46,52,50,41,38,49,44,46,46,52,57,49,48,50,53,51,51,46,53,56,53,54,53,54,55,55,56,54,55,60,63,64,66,69,60,64,68,59,56,56,62,60,56,58,46,47,45,49,47,50,49,40,43,41,45,46,43,40,41,44,39,45,41,43,47,46,48,48,53,46,56,55,53,66,72,83,88,96,115,136,160,180,194,205,208,210,217,226,233,240,
253,253,253,253,252,252,252,252,253,253,253,253,252,252,252,252,252,252,214,178,150,117,55,7,6,11,10,19,39,52,46,39,34,32,32,40,39,35,33,34,36,29,34,31,27,36,29,34,36,33,33,33,35,34,36,35,42,39,37,42,34,39,41,36,41,43,42,39,49,53,55,57,57,57,57,55,54,60,54,60,69,71,73,75,85,104,130,145,141,133,134,140,141,150,159,163,154,146,151,154,151,153,152,146,150,151,154,158,153,153,152,153,159,158,159,160,162,165,153,149,150,149,159,160,155,157,156,163,168,163,157,155,152,155,162,163,169,169,163,163,160,158,156,159,162,159,161,159,153,154,152,151,152,157,155,151,151,150,157,159,160,159,156,155,160,159,163,155,131,108,92,101,125,150,172,178,174,166,160,160,155,152,156,155,149,153,155,153,156,151,148,150,155,159,159,154,156,150,159,112,13,2,9,10,13,11,12,12,13,13,13,13,137,135,139,138,145,146,146,151,147,157,165,162,165,163,162,154,148,139,112,106,95,75,57,45,43,42,41,30,26,21,18,16,15,18,15,17,21,17,17,18,20,17,16,19,20,16,15,16,17,17,16,17,17,17,16,17,19,19,16,16,18,17,19,19,17,17,17,17,18,16,20,21,17,17,20,20,19,22,24,24,27,33,27,24,29,26,28,30,36,38,38,33,30,30,31,33,31,33,39,36,31,37,32,27,31,33,34,32,27,35,44,40,35,35,39,40,38,34,46,46,47,46,43,35,41,45,47,58,61,68,67,65,52,55,47,45,50,54,69,65,88,92,93,101,94,96,95,97,99,103,101,107,112,111,112,112,105,107,109,119,117,116,110,112,110,113,113,112,116,113,115,121,109,107,105,107,113,113,113,107,112,115,118,118,117,122,122,120,129,120,114,112,112,117,121,118,114,120,117,115,115,112,119,117,122,122,112,118,115,119,118,114,117,109,113,115,117,122,124,123,123,122,120,118,118,118,118,115,116,117,110,123,126,123,124,117,118,120,111,114,115,112,115,122,117,115,108,104,103,103,106,106,108,101,101,99,102,109,115,119,106,105,111,104,107,114,115,121,117,116,118,117,114,107,107,105,100,92,99,89,92,97,102,102,96,113,113,98,102,95,97,94,93,92,99,120,123,116,113,109,105,107,91,100,79,79,83,78,71,56,67,70,69,76,64,73,71,59,56,56,58,53,54,64,55,61,59,50,57,56,56,55,49,59,56,55,54,47,54,54,51,52,48,50,52,52,49,51,57,56,57,51,46,53,53,49,55,53,52,54,51,53,48,47,49,48,48,51,48,48,48,41,42,41,43,44,40,44,44,45,44,38,48,43,39,41,42,41,45,41,39,43,42,40,42,39,40,44,42,41,44,44,46,40,44,42,39,39,39,42,42,46,41,41,39,45,47,42,39,39,45,41,39,39,41,41,38,39,40,43,43,36,42,41,39,42,40,47,42,45,43,46,50,42,52,47,53,57,57,60,56,61,62,59,64,61,63,63,65,65,59,69,62,61,69,70,76,68,57,59,56,54,57,60,57,49,56,58,50,45,44,49,50,47,44,44,45,47,44,48,50,45,49,45,42,53,54,61,68,83,89,94,103,106,128,137,141,160,176,187,204,224,239,251,252,252,253,243,253,253,234,252,252,252,253,253,251,251,252,252,234,134,125,25,1,9,10,19,33,37,38,34,40,38,38,38,36,35,35,35,29,38,35,35,41,33,34,37,39,37,37,37,31,35,35,34,37,34,36,36,36,33,35,35,35,41,42,45,51,51,48,54,57,56,52,56,54,53,58,54,60,55,74,80,96,96,110,130,142,150,136,140,137,146,142,130,133,129,121,127,129,128,146,132,144,146,147,143,149,153,154,151,151,146,149,146,143,147,144,148,155,155,160,159,162,150,156,157,162,159,146,146,151,160,164,165,166,161,156,151,150,152,159,162,159,160,154,158,162,155,155,159,160,158,152,150,153,152,155,160,158,160,161,163,167,162,160,138,113,96,89,120,147,169,182,174,170,162,159,156,153,154,148,152,156,156,155,151,149,148,148,153,158,153,158,150,159,113,12,1,10,10,12,10,13,12,13,14,13,13,125,126,128,132,134,129,128,126,126,133,120,118,100,89,79,65,54,41,35,32,31,34,27,26,29,29,27,21,24,17,18,18,16,17,21,21,21,19,19,17,17,21,16,15,18,17,18,16,16,19,17,17,17,17,19,18,18,17,20,20,17,19,20,17,17,17,16,18,19,23,16,16,18,18,22,22,21,26,29,34,43,37,36,37,40,40,47,46,48,46,44,43,41,43,37,36,42,39,33,36,34,30,28,28,33,
31,31,31,34,34,40,44,40,38,31,33,36,33,40,46,47,44,39,35,41,44,41,45,47,53,51,53,48,53,56,59,62,60,86,101,114,121,121,119,120,123,118,116,115,122,126,126,121,114,124,121,122,121,117,115,119,118,118,112,110,113,112,118,118,117,122,122,121,116,118,123,119,117,118,117,117,122,126,122,115,118,122,131,126,118,122,122,128,124,123,122,121,127,125,119,129,130,130,131,125,131,129,128,130,119,120,123,128,127,125,125,124,125,125,129,122,114,115,116,122,121,122,123,122,119,114,124,122,117,118,120,119,125,129,127,123,127,138,136,134,129,123,127,129,130,122,123,113,111,116,118,124,127,125,117,113,111,113,118,117,117,120,124,119,114,117,114,121,118,120,123,117,114,116,117,115,120,113,105,103,103,98,93,106,105,103,95,87,79,87,88,92,89,91,103,98,95,94,89,74,85,94,94,89,60,72,68,64,68,65,71,61,62,60,63,68,67,66,60,65,58,49,56,51,57,57,55,56,55,63,53,56,54,53,60,49,55,55,55,61,50,51,51,55,57,50,51,49,47,51,50,52,51,51,56,50,47,48,46,52,51,46,49,49,48,44,44,46,46,48,48,45,43,42,43,44,40,40,45,43,40,43,44,42,38,46,52,45,42,40,44,45,46,44,42,42,46,47,44,43,50,47,42,42,44,44,42,39,44,48,46,48,47,46,46,47,44,43,45,40,41,39,42,42,40,40,39,41,39,42,44,46,44,42,47,43,46,39,43,46,46,45,47,54,45,45,50,47,57,60,57,57,51,57,58,57,67,56,53,59,58,69,63,60,61,60,63,57,61,61,62,72,73,61,60,67,66,65,67,60,59,56,53,55,50,54,53,53,61,57,53,51,45,46,43,53,53,42,48,50,45,46,53,65,67,85,103,118,138,151,186,185,156,187,228,239,250,253,253,253,253,253,253,252,252,252,252,242,221,156,84,21,2,9,11,24,24,26,31,33,32,24,29,34,30,36,37,34,42,37,36,34,41,40,33,40,37,33,34,33,31,38,37,33,30,30,33,31,31,39,39,36,38,44,48,44,51,51,50,53,57,53,51,49,53,54,53,62,58,69,84,105,127,154,158,136,139,140,149,145,131,134,127,123,130,139,143,150,150,154,158,153,150,148,148,149,146,147,143,144,146,141,147,146,144,148,150,145,148,149,139,145,152,150,150,140,139,148,153,160,152,152,157,157,160,158,157,162,159,161,162,156,162,163,162,160,166,167,160,157,152,155,151,157,158,159,158,160,165,165,168,163,160,139,114,91,84,109,137,163,169,170,170,164,156,152,153,154,161,163,161,163,162,159,155,156,153,158,157,159,154,164,114,12,0,9,10,13,10,13,12,12,14,13,13,118,123,119,113,110,96,78,76,73,49,46,37,39,31,32,35,29,29,24,29,22,27,28,23,24,22,28,27,20,26,22,17,21,21,21,18,21,19,19,19,16,17,20,18,16,18,18,18,16,16,17,20,20,16,17,21,19,18,19,15,18,19,16,21,19,20,19,16,21,17,18,21,17,16,18,20,20,25,21,28,30,28,32,33,28,28,38,22,31,33,28,37,29,29,31,28,29,29,27,28,33,37,31,29,31,31,32,39,38,44,44,40,39,29,35,33,29,31,27,24,27,37,25,29,34,28,34,33,42,40,46,49,44,47,46,53,56,59,68,81,88,102,111,122,117,115,122,119,125,122,113,117,112,118,114,112,115,118,123,124,128,126,124,125,123,123,122,122,125,124,125,123,120,125,121,125,127,122,122,119,126,130,132,127,119,126,128,124,128,126,128,128,128,128,126,122,124,123,118,113,126,122,128,124,122,123,124,122,119,119,126,133,128,127,131,127,128,122,121,120,122,119,118,124,128,128,125,118,122,123,119,123,117,124,127,125,129,130,127,125,122,128,127,125,128,129,125,128,128,128,126,121,126,127,133,127,134,130,116,113,107,116,118,124,119,118,121,117,114,98,106,107,109,113,116,113,106,114,112,115,106,106,95,90,89,89,78,90,91,79,75,91,83,75,68,68,80,81,93,99,75,95,93,80,70,82,92,74,78,60,70,55,51,59,51,60,50,54,57,62,60,66,58,61,64,55,56,53,55,55,58,54,55,57,53,58,66,56,59,57,57,59,54,61,56,53,55,53,54,56,51,49,51,49,55,49,50,50,55,56,44,53,50,45,53,48,44,49,47,43,48,51,44,45,53,51,47,45,45,44,44,42,45,44,45,43,39,44,45,46,43,46,47,44,44,44,47,48,44,40,43,47,41,45,47,43,45,43,46,48,48,45,42,44,43,45,46,45,43,41,43,41,48,44,42,45,42,40,
38,45,42,40,46,38,39,46,42,43,45,43,41,48,44,40,42,39,42,43,47,45,36,47,47,43,49,45,50,40,42,47,42,54,47,42,40,48,50,47,55,51,53,48,45,51,54,54,64,65,65,66,59,62,65,67,68,62,61,62,61,64,71,75,81,76,76,77,70,77,61,66,90,95,80,68,78,76,58,66,71,59,55,46,46,48,57,91,78,37,56,132,156,174,213,250,251,250,240,253,253,253,253,253,252,252,246,246,208,145,59,5,5,10,12,14,12,14,14,22,36,34,35,41,36,36,38,36,39,32,34,34,30,39,33,27,33,33,34,33,30,27,32,27,31,33,34,39,34,37,39,44,51,49,43,49,49,57,62,59,57,52,56,65,70,65,76,85,89,92,125,134,141,135,145,134,120,125,133,148,152,163,164,169,164,166,171,169,154,158,156,157,154,157,163,156,160,156,150,149,150,149,145,142,145,144,144,142,151,157,156,153,157,153,150,154,149,144,150,154,158,163,163,155,158,161,159,159,152,153,160,158,162,166,161,152,153,158,158,162,156,157,161,159,163,166,168,167,165,160,156,148,121,101,88,101,130,154,171,171,170,164,156,153,156,165,165,166,165,167,165,163,162,160,161,159,166,159,165,113,12,1,9,10,12,10,13,12,13,13,13,13,92,86,69,53,44,38,35,31,28,28,23,28,25,23,26,27,26,22,23,25,26,27,27,26,24,22,25,28,26,19,20,21,18,21,22,20,22,22,15,19,21,15,21,20,17,17,17,17,19,18,17,18,19,16,21,19,17,19,15,17,18,21,18,18,19,18,18,19,22,17,21,19,18,19,17,24,23,20,21,27,23,21,26,22,19,20,29,22,22,24,29,26,27,31,27,33,30,28,27,25,34,36,29,33,34,33,34,34,41,36,37,31,26,36,35,38,31,33,39,32,36,34,34,26,27,31,34,36,42,44,41,40,45,47,48,54,56,54,54,52,63,84,102,105,98,96,100,106,108,107,109,115,113,116,117,113,113,119,125,123,120,113,117,117,122,120,123,125,125,123,121,120,122,127,121,123,127,125,124,122,122,123,130,129,122,122,115,117,121,124,127,123,119,118,118,116,117,117,111,105,107,104,102,105,101,103,106,116,117,117,133,130,130,131,136,131,126,122,113,118,114,110,114,113,116,115,114,114,113,105,112,122,121,126,124,123,123,124,122,118,112,114,120,117,125,122,122,116,107,111,106,122,123,125,127,126,127,114,117,115,117,124,123,121,112,101,101,109,110,108,113,123,126,119,111,113,112,108,116,102,91,93,82,91,93,93,95,97,94,89,94,97,90,84,81,76,83,90,89,89,99,101,94,73,53,57,54,57,71,53,54,42,45,56,50,52,50,56,48,52,56,50,61,61,67,62,51,57,53,60,55,60,60,55,59,48,57,55,54,55,48,53,49,56,56,51,56,51,55,54,54,54,61,56,50,60,54,52,54,53,54,50,49,54,48,50,50,50,49,48,53,48,51,50,48,47,48,49,45,44,44,43,48,47,39,45,42,40,44,43,43,36,46,40,40,47,42,44,41,42,38,44,43,43,44,42,45,41,43,46,44,45,44,40,40,42,41,35,38,46,39,41,43,40,43,40,38,41,42,39,39,41,41,44,44,40,40,40,40,44,37,46,40,39,43,40,40,36,41,44,41,41,43,36,38,41,35,42,38,39,40,39,37,34,41,36,36,50,46,42,45,43,43,41,46,48,51,55,55,50,49,58,53,54,57,66,71,71,63,61,74,76,78,68,73,77,72,74,79,92,110,115,116,116,127,131,137,153,154,142,127,107,112,108,107,120,72,22,27,64,92,107,125,160,201,175,182,250,253,253,252,252,253,253,252,252,250,250,229,205,200,190,195,200,194,193,178,204,171,45,29,39,28,35,29,30,36,29,31,36,32,32,29,29,32,30,33,30,30,24,24,27,33,31,31,37,37,37,35,39,47,50,45,44,50,59,64,64,62,61,62,71,83,76,90,89,84,101,117,116,114,124,117,105,97,117,140,152,160,163,169,171,174,163,161,165,155,155,163,164,160,167,170,169,165,157,151,151,153,158,155,154,153,155,153,148,160,162,162,177,176,170,165,155,152,146,150,151,154,163,159,158,159,163,160,160,155,153,158,158,158,159,155,153,152,155,161,161,164,160,158,154,160,168,166,166,163,164,160,160,156,132,108,89,96,125,154,170,170,165,162,157,158,168,170,169,163,165,165,159,163,155,160,159,162,158,163,113,12,1,9,10,13,10,13,12,13,13,13,13,34,31,35,24,24,27,19,24,20,22,21,19,25,21,21,29,28,24,29,24,22,23,28,27,25,27,24,27,29,19,17,18,17,17,21,21,
17,18,17,19,17,17,18,18,19,19,17,20,19,17,18,19,18,16,19,17,17,17,17,20,19,17,19,23,16,18,19,19,18,21,22,18,18,21,19,20,22,20,24,20,21,26,21,27,30,27,33,38,35,33,34,36,39,41,35,37,36,35,28,29,29,30,32,36,29,26,35,27,31,29,36,35,29,36,33,35,46,44,46,49,45,45,37,38,34,36,40,38,45,47,53,55,60,67,72,76,75,75,85,89,103,108,120,103,104,99,104,110,99,107,101,112,112,118,117,112,122,118,118,117,110,107,111,115,118,118,121,122,117,114,116,118,119,124,118,115,109,108,110,105,108,108,103,100,101,98,107,109,108,112,119,123,117,108,114,116,121,116,110,109,104,110,112,112,115,116,110,112,110,127,127,122,122,131,132,130,132,122,124,124,125,124,120,122,122,123,122,115,108,109,115,127,122,113,118,125,124,125,124,117,123,127,127,129,126,125,120,106,103,112,115,125,128,120,122,118,115,116,123,129,126,126,118,106,101,109,110,114,122,125,129,120,114,115,125,125,112,110,103,104,108,116,127,122,119,121,113,119,113,105,107,111,115,100,97,83,78,87,91,100,85,83,84,71,50,55,64,78,72,72,63,49,51,61,56,57,57,65,56,56,55,50,55,66,62,56,60,56,58,57,60,56,57,60,54,56,52,57,58,53,56,53,57,57,50,56,58,63,55,55,57,66,60,53,62,56,60,55,52,53,53,55,47,54,51,50,54,49,52,51,50,53,55,54,47,49,49,44,46,49,43,49,42,44,47,44,43,39,43,46,42,43,44,35,43,41,39,44,38,42,40,45,48,41,43,41,44,39,39,42,42,41,41,43,41,39,40,43,40,38,38,42,40,40,42,42,39,39,38,38,44,44,39,37,38,39,42,40,36,42,44,35,39,42,39,37,40,36,36,42,39,42,38,38,41,38,39,35,40,42,33,37,40,33,34,39,34,40,35,36,43,35,40,38,41,43,41,46,48,45,48,54,52,54,59,63,62,54,53,60,60,65,66,59,66,69,79,75,75,89,101,121,125,124,135,136,152,156,155,153,133,159,182,169,165,154,131,98,120,129,141,125,97,97,139,76,117,210,251,252,252,252,253,253,252,252,253,253,252,252,252,252,252,252,252,252,251,251,226,67,32,33,25,31,26,29,28,27,29,29,29,29,26,27,25,28,24,29,27,22,27,24,24,33,24,30,34,29,36,37,37,42,42,42,43,52,60,62,61,55,57,63,75,70,93,85,109,122,112,107,109,121,110,123,112,122,131,146,149,147,152,158,155,150,151,155,160,170,179,173,163,160,162,158,155,151,151,152,158,165,161,162,163,157,157,151,155,153,158,175,174,170,162,159,156,152,155,154,153,155,159,159,161,165,162,161,159,159,159,154,154,149,155,152,154,155,157,161,161,161,153,149,157,155,158,160,160,159,157,164,162,162,136,106,87,93,121,143,162,166,166,164,160,167,165,163,158,156,160,159,156,156,160,158,160,151,160,113,12,0,9,9,12,10,13,11,12,14,13,13,21,19,24,22,22,20,20,24,23,24,21,24,25,23,24,23,27,25,24,24,22,26,25,29,27,21,25,23,20,24,21,15,21,20,16,18,17,19,20,17,17,19,17,18,17,17,21,16,17,16,17,18,17,20,17,17,18,18,19,19,21,18,20,19,18,19,19,20,19,19,20,18,22,23,16,22,18,23,23,22,29,26,29,33,34,36,39,33,32,26,27,30,34,33,40,39,37,43,39,41,45,49,43,43,42,35,37,41,43,44,45,46,43,40,43,48,48,45,46,47,43,42,50,49,47,45,49,40,42,50,53,61,73,85,96,100,98,99,95,103,101,98,96,101,117,118,121,117,126,122,117,116,111,115,113,117,114,114,118,117,110,104,110,120,125,120,122,118,113,116,116,116,116,119,117,114,110,105,103,110,112,108,111,113,113,117,119,119,118,119,123,122,117,115,115,119,127,127,124,120,125,125,124,127,125,120,116,110,107,113,121,113,111,114,115,114,109,114,122,130,132,130,127,129,131,125,129,127,123,122,123,127,119,122,125,122,125,127,125,125,129,131,128,127,131,127,128,123,118,129,127,131,127,126,119,118,117,117,129,119,125,124,108,110,117,113,116,115,116,113,105,109,111,116,115,115,112,108,116,118,119,130,129,132,128,123,125,131,122,111,112,111,110,110,109,97,90,88,77,71,68,67,80,77,85,95,96,98,93,93,84,65,66,66,69,70,73,80,58,58,51,45,61,58,59,59,62,61,60,63,57,57,55,55,60,57,59,57,58,62,61,58,54,57,55,56,55,54,
53,50,56,53,57,53,46,53,49,50,51,48,55,50,48,53,49,47,47,52,50,51,52,47,49,49,47,49,49,47,46,48,43,44,45,47,44,44,45,45,47,45,45,46,42,41,48,42,41,44,45,42,43,46,43,43,40,42,44,43,41,38,39,43,41,43,49,46,46,45,40,44,44,46,43,45,44,42,40,40,45,41,45,40,39,42,38,40,40,34,40,44,37,41,38,34,38,36,38,38,36,37,39,43,41,41,45,44,44,38,38,41,38,41,40,34,41,41,35,40,36,33,36,39,38,42,42,42,41,46,50,40,46,49,46,51,53,55,49,51,54,53,51,55,58,54,62,70,75,68,67,81,91,100,113,113,115,116,106,106,105,113,122,137,162,150,168,172,142,143,160,188,189,150,113,109,104,57,72,133,148,178,217,248,253,253,252,252,253,253,252,252,253,253,252,252,253,253,252,252,226,44,21,21,12,31,21,27,27,30,28,29,27,27,24,27,27,21,30,27,27,23,21,27,28,27,29,28,28,33,30,32,34,33,31,36,39,42,50,52,52,54,57,54,64,62,67,79,97,117,105,104,115,134,161,150,133,137,132,136,143,137,133,130,139,153,166,168,165,170,171,166,154,153,157,162,160,157,158,159,159,162,163,161,159,160,151,154,154,145,154,160,160,153,153,157,155,152,156,150,148,150,151,156,156,160,157,162,159,155,159,155,150,151,150,155,155,151,154,153,160,165,161,150,148,150,144,151,155,155,154,158,162,156,152,132,101,86,88,108,137,160,170,170,164,162,160,158,153,157,158,157,163,158,162,160,160,154,160,113,13,1,9,10,12,11,14,12,12,14,13,13,19,17,20,19,24,22,22,22,24,23,18,22,22,24,25,23,25,27,24,25,25,22,27,27,26,26,29,24,19,22,16,18,18,19,22,19,21,16,19,22,17,17,17,19,16,17,17,19,21,17,16,18,17,19,18,17,20,20,17,20,21,17,21,18,18,19,18,22,18,17,16,19,18,19,19,18,22,21,22,27,32,29,25,29,31,29,28,25,26,19,25,28,33,33,30,27,29,37,35,37,36,36,37,33,38,38,31,46,50,46,45,39,35,43,40,40,39,36,35,36,38,41,39,47,46,45,47,46,48,42,39,42,59,71,80,91,94,100,95,98,103,102,108,111,119,113,121,108,124,125,121,123,125,120,124,120,116,114,110,116,119,121,114,122,125,116,120,123,122,124,125,124,121,127,122,123,121,123,122,118,125,130,131,129,130,127,130,126,123,123,119,123,120,115,122,116,118,116,124,127,118,120,119,118,120,121,126,123,123,120,122,118,116,118,115,119,124,116,120,119,122,122,122,122,122,126,128,128,124,125,122,126,126,120,123,125,125,125,122,121,123,122,124,128,125,129,135,131,131,130,132,127,125,122,119,120,121,122,119,112,108,110,115,117,117,116,107,98,103,106,107,111,110,103,101,101,105,111,110,106,101,98,104,106,105,108,103,104,93,90,90,92,96,105,107,112,108,102,92,79,85,82,92,97,102,99,103,98,88,97,94,95,100,104,103,100,93,95,80,89,80,74,79,61,65,61,66,59,54,60,56,53,56,55,56,60,55,56,57,62,60,56,59,59,54,54,57,54,54,49,53,53,50,50,48,51,51,51,49,50,53,55,50,50,49,46,52,49,47,47,50,48,48,48,42,47,44,46,50,46,46,47,49,47,49,43,46,47,37,48,43,44,49,41,48,43,44,41,40,44,41,43,42,43,48,41,45,42,36,40,41,39,44,42,41,48,39,38,44,41,42,47,46,43,39,39,42,42,41,39,46,41,38,41,39,42,41,41,42,40,39,42,36,36,37,39,41,37,40,39,39,38,43,38,40,44,37,36,39,38,41,45,41,36,36,39,36,33,36,37,38,36,38,40,40,39,39,40,37,44,45,42,42,45,46,44,47,50,43,46,47,52,50,49,57,53,61,57,58,68,64,66,83,85,59,63,73,82,101,98,100,105,124,126,142,132,108,122,140,165,160,149,172,192,146,101,97,116,105,103,117,141,178,208,233,249,253,253,252,250,253,253,252,252,253,253,252,252,209,38,16,13,12,29,17,30,27,24,26,27,25,31,29,25,26,22,28,23,24,27,24,22,21,26,29,29,35,32,28,35,39,32,29,33,35,42,39,47,52,54,57,52,57,59,51,60,74,89,98,114,130,145,164,138,122,125,130,145,156,151,142,137,143,153,166,160,154,160,164,146,154,151,158,164,165,164,165,161,158,155,153,158,161,160,161,164,160,153,152,155,152,147,146,146,150,148,146,146,143,141,142,146,148,150,151,156,157,155,157,157,152,152,155,158,158,156,156,157,160,160,160,156,
154,151,151,151,151,155,155,155,153,157,151,150,133,114,93,81,103,130,154,164,164,162,156,154,150,152,156,161,163,163,165,161,169,159,166,113,11,1,9,10,13,11,12,12,13,13,13,13,21,20,22,21,24,24,18,24,22,24,20,19,22,24,26,24,26,24,24,28,23,26,27,23,23,24,27,20,21,19,17,18,17,19,19,19,16,21,19,20,18,18,19,18,18,17,17,17,17,19,19,18,20,19,18,17,16,17,16,21,18,17,21,19,21,21,18,18,17,20,18,19,22,19,18,18,19,23,22,28,29,23,24,27,23,27,28,23,22,22,25,22,22,29,25,27,31,31,30,30,27,29,27,21,27,28,24,29,30,36,35,36,32,29,29,30,34,30,35,42,36,41,38,38,45,42,47,45,42,42,42,39,47,55,61,76,81,93,93,99,106,111,118,117,116,102,99,99,107,112,122,130,130,128,124,125,117,125,127,127,130,130,122,120,122,128,128,123,120,127,129,122,121,128,130,128,131,132,128,123,125,127,124,126,125,122,125,122,122,125,123,122,126,126,128,127,122,121,124,124,130,134,125,124,129,134,138,137,139,137,134,129,128,137,136,134,131,132,134,128,131,129,128,128,124,121,125,122,122,129,129,129,121,124,130,124,128,129,128,125,125,124,126,137,134,130,135,133,129,137,134,130,126,122,121,126,127,122,124,122,118,116,120,122,128,117,116,114,117,117,115,126,118,115,114,117,113,109,104,97,93,98,104,103,104,98,88,86,86,89,81,84,93,101,108,111,112,113,109,107,105,97,103,104,107,102,105,102,86,103,118,116,123,118,118,104,102,106,102,120,110,107,102,80,74,61,57,55,59,55,54,59,56,59,54,63,57,55,67,64,56,53,56,53,55,57,53,57,55,51,53,52,54,50,53,57,51,55,52,51,53,50,51,48,53,52,51,50,48,54,50,47,49,45,47,49,51,50,40,49,51,46,50,46,47,48,47,47,42,40,43,46,47,43,45,40,43,45,42,45,42,43,40,45,44,42,44,44,43,40,39,43,36,43,45,42,44,39,46,42,37,45,42,37,43,42,36,39,41,38,41,37,37,39,42,39,37,42,36,40,40,39,40,38,41,41,42,36,39,40,39,41,35,37,39,36,35,38,36,41,41,42,39,39,44,38,39,36,39,38,34,38,38,39,34,37,42,35,35,40,43,36,42,39,39,46,42,53,44,40,49,45,48,48,53,45,46,56,51,55,54,49,57,61,49,48,53,72,72,86,82,72,89,95,114,92,97,108,112,132,125,152,190,209,179,125,135,161,145,114,86,74,79,98,127,148,189,204,190,198,202,198,197,199,204,210,222,251,179,19,15,9,18,29,21,23,26,29,27,28,29,26,21,25,27,31,29,24,24,22,26,27,23,32,31,30,32,35,34,32,39,32,36,35,36,41,39,50,51,51,51,52,53,51,54,61,61,75,94,120,133,139,139,113,103,110,129,152,165,165,157,155,155,155,151,145,146,155,161,162,159,151,152,162,166,167,168,160,156,148,153,153,152,161,156,161,159,160,164,159,160,154,152,151,147,141,147,148,155,155,147,148,146,143,139,143,146,148,150,153,155,160,160,157,156,158,163,160,162,164,165,161,162,162,155,158,160,158,155,159,157,159,160,160,158,151,128,98,85,95,124,148,159,167,165,159,152,153,152,155,157,156,161,160,164,159,169,114,11,1,9,10,13,11,12,12,13,14,13,13,28,21,20,20,24,21,19,23,24,24,23,21,22,27,23,24,24,26,24,21,21,24,28,26,31,28,26,24,19,18,18,18,19,20,19,19,17,21,20,16,19,23,16,20,21,17,19,17,19,16,19,20,17,18,17,18,19,18,21,19,16,21,18,19,20,19,21,18,18,21,19,16,19,22,17,20,22,24,25,24,21,25,24,25,28,27,27,27,26,29,26,23,26,22,24,28,26,24,29,24,27,27,24,25,25,31,28,27,36,35,35,34,32,31,28,29,31,35,37,37,38,38,42,48,42,41,48,43,50,42,40,46,44,51,55,53,64,69,79,89,90,93,102,105,112,108,116,122,127,129,128,126,123,115,111,111,116,120,117,124,126,127,130,124,126,119,118,120,121,125,124,124,130,129,132,133,128,129,126,118,115,123,128,130,134,128,128,131,132,137,128,133,128,121,131,127,129,134,126,137,133,131,114,120,128,136,132,137,134,140,143,138,131,141,136,139,145,146,146,139,132,133,131,130,125,127,124,123,128,129,121,129,136,135,137,130,131,133,134,139,138,136,137,139,129,130,136,132,135,132,136,129,131,139,133,136,135,141,145,147,141,132,127,132,128,1
28,127,127,119,125,128,131,122,112,114,113,118,118,117,113,121,129,133,133,122,113,118,103,125,110,101,110,107,112,106,95,89,84,99,106,102,92,100,110,107,105,112,108,84,107,93,81,70,84,85,84,89,81,84,95,88,115,82,62,69,67,71,82,71,81,86,78,69,60,62,62,61,61,53,53,49,55,54,51,54,52,51,47,61,55,53,54,52,57,56,54,58,53,51,53,51,53,51,55,51,51,49,50,49,50,52,45,48,46,45,51,44,42,47,47,46,46,48,43,48,46,44,51,41,38,43,43,46,44,45,45,42,44,45,42,41,46,44,42,38,45,46,38,43,42,40,42,42,41,44,42,40,44,45,39,37,41,36,39,46,41,40,37,34,39,38,36,41,37,42,40,38,41,34,40,39,36,39,39,37,38,40,35,38,39,37,39,34,42,38,38,38,35,37,39,40,41,39,37,39,41,38,39,38,33,37,38,37,39,33,36,38,37,35,40,38,36,41,39,41,42,44,45,39,46,41,34,49,45,43,43,36,42,45,47,47,44,46,51,51,53,57,53,66,67,62,57,59,73,77,60,74,78,82,101,101,126,153,169,149,119,134,171,165,164,142,125,92,128,150,131,147,139,138,125,99,90,97,98,101,107,126,177,93,16,24,9,21,24,19,26,20,27,28,25,24,24,24,24,27,23,26,24,26,24,25,24,24,29,25,30,32,33,38,38,32,34,34,38,41,39,43,47,47,49,49,51,51,55,62,60,62,64,87,92,99,102,115,84,100,110,116,125,139,151,162,171,163,157,152,156,154,165,169,165,159,151,150,156,158,156,156,156,150,142,141,143,149,150,154,151,153,159,160,162,161,158,155,151,147,147,155,163,165,167,162,159,153,144,142,139,142,146,148,152,155,157,155,151,149,153,157,157,154,153,159,162,162,161,156,156,162,158,153,153,153,155,155,163,155,159,147,129,108,79,91,112,142,162,170,171,166,162,158,152,148,148,151,152,159,152,167,115,11,1,9,10,12,11,14,12,13,14,14,13,22,24,17,21,21,19,23,22,21,24,18,21,22,22,23,23,25,23,22,21,25,27,24,21,30,22,22,25,24,23,16,16,17,19,19,16,20,20,19,19,19,19,19,16,17,17,17,17,23,21,16,20,20,21,19,17,22,21,19,19,19,16,24,23,17,19,18,23,19,22,21,19,20,19,24,19,20,21,21,25,19,24,19,20,28,21,24,26,20,24,24,23,25,22,21,22,24,23,22,26,23,26,29,20,27,25,27,33,27,27,32,29,29,29,29,26,31,35,37,33,29,34,33,38,40,39,41,43,37,37,42,47,54,55,53,57,57,66,71,80,81,79,93,103,116,124,131,129,132,129,124,118,122,124,120,121,118,121,121,123,124,125,125,127,122,123,126,131,126,127,131,129,129,128,128,125,127,124,122,127,133,136,134,135,133,137,141,131,130,132,127,124,122,122,130,134,134,129,125,118,114,114,106,116,127,132,124,111,118,127,131,129,128,133,130,134,136,140,142,132,133,128,133,136,130,133,129,130,130,126,122,123,127,131,133,132,129,132,132,131,132,128,122,120,122,122,126,130,132,136,127,126,137,141,139,141,141,138,135,131,135,132,130,128,128,127,125,113,118,121,122,125,117,117,114,115,114,126,128,125,128,132,127,130,134,120,116,118,111,107,106,108,118,117,105,98,95,87,78,87,90,88,94,90,87,92,98,83,75,65,59,50,50,59,54,64,64,57,57,60,55,60,51,39,61,68,83,79,77,92,84,72,68,68,64,61,56,57,59,52,60,58,55,59,56,52,51,56,58,54,54,54,52,56,57,49,56,55,51,51,54,56,51,59,56,49,54,52,48,49,52,51,56,45,50,50,50,48,48,51,44,44,46,46,45,46,46,48,43,49,45,41,48,45,45,44,45,42,45,42,39,44,44,46,42,42,39,42,44,39,44,41,39,40,38,39,42,41,41,41,41,41,42,39,45,42,41,46,41,41,41,39,42,45,43,45,44,40,36,37,46,42,39,37,34,37,41,37,36,39,40,39,38,38,40,42,39,40,38,37,37,39,41,36,39,37,40,34,36,39,33,39,34,37,37,34,37,37,37,35,38,35,37,40,38,43,34,40,43,40,41,36,38,36,42,41,35,39,36,42,44,40,43,46,43,51,57,57,56,52,47,50,56,55,53,49,60,54,61,81,83,99,113,125,118,110,117,141,200,223,205,190,180,187,203,196,184,147,105,74,62,74,84,78,70,42,45,77,39,28,29,10,23,22,23,25,17,24,21,26,25,22,25,27,27,17,29,28,27,24,22,31,27,24,26,30,35,32,39,39,34,37,38,39,44,45,46,53,47,48,52,54,57,57,60,65,67,64,69,85,83,86,104,98,110,122,110,102,115,137,159,170,165,159,157,
158,160,169,170,163,155,151,153,155,156,149,147,153,152,140,148,152,152,155,153,154,154,159,158,153,153,153,154,159,157,153,162,164,165,167,165,161,159,160,157,154,153,159,155,156,155,155,157,146,146,151,158,154,148,150,150,154,152,157,161,159,161,160,154,152,149,151,154,153,156,158,159,155,136,111,91,87,110,139,162,173,178,174,168,166,158,151,151,146,148,148,162,112,13,1,10,10,12,11,14,12,13,14,14,13,22,20,17,18,19,23,22,22,21,21,22,24,23,23,24,21,24,27,22,23,26,24,23,23,28,20,23,24,18,17,20,18,17,19,16,17,16,21,20,17,19,16,17,16,17,21,19,18,18,19,19,17,19,19,18,18,21,19,18,18,18,19,18,17,20,23,20,22,19,18,20,19,21,17,21,21,24,21,22,20,19,24,22,24,24,22,23,21,24,27,22,22,24,20,22,24,22,30,24,24,27,24,26,24,25,24,25,30,27,24,29,24,29,31,30,29,28,31,33,27,27,27,34,37,36,40,41,40,44,47,51,55,61,57,61,61,59,55,51,54,57,67,100,111,113,108,106,99,95,98,106,115,126,129,129,127,127,124,135,137,133,127,128,128,124,128,130,128,128,130,127,121,125,128,126,123,125,128,125,130,135,139,129,123,126,122,126,125,125,129,128,131,127,132,136,127,125,118,127,120,113,116,115,111,124,118,115,124,118,117,120,118,130,123,124,126,125,125,133,137,136,132,134,135,132,137,138,125,121,126,119,122,114,128,126,125,129,129,125,116,117,112,111,113,117,124,126,128,127,125,128,129,134,140,136,136,128,122,115,110,105,105,117,107,114,120,114,110,119,116,118,121,119,124,111,123,115,114,114,116,122,127,128,126,118,111,119,123,112,119,123,110,122,115,103,122,134,120,106,102,88,92,96,103,93,84,83,65,68,76,83,61,55,70,60,66,67,40,51,61,39,64,59,51,41,65,45,78,72,58,52,47,55,56,64,49,51,61,53,57,57,53,54,61,60,59,64,59,60,55,57,56,53,55,53,55,52,55,55,52,55,53,53,49,48,54,51,55,53,55,51,49,59,53,55,51,49,53,53,44,45,53,51,53,48,43,48,48,46,51,52,42,44,48,46,41,47,45,41,45,44,46,43,41,43,42,44,39,47,42,40,47,38,42,39,41,45,44,43,46,39,40,44,40,44,40,46,42,38,42,42,41,38,41,42,39,45,42,36,39,39,41,45,36,40,43,35,40,39,36,40,39,33,40,40,39,38,37,40,37,38,37,34,42,34,37,36,34,38,35,36,39,34,37,36,37,39,32,36,39,35,33,39,37,38,39,38,42,35,39,39,37,41,34,35,41,33,40,36,37,40,41,43,41,48,57,57,46,48,48,51,54,50,55,61,67,67,55,62,80,84,102,107,98,97,104,104,120,170,192,187,179,171,171,172,161,148,130,121,121,127,140,137,146,129,103,109,108,43,18,27,9,22,26,17,23,23,23,20,23,25,20,26,23,24,26,22,29,24,25,27,23,23,30,32,29,32,35,39,35,36,41,40,43,47,47,49,48,51,56,58,63,62,66,63,73,66,65,80,94,101,102,119,114,125,147,134,121,134,145,154,141,136,130,135,141,140,147,150,149,147,143,147,152,153,152,154,155,160,146,163,165,167,162,162,160,161,162,160,152,153,159,164,163,158,157,162,162,162,162,154,155,155,155,159,159,158,163,165,160,159,160,160,153,151,155,156,158,155,155,153,150,150,151,151,154,155,155,156,155,147,145,145,152,151,157,151,155,147,135,118,95,90,101,135,157,170,175,174,171,162,150,148,145,146,147,161,112,13,1,9,10,14,11,13,12,13,14,13,12,22,22,16,24,22,19,22,18,24,24,22,21,23,22,24,24,19,24,23,21,26,24,23,24,23,24,22,20,22,19,17,17,18,17,17,17,17,16,19,18,18,17,17,21,18,20,19,18,19,17,17,18,17,18,19,18,18,19,21,17,22,21,20,21,17,20,19,21,17,19,22,19,22,19,17,20,21,27,25,27,27,31,34,28,26,27,31,33,38,35,27,30,32,25,30,29,25,26,25,31,30,35,31,29,36,27,27,29,25,28,27,31,33,29,34,33,29,40,43,41,39,36,41,38,43,48,45,48,53,56,57,57,69,72,69,78,71,70,56,53,55,57,77,76,75,73,73,76,85,100,112,119,126,128,128,127,128,126,129,127,127,127,123,132,130,128,125,128,126,124,124,119,125,125,127,126,125,126,123,131,131,124,125,124,122,123,123,124,128,130,131,135,132,130,128,122,118,118,127,128,130,135,134,136,127,125,124,129,133,128,134,139,138,136
,134,130,117,116,122,121,134,132,131,130,129,133,128,127,132,133,129,127,127,126,123,126,129,126,120,118,124,126,125,130,140,145,141,140,127,125,126,129,133,130,132,135,131,123,122,117,122,118,110,113,113,115,126,124,123,122,119,114,113,114,109,112,107,108,111,118,114,117,110,101,106,104,118,128,126,129,139,136,126,113,101,111,130,125,119,120,103,100,116,114,101,105,113,116,128,127,114,98,96,92,76,78,82,83,81,77,81,84,81,77,86,73,78,84,76,67,61,65,67,74,69,60,57,55,53,48,55,47,59,61,55,65,66,68,65,50,53,52,47,49,51,49,51,54,51,53,48,54,50,52,50,45,50,48,56,49,55,53,49,54,51,51,49,52,51,48,51,49,48,47,52,47,47,50,45,46,48,45,44,41,48,46,43,46,46,45,45,46,39,45,48,42,40,49,43,40,51,44,45,40,42,43,40,46,44,44,42,44,46,44,42,40,45,42,38,43,44,39,44,41,39,44,42,42,43,41,41,43,46,42,39,42,38,37,41,37,37,41,40,38,38,39,39,39,40,42,40,36,37,36,38,39,37,39,38,34,36,33,36,42,39,39,38,41,39,38,36,35,44,41,40,39,39,42,34,39,41,42,40,41,37,37,36,36,41,40,44,44,47,49,55,63,59,48,54,58,60,51,46,64,78,79,67,56,61,74,82,103,98,71,76,93,87,92,124,140,146,143,133,124,116,109,103,107,134,165,187,196,177,168,181,184,183,156,64,30,25,13,28,22,27,25,23,31,22,27,26,25,27,26,26,23,28,27,29,29,29,32,29,30,28,31,39,35,39,44,40,41,44,48,53,53,55,57,59,60,62,67,71,68,61,63,66,67,93,123,120,121,133,125,132,149,151,142,154,161,150,139,127,117,121,128,122,133,139,141,142,137,142,149,154,159,161,160,156,153,159,159,159,160,158,159,158,165,167,164,163,163,163,162,165,165,162,162,162,160,162,158,156,156,154,152,151,160,159,159,156,157,163,159,158,160,162,165,163,162,164,160,152,147,146,146,150,156,155,154,150,147,145,144,152,154,151,154,152,153,149,118,92,83,95,122,146,167,173,165,163,152,150,144,147,146,155,111,14,1,9,10,14,11,13,12,13,14,14,14,19,21,23,21,25,19,23,20,21,27,18,23,24,17,24,24,23,23,24,25,21,28,23,18,27,24,21,25,21,19,17,21,19,18,20,18,18,17,19,18,22,20,17,17,22,19,16,19,18,18,21,17,16,18,19,22,19,17,19,18,19,22,19,17,23,21,20,22,17,20,20,22,22,19,21,21,23,27,26,29,31,32,35,28,30,39,43,43,38,41,46,45,44,37,31,30,35,30,31,41,35,38,38,38,34,35,36,28,33,32,34,37,36,39,39,42,36,49,53,47,45,46,45,48,54,59,61,60,54,56,59,70,86,96,110,100,87,99,78,84,80,81,86,99,94,97,89,91,100,109,118,119,122,120,120,117,115,112,107,120,122,121,125,123,127,125,123,127,125,128,126,125,120,121,123,121,128,130,136,135,131,132,131,132,132,131,131,129,131,134,134,134,130,133,130,124,125,128,138,136,136,141,141,142,145,144,136,138,140,138,142,143,148,152,145,129,116,115,125,128,127,131,136,133,124,125,136,137,134,132,135,126,126,127,125,131,133,131,129,125,130,130,138,137,134,134,129,127,125,125,128,133,129,141,134,127,122,127,127,133,137,134,138,139,141,141,138,136,135,136,136,134,120,121,122,123,131,128,129,120,112,107,99,91,93,106,109,107,102,102,111,110,106,95,92,98,94,102,119,119,112,118,122,100,86,96,107,107,113,122,122,94,102,98,90,89,88,93,94,84,81,79,84,87,108,78,74,74,90,77,80,80,69,66,73,57,55,53,55,58,55,63,57,49,49,50,54,53,54,49,52,54,51,54,50,47,47,47,50,49,55,50,49,53,51,50,49,52,50,54,52,55,52,47,49,47,47,45,48,47,47,47,44,50,47,44,46,45,46,46,42,44,43,44,45,46,48,49,43,44,49,47,45,43,45,42,44,43,45,44,41,44,40,44,43,44,40,41,46,41,42,44,47,43,43,41,40,39,41,42,40,42,44,42,42,41,37,39,41,45,42,42,43,42,38,39,40,39,42,39,40,38,37,38,39,42,36,43,39,36,43,37,41,36,35,42,35,42,37,36,37,37,36,32,38,38,36,39,43,38,38,42,38,36,41,36,34,36,38,41,39,38,39,39,35,37,37,36,42,40,44,45,41,48,51,50,51,48,52,53,54,46,48,56,54,59,55,43,52,50,55,73,61,54,44,69,62,57,75,98,106,90,78,60,97,107,102,92,104,122,142,149,129,125,138,167,173,160
,92,41,24,17,30,24,27,29,23,29,25,30,27,29,29,22,32,29,26,29,22,30,33,27,29,29,36,35,34,40,41,41,44,41,44,47,49,50,56,57,56,60,62,60,66,63,59,55,55,70,93,114,120,123,124,110,111,139,145,129,144,141,149,141,139,143,140,141,149,153,152,151,148,146,146,155,166,161,160,163,152,151,162,161,158,156,151,150,149,153,159,155,152,153,154,154,151,154,151,157,156,158,164,162,159,152,148,150,149,151,153,150,151,156,157,157,157,163,160,160,161,159,156,158,159,152,147,145,147,149,146,143,146,142,141,145,147,149,145,152,154,151,150,141,122,97,82,89,115,139,157,161,160,153,147,144,144,139,152,109,14,1,10,10,12,11,14,12,12,14,13,13,20,22,19,22,23,22,23,21,24,22,21,18,26,27,22,25,24,24,22,22,22,25,22,24,24,23,24,24,22,18,20,19,18,17,17,19,19,19,20,19,16,19,20,19,15,21,22,17,19,17,18,17,19,18,19,19,16,19,21,16,20,21,18,20,19,18,21,27,17,20,24,19,21,17,24,25,23,17,24,30,27,35,31,32,33,38,40,34,37,38,38,42,43,45,41,33,41,37,35,36,32,36,35,34,36,32,30,34,32,33,36,39,37,39,39,36,40,35,38,43,44,45,44,45,49,49,52,51,51,54,57,55,71,84,95,108,89,93,93,103,113,117,132,125,130,130,125,120,122,118,118,121,118,116,115,110,107,104,106,120,127,126,116,114,110,110,108,118,124,121,123,122,127,130,130,131,133,135,133,139,135,129,128,128,129,127,125,125,130,128,127,128,130,127,127,125,128,137,137,135,124,118,123,131,136,141,139,136,134,133,139,142,143,141,132,129,136,137,142,134,128,127,133,132,133,137,133,128,129,128,127,126,128,133,133,141,141,140,141,139,141,144,140,129,122,121,118,125,129,140,142,137,137,141,141,140,139,135,130,124,125,127,125,135,143,145,147,137,145,144,139,127,124,133,128,129,131,133,112,95,100,101,95,101,111,105,93,90,86,81,86,83,87,80,86,98,92,90,101,118,118,128,130,100,75,64,71,72,75,77,75,83,96,90,93,89,83,86,89,84,72,63,75,81,75,70,60,59,60,50,52,53,50,57,55,53,48,50,54,58,70,62,61,57,53,61,49,63,69,64,68,61,63,64,59,58,63,63,55,50,60,62,59,63,54,54,51,53,55,53,54,49,52,49,47,52,52,45,46,49,48,51,50,47,49,51,49,50,48,50,40,46,53,47,53,51,47,49,47,48,47,47,50,45,44,49,43,45,45,45,48,43,42,47,47,43,44,44,46,44,42,40,42,44,46,40,41,41,42,41,39,40,43,42,38,42,42,41,37,41,42,39,41,36,39,44,43,38,38,40,37,39,36,40,39,40,37,44,44,37,39,39,43,37,35,39,40,39,38,40,36,34,42,34,38,39,38,40,38,39,38,44,43,36,40,35,33,39,36,44,32,35,41,37,43,41,40,39,42,44,48,46,41,47,49,48,53,50,47,49,52,54,54,52,42,45,46,44,51,43,46,50,53,53,51,56,49,36,46,73,81,56,45,66,97,115,108,83,77,77,95,101,85,82,83,115,144,149,108,47,22,19,27,28,29,28,25,30,28,30,32,28,29,28,29,29,27,27,29,29,32,29,36,32,32,37,38,40,34,37,44,44,44,49,48,47,57,52,50,57,59,61,62,67,57,56,59,72,90,102,116,122,116,101,117,132,127,113,116,116,114,132,162,173,164,161,165,173,162,159,153,148,151,160,164,159,158,158,158,162,166,164,160,157,151,152,153,155,157,154,147,147,147,142,144,143,142,147,148,152,160,162,157,153,153,148,146,147,148,149,146,148,151,151,153,153,153,155,153,151,152,153,156,155,152,150,149,150,139,141,143,145,147,146,142,138,142,144,146,144,147,149,137,125,101,89,90,105,132,148,162,158,160,152,147,143,150,108,14,1,10,10,12,11,14,13,13,14,13,13,21,19,22,22,19,19,23,18,20,23,23,23,21,22,24,24,21,21,23,22,25,23,28,24,21,25,27,24,19,19,16,16,19,18,18,17,18,19,19,18,19,20,21,19,20,20,16,17,17,19,19,19,21,18,18,19,17,20,21,20,21,19,21,19,19,22,20,20,25,18,19,23,20,19,23,21,24,24,21,25,27,29,28,24,27,25,23,26,31,24,25,30,29,33,31,32,30,33,33,29,31,35,34,35,28,33,38,32,36,36,36,42,37,42,42,41,41,37,37,41,42,42,43,47,45,47,51,48,48,49,50,56,67,69,75,79,78,85,73,77,101,103,113,106,115,122,126,126,135,132,124,123,122,124,127,131,123,134,135,137,131,128,
128,119,117,118,120,125,118,120,116,121,130,131,133,132,130,130,130,130,128,128,129,126,123,125,129,128,131,130,123,120,123,130,127,125,128,131,130,122,118,119,122,132,133,123,126,127,121,117,129,129,129,130,132,140,136,133,131,127,124,129,134,137,133,132,132,131,130,129,127,127,135,141,139,140,135,141,143,144,159,155,153,145,135,143,146,146,149,147,142,144,143,146,138,146,140,139,139,141,139,134,131,130,132,134,139,136,137,125,115,106,103,116,126,120,121,120,111,113,113,119,119,122,112,96,94,111,122,123,113,113,107,99,103,116,101,101,109,111,114,119,116,101,106,111,89,103,95,99,88,88,90,86,90,85,62,76,71,79,48,70,74,74,45,42,64,65,55,47,55,64,68,63,70,70,68,72,78,83,77,83,78,75,80,76,68,100,87,81,80,69,83,77,81,92,90,84,73,62,65,75,74,69,60,50,56,57,51,51,55,51,47,49,52,50,51,51,51,49,55,51,46,53,48,54,53,55,55,51,53,50,54,54,51,52,53,53,51,46,42,51,48,53,50,36,46,44,46,44,43,46,45,47,41,46,43,42,44,41,44,44,45,42,42,39,40,39,40,43,41,39,33,44,42,42,43,38,38,38,37,40,41,39,40,39,41,38,37,41,37,40,43,34,40,39,35,42,36,34,46,37,36,36,33,44,38,38,39,40,38,37,41,33,38,39,39,39,37,40,42,38,38,40,35,35,42,38,35,36,37,35,37,39,37,38,41,46,41,41,44,43,50,48,50,46,50,48,52,69,68,69,63,52,45,46,53,54,52,48,54,55,52,60,66,67,65,46,60,64,88,81,71,79,87,98,104,95,103,93,97,94,92,82,66,83,102,115,94,44,28,26,27,28,33,31,29,30,29,34,30,30,30,24,28,32,33,31,26,35,29,29,33,33,37,30,37,36,33,42,37,42,44,37,46,44,48,47,50,55,55,57,57,66,54,53,55,64,78,96,109,123,121,117,128,134,131,124,118,116,123,139,160,164,163,171,172,161,148,148,143,146,153,154,155,149,158,159,158,160,162,161,160,163,157,150,152,151,154,154,150,150,152,149,154,155,152,156,155,152,159,159,163,163,159,154,145,143,154,152,145,146,143,142,148,151,153,155,152,152,148,144,149,149,150,151,150,152,141,141,148,147,150,148,144,143,142,144,144,143,136,137,141,143,131,112,95,83,102,125,144,155,161,159,154,146,154,108,14,1,9,10,13,11,13,12,13,14,13,14,21,21,17,23,22,20,21,21,19,20,18,21,25,24,27,20,24,21,21,29,23,24,24,26,22,23,26,21,20,17,17,18,17,19,21,17,20,17,17,20,18,19,19,20,19,19,19,18,21,20,17,19,19,19,20,19,18,21,24,20,17,22,23,20,22,18,22,22,21,21,17,24,22,20,29,21,22,22,23,24,23,23,21,25,21,24,27,27,27,22,23,27,32,24,26,32,30,29,27,31,30,30,34,34,34,29,29,32,35,37,39,42,40,43,45,43,46,46,44,43,45,44,44,46,46,46,48,45,46,46,46,53,66,59,63,69,74,91,87,95,93,91,93,87,95,103,119,134,141,141,137,137,128,132,141,139,141,141,137,137,132,130,130,127,134,144,144,142,138,136,139,134,132,127,127,133,132,133,128,128,124,124,130,134,140,139,143,142,146,144,134,132,134,137,136,128,126,122,115,116,116,123,132,136,135,131,135,130,131,131,129,125,121,129,134,135,130,119,127,135,141,143,144,134,135,141,145,141,130,133,136,137,141,149,141,137,134,141,148,140,142,144,147,146,146,151,144,146,142,134,134,130,131,132,131,131,132,136,141,144,144,141,134,128,127,125,132,127,133,130,125,122,123,127,120,125,119,125,133,143,144,138,141,144,136,126,124,139,141,136,143,148,141,122,123,134,129,114,98,100,101,100,94,90,100,98,105,108,122,124,103,97,93,93,98,93,95,99,94,83,91,89,82,77,73,81,79,79,79,72,73,76,86,95,101,94,89,90,92,81,77,76,81,64,64,83,76,77,57,61,72,71,82,72,78,89,90,88,86,67,64,67,68,69,61,57,60,58,47,54,53,49,51,49,55,46,53,54,50,54,49,57,50,54,53,48,53,51,55,52,52,50,54,50,53,48,50,53,46,48,46,43,46,45,44,42,42,46,48,49,46,45,42,46,43,46,43,39,46,41,46,46,42,42,41,43,43,40,42,40,42,37,41,41,39,39,39,41,37,42,37,41,45,39,46,41,37,40,41,39,36,40,39,41,36,37,41,37,41,38,41,38,37,42,33,37,41,41,39,33,39,38,36,35,41,39,35,41,36,41,44,37,40,41,39,39,39,41,36,
36,38,41,39,37,40,41,39,42,41,40,39,44,51,44,46,47,47,45,59,62,56,65,59,55,48,49,61,65,54,59,60,53,66,71,81,95,81,73,73,71,95,102,93,84,69,78,96,104,106,92,93,85,87,86,59,60,65,83,80,45,35,27,26,32,32,29,30,34,32,30,32,30,33,29,32,34,30,32,31,29,31,31,31,32,32,35,37,36,36,39,41,42,41,43,44,41,47,47,48,51,50,57,57,61,56,53,53,62,70,68,99,116,120,127,132,125,130,139,129,125,138,149,150,149,157,164,153,146,137,145,152,155,154,152,151,147,152,158,160,156,155,154,158,165,161,155,153,158,159,164,162,159,157,153,162,169,163,158,155,154,152,155,160,163,168,168,162,152,155,156,150,151,148,150,153,153,157,160,162,162,160,157,155,150,149,145,146,151,148,151,149,150,150,149,151,150,146,149,145,139,139,137,139,146,146,136,114,88,85,94,122,142,161,162,162,153,158,112,14,1,9,10,13,11,13,12,13,14,14,14,18,21,20,26,24,22,21,23,24,17,22,22,22,22,21,24,25,22,24,24,24,27,24,28,24,24,24,18,22,19,19,18,21,22,17,19,17,19,17,19,18,19,22,19,18,20,19,22,21,18,21,19,20,19,20,18,20,21,19,21,21,19,23,23,19,23,19,26,22,21,25,18,21,22,23,21,24,23,23,21,21,22,22,27,24,26,30,25,29,27,33,30,29,32,31,35,33,39,34,28,39,38,33,35,36,39,39,44,45,39,54,46,50,54,47,50,52,52,56,54,50,57,49,56,53,45,55,51,55,53,67,70,77,76,88,84,82,89,110,111,107,92,92,90,91,86,99,110,117,120,123,125,122,127,125,127,128,127,130,131,125,123,133,135,143,147,145,139,128,137,136,130,133,125,127,130,124,122,123,123,118,127,120,133,136,147,151,153,153,152,138,149,134,139,134,133,130,129,123,127,128,131,140,141,137,139,131,136,134,131,136,128,131,131,131,130,125,132,139,139,139,142,149,145,141,147,145,137,141,137,132,129,132,125,129,124,128,130,122,124,121,122,130,132,136,125,128,127,122,120,106,107,109,119,124,125,119,115,127,135,126,144,135,118,115,122,132,113,127,134,131,132,128,121,122,119,120,123,131,151,131,122,131,130,131,126,120,124,126,136,139,133,128,126,126,120,113,99,95,94,91,89,80,79,61,87,90,86,85,102,83,85,82,88,114,78,106,98,104,108,102,94,68,65,78,72,84,67,73,69,66,66,70,80,89,75,70,72,73,50,51,60,53,54,52,67,53,49,55,57,58,60,66,62,61,65,72,70,70,57,59,64,65,70,62,62,58,64,71,61,57,49,50,53,52,56,48,49,53,45,50,49,50,50,45,49,44,48,50,48,50,49,43,51,47,42,47,45,48,45,43,46,42,46,44,44,49,43,43,47,49,44,44,44,42,46,41,47,44,39,46,39,44,44,39,45,44,41,40,42,38,46,38,39,42,36,41,38,42,42,38,42,38,40,40,38,44,37,41,42,33,39,39,36,41,39,37,40,35,41,38,39,41,37,40,40,42,34,41,44,37,40,40,42,41,41,39,39,43,40,44,44,39,41,38,44,41,39,41,38,43,39,42,40,40,40,37,42,40,39,41,40,42,46,43,44,46,45,44,45,47,52,49,47,50,55,63,60,54,57,61,59,67,67,74,88,71,63,66,69,80,80,81,85,68,65,73,78,84,55,54,60,61,63,54,50,55,66,65,48,37,24,32,34,29,29,35,29,31,36,30,32,31,29,29,28,30,34,33,30,30,34,32,31,35,29,38,33,30,42,38,41,42,39,44,45,46,48,48,50,55,54,59,60,64,58,59,73,79,68,77,80,96,107,105,104,111,132,146,122,132,141,133,134,133,148,145,147,147,153,157,163,154,149,148,145,147,151,153,152,153,148,154,159,159,159,156,148,152,155,153,157,154,156,159,162,162,155,151,148,151,148,152,153,154,155,158,154,152,150,147,151,151,150,153,152,158,155,160,157,158,159,159,158,155,152,147,151,147,149,152,145,148,150,149,147,147,149,147,148,141,144,142,143,145,139,129,106,86,75,90,115,141,152,160,160,163,111,14,1,10,10,12,11,14,12,13,14,14,13,21,22,20,19,19,19,26,23,19,23,21,24,19,27,26,19,24,27,21,19,24,29,25,23,22,23,24,21,23,19,18,18,17,20,20,17,19,18,17,21,17,23,21,21,20,16,21,19,17,20,22,18,18,19,21,20,21,19,23,23,22,21,21,23,21,21,19,19,24,21,23,22,19,25,27,18,24,22,21,24,23,22,25,30,28,34,29,28,37,30,22,27,30,31,38,27,34,33,29,34,37,37,38,42,43,46,45,47,50,49,52,53,5
7,56,53,55,51,56,56,51,48,46,51,53,57,54,61,67,79,88,94,92,92,97,97,91,81,77,80,93,89,89,84,80,81,79,87,92,100,105,114,132,133,131,132,125,130,124,125,127,127,126,130,134,130,129,120,120,117,114,122,122,125,122,121,126,122,131,128,134,135,131,137,134,133,127,131,133,131,134,140,141,132,127,127,134,136,136,136,137,141,139,143,139,134,131,131,128,121,127,132,131,128,127,122,125,132,131,137,135,142,145,147,149,139,130,130,135,134,131,123,120,126,138,137,131,125,123,119,115,117,128,137,131,127,122,124,133,127,118,117,118,126,129,122,118,110,109,116,130,145,150,139,125,125,113,105,97,105,112,110,108,110,106,100,117,125,136,137,129,115,105,107,106,124,124,117,124,122,131,131,124,114,93,87,92,96,98,100,98,96,94,81,84,92,88,83,68,76,80,71,75,69,78,71,54,57,60,73,78,96,83,55,55,49,62,57,53,65,64,66,70,69,75,72,64,67,64,61,60,61,57,58,58,57,58,54,62,57,55,62,60,60,57,53,56,57,55,55,50,56,54,61,65,61,60,61,64,61,62,55,59,56,55,53,54,53,47,56,50,49,49,48,54,51,46,46,45,44,46,52,42,46,49,47,45,46,46,48,50,46,51,50,45,47,48,50,44,44,49,48,45,48,47,44,44,42,46,42,41,46,43,39,44,46,45,45,47,40,38,46,43,39,42,39,42,45,39,43,44,39,41,41,38,43,39,40,46,37,42,41,43,41,42,45,39,44,41,45,43,38,45,43,43,45,46,48,42,44,46,48,47,46,43,45,45,43,47,43,40,38,42,42,40,39,43,39,39,38,37,39,39,42,40,36,39,39,41,45,42,44,43,40,43,46,44,45,49,50,48,55,60,48,47,54,61,61,55,54,57,57,53,57,62,62,65,58,53,62,67,66,57,73,93,80,64,58,60,71,63,59,45,48,61,53,57,51,51,53,50,41,32,34,32,34,35,35,35,31,34,33,32,32,29,37,35,33,34,32,35,29,34,36,32,32,33,39,33,37,41,36,44,44,43,43,42,47,49,48,52,53,57,62,65,67,64,63,80,86,67,61,67,78,90,98,87,96,125,130,118,127,139,127,127,139,146,145,154,162,164,171,170,153,146,143,143,145,145,145,145,152,148,149,148,145,146,147,150,149,153,154,150,152,151,157,165,165,160,159,159,158,154,149,146,140,141,151,150,151,148,148,154,155,152,150,155,157,157,153,150,153,150,155,160,163,154,150,153,153,158,156,155,156,153,152,148,139,141,142,139,144,147,146,147,143,144,141,132,115,91,81,79,103,126,149,152,167,117,12,2,10,10,13,11,14,12,13,14,14,13,22,19,21,23,20,22,20,21,22,20,25,23,22,25,25,24,21,25,24,23,21,24,25,22,25,26,25,19,19,19,21,20,18,18,17,21,19,19,18,20,20,17,17,21,20,17,19,22,19,19,23,19,17,21,19,18,21,22,17,21,21,18,21,21,20,20,21,24,24,19,23,26,24,20,21,21,20,23,21,26,29,24,24,27,22,29,31,27,29,29,32,26,25,26,30,27,25,34,29,36,31,29,34,41,39,42,48,48,50,54,55,50,61,55,56,53,46,49,44,49,57,56,49,48,49,55,73,88,85,99,86,87,83,85,84,82,76,75,82,83,79,95,76,97,93,100,104,111,116,120,123,128,131,138,140,139,134,132,129,129,126,123,127,126,129,125,125,122,121,129,134,134,139,138,139,137,134,131,137,135,141,141,141,140,125,129,128,118,118,129,117,137,122,112,108,120,129,122,129,135,137,132,131,129,127,118,121,135,131,137,132,141,140,137,140,141,134,133,141,142,135,127,120,134,129,121,136,138,139,129,123,132,130,133,132,147,132,137,134,137,137,139,135,132,130,131,139,142,145,137,130,128,122,122,123,124,122,119,120,125,132,137,132,133,128,121,117,116,125,124,127,127,114,122,136,133,133,142,132,122,114,109,123,129,127,125,131,117,123,122,129,133,127,108,116,122,126,128,129,120,136,137,132,120,122,117,112,96,96,94,91,97,92,77,80,68,69,70,74,94,95,93,84,72,67,80,77,76,93,102,101,95,91,81,91,83,84,86,65,79,56,59,66,69,76,77,69,66,59,55,57,60,55,58,60,54,53,51,54,54,51,51,55,63,58,59,53,52,54,57,61,59,60,58,55,60,60,55,57,53,51,51,53,52,52,53,49,48,50,47,48,47,46,49,48,47,47,47,42,50,49,47,53,47,47,47,46,47,46,49,45,46,48,48,43,45,43,43,45,44,47,41,43,45,45,45,45,45,40,39,43,41,43,43,43,42,40,40,41,42,39,44,
43,36,43,49,43,40,44,42,42,44,40,44,40,40,40,40,43,41,42,46,43,42,46,44,46,45,43,42,43,46,47,42,47,44,41,39,39,41,42,41,40,41,41,41,43,37,37,38,37,39,39,41,41,36,39,41,36,40,41,37,41,44,47,48,53,48,52,54,53,57,46,48,53,53,58,56,50,51,53,51,52,58,59,59,52,49,64,70,66,68,84,90,87,85,75,91,86,76,84,56,48,63,57,57,47,51,48,39,43,36,32,34,29,34,36,31,36,33,35,36,35,35,31,29,39,39,35,30,33,34,39,36,36,38,37,37,37,42,39,42,48,41,43,43,45,51,49,54,50,53,57,56,61,60,60,63,63,62,59,57,65,86,101,95,94,127,144,130,140,152,138,137,145,149,155,160,162,168,174,169,161,154,149,144,158,158,162,158,160,160,158,146,139,138,152,157,159,158,152,139,137,144,157,162,168,165,162,164,159,157,158,157,148,144,140,145,150,151,149,155,155,148,147,150,152,149,143,141,139,145,148,152,152,154,153,152,157,157,154,154,158,153,150,144,141,146,141,139,139,140,142,142,146,147,152,154,145,126,96,79,83,93,125,143,162,114,14,2,10,11,13,11,13,12,13,14,14,14,19,19,17,21,20,19,24,19,21,19,22,24,27,31,24,22,25,21,27,19,24,28,25,26,25,25,19,20,20,19,20,16,17,18,16,24,18,19,23,17,23,18,17,22,22,18,16,22,19,20,23,17,21,20,21,19,20,22,17,19,19,19,25,24,18,21,19,24,24,17,27,21,19,27,22,23,24,29,27,28,35,24,25,28,24,26,27,27,27,32,24,27,32,26,29,29,30,31,32,31,27,33,30,35,35,41,46,38,50,45,44,51,49,53,56,56,59,63,64,70,76,79,81,78,77,81,87,84,87,72,74,82,77,79,79,84,80,80,86,91,103,114,121,121,123,125,122,126,128,125,120,115,115,111,118,119,125,125,124,125,126,131,126,131,137,139,151,152,145,142,146,146,153,155,145,141,130,131,125,123,124,122,130,129,129,127,128,133,132,128,135,140,141,133,120,117,126,131,131,135,135,128,127,131,132,133,137,145,144,149,145,139,146,147,151,148,151,149,145,139,134,131,122,129,129,132,141,142,137,136,130,125,135,126,122,107,107,126,134,133,119,118,123,127,141,142,146,146,144,142,138,136,125,131,136,143,137,123,128,122,121,123,125,129,130,129,127,139,144,135,137,132,134,139,145,148,136,131,120,120,129,148,146,130,131,128,124,116,114,131,140,138,141,142,150,153,154,150,132,136,146,155,148,146,141,135,133,120,117,108,112,119,113,103,96,89,101,84,71,80,90,100,87,84,80,87,93,97,101,99,88,80,78,69,72,80,87,74,65,60,55,59,61,71,71,61,71,68,63,71,66,59,52,61,70,73,63,59,57,58,53,53,59,54,50,50,56,48,51,52,56,59,48,57,56,51,51,55,50,48,54,51,53,45,53,54,52,51,49,51,51,51,49,49,45,51,52,45,46,48,45,48,44,45,47,43,43,48,46,44,45,41,45,43,45,44,42,43,42,49,47,43,42,42,47,46,46,45,44,44,48,43,43,43,43,43,40,42,43,44,46,49,43,42,43,41,41,41,34,40,41,41,42,37,40,38,42,41,41,41,39,46,40,41,41,43,41,42,42,40,43,43,43,41,44,43,40,42,41,40,40,36,42,41,36,43,38,39,38,36,42,36,40,38,35,43,41,40,42,36,40,42,43,45,46,54,46,46,42,46,49,40,44,44,41,45,47,45,50,45,51,49,52,57,61,63,53,55,67,72,64,60,77,81,89,101,88,90,94,84,72,48,55,68,53,48,49,48,44,45,43,38,29,32,38,35,37,36,37,31,38,36,35,42,34,33,36,35,37,39,32,34,40,35,37,33,38,41,39,39,43,45,45,43,45,47,46,50,52,55,57,51,53,55,53,55,57,53,54,52,59,53,52,80,97,89,92,129,154,138,148,160,145,143,146,154,164,165,161,159,171,179,172,166,160,166,174,170,173,177,175,170,166,160,152,154,161,165,170,166,154,136,137,148,157,159,162,161,159,159,161,164,167,169,157,151,151,150,153,154,154,154,155,151,146,150,152,152,144,136,142,145,146,147,153,153,151,151,150,152,152,155,154,155,154,151,152,152,150,147,141,141,141,142,145,144,149,152,157,151,135,109,91,81,95,112,143,112,16,2,10,10,14,12,13,12,13,14,14,14,19,19,21,19,19,21,21,21,20,23,24,19,25,23,22,25,22,24,23,21,23,25,25,19,24,26,21,24,20,19,16,19,21,16,17,20,18,19,21,18,19,18,22,22,21,20,21,20,18,22,20,20,22,22,24,22,22,27,22,22,25,21,21,
21,24,22,27,25,20,21,20,28,19,22,28,26,22,22,23,24,30,24,27,25,23,32,28,28,31,29,27,29,31,33,35,29,34,36,36,36,30,33,41,38,45,47,47,48,51,50,45,49,47,53,60,77,103,114,77,117,108,105,103,103,78,108,74,70,75,65,95,85,117,96,94,97,96,96,108,121,131,132,130,128,123,128,123,120,120,121,121,120,126,121,117,121,122,125,126,130,133,140,143,145,145,146,142,147,142,134,127,119,116,124,126,130,135,130,128,119,118,122,130,133,128,132,141,135,146,148,156,162,141,157,153,152,143,136,141,146,141,139,137,138,149,151,153,155,151,148,138,146,150,143,137,132,136,142,143,137,139,141,143,139,130,128,132,130,131,131,130,135,139,113,128,120,131,133,134,135,132,137,141,141,142,141,143,141,137,139,138,136,136,141,152,140,141,150,148,145,143,138,134,131,132,136,141,140,128,123,115,121,119,132,147,136,121,117,111,117,125,125,122,117,118,122,119,124,131,117,115,116,129,136,137,128,122,122,127,122,120,123,117,114,108,104,104,93,96,91,93,90,90,94,98,88,96,85,69,70,68,66,72,77,77,80,82,82,72,78,69,44,65,50,52,48,50,53,53,56,50,60,60,52,57,60,62,69,66,63,66,59,54,59,64,68,76,66,64,61,57,60,55,54,51,46,53,56,52,57,54,57,53,47,51,48,50,53,50,49,49,46,48,50,51,52,47,53,50,47,53,53,51,47,50,52,48,48,50,47,42,45,43,41,41,46,48,42,42,46,45,45,44,43,44,43,45,42,46,40,47,46,44,43,43,45,44,46,45,49,47,43,44,44,41,41,47,44,42,41,45,44,42,42,40,42,42,40,42,42,39,41,41,41,39,42,42,39,37,38,41,36,39,41,37,39,39,36,39,42,38,36,42,42,42,38,38,41,36,35,38,37,33,38,37,36,39,36,42,42,41,40,37,41,34,42,43,39,42,48,47,41,43,43,44,47,44,44,47,41,42,46,42,46,42,50,53,45,46,45,51,56,62,61,57,57,57,58,66,63,66,72,84,76,77,79,57,56,63,58,56,54,56,61,56,55,45,45,42,35,42,42,34,32,34,36,37,35,34,36,39,34,39,36,35,35,33,41,37,38,36,39,36,34,37,38,37,39,41,43,43,44,46,43,48,45,48,50,49,55,56,52,53,53,51,53,53,55,60,51,58,61,55,71,82,75,78,116,137,121,134,147,140,143,147,151,164,163,150,145,157,174,177,171,161,163,164,167,173,170,173,160,164,163,160,160,159,160,165,163,157,151,148,149,156,158,157,153,152,148,150,161,159,158,157,151,149,152,154,155,153,149,154,155,153,153,151,153,147,142,146,148,149,151,148,151,150,141,148,149,147,151,149,151,153,152,150,151,150,145,146,141,140,142,137,143,139,144,147,147,147,132,119,90,82,87,111,103,19,2,12,10,14,11,14,12,12,14,14,13,20,17,23,19,19,19,19,22,24,21,18,25,24,22,22,24,24,18,22,22,23,24,24,30,28,21,23,24,20,20,16,17,19,19,17,19,18,19,21,19,23,15,19,22,19,22,17,23,22,16,21,22,19,22,22,24,20,19,27,21,25,24,26,24,20,29,22,24,24,25,22,24,27,24,29,29,24,25,26,27,26,24,29,25,25,25,29,26,28,33,29,31,31,36,31,23,35,30,30,30,34,34,33,36,42,45,45,48,48,51,52,54,53,59,66,70,70,69,76,71,71,66,65,68,69,62,53,51,59,55,65,79,87,102,101,105,110,115,133,139,141,134,134,131,131,129,125,122,121,124,124,131,136,136,138,133,133,128,132,135,136,143,138,143,143,133,130,125,113,111,114,114,121,129,127,135,132,130,130,133,134,137,141,136,140,141,135,137,138,136,136,132,135,142,149,147,151,149,141,136,131,130,127,131,133,131,131,135,143,153,152,155,155,142,134,127,129,127,122,124,130,130,127,127,130,137,129,130,129,136,140,131,145,136,141,144,150,149,136,138,141,144,144,140,136,130,140,141,138,141,143,142,133,135,132,137,144,147,152,147,137,126,132,137,146,153,148,145,136,126,135,141,148,150,145,144,143,145,145,136,117,101,94,99,101,107,119,128,118,101,93,88,93,111,118,96,84,100,108,105,92,92,87,85,80,80,84,73,85,81,69,67,74,88,102,92,97,97,81,76,61,56,60,73,81,78,75,78,69,66,61,51,61,48,46,43,47,55,57,55,57,59,55,58,59,50,50,54,55,64,66,63,57,59,54,62,66,58,57,57,59,61,60,57,54,57,59,57,62,56,61,59,54,55,54,61,63,63,65,59,60,59,60
,60,53,47,53,54,52,53,52,51,51,53,51,51,50,53,45,49,48,45,50,47,44,46,47,43,48,48,45,46,47,44,43,43,42,43,42,44,44,44,39,41,45,44,46,36,39,42,41,45,42,43,44,45,43,43,40,41,47,42,44,42,47,41,41,47,42,44,42,47,45,43,42,39,43,43,41,39,35,44,43,40,39,39,39,37,41,39,41,40,37,39,43,40,43,41,33,40,39,41,42,37,41,42,40,40,40,47,43,43,46,41,38,45,49,41,47,45,46,47,44,49,50,46,47,53,56,49,49,47,45,47,55,62,63,57,52,55,55,59,69,61,50,60,57,58,63,54,69,87,93,76,66,59,50,57,56,55,64,60,56,56,49,56,48,42,48,50,45,42,39,37,32,35,40,33,35,38,34,37,39,40,33,36,41,45,40,38,37,37,38,34,42,37,37,40,46,46,39,44,47,47,44,49,48,41,56,54,55,56,53,55,55,57,51,61,69,54,71,66,63,66,57,59,67,105,123,108,118,130,136,145,141,145,160,160,139,131,144,155,168,172,159,149,151,154,160,158,152,150,152,159,156,157,155,153,160,159,160,153,149,154,154,150,149,146,142,143,147,143,141,146,145,151,151,154,158,159,160,157,162,165,160,156,155,154,152,150,149,152,150,148,153,155,158,155,152,153,154,154,149,149,151,151,151,153,151,150,151,145,146,142,142,144,141,139,142,142,144,144,136,117,92,75,86,86,23,3,12,10,14,12,14,13,12,14,14,14,20,19,24,21,19,22,17,20,23,19,21,27,24,22,25,29,24,19,21,24,27,24,25,23,25,31,24,24,19,17,20,19,21,18,21,19,17,24,19,19,22,17,24,19,19,27,20,19,19,23,27,17,24,23,23,22,24,24,17,21,24,23,26,23,22,25,22,23,23,25,21,24,25,28,26,23,32,29,26,29,26,25,36,35,34,33,30,36,33,33,34,33,31,33,33,32,38,33,32,38,35,36,35,42,45,41,45,47,50,52,55,51,52,55,56,55,53,54,56,53,53,48,55,52,67,51,44,44,41,51,44,51,60,76,94,96,113,122,124,132,136,136,146,149,157,160,144,148,145,150,150,145,144,139,141,143,139,139,139,137,131,134,127,130,129,127,132,122,122,122,124,133,141,142,135,130,135,141,148,148,148,144,141,141,141,141,135,130,127,122,120,121,126,131,135,139,139,135,136,141,140,141,135,129,122,116,118,122,131,142,147,148,145,142,141,143,143,141,132,130,139,138,136,139,144,143,145,141,142,144,143,139,132,131,133,139,144,137,130,127,121,124,128,123,131,134,142,148,145,148,142,135,130,127,127,131,129,135,139,131,127,128,125,129,138,131,142,145,151,154,150,148,133,128,141,145,134,127,127,124,119,112,118,118,113,122,128,122,111,114,120,114,104,98,86,96,84,77,92,92,99,99,90,86,92,101,101,92,107,108,103,110,94,106,95,82,82,79,109,89,71,60,52,51,59,62,65,71,73,69,68,77,77,72,60,65,74,75,70,64,62,65,63,62,59,55,60,60,56,56,58,55,50,63,61,54,60,54,59,52,58,60,61,61,54,59,60,57,55,55,59,52,50,54,50,55,56,57,62,59,55,55,61,56,53,54,51,49,51,55,55,53,51,51,47,51,51,47,50,52,47,51,50,47,49,50,49,48,45,46,47,47,46,41,46,43,40,45,46,41,45,44,42,42,42,43,40,45,40,41,44,38,48,42,40,42,39,45,42,42,44,39,40,39,40,40,39,38,43,44,41,40,46,41,38,40,40,42,39,39,43,42,41,45,42,38,41,39,37,39,43,39,41,42,44,45,36,42,41,42,46,46,46,43,41,50,46,43,44,44,47,45,42,43,44,43,44,43,43,46,44,49,49,45,50,50,50,49,50,49,53,54,50,58,51,59,61,54,55,47,52,58,57,50,50,49,56,55,50,51,59,70,76,66,59,50,56,53,56,67,63,57,56,61,58,59,57,47,48,45,42,39,36,37,38,34,33,37,37,37,36,38,38,39,35,41,38,38,39,37,41,35,33,42,38,36,39,43,41,39,42,41,42,42,45,47,45,44,47,49,45,47,57,47,51,54,55,58,73,64,76,72,80,80,74,73,94,128,130,113,122,125,132,139,134,141,160,167,153,138,141,141,164,171,155,147,144,147,151,148,146,149,138,148,152,146,151,154,157,153,148,153,151,149,148,147,141,141,144,144,145,149,143,143,148,148,150,156,159,160,162,158,157,163,163,157,157,160,152,149,148,148,149,150,151,153,154,154,156,153,151,155,150,151,150,148,153,153,151,153,152,148,148,151,146,146,139,145,142,143,150,143,145,131,115,96,80,71,23,4,12,11,15,13,13,14,13,14,14,14,21,23,21,21,20,23,22,21,2
# (contributor list omitted: the generated names and emails were corrupted into numeric noise in this copy)
libvpx-1.8.2/codereview.settings000066400000000000000000000002521357355204000167470ustar00rootroot00000000000000# This file is used by git cl to get repository specific information. GERRIT_HOST: True CODE_REVIEW_SERVER: chromium-review.googlesource.com GERRIT_SQUASH_UPLOADS: False libvpx-1.8.2/configure000077500000000000000000000605241357355204000147500ustar00rootroot00000000000000#!/bin/sh ## ## configure ## ## This script is the front-end to the build system. It provides a similar ## interface to standard configure scripts with some extra bits for dealing ## with toolchains that differ from the standard POSIX interface and ## for extracting subsets of the source tree. In theory, reusable parts ## of this script were intended to live in build/make/configure.sh, ## but in practice, the line is pretty blurry. ## ## This build system is based in part on the FFmpeg configure script. ## #source_path="`dirname \"$0\"`" source_path=${0%/*} .
"${source_path}/build/make/configure.sh" show_help(){ show_help_pre cat << EOF Advanced options: ${toggle_libs} libraries ${toggle_examples} examples ${toggle_tools} tools ${toggle_docs} documentation ${toggle_unit_tests} unit tests ${toggle_decode_perf_tests} build decoder perf tests with unit tests ${toggle_encode_perf_tests} build encoder perf tests with unit tests --cpu=CPU tune for the specified CPU (ARM: cortex-a8, X86: sse3) --libc=PATH path to alternate libc --size-limit=WxH max size to allow in the decoder --as={yasm|nasm|auto} use specified assembler [auto, yasm preferred] ${toggle_codec_srcs} in/exclude codec library source code ${toggle_debug_libs} in/exclude debug version of libraries ${toggle_static_msvcrt} use static MSVCRT (VS builds only) ${toggle_vp9_highbitdepth} use VP9 high bit depth (10/12) profiles ${toggle_better_hw_compatibility} enable encoder to produce streams with better hardware decoder compatibility ${toggle_vp8} VP8 codec support ${toggle_vp9} VP9 codec support ${toggle_internal_stats} output of encoder internal stats for debug, if supported (encoders) ${toggle_postproc} postprocessing ${toggle_vp9_postproc} vp9 specific postprocessing ${toggle_multithread} multithreaded encoding and decoding ${toggle_spatial_resampling} spatial sampling (scaling) support ${toggle_realtime_only} enable this option while building for real-time encoding ${toggle_onthefly_bitpacking} enable on-the-fly bitpacking in real-time encoding ${toggle_error_concealment} enable this option to get a decoder which is able to conceal losses ${toggle_coefficient_range_checking} enable decoder to check if intermediate transform coefficients are in valid range ${toggle_runtime_cpu_detect} runtime cpu detection ${toggle_shared} shared library support ${toggle_static} static library support ${toggle_small} favor smaller size over speed ${toggle_postproc_visualizer} macro block / block level visualizers ${toggle_multi_res_encoding} enable multiple-resolution encoding ${toggle_temporal_denoising} enable temporal denoising and disable the spatial denoiser ${toggle_vp9_temporal_denoising} enable vp9 temporal denoising ${toggle_webm_io} enable input from and output to WebM container ${toggle_libyuv} enable libyuv Codecs: Codecs can be selectively enabled or disabled individually, or by family: --disable- is equivalent to: --disable--encoder --disable--decoder Codecs available in this distribution: EOF #restore editor state ' family=""; last_family=""; c=""; str=""; for c in ${CODECS}; do family=${c%_*} if [ "${family}" != "${last_family}" ]; then [ -z "${str}" ] || echo "${str}" str="$(printf ' %10s:' ${family})" fi str="${str} $(printf '%10s' ${c#*_})" last_family=${family} done echo "${str}" show_help_post } ## ## BEGIN APPLICATION SPECIFIC CONFIGURATION ## # all_platforms is a list of all supported target platforms. Maintain # alphabetically by architecture, generic-gnu last. 
all_platforms="${all_platforms} arm64-android-gcc" all_platforms="${all_platforms} arm64-darwin-gcc" all_platforms="${all_platforms} arm64-linux-gcc" all_platforms="${all_platforms} arm64-win64-gcc" all_platforms="${all_platforms} arm64-win64-vs15" all_platforms="${all_platforms} armv7-android-gcc" #neon Cortex-A8 all_platforms="${all_platforms} armv7-darwin-gcc" #neon Cortex-A8 all_platforms="${all_platforms} armv7-linux-rvct" #neon Cortex-A8 all_platforms="${all_platforms} armv7-linux-gcc" #neon Cortex-A8 all_platforms="${all_platforms} armv7-none-rvct" #neon Cortex-A8 all_platforms="${all_platforms} armv7-win32-gcc" all_platforms="${all_platforms} armv7-win32-vs14" all_platforms="${all_platforms} armv7-win32-vs15" all_platforms="${all_platforms} armv7s-darwin-gcc" all_platforms="${all_platforms} armv8-linux-gcc" all_platforms="${all_platforms} mips32-linux-gcc" all_platforms="${all_platforms} mips64-linux-gcc" all_platforms="${all_platforms} ppc64le-linux-gcc" all_platforms="${all_platforms} sparc-solaris-gcc" all_platforms="${all_platforms} x86-android-gcc" all_platforms="${all_platforms} x86-darwin8-gcc" all_platforms="${all_platforms} x86-darwin8-icc" all_platforms="${all_platforms} x86-darwin9-gcc" all_platforms="${all_platforms} x86-darwin9-icc" all_platforms="${all_platforms} x86-darwin10-gcc" all_platforms="${all_platforms} x86-darwin11-gcc" all_platforms="${all_platforms} x86-darwin12-gcc" all_platforms="${all_platforms} x86-darwin13-gcc" all_platforms="${all_platforms} x86-darwin14-gcc" all_platforms="${all_platforms} x86-darwin15-gcc" all_platforms="${all_platforms} x86-darwin16-gcc" all_platforms="${all_platforms} x86-darwin17-gcc" all_platforms="${all_platforms} x86-iphonesimulator-gcc" all_platforms="${all_platforms} x86-linux-gcc" all_platforms="${all_platforms} x86-linux-icc" all_platforms="${all_platforms} x86-os2-gcc" all_platforms="${all_platforms} x86-solaris-gcc" all_platforms="${all_platforms} x86-win32-gcc" all_platforms="${all_platforms} x86-win32-vs14" all_platforms="${all_platforms} x86-win32-vs15" all_platforms="${all_platforms} x86-win32-vs16" all_platforms="${all_platforms} x86_64-android-gcc" all_platforms="${all_platforms} x86_64-darwin9-gcc" all_platforms="${all_platforms} x86_64-darwin10-gcc" all_platforms="${all_platforms} x86_64-darwin11-gcc" all_platforms="${all_platforms} x86_64-darwin12-gcc" all_platforms="${all_platforms} x86_64-darwin13-gcc" all_platforms="${all_platforms} x86_64-darwin14-gcc" all_platforms="${all_platforms} x86_64-darwin15-gcc" all_platforms="${all_platforms} x86_64-darwin16-gcc" all_platforms="${all_platforms} x86_64-darwin17-gcc" all_platforms="${all_platforms} x86_64-darwin18-gcc" all_platforms="${all_platforms} x86_64-iphonesimulator-gcc" all_platforms="${all_platforms} x86_64-linux-gcc" all_platforms="${all_platforms} x86_64-linux-icc" all_platforms="${all_platforms} x86_64-solaris-gcc" all_platforms="${all_platforms} x86_64-win64-gcc" all_platforms="${all_platforms} x86_64-win64-vs14" all_platforms="${all_platforms} x86_64-win64-vs15" all_platforms="${all_platforms} x86_64-win64-vs16" all_platforms="${all_platforms} generic-gnu" # all_targets is a list of all targets that can be configured # note that these should be in dependency order for now. all_targets="libs examples tools docs" # all targets available are enabled, by default. for t in ${all_targets}; do [ -f "${source_path}/${t}.mk" ] && enable_feature ${t} done if ! diff --version >/dev/null; then die "diff missing: Try installing diffutils via your package manager." 
fi if ! perl --version >/dev/null; then die "Perl is required to build" fi if [ "`cd \"${source_path}\" && pwd`" != "`pwd`" ]; then # test to see if source_path already configured if [ -f "${source_path}/vpx_config.h" ]; then die "source directory already configured; run 'make distclean' there first" fi fi # check installed doxygen version doxy_version=$(doxygen --version 2>/dev/null) doxy_major=${doxy_version%%.*} if [ ${doxy_major:-0} -ge 1 ]; then doxy_version=${doxy_version#*.} doxy_minor=${doxy_version%%.*} doxy_patch=${doxy_version##*.} [ $doxy_major -gt 1 ] && enable_feature doxygen [ $doxy_minor -gt 5 ] && enable_feature doxygen [ $doxy_minor -eq 5 ] && [ $doxy_patch -ge 3 ] && enable_feature doxygen fi # disable codecs when their source directory does not exist [ -d "${source_path}/vp8" ] || disable_codec vp8 [ -d "${source_path}/vp9" ] || disable_codec vp9 # install everything except the sources, by default. sources will have # to be enabled when doing dist builds, since that's no longer a common # case. enabled doxygen && enable_feature install_docs enable_feature install_bins enable_feature install_libs enable_feature static enable_feature optimizations enable_feature dependency_tracking enable_feature spatial_resampling enable_feature multithread enable_feature os_support enable_feature temporal_denoising CODECS=" vp8_encoder vp8_decoder vp9_encoder vp9_decoder " CODEC_FAMILIES=" vp8 vp9 " ARCH_LIST=" arm mips x86 x86_64 ppc " ARCH_EXT_LIST_X86=" mmx sse sse2 sse3 ssse3 sse4_1 avx avx2 avx512 " ARCH_EXT_LIST_LOONGSON=" mmi " ARCH_EXT_LIST=" neon neon_asm mips32 dspr2 msa mips64 ${ARCH_EXT_LIST_X86} vsx ${ARCH_EXT_LIST_LOONGSON} " HAVE_LIST=" ${ARCH_EXT_LIST} vpx_ports pthread_h unistd_h " EXPERIMENT_LIST=" fp_mb_stats emulate_hardware non_greedy_mv rate_ctrl " CONFIG_LIST=" dependency_tracking external_build install_docs install_bins install_libs install_srcs debug gprof gcov rvct gcc msvs pic big_endian codec_srcs debug_libs dequant_tokens dc_recon runtime_cpu_detect postproc vp9_postproc multithread internal_stats ${CODECS} ${CODEC_FAMILIES} encoders decoders static_msvcrt spatial_resampling realtime_only onthefly_bitpacking error_concealment shared static small postproc_visualizer os_support unit_tests webm_io libyuv decode_perf_tests encode_perf_tests multi_res_encoding temporal_denoising vp9_temporal_denoising consistent_recode coefficient_range_checking vp9_highbitdepth better_hw_compatibility experimental size_limit always_adjust_bpm bitstream_debug mismatch_debug ${EXPERIMENT_LIST} " CMDLINE_SELECT=" dependency_tracking external_build extra_warnings werror install_docs install_bins install_libs install_srcs debug gprof gcov pic optimizations ccache runtime_cpu_detect thumb libs examples tools docs libc as size_limit codec_srcs debug_libs dequant_tokens dc_recon postproc vp9_postproc multithread internal_stats ${CODECS} ${CODEC_FAMILIES} static_msvcrt spatial_resampling realtime_only onthefly_bitpacking error_concealment shared static small postproc_visualizer unit_tests webm_io libyuv decode_perf_tests encode_perf_tests multi_res_encoding temporal_denoising vp9_temporal_denoising consistent_recode coefficient_range_checking better_hw_compatibility vp9_highbitdepth experimental always_adjust_bpm bitstream_debug mismatch_debug " process_cmdline() { for opt do optval="${opt#*=}" case "$opt" in --disable-codecs) for c in ${CODEC_FAMILIES}; do disable_codec $c; done ;; --enable-?*|--disable-?*) eval `echo "$opt" | sed 's/--/action=/;s/-/ option=/;s/-/_/g'` if is_in 
${option} ${EXPERIMENT_LIST}; then if enabled experimental; then ${action}_feature $option else log_echo "Ignoring $opt -- not in experimental mode." fi elif is_in ${option} "${CODECS} ${CODEC_FAMILIES}"; then ${action}_codec ${option} else process_common_cmdline $opt fi ;; *) process_common_cmdline "$opt" ;; esac done } post_process_cmdline() { if enabled coefficient_range_checking; then echo "coefficient-range-checking is for decoders only, disabling encoders:" soft_disable vp8_encoder soft_disable vp9_encoder fi c="" # Enable all detected codecs, if they haven't been disabled for c in ${CODECS}; do soft_enable $c; done # Enable the codec family if any component of that family is enabled for c in ${CODECS}; do enabled $c && enable_feature ${c%_*} done # Set the {en,de}coders variable if any algorithm in that class is enabled for c in ${CODECS}; do enabled ${c} && enable_feature ${c##*_}s done } process_targets() { enabled child || write_common_config_banner write_common_target_config_h ${BUILD_PFX}vpx_config.h write_common_config_targets enabled win_arm64_neon_h_workaround && write_win_arm64_neon_h_workaround ${BUILD_PFX}arm_neon.h # Calculate the default distribution name, based on the enabled features cf="" DIST_DIR=vpx for cf in $CODEC_FAMILIES; do if enabled ${cf}_encoder && enabled ${cf}_decoder; then DIST_DIR="${DIST_DIR}-${cf}" elif enabled ${cf}_encoder; then DIST_DIR="${DIST_DIR}-${cf}cx" elif enabled ${cf}_decoder; then DIST_DIR="${DIST_DIR}-${cf}dx" fi done enabled debug_libs && DIST_DIR="${DIST_DIR}-debug" enabled codec_srcs && DIST_DIR="${DIST_DIR}-src" ! enabled postproc && ! enabled vp9_postproc && DIST_DIR="${DIST_DIR}-nopost" ! enabled multithread && DIST_DIR="${DIST_DIR}-nomt" ! enabled install_docs && DIST_DIR="${DIST_DIR}-nodocs" DIST_DIR="${DIST_DIR}-${tgt_isa}-${tgt_os}" case "${tgt_os}" in win*) enabled static_msvcrt && DIST_DIR="${DIST_DIR}mt" || DIST_DIR="${DIST_DIR}md" DIST_DIR="${DIST_DIR}-${tgt_cc}" ;; esac if [ -f "${source_path}/build/make/version.sh" ]; then ver=`"$source_path/build/make/version.sh" --bare "$source_path"` DIST_DIR="${DIST_DIR}-${ver}" VERSION_STRING=${ver} ver=${ver%%-*} VERSION_PATCH=${ver##*.} ver=${ver%.*} VERSION_MINOR=${ver##*.} ver=${ver#v} VERSION_MAJOR=${ver%.*} fi enabled child || cat <<EOF >> config.mk PREFIX=${prefix} ifeq (\$(MAKECMDGOALS),dist) DIST_DIR?=${DIST_DIR} else DIST_DIR?=\$(DESTDIR)${prefix} endif LIBSUBDIR=${libdir##${prefix}/} VERSION_STRING=${VERSION_STRING} VERSION_MAJOR=${VERSION_MAJOR} VERSION_MINOR=${VERSION_MINOR} VERSION_PATCH=${VERSION_PATCH} CONFIGURE_ARGS=${CONFIGURE_ARGS} EOF enabled child || echo "CONFIGURE_ARGS?=${CONFIGURE_ARGS}" >> config.mk # # Write makefiles for all enabled targets # for tgt in libs examples tools docs solution; do tgt_fn="$tgt-$toolchain.mk" if enabled $tgt; then echo "Creating makefiles for ${toolchain} ${tgt}" write_common_target_config_mk $tgt_fn ${BUILD_PFX}vpx_config.h #write_${tgt}_config fi done } process_detect() { if enabled shared; then # Can only build shared libs on a subset of platforms. Doing this check # here rather than at option parse time because the target auto-detect # magic happens after the command line has been parsed.
case "${tgt_os}" in linux|os2|solaris|darwin*|iphonesimulator*) # Supported platforms ;; *) if enabled gnu; then echo "--enable-shared is only supported on ELF; assuming this is OK" else die "--enable-shared only supported on ELF, OS/2, and Darwin for now" fi ;; esac fi if [ -z "$CC" ] || enabled external_build; then echo "Bypassing toolchain for environment detection." enable_feature external_build check_header() { log fake_check_header "$@" header=$1 shift var=`echo $header | sed 's/[^A-Za-z0-9_]/_/g'` disable_feature $var # Headers common to all environments case $header in stdio.h) true; ;; *) result=false for d in "$@"; do [ -f "${d##-I}/$header" ] && result=true && break done ${result:-true} esac && enable_feature $var # Specialize windows and POSIX environments. case $toolchain in *-win*-*) # Don't check for any headers in Windows builds. false ;; *) case $header in pthread.h) true;; unistd.h) true;; *) false;; esac && enable_feature $var esac enabled $var } check_ld() { true } check_lib() { true } fi check_header stdio.h || die "Unable to invoke compiler: ${CC} ${CFLAGS}" check_ld < #include int main(void) { return pthread_create(NULL, NULL, NULL, NULL); } EOF check_header unistd.h # for sysconf(3) and friends. check_header vpx/vpx_integer.h -I${source_path} && enable_feature vpx_ports if enabled neon && ! enabled external_build; then check_header arm_neon.h || die "Unable to find arm_neon.h" fi } process_toolchain() { process_common_toolchain # Enable some useful compiler flags if enabled gcc; then enabled werror && check_add_cflags -Werror check_add_cflags -Wall check_add_cflags -Wdeclaration-after-statement check_add_cflags -Wdisabled-optimization check_add_cflags -Wfloat-conversion check_add_cflags -Wparentheses-equality check_add_cflags -Wpointer-arith check_add_cflags -Wtype-limits check_add_cflags -Wcast-qual check_add_cflags -Wvla check_add_cflags -Wimplicit-function-declaration check_add_cflags -Wmissing-declarations check_add_cflags -Wmissing-prototypes check_add_cflags -Wuninitialized check_add_cflags -Wunused check_add_cflags -Wextra # check_add_cflags also adds to cxxflags. gtest does not do well with # these flags so add them explicitly to CFLAGS only. check_cflags -Wundef && add_cflags_only -Wundef check_cflags -Wframe-larger-than=52000 && \ add_cflags_only -Wframe-larger-than=52000 if enabled mips || [ -z "${INLINE}" ]; then enabled extra_warnings || check_add_cflags -Wno-unused-function fi # Enforce c89 for c files. Don't be too strict about it though. Allow # gnu extensions like "//" for comments. check_cflags -std=gnu89 && add_cflags_only -std=gnu89 # Avoid this warning for third_party C++ sources. Some reorganization # would be needed to apply this only to test/*.cc. check_cflags -Wshorten-64-to-32 && add_cflags_only -Wshorten-64-to-32 # Quiet gcc 6 vs 7 abi warnings: # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=77728 if enabled arm; then check_add_cxxflags -Wno-psabi fi # disable some warnings specific to libyuv. 
check_cxxflags -Wno-missing-declarations \ && LIBYUV_CXXFLAGS="${LIBYUV_CXXFLAGS} -Wno-missing-declarations" check_cxxflags -Wno-missing-prototypes \ && LIBYUV_CXXFLAGS="${LIBYUV_CXXFLAGS} -Wno-missing-prototypes" check_cxxflags -Wno-unused-parameter \ && LIBYUV_CXXFLAGS="${LIBYUV_CXXFLAGS} -Wno-unused-parameter" fi if enabled icc; then enabled werror && check_add_cflags -Werror check_add_cflags -Wall check_add_cflags -Wpointer-arith # ICC has a number of floating point optimizations that we disable # in favor of deterministic output WRT other compilers add_cflags -fp-model precise fi # Enable extra, harmless warnings. These might provide additional insight # to what the compiler is doing and why, but in general they shouldn't # be treated as fatal, even if we're treating warnings as errors. GCC_EXTRA_WARNINGS=" -Wdisabled-optimization -Winline " enabled gcc && EXTRA_WARNINGS="${GCC_EXTRA_WARNINGS}" RVCT_EXTRA_WARNINGS=" --remarks " enabled rvct && EXTRA_WARNINGS="${RVCT_EXTRA_WARNINGS}" if enabled extra_warnings; then for w in ${EXTRA_WARNINGS}; do check_add_cflags ${w} enabled gcc && enabled werror && check_add_cflags -Wno-error=${w} done fi # ccache only really works on gcc toolchains enabled gcc || soft_disable ccache if enabled mips; then enable_feature dequant_tokens enable_feature dc_recon fi if enabled internal_stats; then enable_feature vp9_postproc fi # Enable the postbuild target if building for visual studio. case "$tgt_cc" in vs*) enable_feature msvs enable_feature solution vs_version=${tgt_cc##vs} VCPROJ_SFX=vcxproj gen_vcproj_cmd=${source_path}/build/make/gen_msvs_vcxproj.sh enabled werror && gen_vcproj_cmd="${gen_vcproj_cmd} --enable-werror" all_targets="${all_targets} solution" INLINE="__inline" ;; esac # Other toolchain specific defaults case $toolchain in x86*) soft_enable postproc;; esac if enabled postproc_visualizer; then enabled postproc || die "postproc_visualizer requires postproc to be enabled" fi # Enable unit tests by default if we have a working C++ compiler. case "$toolchain" in *-vs*) soft_enable unit_tests soft_enable webm_io soft_enable libyuv ;; *-android-*) check_add_cxxflags -std=c++11 && soft_enable webm_io soft_enable libyuv # GTestLog must be modified to use Android logging utilities. ;; *-darwin-*) # iOS/ARM builds do not work with gtest. This does not match # x86 targets. ;; *-iphonesimulator-*) check_add_cxxflags -std=c++11 && soft_enable webm_io soft_enable libyuv ;; *-win*) # Some mingw toolchains don't have pthread available by default. # Treat these more like visual studio where threading in gtest # would be disabled for the same reason. check_add_cxxflags -std=c++11 && soft_enable unit_tests \ && soft_enable webm_io check_cxx "$@" <<EOF && soft_enable libyuv int z; EOF ;; *) enabled pthread_h && check_add_cxxflags -std=c++11 \ && soft_enable unit_tests check_add_cxxflags -std=c++11 && soft_enable webm_io check_cxx "$@" <<EOF && soft_enable libyuv int z; EOF ;; esac # libwebm needs to be linked with C++ standard library enabled webm_io && LD=${CXX} # append any user defined extra cflags if [ -n "${extra_cflags}" ] ; then check_add_cflags ${extra_cflags} || die "Invalid extra cflags" fi if [ -n "${extra_cxxflags}" ]; then check_add_cxxflags ${extra_cxxflags} || die "Invalid extra cxxflags" fi } ## ## END APPLICATION SPECIFIC CONFIGURATION ## CONFIGURE_ARGS="$@" process "$@" print_webm_license ${BUILD_PFX}vpx_config.c "/*" " */" cat <<EOF >> ${BUILD_PFX}vpx_config.c #include "vpx/vpx_codec.h" static const char* const cfg = "$CONFIGURE_ARGS"; const char *vpx_codec_build_config(void) {return cfg;} EOF libvpx-1.8.2/docs.mk000066400000000000000000000031071357355204000143140ustar00rootroot00000000000000## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree.
## INSTALL_MAPS += docs/% docs/% INSTALL_MAPS += src/% % INSTALL_MAPS += % % # Static documentation authored in doxygen CODEC_DOX := mainpage.dox \ keywords.dox \ usage.dox \ usage_cx.dox \ usage_dx.dox \ # Other doxy files sourced in Markdown TXT_DOX = $(call enabled,TXT_DOX) EXAMPLE_PATH += $(SRC_PATH_BARE) #for CHANGELOG, README, etc EXAMPLE_PATH += $(SRC_PATH_BARE)/examples doxyfile: $(if $(findstring examples, $(ALL_TARGETS)),examples.doxy) doxyfile: libs.doxy_template libs.doxy @echo " [CREATE] $@" @cat $^ > $@ @echo "STRIP_FROM_PATH += $(SRC_PATH_BARE) $(BUILD_ROOT)" >> $@ @echo "INPUT += $(addprefix $(SRC_PATH_BARE)/,$(CODEC_DOX))" >> $@; @echo "INPUT += $(TXT_DOX)" >> $@; @echo "EXAMPLE_PATH += $(EXAMPLE_PATH)" >> $@ CLEAN-OBJS += doxyfile $(wildcard docs/html/*) docs/html/index.html: doxyfile $(CODEC_DOX) $(TXT_DOX) @echo " [DOXYGEN] $<" @doxygen $< DOCS-yes += docs/html/index.html DIST-DOCS-yes = $(wildcard docs/html/*) DIST-DOCS-$(CONFIG_CODEC_SRCS) += $(addprefix src/,$(CODEC_DOX)) DIST-DOCS-$(CONFIG_CODEC_SRCS) += src/libs.doxy_template DIST-DOCS-yes += CHANGELOG DIST-DOCS-yes += README libvpx-1.8.2/examples.mk000066400000000000000000000477071357355204000152200ustar00rootroot00000000000000## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## LIBYUV_SRCS += third_party/libyuv/include/libyuv/basic_types.h \ third_party/libyuv/include/libyuv/convert.h \ third_party/libyuv/include/libyuv/convert_argb.h \ third_party/libyuv/include/libyuv/convert_from.h \ third_party/libyuv/include/libyuv/cpu_id.h \ third_party/libyuv/include/libyuv/planar_functions.h \ third_party/libyuv/include/libyuv/rotate.h \ third_party/libyuv/include/libyuv/row.h \ third_party/libyuv/include/libyuv/scale.h \ third_party/libyuv/include/libyuv/scale_row.h \ third_party/libyuv/source/cpu_id.cc \ third_party/libyuv/source/planar_functions.cc \ third_party/libyuv/source/row_any.cc \ third_party/libyuv/source/row_common.cc \ third_party/libyuv/source/row_gcc.cc \ third_party/libyuv/source/row_msa.cc \ third_party/libyuv/source/row_neon.cc \ third_party/libyuv/source/row_neon64.cc \ third_party/libyuv/source/row_win.cc \ third_party/libyuv/source/scale.cc \ third_party/libyuv/source/scale_any.cc \ third_party/libyuv/source/scale_common.cc \ third_party/libyuv/source/scale_gcc.cc \ third_party/libyuv/source/scale_msa.cc \ third_party/libyuv/source/scale_neon.cc \ third_party/libyuv/source/scale_neon64.cc \ third_party/libyuv/source/scale_win.cc \ LIBWEBM_COMMON_SRCS += third_party/libwebm/common/hdr_util.cc \ third_party/libwebm/common/hdr_util.h \ third_party/libwebm/common/webmids.h LIBWEBM_MUXER_SRCS += third_party/libwebm/mkvmuxer/mkvmuxer.cc \ third_party/libwebm/mkvmuxer/mkvmuxerutil.cc \ third_party/libwebm/mkvmuxer/mkvwriter.cc \ third_party/libwebm/mkvmuxer/mkvmuxer.h \ third_party/libwebm/mkvmuxer/mkvmuxertypes.h \ third_party/libwebm/mkvmuxer/mkvmuxerutil.h \ third_party/libwebm/mkvparser/mkvparser.h \ third_party/libwebm/mkvmuxer/mkvwriter.h LIBWEBM_PARSER_SRCS = third_party/libwebm/mkvparser/mkvparser.cc \ third_party/libwebm/mkvparser/mkvreader.cc \ third_party/libwebm/mkvparser/mkvparser.h \ third_party/libwebm/mkvparser/mkvreader.h # Add compile flags and 
include path for libwebm sources. ifeq ($(CONFIG_WEBM_IO),yes) CXXFLAGS += -D__STDC_CONSTANT_MACROS -D__STDC_LIMIT_MACROS INC_PATH-yes += $(SRC_PATH_BARE)/third_party/libwebm endif # List of examples to build. UTILS are tools meant for distribution # while EXAMPLES demonstrate specific portions of the API. UTILS-$(CONFIG_DECODERS) += vpxdec.c vpxdec.SRCS += md5_utils.c md5_utils.h vpxdec.SRCS += vpx_ports/mem_ops.h vpxdec.SRCS += vpx_ports/mem_ops_aligned.h vpxdec.SRCS += vpx_ports/msvc.h vpxdec.SRCS += vpx_ports/vpx_timer.h vpxdec.SRCS += vpx/vpx_integer.h vpxdec.SRCS += args.c args.h vpxdec.SRCS += ivfdec.c ivfdec.h vpxdec.SRCS += y4minput.c y4minput.h vpxdec.SRCS += tools_common.c tools_common.h vpxdec.SRCS += y4menc.c y4menc.h ifeq ($(CONFIG_LIBYUV),yes) vpxdec.SRCS += $(LIBYUV_SRCS) $(BUILD_PFX)third_party/libyuv/%.cc.o: CXXFLAGS += ${LIBYUV_CXXFLAGS} endif ifeq ($(CONFIG_WEBM_IO),yes) vpxdec.SRCS += $(LIBWEBM_COMMON_SRCS) vpxdec.SRCS += $(LIBWEBM_MUXER_SRCS) vpxdec.SRCS += $(LIBWEBM_PARSER_SRCS) vpxdec.SRCS += webmdec.cc webmdec.h endif vpxdec.GUID = BA5FE66F-38DD-E034-F542-B1578C5FB950 vpxdec.DESCRIPTION = Full featured decoder UTILS-$(CONFIG_ENCODERS) += vpxenc.c vpxenc.SRCS += args.c args.h y4minput.c y4minput.h vpxenc.h vpxenc.SRCS += ivfdec.c ivfdec.h vpxenc.SRCS += ivfenc.c ivfenc.h vpxenc.SRCS += rate_hist.c rate_hist.h vpxenc.SRCS += tools_common.c tools_common.h vpxenc.SRCS += warnings.c warnings.h vpxenc.SRCS += vpx_ports/mem_ops.h vpxenc.SRCS += vpx_ports/mem_ops_aligned.h vpxenc.SRCS += vpx_ports/msvc.h vpxenc.SRCS += vpx_ports/vpx_timer.h vpxenc.SRCS += vpxstats.c vpxstats.h ifeq ($(CONFIG_LIBYUV),yes) vpxenc.SRCS += $(LIBYUV_SRCS) endif ifeq ($(CONFIG_WEBM_IO),yes) vpxenc.SRCS += $(LIBWEBM_COMMON_SRCS) vpxenc.SRCS += $(LIBWEBM_MUXER_SRCS) vpxenc.SRCS += $(LIBWEBM_PARSER_SRCS) vpxenc.SRCS += webmenc.cc webmenc.h endif vpxenc.GUID = 548DEC74-7A15-4B2B-AFC3-AA102E7C25C1 vpxenc.DESCRIPTION = Full featured encoder EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_spatial_svc_encoder.c vp9_spatial_svc_encoder.SRCS += args.c args.h vp9_spatial_svc_encoder.SRCS += ivfenc.c ivfenc.h vp9_spatial_svc_encoder.SRCS += y4minput.c y4minput.h vp9_spatial_svc_encoder.SRCS += tools_common.c tools_common.h vp9_spatial_svc_encoder.SRCS += video_common.h vp9_spatial_svc_encoder.SRCS += video_writer.h video_writer.c vp9_spatial_svc_encoder.SRCS += vpx_ports/msvc.h vp9_spatial_svc_encoder.SRCS += vpxstats.c vpxstats.h vp9_spatial_svc_encoder.SRCS += examples/svc_encodeframe.c vp9_spatial_svc_encoder.SRCS += examples/svc_context.h vp9_spatial_svc_encoder.GUID = 4A38598D-627D-4505-9C7B-D4020C84100D vp9_spatial_svc_encoder.DESCRIPTION = VP9 Spatial SVC Encoder ifneq ($(CONFIG_SHARED),yes) EXAMPLES-$(CONFIG_VP9_ENCODER) += resize_util.c endif EXAMPLES-$(CONFIG_ENCODERS) += vpx_temporal_svc_encoder.c vpx_temporal_svc_encoder.SRCS += ivfenc.c ivfenc.h vpx_temporal_svc_encoder.SRCS += y4minput.c y4minput.h vpx_temporal_svc_encoder.SRCS += tools_common.c tools_common.h vpx_temporal_svc_encoder.SRCS += video_common.h vpx_temporal_svc_encoder.SRCS += video_writer.h video_writer.c vpx_temporal_svc_encoder.SRCS += vpx_ports/msvc.h vpx_temporal_svc_encoder.GUID = B18C08F2-A439-4502-A78E-849BE3D60947 vpx_temporal_svc_encoder.DESCRIPTION = Temporal SVC Encoder EXAMPLES-$(CONFIG_DECODERS) += simple_decoder.c simple_decoder.GUID = D3BBF1E9-2427-450D-BBFF-B2843C1D44CC simple_decoder.SRCS += ivfdec.h ivfdec.c simple_decoder.SRCS += y4minput.c y4minput.h simple_decoder.SRCS += tools_common.h tools_common.c 
simple_decoder.SRCS += video_common.h simple_decoder.SRCS += video_reader.h video_reader.c simple_decoder.SRCS += vpx_ports/mem_ops.h simple_decoder.SRCS += vpx_ports/mem_ops_aligned.h simple_decoder.SRCS += vpx_ports/msvc.h simple_decoder.DESCRIPTION = Simplified decoder loop EXAMPLES-$(CONFIG_DECODERS) += postproc.c postproc.SRCS += ivfdec.h ivfdec.c postproc.SRCS += y4minput.c y4minput.h postproc.SRCS += tools_common.h tools_common.c postproc.SRCS += video_common.h postproc.SRCS += video_reader.h video_reader.c postproc.SRCS += vpx_ports/mem_ops.h postproc.SRCS += vpx_ports/mem_ops_aligned.h postproc.SRCS += vpx_ports/msvc.h postproc.GUID = 65E33355-F35E-4088-884D-3FD4905881D7 postproc.DESCRIPTION = Decoder postprocessor control EXAMPLES-$(CONFIG_DECODERS) += decode_to_md5.c decode_to_md5.SRCS += md5_utils.h md5_utils.c decode_to_md5.SRCS += ivfdec.h ivfdec.c decode_to_md5.SRCS += y4minput.c y4minput.h decode_to_md5.SRCS += tools_common.h tools_common.c decode_to_md5.SRCS += video_common.h decode_to_md5.SRCS += video_reader.h video_reader.c decode_to_md5.SRCS += vpx_ports/mem_ops.h decode_to_md5.SRCS += vpx_ports/mem_ops_aligned.h decode_to_md5.SRCS += vpx_ports/msvc.h decode_to_md5.GUID = 59120B9B-2735-4BFE-B022-146CA340FE42 decode_to_md5.DESCRIPTION = Frame by frame MD5 checksum EXAMPLES-$(CONFIG_ENCODERS) += simple_encoder.c simple_encoder.SRCS += ivfenc.h ivfenc.c simple_encoder.SRCS += y4minput.c y4minput.h simple_encoder.SRCS += tools_common.h tools_common.c simple_encoder.SRCS += video_common.h simple_encoder.SRCS += video_writer.h video_writer.c simple_encoder.SRCS += vpx_ports/msvc.h simple_encoder.GUID = 4607D299-8A71-4D2C-9B1D-071899B6FBFD simple_encoder.DESCRIPTION = Simplified encoder loop EXAMPLES-$(CONFIG_VP9_ENCODER) += vp9_lossless_encoder.c vp9_lossless_encoder.SRCS += ivfenc.h ivfenc.c vp9_lossless_encoder.SRCS += y4minput.c y4minput.h vp9_lossless_encoder.SRCS += tools_common.h tools_common.c vp9_lossless_encoder.SRCS += video_common.h vp9_lossless_encoder.SRCS += video_writer.h video_writer.c vp9_lossless_encoder.SRCS += vpx_ports/msvc.h vp9_lossless_encoder.GUID = B63C7C88-5348-46DC-A5A6-CC151EF93366 vp9_lossless_encoder.DESCRIPTION = Simplified lossless VP9 encoder EXAMPLES-$(CONFIG_ENCODERS) += twopass_encoder.c twopass_encoder.SRCS += ivfenc.h ivfenc.c twopass_encoder.SRCS += y4minput.c y4minput.h twopass_encoder.SRCS += tools_common.h tools_common.c twopass_encoder.SRCS += video_common.h twopass_encoder.SRCS += video_writer.h video_writer.c twopass_encoder.SRCS += vpx_ports/msvc.h twopass_encoder.GUID = 73494FA6-4AF9-4763-8FBB-265C92402FD8 twopass_encoder.DESCRIPTION = Two-pass encoder loop EXAMPLES-$(CONFIG_DECODERS) += decode_with_drops.c decode_with_drops.SRCS += ivfdec.h ivfdec.c decode_with_drops.SRCS += y4minput.c y4minput.h decode_with_drops.SRCS += tools_common.h tools_common.c decode_with_drops.SRCS += video_common.h decode_with_drops.SRCS += video_reader.h video_reader.c decode_with_drops.SRCS += vpx_ports/mem_ops.h decode_with_drops.SRCS += vpx_ports/mem_ops_aligned.h decode_with_drops.SRCS += vpx_ports/msvc.h decode_with_drops.GUID = CE5C53C4-8DDA-438A-86ED-0DDD3CDB8D26 decode_with_drops.DESCRIPTION = Drops frames while decoding EXAMPLES-$(CONFIG_ENCODERS) += set_maps.c set_maps.SRCS += ivfenc.h ivfenc.c set_maps.SRCS += y4minput.c y4minput.h set_maps.SRCS += tools_common.h tools_common.c set_maps.SRCS += video_common.h set_maps.SRCS += video_writer.h video_writer.c set_maps.SRCS += vpx_ports/msvc.h set_maps.GUID = 
ECB2D24D-98B8-4015-A465-A4AF3DCC145F set_maps.DESCRIPTION = Set active and ROI maps EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8cx_set_ref.c vp8cx_set_ref.SRCS += ivfenc.h ivfenc.c vp8cx_set_ref.SRCS += y4minput.c y4minput.h vp8cx_set_ref.SRCS += tools_common.h tools_common.c vp8cx_set_ref.SRCS += video_common.h vp8cx_set_ref.SRCS += video_writer.h video_writer.c vp8cx_set_ref.SRCS += vpx_ports/msvc.h vp8cx_set_ref.GUID = C5E31F7F-96F6-48BD-BD3E-10EBF6E8057A vp8cx_set_ref.DESCRIPTION = VP8 set encoder reference frame ifeq ($(CONFIG_VP9_ENCODER),yes) ifeq ($(CONFIG_DECODERS),yes) EXAMPLES-yes += vp9cx_set_ref.c vp9cx_set_ref.SRCS += ivfenc.h ivfenc.c vp9cx_set_ref.SRCS += y4minput.c y4minput.h vp9cx_set_ref.SRCS += tools_common.h tools_common.c vp9cx_set_ref.SRCS += video_common.h vp9cx_set_ref.SRCS += video_writer.h video_writer.c vp9cx_set_ref.GUID = 65D7F14A-2EE6-4293-B958-AB5107A03B55 vp9cx_set_ref.DESCRIPTION = VP9 set encoder reference frame endif endif ifeq ($(CONFIG_MULTI_RES_ENCODING),yes) ifeq ($(CONFIG_LIBYUV),yes) EXAMPLES-$(CONFIG_VP8_ENCODER) += vp8_multi_resolution_encoder.c vp8_multi_resolution_encoder.SRCS += ivfenc.h ivfenc.c vp8_multi_resolution_encoder.SRCS += y4minput.c y4minput.h vp8_multi_resolution_encoder.SRCS += tools_common.h tools_common.c vp8_multi_resolution_encoder.SRCS += video_writer.h video_writer.c vp8_multi_resolution_encoder.SRCS += vpx_ports/msvc.h vp8_multi_resolution_encoder.SRCS += $(LIBYUV_SRCS) vp8_multi_resolution_encoder.GUID = 04f8738e-63c8-423b-90fa-7c2703a374de vp8_multi_resolution_encoder.DESCRIPTION = VP8 Multiple-resolution Encoding endif endif # Handle extra library flags depending on codec configuration # We should not link to math library (libm) on RVCT # when building for bare-metal targets ifeq ($(CONFIG_OS_SUPPORT), yes) CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m CODEC_EXTRA_LIBS-$(CONFIG_VP9) += m else ifeq ($(CONFIG_GCC), yes) CODEC_EXTRA_LIBS-$(CONFIG_VP8) += m CODEC_EXTRA_LIBS-$(CONFIG_VP9) += m endif endif # # End of specified files. The rest of the build rules should happen # automagically from here. # # Examples need different flags based on whether we're building # from an installed tree or a version controlled tree. Determine # the proper paths. ifeq ($(HAVE_ALT_TREE_LAYOUT),yes) LIB_PATH-yes := $(SRC_PATH_BARE)/../lib INC_PATH-yes := $(SRC_PATH_BARE)/../include else LIB_PATH-yes += $(if $(BUILD_PFX),$(BUILD_PFX),.) 
INC_PATH-$(CONFIG_VP8_DECODER) += $(SRC_PATH_BARE)/vp8 INC_PATH-$(CONFIG_VP8_ENCODER) += $(SRC_PATH_BARE)/vp8 INC_PATH-$(CONFIG_VP9_DECODER) += $(SRC_PATH_BARE)/vp9 INC_PATH-$(CONFIG_VP9_ENCODER) += $(SRC_PATH_BARE)/vp9 endif INC_PATH-$(CONFIG_LIBYUV) += $(SRC_PATH_BARE)/third_party/libyuv/include LIB_PATH := $(call enabled,LIB_PATH) INC_PATH := $(call enabled,INC_PATH) INTERNAL_CFLAGS = $(addprefix -I,$(INC_PATH)) INTERNAL_LDFLAGS += $(addprefix -L,$(LIB_PATH)) # Expand list of selected examples to build (as specified above) UTILS = $(call enabled,UTILS) EXAMPLES = $(addprefix examples/,$(call enabled,EXAMPLES)) ALL_EXAMPLES = $(UTILS) $(EXAMPLES) UTIL_SRCS = $(foreach ex,$(UTILS),$($(ex:.c=).SRCS)) ALL_SRCS = $(foreach ex,$(ALL_EXAMPLES),$($(notdir $(ex:.c=)).SRCS)) CODEC_EXTRA_LIBS=$(sort $(call enabled,CODEC_EXTRA_LIBS)) # Expand all example sources into a variable containing all sources # for that example (not just them main one specified in UTILS/EXAMPLES) # and add this file to the list (for MSVS workspace generation) $(foreach ex,$(ALL_EXAMPLES),$(eval $(notdir $(ex:.c=)).SRCS += $(ex) examples.mk)) # Create build/install dependencies for all examples. The common case # is handled here. The MSVS case is handled below. NOT_MSVS = $(if $(CONFIG_MSVS),,yes) DIST-BINS-$(NOT_MSVS) += $(addprefix bin/,$(ALL_EXAMPLES:.c=$(EXE_SFX))) INSTALL-BINS-$(NOT_MSVS) += $(addprefix bin/,$(UTILS:.c=$(EXE_SFX))) DIST-SRCS-yes += $(ALL_SRCS) INSTALL-SRCS-yes += $(UTIL_SRCS) OBJS-$(NOT_MSVS) += $(call objs,$(ALL_SRCS)) BINS-$(NOT_MSVS) += $(addprefix $(BUILD_PFX),$(ALL_EXAMPLES:.c=$(EXE_SFX))) # Instantiate linker template for all examples. CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx) ifneq ($(filter darwin%,$(TGT_OS)),) SHARED_LIB_SUF=.dylib else ifneq ($(filter os2%,$(TGT_OS)),) SHARED_LIB_SUF=_dll.a else SHARED_LIB_SUF=.so endif endif CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a) $(foreach bin,$(BINS-yes),\ $(eval $(bin):$(LIB_PATH)/lib$(CODEC_LIB)$(CODEC_LIB_SUF))\ $(eval $(call linker_template,$(bin),\ $(call objs,$($(notdir $(bin:$(EXE_SFX)=)).SRCS)) \ -l$(CODEC_LIB) $(addprefix -l,$(CODEC_EXTRA_LIBS))\ ))) # The following pairs define a mapping of locations in the distribution # tree to locations in the source/build trees. INSTALL_MAPS += src/%.c %.c INSTALL_MAPS += src/% $(SRC_PATH_BARE)/% INSTALL_MAPS += bin/% % INSTALL_MAPS += % % # Set up additional MSVS environment ifeq ($(CONFIG_MSVS),yes) CODEC_LIB=$(if $(CONFIG_SHARED),vpx,$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd)) # This variable uses deferred expansion intentionally, since the results of # $(wildcard) may change during the course of the Make. VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d)))) INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),bin/$(p)/% $(p)/Release/%) endif # Build Visual Studio Projects. We use a template here to instantiate # explicit rules rather than using an implicit rule because we want to # leverage make's VPATH searching rather than specifying the paths on # each file in ALL_EXAMPLES. This has the unfortunate side effect that # touching the source files trigger a rebuild of the project files # even though there is no real dependency there (the dependency is on # the makefiles). We may want to revisit this. 
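# Illustration only (a hedged sketch, not part of the build; the project
# name "vpxdec.vcxproj" is just an example): the vcproj_template define
# below is expanded once per enabled project via $(call ...), and
# $(eval ...) installs the expansion as a real rule, e.g.
#
#   $(eval $(call vcproj_template,vpxdec.vcxproj))
#
# behaves as if the explicit rule
#   vpxdec.vcxproj: $(vpxdec.SRCS) vpx.vcxproj
# invoking $(GEN_VCPROJ) had been written out by hand.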
define vcproj_template $(1): $($(1:.$(VCPROJ_SFX)=).SRCS) vpx.$(VCPROJ_SFX) $(if $(quiet),@echo " [vcproj] $$@") $(qexec)$$(GEN_VCPROJ)\ --exe\ --target=$$(TOOLCHAIN)\ --name=$$(@:.$(VCPROJ_SFX)=)\ --ver=$$(CONFIG_VS_VERSION)\ --proj-guid=$$($$(@:.$(VCPROJ_SFX)=).GUID)\ --src-path-bare="$(SRC_PATH_BARE)" \ $$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \ --out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \ $$(INTERNAL_LDFLAGS) $$(LDFLAGS) -l$$(CODEC_LIB) $$^ endef ALL_EXAMPLES_BASENAME := $(notdir $(ALL_EXAMPLES)) PROJECTS-$(CONFIG_MSVS) += $(ALL_EXAMPLES_BASENAME:.c=.$(VCPROJ_SFX)) INSTALL-BINS-$(CONFIG_MSVS) += $(foreach p,$(VS_PLATFORMS),\ $(addprefix bin/$(p)/,$(ALL_EXAMPLES_BASENAME:.c=.exe))) $(foreach proj,$(call enabled,PROJECTS),\ $(eval $(call vcproj_template,$(proj)))) # # Documentation Rules # %.dox: %.c @echo " [DOXY] $@" @mkdir -p $(dir $@) @echo "/*!\page example_$(@F:.dox=) $(@F:.dox=)" > $@ @echo " \includelineno $(<F)" >> $@ @echo "*/" >> $@ samples.dox: examples.mk @echo " [DOXY] $@" @echo "/*!\page samples Sample Code" > $@ @echo " This SDK includes a number of sample applications."\ "Each sample documents a feature of the SDK in both prose"\ "and the associated C code."\ "The following samples are included: ">>$@ @$(foreach ex,$(sort $(notdir $(EXAMPLES:.c=))),\ echo " - \subpage example_$(ex) $($(ex).DESCRIPTION)" >> $@;) @echo >> $@ @echo " In addition, the SDK contains a number of utilities."\ "Since these utilities are built upon the concepts described"\ "in the sample code listed above, they are not documented in"\ "pieces like the samples are. Their source is included here"\ "for reference. The following utilities are included:" >> $@ @$(foreach ex,$(sort $(UTILS:.c=)),\ echo " - \subpage example_$(ex) $($(ex).DESCRIPTION)" >> $@;) @echo "*/" >> $@ CLEAN-OBJS += examples.doxy samples.dox $(ALL_EXAMPLES:.c=.dox) DOCS-yes += examples.doxy samples.dox examples.doxy: samples.dox $(ALL_EXAMPLES:.c=.dox) @echo "INPUT += $^" > $@ @echo "ENABLED_SECTIONS += samples" >> $@ libvpx-1.8.2/examples/000077500000000000000000000000001357355204000146505ustar00rootroot00000000000000libvpx-1.8.2/examples/decode_to_md5.c000066400000000000000000000076371357355204000175230ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ // Frame-by-frame MD5 Checksum // =========================== // // This example builds upon the simple decoder loop to show how checksums // of the decoded output can be generated. These are used for validating // decoder implementations against the reference implementation, for example. // // MD5 algorithm // ------------- // The Message-Digest 5 (MD5) is a well known hash function. We have provided // an implementation derived from the RSA Data Security, Inc. MD5 Message-Digest // Algorithm for your use. Our implementation only changes the interface of this // reference code. You must include the `md5_utils.h` header for access to these // functions. // // Processing The Decoded Data // --------------------------- // Each row of the image is passed to the MD5 accumulator. First the Y plane // is processed, then U, then V. It is important to honor the image's `stride` // values.
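// A minimal sketch of the per-plane walk performed by get_image_md5() below
// (no new API here; `buf`, `w`, `h`, `stride`, and `md5` are the locals of
// that function). For I420, each chroma plane is half the luma dimensions,
// rounded up, and only the `w` visible bytes of each `stride`-byte row feed
// the digest:
//
//   const int w = plane ? (img->d_w + 1) >> 1 : img->d_w;
//   for (y = 0; y < h; ++y) {
//     MD5Update(&md5, buf, w);  // hash only the visible pixels
//     buf += stride;            // but advance by the full row stride
//   }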
#include <stdio.h> #include <stdlib.h> #include <string.h> #include "vpx/vp8dx.h" #include "vpx/vpx_decoder.h" #include "../md5_utils.h" #include "../tools_common.h" #include "../video_reader.h" #include "./vpx_config.h" static void get_image_md5(const vpx_image_t *img, unsigned char digest[16]) { int plane, y; MD5Context md5; MD5Init(&md5); for (plane = 0; plane < 3; ++plane) { const unsigned char *buf = img->planes[plane]; const int stride = img->stride[plane]; const int w = plane ? (img->d_w + 1) >> 1 : img->d_w; const int h = plane ? (img->d_h + 1) >> 1 : img->d_h; for (y = 0; y < h; ++y) { MD5Update(&md5, buf, w); buf += stride; } } MD5Final(digest, &md5); } static void print_md5(FILE *stream, unsigned char digest[16]) { int i; for (i = 0; i < 16; ++i) fprintf(stream, "%02x", digest[i]); } static const char *exec_name; void usage_exit(void) { fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name); exit(EXIT_FAILURE); } int main(int argc, char **argv) { int frame_cnt = 0; FILE *outfile = NULL; vpx_codec_ctx_t codec; VpxVideoReader *reader = NULL; const VpxVideoInfo *info = NULL; const VpxInterface *decoder = NULL; exec_name = argv[0]; if (argc != 3) die("Invalid number of arguments."); reader = vpx_video_reader_open(argv[1]); if (!reader) die("Failed to open %s for reading.", argv[1]); if (!(outfile = fopen(argv[2], "wb"))) die("Failed to open %s for writing.", argv[2]); info = vpx_video_reader_get_info(reader); decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc); if (!decoder) die("Unknown input codec."); printf("Using %s\n", vpx_codec_iface_name(decoder->codec_interface())); if (vpx_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) die_codec(&codec, "Failed to initialize decoder"); while (vpx_video_reader_read_frame(reader)) { vpx_codec_iter_t iter = NULL; vpx_image_t *img = NULL; size_t frame_size = 0; const unsigned char *frame = vpx_video_reader_get_frame(reader, &frame_size); if (vpx_codec_decode(&codec, frame, (unsigned int)frame_size, NULL, 0)) die_codec(&codec, "Failed to decode frame"); while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) { unsigned char digest[16]; get_image_md5(img, digest); print_md5(outfile, digest); fprintf(outfile, " img-%dx%d-%04d.i420\n", img->d_w, img->d_h, ++frame_cnt); } } printf("Processed %d frames.\n", frame_cnt); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); vpx_video_reader_close(reader); fclose(outfile); return EXIT_SUCCESS; } libvpx-1.8.2/examples/decode_with_drops.c000066400000000000000000000104701357355204000205030ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ // Decode With Drops Example // ========================= // // This is an example utility which drops a series of frames, as specified // on the command line. This is useful for observing the error recovery // features of the codec. // // Usage // ----- // This example adds a single argument to the `simple_decoder` example, // which specifies the range or pattern of frames to drop. The parameter is // parsed as follows: // // Dropping A Range Of Frames // -------------------------- // To drop a range of frames, specify the starting frame and the ending // frame to drop, separated by a dash.
The following command will drop // frames 5 through 10 (base 1). // // $ ./decode_with_drops in.ivf out.i420 5-10 // // // Dropping A Pattern Of Frames // ---------------------------- // To drop a pattern of frames, specify the number of frames to drop and // the number of frames after which to repeat the pattern, separated by // a forward-slash. The following command will drop 3 of 7 frames. // Specifically, it will decode 4 frames, then drop 3 frames, and then // repeat. // // $ ./decode_with_drops in.ivf out.i420 3/7 // // // Extra Variables // --------------- // This example maintains the pattern passed on the command line in the // `n`, `m`, and `is_range` variables: // // // Making The Drop Decision // ------------------------ // The example decides whether to drop the frame based on the current // frame number, immediately before decoding the frame. #include <stdio.h> #include <stdlib.h> #include <string.h> #include "vpx/vp8dx.h" #include "vpx/vpx_decoder.h" #include "../tools_common.h" #include "../video_reader.h" #include "./vpx_config.h" static const char *exec_name; void usage_exit(void) { fprintf(stderr, "Usage: %s <infile> <outfile> <N-M|N/M>\n", exec_name); exit(EXIT_FAILURE); } int main(int argc, char **argv) { int frame_cnt = 0; FILE *outfile = NULL; vpx_codec_ctx_t codec; const VpxInterface *decoder = NULL; VpxVideoReader *reader = NULL; const VpxVideoInfo *info = NULL; int n = 0; int m = 0; int is_range = 0; char *nptr = NULL; exec_name = argv[0]; if (argc != 4) die("Invalid number of arguments."); reader = vpx_video_reader_open(argv[1]); if (!reader) die("Failed to open %s for reading.", argv[1]); if (!(outfile = fopen(argv[2], "wb"))) die("Failed to open %s for writing.", argv[2]); n = (int)strtol(argv[3], &nptr, 0); m = (int)strtol(nptr + 1, NULL, 0); is_range = (*nptr == '-'); if (!n || !m || (*nptr != '-' && *nptr != '/')) die("Couldn't parse pattern %s.\n", argv[3]); info = vpx_video_reader_get_info(reader); decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc); if (!decoder) die("Unknown input codec."); printf("Using %s\n", vpx_codec_iface_name(decoder->codec_interface())); if (vpx_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) die_codec(&codec, "Failed to initialize decoder."); while (vpx_video_reader_read_frame(reader)) { vpx_codec_iter_t iter = NULL; vpx_image_t *img = NULL; size_t frame_size = 0; int skip; const unsigned char *frame = vpx_video_reader_get_frame(reader, &frame_size); if (vpx_codec_decode(&codec, frame, (unsigned int)frame_size, NULL, 0)) die_codec(&codec, "Failed to decode frame."); ++frame_cnt; skip = (is_range && frame_cnt >= n && frame_cnt <= m) || (!is_range && m - (frame_cnt - 1) % m <= n); if (!skip) { putc('.', stdout); while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) vpx_img_write(img, outfile); } else { putc('X', stdout); } fflush(stdout); } printf("Processed %d frames.\n", frame_cnt); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n", info->frame_width, info->frame_height, argv[2]); vpx_video_reader_close(reader); fclose(outfile); return EXIT_SUCCESS; } libvpx-1.8.2/examples/postproc.c000066400000000000000000000107641357355204000166730ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS.
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ // Postprocessing Decoder // ====================== // // This example adds postprocessing to the simple decoder loop. // // Initializing Postprocessing // --------------------------- // You must inform the codec that you might request postprocessing at // initialization time. This is done by passing the VPX_CODEC_USE_POSTPROC // flag to `vpx_codec_dec_init`. If the codec does not support // postprocessing, this call will return VPX_CODEC_INCAPABLE. For // demonstration purposes, we also fall back to default initialization if // the codec does not provide support. // // Using Adaptive Postprocessing // ----------------------------- // VP6 provides "adaptive postprocessing." It will automatically select the // best postprocessing filter on a frame by frame basis based on the amount // of time remaining before the user's specified deadline expires. The // special value 0 indicates that the codec should take as long as // necessary to provide the best quality frame. This example gives the // codec 15ms (15000us) to return a frame. Remember that this is a soft // deadline, and the codec may exceed it doing its regular processing. In // these cases, no additional postprocessing will be done. // // Codec Specific Postprocessing Controls // -------------------------------------- // Some codecs provide fine grained controls over their built-in // postprocessors. VP8 is one example. The following sample code toggles // postprocessing on and off every 15 frames. #include <stdio.h> #include <stdlib.h> #include <string.h> #include "vpx/vp8dx.h" #include "vpx/vpx_decoder.h" #include "../tools_common.h" #include "../video_reader.h" #include "./vpx_config.h" static const char *exec_name; void usage_exit(void) { fprintf(stderr, "Usage: %s <infile> <outfile>\n", exec_name); exit(EXIT_FAILURE); } int main(int argc, char **argv) { int frame_cnt = 0; FILE *outfile = NULL; vpx_codec_ctx_t codec; vpx_codec_err_t res; VpxVideoReader *reader = NULL; const VpxInterface *decoder = NULL; const VpxVideoInfo *info = NULL; exec_name = argv[0]; if (argc != 3) die("Invalid number of arguments."); reader = vpx_video_reader_open(argv[1]); if (!reader) die("Failed to open %s for reading.", argv[1]); if (!(outfile = fopen(argv[2], "wb"))) die("Failed to open %s for writing", argv[2]); info = vpx_video_reader_get_info(reader); decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc); if (!decoder) die("Unknown input codec."); printf("Using %s\n", vpx_codec_iface_name(decoder->codec_interface())); res = vpx_codec_dec_init(&codec, decoder->codec_interface(), NULL, VPX_CODEC_USE_POSTPROC); if (res == VPX_CODEC_INCAPABLE) die_codec(&codec, "Postproc not supported by this decoder."); if (res) die_codec(&codec, "Failed to initialize decoder."); while (vpx_video_reader_read_frame(reader)) { vpx_codec_iter_t iter = NULL; vpx_image_t *img = NULL; size_t frame_size = 0; const unsigned char *frame = vpx_video_reader_get_frame(reader, &frame_size); ++frame_cnt; if (frame_cnt % 30 == 1) { vp8_postproc_cfg_t pp = { 0, 0, 0 }; if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp)) die_codec(&codec, "Failed to turn off postproc."); } else if (frame_cnt % 30 == 16) { vp8_postproc_cfg_t pp = { VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE, 4, 0 }; if (vpx_codec_control(&codec, VP8_SET_POSTPROC, &pp)) die_codec(&codec, "Failed to turn on postproc."); }; // Decode the frame with 15ms deadline if (vpx_codec_decode(&codec, frame, (unsigned int)frame_size, NULL, 15000)) die_codec(&codec, "Failed to
decode frame"); while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) { vpx_img_write(img, outfile); } } printf("Processed %d frames.\n", frame_cnt); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n", info->frame_width, info->frame_height, argv[2]); vpx_video_reader_close(reader); fclose(outfile); return EXIT_SUCCESS; } libvpx-1.8.2/examples/resize_util.c000066400000000000000000000061361357355204000173600ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include #include #include #include "../tools_common.h" #include "../vp9/encoder/vp9_resize.h" static const char *exec_name = NULL; static void usage() { printf("Usage:\n"); printf("%s x x ", exec_name); printf(" []\n"); } void usage_exit(void) { usage(); exit(EXIT_FAILURE); } static int parse_dim(char *v, int *width, int *height) { char *x = strchr(v, 'x'); if (x == NULL) x = strchr(v, 'X'); if (x == NULL) return 0; *width = atoi(v); *height = atoi(&x[1]); if (*width <= 0 || *height <= 0) return 0; else return 1; } int main(int argc, char *argv[]) { char *fin, *fout; FILE *fpin, *fpout; uint8_t *inbuf, *outbuf; uint8_t *inbuf_u, *outbuf_u; uint8_t *inbuf_v, *outbuf_v; int f, frames; int width, height, target_width, target_height; exec_name = argv[0]; if (argc < 5) { printf("Incorrect parameters:\n"); usage(); return 1; } fin = argv[1]; fout = argv[4]; if (!parse_dim(argv[2], &width, &height)) { printf("Incorrect parameters: %s\n", argv[2]); usage(); return 1; } if (!parse_dim(argv[3], &target_width, &target_height)) { printf("Incorrect parameters: %s\n", argv[3]); usage(); return 1; } fpin = fopen(fin, "rb"); if (fpin == NULL) { printf("Can't open file %s to read\n", fin); usage(); return 1; } fpout = fopen(fout, "wb"); if (fpout == NULL) { printf("Can't open file %s to write\n", fout); usage(); return 1; } if (argc >= 6) frames = atoi(argv[5]); else frames = INT_MAX; printf("Input size: %dx%d\n", width, height); printf("Target size: %dx%d, Frames: ", target_width, target_height); if (frames == INT_MAX) printf("All\n"); else printf("%d\n", frames); inbuf = (uint8_t *)malloc(width * height * 3 / 2); outbuf = (uint8_t *)malloc(target_width * target_height * 3 / 2); inbuf_u = inbuf + width * height; inbuf_v = inbuf_u + width * height / 4; outbuf_u = outbuf + target_width * target_height; outbuf_v = outbuf_u + target_width * target_height / 4; f = 0; while (f < frames) { if (fread(inbuf, width * height * 3 / 2, 1, fpin) != 1) break; vp9_resize_frame420(inbuf, width, inbuf_u, inbuf_v, width / 2, height, width, outbuf, target_width, outbuf_u, outbuf_v, target_width / 2, target_height, target_width); fwrite(outbuf, target_width * target_height * 3 / 2, 1, fpout); f++; } printf("%d frames processed\n", f); fclose(fpin); fclose(fpout); free(inbuf); free(outbuf); return 0; } libvpx-1.8.2/examples/set_maps.c000066400000000000000000000164141357355204000166350ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
*
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// VP8 Set Active and ROI Maps
// ===========================
//
// This is an example demonstrating how to control the VP8 encoder's
// ROI and Active maps.
//
// ROI (Region of Interest) maps are a way for the application to assign
// each macroblock in the image to a region, and then set quantizer and
// filtering parameters on that image.
//
// Active maps are a way for the application to specify on a
// macroblock-by-macroblock basis whether there is any activity in that
// macroblock.
//
//
// Configuration
// -------------
// An ROI map is set on frame 22. If the width of the image in macroblocks
// is evenly divisible by 4, then the output will appear to have distinct
// columns, where the quantizer, loopfilter, and static threshold differ
// from column to column.
//
// An active map is set on frame 33. If the width of the image in
// macroblocks is evenly divisible by 4, then the output will appear to
// have distinct columns, where one column will have motion and the next
// will not.
//
// The active map is cleared on frame 44.
//
// Observing The Effects
// ---------------------
// Use the `simple_decoder` example to decode this sample, and observe
// the change in the image at frames 22, 33, and 44.

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

#include "../tools_common.h"
#include "../video_writer.h"

static const char *exec_name;

void usage_exit(void) {
  fprintf(stderr, "Usage: %s <codec> <width> <height> <infile> <outfile>\n",
          exec_name);
  exit(EXIT_FAILURE);
}

static void set_roi_map(const vpx_codec_enc_cfg_t *cfg,
                        vpx_codec_ctx_t *codec) {
  unsigned int i;
  vpx_roi_map_t roi;
  memset(&roi, 0, sizeof(roi));

  roi.rows = (cfg->g_h + 15) / 16;
  roi.cols = (cfg->g_w + 15) / 16;

  roi.delta_q[0] = 0;
  roi.delta_q[1] = -2;
  roi.delta_q[2] = -4;
  roi.delta_q[3] = -6;

  roi.delta_lf[0] = 0;
  roi.delta_lf[1] = 1;
  roi.delta_lf[2] = 2;
  roi.delta_lf[3] = 3;

  roi.static_threshold[0] = 1500;
  roi.static_threshold[1] = 1000;
  roi.static_threshold[2] = 500;
  roi.static_threshold[3] = 0;

  roi.roi_map = (uint8_t *)malloc(roi.rows * roi.cols);
  for (i = 0; i < roi.rows * roi.cols; ++i) roi.roi_map[i] = i % 4;

  if (vpx_codec_control(codec, VP8E_SET_ROI_MAP, &roi))
    die_codec(codec, "Failed to set ROI map");

  free(roi.roi_map);
}

static void set_active_map(const vpx_codec_enc_cfg_t *cfg,
                           vpx_codec_ctx_t *codec) {
  unsigned int i;
  vpx_active_map_t map = { 0, 0, 0 };

  map.rows = (cfg->g_h + 15) / 16;
  map.cols = (cfg->g_w + 15) / 16;

  map.active_map = (uint8_t *)malloc(map.rows * map.cols);
  for (i = 0; i < map.rows * map.cols; ++i) map.active_map[i] = i % 2;

  if (vpx_codec_control(codec, VP8E_SET_ACTIVEMAP, &map))
    die_codec(codec, "Failed to set active map");

  free(map.active_map);
}

static void unset_active_map(const vpx_codec_enc_cfg_t *cfg,
                             vpx_codec_ctx_t *codec) {
  vpx_active_map_t map = { 0, 0, 0 };

  map.rows = (cfg->g_h + 15) / 16;
  map.cols = (cfg->g_w + 15) / 16;
  map.active_map = NULL;

  if (vpx_codec_control(codec, VP8E_SET_ACTIVEMAP, &map))
    die_codec(codec, "Failed to set active map");
}

static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img,
                        int frame_index, VpxVideoWriter *writer) {
  int got_pkts = 0;
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt = NULL;
  const vpx_codec_err_t res =
vpx_codec_encode(codec, img, frame_index, 1, 0, VPX_DL_GOOD_QUALITY); if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame"); while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) { got_pkts = 1; if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf, pkt->data.frame.sz, pkt->data.frame.pts)) { die_codec(codec, "Failed to write compressed frame"); } printf(keyframe ? "K" : "."); fflush(stdout); } } return got_pkts; } int main(int argc, char **argv) { FILE *infile = NULL; vpx_codec_ctx_t codec; vpx_codec_enc_cfg_t cfg; int frame_count = 0; vpx_image_t raw; vpx_codec_err_t res; VpxVideoInfo info; VpxVideoWriter *writer = NULL; const VpxInterface *encoder = NULL; const int fps = 2; // TODO(dkovalev) add command line argument const double bits_per_pixel_per_frame = 0.067; exec_name = argv[0]; if (argc != 6) die("Invalid number of arguments"); memset(&info, 0, sizeof(info)); encoder = get_vpx_encoder_by_name(argv[1]); if (encoder == NULL) { die("Unsupported codec."); } assert(encoder != NULL); info.codec_fourcc = encoder->fourcc; info.frame_width = (int)strtol(argv[2], NULL, 0); info.frame_height = (int)strtol(argv[3], NULL, 0); info.time_base.numerator = 1; info.time_base.denominator = fps; if (info.frame_width <= 0 || info.frame_height <= 0 || (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); } if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width, info.frame_height, 1)) { die("Failed to allocate image."); } printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); if (res) die_codec(&codec, "Failed to get default codec config."); cfg.g_w = info.frame_width; cfg.g_h = info.frame_height; cfg.g_timebase.num = info.time_base.numerator; cfg.g_timebase.den = info.time_base.denominator; cfg.rc_target_bitrate = (unsigned int)(bits_per_pixel_per_frame * cfg.g_w * cfg.g_h * fps / 1000); cfg.g_lag_in_frames = 0; writer = vpx_video_writer_open(argv[5], kContainerIVF, &info); if (!writer) die("Failed to open %s for writing.", argv[5]); if (!(infile = fopen(argv[4], "rb"))) die("Failed to open %s for reading.", argv[4]); if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) die_codec(&codec, "Failed to initialize encoder"); // Encode frames. while (vpx_img_read(&raw, infile)) { ++frame_count; if (frame_count == 22 && encoder->fourcc == VP8_FOURCC) { set_roi_map(&cfg, &codec); } else if (frame_count == 33) { set_active_map(&cfg, &codec); } else if (frame_count == 44) { unset_active_map(&cfg, &codec); } encode_frame(&codec, &raw, frame_count, writer); } // Flush encoder. while (encode_frame(&codec, NULL, -1, writer)) { } printf("\n"); fclose(infile); printf("Processed %d frames.\n", frame_count); vpx_img_free(&raw); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); vpx_video_writer_close(writer); return EXIT_SUCCESS; } libvpx-1.8.2/examples/simple_decoder.c000066400000000000000000000126211357355204000177740ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// Simple Decoder
// ==============
//
// This is an example of a simple decoder loop. It takes an input file
// containing the compressed data (in IVF format), passes it through the
// decoder, and writes the decompressed frames to disk. Other decoder
// examples build upon this one.
//
// The details of the IVF format have been elided from this example for
// simplicity of presentation, as IVF files will not generally be used by
// your application. In general, an IVF file consists of a file header,
// followed by a variable number of frames. Each frame consists of a frame
// header followed by a variable length payload. The length of the payload
// is specified in the first four bytes of the frame header. The payload is
// the raw compressed data.
//
// Standard Includes
// -----------------
// For decoders, you only have to include `vpx_decoder.h` and then any
// header files for the specific codecs you use. In this case, we're using
// vp8.
//
// Initializing The Codec
// ----------------------
// The libvpx decoder is initialized by the call to vpx_codec_dec_init().
// Determining the codec interface to use is handled by VpxVideoReader and
// the functions prefixed with vpx_video_reader_. Discussion of those
// functions is beyond the scope of this example, but the main gist is to
// open the input file and parse just enough of it to determine if it's a
// VPx file and which VPx codec is contained within the file.
// Note the NULL pointer passed to vpx_codec_dec_init(). We do that in this
// example because we want the algorithm to determine the stream
// configuration (width/height) and allocate memory automatically.
//
// Decoding A Frame
// ----------------
// Once the frame has been read into memory, it is decoded using the
// `vpx_codec_decode` function. The call takes a pointer to the data
// (`frame`) and the length of the data (`frame_size`). No application data
// is associated with the frame in this example, so the `user_priv`
// parameter is NULL. The `deadline` parameter is left at zero for this
// example. This parameter is generally only used when doing adaptive post
// processing.
//
// Codecs may produce a variable number of output frames for every call to
// `vpx_codec_decode`. These frames are retrieved by the
// `vpx_codec_get_frame` iterator function. The iterator variable `iter` is
// initialized to NULL each time `vpx_codec_decode` is called.
// `vpx_codec_get_frame` is called in a loop, returning a pointer to a
// decoded image or NULL to indicate the end of list.
//
// Processing The Decoded Data
// ---------------------------
// In this example, we simply write the decoded data to disk. It is
// important to honor the image's `stride` values.
//
// Cleanup
// -------
// The `vpx_codec_destroy` call frees any memory allocated by the codec.
//
// Error Handling
// --------------
// This example does not special case any error return codes. If there was
// an error, a descriptive message is printed and the program exits. With
// few exceptions, vpx_codec functions return an enumerated error status,
// with the value `0` indicating success.
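//
// As an aside on the stride point above, here is a minimal sketch of a
// plane writer in the spirit of `vpx_img_write` (kept in comment form so
// it does not disturb this file; the helper name and the I420 layout
// assumptions are ours, not part of this example). It advances through
// each plane by `stride`, not by the display width, because rows may be
// padded in memory:
//
//   static void write_planes_sketch(const vpx_image_t *img, FILE *file) {
//     int plane;
//     for (plane = 0; plane < 3; ++plane) {
//       const unsigned char *buf = img->planes[plane];
//       const int stride = img->stride[plane];
//       // Chroma planes of an I420 image are half size in each dimension.
//       const int w = plane ? (img->d_w + 1) / 2 : img->d_w;
//       const int h = plane ? (img->d_h + 1) / 2 : img->d_h;
//       int y;
//       for (y = 0; y < h; ++y) {
//         fwrite(buf, 1, w, file);  // write only the visible pixels
//         buf += stride;            // then skip any row padding
//       }
//     }
//   }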
#include #include #include #include "vpx/vpx_decoder.h" #include "../tools_common.h" #include "../video_reader.h" #include "./vpx_config.h" static const char *exec_name; void usage_exit(void) { fprintf(stderr, "Usage: %s \n", exec_name); exit(EXIT_FAILURE); } int main(int argc, char **argv) { int frame_cnt = 0; FILE *outfile = NULL; vpx_codec_ctx_t codec; VpxVideoReader *reader = NULL; const VpxInterface *decoder = NULL; const VpxVideoInfo *info = NULL; exec_name = argv[0]; if (argc != 3) die("Invalid number of arguments."); reader = vpx_video_reader_open(argv[1]); if (!reader) die("Failed to open %s for reading.", argv[1]); if (!(outfile = fopen(argv[2], "wb"))) die("Failed to open %s for writing.", argv[2]); info = vpx_video_reader_get_info(reader); decoder = get_vpx_decoder_by_fourcc(info->codec_fourcc); if (!decoder) die("Unknown input codec."); printf("Using %s\n", vpx_codec_iface_name(decoder->codec_interface())); if (vpx_codec_dec_init(&codec, decoder->codec_interface(), NULL, 0)) die_codec(&codec, "Failed to initialize decoder."); while (vpx_video_reader_read_frame(reader)) { vpx_codec_iter_t iter = NULL; vpx_image_t *img = NULL; size_t frame_size = 0; const unsigned char *frame = vpx_video_reader_get_frame(reader, &frame_size); if (vpx_codec_decode(&codec, frame, (unsigned int)frame_size, NULL, 0)) die_codec(&codec, "Failed to decode frame."); while ((img = vpx_codec_get_frame(&codec, &iter)) != NULL) { vpx_img_write(img, outfile); ++frame_cnt; } } printf("Processed %d frames.\n", frame_cnt); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); printf("Play: ffplay -f rawvideo -pix_fmt yuv420p -s %dx%d %s\n", info->frame_width, info->frame_height, argv[2]); vpx_video_reader_close(reader); fclose(outfile); return EXIT_SUCCESS; } libvpx-1.8.2/examples/simple_encoder.c000066400000000000000000000217101357355204000200050ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ // Simple Encoder // ============== // // This is an example of a simple encoder loop. It takes an input file in // YV12 format, passes it through the encoder, and writes the compressed // frames to disk in IVF format. Other decoder examples build upon this // one. // // The details of the IVF format have been elided from this example for // simplicity of presentation, as IVF files will not generally be used by // your application. In general, an IVF file consists of a file header, // followed by a variable number of frames. Each frame consists of a frame // header followed by a variable length payload. The length of the payload // is specified in the first four bytes of the frame header. The payload is // the raw compressed data. // // Standard Includes // ----------------- // For encoders, you only have to include `vpx_encoder.h` and then any // header files for the specific codecs you use. In this case, we're using // vp8. // // Getting The Default Configuration // --------------------------------- // Encoders have the notion of "usage profiles." For example, an encoder // may want to publish default configurations for both a video // conferencing application and a best quality offline encoder. 
These
// obviously have very different default settings. Consult the
// documentation for your codec to see if it provides any default
// configurations. All codecs provide a default configuration, number 0,
// which is valid for material in the vicinity of QCIF/QVGA.
//
// Updating The Configuration
// --------------------------
// Almost all applications will want to update the default configuration
// with settings specific to their usage. Here we set the width and height
// of the video file to that specified on the command line. We also scale
// the default bitrate based on the ratio between the default resolution
// and the resolution specified on the command line.
//
// Initializing The Codec
// ----------------------
// The encoder is initialized by the following code.
//
// Encoding A Frame
// ----------------
// The frame is read as a continuous block (size width * height * 3 / 2)
// from the input file. If a frame was read (the input file has not hit
// EOF) then the frame is passed to the encoder. Otherwise, a NULL
// is passed, indicating the End-Of-Stream condition to the encoder. The
// `frame_cnt` is reused as the presentation time stamp (PTS) and each
// frame is shown for one frame-time in duration. The flags parameter is
// unused in this example. The deadline is set to VPX_DL_GOOD_QUALITY,
// matching the call in `encode_frame` below.
//
// Forced Keyframes
// ----------------
// Keyframes can be forced by setting the VPX_EFLAG_FORCE_KF bit of the
// flags passed to `vpx_codec_encode()`. In this example, we force a
// keyframe every `keyframe-interval` frames. Note, the output stream can
// contain additional keyframes beyond those that have been forced using
// the VPX_EFLAG_FORCE_KF flag because of automatic keyframe placement by
// the encoder.
//
// Processing The Encoded Data
// ---------------------------
// Each packet of type `VPX_CODEC_CX_FRAME_PKT` contains the encoded data
// for this frame. We write an IVF frame header, followed by the raw data.
//
// Cleanup
// -------
// The `vpx_codec_destroy` call frees any memory allocated by the codec.
//
// Error Handling
// --------------
// This example does not special case any error return codes. If there was
// an error, a descriptive message is printed and the program exits. With
// few exceptions, vpx_codec functions return an enumerated error status,
// with the value `0` indicating success.
//
// Error Resiliency Features
// -------------------------
// Error resiliency is controlled by the g_error_resilient member of the
// configuration structure. Use the `decode_with_drops` example to decode
// with frames 5-10 dropped. Compare the output for a file encoded with
// error resiliency enabled versus one encoded without it.
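//
// To make the bitrate-scaling remark above concrete, a comment-form
// sketch of scaling the default rate by the pixel-count ratio follows.
// This example does not actually do this (it uses the fixed `bitrate`
// constant below), and the computation would have to run before cfg.g_w
// and cfg.g_h are overwritten with the command-line dimensions:
//
//   cfg.rc_target_bitrate = (unsigned int)(
//       (uint64_t)cfg.rc_target_bitrate * info.frame_width *
//       info.frame_height / ((uint64_t)cfg.g_w * cfg.g_h));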
#include #include #include #include "vpx/vpx_encoder.h" #include "../tools_common.h" #include "../video_writer.h" static const char *exec_name; void usage_exit(void) { fprintf(stderr, "Usage: %s " " \n" "See comments in simple_encoder.c for more information.\n", exec_name); exit(EXIT_FAILURE); } static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img, int frame_index, int flags, VpxVideoWriter *writer) { int got_pkts = 0; vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *pkt = NULL; const vpx_codec_err_t res = vpx_codec_encode(codec, img, frame_index, 1, flags, VPX_DL_GOOD_QUALITY); if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame"); while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) { got_pkts = 1; if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf, pkt->data.frame.sz, pkt->data.frame.pts)) { die_codec(codec, "Failed to write compressed frame"); } printf(keyframe ? "K" : "."); fflush(stdout); } } return got_pkts; } // TODO(tomfinegan): Improve command line parsing and add args for bitrate/fps. int main(int argc, char **argv) { FILE *infile = NULL; vpx_codec_ctx_t codec; vpx_codec_enc_cfg_t cfg; int frame_count = 0; vpx_image_t raw; vpx_codec_err_t res; VpxVideoInfo info = { 0, 0, 0, { 0, 0 } }; VpxVideoWriter *writer = NULL; const VpxInterface *encoder = NULL; const int fps = 30; const int bitrate = 200; int keyframe_interval = 0; int max_frames = 0; int frames_encoded = 0; const char *codec_arg = NULL; const char *width_arg = NULL; const char *height_arg = NULL; const char *infile_arg = NULL; const char *outfile_arg = NULL; const char *keyframe_interval_arg = NULL; exec_name = argv[0]; if (argc != 9) die("Invalid number of arguments"); codec_arg = argv[1]; width_arg = argv[2]; height_arg = argv[3]; infile_arg = argv[4]; outfile_arg = argv[5]; keyframe_interval_arg = argv[6]; max_frames = (int)strtol(argv[8], NULL, 0); encoder = get_vpx_encoder_by_name(codec_arg); if (!encoder) die("Unsupported codec."); info.codec_fourcc = encoder->fourcc; info.frame_width = (int)strtol(width_arg, NULL, 0); info.frame_height = (int)strtol(height_arg, NULL, 0); info.time_base.numerator = 1; info.time_base.denominator = fps; if (info.frame_width <= 0 || info.frame_height <= 0 || (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); } if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width, info.frame_height, 1)) { die("Failed to allocate image."); } keyframe_interval = (int)strtol(keyframe_interval_arg, NULL, 0); if (keyframe_interval < 0) die("Invalid keyframe interval value."); printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); if (res) die_codec(&codec, "Failed to get default codec config."); cfg.g_w = info.frame_width; cfg.g_h = info.frame_height; cfg.g_timebase.num = info.time_base.numerator; cfg.g_timebase.den = info.time_base.denominator; cfg.rc_target_bitrate = bitrate; cfg.g_error_resilient = (vpx_codec_er_flags_t)strtoul(argv[7], NULL, 0); writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info); if (!writer) die("Failed to open %s for writing.", outfile_arg); if (!(infile = fopen(infile_arg, "rb"))) die("Failed to open %s for reading.", infile_arg); if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) die_codec(&codec, "Failed to 
initialize encoder"); // Encode frames. while (vpx_img_read(&raw, infile)) { int flags = 0; if (keyframe_interval > 0 && frame_count % keyframe_interval == 0) flags |= VPX_EFLAG_FORCE_KF; encode_frame(&codec, &raw, frame_count++, flags, writer); frames_encoded++; if (max_frames > 0 && frames_encoded >= max_frames) break; } // Flush encoder. while (encode_frame(&codec, NULL, -1, 0, writer)) { } printf("\n"); fclose(infile); printf("Processed %d frames.\n", frame_count); vpx_img_free(&raw); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); vpx_video_writer_close(writer); return EXIT_SUCCESS; } libvpx-1.8.2/examples/svc_context.h000066400000000000000000000063471357355204000173720ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /** * SvcContext - input parameters and state to encode a multi-layered * spatial SVC frame */ #ifndef VPX_EXAMPLES_SVC_CONTEXT_H_ #define VPX_EXAMPLES_SVC_CONTEXT_H_ #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" #ifdef __cplusplus extern "C" { #endif typedef enum SVC_LOG_LEVEL { SVC_LOG_ERROR, SVC_LOG_INFO, SVC_LOG_DEBUG } SVC_LOG_LEVEL; typedef struct { // public interface to svc_command options int spatial_layers; // number of spatial layers int temporal_layers; // number of temporal layers int temporal_layering_mode; SVC_LOG_LEVEL log_level; // amount of information to display int output_rc_stat; // for outputting rc stats int speed; // speed setting for codec int threads; int aqmode; // turns on aq-mode=3 (cyclic_refresh): 0=off, 1=on. // private storage for vpx_svc_encode void *internal; } SvcContext; #define OPTION_BUFFER_SIZE 1024 #define COMPONENTS 4 // psnr & sse statistics maintained for total, y, u, v typedef struct SvcInternal { char options[OPTION_BUFFER_SIZE]; // set by vpx_svc_set_options // values extracted from option, quantizers vpx_svc_extra_cfg_t svc_params; int enable_auto_alt_ref[VPX_SS_MAX_LAYERS]; int bitrates[VPX_MAX_LAYERS]; // accumulated statistics double psnr_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; // total/Y/U/V uint64_t sse_sum[VPX_SS_MAX_LAYERS][COMPONENTS]; uint32_t bytes_sum[VPX_SS_MAX_LAYERS]; // codec encoding values int width; // width of highest layer int height; // height of highest layer int kf_dist; // distance between keyframes // state variables int psnr_pkt_received; int layer; int use_multiple_frame_contexts; vpx_codec_ctx_t *codec_ctx; } SvcInternal_t; /** * Set SVC options * options are supplied as a single string separated by spaces * Format: encoding-mode= * layers= * scaling-factors=/,/,... * quantizers=,,... 
*/ vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options); /** * initialize SVC encoding */ vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *cfg); /** * encode a frame of video with multiple layers */ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, struct vpx_image *rawimg, vpx_codec_pts_t pts, int64_t duration, int deadline); /** * finished with svc encoding, release allocated resources */ void vpx_svc_release(SvcContext *svc_ctx); /** * dump accumulated statistics and reset accumulated values */ void vpx_svc_dump_statistics(SvcContext *svc_ctx); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_EXAMPLES_SVC_CONTEXT_H_ libvpx-1.8.2/examples/svc_encodeframe.c000066400000000000000000000541311357355204000201430ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /** * @file * VP9 SVC encoding support via libvpx */ #include #include #include #include #include #include #include #define VPX_DISABLE_CTRL_TYPECHECKS 1 #include "./vpx_config.h" #include "./svc_context.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" #include "vpx_mem/vpx_mem.h" #include "vp9/common/vp9_onyxc_int.h" #ifdef __MINGW32__ #define strtok_r strtok_s #ifndef MINGW_HAS_SECURE_API // proto from /usr/x86_64-w64-mingw32/include/sec_api/string_s.h _CRTIMP char *__cdecl strtok_s(char *str, const char *delim, char **context); #endif /* MINGW_HAS_SECURE_API */ #endif /* __MINGW32__ */ #ifdef _MSC_VER #define strdup _strdup #define strtok_r strtok_s #endif #define SVC_REFERENCE_FRAMES 8 #define SUPERFRAME_SLOTS (8) #define SUPERFRAME_BUFFER_SIZE (SUPERFRAME_SLOTS * sizeof(uint32_t) + 2) #define MAX_QUANTIZER 63 static const int DEFAULT_SCALE_FACTORS_NUM[VPX_SS_MAX_LAYERS] = { 4, 5, 7, 11, 16 }; static const int DEFAULT_SCALE_FACTORS_DEN[VPX_SS_MAX_LAYERS] = { 16, 16, 16, 16, 16 }; static const int DEFAULT_SCALE_FACTORS_NUM_2x[VPX_SS_MAX_LAYERS] = { 1, 2, 4 }; static const int DEFAULT_SCALE_FACTORS_DEN_2x[VPX_SS_MAX_LAYERS] = { 4, 4, 4 }; typedef enum { QUANTIZER = 0, BITRATE, SCALE_FACTOR, AUTO_ALT_REF, ALL_OPTION_TYPES } LAYER_OPTION_TYPE; static const int option_max_values[ALL_OPTION_TYPES] = { 63, INT_MAX, INT_MAX, 1 }; static const int option_min_values[ALL_OPTION_TYPES] = { 0, 0, 1, 0 }; // One encoded frame typedef struct FrameData { void *buf; // compressed data buffer size_t size; // length of compressed data vpx_codec_frame_flags_t flags; /**< flags for this frame */ struct FrameData *next; } FrameData; static SvcInternal_t *get_svc_internal(SvcContext *svc_ctx) { if (svc_ctx == NULL) return NULL; if (svc_ctx->internal == NULL) { SvcInternal_t *const si = (SvcInternal_t *)malloc(sizeof(*si)); if (si != NULL) { memset(si, 0, sizeof(*si)); } svc_ctx->internal = si; } return (SvcInternal_t *)svc_ctx->internal; } static const SvcInternal_t *get_const_svc_internal(const SvcContext *svc_ctx) { if (svc_ctx == NULL) return NULL; return (const SvcInternal_t *)svc_ctx->internal; } static int svc_log(SvcContext *svc_ctx, SVC_LOG_LEVEL level, const char *fmt, ...) 
{ char buf[512]; int retval = 0; va_list ap; if (level > svc_ctx->log_level) { return retval; } va_start(ap, fmt); retval = vsnprintf(buf, sizeof(buf), fmt, ap); va_end(ap); printf("%s", buf); return retval; } static vpx_codec_err_t extract_option(LAYER_OPTION_TYPE type, char *input, int *value0, int *value1) { if (type == SCALE_FACTOR) { *value0 = (int)strtol(input, &input, 10); if (*input++ != '/') return VPX_CODEC_INVALID_PARAM; *value1 = (int)strtol(input, &input, 10); if (*value0 < option_min_values[SCALE_FACTOR] || *value1 < option_min_values[SCALE_FACTOR] || *value0 > option_max_values[SCALE_FACTOR] || *value1 > option_max_values[SCALE_FACTOR] || *value0 > *value1) // num shouldn't be greater than den return VPX_CODEC_INVALID_PARAM; } else { *value0 = atoi(input); if (*value0 < option_min_values[type] || *value0 > option_max_values[type]) return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_OK; } static vpx_codec_err_t parse_layer_options_from_string(SvcContext *svc_ctx, LAYER_OPTION_TYPE type, const char *input, int *option0, int *option1) { int i; vpx_codec_err_t res = VPX_CODEC_OK; char *input_string; char *token; const char *delim = ","; char *save_ptr; int num_layers = svc_ctx->spatial_layers; if (type == BITRATE) num_layers = svc_ctx->spatial_layers * svc_ctx->temporal_layers; if (input == NULL || option0 == NULL || (option1 == NULL && type == SCALE_FACTOR)) return VPX_CODEC_INVALID_PARAM; input_string = strdup(input); if (input_string == NULL) return VPX_CODEC_MEM_ERROR; token = strtok_r(input_string, delim, &save_ptr); for (i = 0; i < num_layers; ++i) { if (token != NULL) { res = extract_option(type, token, option0 + i, option1 + i); if (res != VPX_CODEC_OK) break; token = strtok_r(NULL, delim, &save_ptr); } else { break; } } if (res == VPX_CODEC_OK && i != num_layers) { svc_log(svc_ctx, SVC_LOG_ERROR, "svc: layer params type: %d %d values required, " "but only %d specified\n", type, num_layers, i); res = VPX_CODEC_INVALID_PARAM; } free(input_string); return res; } /** * Parse SVC encoding options * Format: encoding-mode=,layers= * scale-factors=/,/,... * quantizers=,,... 
* svc_mode = [i|ip|alt_ip|gf] */ static vpx_codec_err_t parse_options(SvcContext *svc_ctx, const char *options) { char *input_string; char *option_name; char *option_value; char *input_ptr = NULL; SvcInternal_t *const si = get_svc_internal(svc_ctx); vpx_codec_err_t res = VPX_CODEC_OK; int i, alt_ref_enabled = 0; if (options == NULL) return VPX_CODEC_OK; input_string = strdup(options); if (input_string == NULL) return VPX_CODEC_MEM_ERROR; // parse option name option_name = strtok_r(input_string, "=", &input_ptr); while (option_name != NULL) { // parse option value option_value = strtok_r(NULL, " ", &input_ptr); if (option_value == NULL) { svc_log(svc_ctx, SVC_LOG_ERROR, "option missing value: %s\n", option_name); res = VPX_CODEC_INVALID_PARAM; break; } if (strcmp("spatial-layers", option_name) == 0) { svc_ctx->spatial_layers = atoi(option_value); } else if (strcmp("temporal-layers", option_name) == 0) { svc_ctx->temporal_layers = atoi(option_value); } else if (strcmp("scale-factors", option_name) == 0) { res = parse_layer_options_from_string(svc_ctx, SCALE_FACTOR, option_value, si->svc_params.scaling_factor_num, si->svc_params.scaling_factor_den); if (res != VPX_CODEC_OK) break; } else if (strcmp("max-quantizers", option_name) == 0) { res = parse_layer_options_from_string(svc_ctx, QUANTIZER, option_value, si->svc_params.max_quantizers, NULL); if (res != VPX_CODEC_OK) break; } else if (strcmp("min-quantizers", option_name) == 0) { res = parse_layer_options_from_string(svc_ctx, QUANTIZER, option_value, si->svc_params.min_quantizers, NULL); if (res != VPX_CODEC_OK) break; } else if (strcmp("auto-alt-refs", option_name) == 0) { res = parse_layer_options_from_string(svc_ctx, AUTO_ALT_REF, option_value, si->enable_auto_alt_ref, NULL); if (res != VPX_CODEC_OK) break; } else if (strcmp("bitrates", option_name) == 0) { res = parse_layer_options_from_string(svc_ctx, BITRATE, option_value, si->bitrates, NULL); if (res != VPX_CODEC_OK) break; } else if (strcmp("multi-frame-contexts", option_name) == 0) { si->use_multiple_frame_contexts = atoi(option_value); } else { svc_log(svc_ctx, SVC_LOG_ERROR, "invalid option: %s\n", option_name); res = VPX_CODEC_INVALID_PARAM; break; } option_name = strtok_r(NULL, "=", &input_ptr); } free(input_string); for (i = 0; i < svc_ctx->spatial_layers; ++i) { if (si->svc_params.max_quantizers[i] > MAX_QUANTIZER || si->svc_params.max_quantizers[i] < 0 || si->svc_params.min_quantizers[i] > si->svc_params.max_quantizers[i] || si->svc_params.min_quantizers[i] < 0) res = VPX_CODEC_INVALID_PARAM; } if (si->use_multiple_frame_contexts && (svc_ctx->spatial_layers > 3 || svc_ctx->spatial_layers * svc_ctx->temporal_layers > 4)) res = VPX_CODEC_INVALID_PARAM; for (i = 0; i < svc_ctx->spatial_layers; ++i) alt_ref_enabled += si->enable_auto_alt_ref[i]; if (alt_ref_enabled > REF_FRAMES - svc_ctx->spatial_layers) { svc_log(svc_ctx, SVC_LOG_ERROR, "svc: auto alt ref: Maxinum %d(REF_FRAMES - layers) layers could" "enabled auto alt reference frame, but % layers are enabled\n", REF_FRAMES - svc_ctx->spatial_layers, alt_ref_enabled); res = VPX_CODEC_INVALID_PARAM; } return res; } vpx_codec_err_t vpx_svc_set_options(SvcContext *svc_ctx, const char *options) { SvcInternal_t *const si = get_svc_internal(svc_ctx); if (svc_ctx == NULL || options == NULL || si == NULL) { return VPX_CODEC_INVALID_PARAM; } strncpy(si->options, options, sizeof(si->options)); si->options[sizeof(si->options) - 1] = '\0'; return VPX_CODEC_OK; } static vpx_codec_err_t assign_layer_bitrates( const SvcContext *svc_ctx, 
vpx_codec_enc_cfg_t *const enc_cfg) { int i; const SvcInternal_t *const si = get_const_svc_internal(svc_ctx); int sl, tl, spatial_layer_target; if (svc_ctx->temporal_layering_mode != 0) { if (si->bitrates[0] != 0) { unsigned int total_bitrate = 0; for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { total_bitrate += si->bitrates[sl * svc_ctx->temporal_layers + svc_ctx->temporal_layers - 1]; for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { enc_cfg->ss_target_bitrate[sl * svc_ctx->temporal_layers] += (unsigned int)si->bitrates[sl * svc_ctx->temporal_layers + tl]; enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + tl] = si->bitrates[sl * svc_ctx->temporal_layers + tl]; if (tl > 0 && (si->bitrates[sl * svc_ctx->temporal_layers + tl] <= si->bitrates[sl * svc_ctx->temporal_layers + tl - 1])) return VPX_CODEC_INVALID_PARAM; } } if (total_bitrate != enc_cfg->rc_target_bitrate) return VPX_CODEC_INVALID_PARAM; } else { float total = 0; float alloc_ratio[VPX_MAX_LAYERS] = { 0 }; for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { if (si->svc_params.scaling_factor_den[sl] > 0) { alloc_ratio[sl] = (float)(pow(2, sl)); total += alloc_ratio[sl]; } } for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { enc_cfg->ss_target_bitrate[sl] = spatial_layer_target = (unsigned int)(enc_cfg->rc_target_bitrate * alloc_ratio[sl] / total); if (svc_ctx->temporal_layering_mode == 3) { enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = (spatial_layer_target * 6) / 10; // 60% enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = (spatial_layer_target * 8) / 10; // 80% enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 2] = spatial_layer_target; } else if (svc_ctx->temporal_layering_mode == 2 || svc_ctx->temporal_layering_mode == 1) { enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers] = spatial_layer_target * 2 / 3; enc_cfg->layer_target_bitrate[sl * svc_ctx->temporal_layers + 1] = spatial_layer_target; } else { // User should explicitly assign bitrates in this case. 
assert(0); } } } } else { if (si->bitrates[0] != 0) { unsigned int total_bitrate = 0; for (i = 0; i < svc_ctx->spatial_layers; ++i) { enc_cfg->ss_target_bitrate[i] = (unsigned int)si->bitrates[i]; enc_cfg->layer_target_bitrate[i] = (unsigned int)si->bitrates[i]; total_bitrate += si->bitrates[i]; } if (total_bitrate != enc_cfg->rc_target_bitrate) return VPX_CODEC_INVALID_PARAM; } else { float total = 0; float alloc_ratio[VPX_MAX_LAYERS] = { 0 }; for (i = 0; i < svc_ctx->spatial_layers; ++i) { if (si->svc_params.scaling_factor_den[i] > 0) { alloc_ratio[i] = (float)(si->svc_params.scaling_factor_num[i] * 1.0 / si->svc_params.scaling_factor_den[i]); alloc_ratio[i] *= alloc_ratio[i]; total += alloc_ratio[i]; } } for (i = 0; i < VPX_SS_MAX_LAYERS; ++i) { if (total > 0) { enc_cfg->layer_target_bitrate[i] = (unsigned int)(enc_cfg->rc_target_bitrate * alloc_ratio[i] / total); } } } } return VPX_CODEC_OK; } vpx_codec_err_t vpx_svc_init(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *enc_cfg) { vpx_codec_err_t res; int i, sl, tl; SvcInternal_t *const si = get_svc_internal(svc_ctx); if (svc_ctx == NULL || codec_ctx == NULL || iface == NULL || enc_cfg == NULL) { return VPX_CODEC_INVALID_PARAM; } if (si == NULL) return VPX_CODEC_MEM_ERROR; si->codec_ctx = codec_ctx; si->width = enc_cfg->g_w; si->height = enc_cfg->g_h; si->kf_dist = enc_cfg->kf_max_dist; if (svc_ctx->spatial_layers == 0) svc_ctx->spatial_layers = VPX_SS_DEFAULT_LAYERS; if (svc_ctx->spatial_layers < 1 || svc_ctx->spatial_layers > VPX_SS_MAX_LAYERS) { svc_log(svc_ctx, SVC_LOG_ERROR, "spatial layers: invalid value: %d\n", svc_ctx->spatial_layers); return VPX_CODEC_INVALID_PARAM; } // Note: temporal_layering_mode only applies to one-pass CBR // si->svc_params.temporal_layering_mode = svc_ctx->temporal_layering_mode; if (svc_ctx->temporal_layering_mode == 3) { svc_ctx->temporal_layers = 3; } else if (svc_ctx->temporal_layering_mode == 2 || svc_ctx->temporal_layering_mode == 1) { svc_ctx->temporal_layers = 2; } for (sl = 0; sl < VPX_SS_MAX_LAYERS; ++sl) { si->svc_params.scaling_factor_num[sl] = DEFAULT_SCALE_FACTORS_NUM[sl]; si->svc_params.scaling_factor_den[sl] = DEFAULT_SCALE_FACTORS_DEN[sl]; si->svc_params.speed_per_layer[sl] = svc_ctx->speed; } if (enc_cfg->rc_end_usage == VPX_CBR && enc_cfg->g_pass == VPX_RC_ONE_PASS && svc_ctx->spatial_layers <= 3) { for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { int sl2 = (svc_ctx->spatial_layers == 2) ? sl + 1 : sl; si->svc_params.scaling_factor_num[sl] = DEFAULT_SCALE_FACTORS_NUM_2x[sl2]; si->svc_params.scaling_factor_den[sl] = DEFAULT_SCALE_FACTORS_DEN_2x[sl2]; } if (svc_ctx->spatial_layers == 1) { si->svc_params.scaling_factor_num[0] = 1; si->svc_params.scaling_factor_den[0] = 1; } } for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { i = sl * svc_ctx->temporal_layers + tl; si->svc_params.max_quantizers[i] = MAX_QUANTIZER; si->svc_params.min_quantizers[i] = 0; if (enc_cfg->rc_end_usage == VPX_CBR && enc_cfg->g_pass == VPX_RC_ONE_PASS) { si->svc_params.max_quantizers[i] = 56; si->svc_params.min_quantizers[i] = 2; } } } // Parse aggregate command line options. 
Options must start with // "layers=xx" then followed by other options res = parse_options(svc_ctx, si->options); if (res != VPX_CODEC_OK) return res; if (svc_ctx->spatial_layers < 1) svc_ctx->spatial_layers = 1; if (svc_ctx->spatial_layers > VPX_SS_MAX_LAYERS) svc_ctx->spatial_layers = VPX_SS_MAX_LAYERS; if (svc_ctx->temporal_layers < 1) svc_ctx->temporal_layers = 1; if (svc_ctx->temporal_layers > VPX_TS_MAX_LAYERS) svc_ctx->temporal_layers = VPX_TS_MAX_LAYERS; if (svc_ctx->temporal_layers * svc_ctx->spatial_layers > VPX_MAX_LAYERS) { svc_log(svc_ctx, SVC_LOG_ERROR, "spatial layers * temporal layers exceeds the maximum number of " "allowed layers of %d\n", svc_ctx->spatial_layers * svc_ctx->temporal_layers, VPX_MAX_LAYERS); return VPX_CODEC_INVALID_PARAM; } res = assign_layer_bitrates(svc_ctx, enc_cfg); if (res != VPX_CODEC_OK) { svc_log(svc_ctx, SVC_LOG_ERROR, "layer bitrates incorrect: \n" "1) spatial layer bitrates should sum up to target \n" "2) temporal layer bitrates should be increasing within \n" "a spatial layer \n"); return VPX_CODEC_INVALID_PARAM; } if (svc_ctx->temporal_layers > 1) { int i; for (i = 0; i < svc_ctx->temporal_layers; ++i) { enc_cfg->ts_target_bitrate[i] = enc_cfg->rc_target_bitrate / svc_ctx->temporal_layers; enc_cfg->ts_rate_decimator[i] = 1 << (svc_ctx->temporal_layers - 1 - i); } } if (svc_ctx->threads) enc_cfg->g_threads = svc_ctx->threads; // Modify encoder configuration enc_cfg->ss_number_layers = svc_ctx->spatial_layers; enc_cfg->ts_number_layers = svc_ctx->temporal_layers; if (enc_cfg->rc_end_usage == VPX_CBR) { enc_cfg->rc_resize_allowed = 0; enc_cfg->rc_min_quantizer = 2; enc_cfg->rc_max_quantizer = 56; enc_cfg->rc_undershoot_pct = 50; enc_cfg->rc_overshoot_pct = 50; enc_cfg->rc_buf_initial_sz = 500; enc_cfg->rc_buf_optimal_sz = 600; enc_cfg->rc_buf_sz = 1000; } for (tl = 0; tl < svc_ctx->temporal_layers; ++tl) { for (sl = 0; sl < svc_ctx->spatial_layers; ++sl) { i = sl * svc_ctx->temporal_layers + tl; if (enc_cfg->rc_end_usage == VPX_CBR && enc_cfg->g_pass == VPX_RC_ONE_PASS) { si->svc_params.max_quantizers[i] = enc_cfg->rc_max_quantizer; si->svc_params.min_quantizers[i] = enc_cfg->rc_min_quantizer; } } } if (enc_cfg->g_error_resilient == 0 && si->use_multiple_frame_contexts == 0) enc_cfg->g_error_resilient = 1; // Initialize codec res = vpx_codec_enc_init(codec_ctx, iface, enc_cfg, VPX_CODEC_USE_PSNR); if (res != VPX_CODEC_OK) { svc_log(svc_ctx, SVC_LOG_ERROR, "svc_enc_init error\n"); return res; } if (svc_ctx->spatial_layers > 1 || svc_ctx->temporal_layers > 1) { vpx_codec_control(codec_ctx, VP9E_SET_SVC, 1); vpx_codec_control(codec_ctx, VP9E_SET_SVC_PARAMETERS, &si->svc_params); } return VPX_CODEC_OK; } /** * Encode a frame into multiple layers * Create a superframe containing the individual layers */ vpx_codec_err_t vpx_svc_encode(SvcContext *svc_ctx, vpx_codec_ctx_t *codec_ctx, struct vpx_image *rawimg, vpx_codec_pts_t pts, int64_t duration, int deadline) { vpx_codec_err_t res; vpx_codec_iter_t iter; const vpx_codec_cx_pkt_t *cx_pkt; SvcInternal_t *const si = get_svc_internal(svc_ctx); if (svc_ctx == NULL || codec_ctx == NULL || si == NULL) { return VPX_CODEC_INVALID_PARAM; } res = vpx_codec_encode(codec_ctx, rawimg, pts, (uint32_t)duration, 0, deadline); if (res != VPX_CODEC_OK) { return res; } // save compressed data iter = NULL; while ((cx_pkt = vpx_codec_get_cx_data(codec_ctx, &iter))) { switch (cx_pkt->kind) { case VPX_CODEC_PSNR_PKT: { } ++si->psnr_pkt_received; break; default: { break; } } } return VPX_CODEC_OK; } static double 
calc_psnr(double d) { if (d == 0) return 100; return -10.0 * log(d) / log(10.0); } // dump accumulated statistics and reset accumulated values void vpx_svc_dump_statistics(SvcContext *svc_ctx) { int number_of_frames; int i, j; uint32_t bytes_total = 0; double scale[COMPONENTS]; double psnr[COMPONENTS]; double mse[COMPONENTS]; double y_scale; SvcInternal_t *const si = get_svc_internal(svc_ctx); if (svc_ctx == NULL || si == NULL) return; number_of_frames = si->psnr_pkt_received; if (number_of_frames <= 0) return; svc_log(svc_ctx, SVC_LOG_INFO, "\n"); for (i = 0; i < svc_ctx->spatial_layers; ++i) { svc_log(svc_ctx, SVC_LOG_INFO, "Layer %d Average PSNR=[%2.3f, %2.3f, %2.3f, %2.3f], Bytes=[%u]\n", i, si->psnr_sum[i][0] / number_of_frames, si->psnr_sum[i][1] / number_of_frames, si->psnr_sum[i][2] / number_of_frames, si->psnr_sum[i][3] / number_of_frames, si->bytes_sum[i]); // the following psnr calculation is deduced from ffmpeg.c#print_report y_scale = si->width * si->height * 255.0 * 255.0 * number_of_frames; scale[1] = y_scale; scale[2] = scale[3] = y_scale / 4; // U or V scale[0] = y_scale * 1.5; // total for (j = 0; j < COMPONENTS; j++) { psnr[j] = calc_psnr(si->sse_sum[i][j] / scale[j]); mse[j] = si->sse_sum[i][j] * 255.0 * 255.0 / scale[j]; } svc_log(svc_ctx, SVC_LOG_INFO, "Layer %d Overall PSNR=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, psnr[0], psnr[1], psnr[2], psnr[3]); svc_log(svc_ctx, SVC_LOG_INFO, "Layer %d Overall MSE=[%2.3f, %2.3f, %2.3f, %2.3f]\n", i, mse[0], mse[1], mse[2], mse[3]); bytes_total += si->bytes_sum[i]; // Clear sums for next time. si->bytes_sum[i] = 0; for (j = 0; j < COMPONENTS; ++j) { si->psnr_sum[i][j] = 0; si->sse_sum[i][j] = 0; } } // only display statistics once si->psnr_pkt_received = 0; svc_log(svc_ctx, SVC_LOG_INFO, "Total Bytes=[%u]\n", bytes_total); } void vpx_svc_release(SvcContext *svc_ctx) { SvcInternal_t *si; if (svc_ctx == NULL) return; // do not use get_svc_internal as it will unnecessarily allocate an // SvcInternal_t if it was not already allocated si = (SvcInternal_t *)svc_ctx->internal; if (si != NULL) { free(si); svc_ctx->internal = NULL; } } libvpx-1.8.2/examples/twopass_encoder.c000066400000000000000000000202261357355204000202150ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ // Two Pass Encoder // ================ // // This is an example of a two pass encoder loop. It takes an input file in // YV12 format, passes it through the encoder twice, and writes the compressed // frames to disk in IVF format. It builds upon the simple_encoder example. // // Twopass Variables // ----------------- // Twopass mode needs to track the current pass number and the buffer of // statistics packets. // // Updating The Configuration // --------------------------------- // In two pass mode, the configuration has to be updated on each pass. The // statistics buffer is passed on the last pass. // // Encoding A Frame // ---------------- // Encoding a frame in two pass mode is identical to the simple encoder // example. To increase the quality while sacrificing encoding speed, // VPX_DL_BEST_QUALITY can be used in place of VPX_DL_GOOD_QUALITY. 
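//
// As a condensed view of the flow in `main` below, the two passes differ
// only in how `cfg` is prepared before each encode loop:
//
//   cfg.g_pass = VPX_RC_FIRST_PASS;
//   stats = pass0(&raw, infile, encoder, &cfg, max_frames);
//   rewind(infile);
//   cfg.g_pass = VPX_RC_LAST_PASS;
//   cfg.rc_twopass_stats_in = stats;  // hand pass-0 statistics to pass 1
//   pass1(&raw, infile, outfile_arg, encoder, &cfg, max_frames);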
// // Processing Statistics Packets // ----------------------------- // Each packet of type `VPX_CODEC_CX_FRAME_PKT` contains the encoded data // for this frame. We write a IVF frame header, followed by the raw data. // // // Pass Progress Reporting // ----------------------------- // It's sometimes helpful to see when each pass completes. // // // Clean-up // ----------------------------- // Destruction of the encoder instance must be done on each pass. The // raw image should be destroyed at the end as usual. #include #include #include #include "vpx/vpx_encoder.h" #include "../tools_common.h" #include "../video_writer.h" static const char *exec_name; void usage_exit(void) { fprintf(stderr, "Usage: %s " "\n", exec_name); exit(EXIT_FAILURE); } static int get_frame_stats(vpx_codec_ctx_t *ctx, const vpx_image_t *img, vpx_codec_pts_t pts, unsigned int duration, vpx_enc_frame_flags_t flags, unsigned int deadline, vpx_fixed_buf_t *stats) { int got_pkts = 0; vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *pkt = NULL; const vpx_codec_err_t res = vpx_codec_encode(ctx, img, pts, duration, flags, deadline); if (res != VPX_CODEC_OK) die_codec(ctx, "Failed to get frame stats."); while ((pkt = vpx_codec_get_cx_data(ctx, &iter)) != NULL) { got_pkts = 1; if (pkt->kind == VPX_CODEC_STATS_PKT) { const uint8_t *const pkt_buf = pkt->data.twopass_stats.buf; const size_t pkt_size = pkt->data.twopass_stats.sz; stats->buf = realloc(stats->buf, stats->sz + pkt_size); memcpy((uint8_t *)stats->buf + stats->sz, pkt_buf, pkt_size); stats->sz += pkt_size; } } return got_pkts; } static int encode_frame(vpx_codec_ctx_t *ctx, const vpx_image_t *img, vpx_codec_pts_t pts, unsigned int duration, vpx_enc_frame_flags_t flags, unsigned int deadline, VpxVideoWriter *writer) { int got_pkts = 0; vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *pkt = NULL; const vpx_codec_err_t res = vpx_codec_encode(ctx, img, pts, duration, flags, deadline); if (res != VPX_CODEC_OK) die_codec(ctx, "Failed to encode frame."); while ((pkt = vpx_codec_get_cx_data(ctx, &iter)) != NULL) { got_pkts = 1; if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) { const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0; if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf, pkt->data.frame.sz, pkt->data.frame.pts)) die_codec(ctx, "Failed to write compressed frame."); printf(keyframe ? "K" : "."); fflush(stdout); } } return got_pkts; } static vpx_fixed_buf_t pass0(vpx_image_t *raw, FILE *infile, const VpxInterface *encoder, const vpx_codec_enc_cfg_t *cfg, int max_frames) { vpx_codec_ctx_t codec; int frame_count = 0; vpx_fixed_buf_t stats = { NULL, 0 }; if (vpx_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0)) die_codec(&codec, "Failed to initialize encoder"); // Calculate frame statistics. while (vpx_img_read(raw, infile)) { ++frame_count; get_frame_stats(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, &stats); if (max_frames > 0 && frame_count >= max_frames) break; } // Flush encoder. while (get_frame_stats(&codec, NULL, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, &stats)) { } printf("Pass 0 complete. 
Processed %d frames.\n", frame_count); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); return stats; } static void pass1(vpx_image_t *raw, FILE *infile, const char *outfile_name, const VpxInterface *encoder, const vpx_codec_enc_cfg_t *cfg, int max_frames) { VpxVideoInfo info = { encoder->fourcc, cfg->g_w, cfg->g_h, { cfg->g_timebase.num, cfg->g_timebase.den } }; VpxVideoWriter *writer = NULL; vpx_codec_ctx_t codec; int frame_count = 0; writer = vpx_video_writer_open(outfile_name, kContainerIVF, &info); if (!writer) die("Failed to open %s for writing", outfile_name); if (vpx_codec_enc_init(&codec, encoder->codec_interface(), cfg, 0)) die_codec(&codec, "Failed to initialize encoder"); // Encode frames. while (vpx_img_read(raw, infile)) { ++frame_count; encode_frame(&codec, raw, frame_count, 1, 0, VPX_DL_GOOD_QUALITY, writer); if (max_frames > 0 && frame_count >= max_frames) break; } // Flush encoder. while (encode_frame(&codec, NULL, -1, 1, 0, VPX_DL_GOOD_QUALITY, writer)) { } printf("\n"); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); vpx_video_writer_close(writer); printf("Pass 1 complete. Processed %d frames.\n", frame_count); } int main(int argc, char **argv) { FILE *infile = NULL; int w, h; vpx_codec_ctx_t codec; vpx_codec_enc_cfg_t cfg; vpx_image_t raw; vpx_codec_err_t res; vpx_fixed_buf_t stats; const VpxInterface *encoder = NULL; const int fps = 30; // TODO(dkovalev) add command line argument const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument const char *const codec_arg = argv[1]; const char *const width_arg = argv[2]; const char *const height_arg = argv[3]; const char *const infile_arg = argv[4]; const char *const outfile_arg = argv[5]; int max_frames = 0; exec_name = argv[0]; if (argc != 7) die("Invalid number of arguments."); max_frames = (int)strtol(argv[6], NULL, 0); encoder = get_vpx_encoder_by_name(codec_arg); if (!encoder) die("Unsupported codec."); w = (int)strtol(width_arg, NULL, 0); h = (int)strtol(height_arg, NULL, 0); if (w <= 0 || h <= 0 || (w % 2) != 0 || (h % 2) != 0) die("Invalid frame size: %dx%d", w, h); if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, w, h, 1)) die("Failed to allocate image", w, h); printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); // Configuration res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); if (res) die_codec(&codec, "Failed to get default codec config."); cfg.g_w = w; cfg.g_h = h; cfg.g_timebase.num = 1; cfg.g_timebase.den = fps; cfg.rc_target_bitrate = bitrate; if (!(infile = fopen(infile_arg, "rb"))) die("Failed to open %s for reading", infile_arg); // Pass 0 cfg.g_pass = VPX_RC_FIRST_PASS; stats = pass0(&raw, infile, encoder, &cfg, max_frames); // Pass 1 rewind(infile); cfg.g_pass = VPX_RC_LAST_PASS; cfg.rc_twopass_stats_in = stats; pass1(&raw, infile, outfile_arg, encoder, &cfg, max_frames); free(stats.buf); vpx_img_free(&raw); fclose(infile); return EXIT_SUCCESS; } libvpx-1.8.2/examples/vp8_multi_resolution_encoder.c000066400000000000000000000551611357355204000227350ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ /* * This is an example demonstrating multi-resolution encoding in VP8. * High-resolution input video is down-sampled to lower-resolutions. The * encoder then encodes the video and outputs multiple bitstreams with * different resolutions. * * This test also allows for settings temporal layers for each spatial layer. * Different number of temporal layers per spatial stream may be used. * Currently up to 3 temporal layers per spatial stream (encoder) are supported * in this test. */ #include "./vpx_config.h" #include #include #include #include #include #include #include #include "vpx_ports/vpx_timer.h" #include "vpx/vpx_encoder.h" #include "vpx/vp8cx.h" #include "vpx_ports/mem_ops.h" #include "../tools_common.h" #define interface (vpx_codec_vp8_cx()) #define fourcc 0x30385056 void usage_exit(void) { exit(EXIT_FAILURE); } /* * The input video frame is downsampled several times to generate a multi-level * hierarchical structure. NUM_ENCODERS is defined as the number of encoding * levels required. For example, if the size of input video is 1280x720, * NUM_ENCODERS is 3, and down-sampling factor is 2, the encoder outputs 3 * bitstreams with resolution of 1280x720(level 0), 640x360(level 1), and * 320x180(level 2) respectively. */ /* Number of encoders (spatial resolutions) used in this test. */ #define NUM_ENCODERS 3 /* Maximum number of temporal layers allowed for this test. */ #define MAX_NUM_TEMPORAL_LAYERS 3 /* This example uses the scaler function in libyuv. */ #include "third_party/libyuv/include/libyuv/basic_types.h" #include "third_party/libyuv/include/libyuv/scale.h" #include "third_party/libyuv/include/libyuv/cpu_id.h" int (*read_frame_p)(FILE *f, vpx_image_t *img); static int mulres_read_frame(FILE *f, vpx_image_t *img) { size_t nbytes, to_read; int res = 1; to_read = img->w * img->h * 3 / 2; nbytes = fread(img->planes[0], 1, to_read, f); if (nbytes != to_read) { res = 0; if (nbytes > 0) printf("Warning: Read partial frame. Check your width & height!\n"); } return res; } static int mulres_read_frame_by_row(FILE *f, vpx_image_t *img) { size_t nbytes, to_read; int res = 1; int plane; for (plane = 0; plane < 3; plane++) { unsigned char *ptr; int w = (plane ? (1 + img->d_w) / 2 : img->d_w); int h = (plane ? (1 + img->d_h) / 2 : img->d_h); int r; /* Determine the correct plane based on the image format. The for-loop * always counts in Y,U,V order, but this may not match the order of * the data on disk. */ switch (plane) { case 1: ptr = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_V : VPX_PLANE_U]; break; case 2: ptr = img->planes[img->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_U : VPX_PLANE_V]; break; default: ptr = img->planes[plane]; } for (r = 0; r < h; r++) { to_read = w; nbytes = fread(ptr, 1, to_read, f); if (nbytes != to_read) { res = 0; if (nbytes > 0) printf("Warning: Read partial frame. 
Check your width & height!\n"); break; } ptr += img->stride[plane]; } if (!res) break; } return res; } static void write_ivf_file_header(FILE *outfile, const vpx_codec_enc_cfg_t *cfg, int frame_cnt) { char header[32]; if (cfg->g_pass != VPX_RC_ONE_PASS && cfg->g_pass != VPX_RC_LAST_PASS) return; header[0] = 'D'; header[1] = 'K'; header[2] = 'I'; header[3] = 'F'; mem_put_le16(header + 4, 0); /* version */ mem_put_le16(header + 6, 32); /* headersize */ mem_put_le32(header + 8, fourcc); /* headersize */ mem_put_le16(header + 12, cfg->g_w); /* width */ mem_put_le16(header + 14, cfg->g_h); /* height */ mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */ mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */ mem_put_le32(header + 24, frame_cnt); /* length */ mem_put_le32(header + 28, 0); /* unused */ (void)fwrite(header, 1, 32, outfile); } static void write_ivf_frame_header(FILE *outfile, const vpx_codec_cx_pkt_t *pkt) { char header[12]; vpx_codec_pts_t pts; if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return; pts = pkt->data.frame.pts; mem_put_le32(header, (int)pkt->data.frame.sz); mem_put_le32(header + 4, pts & 0xFFFFFFFF); mem_put_le32(header + 8, pts >> 32); (void)fwrite(header, 1, 12, outfile); } /* Temporal scaling parameters */ /* This sets all the temporal layer parameters given |num_temporal_layers|, * including the target bit allocation across temporal layers. Bit allocation * parameters will be passed in as user parameters in another version. */ static void set_temporal_layer_pattern(int num_temporal_layers, vpx_codec_enc_cfg_t *cfg, int bitrate, int *layer_flags) { assert(num_temporal_layers <= MAX_NUM_TEMPORAL_LAYERS); switch (num_temporal_layers) { case 1: { /* 1-layer */ cfg->ts_number_layers = 1; cfg->ts_periodicity = 1; cfg->ts_rate_decimator[0] = 1; cfg->ts_layer_id[0] = 0; cfg->ts_target_bitrate[0] = bitrate; // Update L only. layer_flags[0] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; break; } case 2: { /* 2-layers, with sync point at first frame of layer 1. */ cfg->ts_number_layers = 2; cfg->ts_periodicity = 2; cfg->ts_rate_decimator[0] = 2; cfg->ts_rate_decimator[1] = 1; cfg->ts_layer_id[0] = 0; cfg->ts_layer_id[1] = 1; // Use 60/40 bit allocation as example. cfg->ts_target_bitrate[0] = (int)(0.6f * bitrate); cfg->ts_target_bitrate[1] = bitrate; /* 0=L, 1=GF */ // ARF is used as predictor for all frames, and is only updated on // key frame. Sync point every 8 frames. // Layer 0: predict from L and ARF, update L and G. layer_flags[0] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF; // Layer 1: sync point: predict from L and ARF, and update G. layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; // Layer 0, predict from L and ARF, update L. layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; // Layer 1: predict from L, G and ARF, and update G. layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; // Layer 0 layer_flags[4] = layer_flags[2]; // Layer 1 layer_flags[5] = layer_flags[3]; // Layer 0 layer_flags[6] = layer_flags[4]; // Layer 1 layer_flags[7] = layer_flags[5]; break; } case 3: default: { // 3-layers structure where ARF is used as predictor for all frames, // and is only updated on key frame. // Sync points for layer 1 and 2 every 8 frames. 
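      // For reference, the assignments below define a repeating 4-frame
      // pattern of layer ids 0,2,1,2 (ts_periodicity = 4): TL0 carries every
      // 4th frame, adding TL1 (frames 0 and 2 of each period) doubles the
      // frame rate, and TL2 supplies the remaining odd frames, matching the
      // rate decimators 4, 2 and 1. The rate targets are cumulative: 45%,
      // 65% and 100% of |bitrate|, i.e. the 45/20/35 per-layer split noted
      // below.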
cfg->ts_number_layers = 3; cfg->ts_periodicity = 4; cfg->ts_rate_decimator[0] = 4; cfg->ts_rate_decimator[1] = 2; cfg->ts_rate_decimator[2] = 1; cfg->ts_layer_id[0] = 0; cfg->ts_layer_id[1] = 2; cfg->ts_layer_id[2] = 1; cfg->ts_layer_id[3] = 2; // Use 45/20/35 bit allocation as example. cfg->ts_target_bitrate[0] = (int)(0.45f * bitrate); cfg->ts_target_bitrate[1] = (int)(0.65f * bitrate); cfg->ts_target_bitrate[2] = bitrate; /* 0=L, 1=GF, 2=ARF */ // Layer 0: predict from L and ARF; update L and G. layer_flags[0] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; // Layer 2: sync point: predict from L and ARF; update none. layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; // Layer 1: sync point: predict from L and ARF; update G. layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; // Layer 2: predict from L, G, ARF; update none. layer_flags[3] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; // Layer 0: predict from L and ARF; update L. layer_flags[4] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; // Layer 2: predict from L, G, ARF; update none. layer_flags[5] = layer_flags[3]; // Layer 1: predict from L, G, ARF; update G. layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; // Layer 2: predict from L, G, ARF; update none. layer_flags[7] = layer_flags[3]; break; } } } /* The periodicity of the pattern given the number of temporal layers. */ static int periodicity_to_num_layers[MAX_NUM_TEMPORAL_LAYERS] = { 1, 8, 8 }; int main(int argc, char **argv) { FILE *infile, *outfile[NUM_ENCODERS]; FILE *downsampled_input[NUM_ENCODERS - 1]; char filename[50]; vpx_codec_ctx_t codec[NUM_ENCODERS]; vpx_codec_enc_cfg_t cfg[NUM_ENCODERS]; int frame_cnt = 0; vpx_image_t raw[NUM_ENCODERS]; vpx_codec_err_t res[NUM_ENCODERS]; int i; int width; int height; int length_frame; int frame_avail; int got_data; int flags = 0; int layer_id = 0; int layer_flags[VPX_TS_MAX_PERIODICITY * NUM_ENCODERS] = { 0 }; int flag_periodicity; /*Currently, only realtime mode is supported in multi-resolution encoding.*/ int arg_deadline = VPX_DL_REALTIME; /* Set show_psnr to 1/0 to show/not show PSNR. Choose show_psnr=0 if you don't need to know PSNR, which will skip PSNR calculation and save encoding time. */ int show_psnr = 0; int key_frame_insert = 0; uint64_t psnr_sse_total[NUM_ENCODERS] = { 0 }; uint64_t psnr_samples_total[NUM_ENCODERS] = { 0 }; double psnr_totals[NUM_ENCODERS][4] = { { 0, 0 } }; int psnr_count[NUM_ENCODERS] = { 0 }; int64_t cx_time = 0; /* Set the required target bitrates for each resolution level. * If target bitrate for highest-resolution level is set to 0, * (i.e. target_bitrate[0]=0), we skip encoding at that level. */ unsigned int target_bitrate[NUM_ENCODERS] = { 1000, 500, 100 }; /* Enter the frame rate of the input video */ int framerate = 30; /* Set down-sampling factor for each resolution level. dsf[0] controls down sampling from level 0 to level 1; dsf[1] controls down sampling from level 1 to level 2; dsf[2] is not used. */ vpx_rational_t dsf[NUM_ENCODERS] = { { 2, 1 }, { 2, 1 }, { 1, 1 } }; /* Set the number of temporal layers for each encoder/resolution level, * starting from highest resoln down to lowest resoln. 
 */
  unsigned int num_temporal_layers[NUM_ENCODERS] = { 3, 3, 3 };

  if (argc != (7 + 3 * NUM_ENCODERS))
    die("Usage: %s <width> <height> <frame_rate> <infile> <outfile(s)> "
        "<rate_encoder(s)> <temporal_layer(s)> <key_frame_insert> "
        "<output_psnr>\n",
        argv[0]);

  printf("Using %s\n", vpx_codec_iface_name(interface));

  width = (int)strtol(argv[1], NULL, 0);
  height = (int)strtol(argv[2], NULL, 0);
  framerate = (int)strtol(argv[3], NULL, 0);

  if (width < 16 || width % 2 || height < 16 || height % 2)
    die("Invalid resolution: %dx%d", width, height);

  /* Open input video file for encoding */
  if (!(infile = fopen(argv[4], "rb")))
    die("Failed to open %s for reading", argv[4]);

  /* Open output file for each encoder to output bitstreams */
  for (i = 0; i < NUM_ENCODERS; i++) {
    if (!target_bitrate[i]) {
      outfile[i] = NULL;
      continue;
    }
    if (!(outfile[i] = fopen(argv[i + 5], "wb")))
      die("Failed to open %s for writing", argv[i + 5]);
  }

  // Bitrates per spatial layer: overwrite default rates above.
  for (i = 0; i < NUM_ENCODERS; i++) {
    target_bitrate[i] = (int)strtol(argv[NUM_ENCODERS + 5 + i], NULL, 0);
  }

  // Temporal layers per spatial layers: overwrite default settings above.
  for (i = 0; i < NUM_ENCODERS; i++) {
    num_temporal_layers[i] =
        (int)strtol(argv[2 * NUM_ENCODERS + 5 + i], NULL, 0);
    if (num_temporal_layers[i] < 1 || num_temporal_layers[i] > 3)
      die("Invalid temporal layers: %d. Must be 1, 2, or 3.\n",
          num_temporal_layers[i]);
  }

  /* Open file to write out each spatially downsampled input stream. */
  for (i = 0; i < NUM_ENCODERS - 1; i++) {
    // Highest resoln is encoder 0.
    if (sprintf(filename, "ds%d.yuv", NUM_ENCODERS - i) < 0) {
      return EXIT_FAILURE;
    }
    downsampled_input[i] = fopen(filename, "wb");
  }

  key_frame_insert = (int)strtol(argv[3 * NUM_ENCODERS + 5], NULL, 0);

  show_psnr = (int)strtol(argv[3 * NUM_ENCODERS + 6], NULL, 0);

  /* Populate default encoder configuration */
  for (i = 0; i < NUM_ENCODERS; i++) {
    res[i] = vpx_codec_enc_config_default(interface, &cfg[i], 0);
    if (res[i]) {
      printf("Failed to get config: %s\n", vpx_codec_err_to_string(res[i]));
      return EXIT_FAILURE;
    }
  }

  /*
   * Update the default configuration according to needs of the application.
   */
  /* Highest-resolution encoder settings */
  cfg[0].g_w = width;
  cfg[0].g_h = height;
  cfg[0].rc_dropframe_thresh = 0;
  cfg[0].rc_end_usage = VPX_CBR;
  cfg[0].rc_resize_allowed = 0;
  cfg[0].rc_min_quantizer = 2;
  cfg[0].rc_max_quantizer = 56;
  cfg[0].rc_undershoot_pct = 100;
  cfg[0].rc_overshoot_pct = 15;
  cfg[0].rc_buf_initial_sz = 500;
  cfg[0].rc_buf_optimal_sz = 600;
  cfg[0].rc_buf_sz = 1000;
  cfg[0].g_error_resilient = 1; /* Enable error resilient mode */
  cfg[0].g_lag_in_frames = 0;

  /* Disable automatic keyframe placement */
  /* Note: These 3 settings are copied to all levels. But, except the lowest
   * resolution level, all other levels are set to VPX_KF_DISABLED internally.
   */
  cfg[0].kf_mode = VPX_KF_AUTO;
  cfg[0].kf_min_dist = 3000;
  cfg[0].kf_max_dist = 3000;

  cfg[0].rc_target_bitrate = target_bitrate[0]; /* Set target bitrate */
  cfg[0].g_timebase.num = 1;                    /* Set fps */
  cfg[0].g_timebase.den = framerate;

  /* Other-resolution encoder settings */
  for (i = 1; i < NUM_ENCODERS; i++) {
    memcpy(&cfg[i], &cfg[0], sizeof(vpx_codec_enc_cfg_t));

    cfg[i].rc_target_bitrate = target_bitrate[i];

    /* Note: Width & height of other-resolution encoders are calculated
     * from the highest-resolution encoder's size and the corresponding
     * down_sampling_factor.
     */
    {
      unsigned int iw = cfg[i - 1].g_w * dsf[i - 1].den + dsf[i - 1].num - 1;
      unsigned int ih = cfg[i - 1].g_h * dsf[i - 1].den + dsf[i - 1].num - 1;
      cfg[i].g_w = iw / dsf[i - 1].num;
      cfg[i].g_h = ih / dsf[i - 1].num;
    }

    /* Make width & height to be a multiple of 2.
*/ // Should support odd size ??? if ((cfg[i].g_w) % 2) cfg[i].g_w++; if ((cfg[i].g_h) % 2) cfg[i].g_h++; } // Set the number of threads per encode/spatial layer. // (1, 1, 1) means no encoder threading. cfg[0].g_threads = 1; cfg[1].g_threads = 1; cfg[2].g_threads = 1; /* Allocate image for each encoder */ for (i = 0; i < NUM_ENCODERS; i++) if (!vpx_img_alloc(&raw[i], VPX_IMG_FMT_I420, cfg[i].g_w, cfg[i].g_h, 32)) die("Failed to allocate image", cfg[i].g_w, cfg[i].g_h); if (raw[0].stride[VPX_PLANE_Y] == (int)raw[0].d_w) read_frame_p = mulres_read_frame; else read_frame_p = mulres_read_frame_by_row; for (i = 0; i < NUM_ENCODERS; i++) if (outfile[i]) write_ivf_file_header(outfile[i], &cfg[i], 0); /* Temporal layers settings */ for (i = 0; i < NUM_ENCODERS; i++) { set_temporal_layer_pattern(num_temporal_layers[i], &cfg[i], cfg[i].rc_target_bitrate, &layer_flags[i * VPX_TS_MAX_PERIODICITY]); } /* Initialize multi-encoder */ if (vpx_codec_enc_init_multi(&codec[0], interface, &cfg[0], NUM_ENCODERS, (show_psnr ? VPX_CODEC_USE_PSNR : 0), &dsf[0])) die_codec(&codec[0], "Failed to initialize encoder"); /* The extra encoding configuration parameters can be set as follows. */ /* Set encoding speed */ for (i = 0; i < NUM_ENCODERS; i++) { int speed = -6; /* Lower speed for the lowest resolution. */ if (i == NUM_ENCODERS - 1) speed = -4; if (vpx_codec_control(&codec[i], VP8E_SET_CPUUSED, speed)) die_codec(&codec[i], "Failed to set cpu_used"); } /* Set static threshold = 1 for all encoders */ for (i = 0; i < NUM_ENCODERS; i++) { if (vpx_codec_control(&codec[i], VP8E_SET_STATIC_THRESHOLD, 1)) die_codec(&codec[i], "Failed to set static threshold"); } /* Set NOISE_SENSITIVITY to do TEMPORAL_DENOISING */ /* Enable denoising for the highest-resolution encoder. */ if (vpx_codec_control(&codec[0], VP8E_SET_NOISE_SENSITIVITY, 1)) die_codec(&codec[0], "Failed to set noise_sensitivity"); if (vpx_codec_control(&codec[1], VP8E_SET_NOISE_SENSITIVITY, 1)) die_codec(&codec[1], "Failed to set noise_sensitivity"); for (i = 2; i < NUM_ENCODERS; i++) { if (vpx_codec_control(&codec[i], VP8E_SET_NOISE_SENSITIVITY, 0)) die_codec(&codec[i], "Failed to set noise_sensitivity"); } /* Set the number of token partitions */ for (i = 0; i < NUM_ENCODERS; i++) { if (vpx_codec_control(&codec[i], VP8E_SET_TOKEN_PARTITIONS, 1)) die_codec(&codec[i], "Failed to set static threshold"); } /* Set the max intra target bitrate */ for (i = 0; i < NUM_ENCODERS; i++) { unsigned int max_intra_size_pct = (int)(((double)cfg[0].rc_buf_optimal_sz * 0.5) * framerate / 10); if (vpx_codec_control(&codec[i], VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct)) die_codec(&codec[i], "Failed to set static threshold"); // printf("%d %d \n",i,max_intra_size_pct); } frame_avail = 1; got_data = 0; while (frame_avail || got_data) { struct vpx_usec_timer timer; vpx_codec_iter_t iter[NUM_ENCODERS] = { NULL }; const vpx_codec_cx_pkt_t *pkt[NUM_ENCODERS]; flags = 0; frame_avail = read_frame_p(infile, &raw[0]); if (frame_avail) { for (i = 1; i < NUM_ENCODERS; i++) { /*Scale the image down a number of times by downsampling factor*/ /* FilterMode 1 or 2 give better psnr than FilterMode 0. 
*/ I420Scale( raw[i - 1].planes[VPX_PLANE_Y], raw[i - 1].stride[VPX_PLANE_Y], raw[i - 1].planes[VPX_PLANE_U], raw[i - 1].stride[VPX_PLANE_U], raw[i - 1].planes[VPX_PLANE_V], raw[i - 1].stride[VPX_PLANE_V], raw[i - 1].d_w, raw[i - 1].d_h, raw[i].planes[VPX_PLANE_Y], raw[i].stride[VPX_PLANE_Y], raw[i].planes[VPX_PLANE_U], raw[i].stride[VPX_PLANE_U], raw[i].planes[VPX_PLANE_V], raw[i].stride[VPX_PLANE_V], raw[i].d_w, raw[i].d_h, 1); /* Write out down-sampled input. */ length_frame = cfg[i].g_w * cfg[i].g_h * 3 / 2; if (fwrite(raw[i].planes[0], 1, length_frame, downsampled_input[NUM_ENCODERS - i - 1]) != (unsigned int)length_frame) { return EXIT_FAILURE; } } } /* Set the flags (reference and update) for all the encoders.*/ for (i = 0; i < NUM_ENCODERS; i++) { layer_id = cfg[i].ts_layer_id[frame_cnt % cfg[i].ts_periodicity]; flags = 0; flag_periodicity = periodicity_to_num_layers[num_temporal_layers[i] - 1]; flags = layer_flags[i * VPX_TS_MAX_PERIODICITY + frame_cnt % flag_periodicity]; // Key frame flag for first frame. if (frame_cnt == 0) { flags |= VPX_EFLAG_FORCE_KF; } if (frame_cnt > 0 && frame_cnt == key_frame_insert) { flags = VPX_EFLAG_FORCE_KF; } vpx_codec_control(&codec[i], VP8E_SET_FRAME_FLAGS, flags); vpx_codec_control(&codec[i], VP8E_SET_TEMPORAL_LAYER_ID, layer_id); } /* Encode each frame at multi-levels */ /* Note the flags must be set to 0 in the encode call if they are set for each frame with the vpx_codec_control(), as done above. */ vpx_usec_timer_start(&timer); if (vpx_codec_encode(&codec[0], frame_avail ? &raw[0] : NULL, frame_cnt, 1, 0, arg_deadline)) { die_codec(&codec[0], "Failed to encode frame"); } vpx_usec_timer_mark(&timer); cx_time += vpx_usec_timer_elapsed(&timer); for (i = NUM_ENCODERS - 1; i >= 0; i--) { got_data = 0; while ((pkt[i] = vpx_codec_get_cx_data(&codec[i], &iter[i]))) { got_data = 1; switch (pkt[i]->kind) { case VPX_CODEC_CX_FRAME_PKT: write_ivf_frame_header(outfile[i], pkt[i]); (void)fwrite(pkt[i]->data.frame.buf, 1, pkt[i]->data.frame.sz, outfile[i]); break; case VPX_CODEC_PSNR_PKT: if (show_psnr) { int j; psnr_sse_total[i] += pkt[i]->data.psnr.sse[0]; psnr_samples_total[i] += pkt[i]->data.psnr.samples[0]; for (j = 0; j < 4; j++) { psnr_totals[i][j] += pkt[i]->data.psnr.psnr[j]; } psnr_count[i]++; } break; default: break; } fflush(stdout); } } frame_cnt++; } printf("\n"); printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000), 1000000 * (double)frame_cnt / (double)cx_time); fclose(infile); printf("Processed %ld frames.\n", (long int)frame_cnt - 1); for (i = 0; i < NUM_ENCODERS; i++) { /* Calculate PSNR and print it out */ if ((show_psnr) && (psnr_count[i] > 0)) { int j; double ovpsnr = sse_to_psnr(psnr_samples_total[i], 255.0, psnr_sse_total[i]); fprintf(stderr, "\n ENC%d PSNR (Overall/Avg/Y/U/V)", i); fprintf(stderr, " %.3lf", ovpsnr); for (j = 0; j < 4; j++) { fprintf(stderr, " %.3lf", psnr_totals[i][j] / psnr_count[i]); } } if (vpx_codec_destroy(&codec[i])) die_codec(&codec[i], "Failed to destroy codec"); vpx_img_free(&raw[i]); if (!outfile[i]) continue; /* Try to rewrite the file header with the actual frame count */ if (!fseek(outfile[i], 0, SEEK_SET)) write_ivf_file_header(outfile[i], &cfg[i], frame_cnt - 1); fclose(outfile[i]); } return EXIT_SUCCESS; } libvpx-1.8.2/examples/vp8cx_set_ref.c000066400000000000000000000135001357355204000175720ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

// VP8 Set Reference Frame
// =======================
//
// This is an example demonstrating how to overwrite the VP8 encoder's
// internal reference frame. In the sample we set the last frame to the
// current frame. If this is done at a cut scene it will avoid a keyframe.
// This technique could be used to bounce between two cameras.
//
// Note that the decoder would also have to set the reference frame to the
// same value on the same frame, or the video will become corrupt.
//
// Usage
// -----
// This example adds a single argument to the `simple_encoder` example,
// which specifies the frame number to update the reference frame on.
// The parameter is parsed as follows:
//
//
// Extra Variables
// ---------------
// This example maintains the frame number passed on the command line
// in the `update_frame_num` variable.
//
//
// Configuration
// -------------
//
// The reference frame is updated on the frame specified on the command
// line.
//
// Observing The Effects
// ---------------------
// Use the `simple_encoder` example to encode a sample with a cut scene.
// Determine the frame number of the cut scene by looking for a generated
// key-frame (indicated by a 'K'). Supply that frame number as an argument
// to this example, and observe that no key-frame is generated.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"

#include "vp8/common/common.h"

#include "../tools_common.h"
#include "../video_writer.h"

static const char *exec_name;

void usage_exit(void) {
  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile> <frame>\n",
          exec_name);
  exit(EXIT_FAILURE);
}

static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img,
                        int frame_index, VpxVideoWriter *writer) {
  int got_pkts = 0;
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt = NULL;
  const vpx_codec_err_t res =
      vpx_codec_encode(codec, img, frame_index, 1, 0, VPX_DL_GOOD_QUALITY);
  if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame");

  while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) {
    got_pkts = 1;

    if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
      const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
      if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf,
                                        pkt->data.frame.sz,
                                        pkt->data.frame.pts)) {
        die_codec(codec, "Failed to write compressed frame");
      }
      printf(keyframe ?
"K" : "."); fflush(stdout); } } return got_pkts; } int main(int argc, char **argv) { FILE *infile = NULL; vpx_codec_ctx_t codec; vpx_codec_enc_cfg_t cfg; int frame_count = 0; vpx_image_t raw; vpx_codec_err_t res; VpxVideoInfo info; VpxVideoWriter *writer = NULL; const VpxInterface *encoder = NULL; int update_frame_num = 0; const int fps = 30; // TODO(dkovalev) add command line argument const int bitrate = 200; // kbit/s TODO(dkovalev) add command line argument vp8_zero(codec); vp8_zero(cfg); vp8_zero(info); exec_name = argv[0]; if (argc != 6) die("Invalid number of arguments"); // TODO(dkovalev): add vp9 support and rename the file accordingly encoder = get_vpx_encoder_by_name("vp8"); if (!encoder) die("Unsupported codec."); update_frame_num = atoi(argv[5]); if (!update_frame_num) die("Couldn't parse frame number '%s'\n", argv[5]); info.codec_fourcc = encoder->fourcc; info.frame_width = (int)strtol(argv[1], NULL, 0); info.frame_height = (int)strtol(argv[2], NULL, 0); info.time_base.numerator = 1; info.time_base.denominator = fps; if (info.frame_width <= 0 || info.frame_height <= 0 || (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); } if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width, info.frame_height, 1)) { die("Failed to allocate image."); } printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); if (res) die_codec(&codec, "Failed to get default codec config."); cfg.g_w = info.frame_width; cfg.g_h = info.frame_height; cfg.g_timebase.num = info.time_base.numerator; cfg.g_timebase.den = info.time_base.denominator; cfg.rc_target_bitrate = bitrate; writer = vpx_video_writer_open(argv[4], kContainerIVF, &info); if (!writer) die("Failed to open %s for writing.", argv[4]); if (!(infile = fopen(argv[3], "rb"))) die("Failed to open %s for reading.", argv[3]); if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) die_codec(&codec, "Failed to initialize encoder"); // Encode frames. while (vpx_img_read(&raw, infile)) { if (frame_count + 1 == update_frame_num) { vpx_ref_frame_t ref; ref.frame_type = VP8_LAST_FRAME; ref.img = raw; if (vpx_codec_control(&codec, VP8_SET_REFERENCE, &ref)) die_codec(&codec, "Failed to set reference frame"); } encode_frame(&codec, &raw, frame_count++, writer); } // Flush encoder. while (encode_frame(&codec, NULL, -1, writer)) { } printf("\n"); fclose(infile); printf("Processed %d frames.\n", frame_count); vpx_img_free(&raw); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec."); vpx_video_writer_close(writer); return EXIT_SUCCESS; } libvpx-1.8.2/examples/vp9_lossless_encoder.c000066400000000000000000000102171357355204000211610ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "vpx/vpx_encoder.h"
#include "vpx/vp8cx.h"

#include "vp9/common/vp9_common.h"

#include "../tools_common.h"
#include "../video_writer.h"

static const char *exec_name;

void usage_exit(void) {
  fprintf(stderr,
          "vp9_lossless_encoder: Example demonstrating VP9 lossless "
          "encoding feature. Supports raw input only.\n");
  fprintf(stderr, "Usage: %s <width> <height> <infile> <outfile>\n",
          exec_name);
  exit(EXIT_FAILURE);
}

static int encode_frame(vpx_codec_ctx_t *codec, vpx_image_t *img,
                        int frame_index, int flags, VpxVideoWriter *writer) {
  int got_pkts = 0;
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt = NULL;
  const vpx_codec_err_t res =
      vpx_codec_encode(codec, img, frame_index, 1, flags, VPX_DL_GOOD_QUALITY);
  if (res != VPX_CODEC_OK) die_codec(codec, "Failed to encode frame");

  while ((pkt = vpx_codec_get_cx_data(codec, &iter)) != NULL) {
    got_pkts = 1;

    if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
      const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
      if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf,
                                        pkt->data.frame.sz,
                                        pkt->data.frame.pts)) {
        die_codec(codec, "Failed to write compressed frame");
      }
      printf(keyframe ? "K" : ".");
      fflush(stdout);
    }
  }

  return got_pkts;
}

int main(int argc, char **argv) {
  FILE *infile = NULL;
  vpx_codec_ctx_t codec;
  vpx_codec_enc_cfg_t cfg;
  int frame_count = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
  VpxVideoInfo info;
  VpxVideoWriter *writer = NULL;
  const VpxInterface *encoder = NULL;
  const int fps = 30;

  vp9_zero(info);

  exec_name = argv[0];

  if (argc < 5) die("Invalid number of arguments");

  encoder = get_vpx_encoder_by_name("vp9");
  if (!encoder) die("Unsupported codec.");

  info.codec_fourcc = encoder->fourcc;
  info.frame_width = (int)strtol(argv[1], NULL, 0);
  info.frame_height = (int)strtol(argv[2], NULL, 0);
  info.time_base.numerator = 1;
  info.time_base.denominator = fps;

  if (info.frame_width <= 0 || info.frame_height <= 0 ||
      (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) {
    die("Invalid frame size: %dx%d", info.frame_width, info.frame_height);
  }

  if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width,
                     info.frame_height, 1)) {
    die("Failed to allocate image.");
  }

  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));

  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
  if (res) die_codec(&codec, "Failed to get default codec config.");

  cfg.g_w = info.frame_width;
  cfg.g_h = info.frame_height;
  cfg.g_timebase.num = info.time_base.numerator;
  cfg.g_timebase.den = info.time_base.denominator;

  writer = vpx_video_writer_open(argv[4], kContainerIVF, &info);
  if (!writer) die("Failed to open %s for writing.", argv[4]);

  if (!(infile = fopen(argv[3], "rb")))
    die("Failed to open %s for reading.", argv[3]);

  if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0))
    die_codec(&codec, "Failed to initialize encoder");

  if (vpx_codec_control_(&codec, VP9E_SET_LOSSLESS, 1))
    die_codec(&codec, "Failed to use lossless mode");

  // Encode frames.
  while (vpx_img_read(&raw, infile)) {
    encode_frame(&codec, &raw, frame_count++, 0, writer);
  }

  // Flush encoder.
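  // Passing a NULL image drains the encoder: keep calling until
  // encode_frame() reports that no more packets are pending.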
  while (encode_frame(&codec, NULL, -1, 0, writer)) {
  }

  printf("\n");
  fclose(infile);
  printf("Processed %d frames.\n", frame_count);

  vpx_img_free(&raw);
  if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec.");

  vpx_video_writer_close(writer);

  return EXIT_SUCCESS;
}
libvpx-1.8.2/examples/vp9_spatial_svc_encoder.c000066400000000000000000001373051357355204000216300ustar00rootroot00000000000000/*
 * Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

/*
 * This is an example demonstrating how to implement a multi-layer
 * VP9 encoding scheme based on spatial scalability for video applications
 * that benefit from a scalable bitstream.
 */

#include <assert.h>
#include <math.h>
#include <stdarg.h>
#include <stdlib.h>
#include <string.h>

#include "../args.h"
#include "../tools_common.h"
#include "../video_writer.h"

#include "../vpx_ports/vpx_timer.h"
#include "./svc_context.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
#include "../vpxstats.h"
#include "vp9/encoder/vp9_encoder.h"
#include "./y4minput.h"

#define OUTPUT_RC_STATS 1

#define SIMULCAST_MODE 0

static const arg_def_t outputfile =
    ARG_DEF("o", "output", 1, "Output filename");
static const arg_def_t skip_frames_arg =
    ARG_DEF("s", "skip-frames", 1, "input frames to skip");
static const arg_def_t frames_arg =
    ARG_DEF("f", "frames", 1, "number of frames to encode");
static const arg_def_t threads_arg =
    ARG_DEF("th", "threads", 1, "number of threads to use");
#if OUTPUT_RC_STATS
static const arg_def_t output_rc_stats_arg =
    ARG_DEF("rcstat", "output_rc_stats", 1, "output rc stats");
#endif
static const arg_def_t width_arg = ARG_DEF("w", "width", 1, "source width");
static const arg_def_t height_arg = ARG_DEF("h", "height", 1, "source height");
static const arg_def_t timebase_arg =
    ARG_DEF("t", "timebase", 1, "timebase (num/den)");
static const arg_def_t bitrate_arg = ARG_DEF(
    "b", "target-bitrate", 1, "encoding bitrate, in kilobits per second");
static const arg_def_t spatial_layers_arg =
    ARG_DEF("sl", "spatial-layers", 1, "number of spatial SVC layers");
static const arg_def_t temporal_layers_arg =
    ARG_DEF("tl", "temporal-layers", 1, "number of temporal SVC layers");
static const arg_def_t temporal_layering_mode_arg =
    ARG_DEF("tlm", "temporal-layering-mode", 1,
            "temporal layering scheme."
"VP9E_TEMPORAL_LAYERING_MODE"); static const arg_def_t kf_dist_arg = ARG_DEF("k", "kf-dist", 1, "number of frames between keyframes"); static const arg_def_t scale_factors_arg = ARG_DEF("r", "scale-factors", 1, "scale factors (lowest to highest layer)"); static const arg_def_t passes_arg = ARG_DEF("p", "passes", 1, "Number of passes (1/2)"); static const arg_def_t pass_arg = ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)"); static const arg_def_t fpf_name_arg = ARG_DEF(NULL, "fpf", 1, "First pass statistics file name"); static const arg_def_t min_q_arg = ARG_DEF(NULL, "min-q", 1, "Minimum quantizer"); static const arg_def_t max_q_arg = ARG_DEF(NULL, "max-q", 1, "Maximum quantizer"); static const arg_def_t min_bitrate_arg = ARG_DEF(NULL, "min-bitrate", 1, "Minimum bitrate"); static const arg_def_t max_bitrate_arg = ARG_DEF(NULL, "max-bitrate", 1, "Maximum bitrate"); static const arg_def_t lag_in_frame_arg = ARG_DEF(NULL, "lag-in-frames", 1, "Number of frame to input before " "generating any outputs"); static const arg_def_t rc_end_usage_arg = ARG_DEF(NULL, "rc-end-usage", 1, "0 - 3: VBR, CBR, CQ, Q"); static const arg_def_t speed_arg = ARG_DEF("sp", "speed", 1, "speed configuration"); static const arg_def_t aqmode_arg = ARG_DEF("aq", "aqmode", 1, "aq-mode off/on"); static const arg_def_t bitrates_arg = ARG_DEF("bl", "bitrates", 1, "bitrates[sl * num_tl + tl]"); static const arg_def_t dropframe_thresh_arg = ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)"); static const struct arg_enum_list tune_content_enum[] = { { "default", VP9E_CONTENT_DEFAULT }, { "screen", VP9E_CONTENT_SCREEN }, { "film", VP9E_CONTENT_FILM }, { NULL, 0 } }; static const arg_def_t tune_content_arg = ARG_DEF_ENUM( NULL, "tune-content", 1, "Tune content type", tune_content_enum); static const arg_def_t inter_layer_pred_arg = ARG_DEF( NULL, "inter-layer-pred", 1, "0 - 3: On, Off, Key-frames, Constrained"); #if CONFIG_VP9_HIGHBITDEPTH static const struct arg_enum_list bitdepth_enum[] = { { "8", VPX_BITS_8 }, { "10", VPX_BITS_10 }, { "12", VPX_BITS_12 }, { NULL, 0 } }; static const arg_def_t bitdepth_arg = ARG_DEF_ENUM( "d", "bit-depth", 1, "Bit depth for codec 8, 10 or 12. ", bitdepth_enum); #endif // CONFIG_VP9_HIGHBITDEPTH static const arg_def_t *svc_args[] = { &frames_arg, &outputfile, &width_arg, &height_arg, &timebase_arg, &bitrate_arg, &skip_frames_arg, &spatial_layers_arg, &kf_dist_arg, &scale_factors_arg, &passes_arg, &pass_arg, &fpf_name_arg, &min_q_arg, &max_q_arg, &min_bitrate_arg, &max_bitrate_arg, &temporal_layers_arg, &temporal_layering_mode_arg, &lag_in_frame_arg, &threads_arg, &aqmode_arg, #if OUTPUT_RC_STATS &output_rc_stats_arg, #endif #if CONFIG_VP9_HIGHBITDEPTH &bitdepth_arg, #endif &speed_arg, &rc_end_usage_arg, &bitrates_arg, &dropframe_thresh_arg, &tune_content_arg, &inter_layer_pred_arg, NULL }; static const uint32_t default_frames_to_skip = 0; static const uint32_t default_frames_to_code = 60 * 60; static const uint32_t default_width = 1920; static const uint32_t default_height = 1080; static const uint32_t default_timebase_num = 1; static const uint32_t default_timebase_den = 60; static const uint32_t default_bitrate = 1000; static const uint32_t default_spatial_layers = 5; static const uint32_t default_temporal_layers = 1; static const uint32_t default_kf_dist = 100; static const uint32_t default_temporal_layering_mode = 0; static const uint32_t default_output_rc_stats = 0; static const int32_t default_speed = -1; // -1 means use library default. 
static const uint32_t default_threads = 0; // zero means use library default. typedef struct { const char *output_filename; uint32_t frames_to_code; uint32_t frames_to_skip; struct VpxInputContext input_ctx; stats_io_t rc_stats; int passes; int pass; int tune_content; int inter_layer_pred; } AppInput; static const char *exec_name; void usage_exit(void) { fprintf(stderr, "Usage: %s input_filename -o output_filename\n", exec_name); fprintf(stderr, "Options:\n"); arg_show_usage(stderr, svc_args); exit(EXIT_FAILURE); } static void parse_command_line(int argc, const char **argv_, AppInput *app_input, SvcContext *svc_ctx, vpx_codec_enc_cfg_t *enc_cfg) { struct arg arg; char **argv = NULL; char **argi = NULL; char **argj = NULL; vpx_codec_err_t res; int passes = 0; int pass = 0; const char *fpf_file_name = NULL; unsigned int min_bitrate = 0; unsigned int max_bitrate = 0; char string_options[1024] = { 0 }; // initialize SvcContext with parameters that will be passed to vpx_svc_init svc_ctx->log_level = SVC_LOG_DEBUG; svc_ctx->spatial_layers = default_spatial_layers; svc_ctx->temporal_layers = default_temporal_layers; svc_ctx->temporal_layering_mode = default_temporal_layering_mode; #if OUTPUT_RC_STATS svc_ctx->output_rc_stat = default_output_rc_stats; #endif svc_ctx->speed = default_speed; svc_ctx->threads = default_threads; // start with default encoder configuration res = vpx_codec_enc_config_default(vpx_codec_vp9_cx(), enc_cfg, 0); if (res) { die("Failed to get config: %s\n", vpx_codec_err_to_string(res)); } // update enc_cfg with app default values enc_cfg->g_w = default_width; enc_cfg->g_h = default_height; enc_cfg->g_timebase.num = default_timebase_num; enc_cfg->g_timebase.den = default_timebase_den; enc_cfg->rc_target_bitrate = default_bitrate; enc_cfg->kf_min_dist = default_kf_dist; enc_cfg->kf_max_dist = default_kf_dist; enc_cfg->rc_end_usage = VPX_CQ; // initialize AppInput with default values app_input->frames_to_code = default_frames_to_code; app_input->frames_to_skip = default_frames_to_skip; // process command line options argv = argv_dup(argc - 1, argv_ + 1); for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { arg.argv_step = 1; if (arg_match(&arg, &frames_arg, argi)) { app_input->frames_to_code = arg_parse_uint(&arg); } else if (arg_match(&arg, &outputfile, argi)) { app_input->output_filename = arg.val; } else if (arg_match(&arg, &width_arg, argi)) { enc_cfg->g_w = arg_parse_uint(&arg); } else if (arg_match(&arg, &height_arg, argi)) { enc_cfg->g_h = arg_parse_uint(&arg); } else if (arg_match(&arg, &timebase_arg, argi)) { enc_cfg->g_timebase = arg_parse_rational(&arg); } else if (arg_match(&arg, &bitrate_arg, argi)) { enc_cfg->rc_target_bitrate = arg_parse_uint(&arg); } else if (arg_match(&arg, &skip_frames_arg, argi)) { app_input->frames_to_skip = arg_parse_uint(&arg); } else if (arg_match(&arg, &spatial_layers_arg, argi)) { svc_ctx->spatial_layers = arg_parse_uint(&arg); } else if (arg_match(&arg, &temporal_layers_arg, argi)) { svc_ctx->temporal_layers = arg_parse_uint(&arg); #if OUTPUT_RC_STATS } else if (arg_match(&arg, &output_rc_stats_arg, argi)) { svc_ctx->output_rc_stat = arg_parse_uint(&arg); #endif } else if (arg_match(&arg, &speed_arg, argi)) { svc_ctx->speed = arg_parse_uint(&arg); if (svc_ctx->speed > 9) { warn("Mapping speed %d to speed 9.\n", svc_ctx->speed); } } else if (arg_match(&arg, &aqmode_arg, argi)) { svc_ctx->aqmode = arg_parse_uint(&arg); } else if (arg_match(&arg, &threads_arg, argi)) { svc_ctx->threads = arg_parse_uint(&arg); } else if 
(arg_match(&arg, &temporal_layering_mode_arg, argi)) { svc_ctx->temporal_layering_mode = enc_cfg->temporal_layering_mode = arg_parse_int(&arg); if (svc_ctx->temporal_layering_mode) { enc_cfg->g_error_resilient = 1; } } else if (arg_match(&arg, &kf_dist_arg, argi)) { enc_cfg->kf_min_dist = arg_parse_uint(&arg); enc_cfg->kf_max_dist = enc_cfg->kf_min_dist; } else if (arg_match(&arg, &scale_factors_arg, argi)) { strncat(string_options, " scale-factors=", sizeof(string_options) - strlen(string_options) - 1); strncat(string_options, arg.val, sizeof(string_options) - strlen(string_options) - 1); } else if (arg_match(&arg, &bitrates_arg, argi)) { strncat(string_options, " bitrates=", sizeof(string_options) - strlen(string_options) - 1); strncat(string_options, arg.val, sizeof(string_options) - strlen(string_options) - 1); } else if (arg_match(&arg, &passes_arg, argi)) { passes = arg_parse_uint(&arg); if (passes < 1 || passes > 2) { die("Error: Invalid number of passes (%d)\n", passes); } } else if (arg_match(&arg, &pass_arg, argi)) { pass = arg_parse_uint(&arg); if (pass < 1 || pass > 2) { die("Error: Invalid pass selected (%d)\n", pass); } } else if (arg_match(&arg, &fpf_name_arg, argi)) { fpf_file_name = arg.val; } else if (arg_match(&arg, &min_q_arg, argi)) { strncat(string_options, " min-quantizers=", sizeof(string_options) - strlen(string_options) - 1); strncat(string_options, arg.val, sizeof(string_options) - strlen(string_options) - 1); } else if (arg_match(&arg, &max_q_arg, argi)) { strncat(string_options, " max-quantizers=", sizeof(string_options) - strlen(string_options) - 1); strncat(string_options, arg.val, sizeof(string_options) - strlen(string_options) - 1); } else if (arg_match(&arg, &min_bitrate_arg, argi)) { min_bitrate = arg_parse_uint(&arg); } else if (arg_match(&arg, &max_bitrate_arg, argi)) { max_bitrate = arg_parse_uint(&arg); } else if (arg_match(&arg, &lag_in_frame_arg, argi)) { enc_cfg->g_lag_in_frames = arg_parse_uint(&arg); } else if (arg_match(&arg, &rc_end_usage_arg, argi)) { enc_cfg->rc_end_usage = arg_parse_uint(&arg); #if CONFIG_VP9_HIGHBITDEPTH } else if (arg_match(&arg, &bitdepth_arg, argi)) { enc_cfg->g_bit_depth = arg_parse_enum_or_int(&arg); switch (enc_cfg->g_bit_depth) { case VPX_BITS_8: enc_cfg->g_input_bit_depth = 8; enc_cfg->g_profile = 0; break; case VPX_BITS_10: enc_cfg->g_input_bit_depth = 10; enc_cfg->g_profile = 2; break; case VPX_BITS_12: enc_cfg->g_input_bit_depth = 12; enc_cfg->g_profile = 2; break; default: die("Error: Invalid bit depth selected (%d)\n", enc_cfg->g_bit_depth); break; } #endif // CONFIG_VP9_HIGHBITDEPTH } else if (arg_match(&arg, &dropframe_thresh_arg, argi)) { enc_cfg->rc_dropframe_thresh = arg_parse_uint(&arg); } else if (arg_match(&arg, &tune_content_arg, argi)) { app_input->tune_content = arg_parse_uint(&arg); } else if (arg_match(&arg, &inter_layer_pred_arg, argi)) { app_input->inter_layer_pred = arg_parse_uint(&arg); } else { ++argj; } } // There will be a space in front of the string options if (strlen(string_options) > 0) vpx_svc_set_options(svc_ctx, string_options + 1); if (passes == 0 || passes == 1) { if (pass) { fprintf(stderr, "pass is ignored since there's only one pass\n"); } enc_cfg->g_pass = VPX_RC_ONE_PASS; } else { if (pass == 0) { die("pass must be specified when passes is 2\n"); } if (fpf_file_name == NULL) { die("fpf must be specified when passes is 2\n"); } if (pass == 1) { enc_cfg->g_pass = VPX_RC_FIRST_PASS; if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 0)) { fatal("Failed to open statistics 
store"); } } else { enc_cfg->g_pass = VPX_RC_LAST_PASS; if (!stats_open_file(&app_input->rc_stats, fpf_file_name, 1)) { fatal("Failed to open statistics store"); } enc_cfg->rc_twopass_stats_in = stats_get(&app_input->rc_stats); } app_input->passes = passes; app_input->pass = pass; } if (enc_cfg->rc_target_bitrate > 0) { if (min_bitrate > 0) { enc_cfg->rc_2pass_vbr_minsection_pct = min_bitrate * 100 / enc_cfg->rc_target_bitrate; } if (max_bitrate > 0) { enc_cfg->rc_2pass_vbr_maxsection_pct = max_bitrate * 100 / enc_cfg->rc_target_bitrate; } } // Check for unrecognized options for (argi = argv; *argi; ++argi) if (argi[0][0] == '-' && strlen(argi[0]) > 1) die("Error: Unrecognized option %s\n", *argi); if (argv[0] == NULL) { usage_exit(); } app_input->input_ctx.filename = argv[0]; free(argv); open_input_file(&app_input->input_ctx); if (app_input->input_ctx.file_type == FILE_TYPE_Y4M) { enc_cfg->g_w = app_input->input_ctx.width; enc_cfg->g_h = app_input->input_ctx.height; } if (enc_cfg->g_w < 16 || enc_cfg->g_w % 2 || enc_cfg->g_h < 16 || enc_cfg->g_h % 2) die("Invalid resolution: %d x %d\n", enc_cfg->g_w, enc_cfg->g_h); printf( "Codec %s\nframes: %d, skip: %d\n" "layers: %d\n" "width %d, height: %d,\n" "num: %d, den: %d, bitrate: %d,\n" "gop size: %d\n", vpx_codec_iface_name(vpx_codec_vp9_cx()), app_input->frames_to_code, app_input->frames_to_skip, svc_ctx->spatial_layers, enc_cfg->g_w, enc_cfg->g_h, enc_cfg->g_timebase.num, enc_cfg->g_timebase.den, enc_cfg->rc_target_bitrate, enc_cfg->kf_max_dist); } #if OUTPUT_RC_STATS // For rate control encoding stats. struct RateControlStats { // Number of input frames per layer. int layer_input_frames[VPX_MAX_LAYERS]; // Total (cumulative) number of encoded frames per layer. int layer_tot_enc_frames[VPX_MAX_LAYERS]; // Number of encoded non-key frames per layer. int layer_enc_frames[VPX_MAX_LAYERS]; // Framerate per layer (cumulative). double layer_framerate[VPX_MAX_LAYERS]; // Target average frame size per layer (per-frame-bandwidth per layer). double layer_pfb[VPX_MAX_LAYERS]; // Actual average frame size per layer. double layer_avg_frame_size[VPX_MAX_LAYERS]; // Average rate mismatch per layer (|target - actual| / target). double layer_avg_rate_mismatch[VPX_MAX_LAYERS]; // Actual encoding bitrate per layer (cumulative). double layer_encoding_bitrate[VPX_MAX_LAYERS]; // Average of the short-time encoder actual bitrate. // TODO(marpan): Should we add these short-time stats for each layer? double avg_st_encoding_bitrate; // Variance of the short-time encoder actual bitrate. double variance_st_encoding_bitrate; // Window (number of frames) for computing short-time encoding bitrate. int window_size; // Number of window measurements. int window_count; }; // Note: these rate control stats assume only 1 key frame in the // sequence (i.e., first frame only). static void set_rate_control_stats(struct RateControlStats *rc, vpx_codec_enc_cfg_t *cfg) { unsigned int sl, tl; // Set the layer (cumulative) framerate and the target layer (non-cumulative) // per-frame-bandwidth, for the rate control encoding stats below. 
const double framerate = cfg->g_timebase.den / cfg->g_timebase.num; for (sl = 0; sl < cfg->ss_number_layers; ++sl) { for (tl = 0; tl < cfg->ts_number_layers; ++tl) { const int layer = sl * cfg->ts_number_layers + tl; if (cfg->ts_number_layers == 1) rc->layer_framerate[layer] = framerate; else rc->layer_framerate[layer] = framerate / cfg->ts_rate_decimator[tl]; if (tl > 0) { rc->layer_pfb[layer] = 1000.0 * (cfg->layer_target_bitrate[layer] - cfg->layer_target_bitrate[layer - 1]) / (rc->layer_framerate[layer] - rc->layer_framerate[layer - 1]); } else { rc->layer_pfb[layer] = 1000.0 * cfg->layer_target_bitrate[layer] / rc->layer_framerate[layer]; } rc->layer_input_frames[layer] = 0; rc->layer_enc_frames[layer] = 0; rc->layer_tot_enc_frames[layer] = 0; rc->layer_encoding_bitrate[layer] = 0.0; rc->layer_avg_frame_size[layer] = 0.0; rc->layer_avg_rate_mismatch[layer] = 0.0; } } rc->window_count = 0; rc->window_size = 15; rc->avg_st_encoding_bitrate = 0.0; rc->variance_st_encoding_bitrate = 0.0; } static void printout_rate_control_summary(struct RateControlStats *rc, vpx_codec_enc_cfg_t *cfg, int frame_cnt) { unsigned int sl, tl; double perc_fluctuation = 0.0; int tot_num_frames = 0; printf("Total number of processed frames: %d\n\n", frame_cnt - 1); printf("Rate control layer stats for sl%d tl%d layer(s):\n\n", cfg->ss_number_layers, cfg->ts_number_layers); for (sl = 0; sl < cfg->ss_number_layers; ++sl) { tot_num_frames = 0; for (tl = 0; tl < cfg->ts_number_layers; ++tl) { const int layer = sl * cfg->ts_number_layers + tl; const int num_dropped = (tl > 0) ? (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer]) : (rc->layer_input_frames[layer] - rc->layer_enc_frames[layer] - 1); tot_num_frames += rc->layer_input_frames[layer]; rc->layer_encoding_bitrate[layer] = 0.001 * rc->layer_framerate[layer] * rc->layer_encoding_bitrate[layer] / tot_num_frames; rc->layer_avg_frame_size[layer] = rc->layer_avg_frame_size[layer] / rc->layer_enc_frames[layer]; rc->layer_avg_rate_mismatch[layer] = 100.0 * rc->layer_avg_rate_mismatch[layer] / rc->layer_enc_frames[layer]; printf("For layer#: sl%d tl%d \n", sl, tl); printf("Bitrate (target vs actual): %d %f.0 kbps\n", cfg->layer_target_bitrate[layer], rc->layer_encoding_bitrate[layer]); printf("Average frame size (target vs actual): %f %f bits\n", rc->layer_pfb[layer], rc->layer_avg_frame_size[layer]); printf("Average rate_mismatch: %f\n", rc->layer_avg_rate_mismatch[layer]); printf( "Number of input frames, encoded (non-key) frames, " "and percent dropped frames: %d %d %f.0 \n", rc->layer_input_frames[layer], rc->layer_enc_frames[layer], 100.0 * num_dropped / rc->layer_input_frames[layer]); printf("\n"); } } rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count; rc->variance_st_encoding_bitrate = rc->variance_st_encoding_bitrate / rc->window_count - (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate); perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) / rc->avg_st_encoding_bitrate; printf("Short-time stats, for window of %d frames: \n", rc->window_size); printf("Average, rms-variance, and percent-fluct: %f %f %f \n", rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate), perc_fluctuation); printf("Num of input, num of encoded (super) frames: %d %d \n", frame_cnt, tot_num_frames); } static vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz, uint64_t sizes[8], int *count) { // A chunk ending with a byte matching 0xc0 is an invalid chunk unless // it is a super frame index. 
If the last byte of real video compression // data is 0xc0 the encoder must add a 0 byte. If we have the marker but // not the associated matching marker byte at the front of the index we have // an invalid bitstream and need to return an error. uint8_t marker; marker = *(data + data_sz - 1); *count = 0; if ((marker & 0xe0) == 0xc0) { const uint32_t frames = (marker & 0x7) + 1; const uint32_t mag = ((marker >> 3) & 0x3) + 1; const size_t index_sz = 2 + mag * frames; // This chunk is marked as having a superframe index but doesn't have // enough data for it, thus it's an invalid superframe index. if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME; { const uint8_t marker2 = *(data + data_sz - index_sz); // This chunk is marked as having a superframe index but doesn't have // the matching marker byte at the front of the index therefore it's an // invalid chunk. if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME; } { // Found a valid superframe index. uint32_t i, j; const uint8_t *x = &data[data_sz - index_sz + 1]; for (i = 0; i < frames; ++i) { uint32_t this_sz = 0; for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8); sizes[i] = this_sz; } *count = frames; } } return VPX_CODEC_OK; } #endif // Example pattern for spatial layers and 2 temporal layers used in the // bypass/flexible mode. The pattern corresponds to the pattern // VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in // non-flexible mode. static void set_frame_flags_bypass_mode_ex0( int tl, int num_spatial_layers, int is_key_frame, vpx_svc_ref_frame_config_t *ref_frame_config) { int sl; for (sl = 0; sl < num_spatial_layers; ++sl) ref_frame_config->update_buffer_slot[sl] = 0; for (sl = 0; sl < num_spatial_layers; ++sl) { // Set the buffer idx. if (tl == 0) { ref_frame_config->lst_fb_idx[sl] = sl; if (sl) { if (is_key_frame) { ref_frame_config->lst_fb_idx[sl] = sl - 1; ref_frame_config->gld_fb_idx[sl] = sl; } else { ref_frame_config->gld_fb_idx[sl] = sl - 1; } } else { ref_frame_config->gld_fb_idx[sl] = 0; } ref_frame_config->alt_fb_idx[sl] = 0; } else if (tl == 1) { ref_frame_config->lst_fb_idx[sl] = sl; ref_frame_config->gld_fb_idx[sl] = num_spatial_layers + sl - 1; ref_frame_config->alt_fb_idx[sl] = num_spatial_layers + sl; } // Set the reference and update flags. if (!tl) { if (!sl) { // Base spatial and base temporal (sl = 0, tl = 0) ref_frame_config->reference_last[sl] = 1; ref_frame_config->reference_golden[sl] = 0; ref_frame_config->reference_alt_ref[sl] = 0; ref_frame_config->update_buffer_slot[sl] |= 1 << ref_frame_config->lst_fb_idx[sl]; } else { if (is_key_frame) { ref_frame_config->reference_last[sl] = 1; ref_frame_config->reference_golden[sl] = 0; ref_frame_config->reference_alt_ref[sl] = 0; ref_frame_config->update_buffer_slot[sl] |= 1 << ref_frame_config->gld_fb_idx[sl]; } else { // Non-zero spatiall layer. ref_frame_config->reference_last[sl] = 1; ref_frame_config->reference_golden[sl] = 1; ref_frame_config->reference_alt_ref[sl] = 1; ref_frame_config->update_buffer_slot[sl] |= 1 << ref_frame_config->lst_fb_idx[sl]; } } } else if (tl == 1) { if (!sl) { // Base spatial and top temporal (tl = 1) ref_frame_config->reference_last[sl] = 1; ref_frame_config->reference_golden[sl] = 0; ref_frame_config->reference_alt_ref[sl] = 0; ref_frame_config->update_buffer_slot[sl] |= 1 << ref_frame_config->alt_fb_idx[sl]; } else { // Non-zero spatial. 
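        // update_buffer_slot[sl] is a bitmask over the reference buffers:
        // setting bit (1 << idx) refreshes that buffer slot after encoding,
        // while the reference_last/golden/alt_ref flags only control which
        // buffers this layer may predict from.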
if (sl < num_spatial_layers - 1) { ref_frame_config->reference_last[sl] = 1; ref_frame_config->reference_golden[sl] = 1; ref_frame_config->reference_alt_ref[sl] = 0; ref_frame_config->update_buffer_slot[sl] |= 1 << ref_frame_config->alt_fb_idx[sl]; } else if (sl == num_spatial_layers - 1) { // Top spatial and top temporal (non-reference -- doesn't update any // reference buffers) ref_frame_config->reference_last[sl] = 1; ref_frame_config->reference_golden[sl] = 1; ref_frame_config->reference_alt_ref[sl] = 0; } } } } } // Example pattern for 2 spatial layers and 2 temporal layers used in the // bypass/flexible mode, except only 1 spatial layer when temporal_layer_id = 1. static void set_frame_flags_bypass_mode_ex1( int tl, int num_spatial_layers, int is_key_frame, vpx_svc_ref_frame_config_t *ref_frame_config) { int sl; for (sl = 0; sl < num_spatial_layers; ++sl) ref_frame_config->update_buffer_slot[sl] = 0; if (tl == 0) { if (is_key_frame) { ref_frame_config->lst_fb_idx[1] = 0; ref_frame_config->gld_fb_idx[1] = 1; } else { ref_frame_config->lst_fb_idx[1] = 1; ref_frame_config->gld_fb_idx[1] = 0; } ref_frame_config->alt_fb_idx[1] = 0; ref_frame_config->lst_fb_idx[0] = 0; ref_frame_config->gld_fb_idx[0] = 0; ref_frame_config->alt_fb_idx[0] = 0; } if (tl == 1) { ref_frame_config->lst_fb_idx[0] = 0; ref_frame_config->gld_fb_idx[0] = 1; ref_frame_config->alt_fb_idx[0] = 2; ref_frame_config->lst_fb_idx[1] = 1; ref_frame_config->gld_fb_idx[1] = 2; ref_frame_config->alt_fb_idx[1] = 3; } // Set the reference and update flags. if (tl == 0) { // Base spatial and base temporal (sl = 0, tl = 0) ref_frame_config->reference_last[0] = 1; ref_frame_config->reference_golden[0] = 0; ref_frame_config->reference_alt_ref[0] = 0; ref_frame_config->update_buffer_slot[0] |= 1 << ref_frame_config->lst_fb_idx[0]; if (is_key_frame) { ref_frame_config->reference_last[1] = 1; ref_frame_config->reference_golden[1] = 0; ref_frame_config->reference_alt_ref[1] = 0; ref_frame_config->update_buffer_slot[1] |= 1 << ref_frame_config->gld_fb_idx[1]; } else { // Non-zero spatiall layer. 
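      // Inter frame on SL1: predict from all three references and refresh
      // only the LAST slot. The buffer indices chosen above keep the SL0
      // update (slot 0) and the SL1 update (slot 1) disjoint within one
      // superframe.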
ref_frame_config->reference_last[1] = 1; ref_frame_config->reference_golden[1] = 1; ref_frame_config->reference_alt_ref[1] = 1; ref_frame_config->update_buffer_slot[1] |= 1 << ref_frame_config->lst_fb_idx[1]; } } if (tl == 1) { // Top spatial and top temporal (non-reference -- doesn't update any // reference buffers) ref_frame_config->reference_last[1] = 1; ref_frame_config->reference_golden[1] = 0; ref_frame_config->reference_alt_ref[1] = 0; } } #if CONFIG_VP9_DECODER && !SIMULCAST_MODE static void test_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder, const int frames_out, int *mismatch_seen) { vpx_image_t enc_img, dec_img; struct vp9_ref_frame ref_enc, ref_dec; if (*mismatch_seen) return; /* Get the internal reference frame */ ref_enc.idx = 0; ref_dec.idx = 0; vpx_codec_control(encoder, VP9_GET_REFERENCE, &ref_enc); enc_img = ref_enc.img; vpx_codec_control(decoder, VP9_GET_REFERENCE, &ref_dec); dec_img = ref_dec.img; #if CONFIG_VP9_HIGHBITDEPTH if ((enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) != (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH)) { if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { vpx_img_alloc(&enc_img, enc_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH, enc_img.d_w, enc_img.d_h, 16); vpx_img_truncate_16_to_8(&enc_img, &ref_enc.img); } if (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { vpx_img_alloc(&dec_img, dec_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH, dec_img.d_w, dec_img.d_h, 16); vpx_img_truncate_16_to_8(&dec_img, &ref_dec.img); } } #endif if (!compare_img(&enc_img, &dec_img)) { int y[4], u[4], v[4]; #if CONFIG_VP9_HIGHBITDEPTH if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { find_mismatch_high(&enc_img, &dec_img, y, u, v); } else { find_mismatch(&enc_img, &dec_img, y, u, v); } #else find_mismatch(&enc_img, &dec_img, y, u, v); #endif decoder->err = 1; printf( "Encode/decode mismatch on frame %d at" " Y[%d, %d] {%d/%d}," " U[%d, %d] {%d/%d}," " V[%d, %d] {%d/%d}\n", frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1], v[2], v[3]); *mismatch_seen = frames_out; } vpx_img_free(&enc_img); vpx_img_free(&dec_img); } #endif #if OUTPUT_RC_STATS static void svc_output_rc_stats( vpx_codec_ctx_t *codec, vpx_codec_enc_cfg_t *enc_cfg, vpx_svc_layer_id_t *layer_id, const vpx_codec_cx_pkt_t *cx_pkt, struct RateControlStats *rc, VpxVideoWriter **outfile, const uint32_t frame_cnt, const double framerate) { int num_layers_encoded = 0; unsigned int sl, tl; uint64_t sizes[8]; uint64_t sizes_parsed[8]; int count = 0; double sum_bitrate = 0.0; double sum_bitrate2 = 0.0; vp9_zero(sizes); vp9_zero(sizes_parsed); vpx_codec_control(codec, VP9E_GET_SVC_LAYER_ID, layer_id); parse_superframe_index(cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, sizes_parsed, &count); if (enc_cfg->ss_number_layers == 1) sizes[0] = cx_pkt->data.frame.sz; for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { sizes[sl] = 0; if (cx_pkt->data.frame.spatial_layer_encoded[sl]) { sizes[sl] = sizes_parsed[num_layers_encoded]; num_layers_encoded++; } } for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { unsigned int sl2; uint64_t tot_size = 0; #if SIMULCAST_MODE for (sl2 = 0; sl2 < sl; ++sl2) { if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2]; } vpx_video_writer_write_frame(outfile[sl], (uint8_t *)(cx_pkt->data.frame.buf) + tot_size, (size_t)(sizes[sl]), cx_pkt->data.frame.pts); #else for (sl2 = 0; sl2 <= sl; ++sl2) { if (cx_pkt->data.frame.spatial_layer_encoded[sl2]) tot_size += sizes[sl2]; } if (tot_size > 0) vpx_video_writer_write_frame(outfile[sl], cx_pkt->data.frame.buf, (size_t)(tot_size), 
cx_pkt->data.frame.pts); #endif // SIMULCAST_MODE } for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { if (cx_pkt->data.frame.spatial_layer_encoded[sl]) { for (tl = layer_id->temporal_layer_id; tl < enc_cfg->ts_number_layers; ++tl) { const int layer = sl * enc_cfg->ts_number_layers + tl; ++rc->layer_tot_enc_frames[layer]; rc->layer_encoding_bitrate[layer] += 8.0 * sizes[sl]; // Keep count of rate control stats per layer, for non-key // frames. if (tl == (unsigned int)layer_id->temporal_layer_id && !(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { rc->layer_avg_frame_size[layer] += 8.0 * sizes[sl]; rc->layer_avg_rate_mismatch[layer] += fabs(8.0 * sizes[sl] - rc->layer_pfb[layer]) / rc->layer_pfb[layer]; ++rc->layer_enc_frames[layer]; } } } } // Update for short-time encoding bitrate states, for moving // window of size rc->window, shifted by rc->window / 2. // Ignore first window segment, due to key frame. if (frame_cnt > (unsigned int)rc->window_size) { for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { if (cx_pkt->data.frame.spatial_layer_encoded[sl]) sum_bitrate += 0.001 * 8.0 * sizes[sl] * framerate; } if (frame_cnt % rc->window_size == 0) { rc->window_count += 1; rc->avg_st_encoding_bitrate += sum_bitrate / rc->window_size; rc->variance_st_encoding_bitrate += (sum_bitrate / rc->window_size) * (sum_bitrate / rc->window_size); } } // Second shifted window. if (frame_cnt > (unsigned int)(rc->window_size + rc->window_size / 2)) { for (sl = 0; sl < enc_cfg->ss_number_layers; ++sl) { sum_bitrate2 += 0.001 * 8.0 * sizes[sl] * framerate; } if (frame_cnt > (unsigned int)(2 * rc->window_size) && frame_cnt % rc->window_size == 0) { rc->window_count += 1; rc->avg_st_encoding_bitrate += sum_bitrate2 / rc->window_size; rc->variance_st_encoding_bitrate += (sum_bitrate2 / rc->window_size) * (sum_bitrate2 / rc->window_size); } } } #endif int main(int argc, const char **argv) { AppInput app_input; VpxVideoWriter *writer = NULL; VpxVideoInfo info; vpx_codec_ctx_t encoder; vpx_codec_enc_cfg_t enc_cfg; SvcContext svc_ctx; vpx_svc_frame_drop_t svc_drop_frame; uint32_t i; uint32_t frame_cnt = 0; vpx_image_t raw; vpx_codec_err_t res; int pts = 0; /* PTS starts at 0 */ int frame_duration = 1; /* 1 timebase tick per frame */ int end_of_stream = 0; int frames_received = 0; #if OUTPUT_RC_STATS VpxVideoWriter *outfile[VPX_SS_MAX_LAYERS] = { NULL }; struct RateControlStats rc; vpx_svc_layer_id_t layer_id; vpx_svc_ref_frame_config_t ref_frame_config; unsigned int sl; double framerate = 30.0; #endif struct vpx_usec_timer timer; int64_t cx_time = 0; #if CONFIG_INTERNAL_STATS FILE *f = fopen("opsnr.stt", "a"); #endif #if CONFIG_VP9_DECODER && !SIMULCAST_MODE int mismatch_seen = 0; vpx_codec_ctx_t decoder; #endif memset(&svc_ctx, 0, sizeof(svc_ctx)); memset(&app_input, 0, sizeof(AppInput)); memset(&info, 0, sizeof(VpxVideoInfo)); memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t)); memset(&rc, 0, sizeof(struct RateControlStats)); exec_name = argv[0]; /* Setup default input stream settings */ app_input.input_ctx.framerate.numerator = 30; app_input.input_ctx.framerate.denominator = 1; app_input.input_ctx.only_i420 = 1; app_input.input_ctx.bit_depth = 0; parse_command_line(argc, argv, &app_input, &svc_ctx, &enc_cfg); // Y4M reader handles its own allocation. if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) { // Allocate image buffer #if CONFIG_VP9_HIGHBITDEPTH if (!vpx_img_alloc(&raw, enc_cfg.g_input_bit_depth == 8 ? 
VPX_IMG_FMT_I420 : VPX_IMG_FMT_I42016, enc_cfg.g_w, enc_cfg.g_h, 32)) { die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h); } #else if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, enc_cfg.g_w, enc_cfg.g_h, 32)) { die("Failed to allocate image %dx%d\n", enc_cfg.g_w, enc_cfg.g_h); } #endif // CONFIG_VP9_HIGHBITDEPTH } // Initialize codec if (vpx_svc_init(&svc_ctx, &encoder, vpx_codec_vp9_cx(), &enc_cfg) != VPX_CODEC_OK) die("Failed to initialize encoder\n"); #if CONFIG_VP9_DECODER && !SIMULCAST_MODE if (vpx_codec_dec_init( &decoder, get_vpx_decoder_by_name("vp9")->codec_interface(), NULL, 0)) die("Failed to initialize decoder\n"); #endif #if OUTPUT_RC_STATS rc.window_count = 1; rc.window_size = 15; // Silence a static analysis warning. rc.avg_st_encoding_bitrate = 0.0; rc.variance_st_encoding_bitrate = 0.0; if (svc_ctx.output_rc_stat) { set_rate_control_stats(&rc, &enc_cfg); framerate = enc_cfg.g_timebase.den / enc_cfg.g_timebase.num; } #endif info.codec_fourcc = VP9_FOURCC; info.frame_width = enc_cfg.g_w; info.frame_height = enc_cfg.g_h; info.time_base.numerator = enc_cfg.g_timebase.num; info.time_base.denominator = enc_cfg.g_timebase.den; if (!(app_input.passes == 2 && app_input.pass == 1)) { // We don't save the bitstream for the 1st pass on two pass rate control writer = vpx_video_writer_open(app_input.output_filename, kContainerIVF, &info); if (!writer) die("Failed to open %s for writing\n", app_input.output_filename); } #if OUTPUT_RC_STATS // Write out spatial layer stream. // TODO(marpan/jianj): allow for writing each spatial and temporal stream. if (svc_ctx.output_rc_stat) { for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { char file_name[PATH_MAX]; snprintf(file_name, sizeof(file_name), "%s_s%d.ivf", app_input.output_filename, sl); outfile[sl] = vpx_video_writer_open(file_name, kContainerIVF, &info); if (!outfile[sl]) die("Failed to open %s for writing", file_name); } } #endif // skip initial frames for (i = 0; i < app_input.frames_to_skip; ++i) read_frame(&app_input.input_ctx, &raw); if (svc_ctx.speed != -1) vpx_codec_control(&encoder, VP8E_SET_CPUUSED, svc_ctx.speed); if (svc_ctx.threads) { vpx_codec_control(&encoder, VP9E_SET_TILE_COLUMNS, get_msb(svc_ctx.threads)); if (svc_ctx.threads > 1) vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 1); else vpx_codec_control(&encoder, VP9E_SET_ROW_MT, 0); } if (svc_ctx.speed >= 5 && svc_ctx.aqmode == 1) vpx_codec_control(&encoder, VP9E_SET_AQ_MODE, 3); if (svc_ctx.speed >= 5) vpx_codec_control(&encoder, VP8E_SET_STATIC_THRESHOLD, 1); vpx_codec_control(&encoder, VP8E_SET_MAX_INTRA_BITRATE_PCT, 900); vpx_codec_control(&encoder, VP9E_SET_SVC_INTER_LAYER_PRED, app_input.inter_layer_pred); vpx_codec_control(&encoder, VP9E_SET_NOISE_SENSITIVITY, 0); vpx_codec_control(&encoder, VP9E_SET_TUNE_CONTENT, app_input.tune_content); svc_drop_frame.framedrop_mode = FULL_SUPERFRAME_DROP; for (sl = 0; sl < (unsigned int)svc_ctx.spatial_layers; ++sl) svc_drop_frame.framedrop_thresh[sl] = enc_cfg.rc_dropframe_thresh; svc_drop_frame.max_consec_drop = INT_MAX; vpx_codec_control(&encoder, VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame); // Encode frames while (!end_of_stream) { vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *cx_pkt; // Example patterns for bypass/flexible mode: // example_pattern = 0: 2 temporal layers, and spatial_layers = 1,2,3. Exact // to fixed SVC patterns. example_pattern = 1: 2 spatial and 2 temporal // layers, with SL0 only has TL0, and SL1 has both TL0 and TL1. This example // uses the extended API. 
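/* Hedged sketch (illustrative only, not part of this example): the
   fixed-pattern branch further below derives the temporal layer id from the
   superframe count with a few inline conditionals. An equivalent standalone
   helper, with a hypothetical name, makes the pattern explicit: 0-1-0-1...
   for 2 temporal layers and 0-2-1-2... for 3 temporal layers.

static unsigned int sketch_temporal_layer_id(unsigned int frame_cnt,
                                             unsigned int ts_number_layers) {
  if (ts_number_layers == 2) return frame_cnt % 2;  // 0, 1, 0, 1, ...
  if (ts_number_layers == 3) {
    if (frame_cnt % 2 != 0) return 2;  // Odd frames sit on the top layer.
    if (frame_cnt % 4 == 2) return 1;  // Frames 2, 6, 10, ... are middle.
    return 0;                          // Frames 0, 4, 8, ... are base.
  }
  return 0;  // Single temporal layer: everything is base.
}
*/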
int example_pattern = 0; if (frame_cnt >= app_input.frames_to_code || !read_frame(&app_input.input_ctx, &raw)) { // We need one extra vpx_svc_encode call at end of stream to flush // encoder and get remaining data end_of_stream = 1; } // For BYPASS/FLEXIBLE mode, set the frame flags (reference and updates) // and the buffer indices for each spatial layer of the current // (super)frame to be encoded. The spatial and temporal layer_id for the // current frame also needs to be set. // TODO(marpan): Should rename the "VP9E_TEMPORAL_LAYERING_MODE_BYPASS" // mode to "VP9E_LAYERING_MODE_BYPASS". if (svc_ctx.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { layer_id.spatial_layer_id = 0; // Example for 2 temporal layers. if (frame_cnt % 2 == 0) { layer_id.temporal_layer_id = 0; for (i = 0; i < VPX_SS_MAX_LAYERS; i++) layer_id.temporal_layer_id_per_spatial[i] = 0; } else { layer_id.temporal_layer_id = 1; for (i = 0; i < VPX_SS_MAX_LAYERS; i++) layer_id.temporal_layer_id_per_spatial[i] = 1; } if (example_pattern == 1) { // example_pattern 1 is hard-coded for 2 spatial and 2 temporal layers. assert(svc_ctx.spatial_layers == 2); assert(svc_ctx.temporal_layers == 2); if (frame_cnt % 2 == 0) { // Spatial layer 0 and 1 are encoded. layer_id.temporal_layer_id_per_spatial[0] = 0; layer_id.temporal_layer_id_per_spatial[1] = 0; layer_id.spatial_layer_id = 0; } else { // Only spatial layer 1 is encoded here. layer_id.temporal_layer_id_per_spatial[1] = 1; layer_id.spatial_layer_id = 1; } } vpx_codec_control(&encoder, VP9E_SET_SVC_LAYER_ID, &layer_id); // TODO(jianj): Fix the parameter passing for "is_key_frame" in // set_frame_flags_bypass_model() for case of periodic key frames. if (example_pattern == 0) { set_frame_flags_bypass_mode_ex0(layer_id.temporal_layer_id, svc_ctx.spatial_layers, frame_cnt == 0, &ref_frame_config); } else if (example_pattern == 1) { set_frame_flags_bypass_mode_ex1(layer_id.temporal_layer_id, svc_ctx.spatial_layers, frame_cnt == 0, &ref_frame_config); } ref_frame_config.duration[0] = frame_duration * 1; ref_frame_config.duration[1] = frame_duration * 1; vpx_codec_control(&encoder, VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config); // Keep track of input frames, to account for frame drops in rate control // stats/metrics. for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + layer_id.temporal_layer_id]; } } else { // For the fixed pattern SVC, temporal layer is given by superframe count. unsigned int tl = 0; if (enc_cfg.ts_number_layers == 2) tl = (frame_cnt % 2 != 0); else if (enc_cfg.ts_number_layers == 3) { if (frame_cnt % 2 != 0) tl = 2; if ((frame_cnt > 1) && ((frame_cnt - 2) % 4 == 0)) tl = 1; } for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) ++rc.layer_input_frames[sl * enc_cfg.ts_number_layers + tl]; } vpx_usec_timer_start(&timer); res = vpx_svc_encode( &svc_ctx, &encoder, (end_of_stream ? NULL : &raw), pts, frame_duration, svc_ctx.speed >= 5 ? 
VPX_DL_REALTIME : VPX_DL_GOOD_QUALITY); vpx_usec_timer_mark(&timer); cx_time += vpx_usec_timer_elapsed(&timer); fflush(stdout); if (res != VPX_CODEC_OK) { die_codec(&encoder, "Failed to encode frame"); } while ((cx_pkt = vpx_codec_get_cx_data(&encoder, &iter)) != NULL) { switch (cx_pkt->kind) { case VPX_CODEC_CX_FRAME_PKT: { SvcInternal_t *const si = (SvcInternal_t *)svc_ctx.internal; if (cx_pkt->data.frame.sz > 0) { vpx_video_writer_write_frame(writer, cx_pkt->data.frame.buf, cx_pkt->data.frame.sz, cx_pkt->data.frame.pts); #if OUTPUT_RC_STATS if (svc_ctx.output_rc_stat) { svc_output_rc_stats(&encoder, &enc_cfg, &layer_id, cx_pkt, &rc, outfile, frame_cnt, framerate); } #endif } /* printf("SVC frame: %d, kf: %d, size: %d, pts: %d\n", frames_received, !!(cx_pkt->data.frame.flags & VPX_FRAME_IS_KEY), (int)cx_pkt->data.frame.sz, (int)cx_pkt->data.frame.pts); */ if (enc_cfg.ss_number_layers == 1 && enc_cfg.ts_number_layers == 1) si->bytes_sum[0] += (int)cx_pkt->data.frame.sz; ++frames_received; #if CONFIG_VP9_DECODER && !SIMULCAST_MODE if (vpx_codec_decode(&decoder, cx_pkt->data.frame.buf, (unsigned int)cx_pkt->data.frame.sz, NULL, 0)) die_codec(&decoder, "Failed to decode frame."); #endif break; } case VPX_CODEC_STATS_PKT: { stats_write(&app_input.rc_stats, cx_pkt->data.twopass_stats.buf, cx_pkt->data.twopass_stats.sz); break; } default: { break; } } #if CONFIG_VP9_DECODER && !SIMULCAST_MODE vpx_codec_control(&encoder, VP9E_GET_SVC_LAYER_ID, &layer_id); // Don't look for mismatch on top spatial and top temporal layers as they // are non reference frames. if ((enc_cfg.ss_number_layers > 1 || enc_cfg.ts_number_layers > 1) && !(layer_id.temporal_layer_id > 0 && layer_id.temporal_layer_id == (int)enc_cfg.ts_number_layers - 1 && cx_pkt->data.frame .spatial_layer_encoded[enc_cfg.ss_number_layers - 1])) { test_decode(&encoder, &decoder, frame_cnt, &mismatch_seen); } #endif } if (!end_of_stream) { ++frame_cnt; pts += frame_duration; } } printf("Processed %d frames\n", frame_cnt); close_input_file(&app_input.input_ctx); #if OUTPUT_RC_STATS if (svc_ctx.output_rc_stat) { printout_rate_control_summary(&rc, &enc_cfg, frame_cnt); printf("\n"); } #endif if (vpx_codec_destroy(&encoder)) die_codec(&encoder, "Failed to destroy codec"); if (app_input.passes == 2) stats_close(&app_input.rc_stats, 1); if (writer) { vpx_video_writer_close(writer); } #if OUTPUT_RC_STATS if (svc_ctx.output_rc_stat) { for (sl = 0; sl < enc_cfg.ss_number_layers; ++sl) { vpx_video_writer_close(outfile[sl]); } } #endif #if CONFIG_INTERNAL_STATS if (mismatch_seen) { fprintf(f, "First mismatch occurred in frame %d\n", mismatch_seen); } else { fprintf(f, "No mismatch detected in recon buffers\n"); } fclose(f); #endif printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000), 1000000 * (double)frame_cnt / (double)cx_time); if (app_input.input_ctx.file_type != FILE_TYPE_Y4M) { vpx_img_free(&raw); } // display average size, psnr vpx_svc_dump_statistics(&svc_ctx); vpx_svc_release(&svc_ctx); return EXIT_SUCCESS; } libvpx-1.8.2/examples/vp9cx_set_ref.c000066400000000000000000000232751357355204000176050ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// VP9 Set Reference Frame
// ============================
//
// This is an example demonstrating how to overwrite the VP9 encoder's
// internal reference frame. In the sample we set the last frame to the
// current frame. This technique could be used to bounce between two cameras.
//
// The decoder would also have to set the reference frame to the same value
// on the same frame, or the video will become corrupt. The 'test_decode'
// variable is set to 1 in this example, which tests whether the encoder and
// decoder results match.
//
// Usage
// -----
// This example encodes a raw video, and the last argument passed in specifies
// the frame number to update the reference frame on. For example, run
// examples/vp9cx_set_ref 352 288 in.yuv out.ivf 4 30
// The parameter is parsed as follows:
//
//
// Extra Variables
// ---------------
// This example maintains the frame number passed on the command line
// in the `update_frame_num` variable.
//
//
// Configuration
// -------------
//
// The reference frame is updated on the frame specified on the command
// line.
//
// Observing The Effects
// ---------------------
// The encoder and decoder results should be matching when the same reference
// frame setting operation is done in both encoder and decoder. Otherwise,
// the encoder/decoder mismatch would be seen.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "vpx/vp8cx.h"
#include "vpx/vpx_decoder.h"
#include "vpx/vpx_encoder.h"

#include "vp9/common/vp9_common.h"

#include "./tools_common.h"
#include "./video_writer.h"

static const char *exec_name;

void usage_exit() {
  fprintf(stderr,
          "Usage: %s <width> <height> <infile> <outfile> "
          "<frame> <limit(optional)>\n",
          exec_name);
  exit(EXIT_FAILURE);
}

static void testing_decode(vpx_codec_ctx_t *encoder, vpx_codec_ctx_t *decoder,
                           unsigned int frame_out, int *mismatch_seen) {
  vpx_image_t enc_img, dec_img;
  struct vp9_ref_frame ref_enc, ref_dec;

  if (*mismatch_seen) return;

  ref_enc.idx = 0;
  ref_dec.idx = 0;
  if (vpx_codec_control(encoder, VP9_GET_REFERENCE, &ref_enc))
    die_codec(encoder, "Failed to get encoder reference frame");
  enc_img = ref_enc.img;
  if (vpx_codec_control(decoder, VP9_GET_REFERENCE, &ref_dec))
    die_codec(decoder, "Failed to get decoder reference frame");
  dec_img = ref_dec.img;

  if (!compare_img(&enc_img, &dec_img)) {
    int y[4], u[4], v[4];

    *mismatch_seen = 1;

    find_mismatch(&enc_img, &dec_img, y, u, v);
    printf(
        "Encode/decode mismatch on frame %d at"
        " Y[%d, %d] {%d/%d},"
        " U[%d, %d] {%d/%d},"
        " V[%d, %d] {%d/%d}",
        frame_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1],
        v[2], v[3]);
  }

  vpx_img_free(&enc_img);
  vpx_img_free(&dec_img);
}

static int encode_frame(vpx_codec_ctx_t *ecodec, vpx_image_t *img,
                        unsigned int frame_in, VpxVideoWriter *writer,
                        int test_decode, vpx_codec_ctx_t *dcodec,
                        unsigned int *frame_out, int *mismatch_seen) {
  int got_pkts = 0;
  vpx_codec_iter_t iter = NULL;
  const vpx_codec_cx_pkt_t *pkt = NULL;
  int got_data;
  const vpx_codec_err_t res =
      vpx_codec_encode(ecodec, img, frame_in, 1, 0, VPX_DL_GOOD_QUALITY);
  if (res != VPX_CODEC_OK) die_codec(ecodec, "Failed to encode frame");

  got_data = 0;

  while ((pkt = vpx_codec_get_cx_data(ecodec, &iter)) != NULL) {
    got_pkts = 1;

    if (pkt->kind == VPX_CODEC_CX_FRAME_PKT) {
      const int keyframe = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) != 0;

      if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) {
        *frame_out += 1;
      }

      if (!vpx_video_writer_write_frame(writer, pkt->data.frame.buf,
                                        pkt->data.frame.sz,
                                        pkt->data.frame.pts)) {
        die_codec(ecodec,
"Failed to write compressed frame"); } printf(keyframe ? "K" : "."); fflush(stdout); got_data = 1; // Decode 1 frame. if (test_decode) { if (vpx_codec_decode(dcodec, pkt->data.frame.buf, (unsigned int)pkt->data.frame.sz, NULL, 0)) die_codec(dcodec, "Failed to decode frame."); } } } // Mismatch checking if (got_data && test_decode) { testing_decode(ecodec, dcodec, *frame_out, mismatch_seen); } return got_pkts; } int main(int argc, char **argv) { FILE *infile = NULL; // Encoder vpx_codec_ctx_t ecodec; vpx_codec_enc_cfg_t cfg; unsigned int frame_in = 0; vpx_image_t raw; vpx_codec_err_t res; VpxVideoInfo info; VpxVideoWriter *writer = NULL; const VpxInterface *encoder = NULL; // Test encoder/decoder mismatch. int test_decode = 1; // Decoder vpx_codec_ctx_t dcodec; unsigned int frame_out = 0; // The frame number to set reference frame on unsigned int update_frame_num = 0; int mismatch_seen = 0; const int fps = 30; const int bitrate = 500; const char *width_arg = NULL; const char *height_arg = NULL; const char *infile_arg = NULL; const char *outfile_arg = NULL; const char *update_frame_num_arg = NULL; unsigned int limit = 0; vp9_zero(ecodec); vp9_zero(cfg); vp9_zero(info); exec_name = argv[0]; if (argc < 6) die("Invalid number of arguments"); width_arg = argv[1]; height_arg = argv[2]; infile_arg = argv[3]; outfile_arg = argv[4]; update_frame_num_arg = argv[5]; encoder = get_vpx_encoder_by_name("vp9"); if (!encoder) die("Unsupported codec."); update_frame_num = (unsigned int)strtoul(update_frame_num_arg, NULL, 0); // In VP9, the reference buffers (cm->buffer_pool->frame_bufs[i].buf) are // allocated while calling vpx_codec_encode(), thus, setting reference for // 1st frame isn't supported. if (update_frame_num <= 1) { die("Couldn't parse frame number '%s'\n", update_frame_num_arg); } if (argc > 6) { limit = (unsigned int)strtoul(argv[6], NULL, 0); if (update_frame_num > limit) die("Update frame number couldn't larger than limit\n"); } info.codec_fourcc = encoder->fourcc; info.frame_width = (int)strtol(width_arg, NULL, 0); info.frame_height = (int)strtol(height_arg, NULL, 0); info.time_base.numerator = 1; info.time_base.denominator = fps; if (info.frame_width <= 0 || info.frame_height <= 0 || (info.frame_width % 2) != 0 || (info.frame_height % 2) != 0) { die("Invalid frame size: %dx%d", info.frame_width, info.frame_height); } if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, info.frame_width, info.frame_height, 1)) { die("Failed to allocate image."); } printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface())); res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0); if (res) die_codec(&ecodec, "Failed to get default codec config."); cfg.g_w = info.frame_width; cfg.g_h = info.frame_height; cfg.g_timebase.num = info.time_base.numerator; cfg.g_timebase.den = info.time_base.denominator; cfg.rc_target_bitrate = bitrate; cfg.g_lag_in_frames = 3; writer = vpx_video_writer_open(outfile_arg, kContainerIVF, &info); if (!writer) die("Failed to open %s for writing.", outfile_arg); if (!(infile = fopen(infile_arg, "rb"))) die("Failed to open %s for reading.", infile_arg); if (vpx_codec_enc_init(&ecodec, encoder->codec_interface(), &cfg, 0)) die_codec(&ecodec, "Failed to initialize encoder"); // Disable alt_ref. 
if (vpx_codec_control(&ecodec, VP8E_SET_ENABLEAUTOALTREF, 0)) die_codec(&ecodec, "Failed to set enable auto alt ref"); if (test_decode) { const VpxInterface *decoder = get_vpx_decoder_by_name("vp9"); if (vpx_codec_dec_init(&dcodec, decoder->codec_interface(), NULL, 0)) die_codec(&dcodec, "Failed to initialize decoder."); } // Encode frames. while (vpx_img_read(&raw, infile)) { if (limit && frame_in >= limit) break; if (update_frame_num > 1 && frame_out + 1 == update_frame_num) { vpx_ref_frame_t ref; ref.frame_type = VP8_LAST_FRAME; ref.img = raw; // Set reference frame in encoder. if (vpx_codec_control(&ecodec, VP8_SET_REFERENCE, &ref)) die_codec(&ecodec, "Failed to set reference frame"); printf(" "); // If set_reference in decoder is commented out, the enc/dec mismatch // would be seen. if (test_decode) { if (vpx_codec_control(&dcodec, VP8_SET_REFERENCE, &ref)) die_codec(&dcodec, "Failed to set reference frame"); } } encode_frame(&ecodec, &raw, frame_in, writer, test_decode, &dcodec, &frame_out, &mismatch_seen); frame_in++; if (mismatch_seen) break; } // Flush encoder. if (!mismatch_seen) while (encode_frame(&ecodec, NULL, frame_in, writer, test_decode, &dcodec, &frame_out, &mismatch_seen)) { } printf("\n"); fclose(infile); printf("Processed %d frames.\n", frame_out); if (test_decode) { if (!mismatch_seen) printf("Encoder/decoder results are matching.\n"); else printf("Encoder/decoder results are NOT matching.\n"); } if (test_decode) if (vpx_codec_destroy(&dcodec)) die_codec(&dcodec, "Failed to destroy decoder"); vpx_img_free(&raw); if (vpx_codec_destroy(&ecodec)) die_codec(&ecodec, "Failed to destroy encoder."); vpx_video_writer_close(writer); return EXIT_SUCCESS; } libvpx-1.8.2/examples/vpx_dec_fuzzer.cc000066400000000000000000000070241357355204000202170ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /* * Fuzzer for libvpx decoders * ========================== * Requirements * -------------- * Requires Clang 6.0 or above as -fsanitize=fuzzer is used as a linker * option. * Steps to build * -------------- * Clone libvpx repository $git clone https://chromium.googlesource.com/webm/libvpx * Create a directory in parallel to libvpx and change directory $mkdir vpx_dec_fuzzer $cd vpx_dec_fuzzer/ * Enable sanitizers (Supported: address integer memory thread undefined) $source ../libvpx/tools/set_analyzer_env.sh address * Configure libvpx. * Note --size-limit and VPX_MAX_ALLOCABLE_MEMORY are defined to avoid * Out of memory errors when running generated fuzzer binary $../libvpx/configure --disable-unit-tests --size-limit=12288x12288 \ --extra-cflags="-fsanitize=fuzzer-no-link \ -DVPX_MAX_ALLOCABLE_MEMORY=1073741824" \ --disable-webm-io --enable-debug --disable-vp8-encoder \ --disable-vp9-encoder --disable-examples * Build libvpx $make -j32 * Build vp9 fuzzer $ $CXX $CXXFLAGS -std=c++11 -DDECODER=vp9 \ -fsanitize=fuzzer -I../libvpx -I. -Wl,--start-group \ ../libvpx/examples/vpx_dec_fuzzer.cc -o ./vpx_dec_fuzzer_vp9 \ ./libvpx.a -Wl,--end-group * DECODER should be defined as vp9 or vp8 to enable vp9/vp8 * * create a corpus directory and copy some ivf files there. 
* Based on which codec (vp8/vp9) is being tested, it is recommended to
 * have corresponding ivf files in corpus directory
 * Empty corpus directory also is acceptable, though not recommended
   $mkdir CORPUS && cp some-files CORPUS
 * Run fuzzing:
   $./vpx_dec_fuzzer_vp9 CORPUS

 * References:
 * http://llvm.org/docs/LibFuzzer.html
 * https://github.com/google/oss-fuzz
 */

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <algorithm>
#include <memory>

#include "vpx/vp8dx.h"
#include "vpx/vpx_decoder.h"
#include "vpx_ports/mem_ops.h"

#define IVF_FRAME_HDR_SZ (4 + 8) /* 4 byte size + 8 byte timestamp */
#define IVF_FILE_HDR_SZ 32

#define VPXD_INTERFACE(name) VPXD_INTERFACE_(name)
#define VPXD_INTERFACE_(name) vpx_codec_##name##_dx()

extern "C" void usage_exit(void) { exit(EXIT_FAILURE); }

extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
  if (size <= IVF_FILE_HDR_SZ) {
    return 0;
  }

  vpx_codec_ctx_t codec;
  // Set thread count in the range [1, 64].
  const unsigned int threads = (data[IVF_FILE_HDR_SZ] & 0x3f) + 1;
  vpx_codec_dec_cfg_t cfg = { threads, 0, 0 };
  if (vpx_codec_dec_init(&codec, VPXD_INTERFACE(DECODER), &cfg, 0)) {
    return 0;
  }

  data += IVF_FILE_HDR_SZ;
  size -= IVF_FILE_HDR_SZ;

  while (size > IVF_FRAME_HDR_SZ) {
    size_t frame_size = mem_get_le32(data);
    size -= IVF_FRAME_HDR_SZ;
    data += IVF_FRAME_HDR_SZ;
    frame_size = std::min(size, frame_size);

    const vpx_codec_err_t err =
        vpx_codec_decode(&codec, data, frame_size, nullptr, 0);
    static_cast<void>(err);
    vpx_codec_iter_t iter = nullptr;
    vpx_image_t *img = nullptr;
    while ((img = vpx_codec_get_frame(&codec, &iter)) != nullptr) {
    }
    data += frame_size;
    size -= frame_size;
  }
  vpx_codec_destroy(&codec);
  return 0;
}
libvpx-1.8.2/examples/vpx_temporal_svc_encoder.c000066400000000000000000001124251357355204000221130ustar00rootroot00000000000000/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

// This is an example demonstrating how to implement a multi-layer VPx
// encoding scheme based on temporal scalability for video applications
// that benefit from a scalable bitstream.

#include <assert.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "./vpx_config.h"
#include "./y4minput.h"
#include "../vpx_ports/vpx_timer.h"
#include "vpx/vp8cx.h"
#include "vpx/vpx_encoder.h"
#include "vpx_ports/bitops.h"

#include "../tools_common.h"
#include "../video_writer.h"

#define ROI_MAP 0

#define zero(Dest) memset(&(Dest), 0, sizeof(Dest));

static const char *exec_name;

void usage_exit(void) { exit(EXIT_FAILURE); }

// Denoiser states for vp8, for temporal denoising.
enum denoiserStateVp8 {
  kVp8DenoiserOff,
  kVp8DenoiserOnYOnly,
  kVp8DenoiserOnYUV,
  kVp8DenoiserOnYUVAggressive,
  kVp8DenoiserOnAdaptive
};

// Denoiser states for vp9, for temporal denoising.
enum denoiserStateVp9 {
  kVp9DenoiserOff,
  kVp9DenoiserOnYOnly,
  // For SVC: denoise the top two spatial layers.
  kVp9DenoiserOnYTwoSpatialLayers
};

static int mode_to_num_layers[13] = { 1, 2, 2, 3, 3, 3, 3, 5, 2, 3, 3, 3, 3 };

// For rate control encoding stats.
struct RateControlMetrics {
  // Number of input frames per layer.
  int layer_input_frames[VPX_TS_MAX_LAYERS];
  // Total (cumulative) number of encoded frames per layer.
  int layer_tot_enc_frames[VPX_TS_MAX_LAYERS];
  // Number of encoded non-key frames per layer.
  int layer_enc_frames[VPX_TS_MAX_LAYERS];
  // Framerate per layer (cumulative).
  double layer_framerate[VPX_TS_MAX_LAYERS];
  // Target average frame size per layer (per-frame-bandwidth per layer).
  double layer_pfb[VPX_TS_MAX_LAYERS];
  // Actual average frame size per layer.
  double layer_avg_frame_size[VPX_TS_MAX_LAYERS];
  // Average rate mismatch per layer (|target - actual| / target).
  double layer_avg_rate_mismatch[VPX_TS_MAX_LAYERS];
  // Actual encoding bitrate per layer (cumulative).
  double layer_encoding_bitrate[VPX_TS_MAX_LAYERS];
  // Average of the short-time encoder actual bitrate.
  // TODO(marpan): Should we add these short-time stats for each layer?
  double avg_st_encoding_bitrate;
  // Variance of the short-time encoder actual bitrate.
  double variance_st_encoding_bitrate;
  // Window (number of frames) for computing short-time encoding bitrate.
  int window_size;
  // Number of window measurements.
  int window_count;
  int layer_target_bitrate[VPX_MAX_LAYERS];
};

// Note: these rate control metrics assume only 1 key frame in the
// sequence (i.e., first frame only). So for temporal pattern# 7
// (which has key frame for every frame on base layer), the metrics
// computation will be off/wrong.
// TODO(marpan): Update these metrics to account for multiple key frames
// in the stream.
static void set_rate_control_metrics(struct RateControlMetrics *rc,
                                     vpx_codec_enc_cfg_t *cfg) {
  int i = 0;
  // Set the layer (cumulative) framerate and the target layer (non-cumulative)
  // per-frame-bandwidth, for the rate control encoding stats below.
  const double framerate = cfg->g_timebase.den / cfg->g_timebase.num;
  const int ts_number_layers = cfg->ts_number_layers;
  rc->layer_framerate[0] = framerate / cfg->ts_rate_decimator[0];
  rc->layer_pfb[0] =
      1000.0 * rc->layer_target_bitrate[0] / rc->layer_framerate[0];
  for (i = 0; i < ts_number_layers; ++i) {
    if (i > 0) {
      rc->layer_framerate[i] = framerate / cfg->ts_rate_decimator[i];
      rc->layer_pfb[i] =
          1000.0 *
          (rc->layer_target_bitrate[i] - rc->layer_target_bitrate[i - 1]) /
          (rc->layer_framerate[i] - rc->layer_framerate[i - 1]);
    }
    rc->layer_input_frames[i] = 0;
    rc->layer_enc_frames[i] = 0;
    rc->layer_tot_enc_frames[i] = 0;
    rc->layer_encoding_bitrate[i] = 0.0;
    rc->layer_avg_frame_size[i] = 0.0;
    rc->layer_avg_rate_mismatch[i] = 0.0;
  }
  rc->window_count = 0;
  rc->window_size = 15;
  rc->avg_st_encoding_bitrate = 0.0;
  rc->variance_st_encoding_bitrate = 0.0;
  // Target bandwidth for the whole stream.
  // Set to layer_target_bitrate for highest layer (total bitrate).
  cfg->rc_target_bitrate = rc->layer_target_bitrate[ts_number_layers - 1];
}

static void printout_rate_control_summary(struct RateControlMetrics *rc,
                                          vpx_codec_enc_cfg_t *cfg,
                                          int frame_cnt) {
  unsigned int i = 0;
  int tot_num_frames = 0;
  double perc_fluctuation = 0.0;
  printf("Total number of processed frames: %d\n\n", frame_cnt - 1);
  printf("Rate control layer stats for %d layer(s):\n\n",
         cfg->ts_number_layers);
  for (i = 0; i < cfg->ts_number_layers; ++i) {
    const int num_dropped =
        (i > 0) ? (rc->layer_input_frames[i] - rc->layer_enc_frames[i])
                : (rc->layer_input_frames[i] - rc->layer_enc_frames[i] - 1);
    tot_num_frames += rc->layer_input_frames[i];
    rc->layer_encoding_bitrate[i] = 0.001 * rc->layer_framerate[i] *
                                    rc->layer_encoding_bitrate[i] /
                                    tot_num_frames;
    rc->layer_avg_frame_size[i] =
        rc->layer_avg_frame_size[i] / rc->layer_enc_frames[i];
    rc->layer_avg_rate_mismatch[i] =
        100.0 * rc->layer_avg_rate_mismatch[i] / rc->layer_enc_frames[i];
    printf("For layer#: %d \n", i);
    printf("Bitrate (target vs actual): %d %f \n", rc->layer_target_bitrate[i],
           rc->layer_encoding_bitrate[i]);
    printf("Average frame size (target vs actual): %f %f \n", rc->layer_pfb[i],
           rc->layer_avg_frame_size[i]);
    printf("Average rate_mismatch: %f \n", rc->layer_avg_rate_mismatch[i]);
    printf(
        "Number of input frames, encoded (non-key) frames, "
        "and perc dropped frames: %d %d %f \n",
        rc->layer_input_frames[i], rc->layer_enc_frames[i],
        100.0 * num_dropped / rc->layer_input_frames[i]);
    printf("\n");
  }
  rc->avg_st_encoding_bitrate = rc->avg_st_encoding_bitrate / rc->window_count;
  rc->variance_st_encoding_bitrate =
      rc->variance_st_encoding_bitrate / rc->window_count -
      (rc->avg_st_encoding_bitrate * rc->avg_st_encoding_bitrate);
  perc_fluctuation = 100.0 * sqrt(rc->variance_st_encoding_bitrate) /
                     rc->avg_st_encoding_bitrate;
  printf("Short-time stats, for window of %d frames: \n", rc->window_size);
  printf("Average, rms-variance, and percent-fluct: %f %f %f \n",
         rc->avg_st_encoding_bitrate, sqrt(rc->variance_st_encoding_bitrate),
         perc_fluctuation);
  if ((frame_cnt - 1) != tot_num_frames)
    die("Error: Number of input frames not equal to output! \n");
}

#if ROI_MAP
static void set_roi_map(const char *enc_name, vpx_codec_enc_cfg_t *cfg,
                        vpx_roi_map_t *roi) {
  unsigned int i, j;
  int block_size = 0;
  uint8_t is_vp8 = strncmp(enc_name, "vp8", 3) == 0 ? 1 : 0;
  uint8_t is_vp9 = strncmp(enc_name, "vp9", 3) == 0 ? 1 : 0;
  if (!is_vp8 && !is_vp9) {
    die("unsupported codec.");
  }
  zero(*roi);
  block_size = is_vp9 && !is_vp8 ? 8 : 16;

  // ROI is based on the segments (4 for vp8, 8 for vp9), smallest unit for
  // segment is 16x16 for vp8, 8x8 for vp9.
  roi->rows = (cfg->g_h + block_size - 1) / block_size;
  roi->cols = (cfg->g_w + block_size - 1) / block_size;

  // Applies delta QP on the segment blocks, varies from -63 to 63.
  // Setting to negative means lower QP (better quality).
  // Below we set delta_q to the extreme (-63) to show strong effect.
  // VP8 uses the first 4 segments. VP9 uses all 8 segments.
  zero(roi->delta_q);
  roi->delta_q[1] = -63;

  // Applies delta loopfilter strength on the segment blocks, varies from -63
  // to 63. Setting to positive means stronger loopfilter. VP8 uses the first
  // 4 segments. VP9 uses all 8 segments.
  zero(roi->delta_lf);

  if (is_vp8) {
    // Applies skip encoding threshold on the segment blocks, varies from 0 to
    // UINT_MAX. Larger value means more skipping of encoding is possible.
    // This skip threshold only applies on delta frames.
    zero(roi->static_threshold);
  }

  if (is_vp9) {
    // Apply skip segment. Setting to 1 means this block will be copied from
    // previous frame.
    zero(roi->skip);
  }

  if (is_vp9) {
    // Apply ref frame segment.
    // -1 : Do not apply this segment.
    //  0 : Force using intra.
    //  1 : Force using last.
    //  2 : Force using golden.
    //  3 : Force using altref but not used in non-rd pickmode for 0 lag.
    memset(roi->ref_frame, -1, sizeof(roi->ref_frame));
    roi->ref_frame[1] = 1;
  }

  // Use 2 states: 1 is center square, 0 is the rest.
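/* A worked sketch (not from libvpx) of what the delta_q set above means in
   practice: conceptually the encoder clamps base_q + delta_q into the
   quantizer range, so delta_q[1] = -63 pushes segment-1 blocks to the best
   quality the rate controller permits. The [0, 63] bounds are an assumption
   matching this example's 0..63 quantizer scale, and the helper name is
   hypothetical.

static int sketch_effective_quantizer(int base_q, int segment_delta_q) {
  int q = base_q + segment_delta_q;
  if (q < 0) q = 0;
  if (q > 63) q = 63;
  return q;  // E.g. base_q 40 with delta -63 clamps to 0 (highest quality).
}

   The two-state center-square map itself is allocated and filled next. */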
roi->roi_map = (uint8_t *)calloc(roi->rows * roi->cols, sizeof(*roi->roi_map)); for (i = 0; i < roi->rows; ++i) { for (j = 0; j < roi->cols; ++j) { if (i > (roi->rows >> 2) && i < ((roi->rows * 3) >> 2) && j > (roi->cols >> 2) && j < ((roi->cols * 3) >> 2)) { roi->roi_map[i * roi->cols + j] = 1; } } } } #endif // Temporal scaling parameters: // NOTE: The 3 prediction frames cannot be used interchangeably due to // differences in the way they are handled throughout the code. The // frames should be allocated to layers in the order LAST, GF, ARF. // Other combinations work, but may produce slightly inferior results. static void set_temporal_layer_pattern(int layering_mode, vpx_codec_enc_cfg_t *cfg, int *layer_flags, int *flag_periodicity) { switch (layering_mode) { case 0: { // 1-layer. int ids[1] = { 0 }; cfg->ts_periodicity = 1; *flag_periodicity = 1; cfg->ts_number_layers = 1; cfg->ts_rate_decimator[0] = 1; memcpy(cfg->ts_layer_id, ids, sizeof(ids)); // Update L only. layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; break; } case 1: { // 2-layers, 2-frame period. int ids[2] = { 0, 1 }; cfg->ts_periodicity = 2; *flag_periodicity = 2; cfg->ts_number_layers = 2; cfg->ts_rate_decimator[0] = 2; cfg->ts_rate_decimator[1] = 1; memcpy(cfg->ts_layer_id, ids, sizeof(ids)); #if 1 // 0=L, 1=GF, Intra-layer prediction enabled. layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF; #else // 0=L, 1=GF, Intra-layer prediction disabled. layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; layer_flags[1] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_REF_LAST; #endif break; } case 2: { // 2-layers, 3-frame period. int ids[3] = { 0, 1, 1 }; cfg->ts_periodicity = 3; *flag_periodicity = 3; cfg->ts_number_layers = 2; cfg->ts_rate_decimator[0] = 3; cfg->ts_rate_decimator[1] = 1; memcpy(cfg->ts_layer_id, ids, sizeof(ids)); // 0=L, 1=GF, Intra-layer prediction enabled. layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; layer_flags[1] = layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; break; } case 3: { // 3-layers, 6-frame period. int ids[6] = { 0, 2, 2, 1, 2, 2 }; cfg->ts_periodicity = 6; *flag_periodicity = 6; cfg->ts_number_layers = 3; cfg->ts_rate_decimator[0] = 6; cfg->ts_rate_decimator[1] = 3; cfg->ts_rate_decimator[2] = 1; memcpy(cfg->ts_layer_id, ids, sizeof(ids)); // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled. layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; layer_flags[1] = layer_flags[2] = layer_flags[4] = layer_flags[5] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST; break; } case 4: { // 3-layers, 4-frame period. int ids[4] = { 0, 2, 1, 2 }; cfg->ts_periodicity = 4; *flag_periodicity = 4; cfg->ts_number_layers = 3; cfg->ts_rate_decimator[0] = 4; cfg->ts_rate_decimator[1] = 2; cfg->ts_rate_decimator[2] = 1; memcpy(cfg->ts_layer_id, ids, sizeof(ids)); // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled. 
layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; layer_flags[1] = layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; break; } case 5: { // 3-layers, 4-frame period. int ids[4] = { 0, 2, 1, 2 }; cfg->ts_periodicity = 4; *flag_periodicity = 4; cfg->ts_number_layers = 3; cfg->ts_rate_decimator[0] = 4; cfg->ts_rate_decimator[1] = 2; cfg->ts_rate_decimator[2] = 1; memcpy(cfg->ts_layer_id, ids, sizeof(ids)); // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled in layer 1, disabled // in layer 2. layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; layer_flags[1] = layer_flags[3] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; break; } case 6: { // 3-layers, 4-frame period. int ids[4] = { 0, 2, 1, 2 }; cfg->ts_periodicity = 4; *flag_periodicity = 4; cfg->ts_number_layers = 3; cfg->ts_rate_decimator[0] = 4; cfg->ts_rate_decimator[1] = 2; cfg->ts_rate_decimator[2] = 1; memcpy(cfg->ts_layer_id, ids, sizeof(ids)); // 0=L, 1=GF, 2=ARF, Intra-layer prediction enabled. layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; layer_flags[2] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; layer_flags[1] = layer_flags[3] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; break; } case 7: { // NOTE: Probably of academic interest only. // 5-layers, 16-frame period. int ids[16] = { 0, 4, 3, 4, 2, 4, 3, 4, 1, 4, 3, 4, 2, 4, 3, 4 }; cfg->ts_periodicity = 16; *flag_periodicity = 16; cfg->ts_number_layers = 5; cfg->ts_rate_decimator[0] = 16; cfg->ts_rate_decimator[1] = 8; cfg->ts_rate_decimator[2] = 4; cfg->ts_rate_decimator[3] = 2; cfg->ts_rate_decimator[4] = 1; memcpy(cfg->ts_layer_id, ids, sizeof(ids)); layer_flags[0] = VPX_EFLAG_FORCE_KF; layer_flags[1] = layer_flags[3] = layer_flags[5] = layer_flags[7] = layer_flags[9] = layer_flags[11] = layer_flags[13] = layer_flags[15] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; layer_flags[2] = layer_flags[6] = layer_flags[10] = layer_flags[14] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_GF; layer_flags[4] = layer_flags[12] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_ARF; layer_flags[8] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF; break; } case 8: { // 2-layers, with sync point at first frame of layer 1. int ids[2] = { 0, 1 }; cfg->ts_periodicity = 2; *flag_periodicity = 8; cfg->ts_number_layers = 2; cfg->ts_rate_decimator[0] = 2; cfg->ts_rate_decimator[1] = 1; memcpy(cfg->ts_layer_id, ids, sizeof(ids)); // 0=L, 1=GF. // ARF is used as predictor for all frames, and is only updated on // key frame. Sync point every 8 frames. // Layer 0: predict from L and ARF, update L and G. layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF; // Layer 1: sync point: predict from L and ARF, and update G. layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; // Layer 0, predict from L and ARF, update L. layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; // Layer 1: predict from L, G and ARF, and update G. 
layer_flags[3] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; // Layer 0. layer_flags[4] = layer_flags[2]; // Layer 1. layer_flags[5] = layer_flags[3]; // Layer 0. layer_flags[6] = layer_flags[4]; // Layer 1. layer_flags[7] = layer_flags[5]; break; } case 9: { // 3-layers: Sync points for layer 1 and 2 every 8 frames. int ids[4] = { 0, 2, 1, 2 }; cfg->ts_periodicity = 4; *flag_periodicity = 8; cfg->ts_number_layers = 3; cfg->ts_rate_decimator[0] = 4; cfg->ts_rate_decimator[1] = 2; cfg->ts_rate_decimator[2] = 1; memcpy(cfg->ts_layer_id, ids, sizeof(ids)); // 0=L, 1=GF, 2=ARF. layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; layer_flags[3] = layer_flags[5] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF; layer_flags[4] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; layer_flags[6] = VP8_EFLAG_NO_REF_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; layer_flags[7] = VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_ENTROPY; break; } case 10: { // 3-layers structure where ARF is used as predictor for all frames, // and is only updated on key frame. // Sync points for layer 1 and 2 every 8 frames. int ids[4] = { 0, 2, 1, 2 }; cfg->ts_periodicity = 4; *flag_periodicity = 8; cfg->ts_number_layers = 3; cfg->ts_rate_decimator[0] = 4; cfg->ts_rate_decimator[1] = 2; cfg->ts_rate_decimator[2] = 1; memcpy(cfg->ts_layer_id, ids, sizeof(ids)); // 0=L, 1=GF, 2=ARF. // Layer 0: predict from L and ARF; update L and G. layer_flags[0] = VPX_EFLAG_FORCE_KF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; // Layer 2: sync point: predict from L and ARF; update none. layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; // Layer 1: sync point: predict from L and ARF; update G. layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; // Layer 2: predict from L, G, ARF; update none. layer_flags[3] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; // Layer 0: predict from L and ARF; update L. layer_flags[4] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; // Layer 2: predict from L, G, ARF; update none. layer_flags[5] = layer_flags[3]; // Layer 1: predict from L, G, ARF; update G. layer_flags[6] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; // Layer 2: predict from L, G, ARF; update none. layer_flags[7] = layer_flags[3]; break; } case 11: { // 3-layers structure with one reference frame. // This works same as temporal_layering_mode 3. // This was added to compare with vp9_spatial_svc_encoder. // 3-layers, 4-frame period. int ids[4] = { 0, 2, 1, 2 }; cfg->ts_periodicity = 4; *flag_periodicity = 4; cfg->ts_number_layers = 3; cfg->ts_rate_decimator[0] = 4; cfg->ts_rate_decimator[1] = 2; cfg->ts_rate_decimator[2] = 1; memcpy(cfg->ts_layer_id, ids, sizeof(ids)); // 0=L, 1=GF, 2=ARF, Intra-layer prediction disabled. 
      layer_flags[0] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                       VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF;
      layer_flags[2] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                       VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
      layer_flags[1] = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF |
                       VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
      layer_flags[3] = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_ARF |
                       VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF;
      break;
    }
    case 12:
    default: {
      // 3-layers structure as in case 10, but no sync/refresh points for
      // layer 1 and 2.
      int ids[4] = { 0, 2, 1, 2 };
      cfg->ts_periodicity = 4;
      *flag_periodicity = 8;
      cfg->ts_number_layers = 3;
      cfg->ts_rate_decimator[0] = 4;
      cfg->ts_rate_decimator[1] = 2;
      cfg->ts_rate_decimator[2] = 1;
      memcpy(cfg->ts_layer_id, ids, sizeof(ids));
      // 0=L, 1=GF, 2=ARF.
      // Layer 0: predict from L and ARF; update L.
      layer_flags[0] =
          VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF;
      layer_flags[4] = layer_flags[0];
      // Layer 1: predict from L, G, ARF; update G.
      layer_flags[2] = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST;
      layer_flags[6] = layer_flags[2];
      // Layer 2: predict from L, G, ARF; update none.
      layer_flags[1] = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF |
                       VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY;
      layer_flags[3] = layer_flags[1];
      layer_flags[5] = layer_flags[1];
      layer_flags[7] = layer_flags[1];
      break;
    }
  }
}

int main(int argc, char **argv) {
  VpxVideoWriter *outfile[VPX_TS_MAX_LAYERS] = { NULL };
  vpx_codec_ctx_t codec;
  vpx_codec_enc_cfg_t cfg;
  int frame_cnt = 0;
  vpx_image_t raw;
  vpx_codec_err_t res;
  unsigned int width;
  unsigned int height;
  uint32_t error_resilient = 0;
  int speed;
  int frame_avail;
  int got_data;
  int flags = 0;
  unsigned int i;
  int pts = 0;             // PTS starts at 0.
  int frame_duration = 1;  // 1 timebase tick per frame.
  int layering_mode = 0;
  int layer_flags[VPX_TS_MAX_PERIODICITY] = { 0 };
  int flag_periodicity = 1;
#if ROI_MAP
  vpx_roi_map_t roi;
#endif
  vpx_svc_layer_id_t layer_id;
  const VpxInterface *encoder = NULL;
  struct VpxInputContext input_ctx;
  struct RateControlMetrics rc;
  int64_t cx_time = 0;
  const int min_args_base = 13;
#if CONFIG_VP9_HIGHBITDEPTH
  vpx_bit_depth_t bit_depth = VPX_BITS_8;
  int input_bit_depth = 8;
  const int min_args = min_args_base + 1;
#else
  const int min_args = min_args_base;
#endif  // CONFIG_VP9_HIGHBITDEPTH
  double sum_bitrate = 0.0;
  double sum_bitrate2 = 0.0;
  double framerate = 30.0;

  zero(rc.layer_target_bitrate);
  memset(&layer_id, 0, sizeof(vpx_svc_layer_id_t));
  memset(&input_ctx, 0, sizeof(input_ctx));
  /* Setup default input stream settings */
  input_ctx.framerate.numerator = 30;
  input_ctx.framerate.denominator = 1;
  input_ctx.only_i420 = 1;
  input_ctx.bit_depth = 0;

  exec_name = argv[0];
  // Check usage and arguments.
  if (argc < min_args) {
#if CONFIG_VP9_HIGHBITDEPTH
    die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
        "<rate_num> <rate_den> <speed> <frame_drop_threshold> "
        "<error_resilient> <threads> <mode> "
        "<Rate_0> ... <Rate_nlayers-1> <bit-depth> \n",
        argv[0]);
#else
    die("Usage: %s <infile> <outfile> <codec_type(vp8/vp9)> <width> <height> "
        "<rate_num> <rate_den> <speed> <frame_drop_threshold> "
        "<error_resilient> <threads> <mode> "
        "<Rate_0> ... <Rate_nlayers-1> \n",
        argv[0]);
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  encoder = get_vpx_encoder_by_name(argv[3]);
  if (!encoder) die("Unsupported codec.");

  printf("Using %s\n", vpx_codec_iface_name(encoder->codec_interface()));

  width = (unsigned int)strtoul(argv[4], NULL, 0);
  height = (unsigned int)strtoul(argv[5], NULL, 0);
  if (width < 16 || width % 2 || height < 16 || height % 2) {
    die("Invalid resolution: %d x %d", width, height);
  }

  layering_mode = (int)strtol(argv[12], NULL, 0);
  if (layering_mode < 0 || layering_mode > 12) {
    die("Invalid layering mode (0..12) %s", argv[12]);
  }

  if (argc != min_args + mode_to_num_layers[layering_mode]) {
    die("Invalid number of arguments");
  }

  input_ctx.filename = argv[1];
  open_input_file(&input_ctx);

#if CONFIG_VP9_HIGHBITDEPTH
  switch (strtol(argv[argc - 1], NULL, 0)) {
    case 8:
      bit_depth = VPX_BITS_8;
      input_bit_depth = 8;
      break;
    case 10:
      bit_depth = VPX_BITS_10;
      input_bit_depth = 10;
      break;
    case 12:
      bit_depth = VPX_BITS_12;
      input_bit_depth = 12;
      break;
    default: die("Invalid bit depth (8, 10, 12) %s", argv[argc - 1]);
  }

  // Y4M reader has its own allocation.
  if (input_ctx.file_type != FILE_TYPE_Y4M) {
    if (!vpx_img_alloc(&raw,
                       bit_depth == VPX_BITS_8 ? VPX_IMG_FMT_I420
                                               : VPX_IMG_FMT_I42016,
                       width, height, 32)) {
      die("Failed to allocate image %dx%d", width, height);
    }
  }
#else
  // Y4M reader has its own allocation.
  if (input_ctx.file_type != FILE_TYPE_Y4M) {
    if (!vpx_img_alloc(&raw, VPX_IMG_FMT_I420, width, height, 32)) {
      die("Failed to allocate image %dx%d", width, height);
    }
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Populate encoder configuration.
  res = vpx_codec_enc_config_default(encoder->codec_interface(), &cfg, 0);
  if (res) {
    printf("Failed to get config: %s\n", vpx_codec_err_to_string(res));
    return EXIT_FAILURE;
  }

  // Update the default configuration with our settings.
  cfg.g_w = width;
  cfg.g_h = height;

#if CONFIG_VP9_HIGHBITDEPTH
  if (bit_depth != VPX_BITS_8) {
    cfg.g_bit_depth = bit_depth;
    cfg.g_input_bit_depth = input_bit_depth;
    cfg.g_profile = 2;
  }
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Timebase format e.g. 30fps: numerator=1, denominator = 30.
  cfg.g_timebase.num = (int)strtol(argv[6], NULL, 0);
  cfg.g_timebase.den = (int)strtol(argv[7], NULL, 0);

  speed = (int)strtol(argv[8], NULL, 0);
  if (speed < 0) {
    die("Invalid speed setting: must be positive");
  }
  if (strncmp(encoder->name, "vp9", 3) == 0 && speed > 9) {
    warn("Mapping speed %d to speed 9.\n", speed);
  }

  for (i = min_args_base;
       (int)i < min_args_base + mode_to_num_layers[layering_mode]; ++i) {
    rc.layer_target_bitrate[i - 13] = (int)strtol(argv[i], NULL, 0);
    if (strncmp(encoder->name, "vp8", 3) == 0)
      cfg.ts_target_bitrate[i - 13] = rc.layer_target_bitrate[i - 13];
    else if (strncmp(encoder->name, "vp9", 3) == 0)
      cfg.layer_target_bitrate[i - 13] = rc.layer_target_bitrate[i - 13];
  }

  // Real time parameters.
  cfg.rc_dropframe_thresh = (unsigned int)strtoul(argv[9], NULL, 0);
  cfg.rc_end_usage = VPX_CBR;
  cfg.rc_min_quantizer = 2;
  cfg.rc_max_quantizer = 56;
  if (strncmp(encoder->name, "vp9", 3) == 0) cfg.rc_max_quantizer = 52;
  cfg.rc_undershoot_pct = 50;
  cfg.rc_overshoot_pct = 50;
  cfg.rc_buf_initial_sz = 600;
  cfg.rc_buf_optimal_sz = 600;
  cfg.rc_buf_sz = 1000;

  // Disable dynamic resizing by default.
  cfg.rc_resize_allowed = 0;

  // Use 1 thread as default.
  cfg.g_threads = (unsigned int)strtoul(argv[11], NULL, 0);

  error_resilient = (uint32_t)strtoul(argv[10], NULL, 0);
  if (error_resilient != 0 && error_resilient != 1) {
    die("Invalid value for error resilient (0, 1): %d.", error_resilient);
  }
  // Enable error resilient mode.
cfg.g_error_resilient = error_resilient; cfg.g_lag_in_frames = 0; cfg.kf_mode = VPX_KF_AUTO; // Disable automatic keyframe placement. cfg.kf_min_dist = cfg.kf_max_dist = 3000; cfg.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; set_temporal_layer_pattern(layering_mode, &cfg, layer_flags, &flag_periodicity); set_rate_control_metrics(&rc, &cfg); if (input_ctx.file_type == FILE_TYPE_Y4M) { if (input_ctx.width != cfg.g_w || input_ctx.height != cfg.g_h) { die("Incorrect width or height: %d x %d", cfg.g_w, cfg.g_h); } if (input_ctx.framerate.numerator != cfg.g_timebase.den || input_ctx.framerate.denominator != cfg.g_timebase.num) { die("Incorrect framerate: numerator %d denominator %d", cfg.g_timebase.num, cfg.g_timebase.den); } } framerate = cfg.g_timebase.den / cfg.g_timebase.num; // Open an output file for each stream. for (i = 0; i < cfg.ts_number_layers; ++i) { char file_name[PATH_MAX]; VpxVideoInfo info; info.codec_fourcc = encoder->fourcc; info.frame_width = cfg.g_w; info.frame_height = cfg.g_h; info.time_base.numerator = cfg.g_timebase.num; info.time_base.denominator = cfg.g_timebase.den; snprintf(file_name, sizeof(file_name), "%s_%d.ivf", argv[2], i); outfile[i] = vpx_video_writer_open(file_name, kContainerIVF, &info); if (!outfile[i]) die("Failed to open %s for writing", file_name); assert(outfile[i] != NULL); } // No spatial layers in this encoder. cfg.ss_number_layers = 1; // Initialize codec. #if CONFIG_VP9_HIGHBITDEPTH if (vpx_codec_enc_init( &codec, encoder->codec_interface(), &cfg, bit_depth == VPX_BITS_8 ? 0 : VPX_CODEC_USE_HIGHBITDEPTH)) #else if (vpx_codec_enc_init(&codec, encoder->codec_interface(), &cfg, 0)) #endif // CONFIG_VP9_HIGHBITDEPTH die_codec(&codec, "Failed to initialize encoder"); if (strncmp(encoder->name, "vp8", 3) == 0) { vpx_codec_control(&codec, VP8E_SET_CPUUSED, -speed); vpx_codec_control(&codec, VP8E_SET_NOISE_SENSITIVITY, kVp8DenoiserOff); vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); vpx_codec_control(&codec, VP8E_SET_GF_CBR_BOOST_PCT, 0); #if ROI_MAP set_roi_map(encoder->name, &cfg, &roi); if (vpx_codec_control(&codec, VP8E_SET_ROI_MAP, &roi)) die_codec(&codec, "Failed to set ROI map"); #endif } else if (strncmp(encoder->name, "vp9", 3) == 0) { vpx_svc_extra_cfg_t svc_params; memset(&svc_params, 0, sizeof(svc_params)); vpx_codec_control(&codec, VP8E_SET_CPUUSED, speed); vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 3); vpx_codec_control(&codec, VP9E_SET_GF_CBR_BOOST_PCT, 0); vpx_codec_control(&codec, VP9E_SET_FRAME_PARALLEL_DECODING, 0); vpx_codec_control(&codec, VP9E_SET_FRAME_PERIODIC_BOOST, 0); vpx_codec_control(&codec, VP9E_SET_NOISE_SENSITIVITY, kVp9DenoiserOff); vpx_codec_control(&codec, VP8E_SET_STATIC_THRESHOLD, 1); vpx_codec_control(&codec, VP9E_SET_TUNE_CONTENT, 0); vpx_codec_control(&codec, VP9E_SET_TILE_COLUMNS, get_msb(cfg.g_threads)); #if ROI_MAP set_roi_map(encoder->name, &cfg, &roi); if (vpx_codec_control(&codec, VP9E_SET_ROI_MAP, &roi)) die_codec(&codec, "Failed to set ROI map"); vpx_codec_control(&codec, VP9E_SET_AQ_MODE, 0); #endif if (cfg.g_threads > 1) vpx_codec_control(&codec, VP9E_SET_ROW_MT, 1); else vpx_codec_control(&codec, VP9E_SET_ROW_MT, 0); if (vpx_codec_control(&codec, VP9E_SET_SVC, layering_mode > 0 ? 
1 : 0)) die_codec(&codec, "Failed to set SVC"); for (i = 0; i < cfg.ts_number_layers; ++i) { svc_params.max_quantizers[i] = cfg.rc_max_quantizer; svc_params.min_quantizers[i] = cfg.rc_min_quantizer; } svc_params.scaling_factor_num[0] = cfg.g_h; svc_params.scaling_factor_den[0] = cfg.g_h; vpx_codec_control(&codec, VP9E_SET_SVC_PARAMETERS, &svc_params); } if (strncmp(encoder->name, "vp8", 3) == 0) { vpx_codec_control(&codec, VP8E_SET_SCREEN_CONTENT_MODE, 0); } vpx_codec_control(&codec, VP8E_SET_TOKEN_PARTITIONS, 1); // This controls the maximum target size of the key frame. // For generating smaller key frames, use a smaller max_intra_size_pct // value, like 100 or 200. { const int max_intra_size_pct = 1000; vpx_codec_control(&codec, VP8E_SET_MAX_INTRA_BITRATE_PCT, max_intra_size_pct); } frame_avail = 1; while (frame_avail || got_data) { struct vpx_usec_timer timer; vpx_codec_iter_t iter = NULL; const vpx_codec_cx_pkt_t *pkt; // Update the temporal layer_id. No spatial layers in this test. layer_id.spatial_layer_id = 0; layer_id.temporal_layer_id = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; layer_id.temporal_layer_id_per_spatial[0] = layer_id.temporal_layer_id; if (strncmp(encoder->name, "vp9", 3) == 0) { vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &layer_id); } else if (strncmp(encoder->name, "vp8", 3) == 0) { vpx_codec_control(&codec, VP8E_SET_TEMPORAL_LAYER_ID, layer_id.temporal_layer_id); } flags = layer_flags[frame_cnt % flag_periodicity]; if (layering_mode == 0) flags = 0; frame_avail = read_frame(&input_ctx, &raw); if (frame_avail) ++rc.layer_input_frames[layer_id.temporal_layer_id]; vpx_usec_timer_start(&timer); if (vpx_codec_encode(&codec, frame_avail ? &raw : NULL, pts, 1, flags, VPX_DL_REALTIME)) { die_codec(&codec, "Failed to encode frame"); } vpx_usec_timer_mark(&timer); cx_time += vpx_usec_timer_elapsed(&timer); // Reset KF flag. if (layering_mode != 7) { layer_flags[0] &= ~VPX_EFLAG_FORCE_KF; } got_data = 0; while ((pkt = vpx_codec_get_cx_data(&codec, &iter))) { got_data = 1; switch (pkt->kind) { case VPX_CODEC_CX_FRAME_PKT: for (i = cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity]; i < cfg.ts_number_layers; ++i) { vpx_video_writer_write_frame(outfile[i], pkt->data.frame.buf, pkt->data.frame.sz, pts); ++rc.layer_tot_enc_frames[i]; rc.layer_encoding_bitrate[i] += 8.0 * pkt->data.frame.sz; // Keep count of rate control stats per layer (for non-key frames). if (i == cfg.ts_layer_id[frame_cnt % cfg.ts_periodicity] && !(pkt->data.frame.flags & VPX_FRAME_IS_KEY)) { rc.layer_avg_frame_size[i] += 8.0 * pkt->data.frame.sz; rc.layer_avg_rate_mismatch[i] += fabs(8.0 * pkt->data.frame.sz - rc.layer_pfb[i]) / rc.layer_pfb[i]; ++rc.layer_enc_frames[i]; } } // Update for short-time encoding bitrate states, for moving window // of size rc->window, shifted by rc->window / 2. // Ignore first window segment, due to key frame. if (frame_cnt > rc.window_size) { sum_bitrate += 0.001 * 8.0 * pkt->data.frame.sz * framerate; if (frame_cnt % rc.window_size == 0) { rc.window_count += 1; rc.avg_st_encoding_bitrate += sum_bitrate / rc.window_size; rc.variance_st_encoding_bitrate += (sum_bitrate / rc.window_size) * (sum_bitrate / rc.window_size); sum_bitrate = 0.0; } } // Second shifted window. 
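/* Before the second window below, a self-contained sketch (not part of this
   example) of the windowed mean/variance bookkeeping used here: per-window
   bitrates accumulate as a sum and a sum of squares, and
   printout_rate_control_summary() later recovers
   variance = E[x^2] - (E[x])^2 from the two accumulators. The helper name
   is hypothetical.

static void sketch_window_stats(const double *window_kbps, int num_windows,
                                double *avg, double *variance) {
  int i;
  double sum = 0.0, sum_sq = 0.0;
  for (i = 0; i < num_windows; ++i) {
    sum += window_kbps[i];
    sum_sq += window_kbps[i] * window_kbps[i];
  }
  *avg = sum / num_windows;
  *variance = sum_sq / num_windows - (*avg) * (*avg);
}
*/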
if (frame_cnt > rc.window_size + rc.window_size / 2) { sum_bitrate2 += 0.001 * 8.0 * pkt->data.frame.sz * framerate; if (frame_cnt > 2 * rc.window_size && frame_cnt % rc.window_size == 0) { rc.window_count += 1; rc.avg_st_encoding_bitrate += sum_bitrate2 / rc.window_size; rc.variance_st_encoding_bitrate += (sum_bitrate2 / rc.window_size) * (sum_bitrate2 / rc.window_size); sum_bitrate2 = 0.0; } } break; default: break; } } ++frame_cnt; pts += frame_duration; } close_input_file(&input_ctx); printout_rate_control_summary(&rc, &cfg, frame_cnt); printf("\n"); printf("Frame cnt and encoding time/FPS stats for encoding: %d %f %f \n", frame_cnt, 1000 * (float)cx_time / (double)(frame_cnt * 1000000), 1000000 * (double)frame_cnt / (double)cx_time); if (vpx_codec_destroy(&codec)) die_codec(&codec, "Failed to destroy codec"); // Try to rewrite the output file headers with the actual frame count. for (i = 0; i < cfg.ts_number_layers; ++i) vpx_video_writer_close(outfile[i]); if (input_ctx.file_type != FILE_TYPE_Y4M) { vpx_img_free(&raw); } #if ROI_MAP free(roi.roi_map); #endif return EXIT_SUCCESS; } libvpx-1.8.2/ivfdec.c000066400000000000000000000061661357355204000144470ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include "vpx_ports/mem_ops.h" #include "./ivfdec.h" static const char *IVF_SIGNATURE = "DKIF"; static void fix_framerate(int *num, int *den) { // Some versions of vpxenc used 1/(2*fps) for the timebase, so // we can guess the framerate using only the timebase in this // case. Other files would require reading ahead to guess the // timebase, like we do for webm. if (*den > 0 && *den < 1000000000 && *num > 0 && *num < 1000) { // Correct for the factor of 2 applied to the timebase in the encoder. if (*num & 1) *den *= 2; else *num /= 2; } else { // Don't know FPS for sure, and don't have readahead code // (yet?), so just default to 30fps. *num = 30; *den = 1; } } int file_is_ivf(struct VpxInputContext *input_ctx) { char raw_hdr[32]; int is_ivf = 0; if (fread(raw_hdr, 1, 32, input_ctx->file) == 32) { if (memcmp(IVF_SIGNATURE, raw_hdr, 4) == 0) { is_ivf = 1; if (mem_get_le16(raw_hdr + 4) != 0) { fprintf(stderr, "Error: Unrecognized IVF version! 
This file may not" " decode properly."); } input_ctx->fourcc = mem_get_le32(raw_hdr + 8); input_ctx->width = mem_get_le16(raw_hdr + 12); input_ctx->height = mem_get_le16(raw_hdr + 14); input_ctx->framerate.numerator = mem_get_le32(raw_hdr + 16); input_ctx->framerate.denominator = mem_get_le32(raw_hdr + 20); fix_framerate(&input_ctx->framerate.numerator, &input_ctx->framerate.denominator); } } if (!is_ivf) { rewind(input_ctx->file); input_ctx->detect.buf_read = 0; } else { input_ctx->detect.position = 4; } return is_ivf; } int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, size_t *buffer_size) { char raw_header[IVF_FRAME_HDR_SZ] = { 0 }; size_t frame_size = 0; if (fread(raw_header, IVF_FRAME_HDR_SZ, 1, infile) != 1) { if (!feof(infile)) warn("Failed to read frame size"); } else { frame_size = mem_get_le32(raw_header); if (frame_size > 256 * 1024 * 1024) { warn("Read invalid frame size (%u)", (unsigned int)frame_size); frame_size = 0; } if (frame_size > *buffer_size) { uint8_t *new_buffer = realloc(*buffer, 2 * frame_size); if (new_buffer) { *buffer = new_buffer; *buffer_size = 2 * frame_size; } else { warn("Failed to allocate compressed data buffer"); frame_size = 0; } } } if (!feof(infile)) { if (fread(*buffer, 1, frame_size, infile) != frame_size) { warn("Failed to read full frame"); return 1; } *bytes_read = frame_size; return 0; } return 1; } libvpx-1.8.2/ivfdec.h000066400000000000000000000013661357355204000144510ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_IVFDEC_H_ #define VPX_IVFDEC_H_ #include "./tools_common.h" #ifdef __cplusplus extern "C" { #endif int file_is_ivf(struct VpxInputContext *input); int ivf_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, size_t *buffer_size); #ifdef __cplusplus } /* extern "C" */ #endif #endif // VPX_IVFDEC_H_ libvpx-1.8.2/ivfenc.c000066400000000000000000000033341357355204000144530ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./ivfenc.h" #include "vpx/vpx_encoder.h" #include "vpx_ports/mem_ops.h" void ivf_write_file_header(FILE *outfile, const struct vpx_codec_enc_cfg *cfg, unsigned int fourcc, int frame_cnt) { char header[32]; header[0] = 'D'; header[1] = 'K'; header[2] = 'I'; header[3] = 'F'; mem_put_le16(header + 4, 0); // version mem_put_le16(header + 6, 32); // header size mem_put_le32(header + 8, fourcc); // fourcc mem_put_le16(header + 12, cfg->g_w); // width mem_put_le16(header + 14, cfg->g_h); // height mem_put_le32(header + 16, cfg->g_timebase.den); // rate mem_put_le32(header + 20, cfg->g_timebase.num); // scale mem_put_le32(header + 24, frame_cnt); // length mem_put_le32(header + 28, 0); // unused fwrite(header, 1, 32, outfile); } void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size) { char header[12]; mem_put_le32(header, (int)frame_size); mem_put_le32(header + 4, (int)(pts & 0xFFFFFFFF)); mem_put_le32(header + 8, (int)(pts >> 32)); fwrite(header, 1, 12, outfile); } void ivf_write_frame_size(FILE *outfile, size_t frame_size) { char header[4]; mem_put_le32(header, (int)frame_size); fwrite(header, 1, 4, outfile); } libvpx-1.8.2/ivfenc.h000066400000000000000000000016371357355204000144640ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_IVFENC_H_ #define VPX_IVFENC_H_ #include "./tools_common.h" struct vpx_codec_enc_cfg; struct vpx_codec_cx_pkt; #ifdef __cplusplus extern "C" { #endif void ivf_write_file_header(FILE *outfile, const struct vpx_codec_enc_cfg *cfg, uint32_t fourcc, int frame_cnt); void ivf_write_frame_header(FILE *outfile, int64_t pts, size_t frame_size); void ivf_write_frame_size(FILE *outfile, size_t frame_size); #ifdef __cplusplus } /* extern "C" */ #endif #endif // VPX_IVFENC_H_ libvpx-1.8.2/keywords.dox000066400000000000000000000041241357355204000154160ustar00rootroot00000000000000/*!\page rfc2119 RFC2119 Keywords The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in RFC 2119. Specifically, the following definitions are used: \section MUST \anchor REQUIRED \anchor SHALL This word, or the terms "REQUIRED" or "SHALL", mean that the definition is an absolute requirement of the specification. \section MUSTNOT MUST NOT \anchor SHALLNOT This phrase, or the phrase "SHALL NOT", mean that the definition is an absolute prohibition of the specification. \section SHOULD \anchor RECOMMENDED This word, or the adjective "RECOMMENDED", mean that there may exist valid reasons in particular circumstances to ignore a particular item, but the full implications must be understood and carefully weighed before choosing a different course. \section SHOULDNOT SHOULD NOT \anchor NOTRECOMMENDED This phrase, or the phrase "NOT RECOMMENDED" mean that there may exist valid reasons in particular circumstances when the particular behavior is acceptable or even useful, but the full implications should be understood and the case carefully weighed before implementing any behavior described with this label. 
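Read against ivf_write_file_header() and ivf_write_frame_header() above, the container layout is easy to verify with a hex dump. The following annotated sketch is illustrative only; the VP9 fourcc value, the zero frame count, and the zero-length frame header are assumed example values, not library requirements.

```c
/* Sketch: byte layout produced by the IVF writer functions above. */
#include <stdio.h>

#include "./ivfenc.h"
#include "vpx/vpx_encoder.h"

void write_example_headers(FILE *out, const struct vpx_codec_enc_cfg *cfg) {
  /* 32-byte file header:
   *   0..3   "DKIF" signature        4..5   version (0)
   *   6..7   header size (32)        8..11  fourcc ("VP90" here)
   *   12..13 width                   14..15 height
   *   16..19 timebase.den (rate)     20..23 timebase.num (scale)
   *   24..27 frame count             28..31 unused */
  ivf_write_file_header(out, cfg, 0x30395056 /* "VP90" */, 0);
  /* 12-byte frame header: the frame size, then the 64-bit pts split
   * into low and high 32-bit halves, all little endian. */
  ivf_write_frame_header(out, /*pts=*/0, /*frame_size=*/0);
}
```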
\section MAY \anchor OPTIONAL This word, or the adjective "OPTIONAL", mean that an item is truly optional. One vendor may choose to include the item because a particular marketplace requires it or because the vendor feels that it enhances the product while another vendor may omit the same item. An implementation which does not include a particular option \ref MUST be prepared to interoperate with another implementation which does include the option, though perhaps with reduced functionality. In the same vein an implementation which does include a particular option \ref MUST be prepared to interoperate with another implementation which does not include the option (except, of course, for the feature the option provides.) */ libvpx-1.8.2/libs.doxy_template000066400000000000000000001503701357355204000165710ustar00rootroot00000000000000## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## # Doxyfile 1.5.4 # This file describes the settings to be used by the documentation system # doxygen (www.doxygen.org) for a project # # All text after a hash (#) is considered a comment and will be ignored # The format is: # TAG = value [value, ...] # For lists items can also be appended using: # TAG += value [value, ...] # Values that contain spaces should be placed between quotes (" ") #--------------------------------------------------------------------------- # Project related configuration options #--------------------------------------------------------------------------- # This tag specifies the encoding used for all characters in the config file that # follow. The default is UTF-8 which is also the encoding used for all text before # the first occurrence of this tag. Doxygen uses libiconv (or the iconv built into # libc) for the transcoding. See http://www.gnu.org/software/libiconv for the list of # possible encodings. DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded # by quotes) that should identify the project. PROJECT_NAME = "WebM Codec SDK" # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # base path where the generated documentation will be put. # If a relative path is entered, it will be relative to the location # where doxygen was started. If left blank the current directory will be used. OUTPUT_DIRECTORY = docs # If the CREATE_SUBDIRS tag is set to YES, then doxygen will create # 4096 sub-directories (in 2 levels) under the output directory of each output # format and will distribute the generated files over these directories. # Enabling this option can be useful when feeding doxygen a huge amount of # source files, where putting all generated files in the same directory would # otherwise cause performance problems for the file system. CREATE_SUBDIRS = NO # The OUTPUT_LANGUAGE tag is used to specify the language in which all # documentation generated by doxygen is written. Doxygen will use this # information to generate all constant output in the proper language. 
# The default language is English, other supported languages are: # Afrikaans, Arabic, Brazilian, Catalan, Chinese, Chinese-Traditional, # Croatian, Czech, Danish, Dutch, Finnish, French, German, Greek, Hungarian, # Italian, Japanese, Japanese-en (Japanese with English messages), Korean, # Korean-en, Lithuanian, Norwegian, Polish, Portuguese, Romanian, Russian, # Serbian, Slovak, Slovene, Spanish, Swedish, and Ukrainian. OUTPUT_LANGUAGE = English # If the BRIEF_MEMBER_DESC tag is set to YES (the default) Doxygen will # include brief member descriptions after the members that are listed in # the file and class documentation (similar to java_doc). # Set to NO to disable this. BRIEF_MEMBER_DESC = YES # If the REPEAT_BRIEF tag is set to YES (the default) Doxygen will prepend # the brief description of a member or function before the detailed description. # Note: if both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the # brief descriptions will be completely suppressed. REPEAT_BRIEF = YES # This tag implements a quasi-intelligent brief description abbreviator # that is used to form the text in various listings. Each string # in this list, if found as the leading text of the brief description, will be # stripped from the text and the result after processing the whole list, is # used as the annotated text. Otherwise, the brief description is used as-is. # If left blank, the following values are used ("$name" is automatically # replaced with the name of the entity): "The $name class" "The $name widget" # "The $name file" "is" "provides" "specifies" "contains" # "represents" "a" "an" "the" ABBREVIATE_BRIEF = # If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then # Doxygen will generate a detailed section even if there is only a brief # description. ALWAYS_DETAILED_SEC = NO # If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all # inherited members of a class in the documentation of that class as if those # members were ordinary class members. Constructors, destructors and assignment # operators of the base classes will not be shown. INLINE_INHERITED_MEMB = NO # If the FULL_PATH_NAMES tag is set to YES then Doxygen will prepend the full # path before files name in the file list and in the header files. If set # to NO the shortest path that makes the file name unique will be used. FULL_PATH_NAMES = YES # If the FULL_PATH_NAMES tag is set to YES then the STRIP_FROM_PATH tag # can be used to strip a user-defined part of the path. Stripping is # only done if one of the specified strings matches the left-hand part of # the path. The tag can be used to show relative paths in the file list. # If left blank the directory from which doxygen is run is used as the # path to strip. STRIP_FROM_PATH = # The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of # the path mentioned in the documentation of a class, which tells # the reader which header file to include in order to use a class. # If left blank only the name of the header file containing the class # definition is used. Otherwise one should specify the include paths that # are normally passed to the compiler using the -I flag. STRIP_FROM_INC_PATH = # If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter # (but less readable) file names. This can be useful is your file systems # doesn't support long names like on DOS, Mac, or CD-ROM. 
SHORT_NAMES = NO # If the JAVADOC_AUTOBRIEF tag is set to YES then Doxygen # will interpret the first line (until the first dot) of a java_doc-style # comment as the brief description. If set to NO, the java_doc # comments will behave just like regular Qt-style comments # (thus requiring an explicit @brief command for a brief description.) JAVADOC_AUTOBRIEF = NO # If the QT_AUTOBRIEF tag is set to YES then Doxygen will # interpret the first line (until the first dot) of a Qt-style # comment as the brief description. If set to NO, the comments # will behave just like regular Qt-style comments (thus requiring # an explicit \brief command for a brief description.) QT_AUTOBRIEF = NO # The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make Doxygen # treat a multi-line C++ special comment block (i.e. a block of //! or /// # comments) as a brief description. This used to be the default behaviour. # The new default is to treat a multi-line C++ comment block as a detailed # description. Set this tag to YES if you prefer the old behaviour instead. MULTILINE_CPP_IS_BRIEF = NO # If the INHERIT_DOCS tag is set to YES (the default) then an undocumented # member inherits the documentation from any documented member that it # re-implements. INHERIT_DOCS = YES # If the SEPARATE_MEMBER_PAGES tag is set to YES, then doxygen will produce # a new page for each member. If set to NO, the documentation of a member will # be part of the file/class/namespace that contains it. SEPARATE_MEMBER_PAGES = NO # The TAB_SIZE tag can be used to set the number of spaces in a tab. # Doxygen uses this value to replace tabs by spaces in code fragments. TAB_SIZE = 4 # This tag can be used to specify a number of aliases that acts # as commands in the documentation. An alias has the form "name=value". # For example adding "sideeffect=\par Side Effects:\n" will allow you to # put the command \sideeffect (or @sideeffect) in the documentation, which # will result in a user-defined paragraph with heading "Side Effects:". # You can put \n's in the value part of an alias to insert newlines. ALIASES = # Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C # sources only. Doxygen will then generate output that is more tailored for C. # For instance, some of the names that are used will be different. The list # of all members will be omitted, etc. OPTIMIZE_OUTPUT_FOR_C = YES # Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java # sources only. Doxygen will then generate output that is more tailored for Java. # For instance, namespaces will be presented as packages, qualified scopes # will look different, etc. OPTIMIZE_OUTPUT_JAVA = NO # If you use STL classes (i.e. std::string, std::vector, etc.) but do not want to # include (a tag file for) the STL sources as input, then you should # set this tag to YES in order to let doxygen match functions declarations and # definitions whose arguments contain STL classes (e.g. func(std::string); v.s. # func(std::string) {}). This also make the inheritance and collaboration # diagrams that involve STL classes more complete and accurate. BUILTIN_STL_SUPPORT = NO # If you use Microsoft's C++/CLI language, you should set this option to YES to # enable parsing support. CPP_CLI_SUPPORT = NO # Set the SIP_SUPPORT tag to YES if your project consists of sip sources only. # Doxygen will parse them like normal C++ but will assume all classes use public # instead of private inheritance when no explicit protection keyword is present. 
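As a concrete illustration of the brief-description tags above (JAVADOC_AUTOBRIEF is left at NO here, so a brief must be given explicitly), consider how doxygen parses the two comment styles below; the snippet is hypothetical and not part of the tree:

```c
/*!\brief Clamp a value to a closed range.
 *
 * With JAVADOC_AUTOBRIEF = NO, only the \brief line above becomes the
 * brief description; this paragraph becomes the detailed description.
 */
int clamp_int(int v, int lo, int hi);

/*! Without an explicit \brief command, this entire block is treated as
 * the detailed description and the member has no brief at all. */
int wrap_int(int v, int modulus);
```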
SIP_SUPPORT = NO # If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC # tag is set to YES, then doxygen will reuse the documentation of the first # member in the group (if any) for the other members of the group. By default # all members of a group must be documented explicitly. DISTRIBUTE_GROUP_DOC = NO # Set the SUBGROUPING tag to YES (the defqault) to allow class member groups of # the same type (for instance a group of public functions) to be put as a # subgroup of that type (e.g. under the Public Functions section). Set it to # NO to prevent subgrouping. Alternatively, this can be done per class using # the \nosubgrouping command. SUBGROUPING = YES # When TYPEDEF_HIDES_STRUCT is enabled, a typedef of a struct (or union) is # documented as struct with the name of the typedef. So # typedef struct type_s {} type_t, will appear in the documentation as a struct # with name type_t. When disabled the typedef will appear as a member of a file, # namespace, or class. And the struct will be named type_s. This can typically # be useful for C code where the coding convention is that all structs are # typedef'ed and only the typedef is referenced never the struct's name. TYPEDEF_HIDES_STRUCT = NO #--------------------------------------------------------------------------- # Build related configuration options #--------------------------------------------------------------------------- # If the EXTRACT_ALL tag is set to YES doxygen will assume all entities in # documentation are documented, even if no documentation was available. # Private class members and static file members will be hidden unless # the EXTRACT_PRIVATE and EXTRACT_STATIC tags are set to YES EXTRACT_ALL = NO # If the EXTRACT_PRIVATE tag is set to YES all private members of a class # will be included in the documentation. EXTRACT_PRIVATE = NO # If the EXTRACT_STATIC tag is set to YES all static members of a file # will be included in the documentation. EXTRACT_STATIC = NO # If the EXTRACT_LOCAL_CLASSES tag is set to YES classes (and structs) # defined locally in source files will be included in the documentation. # If set to NO only classes defined in header files are included. EXTRACT_LOCAL_CLASSES = YES # This flag is only useful for Objective-C code. When set to YES local # methods, which are defined in the implementation section but not in # the interface are included in the documentation. # If set to NO (the default) only methods in the interface are included. EXTRACT_LOCAL_METHODS = NO # If this flag is set to YES, the members of anonymous namespaces will be extracted # and appear in the documentation as a namespace called 'anonymous_namespace{file}', # where file will be replaced with the base name of the file that contains the anonymous # namespace. By default anonymous namespace are hidden. EXTRACT_ANON_NSPACES = NO # If the HIDE_UNDOC_MEMBERS tag is set to YES, Doxygen will hide all # undocumented members of documented classes, files or namespaces. # If set to NO (the default) these members will be included in the # various overviews, but no documentation section is generated. # This option has no effect if EXTRACT_ALL is enabled. HIDE_UNDOC_MEMBERS = NO # If the HIDE_UNDOC_CLASSES tag is set to YES, Doxygen will hide all # undocumented classes that are normally visible in the class hierarchy. # If set to NO (the default) these classes will be included in the various # overviews. This option has no effect if EXTRACT_ALL is enabled. 
HIDE_UNDOC_CLASSES = NO # If the HIDE_FRIEND_COMPOUNDS tag is set to YES, Doxygen will hide all # friend (class|struct|union) declarations. # If set to NO (the default) these declarations will be included in the # documentation. HIDE_FRIEND_COMPOUNDS = NO # If the HIDE_IN_BODY_DOCS tag is set to YES, Doxygen will hide any # documentation blocks found inside the body of a function. # If set to NO (the default) these blocks will be appended to the # function's detailed documentation block. HIDE_IN_BODY_DOCS = NO # The INTERNAL_DOCS tag determines if documentation # that is typed after a \internal command is included. If the tag is set # to NO (the default) then the documentation will be excluded. # Set it to YES to include the internal documentation. INTERNAL_DOCS = NO # If the CASE_SENSE_NAMES tag is set to NO then Doxygen will only generate # file names in lower-case letters. If set to YES upper-case letters are also # allowed. This is useful if you have classes or files whose names only differ # in case and if your file system supports case sensitive file names. Windows # and Mac users are advised to set this option to NO. CASE_SENSE_NAMES = YES # If the HIDE_SCOPE_NAMES tag is set to NO (the default) then Doxygen # will show members with their full class and namespace scopes in the # documentation. If set to YES the scope will be hidden. HIDE_SCOPE_NAMES = NO # If the SHOW_INCLUDE_FILES tag is set to YES (the default) then Doxygen # will put a list of the files that are included by a file in the documentation # of that file. SHOW_INCLUDE_FILES = YES # If the INLINE_INFO tag is set to YES (the default) then a tag [inline] # is inserted in the documentation for inline members. INLINE_INFO = YES # If the SORT_MEMBER_DOCS tag is set to YES (the default) then doxygen # will sort the (detailed) documentation of file and class members # alphabetically by member name. If set to NO the members will appear in # declaration order. SORT_MEMBER_DOCS = NO # If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the # brief documentation of file, namespace and class members alphabetically # by member name. If set to NO (the default) the members will appear in # declaration order. SORT_BRIEF_DOCS = NO # If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be # sorted by fully-qualified names, including namespaces. If set to # NO (the default), the class list will be sorted only by class name, # not including the namespace part. # Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES. # Note: This option applies only to the class list, not to the # alphabetical list. SORT_BY_SCOPE_NAME = NO # The GENERATE_TODOLIST tag can be used to enable (YES) or # disable (NO) the todo list. This list is created by putting \todo # commands in the documentation. GENERATE_TODOLIST = YES # The GENERATE_TESTLIST tag can be used to enable (YES) or # disable (NO) the test list. This list is created by putting \test # commands in the documentation. GENERATE_TESTLIST = YES # The GENERATE_BUGLIST tag can be used to enable (YES) or # disable (NO) the bug list. This list is created by putting \bug # commands in the documentation. GENERATE_BUGLIST = YES # The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or # disable (NO) the deprecated list. This list is created by putting # \deprecated commands in the documentation. GENERATE_DEPRECATEDLIST= YES # The ENABLED_SECTIONS tag can be used to enable conditional # documentation sections, marked by \if sectionname ... \endif. 
ENABLED_SECTIONS = # The MAX_INITIALIZER_LINES tag determines the maximum number of lines # the initial value of a variable or define consists of for it to appear in # the documentation. If the initializer consists of more lines than specified # here it will be hidden. Use a value of 0 to hide initializers completely. # The appearance of the initializer of individual variables and defines in the # documentation can be controlled using \showinitializer or \hideinitializer # command in the documentation regardless of this setting. MAX_INITIALIZER_LINES = 30 # Set the SHOW_USED_FILES tag to NO to disable the list of files generated # at the bottom of the documentation of classes and structs. If set to YES the # list will mention the files that were used to generate the documentation. SHOW_USED_FILES = YES # The FILE_VERSION_FILTER tag can be used to specify a program or script that # doxygen should invoke to get the current version for each file (typically from the # version control system). Doxygen will invoke the program by executing (via # popen()) the command <command> <input-file>, where <command> is the value of # the FILE_VERSION_FILTER tag, and <input-file> is the name of an input file # provided by doxygen. Whatever the program writes to standard output # is used as the file version. See the manual for examples. FILE_VERSION_FILTER = #--------------------------------------------------------------------------- # configuration options related to warning and progress messages #--------------------------------------------------------------------------- # The QUIET tag can be used to turn on/off the messages that are generated # by doxygen. Possible values are YES and NO. If left blank NO is used. QUIET = YES # The WARNINGS tag can be used to turn on/off the warning messages that are # generated by doxygen. Possible values are YES and NO. If left blank # NO is used. WARNINGS = YES # If WARN_IF_UNDOCUMENTED is set to YES, then doxygen will generate warnings # for undocumented members. If EXTRACT_ALL is set to YES then this flag will # automatically be disabled. WARN_IF_UNDOCUMENTED = YES # If WARN_IF_DOC_ERROR is set to YES, doxygen will generate warnings for # potential errors in the documentation, such as not documenting some # parameters in a documented function, or documenting parameters that # don't exist or using markup commands wrongly. WARN_IF_DOC_ERROR = YES # The WARN_NO_PARAMDOC option can be enabled to get warnings for # functions that are documented, but have no documentation for their parameters # or return value. If set to NO (the default) doxygen will only warn about # wrong or incomplete parameter documentation, but not about the absence of # documentation. WARN_NO_PARAMDOC = NO # The WARN_FORMAT tag determines the format of the warning messages that # doxygen can produce. The string should contain the $file, $line, and $text # tags, which will be replaced by the file and line number from which the # warning originated and the warning text. Optionally the format may contain # $version, which will be replaced by the version of the file (if it could # be obtained via FILE_VERSION_FILTER) WARN_FORMAT = "$file:$line: $text" # The WARN_LOGFILE tag can be used to specify a file to which warning # and error messages should be written. If left blank the output is written # to stderr.
WARN_LOGFILE = #--------------------------------------------------------------------------- # configuration options related to the input files #--------------------------------------------------------------------------- # The INPUT tag can be used to specify the files and/or directories that contain # documented source files. You may enter file names like "myfile.cpp" or # directories like "/usr/src/myproject". Separate the files or directories # with spaces. INPUT = # This tag can be used to specify the character encoding of the source files that # doxygen parses. Internally doxygen uses the UTF-8 encoding, which is also the default # input encoding. Doxygen uses libiconv (or the iconv built into libc) for the transcoding. # See http://www.gnu.org/software/libiconv for the list of possible encodings. INPUT_ENCODING = UTF-8 # If the value of the INPUT tag contains directories, you can use the # FILE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank the following patterns are tested: # *.c *.cc *.cxx *.cpp *.c++ *.java *.ii *.ixx *.ipp *.i++ *.inl *.h *.hh *.hxx # *.hpp *.h++ *.idl *.odl *.cs *.php *.php3 *.inc *.m *.mm *.py *.f90 FILE_PATTERNS = # The RECURSIVE tag can be used to turn specify whether or not subdirectories # should be searched for input files as well. Possible values are YES and NO. # If left blank NO is used. RECURSIVE = NO # The EXCLUDE tag can be used to specify files and/or directories that should # excluded from the INPUT source files. This way you can easily exclude a # subdirectory from a directory tree whose root is specified with the INPUT tag. EXCLUDE = # The EXCLUDE_SYMLINKS tag can be used select whether or not files or # directories that are symbolic links (a Unix filesystem feature) are excluded # from the input. EXCLUDE_SYMLINKS = NO # If the value of the INPUT tag contains directories, you can use the # EXCLUDE_PATTERNS tag to specify one or more wildcard patterns to exclude # certain files from those directories. Note that the wildcards are matched # against the file with absolute path, so to exclude all test directories # for example use the pattern */test/* EXCLUDE_PATTERNS = # The EXCLUDE_SYMBOLS tag can be used to specify one or more symbol names # (namespaces, classes, functions, etc.) that should be excluded from the output. # The symbol name can be a fully qualified name, a word, or if the wildcard * is used, # a substring. Examples: ANamespace, AClass, AClass::ANamespace, ANamespace::*Test EXCLUDE_SYMBOLS = # The EXAMPLE_PATH tag can be used to specify one or more files or # directories that contain example code fragments that are included (see # the \include command). EXAMPLE_PATH = # If the value of the EXAMPLE_PATH tag contains directories, you can use the # EXAMPLE_PATTERNS tag to specify one or more wildcard pattern (like *.cpp # and *.h) to filter out the source-files in the directories. If left # blank all files are included. EXAMPLE_PATTERNS = # If the EXAMPLE_RECURSIVE tag is set to YES then subdirectories will be # searched for input files to be used with the \include or \dontinclude # commands irrespective of the value of the RECURSIVE tag. # Possible values are YES and NO. If left blank NO is used. EXAMPLE_RECURSIVE = NO # The IMAGE_PATH tag can be used to specify one or more files or # directories that contain image that are included in the documentation (see # the \image command). 
IMAGE_PATH = # The INPUT_FILTER tag can be used to specify a program that doxygen should # invoke to filter for each input file. Doxygen will invoke the filter program # by executing (via popen()) the command <filter> <input-file>, where # <filter> is the value of the INPUT_FILTER tag, and <input-file> is the name of an # input file. Doxygen will then use the output that the filter program writes # to standard output. If FILTER_PATTERNS is specified, this tag will be # ignored. INPUT_FILTER = # The FILTER_PATTERNS tag can be used to specify filters on a per file pattern # basis. Doxygen will compare the file name with each pattern and apply the # filter if there is a match. The filters are a list of the form: # pattern=filter (like *.cpp=my_cpp_filter). See INPUT_FILTER for further # info on how filters are used. If FILTER_PATTERNS is empty, INPUT_FILTER # is applied to all files. FILTER_PATTERNS = # If the FILTER_SOURCE_FILES tag is set to YES, the input filter (if set using # INPUT_FILTER) will be used to filter the input files when producing source # files to browse (i.e. when SOURCE_BROWSER is set to YES). FILTER_SOURCE_FILES = NO #--------------------------------------------------------------------------- # configuration options related to source browsing #--------------------------------------------------------------------------- # If the SOURCE_BROWSER tag is set to YES then a list of source files will # be generated. Documented entities will be cross-referenced with these sources. # Note: To get rid of all source code in the generated output, make sure also # VERBATIM_HEADERS is set to NO. If you have enabled CALL_GRAPH or CALLER_GRAPH # then you must also enable this option. If you don't then doxygen will produce # a warning and turn it on anyway SOURCE_BROWSER = NO # Setting the INLINE_SOURCES tag to YES will include the body # of functions and classes directly in the documentation. INLINE_SOURCES = NO # Setting the STRIP_CODE_COMMENTS tag to YES (the default) will instruct # doxygen to hide any special comment blocks from generated source code # fragments. Normal C and C++ comments will always remain visible. STRIP_CODE_COMMENTS = YES # If the REFERENCED_BY_RELATION tag is set to YES (the default) # then for each documented function all documented # functions referencing it will be listed. REFERENCED_BY_RELATION = YES # If the REFERENCES_RELATION tag is set to YES (the default) # then for each documented function all documented entities # called/used by that function will be listed. REFERENCES_RELATION = YES # If the REFERENCES_LINK_SOURCE tag is set to YES (the default) # and SOURCE_BROWSER tag is set to YES, then the hyperlinks from # functions in REFERENCES_RELATION and REFERENCED_BY_RELATION lists will # link to the source code. Otherwise they will link to the documentation. REFERENCES_LINK_SOURCE = YES # If the USE_HTAGS tag is set to YES then the references to source code # will point to the HTML generated by the htags(1) tool instead of doxygen # built-in source browser. The htags tool is part of GNU's global source # tagging system (see http://www.gnu.org/software/global/global.html). You # will need version 4.8.6 or higher. USE_HTAGS = NO # If the VERBATIM_HEADERS tag is set to YES (the default) then Doxygen # will generate a verbatim copy of the header file for each class for # which an include is specified. Set to NO to disable this.
VERBATIM_HEADERS = YES #--------------------------------------------------------------------------- # configuration options related to the alphabetical class index #--------------------------------------------------------------------------- # If the ALPHABETICAL_INDEX tag is set to YES, an alphabetical index # of all compounds will be generated. Enable this if the project # contains a lot of classes, structs, unions or interfaces. ALPHABETICAL_INDEX = NO # If the alphabetical index is enabled (see ALPHABETICAL_INDEX) then # the COLS_IN_ALPHA_INDEX tag can be used to specify the number of columns # in which this list will be split (can be a number in the range [1..20]) COLS_IN_ALPHA_INDEX = 5 # In case all classes in a project start with a common prefix, all # classes will be put under the same header in the alphabetical index. # The IGNORE_PREFIX tag can be used to specify one or more prefixes that # should be ignored while generating the index headers. IGNORE_PREFIX = #--------------------------------------------------------------------------- # configuration options related to the HTML output #--------------------------------------------------------------------------- # If the GENERATE_HTML tag is set to YES (the default) Doxygen will # generate HTML output. GENERATE_HTML = YES # The HTML_OUTPUT tag is used to specify where the HTML docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `html' will be used as the default path. HTML_OUTPUT = html # The HTML_FILE_EXTENSION tag can be used to specify the file extension for # each generated HTML page (for example: .htm,.php,.asp). If it is left blank # doxygen will generate files with .html extension. HTML_FILE_EXTENSION = .html # The HTML_HEADER tag can be used to specify a personal HTML header for # each generated HTML page. If it is left blank doxygen will generate a # standard header. HTML_HEADER = # The HTML_FOOTER tag can be used to specify a personal HTML footer for # each generated HTML page. If it is left blank doxygen will generate a # standard footer. HTML_FOOTER = # The HTML_STYLESHEET tag can be used to specify a user-defined cascading # style sheet that is used by each HTML page. It can be used to # fine-tune the look of the HTML output. If the tag is left blank doxygen # will generate a default style sheet. Note that doxygen will try to copy # the style sheet file to the HTML output directory, so don't put your own # stylesheet in the HTML output directory as well, or it will be erased! HTML_STYLESHEET = # If the GENERATE_HTMLHELP tag is set to YES, additional index files # will be generated that can be used as input for tools like the # Microsoft HTML help workshop to generate a compressed HTML help file (.chm) # of the generated HTML documentation. GENERATE_HTMLHELP = NO # If the HTML_DYNAMIC_SECTIONS tag is set to YES then the generated HTML # documentation will contain sections that can be hidden and shown after the # page has loaded. For this to work a browser that supports # java_script and DHTML is required (for instance Mozilla 1.0+, Firefox # Netscape 6.0+, Internet explorer 5.0+, Konqueror, or Safari). HTML_DYNAMIC_SECTIONS = NO # If the GENERATE_HTMLHELP tag is set to YES, the CHM_FILE tag can # be used to specify the file name of the resulting .chm file. You # can add a path in front of the file if the result should not be # written to the html output directory. 
CHM_FILE = # If the GENERATE_HTMLHELP tag is set to YES, the HHC_LOCATION tag can # be used to specify the location (absolute path including file name) of # the HTML help compiler (hhc.exe). If non-empty doxygen will try to run # the HTML help compiler on the generated index.hhp. HHC_LOCATION = # If the GENERATE_HTMLHELP tag is set to YES, the GENERATE_CHI flag # controls if a separate .chi index file is generated (YES) or that # it should be included in the master .chm file (NO). GENERATE_CHI = NO # If the GENERATE_HTMLHELP tag is set to YES, the BINARY_TOC flag # controls whether a binary table of contents is generated (YES) or a # normal table of contents (NO) in the .chm file. BINARY_TOC = NO # The TOC_EXPAND flag can be set to YES to add extra items for group members # to the contents of the HTML help documentation and to the tree view. TOC_EXPAND = NO # The DISABLE_INDEX tag can be used to turn on/off the condensed index at # top of each HTML page. The value NO (the default) enables the index and # the value YES disables it. DISABLE_INDEX = NO # This tag can be used to set the number of enum values (range [1..20]) # that doxygen will group on one line in the generated HTML documentation. ENUM_VALUES_PER_LINE = 4 # If the GENERATE_TREEVIEW tag is set to YES, a side panel will be # generated containing a tree-like index structure (just like the one that # is generated for HTML Help). For this to work a browser that supports # java_script, DHTML, CSS and frames is required (for instance Mozilla 1.0+, # Netscape 6.0+, Internet explorer 5.0+, or Konqueror). Windows users are # probably better off using the HTML help feature. GENERATE_TREEVIEW = NO # If the treeview is enabled (see GENERATE_TREEVIEW) then this tag can be # used to set the initial width (in pixels) of the frame in which the tree # is shown. TREEVIEW_WIDTH = 250 #--------------------------------------------------------------------------- # configuration options related to the la_te_x output #--------------------------------------------------------------------------- # If the GENERATE_LATEX tag is set to YES (the default) Doxygen will # generate Latex output. GENERATE_LATEX = YES # The LATEX_OUTPUT tag is used to specify where the la_te_x docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `latex' will be used as the default path. LATEX_OUTPUT = latex # The LATEX_CMD_NAME tag can be used to specify the la_te_x command name to be # invoked. If left blank `latex' will be used as the default command name. LATEX_CMD_NAME = latex # The MAKEINDEX_CMD_NAME tag can be used to specify the command name to # generate index for la_te_x. If left blank `makeindex' will be used as the # default command name. MAKEINDEX_CMD_NAME = makeindex # If the COMPACT_LATEX tag is set to YES Doxygen generates more compact # la_te_x documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_LATEX = YES # The PAPER_TYPE tag can be used to set the paper type that is used # by the printer. Possible values are: a4, a4wide, letter, legal and # executive. If left blank a4wide will be used. PAPER_TYPE = letter # The EXTRA_PACKAGES tag can be to specify one or more names of la_te_x # packages that should be included in the la_te_x output. EXTRA_PACKAGES = # The LATEX_HEADER tag can be used to specify a personal la_te_x header for # the generated latex document. The header should contain everything until # the first chapter. 
If it is left blank doxygen will generate a # standard header. Notice: only use this tag if you know what you are doing! LATEX_HEADER = # If the PDF_HYPERLINKS tag is set to YES, the la_te_x that is generated # is prepared for conversion to pdf (using ps2pdf). The pdf file will # contain links (just like the HTML output) instead of page references # This makes the output suitable for online browsing using a pdf viewer. PDF_HYPERLINKS = YES # If the USE_PDFLATEX tag is set to YES, pdflatex will be used instead of # plain latex in the generated Makefile. Set this option to YES to get a # higher quality PDF documentation. USE_PDFLATEX = YES # If the LATEX_BATCHMODE tag is set to YES, doxygen will add the \\batchmode. # command to the generated la_te_x files. This will instruct la_te_x to keep # running if errors occur, instead of asking the user for help. # This option is also used when generating formulas in HTML. LATEX_BATCHMODE = NO # If LATEX_HIDE_INDICES is set to YES then doxygen will not # include the index chapters (such as File Index, Compound Index, etc.) # in the output. LATEX_HIDE_INDICES = NO #--------------------------------------------------------------------------- # configuration options related to the RTF output #--------------------------------------------------------------------------- # If the GENERATE_RTF tag is set to YES Doxygen will generate RTF output # The RTF output is optimized for Word 97 and may not look very pretty with # other RTF readers or editors. GENERATE_RTF = NO # The RTF_OUTPUT tag is used to specify where the RTF docs will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `rtf' will be used as the default path. RTF_OUTPUT = rtf # If the COMPACT_RTF tag is set to YES Doxygen generates more compact # RTF documents. This may be useful for small projects and may help to # save some trees in general. COMPACT_RTF = NO # If the RTF_HYPERLINKS tag is set to YES, the RTF that is generated # will contain hyperlink fields. The RTF file will # contain links (just like the HTML output) instead of page references. # This makes the output suitable for online browsing using WORD or other # programs which support those fields. # Note: wordpad (write) and others do not support links. RTF_HYPERLINKS = NO # Load stylesheet definitions from file. Syntax is similar to doxygen's # config file, i.e. a series of assignments. You only have to provide # replacements, missing definitions are set to their default value. RTF_STYLESHEET_FILE = # Set optional variables used in the generation of an rtf document. # Syntax is similar to doxygen's config file. RTF_EXTENSIONS_FILE = #--------------------------------------------------------------------------- # configuration options related to the man page output #--------------------------------------------------------------------------- # If the GENERATE_MAN tag is set to YES (the default) Doxygen will # generate man pages GENERATE_MAN = NO # The MAN_OUTPUT tag is used to specify where the man pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `man' will be used as the default path. 
MAN_OUTPUT = man # The MAN_EXTENSION tag determines the extension that is added to # the generated man pages (default is the subroutine's section .3) MAN_EXTENSION = .3 # If the MAN_LINKS tag is set to YES and Doxygen generates man output, # then it will generate one additional man file for each entity # documented in the real man page(s). These additional files # only source the real man page, but without them the man command # would be unable to find the correct page. The default is NO. MAN_LINKS = YES #--------------------------------------------------------------------------- # configuration options related to the XML output #--------------------------------------------------------------------------- # If the GENERATE_XML tag is set to YES Doxygen will # generate an XML file that captures the structure of # the code including all documentation. GENERATE_XML = NO # The XML_OUTPUT tag is used to specify where the XML pages will be put. # If a relative path is entered the value of OUTPUT_DIRECTORY will be # put in front of it. If left blank `xml' will be used as the default path. XML_OUTPUT = xml # If the XML_PROGRAMLISTING tag is set to YES Doxygen will # dump the program listings (including syntax highlighting # and cross-referencing information) to the XML output. Note that # enabling this will significantly increase the size of the XML output. XML_PROGRAMLISTING = YES #--------------------------------------------------------------------------- # configuration options for the auto_gen Definitions output #--------------------------------------------------------------------------- # If the GENERATE_AUTOGEN_DEF tag is set to YES Doxygen will # generate an auto_gen Definitions (see autogen.sf.net) file # that captures the structure of the code including all # documentation. Note that this feature is still experimental # and incomplete at the moment. GENERATE_AUTOGEN_DEF = NO #--------------------------------------------------------------------------- # configuration options related to the Perl module output #--------------------------------------------------------------------------- # If the GENERATE_PERLMOD tag is set to YES Doxygen will # generate a Perl module file that captures the structure of # the code including all documentation. Note that this # feature is still experimental and incomplete at the # moment. GENERATE_PERLMOD = NO # If the PERLMOD_LATEX tag is set to YES Doxygen will generate # the necessary Makefile rules, Perl scripts and la_te_x code to be able # to generate PDF and DVI output from the Perl module output. PERLMOD_LATEX = NO # If the PERLMOD_PRETTY tag is set to YES the Perl module output will be # nicely formatted so it can be parsed by a human reader. This is useful # if you want to understand what is going on. On the other hand, if this # tag is set to NO the size of the Perl module output will be much smaller # and Perl will parse it just the same. PERLMOD_PRETTY = YES # The names of the make variables in the generated doxyrules.make file # are prefixed with the string contained in PERLMOD_MAKEVAR_PREFIX. # This is useful so different doxyrules.make files included by the same # Makefile don't overwrite each other's variables. 
PERLMOD_MAKEVAR_PREFIX = #--------------------------------------------------------------------------- # Configuration options related to the preprocessor #--------------------------------------------------------------------------- # If the ENABLE_PREPROCESSING tag is set to YES (the default) Doxygen will # evaluate all C-preprocessor directives found in the sources and include # files. ENABLE_PREPROCESSING = YES # If the MACRO_EXPANSION tag is set to YES Doxygen will expand all macro # names in the source code. If set to NO (the default) only conditional # compilation will be performed. Macro expansion can be done in a controlled # way by setting EXPAND_ONLY_PREDEF to YES. MACRO_EXPANSION = YES # If the EXPAND_ONLY_PREDEF and MACRO_EXPANSION tags are both set to YES # then the macro expansion is limited to the macros specified with the # PREDEFINED and EXPAND_AS_DEFINED tags. EXPAND_ONLY_PREDEF = NO # If the SEARCH_INCLUDES tag is set to YES (the default) the includes files # in the INCLUDE_PATH (see below) will be search if a #include is found. SEARCH_INCLUDES = YES # The INCLUDE_PATH tag can be used to specify one or more directories that # contain include files that are not input files but should be processed by # the preprocessor. INCLUDE_PATH = # You can use the INCLUDE_FILE_PATTERNS tag to specify one or more wildcard # patterns (like *.h and *.hpp) to filter out the header-files in the # directories. If left blank, the patterns specified with FILE_PATTERNS will # be used. INCLUDE_FILE_PATTERNS = *.h # The PREDEFINED tag can be used to specify one or more macro names that # are defined before the preprocessor is started (similar to the -D option of # gcc). The argument of the tag is a list of macros of the form: name # or name=definition (no spaces). If the definition and the = are # omitted =1 is assumed. To prevent a macro definition from being # undefined via #undef or recursively expanded use the := operator # instead of the = operator. PREDEFINED = # If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then # this tag can be used to specify a list of macro names that should be expanded. # The macro definition that is found in the sources will be used. # Use the PREDEFINED tag if you want to use a different macro definition. EXPAND_AS_DEFINED = # If the SKIP_FUNCTION_MACROS tag is set to YES (the default) then # doxygen's preprocessor will remove all function-like macros that are alone # on a line, have an all uppercase name, and do not end with a semicolon. Such # function macros are typically used for boiler-plate code, and will confuse # the parser if not removed. SKIP_FUNCTION_MACROS = YES #--------------------------------------------------------------------------- # Configuration::additions related to external references #--------------------------------------------------------------------------- # The TAGFILES option can be used to specify one or more tagfiles. # Optionally an initial location of the external documentation # can be added for each tagfile. The format of a tag file without # this location is as follows: # TAGFILES = file1 file2 ... # Adding location for the tag files is done as follows: # TAGFILES = file1=loc1 "file2 = loc2" ... # where "loc1" and "loc2" can be relative or absolute paths or # URLs. If a location is present for each tag, the installdox tool # does not have to be run to correct the links. 
# Note that each tag file must have a unique name # (where the name does NOT include the path) # If a tag file is not located in the directory in which doxygen # is run, you must also specify the path to the tagfile here. TAGFILES = # When a file name is specified after GENERATE_TAGFILE, doxygen will create # a tag file that is based on the input files it reads. GENERATE_TAGFILE = # If the ALLEXTERNALS tag is set to YES all external classes will be listed # in the class index. If set to NO only the inherited external classes # will be listed. ALLEXTERNALS = NO # If the EXTERNAL_GROUPS tag is set to YES all external groups will be listed # in the modules index. If set to NO, only the current project's groups will # be listed. EXTERNAL_GROUPS = YES # The PERL_PATH should be the absolute path and name of the perl script # interpreter (i.e. the result of `which perl'). PERL_PATH = /usr/bin/perl #--------------------------------------------------------------------------- # Configuration options related to the dot tool #--------------------------------------------------------------------------- # If the CLASS_DIAGRAMS tag is set to YES (the default) Doxygen will # generate a inheritance diagram (in HTML, RTF and la_te_x) for classes with base # or super classes. Setting the tag to NO turns the diagrams off. Note that # this option is superseded by the HAVE_DOT option below. This is only a # fallback. It is recommended to install and use dot, since it yields more # powerful graphs. CLASS_DIAGRAMS = YES # You can define message sequence charts within doxygen comments using the \msc # command. Doxygen will then run the mscgen tool (see http://www.mcternan.me.uk/mscgen/) to # produce the chart and insert it in the documentation. The MSCGEN_PATH tag allows you to # specify the directory where the mscgen tool resides. If left empty the tool is assumed to # be found in the default search path. MSCGEN_PATH = # If set to YES, the inheritance and collaboration graphs will hide # inheritance and usage relations if the target is undocumented # or is not a class. HIDE_UNDOC_RELATIONS = YES # If you set the HAVE_DOT tag to YES then doxygen will assume the dot tool is # available from the path. This tool is part of Graphviz, a graph visualization # toolkit from AT&T and Lucent Bell Labs. The other options in this section # have no effect if this option is set to NO (the default) HAVE_DOT = NO # If the CLASS_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect inheritance relations. Setting this tag to YES will force the # the CLASS_DIAGRAMS tag to NO. CLASS_GRAPH = YES # If the COLLABORATION_GRAPH and HAVE_DOT tags are set to YES then doxygen # will generate a graph for each documented class showing the direct and # indirect implementation dependencies (inheritance, containment, and # class references variables) of the class with other documented classes. COLLABORATION_GRAPH = YES # If the GROUP_GRAPHS and HAVE_DOT tags are set to YES then doxygen # will generate a graph for groups, showing the direct groups dependencies GROUP_GRAPHS = YES # If the UML_LOOK tag is set to YES doxygen will generate inheritance and # collaboration diagrams in a style similar to the OMG's Unified Modeling # Language. UML_LOOK = NO # If set to YES, the inheritance and collaboration graphs will show the # relations between templates and their instances. 
TEMPLATE_RELATIONS = NO # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDE_GRAPH, and HAVE_DOT # tags are set to YES then doxygen will generate a graph for each documented # file showing the direct and indirect include dependencies of the file with # other documented files. INCLUDE_GRAPH = YES # If the ENABLE_PREPROCESSING, SEARCH_INCLUDES, INCLUDED_BY_GRAPH, and # HAVE_DOT tags are set to YES then doxygen will generate a graph for each # documented header file showing the documented files that directly or # indirectly include this file. INCLUDED_BY_GRAPH = YES # If the CALL_GRAPH, SOURCE_BROWSER and HAVE_DOT tags are set to YES then doxygen will # generate a call dependency graph for every global function or class method. # Note that enabling this option will significantly increase the time of a run. # So in most cases it will be better to enable call graphs for selected # functions only using the \callgraph command. CALL_GRAPH = NO # If the CALLER_GRAPH, SOURCE_BROWSER and HAVE_DOT tags are set to YES then doxygen will # generate a caller dependency graph for every global function or class method. # Note that enabling this option will significantly increase the time of a run. # So in most cases it will be better to enable caller graphs for selected # functions only using the \callergraph command. CALLER_GRAPH = NO # If the GRAPHICAL_HIERARCHY and HAVE_DOT tags are set to YES then doxygen # will graphical hierarchy of all classes instead of a textual one. GRAPHICAL_HIERARCHY = YES # If the DIRECTORY_GRAPH, SHOW_DIRECTORIES and HAVE_DOT tags are set to YES # then doxygen will show the dependencies a directory has on other directories # in a graphical way. The dependency relations are determined by the #include # relations between the files in the directories. DIRECTORY_GRAPH = YES # The DOT_IMAGE_FORMAT tag can be used to set the image format of the images # generated by dot. Possible values are png, jpg, or gif # If left blank png will be used. DOT_IMAGE_FORMAT = png # The tag DOT_PATH can be used to specify the path where the dot tool can be # found. If left blank, it is assumed the dot tool can be found in the path. DOT_PATH = # The DOTFILE_DIRS tag can be used to specify one or more directories that # contain dot files that are included in the documentation (see the # \dotfile command). DOTFILE_DIRS = # The MAX_DOT_GRAPH_MAX_NODES tag can be used to set the maximum number of # nodes that will be shown in the graph. If the number of nodes in a graph # becomes larger than this value, doxygen will truncate the graph, which is # visualized by representing a node as a red box. Note that doxygen if the number # of direct children of the root node in a graph is already larger than # MAX_DOT_GRAPH_NOTES then the graph will not be shown at all. Also note # that the size of a graph can be further restricted by MAX_DOT_GRAPH_DEPTH. DOT_GRAPH_MAX_NODES = 50 # The MAX_DOT_GRAPH_DEPTH tag can be used to set the maximum depth of the # graphs generated by dot. A depth value of 3 means that only nodes reachable # from the root by following a path via at most 3 edges will be shown. Nodes # that lay further from the root node will be omitted. Note that setting this # option to 1 or 2 may greatly reduce the computation time needed for large # code bases. Also note that the size of a graph can be further restricted by # DOT_GRAPH_MAX_NODES. Using a depth of 0 means no depth restriction. MAX_DOT_GRAPH_DEPTH = 0 # Set the DOT_TRANSPARENT tag to YES to generate images with a transparent # background. 
This is disabled by default, which results in a white background. # Warning: Depending on the platform used, enabling this option may lead to # badly anti-aliased labels on the edges of a graph (i.e. they become hard to # read). DOT_TRANSPARENT = YES # Set the DOT_MULTI_TARGETS tag to YES allow dot to generate multiple output # files in one run (i.e. multiple -o and -T options on the command line). This # makes dot run faster, but since only newer versions of dot (>1.8.10) # support this, this feature is disabled by default. DOT_MULTI_TARGETS = NO # If the GENERATE_LEGEND tag is set to YES (the default) Doxygen will # generate a legend page explaining the meaning of the various boxes and # arrows in the dot generated graphs. GENERATE_LEGEND = YES # If the DOT_CLEANUP tag is set to YES (the default) Doxygen will # remove the intermediate dot files that are used to generate # the various graphs. DOT_CLEANUP = YES #--------------------------------------------------------------------------- # Configuration::additions related to the search engine #--------------------------------------------------------------------------- # The SEARCHENGINE tag specifies whether or not a search engine should be # used. If set to NO the values of all tags below this one will be ignored. SEARCHENGINE = NO libvpx-1.8.2/libs.mk000066400000000000000000000546571357355204000143350ustar00rootroot00000000000000## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## # ARM assembly files are written in RVCT-style. 
We use some make magic to # filter those files to allow GCC compilation ifeq ($(VPX_ARCH_ARM),yes) ASM:=$(if $(filter yes,$(CONFIG_GCC)$(CONFIG_MSVS)),.asm.S,.asm) else ASM:=.asm endif # # Rule to generate runtime cpu detection files # define rtcd_h_template $$(BUILD_PFX)$(1).h: $$(SRC_PATH_BARE)/$(2) @echo " [CREATE] $$@" $$(qexec)$$(SRC_PATH_BARE)/build/make/rtcd.pl --arch=$$(TGT_ISA) \ --sym=$(1) \ --config=$$(CONFIG_DIR)$$(target)-$$(TOOLCHAIN).mk \ $$(RTCD_OPTIONS) $$^ > $$@ CLEAN-OBJS += $$(BUILD_PFX)$(1).h RTCD += $$(BUILD_PFX)$(1).h endef CODEC_SRCS-yes += CHANGELOG CODEC_SRCS-yes += libs.mk include $(SRC_PATH_BARE)/vpx/vpx_codec.mk CODEC_SRCS-yes += $(addprefix vpx/,$(call enabled,API_SRCS)) CODEC_DOC_SRCS += $(addprefix vpx/,$(call enabled,API_DOC_SRCS)) include $(SRC_PATH_BARE)/vpx_mem/vpx_mem.mk CODEC_SRCS-yes += $(addprefix vpx_mem/,$(call enabled,MEM_SRCS)) include $(SRC_PATH_BARE)/vpx_scale/vpx_scale.mk CODEC_SRCS-yes += $(addprefix vpx_scale/,$(call enabled,SCALE_SRCS)) include $(SRC_PATH_BARE)/vpx_ports/vpx_ports.mk CODEC_SRCS-yes += $(addprefix vpx_ports/,$(call enabled,PORTS_SRCS)) include $(SRC_PATH_BARE)/vpx_dsp/vpx_dsp.mk CODEC_SRCS-yes += $(addprefix vpx_dsp/,$(call enabled,DSP_SRCS)) include $(SRC_PATH_BARE)/vpx_util/vpx_util.mk CODEC_SRCS-yes += $(addprefix vpx_util/,$(call enabled,UTIL_SRCS)) ifeq ($(CONFIG_VP8),yes) VP8_PREFIX=vp8/ include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8_common.mk endif ifeq ($(CONFIG_VP8_ENCODER),yes) include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8cx.mk CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_CX_SRCS)) CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_CX_EXPORTS)) INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% CODEC_DOC_SECTIONS += vp8 vp8_encoder endif ifeq ($(CONFIG_VP8_DECODER),yes) include $(SRC_PATH_BARE)/$(VP8_PREFIX)vp8dx.mk CODEC_SRCS-yes += $(addprefix $(VP8_PREFIX),$(call enabled,VP8_DX_SRCS)) CODEC_EXPORTS-yes += $(addprefix $(VP8_PREFIX),$(VP8_DX_EXPORTS)) INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP8_PREFIX)/% CODEC_DOC_SECTIONS += vp8 vp8_decoder endif ifeq ($(CONFIG_VP9),yes) VP9_PREFIX=vp9/ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9_common.mk endif ifeq ($(CONFIG_VP9_ENCODER),yes) VP9_PREFIX=vp9/ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9cx.mk CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_CX_SRCS)) CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_CX_EXPORTS)) CODEC_SRCS-yes += $(VP9_PREFIX)vp9cx.mk vpx/vp8.h vpx/vp8cx.h INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8cx.h INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/% CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8cx.h CODEC_DOC_SECTIONS += vp9 vp9_encoder endif ifeq ($(CONFIG_VP9_DECODER),yes) VP9_PREFIX=vp9/ include $(SRC_PATH_BARE)/$(VP9_PREFIX)vp9dx.mk CODEC_SRCS-yes += $(addprefix $(VP9_PREFIX),$(call enabled,VP9_DX_SRCS)) CODEC_EXPORTS-yes += $(addprefix $(VP9_PREFIX),$(VP9_DX_EXPORTS)) CODEC_SRCS-yes += $(VP9_PREFIX)vp9dx.mk vpx/vp8.h vpx/vp8dx.h INSTALL-LIBS-yes += include/vpx/vp8.h include/vpx/vp8dx.h INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/$(VP9_PREFIX)/% CODEC_DOC_SRCS += vpx/vp8.h vpx/vp8dx.h CODEC_DOC_SECTIONS += vp9 vp9_decoder endif ifeq ($(CONFIG_ENCODERS),yes) CODEC_DOC_SECTIONS += encoder endif ifeq ($(CONFIG_DECODERS),yes) CODEC_DOC_SECTIONS += decoder endif ifeq ($(CONFIG_MSVS),yes) CODEC_LIB=$(if $(CONFIG_STATIC_MSVCRT),vpxmt,vpxmd) GTEST_LIB=$(if 
$(CONFIG_STATIC_MSVCRT),gtestmt,gtestmd) # This variable uses deferred expansion intentionally, since the results of # $(wildcard) may change during the course of the Make. VS_PLATFORMS = $(foreach d,$(wildcard */Release/$(CODEC_LIB).lib),$(word 1,$(subst /, ,$(d)))) endif # The following pairs define a mapping of locations in the distribution # tree to locations in the source/build trees. INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/vpx/% INSTALL_MAPS += include/vpx/% $(SRC_PATH_BARE)/vpx_ports/% INSTALL_MAPS += $(LIBSUBDIR)/% % INSTALL_MAPS += src/% $(SRC_PATH_BARE)/% ifeq ($(CONFIG_MSVS),yes) INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Release/%) INSTALL_MAPS += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/% $(p)/Debug/%) endif CODEC_SRCS-yes += build/make/version.sh CODEC_SRCS-yes += build/make/rtcd.pl CODEC_SRCS-yes += vpx_ports/emmintrin_compat.h CODEC_SRCS-yes += vpx_ports/mem_ops.h CODEC_SRCS-yes += vpx_ports/mem_ops_aligned.h CODEC_SRCS-yes += vpx_ports/vpx_once.h CODEC_SRCS-yes += $(BUILD_PFX)vpx_config.c INSTALL-SRCS-no += $(BUILD_PFX)vpx_config.c ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes) INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += third_party/x86inc/x86inc.asm INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += vpx_dsp/x86/bitdepth_conversion_sse2.asm endif CODEC_EXPORTS-yes += vpx/exports_com CODEC_EXPORTS-$(CONFIG_ENCODERS) += vpx/exports_enc CODEC_EXPORTS-$(CONFIG_DECODERS) += vpx/exports_dec INSTALL-LIBS-yes += include/vpx/vpx_codec.h INSTALL-LIBS-yes += include/vpx/vpx_frame_buffer.h INSTALL-LIBS-yes += include/vpx/vpx_image.h INSTALL-LIBS-yes += include/vpx/vpx_integer.h INSTALL-LIBS-$(CONFIG_DECODERS) += include/vpx/vpx_decoder.h INSTALL-LIBS-$(CONFIG_ENCODERS) += include/vpx/vpx_encoder.h ifeq ($(CONFIG_EXTERNAL_BUILD),yes) ifeq ($(CONFIG_MSVS),yes) INSTALL-LIBS-yes += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/$(CODEC_LIB).lib) INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/$(CODEC_LIB)d.lib) INSTALL-LIBS-$(CONFIG_SHARED) += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/vpx.dll) INSTALL-LIBS-$(CONFIG_SHARED) += $(foreach p,$(VS_PLATFORMS),$(LIBSUBDIR)/$(p)/vpx.exp) endif else INSTALL-LIBS-$(CONFIG_STATIC) += $(LIBSUBDIR)/libvpx.a INSTALL-LIBS-$(CONFIG_DEBUG_LIBS) += $(LIBSUBDIR)/libvpx_g.a endif CODEC_SRCS=$(call enabled,CODEC_SRCS) INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(CODEC_SRCS) INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(call enabled,CODEC_EXPORTS) # Generate a list of all enabled sources, in particular for exporting to gyp # based build systems. libvpx_srcs.txt: @echo " [CREATE] $@" @echo $(CODEC_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@ CLEAN-OBJS += libvpx_srcs.txt # Assembly files that are included, but don't define symbols themselves. # Filtered out to avoid Windows build warnings. 
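# (See ASM_INCLUDES just below; those files are subtracted from LIBVPX_OBJS # and from the sources handed to the generated Visual Studio project, so they # are never assembled on their own.)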
ASM_INCLUDES := \ third_party/x86inc/x86inc.asm \ vpx_config.asm \ vpx_ports/x86_abi_support.asm \ vpx_dsp/x86/bitdepth_conversion_sse2.asm \ ifeq ($(CONFIG_EXTERNAL_BUILD),yes) ifeq ($(CONFIG_MSVS),yes) vpx.def: $(call enabled,CODEC_EXPORTS) @echo " [CREATE] $@" $(qexec)$(SRC_PATH_BARE)/build/make/gen_msvs_def.sh\ --name=vpx\ --out=$@ $^ CLEAN-OBJS += vpx.def vpx.$(VCPROJ_SFX): VCPROJ_SRCS=$(filter-out $(addprefix %, $(ASM_INCLUDES)), $^) vpx.$(VCPROJ_SFX): $(CODEC_SRCS) vpx.def @echo " [CREATE] $@" $(qexec)$(GEN_VCPROJ) \ $(if $(CONFIG_SHARED),--dll,--lib) \ --target=$(TOOLCHAIN) \ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ --name=vpx \ --proj-guid=DCE19DAF-69AC-46DB-B14A-39F0FAA5DB74 \ --module-def=vpx.def \ --ver=$(CONFIG_VS_VERSION) \ --src-path-bare="$(SRC_PATH_BARE)" \ --out=$@ $(CFLAGS) \ $(filter $(SRC_PATH_BARE)/vp8/%.c, $(VCPROJ_SRCS)) \ $(filter $(SRC_PATH_BARE)/vp8/%.h, $(VCPROJ_SRCS)) \ $(filter $(SRC_PATH_BARE)/vp9/%.c, $(VCPROJ_SRCS)) \ $(filter $(SRC_PATH_BARE)/vp9/%.h, $(VCPROJ_SRCS)) \ $(filter $(SRC_PATH_BARE)/vpx/%, $(VCPROJ_SRCS)) \ $(filter $(SRC_PATH_BARE)/vpx_dsp/%, $(VCPROJ_SRCS)) \ $(filter-out $(addprefix $(SRC_PATH_BARE)/, \ vp8/%.c vp8/%.h vp9/%.c vp9/%.h vpx/% vpx_dsp/%), \ $(VCPROJ_SRCS)) \ --src-path-bare="$(SRC_PATH_BARE)" \ PROJECTS-yes += vpx.$(VCPROJ_SFX) vpx.$(VCPROJ_SFX): vpx_config.asm vpx.$(VCPROJ_SFX): $(RTCD) endif else LIBVPX_OBJS=$(call objs, $(filter-out $(ASM_INCLUDES), $(CODEC_SRCS))) OBJS-yes += $(LIBVPX_OBJS) LIBS-$(if yes,$(CONFIG_STATIC)) += $(BUILD_PFX)libvpx.a $(BUILD_PFX)libvpx_g.a $(BUILD_PFX)libvpx_g.a: $(LIBVPX_OBJS) SO_VERSION_MAJOR := 6 SO_VERSION_MINOR := 2 SO_VERSION_PATCH := 0 ifeq ($(filter darwin%,$(TGT_OS)),$(TGT_OS)) LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib SHARED_LIB_SUF := .dylib EXPORT_FILE := libvpx.syms LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \ libvpx.dylib ) else ifeq ($(filter iphonesimulator%,$(TGT_OS)),$(TGT_OS)) LIBVPX_SO := libvpx.$(SO_VERSION_MAJOR).dylib SHARED_LIB_SUF := .dylib EXPORT_FILE := libvpx.syms LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, libvpx.dylib) else ifeq ($(filter os2%,$(TGT_OS)),$(TGT_OS)) LIBVPX_SO := libvpx$(SO_VERSION_MAJOR).dll SHARED_LIB_SUF := _dll.a EXPORT_FILE := libvpx.def LIBVPX_SO_SYMLINKS := LIBVPX_SO_IMPLIB := libvpx_dll.a else LIBVPX_SO := libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR).$(SO_VERSION_PATCH) SHARED_LIB_SUF := .so EXPORT_FILE := libvpx.ver LIBVPX_SO_SYMLINKS := $(addprefix $(LIBSUBDIR)/, \ libvpx.so libvpx.so.$(SO_VERSION_MAJOR) \ libvpx.so.$(SO_VERSION_MAJOR).$(SO_VERSION_MINOR)) endif endif endif LIBS-$(CONFIG_SHARED) += $(BUILD_PFX)$(LIBVPX_SO)\ $(notdir $(LIBVPX_SO_SYMLINKS)) \ $(if $(LIBVPX_SO_IMPLIB), $(BUILD_PFX)$(LIBVPX_SO_IMPLIB)) $(BUILD_PFX)$(LIBVPX_SO): $(LIBVPX_OBJS) $(EXPORT_FILE) $(BUILD_PFX)$(LIBVPX_SO): extralibs += -lm $(BUILD_PFX)$(LIBVPX_SO): SONAME = libvpx.so.$(SO_VERSION_MAJOR) $(BUILD_PFX)$(LIBVPX_SO): EXPORTS_FILE = $(EXPORT_FILE) libvpx.def: $(call enabled,CODEC_EXPORTS) @echo " [CREATE] $@" $(qexec)echo LIBRARY $(LIBVPX_SO:.dll=) INITINSTANCE TERMINSTANCE > $@ $(qexec)echo "DATA MULTIPLE NONSHARED" >> $@ $(qexec)echo "EXPORTS" >> $@ $(qexec)awk '!/vpx_svc_*/ {print "_"$$2}' $^ >>$@ CLEAN-OBJS += libvpx.def libvpx_dll.a: $(LIBVPX_SO) @echo " [IMPLIB] $@" $(qexec)emximp -o $@ $< CLEAN-OBJS += libvpx_dll.a define libvpx_symlink_template $(1): $(2) @echo " [LN] $(2) $$@" $(qexec)mkdir -p $$(dir $$@) $(qexec)ln -sf $(2) $$@ endef $(eval $(call libvpx_symlink_template,\ $(addprefix $(BUILD_PFX),$(notdir 
$(LIBVPX_SO_SYMLINKS))),\ $(BUILD_PFX)$(LIBVPX_SO))) $(eval $(call libvpx_symlink_template,\ $(addprefix $(DIST_DIR)/,$(LIBVPX_SO_SYMLINKS)),\ $(LIBVPX_SO))) INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBVPX_SO_SYMLINKS) INSTALL-LIBS-$(CONFIG_SHARED) += $(LIBSUBDIR)/$(LIBVPX_SO) INSTALL-LIBS-$(CONFIG_SHARED) += $(if $(LIBVPX_SO_IMPLIB),$(LIBSUBDIR)/$(LIBVPX_SO_IMPLIB)) LIBS-yes += vpx.pc vpx.pc: config.mk libs.mk @echo " [CREATE] $@" $(qexec)echo '# pkg-config file from libvpx $(VERSION_STRING)' > $@ $(qexec)echo 'prefix=$(PREFIX)' >> $@ $(qexec)echo 'exec_prefix=$${prefix}' >> $@ $(qexec)echo 'libdir=$${prefix}/$(LIBSUBDIR)' >> $@ $(qexec)echo 'includedir=$${prefix}/include' >> $@ $(qexec)echo '' >> $@ $(qexec)echo 'Name: vpx' >> $@ $(qexec)echo 'Description: WebM Project VPx codec implementation' >> $@ $(qexec)echo 'Version: $(VERSION_MAJOR).$(VERSION_MINOR).$(VERSION_PATCH)' >> $@ $(qexec)echo 'Requires:' >> $@ $(qexec)echo 'Conflicts:' >> $@ $(qexec)echo 'Libs: -L$${libdir} -lvpx -lm' >> $@ ifeq ($(HAVE_PTHREAD_H),yes) $(qexec)echo 'Libs.private: -lm -lpthread' >> $@ else $(qexec)echo 'Libs.private: -lm' >> $@ endif $(qexec)echo 'Cflags: -I$${includedir}' >> $@ INSTALL-LIBS-yes += $(LIBSUBDIR)/pkgconfig/vpx.pc INSTALL_MAPS += $(LIBSUBDIR)/pkgconfig/%.pc %.pc CLEAN-OBJS += vpx.pc endif libvpx.ver: $(call enabled,CODEC_EXPORTS) @echo " [CREATE] $@" $(qexec)echo "{ global:" > $@ $(qexec)for f in $?; do awk '{print $$2";"}' < $$f >>$@; done $(qexec)echo "local: *; };" >> $@ CLEAN-OBJS += libvpx.ver libvpx.syms: $(call enabled,CODEC_EXPORTS) @echo " [CREATE] $@" $(qexec)awk '{print "_"$$2}' $^ >$@ CLEAN-OBJS += libvpx.syms # # Rule to make assembler configuration file from C configuration file # ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes) # YASM $(BUILD_PFX)vpx_config.asm: $(BUILD_PFX)vpx_config.h @echo " [CREATE] $@" @egrep "#define [A-Z0-9_]+ [01]" $< \ | awk '{print $$2 " equ " $$3}' > $@ else ADS2GAS=$(if $(filter yes,$(CONFIG_GCC)),| $(ASM_CONVERSION)) $(BUILD_PFX)vpx_config.asm: $(BUILD_PFX)vpx_config.h @echo " [CREATE] $@" @egrep "#define [A-Z0-9_]+ [01]" $< \ | awk '{print $$2 " EQU " $$3}' $(ADS2GAS) > $@ @echo " END" $(ADS2GAS) >> $@ CLEAN-OBJS += $(BUILD_PFX)vpx_config.asm endif # # Add assembler dependencies for configuration. # $(filter %.S.o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm $(filter %$(ASM).o,$(OBJS-yes)): $(BUILD_PFX)vpx_config.asm $(shell $(SRC_PATH_BARE)/build/make/version.sh "$(SRC_PATH_BARE)" $(BUILD_PFX)vpx_version.h) CLEAN-OBJS += $(BUILD_PFX)vpx_version.h # # Add include path for libwebm sources. # ifeq ($(CONFIG_WEBM_IO),yes) CXXFLAGS += -I$(SRC_PATH_BARE)/third_party/libwebm endif ## ## libvpx test directives ## ifeq ($(CONFIG_UNIT_TESTS),yes) LIBVPX_TEST_DATA_PATH ?= . 
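# LIBVPX_TEST_DATA_PATH controls where the test clips are downloaded to and # where test_libvpx looks for them at run time. Overriding it keeps the # (large) data set out of the build tree; the paths below are only examples: # make testdata LIBVPX_TEST_DATA_PATH=/path/to/test-data # LIBVPX_TEST_DATA_PATH=/path/to/test-data ./test_libvpx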
include $(SRC_PATH_BARE)/test/test.mk LIBVPX_TEST_SRCS=$(addprefix test/,$(call enabled,LIBVPX_TEST_SRCS)) LIBVPX_TEST_BIN=./test_libvpx$(EXE_SFX) LIBVPX_TEST_DATA=$(addprefix $(LIBVPX_TEST_DATA_PATH)/,\ $(call enabled,LIBVPX_TEST_DATA)) libvpx_test_data_url=https://storage.googleapis.com/downloads.webmproject.org/test_data/libvpx/$(1) TEST_INTRA_PRED_SPEED_BIN=./test_intra_pred_speed$(EXE_SFX) TEST_INTRA_PRED_SPEED_SRCS=$(addprefix test/,$(call enabled,TEST_INTRA_PRED_SPEED_SRCS)) TEST_INTRA_PRED_SPEED_OBJS := $(sort $(call objs,$(TEST_INTRA_PRED_SPEED_SRCS))) libvpx_test_srcs.txt: @echo " [CREATE] $@" @echo $(LIBVPX_TEST_SRCS) | xargs -n1 echo | LC_ALL=C sort -u > $@ CLEAN-OBJS += libvpx_test_srcs.txt $(LIBVPX_TEST_DATA): $(SRC_PATH_BARE)/test/test-data.sha1 @echo " [DOWNLOAD] $@" # Attempt to download the file using curl, retrying once if it fails for a # partial file (18). $(qexec)( \ trap 'rm -f $@' INT TERM; \ curl="curl --retry 1 -L -o $@ $(call libvpx_test_data_url,$(@F))"; \ $$curl; \ case "$$?" in \ 18) $$curl -C -;; \ esac \ ) testdata:: $(LIBVPX_TEST_DATA) $(qexec)[ -x "$$(which sha1sum)" ] && sha1sum=sha1sum;\ [ -x "$$(which shasum)" ] && sha1sum=shasum;\ [ -x "$$(which sha1)" ] && sha1sum=sha1;\ if [ -n "$${sha1sum}" ]; then\ set -e;\ echo "Checking test data:";\ for f in $(call enabled,LIBVPX_TEST_DATA); do\ grep $$f $(SRC_PATH_BARE)/test/test-data.sha1 |\ (cd $(LIBVPX_TEST_DATA_PATH); $${sha1sum} -c);\ done; \ else\ echo "Skipping test data integrity check, sha1sum not found.";\ fi ifeq ($(CONFIG_EXTERNAL_BUILD),yes) ifeq ($(CONFIG_MSVS),yes) gtest.$(VCPROJ_SFX): $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc @echo " [CREATE] $@" $(qexec)$(GEN_VCPROJ) \ --lib \ --target=$(TOOLCHAIN) \ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ --name=gtest \ --proj-guid=EC00E1EC-AF68-4D92-A255-181690D1C9B1 \ --ver=$(CONFIG_VS_VERSION) \ --src-path-bare="$(SRC_PATH_BARE)" \ -D_VARIADIC_MAX=10 \ --out=gtest.$(VCPROJ_SFX) $(SRC_PATH_BARE)/third_party/googletest/src/src/gtest-all.cc \ -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" -I"$(SRC_PATH_BARE)/third_party/googletest/src" PROJECTS-$(CONFIG_MSVS) += gtest.$(VCPROJ_SFX) test_libvpx.$(VCPROJ_SFX): $(LIBVPX_TEST_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX) @echo " [CREATE] $@" $(qexec)$(GEN_VCPROJ) \ --exe \ --target=$(TOOLCHAIN) \ --name=test_libvpx \ -D_VARIADIC_MAX=10 \ --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \ --ver=$(CONFIG_VS_VERSION) \ --src-path-bare="$(SRC_PATH_BARE)" \ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \ -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \ $(if $(CONFIG_WEBM_IO),-I"$(SRC_PATH_BARE)/third_party/libwebm") \ -L. -l$(CODEC_LIB) -l$(GTEST_LIB) $^ PROJECTS-$(CONFIG_MSVS) += test_libvpx.$(VCPROJ_SFX) LIBVPX_TEST_BIN := $(addprefix $(TGT_OS:win64=x64)/Release/,$(notdir $(LIBVPX_TEST_BIN))) ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),) PROJECTS-$(CONFIG_MSVS) += test_intra_pred_speed.$(VCPROJ_SFX) test_intra_pred_speed.$(VCPROJ_SFX): $(TEST_INTRA_PRED_SPEED_SRCS) vpx.$(VCPROJ_SFX) gtest.$(VCPROJ_SFX) @echo " [CREATE] $@" $(qexec)$(GEN_VCPROJ) \ --exe \ --target=$(TOOLCHAIN) \ --name=test_intra_pred_speed \ -D_VARIADIC_MAX=10 \ --proj-guid=CD837F5F-52D8-4314-A370-895D614166A7 \ --ver=$(CONFIG_VS_VERSION) \ --src-path-bare="$(SRC_PATH_BARE)" \ $(if $(CONFIG_STATIC_MSVCRT),--static-crt) \ --out=$@ $(INTERNAL_CFLAGS) $(CFLAGS) \ -I. -I"$(SRC_PATH_BARE)/third_party/googletest/src/include" \ -L. 
-l$(CODEC_LIB) -l$(GTEST_LIB) $^ endif # TEST_INTRA_PRED_SPEED endif else include $(SRC_PATH_BARE)/third_party/googletest/gtest.mk GTEST_SRCS := $(addprefix third_party/googletest/src/,$(call enabled,GTEST_SRCS)) GTEST_OBJS=$(call objs,$(GTEST_SRCS)) ifeq ($(filter win%,$(TGT_OS)),$(TGT_OS)) # Disabling pthreads globally will cause issues on darwin and possibly elsewhere $(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += -DGTEST_HAS_PTHREAD=0 endif GTEST_INCLUDES := -I$(SRC_PATH_BARE)/third_party/googletest/src GTEST_INCLUDES += -I$(SRC_PATH_BARE)/third_party/googletest/src/include $(GTEST_OBJS) $(GTEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES) OBJS-yes += $(GTEST_OBJS) LIBS-yes += $(BUILD_PFX)libgtest.a $(BUILD_PFX)libgtest_g.a $(BUILD_PFX)libgtest_g.a: $(GTEST_OBJS) LIBVPX_TEST_OBJS=$(sort $(call objs,$(LIBVPX_TEST_SRCS))) $(LIBVPX_TEST_OBJS) $(LIBVPX_TEST_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES) OBJS-yes += $(LIBVPX_TEST_OBJS) BINS-yes += $(LIBVPX_TEST_BIN) CODEC_LIB=$(if $(CONFIG_DEBUG_LIBS),vpx_g,vpx) CODEC_LIB_SUF=$(if $(CONFIG_SHARED),$(SHARED_LIB_SUF),.a) TEST_LIBS := lib$(CODEC_LIB)$(CODEC_LIB_SUF) libgtest.a $(LIBVPX_TEST_BIN): $(TEST_LIBS) $(eval $(call linkerxx_template,$(LIBVPX_TEST_BIN), \ $(LIBVPX_TEST_OBJS) \ -L. -lvpx -lgtest $(extralibs) -lm)) ifneq ($(strip $(TEST_INTRA_PRED_SPEED_OBJS)),) $(TEST_INTRA_PRED_SPEED_OBJS) $(TEST_INTRA_PRED_SPEED_OBJS:.o=.d): CXXFLAGS += $(GTEST_INCLUDES) OBJS-yes += $(TEST_INTRA_PRED_SPEED_OBJS) BINS-yes += $(TEST_INTRA_PRED_SPEED_BIN) $(TEST_INTRA_PRED_SPEED_BIN): $(TEST_LIBS) $(eval $(call linkerxx_template,$(TEST_INTRA_PRED_SPEED_BIN), \ $(TEST_INTRA_PRED_SPEED_OBJS) \ -L. -lvpx -lgtest $(extralibs) -lm)) endif # TEST_INTRA_PRED_SPEED endif # CONFIG_UNIT_TESTS # Install test sources only if codec source is included INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(patsubst $(SRC_PATH_BARE)/%,%,\ $(shell find $(SRC_PATH_BARE)/third_party/googletest -type f)) INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(LIBVPX_TEST_SRCS) INSTALL-SRCS-$(CONFIG_CODEC_SRCS) += $(TEST_INTRA_PRED_SPEED_SRCS) define test_shard_template test:: test_shard.$(1) test-no-data-check:: test_shard_ndc.$(1) test_shard.$(1) test_shard_ndc.$(1): $(LIBVPX_TEST_BIN) @set -e; \ export GTEST_SHARD_INDEX=$(1); \ export GTEST_TOTAL_SHARDS=$(2); \ $(LIBVPX_TEST_BIN) test_shard.$(1): testdata .PHONY: test_shard.$(1) endef NUM_SHARDS := 10 SHARDS := 0 1 2 3 4 5 6 7 8 9 $(foreach s,$(SHARDS),$(eval $(call test_shard_template,$(s),$(NUM_SHARDS)))) endif ## ## documentation directives ## CLEAN-OBJS += libs.doxy DOCS-yes += libs.doxy libs.doxy: $(CODEC_DOC_SRCS) @echo " [CREATE] $@" @rm -f $@ @echo "INPUT += $^" >> $@ @echo "INCLUDE_PATH += ." >> $@; @echo "ENABLED_SECTIONS += $(sort $(CODEC_DOC_SECTIONS))" >> $@ ## Generate rtcd.h for all objects ifeq ($(CONFIG_DEPENDENCY_TRACKING),yes) $(OBJS-yes:.o=.d): $(RTCD) else $(OBJS-yes): $(RTCD) endif ## Update the global src list SRCS += $(CODEC_SRCS) $(LIBVPX_TEST_SRCS) $(GTEST_SRCS) ## ## vpxdec/vpxenc tests. ## ifeq ($(CONFIG_UNIT_TESTS),yes) TEST_BIN_PATH = . ifeq ($(CONFIG_MSVS),yes) # MSVC will build both Debug and Release configurations of tools in a # sub directory named for the current target. Assume the user wants to # run the Release tools, and assign TEST_BIN_PATH accordingly. # TODO(tomfinegan): Is this adequate for ARM? # TODO(tomfinegan): Support running the debug versions of tools? 
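# With the default TEST_BIN_PATH of '.', the assignment below resolves to, # e.g., ./x64/Release for a win64 target (the :win64=x64 substitution) and # ./$(TGT_OS)/Release for other MSVS targets.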
TEST_BIN_PATH := $(addsuffix /$(TGT_OS:win64=x64)/Release, $(TEST_BIN_PATH)) endif utiltest utiltest-no-data-check: $(qexec)$(SRC_PATH_BARE)/test/vpxdec.sh \ --test-data-path $(LIBVPX_TEST_DATA_PATH) \ --bin-path $(TEST_BIN_PATH) $(qexec)$(SRC_PATH_BARE)/test/vpxenc.sh \ --test-data-path $(LIBVPX_TEST_DATA_PATH) \ --bin-path $(TEST_BIN_PATH) utiltest: testdata else utiltest utiltest-no-data-check: @echo Unit tests must be enabled to make the utiltest target. endif ## ## Example tests. ## ifeq ($(CONFIG_UNIT_TESTS),yes) # All non-MSVC targets output example targets in a sub dir named examples. EXAMPLES_BIN_PATH = examples ifeq ($(CONFIG_MSVS),yes) # MSVC will build both Debug and Release configurations of the examples in a # sub directory named for the current target. Assume the user wants to # run the Release tools, and assign EXAMPLES_BIN_PATH accordingly. # TODO(tomfinegan): Is this adequate for ARM? # TODO(tomfinegan): Support running the debug versions of tools? EXAMPLES_BIN_PATH := $(TGT_OS:win64=x64)/Release endif exampletest exampletest-no-data-check: examples $(qexec)$(SRC_PATH_BARE)/test/examples.sh \ --test-data-path $(LIBVPX_TEST_DATA_PATH) \ --bin-path $(EXAMPLES_BIN_PATH) exampletest: testdata else exampletest exampletest-no-data-check: @echo Unit tests must be enabled to make the exampletest target. endif libvpx-1.8.2/mainpage.dox000066400000000000000000000030151357355204000153260ustar00rootroot00000000000000/*!\mainpage WebM Codec SDK \section main_contents Page Contents - \ref main_intro - \ref main_startpoints - \ref main_support \section main_intro Introduction Welcome to the WebM Codec SDK. This SDK allows you to integrate your applications with the VP8 and VP9 video codecs, high quality, royalty free, open source codecs deployed on billions of computers and devices worldwide. This distribution of the WebM Codec SDK includes the following support: \if vp8_encoder - \ref vp8_encoder \endif \if vp8_decoder - \ref vp8_decoder \endif \section main_startpoints Starting Points - Consult the \ref changelog for a complete list of improvements in this release. - The \ref readme contains instructions on recompiling the sample applications. - Read the \ref usage "usage" for a narrative on codec usage. \if samples - Read the \ref samples "sample code" for examples of how to interact with the codec. \endif - \ref codec reference \if encoder - \ref encoder reference \endif \if decoder - \ref decoder reference \endif \section main_support Support Options & FAQ The WebM project is an open source project supported by its community. For questions about this SDK, please mail the apps-devel@webmproject.org list. To contribute, see http://www.webmproject.org/code/contribute and mail codec-devel@webmproject.org. */ /*!\page changelog CHANGELOG \verbinclude CHANGELOG */ /*!\page readme README \verbinclude README */ /*!\defgroup codecs Supported Codecs */ libvpx-1.8.2/md5_utils.c000066400000000000000000000174701357355204000151140ustar00rootroot00000000000000/* * This code implements the MD5 message-digest algorithm. * The algorithm is due to Ron Rivest. This code was * written by Colin Plumb in 1993, no copyright is claimed. * This code is in the public domain; do with it what you wish. * * Equivalent code is available from RSA Data Security, Inc. * This code has been tested against that, and is equivalent, * except that you don't need to include two pages of legalese * with every copy. 
* * To compute the message digest of a chunk of bytes, declare an * MD5Context structure, pass it to MD5Init, call MD5Update as * needed on buffers full of bytes, and then call MD5Final, which * will fill a supplied 16-byte array with the digest. * * Changed so as no longer to depend on Colin Plumb's `usual.h' header * definitions * - Ian Jackson . * Still in the public domain. */ #include <string.h> /* for memcpy() */ #include "md5_utils.h" static void byteSwap(UWORD32 *buf, unsigned words) { md5byte *p; /* Only swap bytes for big-endian machines */ int i = 1; if (*(char *)&i == 1) return; p = (md5byte *)buf; do { *buf++ = (UWORD32)((unsigned)p[3] << 8 | p[2]) << 16 | ((unsigned)p[1] << 8 | p[0]); p += 4; } while (--words); } /* * Start MD5 accumulation. Set bit count to 0 and buffer to mysterious * initialization constants. */ void MD5Init(struct MD5Context *ctx) { ctx->buf[0] = 0x67452301; ctx->buf[1] = 0xefcdab89; ctx->buf[2] = 0x98badcfe; ctx->buf[3] = 0x10325476; ctx->bytes[0] = 0; ctx->bytes[1] = 0; } /* * Update context to reflect the concatenation of another buffer full * of bytes. */ void MD5Update(struct MD5Context *ctx, md5byte const *buf, unsigned len) { UWORD32 t; /* Update byte count */ t = ctx->bytes[0]; if ((ctx->bytes[0] = t + len) < t) ctx->bytes[1]++; /* Carry from low to high */ t = 64 - (t & 0x3f); /* Space available in ctx->in (at least 1) */ if (t > len) { memcpy((md5byte *)ctx->in + 64 - t, buf, len); return; } /* First chunk is an odd size */ memcpy((md5byte *)ctx->in + 64 - t, buf, t); byteSwap(ctx->in, 16); MD5Transform(ctx->buf, ctx->in); buf += t; len -= t; /* Process data in 64-byte chunks */ while (len >= 64) { memcpy(ctx->in, buf, 64); byteSwap(ctx->in, 16); MD5Transform(ctx->buf, ctx->in); buf += 64; len -= 64; } /* Handle any remaining bytes of data. */ memcpy(ctx->in, buf, len); } /* * Final wrapup - pad to 64-byte boundary with the bit pattern * 1 0* (64-bit count of bits processed, MSB-first) */ void MD5Final(md5byte digest[16], struct MD5Context *ctx) { int count = ctx->bytes[0] & 0x3f; /* Number of bytes in ctx->in */ md5byte *p = (md5byte *)ctx->in + count; /* Set the first char of padding to 0x80. There is always room. */ *p++ = 0x80; /* Bytes of padding needed to make 56 bytes (-8..55) */ count = 56 - 1 - count; if (count < 0) { /* Padding forces an extra block */ memset(p, 0, count + 8); byteSwap(ctx->in, 16); MD5Transform(ctx->buf, ctx->in); p = (md5byte *)ctx->in; count = 56; } memset(p, 0, count); byteSwap(ctx->in, 14); /* Append length in bits and transform */ ctx->in[14] = ctx->bytes[0] << 3; ctx->in[15] = ctx->bytes[1] << 3 | ctx->bytes[0] >> 29; MD5Transform(ctx->buf, ctx->in); byteSwap(ctx->buf, 4); memcpy(digest, ctx->buf, 16); memset(ctx, 0, sizeof(*ctx)); /* In case it's sensitive */ } #ifndef ASM_MD5 /* The four core functions - F1 is optimized somewhat */ /* #define F1(x, y, z) (x & y | ~x & z) */ #define F1(x, y, z) (z ^ (x & (y ^ z))) #define F2(x, y, z) F1(z, x, y) #define F3(x, y, z) (x ^ y ^ z) #define F4(x, y, z) (y ^ (x | ~z)) /* This is the central step in the MD5 algorithm.
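* Each of the 64 steps mixes one message word into the state: f is one of the * round functions F1..F4 above, in is the message word already summed with its * per-step additive constant, and s is the left-rotation amount.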
*/ #define MD5STEP(f, w, x, y, z, in, s) \ (w += f(x, y, z) + in, w = (w << s | w >> (32 - s)) + x) #if defined(__clang__) && defined(__has_attribute) #if __has_attribute(no_sanitize) #define VPX_NO_UNSIGNED_OVERFLOW_CHECK \ __attribute__((no_sanitize("unsigned-integer-overflow"))) #endif #endif #ifndef VPX_NO_UNSIGNED_OVERFLOW_CHECK #define VPX_NO_UNSIGNED_OVERFLOW_CHECK #endif /* * The core of the MD5 algorithm, this alters an existing MD5 hash to * reflect the addition of 16 longwords of new data. MD5Update blocks * the data and converts bytes into longwords for this routine. */ VPX_NO_UNSIGNED_OVERFLOW_CHECK void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]) { UWORD32 a, b, c, d; a = buf[0]; b = buf[1]; c = buf[2]; d = buf[3]; MD5STEP(F1, a, b, c, d, in[0] + 0xd76aa478, 7); MD5STEP(F1, d, a, b, c, in[1] + 0xe8c7b756, 12); MD5STEP(F1, c, d, a, b, in[2] + 0x242070db, 17); MD5STEP(F1, b, c, d, a, in[3] + 0xc1bdceee, 22); MD5STEP(F1, a, b, c, d, in[4] + 0xf57c0faf, 7); MD5STEP(F1, d, a, b, c, in[5] + 0x4787c62a, 12); MD5STEP(F1, c, d, a, b, in[6] + 0xa8304613, 17); MD5STEP(F1, b, c, d, a, in[7] + 0xfd469501, 22); MD5STEP(F1, a, b, c, d, in[8] + 0x698098d8, 7); MD5STEP(F1, d, a, b, c, in[9] + 0x8b44f7af, 12); MD5STEP(F1, c, d, a, b, in[10] + 0xffff5bb1, 17); MD5STEP(F1, b, c, d, a, in[11] + 0x895cd7be, 22); MD5STEP(F1, a, b, c, d, in[12] + 0x6b901122, 7); MD5STEP(F1, d, a, b, c, in[13] + 0xfd987193, 12); MD5STEP(F1, c, d, a, b, in[14] + 0xa679438e, 17); MD5STEP(F1, b, c, d, a, in[15] + 0x49b40821, 22); MD5STEP(F2, a, b, c, d, in[1] + 0xf61e2562, 5); MD5STEP(F2, d, a, b, c, in[6] + 0xc040b340, 9); MD5STEP(F2, c, d, a, b, in[11] + 0x265e5a51, 14); MD5STEP(F2, b, c, d, a, in[0] + 0xe9b6c7aa, 20); MD5STEP(F2, a, b, c, d, in[5] + 0xd62f105d, 5); MD5STEP(F2, d, a, b, c, in[10] + 0x02441453, 9); MD5STEP(F2, c, d, a, b, in[15] + 0xd8a1e681, 14); MD5STEP(F2, b, c, d, a, in[4] + 0xe7d3fbc8, 20); MD5STEP(F2, a, b, c, d, in[9] + 0x21e1cde6, 5); MD5STEP(F2, d, a, b, c, in[14] + 0xc33707d6, 9); MD5STEP(F2, c, d, a, b, in[3] + 0xf4d50d87, 14); MD5STEP(F2, b, c, d, a, in[8] + 0x455a14ed, 20); MD5STEP(F2, a, b, c, d, in[13] + 0xa9e3e905, 5); MD5STEP(F2, d, a, b, c, in[2] + 0xfcefa3f8, 9); MD5STEP(F2, c, d, a, b, in[7] + 0x676f02d9, 14); MD5STEP(F2, b, c, d, a, in[12] + 0x8d2a4c8a, 20); MD5STEP(F3, a, b, c, d, in[5] + 0xfffa3942, 4); MD5STEP(F3, d, a, b, c, in[8] + 0x8771f681, 11); MD5STEP(F3, c, d, a, b, in[11] + 0x6d9d6122, 16); MD5STEP(F3, b, c, d, a, in[14] + 0xfde5380c, 23); MD5STEP(F3, a, b, c, d, in[1] + 0xa4beea44, 4); MD5STEP(F3, d, a, b, c, in[4] + 0x4bdecfa9, 11); MD5STEP(F3, c, d, a, b, in[7] + 0xf6bb4b60, 16); MD5STEP(F3, b, c, d, a, in[10] + 0xbebfbc70, 23); MD5STEP(F3, a, b, c, d, in[13] + 0x289b7ec6, 4); MD5STEP(F3, d, a, b, c, in[0] + 0xeaa127fa, 11); MD5STEP(F3, c, d, a, b, in[3] + 0xd4ef3085, 16); MD5STEP(F3, b, c, d, a, in[6] + 0x04881d05, 23); MD5STEP(F3, a, b, c, d, in[9] + 0xd9d4d039, 4); MD5STEP(F3, d, a, b, c, in[12] + 0xe6db99e5, 11); MD5STEP(F3, c, d, a, b, in[15] + 0x1fa27cf8, 16); MD5STEP(F3, b, c, d, a, in[2] + 0xc4ac5665, 23); MD5STEP(F4, a, b, c, d, in[0] + 0xf4292244, 6); MD5STEP(F4, d, a, b, c, in[7] + 0x432aff97, 10); MD5STEP(F4, c, d, a, b, in[14] + 0xab9423a7, 15); MD5STEP(F4, b, c, d, a, in[5] + 0xfc93a039, 21); MD5STEP(F4, a, b, c, d, in[12] + 0x655b59c3, 6); MD5STEP(F4, d, a, b, c, in[3] + 0x8f0ccc92, 10); MD5STEP(F4, c, d, a, b, in[10] + 0xffeff47d, 15); MD5STEP(F4, b, c, d, a, in[1] + 0x85845dd1, 21); MD5STEP(F4, a, b, c, d, in[8] + 0x6fa87e4f, 6); MD5STEP(F4, 
d, a, b, c, in[15] + 0xfe2ce6e0, 10); MD5STEP(F4, c, d, a, b, in[6] + 0xa3014314, 15); MD5STEP(F4, b, c, d, a, in[13] + 0x4e0811a1, 21); MD5STEP(F4, a, b, c, d, in[4] + 0xf7537e82, 6); MD5STEP(F4, d, a, b, c, in[11] + 0xbd3af235, 10); MD5STEP(F4, c, d, a, b, in[2] + 0x2ad7d2bb, 15); MD5STEP(F4, b, c, d, a, in[9] + 0xeb86d391, 21); buf[0] += a; buf[1] += b; buf[2] += c; buf[3] += d; } #undef VPX_NO_UNSIGNED_OVERFLOW_CHECK #endif libvpx-1.8.2/md5_utils.h /* * This is the header file for the MD5 message-digest algorithm. * The algorithm is due to Ron Rivest. This code was * written by Colin Plumb in 1993, no copyright is claimed. * This code is in the public domain; do with it what you wish. * * Equivalent code is available from RSA Data Security, Inc. * This code has been tested against that, and is equivalent, * except that you don't need to include two pages of legalese * with every copy. * * To compute the message digest of a chunk of bytes, declare an * MD5Context structure, pass it to MD5Init, call MD5Update as * needed on buffers full of bytes, and then call MD5Final, which * will fill a supplied 16-byte array with the digest. * * Changed so as no longer to depend on Colin Plumb's `usual.h' * header definitions * - Ian Jackson . * Still in the public domain. */ #ifndef VPX_MD5_UTILS_H_ #define VPX_MD5_UTILS_H_ #ifdef __cplusplus extern "C" { #endif #define md5byte unsigned char #define UWORD32 unsigned int typedef struct MD5Context MD5Context; struct MD5Context { UWORD32 buf[4]; UWORD32 bytes[2]; UWORD32 in[16]; }; void MD5Init(struct MD5Context *context); void MD5Update(struct MD5Context *context, md5byte const *buf, unsigned len); void MD5Final(unsigned char digest[16], struct MD5Context *context); void MD5Transform(UWORD32 buf[4], UWORD32 const in[16]); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_MD5_UTILS_H_ libvpx-1.8.2/rate_hist.c /* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <limits.h> #include <math.h> #include <stdio.h> #include <stdlib.h> #include "./rate_hist.h" #define RATE_BINS 100 #define HIST_BAR_MAX 40 struct hist_bucket { int low; int high; int count; }; struct rate_hist { int64_t *pts; int *sz; int samples; int frames; struct hist_bucket bucket[RATE_BINS]; int total; }; struct rate_hist *init_rate_histogram(const vpx_codec_enc_cfg_t *cfg, const vpx_rational_t *fps) { int i; struct rate_hist *hist = calloc(1, sizeof(*hist)); if (hist == NULL || cfg == NULL || fps == NULL || fps->num == 0 || fps->den == 0) { destroy_rate_histogram(hist); return NULL; } // Determine the number of samples in the buffer.
Use the file's framerate // to determine the number of frames in rc_buf_sz milliseconds, with an // adjustment (5/4) to account for alt-refs hist->samples = cfg->rc_buf_sz * 5 / 4 * fps->num / fps->den / 1000; // prevent division by zero if (hist->samples == 0) hist->samples = 1; hist->frames = 0; hist->total = 0; hist->pts = calloc(hist->samples, sizeof(*hist->pts)); hist->sz = calloc(hist->samples, sizeof(*hist->sz)); for (i = 0; i < RATE_BINS; i++) { hist->bucket[i].low = INT_MAX; hist->bucket[i].high = 0; hist->bucket[i].count = 0; } return hist; } void destroy_rate_histogram(struct rate_hist *hist) { if (hist) { free(hist->pts); free(hist->sz); free(hist); } } void update_rate_histogram(struct rate_hist *hist, const vpx_codec_enc_cfg_t *cfg, const vpx_codec_cx_pkt_t *pkt) { int i; int64_t then = 0; int64_t avg_bitrate = 0; int64_t sum_sz = 0; const int64_t now = pkt->data.frame.pts * 1000 * (uint64_t)cfg->g_timebase.num / (uint64_t)cfg->g_timebase.den; int idx; if (hist == NULL || cfg == NULL || pkt == NULL) return; idx = hist->frames++ % hist->samples; hist->pts[idx] = now; hist->sz[idx] = (int)pkt->data.frame.sz; if (now < cfg->rc_buf_initial_sz) return; if (!cfg->rc_target_bitrate) return; then = now; /* Sum the size over the past rc_buf_sz ms */ for (i = hist->frames; i > 0 && hist->frames - i < hist->samples; i--) { const int i_idx = (i - 1) % hist->samples; then = hist->pts[i_idx]; if (now - then > cfg->rc_buf_sz) break; sum_sz += hist->sz[i_idx]; } if (now == then) return; avg_bitrate = sum_sz * 8 * 1000 / (now - then); idx = (int)(avg_bitrate * (RATE_BINS / 2) / (cfg->rc_target_bitrate * 1000)); if (idx < 0) idx = 0; if (idx > RATE_BINS - 1) idx = RATE_BINS - 1; if (hist->bucket[idx].low > avg_bitrate) hist->bucket[idx].low = (int)avg_bitrate; if (hist->bucket[idx].high < avg_bitrate) hist->bucket[idx].high = (int)avg_bitrate; hist->bucket[idx].count++; hist->total++; } static int merge_hist_buckets(struct hist_bucket *bucket, int max_buckets, int *num_buckets) { int small_bucket = 0, merge_bucket = INT_MAX, big_bucket = 0; int buckets; int i; assert(bucket != NULL); assert(num_buckets != NULL); buckets = *num_buckets; /* Find the extrema for this list of buckets */ big_bucket = small_bucket = 0; for (i = 0; i < buckets; i++) { if (bucket[i].count < bucket[small_bucket].count) small_bucket = i; if (bucket[i].count > bucket[big_bucket].count) big_bucket = i; } /* If we have too many buckets, merge the smallest with an adjacent * bucket. */ while (buckets > max_buckets) { int last_bucket = buckets - 1; /* merge the small bucket with an adjacent one. 
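* The neighbor with the smaller count is preferred, so merging removes as * little detail from the histogram as possible.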
*/ if (small_bucket == 0) merge_bucket = 1; else if (small_bucket == last_bucket) merge_bucket = last_bucket - 1; else if (bucket[small_bucket - 1].count < bucket[small_bucket + 1].count) merge_bucket = small_bucket - 1; else merge_bucket = small_bucket + 1; assert(abs(merge_bucket - small_bucket) <= 1); assert(small_bucket < buckets); assert(big_bucket < buckets); assert(merge_bucket < buckets); if (merge_bucket < small_bucket) { bucket[merge_bucket].high = bucket[small_bucket].high; bucket[merge_bucket].count += bucket[small_bucket].count; } else { bucket[small_bucket].high = bucket[merge_bucket].high; bucket[small_bucket].count += bucket[merge_bucket].count; merge_bucket = small_bucket; } assert(bucket[merge_bucket].low != bucket[merge_bucket].high); buckets--; /* Remove the merge_bucket from the list, and find the new small * and big buckets while we're at it */ big_bucket = small_bucket = 0; for (i = 0; i < buckets; i++) { if (i > merge_bucket) bucket[i] = bucket[i + 1]; if (bucket[i].count < bucket[small_bucket].count) small_bucket = i; if (bucket[i].count > bucket[big_bucket].count) big_bucket = i; } } *num_buckets = buckets; return bucket[big_bucket].count; } static void show_histogram(const struct hist_bucket *bucket, int buckets, int total, int scale) { const char *pat1, *pat2; int i; assert(bucket != NULL); switch ((int)(log(bucket[buckets - 1].high) / log(10)) + 1) { case 1: case 2: pat1 = "%4d %2s: "; pat2 = "%4d-%2d: "; break; case 3: pat1 = "%5d %3s: "; pat2 = "%5d-%3d: "; break; case 4: pat1 = "%6d %4s: "; pat2 = "%6d-%4d: "; break; case 5: pat1 = "%7d %5s: "; pat2 = "%7d-%5d: "; break; case 6: pat1 = "%8d %6s: "; pat2 = "%8d-%6d: "; break; case 7: pat1 = "%9d %7s: "; pat2 = "%9d-%7d: "; break; default: pat1 = "%12d %10s: "; pat2 = "%12d-%10d: "; break; } for (i = 0; i < buckets; i++) { int len; int j; float pct; pct = (float)(100.0 * bucket[i].count / total); len = HIST_BAR_MAX * bucket[i].count / scale; if (len < 1) len = 1; assert(len <= HIST_BAR_MAX); if (bucket[i].low == bucket[i].high) fprintf(stderr, pat1, bucket[i].low, ""); else fprintf(stderr, pat2, bucket[i].low, bucket[i].high); for (j = 0; j < HIST_BAR_MAX; j++) fprintf(stderr, j < len ? "=" : " "); fprintf(stderr, "\t%5d (%6.2f%%)\n", bucket[i].count, pct); } } void show_q_histogram(const int counts[64], int max_buckets) { struct hist_bucket bucket[64]; int buckets = 0; int total = 0; int scale; int i; for (i = 0; i < 64; i++) { if (counts[i]) { bucket[buckets].low = bucket[buckets].high = i; bucket[buckets].count = counts[i]; buckets++; total += counts[i]; } } fprintf(stderr, "\nQuantizer Selection:\n"); scale = merge_hist_buckets(bucket, max_buckets, &buckets); show_histogram(bucket, buckets, total, scale); } void show_rate_histogram(struct rate_hist *hist, const vpx_codec_enc_cfg_t *cfg, int max_buckets) { int i, scale; int buckets = 0; if (hist == NULL || cfg == NULL) return; for (i = 0; i < RATE_BINS; i++) { if (hist->bucket[i].low == INT_MAX) continue; hist->bucket[buckets++] = hist->bucket[i]; } fprintf(stderr, "\nRate (over %dms window):\n", cfg->rc_buf_sz); scale = merge_hist_buckets(hist->bucket, max_buckets, &buckets); show_histogram(hist->bucket, buckets, hist->total, scale); } libvpx-1.8.2/rate_hist.h000066400000000000000000000022251357355204000151660ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_RATE_HIST_H_ #define VPX_RATE_HIST_H_ #include "vpx/vpx_encoder.h" #ifdef __cplusplus extern "C" { #endif struct rate_hist; struct rate_hist *init_rate_histogram(const vpx_codec_enc_cfg_t *cfg, const vpx_rational_t *fps); void destroy_rate_histogram(struct rate_hist *hist); void update_rate_histogram(struct rate_hist *hist, const vpx_codec_enc_cfg_t *cfg, const vpx_codec_cx_pkt_t *pkt); void show_q_histogram(const int counts[64], int max_buckets); void show_rate_histogram(struct rate_hist *hist, const vpx_codec_enc_cfg_t *cfg, int max_buckets); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_RATE_HIST_H_ libvpx-1.8.2/solution.mk ## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## # libvpx reverse dependencies (targets that depend on libvpx) VPX_NONDEPS=$(addsuffix .$(VCPROJ_SFX),vpx gtest) VPX_RDEPS=$(foreach vcp,\ $(filter-out $(VPX_NONDEPS),$^), --dep=$(vcp:.$(VCPROJ_SFX)=):vpx) vpx.sln: $(wildcard *.$(VCPROJ_SFX)) @echo " [CREATE] $@" $(SRC_PATH_BARE)/build/make/gen_msvs_sln.sh \ $(if $(filter vpx.$(VCPROJ_SFX),$^),$(VPX_RDEPS)) \ --dep=test_libvpx:gtest \ --ver=$(CONFIG_VS_VERSION)\ --out=$@ $^ vpx.sln.mk: vpx.sln @true PROJECTS-yes += vpx.sln vpx.sln.mk -include vpx.sln.mk # Always install this file, as it is an unconditional post-build rule. INSTALL_MAPS += src/% $(SRC_PATH_BARE)/% INSTALL-SRCS-yes += $(target).mk libvpx-1.8.2/test/ libvpx-1.8.2/test/acm_random.h /* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_ACM_RANDOM_H_ #define VPX_TEST_ACM_RANDOM_H_ #include <assert.h> #include <limits.h> #include "third_party/googletest/src/include/gtest/gtest.h" #include "vpx/vpx_integer.h" namespace libvpx_test { class ACMRandom { public: ACMRandom() : random_(DeterministicSeed()) {} explicit ACMRandom(int seed) : random_(seed) {} void Reset(int seed) { random_.Reseed(seed); } uint16_t Rand16(void) { const uint32_t value = random_.Generate(testing::internal::Random::kMaxRange); return (value >> 15) & 0xffff; } int32_t Rand20Signed(void) { // Use 20 bits: values between 524287 and -524288. const uint32_t value = random_.Generate(1048576); return static_cast<int32_t>(value) - 524288; } int16_t Rand16Signed(void) { // Use 16 bits: values between 32767 and -32768. return static_cast<int16_t>(random_.Generate(65536)); } int16_t Rand13Signed(void) { // Use 13 bits: values between 4095 and -4096.
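// Generate(8192) yields a value in [0, 8191]; subtracting 4096 recenters // the range around zero.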
const uint32_t value = random_.Generate(8192); return static_cast<int16_t>(value) - 4096; } int16_t Rand9Signed(void) { // Use 9 bits: values between 255 (0x0FF) and -256 (0x100). const uint32_t value = random_.Generate(512); return static_cast<int16_t>(value) - 256; } uint8_t Rand8(void) { const uint32_t value = random_.Generate(testing::internal::Random::kMaxRange); // There's a bit more entropy in the upper bits of this implementation. return (value >> 23) & 0xff; } uint8_t Rand8Extremes(void) { // Returns a random value near 0 or near 255, to better exercise // saturation behavior. const uint8_t r = Rand8(); return static_cast<uint8_t>((r < 128) ? r << 4 : r >> 4); } uint32_t RandRange(const uint32_t range) { // testing::internal::Random::Generate provides values in the range // [0, testing::internal::Random::kMaxRange). assert(range <= testing::internal::Random::kMaxRange); return random_.Generate(range); } int PseudoUniform(int range) { return random_.Generate(range); } int operator()(int n) { return PseudoUniform(n); } static int DeterministicSeed(void) { return 0xbaba; } private: testing::internal::Random random_; }; } // namespace libvpx_test #endif // VPX_TEST_ACM_RANDOM_H_ libvpx-1.8.2/test/active_map_refresh_test.cc /* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <algorithm> #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/util.h" #include "test/y4m_video_source.h" namespace { // Check if any pixel in a 16x16 macroblock varies between frames.
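// Returns 1 at the first differing sample, 0 if the block is unchanged. For // the chroma planes the comparison region is subsampled according to the // image's x/y chroma shift.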
int CheckMb(const vpx_image_t &current, const vpx_image_t &previous, int mb_r, int mb_c) { for (int plane = 0; plane < 3; plane++) { int r = 16 * mb_r; int c0 = 16 * mb_c; int r_top = std::min(r + 16, static_cast<int>(current.d_h)); int c_top = std::min(c0 + 16, static_cast<int>(current.d_w)); r = std::max(r, 0); c0 = std::max(c0, 0); if (plane > 0 && current.x_chroma_shift) { c_top = (c_top + 1) >> 1; c0 >>= 1; } if (plane > 0 && current.y_chroma_shift) { r_top = (r_top + 1) >> 1; r >>= 1; } for (; r < r_top; ++r) { for (int c = c0; c < c_top; ++c) { if (current.planes[plane][current.stride[plane] * r + c] != previous.planes[plane][previous.stride[plane] * r + c]) { return 1; } } } } return 0; } void GenerateMap(int mb_rows, int mb_cols, const vpx_image_t &current, const vpx_image_t &previous, uint8_t *map) { for (int mb_r = 0; mb_r < mb_rows; ++mb_r) { for (int mb_c = 0; mb_c < mb_cols; ++mb_c) { map[mb_r * mb_cols + mb_c] = CheckMb(current, previous, mb_r, mb_c); } } } const int kAqModeCyclicRefresh = 3; class ActiveMapRefreshTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { protected: ActiveMapRefreshTest() : EncoderTest(GET_PARAM(0)) {} virtual ~ActiveMapRefreshTest() {} virtual void SetUp() { InitializeConfig(); SetMode(GET_PARAM(1)); cpu_used_ = GET_PARAM(2); } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { ::libvpx_test::Y4mVideoSource *y4m_video = static_cast<::libvpx_test::Y4mVideoSource *>(video); if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, cpu_used_); encoder->Control(VP9E_SET_AQ_MODE, kAqModeCyclicRefresh); } else if (video->frame() >= 2 && video->img()) { vpx_image_t *current = video->img(); vpx_image_t *previous = y4m_holder_->img(); ASSERT_TRUE(previous != NULL); vpx_active_map_t map = vpx_active_map_t(); const int width = static_cast<int>(current->d_w); const int height = static_cast<int>(current->d_h); const int mb_width = (width + 15) / 16; const int mb_height = (height + 15) / 16; uint8_t *active_map = new uint8_t[mb_width * mb_height]; GenerateMap(mb_height, mb_width, *current, *previous, active_map); map.cols = mb_width; map.rows = mb_height; map.active_map = active_map; encoder->Control(VP8E_SET_ACTIVEMAP, &map); delete[] active_map; } if (video->img()) { y4m_video->SwapBuffers(y4m_holder_); } } int cpu_used_; ::libvpx_test::Y4mVideoSource *y4m_holder_; }; TEST_P(ActiveMapRefreshTest, Test) { cfg_.g_lag_in_frames = 0; cfg_.g_profile = 1; cfg_.rc_target_bitrate = 600; cfg_.rc_resize_allowed = 0; cfg_.rc_min_quantizer = 8; cfg_.rc_max_quantizer = 30; cfg_.g_pass = VPX_RC_ONE_PASS; cfg_.rc_end_usage = VPX_CBR; cfg_.kf_max_dist = 90000; ::libvpx_test::Y4mVideoSource video("desktop_credits.y4m", 0, 30); ::libvpx_test::Y4mVideoSource video_holder("desktop_credits.y4m", 0, 30); video_holder.Begin(); y4m_holder_ = &video_holder; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } VP9_INSTANTIATE_TEST_CASE(ActiveMapRefreshTest, ::testing::Values(::libvpx_test::kRealTime), ::testing::Range(5, 6)); } // namespace libvpx-1.8.2/test/active_map_test.cc /* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <climits> #include <vector> #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" namespace { class ActiveMapTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { protected: static const int kWidth = 208; static const int kHeight = 144; ActiveMapTest() : EncoderTest(GET_PARAM(0)) {} virtual ~ActiveMapTest() {} virtual void SetUp() { InitializeConfig(); SetMode(GET_PARAM(1)); cpu_used_ = GET_PARAM(2); } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, cpu_used_); } else if (video->frame() == 3) { vpx_active_map_t map = vpx_active_map_t(); /* clang-format off */ uint8_t active_map[9 * 13] = { 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, }; /* clang-format on */ map.cols = (kWidth + 15) / 16; map.rows = (kHeight + 15) / 16; ASSERT_EQ(map.cols, 13u); ASSERT_EQ(map.rows, 9u); map.active_map = active_map; encoder->Control(VP8E_SET_ACTIVEMAP, &map); } else if (video->frame() == 15) { vpx_active_map_t map = vpx_active_map_t(); map.cols = (kWidth + 15) / 16; map.rows = (kHeight + 15) / 16; map.active_map = NULL; encoder->Control(VP8E_SET_ACTIVEMAP, &map); } } int cpu_used_; }; TEST_P(ActiveMapTest, Test) { // Validate that this non-multiple-of-64-wide clip encodes cfg_.g_lag_in_frames = 0; cfg_.rc_target_bitrate = 400; cfg_.rc_resize_allowed = 0; cfg_.g_pass = VPX_RC_ONE_PASS; cfg_.rc_end_usage = VPX_CBR; cfg_.kf_max_dist = 90000; ::libvpx_test::I420VideoSource video("hantro_odd.yuv", kWidth, kHeight, 30, 1, 0, 20); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } VP9_INSTANTIATE_TEST_CASE(ActiveMapTest, ::testing::Values(::libvpx_test::kRealTime), ::testing::Range(0, 9)); } // namespace libvpx-1.8.2/test/add_noise_test.cc /* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <math.h> #include <tuple> #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/postproc.h" #include "vpx_mem/vpx_mem.h" namespace { static const int kNoiseSize = 3072; // TODO(jimbankoski): make width and height integers not unsigned.
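// AddNoiseFunc matches the signature of vpx_plane_add_noise_c() and of the // platform-specific variants exercised by the INSTANTIATE_TEST_CASE_P lists // below.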
typedef void (*AddNoiseFunc)(uint8_t *start, const int8_t *noise, int blackclamp, int whiteclamp, int width, int height, int pitch); typedef std::tuple<double, AddNoiseFunc> AddNoiseTestFPParam; class AddNoiseTest : public ::testing::Test, public ::testing::WithParamInterface<AddNoiseTestFPParam> { public: virtual void TearDown() { libvpx_test::ClearSystemState(); } virtual ~AddNoiseTest() {} }; double stddev6(char a, char b, char c, char d, char e, char f) { const double n = (a + b + c + d + e + f) / 6.0; const double v = ((a - n) * (a - n) + (b - n) * (b - n) + (c - n) * (c - n) + (d - n) * (d - n) + (e - n) * (e - n) + (f - n) * (f - n)) / 6.0; return sqrt(v); } TEST_P(AddNoiseTest, CheckNoiseAdded) { const int width = 64; const int height = 64; const int image_size = width * height; int8_t noise[kNoiseSize]; const int clamp = vpx_setup_noise(GET_PARAM(0), noise, kNoiseSize); uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, sizeof(*s))); ASSERT_TRUE(s != NULL); memset(s, 99, image_size * sizeof(*s)); ASM_REGISTER_STATE_CHECK( GET_PARAM(1)(s, noise, clamp, clamp, width, height, width)); // Check to make sure we don't end up having either the same or no added // noise either vertically or horizontally. for (int i = 0; i < image_size - 6 * width - 6; ++i) { const double hd = stddev6(s[i] - 99, s[i + 1] - 99, s[i + 2] - 99, s[i + 3] - 99, s[i + 4] - 99, s[i + 5] - 99); const double vd = stddev6(s[i] - 99, s[i + width] - 99, s[i + 2 * width] - 99, s[i + 3 * width] - 99, s[i + 4 * width] - 99, s[i + 5 * width] - 99); EXPECT_NE(hd, 0); EXPECT_NE(vd, 0); } // Initialize pixels in the image to 255 and check for roll over. memset(s, 255, image_size); ASM_REGISTER_STATE_CHECK( GET_PARAM(1)(s, noise, clamp, clamp, width, height, width)); // Check to make sure we don't roll over. for (int i = 0; i < image_size; ++i) { EXPECT_GT(static_cast<int>(s[i]), clamp) << "i = " << i; } // Initialize pixels in the image to 0 and check for roll under. memset(s, 0, image_size); ASM_REGISTER_STATE_CHECK( GET_PARAM(1)(s, noise, clamp, clamp, width, height, width)); // Check to make sure we don't roll under.
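// (The pixels started at 0, so any wrap-around below zero would surface as a // value near 255; with correct clamping every sample stays below 255 - clamp.)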
for (int i = 0; i < image_size; ++i) { EXPECT_LT(static_cast<int>(s[i]), 255 - clamp) << "i = " << i; } vpx_free(s); } TEST_P(AddNoiseTest, CheckCvsAssembly) { const int width = 64; const int height = 64; const int image_size = width * height; int8_t noise[kNoiseSize]; const int clamp = vpx_setup_noise(4.4, noise, kNoiseSize); uint8_t *const s = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1)); uint8_t *const d = reinterpret_cast<uint8_t *>(vpx_calloc(image_size, 1)); ASSERT_TRUE(s != NULL); ASSERT_TRUE(d != NULL); memset(s, 99, image_size); memset(d, 99, image_size); srand(0); ASM_REGISTER_STATE_CHECK( GET_PARAM(1)(s, noise, clamp, clamp, width, height, width)); srand(0); ASM_REGISTER_STATE_CHECK( vpx_plane_add_noise_c(d, noise, clamp, clamp, width, height, width)); for (int i = 0; i < image_size; ++i) { EXPECT_EQ(static_cast<int>(s[i]), static_cast<int>(d[i])) << "i = " << i; } vpx_free(d); vpx_free(s); } using std::make_tuple; INSTANTIATE_TEST_CASE_P( C, AddNoiseTest, ::testing::Values(make_tuple(3.25, vpx_plane_add_noise_c), make_tuple(4.4, vpx_plane_add_noise_c))); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, AddNoiseTest, ::testing::Values(make_tuple(3.25, vpx_plane_add_noise_sse2), make_tuple(4.4, vpx_plane_add_noise_sse2))); #endif #if HAVE_MSA INSTANTIATE_TEST_CASE_P( MSA, AddNoiseTest, ::testing::Values(make_tuple(3.25, vpx_plane_add_noise_msa), make_tuple(4.4, vpx_plane_add_noise_msa))); #endif } // namespace libvpx-1.8.2/test/alt_ref_aq_segment_test.cc /* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" namespace { class AltRefAqSegmentTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { protected: AltRefAqSegmentTest() : EncoderTest(GET_PARAM(0)) {} virtual ~AltRefAqSegmentTest() {} virtual void SetUp() { InitializeConfig(); SetMode(GET_PARAM(1)); set_cpu_used_ = GET_PARAM(2); aq_mode_ = 0; alt_ref_aq_mode_ = 0; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); encoder->Control(VP9E_SET_ALT_REF_AQ, alt_ref_aq_mode_); encoder->Control(VP9E_SET_AQ_MODE, aq_mode_); encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 100); } } int set_cpu_used_; int aq_mode_; int alt_ref_aq_mode_; }; // Validate that this ALT_REF_AQ/AQ segmentation mode // (ALT_REF_AQ=0, AQ=0/no_aq) // encodes and decodes without a mismatch.
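// (Each test below enables alt-ref AQ by setting alt_ref_aq_mode_ to 1 and // varies only the base aq_mode_.)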
TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ0) { cfg_.rc_min_quantizer = 8; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_VBR; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_target_bitrate = 300; aq_mode_ = 0; alt_ref_aq_mode_ = 1; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 100); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } // Validate that this ALT_REF_AQ/AQ segmentation mode // (ALT_REF_AQ=0, AQ=1/variance_aq) // encodes and decodes without a mismatch. TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ1) { cfg_.rc_min_quantizer = 8; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_VBR; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_target_bitrate = 300; aq_mode_ = 1; alt_ref_aq_mode_ = 1; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 100); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } // Validate that this ALT_REF_AQ/AQ segmentation mode // (ALT_REF_AQ=0, AQ=2/complexity_aq) // encodes and decodes without a mismatch. TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ2) { cfg_.rc_min_quantizer = 8; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_VBR; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_target_bitrate = 300; aq_mode_ = 2; alt_ref_aq_mode_ = 1; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 100); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } // Validate that this ALT_REF_AQ/AQ segmentation mode // (ALT_REF_AQ=0, AQ=3/cyclicrefresh_aq) // encodes and decodes without a mismatch. TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ3) { cfg_.rc_min_quantizer = 8; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_VBR; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_target_bitrate = 300; aq_mode_ = 3; alt_ref_aq_mode_ = 1; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 100); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } // Validate that this ALT_REF_AQ/AQ segmentation mode // (ALT_REF_AQ=0, AQ=4/equator360_aq) // encodes and decodes without a mismatch. TEST_P(AltRefAqSegmentTest, TestNoMisMatchAltRefAQ4) { cfg_.rc_min_quantizer = 8; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_VBR; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_target_bitrate = 300; aq_mode_ = 4; alt_ref_aq_mode_ = 1; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 100); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } VP9_INSTANTIATE_TEST_CASE(AltRefAqSegmentTest, ::testing::Values(::libvpx_test::kOnePassGood, ::libvpx_test::kTwoPassGood), ::testing::Range(2, 5)); } // namespace libvpx-1.8.2/test/altref_test.cc000066400000000000000000000117121357355204000166360ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" namespace { #if CONFIG_VP8_ENCODER // lookahead range: [kLookAheadMin, kLookAheadMax). const int kLookAheadMin = 5; const int kLookAheadMax = 26; class AltRefTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<int> { protected: AltRefTest() : EncoderTest(GET_PARAM(0)), altref_count_(0) {} virtual ~AltRefTest() {} virtual void SetUp() { InitializeConfig(); SetMode(libvpx_test::kTwoPassGood); } virtual void BeginPassHook(unsigned int /*pass*/) { altref_count_ = 0; } virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_CPUUSED, 3); } } virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { if (pkt->data.frame.flags & VPX_FRAME_IS_INVISIBLE) ++altref_count_; } int altref_count() const { return altref_count_; } private: int altref_count_; }; TEST_P(AltRefTest, MonotonicTimestamps) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 1000; cfg_.g_lag_in_frames = GET_PARAM(1); libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, timebase.den, timebase.num, 0, 30); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); EXPECT_GE(altref_count(), 1); } VP8_INSTANTIATE_TEST_CASE(AltRefTest, ::testing::Range(kLookAheadMin, kLookAheadMax)); #endif // CONFIG_VP8_ENCODER class AltRefForcedKeyTestLarge : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { protected: AltRefForcedKeyTestLarge() : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), cpu_used_(GET_PARAM(2)), forced_kf_frame_num_(1), frame_num_(0) {} virtual ~AltRefForcedKeyTestLarge() {} virtual void SetUp() { InitializeConfig(); SetMode(encoding_mode_); cfg_.rc_end_usage = VPX_VBR; cfg_.g_threads = 0; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, cpu_used_); encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); #if CONFIG_VP9_ENCODER // override test default for tile columns if necessary. if (GET_PARAM(0) == &libvpx_test::kVP9) { encoder->Control(VP9E_SET_TILE_COLUMNS, 6); } #endif } frame_flags_ = (video->frame() == forced_kf_frame_num_) ?
VPX_EFLAG_FORCE_KF : 0; } virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { if (frame_num_ == forced_kf_frame_num_) { ASSERT_TRUE(!!(pkt->data.frame.flags & VPX_FRAME_IS_KEY)) << "Frame #" << frame_num_ << " isn't a keyframe!"; } ++frame_num_; } ::libvpx_test::TestMode encoding_mode_; int cpu_used_; unsigned int forced_kf_frame_num_; unsigned int frame_num_; }; TEST_P(AltRefForcedKeyTestLarge, Frame1IsKey) { const vpx_rational timebase = { 1, 30 }; const int lag_values[] = { 3, 15, 25, -1 }; forced_kf_frame_num_ = 1; for (int i = 0; lag_values[i] != -1; ++i) { frame_num_ = 0; cfg_.g_lag_in_frames = lag_values[i]; libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, timebase.den, timebase.num, 0, 30); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } } TEST_P(AltRefForcedKeyTestLarge, ForcedFrameIsKey) { const vpx_rational timebase = { 1, 30 }; const int lag_values[] = { 3, 15, 25, -1 }; for (int i = 0; lag_values[i] != -1; ++i) { frame_num_ = 0; forced_kf_frame_num_ = lag_values[i] - 1; cfg_.g_lag_in_frames = lag_values[i]; libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, timebase.den, timebase.num, 0, 30); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } } VP8_INSTANTIATE_TEST_CASE(AltRefForcedKeyTestLarge, ::testing::Values(::libvpx_test::kOnePassGood), ::testing::Range(0, 9)); VP9_INSTANTIATE_TEST_CASE(AltRefForcedKeyTestLarge, ::testing::Values(::libvpx_test::kOnePassGood), ::testing::Range(0, 9)); } // namespace libvpx-1.8.2/test/android/000077500000000000000000000000001357355204000154315ustar00rootroot00000000000000libvpx-1.8.2/test/android/Android.mk000066400000000000000000000036321357355204000173460ustar00rootroot00000000000000# Copyright (c) 2013 The WebM project authors. All Rights Reserved. # # Use of this source code is governed by a BSD-style license # that can be found in the LICENSE file in the root of the source # tree. An additional intellectual property rights grant can be found # in the file PATENTS. All contributing project authors may # be found in the AUTHORS file in the root of the source tree. # # This make file builds vpx_test app for android. # The test app itself runs on the command line through adb shell # The paths are really messed up as the libvpx make file # expects to be made from a parent directory. CUR_WD := $(call my-dir) BINDINGS_DIR := $(CUR_WD)/../../.. LOCAL_PATH := $(CUR_WD)/../../.. #libwebm include $(CLEAR_VARS) include $(BINDINGS_DIR)/libvpx/third_party/libwebm/Android.mk LOCAL_PATH := $(CUR_WD)/../../.. #libvpx include $(CLEAR_VARS) LOCAL_STATIC_LIBRARIES := libwebm include $(BINDINGS_DIR)/libvpx/build/make/Android.mk LOCAL_PATH := $(CUR_WD)/../.. 
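# Note: $(CLEAR_VARS) resets a module's LOCAL_ variables but not LOCAL_PATH,
# which is why LOCAL_PATH is re-assigned by hand above: each included make
# fragment redefines it via $(call my-dir) for its own sources.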
#libgtest include $(CLEAR_VARS) LOCAL_ARM_MODE := arm LOCAL_CPP_EXTENSION := .cc LOCAL_MODULE := gtest LOCAL_C_INCLUDES := $(LOCAL_PATH)/third_party/googletest/src/ LOCAL_C_INCLUDES += $(LOCAL_PATH)/third_party/googletest/src/include/ LOCAL_EXPORT_C_INCLUDES := $(LOCAL_PATH)/third_party/googletest/src/include/ LOCAL_SRC_FILES := ./third_party/googletest/src/src/gtest-all.cc include $(BUILD_STATIC_LIBRARY) #libvpx_test include $(CLEAR_VARS) LOCAL_ARM_MODE := arm LOCAL_MODULE := libvpx_test LOCAL_STATIC_LIBRARIES := gtest libwebm ifeq ($(ENABLE_SHARED),1) LOCAL_SHARED_LIBRARIES := vpx else LOCAL_STATIC_LIBRARIES += vpx endif include $(LOCAL_PATH)/test/test.mk LOCAL_C_INCLUDES := $(BINDINGS_DIR) FILTERED_SRC := $(sort $(filter %.cc %.c, $(LIBVPX_TEST_SRCS-yes))) LOCAL_SRC_FILES := $(addprefix ./test/, $(FILTERED_SRC)) # some test files depend on *_rtcd.h, ensure they're generated first. $(eval $(call rtcd_dep_template)) include $(BUILD_EXECUTABLE) libvpx-1.8.2/test/android/README000066400000000000000000000023111357355204000163060ustar00rootroot00000000000000Android.mk will build vpx unittests on android. 1) Configure libvpx from the parent directory: ./libvpx/configure --target=armv7-android-gcc --enable-external-build \ --enable-postproc --disable-install-srcs --enable-multi-res-encoding \ --enable-temporal-denoising --disable-unit-tests --disable-install-docs \ --disable-examples --disable-runtime-cpu-detect 2) From the parent directory, invoke ndk-build: NDK_PROJECT_PATH=. ndk-build APP_BUILD_SCRIPT=./libvpx/test/android/Android.mk \ APP_ABI=armeabi-v7a APP_PLATFORM=android-18 APP_OPTIM=release \ APP_STL=c++_static Note: Both adb and ndk-build are available at: https://developer.android.com/studio#downloads https://developer.android.com/ndk/downloads 3) Run get_files.py to download the test files: python get_files.py -i /path/to/test-data.sha1 -o /path/to/put/files \ -u http://downloads.webmproject.org/test_data/libvpx 4) Transfer files to device using adb. Ensure you have proper permissions for the target adb push /path/to/test_files /data/local/tmp adb push /path/to/built_libs /data/local/tmp NOTE: Built_libs defaults to parent_dir/libs/armeabi-v7a 5) Run tests: adb shell (on device) cd /data/local/tmp LD_LIBRARY_PATH=. ./vpx_test libvpx-1.8.2/test/android/get_files.py000066400000000000000000000062231357355204000177470ustar00rootroot00000000000000# Copyright (c) 2013 The WebM project authors. All Rights Reserved. # # Use of this source code is governed by a BSD-style license # that can be found in the LICENSE file in the root of the source # tree. An additional intellectual property rights grant can be found # in the file PATENTS. All contributing project authors may # be found in the AUTHORS file in the root of the source tree. 
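# The input csv is a sha1sum-style manifest: each line holds a sha1 digest
# followed by a file name, separated by one or more spaces, e.g.
#   <sha1-hex>  hantro_collage_w352h288.yuv
# (see the column constants and the csv.reader setup below).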
# # This simple script pulls test files from the webm homepage # It is intelligent enough to only pull files if # 1) File / test_data folder does not exist # 2) SHA mismatch import pycurl import csv import hashlib import re import os.path import time import itertools import sys import getopt #globals url = '' file_list_path = '' local_resource_path = '' # Helper functions: # A simple function which returns the sha hash of a file in hex def get_file_sha(filename): try: sha_hash = hashlib.sha1() with open(filename, 'rb') as file: buf = file.read(HASH_CHUNK) while len(buf) > 0: sha_hash.update(buf) buf = file.read(HASH_CHUNK) return sha_hash.hexdigest() except IOError: print "Error reading " + filename # Downloads a file from a url, and then checks the sha against the passed # in sha def download_and_check_sha(url, filename, sha): path = os.path.join(local_resource_path, filename) fp = open(path, "wb") curl = pycurl.Curl() curl.setopt(pycurl.URL, url + "/" + filename) curl.setopt(pycurl.WRITEDATA, fp) curl.perform() curl.close() fp.close() return get_file_sha(path) == sha #constants ftp_retries = 3 SHA_COL = 0 NAME_COL = 1 EXPECTED_COL = 2 HASH_CHUNK = 65536 # Main script try: opts, args = \ getopt.getopt(sys.argv[1:], \ "u:i:o:", ["url=", "input_csv=", "output_dir="]) except: print 'get_files.py -u <url> -i <input_csv> -o <output_dir>' sys.exit(2) for opt, arg in opts: if opt == '-u': url = arg elif opt in ("-i", "--input_csv"): file_list_path = os.path.join(arg) elif opt in ("-o", "--output_dir"): local_resource_path = os.path.join(arg) if len(sys.argv) != 7: print "Expects two paths and a url!" exit(1) if not os.path.isdir(local_resource_path): os.makedirs(local_resource_path) file_list_csv = open(file_list_path, "rb") # Our 'csv' file uses multiple spaces as a delimiter, python's # csv class only uses single character delimiters, so we convert them below file_list_reader = csv.reader((re.sub(' +', ' ', line) \ for line in file_list_csv), delimiter = ' ') file_shas = [] file_names = [] for row in file_list_reader: if len(row) != EXPECTED_COL: continue file_shas.append(row[SHA_COL]) file_names.append(row[NAME_COL]) file_list_csv.close() # Download files, only if they don't already exist and have correct shas for filename, sha in itertools.izip(file_names, file_shas): path = os.path.join(local_resource_path, filename) if os.path.isfile(path) \ and get_file_sha(path) == sha: print path + ' exists, skipping' continue for retry in range(0, ftp_retries): print "Downloading " + path if not download_and_check_sha(url, filename, sha): print "Sha does not match, retrying..." else: break libvpx-1.8.2/test/android/scrape_gtest_log.py000066400000000000000000000030331357355204000213260ustar00rootroot00000000000000# Copyright (c) 2014 The WebM project authors. All Rights Reserved. # # Use of this source code is governed by a BSD-style license # that can be found in the LICENSE file in the root of the source # tree. An additional intellectual property rights grant can be found # in the file PATENTS. All contributing project authors may # be found in the AUTHORS file in the root of the source tree. """Standalone script which parses a gtest log for json. Json is returned as an array. This script is used by the libvpx waterfall to gather json results mixed in with gtest logs. This is dubious software engineering. """ import getopt import json import os import re import sys def main(): if len(sys.argv) != 3: print "Expects a file to write json to!"
exit(1) try: opts, _ = \ getopt.getopt(sys.argv[1:], \ 'o:', ['output-json=']) except getopt.GetoptError: print 'scrape_gtest_log.py -o <output-json>' sys.exit(2) output_json = '' for opt, arg in opts: if opt in ('-o', '--output-json'): output_json = os.path.join(arg) blob = sys.stdin.read() json_string = '[' + ','.join('{' + x + '}' for x in re.findall(r'{([^}]*.?)}', blob)) + ']' print blob output = json.dumps(json.loads(json_string), indent=4, sort_keys=True) print output path = os.path.dirname(output_json) if path and not os.path.exists(path): os.makedirs(path) outfile = open(output_json, 'w') outfile.write(output) if __name__ == '__main__': sys.exit(main()) libvpx-1.8.2/test/aq_segment_test.cc000066400000000000000000000065041357355204000175070ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" namespace { class AqSegmentTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> { protected: AqSegmentTest() : EncoderTest(GET_PARAM(0)) {} virtual ~AqSegmentTest() {} virtual void SetUp() { InitializeConfig(); SetMode(GET_PARAM(1)); set_cpu_used_ = GET_PARAM(2); aq_mode_ = 0; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); encoder->Control(VP9E_SET_AQ_MODE, aq_mode_); encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 100); } } int set_cpu_used_; int aq_mode_; }; // Validate that this AQ segmentation mode (AQ=1, variance_aq) // encodes and decodes without a mismatch. TEST_P(AqSegmentTest, TestNoMisMatchAQ1) { cfg_.rc_min_quantizer = 8; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_target_bitrate = 300; aq_mode_ = 1; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 100); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } // Validate that this AQ segmentation mode (AQ=2, complexity_aq) // encodes and decodes without a mismatch. TEST_P(AqSegmentTest, TestNoMisMatchAQ2) { cfg_.rc_min_quantizer = 8; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_target_bitrate = 300; aq_mode_ = 2; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 100); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } // Validate that this AQ segmentation mode (AQ=3, cyclic_refresh_aq) // encodes and decodes without a mismatch.
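// (Throughout these tests a "mismatch" means the encoder's reconstructed
// frame and the decoder's output for the same frame differ; the EncoderTest
// RunLoop flags any such difference as a failure.)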
TEST_P(AqSegmentTest, TestNoMisMatchAQ3) { cfg_.rc_min_quantizer = 8; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_target_bitrate = 300; aq_mode_ = 3; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 100); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } VP9_INSTANTIATE_TEST_CASE(AqSegmentTest, ::testing::Values(::libvpx_test::kRealTime, ::libvpx_test::kOnePassGood), ::testing::Range(3, 9)); } // namespace libvpx-1.8.2/test/avg_test.cc000066400000000000000000000550171357355204000161440ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vpx/vpx_codec.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/vpx_timer.h" using libvpx_test::ACMRandom; namespace { template class AverageTestBase : public ::testing::Test { public: AverageTestBase(int width, int height) : width_(width), height_(height), source_data_(NULL), source_stride_(0), bit_depth_(8) {} virtual void TearDown() { vpx_free(source_data_); source_data_ = NULL; libvpx_test::ClearSystemState(); } protected: // Handle blocks up to 4 blocks 64x64 with stride up to 128 static const int kDataAlignment = 16; static const int kDataBlockSize = 64 * 128; virtual void SetUp() { source_data_ = reinterpret_cast( vpx_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0]))); ASSERT_TRUE(source_data_ != NULL); source_stride_ = (width_ + 31) & ~31; bit_depth_ = 8; rnd_.Reset(ACMRandom::DeterministicSeed()); } // Sum Pixels static unsigned int ReferenceAverage8x8(const Pixel *source, int pitch) { unsigned int average = 0; for (int h = 0; h < 8; ++h) { for (int w = 0; w < 8; ++w) average += source[h * pitch + w]; } return ((average + 32) >> 6); } static unsigned int ReferenceAverage4x4(const Pixel *source, int pitch) { unsigned int average = 0; for (int h = 0; h < 4; ++h) { for (int w = 0; w < 4; ++w) average += source[h * pitch + w]; } return ((average + 8) >> 4); } void FillConstant(Pixel fill_constant) { for (int i = 0; i < width_ * height_; ++i) { source_data_[i] = fill_constant; } } void FillRandom() { for (int i = 0; i < width_ * height_; ++i) { source_data_[i] = rnd_.Rand16() & ((1 << bit_depth_) - 1); } } int width_, height_; Pixel *source_data_; int source_stride_; int bit_depth_; ACMRandom rnd_; }; typedef unsigned int (*AverageFunction)(const uint8_t *s, int pitch); typedef std::tuple AvgFunc; class AverageTest : public AverageTestBase, public ::testing::WithParamInterface { public: AverageTest() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {} protected: void CheckAverages() { const int block_size = GET_PARAM(3); unsigned int expected = 0; if (block_size == 8) { expected = ReferenceAverage8x8(source_data_ + GET_PARAM(2), source_stride_); } else if (block_size == 4) { expected = 
ReferenceAverage4x4(source_data_ + GET_PARAM(2), source_stride_); } ASM_REGISTER_STATE_CHECK( GET_PARAM(4)(source_data_ + GET_PARAM(2), source_stride_)); unsigned int actual = GET_PARAM(4)(source_data_ + GET_PARAM(2), source_stride_); EXPECT_EQ(expected, actual); } }; #if CONFIG_VP9_HIGHBITDEPTH class AverageTestHBD : public AverageTestBase, public ::testing::WithParamInterface { public: AverageTestHBD() : AverageTestBase(GET_PARAM(0), GET_PARAM(1)) {} protected: void CheckAverages() { const int block_size = GET_PARAM(3); unsigned int expected = 0; if (block_size == 8) { expected = ReferenceAverage8x8(source_data_ + GET_PARAM(2), source_stride_); } else if (block_size == 4) { expected = ReferenceAverage4x4(source_data_ + GET_PARAM(2), source_stride_); } ASM_REGISTER_STATE_CHECK(GET_PARAM(4)( CONVERT_TO_BYTEPTR(source_data_ + GET_PARAM(2)), source_stride_)); unsigned int actual = GET_PARAM(4)( CONVERT_TO_BYTEPTR(source_data_ + GET_PARAM(2)), source_stride_); EXPECT_EQ(expected, actual); } }; #endif // CONFIG_VP9_HIGHBITDEPTH typedef void (*IntProRowFunc)(int16_t hbuf[16], uint8_t const *ref, const int ref_stride, const int height); typedef std::tuple IntProRowParam; class IntProRowTest : public AverageTestBase, public ::testing::WithParamInterface { public: IntProRowTest() : AverageTestBase(16, GET_PARAM(0)), hbuf_asm_(NULL), hbuf_c_(NULL) { asm_func_ = GET_PARAM(1); c_func_ = GET_PARAM(2); } protected: virtual void SetUp() { source_data_ = reinterpret_cast( vpx_memalign(kDataAlignment, kDataBlockSize * sizeof(source_data_[0]))); ASSERT_TRUE(source_data_ != NULL); hbuf_asm_ = reinterpret_cast( vpx_memalign(kDataAlignment, sizeof(*hbuf_asm_) * 16)); hbuf_c_ = reinterpret_cast( vpx_memalign(kDataAlignment, sizeof(*hbuf_c_) * 16)); } virtual void TearDown() { vpx_free(source_data_); source_data_ = NULL; vpx_free(hbuf_c_); hbuf_c_ = NULL; vpx_free(hbuf_asm_); hbuf_asm_ = NULL; } void RunComparison() { ASM_REGISTER_STATE_CHECK(c_func_(hbuf_c_, source_data_, 0, height_)); ASM_REGISTER_STATE_CHECK(asm_func_(hbuf_asm_, source_data_, 0, height_)); EXPECT_EQ(0, memcmp(hbuf_c_, hbuf_asm_, sizeof(*hbuf_c_) * 16)) << "Output mismatch"; } private: IntProRowFunc asm_func_; IntProRowFunc c_func_; int16_t *hbuf_asm_; int16_t *hbuf_c_; }; typedef int16_t (*IntProColFunc)(uint8_t const *ref, const int width); typedef std::tuple IntProColParam; class IntProColTest : public AverageTestBase, public ::testing::WithParamInterface { public: IntProColTest() : AverageTestBase(GET_PARAM(0), 1), sum_asm_(0), sum_c_(0) { asm_func_ = GET_PARAM(1); c_func_ = GET_PARAM(2); } protected: void RunComparison() { ASM_REGISTER_STATE_CHECK(sum_c_ = c_func_(source_data_, width_)); ASM_REGISTER_STATE_CHECK(sum_asm_ = asm_func_(source_data_, width_)); EXPECT_EQ(sum_c_, sum_asm_) << "Output mismatch"; } private: IntProColFunc asm_func_; IntProColFunc c_func_; int16_t sum_asm_; int16_t sum_c_; }; typedef int (*SatdFunc)(const tran_low_t *coeffs, int length); typedef std::tuple SatdTestParam; class SatdTest : public ::testing::Test, public ::testing::WithParamInterface { protected: virtual void SetUp() { satd_size_ = GET_PARAM(0); satd_func_ = GET_PARAM(1); rnd_.Reset(ACMRandom::DeterministicSeed()); src_ = reinterpret_cast( vpx_memalign(16, sizeof(*src_) * satd_size_)); ASSERT_TRUE(src_ != NULL); } virtual void TearDown() { libvpx_test::ClearSystemState(); vpx_free(src_); } void FillConstant(const tran_low_t val) { for (int i = 0; i < satd_size_; ++i) src_[i] = val; } virtual void FillRandom() = 0; void Check(const int expected) { int 
total; ASM_REGISTER_STATE_CHECK(total = satd_func_(src_, satd_size_)); EXPECT_EQ(expected, total); } tran_low_t *GetCoeff() const { return src_; } int satd_size_; ACMRandom rnd_; tran_low_t *src_; private: SatdFunc satd_func_; }; class SatdLowbdTest : public SatdTest { protected: virtual void FillRandom() { for (int i = 0; i < satd_size_; ++i) { const int16_t tmp = rnd_.Rand16Signed(); src_[i] = (tran_low_t)tmp; } } }; typedef int64_t (*BlockErrorFunc)(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size); typedef std::tuple BlockErrorTestFPParam; class BlockErrorTestFP : public ::testing::Test, public ::testing::WithParamInterface { protected: virtual void SetUp() { txfm_size_ = GET_PARAM(0); block_error_func_ = GET_PARAM(1); rnd_.Reset(ACMRandom::DeterministicSeed()); coeff_ = reinterpret_cast( vpx_memalign(16, sizeof(*coeff_) * txfm_size_)); dqcoeff_ = reinterpret_cast( vpx_memalign(16, sizeof(*dqcoeff_) * txfm_size_)); ASSERT_TRUE(coeff_ != NULL); ASSERT_TRUE(dqcoeff_ != NULL); } virtual void TearDown() { libvpx_test::ClearSystemState(); vpx_free(coeff_); vpx_free(dqcoeff_); } void FillConstant(const tran_low_t coeff_val, const tran_low_t dqcoeff_val) { for (int i = 0; i < txfm_size_; ++i) coeff_[i] = coeff_val; for (int i = 0; i < txfm_size_; ++i) dqcoeff_[i] = dqcoeff_val; } void FillRandom() { // Just two fixed seeds rnd_.Reset(0xb0b9); for (int i = 0; i < txfm_size_; ++i) coeff_[i] = rnd_.Rand16() >> 1; rnd_.Reset(0xb0c8); for (int i = 0; i < txfm_size_; ++i) dqcoeff_[i] = rnd_.Rand16() >> 1; } void Check(const int64_t expected) { int64_t total; ASM_REGISTER_STATE_CHECK( total = block_error_func_(coeff_, dqcoeff_, txfm_size_)); EXPECT_EQ(expected, total); } tran_low_t *GetCoeff() const { return coeff_; } tran_low_t *GetDQCoeff() const { return dqcoeff_; } int txfm_size_; private: tran_low_t *coeff_; tran_low_t *dqcoeff_; BlockErrorFunc block_error_func_; ACMRandom rnd_; }; TEST_P(AverageTest, MinValue) { FillConstant(0); CheckAverages(); } TEST_P(AverageTest, MaxValue) { FillConstant(255); CheckAverages(); } TEST_P(AverageTest, Random) { // The reference frame, but not the source frame, may be unaligned for // certain types of searches. for (int i = 0; i < 1000; i++) { FillRandom(); CheckAverages(); } } #if CONFIG_VP9_HIGHBITDEPTH TEST_P(AverageTestHBD, MinValue) { FillConstant(0); CheckAverages(); } TEST_P(AverageTestHBD, MaxValue) { FillConstant((1 << VPX_BITS_12) - 1); CheckAverages(); } TEST_P(AverageTestHBD, Random) { bit_depth_ = VPX_BITS_12; // The reference frame, but not the source frame, may be unaligned for // certain types of searches. 
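// bit_depth_ was raised to VPX_BITS_12 above, so FillRandom(), which masks
// samples with (1 << bit_depth_) - 1, exercises the full 12-bit range
// handled by the high bit depth average functions.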
for (int i = 0; i < 1000; i++) { FillRandom(); CheckAverages(); } } #endif // CONFIG_VP9_HIGHBITDEPTH TEST_P(IntProRowTest, MinValue) { FillConstant(0); RunComparison(); } TEST_P(IntProRowTest, MaxValue) { FillConstant(255); RunComparison(); } TEST_P(IntProRowTest, Random) { FillRandom(); RunComparison(); } TEST_P(IntProColTest, MinValue) { FillConstant(0); RunComparison(); } TEST_P(IntProColTest, MaxValue) { FillConstant(255); RunComparison(); } TEST_P(IntProColTest, Random) { FillRandom(); RunComparison(); } TEST_P(SatdLowbdTest, MinValue) { const int kMin = -32640; const int expected = -kMin * satd_size_; FillConstant(kMin); Check(expected); } TEST_P(SatdLowbdTest, MaxValue) { const int kMax = 32640; const int expected = kMax * satd_size_; FillConstant(kMax); Check(expected); } TEST_P(SatdLowbdTest, Random) { int expected; switch (satd_size_) { case 16: expected = 261036; break; case 64: expected = 991732; break; case 256: expected = 4136358; break; case 1024: expected = 16677592; break; default: FAIL() << "Invalid satd size (" << satd_size_ << ") valid: 16/64/256/1024"; } FillRandom(); Check(expected); } TEST_P(SatdLowbdTest, DISABLED_Speed) { const int kCountSpeedTestBlock = 20000; vpx_usec_timer timer; const int blocksize = GET_PARAM(0); FillRandom(); tran_low_t *coeff = GetCoeff(); vpx_usec_timer_start(&timer); for (int i = 0; i < kCountSpeedTestBlock; ++i) { GET_PARAM(1)(coeff, blocksize); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("blocksize: %4d time: %4d us\n", blocksize, elapsed_time); } #if CONFIG_VP9_HIGHBITDEPTH class SatdHighbdTest : public SatdTest { protected: virtual void FillRandom() { for (int i = 0; i < satd_size_; ++i) { src_[i] = rnd_.Rand20Signed(); } } }; TEST_P(SatdHighbdTest, MinValue) { const int kMin = -524280; const int expected = -kMin * satd_size_; FillConstant(kMin); Check(expected); } TEST_P(SatdHighbdTest, MaxValue) { const int kMax = 524280; const int expected = kMax * satd_size_; FillConstant(kMax); Check(expected); } TEST_P(SatdHighbdTest, Random) { int expected; switch (satd_size_) { case 16: expected = 5249712; break; case 64: expected = 18362120; break; case 256: expected = 66100520; break; case 1024: expected = 266094734; break; default: FAIL() << "Invalid satd size (" << satd_size_ << ") valid: 16/64/256/1024"; } FillRandom(); Check(expected); } TEST_P(SatdHighbdTest, DISABLED_Speed) { const int kCountSpeedTestBlock = 20000; vpx_usec_timer timer; const int blocksize = GET_PARAM(0); FillRandom(); tran_low_t *coeff = GetCoeff(); vpx_usec_timer_start(&timer); for (int i = 0; i < kCountSpeedTestBlock; ++i) { GET_PARAM(1)(coeff, blocksize); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("blocksize: %4d time: %4d us\n", blocksize, elapsed_time); } #endif // CONFIG_VP9_HIGHBITDEPTH TEST_P(BlockErrorTestFP, MinValue) { const int64_t kMin = -32640; const int64_t expected = kMin * kMin * txfm_size_; FillConstant(kMin, 0); Check(expected); } TEST_P(BlockErrorTestFP, MaxValue) { const int64_t kMax = 32640; const int64_t expected = kMax * kMax * txfm_size_; FillConstant(kMax, 0); Check(expected); } TEST_P(BlockErrorTestFP, Random) { int64_t expected; switch (txfm_size_) { case 16: expected = 2051681432; break; case 64: expected = 11075114379; break; case 256: expected = 44386271116; break; case 1024: expected = 184774996089; break; default: FAIL() << "Invalid satd size (" << txfm_size_ << ") valid: 16/64/256/1024"; } FillRandom(); 
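// The expected totals above are golden values: FillRandom() reseeds the
// generator with the fixed seeds 0xb0b9/0xb0c8, so the random coefficients,
// and therefore the block error, are identical on every run.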
Check(expected); } TEST_P(BlockErrorTestFP, DISABLED_Speed) { const int kCountSpeedTestBlock = 20000; vpx_usec_timer timer; const int blocksize = GET_PARAM(0); FillRandom(); tran_low_t *coeff = GetCoeff(); tran_low_t *dqcoeff = GetDQCoeff(); vpx_usec_timer_start(&timer); for (int i = 0; i < kCountSpeedTestBlock; ++i) { GET_PARAM(1)(coeff, dqcoeff, blocksize); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("blocksize: %4d time: %4d us\n", blocksize, elapsed_time); } using std::make_tuple; INSTANTIATE_TEST_CASE_P( C, AverageTest, ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_avg_8x8_c), make_tuple(16, 16, 1, 4, &vpx_avg_4x4_c))); #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( C, AverageTestHBD, ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_highbd_avg_8x8_c), make_tuple(16, 16, 1, 4, &vpx_highbd_avg_4x4_c))); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, AverageTestHBD, ::testing::Values(make_tuple(16, 16, 1, 8, &vpx_highbd_avg_8x8_sse2), make_tuple(16, 16, 1, 4, &vpx_highbd_avg_4x4_sse2))); #endif // HAVE_SSE2 INSTANTIATE_TEST_CASE_P(C, SatdHighbdTest, ::testing::Values(make_tuple(16, &vpx_satd_c), make_tuple(64, &vpx_satd_c), make_tuple(256, &vpx_satd_c), make_tuple(1024, &vpx_satd_c))); #endif // CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(C, SatdLowbdTest, ::testing::Values(make_tuple(16, &vpx_satd_c), make_tuple(64, &vpx_satd_c), make_tuple(256, &vpx_satd_c), make_tuple(1024, &vpx_satd_c))); INSTANTIATE_TEST_CASE_P( C, BlockErrorTestFP, ::testing::Values(make_tuple(16, &vp9_block_error_fp_c), make_tuple(64, &vp9_block_error_fp_c), make_tuple(256, &vp9_block_error_fp_c), make_tuple(1024, &vp9_block_error_fp_c))); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, AverageTest, ::testing::Values(make_tuple(16, 16, 0, 8, &vpx_avg_8x8_sse2), make_tuple(16, 16, 5, 8, &vpx_avg_8x8_sse2), make_tuple(32, 32, 15, 8, &vpx_avg_8x8_sse2), make_tuple(16, 16, 0, 4, &vpx_avg_4x4_sse2), make_tuple(16, 16, 5, 4, &vpx_avg_4x4_sse2), make_tuple(32, 32, 15, 4, &vpx_avg_4x4_sse2))); INSTANTIATE_TEST_CASE_P( SSE2, IntProRowTest, ::testing::Values(make_tuple(16, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c), make_tuple(32, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c), make_tuple(64, &vpx_int_pro_row_sse2, &vpx_int_pro_row_c))); INSTANTIATE_TEST_CASE_P( SSE2, IntProColTest, ::testing::Values(make_tuple(16, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c), make_tuple(32, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c), make_tuple(64, &vpx_int_pro_col_sse2, &vpx_int_pro_col_c))); INSTANTIATE_TEST_CASE_P(SSE2, SatdLowbdTest, ::testing::Values(make_tuple(16, &vpx_satd_sse2), make_tuple(64, &vpx_satd_sse2), make_tuple(256, &vpx_satd_sse2), make_tuple(1024, &vpx_satd_sse2))); INSTANTIATE_TEST_CASE_P( SSE2, BlockErrorTestFP, ::testing::Values(make_tuple(16, &vp9_block_error_fp_sse2), make_tuple(64, &vp9_block_error_fp_sse2), make_tuple(256, &vp9_block_error_fp_sse2), make_tuple(1024, &vp9_block_error_fp_sse2))); #endif // HAVE_SSE2 #if HAVE_AVX2 INSTANTIATE_TEST_CASE_P(AVX2, SatdLowbdTest, ::testing::Values(make_tuple(16, &vpx_satd_avx2), make_tuple(64, &vpx_satd_avx2), make_tuple(256, &vpx_satd_avx2), make_tuple(1024, &vpx_satd_avx2))); #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( AVX2, SatdHighbdTest, ::testing::Values(make_tuple(16, &vpx_highbd_satd_avx2), make_tuple(64, &vpx_highbd_satd_avx2), make_tuple(256, &vpx_highbd_satd_avx2), make_tuple(1024, &vpx_highbd_satd_avx2))); #endif // CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( AVX2, BlockErrorTestFP, 
::testing::Values(make_tuple(16, &vp9_block_error_fp_avx2), make_tuple(64, &vp9_block_error_fp_avx2), make_tuple(256, &vp9_block_error_fp_avx2), make_tuple(1024, &vp9_block_error_fp_avx2))); #endif #if HAVE_NEON INSTANTIATE_TEST_CASE_P( NEON, AverageTest, ::testing::Values(make_tuple(16, 16, 0, 8, &vpx_avg_8x8_neon), make_tuple(16, 16, 5, 8, &vpx_avg_8x8_neon), make_tuple(32, 32, 15, 8, &vpx_avg_8x8_neon), make_tuple(16, 16, 0, 4, &vpx_avg_4x4_neon), make_tuple(16, 16, 5, 4, &vpx_avg_4x4_neon), make_tuple(32, 32, 15, 4, &vpx_avg_4x4_neon))); INSTANTIATE_TEST_CASE_P( NEON, IntProRowTest, ::testing::Values(make_tuple(16, &vpx_int_pro_row_neon, &vpx_int_pro_row_c), make_tuple(32, &vpx_int_pro_row_neon, &vpx_int_pro_row_c), make_tuple(64, &vpx_int_pro_row_neon, &vpx_int_pro_row_c))); INSTANTIATE_TEST_CASE_P( NEON, IntProColTest, ::testing::Values(make_tuple(16, &vpx_int_pro_col_neon, &vpx_int_pro_col_c), make_tuple(32, &vpx_int_pro_col_neon, &vpx_int_pro_col_c), make_tuple(64, &vpx_int_pro_col_neon, &vpx_int_pro_col_c))); INSTANTIATE_TEST_CASE_P(NEON, SatdLowbdTest, ::testing::Values(make_tuple(16, &vpx_satd_neon), make_tuple(64, &vpx_satd_neon), make_tuple(256, &vpx_satd_neon), make_tuple(1024, &vpx_satd_neon))); // TODO(jianj): Remove the highbitdepth flag once the SIMD functions are // in place. #if !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( NEON, BlockErrorTestFP, ::testing::Values(make_tuple(16, &vp9_block_error_fp_neon), make_tuple(64, &vp9_block_error_fp_neon), make_tuple(256, &vp9_block_error_fp_neon), make_tuple(1024, &vp9_block_error_fp_neon))); #endif // !CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_NEON #if HAVE_MSA INSTANTIATE_TEST_CASE_P( MSA, AverageTest, ::testing::Values(make_tuple(16, 16, 0, 8, &vpx_avg_8x8_msa), make_tuple(16, 16, 5, 8, &vpx_avg_8x8_msa), make_tuple(32, 32, 15, 8, &vpx_avg_8x8_msa), make_tuple(16, 16, 0, 4, &vpx_avg_4x4_msa), make_tuple(16, 16, 5, 4, &vpx_avg_4x4_msa), make_tuple(32, 32, 15, 4, &vpx_avg_4x4_msa))); INSTANTIATE_TEST_CASE_P( MSA, IntProRowTest, ::testing::Values(make_tuple(16, &vpx_int_pro_row_msa, &vpx_int_pro_row_c), make_tuple(32, &vpx_int_pro_row_msa, &vpx_int_pro_row_c), make_tuple(64, &vpx_int_pro_row_msa, &vpx_int_pro_row_c))); INSTANTIATE_TEST_CASE_P( MSA, IntProColTest, ::testing::Values(make_tuple(16, &vpx_int_pro_col_msa, &vpx_int_pro_col_c), make_tuple(32, &vpx_int_pro_col_msa, &vpx_int_pro_col_c), make_tuple(64, &vpx_int_pro_col_msa, &vpx_int_pro_col_c))); // TODO(jingning): Remove the highbitdepth flag once the SIMD functions are // in place. #if !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(MSA, SatdLowbdTest, ::testing::Values(make_tuple(16, &vpx_satd_msa), make_tuple(64, &vpx_satd_msa), make_tuple(256, &vpx_satd_msa), make_tuple(1024, &vpx_satd_msa))); #endif // !CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_MSA } // namespace libvpx-1.8.2/test/bench.cc000066400000000000000000000022761357355204000154060ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include "test/bench.h" #include "vpx_ports/vpx_timer.h" void AbstractBench::RunNTimes(int n) { for (int r = 0; r < VPX_BENCH_ROBUST_ITER; r++) { vpx_usec_timer timer; vpx_usec_timer_start(&timer); for (int j = 0; j < n; ++j) { Run(); } vpx_usec_timer_mark(&timer); times_[r] = static_cast(vpx_usec_timer_elapsed(&timer)); } } void AbstractBench::PrintMedian(const char *title) { std::sort(times_, times_ + VPX_BENCH_ROBUST_ITER); const int med = times_[VPX_BENCH_ROBUST_ITER >> 1]; int sad = 0; for (int t = 0; t < VPX_BENCH_ROBUST_ITER; t++) { sad += abs(times_[t] - med); } printf("[%10s] %s %.1f ms ( ±%.1f ms )\n", "BENCH ", title, med / 1000.0, sad / (VPX_BENCH_ROBUST_ITER * 1000.0)); } libvpx-1.8.2/test/bench.h000066400000000000000000000015031357355204000152400ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_BENCH_H_ #define VPX_TEST_BENCH_H_ // Number of iterations used to compute median run time. #define VPX_BENCH_ROBUST_ITER 15 class AbstractBench { public: void RunNTimes(int n); void PrintMedian(const char *title); protected: // Implement this method and put the code to benchmark in it. virtual void Run() = 0; private: int times_[VPX_BENCH_ROBUST_ITER]; }; #endif // VPX_TEST_BENCH_H_ libvpx-1.8.2/test/blockiness_test.cc000066400000000000000000000147411357355204000175220ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #if CONFIG_VP9_ENCODER #include "./vp9_rtcd.h" #endif #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vpx_mem/vpx_mem.h" #include "vp9/encoder/vp9_blockiness.h" using libvpx_test::ACMRandom; namespace { class BlockinessTestBase : public ::testing::Test { public: BlockinessTestBase(int width, int height) : width_(width), height_(height) {} static void SetUpTestCase() { source_data_ = reinterpret_cast( vpx_memalign(kDataAlignment, kDataBufferSize)); reference_data_ = reinterpret_cast( vpx_memalign(kDataAlignment, kDataBufferSize)); } static void TearDownTestCase() { vpx_free(source_data_); source_data_ = NULL; vpx_free(reference_data_); reference_data_ = NULL; } virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: // Handle frames up to 640x480 static const int kDataAlignment = 16; static const int kDataBufferSize = 640 * 480; virtual void SetUp() { source_stride_ = (width_ + 31) & ~31; reference_stride_ = width_ * 2; rnd_.Reset(ACMRandom::DeterministicSeed()); } void FillConstant(uint8_t *data, int stride, uint8_t fill_constant, int width, int height) { for (int h = 0; h < height; ++h) { for (int w = 0; w < width; ++w) { data[h * stride + w] = fill_constant; } } } void FillConstant(uint8_t *data, int stride, uint8_t fill_constant) { FillConstant(data, stride, fill_constant, width_, height_); } void FillRandom(uint8_t *data, int stride, int width, int height) { for (int h = 0; h < height; ++h) { for (int w = 0; w < width; ++w) { data[h * stride + w] = rnd_.Rand8(); } } } void FillRandom(uint8_t *data, int stride) { FillRandom(data, stride, width_, height_); } void FillRandomBlocky(uint8_t *data, int stride) { for (int h = 0; h < height_; h += 4) { for (int w = 0; w < width_; w += 4) { FillRandom(data + h * stride + w, stride, 4, 4); } } } void FillCheckerboard(uint8_t *data, int stride) { for (int h = 0; h < height_; h += 4) { for (int w = 0; w < width_; w += 4) { if (((h / 4) ^ (w / 4)) & 1) { FillConstant(data + h * stride + w, stride, 255, 4, 4); } else { FillConstant(data + h * stride + w, stride, 0, 4, 4); } } } } void Blur(uint8_t *data, int stride, int taps) { int sum = 0; int half_taps = taps / 2; for (int h = 0; h < height_; ++h) { for (int w = 0; w < taps; ++w) { sum += data[w + h * stride]; } for (int w = taps; w < width_; ++w) { sum += data[w + h * stride] - data[w - taps + h * stride]; data[w - half_taps + h * stride] = (sum + half_taps) / taps; } } for (int w = 0; w < width_; ++w) { for (int h = 0; h < taps; ++h) { sum += data[h + w * stride]; } for (int h = taps; h < height_; ++h) { sum += data[w + h * stride] - data[(h - taps) * stride + w]; data[(h - half_taps) * stride + w] = (sum + half_taps) / taps; } } } int width_, height_; static uint8_t *source_data_; int source_stride_; static uint8_t *reference_data_; int reference_stride_; ACMRandom rnd_; }; #if CONFIG_VP9_ENCODER typedef std::tuple BlockinessParam; class BlockinessVP9Test : public BlockinessTestBase, public ::testing::WithParamInterface { public: BlockinessVP9Test() : BlockinessTestBase(GET_PARAM(0), GET_PARAM(1)) {} protected: double GetBlockiness() const { return vp9_get_blockiness(source_data_, source_stride_, reference_data_, reference_stride_, width_, height_); } }; #endif // CONFIG_VP9_ENCODER uint8_t *BlockinessTestBase::source_data_ = NULL; uint8_t 
*BlockinessTestBase::reference_data_ = NULL; #if CONFIG_VP9_ENCODER TEST_P(BlockinessVP9Test, SourceBlockierThanReference) { // Source is blockier than reference. FillRandomBlocky(source_data_, source_stride_); FillConstant(reference_data_, reference_stride_, 128); const double super_blocky = GetBlockiness(); EXPECT_DOUBLE_EQ(0.0, super_blocky) << "Blocky source should produce 0 blockiness."; } TEST_P(BlockinessVP9Test, ReferenceBlockierThanSource) { // Source is blockier than reference. FillConstant(source_data_, source_stride_, 128); FillRandomBlocky(reference_data_, reference_stride_); const double super_blocky = GetBlockiness(); EXPECT_GT(super_blocky, 0.0) << "Blocky reference should score high for blockiness."; } TEST_P(BlockinessVP9Test, BlurringDecreasesBlockiness) { // Source is blockier than reference. FillConstant(source_data_, source_stride_, 128); FillRandomBlocky(reference_data_, reference_stride_); const double super_blocky = GetBlockiness(); Blur(reference_data_, reference_stride_, 4); const double less_blocky = GetBlockiness(); EXPECT_GT(super_blocky, less_blocky) << "A straight blur should decrease blockiness."; } TEST_P(BlockinessVP9Test, WorstCaseBlockiness) { // Source is blockier than reference. FillConstant(source_data_, source_stride_, 128); FillCheckerboard(reference_data_, reference_stride_); const double super_blocky = GetBlockiness(); Blur(reference_data_, reference_stride_, 4); const double less_blocky = GetBlockiness(); EXPECT_GT(super_blocky, less_blocky) << "A straight blur should decrease blockiness."; } #endif // CONFIG_VP9_ENCODER using std::make_tuple; //------------------------------------------------------------------------------ // C functions #if CONFIG_VP9_ENCODER const BlockinessParam c_vp9_tests[] = { make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238) }; INSTANTIATE_TEST_CASE_P(C, BlockinessVP9Test, ::testing::ValuesIn(c_vp9_tests)); #endif } // namespace libvpx-1.8.2/test/borders_test.cc000066400000000000000000000055251357355204000170260ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <climits> #include <vector> #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" namespace { class BordersTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { protected: BordersTest() : EncoderTest(GET_PARAM(0)) {} virtual ~BordersTest() {} virtual void SetUp() { InitializeConfig(); SetMode(GET_PARAM(1)); } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, 1); encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); encoder->Control(VP8E_SET_ARNR_TYPE, 3); } } virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { } } }; TEST_P(BordersTest, TestEncodeHighBitrate) { // Validate that this non-multiple-of-64-wide clip encodes and decodes // without a mismatch when passing in a very low max q. This pushes // the encoder to producing lots of big partitions which will likely // extend into the border and test the border condition. cfg_.g_lag_in_frames = 25; cfg_.rc_2pass_vbr_minsection_pct = 5; cfg_.rc_2pass_vbr_maxsection_pct = 2000; cfg_.rc_target_bitrate = 2000; cfg_.rc_max_quantizer = 10; ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 40); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } TEST_P(BordersTest, TestLowBitrate) { // Validate that this clip encodes and decodes without a mismatch // when passing in a very high min q. This pushes the encoder to producing // lots of small partitions which might well test the other condition. cfg_.g_lag_in_frames = 25; cfg_.rc_2pass_vbr_minsection_pct = 5; cfg_.rc_2pass_vbr_maxsection_pct = 2000; cfg_.rc_target_bitrate = 200; cfg_.rc_min_quantizer = 40; ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 40); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } VP9_INSTANTIATE_TEST_CASE(BordersTest, ::testing::Values(::libvpx_test::kTwoPassGood)); } // namespace libvpx-1.8.2/test/buffer.h000066400000000000000000000257341357355204000154460ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #ifndef VPX_TEST_BUFFER_H_ #define VPX_TEST_BUFFER_H_ #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/acm_random.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" namespace libvpx_test { template class Buffer { public: Buffer(int width, int height, int top_padding, int left_padding, int right_padding, int bottom_padding) : width_(width), height_(height), top_padding_(top_padding), left_padding_(left_padding), right_padding_(right_padding), bottom_padding_(bottom_padding), alignment_(0), padding_value_(0), stride_(0), raw_size_(0), num_elements_(0), raw_buffer_(NULL) {} Buffer(int width, int height, int top_padding, int left_padding, int right_padding, int bottom_padding, unsigned int alignment) : width_(width), height_(height), top_padding_(top_padding), left_padding_(left_padding), right_padding_(right_padding), bottom_padding_(bottom_padding), alignment_(alignment), padding_value_(0), stride_(0), raw_size_(0), num_elements_(0), raw_buffer_(NULL) {} Buffer(int width, int height, int padding) : width_(width), height_(height), top_padding_(padding), left_padding_(padding), right_padding_(padding), bottom_padding_(padding), alignment_(0), padding_value_(0), stride_(0), raw_size_(0), num_elements_(0), raw_buffer_(NULL) {} Buffer(int width, int height, int padding, unsigned int alignment) : width_(width), height_(height), top_padding_(padding), left_padding_(padding), right_padding_(padding), bottom_padding_(padding), alignment_(alignment), padding_value_(0), stride_(0), raw_size_(0), num_elements_(0), raw_buffer_(NULL) {} ~Buffer() { if (alignment_) { vpx_free(raw_buffer_); } else { delete[] raw_buffer_; } } T *TopLeftPixel() const; int stride() const { return stride_; } // Set the buffer (excluding padding) to 'value'. void Set(const T value); // Set the buffer (excluding padding) to the output of ACMRandom function // 'rand_func'. void Set(ACMRandom *rand_class, T (ACMRandom::*rand_func)()); // Set the buffer (excluding padding) to the output of ACMRandom function // 'RandRange' with range 'low' to 'high' which typically must be within // testing::internal::Random::kMaxRange (1u << 31). However, because we want // to allow negative low (and high) values, it is restricted to INT32_MAX // here. void Set(ACMRandom *rand_class, const T low, const T high); // Copy the contents of Buffer 'a' (excluding padding). void CopyFrom(const Buffer &a); void DumpBuffer() const; // Highlight the differences between two buffers if they are the same size. void PrintDifference(const Buffer &a) const; bool HasPadding() const; // Sets all the values in the buffer to 'padding_value'. void SetPadding(const T padding_value); // Checks if all the values (excluding padding) are equal to 'value' if the // Buffers are the same size. bool CheckValues(const T value) const; // Check that padding matches the expected value or there is no padding. bool CheckPadding() const; // Compare the non-padding portion of two buffers if they are the same size. bool CheckValues(const Buffer &a) const; bool Init() { if (raw_buffer_ != NULL) return false; EXPECT_GT(width_, 0); EXPECT_GT(height_, 0); EXPECT_GE(top_padding_, 0); EXPECT_GE(left_padding_, 0); EXPECT_GE(right_padding_, 0); EXPECT_GE(bottom_padding_, 0); stride_ = left_padding_ + width_ + right_padding_; num_elements_ = stride_ * (top_padding_ + height_ + bottom_padding_); raw_size_ = num_elements_ * sizeof(T); if (alignment_) { EXPECT_GE(alignment_, sizeof(T)); // Ensure alignment of the first value will be preserved. 
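// (i.e. the left padding spans a whole number of alignment units, so
// TopLeftPixel() retains the alignment of the vpx_memalign() allocation).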
EXPECT_EQ((left_padding_ * sizeof(T)) % alignment_, 0u); // Ensure alignment of the subsequent rows will be preserved when there is // a stride. if (stride_ != width_) { EXPECT_EQ((stride_ * sizeof(T)) % alignment_, 0u); } raw_buffer_ = reinterpret_cast(vpx_memalign(alignment_, raw_size_)); } else { raw_buffer_ = new (std::nothrow) T[num_elements_]; } EXPECT_TRUE(raw_buffer_ != NULL); SetPadding(std::numeric_limits::max()); return !::testing::Test::HasFailure(); } private: bool BufferSizesMatch(const Buffer &a) const; const int width_; const int height_; const int top_padding_; const int left_padding_; const int right_padding_; const int bottom_padding_; const unsigned int alignment_; T padding_value_; int stride_; int raw_size_; int num_elements_; T *raw_buffer_; }; template T *Buffer::TopLeftPixel() const { if (!raw_buffer_) return NULL; return raw_buffer_ + (top_padding_ * stride_) + left_padding_; } template void Buffer::Set(const T value) { if (!raw_buffer_) return; T *src = TopLeftPixel(); for (int height = 0; height < height_; ++height) { for (int width = 0; width < width_; ++width) { src[width] = value; } src += stride_; } } template void Buffer::Set(ACMRandom *rand_class, T (ACMRandom::*rand_func)()) { if (!raw_buffer_) return; T *src = TopLeftPixel(); for (int height = 0; height < height_; ++height) { for (int width = 0; width < width_; ++width) { src[width] = (*rand_class.*rand_func)(); } src += stride_; } } template void Buffer::Set(ACMRandom *rand_class, const T low, const T high) { if (!raw_buffer_) return; EXPECT_LE(low, high); EXPECT_LE(static_cast(high) - low, std::numeric_limits::max()); T *src = TopLeftPixel(); for (int height = 0; height < height_; ++height) { for (int width = 0; width < width_; ++width) { // 'low' will be promoted to unsigned given the return type of RandRange. // Store the value as an int to avoid unsigned overflow warnings when // 'low' is negative. 
const int32_t value = static_cast((*rand_class).RandRange(high - low)); src[width] = static_cast(value + low); } src += stride_; } } template void Buffer::CopyFrom(const Buffer &a) { if (!raw_buffer_) return; if (!BufferSizesMatch(a)) return; T *a_src = a.TopLeftPixel(); T *b_src = this->TopLeftPixel(); for (int height = 0; height < height_; ++height) { for (int width = 0; width < width_; ++width) { b_src[width] = a_src[width]; } a_src += a.stride(); b_src += this->stride(); } } template void Buffer::DumpBuffer() const { if (!raw_buffer_) return; for (int height = 0; height < height_ + top_padding_ + bottom_padding_; ++height) { for (int width = 0; width < stride_; ++width) { printf("%4d", raw_buffer_[height + width * stride_]); } printf("\n"); } } template bool Buffer::HasPadding() const { if (!raw_buffer_) return false; return top_padding_ || left_padding_ || right_padding_ || bottom_padding_; } template void Buffer::PrintDifference(const Buffer &a) const { if (!raw_buffer_) return; if (!BufferSizesMatch(a)) return; T *a_src = a.TopLeftPixel(); T *b_src = TopLeftPixel(); printf("This buffer:\n"); for (int height = 0; height < height_; ++height) { for (int width = 0; width < width_; ++width) { if (a_src[width] != b_src[width]) { printf("*%3d", b_src[width]); } else { printf("%4d", b_src[width]); } } printf("\n"); a_src += a.stride(); b_src += this->stride(); } a_src = a.TopLeftPixel(); b_src = TopLeftPixel(); printf("Reference buffer:\n"); for (int height = 0; height < height_; ++height) { for (int width = 0; width < width_; ++width) { if (a_src[width] != b_src[width]) { printf("*%3d", a_src[width]); } else { printf("%4d", a_src[width]); } } printf("\n"); a_src += a.stride(); b_src += this->stride(); } } template void Buffer::SetPadding(const T padding_value) { if (!raw_buffer_) return; padding_value_ = padding_value; T *src = raw_buffer_; for (int i = 0; i < num_elements_; ++i) { src[i] = padding_value; } } template bool Buffer::CheckValues(const T value) const { if (!raw_buffer_) return false; T *src = TopLeftPixel(); for (int height = 0; height < height_; ++height) { for (int width = 0; width < width_; ++width) { if (value != src[width]) { return false; } } src += stride_; } return true; } template bool Buffer::CheckPadding() const { if (!raw_buffer_) return false; if (!HasPadding()) return true; // Top padding. T const *top = raw_buffer_; for (int i = 0; i < stride_ * top_padding_; ++i) { if (padding_value_ != top[i]) { return false; } } // Left padding. T const *left = TopLeftPixel() - left_padding_; for (int height = 0; height < height_; ++height) { for (int width = 0; width < left_padding_; ++width) { if (padding_value_ != left[width]) { return false; } } left += stride_; } // Right padding. 
T const *right = TopLeftPixel() + width_; for (int height = 0; height < height_; ++height) { for (int width = 0; width < right_padding_; ++width) { if (padding_value_ != right[width]) { return false; } } right += stride_; } // Bottom padding T const *bottom = raw_buffer_ + (top_padding_ + height_) * stride_; for (int i = 0; i < stride_ * bottom_padding_; ++i) { if (padding_value_ != bottom[i]) { return false; } } return true; } template bool Buffer::CheckValues(const Buffer &a) const { if (!raw_buffer_) return false; if (!BufferSizesMatch(a)) return false; T *a_src = a.TopLeftPixel(); T *b_src = this->TopLeftPixel(); for (int height = 0; height < height_; ++height) { for (int width = 0; width < width_; ++width) { if (a_src[width] != b_src[width]) { return false; } } a_src += a.stride(); b_src += this->stride(); } return true; } template bool Buffer::BufferSizesMatch(const Buffer &a) const { if (!raw_buffer_) return false; if (a.width_ != this->width_ || a.height_ != this->height_) { printf( "Reference buffer of size %dx%d does not match this buffer which is " "size %dx%d\n", a.width_, a.height_, this->width_, this->height_); return false; } return true; } } // namespace libvpx_test #endif // VPX_TEST_BUFFER_H_ libvpx-1.8.2/test/byte_alignment_test.cc000066400000000000000000000133551357355204000203670ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vpx_config.h" #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/md5_helper.h" #include "test/util.h" #if CONFIG_WEBM_IO #include "test/webm_video_source.h" #endif namespace { #if CONFIG_WEBM_IO const int kLegacyByteAlignment = 0; const int kLegacyYPlaneByteAlignment = 32; const int kNumPlanesToCheck = 3; const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm"; const char kVP9Md5File[] = "vp90-2-02-size-lf-1920x1080.webm.md5"; struct ByteAlignmentTestParam { int byte_alignment; vpx_codec_err_t expected_value; bool decode_remaining; }; const ByteAlignmentTestParam kBaTestParams[] = { { kLegacyByteAlignment, VPX_CODEC_OK, true }, { 32, VPX_CODEC_OK, true }, { 64, VPX_CODEC_OK, true }, { 128, VPX_CODEC_OK, true }, { 256, VPX_CODEC_OK, true }, { 512, VPX_CODEC_OK, true }, { 1024, VPX_CODEC_OK, true }, { 1, VPX_CODEC_INVALID_PARAM, false }, { -2, VPX_CODEC_INVALID_PARAM, false }, { 4, VPX_CODEC_INVALID_PARAM, false }, { 16, VPX_CODEC_INVALID_PARAM, false }, { 255, VPX_CODEC_INVALID_PARAM, false }, { 2048, VPX_CODEC_INVALID_PARAM, false }, }; // Class for testing byte alignment of reference buffers. 
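// The fixture decodes a 1080p VP9 stream with VP9_SET_BYTE_ALIGNMENT
// applied, then checks that every decoded plane pointer honors the
// requested alignment and that the frames still match their reference
// MD5 sums.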
class ByteAlignmentTest : public ::testing::TestWithParam { protected: ByteAlignmentTest() : video_(NULL), decoder_(NULL), md5_file_(NULL) {} virtual void SetUp() { video_ = new libvpx_test::WebMVideoSource(kVP9TestFile); ASSERT_TRUE(video_ != NULL); video_->Init(); video_->Begin(); const vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); decoder_ = new libvpx_test::VP9Decoder(cfg, 0); ASSERT_TRUE(decoder_ != NULL); OpenMd5File(kVP9Md5File); } virtual void TearDown() { if (md5_file_ != NULL) fclose(md5_file_); delete decoder_; delete video_; } void SetByteAlignment(int byte_alignment, vpx_codec_err_t expected_value) { decoder_->Control(VP9_SET_BYTE_ALIGNMENT, byte_alignment, expected_value); } vpx_codec_err_t DecodeOneFrame(int byte_alignment_to_check) { const vpx_codec_err_t res = decoder_->DecodeFrame(video_->cxdata(), video_->frame_size()); CheckDecodedFrames(byte_alignment_to_check); if (res == VPX_CODEC_OK) video_->Next(); return res; } vpx_codec_err_t DecodeRemainingFrames(int byte_alignment_to_check) { for (; video_->cxdata() != NULL; video_->Next()) { const vpx_codec_err_t res = decoder_->DecodeFrame(video_->cxdata(), video_->frame_size()); if (res != VPX_CODEC_OK) return res; CheckDecodedFrames(byte_alignment_to_check); } return VPX_CODEC_OK; } private: // Check if |data| is aligned to |byte_alignment_to_check|. // |byte_alignment_to_check| must be a power of 2. void CheckByteAlignment(const uint8_t *data, int byte_alignment_to_check) { ASSERT_EQ(0u, reinterpret_cast(data) % byte_alignment_to_check); } // Iterate through the planes of the decoded frames and check for // alignment based off |byte_alignment_to_check|. void CheckDecodedFrames(int byte_alignment_to_check) { libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData(); const vpx_image_t *img; // Get decompressed data while ((img = dec_iter.Next()) != NULL) { if (byte_alignment_to_check == kLegacyByteAlignment) { CheckByteAlignment(img->planes[0], kLegacyYPlaneByteAlignment); } else { for (int i = 0; i < kNumPlanesToCheck; ++i) { CheckByteAlignment(img->planes[i], byte_alignment_to_check); } } CheckMd5(*img); } } // TODO(fgalligan): Move the MD5 testing code into another class. void OpenMd5File(const std::string &md5_file_name_) { md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_); ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: " << md5_file_name_; } void CheckMd5(const vpx_image_t &img) { ASSERT_TRUE(md5_file_ != NULL); char expected_md5[33]; char junk[128]; // Read correct md5 checksums. const int res = fscanf(md5_file_, "%s %s", expected_md5, junk); ASSERT_NE(EOF, res) << "Read md5 data failed"; expected_md5[32] = '\0'; ::libvpx_test::MD5 md5_res; md5_res.Add(&img); const char *const actual_md5 = md5_res.Get(); // Check md5 match. 
ASSERT_STREQ(expected_md5, actual_md5) << "MD5 checksums don't match"; } libvpx_test::WebMVideoSource *video_; libvpx_test::VP9Decoder *decoder_; FILE *md5_file_; }; TEST_F(ByteAlignmentTest, SwitchByteAlignment) { const int num_elements = 14; const int byte_alignments[] = { 0, 32, 64, 128, 256, 512, 1024, 0, 1024, 32, 512, 64, 256, 128 }; for (int i = 0; i < num_elements; ++i) { SetByteAlignment(byte_alignments[i], VPX_CODEC_OK); ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame(byte_alignments[i])); } SetByteAlignment(byte_alignments[0], VPX_CODEC_OK); ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames(byte_alignments[0])); } TEST_P(ByteAlignmentTest, TestAlignment) { const ByteAlignmentTestParam t = GetParam(); SetByteAlignment(t.byte_alignment, t.expected_value); if (t.decode_remaining) { ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames(t.byte_alignment)); } } INSTANTIATE_TEST_CASE_P(Alignments, ByteAlignmentTest, ::testing::ValuesIn(kBaTestParams)); #endif // CONFIG_WEBM_IO } // namespace libvpx-1.8.2/test/clear_system_state.h000066400000000000000000000014441357355204000200570ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_CLEAR_SYSTEM_STATE_H_ #define VPX_TEST_CLEAR_SYSTEM_STATE_H_ #include "./vpx_config.h" #include "vpx_ports/system_state.h" namespace libvpx_test { // Reset system to a known state. This function should be used for all non-API // test cases. inline void ClearSystemState() { vpx_clear_system_state(); } } // namespace libvpx_test #endif // VPX_TEST_CLEAR_SYSTEM_STATE_H_ libvpx-1.8.2/test/codec_factory.h000066400000000000000000000171311357355204000167710ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_CODEC_FACTORY_H_ #define VPX_TEST_CODEC_FACTORY_H_ #include <tuple> #include "./vpx_config.h" #include "vpx/vpx_decoder.h" #include "vpx/vpx_encoder.h" #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER #include "vpx/vp8cx.h" #endif #if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER #include "vpx/vp8dx.h" #endif #include "test/decode_test_driver.h" #include "test/encode_test_driver.h" namespace libvpx_test { const int kCodecFactoryParam = 0; class CodecFactory { public: CodecFactory() {} virtual ~CodecFactory() {} virtual Decoder *CreateDecoder(vpx_codec_dec_cfg_t cfg) const = 0; virtual Decoder *CreateDecoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flags) const = 0; virtual Encoder *CreateEncoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline, const unsigned long init_flags, TwopassStatsStore *stats) const = 0; virtual vpx_codec_err_t DefaultEncoderConfig(vpx_codec_enc_cfg_t *cfg, int usage) const = 0; }; /* Provide CodecTestWithParams classes for a variable number of parameters * to avoid having to include a pointer to the CodecFactory in every test * definition.
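 * As an illustrative sketch (fixture name hypothetical), a two-parameter
 * test would be declared as
 *   class MyTest : public CodecTestWith2Params<libvpx_test::TestMode, int> {};
 * and would then fetch the factory with GET_PARAM(0) and its own parameters
 * with GET_PARAM(1) and GET_PARAM(2).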
*/ template <class T1> class CodecTestWithParam : public ::testing::TestWithParam< std::tuple<const libvpx_test::CodecFactory *, T1> > {}; template <class T1, class T2> class CodecTestWith2Params : public ::testing::TestWithParam< std::tuple<const libvpx_test::CodecFactory *, T1, T2> > {}; template <class T1, class T2, class T3> class CodecTestWith3Params : public ::testing::TestWithParam< std::tuple<const libvpx_test::CodecFactory *, T1, T2, T3> > {}; template <class T1, class T2, class T3, class T4> class CodecTestWith4Params : public ::testing::TestWithParam< std::tuple<const libvpx_test::CodecFactory *, T1, T2, T3, T4> > {}; /* * VP8 Codec Definitions */ #if CONFIG_VP8 class VP8Decoder : public Decoder { public: explicit VP8Decoder(vpx_codec_dec_cfg_t cfg) : Decoder(cfg) {} VP8Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag) : Decoder(cfg, flag) {} protected: virtual vpx_codec_iface_t *CodecInterface() const { #if CONFIG_VP8_DECODER return &vpx_codec_vp8_dx_algo; #else return NULL; #endif } }; class VP8Encoder : public Encoder { public: VP8Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline, const unsigned long init_flags, TwopassStatsStore *stats) : Encoder(cfg, deadline, init_flags, stats) {} protected: virtual vpx_codec_iface_t *CodecInterface() const { #if CONFIG_VP8_ENCODER return &vpx_codec_vp8_cx_algo; #else return NULL; #endif } }; class VP8CodecFactory : public CodecFactory { public: VP8CodecFactory() : CodecFactory() {} virtual Decoder *CreateDecoder(vpx_codec_dec_cfg_t cfg) const { return CreateDecoder(cfg, 0); } virtual Decoder *CreateDecoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flags) const { #if CONFIG_VP8_DECODER return new VP8Decoder(cfg, flags); #else (void)cfg; (void)flags; return NULL; #endif } virtual Encoder *CreateEncoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline, const unsigned long init_flags, TwopassStatsStore *stats) const { #if CONFIG_VP8_ENCODER return new VP8Encoder(cfg, deadline, init_flags, stats); #else (void)cfg; (void)deadline; (void)init_flags; (void)stats; return NULL; #endif } virtual vpx_codec_err_t DefaultEncoderConfig(vpx_codec_enc_cfg_t *cfg, int usage) const { #if CONFIG_VP8_ENCODER return vpx_codec_enc_config_default(&vpx_codec_vp8_cx_algo, cfg, usage); #else (void)cfg; (void)usage; return VPX_CODEC_INCAPABLE; #endif } }; const libvpx_test::VP8CodecFactory kVP8; #define VP8_INSTANTIATE_TEST_CASE(test, ...) \ INSTANTIATE_TEST_CASE_P( \ VP8, test, \ ::testing::Combine( \ ::testing::Values(static_cast<const libvpx_test::CodecFactory *>( \ &libvpx_test::kVP8)), \ __VA_ARGS__)) #else #define VP8_INSTANTIATE_TEST_CASE(test, ...)
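// Usage note (illustrative): when VP8 is enabled, a fixture binds to the VP8
// factory through the macro above, e.g. the instantiation in config_test.cc
// further below:
//   VP8_INSTANTIATE_TEST_CASE(ConfigTest, ONE_PASS_TEST_MODES);
// Each generated parameter tuple then carries &libvpx_test::kVP8 as
// GET_PARAM(0).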
#endif // CONFIG_VP8 /* * VP9 Codec Definitions */ #if CONFIG_VP9 class VP9Decoder : public Decoder { public: explicit VP9Decoder(vpx_codec_dec_cfg_t cfg) : Decoder(cfg) {} VP9Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag) : Decoder(cfg, flag) {} protected: virtual vpx_codec_iface_t *CodecInterface() const { #if CONFIG_VP9_DECODER return &vpx_codec_vp9_dx_algo; #else return NULL; #endif } }; class VP9Encoder : public Encoder { public: VP9Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline, const unsigned long init_flags, TwopassStatsStore *stats) : Encoder(cfg, deadline, init_flags, stats) {} protected: virtual vpx_codec_iface_t *CodecInterface() const { #if CONFIG_VP9_ENCODER return &vpx_codec_vp9_cx_algo; #else return NULL; #endif } }; class VP9CodecFactory : public CodecFactory { public: VP9CodecFactory() : CodecFactory() {} virtual Decoder *CreateDecoder(vpx_codec_dec_cfg_t cfg) const { return CreateDecoder(cfg, 0); } virtual Decoder *CreateDecoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flags) const { #if CONFIG_VP9_DECODER return new VP9Decoder(cfg, flags); #else (void)cfg; (void)flags; return NULL; #endif } virtual Encoder *CreateEncoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline, const unsigned long init_flags, TwopassStatsStore *stats) const { #if CONFIG_VP9_ENCODER return new VP9Encoder(cfg, deadline, init_flags, stats); #else (void)cfg; (void)deadline; (void)init_flags; (void)stats; return NULL; #endif } virtual vpx_codec_err_t DefaultEncoderConfig(vpx_codec_enc_cfg_t *cfg, int usage) const { #if CONFIG_VP9_ENCODER return vpx_codec_enc_config_default(&vpx_codec_vp9_cx_algo, cfg, usage); #else (void)cfg; (void)usage; return VPX_CODEC_INCAPABLE; #endif } }; const libvpx_test::VP9CodecFactory kVP9; #define VP9_INSTANTIATE_TEST_CASE(test, ...) \ INSTANTIATE_TEST_CASE_P( \ VP9, test, \ ::testing::Combine( \ ::testing::Values(static_cast( \ &libvpx_test::kVP9)), \ __VA_ARGS__)) #else #define VP9_INSTANTIATE_TEST_CASE(test, ...) #endif // CONFIG_VP9 } // namespace libvpx_test #endif // VPX_TEST_CODEC_FACTORY_H_ libvpx-1.8.2/test/comp_avg_pred_test.cc000066400000000000000000000145451357355204000201750ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/buffer.h" #include "test/register_state_check.h" #include "vpx_ports/vpx_timer.h" namespace { using ::libvpx_test::ACMRandom; using ::libvpx_test::Buffer; typedef void (*AvgPredFunc)(uint8_t *a, const uint8_t *b, int w, int h, const uint8_t *c, int c_stride); uint8_t avg_with_rounding(uint8_t a, uint8_t b) { return (a + b + 1) >> 1; } void reference_pred(const Buffer &pred, const Buffer &ref, int width, int height, Buffer *avg) { ASSERT_TRUE(avg->TopLeftPixel() != NULL); ASSERT_TRUE(pred.TopLeftPixel() != NULL); ASSERT_TRUE(ref.TopLeftPixel() != NULL); for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { avg->TopLeftPixel()[y * avg->stride() + x] = avg_with_rounding(pred.TopLeftPixel()[y * pred.stride() + x], ref.TopLeftPixel()[y * ref.stride() + x]); } } } class AvgPredTest : public ::testing::TestWithParam { public: virtual void SetUp() { avg_pred_func_ = GetParam(); rnd_.Reset(ACMRandom::DeterministicSeed()); } protected: AvgPredFunc avg_pred_func_; ACMRandom rnd_; }; TEST_P(AvgPredTest, SizeCombinations) { // This is called as part of the sub pixel variance. As such it must be one of // the variance block sizes. for (int width_pow = 2; width_pow <= 6; ++width_pow) { for (int height_pow = width_pow - 1; height_pow <= width_pow + 1; ++height_pow) { // Don't test 4x2 or 64x128 if (height_pow == 1 || height_pow == 7) continue; // The sse2 special-cases when ref width == stride, so make sure to test // it. for (int ref_padding = 0; ref_padding < 2; ref_padding++) { const int width = 1 << width_pow; const int height = 1 << height_pow; // Only the reference buffer may have a stride not equal to width. Buffer ref = Buffer(width, height, ref_padding ? 
8 : 0); ASSERT_TRUE(ref.Init()); Buffer<uint8_t> pred = Buffer<uint8_t>(width, height, 0, 16); ASSERT_TRUE(pred.Init()); Buffer<uint8_t> avg_ref = Buffer<uint8_t>(width, height, 0, 16); ASSERT_TRUE(avg_ref.Init()); Buffer<uint8_t> avg_chk = Buffer<uint8_t>(width, height, 0, 16); ASSERT_TRUE(avg_chk.Init()); ref.Set(&rnd_, &ACMRandom::Rand8); pred.Set(&rnd_, &ACMRandom::Rand8); reference_pred(pred, ref, width, height, &avg_ref); ASM_REGISTER_STATE_CHECK( avg_pred_func_(avg_chk.TopLeftPixel(), pred.TopLeftPixel(), width, height, ref.TopLeftPixel(), ref.stride())); EXPECT_TRUE(avg_chk.CheckValues(avg_ref)); if (HasFailure()) { printf("Width: %d Height: %d\n", width, height); avg_chk.PrintDifference(avg_ref); return; } } } } } TEST_P(AvgPredTest, CompareReferenceRandom) { const int width = 64; const int height = 32; Buffer<uint8_t> ref = Buffer<uint8_t>(width, height, 8); ASSERT_TRUE(ref.Init()); Buffer<uint8_t> pred = Buffer<uint8_t>(width, height, 0, 16); ASSERT_TRUE(pred.Init()); Buffer<uint8_t> avg_ref = Buffer<uint8_t>(width, height, 0, 16); ASSERT_TRUE(avg_ref.Init()); Buffer<uint8_t> avg_chk = Buffer<uint8_t>(width, height, 0, 16); ASSERT_TRUE(avg_chk.Init()); for (int i = 0; i < 500; ++i) { ref.Set(&rnd_, &ACMRandom::Rand8); pred.Set(&rnd_, &ACMRandom::Rand8); reference_pred(pred, ref, width, height, &avg_ref); ASM_REGISTER_STATE_CHECK(avg_pred_func_(avg_chk.TopLeftPixel(), pred.TopLeftPixel(), width, height, ref.TopLeftPixel(), ref.stride())); EXPECT_TRUE(avg_chk.CheckValues(avg_ref)); if (HasFailure()) { printf("Width: %d Height: %d\n", width, height); avg_chk.PrintDifference(avg_ref); return; } } } TEST_P(AvgPredTest, DISABLED_Speed) { for (int width_pow = 2; width_pow <= 6; ++width_pow) { for (int height_pow = width_pow - 1; height_pow <= width_pow + 1; ++height_pow) { // Don't test 4x2 or 64x128 if (height_pow == 1 || height_pow == 7) continue; for (int ref_padding = 0; ref_padding < 2; ref_padding++) { const int width = 1 << width_pow; const int height = 1 << height_pow; Buffer<uint8_t> ref = Buffer<uint8_t>(width, height, ref_padding ? 8 : 0); ASSERT_TRUE(ref.Init()); Buffer<uint8_t> pred = Buffer<uint8_t>(width, height, 0, 16); ASSERT_TRUE(pred.Init()); Buffer<uint8_t> avg = Buffer<uint8_t>(width, height, 0, 16); ASSERT_TRUE(avg.Init()); ref.Set(&rnd_, &ACMRandom::Rand8); pred.Set(&rnd_, &ACMRandom::Rand8); vpx_usec_timer timer; vpx_usec_timer_start(&timer); for (int i = 0; i < 10000000 / (width * height); ++i) { avg_pred_func_(avg.TopLeftPixel(), pred.TopLeftPixel(), width, height, ref.TopLeftPixel(), ref.stride()); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer)); printf("Average Test (ref_padding: %d) %dx%d time: %5d us\n", ref_padding, width, height, elapsed_time); } } } } INSTANTIATE_TEST_CASE_P(C, AvgPredTest, ::testing::Values(&vpx_comp_avg_pred_c)); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P(SSE2, AvgPredTest, ::testing::Values(&vpx_comp_avg_pred_sse2)); #endif // HAVE_SSE2 #if HAVE_NEON INSTANTIATE_TEST_CASE_P(NEON, AvgPredTest, ::testing::Values(&vpx_comp_avg_pred_neon)); #endif // HAVE_NEON #if HAVE_VSX INSTANTIATE_TEST_CASE_P(VSX, AvgPredTest, ::testing::Values(&vpx_comp_avg_pred_vsx)); #endif // HAVE_VSX } // namespace libvpx-1.8.2/test/config_test.cc000066400000000000000000000033201357355204000166220ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS.
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/util.h" #include "test/video_source.h" namespace { class ConfigTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { protected: ConfigTest() : EncoderTest(GET_PARAM(0)), frame_count_in_(0), frame_count_out_(0), frame_count_max_(0) {} virtual ~ConfigTest() {} virtual void SetUp() { InitializeConfig(); SetMode(GET_PARAM(1)); } virtual void BeginPassHook(unsigned int /*pass*/) { frame_count_in_ = 0; frame_count_out_ = 0; } virtual void PreEncodeFrameHook(libvpx_test::VideoSource * /*video*/) { ++frame_count_in_; abort_ |= (frame_count_in_ >= frame_count_max_); } virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) { ++frame_count_out_; } unsigned int frame_count_in_; unsigned int frame_count_out_; unsigned int frame_count_max_; }; TEST_P(ConfigTest, LagIsDisabled) { frame_count_max_ = 2; cfg_.g_lag_in_frames = 15; libvpx_test::DummyVideoSource video; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); EXPECT_EQ(frame_count_in_, frame_count_out_); } VP8_INSTANTIATE_TEST_CASE(ConfigTest, ONE_PASS_TEST_MODES); } // namespace libvpx-1.8.2/test/consistency_test.cc000066400000000000000000000157031357355204000177260ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #if CONFIG_VP9_ENCODER #include "./vp9_rtcd.h" #endif #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vpx_dsp/ssim.h" #include "vpx_mem/vpx_mem.h" extern "C" double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2, int img2_pitch, int width, int height, Ssimv *sv2, Metrics *m, int do_inconsistency); using libvpx_test::ACMRandom; namespace { class ConsistencyTestBase : public ::testing::Test { public: ConsistencyTestBase(int width, int height) : width_(width), height_(height) {} static void SetUpTestCase() { source_data_[0] = reinterpret_cast( vpx_memalign(kDataAlignment, kDataBufferSize)); reference_data_[0] = reinterpret_cast( vpx_memalign(kDataAlignment, kDataBufferSize)); source_data_[1] = reinterpret_cast( vpx_memalign(kDataAlignment, kDataBufferSize)); reference_data_[1] = reinterpret_cast( vpx_memalign(kDataAlignment, kDataBufferSize)); ssim_array_ = new Ssimv[kDataBufferSize / 16]; } static void ClearSsim() { memset(ssim_array_, 0, kDataBufferSize / 16); } static void TearDownTestCase() { vpx_free(source_data_[0]); source_data_[0] = NULL; vpx_free(reference_data_[0]); reference_data_[0] = NULL; vpx_free(source_data_[1]); source_data_[1] = NULL; vpx_free(reference_data_[1]); reference_data_[1] = NULL; delete[] ssim_array_; } virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: // Handle frames up to 640x480 static const int kDataAlignment = 16; static const int kDataBufferSize = 640 * 480; virtual void SetUp() { source_stride_ = (width_ + 31) & ~31; reference_stride_ = width_ * 2; rnd_.Reset(ACMRandom::DeterministicSeed()); } void FillRandom(uint8_t *data, int stride, int width, int height) { for (int h = 0; h < height; ++h) { for (int w = 0; w < width; ++w) { data[h * stride + w] = rnd_.Rand8(); } } } void FillRandom(uint8_t *data, int stride) { FillRandom(data, stride, width_, height_); } void Copy(uint8_t *reference, uint8_t *source) { memcpy(reference, source, kDataBufferSize); } void Blur(uint8_t *data, int stride, int taps) { int sum = 0; int half_taps = taps / 2; for (int h = 0; h < height_; ++h) { for (int w = 0; w < taps; ++w) { sum += data[w + h * stride]; } for (int w = taps; w < width_; ++w) { sum += data[w + h * stride] - data[w - taps + h * stride]; data[w - half_taps + h * stride] = (sum + half_taps) / taps; } } for (int w = 0; w < width_; ++w) { for (int h = 0; h < taps; ++h) { sum += data[h + w * stride]; } for (int h = taps; h < height_; ++h) { sum += data[w + h * stride] - data[(h - taps) * stride + w]; data[(h - half_taps) * stride + w] = (sum + half_taps) / taps; } } } int width_, height_; static uint8_t *source_data_[2]; int source_stride_; static uint8_t *reference_data_[2]; int reference_stride_; static Ssimv *ssim_array_; Metrics metrics_; ACMRandom rnd_; }; #if CONFIG_VP9_ENCODER typedef std::tuple ConsistencyParam; class ConsistencyVP9Test : public ConsistencyTestBase, public ::testing::WithParamInterface { public: ConsistencyVP9Test() : ConsistencyTestBase(GET_PARAM(0), GET_PARAM(1)) {} protected: double CheckConsistency(int frame) { EXPECT_LT(frame, 2) << "Frame to check has to be less than 2."; return vpx_get_ssim_metrics(source_data_[frame], source_stride_, reference_data_[frame], reference_stride_, width_, height_, ssim_array_, &metrics_, 1); } }; #endif // CONFIG_VP9_ENCODER uint8_t 
*ConsistencyTestBase::source_data_[2] = { NULL, NULL }; uint8_t *ConsistencyTestBase::reference_data_[2] = { NULL, NULL }; Ssimv *ConsistencyTestBase::ssim_array_ = NULL; #if CONFIG_VP9_ENCODER TEST_P(ConsistencyVP9Test, ConsistencyIsZero) { FillRandom(source_data_[0], source_stride_); Copy(source_data_[1], source_data_[0]); Copy(reference_data_[0], source_data_[0]); Blur(reference_data_[0], reference_stride_, 3); Copy(reference_data_[1], source_data_[0]); Blur(reference_data_[1], reference_stride_, 3); double inconsistency = CheckConsistency(1); inconsistency = CheckConsistency(0); EXPECT_EQ(inconsistency, 0.0) << "Should have 0 inconsistency if they are exactly the same."; // If sources are not consistent reference frames inconsistency should // be less than if the source is consistent. FillRandom(source_data_[0], source_stride_); FillRandom(source_data_[1], source_stride_); FillRandom(reference_data_[0], reference_stride_); FillRandom(reference_data_[1], reference_stride_); CheckConsistency(0); inconsistency = CheckConsistency(1); Copy(source_data_[1], source_data_[0]); CheckConsistency(0); double inconsistency2 = CheckConsistency(1); EXPECT_LT(inconsistency, inconsistency2) << "Should have less inconsistency if source itself is inconsistent."; // Less of a blur should be less inconsistent than more blur coming off a // a frame with no blur. ClearSsim(); FillRandom(source_data_[0], source_stride_); Copy(source_data_[1], source_data_[0]); Copy(reference_data_[0], source_data_[0]); Copy(reference_data_[1], source_data_[0]); Blur(reference_data_[1], reference_stride_, 4); CheckConsistency(0); inconsistency = CheckConsistency(1); ClearSsim(); Copy(reference_data_[1], source_data_[0]); Blur(reference_data_[1], reference_stride_, 8); CheckConsistency(0); inconsistency2 = CheckConsistency(1); EXPECT_LT(inconsistency, inconsistency2) << "Stronger Blur should produce more inconsistency."; } #endif // CONFIG_VP9_ENCODER using std::make_tuple; //------------------------------------------------------------------------------ // C functions #if CONFIG_VP9_ENCODER const ConsistencyParam c_vp9_tests[] = { make_tuple(320, 240), make_tuple(318, 242), make_tuple(318, 238) }; INSTANTIATE_TEST_CASE_P(C, ConsistencyVP9Test, ::testing::ValuesIn(c_vp9_tests)); #endif } // namespace libvpx-1.8.2/test/convolve_test.cc000066400000000000000000001541731357355204000172250ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_filter.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_filter.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" namespace { static const unsigned int kMaxDimension = 64; typedef void (*ConvolveFunc)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h); typedef void (*WrapperFilterBlock2d8Func)( const uint8_t *src_ptr, const unsigned int src_stride, const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr, unsigned int dst_stride, unsigned int output_width, unsigned int output_height, int use_highbd); struct ConvolveFunctions { ConvolveFunctions(ConvolveFunc copy, ConvolveFunc avg, ConvolveFunc h8, ConvolveFunc h8_avg, ConvolveFunc v8, ConvolveFunc v8_avg, ConvolveFunc hv8, ConvolveFunc hv8_avg, ConvolveFunc sh8, ConvolveFunc sh8_avg, ConvolveFunc sv8, ConvolveFunc sv8_avg, ConvolveFunc shv8, ConvolveFunc shv8_avg, int bd) : use_highbd_(bd) { copy_[0] = copy; copy_[1] = avg; h8_[0] = h8; h8_[1] = h8_avg; v8_[0] = v8; v8_[1] = v8_avg; hv8_[0] = hv8; hv8_[1] = hv8_avg; sh8_[0] = sh8; sh8_[1] = sh8_avg; sv8_[0] = sv8; sv8_[1] = sv8_avg; shv8_[0] = shv8; shv8_[1] = shv8_avg; } ConvolveFunc copy_[2]; ConvolveFunc h8_[2]; ConvolveFunc v8_[2]; ConvolveFunc hv8_[2]; ConvolveFunc sh8_[2]; // scaled horiz ConvolveFunc sv8_[2]; // scaled vert ConvolveFunc shv8_[2]; // scaled horiz/vert int use_highbd_; // 0 if high bitdepth not used, else the actual bit depth. }; typedef std::tuple ConvolveParam; #define ALL_SIZES(convolve_fn) \ make_tuple(4, 4, &convolve_fn), make_tuple(8, 4, &convolve_fn), \ make_tuple(4, 8, &convolve_fn), make_tuple(8, 8, &convolve_fn), \ make_tuple(16, 8, &convolve_fn), make_tuple(8, 16, &convolve_fn), \ make_tuple(16, 16, &convolve_fn), make_tuple(32, 16, &convolve_fn), \ make_tuple(16, 32, &convolve_fn), make_tuple(32, 32, &convolve_fn), \ make_tuple(64, 32, &convolve_fn), make_tuple(32, 64, &convolve_fn), \ make_tuple(64, 64, &convolve_fn) // Reference 8-tap subpixel filter, slightly modified to fit into this test. #define VP9_FILTER_WEIGHT 128 #define VP9_FILTER_SHIFT 7 uint8_t clip_pixel(int x) { return x < 0 ? 0 : x > 255 ? 255 : x; } void filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride, const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr, unsigned int dst_stride, unsigned int output_width, unsigned int output_height) { // Between passes, we use an intermediate buffer whose height is extended to // have enough horizontally filtered values as input for the vertical pass. // This buffer is allocated to be big enough for the largest block type we // support. 
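// With kInterp_Extend = 4 (below) and kMaxDimension = 64 that bound is
// (kInterp_Extend - 1) + kMaxDimension + kInterp_Extend = 3 + 64 + 4 = 71
// rows, matching the intermediate_buffer[71 * kMaxDimension] allocation; the
// older sizing comment below (3 + 16 + 4 = 23) predates the 64x64 maximum.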
const int kInterp_Extend = 4; const unsigned int intermediate_height = (kInterp_Extend - 1) + output_height + kInterp_Extend; unsigned int i, j; // Size of intermediate_buffer is max_intermediate_height * filter_max_width, // where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height // + kInterp_Extend // = 3 + 16 + 4 // = 23 // and filter_max_width = 16 // uint8_t intermediate_buffer[71 * kMaxDimension]; vp9_zero(intermediate_buffer); const int intermediate_next_stride = 1 - static_cast(intermediate_height * output_width); // Horizontal pass (src -> transposed intermediate). uint8_t *output_ptr = intermediate_buffer; const int src_next_row_stride = src_stride - output_width; src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); for (i = 0; i < intermediate_height; ++i) { for (j = 0; j < output_width; ++j) { // Apply filter... const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) + (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) + (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) + (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) + (VP9_FILTER_WEIGHT >> 1); // Rounding // Normalize back to 0-255... *output_ptr = clip_pixel(temp >> VP9_FILTER_SHIFT); ++src_ptr; output_ptr += intermediate_height; } src_ptr += src_next_row_stride; output_ptr += intermediate_next_stride; } // Vertical pass (transposed intermediate -> dst). src_ptr = intermediate_buffer; const int dst_next_row_stride = dst_stride - output_width; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { // Apply filter... const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) + (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) + (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) + (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) + (VP9_FILTER_WEIGHT >> 1); // Rounding // Normalize back to 0-255... *dst_ptr++ = clip_pixel(temp >> VP9_FILTER_SHIFT); src_ptr += intermediate_height; } src_ptr += intermediate_next_stride; dst_ptr += dst_next_row_stride; } } void block2d_average_c(uint8_t *src, unsigned int src_stride, uint8_t *output_ptr, unsigned int output_stride, unsigned int output_width, unsigned int output_height) { unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; } output_ptr += output_stride; } } void filter_average_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride, const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr, unsigned int dst_stride, unsigned int output_width, unsigned int output_height) { uint8_t tmp[kMaxDimension * kMaxDimension]; assert(output_width <= kMaxDimension); assert(output_height <= kMaxDimension); filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64, output_width, output_height); block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width, output_height); } #if CONFIG_VP9_HIGHBITDEPTH void highbd_filter_block2d_8_c(const uint16_t *src_ptr, const unsigned int src_stride, const int16_t *hfilter, const int16_t *vfilter, uint16_t *dst_ptr, unsigned int dst_stride, unsigned int output_width, unsigned int output_height, int bd) { // Between passes, we use an intermediate buffer whose height is extended to // have enough horizontally filtered values as input for the vertical pass. // This buffer is allocated to be big enough for the largest block type we // support. 
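// (Sized with the same bound as the 8-bit reference above:
// 3 + 64 + 4 = 71 rows of kMaxDimension entries.)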
const int kInterp_Extend = 4; const unsigned int intermediate_height = (kInterp_Extend - 1) + output_height + kInterp_Extend; /* Size of intermediate_buffer is max_intermediate_height * filter_max_width, * where max_intermediate_height = (kInterp_Extend - 1) + filter_max_height * + kInterp_Extend * = 3 + 16 + 4 * = 23 * and filter_max_width = 16 */ uint16_t intermediate_buffer[71 * kMaxDimension]; const int intermediate_next_stride = 1 - static_cast(intermediate_height * output_width); vp9_zero(intermediate_buffer); // Horizontal pass (src -> transposed intermediate). { uint16_t *output_ptr = intermediate_buffer; const int src_next_row_stride = src_stride - output_width; unsigned int i, j; src_ptr -= (kInterp_Extend - 1) * src_stride + (kInterp_Extend - 1); for (i = 0; i < intermediate_height; ++i) { for (j = 0; j < output_width; ++j) { // Apply filter... const int temp = (src_ptr[0] * hfilter[0]) + (src_ptr[1] * hfilter[1]) + (src_ptr[2] * hfilter[2]) + (src_ptr[3] * hfilter[3]) + (src_ptr[4] * hfilter[4]) + (src_ptr[5] * hfilter[5]) + (src_ptr[6] * hfilter[6]) + (src_ptr[7] * hfilter[7]) + (VP9_FILTER_WEIGHT >> 1); // Rounding // Normalize back to 0-255... *output_ptr = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd); ++src_ptr; output_ptr += intermediate_height; } src_ptr += src_next_row_stride; output_ptr += intermediate_next_stride; } } // Vertical pass (transposed intermediate -> dst). { uint16_t *src_ptr = intermediate_buffer; const int dst_next_row_stride = dst_stride - output_width; unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { // Apply filter... const int temp = (src_ptr[0] * vfilter[0]) + (src_ptr[1] * vfilter[1]) + (src_ptr[2] * vfilter[2]) + (src_ptr[3] * vfilter[3]) + (src_ptr[4] * vfilter[4]) + (src_ptr[5] * vfilter[5]) + (src_ptr[6] * vfilter[6]) + (src_ptr[7] * vfilter[7]) + (VP9_FILTER_WEIGHT >> 1); // Rounding // Normalize back to 0-255... 
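// (Strictly, for high bitdepth the valid range is [0, (1 << bd) - 1];
// clip_pixel_highbd() saturates to that range.)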
*dst_ptr++ = clip_pixel_highbd(temp >> VP9_FILTER_SHIFT, bd); src_ptr += intermediate_height; } src_ptr += intermediate_next_stride; dst_ptr += dst_next_row_stride; } } } void highbd_block2d_average_c(uint16_t *src, unsigned int src_stride, uint16_t *output_ptr, unsigned int output_stride, unsigned int output_width, unsigned int output_height) { unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { output_ptr[j] = (output_ptr[j] + src[i * src_stride + j] + 1) >> 1; } output_ptr += output_stride; } } void highbd_filter_average_block2d_8_c( const uint16_t *src_ptr, const unsigned int src_stride, const int16_t *hfilter, const int16_t *vfilter, uint16_t *dst_ptr, unsigned int dst_stride, unsigned int output_width, unsigned int output_height, int bd) { uint16_t tmp[kMaxDimension * kMaxDimension]; assert(output_width <= kMaxDimension); assert(output_height <= kMaxDimension); highbd_filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, tmp, 64, output_width, output_height, bd); highbd_block2d_average_c(tmp, 64, dst_ptr, dst_stride, output_width, output_height); } #endif // CONFIG_VP9_HIGHBITDEPTH void wrapper_filter_average_block2d_8_c( const uint8_t *src_ptr, const unsigned int src_stride, const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr, unsigned int dst_stride, unsigned int output_width, unsigned int output_height, int use_highbd) { #if CONFIG_VP9_HIGHBITDEPTH if (use_highbd == 0) { filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, dst_stride, output_width, output_height); } else { highbd_filter_average_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride, hfilter, vfilter, CAST_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height, use_highbd); } #else ASSERT_EQ(0, use_highbd); filter_average_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, dst_stride, output_width, output_height); #endif } void wrapper_filter_block2d_8_c(const uint8_t *src_ptr, const unsigned int src_stride, const int16_t *hfilter, const int16_t *vfilter, uint8_t *dst_ptr, unsigned int dst_stride, unsigned int output_width, unsigned int output_height, int use_highbd) { #if CONFIG_VP9_HIGHBITDEPTH if (use_highbd == 0) { filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, dst_stride, output_width, output_height); } else { highbd_filter_block2d_8_c(CAST_TO_SHORTPTR(src_ptr), src_stride, hfilter, vfilter, CAST_TO_SHORTPTR(dst_ptr), dst_stride, output_width, output_height, use_highbd); } #else ASSERT_EQ(0, use_highbd); filter_block2d_8_c(src_ptr, src_stride, hfilter, vfilter, dst_ptr, dst_stride, output_width, output_height); #endif } class ConvolveTest : public ::testing::TestWithParam { public: static void SetUpTestCase() { // Force input_ to be unaligned, output to be 16 byte aligned. 
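// (vpx_memalign() returns kDataAlignment-aligned storage; the "+ 1" below
// deliberately misaligns input_ so the functions under test cannot assume an
// aligned source. TearDownTestCase() frees input_ - 1 to undo the offset.)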
input_ = reinterpret_cast( vpx_memalign(kDataAlignment, kInputBufferSize + 1)) + 1; output_ = reinterpret_cast( vpx_memalign(kDataAlignment, kOutputBufferSize)); output_ref_ = reinterpret_cast( vpx_memalign(kDataAlignment, kOutputBufferSize)); #if CONFIG_VP9_HIGHBITDEPTH input16_ = reinterpret_cast(vpx_memalign( kDataAlignment, (kInputBufferSize + 1) * sizeof(uint16_t))) + 1; output16_ = reinterpret_cast( vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); output16_ref_ = reinterpret_cast( vpx_memalign(kDataAlignment, (kOutputBufferSize) * sizeof(uint16_t))); #endif } virtual void TearDown() { libvpx_test::ClearSystemState(); } static void TearDownTestCase() { vpx_free(input_ - 1); input_ = NULL; vpx_free(output_); output_ = NULL; vpx_free(output_ref_); output_ref_ = NULL; #if CONFIG_VP9_HIGHBITDEPTH vpx_free(input16_ - 1); input16_ = NULL; vpx_free(output16_); output16_ = NULL; vpx_free(output16_ref_); output16_ref_ = NULL; #endif } protected: static const int kDataAlignment = 16; static const int kOuterBlockSize = 256; static const int kInputStride = kOuterBlockSize; static const int kOutputStride = kOuterBlockSize; static const int kInputBufferSize = kOuterBlockSize * kOuterBlockSize; static const int kOutputBufferSize = kOuterBlockSize * kOuterBlockSize; int Width() const { return GET_PARAM(0); } int Height() const { return GET_PARAM(1); } int BorderLeft() const { const int center = (kOuterBlockSize - Width()) / 2; return (center + (kDataAlignment - 1)) & ~(kDataAlignment - 1); } int BorderTop() const { return (kOuterBlockSize - Height()) / 2; } bool IsIndexInBorder(int i) { return (i < BorderTop() * kOuterBlockSize || i >= (BorderTop() + Height()) * kOuterBlockSize || i % kOuterBlockSize < BorderLeft() || i % kOuterBlockSize >= (BorderLeft() + Width())); } virtual void SetUp() { UUT_ = GET_PARAM(2); #if CONFIG_VP9_HIGHBITDEPTH if (UUT_->use_highbd_ != 0) { mask_ = (1 << UUT_->use_highbd_) - 1; } else { mask_ = 255; } #endif /* Set up guard blocks for an inner block centered in the outer block */ for (int i = 0; i < kOutputBufferSize; ++i) { if (IsIndexInBorder(i)) { output_[i] = 255; #if CONFIG_VP9_HIGHBITDEPTH output16_[i] = mask_; #endif } else { output_[i] = 0; #if CONFIG_VP9_HIGHBITDEPTH output16_[i] = 0; #endif } } ::libvpx_test::ACMRandom prng; for (int i = 0; i < kInputBufferSize; ++i) { if (i & 1) { input_[i] = 255; #if CONFIG_VP9_HIGHBITDEPTH input16_[i] = mask_; #endif } else { input_[i] = prng.Rand8Extremes(); #if CONFIG_VP9_HIGHBITDEPTH input16_[i] = prng.Rand16() & mask_; #endif } } } void SetConstantInput(int value) { memset(input_, value, kInputBufferSize); #if CONFIG_VP9_HIGHBITDEPTH vpx_memset16(input16_, value, kInputBufferSize); #endif } void CopyOutputToRef() { memcpy(output_ref_, output_, kOutputBufferSize); #if CONFIG_VP9_HIGHBITDEPTH memcpy(output16_ref_, output16_, kOutputBufferSize * sizeof(output16_ref_[0])); #endif } void CheckGuardBlocks() { for (int i = 0; i < kOutputBufferSize; ++i) { if (IsIndexInBorder(i)) { EXPECT_EQ(255, output_[i]); } } } uint8_t *input() const { const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); #if CONFIG_VP9_HIGHBITDEPTH if (UUT_->use_highbd_ == 0) { return input_ + offset; } else { return CAST_TO_BYTEPTR(input16_ + offset); } #else return input_ + offset; #endif } uint8_t *output() const { const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); #if CONFIG_VP9_HIGHBITDEPTH if (UUT_->use_highbd_ == 0) { return output_ + offset; } else { return CAST_TO_BYTEPTR(output16_ + offset); } #else 
return output_ + offset; #endif } uint8_t *output_ref() const { const int offset = BorderTop() * kOuterBlockSize + BorderLeft(); #if CONFIG_VP9_HIGHBITDEPTH if (UUT_->use_highbd_ == 0) { return output_ref_ + offset; } else { return CAST_TO_BYTEPTR(output16_ref_ + offset); } #else return output_ref_ + offset; #endif } uint16_t lookup(uint8_t *list, int index) const { #if CONFIG_VP9_HIGHBITDEPTH if (UUT_->use_highbd_ == 0) { return list[index]; } else { return CAST_TO_SHORTPTR(list)[index]; } #else return list[index]; #endif } void assign_val(uint8_t *list, int index, uint16_t val) const { #if CONFIG_VP9_HIGHBITDEPTH if (UUT_->use_highbd_ == 0) { list[index] = (uint8_t)val; } else { CAST_TO_SHORTPTR(list)[index] = val; } #else list[index] = (uint8_t)val; #endif } const ConvolveFunctions *UUT_; static uint8_t *input_; static uint8_t *output_; static uint8_t *output_ref_; #if CONFIG_VP9_HIGHBITDEPTH static uint16_t *input16_; static uint16_t *output16_; static uint16_t *output16_ref_; int mask_; #endif }; uint8_t *ConvolveTest::input_ = NULL; uint8_t *ConvolveTest::output_ = NULL; uint8_t *ConvolveTest::output_ref_ = NULL; #if CONFIG_VP9_HIGHBITDEPTH uint16_t *ConvolveTest::input16_ = NULL; uint16_t *ConvolveTest::output16_ = NULL; uint16_t *ConvolveTest::output16_ref_ = NULL; #endif TEST_P(ConvolveTest, GuardBlocks) { CheckGuardBlocks(); } TEST_P(ConvolveTest, DISABLED_Copy_Speed) { const uint8_t *const in = input(); uint8_t *const out = output(); const int kNumTests = 5000000; const int width = Width(); const int height = Height(); vpx_usec_timer timer; vpx_usec_timer_start(&timer); for (int n = 0; n < kNumTests; ++n) { UUT_->copy_[0](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0, width, height); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("convolve_copy_%dx%d_%d: %d us\n", width, height, UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); } TEST_P(ConvolveTest, DISABLED_Avg_Speed) { const uint8_t *const in = input(); uint8_t *const out = output(); const int kNumTests = 5000000; const int width = Width(); const int height = Height(); vpx_usec_timer timer; vpx_usec_timer_start(&timer); for (int n = 0; n < kNumTests; ++n) { UUT_->copy_[1](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0, width, height); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("convolve_avg_%dx%d_%d: %d us\n", width, height, UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); } TEST_P(ConvolveTest, DISABLED_Scale_Speed) { const uint8_t *const in = input(); uint8_t *const out = output(); const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP]; const int kNumTests = 5000000; const int width = Width(); const int height = Height(); vpx_usec_timer timer; SetConstantInput(127); vpx_usec_timer_start(&timer); for (int n = 0; n < kNumTests; ++n) { UUT_->shv8_[0](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16, width, height); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("convolve_scale_%dx%d_%d: %d us\n", width, height, UUT_->use_highbd_ ? 
UUT_->use_highbd_ : 8, elapsed_time); } TEST_P(ConvolveTest, DISABLED_8Tap_Speed) { const uint8_t *const in = input(); uint8_t *const out = output(); const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP_SHARP]; const int kNumTests = 5000000; const int width = Width(); const int height = Height(); vpx_usec_timer timer; SetConstantInput(127); vpx_usec_timer_start(&timer); for (int n = 0; n < kNumTests; ++n) { UUT_->hv8_[0](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16, width, height); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("convolve8_%dx%d_%d: %d us\n", width, height, UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); } TEST_P(ConvolveTest, DISABLED_8Tap_Horiz_Speed) { const uint8_t *const in = input(); uint8_t *const out = output(); const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP_SHARP]; const int kNumTests = 5000000; const int width = Width(); const int height = Height(); vpx_usec_timer timer; SetConstantInput(127); vpx_usec_timer_start(&timer); for (int n = 0; n < kNumTests; ++n) { UUT_->h8_[0](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16, width, height); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("convolve8_horiz_%dx%d_%d: %d us\n", width, height, UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); } TEST_P(ConvolveTest, DISABLED_8Tap_Vert_Speed) { const uint8_t *const in = input(); uint8_t *const out = output(); const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP_SHARP]; const int kNumTests = 5000000; const int width = Width(); const int height = Height(); vpx_usec_timer timer; SetConstantInput(127); vpx_usec_timer_start(&timer); for (int n = 0; n < kNumTests; ++n) { UUT_->v8_[0](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16, width, height); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("convolve8_vert_%dx%d_%d: %d us\n", width, height, UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); } TEST_P(ConvolveTest, DISABLED_4Tap_Speed) { const uint8_t *const in = input(); uint8_t *const out = output(); const InterpKernel *const fourtap = vp9_filter_kernels[FOURTAP]; const int kNumTests = 5000000; const int width = Width(); const int height = Height(); vpx_usec_timer timer; SetConstantInput(127); vpx_usec_timer_start(&timer); for (int n = 0; n < kNumTests; ++n) { UUT_->hv8_[0](in, kInputStride, out, kOutputStride, fourtap, 8, 16, 8, 16, width, height); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("convolve4_%dx%d_%d: %d us\n", width, height, UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); } TEST_P(ConvolveTest, DISABLED_4Tap_Horiz_Speed) { const uint8_t *const in = input(); uint8_t *const out = output(); const InterpKernel *const fourtap = vp9_filter_kernels[FOURTAP]; const int kNumTests = 5000000; const int width = Width(); const int height = Height(); vpx_usec_timer timer; SetConstantInput(127); vpx_usec_timer_start(&timer); for (int n = 0; n < kNumTests; ++n) { UUT_->h8_[0](in, kInputStride, out, kOutputStride, fourtap, 8, 16, 8, 16, width, height); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("convolve4_horiz_%dx%d_%d: %d us\n", width, height, UUT_->use_highbd_ ? 
UUT_->use_highbd_ : 8, elapsed_time); } TEST_P(ConvolveTest, DISABLED_4Tap_Vert_Speed) { const uint8_t *const in = input(); uint8_t *const out = output(); const InterpKernel *const fourtap = vp9_filter_kernels[FOURTAP]; const int kNumTests = 5000000; const int width = Width(); const int height = Height(); vpx_usec_timer timer; SetConstantInput(127); vpx_usec_timer_start(&timer); for (int n = 0; n < kNumTests; ++n) { UUT_->v8_[0](in, kInputStride, out, kOutputStride, fourtap, 8, 16, 8, 16, width, height); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("convolve4_vert_%dx%d_%d: %d us\n", width, height, UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); } TEST_P(ConvolveTest, DISABLED_8Tap_Avg_Speed) { const uint8_t *const in = input(); uint8_t *const out = output(); const InterpKernel *const eighttap = vp9_filter_kernels[EIGHTTAP_SHARP]; const int kNumTests = 5000000; const int width = Width(); const int height = Height(); vpx_usec_timer timer; SetConstantInput(127); vpx_usec_timer_start(&timer); for (int n = 0; n < kNumTests; ++n) { UUT_->hv8_[1](in, kInputStride, out, kOutputStride, eighttap, 8, 16, 8, 16, width, height); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("convolve8_avg_%dx%d_%d: %d us\n", width, height, UUT_->use_highbd_ ? UUT_->use_highbd_ : 8, elapsed_time); } TEST_P(ConvolveTest, Copy) { uint8_t *const in = input(); uint8_t *const out = output(); ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0, Width(), Height())); CheckGuardBlocks(); for (int y = 0; y < Height(); ++y) { for (int x = 0; x < Width(); ++x) ASSERT_EQ(lookup(out, y * kOutputStride + x), lookup(in, y * kInputStride + x)) << "(" << x << "," << y << ")"; } } TEST_P(ConvolveTest, Avg) { uint8_t *const in = input(); uint8_t *const out = output(); uint8_t *const out_ref = output_ref(); CopyOutputToRef(); ASM_REGISTER_STATE_CHECK(UUT_->copy_[1](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0, Width(), Height())); CheckGuardBlocks(); for (int y = 0; y < Height(); ++y) { for (int x = 0; x < Width(); ++x) ASSERT_EQ(lookup(out, y * kOutputStride + x), ROUND_POWER_OF_TWO(lookup(in, y * kInputStride + x) + lookup(out_ref, y * kOutputStride + x), 1)) << "(" << x << "," << y << ")"; } } TEST_P(ConvolveTest, CopyHoriz) { uint8_t *const in = input(); uint8_t *const out = output(); ASM_REGISTER_STATE_CHECK(UUT_->sh8_[0](in, kInputStride, out, kOutputStride, vp9_filter_kernels[0], 0, 16, 0, 16, Width(), Height())); CheckGuardBlocks(); for (int y = 0; y < Height(); ++y) { for (int x = 0; x < Width(); ++x) ASSERT_EQ(lookup(out, y * kOutputStride + x), lookup(in, y * kInputStride + x)) << "(" << x << "," << y << ")"; } } TEST_P(ConvolveTest, CopyVert) { uint8_t *const in = input(); uint8_t *const out = output(); ASM_REGISTER_STATE_CHECK(UUT_->sv8_[0](in, kInputStride, out, kOutputStride, vp9_filter_kernels[0], 0, 16, 0, 16, Width(), Height())); CheckGuardBlocks(); for (int y = 0; y < Height(); ++y) { for (int x = 0; x < Width(); ++x) ASSERT_EQ(lookup(out, y * kOutputStride + x), lookup(in, y * kInputStride + x)) << "(" << x << "," << y << ")"; } } TEST_P(ConvolveTest, Copy2D) { uint8_t *const in = input(); uint8_t *const out = output(); ASM_REGISTER_STATE_CHECK(UUT_->shv8_[0](in, kInputStride, out, kOutputStride, vp9_filter_kernels[0], 0, 16, 0, 16, Width(), Height())); CheckGuardBlocks(); for (int y = 0; y < Height(); ++y) { for (int x = 0; x < 
Width(); ++x) ASSERT_EQ(lookup(out, y * kOutputStride + x), lookup(in, y * kInputStride + x)) << "(" << x << "," << y << ")"; } } const int kNumFilterBanks = 5; const int kNumFilters = 16; TEST(ConvolveTest, FiltersWontSaturateWhenAddedPairwise) { for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { const InterpKernel *filters = vp9_filter_kernels[static_cast(filter_bank)]; for (int i = 0; i < kNumFilters; i++) { const int p0 = filters[i][0] + filters[i][1]; const int p1 = filters[i][2] + filters[i][3]; const int p2 = filters[i][4] + filters[i][5]; const int p3 = filters[i][6] + filters[i][7]; EXPECT_LE(p0, 128); EXPECT_LE(p1, 128); EXPECT_LE(p2, 128); EXPECT_LE(p3, 128); EXPECT_LE(p0 + p3, 128); EXPECT_LE(p0 + p3 + p1, 128); EXPECT_LE(p0 + p3 + p1 + p2, 128); EXPECT_EQ(p0 + p1 + p2 + p3, 128); } } } const WrapperFilterBlock2d8Func wrapper_filter_block2d_8[2] = { wrapper_filter_block2d_8_c, wrapper_filter_average_block2d_8_c }; TEST_P(ConvolveTest, MatchesReferenceSubpixelFilter) { for (int i = 0; i < 2; ++i) { uint8_t *const in = input(); uint8_t *const out = output(); #if CONFIG_VP9_HIGHBITDEPTH uint8_t ref8[kOutputStride * kMaxDimension]; uint16_t ref16[kOutputStride * kMaxDimension]; uint8_t *ref; if (UUT_->use_highbd_ == 0) { ref = ref8; } else { ref = CAST_TO_BYTEPTR(ref16); } #else uint8_t ref[kOutputStride * kMaxDimension]; #endif // Populate ref and out with some random data ::libvpx_test::ACMRandom prng; for (int y = 0; y < Height(); ++y) { for (int x = 0; x < Width(); ++x) { uint16_t r; #if CONFIG_VP9_HIGHBITDEPTH if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { r = prng.Rand8Extremes(); } else { r = prng.Rand16() & mask_; } #else r = prng.Rand8Extremes(); #endif assign_val(out, y * kOutputStride + x, r); assign_val(ref, y * kOutputStride + x, r); } } for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { const InterpKernel *filters = vp9_filter_kernels[static_cast(filter_bank)]; for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { wrapper_filter_block2d_8[i](in, kInputStride, filters[filter_x], filters[filter_y], ref, kOutputStride, Width(), Height(), UUT_->use_highbd_); if (filter_x && filter_y) ASM_REGISTER_STATE_CHECK( UUT_->hv8_[i](in, kInputStride, out, kOutputStride, filters, filter_x, 16, filter_y, 16, Width(), Height())); else if (filter_y) ASM_REGISTER_STATE_CHECK( UUT_->v8_[i](in, kInputStride, out, kOutputStride, filters, 0, 16, filter_y, 16, Width(), Height())); else if (filter_x) ASM_REGISTER_STATE_CHECK( UUT_->h8_[i](in, kInputStride, out, kOutputStride, filters, filter_x, 16, 0, 16, Width(), Height())); else ASM_REGISTER_STATE_CHECK(UUT_->copy_[i](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0, Width(), Height())); CheckGuardBlocks(); for (int y = 0; y < Height(); ++y) { for (int x = 0; x < Width(); ++x) ASSERT_EQ(lookup(ref, y * kOutputStride + x), lookup(out, y * kOutputStride + x)) << "mismatch at (" << x << "," << y << "), " << "filters (" << filter_bank << "," << filter_x << "," << filter_y << ")"; } } } } } } TEST_P(ConvolveTest, FilterExtremes) { uint8_t *const in = input(); uint8_t *const out = output(); #if CONFIG_VP9_HIGHBITDEPTH uint8_t ref8[kOutputStride * kMaxDimension]; uint16_t ref16[kOutputStride * kMaxDimension]; uint8_t *ref; if (UUT_->use_highbd_ == 0) { ref = ref8; } else { ref = CAST_TO_BYTEPTR(ref16); } #else uint8_t ref[kOutputStride * kMaxDimension]; #endif // Populate ref and out with some random data 
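// (Rand8Extremes() returns values near 0 or 255 to stress saturation and
// rounding paths; high-bitdepth runs instead mask Rand16() down to the
// active bit depth via mask_.)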
::libvpx_test::ACMRandom prng; for (int y = 0; y < Height(); ++y) { for (int x = 0; x < Width(); ++x) { uint16_t r; #if CONFIG_VP9_HIGHBITDEPTH if (UUT_->use_highbd_ == 0 || UUT_->use_highbd_ == 8) { r = prng.Rand8Extremes(); } else { r = prng.Rand16() & mask_; } #else r = prng.Rand8Extremes(); #endif assign_val(out, y * kOutputStride + x, r); assign_val(ref, y * kOutputStride + x, r); } } for (int axis = 0; axis < 2; axis++) { int seed_val = 0; while (seed_val < 256) { for (int y = 0; y < 8; ++y) { for (int x = 0; x < 8; ++x) { #if CONFIG_VP9_HIGHBITDEPTH assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1, ((seed_val >> (axis ? y : x)) & 1) * mask_); #else assign_val(in, y * kOutputStride + x - SUBPEL_TAPS / 2 + 1, ((seed_val >> (axis ? y : x)) & 1) * 255); #endif if (axis) seed_val++; } if (axis) { seed_val -= 8; } else { seed_val++; } } if (axis) seed_val += 8; for (int filter_bank = 0; filter_bank < kNumFilterBanks; ++filter_bank) { const InterpKernel *filters = vp9_filter_kernels[static_cast(filter_bank)]; for (int filter_x = 0; filter_x < kNumFilters; ++filter_x) { for (int filter_y = 0; filter_y < kNumFilters; ++filter_y) { wrapper_filter_block2d_8_c(in, kInputStride, filters[filter_x], filters[filter_y], ref, kOutputStride, Width(), Height(), UUT_->use_highbd_); if (filter_x && filter_y) ASM_REGISTER_STATE_CHECK( UUT_->hv8_[0](in, kInputStride, out, kOutputStride, filters, filter_x, 16, filter_y, 16, Width(), Height())); else if (filter_y) ASM_REGISTER_STATE_CHECK( UUT_->v8_[0](in, kInputStride, out, kOutputStride, filters, 0, 16, filter_y, 16, Width(), Height())); else if (filter_x) ASM_REGISTER_STATE_CHECK( UUT_->h8_[0](in, kInputStride, out, kOutputStride, filters, filter_x, 16, 0, 16, Width(), Height())); else ASM_REGISTER_STATE_CHECK(UUT_->copy_[0](in, kInputStride, out, kOutputStride, NULL, 0, 0, 0, 0, Width(), Height())); for (int y = 0; y < Height(); ++y) { for (int x = 0; x < Width(); ++x) ASSERT_EQ(lookup(ref, y * kOutputStride + x), lookup(out, y * kOutputStride + x)) << "mismatch at (" << x << "," << y << "), " << "filters (" << filter_bank << "," << filter_x << "," << filter_y << ")"; } } } } } } } /* This test exercises that enough rows and columns are filtered with every possible initial fractional positions and scaling steps. */ #if !CONFIG_VP9_HIGHBITDEPTH static const ConvolveFunc scaled_2d_c_funcs[2] = { vpx_scaled_2d_c, vpx_scaled_avg_2d_c }; TEST_P(ConvolveTest, CheckScalingFiltering) { uint8_t *const in = input(); uint8_t *const out = output(); uint8_t ref[kOutputStride * kMaxDimension]; ::libvpx_test::ACMRandom prng; for (int y = 0; y < Height(); ++y) { for (int x = 0; x < Width(); ++x) { const uint16_t r = prng.Rand8Extremes(); assign_val(in, y * kInputStride + x, r); } } for (int i = 0; i < 2; ++i) { for (INTERP_FILTER filter_type = 0; filter_type < 4; ++filter_type) { const InterpKernel *const eighttap = vp9_filter_kernels[filter_type]; for (int frac = 0; frac < 16; ++frac) { for (int step = 1; step <= 32; ++step) { /* Test the horizontal and vertical filters in combination. 
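   Illustratively, each iteration drives both axes with the same initial
   subpixel phase (frac, in sixteenths) and scaling step (step, in q4 units,
   up to 2x downscaling at step == 32), comparing the optimized shv8 path
   pixel for pixel against the scaled_2d_c_funcs[i] C reference.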
*/ scaled_2d_c_funcs[i](in, kInputStride, ref, kOutputStride, eighttap, frac, step, frac, step, Width(), Height()); ASM_REGISTER_STATE_CHECK( UUT_->shv8_[i](in, kInputStride, out, kOutputStride, eighttap, frac, step, frac, step, Width(), Height())); CheckGuardBlocks(); for (int y = 0; y < Height(); ++y) { for (int x = 0; x < Width(); ++x) { ASSERT_EQ(lookup(ref, y * kOutputStride + x), lookup(out, y * kOutputStride + x)) << "x == " << x << ", y == " << y << ", frac == " << frac << ", step == " << step; } } } } } } } #endif using std::make_tuple; #if CONFIG_VP9_HIGHBITDEPTH #define WRAP(func, bd) \ void wrap_##func##_##bd( \ const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \ vpx_highbd_##func(reinterpret_cast(src), src_stride, \ reinterpret_cast(dst), dst_stride, filter, \ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); \ } #if HAVE_SSE2 && VPX_ARCH_X86_64 WRAP(convolve_copy_sse2, 8) WRAP(convolve_avg_sse2, 8) WRAP(convolve_copy_sse2, 10) WRAP(convolve_avg_sse2, 10) WRAP(convolve_copy_sse2, 12) WRAP(convolve_avg_sse2, 12) WRAP(convolve8_horiz_sse2, 8) WRAP(convolve8_avg_horiz_sse2, 8) WRAP(convolve8_vert_sse2, 8) WRAP(convolve8_avg_vert_sse2, 8) WRAP(convolve8_sse2, 8) WRAP(convolve8_avg_sse2, 8) WRAP(convolve8_horiz_sse2, 10) WRAP(convolve8_avg_horiz_sse2, 10) WRAP(convolve8_vert_sse2, 10) WRAP(convolve8_avg_vert_sse2, 10) WRAP(convolve8_sse2, 10) WRAP(convolve8_avg_sse2, 10) WRAP(convolve8_horiz_sse2, 12) WRAP(convolve8_avg_horiz_sse2, 12) WRAP(convolve8_vert_sse2, 12) WRAP(convolve8_avg_vert_sse2, 12) WRAP(convolve8_sse2, 12) WRAP(convolve8_avg_sse2, 12) #endif // HAVE_SSE2 && VPX_ARCH_X86_64 #if HAVE_AVX2 WRAP(convolve_copy_avx2, 8) WRAP(convolve_avg_avx2, 8) WRAP(convolve8_horiz_avx2, 8) WRAP(convolve8_avg_horiz_avx2, 8) WRAP(convolve8_vert_avx2, 8) WRAP(convolve8_avg_vert_avx2, 8) WRAP(convolve8_avx2, 8) WRAP(convolve8_avg_avx2, 8) WRAP(convolve_copy_avx2, 10) WRAP(convolve_avg_avx2, 10) WRAP(convolve8_avx2, 10) WRAP(convolve8_horiz_avx2, 10) WRAP(convolve8_vert_avx2, 10) WRAP(convolve8_avg_avx2, 10) WRAP(convolve8_avg_horiz_avx2, 10) WRAP(convolve8_avg_vert_avx2, 10) WRAP(convolve_copy_avx2, 12) WRAP(convolve_avg_avx2, 12) WRAP(convolve8_avx2, 12) WRAP(convolve8_horiz_avx2, 12) WRAP(convolve8_vert_avx2, 12) WRAP(convolve8_avg_avx2, 12) WRAP(convolve8_avg_horiz_avx2, 12) WRAP(convolve8_avg_vert_avx2, 12) #endif // HAVE_AVX2 #if HAVE_NEON WRAP(convolve_copy_neon, 8) WRAP(convolve_avg_neon, 8) WRAP(convolve_copy_neon, 10) WRAP(convolve_avg_neon, 10) WRAP(convolve_copy_neon, 12) WRAP(convolve_avg_neon, 12) WRAP(convolve8_horiz_neon, 8) WRAP(convolve8_avg_horiz_neon, 8) WRAP(convolve8_vert_neon, 8) WRAP(convolve8_avg_vert_neon, 8) WRAP(convolve8_neon, 8) WRAP(convolve8_avg_neon, 8) WRAP(convolve8_horiz_neon, 10) WRAP(convolve8_avg_horiz_neon, 10) WRAP(convolve8_vert_neon, 10) WRAP(convolve8_avg_vert_neon, 10) WRAP(convolve8_neon, 10) WRAP(convolve8_avg_neon, 10) WRAP(convolve8_horiz_neon, 12) WRAP(convolve8_avg_horiz_neon, 12) WRAP(convolve8_vert_neon, 12) WRAP(convolve8_avg_vert_neon, 12) WRAP(convolve8_neon, 12) WRAP(convolve8_avg_neon, 12) #endif // HAVE_NEON WRAP(convolve_copy_c, 8) WRAP(convolve_avg_c, 8) WRAP(convolve8_horiz_c, 8) WRAP(convolve8_avg_horiz_c, 8) WRAP(convolve8_vert_c, 8) WRAP(convolve8_avg_vert_c, 8) WRAP(convolve8_c, 8) WRAP(convolve8_avg_c, 8) WRAP(convolve_copy_c, 10) WRAP(convolve_avg_c, 10) WRAP(convolve8_horiz_c, 10) 
WRAP(convolve8_avg_horiz_c, 10) WRAP(convolve8_vert_c, 10) WRAP(convolve8_avg_vert_c, 10) WRAP(convolve8_c, 10) WRAP(convolve8_avg_c, 10) WRAP(convolve_copy_c, 12) WRAP(convolve_avg_c, 12) WRAP(convolve8_horiz_c, 12) WRAP(convolve8_avg_horiz_c, 12) WRAP(convolve8_vert_c, 12) WRAP(convolve8_avg_vert_c, 12) WRAP(convolve8_c, 12) WRAP(convolve8_avg_c, 12) #undef WRAP const ConvolveFunctions convolve8_c( wrap_convolve_copy_c_8, wrap_convolve_avg_c_8, wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8); const ConvolveFunctions convolve10_c( wrap_convolve_copy_c_10, wrap_convolve_avg_c_10, wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10, wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10); const ConvolveFunctions convolve12_c( wrap_convolve_copy_c_12, wrap_convolve_avg_c_12, wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12, wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12); const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c), ALL_SIZES(convolve10_c), ALL_SIZES(convolve12_c) }; #else const ConvolveFunctions convolve8_c( vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_c, vpx_convolve8_avg_horiz_c, vpx_convolve8_vert_c, vpx_convolve8_avg_vert_c, vpx_convolve8_c, vpx_convolve8_avg_c, vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); const ConvolveParam kArrayConvolve_c[] = { ALL_SIZES(convolve8_c) }; #endif INSTANTIATE_TEST_CASE_P(C, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_c)); #if HAVE_SSE2 && VPX_ARCH_X86_64 #if CONFIG_VP9_HIGHBITDEPTH const ConvolveFunctions convolve8_sse2( wrap_convolve_copy_sse2_8, wrap_convolve_avg_sse2_8, wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8, wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, wrap_convolve8_horiz_sse2_8, wrap_convolve8_avg_horiz_sse2_8, wrap_convolve8_vert_sse2_8, wrap_convolve8_avg_vert_sse2_8, wrap_convolve8_sse2_8, wrap_convolve8_avg_sse2_8, 8); const ConvolveFunctions convolve10_sse2( wrap_convolve_copy_sse2_10, wrap_convolve_avg_sse2_10, wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10, wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, wrap_convolve8_horiz_sse2_10, wrap_convolve8_avg_horiz_sse2_10, wrap_convolve8_vert_sse2_10, wrap_convolve8_avg_vert_sse2_10, wrap_convolve8_sse2_10, wrap_convolve8_avg_sse2_10, 10); const ConvolveFunctions convolve12_sse2( wrap_convolve_copy_sse2_12, wrap_convolve_avg_sse2_12, wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12, wrap_convolve8_vert_sse2_12, wrap_convolve8_avg_vert_sse2_12, wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, wrap_convolve8_horiz_sse2_12, wrap_convolve8_avg_horiz_sse2_12, wrap_convolve8_vert_sse2_12, 
wrap_convolve8_avg_vert_sse2_12, wrap_convolve8_sse2_12, wrap_convolve8_avg_sse2_12, 12); const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2), ALL_SIZES(convolve10_sse2), ALL_SIZES(convolve12_sse2) }; #else const ConvolveFunctions convolve8_sse2( vpx_convolve_copy_sse2, vpx_convolve_avg_sse2, vpx_convolve8_horiz_sse2, vpx_convolve8_avg_horiz_sse2, vpx_convolve8_vert_sse2, vpx_convolve8_avg_vert_sse2, vpx_convolve8_sse2, vpx_convolve8_avg_sse2, vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); const ConvolveParam kArrayConvolve_sse2[] = { ALL_SIZES(convolve8_sse2) }; #endif // CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(SSE2, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_sse2)); #endif #if HAVE_SSSE3 const ConvolveFunctions convolve8_ssse3( vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_ssse3, vpx_convolve8_avg_horiz_ssse3, vpx_convolve8_vert_ssse3, vpx_convolve8_avg_vert_ssse3, vpx_convolve8_ssse3, vpx_convolve8_avg_ssse3, vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c, vpx_scaled_2d_ssse3, vpx_scaled_avg_2d_c, 0); const ConvolveParam kArrayConvolve8_ssse3[] = { ALL_SIZES(convolve8_ssse3) }; INSTANTIATE_TEST_CASE_P(SSSE3, ConvolveTest, ::testing::ValuesIn(kArrayConvolve8_ssse3)); #endif #if HAVE_AVX2 #if CONFIG_VP9_HIGHBITDEPTH const ConvolveFunctions convolve8_avx2( wrap_convolve_copy_avx2_8, wrap_convolve_avg_avx2_8, wrap_convolve8_horiz_avx2_8, wrap_convolve8_avg_horiz_avx2_8, wrap_convolve8_vert_avx2_8, wrap_convolve8_avg_vert_avx2_8, wrap_convolve8_avx2_8, wrap_convolve8_avg_avx2_8, wrap_convolve8_horiz_c_8, wrap_convolve8_avg_horiz_c_8, wrap_convolve8_vert_c_8, wrap_convolve8_avg_vert_c_8, wrap_convolve8_c_8, wrap_convolve8_avg_c_8, 8); const ConvolveFunctions convolve10_avx2( wrap_convolve_copy_avx2_10, wrap_convolve_avg_avx2_10, wrap_convolve8_horiz_avx2_10, wrap_convolve8_avg_horiz_avx2_10, wrap_convolve8_vert_avx2_10, wrap_convolve8_avg_vert_avx2_10, wrap_convolve8_avx2_10, wrap_convolve8_avg_avx2_10, wrap_convolve8_horiz_c_10, wrap_convolve8_avg_horiz_c_10, wrap_convolve8_vert_c_10, wrap_convolve8_avg_vert_c_10, wrap_convolve8_c_10, wrap_convolve8_avg_c_10, 10); const ConvolveFunctions convolve12_avx2( wrap_convolve_copy_avx2_12, wrap_convolve_avg_avx2_12, wrap_convolve8_horiz_avx2_12, wrap_convolve8_avg_horiz_avx2_12, wrap_convolve8_vert_avx2_12, wrap_convolve8_avg_vert_avx2_12, wrap_convolve8_avx2_12, wrap_convolve8_avg_avx2_12, wrap_convolve8_horiz_c_12, wrap_convolve8_avg_horiz_c_12, wrap_convolve8_vert_c_12, wrap_convolve8_avg_vert_c_12, wrap_convolve8_c_12, wrap_convolve8_avg_c_12, 12); const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2), ALL_SIZES(convolve10_avx2), ALL_SIZES(convolve12_avx2) }; INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::ValuesIn(kArrayConvolve8_avx2)); #else // !CONFIG_VP9_HIGHBITDEPTH const ConvolveFunctions convolve8_avx2( vpx_convolve_copy_c, vpx_convolve_avg_c, vpx_convolve8_horiz_avx2, vpx_convolve8_avg_horiz_avx2, vpx_convolve8_vert_avx2, vpx_convolve8_avg_vert_avx2, vpx_convolve8_avx2, vpx_convolve8_avg_avx2, vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); const ConvolveParam kArrayConvolve8_avx2[] = { ALL_SIZES(convolve8_avx2) }; INSTANTIATE_TEST_CASE_P(AVX2, ConvolveTest, ::testing::ValuesIn(kArrayConvolve8_avx2)); #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_AVX2 #if HAVE_NEON 
#if CONFIG_VP9_HIGHBITDEPTH const ConvolveFunctions convolve8_neon( wrap_convolve_copy_neon_8, wrap_convolve_avg_neon_8, wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8, wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8, wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8, wrap_convolve8_horiz_neon_8, wrap_convolve8_avg_horiz_neon_8, wrap_convolve8_vert_neon_8, wrap_convolve8_avg_vert_neon_8, wrap_convolve8_neon_8, wrap_convolve8_avg_neon_8, 8); const ConvolveFunctions convolve10_neon( wrap_convolve_copy_neon_10, wrap_convolve_avg_neon_10, wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10, wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10, wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10, wrap_convolve8_horiz_neon_10, wrap_convolve8_avg_horiz_neon_10, wrap_convolve8_vert_neon_10, wrap_convolve8_avg_vert_neon_10, wrap_convolve8_neon_10, wrap_convolve8_avg_neon_10, 10); const ConvolveFunctions convolve12_neon( wrap_convolve_copy_neon_12, wrap_convolve_avg_neon_12, wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12, wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12, wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12, wrap_convolve8_horiz_neon_12, wrap_convolve8_avg_horiz_neon_12, wrap_convolve8_vert_neon_12, wrap_convolve8_avg_vert_neon_12, wrap_convolve8_neon_12, wrap_convolve8_avg_neon_12, 12); const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon), ALL_SIZES(convolve10_neon), ALL_SIZES(convolve12_neon) }; #else const ConvolveFunctions convolve8_neon( vpx_convolve_copy_neon, vpx_convolve_avg_neon, vpx_convolve8_horiz_neon, vpx_convolve8_avg_horiz_neon, vpx_convolve8_vert_neon, vpx_convolve8_avg_vert_neon, vpx_convolve8_neon, vpx_convolve8_avg_neon, vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c, vpx_scaled_2d_neon, vpx_scaled_avg_2d_c, 0); const ConvolveParam kArrayConvolve_neon[] = { ALL_SIZES(convolve8_neon) }; #endif // CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(NEON, ConvolveTest, ::testing::ValuesIn(kArrayConvolve_neon)); #endif // HAVE_NEON #if HAVE_DSPR2 const ConvolveFunctions convolve8_dspr2( vpx_convolve_copy_dspr2, vpx_convolve_avg_dspr2, vpx_convolve8_horiz_dspr2, vpx_convolve8_avg_horiz_dspr2, vpx_convolve8_vert_dspr2, vpx_convolve8_avg_vert_dspr2, vpx_convolve8_dspr2, vpx_convolve8_avg_dspr2, vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0); const ConvolveParam kArrayConvolve8_dspr2[] = { ALL_SIZES(convolve8_dspr2) }; INSTANTIATE_TEST_CASE_P(DSPR2, ConvolveTest, ::testing::ValuesIn(kArrayConvolve8_dspr2)); #endif // HAVE_DSPR2 #if HAVE_MSA const ConvolveFunctions convolve8_msa( vpx_convolve_copy_msa, vpx_convolve_avg_msa, vpx_convolve8_horiz_msa, vpx_convolve8_avg_horiz_msa, vpx_convolve8_vert_msa, vpx_convolve8_avg_vert_msa, vpx_convolve8_msa, vpx_convolve8_avg_msa, vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, vpx_scaled_avg_vert_c, vpx_scaled_2d_msa, vpx_scaled_avg_2d_c, 0); const ConvolveParam kArrayConvolve8_msa[] = { ALL_SIZES(convolve8_msa) }; INSTANTIATE_TEST_CASE_P(MSA, ConvolveTest, ::testing::ValuesIn(kArrayConvolve8_msa)); #endif // HAVE_MSA #if HAVE_VSX const ConvolveFunctions convolve8_vsx( vpx_convolve_copy_vsx, vpx_convolve_avg_vsx, vpx_convolve8_horiz_vsx, vpx_convolve8_avg_horiz_vsx, vpx_convolve8_vert_vsx, vpx_convolve8_avg_vert_vsx, vpx_convolve8_vsx, vpx_convolve8_avg_vsx, vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c, 
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve_vsx[] = { ALL_SIZES(convolve8_vsx) };
INSTANTIATE_TEST_CASE_P(VSX, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_vsx));
#endif  // HAVE_VSX

#if HAVE_MMI
const ConvolveFunctions convolve8_mmi(
    vpx_convolve_copy_c, vpx_convolve_avg_mmi, vpx_convolve8_horiz_mmi,
    vpx_convolve8_avg_horiz_mmi, vpx_convolve8_vert_mmi,
    vpx_convolve8_avg_vert_mmi, vpx_convolve8_mmi, vpx_convolve8_avg_mmi,
    vpx_scaled_horiz_c, vpx_scaled_avg_horiz_c, vpx_scaled_vert_c,
    vpx_scaled_avg_vert_c, vpx_scaled_2d_c, vpx_scaled_avg_2d_c, 0);

const ConvolveParam kArrayConvolve_mmi[] = { ALL_SIZES(convolve8_mmi) };
INSTANTIATE_TEST_CASE_P(MMI, ConvolveTest,
                        ::testing::ValuesIn(kArrayConvolve_mmi));
#endif  // HAVE_MMI
}  // namespace
libvpx-1.8.2/test/cpu_speed_test.cc000066400000000000000000000124271357355204000173340ustar00rootroot00000000000000/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
#include "test/y4m_video_source.h"

namespace {

const int kMaxPSNR = 100;

class CpuSpeedTest
    : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
 protected:
  CpuSpeedTest()
      : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)),
        set_cpu_used_(GET_PARAM(2)), min_psnr_(kMaxPSNR),
        tune_content_(VP9E_CONTENT_DEFAULT) {}
  virtual ~CpuSpeedTest() {}

  virtual void SetUp() {
    InitializeConfig();
    SetMode(encoding_mode_);
    if (encoding_mode_ != ::libvpx_test::kRealTime) {
      cfg_.g_lag_in_frames = 25;
      cfg_.rc_end_usage = VPX_VBR;
    } else {
      cfg_.g_lag_in_frames = 0;
      cfg_.rc_end_usage = VPX_CBR;
    }
  }

  virtual void BeginPassHook(unsigned int /*pass*/) { min_psnr_ = kMaxPSNR; }

  virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Encoder *encoder) {
    if (video->frame() == 0) {
      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
      encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_);
      if (encoding_mode_ != ::libvpx_test::kRealTime) {
        encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1);
        encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7);
        encoder->Control(VP8E_SET_ARNR_STRENGTH, 5);
        encoder->Control(VP8E_SET_ARNR_TYPE, 3);
      }
    }
  }

  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
    if (pkt->data.psnr.psnr[0] < min_psnr_) min_psnr_ = pkt->data.psnr.psnr[0];
  }

  ::libvpx_test::TestMode encoding_mode_;
  int set_cpu_used_;
  double min_psnr_;
  int tune_content_;
};

TEST_P(CpuSpeedTest, TestQ0) {
  // Validate that this non multiple of 64 wide clip encodes and decodes
  // without a mismatch when passing in a very low max q.  This pushes
  // the encoder to producing lots of big partitions which will likely
  // extend into the border and test the border condition.
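  // (A note on the PSNR bound used below: with rc_min_quantizer and
  // rc_max_quantizer both forced to 0 the encode should be effectively
  // lossless, so every frame's PSNR should clamp to kMaxPSNR and the
  // EXPECT_GE(min_psnr_, kMaxPSNR) check at the end of the test should hold.)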
  cfg_.rc_2pass_vbr_minsection_pct = 5;
  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
  cfg_.rc_target_bitrate = 400;
  cfg_.rc_max_quantizer = 0;
  cfg_.rc_min_quantizer = 0;
  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
                                       20);

  init_flags_ = VPX_CODEC_USE_PSNR;

  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  EXPECT_GE(min_psnr_, kMaxPSNR);
}

TEST_P(CpuSpeedTest, TestScreencastQ0) {
  ::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25);
  cfg_.g_timebase = video.timebase();
  cfg_.rc_2pass_vbr_minsection_pct = 5;
  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
  cfg_.rc_target_bitrate = 400;
  cfg_.rc_max_quantizer = 0;
  cfg_.rc_min_quantizer = 0;

  init_flags_ = VPX_CODEC_USE_PSNR;

  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  EXPECT_GE(min_psnr_, kMaxPSNR);
}

TEST_P(CpuSpeedTest, TestTuneScreen) {
  ::libvpx_test::Y4mVideoSource video("screendata.y4m", 0, 25);
  cfg_.g_timebase = video.timebase();
  cfg_.rc_2pass_vbr_minsection_pct = 5;
  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
  cfg_.rc_target_bitrate = 2000;
  cfg_.rc_max_quantizer = 63;
  cfg_.rc_min_quantizer = 0;
  tune_content_ = VP9E_CONTENT_SCREEN;

  init_flags_ = VPX_CODEC_USE_PSNR;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}

TEST_P(CpuSpeedTest, TestEncodeHighBitrate) {
  // Validate that this non multiple of 64 wide clip encodes and decodes
  // without a mismatch when passing in a very low max q.  This pushes
  // the encoder to producing lots of big partitions which will likely
  // extend into the border and test the border condition.
  cfg_.rc_2pass_vbr_minsection_pct = 5;
  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
  cfg_.rc_target_bitrate = 12000;
  cfg_.rc_max_quantizer = 10;
  cfg_.rc_min_quantizer = 0;
  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
                                       20);

  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}

TEST_P(CpuSpeedTest, TestLowBitrate) {
  // Validate that this clip encodes and decodes without a mismatch
  // when passing in a very high min q.  This pushes the encoder to producing
  // lots of small partitions, which tests the other border condition.
  cfg_.rc_2pass_vbr_minsection_pct = 5;
  cfg_.rc_2pass_vbr_maxsection_pct = 2000;
  cfg_.rc_target_bitrate = 200;
  cfg_.rc_min_quantizer = 40;
  ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0,
                                       20);

  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}

VP9_INSTANTIATE_TEST_CASE(CpuSpeedTest,
                          ::testing::Values(::libvpx_test::kTwoPassGood,
                                            ::libvpx_test::kOnePassGood,
                                            ::libvpx_test::kRealTime),
                          ::testing::Range(0, 10));
}  // namespace
libvpx-1.8.2/test/cq_test.cc000066400000000000000000000101301357355204000157610ustar00rootroot00000000000000/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <cmath>
#include <map>

#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"

namespace {

// CQ level range: [kCQLevelMin, kCQLevelMax).
const int kCQLevelMin = 4;
const int kCQLevelMax = 63;
const int kCQLevelStep = 8;
const unsigned int kCQTargetBitrate = 2000;

class CQTest : public ::libvpx_test::EncoderTest,
               public ::libvpx_test::CodecTestWithParam<int> {
 public:
  // Maps the cq level to the bitrate produced.
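  // For example, a run over the parameter range might record entries such as
  // { {4, 1900}, {12, 1700}, {20, 1400} } (values hypothetical);
  // TearDownTestCase() walks the map in ascending cq_level order and checks
  // that the measured bitrate never increases.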
  typedef std::map<int, uint32_t> BitrateMap;

  static void SetUpTestCase() { bitrates_.clear(); }

  static void TearDownTestCase() {
    ASSERT_TRUE(!HasFailure())
        << "skipping bitrate validation due to earlier failure.";
    uint32_t prev_actual_bitrate = kCQTargetBitrate;
    for (BitrateMap::const_iterator iter = bitrates_.begin();
         iter != bitrates_.end(); ++iter) {
      const uint32_t cq_actual_bitrate = iter->second;
      EXPECT_LE(cq_actual_bitrate, prev_actual_bitrate)
          << "cq_level: " << iter->first
          << ", bitrate should decrease with increase in CQ level.";
      prev_actual_bitrate = cq_actual_bitrate;
    }
  }

 protected:
  CQTest() : EncoderTest(GET_PARAM(0)), cq_level_(GET_PARAM(1)) {
    init_flags_ = VPX_CODEC_USE_PSNR;
  }

  virtual ~CQTest() {}

  virtual void SetUp() {
    InitializeConfig();
    SetMode(libvpx_test::kTwoPassGood);
  }

  virtual void BeginPassHook(unsigned int /*pass*/) {
    file_size_ = 0;
    psnr_ = 0.0;
    n_frames_ = 0;
  }

  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
    if (video->frame() == 0) {
      if (cfg_.rc_end_usage == VPX_CQ) {
        encoder->Control(VP8E_SET_CQ_LEVEL, cq_level_);
      }
      encoder->Control(VP8E_SET_CPUUSED, 3);
    }
  }

  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
    psnr_ += pow(10.0, pkt->data.psnr.psnr[0] / 10.0);
    n_frames_++;
  }

  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
    file_size_ += pkt->data.frame.sz;
  }

  double GetLinearPSNROverBitrate() const {
    double avg_psnr = log10(psnr_ / n_frames_) * 10.0;
    return pow(10.0, avg_psnr / 10.0) / file_size_;
  }

  int cq_level() const { return cq_level_; }
  size_t file_size() const { return file_size_; }
  int n_frames() const { return n_frames_; }

  static BitrateMap bitrates_;

 private:
  int cq_level_;
  size_t file_size_;
  double psnr_;
  int n_frames_;
};

CQTest::BitrateMap CQTest::bitrates_;

TEST_P(CQTest, LinearPSNRIsHigherForCQLevel) {
  const vpx_rational timebase = { 33333333, 1000000000 };
  cfg_.g_timebase = timebase;
  cfg_.rc_target_bitrate = kCQTargetBitrate;
  cfg_.g_lag_in_frames = 25;

  cfg_.rc_end_usage = VPX_CQ;
  libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                     timebase.den, timebase.num, 0, 30);
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  const double cq_psnr_lin = GetLinearPSNROverBitrate();
  const unsigned int cq_actual_bitrate =
      static_cast<unsigned int>(file_size()) * 8 * 30 / (n_frames() * 1000);
  EXPECT_LE(cq_actual_bitrate, kCQTargetBitrate);
  bitrates_[cq_level()] = cq_actual_bitrate;

  // try targeting the approximate same bitrate with VBR mode
  cfg_.rc_end_usage = VPX_VBR;
  cfg_.rc_target_bitrate = cq_actual_bitrate;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  const double vbr_psnr_lin = GetLinearPSNROverBitrate();
  EXPECT_GE(cq_psnr_lin, vbr_psnr_lin);
}

VP8_INSTANTIATE_TEST_CASE(CQTest, ::testing::Range(kCQLevelMin, kCQLevelMax,
                                                   kCQLevelStep));
}  // namespace
libvpx-1.8.2/test/cx_set_ref.sh000077500000000000000000000034771357355204000165020ustar00rootroot00000000000000#!/bin/sh
##
##  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
##
##  Use of this source code is governed by a BSD-style license
##  that can be found in the LICENSE file in the root of the source
##  tree. An additional intellectual property rights grant can be found
##  in the file PATENTS. All contributing project authors may
##  be found in the AUTHORS file in the root of the source tree.
##
##  This file tests the libvpx cx_set_ref example. To add new tests to this
##  file, do the following:
##    1. Write a shell function (this is your test).
##    2. Add the function to cx_set_ref_tests (on a new line).
##
. $(dirname $0)/tools_common.sh

# Environment check: $YUV_RAW_INPUT is required.
cx_set_ref_verify_environment() {
  if [ ! -e "${YUV_RAW_INPUT}" ]; then
    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
    return 1
  fi
}

# Runs cx_set_ref and updates the reference frame before encoding frame 90.
# $1 is the codec name.
vpx_set_ref() {
  local codec="$1"
  local encoder="${LIBVPX_BIN_PATH}/${codec}cx_set_ref${VPX_TEST_EXE_SUFFIX}"
  local output_file="${VPX_TEST_OUTPUT_DIR}/${codec}cx_set_ref_${codec}.ivf"
  local ref_frame_num=90

  if [ ! -x "${encoder}" ]; then
    elog "${encoder} does not exist or is not executable."
    return 1
  fi

  eval "${VPX_TEST_PREFIX}" "${encoder}" "${YUV_RAW_INPUT_WIDTH}" \
      "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \
      "${ref_frame_num}" ${devnull}

  [ -e "${output_file}" ] || return 1
}

cx_set_ref_vp8() {
  if [ "$(vp8_encode_available)" = "yes" ]; then
    vpx_set_ref vp8 || return 1
  fi
}

cx_set_ref_vp9() {
  if [ "$(vp9_encode_available)" = "yes" ]; then
    vpx_set_ref vp9 || return 1
  fi
}

cx_set_ref_tests="cx_set_ref_vp8 cx_set_ref_vp9"

run_tests cx_set_ref_verify_environment "${cx_set_ref_tests}"
libvpx-1.8.2/test/dct16x16_test.cc000066400000000000000000000724051357355204000166470ustar00rootroot00000000000000/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <tuple>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_scan.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/msvc.h"  // for round()

using libvpx_test::ACMRandom;

namespace {

const int kNumCoeffs = 256;
const double C1 = 0.995184726672197;
const double C2 = 0.98078528040323;
const double C3 = 0.956940335732209;
const double C4 = 0.923879532511287;
const double C5 = 0.881921264348355;
const double C6 = 0.831469612302545;
const double C7 = 0.773010453362737;
const double C8 = 0.707106781186548;
const double C9 = 0.634393284163646;
const double C10 = 0.555570233019602;
const double C11 = 0.471396736825998;
const double C12 = 0.38268343236509;
const double C13 = 0.290284677254462;
const double C14 = 0.195090322016128;
const double C15 = 0.098017140329561;

void butterfly_16x16_dct_1d(double input[16], double output[16]) {
  double step[16];
  double intermediate[16];
  double temp1, temp2;

  // step 1
  step[0] = input[0] + input[15];
  step[1] = input[1] + input[14];
  step[2] = input[2] + input[13];
  step[3] = input[3] + input[12];
  step[4] = input[4] + input[11];
  step[5] = input[5] + input[10];
  step[6] = input[6] + input[9];
  step[7] = input[7] + input[8];
  step[8] = input[7] - input[8];
  step[9] = input[6] - input[9];
  step[10] = input[5] - input[10];
  step[11] = input[4] - input[11];
  step[12] = input[3] - input[12];
  step[13] = input[2] - input[13];
  step[14] = input[1] - input[14];
  step[15] = input[0] - input[15];

  // step 2
  output[0] = step[0] + step[7];
  output[1] = step[1] + step[6];
  output[2] = step[2] + step[5];
  output[3] = step[3] + step[4];
  output[4] = step[3]
- step[4]; output[5] = step[2] - step[5]; output[6] = step[1] - step[6]; output[7] = step[0] - step[7]; temp1 = step[8] * C7; temp2 = step[15] * C9; output[8] = temp1 + temp2; temp1 = step[9] * C11; temp2 = step[14] * C5; output[9] = temp1 - temp2; temp1 = step[10] * C3; temp2 = step[13] * C13; output[10] = temp1 + temp2; temp1 = step[11] * C15; temp2 = step[12] * C1; output[11] = temp1 - temp2; temp1 = step[11] * C1; temp2 = step[12] * C15; output[12] = temp2 + temp1; temp1 = step[10] * C13; temp2 = step[13] * C3; output[13] = temp2 - temp1; temp1 = step[9] * C5; temp2 = step[14] * C11; output[14] = temp2 + temp1; temp1 = step[8] * C9; temp2 = step[15] * C7; output[15] = temp2 - temp1; // step 3 step[0] = output[0] + output[3]; step[1] = output[1] + output[2]; step[2] = output[1] - output[2]; step[3] = output[0] - output[3]; temp1 = output[4] * C14; temp2 = output[7] * C2; step[4] = temp1 + temp2; temp1 = output[5] * C10; temp2 = output[6] * C6; step[5] = temp1 + temp2; temp1 = output[5] * C6; temp2 = output[6] * C10; step[6] = temp2 - temp1; temp1 = output[4] * C2; temp2 = output[7] * C14; step[7] = temp2 - temp1; step[8] = output[8] + output[11]; step[9] = output[9] + output[10]; step[10] = output[9] - output[10]; step[11] = output[8] - output[11]; step[12] = output[12] + output[15]; step[13] = output[13] + output[14]; step[14] = output[13] - output[14]; step[15] = output[12] - output[15]; // step 4 output[0] = (step[0] + step[1]); output[8] = (step[0] - step[1]); temp1 = step[2] * C12; temp2 = step[3] * C4; temp1 = temp1 + temp2; output[4] = 2 * (temp1 * C8); temp1 = step[2] * C4; temp2 = step[3] * C12; temp1 = temp2 - temp1; output[12] = 2 * (temp1 * C8); output[2] = 2 * ((step[4] + step[5]) * C8); output[14] = 2 * ((step[7] - step[6]) * C8); temp1 = step[4] - step[5]; temp2 = step[6] + step[7]; output[6] = (temp1 + temp2); output[10] = (temp1 - temp2); intermediate[8] = step[8] + step[14]; intermediate[9] = step[9] + step[15]; temp1 = intermediate[8] * C12; temp2 = intermediate[9] * C4; temp1 = temp1 - temp2; output[3] = 2 * (temp1 * C8); temp1 = intermediate[8] * C4; temp2 = intermediate[9] * C12; temp1 = temp2 + temp1; output[13] = 2 * (temp1 * C8); output[9] = 2 * ((step[10] + step[11]) * C8); intermediate[11] = step[10] - step[11]; intermediate[12] = step[12] + step[13]; intermediate[13] = step[12] - step[13]; intermediate[14] = step[8] - step[14]; intermediate[15] = step[9] - step[15]; output[15] = (intermediate[11] + intermediate[12]); output[1] = -(intermediate[11] - intermediate[12]); output[7] = 2 * (intermediate[13] * C8); temp1 = intermediate[14] * C12; temp2 = intermediate[15] * C4; temp1 = temp1 - temp2; output[11] = -2 * (temp1 * C8); temp1 = intermediate[14] * C4; temp2 = intermediate[15] * C12; temp1 = temp2 + temp1; output[5] = 2 * (temp1 * C8); } void reference_16x16_dct_2d(int16_t input[256], double output[256]) { // First transform columns for (int i = 0; i < 16; ++i) { double temp_in[16], temp_out[16]; for (int j = 0; j < 16; ++j) temp_in[j] = input[j * 16 + i]; butterfly_16x16_dct_1d(temp_in, temp_out); for (int j = 0; j < 16; ++j) output[j * 16 + i] = temp_out[j]; } // Then transform rows for (int i = 0; i < 16; ++i) { double temp_in[16], temp_out[16]; for (int j = 0; j < 16; ++j) temp_in[j] = output[j + i * 16]; butterfly_16x16_dct_1d(temp_in, temp_out); // Scale by some magic number for (int j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j] / 2; } } typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride); typedef void (*IdctFunc)(const 
    tran_low_t *in, uint8_t *out, int stride);
typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride,
                        int tx_type);
typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride,
                        int tx_type);

typedef std::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct16x16Param;
typedef std::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht16x16Param;
typedef std::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct16x16Param;

void fdct16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                   int /*tx_type*/) {
  vpx_fdct16x16_c(in, out, stride);
}

void idct16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                   int /*tx_type*/) {
  vpx_idct16x16_256_add_c(in, dest, stride);
}

void fht16x16_ref(const int16_t *in, tran_low_t *out, int stride,
                  int tx_type) {
  vp9_fht16x16_c(in, out, stride, tx_type);
}

void iht16x16_ref(const tran_low_t *in, uint8_t *dest, int stride,
                  int tx_type) {
  vp9_iht16x16_256_add_c(in, dest, stride, tx_type);
}

#if CONFIG_VP9_HIGHBITDEPTH
void idct16x16_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct16x16_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}

void idct16x16_10_ref(const tran_low_t *in, uint8_t *out, int stride,
                      int /*tx_type*/) {
  idct16x16_10(in, out, stride);
}

void idct16x16_12_ref(const tran_low_t *in, uint8_t *out, int stride,
                      int /*tx_type*/) {
  idct16x16_12(in, out, stride);
}

void iht16x16_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type,
                                10);
}

void iht16x16_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) {
  vp9_highbd_iht16x16_256_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type,
                                12);
}

#if HAVE_SSE2
void idct16x16_10_add_10_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_10_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct16x16_10_add_12_c(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_10_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}

void idct16x16_256_add_10_sse2(const tran_low_t *in, uint8_t *out,
                               int stride) {
  vpx_highbd_idct16x16_256_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct16x16_256_add_12_sse2(const tran_low_t *in, uint8_t *out,
                               int stride) {
  vpx_highbd_idct16x16_256_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}

void idct16x16_10_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_10_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct16x16_10_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct16x16_10_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12);
}
#endif  // HAVE_SSE2
#endif  // CONFIG_VP9_HIGHBITDEPTH

class Trans16x16TestBase {
 public:
  virtual ~Trans16x16TestBase() {}

 protected:
  virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0;

  virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0;

  void RunAccuracyCheck() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    uint32_t max_error = 0;
    int64_t total_error = 0;
    const int count_test_block = 10000;
    for (int i = 0; i < count_test_block; ++i) {
      DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
      DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
      DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif

      // Initialize a test block with input range [-mask_, mask_].
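      // (Why this range holds: in the 8-bit path src and dst are both in
      // [0, 255], so src - dst lies in [-255, 255] == [-mask_, mask_]; the
      // high-bitdepth path gets the same property by masking Rand16() with
      // mask_.)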
for (int j = 0; j < kNumCoeffs; ++j) { if (bit_depth_ == VPX_BITS_8) { src[j] = rnd.Rand8(); dst[j] = rnd.Rand8(); test_input_block[j] = src[j] - dst[j]; #if CONFIG_VP9_HIGHBITDEPTH } else { src16[j] = rnd.Rand16() & mask_; dst16[j] = rnd.Rand16() & mask_; test_input_block[j] = src16[j] - dst16[j]; #endif } } ASM_REGISTER_STATE_CHECK( RunFwdTxfm(test_input_block, test_temp_block, pitch_)); if (bit_depth_ == VPX_BITS_8) { ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_)); #if CONFIG_VP9_HIGHBITDEPTH } else { ASM_REGISTER_STATE_CHECK( RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } for (int j = 0; j < kNumCoeffs; ++j) { #if CONFIG_VP9_HIGHBITDEPTH const int32_t diff = bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; #else const int32_t diff = dst[j] - src[j]; #endif const uint32_t error = diff * diff; if (max_error < error) max_error = error; total_error += error; } } EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error) << "Error: 16x16 FHT/IHT has an individual round trip error > 1"; EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error) << "Error: 16x16 FHT/IHT has average round trip error > 1 per block"; } void RunCoeffCheck() { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 1000; DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]); for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with input range [-mask_, mask_]. for (int j = 0; j < kNumCoeffs; ++j) { input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); } fwd_txfm_ref(input_block, output_ref_block, pitch_, tx_type_); ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, output_block, pitch_)); // The minimum quant value is 4. for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(output_block[j], output_ref_block[j]); } } void RunMemCheck() { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 1000; DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]); for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with input range [-mask_, mask_]. for (int j = 0; j < kNumCoeffs; ++j) { input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_; } if (i == 0) { for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_; } else if (i == 1) { for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_; } fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_); ASM_REGISTER_STATE_CHECK( RunFwdTxfm(input_extreme_block, output_block, pitch_)); // The minimum quant value is 4. 
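      // (The all-plus-mask_ / all-minus-mask_ blocks above probe for
      // intermediate overflow: since the smallest quantizer step is 4, a
      // forward transform only needs its coefficients to stay within
      // 4 * DCT_MAX_VALUE, scaled for bit depth, which the EXPECT_GE below
      // asserts.)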
for (int j = 0; j < kNumCoeffs; ++j) { EXPECT_EQ(output_block[j], output_ref_block[j]); EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j])) << "Error: 16x16 FDCT has coefficient larger than 4*DCT_MAX_VALUE"; } } } void RunQuantCheck(int dc_thred, int ac_thred) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 100000; DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]); DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]); #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]); #endif for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with input range [-mask_, mask_]. for (int j = 0; j < kNumCoeffs; ++j) { input_extreme_block[j] = rnd.Rand8() % 2 ? mask_ : -mask_; } if (i == 0) { for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_; } if (i == 1) { for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_; } fwd_txfm_ref(input_extreme_block, output_ref_block, pitch_, tx_type_); // clear reconstructed pixel buffers memset(dst, 0, kNumCoeffs * sizeof(uint8_t)); memset(ref, 0, kNumCoeffs * sizeof(uint8_t)); #if CONFIG_VP9_HIGHBITDEPTH memset(dst16, 0, kNumCoeffs * sizeof(uint16_t)); memset(ref16, 0, kNumCoeffs * sizeof(uint16_t)); #endif // quantization with maximum allowed step sizes output_ref_block[0] = (output_ref_block[0] / dc_thred) * dc_thred; for (int j = 1; j < kNumCoeffs; ++j) { output_ref_block[j] = (output_ref_block[j] / ac_thred) * ac_thred; } if (bit_depth_ == VPX_BITS_8) { inv_txfm_ref(output_ref_block, ref, pitch_, tx_type_); ASM_REGISTER_STATE_CHECK(RunInvTxfm(output_ref_block, dst, pitch_)); #if CONFIG_VP9_HIGHBITDEPTH } else { inv_txfm_ref(output_ref_block, CAST_TO_BYTEPTR(ref16), pitch_, tx_type_); ASM_REGISTER_STATE_CHECK( RunInvTxfm(output_ref_block, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } if (bit_depth_ == VPX_BITS_8) { for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref[j], dst[j]); #if CONFIG_VP9_HIGHBITDEPTH } else { for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(ref16[j], dst16[j]); #endif } } } void RunInvAccuracyCheck() { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 1000; DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]); DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]); #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]); #endif // CONFIG_VP9_HIGHBITDEPTH for (int i = 0; i < count_test_block; ++i) { double out_r[kNumCoeffs]; // Initialize a test block with input range [-255, 255]. 
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (bit_depth_ == VPX_BITS_8) {
          src[j] = rnd.Rand8();
          dst[j] = rnd.Rand8();
          in[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          src16[j] = rnd.Rand16() & mask_;
          dst16[j] = rnd.Rand16() & mask_;
          in[j] = src16[j] - dst16[j];
#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }

      reference_16x16_dct_2d(in, out_r);
      for (int j = 0; j < kNumCoeffs; ++j) {
        coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
      }

      if (bit_depth_ == VPX_BITS_8) {
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, 16));
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), 16));
#endif  // CONFIG_VP9_HIGHBITDEPTH
      }

      for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j];
#else
        const uint32_t diff = dst[j] - src[j];
#endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
        EXPECT_GE(1u, error) << "Error: 16x16 IDCT has error " << error
                             << " at index " << j;
      }
    }
  }

  void CompareInvReference(IdctFunc ref_txfm, int thresh) {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int count_test_block = 10000;
    const int eob = 10;
    const int16_t *scan = vp9_default_scan_orders[TX_16X16].scan;
    DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
    DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
    DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    for (int i = 0; i < count_test_block; ++i) {
      for (int j = 0; j < kNumCoeffs; ++j) {
        if (j < eob) {
          // Random values less than the threshold, either positive or negative
          coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2));
        } else {
          coeff[scan[j]] = 0;
        }
        if (bit_depth_ == VPX_BITS_8) {
          dst[j] = 0;
          ref[j] = 0;
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          dst16[j] = 0;
          ref16[j] = 0;
#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }
      if (bit_depth_ == VPX_BITS_8) {
        ref_txfm(coeff, ref, pitch_);
        ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_));
      } else {
#if CONFIG_VP9_HIGHBITDEPTH
        ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_);
        ASM_REGISTER_STATE_CHECK(
            RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_));
#endif  // CONFIG_VP9_HIGHBITDEPTH
      }
      for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
        const uint32_t diff =
            bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j];
#else
        const uint32_t diff = dst[j] - ref[j];
#endif  // CONFIG_VP9_HIGHBITDEPTH
        const uint32_t error = diff * diff;
        EXPECT_EQ(0u, error) << "Error: 16x16 IDCT Comparison has error "
                             << error << " at index " << j;
      }
    }
  }

  int pitch_;
  int tx_type_;
  vpx_bit_depth_t bit_depth_;
  int mask_;
  FhtFunc fwd_txfm_ref;
  IhtFunc inv_txfm_ref;
};

class Trans16x16DCT : public Trans16x16TestBase,
                      public ::testing::TestWithParam<Dct16x16Param> {
 public:
  virtual ~Trans16x16DCT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
    pitch_ = 16;
    fwd_txfm_ref = fdct16x16_ref;
    inv_txfm_ref = idct16x16_ref;
    mask_ = (1 << bit_depth_) - 1;
#if CONFIG_VP9_HIGHBITDEPTH
    switch (bit_depth_) {
      case VPX_BITS_10: inv_txfm_ref = idct16x16_10_ref; break;
      case VPX_BITS_12: inv_txfm_ref = idct16x16_12_ref; break;
      default: inv_txfm_ref = idct16x16_ref; break;
    }
#else
    inv_txfm_ref = idct16x16_ref;
#endif
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
    fwd_txfm_(in, out, stride);
  }
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }

  FdctFunc fwd_txfm_;
  IdctFunc inv_txfm_;
};

TEST_P(Trans16x16DCT, AccuracyCheck) { RunAccuracyCheck(); }

TEST_P(Trans16x16DCT, CoeffCheck) { RunCoeffCheck(); }

TEST_P(Trans16x16DCT, MemCheck) { RunMemCheck(); }

TEST_P(Trans16x16DCT, QuantCheck) {
  // Use maximally allowed quantization step sizes for DC and AC
  // coefficients respectively.
  RunQuantCheck(1336, 1828);
}

TEST_P(Trans16x16DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); }

class Trans16x16HT : public Trans16x16TestBase,
                     public ::testing::TestWithParam<Ht16x16Param> {
 public:
  virtual ~Trans16x16HT() {}

  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    tx_type_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
    pitch_ = 16;
    fwd_txfm_ref = fht16x16_ref;
    inv_txfm_ref = iht16x16_ref;
    mask_ = (1 << bit_depth_) - 1;
#if CONFIG_VP9_HIGHBITDEPTH
    switch (bit_depth_) {
      case VPX_BITS_10: inv_txfm_ref = iht16x16_10; break;
      case VPX_BITS_12: inv_txfm_ref = iht16x16_12; break;
      default: inv_txfm_ref = iht16x16_ref; break;
    }
#else
    inv_txfm_ref = iht16x16_ref;
#endif
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) {
    fwd_txfm_(in, out, stride, tx_type_);
  }
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride, tx_type_);
  }

  FhtFunc fwd_txfm_;
  IhtFunc inv_txfm_;
};

TEST_P(Trans16x16HT, AccuracyCheck) { RunAccuracyCheck(); }

TEST_P(Trans16x16HT, CoeffCheck) { RunCoeffCheck(); }

TEST_P(Trans16x16HT, MemCheck) { RunMemCheck(); }

TEST_P(Trans16x16HT, QuantCheck) {
  // The encoder skips any non-DC intra prediction modes when the
  // quantization step size goes beyond 988.
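  // (RunQuantCheck() rounds every coefficient to a multiple of the given
  // dc/ac step sizes before inverting, so the arguments below exercise step
  // sizes that stay inside that 988 limit.)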
  RunQuantCheck(429, 729);
}

class InvTrans16x16DCT : public Trans16x16TestBase,
                         public ::testing::TestWithParam<Idct16x16Param> {
 public:
  virtual ~InvTrans16x16DCT() {}

  virtual void SetUp() {
    ref_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    thresh_ = GET_PARAM(2);
    bit_depth_ = GET_PARAM(3);
    pitch_ = 16;
    mask_ = (1 << bit_depth_) - 1;
  }
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  void RunFwdTxfm(int16_t * /*in*/, tran_low_t * /*out*/, int /*stride*/) {}
  void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) {
    inv_txfm_(out, dst, stride);
  }

  IdctFunc ref_txfm_;
  IdctFunc inv_txfm_;
  int thresh_;
};

TEST_P(InvTrans16x16DCT, CompareReference) {
  CompareInvReference(ref_txfm_, thresh_);
}

using std::make_tuple;

#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_10, 0, VPX_BITS_10),
        make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_12, 0, VPX_BITS_12),
        make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_c, 0,
                   VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(C, Trans16x16DCT,
                        ::testing::Values(make_tuple(&vpx_fdct16x16_c,
                                                     &vpx_idct16x16_256_add_c,
                                                     0, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if CONFIG_VP9_HIGHBITDEPTH
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 0, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 1, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 2, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_10, 3, VPX_BITS_10),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 0, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 1, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 2, VPX_BITS_12),
        make_tuple(&vp9_highbd_fht16x16_c, &iht16x16_12, 3, VPX_BITS_12),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
#else
INSTANTIATE_TEST_CASE_P(
    C, Trans16x16HT,
    ::testing::Values(
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8),
        make_tuple(&vp9_fht16x16_c, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8)));
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if HAVE_NEON && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    NEON, Trans16x16DCT,
    ::testing::Values(make_tuple(&vpx_fdct16x16_neon,
                                 &vpx_idct16x16_256_add_neon, 0, VPX_BITS_8)));
#endif  // HAVE_NEON && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(make_tuple(&vpx_fdct16x16_sse2,
                                 &vpx_idct16x16_256_add_sse2, 0, VPX_BITS_8)));
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16HT,
    ::testing::Values(make_tuple(&vp9_fht16x16_sse2,
                                 &vp9_iht16x16_256_add_sse2, 0, VPX_BITS_8),
                      make_tuple(&vp9_fht16x16_sse2,
                                 &vp9_iht16x16_256_add_sse2, 1, VPX_BITS_8),
                      make_tuple(&vp9_fht16x16_sse2,
                                 &vp9_iht16x16_256_add_sse2, 2, VPX_BITS_8),
                      make_tuple(&vp9_fht16x16_sse2,
                                 &vp9_iht16x16_256_add_sse2, 3, VPX_BITS_8)));
#endif  // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE

#if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE
INSTANTIATE_TEST_CASE_P(
    SSE2, Trans16x16DCT,
    ::testing::Values(
        make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_10, 0,
VPX_BITS_10), make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_10_sse2, 0, VPX_BITS_10), make_tuple(&vpx_highbd_fdct16x16_sse2, &idct16x16_12, 0, VPX_BITS_12), make_tuple(&vpx_highbd_fdct16x16_c, &idct16x16_256_add_12_sse2, 0, VPX_BITS_12), make_tuple(&vpx_fdct16x16_sse2, &vpx_idct16x16_256_add_c, 0, VPX_BITS_8))); INSTANTIATE_TEST_CASE_P( SSE2, Trans16x16HT, ::testing::Values( make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 0, VPX_BITS_8), make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 1, VPX_BITS_8), make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 2, VPX_BITS_8), make_tuple(&vp9_fht16x16_sse2, &vp9_iht16x16_256_add_c, 3, VPX_BITS_8))); // Optimizations take effect at a threshold of 3155, so we use a value close to // that to test both branches. INSTANTIATE_TEST_CASE_P( SSE2, InvTrans16x16DCT, ::testing::Values(make_tuple(&idct16x16_10_add_10_c, &idct16x16_10_add_10_sse2, 3167, VPX_BITS_10), make_tuple(&idct16x16_10, &idct16x16_256_add_10_sse2, 3167, VPX_BITS_10), make_tuple(&idct16x16_10_add_12_c, &idct16x16_10_add_12_sse2, 3167, VPX_BITS_12), make_tuple(&idct16x16_12, &idct16x16_256_add_12_sse2, 3167, VPX_BITS_12))); #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P(MSA, Trans16x16DCT, ::testing::Values(make_tuple(&vpx_fdct16x16_msa, &vpx_idct16x16_256_add_msa, 0, VPX_BITS_8))); INSTANTIATE_TEST_CASE_P( MSA, Trans16x16HT, ::testing::Values( make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 0, VPX_BITS_8), make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 1, VPX_BITS_8), make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 2, VPX_BITS_8), make_tuple(&vp9_fht16x16_msa, &vp9_iht16x16_256_add_msa, 3, VPX_BITS_8))); #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P(VSX, Trans16x16DCT, ::testing::Values(make_tuple(&vpx_fdct16x16_c, &vpx_idct16x16_256_add_vsx, 0, VPX_BITS_8))); #endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE } // namespace libvpx-1.8.2/test/dct32x32_test.cc000066400000000000000000000341141357355204000166360ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */
#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <tuple>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/bench.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp9/common/vp9_entropy.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/msvc.h"  // for round()

using libvpx_test::ACMRandom;

namespace {

const int kNumCoeffs = 1024;
const double kPi = 3.141592653589793238462643383279502884;

void reference_32x32_dct_1d(const double in[32], double out[32]) {
  const double kInvSqrt2 = 0.707106781186547524400844362104;
  for (int k = 0; k < 32; k++) {
    out[k] = 0.0;
    for (int n = 0; n < 32; n++) {
      out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 64.0);
    }
    if (k == 0) out[k] = out[k] * kInvSqrt2;
  }
}

void reference_32x32_dct_2d(const int16_t input[kNumCoeffs],
                            double output[kNumCoeffs]) {
  // First transform columns
  for (int i = 0; i < 32; ++i) {
    double temp_in[32], temp_out[32];
    for (int j = 0; j < 32; ++j) temp_in[j] = input[j * 32 + i];
    reference_32x32_dct_1d(temp_in, temp_out);
    for (int j = 0; j < 32; ++j) output[j * 32 + i] = temp_out[j];
  }
  // Then transform rows
  for (int i = 0; i < 32; ++i) {
    double temp_in[32], temp_out[32];
    for (int j = 0; j < 32; ++j) temp_in[j] = output[j + i * 32];
    reference_32x32_dct_1d(temp_in, temp_out);
    // Scale by some magic number
    for (int j = 0; j < 32; ++j) output[j + i * 32] = temp_out[j] / 4;
  }
}

typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride);
typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride);

typedef std::tuple<FwdTxfmFunc, InvTxfmFunc, int, vpx_bit_depth_t>
    Trans32x32Param;

#if CONFIG_VP9_HIGHBITDEPTH
void idct32x32_10(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride, 10);
}

void idct32x32_12(const tran_low_t *in, uint8_t *out, int stride) {
  vpx_highbd_idct32x32_1024_add_c(in, CAST_TO_SHORTPTR(out), stride, 12);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

class Trans32x32Test : public AbstractBench,
                       public ::testing::TestWithParam<Trans32x32Param> {
 public:
  virtual ~Trans32x32Test() {}
  virtual void SetUp() {
    fwd_txfm_ = GET_PARAM(0);
    inv_txfm_ = GET_PARAM(1);
    version_ = GET_PARAM(2);  // 0: high precision forward transform
                              // 1: low precision version for rd loop
    bit_depth_ = GET_PARAM(3);
    mask_ = (1 << bit_depth_) - 1;
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  int version_;
  vpx_bit_depth_t bit_depth_;
  int mask_;
  FwdTxfmFunc fwd_txfm_;
  InvTxfmFunc inv_txfm_;

  int16_t *bench_in_;
  tran_low_t *bench_out_;

  virtual void Run();
};

void Trans32x32Test::Run() { fwd_txfm_(bench_in_, bench_out_, 32); }

TEST_P(Trans32x32Test, AccuracyCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  uint32_t max_error = 0;
  int64_t total_error = 0;
  const int count_test_block = 10000;
  DECLARE_ALIGNED(16, int16_t, test_input_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, test_temp_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif

  for (int i = 0; i < count_test_block; ++i) {
    // Initialize a test block with input range [-mask_, mask_].
for (int j = 0; j < kNumCoeffs; ++j) { if (bit_depth_ == VPX_BITS_8) { src[j] = rnd.Rand8(); dst[j] = rnd.Rand8(); test_input_block[j] = src[j] - dst[j]; #if CONFIG_VP9_HIGHBITDEPTH } else { src16[j] = rnd.Rand16() & mask_; dst16[j] = rnd.Rand16() & mask_; test_input_block[j] = src16[j] - dst16[j]; #endif } } ASM_REGISTER_STATE_CHECK(fwd_txfm_(test_input_block, test_temp_block, 32)); if (bit_depth_ == VPX_BITS_8) { ASM_REGISTER_STATE_CHECK(inv_txfm_(test_temp_block, dst, 32)); #if CONFIG_VP9_HIGHBITDEPTH } else { ASM_REGISTER_STATE_CHECK( inv_txfm_(test_temp_block, CAST_TO_BYTEPTR(dst16), 32)); #endif } for (int j = 0; j < kNumCoeffs; ++j) { #if CONFIG_VP9_HIGHBITDEPTH const int32_t diff = bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; #else const int32_t diff = dst[j] - src[j]; #endif const uint32_t error = diff * diff; if (max_error < error) max_error = error; total_error += error; } } if (version_ == 1) { max_error /= 2; total_error /= 45; } EXPECT_GE(1u << 2 * (bit_depth_ - 8), max_error) << "Error: 32x32 FDCT/IDCT has an individual round-trip error > 1"; EXPECT_GE(count_test_block << 2 * (bit_depth_ - 8), total_error) << "Error: 32x32 FDCT/IDCT has average round-trip error > 1 per block"; } TEST_P(Trans32x32Test, CoeffCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 1000; DECLARE_ALIGNED(16, int16_t, input_block[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]); for (int i = 0; i < count_test_block; ++i) { for (int j = 0; j < kNumCoeffs; ++j) { input_block[j] = (rnd.Rand16() & mask_) - (rnd.Rand16() & mask_); } const int stride = 32; vpx_fdct32x32_c(input_block, output_ref_block, stride); ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block, output_block, stride)); if (version_ == 0) { for (int j = 0; j < kNumCoeffs; ++j) EXPECT_EQ(output_block[j], output_ref_block[j]) << "Error: 32x32 FDCT versions have mismatched coefficients"; } else { for (int j = 0; j < kNumCoeffs; ++j) EXPECT_GE(6, abs(output_block[j] - output_ref_block[j])) << "Error: 32x32 FDCT rd has mismatched coefficients"; } } } TEST_P(Trans32x32Test, MemCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 2000; DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]); for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with input range [-mask_, mask_]. for (int j = 0; j < kNumCoeffs; ++j) { input_extreme_block[j] = rnd.Rand8() & 1 ? mask_ : -mask_; } if (i == 0) { for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = mask_; } else if (i == 1) { for (int j = 0; j < kNumCoeffs; ++j) input_extreme_block[j] = -mask_; } const int stride = 32; vpx_fdct32x32_c(input_extreme_block, output_ref_block, stride); ASM_REGISTER_STATE_CHECK( fwd_txfm_(input_extreme_block, output_block, stride)); // The minimum quant value is 4. 
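    // (version_ == 1 selects the low-precision rd forward transform: the
    // checks below allow its coefficients to differ from the high-precision
    // C output by up to 6, while version_ == 0 must match bit-exactly.)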
    for (int j = 0; j < kNumCoeffs; ++j) {
      if (version_ == 0) {
        EXPECT_EQ(output_block[j], output_ref_block[j])
            << "Error: 32x32 FDCT versions have mismatched coefficients";
      } else {
        EXPECT_GE(6, abs(output_block[j] - output_ref_block[j]))
            << "Error: 32x32 FDCT rd has mismatched coefficients";
      }
      EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8),
                abs(output_ref_block[j]))
          << "Error: 32x32 FDCT C has coefficient larger than 4*DCT_MAX_VALUE";
      EXPECT_GE(4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block[j]))
          << "Error: 32x32 FDCT has coefficient larger than "
          << "4*DCT_MAX_VALUE";
    }
  }
}

TEST_P(Trans32x32Test, DISABLED_Speed) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  DECLARE_ALIGNED(16, int16_t, input_extreme_block[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, output_block[kNumCoeffs]);

  bench_in_ = input_extreme_block;
  bench_out_ = output_block;

  RunNTimes(INT16_MAX);
  PrintMedian("32x32");
}

TEST_P(Trans32x32Test, InverseAccuracy) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 1000;
  DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]);
  DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]);
#if CONFIG_VP9_HIGHBITDEPTH
  DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]);
  DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]);
#endif

  for (int i = 0; i < count_test_block; ++i) {
    double out_r[kNumCoeffs];

    // Initialize a test block with input range [-255, 255]
    for (int j = 0; j < kNumCoeffs; ++j) {
      if (bit_depth_ == VPX_BITS_8) {
        src[j] = rnd.Rand8();
        dst[j] = rnd.Rand8();
        in[j] = src[j] - dst[j];
#if CONFIG_VP9_HIGHBITDEPTH
      } else {
        src16[j] = rnd.Rand16() & mask_;
        dst16[j] = rnd.Rand16() & mask_;
        in[j] = src16[j] - dst16[j];
#endif
      }
    }

    reference_32x32_dct_2d(in, out_r);
    for (int j = 0; j < kNumCoeffs; ++j) {
      coeff[j] = static_cast<tran_low_t>(round(out_r[j]));
    }
    if (bit_depth_ == VPX_BITS_8) {
      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, dst, 32));
#if CONFIG_VP9_HIGHBITDEPTH
    } else {
      ASM_REGISTER_STATE_CHECK(inv_txfm_(coeff, CAST_TO_BYTEPTR(dst16), 32));
#endif
    }
    for (int j = 0; j < kNumCoeffs; ++j) {
#if CONFIG_VP9_HIGHBITDEPTH
      const int diff = bit_depth_ == VPX_BITS_8 ?
dst[j] - src[j] : dst16[j] - src16[j]; #else const int diff = dst[j] - src[j]; #endif const int error = diff * diff; EXPECT_GE(1, error) << "Error: 32x32 IDCT has error " << error << " at index " << j; } } } using std::make_tuple; #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( C, Trans32x32Test, ::testing::Values( make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_10, 0, VPX_BITS_10), make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_10, 1, VPX_BITS_10), make_tuple(&vpx_highbd_fdct32x32_c, &idct32x32_12, 0, VPX_BITS_12), make_tuple(&vpx_highbd_fdct32x32_rd_c, &idct32x32_12, 1, VPX_BITS_12), make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); #else INSTANTIATE_TEST_CASE_P( C, Trans32x32Test, ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_c, &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #if HAVE_NEON && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( NEON, Trans32x32Test, ::testing::Values(make_tuple(&vpx_fdct32x32_neon, &vpx_idct32x32_1024_add_neon, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_neon, &vpx_idct32x32_1024_add_neon, 1, VPX_BITS_8))); #endif // HAVE_NEON && !CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( SSE2, Trans32x32Test, ::testing::Values(make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8))); #endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( SSE2, Trans32x32Test, ::testing::Values( make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_10, 0, VPX_BITS_10), make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_10, 1, VPX_BITS_10), make_tuple(&vpx_highbd_fdct32x32_sse2, &idct32x32_12, 0, VPX_BITS_12), make_tuple(&vpx_highbd_fdct32x32_rd_sse2, &idct32x32_12, 1, VPX_BITS_12), make_tuple(&vpx_fdct32x32_sse2, &vpx_idct32x32_1024_add_c, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_sse2, &vpx_idct32x32_1024_add_c, 1, VPX_BITS_8))); #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( AVX2, Trans32x32Test, ::testing::Values(make_tuple(&vpx_fdct32x32_avx2, &vpx_idct32x32_1024_add_sse2, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_avx2, &vpx_idct32x32_1024_add_sse2, 1, VPX_BITS_8))); #endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( MSA, Trans32x32Test, ::testing::Values(make_tuple(&vpx_fdct32x32_msa, &vpx_idct32x32_1024_add_msa, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_msa, &vpx_idct32x32_1024_add_msa, 1, VPX_BITS_8))); #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( VSX, Trans32x32Test, ::testing::Values(make_tuple(&vpx_fdct32x32_c, &vpx_idct32x32_1024_add_vsx, 0, VPX_BITS_8), make_tuple(&vpx_fdct32x32_rd_vsx, &vpx_idct32x32_1024_add_vsx, 1, VPX_BITS_8))); #endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE } // namespace 
libvpx-1.8.2/test/dct_partial_test.cc000066400000000000000000000141261357355204000176510ustar00rootroot00000000000000/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include <limits>
#include <tuple>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/buffer.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"

using libvpx_test::ACMRandom;
using libvpx_test::Buffer;
using std::make_tuple;
using std::tuple;

namespace {
typedef void (*PartialFdctFunc)(const int16_t *in, tran_low_t *out, int stride);

typedef tuple<PartialFdctFunc, int, vpx_bit_depth_t> PartialFdctParam;

tran_low_t partial_fdct_ref(const Buffer<int16_t> &in, int size) {
  int64_t sum = 0;
  if (in.TopLeftPixel() != NULL) {
    for (int y = 0; y < size; ++y) {
      for (int x = 0; x < size; ++x) {
        sum += in.TopLeftPixel()[y * in.stride() + x];
      }
    }
  } else {
    assert(0);
  }

  switch (size) {
    case 4: sum *= 2; break;
    case 8: /*sum = sum;*/ break;
    case 16: sum >>= 1; break;
    case 32: sum >>= 3; break;
  }

  return static_cast<tran_low_t>(sum);
}

class PartialFdctTest : public ::testing::TestWithParam<PartialFdctParam> {
 public:
  PartialFdctTest() {
    fwd_txfm_ = GET_PARAM(0);
    size_ = GET_PARAM(1);
    bit_depth_ = GET_PARAM(2);
  }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  void RunTest() {
    ACMRandom rnd(ACMRandom::DeterministicSeed());
    const int16_t maxvalue =
        clip_pixel_highbd(std::numeric_limits<int16_t>::max(), bit_depth_);
    const int16_t minvalue = -maxvalue;
    Buffer<int16_t> input_block = Buffer<int16_t>(size_, size_, 8, size_ == 4 ?
0 : 16); ASSERT_TRUE(input_block.Init()); Buffer output_block = Buffer(size_, size_, 0, 16); ASSERT_TRUE(output_block.Init()); if (output_block.TopLeftPixel() != NULL) { for (int i = 0; i < 100; ++i) { if (i == 0) { input_block.Set(maxvalue); } else if (i == 1) { input_block.Set(minvalue); } else { input_block.Set(&rnd, minvalue, maxvalue); } ASM_REGISTER_STATE_CHECK(fwd_txfm_(input_block.TopLeftPixel(), output_block.TopLeftPixel(), input_block.stride())); EXPECT_EQ(partial_fdct_ref(input_block, size_), output_block.TopLeftPixel()[0]); } } else { assert(0); } } PartialFdctFunc fwd_txfm_; vpx_bit_depth_t bit_depth_; int size_; }; TEST_P(PartialFdctTest, PartialFdctTest) { RunTest(); } #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( C, PartialFdctTest, ::testing::Values(make_tuple(&vpx_highbd_fdct32x32_1_c, 32, VPX_BITS_12), make_tuple(&vpx_highbd_fdct32x32_1_c, 32, VPX_BITS_10), make_tuple(&vpx_fdct32x32_1_c, 32, VPX_BITS_8), make_tuple(&vpx_highbd_fdct16x16_1_c, 16, VPX_BITS_12), make_tuple(&vpx_highbd_fdct16x16_1_c, 16, VPX_BITS_10), make_tuple(&vpx_fdct16x16_1_c, 16, VPX_BITS_8), make_tuple(&vpx_highbd_fdct8x8_1_c, 8, VPX_BITS_12), make_tuple(&vpx_highbd_fdct8x8_1_c, 8, VPX_BITS_10), make_tuple(&vpx_fdct8x8_1_c, 8, VPX_BITS_8), make_tuple(&vpx_fdct4x4_1_c, 4, VPX_BITS_8))); #else INSTANTIATE_TEST_CASE_P( C, PartialFdctTest, ::testing::Values(make_tuple(&vpx_fdct32x32_1_c, 32, VPX_BITS_8), make_tuple(&vpx_fdct16x16_1_c, 16, VPX_BITS_8), make_tuple(&vpx_fdct8x8_1_c, 8, VPX_BITS_8), make_tuple(&vpx_fdct4x4_1_c, 4, VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, PartialFdctTest, ::testing::Values(make_tuple(&vpx_fdct32x32_1_sse2, 32, VPX_BITS_8), make_tuple(&vpx_fdct16x16_1_sse2, 16, VPX_BITS_8), make_tuple(&vpx_fdct8x8_1_sse2, 8, VPX_BITS_8), make_tuple(&vpx_fdct4x4_1_sse2, 4, VPX_BITS_8))); #endif // HAVE_SSE2 #if HAVE_NEON #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( NEON, PartialFdctTest, ::testing::Values(make_tuple(&vpx_fdct32x32_1_neon, 32, VPX_BITS_8), make_tuple(&vpx_fdct16x16_1_neon, 16, VPX_BITS_8), make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_12), make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_10), make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_8), make_tuple(&vpx_fdct4x4_1_neon, 4, VPX_BITS_8))); #else INSTANTIATE_TEST_CASE_P( NEON, PartialFdctTest, ::testing::Values(make_tuple(&vpx_fdct32x32_1_neon, 32, VPX_BITS_8), make_tuple(&vpx_fdct16x16_1_neon, 16, VPX_BITS_8), make_tuple(&vpx_fdct8x8_1_neon, 8, VPX_BITS_8), make_tuple(&vpx_fdct4x4_1_neon, 4, VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_NEON #if HAVE_MSA #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P(MSA, PartialFdctTest, ::testing::Values(make_tuple(&vpx_fdct8x8_1_msa, 8, VPX_BITS_8))); #else // !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( MSA, PartialFdctTest, ::testing::Values(make_tuple(&vpx_fdct32x32_1_msa, 32, VPX_BITS_8), make_tuple(&vpx_fdct16x16_1_msa, 16, VPX_BITS_8), make_tuple(&vpx_fdct8x8_1_msa, 8, VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_MSA } // namespace libvpx-1.8.2/test/dct_test.cc000066400000000000000000000654351357355204000161460ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/buffer.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vp9/common/vp9_entropy.h" #include "vpx/vpx_codec.h" #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" using libvpx_test::ACMRandom; using libvpx_test::Buffer; using std::make_tuple; using std::tuple; namespace { typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride); typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride); typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride, int tx_type); typedef void (*FhtFuncRef)(const Buffer &in, Buffer *out, int size, int tx_type); typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, int tx_type); typedef void (*IhtWithBdFunc)(const tran_low_t *in, uint8_t *out, int stride, int tx_type, int bd); template void fdct_wrapper(const int16_t *in, tran_low_t *out, int stride, int tx_type) { (void)tx_type; fn(in, out, stride); } template void idct_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type, int bd) { (void)tx_type; (void)bd; fn(in, out, stride); } template void iht_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type, int bd) { (void)bd; fn(in, out, stride, tx_type); } #if CONFIG_VP9_HIGHBITDEPTH typedef void (*HighbdIdctFunc)(const tran_low_t *in, uint16_t *out, int stride, int bd); typedef void (*HighbdIhtFunc)(const tran_low_t *in, uint16_t *out, int stride, int tx_type, int bd); template void highbd_idct_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type, int bd) { (void)tx_type; fn(in, CAST_TO_SHORTPTR(out), stride, bd); } template void highbd_iht_wrapper(const tran_low_t *in, uint8_t *out, int stride, int tx_type, int bd) { fn(in, CAST_TO_SHORTPTR(out), stride, tx_type, bd); } #endif // CONFIG_VP9_HIGHBITDEPTH struct FuncInfo { FhtFunc ft_func; IhtWithBdFunc it_func; int size; int pixel_size; }; /* forward transform, inverse transform, size, transform type, bit depth */ typedef tuple DctParam; void fdct_ref(const Buffer &in, Buffer *out, int size, int /*tx_type*/) { const int16_t *i = in.TopLeftPixel(); const int i_stride = in.stride(); tran_low_t *o = out->TopLeftPixel(); if (size == 4) { vpx_fdct4x4_c(i, o, i_stride); } else if (size == 8) { vpx_fdct8x8_c(i, o, i_stride); } else if (size == 16) { vpx_fdct16x16_c(i, o, i_stride); } else if (size == 32) { vpx_fdct32x32_c(i, o, i_stride); } } void fht_ref(const Buffer &in, Buffer *out, int size, int tx_type) { const int16_t *i = in.TopLeftPixel(); const int i_stride = in.stride(); tran_low_t *o = out->TopLeftPixel(); if (size == 4) { vp9_fht4x4_c(i, o, i_stride, tx_type); } else if (size == 8) { vp9_fht8x8_c(i, o, i_stride, tx_type); } else if (size == 16) { vp9_fht16x16_c(i, o, i_stride, tx_type); } } void fwht_ref(const Buffer &in, Buffer *out, int size, int /*tx_type*/) { ASSERT_EQ(size, 4); vp9_fwht4x4_c(in.TopLeftPixel(), out->TopLeftPixel(), in.stride()); } class TransTestBase : public ::testing::TestWithParam { public: virtual void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); const int idx = GET_PARAM(0); const FuncInfo *func_info = &(GET_PARAM(1)[idx]); tx_type_ = GET_PARAM(2); bit_depth_ = GET_PARAM(3); fwd_txfm_ = func_info->ft_func; inv_txfm_ = 
func_info->it_func; size_ = func_info->size; pixel_size_ = func_info->pixel_size; max_pixel_value_ = (1 << bit_depth_) - 1; // Randomize stride_ to a value less than or equal to 1024 stride_ = rnd_(1024) + 1; if (stride_ < size_) { stride_ = size_; } // Align stride_ to 16 if it's bigger than 16. if (stride_ > 16) { stride_ &= ~15; } block_size_ = size_ * stride_; src_ = reinterpret_cast( vpx_memalign(16, pixel_size_ * block_size_)); ASSERT_TRUE(src_ != NULL); dst_ = reinterpret_cast( vpx_memalign(16, pixel_size_ * block_size_)); ASSERT_TRUE(dst_ != NULL); } virtual void TearDown() { vpx_free(src_); src_ = NULL; vpx_free(dst_); dst_ = NULL; libvpx_test::ClearSystemState(); } void InitMem() { if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return; if (pixel_size_ == 1) { for (int j = 0; j < block_size_; ++j) { src_[j] = rnd_.Rand16() & max_pixel_value_; } for (int j = 0; j < block_size_; ++j) { dst_[j] = rnd_.Rand16() & max_pixel_value_; } } else { ASSERT_EQ(pixel_size_, 2); uint16_t *const src = reinterpret_cast(src_); uint16_t *const dst = reinterpret_cast(dst_); for (int j = 0; j < block_size_; ++j) { src[j] = rnd_.Rand16() & max_pixel_value_; } for (int j = 0; j < block_size_; ++j) { dst[j] = rnd_.Rand16() & max_pixel_value_; } } } void RunFwdTxfm(const Buffer &in, Buffer *out) { fwd_txfm_(in.TopLeftPixel(), out->TopLeftPixel(), in.stride(), tx_type_); } void RunInvTxfm(const Buffer &in, uint8_t *out) { inv_txfm_(in.TopLeftPixel(), out, stride_, tx_type_, bit_depth_); } protected: void RunAccuracyCheck(int limit) { if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return; ACMRandom rnd(ACMRandom::DeterministicSeed()); Buffer test_input_block = Buffer(size_, size_, 8, size_ == 4 ? 0 : 16); ASSERT_TRUE(test_input_block.Init()); ASSERT_TRUE(test_input_block.TopLeftPixel() != NULL); Buffer test_temp_block = Buffer(size_, size_, 0, 16); ASSERT_TRUE(test_temp_block.Init()); uint32_t max_error = 0; int64_t total_error = 0; const int count_test_block = 10000; for (int i = 0; i < count_test_block; ++i) { InitMem(); for (int h = 0; h < size_; ++h) { for (int w = 0; w < size_; ++w) { if (pixel_size_ == 1) { test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] = src_[h * stride_ + w] - dst_[h * stride_ + w]; } else { ASSERT_EQ(pixel_size_, 2); const uint16_t *const src = reinterpret_cast(src_); const uint16_t *const dst = reinterpret_cast(dst_); test_input_block.TopLeftPixel()[h * test_input_block.stride() + w] = src[h * stride_ + w] - dst[h * stride_ + w]; } } } ASM_REGISTER_STATE_CHECK(RunFwdTxfm(test_input_block, &test_temp_block)); ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst_)); for (int h = 0; h < size_; ++h) { for (int w = 0; w < size_; ++w) { int diff; if (pixel_size_ == 1) { diff = dst_[h * stride_ + w] - src_[h * stride_ + w]; } else { ASSERT_EQ(pixel_size_, 2); const uint16_t *const src = reinterpret_cast(src_); const uint16_t *const dst = reinterpret_cast(dst_); diff = dst[h * stride_ + w] - src[h * stride_ + w]; } const uint32_t error = diff * diff; if (max_error < error) max_error = error; total_error += error; } } } EXPECT_GE(static_cast(limit), max_error) << "Error: " << size_ << "x" << size_ << " transform/inverse transform has an individual round trip error > " << limit; EXPECT_GE(count_test_block * limit, total_error) << "Error: " << size_ << "x" << size_ << " transform/inverse transform has average round trip error > " << limit << " per block"; } void RunCoeffCheck() { if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return; ACMRandom 
rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 5000; Buffer input_block = Buffer(size_, size_, 8, size_ == 4 ? 0 : 16); ASSERT_TRUE(input_block.Init()); Buffer output_ref_block = Buffer(size_, size_, 0); ASSERT_TRUE(output_ref_block.Init()); Buffer output_block = Buffer(size_, size_, 0, 16); ASSERT_TRUE(output_block.Init()); for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with input range [-max_pixel_value_, // max_pixel_value_]. input_block.Set(&rnd, -max_pixel_value_, max_pixel_value_); fwd_txfm_ref(input_block, &output_ref_block, size_, tx_type_); ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_block, &output_block)); // The minimum quant value is 4. EXPECT_TRUE(output_block.CheckValues(output_ref_block)); if (::testing::Test::HasFailure()) { printf("Size: %d Transform type: %d\n", size_, tx_type_); output_block.PrintDifference(output_ref_block); return; } } } void RunMemCheck() { if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return; ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 5000; Buffer input_extreme_block = Buffer(size_, size_, 8, size_ == 4 ? 0 : 16); ASSERT_TRUE(input_extreme_block.Init()); Buffer output_ref_block = Buffer(size_, size_, 0); ASSERT_TRUE(output_ref_block.Init()); Buffer output_block = Buffer(size_, size_, 0, 16); ASSERT_TRUE(output_block.Init()); for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with -max_pixel_value_ or max_pixel_value_. if (i == 0) { input_extreme_block.Set(max_pixel_value_); } else if (i == 1) { input_extreme_block.Set(-max_pixel_value_); } else { ASSERT_TRUE(input_extreme_block.TopLeftPixel() != NULL); for (int h = 0; h < size_; ++h) { for (int w = 0; w < size_; ++w) { input_extreme_block .TopLeftPixel()[h * input_extreme_block.stride() + w] = rnd.Rand8() % 2 ? max_pixel_value_ : -max_pixel_value_; } } } fwd_txfm_ref(input_extreme_block, &output_ref_block, size_, tx_type_); ASM_REGISTER_STATE_CHECK(RunFwdTxfm(input_extreme_block, &output_block)); // The minimum quant value is 4. EXPECT_TRUE(output_block.CheckValues(output_ref_block)); ASSERT_TRUE(output_block.TopLeftPixel() != NULL); for (int h = 0; h < size_; ++h) { for (int w = 0; w < size_; ++w) { EXPECT_GE( 4 * DCT_MAX_VALUE << (bit_depth_ - 8), abs(output_block.TopLeftPixel()[h * output_block.stride() + w])) << "Error: " << size_ << "x" << size_ << " transform has coefficient larger than 4*DCT_MAX_VALUE" << " at " << w << "," << h; if (::testing::Test::HasFailure()) { printf("Size: %d Transform type: %d\n", size_, tx_type_); output_block.DumpBuffer(); return; } } } } } void RunInvAccuracyCheck(int limit) { if (pixel_size_ == 1 && bit_depth_ > VPX_BITS_8) return; ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 1000; Buffer in = Buffer(size_, size_, 4); ASSERT_TRUE(in.Init()); Buffer coeff = Buffer(size_, size_, 0, 16); ASSERT_TRUE(coeff.Init()); Buffer dst = Buffer(size_, size_, 0, 16); ASSERT_TRUE(dst.Init()); Buffer src = Buffer(size_, size_, 0); ASSERT_TRUE(src.Init()); Buffer dst16 = Buffer(size_, size_, 0, 16); ASSERT_TRUE(dst16.Init()); Buffer src16 = Buffer(size_, size_, 0); ASSERT_TRUE(src16.Init()); for (int i = 0; i < count_test_block; ++i) { InitMem(); ASSERT_TRUE(in.TopLeftPixel() != NULL); // Initialize a test block with input range [-max_pixel_value_, // max_pixel_value_]. 
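// (The test block is a prediction residual, src - dst, mirroring how the
// codec transforms residuals rather than raw pixels. Sketch of the round
// trip checked below:
//   residual = src - dst; coeff = fwd(residual); dst += inv(coeff);
// after which dst should land within `limit` of src.)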
for (int h = 0; h < size_; ++h) { for (int w = 0; w < size_; ++w) { if (pixel_size_ == 1) { in.TopLeftPixel()[h * in.stride() + w] = src_[h * stride_ + w] - dst_[h * stride_ + w]; } else { ASSERT_EQ(pixel_size_, 2); const uint16_t *const src = reinterpret_cast(src_); const uint16_t *const dst = reinterpret_cast(dst_); in.TopLeftPixel()[h * in.stride() + w] = src[h * stride_ + w] - dst[h * stride_ + w]; } } } fwd_txfm_ref(in, &coeff, size_, tx_type_); ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst_)); for (int h = 0; h < size_; ++h) { for (int w = 0; w < size_; ++w) { int diff; if (pixel_size_ == 1) { diff = dst_[h * stride_ + w] - src_[h * stride_ + w]; } else { ASSERT_EQ(pixel_size_, 2); const uint16_t *const src = reinterpret_cast(src_); const uint16_t *const dst = reinterpret_cast(dst_); diff = dst[h * stride_ + w] - src[h * stride_ + w]; } const uint32_t error = diff * diff; EXPECT_GE(static_cast(limit), error) << "Error: " << size_ << "x" << size_ << " inverse transform has error " << error << " at " << w << "," << h; if (::testing::Test::HasFailure()) { printf("Size: %d Transform type: %d\n", size_, tx_type_); return; } } } } } FhtFunc fwd_txfm_; FhtFuncRef fwd_txfm_ref; IhtWithBdFunc inv_txfm_; ACMRandom rnd_; uint8_t *src_; uint8_t *dst_; vpx_bit_depth_t bit_depth_; int tx_type_; int max_pixel_value_; int size_; int stride_; int pixel_size_; int block_size_; }; /* -------------------------------------------------------------------------- */ class TransDCT : public TransTestBase { public: TransDCT() { fwd_txfm_ref = fdct_ref; } }; TEST_P(TransDCT, AccuracyCheck) { int t = 1; if (size_ == 16 && bit_depth_ > 10 && pixel_size_ == 2) { t = 2; } else if (size_ == 32 && bit_depth_ > 10 && pixel_size_ == 2) { t = 7; } RunAccuracyCheck(t); } TEST_P(TransDCT, CoeffCheck) { RunCoeffCheck(); } TEST_P(TransDCT, MemCheck) { RunMemCheck(); } TEST_P(TransDCT, InvAccuracyCheck) { RunInvAccuracyCheck(1); } static const FuncInfo dct_c_func_info[] = { #if CONFIG_VP9_HIGHBITDEPTH { &fdct_wrapper, &highbd_idct_wrapper, 4, 2 }, { &fdct_wrapper, &highbd_idct_wrapper, 8, 2 }, { &fdct_wrapper, &highbd_idct_wrapper, 16, 2 }, { &fdct_wrapper, &highbd_idct_wrapper, 32, 2 }, #endif { &fdct_wrapper, &idct_wrapper, 4, 1 }, { &fdct_wrapper, &idct_wrapper, 8, 1 }, { &fdct_wrapper, &idct_wrapper, 16, 1 }, { &fdct_wrapper, &idct_wrapper, 32, 1 } }; INSTANTIATE_TEST_CASE_P( C, TransDCT, ::testing::Combine( ::testing::Range(0, static_cast(sizeof(dct_c_func_info) / sizeof(dct_c_func_info[0]))), ::testing::Values(dct_c_func_info), ::testing::Values(0), ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12))); #if !CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 static const FuncInfo dct_sse2_func_info[] = { #if CONFIG_VP9_HIGHBITDEPTH { &fdct_wrapper, &highbd_idct_wrapper, 4, 2 }, { &fdct_wrapper, &highbd_idct_wrapper, 8, 2 }, { &fdct_wrapper, &highbd_idct_wrapper, 16, 2 }, { &fdct_wrapper, &highbd_idct_wrapper, 32, 2 }, #endif { &fdct_wrapper, &idct_wrapper, 4, 1 }, { &fdct_wrapper, &idct_wrapper, 8, 1 }, { &fdct_wrapper, &idct_wrapper, 16, 1 }, { &fdct_wrapper, &idct_wrapper, 32, 1 } }; INSTANTIATE_TEST_CASE_P( SSE2, TransDCT, ::testing::Combine( ::testing::Range(0, static_cast(sizeof(dct_sse2_func_info) / sizeof(dct_sse2_func_info[0]))), ::testing::Values(dct_sse2_func_info), ::testing::Values(0), ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12))); #endif // HAVE_SSE2 #if HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64 // vpx_fdct8x8_ssse3 is only available in 64 bit builds. 
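// (Presumably because that implementation lives in x86_64-only assembly;
// the VPX_ARCH_X86_64 guard above mirrors the build system.)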
static const FuncInfo dct_ssse3_func_info = { &fdct_wrapper, &idct_wrapper, 8, 1 }; // TODO(johannkoenig): high bit depth fdct8x8. INSTANTIATE_TEST_CASE_P(SSSE3, TransDCT, ::testing::Values(make_tuple(0, &dct_ssse3_func_info, 0, VPX_BITS_8))); #endif // HAVE_SSSE3 && !CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64 #if HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH static const FuncInfo dct_avx2_func_info = { &fdct_wrapper, &idct_wrapper, 32, 1 }; // TODO(johannkoenig): high bit depth fdct32x32. INSTANTIATE_TEST_CASE_P(AVX2, TransDCT, ::testing::Values(make_tuple(0, &dct_avx2_func_info, 0, VPX_BITS_8))); #endif // HAVE_AVX2 && !CONFIG_VP9_HIGHBITDEPTH #if HAVE_NEON static const FuncInfo dct_neon_func_info[4] = { { &fdct_wrapper, &idct_wrapper, 4, 1 }, { &fdct_wrapper, &idct_wrapper, 8, 1 }, { &fdct_wrapper, &idct_wrapper, 16, 1 }, { &fdct_wrapper, &idct_wrapper, 32, 1 } }; INSTANTIATE_TEST_CASE_P( NEON, TransDCT, ::testing::Combine(::testing::Range(0, 4), ::testing::Values(dct_neon_func_info), ::testing::Values(0), ::testing::Values(VPX_BITS_8))); #endif // HAVE_NEON #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH static const FuncInfo dct_msa_func_info[4] = { { &fdct_wrapper, &idct_wrapper, 4, 1 }, { &fdct_wrapper, &idct_wrapper, 8, 1 }, { &fdct_wrapper, &idct_wrapper, 16, 1 }, { &fdct_wrapper, &idct_wrapper, 32, 1 } }; INSTANTIATE_TEST_CASE_P(MSA, TransDCT, ::testing::Combine(::testing::Range(0, 4), ::testing::Values(dct_msa_func_info), ::testing::Values(0), ::testing::Values(VPX_BITS_8))); #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH static const FuncInfo dct_vsx_func_info = { &fdct_wrapper, &idct_wrapper, 4, 1 }; INSTANTIATE_TEST_CASE_P(VSX, TransDCT, ::testing::Values(make_tuple(0, &dct_vsx_func_info, 0, VPX_BITS_8))); #endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && #endif // !CONFIG_EMULATE_HARDWARE /* -------------------------------------------------------------------------- */ class TransHT : public TransTestBase { public: TransHT() { fwd_txfm_ref = fht_ref; } }; TEST_P(TransHT, AccuracyCheck) { RunAccuracyCheck(size_ == 16 && bit_depth_ > 10 && pixel_size_ == 2 ? 
2 : 1); } TEST_P(TransHT, CoeffCheck) { RunCoeffCheck(); } TEST_P(TransHT, MemCheck) { RunMemCheck(); } TEST_P(TransHT, InvAccuracyCheck) { RunInvAccuracyCheck(1); } static const FuncInfo ht_c_func_info[] = { #if CONFIG_VP9_HIGHBITDEPTH { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper, 4, 2 }, { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper, 8, 2 }, { &vp9_highbd_fht16x16_c, &highbd_iht_wrapper, 16, 2 }, #endif { &vp9_fht4x4_c, &iht_wrapper, 4, 1 }, { &vp9_fht8x8_c, &iht_wrapper, 8, 1 }, { &vp9_fht16x16_c, &iht_wrapper, 16, 1 } }; INSTANTIATE_TEST_CASE_P( C, TransHT, ::testing::Combine( ::testing::Range(0, static_cast(sizeof(ht_c_func_info) / sizeof(ht_c_func_info[0]))), ::testing::Values(ht_c_func_info), ::testing::Range(0, 4), ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12))); #if !CONFIG_EMULATE_HARDWARE #if HAVE_NEON static const FuncInfo ht_neon_func_info[] = { #if CONFIG_VP9_HIGHBITDEPTH { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper, 4, 2 }, { &vp9_highbd_fht8x8_c, &highbd_iht_wrapper, 8, 2 }, { &vp9_highbd_fht16x16_c, &highbd_iht_wrapper, 16, 2 }, #endif { &vp9_fht4x4_c, &iht_wrapper, 4, 1 }, { &vp9_fht8x8_c, &iht_wrapper, 8, 1 }, { &vp9_fht16x16_c, &iht_wrapper, 16, 1 } }; INSTANTIATE_TEST_CASE_P( NEON, TransHT, ::testing::Combine( ::testing::Range(0, static_cast(sizeof(ht_neon_func_info) / sizeof(ht_neon_func_info[0]))), ::testing::Values(ht_neon_func_info), ::testing::Range(0, 4), ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12))); #endif // HAVE_NEON #if HAVE_SSE2 static const FuncInfo ht_sse2_func_info[3] = { { &vp9_fht4x4_sse2, &iht_wrapper, 4, 1 }, { &vp9_fht8x8_sse2, &iht_wrapper, 8, 1 }, { &vp9_fht16x16_sse2, &iht_wrapper, 16, 1 } }; INSTANTIATE_TEST_CASE_P(SSE2, TransHT, ::testing::Combine(::testing::Range(0, 3), ::testing::Values(ht_sse2_func_info), ::testing::Range(0, 4), ::testing::Values(VPX_BITS_8))); #endif // HAVE_SSE2 #if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH static const FuncInfo ht_sse4_1_func_info[3] = { { &vp9_highbd_fht4x4_c, &highbd_iht_wrapper, 4, 2 }, { vp9_highbd_fht8x8_c, &highbd_iht_wrapper, 8, 2 }, { &vp9_highbd_fht16x16_c, &highbd_iht_wrapper, 16, 2 } }; INSTANTIATE_TEST_CASE_P( SSE4_1, TransHT, ::testing::Combine(::testing::Range(0, 3), ::testing::Values(ht_sse4_1_func_info), ::testing::Range(0, 4), ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12))); #endif // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH #if HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH static const FuncInfo ht_vsx_func_info[3] = { { &vp9_fht4x4_c, &iht_wrapper, 4, 1 }, { &vp9_fht8x8_c, &iht_wrapper, 8, 1 }, { &vp9_fht16x16_c, &iht_wrapper, 16, 1 } }; INSTANTIATE_TEST_CASE_P(VSX, TransHT, ::testing::Combine(::testing::Range(0, 3), ::testing::Values(ht_vsx_func_info), ::testing::Range(0, 4), ::testing::Values(VPX_BITS_8))); #endif // HAVE_VSX #endif // !CONFIG_EMULATE_HARDWARE /* -------------------------------------------------------------------------- */ class TransWHT : public TransTestBase { public: TransWHT() { fwd_txfm_ref = fwht_ref; } }; TEST_P(TransWHT, AccuracyCheck) { RunAccuracyCheck(0); } TEST_P(TransWHT, CoeffCheck) { RunCoeffCheck(); } TEST_P(TransWHT, MemCheck) { RunMemCheck(); } TEST_P(TransWHT, InvAccuracyCheck) { RunInvAccuracyCheck(0); } static const FuncInfo wht_c_func_info[] = { #if CONFIG_VP9_HIGHBITDEPTH { &fdct_wrapper, &highbd_idct_wrapper, 4, 2 }, #endif { &fdct_wrapper, &idct_wrapper, 4, 1 } }; INSTANTIATE_TEST_CASE_P( C, TransWHT, ::testing::Combine( ::testing::Range(0, static_cast(sizeof(wht_c_func_info) / 
sizeof(wht_c_func_info[0]))), ::testing::Values(wht_c_func_info), ::testing::Values(0), ::testing::Values(VPX_BITS_8, VPX_BITS_10, VPX_BITS_12))); #if HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE static const FuncInfo wht_sse2_func_info = { &fdct_wrapper, &idct_wrapper, 4, 1 }; INSTANTIATE_TEST_CASE_P(SSE2, TransWHT, ::testing::Values(make_tuple(0, &wht_sse2_func_info, 0, VPX_BITS_8))); #endif // HAVE_SSE2 && !CONFIG_EMULATE_HARDWARE #if HAVE_VSX && !CONFIG_EMULATE_HARDWARE && !CONFIG_VP9_HIGHBITDEPTH static const FuncInfo wht_vsx_func_info = { &fdct_wrapper, &idct_wrapper, 4, 1 }; INSTANTIATE_TEST_CASE_P(VSX, TransWHT, ::testing::Values(make_tuple(0, &wht_vsx_func_info, 0, VPX_BITS_8))); #endif // HAVE_VSX && !CONFIG_EMULATE_HARDWARE } // namespace libvpx-1.8.2/test/decode_api_test.cc000066400000000000000000000167011357355204000174400ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "test/ivf_video_source.h" #include "vpx/vp8dx.h" #include "vpx/vpx_decoder.h" namespace { #define NELEMENTS(x) static_cast(sizeof(x) / sizeof(x[0])) TEST(DecodeAPI, InvalidParams) { static const vpx_codec_iface_t *kCodecs[] = { #if CONFIG_VP8_DECODER &vpx_codec_vp8_dx_algo, #endif #if CONFIG_VP9_DECODER &vpx_codec_vp9_dx_algo, #endif }; uint8_t buf[1] = { 0 }; vpx_codec_ctx_t dec; EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_dec_init(NULL, NULL, NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_dec_init(&dec, NULL, NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_decode(NULL, NULL, 0, NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_decode(NULL, buf, 0, NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_decode(NULL, buf, NELEMENTS(buf), NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_decode(NULL, NULL, NELEMENTS(buf), NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_destroy(NULL)); EXPECT_TRUE(vpx_codec_error(NULL) != NULL); for (int i = 0; i < NELEMENTS(kCodecs); ++i) { EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_dec_init(NULL, kCodecs[i], NULL, 0)); EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, kCodecs[i], NULL, 0)); EXPECT_EQ(VPX_CODEC_UNSUP_BITSTREAM, vpx_codec_decode(&dec, buf, NELEMENTS(buf), NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_decode(&dec, NULL, NELEMENTS(buf), NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_decode(&dec, buf, 0, NULL, 0)); EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec)); } } #if CONFIG_VP8_DECODER TEST(DecodeAPI, OptionalParams) { vpx_codec_ctx_t dec; #if CONFIG_ERROR_CONCEALMENT EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, &vpx_codec_vp8_dx_algo, NULL, VPX_CODEC_USE_ERROR_CONCEALMENT)); #else EXPECT_EQ(VPX_CODEC_INCAPABLE, vpx_codec_dec_init(&dec, &vpx_codec_vp8_dx_algo, NULL, VPX_CODEC_USE_ERROR_CONCEALMENT)); #endif // CONFIG_ERROR_CONCEALMENT } #endif // CONFIG_VP8_DECODER #if CONFIG_VP9_DECODER // Test VP9 codec controls after a decode error to ensure the code doesn't // misbehave. 
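// A rough sketch of the calls exercised (control IDs from vpx/vp8dx.h):
//   int val[2];
//   vpx_codec_control_(&dec, VP9D_GET_FRAME_SIZE, val);      // VPX_CODEC_OK
//   vpx_codec_control_(&dec, VP8D_GET_FRAME_CORRUPTED, val); // VPX_CODEC_ERROR
// Controls that only read stream state keep returning VPX_CODEC_OK after a
// failed decode; only the one that needs a decoded frame reports an error.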
void TestVp9Controls(vpx_codec_ctx_t *dec) { static const int kControls[] = { VP8D_GET_LAST_REF_UPDATES, VP8D_GET_FRAME_CORRUPTED, VP9D_GET_DISPLAY_SIZE, VP9D_GET_FRAME_SIZE }; int val[2]; for (int i = 0; i < NELEMENTS(kControls); ++i) { const vpx_codec_err_t res = vpx_codec_control_(dec, kControls[i], val); switch (kControls[i]) { case VP8D_GET_FRAME_CORRUPTED: EXPECT_EQ(VPX_CODEC_ERROR, res) << kControls[i]; break; default: EXPECT_EQ(VPX_CODEC_OK, res) << kControls[i]; break; } EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_control_(dec, kControls[i], NULL)); } vp9_ref_frame_t ref; ref.idx = 0; EXPECT_EQ(VPX_CODEC_ERROR, vpx_codec_control(dec, VP9_GET_REFERENCE, &ref)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_control(dec, VP9_GET_REFERENCE, NULL)); vpx_ref_frame_t ref_copy; const int width = 352; const int height = 288; ASSERT_TRUE( vpx_img_alloc(&ref_copy.img, VPX_IMG_FMT_I420, width, height, 1) != NULL); ref_copy.frame_type = VP8_LAST_FRAME; EXPECT_EQ(VPX_CODEC_ERROR, vpx_codec_control(dec, VP8_COPY_REFERENCE, &ref_copy)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_control(dec, VP8_COPY_REFERENCE, NULL)); vpx_img_free(&ref_copy.img); } TEST(DecodeAPI, Vp9InvalidDecode) { const vpx_codec_iface_t *const codec = &vpx_codec_vp9_dx_algo; const char filename[] = "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf"; libvpx_test::IVFVideoSource video(filename); video.Init(); video.Begin(); ASSERT_TRUE(!HasFailure()); vpx_codec_ctx_t dec; EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0)); const uint32_t frame_size = static_cast(video.frame_size()); #if CONFIG_VP9_HIGHBITDEPTH EXPECT_EQ(VPX_CODEC_MEM_ERROR, vpx_codec_decode(&dec, video.cxdata(), frame_size, NULL, 0)); #else EXPECT_EQ(VPX_CODEC_UNSUP_BITSTREAM, vpx_codec_decode(&dec, video.cxdata(), frame_size, NULL, 0)); #endif vpx_codec_iter_t iter = NULL; EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter)); TestVp9Controls(&dec); EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec)); } void TestPeekInfo(const uint8_t *const data, uint32_t data_sz, uint32_t peek_size) { const vpx_codec_iface_t *const codec = &vpx_codec_vp9_dx_algo; // Verify behavior of vpx_codec_decode. vpx_codec_decode doesn't even get // to decoder_peek_si_internal on frames of size < 8. if (data_sz >= 8) { vpx_codec_ctx_t dec; EXPECT_EQ(VPX_CODEC_OK, vpx_codec_dec_init(&dec, codec, NULL, 0)); EXPECT_EQ((data_sz < peek_size) ? VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_CORRUPT_FRAME, vpx_codec_decode(&dec, data, data_sz, NULL, 0)); vpx_codec_iter_t iter = NULL; EXPECT_EQ(NULL, vpx_codec_get_frame(&dec, &iter)); EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&dec)); } // Verify behavior of vpx_codec_peek_stream_info. vpx_codec_stream_info_t si; si.sz = sizeof(si); EXPECT_EQ((data_sz < peek_size) ? VPX_CODEC_UNSUP_BITSTREAM : VPX_CODEC_OK, vpx_codec_peek_stream_info(codec, data, data_sz, &si)); } TEST(DecodeAPI, Vp9PeekStreamInfo) { // The first 9 bytes are valid and the rest of the bytes are made up. Until // size 10, this should return VPX_CODEC_UNSUP_BITSTREAM and after that it // should return VPX_CODEC_CORRUPT_FRAME. 
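// Concretely, with the 32-byte buffer below and peek_size == 10:
//   data_sz 1..7:   decode is skipped; peek -> VPX_CODEC_UNSUP_BITSTREAM
//   data_sz 8..9:   decode and peek both -> VPX_CODEC_UNSUP_BITSTREAM
//   data_sz 10..32: peek -> VPX_CODEC_OK, decode -> VPX_CODEC_CORRUPT_FRAME
//                   (the made-up tail bytes fail real decoding).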
const uint8_t data[32] = { 0x85, 0xa4, 0xc1, 0xa1, 0x38, 0x81, 0xa3, 0x49, 0x83, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, }; for (uint32_t data_sz = 1; data_sz <= 32; ++data_sz) { TestPeekInfo(data, data_sz, 10); } } TEST(DecodeAPI, Vp9PeekStreamInfoTruncated) { // This profile 1 header requires 10.25 bytes, ensure // vpx_codec_peek_stream_info doesn't over read. const uint8_t profile1_data[10] = { 0xa4, 0xe9, 0x30, 0x68, 0x53, 0xe9, 0x30, 0x68, 0x53, 0x04 }; for (uint32_t data_sz = 1; data_sz <= 10; ++data_sz) { TestPeekInfo(profile1_data, data_sz, 11); } } #endif // CONFIG_VP9_DECODER TEST(DecodeAPI, HighBitDepthCapability) { // VP8 should not claim VP9 HBD as a capability. #if CONFIG_VP8_DECODER const vpx_codec_caps_t vp8_caps = vpx_codec_get_caps(&vpx_codec_vp8_dx_algo); EXPECT_EQ(vp8_caps & VPX_CODEC_CAP_HIGHBITDEPTH, 0); #endif #if CONFIG_VP9_DECODER const vpx_codec_caps_t vp9_caps = vpx_codec_get_caps(&vpx_codec_vp9_dx_algo); #if CONFIG_VP9_HIGHBITDEPTH EXPECT_EQ(vp9_caps & VPX_CODEC_CAP_HIGHBITDEPTH, VPX_CODEC_CAP_HIGHBITDEPTH); #else EXPECT_EQ(vp9_caps & VPX_CODEC_CAP_HIGHBITDEPTH, 0); #endif #endif } } // namespace libvpx-1.8.2/test/decode_corrupted.cc000066400000000000000000000062201357355204000176320ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/util.h" #include "test/i420_video_source.h" #include "vpx_mem/vpx_mem.h" namespace { class DecodeCorruptedFrameTest : public ::libvpx_test::EncoderTest, public ::testing::TestWithParam< std::tuple > { public: DecodeCorruptedFrameTest() : EncoderTest(GET_PARAM(0)) {} protected: virtual ~DecodeCorruptedFrameTest() {} virtual void SetUp() { InitializeConfig(); SetMode(::libvpx_test::kRealTime); cfg_.g_lag_in_frames = 0; cfg_.rc_end_usage = VPX_CBR; cfg_.rc_buf_sz = 1000; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 600; // Set small key frame distance such that we insert more key frames. cfg_.kf_max_dist = 3; dec_cfg_.threads = 1; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) encoder->Control(VP8E_SET_CPUUSED, 7); } virtual void MismatchHook(const vpx_image_t * /*img1*/, const vpx_image_t * /*img2*/) {} virtual const vpx_codec_cx_pkt_t *MutateEncoderOutputHook( const vpx_codec_cx_pkt_t *pkt) { // Don't edit frame packet on key frame. if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) return pkt; if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return pkt; memcpy(&modified_pkt_, pkt, sizeof(*pkt)); // Halve the size so it's corrupted to decoder. 
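// E.g. (sizes hypothetical) a 1200-byte delta frame is handed to the
// decoder as 600 bytes, truncating it mid-partition; HandleDecodeResult
// below then only requires that this surfaces as an ordinary decode error,
// never VPX_CODEC_MEM_ERROR or a crash.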
modified_pkt_.data.frame.sz = modified_pkt_.data.frame.sz / 2; return &modified_pkt_; } virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec, const libvpx_test::VideoSource & /*video*/, libvpx_test::Decoder *decoder) { EXPECT_NE(res_dec, VPX_CODEC_MEM_ERROR) << decoder->DecodeError(); return VPX_CODEC_MEM_ERROR != res_dec; } vpx_codec_cx_pkt_t modified_pkt_; }; TEST_P(DecodeCorruptedFrameTest, DecodeCorruptedFrame) { cfg_.rc_target_bitrate = 200; cfg_.g_error_resilient = 0; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 300); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } #if CONFIG_VP9 INSTANTIATE_TEST_CASE_P( VP9, DecodeCorruptedFrameTest, ::testing::Values( static_cast(&libvpx_test::kVP9))); #endif // CONFIG_VP9 #if CONFIG_VP8 INSTANTIATE_TEST_CASE_P( VP8, DecodeCorruptedFrameTest, ::testing::Values( static_cast(&libvpx_test::kVP8))); #endif // CONFIG_VP8 } // namespace libvpx-1.8.2/test/decode_perf_test.cc000066400000000000000000000213051357355204000176170ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/ivf_video_source.h" #include "test/md5_helper.h" #include "test/util.h" #include "test/webm_video_source.h" #include "vpx_ports/vpx_timer.h" #include "./ivfenc.h" #include "./vpx_version.h" using std::make_tuple; namespace { #define VIDEO_NAME 0 #define THREADS 1 const double kUsecsInSec = 1000000.0; const char kNewEncodeOutputFile[] = "new_encode.ivf"; /* DecodePerfTest takes a tuple of filename + number of threads to decode with */ typedef std::tuple DecodePerfParam; const DecodePerfParam kVP9DecodePerfVectors[] = { make_tuple("vp90-2-bbb_426x240_tile_1x1_180kbps.webm", 1), make_tuple("vp90-2-bbb_640x360_tile_1x2_337kbps.webm", 2), make_tuple("vp90-2-bbb_854x480_tile_1x2_651kbps.webm", 2), make_tuple("vp90-2-bbb_1280x720_tile_1x4_1310kbps.webm", 4), make_tuple("vp90-2-bbb_1920x1080_tile_1x1_2581kbps.webm", 1), make_tuple("vp90-2-bbb_1920x1080_tile_1x4_2586kbps.webm", 4), make_tuple("vp90-2-bbb_1920x1080_tile_1x4_fpm_2304kbps.webm", 4), make_tuple("vp90-2-sintel_426x182_tile_1x1_171kbps.webm", 1), make_tuple("vp90-2-sintel_640x272_tile_1x2_318kbps.webm", 2), make_tuple("vp90-2-sintel_854x364_tile_1x2_621kbps.webm", 2), make_tuple("vp90-2-sintel_1280x546_tile_1x4_1257kbps.webm", 4), make_tuple("vp90-2-sintel_1920x818_tile_1x4_fpm_2279kbps.webm", 4), make_tuple("vp90-2-tos_426x178_tile_1x1_181kbps.webm", 1), make_tuple("vp90-2-tos_640x266_tile_1x2_336kbps.webm", 2), make_tuple("vp90-2-tos_854x356_tile_1x2_656kbps.webm", 2), make_tuple("vp90-2-tos_854x356_tile_1x2_fpm_546kbps.webm", 2), make_tuple("vp90-2-tos_1280x534_tile_1x4_1306kbps.webm", 4), make_tuple("vp90-2-tos_1280x534_tile_1x4_fpm_952kbps.webm", 4), make_tuple("vp90-2-tos_1920x800_tile_1x4_fpm_2335kbps.webm", 4), }; /* In order to reflect real world performance as much as possible, Perf tests *DO NOT* do any correctness checks. Please run them alongside correctness tests to ensure proper codec integrity. 
Furthermore, in this test we deliberately limit the amount of system calls we make to avoid OS preemption. TODO(joshualitt) create a more detailed perf measurement test to collect power/temp/min max frame decode times/etc */ class DecodePerfTest : public ::testing::TestWithParam {}; TEST_P(DecodePerfTest, PerfTest) { const char *const video_name = GET_PARAM(VIDEO_NAME); const unsigned threads = GET_PARAM(THREADS); libvpx_test::WebMVideoSource video(video_name); video.Init(); vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); cfg.threads = threads; libvpx_test::VP9Decoder decoder(cfg, 0); vpx_usec_timer t; vpx_usec_timer_start(&t); for (video.Begin(); video.cxdata() != NULL; video.Next()) { decoder.DecodeFrame(video.cxdata(), video.frame_size()); } vpx_usec_timer_mark(&t); const double elapsed_secs = double(vpx_usec_timer_elapsed(&t)) / kUsecsInSec; const unsigned frames = video.frame_number(); const double fps = double(frames) / elapsed_secs; printf("{\n"); printf("\t\"type\" : \"decode_perf_test\",\n"); printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP); printf("\t\"videoName\" : \"%s\",\n", video_name); printf("\t\"threadCount\" : %u,\n", threads); printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs); printf("\t\"totalFrames\" : %u,\n", frames); printf("\t\"framesPerSecond\" : %f\n", fps); printf("}\n"); } INSTANTIATE_TEST_CASE_P(VP9, DecodePerfTest, ::testing::ValuesIn(kVP9DecodePerfVectors)); class VP9NewEncodeDecodePerfTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam { protected: VP9NewEncodeDecodePerfTest() : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), speed_(0), outfile_(0), out_frames_(0) {} virtual ~VP9NewEncodeDecodePerfTest() {} virtual void SetUp() { InitializeConfig(); SetMode(encoding_mode_); cfg_.g_lag_in_frames = 25; cfg_.rc_min_quantizer = 2; cfg_.rc_max_quantizer = 56; cfg_.rc_dropframe_thresh = 0; cfg_.rc_undershoot_pct = 50; cfg_.rc_overshoot_pct = 50; cfg_.rc_buf_sz = 1000; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 600; cfg_.rc_resize_allowed = 0; cfg_.rc_end_usage = VPX_VBR; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, speed_); encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1); encoder->Control(VP9E_SET_TILE_COLUMNS, 2); } } virtual void BeginPassHook(unsigned int /*pass*/) { const std::string data_path = getenv("LIBVPX_TEST_DATA_PATH"); const std::string path_to_source = data_path + "/" + kNewEncodeOutputFile; outfile_ = fopen(path_to_source.c_str(), "wb"); ASSERT_TRUE(outfile_ != NULL); } virtual void EndPassHook() { if (outfile_ != NULL) { if (!fseek(outfile_, 0, SEEK_SET)) { ivf_write_file_header(outfile_, &cfg_, VP9_FOURCC, out_frames_); } fclose(outfile_); outfile_ = NULL; } } virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { ++out_frames_; // Write initial file header if first frame. if (pkt->data.frame.pts == 0) { ivf_write_file_header(outfile_, &cfg_, VP9_FOURCC, out_frames_); } // Write frame header and data. 
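// IVF framing, for reference: a 32-byte file header ("DKIF" magic, FourCC,
// dimensions, frame count) followed, per frame, by a 12-byte header
// consisting of a 32-bit payload size plus a 64-bit timestamp, which is
// what ivf_write_frame_header() emits before the payload bytes.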
ivf_write_frame_header(outfile_, out_frames_, pkt->data.frame.sz); ASSERT_EQ(fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_), pkt->data.frame.sz); } virtual bool DoDecode() const { return false; } void set_speed(unsigned int speed) { speed_ = speed; } private: libvpx_test::TestMode encoding_mode_; uint32_t speed_; FILE *outfile_; uint32_t out_frames_; }; struct EncodePerfTestVideo { EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_, uint32_t bitrate_, int frames_) : name(name_), width(width_), height(height_), bitrate(bitrate_), frames(frames_) {} const char *name; uint32_t width; uint32_t height; uint32_t bitrate; int frames; }; const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = { EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470), }; TEST_P(VP9NewEncodeDecodePerfTest, PerfTest) { SetUp(); // TODO(JBB): Make this work by going through the set of given files. const int i = 0; const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate; init_flags_ = VPX_CODEC_USE_PSNR; const char *video_name = kVP9EncodePerfTestVectors[i].name; libvpx_test::I420VideoSource video( video_name, kVP9EncodePerfTestVectors[i].width, kVP9EncodePerfTestVectors[i].height, timebase.den, timebase.num, 0, kVP9EncodePerfTestVectors[i].frames); set_speed(2); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const uint32_t threads = 4; libvpx_test::IVFVideoSource decode_video(kNewEncodeOutputFile); decode_video.Init(); vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); cfg.threads = threads; libvpx_test::VP9Decoder decoder(cfg, 0); vpx_usec_timer t; vpx_usec_timer_start(&t); for (decode_video.Begin(); decode_video.cxdata() != NULL; decode_video.Next()) { decoder.DecodeFrame(decode_video.cxdata(), decode_video.frame_size()); } vpx_usec_timer_mark(&t); const double elapsed_secs = static_cast(vpx_usec_timer_elapsed(&t)) / kUsecsInSec; const unsigned decode_frames = decode_video.frame_number(); const double fps = static_cast(decode_frames) / elapsed_secs; printf("{\n"); printf("\t\"type\" : \"decode_perf_test\",\n"); printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP); printf("\t\"videoName\" : \"%s\",\n", kNewEncodeOutputFile); printf("\t\"threadCount\" : %u,\n", threads); printf("\t\"decodeTimeSecs\" : %f,\n", elapsed_secs); printf("\t\"totalFrames\" : %u,\n", decode_frames); printf("\t\"framesPerSecond\" : %f\n", fps); printf("}\n"); } VP9_INSTANTIATE_TEST_CASE(VP9NewEncodeDecodePerfTest, ::testing::Values(::libvpx_test::kTwoPassGood)); } // namespace libvpx-1.8.2/test/decode_svc_test.cc000066400000000000000000000107571357355204000174670ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/ivf_video_source.h" #include "test/test_vectors.h" #include "test/util.h" namespace { const unsigned int kNumFrames = 19; class DecodeSvcTest : public ::libvpx_test::DecoderTest, public ::libvpx_test::CodecTestWithParam { protected: DecodeSvcTest() : DecoderTest(GET_PARAM(::libvpx_test::kCodecFactoryParam)) {} virtual ~DecodeSvcTest() {} virtual void PreDecodeFrameHook( const libvpx_test::CompressedVideoSource &video, libvpx_test::Decoder *decoder) { if (video.frame_number() == 0) decoder->Control(VP9_DECODE_SVC_SPATIAL_LAYER, spatial_layer_); } virtual void DecompressedFrameHook(const vpx_image_t &img, const unsigned int frame_number) { ASSERT_EQ(img.d_w, width_); ASSERT_EQ(img.d_h, height_); total_frames_ = frame_number; } int spatial_layer_; unsigned int width_; unsigned int height_; unsigned int total_frames_; }; // SVC test vector is 1280x720, with 3 spatial layers, and 20 frames. // Decode the SVC test vector, which has 3 spatial layers, and decode up to // spatial layer 0. Verify the resolution of each decoded frame and the total // number of frames decoded. This results in 1/4x1/4 resolution (320x180). TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer0) { const std::string filename = GET_PARAM(1); std::unique_ptr video; video.reset(new libvpx_test::IVFVideoSource(filename)); ASSERT_TRUE(video.get() != NULL); video->Init(); total_frames_ = 0; spatial_layer_ = 0; width_ = 320; height_ = 180; ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); ASSERT_EQ(total_frames_, kNumFrames); } // Decode the SVC test vector, which has 3 spatial layers, and decode up to // spatial layer 1. Verify the resolution of each decoded frame and the total // number of frames decoded. This results in 1/2x1/2 resolution (640x360). TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer1) { const std::string filename = GET_PARAM(1); std::unique_ptr video; video.reset(new libvpx_test::IVFVideoSource(filename)); ASSERT_TRUE(video.get() != NULL); video->Init(); total_frames_ = 0; spatial_layer_ = 1; width_ = 640; height_ = 360; ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); ASSERT_EQ(total_frames_, kNumFrames); } // Decode the SVC test vector, which has 3 spatial layers, and decode up to // spatial layer 2. Verify the resolution of each decoded frame and the total // number of frames decoded. This results in the full resolution (1280x720). TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer2) { const std::string filename = GET_PARAM(1); std::unique_ptr video; video.reset(new libvpx_test::IVFVideoSource(filename)); ASSERT_TRUE(video.get() != NULL); video->Init(); total_frames_ = 0; spatial_layer_ = 2; width_ = 1280; height_ = 720; ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); ASSERT_EQ(total_frames_, kNumFrames); } // Decode the SVC test vector, which has 3 spatial layers, and decode up to // spatial layer 10. Verify the resolution of each decoded frame and the total // number of frames decoded. This is beyond the number of spatial layers, so // the decoding should result in the full resolution (1280x720). 
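// (Presumably the decoder clamps the requested layer to the top layer of
// the stream, so asking for layer 10 of this 3-layer file behaves exactly
// like asking for layer 2; the expectations below encode that.)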
TEST_P(DecodeSvcTest, DecodeSvcTestUpToSpatialLayer10) { const std::string filename = GET_PARAM(1); std::unique_ptr video; video.reset(new libvpx_test::IVFVideoSource(filename)); ASSERT_TRUE(video.get() != NULL); video->Init(); total_frames_ = 0; spatial_layer_ = 10; width_ = 1280; height_ = 720; ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); ASSERT_EQ(total_frames_, kNumFrames); } VP9_INSTANTIATE_TEST_CASE( DecodeSvcTest, ::testing::ValuesIn(libvpx_test::kVP9TestVectorsSvc, libvpx_test::kVP9TestVectorsSvc + libvpx_test::kNumVP9TestVectorsSvc)); } // namespace libvpx-1.8.2/test/decode_test_driver.cc000066400000000000000000000101311357355204000201510ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/register_state_check.h" #include "test/video_source.h" namespace libvpx_test { const char kVP8Name[] = "WebM Project VP8"; vpx_codec_err_t Decoder::PeekStream(const uint8_t *cxdata, size_t size, vpx_codec_stream_info_t *stream_info) { return vpx_codec_peek_stream_info( CodecInterface(), cxdata, static_cast(size), stream_info); } vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size) { return DecodeFrame(cxdata, size, NULL); } vpx_codec_err_t Decoder::DecodeFrame(const uint8_t *cxdata, size_t size, void *user_priv) { vpx_codec_err_t res_dec; InitOnce(); API_REGISTER_STATE_CHECK( res_dec = vpx_codec_decode( &decoder_, cxdata, static_cast(size), user_priv, 0)); return res_dec; } bool Decoder::IsVP8() const { const char *codec_name = GetDecoderName(); return strncmp(kVP8Name, codec_name, sizeof(kVP8Name) - 1) == 0; } void DecoderTest::HandlePeekResult(Decoder *const decoder, CompressedVideoSource *video, const vpx_codec_err_t res_peek) { const bool is_vp8 = decoder->IsVP8(); if (is_vp8) { /* Vp8's implementation of PeekStream returns an error if the frame you * pass it is not a keyframe, so we only expect VPX_CODEC_OK on the first * frame, which must be a keyframe. */ if (video->frame_number() == 0) { ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: " << vpx_codec_err_to_string(res_peek); } } else { /* The Vp9 implementation of PeekStream returns an error only if the * data passed to it isn't a valid Vp9 chunk. */ ASSERT_EQ(VPX_CODEC_OK, res_peek) << "Peek return failed: " << vpx_codec_err_to_string(res_peek); } } void DecoderTest::RunLoop(CompressedVideoSource *video, const vpx_codec_dec_cfg_t &dec_cfg) { Decoder *const decoder = codec_->CreateDecoder(dec_cfg, flags_); ASSERT_TRUE(decoder != NULL); bool end_of_file = false; // Decode frames. 
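// The per-frame protocol, roughly: peek at the stream info, decode the
// compressed chunk, then drain every decoded image through DxDataIterator;
// a NULL chunk at end of file tells the decoder to flush any frames it is
// still holding (e.g. under VP9 frame-parallel decoding).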
for (video->Begin(); !::testing::Test::HasFailure() && !end_of_file; video->Next()) { PreDecodeFrameHook(*video, decoder); vpx_codec_stream_info_t stream_info; stream_info.sz = sizeof(stream_info); if (video->cxdata() != NULL) { const vpx_codec_err_t res_peek = decoder->PeekStream( video->cxdata(), video->frame_size(), &stream_info); HandlePeekResult(decoder, video, res_peek); ASSERT_FALSE(::testing::Test::HasFailure()); vpx_codec_err_t res_dec = decoder->DecodeFrame(video->cxdata(), video->frame_size()); if (!HandleDecodeResult(res_dec, *video, decoder)) break; } else { // Signal end of the file to the decoder. const vpx_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0); ASSERT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError(); end_of_file = true; } DxDataIterator dec_iter = decoder->GetDxData(); const vpx_image_t *img = NULL; // Get decompressed data while (!::testing::Test::HasFailure() && (img = dec_iter.Next())) { DecompressedFrameHook(*img, video->frame_number()); } } delete decoder; } void DecoderTest::RunLoop(CompressedVideoSource *video) { vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t(); RunLoop(video, dec_cfg); } void DecoderTest::set_cfg(const vpx_codec_dec_cfg_t &dec_cfg) { memcpy(&cfg_, &dec_cfg, sizeof(cfg_)); } void DecoderTest::set_flags(const vpx_codec_flags_t flags) { flags_ = flags; } } // namespace libvpx_test libvpx-1.8.2/test/decode_test_driver.h000066400000000000000000000121731357355204000200230ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_DECODE_TEST_DRIVER_H_ #define VPX_TEST_DECODE_TEST_DRIVER_H_ #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "vpx/vpx_decoder.h" namespace libvpx_test { class CodecFactory; class CompressedVideoSource; // Provides an object to handle decoding output class DxDataIterator { public: explicit DxDataIterator(vpx_codec_ctx_t *decoder) : decoder_(decoder), iter_(NULL) {} const vpx_image_t *Next() { return vpx_codec_get_frame(decoder_, &iter_); } private: vpx_codec_ctx_t *decoder_; vpx_codec_iter_t iter_; }; // Provides a simplified interface to manage one video decoding. // Similar to Encoder class, the exact services should be added // as more tests are added. 
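// A minimal usage sketch (VP9Decoder, from test/codec_factory.h, is one
// concrete subclass; error checking omitted):
//   vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t();
//   libvpx_test::VP9Decoder decoder(cfg, 0);
//   decoder.DecodeFrame(cxdata, frame_size);
//   libvpx_test::DxDataIterator iter = decoder.GetDxData();
//   while (const vpx_image_t *img = iter.Next()) { /* inspect img */ }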
class Decoder { public: explicit Decoder(vpx_codec_dec_cfg_t cfg) : cfg_(cfg), flags_(0), init_done_(false) { memset(&decoder_, 0, sizeof(decoder_)); } Decoder(vpx_codec_dec_cfg_t cfg, const vpx_codec_flags_t flag) : cfg_(cfg), flags_(flag), init_done_(false) { memset(&decoder_, 0, sizeof(decoder_)); } virtual ~Decoder() { vpx_codec_destroy(&decoder_); } vpx_codec_err_t PeekStream(const uint8_t *cxdata, size_t size, vpx_codec_stream_info_t *stream_info); vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size); vpx_codec_err_t DecodeFrame(const uint8_t *cxdata, size_t size, void *user_priv); DxDataIterator GetDxData() { return DxDataIterator(&decoder_); } void Control(int ctrl_id, int arg) { Control(ctrl_id, arg, VPX_CODEC_OK); } void Control(int ctrl_id, const void *arg) { InitOnce(); const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError(); } void Control(int ctrl_id, int arg, vpx_codec_err_t expected_value) { InitOnce(); const vpx_codec_err_t res = vpx_codec_control_(&decoder_, ctrl_id, arg); ASSERT_EQ(expected_value, res) << DecodeError(); } const char *DecodeError() { const char *detail = vpx_codec_error_detail(&decoder_); return detail ? detail : vpx_codec_error(&decoder_); } // Passes the external frame buffer information to libvpx. vpx_codec_err_t SetFrameBufferFunctions( vpx_get_frame_buffer_cb_fn_t cb_get, vpx_release_frame_buffer_cb_fn_t cb_release, void *user_priv) { InitOnce(); return vpx_codec_set_frame_buffer_functions(&decoder_, cb_get, cb_release, user_priv); } const char *GetDecoderName() const { return vpx_codec_iface_name(CodecInterface()); } bool IsVP8() const; vpx_codec_ctx_t *GetDecoder() { return &decoder_; } protected: virtual vpx_codec_iface_t *CodecInterface() const = 0; void InitOnce() { if (!init_done_) { const vpx_codec_err_t res = vpx_codec_dec_init(&decoder_, CodecInterface(), &cfg_, flags_); ASSERT_EQ(VPX_CODEC_OK, res) << DecodeError(); init_done_ = true; } } vpx_codec_ctx_t decoder_; vpx_codec_dec_cfg_t cfg_; vpx_codec_flags_t flags_; bool init_done_; }; // Common test functionality for all Decoder tests. class DecoderTest { public: // Main decoding loop virtual void RunLoop(CompressedVideoSource *video); virtual void RunLoop(CompressedVideoSource *video, const vpx_codec_dec_cfg_t &dec_cfg); virtual void set_cfg(const vpx_codec_dec_cfg_t &dec_cfg); virtual void set_flags(const vpx_codec_flags_t flags); // Hook to be called before decompressing every frame. virtual void PreDecodeFrameHook(const CompressedVideoSource & /*video*/, Decoder * /*decoder*/) {} // Hook to be called to handle decode result. Return true to continue. virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec, const CompressedVideoSource & /*video*/, Decoder *decoder) { EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError(); return VPX_CODEC_OK == res_dec; } // Hook to be called on every decompressed frame. 
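// (Overridden by tests that need per-frame verification; DecodeSvcTest in
// decode_svc_test.cc, for example, checks img.d_w/img.d_h and records the
// frame count here.)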
virtual void DecompressedFrameHook(const vpx_image_t & /*img*/, const unsigned int /*frame_number*/) {} // Hook to be called on peek result virtual void HandlePeekResult(Decoder *const decoder, CompressedVideoSource *video, const vpx_codec_err_t res_peek); protected: explicit DecoderTest(const CodecFactory *codec) : codec_(codec), cfg_(), flags_(0) {} virtual ~DecoderTest() {} const CodecFactory *codec_; vpx_codec_dec_cfg_t cfg_; vpx_codec_flags_t flags_; }; } // namespace libvpx_test #endif // VPX_TEST_DECODE_TEST_DRIVER_H_ libvpx-1.8.2/test/decode_to_md5.sh000077500000000000000000000046641357355204000170540ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file tests the libvpx decode_to_md5 example. To add new tests to this ## file, do the following: ## 1. Write a shell function (this is your test). ## 2. Add the function to decode_to_md5_tests (on a new line). ## . $(dirname $0)/tools_common.sh # Environment check: Make sure input is available: # $VP8_IVF_FILE and $VP9_IVF_FILE are required. decode_to_md5_verify_environment() { if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi } # Runs decode_to_md5 on $1 and captures the md5 sum for the final frame. $2 is # interpreted as codec name and used solely to name the output file. $3 is the # expected md5 sum: It must match that of the final frame. decode_to_md5() { local decoder="${LIBVPX_BIN_PATH}/decode_to_md5${VPX_TEST_EXE_SUFFIX}" local input_file="$1" local codec="$2" local expected_md5="$3" local output_file="${VPX_TEST_OUTPUT_DIR}/decode_to_md5_${codec}" if [ ! -x "${decoder}" ]; then elog "${decoder} does not exist or is not executable." return 1 fi eval "${VPX_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \ ${devnull} [ -e "${output_file}" ] || return 1 local md5_last_frame="$(tail -n1 "${output_file}" | awk '{print $1}')" local actual_md5="$(echo "${md5_last_frame}" | awk '{print $1}')" [ "${actual_md5}" = "${expected_md5}" ] || return 1 } decode_to_md5_vp8() { # expected MD5 sum for the last frame. local expected_md5="56794d911b02190212bca92f88ad60c6" if [ "$(vp8_decode_available)" = "yes" ]; then decode_to_md5 "${VP8_IVF_FILE}" "vp8" "${expected_md5}" fi } decode_to_md5_vp9() { # expected MD5 sum for the last frame. local expected_md5="2952c0eae93f3dadd1aa84c50d3fd6d2" if [ "$(vp9_decode_available)" = "yes" ]; then decode_to_md5 "${VP9_IVF_FILE}" "vp9" "${expected_md5}" fi } decode_to_md5_tests="decode_to_md5_vp8 decode_to_md5_vp9" run_tests decode_to_md5_verify_environment "${decode_to_md5_tests}" libvpx-1.8.2/test/decode_with_drops.sh000077500000000000000000000054201357355204000200360ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. 
## ## This file tests the libvpx decode_with_drops example. To add new tests to ## this file, do the following: ## 1. Write a shell function (this is your test). ## 2. Add the function to decode_with_drops_tests (on a new line). ## . $(dirname $0)/tools_common.sh # Environment check: Make sure input is available: # $VP8_IVF_FILE and $VP9_IVF_FILE are required. decode_with_drops_verify_environment() { if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi } # Runs decode_with_drops on $1. $2 is interpreted as codec name and used solely # to name the output file. $3 is the drop mode, and is passed directly to # decode_with_drops. decode_with_drops() { local decoder="${LIBVPX_BIN_PATH}/decode_with_drops${VPX_TEST_EXE_SUFFIX}" local input_file="$1" local codec="$2" local output_file="${VPX_TEST_OUTPUT_DIR}/decode_with_drops_${codec}" local drop_mode="$3" if [ ! -x "${decoder}" ]; then elog "${decoder} does not exist or is not executable." return 1 fi eval "${VPX_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \ "${drop_mode}" ${devnull} [ -e "${output_file}" ] || return 1 } # Decodes $VP8_IVF_FILE while dropping frames, twice: once in sequence mode, # and once in pattern mode. # Note: This test assumes that $VP8_IVF_FILE has exactly 29 frames, and could # break if the file is modified. decode_with_drops_vp8() { if [ "$(vp8_decode_available)" = "yes" ]; then # Test sequence mode: Drop frames 2-28. decode_with_drops "${VP8_IVF_FILE}" "vp8" "2-28" # Test pattern mode: Drop 3 of every 4 frames. decode_with_drops "${VP8_IVF_FILE}" "vp8" "3/4" fi } # Decodes $VP9_IVF_FILE while dropping frames, twice: once in sequence mode, # and once in pattern mode. # Note: This test assumes that $VP9_IVF_FILE has exactly 20 frames, and could # break if the file is modified. decode_with_drops_vp9() { if [ "$(vp9_decode_available)" = "yes" ]; then # Test sequence mode: Drop frames 2-19. decode_with_drops "${VP9_IVF_FILE}" "vp9" "2-19" # Test pattern mode: Drop 3 of every 4 frames. decode_with_drops "${VP9_IVF_FILE}" "vp9" "3/4" fi } decode_with_drops_tests="decode_with_drops_vp8 decode_with_drops_vp9" run_tests decode_with_drops_verify_environment "${decode_with_drops_tests}" libvpx-1.8.2/test/encode_api_test.cc000066400000000000000000000142741357355204000174550ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" namespace { #define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0])) TEST(EncodeAPI, InvalidParams) { static const vpx_codec_iface_t *kCodecs[] = { #if CONFIG_VP8_ENCODER &vpx_codec_vp8_cx_algo, #endif #if CONFIG_VP9_ENCODER &vpx_codec_vp9_cx_algo, #endif }; uint8_t buf[1] = { 0 }; vpx_image_t img; vpx_codec_ctx_t enc; vpx_codec_enc_cfg_t cfg; EXPECT_EQ(&img, vpx_img_wrap(&img, VPX_IMG_FMT_I420, 1, 1, 1, buf)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(NULL, NULL, NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(&enc, NULL, NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_encode(NULL, NULL, 0, 0, 0, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_encode(NULL, &img, 0, 0, 0, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_destroy(NULL)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_config_default(NULL, NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_config_default(NULL, &cfg, 0)); EXPECT_TRUE(vpx_codec_error(NULL) != NULL); for (int i = 0; i < NELEMENTS(kCodecs); ++i) { SCOPED_TRACE(vpx_codec_iface_name(kCodecs[i])); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(NULL, kCodecs[i], NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init(&enc, kCodecs[i], NULL, 0)); EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_enc_config_default(kCodecs[i], &cfg, 1)); EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(kCodecs[i], &cfg, 0)); EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, kCodecs[i], &cfg, 0)); EXPECT_EQ(VPX_CODEC_OK, vpx_codec_encode(&enc, NULL, 0, 0, 0, 0)); EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc)); } } TEST(EncodeAPI, HighBitDepthCapability) { // VP8 should not claim VP9 HBD as a capability. #if CONFIG_VP8_ENCODER const vpx_codec_caps_t vp8_caps = vpx_codec_get_caps(&vpx_codec_vp8_cx_algo); EXPECT_EQ(vp8_caps & VPX_CODEC_CAP_HIGHBITDEPTH, 0); #endif #if CONFIG_VP9_ENCODER const vpx_codec_caps_t vp9_caps = vpx_codec_get_caps(&vpx_codec_vp9_cx_algo); #if CONFIG_VP9_HIGHBITDEPTH EXPECT_EQ(vp9_caps & VPX_CODEC_CAP_HIGHBITDEPTH, VPX_CODEC_CAP_HIGHBITDEPTH); #else EXPECT_EQ(vp9_caps & VPX_CODEC_CAP_HIGHBITDEPTH, 0); #endif #endif } #if CONFIG_VP8_ENCODER TEST(EncodeAPI, ImageSizeSetting) { const int width = 711; const int height = 360; const int bps = 12; vpx_image_t img; vpx_codec_ctx_t enc; vpx_codec_enc_cfg_t cfg; uint8_t *img_buf = reinterpret_cast<uint8_t *>( calloc(width * height * bps / 8, sizeof(*img_buf))); vpx_codec_enc_config_default(vpx_codec_vp8_cx(), &cfg, 0); cfg.g_w = width; cfg.g_h = height; vpx_img_wrap(&img, VPX_IMG_FMT_I420, width, height, 1, img_buf); vpx_codec_enc_init(&enc, vpx_codec_vp8_cx(), &cfg, 0); EXPECT_EQ(VPX_CODEC_OK, vpx_codec_encode(&enc, &img, 0, 1, 0, 0)); free(img_buf); vpx_codec_destroy(&enc); } #endif // Set up 2 spatial streams with 2 temporal layers per stream, and generate // invalid configuration by setting the temporal layer rate allocation // (ts_target_bitrate[]) to 0 for both layers. This should fail independent of // CONFIG_MULTI_RES_ENCODING.
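// (For contrast, a valid allocation is cumulative per layer; the error
// resilience tests in this directory use, e.g., ts_target_bitrate[0] at 60%
// of rc_target_bitrate and ts_target_bitrate[1] equal to the full
// rc_target_bitrate.)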
TEST(EncodeAPI, MultiResEncode) { static const vpx_codec_iface_t *kCodecs[] = { #if CONFIG_VP8_ENCODER &vpx_codec_vp8_cx_algo, #endif #if CONFIG_VP9_ENCODER &vpx_codec_vp9_cx_algo, #endif }; const int width = 1280; const int height = 720; const int width_down = width / 2; const int height_down = height / 2; const int target_bitrate = 1000; const int framerate = 30; for (int c = 0; c < NELEMENTS(kCodecs); ++c) { const vpx_codec_iface_t *const iface = kCodecs[c]; vpx_codec_ctx_t enc[2]; vpx_codec_enc_cfg_t cfg[2]; vpx_rational_t dsf[2] = { { 2, 1 }, { 2, 1 } }; memset(enc, 0, sizeof(enc)); for (int i = 0; i < 2; i++) { vpx_codec_enc_config_default(iface, &cfg[i], 0); } /* Highest-resolution encoder settings */ cfg[0].g_w = width; cfg[0].g_h = height; cfg[0].rc_dropframe_thresh = 0; cfg[0].rc_end_usage = VPX_CBR; cfg[0].rc_resize_allowed = 0; cfg[0].rc_min_quantizer = 2; cfg[0].rc_max_quantizer = 56; cfg[0].rc_undershoot_pct = 100; cfg[0].rc_overshoot_pct = 15; cfg[0].rc_buf_initial_sz = 500; cfg[0].rc_buf_optimal_sz = 600; cfg[0].rc_buf_sz = 1000; cfg[0].g_error_resilient = 1; /* Enable error resilient mode */ cfg[0].g_lag_in_frames = 0; cfg[0].kf_mode = VPX_KF_AUTO; cfg[0].kf_min_dist = 3000; cfg[0].kf_max_dist = 3000; cfg[0].rc_target_bitrate = target_bitrate; /* Set target bitrate */ cfg[0].g_timebase.num = 1; /* Set fps */ cfg[0].g_timebase.den = framerate; memcpy(&cfg[1], &cfg[0], sizeof(cfg[0])); cfg[1].rc_target_bitrate = 500; cfg[1].g_w = width_down; cfg[1].g_h = height_down; for (int i = 0; i < 2; i++) { cfg[i].ts_number_layers = 2; cfg[i].ts_periodicity = 2; cfg[i].ts_rate_decimator[0] = 2; cfg[i].ts_rate_decimator[1] = 1; cfg[i].ts_layer_id[0] = 0; cfg[i].ts_layer_id[1] = 1; // Invalid parameters. cfg[i].ts_target_bitrate[0] = 0; cfg[i].ts_target_bitrate[1] = 0; } // VP9 should report incapable, VP8 invalid for all configurations. const char kVP9Name[] = "WebM Project VP9"; const bool is_vp9 = strncmp(kVP9Name, vpx_codec_iface_name(iface), sizeof(kVP9Name) - 1) == 0; EXPECT_EQ(is_vp9 ? VPX_CODEC_INCAPABLE : VPX_CODEC_INVALID_PARAM, vpx_codec_enc_init_multi(&enc[0], iface, &cfg[0], 2, 0, &dsf[0])); for (int i = 0; i < 2; i++) { vpx_codec_destroy(&enc[i]); } } } } // namespace libvpx-1.8.2/test/encode_perf_test.cc000066400000000000000000000147231357355204000176370ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <string> #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "./vpx_version.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" #include "test/y4m_video_source.h" #include "vpx_ports/vpx_timer.h" namespace { const int kMaxPsnr = 100; const double kUsecsInSec = 1000000.0; struct EncodePerfTestVideo { EncodePerfTestVideo(const char *name_, uint32_t width_, uint32_t height_, uint32_t bitrate_, int frames_) : name(name_), width(width_), height(height_), bitrate(bitrate_), frames(frames_) {} const char *name; uint32_t width; uint32_t height; uint32_t bitrate; int frames; }; const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = { EncodePerfTestVideo("desktop_640_360_30.yuv", 640, 360, 200, 2484), EncodePerfTestVideo("kirland_640_480_30.yuv", 640, 480, 200, 300), EncodePerfTestVideo("macmarcomoving_640_480_30.yuv", 640, 480, 200, 987), EncodePerfTestVideo("macmarcostationary_640_480_30.yuv", 640, 480, 200, 718), EncodePerfTestVideo("niklas_640_480_30.yuv", 640, 480, 200, 471), EncodePerfTestVideo("tacomanarrows_640_480_30.yuv", 640, 480, 200, 300), EncodePerfTestVideo("tacomasmallcameramovement_640_480_30.yuv", 640, 480, 200, 300), EncodePerfTestVideo("thaloundeskmtg_640_480_30.yuv", 640, 480, 200, 300), EncodePerfTestVideo("niklas_1280_720_30.yuv", 1280, 720, 600, 470), }; const int kEncodePerfTestSpeeds[] = { 5, 6, 7, 8, 9 }; const int kEncodePerfTestThreads[] = { 1, 2, 4 }; #define NELEMENTS(x) (sizeof((x)) / sizeof((x)[0])) class VP9EncodePerfTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { protected: VP9EncodePerfTest() : EncoderTest(GET_PARAM(0)), min_psnr_(kMaxPsnr), nframes_(0), encoding_mode_(GET_PARAM(1)), speed_(0), threads_(1) {} virtual ~VP9EncodePerfTest() {} virtual void SetUp() { InitializeConfig(); SetMode(encoding_mode_); cfg_.g_lag_in_frames = 0; cfg_.rc_min_quantizer = 2; cfg_.rc_max_quantizer = 56; cfg_.rc_dropframe_thresh = 0; cfg_.rc_undershoot_pct = 50; cfg_.rc_overshoot_pct = 50; cfg_.rc_buf_sz = 1000; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 600; cfg_.rc_resize_allowed = 0; cfg_.rc_end_usage = VPX_CBR; cfg_.g_error_resilient = 1; cfg_.g_threads = threads_; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { const int log2_tile_columns = 3; encoder->Control(VP8E_SET_CPUUSED, speed_); encoder->Control(VP9E_SET_TILE_COLUMNS, log2_tile_columns); encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1); encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0); } } virtual void BeginPassHook(unsigned int /*pass*/) { min_psnr_ = kMaxPsnr; nframes_ = 0; } virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { if (pkt->data.psnr.psnr[0] < min_psnr_) { min_psnr_ = pkt->data.psnr.psnr[0]; } } // for performance reasons don't decode virtual bool DoDecode() const { return false; } double min_psnr() const { return min_psnr_; } void set_speed(unsigned int speed) { speed_ = speed; } void set_threads(unsigned int threads) { threads_ = threads; } private: double min_psnr_; unsigned int nframes_; libvpx_test::TestMode encoding_mode_; unsigned speed_; unsigned int threads_; }; TEST_P(VP9EncodePerfTest, PerfTest) { for (size_t i = 0; i < NELEMENTS(kVP9EncodePerfTestVectors); ++i) { for (size_t j = 0; j < NELEMENTS(kEncodePerfTestSpeeds); ++j) { for (size_t k = 0; k < NELEMENTS(kEncodePerfTestThreads); ++k) { if (kVP9EncodePerfTestVectors[i].width <
512 && kEncodePerfTestThreads[k] > 1) { continue; } else if (kVP9EncodePerfTestVectors[i].width < 1024 && kEncodePerfTestThreads[k] > 2) { continue; } set_threads(kEncodePerfTestThreads[k]); SetUp(); const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = kVP9EncodePerfTestVectors[i].bitrate; init_flags_ = VPX_CODEC_USE_PSNR; const unsigned frames = kVP9EncodePerfTestVectors[i].frames; const char *video_name = kVP9EncodePerfTestVectors[i].name; libvpx_test::I420VideoSource video( video_name, kVP9EncodePerfTestVectors[i].width, kVP9EncodePerfTestVectors[i].height, timebase.den, timebase.num, 0, kVP9EncodePerfTestVectors[i].frames); set_speed(kEncodePerfTestSpeeds[j]); vpx_usec_timer t; vpx_usec_timer_start(&t); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); vpx_usec_timer_mark(&t); const double elapsed_secs = vpx_usec_timer_elapsed(&t) / kUsecsInSec; const double fps = frames / elapsed_secs; const double minimum_psnr = min_psnr(); std::string display_name(video_name); if (kEncodePerfTestThreads[k] > 1) { char thread_count[32]; snprintf(thread_count, sizeof(thread_count), "_t-%d", kEncodePerfTestThreads[k]); display_name += thread_count; } printf("{\n"); printf("\t\"type\" : \"encode_perf_test\",\n"); printf("\t\"version\" : \"%s\",\n", VERSION_STRING_NOSP); printf("\t\"videoName\" : \"%s\",\n", display_name.c_str()); printf("\t\"encodeTimeSecs\" : %f,\n", elapsed_secs); printf("\t\"totalFrames\" : %u,\n", frames); printf("\t\"framesPerSecond\" : %f,\n", fps); printf("\t\"minPsnr\" : %f,\n", minimum_psnr); printf("\t\"speed\" : %d,\n", kEncodePerfTestSpeeds[j]); printf("\t\"threads\" : %d\n", kEncodePerfTestThreads[k]); printf("}\n"); } } } } VP9_INSTANTIATE_TEST_CASE(VP9EncodePerfTest, ::testing::Values(::libvpx_test::kRealTime)); } // namespace libvpx-1.8.2/test/encode_test_driver.cc000066400000000000000000000205641357355204000201750ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <memory> #include <string> #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/encode_test_driver.h" #include "test/register_state_check.h" #include "test/video_source.h" namespace libvpx_test { void Encoder::InitEncoder(VideoSource *video) { vpx_codec_err_t res; const vpx_image_t *img = video->img(); if (video->img() && !encoder_.priv) { cfg_.g_w = img->d_w; cfg_.g_h = img->d_h; cfg_.g_timebase = video->timebase(); cfg_.rc_twopass_stats_in = stats_->buf(); res = vpx_codec_enc_init(&encoder_, CodecInterface(), &cfg_, init_flags_); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); #if CONFIG_VP9_ENCODER if (CodecInterface() == &vpx_codec_vp9_cx_algo) { // Default to 1 tile column for VP9.
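// VP9E_SET_TILE_COLUMNS takes the log2 of the desired tile column count,
// so the value 0 below requests a single tile column (1 << 0 == 1).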
const int log2_tile_columns = 0; res = vpx_codec_control_(&encoder_, VP9E_SET_TILE_COLUMNS, log2_tile_columns); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } else #endif { #if CONFIG_VP8_ENCODER ASSERT_EQ(&vpx_codec_vp8_cx_algo, CodecInterface()) << "Unknown Codec Interface"; #endif } } } void Encoder::EncodeFrame(VideoSource *video, const unsigned long frame_flags) { if (video->img()) { EncodeFrameInternal(*video, frame_flags); } else { Flush(); } // Handle twopass stats CxDataIterator iter = GetCxData(); while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) { if (pkt->kind != VPX_CODEC_STATS_PKT) continue; stats_->Append(*pkt); } } void Encoder::EncodeFrameInternal(const VideoSource &video, const unsigned long frame_flags) { vpx_codec_err_t res; const vpx_image_t *img = video.img(); // Handle frame resizing if (cfg_.g_w != img->d_w || cfg_.g_h != img->d_h) { cfg_.g_w = img->d_w; cfg_.g_h = img->d_h; res = vpx_codec_enc_config_set(&encoder_, &cfg_); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } // Encode the frame API_REGISTER_STATE_CHECK(res = vpx_codec_encode(&encoder_, img, video.pts(), video.duration(), frame_flags, deadline_)); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } void Encoder::Flush() { const vpx_codec_err_t res = vpx_codec_encode(&encoder_, NULL, 0, 0, 0, deadline_); if (!encoder_.priv) ASSERT_EQ(VPX_CODEC_ERROR, res) << EncoderError(); else ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } void EncoderTest::InitializeConfig() { const vpx_codec_err_t res = codec_->DefaultEncoderConfig(&cfg_, 0); dec_cfg_ = vpx_codec_dec_cfg_t(); ASSERT_EQ(VPX_CODEC_OK, res); } void EncoderTest::SetMode(TestMode mode) { switch (mode) { case kRealTime: deadline_ = VPX_DL_REALTIME; break; case kOnePassGood: case kTwoPassGood: deadline_ = VPX_DL_GOOD_QUALITY; break; case kOnePassBest: case kTwoPassBest: deadline_ = VPX_DL_BEST_QUALITY; break; default: ASSERT_TRUE(false) << "Unexpected mode " << mode; } if (mode == kTwoPassGood || mode == kTwoPassBest) { passes_ = 2; } else { passes_ = 1; } } // The function should return "true" most of the time, therefore no early // break-out is implemented within the match checking process. 
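// Note: the chroma comparison in compare_img() below assumes 4:2:0
// subsampling, i.e. each U/V plane is ((d_w + 1) / 2) x ((d_h + 1) / 2)
// samples.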
static bool compare_img(const vpx_image_t *img1, const vpx_image_t *img2) { bool match = (img1->fmt == img2->fmt) && (img1->cs == img2->cs) && (img1->d_w == img2->d_w) && (img1->d_h == img2->d_h); if (!match) return false; const unsigned int width_y = img1->d_w; const unsigned int height_y = img1->d_h; unsigned int i; for (i = 0; i < height_y; ++i) { match = (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], width_y) == 0) && match; } const unsigned int width_uv = (img1->d_w + 1) >> 1; const unsigned int height_uv = (img1->d_h + 1) >> 1; for (i = 0; i < height_uv; ++i) { match = (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], width_uv) == 0) && match; } for (i = 0; i < height_uv; ++i) { match = (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], width_uv) == 0) && match; } return match; } void EncoderTest::MismatchHook(const vpx_image_t * /*img1*/, const vpx_image_t * /*img2*/) { ASSERT_TRUE(0) << "Encode/Decode mismatch found"; } void EncoderTest::RunLoop(VideoSource *video) { vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t(); stats_.Reset(); ASSERT_TRUE(passes_ == 1 || passes_ == 2); for (unsigned int pass = 0; pass < passes_; pass++) { last_pts_ = 0; if (passes_ == 1) { cfg_.g_pass = VPX_RC_ONE_PASS; } else if (pass == 0) { cfg_.g_pass = VPX_RC_FIRST_PASS; } else { cfg_.g_pass = VPX_RC_LAST_PASS; } BeginPassHook(pass); std::unique_ptr<Encoder> encoder( codec_->CreateEncoder(cfg_, deadline_, init_flags_, &stats_)); ASSERT_TRUE(encoder.get() != NULL); ASSERT_NO_FATAL_FAILURE(video->Begin()); encoder->InitEncoder(video); ASSERT_FALSE(::testing::Test::HasFatalFailure()); unsigned long dec_init_flags = 0; // NOLINT // Use fragment decoder if encoder outputs partitions. // NOTE: fragment decoder and partition encoder are only supported by VP8. if (init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION) { dec_init_flags |= VPX_CODEC_USE_INPUT_FRAGMENTS; } std::unique_ptr<Decoder> decoder( codec_->CreateDecoder(dec_cfg, dec_init_flags)); bool again; for (again = true; again; video->Next()) { again = (video->img() != NULL); PreEncodeFrameHook(video); PreEncodeFrameHook(video, encoder.get()); encoder->EncodeFrame(video, frame_flags_); PostEncodeFrameHook(encoder.get()); CxDataIterator iter = encoder->GetCxData(); bool has_cxdata = false; bool has_dxdata = false; while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) { pkt = MutateEncoderOutputHook(pkt); again = true; switch (pkt->kind) { case VPX_CODEC_CX_FRAME_PKT: has_cxdata = true; if (decoder.get() != NULL && DoDecode()) { PreDecodeFrameHook(video, decoder.get()); vpx_codec_err_t res_dec = decoder->DecodeFrame( (const uint8_t *)pkt->data.frame.buf, pkt->data.frame.sz); if (!HandleDecodeResult(res_dec, *video, decoder.get())) break; has_dxdata = true; } ASSERT_GE(pkt->data.frame.pts, last_pts_); last_pts_ = pkt->data.frame.pts; FramePktHook(pkt); break; case VPX_CODEC_PSNR_PKT: PSNRPktHook(pkt); break; case VPX_CODEC_STATS_PKT: StatsPktHook(pkt); break; default: break; } } // Flush the decoder when there are no more fragments.
if ((init_flags_ & VPX_CODEC_USE_OUTPUT_PARTITION) && has_dxdata) { const vpx_codec_err_t res_dec = decoder->DecodeFrame(NULL, 0); if (!HandleDecodeResult(res_dec, *video, decoder.get())) break; } if (has_dxdata && has_cxdata) { const vpx_image_t *img_enc = encoder->GetPreviewFrame(); DxDataIterator dec_iter = decoder->GetDxData(); const vpx_image_t *img_dec = dec_iter.Next(); if (img_enc && img_dec) { const bool res = compare_img(img_enc, img_dec); if (!res) { // Mismatch MismatchHook(img_enc, img_dec); } } if (img_dec) DecompressedFrameHook(*img_dec, video->pts()); } if (!Continue()) break; } EndPassHook(); if (!Continue()) break; } } } // namespace libvpx_test libvpx-1.8.2/test/encode_test_driver.h000066400000000000000000000226371357355204000200410ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_ENCODE_TEST_DRIVER_H_ #define VPX_TEST_ENCODE_TEST_DRIVER_H_ #include <string> #include <vector> #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER #include "vpx/vp8cx.h" #endif #include "vpx/vpx_encoder.h" namespace libvpx_test { class CodecFactory; class VideoSource; enum TestMode { kRealTime, kOnePassGood, kOnePassBest, kTwoPassGood, kTwoPassBest }; #define ALL_TEST_MODES \ ::testing::Values(::libvpx_test::kRealTime, ::libvpx_test::kOnePassGood, \ ::libvpx_test::kOnePassBest, ::libvpx_test::kTwoPassGood, \ ::libvpx_test::kTwoPassBest) #define ONE_PASS_TEST_MODES \ ::testing::Values(::libvpx_test::kRealTime, ::libvpx_test::kOnePassGood, \ ::libvpx_test::kOnePassBest) #define TWO_PASS_TEST_MODES \ ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kTwoPassBest) // Provides an object to handle the libvpx get_cx_data() iteration pattern class CxDataIterator { public: explicit CxDataIterator(vpx_codec_ctx_t *encoder) : encoder_(encoder), iter_(NULL) {} const vpx_codec_cx_pkt_t *Next() { return vpx_codec_get_cx_data(encoder_, &iter_); } private: vpx_codec_ctx_t *encoder_; vpx_codec_iter_t iter_; }; // Implements an in-memory store for libvpx twopass statistics class TwopassStatsStore { public: void Append(const vpx_codec_cx_pkt_t &pkt) { buffer_.append(reinterpret_cast<char *>(pkt.data.twopass_stats.buf), pkt.data.twopass_stats.sz); } vpx_fixed_buf_t buf() { const vpx_fixed_buf_t buf = { &buffer_[0], buffer_.size() }; return buf; } void Reset() { buffer_.clear(); } protected: std::string buffer_; }; // Provides a simplified interface to manage one video encoding pass, given // a configuration and video source. // // TODO(jkoleszar): The exact services it provides and the appropriate // level of abstraction will be fleshed out as more tests are written.
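// A minimal usage sketch (illustrative only, not part of this header).
// VP9Encoder stands in for a concrete Encoder subclass from
// test/codec_factory.h, and "video" for an open VideoSource:
//
//   TwopassStatsStore stats;
//   VP9Encoder encoder(cfg, VPX_DL_GOOD_QUALITY, 0, &stats);
//   video.Begin();
//   encoder.InitEncoder(&video);
//   while (video.img() != NULL) {
//     encoder.EncodeFrame(&video);
//     CxDataIterator iter = encoder.GetCxData();
//     while (const vpx_codec_cx_pkt_t *pkt = iter.Next()) {
//       // Write pkt->data.frame.buf for VPX_CODEC_CX_FRAME_PKT packets.
//     }
//     video.Next();
//   }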
class Encoder { public: Encoder(vpx_codec_enc_cfg_t cfg, unsigned long deadline, const unsigned long init_flags, TwopassStatsStore *stats) : cfg_(cfg), deadline_(deadline), init_flags_(init_flags), stats_(stats) { memset(&encoder_, 0, sizeof(encoder_)); } virtual ~Encoder() { vpx_codec_destroy(&encoder_); } CxDataIterator GetCxData() { return CxDataIterator(&encoder_); } void InitEncoder(VideoSource *video); const vpx_image_t *GetPreviewFrame() { return vpx_codec_get_preview_frame(&encoder_); } // This is a thin wrapper around vpx_codec_encode(), so refer to // vpx_encoder.h for its semantics. void EncodeFrame(VideoSource *video, const unsigned long frame_flags); // Convenience wrapper for EncodeFrame() void EncodeFrame(VideoSource *video) { EncodeFrame(video, 0); } void Control(int ctrl_id, int arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } void Control(int ctrl_id, int *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } void Control(int ctrl_id, struct vpx_scaling_mode *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } void Control(int ctrl_id, struct vpx_svc_layer_id *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } void Control(int ctrl_id, struct vpx_svc_ref_frame_config *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } void Control(int ctrl_id, struct vpx_svc_parameters *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } void Control(int ctrl_id, struct vpx_svc_frame_drop *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } void Control(int ctrl_id, struct vpx_svc_spatial_layer_sync *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER void Control(int ctrl_id, vpx_active_map_t *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } void Control(int ctrl_id, vpx_roi_map_t *arg) { const vpx_codec_err_t res = vpx_codec_control_(&encoder_, ctrl_id, arg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); } #endif void Config(const vpx_codec_enc_cfg_t *cfg) { const vpx_codec_err_t res = vpx_codec_enc_config_set(&encoder_, cfg); ASSERT_EQ(VPX_CODEC_OK, res) << EncoderError(); cfg_ = *cfg; } void set_deadline(unsigned long deadline) { deadline_ = deadline; } protected: virtual vpx_codec_iface_t *CodecInterface() const = 0; const char *EncoderError() { const char *detail = vpx_codec_error_detail(&encoder_); return detail ? detail : vpx_codec_error(&encoder_); } // Encode an image void EncodeFrameInternal(const VideoSource &video, const unsigned long frame_flags); // Flush the encoder on EOS void Flush(); vpx_codec_ctx_t encoder_; vpx_codec_enc_cfg_t cfg_; unsigned long deadline_; unsigned long init_flags_; TwopassStatsStore *stats_; }; // Common test functionality for all Encoder tests. // // This class is a mixin which provides the main loop common to all // encoder tests. 
It provides hooks which can be overridden by subclasses // to implement each test's specific behavior, while centralizing the bulk // of the boilerplate. Note that it doesn't inherit the gtest testing // classes directly, so that tests can be parameterized differently. class EncoderTest { protected: explicit EncoderTest(const CodecFactory *codec) : codec_(codec), abort_(false), init_flags_(0), frame_flags_(0), last_pts_(0) { // Default to 1 thread. cfg_.g_threads = 1; } virtual ~EncoderTest() {} // Initialize the cfg_ member with the default configuration. void InitializeConfig(); // Map the TestMode enum to the deadline_ and passes_ variables. void SetMode(TestMode mode); // Set encoder flag. void set_init_flags(unsigned long flag) { // NOLINT(runtime/int) init_flags_ = flag; } // Main loop virtual void RunLoop(VideoSource *video); // Hook to be called at the beginning of a pass. virtual void BeginPassHook(unsigned int /*pass*/) {} // Hook to be called at the end of a pass. virtual void EndPassHook() {} // Hook to be called before encoding a frame. virtual void PreEncodeFrameHook(VideoSource * /*video*/) {} virtual void PreEncodeFrameHook(VideoSource * /*video*/, Encoder * /*encoder*/) {} virtual void PreDecodeFrameHook(VideoSource * /*video*/, Decoder * /*decoder*/) {} virtual void PostEncodeFrameHook(Encoder * /*encoder*/) {} // Hook to be called on every compressed data packet. virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {} // Hook to be called on every PSNR packet. virtual void PSNRPktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {} // Hook to be called on every first pass stats packet. virtual void StatsPktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {} // Hook to determine whether the encode loop should continue. virtual bool Continue() const { return !(::testing::Test::HasFatalFailure() || abort_); } const CodecFactory *codec_; // Hook to determine whether to decode frame after encoding virtual bool DoDecode() const { return 1; } // Hook to handle encode/decode mismatch virtual void MismatchHook(const vpx_image_t *img1, const vpx_image_t *img2); // Hook to be called on every decompressed frame. virtual void DecompressedFrameHook(const vpx_image_t & /*img*/, vpx_codec_pts_t /*pts*/) {} // Hook to be called to handle decode result. Return true to continue. virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec, const VideoSource & /*video*/, Decoder *decoder) { EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError(); return VPX_CODEC_OK == res_dec; } // Hook that can modify the encoder's output data virtual const vpx_codec_cx_pkt_t *MutateEncoderOutputHook( const vpx_codec_cx_pkt_t *pkt) { return pkt; } bool abort_; vpx_codec_enc_cfg_t cfg_; vpx_codec_dec_cfg_t dec_cfg_; unsigned int passes_; unsigned long deadline_; TwopassStatsStore stats_; unsigned long init_flags_; unsigned long frame_flags_; vpx_codec_pts_t last_pts_; }; } // namespace libvpx_test #endif // VPX_TEST_ENCODE_TEST_DRIVER_H_ libvpx-1.8.2/test/error_resilience_test.cc000066400000000000000000000471721357355204000207250ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" namespace { const int kMaxErrorFrames = 12; const int kMaxDroppableFrames = 12; class ErrorResilienceTestLarge : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, bool> { protected: ErrorResilienceTestLarge() : EncoderTest(GET_PARAM(0)), svc_support_(GET_PARAM(2)), psnr_(0.0), nframes_(0), mismatch_psnr_(0.0), mismatch_nframes_(0), encoding_mode_(GET_PARAM(1)) { Reset(); } virtual ~ErrorResilienceTestLarge() {} void Reset() { error_nframes_ = 0; droppable_nframes_ = 0; pattern_switch_ = 0; } virtual void SetUp() { InitializeConfig(); SetMode(encoding_mode_); } virtual void BeginPassHook(unsigned int /*pass*/) { psnr_ = 0.0; nframes_ = 0; mismatch_psnr_ = 0.0; mismatch_nframes_ = 0; } virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { psnr_ += pkt->data.psnr.psnr[0]; nframes_++; } // // Frame flags and layer id for temporal layers. // For two layers, test pattern is: // 1 3 // 0 2 ..... // LAST is updated on base/layer 0, GOLDEN updated on layer 1. // Non-zero pattern_switch parameter means pattern will switch to // not using LAST for frame_num >= pattern_switch. int SetFrameFlags(int frame_num, int num_temp_layers, int pattern_switch) { int frame_flags = 0; if (num_temp_layers == 2) { if (frame_num % 2 == 0) { if (frame_num < pattern_switch || pattern_switch == 0) { // Layer 0: predict from LAST and ARF, update LAST. frame_flags = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; } else { // Layer 0: predict from GF and ARF, update GF. frame_flags = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; } } else { if (frame_num < pattern_switch || pattern_switch == 0) { // Layer 1: predict from L, GF, and ARF, update GF. frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; } else { // Layer 1: predict from GF and ARF, update GF. frame_flags = VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ARF; } } } return frame_flags; } virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video) { frame_flags_ &= ~(VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF); // For temporal layer case.
if (cfg_.ts_number_layers > 1) { frame_flags_ = SetFrameFlags(video->frame(), cfg_.ts_number_layers, pattern_switch_); for (unsigned int i = 0; i < droppable_nframes_; ++i) { if (droppable_frames_[i] == video->frame()) { std::cout << "Encoding droppable frame: " << droppable_frames_[i] << "\n"; } } } else { if (droppable_nframes_ > 0 && (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) { for (unsigned int i = 0; i < droppable_nframes_; ++i) { if (droppable_frames_[i] == video->frame()) { std::cout << "Encoding droppable frame: " << droppable_frames_[i] << "\n"; frame_flags_ |= (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF); return; } } } } } double GetAveragePsnr() const { if (nframes_) return psnr_ / nframes_; return 0.0; } double GetAverageMismatchPsnr() const { if (mismatch_nframes_) return mismatch_psnr_ / mismatch_nframes_; return 0.0; } virtual bool DoDecode() const { if (error_nframes_ > 0 && (cfg_.g_pass == VPX_RC_LAST_PASS || cfg_.g_pass == VPX_RC_ONE_PASS)) { for (unsigned int i = 0; i < error_nframes_; ++i) { if (error_frames_[i] == nframes_ - 1) { std::cout << " Skipping decoding frame: " << error_frames_[i] << "\n"; return 0; } } } return 1; } virtual void MismatchHook(const vpx_image_t *img1, const vpx_image_t *img2) { double mismatch_psnr = compute_psnr(img1, img2); mismatch_psnr_ += mismatch_psnr; ++mismatch_nframes_; // std::cout << "Mismatch frame psnr: " << mismatch_psnr << "\n"; } void SetErrorFrames(int num, unsigned int *list) { if (num > kMaxErrorFrames) { num = kMaxErrorFrames; } else if (num < 0) { num = 0; } error_nframes_ = num; for (unsigned int i = 0; i < error_nframes_; ++i) { error_frames_[i] = list[i]; } } void SetDroppableFrames(int num, unsigned int *list) { if (num > kMaxDroppableFrames) { num = kMaxDroppableFrames; } else if (num < 0) { num = 0; } droppable_nframes_ = num; for (unsigned int i = 0; i < droppable_nframes_; ++i) { droppable_frames_[i] = list[i]; } } unsigned int GetMismatchFrames() { return mismatch_nframes_; } void SetPatternSwitch(int frame_switch) { pattern_switch_ = frame_switch; } bool svc_support_; private: double psnr_; unsigned int nframes_; unsigned int error_nframes_; unsigned int droppable_nframes_; unsigned int pattern_switch_; double mismatch_psnr_; unsigned int mismatch_nframes_; unsigned int error_frames_[kMaxErrorFrames]; unsigned int droppable_frames_[kMaxDroppableFrames]; libvpx_test::TestMode encoding_mode_; }; TEST_P(ErrorResilienceTestLarge, OnVersusOff) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 2000; cfg_.g_lag_in_frames = 10; init_flags_ = VPX_CODEC_USE_PSNR; libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, timebase.den, timebase.num, 0, 30); // Error resilient mode OFF. cfg_.g_error_resilient = 0; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const double psnr_resilience_off = GetAveragePsnr(); EXPECT_GT(psnr_resilience_off, 25.0); // Error resilient mode ON. cfg_.g_error_resilient = 1; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const double psnr_resilience_on = GetAveragePsnr(); EXPECT_GT(psnr_resilience_on, 25.0); // Test that turning on error resilient mode hurts by 10% at most. 
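// (The ratio check below is two-sided: a swing of more than 10% in either
// direction, loss or gain, fails the test.)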
if (psnr_resilience_off > 0.0) { const double psnr_ratio = psnr_resilience_on / psnr_resilience_off; EXPECT_GE(psnr_ratio, 0.9); EXPECT_LE(psnr_ratio, 1.1); } } // Check for successful decoding and no encoder/decoder mismatch // if we lose (i.e., drop before decoding) a set of droppable // frames (i.e., frames that don't update any reference buffers). // Check both isolated and consecutive loss. TEST_P(ErrorResilienceTestLarge, DropFramesWithoutRecovery) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 500; // FIXME(debargha): Fix this to work for any lag. // Currently this test only works for lag = 0 cfg_.g_lag_in_frames = 0; init_flags_ = VPX_CODEC_USE_PSNR; libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, timebase.den, timebase.num, 0, 40); // Error resilient mode ON. cfg_.g_error_resilient = 1; cfg_.kf_mode = VPX_KF_DISABLED; // Set an arbitrary set of error frames same as droppable frames. // In addition to isolated loss/drop, add a long consecutive series // (of size 9) of dropped frames. unsigned int num_droppable_frames = 11; unsigned int droppable_frame_list[] = { 5, 16, 22, 23, 24, 25, 26, 27, 28, 29, 30 }; SetDroppableFrames(num_droppable_frames, droppable_frame_list); SetErrorFrames(num_droppable_frames, droppable_frame_list); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // Test that no mismatches have been found std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; EXPECT_EQ(GetMismatchFrames(), (unsigned int)0); // Reset previously set of error/droppable frames. Reset(); #if 0 // TODO(jkoleszar): This test is disabled for the time being as too // sensitive. It's not clear how to set a reasonable threshold for // this behavior. // Now set an arbitrary set of error frames that are non-droppable unsigned int num_error_frames = 3; unsigned int error_frame_list[] = {3, 10, 20}; SetErrorFrames(num_error_frames, error_frame_list); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // Test that dropping an arbitrary set of inter frames does not hurt too much // Note the Average Mismatch PSNR is the average of the PSNR between // decoded frame and encoder's version of the same frame for all frames // with mismatch. const double psnr_resilience_mismatch = GetAverageMismatchPsnr(); std::cout << " Mismatch PSNR: " << psnr_resilience_mismatch << "\n"; EXPECT_GT(psnr_resilience_mismatch, 20.0); #endif } // Check for successful decoding and no encoder/decoder mismatch // if we lose (i.e., drop before decoding) the enhancement layer frames for a // two layer temporal pattern. The base layer does not predict from the top // layer, so successful decoding is expected. TEST_P(ErrorResilienceTestLarge, 2LayersDropEnhancement) { // This test doesn't run if SVC is not supported. if (!svc_support_) return; const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 500; cfg_.g_lag_in_frames = 0; cfg_.rc_end_usage = VPX_CBR; // 2 Temporal layers, no spatial layers, CBR mode. cfg_.ss_number_layers = 1; cfg_.ts_number_layers = 2; cfg_.ts_rate_decimator[0] = 2; cfg_.ts_rate_decimator[1] = 1; cfg_.ts_periodicity = 2; cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100; cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate; init_flags_ = VPX_CODEC_USE_PSNR; libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, timebase.den, timebase.num, 0, 40); // Error resilient mode ON. 
cfg_.g_error_resilient = 1; cfg_.kf_mode = VPX_KF_DISABLED; SetPatternSwitch(0); // The odd frames are the enhancement layer for 2 layer pattern, so set // those frames as droppable. Drop the last 7 frames. unsigned int num_droppable_frames = 7; unsigned int droppable_frame_list[] = { 27, 29, 31, 33, 35, 37, 39 }; SetDroppableFrames(num_droppable_frames, droppable_frame_list); SetErrorFrames(num_droppable_frames, droppable_frame_list); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // Test that no mismatches have been found std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; EXPECT_EQ(GetMismatchFrames(), (unsigned int)0); // Reset previously set of error/droppable frames. Reset(); } // Check for successful decoding and no encoder/decoder mismatch // for a two layer temporal pattern, where at some point in the // sequence, the LAST ref is not used anymore. TEST_P(ErrorResilienceTestLarge, 2LayersNoRefLast) { // This test doesn't run if SVC is not supported. if (!svc_support_) return; const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 500; cfg_.g_lag_in_frames = 0; cfg_.rc_end_usage = VPX_CBR; // 2 Temporal layers, no spatial layers, CBR mode. cfg_.ss_number_layers = 1; cfg_.ts_number_layers = 2; cfg_.ts_rate_decimator[0] = 2; cfg_.ts_rate_decimator[1] = 1; cfg_.ts_periodicity = 2; cfg_.ts_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100; cfg_.ts_target_bitrate[1] = cfg_.rc_target_bitrate; init_flags_ = VPX_CODEC_USE_PSNR; libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, timebase.den, timebase.num, 0, 100); // Error resilient mode ON. cfg_.g_error_resilient = 1; cfg_.kf_mode = VPX_KF_DISABLED; SetPatternSwitch(60); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // Test that no mismatches have been found std::cout << " Mismatch frames: " << GetMismatchFrames() << "\n"; EXPECT_EQ(GetMismatchFrames(), (unsigned int)0); // Reset previously set of error/droppable frames. Reset(); } class ErrorResilienceTestLargeCodecControls : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> { protected: ErrorResilienceTestLargeCodecControls() : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)) { Reset(); } virtual ~ErrorResilienceTestLargeCodecControls() {} void Reset() { last_pts_ = 0; tot_frame_number_ = 0; // For testing up to 3 layers. for (int i = 0; i < 3; ++i) { bits_total_[i] = 0; } duration_ = 0.0; } virtual void SetUp() { InitializeConfig(); SetMode(encoding_mode_); } // // Frame flags and layer id for temporal layers. // // For two layers, test pattern is: // 1 3 // 0 2 ..... // For three layers, test pattern is: // 1 3 5 7 // 2 6 // 0 4 .... // LAST is always updated on base/layer 0, GOLDEN is updated on layer 1, // and ALTREF is updated on top layer for 3 layer pattern. int SetFrameFlags(int frame_num, int num_temp_layers) { int frame_flags = 0; if (num_temp_layers == 2) { if (frame_num % 2 == 0) { // Layer 0: predict from L and ARF, update L. frame_flags = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; } else { // Layer 1: predict from L, G and ARF, and update G. frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; } } else if (num_temp_layers == 3) { if (frame_num % 4 == 0) { // Layer 0: predict from L, update L. frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF; } else if ((frame_num - 2) % 4 == 0) { // Layer 1: predict from L, G, update G.
frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_REF_ARF; } else if ((frame_num - 1) % 2 == 0) { // Layer 2: predict from L, G, ARF; update ARF. frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST; } } return frame_flags; } int SetLayerId(int frame_num, int num_temp_layers) { int layer_id = 0; if (num_temp_layers == 2) { if (frame_num % 2 == 0) { layer_id = 0; } else { layer_id = 1; } } else if (num_temp_layers == 3) { if (frame_num % 4 == 0) { layer_id = 0; } else if ((frame_num - 2) % 4 == 0) { layer_id = 1; } else if ((frame_num - 1) % 2 == 0) { layer_id = 2; } } return layer_id; } virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, libvpx_test::Encoder *encoder) { if (cfg_.ts_number_layers > 1) { int layer_id = SetLayerId(video->frame(), cfg_.ts_number_layers); int frame_flags = SetFrameFlags(video->frame(), cfg_.ts_number_layers); if (video->frame() > 0) { encoder->Control(VP8E_SET_TEMPORAL_LAYER_ID, layer_id); encoder->Control(VP8E_SET_FRAME_FLAGS, frame_flags); } const vpx_rational_t tb = video->timebase(); timebase_ = static_cast<double>(tb.num) / tb.den; duration_ = 0; return; } } virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { // Time since last timestamp = duration. vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; if (duration > 1) { // Update counter for total number of frames (#frames input to encoder). // Needed for setting the proper layer_id below. tot_frame_number_ += static_cast<int>(duration - 1); } int layer = SetLayerId(tot_frame_number_, cfg_.ts_number_layers); const size_t frame_size_in_bits = pkt->data.frame.sz * 8; // Update the total encoded bits. For temporal layers, update the cumulative // encoded bits per layer. for (int i = layer; i < static_cast<int>(cfg_.ts_number_layers); ++i) { bits_total_[i] += frame_size_in_bits; } // Update the most recent pts. last_pts_ = pkt->data.frame.pts; ++tot_frame_number_; } virtual void EndPassHook(void) { duration_ = (last_pts_ + 1) * timebase_; if (cfg_.ts_number_layers > 1) { for (int layer = 0; layer < static_cast<int>(cfg_.ts_number_layers); ++layer) { if (bits_total_[layer]) { // Effective file datarate: effective_datarate_[layer] = (bits_total_[layer] / 1000.0) / duration_; } } } } double effective_datarate_[3]; private: libvpx_test::TestMode encoding_mode_; vpx_codec_pts_t last_pts_; double timebase_; int64_t bits_total_[3]; double duration_; int tot_frame_number_; }; // Check two codec controls used for: // (1) setting the temporal layer id, and (2) setting the encoder flags. // This test invokes those controls for each frame, and verifies that there is // no encoder/decoder mismatch and that the basic rate control response is // within bounds. // TODO(marpan): Maybe move this test to datarate_test.cc. TEST_P(ErrorResilienceTestLargeCodecControls, CodecControl3TemporalLayers) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 1; cfg_.rc_min_quantizer = 2; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_CBR; cfg_.rc_dropframe_thresh = 1; cfg_.g_lag_in_frames = 0; cfg_.kf_mode = VPX_KF_DISABLED; cfg_.g_error_resilient = 1; // 3 Temporal layers. Framerate decimation (4, 2, 1).
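// (ts_rate_decimator[i] == N means layer i by itself carries 1/N of the
// full frame rate; with ts_periodicity = 4 the ts_layer_id[] entries below
// assign frames to layers in the repeating pattern 0, 2, 1, 2.)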
cfg_.ts_number_layers = 3; cfg_.ts_rate_decimator[0] = 4; cfg_.ts_rate_decimator[1] = 2; cfg_.ts_rate_decimator[2] = 1; cfg_.ts_periodicity = 4; cfg_.ts_layer_id[0] = 0; cfg_.ts_layer_id[1] = 2; cfg_.ts_layer_id[2] = 1; cfg_.ts_layer_id[3] = 2; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 200); for (int i = 200; i <= 800; i += 200) { cfg_.rc_target_bitrate = i; Reset(); // 40-20-40 bitrate allocation for 3 temporal layers. cfg_.ts_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100; cfg_.ts_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100; cfg_.ts_target_bitrate[2] = cfg_.rc_target_bitrate; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); for (int j = 0; j < static_cast<int>(cfg_.ts_number_layers); ++j) { ASSERT_GE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 0.75) << " The datarate for the file is lower than target by too much, " "for layer: " << j; ASSERT_LE(effective_datarate_[j], cfg_.ts_target_bitrate[j] * 1.25) << " The datarate for the file is greater than target by too much, " "for layer: " << j; } } } VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES, ::testing::Values(true)); VP8_INSTANTIATE_TEST_CASE(ErrorResilienceTestLargeCodecControls, ONE_PASS_TEST_MODES); VP9_INSTANTIATE_TEST_CASE(ErrorResilienceTestLarge, ONE_PASS_TEST_MODES, ::testing::Values(true)); } // namespace libvpx-1.8.2/test/examples.sh000077500000000000000000000016721357355204000161720ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file runs all of the tests for the libvpx examples. ## . $(dirname $0)/tools_common.sh example_tests=$(ls $(dirname $0)/*.sh) # List of script names to exclude. exclude_list="examples stress tools_common" # Filter out the scripts in $exclude_list. for word in ${exclude_list}; do example_tests=$(filter_strings "${example_tests}" "${word}" exclude) done for test in ${example_tests}; do # Source each test script so that exporting variables can be avoided. VPX_TEST_NAME="$(basename ${test%.*})" . "${test}" done libvpx-1.8.2/test/external_frame_buffer_test.cc000066400000000000000000000423731357355204000217140ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <memory> #include <string> #include "./vpx_config.h" #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/ivf_video_source.h" #include "test/md5_helper.h" #include "test/test_vectors.h" #include "test/util.h" #if CONFIG_WEBM_IO #include "test/webm_video_source.h" #endif namespace { const int kVideoNameParam = 1; struct ExternalFrameBuffer { uint8_t *data; size_t size; int in_use; }; // Class to manipulate a list of external frame buffers.
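// A sketch of the callback contract exercised below (illustrative only):
// the application registers a get/release callback pair plus private data
// via vpx_codec_set_frame_buffer_functions(); libvpx then invokes the get
// callback whenever it needs a buffer of at least min_size bytes, and the
// release callback once that buffer is no longer referenced:
//
//   ExternalFrameBufferList fb_list;
//   fb_list.CreateBufferList(16);  // capacity chosen arbitrarily here
//   vpx_codec_set_frame_buffer_functions(ctx, get_vp9_frame_buffer,
//                                        release_vp9_frame_buffer, &fb_list);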
class ExternalFrameBufferList { public: ExternalFrameBufferList() : num_buffers_(0), num_used_buffers_(0), ext_fb_list_(NULL) {} virtual ~ExternalFrameBufferList() { for (int i = 0; i < num_buffers_; ++i) { delete[] ext_fb_list_[i].data; } delete[] ext_fb_list_; } // Creates the list to hold the external buffers. Returns true on success. bool CreateBufferList(int num_buffers) { if (num_buffers < 0) return false; num_buffers_ = num_buffers; ext_fb_list_ = new ExternalFrameBuffer[num_buffers_]; EXPECT_TRUE(ext_fb_list_ != NULL); memset(ext_fb_list_, 0, sizeof(ext_fb_list_[0]) * num_buffers_); return true; } // Searches the frame buffer list for a free frame buffer. Makes sure // that the frame buffer is at least |min_size| in bytes. Marks that the // frame buffer is in use by libvpx. Finally sets |fb| to point to the // external frame buffer. Returns < 0 on an error. int GetFreeFrameBuffer(size_t min_size, vpx_codec_frame_buffer_t *fb) { EXPECT_TRUE(fb != NULL); const int idx = FindFreeBufferIndex(); if (idx == num_buffers_) return -1; if (ext_fb_list_[idx].size < min_size) { delete[] ext_fb_list_[idx].data; ext_fb_list_[idx].data = new uint8_t[min_size]; memset(ext_fb_list_[idx].data, 0, min_size); ext_fb_list_[idx].size = min_size; } SetFrameBuffer(idx, fb); num_used_buffers_++; return 0; } // Test function that will not allocate any data for the frame buffer. // Returns < 0 on an error. int GetZeroFrameBuffer(size_t min_size, vpx_codec_frame_buffer_t *fb) { EXPECT_TRUE(fb != NULL); const int idx = FindFreeBufferIndex(); if (idx == num_buffers_) return -1; if (ext_fb_list_[idx].size < min_size) { delete[] ext_fb_list_[idx].data; ext_fb_list_[idx].data = NULL; ext_fb_list_[idx].size = min_size; } SetFrameBuffer(idx, fb); return 0; } // Marks the external frame buffer that |fb| is pointing to as free. // Returns < 0 on an error. int ReturnFrameBuffer(vpx_codec_frame_buffer_t *fb) { if (fb == NULL) { EXPECT_TRUE(fb != NULL); return -1; } ExternalFrameBuffer *const ext_fb = reinterpret_cast<ExternalFrameBuffer *>(fb->priv); if (ext_fb == NULL) { EXPECT_TRUE(ext_fb != NULL); return -1; } EXPECT_EQ(1, ext_fb->in_use); ext_fb->in_use = 0; num_used_buffers_--; return 0; } // Checks that the vpx_image_t data is contained within the external frame // buffer private data passed back in the vpx_image_t. void CheckImageFrameBuffer(const vpx_image_t *img) { if (img->fb_priv != NULL) { const struct ExternalFrameBuffer *const ext_fb = reinterpret_cast<ExternalFrameBuffer *>(img->fb_priv); ASSERT_TRUE(img->planes[0] >= ext_fb->data && img->planes[0] < (ext_fb->data + ext_fb->size)); } } int num_used_buffers() const { return num_used_buffers_; } private: // Returns the index of the first free frame buffer. Returns |num_buffers_| // if there are no free frame buffers. int FindFreeBufferIndex() { int i; // Find a free frame buffer. for (i = 0; i < num_buffers_; ++i) { if (!ext_fb_list_[i].in_use) break; } return i; } // Sets |fb| to an external frame buffer. idx is the index into the frame // buffer list. void SetFrameBuffer(int idx, vpx_codec_frame_buffer_t *fb) { ASSERT_TRUE(fb != NULL); fb->data = ext_fb_list_[idx].data; fb->size = ext_fb_list_[idx].size; ASSERT_EQ(0, ext_fb_list_[idx].in_use); ext_fb_list_[idx].in_use = 1; fb->priv = &ext_fb_list_[idx]; } int num_buffers_; int num_used_buffers_; ExternalFrameBuffer *ext_fb_list_; }; #if CONFIG_WEBM_IO // Callback used by libvpx to request the application to return a frame // buffer of at least |min_size| in bytes.
#if CONFIG_WEBM_IO // Callback used by libvpx to request the application to return a frame // buffer of at least |min_size| in bytes. int get_vp9_frame_buffer(void *user_priv, size_t min_size, vpx_codec_frame_buffer_t *fb) { ExternalFrameBufferList *const fb_list = reinterpret_cast<ExternalFrameBufferList *>(user_priv); return fb_list->GetFreeFrameBuffer(min_size, fb); } // Callback used by libvpx to tell the application that |fb| is not needed // anymore. int release_vp9_frame_buffer(void *user_priv, vpx_codec_frame_buffer_t *fb) { ExternalFrameBufferList *const fb_list = reinterpret_cast<ExternalFrameBufferList *>(user_priv); return fb_list->ReturnFrameBuffer(fb); } // Callback will not allocate data for frame buffer. int get_vp9_zero_frame_buffer(void *user_priv, size_t min_size, vpx_codec_frame_buffer_t *fb) { ExternalFrameBufferList *const fb_list = reinterpret_cast<ExternalFrameBufferList *>(user_priv); return fb_list->GetZeroFrameBuffer(min_size, fb); } // Callback will allocate one less byte than |min_size|. int get_vp9_one_less_byte_frame_buffer(void *user_priv, size_t min_size, vpx_codec_frame_buffer_t *fb) { ExternalFrameBufferList *const fb_list = reinterpret_cast<ExternalFrameBufferList *>(user_priv); return fb_list->GetFreeFrameBuffer(min_size - 1, fb); } // Callback will not release the external frame buffer. int do_not_release_vp9_frame_buffer(void *user_priv, vpx_codec_frame_buffer_t *fb) { (void)user_priv; (void)fb; return 0; } #endif  // CONFIG_WEBM_IO // Class for testing passing in external frame buffers to libvpx. class ExternalFrameBufferMD5Test : public ::libvpx_test::DecoderTest, public ::libvpx_test::CodecTestWithParam<const char *> { protected: ExternalFrameBufferMD5Test() : DecoderTest(GET_PARAM(::libvpx_test::kCodecFactoryParam)), md5_file_(NULL), num_buffers_(0) {} virtual ~ExternalFrameBufferMD5Test() { if (md5_file_ != NULL) fclose(md5_file_); } virtual void PreDecodeFrameHook( const libvpx_test::CompressedVideoSource &video, libvpx_test::Decoder *decoder) { if (num_buffers_ > 0 && video.frame_number() == 0) { // Have libvpx use frame buffers we create. ASSERT_TRUE(fb_list_.CreateBufferList(num_buffers_)); ASSERT_EQ(VPX_CODEC_OK, decoder->SetFrameBufferFunctions(GetVP9FrameBuffer, ReleaseVP9FrameBuffer, this)); } } void OpenMD5File(const std::string &md5_file_name_) { md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_); ASSERT_TRUE(md5_file_ != NULL) << "Md5 file open failed. Filename: " << md5_file_name_; } virtual void DecompressedFrameHook(const vpx_image_t &img, const unsigned int frame_number) { ASSERT_TRUE(md5_file_ != NULL); char expected_md5[33]; char junk[128]; // Read correct md5 checksums. const int res = fscanf(md5_file_, "%s %s", expected_md5, junk); ASSERT_NE(EOF, res) << "Read md5 data failed"; expected_md5[32] = '\0'; ::libvpx_test::MD5 md5_res; md5_res.Add(&img); const char *const actual_md5 = md5_res.Get(); // Check md5 match. ASSERT_STREQ(expected_md5, actual_md5) << "Md5 checksums don't match: frame number = " << frame_number; } // Callback to get a free external frame buffer. Return value < 0 is an // error. static int GetVP9FrameBuffer(void *user_priv, size_t min_size, vpx_codec_frame_buffer_t *fb) { ExternalFrameBufferMD5Test *const md5Test = reinterpret_cast<ExternalFrameBufferMD5Test *>(user_priv); return md5Test->fb_list_.GetFreeFrameBuffer(min_size, fb); } // Callback to release an external frame buffer. Return value < 0 is an // error.
static int ReleaseVP9FrameBuffer(void *user_priv, vpx_codec_frame_buffer_t *fb) { ExternalFrameBufferMD5Test *const md5Test = reinterpret_cast<ExternalFrameBufferMD5Test *>(user_priv); return md5Test->fb_list_.ReturnFrameBuffer(fb); } void set_num_buffers(int num_buffers) { num_buffers_ = num_buffers; } int num_buffers() const { return num_buffers_; } private: FILE *md5_file_; int num_buffers_; ExternalFrameBufferList fb_list_; }; #if CONFIG_WEBM_IO const char kVP9TestFile[] = "vp90-2-02-size-lf-1920x1080.webm"; const char kVP9NonRefTestFile[] = "vp90-2-22-svc_1280x720_1.webm"; // Class for testing passing in external frame buffers to libvpx. class ExternalFrameBufferTest : public ::testing::Test { protected: ExternalFrameBufferTest() : video_(NULL), decoder_(NULL), num_buffers_(0) {} virtual void SetUp() { video_ = new libvpx_test::WebMVideoSource(kVP9TestFile); ASSERT_TRUE(video_ != NULL); video_->Init(); video_->Begin(); vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); decoder_ = new libvpx_test::VP9Decoder(cfg, 0); ASSERT_TRUE(decoder_ != NULL); } virtual void TearDown() { delete decoder_; decoder_ = NULL; delete video_; video_ = NULL; } // Passes the external frame buffer information to libvpx. vpx_codec_err_t SetFrameBufferFunctions( int num_buffers, vpx_get_frame_buffer_cb_fn_t cb_get, vpx_release_frame_buffer_cb_fn_t cb_release) { if (num_buffers > 0) { num_buffers_ = num_buffers; EXPECT_TRUE(fb_list_.CreateBufferList(num_buffers_)); } return decoder_->SetFrameBufferFunctions(cb_get, cb_release, &fb_list_); } vpx_codec_err_t DecodeOneFrame() { const vpx_codec_err_t res = decoder_->DecodeFrame(video_->cxdata(), video_->frame_size()); CheckDecodedFrames(); if (res == VPX_CODEC_OK) video_->Next(); return res; } vpx_codec_err_t DecodeRemainingFrames() { for (; video_->cxdata() != NULL; video_->Next()) { const vpx_codec_err_t res = decoder_->DecodeFrame(video_->cxdata(), video_->frame_size()); if (res != VPX_CODEC_OK) return res; CheckDecodedFrames(); } return VPX_CODEC_OK; } void CheckDecodedFrames() { libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData(); const vpx_image_t *img = NULL; // Get decompressed data while ((img = dec_iter.Next()) != NULL) { fb_list_.CheckImageFrameBuffer(img); } } libvpx_test::WebMVideoSource *video_; libvpx_test::VP9Decoder *decoder_; int num_buffers_; ExternalFrameBufferList fb_list_; }; class ExternalFrameBufferNonRefTest : public ExternalFrameBufferTest { protected: virtual void SetUp() { video_ = new libvpx_test::WebMVideoSource(kVP9NonRefTestFile); ASSERT_TRUE(video_ != NULL); video_->Init(); video_->Begin(); vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); decoder_ = new libvpx_test::VP9Decoder(cfg, 0); ASSERT_TRUE(decoder_ != NULL); } virtual void CheckFrameBufferRelease() { TearDown(); ASSERT_EQ(0, fb_list_.num_used_buffers()); } }; #endif  // CONFIG_WEBM_IO
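// -------------------------------------------------------------------------
// [Editor's sketch -- not part of the original tarball.] The MD5 tests below
// pair every decoded frame with one line of a |filename|.md5 reference file.
// Schematically, the per-frame check implemented in DecompressedFrameHook()
// above boils down to:
//
//   ::libvpx_test::MD5 md5;                 // test/md5_helper.h
//   md5.Add(img);                           // hash the decoded vpx_image_t
//   EXPECT_STREQ(expected_md5, md5.Get());  // 32-char lowercase hex digest
//
// so a mismatch pinpoints the first frame whose reconstruction diverges from
// the reference decode.
// -------------------------------------------------------------------------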
// This test runs through the set of test vectors, and decodes them. // Libvpx will call into the application to allocate a frame buffer when // needed. The md5 checksums are computed for each frame in the video file. // If md5 checksums match the correct md5 data, then the test is passed. // Otherwise, the test failed. TEST_P(ExternalFrameBufferMD5Test, ExtFBMD5Match) { const std::string filename = GET_PARAM(kVideoNameParam); // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS + // #VPX_MAXIMUM_WORK_BUFFERS + four jitter buffers. const int jitter_buffers = 4; const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers; set_num_buffers(num_buffers); #if CONFIG_VP8_DECODER // Tell compiler we are not using kVP8TestVectors. (void)libvpx_test::kVP8TestVectors; #endif // Open compressed video file. std::unique_ptr<libvpx_test::CompressedVideoSource> video; if (filename.substr(filename.length() - 3, 3) == "ivf") { video.reset(new libvpx_test::IVFVideoSource(filename)); } else { #if CONFIG_WEBM_IO video.reset(new libvpx_test::WebMVideoSource(filename)); #else fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n", filename.c_str()); return; #endif } ASSERT_TRUE(video.get() != NULL); video->Init(); // Construct md5 file name. const std::string md5_filename = filename + ".md5"; OpenMD5File(md5_filename); // Decode frame, and check the md5 matching. ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); } #if CONFIG_WEBM_IO TEST_F(ExternalFrameBufferTest, MinFrameBuffers) { // Minimum number of external frame buffers for VP9 is // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS. const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; ASSERT_EQ(VPX_CODEC_OK, SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer)); ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames()); } TEST_F(ExternalFrameBufferTest, EightJitterBuffers) { // Number of buffers equals #VP9_MAXIMUM_REF_BUFFERS + // #VPX_MAXIMUM_WORK_BUFFERS + eight jitter buffers. const int jitter_buffers = 8; const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS + jitter_buffers; ASSERT_EQ(VPX_CODEC_OK, SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer)); ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames()); } TEST_F(ExternalFrameBufferTest, NotEnoughBuffers) { // Minimum number of external frame buffers for VP9 is // #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS. Most files will // only use 5 frame buffers at one time. const int num_buffers = 2; ASSERT_EQ(VPX_CODEC_OK, SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer)); ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame()); // Only run this on long clips. Decoding a very short clip will return // VPX_CODEC_OK even with only 2 buffers.
ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeRemainingFrames()); } TEST_F(ExternalFrameBufferTest, NoRelease) { const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; ASSERT_EQ(VPX_CODEC_OK, SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer, do_not_release_vp9_frame_buffer)); ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame()); ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeRemainingFrames()); } TEST_F(ExternalFrameBufferTest, NullRealloc) { const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; ASSERT_EQ(VPX_CODEC_OK, SetFrameBufferFunctions(num_buffers, get_vp9_zero_frame_buffer, release_vp9_frame_buffer)); ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeOneFrame()); } TEST_F(ExternalFrameBufferTest, ReallocOneLessByte) { const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; ASSERT_EQ(VPX_CODEC_OK, SetFrameBufferFunctions( num_buffers, get_vp9_one_less_byte_frame_buffer, release_vp9_frame_buffer)); ASSERT_EQ(VPX_CODEC_MEM_ERROR, DecodeOneFrame()); } TEST_F(ExternalFrameBufferTest, NullGetFunction) { const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; ASSERT_EQ( VPX_CODEC_INVALID_PARAM, SetFrameBufferFunctions(num_buffers, NULL, release_vp9_frame_buffer)); } TEST_F(ExternalFrameBufferTest, NullReleaseFunction) { const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; ASSERT_EQ(VPX_CODEC_INVALID_PARAM, SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer, NULL)); } TEST_F(ExternalFrameBufferTest, SetAfterDecode) { const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; ASSERT_EQ(VPX_CODEC_OK, DecodeOneFrame()); ASSERT_EQ(VPX_CODEC_ERROR, SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer)); } TEST_F(ExternalFrameBufferNonRefTest, ReleaseNonRefFrameBuffer) { const int num_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; ASSERT_EQ(VPX_CODEC_OK, SetFrameBufferFunctions(num_buffers, get_vp9_frame_buffer, release_vp9_frame_buffer)); ASSERT_EQ(VPX_CODEC_OK, DecodeRemainingFrames()); CheckFrameBufferRelease(); } #endif // CONFIG_WEBM_IO VP9_INSTANTIATE_TEST_CASE( ExternalFrameBufferMD5Test, ::testing::ValuesIn(libvpx_test::kVP9TestVectors, libvpx_test::kVP9TestVectors + libvpx_test::kNumVP9TestVectors)); } // namespace libvpx-1.8.2/test/fdct8x8_test.cc000066400000000000000000000654221357355204000166600ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <math.h> #include <stdlib.h> #include <string.h> #include <tuple> #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_scan.h" #include "vpx/vpx_codec.h" #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" using libvpx_test::ACMRandom; namespace { const int kNumCoeffs = 64; const double kPi = 3.141592653589793238462643383279502884; const int kSignBiasMaxDiff255 = 1500; const int kSignBiasMaxDiff15 = 10000; typedef void (*FdctFunc)(const int16_t *in, tran_low_t *out, int stride); typedef void (*IdctFunc)(const tran_low_t *in, uint8_t *out, int stride); typedef void (*FhtFunc)(const int16_t *in, tran_low_t *out, int stride, int tx_type); typedef void (*IhtFunc)(const tran_low_t *in, uint8_t *out, int stride, int tx_type); typedef std::tuple<FdctFunc, IdctFunc, int, vpx_bit_depth_t> Dct8x8Param; typedef std::tuple<FhtFunc, IhtFunc, int, vpx_bit_depth_t> Ht8x8Param; typedef std::tuple<IdctFunc, IdctFunc, int, vpx_bit_depth_t> Idct8x8Param; void reference_8x8_dct_1d(const double in[8], double out[8]) { const double kInvSqrt2 = 0.707106781186547524400844362104; for (int k = 0; k < 8; k++) { out[k] = 0.0; for (int n = 0; n < 8; n++) { out[k] += in[n] * cos(kPi * (2 * n + 1) * k / 16.0); } if (k == 0) out[k] = out[k] * kInvSqrt2; } } void reference_8x8_dct_2d(const int16_t input[kNumCoeffs], double output[kNumCoeffs]) { // First transform columns for (int i = 0; i < 8; ++i) { double temp_in[8], temp_out[8]; for (int j = 0; j < 8; ++j) temp_in[j] = input[j * 8 + i]; reference_8x8_dct_1d(temp_in, temp_out); for (int j = 0; j < 8; ++j) output[j * 8 + i] = temp_out[j]; } // Then transform rows for (int i = 0; i < 8; ++i) { double temp_in[8], temp_out[8]; for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i * 8]; reference_8x8_dct_1d(temp_in, temp_out); // Scale by some magic number for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j] * 2; } } void fdct8x8_ref(const int16_t *in, tran_low_t *out, int stride, int /*tx_type*/) { vpx_fdct8x8_c(in, out, stride); } void fht8x8_ref(const int16_t *in, tran_low_t *out, int stride, int tx_type) { vp9_fht8x8_c(in, out, stride, tx_type); } #if CONFIG_VP9_HIGHBITDEPTH void idct8x8_10(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 10); } void idct8x8_12(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, 12); } void iht8x8_10(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 10); } void iht8x8_12(const tran_low_t *in, uint8_t *out, int stride, int tx_type) { vp9_highbd_iht8x8_64_add_c(in, CAST_TO_SHORTPTR(out), stride, tx_type, 12); } #if HAVE_SSE2 void idct8x8_12_add_10_c(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 10); } void idct8x8_12_add_12_c(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct8x8_12_add_c(in, CAST_TO_SHORTPTR(out), stride, 12); } void idct8x8_12_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 10); } void idct8x8_12_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct8x8_12_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12); } void idct8x8_64_add_10_sse2(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct8x8_64_add_sse2(in,
CAST_TO_SHORTPTR(out), stride, 10); } void idct8x8_64_add_12_sse2(const tran_low_t *in, uint8_t *out, int stride) { vpx_highbd_idct8x8_64_add_sse2(in, CAST_TO_SHORTPTR(out), stride, 12); } #endif // HAVE_SSE2 #endif // CONFIG_VP9_HIGHBITDEPTH class FwdTrans8x8TestBase { public: virtual ~FwdTrans8x8TestBase() {} protected: virtual void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) = 0; virtual void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) = 0; void RunSignBiasCheck() { ACMRandom rnd(ACMRandom::DeterministicSeed()); DECLARE_ALIGNED(16, int16_t, test_input_block[64]); DECLARE_ALIGNED(16, tran_low_t, test_output_block[64]); int count_sign_block[64][2]; const int count_test_block = 100000; memset(count_sign_block, 0, sizeof(count_sign_block)); for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with input range [-255, 255]. for (int j = 0; j < 64; ++j) { test_input_block[j] = ((rnd.Rand16() >> (16 - bit_depth_)) & mask_) - ((rnd.Rand16() >> (16 - bit_depth_)) & mask_); } ASM_REGISTER_STATE_CHECK( RunFwdTxfm(test_input_block, test_output_block, pitch_)); for (int j = 0; j < 64; ++j) { if (test_output_block[j] < 0) { ++count_sign_block[j][0]; } else if (test_output_block[j] > 0) { ++count_sign_block[j][1]; } } } for (int j = 0; j < 64; ++j) { const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]); const int max_diff = kSignBiasMaxDiff255; EXPECT_LT(diff, max_diff << (bit_depth_ - 8)) << "Error: 8x8 FDCT/FHT has a sign bias > " << 1. * max_diff / count_test_block * 100 << "%" << " for input range [-255, 255] at index " << j << " count0: " << count_sign_block[j][0] << " count1: " << count_sign_block[j][1] << " diff: " << diff; } memset(count_sign_block, 0, sizeof(count_sign_block)); for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with input range [-mask_ / 16, mask_ / 16]. for (int j = 0; j < 64; ++j) { test_input_block[j] = ((rnd.Rand16() & mask_) >> 4) - ((rnd.Rand16() & mask_) >> 4); } ASM_REGISTER_STATE_CHECK( RunFwdTxfm(test_input_block, test_output_block, pitch_)); for (int j = 0; j < 64; ++j) { if (test_output_block[j] < 0) { ++count_sign_block[j][0]; } else if (test_output_block[j] > 0) { ++count_sign_block[j][1]; } } } for (int j = 0; j < 64; ++j) { const int diff = abs(count_sign_block[j][0] - count_sign_block[j][1]); const int max_diff = kSignBiasMaxDiff15; EXPECT_LT(diff, max_diff << (bit_depth_ - 8)) << "Error: 8x8 FDCT/FHT has a sign bias > " << 1. * max_diff / count_test_block * 100 << "%" << " for input range [-15, 15] at index " << j << " count0: " << count_sign_block[j][0] << " count1: " << count_sign_block[j][1] << " diff: " << diff; } } void RunRoundTripErrorCheck() { ACMRandom rnd(ACMRandom::DeterministicSeed()); int max_error = 0; int total_error = 0; const int count_test_block = 100000; DECLARE_ALIGNED(16, int16_t, test_input_block[64]); DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]); DECLARE_ALIGNED(16, uint8_t, dst[64]); DECLARE_ALIGNED(16, uint8_t, src[64]); #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, dst16[64]); DECLARE_ALIGNED(16, uint16_t, src16[64]); #endif for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with input range [-mask_, mask_]. 
for (int j = 0; j < 64; ++j) { if (bit_depth_ == VPX_BITS_8) { src[j] = rnd.Rand8(); dst[j] = rnd.Rand8(); test_input_block[j] = src[j] - dst[j]; #if CONFIG_VP9_HIGHBITDEPTH } else { src16[j] = rnd.Rand16() & mask_; dst16[j] = rnd.Rand16() & mask_; test_input_block[j] = src16[j] - dst16[j]; #endif } } ASM_REGISTER_STATE_CHECK( RunFwdTxfm(test_input_block, test_temp_block, pitch_)); for (int j = 0; j < 64; ++j) { if (test_temp_block[j] > 0) { test_temp_block[j] += 2; test_temp_block[j] /= 4; test_temp_block[j] *= 4; } else { test_temp_block[j] -= 2; test_temp_block[j] /= 4; test_temp_block[j] *= 4; } } if (bit_depth_ == VPX_BITS_8) { ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_)); #if CONFIG_VP9_HIGHBITDEPTH } else { ASM_REGISTER_STATE_CHECK( RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } for (int j = 0; j < 64; ++j) { #if CONFIG_VP9_HIGHBITDEPTH const int diff = bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; #else const int diff = dst[j] - src[j]; #endif const int error = diff * diff; if (max_error < error) max_error = error; total_error += error; } } EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error) << "Error: 8x8 FDCT/IDCT or FHT/IHT has an individual" << " roundtrip error > 1"; EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error) << "Error: 8x8 FDCT/IDCT or FHT/IHT has average roundtrip " << "error > 1/5 per block"; } void RunExtremalCheck() { ACMRandom rnd(ACMRandom::DeterministicSeed()); int max_error = 0; int total_error = 0; int total_coeff_error = 0; const int count_test_block = 100000; DECLARE_ALIGNED(16, int16_t, test_input_block[64]); DECLARE_ALIGNED(16, tran_low_t, test_temp_block[64]); DECLARE_ALIGNED(16, tran_low_t, ref_temp_block[64]); DECLARE_ALIGNED(16, uint8_t, dst[64]); DECLARE_ALIGNED(16, uint8_t, src[64]); #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, dst16[64]); DECLARE_ALIGNED(16, uint16_t, src16[64]); #endif for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with input range [-mask_, mask_]. for (int j = 0; j < 64; ++j) { if (bit_depth_ == VPX_BITS_8) { if (i == 0) { src[j] = 255; dst[j] = 0; } else if (i == 1) { src[j] = 0; dst[j] = 255; } else { src[j] = rnd.Rand8() % 2 ? 255 : 0; dst[j] = rnd.Rand8() % 2 ? 255 : 0; } test_input_block[j] = src[j] - dst[j]; #if CONFIG_VP9_HIGHBITDEPTH } else { if (i == 0) { src16[j] = mask_; dst16[j] = 0; } else if (i == 1) { src16[j] = 0; dst16[j] = mask_; } else { src16[j] = rnd.Rand8() % 2 ? mask_ : 0; dst16[j] = rnd.Rand8() % 2 ? mask_ : 0; } test_input_block[j] = src16[j] - dst16[j]; #endif } } ASM_REGISTER_STATE_CHECK( RunFwdTxfm(test_input_block, test_temp_block, pitch_)); ASM_REGISTER_STATE_CHECK( fwd_txfm_ref(test_input_block, ref_temp_block, pitch_, tx_type_)); if (bit_depth_ == VPX_BITS_8) { ASM_REGISTER_STATE_CHECK(RunInvTxfm(test_temp_block, dst, pitch_)); #if CONFIG_VP9_HIGHBITDEPTH } else { ASM_REGISTER_STATE_CHECK( RunInvTxfm(test_temp_block, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } for (int j = 0; j < 64; ++j) { #if CONFIG_VP9_HIGHBITDEPTH const int diff = bit_depth_ == VPX_BITS_8 ? 
dst[j] - src[j] : dst16[j] - src16[j]; #else const int diff = dst[j] - src[j]; #endif const int error = diff * diff; if (max_error < error) max_error = error; total_error += error; const int coeff_diff = test_temp_block[j] - ref_temp_block[j]; total_coeff_error += abs(coeff_diff); } EXPECT_GE(1 << 2 * (bit_depth_ - 8), max_error) << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has" << "an individual roundtrip error > 1"; EXPECT_GE((count_test_block << 2 * (bit_depth_ - 8)) / 5, total_error) << "Error: Extremal 8x8 FDCT/IDCT or FHT/IHT has average" << " roundtrip error > 1/5 per block"; EXPECT_EQ(0, total_coeff_error) << "Error: Extremal 8x8 FDCT/FHT has" << "overflow issues in the intermediate steps > 1"; } } void RunInvAccuracyCheck() { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 1000; DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]); DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); DECLARE_ALIGNED(16, uint8_t, src[kNumCoeffs]); #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, src16[kNumCoeffs]); DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); #endif for (int i = 0; i < count_test_block; ++i) { double out_r[kNumCoeffs]; // Initialize a test block with input range [-255, 255]. for (int j = 0; j < kNumCoeffs; ++j) { if (bit_depth_ == VPX_BITS_8) { src[j] = rnd.Rand8() % 2 ? 255 : 0; dst[j] = src[j] > 0 ? 0 : 255; in[j] = src[j] - dst[j]; #if CONFIG_VP9_HIGHBITDEPTH } else { src16[j] = rnd.Rand8() % 2 ? mask_ : 0; dst16[j] = src16[j] > 0 ? 0 : mask_; in[j] = src16[j] - dst16[j]; #endif } } reference_8x8_dct_2d(in, out_r); for (int j = 0; j < kNumCoeffs; ++j) { coeff[j] = static_cast(round(out_r[j])); } if (bit_depth_ == VPX_BITS_8) { ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_)); #if CONFIG_VP9_HIGHBITDEPTH } else { ASM_REGISTER_STATE_CHECK( RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } for (int j = 0; j < kNumCoeffs; ++j) { #if CONFIG_VP9_HIGHBITDEPTH const int diff = bit_depth_ == VPX_BITS_8 ? dst[j] - src[j] : dst16[j] - src16[j]; #else const int diff = dst[j] - src[j]; #endif const uint32_t error = diff * diff; EXPECT_GE(1u << 2 * (bit_depth_ - 8), error) << "Error: 8x8 IDCT has error " << error << " at index " << j; } } } void RunFwdAccuracyCheck() { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 1000; DECLARE_ALIGNED(16, int16_t, in[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, coeff_r[kNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]); for (int i = 0; i < count_test_block; ++i) { double out_r[kNumCoeffs]; // Initialize a test block with input range [-mask_, mask_]. for (int j = 0; j < kNumCoeffs; ++j) { in[j] = rnd.Rand8() % 2 == 0 ? 
mask_ : -mask_; } RunFwdTxfm(in, coeff, pitch_); reference_8x8_dct_2d(in, out_r); for (int j = 0; j < kNumCoeffs; ++j) { coeff_r[j] = static_cast(round(out_r[j])); } for (int j = 0; j < kNumCoeffs; ++j) { const int32_t diff = coeff[j] - coeff_r[j]; const uint32_t error = diff * diff; EXPECT_GE(9u << 2 * (bit_depth_ - 8), error) << "Error: 8x8 DCT has error " << error << " at index " << j; } } } void CompareInvReference(IdctFunc ref_txfm, int thresh) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 10000; const int eob = 12; DECLARE_ALIGNED(16, tran_low_t, coeff[kNumCoeffs]); DECLARE_ALIGNED(16, uint8_t, dst[kNumCoeffs]); DECLARE_ALIGNED(16, uint8_t, ref[kNumCoeffs]); #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, dst16[kNumCoeffs]); DECLARE_ALIGNED(16, uint16_t, ref16[kNumCoeffs]); #endif const int16_t *scan = vp9_default_scan_orders[TX_8X8].scan; for (int i = 0; i < count_test_block; ++i) { for (int j = 0; j < kNumCoeffs; ++j) { if (j < eob) { // Random values less than the threshold, either positive or negative coeff[scan[j]] = rnd(thresh) * (1 - 2 * (i % 2)); } else { coeff[scan[j]] = 0; } if (bit_depth_ == VPX_BITS_8) { dst[j] = 0; ref[j] = 0; #if CONFIG_VP9_HIGHBITDEPTH } else { dst16[j] = 0; ref16[j] = 0; #endif } } if (bit_depth_ == VPX_BITS_8) { ref_txfm(coeff, ref, pitch_); ASM_REGISTER_STATE_CHECK(RunInvTxfm(coeff, dst, pitch_)); #if CONFIG_VP9_HIGHBITDEPTH } else { ref_txfm(coeff, CAST_TO_BYTEPTR(ref16), pitch_); ASM_REGISTER_STATE_CHECK( RunInvTxfm(coeff, CAST_TO_BYTEPTR(dst16), pitch_)); #endif } for (int j = 0; j < kNumCoeffs; ++j) { #if CONFIG_VP9_HIGHBITDEPTH const int diff = bit_depth_ == VPX_BITS_8 ? dst[j] - ref[j] : dst16[j] - ref16[j]; #else const int diff = dst[j] - ref[j]; #endif const uint32_t error = diff * diff; EXPECT_EQ(0u, error) << "Error: 8x8 IDCT has error " << error << " at index " << j; } } } int pitch_; int tx_type_; FhtFunc fwd_txfm_ref; vpx_bit_depth_t bit_depth_; int mask_; }; class FwdTrans8x8DCT : public FwdTrans8x8TestBase, public ::testing::TestWithParam { public: virtual ~FwdTrans8x8DCT() {} virtual void SetUp() { fwd_txfm_ = GET_PARAM(0); inv_txfm_ = GET_PARAM(1); tx_type_ = GET_PARAM(2); pitch_ = 8; fwd_txfm_ref = fdct8x8_ref; bit_depth_ = GET_PARAM(3); mask_ = (1 << bit_depth_) - 1; } virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) { fwd_txfm_(in, out, stride); } void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) { inv_txfm_(out, dst, stride); } FdctFunc fwd_txfm_; IdctFunc inv_txfm_; }; TEST_P(FwdTrans8x8DCT, SignBiasCheck) { RunSignBiasCheck(); } TEST_P(FwdTrans8x8DCT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); } TEST_P(FwdTrans8x8DCT, ExtremalCheck) { RunExtremalCheck(); } TEST_P(FwdTrans8x8DCT, FwdAccuracyCheck) { RunFwdAccuracyCheck(); } TEST_P(FwdTrans8x8DCT, InvAccuracyCheck) { RunInvAccuracyCheck(); } class FwdTrans8x8HT : public FwdTrans8x8TestBase, public ::testing::TestWithParam { public: virtual ~FwdTrans8x8HT() {} virtual void SetUp() { fwd_txfm_ = GET_PARAM(0); inv_txfm_ = GET_PARAM(1); tx_type_ = GET_PARAM(2); pitch_ = 8; fwd_txfm_ref = fht8x8_ref; bit_depth_ = GET_PARAM(3); mask_ = (1 << bit_depth_) - 1; } virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: void RunFwdTxfm(int16_t *in, tran_low_t *out, int stride) { fwd_txfm_(in, out, stride, tx_type_); } void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) { inv_txfm_(out, dst, stride, tx_type_); } FhtFunc fwd_txfm_; 
IhtFunc inv_txfm_; }; TEST_P(FwdTrans8x8HT, SignBiasCheck) { RunSignBiasCheck(); } TEST_P(FwdTrans8x8HT, RoundTripErrorCheck) { RunRoundTripErrorCheck(); } TEST_P(FwdTrans8x8HT, ExtremalCheck) { RunExtremalCheck(); } class InvTrans8x8DCT : public FwdTrans8x8TestBase, public ::testing::TestWithParam { public: virtual ~InvTrans8x8DCT() {} virtual void SetUp() { ref_txfm_ = GET_PARAM(0); inv_txfm_ = GET_PARAM(1); thresh_ = GET_PARAM(2); pitch_ = 8; bit_depth_ = GET_PARAM(3); mask_ = (1 << bit_depth_) - 1; } virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: void RunInvTxfm(tran_low_t *out, uint8_t *dst, int stride) { inv_txfm_(out, dst, stride); } void RunFwdTxfm(int16_t * /*out*/, tran_low_t * /*dst*/, int /*stride*/) {} IdctFunc ref_txfm_; IdctFunc inv_txfm_; int thresh_; }; TEST_P(InvTrans8x8DCT, CompareReference) { CompareInvReference(ref_txfm_, thresh_); } using std::make_tuple; #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( C, FwdTrans8x8DCT, ::testing::Values( make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8), make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_10, 0, VPX_BITS_10), make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_12, 0, VPX_BITS_12))); #else INSTANTIATE_TEST_CASE_P(C, FwdTrans8x8DCT, ::testing::Values(make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( C, FwdTrans8x8HT, ::testing::Values( make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8), make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 0, VPX_BITS_10), make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 1, VPX_BITS_10), make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 2, VPX_BITS_10), make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_10, 3, VPX_BITS_10), make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 0, VPX_BITS_12), make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 1, VPX_BITS_12), make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 2, VPX_BITS_12), make_tuple(&vp9_highbd_fht8x8_c, &iht8x8_12, 3, VPX_BITS_12), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8))); #else INSTANTIATE_TEST_CASE_P( C, FwdTrans8x8HT, ::testing::Values( make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8))); #endif // CONFIG_VP9_HIGHBITDEPTH #if HAVE_NEON && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P(NEON, FwdTrans8x8DCT, ::testing::Values(make_tuple(&vpx_fdct8x8_neon, &vpx_idct8x8_64_add_neon, 0, VPX_BITS_8))); #if !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( NEON, FwdTrans8x8HT, ::testing::Values( make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 0, VPX_BITS_8), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 1, VPX_BITS_8), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 2, VPX_BITS_8), make_tuple(&vp9_fht8x8_c, &vp9_iht8x8_64_add_neon, 3, VPX_BITS_8))); #endif // !CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_NEON && !CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P(SSE2, FwdTrans8x8DCT, ::testing::Values(make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_sse2, 0, VPX_BITS_8))); INSTANTIATE_TEST_CASE_P( SSE2, FwdTrans8x8HT, ::testing::Values( make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 0, VPX_BITS_8), 
make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 1, VPX_BITS_8), make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 2, VPX_BITS_8), make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_sse2, 3, VPX_BITS_8))); #endif // HAVE_SSE2 && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P( SSE2, FwdTrans8x8DCT, ::testing::Values(make_tuple(&vpx_fdct8x8_sse2, &vpx_idct8x8_64_add_c, 0, VPX_BITS_8), make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_10_sse2, 12, VPX_BITS_10), make_tuple(&vpx_highbd_fdct8x8_sse2, &idct8x8_64_add_10_sse2, 12, VPX_BITS_10), make_tuple(&vpx_highbd_fdct8x8_c, &idct8x8_64_add_12_sse2, 12, VPX_BITS_12), make_tuple(&vpx_highbd_fdct8x8_sse2, &idct8x8_64_add_12_sse2, 12, VPX_BITS_12))); INSTANTIATE_TEST_CASE_P( SSE2, FwdTrans8x8HT, ::testing::Values( make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 0, VPX_BITS_8), make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 1, VPX_BITS_8), make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 2, VPX_BITS_8), make_tuple(&vp9_fht8x8_sse2, &vp9_iht8x8_64_add_c, 3, VPX_BITS_8))); // Optimizations take effect at a threshold of 6201, so we use a value close to // that to test both branches. INSTANTIATE_TEST_CASE_P( SSE2, InvTrans8x8DCT, ::testing::Values( make_tuple(&idct8x8_12_add_10_c, &idct8x8_12_add_10_sse2, 6225, VPX_BITS_10), make_tuple(&idct8x8_10, &idct8x8_64_add_10_sse2, 6225, VPX_BITS_10), make_tuple(&idct8x8_12_add_12_c, &idct8x8_12_add_12_sse2, 6225, VPX_BITS_12), make_tuple(&idct8x8_12, &idct8x8_64_add_12_sse2, 6225, VPX_BITS_12))); #endif // HAVE_SSE2 && CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_SSSE3 && VPX_ARCH_X86_64 && !CONFIG_VP9_HIGHBITDEPTH && \ !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P(SSSE3, FwdTrans8x8DCT, ::testing::Values(make_tuple(&vpx_fdct8x8_ssse3, &vpx_idct8x8_64_add_sse2, 0, VPX_BITS_8))); #endif #if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P(MSA, FwdTrans8x8DCT, ::testing::Values(make_tuple(&vpx_fdct8x8_msa, &vpx_idct8x8_64_add_msa, 0, VPX_BITS_8))); INSTANTIATE_TEST_CASE_P( MSA, FwdTrans8x8HT, ::testing::Values( make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 0, VPX_BITS_8), make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 1, VPX_BITS_8), make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 2, VPX_BITS_8), make_tuple(&vp9_fht8x8_msa, &vp9_iht8x8_64_add_msa, 3, VPX_BITS_8))); #endif // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE #if HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE INSTANTIATE_TEST_CASE_P(VSX, FwdTrans8x8DCT, ::testing::Values(make_tuple(&vpx_fdct8x8_c, &vpx_idct8x8_64_add_vsx, 0, VPX_BITS_8))); #endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH && !CONFIG_EMULATE_HARDWARE } // namespace libvpx-1.8.2/test/frame_size_tests.cc000066400000000000000000000061701357355204000176720ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/video_source.h" namespace { class VP9FrameSizeTestsLarge : public ::libvpx_test::EncoderTest, public ::testing::Test { protected: VP9FrameSizeTestsLarge() : EncoderTest(&::libvpx_test::kVP9), expected_res_(VPX_CODEC_OK) {} virtual ~VP9FrameSizeTestsLarge() {} virtual void SetUp() { InitializeConfig(); SetMode(::libvpx_test::kRealTime); } virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec, const libvpx_test::VideoSource & /*video*/, libvpx_test::Decoder *decoder) { EXPECT_EQ(expected_res_, res_dec) << decoder->DecodeError(); return !::testing::Test::HasFailure(); } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, 7); encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); encoder->Control(VP8E_SET_ARNR_TYPE, 3); } } int expected_res_; }; TEST_F(VP9FrameSizeTestsLarge, TestInvalidSizes) { ::libvpx_test::RandomVideoSource video; #if CONFIG_SIZE_LIMIT video.SetSize(DECODE_WIDTH_LIMIT + 16, DECODE_HEIGHT_LIMIT + 16); video.set_limit(2); expected_res_ = VPX_CODEC_CORRUPT_FRAME; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); #endif } TEST_F(VP9FrameSizeTestsLarge, ValidSizes) { ::libvpx_test::RandomVideoSource video; #if CONFIG_SIZE_LIMIT video.SetSize(DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT); video.set_limit(2); expected_res_ = VPX_CODEC_OK; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); #else // This test produces a pretty large single frame allocation, (roughly // 25 megabits). The encoder allocates a good number of these frames // one for each lag in frames (for 2 pass), and then one for each possible // reference buffer (8) - we can end up with up to 30 buffers of roughly this // size or almost 1 gig of memory. // In total the allocations will exceed 2GiB which may cause a failure with // mingw + wine, use a smaller size in that case. #if defined(_WIN32) && !defined(_WIN64) || defined(__OS2__) video.SetSize(4096, 3072); #else video.SetSize(4096, 4096); #endif video.set_limit(2); expected_res_ = VPX_CODEC_OK; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); #endif } TEST_F(VP9FrameSizeTestsLarge, OneByOneVideo) { ::libvpx_test::RandomVideoSource video; video.SetSize(1, 1); video.set_limit(2); expected_res_ = VPX_CODEC_OK; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } } // namespace libvpx-1.8.2/test/hadamard_test.cc000066400000000000000000000233731357355204000171300ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <algorithm> #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_dsp_rtcd.h" #include "vpx_ports/vpx_timer.h" #include "test/acm_random.h" #include "test/register_state_check.h" namespace { using ::libvpx_test::ACMRandom; typedef void (*HadamardFunc)(const int16_t *a, ptrdiff_t a_stride, tran_low_t *b); void hadamard_loop(const tran_low_t *a, tran_low_t *out) { tran_low_t b[8]; for (int i = 0; i < 8; i += 2) { b[i + 0] = a[i * 8] + a[(i + 1) * 8]; b[i + 1] = a[i * 8] - a[(i + 1) * 8]; } tran_low_t c[8]; for (int i = 0; i < 8; i += 4) { c[i + 0] = b[i + 0] + b[i + 2]; c[i + 1] = b[i + 1] + b[i + 3]; c[i + 2] = b[i + 0] - b[i + 2]; c[i + 3] = b[i + 1] - b[i + 3]; } out[0] = c[0] + c[4]; out[7] = c[1] + c[5]; out[3] = c[2] + c[6]; out[4] = c[3] + c[7]; out[2] = c[0] - c[4]; out[6] = c[1] - c[5]; out[1] = c[2] - c[6]; out[5] = c[3] - c[7]; } void reference_hadamard8x8(const int16_t *a, int a_stride, tran_low_t *b) { tran_low_t input[64]; tran_low_t buf[64]; for (int i = 0; i < 8; ++i) { for (int j = 0; j < 8; ++j) { input[i * 8 + j] = static_cast<tran_low_t>(a[i * a_stride + j]); } } for (int i = 0; i < 8; ++i) hadamard_loop(input + i, buf + i * 8); for (int i = 0; i < 8; ++i) hadamard_loop(buf + i, b + i * 8); } void reference_hadamard16x16(const int16_t *a, int a_stride, tran_low_t *b) { /* The source is a 16x16 block. The destination is rearranged to 8x32. * Input is 9 bit. */ reference_hadamard8x8(a + 0 + 0 * a_stride, a_stride, b + 0); reference_hadamard8x8(a + 8 + 0 * a_stride, a_stride, b + 64); reference_hadamard8x8(a + 0 + 8 * a_stride, a_stride, b + 128); reference_hadamard8x8(a + 8 + 8 * a_stride, a_stride, b + 192); /* Overlay the 8x8 blocks and combine. */ for (int i = 0; i < 64; ++i) { /* 8x8 steps the range up to 15 bits. */ const tran_low_t a0 = b[0]; const tran_low_t a1 = b[64]; const tran_low_t a2 = b[128]; const tran_low_t a3 = b[192]; /* Prevent the result from escaping int16_t. */ const tran_low_t b0 = (a0 + a1) >> 1; const tran_low_t b1 = (a0 - a1) >> 1; const tran_low_t b2 = (a2 + a3) >> 1; const tran_low_t b3 = (a2 - a3) >> 1; /* Store a 16 bit value.
*/ b[0] = b0 + b2; b[64] = b1 + b3; b[128] = b0 - b2; b[192] = b1 - b3; ++b; } } void reference_hadamard32x32(const int16_t *a, int a_stride, tran_low_t *b) { reference_hadamard16x16(a + 0 + 0 * a_stride, a_stride, b + 0); reference_hadamard16x16(a + 16 + 0 * a_stride, a_stride, b + 256); reference_hadamard16x16(a + 0 + 16 * a_stride, a_stride, b + 512); reference_hadamard16x16(a + 16 + 16 * a_stride, a_stride, b + 768); for (int i = 0; i < 256; ++i) { const tran_low_t a0 = b[0]; const tran_low_t a1 = b[256]; const tran_low_t a2 = b[512]; const tran_low_t a3 = b[768]; const tran_low_t b0 = (a0 + a1) >> 2; const tran_low_t b1 = (a0 - a1) >> 2; const tran_low_t b2 = (a2 + a3) >> 2; const tran_low_t b3 = (a2 - a3) >> 2; b[0] = b0 + b2; b[256] = b1 + b3; b[512] = b0 - b2; b[768] = b1 - b3; ++b; } } struct HadamardFuncWithSize { HadamardFuncWithSize(HadamardFunc f, int s) : func(f), block_size(s) {} HadamardFunc func; int block_size; }; std::ostream &operator<<(std::ostream &os, const HadamardFuncWithSize &hfs) { return os << "block size: " << hfs.block_size; } class HadamardTestBase : public ::testing::TestWithParam { public: virtual void SetUp() { h_func_ = GetParam().func; bwh_ = GetParam().block_size; block_size_ = bwh_ * bwh_; rnd_.Reset(ACMRandom::DeterministicSeed()); } virtual int16_t Rand() = 0; void ReferenceHadamard(const int16_t *a, int a_stride, tran_low_t *b, int bwh) { if (bwh == 32) reference_hadamard32x32(a, a_stride, b); else if (bwh == 16) reference_hadamard16x16(a, a_stride, b); else reference_hadamard8x8(a, a_stride, b); } void CompareReferenceRandom() { const int kMaxBlockSize = 32 * 32; DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize]); DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]); memset(a, 0, sizeof(a)); memset(b, 0, sizeof(b)); tran_low_t b_ref[kMaxBlockSize]; memset(b_ref, 0, sizeof(b_ref)); for (int i = 0; i < block_size_; ++i) a[i] = Rand(); ReferenceHadamard(a, bwh_, b_ref, bwh_); ASM_REGISTER_STATE_CHECK(h_func_(a, bwh_, b)); // The order of the output is not important. Sort before checking. std::sort(b, b + block_size_); std::sort(b_ref, b_ref + block_size_); EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); } void VaryStride() { const int kMaxBlockSize = 32 * 32; DECLARE_ALIGNED(16, int16_t, a[kMaxBlockSize * 8]); DECLARE_ALIGNED(16, tran_low_t, b[kMaxBlockSize]); memset(a, 0, sizeof(a)); for (int i = 0; i < block_size_ * 8; ++i) a[i] = Rand(); tran_low_t b_ref[kMaxBlockSize]; for (int i = 8; i < 64; i += 8) { memset(b, 0, sizeof(b)); memset(b_ref, 0, sizeof(b_ref)); ReferenceHadamard(a, i, b_ref, bwh_); ASM_REGISTER_STATE_CHECK(h_func_(a, i, b)); // The order of the output is not important. Sort before checking. 
std::sort(b, b + block_size_); std::sort(b_ref, b_ref + block_size_); EXPECT_EQ(0, memcmp(b, b_ref, sizeof(b))); } } void SpeedTest(int times) { const int kMaxBlockSize = 32 * 32; DECLARE_ALIGNED(16, int16_t, input[kMaxBlockSize]); DECLARE_ALIGNED(16, tran_low_t, output[kMaxBlockSize]); memset(input, 1, sizeof(input)); memset(output, 0, sizeof(output)); vpx_usec_timer timer; vpx_usec_timer_start(&timer); for (int i = 0; i < times; ++i) { h_func_(input, bwh_, output); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("Hadamard%dx%d[%12d runs]: %d us\n", bwh_, bwh_, times, elapsed_time); } protected: int bwh_; int block_size_; HadamardFunc h_func_; ACMRandom rnd_; }; class HadamardLowbdTest : public HadamardTestBase { protected: virtual int16_t Rand() { return rnd_.Rand9Signed(); } }; TEST_P(HadamardLowbdTest, CompareReferenceRandom) { CompareReferenceRandom(); } TEST_P(HadamardLowbdTest, VaryStride) { VaryStride(); } TEST_P(HadamardLowbdTest, DISABLED_Speed) { SpeedTest(10); SpeedTest(10000); SpeedTest(10000000); } INSTANTIATE_TEST_CASE_P( C, HadamardLowbdTest, ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_c, 8), HadamardFuncWithSize(&vpx_hadamard_16x16_c, 16), HadamardFuncWithSize(&vpx_hadamard_32x32_c, 32))); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, HadamardLowbdTest, ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_sse2, 8), HadamardFuncWithSize(&vpx_hadamard_16x16_sse2, 16), HadamardFuncWithSize(&vpx_hadamard_32x32_sse2, 32))); #endif // HAVE_SSE2 #if HAVE_AVX2 INSTANTIATE_TEST_CASE_P( AVX2, HadamardLowbdTest, ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_16x16_avx2, 16), HadamardFuncWithSize(&vpx_hadamard_32x32_avx2, 32))); #endif // HAVE_AVX2 #if HAVE_SSSE3 && VPX_ARCH_X86_64 INSTANTIATE_TEST_CASE_P( SSSE3, HadamardLowbdTest, ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_ssse3, 8))); #endif // HAVE_SSSE3 && VPX_ARCH_X86_64 #if HAVE_NEON INSTANTIATE_TEST_CASE_P( NEON, HadamardLowbdTest, ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_neon, 8), HadamardFuncWithSize(&vpx_hadamard_16x16_neon, 16))); #endif // HAVE_NEON // TODO(jingning): Remove highbitdepth flag when the SIMD functions are // in place and turn on the unit test. 
#if !CONFIG_VP9_HIGHBITDEPTH #if HAVE_MSA INSTANTIATE_TEST_CASE_P( MSA, HadamardLowbdTest, ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_msa, 8), HadamardFuncWithSize(&vpx_hadamard_16x16_msa, 16))); #endif // HAVE_MSA #endif // !CONFIG_VP9_HIGHBITDEPTH #if HAVE_VSX INSTANTIATE_TEST_CASE_P( VSX, HadamardLowbdTest, ::testing::Values(HadamardFuncWithSize(&vpx_hadamard_8x8_vsx, 8), HadamardFuncWithSize(&vpx_hadamard_16x16_vsx, 16))); #endif // HAVE_VSX #if CONFIG_VP9_HIGHBITDEPTH class HadamardHighbdTest : public HadamardTestBase { protected: virtual int16_t Rand() { return rnd_.Rand13Signed(); } }; TEST_P(HadamardHighbdTest, CompareReferenceRandom) { CompareReferenceRandom(); } TEST_P(HadamardHighbdTest, VaryStride) { VaryStride(); } TEST_P(HadamardHighbdTest, DISABLED_Speed) { SpeedTest(10); SpeedTest(10000); SpeedTest(10000000); } INSTANTIATE_TEST_CASE_P( C, HadamardHighbdTest, ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_c, 8), HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_c, 16), HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_c, 32))); #if HAVE_AVX2 INSTANTIATE_TEST_CASE_P( AVX2, HadamardHighbdTest, ::testing::Values(HadamardFuncWithSize(&vpx_highbd_hadamard_8x8_avx2, 8), HadamardFuncWithSize(&vpx_highbd_hadamard_16x16_avx2, 16), HadamardFuncWithSize(&vpx_highbd_hadamard_32x32_avx2, 32))); #endif // HAVE_AVX2 #endif // CONFIG_VP9_HIGHBITDEPTH } // namespace libvpx-1.8.2/test/i420_video_source.h000066400000000000000000000022331357355204000174060ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_I420_VIDEO_SOURCE_H_ #define VPX_TEST_I420_VIDEO_SOURCE_H_ #include #include #include #include "test/yuv_video_source.h" namespace libvpx_test { // This class extends VideoSource to allow parsing of raw yv12 // so that we can do actual file encodes. class I420VideoSource : public YUVVideoSource { public: I420VideoSource(const std::string &file_name, unsigned int width, unsigned int height, int rate_numerator, int rate_denominator, unsigned int start, int limit) : YUVVideoSource(file_name, VPX_IMG_FMT_I420, width, height, rate_numerator, rate_denominator, start, limit) {} }; } // namespace libvpx_test #endif // VPX_TEST_I420_VIDEO_SOURCE_H_ libvpx-1.8.2/test/idct8x8_test.cc000066400000000000000000000052461357355204000166610ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <math.h> #include <stdio.h> #include <string.h> #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "vpx/vpx_integer.h" #include "vpx_ports/msvc.h" // for round() using libvpx_test::ACMRandom; namespace { void reference_dct_1d(double input[8], double output[8]) { const double kPi = 3.141592653589793238462643383279502884; const double kInvSqrt2 = 0.707106781186547524400844362104; for (int k = 0; k < 8; k++) { output[k] = 0.0; for (int n = 0; n < 8; n++) { output[k] += input[n] * cos(kPi * (2 * n + 1) * k / 16.0); } if (k == 0) output[k] = output[k] * kInvSqrt2; } } void reference_dct_2d(int16_t input[64], double output[64]) { // First transform columns for (int i = 0; i < 8; ++i) { double temp_in[8], temp_out[8]; for (int j = 0; j < 8; ++j) temp_in[j] = input[j * 8 + i]; reference_dct_1d(temp_in, temp_out); for (int j = 0; j < 8; ++j) output[j * 8 + i] = temp_out[j]; } // Then transform rows for (int i = 0; i < 8; ++i) { double temp_in[8], temp_out[8]; for (int j = 0; j < 8; ++j) temp_in[j] = output[j + i * 8]; reference_dct_1d(temp_in, temp_out); for (int j = 0; j < 8; ++j) output[j + i * 8] = temp_out[j]; } // Scale by some magic number for (int i = 0; i < 64; ++i) output[i] *= 2; } TEST(VP9Idct8x8Test, AccuracyCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 10000; for (int i = 0; i < count_test_block; ++i) { int16_t input[64]; tran_low_t coeff[64]; double output_r[64]; uint8_t dst[64], src[64]; for (int j = 0; j < 64; ++j) { src[j] = rnd.Rand8(); dst[j] = rnd.Rand8(); } // Initialize a test block with input range [-255, 255]. for (int j = 0; j < 64; ++j) input[j] = src[j] - dst[j]; reference_dct_2d(input, output_r); for (int j = 0; j < 64; ++j) { coeff[j] = static_cast<tran_low_t>(round(output_r[j])); } vpx_idct8x8_64_add_c(coeff, dst, 8); for (int j = 0; j < 64; ++j) { const int diff = dst[j] - src[j]; const int error = diff * diff; EXPECT_GE(1, error) << "Error: 8x8 FDCT/IDCT has error " << error << " at index " << j; } } } } // namespace
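// -------------------------------------------------------------------------
// [Editor's note -- not part of the original tarball.] reference_dct_1d()
// above is the textbook DCT-II with a 1/sqrt(2) correction on the DC basis:
//
//   X[k] = C(k) * sum_{n=0..7} x[n] * cos((2n+1) * k * pi / 16),
//   C(0) = 1/sqrt(2), C(k) = 1 for k > 0.
//
// reference_dct_2d() applies it to columns, then rows, then doubles the
// result (the "magic number"), which puts the floating-point reference on
// the fixed-point scale the test compares against vpx_idct8x8_64_add_c. One
// consequence, derived purely from the code above: a flat block of ones
// yields a DC term of 2 * (1/sqrt(2)) * 8 * (8/sqrt(2)) = 64 = 8 * 8 * 1,
// with every other coefficient 0. A quick check against the reference:
//
//   int16_t flat[64];
//   double ref[64];
//   for (int i = 0; i < 64; ++i) flat[i] = 1;
//   reference_dct_2d(flat, ref);  // ref[0] == 64.0, ref[1..63] ~= 0.0
// -------------------------------------------------------------------------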
libvpx-1.8.2/test/idct_test.cc000066400000000000000000000120401357355204000162770ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "./vp8_rtcd.h" #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/buffer.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "vpx/vpx_integer.h" typedef void (*IdctFunc)(int16_t *input, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride); namespace { using libvpx_test::Buffer; class IDCTTest : public ::testing::TestWithParam<IdctFunc> { protected: virtual void SetUp() { UUT = GetParam(); input = new Buffer<int16_t>(4, 4, 0); ASSERT_TRUE(input != NULL); ASSERT_TRUE(input->Init()); predict = new Buffer<uint8_t>(4, 4, 3); ASSERT_TRUE(predict != NULL); ASSERT_TRUE(predict->Init()); output = new Buffer<uint8_t>(4, 4, 3); ASSERT_TRUE(output != NULL); ASSERT_TRUE(output->Init()); } virtual void TearDown() { delete input; delete predict; delete output; libvpx_test::ClearSystemState(); } IdctFunc UUT; Buffer<int16_t> *input; Buffer<uint8_t> *predict; Buffer<uint8_t> *output; }; TEST_P(IDCTTest, TestAllZeros) { // When the input is '0' the output will be '0'. input->Set(0); predict->Set(0); output->Set(0); ASM_REGISTER_STATE_CHECK(UUT(input->TopLeftPixel(), predict->TopLeftPixel(), predict->stride(), output->TopLeftPixel(), output->stride())); ASSERT_TRUE(input->CheckValues(0)); ASSERT_TRUE(input->CheckPadding()); ASSERT_TRUE(output->CheckValues(0)); ASSERT_TRUE(output->CheckPadding()); } TEST_P(IDCTTest, TestAllOnes) { input->Set(0); ASSERT_TRUE(input->TopLeftPixel() != NULL); // When the first element is '4' it will fill the output buffer with '1'. input->TopLeftPixel()[0] = 4; predict->Set(0); output->Set(0); ASM_REGISTER_STATE_CHECK(UUT(input->TopLeftPixel(), predict->TopLeftPixel(), predict->stride(), output->TopLeftPixel(), output->stride())); ASSERT_TRUE(output->CheckValues(1)); ASSERT_TRUE(output->CheckPadding()); } TEST_P(IDCTTest, TestAddOne) { // Set the transform output to '1' and make sure it gets added to the // prediction buffer. input->Set(0); ASSERT_TRUE(input->TopLeftPixel() != NULL); input->TopLeftPixel()[0] = 4; output->Set(0); uint8_t *pred = predict->TopLeftPixel(); for (int y = 0; y < 4; ++y) { for (int x = 0; x < 4; ++x) { pred[y * predict->stride() + x] = y * 4 + x; } } ASM_REGISTER_STATE_CHECK(UUT(input->TopLeftPixel(), predict->TopLeftPixel(), predict->stride(), output->TopLeftPixel(), output->stride())); uint8_t const *out = output->TopLeftPixel(); for (int y = 0; y < 4; ++y) { for (int x = 0; x < 4; ++x) { EXPECT_EQ(1 + y * 4 + x, out[y * output->stride() + x]); } } if (HasFailure()) { output->DumpBuffer(); } ASSERT_TRUE(output->CheckPadding()); } TEST_P(IDCTTest, TestWithData) { // Test a single known input.
predict->Set(0); int16_t *in = input->TopLeftPixel(); for (int y = 0; y < 4; ++y) { for (int x = 0; x < 4; ++x) { in[y * input->stride() + x] = y * 4 + x; } } ASM_REGISTER_STATE_CHECK(UUT(input->TopLeftPixel(), predict->TopLeftPixel(), predict->stride(), output->TopLeftPixel(), output->stride())); uint8_t *out = output->TopLeftPixel(); for (int y = 0; y < 4; ++y) { for (int x = 0; x < 4; ++x) { switch (y * 4 + x) { case 0: EXPECT_EQ(11, out[y * output->stride() + x]); break; case 2: case 5: case 8: EXPECT_EQ(3, out[y * output->stride() + x]); break; case 10: EXPECT_EQ(1, out[y * output->stride() + x]); break; default: EXPECT_EQ(0, out[y * output->stride() + x]); } } } if (HasFailure()) { output->DumpBuffer(); } ASSERT_TRUE(output->CheckPadding()); } INSTANTIATE_TEST_CASE_P(C, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_c)); #if HAVE_NEON INSTANTIATE_TEST_CASE_P(NEON, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_neon)); #endif // HAVE_NEON #if HAVE_MMX INSTANTIATE_TEST_CASE_P(MMX, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_mmx)); #endif // HAVE_MMX #if HAVE_MSA INSTANTIATE_TEST_CASE_P(MSA, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_msa)); #endif // HAVE_MSA #if HAVE_MMI INSTANTIATE_TEST_CASE_P(MMI, IDCTTest, ::testing::Values(vp8_short_idct4x4llm_mmi)); #endif // HAVE_MMI } // namespace libvpx-1.8.2/test/invalid_file_test.cc000066400000000000000000000174141357355204000200130ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/ivf_video_source.h" #include "test/util.h" #if CONFIG_WEBM_IO #include "test/webm_video_source.h" #endif #include "vpx_mem/vpx_mem.h" namespace { struct DecodeParam { int threads; const char *filename; }; std::ostream &operator<<(std::ostream &os, const DecodeParam &dp) { return os << "threads: " << dp.threads << " file: " << dp.filename; } class InvalidFileTest : public ::libvpx_test::DecoderTest, public ::libvpx_test::CodecTestWithParam { protected: InvalidFileTest() : DecoderTest(GET_PARAM(0)), res_file_(NULL) {} virtual ~InvalidFileTest() { if (res_file_ != NULL) fclose(res_file_); } void OpenResFile(const std::string &res_file_name_) { res_file_ = libvpx_test::OpenTestDataFile(res_file_name_); ASSERT_TRUE(res_file_ != NULL) << "Result file open failed. Filename: " << res_file_name_; } virtual bool HandleDecodeResult( const vpx_codec_err_t res_dec, const libvpx_test::CompressedVideoSource &video, libvpx_test::Decoder *decoder) { EXPECT_TRUE(res_file_ != NULL); int expected_res_dec; // Read integer result. const int res = fscanf(res_file_, "%d", &expected_res_dec); EXPECT_NE(res, EOF) << "Read result data failed"; // Check results match. const DecodeParam input = GET_PARAM(1); if (input.threads > 1) { // The serial decode check is too strict for tile-threaded decoding as // there is no guarantee on the decode order nor which specific error // will take precedence. Currently a tile-level error is not forwarded so // the frame will simply be marked corrupt. 
EXPECT_TRUE(res_dec == expected_res_dec || res_dec == VPX_CODEC_CORRUPT_FRAME) << "Results don't match: frame number = " << video.frame_number() << ". (" << decoder->DecodeError() << "). Expected: " << expected_res_dec << " or " << VPX_CODEC_CORRUPT_FRAME; } else { EXPECT_EQ(expected_res_dec, res_dec) << "Results don't match: frame number = " << video.frame_number() << ". (" << decoder->DecodeError() << ")"; } return !HasFailure(); } void RunTest() { const DecodeParam input = GET_PARAM(1); vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); cfg.threads = input.threads; const std::string filename = input.filename; // Open compressed video file. std::unique_ptr video; if (filename.substr(filename.length() - 3, 3) == "ivf") { video.reset(new libvpx_test::IVFVideoSource(filename)); } else if (filename.substr(filename.length() - 4, 4) == "webm") { #if CONFIG_WEBM_IO video.reset(new libvpx_test::WebMVideoSource(filename)); #else fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n", filename.c_str()); return; #endif } ASSERT_TRUE(video.get() != NULL); video->Init(); // Construct result file name. The file holds a list of expected integer // results, one for each decoded frame. Any result that doesn't match // the files list will cause a test failure. const std::string res_filename = filename + ".res"; OpenResFile(res_filename); // Decode frame, and check the md5 matching. ASSERT_NO_FATAL_FAILURE(RunLoop(video.get(), cfg)); } private: FILE *res_file_; }; TEST_P(InvalidFileTest, ReturnCode) { RunTest(); } #if CONFIG_VP8_DECODER const DecodeParam kVP8InvalidFileTests[] = { { 1, "invalid-bug-1443.ivf" }, { 1, "invalid-token-partition.ivf" }, { 1, "invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf" }, }; VP8_INSTANTIATE_TEST_CASE(InvalidFileTest, ::testing::ValuesIn(kVP8InvalidFileTests)); #endif // CONFIG_VP8_DECODER #if CONFIG_VP9_DECODER const DecodeParam kVP9InvalidFileTests[] = { { 1, "invalid-vp90-02-v2.webm" }, #if CONFIG_VP9_HIGHBITDEPTH { 1, "invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf" }, { 1, "invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-." "ivf" }, #endif { 1, "invalid-vp90-03-v3.webm" }, { 1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf" }, { 1, "invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf" }, // This file will cause a large allocation which is expected to fail in 32-bit // environments. Test x86 for coverage purposes as the allocation failure will // be in platform agnostic code. #if VPX_ARCH_X86 { 1, "invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf" }, #endif { 1, "invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf" }, { 1, "invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf" }, { 1, "invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf" }, { 1, "invalid-vp91-2-mixedrefcsp-444to420.ivf" }, { 1, "invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf" }, { 1, "invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf" }, { 1, "invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf" }, { 1, "invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf" }, { 1, "invalid-crbug-667044.webm" }, }; VP9_INSTANTIATE_TEST_CASE(InvalidFileTest, ::testing::ValuesIn(kVP9InvalidFileTests)); #endif // CONFIG_VP9_DECODER // This class will include test vectors that are expected to fail // peek. However they are still expected to have no fatal failures. 
class InvalidFileInvalidPeekTest : public InvalidFileTest { protected: InvalidFileInvalidPeekTest() : InvalidFileTest() {} virtual void HandlePeekResult(libvpx_test::Decoder *const /*decoder*/, libvpx_test::CompressedVideoSource * /*video*/, const vpx_codec_err_t /*res_peek*/) {} }; TEST_P(InvalidFileInvalidPeekTest, ReturnCode) { RunTest(); } #if CONFIG_VP8_DECODER const DecodeParam kVP8InvalidPeekTests[] = { { 1, "invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf" }, }; VP8_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest, ::testing::ValuesIn(kVP8InvalidPeekTests)); #endif // CONFIG_VP8_DECODER #if CONFIG_VP9_DECODER const DecodeParam kVP9InvalidFileInvalidPeekTests[] = { { 1, "invalid-vp90-01-v3.webm" }, }; VP9_INSTANTIATE_TEST_CASE(InvalidFileInvalidPeekTest, ::testing::ValuesIn(kVP9InvalidFileInvalidPeekTests)); const DecodeParam kMultiThreadedVP9InvalidFileTests[] = { { 4, "invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm" }, { 4, "invalid-" "vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf" }, { 4, "invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf" }, { 2, "invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf" }, { 4, "invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf" }, { 2, "invalid-crbug-629481.webm" }, { 3, "invalid-crbug-1558.ivf" }, { 4, "invalid-crbug-1562.ivf" }, }; INSTANTIATE_TEST_CASE_P( VP9MultiThreaded, InvalidFileTest, ::testing::Combine( ::testing::Values( static_cast(&libvpx_test::kVP9)), ::testing::ValuesIn(kMultiThreadedVP9InvalidFileTests))); #endif // CONFIG_VP9_DECODER } // namespace libvpx-1.8.2/test/ivf_video_source.h000066400000000000000000000063501357355204000175200ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_IVF_VIDEO_SOURCE_H_ #define VPX_TEST_IVF_VIDEO_SOURCE_H_ #include #include #include #include #include "test/video_source.h" namespace libvpx_test { const unsigned int kCodeBufferSize = 256 * 1024 * 1024; const unsigned int kIvfFileHdrSize = 32; const unsigned int kIvfFrameHdrSize = 12; static unsigned int MemGetLe32(const uint8_t *mem) { return (mem[3] << 24) | (mem[2] << 16) | (mem[1] << 8) | (mem[0]); } // This class extends VideoSource to allow parsing of ivf files, // so that we can do actual file decodes. class IVFVideoSource : public CompressedVideoSource { public: explicit IVFVideoSource(const std::string &file_name) : file_name_(file_name), input_file_(NULL), compressed_frame_buf_(NULL), frame_sz_(0), frame_(0), end_of_file_(false) {} virtual ~IVFVideoSource() { delete[] compressed_frame_buf_; if (input_file_) fclose(input_file_); } virtual void Init() { // Allocate a buffer for read in the compressed video frame. compressed_frame_buf_ = new uint8_t[libvpx_test::kCodeBufferSize]; ASSERT_TRUE(compressed_frame_buf_ != NULL) << "Allocate frame buffer failed"; } virtual void Begin() { input_file_ = OpenTestDataFile(file_name_); ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. 
Filename: " << file_name_; // Read file header uint8_t file_hdr[kIvfFileHdrSize]; ASSERT_EQ(kIvfFileHdrSize, fread(file_hdr, 1, kIvfFileHdrSize, input_file_)) << "File header read failed."; // Check file header ASSERT_TRUE(file_hdr[0] == 'D' && file_hdr[1] == 'K' && file_hdr[2] == 'I' && file_hdr[3] == 'F') << "Input is not an IVF file."; FillFrame(); } virtual void Next() { ++frame_; FillFrame(); } void FillFrame() { ASSERT_TRUE(input_file_ != NULL); uint8_t frame_hdr[kIvfFrameHdrSize]; // Check frame header and read a frame from input_file. if (fread(frame_hdr, 1, kIvfFrameHdrSize, input_file_) != kIvfFrameHdrSize) { end_of_file_ = true; } else { end_of_file_ = false; frame_sz_ = MemGetLe32(frame_hdr); ASSERT_LE(frame_sz_, kCodeBufferSize) << "Frame is too big for allocated code buffer"; ASSERT_EQ(frame_sz_, fread(compressed_frame_buf_, 1, frame_sz_, input_file_)) << "Failed to read complete frame"; } } virtual const uint8_t *cxdata() const { return end_of_file_ ? NULL : compressed_frame_buf_; } virtual size_t frame_size() const { return frame_sz_; } virtual unsigned int frame_number() const { return frame_; } protected: std::string file_name_; FILE *input_file_; uint8_t *compressed_frame_buf_; size_t frame_sz_; unsigned int frame_; bool end_of_file_; }; } // namespace libvpx_test #endif // VPX_TEST_IVF_VIDEO_SOURCE_H_ libvpx-1.8.2/test/keyframe_test.cc000066400000000000000000000115071357355204000171660ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" namespace { class KeyframeTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam { protected: KeyframeTest() : EncoderTest(GET_PARAM(0)) {} virtual ~KeyframeTest() {} virtual void SetUp() { InitializeConfig(); SetMode(GET_PARAM(1)); kf_count_ = 0; kf_count_max_ = INT_MAX; kf_do_force_kf_ = false; set_cpu_used_ = 0; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (kf_do_force_kf_) { frame_flags_ = (video->frame() % 3) ? 0 : VPX_EFLAG_FORCE_KF; } if (set_cpu_used_ && video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); } } virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { if (pkt->data.frame.flags & VPX_FRAME_IS_KEY) { kf_pts_list_.push_back(pkt->data.frame.pts); kf_count_++; abort_ |= kf_count_ > kf_count_max_; } } bool kf_do_force_kf_; int kf_count_; int kf_count_max_; std::vector kf_pts_list_; int set_cpu_used_; }; TEST_P(KeyframeTest, TestRandomVideoSource) { // Validate that encoding the RandomVideoSource produces multiple keyframes. // This validates the results of the TestDisableKeyframes test. kf_count_max_ = 2; // early exit successful tests. 
::libvpx_test::RandomVideoSource video; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // In realtime mode - auto placed keyframes are exceedingly rare, don't // bother with this check if (GET_PARAM(1) > 0) { EXPECT_GT(kf_count_, 1); } } TEST_P(KeyframeTest, TestDisableKeyframes) { cfg_.kf_mode = VPX_KF_DISABLED; kf_count_max_ = 1; // early exit failed tests. ::libvpx_test::RandomVideoSource video; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); EXPECT_EQ(1, kf_count_); } TEST_P(KeyframeTest, TestForceKeyframe) { cfg_.kf_mode = VPX_KF_DISABLED; kf_do_force_kf_ = true; ::libvpx_test::DummyVideoSource video; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // verify that every third frame is a keyframe. for (std::vector::const_iterator iter = kf_pts_list_.begin(); iter != kf_pts_list_.end(); ++iter) { ASSERT_EQ(0, *iter % 3) << "Unexpected keyframe at frame " << *iter; } } TEST_P(KeyframeTest, TestKeyframeMaxDistance) { cfg_.kf_max_dist = 25; ::libvpx_test::DummyVideoSource video; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // verify that keyframe interval matches kf_max_dist for (std::vector::const_iterator iter = kf_pts_list_.begin(); iter != kf_pts_list_.end(); ++iter) { ASSERT_EQ(0, *iter % 25) << "Unexpected keyframe at frame " << *iter; } } TEST_P(KeyframeTest, TestAutoKeyframe) { cfg_.kf_mode = VPX_KF_AUTO; kf_do_force_kf_ = false; // Force a deterministic speed step in Real Time mode, as the faster modes // may not produce a keyframe like we expect. This is necessary when running // on very slow environments (like Valgrind). The step -11 was determined // experimentally as the fastest mode that still throws the keyframe. if (deadline_ == VPX_DL_REALTIME) set_cpu_used_ = -11; // This clip has a cut scene every 30 frames -> Frame 0, 30, 60, 90, 120. // I check only the first 40 frames to make sure there's a keyframe at frame // 0 and 30. ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 40); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // In realtime mode - auto placed keyframes are exceedingly rare, don't // bother with this check if (GET_PARAM(1) > 0) { EXPECT_EQ(2u, kf_pts_list_.size()) << " Not the right number of keyframes "; } // Verify that the keyframes match the scene cuts in the file. for (std::vector::const_iterator iter = kf_pts_list_.begin(); iter != kf_pts_list_.end(); ++iter) { if (deadline_ == VPX_DL_REALTIME && *iter > 0) EXPECT_EQ(0, (*iter - 1) % 30) << "Unexpected keyframe at frame " << *iter; else EXPECT_EQ(0, *iter % 30) << "Unexpected keyframe at frame " << *iter; } } VP8_INSTANTIATE_TEST_CASE(KeyframeTest, ALL_TEST_MODES); } // namespace libvpx-1.8.2/test/level_test.cc000066400000000000000000000123541357355204000164730ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" namespace { class LevelTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params { protected: LevelTest() : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), cpu_used_(GET_PARAM(2)), min_gf_internal_(24), target_level_(0), level_(0) {} virtual ~LevelTest() {} virtual void SetUp() { InitializeConfig(); SetMode(encoding_mode_); if (encoding_mode_ != ::libvpx_test::kRealTime) { cfg_.g_lag_in_frames = 25; cfg_.rc_end_usage = VPX_VBR; } else { cfg_.g_lag_in_frames = 0; cfg_.rc_end_usage = VPX_CBR; } cfg_.rc_2pass_vbr_minsection_pct = 5; cfg_.rc_2pass_vbr_maxsection_pct = 2000; cfg_.rc_target_bitrate = 400; cfg_.rc_max_quantizer = 63; cfg_.rc_min_quantizer = 0; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, cpu_used_); encoder->Control(VP9E_SET_TARGET_LEVEL, target_level_); encoder->Control(VP9E_SET_MIN_GF_INTERVAL, min_gf_internal_); if (encoding_mode_ != ::libvpx_test::kRealTime) { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); encoder->Control(VP8E_SET_ARNR_TYPE, 3); } } encoder->Control(VP9E_GET_LEVEL, &level_); ASSERT_LE(level_, 51); ASSERT_GE(level_, 0); } ::libvpx_test::TestMode encoding_mode_; int cpu_used_; int min_gf_internal_; int target_level_; int level_; }; TEST_P(LevelTest, TestTargetLevel11Large) { ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime); ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 60); target_level_ = 11; cfg_.rc_target_bitrate = 150; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(target_level_, level_); } TEST_P(LevelTest, TestTargetLevel20Large) { ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime); ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 60); target_level_ = 20; cfg_.rc_target_bitrate = 1200; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(target_level_, level_); } TEST_P(LevelTest, TestTargetLevel31Large) { ASSERT_NE(encoding_mode_, ::libvpx_test::kRealTime); ::libvpx_test::I420VideoSource video("niklas_1280_720_30.y4m", 1280, 720, 30, 1, 0, 60); target_level_ = 31; cfg_.rc_target_bitrate = 8000; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(target_level_, level_); } // Test for keeping level stats only TEST_P(LevelTest, TestTargetLevel0) { ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 40); target_level_ = 0; min_gf_internal_ = 4; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(11, level_); cfg_.rc_target_bitrate = 1600; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(20, level_); } // Test for level control being turned off TEST_P(LevelTest, TestTargetLevel255) { ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 30); target_level_ = 255; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } TEST_P(LevelTest, TestTargetLevelApi) { ::libvpx_test::I420VideoSource video("hantro_odd.yuv", 208, 144, 30, 1, 0, 1); static const vpx_codec_iface_t *codec = &vpx_codec_vp9_cx_algo; vpx_codec_ctx_t enc; vpx_codec_enc_cfg_t cfg; EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_config_default(codec, &cfg, 0)); cfg.rc_target_bitrate = 100; EXPECT_EQ(VPX_CODEC_OK, vpx_codec_enc_init(&enc, codec, &cfg, 0)); for (int 
level = 0; level <= 256; ++level) { if (level == 10 || level == 11 || level == 20 || level == 21 || level == 30 || level == 31 || level == 40 || level == 41 || level == 50 || level == 51 || level == 52 || level == 60 || level == 61 || level == 62 || level == 0 || level == 1 || level == 255) EXPECT_EQ(VPX_CODEC_OK, vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level)); else EXPECT_EQ(VPX_CODEC_INVALID_PARAM, vpx_codec_control(&enc, VP9E_SET_TARGET_LEVEL, level)); } EXPECT_EQ(VPX_CODEC_OK, vpx_codec_destroy(&enc)); } VP9_INSTANTIATE_TEST_CASE(LevelTest, ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood), ::testing::Range(0, 9)); } // namespace libvpx-1.8.2/test/lpf_test.cc000066400000000000000000000773501357355204000161540ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_loopfilter.h" #include "vpx/vpx_integer.h" using libvpx_test::ACMRandom; namespace { // Horizontally and Vertically need 32x32: 8 Coeffs preceeding filtered section // 16 Coefs within filtered section // 8 Coeffs following filtered section const int kNumCoeffs = 1024; const int number_of_iterations = 10000; #if CONFIG_VP9_HIGHBITDEPTH typedef uint16_t Pixel; #define PIXEL_WIDTH 16 typedef void (*loop_op_t)(Pixel *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd); typedef void (*dual_loop_op_t)(Pixel *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd); #else typedef uint8_t Pixel; #define PIXEL_WIDTH 8 typedef void (*loop_op_t)(Pixel *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh); typedef void (*dual_loop_op_t)(Pixel *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1); #endif // CONFIG_VP9_HIGHBITDEPTH typedef std::tuple loop8_param_t; typedef std::tuple dualloop8_param_t; void InitInput(Pixel *s, Pixel *ref_s, ACMRandom *rnd, const uint8_t limit, const int mask, const int32_t p, const int i) { uint16_t tmp_s[kNumCoeffs]; for (int j = 0; j < kNumCoeffs;) { const uint8_t val = rnd->Rand8(); if (val & 0x80) { // 50% chance to choose a new value. tmp_s[j] = rnd->Rand16(); j++; } else { // 50% chance to repeat previous value in row X times. int k = 0; while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) { if (j < 1) { tmp_s[j] = rnd->Rand16(); } else if (val & 0x20) { // Increment by a value within the limit. tmp_s[j] = static_cast(tmp_s[j - 1] + (limit - 1)); } else { // Decrement by a value within the limit. tmp_s[j] = static_cast(tmp_s[j - 1] - (limit - 1)); } j++; } } } for (int j = 0; j < kNumCoeffs;) { const uint8_t val = rnd->Rand8(); if (val & 0x80) { j++; } else { // 50% chance to repeat previous value in column X times. 
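// (The (j % 32) * 32 + j / 32 indexing used below walks the 32x32 coefficient
// block in column-major order, so these runs correlate values vertically,
// complementing the row-major pass above.)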
int k = 0; while (k++ < ((val & 0x1f) + 1) && j < kNumCoeffs) { if (j < 1) { tmp_s[j] = rnd->Rand16(); } else if (val & 0x20) { // Increment by a value within the limit. tmp_s[(j % 32) * 32 + j / 32] = static_cast( tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] + (limit - 1)); } else { // Decrement by a value within the limit. tmp_s[(j % 32) * 32 + j / 32] = static_cast( tmp_s[((j - 1) % 32) * 32 + (j - 1) / 32] - (limit - 1)); } j++; } } } for (int j = 0; j < kNumCoeffs; j++) { if (i % 2) { s[j] = tmp_s[j] & mask; } else { s[j] = tmp_s[p * (j % p) + j / p] & mask; } ref_s[j] = s[j]; } } uint8_t GetOuterThresh(ACMRandom *rnd) { return static_cast(rnd->RandRange(3 * MAX_LOOP_FILTER + 5)); } uint8_t GetInnerThresh(ACMRandom *rnd) { return static_cast(rnd->RandRange(MAX_LOOP_FILTER + 1)); } uint8_t GetHevThresh(ACMRandom *rnd) { return static_cast(rnd->RandRange(MAX_LOOP_FILTER + 1) >> 4); } class Loop8Test6Param : public ::testing::TestWithParam { public: virtual ~Loop8Test6Param() {} virtual void SetUp() { loopfilter_op_ = GET_PARAM(0); ref_loopfilter_op_ = GET_PARAM(1); bit_depth_ = GET_PARAM(2); mask_ = (1 << bit_depth_) - 1; } virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: int bit_depth_; int mask_; loop_op_t loopfilter_op_; loop_op_t ref_loopfilter_op_; }; class Loop8Test9Param : public ::testing::TestWithParam { public: virtual ~Loop8Test9Param() {} virtual void SetUp() { loopfilter_op_ = GET_PARAM(0); ref_loopfilter_op_ = GET_PARAM(1); bit_depth_ = GET_PARAM(2); mask_ = (1 << bit_depth_) - 1; } virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: int bit_depth_; int mask_; dual_loop_op_t loopfilter_op_; dual_loop_op_t ref_loopfilter_op_; }; TEST_P(Loop8Test6Param, OperationCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = number_of_iterations; const int32_t p = kNumCoeffs / 32; DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, s[kNumCoeffs]); DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, ref_s[kNumCoeffs]); int err_count_total = 0; int first_failure = -1; for (int i = 0; i < count_test_block; ++i) { int err_count = 0; uint8_t tmp = GetOuterThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetInnerThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetHevThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; InitInput(s, ref_s, &rnd, *limit, mask_, p, i); #if CONFIG_VP9_HIGHBITDEPTH ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_); ASM_REGISTER_STATE_CHECK( loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_)); #else ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh); ASM_REGISTER_STATE_CHECK( loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh)); #endif // CONFIG_VP9_HIGHBITDEPTH for (int j = 0; j < kNumCoeffs; ++j) { err_count += ref_s[j] != s[j]; } if (err_count && !err_count_total) { first_failure = i; } err_count_total += err_count; } EXPECT_EQ(0, err_count_total) << "Error: Loop8Test6Param, C output doesn't match SSE2 " "loopfilter output. 
" << "First failed at test case " << first_failure; } TEST_P(Loop8Test6Param, ValueCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = number_of_iterations; DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, s[kNumCoeffs]); DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, ref_s[kNumCoeffs]); int err_count_total = 0; int first_failure = -1; // NOTE: The code in vp9_loopfilter.c:update_sharpness computes mblim as a // function of sharpness_lvl and the loopfilter lvl as: // block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); // ... // memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), // SIMD_WIDTH); // This means that the largest value for mblim will occur when sharpness_lvl // is equal to 0, and lvl is equal to its greatest value (MAX_LOOP_FILTER). // In this case block_inside_limit will be equal to MAX_LOOP_FILTER and // therefore mblim will be equal to (2 * (lvl + 2) + block_inside_limit) = // 2 * (MAX_LOOP_FILTER + 2) + MAX_LOOP_FILTER = 3 * MAX_LOOP_FILTER + 4 for (int i = 0; i < count_test_block; ++i) { int err_count = 0; uint8_t tmp = GetOuterThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, blimit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetInnerThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, limit[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetHevThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, thresh[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; int32_t p = kNumCoeffs / 32; for (int j = 0; j < kNumCoeffs; ++j) { s[j] = rnd.Rand16() & mask_; ref_s[j] = s[j]; } #if CONFIG_VP9_HIGHBITDEPTH ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_); ASM_REGISTER_STATE_CHECK( loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh, bit_depth_)); #else ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit, limit, thresh); ASM_REGISTER_STATE_CHECK( loopfilter_op_(s + 8 + p * 8, p, blimit, limit, thresh)); #endif // CONFIG_VP9_HIGHBITDEPTH for (int j = 0; j < kNumCoeffs; ++j) { err_count += ref_s[j] != s[j]; } if (err_count && !err_count_total) { first_failure = i; } err_count_total += err_count; } EXPECT_EQ(0, err_count_total) << "Error: Loop8Test6Param, C output doesn't match SSE2 " "loopfilter output. 
" << "First failed at test case " << first_failure; } TEST_P(Loop8Test9Param, OperationCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = number_of_iterations; DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, s[kNumCoeffs]); DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, ref_s[kNumCoeffs]); int err_count_total = 0; int first_failure = -1; for (int i = 0; i < count_test_block; ++i) { int err_count = 0; uint8_t tmp = GetOuterThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetInnerThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetHevThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetOuterThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetInnerThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetHevThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; int32_t p = kNumCoeffs / 32; const uint8_t limit = *limit0 < *limit1 ? *limit0 : *limit1; InitInput(s, ref_s, &rnd, limit, mask_, p, i); #if CONFIG_VP9_HIGHBITDEPTH ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bit_depth_); ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bit_depth_)); #else ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1); ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1)); #endif // CONFIG_VP9_HIGHBITDEPTH for (int j = 0; j < kNumCoeffs; ++j) { err_count += ref_s[j] != s[j]; } if (err_count && !err_count_total) { first_failure = i; } err_count_total += err_count; } EXPECT_EQ(0, err_count_total) << "Error: Loop8Test9Param, C output doesn't match SSE2 " "loopfilter output. 
" << "First failed at test case " << first_failure; } TEST_P(Loop8Test9Param, ValueCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = number_of_iterations; DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, s[kNumCoeffs]); DECLARE_ALIGNED(PIXEL_WIDTH, Pixel, ref_s[kNumCoeffs]); int err_count_total = 0; int first_failure = -1; for (int i = 0; i < count_test_block; ++i) { int err_count = 0; uint8_t tmp = GetOuterThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, blimit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetInnerThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, limit0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetHevThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, thresh0[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetOuterThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, blimit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetInnerThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, limit1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; tmp = GetHevThresh(&rnd); DECLARE_ALIGNED(16, const uint8_t, thresh1[16]) = { tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp, tmp }; int32_t p = kNumCoeffs / 32; // TODO(pdlf) can we have non-square here? for (int j = 0; j < kNumCoeffs; ++j) { s[j] = rnd.Rand16() & mask_; ref_s[j] = s[j]; } #if CONFIG_VP9_HIGHBITDEPTH ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bit_depth_); ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bit_depth_)); #else ref_loopfilter_op_(ref_s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1); ASM_REGISTER_STATE_CHECK(loopfilter_op_(s + 8 + p * 8, p, blimit0, limit0, thresh0, blimit1, limit1, thresh1)); #endif // CONFIG_VP9_HIGHBITDEPTH for (int j = 0; j < kNumCoeffs; ++j) { err_count += ref_s[j] != s[j]; } if (err_count && !err_count_total) { first_failure = i; } err_count_total += err_count; } EXPECT_EQ(0, err_count_total) << "Error: Loop8Test9Param, C output doesn't match SSE2" "loopfilter output. 
" << "First failed at test case " << first_failure; } using std::make_tuple; #if HAVE_SSE2 #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test6Param, ::testing::Values(make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, &vpx_highbd_lpf_horizontal_4_c, 8), make_tuple(&vpx_highbd_lpf_vertical_4_sse2, &vpx_highbd_lpf_vertical_4_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, &vpx_highbd_lpf_horizontal_8_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, &vpx_highbd_lpf_horizontal_16_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2, &vpx_highbd_lpf_horizontal_16_dual_c, 8), make_tuple(&vpx_highbd_lpf_vertical_8_sse2, &vpx_highbd_lpf_vertical_8_c, 8), make_tuple(&vpx_highbd_lpf_vertical_16_sse2, &vpx_highbd_lpf_vertical_16_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, &vpx_highbd_lpf_horizontal_4_c, 10), make_tuple(&vpx_highbd_lpf_vertical_4_sse2, &vpx_highbd_lpf_vertical_4_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, &vpx_highbd_lpf_horizontal_8_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, &vpx_highbd_lpf_horizontal_16_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2, &vpx_highbd_lpf_horizontal_16_dual_c, 10), make_tuple(&vpx_highbd_lpf_vertical_8_sse2, &vpx_highbd_lpf_vertical_8_c, 10), make_tuple(&vpx_highbd_lpf_vertical_16_sse2, &vpx_highbd_lpf_vertical_16_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_4_sse2, &vpx_highbd_lpf_horizontal_4_c, 12), make_tuple(&vpx_highbd_lpf_vertical_4_sse2, &vpx_highbd_lpf_vertical_4_c, 12), make_tuple(&vpx_highbd_lpf_horizontal_8_sse2, &vpx_highbd_lpf_horizontal_8_c, 12), make_tuple(&vpx_highbd_lpf_horizontal_16_sse2, &vpx_highbd_lpf_horizontal_16_c, 12), make_tuple(&vpx_highbd_lpf_horizontal_16_dual_sse2, &vpx_highbd_lpf_horizontal_16_dual_c, 12), make_tuple(&vpx_highbd_lpf_vertical_8_sse2, &vpx_highbd_lpf_vertical_8_c, 12), make_tuple(&vpx_highbd_lpf_vertical_16_sse2, &vpx_highbd_lpf_vertical_16_c, 12), make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2, &vpx_highbd_lpf_vertical_16_dual_c, 8), make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2, &vpx_highbd_lpf_vertical_16_dual_c, 10), make_tuple(&vpx_highbd_lpf_vertical_16_dual_sse2, &vpx_highbd_lpf_vertical_16_dual_c, 12))); #else INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test6Param, ::testing::Values( make_tuple(&vpx_lpf_horizontal_4_sse2, &vpx_lpf_horizontal_4_c, 8), make_tuple(&vpx_lpf_horizontal_8_sse2, &vpx_lpf_horizontal_8_c, 8), make_tuple(&vpx_lpf_horizontal_16_sse2, &vpx_lpf_horizontal_16_c, 8), make_tuple(&vpx_lpf_horizontal_16_dual_sse2, &vpx_lpf_horizontal_16_dual_c, 8), make_tuple(&vpx_lpf_vertical_4_sse2, &vpx_lpf_vertical_4_c, 8), make_tuple(&vpx_lpf_vertical_8_sse2, &vpx_lpf_vertical_8_c, 8), make_tuple(&vpx_lpf_vertical_16_sse2, &vpx_lpf_vertical_16_c, 8), make_tuple(&vpx_lpf_vertical_16_dual_sse2, &vpx_lpf_vertical_16_dual_c, 8))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif #if HAVE_AVX2 && (!CONFIG_VP9_HIGHBITDEPTH) INSTANTIATE_TEST_CASE_P( AVX2, Loop8Test6Param, ::testing::Values(make_tuple(&vpx_lpf_horizontal_16_avx2, &vpx_lpf_horizontal_16_c, 8), make_tuple(&vpx_lpf_horizontal_16_dual_avx2, &vpx_lpf_horizontal_16_dual_c, 8))); #endif #if HAVE_SSE2 #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test9Param, ::testing::Values(make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2, &vpx_highbd_lpf_horizontal_4_dual_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2, &vpx_highbd_lpf_horizontal_8_dual_c, 8), make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2, &vpx_highbd_lpf_vertical_4_dual_c, 8), 
make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2, &vpx_highbd_lpf_vertical_8_dual_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2, &vpx_highbd_lpf_horizontal_4_dual_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2, &vpx_highbd_lpf_horizontal_8_dual_c, 10), make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2, &vpx_highbd_lpf_vertical_4_dual_c, 10), make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2, &vpx_highbd_lpf_vertical_8_dual_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_4_dual_sse2, &vpx_highbd_lpf_horizontal_4_dual_c, 12), make_tuple(&vpx_highbd_lpf_horizontal_8_dual_sse2, &vpx_highbd_lpf_horizontal_8_dual_c, 12), make_tuple(&vpx_highbd_lpf_vertical_4_dual_sse2, &vpx_highbd_lpf_vertical_4_dual_c, 12), make_tuple(&vpx_highbd_lpf_vertical_8_dual_sse2, &vpx_highbd_lpf_vertical_8_dual_c, 12))); #else INSTANTIATE_TEST_CASE_P( SSE2, Loop8Test9Param, ::testing::Values(make_tuple(&vpx_lpf_horizontal_4_dual_sse2, &vpx_lpf_horizontal_4_dual_c, 8), make_tuple(&vpx_lpf_horizontal_8_dual_sse2, &vpx_lpf_horizontal_8_dual_c, 8), make_tuple(&vpx_lpf_vertical_4_dual_sse2, &vpx_lpf_vertical_4_dual_c, 8), make_tuple(&vpx_lpf_vertical_8_dual_sse2, &vpx_lpf_vertical_8_dual_c, 8))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif #if HAVE_NEON #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( NEON, Loop8Test6Param, ::testing::Values(make_tuple(&vpx_highbd_lpf_horizontal_4_neon, &vpx_highbd_lpf_horizontal_4_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_4_neon, &vpx_highbd_lpf_horizontal_4_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_4_neon, &vpx_highbd_lpf_horizontal_4_c, 12), make_tuple(&vpx_highbd_lpf_horizontal_8_neon, &vpx_highbd_lpf_horizontal_8_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_8_neon, &vpx_highbd_lpf_horizontal_8_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_8_neon, &vpx_highbd_lpf_horizontal_8_c, 12), make_tuple(&vpx_highbd_lpf_horizontal_16_neon, &vpx_highbd_lpf_horizontal_16_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_16_neon, &vpx_highbd_lpf_horizontal_16_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_16_neon, &vpx_highbd_lpf_horizontal_16_c, 12), make_tuple(&vpx_highbd_lpf_horizontal_16_dual_neon, &vpx_highbd_lpf_horizontal_16_dual_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_16_dual_neon, &vpx_highbd_lpf_horizontal_16_dual_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_16_dual_neon, &vpx_highbd_lpf_horizontal_16_dual_c, 12), make_tuple(&vpx_highbd_lpf_vertical_4_neon, &vpx_highbd_lpf_vertical_4_c, 8), make_tuple(&vpx_highbd_lpf_vertical_4_neon, &vpx_highbd_lpf_vertical_4_c, 10), make_tuple(&vpx_highbd_lpf_vertical_4_neon, &vpx_highbd_lpf_vertical_4_c, 12), make_tuple(&vpx_highbd_lpf_vertical_8_neon, &vpx_highbd_lpf_vertical_8_c, 8), make_tuple(&vpx_highbd_lpf_vertical_8_neon, &vpx_highbd_lpf_vertical_8_c, 10), make_tuple(&vpx_highbd_lpf_vertical_8_neon, &vpx_highbd_lpf_vertical_8_c, 12), make_tuple(&vpx_highbd_lpf_vertical_16_neon, &vpx_highbd_lpf_vertical_16_c, 8), make_tuple(&vpx_highbd_lpf_vertical_16_neon, &vpx_highbd_lpf_vertical_16_c, 10), make_tuple(&vpx_highbd_lpf_vertical_16_neon, &vpx_highbd_lpf_vertical_16_c, 12), make_tuple(&vpx_highbd_lpf_vertical_16_dual_neon, &vpx_highbd_lpf_vertical_16_dual_c, 8), make_tuple(&vpx_highbd_lpf_vertical_16_dual_neon, &vpx_highbd_lpf_vertical_16_dual_c, 10), make_tuple(&vpx_highbd_lpf_vertical_16_dual_neon, &vpx_highbd_lpf_vertical_16_dual_c, 12))); INSTANTIATE_TEST_CASE_P( NEON, Loop8Test9Param, ::testing::Values(make_tuple(&vpx_highbd_lpf_horizontal_4_dual_neon, &vpx_highbd_lpf_horizontal_4_dual_c, 8), 
make_tuple(&vpx_highbd_lpf_horizontal_4_dual_neon, &vpx_highbd_lpf_horizontal_4_dual_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_4_dual_neon, &vpx_highbd_lpf_horizontal_4_dual_c, 12), make_tuple(&vpx_highbd_lpf_horizontal_8_dual_neon, &vpx_highbd_lpf_horizontal_8_dual_c, 8), make_tuple(&vpx_highbd_lpf_horizontal_8_dual_neon, &vpx_highbd_lpf_horizontal_8_dual_c, 10), make_tuple(&vpx_highbd_lpf_horizontal_8_dual_neon, &vpx_highbd_lpf_horizontal_8_dual_c, 12), make_tuple(&vpx_highbd_lpf_vertical_4_dual_neon, &vpx_highbd_lpf_vertical_4_dual_c, 8), make_tuple(&vpx_highbd_lpf_vertical_4_dual_neon, &vpx_highbd_lpf_vertical_4_dual_c, 10), make_tuple(&vpx_highbd_lpf_vertical_4_dual_neon, &vpx_highbd_lpf_vertical_4_dual_c, 12), make_tuple(&vpx_highbd_lpf_vertical_8_dual_neon, &vpx_highbd_lpf_vertical_8_dual_c, 8), make_tuple(&vpx_highbd_lpf_vertical_8_dual_neon, &vpx_highbd_lpf_vertical_8_dual_c, 10), make_tuple(&vpx_highbd_lpf_vertical_8_dual_neon, &vpx_highbd_lpf_vertical_8_dual_c, 12))); #else INSTANTIATE_TEST_CASE_P( NEON, Loop8Test6Param, ::testing::Values( make_tuple(&vpx_lpf_horizontal_16_neon, &vpx_lpf_horizontal_16_c, 8), make_tuple(&vpx_lpf_horizontal_16_dual_neon, &vpx_lpf_horizontal_16_dual_c, 8), make_tuple(&vpx_lpf_vertical_16_neon, &vpx_lpf_vertical_16_c, 8), make_tuple(&vpx_lpf_vertical_16_dual_neon, &vpx_lpf_vertical_16_dual_c, 8), make_tuple(&vpx_lpf_horizontal_8_neon, &vpx_lpf_horizontal_8_c, 8), make_tuple(&vpx_lpf_vertical_8_neon, &vpx_lpf_vertical_8_c, 8), make_tuple(&vpx_lpf_horizontal_4_neon, &vpx_lpf_horizontal_4_c, 8), make_tuple(&vpx_lpf_vertical_4_neon, &vpx_lpf_vertical_4_c, 8))); INSTANTIATE_TEST_CASE_P( NEON, Loop8Test9Param, ::testing::Values(make_tuple(&vpx_lpf_horizontal_8_dual_neon, &vpx_lpf_horizontal_8_dual_c, 8), make_tuple(&vpx_lpf_vertical_8_dual_neon, &vpx_lpf_vertical_8_dual_c, 8), make_tuple(&vpx_lpf_horizontal_4_dual_neon, &vpx_lpf_horizontal_4_dual_c, 8), make_tuple(&vpx_lpf_vertical_4_dual_neon, &vpx_lpf_vertical_4_dual_c, 8))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_NEON #if HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( DSPR2, Loop8Test6Param, ::testing::Values( make_tuple(&vpx_lpf_horizontal_4_dspr2, &vpx_lpf_horizontal_4_c, 8), make_tuple(&vpx_lpf_horizontal_8_dspr2, &vpx_lpf_horizontal_8_c, 8), make_tuple(&vpx_lpf_horizontal_16_dspr2, &vpx_lpf_horizontal_16_c, 8), make_tuple(&vpx_lpf_horizontal_16_dual_dspr2, &vpx_lpf_horizontal_16_dual_c, 8), make_tuple(&vpx_lpf_vertical_4_dspr2, &vpx_lpf_vertical_4_c, 8), make_tuple(&vpx_lpf_vertical_8_dspr2, &vpx_lpf_vertical_8_c, 8), make_tuple(&vpx_lpf_vertical_16_dspr2, &vpx_lpf_vertical_16_c, 8), make_tuple(&vpx_lpf_vertical_16_dual_dspr2, &vpx_lpf_vertical_16_dual_c, 8))); INSTANTIATE_TEST_CASE_P( DSPR2, Loop8Test9Param, ::testing::Values(make_tuple(&vpx_lpf_horizontal_4_dual_dspr2, &vpx_lpf_horizontal_4_dual_c, 8), make_tuple(&vpx_lpf_horizontal_8_dual_dspr2, &vpx_lpf_horizontal_8_dual_c, 8), make_tuple(&vpx_lpf_vertical_4_dual_dspr2, &vpx_lpf_vertical_4_dual_c, 8), make_tuple(&vpx_lpf_vertical_8_dual_dspr2, &vpx_lpf_vertical_8_dual_c, 8))); #endif // HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH #if HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH) INSTANTIATE_TEST_CASE_P( MSA, Loop8Test6Param, ::testing::Values( make_tuple(&vpx_lpf_horizontal_4_msa, &vpx_lpf_horizontal_4_c, 8), make_tuple(&vpx_lpf_horizontal_8_msa, &vpx_lpf_horizontal_8_c, 8), make_tuple(&vpx_lpf_horizontal_16_msa, &vpx_lpf_horizontal_16_c, 8), make_tuple(&vpx_lpf_horizontal_16_dual_msa, &vpx_lpf_horizontal_16_dual_c, 8), 
make_tuple(&vpx_lpf_vertical_4_msa, &vpx_lpf_vertical_4_c, 8), make_tuple(&vpx_lpf_vertical_8_msa, &vpx_lpf_vertical_8_c, 8), make_tuple(&vpx_lpf_vertical_16_msa, &vpx_lpf_vertical_16_c, 8))); INSTANTIATE_TEST_CASE_P( MSA, Loop8Test9Param, ::testing::Values(make_tuple(&vpx_lpf_horizontal_4_dual_msa, &vpx_lpf_horizontal_4_dual_c, 8), make_tuple(&vpx_lpf_horizontal_8_dual_msa, &vpx_lpf_horizontal_8_dual_c, 8), make_tuple(&vpx_lpf_vertical_4_dual_msa, &vpx_lpf_vertical_4_dual_c, 8), make_tuple(&vpx_lpf_vertical_8_dual_msa, &vpx_lpf_vertical_8_dual_c, 8))); #endif // HAVE_MSA && (!CONFIG_VP9_HIGHBITDEPTH) } // namespace libvpx-1.8.2/test/md5_helper.h000066400000000000000000000041331357355204000162070ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_MD5_HELPER_H_ #define VPX_TEST_MD5_HELPER_H_ #include "./md5_utils.h" #include "vpx/vpx_decoder.h" namespace libvpx_test { class MD5 { public: MD5() { MD5Init(&md5_); } void Add(const vpx_image_t *img) { for (int plane = 0; plane < 3; ++plane) { const uint8_t *buf = img->planes[plane]; // Calculate the width and height to do the md5 check. For the chroma // plane, we never want to round down and thus skip a pixel so if // we are shifting by 1 (chroma_shift) we add 1 before doing the shift. // This works only for chroma_shift of 0 and 1. const int bytes_per_sample = (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1; const int h = plane ? (img->d_h + img->y_chroma_shift) >> img->y_chroma_shift : img->d_h; const int w = (plane ? (img->d_w + img->x_chroma_shift) >> img->x_chroma_shift : img->d_w) * bytes_per_sample; for (int y = 0; y < h; ++y) { MD5Update(&md5_, buf, w); buf += img->stride[plane]; } } } void Add(const uint8_t *data, size_t size) { MD5Update(&md5_, data, static_cast(size)); } const char *Get(void) { static const char hex[16] = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', 'e', 'f', }; uint8_t tmp[16]; MD5Context ctx_tmp = md5_; MD5Final(tmp, &ctx_tmp); for (int i = 0; i < 16; i++) { res_[i * 2 + 0] = hex[tmp[i] >> 4]; res_[i * 2 + 1] = hex[tmp[i] & 0xf]; } res_[32] = 0; return res_; } protected: char res_[33]; MD5Context md5_; }; } // namespace libvpx_test #endif // VPX_TEST_MD5_HELPER_H_ libvpx-1.8.2/test/minmax_test.cc000066400000000000000000000071741357355204000166610ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "test/acm_random.h" #include "test/register_state_check.h" namespace { using ::libvpx_test::ACMRandom; typedef void (*MinMaxFunc)(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int *min, int *max); class MinMaxTest : public ::testing::TestWithParam { public: virtual void SetUp() { mm_func_ = GetParam(); rnd_.Reset(ACMRandom::DeterministicSeed()); } protected: MinMaxFunc mm_func_; ACMRandom rnd_; }; void reference_minmax(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int *min_ret, int *max_ret) { int min = 255; int max = 0; for (int i = 0; i < 8; i++) { for (int j = 0; j < 8; j++) { const int diff = abs(a[i * a_stride + j] - b[i * b_stride + j]); if (min > diff) min = diff; if (max < diff) max = diff; } } *min_ret = min; *max_ret = max; } TEST_P(MinMaxTest, MinValue) { for (int i = 0; i < 64; i++) { uint8_t a[64], b[64]; memset(a, 0, sizeof(a)); memset(b, 255, sizeof(b)); b[i] = i; // Set a minimum difference of i. int min, max; ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); EXPECT_EQ(255, max); EXPECT_EQ(i, min); } } TEST_P(MinMaxTest, MaxValue) { for (int i = 0; i < 64; i++) { uint8_t a[64], b[64]; memset(a, 0, sizeof(a)); memset(b, 0, sizeof(b)); b[i] = i; // Set a maximum difference of i. int min, max; ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); EXPECT_EQ(i, max); EXPECT_EQ(0, min); } } TEST_P(MinMaxTest, CompareReference) { uint8_t a[64], b[64]; for (int j = 0; j < 64; j++) { a[j] = rnd_.Rand8(); b[j] = rnd_.Rand8(); } int min_ref, max_ref, min, max; reference_minmax(a, 8, b, 8, &min_ref, &max_ref); ASM_REGISTER_STATE_CHECK(mm_func_(a, 8, b, 8, &min, &max)); EXPECT_EQ(max_ref, max); EXPECT_EQ(min_ref, min); } TEST_P(MinMaxTest, CompareReferenceAndVaryStride) { uint8_t a[8 * 64], b[8 * 64]; for (int i = 0; i < 8 * 64; i++) { a[i] = rnd_.Rand8(); b[i] = rnd_.Rand8(); } for (int a_stride = 8; a_stride <= 64; a_stride += 8) { for (int b_stride = 8; b_stride <= 64; b_stride += 8) { int min_ref, max_ref, min, max; reference_minmax(a, a_stride, b, b_stride, &min_ref, &max_ref); ASM_REGISTER_STATE_CHECK(mm_func_(a, a_stride, b, b_stride, &min, &max)); EXPECT_EQ(max_ref, max) << "when a_stride = " << a_stride << " and b_stride = " << b_stride; EXPECT_EQ(min_ref, min) << "when a_stride = " << a_stride << " and b_stride = " << b_stride; } } } INSTANTIATE_TEST_CASE_P(C, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_c)); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P(SSE2, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_sse2)); #endif #if HAVE_NEON INSTANTIATE_TEST_CASE_P(NEON, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_neon)); #endif #if HAVE_MSA INSTANTIATE_TEST_CASE_P(MSA, MinMaxTest, ::testing::Values(&vpx_minmax_8x8_msa)); #endif } // namespace libvpx-1.8.2/test/non_greedy_mv_test.cc000066400000000000000000000151431357355204000202160ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "vp9/encoder/vp9_non_greedy_mv.h" #include "./vpx_dsp_rtcd.h" namespace { static void read_in_mf(const char *filename, int *rows_ptr, int *cols_ptr, MV **buffer_ptr) { FILE *input = fopen(filename, "rb"); int row, col; int idx; ASSERT_NE(input, nullptr) << "Cannot open file: " << filename << std::endl; fscanf(input, "%d,%d\n", rows_ptr, cols_ptr); *buffer_ptr = (MV *)malloc((*rows_ptr) * (*cols_ptr) * sizeof(MV)); for (idx = 0; idx < (*rows_ptr) * (*cols_ptr); ++idx) { fscanf(input, "%d,%d;", &row, &col); (*buffer_ptr)[idx].row = row; (*buffer_ptr)[idx].col = col; } fclose(input); } static void read_in_local_var(const char *filename, int *rows_ptr, int *cols_ptr, int (**M_ptr)[MF_LOCAL_STRUCTURE_SIZE]) { FILE *input = fopen(filename, "rb"); int M00, M01, M10, M11; int idx; int int_type; ASSERT_NE(input, nullptr) << "Cannot open file: " << filename << std::endl; fscanf(input, "%d,%d\n", rows_ptr, cols_ptr); *M_ptr = (int(*)[MF_LOCAL_STRUCTURE_SIZE])malloc( (*rows_ptr) * (*cols_ptr) * MF_LOCAL_STRUCTURE_SIZE * sizeof(int_type)); for (idx = 0; idx < (*rows_ptr) * (*cols_ptr); ++idx) { fscanf(input, "%d,%d,%d,%d;", &M00, &M01, &M10, &M11); (*M_ptr)[idx][0] = M00; (*M_ptr)[idx][1] = M01; (*M_ptr)[idx][2] = M10; (*M_ptr)[idx][3] = M11; } fclose(input); } static void compare_mf(const MV *mf1, const MV *mf2, int rows, int cols, float *mean_ptr, float *std_ptr) { float float_type; float *diffs = (float *)malloc(rows * cols * sizeof(float_type)); int idx; float accu = 0.0f; for (idx = 0; idx < rows * cols; ++idx) { MV mv1 = mf1[idx]; MV mv2 = mf2[idx]; float row_diff2 = (float)((mv1.row - mv2.row) * (mv1.row - mv2.row)); float col_diff2 = (float)((mv1.col - mv2.col) * (mv1.col - mv2.col)); diffs[idx] = sqrt(row_diff2 + col_diff2); accu += diffs[idx]; } *mean_ptr = accu / rows / cols; *std_ptr = 0; for (idx = 0; idx < rows * cols; ++idx) { *std_ptr += (diffs[idx] - (*mean_ptr)) * (diffs[idx] - (*mean_ptr)); } *std_ptr = sqrt(*std_ptr / rows / cols); free(diffs); } static void load_frame_info(const char *filename, YV12_BUFFER_CONFIG *ref_frame_ptr) { FILE *input = fopen(filename, "rb"); int idx; uint8_t data_type; ASSERT_NE(input, nullptr) << "Cannot open file: " << filename << std::endl; fscanf(input, "%d,%d\n", &(ref_frame_ptr->y_height), &(ref_frame_ptr->y_width)); ref_frame_ptr->y_buffer = (uint8_t *)malloc( (ref_frame_ptr->y_width) * (ref_frame_ptr->y_height) * sizeof(data_type)); for (idx = 0; idx < (ref_frame_ptr->y_width) * (ref_frame_ptr->y_height); ++idx) { int value; fscanf(input, "%d,", &value); ref_frame_ptr->y_buffer[idx] = (uint8_t)value; } ref_frame_ptr->y_stride = ref_frame_ptr->y_width; fclose(input); } static int compare_local_var(const int (*local_var1)[MF_LOCAL_STRUCTURE_SIZE], const int (*local_var2)[MF_LOCAL_STRUCTURE_SIZE], int rows, int cols) { int diff = 0; int outter_idx, inner_idx; for (outter_idx = 0; outter_idx < rows * cols; ++outter_idx) { for (inner_idx = 0; inner_idx < MF_LOCAL_STRUCTURE_SIZE; ++inner_idx) { diff += abs(local_var1[outter_idx][inner_idx] - local_var2[outter_idx][inner_idx]); } } return diff / rows / cols; } TEST(non_greedy_mv, smooth_mf) { const char *search_mf_file = "non_greedy_mv_test_files/exhaust_16x16.txt"; const char *local_var_file = "non_greedy_mv_test_files/localVar_16x16.txt"; const char *estimation_file = "non_greedy_mv_test_files/estimation_16x16.txt"; const char *ground_truth_file = "non_greedy_mv_test_files/ground_truth_16x16.txt"; BLOCK_SIZE bsize = 
BLOCK_32X32; MV *search_mf = NULL; MV *smooth_mf = NULL; MV *estimation = NULL; MV *ground_truth = NULL; int(*local_var)[MF_LOCAL_STRUCTURE_SIZE] = NULL; int rows = 0, cols = 0; int alpha = 100, max_iter = 100; read_in_mf(search_mf_file, &rows, &cols, &search_mf); read_in_local_var(local_var_file, &rows, &cols, &local_var); read_in_mf(estimation_file, &rows, &cols, &estimation); read_in_mf(ground_truth_file, &rows, &cols, &ground_truth); float sm_mean, sm_std; float est_mean, est_std; smooth_mf = (MV *)malloc(rows * cols * sizeof(MV)); vp9_get_smooth_motion_field(search_mf, local_var, rows, cols, bsize, alpha, max_iter, smooth_mf); compare_mf(smooth_mf, ground_truth, rows, cols, &sm_mean, &sm_std); compare_mf(smooth_mf, estimation, rows, cols, &est_mean, &est_std); EXPECT_LE(sm_mean, 3); EXPECT_LE(est_mean, 2); free(search_mf); free(local_var); free(estimation); free(ground_truth); free(smooth_mf); } TEST(non_greedy_mv, local_var) { const char *ref_frame_file = "non_greedy_mv_test_files/ref_frame_16x16.txt"; const char *cur_frame_file = "non_greedy_mv_test_files/cur_frame_16x16.txt"; const char *gt_local_var_file = "non_greedy_mv_test_files/localVar_16x16.txt"; const char *search_mf_file = "non_greedy_mv_test_files/exhaust_16x16.txt"; BLOCK_SIZE bsize = BLOCK_16X16; int(*gt_local_var)[MF_LOCAL_STRUCTURE_SIZE] = NULL; int(*est_local_var)[MF_LOCAL_STRUCTURE_SIZE] = NULL; YV12_BUFFER_CONFIG ref_frame, cur_frame; int rows, cols; MV *search_mf; int int_type; int local_var_diff; vp9_variance_fn_ptr_t fn; load_frame_info(ref_frame_file, &ref_frame); load_frame_info(cur_frame_file, &cur_frame); read_in_mf(search_mf_file, &rows, &cols, &search_mf); fn.sdf = vpx_sad16x16; est_local_var = (int(*)[MF_LOCAL_STRUCTURE_SIZE])malloc( rows * cols * MF_LOCAL_STRUCTURE_SIZE * sizeof(int_type)); vp9_get_local_structure(&cur_frame, &ref_frame, search_mf, &fn, rows, cols, bsize, est_local_var); read_in_local_var(gt_local_var_file, &rows, &cols, >_local_var); local_var_diff = compare_local_var(est_local_var, gt_local_var, rows, cols); EXPECT_LE(local_var_diff, 1); free(gt_local_var); free(est_local_var); free(ref_frame.y_buffer); } } // namespace libvpx-1.8.2/test/partial_idct_test.cc000066400000000000000000001327421357355204000200270ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_scan.h" #include "vpx/vpx_integer.h" #include "vpx_ports/vpx_timer.h" using libvpx_test::ACMRandom; namespace { typedef void (*FwdTxfmFunc)(const int16_t *in, tran_low_t *out, int stride); typedef void (*InvTxfmFunc)(const tran_low_t *in, uint8_t *out, int stride); typedef void (*InvTxfmWithBdFunc)(const tran_low_t *in, uint8_t *out, int stride, int bd); template void wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) { (void)bd; fn(in, out, stride); } #if CONFIG_VP9_HIGHBITDEPTH typedef void (*InvTxfmHighbdFunc)(const tran_low_t *in, uint16_t *out, int stride, int bd); template void highbd_wrapper(const tran_low_t *in, uint8_t *out, int stride, int bd) { fn(in, CAST_TO_SHORTPTR(out), stride, bd); } #endif typedef std::tuple PartialInvTxfmParam; const int kMaxNumCoeffs = 1024; const int kCountTestBlock = 1000; class PartialIDctTest : public ::testing::TestWithParam { public: virtual ~PartialIDctTest() {} virtual void SetUp() { rnd_.Reset(ACMRandom::DeterministicSeed()); fwd_txfm_ = GET_PARAM(0); full_inv_txfm_ = GET_PARAM(1); partial_inv_txfm_ = GET_PARAM(2); tx_size_ = GET_PARAM(3); last_nonzero_ = GET_PARAM(4); bit_depth_ = GET_PARAM(5); pixel_size_ = GET_PARAM(6); mask_ = (1 << bit_depth_) - 1; switch (tx_size_) { case TX_4X4: size_ = 4; break; case TX_8X8: size_ = 8; break; case TX_16X16: size_ = 16; break; case TX_32X32: size_ = 32; break; default: FAIL() << "Wrong Size!"; break; } // Randomize stride_ to a value less than or equal to 1024 stride_ = rnd_(1024) + 1; if (stride_ < size_) { stride_ = size_; } // Align stride_ to 16 if it's bigger than 16. 
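// A compile-time sanity sketch (an added illustration; this static_assert is
// not part of the original test) of what the masking below does: clearing the
// low four bits rounds down to a multiple of 16, and the clamp above keeps
// the rounded stride at or above size_.
static_assert((33 & ~15) == 32 && (1000 & ~15) == 992,
              "stride_ &= ~15 rounds down to a multiple of 16");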
if (stride_ > 16) { stride_ &= ~15; } input_block_size_ = size_ * size_; output_block_size_ = size_ * stride_; input_block_ = reinterpret_cast( vpx_memalign(16, sizeof(*input_block_) * input_block_size_)); output_block_ = reinterpret_cast( vpx_memalign(16, pixel_size_ * output_block_size_)); output_block_ref_ = reinterpret_cast( vpx_memalign(16, pixel_size_ * output_block_size_)); } virtual void TearDown() { vpx_free(input_block_); input_block_ = NULL; vpx_free(output_block_); output_block_ = NULL; vpx_free(output_block_ref_); output_block_ref_ = NULL; libvpx_test::ClearSystemState(); } void InitMem() { memset(input_block_, 0, sizeof(*input_block_) * input_block_size_); if (pixel_size_ == 1) { for (int j = 0; j < output_block_size_; ++j) { output_block_[j] = output_block_ref_[j] = rnd_.Rand16() & mask_; } } else { ASSERT_EQ(2, pixel_size_); uint16_t *const output = reinterpret_cast(output_block_); uint16_t *const output_ref = reinterpret_cast(output_block_ref_); for (int j = 0; j < output_block_size_; ++j) { output[j] = output_ref[j] = rnd_.Rand16() & mask_; } } } void InitInput() { const int64_t max_coeff = (32766 << (bit_depth_ - 8)) / 4; int64_t max_energy_leftover = max_coeff * max_coeff; for (int j = 0; j < last_nonzero_; ++j) { tran_low_t coeff = static_cast( sqrt(1.0 * max_energy_leftover) * (rnd_.Rand16() - 32768) / 65536); max_energy_leftover -= static_cast(coeff) * coeff; if (max_energy_leftover < 0) { max_energy_leftover = 0; coeff = 0; } input_block_[vp9_default_scan_orders[tx_size_].scan[j]] = coeff; } } void PrintDiff() { if (memcmp(output_block_ref_, output_block_, pixel_size_ * output_block_size_)) { uint16_t ref, opt; for (int y = 0; y < size_; y++) { for (int x = 0; x < size_; x++) { if (pixel_size_ == 1) { ref = output_block_ref_[y * stride_ + x]; opt = output_block_[y * stride_ + x]; } else { ref = reinterpret_cast( output_block_ref_)[y * stride_ + x]; opt = reinterpret_cast(output_block_)[y * stride_ + x]; } if (ref != opt) { printf("dest[%d][%d] diff:%6d (ref),%6d (opt)\n", y, x, ref, opt); } } } printf("\ninput_block_:\n"); for (int y = 0; y < size_; y++) { for (int x = 0; x < size_; x++) { printf("%6d,", input_block_[y * size_ + x]); } printf("\n"); } } } protected: int last_nonzero_; TX_SIZE tx_size_; tran_low_t *input_block_; uint8_t *output_block_; uint8_t *output_block_ref_; int size_; int stride_; int pixel_size_; int input_block_size_; int output_block_size_; int bit_depth_; int mask_; FwdTxfmFunc fwd_txfm_; InvTxfmWithBdFunc full_inv_txfm_; InvTxfmWithBdFunc partial_inv_txfm_; ACMRandom rnd_; }; TEST_P(PartialIDctTest, RunQuantCheck) { const int count_test_block = (size_ != 4) ? kCountTestBlock : 65536; DECLARE_ALIGNED(16, int16_t, input_extreme_block[kMaxNumCoeffs]); DECLARE_ALIGNED(16, tran_low_t, output_ref_block[kMaxNumCoeffs]); InitMem(); for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with input range [-mask_, mask_]. if (size_ != 4) { if (i == 0) { for (int k = 0; k < input_block_size_; ++k) { input_extreme_block[k] = mask_; } } else if (i == 1) { for (int k = 0; k < input_block_size_; ++k) { input_extreme_block[k] = -mask_; } } else { for (int k = 0; k < input_block_size_; ++k) { input_extreme_block[k] = rnd_.Rand8() % 2 ? mask_ : -mask_; } } } else { // Try all possible combinations. for (int k = 0; k < input_block_size_; ++k) { input_extreme_block[k] = (i & (1 << k)) ? 
mask_ : -mask_; } } fwd_txfm_(input_extreme_block, output_ref_block, size_); // quantization with minimum allowed step sizes input_block_[0] = (output_ref_block[0] / 4) * 4; for (int k = 1; k < last_nonzero_; ++k) { const int pos = vp9_default_scan_orders[tx_size_].scan[k]; input_block_[pos] = (output_ref_block[pos] / 4) * 4; } ASM_REGISTER_STATE_CHECK( full_inv_txfm_(input_block_, output_block_ref_, stride_, bit_depth_)); ASM_REGISTER_STATE_CHECK( partial_inv_txfm_(input_block_, output_block_, stride_, bit_depth_)); ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, pixel_size_ * output_block_size_)) << "Error: partial inverse transform produces different results"; } } TEST_P(PartialIDctTest, ResultsMatch) { for (int i = 0; i < kCountTestBlock; ++i) { InitMem(); InitInput(); ASM_REGISTER_STATE_CHECK( full_inv_txfm_(input_block_, output_block_ref_, stride_, bit_depth_)); ASM_REGISTER_STATE_CHECK( partial_inv_txfm_(input_block_, output_block_, stride_, bit_depth_)); ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, pixel_size_ * output_block_size_)) << "Error: partial inverse transform produces different results"; } } TEST_P(PartialIDctTest, AddOutputBlock) { for (int i = 0; i < kCountTestBlock; ++i) { InitMem(); for (int j = 0; j < last_nonzero_; ++j) { input_block_[vp9_default_scan_orders[tx_size_].scan[j]] = 10; } ASM_REGISTER_STATE_CHECK( full_inv_txfm_(input_block_, output_block_ref_, stride_, bit_depth_)); ASM_REGISTER_STATE_CHECK( partial_inv_txfm_(input_block_, output_block_, stride_, bit_depth_)); ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, pixel_size_ * output_block_size_)) << "Error: Transform results are not correctly added to output."; } } TEST_P(PartialIDctTest, SingleExtremeCoeff) { const int16_t max_coeff = std::numeric_limits::max(); const int16_t min_coeff = std::numeric_limits::min(); for (int i = 0; i < last_nonzero_; ++i) { memset(input_block_, 0, sizeof(*input_block_) * input_block_size_); // Run once for min and once for max. for (int j = 0; j < 2; ++j) { const int coeff = j ? min_coeff : max_coeff; memset(output_block_, 0, pixel_size_ * output_block_size_); memset(output_block_ref_, 0, pixel_size_ * output_block_size_); input_block_[vp9_default_scan_orders[tx_size_].scan[i]] = coeff; ASM_REGISTER_STATE_CHECK( full_inv_txfm_(input_block_, output_block_ref_, stride_, bit_depth_)); ASM_REGISTER_STATE_CHECK( partial_inv_txfm_(input_block_, output_block_, stride_, bit_depth_)); ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, pixel_size_ * output_block_size_)) << "Error: Fails with single coeff of " << coeff << " at " << i << "."; } } } TEST_P(PartialIDctTest, DISABLED_Speed) { // Keep runtime stable with transform size. const int kCountSpeedTestBlock = 500000000 / input_block_size_; InitMem(); InitInput(); for (int i = 0; i < kCountSpeedTestBlock; ++i) { ASM_REGISTER_STATE_CHECK( full_inv_txfm_(input_block_, output_block_ref_, stride_, bit_depth_)); } vpx_usec_timer timer; vpx_usec_timer_start(&timer); for (int i = 0; i < kCountSpeedTestBlock; ++i) { partial_inv_txfm_(input_block_, output_block_, stride_, bit_depth_); } libvpx_test::ClearSystemState(); vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer) / 1000); printf("idct%dx%d_%d (%s %d) time: %5d ms\n", size_, size_, last_nonzero_, (pixel_size_ == 1) ? 
"bitdepth" : "high bitdepth", bit_depth_, elapsed_time); ASSERT_EQ(0, memcmp(output_block_ref_, output_block_, pixel_size_ * output_block_size_)) << "Error: partial inverse transform produces different results"; } using std::make_tuple; const PartialInvTxfmParam c_partial_idct_tests[] = { #if CONFIG_VP9_HIGHBITDEPTH make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1024, 8, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1024, 10, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1024, 12, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 135, 8, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 135, 10, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 135, 12, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 34, 8, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 34, 10, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 34, 12, 2), make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1, 8, 2), make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1, 10, 2), make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 256, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 256, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 256, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 38, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 38, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 38, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 10, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 10, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 10, 12, 2), make_tuple(&vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 1, 8, 2), make_tuple(&vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 1, 10, 2), make_tuple(&vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 1, 12, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 64, 8, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 64, 10, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 64, 12, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 12, 8, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 12, 10, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 12, 12, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 1, 8, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 1, 10, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 1, 12, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 16, 8, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 16, 10, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, 
&highbd_wrapper, TX_4X4, 16, 12, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 1, 8, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 1, 10, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 1, 12, 2), #endif // CONFIG_VP9_HIGHBITDEPTH make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 1024, 8, 1), make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 135, 8, 1), make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 34, 8, 1), make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 1, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 256, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 38, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 10, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 1, 8, 1), make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 64, 8, 1), make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 12, 8, 1), make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 1, 8, 1), make_tuple(&vpx_fdct4x4_c, &wrapper, &wrapper, TX_4X4, 16, 8, 1), make_tuple(&vpx_fdct4x4_c, &wrapper, &wrapper, TX_4X4, 1, 8, 1) }; INSTANTIATE_TEST_CASE_P(C, PartialIDctTest, ::testing::ValuesIn(c_partial_idct_tests)); #if !CONFIG_EMULATE_HARDWARE #if HAVE_NEON const PartialInvTxfmParam neon_partial_idct_tests[] = { #if CONFIG_VP9_HIGHBITDEPTH make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1024, 8, 2), make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1024, 10, 2), make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1024, 12, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 135, 8, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 135, 10, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 135, 12, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 34, 8, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 34, 10, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 34, 12, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1, 8, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1, 10, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 256, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 256, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 256, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 38, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 38, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 38, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 10, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 10, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 10, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 1, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, 
TX_16X16, 1, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 1, 12, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 64, 8, 2), make_tuple( &vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 64, 10, 2), make_tuple( &vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 64, 12, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 12, 8, 2), make_tuple( &vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 12, 10, 2), make_tuple( &vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 12, 12, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 1, 8, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 1, 10, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 1, 12, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 16, 8, 2), make_tuple( &vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 16, 10, 2), make_tuple( &vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 16, 12, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 1, 8, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 1, 10, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 1, 12, 2), #endif // CONFIG_VP9_HIGHBITDEPTH make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 1024, 8, 1), make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 135, 8, 1), make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 34, 8, 1), make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 1, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 256, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 38, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 10, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 1, 8, 1), make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 64, 8, 1), make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 12, 8, 1), make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 1, 8, 1), make_tuple(&vpx_fdct4x4_c, &wrapper, &wrapper, TX_4X4, 16, 8, 1), make_tuple(&vpx_fdct4x4_c, &wrapper, &wrapper, TX_4X4, 1, 8, 1) }; INSTANTIATE_TEST_CASE_P(NEON, PartialIDctTest, ::testing::ValuesIn(neon_partial_idct_tests)); #endif // HAVE_NEON #if HAVE_SSE2 // 32x32_135_ is implemented using the 1024 version. 
const PartialInvTxfmParam sse2_partial_idct_tests[] = { #if CONFIG_VP9_HIGHBITDEPTH make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1024, 8, 2), make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1024, 10, 2), make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1024, 12, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 135, 8, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 135, 10, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 135, 12, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 34, 8, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 34, 10, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 34, 12, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1, 8, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1, 10, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 256, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 256, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 256, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 38, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 38, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 38, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 10, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 10, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 10, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 1, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 1, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 1, 12, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 64, 8, 2), make_tuple( &vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 64, 10, 2), make_tuple( &vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 64, 12, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 12, 8, 2), make_tuple( &vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 12, 10, 2), make_tuple( &vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 12, 12, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 1, 8, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 1, 10, 2), make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 1, 12, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 16, 8, 2), make_tuple( &vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 16, 10, 2), make_tuple( &vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 16, 12, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 1, 8, 2), make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 1, 10, 2), 
make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4, 1, 12, 2), #endif // CONFIG_VP9_HIGHBITDEPTH make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 1024, 8, 1), make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 135, 8, 1), make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 34, 8, 1), make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 1, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 256, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 38, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 10, 8, 1), make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 1, 8, 1), make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 64, 8, 1), make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 12, 8, 1), make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 1, 8, 1), make_tuple(&vpx_fdct4x4_c, &wrapper, &wrapper, TX_4X4, 16, 8, 1), make_tuple(&vpx_fdct4x4_c, &wrapper, &wrapper, TX_4X4, 1, 8, 1) }; INSTANTIATE_TEST_CASE_P(SSE2, PartialIDctTest, ::testing::ValuesIn(sse2_partial_idct_tests)); #endif // HAVE_SSE2 #if HAVE_SSSE3 const PartialInvTxfmParam ssse3_partial_idct_tests[] = { make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 135, 8, 1), make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 34, 8, 1), make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 12, 8, 1) }; INSTANTIATE_TEST_CASE_P(SSSE3, PartialIDctTest, ::testing::ValuesIn(ssse3_partial_idct_tests)); #endif // HAVE_SSSE3 #if HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH const PartialInvTxfmParam sse4_1_partial_idct_tests[] = { make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1024, 8, 2), make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1024, 10, 2), make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 1024, 12, 2), make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 135, 8, 2), make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 135, 10, 2), make_tuple(&vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 135, 12, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 34, 8, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 34, 10, 2), make_tuple( &vpx_highbd_fdct32x32_c, &highbd_wrapper, &highbd_wrapper, TX_32X32, 34, 12, 2), make_tuple(&vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 256, 8, 2), make_tuple(&vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 256, 10, 2), make_tuple(&vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 256, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 38, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 38, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 38, 12, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 10, 8, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 10, 10, 2), make_tuple( &vpx_highbd_fdct16x16_c, &highbd_wrapper, &highbd_wrapper, TX_16X16, 10, 12, 2), make_tuple( &vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 64, 8, 2), make_tuple( &vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 64, 10, 2), make_tuple( &vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8, 64, 
             12, 2),
  make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8,
             12, 8, 2),
  make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8,
             12, 10, 2),
  make_tuple(&vpx_highbd_fdct8x8_c, &highbd_wrapper, &highbd_wrapper, TX_8X8,
             12, 12, 2),
  make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4,
             16, 8, 2),
  make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4,
             16, 10, 2),
  make_tuple(&vpx_highbd_fdct4x4_c, &highbd_wrapper, &highbd_wrapper, TX_4X4,
             16, 12, 2)
};

INSTANTIATE_TEST_CASE_P(SSE4_1, PartialIDctTest,
                        ::testing::ValuesIn(sse4_1_partial_idct_tests));
#endif  // HAVE_SSE4_1 && CONFIG_VP9_HIGHBITDEPTH

#if HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH
const PartialInvTxfmParam dspr2_partial_idct_tests[] = {
  make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 1024, 8, 1),
  make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 34, 8, 1),
  make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 1, 8, 1),
  make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 256, 8, 1),
  make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 10, 8, 1),
  make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 1, 8, 1),
  make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 64, 8, 1),
  make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 12, 8, 1),
  make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 1, 8, 1),
  make_tuple(&vpx_fdct4x4_c, &wrapper, &wrapper, TX_4X4, 16, 8, 1),
  make_tuple(&vpx_fdct4x4_c, &wrapper, &wrapper, TX_4X4, 1, 8, 1)
};

INSTANTIATE_TEST_CASE_P(DSPR2, PartialIDctTest,
                        ::testing::ValuesIn(dspr2_partial_idct_tests));
#endif  // HAVE_DSPR2 && !CONFIG_VP9_HIGHBITDEPTH

#if HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH
// 32x32_135_ is implemented using the 1024 version.
const PartialInvTxfmParam msa_partial_idct_tests[] = {
  make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 1024, 8, 1),
  make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 34, 8, 1),
  make_tuple(&vpx_fdct32x32_c, &wrapper, &wrapper, TX_32X32, 1, 8, 1),
  make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 256, 8, 1),
  make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 10, 8, 1),
  make_tuple(&vpx_fdct16x16_c, &wrapper, &wrapper, TX_16X16, 1, 8, 1),
  make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 64, 8, 1),
  make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 12, 8, 1),
  make_tuple(&vpx_fdct8x8_c, &wrapper, &wrapper, TX_8X8, 1, 8, 1),
  make_tuple(&vpx_fdct4x4_c, &wrapper, &wrapper, TX_4X4, 16, 8, 1),
  make_tuple(&vpx_fdct4x4_c, &wrapper, &wrapper, TX_4X4, 1, 8, 1)
};

INSTANTIATE_TEST_CASE_P(MSA, PartialIDctTest,
                        ::testing::ValuesIn(msa_partial_idct_tests));
#endif  // HAVE_MSA && !CONFIG_VP9_HIGHBITDEPTH

#endif  // !CONFIG_EMULATE_HARDWARE

}  // namespace

libvpx-1.8.2/test/postproc.sh
#!/bin/sh
##
##  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
##
##  Use of this source code is governed by a BSD-style license
##  that can be found in the LICENSE file in the root of the source
##  tree. An additional intellectual property rights grant can be found
##  in the file PATENTS.  All contributing project authors may
##  be found in the AUTHORS file in the root of the source tree.
##
##  This file tests the libvpx postproc example code. To add new tests to this
##  file, do the following:
##    1. Write a shell function (this is your test).
##    2. Add the function to postproc_tests (on a new line).
##
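##  As an illustration of steps 1 and 2 above, a new test would look roughly
##  like the sketch below (the function name is hypothetical; it reuses the
##  postproc() helper and availability checks defined later in this file):
##    postproc_my_new_case() {
##      if [ "$(vp8_decode_available)" = "yes" ]; then
##        postproc "${VP8_IVF_FILE}" vp8 || return 1
##      fi
##    }
##  postproc_my_new_case would then be appended to the postproc_tests string
##  near the end of this file.
##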
. $(dirname $0)/tools_common.sh

# Environment check: Make sure input is available:
#   $VP8_IVF_FILE and $VP9_IVF_FILE are required.
postproc_verify_environment() {
  if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then
    echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH."
    return 1
  fi
}

# Runs postproc using $1 as input file. $2 is the codec name, and is used
# solely to name the output file.
postproc() {
  local decoder="${LIBVPX_BIN_PATH}/postproc${VPX_TEST_EXE_SUFFIX}"
  local input_file="$1"
  local codec="$2"
  local output_file="${VPX_TEST_OUTPUT_DIR}/postproc_${codec}.raw"

  if [ ! -x "${decoder}" ]; then
    elog "${decoder} does not exist or is not executable."
    return 1
  fi

  eval "${VPX_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \
      ${devnull}

  [ -e "${output_file}" ] || return 1
}

postproc_vp8() {
  if [ "$(vp8_decode_available)" = "yes" ]; then
    postproc "${VP8_IVF_FILE}" vp8 || return 1
  fi
}

postproc_vp9() {
  if [ "$(vpx_config_option_enabled CONFIG_VP9_POSTPROC)" = "yes" ]; then
    if [ "$(vp9_decode_available)" = "yes" ]; then
      postproc "${VP9_IVF_FILE}" vp9 || return 1
    fi
  fi
}

postproc_tests="postproc_vp8 postproc_vp9"

run_tests postproc_verify_environment "${postproc_tests}"

libvpx-1.8.2/test/pp_filter_test.cc
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <limits.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/bench.h"
#include "test/buffer.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"

using libvpx_test::ACMRandom;
using libvpx_test::Buffer;

typedef void (*VpxPostProcDownAndAcrossMbRowFunc)(
    unsigned char *src_ptr, unsigned char *dst_ptr, int src_pixels_per_line,
    int dst_pixels_per_line, int cols, unsigned char *flimit, int size);

typedef void (*VpxMbPostProcAcrossIpFunc)(unsigned char *src, int pitch,
                                          int rows, int cols, int flimit);

typedef void (*VpxMbPostProcDownFunc)(unsigned char *dst, int pitch, int rows,
                                      int cols, int flimit);

namespace {
// Compute the filter level used in post proc from the loop filter strength
int q2mbl(int x) {
  if (x < 20) x = 20;

  x = 50 + (x - 50) * 10 / 8;
  return x * x / 3;
}

class VpxPostProcDownAndAcrossMbRowTest
    : public AbstractBench,
      public ::testing::TestWithParam<VpxPostProcDownAndAcrossMbRowFunc> {
 public:
  VpxPostProcDownAndAcrossMbRowTest()
      : mb_post_proc_down_and_across_(GetParam()) {}
  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  virtual void Run();

  const VpxPostProcDownAndAcrossMbRowFunc mb_post_proc_down_and_across_;
  // Size of the underlying data block that will be filtered.
  int block_width_;
  int block_height_;
  Buffer<uint8_t> *src_image_;
  Buffer<uint8_t> *dst_image_;
  uint8_t *flimits_;
};

void VpxPostProcDownAndAcrossMbRowTest::Run() {
  mb_post_proc_down_and_across_(
      src_image_->TopLeftPixel(), dst_image_->TopLeftPixel(),
      src_image_->stride(), dst_image_->stride(), block_width_, flimits_, 16);
}

// Test routine for the VPx post-processing function
// vpx_post_proc_down_and_across_mb_row_c.
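// The check below relies on the filter producing a constant value on each
// output row of a constant-valued input block: kExpectedOutput holds one
// value per row (16 entries), and each value is compared against all 16
// pixels in that row.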
TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckFilterOutput) { // Size of the underlying data block that will be filtered. block_width_ = 16; block_height_ = 16; // 5-tap filter needs 2 padding rows above and below the block in the input. Buffer src_image = Buffer(block_width_, block_height_, 2); ASSERT_TRUE(src_image.Init()); // Filter extends output block by 8 samples at left and right edges. // Though the left padding is only 8 bytes, the assembly code tries to // read 16 bytes before the pointer. Buffer dst_image = Buffer(block_width_, block_height_, 8, 16, 8, 8); ASSERT_TRUE(dst_image.Init()); flimits_ = reinterpret_cast(vpx_memalign(16, block_width_)); (void)memset(flimits_, 255, block_width_); // Initialize pixels in the input: // block pixels to value 1, // border pixels to value 10. src_image.SetPadding(10); src_image.Set(1); // Initialize pixels in the output to 99. dst_image.Set(99); ASM_REGISTER_STATE_CHECK(mb_post_proc_down_and_across_( src_image.TopLeftPixel(), dst_image.TopLeftPixel(), src_image.stride(), dst_image.stride(), block_width_, flimits_, 16)); static const uint8_t kExpectedOutput[] = { 4, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 3, 4 }; uint8_t *pixel_ptr = dst_image.TopLeftPixel(); for (int i = 0; i < block_height_; ++i) { for (int j = 0; j < block_width_; ++j) { ASSERT_EQ(kExpectedOutput[i], pixel_ptr[j]) << "at (" << i << ", " << j << ")"; } pixel_ptr += dst_image.stride(); } vpx_free(flimits_); }; TEST_P(VpxPostProcDownAndAcrossMbRowTest, CheckCvsAssembly) { // Size of the underlying data block that will be filtered. // Y blocks are always a multiple of 16 wide and exactly 16 high. U and V // blocks are always a multiple of 8 wide and exactly 8 high. block_width_ = 136; block_height_ = 16; // 5-tap filter needs 2 padding rows above and below the block in the input. // SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16. Buffer src_image = Buffer(block_width_, block_height_, 2, 2, 10, 2); ASSERT_TRUE(src_image.Init()); // Filter extends output block by 8 samples at left and right edges. // Though the left padding is only 8 bytes, there is 'above' padding as well // so when the assembly code tries to read 16 bytes before the pointer it is // not a problem. // SSE2 reads in blocks of 16. Pad an extra 8 in case the width is not %16. Buffer dst_image = Buffer(block_width_, block_height_, 8, 8, 16, 8); ASSERT_TRUE(dst_image.Init()); Buffer dst_image_ref = Buffer(block_width_, block_height_, 8); ASSERT_TRUE(dst_image_ref.Init()); // Filter values are set in blocks of 16 for Y and 8 for U/V. Each macroblock // can have a different filter. SSE2 assembly reads flimits in blocks of 16 so // it must be padded out. const int flimits_width = block_width_ % 16 ? block_width_ + 8 : block_width_; flimits_ = reinterpret_cast(vpx_memalign(16, flimits_width)); ACMRandom rnd; rnd.Reset(ACMRandom::DeterministicSeed()); // Initialize pixels in the input: // block pixels to random values. // border pixels to value 10. 
src_image.SetPadding(10); src_image.Set(&rnd, &ACMRandom::Rand8); for (int blocks = 0; blocks < block_width_; blocks += 8) { (void)memset(flimits_, 0, sizeof(*flimits_) * flimits_width); for (int f = 0; f < 255; f++) { (void)memset(flimits_ + blocks, f, sizeof(*flimits_) * 8); dst_image.Set(0); dst_image_ref.Set(0); vpx_post_proc_down_and_across_mb_row_c( src_image.TopLeftPixel(), dst_image_ref.TopLeftPixel(), src_image.stride(), dst_image_ref.stride(), block_width_, flimits_, block_height_); ASM_REGISTER_STATE_CHECK(mb_post_proc_down_and_across_( src_image.TopLeftPixel(), dst_image.TopLeftPixel(), src_image.stride(), dst_image.stride(), block_width_, flimits_, block_height_)); ASSERT_TRUE(dst_image.CheckValues(dst_image_ref)); } } vpx_free(flimits_); } TEST_P(VpxPostProcDownAndAcrossMbRowTest, DISABLED_Speed) { // Size of the underlying data block that will be filtered. block_width_ = 16; block_height_ = 16; // 5-tap filter needs 2 padding rows above and below the block in the input. Buffer src_image = Buffer(block_width_, block_height_, 2); ASSERT_TRUE(src_image.Init()); this->src_image_ = &src_image; // Filter extends output block by 8 samples at left and right edges. // Though the left padding is only 8 bytes, the assembly code tries to // read 16 bytes before the pointer. Buffer dst_image = Buffer(block_width_, block_height_, 8, 16, 8, 8); ASSERT_TRUE(dst_image.Init()); this->dst_image_ = &dst_image; flimits_ = reinterpret_cast(vpx_memalign(16, block_width_)); (void)memset(flimits_, 255, block_width_); // Initialize pixels in the input: // block pixels to value 1, // border pixels to value 10. src_image.SetPadding(10); src_image.Set(1); // Initialize pixels in the output to 99. dst_image.Set(99); RunNTimes(INT16_MAX); PrintMedian("16x16"); vpx_free(flimits_); }; class VpxMbPostProcAcrossIpTest : public AbstractBench, public ::testing::TestWithParam { public: VpxMbPostProcAcrossIpTest() : rows_(16), cols_(16), mb_post_proc_across_ip_(GetParam()), src_(Buffer(rows_, cols_, 8, 8, 17, 8)) {} virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: virtual void Run(); void SetCols(unsigned char *s, int rows, int cols, int src_width) { for (int r = 0; r < rows; r++) { for (int c = 0; c < cols; c++) { s[c] = c; } s += src_width; } } void RunComparison(const unsigned char *expected_output, unsigned char *src_c, int rows, int cols, int src_pitch) { for (int r = 0; r < rows; r++) { for (int c = 0; c < cols; c++) { ASSERT_EQ(expected_output[c], src_c[c]) << "at (" << r << ", " << c << ")"; } src_c += src_pitch; } } void RunFilterLevel(unsigned char *s, int rows, int cols, int src_width, int filter_level, const unsigned char *expected_output) { ASM_REGISTER_STATE_CHECK( GetParam()(s, src_width, rows, cols, filter_level)); RunComparison(expected_output, s, rows, cols, src_width); } const int rows_; const int cols_; const VpxMbPostProcAcrossIpFunc mb_post_proc_across_ip_; Buffer src_; }; void VpxMbPostProcAcrossIpTest::Run() { mb_post_proc_across_ip_(src_.TopLeftPixel(), src_.stride(), rows_, cols_, q2mbl(0)); } TEST_P(VpxMbPostProcAcrossIpTest, CheckLowFilterOutput) { ASSERT_TRUE(src_.Init()); src_.SetPadding(10); SetCols(src_.TopLeftPixel(), rows_, cols_, src_.stride()); Buffer expected_output = Buffer(cols_, rows_, 0); ASSERT_TRUE(expected_output.Init()); SetCols(expected_output.TopLeftPixel(), rows_, cols_, expected_output.stride()); RunFilterLevel(src_.TopLeftPixel(), rows_, cols_, src_.stride(), q2mbl(0), expected_output.TopLeftPixel()); } TEST_P(VpxMbPostProcAcrossIpTest, 
CheckMediumFilterOutput) { ASSERT_TRUE(src_.Init()); src_.SetPadding(10); SetCols(src_.TopLeftPixel(), rows_, cols_, src_.stride()); static const unsigned char kExpectedOutput[] = { 2, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 13 }; RunFilterLevel(src_.TopLeftPixel(), rows_, cols_, src_.stride(), q2mbl(70), kExpectedOutput); } TEST_P(VpxMbPostProcAcrossIpTest, CheckHighFilterOutput) { ASSERT_TRUE(src_.Init()); src_.SetPadding(10); SetCols(src_.TopLeftPixel(), rows_, cols_, src_.stride()); static const unsigned char kExpectedOutput[] = { 2, 2, 3, 4, 4, 5, 6, 7, 8, 9, 10, 11, 11, 12, 13, 13 }; RunFilterLevel(src_.TopLeftPixel(), rows_, cols_, src_.stride(), INT_MAX, kExpectedOutput); SetCols(src_.TopLeftPixel(), rows_, cols_, src_.stride()); RunFilterLevel(src_.TopLeftPixel(), rows_, cols_, src_.stride(), q2mbl(100), kExpectedOutput); } TEST_P(VpxMbPostProcAcrossIpTest, CheckCvsAssembly) { Buffer c_mem = Buffer(cols_, rows_, 8, 8, 17, 8); ASSERT_TRUE(c_mem.Init()); Buffer asm_mem = Buffer(cols_, rows_, 8, 8, 17, 8); ASSERT_TRUE(asm_mem.Init()); // When level >= 100, the filter behaves the same as the level = INT_MAX // When level < 20, it behaves the same as the level = 0 for (int level = 0; level < 100; level++) { c_mem.SetPadding(10); asm_mem.SetPadding(10); SetCols(c_mem.TopLeftPixel(), rows_, cols_, c_mem.stride()); SetCols(asm_mem.TopLeftPixel(), rows_, cols_, asm_mem.stride()); vpx_mbpost_proc_across_ip_c(c_mem.TopLeftPixel(), c_mem.stride(), rows_, cols_, q2mbl(level)); ASM_REGISTER_STATE_CHECK(GetParam()( asm_mem.TopLeftPixel(), asm_mem.stride(), rows_, cols_, q2mbl(level))); ASSERT_TRUE(asm_mem.CheckValues(c_mem)); } } TEST_P(VpxMbPostProcAcrossIpTest, DISABLED_Speed) { ASSERT_TRUE(src_.Init()); src_.SetPadding(10); SetCols(src_.TopLeftPixel(), rows_, cols_, src_.stride()); RunNTimes(100000); PrintMedian("16x16"); } class VpxMbPostProcDownTest : public AbstractBench, public ::testing::TestWithParam { public: VpxMbPostProcDownTest() : rows_(16), cols_(16), mb_post_proc_down_(GetParam()), src_c_(Buffer(rows_, cols_, 8, 8, 8, 17)) {} virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: virtual void Run(); void SetRows(unsigned char *src_c, int rows, int cols, int src_width) { for (int r = 0; r < rows; r++) { memset(src_c, r, cols); src_c += src_width; } } void RunComparison(const unsigned char *expected_output, unsigned char *src_c, int rows, int cols, int src_pitch) { for (int r = 0; r < rows; r++) { for (int c = 0; c < cols; c++) { ASSERT_EQ(expected_output[r * rows + c], src_c[c]) << "at (" << r << ", " << c << ")"; } src_c += src_pitch; } } void RunFilterLevel(unsigned char *s, int rows, int cols, int src_width, int filter_level, const unsigned char *expected_output) { ASM_REGISTER_STATE_CHECK( mb_post_proc_down_(s, src_width, rows, cols, filter_level)); RunComparison(expected_output, s, rows, cols, src_width); } const int rows_; const int cols_; const VpxMbPostProcDownFunc mb_post_proc_down_; Buffer src_c_; }; void VpxMbPostProcDownTest::Run() { mb_post_proc_down_(src_c_.TopLeftPixel(), src_c_.stride(), rows_, cols_, q2mbl(0)); } TEST_P(VpxMbPostProcDownTest, CheckHighFilterOutput) { ASSERT_TRUE(src_c_.Init()); src_c_.SetPadding(10); SetRows(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride()); static const unsigned char kExpectedOutput[] = { 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 3, 4, 4, 3, 3, 3, 4, 4, 3, 4, 4, 3, 3, 4, 5, 4, 4, 4, 4, 4, 4, 
4, 5, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 8, 9, 9, 8, 8, 8, 9, 9, 8, 9, 9, 8, 8, 8, 9, 9, 10, 10, 9, 9, 9, 10, 10, 9, 10, 10, 9, 9, 9, 10, 10, 10, 11, 10, 10, 10, 11, 10, 11, 10, 11, 10, 10, 10, 11, 10, 11, 11, 11, 11, 11, 11, 11, 12, 11, 11, 11, 11, 11, 11, 11, 12, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 12, 13, 12, 13, 12, 12, 12, 13, 12, 13, 12, 13, 12, 13, 13, 13, 14, 13, 13, 13, 13, 13, 13, 13, 14, 13, 13, 13, 13 }; RunFilterLevel(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride(), INT_MAX, kExpectedOutput); src_c_.SetPadding(10); SetRows(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride()); RunFilterLevel(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride(), q2mbl(100), kExpectedOutput); } TEST_P(VpxMbPostProcDownTest, CheckMediumFilterOutput) { ASSERT_TRUE(src_c_.Init()); src_c_.SetPadding(10); SetRows(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride()); static const unsigned char kExpectedOutput[] = { 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 12, 12, 13, 12, 13, 12, 13, 12, 12, 12, 13, 12, 13, 12, 13, 12, 13, 13, 13, 14, 13, 13, 13, 13, 13, 13, 13, 14, 13, 13, 13, 13 }; RunFilterLevel(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride(), q2mbl(70), kExpectedOutput); } TEST_P(VpxMbPostProcDownTest, CheckLowFilterOutput) { ASSERT_TRUE(src_c_.Init()); src_c_.SetPadding(10); SetRows(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride()); unsigned char *expected_output = new unsigned char[rows_ * cols_]; ASSERT_TRUE(expected_output != NULL); SetRows(expected_output, rows_, cols_, cols_); RunFilterLevel(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride(), q2mbl(0), expected_output); delete[] expected_output; } TEST_P(VpxMbPostProcDownTest, CheckCvsAssembly) { ACMRandom rnd; rnd.Reset(ACMRandom::DeterministicSeed()); ASSERT_TRUE(src_c_.Init()); Buffer src_asm = Buffer(cols_, rows_, 8, 8, 8, 17); ASSERT_TRUE(src_asm.Init()); for (int level = 0; level < 100; level++) { src_c_.SetPadding(10); src_asm.SetPadding(10); src_c_.Set(&rnd, &ACMRandom::Rand8); src_asm.CopyFrom(src_c_); vpx_mbpost_proc_down_c(src_c_.TopLeftPixel(), src_c_.stride(), rows_, cols_, q2mbl(level)); ASM_REGISTER_STATE_CHECK(mb_post_proc_down_( src_asm.TopLeftPixel(), src_asm.stride(), rows_, cols_, q2mbl(level))); ASSERT_TRUE(src_asm.CheckValues(src_c_)); src_c_.SetPadding(10); src_asm.SetPadding(10); src_c_.Set(&rnd, &ACMRandom::Rand8Extremes); src_asm.CopyFrom(src_c_); vpx_mbpost_proc_down_c(src_c_.TopLeftPixel(), src_c_.stride(), rows_, cols_, q2mbl(level)); ASM_REGISTER_STATE_CHECK(mb_post_proc_down_( src_asm.TopLeftPixel(), src_asm.stride(), rows_, cols_, q2mbl(level))); ASSERT_TRUE(src_asm.CheckValues(src_c_)); } } 
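// For reference, the integer arithmetic in q2mbl() maps the loop filter
// strengths used by the tests above to post proc filter levels as follows:
//   q2mbl(0)   -> 56
//   q2mbl(70)  -> 1875
//   q2mbl(100) -> 4181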
TEST_P(VpxMbPostProcDownTest, DISABLED_Speed) {
  ASSERT_TRUE(src_c_.Init());
  src_c_.SetPadding(10);

  SetRows(src_c_.TopLeftPixel(), rows_, cols_, src_c_.stride());

  RunNTimes(100000);
  PrintMedian("16x16");
}

INSTANTIATE_TEST_CASE_P(
    C, VpxPostProcDownAndAcrossMbRowTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_c));

INSTANTIATE_TEST_CASE_P(C, VpxMbPostProcAcrossIpTest,
                        ::testing::Values(vpx_mbpost_proc_across_ip_c));

INSTANTIATE_TEST_CASE_P(C, VpxMbPostProcDownTest,
                        ::testing::Values(vpx_mbpost_proc_down_c));

#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
    SSE2, VpxPostProcDownAndAcrossMbRowTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_sse2));

INSTANTIATE_TEST_CASE_P(SSE2, VpxMbPostProcAcrossIpTest,
                        ::testing::Values(vpx_mbpost_proc_across_ip_sse2));

INSTANTIATE_TEST_CASE_P(SSE2, VpxMbPostProcDownTest,
                        ::testing::Values(vpx_mbpost_proc_down_sse2));
#endif  // HAVE_SSE2

#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(
    NEON, VpxPostProcDownAndAcrossMbRowTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_neon));

INSTANTIATE_TEST_CASE_P(NEON, VpxMbPostProcAcrossIpTest,
                        ::testing::Values(vpx_mbpost_proc_across_ip_neon));

INSTANTIATE_TEST_CASE_P(NEON, VpxMbPostProcDownTest,
                        ::testing::Values(vpx_mbpost_proc_down_neon));
#endif  // HAVE_NEON

#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(
    MSA, VpxPostProcDownAndAcrossMbRowTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_msa));

INSTANTIATE_TEST_CASE_P(MSA, VpxMbPostProcAcrossIpTest,
                        ::testing::Values(vpx_mbpost_proc_across_ip_msa));

INSTANTIATE_TEST_CASE_P(MSA, VpxMbPostProcDownTest,
                        ::testing::Values(vpx_mbpost_proc_down_msa));
#endif  // HAVE_MSA

#if HAVE_VSX
INSTANTIATE_TEST_CASE_P(
    VSX, VpxPostProcDownAndAcrossMbRowTest,
    ::testing::Values(vpx_post_proc_down_and_across_mb_row_vsx));

INSTANTIATE_TEST_CASE_P(VSX, VpxMbPostProcAcrossIpTest,
                        ::testing::Values(vpx_mbpost_proc_across_ip_vsx));

INSTANTIATE_TEST_CASE_P(VSX, VpxMbPostProcDownTest,
                        ::testing::Values(vpx_mbpost_proc_down_vsx));
#endif  // HAVE_VSX

}  // namespace

libvpx-1.8.2/test/predict_test.cc
/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
*/ #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vp8_rtcd.h" #include "./vpx_config.h" #include "test/acm_random.h" #include "test/bench.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/msvc.h" namespace { using libvpx_test::ACMRandom; using std::make_tuple; typedef void (*PredictFunc)(uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch); typedef std::tuple PredictParam; class PredictTestBase : public AbstractBench, public ::testing::TestWithParam { public: PredictTestBase() : width_(GET_PARAM(0)), height_(GET_PARAM(1)), predict_(GET_PARAM(2)), src_(NULL), padded_dst_(NULL), dst_(NULL), dst_c_(NULL) {} virtual void SetUp() { src_ = new uint8_t[kSrcSize]; ASSERT_TRUE(src_ != NULL); // padded_dst_ provides a buffer of kBorderSize around the destination // memory to facilitate detecting out of bounds writes. dst_stride_ = kBorderSize + width_ + kBorderSize; padded_dst_size_ = dst_stride_ * (kBorderSize + height_ + kBorderSize); padded_dst_ = reinterpret_cast(vpx_memalign(16, padded_dst_size_)); ASSERT_TRUE(padded_dst_ != NULL); dst_ = padded_dst_ + (kBorderSize * dst_stride_) + kBorderSize; dst_c_ = new uint8_t[16 * 16]; ASSERT_TRUE(dst_c_ != NULL); memset(src_, 0, kSrcSize); memset(padded_dst_, 128, padded_dst_size_); memset(dst_c_, 0, 16 * 16); } virtual void TearDown() { delete[] src_; src_ = NULL; vpx_free(padded_dst_); padded_dst_ = NULL; dst_ = NULL; delete[] dst_c_; dst_c_ = NULL; libvpx_test::ClearSystemState(); } protected: // Make reference arrays big enough for 16x16 functions. Six-tap filters need // 5 extra pixels outside of the macroblock. static const int kSrcStride = 21; static const int kSrcSize = kSrcStride * kSrcStride; static const int kBorderSize = 16; int width_; int height_; PredictFunc predict_; uint8_t *src_; uint8_t *padded_dst_; uint8_t *dst_; int padded_dst_size_; uint8_t *dst_c_; int dst_stride_; bool CompareBuffers(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride) const { for (int height = 0; height < height_; ++height) { EXPECT_EQ(0, memcmp(a + height * a_stride, b + height * b_stride, sizeof(*a) * width_)) << "Row " << height << " does not match."; } return !HasFailure(); } // Given a block of memory 'a' with size 'a_size', determine if all regions // excepting block 'b' described by 'b_stride', 'b_height', and 'b_width' // match pixel value 'c'. bool CheckBorder(const uint8_t *a, int a_size, const uint8_t *b, int b_width, int b_height, int b_stride, uint8_t c) const { const uint8_t *a_end = a + a_size; const int b_size = (b_stride * b_height) + b_width; const uint8_t *b_end = b + b_size; const int left_border = (b_stride - b_width) / 2; const int right_border = left_border + ((b_stride - b_width) % 2); EXPECT_GE(b - left_border, a) << "'b' does not start within 'a'"; EXPECT_LE(b_end + right_border, a_end) << "'b' does not end within 'a'"; // Top border. for (int pixel = 0; pixel < b - a - left_border; ++pixel) { EXPECT_EQ(c, a[pixel]) << "Mismatch at " << pixel << " in top border."; } // Left border. for (int height = 0; height < b_height; ++height) { for (int width = left_border; width > 0; --width) { EXPECT_EQ(c, b[height * b_stride - width]) << "Mismatch at row " << height << " column " << left_border - width << " in left border."; } } // Right border. 
for (int height = 0; height < b_height; ++height) { for (int width = b_width; width < b_width + right_border; ++width) { EXPECT_EQ(c, b[height * b_stride + width]) << "Mismatch at row " << height << " column " << width - b_width << " in right border."; } } // Bottom border. for (int pixel = static_cast(b - a + b_size); pixel < a_size; ++pixel) { EXPECT_EQ(c, a[pixel]) << "Mismatch at " << pixel << " in bottom border."; } return !HasFailure(); } void TestWithRandomData(PredictFunc reference) { ACMRandom rnd(ACMRandom::DeterministicSeed()); // Run tests for almost all possible offsets. for (int xoffset = 0; xoffset < 8; ++xoffset) { for (int yoffset = 0; yoffset < 8; ++yoffset) { if (xoffset == 0 && yoffset == 0) { // This represents a copy which is not required to be handled by this // module. continue; } for (int i = 0; i < kSrcSize; ++i) { src_[i] = rnd.Rand8(); } reference(&src_[kSrcStride * 2 + 2], kSrcStride, xoffset, yoffset, dst_c_, 16); ASM_REGISTER_STATE_CHECK(predict_(&src_[kSrcStride * 2 + 2], kSrcStride, xoffset, yoffset, dst_, dst_stride_)); ASSERT_TRUE(CompareBuffers(dst_c_, 16, dst_, dst_stride_)); ASSERT_TRUE(CheckBorder(padded_dst_, padded_dst_size_, dst_, width_, height_, dst_stride_, 128)); } } } void TestWithUnalignedDst(PredictFunc reference) { ACMRandom rnd(ACMRandom::DeterministicSeed()); // Only the 4x4 need to be able to handle unaligned writes. if (width_ == 4 && height_ == 4) { for (int xoffset = 0; xoffset < 8; ++xoffset) { for (int yoffset = 0; yoffset < 8; ++yoffset) { if (xoffset == 0 && yoffset == 0) { continue; } for (int i = 0; i < kSrcSize; ++i) { src_[i] = rnd.Rand8(); } reference(&src_[kSrcStride * 2 + 2], kSrcStride, xoffset, yoffset, dst_c_, 16); for (int i = 1; i < 4; ++i) { memset(padded_dst_, 128, padded_dst_size_); ASM_REGISTER_STATE_CHECK(predict_(&src_[kSrcStride * 2 + 2], kSrcStride, xoffset, yoffset, dst_ + i, dst_stride_ + i)); ASSERT_TRUE(CompareBuffers(dst_c_, 16, dst_ + i, dst_stride_ + i)); ASSERT_TRUE(CheckBorder(padded_dst_, padded_dst_size_, dst_ + i, width_, height_, dst_stride_ + i, 128)); } } } } } void Run() { for (int xoffset = 0; xoffset < 8; ++xoffset) { for (int yoffset = 0; yoffset < 8; ++yoffset) { if (xoffset == 0 && yoffset == 0) { continue; } predict_(&src_[kSrcStride * 2 + 2], kSrcStride, xoffset, yoffset, dst_, dst_stride_); } } } }; // namespace class SixtapPredictTest : public PredictTestBase {}; TEST_P(SixtapPredictTest, TestWithRandomData) { TestWithRandomData(vp8_sixtap_predict16x16_c); } TEST_P(SixtapPredictTest, TestWithUnalignedDst) { TestWithUnalignedDst(vp8_sixtap_predict16x16_c); } TEST_P(SixtapPredictTest, TestWithPresetData) { // Test input static const uint8_t kTestData[kSrcSize] = { 184, 4, 191, 82, 92, 41, 0, 1, 226, 236, 172, 20, 182, 42, 226, 177, 79, 94, 77, 179, 203, 206, 198, 22, 192, 19, 75, 17, 192, 44, 233, 120, 48, 168, 203, 141, 210, 203, 143, 180, 184, 59, 201, 110, 102, 171, 32, 182, 10, 109, 105, 213, 60, 47, 236, 253, 67, 55, 14, 3, 99, 247, 124, 148, 159, 71, 34, 114, 19, 177, 38, 203, 237, 239, 58, 83, 155, 91, 10, 166, 201, 115, 124, 5, 163, 104, 2, 231, 160, 16, 234, 4, 8, 103, 153, 167, 174, 187, 26, 193, 109, 64, 141, 90, 48, 200, 174, 204, 36, 184, 114, 237, 43, 238, 242, 207, 86, 245, 182, 247, 6, 161, 251, 14, 8, 148, 182, 182, 79, 208, 120, 188, 17, 6, 23, 65, 206, 197, 13, 242, 126, 128, 224, 170, 110, 211, 121, 197, 200, 47, 188, 207, 208, 184, 221, 216, 76, 148, 143, 156, 100, 8, 89, 117, 14, 112, 183, 221, 54, 197, 208, 180, 69, 176, 94, 180, 131, 215, 121, 76, 7, 54, 28, 216, 
238, 249, 176, 58, 142, 64, 215, 242, 72, 49, 104, 87, 161, 32, 52, 216, 230, 4, 141, 44, 181, 235, 224, 57, 195, 89, 134, 203, 144, 162, 163, 126, 156, 84, 185, 42, 148, 145, 29, 221, 194, 134, 52, 100, 166, 105, 60, 140, 110, 201, 184, 35, 181, 153, 93, 121, 243, 227, 68, 131, 134, 232, 2, 35, 60, 187, 77, 209, 76, 106, 174, 15, 241, 227, 115, 151, 77, 175, 36, 187, 121, 221, 223, 47, 118, 61, 168, 105, 32, 237, 236, 167, 213, 238, 202, 17, 170, 24, 226, 247, 131, 145, 6, 116, 117, 121, 11, 194, 41, 48, 126, 162, 13, 93, 209, 131, 154, 122, 237, 187, 103, 217, 99, 60, 200, 45, 78, 115, 69, 49, 106, 200, 194, 112, 60, 56, 234, 72, 251, 19, 120, 121, 182, 134, 215, 135, 10, 114, 2, 247, 46, 105, 209, 145, 165, 153, 191, 243, 12, 5, 36, 119, 206, 231, 231, 11, 32, 209, 83, 27, 229, 204, 149, 155, 83, 109, 35, 93, 223, 37, 84, 14, 142, 37, 160, 52, 191, 96, 40, 204, 101, 77, 67, 52, 53, 43, 63, 85, 253, 147, 113, 226, 96, 6, 125, 179, 115, 161, 17, 83, 198, 101, 98, 85, 139, 3, 137, 75, 99, 178, 23, 201, 255, 91, 253, 52, 134, 60, 138, 131, 208, 251, 101, 48, 2, 227, 228, 118, 132, 245, 202, 75, 91, 44, 160, 231, 47, 41, 50, 147, 220, 74, 92, 219, 165, 89, 16 }; // Expected results for xoffset = 2 and yoffset = 2. static const int kExpectedDstStride = 16; static const uint8_t kExpectedDst[256] = { 117, 102, 74, 135, 42, 98, 175, 206, 70, 73, 222, 197, 50, 24, 39, 49, 38, 105, 90, 47, 169, 40, 171, 215, 200, 73, 109, 141, 53, 85, 177, 164, 79, 208, 124, 89, 212, 18, 81, 145, 151, 164, 217, 153, 91, 154, 102, 102, 159, 75, 164, 152, 136, 51, 213, 219, 186, 116, 193, 224, 186, 36, 231, 208, 84, 211, 155, 167, 35, 59, 42, 76, 216, 149, 73, 201, 78, 149, 184, 100, 96, 196, 189, 198, 188, 235, 195, 117, 129, 120, 129, 49, 25, 133, 113, 69, 221, 114, 70, 143, 99, 157, 108, 189, 140, 78, 6, 55, 65, 240, 255, 245, 184, 72, 90, 100, 116, 131, 39, 60, 234, 167, 33, 160, 88, 185, 200, 157, 159, 176, 127, 151, 138, 102, 168, 106, 170, 86, 82, 219, 189, 76, 33, 115, 197, 106, 96, 198, 136, 97, 141, 237, 151, 98, 137, 191, 185, 2, 57, 95, 142, 91, 255, 185, 97, 137, 76, 162, 94, 173, 131, 193, 161, 81, 106, 72, 135, 222, 234, 137, 66, 137, 106, 243, 210, 147, 95, 15, 137, 110, 85, 66, 16, 96, 167, 147, 150, 173, 203, 140, 118, 196, 84, 147, 160, 19, 95, 101, 123, 74, 132, 202, 82, 166, 12, 131, 166, 189, 170, 159, 85, 79, 66, 57, 152, 132, 203, 194, 0, 1, 56, 146, 180, 224, 156, 28, 83, 181, 79, 76, 80, 46, 160, 175, 59, 106, 43, 87, 75, 136, 85, 189, 46, 71, 200, 90 }; ASM_REGISTER_STATE_CHECK( predict_(const_cast(kTestData) + kSrcStride * 2 + 2, kSrcStride, 2, 2, dst_, dst_stride_)); ASSERT_TRUE( CompareBuffers(kExpectedDst, kExpectedDstStride, dst_, dst_stride_)); } INSTANTIATE_TEST_CASE_P( C, SixtapPredictTest, ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_c), make_tuple(8, 8, &vp8_sixtap_predict8x8_c), make_tuple(8, 4, &vp8_sixtap_predict8x4_c), make_tuple(4, 4, &vp8_sixtap_predict4x4_c))); #if HAVE_NEON INSTANTIATE_TEST_CASE_P( NEON, SixtapPredictTest, ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_neon), make_tuple(8, 8, &vp8_sixtap_predict8x8_neon), make_tuple(8, 4, &vp8_sixtap_predict8x4_neon), make_tuple(4, 4, &vp8_sixtap_predict4x4_neon))); #endif #if HAVE_MMX INSTANTIATE_TEST_CASE_P( MMX, SixtapPredictTest, ::testing::Values(make_tuple(4, 4, &vp8_sixtap_predict4x4_mmx))); #endif #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, SixtapPredictTest, ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_sse2), make_tuple(8, 8, &vp8_sixtap_predict8x8_sse2), 
                      make_tuple(8, 4, &vp8_sixtap_predict8x4_sse2)));
#endif
#if HAVE_SSSE3
INSTANTIATE_TEST_CASE_P(
    SSSE3, SixtapPredictTest,
    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_ssse3),
                      make_tuple(8, 8, &vp8_sixtap_predict8x8_ssse3),
                      make_tuple(8, 4, &vp8_sixtap_predict8x4_ssse3),
                      make_tuple(4, 4, &vp8_sixtap_predict4x4_ssse3)));
#endif
#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(
    MSA, SixtapPredictTest,
    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_msa),
                      make_tuple(8, 8, &vp8_sixtap_predict8x8_msa),
                      make_tuple(8, 4, &vp8_sixtap_predict8x4_msa),
                      make_tuple(4, 4, &vp8_sixtap_predict4x4_msa)));
#endif
#if HAVE_MMI
INSTANTIATE_TEST_CASE_P(
    MMI, SixtapPredictTest,
    ::testing::Values(make_tuple(16, 16, &vp8_sixtap_predict16x16_mmi),
                      make_tuple(8, 8, &vp8_sixtap_predict8x8_mmi),
                      make_tuple(8, 4, &vp8_sixtap_predict8x4_mmi),
                      make_tuple(4, 4, &vp8_sixtap_predict4x4_mmi)));
#endif

class BilinearPredictTest : public PredictTestBase {};

TEST_P(BilinearPredictTest, TestWithRandomData) {
  TestWithRandomData(vp8_bilinear_predict16x16_c);
}
TEST_P(BilinearPredictTest, TestWithUnalignedDst) {
  TestWithUnalignedDst(vp8_bilinear_predict16x16_c);
}
TEST_P(BilinearPredictTest, DISABLED_Speed) {
  const int kCountSpeedTestBlock = 5000000 / (width_ * height_);
  RunNTimes(kCountSpeedTestBlock);

  char title[16];
  snprintf(title, sizeof(title), "%dx%d", width_, height_);
  PrintMedian(title);
}

INSTANTIATE_TEST_CASE_P(
    C, BilinearPredictTest,
    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_c),
                      make_tuple(8, 8, &vp8_bilinear_predict8x8_c),
                      make_tuple(8, 4, &vp8_bilinear_predict8x4_c),
                      make_tuple(4, 4, &vp8_bilinear_predict4x4_c)));
#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(
    NEON, BilinearPredictTest,
    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_neon),
                      make_tuple(8, 8, &vp8_bilinear_predict8x8_neon),
                      make_tuple(8, 4, &vp8_bilinear_predict8x4_neon),
                      make_tuple(4, 4, &vp8_bilinear_predict4x4_neon)));
#endif
#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
    SSE2, BilinearPredictTest,
    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_sse2),
                      make_tuple(8, 8, &vp8_bilinear_predict8x8_sse2),
                      make_tuple(8, 4, &vp8_bilinear_predict8x4_sse2),
                      make_tuple(4, 4, &vp8_bilinear_predict4x4_sse2)));
#endif
#if HAVE_SSSE3
INSTANTIATE_TEST_CASE_P(
    SSSE3, BilinearPredictTest,
    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_ssse3),
                      make_tuple(8, 8, &vp8_bilinear_predict8x8_ssse3)));
#endif
#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(
    MSA, BilinearPredictTest,
    ::testing::Values(make_tuple(16, 16, &vp8_bilinear_predict16x16_msa),
                      make_tuple(8, 8, &vp8_bilinear_predict8x8_msa),
                      make_tuple(8, 4, &vp8_bilinear_predict8x4_msa),
                      make_tuple(4, 4, &vp8_bilinear_predict4x4_msa)));
#endif
}  // namespace

libvpx-1.8.2/test/quantize_test.cc
/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <string.h>

#include <tuple>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp8_rtcd.h"
#include "./vpx_config.h"
#include "test/acm_random.h"
#include "test/bench.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vp8/common/blockd.h"
#include "vp8/common/onyx.h"
#include "vp8/encoder/block.h"
#include "vp8/encoder/onyx_int.h"
#include "vp8/encoder/quantize.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"

namespace {

const int kNumBlocks = 25;
const int kNumBlockEntries = 16;

typedef void (*VP8Quantize)(BLOCK *b, BLOCKD *d);

typedef std::tuple<VP8Quantize, VP8Quantize> VP8QuantizeParam;

using libvpx_test::ACMRandom;
using std::make_tuple;

// Create and populate a VP8_COMP instance which has a complete set of
// quantization inputs as well as a second MACROBLOCKD for output.
class QuantizeTestBase {
 public:
  virtual ~QuantizeTestBase() {
    vp8_remove_compressor(&vp8_comp_);
    vp8_comp_ = NULL;
    vpx_free(macroblockd_dst_);
    macroblockd_dst_ = NULL;
    libvpx_test::ClearSystemState();
  }

 protected:
  void SetupCompressor() {
    rnd_.Reset(ACMRandom::DeterministicSeed());

    // The full configuration is necessary to generate the quantization tables.
    VP8_CONFIG vp8_config;
    memset(&vp8_config, 0, sizeof(vp8_config));

    vp8_comp_ = vp8_create_compressor(&vp8_config);

    // Set the tables based on a quantizer of 0.
    vp8_set_quantizer(vp8_comp_, 0);

    // Set up all the block/blockd pointers for the mb in vp8_comp_.
    vp8cx_frame_init_quantizer(vp8_comp_);

    // Copy macroblockd from the reference to get pre-set-up dequant values.
    macroblockd_dst_ = reinterpret_cast<MACROBLOCKD *>(
        vpx_memalign(32, sizeof(*macroblockd_dst_)));
    memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_));

    // Fix block pointers - currently they point to the blocks in the reference
    // structure.
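    // (memcpy above copied the pointer members verbatim, so they still
    // reference vp8_comp_->mb.e_mbd; vp8_setup_block_dptrs() re-derives them
    // from macroblockd_dst_ itself.)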
    vp8_setup_block_dptrs(macroblockd_dst_);
  }

  void UpdateQuantizer(int q) {
    vp8_set_quantizer(vp8_comp_, q);

    memcpy(macroblockd_dst_, &vp8_comp_->mb.e_mbd, sizeof(*macroblockd_dst_));
    vp8_setup_block_dptrs(macroblockd_dst_);
  }

  void FillCoeffConstant(int16_t c) {
    for (int i = 0; i < kNumBlocks * kNumBlockEntries; ++i) {
      vp8_comp_->mb.coeff[i] = c;
    }
  }

  void FillCoeffRandom() {
    for (int i = 0; i < kNumBlocks * kNumBlockEntries; ++i) {
      vp8_comp_->mb.coeff[i] = rnd_.Rand8();
    }
  }

  void CheckOutput() {
    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.qcoeff, macroblockd_dst_->qcoeff,
                        sizeof(*macroblockd_dst_->qcoeff) * kNumBlocks *
                            kNumBlockEntries))
        << "qcoeff mismatch";
    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.dqcoeff, macroblockd_dst_->dqcoeff,
                        sizeof(*macroblockd_dst_->dqcoeff) * kNumBlocks *
                            kNumBlockEntries))
        << "dqcoeff mismatch";
    EXPECT_EQ(0, memcmp(vp8_comp_->mb.e_mbd.eobs, macroblockd_dst_->eobs,
                        sizeof(*macroblockd_dst_->eobs) * kNumBlocks))
        << "eobs mismatch";
  }

  VP8_COMP *vp8_comp_;
  MACROBLOCKD *macroblockd_dst_;

 private:
  ACMRandom rnd_;
};

class QuantizeTest : public QuantizeTestBase,
                     public ::testing::TestWithParam<VP8QuantizeParam>,
                     public AbstractBench {
 protected:
  virtual void SetUp() {
    SetupCompressor();
    asm_quant_ = GET_PARAM(0);
    c_quant_ = GET_PARAM(1);
  }

  virtual void Run() {
    asm_quant_(&vp8_comp_->mb.block[0], &macroblockd_dst_->block[0]);
  }

  void RunComparison() {
    for (int i = 0; i < kNumBlocks; ++i) {
      ASM_REGISTER_STATE_CHECK(
          c_quant_(&vp8_comp_->mb.block[i], &vp8_comp_->mb.e_mbd.block[i]));
      ASM_REGISTER_STATE_CHECK(
          asm_quant_(&vp8_comp_->mb.block[i], &macroblockd_dst_->block[i]));
    }

    CheckOutput();
  }

 private:
  VP8Quantize asm_quant_;
  VP8Quantize c_quant_;
};

TEST_P(QuantizeTest, TestZeroInput) {
  FillCoeffConstant(0);
  RunComparison();
}

TEST_P(QuantizeTest, TestLargeNegativeInput) {
  FillCoeffConstant(0);
  // Generate a qcoeff which contains 512/-512 (0x0100/0xFE00) to catch issues
  // like BUG=883 where the constant being compared was incorrectly
  // initialized.
  vp8_comp_->mb.coeff[0] = -8191;
  RunComparison();
}

TEST_P(QuantizeTest, TestRandomInput) {
  FillCoeffRandom();
  RunComparison();
}

TEST_P(QuantizeTest, TestMultipleQ) {
  for (int q = 0; q < QINDEX_RANGE; ++q) {
    UpdateQuantizer(q);
    FillCoeffRandom();
    RunComparison();
  }
}

TEST_P(QuantizeTest, DISABLED_Speed) {
  FillCoeffRandom();

  RunNTimes(10000000);

  PrintMedian("vp8 quantize");
}

#if HAVE_SSE2
INSTANTIATE_TEST_CASE_P(
    SSE2, QuantizeTest,
    ::testing::Values(
        make_tuple(&vp8_fast_quantize_b_sse2, &vp8_fast_quantize_b_c),
        make_tuple(&vp8_regular_quantize_b_sse2, &vp8_regular_quantize_b_c)));
#endif  // HAVE_SSE2

#if HAVE_SSSE3
INSTANTIATE_TEST_CASE_P(SSSE3, QuantizeTest,
                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_ssse3,
                                                     &vp8_fast_quantize_b_c)));
#endif  // HAVE_SSSE3

#if HAVE_SSE4_1
INSTANTIATE_TEST_CASE_P(
    SSE4_1, QuantizeTest,
    ::testing::Values(make_tuple(&vp8_regular_quantize_b_sse4_1,
                                 &vp8_regular_quantize_b_c)));
#endif  // HAVE_SSE4_1

#if HAVE_NEON
INSTANTIATE_TEST_CASE_P(NEON, QuantizeTest,
                        ::testing::Values(make_tuple(&vp8_fast_quantize_b_neon,
                                                     &vp8_fast_quantize_b_c)));
#endif  // HAVE_NEON

#if HAVE_MSA
INSTANTIATE_TEST_CASE_P(
    MSA, QuantizeTest,
    ::testing::Values(
        make_tuple(&vp8_fast_quantize_b_msa, &vp8_fast_quantize_b_c),
        make_tuple(&vp8_regular_quantize_b_msa, &vp8_regular_quantize_b_c)));
#endif  // HAVE_MSA

#if HAVE_MMI
INSTANTIATE_TEST_CASE_P(
    MMI, QuantizeTest,
    ::testing::Values(
        make_tuple(&vp8_fast_quantize_b_mmi, &vp8_fast_quantize_b_c),
        make_tuple(&vp8_regular_quantize_b_mmi, &vp8_regular_quantize_b_c)));
#endif  // HAVE_MMI
}  // namespace
libvpx-1.8.2/test/realtime_test.cc000066400000000000000000000041771357355204000171700ustar00rootroot00000000000000/*
 *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/util.h"
#include "test/video_source.h"
#include "third_party/googletest/src/include/gtest/gtest.h"

namespace {

const int kVideoSourceWidth = 320;
const int kVideoSourceHeight = 240;
const int kFramesToEncode = 2;

class RealtimeTest
    : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
 protected:
  RealtimeTest() : EncoderTest(GET_PARAM(0)), frame_packets_(0) {}
  virtual ~RealtimeTest() {}

  virtual void SetUp() {
    InitializeConfig();
    cfg_.g_lag_in_frames = 0;
    SetMode(::libvpx_test::kRealTime);
  }

  virtual void BeginPassHook(unsigned int /*pass*/) {
    // TODO(tomfinegan): We're changing the pass value here to make sure
    // we get frames when real time mode is combined with |g_pass| set to
    // VPX_RC_FIRST_PASS. This is necessary because EncoderTest::RunLoop() sets
    // the pass value based on the mode passed into EncoderTest::SetMode(),
    // which overrides the one specified in SetUp() above.
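    // In a genuine first pass the encoder typically emits only
    // VPX_CODEC_STATS_PKT packets; forcing the pass value here lets the test
    // confirm that realtime mode still produces compressed frame packets.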
    cfg_.g_pass = VPX_RC_FIRST_PASS;
  }

  virtual void FramePktHook(const vpx_codec_cx_pkt_t * /*pkt*/) {
    frame_packets_++;
  }

  int frame_packets_;
};

TEST_P(RealtimeTest, RealtimeFirstPassProducesFrames) {
  ::libvpx_test::RandomVideoSource video;
  video.SetSize(kVideoSourceWidth, kVideoSourceHeight);
  video.set_limit(kFramesToEncode);
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  EXPECT_EQ(kFramesToEncode, frame_packets_);
}

VP8_INSTANTIATE_TEST_CASE(RealtimeTest,
                          ::testing::Values(::libvpx_test::kRealTime));
VP9_INSTANTIATE_TEST_CASE(RealtimeTest,
                          ::testing::Values(::libvpx_test::kRealTime));

}  // namespace
libvpx-1.8.2/test/register_state_check.h000066400000000000000000000126721357355204000203530ustar00rootroot00000000000000/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_TEST_REGISTER_STATE_CHECK_H_
#define VPX_TEST_REGISTER_STATE_CHECK_H_

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#include "vpx/vpx_integer.h"

// ASM_REGISTER_STATE_CHECK(asm_function)
//   Minimally validates the environment pre & post function execution. This
//   variant should be used with assembly functions which are not expected to
//   fully restore the system state. See platform implementations of
//   RegisterStateCheck for details.
//
// API_REGISTER_STATE_CHECK(api_function)
//   Performs all the checks done by ASM_REGISTER_STATE_CHECK() and any
//   additional checks to ensure the environment is in a consistent state pre &
//   post function execution. This variant should be used with API functions.
//   See platform implementations of RegisterStateCheckXXX for details.
//

#if defined(_WIN64) && VPX_ARCH_X86_64

#undef NOMINMAX
#define NOMINMAX
#ifndef WIN32_LEAN_AND_MEAN
#define WIN32_LEAN_AND_MEAN
#endif
#include <windows.h>
#include <winnt.h>

inline bool operator==(const M128A &lhs, const M128A &rhs) {
  return (lhs.Low == rhs.Low && lhs.High == rhs.High);
}

namespace libvpx_test {

// Compares the state of xmm[6-15] at construction with their state at
// destruction. These registers should be preserved by the callee on
// Windows x64.
class RegisterStateCheck {
 public:
  RegisterStateCheck() { initialized_ = StoreRegisters(&pre_context_); }
  ~RegisterStateCheck() { Check(); }

 private:
  static bool StoreRegisters(CONTEXT *const context) {
    const HANDLE this_thread = GetCurrentThread();
    EXPECT_TRUE(this_thread != NULL);
    context->ContextFlags = CONTEXT_FLOATING_POINT;
    const bool context_saved = GetThreadContext(this_thread, context) == TRUE;
    EXPECT_TRUE(context_saved) << "GetLastError: " << GetLastError();
    return context_saved;
  }

  // Compares the register state; mismatches are reported through gtest
  // expectations.
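  // The Windows x64 calling convention treats xmm6-xmm15 as nonvolatile
  // (callee-saved), so an assembly function that clobbers one of them without
  // restoring it is broken even when its return value happens to be correct.
  // Check() compares the CONTEXT captured at construction with a fresh
  // snapshot taken at destruction.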
  void Check() const {
    ASSERT_TRUE(initialized_);
    CONTEXT post_context;
    ASSERT_TRUE(StoreRegisters(&post_context));

    const M128A *xmm_pre = &pre_context_.Xmm6;
    const M128A *xmm_post = &post_context.Xmm6;
    for (int i = 6; i <= 15; ++i) {
      EXPECT_EQ(*xmm_pre, *xmm_post) << "xmm" << i << " has been modified!";
      ++xmm_pre;
      ++xmm_post;
    }
  }

  bool initialized_;
  CONTEXT pre_context_;
};

#define ASM_REGISTER_STATE_CHECK(statement)    \
  do {                                         \
    libvpx_test::RegisterStateCheck reg_check; \
    statement;                                 \
  } while (false)

}  // namespace libvpx_test

#elif defined(CONFIG_SHARED) && defined(HAVE_NEON_ASM) && \
    defined(CONFIG_VP9) && !CONFIG_SHARED && HAVE_NEON_ASM && CONFIG_VP9

extern "C" {
// Save the d8-d15 registers into store.
void vpx_push_neon(int64_t *store);
}

namespace libvpx_test {

// Compares the state of d8-d15 at construction with their state at
// destruction. These registers should be preserved by the callee on
// arm platform.
class RegisterStateCheck {
 public:
  RegisterStateCheck() { vpx_push_neon(pre_store_); }
  ~RegisterStateCheck() { Check(); }

 private:
  // Compares the register state; mismatches are reported through gtest
  // expectations.
  void Check() const {
    int64_t post_store[8];
    vpx_push_neon(post_store);
    for (int i = 0; i < 8; ++i) {
      EXPECT_EQ(pre_store_[i], post_store[i])
          << "d" << i + 8 << " has been modified";
    }
  }

  int64_t pre_store_[8];
};

#define ASM_REGISTER_STATE_CHECK(statement)    \
  do {                                         \
    libvpx_test::RegisterStateCheck reg_check; \
    statement;                                 \
  } while (false)

}  // namespace libvpx_test

#else

namespace libvpx_test {

class RegisterStateCheck {};
#define ASM_REGISTER_STATE_CHECK(statement) statement

}  // namespace libvpx_test

#endif  // _WIN64 && VPX_ARCH_X86_64

#if VPX_ARCH_X86 || VPX_ARCH_X86_64
#if defined(__GNUC__)

namespace libvpx_test {

// Checks the FPU tag word pre/post execution to ensure emms has been called.
class RegisterStateCheckMMX {
 public:
  RegisterStateCheckMMX() {
    __asm__ volatile("fstenv %0" : "=rm"(pre_fpu_env_));
  }
  ~RegisterStateCheckMMX() { Check(); }

 private:
  // Checks the FPU tag word pre/post execution; a tag word not cleared to
  // 0xffff indicates a missing emms and fails the test.
  void Check() const {
    EXPECT_EQ(0xffff, pre_fpu_env_[4])
        << "FPU was in an inconsistent state prior to call";

    uint16_t post_fpu_env[14];
    __asm__ volatile("fstenv %0" : "=rm"(post_fpu_env));
    EXPECT_EQ(0xffff, post_fpu_env[4])
        << "FPU was left in an inconsistent state after call";
  }

  uint16_t pre_fpu_env_[14];
};

#define API_REGISTER_STATE_CHECK(statement)       \
  do {                                            \
    libvpx_test::RegisterStateCheckMMX reg_check; \
    ASM_REGISTER_STATE_CHECK(statement);          \
  } while (false)

}  // namespace libvpx_test

#endif  // __GNUC__
#endif  // VPX_ARCH_X86 || VPX_ARCH_X86_64

#ifndef API_REGISTER_STATE_CHECK
#define API_REGISTER_STATE_CHECK ASM_REGISTER_STATE_CHECK
#endif

#endif  // VPX_TEST_REGISTER_STATE_CHECK_H_
libvpx-1.8.2/test/resize_test.cc000066400000000000000000000563541357355204000166730ustar00rootroot00000000000000/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#include <climits>
#include <stdio.h>
#include <vector>

#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/video_source.h"
#include "test/util.h"

// Enable(1) or Disable(0) writing of the compressed bitstream.
#define WRITE_COMPRESSED_STREAM 0

namespace {

#if WRITE_COMPRESSED_STREAM
static void mem_put_le16(char *const mem, const unsigned int val) {
  mem[0] = val;
  mem[1] = val >> 8;
}

static void mem_put_le32(char *const mem, const unsigned int val) {
  mem[0] = val;
  mem[1] = val >> 8;
  mem[2] = val >> 16;
  mem[3] = val >> 24;
}

static void write_ivf_file_header(const vpx_codec_enc_cfg_t *const cfg,
                                  int frame_cnt, FILE *const outfile) {
  char header[32];

  header[0] = 'D';
  header[1] = 'K';
  header[2] = 'I';
  header[3] = 'F';
  mem_put_le16(header + 4, 0);                    /* version */
  mem_put_le16(header + 6, 32);                   /* headersize */
  mem_put_le32(header + 8, 0x30395056);           /* fourcc (vp9) */
  mem_put_le16(header + 12, cfg->g_w);            /* width */
  mem_put_le16(header + 14, cfg->g_h);            /* height */
  mem_put_le32(header + 16, cfg->g_timebase.den); /* rate */
  mem_put_le32(header + 20, cfg->g_timebase.num); /* scale */
  mem_put_le32(header + 24, frame_cnt);           /* length */
  mem_put_le32(header + 28, 0);                   /* unused */

  (void)fwrite(header, 1, 32, outfile);
}

static void write_ivf_frame_size(FILE *const outfile, const size_t size) {
  char header[4];
  mem_put_le32(header, static_cast<unsigned int>(size));
  (void)fwrite(header, 1, 4, outfile);
}

static void write_ivf_frame_header(const vpx_codec_cx_pkt_t *const pkt,
                                   FILE *const outfile) {
  char header[12];
  vpx_codec_pts_t pts;

  if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return;

  pts = pkt->data.frame.pts;
  mem_put_le32(header, static_cast<unsigned int>(pkt->data.frame.sz));
  mem_put_le32(header + 4, pts & 0xFFFFFFFF);
  mem_put_le32(header + 8, pts >> 32);

  (void)fwrite(header, 1, 12, outfile);
}
#endif  // WRITE_COMPRESSED_STREAM

const unsigned int kInitialWidth = 320;
const unsigned int kInitialHeight = 240;

struct FrameInfo {
  FrameInfo(vpx_codec_pts_t _pts, unsigned int _w, unsigned int _h)
      : pts(_pts), w(_w), h(_h) {}

  vpx_codec_pts_t pts;
  unsigned int w;
  unsigned int h;
};

void ScaleForFrameNumber(unsigned int frame, unsigned int initial_w,
                         unsigned int initial_h, unsigned int *w,
                         unsigned int *h, bool flag_codec,
                         bool smaller_width_larger_size_) {
  if (smaller_width_larger_size_) {
    if (frame < 30) {
      *w = initial_w;
      *h = initial_h;
      return;
    }
    if (frame < 100) {
      *w = initial_w * 7 / 10;
      *h = initial_h * 16 / 10;
      return;
    }
    return;
  }
  if (frame < 10) {
    *w = initial_w;
    *h = initial_h;
    return;
  }
  if (frame < 20) {
    *w = initial_w * 3 / 4;
    *h = initial_h * 3 / 4;
    return;
  }
  if (frame < 30) {
    *w = initial_w / 2;
    *h = initial_h / 2;
    return;
  }
  if (frame < 40) {
    *w = initial_w;
    *h = initial_h;
    return;
  }
  if (frame < 50) {
    *w = initial_w * 3 / 4;
    *h = initial_h * 3 / 4;
    return;
  }
  if (frame < 60) {
    *w = initial_w / 2;
    *h = initial_h / 2;
    return;
  }
  if (frame < 70) {
    *w = initial_w;
    *h = initial_h;
    return;
  }
  if (frame < 80) {
    *w = initial_w * 3 / 4;
    *h = initial_h * 3 / 4;
    return;
  }
  if (frame < 90) {
    *w = initial_w / 2;
    *h = initial_h / 2;
    return;
  }
  if (frame < 100) {
    *w = initial_w * 3 / 4;
    *h = initial_h * 3 / 4;
    return;
  }
  if (frame < 110) {
    *w = initial_w;
    *h = initial_h;
    return;
  }
  if (frame < 120) {
    *w = initial_w * 3 / 4;
    *h = initial_h * 3 / 4;
    return;
  }
  if (frame < 130) {
    *w = initial_w / 2;
    *h = initial_h / 2;
    return;
  }
  if (frame < 140) {
    *w = initial_w * 3 / 4;
    *h = initial_h * 3 / 4;
    return;
  }
  if (frame < 150) {
    *w = initial_w;
    *h = initial_h;
    return;
  }
  if (frame < 160) {
    *w = initial_w * 3 / 4;
    *h = initial_h * 3 / 4;
    return;
  }
  if (frame < 170) {
    *w = initial_w / 2;
    *h = initial_h / 2;
    return;
  }
  if (frame < 180) {
    *w = initial_w * 3 / 4;
    *h = initial_h * 3 / 4;
    return;
  }
  if (frame < 190) {
    *w = initial_w;
    *h = initial_h;
    return;
  }
  if (frame < 200) {
    *w = initial_w * 3 / 4;
    *h = initial_h * 3 / 4;
    return;
  }
  if (frame < 210) {
    *w = initial_w / 2;
    *h = initial_h / 2;
    return;
  }
  if (frame < 220) {
    *w = initial_w * 3 / 4;
    *h = initial_h * 3 / 4;
    return;
  }
  if (frame < 230) {
    *w = initial_w;
    *h = initial_h;
    return;
  }
  if (frame < 240) {
    *w = initial_w * 3 / 4;
    *h = initial_h * 3 / 4;
    return;
  }
  if (frame < 250) {
    *w = initial_w / 2;
    *h = initial_h / 2;
    return;
  }
  if (frame < 260) {
    *w = initial_w;
    *h = initial_h;
    return;
  }
  // Go down very low.
  if (frame < 270) {
    *w = initial_w / 4;
    *h = initial_h / 4;
    return;
  }
  if (flag_codec == 1) {
    // Cases that only works for VP9.
    // For VP9: Swap width and height of original.
    if (frame < 320) {
      *w = initial_h;
      *h = initial_w;
      return;
    }
  }
  *w = initial_w;
  *h = initial_h;
}

class ResizingVideoSource : public ::libvpx_test::DummyVideoSource {
 public:
  ResizingVideoSource() {
    SetSize(kInitialWidth, kInitialHeight);
    limit_ = 350;
    smaller_width_larger_size_ = false;
  }
  bool flag_codec_;
  bool smaller_width_larger_size_;
  virtual ~ResizingVideoSource() {}

 protected:
  virtual void Next() {
    ++frame_;
    unsigned int width;
    unsigned int height;
    ScaleForFrameNumber(frame_, kInitialWidth, kInitialHeight, &width, &height,
                        flag_codec_, smaller_width_larger_size_);
    SetSize(width, height);
    FillFrame();
  }
};

class ResizeTest
    : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWithParam<libvpx_test::TestMode> {
 protected:
  ResizeTest() : EncoderTest(GET_PARAM(0)) {}

  virtual ~ResizeTest() {}

  virtual void SetUp() {
    InitializeConfig();
    SetMode(GET_PARAM(1));
  }

  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
    ASSERT_NE(static_cast<int>(pkt->data.frame.width[0]), 0);
    ASSERT_NE(static_cast<int>(pkt->data.frame.height[0]), 0);
    encode_frame_width_.push_back(pkt->data.frame.width[0]);
    encode_frame_height_.push_back(pkt->data.frame.height[0]);
  }

  unsigned int GetFrameWidth(size_t idx) const {
    return encode_frame_width_[idx];
  }

  unsigned int GetFrameHeight(size_t idx) const {
    return encode_frame_height_[idx];
  }

  virtual void DecompressedFrameHook(const vpx_image_t &img,
                                     vpx_codec_pts_t pts) {
    frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
  }

  std::vector<FrameInfo> frame_info_list_;
  std::vector<unsigned int> encode_frame_width_;
  std::vector<unsigned int> encode_frame_height_;
};

TEST_P(ResizeTest, TestExternalResizeWorks) {
  ResizingVideoSource video;
  video.flag_codec_ = false;
  video.smaller_width_larger_size_ = false;
  cfg_.g_lag_in_frames = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
       info != frame_info_list_.end(); ++info) {
    const unsigned int frame = static_cast<unsigned int>(info->pts);
    unsigned int expected_w;
    unsigned int expected_h;
    const size_t idx = info - frame_info_list_.begin();
    ASSERT_EQ(info->w, GetFrameWidth(idx));
    ASSERT_EQ(info->h, GetFrameHeight(idx));
    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
                        &expected_h, video.flag_codec_,
                        video.smaller_width_larger_size_);
    EXPECT_EQ(expected_w, info->w)
        << "Frame " << frame << " had unexpected width";
    EXPECT_EQ(expected_h, info->h)
        << "Frame " << frame << " had unexpected height";
  }
}

const unsigned int kStepDownFrame = 3;
const unsigned int kStepUpFrame = 6;

class ResizeInternalTest : public ResizeTest {
 protected:
#if
WRITE_COMPRESSED_STREAM ResizeInternalTest() : ResizeTest(), frame0_psnr_(0.0), outfile_(NULL), out_frames_(0) {} #else ResizeInternalTest() : ResizeTest(), frame0_psnr_(0.0) {} #endif virtual ~ResizeInternalTest() {} virtual void BeginPassHook(unsigned int /*pass*/) { #if WRITE_COMPRESSED_STREAM outfile_ = fopen("vp90-2-05-resize.ivf", "wb"); #endif } virtual void EndPassHook() { #if WRITE_COMPRESSED_STREAM if (outfile_) { if (!fseek(outfile_, 0, SEEK_SET)) write_ivf_file_header(&cfg_, out_frames_, outfile_); fclose(outfile_); outfile_ = NULL; } #endif } virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, libvpx_test::Encoder *encoder) { if (change_config_) { int new_q = 60; if (video->frame() == 0) { struct vpx_scaling_mode mode = { VP8E_ONETWO, VP8E_ONETWO }; encoder->Control(VP8E_SET_SCALEMODE, &mode); } if (video->frame() == 1) { struct vpx_scaling_mode mode = { VP8E_NORMAL, VP8E_NORMAL }; encoder->Control(VP8E_SET_SCALEMODE, &mode); cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = new_q; encoder->Config(&cfg_); } } else { if (video->frame() == kStepDownFrame) { struct vpx_scaling_mode mode = { VP8E_FOURFIVE, VP8E_THREEFIVE }; encoder->Control(VP8E_SET_SCALEMODE, &mode); } if (video->frame() == kStepUpFrame) { struct vpx_scaling_mode mode = { VP8E_NORMAL, VP8E_NORMAL }; encoder->Control(VP8E_SET_SCALEMODE, &mode); } } } virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0]; EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0); } #if WRITE_COMPRESSED_STREAM virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { ++out_frames_; // Write initial file header if first frame. if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_); // Write frame header and data. write_ivf_frame_header(pkt, outfile_); (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_); } #endif double frame0_psnr_; bool change_config_; #if WRITE_COMPRESSED_STREAM FILE *outfile_; unsigned int out_frames_; #endif }; TEST_P(ResizeInternalTest, TestInternalResizeWorks) { ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 10); init_flags_ = VPX_CODEC_USE_PSNR; change_config_ = false; // q picked such that initial keyframe on this clip is ~30dB PSNR cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48; // If the number of frames being encoded is smaller than g_lag_in_frames // the encoded frame is unavailable using the current API. Comparing // frames to detect mismatch would then not be possible. Set // g_lag_in_frames = 0 to get around this. 
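  // For example, with the 10-frame clip used below any lag of 10 or more could
  // leave every encoded frame buffered inside the encoder when RunLoop()
  // returns, so the mismatch comparison would never run.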
  cfg_.g_lag_in_frames = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
       info != frame_info_list_.end(); ++info) {
    const vpx_codec_pts_t pts = info->pts;
    if (pts >= kStepDownFrame && pts < kStepUpFrame) {
      ASSERT_EQ(282U, info->w) << "Frame " << pts << " had unexpected width";
      ASSERT_EQ(173U, info->h) << "Frame " << pts << " had unexpected height";
    } else {
      EXPECT_EQ(352U, info->w) << "Frame " << pts << " had unexpected width";
      EXPECT_EQ(288U, info->h) << "Frame " << pts << " had unexpected height";
    }
  }
}

TEST_P(ResizeInternalTest, TestInternalResizeChangeConfig) {
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 10);
  cfg_.g_w = 352;
  cfg_.g_h = 288;
  change_config_ = true;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
}

class ResizeRealtimeTest
    : public ::libvpx_test::EncoderTest,
      public ::libvpx_test::CodecTestWith2Params<libvpx_test::TestMode, int> {
 protected:
  ResizeRealtimeTest() : EncoderTest(GET_PARAM(0)) {}
  virtual ~ResizeRealtimeTest() {}

  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
    if (video->frame() == 0) {
      encoder->Control(VP9E_SET_AQ_MODE, 3);
      encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_);
    }

    if (change_bitrate_ && video->frame() == 120) {
      change_bitrate_ = false;
      cfg_.rc_target_bitrate = 500;
      encoder->Config(&cfg_);
    }
  }

  virtual void SetUp() {
    InitializeConfig();
    SetMode(GET_PARAM(1));
    set_cpu_used_ = GET_PARAM(2);
  }

  virtual void DecompressedFrameHook(const vpx_image_t &img,
                                     vpx_codec_pts_t pts) {
    frame_info_list_.push_back(FrameInfo(pts, img.d_w, img.d_h));
  }

  virtual void MismatchHook(const vpx_image_t *img1, const vpx_image_t *img2) {
    double mismatch_psnr = compute_psnr(img1, img2);
    mismatch_psnr_ += mismatch_psnr;
    ++mismatch_nframes_;
  }

  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
    ASSERT_NE(static_cast<int>(pkt->data.frame.width[0]), 0);
    ASSERT_NE(static_cast<int>(pkt->data.frame.height[0]), 0);
    encode_frame_width_.push_back(pkt->data.frame.width[0]);
    encode_frame_height_.push_back(pkt->data.frame.height[0]);
  }

  unsigned int GetMismatchFrames() { return mismatch_nframes_; }

  unsigned int GetFrameWidth(size_t idx) const {
    return encode_frame_width_[idx];
  }

  unsigned int GetFrameHeight(size_t idx) const {
    return encode_frame_height_[idx];
  }

  void DefaultConfig() {
    cfg_.rc_buf_initial_sz = 500;
    cfg_.rc_buf_optimal_sz = 600;
    cfg_.rc_buf_sz = 1000;
    cfg_.rc_min_quantizer = 2;
    cfg_.rc_max_quantizer = 56;
    cfg_.rc_undershoot_pct = 50;
    cfg_.rc_overshoot_pct = 50;
    cfg_.rc_end_usage = VPX_CBR;
    cfg_.kf_mode = VPX_KF_AUTO;
    cfg_.g_lag_in_frames = 0;
    cfg_.kf_min_dist = cfg_.kf_max_dist = 3000;
    // Enable dropped frames.
    cfg_.rc_dropframe_thresh = 1;
    // Enable error_resilience mode.
    cfg_.g_error_resilient = 1;
    // Enable dynamic resizing.
    cfg_.rc_resize_allowed = 1;
    // Run at low bitrate.
    cfg_.rc_target_bitrate = 200;
  }

  std::vector<FrameInfo> frame_info_list_;
  int set_cpu_used_;
  bool change_bitrate_;
  double mismatch_psnr_;
  int mismatch_nframes_;
  std::vector<unsigned int> encode_frame_width_;
  std::vector<unsigned int> encode_frame_height_;
};

TEST_P(ResizeRealtimeTest, TestExternalResizeWorks) {
  ResizingVideoSource video;
  video.flag_codec_ = true;
  video.smaller_width_larger_size_ = false;
  DefaultConfig();
  // Disable internal resize for this test.
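  // With rc_resize_allowed = 0 the rate controller can never change the frame
  // size on its own, so every resolution change observed below must come from
  // the external ResizingVideoSource.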
  cfg_.rc_resize_allowed = 0;
  change_bitrate_ = false;
  mismatch_psnr_ = 0.0;
  mismatch_nframes_ = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
       info != frame_info_list_.end(); ++info) {
    const unsigned int frame = static_cast<unsigned int>(info->pts);
    unsigned int expected_w;
    unsigned int expected_h;
    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
                        &expected_h, video.flag_codec_,
                        video.smaller_width_larger_size_);
    EXPECT_EQ(expected_w, info->w)
        << "Frame " << frame << " had unexpected width";
    EXPECT_EQ(expected_h, info->h)
        << "Frame " << frame << " had unexpected height";
    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
  }
}

TEST_P(ResizeRealtimeTest, DISABLED_TestExternalResizeSmallerWidthBiggerSize) {
  ResizingVideoSource video;
  video.flag_codec_ = true;
  video.smaller_width_larger_size_ = true;
  DefaultConfig();
  // Disable internal resize for this test.
  cfg_.rc_resize_allowed = 0;
  change_bitrate_ = false;
  mismatch_psnr_ = 0.0;
  mismatch_nframes_ = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
       info != frame_info_list_.end(); ++info) {
    const unsigned int frame = static_cast<unsigned int>(info->pts);
    unsigned int expected_w;
    unsigned int expected_h;
    ScaleForFrameNumber(frame, kInitialWidth, kInitialHeight, &expected_w,
                        &expected_h, video.flag_codec_,
                        video.smaller_width_larger_size_);
    EXPECT_EQ(expected_w, info->w)
        << "Frame " << frame << " had unexpected width";
    EXPECT_EQ(expected_h, info->h)
        << "Frame " << frame << " had unexpected height";
    EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
  }
}

// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
// Run at low bitrate, with resize_allowed = 1, and verify that we get
// one resize down event.
TEST_P(ResizeRealtimeTest, TestInternalResizeDown) {
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 299);
  DefaultConfig();
  cfg_.g_w = 352;
  cfg_.g_h = 288;
  change_bitrate_ = false;
  mismatch_psnr_ = 0.0;
  mismatch_nframes_ = 0;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  unsigned int last_w = cfg_.g_w;
  unsigned int last_h = cfg_.g_h;
  int resize_count = 0;
  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
       info != frame_info_list_.end(); ++info) {
    if (info->w != last_w || info->h != last_h) {
      // Verify that resize down occurs.
      ASSERT_LT(info->w, last_w);
      ASSERT_LT(info->h, last_h);
      last_w = info->w;
      last_h = info->h;
      resize_count++;
    }
  }

#if CONFIG_VP9_DECODER
  // Verify that we get 1 resize down event in this test.
  ASSERT_EQ(1, resize_count) << "Resizing should occur.";
  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
#else
  printf("Warning: VP9 decoder unavailable, unable to check resize count!\n");
#endif
}

// Verify the dynamic resizer behavior for real time, 1 pass CBR mode.
// Start at low target bitrate, raise the bitrate in the middle of the clip,
// scaling-up should occur after bitrate changed.
TEST_P(ResizeRealtimeTest, TestInternalResizeDownUpChangeBitRate) {
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 359);
  DefaultConfig();
  cfg_.g_w = 352;
  cfg_.g_h = 288;
  change_bitrate_ = true;
  mismatch_psnr_ = 0.0;
  mismatch_nframes_ = 0;
  // Disable dropped frames.
  cfg_.rc_dropframe_thresh = 0;
  // Starting bitrate low.
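  // 80 kbps keeps the rate controller under pressure so it scales down;
  // PreEncodeFrameHook() raises the target to 500 kbps at frame 120, which
  // should trigger the scale-up event counted below.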
  cfg_.rc_target_bitrate = 80;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));

  unsigned int last_w = cfg_.g_w;
  unsigned int last_h = cfg_.g_h;
  int resize_count = 0;
  for (std::vector<FrameInfo>::const_iterator info = frame_info_list_.begin();
       info != frame_info_list_.end(); ++info) {
    const size_t idx = info - frame_info_list_.begin();
    ASSERT_EQ(info->w, GetFrameWidth(idx));
    ASSERT_EQ(info->h, GetFrameHeight(idx));
    if (info->w != last_w || info->h != last_h) {
      resize_count++;
      if (resize_count == 1) {
        // Verify that resize down occurs.
        ASSERT_LT(info->w, last_w);
        ASSERT_LT(info->h, last_h);
      } else if (resize_count == 2) {
        // Verify that resize up occurs.
        ASSERT_GT(info->w, last_w);
        ASSERT_GT(info->h, last_h);
      }
      last_w = info->w;
      last_h = info->h;
    }
  }

#if CONFIG_VP9_DECODER
  // Verify that we get 2 resize events in this test.
  ASSERT_EQ(resize_count, 2) << "Resizing should occur twice.";
  EXPECT_EQ(static_cast<unsigned int>(0), GetMismatchFrames());
#else
  printf("Warning: VP9 decoder unavailable, unable to check resize count!\n");
#endif
}

vpx_img_fmt_t CspForFrameNumber(int frame) {
  if (frame < 10) return VPX_IMG_FMT_I420;
  if (frame < 20) return VPX_IMG_FMT_I444;
  return VPX_IMG_FMT_I420;
}

class ResizeCspTest : public ResizeTest {
 protected:
#if WRITE_COMPRESSED_STREAM
  ResizeCspTest()
      : ResizeTest(), frame0_psnr_(0.0), outfile_(NULL), out_frames_(0) {}
#else
  ResizeCspTest() : ResizeTest(), frame0_psnr_(0.0) {}
#endif

  virtual ~ResizeCspTest() {}

  virtual void BeginPassHook(unsigned int /*pass*/) {
#if WRITE_COMPRESSED_STREAM
    outfile_ = fopen("vp91-2-05-cspchape.ivf", "wb");
#endif
  }

  virtual void EndPassHook() {
#if WRITE_COMPRESSED_STREAM
    if (outfile_) {
      if (!fseek(outfile_, 0, SEEK_SET))
        write_ivf_file_header(&cfg_, out_frames_, outfile_);
      fclose(outfile_);
      outfile_ = NULL;
    }
#endif
  }

  virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video,
                                  libvpx_test::Encoder *encoder) {
    if (CspForFrameNumber(video->frame()) != VPX_IMG_FMT_I420 &&
        cfg_.g_profile != 1) {
      cfg_.g_profile = 1;
      encoder->Config(&cfg_);
    }
    if (CspForFrameNumber(video->frame()) == VPX_IMG_FMT_I420 &&
        cfg_.g_profile != 0) {
      cfg_.g_profile = 0;
      encoder->Config(&cfg_);
    }
  }

  virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) {
    if (frame0_psnr_ == 0.) frame0_psnr_ = pkt->data.psnr.psnr[0];
    EXPECT_NEAR(pkt->data.psnr.psnr[0], frame0_psnr_, 2.0);
  }

#if WRITE_COMPRESSED_STREAM
  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
    ++out_frames_;

    // Write initial file header if first frame.
    if (pkt->data.frame.pts == 0) write_ivf_file_header(&cfg_, 0, outfile_);

    // Write frame header and data.
write_ivf_frame_header(pkt, outfile_); (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, outfile_); } #endif double frame0_psnr_; #if WRITE_COMPRESSED_STREAM FILE *outfile_; unsigned int out_frames_; #endif }; class ResizingCspVideoSource : public ::libvpx_test::DummyVideoSource { public: ResizingCspVideoSource() { SetSize(kInitialWidth, kInitialHeight); limit_ = 30; } virtual ~ResizingCspVideoSource() {} protected: virtual void Next() { ++frame_; SetImageFormat(CspForFrameNumber(frame_)); FillFrame(); } }; TEST_P(ResizeCspTest, TestResizeCspWorks) { ResizingCspVideoSource video; init_flags_ = VPX_CODEC_USE_PSNR; cfg_.rc_min_quantizer = cfg_.rc_max_quantizer = 48; cfg_.g_lag_in_frames = 0; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } VP8_INSTANTIATE_TEST_CASE(ResizeTest, ONE_PASS_TEST_MODES); VP9_INSTANTIATE_TEST_CASE(ResizeTest, ::testing::Values(::libvpx_test::kRealTime)); VP9_INSTANTIATE_TEST_CASE(ResizeInternalTest, ::testing::Values(::libvpx_test::kOnePassBest)); VP9_INSTANTIATE_TEST_CASE(ResizeRealtimeTest, ::testing::Values(::libvpx_test::kRealTime), ::testing::Range(5, 9)); VP9_INSTANTIATE_TEST_CASE(ResizeCspTest, ::testing::Values(::libvpx_test::kRealTime)); } // namespace libvpx-1.8.2/test/resize_util.sh000077500000000000000000000044421357355204000167120ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file tests the libvpx resize_util example code. To add new tests to ## this file, do the following: ## 1. Write a shell function (this is your test). ## 2. Add the function to resize_util_tests (on a new line). ## . $(dirname $0)/tools_common.sh # Environment check: $YUV_RAW_INPUT is required. resize_util_verify_environment() { if [ ! -e "${YUV_RAW_INPUT}" ]; then echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi } # Resizes $YUV_RAW_INPUT using the resize_util example. $1 is the output # dimensions that will be passed to resize_util. resize_util() { local resizer="${LIBVPX_BIN_PATH}/resize_util${VPX_TEST_EXE_SUFFIX}" local output_file="${VPX_TEST_OUTPUT_DIR}/resize_util.raw" local frames_to_resize="10" local target_dimensions="$1" # resize_util is available only when CONFIG_SHARED is disabled. if [ -z "$(vpx_config_option_enabled CONFIG_SHARED)" ]; then if [ ! -x "${resizer}" ]; then elog "${resizer} does not exist or is not executable." return 1 fi eval "${VPX_TEST_PREFIX}" "${resizer}" "${YUV_RAW_INPUT}" \ "${YUV_RAW_INPUT_WIDTH}x${YUV_RAW_INPUT_HEIGHT}" \ "${target_dimensions}" "${output_file}" ${frames_to_resize} \ ${devnull} [ -e "${output_file}" ] || return 1 fi } # Halves each dimension of $YUV_RAW_INPUT using resize_util(). resize_down() { local target_width=$((${YUV_RAW_INPUT_WIDTH} / 2)) local target_height=$((${YUV_RAW_INPUT_HEIGHT} / 2)) resize_util "${target_width}x${target_height}" } # Doubles each dimension of $YUV_RAW_INPUT using resize_util(). 
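# As a usage sketch (assuming the stock 352x288 test clip configured by
# tools_common.sh), doubling is equivalent to calling:
#   resize_util "704x576"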
resize_up() {
  local target_width=$((${YUV_RAW_INPUT_WIDTH} * 2))
  local target_height=$((${YUV_RAW_INPUT_HEIGHT} * 2))
  resize_util "${target_width}x${target_height}"
}

resize_util_tests="resize_down resize_up"

run_tests resize_util_verify_environment "${resize_util_tests}"
libvpx-1.8.2/test/sad_test.cc000066400000000000000000001333511357355204000161340ustar00rootroot00000000000000/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>
#include <string.h>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "test/acm_random.h"
#include "test/bench.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"
#include "vpx/vpx_codec.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/msvc.h"
#include "vpx_ports/vpx_timer.h"

template <typename Function>
struct TestParams {
  TestParams(int w, int h, Function f, int bd = -1)
      : width(w), height(h), bit_depth(bd), func(f) {}
  int width, height, bit_depth;
  Function func;
};

typedef unsigned int (*SadMxNFunc)(const uint8_t *src_ptr, int src_stride,
                                   const uint8_t *ref_ptr, int ref_stride);
typedef TestParams<SadMxNFunc> SadMxNParam;

typedef unsigned int (*SadMxNAvgFunc)(const uint8_t *src_ptr, int src_stride,
                                      const uint8_t *ref_ptr, int ref_stride,
                                      const uint8_t *second_pred);
typedef TestParams<SadMxNAvgFunc> SadMxNAvgParam;

typedef void (*SadMxNx4Func)(const uint8_t *src_ptr, int src_stride,
                             const uint8_t *const ref_ptr[], int ref_stride,
                             unsigned int *sad_array);
typedef TestParams<SadMxNx4Func> SadMxNx4Param;

typedef void (*SadMxNx8Func)(const uint8_t *src_ptr, int src_stride,
                             const uint8_t *ref_ptr, int ref_stride,
                             unsigned int *sad_array);
typedef TestParams<SadMxNx8Func> SadMxNx8Param;

using libvpx_test::ACMRandom;

namespace {

template <typename ParamType>
class SADTestBase : public ::testing::TestWithParam<ParamType> {
 public:
  explicit SADTestBase(const ParamType &params) : params_(params) {}

  virtual void SetUp() {
    source_data8_ = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kDataBlockSize));
    reference_data8_ = reinterpret_cast<uint8_t *>(
        vpx_memalign(kDataAlignment, kDataBufferSize));
    second_pred8_ =
        reinterpret_cast<uint8_t *>(vpx_memalign(kDataAlignment, 64 * 64));
    source_data16_ = reinterpret_cast<uint16_t *>(
        vpx_memalign(kDataAlignment, kDataBlockSize * sizeof(uint16_t)));
    reference_data16_ = reinterpret_cast<uint16_t *>(
        vpx_memalign(kDataAlignment, kDataBufferSize * sizeof(uint16_t)));
    second_pred16_ = reinterpret_cast<uint16_t *>(
        vpx_memalign(kDataAlignment, 64 * 64 * sizeof(uint16_t)));

    if (params_.bit_depth == -1) {
      use_high_bit_depth_ = false;
      bit_depth_ = VPX_BITS_8;
      source_data_ = source_data8_;
      reference_data_ = reference_data8_;
      second_pred_ = second_pred8_;
#if CONFIG_VP9_HIGHBITDEPTH
    } else {
      use_high_bit_depth_ = true;
      bit_depth_ = static_cast<vpx_bit_depth_t>(params_.bit_depth);
      source_data_ = CONVERT_TO_BYTEPTR(source_data16_);
      reference_data_ = CONVERT_TO_BYTEPTR(reference_data16_);
      second_pred_ = CONVERT_TO_BYTEPTR(second_pred16_);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    }
    mask_ = (1 << bit_depth_) - 1;
    source_stride_ = (params_.width + 63) & ~63;
    reference_stride_ = params_.width * 2;
    rnd_.Reset(ACMRandom::DeterministicSeed());
  }

  virtual void TearDown() {
    vpx_free(source_data8_);
    source_data8_ = NULL;
    vpx_free(reference_data8_);
reference_data8_ = NULL; vpx_free(second_pred8_); second_pred8_ = NULL; vpx_free(source_data16_); source_data16_ = NULL; vpx_free(reference_data16_); reference_data16_ = NULL; vpx_free(second_pred16_); second_pred16_ = NULL; libvpx_test::ClearSystemState(); } protected: // Handle blocks up to 4 blocks 64x64 with stride up to 128 // crbug.com/webm/1660 // const[expr] should be sufficient for DECLARE_ALIGNED but early // implementations of c++11 appear to have some issues with it. enum { kDataAlignment = 32 }; static const int kDataBlockSize = 64 * 128; static const int kDataBufferSize = 4 * kDataBlockSize; int GetBlockRefOffset(int block_idx) const { return block_idx * kDataBlockSize; } uint8_t *GetReferenceFromOffset(int ref_offset) const { assert((params_.height - 1) * reference_stride_ + params_.width - 1 + ref_offset < kDataBufferSize); #if CONFIG_VP9_HIGHBITDEPTH if (use_high_bit_depth_) { return CONVERT_TO_BYTEPTR(CONVERT_TO_SHORTPTR(reference_data_) + ref_offset); } #endif // CONFIG_VP9_HIGHBITDEPTH return reference_data_ + ref_offset; } uint8_t *GetReference(int block_idx) const { return GetReferenceFromOffset(GetBlockRefOffset(block_idx)); } // Sum of Absolute Differences. Given two blocks, calculate the absolute // difference between two pixels in the same relative location; accumulate. uint32_t ReferenceSAD(int ref_offset) const { uint32_t sad = 0; const uint8_t *const reference8 = GetReferenceFromOffset(ref_offset); const uint8_t *const source8 = source_data_; #if CONFIG_VP9_HIGHBITDEPTH const uint16_t *const reference16 = CONVERT_TO_SHORTPTR(GetReferenceFromOffset(ref_offset)); const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_); #endif // CONFIG_VP9_HIGHBITDEPTH for (int h = 0; h < params_.height; ++h) { for (int w = 0; w < params_.width; ++w) { if (!use_high_bit_depth_) { sad += abs(source8[h * source_stride_ + w] - reference8[h * reference_stride_ + w]); #if CONFIG_VP9_HIGHBITDEPTH } else { sad += abs(source16[h * source_stride_ + w] - reference16[h * reference_stride_ + w]); #endif // CONFIG_VP9_HIGHBITDEPTH } } } return sad; } // Sum of Absolute Differences Average. Given two blocks, and a prediction // calculate the absolute difference between one pixel and average of the // corresponding and predicted pixels; accumulate. 
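  // Concretely, for each pixel: comp_pred = ROUND_POWER_OF_TWO(pred + ref, 1)
  // = (pred + ref + 1) >> 1, then sad += |src - comp_pred|. For example,
  // src = 100, ref = 60, pred = 50 gives comp_pred = (60 + 50 + 1) >> 1 = 55
  // and a contribution of |100 - 55| = 45.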
  unsigned int ReferenceSADavg(int block_idx) const {
    unsigned int sad = 0;
    const uint8_t *const reference8 = GetReference(block_idx);
    const uint8_t *const source8 = source_data_;
    const uint8_t *const second_pred8 = second_pred_;
#if CONFIG_VP9_HIGHBITDEPTH
    const uint16_t *const reference16 =
        CONVERT_TO_SHORTPTR(GetReference(block_idx));
    const uint16_t *const source16 = CONVERT_TO_SHORTPTR(source_data_);
    const uint16_t *const second_pred16 = CONVERT_TO_SHORTPTR(second_pred_);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    for (int h = 0; h < params_.height; ++h) {
      for (int w = 0; w < params_.width; ++w) {
        if (!use_high_bit_depth_) {
          const int tmp = second_pred8[h * params_.width + w] +
                          reference8[h * reference_stride_ + w];
          const uint8_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
          sad += abs(source8[h * source_stride_ + w] - comp_pred);
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          const int tmp = second_pred16[h * params_.width + w] +
                          reference16[h * reference_stride_ + w];
          const uint16_t comp_pred = ROUND_POWER_OF_TWO(tmp, 1);
          sad += abs(source16[h * source_stride_ + w] - comp_pred);
#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }
    }
    return sad;
  }

  void FillConstant(uint8_t *data, int stride, uint16_t fill_constant) const {
    uint8_t *data8 = data;
#if CONFIG_VP9_HIGHBITDEPTH
    uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    for (int h = 0; h < params_.height; ++h) {
      for (int w = 0; w < params_.width; ++w) {
        if (!use_high_bit_depth_) {
          data8[h * stride + w] = static_cast<uint8_t>(fill_constant);
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          data16[h * stride + w] = fill_constant;
#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }
    }
  }

  void FillRandomWH(uint8_t *data, int stride, int w, int h) {
    uint8_t *data8 = data;
#if CONFIG_VP9_HIGHBITDEPTH
    uint16_t *data16 = CONVERT_TO_SHORTPTR(data);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    for (int r = 0; r < h; ++r) {
      for (int c = 0; c < w; ++c) {
        if (!use_high_bit_depth_) {
          data8[r * stride + c] = rnd_.Rand8();
#if CONFIG_VP9_HIGHBITDEPTH
        } else {
          data16[r * stride + c] = rnd_.Rand16() & mask_;
#endif  // CONFIG_VP9_HIGHBITDEPTH
        }
      }
    }
  }

  void FillRandom(uint8_t *data, int stride) {
    FillRandomWH(data, stride, params_.width, params_.height);
  }

  uint32_t mask_;
  vpx_bit_depth_t bit_depth_;
  int source_stride_;
  int reference_stride_;
  bool use_high_bit_depth_;

  uint8_t *source_data_;
  uint8_t *reference_data_;
  uint8_t *second_pred_;
  uint8_t *source_data8_;
  uint8_t *reference_data8_;
  uint8_t *second_pred8_;
  uint16_t *source_data16_;
  uint16_t *reference_data16_;
  uint16_t *second_pred16_;

  ACMRandom rnd_;
  ParamType params_;
};

class SADx8Test : public SADTestBase<SadMxNx8Param> {
 public:
  SADx8Test() : SADTestBase(GetParam()) {}

 protected:
  void SADs(unsigned int *results) const {
    const uint8_t *reference = GetReferenceFromOffset(0);

    ASM_REGISTER_STATE_CHECK(params_.func(
        source_data_, source_stride_, reference, reference_stride_, results));
  }

  void CheckSADs() const {
    uint32_t reference_sad;
    DECLARE_ALIGNED(kDataAlignment, uint32_t, exp_sad[8]);

    SADs(exp_sad);
    for (int offset = 0; offset < 8; ++offset) {
      reference_sad = ReferenceSAD(offset);

      EXPECT_EQ(reference_sad, exp_sad[offset]) << "offset " << offset;
    }
  }
};

class SADx4Test : public SADTestBase<SadMxNx4Param> {
 public:
  SADx4Test() : SADTestBase(GetParam()) {}

 protected:
  void SADs(unsigned int *results) const {
    const uint8_t *references[] = { GetReference(0), GetReference(1),
                                    GetReference(2), GetReference(3) };

    ASM_REGISTER_STATE_CHECK(params_.func(
        source_data_, source_stride_, references, reference_stride_, results));
  }

  void CheckSADs() const {
    uint32_t reference_sad;
    DECLARE_ALIGNED(kDataAlignment, uint32_t, exp_sad[4]);

    SADs(exp_sad);
    for (int block = 0; block < 4; ++block) {
      reference_sad = ReferenceSAD(GetBlockRefOffset(block));

      EXPECT_EQ(reference_sad, exp_sad[block]) << "block " << block;
    }
  }
};

class SADTest : public AbstractBench, public SADTestBase<SadMxNParam> {
 public:
  SADTest() : SADTestBase(GetParam()) {}

 protected:
  unsigned int SAD(int block_idx) const {
    unsigned int ret;
    const uint8_t *const reference = GetReference(block_idx);

    ASM_REGISTER_STATE_CHECK(ret = params_.func(source_data_, source_stride_,
                                                reference, reference_stride_));
    return ret;
  }

  void CheckSAD() const {
    const unsigned int reference_sad = ReferenceSAD(GetBlockRefOffset(0));
    const unsigned int exp_sad = SAD(0);

    ASSERT_EQ(reference_sad, exp_sad);
  }

  void Run() {
    params_.func(source_data_, source_stride_, reference_data_,
                 reference_stride_);
  }
};

class SADavgTest : public SADTestBase<SadMxNAvgParam> {
 public:
  SADavgTest() : SADTestBase(GetParam()) {}

 protected:
  unsigned int SAD_avg(int block_idx) const {
    unsigned int ret;
    const uint8_t *const reference = GetReference(block_idx);

    ASM_REGISTER_STATE_CHECK(ret = params_.func(source_data_, source_stride_,
                                                reference, reference_stride_,
                                                second_pred_));
    return ret;
  }

  void CheckSAD() const {
    const unsigned int reference_sad = ReferenceSADavg(0);
    const unsigned int exp_sad = SAD_avg(0);

    ASSERT_EQ(reference_sad, exp_sad);
  }
};

TEST_P(SADTest, MaxRef) {
  FillConstant(source_data_, source_stride_, 0);
  FillConstant(reference_data_, reference_stride_, mask_);
  CheckSAD();
}

TEST_P(SADTest, MaxSrc) {
  FillConstant(source_data_, source_stride_, mask_);
  FillConstant(reference_data_, reference_stride_, 0);
  CheckSAD();
}

TEST_P(SADTest, ShortRef) {
  const int tmp_stride = reference_stride_;
  reference_stride_ >>= 1;
  FillRandom(source_data_, source_stride_);
  FillRandom(reference_data_, reference_stride_);
  CheckSAD();
  reference_stride_ = tmp_stride;
}

TEST_P(SADTest, UnalignedRef) {
  // The reference frame, but not the source frame, may be unaligned for
  // certain types of searches.
  const int tmp_stride = reference_stride_;
  reference_stride_ -= 1;
  FillRandom(source_data_, source_stride_);
  FillRandom(reference_data_, reference_stride_);
  CheckSAD();
  reference_stride_ = tmp_stride;
}

TEST_P(SADTest, ShortSrc) {
  const int tmp_stride = source_stride_;
  source_stride_ >>= 1;
  FillRandom(source_data_, source_stride_);
  FillRandom(reference_data_, reference_stride_);
  CheckSAD();
  source_stride_ = tmp_stride;
}

TEST_P(SADTest, DISABLED_Speed) {
  const int kCountSpeedTestBlock = 50000000 / (params_.width * params_.height);
  FillRandom(source_data_, source_stride_);

  RunNTimes(kCountSpeedTestBlock);

  char title[16];
  snprintf(title, sizeof(title), "%dx%d", params_.width, params_.height);
  PrintMedian(title);
}

TEST_P(SADavgTest, MaxRef) {
  FillConstant(source_data_, source_stride_, 0);
  FillConstant(reference_data_, reference_stride_, mask_);
  FillConstant(second_pred_, params_.width, 0);
  CheckSAD();
}
TEST_P(SADavgTest, MaxSrc) {
  FillConstant(source_data_, source_stride_, mask_);
  FillConstant(reference_data_, reference_stride_, 0);
  FillConstant(second_pred_, params_.width, 0);
  CheckSAD();
}

TEST_P(SADavgTest, ShortRef) {
  const int tmp_stride = reference_stride_;
  reference_stride_ >>= 1;
  FillRandom(source_data_, source_stride_);
  FillRandom(reference_data_, reference_stride_);
  FillRandom(second_pred_, params_.width);
  CheckSAD();
  reference_stride_ = tmp_stride;
}

TEST_P(SADavgTest, UnalignedRef) {
  // The reference frame, but not the source frame, may be unaligned for
  // certain types of searches.
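  // Decrementing the stride by one below guarantees that most reference rows
  // start at odd addresses, exercising the unaligned-load paths in the SIMD
  // implementations.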
const int tmp_stride = reference_stride_; reference_stride_ -= 1; FillRandom(source_data_, source_stride_); FillRandom(reference_data_, reference_stride_); FillRandom(second_pred_, params_.width); CheckSAD(); reference_stride_ = tmp_stride; } TEST_P(SADavgTest, ShortSrc) { const int tmp_stride = source_stride_; source_stride_ >>= 1; FillRandom(source_data_, source_stride_); FillRandom(reference_data_, reference_stride_); FillRandom(second_pred_, params_.width); CheckSAD(); source_stride_ = tmp_stride; } TEST_P(SADx4Test, MaxRef) { FillConstant(source_data_, source_stride_, 0); FillConstant(GetReference(0), reference_stride_, mask_); FillConstant(GetReference(1), reference_stride_, mask_); FillConstant(GetReference(2), reference_stride_, mask_); FillConstant(GetReference(3), reference_stride_, mask_); CheckSADs(); } TEST_P(SADx4Test, MaxSrc) { FillConstant(source_data_, source_stride_, mask_); FillConstant(GetReference(0), reference_stride_, 0); FillConstant(GetReference(1), reference_stride_, 0); FillConstant(GetReference(2), reference_stride_, 0); FillConstant(GetReference(3), reference_stride_, 0); CheckSADs(); } TEST_P(SADx4Test, ShortRef) { int tmp_stride = reference_stride_; reference_stride_ >>= 1; FillRandom(source_data_, source_stride_); FillRandom(GetReference(0), reference_stride_); FillRandom(GetReference(1), reference_stride_); FillRandom(GetReference(2), reference_stride_); FillRandom(GetReference(3), reference_stride_); CheckSADs(); reference_stride_ = tmp_stride; } TEST_P(SADx4Test, UnalignedRef) { // The reference frame, but not the source frame, may be unaligned for // certain types of searches. int tmp_stride = reference_stride_; reference_stride_ -= 1; FillRandom(source_data_, source_stride_); FillRandom(GetReference(0), reference_stride_); FillRandom(GetReference(1), reference_stride_); FillRandom(GetReference(2), reference_stride_); FillRandom(GetReference(3), reference_stride_); CheckSADs(); reference_stride_ = tmp_stride; } TEST_P(SADx4Test, ShortSrc) { int tmp_stride = source_stride_; source_stride_ >>= 1; FillRandom(source_data_, source_stride_); FillRandom(GetReference(0), reference_stride_); FillRandom(GetReference(1), reference_stride_); FillRandom(GetReference(2), reference_stride_); FillRandom(GetReference(3), reference_stride_); CheckSADs(); source_stride_ = tmp_stride; } TEST_P(SADx4Test, SrcAlignedByWidth) { uint8_t *tmp_source_data = source_data_; source_data_ += params_.width; FillRandom(source_data_, source_stride_); FillRandom(GetReference(0), reference_stride_); FillRandom(GetReference(1), reference_stride_); FillRandom(GetReference(2), reference_stride_); FillRandom(GetReference(3), reference_stride_); CheckSADs(); source_data_ = tmp_source_data; } TEST_P(SADx4Test, DISABLED_Speed) { int tmp_stride = reference_stride_; reference_stride_ -= 1; FillRandom(source_data_, source_stride_); FillRandom(GetReference(0), reference_stride_); FillRandom(GetReference(1), reference_stride_); FillRandom(GetReference(2), reference_stride_); FillRandom(GetReference(3), reference_stride_); const int kCountSpeedTestBlock = 500000000 / (params_.width * params_.height); uint32_t reference_sad[4]; DECLARE_ALIGNED(kDataAlignment, uint32_t, exp_sad[4]); vpx_usec_timer timer; memset(reference_sad, 0, sizeof(reference_sad)); SADs(exp_sad); vpx_usec_timer_start(&timer); for (int i = 0; i < kCountSpeedTestBlock; ++i) { for (int block = 0; block < 4; ++block) { reference_sad[block] = ReferenceSAD(GetBlockRefOffset(block)); } } vpx_usec_timer_mark(&timer); for (int block = 0; 
       block < 4; ++block) {
    EXPECT_EQ(reference_sad[block], exp_sad[block]) << "block " << block;
  }
  const int elapsed_time =
      static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000);
  printf("sad%dx%dx4 (%2dbit) time: %5d ms\n", params_.width, params_.height,
         bit_depth_, elapsed_time);
  reference_stride_ = tmp_stride;
}

TEST_P(SADx8Test, Regular) {
  FillRandomWH(source_data_, source_stride_, params_.width, params_.height);
  FillRandomWH(GetReferenceFromOffset(0), reference_stride_, params_.width + 8,
               params_.height);
  CheckSADs();
}

//------------------------------------------------------------------------------
// C functions
const SadMxNParam c_tests[] = {
  SadMxNParam(64, 64, &vpx_sad64x64_c),
  SadMxNParam(64, 32, &vpx_sad64x32_c),
  SadMxNParam(32, 64, &vpx_sad32x64_c),
  SadMxNParam(32, 32, &vpx_sad32x32_c),
  SadMxNParam(32, 16, &vpx_sad32x16_c),
  SadMxNParam(16, 32, &vpx_sad16x32_c),
  SadMxNParam(16, 16, &vpx_sad16x16_c),
  SadMxNParam(16, 8, &vpx_sad16x8_c),
  SadMxNParam(8, 16, &vpx_sad8x16_c),
  SadMxNParam(8, 8, &vpx_sad8x8_c),
  SadMxNParam(8, 4, &vpx_sad8x4_c),
  SadMxNParam(4, 8, &vpx_sad4x8_c),
  SadMxNParam(4, 4, &vpx_sad4x4_c),
#if CONFIG_VP9_HIGHBITDEPTH
  SadMxNParam(64, 64, &vpx_highbd_sad64x64_c, 8),
  SadMxNParam(64, 32, &vpx_highbd_sad64x32_c, 8),
  SadMxNParam(32, 64, &vpx_highbd_sad32x64_c, 8),
  SadMxNParam(32, 32, &vpx_highbd_sad32x32_c, 8),
  SadMxNParam(32, 16, &vpx_highbd_sad32x16_c, 8),
  SadMxNParam(16, 32, &vpx_highbd_sad16x32_c, 8),
  SadMxNParam(16, 16, &vpx_highbd_sad16x16_c, 8),
  SadMxNParam(16, 8, &vpx_highbd_sad16x8_c, 8),
  SadMxNParam(8, 16, &vpx_highbd_sad8x16_c, 8),
  SadMxNParam(8, 8, &vpx_highbd_sad8x8_c, 8),
  SadMxNParam(8, 4, &vpx_highbd_sad8x4_c, 8),
  SadMxNParam(4, 8, &vpx_highbd_sad4x8_c, 8),
  SadMxNParam(4, 4, &vpx_highbd_sad4x4_c, 8),
  SadMxNParam(64, 64, &vpx_highbd_sad64x64_c, 10),
  SadMxNParam(64, 32, &vpx_highbd_sad64x32_c, 10),
  SadMxNParam(32, 64, &vpx_highbd_sad32x64_c, 10),
  SadMxNParam(32, 32, &vpx_highbd_sad32x32_c, 10),
  SadMxNParam(32, 16, &vpx_highbd_sad32x16_c, 10),
  SadMxNParam(16, 32, &vpx_highbd_sad16x32_c, 10),
  SadMxNParam(16, 16, &vpx_highbd_sad16x16_c, 10),
  SadMxNParam(16, 8, &vpx_highbd_sad16x8_c, 10),
  SadMxNParam(8, 16, &vpx_highbd_sad8x16_c, 10),
  SadMxNParam(8, 8, &vpx_highbd_sad8x8_c, 10),
  SadMxNParam(8, 4, &vpx_highbd_sad8x4_c, 10),
  SadMxNParam(4, 8, &vpx_highbd_sad4x8_c, 10),
  SadMxNParam(4, 4, &vpx_highbd_sad4x4_c, 10),
  SadMxNParam(64, 64, &vpx_highbd_sad64x64_c, 12),
  SadMxNParam(64, 32, &vpx_highbd_sad64x32_c, 12),
  SadMxNParam(32, 64, &vpx_highbd_sad32x64_c, 12),
  SadMxNParam(32, 32, &vpx_highbd_sad32x32_c, 12),
  SadMxNParam(32, 16, &vpx_highbd_sad32x16_c, 12),
  SadMxNParam(16, 32, &vpx_highbd_sad16x32_c, 12),
  SadMxNParam(16, 16, &vpx_highbd_sad16x16_c, 12),
  SadMxNParam(16, 8, &vpx_highbd_sad16x8_c, 12),
  SadMxNParam(8, 16, &vpx_highbd_sad8x16_c, 12),
  SadMxNParam(8, 8, &vpx_highbd_sad8x8_c, 12),
  SadMxNParam(8, 4, &vpx_highbd_sad8x4_c, 12),
  SadMxNParam(4, 8, &vpx_highbd_sad4x8_c, 12),
  SadMxNParam(4, 4, &vpx_highbd_sad4x4_c, 12),
#endif  // CONFIG_VP9_HIGHBITDEPTH
};
INSTANTIATE_TEST_CASE_P(C, SADTest, ::testing::ValuesIn(c_tests));

const SadMxNAvgParam avg_c_tests[] = {
  SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_c),
  SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_c),
  SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_c),
  SadMxNAvgParam(32, 32, &vpx_sad32x32_avg_c),
  SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_c),
  SadMxNAvgParam(16, 32, &vpx_sad16x32_avg_c),
  SadMxNAvgParam(16, 16, &vpx_sad16x16_avg_c),
  SadMxNAvgParam(16, 8, &vpx_sad16x8_avg_c),
  SadMxNAvgParam(8, 16, &vpx_sad8x16_avg_c),
  SadMxNAvgParam(8, 8,
&vpx_sad8x8_avg_c), SadMxNAvgParam(8, 4, &vpx_sad8x4_avg_c), SadMxNAvgParam(4, 8, &vpx_sad4x8_avg_c), SadMxNAvgParam(4, 4, &vpx_sad4x4_avg_c), #if CONFIG_VP9_HIGHBITDEPTH SadMxNAvgParam(64, 64, &vpx_highbd_sad64x64_avg_c, 8), SadMxNAvgParam(64, 32, &vpx_highbd_sad64x32_avg_c, 8), SadMxNAvgParam(32, 64, &vpx_highbd_sad32x64_avg_c, 8), SadMxNAvgParam(32, 32, &vpx_highbd_sad32x32_avg_c, 8), SadMxNAvgParam(32, 16, &vpx_highbd_sad32x16_avg_c, 8), SadMxNAvgParam(16, 32, &vpx_highbd_sad16x32_avg_c, 8), SadMxNAvgParam(16, 16, &vpx_highbd_sad16x16_avg_c, 8), SadMxNAvgParam(16, 8, &vpx_highbd_sad16x8_avg_c, 8), SadMxNAvgParam(8, 16, &vpx_highbd_sad8x16_avg_c, 8), SadMxNAvgParam(8, 8, &vpx_highbd_sad8x8_avg_c, 8), SadMxNAvgParam(8, 4, &vpx_highbd_sad8x4_avg_c, 8), SadMxNAvgParam(4, 8, &vpx_highbd_sad4x8_avg_c, 8), SadMxNAvgParam(4, 4, &vpx_highbd_sad4x4_avg_c, 8), SadMxNAvgParam(64, 64, &vpx_highbd_sad64x64_avg_c, 10), SadMxNAvgParam(64, 32, &vpx_highbd_sad64x32_avg_c, 10), SadMxNAvgParam(32, 64, &vpx_highbd_sad32x64_avg_c, 10), SadMxNAvgParam(32, 32, &vpx_highbd_sad32x32_avg_c, 10), SadMxNAvgParam(32, 16, &vpx_highbd_sad32x16_avg_c, 10), SadMxNAvgParam(16, 32, &vpx_highbd_sad16x32_avg_c, 10), SadMxNAvgParam(16, 16, &vpx_highbd_sad16x16_avg_c, 10), SadMxNAvgParam(16, 8, &vpx_highbd_sad16x8_avg_c, 10), SadMxNAvgParam(8, 16, &vpx_highbd_sad8x16_avg_c, 10), SadMxNAvgParam(8, 8, &vpx_highbd_sad8x8_avg_c, 10), SadMxNAvgParam(8, 4, &vpx_highbd_sad8x4_avg_c, 10), SadMxNAvgParam(4, 8, &vpx_highbd_sad4x8_avg_c, 10), SadMxNAvgParam(4, 4, &vpx_highbd_sad4x4_avg_c, 10), SadMxNAvgParam(64, 64, &vpx_highbd_sad64x64_avg_c, 12), SadMxNAvgParam(64, 32, &vpx_highbd_sad64x32_avg_c, 12), SadMxNAvgParam(32, 64, &vpx_highbd_sad32x64_avg_c, 12), SadMxNAvgParam(32, 32, &vpx_highbd_sad32x32_avg_c, 12), SadMxNAvgParam(32, 16, &vpx_highbd_sad32x16_avg_c, 12), SadMxNAvgParam(16, 32, &vpx_highbd_sad16x32_avg_c, 12), SadMxNAvgParam(16, 16, &vpx_highbd_sad16x16_avg_c, 12), SadMxNAvgParam(16, 8, &vpx_highbd_sad16x8_avg_c, 12), SadMxNAvgParam(8, 16, &vpx_highbd_sad8x16_avg_c, 12), SadMxNAvgParam(8, 8, &vpx_highbd_sad8x8_avg_c, 12), SadMxNAvgParam(8, 4, &vpx_highbd_sad8x4_avg_c, 12), SadMxNAvgParam(4, 8, &vpx_highbd_sad4x8_avg_c, 12), SadMxNAvgParam(4, 4, &vpx_highbd_sad4x4_avg_c, 12), #endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_CASE_P(C, SADavgTest, ::testing::ValuesIn(avg_c_tests)); const SadMxNx4Param x4d_c_tests[] = { SadMxNx4Param(64, 64, &vpx_sad64x64x4d_c), SadMxNx4Param(64, 32, &vpx_sad64x32x4d_c), SadMxNx4Param(32, 64, &vpx_sad32x64x4d_c), SadMxNx4Param(32, 32, &vpx_sad32x32x4d_c), SadMxNx4Param(32, 16, &vpx_sad32x16x4d_c), SadMxNx4Param(16, 32, &vpx_sad16x32x4d_c), SadMxNx4Param(16, 16, &vpx_sad16x16x4d_c), SadMxNx4Param(16, 8, &vpx_sad16x8x4d_c), SadMxNx4Param(8, 16, &vpx_sad8x16x4d_c), SadMxNx4Param(8, 8, &vpx_sad8x8x4d_c), SadMxNx4Param(8, 4, &vpx_sad8x4x4d_c), SadMxNx4Param(4, 8, &vpx_sad4x8x4d_c), SadMxNx4Param(4, 4, &vpx_sad4x4x4d_c), #if CONFIG_VP9_HIGHBITDEPTH SadMxNx4Param(64, 64, &vpx_highbd_sad64x64x4d_c, 8), SadMxNx4Param(64, 32, &vpx_highbd_sad64x32x4d_c, 8), SadMxNx4Param(32, 64, &vpx_highbd_sad32x64x4d_c, 8), SadMxNx4Param(32, 32, &vpx_highbd_sad32x32x4d_c, 8), SadMxNx4Param(32, 16, &vpx_highbd_sad32x16x4d_c, 8), SadMxNx4Param(16, 32, &vpx_highbd_sad16x32x4d_c, 8), SadMxNx4Param(16, 16, &vpx_highbd_sad16x16x4d_c, 8), SadMxNx4Param(16, 8, &vpx_highbd_sad16x8x4d_c, 8), SadMxNx4Param(8, 16, &vpx_highbd_sad8x16x4d_c, 8), SadMxNx4Param(8, 8, &vpx_highbd_sad8x8x4d_c, 8), SadMxNx4Param(8, 4, 
&vpx_highbd_sad8x4x4d_c, 8), SadMxNx4Param(4, 8, &vpx_highbd_sad4x8x4d_c, 8), SadMxNx4Param(4, 4, &vpx_highbd_sad4x4x4d_c, 8), SadMxNx4Param(64, 64, &vpx_highbd_sad64x64x4d_c, 10), SadMxNx4Param(64, 32, &vpx_highbd_sad64x32x4d_c, 10), SadMxNx4Param(32, 64, &vpx_highbd_sad32x64x4d_c, 10), SadMxNx4Param(32, 32, &vpx_highbd_sad32x32x4d_c, 10), SadMxNx4Param(32, 16, &vpx_highbd_sad32x16x4d_c, 10), SadMxNx4Param(16, 32, &vpx_highbd_sad16x32x4d_c, 10), SadMxNx4Param(16, 16, &vpx_highbd_sad16x16x4d_c, 10), SadMxNx4Param(16, 8, &vpx_highbd_sad16x8x4d_c, 10), SadMxNx4Param(8, 16, &vpx_highbd_sad8x16x4d_c, 10), SadMxNx4Param(8, 8, &vpx_highbd_sad8x8x4d_c, 10), SadMxNx4Param(8, 4, &vpx_highbd_sad8x4x4d_c, 10), SadMxNx4Param(4, 8, &vpx_highbd_sad4x8x4d_c, 10), SadMxNx4Param(4, 4, &vpx_highbd_sad4x4x4d_c, 10), SadMxNx4Param(64, 64, &vpx_highbd_sad64x64x4d_c, 12), SadMxNx4Param(64, 32, &vpx_highbd_sad64x32x4d_c, 12), SadMxNx4Param(32, 64, &vpx_highbd_sad32x64x4d_c, 12), SadMxNx4Param(32, 32, &vpx_highbd_sad32x32x4d_c, 12), SadMxNx4Param(32, 16, &vpx_highbd_sad32x16x4d_c, 12), SadMxNx4Param(16, 32, &vpx_highbd_sad16x32x4d_c, 12), SadMxNx4Param(16, 16, &vpx_highbd_sad16x16x4d_c, 12), SadMxNx4Param(16, 8, &vpx_highbd_sad16x8x4d_c, 12), SadMxNx4Param(8, 16, &vpx_highbd_sad8x16x4d_c, 12), SadMxNx4Param(8, 8, &vpx_highbd_sad8x8x4d_c, 12), SadMxNx4Param(8, 4, &vpx_highbd_sad8x4x4d_c, 12), SadMxNx4Param(4, 8, &vpx_highbd_sad4x8x4d_c, 12), SadMxNx4Param(4, 4, &vpx_highbd_sad4x4x4d_c, 12), #endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_CASE_P(C, SADx4Test, ::testing::ValuesIn(x4d_c_tests)); // TODO(angiebird): implement the marked-down sad functions const SadMxNx8Param x8_c_tests[] = { // SadMxNx8Param(64, 64, &vpx_sad64x64x8_c), // SadMxNx8Param(64, 32, &vpx_sad64x32x8_c), // SadMxNx8Param(32, 64, &vpx_sad32x64x8_c), SadMxNx8Param(32, 32, &vpx_sad32x32x8_c), // SadMxNx8Param(32, 16, &vpx_sad32x16x8_c), // SadMxNx8Param(16, 32, &vpx_sad16x32x8_c), SadMxNx8Param(16, 16, &vpx_sad16x16x8_c), SadMxNx8Param(16, 8, &vpx_sad16x8x8_c), SadMxNx8Param(8, 16, &vpx_sad8x16x8_c), SadMxNx8Param(8, 8, &vpx_sad8x8x8_c), // SadMxNx8Param(8, 4, &vpx_sad8x4x8_c), // SadMxNx8Param(4, 8, &vpx_sad4x8x8_c), SadMxNx8Param(4, 4, &vpx_sad4x4x8_c), }; INSTANTIATE_TEST_CASE_P(C, SADx8Test, ::testing::ValuesIn(x8_c_tests)); //------------------------------------------------------------------------------ // ARM functions #if HAVE_NEON const SadMxNParam neon_tests[] = { SadMxNParam(64, 64, &vpx_sad64x64_neon), SadMxNParam(64, 32, &vpx_sad64x32_neon), SadMxNParam(32, 32, &vpx_sad32x32_neon), SadMxNParam(16, 32, &vpx_sad16x32_neon), SadMxNParam(16, 16, &vpx_sad16x16_neon), SadMxNParam(16, 8, &vpx_sad16x8_neon), SadMxNParam(8, 16, &vpx_sad8x16_neon), SadMxNParam(8, 8, &vpx_sad8x8_neon), SadMxNParam(8, 4, &vpx_sad8x4_neon), SadMxNParam(4, 8, &vpx_sad4x8_neon), SadMxNParam(4, 4, &vpx_sad4x4_neon), }; INSTANTIATE_TEST_CASE_P(NEON, SADTest, ::testing::ValuesIn(neon_tests)); const SadMxNAvgParam avg_neon_tests[] = { SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_neon), SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_neon), SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_neon), SadMxNAvgParam(32, 32, &vpx_sad32x32_avg_neon), SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_neon), SadMxNAvgParam(16, 32, &vpx_sad16x32_avg_neon), SadMxNAvgParam(16, 16, &vpx_sad16x16_avg_neon), SadMxNAvgParam(16, 8, &vpx_sad16x8_avg_neon), SadMxNAvgParam(8, 16, &vpx_sad8x16_avg_neon), SadMxNAvgParam(8, 8, &vpx_sad8x8_avg_neon), SadMxNAvgParam(8, 4, &vpx_sad8x4_avg_neon), SadMxNAvgParam(4, 8, 
&vpx_sad4x8_avg_neon), SadMxNAvgParam(4, 4, &vpx_sad4x4_avg_neon), }; INSTANTIATE_TEST_CASE_P(NEON, SADavgTest, ::testing::ValuesIn(avg_neon_tests)); const SadMxNx4Param x4d_neon_tests[] = { SadMxNx4Param(64, 64, &vpx_sad64x64x4d_neon), SadMxNx4Param(64, 32, &vpx_sad64x32x4d_neon), SadMxNx4Param(32, 64, &vpx_sad32x64x4d_neon), SadMxNx4Param(32, 32, &vpx_sad32x32x4d_neon), SadMxNx4Param(32, 16, &vpx_sad32x16x4d_neon), SadMxNx4Param(16, 32, &vpx_sad16x32x4d_neon), SadMxNx4Param(16, 16, &vpx_sad16x16x4d_neon), SadMxNx4Param(16, 8, &vpx_sad16x8x4d_neon), SadMxNx4Param(8, 16, &vpx_sad8x16x4d_neon), SadMxNx4Param(8, 8, &vpx_sad8x8x4d_neon), SadMxNx4Param(8, 4, &vpx_sad8x4x4d_neon), SadMxNx4Param(4, 8, &vpx_sad4x8x4d_neon), SadMxNx4Param(4, 4, &vpx_sad4x4x4d_neon), }; INSTANTIATE_TEST_CASE_P(NEON, SADx4Test, ::testing::ValuesIn(x4d_neon_tests)); #endif // HAVE_NEON //------------------------------------------------------------------------------ // x86 functions #if HAVE_SSE2 const SadMxNParam sse2_tests[] = { SadMxNParam(64, 64, &vpx_sad64x64_sse2), SadMxNParam(64, 32, &vpx_sad64x32_sse2), SadMxNParam(32, 64, &vpx_sad32x64_sse2), SadMxNParam(32, 32, &vpx_sad32x32_sse2), SadMxNParam(32, 16, &vpx_sad32x16_sse2), SadMxNParam(16, 32, &vpx_sad16x32_sse2), SadMxNParam(16, 16, &vpx_sad16x16_sse2), SadMxNParam(16, 8, &vpx_sad16x8_sse2), SadMxNParam(8, 16, &vpx_sad8x16_sse2), SadMxNParam(8, 8, &vpx_sad8x8_sse2), SadMxNParam(8, 4, &vpx_sad8x4_sse2), SadMxNParam(4, 8, &vpx_sad4x8_sse2), SadMxNParam(4, 4, &vpx_sad4x4_sse2), #if CONFIG_VP9_HIGHBITDEPTH SadMxNParam(64, 64, &vpx_highbd_sad64x64_sse2, 8), SadMxNParam(64, 32, &vpx_highbd_sad64x32_sse2, 8), SadMxNParam(32, 64, &vpx_highbd_sad32x64_sse2, 8), SadMxNParam(32, 32, &vpx_highbd_sad32x32_sse2, 8), SadMxNParam(32, 16, &vpx_highbd_sad32x16_sse2, 8), SadMxNParam(16, 32, &vpx_highbd_sad16x32_sse2, 8), SadMxNParam(16, 16, &vpx_highbd_sad16x16_sse2, 8), SadMxNParam(16, 8, &vpx_highbd_sad16x8_sse2, 8), SadMxNParam(8, 16, &vpx_highbd_sad8x16_sse2, 8), SadMxNParam(8, 8, &vpx_highbd_sad8x8_sse2, 8), SadMxNParam(8, 4, &vpx_highbd_sad8x4_sse2, 8), SadMxNParam(64, 64, &vpx_highbd_sad64x64_sse2, 10), SadMxNParam(64, 32, &vpx_highbd_sad64x32_sse2, 10), SadMxNParam(32, 64, &vpx_highbd_sad32x64_sse2, 10), SadMxNParam(32, 32, &vpx_highbd_sad32x32_sse2, 10), SadMxNParam(32, 16, &vpx_highbd_sad32x16_sse2, 10), SadMxNParam(16, 32, &vpx_highbd_sad16x32_sse2, 10), SadMxNParam(16, 16, &vpx_highbd_sad16x16_sse2, 10), SadMxNParam(16, 8, &vpx_highbd_sad16x8_sse2, 10), SadMxNParam(8, 16, &vpx_highbd_sad8x16_sse2, 10), SadMxNParam(8, 8, &vpx_highbd_sad8x8_sse2, 10), SadMxNParam(8, 4, &vpx_highbd_sad8x4_sse2, 10), SadMxNParam(64, 64, &vpx_highbd_sad64x64_sse2, 12), SadMxNParam(64, 32, &vpx_highbd_sad64x32_sse2, 12), SadMxNParam(32, 64, &vpx_highbd_sad32x64_sse2, 12), SadMxNParam(32, 32, &vpx_highbd_sad32x32_sse2, 12), SadMxNParam(32, 16, &vpx_highbd_sad32x16_sse2, 12), SadMxNParam(16, 32, &vpx_highbd_sad16x32_sse2, 12), SadMxNParam(16, 16, &vpx_highbd_sad16x16_sse2, 12), SadMxNParam(16, 8, &vpx_highbd_sad16x8_sse2, 12), SadMxNParam(8, 16, &vpx_highbd_sad8x16_sse2, 12), SadMxNParam(8, 8, &vpx_highbd_sad8x8_sse2, 12), SadMxNParam(8, 4, &vpx_highbd_sad8x4_sse2, 12), #endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_CASE_P(SSE2, SADTest, ::testing::ValuesIn(sse2_tests)); const SadMxNAvgParam avg_sse2_tests[] = { SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_sse2), SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_sse2), SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_sse2), SadMxNAvgParam(32, 32, 
&vpx_sad32x32_avg_sse2), SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_sse2), SadMxNAvgParam(16, 32, &vpx_sad16x32_avg_sse2), SadMxNAvgParam(16, 16, &vpx_sad16x16_avg_sse2), SadMxNAvgParam(16, 8, &vpx_sad16x8_avg_sse2), SadMxNAvgParam(8, 16, &vpx_sad8x16_avg_sse2), SadMxNAvgParam(8, 8, &vpx_sad8x8_avg_sse2), SadMxNAvgParam(8, 4, &vpx_sad8x4_avg_sse2), SadMxNAvgParam(4, 8, &vpx_sad4x8_avg_sse2), SadMxNAvgParam(4, 4, &vpx_sad4x4_avg_sse2), #if CONFIG_VP9_HIGHBITDEPTH SadMxNAvgParam(64, 64, &vpx_highbd_sad64x64_avg_sse2, 8), SadMxNAvgParam(64, 32, &vpx_highbd_sad64x32_avg_sse2, 8), SadMxNAvgParam(32, 64, &vpx_highbd_sad32x64_avg_sse2, 8), SadMxNAvgParam(32, 32, &vpx_highbd_sad32x32_avg_sse2, 8), SadMxNAvgParam(32, 16, &vpx_highbd_sad32x16_avg_sse2, 8), SadMxNAvgParam(16, 32, &vpx_highbd_sad16x32_avg_sse2, 8), SadMxNAvgParam(16, 16, &vpx_highbd_sad16x16_avg_sse2, 8), SadMxNAvgParam(16, 8, &vpx_highbd_sad16x8_avg_sse2, 8), SadMxNAvgParam(8, 16, &vpx_highbd_sad8x16_avg_sse2, 8), SadMxNAvgParam(8, 8, &vpx_highbd_sad8x8_avg_sse2, 8), SadMxNAvgParam(8, 4, &vpx_highbd_sad8x4_avg_sse2, 8), SadMxNAvgParam(64, 64, &vpx_highbd_sad64x64_avg_sse2, 10), SadMxNAvgParam(64, 32, &vpx_highbd_sad64x32_avg_sse2, 10), SadMxNAvgParam(32, 64, &vpx_highbd_sad32x64_avg_sse2, 10), SadMxNAvgParam(32, 32, &vpx_highbd_sad32x32_avg_sse2, 10), SadMxNAvgParam(32, 16, &vpx_highbd_sad32x16_avg_sse2, 10), SadMxNAvgParam(16, 32, &vpx_highbd_sad16x32_avg_sse2, 10), SadMxNAvgParam(16, 16, &vpx_highbd_sad16x16_avg_sse2, 10), SadMxNAvgParam(16, 8, &vpx_highbd_sad16x8_avg_sse2, 10), SadMxNAvgParam(8, 16, &vpx_highbd_sad8x16_avg_sse2, 10), SadMxNAvgParam(8, 8, &vpx_highbd_sad8x8_avg_sse2, 10), SadMxNAvgParam(8, 4, &vpx_highbd_sad8x4_avg_sse2, 10), SadMxNAvgParam(64, 64, &vpx_highbd_sad64x64_avg_sse2, 12), SadMxNAvgParam(64, 32, &vpx_highbd_sad64x32_avg_sse2, 12), SadMxNAvgParam(32, 64, &vpx_highbd_sad32x64_avg_sse2, 12), SadMxNAvgParam(32, 32, &vpx_highbd_sad32x32_avg_sse2, 12), SadMxNAvgParam(32, 16, &vpx_highbd_sad32x16_avg_sse2, 12), SadMxNAvgParam(16, 32, &vpx_highbd_sad16x32_avg_sse2, 12), SadMxNAvgParam(16, 16, &vpx_highbd_sad16x16_avg_sse2, 12), SadMxNAvgParam(16, 8, &vpx_highbd_sad16x8_avg_sse2, 12), SadMxNAvgParam(8, 16, &vpx_highbd_sad8x16_avg_sse2, 12), SadMxNAvgParam(8, 8, &vpx_highbd_sad8x8_avg_sse2, 12), SadMxNAvgParam(8, 4, &vpx_highbd_sad8x4_avg_sse2, 12), #endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_CASE_P(SSE2, SADavgTest, ::testing::ValuesIn(avg_sse2_tests)); const SadMxNx4Param x4d_sse2_tests[] = { SadMxNx4Param(64, 64, &vpx_sad64x64x4d_sse2), SadMxNx4Param(64, 32, &vpx_sad64x32x4d_sse2), SadMxNx4Param(32, 64, &vpx_sad32x64x4d_sse2), SadMxNx4Param(32, 32, &vpx_sad32x32x4d_sse2), SadMxNx4Param(32, 16, &vpx_sad32x16x4d_sse2), SadMxNx4Param(16, 32, &vpx_sad16x32x4d_sse2), SadMxNx4Param(16, 16, &vpx_sad16x16x4d_sse2), SadMxNx4Param(16, 8, &vpx_sad16x8x4d_sse2), SadMxNx4Param(8, 16, &vpx_sad8x16x4d_sse2), SadMxNx4Param(8, 8, &vpx_sad8x8x4d_sse2), SadMxNx4Param(8, 4, &vpx_sad8x4x4d_sse2), SadMxNx4Param(4, 8, &vpx_sad4x8x4d_sse2), SadMxNx4Param(4, 4, &vpx_sad4x4x4d_sse2), #if CONFIG_VP9_HIGHBITDEPTH SadMxNx4Param(64, 64, &vpx_highbd_sad64x64x4d_sse2, 8), SadMxNx4Param(64, 32, &vpx_highbd_sad64x32x4d_sse2, 8), SadMxNx4Param(32, 64, &vpx_highbd_sad32x64x4d_sse2, 8), SadMxNx4Param(32, 32, &vpx_highbd_sad32x32x4d_sse2, 8), SadMxNx4Param(32, 16, &vpx_highbd_sad32x16x4d_sse2, 8), SadMxNx4Param(16, 32, &vpx_highbd_sad16x32x4d_sse2, 8), SadMxNx4Param(16, 16, &vpx_highbd_sad16x16x4d_sse2, 8), SadMxNx4Param(16, 8, 
&vpx_highbd_sad16x8x4d_sse2, 8), SadMxNx4Param(8, 16, &vpx_highbd_sad8x16x4d_sse2, 8), SadMxNx4Param(8, 8, &vpx_highbd_sad8x8x4d_sse2, 8), SadMxNx4Param(8, 4, &vpx_highbd_sad8x4x4d_sse2, 8), SadMxNx4Param(4, 8, &vpx_highbd_sad4x8x4d_sse2, 8), SadMxNx4Param(4, 4, &vpx_highbd_sad4x4x4d_sse2, 8), SadMxNx4Param(64, 64, &vpx_highbd_sad64x64x4d_sse2, 10), SadMxNx4Param(64, 32, &vpx_highbd_sad64x32x4d_sse2, 10), SadMxNx4Param(32, 64, &vpx_highbd_sad32x64x4d_sse2, 10), SadMxNx4Param(32, 32, &vpx_highbd_sad32x32x4d_sse2, 10), SadMxNx4Param(32, 16, &vpx_highbd_sad32x16x4d_sse2, 10), SadMxNx4Param(16, 32, &vpx_highbd_sad16x32x4d_sse2, 10), SadMxNx4Param(16, 16, &vpx_highbd_sad16x16x4d_sse2, 10), SadMxNx4Param(16, 8, &vpx_highbd_sad16x8x4d_sse2, 10), SadMxNx4Param(8, 16, &vpx_highbd_sad8x16x4d_sse2, 10), SadMxNx4Param(8, 8, &vpx_highbd_sad8x8x4d_sse2, 10), SadMxNx4Param(8, 4, &vpx_highbd_sad8x4x4d_sse2, 10), SadMxNx4Param(4, 8, &vpx_highbd_sad4x8x4d_sse2, 10), SadMxNx4Param(4, 4, &vpx_highbd_sad4x4x4d_sse2, 10), SadMxNx4Param(64, 64, &vpx_highbd_sad64x64x4d_sse2, 12), SadMxNx4Param(64, 32, &vpx_highbd_sad64x32x4d_sse2, 12), SadMxNx4Param(32, 64, &vpx_highbd_sad32x64x4d_sse2, 12), SadMxNx4Param(32, 32, &vpx_highbd_sad32x32x4d_sse2, 12), SadMxNx4Param(32, 16, &vpx_highbd_sad32x16x4d_sse2, 12), SadMxNx4Param(16, 32, &vpx_highbd_sad16x32x4d_sse2, 12), SadMxNx4Param(16, 16, &vpx_highbd_sad16x16x4d_sse2, 12), SadMxNx4Param(16, 8, &vpx_highbd_sad16x8x4d_sse2, 12), SadMxNx4Param(8, 16, &vpx_highbd_sad8x16x4d_sse2, 12), SadMxNx4Param(8, 8, &vpx_highbd_sad8x8x4d_sse2, 12), SadMxNx4Param(8, 4, &vpx_highbd_sad8x4x4d_sse2, 12), SadMxNx4Param(4, 8, &vpx_highbd_sad4x8x4d_sse2, 12), SadMxNx4Param(4, 4, &vpx_highbd_sad4x4x4d_sse2, 12), #endif // CONFIG_VP9_HIGHBITDEPTH }; INSTANTIATE_TEST_CASE_P(SSE2, SADx4Test, ::testing::ValuesIn(x4d_sse2_tests)); #endif // HAVE_SSE2 #if HAVE_SSE3 // Only functions are x3, which do not have tests. #endif // HAVE_SSE3 #if HAVE_SSSE3 // Only functions are x3, which do not have tests. 
#endif // HAVE_SSSE3 #if HAVE_SSE4_1 const SadMxNx8Param x8_sse4_1_tests[] = { SadMxNx8Param(16, 16, &vpx_sad16x16x8_sse4_1), SadMxNx8Param(16, 8, &vpx_sad16x8x8_sse4_1), SadMxNx8Param(8, 16, &vpx_sad8x16x8_sse4_1), SadMxNx8Param(8, 8, &vpx_sad8x8x8_sse4_1), SadMxNx8Param(4, 4, &vpx_sad4x4x8_sse4_1), }; INSTANTIATE_TEST_CASE_P(SSE4_1, SADx8Test, ::testing::ValuesIn(x8_sse4_1_tests)); #endif // HAVE_SSE4_1 #if HAVE_AVX2 const SadMxNParam avx2_tests[] = { SadMxNParam(64, 64, &vpx_sad64x64_avx2), SadMxNParam(64, 32, &vpx_sad64x32_avx2), SadMxNParam(32, 64, &vpx_sad32x64_avx2), SadMxNParam(32, 32, &vpx_sad32x32_avx2), SadMxNParam(32, 16, &vpx_sad32x16_avx2), }; INSTANTIATE_TEST_CASE_P(AVX2, SADTest, ::testing::ValuesIn(avx2_tests)); const SadMxNAvgParam avg_avx2_tests[] = { SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_avx2), SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_avx2), SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_avx2), SadMxNAvgParam(32, 32, &vpx_sad32x32_avg_avx2), SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_avx2), }; INSTANTIATE_TEST_CASE_P(AVX2, SADavgTest, ::testing::ValuesIn(avg_avx2_tests)); const SadMxNx4Param x4d_avx2_tests[] = { SadMxNx4Param(64, 64, &vpx_sad64x64x4d_avx2), SadMxNx4Param(32, 32, &vpx_sad32x32x4d_avx2), }; INSTANTIATE_TEST_CASE_P(AVX2, SADx4Test, ::testing::ValuesIn(x4d_avx2_tests)); const SadMxNx8Param x8_avx2_tests[] = { // SadMxNx8Param(64, 64, &vpx_sad64x64x8_c), SadMxNx8Param(32, 32, &vpx_sad32x32x8_avx2), }; INSTANTIATE_TEST_CASE_P(AVX2, SADx8Test, ::testing::ValuesIn(x8_avx2_tests)); #endif // HAVE_AVX2 #if HAVE_AVX512 const SadMxNx4Param x4d_avx512_tests[] = { SadMxNx4Param(64, 64, &vpx_sad64x64x4d_avx512), }; INSTANTIATE_TEST_CASE_P(AVX512, SADx4Test, ::testing::ValuesIn(x4d_avx512_tests)); #endif // HAVE_AVX512 //------------------------------------------------------------------------------ // MIPS functions #if HAVE_MSA const SadMxNParam msa_tests[] = { SadMxNParam(64, 64, &vpx_sad64x64_msa), SadMxNParam(64, 32, &vpx_sad64x32_msa), SadMxNParam(32, 64, &vpx_sad32x64_msa), SadMxNParam(32, 32, &vpx_sad32x32_msa), SadMxNParam(32, 16, &vpx_sad32x16_msa), SadMxNParam(16, 32, &vpx_sad16x32_msa), SadMxNParam(16, 16, &vpx_sad16x16_msa), SadMxNParam(16, 8, &vpx_sad16x8_msa), SadMxNParam(8, 16, &vpx_sad8x16_msa), SadMxNParam(8, 8, &vpx_sad8x8_msa), SadMxNParam(8, 4, &vpx_sad8x4_msa), SadMxNParam(4, 8, &vpx_sad4x8_msa), SadMxNParam(4, 4, &vpx_sad4x4_msa), }; INSTANTIATE_TEST_CASE_P(MSA, SADTest, ::testing::ValuesIn(msa_tests)); const SadMxNAvgParam avg_msa_tests[] = { SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_msa), SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_msa), SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_msa), SadMxNAvgParam(32, 32, &vpx_sad32x32_avg_msa), SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_msa), SadMxNAvgParam(16, 32, &vpx_sad16x32_avg_msa), SadMxNAvgParam(16, 16, &vpx_sad16x16_avg_msa), SadMxNAvgParam(16, 8, &vpx_sad16x8_avg_msa), SadMxNAvgParam(8, 16, &vpx_sad8x16_avg_msa), SadMxNAvgParam(8, 8, &vpx_sad8x8_avg_msa), SadMxNAvgParam(8, 4, &vpx_sad8x4_avg_msa), SadMxNAvgParam(4, 8, &vpx_sad4x8_avg_msa), SadMxNAvgParam(4, 4, &vpx_sad4x4_avg_msa), }; INSTANTIATE_TEST_CASE_P(MSA, SADavgTest, ::testing::ValuesIn(avg_msa_tests)); const SadMxNx4Param x4d_msa_tests[] = { SadMxNx4Param(64, 64, &vpx_sad64x64x4d_msa), SadMxNx4Param(64, 32, &vpx_sad64x32x4d_msa), SadMxNx4Param(32, 64, &vpx_sad32x64x4d_msa), SadMxNx4Param(32, 32, &vpx_sad32x32x4d_msa), SadMxNx4Param(32, 16, &vpx_sad32x16x4d_msa), SadMxNx4Param(16, 32, &vpx_sad16x32x4d_msa), SadMxNx4Param(16, 16, &vpx_sad16x16x4d_msa), 
SadMxNx4Param(16, 8, &vpx_sad16x8x4d_msa), SadMxNx4Param(8, 16, &vpx_sad8x16x4d_msa), SadMxNx4Param(8, 8, &vpx_sad8x8x4d_msa), SadMxNx4Param(8, 4, &vpx_sad8x4x4d_msa), SadMxNx4Param(4, 8, &vpx_sad4x8x4d_msa), SadMxNx4Param(4, 4, &vpx_sad4x4x4d_msa), }; INSTANTIATE_TEST_CASE_P(MSA, SADx4Test, ::testing::ValuesIn(x4d_msa_tests)); #endif // HAVE_MSA //------------------------------------------------------------------------------ // VSX functions #if HAVE_VSX const SadMxNParam vsx_tests[] = { SadMxNParam(64, 64, &vpx_sad64x64_vsx), SadMxNParam(64, 32, &vpx_sad64x32_vsx), SadMxNParam(32, 64, &vpx_sad32x64_vsx), SadMxNParam(32, 32, &vpx_sad32x32_vsx), SadMxNParam(32, 16, &vpx_sad32x16_vsx), SadMxNParam(16, 32, &vpx_sad16x32_vsx), SadMxNParam(16, 16, &vpx_sad16x16_vsx), SadMxNParam(16, 8, &vpx_sad16x8_vsx), SadMxNParam(8, 16, &vpx_sad8x16_vsx), SadMxNParam(8, 8, &vpx_sad8x8_vsx), SadMxNParam(8, 4, &vpx_sad8x4_vsx), }; INSTANTIATE_TEST_CASE_P(VSX, SADTest, ::testing::ValuesIn(vsx_tests)); const SadMxNAvgParam avg_vsx_tests[] = { SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_vsx), SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_vsx), SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_vsx), SadMxNAvgParam(32, 32, &vpx_sad32x32_avg_vsx), SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_vsx), SadMxNAvgParam(16, 32, &vpx_sad16x32_avg_vsx), SadMxNAvgParam(16, 16, &vpx_sad16x16_avg_vsx), SadMxNAvgParam(16, 8, &vpx_sad16x8_avg_vsx), }; INSTANTIATE_TEST_CASE_P(VSX, SADavgTest, ::testing::ValuesIn(avg_vsx_tests)); const SadMxNx4Param x4d_vsx_tests[] = { SadMxNx4Param(64, 64, &vpx_sad64x64x4d_vsx), SadMxNx4Param(64, 32, &vpx_sad64x32x4d_vsx), SadMxNx4Param(32, 64, &vpx_sad32x64x4d_vsx), SadMxNx4Param(32, 32, &vpx_sad32x32x4d_vsx), SadMxNx4Param(32, 16, &vpx_sad32x16x4d_vsx), SadMxNx4Param(16, 32, &vpx_sad16x32x4d_vsx), SadMxNx4Param(16, 16, &vpx_sad16x16x4d_vsx), SadMxNx4Param(16, 8, &vpx_sad16x8x4d_vsx), }; INSTANTIATE_TEST_CASE_P(VSX, SADx4Test, ::testing::ValuesIn(x4d_vsx_tests)); #endif // HAVE_VSX //------------------------------------------------------------------------------ // Loongson functions #if HAVE_MMI const SadMxNParam mmi_tests[] = { SadMxNParam(64, 64, &vpx_sad64x64_mmi), SadMxNParam(64, 32, &vpx_sad64x32_mmi), SadMxNParam(32, 64, &vpx_sad32x64_mmi), SadMxNParam(32, 32, &vpx_sad32x32_mmi), SadMxNParam(32, 16, &vpx_sad32x16_mmi), SadMxNParam(16, 32, &vpx_sad16x32_mmi), SadMxNParam(16, 16, &vpx_sad16x16_mmi), SadMxNParam(16, 8, &vpx_sad16x8_mmi), SadMxNParam(8, 16, &vpx_sad8x16_mmi), SadMxNParam(8, 8, &vpx_sad8x8_mmi), SadMxNParam(8, 4, &vpx_sad8x4_mmi), SadMxNParam(4, 8, &vpx_sad4x8_mmi), SadMxNParam(4, 4, &vpx_sad4x4_mmi), }; INSTANTIATE_TEST_CASE_P(MMI, SADTest, ::testing::ValuesIn(mmi_tests)); const SadMxNAvgParam avg_mmi_tests[] = { SadMxNAvgParam(64, 64, &vpx_sad64x64_avg_mmi), SadMxNAvgParam(64, 32, &vpx_sad64x32_avg_mmi), SadMxNAvgParam(32, 64, &vpx_sad32x64_avg_mmi), SadMxNAvgParam(32, 32, &vpx_sad32x32_avg_mmi), SadMxNAvgParam(32, 16, &vpx_sad32x16_avg_mmi), SadMxNAvgParam(16, 32, &vpx_sad16x32_avg_mmi), SadMxNAvgParam(16, 16, &vpx_sad16x16_avg_mmi), SadMxNAvgParam(16, 8, &vpx_sad16x8_avg_mmi), SadMxNAvgParam(8, 16, &vpx_sad8x16_avg_mmi), SadMxNAvgParam(8, 8, &vpx_sad8x8_avg_mmi), SadMxNAvgParam(8, 4, &vpx_sad8x4_avg_mmi), SadMxNAvgParam(4, 8, &vpx_sad4x8_avg_mmi), SadMxNAvgParam(4, 4, &vpx_sad4x4_avg_mmi), }; INSTANTIATE_TEST_CASE_P(MMI, SADavgTest, ::testing::ValuesIn(avg_mmi_tests)); const SadMxNx4Param x4d_mmi_tests[] = { SadMxNx4Param(64, 64, &vpx_sad64x64x4d_mmi), SadMxNx4Param(64, 32, 
&vpx_sad64x32x4d_mmi), SadMxNx4Param(32, 64, &vpx_sad32x64x4d_mmi), SadMxNx4Param(32, 32, &vpx_sad32x32x4d_mmi), SadMxNx4Param(32, 16, &vpx_sad32x16x4d_mmi), SadMxNx4Param(16, 32, &vpx_sad16x32x4d_mmi), SadMxNx4Param(16, 16, &vpx_sad16x16x4d_mmi), SadMxNx4Param(16, 8, &vpx_sad16x8x4d_mmi), SadMxNx4Param(8, 16, &vpx_sad8x16x4d_mmi), SadMxNx4Param(8, 8, &vpx_sad8x8x4d_mmi), SadMxNx4Param(8, 4, &vpx_sad8x4x4d_mmi), SadMxNx4Param(4, 8, &vpx_sad4x8x4d_mmi), SadMxNx4Param(4, 4, &vpx_sad4x4x4d_mmi), }; INSTANTIATE_TEST_CASE_P(MMI, SADx4Test, ::testing::ValuesIn(x4d_mmi_tests)); #endif // HAVE_MMI } // namespace libvpx-1.8.2/test/set_maps.sh000077500000000000000000000034011357355204000161610ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file tests the libvpx set_maps example. To add new tests to this file, ## do the following: ## 1. Write a shell function (this is your test). ## 2. Add the function to set_maps_tests (on a new line). ## . $(dirname $0)/tools_common.sh # Environment check: $YUV_RAW_INPUT is required, and set_maps must exist in # $LIBVPX_BIN_PATH. set_maps_verify_environment() { if [ ! -e "${YUV_RAW_INPUT}" ]; then echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi if [ -z "$(vpx_tool_path set_maps)" ]; then elog "set_maps not found. It must exist in LIBVPX_BIN_PATH or its parent." return 1 fi } # Runs set_maps using the codec specified by $1. set_maps() { local encoder="$(vpx_tool_path set_maps)" local codec="$1" local output_file="${VPX_TEST_OUTPUT_DIR}/set_maps_${codec}.ivf" eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" \ ${devnull} [ -e "${output_file}" ] || return 1 } set_maps_vp8() { if [ "$(vp8_encode_available)" = "yes" ]; then set_maps vp8 || return 1 fi } set_maps_vp9() { if [ "$(vp9_encode_available)" = "yes" ]; then set_maps vp9 || return 1 fi } set_maps_tests="set_maps_vp8 set_maps_vp9" run_tests set_maps_verify_environment "${set_maps_tests}" libvpx-1.8.2/test/set_roi.cc000066400000000000000000000137111357355204000157670ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <limits.h> #include <stddef.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include <sys/types.h> #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/acm_random.h" #include "vp8/encoder/onyx_int.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" using libvpx_test::ACMRandom; namespace { TEST(VP8RoiMapTest, ParameterCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); int delta_q[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; int delta_lf[MAX_MB_SEGMENTS] = { -2, -25, 0, 31 }; unsigned int threshold[MAX_MB_SEGMENTS] = { 0, 100, 200, 300 }; const int internalq_trans[] = { 0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 12, 13, 15, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 64, 67, 70, 73, 76, 79, 82, 85, 88, 91, 94, 97, 100, 103, 106, 109, 112, 115, 118, 121, 124, 127, }; // Initialize elements of cpi with valid defaults. VP8_COMP cpi; cpi.mb.e_mbd.mb_segement_abs_delta = SEGMENT_DELTADATA; cpi.cyclic_refresh_mode_enabled = 0; cpi.mb.e_mbd.segmentation_enabled = 0; cpi.mb.e_mbd.update_mb_segmentation_map = 0; cpi.mb.e_mbd.update_mb_segmentation_data = 0; cpi.common.mb_rows = 240 >> 4; cpi.common.mb_cols = 320 >> 4; const int mbs = (cpi.common.mb_rows * cpi.common.mb_cols); memset(cpi.segment_feature_data, 0, sizeof(cpi.segment_feature_data)); // Segment map cpi.segmentation_map = reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1)); // Allocate memory for the source memory map. unsigned char *roi_map = reinterpret_cast<unsigned char *>(vpx_calloc(mbs, 1)); memset(&roi_map[mbs >> 2], 1, (mbs >> 2)); memset(&roi_map[mbs >> 1], 2, (mbs >> 2)); memset(&roi_map[mbs - (mbs >> 2)], 3, (mbs >> 2)); // Do a test call with valid parameters. int roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, cpi.common.mb_cols, delta_q, delta_lf, threshold); EXPECT_EQ(0, roi_retval) << "vp8_set_roimap roi failed with default test parameters"; // Check that the values in the cpi structure get set as expected. if (roi_retval == 0) { // Check that the segment map got set. const int mapcompare = memcmp(roi_map, cpi.segmentation_map, mbs); EXPECT_EQ(0, mapcompare) << "segment map error"; // Check the q deltas (note the need to translate into // the internal range of 0-127). for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { const int transq = internalq_trans[abs(delta_q[i])]; if (abs(cpi.segment_feature_data[MB_LVL_ALT_Q][i]) != transq) { EXPECT_EQ(transq, cpi.segment_feature_data[MB_LVL_ALT_Q][i]) << "segment delta_q error"; break; } } // Check the loop filter deltas for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { if (cpi.segment_feature_data[MB_LVL_ALT_LF][i] != delta_lf[i]) { EXPECT_EQ(delta_lf[i], cpi.segment_feature_data[MB_LVL_ALT_LF][i]) << "segment delta_lf error"; break; } } // Check the breakout thresholds for (int i = 0; i < MAX_MB_SEGMENTS; ++i) { unsigned int breakout = static_cast<unsigned int>(cpi.segment_encode_breakout[i]); if (threshold[i] != breakout) { EXPECT_EQ(threshold[i], breakout) << "breakout threshold error"; break; } } // Segmentation and segmentation update flags should be set.
EXPECT_EQ(1, cpi.mb.e_mbd.segmentation_enabled) << "segmentation_enabled error"; EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_map) << "update_mb_segmentation_map error"; EXPECT_EQ(1, cpi.mb.e_mbd.update_mb_segmentation_data) << "update_mb_segmentation_data error"; // Try a range of delta q and lf parameters (some legal, some not) for (int i = 0; i < 1000; ++i) { int rand_deltas[4]; int deltas_valid; rand_deltas[0] = rnd(160) - 80; rand_deltas[1] = rnd(160) - 80; rand_deltas[2] = rnd(160) - 80; rand_deltas[3] = rnd(160) - 80; deltas_valid = ((abs(rand_deltas[0]) <= 63) && (abs(rand_deltas[1]) <= 63) && (abs(rand_deltas[2]) <= 63) && (abs(rand_deltas[3]) <= 63)) ? 0 : -1; // Test with random delta q values. roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, cpi.common.mb_cols, rand_deltas, delta_lf, threshold); EXPECT_EQ(deltas_valid, roi_retval) << "dq range check error"; // One delta_q error shown at a time if (deltas_valid != roi_retval) break; // Test with random loop filter values. roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, cpi.common.mb_cols, delta_q, rand_deltas, threshold); EXPECT_EQ(deltas_valid, roi_retval) << "dlf range check error"; // One delta loop filter error shown at a time if (deltas_valid != roi_retval) break; } // Test invalid number of rows or columns. roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows + 1, cpi.common.mb_cols, delta_q, delta_lf, threshold); EXPECT_EQ(-1, roi_retval) << "MB rows bounds check error"; roi_retval = vp8_set_roimap(&cpi, roi_map, cpi.common.mb_rows, cpi.common.mb_cols - 1, delta_q, delta_lf, threshold); EXPECT_EQ(-1, roi_retval) << "MB cols bounds check error"; } // Free allocated memory if (cpi.segmentation_map) vpx_free(cpi.segmentation_map); if (roi_map) vpx_free(roi_map); }; } // namespace libvpx-1.8.2/test/simple_decoder.sh000077500000000000000000000036641357355204000173350ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file tests the libvpx simple_decoder example code. To add new tests to ## this file, do the following: ## 1. Write a shell function (this is your test). ## 2. Add the function to simple_decoder_tests (on a new line). ## . $(dirname $0)/tools_common.sh # Environment check: Make sure input is available: # $VP8_IVF_FILE and $VP9_IVF_FILE are required. simple_decoder_verify_environment() { if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_IVF_FILE}" ]; then echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi } # Runs simple_decoder using $1 as the input file. $2 is the codec name, and is # used solely to name the output file. simple_decoder() { local decoder="${LIBVPX_BIN_PATH}/simple_decoder${VPX_TEST_EXE_SUFFIX}" local input_file="$1" local codec="$2" local output_file="${VPX_TEST_OUTPUT_DIR}/simple_decoder_${codec}.raw" if [ ! -x "${decoder}" ]; then elog "${decoder} does not exist or is not executable."
return 1 fi eval "${VPX_TEST_PREFIX}" "${decoder}" "${input_file}" "${output_file}" \ ${devnull} [ -e "${output_file}" ] || return 1 } simple_decoder_vp8() { if [ "$(vp8_decode_available)" = "yes" ]; then simple_decoder "${VP8_IVF_FILE}" vp8 || return 1 fi } simple_decoder_vp9() { if [ "$(vp9_decode_available)" = "yes" ]; then simple_decoder "${VP9_IVF_FILE}" vp9 || return 1 fi } simple_decoder_tests="simple_decoder_vp8 simple_decoder_vp9" run_tests simple_decoder_verify_environment "${simple_decoder_tests}" libvpx-1.8.2/test/simple_encode_test.cc000066400000000000000000000144361357355204000201750ustar00rootroot00000000000000#include <math.h> #include <memory> #include <vector> #include "third_party/googletest/src/include/gtest/gtest.h" #include "vp9/simple_encode.h" namespace vp9 { namespace { // TODO(angiebird): Find a better way to construct encode info const int w = 352; const int h = 288; const int frame_rate_num = 30; const int frame_rate_den = 1; const int target_bitrate = 1000; const int num_frames = 17; const char infile_path[] = "bus_352x288_420_f20_b8.yuv"; double GetBitrateInKbps(size_t bit_size, int num_frames, int frame_rate_num, int frame_rate_den) { return static_cast<double>(bit_size) / num_frames * frame_rate_num / frame_rate_den / 1000.0; } TEST(SimpleEncode, ComputeFirstPassStats) { SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den, target_bitrate, num_frames, infile_path); simple_encode.ComputeFirstPassStats(); std::vector<std::vector<double>> frame_stats = simple_encode.ObserveFirstPassStats(); EXPECT_EQ(frame_stats.size(), static_cast<size_t>(num_frames)); size_t data_num = frame_stats[0].size(); // Read ObserveFirstPassStats before changing FIRSTPASS_STATS. EXPECT_EQ(data_num, static_cast<size_t>(25)); for (size_t i = 0; i < frame_stats.size(); ++i) { EXPECT_EQ(frame_stats[i].size(), data_num); // FIRSTPASS_STATS's first element is frame EXPECT_EQ(frame_stats[i][0], i); // FIRSTPASS_STATS's last element is count, and the count is 1 for single // frame stats EXPECT_EQ(frame_stats[i][data_num - 1], 1); } } TEST(SimpleEncode, GetCodingFrameNum) { SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den, target_bitrate, num_frames, infile_path); simple_encode.ComputeFirstPassStats(); int num_coding_frames = simple_encode.GetCodingFrameNum(); EXPECT_EQ(num_coding_frames, 19); } TEST(SimpleEncode, EncodeFrame) { SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den, target_bitrate, num_frames, infile_path); simple_encode.ComputeFirstPassStats(); int num_coding_frames = simple_encode.GetCodingFrameNum(); EXPECT_GE(num_coding_frames, num_frames); // The coding frames include actual show frames and alternate reference // frames, i.e. no-show frames.
int ref_num_alternate_references = num_coding_frames - num_frames; int num_alternate_references = 0; simple_encode.StartEncode(); size_t total_data_bit_size = 0; for (int i = 0; i < num_coding_frames; ++i) { EncodeFrameResult encode_frame_result; simple_encode.EncodeFrame(&encode_frame_result); if (i == 0) { EXPECT_EQ(encode_frame_result.show_idx, 0); EXPECT_EQ(encode_frame_result.frame_type, kKeyFrame) << "The first coding frame should be key frame"; } if (encode_frame_result.frame_type == kAlternateReference) { ++num_alternate_references; } EXPECT_GE(encode_frame_result.show_idx, 0); EXPECT_LT(encode_frame_result.show_idx, num_frames); if (i == num_coding_frames - 1) { EXPECT_EQ(encode_frame_result.show_idx, num_frames - 1) << "The last coding frame should be the last display order"; } EXPECT_GE(encode_frame_result.psnr, 34) << "The psnr is supposed to be greater than 34 given the " "target_bitrate 1000 kbps"; total_data_bit_size += encode_frame_result.coding_data_bit_size; } EXPECT_EQ(num_alternate_references, ref_num_alternate_references); const double bitrate = GetBitrateInKbps(total_data_bit_size, num_frames, frame_rate_num, frame_rate_den); const double off_target_threshold = 150; EXPECT_LE(fabs(target_bitrate - bitrate), off_target_threshold); simple_encode.EndEncode(); } TEST(SimpleEncode, EncodeFrameWithQuantizeIndex) { SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den, target_bitrate, num_frames, infile_path); simple_encode.ComputeFirstPassStats(); int num_coding_frames = simple_encode.GetCodingFrameNum(); simple_encode.StartEncode(); for (int i = 0; i < num_coding_frames; ++i) { const int assigned_quantize_index = 100 + i; EncodeFrameResult encode_frame_result; simple_encode.EncodeFrameWithQuantizeIndex(&encode_frame_result, assigned_quantize_index); EXPECT_EQ(encode_frame_result.quantize_index, assigned_quantize_index); } simple_encode.EndEncode(); } TEST(SimpleEncode, EncodeConsistencyTest) { std::vector<int> quantize_index_list; std::vector<uint64_t> ref_sse_list; std::vector<double> ref_psnr_list; std::vector<size_t> ref_bit_size_list; { SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den, target_bitrate, num_frames, infile_path); simple_encode.ComputeFirstPassStats(); const int num_coding_frames = simple_encode.GetCodingFrameNum(); simple_encode.StartEncode(); for (int i = 0; i < num_coding_frames; ++i) { EncodeFrameResult encode_frame_result; simple_encode.EncodeFrame(&encode_frame_result); quantize_index_list.push_back(encode_frame_result.quantize_index); ref_sse_list.push_back(encode_frame_result.sse); ref_psnr_list.push_back(encode_frame_result.psnr); ref_bit_size_list.push_back(encode_frame_result.coding_data_bit_size); } simple_encode.EndEncode(); } { SimpleEncode simple_encode(w, h, frame_rate_num, frame_rate_den, target_bitrate, num_frames, infile_path); simple_encode.ComputeFirstPassStats(); const int num_coding_frames = simple_encode.GetCodingFrameNum(); EXPECT_EQ(static_cast<size_t>(num_coding_frames), quantize_index_list.size()); simple_encode.StartEncode(); for (int i = 0; i < num_coding_frames; ++i) { EncodeFrameResult encode_frame_result; simple_encode.EncodeFrameWithQuantizeIndex(&encode_frame_result, quantize_index_list[i]); EXPECT_EQ(encode_frame_result.quantize_index, quantize_index_list[i]); EXPECT_EQ(encode_frame_result.sse, ref_sse_list[i]); EXPECT_DOUBLE_EQ(encode_frame_result.psnr, ref_psnr_list[i]); EXPECT_EQ(encode_frame_result.coding_data_bit_size, ref_bit_size_list[i]); } simple_encode.EndEncode(); } } } // namespace } // namespace vp9
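The tests above exercise the two-pass vp9::SimpleEncode API end to end. What follows is a minimal standalone sketch (not part of the archive) of the same call sequence; it uses only the constructor, ComputeFirstPassStats(), GetCodingFrameNum(), StartEncode(), EncodeFrame(), and EndEncode() calls and the EncodeFrameResult fields seen in simple_encode_test.cc above, and it assumes the same bus_352x288_420_f20_b8.yuv clip is present in the working directory.

#include <cstdio>
#include "vp9/simple_encode.h"

int main() {
  // Same clip and parameters as the tests above: 352x288, 30/1 fps,
  // 1000 kbps target, 17 input frames (assumed to be available locally).
  vp9::SimpleEncode simple_encode(352, 288, 30, 1, 1000, 17,
                                  "bus_352x288_420_f20_b8.yuv");
  simple_encode.ComputeFirstPassStats();  // First pass: gather stats.
  const int num_coding_frames = simple_encode.GetCodingFrameNum();
  simple_encode.StartEncode();  // Second pass: encode using those stats.
  for (int i = 0; i < num_coding_frames; ++i) {
    vp9::EncodeFrameResult result;
    simple_encode.EncodeFrame(&result);
    // show_idx, psnr and coding_data_bit_size are the fields the
    // SimpleEncode.EncodeFrame test above asserts on.
    printf("coding frame %d: show_idx=%d psnr=%.2f bits=%zu\n", i,
           result.show_idx, result.psnr, result.coding_data_bit_size);
  }
  simple_encode.EndEncode();
  return 0;
}

As the EncodeFrame test verifies, the coding-frame count exceeds the input-frame count by exactly the number of alternate-reference (no-show) frames the encoder inserts.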
libvpx-1.8.2/test/simple_encoder.sh000077500000000000000000000035251357355204000173450ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file tests the libvpx simple_encoder example. To add new tests to this ## file, do the following: ## 1. Write a shell function (this is your test). ## 2. Add the function to simple_encoder_tests (on a new line). ## . $(dirname $0)/tools_common.sh # Environment check: $YUV_RAW_INPUT is required. simple_encoder_verify_environment() { if [ ! -e "${YUV_RAW_INPUT}" ]; then echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi } # Runs simple_encoder using the codec specified by $1 with a frame limit of 100. simple_encoder() { local encoder="${LIBVPX_BIN_PATH}/simple_encoder${VPX_TEST_EXE_SUFFIX}" local codec="$1" local output_file="${VPX_TEST_OUTPUT_DIR}/simple_encoder_${codec}.ivf" if [ ! -x "${encoder}" ]; then elog "${encoder} does not exist or is not executable." return 1 fi eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 9999 0 100 \ ${devnull} [ -e "${output_file}" ] || return 1 } simple_encoder_vp8() { if [ "$(vp8_encode_available)" = "yes" ]; then simple_encoder vp8 || return 1 fi } simple_encoder_vp9() { if [ "$(vp9_encode_available)" = "yes" ]; then simple_encoder vp9 || return 1 fi } simple_encoder_tests="simple_encoder_vp8 simple_encoder_vp9" run_tests simple_encoder_verify_environment "${simple_encoder_tests}" libvpx-1.8.2/test/stress.sh000077500000000000000000000140361357355204000156770ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2016 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file performs a stress test. It runs (STRESS_ONEPASS_MAX_JOBS, ## default=5) one pass, (STRESS_TWOPASS_MAX_JOBS, default=5) two pass & ## (STRESS_RT_MAX_JOBS, default=5) encodes and (STRESS_<CODEC>_DECODE_MAX_JOBS, ## default=30) decodes in parallel. . $(dirname $0)/tools_common.sh YUV="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.yuv" VP8="${LIBVPX_TEST_DATA_PATH}/tos_vp8.webm" VP9="${LIBVPX_TEST_DATA_PATH}/vp90-2-sintel_1920x818_tile_1x4_fpm_2279kbps.webm" DATA_URL="http://downloads.webmproject.org/test_data/libvpx/" SHA1_FILE="$(dirname $0)/test-data.sha1" # Set sha1sum to proper sha program (sha1sum, shasum, sha1). This code is # cribbed from libs.mk. [ -x "$(which sha1sum)" ] && sha1sum=sha1sum [ -x "$(which shasum)" ] && sha1sum=shasum [ -x "$(which sha1)" ] && sha1sum=sha1 # Download a file from the URL and check its sha1sum. download_and_check_file() { # Get the file from the file path. local root="${1#${LIBVPX_TEST_DATA_PATH}/}" # Download the file using curl. Trap to ensure no partial file is left behind. (trap "rm -f $1" INT TERM \ && eval "curl --retry 1 -L -o $1 ${DATA_URL}${root} ${devnull}") # Check the sha1 sum of the file.
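# (The check below is skipped when no sha1 tool was found above; otherwise the
# matching line from test-data.sha1 is fed to "${sha1sum} -c" from inside
# LIBVPX_TEST_DATA_PATH, so a corrupt or partial download fails the test.)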
if [ -n "${sha1sum}" ]; then set -e grep ${root} ${SHA1_FILE} \ | (cd ${LIBVPX_TEST_DATA_PATH}; ${sha1sum} -c); fi } # Environment check: Make sure input is available. stress_verify_environment() { if [ ! -e "${SHA1_FILE}" ] ; then echo "Missing ${SHA1_FILE}" return 1 fi for file in "${YUV}" "${VP8}" "${VP9}"; do if [ ! -e "${file}" ] ; then download_and_check_file "${file}" fi done if [ ! -e "${YUV}" ] || [ ! -e "${VP8}" ] || [ ! -e "${VP9}" ] ; then elog "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi if [ -z "$(vpx_tool_path vpxenc)" ]; then elog "vpxenc not found. It must exist in LIBVPX_BIN_PATH or its parent." return 1 fi if [ -z "$(vpx_tool_path vpxdec)" ]; then elog "vpxdec not found. It must exist in LIBVPX_BIN_PATH or its parent." return 1 fi } # This function runs tests on libvpx that run multiple encodes and decodes # in parallel in hopes of catching synchronization and/or threading issues. stress() { local decoder="$(vpx_tool_path vpxdec)" local encoder="$(vpx_tool_path vpxenc)" local codec="$1" local webm="$2" local decode_count="$3" local threads="$4" local enc_args="$5" local pids="" local rt_max_jobs=${STRESS_RT_MAX_JOBS:-5} local onepass_max_jobs=${STRESS_ONEPASS_MAX_JOBS:-5} local twopass_max_jobs=${STRESS_TWOPASS_MAX_JOBS:-5} # Enable job control, so we can run multiple processes. set -m # Start $onepass_max_jobs encode jobs in parallel. for i in $(seq ${onepass_max_jobs}); do bitrate=$(($i * 20 + 300)) eval "${VPX_TEST_PREFIX}" "${encoder}" "--codec=${codec} -w 1280 -h 720" \ "${YUV}" "-t ${threads} --limit=150 --test-decode=fatal --passes=1" \ "--target-bitrate=${bitrate} -o ${VPX_TEST_OUTPUT_DIR}/${i}.1pass.webm" \ "${enc_args}" ${devnull} & pids="${pids} $!" done # Start $twopass_max_jobs encode jobs in parallel. for i in $(seq ${twopass_max_jobs}); do bitrate=$(($i * 20 + 300)) eval "${VPX_TEST_PREFIX}" "${encoder}" "--codec=${codec} -w 1280 -h 720" \ "${YUV}" "-t ${threads} --limit=150 --test-decode=fatal --passes=2" \ "--target-bitrate=${bitrate} -o ${VPX_TEST_OUTPUT_DIR}/${i}.2pass.webm" \ "${enc_args}" ${devnull} & pids="${pids} $!" done # Start $rt_max_jobs rt encode jobs in parallel. for i in $(seq ${rt_max_jobs}); do bitrate=$(($i * 20 + 300)) eval "${VPX_TEST_PREFIX}" "${encoder}" "--codec=${codec} -w 1280 -h 720" \ "${YUV}" "-t ${threads} --limit=150 --test-decode=fatal " \ "--target-bitrate=${bitrate} --lag-in-frames=0 --error-resilient=1" \ "--kf-min-dist=3000 --kf-max-dist=3000 --cpu-used=-6 --static-thresh=1" \ "--end-usage=cbr --min-q=2 --max-q=56 --undershoot-pct=100" \ "--overshoot-pct=15 --buf-sz=1000 --buf-initial-sz=500" \ "--buf-optimal-sz=600 --max-intra-rate=900 --resize-allowed=0" \ "--drop-frame=0 --passes=1 --rt --noise-sensitivity=4" \ "-o ${VPX_TEST_OUTPUT_DIR}/${i}.rt.webm" ${devnull} & pids="${pids} $!" done # Start $decode_count decode jobs in parallel. for i in $(seq "${decode_count}"); do eval "${decoder}" "-t ${threads}" "${webm}" "--noblit" ${devnull} & pids="${pids} $!" done # Wait for all parallel jobs to finish. 
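# Every background encode/decode PID was recorded in ${pids}; the loop below
# harvests each exit status and counts failures so that stress() returns
# non-zero if any parallel job failed.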
fail=0 for job in "${pids}"; do wait $job || fail=$(($fail + 1)) done return $fail } vp8_stress_test() { local vp8_max_jobs=${STRESS_VP8_DECODE_MAX_JOBS:-40} if [ "$(vp8_decode_available)" = "yes" -a \ "$(vp8_encode_available)" = "yes" ]; then stress vp8 "${VP8}" "${vp8_max_jobs}" 4 fi } vp8_stress_test_token_partitions() { local vp8_max_jobs=${STRESS_VP8_DECODE_MAX_JOBS:-40} if [ "$(vp8_decode_available)" = "yes" -a \ "$(vp8_encode_available)" = "yes" ]; then for threads in 2 4 8; do for token_partitions in 1 2 3; do stress vp8 "${VP8}" "${vp8_max_jobs}" ${threads} \ "--token-parts=$token_partitions" done done fi } vp9_stress() { local vp9_max_jobs=${STRESS_VP9_DECODE_MAX_JOBS:-25} if [ "$(vp9_decode_available)" = "yes" -a \ "$(vp9_encode_available)" = "yes" ]; then stress vp9 "${VP9}" "${vp9_max_jobs}" "$@" fi } vp9_stress_test() { for threads in 4 8 64; do vp9_stress "$threads" "--row-mt=0" done } vp9_stress_test_row_mt() { for threads in 4 8 64; do vp9_stress "$threads" "--row-mt=1" done } run_tests stress_verify_environment \ "vp8_stress_test vp8_stress_test_token_partitions vp9_stress_test vp9_stress_test_row_mt" libvpx-1.8.2/test/sum_squares_test.cc000066400000000000000000000073771357355204000177440ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <cmath> #include <cstdlib> #include <string> #include <tuple> #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vpx_ports/mem.h" using libvpx_test::ACMRandom; namespace { const int kNumIterations = 10000; typedef uint64_t (*SSI16Func)(const int16_t *src, int stride, int size); typedef std::tuple<SSI16Func, SSI16Func> SumSquaresParam; class SumSquaresTest : public ::testing::TestWithParam<SumSquaresParam> { public: virtual ~SumSquaresTest() {} virtual void SetUp() { ref_func_ = GET_PARAM(0); tst_func_ = GET_PARAM(1); } virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: SSI16Func ref_func_; SSI16Func tst_func_; }; TEST_P(SumSquaresTest, OperationCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); DECLARE_ALIGNED(16, int16_t, src[256 * 256]); const int msb = 11; // Up to 12 bit input const int limit = 1 << (msb + 1); for (int k = 0; k < kNumIterations; k++) { const int size = 4 << rnd(6); // Up to 128x128 int stride = 4 << rnd(7); // Up to 256 stride while (stride < size) { // Make sure it's valid stride = 4 << rnd(7); } for (int i = 0; i < size; ++i) { for (int j = 0; j < size; ++j) { src[i * stride + j] = rnd(2) ?
rnd(limit) : -rnd(limit); } } const uint64_t res_ref = ref_func_(src, stride, size); uint64_t res_tst; ASM_REGISTER_STATE_CHECK(res_tst = tst_func_(src, stride, size)); ASSERT_EQ(res_ref, res_tst) << "Error: Sum Squares Test" << " C output does not match optimized output."; } } TEST_P(SumSquaresTest, ExtremeValues) { ACMRandom rnd(ACMRandom::DeterministicSeed()); DECLARE_ALIGNED(16, int16_t, src[256 * 256]); const int msb = 11; // Up to 12 bit input const int limit = 1 << (msb + 1); for (int k = 0; k < kNumIterations; k++) { const int size = 4 << rnd(6); // Up to 128x128 int stride = 4 << rnd(7); // Up to 256 stride while (stride < size) { // Make sure it's valid stride = 4 << rnd(7); } const int val = rnd(2) ? limit - 1 : -(limit - 1); for (int i = 0; i < size; ++i) { for (int j = 0; j < size; ++j) { src[i * stride + j] = val; } } const uint64_t res_ref = ref_func_(src, stride, size); uint64_t res_tst; ASM_REGISTER_STATE_CHECK(res_tst = tst_func_(src, stride, size)); ASSERT_EQ(res_ref, res_tst) << "Error: Sum Squares Test" << " C output does not match optimized output."; } } using std::make_tuple; #if HAVE_NEON INSTANTIATE_TEST_CASE_P( NEON, SumSquaresTest, ::testing::Values(make_tuple(&vpx_sum_squares_2d_i16_c, &vpx_sum_squares_2d_i16_neon))); #endif // HAVE_NEON #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, SumSquaresTest, ::testing::Values(make_tuple(&vpx_sum_squares_2d_i16_c, &vpx_sum_squares_2d_i16_sse2))); #endif // HAVE_SSE2 #if HAVE_MSA INSTANTIATE_TEST_CASE_P( MSA, SumSquaresTest, ::testing::Values(make_tuple(&vpx_sum_squares_2d_i16_c, &vpx_sum_squares_2d_i16_msa))); #endif // HAVE_MSA } // namespace libvpx-1.8.2/test/superframe_test.cc000066400000000000000000000065231357355204000175360ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <climits> #include <tuple> #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" namespace { const int kTestMode = 0; typedef std::tuple<libvpx_test::TestMode, int> SuperframeTestParam; class SuperframeTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<SuperframeTestParam> { protected: SuperframeTest() : EncoderTest(GET_PARAM(0)), modified_buf_(NULL), last_sf_pts_(0) {} virtual ~SuperframeTest() {} virtual void SetUp() { InitializeConfig(); const SuperframeTestParam input = GET_PARAM(1); const libvpx_test::TestMode mode = std::get<kTestMode>(input); SetMode(mode); sf_count_ = 0; sf_count_max_ = INT_MAX; } virtual void TearDown() { delete[] modified_buf_; } virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); } } virtual const vpx_codec_cx_pkt_t *MutateEncoderOutputHook( const vpx_codec_cx_pkt_t *pkt) { if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return pkt; const uint8_t *buffer = reinterpret_cast<const uint8_t *>(pkt->data.frame.buf); const uint8_t marker = buffer[pkt->data.frame.sz - 1]; const int frames = (marker & 0x7) + 1; const int mag = ((marker >> 3) & 3) + 1; const unsigned int index_sz = 2 + mag * frames; if ((marker & 0xe0) == 0xc0 && pkt->data.frame.sz >= index_sz && buffer[pkt->data.frame.sz - index_sz] == marker) { // Frame is a superframe: strip off the index. if (modified_buf_) delete[] modified_buf_; modified_buf_ = new uint8_t[pkt->data.frame.sz - index_sz]; memcpy(modified_buf_, pkt->data.frame.buf, pkt->data.frame.sz - index_sz); modified_pkt_ = *pkt; modified_pkt_.data.frame.buf = modified_buf_; modified_pkt_.data.frame.sz -= index_sz; sf_count_++; last_sf_pts_ = pkt->data.frame.pts; return &modified_pkt_; } // Make sure we do a few frames after the last SF abort_ |= sf_count_ > sf_count_max_ && pkt->data.frame.pts - last_sf_pts_ >= 5; return pkt; } int sf_count_; int sf_count_max_; vpx_codec_cx_pkt_t modified_pkt_; uint8_t *modified_buf_; vpx_codec_pts_t last_sf_pts_; }; TEST_P(SuperframeTest, TestSuperframeIndexIsOptional) { sf_count_max_ = 0; // early exit on successful test. cfg_.g_lag_in_frames = 25; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 40); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); EXPECT_EQ(sf_count_, 1); } VP9_INSTANTIATE_TEST_CASE( SuperframeTest, ::testing::Combine(::testing::Values(::libvpx_test::kTwoPassGood), ::testing::Values(0))); } // namespace libvpx-1.8.2/test/svc_datarate_test.cc000066400000000000000000001522621357355204000200270ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/svc_test.h" #include "test/util.h" #include "test/y4m_video_source.h" #include "vp9/common/vp9_onyxc_int.h" #include "vpx/vpx_codec.h" #include "vpx_ports/bitops.h" namespace svc_test { namespace { typedef enum { // Inter-layer prediction is on on all frames.
INTER_LAYER_PRED_ON, // Inter-layer prediction is off on all frames. INTER_LAYER_PRED_OFF, // Inter-layer prediction is off on non-key frames and non-sync frames. INTER_LAYER_PRED_OFF_NONKEY, // Inter-layer prediction is on on all frames, but constrained such // that any layer S (> 0) can only predict from previous spatial // layer S-1, from the same superframe. INTER_LAYER_PRED_ON_CONSTRAINED } INTER_LAYER_PRED; class DatarateOnePassCbrSvc : public OnePassCbrSvc { public: explicit DatarateOnePassCbrSvc(const ::libvpx_test::CodecFactory *codec) : OnePassCbrSvc(codec) { inter_layer_pred_mode_ = 0; } protected: virtual ~DatarateOnePassCbrSvc() {} virtual void ResetModel() { last_pts_ = 0; duration_ = 0.0; mismatch_psnr_ = 0.0; mismatch_nframes_ = 0; denoiser_on_ = 0; tune_content_ = 0; base_speed_setting_ = 5; spatial_layer_id_ = 0; temporal_layer_id_ = 0; update_pattern_ = 0; memset(bits_in_buffer_model_, 0, sizeof(bits_in_buffer_model_)); memset(bits_total_, 0, sizeof(bits_total_)); memset(layer_target_avg_bandwidth_, 0, sizeof(layer_target_avg_bandwidth_)); dynamic_drop_layer_ = false; change_bitrate_ = false; last_pts_ref_ = 0; middle_bitrate_ = 0; top_bitrate_ = 0; superframe_count_ = -1; key_frame_spacing_ = 9999; num_nonref_frames_ = 0; layer_framedrop_ = 0; force_key_ = 0; force_key_test_ = 0; insert_layer_sync_ = 0; layer_sync_on_base_ = 0; force_intra_only_frame_ = 0; superframe_has_intra_only_ = 0; use_post_encode_drop_ = 0; denoiser_off_on_ = false; denoiser_enable_layers_ = false; } virtual void BeginPassHook(unsigned int /*pass*/) {} // Example pattern for spatial layers and 2 temporal layers used in the // bypass/flexible mode. The pattern corresponds to the pattern // VP9E_TEMPORAL_LAYERING_MODE_0101 (temporal_layering_mode == 2) used in // non-flexible mode, except that we disable inter-layer prediction. 
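// In the pattern built below, every frame predicts only from its LAST
// reference. A TL0 frame in spatial layer sl predicts from buffer slot sl and
// refreshes it (on key frames a layer sl > 0 predicts from slot sl - 1 and
// refreshes slot sl instead); a TL1 frame also predicts from slot sl but
// refreshes slot num_spatial_layers + sl, so TL1 frames are never referenced
// by TL0 frames.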
void set_frame_flags_bypass_mode( int tl, int num_spatial_layers, int is_key_frame, vpx_svc_ref_frame_config_t *ref_frame_config) { for (int sl = 0; sl < num_spatial_layers; ++sl) ref_frame_config->update_buffer_slot[sl] = 0; for (int sl = 0; sl < num_spatial_layers; ++sl) { if (tl == 0) { ref_frame_config->lst_fb_idx[sl] = sl; if (sl) { if (is_key_frame) { ref_frame_config->lst_fb_idx[sl] = sl - 1; ref_frame_config->gld_fb_idx[sl] = sl; } else { ref_frame_config->gld_fb_idx[sl] = sl - 1; } } else { ref_frame_config->gld_fb_idx[sl] = 0; } ref_frame_config->alt_fb_idx[sl] = 0; } else if (tl == 1) { ref_frame_config->lst_fb_idx[sl] = sl; ref_frame_config->gld_fb_idx[sl] = VPXMIN(REF_FRAMES - 1, num_spatial_layers + sl - 1); ref_frame_config->alt_fb_idx[sl] = VPXMIN(REF_FRAMES - 1, num_spatial_layers + sl); } if (!tl) { if (!sl) { ref_frame_config->reference_last[sl] = 1; ref_frame_config->reference_golden[sl] = 0; ref_frame_config->reference_alt_ref[sl] = 0; ref_frame_config->update_buffer_slot[sl] |= 1 << ref_frame_config->lst_fb_idx[sl]; } else { if (is_key_frame) { ref_frame_config->reference_last[sl] = 1; ref_frame_config->reference_golden[sl] = 0; ref_frame_config->reference_alt_ref[sl] = 0; ref_frame_config->update_buffer_slot[sl] |= 1 << ref_frame_config->gld_fb_idx[sl]; } else { ref_frame_config->reference_last[sl] = 1; ref_frame_config->reference_golden[sl] = 0; ref_frame_config->reference_alt_ref[sl] = 0; ref_frame_config->update_buffer_slot[sl] |= 1 << ref_frame_config->lst_fb_idx[sl]; } } } else if (tl == 1) { if (!sl) { ref_frame_config->reference_last[sl] = 1; ref_frame_config->reference_golden[sl] = 0; ref_frame_config->reference_alt_ref[sl] = 0; ref_frame_config->update_buffer_slot[sl] |= 1 << ref_frame_config->alt_fb_idx[sl]; } else { ref_frame_config->reference_last[sl] = 1; ref_frame_config->reference_golden[sl] = 0; ref_frame_config->reference_alt_ref[sl] = 0; ref_frame_config->update_buffer_slot[sl] |= 1 << ref_frame_config->alt_fb_idx[sl]; } } } } void CheckLayerRateTargeting(int num_spatial_layers, int num_temporal_layers, double thresh_overshoot, double thresh_undershoot) const { for (int sl = 0; sl < num_spatial_layers; ++sl) for (int tl = 0; tl < num_temporal_layers; ++tl) { const int layer = sl * num_temporal_layers + tl; ASSERT_GE(cfg_.layer_target_bitrate[layer], file_datarate_[layer] * thresh_overshoot) << " The datarate for the file exceeds the target by too much!"; ASSERT_LE(cfg_.layer_target_bitrate[layer], file_datarate_[layer] * thresh_undershoot) << " The datarate for the file is lower than the target by too " "much!"; } } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { PreEncodeFrameHookSetup(video, encoder); if (video->frame() == 0) { if (force_intra_only_frame_) { // Decoder sets the color_space for Intra-only frames // to BT_601 (see line 1810 in vp9_decodeframe.c). // So set it here in these tests to avoid the encoder-decoder // mismatch check on the color space setting.
encoder->Control(VP9E_SET_COLOR_SPACE, VPX_CS_BT_601); } encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_); encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_); encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED, inter_layer_pred_mode_); if (layer_framedrop_) { vpx_svc_frame_drop_t svc_drop_frame; svc_drop_frame.framedrop_mode = LAYER_DROP; for (int i = 0; i < number_spatial_layers_; i++) svc_drop_frame.framedrop_thresh[i] = 30; svc_drop_frame.max_consec_drop = 30; encoder->Control(VP9E_SET_SVC_FRAME_DROP_LAYER, &svc_drop_frame); } if (use_post_encode_drop_) { encoder->Control(VP9E_SET_POSTENCODE_DROP, use_post_encode_drop_); } } if (denoiser_off_on_) { encoder->Control(VP9E_SET_AQ_MODE, 3); // Set inter_layer_pred to INTER_LAYER_PRED_OFF_NONKEY (K-SVC). encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED, 2); if (!denoiser_enable_layers_) { if (video->frame() == 0) encoder->Control(VP9E_SET_NOISE_SENSITIVITY, 0); else if (video->frame() == 100) encoder->Control(VP9E_SET_NOISE_SENSITIVITY, 1); } else { // Cumulative bitrates for top spatial layers, for // 3 temporal layers. if (video->frame() == 0) { encoder->Control(VP9E_SET_NOISE_SENSITIVITY, 0); // Change layer bitrates to set top spatial layer to 0. // This is for 3 spatial, 3 temporal layers. // This will trigger skip encoding/dropping of top spatial layer. cfg_.rc_target_bitrate -= cfg_.layer_target_bitrate[8]; for (int i = 0; i < 3; i++) bitrate_sl3_[i] = cfg_.layer_target_bitrate[i + 6]; cfg_.layer_target_bitrate[6] = 0; cfg_.layer_target_bitrate[7] = 0; cfg_.layer_target_bitrate[8] = 0; encoder->Config(&cfg_); } else if (video->frame() == 100) { // Change layer bitrates to non-zero on top spatial layer. // This will trigger skip encoding of top spatial layer // on key frame (period = 100). for (int i = 0; i < 3; i++) cfg_.layer_target_bitrate[i + 6] = bitrate_sl3_[i]; cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[8]; encoder->Config(&cfg_); } else if (video->frame() == 120) { // Enable denoiser and top spatial layer after key frame (period is // 100). encoder->Control(VP9E_SET_NOISE_SENSITIVITY, 1); } } } if (update_pattern_ && video->frame() >= 100) { vpx_svc_layer_id_t layer_id; if (video->frame() == 100) { cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; encoder->Config(&cfg_); } // Set layer id since the pattern changed. layer_id.spatial_layer_id = 0; layer_id.temporal_layer_id = (video->frame() % 2 != 0); temporal_layer_id_ = layer_id.temporal_layer_id; for (int i = 0; i < number_spatial_layers_; i++) layer_id.temporal_layer_id_per_spatial[i] = temporal_layer_id_; encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id); set_frame_flags_bypass_mode(layer_id.temporal_layer_id, number_spatial_layers_, 0, &ref_frame_config); encoder->Control(VP9E_SET_SVC_REF_FRAME_CONFIG, &ref_frame_config); } if (change_bitrate_ && video->frame() == 200) { duration_ = (last_pts_ + 1) * timebase_; for (int sl = 0; sl < number_spatial_layers_; ++sl) { for (int tl = 0; tl < number_temporal_layers_; ++tl) { const int layer = sl * number_temporal_layers_ + tl; const double file_size_in_kb = bits_total_[layer] / 1000.; file_datarate_[layer] = file_size_in_kb / duration_; } } CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78, 1.15); memset(file_datarate_, 0, sizeof(file_datarate_)); memset(bits_total_, 0, sizeof(bits_total_)); int64_t bits_in_buffer_model_tmp[VPX_MAX_LAYERS]; last_pts_ref_ = last_pts_; // Set new target bitrate.
cfg_.rc_target_bitrate = cfg_.rc_target_bitrate >> 1; // Buffer level should not reset on dynamic bitrate change. memcpy(bits_in_buffer_model_tmp, bits_in_buffer_model_, sizeof(bits_in_buffer_model_)); AssignLayerBitrates(); memcpy(bits_in_buffer_model_, bits_in_buffer_model_tmp, sizeof(bits_in_buffer_model_)); // Change config to update encoder with new bitrate configuration. encoder->Config(&cfg_); } if (dynamic_drop_layer_) { // TODO(jian): Disable AQ Mode for this test for now. encoder->Control(VP9E_SET_AQ_MODE, 0); if (video->frame() == 0) { // Change layer bitrates to set top layers to 0. This will trigger skip // encoding/dropping of top two spatial layers. cfg_.rc_target_bitrate -= (cfg_.layer_target_bitrate[1] + cfg_.layer_target_bitrate[2]); middle_bitrate_ = cfg_.layer_target_bitrate[1]; top_bitrate_ = cfg_.layer_target_bitrate[2]; cfg_.layer_target_bitrate[1] = 0; cfg_.layer_target_bitrate[2] = 0; encoder->Config(&cfg_); } else if (video->frame() == 50) { // Change layer bitrates to non-zero on two top spatial layers. // This will trigger skip encoding of top two spatial layers. cfg_.layer_target_bitrate[1] = middle_bitrate_; cfg_.layer_target_bitrate[2] = top_bitrate_; cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[2] + cfg_.layer_target_bitrate[1]; encoder->Config(&cfg_); } else if (video->frame() == 100) { // Change layer bitrates to set top layers to 0. This will trigger skip // encoding/dropping of top two spatial layers. cfg_.rc_target_bitrate -= (cfg_.layer_target_bitrate[1] + cfg_.layer_target_bitrate[2]); middle_bitrate_ = cfg_.layer_target_bitrate[1]; top_bitrate_ = cfg_.layer_target_bitrate[2]; cfg_.layer_target_bitrate[1] = 0; cfg_.layer_target_bitrate[2] = 0; encoder->Config(&cfg_); } else if (video->frame() == 150) { // Change layer bitrate on second layer to non-zero to start // encoding it again. cfg_.layer_target_bitrate[1] = middle_bitrate_; cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[1]; encoder->Config(&cfg_); } else if (video->frame() == 200) { // Change layer bitrate on top layer to non-zero to start // encoding it again. 
        cfg_.layer_target_bitrate[2] = top_bitrate_;
        cfg_.rc_target_bitrate += cfg_.layer_target_bitrate[2];
        encoder->Config(&cfg_);
      }
    }
    if (force_key_test_ && force_key_) frame_flags_ = VPX_EFLAG_FORCE_KF;
    if (insert_layer_sync_) {
      vpx_svc_spatial_layer_sync_t svc_layer_sync;
      svc_layer_sync.base_layer_intra_only = 0;
      for (int i = 0; i < number_spatial_layers_; i++)
        svc_layer_sync.spatial_layer_sync[i] = 0;
      if (force_intra_only_frame_) {
        superframe_has_intra_only_ = 0;
        if (video->frame() == 0) {
          svc_layer_sync.base_layer_intra_only = 1;
          svc_layer_sync.spatial_layer_sync[0] = 1;
          encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
          superframe_has_intra_only_ = 1;
        } else if (video->frame() == 100) {
          svc_layer_sync.base_layer_intra_only = 1;
          svc_layer_sync.spatial_layer_sync[0] = 1;
          encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
          superframe_has_intra_only_ = 1;
        }
      } else {
        layer_sync_on_base_ = 0;
        if (video->frame() == 150) {
          svc_layer_sync.spatial_layer_sync[1] = 1;
          encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
        } else if (video->frame() == 240) {
          svc_layer_sync.spatial_layer_sync[2] = 1;
          encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
        } else if (video->frame() == 320) {
          svc_layer_sync.spatial_layer_sync[0] = 1;
          layer_sync_on_base_ = 1;
          encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync);
        }
      }
    }
    const vpx_rational_t tb = video->timebase();
    timebase_ = static_cast<double>(tb.num) / tb.den;
    duration_ = 0;
  }

  vpx_codec_err_t parse_superframe_index(const uint8_t *data, size_t data_sz,
                                         uint32_t sizes[8], int *count) {
    uint8_t marker;
    marker = *(data + data_sz - 1);
    *count = 0;
    if ((marker & 0xe0) == 0xc0) {
      const uint32_t frames = (marker & 0x7) + 1;
      const uint32_t mag = ((marker >> 3) & 0x3) + 1;
      const size_t index_sz = 2 + mag * frames;
      // This chunk is marked as having a superframe index but doesn't have
      // enough data for it, thus it's an invalid superframe index.
      if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME;
      {
        const uint8_t marker2 = *(data + data_sz - index_sz);
        // This chunk is marked as having a superframe index but doesn't have
        // the matching marker byte at the front of the index, therefore it's
        // an invalid chunk.
        if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME;
      }
      {
        uint32_t i, j;
        const uint8_t *x = &data[data_sz - index_sz + 1];
        for (i = 0; i < frames; ++i) {
          uint32_t this_sz = 0;
          for (j = 0; j < mag; ++j) this_sz |= (*x++) << (j * 8);
          sizes[i] = this_sz;
        }
        *count = frames;
      }
    }
    return VPX_CODEC_OK;
  }

  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
    uint32_t sizes[8] = { 0 };
    uint32_t sizes_parsed[8] = { 0 };
    int count = 0;
    int num_layers_encoded = 0;
    last_pts_ = pkt->data.frame.pts;
    const bool key_frame =
        (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
    if (key_frame) {
      // For tests that insert layer sync frames: requesting a layer_sync on
      // the base layer must force a key frame. So if any key frame occurs
      // after the first superframe it must be due to layer sync on the base
      // spatial layer.
      if (superframe_count_ > 0 && insert_layer_sync_ &&
          !force_intra_only_frame_) {
        ASSERT_EQ(layer_sync_on_base_, 1);
      }
      temporal_layer_id_ = 0;
      superframe_count_ = 0;
    }
    parse_superframe_index(static_cast<const uint8_t *>(pkt->data.frame.buf),
                           pkt->data.frame.sz, sizes_parsed, &count);
    // Count may be less than number of spatial layers because of frame drops.
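    // (Superframe index layout, as decoded above: the last byte is a marker
    // 0b110xxxxx whose low 3 bits give the frame count minus 1 and whose
    // bits 3-4 give the bytes used per frame size minus 1; the index occupies
    // the trailing 2 + mag * frames bytes, carries the same marker byte at
    // both ends, and stores little-endian per-frame sizes in between.)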
    for (int sl = 0; sl < number_spatial_layers_; ++sl) {
      if (pkt->data.frame.spatial_layer_encoded[sl]) {
        sizes[sl] = sizes_parsed[num_layers_encoded];
        num_layers_encoded++;
      }
    }
    // For superframe with Intra-only count will be +1 larger
    // because of no-show frame.
    if (force_intra_only_frame_ && superframe_has_intra_only_)
      ASSERT_EQ(count, num_layers_encoded + 1);
    else
      ASSERT_EQ(count, num_layers_encoded);
    // In the constrained frame drop mode, if a given spatial layer is dropped
    // all upper layers must be dropped too.
    if (!layer_framedrop_) {
      int num_layers_dropped = 0;
      for (int sl = 0; sl < number_spatial_layers_; ++sl) {
        if (!pkt->data.frame.spatial_layer_encoded[sl]) {
          // Check that all upper layers are dropped.
          num_layers_dropped++;
          for (int sl2 = sl + 1; sl2 < number_spatial_layers_; ++sl2)
            ASSERT_EQ(pkt->data.frame.spatial_layer_encoded[sl2], 0);
        }
      }
      if (num_layers_dropped == number_spatial_layers_ - 1)
        force_key_ = 1;
      else
        force_key_ = 0;
    }
    // Keep track of number of non-reference frames, needed for mismatch
    // check. Non-reference frames are top spatial and temporal layer frames,
    // for TL > 0.
    if (temporal_layer_id_ == number_temporal_layers_ - 1 &&
        temporal_layer_id_ > 0 &&
        pkt->data.frame.spatial_layer_encoded[number_spatial_layers_ - 1])
      num_nonref_frames_++;
    for (int sl = 0; sl < number_spatial_layers_; ++sl) {
      sizes[sl] = sizes[sl] << 3;
      // Update the total encoded bits per layer.
      // For temporal layers, update the cumulative encoded bits per layer.
      for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) {
        const int layer = sl * number_temporal_layers_ + tl;
        bits_total_[layer] += static_cast<int64_t>(sizes[sl]);
        // Update the per-layer buffer level with the encoded frame size.
        bits_in_buffer_model_[layer] -= static_cast<int64_t>(sizes[sl]);
        // There should be no buffer underrun, except on the base
        // temporal layer, since there may be key frames there.
        // For short key frame spacing, buffer can underrun on individual
        // frames.
if (!key_frame && tl > 0 && key_frame_spacing_ < 100) { ASSERT_GE(bits_in_buffer_model_[layer], 0) << "Buffer Underrun at frame " << pkt->data.frame.pts; } } ASSERT_EQ(pkt->data.frame.width[sl], top_sl_width_ * svc_params_.scaling_factor_num[sl] / svc_params_.scaling_factor_den[sl]); ASSERT_EQ(pkt->data.frame.height[sl], top_sl_height_ * svc_params_.scaling_factor_num[sl] / svc_params_.scaling_factor_den[sl]); } } virtual void EndPassHook(void) { if (change_bitrate_) last_pts_ = last_pts_ - last_pts_ref_; duration_ = (last_pts_ + 1) * timebase_; for (int sl = 0; sl < number_spatial_layers_; ++sl) { for (int tl = 0; tl < number_temporal_layers_; ++tl) { const int layer = sl * number_temporal_layers_ + tl; const double file_size_in_kb = bits_total_[layer] / 1000.; file_datarate_[layer] = file_size_in_kb / duration_; } } } virtual void MismatchHook(const vpx_image_t *img1, const vpx_image_t *img2) { double mismatch_psnr = compute_psnr(img1, img2); mismatch_psnr_ += mismatch_psnr; ++mismatch_nframes_; } unsigned int GetMismatchFrames() { return mismatch_nframes_; } unsigned int GetNonRefFrames() { return num_nonref_frames_; } vpx_codec_pts_t last_pts_; double timebase_; int64_t bits_total_[VPX_MAX_LAYERS]; double duration_; double file_datarate_[VPX_MAX_LAYERS]; size_t bits_in_last_frame_; double mismatch_psnr_; int denoiser_on_; int tune_content_; int spatial_layer_id_; bool dynamic_drop_layer_; unsigned int top_sl_width_; unsigned int top_sl_height_; vpx_svc_ref_frame_config_t ref_frame_config; int update_pattern_; bool change_bitrate_; vpx_codec_pts_t last_pts_ref_; int middle_bitrate_; int top_bitrate_; int key_frame_spacing_; int layer_framedrop_; int force_key_; int force_key_test_; int inter_layer_pred_mode_; int insert_layer_sync_; int layer_sync_on_base_; int force_intra_only_frame_; int superframe_has_intra_only_; int use_post_encode_drop_; int bitrate_sl3_[3]; // Denoiser switched on the fly. bool denoiser_off_on_; // Top layer enabled on the fly. bool denoiser_enable_layers_; private: virtual void SetConfig(const int num_temporal_layer) { cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; cfg_.g_error_resilient = 1; if (num_temporal_layer == 3) { cfg_.ts_rate_decimator[0] = 4; cfg_.ts_rate_decimator[1] = 2; cfg_.ts_rate_decimator[2] = 1; cfg_.temporal_layering_mode = 3; } else if (num_temporal_layer == 2) { cfg_.ts_rate_decimator[0] = 2; cfg_.ts_rate_decimator[1] = 1; cfg_.temporal_layering_mode = 2; } else if (num_temporal_layer == 1) { cfg_.ts_rate_decimator[0] = 1; cfg_.temporal_layering_mode = 0; } } unsigned int num_nonref_frames_; unsigned int mismatch_nframes_; }; // Params: speed setting. class DatarateOnePassCbrSvcSingleBR : public DatarateOnePassCbrSvc, public ::libvpx_test::CodecTestWithParam { public: DatarateOnePassCbrSvcSingleBR() : DatarateOnePassCbrSvc(GET_PARAM(0)) { memset(&svc_params_, 0, sizeof(svc_params_)); } virtual ~DatarateOnePassCbrSvcSingleBR() {} protected: virtual void SetUp() { InitializeConfig(); SetMode(::libvpx_test::kRealTime); speed_setting_ = GET_PARAM(1); ResetModel(); } }; // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 1 // temporal layer, with screen content mode on and same speed setting for all // layers. 
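// (tune_content_ = 1, set in the test below, corresponds to
// VP9E_CONTENT_SCREEN for the VP9E_SET_TUNE_CONTENT control.)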
TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc2SL1TLScreenContent1) {
  SetSvcConfig(2, 1);
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_threads = 1;
  cfg_.rc_dropframe_thresh = 10;
  cfg_.kf_max_dist = 9999;
  ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60);
  top_sl_width_ = 1280;
  top_sl_height_ = 720;
  cfg_.rc_target_bitrate = 500;
  ResetModel();
  tune_content_ = 1;
  AssignLayerBitrates();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_,
                          0.78, 1.15);
#if CONFIG_VP9_DECODER
  // The non-reference frames are expected to be mismatched frames as the
  // encoder will avoid loopfilter on these frames.
  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
#endif
}

// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
// 3 temporal layers, with forced key frame after frame drop.
TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TLForceKey) {
  SetSvcConfig(3, 3);
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_threads = 1;
  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
                                       1, 0, 400);
  top_sl_width_ = 640;
  top_sl_height_ = 480;
  cfg_.rc_target_bitrate = 100;
  ResetModel();
  AssignLayerBitrates();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_,
                          0.78, 1.25);
#if CONFIG_VP9_DECODER
  // The non-reference frames are expected to be mismatched frames as the
  // encoder will avoid loopfilter on these frames.
  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
#endif
}

// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and
// 2 temporal layers, with a change on the fly from the fixed SVC pattern to
// one generated via VP9E_SET_SVC_REF_FRAME_CONFIG. The new pattern also
// disables inter-layer prediction.
TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL2TLDynamicPatternChange) {
  SetSvcConfig(3, 2);
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_threads = 1;
  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  // Change SVC pattern on the fly.
  update_pattern_ = 1;
  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
                                       1, 0, 400);
  top_sl_width_ = 640;
  top_sl_height_ = 480;
  cfg_.rc_target_bitrate = 800;
  ResetModel();
  AssignLayerBitrates();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_,
                          0.78, 1.15);
#if CONFIG_VP9_DECODER
  // The non-reference frames are expected to be mismatched frames as the
  // encoder will avoid loopfilter on these frames.
  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
#endif
}

// Check basic rate targeting for 1 pass CBR SVC with 3 spatial and 3 temporal
// layers, for inter_layer_pred=OffKey (K-SVC) and on the fly switching
// of denoiser from off to on (on at frame = 100). Key frame period is set to
// 1000 so denoising is enabled on non-key frames.
TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TL_DenoiserOffOnFixedLayers) { SetSvcConfig(3, 3); cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.g_threads = 1; cfg_.rc_dropframe_thresh = 30; cfg_.kf_max_dist = 1000; ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv", 1280, 720, 30, 1, 0, 300); top_sl_width_ = 1280; top_sl_height_ = 720; cfg_.rc_target_bitrate = 1000; ResetModel(); denoiser_off_on_ = true; denoiser_enable_layers_ = false; AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // Don't check rate targeting on two top spatial layer since they will be // skipped for part of the sequence. CheckLayerRateTargeting(number_spatial_layers_ - 2, number_temporal_layers_, 0.78, 1.15); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Check basic rate targeting for 1 pass CBR SVC with 3 spatial and 3 temporal // layers, for inter_layer_pred=OffKey (K-SVC) and on the fly switching // of denoiser from off to on, for dynamic layers. Start at 2 spatial layers // and enable 3rd spatial layer at frame = 100. Use periodic key frame with // period 100 so enabling of spatial layer occurs at key frame. Enable denoiser // at frame > 100, after the key frame sync. TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TL_DenoiserOffOnEnableLayers) { SetSvcConfig(3, 3); cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.g_threads = 1; cfg_.rc_dropframe_thresh = 0; cfg_.kf_max_dist = 100; ::libvpx_test::I420VideoSource video("desktop_office1.1280_720-020.yuv", 1280, 720, 30, 1, 0, 300); top_sl_width_ = 1280; top_sl_height_ = 720; cfg_.rc_target_bitrate = 1000; ResetModel(); denoiser_off_on_ = true; denoiser_enable_layers_ = true; AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // Don't check rate targeting on two top spatial layer since they will be // skipped for part of the sequence. CheckLayerRateTargeting(number_spatial_layers_ - 2, number_temporal_layers_, 0.78, 1.15); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Check basic rate targeting for 1 pass CBR SVC with 3 spatial layers and on // the fly switching to 1 and then 2 and back to 3 spatial layers. This switch // is done by setting spatial layer bitrates to 0, and then back to non-zero, // during the sequence. TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL_DisableEnableLayers) { SetSvcConfig(3, 1); cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.g_threads = 1; cfg_.temporal_layering_mode = 0; cfg_.rc_dropframe_thresh = 30; cfg_.kf_max_dist = 9999; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); top_sl_width_ = 640; top_sl_height_ = 480; cfg_.rc_target_bitrate = 800; ResetModel(); dynamic_drop_layer_ = true; AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // Don't check rate targeting on two top spatial layer since they will be // skipped for part of the sequence. 
CheckLayerRateTargeting(number_spatial_layers_ - 2, number_temporal_layers_, 0.78, 1.15); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Run SVC encoder for 1 temporal layer, 2 spatial layers, with spatial // downscale 5x5. TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc2SL1TL5x5MultipleRuns) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; cfg_.ss_number_layers = 2; cfg_.ts_number_layers = 1; cfg_.ts_rate_decimator[0] = 1; cfg_.g_error_resilient = 1; cfg_.g_threads = 3; cfg_.temporal_layering_mode = 0; svc_params_.scaling_factor_num[0] = 256; svc_params_.scaling_factor_den[0] = 1280; svc_params_.scaling_factor_num[1] = 1280; svc_params_.scaling_factor_den[1] = 1280; cfg_.rc_dropframe_thresh = 10; cfg_.kf_max_dist = 999999; cfg_.kf_min_dist = 0; cfg_.ss_target_bitrate[0] = 300; cfg_.ss_target_bitrate[1] = 1400; cfg_.layer_target_bitrate[0] = 300; cfg_.layer_target_bitrate[1] = 1400; cfg_.rc_target_bitrate = 1700; number_spatial_layers_ = cfg_.ss_number_layers; number_temporal_layers_ = cfg_.ts_number_layers; ResetModel(); layer_target_avg_bandwidth_[0] = cfg_.layer_target_bitrate[0] * 1000 / 30; bits_in_buffer_model_[0] = cfg_.layer_target_bitrate[0] * cfg_.rc_buf_initial_sz; layer_target_avg_bandwidth_[1] = cfg_.layer_target_bitrate[1] * 1000 / 30; bits_in_buffer_model_[1] = cfg_.layer_target_bitrate[1] * cfg_.rc_buf_initial_sz; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); top_sl_width_ = 1280; top_sl_height_ = 720; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.78, 1.15); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Params: speed setting and index for bitrate array. class DatarateOnePassCbrSvcMultiBR : public DatarateOnePassCbrSvc, public ::libvpx_test::CodecTestWith2Params { public: DatarateOnePassCbrSvcMultiBR() : DatarateOnePassCbrSvc(GET_PARAM(0)) { memset(&svc_params_, 0, sizeof(svc_params_)); } virtual ~DatarateOnePassCbrSvcMultiBR() {} protected: virtual void SetUp() { InitializeConfig(); SetMode(::libvpx_test::kRealTime); speed_setting_ = GET_PARAM(1); ResetModel(); } }; // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and // 3 temporal layers. Run CIF clip with 1 thread. TEST_P(DatarateOnePassCbrSvcMultiBR, OnePassCbrSvc2SL3TL) { SetSvcConfig(2, 3); cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.g_threads = 1; cfg_.rc_dropframe_thresh = 30; cfg_.kf_max_dist = 9999; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); top_sl_width_ = 640; top_sl_height_ = 480; const int bitrates[3] = { 200, 400, 600 }; // TODO(marpan): Check that effective_datarate for each layer hits the // layer target_bitrate. 
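  // (The two trailing arguments to the CheckLayerRateTargeting() call below
  // bound the measured per-layer datarate relative to its target, i.e. each
  // layer must land within [0.75x, 1.2x] of the layer target bitrate.)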
cfg_.rc_target_bitrate = bitrates[GET_PARAM(2)]; ResetModel(); AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.75, 1.2); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Params: speed setting, layer framedrop control and index for bitrate array. class DatarateOnePassCbrSvcFrameDropMultiBR : public DatarateOnePassCbrSvc, public ::libvpx_test::CodecTestWith3Params { public: DatarateOnePassCbrSvcFrameDropMultiBR() : DatarateOnePassCbrSvc(GET_PARAM(0)) { memset(&svc_params_, 0, sizeof(svc_params_)); } virtual ~DatarateOnePassCbrSvcFrameDropMultiBR() {} protected: virtual void SetUp() { InitializeConfig(); SetMode(::libvpx_test::kRealTime); speed_setting_ = GET_PARAM(1); ResetModel(); } }; // Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and // 3 temporal layers. Run HD clip with 4 threads. TEST_P(DatarateOnePassCbrSvcFrameDropMultiBR, OnePassCbrSvc2SL3TL4Threads) { SetSvcConfig(2, 3); cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.g_threads = 4; cfg_.rc_dropframe_thresh = 30; cfg_.kf_max_dist = 9999; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); top_sl_width_ = 1280; top_sl_height_ = 720; layer_framedrop_ = 0; const int bitrates[3] = { 200, 400, 600 }; cfg_.rc_target_bitrate = bitrates[GET_PARAM(3)]; ResetModel(); layer_framedrop_ = GET_PARAM(2); AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.64, 1.45); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and // 3 temporal layers. Run HD clip with 4 threads. TEST_P(DatarateOnePassCbrSvcFrameDropMultiBR, OnePassCbrSvc3SL3TL4Threads) { SetSvcConfig(3, 3); cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.g_threads = 4; cfg_.rc_dropframe_thresh = 30; cfg_.kf_max_dist = 9999; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); top_sl_width_ = 1280; top_sl_height_ = 720; layer_framedrop_ = 0; const int bitrates[3] = { 200, 400, 600 }; cfg_.rc_target_bitrate = bitrates[GET_PARAM(3)]; ResetModel(); layer_framedrop_ = GET_PARAM(2); AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.58, 1.2); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Params: speed setting, inter-layer prediction mode. 
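// (GET_PARAM(0) is the codec factory; GET_PARAM(1) selects the speed setting
// and GET_PARAM(2) the inter-layer prediction mode, as wired up in SetUp().)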
class DatarateOnePassCbrSvcInterLayerPredSingleBR
    : public DatarateOnePassCbrSvc,
      public ::libvpx_test::CodecTestWith2Params<int, int> {
 public:
  DatarateOnePassCbrSvcInterLayerPredSingleBR()
      : DatarateOnePassCbrSvc(GET_PARAM(0)) {
    memset(&svc_params_, 0, sizeof(svc_params_));
  }
  virtual ~DatarateOnePassCbrSvcInterLayerPredSingleBR() {}

 protected:
  virtual void SetUp() {
    InitializeConfig();
    SetMode(::libvpx_test::kRealTime);
    speed_setting_ = GET_PARAM(1);
    inter_layer_pred_mode_ = GET_PARAM(2);
    ResetModel();
  }
};

// Check basic rate targeting with different inter-layer prediction modes for 1
// pass CBR SVC: 3 spatial layers and 3 temporal layers. Run CIF clip with 1
// thread.
TEST_P(DatarateOnePassCbrSvcInterLayerPredSingleBR, OnePassCbrSvc3SL3TL) {
  // Disable test for inter-layer pred off for now since simulcast_mode fails.
  if (inter_layer_pred_mode_ == INTER_LAYER_PRED_OFF) return;
  SetSvcConfig(3, 3);
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_threads = 1;
  cfg_.temporal_layering_mode = 3;
  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
                                       1, 0, 400);
  top_sl_width_ = 640;
  top_sl_height_ = 480;
  cfg_.rc_target_bitrate = 800;
  ResetModel();
  AssignLayerBitrates();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_,
                          0.78, 1.15);
#if CONFIG_VP9_DECODER
  // The non-reference frames are expected to be mismatched frames as the
  // encoder will avoid loopfilter on these frames.
  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
#endif
}

// Check rate targeting for 1 pass CBR SVC: 3 spatial layers and 3 temporal
// layers, changing the target bitrate in the middle of encoding.
TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TLDynamicBitrateChange) {
  SetSvcConfig(3, 3);
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_threads = 1;
  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
                                       1, 0, 400);
  top_sl_width_ = 640;
  top_sl_height_ = 480;
  cfg_.rc_target_bitrate = 800;
  ResetModel();
  change_bitrate_ = true;
  AssignLayerBitrates();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_,
                          0.78, 1.15);
#if CONFIG_VP9_DECODER
  // The non-reference frames are expected to be mismatched frames as the
  // encoder will avoid loopfilter on these frames.
  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
#endif
}

#if CONFIG_VP9_TEMPORAL_DENOISING
// Params: speed setting, noise sensitivity, index for bitrate array and inter
// layer pred mode.
class DatarateOnePassCbrSvcDenoiser
    : public DatarateOnePassCbrSvc,
      public ::libvpx_test::CodecTestWith4Params<int, int, int, int> {
 public:
  DatarateOnePassCbrSvcDenoiser() : DatarateOnePassCbrSvc(GET_PARAM(0)) {
    memset(&svc_params_, 0, sizeof(svc_params_));
  }
  virtual ~DatarateOnePassCbrSvcDenoiser() {}

 protected:
  virtual void SetUp() {
    InitializeConfig();
    SetMode(::libvpx_test::kRealTime);
    speed_setting_ = GET_PARAM(1);
    inter_layer_pred_mode_ = GET_PARAM(3);
    ResetModel();
  }
};

// Check basic rate targeting for 1 pass CBR SVC with denoising.
// 2 spatial layers and 3 temporal layers. Run HD clip with 2 threads.
TEST_P(DatarateOnePassCbrSvcDenoiser, OnePassCbrSvc2SL3TLDenoiserOn) {
  SetSvcConfig(2, 3);
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_threads = 2;
  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  number_spatial_layers_ = cfg_.ss_number_layers;
  number_temporal_layers_ = cfg_.ts_number_layers;
  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
                                       1, 0, 400);
  top_sl_width_ = 640;
  top_sl_height_ = 480;
  const int bitrates[3] = { 600, 800, 1000 };
  // TODO(marpan): Check that effective_datarate for each layer hits the
  // layer target_bitrate.
  // For SVC, noise_sen = 1 means denoising only the top spatial layer,
  // noise_sen = 2 means denoising the two top spatial layers.
  cfg_.rc_target_bitrate = bitrates[GET_PARAM(3)];
  ResetModel();
  denoiser_on_ = GET_PARAM(2);
  AssignLayerBitrates();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_,
                          0.78, 1.15);
#if CONFIG_VP9_DECODER
  // The non-reference frames are expected to be mismatched frames as the
  // encoder will avoid loopfilter on these frames.
  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
#endif
}
#endif

// Params: speed setting, key frame dist.
class DatarateOnePassCbrSvcSmallKF
    : public DatarateOnePassCbrSvc,
      public ::libvpx_test::CodecTestWith2Params<int, int> {
 public:
  DatarateOnePassCbrSvcSmallKF() : DatarateOnePassCbrSvc(GET_PARAM(0)) {
    memset(&svc_params_, 0, sizeof(svc_params_));
  }
  virtual ~DatarateOnePassCbrSvcSmallKF() {}

 protected:
  virtual void SetUp() {
    InitializeConfig();
    SetMode(::libvpx_test::kRealTime);
    speed_setting_ = GET_PARAM(1);
    ResetModel();
  }
};

// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
// temporal layers. Run CIF clip with 1 thread, and a few short key frame
// periods.
TEST_P(DatarateOnePassCbrSvcSmallKF, OnePassCbrSvc3SL3TLSmallKf) {
  SetSvcConfig(3, 3);
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_threads = 1;
  cfg_.rc_dropframe_thresh = 10;
  cfg_.rc_target_bitrate = 800;
  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
                                       1, 0, 400);
  top_sl_width_ = 640;
  top_sl_height_ = 480;
  // For this 3 temporal layer case, the pattern repeats every 4 frames, so
  // choose 4 neighboring key frame periods (so the key frame will land on
  // 0-2-1-2).
  const int kf_dist = GET_PARAM(2);
  cfg_.kf_max_dist = kf_dist;
  key_frame_spacing_ = kf_dist;
  ResetModel();
  AssignLayerBitrates();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  // TODO(jianj): webm:1554
  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_,
                          0.70, 1.15);
#if CONFIG_VP9_DECODER
  // The non-reference frames are expected to be mismatched frames as the
  // encoder will avoid loopfilter on these frames.
  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
#endif
}

// Check basic rate targeting for 1 pass CBR SVC: 2 spatial layers and 3
// temporal layers. Run CIF clip with 1 thread, and a few short key frame
// periods.
TEST_P(DatarateOnePassCbrSvcSmallKF, OnePassCbrSvc2SL3TLSmallKf) {
  SetSvcConfig(2, 3);
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_threads = 1;
  cfg_.rc_dropframe_thresh = 10;
  cfg_.rc_target_bitrate = 400;
  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
                                       1, 0, 400);
  top_sl_width_ = 640;
  top_sl_height_ = 480;
  // For this 3 temporal layer case, the pattern repeats every 4 frames, so
  // choose 4 neighboring key frame periods (so the key frame will land on
  // 0-2-1-2).
  const int kf_dist = GET_PARAM(2) + 32;
  cfg_.kf_max_dist = kf_dist;
  key_frame_spacing_ = kf_dist;
  ResetModel();
  AssignLayerBitrates();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_,
                          0.78, 1.15);
#if CONFIG_VP9_DECODER
  // The non-reference frames are expected to be mismatched frames as the
  // encoder will avoid loopfilter on these frames.
  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
#endif
}

// Check basic rate targeting for 1 pass CBR SVC: 3 spatial layers and 3
// temporal layers. Run VGA clip with 1 thread, and place layer sync frames:
// one at middle layer first, then another one for top layer, and another
// insert for base spatial layer (which forces key frame).
TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL3TLSyncFrames) {
  SetSvcConfig(3, 3);
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_threads = 1;
  cfg_.kf_max_dist = 9999;
  cfg_.rc_dropframe_thresh = 10;
  cfg_.rc_target_bitrate = 400;
  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
                                       1, 0, 400);
  top_sl_width_ = 640;
  top_sl_height_ = 480;
  ResetModel();
  insert_layer_sync_ = 1;
  AssignLayerBitrates();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_,
                          0.78, 1.15);
#if CONFIG_VP9_DECODER
  // The non-reference frames are expected to be mismatched frames as the
  // encoder will avoid loopfilter on these frames.
  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
#endif
}

// Run SVC encoder for 3 spatial layers, 1 temporal layer, with
// intra-only frame as sync frame on base spatial layer.
// Intra_only is inserted at start and in middle of sequence.
TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc3SL1TLSyncWithIntraOnly) {
  SetSvcConfig(3, 1);
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 63;
  cfg_.g_threads = 4;
  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  cfg_.rc_target_bitrate = 400;
  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
                                       1, 0, 400);
  top_sl_width_ = 640;
  top_sl_height_ = 480;
  ResetModel();
  insert_layer_sync_ = 1;
  // Use intra_only frame for sync on base layer.
  force_intra_only_frame_ = 1;
  AssignLayerBitrates();
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_,
                          0.73, 1.2);
#if CONFIG_VP9_DECODER
  // The non-reference frames are expected to be mismatched frames as the
  // encoder will avoid loopfilter on these frames.
  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
#endif
}

// Run SVC encoder for 2 quality layers (same resolution, different
// bitrates), 1 temporal layer, with screen content mode.
TEST_P(DatarateOnePassCbrSvcSingleBR, OnePassCbrSvc2QL1TLScreen) {
  cfg_.rc_buf_initial_sz = 500;
  cfg_.rc_buf_optimal_sz = 500;
  cfg_.rc_buf_sz = 1000;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 56;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  cfg_.ss_number_layers = 2;
  cfg_.ts_number_layers = 1;
  cfg_.ts_rate_decimator[0] = 1;
  cfg_.temporal_layering_mode = 0;
  cfg_.g_error_resilient = 1;
  cfg_.g_threads = 2;
  svc_params_.scaling_factor_num[0] = 1;
  svc_params_.scaling_factor_den[0] = 1;
  svc_params_.scaling_factor_num[1] = 1;
  svc_params_.scaling_factor_den[1] = 1;
  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  number_spatial_layers_ = cfg_.ss_number_layers;
  number_temporal_layers_ = cfg_.ts_number_layers;
  ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30,
                                       1, 0, 400);
  top_sl_width_ = 640;
  top_sl_height_ = 480;
  ResetModel();
  tune_content_ = 1;
  // Set the layer bitrates, for 2 spatial layers, 1 temporal.
  cfg_.rc_target_bitrate = 400;
  cfg_.ss_target_bitrate[0] = 100;
  cfg_.ss_target_bitrate[1] = 300;
  cfg_.layer_target_bitrate[0] = 100;
  cfg_.layer_target_bitrate[1] = 300;
  for (int sl = 0; sl < 2; ++sl) {
    float layer_framerate = 30.0;
    layer_target_avg_bandwidth_[sl] = static_cast<int>(
        cfg_.layer_target_bitrate[sl] * 1000.0 / layer_framerate);
    bits_in_buffer_model_[sl] =
        cfg_.layer_target_bitrate[sl] * cfg_.rc_buf_initial_sz;
  }
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_,
                          0.73, 1.25);
#if CONFIG_VP9_DECODER
  // The non-reference frames are expected to be mismatched frames as the
  // encoder will avoid loopfilter on these frames.
  EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames());
#endif
}

// Params: speed setting.
class DatarateOnePassCbrSvcPostencodeDrop
    : public DatarateOnePassCbrSvc,
      public ::libvpx_test::CodecTestWithParam<int> {
 public:
  DatarateOnePassCbrSvcPostencodeDrop() : DatarateOnePassCbrSvc(GET_PARAM(0)) {
    memset(&svc_params_, 0, sizeof(svc_params_));
  }
  virtual ~DatarateOnePassCbrSvcPostencodeDrop() {}

 protected:
  virtual void SetUp() {
    InitializeConfig();
    SetMode(::libvpx_test::kRealTime);
    speed_setting_ = GET_PARAM(1);
    ResetModel();
  }
};

// Run SVC encoder for 2 quality layers (same resolution, different
// bitrates), 1 temporal layer, with screen content mode.
TEST_P(DatarateOnePassCbrSvcPostencodeDrop, OnePassCbrSvc2QL1TLScreen) {
  cfg_.rc_buf_initial_sz = 200;
  cfg_.rc_buf_optimal_sz = 200;
  cfg_.rc_buf_sz = 400;
  cfg_.rc_min_quantizer = 0;
  cfg_.rc_max_quantizer = 52;
  cfg_.rc_end_usage = VPX_CBR;
  cfg_.g_lag_in_frames = 0;
  cfg_.ss_number_layers = 2;
  cfg_.ts_number_layers = 1;
  cfg_.ts_rate_decimator[0] = 1;
  cfg_.temporal_layering_mode = 0;
  cfg_.g_error_resilient = 1;
  cfg_.g_threads = 2;
  svc_params_.scaling_factor_num[0] = 1;
  svc_params_.scaling_factor_den[0] = 1;
  svc_params_.scaling_factor_num[1] = 1;
  svc_params_.scaling_factor_den[1] = 1;
  cfg_.rc_dropframe_thresh = 30;
  cfg_.kf_max_dist = 9999;
  number_spatial_layers_ = cfg_.ss_number_layers;
  number_temporal_layers_ = cfg_.ts_number_layers;
  ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288,
                                       30, 1, 0, 300);
  top_sl_width_ = 352;
  top_sl_height_ = 288;
  ResetModel();
  base_speed_setting_ = speed_setting_;
  tune_content_ = 1;
  use_post_encode_drop_ = 1;
  // Set the layer bitrates, for 2 spatial layers, 1 temporal.
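  // (Units here: layer_target_avg_bandwidth_ is bits per frame,
  // i.e. kbit/s * 1000 / fps, and, since rc_buf_initial_sz is expressed in
  // milliseconds, layer_target_bitrate * rc_buf_initial_sz gives the starting
  // buffer level in bits.)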
cfg_.rc_target_bitrate = 400; cfg_.ss_target_bitrate[0] = 100; cfg_.ss_target_bitrate[1] = 300; cfg_.layer_target_bitrate[0] = 100; cfg_.layer_target_bitrate[1] = 300; for (int sl = 0; sl < 2; ++sl) { float layer_framerate = 30.0; layer_target_avg_bandwidth_[sl] = static_cast( cfg_.layer_target_bitrate[sl] * 1000.0 / layer_framerate); bits_in_buffer_model_[sl] = cfg_.layer_target_bitrate[sl] * cfg_.rc_buf_initial_sz; } ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); CheckLayerRateTargeting(number_spatial_layers_, number_temporal_layers_, 0.73, 1.25); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSingleBR, ::testing::Range(5, 10)); VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcPostencodeDrop, ::testing::Range(5, 6)); VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcInterLayerPredSingleBR, ::testing::Range(5, 10), ::testing::Range(0, 3)); VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcMultiBR, ::testing::Range(5, 10), ::testing::Range(0, 3)); VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcFrameDropMultiBR, ::testing::Range(5, 10), ::testing::Range(0, 2), ::testing::Range(0, 3)); #if CONFIG_VP9_TEMPORAL_DENOISING VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcDenoiser, ::testing::Range(5, 10), ::testing::Range(1, 3), ::testing::Range(0, 3), ::testing::Range(0, 4)); #endif VP9_INSTANTIATE_TEST_CASE(DatarateOnePassCbrSvcSmallKF, ::testing::Range(5, 10), ::testing::Range(32, 36)); } // namespace } // namespace svc_test libvpx-1.8.2/test/svc_end_to_end_test.cc000066400000000000000000000417461357355204000203440ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/svc_test.h" #include "test/util.h" #include "test/y4m_video_source.h" #include "vpx/vpx_codec.h" #include "vpx_ports/bitops.h" namespace svc_test { namespace { typedef enum { // Inter-layer prediction is on on all frames. INTER_LAYER_PRED_ON, // Inter-layer prediction is off on all frames. INTER_LAYER_PRED_OFF, // Inter-layer prediction is off on non-key frames and non-sync frames. INTER_LAYER_PRED_OFF_NONKEY, // Inter-layer prediction is on on all frames, but constrained such // that any layer S (> 0) can only predict from previous spatial // layer S-1, from the same superframe. 
INTER_LAYER_PRED_ON_CONSTRAINED } INTER_LAYER_PRED; class ScalePartitionOnePassCbrSvc : public OnePassCbrSvc, public ::testing::TestWithParam { public: ScalePartitionOnePassCbrSvc() : OnePassCbrSvc(GetParam()), mismatch_nframes_(0), num_nonref_frames_(0) { SetMode(::libvpx_test::kRealTime); } protected: virtual ~ScalePartitionOnePassCbrSvc() {} virtual void SetUp() { InitializeConfig(); speed_setting_ = 7; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { PreEncodeFrameHookSetup(video, encoder); } virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { // Keep track of number of non-reference frames, needed for mismatch check. // Non-reference frames are top spatial and temporal layer frames, // for TL > 0. if (temporal_layer_id_ == number_temporal_layers_ - 1 && temporal_layer_id_ > 0 && pkt->data.frame.spatial_layer_encoded[number_spatial_layers_ - 1]) num_nonref_frames_++; } virtual void MismatchHook(const vpx_image_t * /*img1*/, const vpx_image_t * /*img2*/) { ++mismatch_nframes_; } virtual void SetConfig(const int /*num_temporal_layer*/) {} unsigned int GetMismatchFrames() const { return mismatch_nframes_; } unsigned int GetNonRefFrames() const { return num_nonref_frames_; } private: unsigned int mismatch_nframes_; unsigned int num_nonref_frames_; }; TEST_P(ScalePartitionOnePassCbrSvc, OnePassCbrSvc3SL3TL1080P) { SetSvcConfig(3, 3); cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.g_threads = 1; cfg_.rc_dropframe_thresh = 10; cfg_.rc_target_bitrate = 800; cfg_.kf_max_dist = 9999; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; cfg_.g_error_resilient = 1; cfg_.ts_rate_decimator[0] = 4; cfg_.ts_rate_decimator[1] = 2; cfg_.ts_rate_decimator[2] = 1; cfg_.temporal_layering_mode = 3; ::libvpx_test::I420VideoSource video( "slides_code_term_web_plot.1920_1080.yuv", 1920, 1080, 30, 1, 0, 100); // For this 3 temporal layer case, pattern repeats every 4 frames, so choose // 4 key neighboring key frame periods (so key frame will land on 0-2-1-2). AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Params: Inter layer prediction modes. class SyncFrameOnePassCbrSvc : public OnePassCbrSvc, public ::libvpx_test::CodecTestWithParam { public: SyncFrameOnePassCbrSvc() : OnePassCbrSvc(GET_PARAM(0)), current_video_frame_(0), frame_to_start_decode_(0), frame_to_sync_(0), inter_layer_pred_mode_(GET_PARAM(1)), decode_to_layer_before_sync_(-1), decode_to_layer_after_sync_(-1), denoiser_on_(0), intra_only_test_(false), mismatch_nframes_(0), num_nonref_frames_(0) { SetMode(::libvpx_test::kRealTime); memset(&svc_layer_sync_, 0, sizeof(svc_layer_sync_)); } protected: virtual ~SyncFrameOnePassCbrSvc() {} virtual void SetUp() { InitializeConfig(); speed_setting_ = 7; } virtual bool DoDecode() const { return current_video_frame_ >= frame_to_start_decode_; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { current_video_frame_ = video->frame(); PreEncodeFrameHookSetup(video, encoder); if (video->frame() == 0) { // Do not turn off inter-layer pred completely because simulcast mode // fails. 
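      // (The INTER_LAYER_PRED enum values defined above match the values
      // accepted by VP9E_SET_SVC_INTER_LAYER_PRED, so the mode can be passed
      // through unchanged.)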
      if (inter_layer_pred_mode_ != INTER_LAYER_PRED_OFF)
        encoder->Control(VP9E_SET_SVC_INTER_LAYER_PRED,
                         inter_layer_pred_mode_);
      encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_);
      if (intra_only_test_)
        // The decoder sets the color_space for intra-only frames
        // to BT_601 (see line 1810 in vp9_decodeframe.c).
        // So set it here in these tests to avoid an encoder-decoder
        // mismatch on the color space setting.
        encoder->Control(VP9E_SET_COLOR_SPACE, VPX_CS_BT_601);
    }
    if (video->frame() == frame_to_sync_) {
      encoder->Control(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, &svc_layer_sync_);
    }
  }

#if CONFIG_VP9_DECODER
  virtual void PreDecodeFrameHook(::libvpx_test::VideoSource *video,
                                  ::libvpx_test::Decoder *decoder) {
    if (video->frame() < frame_to_sync_) {
      if (decode_to_layer_before_sync_ >= 0)
        decoder->Control(VP9_DECODE_SVC_SPATIAL_LAYER,
                         decode_to_layer_before_sync_);
    } else {
      if (decode_to_layer_after_sync_ >= 0)
        decoder->Control(VP9_DECODE_SVC_SPATIAL_LAYER,
                         decode_to_layer_after_sync_);
    }
  }
#endif

  virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) {
    // Keep track of number of non-reference frames, needed for mismatch
    // check. Non-reference frames are top spatial and temporal layer frames,
    // for TL > 0.
    if (temporal_layer_id_ == number_temporal_layers_ - 1 &&
        temporal_layer_id_ > 0 &&
        pkt->data.frame.spatial_layer_encoded[number_spatial_layers_ - 1] &&
        current_video_frame_ >= frame_to_sync_)
      num_nonref_frames_++;

    if (intra_only_test_ && current_video_frame_ == frame_to_sync_) {
      // An intra-only frame is only generated for spatial layers > 1 and
      // <= 3, among other conditions (see the constraint in
      // set_intra_only_frame()). If intra-only is not allowed, the encoder
      // will insert a key frame instead.
      const bool key_frame =
          (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false;
      if (number_spatial_layers_ == 1 || number_spatial_layers_ > 3)
        ASSERT_TRUE(key_frame);
      else
        ASSERT_FALSE(key_frame);
    }
  }

  virtual void MismatchHook(const vpx_image_t * /*img1*/,
                            const vpx_image_t * /*img2*/) {
    if (current_video_frame_ >= frame_to_sync_) ++mismatch_nframes_;
  }

  unsigned int GetMismatchFrames() const { return mismatch_nframes_; }
  unsigned int GetNonRefFrames() const { return num_nonref_frames_; }

  unsigned int current_video_frame_;
  unsigned int frame_to_start_decode_;
  unsigned int frame_to_sync_;
  int inter_layer_pred_mode_;
  int decode_to_layer_before_sync_;
  int decode_to_layer_after_sync_;
  int denoiser_on_;
  bool intra_only_test_;
  vpx_svc_spatial_layer_sync_t svc_layer_sync_;

 private:
  virtual void SetConfig(const int num_temporal_layer) {
    cfg_.rc_buf_initial_sz = 500;
    cfg_.rc_buf_optimal_sz = 500;
    cfg_.rc_buf_sz = 1000;
    cfg_.rc_min_quantizer = 0;
    cfg_.rc_max_quantizer = 63;
    cfg_.rc_end_usage = VPX_CBR;
    cfg_.g_lag_in_frames = 0;
    cfg_.g_error_resilient = 1;
    cfg_.g_threads = 1;
    cfg_.rc_dropframe_thresh = 30;
    cfg_.kf_max_dist = 9999;
    if (num_temporal_layer == 3) {
      cfg_.ts_rate_decimator[0] = 4;
      cfg_.ts_rate_decimator[1] = 2;
      cfg_.ts_rate_decimator[2] = 1;
      cfg_.temporal_layering_mode = 3;
    } else if (num_temporal_layer == 2) {
      cfg_.ts_rate_decimator[0] = 2;
      cfg_.ts_rate_decimator[1] = 1;
      cfg_.temporal_layering_mode = 2;
    } else if (num_temporal_layer == 1) {
      cfg_.ts_rate_decimator[0] = 1;
      cfg_.temporal_layering_mode = 1;
    }
  }

  unsigned int mismatch_nframes_;
  unsigned int num_nonref_frames_;
};

// Test for sync layer for 1 pass CBR SVC: 3 spatial layers and
// 3 temporal layers. Only start decoding on the sync layer.
// Full sync: insert key frame on base layer.
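// (A layer sync request on the base spatial layer without intra-only forces a
// key frame, which is why decoding can start exactly at frame_to_sync_ here.)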
TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc3SL3TLFullSync) { SetSvcConfig(3, 3); // Sync is on base layer so the frame to sync and the frame to start decoding // is the same. frame_to_start_decode_ = 20; frame_to_sync_ = 20; decode_to_layer_before_sync_ = -1; decode_to_layer_after_sync_ = 2; // Set up svc layer sync structure. svc_layer_sync_.base_layer_intra_only = 0; svc_layer_sync_.spatial_layer_sync[0] = 1; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); cfg_.rc_target_bitrate = 600; AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Test for sync layer for 1 pass CBR SVC: 2 spatial layers and // 3 temporal layers. Decoding QVGA before sync frame and decode up to // VGA on and after sync. TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc2SL3TLSyncToVGA) { SetSvcConfig(2, 3); frame_to_start_decode_ = 0; frame_to_sync_ = 100; decode_to_layer_before_sync_ = 0; decode_to_layer_after_sync_ = 1; // Set up svc layer sync structure. svc_layer_sync_.base_layer_intra_only = 0; svc_layer_sync_.spatial_layer_sync[0] = 0; svc_layer_sync_.spatial_layer_sync[1] = 1; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); cfg_.rc_target_bitrate = 400; AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Test for sync layer for 1 pass CBR SVC: 3 spatial layers and // 3 temporal layers. Decoding QVGA and VGA before sync frame and decode up to // HD on and after sync. TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc3SL3TLSyncToHD) { SetSvcConfig(3, 3); frame_to_start_decode_ = 0; frame_to_sync_ = 20; decode_to_layer_before_sync_ = 1; decode_to_layer_after_sync_ = 2; // Set up svc layer sync structure. svc_layer_sync_.base_layer_intra_only = 0; svc_layer_sync_.spatial_layer_sync[0] = 0; svc_layer_sync_.spatial_layer_sync[1] = 0; svc_layer_sync_.spatial_layer_sync[2] = 1; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); cfg_.rc_target_bitrate = 600; AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Test for sync layer for 1 pass CBR SVC: 3 spatial layers and // 3 temporal layers. Decoding QVGA before sync frame and decode up to // HD on and after sync. TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc3SL3TLSyncToVGAHD) { SetSvcConfig(3, 3); frame_to_start_decode_ = 0; frame_to_sync_ = 20; decode_to_layer_before_sync_ = 0; decode_to_layer_after_sync_ = 2; // Set up svc layer sync structure. svc_layer_sync_.base_layer_intra_only = 0; svc_layer_sync_.spatial_layer_sync[0] = 0; svc_layer_sync_.spatial_layer_sync[1] = 1; svc_layer_sync_.spatial_layer_sync[2] = 1; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); cfg_.rc_target_bitrate = 600; AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. 
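  // (The mismatch is benign: nothing references these frames, so the
  // encoder's unfiltered reconstruction never propagates to later frames.)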
EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } #if CONFIG_VP9_TEMPORAL_DENOISING // Test for sync layer for 1 pass CBR SVC: 2 spatial layers and // 3 temporal layers. Decoding QVGA before sync frame and decode up to // VGA on and after sync. TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc2SL3TLSyncFrameVGADenoise) { SetSvcConfig(2, 3); frame_to_start_decode_ = 0; frame_to_sync_ = 100; decode_to_layer_before_sync_ = 0; decode_to_layer_after_sync_ = 1; denoiser_on_ = 1; // Set up svc layer sync structure. svc_layer_sync_.base_layer_intra_only = 0; svc_layer_sync_.spatial_layer_sync[0] = 0; svc_layer_sync_.spatial_layer_sync[1] = 1; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); cfg_.rc_target_bitrate = 400; AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } #endif // Start decoding from beginning of sequence, during sequence insert intra-only // on base/qvga layer. Decode all layers. TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc3SL3TLSyncFrameIntraOnlyQVGA) { SetSvcConfig(3, 3); frame_to_start_decode_ = 0; frame_to_sync_ = 20; decode_to_layer_before_sync_ = 2; // The superframe containing intra-only layer will have 4 frames. Thus set the // layer to decode after sync frame to 3. decode_to_layer_after_sync_ = 3; intra_only_test_ = true; // Set up svc layer sync structure. svc_layer_sync_.base_layer_intra_only = 1; svc_layer_sync_.spatial_layer_sync[0] = 1; svc_layer_sync_.spatial_layer_sync[1] = 0; svc_layer_sync_.spatial_layer_sync[2] = 0; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); cfg_.rc_target_bitrate = 600; AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Start decoding from beginning of sequence, during sequence insert intra-only // on base/qvga layer and sync_layer on middle/VGA layer. Decode all layers. TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc3SL3TLSyncFrameIntraOnlyVGA) { SetSvcConfig(3, 3); frame_to_start_decode_ = 0; frame_to_sync_ = 20; decode_to_layer_before_sync_ = 2; // The superframe containing intra-only layer will have 4 frames. Thus set the // layer to decode after sync frame to 3. decode_to_layer_after_sync_ = 3; intra_only_test_ = true; // Set up svc layer sync structure. svc_layer_sync_.base_layer_intra_only = 1; svc_layer_sync_.spatial_layer_sync[0] = 1; svc_layer_sync_.spatial_layer_sync[1] = 1; svc_layer_sync_.spatial_layer_sync[2] = 0; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); cfg_.rc_target_bitrate = 600; AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } // Start decoding from sync frame, insert intra-only on base/qvga layer. Decode // all layers. For 1 spatial layer, it inserts a key frame. 
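// (FramePktHook above asserts exactly this: with one spatial layer, or more
// than three, an intra-only frame is not allowed, so a key frame is expected
// at the sync point instead.)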
TEST_P(SyncFrameOnePassCbrSvc, OnePassCbrSvc1SL3TLSyncFrameIntraOnlyQVGA) { SetSvcConfig(1, 3); frame_to_start_decode_ = 20; frame_to_sync_ = 20; decode_to_layer_before_sync_ = 0; decode_to_layer_after_sync_ = 0; intra_only_test_ = true; // Set up svc layer sync structure. svc_layer_sync_.base_layer_intra_only = 1; svc_layer_sync_.spatial_layer_sync[0] = 1; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); cfg_.rc_target_bitrate = 600; AssignLayerBitrates(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); #if CONFIG_VP9_DECODER // The non-reference frames are expected to be mismatched frames as the // encoder will avoid loopfilter on these frames. EXPECT_EQ(GetNonRefFrames(), GetMismatchFrames()); #endif } VP9_INSTANTIATE_TEST_CASE(SyncFrameOnePassCbrSvc, ::testing::Range(0, 3)); INSTANTIATE_TEST_CASE_P( VP9, ScalePartitionOnePassCbrSvc, ::testing::Values( static_cast(&libvpx_test::kVP9))); } // namespace } // namespace svc_test libvpx-1.8.2/test/svc_test.cc000066400000000000000000000124441357355204000161570ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "test/svc_test.h" namespace svc_test { void OnePassCbrSvc::SetSvcConfig(const int num_spatial_layer, const int num_temporal_layer) { SetConfig(num_temporal_layer); cfg_.ss_number_layers = num_spatial_layer; cfg_.ts_number_layers = num_temporal_layer; if (num_spatial_layer == 1) { svc_params_.scaling_factor_num[0] = 288; svc_params_.scaling_factor_den[0] = 288; } else if (num_spatial_layer == 2) { svc_params_.scaling_factor_num[0] = 144; svc_params_.scaling_factor_den[0] = 288; svc_params_.scaling_factor_num[1] = 288; svc_params_.scaling_factor_den[1] = 288; } else if (num_spatial_layer == 3) { svc_params_.scaling_factor_num[0] = 72; svc_params_.scaling_factor_den[0] = 288; svc_params_.scaling_factor_num[1] = 144; svc_params_.scaling_factor_den[1] = 288; svc_params_.scaling_factor_num[2] = 288; svc_params_.scaling_factor_den[2] = 288; } number_spatial_layers_ = cfg_.ss_number_layers; number_temporal_layers_ = cfg_.ts_number_layers; } void OnePassCbrSvc::PreEncodeFrameHookSetup(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { for (int i = 0; i < VPX_MAX_LAYERS; ++i) { svc_params_.max_quantizers[i] = 63; svc_params_.min_quantizers[i] = 0; } svc_params_.speed_per_layer[0] = base_speed_setting_; for (int i = 1; i < VPX_SS_MAX_LAYERS; ++i) { svc_params_.speed_per_layer[i] = speed_setting_; } encoder->Control(VP9E_SET_SVC, 1); encoder->Control(VP9E_SET_SVC_PARAMETERS, &svc_params_); encoder->Control(VP8E_SET_CPUUSED, speed_setting_); encoder->Control(VP9E_SET_AQ_MODE, 3); encoder->Control(VP8E_SET_MAX_INTRA_BITRATE_PCT, 300); encoder->Control(VP9E_SET_TILE_COLUMNS, get_msb(cfg_.g_threads)); encoder->Control(VP9E_SET_ROW_MT, 1); encoder->Control(VP8E_SET_STATIC_THRESHOLD, 1); } superframe_count_++; temporal_layer_id_ = 0; if (number_temporal_layers_ == 2) { temporal_layer_id_ = (superframe_count_ % 2 != 0); } else if (number_temporal_layers_ == 3) { if (superframe_count_ % 2 != 0) temporal_layer_id_ = 2; if (superframe_count_ > 1) { if ((superframe_count_ - 2) % 4 == 0) temporal_layer_id_ = 1; } } 
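  // (With three temporal layers the computation above yields the standard
  // 4-frame pattern TL 0-2-1-2, given that superframe_count_ is reset to 0
  // on key frames by the datarate tests' FramePktHook.)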
frame_flags_ = 0; } void OnePassCbrSvc::PostEncodeFrameHook(::libvpx_test::Encoder *encoder) { vpx_svc_layer_id_t layer_id; encoder->Control(VP9E_GET_SVC_LAYER_ID, &layer_id); temporal_layer_id_ = layer_id.temporal_layer_id; for (int sl = 0; sl < number_spatial_layers_; ++sl) { for (int tl = temporal_layer_id_; tl < number_temporal_layers_; ++tl) { const int layer = sl * number_temporal_layers_ + tl; bits_in_buffer_model_[layer] += static_cast(layer_target_avg_bandwidth_[layer]); } } } void OnePassCbrSvc::AssignLayerBitrates() { int sl, spatial_layer_target; int spatial_layers = cfg_.ss_number_layers; int temporal_layers = cfg_.ts_number_layers; float total = 0; float alloc_ratio[VPX_MAX_LAYERS] = { 0 }; float framerate = 30.0; for (sl = 0; sl < spatial_layers; ++sl) { if (svc_params_.scaling_factor_den[sl] > 0) { alloc_ratio[sl] = static_cast((svc_params_.scaling_factor_num[sl] * 1.0 / svc_params_.scaling_factor_den[sl])); total += alloc_ratio[sl]; } } for (sl = 0; sl < spatial_layers; ++sl) { cfg_.ss_target_bitrate[sl] = spatial_layer_target = static_cast(cfg_.rc_target_bitrate * alloc_ratio[sl] / total); const int index = sl * temporal_layers; if (cfg_.temporal_layering_mode == 3) { cfg_.layer_target_bitrate[index] = spatial_layer_target >> 1; cfg_.layer_target_bitrate[index + 1] = (spatial_layer_target >> 1) + (spatial_layer_target >> 2); cfg_.layer_target_bitrate[index + 2] = spatial_layer_target; } else if (cfg_.temporal_layering_mode == 2) { cfg_.layer_target_bitrate[index] = spatial_layer_target * 2 / 3; cfg_.layer_target_bitrate[index + 1] = spatial_layer_target; } else if (cfg_.temporal_layering_mode <= 1) { cfg_.layer_target_bitrate[index] = spatial_layer_target; } } for (sl = 0; sl < spatial_layers; ++sl) { for (int tl = 0; tl < temporal_layers; ++tl) { const int layer = sl * temporal_layers + tl; float layer_framerate = framerate; if (temporal_layers == 2 && tl == 0) layer_framerate = framerate / 2; if (temporal_layers == 3 && tl == 0) layer_framerate = framerate / 4; if (temporal_layers == 3 && tl == 1) layer_framerate = framerate / 2; layer_target_avg_bandwidth_[layer] = static_cast( cfg_.layer_target_bitrate[layer] * 1000.0 / layer_framerate); bits_in_buffer_model_[layer] = cfg_.layer_target_bitrate[layer] * cfg_.rc_buf_initial_sz; } } } } // namespace svc_test libvpx-1.8.2/test/svc_test.h000066400000000000000000000043711357355204000160210ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
libvpx-1.8.2/test/svc_test.h

/*
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_TEST_SVC_TEST_H_
#define VPX_TEST_SVC_TEST_H_

#include "./vpx_config.h"
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/encode_test_driver.h"
#include "test/i420_video_source.h"
#include "test/util.h"
#include "test/y4m_video_source.h"
#include "vpx/vpx_codec.h"
#include "vpx_ports/bitops.h"

namespace svc_test {
class OnePassCbrSvc : public ::libvpx_test::EncoderTest {
 public:
  explicit OnePassCbrSvc(const ::libvpx_test::CodecFactory *codec)
      : EncoderTest(codec), base_speed_setting_(0), speed_setting_(0),
        superframe_count_(0), temporal_layer_id_(0),
        number_temporal_layers_(0), number_spatial_layers_(0) {
    memset(&svc_params_, 0, sizeof(svc_params_));
    memset(bits_in_buffer_model_, 0,
           sizeof(bits_in_buffer_model_[0]) * VPX_MAX_LAYERS);
    memset(layer_target_avg_bandwidth_, 0,
           sizeof(layer_target_avg_bandwidth_[0]) * VPX_MAX_LAYERS);
  }

 protected:
  virtual ~OnePassCbrSvc() {}

  virtual void SetConfig(const int num_temporal_layer) = 0;

  virtual void SetSvcConfig(const int num_spatial_layer,
                            const int num_temporal_layer);

  virtual void PreEncodeFrameHookSetup(::libvpx_test::VideoSource *video,
                                       ::libvpx_test::Encoder *encoder);

  virtual void PostEncodeFrameHook(::libvpx_test::Encoder *encoder);

  virtual void AssignLayerBitrates();

  virtual void MismatchHook(const vpx_image_t *, const vpx_image_t *) {}

  vpx_svc_extra_cfg_t svc_params_;
  int64_t bits_in_buffer_model_[VPX_MAX_LAYERS];
  int layer_target_avg_bandwidth_[VPX_MAX_LAYERS];
  int base_speed_setting_;
  int speed_setting_;
  int superframe_count_;
  int temporal_layer_id_;
  int number_temporal_layers_;
  int number_spatial_layers_;
};
}  // namespace svc_test

#endif  // VPX_TEST_SVC_TEST_H_

libvpx-1.8.2/test/test-data.mk

LIBVPX_TEST_SRCS-yes += test-data.mk

# Encoder test source
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_collage_w352h288.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += hantro_odd.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += desktop_office1.1280_720-020.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += slides_code_term_web_plot.1920_1080.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_420_20f.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_422_20f.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_444_20f.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_10_440.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_420_20f.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_422_20f.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_444_20f.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_12_440.yuv
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420_a10-1.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_420.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_422.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_444.y4m
LIBVPX_TEST_DATA-$(CONFIG_ENCODERS) += park_joy_90p_8_440.yuv
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += desktop_credits.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += noisy_clip_640_360.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += rush_hour_444.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += screendata.y4m
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_640_480_30.yuv
LIBVPX_TEST_DATA-$(CONFIG_RATE_CTRL) += bus_352x288_420_f20_b8.yuv

# Test vectors
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-001.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-002.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-003.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-004.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-005.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-006.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-007.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-008.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-009.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-010.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-011.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-012.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-013.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-014.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-015.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-016.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-017.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-00-comprehensive-018.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1400.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1411.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1416.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-01-intra-1417.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1402.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1412.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1418.ivf.md5 
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-02-inter-1424.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-01.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-02.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-03.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-04.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1401.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1403.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1407.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1408.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1409.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1410.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1413.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1414.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1415.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1425.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1426.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1427.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1432.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1435.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1436.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1437.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1441.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-03-segmentation-1442.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1404.ivf.md5 
LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1405.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-04-partitions-1406.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1428.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1429.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1430.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1431.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1433.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1434.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1438.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1439.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1440.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-05-sharpness-1443.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-06-smallsize.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += vp80-06-smallsize.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-00.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-00.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-01.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-01.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-02.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-02.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-03.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-03.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-04.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-04.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-05.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-05.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-06.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-06.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-07.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-07.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-08.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-08.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-09.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-09.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-10.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-10.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-11.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-00-quantizer-11.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-12.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-12.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-13.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-13.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-14.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-14.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-15.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-15.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-17.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-17.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-18.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-18.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-19.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-19.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-20.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-20.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-21.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-21.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-22.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-22.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-23.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-23.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-24.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-24.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-25.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-25.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-26.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-26.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-27.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-27.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-28.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-28.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-29.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-29.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-30.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-30.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-31.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-31.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-32.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-32.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-33.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-33.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-34.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-34.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-35.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-35.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-00-quantizer-36.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-36.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-37.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-37.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-38.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-38.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-39.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-39.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-40.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-40.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-41.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-41.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-42.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-42.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-43.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-43.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-44.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-44.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-45.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-45.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-46.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-46.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-47.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-47.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-48.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-48.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-49.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-49.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-50.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-50.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-51.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-51.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-52.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-52.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-53.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-53.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-54.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-54.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-55.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-55.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-56.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-56.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-57.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-57.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-58.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-58.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-59.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-59.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-60.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-00-quantizer-60.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-61.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-61.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-62.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-62.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-63.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-00-quantizer-63.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-2.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-3.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-3.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-4.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-4.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-5.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-5.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-6.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-6.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-7.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-01-sharpness-7.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x08.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x08.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x10.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x10.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x18.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x18.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x32.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x32.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x34.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x34.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x64.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x64.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x66.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-08x66.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x08.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x08.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x10.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x10.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x18.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x18.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x32.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x32.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x34.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x34.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x64.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-02-size-10x64.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x66.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-10x66.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x08.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x08.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x10.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x10.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x18.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x18.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x32.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x32.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x34.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x34.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x64.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x64.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x66.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-16x66.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x08.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x08.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x10.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x10.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x18.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x18.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x32.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x32.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x34.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x34.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x64.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x64.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x66.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-18x66.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x08.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x08.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x10.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x10.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x18.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x18.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x32.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x32.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x34.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x34.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x64.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x64.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x66.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-32x66.webm.md5 
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x08.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x08.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x10.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x10.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x18.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x18.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x32.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x32.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x34.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x34.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x64.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x64.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x66.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-34x66.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x08.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x08.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x10.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x10.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x18.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x18.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x32.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x32.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x34.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x34.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x64.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x64.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x66.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-64x66.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x08.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x08.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x10.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x10.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x18.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x18.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x32.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x32.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x34.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x34.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x64.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x64.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x66.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-66x66.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-130x132.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-130x132.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-02-size-132x130.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x130.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x132.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-132x132.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-178x180.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-178x180.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x178.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x178.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x180.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-180x180.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-lf-1920x1080.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-02-size-lf-1920x1080.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-deltaq.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x196.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x196.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x198.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x198.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x200.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x200.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x202.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x202.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x208.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x208.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x210.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x210.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x224.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x224.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x226.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-196x226.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x196.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x196.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x198.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x198.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x200.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x200.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x202.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x202.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x208.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x208.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x210.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x210.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x224.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x224.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x226.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-198x226.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x196.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x196.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x198.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-03-size-200x198.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x200.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x200.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x202.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x202.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x208.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x208.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x210.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x210.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x224.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x224.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x226.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-200x226.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x196.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x196.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x198.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x198.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x200.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x200.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x202.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x202.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x208.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x208.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x210.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x210.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x224.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x224.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x226.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-202x226.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x196.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x196.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x198.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x198.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x200.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x200.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x202.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x202.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x208.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x208.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x210.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x210.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x224.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x224.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x226.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-208x226.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x196.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x196.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x198.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x198.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
vp90-2-03-size-210x200.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x200.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x202.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x202.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x208.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x208.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x210.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x210.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x224.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x224.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x226.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-210x226.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x196.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x196.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x198.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x198.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x200.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x200.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x202.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x202.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x208.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x208.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x210.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x210.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x224.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x224.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x226.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-224x226.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x196.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x196.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x198.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x198.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x200.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x200.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x202.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x202.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x208.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x208.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x210.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x210.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x224.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-226x226.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-352x288.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-03-size-352x288.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-05-resize.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-06-bilinear.webm.md5 
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-07-frame_parallel-1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile-4x4.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x2.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x2_frame_parallel.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x2_frame_parallel.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x4.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x4.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x4_frame_parallel.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x4_frame_parallel.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x8.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x8.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x8_frame_parallel.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-08-tile_1x8_frame_parallel.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-aq2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-aq2.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-lf_deltas.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-lf_deltas.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-subpixel-00.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-09-subpixel-00.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-10-show-existing-frame2.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x287.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x287.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x288.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-351x288.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-352x287.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-11-size-352x287.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_1.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_1.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_2.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_2.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_3.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-12-droppable_3.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-13-largescaling.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5 
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-2.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-4.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-4.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-8.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-1-8.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-2.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-4.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-4.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-16-8.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-4.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-4.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-8.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-2-8.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-2.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-8.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-4-8.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-16.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-16.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-2.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-fp-tiles-8-4.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-2.webm.md5 
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-4.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-4.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-8.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-1-8.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-4.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-4.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-8.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-2-8.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-2.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-8.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-4-8.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-2.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-14-resize-10frames-fp-tiles-8-4.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-15-segkey_adpq.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-16-intra-only.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-16-intra-only.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-17-show-existing-frame.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-17-show-existing-frame.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-18-resize.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-18-resize.ivf.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-01.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-02.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-19-skip-02.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv422.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv422.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv440.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv440.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm 
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp91-2-04-yuv444.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-01.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-01.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-02.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-20-big_superframe-02.webm.md5 ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-10bit-yuv420.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-10bit-yuv420.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-12bit-yuv420.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp92-2-20-12bit-yuv420.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv422.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv422.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv422.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv422.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv440.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv440.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv440.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv440.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv444.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-10bit-yuv444.webm.md5 LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp93-2-20-12bit-yuv444.webm.md5 endif # CONFIG_VP9_HIGHBITDEPTH # Invalid files for testing libvpx error checking. LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-bug-1443.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-token-partition.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP8_DECODER) += invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-01-v3.webm.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-02-v2.webm.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-03-v3.webm.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf.res 
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp91-2-mixedrefcsp-444to420.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-1.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-2.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-vp90-2-07-frame_parallel-3.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-629481.webm LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-629481.webm.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-1558.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-1558.ivf.res LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-1562.ivf LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += 
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-1562.ivf.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-667044.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += invalid-crbug-667044.webm.res
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += crbug-1539.rawfile

ifeq ($(CONFIG_DECODE_PERF_TESTS),yes)
# Encode / Decode test
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.yuv
# BBB VP9 streams
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_426x240_tile_1x1_180kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_640x360_tile_1x2_337kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_854x480_tile_1x2_651kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_1280x720_tile_1x4_1310kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_1920x1080_tile_1x1_2581kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_1920x1080_tile_1x4_2586kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-bbb_1920x1080_tile_1x4_fpm_2304kbps.webm
# Sintel VP9 streams
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-sintel_426x182_tile_1x1_171kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-sintel_640x272_tile_1x2_318kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-sintel_854x364_tile_1x2_621kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-sintel_1280x546_tile_1x4_1257kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-sintel_1920x818_tile_1x4_fpm_2279kbps.webm
# TOS VP9 streams
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-tos_426x178_tile_1x1_181kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-tos_640x266_tile_1x2_336kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-tos_854x356_tile_1x2_656kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-tos_854x356_tile_1x2_fpm_546kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-tos_1280x534_tile_1x4_1306kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-tos_1280x534_tile_1x4_fpm_952kbps.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-tos_1920x800_tile_1x4_fpm_2335kbps.webm
endif # CONFIG_DECODE_PERF_TESTS

ifeq ($(CONFIG_ENCODE_PERF_TESTS),yes)
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += desktop_640_360_30.yuv
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += kirland_640_480_30.yuv
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += macmarcomoving_640_480_30.yuv
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += macmarcostationary_640_480_30.yuv
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += niklas_1280_720_30.yuv
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomanarrows_640_480_30.yuv
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += tacomasmallcameramovement_640_480_30.yuv
LIBVPX_TEST_DATA-$(CONFIG_VP9_ENCODER) += thaloundeskmtg_640_480_30.yuv
endif # CONFIG_ENCODE_PERF_TESTS

# sort and remove duplicates
LIBVPX_TEST_DATA-yes := $(sort $(LIBVPX_TEST_DATA-yes))
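# Note: ':=' expands $(sort) immediately, so the test vectors appended below
# this point are not covered by the sort/deduplication above.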
# VP9 dynamic resizing test (decoder)
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_3-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_5_3-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_3-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x180_7_3-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_3-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_5_3-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_3-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_320x240_7_3-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_3-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_5_3-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_3-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x360_7_3-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_3-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_5_3-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_3-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_640x480_7_3-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_3-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_5_3-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_3-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1280x720_7_3-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_3-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_5_3-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_1-2.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_1-2.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-21-resize_inter_1920x1080_7_3-4.webm.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_3.ivf
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_3.ivf.md5
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_1.webm
LIBVPX_TEST_DATA-$(CONFIG_VP9_DECODER) += vp90-2-22-svc_1280x720_1.webm.md5
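The test-data.sha1 manifest that follows pairs a 40-hex-digit SHA-1 digest
with each test-vector file name, one entry per file, in the binary-mode
('*'-prefixed) format produced by sha1sum. As a minimal sketch, not part of
the libvpx build, a local test-data directory can be re-verified against
that manifest roughly as below; verify_manifest is a hypothetical helper
written for illustration, and it assumes the LIBVPX_TEST_DATA_PATH
environment variable that the libvpx test suite reads test vectors from,
falling back to the current directory.

import hashlib
import os
import sys

def verify_manifest(manifest_path, data_dir):
    # Hypothetical helper: check every "<sha1> *<name>" manifest line
    # against the file of the same name under data_dir.
    failures = 0
    with open(manifest_path) as manifest:
        for line in manifest:
            line = line.strip()
            if not line:
                continue
            digest, name = line.split(None, 1)
            name = name.lstrip("*")  # "*" is sha1sum's binary-mode marker
            path = os.path.join(data_dir, name)
            if not os.path.isfile(path):
                print("MISSING " + name)
                failures += 1
                continue
            sha1 = hashlib.sha1()
            with open(path, "rb") as f:
                # Read in 1 MiB chunks so multi-megabyte .webm/.yuv vectors
                # do not have to fit in memory at once.
                for chunk in iter(lambda: f.read(1 << 20), b""):
                    sha1.update(chunk)
            if sha1.hexdigest() != digest:
                print("BAD     " + name)
                failures += 1
    return failures

if __name__ == "__main__":
    # Assumed convention: LIBVPX_TEST_DATA_PATH points at the downloaded
    # test vectors, as used by the test suite.
    data_dir = os.environ.get("LIBVPX_TEST_DATA_PATH", ".")
    sys.exit(1 if verify_manifest("test-data.sha1", data_dir) else 0)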
libvpx-1.8.2/test/test-data.sha1
3eaf216d9fc8b4b9bb8c3956311f49a85974806c *bus_352x288_420_f20_b8.yuv
d5dfb0151c9051f8c85999255645d7a23916d3c0 *hantro_collage_w352h288.yuv
b87815bf86020c592ccc7a846ba2e28ec8043902 *hantro_odd.yuv
76024eb753cdac6a5e5703aaea189d35c3c30ac7 *invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf
7448d8798a4380162d4b56f9b452e2f6f9e24e7a *invalid-vp90-2-00-quantizer-00.webm.ivf.s5861_r01-05_b6-.v2.ivf.res
83f50908c8dc0ef8760595447a2ff7727489542e *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf
456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-.ivf.res
c123d1f9f02fb4143abb5e271916e3a3080de8f6 *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf
456d1493e52d32a5c30edf44a27debc1fa6b253a *invalid-vp90-2-00-quantizer-11.webm.ivf.s52984_r01-05_b6-z.ivf.res
efafb92b7567bc04c3f1432ea6c268c1c31affd5 *invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-.ivf
5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-vp90-2-21-resize_inter_320x180_5_3-4.webm.ivf.s45551_r01-05_b6-.ivf.res
fe346136b9b8c1e6f6084cc106485706915795e4 *invalid-vp90-01-v3.webm
5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-vp90-01-v3.webm.res
d78e2fceba5ac942246503ec8366f879c4775ca5 *invalid-vp90-02-v2.webm
8e2eff4af87d2b561cce2365713269e301457ef3 *invalid-vp90-02-v2.webm.res
df1a1453feb3c00d7d89746c7003b4163523bff3 *invalid-vp90-03-v3.webm
4935c62becc68c13642a03db1e6d3e2331c1c612 *invalid-vp90-03-v3.webm.res
d637297561dd904eb2c97a9015deeb31c4a1e8d2 *invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm
3a204bdbeaa3c6458b77bcebb8366d107267f55d *invalid-vp90-2-08-tile_1x4_frame_parallel_all_key.webm.res
9aa21d8b2cb9d39abe8a7bb6032dc66955fb4342 *noisy_clip_640_360.y4m
0936b837708ae68c034719f8e07596021c2c214f *park_joy_90p_10_420_20f.y4m
5727a853c083c1099f837d27967bc1322d50ed4f *park_joy_90p_10_422_20f.y4m
e13489470ef8e8b2a871a5640d795a42a39be58d *park_joy_90p_10_444_20f.y4m
c934da6fb8cc54ee2a8c17c54cf6076dac37ead0 *park_joy_90p_10_440.yuv
79b0dc1784635a7f291e21c4e8d66a29c496ab99 *park_joy_90p_12_420_20f.y4m
9cf22b0f809f7464c8b9058f0cfa9d905921cbd1 *park_joy_90p_12_422_20f.y4m
22b2a4abaecc4a9ade6bb503d25fb82367947e85 *park_joy_90p_12_444_20f.y4m
82c1bfcca368c2f22bad7d693d690d5499ecdd11 *park_joy_90p_12_440.yuv
b9e1e90aece2be6e2c90d89e6ab2372d5f8c792d *park_joy_90p_8_420_a10-1.y4m
4e0eb61e76f0684188d9bc9f3ce61f6b6b77bb2c *park_joy_90p_8_420.y4m
7a193ff7dfeb96ba5f82b2afd7afa9e1fe83d947 *park_joy_90p_8_422.y4m
bdb7856e6bc93599bdda05c2e773a9f22b6c6d03 *park_joy_90p_8_444.y4m
81e1f3843748438b8f2e71db484eb22daf72e939 *park_joy_90p_8_440.yuv
b1f1c3ec79114b9a0651af24ce634afb44a9a419 *rush_hour_444.y4m
5184c46ddca8b1fadd16742e8500115bc8f749da *vp80-00-comprehensive-001.ivf
65bf1bbbced81b97bd030f376d1b7f61a224793f *vp80-00-comprehensive-002.ivf
906b4c1e99eb734504c504b3f1ad8052137ce672 *vp80-00-comprehensive-003.ivf
ec144b1af53af895db78355785650b96dd3f0ade *vp80-00-comprehensive-004.ivf
afc7091785c62f1c121c4554a2830c30704587d9 *vp80-00-comprehensive-005.ivf
42ea9d55c818145d06a9b633b8e85c6a6164fd3e *vp80-00-comprehensive-006.ivf
e5b3a73ab79fe024c14309d653d6bed92902ee3b *vp80-00-comprehensive-007.ivf
f3c50a58875930adfb84525c0ef59d7e4c08540c *vp80-00-comprehensive-008.ivf 4b2841fdb83db51ae322096ae468bbb9dc2c8362 *vp80-00-comprehensive-009.ivf efbff736e3a91ab6a98c5bc2dce65d645944c7b1 *vp80-00-comprehensive-010.ivf 6b315102cae008d22a3d2c231be92cb704a222f8 *vp80-00-comprehensive-011.ivf f3214a4fea14c2d5ec689936c1613f274c859ee8 *vp80-00-comprehensive-012.ivf e4094e96d308c8a35b74c480a43d853c5294cd34 *vp80-00-comprehensive-013.ivf 5b0adfaf60a69e0aaf3ec021a39d0a68fc0e1b5a *vp80-00-comprehensive-014.ivf e8467688ddf26b5000664f904faf0d70506aa653 *vp80-00-comprehensive-015.ivf aab55582337dfd2a39ff54fb2576a91910d49337 *vp80-00-comprehensive-016.ivf 1ba24724f80203c9bae4f1d0f99d534721980016 *vp80-00-comprehensive-017.ivf 143a15512b46f436280ddb4d0e6411eb4af434f2 *vp80-00-comprehensive-018.ivf c5baeaf5714fdfb3a8bc960a8e33ac438e83b16b *vp80-01-intra-1400.ivf f383955229afe3408453e316d11553d923ca60d5 *vp80-01-intra-1411.ivf 84e1f4343f174c9f3c83f834bac3196fb325bf2c *vp80-01-intra-1416.ivf fb6e712a47dd57a28a3727d2ae2c97a8b7c7ca51 *vp80-01-intra-1417.ivf 71ea772d3e9d315b8cbecf41207b8a237c34853b *vp80-02-inter-1402.ivf d85dbc4271525dcd128c503f936fe69091d1f8d0 *vp80-02-inter-1412.ivf d4e5d3ad56511867d025f93724d090f92ba6ec3d *vp80-02-inter-1418.ivf 91791cbcc37c60f35dbd8090bacb54e5ec6dd4fa *vp80-02-inter-1424.ivf 17fbfe2fea70f6e2f3fa6ca4efaae6c0b03b5f02 *vp80-03-segmentation-01.ivf 3c3600dbbcde08e20d54c66fe3b7eadd4f09bdbb *vp80-03-segmentation-02.ivf c156778d5340967d4b369c490848076e92f1f875 *vp80-03-segmentation-03.ivf d25dcff6c60e87a1af70945b8911b6b4998533b0 *vp80-03-segmentation-04.ivf 362baba2ce454c9db21218f35e81c27a5ed0b730 *vp80-03-segmentation-1401.ivf d223ae7ee748ce07e74c4679bfd219e84aa9f4b0 *vp80-03-segmentation-1403.ivf 033adf7f3a13836a3f1cffcb87c1972900f2b5c6 *vp80-03-segmentation-1407.ivf 4d51dfbf9f3e2c590ec99d1d6f59dd731d04375f *vp80-03-segmentation-1408.ivf f37a62b197c2600d75e0ccfbb31b60efdedac251 *vp80-03-segmentation-1409.ivf eb25bd7bfba5b2f6935018a930f42d123b1e7fcd *vp80-03-segmentation-1410.ivf b9d5c436663a30c27cfff84b53a002e501258843 *vp80-03-segmentation-1413.ivf 6da92b9d1a180cc3a8afe348ab12258f5a37be1a *vp80-03-segmentation-1414.ivf a4f5842602886bd669f115f93d8a35c035cb0948 *vp80-03-segmentation-1415.ivf f295dceb8ef278b77251b3f9df8aee22e161d547 *vp80-03-segmentation-1425.ivf 198dbf9f36f733200e432664cc8c5752d59779de *vp80-03-segmentation-1426.ivf 7704804e32f5de976803929934a7fafe101ac7b0 *vp80-03-segmentation-1427.ivf 831ccd862ea95ca025d2f3bd8b88678752f5416d *vp80-03-segmentation-1432.ivf b3c11978529289f9109f2766fcaba3ebc40e11ef *vp80-03-segmentation-1435.ivf a835a731f5520ebfc1002c40121264d0020559ac *vp80-03-segmentation-1436.ivf 1d1732942f773bb2a5775fcb9689b1579ce28eab *vp80-03-segmentation-1437.ivf db04799adfe089dfdf74dbd43cc05ede7161f99e *vp80-03-segmentation-1441.ivf 7caf39b3f20cfd52b998210878062e52a5edf1e6 *vp80-03-segmentation-1442.ivf 3607f6bb4ee106c38fa1ea370dc4ff8b8cde2261 *vp80-04-partitions-1404.ivf 93cc323b6b6867f1b12dd48773424549c6960a6b *vp80-04-partitions-1405.ivf 047eedb14b865bdac8a3538e63801054e0295e9c *vp80-04-partitions-1406.ivf 0f1233bd2bc33f56ce5e495dbd455d122339f384 *vp80-05-sharpness-1428.ivf 51767fc136488a9535c2a4c38067c542ee2048df *vp80-05-sharpness-1429.ivf 9805aa107672de25d6fb8c35e20d06deca5efe18 *vp80-05-sharpness-1430.ivf 61db6b965f9c27aebe71b85bf2d5877e58e4bbdf *vp80-05-sharpness-1431.ivf 10420d266290d2923555f84af38eeb96edbd3ae8 *vp80-05-sharpness-1433.ivf 3ed24f9a80cddfdf75824ba95cdb4ff9286cb443 *vp80-05-sharpness-1434.ivf c87599cbecd72d4cd4f7ace3313b7a6bc6eb8163 
*vp80-05-sharpness-1438.ivf aff51d865c2621b60510459244ea83e958e4baed *vp80-05-sharpness-1439.ivf da386e72b19b5485a6af199c5eb60ef25e510dd1 *vp80-05-sharpness-1440.ivf 6759a095203d96ccd267ce09b1b050b8cc4c2f1f *vp80-05-sharpness-1443.ivf b95d3cc1d0df991e63e150a801710a72f20d9ba0 *vp80-06-smallsize.ivf db55ec7fd02c864ba996ff060b25b1e08611330b *vp80-00-comprehensive-001.ivf.md5 29db0ad011cba1e45f856d5623cd38dac3e3bf19 *vp80-00-comprehensive-002.ivf.md5 e84f258f69e173e7d68f8f8c037a0a3766902182 *vp80-00-comprehensive-003.ivf.md5 eb7912eaf69559a16fd82bc3f5fb1524cf4a4466 *vp80-00-comprehensive-004.ivf.md5 4206f71c94894bd5b5b376f6c09b3817dbc65206 *vp80-00-comprehensive-005.ivf.md5 4f89b356f6f2fecb928f330a10f804f00f5325f5 *vp80-00-comprehensive-006.ivf.md5 2813236a32964dd8007e17648bcf035a20fcda6c *vp80-00-comprehensive-007.ivf.md5 10746c72098f872803c900e17c5680e451f5f498 *vp80-00-comprehensive-008.ivf.md5 39a23d0692ce64421a7bb7cdf6ccec5928d37fff *vp80-00-comprehensive-009.ivf.md5 f6e3de8931a0cc659bda8fbc14050346955e72d4 *vp80-00-comprehensive-010.ivf.md5 101683ec195b6e944f7cd1e468fc8921439363e6 *vp80-00-comprehensive-011.ivf.md5 1f592751ce46d8688998fa0fa4fbdcda0fd4058c *vp80-00-comprehensive-012.ivf.md5 6066176f90ca790251e795fca1a5797d59999841 *vp80-00-comprehensive-013.ivf.md5 2656da94ba93691f23edc4d60b3a09e2be46c217 *vp80-00-comprehensive-014.ivf.md5 c6e0d5f5d61460c8ac8edfa4e701f10312c03133 *vp80-00-comprehensive-015.ivf.md5 ee60fee501d8493e34e8d6a1fe315b51ed09b24a *vp80-00-comprehensive-016.ivf.md5 9f1914ceffcad4546c0a29de3ef591d8bea304dc *vp80-00-comprehensive-017.ivf.md5 e0305178fe288a9fd8082b39e2d03181edb19054 *vp80-00-comprehensive-018.ivf.md5 612494da2fa799cc9d76dcdd835ae6c7cb2e5c05 *vp80-01-intra-1400.ivf.md5 48ea06097ac8269c5e8c2131d3d0639f431fcf0e *vp80-01-intra-1411.ivf.md5 6e2ab4e7677ad0ba868083ca6bc387ee922b400c *vp80-01-intra-1416.ivf.md5 eca0a90348959ce3854142f8d8641b13050e8349 *vp80-01-intra-1417.ivf.md5 920feea203145d5c2258a91c4e6991934a79a99e *vp80-02-inter-1402.ivf.md5 f71d97909fe2b3dd65be7e1f56c72237f0cef200 *vp80-02-inter-1412.ivf.md5 e911254569a30bbb2a237ff8b79f69ed9da0672d *vp80-02-inter-1418.ivf.md5 58c789c50c9bb9cc90580bed291164a0939d28ba *vp80-02-inter-1424.ivf.md5 ff3e2f441327b9c20a0b37c524e0f5a48a36de7b *vp80-03-segmentation-01.ivf.md5 0791f417f076a542ae66fbc3426ab4d94cbd6c75 *vp80-03-segmentation-02.ivf.md5 722e50f1a6a91c34302d68681faffc1c26d1cc57 *vp80-03-segmentation-03.ivf.md5 c701f1885bcfb27fb8e70cc65606b289172ef889 *vp80-03-segmentation-04.ivf.md5 f79bc9ec189a2b4807632a3d0c5bf04a178b5300 *vp80-03-segmentation-1401.ivf.md5 b9aa4c74c0219b639811c44760d0b24cd8bb436a *vp80-03-segmentation-1403.ivf.md5 70d5a2207ca1891bcaebd5cf6dd88ce8d57b4334 *vp80-03-segmentation-1407.ivf.md5 265f962ee781531f9a93b9309461316fd32b2a1d *vp80-03-segmentation-1408.ivf.md5 0c4ecbbd6dc042d30e626d951b65f460dd6cd563 *vp80-03-segmentation-1409.ivf.md5 cf779af36a937f06570a0fca9db64ba133451dee *vp80-03-segmentation-1410.ivf.md5 0e6c5036d51ab078842f133934926c598a9cff02 *vp80-03-segmentation-1413.ivf.md5 eb3930aaf229116c80d507516c34759c3f6cdf69 *vp80-03-segmentation-1414.ivf.md5 123d6c0f72ee87911c4ae7538e87b7d163b22d6c *vp80-03-segmentation-1415.ivf.md5 e70551d1a38920e097a5d8782390b79ecaeb7505 *vp80-03-segmentation-1425.ivf.md5 44e8f4117e46dbb302b2cfd81171cc1a1846e431 *vp80-03-segmentation-1426.ivf.md5 52636e54aee5f95bbace37021bd67de5db767e9a *vp80-03-segmentation-1427.ivf.md5 b1ad3eff20215c28e295b15ef3636ed926d59cba *vp80-03-segmentation-1432.ivf.md5 24c22a552fa28a90e5978f67f57181cc2d7546d7 
*vp80-03-segmentation-1435.ivf.md5 96c49c390abfced18a7a8c9b9ea10af778e10edb *vp80-03-segmentation-1436.ivf.md5 f95eb6214571434f1f73ab7833b9ccdf47588020 *vp80-03-segmentation-1437.ivf.md5 1c0700ca27c9b0090a7747a4b0b4dc21d1843181 *vp80-03-segmentation-1441.ivf.md5 81d4f23ca32667ee958bae579c8f5e97ba72eb97 *vp80-03-segmentation-1442.ivf.md5 272efcef07a3a30fbca51bfd566063d8258ec0be *vp80-04-partitions-1404.ivf.md5 66ed219ab812ac801b256d35cf495d193d4cf478 *vp80-04-partitions-1405.ivf.md5 36083f37f56f502bd60ec5e07502ee9e6b8699b0 *vp80-04-partitions-1406.ivf.md5 6ca909bf168a64c09415626294665dc1be3d1973 *vp80-05-sharpness-1428.ivf.md5 1667d2ee2334e5fdea8a8a866f4ccf3cf76f033a *vp80-05-sharpness-1429.ivf.md5 71bcbe5357d36a19df5b07fbe3e27bffa8893f0a *vp80-05-sharpness-1430.ivf.md5 89a09b1dffce2d55770a89e58d9925c70ef79bf8 *vp80-05-sharpness-1431.ivf.md5 08444a18b4e6ba3450c0796dd728d48c399a2dc9 *vp80-05-sharpness-1433.ivf.md5 6d6223719a90c13e848aa2a8a6642098cdb5977a *vp80-05-sharpness-1434.ivf.md5 41d70bb5fa45bc88da1604a0af466930b8dd77b5 *vp80-05-sharpness-1438.ivf.md5 086c56378df81b6cee264d7540a7b8f2b405c7a4 *vp80-05-sharpness-1439.ivf.md5 d32dc2c4165eb266ea4c23c14a45459b363def32 *vp80-05-sharpness-1440.ivf.md5 8c69dc3d8e563f56ffab5ad1e400d9e689dd23df *vp80-05-sharpness-1443.ivf.md5 d6f246df012c241b5fa6c1345019a3703d85c419 *vp80-06-smallsize.ivf.md5 ce881e567fe1d0fbcb2d3e9e6281a1a8d74d82e0 *vp90-2-00-quantizer-00.webm ac5eda33407d0521c7afca43a63fd305c0cd9d13 *vp90-2-00-quantizer-00.webm.md5 2ca0463f2cfb93d25d7dded174db70b7cb87cb48 *vp90-2-00-quantizer-01.webm 10d98884fc6d9a5f47a2057922b8e25dd48d7786 *vp90-2-00-quantizer-01.webm.md5 d80a2920a5e0819d69dcba8fe260c01f820f8982 *vp90-2-00-quantizer-02.webm c964c8e5e04165fabbf1c6ee8ee5121d35921965 *vp90-2-00-quantizer-02.webm.md5 fdef046777b5b75c962b715d809dbe2ea331afb9 *vp90-2-00-quantizer-03.webm f270bee0b0c7aa2bf4c5afe098556b4f3f890faf *vp90-2-00-quantizer-03.webm.md5 66d98609e809394a6ac730787e6724e3badc075a *vp90-2-00-quantizer-04.webm 427433bfe121c4aea1095ec3124fdc174d200e3a *vp90-2-00-quantizer-04.webm.md5 e6e42626d8cadf0b5be16313f69212981b96fee5 *vp90-2-00-quantizer-05.webm c98f6a9a1af4cfd71416792827304266aad4bd46 *vp90-2-00-quantizer-05.webm.md5 413ef09b721f5dcec1a96e937a97e5873c2e6db6 *vp90-2-00-quantizer-06.webm 5080e940a23805c82e578e21b57fc2c511e76376 *vp90-2-00-quantizer-06.webm.md5 4a50a5f4ac717c30dfaae8bb46702e3542e867de *vp90-2-00-quantizer-07.webm 76c429a02b56762e10ee4db88729d8834b3a70f4 *vp90-2-00-quantizer-07.webm.md5 d2f4e464780bf8b7e647efa18ac777a930e62bc0 *vp90-2-00-quantizer-08.webm ab94aabf9316111b52d7c531962ed4123313b6ba *vp90-2-00-quantizer-08.webm.md5 174bc58433936dd79550398d744f1072ce7f5693 *vp90-2-00-quantizer-09.webm e1f7690cd83ccc56d045e17cce552544a5f03810 *vp90-2-00-quantizer-09.webm.md5 52bc1dfd3a97b24d922eb8a31d07527891561f2a *vp90-2-00-quantizer-10.webm 9b37bed893b5f6a4e12f2aa40f02dd40f944d0f8 *vp90-2-00-quantizer-10.webm.md5 10031eecafde1e1d8e6323fe2b2a1d7e77a66869 *vp90-2-00-quantizer-11.webm fe4620a4bb0e4f5cb9bbfedc4039a22b81b0f5c0 *vp90-2-00-quantizer-11.webm.md5 78e9f7bb77e8e348155bbdfa12790789d1d50c34 *vp90-2-00-quantizer-12.webm 0961d060cc8dd469c6dac8d7d75f927c0bb971b8 *vp90-2-00-quantizer-12.webm.md5 133b77a3bbcef652552d74ffc46afbfe3b8a1cba *vp90-2-00-quantizer-13.webm df29e5e0f95772af482f540d776f6b9dea4bfa29 *vp90-2-00-quantizer-13.webm.md5 27323afdaf8987e025c27129c74c86502315a206 *vp90-2-00-quantizer-14.webm ce96a2cc312942f0427a463f15a392870dd69764 *vp90-2-00-quantizer-14.webm.md5 
ab58d0b41037829f6bc993910999f4af0212aafd *vp90-2-00-quantizer-15.webm 40f700db606501aa7cb49049624cbdde6409b122 *vp90-2-00-quantizer-15.webm.md5 cd948e66448aafb65998815ce37241f95d7c9ee7 *vp90-2-00-quantizer-16.webm 039b742d149c945ed79c7b9a6384352852a1c116 *vp90-2-00-quantizer-16.webm.md5 62f56e663e13c576764e491cf08f19bd46a71999 *vp90-2-00-quantizer-17.webm 90c5a39bf76e6b3e0a1c0d3e9b68a9fd78be963e *vp90-2-00-quantizer-17.webm.md5 f26ecad7263cd66a614e53ba5d7c00df181affeb *vp90-2-00-quantizer-18.webm cda0a1c0fca2ec2976ae55124a8a67305508bae6 *vp90-2-00-quantizer-18.webm.md5 94bfc4c04fcfe139a63b98c569e8c14ba98c401f *vp90-2-00-quantizer-19.webm 5b8ec169ccf67d8a0a8e46a62eb173f5a1dbaf4f *vp90-2-00-quantizer-19.webm.md5 0ee88e9318985e1e245de78c2c4a665885ab76a7 *vp90-2-00-quantizer-20.webm 4b26f7edb4fcd3a1b4cce9ba3cb8650e3ee6e063 *vp90-2-00-quantizer-20.webm.md5 6a995cb2b1db33da8087321df1e646f95c3e32d1 *vp90-2-00-quantizer-21.webm e216b4a1eceac03efcc433759be54ab8ea87b24b *vp90-2-00-quantizer-21.webm.md5 aa7722fc427e7180115f3c9cd96bb6b2768e7296 *vp90-2-00-quantizer-22.webm 1aa813bd45ae831bf5e79ace4d73dfd25989a07d *vp90-2-00-quantizer-22.webm.md5 7677e5b929ed6d142041f19b8a9cd5822ee1504a *vp90-2-00-quantizer-23.webm 0de0af34abd843d5b37e58baf3ed96a6104b64c3 *vp90-2-00-quantizer-23.webm.md5 b2995cbe1128b2d4926f1b28d01c501ecb6be8c8 *vp90-2-00-quantizer-24.webm db6033af2ba2f2bca62468fb4b8808e474f93923 *vp90-2-00-quantizer-24.webm.md5 8135ba35587fd92cd4667be7896323d9b634401c *vp90-2-00-quantizer-25.webm 3499e00c2cc15876f61f07e3d3cfca54ebcd98fd *vp90-2-00-quantizer-25.webm.md5 af0fa2907746db82d345f6d831fcc1b2862a29fb *vp90-2-00-quantizer-26.webm cd6fe3d14dab48886ebf65be00e6ed9616ebe5a7 *vp90-2-00-quantizer-26.webm.md5 bd0002e91323776beb5ff11e06edcf19fc08e9b9 *vp90-2-00-quantizer-27.webm fe72154ef196067d6c272521012dd79706496cac *vp90-2-00-quantizer-27.webm.md5 fc15eb606f81455ff03df16bf3432296b002c43c *vp90-2-00-quantizer-28.webm 40b2e24b542206a6bfd746ef199e49ccea07678a *vp90-2-00-quantizer-28.webm.md5 3090bbf913cad0b2eddca7228f5ed51a58378b8d *vp90-2-00-quantizer-29.webm eb59745e0912d8ed6c928268bcf265237c9ba93f *vp90-2-00-quantizer-29.webm.md5 c615abdca9c25e1cb110d908edbedfb3b7c92b91 *vp90-2-00-quantizer-30.webm ad0f4fe6733e4e7cdfe8ef8722bb341dcc7538c0 *vp90-2-00-quantizer-30.webm.md5 037d9f242086cfb085518f6416259defa82d5fc2 *vp90-2-00-quantizer-31.webm 4654b40792572f0a790874c6347ef9196d86c1a7 *vp90-2-00-quantizer-31.webm.md5 505899f3f3515044c5c8b3213d9b9d16f614619d *vp90-2-00-quantizer-32.webm 659a2e6dd02df323f62600626859006640b445df *vp90-2-00-quantizer-32.webm.md5 8b32ec9c3b7e5ca8ddc6b8aea1c1cb7ca996bccc *vp90-2-00-quantizer-33.webm 5b175ef1120ddeba4feae1247bf381bbc4e816ce *vp90-2-00-quantizer-33.webm.md5 4d283755d17e287b1d099a80604398f60d7fb6ea *vp90-2-00-quantizer-34.webm 22a739de95acfeb27524e3700b8f678a9ad744d8 *vp90-2-00-quantizer-34.webm.md5 4296f56a892a412d3d4f64824718dd566c4e6459 *vp90-2-00-quantizer-35.webm c532c9c8dc7b3506fc6a51e5c20c17ef0ac039e7 *vp90-2-00-quantizer-35.webm.md5 6f54e11da461e4410dd9075b015e2d9bc1d07dfb *vp90-2-00-quantizer-36.webm 0b3573f5addea4e3eb11a0b85f068299d5bdad78 *vp90-2-00-quantizer-36.webm.md5 210581682a26c2c4375efc785c36e07539888bc2 *vp90-2-00-quantizer-37.webm 2b4fb6f8ba975237858e61cc8f560bcfc87cb38e *vp90-2-00-quantizer-37.webm.md5 a15ef31283dfc4860f837fe200eb32a445f59629 *vp90-2-00-quantizer-38.webm fb76771f3a795054b9936f70da7505c3ac585284 *vp90-2-00-quantizer-38.webm.md5 1df8433a441412831daae6726df89fa70d21b14d *vp90-2-00-quantizer-39.webm 
39e162c09a20e7e684868097766347014371fee6 *vp90-2-00-quantizer-39.webm.md5 5330e4788ab9129dbb25a7a7d5411104521248b6 *vp90-2-00-quantizer-40.webm 872cc0f2cc9dbf000f89eadb4d8f9940e48e00b1 *vp90-2-00-quantizer-40.webm.md5 d88d03b982889e399a78d7a06eeb1cf30e6c2da2 *vp90-2-00-quantizer-41.webm 5b4f7217e57fa2a221011d0b32f8d0409496b7b6 *vp90-2-00-quantizer-41.webm.md5 9e16406e3e26955a6e17d455ef1ef64bbfa26e53 *vp90-2-00-quantizer-42.webm 0219d090cf37daabe19256ba8e932ba4874b92e4 *vp90-2-00-quantizer-42.webm.md5 a9b15843486fb05f8cd15437ef279782a42b75db *vp90-2-00-quantizer-43.webm 3c9b0b4c607f9579a31726bfcf56729334ddc686 *vp90-2-00-quantizer-43.webm.md5 1dbc931ac446c91eabe7213efff55b596cccf07c *vp90-2-00-quantizer-44.webm 73bc8f675103abaef3d9f73a2742b3bffd726d23 *vp90-2-00-quantizer-44.webm.md5 7c6c1be15beb9d6201204b018966c8c4f9777efc *vp90-2-00-quantizer-45.webm c907b29da821f790c6748de61f592689312e4e36 *vp90-2-00-quantizer-45.webm.md5 07b434da1a467580f73b32177ee11b3e00f65a0d *vp90-2-00-quantizer-46.webm 7b2b7ce60c50bc970bc0ada46d7a7ce440148da3 *vp90-2-00-quantizer-46.webm.md5 233d0465fb1a6fa36e9f89bd2193ac79bd4d2809 *vp90-2-00-quantizer-47.webm 527e0a9fb932efe915027ffe077f9e8d3a4fb139 *vp90-2-00-quantizer-47.webm.md5 719613df7307e205c3fdb6acfb373849c5ab23c7 *vp90-2-00-quantizer-48.webm 65ab6c9d1b682c183b201c7ff42b90343ce3e304 *vp90-2-00-quantizer-48.webm.md5 3bf04a598325ed0eabae1598ec7f718f715ec672 *vp90-2-00-quantizer-49.webm ac68c4387ce11fcc998d8ba455ab9b2bb361d240 *vp90-2-00-quantizer-49.webm.md5 d59238fb3a654931c9b65a11e7321b40d1f702e9 *vp90-2-00-quantizer-50.webm d0576bfede46fd55659f028f2fd28554ceb3e6cc *vp90-2-00-quantizer-50.webm.md5 3f579785101d4209360dd96f8c2ffe9beddf3bee *vp90-2-00-quantizer-51.webm 89fcfe04f4457a7f02ab4a2f94aacbb88aee5789 *vp90-2-00-quantizer-51.webm.md5 28be5836e2fedefe4babf12fc9b79e460ab0a0f4 *vp90-2-00-quantizer-52.webm f3dd52b70c18345fee740220f35da9c4def2017a *vp90-2-00-quantizer-52.webm.md5 488ad4058c17170665b6acd1021fade9a02771e4 *vp90-2-00-quantizer-53.webm 1cdcb1d4f3a37cf83ad235eb27ec62ed2a01afc7 *vp90-2-00-quantizer-53.webm.md5 682978289cb28cc8c9d39bc797300e45d6039de7 *vp90-2-00-quantizer-54.webm 36c35353f2c03cb099bd710d9994de7d9ed88834 *vp90-2-00-quantizer-54.webm.md5 c398ce49af762a48f10cc4da9fae0769aae5f226 *vp90-2-00-quantizer-55.webm 2cf3570542d984f167ab087f59493c7fb47e0ed2 *vp90-2-00-quantizer-55.webm.md5 3071f18b2fce261aa82d61f81a7ae4ca9a75d0e3 *vp90-2-00-quantizer-56.webm d3f93f8272b6de31cffb011a26f11abb514efb12 *vp90-2-00-quantizer-56.webm.md5 f4e8e14b1f278801a7eb6f11734780a01b1668e9 *vp90-2-00-quantizer-57.webm 6478fdf1d7faf6db5f19dffc5e1363af358699ee *vp90-2-00-quantizer-57.webm.md5 307dc264f57cc618fff211fa44d7f52767ed9660 *vp90-2-00-quantizer-58.webm cf231d4a52d492fa692ea4194ec5eb7511fec54e *vp90-2-00-quantizer-58.webm.md5 1fd7cd596170afce2de0b1441b7674bda5723440 *vp90-2-00-quantizer-59.webm 4681f7ef96f63e085c41bb1a964b0df7e67e0b38 *vp90-2-00-quantizer-59.webm.md5 34cdcc81c0ba7085aefbb22d7b4aa9bca3dd7c62 *vp90-2-00-quantizer-60.webm 58691ef53b6b623810e2c57ded374c77535df935 *vp90-2-00-quantizer-60.webm.md5 e6e812406aab81021bb16e772c1db03f75906cb6 *vp90-2-00-quantizer-61.webm 76436eace62f08ff92b61a0845e66667a027db1b *vp90-2-00-quantizer-61.webm.md5 84d811bceed70c950a6a08e572a6e274866e72b1 *vp90-2-00-quantizer-62.webm 2d937cc011eeddd95222b960982da5cd18db580f *vp90-2-00-quantizer-62.webm.md5 0912b295ba0ea09359315315ffd67d22d046f883 *vp90-2-00-quantizer-63.webm 5a829031055d70565f57dbcd47a6ac33619952b3 *vp90-2-00-quantizer-63.webm.md5 
0cf9e5ebe0112bdb47b5887ee5d58eb9d4727c00 *vp90-2-01-sharpness-1.webm 5a0476be4448bae8f8ca17ea236c98793a755948 *vp90-2-01-sharpness-1.webm.md5 51e02d7911810cdf5be8b68ac40aedab479a3179 *vp90-2-01-sharpness-2.webm a0ca5bc87a5ed7c7051f59078daa0d03be1b45b6 *vp90-2-01-sharpness-2.webm.md5 0603f8ad239c07a531d948187f4dafcaf51eda8d *vp90-2-01-sharpness-3.webm 3af8000a69c72fe77881e3176f026c2affb78cc7 *vp90-2-01-sharpness-3.webm.md5 4ca4839f48146252fb261ed88838d80211804841 *vp90-2-01-sharpness-4.webm 08832a1494f84fa9edd40e080bcf2c0e80100c76 *vp90-2-01-sharpness-4.webm.md5 95099dc8f9cbaf9b9a7dd65311923e441ff70731 *vp90-2-01-sharpness-5.webm 93ceee30c140f0b406726c0d896b9db6031c4c7f *vp90-2-01-sharpness-5.webm.md5 ceb4116fb7b078d266d153233b6d62a255a34e4c *vp90-2-01-sharpness-6.webm da83efe59e537ce538e8b03a6eac63cf25849c9a *vp90-2-01-sharpness-6.webm.md5 b5f7cd19aece3880f9d616a778e5cc24c6b9b505 *vp90-2-01-sharpness-7.webm 2957408d20deac8633941a2169f801bae6f086e1 *vp90-2-01-sharpness-7.webm.md5 ffc096c2ce1050450ad462b5fabd2a5220846319 *vp90-2-02-size-08x08.webm e36d2ed6fa2746347710b750586aafa6a01ff3ae *vp90-2-02-size-08x08.webm.md5 895b986f9fd55cd879472b31c6a06b82094418c8 *vp90-2-02-size-08x10.webm 079157a19137ccaebba606f2871f45a397347150 *vp90-2-02-size-08x10.webm.md5 1c5992203e62a2b83040ccbecd748b604e19f4c0 *vp90-2-02-size-08x16.webm 9aa45ffdf2078f883bbed01450031b691819c144 *vp90-2-02-size-08x16.webm.md5 d0a8953da1f85f484487408fee5da9e2a8391901 *vp90-2-02-size-08x18.webm 59a5cc17d354c6a23e5e959d666b1456a5d49c56 *vp90-2-02-size-08x18.webm.md5 1b13461a9fc65cb041bacfe4ea6f02d363397d61 *vp90-2-02-size-08x32.webm 2bdddd6878f05d37d84cde056a3f5e7f926ba3d6 *vp90-2-02-size-08x32.webm.md5 2861f0a0daadb62295b0504a1fbe5b50c79a8f59 *vp90-2-02-size-08x34.webm 6b5812cfb8a82d378ea2913bf009e93668020147 *vp90-2-02-size-08x34.webm.md5 02f948216d4246579dc53c47fe55d8fb264ba251 *vp90-2-02-size-08x64.webm 84b55fdee6d9aa820c7a8c62822446184b191767 *vp90-2-02-size-08x64.webm.md5 4b011242cbf42516efd2b197baebb61dd34562c9 *vp90-2-02-size-08x66.webm 6b1fa0a885947b3cc0fe58f75f838e662bd9bb8b *vp90-2-02-size-08x66.webm.md5 4057796be9dd12df48ab607f502ae6aa70eeeab6 *vp90-2-02-size-10x08.webm 71c752c51aec9f48de286b93f4c20e9c11cad7d0 *vp90-2-02-size-10x08.webm.md5 6583c853fa43fc53d51743eac5f3a43a359d45d0 *vp90-2-02-size-10x10.webm 1da524d24af1944b671d4d3f2b398d6e336584c3 *vp90-2-02-size-10x10.webm.md5 ba442fc03ccd3a705c64c83b36f5ada67d198874 *vp90-2-02-size-10x16.webm 7cfd960f232c34c641a4a2a9411b6fd0efb2fc50 *vp90-2-02-size-10x16.webm.md5 cc92ed40eef14f52e4d080cb2c57939dd8326374 *vp90-2-02-size-10x18.webm db5626275cc55ce970b91c995e74f6838d943aca *vp90-2-02-size-10x18.webm.md5 3a93d501d22325e9fd4c9d8b82e2a432de33c351 *vp90-2-02-size-10x32.webm 5cae51b0c71cfc131651f345f87583eb2903afaf *vp90-2-02-size-10x32.webm.md5 50d2f2b15a9a5178153db44a9e03aaf32b227f67 *vp90-2-02-size-10x34.webm bb0efe058122641e7f73e94497dda2b9e6c21efd *vp90-2-02-size-10x34.webm.md5 01624ec173e533e0b33fd9bdb91eb7360c7c9175 *vp90-2-02-size-10x64.webm b9c0e3b054463546356acf5157f9be92fd34732f *vp90-2-02-size-10x64.webm.md5 2942879baf1c09e96b14d0fc84806abfe129c706 *vp90-2-02-size-10x66.webm bab5f539c2f91952e187456b4beafbb4c01e25ee *vp90-2-02-size-10x66.webm.md5 88d2b63ca5e9ee163d8f20e8886f3df3ff301a66 *vp90-2-02-size-16x08.webm 7f48a0fcf8c25963f3057d7f6669c5f2415834b8 *vp90-2-02-size-16x08.webm.md5 59261eb34c15ea9b5ddd2d416215c1a8b9e6dc1f *vp90-2-02-size-16x10.webm 73a7c209a46dd051c9f7339b6e02ccd5b3b9fc81 *vp90-2-02-size-16x10.webm.md5 066834fef9cf5b9a72932cf4dea5f253e14a976d 
*vp90-2-02-size-16x16.webm faec542f52f37601cb9c480d887ae9355be99372 *vp90-2-02-size-16x16.webm.md5 195307b4eb3192271ee4a935b0e48deef0c54cc2 *vp90-2-02-size-16x18.webm 5a92e19e624c0376321d4d0e22c0c91995bc23e1 *vp90-2-02-size-16x18.webm.md5 14f3f884216d7ae16ec521f024a2f2d31bbf9c1a *vp90-2-02-size-16x32.webm ea622d1c817dd174556f7ee7ccfe4942b34d4845 *vp90-2-02-size-16x32.webm.md5 2e0501100578a5da9dd47e4beea160f945bdd1ba *vp90-2-02-size-16x34.webm 1b8645ef64239334921c5f56b24ce815e6070b05 *vp90-2-02-size-16x34.webm.md5 89a6797fbebebe93215f367229a9152277f5dcfe *vp90-2-02-size-16x64.webm a03d8c1179ca626a8856fb416d635dbf377979cd *vp90-2-02-size-16x64.webm.md5 0f3a182e0750fcbae0b9eae80c7a53aabafdd18d *vp90-2-02-size-16x66.webm 8cb6736dc2d897c1283919a32068af377d66c59c *vp90-2-02-size-16x66.webm.md5 68fe70dc7914cc1d8d6dcd97388b79196ba3e7f1 *vp90-2-02-size-18x08.webm 874c7fb505be9db3160c57cb405c4dbd5b990dc2 *vp90-2-02-size-18x08.webm.md5 0546352dd78496d4dd86c3727ac2ff36c9e72032 *vp90-2-02-size-18x10.webm 1d80eb36557ea5f25a386495a36f93da0f25316b *vp90-2-02-size-18x10.webm.md5 60fe99e5f5cc99706efa3e0b894e45cbcf0d6330 *vp90-2-02-size-18x16.webm 1ab6cdd89a53662995d103546e6611c84f9292ab *vp90-2-02-size-18x16.webm.md5 f9a8f5fb749d69fd555db6ca093b7f77800c7b4f *vp90-2-02-size-18x18.webm ace8a66328f7802b15f9989c2720c029c6abd279 *vp90-2-02-size-18x18.webm.md5 a197123a527ec25913a9bf52dc8c347749e00045 *vp90-2-02-size-18x32.webm 34fbd7036752232d1663e70d7f7cdc93f7129202 *vp90-2-02-size-18x32.webm.md5 f219655a639a774a2c9c0a9f45c28dc0b5e75e24 *vp90-2-02-size-18x34.webm 2c4d622a9ea548791c1a07903d3702e9774388bb *vp90-2-02-size-18x34.webm.md5 5308578da48c677d477a5404e19391d1303033c9 *vp90-2-02-size-18x64.webm e7fd4462527bac38559518ba80e41847db880f15 *vp90-2-02-size-18x64.webm.md5 e109a7e013bd179f97e378542e1e81689ed06802 *vp90-2-02-size-18x66.webm 45c04e422fb383c1f3be04beefaa4490e83bdb1a *vp90-2-02-size-18x66.webm.md5 38844cae5d99caf445f7de33c3ae78494ce36c01 *vp90-2-02-size-32x08.webm ad018be39e493ca2405225034b1a5b7a42af6f3a *vp90-2-02-size-32x08.webm.md5 7b57eaad55906f9de9903c8657a3fcb2aaf792ea *vp90-2-02-size-32x10.webm 2294425d4e55d275af5e25a0beac9738a1b4ee73 *vp90-2-02-size-32x10.webm.md5 f47ca2ced0d47f761bb0a5fdcd911d3f450fdcc1 *vp90-2-02-size-32x16.webm ae10981d93913f0ab1f28c1146255e01769aa8c0 *vp90-2-02-size-32x16.webm.md5 08b23ad838b6cf1fbfe3ad7e7775d95573e815fc *vp90-2-02-size-32x18.webm 1ba76f4c4a4ac7aabfa3ce195c1b473535eb7cc8 *vp90-2-02-size-32x18.webm.md5 d5b88ae6c8c25c53dee74d9f1e6ca64244349a57 *vp90-2-02-size-32x32.webm e39c067a8ee2da52a51641eb1cb7f8eba935eb6b *vp90-2-02-size-32x32.webm.md5 529429920dc36bd899059fa75a767f02c8c60874 *vp90-2-02-size-32x34.webm 56888e7834f52b106e8911e3a7fc0f473b609995 *vp90-2-02-size-32x34.webm.md5 38e848e160391c2b1a55040aadde613b9f4bf15e *vp90-2-02-size-32x64.webm 8950485fb3f68b0e8be234db860e4ec5f5490fd0 *vp90-2-02-size-32x64.webm.md5 5e8670f0b8ec9cefa8795b8959ffbe1a8e1aea94 *vp90-2-02-size-32x66.webm 225df9d7d72ec711b0b60f4aeb65311c97db054a *vp90-2-02-size-32x66.webm.md5 695f929e2ce6fb11a1f180322d46c5cb1c97fa61 *vp90-2-02-size-34x08.webm 5bb4262030018dd01883965c6aa6070185924ef6 *vp90-2-02-size-34x08.webm.md5 5adf74ec906d2ad3f7526e06bd29f5ad7d966a90 *vp90-2-02-size-34x10.webm 71c100b437d3e8701632ae8d65c3555339b1c68f *vp90-2-02-size-34x10.webm.md5 d0918923c987fba2d00193d83797b21289fe54aa *vp90-2-02-size-34x16.webm 5d5a52f3535b4d2698dd3d87f4a13fdc9b57163d *vp90-2-02-size-34x16.webm.md5 553ab0042cf87f5e668ec31b2e4b2a4b6ec196fd *vp90-2-02-size-34x18.webm 
a164c7f3c424987df2340496e6a8cf76e973f0f1 *vp90-2-02-size-34x18.webm.md5 baf3e233634f150de81c18ba5d8848068e1c3c54 *vp90-2-02-size-34x32.webm 22a79d3bd1c9b85dfe8c70bb2e19f08a92a8be03 *vp90-2-02-size-34x32.webm.md5 6d50a533774a7167350e4a7ef43c94a5622179a2 *vp90-2-02-size-34x34.webm 0c099638e79c273546523e06704553e42eb00b00 *vp90-2-02-size-34x34.webm.md5 698cdd0a5e895cc202c488675e682a8c537ede4f *vp90-2-02-size-34x64.webm 9317b63987cddab8389510a27b86f9f3d46e3fa5 *vp90-2-02-size-34x64.webm.md5 4b5335ca06f082b6b69f584eb8e7886bdcafefd3 *vp90-2-02-size-34x66.webm e18d68b35428f46a84a947c646804a51ef1d7cec *vp90-2-02-size-34x66.webm.md5 a54ae7b494906ec928a876e8290e5574f2f9f6a2 *vp90-2-02-size-64x08.webm 87f9f7087b6489d45e9e4b38ede2c5aef4a4928f *vp90-2-02-size-64x08.webm.md5 24522c70804a3c23d937df2d829ae63965b23f38 *vp90-2-02-size-64x10.webm 447ce03938ab53bffcb4a841ee0bfaa90462dcb9 *vp90-2-02-size-64x10.webm.md5 2a5035d035d214ae614af8051930690ef623989b *vp90-2-02-size-64x16.webm 84e355761dd2e0361b904c84c52a0dd0384d89cf *vp90-2-02-size-64x16.webm.md5 3a293ef4e270a19438e59b817fbe5f43eed4d36b *vp90-2-02-size-64x18.webm 666824e5ba746779eb46079e0631853dcc86d48b *vp90-2-02-size-64x18.webm.md5 ed32fae837095c9e8fc95d223ec68101812932c2 *vp90-2-02-size-64x32.webm 97086eadedce1d0d9c072b585ba7b49aec69b1e7 *vp90-2-02-size-64x32.webm.md5 696c7a7250bdfff594f4dfd88af34239092ecd00 *vp90-2-02-size-64x34.webm 253a1d38d452e7826b086846c6f872f829c276bb *vp90-2-02-size-64x34.webm.md5 fc508e0e3c2e6872c60919a60b812c5232e9c2b0 *vp90-2-02-size-64x64.webm 2cd6ebeca0f82e9f505616825c07950371b905ab *vp90-2-02-size-64x64.webm.md5 0f8a4fc1d6521187660425c283f08dff8c66e476 *vp90-2-02-size-64x66.webm 5806be11a1d346be235f88d3683e69f73746166c *vp90-2-02-size-64x66.webm.md5 273b0c36e3658685cde250408a478116d7ae92f1 *vp90-2-02-size-66x08.webm 23c3cd0dca20a2f71f036e77ea92025ff4e7a298 *vp90-2-02-size-66x08.webm.md5 4844c59c3306d1e671bb0568f00e344bf797e66e *vp90-2-02-size-66x10.webm e041eaf6841d775f8fde8bbb4949d2733fdaab7f *vp90-2-02-size-66x10.webm.md5 bdf3f1582b234fcd2805ffec59f9d716a2345302 *vp90-2-02-size-66x16.webm 2ec85ee18119e6798968571ea6e1b93ca386e3af *vp90-2-02-size-66x16.webm.md5 0acce9af12b13b025d5274013da7ef6f568f075f *vp90-2-02-size-66x18.webm 77c4d53e2a5c96b70af9d575fe6811e0f5ee627b *vp90-2-02-size-66x18.webm.md5 682b36a25774bbdedcd603f504d18eb63f0167d4 *vp90-2-02-size-66x32.webm 53728fae2a428f16d376a29f341a64ddca97996a *vp90-2-02-size-66x32.webm.md5 e71b70e901e29eaa6672a6aa4f37f6f5faa02bd6 *vp90-2-02-size-66x34.webm f69a6a555e3f614b0a35f9bfc313d8ebb35bc725 *vp90-2-02-size-66x34.webm.md5 4151b8c29452d5c2266397a7b9bf688899a2937b *vp90-2-02-size-66x64.webm 69486e7fd9e380b6c97a03d3e167affc79f73840 *vp90-2-02-size-66x64.webm.md5 68784a1ecac776fe2a3f230345af32f06f123536 *vp90-2-02-size-66x66.webm 7f008c7f48d55e652fbd6bac405b51e0015c94f2 *vp90-2-02-size-66x66.webm.md5 7e1bc449231ac1c5c2a11c9a6333b3e828763798 *vp90-2-03-size-196x196.webm 6788a561466dace32d500194bf042e19cccc35e1 *vp90-2-03-size-196x196.webm.md5 a170c9a88ec1dd854c7a471ff55fb2a97ac31870 *vp90-2-03-size-196x198.webm 6bf9d6a8e2bdc5bf4f8a78071a3fed5ca02ad6f2 *vp90-2-03-size-196x198.webm.md5 68f861d21c4c8b03d572c3d3fcd9f4fbf1f4503f *vp90-2-03-size-196x200.webm bbfc260b2bfd872cc6054272bb6b7f959a9e1c6e *vp90-2-03-size-196x200.webm.md5 fc34889feeca2b7e5b27b4f1ce22d2e2b8e3e4b1 *vp90-2-03-size-196x202.webm 158ee72af578f39aad0c3b8f4cbed2fc78b57e0f *vp90-2-03-size-196x202.webm.md5 dd28fb7247af534bdf5e6795a3ac429610489a0b *vp90-2-03-size-196x208.webm 
7546be847efce2d1c0a23f807bfb03f91b764e1e *vp90-2-03-size-196x208.webm.md5 41d5cf5ed65b722a1b6dc035e67f978ea8ffecf8 *vp90-2-03-size-196x210.webm 9444fdf632d6a1b6143f4cb10fed8f63c1d67ec1 *vp90-2-03-size-196x210.webm.md5 5007bc618143437c009d6dde5fc2e86f72d37dc2 *vp90-2-03-size-196x224.webm 858361d8f79b44df5545feabbc9754ec9ede632f *vp90-2-03-size-196x224.webm.md5 0bcbe357fbc776c3fa68e7117179574ed7564a44 *vp90-2-03-size-196x226.webm 72006a5f42031a43d70a2cd9fc1958962a86628f *vp90-2-03-size-196x226.webm.md5 000239f048cceaac055558e97ef07078ebf65502 *vp90-2-03-size-198x196.webm 2d6841901b72000c5340f30be602853438c1b787 *vp90-2-03-size-198x196.webm.md5 ae75b766306a6404c3b3b35a6b6d53633c14fbdb *vp90-2-03-size-198x198.webm 3f2544b4f3b4b643a98f2c3b15ea5826fc702fa1 *vp90-2-03-size-198x198.webm.md5 95ffd573fa84ccef1cd59e1583e6054f56a5c83d *vp90-2-03-size-198x200.webm 5d537e3c9b9c54418c79677543454c4cda3de1af *vp90-2-03-size-198x200.webm.md5 ecc845bf574375f469bc91bf5c75c79dc00073d6 *vp90-2-03-size-198x202.webm 1b59f5e111265615a7a459eeda8cc9045178d228 *vp90-2-03-size-198x202.webm.md5 432fb27144fe421b9f51cf44d2750a26133ed585 *vp90-2-03-size-198x208.webm a58a67f4fb357c73ca078aeecbc0f782975630b1 *vp90-2-03-size-198x208.webm.md5 ff5058e7e6a47435046612afc8536f2040989e6f *vp90-2-03-size-198x210.webm 18d3be7935e52217e2e9400b6f2c681a9e45dc89 *vp90-2-03-size-198x210.webm.md5 a0d55263c1ed2c03817454dd4ec4090d36dbc864 *vp90-2-03-size-198x224.webm efa366a299817e2da51c00623b165aab9fbb8d91 *vp90-2-03-size-198x224.webm.md5 ccd142fa2920fc85bb753f049160c1c353ad1574 *vp90-2-03-size-198x226.webm 534524a0b2dbff852e0b92ef09939db072f83243 *vp90-2-03-size-198x226.webm.md5 0d483b94ed40abc8ab6e49f960432ee54ad9c7f1 *vp90-2-03-size-200x196.webm 41795f548181717906e7a504ba551f06c32102ae *vp90-2-03-size-200x196.webm.md5 f6c2dc54e0989d50f01333fe40c91661fcbf849a *vp90-2-03-size-200x198.webm 43df5d8c46a40089441392e6d096c588c1079a68 *vp90-2-03-size-200x198.webm.md5 2f6e9df82e44fc145f0d9212dcccbed3de605e23 *vp90-2-03-size-200x200.webm 757b2ef96b82093255725bab9690bbafe27f3caf *vp90-2-03-size-200x200.webm.md5 40c5ea60415642a4a2e75c0d127b06309baadfab *vp90-2-03-size-200x202.webm 3022c4a1c625b5dc04fdb1052d17d45b4171cfba *vp90-2-03-size-200x202.webm.md5 6942ed5b27476bb8506d10e600d6ff60887780ca *vp90-2-03-size-200x208.webm c4ab8c66f3cf2dc8e8dd7abae9ac21f4d32cd6be *vp90-2-03-size-200x208.webm.md5 71dbc99b83c49d1da45589b91eabb98e2f4a7b1e *vp90-2-03-size-200x210.webm 3f0b40da7eef7974b9bc326562f251feb67d9c7c *vp90-2-03-size-200x210.webm.md5 6b6b8489081cfefb377cc5f18eb754ec2383f655 *vp90-2-03-size-200x224.webm a259df2ac0e294492e3f9d4315baa34cab044f04 *vp90-2-03-size-200x224.webm.md5 c9adc1c9bb07559349a0b054df4af56f7a6edbb9 *vp90-2-03-size-200x226.webm 714cec61e3575581e4f1a0e3921f4dfdbbd316c5 *vp90-2-03-size-200x226.webm.md5 f9bdc936bdf53f8be9ce78fecd41a21d31ff3943 *vp90-2-03-size-202x196.webm 5b8e2e50fcea2c43b12fc067b8a9cc117af77bda *vp90-2-03-size-202x196.webm.md5 c7b66ea3da87613deb47ff24a111247d3c384fec *vp90-2-03-size-202x198.webm 517e91204b25586da943556f4adc5951c9be8bee *vp90-2-03-size-202x198.webm.md5 935ef56b01cfdb4265a7e24696645209ccb20970 *vp90-2-03-size-202x200.webm 55b8ec4a2513183144a8e27564596c06c7576fce *vp90-2-03-size-202x200.webm.md5 849acf75e4f1d8d90046704e1103a18c64f30e35 *vp90-2-03-size-202x202.webm c79afc6660df2824e7df314e5bfd71f0d8acf76b *vp90-2-03-size-202x202.webm.md5 17b3a4d55576b770626ccb856b9f1a6c8f6ae476 *vp90-2-03-size-202x208.webm 0b887ff30409c58f2ccdc3bfacd6be7c69f8997a *vp90-2-03-size-202x208.webm.md5 
032d0ade4230fb2eef6d19915a7a1c9aa4a52617 *vp90-2-03-size-202x210.webm f78f8e79533c0c88dd2bfdcec9b1c07848568ece *vp90-2-03-size-202x210.webm.md5 915a38c31fe425d5b93c837121cfa8082f5ea5bc *vp90-2-03-size-202x224.webm bf52a104074d0c5942aa7a5b31e11db47e43d48e *vp90-2-03-size-202x224.webm.md5 be5cfde35666fa435e47d544d9258215beb1cf29 *vp90-2-03-size-202x226.webm 2fa2f87502fda756b319389c8975204e130a2e3f *vp90-2-03-size-202x226.webm.md5 15d908e97862b5b4bf295610df011fb9aa09909b *vp90-2-03-size-208x196.webm 50c60792305d6a99be376dd596a6ff979325e6cc *vp90-2-03-size-208x196.webm.md5 a367c7bc9fde56d6f4848cc573c7d4c1ce75e348 *vp90-2-03-size-208x198.webm be85fb2c8d435a75484231356f07d06ebddd13cd *vp90-2-03-size-208x198.webm.md5 05fd46deb7288e7253742091f56e54a9a441a187 *vp90-2-03-size-208x200.webm 74f8ec3b3a2fe81767ed1ab36a47bc0062d6223c *vp90-2-03-size-208x200.webm.md5 d8985c4b386513a7385a4b3639bf91e469f1378b *vp90-2-03-size-208x202.webm 0614a1e8d92048852adcf605a51333f5fabc7f03 *vp90-2-03-size-208x202.webm.md5 28b002242238479165ba4fb87ee6b442c64b32e4 *vp90-2-03-size-208x208.webm 37de5aca59bb900228400b0e115d3229edb9dcc0 *vp90-2-03-size-208x208.webm.md5 c545be0050c2fad7c68427dbf86c62a739e94ab3 *vp90-2-03-size-208x210.webm d646eccb3cd578f94b54777e32b88898bef6e17a *vp90-2-03-size-208x210.webm.md5 63a0cfe295b661026dd7b1bebb67acace1db766f *vp90-2-03-size-208x224.webm 85c0361d93bf85a335248fef2767ff43eeef23db *vp90-2-03-size-208x224.webm.md5 f911cc718d66e4fe8a865226088939c9eb1b7825 *vp90-2-03-size-208x226.webm a6d583a57876e7b7ec48625b2b2cdbcf70cab837 *vp90-2-03-size-208x226.webm.md5 5bbb0f36da9a4683cf04e724124d8696332911bf *vp90-2-03-size-210x196.webm a3580fc7816d7fbcfb54fdba501cabbd06ba2f1d *vp90-2-03-size-210x196.webm.md5 8db64d6f9ce36dd382013b42ae4e292deba697bc *vp90-2-03-size-210x198.webm eda20f8268c7f4147bead4059e9c4897e09140a9 *vp90-2-03-size-210x198.webm.md5 ce391505eeaf1d12406563101cd6b2dbbbb44bfc *vp90-2-03-size-210x200.webm 79d73b7f623082d2a00aa33e95c79d11c7d9c3a8 *vp90-2-03-size-210x200.webm.md5 852db6fdc206e72391fc69b807f1954934679949 *vp90-2-03-size-210x202.webm f69414c5677ed2f2b8b37ae76429e509a92276a5 *vp90-2-03-size-210x202.webm.md5 c424cc3edd2308da7d33f27acb36b54db5bf2595 *vp90-2-03-size-210x208.webm 27b18562faa1b3184256f4eae8114b539b3e9d3e *vp90-2-03-size-210x208.webm.md5 dd029eba719d50a2851592fa8b9b2efe88904930 *vp90-2-03-size-210x210.webm c853a1670465eaa04ca31b3511995f1b6ed4f58f *vp90-2-03-size-210x210.webm.md5 d962e8ae676c54d0c3ea04ec7c04b37ae6a786e3 *vp90-2-03-size-210x224.webm 93b793e79d987065b39ad8e2e71244368435fc25 *vp90-2-03-size-210x224.webm.md5 3d0825fe83bcc125be1f78145ff43ca6d7588784 *vp90-2-03-size-210x226.webm 5230f31a57ca3b5311698a12035d2644533b3ec4 *vp90-2-03-size-210x226.webm.md5 6622f8bd9279e1ce45509a58a31a990052d45e14 *vp90-2-03-size-224x196.webm 65411da07f60113f2be05c807879072b161d561e *vp90-2-03-size-224x196.webm.md5 6744ff2ee2c41eb08c62ff30880833b6d77b585b *vp90-2-03-size-224x198.webm 46ea3641d41acd4bff347b224646c060d5620385 *vp90-2-03-size-224x198.webm.md5 8eb91f3416a1404705f370caecd74b2b458351b1 *vp90-2-03-size-224x200.webm 196aefb854c8b95b9330263d6690b7ee15693ecf *vp90-2-03-size-224x200.webm.md5 256a5a23ef4e6d5ef2871af5afb8cd13d28cec00 *vp90-2-03-size-224x202.webm 840ad8455dcf2be378c14b007e66fa642fc8196d *vp90-2-03-size-224x202.webm.md5 db4606480ab48b96c9a6ff5e639f1f1aea2a12e4 *vp90-2-03-size-224x208.webm 40b9801d5620467499ac70fa6b7c40aaa5e1c331 *vp90-2-03-size-224x208.webm.md5 e37159e687fe1cb24cffddfae059301adbaf4212 *vp90-2-03-size-224x210.webm 
1e4acd4b6334ae260c3eed08652d0ba8122073f2 *vp90-2-03-size-224x210.webm.md5 0de1eb4bb6285ae621e4f2b613d2aa4a8c95a130 *vp90-2-03-size-224x224.webm 37db449ad86fb286c2c02d94aa8fe0379c05044a *vp90-2-03-size-224x224.webm.md5 32ebbf903a7d7881bcfe59639f1d472371f3bf27 *vp90-2-03-size-224x226.webm 5cc3ac5dc9f6912491aa2ddac863f8187f34c569 *vp90-2-03-size-224x226.webm.md5 9480ff5c2c32b1870ac760c87514912616e6cf01 *vp90-2-03-size-226x196.webm fe83655c0f1888f0af7b047785f01ba7ca9f1324 *vp90-2-03-size-226x196.webm.md5 09cad4221996315cdddad4e502dbfabf53ca1d6a *vp90-2-03-size-226x198.webm e3ddfdc650acb95adb45abd9b634e1f09ea8ac96 *vp90-2-03-size-226x198.webm.md5 c34f49d55fe39e3f0b607e3cc95e30244225cecb *vp90-2-03-size-226x200.webm abb83edc868a3523ccd4e5523fac2efbe7c3df1f *vp90-2-03-size-226x200.webm.md5 d17bc08eedfc60c4c23d576a6c964a21bf854d1f *vp90-2-03-size-226x202.webm 1d22d2d0f375251c2d5a1acb4714bc35d963865b *vp90-2-03-size-226x202.webm.md5 9bd537c4f92a25596ccd29fedfe181feac948b92 *vp90-2-03-size-226x208.webm 6feb0e7325386275719f3511ada9e248a2ae7df4 *vp90-2-03-size-226x208.webm.md5 4487067f6cedd495b93696b44b37fe0a3e7eda14 *vp90-2-03-size-226x210.webm 49a8fa87945f47208168d541c068e78d878075d5 *vp90-2-03-size-226x210.webm.md5 559fea2f8da42b33c1aa1dbc34d1d6781009847a *vp90-2-03-size-226x224.webm 83c6d8f2969b759e10e5c6542baca1265c874c29 *vp90-2-03-size-226x224.webm.md5 fe0af2ee47b1e5f6a66db369e2d7e9d870b38dce *vp90-2-03-size-226x226.webm 94ad19b8b699cea105e2ff18f0df2afd7242bcf7 *vp90-2-03-size-226x226.webm.md5 52bc1dfd3a97b24d922eb8a31d07527891561f2a *vp90-2-03-size-352x288.webm 3084d6d0a1eec22e85a394422fbc8faae58930a5 *vp90-2-03-size-352x288.webm.md5 b6524e4084d15b5d0caaa3d3d1368db30cbee69c *vp90-2-03-deltaq.webm 65f45ec9a55537aac76104818278e0978f94a678 *vp90-2-03-deltaq.webm.md5 4dbb87494c7f565ffc266c98d17d0d8c7a5c5aba *vp90-2-05-resize.ivf 7f6d8879336239a43dbb6c9f13178cb11cf7ed09 *vp90-2-05-resize.ivf.md5 bf61ddc1f716eba58d4c9837d4e91031d9ce4ffe *vp90-2-06-bilinear.webm f6235f937552e11d8eb331ec55da6b3aa596b9ac *vp90-2-06-bilinear.webm.md5 0c83a1e414fde3bccd6dc451bbaee68e59974c76 *vp90-2-07-frame_parallel.webm e5c2c9fb383e5bf3b563480adaeba5b7e3475ecd *vp90-2-07-frame_parallel.webm.md5 086c7edcffd699ae7d99d710fd7e53b18910ca5b *vp90-2-08-tile_1x2_frame_parallel.webm e981ecaabb29a80e0cbc1f4002384965ce8e95bb *vp90-2-08-tile_1x2_frame_parallel.webm.md5 ed79be026a6f28646c5825da1c12d1fbc70f96a4 *vp90-2-08-tile_1x2.webm 45b404e025841c9750895fc1a9f6bd384fe6a315 *vp90-2-08-tile_1x2.webm.md5 cf8ea970c776797aae71dac8317ea926d9431cab *vp90-2-08-tile_1x4_frame_parallel.webm a481fbea465010b57af5a19ebf6d4a5cfe5b9278 *vp90-2-08-tile_1x4_frame_parallel.webm.md5 0203ec456277a01aec401e7fb6c72c9a7e5e3f9d *vp90-2-08-tile_1x4.webm c9b237dfcc01c1b414fbcaa481d014a906ef7998 *vp90-2-08-tile_1x4.webm.md5 20c75157e91ab41f82f70ffa73d5d01df8469287 *vp90-2-08-tile-4x4.webm ae7451810247fd13975cc257aa0301ff17102255 *vp90-2-08-tile-4x4.webm.md5 2ec6e15422ac7a61af072dc5f27fcaf1942ce116 *vp90-2-08-tile-4x1.webm 0094f5ee5e46345017c30e0aa4835b550212d853 *vp90-2-08-tile-4x1.webm.md5 edea45dac4a3c2e5372339f8851d24c9bef803d6 *vp90-2-09-subpixel-00.ivf 5428efc4bf92191faedf4a727fcd1d94966a7abc *vp90-2-09-subpixel-00.ivf.md5 8cdd435d89029987ee196896e21520e5f879f04d *vp90-2-bbb_1280x720_tile_1x4_1310kbps.webm 091b373aa2ecb59aa5c647affd5bcafcc7547364 *vp90-2-bbb_1920x1080_tile_1x1_2581kbps.webm 87ee28032b0963a44b73a850fcc816a6dc83efbb *vp90-2-bbb_1920x1080_tile_1x4_2586kbps.webm c6ce25c4bfd4bdfc2932b70428e3dfe11210ec4f 
*vp90-2-bbb_1920x1080_tile_1x4_fpm_2304kbps.webm 2064bdb22aa71c2691e0469fb62e8087a43f08f8 *vp90-2-bbb_426x240_tile_1x1_180kbps.webm 8080eda22694910162f0996e8a962612f381a57f *vp90-2-bbb_640x360_tile_1x2_337kbps.webm a484b335c27ea189c0f0d77babea4a510ce12d50 *vp90-2-bbb_854x480_tile_1x2_651kbps.webm 3eacf1f006250be4cc5c92a7ef146e385ee62653 *vp90-2-sintel_1280x546_tile_1x4_1257kbps.webm 217f089a16447490823127b36ce0d945522accfd *vp90-2-sintel_1920x818_tile_1x4_fpm_2279kbps.webm eedb3c641e60dacbe082491a16df529a5c9187df *vp90-2-sintel_426x182_tile_1x1_171kbps.webm cb7e4955af183dff33bcba0c837f0922ab066400 *vp90-2-sintel_640x272_tile_1x2_318kbps.webm 48613f9380e2580002f8a09d6e412ea4e89a52b9 *vp90-2-sintel_854x364_tile_1x2_621kbps.webm 990a91f24dd284562d21d714ae773dff5452cad8 *vp90-2-tos_1280x534_tile_1x4_1306kbps.webm aa402217577a659cfc670157735b4b8e9aa670fe *vp90-2-tos_1280x534_tile_1x4_fpm_952kbps.webm b6dd558c90bca466b4bcbd03b3371648186465a7 *vp90-2-tos_1920x800_tile_1x4_fpm_2335kbps.webm 1a9c2914ba932a38f0a143efc1ad0e318e78888b *vp90-2-tos_426x178_tile_1x1_181kbps.webm a3d2b09f24debad4747a1b3066f572be4273bced *vp90-2-tos_640x266_tile_1x2_336kbps.webm c64b03b5c090e6888cb39685c31f00a6b79fa45c *vp90-2-tos_854x356_tile_1x2_656kbps.webm 94b533dbcf94292001e27cc51fec87f9e8c90c0b *vp90-2-tos_854x356_tile_1x2_fpm_546kbps.webm 0e7cd4135b231c9cea8d76c19f9e84b6fd77acec *vp90-2-08-tile_1x8_frame_parallel.webm c9b6850af28579b031791066457f4cb40df6e1c7 *vp90-2-08-tile_1x8_frame_parallel.webm.md5 e448b6e83490bca0f8d58b4f4b1126a17baf4b0c *vp90-2-08-tile_1x8.webm 5e524165f0397e6141d914f4f0a66267d7658376 *vp90-2-08-tile_1x8.webm.md5 a34e14923d6d17b1144254d8187d7f85b700a63c *vp90-2-02-size-lf-1920x1080.webm e3b28ddcfaeb37fb4d132b93f92642a9ad17c22d *vp90-2-02-size-lf-1920x1080.webm.md5 d48c5db1b0f8e60521a7c749696b8067886033a3 *vp90-2-09-aq2.webm 84c1599298aac78f2fc05ae2274575d10569dfa0 *vp90-2-09-aq2.webm.md5 55fc55ed73d578ed60fad05692579873f8bad758 *vp90-2-09-lf_deltas.webm 54638c38009198c38c8f3b25c182b709b6c1fd2e *vp90-2-09-lf_deltas.webm.md5 510d95f3beb3b51c572611fdaeeece12277dac30 *vp90-2-10-show-existing-frame.webm 14d631096f4bfa2d71f7f739aec1448fb3c33bad *vp90-2-10-show-existing-frame.webm.md5 d2feea7728e8d2c615981d0f47427a4a5a45d881 *vp90-2-10-show-existing-frame2.webm 5f7c7811baa3e4f03be1dd78c33971b727846821 *vp90-2-10-show-existing-frame2.webm.md5 b4318e75f73a6a08992c7326de2fb589c2a794c7 *vp90-2-11-size-351x287.webm b3c48382cf7d0454e83a02497c229d27720f9e20 *vp90-2-11-size-351x287.webm.md5 8e0096475ea2535bac71d3e2fc09e0c451c444df *vp90-2-11-size-351x288.webm 19e003804ec1dfc5464813b32339a15d5ba7b42f *vp90-2-11-size-351x288.webm.md5 40cd1d6a188d7a88b21ebac1e573d3f270ab261e *vp90-2-11-size-352x287.webm 68f515abe3858fc1eded46c8e6b2f727d43b5331 *vp90-2-11-size-352x287.webm.md5 9a510769ff23db410880ec3029d433e87d17f7fc *vp90-2-12-droppable_1.ivf 952eaac6eefa6f62179ed1db3e922fd42fecc624 *vp90-2-12-droppable_1.ivf.md5 9a510769ff23db410880ec3029d433e87d17f7fc *vp90-2-12-droppable_2.ivf 92a756469fa438220524e7fa6ac1d38c89514d17 *vp90-2-12-droppable_2.ivf.md5 c21e97e4ba486520118d78b01a5cb6e6dc33e190 *vp90-2-12-droppable_3.ivf 601abc9e4176c70f82ac0381365e9b151fdd24cd *vp90-2-12-droppable_3.ivf.md5 61c640dad23cd4f7ad811b867e7b7e3521f4e3ba *vp90-2-13-largescaling.webm bca1b02eebdb088fa3f389fe0e7571e75a71f523 *vp90-2-13-largescaling.webm.md5 c740708fa390806eebaf669909c1285ab464f886 *vp90-2-14-resize-fp-tiles-1-2.webm c7b85ffd8e11500f73f52e7dc5a47f57c393d47f *vp90-2-14-resize-fp-tiles-1-2.webm.md5 
ec8faa352a08f7033c60f29f80d505e2d7daa103 *vp90-2-14-resize-fp-tiles-1-4.webm 6852c783fb421bda5ded3d4c5a3ffc46de03fbc1 *vp90-2-14-resize-fp-tiles-1-4.webm.md5 8af61853ac0d07c4cb5bf7c2016661ba350b3497 *vp90-2-14-resize-fp-tiles-1-8.webm 571353bac89fea60b5706073409aa3c0d42aefe9 *vp90-2-14-resize-fp-tiles-1-8.webm.md5 b1c187ed69931496b82ec194017a79831bafceef *vp90-2-14-resize-fp-tiles-1-16.webm 1c199a41afe42ce303944d70089eaaa2263b4a09 *vp90-2-14-resize-fp-tiles-1-16.webm.md5 8eaae5a6f2dff934610b0c7a917d7f583ba74aa5 *vp90-2-14-resize-fp-tiles-2-1.webm db18fcf915f7ffaea6c39feab8bda6c1688af011 *vp90-2-14-resize-fp-tiles-2-1.webm.md5 bc3046d138941e2a20e9ceec0ff6d25c25d12af3 *vp90-2-14-resize-fp-tiles-4-1.webm 393211b808030d09a79927b17a4374b2f68a60ae *vp90-2-14-resize-fp-tiles-4-1.webm.md5 6e8f8e31721a0f7f68a2964e36e0e698c2e276b1 *vp90-2-14-resize-fp-tiles-8-1.webm 491fd3cd78fb0577bfe905bb64bbf64bd7d29140 *vp90-2-14-resize-fp-tiles-8-1.webm.md5 cc5958da2a7edf739cd2cfeb18bd05e77903087e *vp90-2-14-resize-fp-tiles-16-1.webm 0b58daf55aaf9063bf5b4fb33393d18b417dc428 *vp90-2-14-resize-fp-tiles-16-1.webm.md5 821eeecc9d8c6a316134dd42d1ff057787d8047b *vp90-2-14-resize-fp-tiles-2-4.webm 374c549f2839a3d0b732c4e3650700144037e76c *vp90-2-14-resize-fp-tiles-2-4.webm.md5 dff8c8e49aacea9f4c7f22cb882da984e2a1b405 *vp90-2-14-resize-fp-tiles-2-8.webm e5b8820a7c823b21297d6e889e57ec401882c210 *vp90-2-14-resize-fp-tiles-2-8.webm.md5 77629e4b23e32896aadf6e994c78bd4ffa1c7797 *vp90-2-14-resize-fp-tiles-2-16.webm 1937f5df032664ac345d4613ad4417b4967b1230 *vp90-2-14-resize-fp-tiles-2-16.webm.md5 380ba5702bb1ec7947697314ab0300b5c56a1665 *vp90-2-14-resize-fp-tiles-4-2.webm fde7b30d2aa64c1e851a4852f655d79fc542cf66 *vp90-2-14-resize-fp-tiles-4-2.webm.md5 dc784b258ffa2abc2ae693d11792acf0bb9cb74f *vp90-2-14-resize-fp-tiles-8-2.webm edf26f0130aeee8342d49c2c8f0793ad008782d9 *vp90-2-14-resize-fp-tiles-8-2.webm.md5 8e575789fd63ebf69e8eff1b9a4351a249a73bee *vp90-2-14-resize-fp-tiles-16-2.webm b6415318c1c589a1f64b9d569ce3cabbec2e0d52 *vp90-2-14-resize-fp-tiles-16-2.webm.md5 e3adc944a11c4c5517e63664c84ebb0847b64d81 *vp90-2-14-resize-fp-tiles-4-8.webm 03cba0532bc90a05b1990db830bf5701e24e7982 *vp90-2-14-resize-fp-tiles-4-8.webm.md5 3b27a991eb6d78dce38efab35b7db682e8cbbee3 *vp90-2-14-resize-fp-tiles-4-16.webm 5d16b7f82bf59f802724ddfd97abb487150b1c9d *vp90-2-14-resize-fp-tiles-4-16.webm.md5 d5fed8c28c1d4c7e232ebbd25cf758757313ed96 *vp90-2-14-resize-fp-tiles-8-4.webm 5a8ff8a52cbbde7bfab569beb6d971c5f8b904f7 *vp90-2-14-resize-fp-tiles-8-4.webm.md5 17a5faa023d77ee9dad423a4e0d3145796bbc500 *vp90-2-14-resize-fp-tiles-16-4.webm 2ef8daa3c3e750fd745130d0a76a39fe86f0448f *vp90-2-14-resize-fp-tiles-16-4.webm.md5 9361e031f5cc990d8740863e310abb5167ae351e *vp90-2-14-resize-fp-tiles-8-16.webm 57f13a2197486584f4e1a4f82ad969f3abc5a1a2 *vp90-2-14-resize-fp-tiles-8-16.webm.md5 5803fc6fcbfb47b7661f3fcc6499158a32b56675 *vp90-2-14-resize-fp-tiles-16-8.webm be0fe64a1a4933696ff92d93f9bdecdbd886dc13 *vp90-2-14-resize-fp-tiles-16-8.webm.md5 0ac0f6d20a0afed77f742a3b9acb59fd7b9cb093 *vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm 1765315acccfe6cd12230e731369fcb15325ebfa *vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm.md5 4a2b7a683576fe8e330c7d1c4f098ff4e70a43a8 *vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm 1ef480392112b3509cb190afbb96f9a38dd9fbac *vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm.md5 e615575ded499ea1d992f3b38e3baa434509cdcd *vp90-2-15-segkey.webm e3ab35d4316c5e81325c50f5236ceca4bc0d35df *vp90-2-15-segkey.webm.md5 9b7ca2cac09d34c4a5d296c1900f93b1e2f69d0d 
*vp90-2-15-segkey_adpq.webm 8f46ba5f785d0c2170591a153e0d0d146a7c8090 *vp90-2-15-segkey_adpq.webm.md5 698a6910a97486b833073ef0c0b18d75dce57ee8 *vp90-2-16-intra-only.webm 5661b0168752969f055eec37b05fa9fa947dc7eb *vp90-2-16-intra-only.webm.md5 c01bb7938f9a9f25e0c37afdec2f2fb73b6cc7fa *vp90-2-17-show-existing-frame.webm cc75f351818b9a619818f5cc77b9bc013d0c1e11 *vp90-2-17-show-existing-frame.webm.md5 013708bd043f0821a3e56fb8404d82e7a0c7af6c *vp91-2-04-yuv422.webm 1e58a7d23adad830a672f1733c9d2ae17890d59c *vp91-2-04-yuv422.webm.md5 25d78f28948789d159a9453ebc13048b818251b1 *vp91-2-04-yuv440.webm 81b3870b27a7f695ef6a43e87ab04bbdb5aee2f5 *vp91-2-04-yuv440.webm.md5 0321d507ce62dedc8a51b4e9011f7a19aed9c3dc *vp91-2-04-yuv444.webm 367e423dd41fdb49aa028574a2cfec5c2f325c5c *vp91-2-04-yuv444.webm.md5 f77673b566f686853adefe0c578ad251b7241281 *vp92-2-20-10bit-yuv420.webm abdedfaddacbbe1a15ac7a54e86360f03629fb7a *vp92-2-20-10bit-yuv420.webm.md5 0c2c355a1b17b28537c5a3b19997c8783b69f1af *vp92-2-20-12bit-yuv420.webm afb2c2798703e039189b0a15c8ac5685aa51d33f *vp92-2-20-12bit-yuv420.webm.md5 0d661bc6e83da33238981481efd1b1802d323d88 *vp93-2-20-10bit-yuv422.webm 10318907063db22eb02fad332556edbbecd443cc *vp93-2-20-10bit-yuv422.webm.md5 ebc6be2f7511a0bdeac0b18c67f84ba7168839c7 *vp93-2-20-12bit-yuv422.webm 235232267c6a1dc8a11e45d600f1c99d2f8b42d4 *vp93-2-20-12bit-yuv422.webm.md5 f76b11b26d4beaceac7a7e7729dd5054d095164f *vp93-2-20-10bit-yuv440.webm 757b33b5ac969c5999999488a731a3d1e6d9fb88 *vp93-2-20-10bit-yuv440.webm.md5 df8807dbd29bec795c2db9c3c18e511fbb988101 *vp93-2-20-12bit-yuv440.webm ea4100930c3f59a1c23fbb33ab0ea01151cae159 *vp93-2-20-12bit-yuv440.webm.md5 189c1b5f404ff41a50a7fc96341085ad541314a9 *vp93-2-20-10bit-yuv444.webm 2dd0177c2f9d970b6e698892634c653630f91f40 *vp93-2-20-10bit-yuv444.webm.md5 bd44cf6e1c27343e3639df9ac21346aedd5d6973 *vp93-2-20-12bit-yuv444.webm f36e5bdf5ec3213f32c0ddc82f95d82c5133bf27 *vp93-2-20-12bit-yuv444.webm.md5 eb438c6540eb429f74404eedfa3228d409c57874 *desktop_640_360_30.yuv 89e70ebd22c27d275fe14dc2f1a41841a6d8b9ab *kirland_640_480_30.yuv 33c533192759e5bb4f07abfbac389dc259db4686 *macmarcomoving_640_480_30.yuv 8bfaab121080821b8f03b23467911e59ec59b8fe *macmarcostationary_640_480_30.yuv 70894878d916a599842d9ad0dcd24e10c13e5467 *niklas_640_480_30.yuv 8784b6df2d8cc946195a90ac00540500d2e522e4 *tacomanarrows_640_480_30.yuv edd86a1f5e62fd9da9a9d46078247759c2638009 *tacomasmallcameramovement_640_480_30.yuv 9a70e8b7d14fba9234d0e51dce876635413ce444 *thaloundeskmtg_640_480_30.yuv e7d315dbf4f3928779e0dc624311196d44491d32 *niklas_1280_720_30.yuv c77e4a26616add298a05dd5d12397be22c0e40c5 *vp90-2-18-resize.ivf c12918cf0a716417fba2de35c3fc5ab90e52dfce *vp90-2-18-resize.ivf.md5 717da707afcaa1f692ff1946f291054eb75a4f06 *screendata.y4m b7c1296630cdf1a7ef493d15ff4f9eb2999202f6 *invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf 0a3884edb3fd8f9d9b500223e650f7de257b67d8 *invalid-vp90-2-08-tile_1x2_frame_parallel.webm.ivf.s47039_r01-05_b6-.ivf.res 359e138dfb66863828397b77000ea7a83c844d02 *invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf bbd33de01c17b165b4ce00308e8a19a942023ab8 *invalid-vp90-2-08-tile_1x8_frame_parallel.webm.ivf.s288_r01-05_b6-.ivf.res fac89b5735be8a86b0dc05159f996a5c3208ae32 *invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf 0a3884edb3fd8f9d9b500223e650f7de257b67d8 *invalid-vp90-2-09-aq2.webm.ivf.s3984_r01-05_b6-.v2.ivf.res 4506dfdcdf8ee4250924b075a0dcf1f070f72e5a *invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf 
bcdedaf168ac225575468fda77502d2dc9fd5baa *invalid-vp90-2-09-subpixel-00.ivf.s19552_r01-05_b6-.v2.ivf.res 65e93f9653bcf65b022f7d225268d1a90a76e7bb *vp90-2-19-skip.webm 368dccdde5288c13c25695d2eacdc7402cadf613 *vp90-2-19-skip.webm.md5 ffe460282df2b0e7d4603c2158653ad96f574b02 *vp90-2-19-skip-01.webm bd21bc9eda4a4a36b221d71ede3a139fc3c7bd85 *vp90-2-19-skip-01.webm.md5 178f5bd239e38cc1cc2657a7a5e1a9f52ad2d3fe *vp90-2-19-skip-02.webm 9020d5e260bd7df08e2b3d4b86f8623cee3daea2 *vp90-2-19-skip-02.webm.md5 b03c408cf23158638da18dbc3323b99a1635c68a *invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf 0a3884edb3fd8f9d9b500223e650f7de257b67d8 *invalid-vp90-2-12-droppable_1.ivf.s3676_r01-05_b6-.ivf.res 5e67e24e7f53fd189e565513cef8519b1bd6c712 *invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf 741158f67c0d9d23726624d06bdc482ad368afc9 *invalid-vp90-2-05-resize.ivf.s59293_r01-05_b6-.ivf.res 8b1f7bf7e86c0976d277f60e8fcd9539e75a079a *invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf 9c6bdf048fb2e66f07d4b4db5b32e6f303bd6109 *invalid-vp90-2-09-subpixel-00.ivf.s20492_r01-05_b6-.v2.ivf.res 552e372e9b78127389fb06b34545df2cec15ba6d *invalid-vp91-2-mixedrefcsp-444to420.ivf a61774cf03fc584bd9f0904fc145253bb8ea6c4c *invalid-vp91-2-mixedrefcsp-444to420.ivf.res 812d05a64a0d83c1b504d0519927ddc5a2cdb273 *invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf 1e472baaf5f6113459f0399a38a5a5e68d17799d *invalid-vp90-2-12-droppable_1.ivf.s73804_r01-05_b6-.ivf.res f97088c7359fc8d3d5aa5eafe57bc7308b3ee124 *vp90-2-20-big_superframe-01.webm 47d7d409785afa33b123376de0c907336e6c7bd7 *vp90-2-20-big_superframe-01.webm.md5 65ade6d2786209582c50d34cfe22b3cdb033abaf *vp90-2-20-big_superframe-02.webm 7c0ed8d04c4d06c5411dd2e5de2411d37f092db5 *vp90-2-20-big_superframe-02.webm.md5 667ec8718c982aef6be07eb94f083c2efb9d2d16 *vp90-2-07-frame_parallel-1.webm bfc82bf848e9c05020d61e3ffc1e62f25df81d19 *vp90-2-07-frame_parallel-1.webm.md5 efd5a51d175cfdacd169ed23477729dc558030dc *invalid-vp90-2-07-frame_parallel-1.webm 9f912712ec418be69adb910e2ca886a63c4cec08 *invalid-vp90-2-07-frame_parallel-2.webm 445f5a53ca9555341852997ccdd480a51540bd14 *invalid-vp90-2-07-frame_parallel-3.webm d18c90709a0d03c82beadf10898b27d88fff719c *invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf d06285d109ecbaef63b0cbcc44d70a129186f51c *invalid-vp90-2-03-size-224x196.webm.ivf.s44156_r01-05_b6-.ivf.res e60d859b0ef2b331b21740cf6cb83fabe469b079 *invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf 0ae808dca4d3c1152a9576e14830b6faa39f1b4a *invalid-vp90-2-03-size-202x210.webm.ivf.s113306_r01-05_b6-.ivf.res 9cfc855459e7549fd015c79e8eca512b2f2cb7e3 *niklas_1280_720_30.y4m 5b5763b388b1b52a81bb82b39f7ec25c4bd3d0e1 *desktop_credits.y4m 85771f6ab44e4a0226e206c0cde8351dd5918953 *vp90-2-02-size-130x132.webm 512dad5eabbed37b4bbbc64ce153f1a5484427b8 *vp90-2-02-size-130x132.webm.md5 01f7127d40360289db63b27f61cb9afcda350e95 *vp90-2-02-size-132x130.webm 4a94275328ae076cf60f966c097a8721010fbf5a *vp90-2-02-size-132x130.webm.md5 f41c0400b5716b4b70552c40dd03d44be131e1cc *vp90-2-02-size-132x132.webm 1a69e989f697e424bfe3e3e8a77bb0c0992c8e47 *vp90-2-02-size-132x132.webm.md5 94a5cbfacacba100e0c5f7861c72a1b417feca0f *vp90-2-02-size-178x180.webm dedfecf1d784bcf70629592fa5e6f01d5441ccc9 *vp90-2-02-size-178x180.webm.md5 4828b62478c04014bba3095a83106911a71cf387 *vp90-2-02-size-180x178.webm 423da2b861050c969d78ed8e8f8f14045d1d8199 *vp90-2-02-size-180x178.webm.md5 338f7c9282f43e29940f5391118aadd17e4f9234 *vp90-2-02-size-180x180.webm 6c2ef013392310778dca5dd5351160eca66b0a60 
*vp90-2-02-size-180x180.webm.md5 679fa7d6807e936ff937d7b282e7dbd8ac76447e *vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm fc7267ab8fc2bf5d6c234e34ee6c078a967b4888 *vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm.md5 9d33a137c819792209c5ce4e4e1ee5da73d574fe *vp90-2-14-resize-10frames-fp-tiles-1-2.webm 0c78a154956a8605d050bdd75e0dcc4d39c040a6 *vp90-2-14-resize-10frames-fp-tiles-1-2.webm.md5 d6a8d8c57f66a91d23e8e7df480f9ae841e56c37 *vp90-2-14-resize-10frames-fp-tiles-1-4.webm e9b4e8c7b33b5fda745d340c3f47e6623ae40cf2 *vp90-2-14-resize-10frames-fp-tiles-1-4.webm.md5 aa6fe043a0c4a42b49c87ebbe812d4afd9945bec *vp90-2-14-resize-10frames-fp-tiles-1-8.webm 028520578994c2d013d4c0129033d4f2ff31bbe0 *vp90-2-14-resize-10frames-fp-tiles-1-8.webm.md5 d1d5463c9ea7b5cc5f609ddedccddf656f348d1a *vp90-2-14-resize-10frames-fp-tiles-2-1.webm 92d5872f5bdffbed721703b7e959b4f885e3d77a *vp90-2-14-resize-10frames-fp-tiles-2-1.webm.md5 677cb29de1215d97346015af5807a9b1faad54cf *vp90-2-14-resize-10frames-fp-tiles-2-4.webm a5db19f977094ec3fd60b4f7671b3e6740225e12 *vp90-2-14-resize-10frames-fp-tiles-2-4.webm.md5 cdd3c52ba21067efdbb2de917fe2a965bf27332e *vp90-2-14-resize-10frames-fp-tiles-2-8.webm db17ec5d894ea8b8d0b7f32206d0dd3d46dcfa6d *vp90-2-14-resize-10frames-fp-tiles-2-8.webm.md5 0f6093c472125d05b764d7d1965c1d56771c0ea2 *vp90-2-14-resize-10frames-fp-tiles-4-1.webm bc7c79e1bee07926dd970462ce6f64fc30eec3e1 *vp90-2-14-resize-10frames-fp-tiles-4-1.webm.md5 c5142e2bff4091338196c8ea8bc9266e64f548bc *vp90-2-14-resize-10frames-fp-tiles-4-2.webm 22aa3dd430b69fd3d92f6561bac86deeed90486d *vp90-2-14-resize-10frames-fp-tiles-4-2.webm.md5 ede8b1466d2f26e1b1bd9602addb9cd1017e1d8c *vp90-2-14-resize-10frames-fp-tiles-4-8.webm 508d5ebb9c0eac2a4100281a3ee052ec2fc19217 *vp90-2-14-resize-10frames-fp-tiles-4-8.webm.md5 2b292e3392854cd1d76ae597a6f53656cf741cfa *vp90-2-14-resize-10frames-fp-tiles-8-1.webm 1c24e54fa19e94e1722f24676404444e941c3d31 *vp90-2-14-resize-10frames-fp-tiles-8-1.webm.md5 61beda21064e09634564caa6697ab90bd53c9af7 *vp90-2-14-resize-10frames-fp-tiles-8-2.webm 9c0657b4d9e1d0e4c9d28a90e5a8630a65519124 *vp90-2-14-resize-10frames-fp-tiles-8-2.webm.md5 1758c50a11a7c92522749b4a251664705f1f0d4b *vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm 4f454a06750614314ae15a44087b79016fe2db97 *vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm.md5 3920c95ba94f1f048a731d9d9b416043b44aa4bd *vp90-2-14-resize-10frames-fp-tiles-8-4.webm 4eb347a0456d2c49a1e1d8de5aa1c51acc39887e *vp90-2-14-resize-10frames-fp-tiles-8-4.webm.md5 4b95a74c032a473b6683d7ad5754db1b0ec378e9 *vp90-2-21-resize_inter_1280x720_5_1-2.webm a7826dd386bedfe69d02736969bfb47fb6a40a5e *vp90-2-21-resize_inter_1280x720_5_1-2.webm.md5 5cfff79e82c4d69964ccb8e75b4f0c53b9295167 *vp90-2-21-resize_inter_1280x720_5_3-4.webm a18f57db4a25e1f543a99f2ceb182e00db0ee22f *vp90-2-21-resize_inter_1280x720_5_3-4.webm.md5 d26db0811bf30eb4131d928669713e2485f8e833 *vp90-2-21-resize_inter_1280x720_7_1-2.webm fd6f9f332cd5bea4c0f0d57be4297bea493cc5a1 *vp90-2-21-resize_inter_1280x720_7_1-2.webm.md5 5c7d73d4d268e2ba9593b31cb091fd339505c7fd *vp90-2-21-resize_inter_1280x720_7_3-4.webm 7bbb949cabc1e70dadcc74582739f63b833034e0 *vp90-2-21-resize_inter_1280x720_7_3-4.webm.md5 f2d2a41a60eb894aff0c5854afca15931f1445a8 *vp90-2-21-resize_inter_1920x1080_5_1-2.webm 66d7789992613ac9d678ff905ff1059daa1b89e4 *vp90-2-21-resize_inter_1920x1080_5_1-2.webm.md5 764edb75fe7dd64e73a1b4f3b4b2b1bf237a4dea *vp90-2-21-resize_inter_1920x1080_5_3-4.webm f78bea1075983fd990e7f25d4f31438f9b5efa34 
*vp90-2-21-resize_inter_1920x1080_5_3-4.webm.md5 96496f2ade764a5de9f0c27917c7df1f120fb2ef *vp90-2-21-resize_inter_1920x1080_7_1-2.webm 2632b635135ed5ecd67fd22dec7990d29c4f4cb5 *vp90-2-21-resize_inter_1920x1080_7_1-2.webm.md5 74889ea42001bf41428cb742ca74e65129c886dc *vp90-2-21-resize_inter_1920x1080_7_3-4.webm d2cf3b25956415bb579d368e7098097e482dd73a *vp90-2-21-resize_inter_1920x1080_7_3-4.webm.md5 4658986a8ce36ebfcc80a1903e446eaab3985336 *vp90-2-21-resize_inter_320x180_5_1-2.webm 8a3d8cf325109ffa913cc9426c32eea8c202a09a *vp90-2-21-resize_inter_320x180_5_1-2.webm.md5 16303aa45176520ee42c2c425247aadc1506b881 *vp90-2-21-resize_inter_320x180_5_3-4.webm 41cab1ddf7715b680a4dbce42faa9bcd72af4e5c *vp90-2-21-resize_inter_320x180_5_3-4.webm.md5 56648adcee66dd0e5cb6ac947f5ee1b9cc8ba129 *vp90-2-21-resize_inter_320x180_7_1-2.webm 70047377787003cc03dda7b2394e6d7eaa666d9e *vp90-2-21-resize_inter_320x180_7_1-2.webm.md5 d2ff99165488499cc55f75929f1ce5ca9c9e359b *vp90-2-21-resize_inter_320x180_7_3-4.webm e69019e378114a4643db283b66d1a7e304761a56 *vp90-2-21-resize_inter_320x180_7_3-4.webm.md5 4834d129bed0f4289d3a88f2ae3a1736f77621b0 *vp90-2-21-resize_inter_320x240_5_1-2.webm a75653c53d22b623c1927fc0088da21dafef21f4 *vp90-2-21-resize_inter_320x240_5_1-2.webm.md5 19818e1b7fd1c1e63d8873c31b0babe29dd33ba6 *vp90-2-21-resize_inter_320x240_5_3-4.webm 8d89814ff469a186312111651b16601dfbce4336 *vp90-2-21-resize_inter_320x240_5_3-4.webm.md5 ac8057bae52498f324ce92a074d5f8207cc4a4a7 *vp90-2-21-resize_inter_320x240_7_1-2.webm 2643440898c83c08cc47bc744245af696b877c24 *vp90-2-21-resize_inter_320x240_7_1-2.webm.md5 cf4a4cd38ac8b18c42d8c25a3daafdb39132256b *vp90-2-21-resize_inter_320x240_7_3-4.webm 70ba8ec9120b26e9b0ffa2c79b432f16cbcb50ec *vp90-2-21-resize_inter_320x240_7_3-4.webm.md5 669f10409fe1c4a054010162ca47773ea1fdbead *vp90-2-21-resize_inter_640x360_5_1-2.webm 6355a04249004a35fb386dd1024214234f044383 *vp90-2-21-resize_inter_640x360_5_1-2.webm.md5 c23763b950b8247c1775d1f8158d93716197676c *vp90-2-21-resize_inter_640x360_5_3-4.webm 59e6fc381e3ec3b7bdaac586334e0bc944d18fb6 *vp90-2-21-resize_inter_640x360_5_3-4.webm.md5 71b45cbfdd068baa1f679a69e5e6f421d256a85f *vp90-2-21-resize_inter_640x360_7_1-2.webm 1416fc761b690c54a955c4cf017fa078520e8c18 *vp90-2-21-resize_inter_640x360_7_1-2.webm.md5 6c409903279448a697e4db63bab1061784bcd8d2 *vp90-2-21-resize_inter_640x360_7_3-4.webm 60de1299793433a630b71130cf76c9f5965758e2 *vp90-2-21-resize_inter_640x360_7_3-4.webm.md5 852b597b8af096d90c80bf0ed6ed3b336b851f19 *vp90-2-21-resize_inter_640x480_5_1-2.webm f6856f19236ee46ed462bd0a2e7e72b9c3b9cea6 *vp90-2-21-resize_inter_640x480_5_1-2.webm.md5 792a16c6f60043bd8dceb515f0b95b8891647858 *vp90-2-21-resize_inter_640x480_5_3-4.webm 68ffe59877e9a7863805e1c0a3ce18ce037d7c9d *vp90-2-21-resize_inter_640x480_5_3-4.webm.md5 61e044c4759972a35ea3db8c1478a988910a4ef4 *vp90-2-21-resize_inter_640x480_7_1-2.webm 7739bfca167b1b43fea72f807f01e097b7cb98d8 *vp90-2-21-resize_inter_640x480_7_1-2.webm.md5 7291af354b4418917eee00e3a7e366086a0b7a10 *vp90-2-21-resize_inter_640x480_7_3-4.webm 4a18b09ccb36564193f0215f599d745d95bb558c *vp90-2-21-resize_inter_640x480_7_3-4.webm.md5 a000d568431d07379dd5a8ec066061c07e560b47 *invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf 1e75aad3433c5c21c194a7b53fc393970f0a8d7f *invalid-vp90-2-00-quantizer-63.ivf.kf_65527x61446.ivf.res 235182f9a1c5c8841552510dd4288487447bfc40 *invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf 787f04f0483320d536894282f3358a4f8cac1cf9 *invalid-vp80-00-comprehensive-018.ivf.2kf_0x6.ivf.res 
91d3cefd0deb98f3b0caf3a2d900ec7a7605e53a *invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf 1e472baaf5f6113459f0399a38a5a5e68d17799d *invalid-vp90-2-10-show-existing-frame.webm.ivf.s180315_r01-05_b6-.ivf.res 70057835bf29d14e66699ce5f022df2551fb6b37 *invalid-crbug-629481.webm 5d9474c0309b7ca09a182d888f73b37a8fe1362c *invalid-crbug-629481.webm.res 7602e00378161ca36ae93cc6ee12dd30b5ba1e1d *vp90-2-22-svc_1280x720_3.ivf 02e53e3eefbf25ec0929047fe50876acdeb040bd *vp90-2-22-svc_1280x720_3.ivf.md5 6fa3d3ac306a3d9ce1d610b78441dc00d2c2d4b9 *tos_vp8.webm e402cbbf9e550ae017a1e9f1f73931c1d18474e8 *invalid-crbug-667044.webm d3964f9dad9f60363c81b688324d95b4ec7c8038 *invalid-crbug-667044.webm.res fd9df7f3f6992af1d7a9dde975c9a0d6f28c053d *invalid-bug-1443.ivf fd3020fa6e9ca5966206738654c97dec313b0a95 *invalid-bug-1443.ivf.res 1a0e405606939f2febab1a21b30c37cb8f2c8cb1 *invalid-token-partition.ivf 90a8a95e7024f015b87f5483a65036609b3d1b74 *invalid-token-partition.ivf.res 17696cd21e875f1d6e5d418cbf89feab02c8850a *vp90-2-22-svc_1280x720_1.webm e2f9e1e47a791b4e939a9bdc50bf7a25b3761f77 *vp90-2-22-svc_1280x720_1.webm.md5 a0fbbbc5dd50fd452096f4455a58c1a8c9f66697 *invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf a61774cf03fc584bd9f0904fc145253bb8ea6c4c *invalid-vp80-00-comprehensive-s17661_r01-05_b6-.ivf.res 894fae3afee0290546590823974203ab4b8abd95 *crbug-1539.rawfile f1026c03efd5da21b381c8eb21f0d64e6d7e4ba3 *invalid-crbug-1558.ivf eb198c25f861c3fe2cbd310de11eb96843019345 *invalid-crbug-1558.ivf.res c62b005a9fd32c36a1b3f67de6840330f9915e34 *invalid-crbug-1562.ivf f0cd8389948ad16085714d96567612136f6a46c5 *invalid-crbug-1562.ivf.res bac455906360b45338a16dd626ac5f19bc36a307 *desktop_office1.1280_720-020.yuv 094be4b80fa30bd227149ea16ab6476d549ea092 *slides_code_term_web_plot.1920_1080.yuv libvpx-1.8.2/test/test.mk000066400000000000000000000212261357355204000153240ustar00rootroot00000000000000LIBVPX_TEST_SRCS-yes += acm_random.h LIBVPX_TEST_SRCS-yes += bench.h LIBVPX_TEST_SRCS-yes += bench.cc LIBVPX_TEST_SRCS-yes += buffer.h LIBVPX_TEST_SRCS-yes += clear_system_state.h LIBVPX_TEST_SRCS-yes += codec_factory.h LIBVPX_TEST_SRCS-yes += md5_helper.h LIBVPX_TEST_SRCS-yes += register_state_check.h LIBVPX_TEST_SRCS-yes += test.mk LIBVPX_TEST_SRCS-yes += test_libvpx.cc LIBVPX_TEST_SRCS-yes += test_vectors.cc LIBVPX_TEST_SRCS-yes += test_vectors.h LIBVPX_TEST_SRCS-yes += util.h LIBVPX_TEST_SRCS-yes += video_source.h ## ## BLACK BOX TESTS ## ## Black box tests only use the public API. 
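## A note on the list convention used throughout this file: sources are
## gathered into LIBVPX_TEST_SRCS-$(VAR) variables and the build only picks
## up the "-yes" list, so an entry is compiled exactly when its config
## variable expands to "yes". A hedged, self-contained sketch of the idiom
## (CONFIG_FOO and the file names below are hypothetical, not part of libvpx):
##
##   CONFIG_FOO         := yes
##   SRCS-yes           += always_built_test.cc
##   SRCS-$(CONFIG_FOO) += foo_feature_test.cc  # lands in SRCS-yes when CONFIG_FOO=yes
##   SRCS               := $(SRCS-yes)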
## LIBVPX_TEST_SRCS-yes += ../md5_utils.h ../md5_utils.c LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ivf_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../y4minput.h ../y4minput.c LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += altref_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += aq_segment_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += alt_ref_aq_segment_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += vp8_datarate_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += vp9_datarate_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += encode_api_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += error_resilience_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += i420_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += realtime_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += resize_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += yuv_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += config_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += cq_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += keyframe_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += byte_alignment_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += decode_svc_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += external_frame_buffer_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += user_priv_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_refresh_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += active_map_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += borders_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += cpu_speed_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += frame_size_tests.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_lossless_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_end_to_end_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += decode_corrupted.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_ethread_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_motion_vector_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += level_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_datarate_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_test.h LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += svc_end_to_end_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += timestamp_test.cc LIBVPX_TEST_SRCS-$(CONFIG_RATE_CTRL) += simple_encode_test.cc LIBVPX_TEST_SRCS-yes += decode_test_driver.cc LIBVPX_TEST_SRCS-yes += decode_test_driver.h LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += encode_test_driver.cc LIBVPX_TEST_SRCS-yes += encode_test_driver.h ## IVF writing. LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += ../ivfenc.c ../ivfenc.h ## Y4m parsing. 
LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += y4m_test.cc ../y4menc.c ../y4menc.h ## WebM Parsing ifeq ($(CONFIG_WEBM_IO), yes) LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.cc LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.cc LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvparser.h LIBWEBM_PARSER_SRCS += ../third_party/libwebm/mkvparser/mkvreader.h LIBWEBM_PARSER_SRCS += ../third_party/libwebm/common/webmids.h LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += $(LIBWEBM_PARSER_SRCS) LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../tools_common.h LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.cc LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += ../webmdec.h LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += webm_video_source.h LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_skip_loopfilter_test.cc endif LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += decode_api_test.cc LIBVPX_TEST_SRCS-$(CONFIG_DECODERS) += test_vector_test.cc # Currently we only support decoder perf tests for vp9. Also they read from WebM # files, so WebM IO is required. ifeq ($(CONFIG_DECODE_PERF_TESTS)$(CONFIG_VP9_DECODER)$(CONFIG_WEBM_IO), \ yesyesyes) LIBVPX_TEST_SRCS-yes += decode_perf_test.cc endif # encode perf tests are vp9 only ifeq ($(CONFIG_ENCODE_PERF_TESTS)$(CONFIG_VP9_ENCODER), yesyes) LIBVPX_TEST_SRCS-yes += encode_perf_test.cc endif ## Multi-codec blackbox tests. ifeq ($(findstring yes,$(CONFIG_VP8_DECODER)$(CONFIG_VP9_DECODER)), yes) LIBVPX_TEST_SRCS-yes += invalid_file_test.cc endif ## ## WHITE BOX TESTS ## ## Whitebox tests invoke functions not exposed via the public API. Certain ## shared library builds don't make these functions accessible. ## ifeq ($(CONFIG_SHARED),) ## VP8 ifeq ($(CONFIG_VP8),yes) # These tests require both the encoder and decoder to be built. ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_VP8_DECODER),yesyes) LIBVPX_TEST_SRCS-yes += vp8_boolcoder_test.cc LIBVPX_TEST_SRCS-yes += vp8_fragments_test.cc endif LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += add_noise_test.cc LIBVPX_TEST_SRCS-$(CONFIG_POSTPROC) += pp_filter_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_DECODER) += vp8_decrypt_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += quantize_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += set_roi.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += variance_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP8_ENCODER) += vp8_fdct4x4_test.cc LIBVPX_TEST_SRCS-yes += idct_test.cc LIBVPX_TEST_SRCS-yes += predict_test.cc LIBVPX_TEST_SRCS-yes += vpx_scale_test.cc LIBVPX_TEST_SRCS-yes += vpx_scale_test.h ifeq ($(CONFIG_VP8_ENCODER)$(CONFIG_TEMPORAL_DENOISING),yesyes) LIBVPX_TEST_SRCS-$(HAVE_SSE2) += vp8_denoiser_sse2_test.cc endif endif # VP8 ## VP9 ifeq ($(CONFIG_VP9),yes) # These tests require both the encoder and decoder to be built. 
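# (The $(A)$(B),yesyes comparison that follows is make's usual stand-in for a
#  boolean AND: the two config values are concatenated and matched against
#  "yesyes", so the branch is taken only when both are "yes". A hedged,
#  minimal sketch with hypothetical variable/file names:
#    ifeq ($(CONFIG_FOO_ENCODER)$(CONFIG_FOO_DECODER),yesyes)
#    LIBVPX_TEST_SRCS-yes += foo_roundtrip_test.cc
#    endif
#  The same trick appears above with three terms, "yesyesyes".)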
ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_DECODER),yesyes) # IDCT test currently depends on FDCT function LIBVPX_TEST_SRCS-yes += idct8x8_test.cc LIBVPX_TEST_SRCS-yes += partial_idct_test.cc LIBVPX_TEST_SRCS-yes += superframe_test.cc LIBVPX_TEST_SRCS-yes += tile_independence_test.cc LIBVPX_TEST_SRCS-yes += vp9_boolcoder_test.cc LIBVPX_TEST_SRCS-yes += vp9_encoder_parms_get_to_decoder.cc endif LIBVPX_TEST_SRCS-yes += convolve_test.cc LIBVPX_TEST_SRCS-yes += lpf_test.cc LIBVPX_TEST_SRCS-yes += vp9_intrapred_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_decrypt_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_DECODER) += vp9_thread_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += avg_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += comp_avg_pred_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct16x16_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct32x32_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct_partial_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += dct_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += fdct8x8_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += hadamard_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += minmax_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_scale_test.cc ifneq ($(CONFIG_REALTIME_ONLY),yes) LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += yuv_temporal_filter_test.cc endif LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += variance_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_block_error_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_quantize_test.cc LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_subtract_test.cc ifeq ($(CONFIG_VP9_ENCODER),yes) LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += blockiness_test.cc LIBVPX_TEST_SRCS-$(CONFIG_INTERNAL_STATS) += consistency_test.cc endif ifeq ($(CONFIG_VP9_ENCODER),yes) LIBVPX_TEST_SRCS-$(CONFIG_NON_GREEDY_MV) += non_greedy_mv_test.cc endif ifeq ($(CONFIG_VP9_ENCODER)$(CONFIG_VP9_TEMPORAL_DENOISING),yesyes) LIBVPX_TEST_SRCS-yes += vp9_denoiser_test.cc endif LIBVPX_TEST_SRCS-$(CONFIG_VP9_ENCODER) += vp9_arf_freq_test.cc endif # VP9 ## Multi-codec / unconditional whitebox tests. LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sad_test.cc LIBVPX_TEST_SRCS-$(CONFIG_ENCODERS) += sum_squares_test.cc TEST_INTRA_PRED_SPEED_SRCS-yes := test_intra_pred_speed.cc TEST_INTRA_PRED_SPEED_SRCS-yes += ../md5_utils.h ../md5_utils.c endif # CONFIG_SHARED include $(SRC_PATH_BARE)/test/test-data.mk libvpx-1.8.2/test/test_intra_pred_speed.cc000066400000000000000000000727131357355204000207000ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ // Test and time VPX intra-predictor functions #include <stdio.h> #include <string.h> #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/md5_helper.h" #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" // ----------------------------------------------------------------------------- namespace { typedef void (*VpxPredFunc)(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left); const int kBPS = 32; const int kTotalPixels = 32 * kBPS; const int kNumVp9IntraPredFuncs = 13; const char *kVp9IntraPredNames[kNumVp9IntraPredFuncs] = { "DC_PRED", "DC_LEFT_PRED", "DC_TOP_PRED", "DC_128_PRED", "V_PRED", "H_PRED", "D45_PRED", "D135_PRED", "D117_PRED", "D153_PRED", "D207_PRED", "D63_PRED", "TM_PRED" }; template <typename Pixel> struct IntraPredTestMem { void Init(int block_size, int bd) { libvpx_test::ACMRandom rnd(libvpx_test::ACMRandom::DeterministicSeed()); Pixel *const above = above_mem + 16; const int mask = (1 << bd) - 1; for (int i = 0; i < kTotalPixels; ++i) ref_src[i] = rnd.Rand16() & mask; for (int i = 0; i < kBPS; ++i) left[i] = rnd.Rand16() & mask; for (int i = -1; i < kBPS; ++i) above[i] = rnd.Rand16() & mask; // some code assumes the top row has been extended: // d45/d63 C-code, for instance, but not the assembly. // TODO(jzern): this style of extension isn't strictly necessary. ASSERT_LE(block_size, kBPS); for (int i = block_size; i < 2 * kBPS; ++i) { above[i] = above[block_size - 1]; } } DECLARE_ALIGNED(16, Pixel, src[kTotalPixels]); DECLARE_ALIGNED(16, Pixel, ref_src[kTotalPixels]); DECLARE_ALIGNED(16, Pixel, left[kBPS]); DECLARE_ALIGNED(16, Pixel, above_mem[2 * kBPS + 16]); }; typedef IntraPredTestMem<uint8_t> Vp9IntraPredTestMem; void CheckMd5Signature(const char name[], const char *const signatures[], const void *data, size_t data_size, int elapsed_time, int idx) { libvpx_test::MD5 md5; md5.Add(reinterpret_cast<const uint8_t *>(data), data_size); printf("Mode %s[%12s]: %5d ms MD5: %s\n", name, kVp9IntraPredNames[idx], elapsed_time, md5.Get()); EXPECT_STREQ(signatures[idx], md5.Get()); } void TestIntraPred(const char name[], VpxPredFunc const *pred_funcs, const char *const signatures[], int block_size) { const int kNumTests = static_cast<int>( 2.e10 / (block_size * block_size * kNumVp9IntraPredFuncs)); Vp9IntraPredTestMem intra_pred_test_mem; const uint8_t *const above = intra_pred_test_mem.above_mem + 16; intra_pred_test_mem.Init(block_size, 8); for (int k = 0; k < kNumVp9IntraPredFuncs; ++k) { if (pred_funcs[k] == NULL) continue; memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src, sizeof(intra_pred_test_mem.src)); vpx_usec_timer timer; vpx_usec_timer_start(&timer); for (int num_tests = 0; num_tests < kNumTests; ++num_tests) { pred_funcs[k](intra_pred_test_mem.src, kBPS, above, intra_pred_test_mem.left); } libvpx_test::ClearSystemState(); vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000); CheckMd5Signature(name, signatures, intra_pred_test_mem.src, sizeof(intra_pred_test_mem.src), elapsed_time, k); } } void TestIntraPred4(VpxPredFunc const *pred_funcs) { static const char *const kSignatures[kNumVp9IntraPredFuncs] = { "e7ed7353c3383fff942e500e9bfe82fe", "2a4a26fcc6ce005eadc08354d196c8a9", "269d92eff86f315d9c38fe7640d85b15", "ae2960eea9f71ee3dabe08b282ec1773", "6c1abcc44e90148998b51acd11144e9c", "f7bb3186e1ef8a2b326037ff898cad8e", "364c1f3fb2f445f935aec2a70a67eaa4", "141624072a4a56773f68fadbdd07c4a7",
"7be49b08687a5f24df3a2c612fca3876", "459bb5d9fd5b238348179c9a22108cd6", "73edb8831bf1bdfce21ae8eaa43b1234", "2e2457f2009c701a355a8b25eb74fcda", "52ae4e8bdbe41494c1f43051d4dd7f0b" }; TestIntraPred("Intra4", pred_funcs, kSignatures, 4); } void TestIntraPred8(VpxPredFunc const *pred_funcs) { static const char *const kSignatures[kNumVp9IntraPredFuncs] = { "d8bbae5d6547cfc17e4f5f44c8730e88", "373bab6d931868d41a601d9d88ce9ac3", "6fdd5ff4ff79656c14747598ca9e3706", "d9661c2811d6a73674f40ffb2b841847", "7c722d10b19ccff0b8c171868e747385", "f81dd986eb2b50f750d3a7da716b7e27", "d500f2c8fc78f46a4c74e4dcf51f14fb", "0e3523f9cab2142dd37fd07ec0760bce", "79ac4efe907f0a0f1885d43066cfedee", "19ecf2432ac305057de3b6578474eec6", "4f985b61acc6dd5d2d2585fa89ea2e2d", "f1bb25a9060dd262f405f15a38f5f674", "209ea00801584829e9a0f7be7d4a74ba" }; TestIntraPred("Intra8", pred_funcs, kSignatures, 8); } void TestIntraPred16(VpxPredFunc const *pred_funcs) { static const char *const kSignatures[kNumVp9IntraPredFuncs] = { "50971c07ce26977d30298538fffec619", "527a6b9e0dc5b21b98cf276305432bef", "7eff2868f80ebc2c43a4f367281d80f7", "67cd60512b54964ef6aff1bd4816d922", "48371c87dc95c08a33b2048f89cf6468", "b0acf2872ee411d7530af6d2625a7084", "f32aafed4d8d3776ed58bcb6188756d5", "dae208f3dca583529cff49b73f7c4183", "7af66a2f4c8e0b4908e40f047e60c47c", "125e3ab6ab9bc961f183ec366a7afa88", "6b90f25b23983c35386b9fd704427622", "f8d6b11d710edc136a7c62c917435f93", "ed308f18614a362917f411c218aee532" }; TestIntraPred("Intra16", pred_funcs, kSignatures, 16); } void TestIntraPred32(VpxPredFunc const *pred_funcs) { static const char *const kSignatures[kNumVp9IntraPredFuncs] = { "a0a618c900e65ae521ccc8af789729f2", "985aaa7c72b4a6c2fb431d32100cf13a", "10662d09febc3ca13ee4e700120daeb5", "b3b01379ba08916ef6b1b35f7d9ad51c", "9f4261755795af97e34679c333ec7004", "bc2c9da91ad97ef0d1610fb0a9041657", "75c79b1362ad18abfcdb1aa0aacfc21d", "4039bb7da0f6860090d3c57b5c85468f", "b29fff7b61804e68383e3a609b33da58", "e1aa5e49067fd8dba66c2eb8d07b7a89", "4e042822909c1c06d3b10a88281df1eb", "72eb9d9e0e67c93f4c66b70348e9fef7", "a22d102bcb51ca798aac12ca4ae8f2e8" }; TestIntraPred("Intra32", pred_funcs, kSignatures, 32); } } // namespace // Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors // to |test_func|. The test name is 'arch.test_func', e.g., C.TestIntraPred4. 
#define INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128, v, h, \ d45, d135, d117, d153, d207, d63, tm) \ TEST(arch, test_func) { \ static const VpxPredFunc vpx_intra_pred[] = { \ dc, dc_left, dc_top, dc_128, v, h, d45, d135, d117, d153, d207, d63, tm \ }; \ test_func(vpx_intra_pred); \ } // ----------------------------------------------------------------------------- INTRA_PRED_TEST(C, TestIntraPred4, vpx_dc_predictor_4x4_c, vpx_dc_left_predictor_4x4_c, vpx_dc_top_predictor_4x4_c, vpx_dc_128_predictor_4x4_c, vpx_v_predictor_4x4_c, vpx_h_predictor_4x4_c, vpx_d45_predictor_4x4_c, vpx_d135_predictor_4x4_c, vpx_d117_predictor_4x4_c, vpx_d153_predictor_4x4_c, vpx_d207_predictor_4x4_c, vpx_d63_predictor_4x4_c, vpx_tm_predictor_4x4_c) INTRA_PRED_TEST(C, TestIntraPred8, vpx_dc_predictor_8x8_c, vpx_dc_left_predictor_8x8_c, vpx_dc_top_predictor_8x8_c, vpx_dc_128_predictor_8x8_c, vpx_v_predictor_8x8_c, vpx_h_predictor_8x8_c, vpx_d45_predictor_8x8_c, vpx_d135_predictor_8x8_c, vpx_d117_predictor_8x8_c, vpx_d153_predictor_8x8_c, vpx_d207_predictor_8x8_c, vpx_d63_predictor_8x8_c, vpx_tm_predictor_8x8_c) INTRA_PRED_TEST(C, TestIntraPred16, vpx_dc_predictor_16x16_c, vpx_dc_left_predictor_16x16_c, vpx_dc_top_predictor_16x16_c, vpx_dc_128_predictor_16x16_c, vpx_v_predictor_16x16_c, vpx_h_predictor_16x16_c, vpx_d45_predictor_16x16_c, vpx_d135_predictor_16x16_c, vpx_d117_predictor_16x16_c, vpx_d153_predictor_16x16_c, vpx_d207_predictor_16x16_c, vpx_d63_predictor_16x16_c, vpx_tm_predictor_16x16_c) INTRA_PRED_TEST(C, TestIntraPred32, vpx_dc_predictor_32x32_c, vpx_dc_left_predictor_32x32_c, vpx_dc_top_predictor_32x32_c, vpx_dc_128_predictor_32x32_c, vpx_v_predictor_32x32_c, vpx_h_predictor_32x32_c, vpx_d45_predictor_32x32_c, vpx_d135_predictor_32x32_c, vpx_d117_predictor_32x32_c, vpx_d153_predictor_32x32_c, vpx_d207_predictor_32x32_c, vpx_d63_predictor_32x32_c, vpx_tm_predictor_32x32_c) #if HAVE_SSE2 INTRA_PRED_TEST(SSE2, TestIntraPred4, vpx_dc_predictor_4x4_sse2, vpx_dc_left_predictor_4x4_sse2, vpx_dc_top_predictor_4x4_sse2, vpx_dc_128_predictor_4x4_sse2, vpx_v_predictor_4x4_sse2, vpx_h_predictor_4x4_sse2, vpx_d45_predictor_4x4_sse2, NULL, NULL, NULL, vpx_d207_predictor_4x4_sse2, NULL, vpx_tm_predictor_4x4_sse2) INTRA_PRED_TEST(SSE2, TestIntraPred8, vpx_dc_predictor_8x8_sse2, vpx_dc_left_predictor_8x8_sse2, vpx_dc_top_predictor_8x8_sse2, vpx_dc_128_predictor_8x8_sse2, vpx_v_predictor_8x8_sse2, vpx_h_predictor_8x8_sse2, vpx_d45_predictor_8x8_sse2, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_sse2) INTRA_PRED_TEST(SSE2, TestIntraPred16, vpx_dc_predictor_16x16_sse2, vpx_dc_left_predictor_16x16_sse2, vpx_dc_top_predictor_16x16_sse2, vpx_dc_128_predictor_16x16_sse2, vpx_v_predictor_16x16_sse2, vpx_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_16x16_sse2) INTRA_PRED_TEST(SSE2, TestIntraPred32, vpx_dc_predictor_32x32_sse2, vpx_dc_left_predictor_32x32_sse2, vpx_dc_top_predictor_32x32_sse2, vpx_dc_128_predictor_32x32_sse2, vpx_v_predictor_32x32_sse2, vpx_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_32x32_sse2) #endif // HAVE_SSE2 #if HAVE_SSSE3 INTRA_PRED_TEST(SSSE3, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, vpx_d153_predictor_4x4_ssse3, NULL, vpx_d63_predictor_4x4_ssse3, NULL) INTRA_PRED_TEST(SSSE3, TestIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, vpx_d153_predictor_8x8_ssse3, vpx_d207_predictor_8x8_ssse3, vpx_d63_predictor_8x8_ssse3, NULL) INTRA_PRED_TEST(SSSE3, TestIntraPred16, NULL, NULL, 
NULL, NULL, NULL, NULL, vpx_d45_predictor_16x16_ssse3, NULL, NULL, vpx_d153_predictor_16x16_ssse3, vpx_d207_predictor_16x16_ssse3, vpx_d63_predictor_16x16_ssse3, NULL) INTRA_PRED_TEST(SSSE3, TestIntraPred32, NULL, NULL, NULL, NULL, NULL, NULL, vpx_d45_predictor_32x32_ssse3, NULL, NULL, vpx_d153_predictor_32x32_ssse3, vpx_d207_predictor_32x32_ssse3, vpx_d63_predictor_32x32_ssse3, NULL) #endif // HAVE_SSSE3 #if HAVE_DSPR2 INTRA_PRED_TEST(DSPR2, TestIntraPred4, vpx_dc_predictor_4x4_dspr2, NULL, NULL, NULL, NULL, vpx_h_predictor_4x4_dspr2, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_4x4_dspr2) INTRA_PRED_TEST(DSPR2, TestIntraPred8, vpx_dc_predictor_8x8_dspr2, NULL, NULL, NULL, NULL, vpx_h_predictor_8x8_dspr2, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_c) INTRA_PRED_TEST(DSPR2, TestIntraPred16, vpx_dc_predictor_16x16_dspr2, NULL, NULL, NULL, NULL, vpx_h_predictor_16x16_dspr2, NULL, NULL, NULL, NULL, NULL, NULL, NULL) #endif // HAVE_DSPR2 #if HAVE_NEON INTRA_PRED_TEST(NEON, TestIntraPred4, vpx_dc_predictor_4x4_neon, vpx_dc_left_predictor_4x4_neon, vpx_dc_top_predictor_4x4_neon, vpx_dc_128_predictor_4x4_neon, vpx_v_predictor_4x4_neon, vpx_h_predictor_4x4_neon, vpx_d45_predictor_4x4_neon, vpx_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL, vpx_tm_predictor_4x4_neon) INTRA_PRED_TEST(NEON, TestIntraPred8, vpx_dc_predictor_8x8_neon, vpx_dc_left_predictor_8x8_neon, vpx_dc_top_predictor_8x8_neon, vpx_dc_128_predictor_8x8_neon, vpx_v_predictor_8x8_neon, vpx_h_predictor_8x8_neon, vpx_d45_predictor_8x8_neon, vpx_d135_predictor_8x8_neon, NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_neon) INTRA_PRED_TEST(NEON, TestIntraPred16, vpx_dc_predictor_16x16_neon, vpx_dc_left_predictor_16x16_neon, vpx_dc_top_predictor_16x16_neon, vpx_dc_128_predictor_16x16_neon, vpx_v_predictor_16x16_neon, vpx_h_predictor_16x16_neon, vpx_d45_predictor_16x16_neon, vpx_d135_predictor_16x16_neon, NULL, NULL, NULL, NULL, vpx_tm_predictor_16x16_neon) INTRA_PRED_TEST(NEON, TestIntraPred32, vpx_dc_predictor_32x32_neon, vpx_dc_left_predictor_32x32_neon, vpx_dc_top_predictor_32x32_neon, vpx_dc_128_predictor_32x32_neon, vpx_v_predictor_32x32_neon, vpx_h_predictor_32x32_neon, vpx_d45_predictor_32x32_neon, vpx_d135_predictor_32x32_neon, NULL, NULL, NULL, NULL, vpx_tm_predictor_32x32_neon) #endif // HAVE_NEON #if HAVE_MSA INTRA_PRED_TEST(MSA, TestIntraPred4, vpx_dc_predictor_4x4_msa, vpx_dc_left_predictor_4x4_msa, vpx_dc_top_predictor_4x4_msa, vpx_dc_128_predictor_4x4_msa, vpx_v_predictor_4x4_msa, vpx_h_predictor_4x4_msa, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_4x4_msa) INTRA_PRED_TEST(MSA, TestIntraPred8, vpx_dc_predictor_8x8_msa, vpx_dc_left_predictor_8x8_msa, vpx_dc_top_predictor_8x8_msa, vpx_dc_128_predictor_8x8_msa, vpx_v_predictor_8x8_msa, vpx_h_predictor_8x8_msa, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_8x8_msa) INTRA_PRED_TEST(MSA, TestIntraPred16, vpx_dc_predictor_16x16_msa, vpx_dc_left_predictor_16x16_msa, vpx_dc_top_predictor_16x16_msa, vpx_dc_128_predictor_16x16_msa, vpx_v_predictor_16x16_msa, vpx_h_predictor_16x16_msa, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_16x16_msa) INTRA_PRED_TEST(MSA, TestIntraPred32, vpx_dc_predictor_32x32_msa, vpx_dc_left_predictor_32x32_msa, vpx_dc_top_predictor_32x32_msa, vpx_dc_128_predictor_32x32_msa, vpx_v_predictor_32x32_msa, vpx_h_predictor_32x32_msa, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_32x32_msa) #endif // HAVE_MSA #if HAVE_VSX // TODO(crbug.com/webm/1522): Fix test failures. 
#if 0 INTRA_PRED_TEST(VSX, TestIntraPred4, NULL, NULL, NULL, NULL, NULL, vpx_h_predictor_4x4_vsx, NULL, NULL, NULL, NULL, NULL, NULL, vpx_tm_predictor_4x4_vsx) INTRA_PRED_TEST(VSX, TestIntraPred8, vpx_dc_predictor_8x8_vsx, NULL, NULL, NULL, NULL, vpx_h_predictor_8x8_vsx, vpx_d45_predictor_8x8_vsx, NULL, NULL, NULL, NULL, vpx_d63_predictor_8x8_vsx, vpx_tm_predictor_8x8_vsx) #endif INTRA_PRED_TEST(VSX, TestIntraPred16, vpx_dc_predictor_16x16_vsx, vpx_dc_left_predictor_16x16_vsx, vpx_dc_top_predictor_16x16_vsx, vpx_dc_128_predictor_16x16_vsx, vpx_v_predictor_16x16_vsx, vpx_h_predictor_16x16_vsx, vpx_d45_predictor_16x16_vsx, NULL, NULL, NULL, NULL, vpx_d63_predictor_16x16_vsx, vpx_tm_predictor_16x16_vsx) INTRA_PRED_TEST(VSX, TestIntraPred32, vpx_dc_predictor_32x32_vsx, vpx_dc_left_predictor_32x32_vsx, vpx_dc_top_predictor_32x32_vsx, vpx_dc_128_predictor_32x32_vsx, vpx_v_predictor_32x32_vsx, vpx_h_predictor_32x32_vsx, vpx_d45_predictor_32x32_vsx, NULL, NULL, NULL, NULL, vpx_d63_predictor_32x32_vsx, vpx_tm_predictor_32x32_vsx) #endif // HAVE_VSX // ----------------------------------------------------------------------------- #if CONFIG_VP9_HIGHBITDEPTH namespace { typedef void (*VpxHighbdPredFunc)(uint16_t *dst, ptrdiff_t y_stride, const uint16_t *above, const uint16_t *left, int bd); typedef IntraPredTestMem<uint16_t> Vp9HighbdIntraPredTestMem; void TestHighbdIntraPred(const char name[], VpxHighbdPredFunc const *pred_funcs, const char *const signatures[], int block_size) { const int kNumTests = static_cast<int>( 2.e10 / (block_size * block_size * kNumVp9IntraPredFuncs)); Vp9HighbdIntraPredTestMem intra_pred_test_mem; const uint16_t *const above = intra_pred_test_mem.above_mem + 16; intra_pred_test_mem.Init(block_size, 12); for (int k = 0; k < kNumVp9IntraPredFuncs; ++k) { if (pred_funcs[k] == NULL) continue; memcpy(intra_pred_test_mem.src, intra_pred_test_mem.ref_src, sizeof(intra_pred_test_mem.src)); vpx_usec_timer timer; vpx_usec_timer_start(&timer); for (int num_tests = 0; num_tests < kNumTests; ++num_tests) { pred_funcs[k](intra_pred_test_mem.src, kBPS, above, intra_pred_test_mem.left, 12); } libvpx_test::ClearSystemState(); vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast<int>(vpx_usec_timer_elapsed(&timer) / 1000); CheckMd5Signature(name, signatures, intra_pred_test_mem.src, sizeof(intra_pred_test_mem.src), elapsed_time, k); } } void TestHighbdIntraPred4(VpxHighbdPredFunc const *pred_funcs) { static const char *const kSignatures[kNumVp9IntraPredFuncs] = { "11f74af6c5737df472f3275cbde062fa", "51bea056b6447c93f6eb8f6b7e8f6f71", "27e97f946766331795886f4de04c5594", "53ab15974b049111fb596c5168ec7e3f", "f0b640bb176fbe4584cf3d32a9b0320a", "729783ca909e03afd4b47111c80d967b", "fbf1c30793d9f32812e4d9f905d53530", "293fc903254a33754133314c6cdba81f", "f8074d704233e73dfd35b458c6092374", "aa6363d08544a1ec4da33d7a0be5640d", "462abcfdfa3d087bb33c9a88f2aec491", "863eab65d22550dd44a2397277c1ec71", "23d61df1574d0fa308f9731811047c4b" }; TestHighbdIntraPred("Intra4", pred_funcs, kSignatures, 4); } void TestHighbdIntraPred8(VpxHighbdPredFunc const *pred_funcs) { static const char *const kSignatures[kNumVp9IntraPredFuncs] = { "03da8829fe94663047fd108c5fcaa71d", "ecdb37b8120a2d3a4c706b016bd1bfd7", "1d4543ed8d2b9368cb96898095fe8a75", "f791c9a67b913cbd82d9da8ecede30e2", "065c70646f4dbaff913282f55a45a441", "51f87123616662ef7c35691497dfd0ba", "2a5b0131ef4716f098ee65e6df01e3dd", "9ffe186a6bc7db95275f1bbddd6f7aba", "a3258a2eae2e2bd55cb8f71351b22998", "8d909f0a2066e39b3216092c6289ece4", "d183abb30b9f24c886a0517e991b22c7",
"702a42fe4c7d665dc561b2aeeb60f311", "7b5dbbbe7ae3a4ac2948731600bde5d6" }; TestHighbdIntraPred("Intra8", pred_funcs, kSignatures, 8); } void TestHighbdIntraPred16(VpxHighbdPredFunc const *pred_funcs) { static const char *const kSignatures[kNumVp9IntraPredFuncs] = { "e33cb3f56a878e2fddb1b2fc51cdd275", "c7bff6f04b6052c8ab335d726dbbd52d", "d0b0b47b654a9bcc5c6008110a44589b", "78f5da7b10b2b9ab39f114a33b6254e9", "c78e31d23831abb40d6271a318fdd6f3", "90d1347f4ec9198a0320daecb6ff90b8", "d2c623746cbb64a0c9e29c10f2c57041", "cf28bd387b81ad3e5f1a1c779a4b70a0", "24c304330431ddeaf630f6ce94af2eac", "91a329798036bf64e8e00a87b131b8b1", "d39111f22885307f920796a42084c872", "e2e702f7250ece98dd8f3f2854c31eeb", "e2fb05b01eb8b88549e85641d8ce5b59" }; TestHighbdIntraPred("Intra16", pred_funcs, kSignatures, 16); } void TestHighbdIntraPred32(VpxHighbdPredFunc const *pred_funcs) { static const char *const kSignatures[kNumVp9IntraPredFuncs] = { "a3e8056ba7e36628cce4917cd956fedd", "cc7d3024fe8748b512407edee045377e", "2aab0a0f330a1d3e19b8ecb8f06387a3", "a547bc3fb7b06910bf3973122a426661", "26f712514da95042f93d6e8dc8e431dc", "bb08c6e16177081daa3d936538dbc2e3", "8f031af3e2650e89620d8d2c3a843d8b", "42867c8553285e94ee8e4df7abafbda8", "6496bdee96100667833f546e1be3d640", "2ebfa25bf981377e682e580208504300", "3e8ae52fd1f607f348aa4cb436c71ab7", "3d4efe797ca82193613696753ea624c4", "cb8aab6d372278f3131e8d99efde02d9" }; TestHighbdIntraPred("Intra32", pred_funcs, kSignatures, 32); } } // namespace // Defines a test case for |arch| (e.g., C, SSE2, ...) passing the predictors // to |test_func|. The test name is 'arch.test_func', e.g., C.TestIntraPred4. #define HIGHBD_INTRA_PRED_TEST(arch, test_func, dc, dc_left, dc_top, dc_128, \ v, h, d45, d135, d117, d153, d207, d63, tm) \ TEST(arch, test_func) { \ static const VpxHighbdPredFunc vpx_intra_pred[] = { \ dc, dc_left, dc_top, dc_128, v, h, d45, d135, d117, d153, d207, d63, tm \ }; \ test_func(vpx_intra_pred); \ } // ----------------------------------------------------------------------------- HIGHBD_INTRA_PRED_TEST( C, TestHighbdIntraPred4, vpx_highbd_dc_predictor_4x4_c, vpx_highbd_dc_left_predictor_4x4_c, vpx_highbd_dc_top_predictor_4x4_c, vpx_highbd_dc_128_predictor_4x4_c, vpx_highbd_v_predictor_4x4_c, vpx_highbd_h_predictor_4x4_c, vpx_highbd_d45_predictor_4x4_c, vpx_highbd_d135_predictor_4x4_c, vpx_highbd_d117_predictor_4x4_c, vpx_highbd_d153_predictor_4x4_c, vpx_highbd_d207_predictor_4x4_c, vpx_highbd_d63_predictor_4x4_c, vpx_highbd_tm_predictor_4x4_c) HIGHBD_INTRA_PRED_TEST( C, TestHighbdIntraPred8, vpx_highbd_dc_predictor_8x8_c, vpx_highbd_dc_left_predictor_8x8_c, vpx_highbd_dc_top_predictor_8x8_c, vpx_highbd_dc_128_predictor_8x8_c, vpx_highbd_v_predictor_8x8_c, vpx_highbd_h_predictor_8x8_c, vpx_highbd_d45_predictor_8x8_c, vpx_highbd_d135_predictor_8x8_c, vpx_highbd_d117_predictor_8x8_c, vpx_highbd_d153_predictor_8x8_c, vpx_highbd_d207_predictor_8x8_c, vpx_highbd_d63_predictor_8x8_c, vpx_highbd_tm_predictor_8x8_c) HIGHBD_INTRA_PRED_TEST( C, TestHighbdIntraPred16, vpx_highbd_dc_predictor_16x16_c, vpx_highbd_dc_left_predictor_16x16_c, vpx_highbd_dc_top_predictor_16x16_c, vpx_highbd_dc_128_predictor_16x16_c, vpx_highbd_v_predictor_16x16_c, vpx_highbd_h_predictor_16x16_c, vpx_highbd_d45_predictor_16x16_c, vpx_highbd_d135_predictor_16x16_c, vpx_highbd_d117_predictor_16x16_c, vpx_highbd_d153_predictor_16x16_c, vpx_highbd_d207_predictor_16x16_c, vpx_highbd_d63_predictor_16x16_c, vpx_highbd_tm_predictor_16x16_c) HIGHBD_INTRA_PRED_TEST( C, TestHighbdIntraPred32, 
vpx_highbd_dc_predictor_32x32_c, vpx_highbd_dc_left_predictor_32x32_c, vpx_highbd_dc_top_predictor_32x32_c, vpx_highbd_dc_128_predictor_32x32_c, vpx_highbd_v_predictor_32x32_c, vpx_highbd_h_predictor_32x32_c, vpx_highbd_d45_predictor_32x32_c, vpx_highbd_d135_predictor_32x32_c, vpx_highbd_d117_predictor_32x32_c, vpx_highbd_d153_predictor_32x32_c, vpx_highbd_d207_predictor_32x32_c, vpx_highbd_d63_predictor_32x32_c, vpx_highbd_tm_predictor_32x32_c) #if HAVE_SSE2 HIGHBD_INTRA_PRED_TEST( SSE2, TestHighbdIntraPred4, vpx_highbd_dc_predictor_4x4_sse2, vpx_highbd_dc_left_predictor_4x4_sse2, vpx_highbd_dc_top_predictor_4x4_sse2, vpx_highbd_dc_128_predictor_4x4_sse2, vpx_highbd_v_predictor_4x4_sse2, vpx_highbd_h_predictor_4x4_sse2, NULL, vpx_highbd_d135_predictor_4x4_sse2, vpx_highbd_d117_predictor_4x4_sse2, vpx_highbd_d153_predictor_4x4_sse2, vpx_highbd_d207_predictor_4x4_sse2, vpx_highbd_d63_predictor_4x4_sse2, vpx_highbd_tm_predictor_4x4_c) HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred8, vpx_highbd_dc_predictor_8x8_sse2, vpx_highbd_dc_left_predictor_8x8_sse2, vpx_highbd_dc_top_predictor_8x8_sse2, vpx_highbd_dc_128_predictor_8x8_sse2, vpx_highbd_v_predictor_8x8_sse2, vpx_highbd_h_predictor_8x8_sse2, NULL, NULL, NULL, NULL, NULL, NULL, vpx_highbd_tm_predictor_8x8_sse2) HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred16, vpx_highbd_dc_predictor_16x16_sse2, vpx_highbd_dc_left_predictor_16x16_sse2, vpx_highbd_dc_top_predictor_16x16_sse2, vpx_highbd_dc_128_predictor_16x16_sse2, vpx_highbd_v_predictor_16x16_sse2, vpx_highbd_h_predictor_16x16_sse2, NULL, NULL, NULL, NULL, NULL, NULL, vpx_highbd_tm_predictor_16x16_sse2) HIGHBD_INTRA_PRED_TEST(SSE2, TestHighbdIntraPred32, vpx_highbd_dc_predictor_32x32_sse2, vpx_highbd_dc_left_predictor_32x32_sse2, vpx_highbd_dc_top_predictor_32x32_sse2, vpx_highbd_dc_128_predictor_32x32_sse2, vpx_highbd_v_predictor_32x32_sse2, vpx_highbd_h_predictor_32x32_sse2, NULL, NULL, NULL, NULL, NULL, NULL, vpx_highbd_tm_predictor_32x32_sse2) #endif // HAVE_SSE2 #if HAVE_SSSE3 HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred4, NULL, NULL, NULL, NULL, NULL, NULL, vpx_highbd_d45_predictor_4x4_ssse3, NULL, NULL, NULL, NULL, NULL, NULL) HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred8, NULL, NULL, NULL, NULL, NULL, NULL, vpx_highbd_d45_predictor_8x8_ssse3, vpx_highbd_d135_predictor_8x8_ssse3, vpx_highbd_d117_predictor_8x8_ssse3, vpx_highbd_d153_predictor_8x8_ssse3, vpx_highbd_d207_predictor_8x8_ssse3, vpx_highbd_d63_predictor_8x8_ssse3, NULL) HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred16, NULL, NULL, NULL, NULL, NULL, NULL, vpx_highbd_d45_predictor_16x16_ssse3, vpx_highbd_d135_predictor_16x16_ssse3, vpx_highbd_d117_predictor_16x16_ssse3, vpx_highbd_d153_predictor_16x16_ssse3, vpx_highbd_d207_predictor_16x16_ssse3, vpx_highbd_d63_predictor_16x16_ssse3, NULL) HIGHBD_INTRA_PRED_TEST(SSSE3, TestHighbdIntraPred32, NULL, NULL, NULL, NULL, NULL, NULL, vpx_highbd_d45_predictor_32x32_ssse3, vpx_highbd_d135_predictor_32x32_ssse3, vpx_highbd_d117_predictor_32x32_ssse3, vpx_highbd_d153_predictor_32x32_ssse3, vpx_highbd_d207_predictor_32x32_ssse3, vpx_highbd_d63_predictor_32x32_ssse3, NULL) #endif // HAVE_SSSE3 #if HAVE_NEON HIGHBD_INTRA_PRED_TEST( NEON, TestHighbdIntraPred4, vpx_highbd_dc_predictor_4x4_neon, vpx_highbd_dc_left_predictor_4x4_neon, vpx_highbd_dc_top_predictor_4x4_neon, vpx_highbd_dc_128_predictor_4x4_neon, vpx_highbd_v_predictor_4x4_neon, vpx_highbd_h_predictor_4x4_neon, vpx_highbd_d45_predictor_4x4_neon, vpx_highbd_d135_predictor_4x4_neon, NULL, NULL, NULL, NULL, 
vpx_highbd_tm_predictor_4x4_neon) HIGHBD_INTRA_PRED_TEST( NEON, TestHighbdIntraPred8, vpx_highbd_dc_predictor_8x8_neon, vpx_highbd_dc_left_predictor_8x8_neon, vpx_highbd_dc_top_predictor_8x8_neon, vpx_highbd_dc_128_predictor_8x8_neon, vpx_highbd_v_predictor_8x8_neon, vpx_highbd_h_predictor_8x8_neon, vpx_highbd_d45_predictor_8x8_neon, vpx_highbd_d135_predictor_8x8_neon, NULL, NULL, NULL, NULL, vpx_highbd_tm_predictor_8x8_neon) HIGHBD_INTRA_PRED_TEST(NEON, TestHighbdIntraPred16, vpx_highbd_dc_predictor_16x16_neon, vpx_highbd_dc_left_predictor_16x16_neon, vpx_highbd_dc_top_predictor_16x16_neon, vpx_highbd_dc_128_predictor_16x16_neon, vpx_highbd_v_predictor_16x16_neon, vpx_highbd_h_predictor_16x16_neon, vpx_highbd_d45_predictor_16x16_neon, vpx_highbd_d135_predictor_16x16_neon, NULL, NULL, NULL, NULL, vpx_highbd_tm_predictor_16x16_neon) HIGHBD_INTRA_PRED_TEST(NEON, TestHighbdIntraPred32, vpx_highbd_dc_predictor_32x32_neon, vpx_highbd_dc_left_predictor_32x32_neon, vpx_highbd_dc_top_predictor_32x32_neon, vpx_highbd_dc_128_predictor_32x32_neon, vpx_highbd_v_predictor_32x32_neon, vpx_highbd_h_predictor_32x32_neon, vpx_highbd_d45_predictor_32x32_neon, vpx_highbd_d135_predictor_32x32_neon, NULL, NULL, NULL, NULL, vpx_highbd_tm_predictor_32x32_neon) #endif // HAVE_NEON #endif // CONFIG_VP9_HIGHBITDEPTH #include "test/test_libvpx.cc" libvpx-1.8.2/test/test_libvpx.cc000066400000000000000000000045661357355204000166740ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <string> #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #if VPX_ARCH_X86 || VPX_ARCH_X86_64 #include "vpx_ports/x86.h" #endif extern "C" { #if CONFIG_VP8 extern void vp8_rtcd(); #endif // CONFIG_VP8 #if CONFIG_VP9 extern void vp9_rtcd(); #endif // CONFIG_VP9 extern void vpx_dsp_rtcd(); extern void vpx_scale_rtcd(); } #if VPX_ARCH_X86 || VPX_ARCH_X86_64 static void append_negative_gtest_filter(const char *str) { std::string filter = ::testing::FLAGS_gtest_filter; // Negative patterns begin with one '-' followed by a ':' separated list.
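// (Hedged worked example, not in the original source: starting from the
// default filter "*", a machine lacking SSE4.1 and AVX support ends up with
//   "*-:SSE4_1.*:SSE4_1/*:AVX.*:AVX/*"
// i.e. the first call appends the single '-' separator plus its patterns,
// and every later call appends only its own ':'-prefixed patterns.)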
if (filter.find('-') == std::string::npos) filter += '-'; filter += str; ::testing::FLAGS_gtest_filter = filter; } #endif // VPX_ARCH_X86 || VPX_ARCH_X86_64 int main(int argc, char **argv) { ::testing::InitGoogleTest(&argc, argv); #if VPX_ARCH_X86 || VPX_ARCH_X86_64 const int simd_caps = x86_simd_caps(); if (!(simd_caps & HAS_MMX)) append_negative_gtest_filter(":MMX.*:MMX/*"); if (!(simd_caps & HAS_SSE)) append_negative_gtest_filter(":SSE.*:SSE/*"); if (!(simd_caps & HAS_SSE2)) append_negative_gtest_filter(":SSE2.*:SSE2/*"); if (!(simd_caps & HAS_SSE3)) append_negative_gtest_filter(":SSE3.*:SSE3/*"); if (!(simd_caps & HAS_SSSE3)) { append_negative_gtest_filter(":SSSE3.*:SSSE3/*"); } if (!(simd_caps & HAS_SSE4_1)) { append_negative_gtest_filter(":SSE4_1.*:SSE4_1/*"); } if (!(simd_caps & HAS_AVX)) append_negative_gtest_filter(":AVX.*:AVX/*"); if (!(simd_caps & HAS_AVX2)) append_negative_gtest_filter(":AVX2.*:AVX2/*"); if (!(simd_caps & HAS_AVX512)) { append_negative_gtest_filter(":AVX512.*:AVX512/*"); } #endif // VPX_ARCH_X86 || VPX_ARCH_X86_64 #if !CONFIG_SHARED // Shared library builds don't support whitebox tests // that exercise internal symbols. #if CONFIG_VP8 vp8_rtcd(); #endif // CONFIG_VP8 #if CONFIG_VP9 vp9_rtcd(); #endif // CONFIG_VP9 vpx_dsp_rtcd(); vpx_scale_rtcd(); #endif // !CONFIG_SHARED return RUN_ALL_TESTS(); } libvpx-1.8.2/test/test_vector_test.cc000066400000000000000000000154631357355204000177310ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <cstdio> #include <cstdlib> #include <map> #include <memory> #include <set> #include <string> #include <tuple> #include "third_party/googletest/src/include/gtest/gtest.h" #include "../tools_common.h" #include "./vpx_config.h" #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/ivf_video_source.h" #include "test/md5_helper.h" #include "test/test_vectors.h" #include "test/util.h" #if CONFIG_WEBM_IO #include "test/webm_video_source.h" #endif #include "vpx_mem/vpx_mem.h" namespace { const int kThreads = 0; const int kMtMode = 1; const int kFileName = 2; typedef std::tuple<int, int, const char *> DecodeParam; class TestVectorTest : public ::libvpx_test::DecoderTest, public ::libvpx_test::CodecTestWithParam<DecodeParam> { protected: TestVectorTest() : DecoderTest(GET_PARAM(0)), md5_file_(NULL) { #if CONFIG_VP9_DECODER resize_clips_.insert(::libvpx_test::kVP9TestVectorsResize, ::libvpx_test::kVP9TestVectorsResize + ::libvpx_test::kNumVP9TestVectorsResize); #endif } virtual ~TestVectorTest() { if (md5_file_) fclose(md5_file_); } void OpenMD5File(const std::string &md5_file_name_) { md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name_); ASSERT_TRUE(md5_file_ != NULL) << "Md5 file open failed.
Filename: " << md5_file_name_; } #if CONFIG_VP9_DECODER virtual void PreDecodeFrameHook( const libvpx_test::CompressedVideoSource &video, libvpx_test::Decoder *decoder) { if (video.frame_number() == 0 && mt_mode_ >= 0) { if (mt_mode_ == 1) { decoder->Control(VP9D_SET_LOOP_FILTER_OPT, 1); decoder->Control(VP9D_SET_ROW_MT, 0); } else if (mt_mode_ == 2) { decoder->Control(VP9D_SET_LOOP_FILTER_OPT, 0); decoder->Control(VP9D_SET_ROW_MT, 1); } else { decoder->Control(VP9D_SET_LOOP_FILTER_OPT, 0); decoder->Control(VP9D_SET_ROW_MT, 0); } } } #endif virtual void DecompressedFrameHook(const vpx_image_t &img, const unsigned int frame_number) { ASSERT_TRUE(md5_file_ != NULL); char expected_md5[33]; char junk[128]; // Read correct md5 checksums. const int res = fscanf(md5_file_, "%s %s", expected_md5, junk); ASSERT_NE(res, EOF) << "Read md5 data failed"; expected_md5[32] = '\0'; ::libvpx_test::MD5 md5_res; md5_res.Add(&img); const char *actual_md5 = md5_res.Get(); // Check md5 match. ASSERT_STREQ(expected_md5, actual_md5) << "Md5 checksums don't match: frame number = " << frame_number; } #if CONFIG_VP9_DECODER std::set<std::string> resize_clips_; #endif int mt_mode_; private: FILE *md5_file_; }; // This test runs through the whole set of test vectors, and decodes them. // The md5 checksums are computed for each frame in the video file. If md5 // checksums match the correct md5 data, then the test is passed. Otherwise, // the test failed. TEST_P(TestVectorTest, MD5Match) { const DecodeParam input = GET_PARAM(1); const std::string filename = std::get<kFileName>(input); vpx_codec_flags_t flags = 0; vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); char str[256]; cfg.threads = std::get<kThreads>(input); mt_mode_ = std::get<kMtMode>(input); snprintf(str, sizeof(str) / sizeof(str[0]) - 1, "file: %s threads: %d MT mode: %d", filename.c_str(), cfg.threads, mt_mode_); SCOPED_TRACE(str); // Open compressed video file. std::unique_ptr<libvpx_test::CompressedVideoSource> video; if (filename.substr(filename.length() - 3, 3) == "ivf") { video.reset(new libvpx_test::IVFVideoSource(filename)); } else if (filename.substr(filename.length() - 4, 4) == "webm") { #if CONFIG_WEBM_IO video.reset(new libvpx_test::WebMVideoSource(filename)); #else fprintf(stderr, "WebM IO is disabled, skipping test vector %s\n", filename.c_str()); return; #endif } ASSERT_TRUE(video.get() != NULL); video->Init(); // Construct md5 file name. const std::string md5_filename = filename + ".md5"; OpenMD5File(md5_filename); // Set decode config and flags. set_cfg(cfg); set_flags(flags); // Decode frame, and check the md5 matching. ASSERT_NO_FATAL_FAILURE(RunLoop(video.get(), cfg)); } #if CONFIG_VP8_DECODER VP8_INSTANTIATE_TEST_CASE( TestVectorTest, ::testing::Combine( ::testing::Values(1), // Single thread. ::testing::Values(-1), // LPF opt and Row MT is not applicable ::testing::ValuesIn(libvpx_test::kVP8TestVectors, libvpx_test::kVP8TestVectors + libvpx_test::kNumVP8TestVectors))); // Test VP8 decode with different numbers of threads. INSTANTIATE_TEST_CASE_P( VP8MultiThreaded, TestVectorTest, ::testing::Combine( ::testing::Values( static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP8)), ::testing::Combine( ::testing::Range(2, 9), // With 2 ~ 8 threads. ::testing::Values(-1), // LPF opt and Row MT is not applicable ::testing::ValuesIn(libvpx_test::kVP8TestVectors, libvpx_test::kVP8TestVectors + libvpx_test::kNumVP8TestVectors)))); #endif // CONFIG_VP8_DECODER #if CONFIG_VP9_DECODER VP9_INSTANTIATE_TEST_CASE( TestVectorTest, ::testing::Combine( ::testing::Values(1), // Single thread.
::testing::Values(-1), // LPF opt and Row MT is not applicable ::testing::ValuesIn(libvpx_test::kVP9TestVectors, libvpx_test::kVP9TestVectors + libvpx_test::kNumVP9TestVectors))); INSTANTIATE_TEST_CASE_P( VP9MultiThreaded, TestVectorTest, ::testing::Combine( ::testing::Values( static_cast<const libvpx_test::CodecFactory *>(&libvpx_test::kVP9)), ::testing::Combine( ::testing::Range(2, 9), // With 2 ~ 8 threads. ::testing::Range(0, 3), // With multi threads modes 0 ~ 2 // 0: LPF opt and Row MT disabled // 1: LPF opt enabled // 2: Row MT enabled ::testing::ValuesIn(libvpx_test::kVP9TestVectors, libvpx_test::kVP9TestVectors + libvpx_test::kNumVP9TestVectors)))); #endif } // namespace libvpx-1.8.2/test/test_vectors.cc000066400000000000000000000343451357355204000170550ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "test/test_vectors.h" namespace libvpx_test { #define NELEMENTS(x) static_cast<int>(sizeof(x) / sizeof(x[0])) #if CONFIG_VP8_DECODER const char *const kVP8TestVectors[] = { "vp80-00-comprehensive-001.ivf", "vp80-00-comprehensive-002.ivf", "vp80-00-comprehensive-003.ivf", "vp80-00-comprehensive-004.ivf", "vp80-00-comprehensive-005.ivf", "vp80-00-comprehensive-006.ivf", "vp80-00-comprehensive-007.ivf", "vp80-00-comprehensive-008.ivf", "vp80-00-comprehensive-009.ivf", "vp80-00-comprehensive-010.ivf", "vp80-00-comprehensive-011.ivf", "vp80-00-comprehensive-012.ivf", "vp80-00-comprehensive-013.ivf", "vp80-00-comprehensive-014.ivf", "vp80-00-comprehensive-015.ivf", "vp80-00-comprehensive-016.ivf", "vp80-00-comprehensive-017.ivf", "vp80-00-comprehensive-018.ivf", "vp80-01-intra-1400.ivf", "vp80-01-intra-1411.ivf", "vp80-01-intra-1416.ivf", "vp80-01-intra-1417.ivf", "vp80-02-inter-1402.ivf", "vp80-02-inter-1412.ivf", "vp80-02-inter-1418.ivf", "vp80-02-inter-1424.ivf", "vp80-03-segmentation-01.ivf", "vp80-03-segmentation-02.ivf", "vp80-03-segmentation-03.ivf", "vp80-03-segmentation-04.ivf", "vp80-03-segmentation-1401.ivf", "vp80-03-segmentation-1403.ivf", "vp80-03-segmentation-1407.ivf", "vp80-03-segmentation-1408.ivf", "vp80-03-segmentation-1409.ivf", "vp80-03-segmentation-1410.ivf", "vp80-03-segmentation-1413.ivf", "vp80-03-segmentation-1414.ivf", "vp80-03-segmentation-1415.ivf", "vp80-03-segmentation-1425.ivf", "vp80-03-segmentation-1426.ivf", "vp80-03-segmentation-1427.ivf", "vp80-03-segmentation-1432.ivf", "vp80-03-segmentation-1435.ivf", "vp80-03-segmentation-1436.ivf", "vp80-03-segmentation-1437.ivf", "vp80-03-segmentation-1441.ivf", "vp80-03-segmentation-1442.ivf", "vp80-04-partitions-1404.ivf", "vp80-04-partitions-1405.ivf", "vp80-04-partitions-1406.ivf", "vp80-05-sharpness-1428.ivf", "vp80-05-sharpness-1429.ivf", "vp80-05-sharpness-1430.ivf", "vp80-05-sharpness-1431.ivf", "vp80-05-sharpness-1433.ivf", "vp80-05-sharpness-1434.ivf", "vp80-05-sharpness-1438.ivf", "vp80-05-sharpness-1439.ivf", "vp80-05-sharpness-1440.ivf", "vp80-05-sharpness-1443.ivf", "vp80-06-smallsize.ivf" }; const int kNumVP8TestVectors = NELEMENTS(kVP8TestVectors); #endif // CONFIG_VP8_DECODER #if CONFIG_VP9_DECODER #define RESIZE_TEST_VECTORS \ "vp90-2-21-resize_inter_320x180_5_1-2.webm", \ "vp90-2-21-resize_inter_320x180_5_3-4.webm", \
"vp90-2-21-resize_inter_320x180_7_1-2.webm", \ "vp90-2-21-resize_inter_320x180_7_3-4.webm", \ "vp90-2-21-resize_inter_320x240_5_1-2.webm", \ "vp90-2-21-resize_inter_320x240_5_3-4.webm", \ "vp90-2-21-resize_inter_320x240_7_1-2.webm", \ "vp90-2-21-resize_inter_320x240_7_3-4.webm", \ "vp90-2-21-resize_inter_640x360_5_1-2.webm", \ "vp90-2-21-resize_inter_640x360_5_3-4.webm", \ "vp90-2-21-resize_inter_640x360_7_1-2.webm", \ "vp90-2-21-resize_inter_640x360_7_3-4.webm", \ "vp90-2-21-resize_inter_640x480_5_1-2.webm", \ "vp90-2-21-resize_inter_640x480_5_3-4.webm", \ "vp90-2-21-resize_inter_640x480_7_1-2.webm", \ "vp90-2-21-resize_inter_640x480_7_3-4.webm", \ "vp90-2-21-resize_inter_1280x720_5_1-2.webm", \ "vp90-2-21-resize_inter_1280x720_5_3-4.webm", \ "vp90-2-21-resize_inter_1280x720_7_1-2.webm", \ "vp90-2-21-resize_inter_1280x720_7_3-4.webm", \ "vp90-2-21-resize_inter_1920x1080_5_1-2.webm", \ "vp90-2-21-resize_inter_1920x1080_5_3-4.webm", \ "vp90-2-21-resize_inter_1920x1080_7_1-2.webm", \ "vp90-2-21-resize_inter_1920x1080_7_3-4.webm", const char *const kVP9TestVectors[] = { "vp90-2-00-quantizer-00.webm", "vp90-2-00-quantizer-01.webm", "vp90-2-00-quantizer-02.webm", "vp90-2-00-quantizer-03.webm", "vp90-2-00-quantizer-04.webm", "vp90-2-00-quantizer-05.webm", "vp90-2-00-quantizer-06.webm", "vp90-2-00-quantizer-07.webm", "vp90-2-00-quantizer-08.webm", "vp90-2-00-quantizer-09.webm", "vp90-2-00-quantizer-10.webm", "vp90-2-00-quantizer-11.webm", "vp90-2-00-quantizer-12.webm", "vp90-2-00-quantizer-13.webm", "vp90-2-00-quantizer-14.webm", "vp90-2-00-quantizer-15.webm", "vp90-2-00-quantizer-16.webm", "vp90-2-00-quantizer-17.webm", "vp90-2-00-quantizer-18.webm", "vp90-2-00-quantizer-19.webm", "vp90-2-00-quantizer-20.webm", "vp90-2-00-quantizer-21.webm", "vp90-2-00-quantizer-22.webm", "vp90-2-00-quantizer-23.webm", "vp90-2-00-quantizer-24.webm", "vp90-2-00-quantizer-25.webm", "vp90-2-00-quantizer-26.webm", "vp90-2-00-quantizer-27.webm", "vp90-2-00-quantizer-28.webm", "vp90-2-00-quantizer-29.webm", "vp90-2-00-quantizer-30.webm", "vp90-2-00-quantizer-31.webm", "vp90-2-00-quantizer-32.webm", "vp90-2-00-quantizer-33.webm", "vp90-2-00-quantizer-34.webm", "vp90-2-00-quantizer-35.webm", "vp90-2-00-quantizer-36.webm", "vp90-2-00-quantizer-37.webm", "vp90-2-00-quantizer-38.webm", "vp90-2-00-quantizer-39.webm", "vp90-2-00-quantizer-40.webm", "vp90-2-00-quantizer-41.webm", "vp90-2-00-quantizer-42.webm", "vp90-2-00-quantizer-43.webm", "vp90-2-00-quantizer-44.webm", "vp90-2-00-quantizer-45.webm", "vp90-2-00-quantizer-46.webm", "vp90-2-00-quantizer-47.webm", "vp90-2-00-quantizer-48.webm", "vp90-2-00-quantizer-49.webm", "vp90-2-00-quantizer-50.webm", "vp90-2-00-quantizer-51.webm", "vp90-2-00-quantizer-52.webm", "vp90-2-00-quantizer-53.webm", "vp90-2-00-quantizer-54.webm", "vp90-2-00-quantizer-55.webm", "vp90-2-00-quantizer-56.webm", "vp90-2-00-quantizer-57.webm", "vp90-2-00-quantizer-58.webm", "vp90-2-00-quantizer-59.webm", "vp90-2-00-quantizer-60.webm", "vp90-2-00-quantizer-61.webm", "vp90-2-00-quantizer-62.webm", "vp90-2-00-quantizer-63.webm", "vp90-2-01-sharpness-1.webm", "vp90-2-01-sharpness-2.webm", "vp90-2-01-sharpness-3.webm", "vp90-2-01-sharpness-4.webm", "vp90-2-01-sharpness-5.webm", "vp90-2-01-sharpness-6.webm", "vp90-2-01-sharpness-7.webm", "vp90-2-02-size-08x08.webm", "vp90-2-02-size-08x10.webm", "vp90-2-02-size-08x16.webm", "vp90-2-02-size-08x18.webm", "vp90-2-02-size-08x32.webm", "vp90-2-02-size-08x34.webm", "vp90-2-02-size-08x64.webm", "vp90-2-02-size-08x66.webm", "vp90-2-02-size-10x08.webm", 
"vp90-2-02-size-10x10.webm", "vp90-2-02-size-10x16.webm", "vp90-2-02-size-10x18.webm", "vp90-2-02-size-10x32.webm", "vp90-2-02-size-10x34.webm", "vp90-2-02-size-10x64.webm", "vp90-2-02-size-10x66.webm", "vp90-2-02-size-16x08.webm", "vp90-2-02-size-16x10.webm", "vp90-2-02-size-16x16.webm", "vp90-2-02-size-16x18.webm", "vp90-2-02-size-16x32.webm", "vp90-2-02-size-16x34.webm", "vp90-2-02-size-16x64.webm", "vp90-2-02-size-16x66.webm", "vp90-2-02-size-18x08.webm", "vp90-2-02-size-18x10.webm", "vp90-2-02-size-18x16.webm", "vp90-2-02-size-18x18.webm", "vp90-2-02-size-18x32.webm", "vp90-2-02-size-18x34.webm", "vp90-2-02-size-18x64.webm", "vp90-2-02-size-18x66.webm", "vp90-2-02-size-32x08.webm", "vp90-2-02-size-32x10.webm", "vp90-2-02-size-32x16.webm", "vp90-2-02-size-32x18.webm", "vp90-2-02-size-32x32.webm", "vp90-2-02-size-32x34.webm", "vp90-2-02-size-32x64.webm", "vp90-2-02-size-32x66.webm", "vp90-2-02-size-34x08.webm", "vp90-2-02-size-34x10.webm", "vp90-2-02-size-34x16.webm", "vp90-2-02-size-34x18.webm", "vp90-2-02-size-34x32.webm", "vp90-2-02-size-34x34.webm", "vp90-2-02-size-34x64.webm", "vp90-2-02-size-34x66.webm", "vp90-2-02-size-64x08.webm", "vp90-2-02-size-64x10.webm", "vp90-2-02-size-64x16.webm", "vp90-2-02-size-64x18.webm", "vp90-2-02-size-64x32.webm", "vp90-2-02-size-64x34.webm", "vp90-2-02-size-64x64.webm", "vp90-2-02-size-64x66.webm", "vp90-2-02-size-66x08.webm", "vp90-2-02-size-66x10.webm", "vp90-2-02-size-66x16.webm", "vp90-2-02-size-66x18.webm", "vp90-2-02-size-66x32.webm", "vp90-2-02-size-66x34.webm", "vp90-2-02-size-66x64.webm", "vp90-2-02-size-66x66.webm", "vp90-2-02-size-130x132.webm", "vp90-2-02-size-132x130.webm", "vp90-2-02-size-132x132.webm", "vp90-2-02-size-178x180.webm", "vp90-2-02-size-180x178.webm", "vp90-2-02-size-180x180.webm", "vp90-2-03-size-196x196.webm", "vp90-2-03-size-196x198.webm", "vp90-2-03-size-196x200.webm", "vp90-2-03-size-196x202.webm", "vp90-2-03-size-196x208.webm", "vp90-2-03-size-196x210.webm", "vp90-2-03-size-196x224.webm", "vp90-2-03-size-196x226.webm", "vp90-2-03-size-198x196.webm", "vp90-2-03-size-198x198.webm", "vp90-2-03-size-198x200.webm", "vp90-2-03-size-198x202.webm", "vp90-2-03-size-198x208.webm", "vp90-2-03-size-198x210.webm", "vp90-2-03-size-198x224.webm", "vp90-2-03-size-198x226.webm", "vp90-2-03-size-200x196.webm", "vp90-2-03-size-200x198.webm", "vp90-2-03-size-200x200.webm", "vp90-2-03-size-200x202.webm", "vp90-2-03-size-200x208.webm", "vp90-2-03-size-200x210.webm", "vp90-2-03-size-200x224.webm", "vp90-2-03-size-200x226.webm", "vp90-2-03-size-202x196.webm", "vp90-2-03-size-202x198.webm", "vp90-2-03-size-202x200.webm", "vp90-2-03-size-202x202.webm", "vp90-2-03-size-202x208.webm", "vp90-2-03-size-202x210.webm", "vp90-2-03-size-202x224.webm", "vp90-2-03-size-202x226.webm", "vp90-2-03-size-208x196.webm", "vp90-2-03-size-208x198.webm", "vp90-2-03-size-208x200.webm", "vp90-2-03-size-208x202.webm", "vp90-2-03-size-208x208.webm", "vp90-2-03-size-208x210.webm", "vp90-2-03-size-208x224.webm", "vp90-2-03-size-208x226.webm", "vp90-2-03-size-210x196.webm", "vp90-2-03-size-210x198.webm", "vp90-2-03-size-210x200.webm", "vp90-2-03-size-210x202.webm", "vp90-2-03-size-210x208.webm", "vp90-2-03-size-210x210.webm", "vp90-2-03-size-210x224.webm", "vp90-2-03-size-210x226.webm", "vp90-2-03-size-224x196.webm", "vp90-2-03-size-224x198.webm", "vp90-2-03-size-224x200.webm", "vp90-2-03-size-224x202.webm", "vp90-2-03-size-224x208.webm", "vp90-2-03-size-224x210.webm", "vp90-2-03-size-224x224.webm", "vp90-2-03-size-224x226.webm", "vp90-2-03-size-226x196.webm", 
"vp90-2-03-size-226x198.webm", "vp90-2-03-size-226x200.webm", "vp90-2-03-size-226x202.webm", "vp90-2-03-size-226x208.webm", "vp90-2-03-size-226x210.webm", "vp90-2-03-size-226x224.webm", "vp90-2-03-size-226x226.webm", "vp90-2-03-size-352x288.webm", "vp90-2-03-deltaq.webm", "vp90-2-05-resize.ivf", "vp90-2-06-bilinear.webm", "vp90-2-07-frame_parallel.webm", "vp90-2-08-tile_1x2_frame_parallel.webm", "vp90-2-08-tile_1x2.webm", "vp90-2-08-tile_1x4_frame_parallel.webm", "vp90-2-08-tile_1x4.webm", "vp90-2-08-tile_1x8_frame_parallel.webm", "vp90-2-08-tile_1x8.webm", "vp90-2-08-tile-4x4.webm", "vp90-2-08-tile-4x1.webm", "vp90-2-09-subpixel-00.ivf", "vp90-2-02-size-lf-1920x1080.webm", "vp90-2-09-aq2.webm", "vp90-2-09-lf_deltas.webm", "vp90-2-10-show-existing-frame.webm", "vp90-2-10-show-existing-frame2.webm", "vp90-2-11-size-351x287.webm", "vp90-2-11-size-351x288.webm", "vp90-2-11-size-352x287.webm", "vp90-2-12-droppable_1.ivf", "vp90-2-12-droppable_2.ivf", "vp90-2-12-droppable_3.ivf", #if !CONFIG_SIZE_LIMIT || \ (DECODE_WIDTH_LIMIT >= 20400 && DECODE_HEIGHT_LIMIT >= 120) "vp90-2-13-largescaling.webm", #endif "vp90-2-14-resize-fp-tiles-1-16.webm", "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm", "vp90-2-14-resize-fp-tiles-1-2.webm", "vp90-2-14-resize-fp-tiles-1-4.webm", "vp90-2-14-resize-fp-tiles-16-1.webm", "vp90-2-14-resize-fp-tiles-16-2.webm", "vp90-2-14-resize-fp-tiles-16-4.webm", "vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm", "vp90-2-14-resize-fp-tiles-16-8.webm", "vp90-2-14-resize-fp-tiles-1-8.webm", "vp90-2-14-resize-fp-tiles-2-16.webm", "vp90-2-14-resize-fp-tiles-2-1.webm", "vp90-2-14-resize-fp-tiles-2-4.webm", "vp90-2-14-resize-fp-tiles-2-8.webm", "vp90-2-14-resize-fp-tiles-4-16.webm", "vp90-2-14-resize-fp-tiles-4-1.webm", "vp90-2-14-resize-fp-tiles-4-2.webm", "vp90-2-14-resize-fp-tiles-4-8.webm", "vp90-2-14-resize-fp-tiles-8-16.webm", "vp90-2-14-resize-fp-tiles-8-1.webm", "vp90-2-14-resize-fp-tiles-8-2.webm", "vp90-2-14-resize-fp-tiles-8-4.webm", "vp90-2-14-resize-10frames-fp-tiles-1-2-4-8.webm", "vp90-2-14-resize-10frames-fp-tiles-1-2.webm", "vp90-2-14-resize-10frames-fp-tiles-1-4.webm", "vp90-2-14-resize-10frames-fp-tiles-1-8.webm", "vp90-2-14-resize-10frames-fp-tiles-2-1.webm", "vp90-2-14-resize-10frames-fp-tiles-2-4.webm", "vp90-2-14-resize-10frames-fp-tiles-2-8.webm", "vp90-2-14-resize-10frames-fp-tiles-4-1.webm", "vp90-2-14-resize-10frames-fp-tiles-4-2.webm", "vp90-2-14-resize-10frames-fp-tiles-4-8.webm", "vp90-2-14-resize-10frames-fp-tiles-8-1.webm", "vp90-2-14-resize-10frames-fp-tiles-8-2.webm", "vp90-2-14-resize-10frames-fp-tiles-8-4-2-1.webm", "vp90-2-14-resize-10frames-fp-tiles-8-4.webm", "vp90-2-15-segkey.webm", "vp90-2-15-segkey_adpq.webm", "vp90-2-16-intra-only.webm", "vp90-2-17-show-existing-frame.webm", "vp90-2-18-resize.ivf", "vp90-2-19-skip.webm", "vp90-2-19-skip-01.webm", "vp90-2-19-skip-02.webm", "vp91-2-04-yuv444.webm", "vp91-2-04-yuv422.webm", "vp91-2-04-yuv440.webm", #if CONFIG_VP9_HIGHBITDEPTH "vp92-2-20-10bit-yuv420.webm", "vp92-2-20-12bit-yuv420.webm", "vp93-2-20-10bit-yuv422.webm", "vp93-2-20-12bit-yuv422.webm", "vp93-2-20-10bit-yuv440.webm", "vp93-2-20-12bit-yuv440.webm", "vp93-2-20-10bit-yuv444.webm", "vp93-2-20-12bit-yuv444.webm", #endif // CONFIG_VP9_HIGHBITDEPTH "vp90-2-20-big_superframe-01.webm", "vp90-2-20-big_superframe-02.webm", "vp90-2-22-svc_1280x720_1.webm", RESIZE_TEST_VECTORS }; const char *const kVP9TestVectorsSvc[] = { "vp90-2-22-svc_1280x720_3.ivf" }; const int kNumVP9TestVectors = NELEMENTS(kVP9TestVectors); const int kNumVP9TestVectorsSvc = 
NELEMENTS(kVP9TestVectorsSvc); const char *const kVP9TestVectorsResize[] = { RESIZE_TEST_VECTORS }; const int kNumVP9TestVectorsResize = NELEMENTS(kVP9TestVectorsResize); #undef RESIZE_TEST_VECTORS #endif // CONFIG_VP9_DECODER } // namespace libvpx_test libvpx-1.8.2/test/test_vectors.h000066400000000000000000000017771357355204000167220ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_TEST_VECTORS_H_ #define VPX_TEST_TEST_VECTORS_H_ #include "./vpx_config.h" namespace libvpx_test { #if CONFIG_VP8_DECODER extern const int kNumVP8TestVectors; extern const char *const kVP8TestVectors[]; #endif #if CONFIG_VP9_DECODER extern const int kNumVP9TestVectors; extern const char *const kVP9TestVectors[]; extern const int kNumVP9TestVectorsSvc; extern const char *const kVP9TestVectorsSvc[]; extern const int kNumVP9TestVectorsResize; extern const char *const kVP9TestVectorsResize[]; #endif // CONFIG_VP9_DECODER } // namespace libvpx_test #endif // VPX_TEST_TEST_VECTORS_H_ libvpx-1.8.2/test/tile_independence_test.cc000066400000000000000000000066051357355204000210240ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <cstdio> #include <cstdlib> #include <string> #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" #include "test/md5_helper.h" #include "vpx_mem/vpx_mem.h" namespace { class TileIndependenceTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam<int> { protected: TileIndependenceTest() : EncoderTest(GET_PARAM(0)), md5_fw_order_(), md5_inv_order_(), n_tiles_(GET_PARAM(1)) { init_flags_ = VPX_CODEC_USE_PSNR; vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); cfg.w = 704; cfg.h = 144; cfg.threads = 1; fw_dec_ = codec_->CreateDecoder(cfg, 0); inv_dec_ = codec_->CreateDecoder(cfg, 0); inv_dec_->Control(VP9_INVERT_TILE_DECODE_ORDER, 1); } virtual ~TileIndependenceTest() { delete fw_dec_; delete inv_dec_; } virtual void SetUp() { InitializeConfig(); SetMode(libvpx_test::kTwoPassGood); } virtual void PreEncodeFrameHook(libvpx_test::VideoSource *video, libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP9E_SET_TILE_COLUMNS, n_tiles_); } } void UpdateMD5(::libvpx_test::Decoder *dec, const vpx_codec_cx_pkt_t *pkt, ::libvpx_test::MD5 *md5) { const vpx_codec_err_t res = dec->DecodeFrame( reinterpret_cast<uint8_t *>(pkt->data.frame.buf), pkt->data.frame.sz); if (res != VPX_CODEC_OK) { abort_ = true; ASSERT_EQ(VPX_CODEC_OK, res); } const vpx_image_t *img = dec->GetDxData().Next(); md5->Add(img); } virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { UpdateMD5(fw_dec_, pkt, &md5_fw_order_); UpdateMD5(inv_dec_, pkt, &md5_inv_order_); } ::libvpx_test::MD5 md5_fw_order_, md5_inv_order_; ::libvpx_test::Decoder *fw_dec_, *inv_dec_; private: int n_tiles_; }; // Run an encode with 2 or 4 tiles, and do the decode both in normal and // inverted tile ordering. Ensure that the MD5 of the output in both cases // is identical. If so, tiles are considered independent and the test passes. TEST_P(TileIndependenceTest, MD5Match) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 500; cfg_.g_lag_in_frames = 25; cfg_.rc_end_usage = VPX_VBR; libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 704, 144, timebase.den, timebase.num, 0, 30); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const char *md5_fw_str = md5_fw_order_.Get(); const char *md5_inv_str = md5_inv_order_.Get(); // We could use ASSERT_EQ(!memcmp(.., .., 16)) here, but this gives nicer // output if it fails. Not sure if it's helpful since it's really just // an MD5... ASSERT_STREQ(md5_fw_str, md5_inv_str); } VP9_INSTANTIATE_TEST_CASE(TileIndependenceTest, ::testing::Range(0, 2, 1)); } // namespace libvpx-1.8.2/test/timestamp_test.cc000066400000000000000000000062601357355204000173660ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/util.h" #include "test/video_source.h" #include "third_party/googletest/src/include/gtest/gtest.h" namespace { const int kVideoSourceWidth = 320; const int kVideoSourceHeight = 240; const int kFramesToEncode = 3; // A video source that exposes functions to set the timebase, framerate and // starting pts. class DummyTimebaseVideoSource : public ::libvpx_test::DummyVideoSource { public: // Parameters num and den set the timebase for the video source. DummyTimebaseVideoSource(int num, int den) : timebase_({ num, den }), framerate_numerator_(30), framerate_denominator_(1), starting_pts_(0) { SetSize(kVideoSourceWidth, kVideoSourceHeight); set_limit(kFramesToEncode); } void SetFramerate(int numerator, int denominator) { framerate_numerator_ = numerator; framerate_denominator_ = denominator; } // Returns one frames duration in timebase units as a double. double FrameDuration() const { return (static_cast(timebase_.den) / timebase_.num) / (static_cast(framerate_numerator_) / framerate_denominator_); } virtual vpx_codec_pts_t pts() const { return static_cast(frame_ * FrameDuration() + starting_pts_ + 0.5); } virtual unsigned long duration() const { return static_cast(FrameDuration() + 0.5); } virtual vpx_rational_t timebase() const { return timebase_; } void set_starting_pts(int64_t starting_pts) { starting_pts_ = starting_pts; } private: vpx_rational_t timebase_; int framerate_numerator_; int framerate_denominator_; int64_t starting_pts_; }; class TimestampTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam { protected: TimestampTest() : EncoderTest(GET_PARAM(0)) {} virtual ~TimestampTest() {} virtual void SetUp() { InitializeConfig(); SetMode(GET_PARAM(1)); } }; // Tests encoding in millisecond timebase. TEST_P(TimestampTest, EncodeFrames) { DummyTimebaseVideoSource video(1, 1000); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } TEST_P(TimestampTest, TestMicrosecondTimebase) { // Set the timebase to microseconds. DummyTimebaseVideoSource video(1, 1000000); video.set_limit(1); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } TEST_P(TimestampTest, TestVpxRollover) { DummyTimebaseVideoSource video(1, 1000); video.set_starting_pts(922337170351ll); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } VP8_INSTANTIATE_TEST_CASE(TimestampTest, ::testing::Values(::libvpx_test::kTwoPassGood)); VP9_INSTANTIATE_TEST_CASE(TimestampTest, ::testing::Values(::libvpx_test::kTwoPassGood)); } // namespace libvpx-1.8.2/test/tools_common.sh000077500000000000000000000333201357355204000170610ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file contains shell code shared by test scripts for libvpx tools. # Use $VPX_TEST_TOOLS_COMMON_SH as a pseudo include guard. if [ -z "${VPX_TEST_TOOLS_COMMON_SH}" ]; then VPX_TEST_TOOLS_COMMON_SH=included set -e devnull='> /dev/null 2>&1' VPX_TEST_PREFIX="" elog() { echo "$@" 1>&2 } vlog() { if [ "${VPX_TEST_VERBOSE_OUTPUT}" = "yes" ]; then echo "$@" fi } # Sets $VPX_TOOL_TEST to the name specified by positional parameter one. 
test_begin() { VPX_TOOL_TEST="${1}" } # Clears the VPX_TOOL_TEST variable after confirming that $VPX_TOOL_TEST matches # positional parameter one. test_end() { if [ "$1" != "${VPX_TOOL_TEST}" ]; then echo "FAIL completed test mismatch!." echo " completed test: ${1}" echo " active test: ${VPX_TOOL_TEST}." return 1 fi VPX_TOOL_TEST='' } # Echoes the target configuration being tested. test_configuration_target() { vpx_config_mk="${LIBVPX_CONFIG_PATH}/config.mk" # Find the TOOLCHAIN line, split it using ':=' as the field separator, and # print the last field to get the value. Then pipe the value to tr to consume # any leading/trailing spaces while allowing tr to echo the output to stdout. awk -F ':=' '/TOOLCHAIN/ { print $NF }' "${vpx_config_mk}" | tr -d ' ' } # Trap function used for failure reports and tool output directory removal. # When the contents of $VPX_TOOL_TEST do not match the string '', reports # failure of test stored in $VPX_TOOL_TEST. cleanup() { if [ -n "${VPX_TOOL_TEST}" ] && [ "${VPX_TOOL_TEST}" != '' ]; then echo "FAIL: $VPX_TOOL_TEST" fi if [ -n "${VPX_TEST_OUTPUT_DIR}" ] && [ -d "${VPX_TEST_OUTPUT_DIR}" ]; then rm -rf "${VPX_TEST_OUTPUT_DIR}" fi } # Echoes the git hash portion of the VERSION_STRING variable defined in # $LIBVPX_CONFIG_PATH/config.mk to stdout, or the version number string when # no git hash is contained in VERSION_STRING. config_hash() { vpx_config_mk="${LIBVPX_CONFIG_PATH}/config.mk" # Find VERSION_STRING line, split it with "-g" and print the last field to # output the git hash to stdout. vpx_version=$(awk -F -g '/VERSION_STRING/ {print $NF}' "${vpx_config_mk}") # Handle two situations here: # 1. The default case: $vpx_version is a git hash, so echo it unchanged. # 2. When being run a non-dev tree, the -g portion is not present in the # version string: It's only the version number. # In this case $vpx_version is something like 'VERSION_STRING=v1.3.0', so # we echo only what is after the '='. echo "${vpx_version##*=}" } # Echoes the short form of the current git hash. current_hash() { if git --version > /dev/null 2>&1; then (cd "$(dirname "${0}")" git rev-parse --short HEAD) else # Return the config hash if git is unavailable: Fail silently, git hashes # are used only for warnings. config_hash fi } # Echoes warnings to stdout when git hash in vpx_config.h does not match the # current git hash. check_git_hashes() { hash_at_configure_time=$(config_hash) hash_now=$(current_hash) if [ "${hash_at_configure_time}" != "${hash_now}" ]; then echo "Warning: git hash has changed since last configure." fi } # $1 is the name of an environment variable containing a directory name to # test. test_env_var_dir() { local dir=$(eval echo "\${$1}") if [ ! -d "${dir}" ]; then elog "'${dir}': No such directory" elog "The $1 environment variable must be set to a valid directory." return 1 fi } # This script requires that the LIBVPX_BIN_PATH, LIBVPX_CONFIG_PATH, and # LIBVPX_TEST_DATA_PATH variables are in the environment: Confirm that # the variables are set and that they all evaluate to directory paths. verify_vpx_test_environment() { test_env_var_dir "LIBVPX_BIN_PATH" \ && test_env_var_dir "LIBVPX_CONFIG_PATH" \ && test_env_var_dir "LIBVPX_TEST_DATA_PATH" } # Greps vpx_config.h in LIBVPX_CONFIG_PATH for positional parameter one, which # should be a LIBVPX preprocessor flag. Echoes yes to stdout when the feature # is available. 
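#
# A typical guard built on it (hypothetical variable name):
#   vp9_on=$(vpx_config_option_enabled CONFIG_VP9_DECODER)
#   [ "${vp9_on}" = "yes" ] || return 1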
vpx_config_option_enabled() { vpx_config_option="${1}" vpx_config_file="${LIBVPX_CONFIG_PATH}/vpx_config.h" config_line=$(grep "${vpx_config_option}" "${vpx_config_file}") if echo "${config_line}" | egrep -q '1$'; then echo yes fi } # Echoes yes when output of test_configuration_target() contains win32 or win64. is_windows_target() { if test_configuration_target \ | grep -q -e win32 -e win64 > /dev/null 2>&1; then echo yes fi } # Echoes path to $1 when it's executable and exists in ${LIBVPX_BIN_PATH}, or an # empty string. Caller is responsible for testing the string once the function # returns. vpx_tool_path() { local tool_name="$1" local tool_path="${LIBVPX_BIN_PATH}/${tool_name}${VPX_TEST_EXE_SUFFIX}" if [ ! -x "${tool_path}" ]; then # Try one directory up: when running via examples.sh the tool could be in # the parent directory of $LIBVPX_BIN_PATH. tool_path="${LIBVPX_BIN_PATH}/../${tool_name}${VPX_TEST_EXE_SUFFIX}" fi if [ ! -x "${tool_path}" ]; then tool_path="" fi echo "${tool_path}" } # Echoes yes to stdout when the file named by positional parameter one exists # in LIBVPX_BIN_PATH, and is executable. vpx_tool_available() { local tool_name="$1" local tool="${LIBVPX_BIN_PATH}/${tool_name}${VPX_TEST_EXE_SUFFIX}" [ -x "${tool}" ] && echo yes } # Echoes yes to stdout when vpx_config_option_enabled() reports yes for # CONFIG_VP8_DECODER. vp8_decode_available() { [ "$(vpx_config_option_enabled CONFIG_VP8_DECODER)" = "yes" ] && echo yes } # Echoes yes to stdout when vpx_config_option_enabled() reports yes for # CONFIG_VP8_ENCODER. vp8_encode_available() { [ "$(vpx_config_option_enabled CONFIG_VP8_ENCODER)" = "yes" ] && echo yes } # Echoes yes to stdout when vpx_config_option_enabled() reports yes for # CONFIG_VP9_DECODER. vp9_decode_available() { [ "$(vpx_config_option_enabled CONFIG_VP9_DECODER)" = "yes" ] && echo yes } # Echoes yes to stdout when vpx_config_option_enabled() reports yes for # CONFIG_VP9_ENCODER. vp9_encode_available() { [ "$(vpx_config_option_enabled CONFIG_VP9_ENCODER)" = "yes" ] && echo yes } # Echoes yes to stdout when vpx_config_option_enabled() reports yes for # CONFIG_WEBM_IO. webm_io_available() { [ "$(vpx_config_option_enabled CONFIG_WEBM_IO)" = "yes" ] && echo yes } # Filters strings from $1 using the filter specified by $2. Filter behavior # depends on the presence of $3. When $3 is present, strings that match the # filter are excluded. When $3 is omitted, strings matching the filter are # included. # The filtered result is echoed to stdout. filter_strings() { strings=${1} filter=${2} exclude=${3} if [ -n "${exclude}" ]; then # When positional parameter three exists the caller wants to remove strings. # Tell grep to invert matches using the -v argument. exclude='-v' else unset exclude fi if [ -n "${filter}" ]; then for s in ${strings}; do if echo "${s}" | egrep -q ${exclude} "${filter}" > /dev/null 2>&1; then filtered_strings="${filtered_strings} ${s}" fi done else filtered_strings="${strings}" fi echo "${filtered_strings}" } # Runs user test functions passed via positional parameters one and two. # Functions in positional parameter one are treated as environment verification # functions and are run unconditionally. Functions in positional parameter two # are run according to the rules specified in vpx_test_usage(). 
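#
# A minimal sketch of a caller (hypothetical test names):
#   my_env_check() { [ -e "${YUV_RAW_INPUT}" ]; }
#   run_tests my_env_check "test_foo DISABLED_test_bar"
# Here DISABLED_test_bar is skipped unless --run-disabled-tests was given.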
run_tests() { local env_tests="verify_vpx_test_environment $1" local tests_to_filter="$2" local test_name="${VPX_TEST_NAME}" if [ -z "${test_name}" ]; then test_name="$(basename "${0%.*}")" fi if [ "${VPX_TEST_RUN_DISABLED_TESTS}" != "yes" ]; then # Filter out DISABLED tests. tests_to_filter=$(filter_strings "${tests_to_filter}" ^DISABLED exclude) fi if [ -n "${VPX_TEST_FILTER}" ]; then # Remove tests not matching the user's filter. tests_to_filter=$(filter_strings "${tests_to_filter}" ${VPX_TEST_FILTER}) fi # User requested test listing: Dump test names and return. if [ "${VPX_TEST_LIST_TESTS}" = "yes" ]; then for test_name in $tests_to_filter; do echo ${test_name} done return fi # Don't bother with the environment tests if everything else was disabled. [ -z "${tests_to_filter}" ] && return # Combine environment and actual tests. local tests_to_run="${env_tests} ${tests_to_filter}" check_git_hashes # Run tests. for test in ${tests_to_run}; do test_begin "${test}" vlog " RUN ${test}" "${test}" vlog " PASS ${test}" test_end "${test}" done local tested_config="$(test_configuration_target) @ $(current_hash)" echo "${test_name}: Done, all tests pass for ${tested_config}." } vpx_test_usage() { cat << EOF Usage: ${0##*/} [arguments] --bin-path --config-path --filter : User test filter. Only tests matching filter are run. --run-disabled-tests: Run disabled tests. --help: Display this message and exit. --test-data-path --show-program-output: Shows output from all programs being tested. --prefix: Allows for a user specified prefix to be inserted before all test programs. Grants the ability, for example, to run test programs within valgrind. --list-tests: List all test names and exit without actually running tests. --verbose: Verbose output. When the --bin-path option is not specified the script attempts to use \$LIBVPX_BIN_PATH and then the current directory. When the --config-path option is not specified the script attempts to use \$LIBVPX_CONFIG_PATH and then the current directory. When the -test-data-path option is not specified the script attempts to use \$LIBVPX_TEST_DATA_PATH and then the current directory. EOF } # Returns non-zero (failure) when required environment variables are empty # strings. vpx_test_check_environment() { if [ -z "${LIBVPX_BIN_PATH}" ] || \ [ -z "${LIBVPX_CONFIG_PATH}" ] || \ [ -z "${LIBVPX_TEST_DATA_PATH}" ]; then return 1 fi } # Parse the command line. while [ -n "$1" ]; do case "$1" in --bin-path) LIBVPX_BIN_PATH="$2" shift ;; --config-path) LIBVPX_CONFIG_PATH="$2" shift ;; --filter) VPX_TEST_FILTER="$2" shift ;; --run-disabled-tests) VPX_TEST_RUN_DISABLED_TESTS=yes ;; --help) vpx_test_usage exit ;; --test-data-path) LIBVPX_TEST_DATA_PATH="$2" shift ;; --prefix) VPX_TEST_PREFIX="$2" shift ;; --verbose) VPX_TEST_VERBOSE_OUTPUT=yes ;; --show-program-output) devnull= ;; --list-tests) VPX_TEST_LIST_TESTS=yes ;; *) vpx_test_usage exit 1 ;; esac shift done # Handle running the tests from a build directory without arguments when running # the tests on *nix/macosx. LIBVPX_BIN_PATH="${LIBVPX_BIN_PATH:-.}" LIBVPX_CONFIG_PATH="${LIBVPX_CONFIG_PATH:-.}" LIBVPX_TEST_DATA_PATH="${LIBVPX_TEST_DATA_PATH:-.}" # Create a temporary directory for output files, and a trap to clean it up. if [ -n "${TMPDIR}" ]; then VPX_TEST_TEMP_ROOT="${TMPDIR}" elif [ -n "${TEMPDIR}" ]; then VPX_TEST_TEMP_ROOT="${TEMPDIR}" else VPX_TEST_TEMP_ROOT=/tmp fi VPX_TEST_OUTPUT_DIR="${VPX_TEST_TEMP_ROOT}/vpx_test_$$" if ! mkdir -p "${VPX_TEST_OUTPUT_DIR}" || \ [ ! 
-d "${VPX_TEST_OUTPUT_DIR}" ]; then echo "${0##*/}: Cannot create output directory, giving up." echo "${0##*/}: VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR}" exit 1 fi if [ "$(is_windows_target)" = "yes" ]; then VPX_TEST_EXE_SUFFIX=".exe" fi # Variables shared by tests. VP8_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp80-00-comprehensive-001.ivf" VP9_IVF_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-09-subpixel-00.ivf" VP9_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-00-quantizer-00.webm" VP9_FPM_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-07-frame_parallel-1.webm" VP9_LT_50_FRAMES_WEBM_FILE="${LIBVPX_TEST_DATA_PATH}/vp90-2-02-size-32x08.webm" VP9_RAW_FILE="${LIBVPX_TEST_DATA_PATH}/crbug-1539.rawfile" YUV_RAW_INPUT="${LIBVPX_TEST_DATA_PATH}/hantro_collage_w352h288.yuv" YUV_RAW_INPUT_WIDTH=352 YUV_RAW_INPUT_HEIGHT=288 Y4M_NOSQ_PAR_INPUT="${LIBVPX_TEST_DATA_PATH}/park_joy_90p_8_420_a10-1.y4m" Y4M_720P_INPUT="${LIBVPX_TEST_DATA_PATH}/niklas_1280_720_30.y4m" Y4M_720P_INPUT_WIDTH=1280 Y4M_720P_INPUT_HEIGHT=720 # Setup a trap function to clean up after tests complete. trap cleanup EXIT vlog "$(basename "${0%.*}") test configuration: LIBVPX_BIN_PATH=${LIBVPX_BIN_PATH} LIBVPX_CONFIG_PATH=${LIBVPX_CONFIG_PATH} LIBVPX_TEST_DATA_PATH=${LIBVPX_TEST_DATA_PATH} VP8_IVF_FILE=${VP8_IVF_FILE} VP9_IVF_FILE=${VP9_IVF_FILE} VP9_WEBM_FILE=${VP9_WEBM_FILE} VPX_TEST_EXE_SUFFIX=${VPX_TEST_EXE_SUFFIX} VPX_TEST_FILTER=${VPX_TEST_FILTER} VPX_TEST_LIST_TESTS=${VPX_TEST_LIST_TESTS} VPX_TEST_OUTPUT_DIR=${VPX_TEST_OUTPUT_DIR} VPX_TEST_PREFIX=${VPX_TEST_PREFIX} VPX_TEST_RUN_DISABLED_TESTS=${VPX_TEST_RUN_DISABLED_TESTS} VPX_TEST_SHOW_PROGRAM_OUTPUT=${VPX_TEST_SHOW_PROGRAM_OUTPUT} VPX_TEST_TEMP_ROOT=${VPX_TEST_TEMP_ROOT} VPX_TEST_VERBOSE_OUTPUT=${VPX_TEST_VERBOSE_OUTPUT} YUV_RAW_INPUT=${YUV_RAW_INPUT} YUV_RAW_INPUT_WIDTH=${YUV_RAW_INPUT_WIDTH} YUV_RAW_INPUT_HEIGHT=${YUV_RAW_INPUT_HEIGHT} Y4M_NOSQ_PAR_INPUT=${Y4M_NOSQ_PAR_INPUT}" fi # End $VPX_TEST_TOOLS_COMMON_SH pseudo include guard. libvpx-1.8.2/test/twopass_encoder.sh000077500000000000000000000036651357355204000175610ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file tests the libvpx twopass_encoder example. To add new tests to this ## file, do the following: ## 1. Write a shell function (this is your test). ## 2. Add the function to twopass_encoder_tests (on a new line). ## . $(dirname $0)/tools_common.sh # Environment check: $YUV_RAW_INPUT is required. twopass_encoder_verify_environment() { if [ ! -e "${YUV_RAW_INPUT}" ]; then echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi } # Runs twopass_encoder using the codec specified by $1 with a frame limit of # 100. twopass_encoder() { local encoder="${LIBVPX_BIN_PATH}/twopass_encoder${VPX_TEST_EXE_SUFFIX}" local codec="$1" local output_file="${VPX_TEST_OUTPUT_DIR}/twopass_encoder_${codec}.ivf" if [ ! -x "${encoder}" ]; then elog "${encoder} does not exist or is not executable." 
return 1 fi eval "${VPX_TEST_PREFIX}" "${encoder}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ "${YUV_RAW_INPUT_HEIGHT}" "${YUV_RAW_INPUT}" "${output_file}" 100 \ ${devnull} [ -e "${output_file}" ] || return 1 } twopass_encoder_vp8() { if [ "$(vp8_encode_available)" = "yes" ]; then twopass_encoder vp8 || return 1 fi } twopass_encoder_vp9() { if [ "$(vp9_encode_available)" = "yes" ]; then twopass_encoder vp9 || return 1 fi } if [ "$(vpx_config_option_enabled CONFIG_REALTIME_ONLY)" != "yes" ]; then twopass_encoder_tests="twopass_encoder_vp8 twopass_encoder_vp9" run_tests twopass_encoder_verify_environment "${twopass_encoder_tests}" fi libvpx-1.8.2/test/user_priv_test.cc000066400000000000000000000060541357355204000174020ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <cstdio> #include <cstdlib> #include <string> #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "test/acm_random.h" #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/ivf_video_source.h" #include "test/md5_helper.h" #include "test/util.h" #if CONFIG_WEBM_IO #include "test/webm_video_source.h" #endif #include "vpx_mem/vpx_mem.h" #include "vpx/vp8.h" namespace { using libvpx_test::ACMRandom; using std::string; #if CONFIG_WEBM_IO void CheckUserPrivateData(void *user_priv, int *target) { // Actual pointer value should be the same as expected. EXPECT_EQ(reinterpret_cast<void *>(target), user_priv) << "user_priv pointer value does not match."; } // Decodes |filename|. Passes in user_priv data when calling DecodeFrame and // compares the user_priv from the returned img with the original user_priv to // see if they match. Both the pointer values and the values inside the // addresses should match. string DecodeFile(const string &filename) { ACMRandom rnd(ACMRandom::DeterministicSeed()); libvpx_test::WebMVideoSource video(filename); video.Init(); vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); libvpx_test::VP9Decoder decoder(cfg, 0); libvpx_test::MD5 md5; int frame_num = 0; for (video.Begin(); !::testing::Test::HasFailure() && video.cxdata(); video.Next()) { void *user_priv = reinterpret_cast<void *>(&frame_num); const vpx_codec_err_t res = decoder.DecodeFrame(video.cxdata(), video.frame_size(), (frame_num == 0) ? NULL : user_priv); if (res != VPX_CODEC_OK) { EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError(); break; } libvpx_test::DxDataIterator dec_iter = decoder.GetDxData(); const vpx_image_t *img = NULL; // Get decompressed data. while ((img = dec_iter.Next())) { if (frame_num == 0) { CheckUserPrivateData(img->user_priv, NULL); } else { CheckUserPrivateData(img->user_priv, &frame_num); // Also test ctrl_get_reference api. struct vp9_ref_frame ref = vp9_ref_frame(); // Randomly fetch a reference frame. ref.idx = rnd.Rand8() % 3; decoder.Control(VP9_GET_REFERENCE, &ref); CheckUserPrivateData(ref.img.user_priv, NULL); } md5.Add(img); } frame_num++; } return string(md5.Get()); } TEST(UserPrivTest, VideoDecode) { // No tiles or frame parallel; this exercises the decoding to test the // user_priv.
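// The round trip exercised here (see DecodeFile above): a pointer handed to
// DecodeFrame() must come back unchanged on the decoded image, i.e.
//   decoder.DecodeFrame(data, size, &frame_num);
//   img = decoder.GetDxData().Next();  // img->user_priv == &frame_num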
EXPECT_STREQ("b35a1b707b28e82be025d960aba039bc", DecodeFile("vp90-2-03-size-226x226.webm").c_str()); } #endif // CONFIG_WEBM_IO } // namespace libvpx-1.8.2/test/util.h000066400000000000000000000026401357355204000151410ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_UTIL_H_ #define VPX_TEST_UTIL_H_ #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "vpx/vpx_image.h" // Macros #define GET_PARAM(k) std::get(GetParam()) inline double compute_psnr(const vpx_image_t *img1, const vpx_image_t *img2) { assert((img1->fmt == img2->fmt) && (img1->d_w == img2->d_w) && (img1->d_h == img2->d_h)); const unsigned int width_y = img1->d_w; const unsigned int height_y = img1->d_h; unsigned int i, j; int64_t sqrerr = 0; for (i = 0; i < height_y; ++i) { for (j = 0; j < width_y; ++j) { int64_t d = img1->planes[VPX_PLANE_Y][i * img1->stride[VPX_PLANE_Y] + j] - img2->planes[VPX_PLANE_Y][i * img2->stride[VPX_PLANE_Y] + j]; sqrerr += d * d; } } double mse = static_cast(sqrerr) / (width_y * height_y); double psnr = 100.0; if (mse > 0.0) { psnr = 10 * log10(255.0 * 255.0 / mse); } return psnr; } #endif // VPX_TEST_UTIL_H_ libvpx-1.8.2/test/variance_test.cc000066400000000000000000002231051357355204000171520ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "vpx/vpx_codec.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/variance.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" namespace { typedef unsigned int (*Get4x4SseFunc)(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride); typedef unsigned int (*SumOfSquaresFunction)(const int16_t *src); using libvpx_test::ACMRandom; // Truncate high bit depth results by downshifting (with rounding) by: // 2 * (bit_depth - 8) for sse // (bit_depth - 8) for se static void RoundHighBitDepth(int bit_depth, int64_t *se, uint64_t *sse) { switch (bit_depth) { case VPX_BITS_12: *sse = (*sse + 128) >> 8; *se = (*se + 8) >> 4; break; case VPX_BITS_10: *sse = (*sse + 8) >> 4; *se = (*se + 2) >> 2; break; case VPX_BITS_8: default: break; } } static unsigned int mb_ss_ref(const int16_t *src) { unsigned int res = 0; for (int i = 0; i < 256; ++i) { res += src[i] * src[i]; } return res; } /* Note: * Our codebase calculates the "diff" value in the variance algorithm by * (src - ref). 
*/ static uint32_t variance_ref(const uint8_t *src, const uint8_t *ref, int l2w, int l2h, int src_stride, int ref_stride, uint32_t *sse_ptr, bool use_high_bit_depth_, vpx_bit_depth_t bit_depth) { int64_t se = 0; uint64_t sse = 0; const int w = 1 << l2w; const int h = 1 << l2h; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { int diff; if (!use_high_bit_depth_) { diff = src[y * src_stride + x] - ref[y * ref_stride + x]; se += diff; sse += diff * diff; #if CONFIG_VP9_HIGHBITDEPTH } else { diff = CONVERT_TO_SHORTPTR(src)[y * src_stride + x] - CONVERT_TO_SHORTPTR(ref)[y * ref_stride + x]; se += diff; sse += diff * diff; #endif // CONFIG_VP9_HIGHBITDEPTH } } } RoundHighBitDepth(bit_depth, &se, &sse); *sse_ptr = static_cast(sse); return static_cast( sse - ((static_cast(se) * se) >> (l2w + l2h))); } /* The subpel reference functions differ from the codec version in one aspect: * they calculate the bilinear factors directly instead of using a lookup table * and therefore upshift xoff and yoff by 1. Only every other calculated value * is used so the codec version shrinks the table to save space and maintain * compatibility with vp8. */ static uint32_t subpel_variance_ref(const uint8_t *ref, const uint8_t *src, int l2w, int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth_, vpx_bit_depth_t bit_depth) { int64_t se = 0; uint64_t sse = 0; const int w = 1 << l2w; const int h = 1 << l2h; xoff <<= 1; yoff <<= 1; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { // Bilinear interpolation at a 16th pel step. if (!use_high_bit_depth_) { const int a1 = ref[(w + 1) * (y + 0) + x + 0]; const int a2 = ref[(w + 1) * (y + 0) + x + 1]; const int b1 = ref[(w + 1) * (y + 1) + x + 0]; const int b2 = ref[(w + 1) * (y + 1) + x + 1]; const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); const int r = a + (((b - a) * yoff + 8) >> 4); const int diff = r - src[w * y + x]; se += diff; sse += diff * diff; #if CONFIG_VP9_HIGHBITDEPTH } else { uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref); uint16_t *src16 = CONVERT_TO_SHORTPTR(src); const int a1 = ref16[(w + 1) * (y + 0) + x + 0]; const int a2 = ref16[(w + 1) * (y + 0) + x + 1]; const int b1 = ref16[(w + 1) * (y + 1) + x + 0]; const int b2 = ref16[(w + 1) * (y + 1) + x + 1]; const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); const int r = a + (((b - a) * yoff + 8) >> 4); const int diff = r - src16[w * y + x]; se += diff; sse += diff * diff; #endif // CONFIG_VP9_HIGHBITDEPTH } } } RoundHighBitDepth(bit_depth, &se, &sse); *sse_ptr = static_cast(sse); return static_cast( sse - ((static_cast(se) * se) >> (l2w + l2h))); } static uint32_t subpel_avg_variance_ref(const uint8_t *ref, const uint8_t *src, const uint8_t *second_pred, int l2w, int l2h, int xoff, int yoff, uint32_t *sse_ptr, bool use_high_bit_depth, vpx_bit_depth_t bit_depth) { int64_t se = 0; uint64_t sse = 0; const int w = 1 << l2w; const int h = 1 << l2h; xoff <<= 1; yoff <<= 1; for (int y = 0; y < h; y++) { for (int x = 0; x < w; x++) { // bilinear interpolation at a 16th pel step if (!use_high_bit_depth) { const int a1 = ref[(w + 1) * (y + 0) + x + 0]; const int a2 = ref[(w + 1) * (y + 0) + x + 1]; const int b1 = ref[(w + 1) * (y + 1) + x + 0]; const int b2 = ref[(w + 1) * (y + 1) + x + 1]; const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); const int r = a + (((b - a) * yoff + 8) >> 4); const int diff = ((r + second_pred[w * y + x] + 1) >> 
1) - src[w * y + x]; se += diff; sse += diff * diff; #if CONFIG_VP9_HIGHBITDEPTH } else { const uint16_t *ref16 = CONVERT_TO_SHORTPTR(ref); const uint16_t *src16 = CONVERT_TO_SHORTPTR(src); const uint16_t *sec16 = CONVERT_TO_SHORTPTR(second_pred); const int a1 = ref16[(w + 1) * (y + 0) + x + 0]; const int a2 = ref16[(w + 1) * (y + 0) + x + 1]; const int b1 = ref16[(w + 1) * (y + 1) + x + 0]; const int b2 = ref16[(w + 1) * (y + 1) + x + 1]; const int a = a1 + (((a2 - a1) * xoff + 8) >> 4); const int b = b1 + (((b2 - b1) * xoff + 8) >> 4); const int r = a + (((b - a) * yoff + 8) >> 4); const int diff = ((r + sec16[w * y + x] + 1) >> 1) - src16[w * y + x]; se += diff; sse += diff * diff; #endif // CONFIG_VP9_HIGHBITDEPTH } } } RoundHighBitDepth(bit_depth, &se, &sse); *sse_ptr = static_cast(sse); return static_cast( sse - ((static_cast(se) * se) >> (l2w + l2h))); } //////////////////////////////////////////////////////////////////////////////// class SumOfSquaresTest : public ::testing::TestWithParam { public: SumOfSquaresTest() : func_(GetParam()) {} virtual ~SumOfSquaresTest() { libvpx_test::ClearSystemState(); } protected: void ConstTest(); void RefTest(); SumOfSquaresFunction func_; ACMRandom rnd_; }; void SumOfSquaresTest::ConstTest() { int16_t mem[256]; unsigned int res; for (int v = 0; v < 256; ++v) { for (int i = 0; i < 256; ++i) { mem[i] = v; } ASM_REGISTER_STATE_CHECK(res = func_(mem)); EXPECT_EQ(256u * (v * v), res); } } void SumOfSquaresTest::RefTest() { int16_t mem[256]; for (int i = 0; i < 100; ++i) { for (int j = 0; j < 256; ++j) { mem[j] = rnd_.Rand8() - rnd_.Rand8(); } const unsigned int expected = mb_ss_ref(mem); unsigned int res; ASM_REGISTER_STATE_CHECK(res = func_(mem)); EXPECT_EQ(expected, res); } } //////////////////////////////////////////////////////////////////////////////// // Encapsulating struct to store the function to test along with // some testing context. // Can be used for MSE, SSE, Variance, etc. template struct TestParams { TestParams(int log2w = 0, int log2h = 0, Func function = NULL, int bit_depth_value = 0) : log2width(log2w), log2height(log2h), func(function) { use_high_bit_depth = (bit_depth_value > 0); if (use_high_bit_depth) { bit_depth = static_cast(bit_depth_value); } else { bit_depth = VPX_BITS_8; } width = 1 << log2width; height = 1 << log2height; block_size = width * height; mask = (1u << bit_depth) - 1; } int log2width, log2height; int width, height; int block_size; Func func; vpx_bit_depth_t bit_depth; bool use_high_bit_depth; uint32_t mask; }; template std::ostream &operator<<(std::ostream &os, const TestParams &p) { return os << "log2width/height:" << p.log2width << "/" << p.log2height << " function:" << reinterpret_cast(p.func) << " bit-depth:" << p.bit_depth; } // Main class for testing a function type template class MainTestClass : public ::testing::TestWithParam > { public: virtual void SetUp() { params_ = this->GetParam(); rnd_.Reset(ACMRandom::DeterministicSeed()); const size_t unit = use_high_bit_depth() ? sizeof(uint16_t) : sizeof(uint8_t); src_ = reinterpret_cast(vpx_memalign(16, block_size() * unit)); ref_ = new uint8_t[block_size() * unit]; ASSERT_TRUE(src_ != NULL); ASSERT_TRUE(ref_ != NULL); #if CONFIG_VP9_HIGHBITDEPTH if (use_high_bit_depth()) { // TODO(skal): remove! src_ = CONVERT_TO_BYTEPTR(src_); ref_ = CONVERT_TO_BYTEPTR(ref_); } #endif } virtual void TearDown() { #if CONFIG_VP9_HIGHBITDEPTH if (use_high_bit_depth()) { // TODO(skal): remove! 
src_ = reinterpret_cast(CONVERT_TO_SHORTPTR(src_)); ref_ = reinterpret_cast(CONVERT_TO_SHORTPTR(ref_)); } #endif vpx_free(src_); delete[] ref_; src_ = NULL; ref_ = NULL; libvpx_test::ClearSystemState(); } protected: // We could sub-class MainTestClass into dedicated class for Variance // and MSE/SSE, but it involves a lot of 'this->xxx' dereferencing // to access top class fields xxx. That's cumbersome, so for now we'll just // implement the testing methods here: // Variance tests void ZeroTest(); void RefTest(); void RefStrideTest(); void OneQuarterTest(); void SpeedTest(); // MSE/SSE tests void RefTestMse(); void RefTestSse(); void MaxTestMse(); void MaxTestSse(); protected: ACMRandom rnd_; uint8_t *src_; uint8_t *ref_; TestParams params_; // some relay helpers bool use_high_bit_depth() const { return params_.use_high_bit_depth; } int byte_shift() const { return params_.bit_depth - 8; } int block_size() const { return params_.block_size; } int width() const { return params_.width; } int height() const { return params_.height; } uint32_t mask() const { return params_.mask; } }; //////////////////////////////////////////////////////////////////////////////// // Tests related to variance. template void MainTestClass::ZeroTest() { for (int i = 0; i <= 255; ++i) { if (!use_high_bit_depth()) { memset(src_, i, block_size()); } else { uint16_t *const src16 = CONVERT_TO_SHORTPTR(src_); for (int k = 0; k < block_size(); ++k) src16[k] = i << byte_shift(); } for (int j = 0; j <= 255; ++j) { if (!use_high_bit_depth()) { memset(ref_, j, block_size()); } else { uint16_t *const ref16 = CONVERT_TO_SHORTPTR(ref_); for (int k = 0; k < block_size(); ++k) ref16[k] = j << byte_shift(); } unsigned int sse, var; ASM_REGISTER_STATE_CHECK( var = params_.func(src_, width(), ref_, width(), &sse)); EXPECT_EQ(0u, var) << "src values: " << i << " ref values: " << j; } } } template void MainTestClass::RefTest() { for (int i = 0; i < 10; ++i) { for (int j = 0; j < block_size(); j++) { if (!use_high_bit_depth()) { src_[j] = rnd_.Rand8(); ref_[j] = rnd_.Rand8(); #if CONFIG_VP9_HIGHBITDEPTH } else { CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); #endif // CONFIG_VP9_HIGHBITDEPTH } } unsigned int sse1, sse2, var1, var2; const int stride = width(); ASM_REGISTER_STATE_CHECK( var1 = params_.func(src_, stride, ref_, stride, &sse1)); var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, stride, &sse2, use_high_bit_depth(), params_.bit_depth); EXPECT_EQ(sse1, sse2) << "Error at test index: " << i; EXPECT_EQ(var1, var2) << "Error at test index: " << i; } } template void MainTestClass::RefStrideTest() { for (int i = 0; i < 10; ++i) { const int ref_stride = (i & 1) * width(); const int src_stride = ((i >> 1) & 1) * width(); for (int j = 0; j < block_size(); j++) { const int ref_ind = (j / width()) * ref_stride + j % width(); const int src_ind = (j / width()) * src_stride + j % width(); if (!use_high_bit_depth()) { src_[src_ind] = rnd_.Rand8(); ref_[ref_ind] = rnd_.Rand8(); #if CONFIG_VP9_HIGHBITDEPTH } else { CONVERT_TO_SHORTPTR(src_)[src_ind] = rnd_.Rand16() & mask(); CONVERT_TO_SHORTPTR(ref_)[ref_ind] = rnd_.Rand16() & mask(); #endif // CONFIG_VP9_HIGHBITDEPTH } } unsigned int sse1, sse2; unsigned int var1, var2; ASM_REGISTER_STATE_CHECK( var1 = params_.func(src_, src_stride, ref_, ref_stride, &sse1)); var2 = variance_ref(src_, ref_, params_.log2width, params_.log2height, src_stride, ref_stride, &sse2, use_high_bit_depth(), params_.bit_depth); 
EXPECT_EQ(sse1, sse2) << "Error at test index: " << i; EXPECT_EQ(var1, var2) << "Error at test index: " << i; } } template void MainTestClass::OneQuarterTest() { const int half = block_size() / 2; if (!use_high_bit_depth()) { memset(src_, 255, block_size()); memset(ref_, 255, half); memset(ref_ + half, 0, half); #if CONFIG_VP9_HIGHBITDEPTH } else { vpx_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size()); vpx_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half); vpx_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half); #endif // CONFIG_VP9_HIGHBITDEPTH } unsigned int sse, var, expected; ASM_REGISTER_STATE_CHECK( var = params_.func(src_, width(), ref_, width(), &sse)); expected = block_size() * 255 * 255 / 4; EXPECT_EQ(expected, var); } template void MainTestClass::SpeedTest() { const int half = block_size() / 2; if (!use_high_bit_depth()) { memset(src_, 255, block_size()); memset(ref_, 255, half); memset(ref_ + half, 0, half); #if CONFIG_VP9_HIGHBITDEPTH } else { vpx_memset16(CONVERT_TO_SHORTPTR(src_), 255 << byte_shift(), block_size()); vpx_memset16(CONVERT_TO_SHORTPTR(ref_), 255 << byte_shift(), half); vpx_memset16(CONVERT_TO_SHORTPTR(ref_) + half, 0, half); #endif // CONFIG_VP9_HIGHBITDEPTH } unsigned int sse; vpx_usec_timer timer; vpx_usec_timer_start(&timer); for (int i = 0; i < (1 << 30) / block_size(); ++i) { const uint32_t variance = params_.func(src_, width(), ref_, width(), &sse); // Ignore return value. (void)variance; } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf("Variance %dx%d time: %5d ms\n", width(), height(), elapsed_time / 1000); } //////////////////////////////////////////////////////////////////////////////// // Tests related to MSE / SSE. template void MainTestClass::RefTestMse() { for (int i = 0; i < 10; ++i) { for (int j = 0; j < block_size(); ++j) { src_[j] = rnd_.Rand8(); ref_[j] = rnd_.Rand8(); } unsigned int sse1, sse2; const int stride = width(); ASM_REGISTER_STATE_CHECK(params_.func(src_, stride, ref_, stride, &sse1)); variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, stride, &sse2, false, VPX_BITS_8); EXPECT_EQ(sse1, sse2); } } template void MainTestClass::RefTestSse() { for (int i = 0; i < 10; ++i) { for (int j = 0; j < block_size(); ++j) { src_[j] = rnd_.Rand8(); ref_[j] = rnd_.Rand8(); } unsigned int sse2; unsigned int var1; const int stride = width(); ASM_REGISTER_STATE_CHECK(var1 = params_.func(src_, stride, ref_, stride)); variance_ref(src_, ref_, params_.log2width, params_.log2height, stride, stride, &sse2, false, VPX_BITS_8); EXPECT_EQ(var1, sse2); } } template void MainTestClass::MaxTestMse() { memset(src_, 255, block_size()); memset(ref_, 0, block_size()); unsigned int sse; ASM_REGISTER_STATE_CHECK(params_.func(src_, width(), ref_, width(), &sse)); const unsigned int expected = block_size() * 255 * 255; EXPECT_EQ(expected, sse); } template void MainTestClass::MaxTestSse() { memset(src_, 255, block_size()); memset(ref_, 0, block_size()); unsigned int var; ASM_REGISTER_STATE_CHECK(var = params_.func(src_, width(), ref_, width())); const unsigned int expected = block_size() * 255 * 255; EXPECT_EQ(expected, var); } //////////////////////////////////////////////////////////////////////////////// template class SubpelVarianceTest : public ::testing::TestWithParam > { public: virtual void SetUp() { params_ = this->GetParam(); rnd_.Reset(ACMRandom::DeterministicSeed()); if (!use_high_bit_depth()) { src_ = reinterpret_cast(vpx_memalign(16, 
block_size())); sec_ = reinterpret_cast(vpx_memalign(16, block_size())); ref_ = reinterpret_cast( vpx_malloc(block_size() + width() + height() + 1)); #if CONFIG_VP9_HIGHBITDEPTH } else { src_ = CONVERT_TO_BYTEPTR(reinterpret_cast( vpx_memalign(16, block_size() * sizeof(uint16_t)))); sec_ = CONVERT_TO_BYTEPTR(reinterpret_cast( vpx_memalign(16, block_size() * sizeof(uint16_t)))); ref_ = CONVERT_TO_BYTEPTR(reinterpret_cast(vpx_malloc( (block_size() + width() + height() + 1) * sizeof(uint16_t)))); #endif // CONFIG_VP9_HIGHBITDEPTH } ASSERT_TRUE(src_ != NULL); ASSERT_TRUE(sec_ != NULL); ASSERT_TRUE(ref_ != NULL); } virtual void TearDown() { if (!use_high_bit_depth()) { vpx_free(src_); vpx_free(sec_); vpx_free(ref_); #if CONFIG_VP9_HIGHBITDEPTH } else { vpx_free(CONVERT_TO_SHORTPTR(src_)); vpx_free(CONVERT_TO_SHORTPTR(ref_)); vpx_free(CONVERT_TO_SHORTPTR(sec_)); #endif // CONFIG_VP9_HIGHBITDEPTH } libvpx_test::ClearSystemState(); } protected: void RefTest(); void ExtremeRefTest(); ACMRandom rnd_; uint8_t *src_; uint8_t *ref_; uint8_t *sec_; TestParams params_; // some relay helpers bool use_high_bit_depth() const { return params_.use_high_bit_depth; } int byte_shift() const { return params_.bit_depth - 8; } int block_size() const { return params_.block_size; } int width() const { return params_.width; } int height() const { return params_.height; } uint32_t mask() const { return params_.mask; } }; template void SubpelVarianceTest::RefTest() { for (int x = 0; x < 8; ++x) { for (int y = 0; y < 8; ++y) { if (!use_high_bit_depth()) { for (int j = 0; j < block_size(); j++) { src_[j] = rnd_.Rand8(); } for (int j = 0; j < block_size() + width() + height() + 1; j++) { ref_[j] = rnd_.Rand8(); } #if CONFIG_VP9_HIGHBITDEPTH } else { for (int j = 0; j < block_size(); j++) { CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); } for (int j = 0; j < block_size() + width() + height() + 1; j++) { CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); } #endif // CONFIG_VP9_HIGHBITDEPTH } unsigned int sse1, sse2; unsigned int var1; ASM_REGISTER_STATE_CHECK( var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1)); const unsigned int var2 = subpel_variance_ref( ref_, src_, params_.log2width, params_.log2height, x, y, &sse2, use_high_bit_depth(), params_.bit_depth); EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; } } } template void SubpelVarianceTest::ExtremeRefTest() { // Compare against reference. // Src: Set the first half of values to 0, the second half to the maximum. // Ref: Set the first half of values to the maximum, the second half to 0. 
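// (These half-and-half extremes push every |diff| toward its maximum, the
// case most likely to expose intermediate overflow in optimized versions.)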
for (int x = 0; x < 8; ++x) { for (int y = 0; y < 8; ++y) { const int half = block_size() / 2; if (!use_high_bit_depth()) { memset(src_, 0, half); memset(src_ + half, 255, half); memset(ref_, 255, half); memset(ref_ + half, 0, half + width() + height() + 1); #if CONFIG_VP9_HIGHBITDEPTH } else { vpx_memset16(CONVERT_TO_SHORTPTR(src_), mask(), half); vpx_memset16(CONVERT_TO_SHORTPTR(src_) + half, 0, half); vpx_memset16(CONVERT_TO_SHORTPTR(ref_), 0, half); vpx_memset16(CONVERT_TO_SHORTPTR(ref_) + half, mask(), half + width() + height() + 1); #endif // CONFIG_VP9_HIGHBITDEPTH } unsigned int sse1, sse2; unsigned int var1; ASM_REGISTER_STATE_CHECK( var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1)); const unsigned int var2 = subpel_variance_ref( ref_, src_, params_.log2width, params_.log2height, x, y, &sse2, use_high_bit_depth(), params_.bit_depth); EXPECT_EQ(sse1, sse2) << "for xoffset " << x << " and yoffset " << y; EXPECT_EQ(var1, var2) << "for xoffset " << x << " and yoffset " << y; } } } template <> void SubpelVarianceTest<vpx_subp_avg_variance_fn_t>::RefTest() { for (int x = 0; x < 8; ++x) { for (int y = 0; y < 8; ++y) { if (!use_high_bit_depth()) { for (int j = 0; j < block_size(); j++) { src_[j] = rnd_.Rand8(); sec_[j] = rnd_.Rand8(); } for (int j = 0; j < block_size() + width() + height() + 1; j++) { ref_[j] = rnd_.Rand8(); } #if CONFIG_VP9_HIGHBITDEPTH } else { for (int j = 0; j < block_size(); j++) { CONVERT_TO_SHORTPTR(src_)[j] = rnd_.Rand16() & mask(); CONVERT_TO_SHORTPTR(sec_)[j] = rnd_.Rand16() & mask(); } for (int j = 0; j < block_size() + width() + height() + 1; j++) { CONVERT_TO_SHORTPTR(ref_)[j] = rnd_.Rand16() & mask(); } #endif // CONFIG_VP9_HIGHBITDEPTH } uint32_t sse1, sse2; uint32_t var1, var2; ASM_REGISTER_STATE_CHECK(var1 = params_.func(ref_, width() + 1, x, y, src_, width(), &sse1, sec_)); var2 = subpel_avg_variance_ref(ref_, src_, sec_, params_.log2width, params_.log2height, x, y, &sse2, use_high_bit_depth(), params_.bit_depth); EXPECT_EQ(sse1, sse2) << "at position " << x << ", " << y; EXPECT_EQ(var1, var2) << "at position " << x << ", " << y; } } } typedef MainTestClass<Get4x4SseFunc> VpxSseTest; typedef MainTestClass<vpx_variance_fn_t> VpxMseTest; typedef MainTestClass<vpx_variance_fn_t> VpxVarianceTest; typedef SubpelVarianceTest<vpx_subpixvariance_fn_t> VpxSubpelVarianceTest; typedef SubpelVarianceTest<vpx_subp_avg_variance_fn_t> VpxSubpelAvgVarianceTest; TEST_P(VpxSseTest, RefSse) { RefTestSse(); } TEST_P(VpxSseTest, MaxSse) { MaxTestSse(); } TEST_P(VpxMseTest, RefMse) { RefTestMse(); } TEST_P(VpxMseTest, MaxMse) { MaxTestMse(); } TEST_P(VpxVarianceTest, Zero) { ZeroTest(); } TEST_P(VpxVarianceTest, Ref) { RefTest(); } TEST_P(VpxVarianceTest, RefStride) { RefStrideTest(); } TEST_P(VpxVarianceTest, OneQuarter) { OneQuarterTest(); } TEST_P(VpxVarianceTest, DISABLED_Speed) { SpeedTest(); } TEST_P(SumOfSquaresTest, Const) { ConstTest(); } TEST_P(SumOfSquaresTest, Ref) { RefTest(); } TEST_P(VpxSubpelVarianceTest, Ref) { RefTest(); } TEST_P(VpxSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } TEST_P(VpxSubpelAvgVarianceTest, Ref) { RefTest(); } INSTANTIATE_TEST_CASE_P(C, SumOfSquaresTest, ::testing::Values(vpx_get_mb_ss_c)); typedef TestParams<Get4x4SseFunc> SseParams; INSTANTIATE_TEST_CASE_P(C, VpxSseTest, ::testing::Values(SseParams(2, 2, &vpx_get4x4sse_cs_c))); typedef TestParams<vpx_variance_fn_t> MseParams; INSTANTIATE_TEST_CASE_P(C, VpxMseTest, ::testing::Values(MseParams(4, 4, &vpx_mse16x16_c), MseParams(4, 3, &vpx_mse16x8_c), MseParams(3, 4, &vpx_mse8x16_c), MseParams(3, 3, &vpx_mse8x8_c))); typedef TestParams<vpx_variance_fn_t> VarianceParams; INSTANTIATE_TEST_CASE_P( C, VpxVarianceTest,
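// Each VarianceParams is (log2width, log2height, fn); e.g. (6, 6) below is
// the 64x64 case, since TestParams computes width = 1 << log2width.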
::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_c), VarianceParams(6, 5, &vpx_variance64x32_c), VarianceParams(5, 6, &vpx_variance32x64_c), VarianceParams(5, 5, &vpx_variance32x32_c), VarianceParams(5, 4, &vpx_variance32x16_c), VarianceParams(4, 5, &vpx_variance16x32_c), VarianceParams(4, 4, &vpx_variance16x16_c), VarianceParams(4, 3, &vpx_variance16x8_c), VarianceParams(3, 4, &vpx_variance8x16_c), VarianceParams(3, 3, &vpx_variance8x8_c), VarianceParams(3, 2, &vpx_variance8x4_c), VarianceParams(2, 3, &vpx_variance4x8_c), VarianceParams(2, 2, &vpx_variance4x4_c))); typedef TestParams SubpelVarianceParams; INSTANTIATE_TEST_CASE_P( C, VpxSubpelVarianceTest, ::testing::Values( SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_c, 0), SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_c, 0), SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_c, 0), SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_c, 0), SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_c, 0), SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_c, 0), SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_c, 0), SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_c, 0), SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_c, 0), SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_c, 0), SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_c, 0), SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_c, 0), SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_c, 0))); typedef TestParams SubpelAvgVarianceParams; INSTANTIATE_TEST_CASE_P( C, VpxSubpelAvgVarianceTest, ::testing::Values( SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_c, 0), SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_c, 0), SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_c, 0), SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_c, 0), SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_c, 0), SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_c, 0), SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_c, 0), SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_c, 0), SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_c, 0), SubpelAvgVarianceParams(3, 3, &vpx_sub_pixel_avg_variance8x8_c, 0), SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_c, 0), SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_c, 0), SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_c, 0))); #if CONFIG_VP9_HIGHBITDEPTH typedef MainTestClass VpxHBDMseTest; typedef MainTestClass VpxHBDVarianceTest; typedef SubpelVarianceTest VpxHBDSubpelVarianceTest; typedef SubpelVarianceTest VpxHBDSubpelAvgVarianceTest; TEST_P(VpxHBDMseTest, RefMse) { RefTestMse(); } TEST_P(VpxHBDMseTest, MaxMse) { MaxTestMse(); } TEST_P(VpxHBDVarianceTest, Zero) { ZeroTest(); } TEST_P(VpxHBDVarianceTest, Ref) { RefTest(); } TEST_P(VpxHBDVarianceTest, RefStride) { RefStrideTest(); } TEST_P(VpxHBDVarianceTest, OneQuarter) { OneQuarterTest(); } TEST_P(VpxHBDVarianceTest, DISABLED_Speed) { SpeedTest(); } TEST_P(VpxHBDSubpelVarianceTest, Ref) { RefTest(); } TEST_P(VpxHBDSubpelVarianceTest, ExtremeRef) { ExtremeRefTest(); } TEST_P(VpxHBDSubpelAvgVarianceTest, Ref) { RefTest(); } /* TODO(debargha): This test does not support the highbd version INSTANTIATE_TEST_CASE_P( C, VpxHBDMseTest, ::testing::Values(MseParams(4, 4, &vpx_highbd_12_mse16x16_c), MseParams(4, 4, &vpx_highbd_12_mse16x8_c), MseParams(4, 4, &vpx_highbd_12_mse8x16_c), 
MseParams(4, 4, &vpx_highbd_12_mse8x8_c), MseParams(4, 4, &vpx_highbd_10_mse16x16_c), MseParams(4, 4, &vpx_highbd_10_mse16x8_c), MseParams(4, 4, &vpx_highbd_10_mse8x16_c), MseParams(4, 4, &vpx_highbd_10_mse8x8_c), MseParams(4, 4, &vpx_highbd_8_mse16x16_c), MseParams(4, 4, &vpx_highbd_8_mse16x8_c), MseParams(4, 4, &vpx_highbd_8_mse8x16_c), MseParams(4, 4, &vpx_highbd_8_mse8x8_c))); */ INSTANTIATE_TEST_CASE_P( C, VpxHBDVarianceTest, ::testing::Values(VarianceParams(6, 6, &vpx_highbd_12_variance64x64_c, 12), VarianceParams(6, 5, &vpx_highbd_12_variance64x32_c, 12), VarianceParams(5, 6, &vpx_highbd_12_variance32x64_c, 12), VarianceParams(5, 5, &vpx_highbd_12_variance32x32_c, 12), VarianceParams(5, 4, &vpx_highbd_12_variance32x16_c, 12), VarianceParams(4, 5, &vpx_highbd_12_variance16x32_c, 12), VarianceParams(4, 4, &vpx_highbd_12_variance16x16_c, 12), VarianceParams(4, 3, &vpx_highbd_12_variance16x8_c, 12), VarianceParams(3, 4, &vpx_highbd_12_variance8x16_c, 12), VarianceParams(3, 3, &vpx_highbd_12_variance8x8_c, 12), VarianceParams(3, 2, &vpx_highbd_12_variance8x4_c, 12), VarianceParams(2, 3, &vpx_highbd_12_variance4x8_c, 12), VarianceParams(2, 2, &vpx_highbd_12_variance4x4_c, 12), VarianceParams(6, 6, &vpx_highbd_10_variance64x64_c, 10), VarianceParams(6, 5, &vpx_highbd_10_variance64x32_c, 10), VarianceParams(5, 6, &vpx_highbd_10_variance32x64_c, 10), VarianceParams(5, 5, &vpx_highbd_10_variance32x32_c, 10), VarianceParams(5, 4, &vpx_highbd_10_variance32x16_c, 10), VarianceParams(4, 5, &vpx_highbd_10_variance16x32_c, 10), VarianceParams(4, 4, &vpx_highbd_10_variance16x16_c, 10), VarianceParams(4, 3, &vpx_highbd_10_variance16x8_c, 10), VarianceParams(3, 4, &vpx_highbd_10_variance8x16_c, 10), VarianceParams(3, 3, &vpx_highbd_10_variance8x8_c, 10), VarianceParams(3, 2, &vpx_highbd_10_variance8x4_c, 10), VarianceParams(2, 3, &vpx_highbd_10_variance4x8_c, 10), VarianceParams(2, 2, &vpx_highbd_10_variance4x4_c, 10), VarianceParams(6, 6, &vpx_highbd_8_variance64x64_c, 8), VarianceParams(6, 5, &vpx_highbd_8_variance64x32_c, 8), VarianceParams(5, 6, &vpx_highbd_8_variance32x64_c, 8), VarianceParams(5, 5, &vpx_highbd_8_variance32x32_c, 8), VarianceParams(5, 4, &vpx_highbd_8_variance32x16_c, 8), VarianceParams(4, 5, &vpx_highbd_8_variance16x32_c, 8), VarianceParams(4, 4, &vpx_highbd_8_variance16x16_c, 8), VarianceParams(4, 3, &vpx_highbd_8_variance16x8_c, 8), VarianceParams(3, 4, &vpx_highbd_8_variance8x16_c, 8), VarianceParams(3, 3, &vpx_highbd_8_variance8x8_c, 8), VarianceParams(3, 2, &vpx_highbd_8_variance8x4_c, 8), VarianceParams(2, 3, &vpx_highbd_8_variance4x8_c, 8), VarianceParams(2, 2, &vpx_highbd_8_variance4x4_c, 8))); INSTANTIATE_TEST_CASE_P( C, VpxHBDSubpelVarianceTest, ::testing::Values( SubpelVarianceParams(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_c, 8), SubpelVarianceParams(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_c, 8), SubpelVarianceParams(5, 6, &vpx_highbd_8_sub_pixel_variance32x64_c, 8), SubpelVarianceParams(5, 5, &vpx_highbd_8_sub_pixel_variance32x32_c, 8), SubpelVarianceParams(5, 4, &vpx_highbd_8_sub_pixel_variance32x16_c, 8), SubpelVarianceParams(4, 5, &vpx_highbd_8_sub_pixel_variance16x32_c, 8), SubpelVarianceParams(4, 4, &vpx_highbd_8_sub_pixel_variance16x16_c, 8), SubpelVarianceParams(4, 3, &vpx_highbd_8_sub_pixel_variance16x8_c, 8), SubpelVarianceParams(3, 4, &vpx_highbd_8_sub_pixel_variance8x16_c, 8), SubpelVarianceParams(3, 3, &vpx_highbd_8_sub_pixel_variance8x8_c, 8), SubpelVarianceParams(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_c, 8), SubpelVarianceParams(2, 3, 
&vpx_highbd_8_sub_pixel_variance4x8_c, 8), SubpelVarianceParams(2, 2, &vpx_highbd_8_sub_pixel_variance4x4_c, 8), SubpelVarianceParams(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_c, 10), SubpelVarianceParams(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_c, 10), SubpelVarianceParams(5, 6, &vpx_highbd_10_sub_pixel_variance32x64_c, 10), SubpelVarianceParams(5, 5, &vpx_highbd_10_sub_pixel_variance32x32_c, 10), SubpelVarianceParams(5, 4, &vpx_highbd_10_sub_pixel_variance32x16_c, 10), SubpelVarianceParams(4, 5, &vpx_highbd_10_sub_pixel_variance16x32_c, 10), SubpelVarianceParams(4, 4, &vpx_highbd_10_sub_pixel_variance16x16_c, 10), SubpelVarianceParams(4, 3, &vpx_highbd_10_sub_pixel_variance16x8_c, 10), SubpelVarianceParams(3, 4, &vpx_highbd_10_sub_pixel_variance8x16_c, 10), SubpelVarianceParams(3, 3, &vpx_highbd_10_sub_pixel_variance8x8_c, 10), SubpelVarianceParams(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_c, 10), SubpelVarianceParams(2, 3, &vpx_highbd_10_sub_pixel_variance4x8_c, 10), SubpelVarianceParams(2, 2, &vpx_highbd_10_sub_pixel_variance4x4_c, 10), SubpelVarianceParams(6, 6, &vpx_highbd_12_sub_pixel_variance64x64_c, 12), SubpelVarianceParams(6, 5, &vpx_highbd_12_sub_pixel_variance64x32_c, 12), SubpelVarianceParams(5, 6, &vpx_highbd_12_sub_pixel_variance32x64_c, 12), SubpelVarianceParams(5, 5, &vpx_highbd_12_sub_pixel_variance32x32_c, 12), SubpelVarianceParams(5, 4, &vpx_highbd_12_sub_pixel_variance32x16_c, 12), SubpelVarianceParams(4, 5, &vpx_highbd_12_sub_pixel_variance16x32_c, 12), SubpelVarianceParams(4, 4, &vpx_highbd_12_sub_pixel_variance16x16_c, 12), SubpelVarianceParams(4, 3, &vpx_highbd_12_sub_pixel_variance16x8_c, 12), SubpelVarianceParams(3, 4, &vpx_highbd_12_sub_pixel_variance8x16_c, 12), SubpelVarianceParams(3, 3, &vpx_highbd_12_sub_pixel_variance8x8_c, 12), SubpelVarianceParams(3, 2, &vpx_highbd_12_sub_pixel_variance8x4_c, 12), SubpelVarianceParams(2, 3, &vpx_highbd_12_sub_pixel_variance4x8_c, 12), SubpelVarianceParams(2, 2, &vpx_highbd_12_sub_pixel_variance4x4_c, 12))); INSTANTIATE_TEST_CASE_P( C, VpxHBDSubpelAvgVarianceTest, ::testing::Values( SubpelAvgVarianceParams(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_c, 8), SubpelAvgVarianceParams(6, 5, &vpx_highbd_8_sub_pixel_avg_variance64x32_c, 8), SubpelAvgVarianceParams(5, 6, &vpx_highbd_8_sub_pixel_avg_variance32x64_c, 8), SubpelAvgVarianceParams(5, 5, &vpx_highbd_8_sub_pixel_avg_variance32x32_c, 8), SubpelAvgVarianceParams(5, 4, &vpx_highbd_8_sub_pixel_avg_variance32x16_c, 8), SubpelAvgVarianceParams(4, 5, &vpx_highbd_8_sub_pixel_avg_variance16x32_c, 8), SubpelAvgVarianceParams(4, 4, &vpx_highbd_8_sub_pixel_avg_variance16x16_c, 8), SubpelAvgVarianceParams(4, 3, &vpx_highbd_8_sub_pixel_avg_variance16x8_c, 8), SubpelAvgVarianceParams(3, 4, &vpx_highbd_8_sub_pixel_avg_variance8x16_c, 8), SubpelAvgVarianceParams(3, 3, &vpx_highbd_8_sub_pixel_avg_variance8x8_c, 8), SubpelAvgVarianceParams(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_c, 8), SubpelAvgVarianceParams(2, 3, &vpx_highbd_8_sub_pixel_avg_variance4x8_c, 8), SubpelAvgVarianceParams(2, 2, &vpx_highbd_8_sub_pixel_avg_variance4x4_c, 8), SubpelAvgVarianceParams(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_c, 10), SubpelAvgVarianceParams(6, 5, &vpx_highbd_10_sub_pixel_avg_variance64x32_c, 10), SubpelAvgVarianceParams(5, 6, &vpx_highbd_10_sub_pixel_avg_variance32x64_c, 10), SubpelAvgVarianceParams(5, 5, &vpx_highbd_10_sub_pixel_avg_variance32x32_c, 10), SubpelAvgVarianceParams(5, 4, &vpx_highbd_10_sub_pixel_avg_variance32x16_c, 10), SubpelAvgVarianceParams(4, 5, 
&vpx_highbd_10_sub_pixel_avg_variance16x32_c, 10), SubpelAvgVarianceParams(4, 4, &vpx_highbd_10_sub_pixel_avg_variance16x16_c, 10), SubpelAvgVarianceParams(4, 3, &vpx_highbd_10_sub_pixel_avg_variance16x8_c, 10), SubpelAvgVarianceParams(3, 4, &vpx_highbd_10_sub_pixel_avg_variance8x16_c, 10), SubpelAvgVarianceParams(3, 3, &vpx_highbd_10_sub_pixel_avg_variance8x8_c, 10), SubpelAvgVarianceParams(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_c, 10), SubpelAvgVarianceParams(2, 3, &vpx_highbd_10_sub_pixel_avg_variance4x8_c, 10), SubpelAvgVarianceParams(2, 2, &vpx_highbd_10_sub_pixel_avg_variance4x4_c, 10), SubpelAvgVarianceParams(6, 6, &vpx_highbd_12_sub_pixel_avg_variance64x64_c, 12), SubpelAvgVarianceParams(6, 5, &vpx_highbd_12_sub_pixel_avg_variance64x32_c, 12), SubpelAvgVarianceParams(5, 6, &vpx_highbd_12_sub_pixel_avg_variance32x64_c, 12), SubpelAvgVarianceParams(5, 5, &vpx_highbd_12_sub_pixel_avg_variance32x32_c, 12), SubpelAvgVarianceParams(5, 4, &vpx_highbd_12_sub_pixel_avg_variance32x16_c, 12), SubpelAvgVarianceParams(4, 5, &vpx_highbd_12_sub_pixel_avg_variance16x32_c, 12), SubpelAvgVarianceParams(4, 4, &vpx_highbd_12_sub_pixel_avg_variance16x16_c, 12), SubpelAvgVarianceParams(4, 3, &vpx_highbd_12_sub_pixel_avg_variance16x8_c, 12), SubpelAvgVarianceParams(3, 4, &vpx_highbd_12_sub_pixel_avg_variance8x16_c, 12), SubpelAvgVarianceParams(3, 3, &vpx_highbd_12_sub_pixel_avg_variance8x8_c, 12), SubpelAvgVarianceParams(3, 2, &vpx_highbd_12_sub_pixel_avg_variance8x4_c, 12), SubpelAvgVarianceParams(2, 3, &vpx_highbd_12_sub_pixel_avg_variance4x8_c, 12), SubpelAvgVarianceParams(2, 2, &vpx_highbd_12_sub_pixel_avg_variance4x4_c, 12))); #endif // CONFIG_VP9_HIGHBITDEPTH #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P(SSE2, SumOfSquaresTest, ::testing::Values(vpx_get_mb_ss_sse2)); INSTANTIATE_TEST_CASE_P(SSE2, VpxMseTest, ::testing::Values(MseParams(4, 4, &vpx_mse16x16_sse2), MseParams(4, 3, &vpx_mse16x8_sse2), MseParams(3, 4, &vpx_mse8x16_sse2), MseParams(3, 3, &vpx_mse8x8_sse2))); INSTANTIATE_TEST_CASE_P( SSE2, VpxVarianceTest, ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_sse2), VarianceParams(6, 5, &vpx_variance64x32_sse2), VarianceParams(5, 6, &vpx_variance32x64_sse2), VarianceParams(5, 5, &vpx_variance32x32_sse2), VarianceParams(5, 4, &vpx_variance32x16_sse2), VarianceParams(4, 5, &vpx_variance16x32_sse2), VarianceParams(4, 4, &vpx_variance16x16_sse2), VarianceParams(4, 3, &vpx_variance16x8_sse2), VarianceParams(3, 4, &vpx_variance8x16_sse2), VarianceParams(3, 3, &vpx_variance8x8_sse2), VarianceParams(3, 2, &vpx_variance8x4_sse2), VarianceParams(2, 3, &vpx_variance4x8_sse2), VarianceParams(2, 2, &vpx_variance4x4_sse2))); INSTANTIATE_TEST_CASE_P( SSE2, VpxSubpelVarianceTest, ::testing::Values( SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_sse2, 0), SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_sse2, 0), SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_sse2, 0), SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_sse2, 0), SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_sse2, 0), SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_sse2, 0), SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_sse2, 0), SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_sse2, 0), SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_sse2, 0), SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_sse2, 0), SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_sse2, 0), SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_sse2, 0), SubpelVarianceParams(2, 2, 
&vpx_sub_pixel_variance4x4_sse2, 0))); INSTANTIATE_TEST_CASE_P( SSE2, VpxSubpelAvgVarianceTest, ::testing::Values( SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_sse2, 0), SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_sse2, 0), SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_sse2, 0), SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_sse2, 0), SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_sse2, 0), SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_sse2, 0), SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_sse2, 0), SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_sse2, 0), SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_sse2, 0), SubpelAvgVarianceParams(3, 3, &vpx_sub_pixel_avg_variance8x8_sse2, 0), SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_sse2, 0), SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_sse2, 0), SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_sse2, 0))); #if CONFIG_VP9_HIGHBITDEPTH /* TODO(debargha): This test does not support the highbd version INSTANTIATE_TEST_CASE_P( SSE2, VpxHBDMseTest, ::testing::Values(MseParams(4, 4, &vpx_highbd_12_mse16x16_sse2), MseParams(4, 3, &vpx_highbd_12_mse16x8_sse2), MseParams(3, 4, &vpx_highbd_12_mse8x16_sse2), MseParams(3, 3, &vpx_highbd_12_mse8x8_sse2), MseParams(4, 4, &vpx_highbd_10_mse16x16_sse2), MseParams(4, 3, &vpx_highbd_10_mse16x8_sse2), MseParams(3, 4, &vpx_highbd_10_mse8x16_sse2), MseParams(3, 3, &vpx_highbd_10_mse8x8_sse2), MseParams(4, 4, &vpx_highbd_8_mse16x16_sse2), MseParams(4, 3, &vpx_highbd_8_mse16x8_sse2), MseParams(3, 4, &vpx_highbd_8_mse8x16_sse2), MseParams(3, 3, &vpx_highbd_8_mse8x8_sse2))); */ INSTANTIATE_TEST_CASE_P( SSE2, VpxHBDVarianceTest, ::testing::Values( VarianceParams(6, 6, &vpx_highbd_12_variance64x64_sse2, 12), VarianceParams(6, 5, &vpx_highbd_12_variance64x32_sse2, 12), VarianceParams(5, 6, &vpx_highbd_12_variance32x64_sse2, 12), VarianceParams(5, 5, &vpx_highbd_12_variance32x32_sse2, 12), VarianceParams(5, 4, &vpx_highbd_12_variance32x16_sse2, 12), VarianceParams(4, 5, &vpx_highbd_12_variance16x32_sse2, 12), VarianceParams(4, 4, &vpx_highbd_12_variance16x16_sse2, 12), VarianceParams(4, 3, &vpx_highbd_12_variance16x8_sse2, 12), VarianceParams(3, 4, &vpx_highbd_12_variance8x16_sse2, 12), VarianceParams(3, 3, &vpx_highbd_12_variance8x8_sse2, 12), VarianceParams(6, 6, &vpx_highbd_10_variance64x64_sse2, 10), VarianceParams(6, 5, &vpx_highbd_10_variance64x32_sse2, 10), VarianceParams(5, 6, &vpx_highbd_10_variance32x64_sse2, 10), VarianceParams(5, 5, &vpx_highbd_10_variance32x32_sse2, 10), VarianceParams(5, 4, &vpx_highbd_10_variance32x16_sse2, 10), VarianceParams(4, 5, &vpx_highbd_10_variance16x32_sse2, 10), VarianceParams(4, 4, &vpx_highbd_10_variance16x16_sse2, 10), VarianceParams(4, 3, &vpx_highbd_10_variance16x8_sse2, 10), VarianceParams(3, 4, &vpx_highbd_10_variance8x16_sse2, 10), VarianceParams(3, 3, &vpx_highbd_10_variance8x8_sse2, 10), VarianceParams(6, 6, &vpx_highbd_8_variance64x64_sse2, 8), VarianceParams(6, 5, &vpx_highbd_8_variance64x32_sse2, 8), VarianceParams(5, 6, &vpx_highbd_8_variance32x64_sse2, 8), VarianceParams(5, 5, &vpx_highbd_8_variance32x32_sse2, 8), VarianceParams(5, 4, &vpx_highbd_8_variance32x16_sse2, 8), VarianceParams(4, 5, &vpx_highbd_8_variance16x32_sse2, 8), VarianceParams(4, 4, &vpx_highbd_8_variance16x16_sse2, 8), VarianceParams(4, 3, &vpx_highbd_8_variance16x8_sse2, 8), VarianceParams(3, 4, 
&vpx_highbd_8_variance8x16_sse2, 8), VarianceParams(3, 3, &vpx_highbd_8_variance8x8_sse2, 8))); INSTANTIATE_TEST_CASE_P( SSE2, VpxHBDSubpelVarianceTest, ::testing::Values( SubpelVarianceParams(6, 6, &vpx_highbd_12_sub_pixel_variance64x64_sse2, 12), SubpelVarianceParams(6, 5, &vpx_highbd_12_sub_pixel_variance64x32_sse2, 12), SubpelVarianceParams(5, 6, &vpx_highbd_12_sub_pixel_variance32x64_sse2, 12), SubpelVarianceParams(5, 5, &vpx_highbd_12_sub_pixel_variance32x32_sse2, 12), SubpelVarianceParams(5, 4, &vpx_highbd_12_sub_pixel_variance32x16_sse2, 12), SubpelVarianceParams(4, 5, &vpx_highbd_12_sub_pixel_variance16x32_sse2, 12), SubpelVarianceParams(4, 4, &vpx_highbd_12_sub_pixel_variance16x16_sse2, 12), SubpelVarianceParams(4, 3, &vpx_highbd_12_sub_pixel_variance16x8_sse2, 12), SubpelVarianceParams(3, 4, &vpx_highbd_12_sub_pixel_variance8x16_sse2, 12), SubpelVarianceParams(3, 3, &vpx_highbd_12_sub_pixel_variance8x8_sse2, 12), SubpelVarianceParams(3, 2, &vpx_highbd_12_sub_pixel_variance8x4_sse2, 12), SubpelVarianceParams(6, 6, &vpx_highbd_10_sub_pixel_variance64x64_sse2, 10), SubpelVarianceParams(6, 5, &vpx_highbd_10_sub_pixel_variance64x32_sse2, 10), SubpelVarianceParams(5, 6, &vpx_highbd_10_sub_pixel_variance32x64_sse2, 10), SubpelVarianceParams(5, 5, &vpx_highbd_10_sub_pixel_variance32x32_sse2, 10), SubpelVarianceParams(5, 4, &vpx_highbd_10_sub_pixel_variance32x16_sse2, 10), SubpelVarianceParams(4, 5, &vpx_highbd_10_sub_pixel_variance16x32_sse2, 10), SubpelVarianceParams(4, 4, &vpx_highbd_10_sub_pixel_variance16x16_sse2, 10), SubpelVarianceParams(4, 3, &vpx_highbd_10_sub_pixel_variance16x8_sse2, 10), SubpelVarianceParams(3, 4, &vpx_highbd_10_sub_pixel_variance8x16_sse2, 10), SubpelVarianceParams(3, 3, &vpx_highbd_10_sub_pixel_variance8x8_sse2, 10), SubpelVarianceParams(3, 2, &vpx_highbd_10_sub_pixel_variance8x4_sse2, 10), SubpelVarianceParams(6, 6, &vpx_highbd_8_sub_pixel_variance64x64_sse2, 8), SubpelVarianceParams(6, 5, &vpx_highbd_8_sub_pixel_variance64x32_sse2, 8), SubpelVarianceParams(5, 6, &vpx_highbd_8_sub_pixel_variance32x64_sse2, 8), SubpelVarianceParams(5, 5, &vpx_highbd_8_sub_pixel_variance32x32_sse2, 8), SubpelVarianceParams(5, 4, &vpx_highbd_8_sub_pixel_variance32x16_sse2, 8), SubpelVarianceParams(4, 5, &vpx_highbd_8_sub_pixel_variance16x32_sse2, 8), SubpelVarianceParams(4, 4, &vpx_highbd_8_sub_pixel_variance16x16_sse2, 8), SubpelVarianceParams(4, 3, &vpx_highbd_8_sub_pixel_variance16x8_sse2, 8), SubpelVarianceParams(3, 4, &vpx_highbd_8_sub_pixel_variance8x16_sse2, 8), SubpelVarianceParams(3, 3, &vpx_highbd_8_sub_pixel_variance8x8_sse2, 8), SubpelVarianceParams(3, 2, &vpx_highbd_8_sub_pixel_variance8x4_sse2, 8))); INSTANTIATE_TEST_CASE_P( SSE2, VpxHBDSubpelAvgVarianceTest, ::testing::Values( SubpelAvgVarianceParams(6, 6, &vpx_highbd_12_sub_pixel_avg_variance64x64_sse2, 12), SubpelAvgVarianceParams(6, 5, &vpx_highbd_12_sub_pixel_avg_variance64x32_sse2, 12), SubpelAvgVarianceParams(5, 6, &vpx_highbd_12_sub_pixel_avg_variance32x64_sse2, 12), SubpelAvgVarianceParams(5, 5, &vpx_highbd_12_sub_pixel_avg_variance32x32_sse2, 12), SubpelAvgVarianceParams(5, 4, &vpx_highbd_12_sub_pixel_avg_variance32x16_sse2, 12), SubpelAvgVarianceParams(4, 5, &vpx_highbd_12_sub_pixel_avg_variance16x32_sse2, 12), SubpelAvgVarianceParams(4, 4, &vpx_highbd_12_sub_pixel_avg_variance16x16_sse2, 12), SubpelAvgVarianceParams(4, 3, &vpx_highbd_12_sub_pixel_avg_variance16x8_sse2, 12), SubpelAvgVarianceParams(3, 4, &vpx_highbd_12_sub_pixel_avg_variance8x16_sse2, 12), SubpelAvgVarianceParams(3, 3, 
&vpx_highbd_12_sub_pixel_avg_variance8x8_sse2, 12), SubpelAvgVarianceParams(3, 2, &vpx_highbd_12_sub_pixel_avg_variance8x4_sse2, 12), SubpelAvgVarianceParams(6, 6, &vpx_highbd_10_sub_pixel_avg_variance64x64_sse2, 10), SubpelAvgVarianceParams(6, 5, &vpx_highbd_10_sub_pixel_avg_variance64x32_sse2, 10), SubpelAvgVarianceParams(5, 6, &vpx_highbd_10_sub_pixel_avg_variance32x64_sse2, 10), SubpelAvgVarianceParams(5, 5, &vpx_highbd_10_sub_pixel_avg_variance32x32_sse2, 10), SubpelAvgVarianceParams(5, 4, &vpx_highbd_10_sub_pixel_avg_variance32x16_sse2, 10), SubpelAvgVarianceParams(4, 5, &vpx_highbd_10_sub_pixel_avg_variance16x32_sse2, 10), SubpelAvgVarianceParams(4, 4, &vpx_highbd_10_sub_pixel_avg_variance16x16_sse2, 10), SubpelAvgVarianceParams(4, 3, &vpx_highbd_10_sub_pixel_avg_variance16x8_sse2, 10), SubpelAvgVarianceParams(3, 4, &vpx_highbd_10_sub_pixel_avg_variance8x16_sse2, 10), SubpelAvgVarianceParams(3, 3, &vpx_highbd_10_sub_pixel_avg_variance8x8_sse2, 10), SubpelAvgVarianceParams(3, 2, &vpx_highbd_10_sub_pixel_avg_variance8x4_sse2, 10), SubpelAvgVarianceParams(6, 6, &vpx_highbd_8_sub_pixel_avg_variance64x64_sse2, 8), SubpelAvgVarianceParams(6, 5, &vpx_highbd_8_sub_pixel_avg_variance64x32_sse2, 8), SubpelAvgVarianceParams(5, 6, &vpx_highbd_8_sub_pixel_avg_variance32x64_sse2, 8), SubpelAvgVarianceParams(5, 5, &vpx_highbd_8_sub_pixel_avg_variance32x32_sse2, 8), SubpelAvgVarianceParams(5, 4, &vpx_highbd_8_sub_pixel_avg_variance32x16_sse2, 8), SubpelAvgVarianceParams(4, 5, &vpx_highbd_8_sub_pixel_avg_variance16x32_sse2, 8), SubpelAvgVarianceParams(4, 4, &vpx_highbd_8_sub_pixel_avg_variance16x16_sse2, 8), SubpelAvgVarianceParams(4, 3, &vpx_highbd_8_sub_pixel_avg_variance16x8_sse2, 8), SubpelAvgVarianceParams(3, 4, &vpx_highbd_8_sub_pixel_avg_variance8x16_sse2, 8), SubpelAvgVarianceParams(3, 3, &vpx_highbd_8_sub_pixel_avg_variance8x8_sse2, 8), SubpelAvgVarianceParams(3, 2, &vpx_highbd_8_sub_pixel_avg_variance8x4_sse2, 8))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_SSE2 #if HAVE_SSSE3 INSTANTIATE_TEST_CASE_P( SSSE3, VpxSubpelVarianceTest, ::testing::Values( SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_ssse3, 0), SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_ssse3, 0), SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_ssse3, 0), SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_ssse3, 0), SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_ssse3, 0), SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_ssse3, 0), SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_ssse3, 0), SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_ssse3, 0), SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_ssse3, 0), SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_ssse3, 0), SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_ssse3, 0), SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_ssse3, 0), SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_ssse3, 0))); INSTANTIATE_TEST_CASE_P( SSSE3, VpxSubpelAvgVarianceTest, ::testing::Values( SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_ssse3, 0), SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_ssse3, 0), SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_ssse3, 0), SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_ssse3, 0), SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_ssse3, 0), SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_ssse3, 0), SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_ssse3, 0), 
SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_ssse3, 0), SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_ssse3, 0), SubpelAvgVarianceParams(3, 3, &vpx_sub_pixel_avg_variance8x8_ssse3, 0), SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_ssse3, 0), SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_ssse3, 0), SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_ssse3, 0))); #endif // HAVE_SSSE3 #if HAVE_AVX2 INSTANTIATE_TEST_CASE_P(AVX2, VpxMseTest, ::testing::Values(MseParams(4, 4, &vpx_mse16x16_avx2), MseParams(4, 3, &vpx_mse16x8_avx2))); INSTANTIATE_TEST_CASE_P( AVX2, VpxVarianceTest, ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_avx2), VarianceParams(6, 5, &vpx_variance64x32_avx2), VarianceParams(5, 6, &vpx_variance32x64_avx2), VarianceParams(5, 5, &vpx_variance32x32_avx2), VarianceParams(5, 4, &vpx_variance32x16_avx2), VarianceParams(4, 5, &vpx_variance16x32_avx2), VarianceParams(4, 4, &vpx_variance16x16_avx2), VarianceParams(4, 3, &vpx_variance16x8_avx2))); INSTANTIATE_TEST_CASE_P( AVX2, VpxSubpelVarianceTest, ::testing::Values( SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_avx2, 0), SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_avx2, 0))); INSTANTIATE_TEST_CASE_P( AVX2, VpxSubpelAvgVarianceTest, ::testing::Values( SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_avx2, 0), SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_avx2, 0))); #endif // HAVE_AVX2 #if HAVE_NEON INSTANTIATE_TEST_CASE_P(NEON, VpxSseTest, ::testing::Values(SseParams(2, 2, &vpx_get4x4sse_cs_neon))); INSTANTIATE_TEST_CASE_P(NEON, VpxMseTest, ::testing::Values(MseParams(4, 4, &vpx_mse16x16_neon))); INSTANTIATE_TEST_CASE_P( NEON, VpxVarianceTest, ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_neon), VarianceParams(6, 5, &vpx_variance64x32_neon), VarianceParams(5, 6, &vpx_variance32x64_neon), VarianceParams(5, 5, &vpx_variance32x32_neon), VarianceParams(5, 4, &vpx_variance32x16_neon), VarianceParams(4, 5, &vpx_variance16x32_neon), VarianceParams(4, 4, &vpx_variance16x16_neon), VarianceParams(4, 3, &vpx_variance16x8_neon), VarianceParams(3, 4, &vpx_variance8x16_neon), VarianceParams(3, 3, &vpx_variance8x8_neon), VarianceParams(3, 2, &vpx_variance8x4_neon), VarianceParams(2, 3, &vpx_variance4x8_neon), VarianceParams(2, 2, &vpx_variance4x4_neon))); INSTANTIATE_TEST_CASE_P( NEON, VpxSubpelVarianceTest, ::testing::Values( SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_neon, 0), SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_neon, 0), SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_neon, 0), SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_neon, 0), SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_neon, 0), SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_neon, 0), SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_neon, 0), SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_neon, 0), SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_neon, 0), SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_neon, 0), SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_neon, 0), SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_neon, 0), SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_neon, 0))); INSTANTIATE_TEST_CASE_P( NEON, VpxSubpelAvgVarianceTest, ::testing::Values( SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_neon, 0), SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_neon, 0), 
SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_neon, 0), SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_neon, 0), SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_neon, 0), SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_neon, 0), SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_neon, 0), SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_neon, 0), SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_neon, 0), SubpelAvgVarianceParams(3, 3, &vpx_sub_pixel_avg_variance8x8_neon, 0), SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_neon, 0), SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_neon, 0), SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_neon, 0))); #endif // HAVE_NEON #if HAVE_MSA INSTANTIATE_TEST_CASE_P(MSA, SumOfSquaresTest, ::testing::Values(vpx_get_mb_ss_msa)); INSTANTIATE_TEST_CASE_P(MSA, VpxSseTest, ::testing::Values(SseParams(2, 2, &vpx_get4x4sse_cs_msa))); INSTANTIATE_TEST_CASE_P(MSA, VpxMseTest, ::testing::Values(MseParams(4, 4, &vpx_mse16x16_msa), MseParams(4, 3, &vpx_mse16x8_msa), MseParams(3, 4, &vpx_mse8x16_msa), MseParams(3, 3, &vpx_mse8x8_msa))); INSTANTIATE_TEST_CASE_P( MSA, VpxVarianceTest, ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_msa), VarianceParams(6, 5, &vpx_variance64x32_msa), VarianceParams(5, 6, &vpx_variance32x64_msa), VarianceParams(5, 5, &vpx_variance32x32_msa), VarianceParams(5, 4, &vpx_variance32x16_msa), VarianceParams(4, 5, &vpx_variance16x32_msa), VarianceParams(4, 4, &vpx_variance16x16_msa), VarianceParams(4, 3, &vpx_variance16x8_msa), VarianceParams(3, 4, &vpx_variance8x16_msa), VarianceParams(3, 3, &vpx_variance8x8_msa), VarianceParams(3, 2, &vpx_variance8x4_msa), VarianceParams(2, 3, &vpx_variance4x8_msa), VarianceParams(2, 2, &vpx_variance4x4_msa))); INSTANTIATE_TEST_CASE_P( MSA, VpxSubpelVarianceTest, ::testing::Values( SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_msa, 0), SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_msa, 0), SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_msa, 0), SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_msa, 0), SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_msa, 0), SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_msa, 0), SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_msa, 0), SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_msa, 0), SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_msa, 0), SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_msa, 0), SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_msa, 0), SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_msa, 0), SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_msa, 0))); INSTANTIATE_TEST_CASE_P( MSA, VpxSubpelAvgVarianceTest, ::testing::Values( SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_msa, 0), SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_msa, 0), SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_msa, 0), SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_msa, 0), SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_msa, 0), SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_msa, 0), SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_msa, 0), SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_msa, 0), SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_msa, 0), SubpelAvgVarianceParams(3, 3, 
&vpx_sub_pixel_avg_variance8x8_msa, 0), SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_msa, 0), SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_msa, 0), SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_msa, 0))); #endif // HAVE_MSA #if HAVE_VSX INSTANTIATE_TEST_CASE_P(VSX, SumOfSquaresTest, ::testing::Values(vpx_get_mb_ss_vsx)); INSTANTIATE_TEST_CASE_P(VSX, VpxSseTest, ::testing::Values(SseParams(2, 2, &vpx_get4x4sse_cs_vsx))); INSTANTIATE_TEST_CASE_P(VSX, VpxMseTest, ::testing::Values(MseParams(4, 4, &vpx_mse16x16_vsx), MseParams(4, 3, &vpx_mse16x8_vsx), MseParams(3, 4, &vpx_mse8x16_vsx), MseParams(3, 3, &vpx_mse8x8_vsx))); INSTANTIATE_TEST_CASE_P( VSX, VpxVarianceTest, ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_vsx), VarianceParams(6, 5, &vpx_variance64x32_vsx), VarianceParams(5, 6, &vpx_variance32x64_vsx), VarianceParams(5, 5, &vpx_variance32x32_vsx), VarianceParams(5, 4, &vpx_variance32x16_vsx), VarianceParams(4, 5, &vpx_variance16x32_vsx), VarianceParams(4, 4, &vpx_variance16x16_vsx), VarianceParams(4, 3, &vpx_variance16x8_vsx), VarianceParams(3, 4, &vpx_variance8x16_vsx), VarianceParams(3, 3, &vpx_variance8x8_vsx), VarianceParams(3, 2, &vpx_variance8x4_vsx), VarianceParams(2, 3, &vpx_variance4x8_vsx), VarianceParams(2, 2, &vpx_variance4x4_vsx))); #endif // HAVE_VSX #if HAVE_MMI INSTANTIATE_TEST_CASE_P(MMI, VpxMseTest, ::testing::Values(MseParams(4, 4, &vpx_mse16x16_mmi), MseParams(4, 3, &vpx_mse16x8_mmi), MseParams(3, 4, &vpx_mse8x16_mmi), MseParams(3, 3, &vpx_mse8x8_mmi))); INSTANTIATE_TEST_CASE_P( MMI, VpxVarianceTest, ::testing::Values(VarianceParams(6, 6, &vpx_variance64x64_mmi), VarianceParams(6, 5, &vpx_variance64x32_mmi), VarianceParams(5, 6, &vpx_variance32x64_mmi), VarianceParams(5, 5, &vpx_variance32x32_mmi), VarianceParams(5, 4, &vpx_variance32x16_mmi), VarianceParams(4, 5, &vpx_variance16x32_mmi), VarianceParams(4, 4, &vpx_variance16x16_mmi), VarianceParams(4, 3, &vpx_variance16x8_mmi), VarianceParams(3, 4, &vpx_variance8x16_mmi), VarianceParams(3, 3, &vpx_variance8x8_mmi), VarianceParams(3, 2, &vpx_variance8x4_mmi), VarianceParams(2, 3, &vpx_variance4x8_mmi), VarianceParams(2, 2, &vpx_variance4x4_mmi))); INSTANTIATE_TEST_CASE_P( MMI, VpxSubpelVarianceTest, ::testing::Values( SubpelVarianceParams(6, 6, &vpx_sub_pixel_variance64x64_mmi, 0), SubpelVarianceParams(6, 5, &vpx_sub_pixel_variance64x32_mmi, 0), SubpelVarianceParams(5, 6, &vpx_sub_pixel_variance32x64_mmi, 0), SubpelVarianceParams(5, 5, &vpx_sub_pixel_variance32x32_mmi, 0), SubpelVarianceParams(5, 4, &vpx_sub_pixel_variance32x16_mmi, 0), SubpelVarianceParams(4, 5, &vpx_sub_pixel_variance16x32_mmi, 0), SubpelVarianceParams(4, 4, &vpx_sub_pixel_variance16x16_mmi, 0), SubpelVarianceParams(4, 3, &vpx_sub_pixel_variance16x8_mmi, 0), SubpelVarianceParams(3, 4, &vpx_sub_pixel_variance8x16_mmi, 0), SubpelVarianceParams(3, 3, &vpx_sub_pixel_variance8x8_mmi, 0), SubpelVarianceParams(3, 2, &vpx_sub_pixel_variance8x4_mmi, 0), SubpelVarianceParams(2, 3, &vpx_sub_pixel_variance4x8_mmi, 0), SubpelVarianceParams(2, 2, &vpx_sub_pixel_variance4x4_mmi, 0))); INSTANTIATE_TEST_CASE_P( MMI, VpxSubpelAvgVarianceTest, ::testing::Values( SubpelAvgVarianceParams(6, 6, &vpx_sub_pixel_avg_variance64x64_mmi, 0), SubpelAvgVarianceParams(6, 5, &vpx_sub_pixel_avg_variance64x32_mmi, 0), SubpelAvgVarianceParams(5, 6, &vpx_sub_pixel_avg_variance32x64_mmi, 0), SubpelAvgVarianceParams(5, 5, &vpx_sub_pixel_avg_variance32x32_mmi, 0), SubpelAvgVarianceParams(5, 4, &vpx_sub_pixel_avg_variance32x16_mmi, 
0), SubpelAvgVarianceParams(4, 5, &vpx_sub_pixel_avg_variance16x32_mmi, 0), SubpelAvgVarianceParams(4, 4, &vpx_sub_pixel_avg_variance16x16_mmi, 0), SubpelAvgVarianceParams(4, 3, &vpx_sub_pixel_avg_variance16x8_mmi, 0), SubpelAvgVarianceParams(3, 4, &vpx_sub_pixel_avg_variance8x16_mmi, 0), SubpelAvgVarianceParams(3, 3, &vpx_sub_pixel_avg_variance8x8_mmi, 0), SubpelAvgVarianceParams(3, 2, &vpx_sub_pixel_avg_variance8x4_mmi, 0), SubpelAvgVarianceParams(2, 3, &vpx_sub_pixel_avg_variance4x8_mmi, 0), SubpelAvgVarianceParams(2, 2, &vpx_sub_pixel_avg_variance4x4_mmi, 0))); #endif // HAVE_MMI } // namespace libvpx-1.8.2/test/video_source.h000066400000000000000000000147401357355204000166560ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_VIDEO_SOURCE_H_ #define VPX_TEST_VIDEO_SOURCE_H_ #if defined(_WIN32) #undef NOMINMAX #define NOMINMAX #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include #endif #include #include #include #include "test/acm_random.h" #include "vpx/vpx_encoder.h" namespace libvpx_test { // Helper macros to ensure LIBVPX_TEST_DATA_PATH is a quoted string. // These are undefined right below GetDataPath // NOTE: LIBVPX_TEST_DATA_PATH MUST NOT be a quoted string before // Stringification or the GetDataPath will fail at runtime #define TO_STRING(S) #S #define STRINGIFY(S) TO_STRING(S) // A simple function to encapsulate cross platform retrieval of test data path static std::string GetDataPath() { const char *const data_path = getenv("LIBVPX_TEST_DATA_PATH"); if (data_path == NULL) { #ifdef LIBVPX_TEST_DATA_PATH // In some environments, we cannot set environment variables // Instead, we set the data path by using a preprocessor symbol // which can be set from make files return STRINGIFY(LIBVPX_TEST_DATA_PATH); #else return "."; #endif } return data_path; } // Undefining stringification macros because they are not used elsewhere #undef TO_STRING #undef STRINGIFY inline FILE *OpenTestDataFile(const std::string &file_name) { const std::string path_to_source = GetDataPath() + "/" + file_name; return fopen(path_to_source.c_str(), "rb"); } static FILE *GetTempOutFile(std::string *file_name) { file_name->clear(); #if defined(_WIN32) char fname[MAX_PATH]; char tmppath[MAX_PATH]; if (GetTempPathA(MAX_PATH, tmppath)) { // Assume for now that the filename generated is unique per process if (GetTempFileNameA(tmppath, "lvx", 0, fname)) { file_name->assign(fname); return fopen(fname, "wb+"); } } return NULL; #else return tmpfile(); #endif } class TempOutFile { public: TempOutFile() { file_ = GetTempOutFile(&file_name_); } ~TempOutFile() { CloseFile(); if (!file_name_.empty()) { EXPECT_EQ(0, remove(file_name_.c_str())); } } FILE *file() { return file_; } const std::string &file_name() { return file_name_; } protected: void CloseFile() { if (file_) { fclose(file_); file_ = NULL; } } FILE *file_; std::string file_name_; }; // Abstract base class for test video sources, which provide a stream of // vpx_image_t images with associated timestamps and duration. class VideoSource { public: virtual ~VideoSource() {} // Prepare the stream for reading, rewind/open as necessary. 
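  // (Typical consumption loop, sketched for illustration only -- the test
  //  drivers iterate a source roughly as:
  //
  //    for (video->Begin(); video->img() != NULL; video->Next()) {
  //      EncodeFrame(video->img(), video->pts(), video->duration());
  //    }
  //
  //  where EncodeFrame() stands in for whatever per-frame work a test does.
  //  Begin() rewinds the source; img() returns NULL at end-of-stream.)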
virtual void Begin() = 0; // Advance the cursor to the next frame virtual void Next() = 0; // Get the current video frame, or NULL on End-Of-Stream. virtual vpx_image_t *img() const = 0; // Get the presentation timestamp of the current frame. virtual vpx_codec_pts_t pts() const = 0; // Get the current frame's duration virtual unsigned long duration() const = 0; // Get the timebase for the stream virtual vpx_rational_t timebase() const = 0; // Get the current frame counter, starting at 0. virtual unsigned int frame() const = 0; // Get the current file limit. virtual unsigned int limit() const = 0; }; class DummyVideoSource : public VideoSource { public: DummyVideoSource() : img_(NULL), limit_(100), width_(80), height_(64), format_(VPX_IMG_FMT_I420) { ReallocImage(); } virtual ~DummyVideoSource() { vpx_img_free(img_); } virtual void Begin() { frame_ = 0; FillFrame(); } virtual void Next() { ++frame_; FillFrame(); } virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; } // Models a stream where Timebase = 1/FPS, so pts == frame. virtual vpx_codec_pts_t pts() const { return frame_; } virtual unsigned long duration() const { return 1; } virtual vpx_rational_t timebase() const { const vpx_rational_t t = { 1, 30 }; return t; } virtual unsigned int frame() const { return frame_; } virtual unsigned int limit() const { return limit_; } void set_limit(unsigned int limit) { limit_ = limit; } void SetSize(unsigned int width, unsigned int height) { if (width != width_ || height != height_) { width_ = width; height_ = height; ReallocImage(); } } void SetImageFormat(vpx_img_fmt_t format) { if (format_ != format) { format_ = format; ReallocImage(); } } protected: virtual void FillFrame() { if (img_) memset(img_->img_data, 0, raw_sz_); } void ReallocImage() { vpx_img_free(img_); img_ = vpx_img_alloc(NULL, format_, width_, height_, 32); raw_sz_ = ((img_->w + 31) & ~31) * img_->h * img_->bps / 8; } vpx_image_t *img_; size_t raw_sz_; unsigned int limit_; unsigned int frame_; unsigned int width_; unsigned int height_; vpx_img_fmt_t format_; }; class RandomVideoSource : public DummyVideoSource { public: RandomVideoSource(int seed = ACMRandom::DeterministicSeed()) : rnd_(seed), seed_(seed) {} protected: // Reset the RNG to get a matching stream for the second pass virtual void Begin() { frame_ = 0; rnd_.Reset(seed_); FillFrame(); } // 15 frames of noise, followed by 15 static frames. Reset to 0 rather // than holding previous frames to encourage keyframes to be thrown. virtual void FillFrame() { if (img_) { if (frame_ % 30 < 15) { for (size_t i = 0; i < raw_sz_; ++i) img_->img_data[i] = rnd_.Rand8(); } else { memset(img_->img_data, 0, raw_sz_); } } } ACMRandom rnd_; int seed_; }; // Abstract base class for test video sources, which provide a stream of // decompressed images to the decoder. class CompressedVideoSource { public: virtual ~CompressedVideoSource() {} virtual void Init() = 0; // Prepare the stream for reading, rewind/open as necessary. virtual void Begin() = 0; // Advance the cursor to the next frame virtual void Next() = 0; virtual const uint8_t *cxdata() const = 0; virtual size_t frame_size() const = 0; virtual unsigned int frame_number() const = 0; }; } // namespace libvpx_test #endif // VPX_TEST_VIDEO_SOURCE_H_ libvpx-1.8.2/test/vp8_boolcoder_test.cc000066400000000000000000000077011357355204000201310ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/acm_random.h" #include "vp8/decoder/dboolhuff.h" #include "vp8/encoder/boolhuff.h" #include "vpx/vpx_integer.h" namespace { const int num_tests = 10; // In a real use the 'decrypt_state' parameter will be a pointer to a struct // with whatever internal state the decryptor uses. For testing we'll just // xor with a constant key, and decrypt_state will point to the start of // the original buffer. const uint8_t secret_key[16] = { 0x01, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x78, 0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0 }; void encrypt_buffer(uint8_t *buffer, size_t size) { for (size_t i = 0; i < size; ++i) { buffer[i] ^= secret_key[i & 15]; } } void test_decrypt_cb(void *decrypt_state, const uint8_t *input, uint8_t *output, int count) { const size_t offset = input - reinterpret_cast(decrypt_state); for (int i = 0; i < count; i++) { output[i] = input[i] ^ secret_key[(offset + i) & 15]; } } } // namespace using libvpx_test::ACMRandom; TEST(VP8, TestBitIO) { ACMRandom rnd(ACMRandom::DeterministicSeed()); for (int n = 0; n < num_tests; ++n) { for (int method = 0; method <= 7; ++method) { // we generate various proba const int kBitsToTest = 1000; uint8_t probas[kBitsToTest]; for (int i = 0; i < kBitsToTest; ++i) { const int parity = i & 1; /* clang-format off */ probas[i] = (method == 0) ? 0 : (method == 1) ? 255 : (method == 2) ? 128 : (method == 3) ? rnd.Rand8() : (method == 4) ? (parity ? 0 : 255) : // alternate between low and high proba: (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) : (method == 6) ? (parity ? rnd(64) : 255 - rnd(64)) : (parity ? rnd(32) : 255 - rnd(32)); /* clang-format on */ } for (int bit_method = 0; bit_method <= 3; ++bit_method) { const int random_seed = 6432; const int kBufferSize = 10000; ACMRandom bit_rnd(random_seed); BOOL_CODER bw; uint8_t bw_buffer[kBufferSize]; vp8_start_encode(&bw, bw_buffer, bw_buffer + kBufferSize); int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 1 : 0; for (int i = 0; i < kBitsToTest; ++i) { if (bit_method == 2) { bit = (i & 1); } else if (bit_method == 3) { bit = bit_rnd(2); } vp8_encode_bool(&bw, bit, static_cast(probas[i])); } vp8_stop_encode(&bw); // vp8dx_bool_decoder_fill() may read into uninitialized data that // isn't used meaningfully, but may trigger an MSan warning. memset(bw_buffer + bw.pos, 0, sizeof(VP8_BD_VALUE) - 1); BOOL_DECODER br; encrypt_buffer(bw_buffer, kBufferSize); vp8dx_start_decode(&br, bw_buffer, kBufferSize, test_decrypt_cb, reinterpret_cast(bw_buffer)); bit_rnd.Reset(random_seed); for (int i = 0; i < kBitsToTest; ++i) { if (bit_method == 2) { bit = (i & 1); } else if (bit_method == 3) { bit = bit_rnd(2); } GTEST_ASSERT_EQ(vp8dx_decode_bool(&br, probas[i]), bit) << "pos: " << i << " / " << kBitsToTest << " bit_method: " << bit_method << " method: " << method; } } } } } libvpx-1.8.2/test/vp8_datarate_test.cc000066400000000000000000000336421357355204000177510ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" #include "test/y4m_video_source.h" #include "vpx/vpx_codec.h" namespace { class DatarateTestLarge : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params { public: DatarateTestLarge() : EncoderTest(GET_PARAM(0)) {} virtual ~DatarateTestLarge() {} protected: virtual void SetUp() { InitializeConfig(); SetMode(GET_PARAM(1)); set_cpu_used_ = GET_PARAM(2); ResetModel(); } virtual void ResetModel() { last_pts_ = 0; bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; frame_number_ = 0; first_drop_ = 0; bits_total_ = 0; duration_ = 0.0; denoiser_offon_test_ = 0; denoiser_offon_period_ = -1; gf_boost_ = 0; use_roi_ = false; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_); encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); encoder->Control(VP8E_SET_GF_CBR_BOOST_PCT, gf_boost_); } if (use_roi_) { encoder->Control(VP8E_SET_ROI_MAP, &roi_); } if (denoiser_offon_test_) { ASSERT_GT(denoiser_offon_period_, 0) << "denoiser_offon_period_ is not positive."; if ((video->frame() + 1) % denoiser_offon_period_ == 0) { // Flip denoiser_on_ periodically denoiser_on_ ^= 1; } encoder->Control(VP8E_SET_NOISE_SENSITIVITY, denoiser_on_); } const vpx_rational_t tb = video->timebase(); timebase_ = static_cast(tb.num) / tb.den; duration_ = 0; } virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { // Time since last timestamp = duration. vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; // TODO(jimbankoski): Remove these lines when the issue: // http://code.google.com/p/webm/issues/detail?id=496 is fixed. // For now the codec assumes buffer starts at starting buffer rate // plus one frame's time. if (last_pts_ == 0) duration = 1; // Add to the buffer the bits we'd expect from a constant bitrate server. bits_in_buffer_model_ += static_cast( duration * timebase_ * cfg_.rc_target_bitrate * 1000); /* Test the buffer model here before subtracting the frame. Do so because * the way the leaky bucket model works in libvpx is to allow the buffer to * empty - and then stop showing frames until we've got enough bits to * show one. As noted in comment below (issue 495), this does not currently * apply to key frames. For now exclude key frames in condition below. */ const bool key_frame = (pkt->data.frame.flags & VPX_FRAME_IS_KEY) ? true : false; if (!key_frame) { ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame " << pkt->data.frame.pts; } const int64_t frame_size_in_bits = pkt->data.frame.sz * 8; // Subtract from the buffer the bits associated with a played back frame. bits_in_buffer_model_ -= frame_size_in_bits; // Update the running total of bits for end of test datarate checks. bits_total_ += frame_size_in_bits; // If first drop not set and we have a drop set it to this time. if (!first_drop_ && duration > 1) first_drop_ = last_pts_ + 1; // Update the most recent pts. 
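    // (A duration greater than one timebase tick means the encoder dropped
    // frames: e.g. pts 5 followed by pts 8 implies frames 6 and 7 were
    // dropped, and first_drop_ above records timestamp 6.)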
    last_pts_ = pkt->data.frame.pts;
    // We update this so that we can calculate the datarate minus the last
    // frame encoded in the file.
    bits_in_last_frame_ = frame_size_in_bits;
    ++frame_number_;
  }

  virtual void EndPassHook(void) {
    if (bits_total_) {
      const double file_size_in_kb = bits_total_ / 1000.;  // bits to kilobits
      duration_ = (last_pts_ + 1) * timebase_;
      // Effective file datarate includes the time spent prebuffering.
      effective_datarate_ = (bits_total_ - bits_in_last_frame_) / 1000.0 /
                            (cfg_.rc_buf_initial_sz / 1000.0 + duration_);
      file_datarate_ = file_size_in_kb / duration_;
    }
  }

  virtual void DenoiserLevelsTest() {
    cfg_.rc_buf_initial_sz = 500;
    cfg_.rc_dropframe_thresh = 1;
    cfg_.rc_max_quantizer = 56;
    cfg_.rc_end_usage = VPX_CBR;
    ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
                                         288, 30, 1, 0, 140);
    for (int j = 1; j < 5; ++j) {
      // Run over the denoiser levels.
      // For the temporal denoiser (#if CONFIG_TEMPORAL_DENOISING) the level j
      // refers to the 4 denoiser modes: denoiserYonly, denoiserOnYUV,
      // denoiserOnAggressive, and denoiserOnAdaptive.
      denoiser_on_ = j;
      cfg_.rc_target_bitrate = 300;
      ResetModel();
      ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
      ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
          << " The datarate for the file exceeds the target!";
      ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
          << " The datarate for the file missed the target!";
    }
  }

  virtual void DenoiserOffOnTest() {
    cfg_.rc_buf_initial_sz = 500;
    cfg_.rc_dropframe_thresh = 1;
    cfg_.rc_max_quantizer = 56;
    cfg_.rc_end_usage = VPX_CBR;
    ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
                                         288, 30, 1, 0, 299);
    cfg_.rc_target_bitrate = 300;
    ResetModel();
    // The denoiser is off by default.
    denoiser_on_ = 0;
    // Set the off/on test flag.
    denoiser_offon_test_ = 1;
    denoiser_offon_period_ = 100;
    ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
    ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95)
        << " The datarate for the file exceeds the target!";
    ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4)
        << " The datarate for the file missed the target!";
  }

  virtual void BasicBufferModelTest() {
    denoiser_on_ = 0;
    cfg_.rc_buf_initial_sz = 500;
    cfg_.rc_dropframe_thresh = 1;
    cfg_.rc_max_quantizer = 56;
    cfg_.rc_end_usage = VPX_CBR;
    // Two-pass CBR datarate control has a bug hidden by the small number of
    // frames selected in this encode: even if the buffer is negative we
    // produce a keyframe on a cutscene, ignoring datarate constraints.
    // TODO(jimbankoski): Fix when issue
    // http://code.google.com/p/webm/issues/detail?id=495 is addressed.
    ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352,
                                         288, 30, 1, 0, 140);
    // There is an issue for low bitrates in real-time mode, where the
    // effective_datarate slightly overshoots the target bitrate.
    // This is the same issue as noted above (#495).
    // TODO(jimbankoski/marpan): Update test to run for lower bitrates (< 100),
    // when the issue is resolved.
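    // To make the leaky-bucket arithmetic above concrete (illustrative
    // numbers only): at 30 fps and rc_target_bitrate = 300 kbps, each frame
    // interval credits the model with
    //   duration * timebase * rate * 1000 = 1 * (1 / 30) * 300 * 1000
    //                                     = 10000 bits,
    // so a 2500-byte (20000-bit) frame drains a net 10000 bits; sustained
    // frames of that size would eventually underrun the buffer and trip the
    // ASSERT_GE(bits_in_buffer_model_, 0) check in FramePktHook().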
for (int i = 100; i < 800; i += 200) { cfg_.rc_target_bitrate = i; ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) << " The datarate for the file exceeds the target!"; ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) << " The datarate for the file missed the target!"; } } virtual void ChangingDropFrameThreshTest() { denoiser_on_ = 0; cfg_.rc_buf_initial_sz = 500; cfg_.rc_max_quantizer = 36; cfg_.rc_end_usage = VPX_CBR; cfg_.rc_target_bitrate = 200; cfg_.kf_mode = VPX_KF_DISABLED; const int frame_count = 40; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, frame_count); // Here we check that the first dropped frame gets earlier and earlier // as the drop frame threshold is increased. const int kDropFrameThreshTestStep = 30; vpx_codec_pts_t last_drop = frame_count; for (int i = 1; i < 91; i += kDropFrameThreshTestStep) { cfg_.rc_dropframe_thresh = i; ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_LE(first_drop_, last_drop) << " The first dropped frame for drop_thresh " << i << " > first dropped frame for drop_thresh " << i - kDropFrameThreshTestStep; last_drop = first_drop_; } } virtual void DropFramesMultiThreadsTest() { denoiser_on_ = 0; cfg_.rc_buf_initial_sz = 500; cfg_.rc_dropframe_thresh = 30; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_CBR; cfg_.g_threads = 2; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 140); cfg_.rc_target_bitrate = 200; ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) << " The datarate for the file exceeds the target!"; ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) << " The datarate for the file missed the target!"; } vpx_codec_pts_t last_pts_; int64_t bits_in_buffer_model_; double timebase_; int frame_number_; vpx_codec_pts_t first_drop_; int64_t bits_total_; double duration_; double file_datarate_; double effective_datarate_; int64_t bits_in_last_frame_; int denoiser_on_; int denoiser_offon_test_; int denoiser_offon_period_; int set_cpu_used_; int gf_boost_; bool use_roi_; vpx_roi_map_t roi_; }; #if CONFIG_TEMPORAL_DENOISING // Check basic datarate targeting, for a single bitrate, but loop over the // various denoiser settings. TEST_P(DatarateTestLarge, DenoiserLevels) { DenoiserLevelsTest(); } // Check basic datarate targeting, for a single bitrate, when denoiser is off // and on. TEST_P(DatarateTestLarge, DenoiserOffOn) { DenoiserOffOnTest(); } #endif // CONFIG_TEMPORAL_DENOISING TEST_P(DatarateTestLarge, BasicBufferModel) { BasicBufferModelTest(); } TEST_P(DatarateTestLarge, ChangingDropFrameThresh) { ChangingDropFrameThreshTest(); } TEST_P(DatarateTestLarge, DropFramesMultiThreads) { DropFramesMultiThreadsTest(); } class DatarateTestRealTime : public DatarateTestLarge { public: virtual ~DatarateTestRealTime() {} }; #if CONFIG_TEMPORAL_DENOISING // Check basic datarate targeting, for a single bitrate, but loop over the // various denoiser settings. TEST_P(DatarateTestRealTime, DenoiserLevels) { DenoiserLevelsTest(); } // Check basic datarate targeting, for a single bitrate, when denoiser is off // and on. 
TEST_P(DatarateTestRealTime, DenoiserOffOn) {} #endif // CONFIG_TEMPORAL_DENOISING TEST_P(DatarateTestRealTime, BasicBufferModel) { BasicBufferModelTest(); } TEST_P(DatarateTestRealTime, ChangingDropFrameThresh) { ChangingDropFrameThreshTest(); } TEST_P(DatarateTestRealTime, DropFramesMultiThreads) { DropFramesMultiThreadsTest(); } TEST_P(DatarateTestRealTime, RegionOfInterest) { denoiser_on_ = 0; cfg_.rc_buf_initial_sz = 500; cfg_.rc_dropframe_thresh = 0; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_CBR; // Encode using multiple threads. cfg_.g_threads = 2; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 300); cfg_.rc_target_bitrate = 450; cfg_.g_w = 352; cfg_.g_h = 288; ResetModel(); // Set ROI parameters use_roi_ = true; memset(&roi_, 0, sizeof(roi_)); roi_.rows = (cfg_.g_h + 15) / 16; roi_.cols = (cfg_.g_w + 15) / 16; roi_.delta_q[0] = 0; roi_.delta_q[1] = -20; roi_.delta_q[2] = 0; roi_.delta_q[3] = 0; roi_.delta_lf[0] = 0; roi_.delta_lf[1] = -20; roi_.delta_lf[2] = 0; roi_.delta_lf[3] = 0; roi_.static_threshold[0] = 0; roi_.static_threshold[1] = 1000; roi_.static_threshold[2] = 0; roi_.static_threshold[3] = 0; // Use 2 states: 1 is center square, 0 is the rest. roi_.roi_map = (uint8_t *)calloc(roi_.rows * roi_.cols, sizeof(*roi_.roi_map)); for (unsigned int i = 0; i < roi_.rows; ++i) { for (unsigned int j = 0; j < roi_.cols; ++j) { if (i > (roi_.rows >> 2) && i < ((roi_.rows * 3) >> 2) && j > (roi_.cols >> 2) && j < ((roi_.cols * 3) >> 2)) { roi_.roi_map[i * roi_.cols + j] = 1; } } } ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) << " The datarate for the file exceeds the target!"; ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) << " The datarate for the file missed the target!"; free(roi_.roi_map); } TEST_P(DatarateTestRealTime, GFBoost) { denoiser_on_ = 0; cfg_.rc_buf_initial_sz = 500; cfg_.rc_dropframe_thresh = 0; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_CBR; cfg_.g_error_resilient = 0; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 300); cfg_.rc_target_bitrate = 300; ResetModel(); // Apply a gf boost. gf_boost_ = 50; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_ * 0.95) << " The datarate for the file exceeds the target!"; ASSERT_LE(cfg_.rc_target_bitrate, file_datarate_ * 1.4) << " The datarate for the file missed the target!"; } VP8_INSTANTIATE_TEST_CASE(DatarateTestLarge, ALL_TEST_MODES, ::testing::Values(0)); VP8_INSTANTIATE_TEST_CASE(DatarateTestRealTime, ::testing::Values(::libvpx_test::kRealTime), ::testing::Values(-6, -12)); } // namespace libvpx-1.8.2/test/vp8_decrypt_test.cc000066400000000000000000000044121357355204000176270ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/ivf_video_source.h" namespace { // In a real use the 'decrypt_state' parameter will be a pointer to a struct // with whatever internal state the decryptor uses. 
const uint8_t test_key[16] = { 0x01, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x78, 0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0 }; void encrypt_buffer(const uint8_t *src, uint8_t *dst, size_t size, ptrdiff_t offset) { for (size_t i = 0; i < size; ++i) { dst[i] = src[i] ^ test_key[(offset + i) & 15]; } } void test_decrypt_cb(void *decrypt_state, const uint8_t *input, uint8_t *output, int count) { encrypt_buffer(input, output, count, input - reinterpret_cast<uint8_t *>(decrypt_state)); } } // namespace namespace libvpx_test { TEST(TestDecrypt, DecryptWorksVp8) { libvpx_test::IVFVideoSource video("vp80-00-comprehensive-001.ivf"); video.Init(); vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t(); VP8Decoder decoder(dec_cfg, 0); video.Begin(); // no decryption vpx_codec_err_t res = decoder.DecodeFrame(video.cxdata(), video.frame_size()); ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError(); // decrypt frame video.Next(); std::vector<uint8_t> encrypted(video.frame_size()); encrypt_buffer(video.cxdata(), &encrypted[0], video.frame_size(), 0); vpx_decrypt_init di = { test_decrypt_cb, &encrypted[0] }; decoder.Control(VPXD_SET_DECRYPTOR, &di); res = decoder.DecodeFrame(&encrypted[0], encrypted.size()); ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError(); } } // namespace libvpx_test libvpx-1.8.2/test/vp8_denoiser_sse2_test.cc000066400000000000000000000077371357355204000207320ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <math.h> #include <stdio.h> #include <stdlib.h> #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vp8/encoder/denoising.h" #include "vp8/common/reconinter.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" using libvpx_test::ACMRandom; namespace { const int kNumPixels = 16 * 16; class VP8DenoiserTest : public ::testing::TestWithParam<int> { public: virtual ~VP8DenoiserTest() {} virtual void SetUp() { increase_denoising_ = GetParam(); } virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: int increase_denoising_; }; TEST_P(VP8DenoiserTest, BitexactCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int count_test_block = 4000; const int stride = 16; // Allocate the space for input and output, // where sig_block_c/_sse2 is the block to be denoised, // mc_avg_block is the denoised reference block, // avg_block_c is the denoised result from C code, // avg_block_sse2 is the denoised result from SSE2 code. DECLARE_ALIGNED(16, uint8_t, sig_block_c[kNumPixels]); // Since in VP8 denoiser, the source signal will be changed, // we need another copy of the source signal as the input of sse2 code. DECLARE_ALIGNED(16, uint8_t, sig_block_sse2[kNumPixels]); DECLARE_ALIGNED(16, uint8_t, mc_avg_block[kNumPixels]); DECLARE_ALIGNED(16, uint8_t, avg_block_c[kNumPixels]); DECLARE_ALIGNED(16, uint8_t, avg_block_sse2[kNumPixels]); for (int i = 0; i < count_test_block; ++i) { // Generate random motion magnitude, 20% of which exceed the threshold.
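// A note on the draw below (illustrative math, not a behavioral change): the
// magnitude is uniform over [0, MOTION_MAGNITUDE_THRESHOLD * 1.2) after
// integer truncation, so in the continuous approximation the fraction at or
// above the threshold is 0.2 / 1.2, roughly 17%; the "20%" above is the rough
// intent. Either way, both the above-threshold and below-threshold paths of
// the filter get exercised.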
const int motion_magnitude_ran = rnd.Rand8() % static_cast<int>(MOTION_MAGNITUDE_THRESHOLD * 1.2); // Initialize a test block with random number in range [0, 255]. for (int j = 0; j < kNumPixels; ++j) { int temp = 0; sig_block_sse2[j] = sig_block_c[j] = rnd.Rand8(); // The pixels in mc_avg_block are generated by adding a random // number in range [-19, 19] to corresponding pixels in sig_block. temp = sig_block_c[j] + (rnd.Rand8() % 2 == 0 ? -1 : 1) * (rnd.Rand8() % 20); // Clip. mc_avg_block[j] = (temp < 0) ? 0 : ((temp > 255) ? 255 : temp); } // Test denoiser on Y component. ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_c( mc_avg_block, stride, avg_block_c, stride, sig_block_c, stride, motion_magnitude_ran, increase_denoising_)); ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_sse2( mc_avg_block, stride, avg_block_sse2, stride, sig_block_sse2, stride, motion_magnitude_ran, increase_denoising_)); // Check bitexactness. for (int h = 0; h < 16; ++h) { for (int w = 0; w < 16; ++w) { EXPECT_EQ(avg_block_c[h * stride + w], avg_block_sse2[h * stride + w]); } } // Test denoiser on UV component. ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_c( mc_avg_block, stride, avg_block_c, stride, sig_block_c, stride, motion_magnitude_ran, increase_denoising_)); ASM_REGISTER_STATE_CHECK(vp8_denoiser_filter_uv_sse2( mc_avg_block, stride, avg_block_sse2, stride, sig_block_sse2, stride, motion_magnitude_ran, increase_denoising_)); // Check bitexactness. for (int h = 0; h < 16; ++h) { for (int w = 0; w < 16; ++w) { EXPECT_EQ(avg_block_c[h * stride + w], avg_block_sse2[h * stride + w]); } } } } // Test both increase_denoising settings. INSTANTIATE_TEST_CASE_P(SSE2, VP8DenoiserTest, ::testing::Values(0, 1)); } // namespace libvpx-1.8.2/test/vp8_fdct4x4_test.cc000066400000000000000000000135161357355204000174420ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include #include #include #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "./vp8_rtcd.h" #include "test/acm_random.h" #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" namespace { typedef void (*FdctFunc)(int16_t *a, int16_t *b, int a_stride); const int cospi8sqrt2minus1 = 20091; const int sinpi8sqrt2 = 35468; void reference_idct4x4(const int16_t *input, int16_t *output) { const int16_t *ip = input; int16_t *op = output; for (int i = 0; i < 4; ++i) { const int a1 = ip[0] + ip[8]; const int b1 = ip[0] - ip[8]; const int temp1 = (ip[4] * sinpi8sqrt2) >> 16; const int temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); const int c1 = temp1 - temp2; const int temp3 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); const int temp4 = (ip[12] * sinpi8sqrt2) >> 16; const int d1 = temp3 + temp4; op[0] = a1 + d1; op[12] = a1 - d1; op[4] = b1 + c1; op[8] = b1 - c1; ++ip; ++op; } ip = output; op = output; for (int i = 0; i < 4; ++i) { const int a1 = ip[0] + ip[2]; const int b1 = ip[0] - ip[2]; const int temp1 = (ip[1] * sinpi8sqrt2) >> 16; const int temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); const int c1 = temp1 - temp2; const int temp3 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); const int temp4 = (ip[3] * sinpi8sqrt2) >> 16; const int d1 = temp3 + temp4; op[0] = (a1 + d1 + 4) >> 3; op[3] = (a1 - d1 + 4) >> 3; op[1] = (b1 + c1 + 4) >> 3; op[2] = (b1 - c1 + 4) >> 3; ip += 4; op += 4; } } using libvpx_test::ACMRandom; class FdctTest : public ::testing::TestWithParam { public: virtual void SetUp() { fdct_func_ = GetParam(); rnd_.Reset(ACMRandom::DeterministicSeed()); } protected: FdctFunc fdct_func_; ACMRandom rnd_; }; TEST_P(FdctTest, SignBiasCheck) { int16_t test_input_block[16]; DECLARE_ALIGNED(16, int16_t, test_output_block[16]); const int pitch = 8; int count_sign_block[16][2]; const int count_test_block = 1000000; memset(count_sign_block, 0, sizeof(count_sign_block)); for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with input range [-255, 255]. for (int j = 0; j < 16; ++j) { test_input_block[j] = rnd_.Rand8() - rnd_.Rand8(); } fdct_func_(test_input_block, test_output_block, pitch); for (int j = 0; j < 16; ++j) { if (test_output_block[j] < 0) { ++count_sign_block[j][0]; } else if (test_output_block[j] > 0) { ++count_sign_block[j][1]; } } } bool bias_acceptable = true; for (int j = 0; j < 16; ++j) { bias_acceptable = bias_acceptable && (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 10000); } EXPECT_EQ(true, bias_acceptable) << "Error: 4x4 FDCT has a sign bias > 1% for input range [-255, 255]"; memset(count_sign_block, 0, sizeof(count_sign_block)); for (int i = 0; i < count_test_block; ++i) { // Initialize a test block with input range [-15, 15]. 
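// How that range arises: Rand8() >> 4 keeps the high four bits of an 8-bit
// draw, giving a value in [0, 15], so the difference of two independent such
// draws lies in [-15, 15] with a triangular (not uniform) distribution
// centered on zero. The check below then verifies the forward transform does
// not systematically favor one sign for these small inputs.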
for (int j = 0; j < 16; ++j) { test_input_block[j] = (rnd_.Rand8() >> 4) - (rnd_.Rand8() >> 4); } fdct_func_(test_input_block, test_output_block, pitch); for (int j = 0; j < 16; ++j) { if (test_output_block[j] < 0) { ++count_sign_block[j][0]; } else if (test_output_block[j] > 0) { ++count_sign_block[j][1]; } } } bias_acceptable = true; for (int j = 0; j < 16; ++j) { bias_acceptable = bias_acceptable && (abs(count_sign_block[j][0] - count_sign_block[j][1]) < 100000); } EXPECT_EQ(true, bias_acceptable) << "Error: 4x4 FDCT has a sign bias > 10% for input range [-15, 15]"; }; TEST_P(FdctTest, RoundTripErrorCheck) { int max_error = 0; double total_error = 0; const int count_test_block = 1000000; for (int i = 0; i < count_test_block; ++i) { int16_t test_input_block[16]; int16_t test_output_block[16]; DECLARE_ALIGNED(16, int16_t, test_temp_block[16]); // Initialize a test block with input range [-255, 255]. for (int j = 0; j < 16; ++j) { test_input_block[j] = rnd_.Rand8() - rnd_.Rand8(); } const int pitch = 8; fdct_func_(test_input_block, test_temp_block, pitch); reference_idct4x4(test_temp_block, test_output_block); for (int j = 0; j < 16; ++j) { const int diff = test_input_block[j] - test_output_block[j]; const int error = diff * diff; if (max_error < error) max_error = error; total_error += error; } } EXPECT_GE(1, max_error) << "Error: FDCT/IDCT has an individual roundtrip error > 1"; EXPECT_GE(count_test_block, total_error) << "Error: FDCT/IDCT has average roundtrip error > 1 per block"; }; INSTANTIATE_TEST_CASE_P(C, FdctTest, ::testing::Values(vp8_short_fdct4x4_c)); #if HAVE_NEON INSTANTIATE_TEST_CASE_P(NEON, FdctTest, ::testing::Values(vp8_short_fdct4x4_neon)); #endif // HAVE_NEON #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P(SSE2, FdctTest, ::testing::Values(vp8_short_fdct4x4_sse2)); #endif // HAVE_SSE2 #if HAVE_MSA INSTANTIATE_TEST_CASE_P(MSA, FdctTest, ::testing::Values(vp8_short_fdct4x4_msa)); #endif // HAVE_MSA #if HAVE_MMI INSTANTIATE_TEST_CASE_P(MMI, FdctTest, ::testing::Values(vp8_short_fdct4x4_mmi)); #endif // HAVE_MMI } // namespace libvpx-1.8.2/test/vp8_fragments_test.cc000066400000000000000000000022051357355204000201410ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/video_source.h" namespace { class VP8FramgmentsTest : public ::libvpx_test::EncoderTest, public ::testing::Test { protected: VP8FramgmentsTest() : EncoderTest(&::libvpx_test::kVP8) {} virtual ~VP8FramgmentsTest() {} virtual void SetUp() { const unsigned long init_flags = // NOLINT(runtime/int) VPX_CODEC_USE_OUTPUT_PARTITION; InitializeConfig(); SetMode(::libvpx_test::kRealTime); set_init_flags(init_flags); } }; TEST_F(VP8FramgmentsTest, TestFragmentsEncodeDecode) { ::libvpx_test::RandomVideoSource video; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } } // namespace libvpx-1.8.2/test/vp8_multi_resolution_encoder.sh000077500000000000000000000055071357355204000222700ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file tests the libvpx vp8_multi_resolution_encoder example. To add new ## tests to this file, do the following: ## 1. Write a shell function (this is your test). ## 2. Add the function to vp8_mre_tests (on a new line). ## . $(dirname $0)/tools_common.sh # Environment check: $YUV_RAW_INPUT is required. vp8_multi_resolution_encoder_verify_environment() { if [ "$(vpx_config_option_enabled CONFIG_MULTI_RES_ENCODING)" = "yes" ]; then if [ ! -e "${YUV_RAW_INPUT}" ]; then elog "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi local app="vp8_multi_resolution_encoder" if [ -z "$(vpx_tool_path "${app}")" ]; then elog "${app} not found. It must exist in LIBVPX_BIN_PATH or its parent." return 1 fi fi } # Runs vp8_multi_resolution_encoder. Simply forwards all arguments to # vp8_multi_resolution_encoder after building path to the executable. vp8_mre() { local encoder="$(vpx_tool_path vp8_multi_resolution_encoder)" if [ ! -x "${encoder}" ]; then elog "${encoder} does not exist or is not executable." return 1 fi eval "${VPX_TEST_PREFIX}" "${encoder}" "$@" ${devnull} } vp8_multi_resolution_encoder_three_formats() { local output_files="${VPX_TEST_OUTPUT_DIR}/vp8_mre_0.ivf ${VPX_TEST_OUTPUT_DIR}/vp8_mre_1.ivf ${VPX_TEST_OUTPUT_DIR}/vp8_mre_2.ivf" local layer_bitrates="150 80 50" local keyframe_insert="200" local temporal_layers="3 3 3" local framerate="30" if [ "$(vpx_config_option_enabled CONFIG_MULTI_RES_ENCODING)" = "yes" ]; then if [ "$(vp8_encode_available)" = "yes" ]; then # Param order: # Input width # Input height # Framerate # Input file path # Output file names # Layer bitrates # Temporal layers # Keyframe insert # Output PSNR vp8_mre "${YUV_RAW_INPUT_WIDTH}" \ "${YUV_RAW_INPUT_HEIGHT}" \ "${framerate}" \ "${YUV_RAW_INPUT}" \ ${output_files} \ ${layer_bitrates} \ ${temporal_layers} \ "${keyframe_insert}" \ 0 for output_file in ${output_files}; do if [ ! -e "${output_file}" ]; then elog "Missing output file: ${output_file}" return 1 fi done fi fi } vp8_mre_tests="vp8_multi_resolution_encoder_three_formats" run_tests vp8_multi_resolution_encoder_verify_environment "${vp8_mre_tests}" libvpx-1.8.2/test/vp9_arf_freq_test.cc000066400000000000000000000160521357355204000177460ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/util.h" #include "test/y4m_video_source.h" #include "test/yuv_video_source.h" #include "vp9/encoder/vp9_ratectrl.h" namespace { const unsigned int kFrames = 100; const int kBitrate = 500; #define ARF_NOT_SEEN 1000001 #define ARF_SEEN_ONCE 1000000 typedef struct { const char *filename; unsigned int width; unsigned int height; unsigned int framerate_num; unsigned int framerate_den; unsigned int input_bit_depth; vpx_img_fmt fmt; vpx_bit_depth_t bit_depth; unsigned int profile; } TestVideoParam; typedef struct { libvpx_test::TestMode mode; int cpu_used; } TestEncodeParam; const TestVideoParam kTestVectors[] = { // artificially increase framerate to trigger default check { "hantro_collage_w352h288.yuv", 352, 288, 5000, 1, 8, VPX_IMG_FMT_I420, VPX_BITS_8, 0 }, { "hantro_collage_w352h288.yuv", 352, 288, 30, 1, 8, VPX_IMG_FMT_I420, VPX_BITS_8, 0 }, { "rush_hour_444.y4m", 352, 288, 30, 1, 8, VPX_IMG_FMT_I444, VPX_BITS_8, 1 }, #if CONFIG_VP9_HIGHBITDEPTH // Add list of profile 2/3 test videos here ... #endif // CONFIG_VP9_HIGHBITDEPTH }; const TestEncodeParam kEncodeVectors[] = { { ::libvpx_test::kOnePassGood, 2 }, { ::libvpx_test::kOnePassGood, 5 }, { ::libvpx_test::kTwoPassGood, 1 }, { ::libvpx_test::kTwoPassGood, 2 }, { ::libvpx_test::kTwoPassGood, 5 }, { ::libvpx_test::kRealTime, 5 }, }; const int kMinArfVectors[] = { // NOTE: 0 refers to the default built-in logic in: // vp9_rc_get_default_min_gf_interval(...) 0, 4, 8, 12, 15 }; int is_extension_y4m(const char *filename) { const char *dot = strrchr(filename, '.'); if (!dot || dot == filename) { return 0; } else { return !strcmp(dot, ".y4m"); } } class ArfFreqTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith3Params { protected: ArfFreqTest() : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(1)), test_encode_param_(GET_PARAM(2)), min_arf_requested_(GET_PARAM(3)) {} virtual ~ArfFreqTest() {} virtual void SetUp() { InitializeConfig(); SetMode(test_encode_param_.mode); if (test_encode_param_.mode != ::libvpx_test::kRealTime) { cfg_.g_lag_in_frames = 25; cfg_.rc_end_usage = VPX_VBR; } else { cfg_.g_lag_in_frames = 0; cfg_.rc_end_usage = VPX_CBR; cfg_.rc_buf_sz = 1000; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 600; } dec_cfg_.threads = 4; } virtual void BeginPassHook(unsigned int) { min_run_ = ARF_NOT_SEEN; run_of_visible_frames_ = 0; } int GetNumFramesInPkt(const vpx_codec_cx_pkt_t *pkt) { const uint8_t *buffer = reinterpret_cast(pkt->data.frame.buf); const uint8_t marker = buffer[pkt->data.frame.sz - 1]; const int mag = ((marker >> 3) & 3) + 1; int frames = (marker & 0x7) + 1; const unsigned int index_sz = 2 + mag * frames; // Check for superframe or not. // Assume superframe has only one visible frame, the rest being // invisible. If superframe index is not found, then there is only // one frame. 
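// Superframe index layout, for reference (this mirrors the decoding just
// above): the last byte of a packet carrying a superframe is a marker
// 0b110xxyyy, where yyy = (number of frames - 1) and xx = (bytes per frame
// size - 1, the 'mag'). The index occupies the final 2 + mag * frames bytes
// of the packet and repeats the marker at both its first and last byte; the
// check below trusts 'frames' only if that trailing structure is present.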
if (!((marker & 0xe0) == 0xc0 && pkt->data.frame.sz >= index_sz && buffer[pkt->data.frame.sz - index_sz] == marker)) { frames = 1; } return frames; } virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { if (pkt->kind != VPX_CODEC_CX_FRAME_PKT) return; const int frames = GetNumFramesInPkt(pkt); if (frames == 1) { run_of_visible_frames_++; } else if (frames == 2) { if (min_run_ == ARF_NOT_SEEN) { min_run_ = ARF_SEEN_ONCE; } else if (min_run_ == ARF_SEEN_ONCE || run_of_visible_frames_ < min_run_) { min_run_ = run_of_visible_frames_; } run_of_visible_frames_ = 1; } else { min_run_ = 0; run_of_visible_frames_ = 1; } } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1); encoder->Control(VP9E_SET_TILE_COLUMNS, 4); encoder->Control(VP8E_SET_CPUUSED, test_encode_param_.cpu_used); encoder->Control(VP9E_SET_MIN_GF_INTERVAL, min_arf_requested_); if (test_encode_param_.mode != ::libvpx_test::kRealTime) { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); encoder->Control(VP8E_SET_ARNR_TYPE, 3); } } } int GetMinVisibleRun() const { return min_run_; } int GetMinArfDistanceRequested() const { if (min_arf_requested_) { return min_arf_requested_; } else { return vp9_rc_get_default_min_gf_interval( test_video_param_.width, test_video_param_.height, (double)test_video_param_.framerate_num / test_video_param_.framerate_den); } } TestVideoParam test_video_param_; TestEncodeParam test_encode_param_; private: int min_arf_requested_; int min_run_; int run_of_visible_frames_; }; TEST_P(ArfFreqTest, MinArfFreqTest) { cfg_.rc_target_bitrate = kBitrate; cfg_.g_error_resilient = 0; cfg_.g_profile = test_video_param_.profile; cfg_.g_input_bit_depth = test_video_param_.input_bit_depth; cfg_.g_bit_depth = test_video_param_.bit_depth; init_flags_ = VPX_CODEC_USE_PSNR; if (cfg_.g_bit_depth > 8) init_flags_ |= VPX_CODEC_USE_HIGHBITDEPTH; std::unique_ptr video; if (is_extension_y4m(test_video_param_.filename)) { video.reset(new libvpx_test::Y4mVideoSource(test_video_param_.filename, 0, kFrames)); } else { video.reset(new libvpx_test::YUVVideoSource( test_video_param_.filename, test_video_param_.fmt, test_video_param_.width, test_video_param_.height, test_video_param_.framerate_num, test_video_param_.framerate_den, 0, kFrames)); } ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); const int min_run = GetMinVisibleRun(); const int min_arf_dist_requested = GetMinArfDistanceRequested(); if (min_run != ARF_NOT_SEEN && min_run != ARF_SEEN_ONCE) { const int min_arf_dist = min_run + 1; EXPECT_GE(min_arf_dist, min_arf_dist_requested); } } VP9_INSTANTIATE_TEST_CASE(ArfFreqTest, ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kEncodeVectors), ::testing::ValuesIn(kMinArfVectors)); } // namespace libvpx-1.8.2/test/vp9_block_error_test.cc000066400000000000000000000146421357355204000204670ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "./vp9_rtcd.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vp9/common/vp9_entropy.h" #include "vpx/vpx_codec.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_dsp_common.h" using libvpx_test::ACMRandom; namespace { const int kNumIterations = 1000; typedef int64_t (*HBDBlockErrorFunc)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bps); typedef std::tuple BlockErrorParam; typedef int64_t (*BlockErrorFunc)(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz); template int64_t BlockError8BitWrapper(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bps) { EXPECT_EQ(bps, 8); return fn(coeff, dqcoeff, block_size, ssz); } class BlockErrorTest : public ::testing::TestWithParam { public: virtual ~BlockErrorTest() {} virtual void SetUp() { error_block_op_ = GET_PARAM(0); ref_error_block_op_ = GET_PARAM(1); bit_depth_ = GET_PARAM(2); } virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: vpx_bit_depth_t bit_depth_; HBDBlockErrorFunc error_block_op_; HBDBlockErrorFunc ref_error_block_op_; }; TEST_P(BlockErrorTest, OperationCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); DECLARE_ALIGNED(16, tran_low_t, coeff[4096]); DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]); int err_count_total = 0; int first_failure = -1; intptr_t block_size; int64_t ssz; int64_t ret; int64_t ref_ssz; int64_t ref_ret; const int msb = bit_depth_ + 8 - 1; for (int i = 0; i < kNumIterations; ++i) { int err_count = 0; block_size = 16 << (i % 9); // All block sizes from 4x4, 8x4 ..64x64 for (int j = 0; j < block_size; j++) { // coeff and dqcoeff will always have at least the same sign, and this // can be used for optimization, so generate test input precisely. if (rnd(2)) { // Positive number coeff[j] = rnd(1 << msb); dqcoeff[j] = rnd(1 << msb); } else { // Negative number coeff[j] = -rnd(1 << msb); dqcoeff[j] = -rnd(1 << msb); } } ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_); ASM_REGISTER_STATE_CHECK( ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_)); err_count += (ref_ret != ret) | (ref_ssz != ssz); if (err_count && !err_count_total) { first_failure = i; } err_count_total += err_count; } EXPECT_EQ(0, err_count_total) << "Error: Error Block Test, C output doesn't match optimized output. " << "First failed at test case " << first_failure; } TEST_P(BlockErrorTest, ExtremeValues) { ACMRandom rnd(ACMRandom::DeterministicSeed()); DECLARE_ALIGNED(16, tran_low_t, coeff[4096]); DECLARE_ALIGNED(16, tran_low_t, dqcoeff[4096]); int err_count_total = 0; int first_failure = -1; intptr_t block_size; int64_t ssz; int64_t ret; int64_t ref_ssz; int64_t ref_ret; const int msb = bit_depth_ + 8 - 1; int max_val = ((1 << msb) - 1); for (int i = 0; i < kNumIterations; ++i) { int err_count = 0; int k = (i / 9) % 9; // Change the maximum coeff value, to test different bit boundaries if (k == 8 && (i % 9) == 0) { max_val >>= 1; } block_size = 16 << (i % 9); // All block sizes from 4x4, 8x4 ..64x64 for (int j = 0; j < block_size; j++) { if (k < 4) { // Test at positive maximum values coeff[j] = k % 2 ? max_val : 0; dqcoeff[j] = (k >> 1) % 2 ? max_val : 0; } else if (k < 8) { // Test at negative maximum values coeff[j] = k % 2 ? 
-max_val : 0; dqcoeff[j] = (k >> 1) % 2 ? -max_val : 0; } else { if (rnd(2)) { // Positive number coeff[j] = rnd(1 << 14); dqcoeff[j] = rnd(1 << 14); } else { // Negative number coeff[j] = -rnd(1 << 14); dqcoeff[j] = -rnd(1 << 14); } } } ref_ret = ref_error_block_op_(coeff, dqcoeff, block_size, &ref_ssz, bit_depth_); ASM_REGISTER_STATE_CHECK( ret = error_block_op_(coeff, dqcoeff, block_size, &ssz, bit_depth_)); err_count += (ref_ret != ret) | (ref_ssz != ssz); if (err_count && !err_count_total) { first_failure = i; } err_count_total += err_count; } EXPECT_EQ(0, err_count_total) << "Error: Error Block Test, C output doesn't match optimized output. " << "First failed at test case " << first_failure; } using std::make_tuple; #if HAVE_SSE2 const BlockErrorParam sse2_block_error_tests[] = { #if CONFIG_VP9_HIGHBITDEPTH make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c, VPX_BITS_10), make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c, VPX_BITS_12), make_tuple(&vp9_highbd_block_error_sse2, &vp9_highbd_block_error_c, VPX_BITS_8), #endif // CONFIG_VP9_HIGHBITDEPTH make_tuple(&BlockError8BitWrapper, &BlockError8BitWrapper, VPX_BITS_8) }; INSTANTIATE_TEST_CASE_P(SSE2, BlockErrorTest, ::testing::ValuesIn(sse2_block_error_tests)); #endif // HAVE_SSE2 #if HAVE_AVX2 INSTANTIATE_TEST_CASE_P( AVX2, BlockErrorTest, ::testing::Values(make_tuple(&BlockError8BitWrapper, &BlockError8BitWrapper, VPX_BITS_8))); #endif // HAVE_AVX2 } // namespace libvpx-1.8.2/test/vp9_boolcoder_test.cc000066400000000000000000000057501357355204000201340ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/acm_random.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/bitreader.h" #include "vpx_dsp/bitwriter.h" using libvpx_test::ACMRandom; namespace { const int num_tests = 10; } // namespace TEST(VP9, TestBitIO) { ACMRandom rnd(ACMRandom::DeterministicSeed()); for (int n = 0; n < num_tests; ++n) { for (int method = 0; method <= 7; ++method) { // we generate various proba const int kBitsToTest = 1000; uint8_t probas[kBitsToTest]; for (int i = 0; i < kBitsToTest; ++i) { const int parity = i & 1; /* clang-format off */ probas[i] = (method == 0) ? 0 : (method == 1) ? 255 : (method == 2) ? 128 : (method == 3) ? rnd.Rand8() : (method == 4) ? (parity ? 0 : 255) : // alternate between low and high proba: (method == 5) ? (parity ? rnd(128) : 255 - rnd(128)) : (method == 6) ? (parity ? rnd(64) : 255 - rnd(64)) : (parity ? rnd(32) : 255 - rnd(32)); /* clang-format on */ } for (int bit_method = 0; bit_method <= 3; ++bit_method) { const int random_seed = 6432; const int kBufferSize = 10000; ACMRandom bit_rnd(random_seed); vpx_writer bw; uint8_t bw_buffer[kBufferSize]; vpx_start_encode(&bw, bw_buffer); int bit = (bit_method == 0) ? 0 : (bit_method == 1) ? 
1 : 0; for (int i = 0; i < kBitsToTest; ++i) { if (bit_method == 2) { bit = (i & 1); } else if (bit_method == 3) { bit = bit_rnd(2); } vpx_write(&bw, bit, static_cast(probas[i])); } vpx_stop_encode(&bw); // vpx_reader_fill() may read into uninitialized data that // isn't used meaningfully, but may trigger an MSan warning. memset(bw_buffer + bw.pos, 0, sizeof(BD_VALUE) - 1); // First bit should be zero GTEST_ASSERT_EQ(bw_buffer[0] & 0x80, 0); vpx_reader br; vpx_reader_init(&br, bw_buffer, kBufferSize, NULL, NULL); bit_rnd.Reset(random_seed); for (int i = 0; i < kBitsToTest; ++i) { if (bit_method == 2) { bit = (i & 1); } else if (bit_method == 3) { bit = bit_rnd(2); } GTEST_ASSERT_EQ(vpx_read(&br, probas[i]), bit) << "pos: " << i << " / " << kBitsToTest << " bit_method: " << bit_method << " method: " << method; } } } } } libvpx-1.8.2/test/vp9_datarate_test.cc000066400000000000000000000776371357355204000177660ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" #include "test/y4m_video_source.h" #include "vpx/vpx_codec.h" #include "vpx_ports/bitops.h" namespace { class DatarateTestVP9 : public ::libvpx_test::EncoderTest { public: explicit DatarateTestVP9(const ::libvpx_test::CodecFactory *codec) : EncoderTest(codec) { tune_content_ = 0; } protected: virtual ~DatarateTestVP9() {} virtual void ResetModel() { last_pts_ = 0; bits_in_buffer_model_ = cfg_.rc_target_bitrate * cfg_.rc_buf_initial_sz; frame_number_ = 0; tot_frame_number_ = 0; first_drop_ = 0; num_drops_ = 0; aq_mode_ = 3; // Denoiser is off by default. denoiser_on_ = 0; // For testing up to 3 layers. for (int i = 0; i < 3; ++i) { bits_total_[i] = 0; } denoiser_offon_test_ = 0; denoiser_offon_period_ = -1; frame_parallel_decoding_mode_ = 1; use_roi_ = false; } // // Frame flags and layer id for temporal layers. // // For two layers, test pattern is: // 1 3 // 0 2 ..... // For three layers, test pattern is: // 1 3 5 7 // 2 6 // 0 4 .... // LAST is always update on base/layer 0, GOLDEN is updated on layer 1. // For this 3 layer example, the 2nd enhancement layer (layer 2) updates // the altref frame. static int GetFrameFlags(int frame_num, int num_temp_layers) { int frame_flags = 0; if (num_temp_layers == 2) { if (frame_num % 2 == 0) { // Layer 0: predict from L and ARF, update L. frame_flags = VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF; } else { // Layer 1: predict from L, G and ARF, and update G. frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_ENTROPY; } } else if (num_temp_layers == 3) { if (frame_num % 4 == 0) { // Layer 0: predict from L and ARF; update L. frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_REF_GF; } else if ((frame_num - 2) % 4 == 0) { // Layer 1: predict from L, G, ARF; update G. frame_flags = VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_NO_UPD_LAST; } else if ((frame_num - 1) % 2 == 0) { // Layer 2: predict from L, G, ARF; update ARF. 
frame_flags = VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_LAST; } } return frame_flags; } static int SetLayerId(int frame_num, int num_temp_layers) { int layer_id = 0; if (num_temp_layers == 2) { if (frame_num % 2 == 0) { layer_id = 0; } else { layer_id = 1; } } else if (num_temp_layers == 3) { if (frame_num % 4 == 0) { layer_id = 0; } else if ((frame_num - 2) % 4 == 0) { layer_id = 1; } else if ((frame_num - 1) % 2 == 0) { layer_id = 2; } } return layer_id; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); encoder->Control(VP9E_SET_AQ_MODE, aq_mode_); encoder->Control(VP9E_SET_TUNE_CONTENT, tune_content_); } if (denoiser_offon_test_) { ASSERT_GT(denoiser_offon_period_, 0) << "denoiser_offon_period_ is not positive."; if ((video->frame() + 1) % denoiser_offon_period_ == 0) { // Flip denoiser_on_ periodically denoiser_on_ ^= 1; } } encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_); encoder->Control(VP9E_SET_TILE_COLUMNS, get_msb(cfg_.g_threads)); encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, frame_parallel_decoding_mode_); if (use_roi_) { encoder->Control(VP9E_SET_ROI_MAP, &roi_); encoder->Control(VP9E_SET_AQ_MODE, 0); } if (cfg_.ts_number_layers > 1) { if (video->frame() == 0) { encoder->Control(VP9E_SET_SVC, 1); } vpx_svc_layer_id_t layer_id; layer_id.spatial_layer_id = 0; frame_flags_ = GetFrameFlags(video->frame(), cfg_.ts_number_layers); layer_id.temporal_layer_id = SetLayerId(video->frame(), cfg_.ts_number_layers); layer_id.temporal_layer_id_per_spatial[0] = SetLayerId(video->frame(), cfg_.ts_number_layers); encoder->Control(VP9E_SET_SVC_LAYER_ID, &layer_id); } const vpx_rational_t tb = video->timebase(); timebase_ = static_cast(tb.num) / tb.den; duration_ = 0; } virtual void FramePktHook(const vpx_codec_cx_pkt_t *pkt) { // Time since last timestamp = duration. vpx_codec_pts_t duration = pkt->data.frame.pts - last_pts_; if (duration > 1) { // If first drop not set and we have a drop set it to this time. if (!first_drop_) first_drop_ = last_pts_ + 1; // Update the number of frame drops. num_drops_ += static_cast(duration - 1); // Update counter for total number of frames (#frames input to encoder). // Needed for setting the proper layer_id below. tot_frame_number_ += static_cast(duration - 1); } int layer = SetLayerId(tot_frame_number_, cfg_.ts_number_layers); // Add to the buffer the bits we'd expect from a constant bitrate server. bits_in_buffer_model_ += static_cast( duration * timebase_ * cfg_.rc_target_bitrate * 1000); // Buffer should not go negative. ASSERT_GE(bits_in_buffer_model_, 0) << "Buffer Underrun at frame " << pkt->data.frame.pts; const size_t frame_size_in_bits = pkt->data.frame.sz * 8; // Update the total encoded bits. For temporal layers, update the cumulative // encoded bits per layer. for (int i = layer; i < static_cast(cfg_.ts_number_layers); ++i) { bits_total_[i] += frame_size_in_bits; } // Update the most recent pts. last_pts_ = pkt->data.frame.pts; ++frame_number_; ++tot_frame_number_; } virtual void EndPassHook(void) { for (int layer = 0; layer < static_cast(cfg_.ts_number_layers); ++layer) { duration_ = (last_pts_ + 1) * timebase_; if (bits_total_[layer]) { // Effective file datarate: effective_datarate_[layer] = (bits_total_[layer] / 1000.0) / duration_; } } } vpx_codec_pts_t last_pts_; double timebase_; int tune_content_; int frame_number_; // Counter for number of non-dropped/encoded frames. 
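// Accounting note for the members here: because FramePktHook() adds each
// frame's bits to bits_total_[i] for every layer i at or above the frame's
// layer, bits_total_[i] (and thus effective_datarate_[i]) describes the
// cumulative stream a decoder subscribing to layers 0..i would receive,
// matching the cumulative semantics of cfg_.layer_target_bitrate[].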
int tot_frame_number_; // Counter for total number of input frames. int64_t bits_total_[3]; double duration_; double effective_datarate_[3]; int set_cpu_used_; int64_t bits_in_buffer_model_; vpx_codec_pts_t first_drop_; int num_drops_; int aq_mode_; int denoiser_on_; int denoiser_offon_test_; int denoiser_offon_period_; int frame_parallel_decoding_mode_; bool use_roi_; vpx_roi_map_t roi_; }; // Params: test mode, speed setting and index for bitrate array. class DatarateTestVP9RealTimeMultiBR : public DatarateTestVP9, public ::libvpx_test::CodecTestWith2Params { public: DatarateTestVP9RealTimeMultiBR() : DatarateTestVP9(GET_PARAM(0)) {} protected: virtual void SetUp() { InitializeConfig(); SetMode(::libvpx_test::kRealTime); set_cpu_used_ = GET_PARAM(1); ResetModel(); } }; // Params: speed setting and index for bitrate array. class DatarateTestVP9LargeVBR : public DatarateTestVP9, public ::libvpx_test::CodecTestWith2Params { public: DatarateTestVP9LargeVBR() : DatarateTestVP9(GET_PARAM(0)) {} protected: virtual void SetUp() { InitializeConfig(); SetMode(::libvpx_test::kRealTime); set_cpu_used_ = GET_PARAM(1); ResetModel(); } }; // Check basic rate targeting for VBR mode with 0 lag. TEST_P(DatarateTestVP9LargeVBR, BasicRateTargetingVBRLagZero) { cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.g_error_resilient = 0; cfg_.rc_end_usage = VPX_VBR; cfg_.g_lag_in_frames = 0; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 300); const int bitrates[2] = { 400, 800 }; const int bitrate_index = GET_PARAM(2); cfg_.rc_target_bitrate = bitrates[bitrate_index]; ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75) << " The datarate for the file is lower than target by too much!"; ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.36) << " The datarate for the file is greater than target by too much!"; } // Check basic rate targeting for VBR mode with non-zero lag. TEST_P(DatarateTestVP9LargeVBR, BasicRateTargetingVBRLagNonZero) { cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.g_error_resilient = 0; cfg_.rc_end_usage = VPX_VBR; // For non-zero lag, rate control will work (be within bounds) for // real-time mode. if (deadline_ == VPX_DL_REALTIME) { cfg_.g_lag_in_frames = 15; } else { cfg_.g_lag_in_frames = 0; } ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 300); const int bitrates[2] = { 400, 800 }; const int bitrate_index = GET_PARAM(2); cfg_.rc_target_bitrate = bitrates[bitrate_index]; ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75) << " The datarate for the file is lower than target by too much!"; ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.35) << " The datarate for the file is greater than target by too much!"; } // Check basic rate targeting for VBR mode with non-zero lag, with // frame_parallel_decoding_mode off. This enables the adapt_coeff/mode/mv probs // since error_resilience is off. TEST_P(DatarateTestVP9LargeVBR, BasicRateTargetingVBRLagNonZeroFrameParDecOff) { cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.g_error_resilient = 0; cfg_.rc_end_usage = VPX_VBR; // For non-zero lag, rate control will work (be within bounds) for // real-time mode. 
if (deadline_ == VPX_DL_REALTIME) { cfg_.g_lag_in_frames = 15; } else { cfg_.g_lag_in_frames = 0; } ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 300); const int bitrates[2] = { 400, 800 }; const int bitrate_index = GET_PARAM(2); cfg_.rc_target_bitrate = bitrates[bitrate_index]; ResetModel(); frame_parallel_decoding_mode_ = 0; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.75) << " The datarate for the file is lower than target by too much!"; ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.35) << " The datarate for the file is greater than target by too much!"; } // Check basic rate targeting for CBR mode. TEST_P(DatarateTestVP9RealTimeMultiBR, BasicRateTargeting) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 1; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); const int bitrates[4] = { 150, 350, 550, 750 }; const int bitrate_index = GET_PARAM(2); cfg_.rc_target_bitrate = bitrates[bitrate_index]; ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) << " The datarate for the file is lower than target by too much!"; ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) << " The datarate for the file is greater than target by too much!"; } // Check basic rate targeting for CBR mode, with frame_parallel_decoding_mode // off( and error_resilience off). TEST_P(DatarateTestVP9RealTimeMultiBR, BasicRateTargetingFrameParDecOff) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 1; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; cfg_.g_error_resilient = 0; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); const int bitrates[4] = { 150, 350, 550, 750 }; const int bitrate_index = GET_PARAM(2); cfg_.rc_target_bitrate = bitrates[bitrate_index]; ResetModel(); frame_parallel_decoding_mode_ = 0; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) << " The datarate for the file is lower than target by too much!"; ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) << " The datarate for the file is greater than target by too much!"; } // Check basic rate targeting for CBR. TEST_P(DatarateTestVP9RealTimeMultiBR, BasicRateTargeting444) { ::libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 140); cfg_.g_profile = 1; cfg_.g_timebase = video.timebase(); cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 1; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.rc_end_usage = VPX_CBR; const int bitrates[4] = { 250, 450, 650, 850 }; const int bitrate_index = GET_PARAM(2); cfg_.rc_target_bitrate = bitrates[bitrate_index]; ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(static_cast(cfg_.rc_target_bitrate), effective_datarate_[0] * 0.80) << " The datarate for the file exceeds the target by too much!"; ASSERT_LE(static_cast(cfg_.rc_target_bitrate), effective_datarate_[0] * 1.15) << " The datarate for the file missed the target!" 
<< cfg_.rc_target_bitrate << " " << effective_datarate_[0]; } // Check that (1) the first dropped frame gets earlier and earlier // as the drop frame threshold is increased, and (2) that the total number of // frame drops does not decrease as we increase frame drop threshold. // Use a lower qp-max to force some frame drops. TEST_P(DatarateTestVP9RealTimeMultiBR, ChangingDropFrameThresh) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_undershoot_pct = 20; cfg_.rc_dropframe_thresh = 10; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 50; cfg_.rc_end_usage = VPX_CBR; cfg_.rc_target_bitrate = 200; cfg_.g_lag_in_frames = 0; // TODO(marpan): Investigate datarate target failures with a smaller keyframe // interval (128). cfg_.kf_max_dist = 9999; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); const int kDropFrameThreshTestStep = 30; const int bitrates[2] = { 50, 150 }; const int bitrate_index = GET_PARAM(2); if (bitrate_index > 1) return; cfg_.rc_target_bitrate = bitrates[bitrate_index]; vpx_codec_pts_t last_drop = 140; int last_num_drops = 0; for (int i = 10; i < 100; i += kDropFrameThreshTestStep) { cfg_.rc_dropframe_thresh = i; ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) << " The datarate for the file is lower than target by too much!"; ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.25) << " The datarate for the file is greater than target by too much!"; ASSERT_LE(first_drop_, last_drop) << " The first dropped frame for drop_thresh " << i << " > first dropped frame for drop_thresh " << i - kDropFrameThreshTestStep; ASSERT_GE(num_drops_, last_num_drops * 0.85) << " The number of dropped frames for drop_thresh " << i << " < number of dropped frames for drop_thresh " << i - kDropFrameThreshTestStep; last_drop = first_drop_; last_num_drops = num_drops_; } } // Check basic rate targeting for 2 temporal layers. TEST_P(DatarateTestVP9RealTimeMultiBR, BasicRateTargeting2TemporalLayers) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 1; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; // 2 Temporal layers, no spatial layers: Framerate decimation (2, 1). cfg_.ss_number_layers = 1; cfg_.ts_number_layers = 2; cfg_.ts_rate_decimator[0] = 2; cfg_.ts_rate_decimator[1] = 1; cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); const int bitrates[4] = { 200, 400, 600, 800 }; const int bitrate_index = GET_PARAM(2); cfg_.rc_target_bitrate = bitrates[bitrate_index]; ResetModel(); // 60-40 bitrate allocation for 2 temporal layers.
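// layer_target_bitrate[] is cumulative: entry i is the combined rate of
// layers 0..i, so a 60/40 split is written as {60% of total, total}, exactly
// as on the next two lines. A hypothetical helper (sketch only, not part of
// the harness) that turns per-layer fractions summing to 1.0 into these
// cumulative targets:
//
//   void CumulativeTargets(const double *fraction, int num_layers,
//                          int total_kbps, int *target_kbps) {
//     double acc = 0.0;
//     for (int i = 0; i < num_layers; ++i) {
//       acc += fraction[i];
//       target_kbps[i] = static_cast<int>(acc * total_kbps);  // layers 0..i
//     }
//   }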
cfg_.layer_target_bitrate[0] = 60 * cfg_.rc_target_bitrate / 100; cfg_.layer_target_bitrate[1] = cfg_.rc_target_bitrate; aq_mode_ = 0; if (deadline_ == VPX_DL_REALTIME) { aq_mode_ = 3; cfg_.g_error_resilient = 1; } ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85) << " The datarate for the file is lower than target by too much, " "for layer: " << j; ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.15) << " The datarate for the file is greater than target by too much, " "for layer: " << j; } } // Check basic rate targeting for 3 temporal layers. TEST_P(DatarateTestVP9RealTimeMultiBR, BasicRateTargeting3TemporalLayers) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 1; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1). cfg_.ss_number_layers = 1; cfg_.ts_number_layers = 3; cfg_.ts_rate_decimator[0] = 4; cfg_.ts_rate_decimator[1] = 2; cfg_.ts_rate_decimator[2] = 1; cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); const int bitrates[4] = { 200, 400, 600, 800 }; const int bitrate_index = GET_PARAM(2); cfg_.rc_target_bitrate = bitrates[bitrate_index]; ResetModel(); // 40-20-40 bitrate allocation for 3 temporal layers. cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100; cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100; cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate; aq_mode_ = 0; if (deadline_ == VPX_DL_REALTIME) { aq_mode_ = 3; cfg_.g_error_resilient = 1; } ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { // TODO(yaowu): Work out more stable rc control strategy and // Adjust the thresholds to be tighter than .75. ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.75) << " The datarate for the file is lower than target by too much, " "for layer: " << j; // TODO(yaowu): Work out more stable rc control strategy and // Adjust the thresholds to be tighter than 1.25. ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.25) << " The datarate for the file is greater than target by too much, " "for layer: " << j; } } // Params: speed setting. class DatarateTestVP9RealTime : public DatarateTestVP9, public ::libvpx_test::CodecTestWithParam { public: DatarateTestVP9RealTime() : DatarateTestVP9(GET_PARAM(0)) {} virtual ~DatarateTestVP9RealTime() {} protected: virtual void SetUp() { InitializeConfig(); SetMode(::libvpx_test::kRealTime); set_cpu_used_ = GET_PARAM(1); ResetModel(); } }; // Check basic rate targeting for CBR mode, with 2 threads and dropped frames. TEST_P(DatarateTestVP9RealTime, BasicRateTargetingDropFramesMultiThreads) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 30; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; // Encode using multiple threads. 
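// VP9E_SET_TILE_COLUMNS takes log2 of the desired column count, which is why
// PreEncodeFrameHook() passes get_msb(cfg_.g_threads): with the two threads
// configured below that is get_msb(2) == 1, i.e. 2^1 = 2 tile columns, so
// each encoder thread can own one tile column.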
cfg_.g_threads = 2; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); cfg_.rc_target_bitrate = 200; ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) << " The datarate for the file is lower than target by too much!"; ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) << " The datarate for the file is greater than target by too much!"; } // Check basic rate targeting for 3 temporal layers, with frame dropping. // Only for one (low) bitrate with lower max_quantizer, and somewhat higher // frame drop threshold, to force frame dropping. TEST_P(DatarateTestVP9RealTime, BasicRateTargeting3TemporalLayersFrameDropping) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; // Set frame drop threshold and rc_max_quantizer to force some frame drops. cfg_.rc_dropframe_thresh = 20; cfg_.rc_max_quantizer = 45; cfg_.rc_min_quantizer = 0; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; // 3 Temporal layers, no spatial layers: Framerate decimation (4, 2, 1). cfg_.ss_number_layers = 1; cfg_.ts_number_layers = 3; cfg_.ts_rate_decimator[0] = 4; cfg_.ts_rate_decimator[1] = 2; cfg_.ts_rate_decimator[2] = 1; cfg_.temporal_layering_mode = VP9E_TEMPORAL_LAYERING_MODE_BYPASS; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); cfg_.rc_target_bitrate = 200; ResetModel(); // 40-20-40 bitrate allocation for 3 temporal layers. cfg_.layer_target_bitrate[0] = 40 * cfg_.rc_target_bitrate / 100; cfg_.layer_target_bitrate[1] = 60 * cfg_.rc_target_bitrate / 100; cfg_.layer_target_bitrate[2] = cfg_.rc_target_bitrate; aq_mode_ = 0; if (deadline_ == VPX_DL_REALTIME) { aq_mode_ = 3; cfg_.g_error_resilient = 1; } ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); for (int j = 0; j < static_cast(cfg_.ts_number_layers); ++j) { ASSERT_GE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 0.85) << " The datarate for the file is lower than target by too much, " "for layer: " << j; ASSERT_LE(effective_datarate_[j], cfg_.layer_target_bitrate[j] * 1.20) << " The datarate for the file is greater than target by too much, " "for layer: " << j; // Expect some frame drops in this test: for this 200 frames test, // expect at least 10% and not more than 60% drops. ASSERT_GE(num_drops_, 20); ASSERT_LE(num_drops_, 280); } } // Check VP9 region of interest feature. TEST_P(DatarateTestVP9RealTime, RegionOfInterest) { if (deadline_ != VPX_DL_REALTIME || set_cpu_used_ < 5) return; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 0; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 63; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); cfg_.rc_target_bitrate = 450; cfg_.g_w = 640; cfg_.g_h = 480; ResetModel(); // Set ROI parameters use_roi_ = true; memset(&roi_, 0, sizeof(roi_)); roi_.rows = (cfg_.g_h + 7) / 8; roi_.cols = (cfg_.g_w + 7) / 8; roi_.delta_q[1] = -20; roi_.delta_lf[1] = -20; memset(roi_.ref_frame, -1, sizeof(roi_.ref_frame)); roi_.ref_frame[1] = 1; // Use 2 states: 1 is center square, 0 is the rest. 
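// Geometry of that square: the loop below marks rows and columns strictly
// between one quarter and three quarters of the map, i.e. roughly the middle
// half in each dimension (about a quarter of the total area). For VP9 the
// map is in 8x8-block units, hence the (cfg_.g_h + 7) / 8 sizing above,
// whereas the VP8 variant of this test uses 16x16 macroblock units.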
roi_.roi_map = reinterpret_cast( calloc(roi_.rows * roi_.cols, sizeof(*roi_.roi_map))); ASSERT_TRUE(roi_.roi_map != NULL); for (unsigned int i = 0; i < roi_.rows; ++i) { for (unsigned int j = 0; j < roi_.cols; ++j) { if (i > (roi_.rows >> 2) && i < ((roi_.rows * 3) >> 2) && j > (roi_.cols >> 2) && j < ((roi_.cols * 3) >> 2)) { roi_.roi_map[i * roi_.cols + j] = 1; } } } ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(cfg_.rc_target_bitrate, effective_datarate_[0] * 0.90) << " The datarate for the file exceeds the target!"; ASSERT_LE(cfg_.rc_target_bitrate, effective_datarate_[0] * 1.4) << " The datarate for the file missed the target!"; free(roi_.roi_map); } // Params: test mode, speed setting and index for bitrate array. class DatarateTestVP9PostEncodeDrop : public DatarateTestVP9, public ::libvpx_test::CodecTestWithParam { public: DatarateTestVP9PostEncodeDrop() : DatarateTestVP9(GET_PARAM(0)) {} protected: virtual void SetUp() { InitializeConfig(); SetMode(::libvpx_test::kRealTime); set_cpu_used_ = GET_PARAM(1); ResetModel(); } }; // Check basic rate targeting for CBR mode, with 2 threads and dropped frames. TEST_P(DatarateTestVP9PostEncodeDrop, PostEncodeDropScreenContent) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 30; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; // Encode using multiple threads. cfg_.g_threads = 2; cfg_.g_error_resilient = 0; tune_content_ = 1; ::libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, 30, 1, 0, 300); cfg_.rc_target_bitrate = 300; ResetModel(); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) << " The datarate for the file is lower than target by too much!"; ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) << " The datarate for the file is greater than target by too much!"; } #if CONFIG_VP9_TEMPORAL_DENOISING // Params: speed setting. class DatarateTestVP9RealTimeDenoiser : public DatarateTestVP9RealTime { public: virtual ~DatarateTestVP9RealTimeDenoiser() {} }; // Check basic datarate targeting, for a single bitrate, when denoiser is on. TEST_P(DatarateTestVP9RealTimeDenoiser, LowNoise) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 1; cfg_.rc_min_quantizer = 2; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), // there is only one denoiser mode: denoiserYonly(which is 1), // but may add more modes in the future. cfg_.rc_target_bitrate = 400; ResetModel(); // Turn on the denoiser. denoiser_on_ = 1; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) << " The datarate for the file is lower than target by too much!"; ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) << " The datarate for the file is greater than target by too much!"; } // Check basic datarate targeting, for a single bitrate, when denoiser is on, // for clip with high noise level. Use 2 threads. 
TEST_P(DatarateTestVP9RealTimeDenoiser, HighNoise) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 1; cfg_.rc_min_quantizer = 2; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; cfg_.g_threads = 2; ::libvpx_test::Y4mVideoSource video("noisy_clip_640_360.y4m", 0, 200); // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), // there is only one denoiser mode: kDenoiserOnYOnly(which is 1), // but may add more modes in the future. cfg_.rc_target_bitrate = 1000; ResetModel(); // Turn on the denoiser. denoiser_on_ = 1; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) << " The datarate for the file is lower than target by too much!"; ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15) << " The datarate for the file is greater than target by too much!"; } // Check basic datarate targeting, for a single bitrate, when denoiser is on, // for 1280x720 clip with 4 threads. TEST_P(DatarateTestVP9RealTimeDenoiser, 4threads) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 1; cfg_.rc_min_quantizer = 2; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; cfg_.g_threads = 4; ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 300); // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), // there is only one denoiser mode: denoiserYonly(which is 1), // but may add more modes in the future. cfg_.rc_target_bitrate = 1000; ResetModel(); // Turn on the denoiser. denoiser_on_ = 1; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85) << " The datarate for the file is lower than target by too much!"; ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.29) << " The datarate for the file is greater than target by too much!"; } // Check basic datarate targeting, for a single bitrate, when denoiser is off // and on. TEST_P(DatarateTestVP9RealTimeDenoiser, DenoiserOffOn) { cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 500; cfg_.rc_buf_sz = 1000; cfg_.rc_dropframe_thresh = 1; cfg_.rc_min_quantizer = 2; cfg_.rc_max_quantizer = 56; cfg_.rc_end_usage = VPX_CBR; cfg_.g_lag_in_frames = 0; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); // For the temporal denoiser (#if CONFIG_VP9_TEMPORAL_DENOISING), // there is only one denoiser mode: denoiserYonly(which is 1), // but may add more modes in the future. cfg_.rc_target_bitrate = 400; ResetModel(); // The denoiser is off by default. denoiser_on_ = 0; // Set the offon test flag. 
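// With the flag set below, PreEncodeFrameHook() xors denoiser_on_ every
// denoiser_offon_period_ frames, so a period of 100 over this 400-frame clip
// yields four alternating off/on segments; the rate checks then confirm CBR
// targeting holds across the toggles.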
  denoiser_offon_test_ = 1;
  denoiser_offon_period_ = 100;
  ASSERT_NO_FATAL_FAILURE(RunLoop(&video));
  ASSERT_GE(effective_datarate_[0], cfg_.rc_target_bitrate * 0.85)
      << " The datarate for the file is lower than target by too much!";
  ASSERT_LE(effective_datarate_[0], cfg_.rc_target_bitrate * 1.15)
      << " The datarate for the file is greater than target by too much!";
}
#endif  // CONFIG_VP9_TEMPORAL_DENOISING

VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTimeMultiBR,
                          ::testing::Range(5, 10), ::testing::Range(0, 4));

VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9LargeVBR, ::testing::Range(5, 9),
                          ::testing::Range(0, 2));

VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTime, ::testing::Range(5, 10));

VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9PostEncodeDrop,
                          ::testing::Range(5, 6));

#if CONFIG_VP9_TEMPORAL_DENOISING
VP9_INSTANTIATE_TEST_CASE(DatarateTestVP9RealTimeDenoiser,
                          ::testing::Range(5, 10));
#endif
}  // namespace

libvpx-1.8.2/test/vp9_decrypt_test.cc
/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <cstdio>
#include <cstdlib>
#include <string>
#include <vector>
#include "third_party/googletest/src/include/gtest/gtest.h"
#include "test/codec_factory.h"
#include "test/ivf_video_source.h"

namespace {
// In a real use the 'decrypt_state' parameter will be a pointer to a struct
// with whatever internal state the decryptor uses. For testing we'll just
// xor with a constant key, and decrypt_state will point to the start of
// the original buffer.
const uint8_t test_key[16] = { 0x01, 0x12, 0x23, 0x34, 0x45, 0x56, 0x67, 0x78,
                               0x89, 0x9a, 0xab, 0xbc, 0xcd, 0xde, 0xef, 0xf0 };

void encrypt_buffer(const uint8_t *src, uint8_t *dst, size_t size,
                    ptrdiff_t offset) {
  for (size_t i = 0; i < size; ++i) {
    dst[i] = src[i] ^ test_key[(offset + i) & 15];
  }
}

void test_decrypt_cb(void *decrypt_state, const uint8_t *input,
                     uint8_t *output, int count) {
  encrypt_buffer(input, output, count,
                 input - reinterpret_cast<uint8_t *>(decrypt_state));
}

}  // namespace

namespace libvpx_test {

TEST(TestDecrypt, DecryptWorksVp9) {
  libvpx_test::IVFVideoSource video("vp90-2-05-resize.ivf");
  video.Init();

  vpx_codec_dec_cfg_t dec_cfg = vpx_codec_dec_cfg_t();
  VP9Decoder decoder(dec_cfg, 0);

  video.Begin();

  // no decryption
  vpx_codec_err_t res = decoder.DecodeFrame(video.cxdata(), video.frame_size());
  ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();

  // decrypt frame
  video.Next();

  std::vector<uint8_t> encrypted(video.frame_size());
  encrypt_buffer(video.cxdata(), &encrypted[0], video.frame_size(), 0);
  vpx_decrypt_init di = { test_decrypt_cb, &encrypted[0] };
  decoder.Control(VPXD_SET_DECRYPTOR, &di);

  res = decoder.DecodeFrame(&encrypted[0], encrypted.size());
  ASSERT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError();
}

}  // namespace libvpx_test

libvpx-1.8.2/test/vp9_denoiser_test.cc
/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.
 *  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>
#include <stdlib.h>
#include <string.h>
#include <tuple>

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "test/acm_random.h"
#include "test/clear_system_state.h"
#include "test/register_state_check.h"
#include "test/util.h"

#include "vpx_scale/yv12config.h"
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_context_tree.h"
#include "vp9/encoder/vp9_denoiser.h"

using libvpx_test::ACMRandom;

namespace {

const int kNumPixels = 64 * 64;

typedef int (*Vp9DenoiserFilterFunc)(const uint8_t *sig, int sig_stride,
                                     const uint8_t *mc_avg, int mc_avg_stride,
                                     uint8_t *avg, int avg_stride,
                                     int increase_denoising, BLOCK_SIZE bs,
                                     int motion_magnitude);
typedef std::tuple<Vp9DenoiserFilterFunc, BLOCK_SIZE> VP9DenoiserTestParam;

class VP9DenoiserTest
    : public ::testing::Test,
      public ::testing::WithParamInterface<VP9DenoiserTestParam> {
 public:
  virtual ~VP9DenoiserTest() {}

  virtual void SetUp() { bs_ = GET_PARAM(1); }

  virtual void TearDown() { libvpx_test::ClearSystemState(); }

 protected:
  BLOCK_SIZE bs_;
};

TEST_P(VP9DenoiserTest, BitexactCheck) {
  ACMRandom rnd(ACMRandom::DeterministicSeed());
  const int count_test_block = 4000;

  // Allocate the space for input and output,
  // where sig_block is the block to be denoised,
  // mc_avg_block is the denoised reference block,
  // avg_block_c is the denoised result from C code,
  // avg_block_sse2 is the denoised result from SSE2 code.
  DECLARE_ALIGNED(16, uint8_t, sig_block[kNumPixels]);
  DECLARE_ALIGNED(16, uint8_t, mc_avg_block[kNumPixels]);
  DECLARE_ALIGNED(16, uint8_t, avg_block_c[kNumPixels]);
  DECLARE_ALIGNED(16, uint8_t, avg_block_sse2[kNumPixels]);

  for (int i = 0; i < count_test_block; ++i) {
    // Generate random motion magnitude, 20% of which exceed the threshold.
    const int motion_magnitude_random =
        rnd.Rand8() % static_cast<int>(MOTION_MAGNITUDE_THRESHOLD * 1.2);

    // Initialize a test block with random number in range [0, 255].
    for (int j = 0; j < kNumPixels; ++j) {
      int temp = 0;
      sig_block[j] = rnd.Rand8();
      // The pixels in mc_avg_block are generated by adding a random
      // number in range [-19, 19] to corresponding pixels in sig_block.
      temp = sig_block[j] +
             ((rnd.Rand8() % 2 == 0) ? -1 : 1) * (rnd.Rand8() % 20);
      // Clip.
      mc_avg_block[j] = (temp < 0) ? 0 : ((temp > 255) ? 255 : temp);
    }

    ASM_REGISTER_STATE_CHECK(vp9_denoiser_filter_c(sig_block, 64, mc_avg_block,
                                                   64, avg_block_c, 64, 0, bs_,
                                                   motion_magnitude_random));
    ASM_REGISTER_STATE_CHECK(GET_PARAM(0)(sig_block, 64, mc_avg_block, 64,
                                          avg_block_sse2, 64, 0, bs_,
                                          motion_magnitude_random));

    // Test bitexactness.
    for (int h = 0; h < (4 << b_height_log2_lookup[bs_]); ++h) {
      for (int w = 0; w < (4 << b_width_log2_lookup[bs_]); ++w) {
        EXPECT_EQ(avg_block_c[h * 64 + w], avg_block_sse2[h * 64 + w]);
      }
    }
  }
}

using std::make_tuple;

// Test for all block sizes.
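// Added usage sketch (not in the upstream file): how one of the filters under
// test is invoked directly, with the same 64-pixel stride and 8-bit blocks
// that BitexactCheck sets up. The meaning of the int return value is an
// assumption based on vp9/encoder/vp9_denoiser.h (a filter-vs-copy decision),
// so this is kept as an illustrative comment only:
//
//   DECLARE_ALIGNED(16, uint8_t, sig[kNumPixels]);     // noisy source block
//   DECLARE_ALIGNED(16, uint8_t, mc_avg[kNumPixels]);  // motion-comp average
//   DECLARE_ALIGNED(16, uint8_t, avg[kNumPixels]);     // denoised output
//   // ... fill sig and mc_avg ...
//   const int decision = vp9_denoiser_filter_c(
//       sig, /*sig_stride=*/64, mc_avg, /*mc_avg_stride=*/64, avg,
//       /*avg_stride=*/64, /*increase_denoising=*/0, BLOCK_16X16,
//       /*motion_magnitude=*/8);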
#if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, VP9DenoiserTest, ::testing::Values(make_tuple(&vp9_denoiser_filter_sse2, BLOCK_8X8), make_tuple(&vp9_denoiser_filter_sse2, BLOCK_8X16), make_tuple(&vp9_denoiser_filter_sse2, BLOCK_16X8), make_tuple(&vp9_denoiser_filter_sse2, BLOCK_16X16), make_tuple(&vp9_denoiser_filter_sse2, BLOCK_16X32), make_tuple(&vp9_denoiser_filter_sse2, BLOCK_32X16), make_tuple(&vp9_denoiser_filter_sse2, BLOCK_32X32), make_tuple(&vp9_denoiser_filter_sse2, BLOCK_32X64), make_tuple(&vp9_denoiser_filter_sse2, BLOCK_64X32), make_tuple(&vp9_denoiser_filter_sse2, BLOCK_64X64))); #endif // HAVE_SSE2 #if HAVE_NEON INSTANTIATE_TEST_CASE_P( NEON, VP9DenoiserTest, ::testing::Values(make_tuple(&vp9_denoiser_filter_neon, BLOCK_8X8), make_tuple(&vp9_denoiser_filter_neon, BLOCK_8X16), make_tuple(&vp9_denoiser_filter_neon, BLOCK_16X8), make_tuple(&vp9_denoiser_filter_neon, BLOCK_16X16), make_tuple(&vp9_denoiser_filter_neon, BLOCK_16X32), make_tuple(&vp9_denoiser_filter_neon, BLOCK_32X16), make_tuple(&vp9_denoiser_filter_neon, BLOCK_32X32), make_tuple(&vp9_denoiser_filter_neon, BLOCK_32X64), make_tuple(&vp9_denoiser_filter_neon, BLOCK_64X32), make_tuple(&vp9_denoiser_filter_neon, BLOCK_64X64))); #endif } // namespace libvpx-1.8.2/test/vp9_encoder_parms_get_to_decoder.cc000066400000000000000000000125521357355204000227720ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/util.h" #include "test/y4m_video_source.h" #include "vp9/vp9_dx_iface.h" namespace { const int kCpuUsed = 2; struct EncodePerfTestVideo { const char *name; uint32_t width; uint32_t height; uint32_t bitrate; int frames; }; const EncodePerfTestVideo kVP9EncodePerfTestVectors[] = { { "niklas_1280_720_30.y4m", 1280, 720, 600, 10 }, }; struct EncodeParameters { int32_t tile_rows; int32_t tile_cols; int32_t lossless; int32_t error_resilient; int32_t frame_parallel; vpx_color_range_t color_range; vpx_color_space_t cs; int render_size[2]; // TODO(JBB): quantizers / bitrate }; const EncodeParameters kVP9EncodeParameterSet[] = { { 0, 0, 0, 1, 0, VPX_CR_STUDIO_RANGE, VPX_CS_BT_601, { 0, 0 } }, { 0, 0, 0, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_709, { 0, 0 } }, { 0, 0, 1, 0, 0, VPX_CR_FULL_RANGE, VPX_CS_BT_2020, { 0, 0 } }, { 0, 2, 0, 0, 1, VPX_CR_STUDIO_RANGE, VPX_CS_UNKNOWN, { 640, 480 } }, // TODO(JBB): Test profiles (requires more work). 
}; class VpxEncoderParmsGetToDecoder : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params { protected: VpxEncoderParmsGetToDecoder() : EncoderTest(GET_PARAM(0)), encode_parms(GET_PARAM(1)) {} virtual ~VpxEncoderParmsGetToDecoder() {} virtual void SetUp() { InitializeConfig(); SetMode(::libvpx_test::kTwoPassGood); cfg_.g_lag_in_frames = 25; cfg_.g_error_resilient = encode_parms.error_resilient; dec_cfg_.threads = 4; test_video_ = GET_PARAM(2); cfg_.rc_target_bitrate = test_video_.bitrate; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP9E_SET_COLOR_SPACE, encode_parms.cs); encoder->Control(VP9E_SET_COLOR_RANGE, encode_parms.color_range); encoder->Control(VP9E_SET_LOSSLESS, encode_parms.lossless); encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, encode_parms.frame_parallel); encoder->Control(VP9E_SET_TILE_ROWS, encode_parms.tile_rows); encoder->Control(VP9E_SET_TILE_COLUMNS, encode_parms.tile_cols); encoder->Control(VP8E_SET_CPUUSED, kCpuUsed); encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); encoder->Control(VP8E_SET_ARNR_TYPE, 3); if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) { encoder->Control(VP9E_SET_RENDER_SIZE, encode_parms.render_size); } } } virtual bool HandleDecodeResult(const vpx_codec_err_t res_dec, const libvpx_test::VideoSource & /*video*/, libvpx_test::Decoder *decoder) { vpx_codec_ctx_t *const vp9_decoder = decoder->GetDecoder(); vpx_codec_alg_priv_t *const priv = reinterpret_cast(vp9_decoder->priv); VP9_COMMON *const common = &priv->pbi->common; if (encode_parms.lossless) { EXPECT_EQ(0, common->base_qindex); EXPECT_EQ(0, common->y_dc_delta_q); EXPECT_EQ(0, common->uv_dc_delta_q); EXPECT_EQ(0, common->uv_ac_delta_q); EXPECT_EQ(ONLY_4X4, common->tx_mode); } EXPECT_EQ(encode_parms.error_resilient, common->error_resilient_mode); if (encode_parms.error_resilient) { EXPECT_EQ(1, common->frame_parallel_decoding_mode); EXPECT_EQ(0, common->use_prev_frame_mvs); } else { EXPECT_EQ(encode_parms.frame_parallel, common->frame_parallel_decoding_mode); } EXPECT_EQ(encode_parms.color_range, common->color_range); EXPECT_EQ(encode_parms.cs, common->color_space); if (encode_parms.render_size[0] > 0 && encode_parms.render_size[1] > 0) { EXPECT_EQ(encode_parms.render_size[0], common->render_width); EXPECT_EQ(encode_parms.render_size[1], common->render_height); } EXPECT_EQ(encode_parms.tile_cols, common->log2_tile_cols); EXPECT_EQ(encode_parms.tile_rows, common->log2_tile_rows); EXPECT_EQ(VPX_CODEC_OK, res_dec) << decoder->DecodeError(); return VPX_CODEC_OK == res_dec; } EncodePerfTestVideo test_video_; private: EncodeParameters encode_parms; }; TEST_P(VpxEncoderParmsGetToDecoder, BitstreamParms) { init_flags_ = VPX_CODEC_USE_PSNR; std::unique_ptr video( new libvpx_test::Y4mVideoSource(test_video_.name, 0, test_video_.frames)); ASSERT_TRUE(video.get() != NULL); ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); } VP9_INSTANTIATE_TEST_CASE(VpxEncoderParmsGetToDecoder, ::testing::ValuesIn(kVP9EncodeParameterSet), ::testing::ValuesIn(kVP9EncodePerfTestVectors)); } // namespace libvpx-1.8.2/test/vp9_end_to_end_test.cc000066400000000000000000000247261357355204000202660ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "memory" #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" #include "test/y4m_video_source.h" #include "test/yuv_video_source.h" namespace { const unsigned int kWidth = 160; const unsigned int kHeight = 90; const unsigned int kFramerate = 50; const unsigned int kFrames = 20; const int kBitrate = 500; // List of psnr thresholds for speed settings 0-7 and 5 encoding modes const double kPsnrThreshold[][5] = { { 36.0, 37.0, 37.0, 37.0, 37.0 }, { 35.0, 36.0, 36.0, 36.0, 36.0 }, { 34.0, 35.0, 35.0, 35.0, 35.0 }, { 33.0, 34.0, 34.0, 34.0, 34.0 }, { 32.0, 33.0, 33.0, 33.0, 33.0 }, { 28.0, 32.0, 32.0, 32.0, 32.0 }, { 28.5, 31.0, 31.0, 31.0, 31.0 }, { 27.5, 30.0, 30.0, 30.0, 30.0 }, }; typedef struct { const char *filename; unsigned int input_bit_depth; vpx_img_fmt fmt; vpx_bit_depth_t bit_depth; unsigned int profile; } TestVideoParam; const TestVideoParam kTestVectors[] = { { "park_joy_90p_8_420.y4m", 8, VPX_IMG_FMT_I420, VPX_BITS_8, 0 }, { "park_joy_90p_8_422.y4m", 8, VPX_IMG_FMT_I422, VPX_BITS_8, 1 }, { "park_joy_90p_8_444.y4m", 8, VPX_IMG_FMT_I444, VPX_BITS_8, 1 }, { "park_joy_90p_8_440.yuv", 8, VPX_IMG_FMT_I440, VPX_BITS_8, 1 }, #if CONFIG_VP9_HIGHBITDEPTH { "park_joy_90p_10_420_20f.y4m", 10, VPX_IMG_FMT_I42016, VPX_BITS_10, 2 }, { "park_joy_90p_10_422_20f.y4m", 10, VPX_IMG_FMT_I42216, VPX_BITS_10, 3 }, { "park_joy_90p_10_444_20f.y4m", 10, VPX_IMG_FMT_I44416, VPX_BITS_10, 3 }, { "park_joy_90p_10_440.yuv", 10, VPX_IMG_FMT_I44016, VPX_BITS_10, 3 }, { "park_joy_90p_12_420_20f.y4m", 12, VPX_IMG_FMT_I42016, VPX_BITS_12, 2 }, { "park_joy_90p_12_422_20f.y4m", 12, VPX_IMG_FMT_I42216, VPX_BITS_12, 3 }, { "park_joy_90p_12_444_20f.y4m", 12, VPX_IMG_FMT_I44416, VPX_BITS_12, 3 }, { "park_joy_90p_12_440.yuv", 12, VPX_IMG_FMT_I44016, VPX_BITS_12, 3 }, #endif // CONFIG_VP9_HIGHBITDEPTH }; // Encoding modes tested const libvpx_test::TestMode kEncodingModeVectors[] = { ::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime }; // Speed settings tested const int kCpuUsedVectors[] = { 1, 2, 3, 5, 6, 7 }; int is_extension_y4m(const char *filename) { const char *dot = strrchr(filename, '.'); if (!dot || dot == filename) { return 0; } else { return !strcmp(dot, ".y4m"); } } class EndToEndTestAdaptiveRDThresh : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params { protected: EndToEndTestAdaptiveRDThresh() : EncoderTest(GET_PARAM(0)), cpu_used_start_(GET_PARAM(1)), cpu_used_end_(GET_PARAM(2)) {} virtual ~EndToEndTestAdaptiveRDThresh() {} virtual void SetUp() { InitializeConfig(); SetMode(::libvpx_test::kRealTime); cfg_.g_lag_in_frames = 0; cfg_.rc_end_usage = VPX_CBR; cfg_.rc_buf_sz = 1000; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 600; dec_cfg_.threads = 4; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, cpu_used_start_); encoder->Control(VP9E_SET_ROW_MT, 1); encoder->Control(VP9E_SET_TILE_COLUMNS, 2); } if (video->frame() == 100) encoder->Control(VP8E_SET_CPUUSED, 
cpu_used_end_); } private: int cpu_used_start_; int cpu_used_end_; }; class EndToEndTestLarge : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith3Params { protected: EndToEndTestLarge() : EncoderTest(GET_PARAM(0)), test_video_param_(GET_PARAM(2)), cpu_used_(GET_PARAM(3)), psnr_(0.0), nframes_(0), encoding_mode_(GET_PARAM(1)) { cyclic_refresh_ = 0; denoiser_on_ = 0; } virtual ~EndToEndTestLarge() {} virtual void SetUp() { InitializeConfig(); SetMode(encoding_mode_); if (encoding_mode_ != ::libvpx_test::kRealTime) { cfg_.g_lag_in_frames = 5; cfg_.rc_end_usage = VPX_VBR; } else { cfg_.g_lag_in_frames = 0; cfg_.rc_end_usage = VPX_CBR; cfg_.rc_buf_sz = 1000; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 600; } dec_cfg_.threads = 4; } virtual void BeginPassHook(unsigned int) { psnr_ = 0.0; nframes_ = 0; } virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { psnr_ += pkt->data.psnr.psnr[0]; nframes_++; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 1); encoder->Control(VP9E_SET_TILE_COLUMNS, 4); encoder->Control(VP8E_SET_CPUUSED, cpu_used_); if (encoding_mode_ != ::libvpx_test::kRealTime) { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); encoder->Control(VP8E_SET_ARNR_TYPE, 3); } else { encoder->Control(VP9E_SET_NOISE_SENSITIVITY, denoiser_on_); encoder->Control(VP9E_SET_AQ_MODE, cyclic_refresh_); } } } double GetAveragePsnr() const { if (nframes_) return psnr_ / nframes_; return 0.0; } double GetPsnrThreshold() { return kPsnrThreshold[cpu_used_][encoding_mode_]; } TestVideoParam test_video_param_; int cpu_used_; int cyclic_refresh_; int denoiser_on_; private: double psnr_; unsigned int nframes_; libvpx_test::TestMode encoding_mode_; }; #if CONFIG_VP9_DECODER // The test parameters control VP9D_SET_LOOP_FILTER_OPT and the number of // decoder threads. class EndToEndTestLoopFilterThreading : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params { protected: EndToEndTestLoopFilterThreading() : EncoderTest(GET_PARAM(0)), use_loop_filter_opt_(GET_PARAM(1)) {} virtual ~EndToEndTestLoopFilterThreading() {} virtual void SetUp() { InitializeConfig(); SetMode(::libvpx_test::kRealTime); cfg_.g_threads = 2; cfg_.g_lag_in_frames = 0; cfg_.rc_target_bitrate = 500; cfg_.rc_end_usage = VPX_CBR; cfg_.kf_min_dist = 1; cfg_.kf_max_dist = 1; dec_cfg_.threads = GET_PARAM(2); } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, 8); } encoder->Control(VP9E_SET_TILE_COLUMNS, 4 - video->frame() % 5); } virtual void PreDecodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Decoder *decoder) { if (video->frame() == 0) { decoder->Control(VP9D_SET_LOOP_FILTER_OPT, use_loop_filter_opt_ ? 
1 : 0); } } private: const bool use_loop_filter_opt_; }; #endif // CONFIG_VP9_DECODER TEST_P(EndToEndTestLarge, EndtoEndPSNRTest) { cfg_.rc_target_bitrate = kBitrate; cfg_.g_error_resilient = 0; cfg_.g_profile = test_video_param_.profile; cfg_.g_input_bit_depth = test_video_param_.input_bit_depth; cfg_.g_bit_depth = test_video_param_.bit_depth; init_flags_ = VPX_CODEC_USE_PSNR; if (cfg_.g_bit_depth > 8) init_flags_ |= VPX_CODEC_USE_HIGHBITDEPTH; std::unique_ptr video; if (is_extension_y4m(test_video_param_.filename)) { video.reset(new libvpx_test::Y4mVideoSource(test_video_param_.filename, 0, kFrames)); } else { video.reset(new libvpx_test::YUVVideoSource( test_video_param_.filename, test_video_param_.fmt, kWidth, kHeight, kFramerate, 1, 0, kFrames)); } ASSERT_TRUE(video.get() != NULL); ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); const double psnr = GetAveragePsnr(); EXPECT_GT(psnr, GetPsnrThreshold()); } TEST_P(EndToEndTestLarge, EndtoEndPSNRDenoiserAQTest) { cfg_.rc_target_bitrate = kBitrate; cfg_.g_error_resilient = 0; cfg_.g_profile = test_video_param_.profile; cfg_.g_input_bit_depth = test_video_param_.input_bit_depth; cfg_.g_bit_depth = test_video_param_.bit_depth; init_flags_ = VPX_CODEC_USE_PSNR; cyclic_refresh_ = 3; denoiser_on_ = 1; if (cfg_.g_bit_depth > 8) init_flags_ |= VPX_CODEC_USE_HIGHBITDEPTH; std::unique_ptr video; if (is_extension_y4m(test_video_param_.filename)) { video.reset(new libvpx_test::Y4mVideoSource(test_video_param_.filename, 0, kFrames)); } else { video.reset(new libvpx_test::YUVVideoSource( test_video_param_.filename, test_video_param_.fmt, kWidth, kHeight, kFramerate, 1, 0, kFrames)); } ASSERT_TRUE(video.get() != NULL); ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); const double psnr = GetAveragePsnr(); EXPECT_GT(psnr, GetPsnrThreshold()); } TEST_P(EndToEndTestAdaptiveRDThresh, EndtoEndAdaptiveRDThreshRowMT) { cfg_.rc_target_bitrate = kBitrate; cfg_.g_error_resilient = 0; cfg_.g_threads = 2; ::libvpx_test::I420VideoSource video("niklas_640_480_30.yuv", 640, 480, 30, 1, 0, 400); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } #if CONFIG_VP9_DECODER TEST_P(EndToEndTestLoopFilterThreading, TileCountChange) { ::libvpx_test::RandomVideoSource video; video.SetSize(4096, 2160); video.set_limit(10); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); } #endif // CONFIG_VP9_DECODER VP9_INSTANTIATE_TEST_CASE(EndToEndTestLarge, ::testing::ValuesIn(kEncodingModeVectors), ::testing::ValuesIn(kTestVectors), ::testing::ValuesIn(kCpuUsedVectors)); VP9_INSTANTIATE_TEST_CASE(EndToEndTestAdaptiveRDThresh, ::testing::Values(5, 6, 7), ::testing::Values(8, 9)); #if CONFIG_VP9_DECODER VP9_INSTANTIATE_TEST_CASE(EndToEndTestLoopFilterThreading, ::testing::Bool(), ::testing::Range(2, 6)); #endif // CONFIG_VP9_DECODER } // namespace libvpx-1.8.2/test/vp9_ethread_test.cc000066400000000000000000000332021357355204000175710ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/md5_helper.h" #include "test/util.h" #include "test/y4m_video_source.h" #include "vp9/encoder/vp9_firstpass.h" namespace { // FIRSTPASS_STATS struct: // { // 25 double members; // 1 int64_t member; // } // Whenever FIRSTPASS_STATS struct is modified, the following constants need to // be revisited. const int kDbl = 25; const int kInt = 1; const size_t kFirstPassStatsSz = kDbl * sizeof(double) + kInt * sizeof(int64_t); class VPxFirstPassEncoderThreadTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith2Params { protected: VPxFirstPassEncoderThreadTest() : EncoderTest(GET_PARAM(0)), encoder_initialized_(false), tiles_(0), encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)) { init_flags_ = VPX_CODEC_USE_PSNR; row_mt_mode_ = 1; first_pass_only_ = true; firstpass_stats_.buf = NULL; firstpass_stats_.sz = 0; } virtual ~VPxFirstPassEncoderThreadTest() { free(firstpass_stats_.buf); } virtual void SetUp() { InitializeConfig(); SetMode(encoding_mode_); cfg_.rc_end_usage = VPX_VBR; cfg_.rc_2pass_vbr_minsection_pct = 5; cfg_.rc_2pass_vbr_maxsection_pct = 2000; cfg_.rc_max_quantizer = 56; cfg_.rc_min_quantizer = 0; } virtual void BeginPassHook(unsigned int /*pass*/) { encoder_initialized_ = false; abort_ = false; } virtual void EndPassHook() { // For first pass stats test, only run first pass encoder. if (first_pass_only_ && cfg_.g_pass == VPX_RC_FIRST_PASS) abort_ |= first_pass_only_; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource * /*video*/, ::libvpx_test::Encoder *encoder) { if (!encoder_initialized_) { // Encode in 2-pass mode. encoder->Control(VP9E_SET_TILE_COLUMNS, tiles_); encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); encoder->Control(VP8E_SET_ARNR_TYPE, 3); encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0); if (encoding_mode_ == ::libvpx_test::kTwoPassGood) encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_); encoder_initialized_ = true; } } virtual void StatsPktHook(const vpx_codec_cx_pkt_t *pkt) { const uint8_t *const pkt_buf = reinterpret_cast(pkt->data.twopass_stats.buf); const size_t pkt_size = pkt->data.twopass_stats.sz; // First pass stats size equals sizeof(FIRSTPASS_STATS) EXPECT_EQ(pkt_size, kFirstPassStatsSz) << "Error: First pass stats size doesn't equal kFirstPassStatsSz"; firstpass_stats_.buf = realloc(firstpass_stats_.buf, firstpass_stats_.sz + pkt_size); memcpy((uint8_t *)firstpass_stats_.buf + firstpass_stats_.sz, pkt_buf, pkt_size); firstpass_stats_.sz += pkt_size; } bool encoder_initialized_; int tiles_; ::libvpx_test::TestMode encoding_mode_; int set_cpu_used_; int row_mt_mode_; bool first_pass_only_; vpx_fixed_buf_t firstpass_stats_; }; static void compare_fp_stats(vpx_fixed_buf_t *fp_stats, double factor) { // fp_stats consists of 2 set of first pass encoding stats. These 2 set of // stats are compared to check if the stats match or at least are very close. FIRSTPASS_STATS *stats1 = reinterpret_cast(fp_stats->buf); int nframes_ = (int)(fp_stats->sz / sizeof(FIRSTPASS_STATS)); FIRSTPASS_STATS *stats2 = stats1 + nframes_ / 2; int i, j; // The total stats are also output and included in the first pass stats. Here // ignore that in the comparison. 
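// Added note (not in the upstream file): each pass emits one FIRSTPASS_STATS
// record per frame plus one trailing cumulative-total record, so if a pass
// produced P packets then nframes_ / 2 == P and the loop below compares only
// the first P - 1 records of each half. Each record is kFirstPassStatsSz =
// 25 * 8 + 1 * 8 = 208 bytes on platforms where double and int64_t are both
// 8 bytes wide.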
for (i = 0; i < (nframes_ / 2 - 1); ++i) { const double *frame_stats1 = reinterpret_cast(stats1); const double *frame_stats2 = reinterpret_cast(stats2); for (j = 0; j < kDbl; ++j) { ASSERT_LE(fabs(*frame_stats1 - *frame_stats2), fabs(*frame_stats1) / factor) << "First failure @ frame #" << i << " stat #" << j << " (" << *frame_stats1 << " vs. " << *frame_stats2 << ")"; frame_stats1++; frame_stats2++; } stats1++; stats2++; } // Reset firstpass_stats_ to 0. memset((uint8_t *)fp_stats->buf, 0, fp_stats->sz); fp_stats->sz = 0; } static void compare_fp_stats_md5(vpx_fixed_buf_t *fp_stats) { // fp_stats consists of 2 set of first pass encoding stats. These 2 set of // stats are compared to check if the stats match. uint8_t *stats1 = reinterpret_cast(fp_stats->buf); uint8_t *stats2 = stats1 + fp_stats->sz / 2; ::libvpx_test::MD5 md5_row_mt_0, md5_row_mt_1; md5_row_mt_0.Add(stats1, fp_stats->sz / 2); const char *md5_row_mt_0_str = md5_row_mt_0.Get(); md5_row_mt_1.Add(stats2, fp_stats->sz / 2); const char *md5_row_mt_1_str = md5_row_mt_1.Get(); // Check md5 match. ASSERT_STREQ(md5_row_mt_0_str, md5_row_mt_1_str) << "MD5 checksums don't match"; // Reset firstpass_stats_ to 0. memset((uint8_t *)fp_stats->buf, 0, fp_stats->sz); fp_stats->sz = 0; } TEST_P(VPxFirstPassEncoderThreadTest, FirstPassStatsTest) { ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 0, 60); first_pass_only_ = true; cfg_.rc_target_bitrate = 1000; // Test row_mt_mode: 0 vs 1 at single thread case(threads = 1, tiles_ = 0) tiles_ = 0; cfg_.g_threads = 1; row_mt_mode_ = 0; init_flags_ = VPX_CODEC_USE_PSNR; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); row_mt_mode_ = 1; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // Compare to check if using or not using row-mt generates close stats. ASSERT_NO_FATAL_FAILURE(compare_fp_stats(&firstpass_stats_, 1000.0)); // Test single thread vs multiple threads row_mt_mode_ = 1; tiles_ = 0; cfg_.g_threads = 1; init_flags_ = VPX_CODEC_USE_PSNR; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); cfg_.g_threads = 4; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // Compare to check if single-thread and multi-thread stats are close enough. ASSERT_NO_FATAL_FAILURE(compare_fp_stats(&firstpass_stats_, 1000.0)); // Bit exact test in row_mt mode. // When row_mt_mode_=1 and using >1 threads, the encoder generates bit exact // result. row_mt_mode_ = 1; tiles_ = 2; cfg_.g_threads = 2; init_flags_ = VPX_CODEC_USE_PSNR; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); cfg_.g_threads = 8; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); // Compare to check if stats match with row-mt=0/1. 
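// Added note (not in the upstream file): the earlier comparisons go through
// compare_fp_stats(), which tolerates a small relative difference; an MD5
// digest is used here instead because, as stated above, row-mt encoding with
// tiles is expected to be bit exact across thread counts.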
compare_fp_stats_md5(&firstpass_stats_); } class VPxEncoderThreadTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith4Params { protected: VPxEncoderThreadTest() : EncoderTest(GET_PARAM(0)), encoder_initialized_(false), tiles_(GET_PARAM(3)), threads_(GET_PARAM(4)), encoding_mode_(GET_PARAM(1)), set_cpu_used_(GET_PARAM(2)) { init_flags_ = VPX_CODEC_USE_PSNR; md5_.clear(); row_mt_mode_ = 1; psnr_ = 0.0; nframes_ = 0; } virtual ~VPxEncoderThreadTest() {} virtual void SetUp() { InitializeConfig(); SetMode(encoding_mode_); if (encoding_mode_ != ::libvpx_test::kRealTime) { cfg_.rc_end_usage = VPX_VBR; cfg_.rc_2pass_vbr_minsection_pct = 5; cfg_.rc_2pass_vbr_maxsection_pct = 2000; } else { cfg_.g_lag_in_frames = 0; cfg_.rc_end_usage = VPX_CBR; cfg_.g_error_resilient = 1; } cfg_.rc_max_quantizer = 56; cfg_.rc_min_quantizer = 0; } virtual void BeginPassHook(unsigned int /*pass*/) { encoder_initialized_ = false; psnr_ = 0.0; nframes_ = 0; } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource * /*video*/, ::libvpx_test::Encoder *encoder) { if (!encoder_initialized_) { // Encode 4 column tiles. encoder->Control(VP9E_SET_TILE_COLUMNS, tiles_); encoder->Control(VP8E_SET_CPUUSED, set_cpu_used_); if (encoding_mode_ != ::libvpx_test::kRealTime) { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); encoder->Control(VP8E_SET_ARNR_TYPE, 3); encoder->Control(VP9E_SET_FRAME_PARALLEL_DECODING, 0); } else { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 0); encoder->Control(VP9E_SET_AQ_MODE, 3); } encoder->Control(VP9E_SET_ROW_MT, row_mt_mode_); encoder_initialized_ = true; } } virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { psnr_ += pkt->data.psnr.psnr[0]; nframes_++; } virtual void DecompressedFrameHook(const vpx_image_t &img, vpx_codec_pts_t /*pts*/) { ::libvpx_test::MD5 md5_res; md5_res.Add(&img); md5_.push_back(md5_res.Get()); } virtual bool HandleDecodeResult(const vpx_codec_err_t res, const libvpx_test::VideoSource & /*video*/, libvpx_test::Decoder * /*decoder*/) { if (res != VPX_CODEC_OK) { EXPECT_EQ(VPX_CODEC_OK, res); return false; } return true; } double GetAveragePsnr() const { return nframes_ ? (psnr_ / nframes_) : 0.0; } bool encoder_initialized_; int tiles_; int threads_; ::libvpx_test::TestMode encoding_mode_; int set_cpu_used_; int row_mt_mode_; double psnr_; unsigned int nframes_; std::vector md5_; }; TEST_P(VPxEncoderThreadTest, EncoderResultTest) { ::libvpx_test::Y4mVideoSource video("niklas_1280_720_30.y4m", 15, 20); cfg_.rc_target_bitrate = 1000; // Part 1: Bit exact test for row_mt_mode_ = 0. // This part keeps original unit tests done before row-mt code is checked in. row_mt_mode_ = 0; // Encode using single thread. cfg_.g_threads = 1; init_flags_ = VPX_CODEC_USE_PSNR; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const std::vector single_thr_md5 = md5_; md5_.clear(); // Encode using multiple threads. cfg_.g_threads = threads_; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const std::vector multi_thr_md5 = md5_; md5_.clear(); // Compare to check if two vectors are equal. ASSERT_EQ(single_thr_md5, multi_thr_md5); // Part 2: row_mt_mode_ = 0 vs row_mt_mode_ = 1 single thread bit exact test. 
row_mt_mode_ = 1; // Encode using single thread cfg_.g_threads = 1; init_flags_ = VPX_CODEC_USE_PSNR; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); std::vector row_mt_single_thr_md5 = md5_; md5_.clear(); ASSERT_EQ(single_thr_md5, row_mt_single_thr_md5); // Part 3: Bit exact test with row-mt on // When row_mt_mode_=1 and using >1 threads, the encoder generates bit exact // result. row_mt_mode_ = 1; row_mt_single_thr_md5.clear(); // Encode using 2 threads. cfg_.g_threads = 2; init_flags_ = VPX_CODEC_USE_PSNR; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); row_mt_single_thr_md5 = md5_; md5_.clear(); // Encode using multiple threads. cfg_.g_threads = threads_; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const std::vector row_mt_multi_thr_md5 = md5_; md5_.clear(); // Compare to check if two vectors are equal. ASSERT_EQ(row_mt_single_thr_md5, row_mt_multi_thr_md5); // Part 4: PSNR test with bit_match_mode_ = 0 row_mt_mode_ = 1; // Encode using single thread. cfg_.g_threads = 1; init_flags_ = VPX_CODEC_USE_PSNR; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const double single_thr_psnr = GetAveragePsnr(); // Encode using multiple threads. cfg_.g_threads = threads_; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const double multi_thr_psnr = GetAveragePsnr(); EXPECT_NEAR(single_thr_psnr, multi_thr_psnr, 0.2); } INSTANTIATE_TEST_CASE_P( VP9, VPxFirstPassEncoderThreadTest, ::testing::Combine( ::testing::Values( static_cast(&libvpx_test::kVP9)), ::testing::Values(::libvpx_test::kTwoPassGood), ::testing::Range(0, 4))); // cpu_used // Split this into two instantiations so that we can distinguish // between very slow runs ( ie cpu_speed 0 ) vs ones that can be // run nightly by adding Large to the title. INSTANTIATE_TEST_CASE_P( VP9, VPxEncoderThreadTest, ::testing::Combine( ::testing::Values( static_cast(&libvpx_test::kVP9)), ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime), ::testing::Range(3, 10), // cpu_used ::testing::Range(0, 3), // tile_columns ::testing::Range(2, 5))); // threads INSTANTIATE_TEST_CASE_P( VP9Large, VPxEncoderThreadTest, ::testing::Combine( ::testing::Values( static_cast(&libvpx_test::kVP9)), ::testing::Values(::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime), ::testing::Range(0, 3), // cpu_used ::testing::Range(0, 3), // tile_columns ::testing::Range(2, 5))); // threads } // namespace libvpx-1.8.2/test/vp9_intrapred_test.cc000066400000000000000000001655511357355204000201620ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_pred_common.h" #include "vpx_mem/vpx_mem.h" namespace { using libvpx_test::ACMRandom; const int count_test_block = 100000; typedef void (*IntraPredFunc)(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left); struct IntraPredParam { IntraPredParam(IntraPredFunc pred = NULL, IntraPredFunc ref = NULL, int block_size_value = 0, int bit_depth_value = 0) : pred_fn(pred), ref_fn(ref), block_size(block_size_value), bit_depth(bit_depth_value) {} IntraPredFunc pred_fn; IntraPredFunc ref_fn; int block_size; int bit_depth; }; template class IntraPredTest : public ::testing::TestWithParam { public: void RunTest(Pixel *left_col, Pixel *above_data, Pixel *dst, Pixel *ref_dst) { ACMRandom rnd(ACMRandom::DeterministicSeed()); const int block_size = params_.block_size; above_row_ = above_data + 16; left_col_ = left_col; dst_ = dst; ref_dst_ = ref_dst; int error_count = 0; for (int i = 0; i < count_test_block; ++i) { // Fill edges with random data, try first with saturated values. for (int x = -1; x < block_size; x++) { if (i == 0) { above_row_[x] = mask_; } else { above_row_[x] = rnd.Rand16() & mask_; } } for (int x = block_size; x < 2 * block_size; x++) { above_row_[x] = above_row_[block_size - 1]; } for (int y = 0; y < block_size; y++) { if (i == 0) { left_col_[y] = mask_; } else { left_col_[y] = rnd.Rand16() & mask_; } } Predict(); CheckPrediction(i, &error_count); } ASSERT_EQ(0, error_count); } protected: virtual void SetUp() { params_ = this->GetParam(); stride_ = params_.block_size * 3; mask_ = (1 << params_.bit_depth) - 1; } void Predict(); void CheckPrediction(int test_case_number, int *error_count) const { // For each pixel ensure that the calculated value is the same as reference. const int block_size = params_.block_size; for (int y = 0; y < block_size; y++) { for (int x = 0; x < block_size; x++) { *error_count += ref_dst_[x + y * stride_] != dst_[x + y * stride_]; if (*error_count == 1) { ASSERT_EQ(ref_dst_[x + y * stride_], dst_[x + y * stride_]) << " Failed on Test Case Number " << test_case_number; } } } } Pixel *above_row_; Pixel *left_col_; Pixel *dst_; Pixel *ref_dst_; ptrdiff_t stride_; int mask_; PredParam params_; }; template <> void IntraPredTest::Predict() { params_.ref_fn(ref_dst_, stride_, above_row_, left_col_); ASM_REGISTER_STATE_CHECK( params_.pred_fn(dst_, stride_, above_row_, left_col_)); } typedef IntraPredTest VP9IntraPredTest; TEST_P(VP9IntraPredTest, IntraPredTests) { // max block size is 32 DECLARE_ALIGNED(16, uint8_t, left_col[2 * 32]); DECLARE_ALIGNED(16, uint8_t, above_data[2 * 32 + 32]); DECLARE_ALIGNED(16, uint8_t, dst[3 * 32 * 32]); DECLARE_ALIGNED(16, uint8_t, ref_dst[3 * 32 * 32]); RunTest(left_col, above_data, dst, ref_dst); } // Instantiate a token test to avoid -Wuninitialized warnings when none of the // other tests are enabled. 
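// Added note (not in the upstream file): pairing the C predictor with itself
// keeps one instantiation alive on builds with no SIMD targets while making
// the comparison trivially pass. Each IntraPredParam reads as (function under
// test, C reference, square block size, bit depth), so, for example,
// (&vpx_d45_predictor_4x4_sse2, &vpx_d45_predictor_4x4_c, 4, 8) checks the
// SSE2 4x4 D45 predictor against the C version at 8 bits.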
INSTANTIATE_TEST_CASE_P( C, VP9IntraPredTest, ::testing::Values(IntraPredParam(&vpx_d45_predictor_4x4_c, &vpx_d45_predictor_4x4_c, 4, 8))); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2, VP9IntraPredTest, ::testing::Values( IntraPredParam(&vpx_d45_predictor_4x4_sse2, &vpx_d45_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_d45_predictor_8x8_sse2, &vpx_d45_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_d207_predictor_4x4_sse2, &vpx_d207_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_128_predictor_4x4_sse2, &vpx_dc_128_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_128_predictor_8x8_sse2, &vpx_dc_128_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_128_predictor_16x16_sse2, &vpx_dc_128_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_128_predictor_32x32_sse2, &vpx_dc_128_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_left_predictor_4x4_sse2, &vpx_dc_left_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_left_predictor_8x8_sse2, &vpx_dc_left_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_left_predictor_16x16_sse2, &vpx_dc_left_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_left_predictor_32x32_sse2, &vpx_dc_left_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_predictor_4x4_sse2, &vpx_dc_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_predictor_8x8_sse2, &vpx_dc_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_predictor_16x16_sse2, &vpx_dc_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_predictor_32x32_sse2, &vpx_dc_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_top_predictor_4x4_sse2, &vpx_dc_top_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_top_predictor_8x8_sse2, &vpx_dc_top_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_top_predictor_16x16_sse2, &vpx_dc_top_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_top_predictor_32x32_sse2, &vpx_dc_top_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_h_predictor_4x4_sse2, &vpx_h_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_h_predictor_8x8_sse2, &vpx_h_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_h_predictor_16x16_sse2, &vpx_h_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_h_predictor_32x32_sse2, &vpx_h_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_tm_predictor_4x4_sse2, &vpx_tm_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_tm_predictor_8x8_sse2, &vpx_tm_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_tm_predictor_16x16_sse2, &vpx_tm_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_tm_predictor_32x32_sse2, &vpx_tm_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_v_predictor_4x4_sse2, &vpx_v_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_v_predictor_8x8_sse2, &vpx_v_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_v_predictor_16x16_sse2, &vpx_v_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_v_predictor_32x32_sse2, &vpx_v_predictor_32x32_c, 32, 8))); #endif // HAVE_SSE2 #if HAVE_SSSE3 INSTANTIATE_TEST_CASE_P( SSSE3, VP9IntraPredTest, ::testing::Values(IntraPredParam(&vpx_d45_predictor_16x16_ssse3, &vpx_d45_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_d45_predictor_32x32_ssse3, &vpx_d45_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_d63_predictor_4x4_ssse3, &vpx_d63_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_d63_predictor_8x8_ssse3, &vpx_d63_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_d63_predictor_16x16_ssse3, &vpx_d63_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_d63_predictor_32x32_ssse3, &vpx_d63_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_d153_predictor_4x4_ssse3, &vpx_d153_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_d153_predictor_8x8_ssse3, &vpx_d153_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_d153_predictor_16x16_ssse3, &vpx_d153_predictor_16x16_c, 16, 8), 
IntraPredParam(&vpx_d153_predictor_32x32_ssse3, &vpx_d153_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_d207_predictor_8x8_ssse3, &vpx_d207_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_d207_predictor_16x16_ssse3, &vpx_d207_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_d207_predictor_32x32_ssse3, &vpx_d207_predictor_32x32_c, 32, 8))); #endif // HAVE_SSSE3 #if HAVE_NEON INSTANTIATE_TEST_CASE_P( NEON, VP9IntraPredTest, ::testing::Values( IntraPredParam(&vpx_d45_predictor_4x4_neon, &vpx_d45_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_d45_predictor_8x8_neon, &vpx_d45_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_d45_predictor_16x16_neon, &vpx_d45_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_d45_predictor_32x32_neon, &vpx_d45_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_d135_predictor_4x4_neon, &vpx_d135_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_d135_predictor_8x8_neon, &vpx_d135_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_d135_predictor_16x16_neon, &vpx_d135_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_d135_predictor_32x32_neon, &vpx_d135_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_128_predictor_4x4_neon, &vpx_dc_128_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_128_predictor_8x8_neon, &vpx_dc_128_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_128_predictor_16x16_neon, &vpx_dc_128_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_128_predictor_32x32_neon, &vpx_dc_128_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_left_predictor_4x4_neon, &vpx_dc_left_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_left_predictor_8x8_neon, &vpx_dc_left_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_left_predictor_16x16_neon, &vpx_dc_left_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_left_predictor_32x32_neon, &vpx_dc_left_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_predictor_4x4_neon, &vpx_dc_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_predictor_8x8_neon, &vpx_dc_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_predictor_16x16_neon, &vpx_dc_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_predictor_32x32_neon, &vpx_dc_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_top_predictor_4x4_neon, &vpx_dc_top_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_top_predictor_8x8_neon, &vpx_dc_top_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_top_predictor_16x16_neon, &vpx_dc_top_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_top_predictor_32x32_neon, &vpx_dc_top_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_h_predictor_4x4_neon, &vpx_h_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_h_predictor_8x8_neon, &vpx_h_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_h_predictor_16x16_neon, &vpx_h_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_h_predictor_32x32_neon, &vpx_h_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_tm_predictor_4x4_neon, &vpx_tm_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_tm_predictor_8x8_neon, &vpx_tm_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_tm_predictor_16x16_neon, &vpx_tm_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_tm_predictor_32x32_neon, &vpx_tm_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_v_predictor_4x4_neon, &vpx_v_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_v_predictor_8x8_neon, &vpx_v_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_v_predictor_16x16_neon, &vpx_v_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_v_predictor_32x32_neon, &vpx_v_predictor_32x32_c, 32, 8))); #endif // HAVE_NEON #if HAVE_DSPR2 INSTANTIATE_TEST_CASE_P( DSPR2, VP9IntraPredTest, ::testing::Values(IntraPredParam(&vpx_dc_predictor_4x4_dspr2, &vpx_dc_predictor_4x4_c, 4, 8), 
IntraPredParam(&vpx_dc_predictor_8x8_dspr2, &vpx_dc_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_predictor_16x16_dspr2, &vpx_dc_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_h_predictor_4x4_dspr2, &vpx_h_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_h_predictor_8x8_dspr2, &vpx_h_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_h_predictor_16x16_dspr2, &vpx_h_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_tm_predictor_4x4_dspr2, &vpx_tm_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_tm_predictor_8x8_dspr2, &vpx_tm_predictor_8x8_c, 8, 8))); #endif // HAVE_DSPR2 #if HAVE_MSA INSTANTIATE_TEST_CASE_P( MSA, VP9IntraPredTest, ::testing::Values( IntraPredParam(&vpx_dc_128_predictor_4x4_msa, &vpx_dc_128_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_128_predictor_8x8_msa, &vpx_dc_128_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_128_predictor_16x16_msa, &vpx_dc_128_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_128_predictor_32x32_msa, &vpx_dc_128_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_left_predictor_4x4_msa, &vpx_dc_left_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_left_predictor_8x8_msa, &vpx_dc_left_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_left_predictor_16x16_msa, &vpx_dc_left_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_left_predictor_32x32_msa, &vpx_dc_left_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_predictor_4x4_msa, &vpx_dc_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_predictor_8x8_msa, &vpx_dc_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_predictor_16x16_msa, &vpx_dc_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_predictor_32x32_msa, &vpx_dc_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_top_predictor_4x4_msa, &vpx_dc_top_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_dc_top_predictor_8x8_msa, &vpx_dc_top_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_top_predictor_16x16_msa, &vpx_dc_top_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_top_predictor_32x32_msa, &vpx_dc_top_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_h_predictor_4x4_msa, &vpx_h_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_h_predictor_8x8_msa, &vpx_h_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_h_predictor_16x16_msa, &vpx_h_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_h_predictor_32x32_msa, &vpx_h_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_tm_predictor_4x4_msa, &vpx_tm_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_tm_predictor_8x8_msa, &vpx_tm_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_tm_predictor_16x16_msa, &vpx_tm_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_tm_predictor_32x32_msa, &vpx_tm_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_v_predictor_4x4_msa, &vpx_v_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_v_predictor_8x8_msa, &vpx_v_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_v_predictor_16x16_msa, &vpx_v_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_v_predictor_32x32_msa, &vpx_v_predictor_32x32_c, 32, 8))); #endif // HAVE_MSA // TODO(crbug.com/webm/1522): Fix test failures. 
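// Added note (not in the upstream file): the #if 0 block below parks the
// VSX-vs-C pairings that currently fail (see the bug linked above) so they
// can be moved back into the VSX instantiation once fixed.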
#if 0 IntraPredParam(&vpx_d45_predictor_8x8_vsx, &vpx_d45_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_d63_predictor_8x8_vsx, &vpx_d63_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_dc_predictor_8x8_vsx, &vpx_dc_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_h_predictor_4x4_vsx, &vpx_h_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_h_predictor_8x8_vsx, &vpx_h_predictor_8x8_c, 8, 8), IntraPredParam(&vpx_tm_predictor_4x4_vsx, &vpx_tm_predictor_4x4_c, 4, 8), IntraPredParam(&vpx_tm_predictor_8x8_vsx, &vpx_tm_predictor_8x8_c, 8, 8), #endif #if HAVE_VSX INSTANTIATE_TEST_CASE_P( VSX, VP9IntraPredTest, ::testing::Values(IntraPredParam(&vpx_d45_predictor_16x16_vsx, &vpx_d45_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_d45_predictor_32x32_vsx, &vpx_d45_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_d63_predictor_16x16_vsx, &vpx_d63_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_d63_predictor_32x32_vsx, &vpx_d63_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_128_predictor_16x16_vsx, &vpx_dc_128_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_128_predictor_32x32_vsx, &vpx_dc_128_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_left_predictor_16x16_vsx, &vpx_dc_left_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_left_predictor_32x32_vsx, &vpx_dc_left_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_predictor_16x16_vsx, &vpx_dc_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_predictor_32x32_vsx, &vpx_dc_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_dc_top_predictor_16x16_vsx, &vpx_dc_top_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_dc_top_predictor_32x32_vsx, &vpx_dc_top_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_h_predictor_16x16_vsx, &vpx_h_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_h_predictor_32x32_vsx, &vpx_h_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_tm_predictor_16x16_vsx, &vpx_tm_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_tm_predictor_32x32_vsx, &vpx_tm_predictor_32x32_c, 32, 8), IntraPredParam(&vpx_v_predictor_16x16_vsx, &vpx_v_predictor_16x16_c, 16, 8), IntraPredParam(&vpx_v_predictor_32x32_vsx, &vpx_v_predictor_32x32_c, 32, 8))); #endif // HAVE_VSX #if CONFIG_VP9_HIGHBITDEPTH typedef void (*HighbdIntraPred)(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bps); struct HighbdIntraPredParam { HighbdIntraPredParam(HighbdIntraPred pred = NULL, HighbdIntraPred ref = NULL, int block_size_value = 0, int bit_depth_value = 0) : pred_fn(pred), ref_fn(ref), block_size(block_size_value), bit_depth(bit_depth_value) {} HighbdIntraPred pred_fn; HighbdIntraPred ref_fn; int block_size; int bit_depth; }; template <> void IntraPredTest::Predict() { const int bit_depth = params_.bit_depth; params_.ref_fn(ref_dst_, stride_, above_row_, left_col_, bit_depth); ASM_REGISTER_STATE_CHECK( params_.pred_fn(dst_, stride_, above_row_, left_col_, bit_depth)); } typedef IntraPredTest VP9HighbdIntraPredTest; TEST_P(VP9HighbdIntraPredTest, HighbdIntraPredTests) { // max block size is 32 DECLARE_ALIGNED(16, uint16_t, left_col[2 * 32]); DECLARE_ALIGNED(16, uint16_t, above_data[2 * 32 + 32]); DECLARE_ALIGNED(16, uint16_t, dst[3 * 32 * 32]); DECLARE_ALIGNED(16, uint16_t, ref_dst[3 * 32 * 32]); RunTest(left_col, above_data, dst, ref_dst); } #if HAVE_SSSE3 INSTANTIATE_TEST_CASE_P( SSSE3_TO_C_8, VP9HighbdIntraPredTest, ::testing::Values( HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_ssse3, &vpx_highbd_d45_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_ssse3, &vpx_highbd_d45_predictor_8x8_c, 8, 8), 
HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_ssse3, &vpx_highbd_d45_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_ssse3, &vpx_highbd_d45_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_d63_predictor_8x8_ssse3, &vpx_highbd_d63_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_d63_predictor_16x16_ssse3, &vpx_highbd_d63_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_d63_predictor_32x32_c, &vpx_highbd_d63_predictor_32x32_ssse3, 32, 8), HighbdIntraPredParam(&vpx_highbd_d117_predictor_8x8_ssse3, &vpx_highbd_d117_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_d117_predictor_16x16_ssse3, &vpx_highbd_d117_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_d117_predictor_32x32_c, &vpx_highbd_d117_predictor_32x32_ssse3, 32, 8), HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_ssse3, &vpx_highbd_d135_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_ssse3, &vpx_highbd_d135_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_ssse3, &vpx_highbd_d135_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_d153_predictor_8x8_ssse3, &vpx_highbd_d153_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_d153_predictor_16x16_ssse3, &vpx_highbd_d153_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_d153_predictor_32x32_ssse3, &vpx_highbd_d153_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_d207_predictor_8x8_ssse3, &vpx_highbd_d207_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_d207_predictor_16x16_ssse3, &vpx_highbd_d207_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_d207_predictor_32x32_ssse3, &vpx_highbd_d207_predictor_32x32_c, 32, 8))); INSTANTIATE_TEST_CASE_P( SSSE3_TO_C_10, VP9HighbdIntraPredTest, ::testing::Values( HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_ssse3, &vpx_highbd_d45_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_ssse3, &vpx_highbd_d45_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_ssse3, &vpx_highbd_d45_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_ssse3, &vpx_highbd_d45_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_d63_predictor_8x8_ssse3, &vpx_highbd_d63_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_d63_predictor_16x16_ssse3, &vpx_highbd_d63_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_d63_predictor_32x32_c, &vpx_highbd_d63_predictor_32x32_ssse3, 32, 10), HighbdIntraPredParam(&vpx_highbd_d117_predictor_8x8_ssse3, &vpx_highbd_d117_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_d117_predictor_16x16_ssse3, &vpx_highbd_d117_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_d117_predictor_32x32_c, &vpx_highbd_d117_predictor_32x32_ssse3, 32, 10), HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_ssse3, &vpx_highbd_d135_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_ssse3, &vpx_highbd_d135_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_ssse3, &vpx_highbd_d135_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_d153_predictor_8x8_ssse3, &vpx_highbd_d153_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_d153_predictor_16x16_ssse3, &vpx_highbd_d153_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_d153_predictor_32x32_ssse3, &vpx_highbd_d153_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_d207_predictor_8x8_ssse3, 
&vpx_highbd_d207_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_d207_predictor_16x16_ssse3, &vpx_highbd_d207_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_d207_predictor_32x32_ssse3, &vpx_highbd_d207_predictor_32x32_c, 32, 10))); INSTANTIATE_TEST_CASE_P( SSSE3_TO_C_12, VP9HighbdIntraPredTest, ::testing::Values( HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_ssse3, &vpx_highbd_d45_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_ssse3, &vpx_highbd_d45_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_ssse3, &vpx_highbd_d45_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_ssse3, &vpx_highbd_d45_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_d63_predictor_8x8_ssse3, &vpx_highbd_d63_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_d63_predictor_16x16_ssse3, &vpx_highbd_d63_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_d63_predictor_32x32_c, &vpx_highbd_d63_predictor_32x32_ssse3, 32, 12), HighbdIntraPredParam(&vpx_highbd_d117_predictor_8x8_ssse3, &vpx_highbd_d117_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_d117_predictor_16x16_ssse3, &vpx_highbd_d117_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_d117_predictor_32x32_c, &vpx_highbd_d117_predictor_32x32_ssse3, 32, 12), HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_ssse3, &vpx_highbd_d135_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_ssse3, &vpx_highbd_d135_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_ssse3, &vpx_highbd_d135_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_d153_predictor_8x8_ssse3, &vpx_highbd_d153_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_d153_predictor_16x16_ssse3, &vpx_highbd_d153_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_d153_predictor_32x32_ssse3, &vpx_highbd_d153_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_d207_predictor_8x8_ssse3, &vpx_highbd_d207_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_d207_predictor_16x16_ssse3, &vpx_highbd_d207_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_d207_predictor_32x32_ssse3, &vpx_highbd_d207_predictor_32x32_c, 32, 12))); #endif // HAVE_SSSE3 #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P( SSE2_TO_C_8, VP9HighbdIntraPredTest, ::testing::Values( HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2, &vpx_highbd_dc_128_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2, &vpx_highbd_dc_128_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2, &vpx_highbd_dc_128_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2, &vpx_highbd_dc_128_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_d63_predictor_4x4_sse2, &vpx_highbd_d63_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_d117_predictor_4x4_sse2, &vpx_highbd_d117_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_sse2, &vpx_highbd_d135_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_d153_predictor_4x4_sse2, &vpx_highbd_d153_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_d207_predictor_4x4_sse2, &vpx_highbd_d207_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2, &vpx_highbd_dc_left_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2, &vpx_highbd_dc_left_predictor_8x8_c, 8, 8), 
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_sse2, &vpx_highbd_dc_left_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_sse2, &vpx_highbd_dc_left_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_sse2, &vpx_highbd_dc_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_sse2, &vpx_highbd_dc_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_sse2, &vpx_highbd_dc_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_sse2, &vpx_highbd_dc_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_sse2, &vpx_highbd_dc_top_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_sse2, &vpx_highbd_dc_top_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_sse2, &vpx_highbd_dc_top_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_sse2, &vpx_highbd_dc_top_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_sse2, &vpx_highbd_tm_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_sse2, &vpx_highbd_tm_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_sse2, &vpx_highbd_tm_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_sse2, &vpx_highbd_tm_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_sse2, &vpx_highbd_h_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_sse2, &vpx_highbd_h_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_sse2, &vpx_highbd_h_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_sse2, &vpx_highbd_h_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_sse2, &vpx_highbd_v_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_sse2, &vpx_highbd_v_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_sse2, &vpx_highbd_v_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_sse2, &vpx_highbd_v_predictor_32x32_c, 32, 8))); INSTANTIATE_TEST_CASE_P( SSE2_TO_C_10, VP9HighbdIntraPredTest, ::testing::Values( HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2, &vpx_highbd_dc_128_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2, &vpx_highbd_dc_128_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2, &vpx_highbd_dc_128_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2, &vpx_highbd_dc_128_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_d63_predictor_4x4_sse2, &vpx_highbd_d63_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_d117_predictor_4x4_sse2, &vpx_highbd_d117_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_sse2, &vpx_highbd_d135_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_d153_predictor_4x4_sse2, &vpx_highbd_d153_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_d207_predictor_4x4_sse2, &vpx_highbd_d207_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2, &vpx_highbd_dc_left_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2, &vpx_highbd_dc_left_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_sse2, &vpx_highbd_dc_left_predictor_16x16_c, 16, 10), 
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_sse2, &vpx_highbd_dc_left_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_sse2, &vpx_highbd_dc_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_sse2, &vpx_highbd_dc_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_sse2, &vpx_highbd_dc_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_sse2, &vpx_highbd_dc_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_sse2, &vpx_highbd_dc_top_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_sse2, &vpx_highbd_dc_top_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_sse2, &vpx_highbd_dc_top_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_sse2, &vpx_highbd_dc_top_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_sse2, &vpx_highbd_tm_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_sse2, &vpx_highbd_tm_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_sse2, &vpx_highbd_tm_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_sse2, &vpx_highbd_tm_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_sse2, &vpx_highbd_h_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_sse2, &vpx_highbd_h_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_sse2, &vpx_highbd_h_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_sse2, &vpx_highbd_h_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_sse2, &vpx_highbd_v_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_sse2, &vpx_highbd_v_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_sse2, &vpx_highbd_v_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_sse2, &vpx_highbd_v_predictor_32x32_c, 32, 10))); INSTANTIATE_TEST_CASE_P( SSE2_TO_C_12, VP9HighbdIntraPredTest, ::testing::Values( HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_sse2, &vpx_highbd_dc_128_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_sse2, &vpx_highbd_dc_128_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_sse2, &vpx_highbd_dc_128_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_sse2, &vpx_highbd_dc_128_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_d63_predictor_4x4_sse2, &vpx_highbd_d63_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_d117_predictor_4x4_sse2, &vpx_highbd_d117_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_sse2, &vpx_highbd_d135_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_d153_predictor_4x4_sse2, &vpx_highbd_d153_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_d207_predictor_4x4_sse2, &vpx_highbd_d207_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_sse2, &vpx_highbd_dc_left_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_sse2, &vpx_highbd_dc_left_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_sse2, &vpx_highbd_dc_left_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_sse2, &vpx_highbd_dc_left_predictor_32x32_c, 32, 12), 
HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_sse2, &vpx_highbd_dc_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_sse2, &vpx_highbd_dc_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_sse2, &vpx_highbd_dc_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_sse2, &vpx_highbd_dc_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_sse2, &vpx_highbd_dc_top_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_sse2, &vpx_highbd_dc_top_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_sse2, &vpx_highbd_dc_top_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_sse2, &vpx_highbd_dc_top_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_sse2, &vpx_highbd_tm_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_sse2, &vpx_highbd_tm_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_sse2, &vpx_highbd_tm_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_sse2, &vpx_highbd_tm_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_sse2, &vpx_highbd_h_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_sse2, &vpx_highbd_h_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_sse2, &vpx_highbd_h_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_sse2, &vpx_highbd_h_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_sse2, &vpx_highbd_v_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_sse2, &vpx_highbd_v_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_sse2, &vpx_highbd_v_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_sse2, &vpx_highbd_v_predictor_32x32_c, 32, 12))); #endif // HAVE_SSE2 #if HAVE_NEON INSTANTIATE_TEST_CASE_P( NEON_TO_C_8, VP9HighbdIntraPredTest, ::testing::Values( HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_neon, &vpx_highbd_d45_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_neon, &vpx_highbd_d45_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_neon, &vpx_highbd_d45_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_neon, &vpx_highbd_d45_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_neon, &vpx_highbd_d135_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_neon, &vpx_highbd_d135_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_neon, &vpx_highbd_d135_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_neon, &vpx_highbd_d135_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_neon, &vpx_highbd_dc_128_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_neon, &vpx_highbd_dc_128_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_neon, &vpx_highbd_dc_128_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_neon, &vpx_highbd_dc_128_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_neon, &vpx_highbd_dc_left_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_neon, &vpx_highbd_dc_left_predictor_8x8_c, 8, 8), 
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_neon, &vpx_highbd_dc_left_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_neon, &vpx_highbd_dc_left_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_neon, &vpx_highbd_dc_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_neon, &vpx_highbd_dc_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_neon, &vpx_highbd_dc_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_neon, &vpx_highbd_dc_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_neon, &vpx_highbd_dc_top_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_neon, &vpx_highbd_dc_top_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_neon, &vpx_highbd_dc_top_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_neon, &vpx_highbd_dc_top_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_neon, &vpx_highbd_h_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_neon, &vpx_highbd_h_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_neon, &vpx_highbd_h_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_neon, &vpx_highbd_h_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_neon, &vpx_highbd_tm_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_neon, &vpx_highbd_tm_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_neon, &vpx_highbd_tm_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_neon, &vpx_highbd_tm_predictor_32x32_c, 32, 8), HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_neon, &vpx_highbd_v_predictor_4x4_c, 4, 8), HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_neon, &vpx_highbd_v_predictor_8x8_c, 8, 8), HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_neon, &vpx_highbd_v_predictor_16x16_c, 16, 8), HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_neon, &vpx_highbd_v_predictor_32x32_c, 32, 8))); INSTANTIATE_TEST_CASE_P( NEON_TO_C_10, VP9HighbdIntraPredTest, ::testing::Values( HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_neon, &vpx_highbd_d45_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_neon, &vpx_highbd_d45_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_neon, &vpx_highbd_d45_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_neon, &vpx_highbd_d45_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_neon, &vpx_highbd_d135_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_neon, &vpx_highbd_d135_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_neon, &vpx_highbd_d135_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_neon, &vpx_highbd_d135_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_neon, &vpx_highbd_dc_128_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_neon, &vpx_highbd_dc_128_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_neon, &vpx_highbd_dc_128_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_neon, &vpx_highbd_dc_128_predictor_32x32_c, 32, 10), 
HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_neon, &vpx_highbd_dc_left_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_neon, &vpx_highbd_dc_left_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_neon, &vpx_highbd_dc_left_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_neon, &vpx_highbd_dc_left_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_neon, &vpx_highbd_dc_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_neon, &vpx_highbd_dc_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_neon, &vpx_highbd_dc_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_neon, &vpx_highbd_dc_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_neon, &vpx_highbd_dc_top_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_neon, &vpx_highbd_dc_top_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_neon, &vpx_highbd_dc_top_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_neon, &vpx_highbd_dc_top_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_neon, &vpx_highbd_h_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_neon, &vpx_highbd_h_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_neon, &vpx_highbd_h_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_neon, &vpx_highbd_h_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_neon, &vpx_highbd_tm_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_neon, &vpx_highbd_tm_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_neon, &vpx_highbd_tm_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_neon, &vpx_highbd_tm_predictor_32x32_c, 32, 10), HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_neon, &vpx_highbd_v_predictor_4x4_c, 4, 10), HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_neon, &vpx_highbd_v_predictor_8x8_c, 8, 10), HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_neon, &vpx_highbd_v_predictor_16x16_c, 16, 10), HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_neon, &vpx_highbd_v_predictor_32x32_c, 32, 10))); INSTANTIATE_TEST_CASE_P( NEON_TO_C_12, VP9HighbdIntraPredTest, ::testing::Values( HighbdIntraPredParam(&vpx_highbd_d45_predictor_4x4_neon, &vpx_highbd_d45_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_d45_predictor_8x8_neon, &vpx_highbd_d45_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_d45_predictor_16x16_neon, &vpx_highbd_d45_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_d45_predictor_32x32_neon, &vpx_highbd_d45_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_d135_predictor_4x4_neon, &vpx_highbd_d135_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_d135_predictor_8x8_neon, &vpx_highbd_d135_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_d135_predictor_16x16_neon, &vpx_highbd_d135_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_d135_predictor_32x32_neon, &vpx_highbd_d135_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_4x4_neon, &vpx_highbd_dc_128_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_8x8_neon, &vpx_highbd_dc_128_predictor_8x8_c, 8, 12), 
HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_16x16_neon, &vpx_highbd_dc_128_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_dc_128_predictor_32x32_neon, &vpx_highbd_dc_128_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_4x4_neon, &vpx_highbd_dc_left_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_8x8_neon, &vpx_highbd_dc_left_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_16x16_neon, &vpx_highbd_dc_left_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_dc_left_predictor_32x32_neon, &vpx_highbd_dc_left_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_dc_predictor_4x4_neon, &vpx_highbd_dc_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_dc_predictor_8x8_neon, &vpx_highbd_dc_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_dc_predictor_16x16_neon, &vpx_highbd_dc_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_dc_predictor_32x32_neon, &vpx_highbd_dc_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_4x4_neon, &vpx_highbd_dc_top_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_8x8_neon, &vpx_highbd_dc_top_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_16x16_neon, &vpx_highbd_dc_top_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_dc_top_predictor_32x32_neon, &vpx_highbd_dc_top_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_h_predictor_4x4_neon, &vpx_highbd_h_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_h_predictor_8x8_neon, &vpx_highbd_h_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_h_predictor_16x16_neon, &vpx_highbd_h_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_h_predictor_32x32_neon, &vpx_highbd_h_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_tm_predictor_4x4_neon, &vpx_highbd_tm_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_tm_predictor_8x8_neon, &vpx_highbd_tm_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_tm_predictor_16x16_neon, &vpx_highbd_tm_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_tm_predictor_32x32_neon, &vpx_highbd_tm_predictor_32x32_c, 32, 12), HighbdIntraPredParam(&vpx_highbd_v_predictor_4x4_neon, &vpx_highbd_v_predictor_4x4_c, 4, 12), HighbdIntraPredParam(&vpx_highbd_v_predictor_8x8_neon, &vpx_highbd_v_predictor_8x8_c, 8, 12), HighbdIntraPredParam(&vpx_highbd_v_predictor_16x16_neon, &vpx_highbd_v_predictor_16x16_c, 16, 12), HighbdIntraPredParam(&vpx_highbd_v_predictor_32x32_neon, &vpx_highbd_v_predictor_32x32_c, 32, 12))); #endif // HAVE_NEON #endif // CONFIG_VP9_HIGHBITDEPTH } // namespace libvpx-1.8.2/test/vp9_lossless_test.cc000066400000000000000000000075221357355204000200320ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/i420_video_source.h" #include "test/util.h" #include "test/y4m_video_source.h" namespace { const int kMaxPsnr = 100; class LosslessTest : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWithParam { protected: LosslessTest() : EncoderTest(GET_PARAM(0)), psnr_(kMaxPsnr), nframes_(0), encoding_mode_(GET_PARAM(1)) {} virtual ~LosslessTest() {} virtual void SetUp() { InitializeConfig(); SetMode(encoding_mode_); } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { // Only call Control if quantizer > 0 to verify that using quantizer // alone will activate lossless if (cfg_.rc_max_quantizer > 0 || cfg_.rc_min_quantizer > 0) { encoder->Control(VP9E_SET_LOSSLESS, 1); } } } virtual void BeginPassHook(unsigned int /*pass*/) { psnr_ = kMaxPsnr; nframes_ = 0; } virtual void PSNRPktHook(const vpx_codec_cx_pkt_t *pkt) { if (pkt->data.psnr.psnr[0] < psnr_) psnr_ = pkt->data.psnr.psnr[0]; } double GetMinPsnr() const { return psnr_; } private: double psnr_; unsigned int nframes_; libvpx_test::TestMode encoding_mode_; }; TEST_P(LosslessTest, TestLossLessEncoding) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 2000; cfg_.g_lag_in_frames = 25; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 0; init_flags_ = VPX_CODEC_USE_PSNR; // intentionally changed the dimension for better testing coverage libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, timebase.den, timebase.num, 0, 10); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const double psnr_lossless = GetMinPsnr(); EXPECT_GE(psnr_lossless, kMaxPsnr); } TEST_P(LosslessTest, TestLossLessEncoding444) { libvpx_test::Y4mVideoSource video("rush_hour_444.y4m", 0, 10); cfg_.g_profile = 1; cfg_.g_timebase = video.timebase(); cfg_.rc_target_bitrate = 2000; cfg_.g_lag_in_frames = 25; cfg_.rc_min_quantizer = 0; cfg_.rc_max_quantizer = 0; init_flags_ = VPX_CODEC_USE_PSNR; ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const double psnr_lossless = GetMinPsnr(); EXPECT_GE(psnr_lossless, kMaxPsnr); } TEST_P(LosslessTest, TestLossLessEncodingCtrl) { const vpx_rational timebase = { 33333333, 1000000000 }; cfg_.g_timebase = timebase; cfg_.rc_target_bitrate = 2000; cfg_.g_lag_in_frames = 25; // Intentionally set Q > 0, to make sure control can be used to activate // lossless cfg_.rc_min_quantizer = 10; cfg_.rc_max_quantizer = 20; init_flags_ = VPX_CODEC_USE_PSNR; libvpx_test::I420VideoSource video("hantro_collage_w352h288.yuv", 352, 288, timebase.den, timebase.num, 0, 10); ASSERT_NO_FATAL_FAILURE(RunLoop(&video)); const double psnr_lossless = GetMinPsnr(); EXPECT_GE(psnr_lossless, kMaxPsnr); } VP9_INSTANTIATE_TEST_CASE(LosslessTest, ::testing::Values(::libvpx_test::kRealTime, ::libvpx_test::kOnePassGood, ::libvpx_test::kTwoPassGood)); } // namespace libvpx-1.8.2/test/vp9_motion_vector_test.cc000066400000000000000000000062101357355204000210430ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "test/codec_factory.h" #include "test/encode_test_driver.h" #include "test/util.h" #include "test/yuv_video_source.h" namespace { #define MAX_EXTREME_MV 1 #define MIN_EXTREME_MV 2 // Encoding modes const libvpx_test::TestMode kEncodingModeVectors[] = { ::libvpx_test::kTwoPassGood, ::libvpx_test::kOnePassGood, ::libvpx_test::kRealTime }; // Encoding speeds const int kCpuUsedVectors[] = { 0, 1, 2, 3, 4, 5, 6 }; // MV test modes: 1 - always use maximum MV; 2 - always use minimum MV. const int kMVTestModes[] = { MAX_EXTREME_MV, MIN_EXTREME_MV }; class MotionVectorTestLarge : public ::libvpx_test::EncoderTest, public ::libvpx_test::CodecTestWith3Params { protected: MotionVectorTestLarge() : EncoderTest(GET_PARAM(0)), encoding_mode_(GET_PARAM(1)), cpu_used_(GET_PARAM(2)), mv_test_mode_(GET_PARAM(3)) {} virtual ~MotionVectorTestLarge() {} virtual void SetUp() { InitializeConfig(); SetMode(encoding_mode_); if (encoding_mode_ != ::libvpx_test::kRealTime) { cfg_.g_lag_in_frames = 3; cfg_.rc_end_usage = VPX_VBR; } else { cfg_.g_lag_in_frames = 0; cfg_.rc_end_usage = VPX_CBR; cfg_.rc_buf_sz = 1000; cfg_.rc_buf_initial_sz = 500; cfg_.rc_buf_optimal_sz = 600; } } virtual void PreEncodeFrameHook(::libvpx_test::VideoSource *video, ::libvpx_test::Encoder *encoder) { if (video->frame() == 0) { encoder->Control(VP8E_SET_CPUUSED, cpu_used_); encoder->Control(VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, mv_test_mode_); if (encoding_mode_ != ::libvpx_test::kRealTime) { encoder->Control(VP8E_SET_ENABLEAUTOALTREF, 1); encoder->Control(VP8E_SET_ARNR_MAXFRAMES, 7); encoder->Control(VP8E_SET_ARNR_STRENGTH, 5); encoder->Control(VP8E_SET_ARNR_TYPE, 3); } } } libvpx_test::TestMode encoding_mode_; int cpu_used_; int mv_test_mode_; }; TEST_P(MotionVectorTestLarge, OverallTest) { cfg_.rc_target_bitrate = 24000; cfg_.g_profile = 0; init_flags_ = VPX_CODEC_USE_PSNR; std::unique_ptr video; video.reset(new libvpx_test::YUVVideoSource( "niklas_640_480_30.yuv", VPX_IMG_FMT_I420, 3840, 2160, // 2048, 1080, 30, 1, 0, 5)); ASSERT_TRUE(video.get() != NULL); ASSERT_NO_FATAL_FAILURE(RunLoop(video.get())); } VP9_INSTANTIATE_TEST_CASE(MotionVectorTestLarge, ::testing::ValuesIn(kEncodingModeVectors), ::testing::ValuesIn(kCpuUsedVectors), ::testing::ValuesIn(kMVTestModes)); } // namespace libvpx-1.8.2/test/vp9_quantize_test.cc000066400000000000000000000552221357355204000200230ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/bench.h" #include "test/buffer.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/util.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_scan.h" #include "vpx/vpx_codec.h" #include "vpx/vpx_integer.h" #include "vpx_ports/msvc.h" #include "vpx_ports/vpx_timer.h" using libvpx_test::ACMRandom; using libvpx_test::Buffer; namespace { const int number_of_iterations = 100; typedef void (*QuantizeFunc)(const tran_low_t *coeff, intptr_t count, int skip_block, const int16_t *zbin, const int16_t *round, const int16_t *quant, const int16_t *quant_shift, tran_low_t *qcoeff, tran_low_t *dqcoeff, const int16_t *dequant, uint16_t *eob, const int16_t *scan, const int16_t *iscan); typedef std::tuple QuantizeParam; // Wrapper for FP version which does not use zbin or quant_shift. typedef void (*QuantizeFPFunc)(const tran_low_t *coeff, intptr_t count, int skip_block, const int16_t *round, const int16_t *quant, tran_low_t *qcoeff, tran_low_t *dqcoeff, const int16_t *dequant, uint16_t *eob, const int16_t *scan, const int16_t *iscan); template void QuantFPWrapper(const tran_low_t *coeff, intptr_t count, int skip_block, const int16_t *zbin, const int16_t *round, const int16_t *quant, const int16_t *quant_shift, tran_low_t *qcoeff, tran_low_t *dqcoeff, const int16_t *dequant, uint16_t *eob, const int16_t *scan, const int16_t *iscan) { (void)zbin; (void)quant_shift; fn(coeff, count, skip_block, round, quant, qcoeff, dqcoeff, dequant, eob, scan, iscan); } class VP9QuantizeBase : public AbstractBench { public: VP9QuantizeBase(vpx_bit_depth_t bit_depth, int max_size, bool is_fp) : bit_depth_(bit_depth), max_size_(max_size), is_fp_(is_fp), coeff_(Buffer(max_size_, max_size_, 0, 16)), qcoeff_(Buffer(max_size_, max_size_, 0, 32)), dqcoeff_(Buffer(max_size_, max_size_, 0, 32)) { // TODO(jianj): SSSE3 and AVX2 tests fail on extreme values. #if HAVE_NEON max_value_ = (1 << (7 + bit_depth_)) - 1; #else max_value_ = (1 << bit_depth_) - 1; #endif zbin_ptr_ = reinterpret_cast(vpx_memalign(16, 8 * sizeof(*zbin_ptr_))); round_fp_ptr_ = reinterpret_cast( vpx_memalign(16, 8 * sizeof(*round_fp_ptr_))); quant_fp_ptr_ = reinterpret_cast( vpx_memalign(16, 8 * sizeof(*quant_fp_ptr_))); round_ptr_ = reinterpret_cast(vpx_memalign(16, 8 * sizeof(*round_ptr_))); quant_ptr_ = reinterpret_cast(vpx_memalign(16, 8 * sizeof(*quant_ptr_))); quant_shift_ptr_ = reinterpret_cast( vpx_memalign(16, 8 * sizeof(*quant_shift_ptr_))); dequant_ptr_ = reinterpret_cast( vpx_memalign(16, 8 * sizeof(*dequant_ptr_))); r_ptr_ = (is_fp_) ? round_fp_ptr_ : round_ptr_; q_ptr_ = (is_fp_) ? 
quant_fp_ptr_ : quant_ptr_; } ~VP9QuantizeBase() { vpx_free(zbin_ptr_); vpx_free(round_fp_ptr_); vpx_free(quant_fp_ptr_); vpx_free(round_ptr_); vpx_free(quant_ptr_); vpx_free(quant_shift_ptr_); vpx_free(dequant_ptr_); zbin_ptr_ = NULL; round_fp_ptr_ = NULL; quant_fp_ptr_ = NULL; round_ptr_ = NULL; quant_ptr_ = NULL; quant_shift_ptr_ = NULL; dequant_ptr_ = NULL; libvpx_test::ClearSystemState(); } protected: int16_t *zbin_ptr_; int16_t *round_fp_ptr_; int16_t *quant_fp_ptr_; int16_t *round_ptr_; int16_t *quant_ptr_; int16_t *quant_shift_ptr_; int16_t *dequant_ptr_; const vpx_bit_depth_t bit_depth_; int max_value_; const int max_size_; const bool is_fp_; Buffer coeff_; Buffer qcoeff_; Buffer dqcoeff_; int16_t *r_ptr_; int16_t *q_ptr_; int count_; int skip_block_; const scan_order *scan_; uint16_t eob_; }; class VP9QuantizeTest : public VP9QuantizeBase, public ::testing::TestWithParam { public: VP9QuantizeTest() : VP9QuantizeBase(GET_PARAM(2), GET_PARAM(3), GET_PARAM(4)), quantize_op_(GET_PARAM(0)), ref_quantize_op_(GET_PARAM(1)) {} protected: virtual void Run(); const QuantizeFunc quantize_op_; const QuantizeFunc ref_quantize_op_; }; void VP9QuantizeTest::Run() { quantize_op_(coeff_.TopLeftPixel(), count_, skip_block_, zbin_ptr_, r_ptr_, q_ptr_, quant_shift_ptr_, qcoeff_.TopLeftPixel(), dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, scan_->scan, scan_->iscan); } // This quantizer compares the AC coefficients to the quantization step size to // determine if further multiplication operations are needed. // Based on vp9_quantize_fp_sse2(). inline void quant_fp_nz(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan, int is_32x32) { int i, eob = -1; const int thr = dequant_ptr[1] >> (1 + is_32x32); (void)iscan; (void)skip_block; assert(!skip_block); // Quantization pass: All coefficients with index >= zero_flag are // skippable. Note: zero_flag can be zero. for (i = 0; i < n_coeffs; i += 16) { int y; int nzflag_cnt = 0; int abs_coeff[16]; int coeff_sign[16]; // count nzflag for each row (16 tran_low_t) for (y = 0; y < 16; ++y) { const int rc = i + y; const int coeff = coeff_ptr[rc]; coeff_sign[y] = (coeff >> 31); abs_coeff[y] = (coeff ^ coeff_sign[y]) - coeff_sign[y]; // The first 16 are skipped in the sse2 code. Do the same here to match. if (i >= 16 && (abs_coeff[y] <= thr)) { nzflag_cnt++; } } for (y = 0; y < 16; ++y) { const int rc = i + y; // If all of the AC coeffs in a row has magnitude less than the // quantization step_size/2, quantize to zero. if (nzflag_cnt < 16) { int tmp; int _round; if (is_32x32) { _round = ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); } else { _round = round_ptr[rc != 0]; } tmp = clamp(abs_coeff[y] + _round, INT16_MIN, INT16_MAX); tmp = (tmp * quant_ptr[rc != 0]) >> (16 - is_32x32); qcoeff_ptr[rc] = (tmp ^ coeff_sign[y]) - coeff_sign[y]; dqcoeff_ptr[rc] = static_cast(qcoeff_ptr[rc] * dequant_ptr[rc != 0]); if (is_32x32) { dqcoeff_ptr[rc] = static_cast(qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2); } else { dqcoeff_ptr[rc] = static_cast(qcoeff_ptr[rc] * dequant_ptr[rc != 0]); } } else { qcoeff_ptr[rc] = 0; dqcoeff_ptr[rc] = 0; } } } // Scan for eob. for (i = 0; i < n_coeffs; i++) { // Use the scan order to find the correct eob. 
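    // qcoeff_ptr is indexed in raster order, so scan[] maps scan position i
    // to its raster coefficient; overwriting eob on every nonzero hit leaves
    // it at the highest nonzero scan index, matching the SIMD behavior this
    // reference is modeled on.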
const int rc = scan[i]; if (qcoeff_ptr[rc]) { eob = i; } } *eob_ptr = eob + 1; } void quantize_fp_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 0); } void quantize_fp_32x32_nz_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { quant_fp_nz(coeff_ptr, n_coeffs, skip_block, round_ptr, quant_ptr, qcoeff_ptr, dqcoeff_ptr, dequant_ptr, eob_ptr, scan, iscan, 1); } void GenerateHelperArrays(ACMRandom *rnd, int16_t *zbin, int16_t *round, int16_t *quant, int16_t *quant_shift, int16_t *dequant, int16_t *round_fp, int16_t *quant_fp) { // Max when q == 0. Otherwise, it is 48 for Y and 42 for U/V. const int max_qrounding_factor_fp = 64; for (int j = 0; j < 2; j++) { // The range is 4 to 1828 in the VP9 tables. const int qlookup = rnd->RandRange(1825) + 4; round_fp[j] = (max_qrounding_factor_fp * qlookup) >> 7; quant_fp[j] = (1 << 16) / qlookup; // Values determined by deconstructing vp9_init_quantizer(). // zbin may be up to 1143 for 8 and 10 bit Y values, or 1200 for 12 bit Y // values or U/V values of any bit depth. This is because y_delta is not // factored into the vp9_ac_quant() call. zbin[j] = rnd->RandRange(1200); // round may be up to 685 for Y values or 914 for U/V. round[j] = rnd->RandRange(914); // quant ranges from 1 to -32703 quant[j] = static_cast(rnd->RandRange(32704)) - 32703; // quant_shift goes up to 1 << 16. quant_shift[j] = rnd->RandRange(16384); // dequant maxes out at 1828 for all cases. dequant[j] = rnd->RandRange(1828); } for (int j = 2; j < 8; j++) { zbin[j] = zbin[1]; round_fp[j] = round_fp[1]; quant_fp[j] = quant_fp[1]; round[j] = round[1]; quant[j] = quant[1]; quant_shift[j] = quant_shift[1]; dequant[j] = dequant[1]; } } TEST_P(VP9QuantizeTest, OperationCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); ASSERT_TRUE(coeff_.Init()); ASSERT_TRUE(qcoeff_.Init()); ASSERT_TRUE(dqcoeff_.Init()); Buffer ref_qcoeff = Buffer(max_size_, max_size_, 0, 32); ASSERT_TRUE(ref_qcoeff.Init()); Buffer ref_dqcoeff = Buffer(max_size_, max_size_, 0, 32); ASSERT_TRUE(ref_dqcoeff.Init()); uint16_t ref_eob = 0; eob_ = 0; for (int i = 0; i < number_of_iterations; ++i) { // Test skip block for the first three iterations to catch all the different // sizes. 
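    // skip_block is pinned to 0 here: the fp reference quant_fp_nz() above
    // assert()s !skip_block, so the skip path cannot be exercised by this
    // test.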
const int skip_block = 0; TX_SIZE sz; if (max_size_ == 16) { sz = static_cast(i % 3); // TX_4X4, TX_8X8 TX_16X16 } else { sz = TX_32X32; } const TX_TYPE tx_type = static_cast((i >> 2) % 3); scan_ = &vp9_scan_orders[sz][tx_type]; count_ = (4 << sz) * (4 << sz); coeff_.Set(&rnd, -max_value_, max_value_); GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_, quant_shift_ptr_, dequant_ptr_, round_fp_ptr_, quant_fp_ptr_); ref_quantize_op_(coeff_.TopLeftPixel(), count_, skip_block, zbin_ptr_, r_ptr_, q_ptr_, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob, scan_->scan, scan_->iscan); ASM_REGISTER_STATE_CHECK(quantize_op_( coeff_.TopLeftPixel(), count_, skip_block, zbin_ptr_, r_ptr_, q_ptr_, quant_shift_ptr_, qcoeff_.TopLeftPixel(), dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, scan_->scan, scan_->iscan)); EXPECT_TRUE(qcoeff_.CheckValues(ref_qcoeff)); EXPECT_TRUE(dqcoeff_.CheckValues(ref_dqcoeff)); EXPECT_EQ(eob_, ref_eob); if (HasFailure()) { printf("Failure on iteration %d.\n", i); qcoeff_.PrintDifference(ref_qcoeff); dqcoeff_.PrintDifference(ref_dqcoeff); return; } } } TEST_P(VP9QuantizeTest, EOBCheck) { ACMRandom rnd(ACMRandom::DeterministicSeed()); ASSERT_TRUE(coeff_.Init()); ASSERT_TRUE(qcoeff_.Init()); ASSERT_TRUE(dqcoeff_.Init()); Buffer ref_qcoeff = Buffer(max_size_, max_size_, 0, 32); ASSERT_TRUE(ref_qcoeff.Init()); Buffer ref_dqcoeff = Buffer(max_size_, max_size_, 0, 32); ASSERT_TRUE(ref_dqcoeff.Init()); uint16_t ref_eob = 0; eob_ = 0; const uint32_t max_index = max_size_ * max_size_ - 1; for (int i = 0; i < number_of_iterations; ++i) { skip_block_ = 0; TX_SIZE sz; if (max_size_ == 16) { sz = static_cast(i % 3); // TX_4X4, TX_8X8 TX_16X16 } else { sz = TX_32X32; } const TX_TYPE tx_type = static_cast((i >> 2) % 3); scan_ = &vp9_scan_orders[sz][tx_type]; count_ = (4 << sz) * (4 << sz); // Two random entries coeff_.Set(0); coeff_.TopLeftPixel()[rnd.RandRange(count_) & max_index] = static_cast(rnd.RandRange(max_value_ * 2)) - max_value_; coeff_.TopLeftPixel()[rnd.RandRange(count_) & max_index] = static_cast(rnd.RandRange(max_value_ * 2)) - max_value_; GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_, quant_shift_ptr_, dequant_ptr_, round_fp_ptr_, quant_fp_ptr_); ref_quantize_op_(coeff_.TopLeftPixel(), count_, skip_block_, zbin_ptr_, r_ptr_, q_ptr_, quant_shift_ptr_, ref_qcoeff.TopLeftPixel(), ref_dqcoeff.TopLeftPixel(), dequant_ptr_, &ref_eob, scan_->scan, scan_->iscan); ASM_REGISTER_STATE_CHECK(quantize_op_( coeff_.TopLeftPixel(), count_, skip_block_, zbin_ptr_, r_ptr_, q_ptr_, quant_shift_ptr_, qcoeff_.TopLeftPixel(), dqcoeff_.TopLeftPixel(), dequant_ptr_, &eob_, scan_->scan, scan_->iscan)); EXPECT_TRUE(qcoeff_.CheckValues(ref_qcoeff)); EXPECT_TRUE(dqcoeff_.CheckValues(ref_dqcoeff)); EXPECT_EQ(eob_, ref_eob); if (HasFailure()) { printf("Failure on iteration %d.\n", i); qcoeff_.PrintDifference(ref_qcoeff); dqcoeff_.PrintDifference(ref_dqcoeff); return; } } } TEST_P(VP9QuantizeTest, DISABLED_Speed) { ACMRandom rnd(ACMRandom::DeterministicSeed()); ASSERT_TRUE(coeff_.Init()); ASSERT_TRUE(qcoeff_.Init()); ASSERT_TRUE(dqcoeff_.Init()); TX_SIZE starting_sz, ending_sz; if (max_size_ == 16) { starting_sz = TX_4X4; ending_sz = TX_16X16; } else { starting_sz = TX_32X32; ending_sz = TX_32X32; } for (TX_SIZE sz = starting_sz; sz <= ending_sz; ++sz) { // zbin > coeff, zbin < coeff. for (int i = 0; i < 2; ++i) { skip_block_ = 0; // TX_TYPE defines the scan order. That is not relevant to the speed test. // Pick the first one. 
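      // With sz in TX_4X4..TX_32X32, count_ = (4 << sz) * (4 << sz) evaluates
      // to 16, 64, 256 or 1024 coefficients, so RunNTimes(10000000 / count_)
      // below keeps the total number of coefficients processed roughly
      // constant across block sizes.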
const TX_TYPE tx_type = DCT_DCT; count_ = (4 << sz) * (4 << sz); scan_ = &vp9_scan_orders[sz][tx_type]; GenerateHelperArrays(&rnd, zbin_ptr_, round_ptr_, quant_ptr_, quant_shift_ptr_, dequant_ptr_, round_fp_ptr_, quant_fp_ptr_); if (i == 0) { // When |coeff values| are less than zbin the results are 0. int threshold = 100; if (max_size_ == 32) { // For 32x32, the threshold is halved. Double it to keep the values // from clearing it. threshold = 200; } for (int j = 0; j < 8; ++j) zbin_ptr_[j] = threshold; coeff_.Set(&rnd, -99, 99); } else if (i == 1) { for (int j = 0; j < 8; ++j) zbin_ptr_[j] = 50; coeff_.Set(&rnd, -500, 500); } RunNTimes(10000000 / count_); const char *type = (i == 0) ? "Bypass calculations " : "Full calculations "; char block_size[16]; snprintf(block_size, sizeof(block_size), "%dx%d", 4 << sz, 4 << sz); char title[100]; snprintf(title, sizeof(title), "%25s %8s ", type, block_size); PrintMedian(title); } } } using std::make_tuple; #if HAVE_SSE2 #if CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( SSE2, VP9QuantizeTest, ::testing::Values( make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c, VPX_BITS_8, 16, false), make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c, VPX_BITS_8, 16, false), make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c, VPX_BITS_10, 16, false), make_tuple(&vpx_highbd_quantize_b_sse2, &vpx_highbd_quantize_b_c, VPX_BITS_12, 16, false), make_tuple(&vpx_highbd_quantize_b_32x32_sse2, &vpx_highbd_quantize_b_32x32_c, VPX_BITS_8, 32, false), make_tuple(&vpx_highbd_quantize_b_32x32_sse2, &vpx_highbd_quantize_b_32x32_c, VPX_BITS_10, 32, false), make_tuple(&vpx_highbd_quantize_b_32x32_sse2, &vpx_highbd_quantize_b_32x32_c, VPX_BITS_12, 32, false))); #else INSTANTIATE_TEST_CASE_P( SSE2, VP9QuantizeTest, ::testing::Values(make_tuple(&vpx_quantize_b_sse2, &vpx_quantize_b_c, VPX_BITS_8, 16, false), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 16, true))); #endif // CONFIG_VP9_HIGHBITDEPTH #endif // HAVE_SSE2 #if HAVE_SSSE3 #if VPX_ARCH_X86_64 INSTANTIATE_TEST_CASE_P( SSSE3, VP9QuantizeTest, ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c, VPX_BITS_8, 16, false), make_tuple(&vpx_quantize_b_32x32_ssse3, &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 16, true), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 32, true))); #else INSTANTIATE_TEST_CASE_P( SSSE3, VP9QuantizeTest, ::testing::Values(make_tuple(&vpx_quantize_b_ssse3, &vpx_quantize_b_c, VPX_BITS_8, 16, false), make_tuple(&vpx_quantize_b_32x32_ssse3, &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false))); #endif // VPX_ARCH_X86_64 #endif // HAVE_SSSE3 #if HAVE_AVX INSTANTIATE_TEST_CASE_P(AVX, VP9QuantizeTest, ::testing::Values(make_tuple(&vpx_quantize_b_avx, &vpx_quantize_b_c, VPX_BITS_8, 16, false), make_tuple(&vpx_quantize_b_32x32_avx, &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false))); #endif // HAVE_AVX #if VPX_ARCH_X86_64 && HAVE_AVX2 INSTANTIATE_TEST_CASE_P( AVX2, VP9QuantizeTest, ::testing::Values(make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 16, true))); #endif // HAVE_AVX2 #if HAVE_NEON INSTANTIATE_TEST_CASE_P( NEON, VP9QuantizeTest, ::testing::Values(make_tuple(&vpx_quantize_b_neon, &vpx_quantize_b_c, VPX_BITS_8, 16, false), make_tuple(&vpx_quantize_b_32x32_neon, &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 16, true), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 32, true))); #endif // HAVE_NEON #if 
HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH INSTANTIATE_TEST_CASE_P( VSX, VP9QuantizeTest, ::testing::Values(make_tuple(&vpx_quantize_b_vsx, &vpx_quantize_b_c, VPX_BITS_8, 16, false), make_tuple(&vpx_quantize_b_32x32_vsx, &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 16, true), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 32, true))); #endif // HAVE_VSX && !CONFIG_VP9_HIGHBITDEPTH // Only useful to compare "Speed" test results. INSTANTIATE_TEST_CASE_P( DISABLED_C, VP9QuantizeTest, ::testing::Values( make_tuple(&vpx_quantize_b_c, &vpx_quantize_b_c, VPX_BITS_8, 16, false), make_tuple(&vpx_quantize_b_32x32_c, &vpx_quantize_b_32x32_c, VPX_BITS_8, 32, false), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 16, true), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 16, true), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 32, true), make_tuple(&QuantFPWrapper, &QuantFPWrapper, VPX_BITS_8, 32, true))); } // namespace libvpx-1.8.2/test/vp9_scale_test.cc000066400000000000000000000203531357355204000172470ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "./vpx_scale_rtcd.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/vpx_scale_test.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/vpx_timer.h" #include "vpx_scale/yv12config.h" namespace libvpx_test { typedef void (*ScaleFrameFunc)(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, INTERP_FILTER filter_type, int phase_scaler); class ScaleTest : public VpxScaleBase, public ::testing::TestWithParam { public: virtual ~ScaleTest() {} protected: virtual void SetUp() { scale_fn_ = GetParam(); } void ReferenceScaleFrame(INTERP_FILTER filter_type, int phase_scaler) { vp9_scale_and_extend_frame_c(&img_, &ref_img_, filter_type, phase_scaler); } void ScaleFrame(INTERP_FILTER filter_type, int phase_scaler) { ASM_REGISTER_STATE_CHECK( scale_fn_(&img_, &dst_img_, filter_type, phase_scaler)); } void RunTest(INTERP_FILTER filter_type) { static const int kNumSizesToTest = 20; static const int kNumScaleFactorsToTest = 4; static const int kSizesToTest[] = { 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 68, 128, 134 }; static const int kScaleFactors[] = { 1, 2, 3, 4 }; for (int phase_scaler = 0; phase_scaler < 16; ++phase_scaler) { for (int h = 0; h < kNumSizesToTest; ++h) { const int src_height = kSizesToTest[h]; for (int w = 0; w < kNumSizesToTest; ++w) { const int src_width = kSizesToTest[w]; for (int sf_up_idx = 0; sf_up_idx < kNumScaleFactorsToTest; ++sf_up_idx) { const int sf_up = kScaleFactors[sf_up_idx]; for (int sf_down_idx = 0; sf_down_idx < kNumScaleFactorsToTest; ++sf_down_idx) { const int sf_down = kScaleFactors[sf_down_idx]; const int dst_width = src_width * sf_up / sf_down; const int dst_height = src_height * sf_up / sf_down; if (sf_up == sf_down && sf_up != 1) { continue; } // I420 frame width and height must be even. 
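              // For example, a 34-wide source scaled by 3:4 gives
              // dst_width = 34 * 3 / 4 = 25, which is odd and skipped.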
if (!dst_width || !dst_height || dst_width & 1 || dst_height & 1) { continue; } // vpx_convolve8_c() has restriction on the step which cannot // exceed 64 (ratio 1 to 4). if (src_width > 4 * dst_width || src_height > 4 * dst_height) { continue; } ASSERT_NO_FATAL_FAILURE(ResetScaleImages(src_width, src_height, dst_width, dst_height)); ReferenceScaleFrame(filter_type, phase_scaler); ScaleFrame(filter_type, phase_scaler); if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc, ref_img_.frame_size)) { printf( "filter_type = %d, phase_scaler = %d, src_width = %4d, " "src_height = %4d, dst_width = %4d, dst_height = %4d, " "scale factor = %d:%d\n", filter_type, phase_scaler, src_width, src_height, dst_width, dst_height, sf_down, sf_up); PrintDiff(); } CompareImages(dst_img_); DeallocScaleImages(); } } } } } } void PrintDiffComponent(const uint8_t *const ref, const uint8_t *const opt, const int stride, const int width, const int height, const int plane_idx) const { for (int y = 0; y < height; y++) { for (int x = 0; x < width; x++) { if (ref[y * stride + x] != opt[y * stride + x]) { printf("Plane %d pixel[%d][%d] diff:%6d (ref),%6d (opt)\n", plane_idx, y, x, ref[y * stride + x], opt[y * stride + x]); break; } } } } void PrintDiff() const { assert(ref_img_.y_stride == dst_img_.y_stride); assert(ref_img_.y_width == dst_img_.y_width); assert(ref_img_.y_height == dst_img_.y_height); assert(ref_img_.uv_stride == dst_img_.uv_stride); assert(ref_img_.uv_width == dst_img_.uv_width); assert(ref_img_.uv_height == dst_img_.uv_height); if (memcmp(dst_img_.buffer_alloc, ref_img_.buffer_alloc, ref_img_.frame_size)) { PrintDiffComponent(ref_img_.y_buffer, dst_img_.y_buffer, ref_img_.y_stride, ref_img_.y_width, ref_img_.y_height, 0); PrintDiffComponent(ref_img_.u_buffer, dst_img_.u_buffer, ref_img_.uv_stride, ref_img_.uv_width, ref_img_.uv_height, 1); PrintDiffComponent(ref_img_.v_buffer, dst_img_.v_buffer, ref_img_.uv_stride, ref_img_.uv_width, ref_img_.uv_height, 2); } } ScaleFrameFunc scale_fn_; }; TEST_P(ScaleTest, ScaleFrame_EightTap) { RunTest(EIGHTTAP); } TEST_P(ScaleTest, ScaleFrame_EightTapSmooth) { RunTest(EIGHTTAP_SMOOTH); } TEST_P(ScaleTest, ScaleFrame_EightTapSharp) { RunTest(EIGHTTAP_SHARP); } TEST_P(ScaleTest, ScaleFrame_Bilinear) { RunTest(BILINEAR); } TEST_P(ScaleTest, DISABLED_Speed) { static const int kCountSpeedTestBlock = 100; static const int kNumScaleFactorsToTest = 4; static const int kScaleFactors[] = { 1, 2, 3, 4 }; const int src_width = 1280; const int src_height = 720; for (INTERP_FILTER filter_type = 2; filter_type < 4; ++filter_type) { for (int phase_scaler = 0; phase_scaler < 2; ++phase_scaler) { for (int sf_up_idx = 0; sf_up_idx < kNumScaleFactorsToTest; ++sf_up_idx) { const int sf_up = kScaleFactors[sf_up_idx]; for (int sf_down_idx = 0; sf_down_idx < kNumScaleFactorsToTest; ++sf_down_idx) { const int sf_down = kScaleFactors[sf_down_idx]; const int dst_width = src_width * sf_up / sf_down; const int dst_height = src_height * sf_up / sf_down; if (sf_up == sf_down && sf_up != 1) { continue; } // I420 frame width and height must be even. 
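          // With the 1280x720 source, e.g. a 2:3 factor yields 853x480,
          // which is rejected here for its odd width.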
if (dst_width & 1 || dst_height & 1) { continue; } ASSERT_NO_FATAL_FAILURE( ResetScaleImages(src_width, src_height, dst_width, dst_height)); ASM_REGISTER_STATE_CHECK( ReferenceScaleFrame(filter_type, phase_scaler)); vpx_usec_timer timer; vpx_usec_timer_start(&timer); for (int i = 0; i < kCountSpeedTestBlock; ++i) { ScaleFrame(filter_type, phase_scaler); } libvpx_test::ClearSystemState(); vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer) / 1000); CompareImages(dst_img_); DeallocScaleImages(); printf( "filter_type = %d, phase_scaler = %d, src_width = %4d, " "src_height = %4d, dst_width = %4d, dst_height = %4d, " "scale factor = %d:%d, scale time: %5d ms\n", filter_type, phase_scaler, src_width, src_height, dst_width, dst_height, sf_down, sf_up, elapsed_time); } } } } } INSTANTIATE_TEST_CASE_P(C, ScaleTest, ::testing::Values(vp9_scale_and_extend_frame_c)); #if HAVE_SSSE3 INSTANTIATE_TEST_CASE_P(SSSE3, ScaleTest, ::testing::Values(vp9_scale_and_extend_frame_ssse3)); #endif // HAVE_SSSE3 #if HAVE_NEON INSTANTIATE_TEST_CASE_P(NEON, ScaleTest, ::testing::Values(vp9_scale_and_extend_frame_neon)); #endif // HAVE_NEON } // namespace libvpx_test libvpx-1.8.2/test/vp9_skip_loopfilter_test.cc000066400000000000000000000126751357355204000213750ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/md5_helper.h" #include "test/util.h" #include "test/webm_video_source.h" namespace { const char kVp9TestFile[] = "vp90-2-08-tile_1x8_frame_parallel.webm"; const char kVp9Md5File[] = "vp90-2-08-tile_1x8_frame_parallel.webm.md5"; // Class for testing shutting off the loop filter. class SkipLoopFilterTest { public: SkipLoopFilterTest() : video_(NULL), decoder_(NULL), md5_file_(NULL) {} ~SkipLoopFilterTest() { if (md5_file_ != NULL) fclose(md5_file_); delete decoder_; delete video_; } // If |threads| > 0 then set the decoder with that number of threads. void Init(int num_threads) { expected_md5_[0] = '\0'; junk_[0] = '\0'; video_ = new libvpx_test::WebMVideoSource(kVp9TestFile); ASSERT_TRUE(video_ != NULL); video_->Init(); video_->Begin(); vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); if (num_threads > 0) cfg.threads = num_threads; decoder_ = new libvpx_test::VP9Decoder(cfg, 0); ASSERT_TRUE(decoder_ != NULL); OpenMd5File(kVp9Md5File); } // Set the VP9 skipLoopFilter control value. void SetSkipLoopFilter(int value, vpx_codec_err_t expected_value) { decoder_->Control(VP9_SET_SKIP_LOOP_FILTER, value, expected_value); } vpx_codec_err_t DecodeOneFrame() { const vpx_codec_err_t res = decoder_->DecodeFrame(video_->cxdata(), video_->frame_size()); if (res == VPX_CODEC_OK) { ReadMd5(); video_->Next(); } return res; } vpx_codec_err_t DecodeRemainingFrames() { for (; video_->cxdata() != NULL; video_->Next()) { const vpx_codec_err_t res = decoder_->DecodeFrame(video_->cxdata(), video_->frame_size()); if (res != VPX_CODEC_OK) return res; ReadMd5(); } return VPX_CODEC_OK; } // Checks if MD5 matches or doesn't. 
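  // |matches| == true expects the decoded frame to equal the reference MD5
  // (loop filter ran normally); false expects a mismatch, since the reference
  // checksums were generated with the loop filter enabled.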
void CheckMd5(bool matches) { libvpx_test::DxDataIterator dec_iter = decoder_->GetDxData(); const vpx_image_t *img = dec_iter.Next(); CheckMd5Vpx(*img, matches); } private: // TODO(fgalligan): Move the MD5 testing code into another class. void OpenMd5File(const std::string &md5_file_name) { md5_file_ = libvpx_test::OpenTestDataFile(md5_file_name); ASSERT_TRUE(md5_file_ != NULL) << "MD5 file open failed. Filename: " << md5_file_name; } // Reads the next line of the MD5 file. void ReadMd5() { ASSERT_TRUE(md5_file_ != NULL); const int res = fscanf(md5_file_, "%s %s", expected_md5_, junk_); ASSERT_NE(EOF, res) << "Read md5 data failed"; expected_md5_[32] = '\0'; } // Checks if the last read MD5 matches |img| or doesn't. void CheckMd5Vpx(const vpx_image_t &img, bool matches) { ::libvpx_test::MD5 md5_res; md5_res.Add(&img); const char *const actual_md5 = md5_res.Get(); // Check MD5. if (matches) ASSERT_STREQ(expected_md5_, actual_md5) << "MD5 checksums don't match"; else ASSERT_STRNE(expected_md5_, actual_md5) << "MD5 checksums match"; } libvpx_test::WebMVideoSource *video_; libvpx_test::VP9Decoder *decoder_; FILE *md5_file_; char expected_md5_[33]; char junk_[128]; }; TEST(SkipLoopFilterTest, ShutOffLoopFilter) { const int non_zero_value = 1; const int num_threads = 0; SkipLoopFilterTest skip_loop_filter; skip_loop_filter.Init(num_threads); skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK); ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames()); skip_loop_filter.CheckMd5(false); } TEST(SkipLoopFilterTest, ShutOffLoopFilterSingleThread) { const int non_zero_value = 1; const int num_threads = 1; SkipLoopFilterTest skip_loop_filter; skip_loop_filter.Init(num_threads); skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK); ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames()); skip_loop_filter.CheckMd5(false); } TEST(SkipLoopFilterTest, ShutOffLoopFilter8Threads) { const int non_zero_value = 1; const int num_threads = 8; SkipLoopFilterTest skip_loop_filter; skip_loop_filter.Init(num_threads); skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK); ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames()); skip_loop_filter.CheckMd5(false); } TEST(SkipLoopFilterTest, WithLoopFilter) { const int non_zero_value = 1; const int num_threads = 0; SkipLoopFilterTest skip_loop_filter; skip_loop_filter.Init(num_threads); skip_loop_filter.SetSkipLoopFilter(non_zero_value, VPX_CODEC_OK); skip_loop_filter.SetSkipLoopFilter(0, VPX_CODEC_OK); ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames()); skip_loop_filter.CheckMd5(true); } TEST(SkipLoopFilterTest, ToggleLoopFilter) { const int num_threads = 0; SkipLoopFilterTest skip_loop_filter; skip_loop_filter.Init(num_threads); for (int i = 0; i < 10; ++i) { skip_loop_filter.SetSkipLoopFilter(i % 2, VPX_CODEC_OK); ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeOneFrame()); } ASSERT_EQ(VPX_CODEC_OK, skip_loop_filter.DecodeRemainingFrames()); skip_loop_filter.CheckMd5(false); } } // namespace libvpx-1.8.2/test/vp9_subtract_test.cc000066400000000000000000000117131357355204000200070ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "test/acm_random.h" #include "test/bench.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "vp9/common/vp9_blockd.h" #include "vpx_ports/msvc.h" #include "vpx_mem/vpx_mem.h" typedef void (*SubtractFunc)(int rows, int cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride); namespace vp9 { class VP9SubtractBlockTest : public AbstractBench, public ::testing::TestWithParam { public: virtual void TearDown() { libvpx_test::ClearSystemState(); } protected: virtual void Run() { GetParam()(block_height_, block_width_, diff_, block_width_, src_, block_width_, pred_, block_width_); } void SetupBlocks(BLOCK_SIZE bsize) { block_width_ = 4 * num_4x4_blocks_wide_lookup[bsize]; block_height_ = 4 * num_4x4_blocks_high_lookup[bsize]; diff_ = reinterpret_cast( vpx_memalign(16, sizeof(*diff_) * block_width_ * block_height_ * 2)); pred_ = reinterpret_cast( vpx_memalign(16, block_width_ * block_height_ * 2)); src_ = reinterpret_cast( vpx_memalign(16, block_width_ * block_height_ * 2)); } int block_width_; int block_height_; int16_t *diff_; uint8_t *pred_; uint8_t *src_; }; using libvpx_test::ACMRandom; TEST_P(VP9SubtractBlockTest, DISABLED_Speed) { ACMRandom rnd(ACMRandom::DeterministicSeed()); for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES; bsize = static_cast(static_cast(bsize) + 1)) { SetupBlocks(bsize); RunNTimes(100000000 / (block_height_ * block_width_)); char block_size[16]; snprintf(block_size, sizeof(block_size), "%dx%d", block_height_, block_width_); char title[100]; snprintf(title, sizeof(title), "%8s ", block_size); PrintMedian(title); vpx_free(diff_); vpx_free(pred_); vpx_free(src_); } } TEST_P(VP9SubtractBlockTest, SimpleSubtract) { ACMRandom rnd(ACMRandom::DeterministicSeed()); for (BLOCK_SIZE bsize = BLOCK_4X4; bsize < BLOCK_SIZES; bsize = static_cast(static_cast(bsize) + 1)) { SetupBlocks(bsize); for (int n = 0; n < 100; n++) { for (int r = 0; r < block_height_; ++r) { for (int c = 0; c < block_width_ * 2; ++c) { src_[r * block_width_ * 2 + c] = rnd.Rand8(); pred_[r * block_width_ * 2 + c] = rnd.Rand8(); } } GetParam()(block_height_, block_width_, diff_, block_width_, src_, block_width_, pred_, block_width_); for (int r = 0; r < block_height_; ++r) { for (int c = 0; c < block_width_; ++c) { EXPECT_EQ(diff_[r * block_width_ + c], (src_[r * block_width_ + c] - pred_[r * block_width_ + c])) << "r = " << r << ", c = " << c << ", bs = " << static_cast(bsize); } } GetParam()(block_height_, block_width_, diff_, block_width_ * 2, src_, block_width_ * 2, pred_, block_width_ * 2); for (int r = 0; r < block_height_; ++r) { for (int c = 0; c < block_width_; ++c) { EXPECT_EQ(diff_[r * block_width_ * 2 + c], (src_[r * block_width_ * 2 + c] - pred_[r * block_width_ * 2 + c])) << "r = " << r << ", c = " << c << ", bs = " << static_cast(bsize); } } } vpx_free(diff_); vpx_free(pred_); vpx_free(src_); } } INSTANTIATE_TEST_CASE_P(C, VP9SubtractBlockTest, ::testing::Values(vpx_subtract_block_c)); #if HAVE_SSE2 INSTANTIATE_TEST_CASE_P(SSE2, VP9SubtractBlockTest, ::testing::Values(vpx_subtract_block_sse2)); #endif #if HAVE_NEON INSTANTIATE_TEST_CASE_P(NEON, VP9SubtractBlockTest, ::testing::Values(vpx_subtract_block_neon)); #endif #if HAVE_MSA INSTANTIATE_TEST_CASE_P(MSA, VP9SubtractBlockTest, ::testing::Values(vpx_subtract_block_msa)); 
#endif #if HAVE_MMI INSTANTIATE_TEST_CASE_P(MMI, VP9SubtractBlockTest, ::testing::Values(vpx_subtract_block_mmi)); #endif #if HAVE_VSX INSTANTIATE_TEST_CASE_P(VSX, VP9SubtractBlockTest, ::testing::Values(vpx_subtract_block_vsx)); #endif } // namespace vp9 libvpx-1.8.2/test/vp9_thread_test.cc000066400000000000000000000252161357355204000174320ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "test/codec_factory.h" #include "test/decode_test_driver.h" #include "test/md5_helper.h" #if CONFIG_WEBM_IO #include "test/webm_video_source.h" #endif #include "vpx_util/vpx_thread.h" namespace { using std::string; class VPxWorkerThreadTest : public ::testing::TestWithParam { protected: virtual ~VPxWorkerThreadTest() {} virtual void SetUp() { vpx_get_worker_interface()->init(&worker_); } virtual void TearDown() { vpx_get_worker_interface()->end(&worker_); } void Run(VPxWorker *worker) { const bool synchronous = GetParam(); if (synchronous) { vpx_get_worker_interface()->execute(worker); } else { vpx_get_worker_interface()->launch(worker); } } VPxWorker worker_; }; int ThreadHook(void *data, void *return_value) { int *const hook_data = reinterpret_cast(data); *hook_data = 5; return *reinterpret_cast(return_value); } TEST_P(VPxWorkerThreadTest, HookSuccess) { // should be a no-op. EXPECT_NE(vpx_get_worker_interface()->sync(&worker_), 0); for (int i = 0; i < 2; ++i) { EXPECT_NE(vpx_get_worker_interface()->reset(&worker_), 0); int hook_data = 0; int return_value = 1; // return successfully from the hook worker_.hook = ThreadHook; worker_.data1 = &hook_data; worker_.data2 = &return_value; Run(&worker_); EXPECT_NE(vpx_get_worker_interface()->sync(&worker_), 0); EXPECT_FALSE(worker_.had_error); EXPECT_EQ(5, hook_data); // should be a no-op. EXPECT_NE(vpx_get_worker_interface()->sync(&worker_), 0); } } TEST_P(VPxWorkerThreadTest, HookFailure) { EXPECT_NE(vpx_get_worker_interface()->reset(&worker_), 0); int hook_data = 0; int return_value = 0; // return failure from the hook worker_.hook = ThreadHook; worker_.data1 = &hook_data; worker_.data2 = &return_value; Run(&worker_); EXPECT_FALSE(vpx_get_worker_interface()->sync(&worker_)); EXPECT_EQ(1, worker_.had_error); // Ensure _reset() clears the error and _launch() can be called again. return_value = 1; EXPECT_NE(vpx_get_worker_interface()->reset(&worker_), 0); EXPECT_FALSE(worker_.had_error); vpx_get_worker_interface()->launch(&worker_); EXPECT_NE(vpx_get_worker_interface()->sync(&worker_), 0); EXPECT_FALSE(worker_.had_error); } TEST_P(VPxWorkerThreadTest, EndWithoutSync) { // Create a large number of threads to increase the chances of detecting a // race. Doing more work in the hook is no guarantee as any race would occur // post hook execution in the main thread loop driver. 
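// Calling end() without an intervening sync() means end() itself must wait
// for any in-flight hook to finish before tearing the thread down; the
// reverse teardown order used below (kNumWorkers - 1 down to 0) further
// shuffles the timing between launch and destruction.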
static const int kNumWorkers = 64; VPxWorker workers[kNumWorkers]; int hook_data[kNumWorkers]; int return_value[kNumWorkers]; for (int n = 0; n < kNumWorkers; ++n) { vpx_get_worker_interface()->init(&workers[n]); return_value[n] = 1; // return successfully from the hook workers[n].hook = ThreadHook; workers[n].data1 = &hook_data[n]; workers[n].data2 = &return_value[n]; } for (int i = 0; i < 2; ++i) { for (int n = 0; n < kNumWorkers; ++n) { EXPECT_NE(vpx_get_worker_interface()->reset(&workers[n]), 0); hook_data[n] = 0; } for (int n = 0; n < kNumWorkers; ++n) { Run(&workers[n]); } for (int n = kNumWorkers - 1; n >= 0; --n) { vpx_get_worker_interface()->end(&workers[n]); } } } TEST(VPxWorkerThreadTest, TestInterfaceAPI) { EXPECT_EQ(0, vpx_set_worker_interface(NULL)); EXPECT_TRUE(vpx_get_worker_interface() != NULL); for (int i = 0; i < 6; ++i) { VPxWorkerInterface winterface = *vpx_get_worker_interface(); switch (i) { default: case 0: winterface.init = NULL; break; case 1: winterface.reset = NULL; break; case 2: winterface.sync = NULL; break; case 3: winterface.launch = NULL; break; case 4: winterface.execute = NULL; break; case 5: winterface.end = NULL; break; } EXPECT_EQ(0, vpx_set_worker_interface(&winterface)); } } // ----------------------------------------------------------------------------- // Multi-threaded decode tests #if CONFIG_WEBM_IO struct FileList { const char *name; const char *expected_md5; }; // Decodes |filename| with |num_threads|. Returns the md5 of the decoded frames. string DecodeFile(const string &filename, int num_threads) { libvpx_test::WebMVideoSource video(filename); video.Init(); vpx_codec_dec_cfg_t cfg = vpx_codec_dec_cfg_t(); cfg.threads = num_threads; libvpx_test::VP9Decoder decoder(cfg, 0); libvpx_test::MD5 md5; for (video.Begin(); video.cxdata(); video.Next()) { const vpx_codec_err_t res = decoder.DecodeFrame(video.cxdata(), video.frame_size()); if (res != VPX_CODEC_OK) { EXPECT_EQ(VPX_CODEC_OK, res) << decoder.DecodeError(); break; } libvpx_test::DxDataIterator dec_iter = decoder.GetDxData(); const vpx_image_t *img = NULL; // Get decompressed data while ((img = dec_iter.Next())) { md5.Add(img); } } return string(md5.Get()); } void DecodeFiles(const FileList files[]) { for (const FileList *iter = files; iter->name != NULL; ++iter) { SCOPED_TRACE(iter->name); for (int t = 1; t <= 8; ++t) { EXPECT_EQ(iter->expected_md5, DecodeFile(iter->name, t)) << "threads = " << t; } } } // Trivial serialized thread worker interface implementation. // Note any worker that requires synchronization between other workers will // hang. namespace impl { namespace { void Init(VPxWorker *const worker) { memset(worker, 0, sizeof(*worker)); } int Reset(VPxWorker *const /*worker*/) { return 1; } int Sync(VPxWorker *const worker) { return !worker->had_error; } void Execute(VPxWorker *const worker) { worker->had_error |= !worker->hook(worker->data1, worker->data2); } void Launch(VPxWorker *const worker) { Execute(worker); } void End(VPxWorker *const /*worker*/) {} } // namespace } // namespace impl TEST(VPxWorkerThreadTest, TestSerialInterface) { static const VPxWorkerInterface serial_interface = { impl::Init, impl::Reset, impl::Sync, impl::Launch, impl::Execute, impl::End }; // TODO(jzern): Avoid using a file that will use the row-based thread // loopfilter, with the simple serialized implementation it will hang. This is // due to its expectation that rows will be run in parallel as they wait on // progress in the row above before proceeding. 
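// Decoding below with the serial interface and again with the restored
// default must both yield the same MD5 as the threaded
// NoTilesNonFrameParallel test: the worker interface may change how work is
// scheduled, but never the decoded output.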
static const char expected_md5[] = "b35a1b707b28e82be025d960aba039bc"; static const char filename[] = "vp90-2-03-size-226x226.webm"; VPxWorkerInterface default_interface = *vpx_get_worker_interface(); EXPECT_NE(vpx_set_worker_interface(&serial_interface), 0); EXPECT_EQ(expected_md5, DecodeFile(filename, 2)); // Reset the interface. EXPECT_NE(vpx_set_worker_interface(&default_interface), 0); EXPECT_EQ(expected_md5, DecodeFile(filename, 2)); } TEST(VP9DecodeMultiThreadedTest, NoTilesNonFrameParallel) { // no tiles or frame parallel; this exercises loop filter threading. EXPECT_EQ("b35a1b707b28e82be025d960aba039bc", DecodeFile("vp90-2-03-size-226x226.webm", 2)); } TEST(VP9DecodeMultiThreadedTest, FrameParallel) { static const FileList files[] = { { "vp90-2-08-tile_1x2_frame_parallel.webm", "68ede6abd66bae0a2edf2eb9232241b6" }, { "vp90-2-08-tile_1x4_frame_parallel.webm", "368ebc6ebf3a5e478d85b2c3149b2848" }, { "vp90-2-08-tile_1x8_frame_parallel.webm", "17e439da2388aff3a0f69cb22579c6c1" }, { NULL, NULL } }; DecodeFiles(files); } TEST(VP9DecodeMultiThreadedTest, FrameParallelResize) { static const FileList files[] = { { "vp90-2-14-resize-fp-tiles-1-16.webm", "0cd5e632c326297e975f38949c31ea94" }, { "vp90-2-14-resize-fp-tiles-1-2-4-8-16.webm", "5c78a96a42e7f4a4f6b2edcdb791e44c" }, { "vp90-2-14-resize-fp-tiles-1-2.webm", "e030450ae85c3277be2a418769df98e2" }, { "vp90-2-14-resize-fp-tiles-1-4.webm", "312eed4e2b64eb7a4e7f18916606a430" }, { "vp90-2-14-resize-fp-tiles-16-1.webm", "1755c16d8af16a9cb3fe7338d90abe52" }, { "vp90-2-14-resize-fp-tiles-16-2.webm", "500300592d3fcb6f12fab25e48aaf4df" }, { "vp90-2-14-resize-fp-tiles-16-4.webm", "47c48379fa6331215d91c67648e1af6e" }, { "vp90-2-14-resize-fp-tiles-16-8-4-2-1.webm", "eecf17290739bc708506fa4827665989" }, { "vp90-2-14-resize-fp-tiles-16-8.webm", "29b6bb54e4c26b5ca85d5de5fed94e76" }, { "vp90-2-14-resize-fp-tiles-1-8.webm", "1b6f175e08cd82cf84bb800ac6d1caa3" }, { "vp90-2-14-resize-fp-tiles-2-16.webm", "ca3b03e4197995d8d5444ede7a6c0804" }, { "vp90-2-14-resize-fp-tiles-2-1.webm", "99aec065369d70bbb78ccdff65afed3f" }, { "vp90-2-14-resize-fp-tiles-2-4.webm", "22d0ebdb49b87d2920a85aea32e1afd5" }, { "vp90-2-14-resize-fp-tiles-2-8.webm", "c2115cf051c62e0f7db1d4a783831541" }, { "vp90-2-14-resize-fp-tiles-4-16.webm", "c690d7e1719b31367564cac0af0939cb" }, { "vp90-2-14-resize-fp-tiles-4-1.webm", "a926020b2cc3e15ad4cc271853a0ff26" }, { "vp90-2-14-resize-fp-tiles-4-2.webm", "42699063d9e581f1993d0cf890c2be78" }, { "vp90-2-14-resize-fp-tiles-4-8.webm", "7f76d96036382f45121e3d5aa6f8ec52" }, { "vp90-2-14-resize-fp-tiles-8-16.webm", "76a43fcdd7e658542913ea43216ec55d" }, { "vp90-2-14-resize-fp-tiles-8-1.webm", "8e3fbe89486ca60a59299dea9da91378" }, { "vp90-2-14-resize-fp-tiles-8-2.webm", "ae96f21f21b6370cc0125621b441fc52" }, { "vp90-2-14-resize-fp-tiles-8-4.webm", "3eb4f24f10640d42218f7fd7b9fd30d4" }, { NULL, NULL } }; DecodeFiles(files); } TEST(VP9DecodeMultiThreadedTest, NonFrameParallel) { static const FileList files[] = { { "vp90-2-08-tile_1x2.webm", "570b4a5d5a70d58b5359671668328a16" }, { "vp90-2-08-tile_1x4.webm", "988d86049e884c66909d2d163a09841a" }, { "vp90-2-08-tile_1x8.webm", "0941902a52e9092cb010905eab16364c" }, { "vp90-2-08-tile-4x1.webm", "06505aade6647c583c8e00a2f582266f" }, { "vp90-2-08-tile-4x4.webm", "85c2299892460d76e2c600502d52bfe2" }, { NULL, NULL } }; DecodeFiles(files); } #endif // CONFIG_WEBM_IO INSTANTIATE_TEST_CASE_P(Synchronous, VPxWorkerThreadTest, ::testing::Bool()); } // namespace 
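// A minimal usage sketch (comment-only; assumes nothing beyond the
// vpx_util/vpx_thread.h API exercised above) of installing a custom worker
// interface before any decoder is created:
//
//   static const VPxWorkerInterface serial = { impl::Init,    impl::Reset,
//                                              impl::Sync,    impl::Launch,
//                                              impl::Execute, impl::End };
//   if (!vpx_set_worker_interface(&serial)) {
//     // Rejected: every member must be non-NULL (see TestInterfaceAPI).
//   }
//
// The whole function table is validated at once -- vpx_set_worker_interface()
// returns 0 if any member is NULL -- and the previous table can be captured
// with *vpx_get_worker_interface() and restored later, as TestSerialInterface
// does.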
libvpx-1.8.2/test/vpx_scale_test.cc000066400000000000000000000056461357355204000173560ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "./vpx_scale_rtcd.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "test/vpx_scale_test.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/vpx_timer.h" #include "vpx_scale/yv12config.h" namespace libvpx_test { namespace { #if VPX_ARCH_ARM || (VPX_ARCH_MIPS && !HAVE_MIPS64) || VPX_ARCH_X86 // Avoid OOM failures on 32-bit platforms. const int kNumSizesToTest = 7; #else const int kNumSizesToTest = 8; #endif const int kSizesToTest[] = { 1, 15, 33, 145, 512, 1025, 3840, 16383 }; typedef void (*ExtendFrameBorderFunc)(YV12_BUFFER_CONFIG *ybf); typedef void (*CopyFrameFunc)(const YV12_BUFFER_CONFIG *src_ybf, YV12_BUFFER_CONFIG *dst_ybf); class ExtendBorderTest : public VpxScaleBase, public ::testing::TestWithParam { public: virtual ~ExtendBorderTest() {} protected: virtual void SetUp() { extend_fn_ = GetParam(); } void ExtendBorder() { ASM_REGISTER_STATE_CHECK(extend_fn_(&img_)); } void RunTest() { for (int h = 0; h < kNumSizesToTest; ++h) { for (int w = 0; w < kNumSizesToTest; ++w) { ASSERT_NO_FATAL_FAILURE(ResetImages(kSizesToTest[w], kSizesToTest[h])); ReferenceCopyFrame(); ExtendBorder(); CompareImages(img_); DeallocImages(); } } } ExtendFrameBorderFunc extend_fn_; }; TEST_P(ExtendBorderTest, ExtendBorder) { ASSERT_NO_FATAL_FAILURE(RunTest()); } INSTANTIATE_TEST_CASE_P(C, ExtendBorderTest, ::testing::Values(vp8_yv12_extend_frame_borders_c)); class CopyFrameTest : public VpxScaleBase, public ::testing::TestWithParam { public: virtual ~CopyFrameTest() {} protected: virtual void SetUp() { copy_frame_fn_ = GetParam(); } void CopyFrame() { ASM_REGISTER_STATE_CHECK(copy_frame_fn_(&img_, &dst_img_)); } void RunTest() { for (int h = 0; h < kNumSizesToTest; ++h) { for (int w = 0; w < kNumSizesToTest; ++w) { ASSERT_NO_FATAL_FAILURE(ResetImages(kSizesToTest[w], kSizesToTest[h])); ReferenceCopyFrame(); CopyFrame(); CompareImages(dst_img_); DeallocImages(); } } } CopyFrameFunc copy_frame_fn_; }; TEST_P(CopyFrameTest, CopyFrame) { ASSERT_NO_FATAL_FAILURE(RunTest()); } INSTANTIATE_TEST_CASE_P(C, CopyFrameTest, ::testing::Values(vp8_yv12_copy_frame_c)); } // namespace } // namespace libvpx_test libvpx-1.8.2/test/vpx_scale_test.h000066400000000000000000000161461357355204000172150ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_TEST_VPX_SCALE_TEST_H_ #define VPX_TEST_VPX_SCALE_TEST_H_ #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "./vpx_scale_rtcd.h" #include "test/acm_random.h" #include "test/clear_system_state.h" #include "test/register_state_check.h" #include "vpx_mem/vpx_mem.h" #include "vpx_scale/yv12config.h" using libvpx_test::ACMRandom; namespace libvpx_test { class VpxScaleBase { public: virtual ~VpxScaleBase() { libvpx_test::ClearSystemState(); } void ResetImage(YV12_BUFFER_CONFIG *const img, const int width, const int height) { memset(img, 0, sizeof(*img)); ASSERT_EQ( 0, vp8_yv12_alloc_frame_buffer(img, width, height, VP8BORDERINPIXELS)) << "for width: " << width << " height: " << height; memset(img->buffer_alloc, kBufFiller, img->frame_size); } void ResetImages(const int width, const int height) { ResetImage(&img_, width, height); ResetImage(&ref_img_, width, height); ResetImage(&dst_img_, width, height); FillPlane(img_.y_buffer, img_.y_crop_width, img_.y_crop_height, img_.y_stride); FillPlane(img_.u_buffer, img_.uv_crop_width, img_.uv_crop_height, img_.uv_stride); FillPlane(img_.v_buffer, img_.uv_crop_width, img_.uv_crop_height, img_.uv_stride); } void ResetScaleImage(YV12_BUFFER_CONFIG *const img, const int width, const int height) { memset(img, 0, sizeof(*img)); #if CONFIG_VP9_HIGHBITDEPTH ASSERT_EQ(0, vpx_alloc_frame_buffer(img, width, height, 1, 1, 0, VP9_ENC_BORDER_IN_PIXELS, 0)); #else ASSERT_EQ(0, vpx_alloc_frame_buffer(img, width, height, 1, 1, VP9_ENC_BORDER_IN_PIXELS, 0)); #endif memset(img->buffer_alloc, kBufFiller, img->frame_size); } void ResetScaleImages(const int src_width, const int src_height, const int dst_width, const int dst_height) { ResetScaleImage(&img_, src_width, src_height); ResetScaleImage(&ref_img_, dst_width, dst_height); ResetScaleImage(&dst_img_, dst_width, dst_height); FillPlaneExtreme(img_.y_buffer, img_.y_crop_width, img_.y_crop_height, img_.y_stride); FillPlaneExtreme(img_.u_buffer, img_.uv_crop_width, img_.uv_crop_height, img_.uv_stride); FillPlaneExtreme(img_.v_buffer, img_.uv_crop_width, img_.uv_crop_height, img_.uv_stride); } void DeallocImages() { vp8_yv12_de_alloc_frame_buffer(&img_); vp8_yv12_de_alloc_frame_buffer(&ref_img_); vp8_yv12_de_alloc_frame_buffer(&dst_img_); } void DeallocScaleImages() { vpx_free_frame_buffer(&img_); vpx_free_frame_buffer(&ref_img_); vpx_free_frame_buffer(&dst_img_); } protected: static const int kBufFiller = 123; static const int kBufMax = kBufFiller - 1; static void FillPlane(uint8_t *const buf, const int width, const int height, const int stride) { for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { buf[x + (y * stride)] = (x + (width * y)) % kBufMax; } } } static void FillPlaneExtreme(uint8_t *const buf, const int width, const int height, const int stride) { ACMRandom rnd; for (int y = 0; y < height; ++y) { for (int x = 0; x < width; ++x) { buf[x + (y * stride)] = rnd.Rand8() % 2 ? 255 : 0; } } } static void ExtendPlane(uint8_t *buf, int crop_width, int crop_height, int width, int height, int stride, int padding) { // Copy the outermost visible pixel to a distance of at least 'padding.' // The buffers are allocated such that there may be excess space outside the // padding. As long as the minimum amount of padding is achieved it is not // necessary to fill this space as well. 
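// For example, with padding == 2 a visible row | a b c | becomes
//   a a | a b c | c c
// in the row loop below (the first visible pixel is replicated left, the
// last one right); afterwards the fully extended top row is memcpy'd upward
// 'padding' times, and the last extended row is copied downward
// 'bottom_extend' times.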
uint8_t *left = buf - padding; uint8_t *right = buf + crop_width; const int right_extend = padding + (width - crop_width); const int bottom_extend = padding + (height - crop_height); // Fill the border pixels from the nearest image pixel. for (int y = 0; y < crop_height; ++y) { memset(left, left[padding], padding); memset(right, right[-1], right_extend); left += stride; right += stride; } left = buf - padding; uint8_t *top = left - (stride * padding); // The buffer does not always extend as far as the stride. // Equivalent to padding + width + padding. const int extend_width = padding + crop_width + right_extend; // The first row was already extended to the left and right. Copy it up. for (int y = 0; y < padding; ++y) { memcpy(top, left, extend_width); top += stride; } uint8_t *bottom = left + (crop_height * stride); for (int y = 0; y < bottom_extend; ++y) { memcpy(bottom, left + (crop_height - 1) * stride, extend_width); bottom += stride; } } void ReferenceExtendBorder() { ExtendPlane(ref_img_.y_buffer, ref_img_.y_crop_width, ref_img_.y_crop_height, ref_img_.y_width, ref_img_.y_height, ref_img_.y_stride, ref_img_.border); ExtendPlane(ref_img_.u_buffer, ref_img_.uv_crop_width, ref_img_.uv_crop_height, ref_img_.uv_width, ref_img_.uv_height, ref_img_.uv_stride, ref_img_.border / 2); ExtendPlane(ref_img_.v_buffer, ref_img_.uv_crop_width, ref_img_.uv_crop_height, ref_img_.uv_width, ref_img_.uv_height, ref_img_.uv_stride, ref_img_.border / 2); } void ReferenceCopyFrame() { // Copy img_ to ref_img_ and extend frame borders. This will be used for // verifying extend_fn_ as well as copy_frame_fn_. EXPECT_EQ(ref_img_.frame_size, img_.frame_size); for (int y = 0; y < img_.y_crop_height; ++y) { for (int x = 0; x < img_.y_crop_width; ++x) { ref_img_.y_buffer[x + y * ref_img_.y_stride] = img_.y_buffer[x + y * img_.y_stride]; } } for (int y = 0; y < img_.uv_crop_height; ++y) { for (int x = 0; x < img_.uv_crop_width; ++x) { ref_img_.u_buffer[x + y * ref_img_.uv_stride] = img_.u_buffer[x + y * img_.uv_stride]; ref_img_.v_buffer[x + y * ref_img_.uv_stride] = img_.v_buffer[x + y * img_.uv_stride]; } } ReferenceExtendBorder(); } void CompareImages(const YV12_BUFFER_CONFIG actual) { EXPECT_EQ(ref_img_.frame_size, actual.frame_size); EXPECT_EQ(0, memcmp(ref_img_.buffer_alloc, actual.buffer_alloc, ref_img_.frame_size)); } YV12_BUFFER_CONFIG img_; YV12_BUFFER_CONFIG ref_img_; YV12_BUFFER_CONFIG dst_img_; }; } // namespace libvpx_test #endif // VPX_TEST_VPX_SCALE_TEST_H_ libvpx-1.8.2/test/vpx_temporal_svc_encoder.sh000077500000000000000000000275701357355204000214550ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file tests the libvpx vpx_temporal_svc_encoder example. To add new ## tests to this file, do the following: ## 1. Write a shell function (this is your test). ## 2. Add the function to vpx_tsvc_encoder_tests (on a new line). ## . $(dirname $0)/tools_common.sh # Environment check: $YUV_RAW_INPUT is required. vpx_tsvc_encoder_verify_environment() { if [ ! -e "${YUV_RAW_INPUT}" ]; then echo "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." 
return 1 fi if [ "$(vpx_config_option_enabled CONFIG_TEMPORAL_DENOISING)" != "yes" ]; then elog "Warning: Temporal denoising is disabled! Spatial denoising will be " \ "used instead, which is probably not what you want for this test." fi } # Runs vpx_temporal_svc_encoder using the codec specified by $1 and output file # name by $2. Additional positional parameters are passed directly to # vpx_temporal_svc_encoder. vpx_tsvc_encoder() { local encoder="${LIBVPX_BIN_PATH}/vpx_temporal_svc_encoder" encoder="${encoder}${VPX_TEST_EXE_SUFFIX}" local codec="$1" local output_file_base="$2" local output_file="${VPX_TEST_OUTPUT_DIR}/${output_file_base}" local timebase_num="1" local timebase_den="1000" local timebase_den_y4m="30" local speed="6" local frame_drop_thresh="30" local max_threads="4" local error_resilient="1" shift 2 if [ ! -x "${encoder}" ]; then elog "${encoder} does not exist or is not executable." return 1 fi # TODO(tomfinegan): Verify file output for all thread runs. for threads in $(seq $max_threads); do if [ "$(vpx_config_option_enabled CONFIG_VP9_HIGHBITDEPTH)" != "yes" ]; then eval "${VPX_TEST_PREFIX}" "${encoder}" "${YUV_RAW_INPUT}" \ "${output_file}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ "${YUV_RAW_INPUT_HEIGHT}" "${timebase_num}" "${timebase_den}" \ "${speed}" "${frame_drop_thresh}" "${error_resilient}" "${threads}" \ "$@" ${devnull} # Test for y4m input. eval "${VPX_TEST_PREFIX}" "${encoder}" "${Y4M_720P_INPUT}" \ "${output_file}" "${codec}" "${Y4M_720P_INPUT_WIDTH}" \ "${Y4M_720P_INPUT_HEIGHT}" "${timebase_num}" "${timebase_den_y4m}" \ "${speed}" "${frame_drop_thresh}" "${error_resilient}" "${threads}" \ "$@" ${devnull} else eval "${VPX_TEST_PREFIX}" "${encoder}" "${YUV_RAW_INPUT}" \ "${output_file}" "${codec}" "${YUV_RAW_INPUT_WIDTH}" \ "${YUV_RAW_INPUT_HEIGHT}" "${timebase_num}" "${timebase_den}" \ "${speed}" "${frame_drop_thresh}" "${error_resilient}" "${threads}" \ "$@" "8" ${devnull} fi done } # Confirms that all expected output files exist given the output file name # passed to vpx_temporal_svc_encoder. # The file name passed to vpx_temporal_svc_encoder is joined with the stream # number and the extension .ivf to produce per stream output files. Here $1 is # file name, and $2 is expected number of files. files_exist() { local file_name="${VPX_TEST_OUTPUT_DIR}/$1" local num_files="$(($2 - 1))" for stream_num in $(seq 0 ${num_files}); do [ -e "${file_name}_${stream_num}.ivf" ] || return 1 done } # Run vpx_temporal_svc_encoder in all supported modes for vp8 and vp9. 
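# Illustrative sketch only (not added to vpx_tsvc_encoder_tests below, so it
# never runs): a new mode test follows the two steps from the file header --
# write a shell function that encodes, then verify the per-stream outputs
# with files_exist(). The mode number and per-stream bitrates are examples;
# mode 3 takes three bitrates and produces 3 streams.
vpx_tsvc_encoder_vp8_mode_example() {
  if [ "$(vp8_encode_available)" = "yes" ]; then
    local output_basename="vpx_tsvc_encoder_vp8_mode_example"
    vpx_tsvc_encoder vp8 "${output_basename}" 3 200 400 600 || return 1
    files_exist "${output_basename}" 3 || return 1
  fi
}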
vpx_tsvc_encoder_vp8_mode_0() { if [ "$(vp8_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp8_mode_0" vpx_tsvc_encoder vp8 "${output_basename}" 0 200 || return 1 # Mode 0 produces 1 stream files_exist "${output_basename}" 1 || return 1 fi } vpx_tsvc_encoder_vp8_mode_1() { if [ "$(vp8_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp8_mode_1" vpx_tsvc_encoder vp8 "${output_basename}" 1 200 400 || return 1 # Mode 1 produces 2 streams files_exist "${output_basename}" 2 || return 1 fi } vpx_tsvc_encoder_vp8_mode_2() { if [ "$(vp8_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp8_mode_2" vpx_tsvc_encoder vp8 "${output_basename}" 2 200 400 || return 1 # Mode 2 produces 2 streams files_exist "${output_basename}" 2 || return 1 fi } vpx_tsvc_encoder_vp8_mode_3() { if [ "$(vp8_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp8_mode_3" vpx_tsvc_encoder vp8 "${output_basename}" 3 200 400 600 || return 1 # Mode 3 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp8_mode_4() { if [ "$(vp8_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp8_mode_4" vpx_tsvc_encoder vp8 "${output_basename}" 4 200 400 600 || return 1 # Mode 4 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp8_mode_5() { if [ "$(vp8_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp8_mode_5" vpx_tsvc_encoder vp8 "${output_basename}" 5 200 400 600 || return 1 # Mode 5 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp8_mode_6() { if [ "$(vp8_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp8_mode_6" vpx_tsvc_encoder vp8 "${output_basename}" 6 200 400 600 || return 1 # Mode 6 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp8_mode_7() { if [ "$(vp8_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp8_mode_7" vpx_tsvc_encoder vp8 "${output_basename}" 7 200 400 600 800 1000 || return 1 # Mode 7 produces 5 streams files_exist "${output_basename}" 5 || return 1 fi } vpx_tsvc_encoder_vp8_mode_8() { if [ "$(vp8_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp8_mode_8" vpx_tsvc_encoder vp8 "${output_basename}" 8 200 400 || return 1 # Mode 8 produces 2 streams files_exist "${output_basename}" 2 || return 1 fi } vpx_tsvc_encoder_vp8_mode_9() { if [ "$(vp8_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp8_mode_9" vpx_tsvc_encoder vp8 "${output_basename}" 9 200 400 600 || return 1 # Mode 9 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp8_mode_10() { if [ "$(vp8_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp8_mode_10" vpx_tsvc_encoder vp8 "${output_basename}" 10 200 400 600 || return 1 # Mode 10 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp8_mode_11() { if [ "$(vp8_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp8_mode_11" vpx_tsvc_encoder vp8 "${output_basename}" 11 200 400 600 || return 1 # Mode 11 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp9_mode_0() { if [ "$(vp9_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp9_mode_0" vpx_tsvc_encoder vp9 "${output_basename}" 0 200 || return 1 # Mode 0 produces 1 
stream files_exist "${output_basename}" 1 || return 1 fi } vpx_tsvc_encoder_vp9_mode_1() { if [ "$(vp9_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp9_mode_1" vpx_tsvc_encoder vp9 "${output_basename}" 1 200 400 || return 1 # Mode 1 produces 2 streams files_exist "${output_basename}" 2 || return 1 fi } vpx_tsvc_encoder_vp9_mode_2() { if [ "$(vp9_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp9_mode_2" vpx_tsvc_encoder vp9 "${output_basename}" 2 200 400 || return 1 # Mode 2 produces 2 streams files_exist "${output_basename}" 2 || return 1 fi } vpx_tsvc_encoder_vp9_mode_3() { if [ "$(vp9_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp9_mode_3" vpx_tsvc_encoder vp9 "${output_basename}" 3 200 400 600 || return 1 # Mode 3 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp9_mode_4() { if [ "$(vp9_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp9_mode_4" vpx_tsvc_encoder vp9 "${output_basename}" 4 200 400 600 || return 1 # Mode 4 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp9_mode_5() { if [ "$(vp9_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp9_mode_5" vpx_tsvc_encoder vp9 "${output_basename}" 5 200 400 600 || return 1 # Mode 5 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp9_mode_6() { if [ "$(vp9_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp9_mode_6" vpx_tsvc_encoder vp9 "${output_basename}" 6 200 400 600 || return 1 # Mode 6 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp9_mode_7() { if [ "$(vp9_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp9_mode_7" vpx_tsvc_encoder vp9 "${output_basename}" 7 200 400 600 800 1000 || return 1 # Mode 7 produces 5 streams files_exist "${output_basename}" 5 || return 1 fi } vpx_tsvc_encoder_vp9_mode_8() { if [ "$(vp9_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp9_mode_8" vpx_tsvc_encoder vp9 "${output_basename}" 8 200 400 || return 1 # Mode 8 produces 2 streams files_exist "${output_basename}" 2 || return 1 fi } vpx_tsvc_encoder_vp9_mode_9() { if [ "$(vp9_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp9_mode_9" vpx_tsvc_encoder vp9 "${output_basename}" 9 200 400 600 || return 1 # Mode 9 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp9_mode_10() { if [ "$(vp9_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp9_mode_10" vpx_tsvc_encoder vp9 "${output_basename}" 10 200 400 600 || return 1 # Mode 10 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_vp9_mode_11() { if [ "$(vp9_encode_available)" = "yes" ]; then local output_basename="vpx_tsvc_encoder_vp9_mode_11" vpx_tsvc_encoder vp9 "${output_basename}" 11 200 400 600 || return 1 # Mode 11 produces 3 streams files_exist "${output_basename}" 3 || return 1 fi } vpx_tsvc_encoder_tests="vpx_tsvc_encoder_vp8_mode_0 vpx_tsvc_encoder_vp8_mode_1 vpx_tsvc_encoder_vp8_mode_2 vpx_tsvc_encoder_vp8_mode_3 vpx_tsvc_encoder_vp8_mode_4 vpx_tsvc_encoder_vp8_mode_5 vpx_tsvc_encoder_vp8_mode_6 vpx_tsvc_encoder_vp8_mode_7 vpx_tsvc_encoder_vp8_mode_8 vpx_tsvc_encoder_vp8_mode_9 vpx_tsvc_encoder_vp8_mode_10 vpx_tsvc_encoder_vp8_mode_11 vpx_tsvc_encoder_vp9_mode_0 vpx_tsvc_encoder_vp9_mode_1 
vpx_tsvc_encoder_vp9_mode_2 vpx_tsvc_encoder_vp9_mode_3 vpx_tsvc_encoder_vp9_mode_4 vpx_tsvc_encoder_vp9_mode_5 vpx_tsvc_encoder_vp9_mode_6 vpx_tsvc_encoder_vp9_mode_7 vpx_tsvc_encoder_vp9_mode_8 vpx_tsvc_encoder_vp9_mode_9 vpx_tsvc_encoder_vp9_mode_10 vpx_tsvc_encoder_vp9_mode_11" run_tests vpx_tsvc_encoder_verify_environment "${vpx_tsvc_encoder_tests}" libvpx-1.8.2/test/vpxdec.sh000077500000000000000000000105511357355204000156430ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file tests vpxdec. To add new tests to this file, do the following: ## 1. Write a shell function (this is your test). ## 2. Add the function to vpxdec_tests (on a new line). ## . $(dirname $0)/tools_common.sh # Environment check: Make sure input is available. vpxdec_verify_environment() { if [ ! -e "${VP8_IVF_FILE}" ] || [ ! -e "${VP9_WEBM_FILE}" ] || \ [ ! -e "${VP9_FPM_WEBM_FILE}" ] || \ [ ! -e "${VP9_LT_50_FRAMES_WEBM_FILE}" ] || \ [ ! -e "${VP9_RAW_FILE}" ]; then elog "Libvpx test data must exist in LIBVPX_TEST_DATA_PATH." return 1 fi if [ -z "$(vpx_tool_path vpxdec)" ]; then elog "vpxdec not found. It must exist in LIBVPX_BIN_PATH or its parent." return 1 fi } # Wrapper function for running vpxdec with pipe input. Requires that # LIBVPX_BIN_PATH points to the directory containing vpxdec. $1 is used as the # input file path and shifted away. All remaining parameters are passed through # to vpxdec. vpxdec_pipe() { local decoder="$(vpx_tool_path vpxdec)" local input="$1" shift cat "${input}" | eval "${VPX_TEST_PREFIX}" "${decoder}" - "$@" ${devnull} } # Wrapper function for running vpxdec. Requires that LIBVPX_BIN_PATH points to # the directory containing vpxdec. $1 one is used as the input file path and # shifted away. All remaining parameters are passed through to vpxdec. vpxdec() { local decoder="$(vpx_tool_path vpxdec)" local input="$1" shift eval "${VPX_TEST_PREFIX}" "${decoder}" "$input" "$@" ${devnull} } vpxdec_can_decode_vp8() { if [ "$(vp8_decode_available)" = "yes" ]; then echo yes fi } vpxdec_can_decode_vp9() { if [ "$(vp9_decode_available)" = "yes" ]; then echo yes fi } vpxdec_vp8_ivf() { if [ "$(vpxdec_can_decode_vp8)" = "yes" ]; then vpxdec "${VP8_IVF_FILE}" --summary --noblit fi } vpxdec_vp8_ivf_pipe_input() { if [ "$(vpxdec_can_decode_vp8)" = "yes" ]; then vpxdec_pipe "${VP8_IVF_FILE}" --summary --noblit fi } vpxdec_vp9_webm() { if [ "$(vpxdec_can_decode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then vpxdec "${VP9_WEBM_FILE}" --summary --noblit fi } vpxdec_vp9_webm_frame_parallel() { if [ "$(vpxdec_can_decode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then for threads in 2 3 4 5 6 7 8; do vpxdec "${VP9_FPM_WEBM_FILE}" --summary --noblit --threads=$threads \ --frame-parallel done fi } vpxdec_vp9_webm_less_than_50_frames() { # ensure that reaching eof in webm_guess_framerate doesn't result in invalid # frames in actual webm_read_frame calls. 
if [ "$(vpxdec_can_decode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local decoder="$(vpx_tool_path vpxdec)" local expected=10 local num_frames=$(${VPX_TEST_PREFIX} "${decoder}" \ "${VP9_LT_50_FRAMES_WEBM_FILE}" --summary --noblit 2>&1 \ | awk '/^[0-9]+ decoded frames/ { print $1 }') if [ "$num_frames" -ne "$expected" ]; then elog "Output frames ($num_frames) != expected ($expected)" return 1 fi fi } # Ensures VP9_RAW_FILE correctly produces 1 frame instead of causing a hang. vpxdec_vp9_raw_file() { # Ensure a raw file properly reports eof and doesn't cause a hang. if [ "$(vpxdec_can_decode_vp9)" = "yes" ]; then local decoder="$(vpx_tool_path vpxdec)" local expected=1 [ -x /usr/bin/timeout ] && local TIMEOUT="/usr/bin/timeout 30s" local num_frames=$(${TIMEOUT} ${VPX_TEST_PREFIX} "${decoder}" \ "${VP9_RAW_FILE}" --summary --noblit 2>&1 \ | awk '/^[0-9]+ decoded frames/ { print $1 }') if [ -z "$num_frames" ] || [ "$num_frames" -ne "$expected" ]; then elog "Output frames ($num_frames) != expected ($expected)" return 1 fi fi } vpxdec_tests="vpxdec_vp8_ivf vpxdec_vp8_ivf_pipe_input vpxdec_vp9_webm vpxdec_vp9_webm_frame_parallel vpxdec_vp9_webm_less_than_50_frames vpxdec_vp9_raw_file" run_tests vpxdec_verify_environment "${vpxdec_tests}" libvpx-1.8.2/test/vpxenc.sh000077500000000000000000000315671357355204000156670ustar00rootroot00000000000000#!/bin/sh ## ## Copyright (c) 2014 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## This file tests vpxenc using hantro_collage_w352h288.yuv as input. To add ## new tests to this file, do the following: ## 1. Write a shell function (this is your test). ## 2. Add the function to vpxenc_tests (on a new line). ## . $(dirname $0)/tools_common.sh readonly TEST_FRAMES=10 # Environment check: Make sure input is available. vpxenc_verify_environment() { if [ ! -e "${YUV_RAW_INPUT}" ]; then elog "The file ${YUV_RAW_INPUT##*/} must exist in LIBVPX_TEST_DATA_PATH." return 1 fi if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then if [ ! -e "${Y4M_NOSQ_PAR_INPUT}" ]; then elog "The file ${Y4M_NOSQ_PAR_INPUT##*/} must exist in" elog "LIBVPX_TEST_DATA_PATH." return 1 fi fi if [ -z "$(vpx_tool_path vpxenc)" ]; then elog "vpxenc not found. It must exist in LIBVPX_BIN_PATH or its parent." return 1 fi } vpxenc_can_encode_vp8() { if [ "$(vp8_encode_available)" = "yes" ]; then echo yes fi } vpxenc_can_encode_vp9() { if [ "$(vp9_encode_available)" = "yes" ]; then echo yes fi } # Echo vpxenc command line parameters allowing use of # hantro_collage_w352h288.yuv as input. yuv_input_hantro_collage() { echo ""${YUV_RAW_INPUT}" --width="${YUV_RAW_INPUT_WIDTH}" --height="${YUV_RAW_INPUT_HEIGHT}"" } y4m_input_non_square_par() { echo ""${Y4M_NOSQ_PAR_INPUT}"" } y4m_input_720p() { echo ""${Y4M_720P_INPUT}"" } # Echo default vpxenc real time encoding params. $1 is the codec, which defaults # to vp8 if unspecified. 
vpxenc_rt_params() { local codec="${1:-vp8}" echo "--codec=${codec} --buf-initial-sz=500 --buf-optimal-sz=600 --buf-sz=1000 --cpu-used=-6 --end-usage=cbr --error-resilient=1 --kf-max-dist=90000 --lag-in-frames=0 --max-intra-rate=300 --max-q=56 --min-q=2 --noise-sensitivity=0 --overshoot-pct=50 --passes=1 --profile=0 --resize-allowed=0 --rt --static-thresh=0 --undershoot-pct=50" } # Forces --passes to 1 with CONFIG_REALTIME_ONLY. vpxenc_passes_param() { if [ "$(vpx_config_option_enabled CONFIG_REALTIME_ONLY)" = "yes" ]; then echo "--passes=1" else echo "--passes=2" fi } # Wrapper function for running vpxenc with pipe input. Requires that # LIBVPX_BIN_PATH points to the directory containing vpxenc. $1 is used as the # input file path and shifted away. All remaining parameters are passed through # to vpxenc. vpxenc_pipe() { local encoder="$(vpx_tool_path vpxenc)" local input="$1" shift cat "${input}" | eval "${VPX_TEST_PREFIX}" "${encoder}" - \ --test-decode=fatal \ "$@" ${devnull} } # Wrapper function for running vpxenc. Requires that LIBVPX_BIN_PATH points to # the directory containing vpxenc. $1 one is used as the input file path and # shifted away. All remaining parameters are passed through to vpxenc. vpxenc() { local encoder="$(vpx_tool_path vpxenc)" local input="$1" shift eval "${VPX_TEST_PREFIX}" "${encoder}" "${input}" \ --test-decode=fatal \ "$@" ${devnull} } vpxenc_vp8_ivf() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp8.ivf" vpxenc $(yuv_input_hantro_collage) \ --codec=vp8 \ --limit="${TEST_FRAMES}" \ --ivf \ --output="${output}" if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } vpxenc_vp8_webm() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp8.webm" vpxenc $(yuv_input_hantro_collage) \ --codec=vp8 \ --limit="${TEST_FRAMES}" \ --output="${output}" if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } vpxenc_vp8_webm_rt() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp8_rt.webm" vpxenc $(yuv_input_hantro_collage) \ $(vpxenc_rt_params vp8) \ --output="${output}" if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } vpxenc_vp8_webm_2pass() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp8.webm" vpxenc $(yuv_input_hantro_collage) \ --codec=vp8 \ --limit="${TEST_FRAMES}" \ --output="${output}" \ --passes=2 if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } vpxenc_vp8_webm_lag10_frames20() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local lag_total_frames=20 local lag_frames=10 local output="${VPX_TEST_OUTPUT_DIR}/vp8_lag10_frames20.webm" vpxenc $(yuv_input_hantro_collage) \ --codec=vp8 \ --limit="${lag_total_frames}" \ --lag-in-frames="${lag_frames}" \ --output="${output}" \ --auto-alt-ref=1 \ --passes=2 if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } vpxenc_vp8_ivf_piped_input() { if [ "$(vpxenc_can_encode_vp8)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp8_piped_input.ivf" vpxenc_pipe $(yuv_input_hantro_collage) \ --codec=vp8 \ --limit="${TEST_FRAMES}" \ --ivf \ --output="${output}" if [ ! -e "${output}" ]; then elog "Output file does not exist." 
return 1 fi fi } vpxenc_vp9_ivf() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp9.ivf" local passes=$(vpxenc_passes_param) vpxenc $(yuv_input_hantro_collage) \ --codec=vp9 \ --limit="${TEST_FRAMES}" \ "${passes}" \ --ivf \ --output="${output}" if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } vpxenc_vp9_webm() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp9.webm" local passes=$(vpxenc_passes_param) vpxenc $(yuv_input_hantro_collage) \ --codec=vp9 \ --limit="${TEST_FRAMES}" \ "${passes}" \ --output="${output}" if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } vpxenc_vp9_webm_rt() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp9_rt.webm" vpxenc $(yuv_input_hantro_collage) \ $(vpxenc_rt_params vp9) \ --output="${output}" if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } vpxenc_vp9_webm_rt_multithread_tiled() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_multithread_tiled.webm" local tilethread_min=2 local tilethread_max=4 local num_threads="$(seq ${tilethread_min} ${tilethread_max})" local num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})" for threads in ${num_threads}; do for tile_cols in ${num_tile_cols}; do vpxenc $(y4m_input_720p) \ $(vpxenc_rt_params vp9) \ --threads=${threads} \ --tile-columns=${tile_cols} \ --output="${output}" if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi rm "${output}" done done fi } vpxenc_vp9_webm_rt_multithread_tiled_frameparallel() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp9_rt_mt_t_fp.webm" local tilethread_min=2 local tilethread_max=4 local num_threads="$(seq ${tilethread_min} ${tilethread_max})" local num_tile_cols="$(seq ${tilethread_min} ${tilethread_max})" for threads in ${num_threads}; do for tile_cols in ${num_tile_cols}; do vpxenc $(y4m_input_720p) \ $(vpxenc_rt_params vp9) \ --threads=${threads} \ --tile-columns=${tile_cols} \ --frame-parallel=1 \ --output="${output}" if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi rm "${output}" done done fi } vpxenc_vp9_webm_2pass() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp9.webm" vpxenc $(yuv_input_hantro_collage) \ --codec=vp9 \ --limit="${TEST_FRAMES}" \ --output="${output}" \ --passes=2 if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } vpxenc_vp9_ivf_lossless() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp9_lossless.ivf" local passes=$(vpxenc_passes_param) vpxenc $(yuv_input_hantro_collage) \ --codec=vp9 \ --limit="${TEST_FRAMES}" \ --ivf \ --output="${output}" \ "${passes}" \ --lossless=1 if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } vpxenc_vp9_ivf_minq0_maxq0() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp9_lossless_minq0_maxq0.ivf" local passes=$(vpxenc_passes_param) vpxenc $(yuv_input_hantro_collage) \ --codec=vp9 \ --limit="${TEST_FRAMES}" \ --ivf \ --output="${output}" \ "${passes}" \ --min-q=0 \ --max-q=0 if [ ! 
-e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } vpxenc_vp9_webm_lag10_frames20() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local lag_total_frames=20 local lag_frames=10 local output="${VPX_TEST_OUTPUT_DIR}/vp9_lag10_frames20.webm" local passes=$(vpxenc_passes_param) vpxenc $(yuv_input_hantro_collage) \ --codec=vp9 \ --limit="${lag_total_frames}" \ --lag-in-frames="${lag_frames}" \ --output="${output}" \ "${passes}" \ --auto-alt-ref=1 if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } # TODO(fgalligan): Test that DisplayWidth is different than video width. vpxenc_vp9_webm_non_square_par() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ] && \ [ "$(webm_io_available)" = "yes" ]; then local output="${VPX_TEST_OUTPUT_DIR}/vp9_non_square_par.webm" local passes=$(vpxenc_passes_param) vpxenc $(y4m_input_non_square_par) \ --codec=vp9 \ --limit="${TEST_FRAMES}" \ "${passes}" \ --output="${output}" if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi fi } vpxenc_vp9_webm_sharpness() { if [ "$(vpxenc_can_encode_vp9)" = "yes" ]; then local sharpnesses="0 1 2 3 4 5 6 7" local output="${VPX_TEST_OUTPUT_DIR}/vpxenc_vp9_webm_sharpness.ivf" local last_size=0 local this_size=0 for sharpness in ${sharpnesses}; do vpxenc $(yuv_input_hantro_collage) \ --sharpness="${sharpness}" \ --codec=vp9 \ --limit=1 \ --cpu-used=2 \ --end-usage=q \ --cq-level=40 \ --output="${output}" \ "${passes}" if [ ! -e "${output}" ]; then elog "Output file does not exist." return 1 fi this_size=$(stat -c '%s' "${output}") if [ "${this_size}" -lt "${last_size}" ]; then elog "Higher sharpness value yielded lower file size." echo "${this_size}" " < " "${last_size}" return 1 fi last_size="${this_size}" done fi } vpxenc_tests="vpxenc_vp8_ivf vpxenc_vp8_webm vpxenc_vp8_webm_rt vpxenc_vp8_ivf_piped_input vpxenc_vp9_ivf vpxenc_vp9_webm vpxenc_vp9_webm_rt vpxenc_vp9_webm_rt_multithread_tiled vpxenc_vp9_webm_rt_multithread_tiled_frameparallel vpxenc_vp9_ivf_lossless vpxenc_vp9_ivf_minq0_maxq0 vpxenc_vp9_webm_lag10_frames20 vpxenc_vp9_webm_non_square_par vpxenc_vp9_webm_sharpness" if [ "$(vpx_config_option_enabled CONFIG_REALTIME_ONLY)" != "yes" ]; then vpxenc_tests="$vpxenc_tests vpxenc_vp8_webm_2pass vpxenc_vp8_webm_lag10_frames20 vpxenc_vp9_webm_2pass" fi run_tests vpxenc_verify_environment "${vpxenc_tests}" libvpx-1.8.2/test/webm_video_source.h000066400000000000000000000051371357355204000176700ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_WEBM_VIDEO_SOURCE_H_ #define VPX_TEST_WEBM_VIDEO_SOURCE_H_ #include #include #include #include #include #include "../tools_common.h" #include "../webmdec.h" #include "test/video_source.h" namespace libvpx_test { // This class extends VideoSource to allow parsing of WebM files, // so that we can do actual file decodes. 
class WebMVideoSource : public CompressedVideoSource { public: explicit WebMVideoSource(const std::string &file_name) : file_name_(file_name), vpx_ctx_(new VpxInputContext()), webm_ctx_(new WebmInputContext()), buf_(NULL), buf_sz_(0), frame_(0), end_of_file_(false) {} virtual ~WebMVideoSource() { if (vpx_ctx_->file != NULL) fclose(vpx_ctx_->file); webm_free(webm_ctx_); delete vpx_ctx_; delete webm_ctx_; } virtual void Init() {} virtual void Begin() { vpx_ctx_->file = OpenTestDataFile(file_name_); ASSERT_TRUE(vpx_ctx_->file != NULL) << "Input file open failed. Filename: " << file_name_; ASSERT_EQ(file_is_webm(webm_ctx_, vpx_ctx_), 1) << "file is not WebM"; FillFrame(); } virtual void Next() { ++frame_; FillFrame(); } void FillFrame() { ASSERT_TRUE(vpx_ctx_->file != NULL); const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_); ASSERT_GE(status, 0) << "webm_read_frame failed"; if (status == 1) { end_of_file_ = true; } } void SeekToNextKeyFrame() { ASSERT_TRUE(vpx_ctx_->file != NULL); do { const int status = webm_read_frame(webm_ctx_, &buf_, &buf_sz_); ASSERT_GE(status, 0) << "webm_read_frame failed"; ++frame_; if (status == 1) { end_of_file_ = true; } } while (!webm_ctx_->is_key_frame && !end_of_file_); } virtual const uint8_t *cxdata() const { return end_of_file_ ? NULL : buf_; } virtual size_t frame_size() const { return buf_sz_; } virtual unsigned int frame_number() const { return frame_; } protected: std::string file_name_; VpxInputContext *vpx_ctx_; WebmInputContext *webm_ctx_; uint8_t *buf_; size_t buf_sz_; unsigned int frame_; bool end_of_file_; }; } // namespace libvpx_test #endif // VPX_TEST_WEBM_VIDEO_SOURCE_H_ libvpx-1.8.2/test/y4m_test.cc000066400000000000000000000136361357355204000161010ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "third_party/googletest/src/include/gtest/gtest.h" #include "./vpx_config.h" #include "./y4menc.h" #include "test/md5_helper.h" #include "test/util.h" #include "test/y4m_video_source.h" namespace { using std::string; static const unsigned int kWidth = 160; static const unsigned int kHeight = 90; static const unsigned int kFrames = 10; struct Y4mTestParam { const char *filename; unsigned int bit_depth; vpx_img_fmt format; const char *md5raw; }; const Y4mTestParam kY4mTestVectors[] = { { "park_joy_90p_8_420.y4m", 8, VPX_IMG_FMT_I420, "e5406275b9fc6bb3436c31d4a05c1cab" }, { "park_joy_90p_8_422.y4m", 8, VPX_IMG_FMT_I422, "284a47a47133b12884ec3a14e959a0b6" }, { "park_joy_90p_8_444.y4m", 8, VPX_IMG_FMT_I444, "90517ff33843d85de712fd4fe60dbed0" }, { "park_joy_90p_10_420_20f.y4m", 10, VPX_IMG_FMT_I42016, "2f56ab9809269f074df7e3daf1ce0be6" }, { "park_joy_90p_10_422_20f.y4m", 10, VPX_IMG_FMT_I42216, "1b5c73d2e8e8c4e02dc4889ecac41c83" }, { "park_joy_90p_10_444_20f.y4m", 10, VPX_IMG_FMT_I44416, "ec4ab5be53195c5b838d1d19e1bc2674" }, { "park_joy_90p_12_420_20f.y4m", 12, VPX_IMG_FMT_I42016, "3370856c8ddebbd1f9bb2e66f97677f4" }, { "park_joy_90p_12_422_20f.y4m", 12, VPX_IMG_FMT_I42216, "4eab364318dd8201acbb182e43bd4966" }, { "park_joy_90p_12_444_20f.y4m", 12, VPX_IMG_FMT_I44416, "f189dfbbd92119fc8e5f211a550166be" }, }; static void write_image_file(const vpx_image_t *img, FILE *file) { int plane, y; for (plane = 0; plane < 3; ++plane) { const unsigned char *buf = img->planes[plane]; const int stride = img->stride[plane]; const int bytes_per_sample = (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1; const int h = (plane ? (img->d_h + img->y_chroma_shift) >> img->y_chroma_shift : img->d_h); const int w = (plane ? (img->d_w + img->x_chroma_shift) >> img->x_chroma_shift : img->d_w); for (y = 0; y < h; ++y) { fwrite(buf, bytes_per_sample, w, file); buf += stride; } } } class Y4mVideoSourceTest : public ::testing::TestWithParam, public ::libvpx_test::Y4mVideoSource { protected: Y4mVideoSourceTest() : Y4mVideoSource("", 0, 0) {} virtual ~Y4mVideoSourceTest() { CloseSource(); } virtual void Init(const std::string &file_name, int limit) { file_name_ = file_name; start_ = 0; limit_ = limit; frame_ = 0; Begin(); } // Checks y4m header information void HeaderChecks(unsigned int bit_depth, vpx_img_fmt_t fmt) { ASSERT_TRUE(input_file_ != NULL); ASSERT_EQ(y4m_.pic_w, (int)kWidth); ASSERT_EQ(y4m_.pic_h, (int)kHeight); ASSERT_EQ(img()->d_w, kWidth); ASSERT_EQ(img()->d_h, kHeight); ASSERT_EQ(y4m_.bit_depth, bit_depth); ASSERT_EQ(y4m_.vpx_fmt, fmt); if (fmt == VPX_IMG_FMT_I420 || fmt == VPX_IMG_FMT_I42016) { ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3 / 2); ASSERT_EQ(img()->x_chroma_shift, 1U); ASSERT_EQ(img()->y_chroma_shift, 1U); } if (fmt == VPX_IMG_FMT_I422 || fmt == VPX_IMG_FMT_I42216) { ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 2); ASSERT_EQ(img()->x_chroma_shift, 1U); ASSERT_EQ(img()->y_chroma_shift, 0U); } if (fmt == VPX_IMG_FMT_I444 || fmt == VPX_IMG_FMT_I44416) { ASSERT_EQ(y4m_.bps, (int)y4m_.bit_depth * 3); ASSERT_EQ(img()->x_chroma_shift, 0U); ASSERT_EQ(img()->y_chroma_shift, 0U); } } // Checks MD5 of the raw frame data void Md5Check(const string &expected_md5) { ASSERT_TRUE(input_file_ != NULL); libvpx_test::MD5 md5; for (unsigned int i = start_; i < limit_; i++) { md5.Add(img()); Next(); } ASSERT_EQ(string(md5.Get()), expected_md5); } }; TEST_P(Y4mVideoSourceTest, SourceTest) { const Y4mTestParam t = GetParam(); Init(t.filename, kFrames); HeaderChecks(t.bit_depth, t.format); Md5Check(t.md5raw); 
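// The same kY4mTestVectors drive Y4mVideoWriteTest below, which
// re-serializes each stream through y4m_write_file_header() /
// y4m_write_frame_header() and must reproduce the identical raw-frame MD5.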
} INSTANTIATE_TEST_CASE_P(C, Y4mVideoSourceTest, ::testing::ValuesIn(kY4mTestVectors)); class Y4mVideoWriteTest : public Y4mVideoSourceTest { protected: Y4mVideoWriteTest() : tmpfile_(NULL) {} virtual ~Y4mVideoWriteTest() { delete tmpfile_; input_file_ = NULL; } void ReplaceInputFile(FILE *input_file) { CloseSource(); frame_ = 0; input_file_ = input_file; rewind(input_file_); ReadSourceToStart(); } // Writes out a y4m file and then reads it back void WriteY4mAndReadBack() { ASSERT_TRUE(input_file_ != NULL); char buf[Y4M_BUFFER_SIZE] = { 0 }; const struct VpxRational framerate = { y4m_.fps_n, y4m_.fps_d }; tmpfile_ = new libvpx_test::TempOutFile; ASSERT_TRUE(tmpfile_->file() != NULL); y4m_write_file_header(buf, sizeof(buf), kWidth, kHeight, &framerate, y4m_.vpx_fmt, y4m_.bit_depth); fputs(buf, tmpfile_->file()); for (unsigned int i = start_; i < limit_; i++) { y4m_write_frame_header(buf, sizeof(buf)); fputs(buf, tmpfile_->file()); write_image_file(img(), tmpfile_->file()); Next(); } ReplaceInputFile(tmpfile_->file()); } virtual void Init(const std::string &file_name, int limit) { Y4mVideoSourceTest::Init(file_name, limit); WriteY4mAndReadBack(); } libvpx_test::TempOutFile *tmpfile_; }; TEST_P(Y4mVideoWriteTest, WriteTest) { const Y4mTestParam t = GetParam(); Init(t.filename, kFrames); HeaderChecks(t.bit_depth, t.format); Md5Check(t.md5raw); } INSTANTIATE_TEST_CASE_P(C, Y4mVideoWriteTest, ::testing::ValuesIn(kY4mTestVectors)); } // namespace libvpx-1.8.2/test/y4m_video_source.h000066400000000000000000000065571357355204000174560ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_Y4M_VIDEO_SOURCE_H_ #define VPX_TEST_Y4M_VIDEO_SOURCE_H_ #include #include #include #include "test/video_source.h" #include "./y4minput.h" namespace libvpx_test { // This class extends VideoSource to allow parsing of raw yv12 // so that we can do actual file encodes. class Y4mVideoSource : public VideoSource { public: Y4mVideoSource(const std::string &file_name, unsigned int start, int limit) : file_name_(file_name), input_file_(NULL), img_(new vpx_image_t()), start_(start), limit_(limit), frame_(0), framerate_numerator_(0), framerate_denominator_(0), y4m_() {} virtual ~Y4mVideoSource() { vpx_img_free(img_.get()); CloseSource(); } virtual void OpenSource() { CloseSource(); input_file_ = OpenTestDataFile(file_name_); ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: " << file_name_; } virtual void ReadSourceToStart() { ASSERT_TRUE(input_file_ != NULL); ASSERT_FALSE(y4m_input_open(&y4m_, input_file_, NULL, 0, 0)); framerate_numerator_ = y4m_.fps_n; framerate_denominator_ = y4m_.fps_d; frame_ = 0; for (unsigned int i = 0; i < start_; i++) { Next(); } FillFrame(); } virtual void Begin() { OpenSource(); ReadSourceToStart(); } virtual void Next() { ++frame_; FillFrame(); } virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_.get() : NULL; } // Models a stream where Timebase = 1/FPS, so pts == frame. 
virtual vpx_codec_pts_t pts() const { return frame_; }

  virtual unsigned long duration() const { return 1; }

  virtual vpx_rational_t timebase() const {
    const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ };
    return t;
  }

  virtual unsigned int frame() const { return frame_; }

  virtual unsigned int limit() const { return limit_; }

  virtual void FillFrame() {
    ASSERT_TRUE(input_file_ != NULL);
    // Read a frame from input_file.
    y4m_input_fetch_frame(&y4m_, input_file_, img_.get());
  }

  // Swap buffers with another y4m source. This allows reading a new frame
  // while keeping the old frame around. A whole Y4mVideoSource is required
  // and not just a vpx_image_t because of how the y4m reader manipulates
  // vpx_image_t internals.
  void SwapBuffers(Y4mVideoSource *other) {
    std::swap(other->y4m_.dst_buf, y4m_.dst_buf);
    vpx_image_t *tmp;
    tmp = other->img_.release();
    other->img_.reset(img_.release());
    img_.reset(tmp);
  }

 protected:
  void CloseSource() {
    y4m_input_close(&y4m_);
    y4m_ = y4m_input();
    if (input_file_ != NULL) {
      fclose(input_file_);
      input_file_ = NULL;
    }
  }

  std::string file_name_;
  FILE *input_file_;
  std::unique_ptr<vpx_image_t> img_;
  unsigned int start_;
  unsigned int limit_;
  unsigned int frame_;
  int framerate_numerator_;
  int framerate_denominator_;
  y4m_input y4m_;
};

}  // namespace libvpx_test

#endif  // VPX_TEST_Y4M_VIDEO_SOURCE_H_
libvpx-1.8.2/test/yuv_temporal_filter_test.cc000066400000000000000000000653321357355204000214630ustar00rootroot00000000000000/*
 *  Copyright (c) 2019 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "third_party/googletest/src/include/gtest/gtest.h"

#include "./vp9_rtcd.h"
#include "test/acm_random.h"
#include "test/buffer.h"
#include "test/register_state_check.h"
#include "vpx_ports/vpx_timer.h"

namespace {

using ::libvpx_test::ACMRandom;
using ::libvpx_test::Buffer;

typedef void (*YUVTemporalFilterFunc)(
    const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre,
    int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src,
    int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre,
    int uv_pre_stride, unsigned int block_width, unsigned int block_height,
    int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32,
    uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator,
    uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count);

struct TemporalFilterWithBd {
  TemporalFilterWithBd(YUVTemporalFilterFunc func, int bitdepth)
      : temporal_filter(func), bd(bitdepth) {}

  YUVTemporalFilterFunc temporal_filter;
  int bd;
};

std::ostream &operator<<(std::ostream &os, const TemporalFilterWithBd &tf) {
  return os << "Bitdepth: " << tf.bd;
}

int GetFilterWeight(unsigned int row, unsigned int col,
                    unsigned int block_height, unsigned int block_width,
                    const int *const blk_fw, int use_32x32) {
  if (use_32x32) {
    return blk_fw[0];
  }
  return blk_fw[2 * (row >= block_height / 2) + (col >= block_width / 2)];
}

template <typename PixelType>
int GetModIndex(int sum_dist, int index, int rounding, int strength,
                int filter_weight) {
  int mod = sum_dist * 3 / index;
  mod += rounding;
  mod >>= strength;

  mod = VPXMIN(16, mod);

  mod = 16 - mod;
  mod *= filter_weight;

  return mod;
}

template <>
int GetModIndex<uint8_t>(int sum_dist, int index, int rounding, int strength,
                         int filter_weight) {
  unsigned int index_mult[14] = { 0,     0,     0,     0,     49152,
                                  39322, 32768, 28087, 24576, 21846,
                                  19661, 17874, 0,     15124 };

  assert(index >= 0 && index <= 13);
  assert(index_mult[index] != 0);

  int mod = (clamp(sum_dist, 0, UINT16_MAX) * index_mult[index]) >> 16;
  mod += rounding;
  mod >>= strength;

  mod = VPXMIN(16, mod);

  mod = 16 - mod;
  mod *= filter_weight;

  return mod;
}

template <>
int GetModIndex<uint16_t>(int sum_dist, int index, int rounding, int strength,
                          int filter_weight) {
  int64_t index_mult[14] = { 0U,          0U,          0U,          0U,
                             3221225472U, 2576980378U, 2147483648U,
                             1840700270U, 1610612736U, 1431655766U,
                             1288490189U, 1171354718U, 0U,          991146300U };

  assert(index >= 0 && index <= 13);
  assert(index_mult[index] != 0);

  int mod = static_cast<int>((sum_dist * index_mult[index]) >> 32);
  mod += rounding;
  mod >>= strength;

  mod = VPXMIN(16, mod);

  mod = 16 - mod;
  mod *= filter_weight;

  return mod;
}

template <typename PixelType>
void ApplyReferenceFilter(
    const Buffer<PixelType> &y_src, const Buffer<PixelType> &y_pre,
    const Buffer<PixelType> &u_src, const Buffer<PixelType> &v_src,
    const Buffer<PixelType> &u_pre, const Buffer<PixelType> &v_pre,
    unsigned int block_width, unsigned int block_height, int ss_x, int ss_y,
    int strength, const int *const blk_fw, int use_32x32,
    Buffer<uint32_t> *y_accumulator, Buffer<uint16_t> *y_counter,
    Buffer<uint32_t> *u_accumulator, Buffer<uint16_t> *u_counter,
    Buffer<uint32_t> *v_accumulator, Buffer<uint16_t> *v_counter) {
  const PixelType *y_src_ptr = y_src.TopLeftPixel();
  const PixelType *y_pre_ptr = y_pre.TopLeftPixel();
  const PixelType *u_src_ptr = u_src.TopLeftPixel();
  const PixelType *u_pre_ptr = u_pre.TopLeftPixel();
  const PixelType *v_src_ptr = v_src.TopLeftPixel();
  const PixelType *v_pre_ptr = v_pre.TopLeftPixel();

  const int uv_block_width = block_width >> ss_x,
            uv_block_height = block_height >> ss_y;
  const int y_src_stride = y_src.stride(), y_pre_stride = y_pre.stride();
  const int uv_src_stride = u_src.stride(), uv_pre_stride = u_pre.stride();
  const int y_diff_stride =
block_width, uv_diff_stride = uv_block_width; Buffer y_dif = Buffer(block_width, block_height, 0); Buffer u_dif = Buffer(uv_block_width, uv_block_height, 0); Buffer v_dif = Buffer(uv_block_width, uv_block_height, 0); ASSERT_TRUE(y_dif.Init()); ASSERT_TRUE(u_dif.Init()); ASSERT_TRUE(v_dif.Init()); y_dif.Set(0); u_dif.Set(0); v_dif.Set(0); int *y_diff_ptr = y_dif.TopLeftPixel(); int *u_diff_ptr = u_dif.TopLeftPixel(); int *v_diff_ptr = v_dif.TopLeftPixel(); uint32_t *y_accum = y_accumulator->TopLeftPixel(); uint32_t *u_accum = u_accumulator->TopLeftPixel(); uint32_t *v_accum = v_accumulator->TopLeftPixel(); uint16_t *y_count = y_counter->TopLeftPixel(); uint16_t *u_count = u_counter->TopLeftPixel(); uint16_t *v_count = v_counter->TopLeftPixel(); const int y_accum_stride = y_accumulator->stride(); const int u_accum_stride = u_accumulator->stride(); const int v_accum_stride = v_accumulator->stride(); const int y_count_stride = y_counter->stride(); const int u_count_stride = u_counter->stride(); const int v_count_stride = v_counter->stride(); const int rounding = (1 << strength) >> 1; // Get the square diffs for (int row = 0; row < static_cast(block_height); row++) { for (int col = 0; col < static_cast(block_width); col++) { const int diff = y_src_ptr[row * y_src_stride + col] - y_pre_ptr[row * y_pre_stride + col]; y_diff_ptr[row * y_diff_stride + col] = diff * diff; } } for (int row = 0; row < uv_block_height; row++) { for (int col = 0; col < uv_block_width; col++) { const int u_diff = u_src_ptr[row * uv_src_stride + col] - u_pre_ptr[row * uv_pre_stride + col]; const int v_diff = v_src_ptr[row * uv_src_stride + col] - v_pre_ptr[row * uv_pre_stride + col]; u_diff_ptr[row * uv_diff_stride + col] = u_diff * u_diff; v_diff_ptr[row * uv_diff_stride + col] = v_diff * v_diff; } } // Apply the filter to luma for (int row = 0; row < static_cast(block_height); row++) { for (int col = 0; col < static_cast(block_width); col++) { const int uv_row = row >> ss_y; const int uv_col = col >> ss_x; const int filter_weight = GetFilterWeight(row, col, block_height, block_width, blk_fw, use_32x32); // First we get the modifier for the current y pixel const int y_pixel = y_pre_ptr[row * y_pre_stride + col]; int y_num_used = 0; int y_mod = 0; // Sum the neighboring 3x3 y pixels for (int row_step = -1; row_step <= 1; row_step++) { for (int col_step = -1; col_step <= 1; col_step++) { const int sub_row = row + row_step; const int sub_col = col + col_step; if (sub_row >= 0 && sub_row < static_cast(block_height) && sub_col >= 0 && sub_col < static_cast(block_width)) { y_mod += y_diff_ptr[sub_row * y_diff_stride + sub_col]; y_num_used++; } } } // Sum the corresponding uv pixels to the current y modifier // Note we are rounding down instead of rounding to the nearest pixel. 
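      // For example, an interior luma pixel has 9 in-bounds neighbors, and
      // the 2 chroma diffs added below bring y_num_used to 11; with
      // strength 6 the rounding term is (1 << 6) >> 1 == 32, so the generic
      // GetModIndex() computes VPXMIN(16, (y_mod * 3 / 11 + 32) >> 6),
      // inverts it to 16 - mod, and scales the result by the block's
      // filter weight.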
y_mod += u_diff_ptr[uv_row * uv_diff_stride + uv_col]; y_mod += v_diff_ptr[uv_row * uv_diff_stride + uv_col]; y_num_used += 2; // Set the modifier y_mod = GetModIndex(y_mod, y_num_used, rounding, strength, filter_weight); // Accumulate the result y_count[row * y_count_stride + col] += y_mod; y_accum[row * y_accum_stride + col] += y_mod * y_pixel; } } // Apply the filter to chroma for (int uv_row = 0; uv_row < uv_block_height; uv_row++) { for (int uv_col = 0; uv_col < uv_block_width; uv_col++) { const int y_row = uv_row << ss_y; const int y_col = uv_col << ss_x; const int filter_weight = GetFilterWeight( uv_row, uv_col, uv_block_height, uv_block_width, blk_fw, use_32x32); const int u_pixel = u_pre_ptr[uv_row * uv_pre_stride + uv_col]; const int v_pixel = v_pre_ptr[uv_row * uv_pre_stride + uv_col]; int uv_num_used = 0; int u_mod = 0, v_mod = 0; // Sum the neighboring 3x3 chromal pixels to the chroma modifier for (int row_step = -1; row_step <= 1; row_step++) { for (int col_step = -1; col_step <= 1; col_step++) { const int sub_row = uv_row + row_step; const int sub_col = uv_col + col_step; if (sub_row >= 0 && sub_row < uv_block_height && sub_col >= 0 && sub_col < uv_block_width) { u_mod += u_diff_ptr[sub_row * uv_diff_stride + sub_col]; v_mod += v_diff_ptr[sub_row * uv_diff_stride + sub_col]; uv_num_used++; } } } // Sum all the luma pixels associated with the current luma pixel for (int row_step = 0; row_step < 1 + ss_y; row_step++) { for (int col_step = 0; col_step < 1 + ss_x; col_step++) { const int sub_row = y_row + row_step; const int sub_col = y_col + col_step; const int y_diff = y_diff_ptr[sub_row * y_diff_stride + sub_col]; u_mod += y_diff; v_mod += y_diff; uv_num_used++; } } // Set the modifier u_mod = GetModIndex(u_mod, uv_num_used, rounding, strength, filter_weight); v_mod = GetModIndex(v_mod, uv_num_used, rounding, strength, filter_weight); // Accumulate the result u_count[uv_row * u_count_stride + uv_col] += u_mod; u_accum[uv_row * u_accum_stride + uv_col] += u_mod * u_pixel; v_count[uv_row * v_count_stride + uv_col] += v_mod; v_accum[uv_row * v_accum_stride + uv_col] += v_mod * v_pixel; } } } class YUVTemporalFilterTest : public ::testing::TestWithParam { public: virtual void SetUp() { filter_func_ = GetParam().temporal_filter; bd_ = GetParam().bd; use_highbd_ = (bd_ != 8); rnd_.Reset(ACMRandom::DeterministicSeed()); saturate_test_ = 0; num_repeats_ = 10; ASSERT_TRUE(bd_ == 8 || bd_ == 10 || bd_ == 12); } protected: template void CompareTestWithParam(int width, int height, int ss_x, int ss_y, int filter_strength, int use_32x32, const int *filter_weight); template void RunTestFilterWithParam(int width, int height, int ss_x, int ss_y, int filter_strength, int use_32x32, const int *filter_weight); YUVTemporalFilterFunc filter_func_; ACMRandom rnd_; int saturate_test_; int num_repeats_; int use_highbd_; int bd_; }; template void YUVTemporalFilterTest::CompareTestWithParam(int width, int height, int ss_x, int ss_y, int filter_strength, int use_32x32, const int *filter_weight) { const int uv_width = width >> ss_x, uv_height = height >> ss_y; Buffer y_src = Buffer(width, height, 0); Buffer y_pre = Buffer(width, height, 0); Buffer y_count_ref = Buffer(width, height, 0); Buffer y_accum_ref = Buffer(width, height, 0); Buffer y_count_tst = Buffer(width, height, 0); Buffer y_accum_tst = Buffer(width, height, 0); Buffer u_src = Buffer(uv_width, uv_height, 0); Buffer u_pre = Buffer(uv_width, uv_height, 0); Buffer u_count_ref = Buffer(uv_width, uv_height, 0); Buffer u_accum_ref = 
Buffer(uv_width, uv_height, 0); Buffer u_count_tst = Buffer(uv_width, uv_height, 0); Buffer u_accum_tst = Buffer(uv_width, uv_height, 0); Buffer v_src = Buffer(uv_width, uv_height, 0); Buffer v_pre = Buffer(uv_width, uv_height, 0); Buffer v_count_ref = Buffer(uv_width, uv_height, 0); Buffer v_accum_ref = Buffer(uv_width, uv_height, 0); Buffer v_count_tst = Buffer(uv_width, uv_height, 0); Buffer v_accum_tst = Buffer(uv_width, uv_height, 0); ASSERT_TRUE(y_src.Init()); ASSERT_TRUE(y_pre.Init()); ASSERT_TRUE(y_count_ref.Init()); ASSERT_TRUE(y_accum_ref.Init()); ASSERT_TRUE(y_count_tst.Init()); ASSERT_TRUE(y_accum_tst.Init()); ASSERT_TRUE(u_src.Init()); ASSERT_TRUE(u_pre.Init()); ASSERT_TRUE(u_count_ref.Init()); ASSERT_TRUE(u_accum_ref.Init()); ASSERT_TRUE(u_count_tst.Init()); ASSERT_TRUE(u_accum_tst.Init()); ASSERT_TRUE(v_src.Init()); ASSERT_TRUE(v_pre.Init()); ASSERT_TRUE(v_count_ref.Init()); ASSERT_TRUE(v_accum_ref.Init()); ASSERT_TRUE(v_count_tst.Init()); ASSERT_TRUE(v_accum_tst.Init()); y_accum_ref.Set(0); y_accum_tst.Set(0); y_count_ref.Set(0); y_count_tst.Set(0); u_accum_ref.Set(0); u_accum_tst.Set(0); u_count_ref.Set(0); u_count_tst.Set(0); v_accum_ref.Set(0); v_accum_tst.Set(0); v_count_ref.Set(0); v_count_tst.Set(0); for (int repeats = 0; repeats < num_repeats_; repeats++) { if (saturate_test_) { const int max_val = (1 << bd_) - 1; y_src.Set(max_val); y_pre.Set(0); u_src.Set(max_val); u_pre.Set(0); v_src.Set(max_val); v_pre.Set(0); } else { y_src.Set(&rnd_, 0, 7 << (bd_ - 8)); y_pre.Set(&rnd_, 0, 7 << (bd_ - 8)); u_src.Set(&rnd_, 0, 7 << (bd_ - 8)); u_pre.Set(&rnd_, 0, 7 << (bd_ - 8)); v_src.Set(&rnd_, 0, 7 << (bd_ - 8)); v_pre.Set(&rnd_, 0, 7 << (bd_ - 8)); } ApplyReferenceFilter( y_src, y_pre, u_src, v_src, u_pre, v_pre, width, height, ss_x, ss_y, filter_strength, filter_weight, use_32x32, &y_accum_ref, &y_count_ref, &u_accum_ref, &u_count_ref, &v_accum_ref, &v_count_ref); ASM_REGISTER_STATE_CHECK(filter_func_( reinterpret_cast(y_src.TopLeftPixel()), y_src.stride(), reinterpret_cast(y_pre.TopLeftPixel()), y_pre.stride(), reinterpret_cast(u_src.TopLeftPixel()), reinterpret_cast(v_src.TopLeftPixel()), u_src.stride(), reinterpret_cast(u_pre.TopLeftPixel()), reinterpret_cast(v_pre.TopLeftPixel()), u_pre.stride(), width, height, ss_x, ss_y, filter_strength, filter_weight, use_32x32, y_accum_tst.TopLeftPixel(), y_count_tst.TopLeftPixel(), u_accum_tst.TopLeftPixel(), u_count_tst.TopLeftPixel(), v_accum_tst.TopLeftPixel(), v_count_tst.TopLeftPixel())); EXPECT_TRUE(y_accum_tst.CheckValues(y_accum_ref)); EXPECT_TRUE(y_count_tst.CheckValues(y_count_ref)); EXPECT_TRUE(u_accum_tst.CheckValues(u_accum_ref)); EXPECT_TRUE(u_count_tst.CheckValues(u_count_ref)); EXPECT_TRUE(v_accum_tst.CheckValues(v_accum_ref)); EXPECT_TRUE(v_count_tst.CheckValues(v_count_ref)); if (HasFailure()) { if (use_32x32) { printf("SS_X: %d, SS_Y: %d, Strength: %d, Weight: %d\n", ss_x, ss_y, filter_strength, *filter_weight); } else { printf("SS_X: %d, SS_Y: %d, Strength: %d, Weights: %d,%d,%d,%d\n", ss_x, ss_y, filter_strength, filter_weight[0], filter_weight[1], filter_weight[2], filter_weight[3]); } y_accum_tst.PrintDifference(y_accum_ref); y_count_tst.PrintDifference(y_count_ref); u_accum_tst.PrintDifference(u_accum_ref); u_count_tst.PrintDifference(u_count_ref); v_accum_tst.PrintDifference(v_accum_ref); v_count_tst.PrintDifference(v_count_ref); return; } } } template void YUVTemporalFilterTest::RunTestFilterWithParam(int width, int height, int ss_x, int ss_y, int filter_strength, int use_32x32, const int *filter_weight) { 
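  // Timing-only path: unlike CompareTestWithParam() above, this helper does
  // not run the reference filter or compare outputs; it simply drives the
  // function under test repeatedly so the DISABLED_Speed test below can time
  // it with vpx_usec_timer.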
const int uv_width = width >> ss_x, uv_height = height >> ss_y; Buffer y_src = Buffer(width, height, 0); Buffer y_pre = Buffer(width, height, 0); Buffer y_count = Buffer(width, height, 0); Buffer y_accum = Buffer(width, height, 0); Buffer u_src = Buffer(uv_width, uv_height, 0); Buffer u_pre = Buffer(uv_width, uv_height, 0); Buffer u_count = Buffer(uv_width, uv_height, 0); Buffer u_accum = Buffer(uv_width, uv_height, 0); Buffer v_src = Buffer(uv_width, uv_height, 0); Buffer v_pre = Buffer(uv_width, uv_height, 0); Buffer v_count = Buffer(uv_width, uv_height, 0); Buffer v_accum = Buffer(uv_width, uv_height, 0); ASSERT_TRUE(y_src.Init()); ASSERT_TRUE(y_pre.Init()); ASSERT_TRUE(y_count.Init()); ASSERT_TRUE(y_accum.Init()); ASSERT_TRUE(u_src.Init()); ASSERT_TRUE(u_pre.Init()); ASSERT_TRUE(u_count.Init()); ASSERT_TRUE(u_accum.Init()); ASSERT_TRUE(v_src.Init()); ASSERT_TRUE(v_pre.Init()); ASSERT_TRUE(v_count.Init()); ASSERT_TRUE(v_accum.Init()); y_accum.Set(0); y_count.Set(0); u_accum.Set(0); u_count.Set(0); v_accum.Set(0); v_count.Set(0); y_src.Set(&rnd_, 0, 7 << (bd_ - 8)); y_pre.Set(&rnd_, 0, 7 << (bd_ - 8)); u_src.Set(&rnd_, 0, 7 << (bd_ - 8)); u_pre.Set(&rnd_, 0, 7 << (bd_ - 8)); v_src.Set(&rnd_, 0, 7 << (bd_ - 8)); v_pre.Set(&rnd_, 0, 7 << (bd_ - 8)); for (int repeats = 0; repeats < num_repeats_; repeats++) { ASM_REGISTER_STATE_CHECK(filter_func_( reinterpret_cast(y_src.TopLeftPixel()), y_src.stride(), reinterpret_cast(y_pre.TopLeftPixel()), y_pre.stride(), reinterpret_cast(u_src.TopLeftPixel()), reinterpret_cast(v_src.TopLeftPixel()), u_src.stride(), reinterpret_cast(u_pre.TopLeftPixel()), reinterpret_cast(v_pre.TopLeftPixel()), u_pre.stride(), width, height, ss_x, ss_y, filter_strength, filter_weight, use_32x32, y_accum.TopLeftPixel(), y_count.TopLeftPixel(), u_accum.TopLeftPixel(), u_count.TopLeftPixel(), v_accum.TopLeftPixel(), v_count.TopLeftPixel())); } } TEST_P(YUVTemporalFilterTest, Use32x32) { const int width = 32, height = 32; const int use_32x32 = 1; for (int ss_x = 0; ss_x <= 1; ss_x++) { for (int ss_y = 0; ss_y <= 1; ss_y++) { for (int filter_strength = 0; filter_strength <= 6; filter_strength += 2) { for (int filter_weight = 0; filter_weight <= 2; filter_weight++) { if (use_highbd_) { const int adjusted_strength = filter_strength + 2 * (bd_ - 8); CompareTestWithParam(width, height, ss_x, ss_y, adjusted_strength, use_32x32, &filter_weight); } else { CompareTestWithParam(width, height, ss_x, ss_y, filter_strength, use_32x32, &filter_weight); } ASSERT_FALSE(HasFailure()); } } } } } TEST_P(YUVTemporalFilterTest, Use16x16) { const int width = 32, height = 32; const int use_32x32 = 0; for (int ss_x = 0; ss_x <= 1; ss_x++) { for (int ss_y = 0; ss_y <= 1; ss_y++) { for (int filter_idx = 0; filter_idx < 3 * 3 * 3 * 3; filter_idx++) { // Set up the filter int filter_weight[4]; int filter_idx_cp = filter_idx; for (int idx = 0; idx < 4; idx++) { filter_weight[idx] = filter_idx_cp % 3; filter_idx_cp /= 3; } // Test each parameter for (int filter_strength = 0; filter_strength <= 6; filter_strength += 2) { if (use_highbd_) { const int adjusted_strength = filter_strength + 2 * (bd_ - 8); CompareTestWithParam(width, height, ss_x, ss_y, adjusted_strength, use_32x32, filter_weight); } else { CompareTestWithParam(width, height, ss_x, ss_y, filter_strength, use_32x32, filter_weight); } ASSERT_FALSE(HasFailure()); } } } } } TEST_P(YUVTemporalFilterTest, SaturationTest) { const int width = 32, height = 32; const int use_32x32 = 1; const int filter_weight = 1; saturate_test_ = 1; for (int ss_x = 0; 
ss_x <= 1; ss_x++) { for (int ss_y = 0; ss_y <= 1; ss_y++) { for (int filter_strength = 0; filter_strength <= 6; filter_strength += 2) { if (use_highbd_) { const int adjusted_strength = filter_strength + 2 * (bd_ - 8); CompareTestWithParam(width, height, ss_x, ss_y, adjusted_strength, use_32x32, &filter_weight); } else { CompareTestWithParam(width, height, ss_x, ss_y, filter_strength, use_32x32, &filter_weight); } ASSERT_FALSE(HasFailure()); } } } } TEST_P(YUVTemporalFilterTest, DISABLED_Speed) { const int width = 32, height = 32; num_repeats_ = 1000; for (int use_32x32 = 0; use_32x32 <= 1; use_32x32++) { const int num_filter_weights = use_32x32 ? 3 : 3 * 3 * 3 * 3; for (int ss_x = 0; ss_x <= 1; ss_x++) { for (int ss_y = 0; ss_y <= 1; ss_y++) { for (int filter_idx = 0; filter_idx < num_filter_weights; filter_idx++) { // Set up the filter int filter_weight[4]; int filter_idx_cp = filter_idx; for (int idx = 0; idx < 4; idx++) { filter_weight[idx] = filter_idx_cp % 3; filter_idx_cp /= 3; } // Test each parameter for (int filter_strength = 0; filter_strength <= 6; filter_strength += 2) { vpx_usec_timer timer; vpx_usec_timer_start(&timer); if (use_highbd_) { RunTestFilterWithParam(width, height, ss_x, ss_y, filter_strength, use_32x32, filter_weight); } else { RunTestFilterWithParam(width, height, ss_x, ss_y, filter_strength, use_32x32, filter_weight); } vpx_usec_timer_mark(&timer); const int elapsed_time = static_cast(vpx_usec_timer_elapsed(&timer)); printf( "Bitdepth: %d, Use 32X32: %d, SS_X: %d, SS_Y: %d, Weight Idx: " "%d, Strength: %d, Time: %5d\n", bd_, use_32x32, ss_x, ss_y, filter_idx, filter_strength, elapsed_time); } } } } } } #if CONFIG_VP9_HIGHBITDEPTH #define WRAP_HIGHBD_FUNC(func, bd) \ void wrap_##func##_##bd( \ const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, \ int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, \ int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, \ int uv_pre_stride, unsigned int block_width, unsigned int block_height, \ int ss_x, int ss_y, int strength, const int *const blk_fw, \ int use_32x32, uint32_t *y_accumulator, uint16_t *y_count, \ uint32_t *u_accumulator, uint16_t *u_count, uint32_t *v_accumulator, \ uint16_t *v_count) { \ func(reinterpret_cast(y_src), y_src_stride, \ reinterpret_cast(y_pre), y_pre_stride, \ reinterpret_cast(u_src), \ reinterpret_cast(v_src), uv_src_stride, \ reinterpret_cast(u_pre), \ reinterpret_cast(v_pre), uv_pre_stride, \ block_width, block_height, ss_x, ss_y, strength, blk_fw, use_32x32, \ y_accumulator, y_count, u_accumulator, u_count, v_accumulator, \ v_count); \ } WRAP_HIGHBD_FUNC(vp9_highbd_apply_temporal_filter_c, 10); WRAP_HIGHBD_FUNC(vp9_highbd_apply_temporal_filter_c, 12); INSTANTIATE_TEST_CASE_P( C, YUVTemporalFilterTest, ::testing::Values( TemporalFilterWithBd(&wrap_vp9_highbd_apply_temporal_filter_c_10, 10), TemporalFilterWithBd(&wrap_vp9_highbd_apply_temporal_filter_c_12, 12))); #if HAVE_SSE4_1 WRAP_HIGHBD_FUNC(vp9_highbd_apply_temporal_filter_sse4_1, 10); WRAP_HIGHBD_FUNC(vp9_highbd_apply_temporal_filter_sse4_1, 12); INSTANTIATE_TEST_CASE_P( SSE4_1, YUVTemporalFilterTest, ::testing::Values( TemporalFilterWithBd(&wrap_vp9_highbd_apply_temporal_filter_sse4_1_10, 10), TemporalFilterWithBd(&wrap_vp9_highbd_apply_temporal_filter_sse4_1_12, 12))); #endif // HAVE_SSE4_1 #else INSTANTIATE_TEST_CASE_P( C, YUVTemporalFilterTest, ::testing::Values(TemporalFilterWithBd(&vp9_apply_temporal_filter_c, 8))); #if HAVE_SSE4_1 INSTANTIATE_TEST_CASE_P(SSE4_1, YUVTemporalFilterTest, 
::testing::Values(TemporalFilterWithBd( &vp9_apply_temporal_filter_sse4_1, 8))); #endif // HAVE_SSE4_1 #endif // CONFIG_VP9_HIGHBITDEPTH } // namespace libvpx-1.8.2/test/yuv_video_source.h000066400000000000000000000077231357355204000175640ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_TEST_YUV_VIDEO_SOURCE_H_ #define VPX_TEST_YUV_VIDEO_SOURCE_H_ #include #include #include #include "test/video_source.h" #include "vpx/vpx_image.h" namespace libvpx_test { // This class extends VideoSource to allow parsing of raw YUV // formats of various color sampling and bit-depths so that we can // do actual file encodes. class YUVVideoSource : public VideoSource { public: YUVVideoSource(const std::string &file_name, vpx_img_fmt format, unsigned int width, unsigned int height, int rate_numerator, int rate_denominator, unsigned int start, int limit) : file_name_(file_name), input_file_(NULL), img_(NULL), start_(start), limit_(limit), frame_(0), width_(0), height_(0), format_(VPX_IMG_FMT_NONE), framerate_numerator_(rate_numerator), framerate_denominator_(rate_denominator) { // This initializes format_, raw_size_, width_, height_ and allocates img. SetSize(width, height, format); } virtual ~YUVVideoSource() { vpx_img_free(img_); if (input_file_) fclose(input_file_); } virtual void Begin() { if (input_file_) fclose(input_file_); input_file_ = OpenTestDataFile(file_name_); ASSERT_TRUE(input_file_ != NULL) << "Input file open failed. Filename: " << file_name_; if (start_) { fseek(input_file_, static_cast(raw_size_) * start_, SEEK_SET); } frame_ = start_; FillFrame(); } virtual void Next() { ++frame_; FillFrame(); } virtual vpx_image_t *img() const { return (frame_ < limit_) ? img_ : NULL; } // Models a stream where Timebase = 1/FPS, so pts == frame. virtual vpx_codec_pts_t pts() const { return frame_; } virtual unsigned long duration() const { return 1; } virtual vpx_rational_t timebase() const { const vpx_rational_t t = { framerate_denominator_, framerate_numerator_ }; return t; } virtual unsigned int frame() const { return frame_; } virtual unsigned int limit() const { return limit_; } virtual void SetSize(unsigned int width, unsigned int height, vpx_img_fmt format) { if (width != width_ || height != height_ || format != format_) { vpx_img_free(img_); img_ = vpx_img_alloc(NULL, format, width, height, 1); ASSERT_TRUE(img_ != NULL); width_ = width; height_ = height; format_ = format; switch (format) { case VPX_IMG_FMT_I420: raw_size_ = width * height * 3 / 2; break; case VPX_IMG_FMT_I422: raw_size_ = width * height * 2; break; case VPX_IMG_FMT_I440: raw_size_ = width * height * 2; break; case VPX_IMG_FMT_I444: raw_size_ = width * height * 3; break; case VPX_IMG_FMT_I42016: raw_size_ = width * height * 3; break; case VPX_IMG_FMT_I42216: raw_size_ = width * height * 4; break; case VPX_IMG_FMT_I44016: raw_size_ = width * height * 4; break; case VPX_IMG_FMT_I44416: raw_size_ = width * height * 6; break; default: ASSERT_TRUE(0); } } } virtual void FillFrame() { ASSERT_TRUE(input_file_ != NULL); // Read a frame from input_file. 
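    // raw_size_ was derived from the format in SetSize() above; e.g. for a
    // hypothetical 352x288 I420 source each frame is 352 * 288 * 3 / 2 =
    // 152064 bytes, and a short read below marks the stream as exhausted by
    // clamping limit_ to the current frame.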
if (fread(img_->img_data, raw_size_, 1, input_file_) == 0) { limit_ = frame_; } } protected: std::string file_name_; FILE *input_file_; vpx_image_t *img_; size_t raw_size_; unsigned int start_; unsigned int limit_; unsigned int frame_; unsigned int width_; unsigned int height_; vpx_img_fmt format_; int framerate_numerator_; int framerate_denominator_; }; } // namespace libvpx_test #endif // VPX_TEST_YUV_VIDEO_SOURCE_H_ libvpx-1.8.2/third_party/000077500000000000000000000000001357355204000153635ustar00rootroot00000000000000libvpx-1.8.2/third_party/googletest/000077500000000000000000000000001357355204000175375ustar00rootroot00000000000000libvpx-1.8.2/third_party/googletest/README.libvpx000066400000000000000000000015301357355204000217210ustar00rootroot00000000000000URL: https://github.com/google/googletest.git Version: release-1.8.1 License: BSD License File: LICENSE Description: Google's framework for writing C++ tests on a variety of platforms (Linux, Mac OS X, Windows, Windows CE, Symbian, etc). Based on the xUnit architecture. Supports automatic test discovery, a rich set of assertions, user-defined assertions, death tests, fatal and non-fatal failures, various options for running the tests, and XML test report generation. Local Modifications: - Remove everything but: googletest-release-1.8.1/googletest/ CHANGES CONTRIBUTORS include LICENSE README.md src - Make WithParamInterface::GetParam static in order to avoid initialization issues https://github.com/google/googletest/pull/1830 - Use wcslen() instead of std::wcslen() https://github.com/google/googletest/pull/1899 libvpx-1.8.2/third_party/googletest/gtest.mk000066400000000000000000000000441357355204000212140ustar00rootroot00000000000000GTEST_SRCS-yes += src/gtest-all.cc libvpx-1.8.2/third_party/googletest/src/000077500000000000000000000000001357355204000203265ustar00rootroot00000000000000libvpx-1.8.2/third_party/googletest/src/CHANGES000066400000000000000000000147651357355204000213360ustar00rootroot00000000000000Changes for 1.7.0: * New feature: death tests are supported on OpenBSD and in iOS simulator now. * New feature: Google Test now implements a protocol to allow a test runner to detect that a test program has exited prematurely and report it as a failure (before it would be falsely reported as a success if the exit code is 0). * New feature: Test::RecordProperty() can now be used outside of the lifespan of a test method, in which case it will be attributed to the current test case or the test program in the XML report. * New feature (potentially breaking): --gtest_list_tests now prints the type parameters and value parameters for each test. * Improvement: char pointers and char arrays are now escaped properly in failure messages. * Improvement: failure summary in XML reports now includes file and line information. * Improvement: the XML element now has a timestamp attribute. * Improvement: When --gtest_filter is specified, XML report now doesn't contain information about tests that are filtered out. * Fixed the bug where long --gtest_filter flag values are truncated in death tests. * Potentially breaking change: RUN_ALL_TESTS() is now implemented as a function instead of a macro in order to work better with Clang. * Compatibility fixes with C++ 11 and various platforms. * Bug/warning fixes. Changes for 1.6.0: * New feature: ADD_FAILURE_AT() for reporting a test failure at the given source location -- useful for writing testing utilities. * New feature: the universal value printer is moved from Google Mock to Google Test. 
* New feature: type parameters and value parameters are reported in the XML report now. * A gtest_disable_pthreads CMake option. * Colored output works in GNU Screen sessions now. * Parameters of value-parameterized tests are now printed in the textual output. * Failures from ad hoc test assertions run before RUN_ALL_TESTS() are now correctly reported. * Arguments of ASSERT_XY and EXPECT_XY no longer need to support << to ostream. * More complete handling of exceptions. * GTEST_ASSERT_XY can be used instead of ASSERT_XY in case the latter name is already used by another library. * --gtest_catch_exceptions is now true by default, allowing a test program to continue after an exception is thrown. * Value-parameterized test fixtures can now derive from Test and WithParamInterface separately, easing conversion of legacy tests. * Death test messages are clearly marked to make them more distinguishable from other messages. * Compatibility fixes for Android, Google Native Client, MinGW, HP UX, PowerPC, Lucid autotools, libCStd, Sun C++, Borland C++ Builder (Code Gear), IBM XL C++ (Visual Age C++), and C++0x. * Bug fixes and implementation clean-ups. * Potentially incompatible changes: disables the harmful 'make install' command in autotools. Changes for 1.5.0: * New feature: assertions can be safely called in multiple threads where the pthreads library is available. * New feature: predicates used inside EXPECT_TRUE() and friends can now generate custom failure messages. * New feature: Google Test can now be compiled as a DLL. * New feature: fused source files are included. * New feature: prints help when encountering unrecognized Google Test flags. * Experimental feature: CMake build script (requires CMake 2.6.4+). * Experimental feature: the Pump script for meta programming. * double values streamed to an assertion are printed with enough precision to differentiate any two different values. * Google Test now works on Solaris and AIX. * Build and test script improvements. * Bug fixes and implementation clean-ups. Potentially breaking changes: * Stopped supporting VC++ 7.1 with exceptions disabled. * Dropped support for 'make install'. Changes for 1.4.0: * New feature: the event listener API * New feature: test shuffling * New feature: the XML report format is closer to junitreport and can be parsed by Hudson now. * New feature: when a test runs under Visual Studio, its failures are integrated in the IDE. * New feature: /MD(d) versions of VC++ projects. * New feature: elapsed time for the tests is printed by default. * New feature: comes with a TR1 tuple implementation such that Boost is no longer needed for Combine(). * New feature: EXPECT_DEATH_IF_SUPPORTED macro and friends. * New feature: the Xcode project can now produce static gtest libraries in addition to a framework. * Compatibility fixes for Solaris, Cygwin, minGW, Windows Mobile, Symbian, gcc, and C++Builder. * Bug fixes and implementation clean-ups. Changes for 1.3.0: * New feature: death tests on Windows, Cygwin, and Mac. * New feature: ability to use Google Test assertions in other testing frameworks. * New feature: ability to run disabled test via --gtest_also_run_disabled_tests. * New feature: the --help flag for printing the usage. * New feature: access to Google Test flag values in user code. * New feature: a script that packs Google Test into one .h and one .cc file for easy deployment. * New feature: support for distributing test functions to multiple machines (requires support from the test runner). 
* Bug fixes and implementation clean-ups. Changes for 1.2.1: * Compatibility fixes for Linux IA-64 and IBM z/OS. * Added support for using Boost and other TR1 implementations. * Changes to the build scripts to support upcoming release of Google C++ Mocking Framework. * Added Makefile to the distribution package. * Improved build instructions in README. Changes for 1.2.0: * New feature: value-parameterized tests. * New feature: the ASSERT/EXPECT_(NON)FATAL_FAILURE(_ON_ALL_THREADS) macros. * Changed the XML report format to match JUnit/Ant's. * Added tests to the Xcode project. * Added scons/SConscript for building with SCons. * Added src/gtest-all.cc for building Google Test from a single file. * Fixed compatibility with Solaris and z/OS. * Enabled running Python tests on systems with python 2.3 installed, e.g. Mac OS X 10.4. * Bug fixes. Changes for 1.1.0: * New feature: type-parameterized tests. * New feature: exception assertions. * New feature: printing elapsed time of tests. * Improved the robustness of death tests. * Added an Xcode project and samples. * Adjusted the output format on Windows to be understandable by Visual Studio. * Minor bug fixes. Changes for 1.0.1: * Added project files for Visual Studio 7.1. * Fixed issues with compiling on Mac OS X. * Fixed issues with compiling on Cygwin. Changes for 1.0.0: * Initial Open Source release of Google Test libvpx-1.8.2/third_party/googletest/src/CONTRIBUTORS000066400000000000000000000025161357355204000222120ustar00rootroot00000000000000# This file contains a list of people who've made non-trivial # contribution to the Google C++ Testing Framework project. People # who commit code to the project are encouraged to add their names # here. Please keep the list sorted by first names. Ajay Joshi Balázs Dán Bharat Mediratta Chandler Carruth Chris Prince Chris Taylor Dan Egnor Eric Roman Hady Zalek Jeffrey Yasskin Jói Sigurðsson Keir Mierle Keith Ray Kenton Varda Manuel Klimek Markus Heule Mika Raento Miklós Fazekas Pasi Valminen Patrick Hanna Patrick Riley Peter Kaminski Preston Jackson Rainer Klaffenboeck Russ Cox Russ Rufer Sean Mcafee Sigurður Ásgeirsson Tracy Bialik Vadim Berman Vlad Losev Zhanyong Wan libvpx-1.8.2/third_party/googletest/src/LICENSE000066400000000000000000000027031357355204000213350ustar00rootroot00000000000000Copyright 2008, Google Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google Inc. nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. libvpx-1.8.2/third_party/googletest/src/README.md000066400000000000000000000336671357355204000216240ustar00rootroot00000000000000### Generic Build Instructions #### Setup To build Google Test and your tests that use it, you need to tell your build system where to find its headers and source files. The exact way to do it depends on which build system you use, and is usually straightforward. #### Build Suppose you put Google Test in directory `${GTEST_DIR}`. To build it, create a library build target (or a project as called by Visual Studio and Xcode) to compile ${GTEST_DIR}/src/gtest-all.cc with `${GTEST_DIR}/include` in the system header search path and `${GTEST_DIR}` in the normal header search path. Assuming a Linux-like system and gcc, something like the following will do: g++ -isystem ${GTEST_DIR}/include -I${GTEST_DIR} \ -pthread -c ${GTEST_DIR}/src/gtest-all.cc ar -rv libgtest.a gtest-all.o (We need `-pthread` as Google Test uses threads.) Next, you should compile your test source file with `${GTEST_DIR}/include` in the system header search path, and link it with gtest and any other necessary libraries: g++ -isystem ${GTEST_DIR}/include -pthread path/to/your_test.cc libgtest.a \ -o your_test As an example, the make/ directory contains a Makefile that you can use to build Google Test on systems where GNU make is available (e.g. Linux, Mac OS X, and Cygwin). It doesn't try to build Google Test's own tests. Instead, it just builds the Google Test library and a sample test. You can use it as a starting point for your own build script. If the default settings are correct for your environment, the following commands should succeed: cd ${GTEST_DIR}/make make ./sample1_unittest If you see errors, try to tweak the contents of `make/Makefile` to make them go away. There are instructions in `make/Makefile` on how to do it. ### Using CMake Google Test comes with a CMake build script ( [CMakeLists.txt](https://github.com/google/googletest/blob/master/CMakeLists.txt)) that can be used on a wide range of platforms ("C" stands for cross-platform.). If you don't have CMake installed already, you can download it for free from . CMake works by generating native makefiles or build projects that can be used in the compiler environment of your choice. You can either build Google Test as a standalone project or it can be incorporated into an existing CMake build for another project. #### Standalone CMake Project When building Google Test as a standalone project, the typical workflow starts with: mkdir mybuild # Create a directory to hold the build output. cd mybuild cmake ${GTEST_DIR} # Generate native build scripts. If you want to build Google Test's samples, you should replace the last command with cmake -Dgtest_build_samples=ON ${GTEST_DIR} If you are on a \*nix system, you should now see a Makefile in the current directory. Just type 'make' to build gtest. If you use Windows and have Visual Studio installed, a `gtest.sln` file and several `.vcproj` files will be created. 
You can then build them using Visual Studio. On Mac OS X with Xcode installed, a `.xcodeproj` file will be generated. #### Incorporating Into An Existing CMake Project If you want to use gtest in a project which already uses CMake, then a more robust and flexible approach is to build gtest as part of that project directly. This is done by making the GoogleTest source code available to the main build and adding it using CMake's `add_subdirectory()` command. This has the significant advantage that the same compiler and linker settings are used between gtest and the rest of your project, so issues associated with using incompatible libraries (eg debug/release), etc. are avoided. This is particularly useful on Windows. Making GoogleTest's source code available to the main build can be done a few different ways: * Download the GoogleTest source code manually and place it at a known location. This is the least flexible approach and can make it more difficult to use with continuous integration systems, etc. * Embed the GoogleTest source code as a direct copy in the main project's source tree. This is often the simplest approach, but is also the hardest to keep up to date. Some organizations may not permit this method. * Add GoogleTest as a git submodule or equivalent. This may not always be possible or appropriate. Git submodules, for example, have their own set of advantages and drawbacks. * Use CMake to download GoogleTest as part of the build's configure step. This is just a little more complex, but doesn't have the limitations of the other methods. The last of the above methods is implemented with a small piece of CMake code in a separate file (e.g. `CMakeLists.txt.in`) which is copied to the build area and then invoked as a sub-build _during the CMake stage_. That directory is then pulled into the main build with `add_subdirectory()`. For example: New file `CMakeLists.txt.in`: cmake_minimum_required(VERSION 2.8.2) project(googletest-download NONE) include(ExternalProject) ExternalProject_Add(googletest GIT_REPOSITORY https://github.com/google/googletest.git GIT_TAG master SOURCE_DIR "${CMAKE_BINARY_DIR}/googletest-src" BINARY_DIR "${CMAKE_BINARY_DIR}/googletest-build" CONFIGURE_COMMAND "" BUILD_COMMAND "" INSTALL_COMMAND "" TEST_COMMAND "" ) Existing build's `CMakeLists.txt`: # Download and unpack googletest at configure time configure_file(CMakeLists.txt.in googletest-download/CMakeLists.txt) execute_process(COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . RESULT_VARIABLE result WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download ) if(result) message(FATAL_ERROR "CMake step for googletest failed: ${result}") endif() execute_process(COMMAND ${CMAKE_COMMAND} --build . RESULT_VARIABLE result WORKING_DIRECTORY ${CMAKE_BINARY_DIR}/googletest-download ) if(result) message(FATAL_ERROR "Build step for googletest failed: ${result}") endif() # Prevent overriding the parent project's compiler/linker # settings on Windows set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) # Add googletest directly to our build. This defines # the gtest and gtest_main targets. add_subdirectory(${CMAKE_BINARY_DIR}/googletest-src ${CMAKE_BINARY_DIR}/googletest-build EXCLUDE_FROM_ALL) # The gtest/gtest_main targets carry header search path # dependencies automatically when using CMake 2.8.11 or # later. Otherwise we have to add them here ourselves. if (CMAKE_VERSION VERSION_LESS 2.8.11) include_directories("${gtest_SOURCE_DIR}/include") endif() # Now simply link against gtest or gtest_main as needed. 
Eg add_executable(example example.cpp) target_link_libraries(example gtest_main) add_test(NAME example_test COMMAND example) Note that this approach requires CMake 2.8.2 or later due to its use of the `ExternalProject_Add()` command. The above technique is discussed in more detail in [this separate article](http://crascit.com/2015/07/25/cmake-gtest/) which also contains a link to a fully generalized implementation of the technique. ##### Visual Studio Dynamic vs Static Runtimes By default, new Visual Studio projects link the C runtimes dynamically but Google Test links them statically. This will generate an error that looks something like the following: gtest.lib(gtest-all.obj) : error LNK2038: mismatch detected for 'RuntimeLibrary': value 'MTd_StaticDebug' doesn't match value 'MDd_DynamicDebug' in main.obj Google Test already has a CMake option for this: `gtest_force_shared_crt` Enabling this option will make gtest link the runtimes dynamically too, and match the project in which it is included. ### Legacy Build Scripts Before settling on CMake, we have been providing hand-maintained build projects/scripts for Visual Studio, Xcode, and Autotools. While we continue to provide them for convenience, they are not actively maintained any more. We highly recommend that you follow the instructions in the above sections to integrate Google Test with your existing build system. If you still need to use the legacy build scripts, here's how: The msvc\ folder contains two solutions with Visual C++ projects. Open the `gtest.sln` or `gtest-md.sln` file using Visual Studio, and you are ready to build Google Test the same way you build any Visual Studio project. Files that have names ending with -md use DLL versions of Microsoft runtime libraries (the /MD or the /MDd compiler option). Files without that suffix use static versions of the runtime libraries (the /MT or the /MTd option). Please note that one must use the same option to compile both gtest and the test code. If you use Visual Studio 2005 or above, we recommend the -md version as /MD is the default for new projects in these versions of Visual Studio. On Mac OS X, open the `gtest.xcodeproj` in the `xcode/` folder using Xcode. Build the "gtest" target. The universal binary framework will end up in your selected build directory (selected in the Xcode "Preferences..." -> "Building" pane and defaults to xcode/build). Alternatively, at the command line, enter: xcodebuild This will build the "Release" configuration of gtest.framework in your default build location. See the "xcodebuild" man page for more information about building different configurations and building in different locations. If you wish to use the Google Test Xcode project with Xcode 4.x and above, you need to either: * update the SDK configuration options in xcode/Config/General.xconfig. Comment options `SDKROOT`, `MACOS_DEPLOYMENT_TARGET`, and `GCC_VERSION`. If you choose this route you lose the ability to target earlier versions of MacOS X. * Install an SDK for an earlier version. This doesn't appear to be supported by Apple, but has been reported to work (http://stackoverflow.com/questions/5378518). ### Tweaking Google Test Google Test can be used in diverse environments. The default configuration may not work (or may not work well) out of the box in some environments. However, you can easily tweak Google Test by defining control macros on the compiler command line. 
Generally, these macros are named like `GTEST_XYZ` and you define them to either 1 or 0 to enable or disable a certain feature. We list the most frequently used macros below. For a complete list, see file [include/gtest/internal/gtest-port.h](https://github.com/google/googletest/blob/master/include/gtest/internal/gtest-port.h). ### Choosing a TR1 Tuple Library Some Google Test features require the C++ Technical Report 1 (TR1) tuple library, which is not yet available with all compilers. The good news is that Google Test implements a subset of TR1 tuple that's enough for its own need, and will automatically use this when the compiler doesn't provide TR1 tuple. Usually you don't need to care about which tuple library Google Test uses. However, if your project already uses TR1 tuple, you need to tell Google Test to use the same TR1 tuple library the rest of your project uses, or the two tuple implementations will clash. To do that, add -DGTEST_USE_OWN_TR1_TUPLE=0 to the compiler flags while compiling Google Test and your tests. If you want to force Google Test to use its own tuple library, just add -DGTEST_USE_OWN_TR1_TUPLE=1 to the compiler flags instead. If you don't want Google Test to use tuple at all, add -DGTEST_HAS_TR1_TUPLE=0 and all features using tuple will be disabled. ### Multi-threaded Tests Google Test is thread-safe where the pthread library is available. After `#include "gtest/gtest.h"`, you can check the `GTEST_IS_THREADSAFE` macro to see whether this is the case (yes if the macro is `#defined` to 1, no if it's undefined.). If Google Test doesn't correctly detect whether pthread is available in your environment, you can force it with -DGTEST_HAS_PTHREAD=1 or -DGTEST_HAS_PTHREAD=0 When Google Test uses pthread, you may need to add flags to your compiler and/or linker to select the pthread library, or you'll get link errors. If you use the CMake script or the deprecated Autotools script, this is taken care of for you. If you use your own build script, you'll need to read your compiler and linker's manual to figure out what flags to add. ### As a Shared Library (DLL) Google Test is compact, so most users can build and link it as a static library for the simplicity. You can choose to use Google Test as a shared library (known as a DLL on Windows) if you prefer. To compile *gtest* as a shared library, add -DGTEST_CREATE_SHARED_LIBRARY=1 to the compiler flags. You'll also need to tell the linker to produce a shared library instead - consult your linker's manual for how to do it. To compile your *tests* that use the gtest shared library, add -DGTEST_LINKED_AS_SHARED_LIBRARY=1 to the compiler flags. Note: while the above steps aren't technically necessary today when using some compilers (e.g. GCC), they may become necessary in the future, if we decide to improve the speed of loading the library (see for details). Therefore you are recommended to always add the above flags when using Google Test as a shared library. Otherwise a future release of Google Test may break your build script. ### Avoiding Macro Name Clashes In C++, macros don't obey namespaces. Therefore two libraries that both define a macro of the same name will clash if you `#include` both definitions. In case a Google Test macro clashes with another library, you can force Google Test to rename its macro to avoid the conflict. 
Specifically, if both Google Test and some other code define macro FOO, you can add -DGTEST_DONT_DEFINE_FOO=1 to the compiler flags to tell Google Test to change the macro's name from `FOO` to `GTEST_FOO`. Currently `FOO` can be `FAIL`, `SUCCEED`, or `TEST`. For example, with `-DGTEST_DONT_DEFINE_TEST=1`, you'll need to write GTEST_TEST(SomeTest, DoesThis) { ... } instead of TEST(SomeTest, DoesThis) { ... } in order to define a test. libvpx-1.8.2/third_party/googletest/src/include/000077500000000000000000000000001357355204000217515ustar00rootroot00000000000000libvpx-1.8.2/third_party/googletest/src/include/gtest/000077500000000000000000000000001357355204000230775ustar00rootroot00000000000000libvpx-1.8.2/third_party/googletest/src/include/gtest/gtest-death-test.h000066400000000000000000000340761357355204000264500ustar00rootroot00000000000000// Copyright 2005, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // The Google C++ Testing and Mocking Framework (Google Test) // // This header file defines the public API for death tests. It is // #included by gtest.h so a user doesn't need to include this // directly. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ #define GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ #include "gtest/internal/gtest-death-test-internal.h" namespace testing { // This flag controls the style of death tests. Valid values are "threadsafe", // meaning that the death test child process will re-execute the test binary // from the start, running only a single death test, or "fast", // meaning that the child process will execute the test logic immediately // after forking. GTEST_DECLARE_string_(death_test_style); #if GTEST_HAS_DEATH_TEST namespace internal { // Returns a Boolean value indicating whether the caller is currently // executing in the context of the death test child process. Tools such as // Valgrind heap checkers may need this to modify their behavior in death // tests. IMPORTANT: This is an internal utility. Using it may break the // implementation of death tests. 
User code MUST NOT use it. GTEST_API_ bool InDeathTestChild(); } // namespace internal // The following macros are useful for writing death tests. // Here's what happens when an ASSERT_DEATH* or EXPECT_DEATH* is // executed: // // 1. It generates a warning if there is more than one active // thread. This is because it's safe to fork() or clone() only // when there is a single thread. // // 2. The parent process clone()s a sub-process and runs the death // test in it; the sub-process exits with code 0 at the end of the // death test, if it hasn't exited already. // // 3. The parent process waits for the sub-process to terminate. // // 4. The parent process checks the exit code and error message of // the sub-process. // // Examples: // // ASSERT_DEATH(server.SendMessage(56, "Hello"), "Invalid port number"); // for (int i = 0; i < 5; i++) { // EXPECT_DEATH(server.ProcessRequest(i), // "Invalid request .* in ProcessRequest()") // << "Failed to die on request " << i; // } // // ASSERT_EXIT(server.ExitNow(), ::testing::ExitedWithCode(0), "Exiting"); // // bool KilledBySIGHUP(int exit_code) { // return WIFSIGNALED(exit_code) && WTERMSIG(exit_code) == SIGHUP; // } // // ASSERT_EXIT(client.HangUpServer(), KilledBySIGHUP, "Hanging up!"); // // On the regular expressions used in death tests: // // GOOGLETEST_CM0005 DO NOT DELETE // On POSIX-compliant systems (*nix), we use the library, // which uses the POSIX extended regex syntax. // // On other platforms (e.g. Windows or Mac), we only support a simple regex // syntax implemented as part of Google Test. This limited // implementation should be enough most of the time when writing // death tests; though it lacks many features you can find in PCRE // or POSIX extended regex syntax. For example, we don't support // union ("x|y"), grouping ("(xy)"), brackets ("[xy]"), and // repetition count ("x{5,7}"), among others. // // Below is the syntax that we do support. We chose it to be a // subset of both PCRE and POSIX extended regex, so it's easy to // learn wherever you come from. In the following: 'A' denotes a // literal character, period (.), or a single \\ escape sequence; // 'x' and 'y' denote regular expressions; 'm' and 'n' are for // natural numbers. // // c matches any literal character c // \\d matches any decimal digit // \\D matches any character that's not a decimal digit // \\f matches \f // \\n matches \n // \\r matches \r // \\s matches any ASCII whitespace, including \n // \\S matches any character that's not a whitespace // \\t matches \t // \\v matches \v // \\w matches any letter, _, or decimal digit // \\W matches any character that \\w doesn't match // \\c matches any literal character c, which must be a punctuation // . matches any single character except \n // A? matches 0 or 1 occurrences of A // A* matches 0 or many occurrences of A // A+ matches 1 or many occurrences of A // ^ matches the beginning of a string (not that of each line) // $ matches the end of a string (not that of each line) // xy matches x followed by y // // If you accidentally use PCRE or POSIX extended regex features // not implemented by us, you will get a run-time failure. In that // case, please try to rewrite your regular expression within the // above syntax. // // This implementation is *not* meant to be as highly tuned or robust // as a compiled regex library, but should perform well enough for a // death test, which already incurs significant overhead by launching // a child process. 
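//
// For example, a PCRE-style pattern such as "Invalid (port|address)" uses
// grouping and union, which this subset rejects at run time; a test can
// instead match a shared prefix such as "Invalid .*", or issue one
// EXPECT_DEATH assertion per alternative.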
// // Known caveats: // // A "threadsafe" style death test obtains the path to the test // program from argv[0] and re-executes it in the sub-process. For // simplicity, the current implementation doesn't search the PATH // when launching the sub-process. This means that the user must // invoke the test program via a path that contains at least one // path separator (e.g. path/to/foo_test and // /absolute/path/to/bar_test are fine, but foo_test is not). This // is rarely a problem as people usually don't put the test binary // directory in PATH. // // FIXME: make thread-safe death tests search the PATH. // Asserts that a given statement causes the program to exit, with an // integer exit status that satisfies predicate, and emitting error output // that matches regex. # define ASSERT_EXIT(statement, predicate, regex) \ GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_FATAL_FAILURE_) // Like ASSERT_EXIT, but continues on to successive tests in the // test case, if any: # define EXPECT_EXIT(statement, predicate, regex) \ GTEST_DEATH_TEST_(statement, predicate, regex, GTEST_NONFATAL_FAILURE_) // Asserts that a given statement causes the program to exit, either by // explicitly exiting with a nonzero exit code or being killed by a // signal, and emitting error output that matches regex. # define ASSERT_DEATH(statement, regex) \ ASSERT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex) // Like ASSERT_DEATH, but continues on to successive tests in the // test case, if any: # define EXPECT_DEATH(statement, regex) \ EXPECT_EXIT(statement, ::testing::internal::ExitedUnsuccessfully, regex) // Two predicate classes that can be used in {ASSERT,EXPECT}_EXIT*: // Tests that an exit code describes a normal exit with a given exit code. class GTEST_API_ ExitedWithCode { public: explicit ExitedWithCode(int exit_code); bool operator()(int exit_status) const; private: // No implementation - assignment is unsupported. void operator=(const ExitedWithCode& other); const int exit_code_; }; # if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA // Tests that an exit code describes an exit due to termination by a // given signal. // GOOGLETEST_CM0006 DO NOT DELETE class GTEST_API_ KilledBySignal { public: explicit KilledBySignal(int signum); bool operator()(int exit_status) const; private: const int signum_; }; # endif // !GTEST_OS_WINDOWS // EXPECT_DEBUG_DEATH asserts that the given statements die in debug mode. // The death testing framework causes this to have interesting semantics, // since the sideeffects of the call are only visible in opt mode, and not // in debug mode. // // In practice, this can be used to test functions that utilize the // LOG(DFATAL) macro using the following style: // // int DieInDebugOr12(int* sideeffect) { // if (sideeffect) { // *sideeffect = 12; // } // LOG(DFATAL) << "death"; // return 12; // } // // TEST(TestCase, TestDieOr12WorksInDgbAndOpt) { // int sideeffect = 0; // // Only asserts in dbg. // EXPECT_DEBUG_DEATH(DieInDebugOr12(&sideeffect), "death"); // // #ifdef NDEBUG // // opt-mode has sideeffect visible. // EXPECT_EQ(12, sideeffect); // #else // // dbg-mode no visible sideeffect. // EXPECT_EQ(0, sideeffect); // #endif // } // // This will assert that DieInDebugReturn12InOpt() crashes in debug // mode, usually due to a DCHECK or LOG(DFATAL), but returns the // appropriate fallback value (12 in this case) in opt mode. If you // need to test that a function has appropriate side-effects in opt // mode, include assertions against the side-effects. 
A general // pattern for this is: // // EXPECT_DEBUG_DEATH({ // // Side-effects here will have an effect after this statement in // // opt mode, but none in debug mode. // EXPECT_EQ(12, DieInDebugOr12(&sideeffect)); // }, "death"); // # ifdef NDEBUG # define EXPECT_DEBUG_DEATH(statement, regex) \ GTEST_EXECUTE_STATEMENT_(statement, regex) # define ASSERT_DEBUG_DEATH(statement, regex) \ GTEST_EXECUTE_STATEMENT_(statement, regex) # else # define EXPECT_DEBUG_DEATH(statement, regex) \ EXPECT_DEATH(statement, regex) # define ASSERT_DEBUG_DEATH(statement, regex) \ ASSERT_DEATH(statement, regex) # endif // NDEBUG for EXPECT_DEBUG_DEATH #endif // GTEST_HAS_DEATH_TEST // This macro is used for implementing macros such as // EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED on systems where // death tests are not supported. Those macros must compile on such systems // iff EXPECT_DEATH and ASSERT_DEATH compile with the same parameters on // systems that support death tests. This allows one to write such a macro // on a system that does not support death tests and be sure that it will // compile on a death-test supporting system. It is exposed publicly so that // systems that have death-tests with stricter requirements than // GTEST_HAS_DEATH_TEST can write their own equivalent of // EXPECT_DEATH_IF_SUPPORTED and ASSERT_DEATH_IF_SUPPORTED. // // Parameters: // statement - A statement that a macro such as EXPECT_DEATH would test // for program termination. This macro has to make sure this // statement is compiled but not executed, to ensure that // EXPECT_DEATH_IF_SUPPORTED compiles with a certain // parameter iff EXPECT_DEATH compiles with it. // regex - A regex that a macro such as EXPECT_DEATH would use to test // the output of statement. This parameter has to be // compiled but not evaluated by this macro, to ensure that // this macro only accepts expressions that a macro such as // EXPECT_DEATH would accept. // terminator - Must be an empty statement for EXPECT_DEATH_IF_SUPPORTED // and a return statement for ASSERT_DEATH_IF_SUPPORTED. // This ensures that ASSERT_DEATH_IF_SUPPORTED will not // compile inside functions where ASSERT_DEATH doesn't // compile. // // The branch that has an always false condition is used to ensure that // statement and regex are compiled (and thus syntactically correct) but // never executed. The unreachable code macro protects the terminator // statement from generating an 'unreachable code' warning in case // statement unconditionally returns or throws. The Message constructor at // the end allows the syntax of streaming additional messages into the // macro, for compilational compatibility with EXPECT_DEATH/ASSERT_DEATH. # define GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, terminator) \ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (::testing::internal::AlwaysTrue()) { \ GTEST_LOG_(WARNING) \ << "Death tests are not supported on this platform.\n" \ << "Statement '" #statement "' cannot be verified."; \ } else if (::testing::internal::AlwaysFalse()) { \ ::testing::internal::RE::PartialMatch(".*", (regex)); \ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ terminator; \ } else \ ::testing::Message() // EXPECT_DEATH_IF_SUPPORTED(statement, regex) and // ASSERT_DEATH_IF_SUPPORTED(statement, regex) expand to real death tests if // death tests are supported; otherwise they just issue a warning. This is // useful when you are combining death test assertions with normal test // assertions in one test. 
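//
// For instance (a sketch only; MyHandler and the expected message are
// made up for illustration), a test mixing both kinds of assertions
// could look like:
//
//   TEST(MyDeathTest, AbortsOnOverflow) {
//     MyHandler handler;
//     ASSERT_TRUE(handler.Init());  // An ordinary, non-death assertion.
//     // Runs as a real death test where death tests are supported;
//     // elsewhere it only emits a warning and skips the statement.
//     EXPECT_DEATH_IF_SUPPORTED(handler.Overflow(), "overflow");
//   }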
#if GTEST_HAS_DEATH_TEST # define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ EXPECT_DEATH(statement, regex) # define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \ ASSERT_DEATH(statement, regex) #else # define EXPECT_DEATH_IF_SUPPORTED(statement, regex) \ GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, ) # define ASSERT_DEATH_IF_SUPPORTED(statement, regex) \ GTEST_UNSUPPORTED_DEATH_TEST(statement, regex, return) #endif } // namespace testing #endif // GTEST_INCLUDE_GTEST_GTEST_DEATH_TEST_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/gtest-message.h000066400000000000000000000222101357355204000260150ustar00rootroot00000000000000// Copyright 2005, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // The Google C++ Testing and Mocking Framework (Google Test) // // This header file defines the Message class. // // IMPORTANT NOTE: Due to limitation of the C++ language, we have to // leave some internal implementation details in this header file. // They are clearly marked by comments like this: // // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. // // Such code is NOT meant to be used by a user directly, and is subject // to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user // program! // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ #define GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ #include #include "gtest/internal/gtest-port.h" GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ /* class A needs to have dll-interface to be used by clients of class B */) // Ensures that there is at least one operator<< in the global namespace. // See Message& operator<<(...) below for why. void operator<<(const testing::internal::Secret&, int); namespace testing { // The Message class works like an ostream repeater. // // Typical usage: // // 1. You stream a bunch of values to a Message object. // It will remember the text in a stringstream. // 2. Then you stream the Message object to an ostream. // This causes the text in the Message to be streamed // to the ostream. 
// // For example; // // testing::Message foo; // foo << 1 << " != " << 2; // std::cout << foo; // // will print "1 != 2". // // Message is not intended to be inherited from. In particular, its // destructor is not virtual. // // Note that stringstream behaves differently in gcc and in MSVC. You // can stream a NULL char pointer to it in the former, but not in the // latter (it causes an access violation if you do). The Message // class hides this difference by treating a NULL char pointer as // "(null)". class GTEST_API_ Message { private: // The type of basic IO manipulators (endl, ends, and flush) for // narrow streams. typedef std::ostream& (*BasicNarrowIoManip)(std::ostream&); public: // Constructs an empty Message. Message(); // Copy constructor. Message(const Message& msg) : ss_(new ::std::stringstream) { // NOLINT *ss_ << msg.GetString(); } // Constructs a Message from a C-string. explicit Message(const char* str) : ss_(new ::std::stringstream) { *ss_ << str; } #if GTEST_OS_SYMBIAN // Streams a value (either a pointer or not) to this object. template inline Message& operator <<(const T& value) { StreamHelper(typename internal::is_pointer::type(), value); return *this; } #else // Streams a non-pointer value to this object. template inline Message& operator <<(const T& val) { // Some libraries overload << for STL containers. These // overloads are defined in the global namespace instead of ::std. // // C++'s symbol lookup rule (i.e. Koenig lookup) says that these // overloads are visible in either the std namespace or the global // namespace, but not other namespaces, including the testing // namespace which Google Test's Message class is in. // // To allow STL containers (and other types that has a << operator // defined in the global namespace) to be used in Google Test // assertions, testing::Message must access the custom << operator // from the global namespace. With this using declaration, // overloads of << defined in the global namespace and those // visible via Koenig lookup are both exposed in this function. using ::operator <<; *ss_ << val; return *this; } // Streams a pointer value to this object. // // This function is an overload of the previous one. When you // stream a pointer to a Message, this definition will be used as it // is more specialized. (The C++ Standard, section // [temp.func.order].) If you stream a non-pointer, then the // previous definition will be used. // // The reason for this overload is that streaming a NULL pointer to // ostream is undefined behavior. Depending on the compiler, you // may get "0", "(nil)", "(null)", or an access violation. To // ensure consistent result across compilers, we always treat NULL // as "(null)". template inline Message& operator <<(T* const& pointer) { // NOLINT if (pointer == NULL) { *ss_ << "(null)"; } else { *ss_ << pointer; } return *this; } #endif // GTEST_OS_SYMBIAN // Since the basic IO manipulators are overloaded for both narrow // and wide streams, we have to provide this specialized definition // of operator <<, even though its body is the same as the // templatized version above. Without this definition, streaming // endl or other basic IO manipulators to Message will confuse the // compiler. Message& operator <<(BasicNarrowIoManip val) { *ss_ << val; return *this; } // Instead of 1/0, we want to see true/false for bool values. Message& operator <<(bool b) { return *this << (b ? "true" : "false"); } // These two overloads allow streaming a wide C string to a Message // using the UTF-8 encoding. 
Message& operator <<(const wchar_t* wide_c_str); Message& operator <<(wchar_t* wide_c_str); #if GTEST_HAS_STD_WSTRING // Converts the given wide string to a narrow string using the UTF-8 // encoding, and streams the result to this Message object. Message& operator <<(const ::std::wstring& wstr); #endif // GTEST_HAS_STD_WSTRING #if GTEST_HAS_GLOBAL_WSTRING // Converts the given wide string to a narrow string using the UTF-8 // encoding, and streams the result to this Message object. Message& operator <<(const ::wstring& wstr); #endif // GTEST_HAS_GLOBAL_WSTRING // Gets the text streamed to this object so far as an std::string. // Each '\0' character in the buffer is replaced with "\\0". // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. std::string GetString() const; private: #if GTEST_OS_SYMBIAN // These are needed as the Nokia Symbian Compiler cannot decide between // const T& and const T* in a function template. The Nokia compiler _can_ // decide between class template specializations for T and T*, so a // tr1::type_traits-like is_pointer works, and we can overload on that. template inline void StreamHelper(internal::true_type /*is_pointer*/, T* pointer) { if (pointer == NULL) { *ss_ << "(null)"; } else { *ss_ << pointer; } } template inline void StreamHelper(internal::false_type /*is_pointer*/, const T& value) { // See the comments in Message& operator <<(const T&) above for why // we need this using statement. using ::operator <<; *ss_ << value; } #endif // GTEST_OS_SYMBIAN // We'll hold the text streamed to this object here. const internal::scoped_ptr< ::std::stringstream> ss_; // We declare (but don't implement) this to prevent the compiler // from implementing the assignment operator. void operator=(const Message&); }; // Streams a Message to an ostream. inline std::ostream& operator <<(std::ostream& os, const Message& sb) { return os << sb.GetString(); } namespace internal { // Converts a streamable value to an std::string. A NULL pointer is // converted to "(null)". When the input value is a ::string, // ::std::string, ::wstring, or ::std::wstring object, each NUL // character in it is replaced with "\\0". template std::string StreamableToString(const T& streamable) { return (Message() << streamable).GetString(); } } // namespace internal } // namespace testing GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 #endif // GTEST_INCLUDE_GTEST_GTEST_MESSAGE_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/gtest-param-test.h000066400000000000000000002262051357355204000264600ustar00rootroot00000000000000// This file was GENERATED by command: // pump.py gtest-param-test.h.pump // DO NOT EDIT BY HAND!!! // Copyright 2008, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. 
// // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Macros and functions for implementing parameterized tests // in Google C++ Testing and Mocking Framework (Google Test) // // This file is generated by a SCRIPT. DO NOT EDIT BY HAND! // // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ #define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ // Value-parameterized tests allow you to test your code with different // parameters without writing multiple copies of the same test. // // Here is how you use value-parameterized tests: #if 0 // To write value-parameterized tests, first you should define a fixture // class. It is usually derived from testing::TestWithParam (see below for // another inheritance scheme that's sometimes useful in more complicated // class hierarchies), where the type of your parameter values. // TestWithParam is itself derived from testing::Test. T can be any // copyable type. If it's a raw pointer, you are responsible for managing the // lifespan of the pointed values. class FooTest : public ::testing::TestWithParam { // You can implement all the usual class fixture members here. }; // Then, use the TEST_P macro to define as many parameterized tests // for this fixture as you want. The _P suffix is for "parameterized" // or "pattern", whichever you prefer to think. TEST_P(FooTest, DoesBlah) { // Inside a test, access the test parameter with the GetParam() method // of the TestWithParam class: EXPECT_TRUE(foo.Blah(GetParam())); ... } TEST_P(FooTest, HasBlahBlah) { ... } // Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test // case with any set of parameters you want. Google Test defines a number // of functions for generating test parameters. They return what we call // (surprise!) parameter generators. Here is a summary of them, which // are all in the testing namespace: // // // Range(begin, end [, step]) - Yields values {begin, begin+step, // begin+step+step, ...}. The values do not // include end. step defaults to 1. // Values(v1, v2, ..., vN) - Yields values {v1, v2, ..., vN}. // ValuesIn(container) - Yields values from a C-style array, an STL // ValuesIn(begin,end) container, or an iterator range [begin, end). // Bool() - Yields sequence {false, true}. // Combine(g1, g2, ..., gN) - Yields all combinations (the Cartesian product // for the math savvy) of the values generated // by the N generators. // // For more details, see comments at the definitions of these functions below // in this file. // // The following statement will instantiate tests from the FooTest test case // each with parameter values "meeny", "miny", and "moe". 
INSTANTIATE_TEST_CASE_P(InstantiationName,
                        FooTest,
                        Values("meeny", "miny", "moe"));

// To distinguish different instances of the pattern (yes, you
// can instantiate it more than once) the first argument to the
// INSTANTIATE_TEST_CASE_P macro is a prefix that will be added to the
// actual test case name. Remember to pick unique prefixes for different
// instantiations. The tests from the instantiation above will have
// these names:
//
//    * InstantiationName/FooTest.DoesBlah/0 for "meeny"
//    * InstantiationName/FooTest.DoesBlah/1 for "miny"
//    * InstantiationName/FooTest.DoesBlah/2 for "moe"
//    * InstantiationName/FooTest.HasBlahBlah/0 for "meeny"
//    * InstantiationName/FooTest.HasBlahBlah/1 for "miny"
//    * InstantiationName/FooTest.HasBlahBlah/2 for "moe"
//
// You can use these names in --gtest_filter.
//
// This statement will instantiate all tests from FooTest again, each
// with parameter values "cat" and "dog":

const char* pets[] = {"cat", "dog"};
INSTANTIATE_TEST_CASE_P(AnotherInstantiationName, FooTest, ValuesIn(pets));

// The tests from the instantiation above will have these names:
//
//    * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat"
//    * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog"
//    * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat"
//    * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog"
//
// Please note that INSTANTIATE_TEST_CASE_P will instantiate all tests
// in the given test case, whether their definitions come before or
// AFTER the INSTANTIATE_TEST_CASE_P statement.
//
// Please also note that generator expressions (including parameters to the
// generators) are evaluated in InitGoogleTest(), after main() has started.
// This allows the user, on one hand, to adjust generator parameters in order
// to dynamically determine a set of tests to run and, on the other hand,
// gives the user a chance to inspect the generated tests with the Google Test
// reflection API before RUN_ALL_TESTS() is executed.
//
// You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc
// for more examples.
//
// In the future, we plan to publish the API for defining new parameter
// generators. But for now this interface remains part of the internal
// implementation and is subject to change.
//
//
// A parameterized test fixture must be derived from testing::Test and from
// testing::WithParamInterface<T>, where T is the type of the parameter
// values. Inheriting from TestWithParam<T> satisfies that requirement because
// TestWithParam<T> inherits from both Test and WithParamInterface<T>. In more
// complicated hierarchies, however, it is occasionally useful to inherit
// separately from Test and WithParamInterface. For example:

class BaseTest : public ::testing::Test {
  // You can inherit all the usual members for a non-parameterized test
  // fixture here.
};

class DerivedTest : public BaseTest,
                    public ::testing::WithParamInterface<int> {
  // The usual test fixture members go here too.
};

TEST_F(BaseTest, HasFoo) {
  // This is an ordinary non-parameterized test.
}

TEST_P(DerivedTest, DoesBlah) {
  // GetParam works just the same here as if you inherit from TestWithParam.
  EXPECT_TRUE(foo.Blah(GetParam()));
}

#endif  // 0

#include "gtest/internal/gtest-port.h"

#if !GTEST_OS_SYMBIAN
# include <utility>
#endif

#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-param-util.h"
#include "gtest/internal/gtest-param-util-generated.h"

namespace testing {

// Functions producing parameter generators.
// // Google Test uses these generators to produce parameters for value- // parameterized tests. When a parameterized test case is instantiated // with a particular generator, Google Test creates and runs tests // for each element in the sequence produced by the generator. // // In the following sample, tests from test case FooTest are instantiated // each three times with parameter values 3, 5, and 8: // // class FooTest : public TestWithParam { ... }; // // TEST_P(FooTest, TestThis) { // } // TEST_P(FooTest, TestThat) { // } // INSTANTIATE_TEST_CASE_P(TestSequence, FooTest, Values(3, 5, 8)); // // Range() returns generators providing sequences of values in a range. // // Synopsis: // Range(start, end) // - returns a generator producing a sequence of values {start, start+1, // start+2, ..., }. // Range(start, end, step) // - returns a generator producing a sequence of values {start, start+step, // start+step+step, ..., }. // Notes: // * The generated sequences never include end. For example, Range(1, 5) // returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2) // returns a generator producing {1, 3, 5, 7}. // * start and end must have the same type. That type may be any integral or // floating-point type or a user defined type satisfying these conditions: // * It must be assignable (have operator=() defined). // * It must have operator+() (operator+(int-compatible type) for // two-operand version). // * It must have operator<() defined. // Elements in the resulting sequences will also have that type. // * Condition start < end must be satisfied in order for resulting sequences // to contain any elements. // template internal::ParamGenerator Range(T start, T end, IncrementT step) { return internal::ParamGenerator( new internal::RangeGenerator(start, end, step)); } template internal::ParamGenerator Range(T start, T end) { return Range(start, end, 1); } // ValuesIn() function allows generation of tests with parameters coming from // a container. // // Synopsis: // ValuesIn(const T (&array)[N]) // - returns a generator producing sequences with elements from // a C-style array. // ValuesIn(const Container& container) // - returns a generator producing sequences with elements from // an STL-style container. // ValuesIn(Iterator begin, Iterator end) // - returns a generator producing sequences with elements from // a range [begin, end) defined by a pair of STL-style iterators. These // iterators can also be plain C pointers. // // Please note that ValuesIn copies the values from the containers // passed in and keeps them to generate tests in RUN_ALL_TESTS(). 
// // Examples: // // This instantiates tests from test case StringTest // each with C-string values of "foo", "bar", and "baz": // // const char* strings[] = {"foo", "bar", "baz"}; // INSTANTIATE_TEST_CASE_P(StringSequence, StringTest, ValuesIn(strings)); // // This instantiates tests from test case StlStringTest // each with STL strings with values "a" and "b": // // ::std::vector< ::std::string> GetParameterStrings() { // ::std::vector< ::std::string> v; // v.push_back("a"); // v.push_back("b"); // return v; // } // // INSTANTIATE_TEST_CASE_P(CharSequence, // StlStringTest, // ValuesIn(GetParameterStrings())); // // // This will also instantiate tests from CharTest // each with parameter values 'a' and 'b': // // ::std::list GetParameterChars() { // ::std::list list; // list.push_back('a'); // list.push_back('b'); // return list; // } // ::std::list l = GetParameterChars(); // INSTANTIATE_TEST_CASE_P(CharSequence2, // CharTest, // ValuesIn(l.begin(), l.end())); // template internal::ParamGenerator< typename ::testing::internal::IteratorTraits::value_type> ValuesIn(ForwardIterator begin, ForwardIterator end) { typedef typename ::testing::internal::IteratorTraits ::value_type ParamType; return internal::ParamGenerator( new internal::ValuesInIteratorRangeGenerator(begin, end)); } template internal::ParamGenerator ValuesIn(const T (&array)[N]) { return ValuesIn(array, array + N); } template internal::ParamGenerator ValuesIn( const Container& container) { return ValuesIn(container.begin(), container.end()); } // Values() allows generating tests from explicitly specified list of // parameters. // // Synopsis: // Values(T v1, T v2, ..., T vN) // - returns a generator producing sequences with elements v1, v2, ..., vN. // // For example, this instantiates tests from test case BarTest each // with values "one", "two", and "three": // // INSTANTIATE_TEST_CASE_P(NumSequence, BarTest, Values("one", "two", "three")); // // This instantiates tests from test case BazTest each with values 1, 2, 3.5. // The exact type of values will depend on the type of parameter in BazTest. // // INSTANTIATE_TEST_CASE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5)); // // Currently, Values() supports from 1 to 50 parameters. 
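//
// As a further sketch (BlockSizeTest and the sizes below are made-up
// names, not part of Google Test), Values() and Range() can drive two
// separate instantiations of one fixture, provided the prefixes differ:
//
//   class BlockSizeTest : public ::testing::TestWithParam<int> {};
//
//   TEST_P(BlockSizeTest, HandlesSize) { ... }
//
//   // Range(4, 64, 4) yields {4, 8, ..., 60}; the end value 64 is
//   // excluded, as described above.
//   INSTANTIATE_TEST_CASE_P(Small, BlockSizeTest,
//                           ::testing::Range(4, 64, 4));
//   // Hand-picked sizes via Values().
//   INSTANTIATE_TEST_CASE_P(Large, BlockSizeTest,
//                           ::testing::Values(128, 256, 1024));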
// template internal::ValueArray1 Values(T1 v1) { return internal::ValueArray1(v1); } template internal::ValueArray2 Values(T1 v1, T2 v2) { return internal::ValueArray2(v1, v2); } template internal::ValueArray3 Values(T1 v1, T2 v2, T3 v3) { return internal::ValueArray3(v1, v2, v3); } template internal::ValueArray4 Values(T1 v1, T2 v2, T3 v3, T4 v4) { return internal::ValueArray4(v1, v2, v3, v4); } template internal::ValueArray5 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) { return internal::ValueArray5(v1, v2, v3, v4, v5); } template internal::ValueArray6 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) { return internal::ValueArray6(v1, v2, v3, v4, v5, v6); } template internal::ValueArray7 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) { return internal::ValueArray7(v1, v2, v3, v4, v5, v6, v7); } template internal::ValueArray8 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) { return internal::ValueArray8(v1, v2, v3, v4, v5, v6, v7, v8); } template internal::ValueArray9 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) { return internal::ValueArray9(v1, v2, v3, v4, v5, v6, v7, v8, v9); } template internal::ValueArray10 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) { return internal::ValueArray10(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10); } template internal::ValueArray11 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11) { return internal::ValueArray11(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11); } template internal::ValueArray12 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12) { return internal::ValueArray12(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12); } template internal::ValueArray13 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13) { return internal::ValueArray13(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13); } template internal::ValueArray14 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) { return internal::ValueArray14(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14); } template internal::ValueArray15 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) { return internal::ValueArray15(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15); } template internal::ValueArray16 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16) { return internal::ValueArray16(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16); } template internal::ValueArray17 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17) { return internal::ValueArray17(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17); } template internal::ValueArray18 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18) { return internal::ValueArray18(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18); } template internal::ValueArray19 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19) { return 
internal::ValueArray19(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19); } template internal::ValueArray20 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20) { return internal::ValueArray20(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20); } template internal::ValueArray21 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21) { return internal::ValueArray21(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21); } template internal::ValueArray22 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22) { return internal::ValueArray22(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22); } template internal::ValueArray23 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23) { return internal::ValueArray23(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23); } template internal::ValueArray24 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24) { return internal::ValueArray24(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24); } template internal::ValueArray25 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25) { return internal::ValueArray25(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25); } template internal::ValueArray26 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26) { return internal::ValueArray26(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26); } template internal::ValueArray27 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27) { return internal::ValueArray27(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27); } template internal::ValueArray28 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28) { return internal::ValueArray28(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, 
v28); } template internal::ValueArray29 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29) { return internal::ValueArray29(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29); } template internal::ValueArray30 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30) { return internal::ValueArray30(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30); } template internal::ValueArray31 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31) { return internal::ValueArray31(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31); } template internal::ValueArray32 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32) { return internal::ValueArray32(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32); } template internal::ValueArray33 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33) { return internal::ValueArray33(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33); } template internal::ValueArray34 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34) { return internal::ValueArray34(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34); } template internal::ValueArray35 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35) { return internal::ValueArray35(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35); } template internal::ValueArray36 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, 
T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36) { return internal::ValueArray36(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36); } template internal::ValueArray37 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37) { return internal::ValueArray37(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37); } template internal::ValueArray38 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38) { return internal::ValueArray38(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38); } template internal::ValueArray39 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39) { return internal::ValueArray39(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39); } template internal::ValueArray40 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40) { return internal::ValueArray40(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40); } template internal::ValueArray41 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41) { return internal::ValueArray41(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41); } template internal::ValueArray42 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, 
T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42) { return internal::ValueArray42(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42); } template internal::ValueArray43 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43) { return internal::ValueArray43(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43); } template internal::ValueArray44 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44) { return internal::ValueArray44(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44); } template internal::ValueArray45 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45) { return internal::ValueArray45(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45); } template internal::ValueArray46 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46) { return internal::ValueArray46(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46); } template internal::ValueArray47 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 
v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47) { return internal::ValueArray47(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47); } template internal::ValueArray48 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48) { return internal::ValueArray48(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48); } template internal::ValueArray49 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, T49 v49) { return internal::ValueArray49(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49); } template internal::ValueArray50 Values(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, T49 v49, T50 v50) { return internal::ValueArray50(v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15, v16, v17, v18, v19, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29, v30, v31, v32, v33, v34, v35, v36, v37, v38, v39, v40, v41, v42, v43, v44, v45, v46, v47, v48, v49, v50); } // Bool() allows generating tests with parameters in a set of (false, true). // // Synopsis: // Bool() // - returns a generator producing sequences with elements {false, true}. // // It is useful when testing code that depends on Boolean flags. Combinations // of multiple flags can be tested when several Bool()'s are combined using // Combine() function. // // In the following example all tests in the test case FlagDependentTest // will be instantiated twice with parameters false and true. // // class FlagDependentTest : public testing::TestWithParam { // virtual void SetUp() { // external_flag = GetParam(); // } // } // INSTANTIATE_TEST_CASE_P(BoolSequence, FlagDependentTest, Bool()); // inline internal::ParamGenerator Bool() { return Values(false, true); } # if GTEST_HAS_COMBINE // Combine() allows the user to combine two or more sequences to produce // values of a Cartesian product of those sequences' elements. 
// // Synopsis: // Combine(gen1, gen2, ..., genN) // - returns a generator producing sequences with elements coming from // the Cartesian product of elements from the sequences generated by // gen1, gen2, ..., genN. The sequence elements will have a type of // tuple where T1, T2, ..., TN are the types // of elements from sequences produces by gen1, gen2, ..., genN. // // Combine can have up to 10 arguments. This number is currently limited // by the maximum number of elements in the tuple implementation used by Google // Test. // // Example: // // This will instantiate tests in test case AnimalTest each one with // the parameter values tuple("cat", BLACK), tuple("cat", WHITE), // tuple("dog", BLACK), and tuple("dog", WHITE): // // enum Color { BLACK, GRAY, WHITE }; // class AnimalTest // : public testing::TestWithParam > {...}; // // TEST_P(AnimalTest, AnimalLooksNice) {...} // // INSTANTIATE_TEST_CASE_P(AnimalVariations, AnimalTest, // Combine(Values("cat", "dog"), // Values(BLACK, WHITE))); // // This will instantiate tests in FlagDependentTest with all variations of two // Boolean flags: // // class FlagDependentTest // : public testing::TestWithParam > { // virtual void SetUp() { // // Assigns external_flag_1 and external_flag_2 values from the tuple. // tie(external_flag_1, external_flag_2) = GetParam(); // } // }; // // TEST_P(FlagDependentTest, TestFeature1) { // // Test your code using external_flag_1 and external_flag_2 here. // } // INSTANTIATE_TEST_CASE_P(TwoBoolSequence, FlagDependentTest, // Combine(Bool(), Bool())); // template internal::CartesianProductHolder2 Combine( const Generator1& g1, const Generator2& g2) { return internal::CartesianProductHolder2( g1, g2); } template internal::CartesianProductHolder3 Combine( const Generator1& g1, const Generator2& g2, const Generator3& g3) { return internal::CartesianProductHolder3( g1, g2, g3); } template internal::CartesianProductHolder4 Combine( const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4) { return internal::CartesianProductHolder4( g1, g2, g3, g4); } template internal::CartesianProductHolder5 Combine( const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4, const Generator5& g5) { return internal::CartesianProductHolder5( g1, g2, g3, g4, g5); } template internal::CartesianProductHolder6 Combine( const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4, const Generator5& g5, const Generator6& g6) { return internal::CartesianProductHolder6( g1, g2, g3, g4, g5, g6); } template internal::CartesianProductHolder7 Combine( const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4, const Generator5& g5, const Generator6& g6, const Generator7& g7) { return internal::CartesianProductHolder7( g1, g2, g3, g4, g5, g6, g7); } template internal::CartesianProductHolder8 Combine( const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4, const Generator5& g5, const Generator6& g6, const Generator7& g7, const Generator8& g8) { return internal::CartesianProductHolder8( g1, g2, g3, g4, g5, g6, g7, g8); } template internal::CartesianProductHolder9 Combine( const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4, const Generator5& g5, const Generator6& g6, const Generator7& g7, const Generator8& g8, const Generator9& g9) { return internal::CartesianProductHolder9( g1, g2, g3, g4, g5, g6, g7, g8, g9); } template internal::CartesianProductHolder10 Combine( const 
Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4, const Generator5& g5, const Generator6& g6, const Generator7& g7, const Generator8& g8, const Generator9& g9, const Generator10& g10) { return internal::CartesianProductHolder10( g1, g2, g3, g4, g5, g6, g7, g8, g9, g10); } # endif // GTEST_HAS_COMBINE # define TEST_P(test_case_name, test_name) \ class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \ : public test_case_name { \ public: \ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \ virtual void TestBody(); \ private: \ static int AddToRegistry() { \ ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \ GetTestCasePatternHolder(\ #test_case_name, \ ::testing::internal::CodeLocation(\ __FILE__, __LINE__))->AddTestPattern(\ GTEST_STRINGIFY_(test_case_name), \ GTEST_STRINGIFY_(test_name), \ new ::testing::internal::TestMetaFactory< \ GTEST_TEST_CLASS_NAME_(\ test_case_name, test_name)>()); \ return 0; \ } \ static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \ GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \ }; \ int GTEST_TEST_CLASS_NAME_(test_case_name, \ test_name)::gtest_registering_dummy_ = \ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() // The optional last argument to INSTANTIATE_TEST_CASE_P allows the user // to specify a function or functor that generates custom test name suffixes // based on the test parameters. The function should accept one argument of // type testing::TestParamInfo, and return std::string. // // testing::PrintToStringParamName is a builtin test suffix generator that // returns the value of testing::PrintToString(GetParam()). // // Note: test names must be non-empty, unique, and may only contain ASCII // alphanumeric characters or underscore. Because PrintToString adds quotes // to std::string and C strings, it won't work for these types. # define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator, ...) \ static ::testing::internal::ParamGenerator \ gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \ static ::std::string gtest_##prefix##test_case_name##_EvalGenerateName_( \ const ::testing::TestParamInfo& info) { \ return ::testing::internal::GetParamNameGen \ (__VA_ARGS__)(info); \ } \ static int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \ ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \ GetTestCasePatternHolder(\ #test_case_name, \ ::testing::internal::CodeLocation(\ __FILE__, __LINE__))->AddTestCaseInstantiation(\ #prefix, \ >est_##prefix##test_case_name##_EvalGenerator_, \ >est_##prefix##test_case_name##_EvalGenerateName_, \ __FILE__, __LINE__) } // namespace testing #endif // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/gtest-param-test.h.pump000066400000000000000000000466311357355204000274430ustar00rootroot00000000000000$$ -*- mode: c++; -*- $var n = 50 $$ Maximum length of Values arguments we want to support. $var maxtuple = 10 $$ Maximum number of Combine arguments we want to support. // Copyright 2008, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. 
// * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Macros and functions for implementing parameterized tests // in Google C++ Testing and Mocking Framework (Google Test) // // This file is generated by a SCRIPT. DO NOT EDIT BY HAND! // // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ #define GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ // Value-parameterized tests allow you to test your code with different // parameters without writing multiple copies of the same test. // // Here is how you use value-parameterized tests: #if 0 // To write value-parameterized tests, first you should define a fixture // class. It is usually derived from testing::TestWithParam (see below for // another inheritance scheme that's sometimes useful in more complicated // class hierarchies), where the type of your parameter values. // TestWithParam is itself derived from testing::Test. T can be any // copyable type. If it's a raw pointer, you are responsible for managing the // lifespan of the pointed values. class FooTest : public ::testing::TestWithParam { // You can implement all the usual class fixture members here. }; // Then, use the TEST_P macro to define as many parameterized tests // for this fixture as you want. The _P suffix is for "parameterized" // or "pattern", whichever you prefer to think. TEST_P(FooTest, DoesBlah) { // Inside a test, access the test parameter with the GetParam() method // of the TestWithParam class: EXPECT_TRUE(foo.Blah(GetParam())); ... } TEST_P(FooTest, HasBlahBlah) { ... } // Finally, you can use INSTANTIATE_TEST_CASE_P to instantiate the test // case with any set of parameters you want. Google Test defines a number // of functions for generating test parameters. They return what we call // (surprise!) parameter generators. Here is a summary of them, which // are all in the testing namespace: // // // Range(begin, end [, step]) - Yields values {begin, begin+step, // begin+step+step, ...}. The values do not // include end. step defaults to 1. // Values(v1, v2, ..., vN) - Yields values {v1, v2, ..., vN}. // ValuesIn(container) - Yields values from a C-style array, an STL // ValuesIn(begin,end) container, or an iterator range [begin, end). // Bool() - Yields sequence {false, true}. 
// Combine(g1, g2, ..., gN) - Yields all combinations (the Cartesian product // for the math savvy) of the values generated // by the N generators. // // For more details, see comments at the definitions of these functions below // in this file. // // The following statement will instantiate tests from the FooTest test case // each with parameter values "meeny", "miny", and "moe". INSTANTIATE_TEST_CASE_P(InstantiationName, FooTest, Values("meeny", "miny", "moe")); // To distinguish different instances of the pattern, (yes, you // can instantiate it more then once) the first argument to the // INSTANTIATE_TEST_CASE_P macro is a prefix that will be added to the // actual test case name. Remember to pick unique prefixes for different // instantiations. The tests from the instantiation above will have // these names: // // * InstantiationName/FooTest.DoesBlah/0 for "meeny" // * InstantiationName/FooTest.DoesBlah/1 for "miny" // * InstantiationName/FooTest.DoesBlah/2 for "moe" // * InstantiationName/FooTest.HasBlahBlah/0 for "meeny" // * InstantiationName/FooTest.HasBlahBlah/1 for "miny" // * InstantiationName/FooTest.HasBlahBlah/2 for "moe" // // You can use these names in --gtest_filter. // // This statement will instantiate all tests from FooTest again, each // with parameter values "cat" and "dog": const char* pets[] = {"cat", "dog"}; INSTANTIATE_TEST_CASE_P(AnotherInstantiationName, FooTest, ValuesIn(pets)); // The tests from the instantiation above will have these names: // // * AnotherInstantiationName/FooTest.DoesBlah/0 for "cat" // * AnotherInstantiationName/FooTest.DoesBlah/1 for "dog" // * AnotherInstantiationName/FooTest.HasBlahBlah/0 for "cat" // * AnotherInstantiationName/FooTest.HasBlahBlah/1 for "dog" // // Please note that INSTANTIATE_TEST_CASE_P will instantiate all tests // in the given test case, whether their definitions come before or // AFTER the INSTANTIATE_TEST_CASE_P statement. // // Please also note that generator expressions (including parameters to the // generators) are evaluated in InitGoogleTest(), after main() has started. // This allows the user on one hand, to adjust generator parameters in order // to dynamically determine a set of tests to run and on the other hand, // give the user a chance to inspect the generated tests with Google Test // reflection API before RUN_ALL_TESTS() is executed. // // You can see samples/sample7_unittest.cc and samples/sample8_unittest.cc // for more examples. // // In the future, we plan to publish the API for defining new parameter // generators. But for now this interface remains part of the internal // implementation and is subject to change. // // // A parameterized test fixture must be derived from testing::Test and from // testing::WithParamInterface, where T is the type of the parameter // values. Inheriting from TestWithParam satisfies that requirement because // TestWithParam inherits from both Test and WithParamInterface. In more // complicated hierarchies, however, it is occasionally useful to inherit // separately from Test and WithParamInterface. For example: class BaseTest : public ::testing::Test { // You can inherit all the usual members for a non-parameterized test // fixture here. }; class DerivedTest : public BaseTest, public ::testing::WithParamInterface { // The usual test fixture members go here too. }; TEST_F(BaseTest, HasFoo) { // This is an ordinary non-parameterized test. } TEST_P(DerivedTest, DoesBlah) { // GetParam works just the same here as if you inherit from TestWithParam. 
EXPECT_TRUE(foo.Blah(GetParam())); } #endif // 0 #include "gtest/internal/gtest-port.h" #if !GTEST_OS_SYMBIAN # include #endif #include "gtest/internal/gtest-internal.h" #include "gtest/internal/gtest-param-util.h" #include "gtest/internal/gtest-param-util-generated.h" namespace testing { // Functions producing parameter generators. // // Google Test uses these generators to produce parameters for value- // parameterized tests. When a parameterized test case is instantiated // with a particular generator, Google Test creates and runs tests // for each element in the sequence produced by the generator. // // In the following sample, tests from test case FooTest are instantiated // each three times with parameter values 3, 5, and 8: // // class FooTest : public TestWithParam { ... }; // // TEST_P(FooTest, TestThis) { // } // TEST_P(FooTest, TestThat) { // } // INSTANTIATE_TEST_CASE_P(TestSequence, FooTest, Values(3, 5, 8)); // // Range() returns generators providing sequences of values in a range. // // Synopsis: // Range(start, end) // - returns a generator producing a sequence of values {start, start+1, // start+2, ..., }. // Range(start, end, step) // - returns a generator producing a sequence of values {start, start+step, // start+step+step, ..., }. // Notes: // * The generated sequences never include end. For example, Range(1, 5) // returns a generator producing a sequence {1, 2, 3, 4}. Range(1, 9, 2) // returns a generator producing {1, 3, 5, 7}. // * start and end must have the same type. That type may be any integral or // floating-point type or a user defined type satisfying these conditions: // * It must be assignable (have operator=() defined). // * It must have operator+() (operator+(int-compatible type) for // two-operand version). // * It must have operator<() defined. // Elements in the resulting sequences will also have that type. // * Condition start < end must be satisfied in order for resulting sequences // to contain any elements. // template internal::ParamGenerator Range(T start, T end, IncrementT step) { return internal::ParamGenerator( new internal::RangeGenerator(start, end, step)); } template internal::ParamGenerator Range(T start, T end) { return Range(start, end, 1); } // ValuesIn() function allows generation of tests with parameters coming from // a container. // // Synopsis: // ValuesIn(const T (&array)[N]) // - returns a generator producing sequences with elements from // a C-style array. // ValuesIn(const Container& container) // - returns a generator producing sequences with elements from // an STL-style container. // ValuesIn(Iterator begin, Iterator end) // - returns a generator producing sequences with elements from // a range [begin, end) defined by a pair of STL-style iterators. These // iterators can also be plain C pointers. // // Please note that ValuesIn copies the values from the containers // passed in and keeps them to generate tests in RUN_ALL_TESTS(). 
// // Examples: // // This instantiates tests from test case StringTest // each with C-string values of "foo", "bar", and "baz": // // const char* strings[] = {"foo", "bar", "baz"}; // INSTANTIATE_TEST_CASE_P(StringSequence, StringTest, ValuesIn(strings)); // // This instantiates tests from test case StlStringTest // each with STL strings with values "a" and "b": // // ::std::vector< ::std::string> GetParameterStrings() { // ::std::vector< ::std::string> v; // v.push_back("a"); // v.push_back("b"); // return v; // } // // INSTANTIATE_TEST_CASE_P(CharSequence, // StlStringTest, // ValuesIn(GetParameterStrings())); // // // This will also instantiate tests from CharTest // each with parameter values 'a' and 'b': // // ::std::list GetParameterChars() { // ::std::list list; // list.push_back('a'); // list.push_back('b'); // return list; // } // ::std::list l = GetParameterChars(); // INSTANTIATE_TEST_CASE_P(CharSequence2, // CharTest, // ValuesIn(l.begin(), l.end())); // template internal::ParamGenerator< typename ::testing::internal::IteratorTraits::value_type> ValuesIn(ForwardIterator begin, ForwardIterator end) { typedef typename ::testing::internal::IteratorTraits ::value_type ParamType; return internal::ParamGenerator( new internal::ValuesInIteratorRangeGenerator(begin, end)); } template internal::ParamGenerator ValuesIn(const T (&array)[N]) { return ValuesIn(array, array + N); } template internal::ParamGenerator ValuesIn( const Container& container) { return ValuesIn(container.begin(), container.end()); } // Values() allows generating tests from explicitly specified list of // parameters. // // Synopsis: // Values(T v1, T v2, ..., T vN) // - returns a generator producing sequences with elements v1, v2, ..., vN. // // For example, this instantiates tests from test case BarTest each // with values "one", "two", and "three": // // INSTANTIATE_TEST_CASE_P(NumSequence, BarTest, Values("one", "two", "three")); // // This instantiates tests from test case BazTest each with values 1, 2, 3.5. // The exact type of values will depend on the type of parameter in BazTest. // // INSTANTIATE_TEST_CASE_P(FloatingNumbers, BazTest, Values(1, 2, 3.5)); // // Currently, Values() supports from 1 to $n parameters. // $range i 1..n $for i [[ $range j 1..i template <$for j, [[typename T$j]]> internal::ValueArray$i<$for j, [[T$j]]> Values($for j, [[T$j v$j]]) { return internal::ValueArray$i<$for j, [[T$j]]>($for j, [[v$j]]); } ]] // Bool() allows generating tests with parameters in a set of (false, true). // // Synopsis: // Bool() // - returns a generator producing sequences with elements {false, true}. // // It is useful when testing code that depends on Boolean flags. Combinations // of multiple flags can be tested when several Bool()'s are combined using // Combine() function. // // In the following example all tests in the test case FlagDependentTest // will be instantiated twice with parameters false and true. // // class FlagDependentTest : public testing::TestWithParam { // virtual void SetUp() { // external_flag = GetParam(); // } // } // INSTANTIATE_TEST_CASE_P(BoolSequence, FlagDependentTest, Bool()); // inline internal::ParamGenerator Bool() { return Values(false, true); } # if GTEST_HAS_COMBINE // Combine() allows the user to combine two or more sequences to produce // values of a Cartesian product of those sequences' elements. 
// // Synopsis: // Combine(gen1, gen2, ..., genN) // - returns a generator producing sequences with elements coming from // the Cartesian product of elements from the sequences generated by // gen1, gen2, ..., genN. The sequence elements will have a type of // tuple where T1, T2, ..., TN are the types // of elements from sequences produces by gen1, gen2, ..., genN. // // Combine can have up to $maxtuple arguments. This number is currently limited // by the maximum number of elements in the tuple implementation used by Google // Test. // // Example: // // This will instantiate tests in test case AnimalTest each one with // the parameter values tuple("cat", BLACK), tuple("cat", WHITE), // tuple("dog", BLACK), and tuple("dog", WHITE): // // enum Color { BLACK, GRAY, WHITE }; // class AnimalTest // : public testing::TestWithParam > {...}; // // TEST_P(AnimalTest, AnimalLooksNice) {...} // // INSTANTIATE_TEST_CASE_P(AnimalVariations, AnimalTest, // Combine(Values("cat", "dog"), // Values(BLACK, WHITE))); // // This will instantiate tests in FlagDependentTest with all variations of two // Boolean flags: // // class FlagDependentTest // : public testing::TestWithParam > { // virtual void SetUp() { // // Assigns external_flag_1 and external_flag_2 values from the tuple. // tie(external_flag_1, external_flag_2) = GetParam(); // } // }; // // TEST_P(FlagDependentTest, TestFeature1) { // // Test your code using external_flag_1 and external_flag_2 here. // } // INSTANTIATE_TEST_CASE_P(TwoBoolSequence, FlagDependentTest, // Combine(Bool(), Bool())); // $range i 2..maxtuple $for i [[ $range j 1..i template <$for j, [[typename Generator$j]]> internal::CartesianProductHolder$i<$for j, [[Generator$j]]> Combine( $for j, [[const Generator$j& g$j]]) { return internal::CartesianProductHolder$i<$for j, [[Generator$j]]>( $for j, [[g$j]]); } ]] # endif // GTEST_HAS_COMBINE # define TEST_P(test_case_name, test_name) \ class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \ : public test_case_name { \ public: \ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {} \ virtual void TestBody(); \ private: \ static int AddToRegistry() { \ ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \ GetTestCasePatternHolder(\ #test_case_name, \ ::testing::internal::CodeLocation(\ __FILE__, __LINE__))->AddTestPattern(\ GTEST_STRINGIFY_(test_case_name), \ GTEST_STRINGIFY_(test_name), \ new ::testing::internal::TestMetaFactory< \ GTEST_TEST_CLASS_NAME_(\ test_case_name, test_name)>()); \ return 0; \ } \ static int gtest_registering_dummy_ GTEST_ATTRIBUTE_UNUSED_; \ GTEST_DISALLOW_COPY_AND_ASSIGN_(\ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)); \ }; \ int GTEST_TEST_CLASS_NAME_(test_case_name, \ test_name)::gtest_registering_dummy_ = \ GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::AddToRegistry(); \ void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody() // The optional last argument to INSTANTIATE_TEST_CASE_P allows the user // to specify a function or functor that generates custom test name suffixes // based on the test parameters. The function should accept one argument of // type testing::TestParamInfo, and return std::string. // // testing::PrintToStringParamName is a builtin test suffix generator that // returns the value of testing::PrintToString(GetParam()). // // Note: test names must be non-empty, unique, and may only contain ASCII // alphanumeric characters or underscore. Because PrintToString adds quotes // to std::string and C strings, it won't work for these types. 
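// For illustration only: a minimal sketch of the optional name-generator
// argument described above. The fixture FooTest and the functor MyParamName
// below are hypothetical.
//
//   struct MyParamName {
//     std::string operator()(
//         const ::testing::TestParamInfo<int>& info) const {
//       return "Value" + ::testing::PrintToString(info.param);
//     }
//   };
//   INSTANTIATE_TEST_CASE_P(Sweep, FooTest, ::testing::Values(1, 2, 3),
//                           MyParamName());
//
// This would name the instantiated tests Sweep/FooTest.DoesBlah/Value1,
// Sweep/FooTest.DoesBlah/Value2, and Sweep/FooTest.DoesBlah/Value3.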
# define INSTANTIATE_TEST_CASE_P(prefix, test_case_name, generator, ...) \ static ::testing::internal::ParamGenerator \ gtest_##prefix##test_case_name##_EvalGenerator_() { return generator; } \ static ::std::string gtest_##prefix##test_case_name##_EvalGenerateName_( \ const ::testing::TestParamInfo& info) { \ return ::testing::internal::GetParamNameGen \ (__VA_ARGS__)(info); \ } \ static int gtest_##prefix##test_case_name##_dummy_ GTEST_ATTRIBUTE_UNUSED_ = \ ::testing::UnitTest::GetInstance()->parameterized_test_registry(). \ GetTestCasePatternHolder(\ #test_case_name, \ ::testing::internal::CodeLocation(\ __FILE__, __LINE__))->AddTestCaseInstantiation(\ #prefix, \ >est_##prefix##test_case_name##_EvalGenerator_, \ >est_##prefix##test_case_name##_EvalGenerateName_, \ __FILE__, __LINE__) } // namespace testing #endif // GTEST_INCLUDE_GTEST_GTEST_PARAM_TEST_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/gtest-printers.h000066400000000000000000001155761357355204000262610ustar00rootroot00000000000000// Copyright 2007, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Google Test - The Google C++ Testing and Mocking Framework // // This file implements a universal value printer that can print a // value of any type T: // // void ::testing::internal::UniversalPrinter::Print(value, ostream_ptr); // // A user can teach this function how to print a class type T by // defining either operator<<() or PrintTo() in the namespace that // defines T. More specifically, the FIRST defined function in the // following list will be used (assuming T is defined in namespace // foo): // // 1. foo::PrintTo(const T&, ostream*) // 2. operator<<(ostream&, const T&) defined in either foo or the // global namespace. // // However if T is an STL-style container then it is printed element-wise // unless foo::PrintTo(const T&, ostream*) is defined. Note that // operator<<() is ignored for container types. 
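// For illustration only: a minimal sketch of option 1 above, teaching the
// universal printer about a user-defined type via PrintTo(). The namespace
// foo and the struct Bar below are hypothetical.
//
//   namespace foo {
//   struct Bar { int x; };
//   void PrintTo(const Bar& bar, ::std::ostream* os) {
//     *os << "Bar(" << bar.x << ")";
//   }
//   }  // namespace foo
//
// With this in place, assertion failure messages involving foo::Bar values
// print something like "Bar(42)" instead of raw bytes.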
// // If none of the above is defined, it will print the debug string of // the value if it is a protocol buffer, or print the raw bytes in the // value otherwise. // // To aid debugging: when T is a reference type, the address of the // value is also printed; when T is a (const) char pointer, both the // pointer value and the NUL-terminated string it points to are // printed. // // We also provide some convenient wrappers: // // // Prints a value to a string. For a (const or not) char // // pointer, the NUL-terminated string (but not the pointer) is // // printed. // std::string ::testing::PrintToString(const T& value); // // // Prints a value tersely: for a reference type, the referenced // // value (but not the address) is printed; for a (const or not) char // // pointer, the NUL-terminated string (but not the pointer) is // // printed. // void ::testing::internal::UniversalTersePrint(const T& value, ostream*); // // // Prints value using the type inferred by the compiler. The difference // // from UniversalTersePrint() is that this function prints both the // // pointer and the NUL-terminated string for a (const or not) char pointer. // void ::testing::internal::UniversalPrint(const T& value, ostream*); // // // Prints the fields of a tuple tersely to a string vector, one // // element for each field. Tuple support must be enabled in // // gtest-port.h. // std::vector UniversalTersePrintTupleFieldsToStrings( // const Tuple& value); // // Known limitation: // // The print primitives print the elements of an STL-style container // using the compiler-inferred type of *iter where iter is a // const_iterator of the container. When const_iterator is an input // iterator but not a forward iterator, this inferred type may not // match value_type, and the print output may be incorrect. In // practice, this is rarely a problem as for most containers // const_iterator is a forward iterator. We'll fix this if there's an // actual need for it. Note that this fix cannot rely on value_type // being defined as many user-defined container types don't have // value_type. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ #define GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ #include // NOLINT #include #include #include #include #include "gtest/internal/gtest-port.h" #include "gtest/internal/gtest-internal.h" #if GTEST_HAS_STD_TUPLE_ # include #endif #if GTEST_HAS_ABSL #include "absl/strings/string_view.h" #include "absl/types/optional.h" #include "absl/types/variant.h" #endif // GTEST_HAS_ABSL namespace testing { // Definitions in the 'internal' and 'internal2' name spaces are // subject to change without notice. DO NOT USE THEM IN USER CODE! namespace internal2 { // Prints the given number of bytes in the given object to the given // ostream. GTEST_API_ void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count, ::std::ostream* os); // For selecting which printer to use when a given type has neither << // nor PrintTo(). enum TypeKind { kProtobuf, // a protobuf type kConvertibleToInteger, // a type implicitly convertible to BiggestInt // (e.g. a named or unnamed enum type) #if GTEST_HAS_ABSL kConvertibleToStringView, // a type implicitly convertible to // absl::string_view #endif kOtherType // anything else }; // TypeWithoutFormatter::PrintValue(value, os) is called // by the universal printer to print a value of type T when neither // operator<< nor PrintTo() is defined for T, where kTypeKind is the // "kind" of T as defined by enum TypeKind. 
template class TypeWithoutFormatter { public: // This default version is called when kTypeKind is kOtherType. static void PrintValue(const T& value, ::std::ostream* os) { PrintBytesInObjectTo(static_cast( reinterpret_cast(&value)), sizeof(value), os); } }; // We print a protobuf using its ShortDebugString() when the string // doesn't exceed this many characters; otherwise we print it using // DebugString() for better readability. const size_t kProtobufOneLinerMaxLength = 50; template class TypeWithoutFormatter { public: static void PrintValue(const T& value, ::std::ostream* os) { std::string pretty_str = value.ShortDebugString(); if (pretty_str.length() > kProtobufOneLinerMaxLength) { pretty_str = "\n" + value.DebugString(); } *os << ("<" + pretty_str + ">"); } }; template class TypeWithoutFormatter { public: // Since T has no << operator or PrintTo() but can be implicitly // converted to BiggestInt, we print it as a BiggestInt. // // Most likely T is an enum type (either named or unnamed), in which // case printing it as an integer is the desired behavior. In case // T is not an enum, printing it as an integer is the best we can do // given that it has no user-defined printer. static void PrintValue(const T& value, ::std::ostream* os) { const internal::BiggestInt kBigInt = value; *os << kBigInt; } }; #if GTEST_HAS_ABSL template class TypeWithoutFormatter { public: // Since T has neither operator<< nor PrintTo() but can be implicitly // converted to absl::string_view, we print it as a absl::string_view. // // Note: the implementation is further below, as it depends on // internal::PrintTo symbol which is defined later in the file. static void PrintValue(const T& value, ::std::ostream* os); }; #endif // Prints the given value to the given ostream. If the value is a // protocol message, its debug string is printed; if it's an enum or // of a type implicitly convertible to BiggestInt, it's printed as an // integer; otherwise the bytes in the value are printed. This is // what UniversalPrinter::Print() does when it knows nothing about // type T and T has neither << operator nor PrintTo(). // // A user can override this behavior for a class type Foo by defining // a << operator in the namespace where Foo is defined. // // We put this operator in namespace 'internal2' instead of 'internal' // to simplify the implementation, as much code in 'internal' needs to // use << in STL, which would conflict with our own << were it defined // in 'internal'. // // Note that this operator<< takes a generic std::basic_ostream type instead of the more restricted std::ostream. If // we define it to take an std::ostream instead, we'll get an // "ambiguous overloads" compiler error when trying to print a type // Foo that supports streaming to std::basic_ostream, as the compiler cannot tell whether // operator<<(std::ostream&, const T&) or // operator<<(std::basic_stream, const Foo&) is more // specific. template ::std::basic_ostream& operator<<( ::std::basic_ostream& os, const T& x) { TypeWithoutFormatter::value ? kProtobuf : internal::ImplicitlyConvertible< const T&, internal::BiggestInt>::value ? kConvertibleToInteger : #if GTEST_HAS_ABSL internal::ImplicitlyConvertible< const T&, absl::string_view>::value ? kConvertibleToStringView : #endif kOtherType)>::PrintValue(x, &os); return os; } } // namespace internal2 } // namespace testing // This namespace MUST NOT BE NESTED IN ::testing, or the name look-up // magic needed for implementing UniversalPrinter won't work. 
namespace testing_internal { // Used to print a value that is not an STL-style container when the // user doesn't define PrintTo() for it. template void DefaultPrintNonContainerTo(const T& value, ::std::ostream* os) { // With the following statement, during unqualified name lookup, // testing::internal2::operator<< appears as if it was declared in // the nearest enclosing namespace that contains both // ::testing_internal and ::testing::internal2, i.e. the global // namespace. For more details, refer to the C++ Standard section // 7.3.4-1 [namespace.udir]. This allows us to fall back onto // testing::internal2::operator<< in case T doesn't come with a << // operator. // // We cannot write 'using ::testing::internal2::operator<<;', which // gcc 3.3 fails to compile due to a compiler bug. using namespace ::testing::internal2; // NOLINT // Assuming T is defined in namespace foo, in the next statement, // the compiler will consider all of: // // 1. foo::operator<< (thanks to Koenig look-up), // 2. ::operator<< (as the current namespace is enclosed in ::), // 3. testing::internal2::operator<< (thanks to the using statement above). // // The operator<< whose type matches T best will be picked. // // We deliberately allow #2 to be a candidate, as sometimes it's // impossible to define #1 (e.g. when foo is ::std, defining // anything in it is undefined behavior unless you are a compiler // vendor.). *os << value; } } // namespace testing_internal namespace testing { namespace internal { // FormatForComparison::Format(value) formats a // value of type ToPrint that is an operand of a comparison assertion // (e.g. ASSERT_EQ). OtherOperand is the type of the other operand in // the comparison, and is used to help determine the best way to // format the value. In particular, when the value is a C string // (char pointer) and the other operand is an STL string object, we // want to format the C string as a string, since we know it is // compared by value with the string object. If the value is a char // pointer but the other operand is not an STL string object, we don't // know whether the pointer is supposed to point to a NUL-terminated // string, and thus want to print it as a pointer to be safe. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. // The default case. template class FormatForComparison { public: static ::std::string Format(const ToPrint& value) { return ::testing::PrintToString(value); } }; // Array. template class FormatForComparison { public: static ::std::string Format(const ToPrint* value) { return FormatForComparison::Format(value); } }; // By default, print C string as pointers to be safe, as we don't know // whether they actually point to a NUL-terminated string. #define GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(CharType) \ template \ class FormatForComparison { \ public: \ static ::std::string Format(CharType* value) { \ return ::testing::PrintToString(static_cast(value)); \ } \ } GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(char); GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const char); GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(wchar_t); GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_(const wchar_t); #undef GTEST_IMPL_FORMAT_C_STRING_AS_POINTER_ // If a C string is compared with an STL string object, we know it's meant // to point to a NUL-terminated string, and thus can print it as a string. 
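// For illustration (hypothetical values): given const char* c_str = "hi",
// EXPECT_EQ(::std::string("hi"), c_str) formats c_str as the string "hi",
// while EXPECT_EQ(some_void_ptr, c_str) falls back to formatting c_str as
// a raw pointer, since nothing in that comparison promises NUL-termination.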
#define GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(CharType, OtherStringType) \ template <> \ class FormatForComparison { \ public: \ static ::std::string Format(CharType* value) { \ return ::testing::PrintToString(value); \ } \ } GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::std::string); GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::std::string); #if GTEST_HAS_GLOBAL_STRING GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(char, ::string); GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const char, ::string); #endif #if GTEST_HAS_GLOBAL_WSTRING GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::wstring); GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::wstring); #endif #if GTEST_HAS_STD_WSTRING GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(wchar_t, ::std::wstring); GTEST_IMPL_FORMAT_C_STRING_AS_STRING_(const wchar_t, ::std::wstring); #endif #undef GTEST_IMPL_FORMAT_C_STRING_AS_STRING_ // Formats a comparison assertion (e.g. ASSERT_EQ, EXPECT_LT, and etc) // operand to be used in a failure message. The type (but not value) // of the other operand may affect the format. This allows us to // print a char* as a raw pointer when it is compared against another // char* or void*, and print it as a C string when it is compared // against an std::string object, for example. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. template std::string FormatForComparisonFailureMessage( const T1& value, const T2& /* other_operand */) { return FormatForComparison::Format(value); } // UniversalPrinter::Print(value, ostream_ptr) prints the given // value to the given ostream. The caller must ensure that // 'ostream_ptr' is not NULL, or the behavior is undefined. // // We define UniversalPrinter as a class template (as opposed to a // function template), as we need to partially specialize it for // reference types, which cannot be done with function templates. template class UniversalPrinter; template void UniversalPrint(const T& value, ::std::ostream* os); enum DefaultPrinterType { kPrintContainer, kPrintPointer, kPrintFunctionPointer, kPrintOther, }; template struct WrapPrinterType {}; // Used to print an STL-style container when the user doesn't define // a PrintTo() for it. template void DefaultPrintTo(WrapPrinterType /* dummy */, const C& container, ::std::ostream* os) { const size_t kMaxCount = 32; // The maximum number of elements to print. *os << '{'; size_t count = 0; for (typename C::const_iterator it = container.begin(); it != container.end(); ++it, ++count) { if (count > 0) { *os << ','; if (count == kMaxCount) { // Enough has been printed. *os << " ..."; break; } } *os << ' '; // We cannot call PrintTo(*it, os) here as PrintTo() doesn't // handle *it being a native array. internal::UniversalPrint(*it, os); } if (count > 0) { *os << ' '; } *os << '}'; } // Used to print a pointer that is neither a char pointer nor a member // pointer, when the user doesn't define PrintTo() for it. (A member // variable pointer or member function pointer doesn't really point to // a location in the address space. Their representation is // implementation-defined. Therefore they will be printed as raw // bytes.) template void DefaultPrintTo(WrapPrinterType /* dummy */, T* p, ::std::ostream* os) { if (p == NULL) { *os << "NULL"; } else { // T is not a function type. We just call << to print p, // relying on ADL to pick up user-defined << for their pointer // types, if any. 
*os << p; } } template void DefaultPrintTo(WrapPrinterType /* dummy */, T* p, ::std::ostream* os) { if (p == NULL) { *os << "NULL"; } else { // T is a function type, so '*os << p' doesn't do what we want // (it just prints p as bool). We want to print p as a const // void*. *os << reinterpret_cast(p); } } // Used to print a non-container, non-pointer value when the user // doesn't define PrintTo() for it. template void DefaultPrintTo(WrapPrinterType /* dummy */, const T& value, ::std::ostream* os) { ::testing_internal::DefaultPrintNonContainerTo(value, os); } // Prints the given value using the << operator if it has one; // otherwise prints the bytes in it. This is what // UniversalPrinter::Print() does when PrintTo() is not specialized // or overloaded for type T. // // A user can override this behavior for a class type Foo by defining // an overload of PrintTo() in the namespace where Foo is defined. We // give the user this option as sometimes defining a << operator for // Foo is not desirable (e.g. the coding style may prevent doing it, // or there is already a << operator but it doesn't do what the user // wants). template void PrintTo(const T& value, ::std::ostream* os) { // DefaultPrintTo() is overloaded. The type of its first argument // determines which version will be picked. // // Note that we check for container types here, prior to we check // for protocol message types in our operator<<. The rationale is: // // For protocol messages, we want to give people a chance to // override Google Mock's format by defining a PrintTo() or // operator<<. For STL containers, other formats can be // incompatible with Google Mock's format for the container // elements; therefore we check for container types here to ensure // that our format is used. // // Note that MSVC and clang-cl do allow an implicit conversion from // pointer-to-function to pointer-to-object, but clang-cl warns on it. // So don't use ImplicitlyConvertible if it can be helped since it will // cause this warning, and use a separate overload of DefaultPrintTo for // function pointers so that the `*os << p` in the object pointer overload // doesn't cause that warning either. DefaultPrintTo( WrapPrinterType < (sizeof(IsContainerTest(0)) == sizeof(IsContainer)) && !IsRecursiveContainer::value ? kPrintContainer : !is_pointer::value ? kPrintOther #if GTEST_LANG_CXX11 : std::is_function::type>::value #else : !internal::ImplicitlyConvertible::value #endif ? kPrintFunctionPointer : kPrintPointer > (), value, os); } // The following list of PrintTo() overloads tells // UniversalPrinter::Print() how to print standard types (built-in // types, strings, plain arrays, and pointers). // Overloads for various char types. GTEST_API_ void PrintTo(unsigned char c, ::std::ostream* os); GTEST_API_ void PrintTo(signed char c, ::std::ostream* os); inline void PrintTo(char c, ::std::ostream* os) { // When printing a plain char, we always treat it as unsigned. This // way, the output won't be affected by whether the compiler thinks // char is signed or not. PrintTo(static_cast(c), os); } // Overloads for other simple built-in types. inline void PrintTo(bool x, ::std::ostream* os) { *os << (x ? "true" : "false"); } // Overload for wchar_t type. // Prints a wchar_t as a symbol if it is printable or as its internal // code otherwise and also as its decimal code (except for L'\0'). // The L'\0' char is printed as "L'\\0'". 
The decimal code is printed // as signed integer when wchar_t is implemented by the compiler // as a signed type and is printed as an unsigned integer when wchar_t // is implemented as an unsigned type. GTEST_API_ void PrintTo(wchar_t wc, ::std::ostream* os); // Overloads for C strings. GTEST_API_ void PrintTo(const char* s, ::std::ostream* os); inline void PrintTo(char* s, ::std::ostream* os) { PrintTo(ImplicitCast_(s), os); } // signed/unsigned char is often used for representing binary data, so // we print pointers to it as void* to be safe. inline void PrintTo(const signed char* s, ::std::ostream* os) { PrintTo(ImplicitCast_(s), os); } inline void PrintTo(signed char* s, ::std::ostream* os) { PrintTo(ImplicitCast_(s), os); } inline void PrintTo(const unsigned char* s, ::std::ostream* os) { PrintTo(ImplicitCast_(s), os); } inline void PrintTo(unsigned char* s, ::std::ostream* os) { PrintTo(ImplicitCast_(s), os); } // MSVC can be configured to define wchar_t as a typedef of unsigned // short. It defines _NATIVE_WCHAR_T_DEFINED when wchar_t is a native // type. When wchar_t is a typedef, defining an overload for const // wchar_t* would cause unsigned short* be printed as a wide string, // possibly causing invalid memory accesses. #if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) // Overloads for wide C strings GTEST_API_ void PrintTo(const wchar_t* s, ::std::ostream* os); inline void PrintTo(wchar_t* s, ::std::ostream* os) { PrintTo(ImplicitCast_(s), os); } #endif // Overload for C arrays. Multi-dimensional arrays are printed // properly. // Prints the given number of elements in an array, without printing // the curly braces. template void PrintRawArrayTo(const T a[], size_t count, ::std::ostream* os) { UniversalPrint(a[0], os); for (size_t i = 1; i != count; i++) { *os << ", "; UniversalPrint(a[i], os); } } // Overloads for ::string and ::std::string. #if GTEST_HAS_GLOBAL_STRING GTEST_API_ void PrintStringTo(const ::string&s, ::std::ostream* os); inline void PrintTo(const ::string& s, ::std::ostream* os) { PrintStringTo(s, os); } #endif // GTEST_HAS_GLOBAL_STRING GTEST_API_ void PrintStringTo(const ::std::string&s, ::std::ostream* os); inline void PrintTo(const ::std::string& s, ::std::ostream* os) { PrintStringTo(s, os); } // Overloads for ::wstring and ::std::wstring. #if GTEST_HAS_GLOBAL_WSTRING GTEST_API_ void PrintWideStringTo(const ::wstring&s, ::std::ostream* os); inline void PrintTo(const ::wstring& s, ::std::ostream* os) { PrintWideStringTo(s, os); } #endif // GTEST_HAS_GLOBAL_WSTRING #if GTEST_HAS_STD_WSTRING GTEST_API_ void PrintWideStringTo(const ::std::wstring&s, ::std::ostream* os); inline void PrintTo(const ::std::wstring& s, ::std::ostream* os) { PrintWideStringTo(s, os); } #endif // GTEST_HAS_STD_WSTRING #if GTEST_HAS_ABSL // Overload for absl::string_view. inline void PrintTo(absl::string_view sp, ::std::ostream* os) { PrintTo(::std::string(sp), os); } #endif // GTEST_HAS_ABSL #if GTEST_LANG_CXX11 inline void PrintTo(std::nullptr_t, ::std::ostream* os) { *os << "(nullptr)"; } #endif // GTEST_LANG_CXX11 #if GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_ // Helper function for printing a tuple. T must be instantiated with // a tuple type. template void PrintTupleTo(const T& t, ::std::ostream* os); #endif // GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_ #if GTEST_HAS_TR1_TUPLE // Overload for ::std::tr1::tuple. Needed for printing function arguments, // which are packed as tuples. // Overloaded PrintTo() for tuples of various arities. 
We support // tuples of up-to 10 fields. The following implementation works // regardless of whether tr1::tuple is implemented using the // non-standard variadic template feature or not. inline void PrintTo(const ::std::tr1::tuple<>& t, ::std::ostream* os) { PrintTupleTo(t, os); } template void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { PrintTupleTo(t, os); } template void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { PrintTupleTo(t, os); } template void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { PrintTupleTo(t, os); } template void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { PrintTupleTo(t, os); } template void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { PrintTupleTo(t, os); } template void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { PrintTupleTo(t, os); } template void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { PrintTupleTo(t, os); } template void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { PrintTupleTo(t, os); } template void PrintTo(const ::std::tr1::tuple& t, ::std::ostream* os) { PrintTupleTo(t, os); } template void PrintTo( const ::std::tr1::tuple& t, ::std::ostream* os) { PrintTupleTo(t, os); } #endif // GTEST_HAS_TR1_TUPLE #if GTEST_HAS_STD_TUPLE_ template void PrintTo(const ::std::tuple& t, ::std::ostream* os) { PrintTupleTo(t, os); } #endif // GTEST_HAS_STD_TUPLE_ // Overload for std::pair. template void PrintTo(const ::std::pair& value, ::std::ostream* os) { *os << '('; // We cannot use UniversalPrint(value.first, os) here, as T1 may be // a reference type. The same for printing value.second. UniversalPrinter::Print(value.first, os); *os << ", "; UniversalPrinter::Print(value.second, os); *os << ')'; } // Implements printing a non-reference type T by letting the compiler // pick the right overload of PrintTo() for T. template class UniversalPrinter { public: // MSVC warns about adding const to a function type, so we want to // disable the warning. GTEST_DISABLE_MSC_WARNINGS_PUSH_(4180) // Note: we deliberately don't call this PrintTo(), as that name // conflicts with ::testing::internal::PrintTo in the body of the // function. static void Print(const T& value, ::std::ostream* os) { // By default, ::testing::internal::PrintTo() is used for printing // the value. // // Thanks to Koenig look-up, if T is a class and has its own // PrintTo() function defined in its namespace, that function will // be visible here. Since it is more specific than the generic ones // in ::testing::internal, it will be picked by the compiler in the // following statement - exactly what we want. PrintTo(value, os); } GTEST_DISABLE_MSC_WARNINGS_POP_() }; #if GTEST_HAS_ABSL // Printer for absl::optional template class UniversalPrinter<::absl::optional> { public: static void Print(const ::absl::optional& value, ::std::ostream* os) { *os << '('; if (!value) { *os << "nullopt"; } else { UniversalPrint(*value, os); } *os << ')'; } }; // Printer for absl::variant template class UniversalPrinter<::absl::variant> { public: static void Print(const ::absl::variant& value, ::std::ostream* os) { *os << '('; absl::visit(Visitor{os}, value); *os << ')'; } private: struct Visitor { template void operator()(const U& u) const { *os << "'" << GetTypeName() << "' with value "; UniversalPrint(u, os); } ::std::ostream* os; }; }; #endif // GTEST_HAS_ABSL // UniversalPrintArray(begin, len, os) prints an array of 'len' // elements, starting at address 'begin'. 
template void UniversalPrintArray(const T* begin, size_t len, ::std::ostream* os) { if (len == 0) { *os << "{}"; } else { *os << "{ "; const size_t kThreshold = 18; const size_t kChunkSize = 8; // If the array has more than kThreshold elements, we'll have to // omit some details by printing only the first and the last // kChunkSize elements. // FIXME: let the user control the threshold using a flag. if (len <= kThreshold) { PrintRawArrayTo(begin, len, os); } else { PrintRawArrayTo(begin, kChunkSize, os); *os << ", ..., "; PrintRawArrayTo(begin + len - kChunkSize, kChunkSize, os); } *os << " }"; } } // This overload prints a (const) char array compactly. GTEST_API_ void UniversalPrintArray( const char* begin, size_t len, ::std::ostream* os); // This overload prints a (const) wchar_t array compactly. GTEST_API_ void UniversalPrintArray( const wchar_t* begin, size_t len, ::std::ostream* os); // Implements printing an array type T[N]. template class UniversalPrinter { public: // Prints the given array, omitting some elements when there are too // many. static void Print(const T (&a)[N], ::std::ostream* os) { UniversalPrintArray(a, N, os); } }; // Implements printing a reference type T&. template class UniversalPrinter { public: // MSVC warns about adding const to a function type, so we want to // disable the warning. GTEST_DISABLE_MSC_WARNINGS_PUSH_(4180) static void Print(const T& value, ::std::ostream* os) { // Prints the address of the value. We use reinterpret_cast here // as static_cast doesn't compile when T is a function type. *os << "@" << reinterpret_cast(&value) << " "; // Then prints the value itself. UniversalPrint(value, os); } GTEST_DISABLE_MSC_WARNINGS_POP_() }; // Prints a value tersely: for a reference type, the referenced value // (but not the address) is printed; for a (const) char pointer, the // NUL-terminated string (but not the pointer) is printed. template class UniversalTersePrinter { public: static void Print(const T& value, ::std::ostream* os) { UniversalPrint(value, os); } }; template class UniversalTersePrinter { public: static void Print(const T& value, ::std::ostream* os) { UniversalPrint(value, os); } }; template class UniversalTersePrinter { public: static void Print(const T (&value)[N], ::std::ostream* os) { UniversalPrinter::Print(value, os); } }; template <> class UniversalTersePrinter { public: static void Print(const char* str, ::std::ostream* os) { if (str == NULL) { *os << "NULL"; } else { UniversalPrint(std::string(str), os); } } }; template <> class UniversalTersePrinter { public: static void Print(char* str, ::std::ostream* os) { UniversalTersePrinter::Print(str, os); } }; #if GTEST_HAS_STD_WSTRING template <> class UniversalTersePrinter { public: static void Print(const wchar_t* str, ::std::ostream* os) { if (str == NULL) { *os << "NULL"; } else { UniversalPrint(::std::wstring(str), os); } } }; #endif template <> class UniversalTersePrinter { public: static void Print(wchar_t* str, ::std::ostream* os) { UniversalTersePrinter::Print(str, os); } }; template void UniversalTersePrint(const T& value, ::std::ostream* os) { UniversalTersePrinter::Print(value, os); } // Prints a value using the type inferred by the compiler. The // difference between this and UniversalTersePrint() is that for a // (const) char pointer, this prints both the pointer and the // NUL-terminated string. template void UniversalPrint(const T& value, ::std::ostream* os) { // A workarond for the bug in VC++ 7.1 that prevents us from instantiating // UniversalPrinter with T directly. 
typedef T T1; UniversalPrinter::Print(value, os); } typedef ::std::vector< ::std::string> Strings; // TuplePolicy must provide: // - tuple_size // size of tuple TupleT. // - get(const TupleT& t) // static function extracting element I of tuple TupleT. // - tuple_element::type // type of element I of tuple TupleT. template struct TuplePolicy; #if GTEST_HAS_TR1_TUPLE template struct TuplePolicy { typedef TupleT Tuple; static const size_t tuple_size = ::std::tr1::tuple_size::value; template struct tuple_element : ::std::tr1::tuple_element(I), Tuple> { }; template static typename AddReference(I), Tuple>::type>::type get(const Tuple& tuple) { return ::std::tr1::get(tuple); } }; template const size_t TuplePolicy::tuple_size; #endif // GTEST_HAS_TR1_TUPLE #if GTEST_HAS_STD_TUPLE_ template struct TuplePolicy< ::std::tuple > { typedef ::std::tuple Tuple; static const size_t tuple_size = ::std::tuple_size::value; template struct tuple_element : ::std::tuple_element {}; template static const typename ::std::tuple_element::type& get( const Tuple& tuple) { return ::std::get(tuple); } }; template const size_t TuplePolicy< ::std::tuple >::tuple_size; #endif // GTEST_HAS_STD_TUPLE_ #if GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_ // This helper template allows PrintTo() for tuples and // UniversalTersePrintTupleFieldsToStrings() to be defined by // induction on the number of tuple fields. The idea is that // TuplePrefixPrinter::PrintPrefixTo(t, os) prints the first N // fields in tuple t, and can be defined in terms of // TuplePrefixPrinter. // // The inductive case. template struct TuplePrefixPrinter { // Prints the first N fields of a tuple. template static void PrintPrefixTo(const Tuple& t, ::std::ostream* os) { TuplePrefixPrinter::PrintPrefixTo(t, os); GTEST_INTENTIONAL_CONST_COND_PUSH_() if (N > 1) { GTEST_INTENTIONAL_CONST_COND_POP_() *os << ", "; } UniversalPrinter< typename TuplePolicy::template tuple_element::type> ::Print(TuplePolicy::template get(t), os); } // Tersely prints the first N fields of a tuple to a string vector, // one element for each field. template static void TersePrintPrefixToStrings(const Tuple& t, Strings* strings) { TuplePrefixPrinter::TersePrintPrefixToStrings(t, strings); ::std::stringstream ss; UniversalTersePrint(TuplePolicy::template get(t), &ss); strings->push_back(ss.str()); } }; // Base case. template <> struct TuplePrefixPrinter<0> { template static void PrintPrefixTo(const Tuple&, ::std::ostream*) {} template static void TersePrintPrefixToStrings(const Tuple&, Strings*) {} }; // Helper function for printing a tuple. // Tuple must be either std::tr1::tuple or std::tuple type. template void PrintTupleTo(const Tuple& t, ::std::ostream* os) { *os << "("; TuplePrefixPrinter::tuple_size>::PrintPrefixTo(t, os); *os << ")"; } // Prints the fields of a tuple tersely to a string vector, one // element for each field. See the comment before // UniversalTersePrint() for how we define "tersely". 
template Strings UniversalTersePrintTupleFieldsToStrings(const Tuple& value) { Strings result; TuplePrefixPrinter::tuple_size>:: TersePrintPrefixToStrings(value, &result); return result; } #endif // GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_ } // namespace internal #if GTEST_HAS_ABSL namespace internal2 { template void TypeWithoutFormatter::PrintValue( const T& value, ::std::ostream* os) { internal::PrintTo(absl::string_view(value), os); } } // namespace internal2 #endif template ::std::string PrintToString(const T& value) { ::std::stringstream ss; internal::UniversalTersePrinter::Print(value, &ss); return ss.str(); } } // namespace testing // Include any custom printer added by the local installation. // We must include this header at the end to make sure it can use the // declarations from this file. #include "gtest/internal/custom/gtest-printers.h" #endif // GTEST_INCLUDE_GTEST_GTEST_PRINTERS_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/gtest-spi.h000066400000000000000000000235561357355204000252020ustar00rootroot00000000000000// Copyright 2007, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Utilities for testing Google Test itself and code that uses Google Test // (e.g. frameworks built on top of Google Test). // GOOGLETEST_CM0004 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_SPI_H_ #define GTEST_INCLUDE_GTEST_GTEST_SPI_H_ #include "gtest/gtest.h" GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ /* class A needs to have dll-interface to be used by clients of class B */) namespace testing { // This helper class can be used to mock out Google Test failure reporting // so that we can test Google Test or code that builds on Google Test. // // An object of this class appends a TestPartResult object to the // TestPartResultArray object given in the constructor whenever a Google Test // failure is reported. It can either intercept only failures that are // generated in the same thread that created this object or it can intercept // all generated failures. 
The scope of this mock object can be controlled with // the second argument to the two arguments constructor. class GTEST_API_ ScopedFakeTestPartResultReporter : public TestPartResultReporterInterface { public: // The two possible mocking modes of this object. enum InterceptMode { INTERCEPT_ONLY_CURRENT_THREAD, // Intercepts only thread local failures. INTERCEPT_ALL_THREADS // Intercepts all failures. }; // The c'tor sets this object as the test part result reporter used // by Google Test. The 'result' parameter specifies where to report the // results. This reporter will only catch failures generated in the current // thread. DEPRECATED explicit ScopedFakeTestPartResultReporter(TestPartResultArray* result); // Same as above, but you can choose the interception scope of this object. ScopedFakeTestPartResultReporter(InterceptMode intercept_mode, TestPartResultArray* result); // The d'tor restores the previous test part result reporter. virtual ~ScopedFakeTestPartResultReporter(); // Appends the TestPartResult object to the TestPartResultArray // received in the constructor. // // This method is from the TestPartResultReporterInterface // interface. virtual void ReportTestPartResult(const TestPartResult& result); private: void Init(); const InterceptMode intercept_mode_; TestPartResultReporterInterface* old_reporter_; TestPartResultArray* const result_; GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedFakeTestPartResultReporter); }; namespace internal { // A helper class for implementing EXPECT_FATAL_FAILURE() and // EXPECT_NONFATAL_FAILURE(). Its destructor verifies that the given // TestPartResultArray contains exactly one failure that has the given // type and contains the given substring. If that's not the case, a // non-fatal failure will be generated. class GTEST_API_ SingleFailureChecker { public: // The constructor remembers the arguments. SingleFailureChecker(const TestPartResultArray* results, TestPartResult::Type type, const std::string& substr); ~SingleFailureChecker(); private: const TestPartResultArray* const results_; const TestPartResult::Type type_; const std::string substr_; GTEST_DISALLOW_COPY_AND_ASSIGN_(SingleFailureChecker); }; } // namespace internal } // namespace testing GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 // A set of macros for testing Google Test assertions or code that's expected // to generate Google Test fatal failures. It verifies that the given // statement will cause exactly one fatal Google Test failure with 'substr' // being part of the failure message. // // There are two different versions of this macro. EXPECT_FATAL_FAILURE only // affects and considers failures generated in the current thread and // EXPECT_FATAL_FAILURE_ON_ALL_THREADS does the same but for all threads. // // The verification of the assertion is done correctly even when the statement // throws an exception or aborts the current function. // // Known restrictions: // - 'statement' cannot reference local non-static variables or // non-static members of the current object. // - 'statement' cannot return a value. // - You cannot stream a failure message to this macro. // // Note that even though the implementations of the following two // macros are much alike, we cannot refactor them to use a common // helper macro, due to some peculiarity in how the preprocessor // works. The AcceptsMacroThatExpandsToUnprotectedComma test in // gtest_unittest.cc will fail to compile if we do that. 
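// A minimal usage sketch (the message text is hypothetical):
//
//   EXPECT_FATAL_FAILURE(FAIL() << "expected message", "expected message");
//
// The check passes only if the statement yields exactly one fatal failure
// whose message contains the given substring.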
#define EXPECT_FATAL_FAILURE(statement, substr) \
  do { \
    class GTestExpectFatalFailureHelper {\
     public:\
      static void Execute() { statement; }\
    };\
    ::testing::TestPartResultArray gtest_failures;\
    ::testing::internal::SingleFailureChecker gtest_checker(\
        &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
    {\
      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
          ::testing::ScopedFakeTestPartResultReporter:: \
          INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
      GTestExpectFatalFailureHelper::Execute();\
    }\
  } while (::testing::internal::AlwaysFalse())

#define EXPECT_FATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
  do { \
    class GTestExpectFatalFailureHelper {\
     public:\
      static void Execute() { statement; }\
    };\
    ::testing::TestPartResultArray gtest_failures;\
    ::testing::internal::SingleFailureChecker gtest_checker(\
        &gtest_failures, ::testing::TestPartResult::kFatalFailure, (substr));\
    {\
      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
          ::testing::ScopedFakeTestPartResultReporter:: \
          INTERCEPT_ALL_THREADS, &gtest_failures);\
      GTestExpectFatalFailureHelper::Execute();\
    }\
  } while (::testing::internal::AlwaysFalse())

// A macro for testing Google Test assertions or code that's expected to
// generate Google Test non-fatal failures. It asserts that the given
// statement will cause exactly one non-fatal Google Test failure with 'substr'
// being part of the failure message.
//
// There are two different versions of this macro. EXPECT_NONFATAL_FAILURE only
// affects and considers failures generated in the current thread and
// EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS does the same but for all threads.
//
// 'statement' is allowed to reference local variables and members of
// the current object.
//
// The verification of the assertion is done correctly even when the statement
// throws an exception or aborts the current function.
//
// Known restrictions:
//   - You cannot stream a failure message to this macro.
//
// Note that even though the implementations of the following two
// macros are much alike, we cannot refactor them to use a common
// helper macro, due to some peculiarity in how the preprocessor
// works. If we do that, the code won't compile when the user gives
// EXPECT_NONFATAL_FAILURE() a statement that contains a macro that
// expands to code containing an unprotected comma. The
// AcceptsMacroThatExpandsToUnprotectedComma test in gtest_unittest.cc
// catches that.
//
// For the same reason, we have to write
//   if (::testing::internal::AlwaysTrue()) { statement; }
// instead of
//   GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement)
// to avoid an MSVC warning on unreachable code.
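//
// For illustration only (not part of Google Test; the test name is
// hypothetical). Unlike the fatal variant, 'statement' may reference
// locals, so the failing code can be written inline:
//
//   TEST(SpiSelfTest, CatchesNonfatalFailure) {
//     int line = 42;
//     EXPECT_NONFATAL_FAILURE(ADD_FAILURE() << "bad line " << line,
//                             "bad line");
//   }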
#define EXPECT_NONFATAL_FAILURE(statement, substr) \
  do {\
    ::testing::TestPartResultArray gtest_failures;\
    ::testing::internal::SingleFailureChecker gtest_checker(\
        &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
        (substr));\
    {\
      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
          ::testing::ScopedFakeTestPartResultReporter:: \
          INTERCEPT_ONLY_CURRENT_THREAD, &gtest_failures);\
      if (::testing::internal::AlwaysTrue()) { statement; }\
    }\
  } while (::testing::internal::AlwaysFalse())

#define EXPECT_NONFATAL_FAILURE_ON_ALL_THREADS(statement, substr) \
  do {\
    ::testing::TestPartResultArray gtest_failures;\
    ::testing::internal::SingleFailureChecker gtest_checker(\
        &gtest_failures, ::testing::TestPartResult::kNonFatalFailure, \
        (substr));\
    {\
      ::testing::ScopedFakeTestPartResultReporter gtest_reporter(\
          ::testing::ScopedFakeTestPartResultReporter::INTERCEPT_ALL_THREADS, \
          &gtest_failures);\
      if (::testing::internal::AlwaysTrue()) { statement; }\
    }\
  } while (::testing::internal::AlwaysFalse())

#endif  // GTEST_INCLUDE_GTEST_GTEST_SPI_H_
libvpx-1.8.2/third_party/googletest/src/include/gtest/gtest-test-part.h000066400000000000000000000150161357355204000263220ustar00rootroot00000000000000// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// GOOGLETEST_CM0001 DO NOT DELETE

#ifndef GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_
#define GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_

#include <iosfwd>
#include <vector>

#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-string.h"

GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
/* class A needs to have dll-interface to be used by clients of class B */)

namespace testing {

// A copyable object representing the result of a test part (i.e. an
// assertion or an explicit FAIL(), ADD_FAILURE(), or SUCCEED()).
//
// Don't inherit from TestPartResult as its destructor is not virtual.
class GTEST_API_ TestPartResult {
 public:
  // The possible outcomes of a test part (i.e. an assertion or an
  // explicit SUCCEED(), FAIL(), or ADD_FAILURE()).
  enum Type {
    kSuccess,          // Succeeded.
kNonFatalFailure, // Failed but the test can continue. kFatalFailure // Failed and the test should be terminated. }; // C'tor. TestPartResult does NOT have a default constructor. // Always use this constructor (with parameters) to create a // TestPartResult object. TestPartResult(Type a_type, const char* a_file_name, int a_line_number, const char* a_message) : type_(a_type), file_name_(a_file_name == NULL ? "" : a_file_name), line_number_(a_line_number), summary_(ExtractSummary(a_message)), message_(a_message) { } // Gets the outcome of the test part. Type type() const { return type_; } // Gets the name of the source file where the test part took place, or // NULL if it's unknown. const char* file_name() const { return file_name_.empty() ? NULL : file_name_.c_str(); } // Gets the line in the source file where the test part took place, // or -1 if it's unknown. int line_number() const { return line_number_; } // Gets the summary of the failure message. const char* summary() const { return summary_.c_str(); } // Gets the message associated with the test part. const char* message() const { return message_.c_str(); } // Returns true iff the test part passed. bool passed() const { return type_ == kSuccess; } // Returns true iff the test part failed. bool failed() const { return type_ != kSuccess; } // Returns true iff the test part non-fatally failed. bool nonfatally_failed() const { return type_ == kNonFatalFailure; } // Returns true iff the test part fatally failed. bool fatally_failed() const { return type_ == kFatalFailure; } private: Type type_; // Gets the summary of the failure message by omitting the stack // trace in it. static std::string ExtractSummary(const char* message); // The name of the source file where the test part took place, or // "" if the source file is unknown. std::string file_name_; // The line in the source file where the test part took place, or -1 // if the line number is unknown. int line_number_; std::string summary_; // The test failure summary. std::string message_; // The test failure message. }; // Prints a TestPartResult object. std::ostream& operator<<(std::ostream& os, const TestPartResult& result); // An array of TestPartResult objects. // // Don't inherit from TestPartResultArray as its destructor is not // virtual. class GTEST_API_ TestPartResultArray { public: TestPartResultArray() {} // Appends the given TestPartResult to the array. void Append(const TestPartResult& result); // Returns the TestPartResult at the given index (0-based). const TestPartResult& GetTestPartResult(int index) const; // Returns the number of TestPartResult objects in the array. int size() const; private: std::vector array_; GTEST_DISALLOW_COPY_AND_ASSIGN_(TestPartResultArray); }; // This interface knows how to report a test part result. class GTEST_API_ TestPartResultReporterInterface { public: virtual ~TestPartResultReporterInterface() {} virtual void ReportTestPartResult(const TestPartResult& result) = 0; }; namespace internal { // This helper class is used by {ASSERT|EXPECT}_NO_FATAL_FAILURE to check if a // statement generates new fatal failures. To do so it registers itself as the // current test part result reporter. Besides checking if fatal failures were // reported, it only delegates the reporting to the former result reporter. // The original result reporter is restored in the destructor. // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. 
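//
// For illustration only (not part of Google Test itself): user code reaches
// this helper through the public {ASSERT|EXPECT}_NO_FATAL_FAILURE macros;
// Subroutine() and the test name below are hypothetical:
//
//   void Subroutine() { ASSERT_EQ(1, 2) << "fatal mismatch"; }
//
//   TEST(MyTest, UsesSubroutine) {
//     ASSERT_NO_FATAL_FAILURE(Subroutine());
//     // Not reached: the fatal failure inside Subroutine() aborts this test.
//   }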
class GTEST_API_ HasNewFatalFailureHelper : public TestPartResultReporterInterface { public: HasNewFatalFailureHelper(); virtual ~HasNewFatalFailureHelper(); virtual void ReportTestPartResult(const TestPartResult& result); bool has_new_fatal_failure() const { return has_new_fatal_failure_; } private: bool has_new_fatal_failure_; TestPartResultReporterInterface* original_reporter_; GTEST_DISALLOW_COPY_AND_ASSIGN_(HasNewFatalFailureHelper); }; } // namespace internal } // namespace testing GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 #endif // GTEST_INCLUDE_GTEST_GTEST_TEST_PART_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/gtest-typed-test.h000066400000000000000000000323541357355204000265050ustar00rootroot00000000000000// Copyright 2008 Google Inc. // All Rights Reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ #define GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_ // This header implements typed tests and type-parameterized tests. // Typed (aka type-driven) tests repeat the same test for types in a // list. You must know which types you want to test with when writing // typed tests. Here's how you do it: #if 0 // First, define a fixture class template. It should be parameterized // by a type. Remember to derive it from testing::Test. template class FooTest : public testing::Test { public: ... typedef std::list List; static T shared_; T value_; }; // Next, associate a list of types with the test case, which will be // repeated for each type in the list. The typedef is necessary for // the macro to parse correctly. typedef testing::Types MyTypes; TYPED_TEST_CASE(FooTest, MyTypes); // If the type list contains only one type, you can write that type // directly without Types<...>: // TYPED_TEST_CASE(FooTest, int); // Then, use TYPED_TEST() instead of TEST_F() to define as many typed // tests for this test case as you want. TYPED_TEST(FooTest, DoesBlah) { // Inside a test, refer to TypeParam to get the type parameter. 
// Since we are inside a derived class template, C++ requires use to // visit the members of FooTest via 'this'. TypeParam n = this->value_; // To visit static members of the fixture, add the TestFixture:: // prefix. n += TestFixture::shared_; // To refer to typedefs in the fixture, add the "typename // TestFixture::" prefix. typename TestFixture::List values; values.push_back(n); ... } TYPED_TEST(FooTest, HasPropertyA) { ... } // TYPED_TEST_CASE takes an optional third argument which allows to specify a // class that generates custom test name suffixes based on the type. This should // be a class which has a static template function GetName(int index) returning // a string for each type. The provided integer index equals the index of the // type in the provided type list. In many cases the index can be ignored. // // For example: // class MyTypeNames { // public: // template // static std::string GetName(int) { // if (std::is_same()) return "char"; // if (std::is_same()) return "int"; // if (std::is_same()) return "unsignedInt"; // } // }; // TYPED_TEST_CASE(FooTest, MyTypes, MyTypeNames); #endif // 0 // Type-parameterized tests are abstract test patterns parameterized // by a type. Compared with typed tests, type-parameterized tests // allow you to define the test pattern without knowing what the type // parameters are. The defined pattern can be instantiated with // different types any number of times, in any number of translation // units. // // If you are designing an interface or concept, you can define a // suite of type-parameterized tests to verify properties that any // valid implementation of the interface/concept should have. Then, // each implementation can easily instantiate the test suite to verify // that it conforms to the requirements, without having to write // similar tests repeatedly. Here's an example: #if 0 // First, define a fixture class template. It should be parameterized // by a type. Remember to derive it from testing::Test. template class FooTest : public testing::Test { ... }; // Next, declare that you will define a type-parameterized test case // (the _P suffix is for "parameterized" or "pattern", whichever you // prefer): TYPED_TEST_CASE_P(FooTest); // Then, use TYPED_TEST_P() to define as many type-parameterized tests // for this type-parameterized test case as you want. TYPED_TEST_P(FooTest, DoesBlah) { // Inside a test, refer to TypeParam to get the type parameter. TypeParam n = 0; ... } TYPED_TEST_P(FooTest, HasPropertyA) { ... } // Now the tricky part: you need to register all test patterns before // you can instantiate them. The first argument of the macro is the // test case name; the rest are the names of the tests in this test // case. REGISTER_TYPED_TEST_CASE_P(FooTest, DoesBlah, HasPropertyA); // Finally, you are free to instantiate the pattern with the types you // want. If you put the above code in a header file, you can #include // it in multiple C++ source files and instantiate it multiple times. // // To distinguish different instances of the pattern, the first // argument to the INSTANTIATE_* macro is a prefix that will be added // to the actual test case name. Remember to pick unique prefixes for // different instances. 
typedef testing::Types<char, int, unsigned int> MyTypes;
INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes);

// If the type list contains only one type, you can write that type
// directly without Types<...>:
//   INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, int);
//
// Similar to the optional argument of TYPED_TEST_CASE above,
// INSTANTIATE_TEST_CASE_P takes an optional fourth argument which allows to
// generate custom names.
//   INSTANTIATE_TYPED_TEST_CASE_P(My, FooTest, MyTypes, MyTypeNames);

#endif  // 0

#include "gtest/internal/gtest-port.h"
#include "gtest/internal/gtest-type-util.h"

// Implements typed tests.

#if GTEST_HAS_TYPED_TEST

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the name of the typedef for the type parameters of the
// given test case.
# define GTEST_TYPE_PARAMS_(TestCaseName) gtest_type_params_##TestCaseName##_

// Expands to the name of the typedef for the NameGenerator, responsible for
// creating the suffixes of the name.
#define GTEST_NAME_GENERATOR_(TestCaseName) \
  gtest_type_params_##TestCaseName##_NameGenerator

// The 'Types' template argument below must have spaces around it
// since some compilers may choke on '>>' when passing a template
// instance (e.g. Types<int>)
# define TYPED_TEST_CASE(CaseName, Types, ...)                              \
  typedef ::testing::internal::TypeList< Types >::type GTEST_TYPE_PARAMS_(  \
      CaseName);                                                            \
  typedef ::testing::internal::NameGeneratorSelector<__VA_ARGS__>::type     \
      GTEST_NAME_GENERATOR_(CaseName)

# define TYPED_TEST(CaseName, TestName)                                     \
  template <typename gtest_TypeParam_>                                      \
  class GTEST_TEST_CLASS_NAME_(CaseName, TestName)                          \
      : public CaseName<gtest_TypeParam_> {                                 \
   private:                                                                 \
    typedef CaseName<gtest_TypeParam_> TestFixture;                         \
    typedef gtest_TypeParam_ TypeParam;                                     \
    virtual void TestBody();                                                \
  };                                                                        \
  static bool gtest_##CaseName##_##TestName##_registered_                   \
      GTEST_ATTRIBUTE_UNUSED_ =                                             \
          ::testing::internal::TypeParameterizedTest<                       \
              CaseName,                                                     \
              ::testing::internal::TemplateSel<GTEST_TEST_CLASS_NAME_(      \
                  CaseName, TestName)>,                                     \
              GTEST_TYPE_PARAMS_(                                           \
                  CaseName)>::Register("",                                  \
                                       ::testing::internal::CodeLocation(   \
                                           __FILE__, __LINE__),             \
                                       #CaseName, #TestName, 0,             \
                                       ::testing::internal::GenerateNames<  \
                                           GTEST_NAME_GENERATOR_(CaseName), \
                                           GTEST_TYPE_PARAMS_(CaseName)>()); \
  template <typename gtest_TypeParam_>                                      \
  void GTEST_TEST_CLASS_NAME_(CaseName,                                     \
                              TestName)<gtest_TypeParam_>::TestBody()

#endif  // GTEST_HAS_TYPED_TEST

// Implements type-parameterized tests.

#if GTEST_HAS_TYPED_TEST_P

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the namespace name that the type-parameterized tests for
// the given type-parameterized test case are defined in. The exact
// name of the namespace is subject to change without notice.
# define GTEST_CASE_NAMESPACE_(TestCaseName) \
  gtest_case_##TestCaseName##_

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE.
//
// Expands to the name of the variable used to remember the names of
// the defined tests in the given test case.
# define GTEST_TYPED_TEST_CASE_P_STATE_(TestCaseName) \
  gtest_typed_test_case_p_state_##TestCaseName##_

// INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE DIRECTLY.
//
// Expands to the name of the variable used to remember the names of
// the registered tests in the given test case.
# define GTEST_REGISTERED_TEST_NAMES_(TestCaseName) \
  gtest_registered_test_names_##TestCaseName##_

// The variables defined in the type-parameterized test macros are
// static as typically these macros are used in a .h file that can be
// #included in multiple translation units linked together.
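//
// For illustration only (not part of Google Test): a compilable
// distillation of the typed-test flow implemented by the macros above;
// QueueTest, its member q_, and the test name are hypothetical:
//
//   template <typename T>
//   class QueueTest : public testing::Test {
//    protected:
//     std::vector<T> q_;
//   };
//
//   typedef testing::Types<int, long, unsigned char> Implementations;
//   TYPED_TEST_CASE(QueueTest, Implementations);
//
//   TYPED_TEST(QueueTest, StartsEmpty) {
//     EXPECT_TRUE(this->q_.empty());
//   }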
# define TYPED_TEST_CASE_P(CaseName) \
  static ::testing::internal::TypedTestCasePState \
      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName)

# define TYPED_TEST_P(CaseName, TestName) \
  namespace GTEST_CASE_NAMESPACE_(CaseName) { \
  template <typename gtest_TypeParam_> \
  class TestName : public CaseName<gtest_TypeParam_> { \
   private: \
    typedef CaseName<gtest_TypeParam_> TestFixture; \
    typedef gtest_TypeParam_ TypeParam; \
    virtual void TestBody(); \
  }; \
  static bool gtest_##TestName##_defined_ GTEST_ATTRIBUTE_UNUSED_ = \
      GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).AddTestName(\
          __FILE__, __LINE__, #CaseName, #TestName); \
  } \
  template <typename gtest_TypeParam_> \
  void GTEST_CASE_NAMESPACE_(CaseName)::TestName<gtest_TypeParam_>::TestBody()

# define REGISTER_TYPED_TEST_CASE_P(CaseName, ...) \
  namespace GTEST_CASE_NAMESPACE_(CaseName) { \
  typedef ::testing::internal::Templates<__VA_ARGS__>::type gtest_AllTests_; \
  } \
  static const char* const GTEST_REGISTERED_TEST_NAMES_(CaseName) \
      GTEST_ATTRIBUTE_UNUSED_ = \
          GTEST_TYPED_TEST_CASE_P_STATE_(CaseName).VerifyRegisteredTestNames( \
              __FILE__, __LINE__, #__VA_ARGS__)

// The 'Types' template argument below must have spaces around it
// since some compilers may choke on '>>' when passing a template
// instance (e.g. Types<int>)
# define INSTANTIATE_TYPED_TEST_CASE_P(Prefix, CaseName, Types, ...)      \
  static bool gtest_##Prefix##_##CaseName GTEST_ATTRIBUTE_UNUSED_ =       \
      ::testing::internal::TypeParameterizedTestCase<                     \
          CaseName, GTEST_CASE_NAMESPACE_(CaseName)::gtest_AllTests_,     \
          ::testing::internal::TypeList< Types >::type>::                 \
          Register(#Prefix,                                               \
                   ::testing::internal::CodeLocation(__FILE__, __LINE__), \
                   &GTEST_TYPED_TEST_CASE_P_STATE_(CaseName), #CaseName,  \
                   GTEST_REGISTERED_TEST_NAMES_(CaseName),                \
                   ::testing::internal::GenerateNames<                    \
                       ::testing::internal::NameGeneratorSelector<        \
                           __VA_ARGS__>::type,                            \
                       ::testing::internal::TypeList< Types >::type>())

#endif  // GTEST_HAS_TYPED_TEST_P

#endif  // GTEST_INCLUDE_GTEST_GTEST_TYPED_TEST_H_
libvpx-1.8.2/third_party/googletest/src/include/gtest/gtest.h000066400000000000000000002554051357355204000244070ustar00rootroot00000000000000// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// The Google C++ Testing and Mocking Framework (Google Test)
//
// This header file defines the public API for Google Test. It should be
// included by any test program that uses Google Test.
//
// IMPORTANT NOTE: Due to limitation of the C++ language, we have to
// leave some internal implementation details in this header file.
// They are clearly marked by comments like this:
//
//   // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
//
// Such code is NOT meant to be used by a user directly, and is subject
// to CHANGE WITHOUT NOTICE. Therefore DO NOT DEPEND ON IT in a user
// program!
//
// Acknowledgment: Google Test borrowed the idea of automatic test
// registration from Barthelemy Dagenais' (barthelemy@prologique.com)
// easyUnit framework.

// GOOGLETEST_CM0001 DO NOT DELETE

#ifndef GTEST_INCLUDE_GTEST_GTEST_H_
#define GTEST_INCLUDE_GTEST_GTEST_H_

#include <limits>
#include <ostream>
#include <vector>

#include "gtest/internal/gtest-internal.h"
#include "gtest/internal/gtest-string.h"
#include "gtest/gtest-death-test.h"
#include "gtest/gtest-message.h"
#include "gtest/gtest-param-test.h"
#include "gtest/gtest-printers.h"
#include "gtest/gtest_prod.h"
#include "gtest/gtest-test-part.h"
#include "gtest/gtest-typed-test.h"

GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
/* class A needs to have dll-interface to be used by clients of class B */)

// Depending on the platform, different string classes are available.
// On Linux, in addition to ::std::string, Google also makes use of
// class ::string, which has the same interface as ::std::string, but
// has a different implementation.
//
// You can define GTEST_HAS_GLOBAL_STRING to 1 to indicate that
// ::string is available AND is a distinct type to ::std::string, or
// define it to 0 to indicate otherwise.
//
// If ::std::string and ::string are the same class on your platform
// due to aliasing, you should define GTEST_HAS_GLOBAL_STRING to 0.
//
// If you do not define GTEST_HAS_GLOBAL_STRING, it is defined
// heuristically.

namespace testing {

// Silence C4100 (unreferenced formal parameter) and 4805
// unsafe mix of type 'const int' and type 'const bool'
#ifdef _MSC_VER
# pragma warning(push)
# pragma warning(disable:4805)
# pragma warning(disable:4100)
#endif

// Declares the flags.

// This flag temporarily enables the disabled tests.
GTEST_DECLARE_bool_(also_run_disabled_tests);

// This flag brings the debugger on an assertion failure.
GTEST_DECLARE_bool_(break_on_failure);

// This flag controls whether Google Test catches all test-thrown exceptions
// and logs them as failures.
GTEST_DECLARE_bool_(catch_exceptions);

// This flag enables using colors in terminal output. Available values are
// "yes" to enable colors, "no" (disable colors), or "auto" (the default)
// to let Google Test decide.
GTEST_DECLARE_string_(color);

// This flag sets up the filter to select by name using a glob pattern
// the tests to run. If the filter is not given all tests are executed.
GTEST_DECLARE_string_(filter);

// This flag controls whether Google Test installs a signal handler that dumps
// debugging information when fatal signals are raised.
GTEST_DECLARE_bool_(install_failure_signal_handler);

// This flag causes Google Test to list tests. None of the tests listed
// are actually run if the flag is provided.
GTEST_DECLARE_bool_(list_tests);

// This flag controls whether Google Test emits a detailed XML report to a file
// in addition to its normal textual output.
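// For example, passing
//   --gtest_output=xml:report.xml
// on the command line (or setting GTEST_OUTPUT=xml:report.xml in the
// environment) writes the XML report to report.xml; the file name shown
// here is illustrative.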
GTEST_DECLARE_string_(output); // This flags control whether Google Test prints the elapsed time for each // test. GTEST_DECLARE_bool_(print_time); // This flags control whether Google Test prints UTF8 characters as text. GTEST_DECLARE_bool_(print_utf8); // This flag specifies the random number seed. GTEST_DECLARE_int32_(random_seed); // This flag sets how many times the tests are repeated. The default value // is 1. If the value is -1 the tests are repeating forever. GTEST_DECLARE_int32_(repeat); // This flag controls whether Google Test includes Google Test internal // stack frames in failure stack traces. GTEST_DECLARE_bool_(show_internal_stack_frames); // When this flag is specified, tests' order is randomized on every iteration. GTEST_DECLARE_bool_(shuffle); // This flag specifies the maximum number of stack frames to be // printed in a failure message. GTEST_DECLARE_int32_(stack_trace_depth); // When this flag is specified, a failed assertion will throw an // exception if exceptions are enabled, or exit the program with a // non-zero code otherwise. For use with an external test framework. GTEST_DECLARE_bool_(throw_on_failure); // When this flag is set with a "host:port" string, on supported // platforms test results are streamed to the specified port on // the specified host machine. GTEST_DECLARE_string_(stream_result_to); #if GTEST_USE_OWN_FLAGFILE_FLAG_ GTEST_DECLARE_string_(flagfile); #endif // GTEST_USE_OWN_FLAGFILE_FLAG_ // The upper limit for valid stack trace depths. const int kMaxStackTraceDepth = 100; namespace internal { class AssertHelper; class DefaultGlobalTestPartResultReporter; class ExecDeathTest; class NoExecDeathTest; class FinalSuccessChecker; class GTestFlagSaver; class StreamingListenerTest; class TestResultAccessor; class TestEventListenersAccessor; class TestEventRepeater; class UnitTestRecordPropertyTestHelper; class WindowsDeathTest; class FuchsiaDeathTest; class UnitTestImpl* GetUnitTestImpl(); void ReportFailureInUnknownLocation(TestPartResult::Type result_type, const std::string& message); } // namespace internal // The friend relationship of some of these classes is cyclic. // If we don't forward declare them the compiler might confuse the classes // in friendship clauses with same named classes on the scope. class Test; class TestCase; class TestInfo; class UnitTest; // A class for indicating whether an assertion was successful. When // the assertion wasn't successful, the AssertionResult object // remembers a non-empty message that describes how it failed. // // To create an instance of this class, use one of the factory functions // (AssertionSuccess() and AssertionFailure()). // // This class is useful for two purposes: // 1. Defining predicate functions to be used with Boolean test assertions // EXPECT_TRUE/EXPECT_FALSE and their ASSERT_ counterparts // 2. Defining predicate-format functions to be // used with predicate assertions (ASSERT_PRED_FORMAT*, etc). // // For example, if you define IsEven predicate: // // testing::AssertionResult IsEven(int n) { // if ((n % 2) == 0) // return testing::AssertionSuccess(); // else // return testing::AssertionFailure() << n << " is odd"; // } // // Then the failed expectation EXPECT_TRUE(IsEven(Fib(5))) // will print the message // // Value of: IsEven(Fib(5)) // Actual: false (5 is odd) // Expected: true // // instead of a more opaque // // Value of: IsEven(Fib(5)) // Actual: false // Expected: true // // in case IsEven is a simple Boolean predicate. 
// // If you expect your predicate to be reused and want to support informative // messages in EXPECT_FALSE and ASSERT_FALSE (negative assertions show up // about half as often as positive ones in our tests), supply messages for // both success and failure cases: // // testing::AssertionResult IsEven(int n) { // if ((n % 2) == 0) // return testing::AssertionSuccess() << n << " is even"; // else // return testing::AssertionFailure() << n << " is odd"; // } // // Then a statement EXPECT_FALSE(IsEven(Fib(6))) will print // // Value of: IsEven(Fib(6)) // Actual: true (8 is even) // Expected: false // // NB: Predicates that support negative Boolean assertions have reduced // performance in positive ones so be careful not to use them in tests // that have lots (tens of thousands) of positive Boolean assertions. // // To use this class with EXPECT_PRED_FORMAT assertions such as: // // // Verifies that Foo() returns an even number. // EXPECT_PRED_FORMAT1(IsEven, Foo()); // // you need to define: // // testing::AssertionResult IsEven(const char* expr, int n) { // if ((n % 2) == 0) // return testing::AssertionSuccess(); // else // return testing::AssertionFailure() // << "Expected: " << expr << " is even\n Actual: it's " << n; // } // // If Foo() returns 5, you will see the following message: // // Expected: Foo() is even // Actual: it's 5 // class GTEST_API_ AssertionResult { public: // Copy constructor. // Used in EXPECT_TRUE/FALSE(assertion_result). AssertionResult(const AssertionResult& other); #if defined(_MSC_VER) && _MSC_VER < 1910 GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 /* forcing value to bool */) #endif // Used in the EXPECT_TRUE/FALSE(bool_expression). // // T must be contextually convertible to bool. // // The second parameter prevents this overload from being considered if // the argument is implicitly convertible to AssertionResult. In that case // we want AssertionResult's copy constructor to be used. template explicit AssertionResult( const T& success, typename internal::EnableIf< !internal::ImplicitlyConvertible::value>::type* /*enabler*/ = NULL) : success_(success) {} #if defined(_MSC_VER) && _MSC_VER < 1910 GTEST_DISABLE_MSC_WARNINGS_POP_() #endif // Assignment operator. AssertionResult& operator=(AssertionResult other) { swap(other); return *this; } // Returns true iff the assertion succeeded. operator bool() const { return success_; } // NOLINT // Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE. AssertionResult operator!() const; // Returns the text streamed into this AssertionResult. Test assertions // use it when they fail (i.e., the predicate's outcome doesn't match the // assertion's expectation). When nothing has been streamed into the // object, returns an empty string. const char* message() const { return message_.get() != NULL ? message_->c_str() : ""; } // FIXME: Remove this after making sure no clients use it. // Deprecated; please use message() instead. const char* failure_message() const { return message(); } // Streams a custom failure message into this object. template AssertionResult& operator<<(const T& value) { AppendMessage(Message() << value); return *this; } // Allows streaming basic output manipulators such as endl or flush into // this object. AssertionResult& operator<<( ::std::ostream& (*basic_manipulator)(::std::ostream& stream)) { AppendMessage(Message() << basic_manipulator); return *this; } private: // Appends the contents of message to message_. 
void AppendMessage(const Message& a_message) { if (message_.get() == NULL) message_.reset(new ::std::string); message_->append(a_message.GetString().c_str()); } // Swap the contents of this AssertionResult with other. void swap(AssertionResult& other); // Stores result of the assertion predicate. bool success_; // Stores the message describing the condition in case the expectation // construct is not satisfied with the predicate's outcome. // Referenced via a pointer to avoid taking too much stack frame space // with test assertions. internal::scoped_ptr< ::std::string> message_; }; // Makes a successful assertion result. GTEST_API_ AssertionResult AssertionSuccess(); // Makes a failed assertion result. GTEST_API_ AssertionResult AssertionFailure(); // Makes a failed assertion result with the given failure message. // Deprecated; use AssertionFailure() << msg. GTEST_API_ AssertionResult AssertionFailure(const Message& msg); } // namespace testing // Includes the auto-generated header that implements a family of generic // predicate assertion macros. This include comes late because it relies on // APIs declared above. #include "gtest/gtest_pred_impl.h" namespace testing { // The abstract class that all tests inherit from. // // In Google Test, a unit test program contains one or many TestCases, and // each TestCase contains one or many Tests. // // When you define a test using the TEST macro, you don't need to // explicitly derive from Test - the TEST macro automatically does // this for you. // // The only time you derive from Test is when defining a test fixture // to be used in a TEST_F. For example: // // class FooTest : public testing::Test { // protected: // void SetUp() override { ... } // void TearDown() override { ... } // ... // }; // // TEST_F(FooTest, Bar) { ... } // TEST_F(FooTest, Baz) { ... } // // Test is not copyable. class GTEST_API_ Test { public: friend class TestInfo; // Defines types for pointers to functions that set up and tear down // a test case. typedef internal::SetUpTestCaseFunc SetUpTestCaseFunc; typedef internal::TearDownTestCaseFunc TearDownTestCaseFunc; // The d'tor is virtual as we intend to inherit from Test. virtual ~Test(); // Sets up the stuff shared by all tests in this test case. // // Google Test will call Foo::SetUpTestCase() before running the first // test in test case Foo. Hence a sub-class can define its own // SetUpTestCase() method to shadow the one defined in the super // class. static void SetUpTestCase() {} // Tears down the stuff shared by all tests in this test case. // // Google Test will call Foo::TearDownTestCase() after running the last // test in test case Foo. Hence a sub-class can define its own // TearDownTestCase() method to shadow the one defined in the super // class. static void TearDownTestCase() {} // Returns true iff the current test has a fatal failure. static bool HasFatalFailure(); // Returns true iff the current test has a non-fatal failure. static bool HasNonfatalFailure(); // Returns true iff the current test has a (either fatal or // non-fatal) failure. static bool HasFailure() { return HasFatalFailure() || HasNonfatalFailure(); } // Logs a property for the current test, test case, or for the entire // invocation of the test program when used outside of the context of a // test case. Only the last value for a given key is remembered. These // are public static so they can be called from utility functions that are // not members of the test fixture. 
Calls to RecordProperty made during // lifespan of the test (from the moment its constructor starts to the // moment its destructor finishes) will be output in XML as attributes of // the element. Properties recorded from fixture's // SetUpTestCase or TearDownTestCase are logged as attributes of the // corresponding element. Calls to RecordProperty made in the // global context (before or after invocation of RUN_ALL_TESTS and from // SetUp/TearDown method of Environment objects registered with Google // Test) will be output as attributes of the element. static void RecordProperty(const std::string& key, const std::string& value); static void RecordProperty(const std::string& key, int value); protected: // Creates a Test object. Test(); // Sets up the test fixture. virtual void SetUp(); // Tears down the test fixture. virtual void TearDown(); private: // Returns true iff the current test has the same fixture class as // the first test in the current test case. static bool HasSameFixtureClass(); // Runs the test after the test fixture has been set up. // // A sub-class must implement this to define the test logic. // // DO NOT OVERRIDE THIS FUNCTION DIRECTLY IN A USER PROGRAM. // Instead, use the TEST or TEST_F macro. virtual void TestBody() = 0; // Sets up, executes, and tears down the test. void Run(); // Deletes self. We deliberately pick an unusual name for this // internal method to avoid clashing with names used in user TESTs. void DeleteSelf_() { delete this; } const internal::scoped_ptr< GTEST_FLAG_SAVER_ > gtest_flag_saver_; // Often a user misspells SetUp() as Setup() and spends a long time // wondering why it is never called by Google Test. The declaration of // the following method is solely for catching such an error at // compile time: // // - The return type is deliberately chosen to be not void, so it // will be a conflict if void Setup() is declared in the user's // test fixture. // // - This method is private, so it will be another compiler error // if the method is called from the user's test fixture. // // DO NOT OVERRIDE THIS FUNCTION. // // If you see an error about overriding the following function or // about it being private, you have mis-spelled SetUp() as Setup(). struct Setup_should_be_spelled_SetUp {}; virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; } // We disallow copying Tests. GTEST_DISALLOW_COPY_AND_ASSIGN_(Test); }; typedef internal::TimeInMillis TimeInMillis; // A copyable object representing a user specified test property which can be // output as a key/value string pair. // // Don't inherit from TestProperty as its destructor is not virtual. class TestProperty { public: // C'tor. TestProperty does NOT have a default constructor. // Always use this constructor (with parameters) to create a // TestProperty object. TestProperty(const std::string& a_key, const std::string& a_value) : key_(a_key), value_(a_value) { } // Gets the user supplied key. const char* key() const { return key_.c_str(); } // Gets the user supplied value. const char* value() const { return value_.c_str(); } // Sets a new value, overriding the one supplied in the constructor. void SetValue(const std::string& new_value) { value_ = new_value; } private: // The key supplied by the user. std::string key_; // The value supplied by the user. std::string value_; }; // The result of a single Test. This includes a list of // TestPartResults, a list of TestProperties, a count of how many // death tests there are in the Test, and how much time it took to run // the Test. 
// // TestResult is not copyable. class GTEST_API_ TestResult { public: // Creates an empty TestResult. TestResult(); // D'tor. Do not inherit from TestResult. ~TestResult(); // Gets the number of all test parts. This is the sum of the number // of successful test parts and the number of failed test parts. int total_part_count() const; // Returns the number of the test properties. int test_property_count() const; // Returns true iff the test passed (i.e. no test part failed). bool Passed() const { return !Failed(); } // Returns true iff the test failed. bool Failed() const; // Returns true iff the test fatally failed. bool HasFatalFailure() const; // Returns true iff the test has a non-fatal failure. bool HasNonfatalFailure() const; // Returns the elapsed time, in milliseconds. TimeInMillis elapsed_time() const { return elapsed_time_; } // Returns the i-th test part result among all the results. i can range from 0 // to total_part_count() - 1. If i is not in that range, aborts the program. const TestPartResult& GetTestPartResult(int i) const; // Returns the i-th test property. i can range from 0 to // test_property_count() - 1. If i is not in that range, aborts the // program. const TestProperty& GetTestProperty(int i) const; private: friend class TestInfo; friend class TestCase; friend class UnitTest; friend class internal::DefaultGlobalTestPartResultReporter; friend class internal::ExecDeathTest; friend class internal::TestResultAccessor; friend class internal::UnitTestImpl; friend class internal::WindowsDeathTest; friend class internal::FuchsiaDeathTest; // Gets the vector of TestPartResults. const std::vector& test_part_results() const { return test_part_results_; } // Gets the vector of TestProperties. const std::vector& test_properties() const { return test_properties_; } // Sets the elapsed time. void set_elapsed_time(TimeInMillis elapsed) { elapsed_time_ = elapsed; } // Adds a test property to the list. The property is validated and may add // a non-fatal failure if invalid (e.g., if it conflicts with reserved // key names). If a property is already recorded for the same key, the // value will be updated, rather than storing multiple values for the same // key. xml_element specifies the element for which the property is being // recorded and is used for validation. void RecordProperty(const std::string& xml_element, const TestProperty& test_property); // Adds a failure if the key is a reserved attribute of Google Test // testcase tags. Returns true if the property is valid. // FIXME: Validate attribute names are legal and human readable. static bool ValidateTestProperty(const std::string& xml_element, const TestProperty& test_property); // Adds a test part result to the list. void AddTestPartResult(const TestPartResult& test_part_result); // Returns the death test count. int death_test_count() const { return death_test_count_; } // Increments the death test count, returning the new count. int increment_death_test_count() { return ++death_test_count_; } // Clears the test part results. void ClearTestPartResults(); // Clears the object. void Clear(); // Protects mutable state of the property vector and of owned // properties, whose values may be updated. internal::Mutex test_properites_mutex_; // The vector of TestPartResults std::vector test_part_results_; // The vector of TestProperties std::vector test_properties_; // Running count of death tests. int death_test_count_; // The elapsed time, in milliseconds. TimeInMillis elapsed_time_; // We disallow copying TestResult. 
GTEST_DISALLOW_COPY_AND_ASSIGN_(TestResult); }; // class TestResult // A TestInfo object stores the following information about a test: // // Test case name // Test name // Whether the test should be run // A function pointer that creates the test object when invoked // Test result // // The constructor of TestInfo registers itself with the UnitTest // singleton such that the RUN_ALL_TESTS() macro knows which tests to // run. class GTEST_API_ TestInfo { public: // Destructs a TestInfo object. This function is not virtual, so // don't inherit from TestInfo. ~TestInfo(); // Returns the test case name. const char* test_case_name() const { return test_case_name_.c_str(); } // Returns the test name. const char* name() const { return name_.c_str(); } // Returns the name of the parameter type, or NULL if this is not a typed // or a type-parameterized test. const char* type_param() const { if (type_param_.get() != NULL) return type_param_->c_str(); return NULL; } // Returns the text representation of the value parameter, or NULL if this // is not a value-parameterized test. const char* value_param() const { if (value_param_.get() != NULL) return value_param_->c_str(); return NULL; } // Returns the file name where this test is defined. const char* file() const { return location_.file.c_str(); } // Returns the line where this test is defined. int line() const { return location_.line; } // Return true if this test should not be run because it's in another shard. bool is_in_another_shard() const { return is_in_another_shard_; } // Returns true if this test should run, that is if the test is not // disabled (or it is disabled but the also_run_disabled_tests flag has // been specified) and its full name matches the user-specified filter. // // Google Test allows the user to filter the tests by their full names. // The full name of a test Bar in test case Foo is defined as // "Foo.Bar". Only the tests that match the filter will run. // // A filter is a colon-separated list of glob (not regex) patterns, // optionally followed by a '-' and a colon-separated list of // negative patterns (tests to exclude). A test is run if it // matches one of the positive patterns and does not match any of // the negative patterns. // // For example, *A*:Foo.* is a filter that matches any string that // contains the character 'A' or starts with "Foo.". bool should_run() const { return should_run_; } // Returns true iff this test will appear in the XML report. bool is_reportable() const { // The XML report includes tests matching the filter, excluding those // run in other shards. return matches_filter_ && !is_in_another_shard_; } // Returns the result of the test. const TestResult* result() const { return &result_; } private: #if GTEST_HAS_DEATH_TEST friend class internal::DefaultDeathTestFactory; #endif // GTEST_HAS_DEATH_TEST friend class Test; friend class TestCase; friend class internal::UnitTestImpl; friend class internal::StreamingListenerTest; friend TestInfo* internal::MakeAndRegisterTestInfo( const char* test_case_name, const char* name, const char* type_param, const char* value_param, internal::CodeLocation code_location, internal::TypeId fixture_class_id, Test::SetUpTestCaseFunc set_up_tc, Test::TearDownTestCaseFunc tear_down_tc, internal::TestFactoryBase* factory); // Constructs a TestInfo object. The newly constructed instance assumes // ownership of the factory object. 
TestInfo(const std::string& test_case_name, const std::string& name, const char* a_type_param, // NULL if not a type-parameterized test const char* a_value_param, // NULL if not a value-parameterized test internal::CodeLocation a_code_location, internal::TypeId fixture_class_id, internal::TestFactoryBase* factory); // Increments the number of death tests encountered in this test so // far. int increment_death_test_count() { return result_.increment_death_test_count(); } // Creates the test object, runs it, records its result, and then // deletes it. void Run(); static void ClearTestResult(TestInfo* test_info) { test_info->result_.Clear(); } // These fields are immutable properties of the test. const std::string test_case_name_; // Test case name const std::string name_; // Test name // Name of the parameter type, or NULL if this is not a typed or a // type-parameterized test. const internal::scoped_ptr type_param_; // Text representation of the value parameter, or NULL if this is not a // value-parameterized test. const internal::scoped_ptr value_param_; internal::CodeLocation location_; const internal::TypeId fixture_class_id_; // ID of the test fixture class bool should_run_; // True iff this test should run bool is_disabled_; // True iff this test is disabled bool matches_filter_; // True if this test matches the // user-specified filter. bool is_in_another_shard_; // Will be run in another shard. internal::TestFactoryBase* const factory_; // The factory that creates // the test object // This field is mutable and needs to be reset before running the // test for the second time. TestResult result_; GTEST_DISALLOW_COPY_AND_ASSIGN_(TestInfo); }; // A test case, which consists of a vector of TestInfos. // // TestCase is not copyable. class GTEST_API_ TestCase { public: // Creates a TestCase with the given name. // // TestCase does NOT have a default constructor. Always use this // constructor to create a TestCase object. // // Arguments: // // name: name of the test case // a_type_param: the name of the test's type parameter, or NULL if // this is not a type-parameterized test. // set_up_tc: pointer to the function that sets up the test case // tear_down_tc: pointer to the function that tears down the test case TestCase(const char* name, const char* a_type_param, Test::SetUpTestCaseFunc set_up_tc, Test::TearDownTestCaseFunc tear_down_tc); // Destructor of TestCase. virtual ~TestCase(); // Gets the name of the TestCase. const char* name() const { return name_.c_str(); } // Returns the name of the parameter type, or NULL if this is not a // type-parameterized test case. const char* type_param() const { if (type_param_.get() != NULL) return type_param_->c_str(); return NULL; } // Returns true if any test in this test case should run. bool should_run() const { return should_run_; } // Gets the number of successful tests in this test case. int successful_test_count() const; // Gets the number of failed tests in this test case. int failed_test_count() const; // Gets the number of disabled tests that will be reported in the XML report. int reportable_disabled_test_count() const; // Gets the number of disabled tests in this test case. int disabled_test_count() const; // Gets the number of tests to be printed in the XML report. int reportable_test_count() const; // Get the number of tests in this test case that should run. int test_to_run_count() const; // Gets the number of all tests in this test case. int total_test_count() const; // Returns true iff the test case passed. 
bool Passed() const { return !Failed(); } // Returns true iff the test case failed. bool Failed() const { return failed_test_count() > 0; } // Returns the elapsed time, in milliseconds. TimeInMillis elapsed_time() const { return elapsed_time_; } // Returns the i-th test among all the tests. i can range from 0 to // total_test_count() - 1. If i is not in that range, returns NULL. const TestInfo* GetTestInfo(int i) const; // Returns the TestResult that holds test properties recorded during // execution of SetUpTestCase and TearDownTestCase. const TestResult& ad_hoc_test_result() const { return ad_hoc_test_result_; } private: friend class Test; friend class internal::UnitTestImpl; // Gets the (mutable) vector of TestInfos in this TestCase. std::vector& test_info_list() { return test_info_list_; } // Gets the (immutable) vector of TestInfos in this TestCase. const std::vector& test_info_list() const { return test_info_list_; } // Returns the i-th test among all the tests. i can range from 0 to // total_test_count() - 1. If i is not in that range, returns NULL. TestInfo* GetMutableTestInfo(int i); // Sets the should_run member. void set_should_run(bool should) { should_run_ = should; } // Adds a TestInfo to this test case. Will delete the TestInfo upon // destruction of the TestCase object. void AddTestInfo(TestInfo * test_info); // Clears the results of all tests in this test case. void ClearResult(); // Clears the results of all tests in the given test case. static void ClearTestCaseResult(TestCase* test_case) { test_case->ClearResult(); } // Runs every test in this TestCase. void Run(); // Runs SetUpTestCase() for this TestCase. This wrapper is needed // for catching exceptions thrown from SetUpTestCase(). void RunSetUpTestCase() { (*set_up_tc_)(); } // Runs TearDownTestCase() for this TestCase. This wrapper is // needed for catching exceptions thrown from TearDownTestCase(). void RunTearDownTestCase() { (*tear_down_tc_)(); } // Returns true iff test passed. static bool TestPassed(const TestInfo* test_info) { return test_info->should_run() && test_info->result()->Passed(); } // Returns true iff test failed. static bool TestFailed(const TestInfo* test_info) { return test_info->should_run() && test_info->result()->Failed(); } // Returns true iff the test is disabled and will be reported in the XML // report. static bool TestReportableDisabled(const TestInfo* test_info) { return test_info->is_reportable() && test_info->is_disabled_; } // Returns true iff test is disabled. static bool TestDisabled(const TestInfo* test_info) { return test_info->is_disabled_; } // Returns true iff this test will appear in the XML report. static bool TestReportable(const TestInfo* test_info) { return test_info->is_reportable(); } // Returns true if the given test should run. static bool ShouldRunTest(const TestInfo* test_info) { return test_info->should_run(); } // Shuffles the tests in this test case. void ShuffleTests(internal::Random* random); // Restores the test order to before the first shuffle. void UnshuffleTests(); // Name of the test case. std::string name_; // Name of the parameter type, or NULL if this is not a typed or a // type-parameterized test. const internal::scoped_ptr type_param_; // The vector of TestInfos in their original order. It owns the // elements in the vector. std::vector test_info_list_; // Provides a level of indirection for the test list to allow easy // shuffling and restoring the test order. 
The i-th element in this // vector is the index of the i-th test in the shuffled test list. std::vector test_indices_; // Pointer to the function that sets up the test case. Test::SetUpTestCaseFunc set_up_tc_; // Pointer to the function that tears down the test case. Test::TearDownTestCaseFunc tear_down_tc_; // True iff any test in this test case should run. bool should_run_; // Elapsed time, in milliseconds. TimeInMillis elapsed_time_; // Holds test properties recorded during execution of SetUpTestCase and // TearDownTestCase. TestResult ad_hoc_test_result_; // We disallow copying TestCases. GTEST_DISALLOW_COPY_AND_ASSIGN_(TestCase); }; // An Environment object is capable of setting up and tearing down an // environment. You should subclass this to define your own // environment(s). // // An Environment object does the set-up and tear-down in virtual // methods SetUp() and TearDown() instead of the constructor and the // destructor, as: // // 1. You cannot safely throw from a destructor. This is a problem // as in some cases Google Test is used where exceptions are enabled, and // we may want to implement ASSERT_* using exceptions where they are // available. // 2. You cannot use ASSERT_* directly in a constructor or // destructor. class Environment { public: // The d'tor is virtual as we need to subclass Environment. virtual ~Environment() {} // Override this to define how to set up the environment. virtual void SetUp() {} // Override this to define how to tear down the environment. virtual void TearDown() {} private: // If you see an error about overriding the following function or // about it being private, you have mis-spelled SetUp() as Setup(). struct Setup_should_be_spelled_SetUp {}; virtual Setup_should_be_spelled_SetUp* Setup() { return NULL; } }; #if GTEST_HAS_EXCEPTIONS // Exception which can be thrown from TestEventListener::OnTestPartResult. class GTEST_API_ AssertionException : public internal::GoogleTestFailureException { public: explicit AssertionException(const TestPartResult& result) : GoogleTestFailureException(result) {} }; #endif // GTEST_HAS_EXCEPTIONS // The interface for tracing execution of tests. The methods are organized in // the order the corresponding events are fired. class TestEventListener { public: virtual ~TestEventListener() {} // Fired before any test activity starts. virtual void OnTestProgramStart(const UnitTest& unit_test) = 0; // Fired before each iteration of tests starts. There may be more than // one iteration if GTEST_FLAG(repeat) is set. iteration is the iteration // index, starting from 0. virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration) = 0; // Fired before environment set-up for each iteration of tests starts. virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test) = 0; // Fired after environment set-up for each iteration of tests ends. virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test) = 0; // Fired before the test case starts. virtual void OnTestCaseStart(const TestCase& test_case) = 0; // Fired before the test starts. virtual void OnTestStart(const TestInfo& test_info) = 0; // Fired after a failed assertion or a SUCCEED() invocation. // If you want to throw an exception from this function to skip to the next // TEST, it must be AssertionException defined above, or inherited from it. virtual void OnTestPartResult(const TestPartResult& test_part_result) = 0; // Fired after the test ends. virtual void OnTestEnd(const TestInfo& test_info) = 0; // Fired after the test case ends. 
virtual void OnTestCaseEnd(const TestCase& test_case) = 0; // Fired before environment tear-down for each iteration of tests starts. virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test) = 0; // Fired after environment tear-down for each iteration of tests ends. virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test) = 0; // Fired after each iteration of tests finishes. virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration) = 0; // Fired after all test activities have ended. virtual void OnTestProgramEnd(const UnitTest& unit_test) = 0; }; // The convenience class for users who need to override just one or two // methods and are not concerned that a possible change to a signature of // the methods they override will not be caught during the build. For // comments about each method please see the definition of TestEventListener // above. class EmptyTestEventListener : public TestEventListener { public: virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {} virtual void OnTestIterationStart(const UnitTest& /*unit_test*/, int /*iteration*/) {} virtual void OnEnvironmentsSetUpStart(const UnitTest& /*unit_test*/) {} virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {} virtual void OnTestCaseStart(const TestCase& /*test_case*/) {} virtual void OnTestStart(const TestInfo& /*test_info*/) {} virtual void OnTestPartResult(const TestPartResult& /*test_part_result*/) {} virtual void OnTestEnd(const TestInfo& /*test_info*/) {} virtual void OnTestCaseEnd(const TestCase& /*test_case*/) {} virtual void OnEnvironmentsTearDownStart(const UnitTest& /*unit_test*/) {} virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {} virtual void OnTestIterationEnd(const UnitTest& /*unit_test*/, int /*iteration*/) {} virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {} }; // TestEventListeners lets users add listeners to track events in Google Test. class GTEST_API_ TestEventListeners { public: TestEventListeners(); ~TestEventListeners(); // Appends an event listener to the end of the list. Google Test assumes // the ownership of the listener (i.e. it will delete the listener when // the test program finishes). void Append(TestEventListener* listener); // Removes the given event listener from the list and returns it. It then // becomes the caller's responsibility to delete the listener. Returns // NULL if the listener is not found in the list. TestEventListener* Release(TestEventListener* listener); // Returns the standard listener responsible for the default console // output. Can be removed from the listeners list to shut down default // console output. Note that removing this object from the listener list // with Release transfers its ownership to the caller and makes this // function return NULL the next time. TestEventListener* default_result_printer() const { return default_result_printer_; } // Returns the standard listener responsible for the default XML output // controlled by the --gtest_output=xml flag. Can be removed from the // listeners list by users who want to shut down the default XML output // controlled by this flag and substitute it with custom one. Note that // removing this object from the listener list with Release transfers its // ownership to the caller and makes this function return NULL the next // time. 
TestEventListener* default_xml_generator() const { return default_xml_generator_; } private: friend class TestCase; friend class TestInfo; friend class internal::DefaultGlobalTestPartResultReporter; friend class internal::NoExecDeathTest; friend class internal::TestEventListenersAccessor; friend class internal::UnitTestImpl; // Returns repeater that broadcasts the TestEventListener events to all // subscribers. TestEventListener* repeater(); // Sets the default_result_printer attribute to the provided listener. // The listener is also added to the listener list and previous // default_result_printer is removed from it and deleted. The listener can // also be NULL in which case it will not be added to the list. Does // nothing if the previous and the current listener objects are the same. void SetDefaultResultPrinter(TestEventListener* listener); // Sets the default_xml_generator attribute to the provided listener. The // listener is also added to the listener list and previous // default_xml_generator is removed from it and deleted. The listener can // also be NULL in which case it will not be added to the list. Does // nothing if the previous and the current listener objects are the same. void SetDefaultXmlGenerator(TestEventListener* listener); // Controls whether events will be forwarded by the repeater to the // listeners in the list. bool EventForwardingEnabled() const; void SuppressEventForwarding(); // The actual list of listeners. internal::TestEventRepeater* repeater_; // Listener responsible for the standard result output. TestEventListener* default_result_printer_; // Listener responsible for the creation of the XML output file. TestEventListener* default_xml_generator_; // We disallow copying TestEventListeners. GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventListeners); }; // A UnitTest consists of a vector of TestCases. // // This is a singleton class. The only instance of UnitTest is // created when UnitTest::GetInstance() is first called. This // instance is never deleted. // // UnitTest is not copyable. // // This class is thread-safe as long as the methods are called // according to their specification. class GTEST_API_ UnitTest { public: // Gets the singleton UnitTest object. The first time this method // is called, a UnitTest object is constructed and returned. // Consecutive calls will return the same object. static UnitTest* GetInstance(); // Runs all tests in this UnitTest object and prints the result. // Returns 0 if successful, or 1 otherwise. // // This method can only be called from the main thread. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. int Run() GTEST_MUST_USE_RESULT_; // Returns the working directory when the first TEST() or TEST_F() // was executed. The UnitTest object owns the string. const char* original_working_dir() const; // Returns the TestCase object for the test that's currently running, // or NULL if no test is running. const TestCase* current_test_case() const GTEST_LOCK_EXCLUDED_(mutex_); // Returns the TestInfo object for the test that's currently running, // or NULL if no test is running. const TestInfo* current_test_info() const GTEST_LOCK_EXCLUDED_(mutex_); // Returns the random seed used at the start of the current test run. int random_seed() const; // Returns the ParameterizedTestCaseRegistry object used to keep track of // value-parameterized tests and instantiate and register them. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. 
internal::ParameterizedTestCaseRegistry& parameterized_test_registry() GTEST_LOCK_EXCLUDED_(mutex_); // Gets the number of successful test cases. int successful_test_case_count() const; // Gets the number of failed test cases. int failed_test_case_count() const; // Gets the number of all test cases. int total_test_case_count() const; // Gets the number of all test cases that contain at least one test // that should run. int test_case_to_run_count() const; // Gets the number of successful tests. int successful_test_count() const; // Gets the number of failed tests. int failed_test_count() const; // Gets the number of disabled tests that will be reported in the XML report. int reportable_disabled_test_count() const; // Gets the number of disabled tests. int disabled_test_count() const; // Gets the number of tests to be printed in the XML report. int reportable_test_count() const; // Gets the number of all tests. int total_test_count() const; // Gets the number of tests that should run. int test_to_run_count() const; // Gets the time of the test program start, in ms from the start of the // UNIX epoch. TimeInMillis start_timestamp() const; // Gets the elapsed time, in milliseconds. TimeInMillis elapsed_time() const; // Returns true iff the unit test passed (i.e. all test cases passed). bool Passed() const; // Returns true iff the unit test failed (i.e. some test case failed // or something outside of all tests failed). bool Failed() const; // Gets the i-th test case among all the test cases. i can range from 0 to // total_test_case_count() - 1. If i is not in that range, returns NULL. const TestCase* GetTestCase(int i) const; // Returns the TestResult containing information on test failures and // properties logged outside of individual test cases. const TestResult& ad_hoc_test_result() const; // Returns the list of event listeners that can be used to track events // inside Google Test. TestEventListeners& listeners(); private: // Registers and returns a global test environment. When a test // program is run, all global test environments will be set-up in // the order they were registered. After all tests in the program // have finished, all global test environments will be torn-down in // the *reverse* order they were registered. // // The UnitTest object takes ownership of the given environment. // // This method can only be called from the main thread. Environment* AddEnvironment(Environment* env); // Adds a TestPartResult to the current TestResult object. All // Google Test assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) // eventually call this to report their results. The user code // should use the assertion macros instead of calling this directly. void AddTestPartResult(TestPartResult::Type result_type, const char* file_name, int line_number, const std::string& message, const std::string& os_stack_trace) GTEST_LOCK_EXCLUDED_(mutex_); // Adds a TestProperty to the current TestResult object when invoked from // inside a test, to current TestCase's ad_hoc_test_result_ when invoked // from SetUpTestCase or TearDownTestCase, or to the global property set // when invoked elsewhere. If the result already contains a property with // the same key, the value will be updated. void RecordProperty(const std::string& key, const std::string& value); // Gets the i-th test case among all the test cases. i can range from 0 to // total_test_case_count() - 1. If i is not in that range, returns NULL. TestCase* GetMutableTestCase(int i); // Accessors for the implementation object. 
internal::UnitTestImpl* impl() { return impl_; } const internal::UnitTestImpl* impl() const { return impl_; } // These classes and functions are friends as they need to access private // members of UnitTest. friend class ScopedTrace; friend class Test; friend class internal::AssertHelper; friend class internal::StreamingListenerTest; friend class internal::UnitTestRecordPropertyTestHelper; friend Environment* AddGlobalTestEnvironment(Environment* env); friend internal::UnitTestImpl* internal::GetUnitTestImpl(); friend void internal::ReportFailureInUnknownLocation( TestPartResult::Type result_type, const std::string& message); // Creates an empty UnitTest. UnitTest(); // D'tor virtual ~UnitTest(); // Pushes a trace defined by SCOPED_TRACE() on to the per-thread // Google Test trace stack. void PushGTestTrace(const internal::TraceInfo& trace) GTEST_LOCK_EXCLUDED_(mutex_); // Pops a trace from the per-thread Google Test trace stack. void PopGTestTrace() GTEST_LOCK_EXCLUDED_(mutex_); // Protects mutable state in *impl_. This is mutable as some const // methods need to lock it too. mutable internal::Mutex mutex_; // Opaque implementation object. This field is never changed once // the object is constructed. We don't mark it as const here, as // doing so will cause a warning in the constructor of UnitTest. // Mutable state in *impl_ is protected by mutex_. internal::UnitTestImpl* impl_; // We disallow copying UnitTest. GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTest); }; // A convenient wrapper for adding an environment for the test // program. // // You should call this before RUN_ALL_TESTS() is called, probably in // main(). If you use gtest_main, you need to call this before main() // starts for it to take effect. For example, you can define a global // variable like this: // // testing::Environment* const foo_env = // testing::AddGlobalTestEnvironment(new FooEnvironment); // // However, we strongly recommend you to write your own main() and // call AddGlobalTestEnvironment() there, as relying on initialization // of global variables makes the code harder to read and may cause // problems when you register multiple environments from different // translation units and the environments have dependencies among them // (remember that the compiler doesn't guarantee the order in which // global variables from different translation units are initialized). inline Environment* AddGlobalTestEnvironment(Environment* env) { return UnitTest::GetInstance()->AddEnvironment(env); } // Initializes Google Test. This must be called before calling // RUN_ALL_TESTS(). In particular, it parses a command line for the // flags that Google Test recognizes. Whenever a Google Test flag is // seen, it is removed from argv, and *argc is decremented. // // No value is returned. Instead, the Google Test flag variables are // updated. // // Calling the function for the second time has no user-visible effect. GTEST_API_ void InitGoogleTest(int* argc, char** argv); // This overloaded version can be used in Windows programs compiled in // UNICODE mode. GTEST_API_ void InitGoogleTest(int* argc, wchar_t** argv); namespace internal { // Separate the error generating code from the code path to reduce the stack // frame size of CmpHelperEQ. This helps reduce the overhead of some sanitizers // when calling EXPECT_* in a tight loop. 
template <typename T1, typename T2> AssertionResult CmpHelperEQFailure(const char* lhs_expression, const char* rhs_expression, const T1& lhs, const T2& rhs) { return EqFailure(lhs_expression, rhs_expression, FormatForComparisonFailureMessage(lhs, rhs), FormatForComparisonFailureMessage(rhs, lhs), false); } // The helper function for {ASSERT|EXPECT}_EQ. template <typename T1, typename T2> AssertionResult CmpHelperEQ(const char* lhs_expression, const char* rhs_expression, const T1& lhs, const T2& rhs) { if (lhs == rhs) { return AssertionSuccess(); } return CmpHelperEQFailure(lhs_expression, rhs_expression, lhs, rhs); } // With this overloaded version, we allow anonymous enums to be used // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous enums // can be implicitly cast to BiggestInt. GTEST_API_ AssertionResult CmpHelperEQ(const char* lhs_expression, const char* rhs_expression, BiggestInt lhs, BiggestInt rhs); // The helper class for {ASSERT|EXPECT}_EQ. The template argument // lhs_is_null_literal is true iff the first argument to ASSERT_EQ() // is a null pointer literal. The following default implementation is // for lhs_is_null_literal being false. template <bool lhs_is_null_literal> class EqHelper { public: // This templatized version is for the general case. template <typename T1, typename T2> static AssertionResult Compare(const char* lhs_expression, const char* rhs_expression, const T1& lhs, const T2& rhs) { return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs); } // With this overloaded version, we allow anonymous enums to be used // in {ASSERT|EXPECT}_EQ when compiled with gcc 4, as anonymous // enums can be implicitly cast to BiggestInt. // // Even though its body looks the same as the above version, we // cannot merge the two, as it will make anonymous enums unhappy. static AssertionResult Compare(const char* lhs_expression, const char* rhs_expression, BiggestInt lhs, BiggestInt rhs) { return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs); } }; // This specialization is used when the first argument to ASSERT_EQ() // is a null pointer literal, like NULL, false, or 0. template <> class EqHelper<true> { public: // We define two overloaded versions of Compare(). The first // version will be picked when the second argument to ASSERT_EQ() is // NOT a pointer, e.g. ASSERT_EQ(0, AnIntFunction()) or // EXPECT_EQ(false, a_bool). template <typename T1, typename T2> static AssertionResult Compare( const char* lhs_expression, const char* rhs_expression, const T1& lhs, const T2& rhs, // The following line prevents this overload from being considered if T2 // is not a pointer type. We need this because ASSERT_EQ(NULL, my_ptr) // expands to Compare("", "", NULL, my_ptr), which requires a conversion // to match the Secret* in the other overload, which would otherwise make // this template match better. typename EnableIf<!is_pointer<T2>::value>::type* = 0) { return CmpHelperEQ(lhs_expression, rhs_expression, lhs, rhs); } // This version will be picked when the second argument to ASSERT_EQ() is a // pointer, e.g. ASSERT_EQ(NULL, a_pointer). template <typename T> static AssertionResult Compare( const char* lhs_expression, const char* rhs_expression, // We used to have a second template parameter instead of Secret*. That // template parameter would deduce to 'long', making this a better match // than the first overload even without the first overload's EnableIf. // Unfortunately, gcc with -Wconversion-null warns when "passing NULL to // non-pointer argument" (even a deduced integral argument), so the old // implementation caused warnings in user code. Secret* /* lhs (NULL) */, T* rhs) { // We already know that 'lhs' is a null pointer.
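// Casting it to T* below keeps both sides of the comparison the same
// pointer type, so CmpHelperEQ instantiates for T* and the failure
// message prints the left-hand side as a pointer value.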
return CmpHelperEQ(lhs_expression, rhs_expression, static_cast<T*>(NULL), rhs); } }; // Separate the error generating code from the code path to reduce the stack // frame size of CmpHelperOP. This helps reduce the overhead of some sanitizers // when calling EXPECT_OP in a tight loop. template <typename T1, typename T2> AssertionResult CmpHelperOpFailure(const char* expr1, const char* expr2, const T1& val1, const T2& val2, const char* op) { return AssertionFailure() << "Expected: (" << expr1 << ") " << op << " (" << expr2 << "), actual: " << FormatForComparisonFailureMessage(val1, val2) << " vs " << FormatForComparisonFailureMessage(val2, val1); } // A macro for implementing the helper functions needed to implement // ASSERT_?? and EXPECT_??. It is here just to avoid copy-and-paste // of similar code. // // For each templatized helper function, we also define an overloaded // version for BiggestInt in order to reduce code bloat and allow // anonymous enums to be used with {ASSERT|EXPECT}_?? when compiled // with gcc 4. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. #define GTEST_IMPL_CMP_HELPER_(op_name, op)\ template <typename T1, typename T2>\ AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \ const T1& val1, const T2& val2) {\ if (val1 op val2) {\ return AssertionSuccess();\ } else {\ return CmpHelperOpFailure(expr1, expr2, val1, val2, #op);\ }\ }\ GTEST_API_ AssertionResult CmpHelper##op_name(\ const char* expr1, const char* expr2, BiggestInt val1, BiggestInt val2) // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. // Implements the helper function for {ASSERT|EXPECT}_NE GTEST_IMPL_CMP_HELPER_(NE, !=); // Implements the helper function for {ASSERT|EXPECT}_LE GTEST_IMPL_CMP_HELPER_(LE, <=); // Implements the helper function for {ASSERT|EXPECT}_LT GTEST_IMPL_CMP_HELPER_(LT, <); // Implements the helper function for {ASSERT|EXPECT}_GE GTEST_IMPL_CMP_HELPER_(GE, >=); // Implements the helper function for {ASSERT|EXPECT}_GT GTEST_IMPL_CMP_HELPER_(GT, >); #undef GTEST_IMPL_CMP_HELPER_ // The helper function for {ASSERT|EXPECT}_STREQ. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. GTEST_API_ AssertionResult CmpHelperSTREQ(const char* s1_expression, const char* s2_expression, const char* s1, const char* s2); // The helper function for {ASSERT|EXPECT}_STRCASEEQ. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. GTEST_API_ AssertionResult CmpHelperSTRCASEEQ(const char* s1_expression, const char* s2_expression, const char* s1, const char* s2); // The helper function for {ASSERT|EXPECT}_STRNE. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression, const char* s2_expression, const char* s1, const char* s2); // The helper function for {ASSERT|EXPECT}_STRCASENE. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. GTEST_API_ AssertionResult CmpHelperSTRCASENE(const char* s1_expression, const char* s2_expression, const char* s1, const char* s2); // Helper function for *_STREQ on wide strings. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. GTEST_API_ AssertionResult CmpHelperSTREQ(const char* s1_expression, const char* s2_expression, const wchar_t* s1, const wchar_t* s2); // Helper function for *_STRNE on wide strings. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM.
GTEST_API_ AssertionResult CmpHelperSTRNE(const char* s1_expression, const char* s2_expression, const wchar_t* s1, const wchar_t* s2); } // namespace internal // IsSubstring() and IsNotSubstring() are intended to be used as the // first argument to {EXPECT,ASSERT}_PRED_FORMAT2(), not by // themselves. They check whether needle is a substring of haystack // (NULL is considered a substring of itself only), and return an // appropriate error message when they fail. // // The {needle,haystack}_expr arguments are the stringified // expressions that generated the two real arguments. GTEST_API_ AssertionResult IsSubstring( const char* needle_expr, const char* haystack_expr, const char* needle, const char* haystack); GTEST_API_ AssertionResult IsSubstring( const char* needle_expr, const char* haystack_expr, const wchar_t* needle, const wchar_t* haystack); GTEST_API_ AssertionResult IsNotSubstring( const char* needle_expr, const char* haystack_expr, const char* needle, const char* haystack); GTEST_API_ AssertionResult IsNotSubstring( const char* needle_expr, const char* haystack_expr, const wchar_t* needle, const wchar_t* haystack); GTEST_API_ AssertionResult IsSubstring( const char* needle_expr, const char* haystack_expr, const ::std::string& needle, const ::std::string& haystack); GTEST_API_ AssertionResult IsNotSubstring( const char* needle_expr, const char* haystack_expr, const ::std::string& needle, const ::std::string& haystack); #if GTEST_HAS_STD_WSTRING GTEST_API_ AssertionResult IsSubstring( const char* needle_expr, const char* haystack_expr, const ::std::wstring& needle, const ::std::wstring& haystack); GTEST_API_ AssertionResult IsNotSubstring( const char* needle_expr, const char* haystack_expr, const ::std::wstring& needle, const ::std::wstring& haystack); #endif // GTEST_HAS_STD_WSTRING namespace internal { // Helper template function for comparing floating-points. // // Template parameter: // // RawType: the raw floating-point type (either float or double) // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. template <typename RawType> AssertionResult CmpHelperFloatingPointEQ(const char* lhs_expression, const char* rhs_expression, RawType lhs_value, RawType rhs_value) { const FloatingPoint<RawType> lhs(lhs_value), rhs(rhs_value); if (lhs.AlmostEquals(rhs)) { return AssertionSuccess(); } ::std::stringstream lhs_ss; lhs_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2) << lhs_value; ::std::stringstream rhs_ss; rhs_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2) << rhs_value; return EqFailure(lhs_expression, rhs_expression, StringStreamToString(&lhs_ss), StringStreamToString(&rhs_ss), false); } // Helper function for implementing ASSERT_NEAR. // // INTERNAL IMPLEMENTATION - DO NOT USE IN A USER PROGRAM. GTEST_API_ AssertionResult DoubleNearPredFormat(const char* expr1, const char* expr2, const char* abs_error_expr, double val1, double val2, double abs_error); // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // A class that enables one to stream messages to assertion macros class GTEST_API_ AssertHelper { public: // Constructor. AssertHelper(TestPartResult::Type type, const char* file, int line, const char* message); ~AssertHelper(); // Message assignment is a semantic trick to enable assertion // streaming; see the GTEST_MESSAGE_ macro below. void operator=(const Message& message) const; private: // We put our data in a struct so that the size of the AssertHelper class can // be as small as possible.
This is important because gcc is incapable of // re-using stack space even for temporary variables, so every EXPECT_EQ // reserves stack space for another AssertHelper. struct AssertHelperData { AssertHelperData(TestPartResult::Type t, const char* srcfile, int line_num, const char* msg) : type(t), file(srcfile), line(line_num), message(msg) { } TestPartResult::Type const type; const char* const file; int const line; std::string const message; private: GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelperData); }; AssertHelperData* const data_; GTEST_DISALLOW_COPY_AND_ASSIGN_(AssertHelper); }; } // namespace internal // The pure interface class that all value-parameterized tests inherit from. // A value-parameterized class must inherit from both ::testing::Test and // ::testing::WithParamInterface<T>. In most cases that just means inheriting // from ::testing::TestWithParam<T>, but more complicated test hierarchies // may need to inherit from Test and WithParamInterface at different levels. // // This interface has support for accessing the test parameter value via // the GetParam() method. // // Use it with one of the parameter generator defining functions, like Range(), // Values(), ValuesIn(), Bool(), and Combine(). // // class FooTest : public ::testing::TestWithParam<int> { // protected: // FooTest() { // // Can use GetParam() here. // } // virtual ~FooTest() { // // Can use GetParam() here. // } // virtual void SetUp() { // // Can use GetParam() here. // } // virtual void TearDown() { // // Can use GetParam() here. // } // }; // TEST_P(FooTest, DoesBar) { // // Can use GetParam() method here. // Foo foo; // ASSERT_TRUE(foo.DoesBar(GetParam())); // } // INSTANTIATE_TEST_CASE_P(OneToTenRange, FooTest, ::testing::Range(1, 10)); template <typename T> class WithParamInterface { public: typedef T ParamType; virtual ~WithParamInterface() {} // The current parameter value. Is also available in the test fixture's // constructor. static const ParamType& GetParam() { GTEST_CHECK_(parameter_ != NULL) << "GetParam() can only be called inside a value-parameterized test " << "-- did you intend to write TEST_P instead of TEST_F?"; return *parameter_; } private: // Sets parameter value. The caller is responsible for making sure the value // remains alive and unchanged throughout the current test. static void SetParam(const ParamType* parameter) { parameter_ = parameter; } // Static value used for accessing parameter during a test lifetime. static const ParamType* parameter_; // TestClass must be a subclass of WithParamInterface and Test. template <class TestClass> friend class internal::ParameterizedTestFactory; }; template <typename T> const T* WithParamInterface<T>::parameter_ = NULL; // Most value-parameterized classes can ignore the existence of // WithParamInterface, and can just inherit from ::testing::TestWithParam. template <typename T> class TestWithParam : public Test, public WithParamInterface<T> { }; // Macros for indicating success/failure in test code. // ADD_FAILURE unconditionally adds a failure to the current test. // SUCCEED generates a success - it doesn't automatically make the // current test successful, as a test is only successful when it has // no failure. // // EXPECT_* verifies that a certain condition is satisfied. If not, // it behaves like ADD_FAILURE. In particular: // // EXPECT_TRUE verifies that a Boolean condition is true. // EXPECT_FALSE verifies that a Boolean condition is false. // // FAIL and ASSERT_* are similar to ADD_FAILURE and EXPECT_*, except // that they will also abort the current function on failure.
People // usually want the fail-fast behavior of FAIL and ASSERT_*, but those // writing data-driven tests often find themselves using ADD_FAILURE // and EXPECT_* more. // Generates a nonfatal failure with a generic message. #define ADD_FAILURE() GTEST_NONFATAL_FAILURE_("Failed") // Generates a nonfatal failure at the given source file location with // a generic message. #define ADD_FAILURE_AT(file, line) \ GTEST_MESSAGE_AT_(file, line, "Failed", \ ::testing::TestPartResult::kNonFatalFailure) // Generates a fatal failure with a generic message. #define GTEST_FAIL() GTEST_FATAL_FAILURE_("Failed") // Define this macro to 1 to omit the definition of FAIL(), which is a // generic name and clashes with some other libraries. #if !GTEST_DONT_DEFINE_FAIL # define FAIL() GTEST_FAIL() #endif // Generates a success with a generic message. #define GTEST_SUCCEED() GTEST_SUCCESS_("Succeeded") // Define this macro to 1 to omit the definition of SUCCEED(), which // is a generic name and clashes with some other libraries. #if !GTEST_DONT_DEFINE_SUCCEED # define SUCCEED() GTEST_SUCCEED() #endif // Macros for testing exceptions. // // * {ASSERT|EXPECT}_THROW(statement, expected_exception): // Tests that the statement throws the expected exception. // * {ASSERT|EXPECT}_NO_THROW(statement): // Tests that the statement doesn't throw any exception. // * {ASSERT|EXPECT}_ANY_THROW(statement): // Tests that the statement throws an exception. #define EXPECT_THROW(statement, expected_exception) \ GTEST_TEST_THROW_(statement, expected_exception, GTEST_NONFATAL_FAILURE_) #define EXPECT_NO_THROW(statement) \ GTEST_TEST_NO_THROW_(statement, GTEST_NONFATAL_FAILURE_) #define EXPECT_ANY_THROW(statement) \ GTEST_TEST_ANY_THROW_(statement, GTEST_NONFATAL_FAILURE_) #define ASSERT_THROW(statement, expected_exception) \ GTEST_TEST_THROW_(statement, expected_exception, GTEST_FATAL_FAILURE_) #define ASSERT_NO_THROW(statement) \ GTEST_TEST_NO_THROW_(statement, GTEST_FATAL_FAILURE_) #define ASSERT_ANY_THROW(statement) \ GTEST_TEST_ANY_THROW_(statement, GTEST_FATAL_FAILURE_) // Boolean assertions. Condition can be either a Boolean expression or an // AssertionResult. For more information on how to use AssertionResult with // these macros see comments on that class. #define EXPECT_TRUE(condition) \ GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \ GTEST_NONFATAL_FAILURE_) #define EXPECT_FALSE(condition) \ GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \ GTEST_NONFATAL_FAILURE_) #define ASSERT_TRUE(condition) \ GTEST_TEST_BOOLEAN_(condition, #condition, false, true, \ GTEST_FATAL_FAILURE_) #define ASSERT_FALSE(condition) \ GTEST_TEST_BOOLEAN_(!(condition), #condition, true, false, \ GTEST_FATAL_FAILURE_) // Macros for testing equalities and inequalities. // // * {ASSERT|EXPECT}_EQ(v1, v2): Tests that v1 == v2 // * {ASSERT|EXPECT}_NE(v1, v2): Tests that v1 != v2 // * {ASSERT|EXPECT}_LT(v1, v2): Tests that v1 < v2 // * {ASSERT|EXPECT}_LE(v1, v2): Tests that v1 <= v2 // * {ASSERT|EXPECT}_GT(v1, v2): Tests that v1 > v2 // * {ASSERT|EXPECT}_GE(v1, v2): Tests that v1 >= v2 // // When they are not, Google Test prints both the tested expressions and // their actual values. The values must be compatible built-in types, // or you will get a compiler error. By "compatible" we mean that the // values can be compared by the respective operator. // // Note: // // 1. 
It is possible to make a user-defined type work with // {ASSERT|EXPECT}_??(), but that requires overloading the // comparison operators and is thus discouraged by the Google C++ // Usage Guide. Therefore, you are advised to use the // {ASSERT|EXPECT}_TRUE() macro to assert that two objects are // equal. // // 2. The {ASSERT|EXPECT}_??() macros do pointer comparisons on // pointers (in particular, C strings). Therefore, if you use it // with two C strings, you are testing how their locations in memory // are related, not how their content is related. To compare two C // strings by content, use {ASSERT|EXPECT}_STR*(). // // 3. {ASSERT|EXPECT}_EQ(v1, v2) is preferred to // {ASSERT|EXPECT}_TRUE(v1 == v2), as the former tells you // what the actual value is when it fails, and similarly for the // other comparisons. // // 4. Do not depend on the order in which {ASSERT|EXPECT}_??() // evaluate their arguments, which is undefined. // // 5. These macros evaluate their arguments exactly once. // // Examples: // // EXPECT_NE(Foo(), 5); // EXPECT_EQ(a_pointer, NULL); // ASSERT_LT(i, array_size); // ASSERT_GT(records.size(), 0) << "There is no record left."; #define EXPECT_EQ(val1, val2) \ EXPECT_PRED_FORMAT2(::testing::internal:: \ EqHelper<GTEST_IS_NULL_LITERAL_(val1)>::Compare, \ val1, val2) #define EXPECT_NE(val1, val2) \ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2) #define EXPECT_LE(val1, val2) \ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2) #define EXPECT_LT(val1, val2) \ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2) #define EXPECT_GE(val1, val2) \ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2) #define EXPECT_GT(val1, val2) \ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2) #define GTEST_ASSERT_EQ(val1, val2) \ ASSERT_PRED_FORMAT2(::testing::internal:: \ EqHelper<GTEST_IS_NULL_LITERAL_(val1)>::Compare, \ val1, val2) #define GTEST_ASSERT_NE(val1, val2) \ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperNE, val1, val2) #define GTEST_ASSERT_LE(val1, val2) \ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLE, val1, val2) #define GTEST_ASSERT_LT(val1, val2) \ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperLT, val1, val2) #define GTEST_ASSERT_GE(val1, val2) \ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGE, val1, val2) #define GTEST_ASSERT_GT(val1, val2) \ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperGT, val1, val2) // Define macro GTEST_DONT_DEFINE_ASSERT_XY to 1 to omit the definition of // ASSERT_XY(), which clashes with some users' own code. #if !GTEST_DONT_DEFINE_ASSERT_EQ # define ASSERT_EQ(val1, val2) GTEST_ASSERT_EQ(val1, val2) #endif #if !GTEST_DONT_DEFINE_ASSERT_NE # define ASSERT_NE(val1, val2) GTEST_ASSERT_NE(val1, val2) #endif #if !GTEST_DONT_DEFINE_ASSERT_LE # define ASSERT_LE(val1, val2) GTEST_ASSERT_LE(val1, val2) #endif #if !GTEST_DONT_DEFINE_ASSERT_LT # define ASSERT_LT(val1, val2) GTEST_ASSERT_LT(val1, val2) #endif #if !GTEST_DONT_DEFINE_ASSERT_GE # define ASSERT_GE(val1, val2) GTEST_ASSERT_GE(val1, val2) #endif #if !GTEST_DONT_DEFINE_ASSERT_GT # define ASSERT_GT(val1, val2) GTEST_ASSERT_GT(val1, val2) #endif // C-string Comparisons. All tests treat NULL and any non-NULL string // as different. Two NULLs are equal.
// // * {ASSERT|EXPECT}_STREQ(s1, s2): Tests that s1 == s2 // * {ASSERT|EXPECT}_STRNE(s1, s2): Tests that s1 != s2 // * {ASSERT|EXPECT}_STRCASEEQ(s1, s2): Tests that s1 == s2, ignoring case // * {ASSERT|EXPECT}_STRCASENE(s1, s2): Tests that s1 != s2, ignoring case // // For wide or narrow string objects, you can use the // {ASSERT|EXPECT}_??() macros. // // Don't depend on the order in which the arguments are evaluated, // which is undefined. // // These macros evaluate their arguments exactly once. #define EXPECT_STREQ(s1, s2) \ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, s1, s2) #define EXPECT_STRNE(s1, s2) \ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2) #define EXPECT_STRCASEEQ(s1, s2) \ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, s1, s2) #define EXPECT_STRCASENE(s1, s2)\ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2) #define ASSERT_STREQ(s1, s2) \ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTREQ, s1, s2) #define ASSERT_STRNE(s1, s2) \ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRNE, s1, s2) #define ASSERT_STRCASEEQ(s1, s2) \ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASEEQ, s1, s2) #define ASSERT_STRCASENE(s1, s2)\ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperSTRCASENE, s1, s2) // Macros for comparing floating-point numbers. // // * {ASSERT|EXPECT}_FLOAT_EQ(val1, val2): // Tests that two float values are almost equal. // * {ASSERT|EXPECT}_DOUBLE_EQ(val1, val2): // Tests that two double values are almost equal. // * {ASSERT|EXPECT}_NEAR(v1, v2, abs_error): // Tests that v1 and v2 are within the given distance to each other. // // Google Test uses ULP-based comparison to automatically pick a default // error bound that is appropriate for the operands. See the // FloatingPoint template class in gtest-internal.h if you are // interested in the implementation details. #define EXPECT_FLOAT_EQ(val1, val2)\ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \ val1, val2) #define EXPECT_DOUBLE_EQ(val1, val2)\ EXPECT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \ val1, val2) #define ASSERT_FLOAT_EQ(val1, val2)\ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<float>, \ val1, val2) #define ASSERT_DOUBLE_EQ(val1, val2)\ ASSERT_PRED_FORMAT2(::testing::internal::CmpHelperFloatingPointEQ<double>, \ val1, val2) #define EXPECT_NEAR(val1, val2, abs_error)\ EXPECT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \ val1, val2, abs_error) #define ASSERT_NEAR(val1, val2, abs_error)\ ASSERT_PRED_FORMAT3(::testing::internal::DoubleNearPredFormat, \ val1, val2, abs_error) // These predicate format functions work on floating-point values, and // can be used in {ASSERT|EXPECT}_PRED_FORMAT2*(), e.g. // // EXPECT_PRED_FORMAT2(testing::DoubleLE, Foo(), 5.0); // Asserts that val1 is less than, or almost equal to, val2. Fails // otherwise. In particular, it fails if either val1 or val2 is NaN. GTEST_API_ AssertionResult FloatLE(const char* expr1, const char* expr2, float val1, float val2); GTEST_API_ AssertionResult DoubleLE(const char* expr1, const char* expr2, double val1, double val2); #if GTEST_OS_WINDOWS // Macros that test for HRESULT failure and success, these are only useful // on Windows, and rely on Windows SDK macros and APIs to compile.
// // * {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED}(expr) // // When expr unexpectedly fails or succeeds, Google Test prints the // expected result and the actual result with both a human-readable // string representation of the error, if available, as well as the // hex result code. # define EXPECT_HRESULT_SUCCEEDED(expr) \ EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr)) # define ASSERT_HRESULT_SUCCEEDED(expr) \ ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTSuccess, (expr)) # define EXPECT_HRESULT_FAILED(expr) \ EXPECT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr)) # define ASSERT_HRESULT_FAILED(expr) \ ASSERT_PRED_FORMAT1(::testing::internal::IsHRESULTFailure, (expr)) #endif // GTEST_OS_WINDOWS // Macros that execute statement and check that it doesn't generate new fatal // failures in the current thread. // // * {ASSERT|EXPECT}_NO_FATAL_FAILURE(statement); // // Examples: // // EXPECT_NO_FATAL_FAILURE(Process()); // ASSERT_NO_FATAL_FAILURE(Process()) << "Process() failed"; // #define ASSERT_NO_FATAL_FAILURE(statement) \ GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_FATAL_FAILURE_) #define EXPECT_NO_FATAL_FAILURE(statement) \ GTEST_TEST_NO_FATAL_FAILURE_(statement, GTEST_NONFATAL_FAILURE_) // Causes a trace (including the given source file path and line number, // and the given message) to be included in every test failure message generated // by code in the scope of the lifetime of an instance of this class. The effect // is undone with the destruction of the instance. // // The message argument can be anything streamable to std::ostream. // // Example: // testing::ScopedTrace trace("file.cc", 123, "message"); // class GTEST_API_ ScopedTrace { public: // The c'tor pushes the given source file location and message onto // a trace stack maintained by Google Test. // Template version. Uses Message() to convert the values into strings. // Slow, but flexible. template ScopedTrace(const char* file, int line, const T& message) { PushTrace(file, line, (Message() << message).GetString()); } // Optimize for some known types. ScopedTrace(const char* file, int line, const char* message) { PushTrace(file, line, message ? message : "(null)"); } #if GTEST_HAS_GLOBAL_STRING ScopedTrace(const char* file, int line, const ::string& message) { PushTrace(file, line, message); } #endif ScopedTrace(const char* file, int line, const std::string& message) { PushTrace(file, line, message); } // The d'tor pops the info pushed by the c'tor. // // Note that the d'tor is not virtual in order to be efficient. // Don't inherit from ScopedTrace! ~ScopedTrace(); private: void PushTrace(const char* file, int line, std::string message); GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedTrace); } GTEST_ATTRIBUTE_UNUSED_; // A ScopedTrace object does its job in its // c'tor and d'tor. Therefore it doesn't // need to be used otherwise. // Causes a trace (including the source file path, the current line // number, and the given message) to be included in every test failure // message generated by code in the current scope. The effect is // undone when the control leaves the current scope. // // The message argument can be anything streamable to std::ostream. // // In the implementation, we include the current line number as part // of the dummy variable name, thus allowing multiple SCOPED_TRACE()s // to appear in the same block - as long as they are on different // lines. // // Assuming that each thread maintains its own stack of traces. 
// Therefore, a SCOPED_TRACE() would (correctly) only affect the // assertions in its own thread. #define SCOPED_TRACE(message) \ ::testing::ScopedTrace GTEST_CONCAT_TOKEN_(gtest_trace_, __LINE__)(\ __FILE__, __LINE__, (message)) // Compile-time assertion for type equality. // StaticAssertTypeEq<type1, type2>() compiles iff type1 and type2 are // the same type. The value it returns is not interesting. // // Instead of making StaticAssertTypeEq a class template, we make it a // function template that invokes a helper class template. This // prevents a user from misusing StaticAssertTypeEq<T1, T2> by // defining objects of that type. // // CAVEAT: // // When used inside a method of a class template, // StaticAssertTypeEq<T1, T2>() is effective ONLY IF the method is // instantiated. For example, given: // // template <typename T> class Foo { // public: // void Bar() { testing::StaticAssertTypeEq<int, T>(); } // }; // // the code: // // void Test1() { Foo<bool> foo; } // // will NOT generate a compiler error, as Foo<bool>::Bar() is never // actually instantiated. Instead, you need: // // void Test2() { Foo<bool> foo; foo.Bar(); } // // to cause a compiler error. template <typename T1, typename T2> bool StaticAssertTypeEq() { (void)internal::StaticAssertTypeEqHelper<T1, T2>(); return true; } // Defines a test. // // The first parameter is the name of the test case, and the second // parameter is the name of the test within the test case. // // The convention is to end the test case name with "Test". For // example, a test case for the Foo class can be named FooTest. // // Test code should appear between braces after an invocation of // this macro. Example: // // TEST(FooTest, InitializesCorrectly) { // Foo foo; // EXPECT_TRUE(foo.StatusIsOK()); // } // Note that we call GetTestTypeId() instead of GetTypeId< // ::testing::Test>() here to get the type ID of testing::Test. This // is to work around a suspected linker bug when using Google Test as // a framework on Mac OS X. The bug causes GetTypeId< // ::testing::Test>() to return different values depending on whether // the call is from the Google Test framework itself or from user test // code. GetTestTypeId() is guaranteed to always return the same // value, as it always calls GetTypeId<>() from the Google Test // framework. #define GTEST_TEST(test_case_name, test_name)\ GTEST_TEST_(test_case_name, test_name, \ ::testing::Test, ::testing::internal::GetTestTypeId()) // Define this macro to 1 to omit the definition of TEST(), which // is a generic name and clashes with some other libraries. #if !GTEST_DONT_DEFINE_TEST # define TEST(test_case_name, test_name) GTEST_TEST(test_case_name, test_name) #endif // Defines a test that uses a test fixture. // // The first parameter is the name of the test fixture class, which // also doubles as the test case name. The second parameter is the // name of the test within the test case. // // A test fixture class must be declared earlier. The user should put // the test code between braces after using this macro. Example: // // class FooTest : public testing::Test { // protected: // virtual void SetUp() { b_.AddElement(3); } // // Foo a_; // Foo b_; // }; // // TEST_F(FooTest, InitializesCorrectly) { // EXPECT_TRUE(a_.StatusIsOK()); // } // // TEST_F(FooTest, ReturnsElementCountCorrectly) { // EXPECT_EQ(a_.size(), 0); // EXPECT_EQ(b_.size(), 1); // } #define TEST_F(test_fixture, test_name)\ GTEST_TEST_(test_fixture, test_name, test_fixture, \ ::testing::internal::GetTypeId<test_fixture>()) // Returns a path to temporary directory. // Tries to determine an appropriate directory for the platform.
GTEST_API_ std::string TempDir(); #ifdef _MSC_VER # pragma warning(pop) #endif } // namespace testing // Use this function in main() to run all tests. It returns 0 if all // tests are successful, or 1 otherwise. // // RUN_ALL_TESTS() should be invoked after the command line has been // parsed by InitGoogleTest(). // // This function was formerly a macro; thus, it is in the global // namespace and has an all-caps name. int RUN_ALL_TESTS() GTEST_MUST_USE_RESULT_; inline int RUN_ALL_TESTS() { return ::testing::UnitTest::GetInstance()->Run(); } GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 #endif // GTEST_INCLUDE_GTEST_GTEST_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/gtest_pred_impl.h000066400000000000000000000351401357355204000264340ustar00rootroot00000000000000// Copyright 2006, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // This file is AUTOMATICALLY GENERATED on 01/02/2018 by command // 'gen_gtest_pred_impl.py 5'. DO NOT EDIT BY HAND! // // Implements a family of generic predicate assertion macros. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ #define GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ #include "gtest/gtest.h" namespace testing { // This header implements a family of generic predicate assertion // macros: // // ASSERT_PRED_FORMAT1(pred_format, v1) // ASSERT_PRED_FORMAT2(pred_format, v1, v2) // ... // // where pred_format is a function or functor that takes n (in the // case of ASSERT_PRED_FORMATn) values and their source expression // text, and returns a testing::AssertionResult. See the definition // of ASSERT_EQ in gtest.h for an example. // // If you don't care about formatting, you can use the more // restrictive version: // // ASSERT_PRED1(pred, v1) // ASSERT_PRED2(pred, v1, v2) // ... // // where pred is an n-ary function or functor that returns bool, // and the values v1, v2, ..., must support the << operator for // streaming to std::ostream. // // We also define the EXPECT_* variations. // // For now we only support predicates whose arity is at most 5. 
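// Illustrative sketch (not part of this header; MutuallyPrime and the
// operands below are hypothetical user code): given a predicate
//
//   bool MutuallyPrime(int m, int n);  // true iff gcd(m, n) == 1
//
// and const int a = 3, b = 4, c = 10, the assertion
// EXPECT_PRED2(MutuallyPrime, a, b) succeeds, while
// EXPECT_PRED2(MutuallyPrime, b, c) fails with a message like
//
//   MutuallyPrime(b, c) evaluates to false, where
//   b evaluates to 4
//   c evaluates to 10
//
// matching the format produced by the AssertPred2Helper defined below.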
// GTEST_ASSERT_ is the basic statement to which all of the assertions // in this file reduce. Don't use this in your code. #define GTEST_ASSERT_(expression, on_failure) \ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (const ::testing::AssertionResult gtest_ar = (expression)) \ ; \ else \ on_failure(gtest_ar.failure_message()) // Helper function for implementing {EXPECT|ASSERT}_PRED1. Don't use // this in your code. template AssertionResult AssertPred1Helper(const char* pred_text, const char* e1, Pred pred, const T1& v1) { if (pred(v1)) return AssertionSuccess(); return AssertionFailure() << pred_text << "(" << e1 << ") evaluates to false, where" << "\n" << e1 << " evaluates to " << v1; } // Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT1. // Don't use this in your code. #define GTEST_PRED_FORMAT1_(pred_format, v1, on_failure)\ GTEST_ASSERT_(pred_format(#v1, v1), \ on_failure) // Internal macro for implementing {EXPECT|ASSERT}_PRED1. Don't use // this in your code. #define GTEST_PRED1_(pred, v1, on_failure)\ GTEST_ASSERT_(::testing::AssertPred1Helper(#pred, \ #v1, \ pred, \ v1), on_failure) // Unary predicate assertion macros. #define EXPECT_PRED_FORMAT1(pred_format, v1) \ GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_NONFATAL_FAILURE_) #define EXPECT_PRED1(pred, v1) \ GTEST_PRED1_(pred, v1, GTEST_NONFATAL_FAILURE_) #define ASSERT_PRED_FORMAT1(pred_format, v1) \ GTEST_PRED_FORMAT1_(pred_format, v1, GTEST_FATAL_FAILURE_) #define ASSERT_PRED1(pred, v1) \ GTEST_PRED1_(pred, v1, GTEST_FATAL_FAILURE_) // Helper function for implementing {EXPECT|ASSERT}_PRED2. Don't use // this in your code. template AssertionResult AssertPred2Helper(const char* pred_text, const char* e1, const char* e2, Pred pred, const T1& v1, const T2& v2) { if (pred(v1, v2)) return AssertionSuccess(); return AssertionFailure() << pred_text << "(" << e1 << ", " << e2 << ") evaluates to false, where" << "\n" << e1 << " evaluates to " << v1 << "\n" << e2 << " evaluates to " << v2; } // Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT2. // Don't use this in your code. #define GTEST_PRED_FORMAT2_(pred_format, v1, v2, on_failure)\ GTEST_ASSERT_(pred_format(#v1, #v2, v1, v2), \ on_failure) // Internal macro for implementing {EXPECT|ASSERT}_PRED2. Don't use // this in your code. #define GTEST_PRED2_(pred, v1, v2, on_failure)\ GTEST_ASSERT_(::testing::AssertPred2Helper(#pred, \ #v1, \ #v2, \ pred, \ v1, \ v2), on_failure) // Binary predicate assertion macros. #define EXPECT_PRED_FORMAT2(pred_format, v1, v2) \ GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_NONFATAL_FAILURE_) #define EXPECT_PRED2(pred, v1, v2) \ GTEST_PRED2_(pred, v1, v2, GTEST_NONFATAL_FAILURE_) #define ASSERT_PRED_FORMAT2(pred_format, v1, v2) \ GTEST_PRED_FORMAT2_(pred_format, v1, v2, GTEST_FATAL_FAILURE_) #define ASSERT_PRED2(pred, v1, v2) \ GTEST_PRED2_(pred, v1, v2, GTEST_FATAL_FAILURE_) // Helper function for implementing {EXPECT|ASSERT}_PRED3. Don't use // this in your code. template AssertionResult AssertPred3Helper(const char* pred_text, const char* e1, const char* e2, const char* e3, Pred pred, const T1& v1, const T2& v2, const T3& v3) { if (pred(v1, v2, v3)) return AssertionSuccess(); return AssertionFailure() << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ") evaluates to false, where" << "\n" << e1 << " evaluates to " << v1 << "\n" << e2 << " evaluates to " << v2 << "\n" << e3 << " evaluates to " << v3; } // Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT3. // Don't use this in your code. 
#define GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, on_failure)\ GTEST_ASSERT_(pred_format(#v1, #v2, #v3, v1, v2, v3), \ on_failure) // Internal macro for implementing {EXPECT|ASSERT}_PRED3. Don't use // this in your code. #define GTEST_PRED3_(pred, v1, v2, v3, on_failure)\ GTEST_ASSERT_(::testing::AssertPred3Helper(#pred, \ #v1, \ #v2, \ #v3, \ pred, \ v1, \ v2, \ v3), on_failure) // Ternary predicate assertion macros. #define EXPECT_PRED_FORMAT3(pred_format, v1, v2, v3) \ GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_NONFATAL_FAILURE_) #define EXPECT_PRED3(pred, v1, v2, v3) \ GTEST_PRED3_(pred, v1, v2, v3, GTEST_NONFATAL_FAILURE_) #define ASSERT_PRED_FORMAT3(pred_format, v1, v2, v3) \ GTEST_PRED_FORMAT3_(pred_format, v1, v2, v3, GTEST_FATAL_FAILURE_) #define ASSERT_PRED3(pred, v1, v2, v3) \ GTEST_PRED3_(pred, v1, v2, v3, GTEST_FATAL_FAILURE_) // Helper function for implementing {EXPECT|ASSERT}_PRED4. Don't use // this in your code. template AssertionResult AssertPred4Helper(const char* pred_text, const char* e1, const char* e2, const char* e3, const char* e4, Pred pred, const T1& v1, const T2& v2, const T3& v3, const T4& v4) { if (pred(v1, v2, v3, v4)) return AssertionSuccess(); return AssertionFailure() << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4 << ") evaluates to false, where" << "\n" << e1 << " evaluates to " << v1 << "\n" << e2 << " evaluates to " << v2 << "\n" << e3 << " evaluates to " << v3 << "\n" << e4 << " evaluates to " << v4; } // Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT4. // Don't use this in your code. #define GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, on_failure)\ GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, v1, v2, v3, v4), \ on_failure) // Internal macro for implementing {EXPECT|ASSERT}_PRED4. Don't use // this in your code. #define GTEST_PRED4_(pred, v1, v2, v3, v4, on_failure)\ GTEST_ASSERT_(::testing::AssertPred4Helper(#pred, \ #v1, \ #v2, \ #v3, \ #v4, \ pred, \ v1, \ v2, \ v3, \ v4), on_failure) // 4-ary predicate assertion macros. #define EXPECT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \ GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_) #define EXPECT_PRED4(pred, v1, v2, v3, v4) \ GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_NONFATAL_FAILURE_) #define ASSERT_PRED_FORMAT4(pred_format, v1, v2, v3, v4) \ GTEST_PRED_FORMAT4_(pred_format, v1, v2, v3, v4, GTEST_FATAL_FAILURE_) #define ASSERT_PRED4(pred, v1, v2, v3, v4) \ GTEST_PRED4_(pred, v1, v2, v3, v4, GTEST_FATAL_FAILURE_) // Helper function for implementing {EXPECT|ASSERT}_PRED5. Don't use // this in your code. template AssertionResult AssertPred5Helper(const char* pred_text, const char* e1, const char* e2, const char* e3, const char* e4, const char* e5, Pred pred, const T1& v1, const T2& v2, const T3& v3, const T4& v4, const T5& v5) { if (pred(v1, v2, v3, v4, v5)) return AssertionSuccess(); return AssertionFailure() << pred_text << "(" << e1 << ", " << e2 << ", " << e3 << ", " << e4 << ", " << e5 << ") evaluates to false, where" << "\n" << e1 << " evaluates to " << v1 << "\n" << e2 << " evaluates to " << v2 << "\n" << e3 << " evaluates to " << v3 << "\n" << e4 << " evaluates to " << v4 << "\n" << e5 << " evaluates to " << v5; } // Internal macro for implementing {EXPECT|ASSERT}_PRED_FORMAT5. // Don't use this in your code. #define GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, on_failure)\ GTEST_ASSERT_(pred_format(#v1, #v2, #v3, #v4, #v5, v1, v2, v3, v4, v5), \ on_failure) // Internal macro for implementing {EXPECT|ASSERT}_PRED5. 
Don't use // this in your code. #define GTEST_PRED5_(pred, v1, v2, v3, v4, v5, on_failure)\ GTEST_ASSERT_(::testing::AssertPred5Helper(#pred, \ #v1, \ #v2, \ #v3, \ #v4, \ #v5, \ pred, \ v1, \ v2, \ v3, \ v4, \ v5), on_failure) // 5-ary predicate assertion macros. #define EXPECT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \ GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_) #define EXPECT_PRED5(pred, v1, v2, v3, v4, v5) \ GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_NONFATAL_FAILURE_) #define ASSERT_PRED_FORMAT5(pred_format, v1, v2, v3, v4, v5) \ GTEST_PRED_FORMAT5_(pred_format, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_) #define ASSERT_PRED5(pred, v1, v2, v3, v4, v5) \ GTEST_PRED5_(pred, v1, v2, v3, v4, v5, GTEST_FATAL_FAILURE_) } // namespace testing #endif // GTEST_INCLUDE_GTEST_GTEST_PRED_IMPL_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/gtest_prod.h000066400000000000000000000047271357355204000254340ustar00rootroot00000000000000// Copyright 2006, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Google C++ Testing and Mocking Framework definitions useful in production code. // GOOGLETEST_CM0003 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_GTEST_PROD_H_ #define GTEST_INCLUDE_GTEST_GTEST_PROD_H_ // When you need to test the private or protected members of a class, // use the FRIEND_TEST macro to declare your tests as friends of the // class. For example: // // class MyClass { // private: // void PrivateMethod(); // FRIEND_TEST(MyClassTest, PrivateMethodWorks); // }; // // class MyClassTest : public testing::Test { // // ... // }; // // TEST_F(MyClassTest, PrivateMethodWorks) { // // Can call MyClass::PrivateMethod() here. // } // // Note: The test class must be in the same namespace as the class being tested. // For example, putting MyClassTest in an anonymous namespace will not work. 
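// As a concrete illustration (MyClassTest and PrivateMethodWorks are the
// hypothetical names from the example above),
//
//   FRIEND_TEST(MyClassTest, PrivateMethodWorks);
//
// expands to
//
//   friend class MyClassTest_PrivateMethodWorks_Test;
//
// which names exactly the class that TEST_F(MyClassTest,
// PrivateMethodWorks) generates, so that test body may access
// MyClass's private members.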
#define FRIEND_TEST(test_case_name, test_name)\ friend class test_case_name##_##test_name##_Test #endif // GTEST_INCLUDE_GTEST_GTEST_PROD_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/000077500000000000000000000000001357355204000247135ustar00rootroot00000000000000libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/custom/000077500000000000000000000000001357355204000262255ustar00rootroot00000000000000libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/custom/README.md000066400000000000000000000032221357355204000275030ustar00rootroot00000000000000# Customization Points The custom directory is an injection point for custom user configurations. ## Header `gtest.h` ### The following macros can be defined: * `GTEST_OS_STACK_TRACE_GETTER_` - The name of an implementation of `OsStackTraceGetterInterface`. * `GTEST_CUSTOM_TEMPDIR_FUNCTION_` - An override for `testing::TempDir()`. See `testing::TempDir` for semantics and signature. ## Header `gtest-port.h` The following macros can be defined: ### Flag related macros: * `GTEST_FLAG(flag_name)` * `GTEST_USE_OWN_FLAGFILE_FLAG_` - Define to 0 when the system provides its own flagfile flag parsing. * `GTEST_DECLARE_bool_(name)` * `GTEST_DECLARE_int32_(name)` * `GTEST_DECLARE_string_(name)` * `GTEST_DEFINE_bool_(name, default_val, doc)` * `GTEST_DEFINE_int32_(name, default_val, doc)` * `GTEST_DEFINE_string_(name, default_val, doc)` ### Logging: * `GTEST_LOG_(severity)` * `GTEST_CHECK_(condition)` * Functions `LogToStderr()` and `FlushInfoLog()` have to be provided too. ### Threading: * `GTEST_HAS_NOTIFICATION_` - Enabled if Notification is already provided. * `GTEST_HAS_MUTEX_AND_THREAD_LOCAL_` - Enabled if `Mutex` and `ThreadLocal` are already provided. Must also provide `GTEST_DECLARE_STATIC_MUTEX_(mutex)` and `GTEST_DEFINE_STATIC_MUTEX_(mutex)` * `GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks)` * `GTEST_LOCK_EXCLUDED_(locks)` ### Underlying library support features * `GTEST_HAS_CXXABI_H_` ### Exporting API symbols: * `GTEST_API_` - Specifier for exported symbols. ## Header `gtest-printers.h` * See documentation at `gtest/gtest-printers.h` for details on how to define a custom printer. libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/custom/gtest-port.h000066400000000000000000000035021357355204000305060ustar00rootroot00000000000000// Copyright 2015, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Injection point for custom user configurations. See README for details // // ** Custom implementation starts here ** #ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_ #endif // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PORT_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/custom/gtest-printers.h000066400000000000000000000040371357355204000313740ustar00rootroot00000000000000// Copyright 2015, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // This file provides an injection point for custom printers in a local // installation of gTest. // It will be included from gtest-printers.h and the overrides in this file // will be visible to everyone. // // Injection point for custom user configurations. See README for details // // ** Custom implementation starts here ** #ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_ #endif // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_PRINTERS_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/custom/gtest.h000066400000000000000000000034631357355204000275320ustar00rootroot00000000000000// Copyright 2015, Google Inc. // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Injection point for custom user configurations. See README for details // // ** Custom implementation starts here ** #ifndef GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_ #endif // GTEST_INCLUDE_GTEST_INTERNAL_CUSTOM_GTEST_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-death-test-internal.h000066400000000000000000000261721357355204000320740ustar00rootroot00000000000000// Copyright 2005, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// // The Google C++ Testing and Mocking Framework (Google Test) // // This header file defines internal utilities needed for implementing // death tests. They are subject to change without notice. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ #include "gtest/internal/gtest-internal.h" #include namespace testing { namespace internal { GTEST_DECLARE_string_(internal_run_death_test); // Names of the flags (needed for parsing Google Test flags). const char kDeathTestStyleFlag[] = "death_test_style"; const char kDeathTestUseFork[] = "death_test_use_fork"; const char kInternalRunDeathTestFlag[] = "internal_run_death_test"; #if GTEST_HAS_DEATH_TEST GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ /* class A needs to have dll-interface to be used by clients of class B */) // DeathTest is a class that hides much of the complexity of the // GTEST_DEATH_TEST_ macro. It is abstract; its static Create method // returns a concrete class that depends on the prevailing death test // style, as defined by the --gtest_death_test_style and/or // --gtest_internal_run_death_test flags. // In describing the results of death tests, these terms are used with // the corresponding definitions: // // exit status: The integer exit information in the format specified // by wait(2) // exit code: The integer code passed to exit(3), _exit(2), or // returned from main() class GTEST_API_ DeathTest { public: // Create returns false if there was an error determining the // appropriate action to take for the current death test; for example, // if the gtest_death_test_style flag is set to an invalid value. // The LastMessage method will return a more detailed message in that // case. Otherwise, the DeathTest pointer pointed to by the "test" // argument is set. If the death test should be skipped, the pointer // is set to NULL; otherwise, it is set to the address of a new concrete // DeathTest object that controls the execution of the current test. static bool Create(const char* statement, const RE* regex, const char* file, int line, DeathTest** test); DeathTest(); virtual ~DeathTest() { } // A helper class that aborts a death test when it's deleted. class ReturnSentinel { public: explicit ReturnSentinel(DeathTest* test) : test_(test) { } ~ReturnSentinel() { test_->Abort(TEST_ENCOUNTERED_RETURN_STATEMENT); } private: DeathTest* const test_; GTEST_DISALLOW_COPY_AND_ASSIGN_(ReturnSentinel); } GTEST_ATTRIBUTE_UNUSED_; // An enumeration of possible roles that may be taken when a death // test is encountered. EXECUTE means that the death test logic should // be executed immediately. OVERSEE means that the program should prepare // the appropriate environment for a child process to execute the death // test, then wait for it to complete. enum TestRole { OVERSEE_TEST, EXECUTE_TEST }; // An enumeration of the three reasons that a test might be aborted. enum AbortReason { TEST_ENCOUNTERED_RETURN_STATEMENT, TEST_THREW_EXCEPTION, TEST_DID_NOT_DIE }; // Assumes one of the above roles. virtual TestRole AssumeRole() = 0; // Waits for the death test to finish and returns its status. virtual int Wait() = 0; // Returns true if the death test passed; that is, the test process // exited during the test, its exit status matches a user-supplied // predicate, and its stderr output matches a user-supplied regular // expression. 
// The user-supplied predicate may be a macro expression rather // than a function pointer or functor, or else Wait and Passed could // be combined. virtual bool Passed(bool exit_status_ok) = 0; // Signals that the death test did not die as expected. virtual void Abort(AbortReason reason) = 0; // Returns a human-readable outcome message regarding the outcome of // the last death test. static const char* LastMessage(); static void set_last_death_test_message(const std::string& message); private: // A string containing a description of the outcome of the last death test. static std::string last_death_test_message_; GTEST_DISALLOW_COPY_AND_ASSIGN_(DeathTest); }; GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 // Factory interface for death tests. May be mocked out for testing. class DeathTestFactory { public: virtual ~DeathTestFactory() { } virtual bool Create(const char* statement, const RE* regex, const char* file, int line, DeathTest** test) = 0; }; // A concrete DeathTestFactory implementation for normal use. class DefaultDeathTestFactory : public DeathTestFactory { public: virtual bool Create(const char* statement, const RE* regex, const char* file, int line, DeathTest** test); }; // Returns true if exit_status describes a process that was terminated // by a signal, or exited normally with a nonzero exit code. GTEST_API_ bool ExitedUnsuccessfully(int exit_status); // Traps C++ exceptions escaping statement and reports them as test // failures. Note that trapping SEH exceptions is not implemented here. # if GTEST_HAS_EXCEPTIONS # define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \ try { \ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ } catch (const ::std::exception& gtest_exception) { \ fprintf(\ stderr, \ "\n%s: Caught std::exception-derived exception escaping the " \ "death test statement. Exception message: %s\n", \ ::testing::internal::FormatFileLocation(__FILE__, __LINE__).c_str(), \ gtest_exception.what()); \ fflush(stderr); \ death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \ } catch (...) { \ death_test->Abort(::testing::internal::DeathTest::TEST_THREW_EXCEPTION); \ } # else # define GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, death_test) \ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) # endif // This macro is for implementing ASSERT_DEATH*, EXPECT_DEATH*, // ASSERT_EXIT*, and EXPECT_EXIT*. 
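//
// A usage sketch of the public wrappers built on this macro
// (illustrative only; Crash() is a hypothetical helper that needs
// <cstdio> and <cstdlib>):
//
//   void Crash() {
//     fprintf(stderr, "fatal error\n");
//     abort();
//   }
//
//   TEST(MyDeathTest, CrashesWithDiagnostic) {
//     // The statement must terminate the child process, and the
//     // child's stderr must match the regular expression.
//     EXPECT_DEATH(Crash(), "fatal error");
//     // ASSERT_EXIT takes an explicit predicate on the exit status.
//     ASSERT_EXIT(exit(1), ::testing::ExitedWithCode(1), "");
//   }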
# define GTEST_DEATH_TEST_(statement, predicate, regex, fail) \ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (::testing::internal::AlwaysTrue()) { \ const ::testing::internal::RE& gtest_regex = (regex); \ ::testing::internal::DeathTest* gtest_dt; \ if (!::testing::internal::DeathTest::Create(#statement, &gtest_regex, \ __FILE__, __LINE__, &gtest_dt)) { \ goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \ } \ if (gtest_dt != NULL) { \ ::testing::internal::scoped_ptr< ::testing::internal::DeathTest> \ gtest_dt_ptr(gtest_dt); \ switch (gtest_dt->AssumeRole()) { \ case ::testing::internal::DeathTest::OVERSEE_TEST: \ if (!gtest_dt->Passed(predicate(gtest_dt->Wait()))) { \ goto GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__); \ } \ break; \ case ::testing::internal::DeathTest::EXECUTE_TEST: { \ ::testing::internal::DeathTest::ReturnSentinel \ gtest_sentinel(gtest_dt); \ GTEST_EXECUTE_DEATH_TEST_STATEMENT_(statement, gtest_dt); \ gtest_dt->Abort(::testing::internal::DeathTest::TEST_DID_NOT_DIE); \ break; \ } \ default: \ break; \ } \ } \ } else \ GTEST_CONCAT_TOKEN_(gtest_label_, __LINE__): \ fail(::testing::internal::DeathTest::LastMessage()) // The symbol "fail" here expands to something into which a message // can be streamed. // This macro is for implementing ASSERT/EXPECT_DEBUG_DEATH when compiled in // NDEBUG mode. In this case we need the statements to be executed and the macro // must accept a streamed message even though the message is never printed. // The regex object is not evaluated, but it is used to prevent "unused" // warnings and to avoid an expression that doesn't compile in debug mode. #define GTEST_EXECUTE_STATEMENT_(statement, regex) \ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (::testing::internal::AlwaysTrue()) { \ GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \ } else if (!::testing::internal::AlwaysTrue()) { \ const ::testing::internal::RE& gtest_regex = (regex); \ static_cast<void>(gtest_regex); \ } else \ ::testing::Message() // A class representing the parsed contents of the // --gtest_internal_run_death_test flag, as it existed when // RUN_ALL_TESTS was called. class InternalRunDeathTestFlag { public: InternalRunDeathTestFlag(const std::string& a_file, int a_line, int an_index, int a_write_fd) : file_(a_file), line_(a_line), index_(an_index), write_fd_(a_write_fd) {} ~InternalRunDeathTestFlag() { if (write_fd_ >= 0) posix::Close(write_fd_); } const std::string& file() const { return file_; } int line() const { return line_; } int index() const { return index_; } int write_fd() const { return write_fd_; } private: std::string file_; int line_; int index_; int write_fd_; GTEST_DISALLOW_COPY_AND_ASSIGN_(InternalRunDeathTestFlag); }; // Returns a newly created InternalRunDeathTestFlag object with fields // initialized from the GTEST_FLAG(internal_run_death_test) flag if // the flag is specified; otherwise returns NULL. InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag(); #endif // GTEST_HAS_DEATH_TEST } // namespace internal } // namespace testing #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_DEATH_TEST_INTERNAL_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-filepath.h000066400000000000000000000230311357355204000300030ustar00rootroot00000000000000// Copyright 2008, Google Inc. // All rights reserved.
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Google Test filepath utilities // // This header file declares classes and functions used internally by // Google Test. They are subject to change without notice. // // This file is #included in gtest/internal/gtest-internal.h. // Do not include this header file separately! // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ #include "gtest/internal/gtest-string.h" GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ /* class A needs to have dll-interface to be used by clients of class B */) namespace testing { namespace internal { // FilePath - a class for file and directory pathname manipulation which // handles platform-specific conventions (like the pathname separator). // Used for helper functions for naming files in a directory for xml output. // Except for Set methods, all methods are const or static, which provides an // "immutable value object" -- useful for peace of mind. // A FilePath with a value ending in a path separator ("like/this/") represents // a directory, otherwise it is assumed to represent a file. In either case, // it may or may not represent an actual file or directory in the file system. // Names are NOT checked for syntax correctness -- no checking for illegal // characters, malformed paths, etc. class GTEST_API_ FilePath { public: FilePath() : pathname_("") { } FilePath(const FilePath& rhs) : pathname_(rhs.pathname_) { } explicit FilePath(const std::string& pathname) : pathname_(pathname) { Normalize(); } FilePath& operator=(const FilePath& rhs) { Set(rhs); return *this; } void Set(const FilePath& rhs) { pathname_ = rhs.pathname_; } const std::string& string() const { return pathname_; } const char* c_str() const { return pathname_.c_str(); } // Returns the current working directory, or "" if unsuccessful. static FilePath GetCurrentDir(); // Given directory = "dir", base_name = "test", number = 0, // extension = "xml", returns "dir/test.xml". If number is greater // than zero (e.g., 12), returns "dir/test_12.xml". 
// On Windows platform, uses \ as the separator rather than /. static FilePath MakeFileName(const FilePath& directory, const FilePath& base_name, int number, const char* extension); // Given directory = "dir", relative_path = "test.xml", // returns "dir/test.xml". // On Windows, uses \ as the separator rather than /. static FilePath ConcatPaths(const FilePath& directory, const FilePath& relative_path); // Returns a pathname for a file that does not currently exist. The pathname // will be directory/base_name.extension or // directory/base_name_.extension if directory/base_name.extension // already exists. The number will be incremented until a pathname is found // that does not already exist. // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'. // There could be a race condition if two or more processes are calling this // function at the same time -- they could both pick the same filename. static FilePath GenerateUniqueFileName(const FilePath& directory, const FilePath& base_name, const char* extension); // Returns true iff the path is "". bool IsEmpty() const { return pathname_.empty(); } // If input name has a trailing separator character, removes it and returns // the name, otherwise return the name string unmodified. // On Windows platform, uses \ as the separator, other platforms use /. FilePath RemoveTrailingPathSeparator() const; // Returns a copy of the FilePath with the directory part removed. // Example: FilePath("path/to/file").RemoveDirectoryName() returns // FilePath("file"). If there is no directory part ("just_a_file"), it returns // the FilePath unmodified. If there is no file part ("just_a_dir/") it // returns an empty FilePath (""). // On Windows platform, '\' is the path separator, otherwise it is '/'. FilePath RemoveDirectoryName() const; // RemoveFileName returns the directory path with the filename removed. // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/". // If the FilePath is "a_file" or "/a_file", RemoveFileName returns // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does // not have a file, like "just/a/dir/", it returns the FilePath unmodified. // On Windows platform, '\' is the path separator, otherwise it is '/'. FilePath RemoveFileName() const; // Returns a copy of the FilePath with the case-insensitive extension removed. // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns // FilePath("dir/file"). If a case-insensitive extension is not // found, returns a copy of the original FilePath. FilePath RemoveExtension(const char* extension) const; // Creates directories so that path exists. Returns true if successful or if // the directories already exist; returns false if unable to create // directories for any reason. Will also return false if the FilePath does // not represent a directory (that is, it doesn't end with a path separator). bool CreateDirectoriesRecursively() const; // Create the directory so that path exists. Returns true if successful or // if the directory already exists; returns false if unable to create the // directory for any reason, including if the parent directory does not // exist. Not named "CreateDirectory" because that's a macro on Windows. bool CreateFolder() const; // Returns true if FilePath describes something in the file-system, // either a file, directory, or whatever, and that something exists. bool FileOrDirectoryExists() const; // Returns true if pathname describes a directory in the file-system // that exists. 
bool DirectoryExists() const; // Returns true if FilePath ends with a path separator, which indicates that // it is intended to represent a directory. Returns false otherwise. // This does NOT check that a directory (or file) actually exists. bool IsDirectory() const; // Returns true if pathname describes a root directory. (Windows has one // root directory per disk drive.) bool IsRootDirectory() const; // Returns true if pathname describes an absolute path. bool IsAbsolutePath() const; private: // Replaces multiple consecutive separators with a single separator. // For example, "bar///foo" becomes "bar/foo". Does not eliminate other // redundancies that might be in a pathname involving "." or "..". // // A pathname with multiple consecutive separators may occur either through // user error or as a result of some scripts or APIs that generate a pathname // with a trailing separator. On other platforms the same API or script // may NOT generate a pathname with a trailing "/". Then elsewhere that // pathname may have another "/" and pathname components added to it, // without checking for the separator already being there. // The script language and operating system may allow paths like "foo//bar" // but some of the functions in FilePath will not handle that correctly. In // particular, RemoveTrailingPathSeparator() only removes one separator, and // it is called in CreateDirectoriesRecursively() assuming that it will change // a pathname from directory syntax (trailing separator) to filename syntax. // // On Windows this method also replaces the alternate path separator '/' with // the primary path separator '\\', so that for example "bar\\/\\foo" becomes // "bar\\foo". void Normalize(); // Returns a pointer to the last occurence of a valid path separator in // the FilePath. On Windows, for example, both '/' and '\' are valid path // separators. Returns NULL if no path separator was found. const char* FindLastPathSeparator() const; std::string pathname_; }; // class FilePath } // namespace internal } // namespace testing GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_FILEPATH_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-internal.h000066400000000000000000001433761357355204000300420ustar00rootroot00000000000000// Copyright 2005, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // The Google C++ Testing and Mocking Framework (Google Test) // // This header file declares functions and macros used internally by // Google Test. They are subject to change without notice. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_ #include "gtest/internal/gtest-port.h" #if GTEST_OS_LINUX # include # include # include # include #endif // GTEST_OS_LINUX #if GTEST_HAS_EXCEPTIONS # include #endif #include #include #include #include #include #include #include #include #include #include "gtest/gtest-message.h" #include "gtest/internal/gtest-filepath.h" #include "gtest/internal/gtest-string.h" #include "gtest/internal/gtest-type-util.h" // Due to C++ preprocessor weirdness, we need double indirection to // concatenate two tokens when one of them is __LINE__. Writing // // foo ## __LINE__ // // will result in the token foo__LINE__, instead of foo followed by // the current line number. For more details, see // http://www.parashift.com/c++-faq-lite/misc-technical-issues.html#faq-39.6 #define GTEST_CONCAT_TOKEN_(foo, bar) GTEST_CONCAT_TOKEN_IMPL_(foo, bar) #define GTEST_CONCAT_TOKEN_IMPL_(foo, bar) foo ## bar // Stringifies its argument. #define GTEST_STRINGIFY_(name) #name class ProtocolMessage; namespace proto2 { class Message; } namespace testing { // Forward declarations. class AssertionResult; // Result of an assertion. class Message; // Represents a failure message. class Test; // Represents a test. class TestInfo; // Information about a test. class TestPartResult; // Result of a test part. class UnitTest; // A collection of test cases. template ::std::string PrintToString(const T& value); namespace internal { struct TraceInfo; // Information about a trace point. class TestInfoImpl; // Opaque implementation of TestInfo class UnitTestImpl; // Opaque implementation of UnitTest // The text used in failure messages to indicate the start of the // stack trace. GTEST_API_ extern const char kStackTraceMarker[]; // Two overloaded helpers for checking at compile time whether an // expression is a null pointer literal (i.e. NULL or any 0-valued // compile-time integral constant). Their return values have // different sizes, so we can use sizeof() to test which version is // picked by the compiler. These helpers have no implementations, as // we only need their signatures. // // Given IsNullLiteralHelper(x), the compiler will pick the first // version if x can be implicitly converted to Secret*, and pick the // second version otherwise. Since Secret is a secret and incomplete // type, the only expression a user can write that has type Secret* is // a null pointer literal. Therefore, we know that x is a null // pointer literal if and only if the first version is picked by the // compiler. char IsNullLiteralHelper(Secret* p); char (&IsNullLiteralHelper(...))[2]; // NOLINT // A compile-time bool constant that is true if and only if x is a // null pointer literal (i.e. 
NULL or any 0-valued compile-time // integral constant). #ifdef GTEST_ELLIPSIS_NEEDS_POD_ // We lose support for NULL detection where the compiler doesn't like // passing non-POD classes through ellipsis (...). # define GTEST_IS_NULL_LITERAL_(x) false #else # define GTEST_IS_NULL_LITERAL_(x) \ (sizeof(::testing::internal::IsNullLiteralHelper(x)) == 1) #endif // GTEST_ELLIPSIS_NEEDS_POD_ // Appends the user-supplied message to the Google-Test-generated message. GTEST_API_ std::string AppendUserMessage( const std::string& gtest_msg, const Message& user_msg); #if GTEST_HAS_EXCEPTIONS GTEST_DISABLE_MSC_WARNINGS_PUSH_(4275 \ /* an exported class was derived from a class that was not exported */) // This exception is thrown by (and only by) a failed Google Test // assertion when GTEST_FLAG(throw_on_failure) is true (if exceptions // are enabled). We derive it from std::runtime_error, which is for // errors presumably detectable only at run time. Since // std::runtime_error inherits from std::exception, many testing // frameworks know how to extract and print the message inside it. class GTEST_API_ GoogleTestFailureException : public ::std::runtime_error { public: explicit GoogleTestFailureException(const TestPartResult& failure); }; GTEST_DISABLE_MSC_WARNINGS_POP_() // 4275 #endif // GTEST_HAS_EXCEPTIONS namespace edit_distance { // Returns the optimal edits to go from 'left' to 'right'. // All edits cost the same, with replace having lower priority than // add/remove. // Simple implementation of the Wagner-Fischer algorithm. // See http://en.wikipedia.org/wiki/Wagner-Fischer_algorithm enum EditType { kMatch, kAdd, kRemove, kReplace }; GTEST_API_ std::vector CalculateOptimalEdits( const std::vector& left, const std::vector& right); // Same as above, but the input is represented as strings. GTEST_API_ std::vector CalculateOptimalEdits( const std::vector& left, const std::vector& right); // Create a diff of the input strings in Unified diff format. GTEST_API_ std::string CreateUnifiedDiff(const std::vector& left, const std::vector& right, size_t context = 2); } // namespace edit_distance // Calculate the diff between 'left' and 'right' and return it in unified diff // format. // If not null, stores in 'total_line_count' the total number of lines found // in left + right. GTEST_API_ std::string DiffStrings(const std::string& left, const std::string& right, size_t* total_line_count); // Constructs and returns the message for an equality assertion // (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure. // // The first four parameters are the expressions used in the assertion // and their values, as strings. For example, for ASSERT_EQ(foo, bar) // where foo is 5 and bar is 6, we have: // // expected_expression: "foo" // actual_expression: "bar" // expected_value: "5" // actual_value: "6" // // The ignoring_case parameter is true iff the assertion is a // *_STRCASEEQ*. When it's true, the string " (ignoring case)" will // be inserted into the message. GTEST_API_ AssertionResult EqFailure(const char* expected_expression, const char* actual_expression, const std::string& expected_value, const std::string& actual_value, bool ignoring_case); // Constructs a failure message for Boolean assertions such as EXPECT_TRUE. 
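// For example, EXPECT_TRUE(x > 0) with x == -1 produces a failure
// message of roughly this shape (illustrative):
//
//   Value of: x > 0
//     Actual: false
//   Expected: true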
GTEST_API_ std::string GetBoolAssertionFailureMessage( const AssertionResult& assertion_result, const char* expression_text, const char* actual_predicate_value, const char* expected_predicate_value); // This template class represents an IEEE floating-point number // (either single-precision or double-precision, depending on the // template parameters). // // The purpose of this class is to do more sophisticated number // comparison. (Due to round-off error, etc, it's very unlikely that // two floating-points will be equal exactly. Hence a naive // comparison by the == operation often doesn't work.) // // Format of IEEE floating-point: // // The most-significant bit being the leftmost, an IEEE // floating-point looks like // // sign_bit exponent_bits fraction_bits // // Here, sign_bit is a single bit that designates the sign of the // number. // // For float, there are 8 exponent bits and 23 fraction bits. // // For double, there are 11 exponent bits and 52 fraction bits. // // More details can be found at // http://en.wikipedia.org/wiki/IEEE_floating-point_standard. // // Template parameter: // // RawType: the raw floating-point type (either float or double) template class FloatingPoint { public: // Defines the unsigned integer type that has the same size as the // floating point number. typedef typename TypeWithSize::UInt Bits; // Constants. // # of bits in a number. static const size_t kBitCount = 8*sizeof(RawType); // # of fraction bits in a number. static const size_t kFractionBitCount = std::numeric_limits::digits - 1; // # of exponent bits in a number. static const size_t kExponentBitCount = kBitCount - 1 - kFractionBitCount; // The mask for the sign bit. static const Bits kSignBitMask = static_cast(1) << (kBitCount - 1); // The mask for the fraction bits. static const Bits kFractionBitMask = ~static_cast(0) >> (kExponentBitCount + 1); // The mask for the exponent bits. static const Bits kExponentBitMask = ~(kSignBitMask | kFractionBitMask); // How many ULP's (Units in the Last Place) we want to tolerate when // comparing two numbers. The larger the value, the more error we // allow. A 0 value means that two numbers must be exactly the same // to be considered equal. // // The maximum error of a single floating-point operation is 0.5 // units in the last place. On Intel CPU's, all floating-point // calculations are done with 80-bit precision, while double has 64 // bits. Therefore, 4 should be enough for ordinary use. // // See the following article for more details on ULP: // http://randomascii.wordpress.com/2012/02/25/comparing-floating-point-numbers-2012-edition/ static const size_t kMaxUlps = 4; // Constructs a FloatingPoint from a raw floating-point number. // // On an Intel CPU, passing a non-normalized NAN (Not a Number) // around may change its bits, although the new value is guaranteed // to be also a NAN. Therefore, don't expect this constructor to // preserve the bits in x when x is a NAN. explicit FloatingPoint(const RawType& x) { u_.value_ = x; } // Static methods // Reinterprets a bit pattern as a floating-point number. // // This function is needed to test the AlmostEquals() method. static RawType ReinterpretBits(const Bits bits) { FloatingPoint fp(0); fp.u_.bits_ = bits; return fp.u_.value_; } // Returns the floating-point number that represent positive infinity. static RawType Infinity() { return ReinterpretBits(kExponentBitMask); } // Returns the maximum representable finite floating-point number. 
static RawType Max(); // Non-static methods // Returns the bits that represents this number. const Bits &bits() const { return u_.bits_; } // Returns the exponent bits of this number. Bits exponent_bits() const { return kExponentBitMask & u_.bits_; } // Returns the fraction bits of this number. Bits fraction_bits() const { return kFractionBitMask & u_.bits_; } // Returns the sign bit of this number. Bits sign_bit() const { return kSignBitMask & u_.bits_; } // Returns true iff this is NAN (not a number). bool is_nan() const { // It's a NAN if the exponent bits are all ones and the fraction // bits are not entirely zeros. return (exponent_bits() == kExponentBitMask) && (fraction_bits() != 0); } // Returns true iff this number is at most kMaxUlps ULP's away from // rhs. In particular, this function: // // - returns false if either number is (or both are) NAN. // - treats really large numbers as almost equal to infinity. // - thinks +0.0 and -0.0 are 0 DLP's apart. bool AlmostEquals(const FloatingPoint& rhs) const { // The IEEE standard says that any comparison operation involving // a NAN must return false. if (is_nan() || rhs.is_nan()) return false; return DistanceBetweenSignAndMagnitudeNumbers(u_.bits_, rhs.u_.bits_) <= kMaxUlps; } private: // The data type used to store the actual floating-point number. union FloatingPointUnion { RawType value_; // The raw floating-point number. Bits bits_; // The bits that represent the number. }; // Converts an integer from the sign-and-magnitude representation to // the biased representation. More precisely, let N be 2 to the // power of (kBitCount - 1), an integer x is represented by the // unsigned number x + N. // // For instance, // // -N + 1 (the most negative number representable using // sign-and-magnitude) is represented by 1; // 0 is represented by N; and // N - 1 (the biggest number representable using // sign-and-magnitude) is represented by 2N - 1. // // Read http://en.wikipedia.org/wiki/Signed_number_representations // for more details on signed number representations. static Bits SignAndMagnitudeToBiased(const Bits &sam) { if (kSignBitMask & sam) { // sam represents a negative number. return ~sam + 1; } else { // sam represents a positive number. return kSignBitMask | sam; } } // Given two numbers in the sign-and-magnitude representation, // returns the distance between them as an unsigned number. static Bits DistanceBetweenSignAndMagnitudeNumbers(const Bits &sam1, const Bits &sam2) { const Bits biased1 = SignAndMagnitudeToBiased(sam1); const Bits biased2 = SignAndMagnitudeToBiased(sam2); return (biased1 >= biased2) ? (biased1 - biased2) : (biased2 - biased1); } FloatingPointUnion u_; }; // We cannot use std::numeric_limits::max() as it clashes with the max() // macro defined by . template <> inline float FloatingPoint::Max() { return FLT_MAX; } template <> inline double FloatingPoint::Max() { return DBL_MAX; } // Typedefs the instances of the FloatingPoint template class that we // care to use. typedef FloatingPoint Float; typedef FloatingPoint Double; // In order to catch the mistake of putting tests that use different // test fixture classes in the same test case, we need to assign // unique IDs to fixture classes and compare them. The TypeId type is // used to hold such IDs. The user should treat TypeId as an opaque // type: the only operation allowed on TypeId values is to compare // them for equality using the == operator. typedef const void* TypeId; template class TypeIdHelper { public: // dummy_ must not have a const type. 
Otherwise an overly eager // compiler (e.g. MSVC 7.1 & 8.0) may try to merge // TypeIdHelper::dummy_ for different Ts as an "optimization". static bool dummy_; }; template bool TypeIdHelper::dummy_ = false; // GetTypeId() returns the ID of type T. Different values will be // returned for different types. Calling the function twice with the // same type argument is guaranteed to return the same ID. template TypeId GetTypeId() { // The compiler is required to allocate a different // TypeIdHelper::dummy_ variable for each T used to instantiate // the template. Therefore, the address of dummy_ is guaranteed to // be unique. return &(TypeIdHelper::dummy_); } // Returns the type ID of ::testing::Test. Always call this instead // of GetTypeId< ::testing::Test>() to get the type ID of // ::testing::Test, as the latter may give the wrong result due to a // suspected linker bug when compiling Google Test as a Mac OS X // framework. GTEST_API_ TypeId GetTestTypeId(); // Defines the abstract factory interface that creates instances // of a Test object. class TestFactoryBase { public: virtual ~TestFactoryBase() {} // Creates a test instance to run. The instance is both created and destroyed // within TestInfoImpl::Run() virtual Test* CreateTest() = 0; protected: TestFactoryBase() {} private: GTEST_DISALLOW_COPY_AND_ASSIGN_(TestFactoryBase); }; // This class provides implementation of TeastFactoryBase interface. // It is used in TEST and TEST_F macros. template class TestFactoryImpl : public TestFactoryBase { public: virtual Test* CreateTest() { return new TestClass; } }; #if GTEST_OS_WINDOWS // Predicate-formatters for implementing the HRESULT checking macros // {ASSERT|EXPECT}_HRESULT_{SUCCEEDED|FAILED} // We pass a long instead of HRESULT to avoid causing an // include dependency for the HRESULT type. GTEST_API_ AssertionResult IsHRESULTSuccess(const char* expr, long hr); // NOLINT GTEST_API_ AssertionResult IsHRESULTFailure(const char* expr, long hr); // NOLINT #endif // GTEST_OS_WINDOWS // Types of SetUpTestCase() and TearDownTestCase() functions. typedef void (*SetUpTestCaseFunc)(); typedef void (*TearDownTestCaseFunc)(); struct CodeLocation { CodeLocation(const std::string& a_file, int a_line) : file(a_file), line(a_line) {} std::string file; int line; }; // Creates a new TestInfo object and registers it with Google Test; // returns the created object. // // Arguments: // // test_case_name: name of the test case // name: name of the test // type_param the name of the test's type parameter, or NULL if // this is not a typed or a type-parameterized test. // value_param text representation of the test's value parameter, // or NULL if this is not a type-parameterized test. // code_location: code location where the test is defined // fixture_class_id: ID of the test fixture class // set_up_tc: pointer to the function that sets up the test case // tear_down_tc: pointer to the function that tears down the test case // factory: pointer to the factory that creates a test object. // The newly created TestInfo instance will assume // ownership of the factory object. GTEST_API_ TestInfo* MakeAndRegisterTestInfo( const char* test_case_name, const char* name, const char* type_param, const char* value_param, CodeLocation code_location, TypeId fixture_class_id, SetUpTestCaseFunc set_up_tc, TearDownTestCaseFunc tear_down_tc, TestFactoryBase* factory); // If *pstr starts with the given prefix, modifies *pstr to be right // past the prefix and returns true; otherwise leaves *pstr unchanged // and returns false. 
None of pstr, *pstr, and prefix can be NULL. GTEST_API_ bool SkipPrefix(const char* prefix, const char** pstr); #if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \ /* class A needs to have dll-interface to be used by clients of class B */) // State of the definition of a type-parameterized test case. class GTEST_API_ TypedTestCasePState { public: TypedTestCasePState() : registered_(false) {} // Adds the given test name to defined_test_names_ and return true // if the test case hasn't been registered; otherwise aborts the // program. bool AddTestName(const char* file, int line, const char* case_name, const char* test_name) { if (registered_) { fprintf(stderr, "%s Test %s must be defined before " "REGISTER_TYPED_TEST_CASE_P(%s, ...).\n", FormatFileLocation(file, line).c_str(), test_name, case_name); fflush(stderr); posix::Abort(); } registered_tests_.insert( ::std::make_pair(test_name, CodeLocation(file, line))); return true; } bool TestExists(const std::string& test_name) const { return registered_tests_.count(test_name) > 0; } const CodeLocation& GetCodeLocation(const std::string& test_name) const { RegisteredTestsMap::const_iterator it = registered_tests_.find(test_name); GTEST_CHECK_(it != registered_tests_.end()); return it->second; } // Verifies that registered_tests match the test names in // defined_test_names_; returns registered_tests if successful, or // aborts the program otherwise. const char* VerifyRegisteredTestNames( const char* file, int line, const char* registered_tests); private: typedef ::std::map RegisteredTestsMap; bool registered_; RegisteredTestsMap registered_tests_; }; GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 // Skips to the first non-space char after the first comma in 'str'; // returns NULL if no comma is found in 'str'. inline const char* SkipComma(const char* str) { const char* comma = strchr(str, ','); if (comma == NULL) { return NULL; } while (IsSpace(*(++comma))) {} return comma; } // Returns the prefix of 'str' before the first comma in it; returns // the entire string if it contains no comma. inline std::string GetPrefixUntilComma(const char* str) { const char* comma = strchr(str, ','); return comma == NULL ? str : std::string(str, comma); } // Splits a given string on a given delimiter, populating a given // vector with the fields. void SplitString(const ::std::string& str, char delimiter, ::std::vector< ::std::string>* dest); // The default argument to the template below for the case when the user does // not provide a name generator. struct DefaultNameGenerator { template static std::string GetName(int i) { return StreamableToString(i); } }; template struct NameGeneratorSelector { typedef Provided type; }; template void GenerateNamesRecursively(Types0, std::vector*, int) {} template void GenerateNamesRecursively(Types, std::vector* result, int i) { result->push_back(NameGenerator::template GetName(i)); GenerateNamesRecursively(typename Types::Tail(), result, i + 1); } template std::vector GenerateNames() { std::vector result; GenerateNamesRecursively(Types(), &result, 0); return result; } // TypeParameterizedTest::Register() // registers a list of type-parameterized tests with Google Test. The // return value is insignificant - we just need to return something // such that we can call this function in a namespace scope. // // Implementation note: The GTEST_TEMPLATE_ macro declares a template // template parameter. It's defined in gtest-type-util.h. 
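//
// The user-facing flow that these internals implement looks like this
// (an illustrative sketch; the fixture and test names are hypothetical):
//
//   template <typename T>
//   class MyFixture : public testing::Test {};
//   TYPED_TEST_CASE_P(MyFixture);
//
//   TYPED_TEST_P(MyFixture, DefaultConstructs) {
//     TypeParam value = TypeParam();
//     (void)value;
//   }
//
//   REGISTER_TYPED_TEST_CASE_P(MyFixture, DefaultConstructs);
//   typedef testing::Types<char, int, double> MyTypes;
//   INSTANTIATE_TYPED_TEST_CASE_P(My, MyFixture, MyTypes);
//
// The INSTANTIATE_* macro ultimately expands into a call to
// TypeParameterizedTestCase<...>::Register(), which recurses through
// TypeParameterizedTest below, once per type in MyTypes.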
template class TypeParameterizedTest { public: // 'index' is the index of the test in the type list 'Types' // specified in INSTANTIATE_TYPED_TEST_CASE_P(Prefix, TestCase, // Types). Valid values for 'index' are [0, N - 1] where N is the // length of Types. static bool Register(const char* prefix, const CodeLocation& code_location, const char* case_name, const char* test_names, int index, const std::vector& type_names = GenerateNames()) { typedef typename Types::Head Type; typedef Fixture FixtureClass; typedef typename GTEST_BIND_(TestSel, Type) TestClass; // First, registers the first type-parameterized test in the type // list. MakeAndRegisterTestInfo( (std::string(prefix) + (prefix[0] == '\0' ? "" : "/") + case_name + "/" + type_names[index]) .c_str(), StripTrailingSpaces(GetPrefixUntilComma(test_names)).c_str(), GetTypeName().c_str(), NULL, // No value parameter. code_location, GetTypeId(), TestClass::SetUpTestCase, TestClass::TearDownTestCase, new TestFactoryImpl); // Next, recurses (at compile time) with the tail of the type list. return TypeParameterizedTest::Register(prefix, code_location, case_name, test_names, index + 1, type_names); } }; // The base case for the compile time recursion. template class TypeParameterizedTest { public: static bool Register(const char* /*prefix*/, const CodeLocation&, const char* /*case_name*/, const char* /*test_names*/, int /*index*/, const std::vector& = std::vector() /*type_names*/) { return true; } }; // TypeParameterizedTestCase::Register() // registers *all combinations* of 'Tests' and 'Types' with Google // Test. The return value is insignificant - we just need to return // something such that we can call this function in a namespace scope. template class TypeParameterizedTestCase { public: static bool Register(const char* prefix, CodeLocation code_location, const TypedTestCasePState* state, const char* case_name, const char* test_names, const std::vector& type_names = GenerateNames()) { std::string test_name = StripTrailingSpaces( GetPrefixUntilComma(test_names)); if (!state->TestExists(test_name)) { fprintf(stderr, "Failed to get code location for test %s.%s at %s.", case_name, test_name.c_str(), FormatFileLocation(code_location.file.c_str(), code_location.line).c_str()); fflush(stderr); posix::Abort(); } const CodeLocation& test_location = state->GetCodeLocation(test_name); typedef typename Tests::Head Head; // First, register the first test in 'Test' for each type in 'Types'. TypeParameterizedTest::Register( prefix, test_location, case_name, test_names, 0, type_names); // Next, recurses (at compile time) with the tail of the test list. return TypeParameterizedTestCase::Register(prefix, code_location, state, case_name, SkipComma(test_names), type_names); } }; // The base case for the compile time recursion. template class TypeParameterizedTestCase { public: static bool Register(const char* /*prefix*/, const CodeLocation&, const TypedTestCasePState* /*state*/, const char* /*case_name*/, const char* /*test_names*/, const std::vector& = std::vector() /*type_names*/) { return true; } }; #endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P // Returns the current OS stack trace as an std::string. // // The maximum number of stack frames to be included is specified by // the gtest_stack_trace_depth flag. The skip_count parameter // specifies the number of top frames to be skipped, which doesn't // count against the number of frames to be included. 
// // For example, if Foo() calls Bar(), which in turn calls // GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in // the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't. GTEST_API_ std::string GetCurrentOsStackTraceExceptTop( UnitTest* unit_test, int skip_count); // Helpers for suppressing warnings on unreachable code or constant // condition. // Always returns true. GTEST_API_ bool AlwaysTrue(); // Always returns false. inline bool AlwaysFalse() { return !AlwaysTrue(); } // Helper for suppressing false warning from Clang on a const char* // variable declared in a conditional expression always being NULL in // the else branch. struct GTEST_API_ ConstCharPtr { ConstCharPtr(const char* str) : value(str) {} operator bool() const { return true; } const char* value; }; // A simple Linear Congruential Generator for generating random // numbers with a uniform distribution. Unlike rand() and srand(), it // doesn't use global state (and therefore can't interfere with user // code). Unlike rand_r(), it's portable. An LCG isn't very random, // but it's good enough for our purposes. class GTEST_API_ Random { public: static const UInt32 kMaxRange = 1u << 31; explicit Random(UInt32 seed) : state_(seed) {} void Reseed(UInt32 seed) { state_ = seed; } // Generates a random number from [0, range). Crashes if 'range' is // 0 or greater than kMaxRange. UInt32 Generate(UInt32 range); private: UInt32 state_; GTEST_DISALLOW_COPY_AND_ASSIGN_(Random); }; // Defining a variable of type CompileAssertTypesEqual will cause a // compiler error iff T1 and T2 are different types. template struct CompileAssertTypesEqual; template struct CompileAssertTypesEqual { }; // Removes the reference from a type if it is a reference type, // otherwise leaves it unchanged. This is the same as // tr1::remove_reference, which is not widely available yet. template struct RemoveReference { typedef T type; }; // NOLINT template struct RemoveReference { typedef T type; }; // NOLINT // A handy wrapper around RemoveReference that works when the argument // T depends on template parameters. #define GTEST_REMOVE_REFERENCE_(T) \ typename ::testing::internal::RemoveReference::type // Removes const from a type if it is a const type, otherwise leaves // it unchanged. This is the same as tr1::remove_const, which is not // widely available yet. template struct RemoveConst { typedef T type; }; // NOLINT template struct RemoveConst { typedef T type; }; // NOLINT // MSVC 8.0, Sun C++, and IBM XL C++ have a bug which causes the above // definition to fail to remove the const in 'const int[3]' and 'const // char[3][4]'. The following specialization works around the bug. template struct RemoveConst { typedef typename RemoveConst::type type[N]; }; #if defined(_MSC_VER) && _MSC_VER < 1400 // This is the only specialization that allows VC++ 7.1 to remove const in // 'const int[3] and 'const int[3][4]'. However, it causes trouble with GCC // and thus needs to be conditionally compiled. template struct RemoveConst { typedef typename RemoveConst::type type[N]; }; #endif // A handy wrapper around RemoveConst that works when the argument // T depends on template parameters. #define GTEST_REMOVE_CONST_(T) \ typename ::testing::internal::RemoveConst::type // Turns const U&, U&, const U, and U all into U. #define GTEST_REMOVE_REFERENCE_AND_CONST_(T) \ GTEST_REMOVE_CONST_(GTEST_REMOVE_REFERENCE_(T)) // ImplicitlyConvertible::value is a compile-time bool // constant that's true iff type From can be implicitly converted to // type To. 
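// For example (illustrative):
//
//   ImplicitlyConvertible<int, double>::value    // true
//   ImplicitlyConvertible<double*, void*>::value // true
//   ImplicitlyConvertible<int*, double*>::value  // false: pointers do
//                                                // not convert this way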
template class ImplicitlyConvertible { private: // We need the following helper functions only for their types. // They have no implementations. // MakeFrom() is an expression whose type is From. We cannot simply // use From(), as the type From may not have a public default // constructor. static typename AddReference::type MakeFrom(); // These two functions are overloaded. Given an expression // Helper(x), the compiler will pick the first version if x can be // implicitly converted to type To; otherwise it will pick the // second version. // // The first version returns a value of size 1, and the second // version returns a value of size 2. Therefore, by checking the // size of Helper(x), which can be done at compile time, we can tell // which version of Helper() is used, and hence whether x can be // implicitly converted to type To. static char Helper(To); static char (&Helper(...))[2]; // NOLINT // We have to put the 'public' section after the 'private' section, // or MSVC refuses to compile the code. public: #if defined(__BORLANDC__) // C++Builder cannot use member overload resolution during template // instantiation. The simplest workaround is to use its C++0x type traits // functions (C++Builder 2009 and above only). static const bool value = __is_convertible(From, To); #else // MSVC warns about implicitly converting from double to int for // possible loss of data, so we need to temporarily disable the // warning. GTEST_DISABLE_MSC_WARNINGS_PUSH_(4244) static const bool value = sizeof(Helper(ImplicitlyConvertible::MakeFrom())) == 1; GTEST_DISABLE_MSC_WARNINGS_POP_() #endif // __BORLANDC__ }; template const bool ImplicitlyConvertible::value; // IsAProtocolMessage::value is a compile-time bool constant that's // true iff T is type ProtocolMessage, proto2::Message, or a subclass // of those. template struct IsAProtocolMessage : public bool_constant< ImplicitlyConvertible::value || ImplicitlyConvertible::value> { }; // When the compiler sees expression IsContainerTest(0), if C is an // STL-style container class, the first overload of IsContainerTest // will be viable (since both C::iterator* and C::const_iterator* are // valid types and NULL can be implicitly converted to them). It will // be picked over the second overload as 'int' is a perfect match for // the type of argument 0. If C::iterator or C::const_iterator is not // a valid type, the first overload is not viable, and the second // overload will be picked. Therefore, we can determine whether C is // a container class by checking the type of IsContainerTest(0). // The value of the expression is insignificant. // // In C++11 mode we check the existence of a const_iterator and that an // iterator is properly implemented for the container. // // For pre-C++11 that we look for both C::iterator and C::const_iterator. // The reason is that C++ injects the name of a class as a member of the // class itself (e.g. you can refer to class iterator as either // 'iterator' or 'iterator::iterator'). If we look for C::iterator // only, for example, we would mistakenly think that a class named // iterator is an STL container. // // Also note that the simpler approach of overloading // IsContainerTest(typename C::const_iterator*) and // IsContainerTest(...) doesn't work with Visual Age C++ and Sun C++. 
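//
// A standalone sketch of that detection pattern (pre-C++11 flavor,
// illustrative names only, not the definitions that follow):
//
//   #include <cstddef>
//   #include <vector>
//
//   typedef int IsCont;    // sizeof(IsCont) != sizeof(IsNotCont)
//   typedef char IsNotCont;
//
//   template <class C>
//   IsCont ContainerTest(int, typename C::iterator* = NULL,
//                        typename C::const_iterator* = NULL);
//   template <class C>
//   IsNotCont ContainerTest(long);
//
//   template <class C>
//   struct LooksLikeContainer {
//     static const bool value =
//         sizeof(ContainerTest<C>(0)) == sizeof(IsCont);
//   };
//
//   // LooksLikeContainer<std::vector<int> >::value is true;
//   // LooksLikeContainer<int>::value is false (SFINAE removes the
//   // first overload, so the long version is picked).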
typedef int IsContainer; #if GTEST_LANG_CXX11 template ().begin()), class = decltype(::std::declval().end()), class = decltype(++::std::declval()), class = decltype(*::std::declval()), class = typename C::const_iterator> IsContainer IsContainerTest(int /* dummy */) { return 0; } #else template IsContainer IsContainerTest(int /* dummy */, typename C::iterator* /* it */ = NULL, typename C::const_iterator* /* const_it */ = NULL) { return 0; } #endif // GTEST_LANG_CXX11 typedef char IsNotContainer; template IsNotContainer IsContainerTest(long /* dummy */) { return '\0'; } // Trait to detect whether a type T is a hash table. // The heuristic used is that the type contains an inner type `hasher` and does // not contain an inner type `reverse_iterator`. // If the container is iterable in reverse, then order might actually matter. template struct IsHashTable { private: template static char test(typename U::hasher*, typename U::reverse_iterator*); template static int test(typename U::hasher*, ...); template static char test(...); public: static const bool value = sizeof(test(0, 0)) == sizeof(int); }; template const bool IsHashTable::value; template struct VoidT { typedef void value_type; }; template struct HasValueType : false_type {}; template struct HasValueType > : true_type { }; template (0)) == sizeof(IsContainer), bool = HasValueType::value> struct IsRecursiveContainerImpl; template struct IsRecursiveContainerImpl : public false_type {}; // Since the IsRecursiveContainerImpl depends on the IsContainerTest we need to // obey the same inconsistencies as the IsContainerTest, namely check if // something is a container is relying on only const_iterator in C++11 and // is relying on both const_iterator and iterator otherwise template struct IsRecursiveContainerImpl : public false_type {}; template struct IsRecursiveContainerImpl { #if GTEST_LANG_CXX11 typedef typename IteratorTraits::value_type value_type; #else typedef typename IteratorTraits::value_type value_type; #endif typedef is_same type; }; // IsRecursiveContainer is a unary compile-time predicate that // evaluates whether C is a recursive container type. A recursive container // type is a container type whose value_type is equal to the container type // itself. An example for a recursive container type is // boost::filesystem::path, whose iterator has a value_type that is equal to // boost::filesystem::path. template struct IsRecursiveContainer : public IsRecursiveContainerImpl::type {}; // EnableIf::type is void when 'Cond' is true, and // undefined when 'Cond' is false. To use SFINAE to make a function // overload only apply when a particular expression is true, add // "typename EnableIf::type* = 0" as the last parameter. template struct EnableIf; template<> struct EnableIf { typedef void type; }; // NOLINT // Utilities for native arrays. // ArrayEq() compares two k-dimensional native arrays using the // elements' operator==, where k can be any integer >= 0. When k is // 0, ArrayEq() degenerates into comparing a single pair of values. template bool ArrayEq(const T* lhs, size_t size, const U* rhs); // This generic version is used when k is 0. template inline bool ArrayEq(const T& lhs, const U& rhs) { return lhs == rhs; } // This overload is used when k >= 1. template inline bool ArrayEq(const T(&lhs)[N], const U(&rhs)[N]) { return internal::ArrayEq(lhs, N, rhs); } // This helper reduces code bloat. 
// If we instead put its logic inside the previous ArrayEq() function,
// arrays with different sizes would lead to different copies of the
// template code.
template <typename T, typename U>
bool ArrayEq(const T* lhs, size_t size, const U* rhs) {
  for (size_t i = 0; i != size; i++) {
    if (!internal::ArrayEq(lhs[i], rhs[i])) return false;
  }
  return true;
}

// Finds the first element in the iterator range [begin, end) that
// equals elem.  Element may be a native array type itself.
template <typename Iter, typename Element>
Iter ArrayAwareFind(Iter begin, Iter end, const Element& elem) {
  for (Iter it = begin; it != end; ++it) {
    if (internal::ArrayEq(*it, elem)) return it;
  }
  return end;
}

// CopyArray() copies a k-dimensional native array using the elements'
// operator=, where k can be any integer >= 0.  When k is 0,
// CopyArray() degenerates into copying a single value.
template <typename T, typename U>
void CopyArray(const T* from, size_t size, U* to);

// This generic version is used when k is 0.
template <typename T, typename U>
inline void CopyArray(const T& from, U* to) { *to = from; }

// This overload is used when k >= 1.
template <typename T, typename U, size_t N>
inline void CopyArray(const T(&from)[N], U(*to)[N]) {
  internal::CopyArray(from, N, *to);
}

// This helper reduces code bloat.  If we instead put its logic inside
// the previous CopyArray() function, arrays with different sizes
// would lead to different copies of the template code.
template <typename T, typename U>
void CopyArray(const T* from, size_t size, U* to) {
  for (size_t i = 0; i != size; i++) {
    internal::CopyArray(from[i], to + i);
  }
}

// The relation between a NativeArray object (see below) and the
// native array it represents.
// We use 2 different structs to allow non-copyable types to be used, as long
// as RelationToSourceReference() is passed.
struct RelationToSourceReference {};
struct RelationToSourceCopy {};

// Adapts a native array to a read-only STL-style container.  Instead
// of the complete STL container concept, this adaptor only implements
// members useful for Google Mock's container matchers.  New members
// should be added as needed.  To simplify the implementation, we only
// support Element being a raw type (i.e. having no top-level const or
// reference modifier).  It's the client's responsibility to satisfy
// this requirement.  Element can be an array type itself (hence
// multi-dimensional arrays are supported).
template <typename Element>
class NativeArray {
 public:
  // STL-style container typedefs.
  typedef Element value_type;
  typedef Element* iterator;
  typedef const Element* const_iterator;

  // Constructs from a native array.  References the source.
  NativeArray(const Element* array, size_t count, RelationToSourceReference) {
    InitRef(array, count);
  }

  // Constructs from a native array.  Copies the source.
  NativeArray(const Element* array, size_t count, RelationToSourceCopy) {
    InitCopy(array, count);
  }

  // Copy constructor.
  NativeArray(const NativeArray& rhs) {
    (this->*rhs.clone_)(rhs.array_, rhs.size_);
  }

  ~NativeArray() {
    if (clone_ != &NativeArray::InitRef) delete[] array_;
  }

  // STL-style container methods.
  size_t size() const { return size_; }
  const_iterator begin() const { return array_; }
  const_iterator end() const { return array_ + size_; }
  bool operator==(const NativeArray& rhs) const {
    return size() == rhs.size() && ArrayEq(begin(), size(), rhs.begin());
  }

 private:
  enum {
    kCheckTypeIsNotConstOrAReference = StaticAssertTypeEqHelper<
        Element, GTEST_REMOVE_REFERENCE_AND_CONST_(Element)>::value
  };

  // Initializes this object with a copy of the input.
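  //
  // (A brief usage sketch, illustrative only:
  //
  //    const int kInts[] = { 1, 2, 3 };
  //    NativeArray<int> view(kInts, 3, RelationToSourceReference());
  //    NativeArray<int> deep(kInts, 3, RelationToSourceCopy());
  //    assert(view == deep);  // element-wise comparison via ArrayEq()
  //
  // view merely aliases kInts, while deep clones the elements; the clone_
  // member set by the helpers below tells ~NativeArray() which case holds.)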
  void InitCopy(const Element* array, size_t a_size) {
    Element* const copy = new Element[a_size];
    CopyArray(array, a_size, copy);
    array_ = copy;
    size_ = a_size;
    clone_ = &NativeArray::InitCopy;
  }

  // Initializes this object with a reference of the input.
  void InitRef(const Element* array, size_t a_size) {
    array_ = array;
    size_ = a_size;
    clone_ = &NativeArray::InitRef;
  }

  const Element* array_;
  size_t size_;
  void (NativeArray::*clone_)(const Element*, size_t);

  GTEST_DISALLOW_ASSIGN_(NativeArray);
};

}  // namespace internal
}  // namespace testing

#define GTEST_MESSAGE_AT_(file, line, message, result_type) \
  ::testing::internal::AssertHelper(result_type, file, line, message) \
    = ::testing::Message()

#define GTEST_MESSAGE_(message, result_type) \
  GTEST_MESSAGE_AT_(__FILE__, __LINE__, message, result_type)

#define GTEST_FATAL_FAILURE_(message) \
  return GTEST_MESSAGE_(message, ::testing::TestPartResult::kFatalFailure)

#define GTEST_NONFATAL_FAILURE_(message) \
  GTEST_MESSAGE_(message, ::testing::TestPartResult::kNonFatalFailure)

#define GTEST_SUCCESS_(message) \
  GTEST_MESSAGE_(message, ::testing::TestPartResult::kSuccess)

// Suppress MSVC warning 4702 (unreachable code) for the code following
// statement if it returns or throws (or doesn't return or throw in some
// situations).
#define GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement) \
  if (::testing::internal::AlwaysTrue()) { statement; }

#define GTEST_TEST_THROW_(statement, expected_exception, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::ConstCharPtr gtest_msg = "") { \
    bool gtest_caught_expected = false; \
    try { \
      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    } \
    catch (expected_exception const&) { \
      gtest_caught_expected = true; \
    } \
    catch (...) { \
      gtest_msg.value = \
          "Expected: " #statement " throws an exception of type " \
          #expected_exception ".\n  Actual: it throws a different type."; \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
    } \
    if (!gtest_caught_expected) { \
      gtest_msg.value = \
          "Expected: " #statement " throws an exception of type " \
          #expected_exception ".\n  Actual: it throws nothing."; \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testthrow_, __LINE__): \
      fail(gtest_msg.value)

#define GTEST_TEST_NO_THROW_(statement, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
    try { \
      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    } \
    catch (...) { \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testnothrow_, __LINE__): \
      fail("Expected: " #statement " doesn't throw an exception.\n" \
           "  Actual: it throws.")

#define GTEST_TEST_ANY_THROW_(statement, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
    bool gtest_caught_any = false; \
    try { \
      GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    } \
    catch (...) { \
      gtest_caught_any = true; \
    } \
    if (!gtest_caught_any) { \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testanythrow_, __LINE__): \
      fail("Expected: " #statement " throws an exception.\n" \
           "  Actual: it doesn't.")

// Implements Boolean test assertions such as EXPECT_TRUE.  expression can be
// either a boolean expression or an AssertionResult.  text is a textual
// representation of expression as it was passed into the EXPECT_TRUE.
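//
// For orientation (a sketch; the public EXPECT_TRUE macro itself is
// defined in gtest.h): EXPECT_TRUE(cond) expands essentially to
//
//   GTEST_TEST_BOOLEAN_(cond, #cond, false, true, GTEST_NONFATAL_FAILURE_)
//
// so on failure the stringified expression is reported, and because the
// expansion ends in an if/else, callers can stream extra context:
//
//   EXPECT_TRUE(list.empty()) << "size = " << list.size();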
#define GTEST_TEST_BOOLEAN_(expression, text, actual, expected, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (const ::testing::AssertionResult gtest_ar_ = \
      ::testing::AssertionResult(expression)) \
    ; \
  else \
    fail(::testing::internal::GetBoolAssertionFailureMessage(\
        gtest_ar_, text, #actual, #expected).c_str())

#define GTEST_TEST_NO_FATAL_FAILURE_(statement, fail) \
  GTEST_AMBIGUOUS_ELSE_BLOCKER_ \
  if (::testing::internal::AlwaysTrue()) { \
    ::testing::internal::HasNewFatalFailureHelper gtest_fatal_failure_checker; \
    GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_(statement); \
    if (gtest_fatal_failure_checker.has_new_fatal_failure()) { \
      goto GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__); \
    } \
  } else \
    GTEST_CONCAT_TOKEN_(gtest_label_testnofatal_, __LINE__): \
      fail("Expected: " #statement " doesn't generate new fatal " \
           "failures in the current thread.\n" \
           "  Actual: it does.")

// Expands to the name of the class that implements the given test.
#define GTEST_TEST_CLASS_NAME_(test_case_name, test_name) \
  test_case_name##_##test_name##_Test

// Helper macro for defining tests.
#define GTEST_TEST_(test_case_name, test_name, parent_class, parent_id)\
class GTEST_TEST_CLASS_NAME_(test_case_name, test_name) : public parent_class {\
 public:\
  GTEST_TEST_CLASS_NAME_(test_case_name, test_name)() {}\
 private:\
  virtual void TestBody();\
  static ::testing::TestInfo* const test_info_ GTEST_ATTRIBUTE_UNUSED_;\
  GTEST_DISALLOW_COPY_AND_ASSIGN_(\
      GTEST_TEST_CLASS_NAME_(test_case_name, test_name));\
};\
\
::testing::TestInfo* const GTEST_TEST_CLASS_NAME_(test_case_name, test_name)\
  ::test_info_ =\
    ::testing::internal::MakeAndRegisterTestInfo(\
        #test_case_name, #test_name, NULL, NULL, \
        ::testing::internal::CodeLocation(__FILE__, __LINE__), \
        (parent_id), \
        parent_class::SetUpTestCase, \
        parent_class::TearDownTestCase, \
        new ::testing::internal::TestFactoryImpl<\
            GTEST_TEST_CLASS_NAME_(test_case_name, test_name)>);\
void GTEST_TEST_CLASS_NAME_(test_case_name, test_name)::TestBody()

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_INTERNAL_H_
libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-linked_ptr.h000066400000000000000000000203211357355204000303410ustar00rootroot00000000000000
// Copyright 2003 Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//   * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//   * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // A "smart" pointer type with reference tracking. Every pointer to a // particular object is kept on a circular linked list. When the last pointer // to an object is destroyed or reassigned, the object is deleted. // // Used properly, this deletes the object when the last reference goes away. // There are several caveats: // - Like all reference counting schemes, cycles lead to leaks. // - Each smart pointer is actually two pointers (8 bytes instead of 4). // - Every time a pointer is assigned, the entire list of pointers to that // object is traversed. This class is therefore NOT SUITABLE when there // will often be more than two or three pointers to a particular object. // - References are only tracked as long as linked_ptr<> objects are copied. // If a linked_ptr<> is converted to a raw pointer and back, BAD THINGS // will happen (double deletion). // // A good use of this class is storing object references in STL containers. // You can safely put linked_ptr<> in a vector<>. // Other uses may not be as good. // // Note: If you use an incomplete type with linked_ptr<>, the class // *containing* linked_ptr<> must have a constructor and destructor (even // if they do nothing!). // // Bill Gibbons suggested we use something like this. // // Thread Safety: // Unlike other linked_ptr implementations, in this implementation // a linked_ptr object is thread-safe in the sense that: // - it's safe to copy linked_ptr objects concurrently, // - it's safe to copy *from* a linked_ptr and read its underlying // raw pointer (e.g. via get()) concurrently, and // - it's safe to write to two linked_ptrs that point to the same // shared object concurrently. // FIXME: rename this to safe_linked_ptr to avoid // confusion with normal linked_ptr. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ #include #include #include "gtest/internal/gtest-port.h" namespace testing { namespace internal { // Protects copying of all linked_ptr objects. GTEST_API_ GTEST_DECLARE_STATIC_MUTEX_(g_linked_ptr_mutex); // This is used internally by all instances of linked_ptr<>. It needs to be // a non-template class because different types of linked_ptr<> can refer to // the same object (linked_ptr(obj) vs linked_ptr(obj)). // So, it needs to be possible for different types of linked_ptr to participate // in the same circular linked list, so we need a single class type here. // // DO NOT USE THIS CLASS DIRECTLY YOURSELF. Use linked_ptr. class linked_ptr_internal { public: // Create a new circle that includes only this instance. void join_new() { next_ = this; } // Many linked_ptr operations may change p.link_ for some linked_ptr // variable p in the same circle as this object. Therefore we need // to prevent two such operations from occurring concurrently. // // Note that different types of linked_ptr objects can coexist in a // circle (e.g. linked_ptr, linked_ptr, and // linked_ptr). 
Therefore we must use a single mutex to // protect all linked_ptr objects. This can create serious // contention in production code, but is acceptable in a testing // framework. // Join an existing circle. void join(linked_ptr_internal const* ptr) GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) { MutexLock lock(&g_linked_ptr_mutex); linked_ptr_internal const* p = ptr; while (p->next_ != ptr) { assert(p->next_ != this && "Trying to join() a linked ring we are already in. " "Is GMock thread safety enabled?"); p = p->next_; } p->next_ = this; next_ = ptr; } // Leave whatever circle we're part of. Returns true if we were the // last member of the circle. Once this is done, you can join() another. bool depart() GTEST_LOCK_EXCLUDED_(g_linked_ptr_mutex) { MutexLock lock(&g_linked_ptr_mutex); if (next_ == this) return true; linked_ptr_internal const* p = next_; while (p->next_ != this) { assert(p->next_ != next_ && "Trying to depart() a linked ring we are not in. " "Is GMock thread safety enabled?"); p = p->next_; } p->next_ = next_; return false; } private: mutable linked_ptr_internal const* next_; }; template class linked_ptr { public: typedef T element_type; // Take over ownership of a raw pointer. This should happen as soon as // possible after the object is created. explicit linked_ptr(T* ptr = NULL) { capture(ptr); } ~linked_ptr() { depart(); } // Copy an existing linked_ptr<>, adding ourselves to the list of references. template linked_ptr(linked_ptr const& ptr) { copy(&ptr); } linked_ptr(linked_ptr const& ptr) { // NOLINT assert(&ptr != this); copy(&ptr); } // Assignment releases the old value and acquires the new. template linked_ptr& operator=(linked_ptr const& ptr) { depart(); copy(&ptr); return *this; } linked_ptr& operator=(linked_ptr const& ptr) { if (&ptr != this) { depart(); copy(&ptr); } return *this; } // Smart pointer members. void reset(T* ptr = NULL) { depart(); capture(ptr); } T* get() const { return value_; } T* operator->() const { return value_; } T& operator*() const { return *value_; } bool operator==(T* p) const { return value_ == p; } bool operator!=(T* p) const { return value_ != p; } template bool operator==(linked_ptr const& ptr) const { return value_ == ptr.get(); } template bool operator!=(linked_ptr const& ptr) const { return value_ != ptr.get(); } private: template friend class linked_ptr; T* value_; linked_ptr_internal link_; void depart() { if (link_.depart()) delete value_; } void capture(T* ptr) { value_ = ptr; link_.join_new(); } template void copy(linked_ptr const* ptr) { value_ = ptr->get(); if (value_) link_.join(&ptr->link_); else link_.join_new(); } }; template inline bool operator==(T* ptr, const linked_ptr& x) { return ptr == x.get(); } template inline bool operator!=(T* ptr, const linked_ptr& x) { return ptr != x.get(); } // A function to convert T* into linked_ptr // Doing e.g. make_linked_ptr(new FooBarBaz(arg)) is a shorter notation // for linked_ptr >(new FooBarBaz(arg)) template linked_ptr make_linked_ptr(T* ptr) { return linked_ptr(ptr); } } // namespace internal } // namespace testing #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_LINKED_PTR_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-param-util-generated.h000066400000000000000000006526021357355204000322320ustar00rootroot00000000000000// This file was GENERATED by command: // pump.py gtest-param-util-generated.h.pump // DO NOT EDIT BY HAND!!! // Copyright 2008 Google Inc. // All Rights Reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Type and function utilities for implementing parameterized tests. // This file is generated by a SCRIPT. DO NOT EDIT BY HAND! // // Currently Google Test supports at most 50 arguments in Values, // and at most 10 arguments in Combine. Please contact // googletestframework@googlegroups.com if you need more. // Please note that the number of arguments to Combine is limited // by the maximum arity of the implementation of tuple which is // currently set at 10. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ #include "gtest/internal/gtest-param-util.h" #include "gtest/internal/gtest-port.h" namespace testing { // Forward declarations of ValuesIn(), which is implemented in // include/gtest/gtest-param-test.h. template internal::ParamGenerator< typename ::testing::internal::IteratorTraits::value_type> ValuesIn(ForwardIterator begin, ForwardIterator end); template internal::ParamGenerator ValuesIn(const T (&array)[N]); template internal::ParamGenerator ValuesIn( const Container& container); namespace internal { // Used in the Values() function to provide polymorphic capabilities. template class ValueArray1 { public: explicit ValueArray1(T1 v1) : v1_(v1) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_)}; return ValuesIn(array); } ValueArray1(const ValueArray1& other) : v1_(other.v1_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray1& other); const T1 v1_; }; template class ValueArray2 { public: ValueArray2(T1 v1, T2 v2) : v1_(v1), v2_(v2) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_)}; return ValuesIn(array); } ValueArray2(const ValueArray2& other) : v1_(other.v1_), v2_(other.v2_) {} private: // No implementation - assignment is unsupported. 
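  //
  // (Note on the pattern: every ValueArrayN in this generated file works
  // like ValueArray1 and ValueArray2 -- Values() captures its arguments,
  // and the conversion operator materializes them, via static_cast, into
  // a ParamGenerator<T> for whatever element type T the test requires.
  // With C++11 variadic templates the capture-and-homogenize step can be
  // written once; an illustrative sketch, not the code used here:
  //
  //   #include <vector>
  //
  //   template <typename T, typename... Ts>
  //   std::vector<T> MakeValueVector(Ts... vs) {  // one or more arguments
  //     const T array[] = { static_cast<T>(vs)... };
  //     return std::vector<T>(array, array + sizeof...(Ts));
  //   }
  // )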
void operator=(const ValueArray2& other); const T1 v1_; const T2 v2_; }; template class ValueArray3 { public: ValueArray3(T1 v1, T2 v2, T3 v3) : v1_(v1), v2_(v2), v3_(v3) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_)}; return ValuesIn(array); } ValueArray3(const ValueArray3& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray3& other); const T1 v1_; const T2 v2_; const T3 v3_; }; template class ValueArray4 { public: ValueArray4(T1 v1, T2 v2, T3 v3, T4 v4) : v1_(v1), v2_(v2), v3_(v3), v4_(v4) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_)}; return ValuesIn(array); } ValueArray4(const ValueArray4& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray4& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; }; template class ValueArray5 { public: ValueArray5(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_)}; return ValuesIn(array); } ValueArray5(const ValueArray5& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray5& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; }; template class ValueArray6 { public: ValueArray6(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_)}; return ValuesIn(array); } ValueArray6(const ValueArray6& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray6& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; }; template class ValueArray7 { public: ValueArray7(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_)}; return ValuesIn(array); } ValueArray7(const ValueArray7& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_) {} private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray7& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; }; template class ValueArray8 { public: ValueArray8(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_)}; return ValuesIn(array); } ValueArray8(const ValueArray8& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray8& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; }; template class ValueArray9 { public: ValueArray9(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_)}; return ValuesIn(array); } ValueArray9(const ValueArray9& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray9& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; }; template class ValueArray10 { public: ValueArray10(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_)}; return ValuesIn(array); } ValueArray10(const ValueArray10& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_) {} private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray10& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; }; template class ValueArray11 { public: ValueArray11(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_)}; return ValuesIn(array); } ValueArray11(const ValueArray11& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray11& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; }; template class ValueArray12 { public: ValueArray12(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_)}; return ValuesIn(array); } ValueArray12(const ValueArray12& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray12& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; }; template class ValueArray13 { public: ValueArray13(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_)}; return ValuesIn(array); } ValueArray13(const ValueArray13& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_) {} private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray13& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; }; template class ValueArray14 { public: ValueArray14(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_)}; return ValuesIn(array); } ValueArray14(const ValueArray14& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray14& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; }; template class ValueArray15 { public: ValueArray15(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_)}; return ValuesIn(array); } ValueArray15(const ValueArray15& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_) {} private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray15& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; }; template class ValueArray16 { public: ValueArray16(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_)}; return ValuesIn(array); } ValueArray16(const ValueArray16& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray16& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; }; template class ValueArray17 { public: ValueArray17(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_)}; return ValuesIn(array); } ValueArray17(const ValueArray17& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_) {} private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray17& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; }; template class ValueArray18 { public: ValueArray18(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_), static_cast(v18_)}; return ValuesIn(array); } ValueArray18(const ValueArray18& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray18& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; const T18 v18_; }; template class ValueArray19 { public: ValueArray19(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_), static_cast(v18_), static_cast(v19_)}; return ValuesIn(array); } ValueArray19(const ValueArray19& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), v19_(other.v19_) {} private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray19& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; const T18 v18_; const T19 v19_; }; template class ValueArray20 { public: ValueArray20(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_), static_cast(v18_), static_cast(v19_), static_cast(v20_)}; return ValuesIn(array); } ValueArray20(const ValueArray20& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray20& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; const T18 v18_; const T19 v19_; const T20 v20_; }; template class ValueArray21 { public: ValueArray21(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_), static_cast(v18_), static_cast(v19_), static_cast(v20_), static_cast(v21_)}; return ValuesIn(array); } ValueArray21(const ValueArray21& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_), v21_(other.v21_) {} private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray21& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; const T18 v18_; const T19 v19_; const T20 v20_; const T21 v21_; }; template class ValueArray22 { public: ValueArray22(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_), static_cast(v18_), static_cast(v19_), static_cast(v20_), static_cast(v21_), static_cast(v22_)}; return ValuesIn(array); } ValueArray22(const ValueArray22& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_) {} private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray22& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; const T18 v18_; const T19 v19_; const T20 v20_; const T21 v21_; const T22 v22_; }; template class ValueArray23 { public: ValueArray23(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_), static_cast(v18_), static_cast(v19_), static_cast(v20_), static_cast(v21_), static_cast(v22_), static_cast(v23_)}; return ValuesIn(array); } ValueArray23(const ValueArray23& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), v23_(other.v23_) {} private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray23& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; const T18 v18_; const T19 v19_; const T20 v20_; const T21 v21_; const T22 v22_; const T23 v23_; }; template class ValueArray24 { public: ValueArray24(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_), static_cast(v18_), static_cast(v19_), static_cast(v20_), static_cast(v21_), static_cast(v22_), static_cast(v23_), static_cast(v24_)}; return ValuesIn(array); } ValueArray24(const ValueArray24& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_) {} private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray24& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; const T18 v18_; const T19 v19_; const T20 v20_; const T21 v21_; const T22 v22_; const T23 v23_; const T24 v24_; }; template class ValueArray25 { public: ValueArray25(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_), static_cast(v18_), static_cast(v19_), static_cast(v20_), static_cast(v21_), static_cast(v22_), static_cast(v23_), static_cast(v24_), static_cast(v25_)}; return ValuesIn(array); } ValueArray25(const ValueArray25& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_), v25_(other.v25_) {} private: // No implementation - assignment is unsupported. 
void operator=(const ValueArray25& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; const T18 v18_; const T19 v19_; const T20 v20_; const T21 v21_; const T22 v22_; const T23 v23_; const T24 v24_; const T25 v25_; }; template class ValueArray26 { public: ValueArray26(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_), static_cast(v18_), static_cast(v19_), static_cast(v20_), static_cast(v21_), static_cast(v22_), static_cast(v23_), static_cast(v24_), static_cast(v25_), static_cast(v26_)}; return ValuesIn(array); } ValueArray26(const ValueArray26& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_) {} private: // No implementation - assignment is unsupported. 
// ValueArray26 through ValueArray47 continue this generated series unchanged:
// each class adds one more type parameter (up to T47) and one more stored
// value (up to v47_) to the constructor, the ParamGenerator<T> conversion
// operator, the copy constructor, and the member list.
v47_(other.v47_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray47& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; const T18 v18_; const T19 v19_; const T20 v20_; const T21 v21_; const T22 v22_; const T23 v23_; const T24 v24_; const T25 v25_; const T26 v26_; const T27 v27_; const T28 v28_; const T29 v29_; const T30 v30_; const T31 v31_; const T32 v32_; const T33 v33_; const T34 v34_; const T35 v35_; const T36 v36_; const T37 v37_; const T38 v38_; const T39 v39_; const T40 v40_; const T41 v41_; const T42 v42_; const T43 v43_; const T44 v44_; const T45 v45_; const T46 v46_; const T47 v47_; }; template class ValueArray48 { public: ValueArray48(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46), v47_(v47), v48_(v48) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_), static_cast(v18_), static_cast(v19_), static_cast(v20_), static_cast(v21_), static_cast(v22_), static_cast(v23_), static_cast(v24_), static_cast(v25_), static_cast(v26_), static_cast(v27_), static_cast(v28_), static_cast(v29_), static_cast(v30_), static_cast(v31_), static_cast(v32_), static_cast(v33_), static_cast(v34_), static_cast(v35_), static_cast(v36_), static_cast(v37_), static_cast(v38_), static_cast(v39_), static_cast(v40_), static_cast(v41_), static_cast(v42_), static_cast(v43_), static_cast(v44_), static_cast(v45_), static_cast(v46_), static_cast(v47_), static_cast(v48_)}; return ValuesIn(array); } ValueArray48(const ValueArray48& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), 
v39_(other.v39_), v40_(other.v40_), v41_(other.v41_), v42_(other.v42_), v43_(other.v43_), v44_(other.v44_), v45_(other.v45_), v46_(other.v46_), v47_(other.v47_), v48_(other.v48_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray48& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; const T18 v18_; const T19 v19_; const T20 v20_; const T21 v21_; const T22 v22_; const T23 v23_; const T24 v24_; const T25 v25_; const T26 v26_; const T27 v27_; const T28 v28_; const T29 v29_; const T30 v30_; const T31 v31_; const T32 v32_; const T33 v33_; const T34 v34_; const T35 v35_; const T36 v36_; const T37 v37_; const T38 v38_; const T39 v39_; const T40 v40_; const T41 v41_; const T42 v42_; const T43 v43_; const T44 v44_; const T45 v45_; const T46 v46_; const T47 v47_; const T48 v48_; }; template class ValueArray49 { public: ValueArray49(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, T49 v49) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_), static_cast(v18_), static_cast(v19_), static_cast(v20_), static_cast(v21_), static_cast(v22_), static_cast(v23_), static_cast(v24_), static_cast(v25_), static_cast(v26_), static_cast(v27_), static_cast(v28_), static_cast(v29_), static_cast(v30_), static_cast(v31_), static_cast(v32_), static_cast(v33_), static_cast(v34_), static_cast(v35_), static_cast(v36_), static_cast(v37_), static_cast(v38_), static_cast(v39_), static_cast(v40_), static_cast(v41_), static_cast(v42_), static_cast(v43_), static_cast(v44_), static_cast(v45_), static_cast(v46_), static_cast(v47_), static_cast(v48_), static_cast(v49_)}; return ValuesIn(array); } ValueArray49(const ValueArray49& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), 
v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), v39_(other.v39_), v40_(other.v40_), v41_(other.v41_), v42_(other.v42_), v43_(other.v43_), v44_(other.v44_), v45_(other.v45_), v46_(other.v46_), v47_(other.v47_), v48_(other.v48_), v49_(other.v49_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray49& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; const T18 v18_; const T19 v19_; const T20 v20_; const T21 v21_; const T22 v22_; const T23 v23_; const T24 v24_; const T25 v25_; const T26 v26_; const T27 v27_; const T28 v28_; const T29 v29_; const T30 v30_; const T31 v31_; const T32 v32_; const T33 v33_; const T34 v34_; const T35 v35_; const T36 v36_; const T37 v37_; const T38 v38_; const T39 v39_; const T40 v40_; const T41 v41_; const T42 v42_; const T43 v43_; const T44 v44_; const T45 v45_; const T46 v46_; const T47 v47_; const T48 v48_; const T49 v49_; }; template class ValueArray50 { public: ValueArray50(T1 v1, T2 v2, T3 v3, T4 v4, T5 v5, T6 v6, T7 v7, T8 v8, T9 v9, T10 v10, T11 v11, T12 v12, T13 v13, T14 v14, T15 v15, T16 v16, T17 v17, T18 v18, T19 v19, T20 v20, T21 v21, T22 v22, T23 v23, T24 v24, T25 v25, T26 v26, T27 v27, T28 v28, T29 v29, T30 v30, T31 v31, T32 v32, T33 v33, T34 v34, T35 v35, T36 v36, T37 v37, T38 v38, T39 v39, T40 v40, T41 v41, T42 v42, T43 v43, T44 v44, T45 v45, T46 v46, T47 v47, T48 v48, T49 v49, T50 v50) : v1_(v1), v2_(v2), v3_(v3), v4_(v4), v5_(v5), v6_(v6), v7_(v7), v8_(v8), v9_(v9), v10_(v10), v11_(v11), v12_(v12), v13_(v13), v14_(v14), v15_(v15), v16_(v16), v17_(v17), v18_(v18), v19_(v19), v20_(v20), v21_(v21), v22_(v22), v23_(v23), v24_(v24), v25_(v25), v26_(v26), v27_(v27), v28_(v28), v29_(v29), v30_(v30), v31_(v31), v32_(v32), v33_(v33), v34_(v34), v35_(v35), v36_(v36), v37_(v37), v38_(v38), v39_(v39), v40_(v40), v41_(v41), v42_(v42), v43_(v43), v44_(v44), v45_(v45), v46_(v46), v47_(v47), v48_(v48), v49_(v49), v50_(v50) {} template operator ParamGenerator() const { const T array[] = {static_cast(v1_), static_cast(v2_), static_cast(v3_), static_cast(v4_), static_cast(v5_), static_cast(v6_), static_cast(v7_), static_cast(v8_), static_cast(v9_), static_cast(v10_), static_cast(v11_), static_cast(v12_), static_cast(v13_), static_cast(v14_), static_cast(v15_), static_cast(v16_), static_cast(v17_), static_cast(v18_), static_cast(v19_), static_cast(v20_), static_cast(v21_), static_cast(v22_), static_cast(v23_), static_cast(v24_), static_cast(v25_), static_cast(v26_), static_cast(v27_), static_cast(v28_), static_cast(v29_), static_cast(v30_), static_cast(v31_), static_cast(v32_), static_cast(v33_), static_cast(v34_), static_cast(v35_), static_cast(v36_), static_cast(v37_), static_cast(v38_), static_cast(v39_), static_cast(v40_), static_cast(v41_), static_cast(v42_), static_cast(v43_), static_cast(v44_), static_cast(v45_), static_cast(v46_), static_cast(v47_), static_cast(v48_), static_cast(v49_), static_cast(v50_)}; return ValuesIn(array); } ValueArray50(const ValueArray50& other) : v1_(other.v1_), v2_(other.v2_), v3_(other.v3_), v4_(other.v4_), v5_(other.v5_), v6_(other.v6_), v7_(other.v7_), v8_(other.v8_), v9_(other.v9_), v10_(other.v10_), 
v11_(other.v11_), v12_(other.v12_), v13_(other.v13_), v14_(other.v14_), v15_(other.v15_), v16_(other.v16_), v17_(other.v17_), v18_(other.v18_), v19_(other.v19_), v20_(other.v20_), v21_(other.v21_), v22_(other.v22_), v23_(other.v23_), v24_(other.v24_), v25_(other.v25_), v26_(other.v26_), v27_(other.v27_), v28_(other.v28_), v29_(other.v29_), v30_(other.v30_), v31_(other.v31_), v32_(other.v32_), v33_(other.v33_), v34_(other.v34_), v35_(other.v35_), v36_(other.v36_), v37_(other.v37_), v38_(other.v38_), v39_(other.v39_), v40_(other.v40_), v41_(other.v41_), v42_(other.v42_), v43_(other.v43_), v44_(other.v44_), v45_(other.v45_), v46_(other.v46_), v47_(other.v47_), v48_(other.v48_), v49_(other.v49_), v50_(other.v50_) {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray50& other); const T1 v1_; const T2 v2_; const T3 v3_; const T4 v4_; const T5 v5_; const T6 v6_; const T7 v7_; const T8 v8_; const T9 v9_; const T10 v10_; const T11 v11_; const T12 v12_; const T13 v13_; const T14 v14_; const T15 v15_; const T16 v16_; const T17 v17_; const T18 v18_; const T19 v19_; const T20 v20_; const T21 v21_; const T22 v22_; const T23 v23_; const T24 v24_; const T25 v25_; const T26 v26_; const T27 v27_; const T28 v28_; const T29 v29_; const T30 v30_; const T31 v31_; const T32 v32_; const T33 v33_; const T34 v34_; const T35 v35_; const T36 v36_; const T37 v37_; const T38 v38_; const T39 v39_; const T40 v40_; const T41 v41_; const T42 v42_; const T43 v43_; const T44 v44_; const T45 v45_; const T46 v46_; const T47 v47_; const T48 v48_; const T49 v49_; const T50 v50_; }; # if GTEST_HAS_COMBINE // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // Generates values from the Cartesian product of values produced // by the argument generators. // template class CartesianProductGenerator2 : public ParamGeneratorInterface< ::testing::tuple > { public: typedef ::testing::tuple ParamType; CartesianProductGenerator2(const ParamGenerator& g1, const ParamGenerator& g2) : g1_(g1), g2_(g2) {} virtual ~CartesianProductGenerator2() {} virtual ParamIteratorInterface* Begin() const { return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin()); } virtual ParamIteratorInterface* End() const { return new Iterator(this, g1_, g1_.end(), g2_, g2_.end()); } private: class Iterator : public ParamIteratorInterface { public: Iterator(const ParamGeneratorInterface* base, const ParamGenerator& g1, const typename ParamGenerator::iterator& current1, const ParamGenerator& g2, const typename ParamGenerator::iterator& current2) : base_(base), begin1_(g1.begin()), end1_(g1.end()), current1_(current1), begin2_(g2.begin()), end2_(g2.end()), current2_(current2) { ComputeCurrentValue(); } virtual ~Iterator() {} virtual const ParamGeneratorInterface* BaseGenerator() const { return base_; } // Advance should not be called on beyond-of-range iterators // so no component iterators must be beyond end of range, either. virtual void Advance() { assert(!AtEnd()); ++current2_; if (current2_ == end2_) { current2_ = begin2_; ++current1_; } ComputeCurrentValue(); } virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) << "The program attempted to compare iterators " << "from different generators." 
        << std::endl;
    const Iterator* typed_other =
        CheckedDowncastToActualType<const Iterator>(&other);
    // We must report iterators equal if they both point beyond their
    // respective ranges. That can happen in a variety of fashions,
    // so we have to consult AtEnd().
    return (AtEnd() && typed_other->AtEnd()) ||
        (current1_ == typed_other->current1_ &&
         current2_ == typed_other->current2_);
    }

   private:
    Iterator(const Iterator& other)
        : base_(other.base_),
          begin1_(other.begin1_), end1_(other.end1_),
          current1_(other.current1_),
          begin2_(other.begin2_), end2_(other.end2_),
          current2_(other.current2_) {
      ComputeCurrentValue();
    }

    void ComputeCurrentValue() {
      if (!AtEnd())
        current_value_.reset(new ParamType(*current1_, *current2_));
    }
    bool AtEnd() const {
      // We must report iterator past the end of the range when either of the
      // component iterators has reached the end of its range.
      return current1_ == end1_ || current2_ == end2_;
    }

    // No implementation - assignment is unsupported.
    void operator=(const Iterator& other);

    const ParamGeneratorInterface<ParamType>* const base_;
    // begin[i]_ and end[i]_ define the i-th range that Iterator traverses.
    // current[i]_ is the actual traversing iterator.
    const typename ParamGenerator<T1>::iterator begin1_;
    const typename ParamGenerator<T1>::iterator end1_;
    typename ParamGenerator<T1>::iterator current1_;
    const typename ParamGenerator<T2>::iterator begin2_;
    const typename ParamGenerator<T2>::iterator end2_;
    typename ParamGenerator<T2>::iterator current2_;
    linked_ptr<ParamType> current_value_;
  };  // class CartesianProductGenerator2::Iterator

  // No implementation - assignment is unsupported.
  void operator=(const CartesianProductGenerator2& other);

  const ParamGenerator<T1> g1_;
  const ParamGenerator<T2> g2_;
};  // class CartesianProductGenerator2

template <typename T1, typename T2, typename T3>
class CartesianProductGenerator3
    : public ParamGeneratorInterface< ::testing::tuple<T1, T2, T3> > {
 public:
  typedef ::testing::tuple<T1, T2, T3> ParamType;

  CartesianProductGenerator3(const ParamGenerator<T1>& g1,
      const ParamGenerator<T2>& g2, const ParamGenerator<T3>& g3)
      : g1_(g1), g2_(g2), g3_(g3) {}
  virtual ~CartesianProductGenerator3() {}

  virtual ParamIteratorInterface<ParamType>* Begin() const {
    return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_,
        g3_.begin());
  }
  virtual ParamIteratorInterface<ParamType>* End() const {
    return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end());
  }

 private:
  class Iterator : public ParamIteratorInterface<ParamType> {
   public:
    Iterator(const ParamGeneratorInterface<ParamType>* base,
      const ParamGenerator<T1>& g1,
      const typename ParamGenerator<T1>::iterator& current1,
      const ParamGenerator<T2>& g2,
      const typename ParamGenerator<T2>::iterator& current2,
      const ParamGenerator<T3>& g3,
      const typename ParamGenerator<T3>::iterator& current3)
        : base_(base),
          begin1_(g1.begin()), end1_(g1.end()), current1_(current1),
          begin2_(g2.begin()), end2_(g2.end()), current2_(current2),
          begin3_(g3.begin()), end3_(g3.end()), current3_(current3) {
      ComputeCurrentValue();
    }
    virtual ~Iterator() {}

    virtual const ParamGeneratorInterface<ParamType>* BaseGenerator() const {
      return base_;
    }
    // Advance should not be called on beyond-of-range iterators
    // so no component iterators must be beyond end of range, either.
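// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the gtest source).
// Advance() below implements an "odometer" carry: the rightmost component
// iterator moves fastest, and wrapping a component carries into the one on
// its left. The same scheme over plain index vectors, as a standalone,
// hypothetical helper:
//
//   #include <cstddef>
//   #include <vector>
//
//   // Returns false once every digit has wrapped, i.e. the whole
//   // Cartesian product has been visited.
//   inline bool NextTupleIndex(std::vector<std::size_t>* idx,
//                              const std::vector<std::size_t>& sizes) {
//     for (std::size_t i = idx->size(); i-- > 0;) {
//       if (++(*idx)[i] < sizes[i]) return true;  // no carry needed
//       (*idx)[i] = 0;  // wrap this digit and carry into the next one left
//     }
//     return false;
//   }
// ---------------------------------------------------------------------------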
virtual void Advance() { assert(!AtEnd()); ++current3_; if (current3_ == end3_) { current3_ = begin3_; ++current2_; } if (current2_ == end2_) { current2_ = begin2_; ++current1_; } ComputeCurrentValue(); } virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) << "The program attempted to compare iterators " << "from different generators." << std::endl; const Iterator* typed_other = CheckedDowncastToActualType(&other); // We must report iterators equal if they both point beyond their // respective ranges. That can happen in a variety of fashions, // so we have to consult AtEnd(). return (AtEnd() && typed_other->AtEnd()) || ( current1_ == typed_other->current1_ && current2_ == typed_other->current2_ && current3_ == typed_other->current3_); } private: Iterator(const Iterator& other) : base_(other.base_), begin1_(other.begin1_), end1_(other.end1_), current1_(other.current1_), begin2_(other.begin2_), end2_(other.end2_), current2_(other.current2_), begin3_(other.begin3_), end3_(other.end3_), current3_(other.current3_) { ComputeCurrentValue(); } void ComputeCurrentValue() { if (!AtEnd()) current_value_.reset(new ParamType(*current1_, *current2_, *current3_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the // component iterators has reached the end of its range. return current1_ == end1_ || current2_ == end2_ || current3_ == end3_; } // No implementation - assignment is unsupported. void operator=(const Iterator& other); const ParamGeneratorInterface* const base_; // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. // current[i]_ is the actual traversing iterator. const typename ParamGenerator::iterator begin1_; const typename ParamGenerator::iterator end1_; typename ParamGenerator::iterator current1_; const typename ParamGenerator::iterator begin2_; const typename ParamGenerator::iterator end2_; typename ParamGenerator::iterator current2_; const typename ParamGenerator::iterator begin3_; const typename ParamGenerator::iterator end3_; typename ParamGenerator::iterator current3_; linked_ptr current_value_; }; // class CartesianProductGenerator3::Iterator // No implementation - assignment is unsupported. 
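// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the gtest source).
// These generators are reached through ::testing::Combine(), which packs
// one value per component generator into a ::testing::tuple. A minimal
// usage sketch, with hypothetical fixture/test names:
//
//   #include "gtest/gtest.h"
//
//   class BlockTest
//       : public ::testing::TestWithParam< ::testing::tuple<int, int> > {};
//
//   TEST_P(BlockTest, AreaIsPositive) {
//     const int w = ::testing::get<0>(GetParam());
//     const int h = ::testing::get<1>(GetParam());
//     EXPECT_GT(w * h, 0);
//   }
//
//   // Combine(Values(4, 8), Values(4, 8)) builds a
//   // CartesianProductGenerator2<int, int>; per Advance() the rightmost
//   // component varies fastest: (4,4) (4,8) (8,4) (8,8).
//   INSTANTIATE_TEST_CASE_P(
//       Sizes, BlockTest,
//       ::testing::Combine(::testing::Values(4, 8), ::testing::Values(4, 8)));
// ---------------------------------------------------------------------------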
void operator=(const CartesianProductGenerator3& other); const ParamGenerator g1_; const ParamGenerator g2_; const ParamGenerator g3_; }; // class CartesianProductGenerator3 template class CartesianProductGenerator4 : public ParamGeneratorInterface< ::testing::tuple > { public: typedef ::testing::tuple ParamType; CartesianProductGenerator4(const ParamGenerator& g1, const ParamGenerator& g2, const ParamGenerator& g3, const ParamGenerator& g4) : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {} virtual ~CartesianProductGenerator4() {} virtual ParamIteratorInterface* Begin() const { return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, g3_.begin(), g4_, g4_.begin()); } virtual ParamIteratorInterface* End() const { return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), g4_, g4_.end()); } private: class Iterator : public ParamIteratorInterface { public: Iterator(const ParamGeneratorInterface* base, const ParamGenerator& g1, const typename ParamGenerator::iterator& current1, const ParamGenerator& g2, const typename ParamGenerator::iterator& current2, const ParamGenerator& g3, const typename ParamGenerator::iterator& current3, const ParamGenerator& g4, const typename ParamGenerator::iterator& current4) : base_(base), begin1_(g1.begin()), end1_(g1.end()), current1_(current1), begin2_(g2.begin()), end2_(g2.end()), current2_(current2), begin3_(g3.begin()), end3_(g3.end()), current3_(current3), begin4_(g4.begin()), end4_(g4.end()), current4_(current4) { ComputeCurrentValue(); } virtual ~Iterator() {} virtual const ParamGeneratorInterface* BaseGenerator() const { return base_; } // Advance should not be called on beyond-of-range iterators // so no component iterators must be beyond end of range, either. virtual void Advance() { assert(!AtEnd()); ++current4_; if (current4_ == end4_) { current4_ = begin4_; ++current3_; } if (current3_ == end3_) { current3_ = begin3_; ++current2_; } if (current2_ == end2_) { current2_ = begin2_; ++current1_; } ComputeCurrentValue(); } virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) << "The program attempted to compare iterators " << "from different generators." << std::endl; const Iterator* typed_other = CheckedDowncastToActualType(&other); // We must report iterators equal if they both point beyond their // respective ranges. That can happen in a variety of fashions, // so we have to consult AtEnd(). 
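// [Added note, not gtest text] Two exhausted iterators may hold different
// component positions: End() is built directly from the component end
// iterators, while Advance() leaves the lower digits re-wound to begin once
// the leftmost digit runs off its range. Both states mean "done", so the
// comparison below first collapses every past-the-end state into a single
// equivalence class via AtEnd(), and only then compares digit-by-digit.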
return (AtEnd() && typed_other->AtEnd()) || ( current1_ == typed_other->current1_ && current2_ == typed_other->current2_ && current3_ == typed_other->current3_ && current4_ == typed_other->current4_); } private: Iterator(const Iterator& other) : base_(other.base_), begin1_(other.begin1_), end1_(other.end1_), current1_(other.current1_), begin2_(other.begin2_), end2_(other.end2_), current2_(other.current2_), begin3_(other.begin3_), end3_(other.end3_), current3_(other.current3_), begin4_(other.begin4_), end4_(other.end4_), current4_(other.current4_) { ComputeCurrentValue(); } void ComputeCurrentValue() { if (!AtEnd()) current_value_.reset(new ParamType(*current1_, *current2_, *current3_, *current4_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the // component iterators has reached the end of its range. return current1_ == end1_ || current2_ == end2_ || current3_ == end3_ || current4_ == end4_; } // No implementation - assignment is unsupported. void operator=(const Iterator& other); const ParamGeneratorInterface* const base_; // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. // current[i]_ is the actual traversing iterator. const typename ParamGenerator::iterator begin1_; const typename ParamGenerator::iterator end1_; typename ParamGenerator::iterator current1_; const typename ParamGenerator::iterator begin2_; const typename ParamGenerator::iterator end2_; typename ParamGenerator::iterator current2_; const typename ParamGenerator::iterator begin3_; const typename ParamGenerator::iterator end3_; typename ParamGenerator::iterator current3_; const typename ParamGenerator::iterator begin4_; const typename ParamGenerator::iterator end4_; typename ParamGenerator::iterator current4_; linked_ptr current_value_; }; // class CartesianProductGenerator4::Iterator // No implementation - assignment is unsupported. 
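// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the gtest source).
// The AtEnd()/Equals() pair above, restated over bare indices to make the
// past-the-end aliasing visible (hypothetical helper types):
//
//   #include <cstddef>
//
//   struct ProductCursor {
//     std::size_t i, j;    // component positions
//     std::size_t ni, nj;  // component range sizes
//     bool AtEnd() const { return i == ni || j == nj; }
//   };
//
//   inline bool Equal(const ProductCursor& a, const ProductCursor& b) {
//     // Every past-the-end state aliases to a single "done" value.
//     if (a.AtEnd() && b.AtEnd()) return true;
//     return a.i == b.i && a.j == b.j;
//   }
// ---------------------------------------------------------------------------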
void operator=(const CartesianProductGenerator4& other); const ParamGenerator g1_; const ParamGenerator g2_; const ParamGenerator g3_; const ParamGenerator g4_; }; // class CartesianProductGenerator4 template class CartesianProductGenerator5 : public ParamGeneratorInterface< ::testing::tuple > { public: typedef ::testing::tuple ParamType; CartesianProductGenerator5(const ParamGenerator& g1, const ParamGenerator& g2, const ParamGenerator& g3, const ParamGenerator& g4, const ParamGenerator& g5) : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {} virtual ~CartesianProductGenerator5() {} virtual ParamIteratorInterface* Begin() const { return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin()); } virtual ParamIteratorInterface* End() const { return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), g4_, g4_.end(), g5_, g5_.end()); } private: class Iterator : public ParamIteratorInterface { public: Iterator(const ParamGeneratorInterface* base, const ParamGenerator& g1, const typename ParamGenerator::iterator& current1, const ParamGenerator& g2, const typename ParamGenerator::iterator& current2, const ParamGenerator& g3, const typename ParamGenerator::iterator& current3, const ParamGenerator& g4, const typename ParamGenerator::iterator& current4, const ParamGenerator& g5, const typename ParamGenerator::iterator& current5) : base_(base), begin1_(g1.begin()), end1_(g1.end()), current1_(current1), begin2_(g2.begin()), end2_(g2.end()), current2_(current2), begin3_(g3.begin()), end3_(g3.end()), current3_(current3), begin4_(g4.begin()), end4_(g4.end()), current4_(current4), begin5_(g5.begin()), end5_(g5.end()), current5_(current5) { ComputeCurrentValue(); } virtual ~Iterator() {} virtual const ParamGeneratorInterface* BaseGenerator() const { return base_; } // Advance should not be called on beyond-of-range iterators // so no component iterators must be beyond end of range, either. virtual void Advance() { assert(!AtEnd()); ++current5_; if (current5_ == end5_) { current5_ = begin5_; ++current4_; } if (current4_ == end4_) { current4_ = begin4_; ++current3_; } if (current3_ == end3_) { current3_ = begin3_; ++current2_; } if (current2_ == end2_) { current2_ = begin2_; ++current1_; } ComputeCurrentValue(); } virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) << "The program attempted to compare iterators " << "from different generators." << std::endl; const Iterator* typed_other = CheckedDowncastToActualType(&other); // We must report iterators equal if they both point beyond their // respective ranges. That can happen in a variety of fashions, // so we have to consult AtEnd(). 
return (AtEnd() && typed_other->AtEnd()) || ( current1_ == typed_other->current1_ && current2_ == typed_other->current2_ && current3_ == typed_other->current3_ && current4_ == typed_other->current4_ && current5_ == typed_other->current5_); } private: Iterator(const Iterator& other) : base_(other.base_), begin1_(other.begin1_), end1_(other.end1_), current1_(other.current1_), begin2_(other.begin2_), end2_(other.end2_), current2_(other.current2_), begin3_(other.begin3_), end3_(other.end3_), current3_(other.current3_), begin4_(other.begin4_), end4_(other.end4_), current4_(other.current4_), begin5_(other.begin5_), end5_(other.end5_), current5_(other.current5_) { ComputeCurrentValue(); } void ComputeCurrentValue() { if (!AtEnd()) current_value_.reset(new ParamType(*current1_, *current2_, *current3_, *current4_, *current5_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the // component iterators has reached the end of its range. return current1_ == end1_ || current2_ == end2_ || current3_ == end3_ || current4_ == end4_ || current5_ == end5_; } // No implementation - assignment is unsupported. void operator=(const Iterator& other); const ParamGeneratorInterface* const base_; // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. // current[i]_ is the actual traversing iterator. const typename ParamGenerator::iterator begin1_; const typename ParamGenerator::iterator end1_; typename ParamGenerator::iterator current1_; const typename ParamGenerator::iterator begin2_; const typename ParamGenerator::iterator end2_; typename ParamGenerator::iterator current2_; const typename ParamGenerator::iterator begin3_; const typename ParamGenerator::iterator end3_; typename ParamGenerator::iterator current3_; const typename ParamGenerator::iterator begin4_; const typename ParamGenerator::iterator end4_; typename ParamGenerator::iterator current4_; const typename ParamGenerator::iterator begin5_; const typename ParamGenerator::iterator end5_; typename ParamGenerator::iterator current5_; linked_ptr current_value_; }; // class CartesianProductGenerator5::Iterator // No implementation - assignment is unsupported. 
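// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the gtest source).
// The number of test cases a Combine() instantiation produces is the
// product of the component range sizes, which grows quickly as generators
// are stacked. A standalone, hypothetical helper making that explicit:
//
//   #include <cstddef>
//   #include <functional>
//   #include <numeric>
//   #include <vector>
//
//   inline std::size_t ProductCaseCount(const std::vector<std::size_t>& s) {
//     // E.g. sizes {3, 4, 5} yield 60 tuples; any zero size makes it 0.
//     return std::accumulate(s.begin(), s.end(),
//                            static_cast<std::size_t>(1),
//                            std::multiplies<std::size_t>());
//   }
// ---------------------------------------------------------------------------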
void operator=(const CartesianProductGenerator5& other); const ParamGenerator g1_; const ParamGenerator g2_; const ParamGenerator g3_; const ParamGenerator g4_; const ParamGenerator g5_; }; // class CartesianProductGenerator5 template class CartesianProductGenerator6 : public ParamGeneratorInterface< ::testing::tuple > { public: typedef ::testing::tuple ParamType; CartesianProductGenerator6(const ParamGenerator& g1, const ParamGenerator& g2, const ParamGenerator& g3, const ParamGenerator& g4, const ParamGenerator& g5, const ParamGenerator& g6) : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {} virtual ~CartesianProductGenerator6() {} virtual ParamIteratorInterface* Begin() const { return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin()); } virtual ParamIteratorInterface* End() const { return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end()); } private: class Iterator : public ParamIteratorInterface { public: Iterator(const ParamGeneratorInterface* base, const ParamGenerator& g1, const typename ParamGenerator::iterator& current1, const ParamGenerator& g2, const typename ParamGenerator::iterator& current2, const ParamGenerator& g3, const typename ParamGenerator::iterator& current3, const ParamGenerator& g4, const typename ParamGenerator::iterator& current4, const ParamGenerator& g5, const typename ParamGenerator::iterator& current5, const ParamGenerator& g6, const typename ParamGenerator::iterator& current6) : base_(base), begin1_(g1.begin()), end1_(g1.end()), current1_(current1), begin2_(g2.begin()), end2_(g2.end()), current2_(current2), begin3_(g3.begin()), end3_(g3.end()), current3_(current3), begin4_(g4.begin()), end4_(g4.end()), current4_(current4), begin5_(g5.begin()), end5_(g5.end()), current5_(current5), begin6_(g6.begin()), end6_(g6.end()), current6_(current6) { ComputeCurrentValue(); } virtual ~Iterator() {} virtual const ParamGeneratorInterface* BaseGenerator() const { return base_; } // Advance should not be called on beyond-of-range iterators // so no component iterators must be beyond end of range, either. virtual void Advance() { assert(!AtEnd()); ++current6_; if (current6_ == end6_) { current6_ = begin6_; ++current5_; } if (current5_ == end5_) { current5_ = begin5_; ++current4_; } if (current4_ == end4_) { current4_ = begin4_; ++current3_; } if (current3_ == end3_) { current3_ = begin3_; ++current2_; } if (current2_ == end2_) { current2_ = begin2_; ++current1_; } ComputeCurrentValue(); } virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) << "The program attempted to compare iterators " << "from different generators." << std::endl; const Iterator* typed_other = CheckedDowncastToActualType(&other); // We must report iterators equal if they both point beyond their // respective ranges. That can happen in a variety of fashions, // so we have to consult AtEnd(). 
return (AtEnd() && typed_other->AtEnd()) || ( current1_ == typed_other->current1_ && current2_ == typed_other->current2_ && current3_ == typed_other->current3_ && current4_ == typed_other->current4_ && current5_ == typed_other->current5_ && current6_ == typed_other->current6_); } private: Iterator(const Iterator& other) : base_(other.base_), begin1_(other.begin1_), end1_(other.end1_), current1_(other.current1_), begin2_(other.begin2_), end2_(other.end2_), current2_(other.current2_), begin3_(other.begin3_), end3_(other.end3_), current3_(other.current3_), begin4_(other.begin4_), end4_(other.end4_), current4_(other.current4_), begin5_(other.begin5_), end5_(other.end5_), current5_(other.current5_), begin6_(other.begin6_), end6_(other.end6_), current6_(other.current6_) { ComputeCurrentValue(); } void ComputeCurrentValue() { if (!AtEnd()) current_value_.reset(new ParamType(*current1_, *current2_, *current3_, *current4_, *current5_, *current6_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the // component iterators has reached the end of its range. return current1_ == end1_ || current2_ == end2_ || current3_ == end3_ || current4_ == end4_ || current5_ == end5_ || current6_ == end6_; } // No implementation - assignment is unsupported. void operator=(const Iterator& other); const ParamGeneratorInterface* const base_; // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. // current[i]_ is the actual traversing iterator. const typename ParamGenerator::iterator begin1_; const typename ParamGenerator::iterator end1_; typename ParamGenerator::iterator current1_; const typename ParamGenerator::iterator begin2_; const typename ParamGenerator::iterator end2_; typename ParamGenerator::iterator current2_; const typename ParamGenerator::iterator begin3_; const typename ParamGenerator::iterator end3_; typename ParamGenerator::iterator current3_; const typename ParamGenerator::iterator begin4_; const typename ParamGenerator::iterator end4_; typename ParamGenerator::iterator current4_; const typename ParamGenerator::iterator begin5_; const typename ParamGenerator::iterator end5_; typename ParamGenerator::iterator current5_; const typename ParamGenerator::iterator begin6_; const typename ParamGenerator::iterator end6_; typename ParamGenerator::iterator current6_; linked_ptr current_value_; }; // class CartesianProductGenerator6::Iterator // No implementation - assignment is unsupported. 
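// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the gtest source).
// Clone() in each Iterator is the classic "virtual copy constructor":
// duplicating an object through a base-class pointer without knowing its
// dynamic type. The bare idiom, with hypothetical types:
//
//   struct Shape {
//     virtual ~Shape() {}
//     virtual Shape* Clone() const = 0;
//   };
//
//   struct Circle : Shape {
//     explicit Circle(double r) : radius(r) {}
//     // Covariant return type: callers holding a Circle* keep a Circle*.
//     virtual Circle* Clone() const { return new Circle(*this); }
//     double radius;
//   };
// ---------------------------------------------------------------------------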
void operator=(const CartesianProductGenerator6& other); const ParamGenerator g1_; const ParamGenerator g2_; const ParamGenerator g3_; const ParamGenerator g4_; const ParamGenerator g5_; const ParamGenerator g6_; }; // class CartesianProductGenerator6 template class CartesianProductGenerator7 : public ParamGeneratorInterface< ::testing::tuple > { public: typedef ::testing::tuple ParamType; CartesianProductGenerator7(const ParamGenerator& g1, const ParamGenerator& g2, const ParamGenerator& g3, const ParamGenerator& g4, const ParamGenerator& g5, const ParamGenerator& g6, const ParamGenerator& g7) : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {} virtual ~CartesianProductGenerator7() {} virtual ParamIteratorInterface* Begin() const { return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, g7_.begin()); } virtual ParamIteratorInterface* End() const { return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end()); } private: class Iterator : public ParamIteratorInterface { public: Iterator(const ParamGeneratorInterface* base, const ParamGenerator& g1, const typename ParamGenerator::iterator& current1, const ParamGenerator& g2, const typename ParamGenerator::iterator& current2, const ParamGenerator& g3, const typename ParamGenerator::iterator& current3, const ParamGenerator& g4, const typename ParamGenerator::iterator& current4, const ParamGenerator& g5, const typename ParamGenerator::iterator& current5, const ParamGenerator& g6, const typename ParamGenerator::iterator& current6, const ParamGenerator& g7, const typename ParamGenerator::iterator& current7) : base_(base), begin1_(g1.begin()), end1_(g1.end()), current1_(current1), begin2_(g2.begin()), end2_(g2.end()), current2_(current2), begin3_(g3.begin()), end3_(g3.end()), current3_(current3), begin4_(g4.begin()), end4_(g4.end()), current4_(current4), begin5_(g5.begin()), end5_(g5.end()), current5_(current5), begin6_(g6.begin()), end6_(g6.end()), current6_(current6), begin7_(g7.begin()), end7_(g7.end()), current7_(current7) { ComputeCurrentValue(); } virtual ~Iterator() {} virtual const ParamGeneratorInterface* BaseGenerator() const { return base_; } // Advance should not be called on beyond-of-range iterators // so no component iterators must be beyond end of range, either. virtual void Advance() { assert(!AtEnd()); ++current7_; if (current7_ == end7_) { current7_ = begin7_; ++current6_; } if (current6_ == end6_) { current6_ = begin6_; ++current5_; } if (current5_ == end5_) { current5_ = begin5_; ++current4_; } if (current4_ == end4_) { current4_ = begin4_; ++current3_; } if (current3_ == end3_) { current3_ = begin3_; ++current2_; } if (current2_ == end2_) { current2_ = begin2_; ++current1_; } ComputeCurrentValue(); } virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) << "The program attempted to compare iterators " << "from different generators." << std::endl; const Iterator* typed_other = CheckedDowncastToActualType(&other); // We must report iterators equal if they both point beyond their // respective ranges. 
That can happen in a variety of fashions, // so we have to consult AtEnd(). return (AtEnd() && typed_other->AtEnd()) || ( current1_ == typed_other->current1_ && current2_ == typed_other->current2_ && current3_ == typed_other->current3_ && current4_ == typed_other->current4_ && current5_ == typed_other->current5_ && current6_ == typed_other->current6_ && current7_ == typed_other->current7_); } private: Iterator(const Iterator& other) : base_(other.base_), begin1_(other.begin1_), end1_(other.end1_), current1_(other.current1_), begin2_(other.begin2_), end2_(other.end2_), current2_(other.current2_), begin3_(other.begin3_), end3_(other.end3_), current3_(other.current3_), begin4_(other.begin4_), end4_(other.end4_), current4_(other.current4_), begin5_(other.begin5_), end5_(other.end5_), current5_(other.current5_), begin6_(other.begin6_), end6_(other.end6_), current6_(other.current6_), begin7_(other.begin7_), end7_(other.end7_), current7_(other.current7_) { ComputeCurrentValue(); } void ComputeCurrentValue() { if (!AtEnd()) current_value_.reset(new ParamType(*current1_, *current2_, *current3_, *current4_, *current5_, *current6_, *current7_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the // component iterators has reached the end of its range. return current1_ == end1_ || current2_ == end2_ || current3_ == end3_ || current4_ == end4_ || current5_ == end5_ || current6_ == end6_ || current7_ == end7_; } // No implementation - assignment is unsupported. void operator=(const Iterator& other); const ParamGeneratorInterface* const base_; // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. // current[i]_ is the actual traversing iterator. const typename ParamGenerator::iterator begin1_; const typename ParamGenerator::iterator end1_; typename ParamGenerator::iterator current1_; const typename ParamGenerator::iterator begin2_; const typename ParamGenerator::iterator end2_; typename ParamGenerator::iterator current2_; const typename ParamGenerator::iterator begin3_; const typename ParamGenerator::iterator end3_; typename ParamGenerator::iterator current3_; const typename ParamGenerator::iterator begin4_; const typename ParamGenerator::iterator end4_; typename ParamGenerator::iterator current4_; const typename ParamGenerator::iterator begin5_; const typename ParamGenerator::iterator end5_; typename ParamGenerator::iterator current5_; const typename ParamGenerator::iterator begin6_; const typename ParamGenerator::iterator end6_; typename ParamGenerator::iterator current6_; const typename ParamGenerator::iterator begin7_; const typename ParamGenerator::iterator end7_; typename ParamGenerator::iterator current7_; linked_ptr current_value_; }; // class CartesianProductGenerator7::Iterator // No implementation - assignment is unsupported. 
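// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the gtest source).
// Equals() may downcast because iterators sharing a BaseGenerator() are
// guaranteed to share one concrete Iterator type; GTEST_CHECK_ enforces
// that precondition first. A simplified sketch of the checked-downcast
// helper's shape (assumes a polymorphic Base and available RTTI; the real
// gtest helper differs in its details):
//
//   #include <cassert>
//
//   template <typename Derived, typename Base>
//   const Derived* CheckedDowncast(const Base* base) {
//     assert(dynamic_cast<const Derived*>(base) != 0);  // debug-only check
//     return static_cast<const Derived*>(base);  // cheap cast in release
//   }
// ---------------------------------------------------------------------------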
void operator=(const CartesianProductGenerator7& other); const ParamGenerator g1_; const ParamGenerator g2_; const ParamGenerator g3_; const ParamGenerator g4_; const ParamGenerator g5_; const ParamGenerator g6_; const ParamGenerator g7_; }; // class CartesianProductGenerator7 template class CartesianProductGenerator8 : public ParamGeneratorInterface< ::testing::tuple > { public: typedef ::testing::tuple ParamType; CartesianProductGenerator8(const ParamGenerator& g1, const ParamGenerator& g2, const ParamGenerator& g3, const ParamGenerator& g4, const ParamGenerator& g5, const ParamGenerator& g6, const ParamGenerator& g7, const ParamGenerator& g8) : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8) {} virtual ~CartesianProductGenerator8() {} virtual ParamIteratorInterface* Begin() const { return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, g7_.begin(), g8_, g8_.begin()); } virtual ParamIteratorInterface* End() const { return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, g8_.end()); } private: class Iterator : public ParamIteratorInterface { public: Iterator(const ParamGeneratorInterface* base, const ParamGenerator& g1, const typename ParamGenerator::iterator& current1, const ParamGenerator& g2, const typename ParamGenerator::iterator& current2, const ParamGenerator& g3, const typename ParamGenerator::iterator& current3, const ParamGenerator& g4, const typename ParamGenerator::iterator& current4, const ParamGenerator& g5, const typename ParamGenerator::iterator& current5, const ParamGenerator& g6, const typename ParamGenerator::iterator& current6, const ParamGenerator& g7, const typename ParamGenerator::iterator& current7, const ParamGenerator& g8, const typename ParamGenerator::iterator& current8) : base_(base), begin1_(g1.begin()), end1_(g1.end()), current1_(current1), begin2_(g2.begin()), end2_(g2.end()), current2_(current2), begin3_(g3.begin()), end3_(g3.end()), current3_(current3), begin4_(g4.begin()), end4_(g4.end()), current4_(current4), begin5_(g5.begin()), end5_(g5.end()), current5_(current5), begin6_(g6.begin()), end6_(g6.end()), current6_(current6), begin7_(g7.begin()), end7_(g7.end()), current7_(current7), begin8_(g8.begin()), end8_(g8.end()), current8_(current8) { ComputeCurrentValue(); } virtual ~Iterator() {} virtual const ParamGeneratorInterface* BaseGenerator() const { return base_; } // Advance should not be called on beyond-of-range iterators // so no component iterators must be beyond end of range, either. virtual void Advance() { assert(!AtEnd()); ++current8_; if (current8_ == end8_) { current8_ = begin8_; ++current7_; } if (current7_ == end7_) { current7_ = begin7_; ++current6_; } if (current6_ == end6_) { current6_ = begin6_; ++current5_; } if (current5_ == end5_) { current5_ = begin5_; ++current4_; } if (current4_ == end4_) { current4_ = begin4_; ++current3_; } if (current3_ == end3_) { current3_ = begin3_; ++current2_; } if (current2_ == end2_) { current2_ = begin2_; ++current1_; } ComputeCurrentValue(); } virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. 
GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) << "The program attempted to compare iterators " << "from different generators." << std::endl; const Iterator* typed_other = CheckedDowncastToActualType(&other); // We must report iterators equal if they both point beyond their // respective ranges. That can happen in a variety of fashions, // so we have to consult AtEnd(). return (AtEnd() && typed_other->AtEnd()) || ( current1_ == typed_other->current1_ && current2_ == typed_other->current2_ && current3_ == typed_other->current3_ && current4_ == typed_other->current4_ && current5_ == typed_other->current5_ && current6_ == typed_other->current6_ && current7_ == typed_other->current7_ && current8_ == typed_other->current8_); } private: Iterator(const Iterator& other) : base_(other.base_), begin1_(other.begin1_), end1_(other.end1_), current1_(other.current1_), begin2_(other.begin2_), end2_(other.end2_), current2_(other.current2_), begin3_(other.begin3_), end3_(other.end3_), current3_(other.current3_), begin4_(other.begin4_), end4_(other.end4_), current4_(other.current4_), begin5_(other.begin5_), end5_(other.end5_), current5_(other.current5_), begin6_(other.begin6_), end6_(other.end6_), current6_(other.current6_), begin7_(other.begin7_), end7_(other.end7_), current7_(other.current7_), begin8_(other.begin8_), end8_(other.end8_), current8_(other.current8_) { ComputeCurrentValue(); } void ComputeCurrentValue() { if (!AtEnd()) current_value_.reset(new ParamType(*current1_, *current2_, *current3_, *current4_, *current5_, *current6_, *current7_, *current8_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the // component iterators has reached the end of its range. return current1_ == end1_ || current2_ == end2_ || current3_ == end3_ || current4_ == end4_ || current5_ == end5_ || current6_ == end6_ || current7_ == end7_ || current8_ == end8_; } // No implementation - assignment is unsupported. void operator=(const Iterator& other); const ParamGeneratorInterface* const base_; // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. // current[i]_ is the actual traversing iterator. const typename ParamGenerator::iterator begin1_; const typename ParamGenerator::iterator end1_; typename ParamGenerator::iterator current1_; const typename ParamGenerator::iterator begin2_; const typename ParamGenerator::iterator end2_; typename ParamGenerator::iterator current2_; const typename ParamGenerator::iterator begin3_; const typename ParamGenerator::iterator end3_; typename ParamGenerator::iterator current3_; const typename ParamGenerator::iterator begin4_; const typename ParamGenerator::iterator end4_; typename ParamGenerator::iterator current4_; const typename ParamGenerator::iterator begin5_; const typename ParamGenerator::iterator end5_; typename ParamGenerator::iterator current5_; const typename ParamGenerator::iterator begin6_; const typename ParamGenerator::iterator end6_; typename ParamGenerator::iterator current6_; const typename ParamGenerator::iterator begin7_; const typename ParamGenerator::iterator end7_; typename ParamGenerator::iterator current7_; const typename ParamGenerator::iterator begin8_; const typename ParamGenerator::iterator end8_; typename ParamGenerator::iterator current8_; linked_ptr current_value_; }; // class CartesianProductGenerator8::Iterator // No implementation - assignment is unsupported. 
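// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the gtest source).
// Every Advance() above opens with assert(!AtEnd()): advancing past the
// end is a caller bug, caught in debug builds only. The same contract in
// isolation, with a hypothetical type:
//
//   #include <cassert>
//
//   class Counter {
//    public:
//     explicit Counter(int n) : i_(0), n_(n) {}
//     bool AtEnd() const { return i_ == n_; }
//     void Advance() {
//       assert(!AtEnd());  // precondition: never advance a finished cursor
//       ++i_;
//     }
//    private:
//     int i_, n_;
//   };
// ---------------------------------------------------------------------------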
void operator=(const CartesianProductGenerator8& other); const ParamGenerator g1_; const ParamGenerator g2_; const ParamGenerator g3_; const ParamGenerator g4_; const ParamGenerator g5_; const ParamGenerator g6_; const ParamGenerator g7_; const ParamGenerator g8_; }; // class CartesianProductGenerator8 template class CartesianProductGenerator9 : public ParamGeneratorInterface< ::testing::tuple > { public: typedef ::testing::tuple ParamType; CartesianProductGenerator9(const ParamGenerator& g1, const ParamGenerator& g2, const ParamGenerator& g3, const ParamGenerator& g4, const ParamGenerator& g5, const ParamGenerator& g6, const ParamGenerator& g7, const ParamGenerator& g8, const ParamGenerator& g9) : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), g9_(g9) {} virtual ~CartesianProductGenerator9() {} virtual ParamIteratorInterface* Begin() const { return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin()); } virtual ParamIteratorInterface* End() const { return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, g8_.end(), g9_, g9_.end()); } private: class Iterator : public ParamIteratorInterface { public: Iterator(const ParamGeneratorInterface* base, const ParamGenerator& g1, const typename ParamGenerator::iterator& current1, const ParamGenerator& g2, const typename ParamGenerator::iterator& current2, const ParamGenerator& g3, const typename ParamGenerator::iterator& current3, const ParamGenerator& g4, const typename ParamGenerator::iterator& current4, const ParamGenerator& g5, const typename ParamGenerator::iterator& current5, const ParamGenerator& g6, const typename ParamGenerator::iterator& current6, const ParamGenerator& g7, const typename ParamGenerator::iterator& current7, const ParamGenerator& g8, const typename ParamGenerator::iterator& current8, const ParamGenerator& g9, const typename ParamGenerator::iterator& current9) : base_(base), begin1_(g1.begin()), end1_(g1.end()), current1_(current1), begin2_(g2.begin()), end2_(g2.end()), current2_(current2), begin3_(g3.begin()), end3_(g3.end()), current3_(current3), begin4_(g4.begin()), end4_(g4.end()), current4_(current4), begin5_(g5.begin()), end5_(g5.end()), current5_(current5), begin6_(g6.begin()), end6_(g6.end()), current6_(current6), begin7_(g7.begin()), end7_(g7.end()), current7_(current7), begin8_(g8.begin()), end8_(g8.end()), current8_(current8), begin9_(g9.begin()), end9_(g9.end()), current9_(current9) { ComputeCurrentValue(); } virtual ~Iterator() {} virtual const ParamGeneratorInterface* BaseGenerator() const { return base_; } // Advance should not be called on beyond-of-range iterators // so no component iterators must be beyond end of range, either. 
virtual void Advance() { assert(!AtEnd()); ++current9_; if (current9_ == end9_) { current9_ = begin9_; ++current8_; } if (current8_ == end8_) { current8_ = begin8_; ++current7_; } if (current7_ == end7_) { current7_ = begin7_; ++current6_; } if (current6_ == end6_) { current6_ = begin6_; ++current5_; } if (current5_ == end5_) { current5_ = begin5_; ++current4_; } if (current4_ == end4_) { current4_ = begin4_; ++current3_; } if (current3_ == end3_) { current3_ = begin3_; ++current2_; } if (current2_ == end2_) { current2_ = begin2_; ++current1_; } ComputeCurrentValue(); } virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) << "The program attempted to compare iterators " << "from different generators." << std::endl; const Iterator* typed_other = CheckedDowncastToActualType(&other); // We must report iterators equal if they both point beyond their // respective ranges. That can happen in a variety of fashions, // so we have to consult AtEnd(). return (AtEnd() && typed_other->AtEnd()) || ( current1_ == typed_other->current1_ && current2_ == typed_other->current2_ && current3_ == typed_other->current3_ && current4_ == typed_other->current4_ && current5_ == typed_other->current5_ && current6_ == typed_other->current6_ && current7_ == typed_other->current7_ && current8_ == typed_other->current8_ && current9_ == typed_other->current9_); } private: Iterator(const Iterator& other) : base_(other.base_), begin1_(other.begin1_), end1_(other.end1_), current1_(other.current1_), begin2_(other.begin2_), end2_(other.end2_), current2_(other.current2_), begin3_(other.begin3_), end3_(other.end3_), current3_(other.current3_), begin4_(other.begin4_), end4_(other.end4_), current4_(other.current4_), begin5_(other.begin5_), end5_(other.end5_), current5_(other.current5_), begin6_(other.begin6_), end6_(other.end6_), current6_(other.current6_), begin7_(other.begin7_), end7_(other.end7_), current7_(other.current7_), begin8_(other.begin8_), end8_(other.end8_), current8_(other.current8_), begin9_(other.begin9_), end9_(other.end9_), current9_(other.current9_) { ComputeCurrentValue(); } void ComputeCurrentValue() { if (!AtEnd()) current_value_.reset(new ParamType(*current1_, *current2_, *current3_, *current4_, *current5_, *current6_, *current7_, *current8_, *current9_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the // component iterators has reached the end of its range. return current1_ == end1_ || current2_ == end2_ || current3_ == end3_ || current4_ == end4_ || current5_ == end5_ || current6_ == end6_ || current7_ == end7_ || current8_ == end8_ || current9_ == end9_; } // No implementation - assignment is unsupported. void operator=(const Iterator& other); const ParamGeneratorInterface* const base_; // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. // current[i]_ is the actual traversing iterator. 
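// ---------------------------------------------------------------------------
// Illustrative sketch (added commentary, not part of the gtest source).
// Caching begin/end copies per component lets the Iterator wrap and test
// for exhaustion without consulting the generator again. At the consuming
// end, this machinery surfaces as value-semantic iterators on the internal
// ParamGenerator wrapper, roughly as follows (internal API, sketch only):
//
//   ::testing::internal::ParamGenerator<int> gen =
//       ::testing::Values(1, 2, 3);
//   typedef ::testing::internal::ParamGenerator<int>::iterator It;
//   for (It it = gen.begin(); it != gen.end(); ++it) {
//     const int value = *it;  // dereferences the heap-held current value
//     // ... use value ...
//   }
// ---------------------------------------------------------------------------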
const typename ParamGenerator::iterator begin1_; const typename ParamGenerator::iterator end1_; typename ParamGenerator::iterator current1_; const typename ParamGenerator::iterator begin2_; const typename ParamGenerator::iterator end2_; typename ParamGenerator::iterator current2_; const typename ParamGenerator::iterator begin3_; const typename ParamGenerator::iterator end3_; typename ParamGenerator::iterator current3_; const typename ParamGenerator::iterator begin4_; const typename ParamGenerator::iterator end4_; typename ParamGenerator::iterator current4_; const typename ParamGenerator::iterator begin5_; const typename ParamGenerator::iterator end5_; typename ParamGenerator::iterator current5_; const typename ParamGenerator::iterator begin6_; const typename ParamGenerator::iterator end6_; typename ParamGenerator::iterator current6_; const typename ParamGenerator::iterator begin7_; const typename ParamGenerator::iterator end7_; typename ParamGenerator::iterator current7_; const typename ParamGenerator::iterator begin8_; const typename ParamGenerator::iterator end8_; typename ParamGenerator::iterator current8_; const typename ParamGenerator::iterator begin9_; const typename ParamGenerator::iterator end9_; typename ParamGenerator::iterator current9_; linked_ptr current_value_; }; // class CartesianProductGenerator9::Iterator // No implementation - assignment is unsupported. void operator=(const CartesianProductGenerator9& other); const ParamGenerator g1_; const ParamGenerator g2_; const ParamGenerator g3_; const ParamGenerator g4_; const ParamGenerator g5_; const ParamGenerator g6_; const ParamGenerator g7_; const ParamGenerator g8_; const ParamGenerator g9_; }; // class CartesianProductGenerator9 template class CartesianProductGenerator10 : public ParamGeneratorInterface< ::testing::tuple > { public: typedef ::testing::tuple ParamType; CartesianProductGenerator10(const ParamGenerator& g1, const ParamGenerator& g2, const ParamGenerator& g3, const ParamGenerator& g4, const ParamGenerator& g5, const ParamGenerator& g6, const ParamGenerator& g7, const ParamGenerator& g8, const ParamGenerator& g9, const ParamGenerator& g10) : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), g9_(g9), g10_(g10) {} virtual ~CartesianProductGenerator10() {} virtual ParamIteratorInterface* Begin() const { return new Iterator(this, g1_, g1_.begin(), g2_, g2_.begin(), g3_, g3_.begin(), g4_, g4_.begin(), g5_, g5_.begin(), g6_, g6_.begin(), g7_, g7_.begin(), g8_, g8_.begin(), g9_, g9_.begin(), g10_, g10_.begin()); } virtual ParamIteratorInterface* End() const { return new Iterator(this, g1_, g1_.end(), g2_, g2_.end(), g3_, g3_.end(), g4_, g4_.end(), g5_, g5_.end(), g6_, g6_.end(), g7_, g7_.end(), g8_, g8_.end(), g9_, g9_.end(), g10_, g10_.end()); } private: class Iterator : public ParamIteratorInterface { public: Iterator(const ParamGeneratorInterface* base, const ParamGenerator& g1, const typename ParamGenerator::iterator& current1, const ParamGenerator& g2, const typename ParamGenerator::iterator& current2, const ParamGenerator& g3, const typename ParamGenerator::iterator& current3, const ParamGenerator& g4, const typename ParamGenerator::iterator& current4, const ParamGenerator& g5, const typename ParamGenerator::iterator& current5, const ParamGenerator& g6, const typename ParamGenerator::iterator& current6, const ParamGenerator& g7, const typename ParamGenerator::iterator& current7, const ParamGenerator& g8, const typename ParamGenerator::iterator& current8, const ParamGenerator& g9, const 
typename ParamGenerator::iterator& current9, const ParamGenerator& g10, const typename ParamGenerator::iterator& current10) : base_(base), begin1_(g1.begin()), end1_(g1.end()), current1_(current1), begin2_(g2.begin()), end2_(g2.end()), current2_(current2), begin3_(g3.begin()), end3_(g3.end()), current3_(current3), begin4_(g4.begin()), end4_(g4.end()), current4_(current4), begin5_(g5.begin()), end5_(g5.end()), current5_(current5), begin6_(g6.begin()), end6_(g6.end()), current6_(current6), begin7_(g7.begin()), end7_(g7.end()), current7_(current7), begin8_(g8.begin()), end8_(g8.end()), current8_(current8), begin9_(g9.begin()), end9_(g9.end()), current9_(current9), begin10_(g10.begin()), end10_(g10.end()), current10_(current10) { ComputeCurrentValue(); } virtual ~Iterator() {} virtual const ParamGeneratorInterface* BaseGenerator() const { return base_; } // Advance should not be called on beyond-of-range iterators // so no component iterators must be beyond end of range, either. virtual void Advance() { assert(!AtEnd()); ++current10_; if (current10_ == end10_) { current10_ = begin10_; ++current9_; } if (current9_ == end9_) { current9_ = begin9_; ++current8_; } if (current8_ == end8_) { current8_ = begin8_; ++current7_; } if (current7_ == end7_) { current7_ = begin7_; ++current6_; } if (current6_ == end6_) { current6_ = begin6_; ++current5_; } if (current5_ == end5_) { current5_ = begin5_; ++current4_; } if (current4_ == end4_) { current4_ = begin4_; ++current3_; } if (current3_ == end3_) { current3_ = begin3_; ++current2_; } if (current2_ == end2_) { current2_ = begin2_; ++current1_; } ComputeCurrentValue(); } virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) << "The program attempted to compare iterators " << "from different generators." << std::endl; const Iterator* typed_other = CheckedDowncastToActualType(&other); // We must report iterators equal if they both point beyond their // respective ranges. That can happen in a variety of fashions, // so we have to consult AtEnd(). 
return (AtEnd() && typed_other->AtEnd()) || ( current1_ == typed_other->current1_ && current2_ == typed_other->current2_ && current3_ == typed_other->current3_ && current4_ == typed_other->current4_ && current5_ == typed_other->current5_ && current6_ == typed_other->current6_ && current7_ == typed_other->current7_ && current8_ == typed_other->current8_ && current9_ == typed_other->current9_ && current10_ == typed_other->current10_); } private: Iterator(const Iterator& other) : base_(other.base_), begin1_(other.begin1_), end1_(other.end1_), current1_(other.current1_), begin2_(other.begin2_), end2_(other.end2_), current2_(other.current2_), begin3_(other.begin3_), end3_(other.end3_), current3_(other.current3_), begin4_(other.begin4_), end4_(other.end4_), current4_(other.current4_), begin5_(other.begin5_), end5_(other.end5_), current5_(other.current5_), begin6_(other.begin6_), end6_(other.end6_), current6_(other.current6_), begin7_(other.begin7_), end7_(other.end7_), current7_(other.current7_), begin8_(other.begin8_), end8_(other.end8_), current8_(other.current8_), begin9_(other.begin9_), end9_(other.end9_), current9_(other.current9_), begin10_(other.begin10_), end10_(other.end10_), current10_(other.current10_) { ComputeCurrentValue(); } void ComputeCurrentValue() { if (!AtEnd()) current_value_.reset(new ParamType(*current1_, *current2_, *current3_, *current4_, *current5_, *current6_, *current7_, *current8_, *current9_, *current10_)); } bool AtEnd() const { // We must report iterator past the end of the range when either of the // component iterators has reached the end of its range. return current1_ == end1_ || current2_ == end2_ || current3_ == end3_ || current4_ == end4_ || current5_ == end5_ || current6_ == end6_ || current7_ == end7_ || current8_ == end8_ || current9_ == end9_ || current10_ == end10_; } // No implementation - assignment is unsupported. void operator=(const Iterator& other); const ParamGeneratorInterface* const base_; // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. // current[i]_ is the actual traversing iterator. 
const typename ParamGenerator::iterator begin1_; const typename ParamGenerator::iterator end1_; typename ParamGenerator::iterator current1_; const typename ParamGenerator::iterator begin2_; const typename ParamGenerator::iterator end2_; typename ParamGenerator::iterator current2_; const typename ParamGenerator::iterator begin3_; const typename ParamGenerator::iterator end3_; typename ParamGenerator::iterator current3_; const typename ParamGenerator::iterator begin4_; const typename ParamGenerator::iterator end4_; typename ParamGenerator::iterator current4_; const typename ParamGenerator::iterator begin5_; const typename ParamGenerator::iterator end5_; typename ParamGenerator::iterator current5_; const typename ParamGenerator::iterator begin6_; const typename ParamGenerator::iterator end6_; typename ParamGenerator::iterator current6_; const typename ParamGenerator::iterator begin7_; const typename ParamGenerator::iterator end7_; typename ParamGenerator::iterator current7_; const typename ParamGenerator::iterator begin8_; const typename ParamGenerator::iterator end8_; typename ParamGenerator::iterator current8_; const typename ParamGenerator::iterator begin9_; const typename ParamGenerator::iterator end9_; typename ParamGenerator::iterator current9_; const typename ParamGenerator::iterator begin10_; const typename ParamGenerator::iterator end10_; typename ParamGenerator::iterator current10_; linked_ptr current_value_; }; // class CartesianProductGenerator10::Iterator // No implementation - assignment is unsupported. void operator=(const CartesianProductGenerator10& other); const ParamGenerator g1_; const ParamGenerator g2_; const ParamGenerator g3_; const ParamGenerator g4_; const ParamGenerator g5_; const ParamGenerator g6_; const ParamGenerator g7_; const ParamGenerator g8_; const ParamGenerator g9_; const ParamGenerator g10_; }; // class CartesianProductGenerator10 // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // Helper classes providing Combine() with polymorphic features. They allow // casting CartesianProductGeneratorN to ParamGenerator if T is // convertible to U. // template class CartesianProductHolder2 { public: CartesianProductHolder2(const Generator1& g1, const Generator2& g2) : g1_(g1), g2_(g2) {} template operator ParamGenerator< ::testing::tuple >() const { return ParamGenerator< ::testing::tuple >( new CartesianProductGenerator2( static_cast >(g1_), static_cast >(g2_))); } private: // No implementation - assignment is unsupported. void operator=(const CartesianProductHolder2& other); const Generator1 g1_; const Generator2 g2_; }; // class CartesianProductHolder2 template class CartesianProductHolder3 { public: CartesianProductHolder3(const Generator1& g1, const Generator2& g2, const Generator3& g3) : g1_(g1), g2_(g2), g3_(g3) {} template operator ParamGenerator< ::testing::tuple >() const { return ParamGenerator< ::testing::tuple >( new CartesianProductGenerator3( static_cast >(g1_), static_cast >(g2_), static_cast >(g3_))); } private: // No implementation - assignment is unsupported. 
void operator=(const CartesianProductHolder3& other); const Generator1 g1_; const Generator2 g2_; const Generator3 g3_; }; // class CartesianProductHolder3 template class CartesianProductHolder4 { public: CartesianProductHolder4(const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4) : g1_(g1), g2_(g2), g3_(g3), g4_(g4) {} template operator ParamGenerator< ::testing::tuple >() const { return ParamGenerator< ::testing::tuple >( new CartesianProductGenerator4( static_cast >(g1_), static_cast >(g2_), static_cast >(g3_), static_cast >(g4_))); } private: // No implementation - assignment is unsupported. void operator=(const CartesianProductHolder4& other); const Generator1 g1_; const Generator2 g2_; const Generator3 g3_; const Generator4 g4_; }; // class CartesianProductHolder4 template class CartesianProductHolder5 { public: CartesianProductHolder5(const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4, const Generator5& g5) : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5) {} template operator ParamGenerator< ::testing::tuple >() const { return ParamGenerator< ::testing::tuple >( new CartesianProductGenerator5( static_cast >(g1_), static_cast >(g2_), static_cast >(g3_), static_cast >(g4_), static_cast >(g5_))); } private: // No implementation - assignment is unsupported. void operator=(const CartesianProductHolder5& other); const Generator1 g1_; const Generator2 g2_; const Generator3 g3_; const Generator4 g4_; const Generator5 g5_; }; // class CartesianProductHolder5 template class CartesianProductHolder6 { public: CartesianProductHolder6(const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4, const Generator5& g5, const Generator6& g6) : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6) {} template operator ParamGenerator< ::testing::tuple >() const { return ParamGenerator< ::testing::tuple >( new CartesianProductGenerator6( static_cast >(g1_), static_cast >(g2_), static_cast >(g3_), static_cast >(g4_), static_cast >(g5_), static_cast >(g6_))); } private: // No implementation - assignment is unsupported. void operator=(const CartesianProductHolder6& other); const Generator1 g1_; const Generator2 g2_; const Generator3 g3_; const Generator4 g4_; const Generator5 g5_; const Generator6 g6_; }; // class CartesianProductHolder6 template class CartesianProductHolder7 { public: CartesianProductHolder7(const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4, const Generator5& g5, const Generator6& g6, const Generator7& g7) : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7) {} template operator ParamGenerator< ::testing::tuple >() const { return ParamGenerator< ::testing::tuple >( new CartesianProductGenerator7( static_cast >(g1_), static_cast >(g2_), static_cast >(g3_), static_cast >(g4_), static_cast >(g5_), static_cast >(g6_), static_cast >(g7_))); } private: // No implementation - assignment is unsupported. 
void operator=(const CartesianProductHolder7& other); const Generator1 g1_; const Generator2 g2_; const Generator3 g3_; const Generator4 g4_; const Generator5 g5_; const Generator6 g6_; const Generator7 g7_; }; // class CartesianProductHolder7 template class CartesianProductHolder8 { public: CartesianProductHolder8(const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4, const Generator5& g5, const Generator6& g6, const Generator7& g7, const Generator8& g8) : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8) {} template operator ParamGenerator< ::testing::tuple >() const { return ParamGenerator< ::testing::tuple >( new CartesianProductGenerator8( static_cast >(g1_), static_cast >(g2_), static_cast >(g3_), static_cast >(g4_), static_cast >(g5_), static_cast >(g6_), static_cast >(g7_), static_cast >(g8_))); } private: // No implementation - assignment is unsupported. void operator=(const CartesianProductHolder8& other); const Generator1 g1_; const Generator2 g2_; const Generator3 g3_; const Generator4 g4_; const Generator5 g5_; const Generator6 g6_; const Generator7 g7_; const Generator8 g8_; }; // class CartesianProductHolder8 template class CartesianProductHolder9 { public: CartesianProductHolder9(const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4, const Generator5& g5, const Generator6& g6, const Generator7& g7, const Generator8& g8, const Generator9& g9) : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), g9_(g9) {} template operator ParamGenerator< ::testing::tuple >() const { return ParamGenerator< ::testing::tuple >( new CartesianProductGenerator9( static_cast >(g1_), static_cast >(g2_), static_cast >(g3_), static_cast >(g4_), static_cast >(g5_), static_cast >(g6_), static_cast >(g7_), static_cast >(g8_), static_cast >(g9_))); } private: // No implementation - assignment is unsupported. void operator=(const CartesianProductHolder9& other); const Generator1 g1_; const Generator2 g2_; const Generator3 g3_; const Generator4 g4_; const Generator5 g5_; const Generator6 g6_; const Generator7 g7_; const Generator8 g8_; const Generator9 g9_; }; // class CartesianProductHolder9 template class CartesianProductHolder10 { public: CartesianProductHolder10(const Generator1& g1, const Generator2& g2, const Generator3& g3, const Generator4& g4, const Generator5& g5, const Generator6& g6, const Generator7& g7, const Generator8& g8, const Generator9& g9, const Generator10& g10) : g1_(g1), g2_(g2), g3_(g3), g4_(g4), g5_(g5), g6_(g6), g7_(g7), g8_(g8), g9_(g9), g10_(g10) {} template operator ParamGenerator< ::testing::tuple >() const { return ParamGenerator< ::testing::tuple >( new CartesianProductGenerator10( static_cast >(g1_), static_cast >(g2_), static_cast >(g3_), static_cast >(g4_), static_cast >(g5_), static_cast >(g6_), static_cast >(g7_), static_cast >(g8_), static_cast >(g9_), static_cast >(g10_))); } private: // No implementation - assignment is unsupported. 
void operator=(const CartesianProductHolder10& other); const Generator1 g1_; const Generator2 g2_; const Generator3 g3_; const Generator4 g4_; const Generator5 g5_; const Generator6 g6_; const Generator7 g7_; const Generator8 g8_; const Generator9 g9_; const Generator10 g10_; }; // class CartesianProductHolder10 # endif // GTEST_HAS_COMBINE } // namespace internal } // namespace testing #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-param-util-generated.h.pump000066400000000000000000000214171357355204000332040ustar00rootroot00000000000000$$ -*- mode: c++; -*- $var n = 50 $$ Maximum length of Values arguments we want to support. $var maxtuple = 10 $$ Maximum number of Combine arguments we want to support. // Copyright 2008 Google Inc. // All Rights Reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Type and function utilities for implementing parameterized tests. // This file is generated by a SCRIPT. DO NOT EDIT BY HAND! // // Currently Google Test supports at most $n arguments in Values, // and at most $maxtuple arguments in Combine. Please contact // googletestframework@googlegroups.com if you need more. // Please note that the number of arguments to Combine is limited // by the maximum arity of the implementation of tuple which is // currently set at $maxtuple. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ #include "gtest/internal/gtest-param-util.h" #include "gtest/internal/gtest-port.h" namespace testing { // Forward declarations of ValuesIn(), which is implemented in // include/gtest/gtest-param-test.h. 
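//
// A minimal usage sketch, for orientation only (the fixture `CharTest` and
// test below are hypothetical, not part of this header): ValuesIn() adapts
// an existing array or iterator range into a parameter generator.
//
//   class CharTest : public ::testing::TestWithParam<char> {};
//   TEST_P(CharTest, IsNotNul) { EXPECT_NE('\0', GetParam()); }
//
//   const char kChars[] = { 'a', 'b', 'c' };
//   INSTANTIATE_TEST_CASE_P(SomeChars, CharTest,
//                           ::testing::ValuesIn(kChars));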
template internal::ParamGenerator< typename ::testing::internal::IteratorTraits::value_type> ValuesIn(ForwardIterator begin, ForwardIterator end); template internal::ParamGenerator ValuesIn(const T (&array)[N]); template internal::ParamGenerator ValuesIn( const Container& container); namespace internal { // Used in the Values() function to provide polymorphic capabilities. $range i 1..n $for i [[ $range j 1..i template <$for j, [[typename T$j]]> class ValueArray$i { public: $if i==1 [[explicit ]]ValueArray$i($for j, [[T$j v$j]]) : $for j, [[v$(j)_(v$j)]] {} template operator ParamGenerator() const { const T array[] = {$for j, [[static_cast(v$(j)_)]]}; return ValuesIn(array); } ValueArray$i(const ValueArray$i& other) : $for j, [[v$(j)_(other.v$(j)_)]] {} private: // No implementation - assignment is unsupported. void operator=(const ValueArray$i& other); $for j [[ const T$j v$(j)_; ]] }; ]] # if GTEST_HAS_COMBINE // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // Generates values from the Cartesian product of values produced // by the argument generators. // $range i 2..maxtuple $for i [[ $range j 1..i $range k 2..i template <$for j, [[typename T$j]]> class CartesianProductGenerator$i : public ParamGeneratorInterface< ::testing::tuple<$for j, [[T$j]]> > { public: typedef ::testing::tuple<$for j, [[T$j]]> ParamType; CartesianProductGenerator$i($for j, [[const ParamGenerator& g$j]]) : $for j, [[g$(j)_(g$j)]] {} virtual ~CartesianProductGenerator$i() {} virtual ParamIteratorInterface* Begin() const { return new Iterator(this, $for j, [[g$(j)_, g$(j)_.begin()]]); } virtual ParamIteratorInterface* End() const { return new Iterator(this, $for j, [[g$(j)_, g$(j)_.end()]]); } private: class Iterator : public ParamIteratorInterface { public: Iterator(const ParamGeneratorInterface* base, $for j, [[ const ParamGenerator& g$j, const typename ParamGenerator::iterator& current$(j)]]) : base_(base), $for j, [[ begin$(j)_(g$j.begin()), end$(j)_(g$j.end()), current$(j)_(current$j) ]] { ComputeCurrentValue(); } virtual ~Iterator() {} virtual const ParamGeneratorInterface* BaseGenerator() const { return base_; } // Advance should not be called on beyond-of-range iterators // so no component iterators must be beyond end of range, either. virtual void Advance() { assert(!AtEnd()); ++current$(i)_; $for k [[ if (current$(i+2-k)_ == end$(i+2-k)_) { current$(i+2-k)_ = begin$(i+2-k)_; ++current$(i+2-k-1)_; } ]] ComputeCurrentValue(); } virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } virtual const ParamType* Current() const { return current_value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) << "The program attempted to compare iterators " << "from different generators." << std::endl; const Iterator* typed_other = CheckedDowncastToActualType(&other); // We must report iterators equal if they both point beyond their // respective ranges. That can happen in a variety of fashions, // so we have to consult AtEnd(). 
return (AtEnd() && typed_other->AtEnd()) || ($for j && [[ current$(j)_ == typed_other->current$(j)_ ]]); } private: Iterator(const Iterator& other) : base_(other.base_), $for j, [[ begin$(j)_(other.begin$(j)_), end$(j)_(other.end$(j)_), current$(j)_(other.current$(j)_) ]] { ComputeCurrentValue(); } void ComputeCurrentValue() { if (!AtEnd()) current_value_.reset(new ParamType($for j, [[*current$(j)_]])); } bool AtEnd() const { // We must report iterator past the end of the range when either of the // component iterators has reached the end of its range. return $for j || [[ current$(j)_ == end$(j)_ ]]; } // No implementation - assignment is unsupported. void operator=(const Iterator& other); const ParamGeneratorInterface* const base_; // begin[i]_ and end[i]_ define the i-th range that Iterator traverses. // current[i]_ is the actual traversing iterator. $for j [[ const typename ParamGenerator::iterator begin$(j)_; const typename ParamGenerator::iterator end$(j)_; typename ParamGenerator::iterator current$(j)_; ]] linked_ptr current_value_; }; // class CartesianProductGenerator$i::Iterator // No implementation - assignment is unsupported. void operator=(const CartesianProductGenerator$i& other); $for j [[ const ParamGenerator g$(j)_; ]] }; // class CartesianProductGenerator$i ]] // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // Helper classes providing Combine() with polymorphic features. They allow // casting CartesianProductGeneratorN to ParamGenerator if T is // convertible to U. // $range i 2..maxtuple $for i [[ $range j 1..i template <$for j, [[class Generator$j]]> class CartesianProductHolder$i { public: CartesianProductHolder$i($for j, [[const Generator$j& g$j]]) : $for j, [[g$(j)_(g$j)]] {} template <$for j, [[typename T$j]]> operator ParamGenerator< ::testing::tuple<$for j, [[T$j]]> >() const { return ParamGenerator< ::testing::tuple<$for j, [[T$j]]> >( new CartesianProductGenerator$i<$for j, [[T$j]]>( $for j,[[ static_cast >(g$(j)_) ]])); } private: // No implementation - assignment is unsupported. void operator=(const CartesianProductHolder$i& other); $for j [[ const Generator$j g$(j)_; ]] }; // class CartesianProductHolder$i ]] # endif // GTEST_HAS_COMBINE } // namespace internal } // namespace testing #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_GENERATED_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-param-util.h000066400000000000000000000660161357355204000302740ustar00rootroot00000000000000// Copyright 2008 Google Inc. // All Rights Reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Type and function utilities for implementing parameterized tests. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ #include #include #include #include #include #include "gtest/internal/gtest-internal.h" #include "gtest/internal/gtest-linked_ptr.h" #include "gtest/internal/gtest-port.h" #include "gtest/gtest-printers.h" namespace testing { // Input to a parameterized test name generator, describing a test parameter. // Consists of the parameter value and the integer parameter index. template struct TestParamInfo { TestParamInfo(const ParamType& a_param, size_t an_index) : param(a_param), index(an_index) {} ParamType param; size_t index; }; // A builtin parameterized test name generator which returns the result of // testing::PrintToString. struct PrintToStringParamName { template std::string operator()(const TestParamInfo& info) const { return PrintToString(info.param); } }; namespace internal { // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // Outputs a message explaining invalid registration of different // fixture class for the same test case. This may happen when // TEST_P macro is used to define two tests with the same name // but in different namespaces. GTEST_API_ void ReportInvalidTestCaseType(const char* test_case_name, CodeLocation code_location); template class ParamGeneratorInterface; template class ParamGenerator; // Interface for iterating over elements provided by an implementation // of ParamGeneratorInterface. template class ParamIteratorInterface { public: virtual ~ParamIteratorInterface() {} // A pointer to the base generator instance. // Used only for the purposes of iterator comparison // to make sure that two iterators belong to the same generator. virtual const ParamGeneratorInterface* BaseGenerator() const = 0; // Advances iterator to point to the next element // provided by the generator. The caller is responsible // for not calling Advance() on an iterator equal to // BaseGenerator()->End(). virtual void Advance() = 0; // Clones the iterator object. Used for implementing copy semantics // of ParamIterator. virtual ParamIteratorInterface* Clone() const = 0; // Dereferences the current iterator and provides (read-only) access // to the pointed value. It is the caller's responsibility not to call // Current() on an iterator equal to BaseGenerator()->End(). // Used for implementing ParamGenerator::operator*(). virtual const T* Current() const = 0; // Determines whether the given iterator and other point to the same // element in the sequence generated by the generator. // Used for implementing ParamGenerator::operator==(). virtual bool Equals(const ParamIteratorInterface& other) const = 0; }; // Class iterating over elements provided by an implementation of // ParamGeneratorInterface. It wraps ParamIteratorInterface // and implements the const forward iterator concept. 
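//
// A brief sketch of the iteration pattern this wrapper enables (assuming a
// generator obtained from a public factory such as testing::Values()):
//
//   ParamGenerator<int> gen = ::testing::Values(1, 2, 3);
//   for (ParamGenerator<int>::iterator it = gen.begin(); it != gen.end();
//        ++it) {
//     const int& value = *it;  // read-only access via operator*()
//     ...
//   }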
template class ParamIterator { public: typedef T value_type; typedef const T& reference; typedef ptrdiff_t difference_type; // ParamIterator assumes ownership of the impl_ pointer. ParamIterator(const ParamIterator& other) : impl_(other.impl_->Clone()) {} ParamIterator& operator=(const ParamIterator& other) { if (this != &other) impl_.reset(other.impl_->Clone()); return *this; } const T& operator*() const { return *impl_->Current(); } const T* operator->() const { return impl_->Current(); } // Prefix version of operator++. ParamIterator& operator++() { impl_->Advance(); return *this; } // Postfix version of operator++. ParamIterator operator++(int /*unused*/) { ParamIteratorInterface* clone = impl_->Clone(); impl_->Advance(); return ParamIterator(clone); } bool operator==(const ParamIterator& other) const { return impl_.get() == other.impl_.get() || impl_->Equals(*other.impl_); } bool operator!=(const ParamIterator& other) const { return !(*this == other); } private: friend class ParamGenerator; explicit ParamIterator(ParamIteratorInterface* impl) : impl_(impl) {} scoped_ptr > impl_; }; // ParamGeneratorInterface is the binary interface to access generators // defined in other translation units. template class ParamGeneratorInterface { public: typedef T ParamType; virtual ~ParamGeneratorInterface() {} // Generator interface definition virtual ParamIteratorInterface* Begin() const = 0; virtual ParamIteratorInterface* End() const = 0; }; // Wraps ParamGeneratorInterface and provides general generator syntax // compatible with the STL Container concept. // This class implements copy initialization semantics and the contained // ParamGeneratorInterface instance is shared among all copies // of the original object. This is possible because that instance is immutable. template class ParamGenerator { public: typedef ParamIterator iterator; explicit ParamGenerator(ParamGeneratorInterface* impl) : impl_(impl) {} ParamGenerator(const ParamGenerator& other) : impl_(other.impl_) {} ParamGenerator& operator=(const ParamGenerator& other) { impl_ = other.impl_; return *this; } iterator begin() const { return iterator(impl_->Begin()); } iterator end() const { return iterator(impl_->End()); } private: linked_ptr > impl_; }; // Generates values from a range of two comparable values. Can be used to // generate sequences of user-defined types that implement operator+() and // operator<(). // This class is used in the Range() function. 
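//
// Illustration (the public entry point is testing::Range(), declared in
// gtest-param-test.h; the fixture below is hypothetical). The end value is
// never produced, so Range(0, 10, 2) yields 0, 2, 4, 6, 8:
//
//   class EvenTest : public ::testing::TestWithParam<int> {};
//   TEST_P(EvenTest, IsEven) { EXPECT_EQ(0, GetParam() % 2); }
//   INSTANTIATE_TEST_CASE_P(BelowTen, EvenTest, ::testing::Range(0, 10, 2));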
template class RangeGenerator : public ParamGeneratorInterface { public: RangeGenerator(T begin, T end, IncrementT step) : begin_(begin), end_(end), step_(step), end_index_(CalculateEndIndex(begin, end, step)) {} virtual ~RangeGenerator() {} virtual ParamIteratorInterface* Begin() const { return new Iterator(this, begin_, 0, step_); } virtual ParamIteratorInterface* End() const { return new Iterator(this, end_, end_index_, step_); } private: class Iterator : public ParamIteratorInterface { public: Iterator(const ParamGeneratorInterface* base, T value, int index, IncrementT step) : base_(base), value_(value), index_(index), step_(step) {} virtual ~Iterator() {} virtual const ParamGeneratorInterface* BaseGenerator() const { return base_; } virtual void Advance() { value_ = static_cast(value_ + step_); index_++; } virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } virtual const T* Current() const { return &value_; } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) << "The program attempted to compare iterators " << "from different generators." << std::endl; const int other_index = CheckedDowncastToActualType(&other)->index_; return index_ == other_index; } private: Iterator(const Iterator& other) : ParamIteratorInterface(), base_(other.base_), value_(other.value_), index_(other.index_), step_(other.step_) {} // No implementation - assignment is unsupported. void operator=(const Iterator& other); const ParamGeneratorInterface* const base_; T value_; int index_; const IncrementT step_; }; // class RangeGenerator::Iterator static int CalculateEndIndex(const T& begin, const T& end, const IncrementT& step) { int end_index = 0; for (T i = begin; i < end; i = static_cast(i + step)) end_index++; return end_index; } // No implementation - assignment is unsupported. void operator=(const RangeGenerator& other); const T begin_; const T end_; const IncrementT step_; // The index for the end() iterator. All the elements in the generated // sequence are indexed (0-based) to aid iterator comparison. const int end_index_; }; // class RangeGenerator // Generates values from a pair of STL-style iterators. Used in the // ValuesIn() function. The elements are copied from the source range // since the source can be located on the stack, and the generator // is likely to persist beyond that stack frame. 
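//
// A sketch of why copying matters (the helper name is hypothetical): the
// vector below is destroyed when MakeParams() returns, yet the generator
// remains valid because it stores its own copies of the elements.
//
//   ParamGenerator<std::string> MakeParams() {
//     std::vector<std::string> v;
//     v.push_back("alpha");
//     v.push_back("beta");
//     return ::testing::ValuesIn(v.begin(), v.end());  // elements copied
//   }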
template class ValuesInIteratorRangeGenerator : public ParamGeneratorInterface { public: template ValuesInIteratorRangeGenerator(ForwardIterator begin, ForwardIterator end) : container_(begin, end) {} virtual ~ValuesInIteratorRangeGenerator() {} virtual ParamIteratorInterface* Begin() const { return new Iterator(this, container_.begin()); } virtual ParamIteratorInterface* End() const { return new Iterator(this, container_.end()); } private: typedef typename ::std::vector ContainerType; class Iterator : public ParamIteratorInterface { public: Iterator(const ParamGeneratorInterface* base, typename ContainerType::const_iterator iterator) : base_(base), iterator_(iterator) {} virtual ~Iterator() {} virtual const ParamGeneratorInterface* BaseGenerator() const { return base_; } virtual void Advance() { ++iterator_; value_.reset(); } virtual ParamIteratorInterface* Clone() const { return new Iterator(*this); } // We need to use cached value referenced by iterator_ because *iterator_ // can return a temporary object (and of type other then T), so just // having "return &*iterator_;" doesn't work. // value_ is updated here and not in Advance() because Advance() // can advance iterator_ beyond the end of the range, and we cannot // detect that fact. The client code, on the other hand, is // responsible for not calling Current() on an out-of-range iterator. virtual const T* Current() const { if (value_.get() == NULL) value_.reset(new T(*iterator_)); return value_.get(); } virtual bool Equals(const ParamIteratorInterface& other) const { // Having the same base generator guarantees that the other // iterator is of the same type and we can downcast. GTEST_CHECK_(BaseGenerator() == other.BaseGenerator()) << "The program attempted to compare iterators " << "from different generators." << std::endl; return iterator_ == CheckedDowncastToActualType(&other)->iterator_; } private: Iterator(const Iterator& other) // The explicit constructor call suppresses a false warning // emitted by gcc when supplied with the -Wextra option. : ParamIteratorInterface(), base_(other.base_), iterator_(other.iterator_) {} const ParamGeneratorInterface* const base_; typename ContainerType::const_iterator iterator_; // A cached value of *iterator_. We keep it here to allow access by // pointer in the wrapping iterator's operator->(). // value_ needs to be mutable to be accessed in Current(). // Use of scoped_ptr helps manage cached value's lifetime, // which is bound by the lifespan of the iterator itself. mutable scoped_ptr value_; }; // class ValuesInIteratorRangeGenerator::Iterator // No implementation - assignment is unsupported. void operator=(const ValuesInIteratorRangeGenerator& other); const ContainerType container_; }; // class ValuesInIteratorRangeGenerator // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // Default parameterized test name generator, returns a string containing the // integer test parameter index. template std::string DefaultParamName(const TestParamInfo& info) { Message name_stream; name_stream << info.index; return name_stream.GetString(); } // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // Parameterized test name overload helpers, which help the // INSTANTIATE_TEST_CASE_P macro choose between the default parameterized // test name generator and user param name generator. 
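//
// For orientation (the fixture name is hypothetical): with three arguments,
// INSTANTIATE_TEST_CASE_P uses the default index-based test names ("/0",
// "/1", ...); a fourth argument such as testing::PrintToStringParamName
// supplies custom names derived from the parameter values instead.
//
//   INSTANTIATE_TEST_CASE_P(Indexed, FlagTest, ::testing::Bool());
//   INSTANTIATE_TEST_CASE_P(Named, FlagTest, ::testing::Bool(),
//                           ::testing::PrintToStringParamName());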
template ParamNameGenFunctor GetParamNameGen(ParamNameGenFunctor func) { return func; } template struct ParamNameGenFunc { typedef std::string Type(const TestParamInfo&); }; template typename ParamNameGenFunc::Type *GetParamNameGen() { return DefaultParamName; } // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // Stores a parameter value and later creates tests parameterized with that // value. template class ParameterizedTestFactory : public TestFactoryBase { public: typedef typename TestClass::ParamType ParamType; explicit ParameterizedTestFactory(ParamType parameter) : parameter_(parameter) {} virtual Test* CreateTest() { TestClass::SetParam(¶meter_); return new TestClass(); } private: const ParamType parameter_; GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestFactory); }; // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // TestMetaFactoryBase is a base class for meta-factories that create // test factories for passing into MakeAndRegisterTestInfo function. template class TestMetaFactoryBase { public: virtual ~TestMetaFactoryBase() {} virtual TestFactoryBase* CreateTestFactory(ParamType parameter) = 0; }; // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // TestMetaFactory creates test factories for passing into // MakeAndRegisterTestInfo function. Since MakeAndRegisterTestInfo receives // ownership of test factory pointer, same factory object cannot be passed // into that method twice. But ParameterizedTestCaseInfo is going to call // it for each Test/Parameter value combination. Thus it needs meta factory // creator class. template class TestMetaFactory : public TestMetaFactoryBase { public: typedef typename TestCase::ParamType ParamType; TestMetaFactory() {} virtual TestFactoryBase* CreateTestFactory(ParamType parameter) { return new ParameterizedTestFactory(parameter); } private: GTEST_DISALLOW_COPY_AND_ASSIGN_(TestMetaFactory); }; // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // ParameterizedTestCaseInfoBase is a generic interface // to ParameterizedTestCaseInfo classes. ParameterizedTestCaseInfoBase // accumulates test information provided by TEST_P macro invocations // and generators provided by INSTANTIATE_TEST_CASE_P macro invocations // and uses that information to register all resulting test instances // in RegisterTests method. The ParameterizeTestCaseRegistry class holds // a collection of pointers to the ParameterizedTestCaseInfo objects // and calls RegisterTests() on each of them when asked. class ParameterizedTestCaseInfoBase { public: virtual ~ParameterizedTestCaseInfoBase() {} // Base part of test case name for display purposes. virtual const std::string& GetTestCaseName() const = 0; // Test case id to verify identity. virtual TypeId GetTestCaseTypeId() const = 0; // UnitTest class invokes this method to register tests in this // test case right before running them in RUN_ALL_TESTS macro. // This method should not be called more then once on any single // instance of a ParameterizedTestCaseInfoBase derived class. virtual void RegisterTests() = 0; protected: ParameterizedTestCaseInfoBase() {} private: GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfoBase); }; // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // ParameterizedTestCaseInfo accumulates tests obtained from TEST_P // macro invocations for a particular test case and generators // obtained from INSTANTIATE_TEST_CASE_P macro invocations for that // test case. It registers tests with all values generated by all // generators when asked. 
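//
// Rough correspondence, much simplified (fixture, instantiation, and helper
// names are hypothetical; the real macro expansions live in
// gtest-param-test.h):
//
//   TEST_P(FooTest, DoesBar) { ... }
//     // roughly: holder->AddTestPattern("FooTest", "DoesBar", meta_factory);
//
//   INSTANTIATE_TEST_CASE_P(Seq, FooTest, ::testing::Values(1, 2));
//     // roughly: holder->AddTestCaseInstantiation("Seq", &generator_fn,
//     //                                           &name_fn, __FILE__,
//     //                                           __LINE__);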
template class ParameterizedTestCaseInfo : public ParameterizedTestCaseInfoBase { public: // ParamType and GeneratorCreationFunc are private types but are required // for declarations of public methods AddTestPattern() and // AddTestCaseInstantiation(). typedef typename TestCase::ParamType ParamType; // A function that returns an instance of appropriate generator type. typedef ParamGenerator(GeneratorCreationFunc)(); typedef typename ParamNameGenFunc::Type ParamNameGeneratorFunc; explicit ParameterizedTestCaseInfo( const char* name, CodeLocation code_location) : test_case_name_(name), code_location_(code_location) {} // Test case base name for display purposes. virtual const std::string& GetTestCaseName() const { return test_case_name_; } // Test case id to verify identity. virtual TypeId GetTestCaseTypeId() const { return GetTypeId(); } // TEST_P macro uses AddTestPattern() to record information // about a single test in a LocalTestInfo structure. // test_case_name is the base name of the test case (without invocation // prefix). test_base_name is the name of an individual test without // parameter index. For the test SequenceA/FooTest.DoBar/1 FooTest is // test case base name and DoBar is test base name. void AddTestPattern(const char* test_case_name, const char* test_base_name, TestMetaFactoryBase* meta_factory) { tests_.push_back(linked_ptr(new TestInfo(test_case_name, test_base_name, meta_factory))); } // INSTANTIATE_TEST_CASE_P macro uses AddGenerator() to record information // about a generator. int AddTestCaseInstantiation(const std::string& instantiation_name, GeneratorCreationFunc* func, ParamNameGeneratorFunc* name_func, const char* file, int line) { instantiations_.push_back( InstantiationInfo(instantiation_name, func, name_func, file, line)); return 0; // Return value used only to run this method in namespace scope. } // UnitTest class invokes this method to register tests in this test case // test cases right before running tests in RUN_ALL_TESTS macro. // This method should not be called more then once on any single // instance of a ParameterizedTestCaseInfoBase derived class. // UnitTest has a guard to prevent from calling this method more then once. 
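// For example (names hypothetical): TEST_P(FooTest, DoesBar) combined with
// INSTANTIATE_TEST_CASE_P(Seq, FooTest, ::testing::Values(1, 2)) is
// registered here as two tests, named with the default index-based scheme:
//   Seq/FooTest.DoesBar/0
//   Seq/FooTest.DoesBar/1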
virtual void RegisterTests() { for (typename TestInfoContainer::iterator test_it = tests_.begin(); test_it != tests_.end(); ++test_it) { linked_ptr test_info = *test_it; for (typename InstantiationContainer::iterator gen_it = instantiations_.begin(); gen_it != instantiations_.end(); ++gen_it) { const std::string& instantiation_name = gen_it->name; ParamGenerator generator((*gen_it->generator)()); ParamNameGeneratorFunc* name_func = gen_it->name_func; const char* file = gen_it->file; int line = gen_it->line; std::string test_case_name; if ( !instantiation_name.empty() ) test_case_name = instantiation_name + "/"; test_case_name += test_info->test_case_base_name; size_t i = 0; std::set test_param_names; for (typename ParamGenerator::iterator param_it = generator.begin(); param_it != generator.end(); ++param_it, ++i) { Message test_name_stream; std::string param_name = name_func( TestParamInfo(*param_it, i)); GTEST_CHECK_(IsValidParamName(param_name)) << "Parameterized test name '" << param_name << "' is invalid, in " << file << " line " << line << std::endl; GTEST_CHECK_(test_param_names.count(param_name) == 0) << "Duplicate parameterized test name '" << param_name << "', in " << file << " line " << line << std::endl; test_param_names.insert(param_name); test_name_stream << test_info->test_base_name << "/" << param_name; MakeAndRegisterTestInfo( test_case_name.c_str(), test_name_stream.GetString().c_str(), NULL, // No type parameter. PrintToString(*param_it).c_str(), code_location_, GetTestCaseTypeId(), TestCase::SetUpTestCase, TestCase::TearDownTestCase, test_info->test_meta_factory->CreateTestFactory(*param_it)); } // for param_it } // for gen_it } // for test_it } // RegisterTests private: // LocalTestInfo structure keeps information about a single test registered // with TEST_P macro. struct TestInfo { TestInfo(const char* a_test_case_base_name, const char* a_test_base_name, TestMetaFactoryBase* a_test_meta_factory) : test_case_base_name(a_test_case_base_name), test_base_name(a_test_base_name), test_meta_factory(a_test_meta_factory) {} const std::string test_case_base_name; const std::string test_base_name; const scoped_ptr > test_meta_factory; }; typedef ::std::vector > TestInfoContainer; // Records data received from INSTANTIATE_TEST_CASE_P macros: // struct InstantiationInfo { InstantiationInfo(const std::string &name_in, GeneratorCreationFunc* generator_in, ParamNameGeneratorFunc* name_func_in, const char* file_in, int line_in) : name(name_in), generator(generator_in), name_func(name_func_in), file(file_in), line(line_in) {} std::string name; GeneratorCreationFunc* generator; ParamNameGeneratorFunc* name_func; const char* file; int line; }; typedef ::std::vector InstantiationContainer; static bool IsValidParamName(const std::string& name) { // Check for empty string if (name.empty()) return false; // Check for invalid characters for (std::string::size_type index = 0; index < name.size(); ++index) { if (!isalnum(name[index]) && name[index] != '_') return false; } return true; } const std::string test_case_name_; CodeLocation code_location_; TestInfoContainer tests_; InstantiationContainer instantiations_; GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseInfo); }; // class ParameterizedTestCaseInfo // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // ParameterizedTestCaseRegistry contains a map of ParameterizedTestCaseInfoBase // classes accessed by test case names. 
TEST_P and INSTANTIATE_TEST_CASE_P // macros use it to locate their corresponding ParameterizedTestCaseInfo // descriptors. class ParameterizedTestCaseRegistry { public: ParameterizedTestCaseRegistry() {} ~ParameterizedTestCaseRegistry() { for (TestCaseInfoContainer::iterator it = test_case_infos_.begin(); it != test_case_infos_.end(); ++it) { delete *it; } } // Looks up or creates and returns a structure containing information about // tests and instantiations of a particular test case. template ParameterizedTestCaseInfo* GetTestCasePatternHolder( const char* test_case_name, CodeLocation code_location) { ParameterizedTestCaseInfo* typed_test_info = NULL; for (TestCaseInfoContainer::iterator it = test_case_infos_.begin(); it != test_case_infos_.end(); ++it) { if ((*it)->GetTestCaseName() == test_case_name) { if ((*it)->GetTestCaseTypeId() != GetTypeId()) { // Complain about incorrect usage of Google Test facilities // and terminate the program since we cannot guaranty correct // test case setup and tear-down in this case. ReportInvalidTestCaseType(test_case_name, code_location); posix::Abort(); } else { // At this point we are sure that the object we found is of the same // type we are looking for, so we downcast it to that type // without further checks. typed_test_info = CheckedDowncastToActualType< ParameterizedTestCaseInfo >(*it); } break; } } if (typed_test_info == NULL) { typed_test_info = new ParameterizedTestCaseInfo( test_case_name, code_location); test_case_infos_.push_back(typed_test_info); } return typed_test_info; } void RegisterTests() { for (TestCaseInfoContainer::iterator it = test_case_infos_.begin(); it != test_case_infos_.end(); ++it) { (*it)->RegisterTests(); } } private: typedef ::std::vector TestCaseInfoContainer; TestCaseInfoContainer test_case_infos_; GTEST_DISALLOW_COPY_AND_ASSIGN_(ParameterizedTestCaseRegistry); }; } // namespace internal } // namespace testing #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PARAM_UTIL_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-port-arch.h000066400000000000000000000072131357355204000301120ustar00rootroot00000000000000// Copyright 2015, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// The Google C++ Testing and Mocking Framework (Google Test)
//
// This header file defines the GTEST_OS_* macro.
// It is separate from gtest-port.h so that custom/gtest-port.h can include it.

#ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
#define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_

// Determines the platform on which Google Test is compiled.
#ifdef __CYGWIN__
# define GTEST_OS_CYGWIN 1
#elif defined __SYMBIAN32__
# define GTEST_OS_SYMBIAN 1
#elif defined _WIN32
# define GTEST_OS_WINDOWS 1
# ifdef _WIN32_WCE
#  define GTEST_OS_WINDOWS_MOBILE 1
# elif defined(__MINGW__) || defined(__MINGW32__)
#  define GTEST_OS_WINDOWS_MINGW 1
# elif defined(WINAPI_FAMILY)
#  include <winapifamily.h>
#  if WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP)
#   define GTEST_OS_WINDOWS_DESKTOP 1
#  elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_PHONE_APP)
#   define GTEST_OS_WINDOWS_PHONE 1
#  elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_APP)
#   define GTEST_OS_WINDOWS_RT 1
#  elif WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_TV_TITLE)
#   define GTEST_OS_WINDOWS_PHONE 1
#   define GTEST_OS_WINDOWS_TV_TITLE 1
#  else
    // WINAPI_FAMILY defined but no known partition matched.
    // Default to desktop.
#   define GTEST_OS_WINDOWS_DESKTOP 1
#  endif
# else
#  define GTEST_OS_WINDOWS_DESKTOP 1
# endif  // _WIN32_WCE
#elif defined __APPLE__
# define GTEST_OS_MAC 1
# if TARGET_OS_IPHONE
#  define GTEST_OS_IOS 1
# endif
#elif defined __FreeBSD__
# define GTEST_OS_FREEBSD 1
#elif defined __Fuchsia__
# define GTEST_OS_FUCHSIA 1
#elif defined __linux__
# define GTEST_OS_LINUX 1
# if defined __ANDROID__
#  define GTEST_OS_LINUX_ANDROID 1
# endif
#elif defined __MVS__
# define GTEST_OS_ZOS 1
#elif defined(__sun) && defined(__SVR4)
# define GTEST_OS_SOLARIS 1
#elif defined(_AIX)
# define GTEST_OS_AIX 1
#elif defined(__hpux)
# define GTEST_OS_HPUX 1
#elif defined __native_client__
# define GTEST_OS_NACL 1
#elif defined __NetBSD__
# define GTEST_OS_NETBSD 1
#elif defined __OpenBSD__
# define GTEST_OS_OPENBSD 1
#elif defined __QNX__
# define GTEST_OS_QNX 1
#endif  // __CYGWIN__

#endif  // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_ARCH_H_
libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-port.h000066400000000000000000002727531357355204000272060ustar00rootroot00000000000000// Copyright 2005, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
// // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Low-level types and utilities for porting Google Test to various // platforms. All macros ending with _ and symbols defined in an // internal namespace are subject to change without notice. Code // outside Google Test MUST NOT USE THEM DIRECTLY. Macros that don't // end with _ are part of Google Test's public API and can be used by // code outside Google Test. // // This file is fundamental to Google Test. All other Google Test source // files are expected to #include this. Therefore, it cannot #include // any other Google Test header. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ // Environment-describing macros // ----------------------------- // // Google Test can be used in many different environments. Macros in // this section tell Google Test what kind of environment it is being // used in, such that Google Test can provide environment-specific // features and implementations. // // Google Test tries to automatically detect the properties of its // environment, so users usually don't need to worry about these // macros. However, the automatic detection is not perfect. // Sometimes it's necessary for a user to define some of the following // macros in the build script to override Google Test's decisions. // // If the user doesn't define a macro in the list, Google Test will // provide a default definition. After this header is #included, all // macros in this list will be defined to either 1 or 0. // // Notes to maintainers: // - Each macro here is a user-tweakable knob; do not grow the list // lightly. // - Use #if to key off these macros. Don't use #ifdef or "#if // defined(...)", which will not work as these macros are ALWAYS // defined. // // GTEST_HAS_CLONE - Define it to 1/0 to indicate that clone(2) // is/isn't available. // GTEST_HAS_EXCEPTIONS - Define it to 1/0 to indicate that exceptions // are enabled. // GTEST_HAS_GLOBAL_STRING - Define it to 1/0 to indicate that ::string // is/isn't available // GTEST_HAS_GLOBAL_WSTRING - Define it to 1/0 to indicate that ::wstring // is/isn't available // GTEST_HAS_POSIX_RE - Define it to 1/0 to indicate that POSIX regular // expressions are/aren't available. // GTEST_HAS_PTHREAD - Define it to 1/0 to indicate that // is/isn't available. // GTEST_HAS_RTTI - Define it to 1/0 to indicate that RTTI is/isn't // enabled. // GTEST_HAS_STD_WSTRING - Define it to 1/0 to indicate that // std::wstring does/doesn't work (Google Test can // be used where std::wstring is unavailable). // GTEST_HAS_TR1_TUPLE - Define it to 1/0 to indicate tr1::tuple // is/isn't available. 
// GTEST_HAS_SEH - Define it to 1/0 to indicate whether the // compiler supports Microsoft's "Structured // Exception Handling". // GTEST_HAS_STREAM_REDIRECTION // - Define it to 1/0 to indicate whether the // platform supports I/O stream redirection using // dup() and dup2(). // GTEST_USE_OWN_TR1_TUPLE - Define it to 1/0 to indicate whether Google // Test's own tr1 tuple implementation should be // used. Unused when the user sets // GTEST_HAS_TR1_TUPLE to 0. // GTEST_LANG_CXX11 - Define it to 1/0 to indicate that Google Test // is building in C++11/C++98 mode. // GTEST_LINKED_AS_SHARED_LIBRARY // - Define to 1 when compiling tests that use // Google Test as a shared library (known as // DLL on Windows). // GTEST_CREATE_SHARED_LIBRARY // - Define to 1 when compiling Google Test itself // as a shared library. // GTEST_DEFAULT_DEATH_TEST_STYLE // - The default value of --gtest_death_test_style. // The legacy default has been "fast" in the open // source version since 2008. The recommended value // is "threadsafe", and can be set in // custom/gtest-port.h. // Platform-indicating macros // -------------------------- // // Macros indicating the platform on which Google Test is being used // (a macro is defined to 1 if compiled on the given platform; // otherwise UNDEFINED -- it's never defined to 0.). Google Test // defines these macros automatically. Code outside Google Test MUST // NOT define them. // // GTEST_OS_AIX - IBM AIX // GTEST_OS_CYGWIN - Cygwin // GTEST_OS_FREEBSD - FreeBSD // GTEST_OS_FUCHSIA - Fuchsia // GTEST_OS_HPUX - HP-UX // GTEST_OS_LINUX - Linux // GTEST_OS_LINUX_ANDROID - Google Android // GTEST_OS_MAC - Mac OS X // GTEST_OS_IOS - iOS // GTEST_OS_NACL - Google Native Client (NaCl) // GTEST_OS_NETBSD - NetBSD // GTEST_OS_OPENBSD - OpenBSD // GTEST_OS_QNX - QNX // GTEST_OS_SOLARIS - Sun Solaris // GTEST_OS_SYMBIAN - Symbian // GTEST_OS_WINDOWS - Windows (Desktop, MinGW, or Mobile) // GTEST_OS_WINDOWS_DESKTOP - Windows Desktop // GTEST_OS_WINDOWS_MINGW - MinGW // GTEST_OS_WINDOWS_MOBILE - Windows Mobile // GTEST_OS_WINDOWS_PHONE - Windows Phone // GTEST_OS_WINDOWS_RT - Windows Store App/WinRT // GTEST_OS_ZOS - z/OS // // Among the platforms, Cygwin, Linux, Max OS X, and Windows have the // most stable support. Since core members of the Google Test project // don't have access to other platforms, support for them may be less // stable. If you notice any problems on your platform, please notify // googletestframework@googlegroups.com (patches for fixing them are // even more welcome!). // // It is possible that none of the GTEST_OS_* macros are defined. // Feature-indicating macros // ------------------------- // // Macros indicating which Google Test features are available (a macro // is defined to 1 if the corresponding feature is supported; // otherwise UNDEFINED -- it's never defined to 0.). Google Test // defines these macros automatically. Code outside Google Test MUST // NOT define them. // // These macros are public so that portable tests can be written. // Such tests typically surround code using a feature with an #if // which controls that code. For example: // // #if GTEST_HAS_DEATH_TEST // EXPECT_DEATH(DoSomethingDeadly()); // #endif // // GTEST_HAS_COMBINE - the Combine() function (for value-parameterized // tests) // GTEST_HAS_DEATH_TEST - death tests // GTEST_HAS_TYPED_TEST - typed tests // GTEST_HAS_TYPED_TEST_P - type-parameterized tests // GTEST_IS_THREADSAFE - Google Test is thread-safe. 
// GOOGLETEST_CM0007 DO NOT DELETE
// GTEST_USES_POSIX_RE - enhanced POSIX regex is used. Do not confuse with
//                       GTEST_HAS_POSIX_RE (see above) which users can
//                       define themselves.
// GTEST_USES_SIMPLE_RE - our own simple regex is used;
//                        the above RE\b(s) are mutually exclusive.
// GTEST_CAN_COMPARE_NULL - accepts untyped NULL in EXPECT_EQ().

// Misc public macros
// ------------------
//
// GTEST_FLAG(flag_name) - references the variable corresponding to
//                         the given Google Test flag.

// Internal utilities
// ------------------
//
// The following macros and utilities are for Google Test's INTERNAL
// use only. Code outside Google Test MUST NOT USE THEM DIRECTLY.
//
// Macros for basic C++ coding:
//   GTEST_AMBIGUOUS_ELSE_BLOCKER_ - for disabling a gcc warning.
//   GTEST_ATTRIBUTE_UNUSED_  - declares that a class' instances or a
//                              variable don't have to be used.
//   GTEST_DISALLOW_ASSIGN_   - disables operator=.
//   GTEST_DISALLOW_COPY_AND_ASSIGN_ - disables copy ctor and operator=.
//   GTEST_MUST_USE_RESULT_   - declares that a function's result must be used.
//   GTEST_INTENTIONAL_CONST_COND_PUSH_ - start code section where MSVC C4127
//                                        is suppressed (constant conditional).
//   GTEST_INTENTIONAL_CONST_COND_POP_  - finish code section where MSVC C4127
//                                        is suppressed.
//
// C++11 feature wrappers:
//
//   testing::internal::forward - portability wrapper for std::forward.
//   testing::internal::move    - portability wrapper for std::move.
//
// Synchronization:
//   Mutex, MutexLock, ThreadLocal, GetThreadCount()
//                            - synchronization primitives.
//
// Template meta programming:
//   is_pointer     - as in TR1; needed on Symbian and IBM XL C/C++ only.
//   IteratorTraits - partial implementation of std::iterator_traits, which
//                    is not available in libCstd when compiled with Sun C++.
//
// Smart pointers:
//   scoped_ptr     - as in TR2.
//
// Regular expressions:
//   RE             - a simple regular expression class using the POSIX
//                    Extended Regular Expression syntax on UNIX-like platforms
//                    GOOGLETEST_CM0008 DO NOT DELETE
//                    or a reduced regular expression syntax on other
//                    platforms, including Windows.
// Logging:
//   GTEST_LOG_()   - logs messages at the specified severity level.
//   LogToStderr()  - directs all log messages to stderr.
//   FlushInfoLog() - flushes informational log messages.
//
// Stdout and stderr capturing:
//   CaptureStdout()     - starts capturing stdout.
//   GetCapturedStdout() - stops capturing stdout and returns the captured
//                         string.
//   CaptureStderr()     - starts capturing stderr.
//   GetCapturedStderr() - stops capturing stderr and returns the captured
//                         string.
//
// Integer types:
//   TypeWithSize   - maps an integer to an int type.
//   Int32, UInt32, Int64, UInt64, TimeInMillis
//                  - integers of known sizes.
//   BiggestInt     - the biggest signed integer type.
//
// Command-line utilities:
//   GTEST_DECLARE_*()  - declares a flag.
//   GTEST_DEFINE_*()   - defines a flag.
//   GetInjectableArgvs() - returns the command line as a vector of strings.
//
// Environment variable utilities:
//   GetEnv()             - gets the value of an environment variable.
//   BoolFromGTestEnv()   - parses a bool environment variable.
//   Int32FromGTestEnv()  - parses an Int32 environment variable.
//   StringFromGTestEnv() - parses a string environment variable.

#include <ctype.h>   // for isspace, etc
#include <stddef.h>  // for ptrdiff_t
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#ifndef _WIN32_WCE
# include <sys/types.h>
# include <sys/stat.h>
#endif  // !_WIN32_WCE

#if defined __APPLE__
# include <AvailabilityMacros.h>
# include <TargetConditionals.h>
#endif

// Brings in the definition of HAS_GLOBAL_STRING.  This must be done
// BEFORE we test HAS_GLOBAL_STRING.
#include <string>  // NOLINT
#include <algorithm>  // NOLINT
#include <iostream>  // NOLINT
#include <sstream>  // NOLINT
#include <utility>
#include <vector>  // NOLINT

#include "gtest/internal/gtest-port-arch.h"
#include "gtest/internal/custom/gtest-port.h"

#if !defined(GTEST_DEV_EMAIL_)
# define GTEST_DEV_EMAIL_ "googletestframework@@googlegroups.com"
# define GTEST_FLAG_PREFIX_ "gtest_"
# define GTEST_FLAG_PREFIX_DASH_ "gtest-"
# define GTEST_FLAG_PREFIX_UPPER_ "GTEST_"
# define GTEST_NAME_ "Google Test"
# define GTEST_PROJECT_URL_ "https://github.com/google/googletest/"
#endif  // !defined(GTEST_DEV_EMAIL_)

#if !defined(GTEST_INIT_GOOGLE_TEST_NAME_)
# define GTEST_INIT_GOOGLE_TEST_NAME_ "testing::InitGoogleTest"
#endif  // !defined(GTEST_INIT_GOOGLE_TEST_NAME_)

// Determines the version of gcc that is used to compile this.
#ifdef __GNUC__
// 40302 means version 4.3.2.
# define GTEST_GCC_VER_ \
    (__GNUC__*10000 + __GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__)
#endif  // __GNUC__

// Macros for disabling Microsoft Visual C++ warnings.
//
//   GTEST_DISABLE_MSC_WARNINGS_PUSH_(4800 4385)
//   /* code that triggers warnings C4800 and C4385 */
//   GTEST_DISABLE_MSC_WARNINGS_POP_()
#if _MSC_VER >= 1400
# define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings) \
    __pragma(warning(push))                        \
    __pragma(warning(disable: warnings))
# define GTEST_DISABLE_MSC_WARNINGS_POP_()          \
    __pragma(warning(pop))
#else
// Older versions of MSVC don't have __pragma.
# define GTEST_DISABLE_MSC_WARNINGS_PUSH_(warnings)
# define GTEST_DISABLE_MSC_WARNINGS_POP_()
#endif

// Clang on Windows does not understand MSVC's pragma warning.
// We need clang-specific way to disable function deprecation warning.
#ifdef __clang__
# define GTEST_DISABLE_MSC_DEPRECATED_PUSH_()                         \
    _Pragma("clang diagnostic push")                                  \
    _Pragma("clang diagnostic ignored \"-Wdeprecated-declarations\"") \
    _Pragma("clang diagnostic ignored \"-Wdeprecated-implementations\"")
#define GTEST_DISABLE_MSC_DEPRECATED_POP_() \
    _Pragma("clang diagnostic pop")
#else
# define GTEST_DISABLE_MSC_DEPRECATED_PUSH_() \
    GTEST_DISABLE_MSC_WARNINGS_PUSH_(4996)
# define GTEST_DISABLE_MSC_DEPRECATED_POP_() \
    GTEST_DISABLE_MSC_WARNINGS_POP_()
#endif

#ifndef GTEST_LANG_CXX11
// gcc and clang define __GXX_EXPERIMENTAL_CXX0X__ when
// -std={c,gnu}++{0x,11} is passed.  The C++11 standard specifies a
// value for __cplusplus, and recent versions of clang, gcc, and
// probably other compilers set that too in C++11 mode.
# if __GXX_EXPERIMENTAL_CXX0X__ || __cplusplus >= 201103L || _MSC_VER >= 1900
// Compiling in at least C++11 mode.
#  define GTEST_LANG_CXX11 1
# else
#  define GTEST_LANG_CXX11 0
# endif
#endif

// Distinct from C++11 language support, some environments don't provide
// proper C++11 library support. Notably, it's possible to build in
// C++11 mode when targeting Mac OS X 10.6, which has an old libstdc++
// with no C++11 support.
//
// libstdc++ has sufficient C++11 support as of GCC 4.6.0, __GLIBCXX__
// 20110325, but maintenance releases in the 4.4 and 4.5 series followed
// this date, so check for those versions by their date stamps.
// https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html#abi.versioning
#if GTEST_LANG_CXX11 && \
    (!defined(__GLIBCXX__) || ( \
        __GLIBCXX__ >= 20110325ul &&  /* GCC >= 4.6.0 */ \
        /* Blacklist of patch releases of older branches: */ \
        __GLIBCXX__ != 20110416ul &&  /* GCC 4.4.6 */ \
        __GLIBCXX__ != 20120313ul &&  /* GCC 4.4.7 */ \
        __GLIBCXX__ != 20110428ul &&  /* GCC 4.5.3 */ \
        __GLIBCXX__ != 20120702ul))  /* GCC 4.5.4 */
# define GTEST_STDLIB_CXX11 1
#endif

// Only use C++11 library features if the library provides them.
#if GTEST_STDLIB_CXX11
# define GTEST_HAS_STD_BEGIN_AND_END_ 1
# define GTEST_HAS_STD_FORWARD_LIST_ 1
# if !defined(_MSC_VER) || (_MSC_FULL_VER >= 190023824)
// works only with VS2015U2 and better
#   define GTEST_HAS_STD_FUNCTION_ 1
# endif
# define GTEST_HAS_STD_INITIALIZER_LIST_ 1
# define GTEST_HAS_STD_MOVE_ 1
# define GTEST_HAS_STD_UNIQUE_PTR_ 1
# define GTEST_HAS_STD_SHARED_PTR_ 1
# define GTEST_HAS_UNORDERED_MAP_ 1
# define GTEST_HAS_UNORDERED_SET_ 1
#endif

// C++11 specifies that <tuple> provides std::tuple.
// Some platforms still might not have it, however.
#if GTEST_LANG_CXX11
# define GTEST_HAS_STD_TUPLE_ 1
# if defined(__clang__)
// Inspired by
// https://clang.llvm.org/docs/LanguageExtensions.html#include-file-checking-macros
#  if defined(__has_include) && !__has_include(<tuple>)
#   undef GTEST_HAS_STD_TUPLE_
#  endif
# elif defined(_MSC_VER)
// Inspired by boost/config/stdlib/dinkumware.hpp
#  if defined(_CPPLIB_VER) && _CPPLIB_VER < 520
#   undef GTEST_HAS_STD_TUPLE_
#  endif
# elif defined(__GLIBCXX__)
// Inspired by boost/config/stdlib/libstdcpp3.hpp,
// http://gcc.gnu.org/gcc-4.2/changes.html and
// https://web.archive.org/web/20140227044429/gcc.gnu.org/onlinedocs/libstdc++/manual/bk01pt01ch01.html#manual.intro.status.standard.200x
#  if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 2)
#   undef GTEST_HAS_STD_TUPLE_
#  endif
# endif
#endif

// Brings in definitions for functions used in the testing::internal::posix
// namespace (read, write, close, chdir, isatty, stat). We do not currently
// use them on Windows Mobile.
#if GTEST_OS_WINDOWS
# if !GTEST_OS_WINDOWS_MOBILE
#  include <direct.h>
#  include <io.h>
# endif
// In order to avoid having to include <windows.h>, use forward declaration
#if GTEST_OS_WINDOWS_MINGW && !defined(__MINGW64_VERSION_MAJOR)
// MinGW defined _CRITICAL_SECTION and _RTL_CRITICAL_SECTION as two
// separate (equivalent) structs, instead of using typedef
typedef struct _CRITICAL_SECTION GTEST_CRITICAL_SECTION;
#else
// Assume CRITICAL_SECTION is a typedef of _RTL_CRITICAL_SECTION.
// This assumption is verified by
// WindowsTypesTest.CRITICAL_SECTIONIs_RTL_CRITICAL_SECTION.
typedef struct _RTL_CRITICAL_SECTION GTEST_CRITICAL_SECTION;
#endif
#else
// This assumes that non-Windows OSes provide unistd.h. For OSes where this
// is not the case, we need to include headers that provide the functions
// mentioned above.
# include <unistd.h>
# include <strings.h>
#endif  // GTEST_OS_WINDOWS

#if GTEST_OS_LINUX_ANDROID
// Used to define __ANDROID_API__ matching the target NDK API level.
# include <android/api-level.h>  // NOLINT
#endif

// Defines this to true iff Google Test can use POSIX regular expressions.
#ifndef GTEST_HAS_POSIX_RE
# if GTEST_OS_LINUX_ANDROID
// On Android, <regex.h> is only available starting with Gingerbread.
#  define GTEST_HAS_POSIX_RE (__ANDROID_API__ >= 9)
# else
#  define GTEST_HAS_POSIX_RE (!GTEST_OS_WINDOWS)
# endif
#endif

#if GTEST_USES_PCRE
// The appropriate headers have already been included.

#elif GTEST_HAS_POSIX_RE

// On some platforms, <regex.h> needs someone to define size_t, and
// won't compile otherwise.  We can #include it here as we already
// included <stdlib.h>, which is guaranteed to define size_t through
// <stddef.h>.
# include <regex.h>  // NOLINT

# define GTEST_USES_POSIX_RE 1

#elif GTEST_OS_WINDOWS

// <regex.h> is not available on Windows.  Use our own simple regex
// implementation instead.
# define GTEST_USES_SIMPLE_RE 1

#else

// <regex.h> may not be available on this platform.  Use our own
// simple regex implementation instead.
# define GTEST_USES_SIMPLE_RE 1 #endif // GTEST_USES_PCRE #ifndef GTEST_HAS_EXCEPTIONS // The user didn't tell us whether exceptions are enabled, so we need // to figure it out. # if defined(_MSC_VER) && defined(_CPPUNWIND) // MSVC defines _CPPUNWIND to 1 iff exceptions are enabled. # define GTEST_HAS_EXCEPTIONS 1 # elif defined(__BORLANDC__) // C++Builder's implementation of the STL uses the _HAS_EXCEPTIONS // macro to enable exceptions, so we'll do the same. // Assumes that exceptions are enabled by default. # ifndef _HAS_EXCEPTIONS # define _HAS_EXCEPTIONS 1 # endif // _HAS_EXCEPTIONS # define GTEST_HAS_EXCEPTIONS _HAS_EXCEPTIONS # elif defined(__clang__) // clang defines __EXCEPTIONS iff exceptions are enabled before clang 220714, // but iff cleanups are enabled after that. In Obj-C++ files, there can be // cleanups for ObjC exceptions which also need cleanups, even if C++ exceptions // are disabled. clang has __has_feature(cxx_exceptions) which checks for C++ // exceptions starting at clang r206352, but which checked for cleanups prior to // that. To reliably check for C++ exception availability with clang, check for // __EXCEPTIONS && __has_feature(cxx_exceptions). # define GTEST_HAS_EXCEPTIONS (__EXCEPTIONS && __has_feature(cxx_exceptions)) # elif defined(__GNUC__) && __EXCEPTIONS // gcc defines __EXCEPTIONS to 1 iff exceptions are enabled. # define GTEST_HAS_EXCEPTIONS 1 # elif defined(__SUNPRO_CC) // Sun Pro CC supports exceptions. However, there is no compile-time way of // detecting whether they are enabled or not. Therefore, we assume that // they are enabled unless the user tells us otherwise. # define GTEST_HAS_EXCEPTIONS 1 # elif defined(__IBMCPP__) && __EXCEPTIONS // xlC defines __EXCEPTIONS to 1 iff exceptions are enabled. # define GTEST_HAS_EXCEPTIONS 1 # elif defined(__HP_aCC) // Exception handling is in effect by default in HP aCC compiler. It has to // be turned of by +noeh compiler option if desired. # define GTEST_HAS_EXCEPTIONS 1 # else // For other compilers, we assume exceptions are disabled to be // conservative. # define GTEST_HAS_EXCEPTIONS 0 # endif // defined(_MSC_VER) || defined(__BORLANDC__) #endif // GTEST_HAS_EXCEPTIONS #if !defined(GTEST_HAS_STD_STRING) // Even though we don't use this macro any longer, we keep it in case // some clients still depend on it. # define GTEST_HAS_STD_STRING 1 #elif !GTEST_HAS_STD_STRING // The user told us that ::std::string isn't available. # error "::std::string isn't available." #endif // !defined(GTEST_HAS_STD_STRING) #ifndef GTEST_HAS_GLOBAL_STRING # define GTEST_HAS_GLOBAL_STRING 0 #endif // GTEST_HAS_GLOBAL_STRING #ifndef GTEST_HAS_STD_WSTRING // The user didn't tell us whether ::std::wstring is available, so we need // to figure it out. // FIXME: uses autoconf to detect whether ::std::wstring // is available. // Cygwin 1.7 and below doesn't support ::std::wstring. // Solaris' libc++ doesn't support it either. Android has // no support for it at least as recent as Froyo (2.2). # define GTEST_HAS_STD_WSTRING \ (!(GTEST_OS_LINUX_ANDROID || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS)) #endif // GTEST_HAS_STD_WSTRING #ifndef GTEST_HAS_GLOBAL_WSTRING // The user didn't tell us whether ::wstring is available, so we need // to figure it out. # define GTEST_HAS_GLOBAL_WSTRING \ (GTEST_HAS_STD_WSTRING && GTEST_HAS_GLOBAL_STRING) #endif // GTEST_HAS_GLOBAL_WSTRING // Determines whether RTTI is available. #ifndef GTEST_HAS_RTTI // The user didn't tell us whether RTTI is enabled, so we need to // figure it out. 
# ifdef _MSC_VER # ifdef _CPPRTTI // MSVC defines this macro iff RTTI is enabled. # define GTEST_HAS_RTTI 1 # else # define GTEST_HAS_RTTI 0 # endif // Starting with version 4.3.2, gcc defines __GXX_RTTI iff RTTI is enabled. # elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40302) # ifdef __GXX_RTTI // When building against STLport with the Android NDK and with // -frtti -fno-exceptions, the build fails at link time with undefined // references to __cxa_bad_typeid. Note sure if STL or toolchain bug, // so disable RTTI when detected. # if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) && \ !defined(__EXCEPTIONS) # define GTEST_HAS_RTTI 0 # else # define GTEST_HAS_RTTI 1 # endif // GTEST_OS_LINUX_ANDROID && __STLPORT_MAJOR && !__EXCEPTIONS # else # define GTEST_HAS_RTTI 0 # endif // __GXX_RTTI // Clang defines __GXX_RTTI starting with version 3.0, but its manual recommends // using has_feature instead. has_feature(cxx_rtti) is supported since 2.7, the // first version with C++ support. # elif defined(__clang__) # define GTEST_HAS_RTTI __has_feature(cxx_rtti) // Starting with version 9.0 IBM Visual Age defines __RTTI_ALL__ to 1 if // both the typeid and dynamic_cast features are present. # elif defined(__IBMCPP__) && (__IBMCPP__ >= 900) # ifdef __RTTI_ALL__ # define GTEST_HAS_RTTI 1 # else # define GTEST_HAS_RTTI 0 # endif # else // For all other compilers, we assume RTTI is enabled. # define GTEST_HAS_RTTI 1 # endif // _MSC_VER #endif // GTEST_HAS_RTTI // It's this header's responsibility to #include when RTTI // is enabled. #if GTEST_HAS_RTTI # include #endif // Determines whether Google Test can use the pthreads library. #ifndef GTEST_HAS_PTHREAD // The user didn't tell us explicitly, so we make reasonable assumptions about // which platforms have pthreads support. // // To disable threading support in Google Test, add -DGTEST_HAS_PTHREAD=0 // to your compiler flags. #define GTEST_HAS_PTHREAD \ (GTEST_OS_LINUX || GTEST_OS_MAC || GTEST_OS_HPUX || GTEST_OS_QNX || \ GTEST_OS_FREEBSD || GTEST_OS_NACL || GTEST_OS_NETBSD || GTEST_OS_FUCHSIA) #endif // GTEST_HAS_PTHREAD #if GTEST_HAS_PTHREAD // gtest-port.h guarantees to #include when GTEST_HAS_PTHREAD is // true. # include // NOLINT // For timespec and nanosleep, used below. # include // NOLINT #endif // Determines if hash_map/hash_set are available. // Only used for testing against those containers. #if !defined(GTEST_HAS_HASH_MAP_) # if defined(_MSC_VER) && (_MSC_VER < 1900) # define GTEST_HAS_HASH_MAP_ 1 // Indicates that hash_map is available. # define GTEST_HAS_HASH_SET_ 1 // Indicates that hash_set is available. # endif // _MSC_VER #endif // !defined(GTEST_HAS_HASH_MAP_) // Determines whether Google Test can use tr1/tuple. You can define // this macro to 0 to prevent Google Test from using tuple (any // feature depending on tuple with be disabled in this mode). #ifndef GTEST_HAS_TR1_TUPLE # if GTEST_OS_LINUX_ANDROID && defined(_STLPORT_MAJOR) // STLport, provided with the Android NDK, has neither or . # define GTEST_HAS_TR1_TUPLE 0 # elif defined(_MSC_VER) && (_MSC_VER >= 1910) // Prevent `warning C4996: 'std::tr1': warning STL4002: // The non-Standard std::tr1 namespace and TR1-only machinery // are deprecated and will be REMOVED.` # define GTEST_HAS_TR1_TUPLE 0 # elif GTEST_LANG_CXX11 && defined(_LIBCPP_VERSION) // libc++ doesn't support TR1. # define GTEST_HAS_TR1_TUPLE 0 # else // The user didn't tell us not to do it, so we assume it's OK. 
# define GTEST_HAS_TR1_TUPLE 1 # endif #endif // GTEST_HAS_TR1_TUPLE // Determines whether Google Test's own tr1 tuple implementation // should be used. #ifndef GTEST_USE_OWN_TR1_TUPLE // We use our own tuple implementation on Symbian. # if GTEST_OS_SYMBIAN # define GTEST_USE_OWN_TR1_TUPLE 1 # else // The user didn't tell us, so we need to figure it out. // We use our own TR1 tuple if we aren't sure the user has an // implementation of it already. At this time, libstdc++ 4.0.0+ and // MSVC 2010 are the only mainstream standard libraries that come // with a TR1 tuple implementation. NVIDIA's CUDA NVCC compiler // pretends to be GCC by defining __GNUC__ and friends, but cannot // compile GCC's tuple implementation. MSVC 2008 (9.0) provides TR1 // tuple in a 323 MB Feature Pack download, which we cannot assume the // user has. QNX's QCC compiler is a modified GCC but it doesn't // support TR1 tuple. libc++ only provides std::tuple, in C++11 mode, // and it can be used with some compilers that define __GNUC__. # if (defined(__GNUC__) && !defined(__CUDACC__) && (GTEST_GCC_VER_ >= 40000) \ && !GTEST_OS_QNX && !defined(_LIBCPP_VERSION)) \ || (_MSC_VER >= 1600 && _MSC_VER < 1900) # define GTEST_ENV_HAS_TR1_TUPLE_ 1 # endif // C++11 specifies that provides std::tuple. Use that if gtest is used // in C++11 mode and libstdc++ isn't very old (binaries targeting OS X 10.6 // can build with clang but need to use gcc4.2's libstdc++). # if GTEST_LANG_CXX11 && (!defined(__GLIBCXX__) || __GLIBCXX__ > 20110325) # define GTEST_ENV_HAS_STD_TUPLE_ 1 # endif # if GTEST_ENV_HAS_TR1_TUPLE_ || GTEST_ENV_HAS_STD_TUPLE_ # define GTEST_USE_OWN_TR1_TUPLE 0 # else # define GTEST_USE_OWN_TR1_TUPLE 1 # endif # endif // GTEST_OS_SYMBIAN #endif // GTEST_USE_OWN_TR1_TUPLE // To avoid conditional compilation we make it gtest-port.h's responsibility // to #include the header implementing tuple. #if GTEST_HAS_STD_TUPLE_ # include // IWYU pragma: export # define GTEST_TUPLE_NAMESPACE_ ::std #endif // GTEST_HAS_STD_TUPLE_ // We include tr1::tuple even if std::tuple is available to define printers for // them. #if GTEST_HAS_TR1_TUPLE # ifndef GTEST_TUPLE_NAMESPACE_ # define GTEST_TUPLE_NAMESPACE_ ::std::tr1 # endif // GTEST_TUPLE_NAMESPACE_ # if GTEST_USE_OWN_TR1_TUPLE # include "gtest/internal/gtest-tuple.h" // IWYU pragma: export // NOLINT # elif GTEST_OS_SYMBIAN // On Symbian, BOOST_HAS_TR1_TUPLE causes Boost's TR1 tuple library to // use STLport's tuple implementation, which unfortunately doesn't // work as the copy of STLport distributed with Symbian is incomplete. // By making sure BOOST_HAS_TR1_TUPLE is undefined, we force Boost to // use its own tuple implementation. # ifdef BOOST_HAS_TR1_TUPLE # undef BOOST_HAS_TR1_TUPLE # endif // BOOST_HAS_TR1_TUPLE // This prevents , which defines // BOOST_HAS_TR1_TUPLE, from being #included by Boost's . # define BOOST_TR1_DETAIL_CONFIG_HPP_INCLUDED # include // IWYU pragma: export // NOLINT # elif defined(__GNUC__) && (GTEST_GCC_VER_ >= 40000) // GCC 4.0+ implements tr1/tuple in the header. This does // not conform to the TR1 spec, which requires the header to be . # if !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302 // Until version 4.3.2, gcc has a bug that causes , // which is #included by , to not compile when RTTI is // disabled. _TR1_FUNCTIONAL is the header guard for // . Hence the following #define is used to prevent // from being included. # define _TR1_FUNCTIONAL 1 # include # undef _TR1_FUNCTIONAL // Allows the user to #include // if they choose to. 
# else # include // NOLINT # endif // !GTEST_HAS_RTTI && GTEST_GCC_VER_ < 40302 // VS 2010 now has tr1 support. # elif _MSC_VER >= 1600 # include // IWYU pragma: export // NOLINT # else // GTEST_USE_OWN_TR1_TUPLE # include // IWYU pragma: export // NOLINT # endif // GTEST_USE_OWN_TR1_TUPLE #endif // GTEST_HAS_TR1_TUPLE // Determines whether clone(2) is supported. // Usually it will only be available on Linux, excluding // Linux on the Itanium architecture. // Also see http://linux.die.net/man/2/clone. #ifndef GTEST_HAS_CLONE // The user didn't tell us, so we need to figure it out. # if GTEST_OS_LINUX && !defined(__ia64__) # if GTEST_OS_LINUX_ANDROID // On Android, clone() became available at different API levels for each 32-bit // architecture. # if defined(__LP64__) || \ (defined(__arm__) && __ANDROID_API__ >= 9) || \ (defined(__mips__) && __ANDROID_API__ >= 12) || \ (defined(__i386__) && __ANDROID_API__ >= 17) # define GTEST_HAS_CLONE 1 # else # define GTEST_HAS_CLONE 0 # endif # else # define GTEST_HAS_CLONE 1 # endif # else # define GTEST_HAS_CLONE 0 # endif // GTEST_OS_LINUX && !defined(__ia64__) #endif // GTEST_HAS_CLONE // Determines whether to support stream redirection. This is used to test // output correctness and to implement death tests. #ifndef GTEST_HAS_STREAM_REDIRECTION // By default, we assume that stream redirection is supported on all // platforms except known mobile ones. # if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || \ GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT # define GTEST_HAS_STREAM_REDIRECTION 0 # else # define GTEST_HAS_STREAM_REDIRECTION 1 # endif // !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_SYMBIAN #endif // GTEST_HAS_STREAM_REDIRECTION // Determines whether to support death tests. // Google Test does not support death tests for VC 7.1 and earlier as // abort() in a VC 7.1 application compiled as GUI in debug config // pops up a dialog window that cannot be suppressed programmatically. #if (GTEST_OS_LINUX || GTEST_OS_CYGWIN || GTEST_OS_SOLARIS || \ (GTEST_OS_MAC && !GTEST_OS_IOS) || \ (GTEST_OS_WINDOWS_DESKTOP && _MSC_VER >= 1400) || \ GTEST_OS_WINDOWS_MINGW || GTEST_OS_AIX || GTEST_OS_HPUX || \ GTEST_OS_OPENBSD || GTEST_OS_QNX || GTEST_OS_FREEBSD || \ GTEST_OS_NETBSD || GTEST_OS_FUCHSIA) # define GTEST_HAS_DEATH_TEST 1 #endif // Determines whether to support type-driven tests. // Typed tests need and variadic macros, which GCC, VC++ 8.0, // Sun Pro CC, IBM Visual Age, and HP aCC support. #if defined(__GNUC__) || (_MSC_VER >= 1400) || defined(__SUNPRO_CC) || \ defined(__IBMCPP__) || defined(__HP_aCC) # define GTEST_HAS_TYPED_TEST 1 # define GTEST_HAS_TYPED_TEST_P 1 #endif // Determines whether to support Combine(). This only makes sense when // value-parameterized tests are enabled. The implementation doesn't // work on Sun Studio since it doesn't understand templated conversion // operators. #if (GTEST_HAS_TR1_TUPLE || GTEST_HAS_STD_TUPLE_) && !defined(__SUNPRO_CC) # define GTEST_HAS_COMBINE 1 #endif // Determines whether the system compiler uses UTF-16 for encoding wide strings. #define GTEST_WIDE_STRING_USES_UTF16_ \ (GTEST_OS_WINDOWS || GTEST_OS_CYGWIN || GTEST_OS_SYMBIAN || GTEST_OS_AIX) // Determines whether test results can be streamed to a socket. #if GTEST_OS_LINUX # define GTEST_CAN_STREAM_RESULTS_ 1 #endif // Defines some utility macros. // The GNU compiler emits a warning if nested "if" statements are followed by // an "else" statement and braces are not used to explicitly disambiguate the // "else" binding. 
This leads to problems with code like: // // if (gate) // ASSERT_*(condition) << "Some message"; // // The "switch (0) case 0:" idiom is used to suppress this. #ifdef __INTEL_COMPILER # define GTEST_AMBIGUOUS_ELSE_BLOCKER_ #else # define GTEST_AMBIGUOUS_ELSE_BLOCKER_ switch (0) case 0: default: // NOLINT #endif // Use this annotation at the end of a struct/class definition to // prevent the compiler from optimizing away instances that are never // used. This is useful when all interesting logic happens inside the // c'tor and / or d'tor. Example: // // struct Foo { // Foo() { ... } // } GTEST_ATTRIBUTE_UNUSED_; // // Also use it after a variable or parameter declaration to tell the // compiler the variable/parameter does not have to be used. #if defined(__GNUC__) && !defined(COMPILER_ICC) # define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused)) #elif defined(__clang__) # if __has_attribute(unused) # define GTEST_ATTRIBUTE_UNUSED_ __attribute__ ((unused)) # endif #endif #ifndef GTEST_ATTRIBUTE_UNUSED_ # define GTEST_ATTRIBUTE_UNUSED_ #endif #if GTEST_LANG_CXX11 # define GTEST_CXX11_EQUALS_DELETE_ = delete #else // GTEST_LANG_CXX11 # define GTEST_CXX11_EQUALS_DELETE_ #endif // GTEST_LANG_CXX11 // Use this annotation before a function that takes a printf format string. #if (defined(__GNUC__) || defined(__clang__)) && !defined(COMPILER_ICC) # if defined(__MINGW_PRINTF_FORMAT) // MinGW has two different printf implementations. Ensure the format macro // matches the selected implementation. See // https://sourceforge.net/p/mingw-w64/wiki2/gnu%20printf/. # define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) \ __attribute__((__format__(__MINGW_PRINTF_FORMAT, string_index, \ first_to_check))) # else # define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) \ __attribute__((__format__(__printf__, string_index, first_to_check))) # endif #else # define GTEST_ATTRIBUTE_PRINTF_(string_index, first_to_check) #endif // A macro to disallow operator= // This should be used in the private: declarations for a class. #define GTEST_DISALLOW_ASSIGN_(type) \ void operator=(type const &) GTEST_CXX11_EQUALS_DELETE_ // A macro to disallow copy constructor and operator= // This should be used in the private: declarations for a class. #define GTEST_DISALLOW_COPY_AND_ASSIGN_(type) \ type(type const &) GTEST_CXX11_EQUALS_DELETE_; \ GTEST_DISALLOW_ASSIGN_(type) // Tell the compiler to warn about unused return values for functions declared // with this macro. The macro should be used on function declarations // following the argument list: // // Sprocket* AllocateSprocket() GTEST_MUST_USE_RESULT_; #if defined(__GNUC__) && (GTEST_GCC_VER_ >= 30400) && !defined(COMPILER_ICC) # define GTEST_MUST_USE_RESULT_ __attribute__ ((warn_unused_result)) #else # define GTEST_MUST_USE_RESULT_ #endif // __GNUC__ && (GTEST_GCC_VER_ >= 30400) && !COMPILER_ICC // MS C++ compiler emits warning when a conditional expression is compile time // constant. In some contexts this warning is false positive and needs to be // suppressed. Use the following two macros in such cases: // // GTEST_INTENTIONAL_CONST_COND_PUSH_() // while (true) { // GTEST_INTENTIONAL_CONST_COND_POP_() // } # define GTEST_INTENTIONAL_CONST_COND_PUSH_() \ GTEST_DISABLE_MSC_WARNINGS_PUSH_(4127) # define GTEST_INTENTIONAL_CONST_COND_POP_() \ GTEST_DISABLE_MSC_WARNINGS_POP_() // Determine whether the compiler supports Microsoft's Structured Exception // Handling. This is supported by several Windows compilers but generally // does not exist on any other system. 
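// As a rough illustration of what GTEST_HAS_SEH guards (a sketch only;
// MightUseSEH and DoWork are hypothetical functions, not part of this
// header), SEH-aware code typically wraps a call in Microsoft's
// __try/__except:
//
//   #if GTEST_HAS_SEH
//   int MightUseSEH() {
//     __try {
//       return DoWork();  // may raise a structured exception
//     } __except (EXCEPTION_EXECUTE_HANDLER) {
//       return -1;  // recover from, e.g., an access violation
//     }
//   }
//   #endif  // GTEST_HAS_SEH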
#ifndef GTEST_HAS_SEH // The user didn't tell us, so we need to figure it out. # if defined(_MSC_VER) || defined(__BORLANDC__) // These two compilers are known to support SEH. # define GTEST_HAS_SEH 1 # else // Assume no SEH. # define GTEST_HAS_SEH 0 # endif #define GTEST_IS_THREADSAFE \ (GTEST_HAS_MUTEX_AND_THREAD_LOCAL_ \ || (GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT) \ || GTEST_HAS_PTHREAD) #endif // GTEST_HAS_SEH // GTEST_API_ qualifies all symbols that must be exported. The definitions below // are guarded by #ifndef to give embedders a chance to define GTEST_API_ in // gtest/internal/custom/gtest-port.h #ifndef GTEST_API_ #ifdef _MSC_VER # if GTEST_LINKED_AS_SHARED_LIBRARY # define GTEST_API_ __declspec(dllimport) # elif GTEST_CREATE_SHARED_LIBRARY # define GTEST_API_ __declspec(dllexport) # endif #elif __GNUC__ >= 4 || defined(__clang__) # define GTEST_API_ __attribute__((visibility ("default"))) #endif // _MSC_VER #endif // GTEST_API_ #ifndef GTEST_API_ # define GTEST_API_ #endif // GTEST_API_ #ifndef GTEST_DEFAULT_DEATH_TEST_STYLE # define GTEST_DEFAULT_DEATH_TEST_STYLE "fast" #endif // GTEST_DEFAULT_DEATH_TEST_STYLE #ifdef __GNUC__ // Ask the compiler to never inline a given function. # define GTEST_NO_INLINE_ __attribute__((noinline)) #else # define GTEST_NO_INLINE_ #endif // _LIBCPP_VERSION is defined by the libc++ library from the LLVM project. #if !defined(GTEST_HAS_CXXABI_H_) # if defined(__GLIBCXX__) || (defined(_LIBCPP_VERSION) && !defined(_MSC_VER)) # define GTEST_HAS_CXXABI_H_ 1 # else # define GTEST_HAS_CXXABI_H_ 0 # endif #endif // A function level attribute to disable checking for use of uninitialized // memory when built with MemorySanitizer. #if defined(__clang__) # if __has_feature(memory_sanitizer) # define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ \ __attribute__((no_sanitize_memory)) # else # define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ # endif // __has_feature(memory_sanitizer) #else # define GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ #endif // __clang__ // A function level attribute to disable AddressSanitizer instrumentation. #if defined(__clang__) # if __has_feature(address_sanitizer) # define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ \ __attribute__((no_sanitize_address)) # else # define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ # endif // __has_feature(address_sanitizer) #else # define GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ #endif // __clang__ // A function level attribute to disable ThreadSanitizer instrumentation. #if defined(__clang__) # if __has_feature(thread_sanitizer) # define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ \ __attribute__((no_sanitize_thread)) # else # define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ # endif // __has_feature(thread_sanitizer) #else # define GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ #endif // __clang__ namespace testing { class Message; #if defined(GTEST_TUPLE_NAMESPACE_) // Import tuple and friends into the ::testing namespace. // It is part of our interface, having them in ::testing allows us to change // their types as needed. using GTEST_TUPLE_NAMESPACE_::get; using GTEST_TUPLE_NAMESPACE_::make_tuple; using GTEST_TUPLE_NAMESPACE_::tuple; using GTEST_TUPLE_NAMESPACE_::tuple_size; using GTEST_TUPLE_NAMESPACE_::tuple_element; #endif // defined(GTEST_TUPLE_NAMESPACE_) namespace internal { // A secret type that Google Test users don't know about. It has no // definition on purpose. Therefore it's impossible to create a // Secret object, which is what we want. 
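// One illustration of the idiom (a sketch; InternalOnly is a hypothetical
// function, and this is not necessarily how Google Test itself employs
// Secret): a parameter of type Secret* can only ever be passed NULL,
// because no object of type Secret can exist anywhere:
//
//   void InternalOnly(Secret* /* must be NULL */);
//
//   InternalOnly(NULL);  // compiles
//   InternalOnly(&x);    // cannot compile: nothing has type Secret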
class Secret; // The GTEST_COMPILE_ASSERT_ macro can be used to verify that a compile time // expression is true. For example, you could use it to verify the // size of a static array: // // GTEST_COMPILE_ASSERT_(GTEST_ARRAY_SIZE_(names) == NUM_NAMES, // names_incorrect_size); // // or to make sure a struct is smaller than a certain size: // // GTEST_COMPILE_ASSERT_(sizeof(foo) < 128, foo_too_large); // // The second argument to the macro is the name of the variable. If // the expression is false, most compilers will issue a warning/error // containing the name of the variable. #if GTEST_LANG_CXX11 # define GTEST_COMPILE_ASSERT_(expr, msg) static_assert(expr, #msg) #else // !GTEST_LANG_CXX11 template struct CompileAssert { }; # define GTEST_COMPILE_ASSERT_(expr, msg) \ typedef ::testing::internal::CompileAssert<(static_cast(expr))> \ msg[static_cast(expr) ? 1 : -1] GTEST_ATTRIBUTE_UNUSED_ #endif // !GTEST_LANG_CXX11 // Implementation details of GTEST_COMPILE_ASSERT_: // // (In C++11, we simply use static_assert instead of the following) // // - GTEST_COMPILE_ASSERT_ works by defining an array type that has -1 // elements (and thus is invalid) when the expression is false. // // - The simpler definition // // #define GTEST_COMPILE_ASSERT_(expr, msg) typedef char msg[(expr) ? 1 : -1] // // does not work, as gcc supports variable-length arrays whose sizes // are determined at run-time (this is gcc's extension and not part // of the C++ standard). As a result, gcc fails to reject the // following code with the simple definition: // // int foo; // GTEST_COMPILE_ASSERT_(foo, msg); // not supposed to compile as foo is // // not a compile-time constant. // // - By using the type CompileAssert<(bool(expr))>, we ensures that // expr is a compile-time constant. (Template arguments must be // determined at compile-time.) // // - The outter parentheses in CompileAssert<(bool(expr))> are necessary // to work around a bug in gcc 3.4.4 and 4.0.1. If we had written // // CompileAssert // // instead, these compilers will refuse to compile // // GTEST_COMPILE_ASSERT_(5 > 0, some_message); // // (They seem to think the ">" in "5 > 0" marks the end of the // template argument list.) // // - The array size is (bool(expr) ? 1 : -1), instead of simply // // ((expr) ? 1 : -1). // // This is to avoid running into a bug in MS VC 7.1, which // causes ((0.0) ? 1 : -1) to incorrectly evaluate to 1. // StaticAssertTypeEqHelper is used by StaticAssertTypeEq defined in gtest.h. // // This template is declared, but intentionally undefined. template struct StaticAssertTypeEqHelper; template struct StaticAssertTypeEqHelper { enum { value = true }; }; // Same as std::is_same<>. template struct IsSame { enum { value = false }; }; template struct IsSame { enum { value = true }; }; // Evaluates to the number of elements in 'array'. #define GTEST_ARRAY_SIZE_(array) (sizeof(array) / sizeof(array[0])) #if GTEST_HAS_GLOBAL_STRING typedef ::string string; #else typedef ::std::string string; #endif // GTEST_HAS_GLOBAL_STRING #if GTEST_HAS_GLOBAL_WSTRING typedef ::wstring wstring; #elif GTEST_HAS_STD_WSTRING typedef ::std::wstring wstring; #endif // GTEST_HAS_GLOBAL_WSTRING // A helper for suppressing warnings on constant condition. It just // returns 'condition'. GTEST_API_ bool IsTrue(bool condition); // Defines scoped_ptr. // This implementation of scoped_ptr is PARTIAL - it only contains // enough stuff to satisfy Google Test's need. 
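// A sketch of typical scoped_ptr usage (MyType is a hypothetical class):
//
//   {
//     scoped_ptr<MyType> p(new MyType);
//     p->DoSomething();       // behaves like a raw pointer
//     MyType* raw = p.get();  // observe without giving up ownership
//   }                         // the MyType object is deleted here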
template class scoped_ptr { public: typedef T element_type; explicit scoped_ptr(T* p = NULL) : ptr_(p) {} ~scoped_ptr() { reset(); } T& operator*() const { return *ptr_; } T* operator->() const { return ptr_; } T* get() const { return ptr_; } T* release() { T* const ptr = ptr_; ptr_ = NULL; return ptr; } void reset(T* p = NULL) { if (p != ptr_) { if (IsTrue(sizeof(T) > 0)) { // Makes sure T is a complete type. delete ptr_; } ptr_ = p; } } friend void swap(scoped_ptr& a, scoped_ptr& b) { using std::swap; swap(a.ptr_, b.ptr_); } private: T* ptr_; GTEST_DISALLOW_COPY_AND_ASSIGN_(scoped_ptr); }; // Defines RE. #if GTEST_USES_PCRE // if used, PCRE is injected by custom/gtest-port.h #elif GTEST_USES_POSIX_RE || GTEST_USES_SIMPLE_RE // A simple C++ wrapper for . It uses the POSIX Extended // Regular Expression syntax. class GTEST_API_ RE { public: // A copy constructor is required by the Standard to initialize object // references from r-values. RE(const RE& other) { Init(other.pattern()); } // Constructs an RE from a string. RE(const ::std::string& regex) { Init(regex.c_str()); } // NOLINT # if GTEST_HAS_GLOBAL_STRING RE(const ::string& regex) { Init(regex.c_str()); } // NOLINT # endif // GTEST_HAS_GLOBAL_STRING RE(const char* regex) { Init(regex); } // NOLINT ~RE(); // Returns the string representation of the regex. const char* pattern() const { return pattern_; } // FullMatch(str, re) returns true iff regular expression re matches // the entire str. // PartialMatch(str, re) returns true iff regular expression re // matches a substring of str (including str itself). // // FIXME: make FullMatch() and PartialMatch() work // when str contains NUL characters. static bool FullMatch(const ::std::string& str, const RE& re) { return FullMatch(str.c_str(), re); } static bool PartialMatch(const ::std::string& str, const RE& re) { return PartialMatch(str.c_str(), re); } # if GTEST_HAS_GLOBAL_STRING static bool FullMatch(const ::string& str, const RE& re) { return FullMatch(str.c_str(), re); } static bool PartialMatch(const ::string& str, const RE& re) { return PartialMatch(str.c_str(), re); } # endif // GTEST_HAS_GLOBAL_STRING static bool FullMatch(const char* str, const RE& re); static bool PartialMatch(const char* str, const RE& re); private: void Init(const char* regex); // We use a const char* instead of an std::string, as Google Test used to be // used where std::string is not available. FIXME: change to // std::string. const char* pattern_; bool is_valid_; # if GTEST_USES_POSIX_RE regex_t full_regex_; // For FullMatch(). regex_t partial_regex_; // For PartialMatch(). # else // GTEST_USES_SIMPLE_RE const char* full_pattern_; // For FullMatch(); # endif GTEST_DISALLOW_ASSIGN_(RE); }; #endif // GTEST_USES_PCRE // Formats a source file path and a line number as they would appear // in an error message from the compiler used to compile this code. GTEST_API_ ::std::string FormatFileLocation(const char* file, int line); // Formats a file location for compiler-independent XML output. // Although this function is not platform dependent, we put it next to // FormatFileLocation in order to contrast the two functions. GTEST_API_ ::std::string FormatCompilerIndependentFileLocation(const char* file, int line); // Defines logging utilities: // GTEST_LOG_(severity) - logs messages at the specified severity level. The // message itself is streamed into the macro. // LogToStderr() - directs all log messages to stderr. // FlushInfoLog() - flushes informational log messages. 
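// For example (a sketch of typical internal usage; the message text is
// arbitrary):
//
//   GTEST_LOG_(WARNING) << "Unexpected flag value: " << value;
//   GTEST_LOG_(FATAL) << "Cannot continue.";  // aborts after logging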
enum GTestLogSeverity { GTEST_INFO, GTEST_WARNING, GTEST_ERROR, GTEST_FATAL }; // Formats log entry severity, provides a stream object for streaming the // log message, and terminates the message with a newline when going out of // scope. class GTEST_API_ GTestLog { public: GTestLog(GTestLogSeverity severity, const char* file, int line); // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program. ~GTestLog(); ::std::ostream& GetStream() { return ::std::cerr; } private: const GTestLogSeverity severity_; GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestLog); }; #if !defined(GTEST_LOG_) # define GTEST_LOG_(severity) \ ::testing::internal::GTestLog(::testing::internal::GTEST_##severity, \ __FILE__, __LINE__).GetStream() inline void LogToStderr() {} inline void FlushInfoLog() { fflush(NULL); } #endif // !defined(GTEST_LOG_) #if !defined(GTEST_CHECK_) // INTERNAL IMPLEMENTATION - DO NOT USE. // // GTEST_CHECK_ is an all-mode assert. It aborts the program if the condition // is not satisfied. // Synopsys: // GTEST_CHECK_(boolean_condition); // or // GTEST_CHECK_(boolean_condition) << "Additional message"; // // This checks the condition and if the condition is not satisfied // it prints message about the condition violation, including the // condition itself, plus additional message streamed into it, if any, // and then it aborts the program. It aborts the program irrespective of // whether it is built in the debug mode or not. # define GTEST_CHECK_(condition) \ GTEST_AMBIGUOUS_ELSE_BLOCKER_ \ if (::testing::internal::IsTrue(condition)) \ ; \ else \ GTEST_LOG_(FATAL) << "Condition " #condition " failed. " #endif // !defined(GTEST_CHECK_) // An all-mode assert to verify that the given POSIX-style function // call returns 0 (indicating success). Known limitation: this // doesn't expand to a balanced 'if' statement, so enclose the macro // in {} if you need to use it as the only statement in an 'if' // branch. #define GTEST_CHECK_POSIX_SUCCESS_(posix_call) \ if (const int gtest_error = (posix_call)) \ GTEST_LOG_(FATAL) << #posix_call << "failed with error " \ << gtest_error // Adds reference to a type if it is not a reference type, // otherwise leaves it unchanged. This is the same as // tr1::add_reference, which is not widely available yet. template struct AddReference { typedef T& type; }; // NOLINT template struct AddReference { typedef T& type; }; // NOLINT // A handy wrapper around AddReference that works when the argument T // depends on template parameters. #define GTEST_ADD_REFERENCE_(T) \ typename ::testing::internal::AddReference::type // Transforms "T" into "const T&" according to standard reference collapsing // rules (this is only needed as a backport for C++98 compilers that do not // support reference collapsing). Specifically, it transforms: // // char ==> const char& // const char ==> const char& // char& ==> char& // const char& ==> const char& // // Note that the non-const reference will not have "const" added. This is // standard, and necessary so that "T" can always bind to "const T&". template struct ConstRef { typedef const T& type; }; template struct ConstRef { typedef T& type; }; // The argument T must depend on some template parameters. 
#define GTEST_REFERENCE_TO_CONST_(T) \ typename ::testing::internal::ConstRef::type #if GTEST_HAS_STD_MOVE_ using std::forward; using std::move; template struct RvalueRef { typedef T&& type; }; #else // GTEST_HAS_STD_MOVE_ template const T& move(const T& t) { return t; } template GTEST_ADD_REFERENCE_(T) forward(GTEST_ADD_REFERENCE_(T) t) { return t; } template struct RvalueRef { typedef const T& type; }; #endif // GTEST_HAS_STD_MOVE_ // INTERNAL IMPLEMENTATION - DO NOT USE IN USER CODE. // // Use ImplicitCast_ as a safe version of static_cast for upcasting in // the type hierarchy (e.g. casting a Foo* to a SuperclassOfFoo* or a // const Foo*). When you use ImplicitCast_, the compiler checks that // the cast is safe. Such explicit ImplicitCast_s are necessary in // surprisingly many situations where C++ demands an exact type match // instead of an argument type convertable to a target type. // // The syntax for using ImplicitCast_ is the same as for static_cast: // // ImplicitCast_(expr) // // ImplicitCast_ would have been part of the C++ standard library, // but the proposal was submitted too late. It will probably make // its way into the language in the future. // // This relatively ugly name is intentional. It prevents clashes with // similar functions users may have (e.g., implicit_cast). The internal // namespace alone is not enough because the function can be found by ADL. template inline To ImplicitCast_(To x) { return x; } // When you upcast (that is, cast a pointer from type Foo to type // SuperclassOfFoo), it's fine to use ImplicitCast_<>, since upcasts // always succeed. When you downcast (that is, cast a pointer from // type Foo to type SubclassOfFoo), static_cast<> isn't safe, because // how do you know the pointer is really of type SubclassOfFoo? It // could be a bare Foo, or of type DifferentSubclassOfFoo. Thus, // when you downcast, you should use this macro. In debug mode, we // use dynamic_cast<> to double-check the downcast is legal (we die // if it's not). In normal mode, we do the efficient static_cast<> // instead. Thus, it's important to test in debug mode to make sure // the cast is legal! // This is the only place in the code we should use dynamic_cast<>. // In particular, you SHOULDN'T be using dynamic_cast<> in order to // do RTTI (eg code like this: // if (dynamic_cast(foo)) HandleASubclass1Object(foo); // if (dynamic_cast(foo)) HandleASubclass2Object(foo); // You should design the code some other way not to need this. // // This relatively ugly name is intentional. It prevents clashes with // similar functions users may have (e.g., down_cast). The internal // namespace alone is not enough because the function can be found by ADL. template // use like this: DownCast_(foo); inline To DownCast_(From* f) { // so we only accept pointers // Ensures that To is a sub-type of From *. This test is here only // for compile-time type checking, and has no overhead in an // optimized build at run-time, as it will be optimized away // completely. GTEST_INTENTIONAL_CONST_COND_PUSH_() if (false) { GTEST_INTENTIONAL_CONST_COND_POP_() const To to = NULL; ::testing::internal::ImplicitCast_(to); } #if GTEST_HAS_RTTI // RTTI: debug mode only! GTEST_CHECK_(f == NULL || dynamic_cast(f) != NULL); #endif return static_cast(f); } // Downcasts the pointer of type Base to Derived. // Derived must be a subclass of Base. The parameter MUST // point to a class of type Derived, not any subclass of it. // When RTTI is available, the function performs a runtime // check to enforce this. 
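// A sketch of intended usage (Shape and Circle are hypothetical classes,
// with Circle deriving from the polymorphic base Shape):
//
//   Shape* s = new Circle;
//   Circle* c = CheckedDowncastToActualType<Circle>(s);  // OK: *s is a Circle
//
// Passing a Shape* that actually points to a different subclass would trip
// the typeid check below in an RTTI-enabled build.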
template Derived* CheckedDowncastToActualType(Base* base) { #if GTEST_HAS_RTTI GTEST_CHECK_(typeid(*base) == typeid(Derived)); #endif #if GTEST_HAS_DOWNCAST_ return ::down_cast(base); #elif GTEST_HAS_RTTI return dynamic_cast(base); // NOLINT #else return static_cast(base); // Poor man's downcast. #endif } #if GTEST_HAS_STREAM_REDIRECTION // Defines the stderr capturer: // CaptureStdout - starts capturing stdout. // GetCapturedStdout - stops capturing stdout and returns the captured string. // CaptureStderr - starts capturing stderr. // GetCapturedStderr - stops capturing stderr and returns the captured string. // GTEST_API_ void CaptureStdout(); GTEST_API_ std::string GetCapturedStdout(); GTEST_API_ void CaptureStderr(); GTEST_API_ std::string GetCapturedStderr(); #endif // GTEST_HAS_STREAM_REDIRECTION // Returns the size (in bytes) of a file. GTEST_API_ size_t GetFileSize(FILE* file); // Reads the entire content of a file as a string. GTEST_API_ std::string ReadEntireFile(FILE* file); // All command line arguments. GTEST_API_ std::vector GetArgvs(); #if GTEST_HAS_DEATH_TEST std::vector GetInjectableArgvs(); // Deprecated: pass the args vector by value instead. void SetInjectableArgvs(const std::vector* new_argvs); void SetInjectableArgvs(const std::vector& new_argvs); #if GTEST_HAS_GLOBAL_STRING void SetInjectableArgvs(const std::vector< ::string>& new_argvs); #endif // GTEST_HAS_GLOBAL_STRING void ClearInjectableArgvs(); #endif // GTEST_HAS_DEATH_TEST // Defines synchronization primitives. #if GTEST_IS_THREADSAFE # if GTEST_HAS_PTHREAD // Sleeps for (roughly) n milliseconds. This function is only for testing // Google Test's own constructs. Don't use it in user tests, either // directly or indirectly. inline void SleepMilliseconds(int n) { const timespec time = { 0, // 0 seconds. n * 1000L * 1000L, // And n ms. }; nanosleep(&time, NULL); } # endif // GTEST_HAS_PTHREAD # if GTEST_HAS_NOTIFICATION_ // Notification has already been imported into the namespace. // Nothing to do here. # elif GTEST_HAS_PTHREAD // Allows a controller thread to pause execution of newly created // threads until notified. Instances of this class must be created // and destroyed in the controller thread. // // This class is only for testing Google Test's own constructs. Do not // use it in user tests, either directly or indirectly. class Notification { public: Notification() : notified_(false) { GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL)); } ~Notification() { pthread_mutex_destroy(&mutex_); } // Notifies all threads created with this notification to start. Must // be called from the controller thread. void Notify() { pthread_mutex_lock(&mutex_); notified_ = true; pthread_mutex_unlock(&mutex_); } // Blocks until the controller thread notifies. Must be called from a test // thread. void WaitForNotification() { for (;;) { pthread_mutex_lock(&mutex_); const bool notified = notified_; pthread_mutex_unlock(&mutex_); if (notified) break; SleepMilliseconds(10); } } private: pthread_mutex_t mutex_; bool notified_; GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification); }; # elif GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT GTEST_API_ void SleepMilliseconds(int n); // Provides leak-safe Windows kernel handle ownership. // Used in death tests and in threading support. class GTEST_API_ AutoHandle { public: // Assume that Win32 HANDLE type is equivalent to void*. Doing so allows us to // avoid including in this header file. 
Including is // undesirable because it defines a lot of symbols and macros that tend to // conflict with client code. This assumption is verified by // WindowsTypesTest.HANDLEIsVoidStar. typedef void* Handle; AutoHandle(); explicit AutoHandle(Handle handle); ~AutoHandle(); Handle Get() const; void Reset(); void Reset(Handle handle); private: // Returns true iff the handle is a valid handle object that can be closed. bool IsCloseable() const; Handle handle_; GTEST_DISALLOW_COPY_AND_ASSIGN_(AutoHandle); }; // Allows a controller thread to pause execution of newly created // threads until notified. Instances of this class must be created // and destroyed in the controller thread. // // This class is only for testing Google Test's own constructs. Do not // use it in user tests, either directly or indirectly. class GTEST_API_ Notification { public: Notification(); void Notify(); void WaitForNotification(); private: AutoHandle event_; GTEST_DISALLOW_COPY_AND_ASSIGN_(Notification); }; # endif // GTEST_HAS_NOTIFICATION_ // On MinGW, we can have both GTEST_OS_WINDOWS and GTEST_HAS_PTHREAD // defined, but we don't want to use MinGW's pthreads implementation, which // has conformance problems with some versions of the POSIX standard. # if GTEST_HAS_PTHREAD && !GTEST_OS_WINDOWS_MINGW // As a C-function, ThreadFuncWithCLinkage cannot be templated itself. // Consequently, it cannot select a correct instantiation of ThreadWithParam // in order to call its Run(). Introducing ThreadWithParamBase as a // non-templated base class for ThreadWithParam allows us to bypass this // problem. class ThreadWithParamBase { public: virtual ~ThreadWithParamBase() {} virtual void Run() = 0; }; // pthread_create() accepts a pointer to a function type with the C linkage. // According to the Standard (7.5/1), function types with different linkages // are different even if they are otherwise identical. Some compilers (for // example, SunStudio) treat them as different types. Since class methods // cannot be defined with C-linkage we need to define a free C-function to // pass into pthread_create(). extern "C" inline void* ThreadFuncWithCLinkage(void* thread) { static_cast(thread)->Run(); return NULL; } // Helper class for testing Google Test's multi-threading constructs. // To use it, write: // // void ThreadFunc(int param) { /* Do things with param */ } // Notification thread_can_start; // ... // // The thread_can_start parameter is optional; you can supply NULL. // ThreadWithParam thread(&ThreadFunc, 5, &thread_can_start); // thread_can_start.Notify(); // // These classes are only for testing Google Test's own constructs. Do // not use them in user tests, either directly or indirectly. template class ThreadWithParam : public ThreadWithParamBase { public: typedef void UserThreadFunc(T); ThreadWithParam(UserThreadFunc* func, T param, Notification* thread_can_start) : func_(func), param_(param), thread_can_start_(thread_can_start), finished_(false) { ThreadWithParamBase* const base = this; // The thread can be created only after all fields except thread_ // have been initialized. GTEST_CHECK_POSIX_SUCCESS_( pthread_create(&thread_, 0, &ThreadFuncWithCLinkage, base)); } ~ThreadWithParam() { Join(); } void Join() { if (!finished_) { GTEST_CHECK_POSIX_SUCCESS_(pthread_join(thread_, 0)); finished_ = true; } } virtual void Run() { if (thread_can_start_ != NULL) thread_can_start_->WaitForNotification(); func_(param_); } private: UserThreadFunc* const func_; // User-supplied thread function. 
const T param_; // User-supplied parameter to the thread function. // When non-NULL, used to block execution until the controller thread // notifies. Notification* const thread_can_start_; bool finished_; // true iff we know that the thread function has finished. pthread_t thread_; // The native thread object. GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam); }; # endif // !GTEST_OS_WINDOWS && GTEST_HAS_PTHREAD || // GTEST_HAS_MUTEX_AND_THREAD_LOCAL_ # if GTEST_HAS_MUTEX_AND_THREAD_LOCAL_ // Mutex and ThreadLocal have already been imported into the namespace. // Nothing to do here. # elif GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT // Mutex implements mutex on Windows platforms. It is used in conjunction // with class MutexLock: // // Mutex mutex; // ... // MutexLock lock(&mutex); // Acquires the mutex and releases it at the // // end of the current scope. // // A static Mutex *must* be defined or declared using one of the following // macros: // GTEST_DEFINE_STATIC_MUTEX_(g_some_mutex); // GTEST_DECLARE_STATIC_MUTEX_(g_some_mutex); // // (A non-static Mutex is defined/declared in the usual way). class GTEST_API_ Mutex { public: enum MutexType { kStatic = 0, kDynamic = 1 }; // We rely on kStaticMutex being 0 as it is to what the linker initializes // type_ in static mutexes. critical_section_ will be initialized lazily // in ThreadSafeLazyInit(). enum StaticConstructorSelector { kStaticMutex = 0 }; // This constructor intentionally does nothing. It relies on type_ being // statically initialized to 0 (effectively setting it to kStatic) and on // ThreadSafeLazyInit() to lazily initialize the rest of the members. explicit Mutex(StaticConstructorSelector /*dummy*/) {} Mutex(); ~Mutex(); void Lock(); void Unlock(); // Does nothing if the current thread holds the mutex. Otherwise, crashes // with high probability. void AssertHeld(); private: // Initializes owner_thread_id_ and critical_section_ in static mutexes. void ThreadSafeLazyInit(); // Per https://blogs.msdn.microsoft.com/oldnewthing/20040223-00/?p=40503, // we assume that 0 is an invalid value for thread IDs. unsigned int owner_thread_id_; // For static mutexes, we rely on these members being initialized to zeros // by the linker. MutexType type_; long critical_section_init_phase_; // NOLINT GTEST_CRITICAL_SECTION* critical_section_; GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex); }; # define GTEST_DECLARE_STATIC_MUTEX_(mutex) \ extern ::testing::internal::Mutex mutex # define GTEST_DEFINE_STATIC_MUTEX_(mutex) \ ::testing::internal::Mutex mutex(::testing::internal::Mutex::kStaticMutex) // We cannot name this class MutexLock because the ctor declaration would // conflict with a macro named MutexLock, which is defined on some // platforms. That macro is used as a defensive measure to prevent against // inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than // "MutexLock l(&mu)". Hence the typedef trick below. class GTestMutexLock { public: explicit GTestMutexLock(Mutex* mutex) : mutex_(mutex) { mutex_->Lock(); } ~GTestMutexLock() { mutex_->Unlock(); } private: Mutex* const mutex_; GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock); }; typedef GTestMutexLock MutexLock; // Base class for ValueHolder. Allows a caller to hold and delete a value // without knowing its type. class ThreadLocalValueHolderBase { public: virtual ~ThreadLocalValueHolderBase() {} }; // Provides a way for a thread to send notifications to a ThreadLocal // regardless of its parameter type. 
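// ThreadLocalBase below uses the same type-erasure idiom as
// ThreadLocalValueHolderBase above: a non-templated interface lets callers
// create and destroy per-thread values without knowing T. A generic sketch
// of the idiom (the names are hypothetical, not gtest code):
//
//   class HolderBase { public: virtual ~HolderBase() {} };
//   template <typename T>
//   class Holder : public HolderBase {
//    public:
//     explicit Holder(const T& v) : value_(v) {}
//    private:
//     T value_;
//   };
//
//   HolderBase* h = new Holder<int>(42);
//   delete h;  // destroys the held int through the virtual dtor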
class ThreadLocalBase { public: // Creates a new ValueHolder object holding a default value passed to // this ThreadLocal's constructor and returns it. It is the caller's // responsibility not to call this when the ThreadLocal instance already // has a value on the current thread. virtual ThreadLocalValueHolderBase* NewValueForCurrentThread() const = 0; protected: ThreadLocalBase() {} virtual ~ThreadLocalBase() {} private: GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocalBase); }; // Maps a thread to a set of ThreadLocals that have values instantiated on that // thread and notifies them when the thread exits. A ThreadLocal instance is // expected to persist until all threads it has values on have terminated. class GTEST_API_ ThreadLocalRegistry { public: // Registers thread_local_instance as having value on the current thread. // Returns a value that can be used to identify the thread from other threads. static ThreadLocalValueHolderBase* GetValueOnCurrentThread( const ThreadLocalBase* thread_local_instance); // Invoked when a ThreadLocal instance is destroyed. static void OnThreadLocalDestroyed( const ThreadLocalBase* thread_local_instance); }; class GTEST_API_ ThreadWithParamBase { public: void Join(); protected: class Runnable { public: virtual ~Runnable() {} virtual void Run() = 0; }; ThreadWithParamBase(Runnable *runnable, Notification* thread_can_start); virtual ~ThreadWithParamBase(); private: AutoHandle thread_; }; // Helper class for testing Google Test's multi-threading constructs. template class ThreadWithParam : public ThreadWithParamBase { public: typedef void UserThreadFunc(T); ThreadWithParam(UserThreadFunc* func, T param, Notification* thread_can_start) : ThreadWithParamBase(new RunnableImpl(func, param), thread_can_start) { } virtual ~ThreadWithParam() {} private: class RunnableImpl : public Runnable { public: RunnableImpl(UserThreadFunc* func, T param) : func_(func), param_(param) { } virtual ~RunnableImpl() {} virtual void Run() { func_(param_); } private: UserThreadFunc* const func_; const T param_; GTEST_DISALLOW_COPY_AND_ASSIGN_(RunnableImpl); }; GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParam); }; // Implements thread-local storage on Windows systems. // // // Thread 1 // ThreadLocal tl(100); // 100 is the default value for each thread. // // // Thread 2 // tl.set(150); // Changes the value for thread 2 only. // EXPECT_EQ(150, tl.get()); // // // Thread 1 // EXPECT_EQ(100, tl.get()); // In thread 1, tl has the original value. // tl.set(200); // EXPECT_EQ(200, tl.get()); // // The template type argument T must have a public copy constructor. // In addition, the default ThreadLocal constructor requires T to have // a public default constructor. // // The users of a TheadLocal instance have to make sure that all but one // threads (including the main one) using that instance have exited before // destroying it. Otherwise, the per-thread objects managed for them by the // ThreadLocal instance are not guaranteed to be destroyed on all platforms. // // Google Test only uses global ThreadLocal objects. That means they // will die after main() has returned. Therefore, no per-thread // object managed by Google Test will be leaked as long as all threads // using Google Test have exited when main() returns. 
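//
// A minimal illustrative sketch (not part of the original header) of the
// rules above; the names `counter` and `WorkerBody` are hypothetical:
//
//   static ThreadLocal<int> counter(0);  // global, destroyed after main()
//
//   void WorkerBody() {
//     counter.set(counter.get() + 1);    // touches this thread's copy only
//   }
//
// Each thread that touches `counter` lazily gets its own int (initialized
// to 0 here); the registry below reclaims it when that thread exits.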
template class ThreadLocal : public ThreadLocalBase { public: ThreadLocal() : default_factory_(new DefaultValueHolderFactory()) {} explicit ThreadLocal(const T& value) : default_factory_(new InstanceValueHolderFactory(value)) {} ~ThreadLocal() { ThreadLocalRegistry::OnThreadLocalDestroyed(this); } T* pointer() { return GetOrCreateValue(); } const T* pointer() const { return GetOrCreateValue(); } const T& get() const { return *pointer(); } void set(const T& value) { *pointer() = value; } private: // Holds a value of T. Can be deleted via its base class without the caller // knowing the type of T. class ValueHolder : public ThreadLocalValueHolderBase { public: ValueHolder() : value_() {} explicit ValueHolder(const T& value) : value_(value) {} T* pointer() { return &value_; } private: T value_; GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder); }; T* GetOrCreateValue() const { return static_cast( ThreadLocalRegistry::GetValueOnCurrentThread(this))->pointer(); } virtual ThreadLocalValueHolderBase* NewValueForCurrentThread() const { return default_factory_->MakeNewHolder(); } class ValueHolderFactory { public: ValueHolderFactory() {} virtual ~ValueHolderFactory() {} virtual ValueHolder* MakeNewHolder() const = 0; private: GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolderFactory); }; class DefaultValueHolderFactory : public ValueHolderFactory { public: DefaultValueHolderFactory() {} virtual ValueHolder* MakeNewHolder() const { return new ValueHolder(); } private: GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultValueHolderFactory); }; class InstanceValueHolderFactory : public ValueHolderFactory { public: explicit InstanceValueHolderFactory(const T& value) : value_(value) {} virtual ValueHolder* MakeNewHolder() const { return new ValueHolder(value_); } private: const T value_; // The value for each thread. GTEST_DISALLOW_COPY_AND_ASSIGN_(InstanceValueHolderFactory); }; scoped_ptr default_factory_; GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal); }; # elif GTEST_HAS_PTHREAD // MutexBase and Mutex implement mutex on pthreads-based platforms. class MutexBase { public: // Acquires this mutex. void Lock() { GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_lock(&mutex_)); owner_ = pthread_self(); has_owner_ = true; } // Releases this mutex. void Unlock() { // Since the lock is being released the owner_ field should no longer be // considered valid. We don't protect writing to has_owner_ here, as it's // the caller's responsibility to ensure that the current thread holds the // mutex when this is called. has_owner_ = false; GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_unlock(&mutex_)); } // Does nothing if the current thread holds the mutex. Otherwise, crashes // with high probability. void AssertHeld() const { GTEST_CHECK_(has_owner_ && pthread_equal(owner_, pthread_self())) << "The current thread is not holding the mutex @" << this; } // A static mutex may be used before main() is entered. It may even // be used before the dynamic initialization stage. Therefore we // must be able to initialize a static mutex object at link time. // This means MutexBase has to be a POD and its member variables // have to be public. public: pthread_mutex_t mutex_; // The underlying pthread mutex. // has_owner_ indicates whether the owner_ field below contains a valid thread // ID and is therefore safe to inspect (e.g., to use in pthread_equal()). All // accesses to the owner_ field should be protected by a check of this field. 
// An alternative might be to memset() owner_ to all zeros, but there's no // guarantee that a zero'd pthread_t is necessarily invalid or even different // from pthread_self(). bool has_owner_; pthread_t owner_; // The thread holding the mutex. }; // Forward-declares a static mutex. # define GTEST_DECLARE_STATIC_MUTEX_(mutex) \ extern ::testing::internal::MutexBase mutex // Defines and statically (i.e. at link time) initializes a static mutex. // The initialization list here does not explicitly initialize each field, // instead relying on default initialization for the unspecified fields. In // particular, the owner_ field (a pthread_t) is not explicitly initialized. // This allows initialization to work whether pthread_t is a scalar or struct. // The flag -Wmissing-field-initializers must not be specified for this to work. #define GTEST_DEFINE_STATIC_MUTEX_(mutex) \ ::testing::internal::MutexBase mutex = {PTHREAD_MUTEX_INITIALIZER, false, 0} // The Mutex class can only be used for mutexes created at runtime. It // shares its API with MutexBase otherwise. class Mutex : public MutexBase { public: Mutex() { GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_init(&mutex_, NULL)); has_owner_ = false; } ~Mutex() { GTEST_CHECK_POSIX_SUCCESS_(pthread_mutex_destroy(&mutex_)); } private: GTEST_DISALLOW_COPY_AND_ASSIGN_(Mutex); }; // We cannot name this class MutexLock because the ctor declaration would // conflict with a macro named MutexLock, which is defined on some // platforms. That macro is used as a defensive measure to prevent against // inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than // "MutexLock l(&mu)". Hence the typedef trick below. class GTestMutexLock { public: explicit GTestMutexLock(MutexBase* mutex) : mutex_(mutex) { mutex_->Lock(); } ~GTestMutexLock() { mutex_->Unlock(); } private: MutexBase* const mutex_; GTEST_DISALLOW_COPY_AND_ASSIGN_(GTestMutexLock); }; typedef GTestMutexLock MutexLock; // Helpers for ThreadLocal. // pthread_key_create() requires DeleteThreadLocalValue() to have // C-linkage. Therefore it cannot be templatized to access // ThreadLocal. Hence the need for class // ThreadLocalValueHolderBase. class ThreadLocalValueHolderBase { public: virtual ~ThreadLocalValueHolderBase() {} }; // Called by pthread to delete thread-local data stored by // pthread_setspecific(). extern "C" inline void DeleteThreadLocalValue(void* value_holder) { delete static_cast(value_holder); } // Implements thread-local storage on pthreads-based systems. template class GTEST_API_ ThreadLocal { public: ThreadLocal() : key_(CreateKey()), default_factory_(new DefaultValueHolderFactory()) {} explicit ThreadLocal(const T& value) : key_(CreateKey()), default_factory_(new InstanceValueHolderFactory(value)) {} ~ThreadLocal() { // Destroys the managed object for the current thread, if any. DeleteThreadLocalValue(pthread_getspecific(key_)); // Releases resources associated with the key. This will *not* // delete managed objects for other threads. GTEST_CHECK_POSIX_SUCCESS_(pthread_key_delete(key_)); } T* pointer() { return GetOrCreateValue(); } const T* pointer() const { return GetOrCreateValue(); } const T& get() const { return *pointer(); } void set(const T& value) { *pointer() = value; } private: // Holds a value of type T. 
class ValueHolder : public ThreadLocalValueHolderBase { public: ValueHolder() : value_() {} explicit ValueHolder(const T& value) : value_(value) {} T* pointer() { return &value_; } private: T value_; GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolder); }; static pthread_key_t CreateKey() { pthread_key_t key; // When a thread exits, DeleteThreadLocalValue() will be called on // the object managed for that thread. GTEST_CHECK_POSIX_SUCCESS_( pthread_key_create(&key, &DeleteThreadLocalValue)); return key; } T* GetOrCreateValue() const { ThreadLocalValueHolderBase* const holder = static_cast(pthread_getspecific(key_)); if (holder != NULL) { return CheckedDowncastToActualType(holder)->pointer(); } ValueHolder* const new_holder = default_factory_->MakeNewHolder(); ThreadLocalValueHolderBase* const holder_base = new_holder; GTEST_CHECK_POSIX_SUCCESS_(pthread_setspecific(key_, holder_base)); return new_holder->pointer(); } class ValueHolderFactory { public: ValueHolderFactory() {} virtual ~ValueHolderFactory() {} virtual ValueHolder* MakeNewHolder() const = 0; private: GTEST_DISALLOW_COPY_AND_ASSIGN_(ValueHolderFactory); }; class DefaultValueHolderFactory : public ValueHolderFactory { public: DefaultValueHolderFactory() {} virtual ValueHolder* MakeNewHolder() const { return new ValueHolder(); } private: GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultValueHolderFactory); }; class InstanceValueHolderFactory : public ValueHolderFactory { public: explicit InstanceValueHolderFactory(const T& value) : value_(value) {} virtual ValueHolder* MakeNewHolder() const { return new ValueHolder(value_); } private: const T value_; // The value for each thread. GTEST_DISALLOW_COPY_AND_ASSIGN_(InstanceValueHolderFactory); }; // A key pthreads uses for looking up per-thread values. const pthread_key_t key_; scoped_ptr default_factory_; GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadLocal); }; # endif // GTEST_HAS_MUTEX_AND_THREAD_LOCAL_ #else // GTEST_IS_THREADSAFE // A dummy implementation of synchronization primitives (mutex, lock, // and thread-local variable). Necessary for compiling Google Test where // mutex is not supported - using Google Test in multiple threads is not // supported on such platforms. class Mutex { public: Mutex() {} void Lock() {} void Unlock() {} void AssertHeld() const {} }; # define GTEST_DECLARE_STATIC_MUTEX_(mutex) \ extern ::testing::internal::Mutex mutex # define GTEST_DEFINE_STATIC_MUTEX_(mutex) ::testing::internal::Mutex mutex // We cannot name this class MutexLock because the ctor declaration would // conflict with a macro named MutexLock, which is defined on some // platforms. That macro is used as a defensive measure to prevent against // inadvertent misuses of MutexLock like "MutexLock(&mu)" rather than // "MutexLock l(&mu)". Hence the typedef trick below. class GTestMutexLock { public: explicit GTestMutexLock(Mutex*) {} // NOLINT }; typedef GTestMutexLock MutexLock; template class GTEST_API_ ThreadLocal { public: ThreadLocal() : value_() {} explicit ThreadLocal(const T& value) : value_(value) {} T* pointer() { return &value_; } const T* pointer() const { return &value_; } const T& get() const { return value_; } void set(const T& value) { value_ = value; } private: T value_; }; #endif // GTEST_IS_THREADSAFE // Returns the number of threads running in the process, or 0 to indicate that // we cannot detect it. GTEST_API_ size_t GetThreadCount(); // Passing non-POD classes through ellipsis (...) crashes the ARM // compiler and generates a warning in Sun Studio before 12u4. 
The Nokia Symbian // and the IBM XL C/C++ compiler try to instantiate a copy constructor // for objects passed through ellipsis (...), failing for uncopyable // objects.  We define this to ensure that only POD is passed through // ellipsis on these systems.
#if defined(__SYMBIAN32__) || defined(__IBMCPP__) || \
    (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x5130)
// We lose support for NULL detection where the compiler doesn't like
// passing non-POD classes through ellipsis (...).
# define GTEST_ELLIPSIS_NEEDS_POD_ 1
#else
# define GTEST_CAN_COMPARE_NULL 1
#endif

// The Nokia Symbian and IBM XL C/C++ compilers cannot decide between
// const T& and const T* in a function template.  These compilers
// _can_ decide between class template specializations for T and T*,
// so a tr1::type_traits-like is_pointer works.
#if defined(__SYMBIAN32__) || defined(__IBMCPP__)
# define GTEST_NEEDS_IS_POINTER_ 1
#endif

template <bool bool_value>
struct bool_constant {
  typedef bool_constant<bool_value> type;
  static const bool value = bool_value;
};
template <bool bool_value> const bool bool_constant<bool_value>::value;

typedef bool_constant<false> false_type;
typedef bool_constant<true> true_type;

template <typename T, typename U>
struct is_same : public false_type {};

template <typename T>
struct is_same<T, T> : public true_type {};

template <typename T>
struct is_pointer : public false_type {};

template <typename T>
struct is_pointer<T*> : public true_type {};

template <typename Iterator>
struct IteratorTraits {
  typedef typename Iterator::value_type value_type;
};

template <typename T>
struct IteratorTraits<T*> {
  typedef T value_type;
};

template <typename T>
struct IteratorTraits<const T*> {
  typedef T value_type;
};

#if GTEST_OS_WINDOWS
# define GTEST_PATH_SEP_ "\\"
# define GTEST_HAS_ALT_PATH_SEP_ 1
// The biggest signed integer type the compiler supports.
typedef __int64 BiggestInt;
#else
# define GTEST_PATH_SEP_ "/"
# define GTEST_HAS_ALT_PATH_SEP_ 0
typedef long long BiggestInt;  // NOLINT
#endif  // GTEST_OS_WINDOWS

// Utilities for char.

// isspace(int ch) and friends accept an unsigned char or EOF.  char
// may be signed, depending on the compiler (or compiler flags).
// Therefore we need to cast a char to unsigned char before calling
// isspace(), etc.

inline bool IsAlpha(char ch) {
  return isalpha(static_cast<unsigned char>(ch)) != 0;
}
inline bool IsAlNum(char ch) {
  return isalnum(static_cast<unsigned char>(ch)) != 0;
}
inline bool IsDigit(char ch) {
  return isdigit(static_cast<unsigned char>(ch)) != 0;
}
inline bool IsLower(char ch) {
  return islower(static_cast<unsigned char>(ch)) != 0;
}
inline bool IsSpace(char ch) {
  return isspace(static_cast<unsigned char>(ch)) != 0;
}
inline bool IsUpper(char ch) {
  return isupper(static_cast<unsigned char>(ch)) != 0;
}
inline bool IsXDigit(char ch) {
  return isxdigit(static_cast<unsigned char>(ch)) != 0;
}
inline bool IsXDigit(wchar_t ch) {
  const unsigned char low_byte = static_cast<unsigned char>(ch);
  return ch == low_byte && isxdigit(low_byte) != 0;
}

inline char ToLower(char ch) {
  return static_cast<char>(tolower(static_cast<unsigned char>(ch)));
}
inline char ToUpper(char ch) {
  return static_cast<char>(toupper(static_cast<unsigned char>(ch)));
}

inline std::string StripTrailingSpaces(std::string str) {
  std::string::iterator it = str.end();
  while (it != str.begin() && IsSpace(*--it))
    it = str.erase(it);
  return str;
}

// The testing::internal::posix namespace holds wrappers for common
// POSIX functions.  These wrappers hide the differences between
// Windows/MSVC and POSIX systems.  Since some compilers define these
// standard functions as macros, the wrapper cannot have the same name
// as the wrapped function.

namespace posix {

// Functions with a different name on Windows.
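//
// Illustrative sketch (not part of the original header): callers use the
// portable wrapper and let the #if blocks below pick the platform
// spelling; `name` is a hypothetical variable:
//
//   if (posix::StrCaseCmp(name, "auto") == 0) { ... }
//
// which resolves to _stricmp() on MSVC, stricmp() on C++ Builder, and
// strcasecmp() on POSIX systems.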
#if GTEST_OS_WINDOWS

typedef struct _stat StatStruct;

# ifdef __BORLANDC__
inline int IsATTY(int fd) { return isatty(fd); }
inline int StrCaseCmp(const char* s1, const char* s2) {
  return stricmp(s1, s2);
}
inline char* StrDup(const char* src) { return strdup(src); }
# else  // !__BORLANDC__
#  if GTEST_OS_WINDOWS_MOBILE
inline int IsATTY(int /* fd */) { return 0; }
#  else
inline int IsATTY(int fd) { return _isatty(fd); }
#  endif  // GTEST_OS_WINDOWS_MOBILE
inline int StrCaseCmp(const char* s1, const char* s2) {
  return _stricmp(s1, s2);
}
inline char* StrDup(const char* src) { return _strdup(src); }
# endif  // __BORLANDC__

# if GTEST_OS_WINDOWS_MOBILE
inline int FileNo(FILE* file) { return reinterpret_cast<int>(_fileno(file)); }
// Stat(), RmDir(), and IsDir() are not needed on Windows CE at this
// time and thus not defined there.
# else
inline int FileNo(FILE* file) { return _fileno(file); }
inline int Stat(const char* path, StatStruct* buf) { return _stat(path, buf); }
inline int RmDir(const char* dir) { return _rmdir(dir); }
inline bool IsDir(const StatStruct& st) {
  return (_S_IFDIR & st.st_mode) != 0;
}
# endif  // GTEST_OS_WINDOWS_MOBILE

#else

typedef struct stat StatStruct;

inline int FileNo(FILE* file) { return fileno(file); }
inline int IsATTY(int fd) { return isatty(fd); }
inline int Stat(const char* path, StatStruct* buf) { return stat(path, buf); }
inline int StrCaseCmp(const char* s1, const char* s2) {
  return strcasecmp(s1, s2);
}
inline char* StrDup(const char* src) { return strdup(src); }
inline int RmDir(const char* dir) { return rmdir(dir); }
inline bool IsDir(const StatStruct& st) { return S_ISDIR(st.st_mode); }

#endif  // GTEST_OS_WINDOWS

// Functions deprecated by MSVC 8.0.

GTEST_DISABLE_MSC_DEPRECATED_PUSH_()

inline const char* StrNCpy(char* dest, const char* src, size_t n) {
  return strncpy(dest, src, n);
}

// ChDir(), FReopen(), FDOpen(), Read(), Write(), Close(), and
// StrError() aren't needed on Windows CE at this time and thus not
// defined there.

#if !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT
inline int ChDir(const char* dir) { return chdir(dir); }
#endif
inline FILE* FOpen(const char* path, const char* mode) {
  return fopen(path, mode);
}
#if !GTEST_OS_WINDOWS_MOBILE
inline FILE *FReopen(const char* path, const char* mode, FILE* stream) {
  return freopen(path, mode, stream);
}
inline FILE* FDOpen(int fd, const char* mode) { return fdopen(fd, mode); }
#endif
inline int FClose(FILE* fp) { return fclose(fp); }
#if !GTEST_OS_WINDOWS_MOBILE
inline int Read(int fd, void* buf, unsigned int count) {
  return static_cast<int>(read(fd, buf, count));
}
inline int Write(int fd, const void* buf, unsigned int count) {
  return static_cast<int>(write(fd, buf, count));
}
inline int Close(int fd) { return close(fd); }
inline const char* StrError(int errnum) { return strerror(errnum); }
#endif
inline const char* GetEnv(const char* name) {
#if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT
  // We are on Windows CE, which has no environment variables.
  static_cast<void>(name);  // To prevent 'unused argument' warning.
  return NULL;
#elif defined(__BORLANDC__) || defined(__SunOS_5_8) || defined(__SunOS_5_9)
  // Environment variables which we programmatically clear will be set to the
  // empty string rather than unset (NULL).  Handle that case.
  const char* const env = getenv(name);
  return (env != NULL && env[0] != '\0') ? env : NULL;
#else
  return getenv(name);
#endif
}

GTEST_DISABLE_MSC_DEPRECATED_POP_()

#if GTEST_OS_WINDOWS_MOBILE
// Windows CE has no C library.
// The abort() function is used in
// several places in Google Test.  This implementation provides a reasonable
// imitation of standard behaviour.
void Abort();
#else
inline void Abort() { abort(); }
#endif  // GTEST_OS_WINDOWS_MOBILE

}  // namespace posix

// MSVC "deprecates" snprintf and issues warnings wherever it is used.  In
// order to avoid these warnings, we need to use _snprintf or _snprintf_s on
// MSVC-based platforms.  We map the GTEST_SNPRINTF_ macro to the appropriate
// function in order to achieve that.  We use macro definition here because
// snprintf is a variadic function.
#if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE
// MSVC 2005 and above support variadic macros.
# define GTEST_SNPRINTF_(buffer, size, format, ...) \
     _snprintf_s(buffer, size, size, format, __VA_ARGS__)
#elif defined(_MSC_VER)
// Windows CE does not define _snprintf_s and MSVC prior to 2005 doesn't
// complain about _snprintf.
# define GTEST_SNPRINTF_ _snprintf
#else
# define GTEST_SNPRINTF_ snprintf
#endif

// The maximum number a BiggestInt can represent.  This definition
// works no matter BiggestInt is represented in one's complement or
// two's complement.
//
// We cannot rely on numeric_limits in STL, as __int64 and long long
// are not part of standard C++ and numeric_limits doesn't need to be
// defined for them.
const BiggestInt kMaxBiggestInt =
    ~(static_cast<BiggestInt>(1) << (8*sizeof(BiggestInt) - 1));

// This template class serves as a compile-time function from size to
// type.  It maps a size in bytes to a primitive type with that
// size. e.g.
//
//   TypeWithSize<4>::UInt
//
// is typedef-ed to be unsigned int (unsigned integer made up of 4
// bytes).
//
// Such functionality should belong to STL, but I cannot find it
// there.
//
// Google Test uses this class in the implementation of floating-point
// comparison.
//
// For now it only handles UInt (unsigned int) as that's all Google Test
// needs.  Other types can be easily added in the future if need
// arises.
template <size_t size>
class TypeWithSize {
 public:
  // This prevents the user from using TypeWithSize<N> with incorrect
  // values of N.
  typedef void UInt;
};

// The specialization for size 4.
template <>
class TypeWithSize<4> {
 public:
  // unsigned int has size 4 in both gcc and MSVC.
  //
  // As base/basictypes.h doesn't compile on Windows, we cannot use
  // uint32, uint64, and etc here.
  typedef int Int;
  typedef unsigned int UInt;
};

// The specialization for size 8.
template <>
class TypeWithSize<8> {
 public:
#if GTEST_OS_WINDOWS
  typedef __int64 Int;
  typedef unsigned __int64 UInt;
#else
  typedef long long Int;  // NOLINT
  typedef unsigned long long UInt;  // NOLINT
#endif  // GTEST_OS_WINDOWS
};

// Integer types of known sizes.
typedef TypeWithSize<4>::Int Int32;
typedef TypeWithSize<4>::UInt UInt32;
typedef TypeWithSize<8>::Int Int64;
typedef TypeWithSize<8>::UInt UInt64;
typedef TypeWithSize<8>::Int TimeInMillis;  // Represents time in milliseconds.

// Utilities for command line flags and environment variables.

// Macro for referencing flags.
#if !defined(GTEST_FLAG)
# define GTEST_FLAG(name) FLAGS_gtest_##name
#endif  // !defined(GTEST_FLAG)

#if !defined(GTEST_USE_OWN_FLAGFILE_FLAG_)
# define GTEST_USE_OWN_FLAGFILE_FLAG_ 1
#endif  // !defined(GTEST_USE_OWN_FLAGFILE_FLAG_)

#if !defined(GTEST_DECLARE_bool_)
# define GTEST_FLAG_SAVER_ ::testing::internal::GTestFlagSaver

// Macros for declaring flags.
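//
// Illustrative expansion (not part of the original header) for a
// hypothetical flag named `foo`, using the default definitions below:
//
//   GTEST_DECLARE_bool_(foo);           // GTEST_API_ extern bool FLAGS_gtest_foo;
//   GTEST_DEFINE_bool_(foo, true, "");  // GTEST_API_ bool FLAGS_gtest_foo = (true);
//   if (GTEST_FLAG(foo)) { ... }        // reads FLAGS_gtest_foo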
# define GTEST_DECLARE_bool_(name) GTEST_API_ extern bool GTEST_FLAG(name) # define GTEST_DECLARE_int32_(name) \ GTEST_API_ extern ::testing::internal::Int32 GTEST_FLAG(name) # define GTEST_DECLARE_string_(name) \ GTEST_API_ extern ::std::string GTEST_FLAG(name) // Macros for defining flags. # define GTEST_DEFINE_bool_(name, default_val, doc) \ GTEST_API_ bool GTEST_FLAG(name) = (default_val) # define GTEST_DEFINE_int32_(name, default_val, doc) \ GTEST_API_ ::testing::internal::Int32 GTEST_FLAG(name) = (default_val) # define GTEST_DEFINE_string_(name, default_val, doc) \ GTEST_API_ ::std::string GTEST_FLAG(name) = (default_val) #endif // !defined(GTEST_DECLARE_bool_) // Thread annotations #if !defined(GTEST_EXCLUSIVE_LOCK_REQUIRED_) # define GTEST_EXCLUSIVE_LOCK_REQUIRED_(locks) # define GTEST_LOCK_EXCLUDED_(locks) #endif // !defined(GTEST_EXCLUSIVE_LOCK_REQUIRED_) // Parses 'str' for a 32-bit signed integer. If successful, writes the result // to *value and returns true; otherwise leaves *value unchanged and returns // false. // FIXME: Find a better way to refactor flag and environment parsing // out of both gtest-port.cc and gtest.cc to avoid exporting this utility // function. bool ParseInt32(const Message& src_text, const char* str, Int32* value); // Parses a bool/Int32/string from the environment variable // corresponding to the given Google Test flag. bool BoolFromGTestEnv(const char* flag, bool default_val); GTEST_API_ Int32 Int32FromGTestEnv(const char* flag, Int32 default_val); std::string OutputFlagAlsoCheckEnvVar(); const char* StringFromGTestEnv(const char* flag, const char* default_val); } // namespace internal } // namespace testing #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_PORT_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-string.h000066400000000000000000000154121357355204000275210ustar00rootroot00000000000000// Copyright 2005, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
// // The Google C++ Testing and Mocking Framework (Google Test) // // This header file declares the String class and functions used internally by // Google Test. They are subject to change without notice. They should not used // by code external to Google Test. // // This header file is #included by gtest-internal.h. // It should not be #included by other files. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ #ifdef __BORLANDC__ // string.h is not guaranteed to provide strcpy on C++ Builder. # include #endif #include #include #include "gtest/internal/gtest-port.h" namespace testing { namespace internal { // String - an abstract class holding static string utilities. class GTEST_API_ String { public: // Static utility methods // Clones a 0-terminated C string, allocating memory using new. The // caller is responsible for deleting the return value using // delete[]. Returns the cloned string, or NULL if the input is // NULL. // // This is different from strdup() in string.h, which allocates // memory using malloc(). static const char* CloneCString(const char* c_str); #if GTEST_OS_WINDOWS_MOBILE // Windows CE does not have the 'ANSI' versions of Win32 APIs. To be // able to pass strings to Win32 APIs on CE we need to convert them // to 'Unicode', UTF-16. // Creates a UTF-16 wide string from the given ANSI string, allocating // memory using new. The caller is responsible for deleting the return // value using delete[]. Returns the wide string, or NULL if the // input is NULL. // // The wide string is created using the ANSI codepage (CP_ACP) to // match the behaviour of the ANSI versions of Win32 calls and the // C runtime. static LPCWSTR AnsiToUtf16(const char* c_str); // Creates an ANSI string from the given wide string, allocating // memory using new. The caller is responsible for deleting the return // value using delete[]. Returns the ANSI string, or NULL if the // input is NULL. // // The returned string is created using the ANSI codepage (CP_ACP) to // match the behaviour of the ANSI versions of Win32 calls and the // C runtime. static const char* Utf16ToAnsi(LPCWSTR utf16_str); #endif // Compares two C strings. Returns true iff they have the same content. // // Unlike strcmp(), this function can handle NULL argument(s). A // NULL C string is considered different to any non-NULL C string, // including the empty string. static bool CStringEquals(const char* lhs, const char* rhs); // Converts a wide C string to a String using the UTF-8 encoding. // NULL will be converted to "(null)". If an error occurred during // the conversion, "(failed to convert from wide string)" is // returned. static std::string ShowWideCString(const wchar_t* wide_c_str); // Compares two wide C strings. Returns true iff they have the same // content. // // Unlike wcscmp(), this function can handle NULL argument(s). A // NULL C string is considered different to any non-NULL C string, // including the empty string. static bool WideCStringEquals(const wchar_t* lhs, const wchar_t* rhs); // Compares two C strings, ignoring case. Returns true iff they // have the same content. // // Unlike strcasecmp(), this function can handle NULL argument(s). // A NULL C string is considered different to any non-NULL C string, // including the empty string. static bool CaseInsensitiveCStringEquals(const char* lhs, const char* rhs); // Compares two wide C strings, ignoring case. Returns true iff they // have the same content. 
// // Unlike wcscasecmp(), this function can handle NULL argument(s). // A NULL C string is considered different to any non-NULL wide C string, // including the empty string. // NB: The implementations on different platforms slightly differ. // On windows, this method uses _wcsicmp which compares according to LC_CTYPE // environment variable. On GNU platform this method uses wcscasecmp // which compares according to LC_CTYPE category of the current locale. // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the // current locale. static bool CaseInsensitiveWideCStringEquals(const wchar_t* lhs, const wchar_t* rhs); // Returns true iff the given string ends with the given suffix, ignoring // case. Any string is considered to end with an empty suffix. static bool EndsWithCaseInsensitive( const std::string& str, const std::string& suffix); // Formats an int value as "%02d". static std::string FormatIntWidth2(int value); // "%02d" for width == 2 // Formats an int value as "%X". static std::string FormatHexInt(int value); // Formats a byte as "%02X". static std::string FormatByte(unsigned char value); private: String(); // Not meant to be instantiated. }; // class String // Gets the content of the stringstream's buffer as an std::string. Each '\0' // character in the buffer is replaced with "\\0". GTEST_API_ std::string StringStreamToString(::std::stringstream* stream); } // namespace internal } // namespace testing #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_STRING_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-tuple.h000066400000000000000000000676751357355204000273660ustar00rootroot00000000000000// This file was GENERATED by command: // pump.py gtest-tuple.h.pump // DO NOT EDIT BY HAND!!! // Copyright 2009 Google Inc. // All Rights Reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Implements a subset of TR1 tuple needed by Google Test and Google Mock. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ #include // For ::std::pair. 
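// An illustrative sketch (not part of the original header) of the subset
// this file implements, mirroring std::tr1 usage:
//
//   ::std::tr1::tuple<int, double> t = ::std::tr1::make_tuple(1, 2.5);
//   int first = ::std::tr1::get<0>(t);                  // element access
//   bool same = (t == ::std::tr1::make_tuple(1, 2.5));  // only == and !=
//   t = ::std::make_pair(3, 4.5);                       // 2-tuples accept pairs
//
// Tuples of up to 10 fields are supported; tie() and reference_wrapper
// support are intentionally omitted (see the limitations noted below).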
// The compiler used in Symbian has a bug that prevents us from declaring the // tuple template as a friend (it complains that tuple is redefined). This // bypasses the bug by declaring the members that should otherwise be // private as public. // Sun Studio versions < 12 also have the above bug. #if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590) # define GTEST_DECLARE_TUPLE_AS_FRIEND_ public: #else # define GTEST_DECLARE_TUPLE_AS_FRIEND_ \ template friend class tuple; \ private: #endif // Visual Studio 2010, 2012, and 2013 define symbols in std::tr1 that conflict // with our own definitions. Therefore using our own tuple does not work on // those compilers. #if defined(_MSC_VER) && _MSC_VER >= 1600 /* 1600 is Visual Studio 2010 */ # error "gtest's tuple doesn't compile on Visual Studio 2010 or later. \ GTEST_USE_OWN_TR1_TUPLE must be set to 0 on those compilers." #endif // GTEST_n_TUPLE_(T) is the type of an n-tuple. #define GTEST_0_TUPLE_(T) tuple<> #define GTEST_1_TUPLE_(T) tuple #define GTEST_2_TUPLE_(T) tuple #define GTEST_3_TUPLE_(T) tuple #define GTEST_4_TUPLE_(T) tuple #define GTEST_5_TUPLE_(T) tuple #define GTEST_6_TUPLE_(T) tuple #define GTEST_7_TUPLE_(T) tuple #define GTEST_8_TUPLE_(T) tuple #define GTEST_9_TUPLE_(T) tuple #define GTEST_10_TUPLE_(T) tuple // GTEST_n_TYPENAMES_(T) declares a list of n typenames. #define GTEST_0_TYPENAMES_(T) #define GTEST_1_TYPENAMES_(T) typename T##0 #define GTEST_2_TYPENAMES_(T) typename T##0, typename T##1 #define GTEST_3_TYPENAMES_(T) typename T##0, typename T##1, typename T##2 #define GTEST_4_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ typename T##3 #define GTEST_5_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ typename T##3, typename T##4 #define GTEST_6_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ typename T##3, typename T##4, typename T##5 #define GTEST_7_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ typename T##3, typename T##4, typename T##5, typename T##6 #define GTEST_8_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ typename T##3, typename T##4, typename T##5, typename T##6, typename T##7 #define GTEST_9_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ typename T##3, typename T##4, typename T##5, typename T##6, \ typename T##7, typename T##8 #define GTEST_10_TYPENAMES_(T) typename T##0, typename T##1, typename T##2, \ typename T##3, typename T##4, typename T##5, typename T##6, \ typename T##7, typename T##8, typename T##9 // In theory, defining stuff in the ::std namespace is undefined // behavior. We can do this as we are playing the role of a standard // library vendor. namespace std { namespace tr1 { template class tuple; // Anything in namespace gtest_internal is Google Test's INTERNAL // IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code. namespace gtest_internal { // ByRef::type is T if T is a reference; otherwise it's const T&. template struct ByRef { typedef const T& type; }; // NOLINT template struct ByRef { typedef T& type; }; // NOLINT // A handy wrapper for ByRef. #define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef::type // AddRef::type is T if T is a reference; otherwise it's T&. This // is the same as tr1::add_reference::type. template struct AddRef { typedef T& type; }; // NOLINT template struct AddRef { typedef T& type; }; // NOLINT // A handy wrapper for AddRef. #define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef::type // A helper for implementing get(). 
template class Get; // A helper for implementing tuple_element. kIndexValid is true // iff k < the number of fields in tuple type T. template struct TupleElement; template struct TupleElement { typedef T0 type; }; template struct TupleElement { typedef T1 type; }; template struct TupleElement { typedef T2 type; }; template struct TupleElement { typedef T3 type; }; template struct TupleElement { typedef T4 type; }; template struct TupleElement { typedef T5 type; }; template struct TupleElement { typedef T6 type; }; template struct TupleElement { typedef T7 type; }; template struct TupleElement { typedef T8 type; }; template struct TupleElement { typedef T9 type; }; } // namespace gtest_internal template <> class tuple<> { public: tuple() {} tuple(const tuple& /* t */) {} tuple& operator=(const tuple& /* t */) { return *this; } }; template class GTEST_1_TUPLE_(T) { public: template friend class gtest_internal::Get; tuple() : f0_() {} explicit tuple(GTEST_BY_REF_(T0) f0) : f0_(f0) {} tuple(const tuple& t) : f0_(t.f0_) {} template tuple(const GTEST_1_TUPLE_(U)& t) : f0_(t.f0_) {} tuple& operator=(const tuple& t) { return CopyFrom(t); } template tuple& operator=(const GTEST_1_TUPLE_(U)& t) { return CopyFrom(t); } GTEST_DECLARE_TUPLE_AS_FRIEND_ template tuple& CopyFrom(const GTEST_1_TUPLE_(U)& t) { f0_ = t.f0_; return *this; } T0 f0_; }; template class GTEST_2_TUPLE_(T) { public: template friend class gtest_internal::Get; tuple() : f0_(), f1_() {} explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1) : f0_(f0), f1_(f1) {} tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_) {} template tuple(const GTEST_2_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_) {} template tuple(const ::std::pair& p) : f0_(p.first), f1_(p.second) {} tuple& operator=(const tuple& t) { return CopyFrom(t); } template tuple& operator=(const GTEST_2_TUPLE_(U)& t) { return CopyFrom(t); } template tuple& operator=(const ::std::pair& p) { f0_ = p.first; f1_ = p.second; return *this; } GTEST_DECLARE_TUPLE_AS_FRIEND_ template tuple& CopyFrom(const GTEST_2_TUPLE_(U)& t) { f0_ = t.f0_; f1_ = t.f1_; return *this; } T0 f0_; T1 f1_; }; template class GTEST_3_TUPLE_(T) { public: template friend class gtest_internal::Get; tuple() : f0_(), f1_(), f2_() {} explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, GTEST_BY_REF_(T2) f2) : f0_(f0), f1_(f1), f2_(f2) {} tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {} template tuple(const GTEST_3_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_) {} tuple& operator=(const tuple& t) { return CopyFrom(t); } template tuple& operator=(const GTEST_3_TUPLE_(U)& t) { return CopyFrom(t); } GTEST_DECLARE_TUPLE_AS_FRIEND_ template tuple& CopyFrom(const GTEST_3_TUPLE_(U)& t) { f0_ = t.f0_; f1_ = t.f1_; f2_ = t.f2_; return *this; } T0 f0_; T1 f1_; T2 f2_; }; template class GTEST_4_TUPLE_(T) { public: template friend class gtest_internal::Get; tuple() : f0_(), f1_(), f2_(), f3_() {} explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3) : f0_(f0), f1_(f1), f2_(f2), f3_(f3) {} tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_) {} template tuple(const GTEST_4_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_) {} tuple& operator=(const tuple& t) { return CopyFrom(t); } template tuple& operator=(const GTEST_4_TUPLE_(U)& t) { return CopyFrom(t); } GTEST_DECLARE_TUPLE_AS_FRIEND_ template tuple& CopyFrom(const GTEST_4_TUPLE_(U)& t) { f0_ = t.f0_; f1_ = t.f1_; f2_ = t.f2_; f3_ = t.f3_; return *this; } T0 f0_; T1 f1_; T2 f2_; T3 f3_; 
}; template class GTEST_5_TUPLE_(T) { public: template friend class gtest_internal::Get; tuple() : f0_(), f1_(), f2_(), f3_(), f4_() {} explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4) {} tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), f4_(t.f4_) {} template tuple(const GTEST_5_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), f4_(t.f4_) {} tuple& operator=(const tuple& t) { return CopyFrom(t); } template tuple& operator=(const GTEST_5_TUPLE_(U)& t) { return CopyFrom(t); } GTEST_DECLARE_TUPLE_AS_FRIEND_ template tuple& CopyFrom(const GTEST_5_TUPLE_(U)& t) { f0_ = t.f0_; f1_ = t.f1_; f2_ = t.f2_; f3_ = t.f3_; f4_ = t.f4_; return *this; } T0 f0_; T1 f1_; T2 f2_; T3 f3_; T4 f4_; }; template class GTEST_6_TUPLE_(T) { public: template friend class gtest_internal::Get; tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_() {} explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, GTEST_BY_REF_(T5) f5) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), f5_(f5) {} tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), f4_(t.f4_), f5_(t.f5_) {} template tuple(const GTEST_6_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), f4_(t.f4_), f5_(t.f5_) {} tuple& operator=(const tuple& t) { return CopyFrom(t); } template tuple& operator=(const GTEST_6_TUPLE_(U)& t) { return CopyFrom(t); } GTEST_DECLARE_TUPLE_AS_FRIEND_ template tuple& CopyFrom(const GTEST_6_TUPLE_(U)& t) { f0_ = t.f0_; f1_ = t.f1_; f2_ = t.f2_; f3_ = t.f3_; f4_ = t.f4_; f5_ = t.f5_; return *this; } T0 f0_; T1 f1_; T2 f2_; T3 f3_; T4 f4_; T5 f5_; }; template class GTEST_7_TUPLE_(T) { public: template friend class gtest_internal::Get; tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_() {} explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), f5_(f5), f6_(f6) {} tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {} template tuple(const GTEST_7_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_) {} tuple& operator=(const tuple& t) { return CopyFrom(t); } template tuple& operator=(const GTEST_7_TUPLE_(U)& t) { return CopyFrom(t); } GTEST_DECLARE_TUPLE_AS_FRIEND_ template tuple& CopyFrom(const GTEST_7_TUPLE_(U)& t) { f0_ = t.f0_; f1_ = t.f1_; f2_ = t.f2_; f3_ = t.f3_; f4_ = t.f4_; f5_ = t.f5_; f6_ = t.f6_; return *this; } T0 f0_; T1 f1_; T2 f2_; T3 f3_; T4 f4_; T5 f5_; T6 f6_; }; template class GTEST_8_TUPLE_(T) { public: template friend class gtest_internal::Get; tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_() {} explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), f5_(f5), f6_(f6), f7_(f7) {} tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {} template tuple(const GTEST_8_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_) {} tuple& operator=(const tuple& t) { return CopyFrom(t); } template tuple& 
operator=(const GTEST_8_TUPLE_(U)& t) { return CopyFrom(t); } GTEST_DECLARE_TUPLE_AS_FRIEND_ template tuple& CopyFrom(const GTEST_8_TUPLE_(U)& t) { f0_ = t.f0_; f1_ = t.f1_; f2_ = t.f2_; f3_ = t.f3_; f4_ = t.f4_; f5_ = t.f5_; f6_ = t.f6_; f7_ = t.f7_; return *this; } T0 f0_; T1 f1_; T2 f2_; T3 f3_; T4 f4_; T5 f5_; T6 f6_; T7 f7_; }; template class GTEST_9_TUPLE_(T) { public: template friend class gtest_internal::Get; tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_() {} explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7, GTEST_BY_REF_(T8) f8) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), f5_(f5), f6_(f6), f7_(f7), f8_(f8) {} tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {} template tuple(const GTEST_9_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_) {} tuple& operator=(const tuple& t) { return CopyFrom(t); } template tuple& operator=(const GTEST_9_TUPLE_(U)& t) { return CopyFrom(t); } GTEST_DECLARE_TUPLE_AS_FRIEND_ template tuple& CopyFrom(const GTEST_9_TUPLE_(U)& t) { f0_ = t.f0_; f1_ = t.f1_; f2_ = t.f2_; f3_ = t.f3_; f4_ = t.f4_; f5_ = t.f5_; f6_ = t.f6_; f7_ = t.f7_; f8_ = t.f8_; return *this; } T0 f0_; T1 f1_; T2 f2_; T3 f3_; T4 f4_; T5 f5_; T6 f6_; T7 f7_; T8 f8_; }; template class tuple { public: template friend class gtest_internal::Get; tuple() : f0_(), f1_(), f2_(), f3_(), f4_(), f5_(), f6_(), f7_(), f8_(), f9_() {} explicit tuple(GTEST_BY_REF_(T0) f0, GTEST_BY_REF_(T1) f1, GTEST_BY_REF_(T2) f2, GTEST_BY_REF_(T3) f3, GTEST_BY_REF_(T4) f4, GTEST_BY_REF_(T5) f5, GTEST_BY_REF_(T6) f6, GTEST_BY_REF_(T7) f7, GTEST_BY_REF_(T8) f8, GTEST_BY_REF_(T9) f9) : f0_(f0), f1_(f1), f2_(f2), f3_(f3), f4_(f4), f5_(f5), f6_(f6), f7_(f7), f8_(f8), f9_(f9) {} tuple(const tuple& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), f9_(t.f9_) {} template tuple(const GTEST_10_TUPLE_(U)& t) : f0_(t.f0_), f1_(t.f1_), f2_(t.f2_), f3_(t.f3_), f4_(t.f4_), f5_(t.f5_), f6_(t.f6_), f7_(t.f7_), f8_(t.f8_), f9_(t.f9_) {} tuple& operator=(const tuple& t) { return CopyFrom(t); } template tuple& operator=(const GTEST_10_TUPLE_(U)& t) { return CopyFrom(t); } GTEST_DECLARE_TUPLE_AS_FRIEND_ template tuple& CopyFrom(const GTEST_10_TUPLE_(U)& t) { f0_ = t.f0_; f1_ = t.f1_; f2_ = t.f2_; f3_ = t.f3_; f4_ = t.f4_; f5_ = t.f5_; f6_ = t.f6_; f7_ = t.f7_; f8_ = t.f8_; f9_ = t.f9_; return *this; } T0 f0_; T1 f1_; T2 f2_; T3 f3_; T4 f4_; T5 f5_; T6 f6_; T7 f7_; T8 f8_; T9 f9_; }; // 6.1.3.2 Tuple creation functions. // Known limitations: we don't support passing an // std::tr1::reference_wrapper to make_tuple(). And we don't // implement tie(). 
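//
// For example (illustrative, not part of the original header):
//
//   tuple<int, double> t = make_tuple(3, 2.5);  // uses GTEST_2_TUPLE_
//   tuple<> empty = make_tuple();               // the zero-field overload
//
// Each overload below simply forwards its arguments to the matching
// n-field tuple constructor.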
inline tuple<> make_tuple() { return tuple<>(); } template inline GTEST_1_TUPLE_(T) make_tuple(const T0& f0) { return GTEST_1_TUPLE_(T)(f0); } template inline GTEST_2_TUPLE_(T) make_tuple(const T0& f0, const T1& f1) { return GTEST_2_TUPLE_(T)(f0, f1); } template inline GTEST_3_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2) { return GTEST_3_TUPLE_(T)(f0, f1, f2); } template inline GTEST_4_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, const T3& f3) { return GTEST_4_TUPLE_(T)(f0, f1, f2, f3); } template inline GTEST_5_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, const T3& f3, const T4& f4) { return GTEST_5_TUPLE_(T)(f0, f1, f2, f3, f4); } template inline GTEST_6_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, const T3& f3, const T4& f4, const T5& f5) { return GTEST_6_TUPLE_(T)(f0, f1, f2, f3, f4, f5); } template inline GTEST_7_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, const T3& f3, const T4& f4, const T5& f5, const T6& f6) { return GTEST_7_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6); } template inline GTEST_8_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7) { return GTEST_8_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7); } template inline GTEST_9_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7, const T8& f8) { return GTEST_9_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8); } template inline GTEST_10_TUPLE_(T) make_tuple(const T0& f0, const T1& f1, const T2& f2, const T3& f3, const T4& f4, const T5& f5, const T6& f6, const T7& f7, const T8& f8, const T9& f9) { return GTEST_10_TUPLE_(T)(f0, f1, f2, f3, f4, f5, f6, f7, f8, f9); } // 6.1.3.3 Tuple helper classes. template struct tuple_size; template struct tuple_size { static const int value = 0; }; template struct tuple_size { static const int value = 1; }; template struct tuple_size { static const int value = 2; }; template struct tuple_size { static const int value = 3; }; template struct tuple_size { static const int value = 4; }; template struct tuple_size { static const int value = 5; }; template struct tuple_size { static const int value = 6; }; template struct tuple_size { static const int value = 7; }; template struct tuple_size { static const int value = 8; }; template struct tuple_size { static const int value = 9; }; template struct tuple_size { static const int value = 10; }; template struct tuple_element { typedef typename gtest_internal::TupleElement< k < (tuple_size::value), k, Tuple>::type type; }; #define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element::type // 6.1.3.4 Element access. 
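//
// Illustrative sketch (not part of the original header): get<k>(t)
// dispatches to the Get<k> helper specializations defined next:
//
//   tuple<int, char> t(1, 'x');
//   get<0>(t) = 2;             // Get<0>::Field(t), returns t.f0_ by reference
//   const char c = get<1>(t);  // t.f1_ (Get<1>::ConstField() on const tuples)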
namespace gtest_internal { template <> class Get<0> { public: template static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple)) Field(Tuple& t) { return t.f0_; } // NOLINT template static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(0, Tuple)) ConstField(const Tuple& t) { return t.f0_; } }; template <> class Get<1> { public: template static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple)) Field(Tuple& t) { return t.f1_; } // NOLINT template static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(1, Tuple)) ConstField(const Tuple& t) { return t.f1_; } }; template <> class Get<2> { public: template static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple)) Field(Tuple& t) { return t.f2_; } // NOLINT template static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(2, Tuple)) ConstField(const Tuple& t) { return t.f2_; } }; template <> class Get<3> { public: template static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple)) Field(Tuple& t) { return t.f3_; } // NOLINT template static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(3, Tuple)) ConstField(const Tuple& t) { return t.f3_; } }; template <> class Get<4> { public: template static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple)) Field(Tuple& t) { return t.f4_; } // NOLINT template static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(4, Tuple)) ConstField(const Tuple& t) { return t.f4_; } }; template <> class Get<5> { public: template static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple)) Field(Tuple& t) { return t.f5_; } // NOLINT template static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(5, Tuple)) ConstField(const Tuple& t) { return t.f5_; } }; template <> class Get<6> { public: template static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple)) Field(Tuple& t) { return t.f6_; } // NOLINT template static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(6, Tuple)) ConstField(const Tuple& t) { return t.f6_; } }; template <> class Get<7> { public: template static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple)) Field(Tuple& t) { return t.f7_; } // NOLINT template static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(7, Tuple)) ConstField(const Tuple& t) { return t.f7_; } }; template <> class Get<8> { public: template static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple)) Field(Tuple& t) { return t.f8_; } // NOLINT template static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(8, Tuple)) ConstField(const Tuple& t) { return t.f8_; } }; template <> class Get<9> { public: template static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple)) Field(Tuple& t) { return t.f9_; } // NOLINT template static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(9, Tuple)) ConstField(const Tuple& t) { return t.f9_; } }; } // namespace gtest_internal template GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T))) get(GTEST_10_TUPLE_(T)& t) { return gtest_internal::Get::Field(t); } template GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_10_TUPLE_(T))) get(const GTEST_10_TUPLE_(T)& t) { return gtest_internal::Get::ConstField(t); } // 6.1.3.5 Relational operators // We only implement == and !=, as we don't have a need for the rest yet. namespace gtest_internal { // SameSizeTuplePrefixComparator::Eq(t1, t2) returns true if the // first k fields of t1 equals the first k fields of t2. // SameSizeTuplePrefixComparator(k1, k2) would be a compiler error if // k1 != k2. 
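//
// (Illustrative unrolling, not part of the original header:
//
//   SameSizeTuplePrefixComparator<2, 2>::Eq(t1, t2)
//     == SameSizeTuplePrefixComparator<1, 1>::Eq(t1, t2) &&
//            get<1>(t1) == get<1>(t2)
//     == get<0>(t1) == get<0>(t2) && get<1>(t1) == get<1>(t2)
//
// with the <0, 0> specialization terminating the recursion at `true`.)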
template struct SameSizeTuplePrefixComparator; template <> struct SameSizeTuplePrefixComparator<0, 0> { template static bool Eq(const Tuple1& /* t1 */, const Tuple2& /* t2 */) { return true; } }; template struct SameSizeTuplePrefixComparator { template static bool Eq(const Tuple1& t1, const Tuple2& t2) { return SameSizeTuplePrefixComparator::Eq(t1, t2) && ::std::tr1::get(t1) == ::std::tr1::get(t2); } }; } // namespace gtest_internal template inline bool operator==(const GTEST_10_TUPLE_(T)& t, const GTEST_10_TUPLE_(U)& u) { return gtest_internal::SameSizeTuplePrefixComparator< tuple_size::value, tuple_size::value>::Eq(t, u); } template inline bool operator!=(const GTEST_10_TUPLE_(T)& t, const GTEST_10_TUPLE_(U)& u) { return !(t == u); } // 6.1.4 Pairs. // Unimplemented. } // namespace tr1 } // namespace std #undef GTEST_0_TUPLE_ #undef GTEST_1_TUPLE_ #undef GTEST_2_TUPLE_ #undef GTEST_3_TUPLE_ #undef GTEST_4_TUPLE_ #undef GTEST_5_TUPLE_ #undef GTEST_6_TUPLE_ #undef GTEST_7_TUPLE_ #undef GTEST_8_TUPLE_ #undef GTEST_9_TUPLE_ #undef GTEST_10_TUPLE_ #undef GTEST_0_TYPENAMES_ #undef GTEST_1_TYPENAMES_ #undef GTEST_2_TYPENAMES_ #undef GTEST_3_TYPENAMES_ #undef GTEST_4_TYPENAMES_ #undef GTEST_5_TYPENAMES_ #undef GTEST_6_TYPENAMES_ #undef GTEST_7_TYPENAMES_ #undef GTEST_8_TYPENAMES_ #undef GTEST_9_TYPENAMES_ #undef GTEST_10_TYPENAMES_ #undef GTEST_DECLARE_TUPLE_AS_FRIEND_ #undef GTEST_BY_REF_ #undef GTEST_ADD_REF_ #undef GTEST_TUPLE_ELEMENT_ #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-tuple.h.pump000066400000000000000000000226101357355204000303220ustar00rootroot00000000000000$$ -*- mode: c++; -*- $var n = 10 $$ Maximum number of tuple fields we want to support. $$ This meta comment fixes auto-indentation in Emacs. }} // Copyright 2009 Google Inc. // All Rights Reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Implements a subset of TR1 tuple needed by Google Test and Google Mock. 
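//
// (Illustrative note, not part of the original file: this is the pump.py
// template from which gtest-tuple.h is generated.  `$var` defines a
// constant, `$range` a numeric range, and `$for x [[ ... ]]` repeats the
// bracketed text for each value in x's range.  For example, with n = 10,
//
//   $for j [[ #define GTEST_$(j)_TYPENAMES_(T) ... ]]
//
// expands to the eleven GTEST_0_TYPENAMES_ through GTEST_10_TYPENAMES_
// macro definitions seen in the generated header.)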
// GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ #include // For ::std::pair. // The compiler used in Symbian has a bug that prevents us from declaring the // tuple template as a friend (it complains that tuple is redefined). This // bypasses the bug by declaring the members that should otherwise be // private as public. // Sun Studio versions < 12 also have the above bug. #if defined(__SYMBIAN32__) || (defined(__SUNPRO_CC) && __SUNPRO_CC < 0x590) # define GTEST_DECLARE_TUPLE_AS_FRIEND_ public: #else # define GTEST_DECLARE_TUPLE_AS_FRIEND_ \ template friend class tuple; \ private: #endif // Visual Studio 2010, 2012, and 2013 define symbols in std::tr1 that conflict // with our own definitions. Therefore using our own tuple does not work on // those compilers. #if defined(_MSC_VER) && _MSC_VER >= 1600 /* 1600 is Visual Studio 2010 */ # error "gtest's tuple doesn't compile on Visual Studio 2010 or later. \ GTEST_USE_OWN_TR1_TUPLE must be set to 0 on those compilers." #endif $range i 0..n-1 $range j 0..n $range k 1..n // GTEST_n_TUPLE_(T) is the type of an n-tuple. #define GTEST_0_TUPLE_(T) tuple<> $for k [[ $range m 0..k-1 $range m2 k..n-1 #define GTEST_$(k)_TUPLE_(T) tuple<$for m, [[T##$m]]$for m2 [[, void]]> ]] // GTEST_n_TYPENAMES_(T) declares a list of n typenames. $for j [[ $range m 0..j-1 #define GTEST_$(j)_TYPENAMES_(T) $for m, [[typename T##$m]] ]] // In theory, defining stuff in the ::std namespace is undefined // behavior. We can do this as we are playing the role of a standard // library vendor. namespace std { namespace tr1 { template <$for i, [[typename T$i = void]]> class tuple; // Anything in namespace gtest_internal is Google Test's INTERNAL // IMPLEMENTATION DETAIL and MUST NOT BE USED DIRECTLY in user code. namespace gtest_internal { // ByRef::type is T if T is a reference; otherwise it's const T&. template struct ByRef { typedef const T& type; }; // NOLINT template struct ByRef { typedef T& type; }; // NOLINT // A handy wrapper for ByRef. #define GTEST_BY_REF_(T) typename ::std::tr1::gtest_internal::ByRef::type // AddRef::type is T if T is a reference; otherwise it's T&. This // is the same as tr1::add_reference::type. template struct AddRef { typedef T& type; }; // NOLINT template struct AddRef { typedef T& type; }; // NOLINT // A handy wrapper for AddRef. #define GTEST_ADD_REF_(T) typename ::std::tr1::gtest_internal::AddRef::type // A helper for implementing get(). template class Get; // A helper for implementing tuple_element. kIndexValid is true // iff k < the number of fields in tuple type T. 
template struct TupleElement; $for i [[ template struct TupleElement { typedef T$i type; }; ]] } // namespace gtest_internal template <> class tuple<> { public: tuple() {} tuple(const tuple& /* t */) {} tuple& operator=(const tuple& /* t */) { return *this; } }; $for k [[ $range m 0..k-1 template class $if k < n [[GTEST_$(k)_TUPLE_(T)]] $else [[tuple]] { public: template friend class gtest_internal::Get; tuple() : $for m, [[f$(m)_()]] {} explicit tuple($for m, [[GTEST_BY_REF_(T$m) f$m]]) : [[]] $for m, [[f$(m)_(f$m)]] {} tuple(const tuple& t) : $for m, [[f$(m)_(t.f$(m)_)]] {} template tuple(const GTEST_$(k)_TUPLE_(U)& t) : $for m, [[f$(m)_(t.f$(m)_)]] {} $if k == 2 [[ template tuple(const ::std::pair& p) : f0_(p.first), f1_(p.second) {} ]] tuple& operator=(const tuple& t) { return CopyFrom(t); } template tuple& operator=(const GTEST_$(k)_TUPLE_(U)& t) { return CopyFrom(t); } $if k == 2 [[ template tuple& operator=(const ::std::pair& p) { f0_ = p.first; f1_ = p.second; return *this; } ]] GTEST_DECLARE_TUPLE_AS_FRIEND_ template tuple& CopyFrom(const GTEST_$(k)_TUPLE_(U)& t) { $for m [[ f$(m)_ = t.f$(m)_; ]] return *this; } $for m [[ T$m f$(m)_; ]] }; ]] // 6.1.3.2 Tuple creation functions. // Known limitations: we don't support passing an // std::tr1::reference_wrapper to make_tuple(). And we don't // implement tie(). inline tuple<> make_tuple() { return tuple<>(); } $for k [[ $range m 0..k-1 template inline GTEST_$(k)_TUPLE_(T) make_tuple($for m, [[const T$m& f$m]]) { return GTEST_$(k)_TUPLE_(T)($for m, [[f$m]]); } ]] // 6.1.3.3 Tuple helper classes. template struct tuple_size; $for j [[ template struct tuple_size { static const int value = $j; }; ]] template struct tuple_element { typedef typename gtest_internal::TupleElement< k < (tuple_size::value), k, Tuple>::type type; }; #define GTEST_TUPLE_ELEMENT_(k, Tuple) typename tuple_element::type // 6.1.3.4 Element access. namespace gtest_internal { $for i [[ template <> class Get<$i> { public: template static GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_($i, Tuple)) Field(Tuple& t) { return t.f$(i)_; } // NOLINT template static GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_($i, Tuple)) ConstField(const Tuple& t) { return t.f$(i)_; } }; ]] } // namespace gtest_internal template GTEST_ADD_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_$(n)_TUPLE_(T))) get(GTEST_$(n)_TUPLE_(T)& t) { return gtest_internal::Get::Field(t); } template GTEST_BY_REF_(GTEST_TUPLE_ELEMENT_(k, GTEST_$(n)_TUPLE_(T))) get(const GTEST_$(n)_TUPLE_(T)& t) { return gtest_internal::Get::ConstField(t); } // 6.1.3.5 Relational operators // We only implement == and !=, as we don't have a need for the rest yet. namespace gtest_internal { // SameSizeTuplePrefixComparator::Eq(t1, t2) returns true if the // first k fields of t1 equals the first k fields of t2. // SameSizeTuplePrefixComparator(k1, k2) would be a compiler error if // k1 != k2. 
template struct SameSizeTuplePrefixComparator; template <> struct SameSizeTuplePrefixComparator<0, 0> { template static bool Eq(const Tuple1& /* t1 */, const Tuple2& /* t2 */) { return true; } }; template struct SameSizeTuplePrefixComparator { template static bool Eq(const Tuple1& t1, const Tuple2& t2) { return SameSizeTuplePrefixComparator::Eq(t1, t2) && ::std::tr1::get(t1) == ::std::tr1::get(t2); } }; } // namespace gtest_internal template inline bool operator==(const GTEST_$(n)_TUPLE_(T)& t, const GTEST_$(n)_TUPLE_(U)& u) { return gtest_internal::SameSizeTuplePrefixComparator< tuple_size::value, tuple_size::value>::Eq(t, u); } template inline bool operator!=(const GTEST_$(n)_TUPLE_(T)& t, const GTEST_$(n)_TUPLE_(U)& u) { return !(t == u); } // 6.1.4 Pairs. // Unimplemented. } // namespace tr1 } // namespace std $for j [[ #undef GTEST_$(j)_TUPLE_ ]] $for j [[ #undef GTEST_$(j)_TYPENAMES_ ]] #undef GTEST_DECLARE_TUPLE_AS_FRIEND_ #undef GTEST_BY_REF_ #undef GTEST_ADD_REF_ #undef GTEST_TUPLE_ELEMENT_ #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TUPLE_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-type-util.h000066400000000000000000005537531357355204000301660ustar00rootroot00000000000000// This file was GENERATED by command: // pump.py gtest-type-util.h.pump // DO NOT EDIT BY HAND!!! // Copyright 2008 Google Inc. // All Rights Reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Type utilities needed for implementing typed and type-parameterized // tests. This file is generated by a SCRIPT. DO NOT EDIT BY HAND! // // Currently we support at most 50 types in a list, and at most 50 // type-parameterized tests in one type-parameterized test case. // Please contact googletestframework@googlegroups.com if you need // more. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ #include "gtest/internal/gtest-port.h" // #ifdef __GNUC__ is too general here. It is possible to use gcc without using // libstdc++ (which is where cxxabi.h comes from). 
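// (Editor's sketch of GetTypeName<T>(), defined below in this file: with RTTI
// and a demangler available it yields a human-readable name; without RTTI it
// falls back to a placeholder. Exact spelling is compiler-dependent; the demo
// function name is hypothetical.)
#include <stdio.h>
#include "gtest/internal/gtest-type-util.h"

static void TypeNameDemo() {
  // Typically prints "int" (or a mangled form if no demangler is available).
  printf("%s\n", ::testing::internal::GetTypeName<int>().c_str());
}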
# if GTEST_HAS_CXXABI_H_
#  include <cxxabi.h>
# elif defined(__HP_aCC)
#  include <acxx_demangle.h>
# endif  // GTEST_HASH_CXXABI_H_

namespace testing {
namespace internal {

// Canonicalizes a given name with respect to the Standard C++ Library.
// This handles removing the inline namespace within `std` that is
// used by various standard libraries (e.g., `std::__1`). Names outside
// of namespace std are returned unmodified.
inline std::string CanonicalizeForStdLibVersioning(std::string s) {
  static const char prefix[] = "std::__";
  if (s.compare(0, strlen(prefix), prefix) == 0) {
    std::string::size_type end = s.find("::", strlen(prefix));
    if (end != s.npos) {
      // Erase everything between the initial `std` and the second `::`.
      s.erase(strlen("std"), end - strlen("std"));
    }
  }
  return s;
}

// GetTypeName<T>() returns a human-readable name of type T.
// NB: This function is also used in Google Mock, so don't move it inside of
// the typed-test-only section below.
template <typename T>
std::string GetTypeName() {
# if GTEST_HAS_RTTI

  const char* const name = typeid(T).name();
#  if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC)
  int status = 0;
  // gcc's implementation of typeid(T).name() mangles the type name,
  // so we have to demangle it.
#   if GTEST_HAS_CXXABI_H_
  using abi::__cxa_demangle;
#   endif  // GTEST_HAS_CXXABI_H_
  char* const readable_name = __cxa_demangle(name, 0, 0, &status);
  const std::string name_str(status == 0 ? readable_name : name);
  free(readable_name);
  return CanonicalizeForStdLibVersioning(name_str);
#  else
  return name;
#  endif  // GTEST_HAS_CXXABI_H_ || __HP_aCC

# else

  return "<type>";

# endif  // GTEST_HAS_RTTI
}

#if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P

// AssertTypeEq<T1, T2>::type is defined iff T1 and T2 are the same
// type. This can be used as a compile-time assertion to ensure that
// two types are equal.

template <typename T1, typename T2>
struct AssertTypeEq;

template <typename T>
struct AssertTypeEq<T, T> {
  typedef bool type;
};

// A unique type used as the default value for the arguments of class
// template Types. This allows us to simulate variadic templates
// (e.g. Types<int>, Types<int, double>, and etc), which C++ doesn't
// support directly.
struct None {};

// The following family of struct and struct templates are used to
// represent type lists. In particular, TypesN<T1, T2, ..., TN>
// represents a type list with N types (T1, T2, ..., and TN) in it.
// Except for Types0, every struct in the family has two member types:
// Head for the first type in the list, and Tail for the rest of the
// list.

// The empty type list.
struct Types0 {};

// Type lists of length 1, 2, 3, and so on.
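// (Editor's sketch: walking the Head/Tail lists defined just below at compile
// time, checked with AssertTypeEq from above. Standalone illustration; the
// demo namespace is hypothetical.)
#include "gtest/internal/gtest-type-util.h"

namespace type_list_demo {
typedef ::testing::internal::Types2<int, char> List;
// Each typedef compiles only if the two types are identical.
typedef ::testing::internal::AssertTypeEq<List::Head, int>::type HeadIsInt;
typedef ::testing::internal::AssertTypeEq<List::Tail::Head, char>::type NextIsChar;
typedef ::testing::internal::AssertTypeEq<List::Tail::Tail,
                                          ::testing::internal::Types0>::type EndOfList;
}  // namespace type_list_demo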
template struct Types1 { typedef T1 Head; typedef Types0 Tail; }; template struct Types2 { typedef T1 Head; typedef Types1 Tail; }; template struct Types3 { typedef T1 Head; typedef Types2 Tail; }; template struct Types4 { typedef T1 Head; typedef Types3 Tail; }; template struct Types5 { typedef T1 Head; typedef Types4 Tail; }; template struct Types6 { typedef T1 Head; typedef Types5 Tail; }; template struct Types7 { typedef T1 Head; typedef Types6 Tail; }; template struct Types8 { typedef T1 Head; typedef Types7 Tail; }; template struct Types9 { typedef T1 Head; typedef Types8 Tail; }; template struct Types10 { typedef T1 Head; typedef Types9 Tail; }; template struct Types11 { typedef T1 Head; typedef Types10 Tail; }; template struct Types12 { typedef T1 Head; typedef Types11 Tail; }; template struct Types13 { typedef T1 Head; typedef Types12 Tail; }; template struct Types14 { typedef T1 Head; typedef Types13 Tail; }; template struct Types15 { typedef T1 Head; typedef Types14 Tail; }; template struct Types16 { typedef T1 Head; typedef Types15 Tail; }; template struct Types17 { typedef T1 Head; typedef Types16 Tail; }; template struct Types18 { typedef T1 Head; typedef Types17 Tail; }; template struct Types19 { typedef T1 Head; typedef Types18 Tail; }; template struct Types20 { typedef T1 Head; typedef Types19 Tail; }; template struct Types21 { typedef T1 Head; typedef Types20 Tail; }; template struct Types22 { typedef T1 Head; typedef Types21 Tail; }; template struct Types23 { typedef T1 Head; typedef Types22 Tail; }; template struct Types24 { typedef T1 Head; typedef Types23 Tail; }; template struct Types25 { typedef T1 Head; typedef Types24 Tail; }; template struct Types26 { typedef T1 Head; typedef Types25 Tail; }; template struct Types27 { typedef T1 Head; typedef Types26 Tail; }; template struct Types28 { typedef T1 Head; typedef Types27 Tail; }; template struct Types29 { typedef T1 Head; typedef Types28 Tail; }; template struct Types30 { typedef T1 Head; typedef Types29 Tail; }; template struct Types31 { typedef T1 Head; typedef Types30 Tail; }; template struct Types32 { typedef T1 Head; typedef Types31 Tail; }; template struct Types33 { typedef T1 Head; typedef Types32 Tail; }; template struct Types34 { typedef T1 Head; typedef Types33 Tail; }; template struct Types35 { typedef T1 Head; typedef Types34 Tail; }; template struct Types36 { typedef T1 Head; typedef Types35 Tail; }; template struct Types37 { typedef T1 Head; typedef Types36 Tail; }; template struct Types38 { typedef T1 Head; typedef Types37 Tail; }; template struct Types39 { typedef T1 Head; typedef Types38 Tail; }; template struct Types40 { typedef T1 Head; typedef Types39 Tail; }; template struct Types41 { typedef T1 Head; typedef Types40 Tail; }; template struct Types42 { typedef T1 Head; typedef Types41 Tail; }; template struct Types43 { typedef T1 Head; typedef Types42 Tail; }; template struct Types44 { typedef T1 Head; typedef Types43 Tail; }; template struct Types45 { typedef T1 Head; typedef Types44 Tail; }; template struct Types46 { typedef T1 Head; typedef Types45 Tail; }; template struct Types47 { typedef T1 Head; typedef Types46 Tail; }; template struct Types48 { typedef T1 Head; typedef Types47 Tail; }; template struct Types49 { typedef T1 Head; typedef Types48 Tail; }; template struct Types50 { typedef T1 Head; typedef Types49 Tail; }; } // namespace internal // We don't want to require the users to write TypesN<...> directly, // as that would require them to count the length. 
Types<...> is much // easier to write, but generates horrible messages when there is a // compiler error, as gcc insists on printing out each template // argument, even if it has the default value (this means Types // will appear as Types in the compiler // errors). // // Our solution is to combine the best part of the two approaches: a // user would write Types, and Google Test will translate // that to TypesN internally to make error messages // readable. The translation is done by the 'type' member of the // Types template. template struct Types { typedef internal::Types50 type; }; template <> struct Types { typedef internal::Types0 type; }; template struct Types { typedef internal::Types1 type; }; template struct Types { typedef internal::Types2 type; }; template struct Types { typedef internal::Types3 type; }; template struct Types { typedef internal::Types4 type; }; template struct Types { typedef internal::Types5 type; }; template struct Types { typedef internal::Types6 type; }; template struct Types { typedef internal::Types7 type; }; template struct Types { typedef internal::Types8 type; }; template struct Types { typedef internal::Types9 type; }; template struct Types { typedef internal::Types10 type; }; template struct Types { typedef internal::Types11 type; }; template struct Types { typedef internal::Types12 type; }; template struct Types { typedef internal::Types13 type; }; template struct Types { typedef internal::Types14 type; }; template struct Types { typedef internal::Types15 type; }; template struct Types { typedef internal::Types16 type; }; template struct Types { typedef internal::Types17 type; }; template struct Types { typedef internal::Types18 type; }; template struct Types { typedef internal::Types19 type; }; template struct Types { typedef internal::Types20 type; }; template struct Types { typedef internal::Types21 type; }; template struct Types { typedef internal::Types22 type; }; template struct Types { typedef internal::Types23 type; }; template struct Types { typedef internal::Types24 type; }; template struct Types { typedef internal::Types25 type; }; template struct Types { typedef internal::Types26 type; }; template struct Types { typedef internal::Types27 type; }; template struct Types { typedef internal::Types28 type; }; template struct Types { typedef internal::Types29 type; }; template struct Types { typedef internal::Types30 type; }; template struct Types { typedef internal::Types31 type; }; template struct Types { typedef internal::Types32 type; }; template struct Types { typedef internal::Types33 type; }; template struct Types { typedef internal::Types34 type; }; template struct Types { typedef internal::Types35 type; }; template struct Types { typedef internal::Types36 type; }; template struct Types { typedef internal::Types37 type; }; template struct Types { typedef internal::Types38 type; }; template struct Types { typedef internal::Types39 type; }; template struct Types { typedef internal::Types40 type; }; template struct Types { typedef internal::Types41 type; }; template struct Types { typedef internal::Types42 type; }; template struct Types { typedef internal::Types43 type; }; template struct Types { typedef internal::Types44 type; }; template struct Types { typedef internal::Types45 type; }; template struct Types { typedef internal::Types46 type; }; template struct Types { typedef internal::Types47 type; }; template struct Types { typedef internal::Types48 type; }; template struct Types { typedef internal::Types49 type; }; namespace internal 
{ # define GTEST_TEMPLATE_ template class // The template "selector" struct TemplateSel is used to // represent Tmpl, which must be a class template with one type // parameter, as a type. TemplateSel::Bind::type is defined // as the type Tmpl. This allows us to actually instantiate the // template "selected" by TemplateSel. // // This trick is necessary for simulating typedef for class templates, // which C++ doesn't support directly. template struct TemplateSel { template struct Bind { typedef Tmpl type; }; }; # define GTEST_BIND_(TmplSel, T) \ TmplSel::template Bind::type // A unique struct template used as the default value for the // arguments of class template Templates. This allows us to simulate // variadic templates (e.g. Templates, Templates, // and etc), which C++ doesn't support directly. template struct NoneT {}; // The following family of struct and struct templates are used to // represent template lists. In particular, TemplatesN represents a list of N templates (T1, T2, ..., and TN). Except // for Templates0, every struct in the family has two member types: // Head for the selector of the first template in the list, and Tail // for the rest of the list. // The empty template list. struct Templates0 {}; // Template lists of length 1, 2, 3, and so on. template struct Templates1 { typedef TemplateSel Head; typedef Templates0 Tail; }; template struct Templates2 { typedef TemplateSel Head; typedef Templates1 Tail; }; template struct Templates3 { typedef TemplateSel Head; typedef Templates2 Tail; }; template struct Templates4 { typedef TemplateSel Head; typedef Templates3 Tail; }; template struct Templates5 { typedef TemplateSel Head; typedef Templates4 Tail; }; template struct Templates6 { typedef TemplateSel Head; typedef Templates5 Tail; }; template struct Templates7 { typedef TemplateSel Head; typedef Templates6 Tail; }; template struct Templates8 { typedef TemplateSel Head; typedef Templates7 Tail; }; template struct Templates9 { typedef TemplateSel Head; typedef Templates8 Tail; }; template struct Templates10 { typedef TemplateSel Head; typedef Templates9 Tail; }; template struct Templates11 { typedef TemplateSel Head; typedef Templates10 Tail; }; template struct Templates12 { typedef TemplateSel Head; typedef Templates11 Tail; }; template struct Templates13 { typedef TemplateSel Head; typedef Templates12 Tail; }; template struct Templates14 { typedef TemplateSel Head; typedef Templates13 Tail; }; template struct Templates15 { typedef TemplateSel Head; typedef Templates14 Tail; }; template struct Templates16 { typedef TemplateSel Head; typedef Templates15 Tail; }; template struct Templates17 { typedef TemplateSel Head; typedef Templates16 Tail; }; template struct Templates18 { typedef TemplateSel Head; typedef Templates17 Tail; }; template struct Templates19 { typedef TemplateSel Head; typedef Templates18 Tail; }; template struct Templates20 { typedef TemplateSel Head; typedef Templates19 Tail; }; template struct Templates21 { typedef TemplateSel Head; typedef Templates20 Tail; }; template struct Templates22 { typedef TemplateSel Head; typedef Templates21 Tail; }; template struct Templates23 { typedef TemplateSel Head; typedef Templates22 Tail; }; template struct Templates24 { typedef TemplateSel Head; typedef Templates23 Tail; }; template struct Templates25 { typedef TemplateSel Head; typedef Templates24 Tail; }; template struct Templates26 { typedef TemplateSel Head; typedef Templates25 Tail; }; template struct Templates27 { typedef TemplateSel Head; typedef Templates26 
Tail; }; template struct Templates28 { typedef TemplateSel Head; typedef Templates27 Tail; }; template struct Templates29 { typedef TemplateSel Head; typedef Templates28 Tail; }; template struct Templates30 { typedef TemplateSel Head; typedef Templates29 Tail; }; template struct Templates31 { typedef TemplateSel Head; typedef Templates30 Tail; }; template struct Templates32 { typedef TemplateSel Head; typedef Templates31 Tail; }; template struct Templates33 { typedef TemplateSel Head; typedef Templates32 Tail; }; template struct Templates34 { typedef TemplateSel Head; typedef Templates33 Tail; }; template struct Templates35 { typedef TemplateSel Head; typedef Templates34 Tail; }; template struct Templates36 { typedef TemplateSel Head; typedef Templates35 Tail; }; template struct Templates37 { typedef TemplateSel Head; typedef Templates36 Tail; }; template struct Templates38 { typedef TemplateSel Head; typedef Templates37 Tail; }; template struct Templates39 { typedef TemplateSel Head; typedef Templates38 Tail; }; template struct Templates40 { typedef TemplateSel Head; typedef Templates39 Tail; }; template struct Templates41 { typedef TemplateSel Head; typedef Templates40 Tail; }; template struct Templates42 { typedef TemplateSel Head; typedef Templates41 Tail; }; template struct Templates43 { typedef TemplateSel Head; typedef Templates42 Tail; }; template struct Templates44 { typedef TemplateSel Head; typedef Templates43 Tail; }; template struct Templates45 { typedef TemplateSel Head; typedef Templates44 Tail; }; template struct Templates46 { typedef TemplateSel Head; typedef Templates45 Tail; }; template struct Templates47 { typedef TemplateSel Head; typedef Templates46 Tail; }; template struct Templates48 { typedef TemplateSel Head; typedef Templates47 Tail; }; template struct Templates49 { typedef TemplateSel Head; typedef Templates48 Tail; }; template struct Templates50 { typedef TemplateSel Head; typedef Templates49 Tail; }; // We don't want to require the users to write TemplatesN<...> directly, // as that would require them to count the length. Templates<...> is much // easier to write, but generates horrible messages when there is a // compiler error, as gcc insists on printing out each template // argument, even if it has the default value (this means Templates // will appear as Templates in the compiler // errors). // // Our solution is to combine the best part of the two approaches: a // user would write Templates, and Google Test will translate // that to TemplatesN internally to make error messages // readable. The translation is done by the 'type' member of the // Templates template. 
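// (Editor's sketch for the machinery above and the Templates<...> aliases
// defined just below: TemplateSel carries a class template around as an
// ordinary type, and Bind<T> "applies" it. Box and the demo namespace are
// hypothetical.)
#include "gtest/internal/gtest-type-util.h"

namespace template_sel_demo {
template <typename T>
class Box {
 public:
  explicit Box(T v) : value(v) {}
  T value;
};

inline int UseBoundTemplate() {
  // GTEST_BIND_(TemplateSel<Box>, int) expands to exactly this: Box<int>.
  typedef ::testing::internal::TemplateSel<Box>::Bind<int>::type BoxOfInt;
  BoxOfInt b(42);
  return b.value;
}
}  // namespace template_sel_demo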
template struct Templates { typedef Templates50 type; }; template <> struct Templates { typedef Templates0 type; }; template struct Templates { typedef Templates1 type; }; template struct Templates { typedef Templates2 type; }; template struct Templates { typedef Templates3 type; }; template struct Templates { typedef Templates4 type; }; template struct Templates { typedef Templates5 type; }; template struct Templates { typedef Templates6 type; }; template struct Templates { typedef Templates7 type; }; template struct Templates { typedef Templates8 type; }; template struct Templates { typedef Templates9 type; }; template struct Templates { typedef Templates10 type; }; template struct Templates { typedef Templates11 type; }; template struct Templates { typedef Templates12 type; }; template struct Templates { typedef Templates13 type; }; template struct Templates { typedef Templates14 type; }; template struct Templates { typedef Templates15 type; }; template struct Templates { typedef Templates16 type; }; template struct Templates { typedef Templates17 type; }; template struct Templates { typedef Templates18 type; }; template struct Templates { typedef Templates19 type; }; template struct Templates { typedef Templates20 type; }; template struct Templates { typedef Templates21 type; }; template struct Templates { typedef Templates22 type; }; template struct Templates { typedef Templates23 type; }; template struct Templates { typedef Templates24 type; }; template struct Templates { typedef Templates25 type; }; template struct Templates { typedef Templates26 type; }; template struct Templates { typedef Templates27 type; }; template struct Templates { typedef Templates28 type; }; template struct Templates { typedef Templates29 type; }; template struct Templates { typedef Templates30 type; }; template struct Templates { typedef Templates31 type; }; template struct Templates { typedef Templates32 type; }; template struct Templates { typedef Templates33 type; }; template struct Templates { typedef Templates34 type; }; template struct Templates { typedef Templates35 type; }; template struct Templates { typedef Templates36 type; }; template struct Templates { typedef Templates37 type; }; template struct Templates { typedef Templates38 type; }; template struct Templates { typedef Templates39 type; }; template struct Templates { typedef Templates40 type; }; template struct Templates { typedef Templates41 type; }; template struct Templates { typedef Templates42 type; }; template struct Templates { typedef Templates43 type; }; template struct Templates { typedef Templates44 type; }; template struct Templates { typedef Templates45 type; }; template struct Templates { typedef Templates46 type; }; template struct Templates { typedef Templates47 type; }; template struct Templates { typedef Templates48 type; }; template struct Templates { typedef Templates49 type; }; // The TypeList template makes it possible to use either a single type // or a Types<...> list in TYPED_TEST_CASE() and // INSTANTIATE_TYPED_TEST_CASE_P(). 
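// (Editor's sketch for the TypeList helper defined just below: a bare type
// and a Types<...> list both normalize to an internal TypesN list. The demo
// namespace is hypothetical.)
#include "gtest/internal/gtest-type-util.h"

namespace type_list_norm_demo {
using ::testing::internal::AssertTypeEq;
using ::testing::internal::TypeList;
// A single type becomes a one-element list...
typedef AssertTypeEq<TypeList<int>::type,
                     ::testing::internal::Types1<int> >::type Single;
// ...and a Types<...> list is canonicalized to its TypesN form.
typedef AssertTypeEq<TypeList< ::testing::Types<int, char> >::type,
                     ::testing::internal::Types2<int, char> >::type Pair;
}  // namespace type_list_norm_demo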
template struct TypeList { typedef Types1 type; }; template struct TypeList > { typedef typename Types::type type; }; #endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P } // namespace internal } // namespace testing #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ libvpx-1.8.2/third_party/googletest/src/include/gtest/internal/gtest-type-util.h.pump000066400000000000000000000234161357355204000311320ustar00rootroot00000000000000$$ -*- mode: c++; -*- $var n = 50 $$ Maximum length of type lists we want to support. // Copyright 2008 Google Inc. // All Rights Reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Type utilities needed for implementing typed and type-parameterized // tests. This file is generated by a SCRIPT. DO NOT EDIT BY HAND! // // Currently we support at most $n types in a list, and at most $n // type-parameterized tests in one type-parameterized test case. // Please contact googletestframework@googlegroups.com if you need // more. // GOOGLETEST_CM0001 DO NOT DELETE #ifndef GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ #define GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ #include "gtest/internal/gtest-port.h" // #ifdef __GNUC__ is too general here. It is possible to use gcc without using // libstdc++ (which is where cxxabi.h comes from). # if GTEST_HAS_CXXABI_H_ # include # elif defined(__HP_aCC) # include # endif // GTEST_HASH_CXXABI_H_ namespace testing { namespace internal { // Canonicalizes a given name with respect to the Standard C++ Library. // This handles removing the inline namespace within `std` that is // used by various standard libraries (e.g., `std::__1`). Names outside // of namespace std are returned unmodified. inline std::string CanonicalizeForStdLibVersioning(std::string s) { static const char prefix[] = "std::__"; if (s.compare(0, strlen(prefix), prefix) == 0) { std::string::size_type end = s.find("::", strlen(prefix)); if (end != s.npos) { // Erase everything between the initial `std` and the second `::`. 
s.erase(strlen("std"), end - strlen("std")); } } return s; } // GetTypeName() returns a human-readable name of type T. // NB: This function is also used in Google Mock, so don't move it inside of // the typed-test-only section below. template std::string GetTypeName() { # if GTEST_HAS_RTTI const char* const name = typeid(T).name(); # if GTEST_HAS_CXXABI_H_ || defined(__HP_aCC) int status = 0; // gcc's implementation of typeid(T).name() mangles the type name, // so we have to demangle it. # if GTEST_HAS_CXXABI_H_ using abi::__cxa_demangle; # endif // GTEST_HAS_CXXABI_H_ char* const readable_name = __cxa_demangle(name, 0, 0, &status); const std::string name_str(status == 0 ? readable_name : name); free(readable_name); return CanonicalizeForStdLibVersioning(name_str); # else return name; # endif // GTEST_HAS_CXXABI_H_ || __HP_aCC # else return ""; # endif // GTEST_HAS_RTTI } #if GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P // AssertyTypeEq::type is defined iff T1 and T2 are the same // type. This can be used as a compile-time assertion to ensure that // two types are equal. template struct AssertTypeEq; template struct AssertTypeEq { typedef bool type; }; // A unique type used as the default value for the arguments of class // template Types. This allows us to simulate variadic templates // (e.g. Types, Type, and etc), which C++ doesn't // support directly. struct None {}; // The following family of struct and struct templates are used to // represent type lists. In particular, TypesN // represents a type list with N types (T1, T2, ..., and TN) in it. // Except for Types0, every struct in the family has two member types: // Head for the first type in the list, and Tail for the rest of the // list. // The empty type list. struct Types0 {}; // Type lists of length 1, 2, 3, and so on. template struct Types1 { typedef T1 Head; typedef Types0 Tail; }; $range i 2..n $for i [[ $range j 1..i $range k 2..i template <$for j, [[typename T$j]]> struct Types$i { typedef T1 Head; typedef Types$(i-1)<$for k, [[T$k]]> Tail; }; ]] } // namespace internal // We don't want to require the users to write TypesN<...> directly, // as that would require them to count the length. Types<...> is much // easier to write, but generates horrible messages when there is a // compiler error, as gcc insists on printing out each template // argument, even if it has the default value (this means Types // will appear as Types in the compiler // errors). // // Our solution is to combine the best part of the two approaches: a // user would write Types, and Google Test will translate // that to TypesN internally to make error messages // readable. The translation is done by the 'type' member of the // Types template. $range i 1..n template <$for i, [[typename T$i = internal::None]]> struct Types { typedef internal::Types$n<$for i, [[T$i]]> type; }; template <> struct Types<$for i, [[internal::None]]> { typedef internal::Types0 type; }; $range i 1..n-1 $for i [[ $range j 1..i $range k i+1..n template <$for j, [[typename T$j]]> struct Types<$for j, [[T$j]]$for k[[, internal::None]]> { typedef internal::Types$i<$for j, [[T$j]]> type; }; ]] namespace internal { # define GTEST_TEMPLATE_ template class // The template "selector" struct TemplateSel is used to // represent Tmpl, which must be a class template with one type // parameter, as a type. TemplateSel::Bind::type is defined // as the type Tmpl. This allows us to actually instantiate the // template "selected" by TemplateSel. 
// // This trick is necessary for simulating typedef for class templates, // which C++ doesn't support directly. template struct TemplateSel { template struct Bind { typedef Tmpl type; }; }; # define GTEST_BIND_(TmplSel, T) \ TmplSel::template Bind::type // A unique struct template used as the default value for the // arguments of class template Templates. This allows us to simulate // variadic templates (e.g. Templates, Templates, // and etc), which C++ doesn't support directly. template struct NoneT {}; // The following family of struct and struct templates are used to // represent template lists. In particular, TemplatesN represents a list of N templates (T1, T2, ..., and TN). Except // for Templates0, every struct in the family has two member types: // Head for the selector of the first template in the list, and Tail // for the rest of the list. // The empty template list. struct Templates0 {}; // Template lists of length 1, 2, 3, and so on. template struct Templates1 { typedef TemplateSel Head; typedef Templates0 Tail; }; $range i 2..n $for i [[ $range j 1..i $range k 2..i template <$for j, [[GTEST_TEMPLATE_ T$j]]> struct Templates$i { typedef TemplateSel Head; typedef Templates$(i-1)<$for k, [[T$k]]> Tail; }; ]] // We don't want to require the users to write TemplatesN<...> directly, // as that would require them to count the length. Templates<...> is much // easier to write, but generates horrible messages when there is a // compiler error, as gcc insists on printing out each template // argument, even if it has the default value (this means Templates // will appear as Templates in the compiler // errors). // // Our solution is to combine the best part of the two approaches: a // user would write Templates, and Google Test will translate // that to TemplatesN internally to make error messages // readable. The translation is done by the 'type' member of the // Templates template. $range i 1..n template <$for i, [[GTEST_TEMPLATE_ T$i = NoneT]]> struct Templates { typedef Templates$n<$for i, [[T$i]]> type; }; template <> struct Templates<$for i, [[NoneT]]> { typedef Templates0 type; }; $range i 1..n-1 $for i [[ $range j 1..i $range k i+1..n template <$for j, [[GTEST_TEMPLATE_ T$j]]> struct Templates<$for j, [[T$j]]$for k[[, NoneT]]> { typedef Templates$i<$for j, [[T$j]]> type; }; ]] // The TypeList template makes it possible to use either a single type // or a Types<...> list in TYPED_TEST_CASE() and // INSTANTIATE_TYPED_TEST_CASE_P(). template struct TypeList { typedef Types1 type; }; $range i 1..n template <$for i, [[typename T$i]]> struct TypeList > { typedef typename Types<$for i, [[T$i]]>::type type; }; #endif // GTEST_HAS_TYPED_TEST || GTEST_HAS_TYPED_TEST_P } // namespace internal } // namespace testing #endif // GTEST_INCLUDE_GTEST_INTERNAL_GTEST_TYPE_UTIL_H_ libvpx-1.8.2/third_party/googletest/src/src/000077500000000000000000000000001357355204000211155ustar00rootroot00000000000000libvpx-1.8.2/third_party/googletest/src/src/gtest-all.cc000066400000000000000000000041171357355204000233230ustar00rootroot00000000000000// Copyright 2008, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. 
// * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // Google C++ Testing and Mocking Framework (Google Test) // // Sometimes it's desirable to build Google Test by compiling a single file. // This file serves this purpose. // This line ensures that gtest.h can be compiled on its own, even // when it's fused. #include "gtest/gtest.h" // The following lines pull in the real gtest *.cc files. #include "src/gtest.cc" #include "src/gtest-death-test.cc" #include "src/gtest-filepath.cc" #include "src/gtest-port.cc" #include "src/gtest-printers.cc" #include "src/gtest-test-part.cc" #include "src/gtest-typed-test.cc" libvpx-1.8.2/third_party/googletest/src/src/gtest-death-test.cc000066400000000000000000001624101357355204000246160ustar00rootroot00000000000000// Copyright 2005, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // This file implements death tests. 
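// (Editor's sketch: the user-facing API implemented by this file, as a
// standalone test snippet. Test names are hypothetical; the macros and
// predicates are the documented Google Test ones, available when
// GTEST_HAS_DEATH_TEST is 1.)
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include "gtest/gtest.h"

#if GTEST_HAS_DEATH_TEST
TEST(DeathDemoTest, DiesWithMessage) {
  // Runs the statement in a child process; passes iff the child dies and
  // its stderr matches the regex.
  EXPECT_DEATH({ fprintf(stderr, "boom"); abort(); }, "boom");
}

TEST(DeathDemoTest, ChecksExitStatus) {
  // ExitedWithCode / KilledBySignal are the exit-status predicates defined
  // later in this file.
  EXPECT_EXIT(exit(1), ::testing::ExitedWithCode(1), "");
# if !GTEST_OS_WINDOWS
  EXPECT_EXIT(raise(SIGTERM), ::testing::KilledBySignal(SIGTERM), "");
# endif
}
#endif  // GTEST_HAS_DEATH_TEST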
#include "gtest/gtest-death-test.h" #include "gtest/internal/gtest-port.h" #include "gtest/internal/custom/gtest.h" #if GTEST_HAS_DEATH_TEST # if GTEST_OS_MAC # include # endif // GTEST_OS_MAC # include # include # include # if GTEST_OS_LINUX # include # endif // GTEST_OS_LINUX # include # if GTEST_OS_WINDOWS # include # else # include # include # endif // GTEST_OS_WINDOWS # if GTEST_OS_QNX # include # endif // GTEST_OS_QNX # if GTEST_OS_FUCHSIA # include # include # include # include # include # endif // GTEST_OS_FUCHSIA #endif // GTEST_HAS_DEATH_TEST #include "gtest/gtest-message.h" #include "gtest/internal/gtest-string.h" #include "src/gtest-internal-inl.h" namespace testing { // Constants. // The default death test style. // // This is defined in internal/gtest-port.h as "fast", but can be overridden by // a definition in internal/custom/gtest-port.h. The recommended value, which is // used internally at Google, is "threadsafe". static const char kDefaultDeathTestStyle[] = GTEST_DEFAULT_DEATH_TEST_STYLE; GTEST_DEFINE_string_( death_test_style, internal::StringFromGTestEnv("death_test_style", kDefaultDeathTestStyle), "Indicates how to run a death test in a forked child process: " "\"threadsafe\" (child process re-executes the test binary " "from the beginning, running only the specific death test) or " "\"fast\" (child process runs the death test immediately " "after forking)."); GTEST_DEFINE_bool_( death_test_use_fork, internal::BoolFromGTestEnv("death_test_use_fork", false), "Instructs to use fork()/_exit() instead of clone() in death tests. " "Ignored and always uses fork() on POSIX systems where clone() is not " "implemented. Useful when running under valgrind or similar tools if " "those do not support clone(). Valgrind 3.3.1 will just fail if " "it sees an unsupported combination of clone() flags. " "It is not recommended to use this flag w/o valgrind though it will " "work in 99% of the cases. Once valgrind is fixed, this flag will " "most likely be removed."); namespace internal { GTEST_DEFINE_string_( internal_run_death_test, "", "Indicates the file, line number, temporal index of " "the single death test to run, and a file descriptor to " "which a success code may be sent, all separated by " "the '|' characters. This flag is specified if and only if the current " "process is a sub-process launched for running a thread-safe " "death test. FOR INTERNAL USE ONLY."); } // namespace internal #if GTEST_HAS_DEATH_TEST namespace internal { // Valid only for fast death tests. Indicates the code is running in the // child process of a fast style death test. # if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA static bool g_in_fast_death_test_child = false; # endif // Returns a Boolean value indicating whether the caller is currently // executing in the context of the death test child process. Tools such as // Valgrind heap checkers may need this to modify their behavior in death // tests. IMPORTANT: This is an internal utility. Using it may break the // implementation of death tests. User code MUST NOT use it. bool InDeathTestChild() { # if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA // On Windows and Fuchsia, death tests are thread-safe regardless of the value // of the death_test_style flag. return !GTEST_FLAG(internal_run_death_test).empty(); # else if (GTEST_FLAG(death_test_style) == "threadsafe") return !GTEST_FLAG(internal_run_death_test).empty(); else return g_in_fast_death_test_child; #endif } } // namespace internal // ExitedWithCode constructor. 
ExitedWithCode::ExitedWithCode(int exit_code) : exit_code_(exit_code) { } // ExitedWithCode function-call operator. bool ExitedWithCode::operator()(int exit_status) const { # if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA return exit_status == exit_code_; # else return WIFEXITED(exit_status) && WEXITSTATUS(exit_status) == exit_code_; # endif // GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA } # if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA // KilledBySignal constructor. KilledBySignal::KilledBySignal(int signum) : signum_(signum) { } // KilledBySignal function-call operator. bool KilledBySignal::operator()(int exit_status) const { # if defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_) { bool result; if (GTEST_KILLED_BY_SIGNAL_OVERRIDE_(signum_, exit_status, &result)) { return result; } } # endif // defined(GTEST_KILLED_BY_SIGNAL_OVERRIDE_) return WIFSIGNALED(exit_status) && WTERMSIG(exit_status) == signum_; } # endif // !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA namespace internal { // Utilities needed for death tests. // Generates a textual description of a given exit code, in the format // specified by wait(2). static std::string ExitSummary(int exit_code) { Message m; # if GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA m << "Exited with exit status " << exit_code; # else if (WIFEXITED(exit_code)) { m << "Exited with exit status " << WEXITSTATUS(exit_code); } else if (WIFSIGNALED(exit_code)) { m << "Terminated by signal " << WTERMSIG(exit_code); } # ifdef WCOREDUMP if (WCOREDUMP(exit_code)) { m << " (core dumped)"; } # endif # endif // GTEST_OS_WINDOWS || GTEST_OS_FUCHSIA return m.GetString(); } // Returns true if exit_status describes a process that was terminated // by a signal, or exited normally with a nonzero exit code. bool ExitedUnsuccessfully(int exit_status) { return !ExitedWithCode(0)(exit_status); } # if !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA // Generates a textual failure message when a death test finds more than // one thread running, or cannot determine the number of threads, prior // to executing the given statement. It is the responsibility of the // caller not to pass a thread_count of 1. static std::string DeathTestThreadWarning(size_t thread_count) { Message msg; msg << "Death tests use fork(), which is unsafe particularly" << " in a threaded context. For this test, " << GTEST_NAME_ << " "; if (thread_count == 0) { msg << "couldn't detect the number of threads."; } else { msg << "detected " << thread_count << " threads."; } msg << " See " "https://github.com/google/googletest/blob/master/googletest/docs/" "advanced.md#death-tests-and-threads" << " for more explanation and suggested solutions, especially if" << " this is the last message you see before your test times out."; return msg.GetString(); } # endif // !GTEST_OS_WINDOWS && !GTEST_OS_FUCHSIA // Flag characters for reporting a death test that did not die. static const char kDeathTestLived = 'L'; static const char kDeathTestReturned = 'R'; static const char kDeathTestThrew = 'T'; static const char kDeathTestInternalError = 'I'; #if GTEST_OS_FUCHSIA // File descriptor used for the pipe in the child process. static const int kFuchsiaReadPipeFd = 3; #endif // An enumeration describing all of the possible ways that a death test can // conclude. DIED means that the process died while executing the test // code; LIVED means that process lived beyond the end of the test code; // RETURNED means that the test statement attempted to execute a return // statement, which is not allowed; THREW means that the test statement // returned control by throwing an exception. 
IN_PROGRESS means the test // has not yet concluded. // FIXME: Unify names and possibly values for // AbortReason, DeathTestOutcome, and flag characters above. enum DeathTestOutcome { IN_PROGRESS, DIED, LIVED, RETURNED, THREW }; // Routine for aborting the program which is safe to call from an // exec-style death test child process, in which case the error // message is propagated back to the parent process. Otherwise, the // message is simply printed to stderr. In either case, the program // then exits with status 1. static void DeathTestAbort(const std::string& message) { // On a POSIX system, this function may be called from a threadsafe-style // death test child process, which operates on a very small stack. Use // the heap for any additional non-minuscule memory requirements. const InternalRunDeathTestFlag* const flag = GetUnitTestImpl()->internal_run_death_test_flag(); if (flag != NULL) { FILE* parent = posix::FDOpen(flag->write_fd(), "w"); fputc(kDeathTestInternalError, parent); fprintf(parent, "%s", message.c_str()); fflush(parent); _exit(1); } else { fprintf(stderr, "%s", message.c_str()); fflush(stderr); posix::Abort(); } } // A replacement for CHECK that calls DeathTestAbort if the assertion // fails. # define GTEST_DEATH_TEST_CHECK_(expression) \ do { \ if (!::testing::internal::IsTrue(expression)) { \ DeathTestAbort( \ ::std::string("CHECK failed: File ") + __FILE__ + ", line " \ + ::testing::internal::StreamableToString(__LINE__) + ": " \ + #expression); \ } \ } while (::testing::internal::AlwaysFalse()) // This macro is similar to GTEST_DEATH_TEST_CHECK_, but it is meant for // evaluating any system call that fulfills two conditions: it must return // -1 on failure, and set errno to EINTR when it is interrupted and // should be tried again. The macro expands to a loop that repeatedly // evaluates the expression as long as it evaluates to -1 and sets // errno to EINTR. If the expression evaluates to -1 but errno is // something other than EINTR, DeathTestAbort is called. # define GTEST_DEATH_TEST_CHECK_SYSCALL_(expression) \ do { \ int gtest_retval; \ do { \ gtest_retval = (expression); \ } while (gtest_retval == -1 && errno == EINTR); \ if (gtest_retval == -1) { \ DeathTestAbort( \ ::std::string("CHECK failed: File ") + __FILE__ + ", line " \ + ::testing::internal::StreamableToString(__LINE__) + ": " \ + #expression + " != -1"); \ } \ } while (::testing::internal::AlwaysFalse()) // Returns the message describing the last system error in errno. std::string GetLastErrnoDescription() { return errno == 0 ? "" : posix::StrError(errno); } // This is called from a death test parent process to read a failure // message from the death test child process and log it with the FATAL // severity. On Windows, the message is read from a pipe handle. On other // platforms, it is read from a file descriptor. static void FailFromInternalError(int fd) { Message error; char buffer[256]; int num_read; do { while ((num_read = posix::Read(fd, buffer, 255)) > 0) { buffer[num_read] = '\0'; error << buffer; } } while (num_read == -1 && errno == EINTR); if (num_read == 0) { GTEST_LOG_(FATAL) << error.GetString(); } else { const int last_error = errno; GTEST_LOG_(FATAL) << "Error while reading death test internal: " << GetLastErrnoDescription() << " [" << last_error << "]"; } } // Death test constructor. Increments the running death test count // for the current test. 
DeathTest::DeathTest() { TestInfo* const info = GetUnitTestImpl()->current_test_info(); if (info == NULL) { DeathTestAbort("Cannot run a death test outside of a TEST or " "TEST_F construct"); } } // Creates and returns a death test by dispatching to the current // death test factory. bool DeathTest::Create(const char* statement, const RE* regex, const char* file, int line, DeathTest** test) { return GetUnitTestImpl()->death_test_factory()->Create( statement, regex, file, line, test); } const char* DeathTest::LastMessage() { return last_death_test_message_.c_str(); } void DeathTest::set_last_death_test_message(const std::string& message) { last_death_test_message_ = message; } std::string DeathTest::last_death_test_message_; // Provides cross platform implementation for some death functionality. class DeathTestImpl : public DeathTest { protected: DeathTestImpl(const char* a_statement, const RE* a_regex) : statement_(a_statement), regex_(a_regex), spawned_(false), status_(-1), outcome_(IN_PROGRESS), read_fd_(-1), write_fd_(-1) {} // read_fd_ is expected to be closed and cleared by a derived class. ~DeathTestImpl() { GTEST_DEATH_TEST_CHECK_(read_fd_ == -1); } void Abort(AbortReason reason); virtual bool Passed(bool status_ok); const char* statement() const { return statement_; } const RE* regex() const { return regex_; } bool spawned() const { return spawned_; } void set_spawned(bool is_spawned) { spawned_ = is_spawned; } int status() const { return status_; } void set_status(int a_status) { status_ = a_status; } DeathTestOutcome outcome() const { return outcome_; } void set_outcome(DeathTestOutcome an_outcome) { outcome_ = an_outcome; } int read_fd() const { return read_fd_; } void set_read_fd(int fd) { read_fd_ = fd; } int write_fd() const { return write_fd_; } void set_write_fd(int fd) { write_fd_ = fd; } // Called in the parent process only. Reads the result code of the death // test child process via a pipe, interprets it to set the outcome_ // member, and closes read_fd_. Outputs diagnostics and terminates in // case of unexpected codes. void ReadAndInterpretStatusByte(); private: // The textual content of the code this object is testing. This class // doesn't own this string and should not attempt to delete it. const char* const statement_; // The regular expression which test output must match. DeathTestImpl // doesn't own this object and should not attempt to delete it. const RE* const regex_; // True if the death test child process has been successfully spawned. bool spawned_; // The exit status of the child process. int status_; // How the death test concluded. DeathTestOutcome outcome_; // Descriptor to the read end of the pipe to the child process. It is // always -1 in the child process. The child keeps its write end of the // pipe in write_fd_. int read_fd_; // Descriptor to the child's write end of the pipe to the parent process. // It is always -1 in the parent process. The parent keeps its end of the // pipe in read_fd_. int write_fd_; }; // Called in the parent process only. Reads the result code of the death // test child process via a pipe, interprets it to set the outcome_ // member, and closes read_fd_. Outputs diagnostics and terminates in // case of unexpected codes. 
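// (Editor's note: the status codes read here form a one-byte protocol
// between child and parent, implemented earlier in this file:
//
//   child behavior                               parent interpretation
//   --------------                               ---------------------
//   dies before writing                          read() returns 0 -> DIED
//   writes 'L' (kDeathTestLived)                 -> LIVED
//   writes 'R' (kDeathTestReturned)              -> RETURNED
//   writes 'T' (kDeathTestThrew)                 -> THREW
//   writes 'I' + text (kDeathTestInternalError)  -> FailFromInternalError())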
void DeathTestImpl::ReadAndInterpretStatusByte() { char flag; int bytes_read; // The read() here blocks until data is available (signifying the // failure of the death test) or until the pipe is closed (signifying // its success), so it's okay to call this in the parent before // the child process has exited. do { bytes_read = posix::Read(read_fd(), &flag, 1); } while (bytes_read == -1 && errno == EINTR); if (bytes_read == 0) { set_outcome(DIED); } else if (bytes_read == 1) { switch (flag) { case kDeathTestReturned: set_outcome(RETURNED); break; case kDeathTestThrew: set_outcome(THREW); break; case kDeathTestLived: set_outcome(LIVED); break; case kDeathTestInternalError: FailFromInternalError(read_fd()); // Does not return. break; default: GTEST_LOG_(FATAL) << "Death test child process reported " << "unexpected status byte (" << static_cast(flag) << ")"; } } else { GTEST_LOG_(FATAL) << "Read from death test child process failed: " << GetLastErrnoDescription(); } GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Close(read_fd())); set_read_fd(-1); } // Signals that the death test code which should have exited, didn't. // Should be called only in a death test child process. // Writes a status byte to the child's status file descriptor, then // calls _exit(1). void DeathTestImpl::Abort(AbortReason reason) { // The parent process considers the death test to be a failure if // it finds any data in our pipe. So, here we write a single flag byte // to the pipe, then exit. const char status_ch = reason == TEST_DID_NOT_DIE ? kDeathTestLived : reason == TEST_THREW_EXCEPTION ? kDeathTestThrew : kDeathTestReturned; GTEST_DEATH_TEST_CHECK_SYSCALL_(posix::Write(write_fd(), &status_ch, 1)); // We are leaking the descriptor here because on some platforms (i.e., // when built as Windows DLL), destructors of global objects will still // run after calling _exit(). On such systems, write_fd_ will be // indirectly closed from the destructor of UnitTestImpl, causing double // close if it is also closed here. On debug configurations, double close // may assert. As there are no in-process buffers to flush here, we are // relying on the OS to close the descriptor after the process terminates // when the destructors are not run. _exit(1); // Exits w/o any normal exit hooks (we were supposed to crash) } // Returns an indented copy of stderr output for a death test. // This makes distinguishing death test output lines from regular log lines // much easier. static ::std::string FormatDeathTestOutput(const ::std::string& output) { ::std::string ret; for (size_t at = 0; ; ) { const size_t line_end = output.find('\n', at); ret += "[ DEATH ] "; if (line_end == ::std::string::npos) { ret += output.substr(at); break; } ret += output.substr(at, line_end + 1 - at); at = line_end + 1; } return ret; } // Assesses the success or failure of a death test, using both private // members which have previously been set, and one argument: // // Private data members: // outcome: An enumeration describing how the death test // concluded: DIED, LIVED, THREW, or RETURNED. The death test // fails in the latter three cases. // status: The exit status of the child process. On *nix, it is in the // in the format specified by wait(2). On Windows, this is the // value supplied to the ExitProcess() API or a numeric code // of the exception that terminated the program. // regex: A regular expression object to be applied to // the test's captured standard error output; the death test // fails if it does not match. 
// // Argument: // status_ok: true if exit_status is acceptable in the context of // this particular death test, which fails if it is false // // Returns true iff all of the above conditions are met. Otherwise, the // first failing condition, in the order given above, is the one that is // reported. Also sets the last death test message string. bool DeathTestImpl::Passed(bool status_ok) { if (!spawned()) return false; const std::string error_message = GetCapturedStderr(); bool success = false; Message buffer; buffer << "Death test: " << statement() << "\n"; switch (outcome()) { case LIVED: buffer << " Result: failed to die.\n" << " Error msg:\n" << FormatDeathTestOutput(error_message); break; case THREW: buffer << " Result: threw an exception.\n" << " Error msg:\n" << FormatDeathTestOutput(error_message); break; case RETURNED: buffer << " Result: illegal return in test statement.\n" << " Error msg:\n" << FormatDeathTestOutput(error_message); break; case DIED: if (status_ok) { # if GTEST_USES_PCRE // PCRE regexes support embedded NULs. const bool matched = RE::PartialMatch(error_message, *regex()); # else const bool matched = RE::PartialMatch(error_message.c_str(), *regex()); # endif // GTEST_USES_PCRE if (matched) { success = true; } else { buffer << " Result: died but not with expected error.\n" << " Expected: " << regex()->pattern() << "\n" << "Actual msg:\n" << FormatDeathTestOutput(error_message); } } else { buffer << " Result: died but not with expected exit code:\n" << " " << ExitSummary(status()) << "\n" << "Actual msg:\n" << FormatDeathTestOutput(error_message); } break; case IN_PROGRESS: default: GTEST_LOG_(FATAL) << "DeathTest::Passed somehow called before conclusion of test"; } DeathTest::set_last_death_test_message(buffer.GetString()); return success; } # if GTEST_OS_WINDOWS // WindowsDeathTest implements death tests on Windows. Due to the // specifics of starting new processes on Windows, death tests there are // always threadsafe, and Google Test considers the // --gtest_death_test_style=fast setting to be equivalent to // --gtest_death_test_style=threadsafe there. // // A few implementation notes: Like the Linux version, the Windows // implementation uses pipes for child-to-parent communication. But due to // the specifics of pipes on Windows, some extra steps are required: // // 1. The parent creates a communication pipe and stores handles to both // ends of it. // 2. The parent starts the child and provides it with the information // necessary to acquire the handle to the write end of the pipe. // 3. The child acquires the write end of the pipe and signals the parent // using a Windows event. // 4. Now the parent can release the write end of the pipe on its side. If // this is done before step 3, the object's reference count goes down to // 0 and it is destroyed, preventing the child from acquiring it. The // parent now has to release it, or read operations on the read end of // the pipe will not return when the child terminates. // 5. The parent reads child's output through the pipe (outcome code and // any possible error messages) from the pipe, and its stderr and then // determines whether to fail the test. // // Note: to distinguish Win32 API calls from the local method and function // calls, the former are explicitly resolved in the global namespace. 
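//
// For illustration only, steps 1 and 4 above reduce to roughly these Win32
// calls (a simplified sketch with error handling omitted; the actual code
// is in WindowsDeathTest::AssumeRole and WindowsDeathTest::Wait below):
//
//   SECURITY_ATTRIBUTES inheritable = {
//       sizeof(SECURITY_ATTRIBUTES), NULL, TRUE };  // Inheritable handles.
//   HANDLE read_handle, write_handle;
//   ::CreatePipe(&read_handle, &write_handle, &inheritable, 0);  // Step 1.
//   // ... CreateProcess the child and wait for its event (steps 2-3) ...
//   ::CloseHandle(write_handle);                                 // Step 4.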
// class WindowsDeathTest : public DeathTestImpl { public: WindowsDeathTest(const char* a_statement, const RE* a_regex, const char* file, int line) : DeathTestImpl(a_statement, a_regex), file_(file), line_(line) {} // All of these virtual functions are inherited from DeathTest. virtual int Wait(); virtual TestRole AssumeRole(); private: // The name of the file in which the death test is located. const char* const file_; // The line number on which the death test is located. const int line_; // Handle to the write end of the pipe to the child process. AutoHandle write_handle_; // Child process handle. AutoHandle child_handle_; // Event the child process uses to signal the parent that it has // acquired the handle to the write end of the pipe. After seeing this // event the parent can release its own handles to make sure its // ReadFile() calls return when the child terminates. AutoHandle event_handle_; }; // Waits for the child in a death test to exit, returning its exit // status, or 0 if no child process exists. As a side effect, sets the // outcome data member. int WindowsDeathTest::Wait() { if (!spawned()) return 0; // Wait until the child either signals that it has acquired the write end // of the pipe or it dies. const HANDLE wait_handles[2] = { child_handle_.Get(), event_handle_.Get() }; switch (::WaitForMultipleObjects(2, wait_handles, FALSE, // Waits for any of the handles. INFINITE)) { case WAIT_OBJECT_0: case WAIT_OBJECT_0 + 1: break; default: GTEST_DEATH_TEST_CHECK_(false); // Should not get here. } // The child has acquired the write end of the pipe or exited. // We release the handle on our side and continue. write_handle_.Reset(); event_handle_.Reset(); ReadAndInterpretStatusByte(); // Waits for the child process to exit if it haven't already. This // returns immediately if the child has already exited, regardless of // whether previous calls to WaitForMultipleObjects synchronized on this // handle or not. GTEST_DEATH_TEST_CHECK_( WAIT_OBJECT_0 == ::WaitForSingleObject(child_handle_.Get(), INFINITE)); DWORD status_code; GTEST_DEATH_TEST_CHECK_( ::GetExitCodeProcess(child_handle_.Get(), &status_code) != FALSE); child_handle_.Reset(); set_status(static_cast(status_code)); return status(); } // The AssumeRole process for a Windows death test. It creates a child // process with the same executable as the current process to run the // death test. The child process is given the --gtest_filter and // --gtest_internal_run_death_test flags such that it knows to run the // current death test only. DeathTest::TestRole WindowsDeathTest::AssumeRole() { const UnitTestImpl* const impl = GetUnitTestImpl(); const InternalRunDeathTestFlag* const flag = impl->internal_run_death_test_flag(); const TestInfo* const info = impl->current_test_info(); const int death_test_index = info->result()->death_test_count(); if (flag != NULL) { // ParseInternalRunDeathTestFlag() has performed all the necessary // processing. set_write_fd(flag->write_fd()); return EXECUTE_TEST; } // WindowsDeathTest uses an anonymous pipe to communicate results of // a death test. SECURITY_ATTRIBUTES handles_are_inheritable = { sizeof(SECURITY_ATTRIBUTES), NULL, TRUE }; HANDLE read_handle, write_handle; GTEST_DEATH_TEST_CHECK_( ::CreatePipe(&read_handle, &write_handle, &handles_are_inheritable, 0) // Default buffer size. 
      != FALSE);
  set_read_fd(::_open_osfhandle(reinterpret_cast<intptr_t>(read_handle),
                                O_RDONLY));
  write_handle_.Reset(write_handle);
  event_handle_.Reset(::CreateEvent(
      &handles_are_inheritable,
      TRUE,    // The event will automatically reset to non-signaled state.
      FALSE,   // The initial state is non-signalled.
      NULL));  // The event is unnamed.
  GTEST_DEATH_TEST_CHECK_(event_handle_.Get() != NULL);
  const std::string filter_flag =
      std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" +
      info->test_case_name() + "." + info->name();
  const std::string internal_flag =
      std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag +
      "=" + file_ + "|" + StreamableToString(line_) + "|" +
      StreamableToString(death_test_index) + "|" +
      StreamableToString(static_cast<unsigned int>(::GetCurrentProcessId())) +
      // size_t has the same width as pointers on both 32-bit and 64-bit
      // Windows platforms.
      // See http://msdn.microsoft.com/en-us/library/tcxf1dw6.aspx.
      "|" + StreamableToString(reinterpret_cast<size_t>(write_handle)) +
      "|" + StreamableToString(reinterpret_cast<size_t>(event_handle_.Get()));

  char executable_path[_MAX_PATH + 1];  // NOLINT
  GTEST_DEATH_TEST_CHECK_(
      _MAX_PATH + 1 != ::GetModuleFileNameA(NULL, executable_path, _MAX_PATH));

  std::string command_line =
      std::string(::GetCommandLineA()) + " " + filter_flag + " \"" +
      internal_flag + "\"";

  DeathTest::set_last_death_test_message("");

  CaptureStderr();
  // Flush the log buffers since the log streams are shared with the child.
  FlushInfoLog();

  // The child process will share the standard handles with the parent.
  STARTUPINFOA startup_info;
  memset(&startup_info, 0, sizeof(STARTUPINFO));
  startup_info.dwFlags = STARTF_USESTDHANDLES;
  startup_info.hStdInput = ::GetStdHandle(STD_INPUT_HANDLE);
  startup_info.hStdOutput = ::GetStdHandle(STD_OUTPUT_HANDLE);
  startup_info.hStdError = ::GetStdHandle(STD_ERROR_HANDLE);

  PROCESS_INFORMATION process_info;
  GTEST_DEATH_TEST_CHECK_(::CreateProcessA(
      executable_path, const_cast<char*>(command_line.c_str()),
      NULL,   // Returned process handle is not inheritable.
      NULL,   // Returned thread handle is not inheritable.
      TRUE,   // Child inherits all inheritable handles (for write_handle_).
      0x0,    // Default creation flags.
      NULL,   // Inherit the parent's environment.
      UnitTest::GetInstance()->original_working_dir(), &startup_info,
      &process_info) != FALSE);
  child_handle_.Reset(process_info.hProcess);
  ::CloseHandle(process_info.hThread);
  set_spawned(true);
  return OVERSEE_TEST;
}

# elif GTEST_OS_FUCHSIA

class FuchsiaDeathTest : public DeathTestImpl {
 public:
  FuchsiaDeathTest(const char* a_statement, const RE* a_regex,
                   const char* file, int line)
      : DeathTestImpl(a_statement, a_regex), file_(file), line_(line) {}
  virtual ~FuchsiaDeathTest() {
    zx_status_t status = zx_handle_close(child_process_);
    GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
    status = zx_handle_close(port_);
    GTEST_DEATH_TEST_CHECK_(status == ZX_OK);
  }

  // All of these virtual functions are inherited from DeathTest.
  virtual int Wait();
  virtual TestRole AssumeRole();

 private:
  // The name of the file in which the death test is located.
  const char* const file_;
  // The line number on which the death test is located.
  const int line_;

  zx_handle_t child_process_ = ZX_HANDLE_INVALID;
  zx_handle_t port_ = ZX_HANDLE_INVALID;
};

// Utility class for accumulating command-line arguments.
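// The class defined just below is typically used like this (an
// illustrative sketch, not code that appears elsewhere in Google Test):
//
//   Arguments args;
//   args.AddArguments(GetInjectableArgvs());
//   args.AddArgument("--gtest_filter=MyDeathTest.Foo");
//   // args.Argv() now yields a NULL-terminated argv array suitable for
//   // passing to a spawn()/exec()-style call.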
class Arguments {
 public:
  Arguments() { args_.push_back(NULL); }

  ~Arguments() {
    for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
         ++i) {
      free(*i);
    }
  }
  void AddArgument(const char* argument) {
    args_.insert(args_.end() - 1, posix::StrDup(argument));
  }

  template <typename Str>
  void AddArguments(const ::std::vector<Str>& arguments) {
    for (typename ::std::vector<Str>::const_iterator i = arguments.begin();
         i != arguments.end();
         ++i) {
      args_.insert(args_.end() - 1, posix::StrDup(i->c_str()));
    }
  }
  char* const* Argv() {
    return &args_[0];
  }

  int size() {
    return args_.size() - 1;
  }

 private:
  std::vector<char*> args_;
};

// Waits for the child in a death test to exit, returning its exit
// status, or 0 if no child process exists.  As a side effect, sets the
// outcome data member.
int FuchsiaDeathTest::Wait() {
  if (!spawned())
    return 0;

  // Register to wait for the child process to terminate.
  zx_status_t status_zx;
  status_zx = zx_object_wait_async(child_process_,
                                   port_,
                                   0 /* key */,
                                   ZX_PROCESS_TERMINATED,
                                   ZX_WAIT_ASYNC_ONCE);
  GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);

  // Wait for it to terminate, or an exception to be received.
  zx_port_packet_t packet;
  status_zx = zx_port_wait(port_, ZX_TIME_INFINITE, &packet);
  GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);

  if (ZX_PKT_IS_EXCEPTION(packet.type)) {
    // Process encountered an exception. Kill it directly rather than letting
    // other handlers process the event.
    status_zx = zx_task_kill(child_process_);
    GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);

    // Now wait for |child_process_| to terminate.
    zx_signals_t signals = 0;
    status_zx = zx_object_wait_one(
        child_process_, ZX_PROCESS_TERMINATED, ZX_TIME_INFINITE, &signals);
    GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);
    GTEST_DEATH_TEST_CHECK_(signals & ZX_PROCESS_TERMINATED);
  } else {
    // Process terminated.
    GTEST_DEATH_TEST_CHECK_(ZX_PKT_IS_SIGNAL_ONE(packet.type));
    GTEST_DEATH_TEST_CHECK_(packet.signal.observed & ZX_PROCESS_TERMINATED);
  }

  ReadAndInterpretStatusByte();

  zx_info_process_t buffer;
  status_zx = zx_object_get_info(
      child_process_, ZX_INFO_PROCESS, &buffer, sizeof(buffer),
      nullptr, nullptr);
  GTEST_DEATH_TEST_CHECK_(status_zx == ZX_OK);

  GTEST_DEATH_TEST_CHECK_(buffer.exited);
  set_status(buffer.return_code);
  return status();
}

// The AssumeRole process for a Fuchsia death test.  It creates a child
// process with the same executable as the current process to run the
// death test.  The child process is given the --gtest_filter and
// --gtest_internal_run_death_test flags such that it knows to run the
// current death test only.
DeathTest::TestRole FuchsiaDeathTest::AssumeRole() {
  const UnitTestImpl* const impl = GetUnitTestImpl();
  const InternalRunDeathTestFlag* const flag =
      impl->internal_run_death_test_flag();
  const TestInfo* const info = impl->current_test_info();
  const int death_test_index = info->result()->death_test_count();

  if (flag != NULL) {
    // ParseInternalRunDeathTestFlag() has performed all the necessary
    // processing.
    set_write_fd(kFuchsiaReadPipeFd);
    return EXECUTE_TEST;
  }

  CaptureStderr();
  // Flush the log buffers since the log streams are shared with the child.
  FlushInfoLog();

  // Build the child process command line.
  const std::string filter_flag =
      std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" +
      info->test_case_name() + "."
+ info->name(); const std::string internal_flag = std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "=" + file_ + "|" + StreamableToString(line_) + "|" + StreamableToString(death_test_index); Arguments args; args.AddArguments(GetInjectableArgvs()); args.AddArgument(filter_flag.c_str()); args.AddArgument(internal_flag.c_str()); // Build the pipe for communication with the child. zx_status_t status; zx_handle_t child_pipe_handle; uint32_t type; status = fdio_pipe_half(&child_pipe_handle, &type); GTEST_DEATH_TEST_CHECK_(status >= 0); set_read_fd(status); // Set the pipe handle for the child. fdio_spawn_action_t add_handle_action = {}; add_handle_action.action = FDIO_SPAWN_ACTION_ADD_HANDLE; add_handle_action.h.id = PA_HND(type, kFuchsiaReadPipeFd); add_handle_action.h.handle = child_pipe_handle; // Spawn the child process. status = fdio_spawn_etc(ZX_HANDLE_INVALID, FDIO_SPAWN_CLONE_ALL, args.Argv()[0], args.Argv(), nullptr, 1, &add_handle_action, &child_process_, nullptr); GTEST_DEATH_TEST_CHECK_(status == ZX_OK); // Create an exception port and attach it to the |child_process_|, to allow // us to suppress the system default exception handler from firing. status = zx_port_create(0, &port_); GTEST_DEATH_TEST_CHECK_(status == ZX_OK); status = zx_task_bind_exception_port( child_process_, port_, 0 /* key */, 0 /*options */); GTEST_DEATH_TEST_CHECK_(status == ZX_OK); set_spawned(true); return OVERSEE_TEST; } #else // We are neither on Windows, nor on Fuchsia. // ForkingDeathTest provides implementations for most of the abstract // methods of the DeathTest interface. Only the AssumeRole method is // left undefined. class ForkingDeathTest : public DeathTestImpl { public: ForkingDeathTest(const char* statement, const RE* regex); // All of these virtual functions are inherited from DeathTest. virtual int Wait(); protected: void set_child_pid(pid_t child_pid) { child_pid_ = child_pid; } private: // PID of child process during death test; 0 in the child process itself. pid_t child_pid_; }; // Constructs a ForkingDeathTest. ForkingDeathTest::ForkingDeathTest(const char* a_statement, const RE* a_regex) : DeathTestImpl(a_statement, a_regex), child_pid_(-1) {} // Waits for the child in a death test to exit, returning its exit // status, or 0 if no child process exists. As a side effect, sets the // outcome data member. int ForkingDeathTest::Wait() { if (!spawned()) return 0; ReadAndInterpretStatusByte(); int status_value; GTEST_DEATH_TEST_CHECK_SYSCALL_(waitpid(child_pid_, &status_value, 0)); set_status(status_value); return status_value; } // A concrete death test class that forks, then immediately runs the test // in the child process. class NoExecDeathTest : public ForkingDeathTest { public: NoExecDeathTest(const char* a_statement, const RE* a_regex) : ForkingDeathTest(a_statement, a_regex) { } virtual TestRole AssumeRole(); }; // The AssumeRole process for a fork-and-run death test. It implements a // straightforward fork, with a simple pipe to transmit the status byte. DeathTest::TestRole NoExecDeathTest::AssumeRole() { const size_t thread_count = GetThreadCount(); if (thread_count != 1) { GTEST_LOG_(WARNING) << DeathTestThreadWarning(thread_count); } int pipe_fd[2]; GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1); DeathTest::set_last_death_test_message(""); CaptureStderr(); // When we fork the process below, the log file buffers are copied, but the // file descriptors are shared. 
// We flush all log files here so that closing
// the file descriptors in the child process doesn't throw off the
// synchronization between descriptors and buffers in the parent process.
// This is as close to the fork as possible to avoid a race condition in case
// there are multiple threads running before the death test, and another
// thread writes to the log file.
  FlushInfoLog();

  const pid_t child_pid = fork();
  GTEST_DEATH_TEST_CHECK_(child_pid != -1);
  set_child_pid(child_pid);
  if (child_pid == 0) {
    GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[0]));
    set_write_fd(pipe_fd[1]);
    // Redirects all logging to stderr in the child process to prevent
    // concurrent writes to the log files.  We capture stderr in the parent
    // process and append the child process' output to a log.
    LogToStderr();
    // Event forwarding to the listeners of event listener API must be shut
    // down in death test subprocesses.
    GetUnitTestImpl()->listeners()->SuppressEventForwarding();
    g_in_fast_death_test_child = true;
    return EXECUTE_TEST;
  } else {
    GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1]));
    set_read_fd(pipe_fd[0]);
    set_spawned(true);
    return OVERSEE_TEST;
  }
}

// A concrete death test class that forks and re-executes the main
// program from the beginning, with command-line flags set that cause
// only this specific death test to be run.
class ExecDeathTest : public ForkingDeathTest {
 public:
  ExecDeathTest(const char* a_statement, const RE* a_regex,
                const char* file, int line) :
      ForkingDeathTest(a_statement, a_regex), file_(file), line_(line) { }
  virtual TestRole AssumeRole();
 private:
  static ::std::vector<std::string> GetArgvsForDeathTestChildProcess() {
    ::std::vector<std::string> args = GetInjectableArgvs();
# if defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_)
    ::std::vector<std::string> extra_args =
        GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_();
    args.insert(args.end(), extra_args.begin(), extra_args.end());
# endif  // defined(GTEST_EXTRA_DEATH_TEST_COMMAND_LINE_ARGS_)
    return args;
  }
  // The name of the file in which the death test is located.
  const char* const file_;
  // The line number on which the death test is located.
  const int line_;
};

// Utility class for accumulating command-line arguments.
class Arguments {
 public:
  Arguments() { args_.push_back(NULL); }

  ~Arguments() {
    for (std::vector<char*>::iterator i = args_.begin(); i != args_.end();
         ++i) {
      free(*i);
    }
  }
  void AddArgument(const char* argument) {
    args_.insert(args_.end() - 1, posix::StrDup(argument));
  }

  template <typename Str>
  void AddArguments(const ::std::vector<Str>& arguments) {
    for (typename ::std::vector<Str>::const_iterator i = arguments.begin();
         i != arguments.end();
         ++i) {
      args_.insert(args_.end() - 1, posix::StrDup(i->c_str()));
    }
  }
  char* const* Argv() {
    return &args_[0];
  }

 private:
  std::vector<char*> args_;
};

// A struct that encompasses the arguments to the child process of a
// threadsafe-style death test process.
struct ExecDeathTestArgs {
  char* const* argv;  // Command-line arguments for the child's call to exec
  int close_fd;       // File descriptor to close; the read end of a pipe
};

# if GTEST_OS_MAC
inline char** GetEnviron() {
  // When Google Test is built as a framework on MacOS X, the environ variable
  // is unavailable. Apple's documentation (man environ) recommends using
  // _NSGetEnviron() instead.
  return *_NSGetEnviron();
}
# else
// Some POSIX platforms expect you to declare environ. extern "C" makes
// it reside in the global namespace.
extern "C" char** environ; inline char** GetEnviron() { return environ; } # endif // GTEST_OS_MAC # if !GTEST_OS_QNX // The main function for a threadsafe-style death test child process. // This function is called in a clone()-ed process and thus must avoid // any potentially unsafe operations like malloc or libc functions. static int ExecDeathTestChildMain(void* child_arg) { ExecDeathTestArgs* const args = static_cast(child_arg); GTEST_DEATH_TEST_CHECK_SYSCALL_(close(args->close_fd)); // We need to execute the test program in the same environment where // it was originally invoked. Therefore we change to the original // working directory first. const char* const original_dir = UnitTest::GetInstance()->original_working_dir(); // We can safely call chdir() as it's a direct system call. if (chdir(original_dir) != 0) { DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " + GetLastErrnoDescription()); return EXIT_FAILURE; } // We can safely call execve() as it's a direct system call. We // cannot use execvp() as it's a libc function and thus potentially // unsafe. Since execve() doesn't search the PATH, the user must // invoke the test program via a valid path that contains at least // one path separator. execve(args->argv[0], args->argv, GetEnviron()); DeathTestAbort(std::string("execve(") + args->argv[0] + ", ...) in " + original_dir + " failed: " + GetLastErrnoDescription()); return EXIT_FAILURE; } # endif // !GTEST_OS_QNX # if GTEST_HAS_CLONE // Two utility routines that together determine the direction the stack // grows. // This could be accomplished more elegantly by a single recursive // function, but we want to guard against the unlikely possibility of // a smart compiler optimizing the recursion away. // // GTEST_NO_INLINE_ is required to prevent GCC 4.6 from inlining // StackLowerThanAddress into StackGrowsDown, which then doesn't give // correct answer. static void StackLowerThanAddress(const void* ptr, bool* result) GTEST_NO_INLINE_; static void StackLowerThanAddress(const void* ptr, bool* result) { int dummy; *result = (&dummy < ptr); } // Make sure AddressSanitizer does not tamper with the stack here. GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ static bool StackGrowsDown() { int dummy; bool result; StackLowerThanAddress(&dummy, &result); return result; } # endif // GTEST_HAS_CLONE // Spawns a child process with the same executable as the current process in // a thread-safe manner and instructs it to run the death test. The // implementation uses fork(2) + exec. On systems where clone(2) is // available, it is used instead, being slightly more thread-safe. On QNX, // fork supports only single-threaded environments, so this function uses // spawn(2) there instead. The function dies with an error message if // anything goes wrong. static pid_t ExecDeathTestSpawnChild(char* const* argv, int close_fd) { ExecDeathTestArgs args = { argv, close_fd }; pid_t child_pid = -1; # if GTEST_OS_QNX // Obtains the current directory and sets it to be closed in the child // process. const int cwd_fd = open(".", O_RDONLY); GTEST_DEATH_TEST_CHECK_(cwd_fd != -1); GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(cwd_fd, F_SETFD, FD_CLOEXEC)); // We need to execute the test program in the same environment where // it was originally invoked. Therefore we change to the original // working directory first. const char* const original_dir = UnitTest::GetInstance()->original_working_dir(); // We can safely call chdir() as it's a direct system call. 
if (chdir(original_dir) != 0) { DeathTestAbort(std::string("chdir(\"") + original_dir + "\") failed: " + GetLastErrnoDescription()); return EXIT_FAILURE; } int fd_flags; // Set close_fd to be closed after spawn. GTEST_DEATH_TEST_CHECK_SYSCALL_(fd_flags = fcntl(close_fd, F_GETFD)); GTEST_DEATH_TEST_CHECK_SYSCALL_(fcntl(close_fd, F_SETFD, fd_flags | FD_CLOEXEC)); struct inheritance inherit = {0}; // spawn is a system call. child_pid = spawn(args.argv[0], 0, NULL, &inherit, args.argv, GetEnviron()); // Restores the current working directory. GTEST_DEATH_TEST_CHECK_(fchdir(cwd_fd) != -1); GTEST_DEATH_TEST_CHECK_SYSCALL_(close(cwd_fd)); # else // GTEST_OS_QNX # if GTEST_OS_LINUX // When a SIGPROF signal is received while fork() or clone() are executing, // the process may hang. To avoid this, we ignore SIGPROF here and re-enable // it after the call to fork()/clone() is complete. struct sigaction saved_sigprof_action; struct sigaction ignore_sigprof_action; memset(&ignore_sigprof_action, 0, sizeof(ignore_sigprof_action)); sigemptyset(&ignore_sigprof_action.sa_mask); ignore_sigprof_action.sa_handler = SIG_IGN; GTEST_DEATH_TEST_CHECK_SYSCALL_(sigaction( SIGPROF, &ignore_sigprof_action, &saved_sigprof_action)); # endif // GTEST_OS_LINUX # if GTEST_HAS_CLONE const bool use_fork = GTEST_FLAG(death_test_use_fork); if (!use_fork) { static const bool stack_grows_down = StackGrowsDown(); const size_t stack_size = getpagesize(); // MMAP_ANONYMOUS is not defined on Mac, so we use MAP_ANON instead. void* const stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE, MAP_ANON | MAP_PRIVATE, -1, 0); GTEST_DEATH_TEST_CHECK_(stack != MAP_FAILED); // Maximum stack alignment in bytes: For a downward-growing stack, this // amount is subtracted from size of the stack space to get an address // that is within the stack space and is aligned on all systems we care // about. As far as I know there is no ABI with stack alignment greater // than 64. We assume stack and stack_size already have alignment of // kMaxStackAlignment. const size_t kMaxStackAlignment = 64; void* const stack_top = static_cast(stack) + (stack_grows_down ? stack_size - kMaxStackAlignment : 0); GTEST_DEATH_TEST_CHECK_(stack_size > kMaxStackAlignment && reinterpret_cast(stack_top) % kMaxStackAlignment == 0); child_pid = clone(&ExecDeathTestChildMain, stack_top, SIGCHLD, &args); GTEST_DEATH_TEST_CHECK_(munmap(stack, stack_size) != -1); } # else const bool use_fork = true; # endif // GTEST_HAS_CLONE if (use_fork && (child_pid = fork()) == 0) { ExecDeathTestChildMain(&args); _exit(0); } # endif // GTEST_OS_QNX # if GTEST_OS_LINUX GTEST_DEATH_TEST_CHECK_SYSCALL_( sigaction(SIGPROF, &saved_sigprof_action, NULL)); # endif // GTEST_OS_LINUX GTEST_DEATH_TEST_CHECK_(child_pid != -1); return child_pid; } // The AssumeRole process for a fork-and-exec death test. It re-executes the // main program from the beginning, setting the --gtest_filter // and --gtest_internal_run_death_test flags to cause only the current // death test to be re-run. 
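//
// For example (with illustrative values), a death test at foo_test.cc:42
// whose status pipe write end is file descriptor 5 would be re-run
// roughly as:
//
//   ./foo_test --gtest_filter=MyDeathTest.Foo \
//       '--gtest_internal_run_death_test=foo_test.cc|42|0|5'
//
// matching the file|line|index|write_fd layout assembled below.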
DeathTest::TestRole ExecDeathTest::AssumeRole() { const UnitTestImpl* const impl = GetUnitTestImpl(); const InternalRunDeathTestFlag* const flag = impl->internal_run_death_test_flag(); const TestInfo* const info = impl->current_test_info(); const int death_test_index = info->result()->death_test_count(); if (flag != NULL) { set_write_fd(flag->write_fd()); return EXECUTE_TEST; } int pipe_fd[2]; GTEST_DEATH_TEST_CHECK_(pipe(pipe_fd) != -1); // Clear the close-on-exec flag on the write end of the pipe, lest // it be closed when the child process does an exec: GTEST_DEATH_TEST_CHECK_(fcntl(pipe_fd[1], F_SETFD, 0) != -1); const std::string filter_flag = std::string("--") + GTEST_FLAG_PREFIX_ + kFilterFlag + "=" + info->test_case_name() + "." + info->name(); const std::string internal_flag = std::string("--") + GTEST_FLAG_PREFIX_ + kInternalRunDeathTestFlag + "=" + file_ + "|" + StreamableToString(line_) + "|" + StreamableToString(death_test_index) + "|" + StreamableToString(pipe_fd[1]); Arguments args; args.AddArguments(GetArgvsForDeathTestChildProcess()); args.AddArgument(filter_flag.c_str()); args.AddArgument(internal_flag.c_str()); DeathTest::set_last_death_test_message(""); CaptureStderr(); // See the comment in NoExecDeathTest::AssumeRole for why the next line // is necessary. FlushInfoLog(); const pid_t child_pid = ExecDeathTestSpawnChild(args.Argv(), pipe_fd[0]); GTEST_DEATH_TEST_CHECK_SYSCALL_(close(pipe_fd[1])); set_child_pid(child_pid); set_read_fd(pipe_fd[0]); set_spawned(true); return OVERSEE_TEST; } # endif // !GTEST_OS_WINDOWS // Creates a concrete DeathTest-derived class that depends on the // --gtest_death_test_style flag, and sets the pointer pointed to // by the "test" argument to its address. If the test should be // skipped, sets that pointer to NULL. Returns true, unless the // flag is set to an invalid value. bool DefaultDeathTestFactory::Create(const char* statement, const RE* regex, const char* file, int line, DeathTest** test) { UnitTestImpl* const impl = GetUnitTestImpl(); const InternalRunDeathTestFlag* const flag = impl->internal_run_death_test_flag(); const int death_test_index = impl->current_test_info() ->increment_death_test_count(); if (flag != NULL) { if (death_test_index > flag->index()) { DeathTest::set_last_death_test_message( "Death test count (" + StreamableToString(death_test_index) + ") somehow exceeded expected maximum (" + StreamableToString(flag->index()) + ")"); return false; } if (!(flag->file() == file && flag->line() == line && flag->index() == death_test_index)) { *test = NULL; return true; } } # if GTEST_OS_WINDOWS if (GTEST_FLAG(death_test_style) == "threadsafe" || GTEST_FLAG(death_test_style) == "fast") { *test = new WindowsDeathTest(statement, regex, file, line); } # elif GTEST_OS_FUCHSIA if (GTEST_FLAG(death_test_style) == "threadsafe" || GTEST_FLAG(death_test_style) == "fast") { *test = new FuchsiaDeathTest(statement, regex, file, line); } # else if (GTEST_FLAG(death_test_style) == "threadsafe") { *test = new ExecDeathTest(statement, regex, file, line); } else if (GTEST_FLAG(death_test_style) == "fast") { *test = new NoExecDeathTest(statement, regex); } # endif // GTEST_OS_WINDOWS else { // NOLINT - this is more readable than unbalanced brackets inside #if. 
DeathTest::set_last_death_test_message( "Unknown death test style \"" + GTEST_FLAG(death_test_style) + "\" encountered"); return false; } return true; } # if GTEST_OS_WINDOWS // Recreates the pipe and event handles from the provided parameters, // signals the event, and returns a file descriptor wrapped around the pipe // handle. This function is called in the child process only. static int GetStatusFileDescriptor(unsigned int parent_process_id, size_t write_handle_as_size_t, size_t event_handle_as_size_t) { AutoHandle parent_process_handle(::OpenProcess(PROCESS_DUP_HANDLE, FALSE, // Non-inheritable. parent_process_id)); if (parent_process_handle.Get() == INVALID_HANDLE_VALUE) { DeathTestAbort("Unable to open parent process " + StreamableToString(parent_process_id)); } // FIXME: Replace the following check with a // compile-time assertion when available. GTEST_CHECK_(sizeof(HANDLE) <= sizeof(size_t)); const HANDLE write_handle = reinterpret_cast(write_handle_as_size_t); HANDLE dup_write_handle; // The newly initialized handle is accessible only in the parent // process. To obtain one accessible within the child, we need to use // DuplicateHandle. if (!::DuplicateHandle(parent_process_handle.Get(), write_handle, ::GetCurrentProcess(), &dup_write_handle, 0x0, // Requested privileges ignored since // DUPLICATE_SAME_ACCESS is used. FALSE, // Request non-inheritable handler. DUPLICATE_SAME_ACCESS)) { DeathTestAbort("Unable to duplicate the pipe handle " + StreamableToString(write_handle_as_size_t) + " from the parent process " + StreamableToString(parent_process_id)); } const HANDLE event_handle = reinterpret_cast(event_handle_as_size_t); HANDLE dup_event_handle; if (!::DuplicateHandle(parent_process_handle.Get(), event_handle, ::GetCurrentProcess(), &dup_event_handle, 0x0, FALSE, DUPLICATE_SAME_ACCESS)) { DeathTestAbort("Unable to duplicate the event handle " + StreamableToString(event_handle_as_size_t) + " from the parent process " + StreamableToString(parent_process_id)); } const int write_fd = ::_open_osfhandle(reinterpret_cast(dup_write_handle), O_APPEND); if (write_fd == -1) { DeathTestAbort("Unable to convert pipe handle " + StreamableToString(write_handle_as_size_t) + " to a file descriptor"); } // Signals the parent that the write end of the pipe has been acquired // so the parent can release its own write end. ::SetEvent(dup_event_handle); return write_fd; } # endif // GTEST_OS_WINDOWS // Returns a newly created InternalRunDeathTestFlag object with fields // initialized from the GTEST_FLAG(internal_run_death_test) flag if // the flag is specified; otherwise returns NULL. InternalRunDeathTestFlag* ParseInternalRunDeathTestFlag() { if (GTEST_FLAG(internal_run_death_test) == "") return NULL; // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we // can use it here. 
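// For reference, the '|'-separated field layouts parsed below are:
//   Windows:  file|line|index|parent_pid|write_handle|event_handle (6 fields)
//   Fuchsia:  file|line|index                                      (3 fields)
//   POSIX:    file|line|index|write_fd                             (4 fields)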
int line = -1; int index = -1; ::std::vector< ::std::string> fields; SplitString(GTEST_FLAG(internal_run_death_test).c_str(), '|', &fields); int write_fd = -1; # if GTEST_OS_WINDOWS unsigned int parent_process_id = 0; size_t write_handle_as_size_t = 0; size_t event_handle_as_size_t = 0; if (fields.size() != 6 || !ParseNaturalNumber(fields[1], &line) || !ParseNaturalNumber(fields[2], &index) || !ParseNaturalNumber(fields[3], &parent_process_id) || !ParseNaturalNumber(fields[4], &write_handle_as_size_t) || !ParseNaturalNumber(fields[5], &event_handle_as_size_t)) { DeathTestAbort("Bad --gtest_internal_run_death_test flag: " + GTEST_FLAG(internal_run_death_test)); } write_fd = GetStatusFileDescriptor(parent_process_id, write_handle_as_size_t, event_handle_as_size_t); # elif GTEST_OS_FUCHSIA if (fields.size() != 3 || !ParseNaturalNumber(fields[1], &line) || !ParseNaturalNumber(fields[2], &index)) { DeathTestAbort("Bad --gtest_internal_run_death_test flag: " + GTEST_FLAG(internal_run_death_test)); } # else if (fields.size() != 4 || !ParseNaturalNumber(fields[1], &line) || !ParseNaturalNumber(fields[2], &index) || !ParseNaturalNumber(fields[3], &write_fd)) { DeathTestAbort("Bad --gtest_internal_run_death_test flag: " + GTEST_FLAG(internal_run_death_test)); } # endif // GTEST_OS_WINDOWS return new InternalRunDeathTestFlag(fields[0], line, index, write_fd); } } // namespace internal #endif // GTEST_HAS_DEATH_TEST } // namespace testing libvpx-1.8.2/third_party/googletest/src/src/gtest-filepath.cc000066400000000000000000000342151357355204000243510ustar00rootroot00000000000000// Copyright 2008, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "gtest/internal/gtest-filepath.h" #include #include "gtest/internal/gtest-port.h" #include "gtest/gtest-message.h" #if GTEST_OS_WINDOWS_MOBILE # include #elif GTEST_OS_WINDOWS # include # include #elif GTEST_OS_SYMBIAN // Symbian OpenC has PATH_MAX in sys/syslimits.h # include #else # include # include // Some Linux distributions define PATH_MAX here. 
#endif // GTEST_OS_WINDOWS_MOBILE #include "gtest/internal/gtest-string.h" #if GTEST_OS_WINDOWS # define GTEST_PATH_MAX_ _MAX_PATH #elif defined(PATH_MAX) # define GTEST_PATH_MAX_ PATH_MAX #elif defined(_XOPEN_PATH_MAX) # define GTEST_PATH_MAX_ _XOPEN_PATH_MAX #else # define GTEST_PATH_MAX_ _POSIX_PATH_MAX #endif // GTEST_OS_WINDOWS namespace testing { namespace internal { #if GTEST_OS_WINDOWS // On Windows, '\\' is the standard path separator, but many tools and the // Windows API also accept '/' as an alternate path separator. Unless otherwise // noted, a file path can contain either kind of path separators, or a mixture // of them. const char kPathSeparator = '\\'; const char kAlternatePathSeparator = '/'; const char kAlternatePathSeparatorString[] = "/"; # if GTEST_OS_WINDOWS_MOBILE // Windows CE doesn't have a current directory. You should not use // the current directory in tests on Windows CE, but this at least // provides a reasonable fallback. const char kCurrentDirectoryString[] = "\\"; // Windows CE doesn't define INVALID_FILE_ATTRIBUTES const DWORD kInvalidFileAttributes = 0xffffffff; # else const char kCurrentDirectoryString[] = ".\\"; # endif // GTEST_OS_WINDOWS_MOBILE #else const char kPathSeparator = '/'; const char kCurrentDirectoryString[] = "./"; #endif // GTEST_OS_WINDOWS // Returns whether the given character is a valid path separator. static bool IsPathSeparator(char c) { #if GTEST_HAS_ALT_PATH_SEP_ return (c == kPathSeparator) || (c == kAlternatePathSeparator); #else return c == kPathSeparator; #endif } // Returns the current working directory, or "" if unsuccessful. FilePath FilePath::GetCurrentDir() { #if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT // Windows CE doesn't have a current directory, so we just return // something reasonable. return FilePath(kCurrentDirectoryString); #elif GTEST_OS_WINDOWS char cwd[GTEST_PATH_MAX_ + 1] = { '\0' }; return FilePath(_getcwd(cwd, sizeof(cwd)) == NULL ? "" : cwd); #else char cwd[GTEST_PATH_MAX_ + 1] = { '\0' }; char* result = getcwd(cwd, sizeof(cwd)); # if GTEST_OS_NACL // getcwd will likely fail in NaCl due to the sandbox, so return something // reasonable. The user may have provided a shim implementation for getcwd, // however, so fallback only when failure is detected. return FilePath(result == NULL ? kCurrentDirectoryString : cwd); # endif // GTEST_OS_NACL return FilePath(result == NULL ? "" : cwd); #endif // GTEST_OS_WINDOWS_MOBILE } // Returns a copy of the FilePath with the case-insensitive extension removed. // Example: FilePath("dir/file.exe").RemoveExtension("EXE") returns // FilePath("dir/file"). If a case-insensitive extension is not // found, returns a copy of the original FilePath. FilePath FilePath::RemoveExtension(const char* extension) const { const std::string dot_extension = std::string(".") + extension; if (String::EndsWithCaseInsensitive(pathname_, dot_extension)) { return FilePath(pathname_.substr( 0, pathname_.length() - dot_extension.length())); } return *this; } // Returns a pointer to the last occurrence of a valid path separator in // the FilePath. On Windows, for example, both '/' and '\' are valid path // separators. Returns NULL if no path separator was found. const char* FilePath::FindLastPathSeparator() const { const char* const last_sep = strrchr(c_str(), kPathSeparator); #if GTEST_HAS_ALT_PATH_SEP_ const char* const last_alt_sep = strrchr(c_str(), kAlternatePathSeparator); // Comparing two pointers of which only one is NULL is undefined. 
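  // (For example, on Windows this function must return a pointer to the '/'
  // in "dir\\sub/file", whichever valid separator occurs last.)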
if (last_alt_sep != NULL && (last_sep == NULL || last_alt_sep > last_sep)) { return last_alt_sep; } #endif return last_sep; } // Returns a copy of the FilePath with the directory part removed. // Example: FilePath("path/to/file").RemoveDirectoryName() returns // FilePath("file"). If there is no directory part ("just_a_file"), it returns // the FilePath unmodified. If there is no file part ("just_a_dir/") it // returns an empty FilePath (""). // On Windows platform, '\' is the path separator, otherwise it is '/'. FilePath FilePath::RemoveDirectoryName() const { const char* const last_sep = FindLastPathSeparator(); return last_sep ? FilePath(last_sep + 1) : *this; } // RemoveFileName returns the directory path with the filename removed. // Example: FilePath("path/to/file").RemoveFileName() returns "path/to/". // If the FilePath is "a_file" or "/a_file", RemoveFileName returns // FilePath("./") or, on Windows, FilePath(".\\"). If the filepath does // not have a file, like "just/a/dir/", it returns the FilePath unmodified. // On Windows platform, '\' is the path separator, otherwise it is '/'. FilePath FilePath::RemoveFileName() const { const char* const last_sep = FindLastPathSeparator(); std::string dir; if (last_sep) { dir = std::string(c_str(), last_sep + 1 - c_str()); } else { dir = kCurrentDirectoryString; } return FilePath(dir); } // Helper functions for naming files in a directory for xml output. // Given directory = "dir", base_name = "test", number = 0, // extension = "xml", returns "dir/test.xml". If number is greater // than zero (e.g., 12), returns "dir/test_12.xml". // On Windows platform, uses \ as the separator rather than /. FilePath FilePath::MakeFileName(const FilePath& directory, const FilePath& base_name, int number, const char* extension) { std::string file; if (number == 0) { file = base_name.string() + "." + extension; } else { file = base_name.string() + "_" + StreamableToString(number) + "." + extension; } return ConcatPaths(directory, FilePath(file)); } // Given directory = "dir", relative_path = "test.xml", returns "dir/test.xml". // On Windows, uses \ as the separator rather than /. FilePath FilePath::ConcatPaths(const FilePath& directory, const FilePath& relative_path) { if (directory.IsEmpty()) return relative_path; const FilePath dir(directory.RemoveTrailingPathSeparator()); return FilePath(dir.string() + kPathSeparator + relative_path.string()); } // Returns true if pathname describes something findable in the file-system, // either a file, directory, or whatever. bool FilePath::FileOrDirectoryExists() const { #if GTEST_OS_WINDOWS_MOBILE LPCWSTR unicode = String::AnsiToUtf16(pathname_.c_str()); const DWORD attributes = GetFileAttributes(unicode); delete [] unicode; return attributes != kInvalidFileAttributes; #else posix::StatStruct file_stat; return posix::Stat(pathname_.c_str(), &file_stat) == 0; #endif // GTEST_OS_WINDOWS_MOBILE } // Returns true if pathname describes a directory in the file-system // that exists. bool FilePath::DirectoryExists() const { bool result = false; #if GTEST_OS_WINDOWS // Don't strip off trailing separator if path is a root directory on // Windows (like "C:\\"). const FilePath& path(IsRootDirectory() ? 
*this : RemoveTrailingPathSeparator()); #else const FilePath& path(*this); #endif #if GTEST_OS_WINDOWS_MOBILE LPCWSTR unicode = String::AnsiToUtf16(path.c_str()); const DWORD attributes = GetFileAttributes(unicode); delete [] unicode; if ((attributes != kInvalidFileAttributes) && (attributes & FILE_ATTRIBUTE_DIRECTORY)) { result = true; } #else posix::StatStruct file_stat; result = posix::Stat(path.c_str(), &file_stat) == 0 && posix::IsDir(file_stat); #endif // GTEST_OS_WINDOWS_MOBILE return result; } // Returns true if pathname describes a root directory. (Windows has one // root directory per disk drive.) bool FilePath::IsRootDirectory() const { #if GTEST_OS_WINDOWS // FIXME: on Windows a network share like // \\server\share can be a root directory, although it cannot be the // current directory. Handle this properly. return pathname_.length() == 3 && IsAbsolutePath(); #else return pathname_.length() == 1 && IsPathSeparator(pathname_.c_str()[0]); #endif } // Returns true if pathname describes an absolute path. bool FilePath::IsAbsolutePath() const { const char* const name = pathname_.c_str(); #if GTEST_OS_WINDOWS return pathname_.length() >= 3 && ((name[0] >= 'a' && name[0] <= 'z') || (name[0] >= 'A' && name[0] <= 'Z')) && name[1] == ':' && IsPathSeparator(name[2]); #else return IsPathSeparator(name[0]); #endif } // Returns a pathname for a file that does not currently exist. The pathname // will be directory/base_name.extension or // directory/base_name_.extension if directory/base_name.extension // already exists. The number will be incremented until a pathname is found // that does not already exist. // Examples: 'dir/foo_test.xml' or 'dir/foo_test_1.xml'. // There could be a race condition if two or more processes are calling this // function at the same time -- they could both pick the same filename. FilePath FilePath::GenerateUniqueFileName(const FilePath& directory, const FilePath& base_name, const char* extension) { FilePath full_pathname; int number = 0; do { full_pathname.Set(MakeFileName(directory, base_name, number++, extension)); } while (full_pathname.FileOrDirectoryExists()); return full_pathname; } // Returns true if FilePath ends with a path separator, which indicates that // it is intended to represent a directory. Returns false otherwise. // This does NOT check that a directory (or file) actually exists. bool FilePath::IsDirectory() const { return !pathname_.empty() && IsPathSeparator(pathname_.c_str()[pathname_.length() - 1]); } // Create directories so that path exists. Returns true if successful or if // the directories already exist; returns false if unable to create directories // for any reason. bool FilePath::CreateDirectoriesRecursively() const { if (!this->IsDirectory()) { return false; } if (pathname_.length() == 0 || this->DirectoryExists()) { return true; } const FilePath parent(this->RemoveTrailingPathSeparator().RemoveFileName()); return parent.CreateDirectoriesRecursively() && this->CreateFolder(); } // Create the directory so that path exists. Returns true if successful or // if the directory already exists; returns false if unable to create the // directory for any reason, including if the parent directory does not // exist. Not named "CreateDirectory" because that's a macro on Windows. bool FilePath::CreateFolder() const { #if GTEST_OS_WINDOWS_MOBILE FilePath removed_sep(this->RemoveTrailingPathSeparator()); LPCWSTR unicode = String::AnsiToUtf16(removed_sep.c_str()); int result = CreateDirectory(unicode, NULL) ? 
0 : -1; delete [] unicode; #elif GTEST_OS_WINDOWS int result = _mkdir(pathname_.c_str()); #else int result = mkdir(pathname_.c_str(), 0777); #endif // GTEST_OS_WINDOWS_MOBILE if (result == -1) { return this->DirectoryExists(); // An error is OK if the directory exists. } return true; // No error. } // If input name has a trailing separator character, remove it and return the // name, otherwise return the name string unmodified. // On Windows platform, uses \ as the separator, other platforms use /. FilePath FilePath::RemoveTrailingPathSeparator() const { return IsDirectory() ? FilePath(pathname_.substr(0, pathname_.length() - 1)) : *this; } // Removes any redundant separators that might be in the pathname. // For example, "bar///foo" becomes "bar/foo". Does not eliminate other // redundancies that might be in a pathname involving "." or "..". // FIXME: handle Windows network shares (e.g. \\server\share). void FilePath::Normalize() { if (pathname_.c_str() == NULL) { pathname_ = ""; return; } const char* src = pathname_.c_str(); char* const dest = new char[pathname_.length() + 1]; char* dest_ptr = dest; memset(dest_ptr, 0, pathname_.length() + 1); while (*src != '\0') { *dest_ptr = *src; if (!IsPathSeparator(*src)) { src++; } else { #if GTEST_HAS_ALT_PATH_SEP_ if (*dest_ptr == kAlternatePathSeparator) { *dest_ptr = kPathSeparator; } #endif while (IsPathSeparator(*src)) src++; } dest_ptr++; } *dest_ptr = '\0'; pathname_ = dest; delete[] dest; } } // namespace internal } // namespace testing libvpx-1.8.2/third_party/googletest/src/src/gtest-internal-inl.h000066400000000000000000001311071357355204000250110ustar00rootroot00000000000000// Copyright 2005, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Utility functions and classes used by the Google C++ testing framework.// // This file contains purely Google Test's internal implementation. Please // DO NOT #INCLUDE IT IN A USER PROGRAM. 
#ifndef GTEST_SRC_GTEST_INTERNAL_INL_H_
#define GTEST_SRC_GTEST_INTERNAL_INL_H_

#ifndef _WIN32_WCE
# include <errno.h>
#endif  // !_WIN32_WCE
#include <stddef.h>
#include <stdlib.h>  // For strtoll/_strtoul64/malloc/free.
#include <string.h>  // For memmove.

#include <algorithm>
#include <string>
#include <vector>

#include "gtest/internal/gtest-port.h"

#if GTEST_CAN_STREAM_RESULTS_
# include <arpa/inet.h>  // NOLINT
# include <netdb.h>  // NOLINT
#endif

#if GTEST_OS_WINDOWS
# include <windows.h>  // NOLINT
#endif  // GTEST_OS_WINDOWS

#include "gtest/gtest.h"
#include "gtest/gtest-spi.h"

GTEST_DISABLE_MSC_WARNINGS_PUSH_(4251 \
/* class A needs to have dll-interface to be used by clients of class B */)

namespace testing {

// Declares the flags.
//
// We don't want the users to modify this flag in the code, but want
// Google Test's own unit tests to be able to access it. Therefore we
// declare it here as opposed to in gtest.h.
GTEST_DECLARE_bool_(death_test_use_fork);

namespace internal {

// The value of GetTestTypeId() as seen from within the Google Test
// library.  This is solely for testing GetTestTypeId().
GTEST_API_ extern const TypeId kTestTypeIdInGoogleTest;

// Names of the flags (needed for parsing Google Test flags).
const char kAlsoRunDisabledTestsFlag[] = "also_run_disabled_tests";
const char kBreakOnFailureFlag[] = "break_on_failure";
const char kCatchExceptionsFlag[] = "catch_exceptions";
const char kColorFlag[] = "color";
const char kFilterFlag[] = "filter";
const char kListTestsFlag[] = "list_tests";
const char kOutputFlag[] = "output";
const char kPrintTimeFlag[] = "print_time";
const char kPrintUTF8Flag[] = "print_utf8";
const char kRandomSeedFlag[] = "random_seed";
const char kRepeatFlag[] = "repeat";
const char kShuffleFlag[] = "shuffle";
const char kStackTraceDepthFlag[] = "stack_trace_depth";
const char kStreamResultToFlag[] = "stream_result_to";
const char kThrowOnFailureFlag[] = "throw_on_failure";
const char kFlagfileFlag[] = "flagfile";

// A valid random seed must be in [1, kMaxRandomSeed].
const int kMaxRandomSeed = 99999;

// g_help_flag is true iff the --help flag or an equivalent form is
// specified on the command line.
GTEST_API_ extern bool g_help_flag;

// Returns the current time in milliseconds.
GTEST_API_ TimeInMillis GetTimeInMillis();

// Returns true iff Google Test should use colors in the output.
GTEST_API_ bool ShouldUseColor(bool stdout_is_tty);

// Formats the given time in milliseconds as seconds.
GTEST_API_ std::string FormatTimeInMillisAsSeconds(TimeInMillis ms);

// Converts the given time in milliseconds to a date string in the ISO 8601
// format, without the timezone information.  N.B.: due to the use of the
// non-reentrant localtime() function, this function is not thread safe.  Do
// not use it in any code that can be called from multiple threads.
GTEST_API_ std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms);

// Parses a string for an Int32 flag, in the form of "--flag=value".
//
// On success, stores the value of the flag in *value, and returns
// true.  On failure, returns false without changing *value.
GTEST_API_ bool ParseInt32Flag(
    const char* str, const char* flag, Int32* value);

// Returns a random seed in range [1, kMaxRandomSeed] based on the
// given --gtest_random_seed flag value.
inline int GetRandomSeedFromFlag(Int32 random_seed_flag) {
  const unsigned int raw_seed = (random_seed_flag == 0) ?
      static_cast<unsigned int>(GetTimeInMillis()) :
      static_cast<unsigned int>(random_seed_flag);

  // Normalizes the actual seed to range [1, kMaxRandomSeed] such that
  // it's easy to type.
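  // For example, with kMaxRandomSeed == 99999: a raw seed of 1 stays 1,
  // 99999 stays 99999, and 100000 wraps around to 1, because
  // ((100000 - 1U) % 99999U) + 1 == 1.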
const int normalized_seed = static_cast((raw_seed - 1U) % static_cast(kMaxRandomSeed)) + 1; return normalized_seed; } // Returns the first valid random seed after 'seed'. The behavior is // undefined if 'seed' is invalid. The seed after kMaxRandomSeed is // considered to be 1. inline int GetNextRandomSeed(int seed) { GTEST_CHECK_(1 <= seed && seed <= kMaxRandomSeed) << "Invalid random seed " << seed << " - must be in [1, " << kMaxRandomSeed << "]."; const int next_seed = seed + 1; return (next_seed > kMaxRandomSeed) ? 1 : next_seed; } // This class saves the values of all Google Test flags in its c'tor, and // restores them in its d'tor. class GTestFlagSaver { public: // The c'tor. GTestFlagSaver() { also_run_disabled_tests_ = GTEST_FLAG(also_run_disabled_tests); break_on_failure_ = GTEST_FLAG(break_on_failure); catch_exceptions_ = GTEST_FLAG(catch_exceptions); color_ = GTEST_FLAG(color); death_test_style_ = GTEST_FLAG(death_test_style); death_test_use_fork_ = GTEST_FLAG(death_test_use_fork); filter_ = GTEST_FLAG(filter); internal_run_death_test_ = GTEST_FLAG(internal_run_death_test); list_tests_ = GTEST_FLAG(list_tests); output_ = GTEST_FLAG(output); print_time_ = GTEST_FLAG(print_time); print_utf8_ = GTEST_FLAG(print_utf8); random_seed_ = GTEST_FLAG(random_seed); repeat_ = GTEST_FLAG(repeat); shuffle_ = GTEST_FLAG(shuffle); stack_trace_depth_ = GTEST_FLAG(stack_trace_depth); stream_result_to_ = GTEST_FLAG(stream_result_to); throw_on_failure_ = GTEST_FLAG(throw_on_failure); } // The d'tor is not virtual. DO NOT INHERIT FROM THIS CLASS. ~GTestFlagSaver() { GTEST_FLAG(also_run_disabled_tests) = also_run_disabled_tests_; GTEST_FLAG(break_on_failure) = break_on_failure_; GTEST_FLAG(catch_exceptions) = catch_exceptions_; GTEST_FLAG(color) = color_; GTEST_FLAG(death_test_style) = death_test_style_; GTEST_FLAG(death_test_use_fork) = death_test_use_fork_; GTEST_FLAG(filter) = filter_; GTEST_FLAG(internal_run_death_test) = internal_run_death_test_; GTEST_FLAG(list_tests) = list_tests_; GTEST_FLAG(output) = output_; GTEST_FLAG(print_time) = print_time_; GTEST_FLAG(print_utf8) = print_utf8_; GTEST_FLAG(random_seed) = random_seed_; GTEST_FLAG(repeat) = repeat_; GTEST_FLAG(shuffle) = shuffle_; GTEST_FLAG(stack_trace_depth) = stack_trace_depth_; GTEST_FLAG(stream_result_to) = stream_result_to_; GTEST_FLAG(throw_on_failure) = throw_on_failure_; } private: // Fields for saving the original values of flags. bool also_run_disabled_tests_; bool break_on_failure_; bool catch_exceptions_; std::string color_; std::string death_test_style_; bool death_test_use_fork_; std::string filter_; std::string internal_run_death_test_; bool list_tests_; std::string output_; bool print_time_; bool print_utf8_; internal::Int32 random_seed_; internal::Int32 repeat_; bool shuffle_; internal::Int32 stack_trace_depth_; std::string stream_result_to_; bool throw_on_failure_; } GTEST_ATTRIBUTE_UNUSED_; // Converts a Unicode code point to a narrow string in UTF-8 encoding. // code_point parameter is of type UInt32 because wchar_t may not be // wide enough to contain a code point. // If the code_point is not a valid Unicode code point // (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted // to "(Invalid Unicode 0xXXXXXXXX)". GTEST_API_ std::string CodePointToUtf8(UInt32 code_point); // Converts a wide string to a narrow string in UTF-8 encoding. 
// The wide string is assumed to have the following encoding:
//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
//   UTF-32 if sizeof(wchar_t) == 4 (on Linux)
// Parameter str points to a null-terminated wide string.
// Parameter num_chars may additionally limit the number
// of wchar_t characters processed. -1 is used when the entire string
// should be processed.
// If the string contains code points that are not valid Unicode code points
// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF-16 encoding
// and contains invalid UTF-16 surrogate pairs, values in those pairs
// will be encoded as individual Unicode characters from the Basic
// Multilingual Plane.
GTEST_API_ std::string WideStringToUtf8(const wchar_t* str, int num_chars);

// Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file
// if the variable is present. If a file already exists at this location, this
// function will write over it. If the variable is present, but the file cannot
// be created, prints an error and exits.
void WriteToShardStatusFileIfNeeded();

// Checks whether sharding is enabled by examining the relevant
// environment variable values. If the variables are present,
// but inconsistent (e.g., shard_index >= total_shards), prints
// an error and exits. If in_subprocess_for_death_test, sharding is
// disabled because it must only be applied to the original test
// process. Otherwise, we could filter out death tests we intended to execute.
GTEST_API_ bool ShouldShard(const char* total_shards_str,
                            const char* shard_index_str,
                            bool in_subprocess_for_death_test);

// Parses the environment variable var as an Int32. If it is unset,
// returns default_val. If it is not an Int32, prints an error
// and aborts.
GTEST_API_ Int32 Int32FromEnvOrDie(const char* env_var, Int32 default_val);

// Given the total number of shards, the shard index, and the test id,
// returns true iff the test should be run on this shard. The test id is
// some arbitrary but unique non-negative integer assigned to each test
// method. Assumes that 0 <= shard_index < total_shards.
GTEST_API_ bool ShouldRunTestOnShard(
    int total_shards, int shard_index, int test_id);

// STL container utilities.

// Returns the number of elements in the given container that satisfy
// the given predicate.
template <class Container, typename Predicate>
inline int CountIf(const Container& c, Predicate predicate) {
  // Implemented as an explicit loop since std::count_if() in libCstd on
  // Solaris has a non-standard signature.
  int count = 0;
  for (typename Container::const_iterator it = c.begin(); it != c.end(); ++it) {
    if (predicate(*it))
      ++count;
  }
  return count;
}

// Applies a function/functor to each element in the container.
template <class Container, typename Functor>
void ForEach(const Container& c, Functor functor) {
  std::for_each(c.begin(), c.end(), functor);
}

// Returns the i-th element of the vector, or default_value if i is not
// in range [0, v.size()).
template <typename E>
inline E GetElementOr(const std::vector<E>& v, int i, E default_value) {
  return (i < 0 || i >= static_cast<int>(v.size())) ? default_value : v[i];
}

// Performs an in-place shuffle of a range of the vector's elements.
// 'begin' and 'end' are element indices as an STL-style range;
// i.e. [begin, end) are shuffled, where 'end' == size() means to
// shuffle to the end of the vector.
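// For example (illustrative): for a vector v of size 5,
// ShuffleRange(random, 1, 4, &v) permutes only v[1], v[2] and v[3], while
// ShuffleRange(random, 0, 5, &v) shuffles the whole vector, which is
// exactly what Shuffle() below does.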
template <typename E> void ShuffleRange(internal::Random* random, int begin, int end, std::vector<E>* v) { const int size = static_cast<int>(v->size()); GTEST_CHECK_(0 <= begin && begin <= size) << "Invalid shuffle range start " << begin << ": must be in range [0, " << size << "]."; GTEST_CHECK_(begin <= end && end <= size) << "Invalid shuffle range finish " << end << ": must be in range [" << begin << ", " << size << "]."; // Fisher-Yates shuffle, from // http://en.wikipedia.org/wiki/Fisher-Yates_shuffle for (int range_width = end - begin; range_width >= 2; range_width--) { const int last_in_range = begin + range_width - 1; const int selected = begin + random->Generate(range_width); std::swap((*v)[selected], (*v)[last_in_range]); } } // Performs an in-place shuffle of the vector's elements. template <typename E> inline void Shuffle(internal::Random* random, std::vector<E>* v) { ShuffleRange(random, 0, static_cast<int>(v->size()), v); } // A function for deleting an object. Handy for being used as a // functor. template <typename T> static void Delete(T* x) { delete x; } // A predicate that checks the key of a TestProperty against a known key. // // TestPropertyKeyIs is copyable. class TestPropertyKeyIs { public: // Constructor. // // TestPropertyKeyIs has NO default constructor. explicit TestPropertyKeyIs(const std::string& key) : key_(key) {} // Returns true iff the test name of test property matches on key_. bool operator()(const TestProperty& test_property) const { return test_property.key() == key_; } private: std::string key_; }; // Class UnitTestOptions. // // This class contains functions for processing options the user // specifies when running the tests. It has only static members. // // In most cases, the user can specify an option using either an // environment variable or a command line flag. E.g. you can set the // test filter using either GTEST_FILTER or --gtest_filter. If both // the variable and the flag are present, the latter overrides the // former. class GTEST_API_ UnitTestOptions { public: // Functions for processing the gtest_output flag. // Returns the output format, or "" for normal printed output. static std::string GetOutputFormat(); // Returns the absolute path of the requested output file, or the // default (test_detail.xml in the original working directory) if // none was explicitly specified. static std::string GetAbsolutePathToOutputFile(); // Functions for processing the gtest_filter flag. // Returns true iff the wildcard pattern matches the string. The // first ':' or '\0' character in pattern marks the end of it. // // This recursive algorithm isn't very efficient, but is clear and // works well enough for matching test names, which are short. static bool PatternMatchesString(const char *pattern, const char *str); // Returns true iff the user-specified filter matches the test case // name and the test name. static bool FilterMatchesTest(const std::string &test_case_name, const std::string &test_name); #if GTEST_OS_WINDOWS // Function for supporting the gtest_catch_exception flag. // Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the // given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise. // This function is useful as an __except condition. static int GTestShouldProcessSEH(DWORD exception_code); #endif // GTEST_OS_WINDOWS // Returns true if "name" matches the ':' separated list of glob-style // filters in "filter". static bool MatchesFilter(const std::string& name, const char* filter); }; // Returns the current application's name, removing directory path if that // is present.
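// --- Illustrative sketch (not part of Google Test): the Fisher-Yates loop
// used by ShuffleRange() above, made self-contained. std::mt19937 stands in
// for internal::Random here, so only the swap pattern matches gtest, not the
// exact random sequence; the function name below is ours.
#include <random>
#include <utility>
#include <vector>

inline void SketchShuffleRange(std::mt19937* rng, int begin, int end,
                               std::vector<int>* v) {
  for (int range_width = end - begin; range_width >= 2; range_width--) {
    // Swap a random element of the not-yet-fixed prefix into the last open
    // slot of the range; the modulo mirrors Random::Generate(range_width).
    const int last_in_range = begin + range_width - 1;
    const int selected =
        begin +
        static_cast<int>((*rng)() % static_cast<unsigned int>(range_width));
    std::swap((*v)[selected], (*v)[last_in_range]);
  }
}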
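// Similarly, the sharding contract documented for ShouldRunTestOnShard()
// earlier reduces to a round-robin split, so every test runs on exactly one
// shard. A minimal sketch (again, the function name is ours):
inline bool SketchShouldRunTestOnShard(int total_shards, int shard_index,
                                       int test_id) {
  // Assumes 0 <= shard_index < total_shards and test_id >= 0, as the
  // declaration above requires.
  return (test_id % total_shards) == shard_index;
}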
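// And a minimal recursive sketch of the '?'/'*' wildcard matching that
// UnitTestOptions::PatternMatchesString() above is documented to perform;
// the ':' case reflects that gtest filters are ':'-separated pattern lists
// (the sketch's function name is ours, not gtest's):
inline bool SketchPatternMatchesString(const char* pattern, const char* str) {
  switch (*pattern) {
    case '\0':
    case ':':  // Either terminator ends the current pattern.
      return *str == '\0';
    case '?':  // '?' matches any single character.
      return *str != '\0' && SketchPatternMatchesString(pattern + 1, str + 1);
    case '*':  // '*' matches any substring, including the empty one.
      return (*str != '\0' && SketchPatternMatchesString(pattern, str + 1)) ||
             SketchPatternMatchesString(pattern + 1, str);
    default:  // An ordinary character matches only itself.
      return *pattern == *str &&
             SketchPatternMatchesString(pattern + 1, str + 1);
  }
}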
Used by UnitTestOptions::GetOutputFile. GTEST_API_ FilePath GetCurrentExecutableName(); // The role interface for getting the OS stack trace as a string. class OsStackTraceGetterInterface { public: OsStackTraceGetterInterface() {} virtual ~OsStackTraceGetterInterface() {} // Returns the current OS stack trace as an std::string. Parameters: // // max_depth - the maximum number of stack frames to be included // in the trace. // skip_count - the number of top frames to be skipped; doesn't count // against max_depth. virtual std::string CurrentStackTrace(int max_depth, int skip_count) = 0; // UponLeavingGTest() should be called immediately before Google Test calls // user code. It saves some information about the current stack that // CurrentStackTrace() will use to find and hide Google Test stack frames. virtual void UponLeavingGTest() = 0; // This string is inserted in place of stack frames that are part of // Google Test's implementation. static const char* const kElidedFramesMarker; private: GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetterInterface); }; // A working implementation of the OsStackTraceGetterInterface interface. class OsStackTraceGetter : public OsStackTraceGetterInterface { public: OsStackTraceGetter() {} virtual std::string CurrentStackTrace(int max_depth, int skip_count); virtual void UponLeavingGTest(); private: #if GTEST_HAS_ABSL Mutex mutex_; // Protects all internal state. // We save the stack frame below the frame that calls user code. // We do this because the address of the frame immediately below // the user code changes between the call to UponLeavingGTest() // and any calls to the stack trace code from within the user code. void* caller_frame_ = nullptr; #endif // GTEST_HAS_ABSL GTEST_DISALLOW_COPY_AND_ASSIGN_(OsStackTraceGetter); }; // Information about a Google Test trace point. struct TraceInfo { const char* file; int line; std::string message; }; // This is the default global test part result reporter used in UnitTestImpl. // This class should only be used by UnitTestImpl. class DefaultGlobalTestPartResultReporter : public TestPartResultReporterInterface { public: explicit DefaultGlobalTestPartResultReporter(UnitTestImpl* unit_test); // Implements the TestPartResultReporterInterface. Reports the test part // result in the current test. virtual void ReportTestPartResult(const TestPartResult& result); private: UnitTestImpl* const unit_test_; GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultGlobalTestPartResultReporter); }; // This is the default per thread test part result reporter used in // UnitTestImpl. This class should only be used by UnitTestImpl. class DefaultPerThreadTestPartResultReporter : public TestPartResultReporterInterface { public: explicit DefaultPerThreadTestPartResultReporter(UnitTestImpl* unit_test); // Implements the TestPartResultReporterInterface. The implementation just // delegates to the current global test part result reporter of *unit_test_. virtual void ReportTestPartResult(const TestPartResult& result); private: UnitTestImpl* const unit_test_; GTEST_DISALLOW_COPY_AND_ASSIGN_(DefaultPerThreadTestPartResultReporter); }; // The private implementation of the UnitTest class. We don't protect // the methods under a mutex, as this class is not accessible by a // user and the UnitTest class that delegates work to this class does // proper locking. class GTEST_API_ UnitTestImpl { public: explicit UnitTestImpl(UnitTest* parent); virtual ~UnitTestImpl(); // There are two different ways to register your own TestPartResultReporter. 
// You can register your own reporter to listen either only for test results // from the current thread or for results from all threads. // By default, each per-thread test result reporter just passes a new // TestPartResult to the global test result reporter, which registers the // test part result for the currently running test. // Returns the global test part result reporter. TestPartResultReporterInterface* GetGlobalTestPartResultReporter(); // Sets the global test part result reporter. void SetGlobalTestPartResultReporter( TestPartResultReporterInterface* reporter); // Returns the test part result reporter for the current thread. TestPartResultReporterInterface* GetTestPartResultReporterForCurrentThread(); // Sets the test part result reporter for the current thread. void SetTestPartResultReporterForCurrentThread( TestPartResultReporterInterface* reporter); // Gets the number of successful test cases. int successful_test_case_count() const; // Gets the number of failed test cases. int failed_test_case_count() const; // Gets the number of all test cases. int total_test_case_count() const; // Gets the number of all test cases that contain at least one test // that should run. int test_case_to_run_count() const; // Gets the number of successful tests. int successful_test_count() const; // Gets the number of failed tests. int failed_test_count() const; // Gets the number of disabled tests that will be reported in the XML report. int reportable_disabled_test_count() const; // Gets the number of disabled tests. int disabled_test_count() const; // Gets the number of tests to be printed in the XML report. int reportable_test_count() const; // Gets the number of all tests. int total_test_count() const; // Gets the number of tests that should run. int test_to_run_count() const; // Gets the time of the test program start, in ms from the start of the // UNIX epoch. TimeInMillis start_timestamp() const { return start_timestamp_; } // Gets the elapsed time, in milliseconds. TimeInMillis elapsed_time() const { return elapsed_time_; } // Returns true iff the unit test passed (i.e. all test cases passed). bool Passed() const { return !Failed(); } // Returns true iff the unit test failed (i.e. some test case failed // or something outside of all tests failed). bool Failed() const { return failed_test_case_count() > 0 || ad_hoc_test_result()->Failed(); } // Gets the i-th test case among all the test cases. i can range from 0 to // total_test_case_count() - 1. If i is not in that range, returns NULL. const TestCase* GetTestCase(int i) const { const int index = GetElementOr(test_case_indices_, i, -1); return index < 0 ? NULL : test_cases_[index]; } // Gets the i-th test case among all the test cases. i can range from 0 to // total_test_case_count() - 1. If i is not in that range, returns NULL. TestCase* GetMutableTestCase(int i) { const int index = GetElementOr(test_case_indices_, i, -1); return index < 0 ? NULL : test_cases_[index]; } // Provides access to the event listener list. TestEventListeners* listeners() { return &listeners_; } // Returns the TestResult for the test that's currently running, or // the TestResult for the ad hoc test if no test is running. TestResult* current_test_result(); // Returns the TestResult for the ad hoc test. const TestResult* ad_hoc_test_result() const { return &ad_hoc_test_result_; } // Sets the OS stack trace getter.
// // Does nothing if the input and the current OS stack trace getter // are the same; otherwise, deletes the old getter and makes the // input the current getter. void set_os_stack_trace_getter(OsStackTraceGetterInterface* getter); // Returns the current OS stack trace getter if it is not NULL; // otherwise, creates an OsStackTraceGetter, makes it the current // getter, and returns it. OsStackTraceGetterInterface* os_stack_trace_getter(); // Returns the current OS stack trace as an std::string. // // The maximum number of stack frames to be included is specified by // the gtest_stack_trace_depth flag. The skip_count parameter // specifies the number of top frames to be skipped, which doesn't // count against the number of frames to be included. // // For example, if Foo() calls Bar(), which in turn calls // CurrentOsStackTraceExceptTop(1), Foo() will be included in the // trace but Bar() and CurrentOsStackTraceExceptTop() won't. std::string CurrentOsStackTraceExceptTop(int skip_count) GTEST_NO_INLINE_; // Finds and returns a TestCase with the given name. If one doesn't // exist, creates one and returns it. // // Arguments: // // test_case_name: name of the test case // type_param: the name of the test's type parameter, or NULL if // this is not a typed or a type-parameterized test. // set_up_tc: pointer to the function that sets up the test case // tear_down_tc: pointer to the function that tears down the test case TestCase* GetTestCase(const char* test_case_name, const char* type_param, Test::SetUpTestCaseFunc set_up_tc, Test::TearDownTestCaseFunc tear_down_tc); // Adds a TestInfo to the unit test. // // Arguments: // // set_up_tc: pointer to the function that sets up the test case // tear_down_tc: pointer to the function that tears down the test case // test_info: the TestInfo object void AddTestInfo(Test::SetUpTestCaseFunc set_up_tc, Test::TearDownTestCaseFunc tear_down_tc, TestInfo* test_info) { // In order to support thread-safe death tests, we need to // remember the original working directory when the test program // was first invoked. We cannot do this in RUN_ALL_TESTS(), as // the user may have changed the current directory before calling // RUN_ALL_TESTS(). Therefore we capture the current directory in // AddTestInfo(), which is called to register a TEST or TEST_F // before main() is reached. if (original_working_dir_.IsEmpty()) { original_working_dir_.Set(FilePath::GetCurrentDir()); GTEST_CHECK_(!original_working_dir_.IsEmpty()) << "Failed to get the current working directory."; } GetTestCase(test_info->test_case_name(), test_info->type_param(), set_up_tc, tear_down_tc)->AddTestInfo(test_info); } // Returns ParameterizedTestCaseRegistry object used to keep track of // value-parameterized tests and instantiate and register them. internal::ParameterizedTestCaseRegistry& parameterized_test_registry() { return parameterized_test_registry_; } // Sets the TestCase object for the test that's currently running. void set_current_test_case(TestCase* a_current_test_case) { current_test_case_ = a_current_test_case; } // Sets the TestInfo object for the test that's currently running. If // current_test_info is NULL, the assertion results will be stored in // ad_hoc_test_result_. void set_current_test_info(TestInfo* a_current_test_info) { current_test_info_ = a_current_test_info; } // Registers all parameterized tests defined using TEST_P and // INSTANTIATE_TEST_CASE_P, creating regular tests for each test/parameter // combination. 
This method can be called more than once; it has guards // protecting from registering the tests more than once. If // value-parameterized tests are disabled, RegisterParameterizedTests is // present but does nothing. void RegisterParameterizedTests(); // Runs all tests in this UnitTest object, prints the result, and // returns true if all tests are successful. If any exception is // thrown during a test, this test is considered to be failed, but // the rest of the tests will still be run. bool RunAllTests(); // Clears the results of all tests, except the ad hoc tests. void ClearNonAdHocTestResult() { ForEach(test_cases_, TestCase::ClearTestCaseResult); } // Clears the results of ad-hoc test assertions. void ClearAdHocTestResult() { ad_hoc_test_result_.Clear(); } // Adds a TestProperty to the current TestResult object when invoked in a // context of a test or a test case, or to the global property set. If the // result already contains a property with the same key, the value will be // updated. void RecordProperty(const TestProperty& test_property); enum ReactionToSharding { HONOR_SHARDING_PROTOCOL, IGNORE_SHARDING_PROTOCOL }; // Matches the full name of each test against the user-specified // filter to decide whether the test should run, then records the // result in each TestCase and TestInfo object. // If shard_tests == HONOR_SHARDING_PROTOCOL, further filters tests // based on sharding variables in the environment. // Returns the number of tests that should run. int FilterTests(ReactionToSharding shard_tests); // Prints the names of the tests matching the user-specified filter flag. void ListTestsMatchingFilter(); const TestCase* current_test_case() const { return current_test_case_; } TestInfo* current_test_info() { return current_test_info_; } const TestInfo* current_test_info() const { return current_test_info_; } // Returns the vector of environments that need to be set-up/torn-down // before/after the tests are run. std::vector<Environment*>& environments() { return environments_; } // Getters for the per-thread Google Test trace stack. std::vector<TraceInfo>& gtest_trace_stack() { return *(gtest_trace_stack_.pointer()); } const std::vector<TraceInfo>& gtest_trace_stack() const { return gtest_trace_stack_.get(); } #if GTEST_HAS_DEATH_TEST void InitDeathTestSubprocessControlInfo() { internal_run_death_test_flag_.reset(ParseInternalRunDeathTestFlag()); } // Returns a pointer to the parsed --gtest_internal_run_death_test // flag, or NULL if that flag was not specified. // This information is useful only in a death test child process. // Must not be called before a call to InitGoogleTest. const InternalRunDeathTestFlag* internal_run_death_test_flag() const { return internal_run_death_test_flag_.get(); } // Returns a pointer to the current death test factory. internal::DeathTestFactory* death_test_factory() { return death_test_factory_.get(); } void SuppressTestEventsIfInSubprocess(); friend class ReplaceDeathTestFactory; #endif // GTEST_HAS_DEATH_TEST // Initializes the event listener performing XML output as specified by // UnitTestOptions. Must not be called before InitGoogleTest. void ConfigureXmlOutput(); #if GTEST_CAN_STREAM_RESULTS_ // Initializes the event listener for streaming test results to a socket. // Must not be called before InitGoogleTest. void ConfigureStreamingOutput(); #endif // Performs initialization dependent upon flag values obtained in // ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to // ParseGoogleTestFlagsOnly.
In case a user neglects to call InitGoogleTest // this function is also called from RunAllTests. Since this function can be // called more than once, it has to be idempotent. void PostFlagParsingInit(); // Gets the random seed used at the start of the current test iteration. int random_seed() const { return random_seed_; } // Gets the random number generator. internal::Random* random() { return &random_; } // Shuffles all test cases, and the tests within each test case, // making sure that death tests are still run first. void ShuffleTests(); // Restores the test cases and tests to their order before the first shuffle. void UnshuffleTests(); // Returns the value of GTEST_FLAG(catch_exceptions) at the moment // UnitTest::Run() starts. bool catch_exceptions() const { return catch_exceptions_; } private: friend class ::testing::UnitTest; // Used by UnitTest::Run() to capture the state of // GTEST_FLAG(catch_exceptions) at the moment it starts. void set_catch_exceptions(bool value) { catch_exceptions_ = value; } // The UnitTest object that owns this implementation object. UnitTest* const parent_; // The working directory when the first TEST() or TEST_F() was // executed. internal::FilePath original_working_dir_; // The default test part result reporters. DefaultGlobalTestPartResultReporter default_global_test_part_result_reporter_; DefaultPerThreadTestPartResultReporter default_per_thread_test_part_result_reporter_; // Points to (but doesn't own) the global test part result reporter. TestPartResultReporterInterface* global_test_part_result_repoter_; // Protects read and write access to global_test_part_result_reporter_. internal::Mutex global_test_part_result_reporter_mutex_; // Points to (but doesn't own) the per-thread test part result reporter. internal::ThreadLocal per_thread_test_part_result_reporter_; // The vector of environments that need to be set-up/torn-down // before/after the tests are run. std::vector environments_; // The vector of TestCases in their original order. It owns the // elements in the vector. std::vector test_cases_; // Provides a level of indirection for the test case list to allow // easy shuffling and restoring the test case order. The i-th // element of this vector is the index of the i-th test case in the // shuffled order. std::vector test_case_indices_; // ParameterizedTestRegistry object used to register value-parameterized // tests. internal::ParameterizedTestCaseRegistry parameterized_test_registry_; // Indicates whether RegisterParameterizedTests() has been called already. bool parameterized_tests_registered_; // Index of the last death test case registered. Initially -1. int last_death_test_case_; // This points to the TestCase for the currently running test. It // changes as Google Test goes through one test case after another. // When no test is running, this is set to NULL and Google Test // stores assertion results in ad_hoc_test_result_. Initially NULL. TestCase* current_test_case_; // This points to the TestInfo for the currently running test. It // changes as Google Test goes through one test after another. When // no test is running, this is set to NULL and Google Test stores // assertion results in ad_hoc_test_result_. Initially NULL. TestInfo* current_test_info_; // Normally, a user only writes assertions inside a TEST or TEST_F, // or inside a function called by a TEST or TEST_F. Since Google // Test keeps track of which test is current running, it can // associate such an assertion with the test it belongs to. 
// // If an assertion is encountered when no TEST or TEST_F is running, // Google Test attributes the assertion result to an imaginary "ad hoc" // test, and records the result in ad_hoc_test_result_. TestResult ad_hoc_test_result_; // The list of event listeners that can be used to track events inside // Google Test. TestEventListeners listeners_; // The OS stack trace getter. Will be deleted when the UnitTest // object is destructed. By default, an OsStackTraceGetter is used, // but the user can set this field to use a custom getter if that is // desired. OsStackTraceGetterInterface* os_stack_trace_getter_; // True iff PostFlagParsingInit() has been called. bool post_flag_parse_init_performed_; // The random number seed used at the beginning of the test run. int random_seed_; // Our random number generator. internal::Random random_; // The time of the test program start, in ms from the start of the // UNIX epoch. TimeInMillis start_timestamp_; // How long the test took to run, in milliseconds. TimeInMillis elapsed_time_; #if GTEST_HAS_DEATH_TEST // The decomposed components of the gtest_internal_run_death_test flag, // parsed when RUN_ALL_TESTS is called. internal::scoped_ptr internal_run_death_test_flag_; internal::scoped_ptr death_test_factory_; #endif // GTEST_HAS_DEATH_TEST // A per-thread stack of traces created by the SCOPED_TRACE() macro. internal::ThreadLocal > gtest_trace_stack_; // The value of GTEST_FLAG(catch_exceptions) at the moment RunAllTests() // starts. bool catch_exceptions_; GTEST_DISALLOW_COPY_AND_ASSIGN_(UnitTestImpl); }; // class UnitTestImpl // Convenience function for accessing the global UnitTest // implementation object. inline UnitTestImpl* GetUnitTestImpl() { return UnitTest::GetInstance()->impl(); } #if GTEST_USES_SIMPLE_RE // Internal helper functions for implementing the simple regular // expression matcher. GTEST_API_ bool IsInSet(char ch, const char* str); GTEST_API_ bool IsAsciiDigit(char ch); GTEST_API_ bool IsAsciiPunct(char ch); GTEST_API_ bool IsRepeat(char ch); GTEST_API_ bool IsAsciiWhiteSpace(char ch); GTEST_API_ bool IsAsciiWordChar(char ch); GTEST_API_ bool IsValidEscape(char ch); GTEST_API_ bool AtomMatchesChar(bool escaped, char pattern, char ch); GTEST_API_ bool ValidateRegex(const char* regex); GTEST_API_ bool MatchRegexAtHead(const char* regex, const char* str); GTEST_API_ bool MatchRepetitionAndRegexAtHead( bool escaped, char ch, char repeat, const char* regex, const char* str); GTEST_API_ bool MatchRegexAnywhere(const char* regex, const char* str); #endif // GTEST_USES_SIMPLE_RE // Parses the command line for Google Test flags, without initializing // other parts of Google Test. GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, char** argv); GTEST_API_ void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv); #if GTEST_HAS_DEATH_TEST // Returns the message describing the last system error, regardless of the // platform. GTEST_API_ std::string GetLastErrnoDescription(); // Attempts to parse a string into a positive integer pointed to by the // number parameter. Returns true if that is possible. // GTEST_HAS_DEATH_TEST implies that we have ::std::string, so we can use // it here. template bool ParseNaturalNumber(const ::std::string& str, Integer* number) { // Fail fast if the given string does not begin with a digit; // this bypasses strtoXXX's "optional leading whitespace and plus // or minus sign" semantics, which are undesirable here. 
if (str.empty() || !IsDigit(str[0])) { return false; } errno = 0; char* end; // BiggestConvertible is the largest integer type that system-provided // string-to-number conversion routines can return. # if GTEST_OS_WINDOWS && !defined(__GNUC__) // MSVC and C++ Builder define __int64 instead of the standard long long. typedef unsigned __int64 BiggestConvertible; const BiggestConvertible parsed = _strtoui64(str.c_str(), &end, 10); # else typedef unsigned long long BiggestConvertible; // NOLINT const BiggestConvertible parsed = strtoull(str.c_str(), &end, 10); # endif // GTEST_OS_WINDOWS && !defined(__GNUC__) const bool parse_success = *end == '\0' && errno == 0; // FIXME: Convert this to compile time assertion when it is // available. GTEST_CHECK_(sizeof(Integer) <= sizeof(parsed)); const Integer result = static_cast<Integer>(parsed); if (parse_success && static_cast<BiggestConvertible>(result) == parsed) { *number = result; return true; } return false; } #endif // GTEST_HAS_DEATH_TEST // TestResult contains some private methods that should be hidden from // Google Test users but are required for testing. This class allows our tests // to access them. // // This class is supplied only for the purpose of testing Google Test's own // constructs. Do not use it in user tests, either directly or indirectly. class TestResultAccessor { public: static void RecordProperty(TestResult* test_result, const std::string& xml_element, const TestProperty& property) { test_result->RecordProperty(xml_element, property); } static void ClearTestPartResults(TestResult* test_result) { test_result->ClearTestPartResults(); } static const std::vector<TestPartResult>& test_part_results( const TestResult& test_result) { return test_result.test_part_results(); } }; #if GTEST_CAN_STREAM_RESULTS_ // Streams test results to the given port on the given host machine. class StreamingListener : public EmptyTestEventListener { public: // Abstract base class for writing strings to a socket. class AbstractSocketWriter { public: virtual ~AbstractSocketWriter() {} // Sends a string to the socket. virtual void Send(const std::string& message) = 0; // Closes the socket. virtual void CloseConnection() {} // Sends a string and a newline to the socket. void SendLn(const std::string& message) { Send(message + "\n"); } }; // Concrete class for actually writing strings to a socket. class SocketWriter : public AbstractSocketWriter { public: SocketWriter(const std::string& host, const std::string& port) : sockfd_(-1), host_name_(host), port_num_(port) { MakeConnection(); } virtual ~SocketWriter() { if (sockfd_ != -1) CloseConnection(); } // Sends a string to the socket. virtual void Send(const std::string& message) { GTEST_CHECK_(sockfd_ != -1) << "Send() can be called only when there is a connection."; const int len = static_cast<int>(message.length()); if (write(sockfd_, message.c_str(), len) != len) { GTEST_LOG_(WARNING) << "stream_result_to: failed to stream to " << host_name_ << ":" << port_num_; } } private: // Creates a client socket and connects to the server. void MakeConnection(); // Closes the socket. void CloseConnection() { GTEST_CHECK_(sockfd_ != -1) << "CloseConnection() can be called only when there is a connection."; close(sockfd_); sockfd_ = -1; } int sockfd_; // socket file descriptor const std::string host_name_; const std::string port_num_; GTEST_DISALLOW_COPY_AND_ASSIGN_(SocketWriter); }; // class SocketWriter
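// The wire format produced by the listener below is a plain line-based
// protocol; a session sketch (event names match the handlers below, the
// concrete values are illustrative only):
//   gtest_streaming_protocol_version=1.0
//   event=TestProgramStart
//   event=TestIterationStart&iteration=0
//   event=TestCaseStart&name=FooTest
//   event=TestStart&name=Bar
//   event=TestPartResult&file=foo_test.cc&line=42&message=Value of: x%0A  Actual: 3
//   event=TestEnd&passed=0&elapsed_time=3ms
//   event=TestCaseEnd&passed=0&elapsed_time=3ms
//   event=TestIterationEnd&passed=0&elapsed_time=5ms
//   event=TestProgramEnd&passed=0
// Each line is terminated by '\n' (see SendLn() below); '=', '&', '%' and
// newlines inside values are %xx-escaped by UrlEncode().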
// Escapes '=', '&', '%', and '\n' characters in str as "%xx". static std::string UrlEncode(const char* str); StreamingListener(const std::string& host, const std::string& port) : socket_writer_(new SocketWriter(host, port)) { Start(); } explicit StreamingListener(AbstractSocketWriter* socket_writer) : socket_writer_(socket_writer) { Start(); } void OnTestProgramStart(const UnitTest& /* unit_test */) { SendLn("event=TestProgramStart"); } void OnTestProgramEnd(const UnitTest& unit_test) { // Note that Google Test currently only reports elapsed time for each // test iteration, not for the entire test program. SendLn("event=TestProgramEnd&passed=" + FormatBool(unit_test.Passed())); // Notify the streaming server to stop. socket_writer_->CloseConnection(); } void OnTestIterationStart(const UnitTest& /* unit_test */, int iteration) { SendLn("event=TestIterationStart&iteration=" + StreamableToString(iteration)); } void OnTestIterationEnd(const UnitTest& unit_test, int /* iteration */) { SendLn("event=TestIterationEnd&passed=" + FormatBool(unit_test.Passed()) + "&elapsed_time=" + StreamableToString(unit_test.elapsed_time()) + "ms"); } void OnTestCaseStart(const TestCase& test_case) { SendLn(std::string("event=TestCaseStart&name=") + test_case.name()); } void OnTestCaseEnd(const TestCase& test_case) { SendLn("event=TestCaseEnd&passed=" + FormatBool(test_case.Passed()) + "&elapsed_time=" + StreamableToString(test_case.elapsed_time()) + "ms"); } void OnTestStart(const TestInfo& test_info) { SendLn(std::string("event=TestStart&name=") + test_info.name()); } void OnTestEnd(const TestInfo& test_info) { SendLn("event=TestEnd&passed=" + FormatBool((test_info.result())->Passed()) + "&elapsed_time=" + StreamableToString((test_info.result())->elapsed_time()) + "ms"); } void OnTestPartResult(const TestPartResult& test_part_result) { const char* file_name = test_part_result.file_name(); if (file_name == NULL) file_name = ""; SendLn("event=TestPartResult&file=" + UrlEncode(file_name) + "&line=" + StreamableToString(test_part_result.line_number()) + "&message=" + UrlEncode(test_part_result.message())); } private: // Sends the given message and a newline to the socket. void SendLn(const std::string& message) { socket_writer_->SendLn(message); } // Called at the start of streaming to notify the receiver what // protocol we are using. void Start() { SendLn("gtest_streaming_protocol_version=1.0"); } std::string FormatBool(bool value) { return value ? "1" : "0"; } const scoped_ptr<AbstractSocketWriter> socket_writer_; GTEST_DISALLOW_COPY_AND_ASSIGN_(StreamingListener); }; // class StreamingListener #endif // GTEST_CAN_STREAM_RESULTS_ } // namespace internal } // namespace testing GTEST_DISABLE_MSC_WARNINGS_POP_() // 4251 #endif // GTEST_SRC_GTEST_INTERNAL_INL_H_ libvpx-1.8.2/third_party/googletest/src/src/gtest-port.cc000066400000000000000000001267531357355204000235500ustar00rootroot00000000000000// Copyright 2008, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc.
nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "gtest/internal/gtest-port.h" #include #include #include #include #include #if GTEST_OS_WINDOWS # include # include # include # include // Used in ThreadLocal. #else # include #endif // GTEST_OS_WINDOWS #if GTEST_OS_MAC # include # include # include #endif // GTEST_OS_MAC #if GTEST_OS_QNX # include # include # include #endif // GTEST_OS_QNX #if GTEST_OS_AIX # include # include #endif // GTEST_OS_AIX #if GTEST_OS_FUCHSIA # include # include #endif // GTEST_OS_FUCHSIA #include "gtest/gtest-spi.h" #include "gtest/gtest-message.h" #include "gtest/internal/gtest-internal.h" #include "gtest/internal/gtest-string.h" #include "src/gtest-internal-inl.h" namespace testing { namespace internal { #if defined(_MSC_VER) || defined(__BORLANDC__) // MSVC and C++Builder do not provide a definition of STDERR_FILENO. const int kStdOutFileno = 1; const int kStdErrFileno = 2; #else const int kStdOutFileno = STDOUT_FILENO; const int kStdErrFileno = STDERR_FILENO; #endif // _MSC_VER #if GTEST_OS_LINUX namespace { template T ReadProcFileField(const std::string& filename, int field) { std::string dummy; std::ifstream file(filename.c_str()); while (field-- > 0) { file >> dummy; } T output = 0; file >> output; return output; } } // namespace // Returns the number of active threads, or 0 when there is an error. size_t GetThreadCount() { const std::string filename = (Message() << "/proc/" << getpid() << "/stat").GetString(); return ReadProcFileField(filename, 19); } #elif GTEST_OS_MAC size_t GetThreadCount() { const task_t task = mach_task_self(); mach_msg_type_number_t thread_count; thread_act_array_t thread_list; const kern_return_t status = task_threads(task, &thread_list, &thread_count); if (status == KERN_SUCCESS) { // task_threads allocates resources in thread_list and we need to free them // to avoid leaks. vm_deallocate(task, reinterpret_cast(thread_list), sizeof(thread_t) * thread_count); return static_cast(thread_count); } else { return 0; } } #elif GTEST_OS_QNX // Returns the number of threads running in the process, or 0 to indicate that // we cannot detect it. 
size_t GetThreadCount() { const int fd = open("/proc/self/as", O_RDONLY); if (fd < 0) { return 0; } procfs_info process_info; const int status = devctl(fd, DCMD_PROC_INFO, &process_info, sizeof(process_info), NULL); close(fd); if (status == EOK) { return static_cast(process_info.num_threads); } else { return 0; } } #elif GTEST_OS_AIX size_t GetThreadCount() { struct procentry64 entry; pid_t pid = getpid(); int status = getprocs64(&entry, sizeof(entry), NULL, 0, &pid, 1); if (status == 1) { return entry.pi_thcount; } else { return 0; } } #elif GTEST_OS_FUCHSIA size_t GetThreadCount() { int dummy_buffer; size_t avail; zx_status_t status = zx_object_get_info( zx_process_self(), ZX_INFO_PROCESS_THREADS, &dummy_buffer, 0, nullptr, &avail); if (status == ZX_OK) { return avail; } else { return 0; } } #else size_t GetThreadCount() { // There's no portable way to detect the number of threads, so we just // return 0 to indicate that we cannot detect it. return 0; } #endif // GTEST_OS_LINUX #if GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS void SleepMilliseconds(int n) { ::Sleep(n); } AutoHandle::AutoHandle() : handle_(INVALID_HANDLE_VALUE) {} AutoHandle::AutoHandle(Handle handle) : handle_(handle) {} AutoHandle::~AutoHandle() { Reset(); } AutoHandle::Handle AutoHandle::Get() const { return handle_; } void AutoHandle::Reset() { Reset(INVALID_HANDLE_VALUE); } void AutoHandle::Reset(HANDLE handle) { // Resetting with the same handle we already own is invalid. if (handle_ != handle) { if (IsCloseable()) { ::CloseHandle(handle_); } handle_ = handle; } else { GTEST_CHECK_(!IsCloseable()) << "Resetting a valid handle to itself is likely a programmer error " "and thus not allowed."; } } bool AutoHandle::IsCloseable() const { // Different Windows APIs may use either of these values to represent an // invalid handle. return handle_ != NULL && handle_ != INVALID_HANDLE_VALUE; } Notification::Notification() : event_(::CreateEvent(NULL, // Default security attributes. TRUE, // Do not reset automatically. FALSE, // Initially unset. NULL)) { // Anonymous event. GTEST_CHECK_(event_.Get() != NULL); } void Notification::Notify() { GTEST_CHECK_(::SetEvent(event_.Get()) != FALSE); } void Notification::WaitForNotification() { GTEST_CHECK_( ::WaitForSingleObject(event_.Get(), INFINITE) == WAIT_OBJECT_0); } Mutex::Mutex() : owner_thread_id_(0), type_(kDynamic), critical_section_init_phase_(0), critical_section_(new CRITICAL_SECTION) { ::InitializeCriticalSection(critical_section_); } Mutex::~Mutex() { // Static mutexes are leaked intentionally. It is not thread-safe to try // to clean them up. // FIXME: Switch to Slim Reader/Writer (SRW) Locks, which requires // nothing to clean it up but is available only on Vista and later. // https://docs.microsoft.com/en-us/windows/desktop/Sync/slim-reader-writer--srw--locks if (type_ == kDynamic) { ::DeleteCriticalSection(critical_section_); delete critical_section_; critical_section_ = NULL; } } void Mutex::Lock() { ThreadSafeLazyInit(); ::EnterCriticalSection(critical_section_); owner_thread_id_ = ::GetCurrentThreadId(); } void Mutex::Unlock() { ThreadSafeLazyInit(); // We don't protect writing to owner_thread_id_ here, as it's the // caller's responsibility to ensure that the current thread holds the // mutex when this is called. owner_thread_id_ = 0; ::LeaveCriticalSection(critical_section_); } // Does nothing if the current thread holds the mutex. Otherwise, crashes // with high probability. 
void Mutex::AssertHeld() { ThreadSafeLazyInit(); GTEST_CHECK_(owner_thread_id_ == ::GetCurrentThreadId()) << "The current thread is not holding the mutex @" << this; } namespace { // Use the RAII idiom to flag mem allocs that are intentionally never // deallocated. The motivation is to silence the false positive mem leaks // that are reported by the debug version of MS's CRT which can only detect // if an alloc is missing a matching deallocation. // Example: // MemoryIsNotDeallocated memory_is_not_deallocated; // critical_section_ = new CRITICAL_SECTION; // class MemoryIsNotDeallocated { public: MemoryIsNotDeallocated() : old_crtdbg_flag_(0) { #ifdef _MSC_VER old_crtdbg_flag_ = _CrtSetDbgFlag(_CRTDBG_REPORT_FLAG); // Set heap allocation block type to _IGNORE_BLOCK so that MS debug CRT // doesn't report mem leak if there's no matching deallocation. _CrtSetDbgFlag(old_crtdbg_flag_ & ~_CRTDBG_ALLOC_MEM_DF); #endif // _MSC_VER } ~MemoryIsNotDeallocated() { #ifdef _MSC_VER // Restore the original _CRTDBG_ALLOC_MEM_DF flag _CrtSetDbgFlag(old_crtdbg_flag_); #endif // _MSC_VER } private: int old_crtdbg_flag_; GTEST_DISALLOW_COPY_AND_ASSIGN_(MemoryIsNotDeallocated); }; } // namespace // Initializes owner_thread_id_ and critical_section_ in static mutexes. void Mutex::ThreadSafeLazyInit() { // Dynamic mutexes are initialized in the constructor. if (type_ == kStatic) { switch ( ::InterlockedCompareExchange(&critical_section_init_phase_, 1L, 0L)) { case 0: // If critical_section_init_phase_ was 0 before the exchange, we // are the first to test it and need to perform the initialization. owner_thread_id_ = 0; { // Use RAII to flag that following mem alloc is never deallocated. MemoryIsNotDeallocated memory_is_not_deallocated; critical_section_ = new CRITICAL_SECTION; } ::InitializeCriticalSection(critical_section_); // Updates the critical_section_init_phase_ to 2 to signal // initialization complete. GTEST_CHECK_(::InterlockedCompareExchange( &critical_section_init_phase_, 2L, 1L) == 1L); break; case 1: // Somebody else is already initializing the mutex; spin until they // are done. while (::InterlockedCompareExchange(&critical_section_init_phase_, 2L, 2L) != 2L) { // Possibly yields the rest of the thread's time slice to other // threads. ::Sleep(0); } break; case 2: break; // The mutex is already initialized and ready for use. default: GTEST_CHECK_(false) << "Unexpected value of critical_section_init_phase_ " << "while initializing a static mutex."; } } } namespace { class ThreadWithParamSupport : public ThreadWithParamBase { public: static HANDLE CreateThread(Runnable* runnable, Notification* thread_can_start) { ThreadMainParam* param = new ThreadMainParam(runnable, thread_can_start); DWORD thread_id; // FIXME: Consider to use _beginthreadex instead. HANDLE thread_handle = ::CreateThread( NULL, // Default security. 0, // Default stack size. &ThreadWithParamSupport::ThreadMain, param, // Parameter to ThreadMainStatic 0x0, // Default creation flags. &thread_id); // Need a valid pointer for the call to work under Win98. GTEST_CHECK_(thread_handle != NULL) << "CreateThread failed with error " << ::GetLastError() << "."; if (thread_handle == NULL) { delete param; } return thread_handle; } private: struct ThreadMainParam { ThreadMainParam(Runnable* runnable, Notification* thread_can_start) : runnable_(runnable), thread_can_start_(thread_can_start) { } scoped_ptr runnable_; // Does not own. Notification* thread_can_start_; }; static DWORD WINAPI ThreadMain(void* ptr) { // Transfers ownership. 
scoped_ptr param(static_cast(ptr)); if (param->thread_can_start_ != NULL) param->thread_can_start_->WaitForNotification(); param->runnable_->Run(); return 0; } // Prohibit instantiation. ThreadWithParamSupport(); GTEST_DISALLOW_COPY_AND_ASSIGN_(ThreadWithParamSupport); }; } // namespace ThreadWithParamBase::ThreadWithParamBase(Runnable *runnable, Notification* thread_can_start) : thread_(ThreadWithParamSupport::CreateThread(runnable, thread_can_start)) { } ThreadWithParamBase::~ThreadWithParamBase() { Join(); } void ThreadWithParamBase::Join() { GTEST_CHECK_(::WaitForSingleObject(thread_.Get(), INFINITE) == WAIT_OBJECT_0) << "Failed to join the thread with error " << ::GetLastError() << "."; } // Maps a thread to a set of ThreadIdToThreadLocals that have values // instantiated on that thread and notifies them when the thread exits. A // ThreadLocal instance is expected to persist until all threads it has // values on have terminated. class ThreadLocalRegistryImpl { public: // Registers thread_local_instance as having value on the current thread. // Returns a value that can be used to identify the thread from other threads. static ThreadLocalValueHolderBase* GetValueOnCurrentThread( const ThreadLocalBase* thread_local_instance) { DWORD current_thread = ::GetCurrentThreadId(); MutexLock lock(&mutex_); ThreadIdToThreadLocals* const thread_to_thread_locals = GetThreadLocalsMapLocked(); ThreadIdToThreadLocals::iterator thread_local_pos = thread_to_thread_locals->find(current_thread); if (thread_local_pos == thread_to_thread_locals->end()) { thread_local_pos = thread_to_thread_locals->insert( std::make_pair(current_thread, ThreadLocalValues())).first; StartWatcherThreadFor(current_thread); } ThreadLocalValues& thread_local_values = thread_local_pos->second; ThreadLocalValues::iterator value_pos = thread_local_values.find(thread_local_instance); if (value_pos == thread_local_values.end()) { value_pos = thread_local_values .insert(std::make_pair( thread_local_instance, linked_ptr( thread_local_instance->NewValueForCurrentThread()))) .first; } return value_pos->second.get(); } static void OnThreadLocalDestroyed( const ThreadLocalBase* thread_local_instance) { std::vector > value_holders; // Clean up the ThreadLocalValues data structure while holding the lock, but // defer the destruction of the ThreadLocalValueHolderBases. { MutexLock lock(&mutex_); ThreadIdToThreadLocals* const thread_to_thread_locals = GetThreadLocalsMapLocked(); for (ThreadIdToThreadLocals::iterator it = thread_to_thread_locals->begin(); it != thread_to_thread_locals->end(); ++it) { ThreadLocalValues& thread_local_values = it->second; ThreadLocalValues::iterator value_pos = thread_local_values.find(thread_local_instance); if (value_pos != thread_local_values.end()) { value_holders.push_back(value_pos->second); thread_local_values.erase(value_pos); // This 'if' can only be successful at most once, so theoretically we // could break out of the loop here, but we don't bother doing so. } } } // Outside the lock, let the destructor for 'value_holders' deallocate the // ThreadLocalValueHolderBases. } static void OnThreadExit(DWORD thread_id) { GTEST_CHECK_(thread_id != 0) << ::GetLastError(); std::vector > value_holders; // Clean up the ThreadIdToThreadLocals data structure while holding the // lock, but defer the destruction of the ThreadLocalValueHolderBases. 
{ MutexLock lock(&mutex_); ThreadIdToThreadLocals* const thread_to_thread_locals = GetThreadLocalsMapLocked(); ThreadIdToThreadLocals::iterator thread_local_pos = thread_to_thread_locals->find(thread_id); if (thread_local_pos != thread_to_thread_locals->end()) { ThreadLocalValues& thread_local_values = thread_local_pos->second; for (ThreadLocalValues::iterator value_pos = thread_local_values.begin(); value_pos != thread_local_values.end(); ++value_pos) { value_holders.push_back(value_pos->second); } thread_to_thread_locals->erase(thread_local_pos); } } // Outside the lock, let the destructor for 'value_holders' deallocate the // ThreadLocalValueHolderBases. } private: // In a particular thread, maps a ThreadLocal object to its value. typedef std::map > ThreadLocalValues; // Stores all ThreadIdToThreadLocals having values in a thread, indexed by // thread's ID. typedef std::map ThreadIdToThreadLocals; // Holds the thread id and thread handle that we pass from // StartWatcherThreadFor to WatcherThreadFunc. typedef std::pair ThreadIdAndHandle; static void StartWatcherThreadFor(DWORD thread_id) { // The returned handle will be kept in thread_map and closed by // watcher_thread in WatcherThreadFunc. HANDLE thread = ::OpenThread(SYNCHRONIZE | THREAD_QUERY_INFORMATION, FALSE, thread_id); GTEST_CHECK_(thread != NULL); // We need to pass a valid thread ID pointer into CreateThread for it // to work correctly under Win98. DWORD watcher_thread_id; HANDLE watcher_thread = ::CreateThread( NULL, // Default security. 0, // Default stack size &ThreadLocalRegistryImpl::WatcherThreadFunc, reinterpret_cast(new ThreadIdAndHandle(thread_id, thread)), CREATE_SUSPENDED, &watcher_thread_id); GTEST_CHECK_(watcher_thread != NULL); // Give the watcher thread the same priority as ours to avoid being // blocked by it. ::SetThreadPriority(watcher_thread, ::GetThreadPriority(::GetCurrentThread())); ::ResumeThread(watcher_thread); ::CloseHandle(watcher_thread); } // Monitors exit from a given thread and notifies those // ThreadIdToThreadLocals about thread termination. static DWORD WINAPI WatcherThreadFunc(LPVOID param) { const ThreadIdAndHandle* tah = reinterpret_cast(param); GTEST_CHECK_( ::WaitForSingleObject(tah->second, INFINITE) == WAIT_OBJECT_0); OnThreadExit(tah->first); ::CloseHandle(tah->second); delete tah; return 0; } // Returns map of thread local instances. static ThreadIdToThreadLocals* GetThreadLocalsMapLocked() { mutex_.AssertHeld(); MemoryIsNotDeallocated memory_is_not_deallocated; static ThreadIdToThreadLocals* map = new ThreadIdToThreadLocals(); return map; } // Protects access to GetThreadLocalsMapLocked() and its return value. static Mutex mutex_; // Protects access to GetThreadMapLocked() and its return value. static Mutex thread_map_mutex_; }; Mutex ThreadLocalRegistryImpl::mutex_(Mutex::kStaticMutex); Mutex ThreadLocalRegistryImpl::thread_map_mutex_(Mutex::kStaticMutex); ThreadLocalValueHolderBase* ThreadLocalRegistry::GetValueOnCurrentThread( const ThreadLocalBase* thread_local_instance) { return ThreadLocalRegistryImpl::GetValueOnCurrentThread( thread_local_instance); } void ThreadLocalRegistry::OnThreadLocalDestroyed( const ThreadLocalBase* thread_local_instance) { ThreadLocalRegistryImpl::OnThreadLocalDestroyed(thread_local_instance); } #endif // GTEST_IS_THREADSAFE && GTEST_OS_WINDOWS #if GTEST_USES_POSIX_RE // Implements RE. Currently only needed for death tests. RE::~RE() { if (is_valid_) { // regfree'ing an invalid regex might crash because the content // of the regex is undefined. 
Since the regexes are essentially // the same, one cannot be valid (or invalid) without the other // being so too. regfree(&partial_regex_); regfree(&full_regex_); } free(const_cast<char*>(pattern_)); } // Returns true iff regular expression re matches the entire str. bool RE::FullMatch(const char* str, const RE& re) { if (!re.is_valid_) return false; regmatch_t match; return regexec(&re.full_regex_, str, 1, &match, 0) == 0; } // Returns true iff regular expression re matches a substring of str // (including str itself). bool RE::PartialMatch(const char* str, const RE& re) { if (!re.is_valid_) return false; regmatch_t match; return regexec(&re.partial_regex_, str, 1, &match, 0) == 0; } // Initializes an RE from its string representation. void RE::Init(const char* regex) { pattern_ = posix::StrDup(regex); // Reserves enough bytes to hold the regular expression used for a // full match. const size_t full_regex_len = strlen(regex) + 10; char* const full_pattern = new char[full_regex_len]; snprintf(full_pattern, full_regex_len, "^(%s)$", regex); is_valid_ = regcomp(&full_regex_, full_pattern, REG_EXTENDED) == 0; // We want to call regcomp(&partial_regex_, ...) even if the // previous expression returns false. Otherwise partial_regex_ may // not be properly initialized and may cause trouble when it's // freed. // // Some implementations of POSIX regex (e.g. on at least some // versions of Cygwin) don't accept the empty string as a valid // regex. We change it to an equivalent form "()" to be safe. if (is_valid_) { const char* const partial_regex = (*regex == '\0') ? "()" : regex; is_valid_ = regcomp(&partial_regex_, partial_regex, REG_EXTENDED) == 0; } EXPECT_TRUE(is_valid_) << "Regular expression \"" << regex << "\" is not a valid POSIX Extended regular expression."; delete[] full_pattern; } #elif GTEST_USES_SIMPLE_RE // Returns true iff ch appears anywhere in str (excluding the // terminating '\0' character). bool IsInSet(char ch, const char* str) { return ch != '\0' && strchr(str, ch) != NULL; } // Returns true iff ch belongs to the given classification. Unlike // similar functions in <ctype.h>, these aren't affected by the // current locale. bool IsAsciiDigit(char ch) { return '0' <= ch && ch <= '9'; } bool IsAsciiPunct(char ch) { return IsInSet(ch, "^-!\"#$%&'()*+,./:;<=>?@[\\]_`{|}~"); } bool IsRepeat(char ch) { return IsInSet(ch, "?*+"); } bool IsAsciiWhiteSpace(char ch) { return IsInSet(ch, " \f\n\r\t\v"); } bool IsAsciiWordChar(char ch) { return ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z') || ('0' <= ch && ch <= '9') || ch == '_'; } // Returns true iff "\\c" is a supported escape sequence. bool IsValidEscape(char c) { return (IsAsciiPunct(c) || IsInSet(c, "dDfnrsStvwW")); } // Returns true iff the given atom (specified by escaped and pattern) // matches ch. The result is undefined if the atom is invalid. bool AtomMatchesChar(bool escaped, char pattern_char, char ch) { if (escaped) { // "\\p" where p is pattern_char. switch (pattern_char) { case 'd': return IsAsciiDigit(ch); case 'D': return !IsAsciiDigit(ch); case 'f': return ch == '\f'; case 'n': return ch == '\n'; case 'r': return ch == '\r'; case 's': return IsAsciiWhiteSpace(ch); case 'S': return !IsAsciiWhiteSpace(ch); case 't': return ch == '\t'; case 'v': return ch == '\v'; case 'w': return IsAsciiWordChar(ch); case 'W': return !IsAsciiWordChar(ch); } return IsAsciiPunct(pattern_char) && pattern_char == ch; } return (pattern_char == '.'
&& ch != '\n') || pattern_char == ch; } // Helper function used by ValidateRegex() to format error messages. static std::string FormatRegexSyntaxError(const char* regex, int index) { return (Message() << "Syntax error at index " << index << " in simple regular expression \"" << regex << "\": ").GetString(); } // Generates non-fatal failures and returns false if regex is invalid; // otherwise returns true. bool ValidateRegex(const char* regex) { if (regex == NULL) { // FIXME: fix the source file location in the // assertion failures to match where the regex is used in user // code. ADD_FAILURE() << "NULL is not a valid simple regular expression."; return false; } bool is_valid = true; // True iff ?, *, or + can follow the previous atom. bool prev_repeatable = false; for (int i = 0; regex[i]; i++) { if (regex[i] == '\\') { // An escape sequence i++; if (regex[i] == '\0') { ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) << "'\\' cannot appear at the end."; return false; } if (!IsValidEscape(regex[i])) { ADD_FAILURE() << FormatRegexSyntaxError(regex, i - 1) << "invalid escape sequence \"\\" << regex[i] << "\"."; is_valid = false; } prev_repeatable = true; } else { // Not an escape sequence. const char ch = regex[i]; if (ch == '^' && i > 0) { ADD_FAILURE() << FormatRegexSyntaxError(regex, i) << "'^' can only appear at the beginning."; is_valid = false; } else if (ch == '$' && regex[i + 1] != '\0') { ADD_FAILURE() << FormatRegexSyntaxError(regex, i) << "'$' can only appear at the end."; is_valid = false; } else if (IsInSet(ch, "()[]{}|")) { ADD_FAILURE() << FormatRegexSyntaxError(regex, i) << "'" << ch << "' is unsupported."; is_valid = false; } else if (IsRepeat(ch) && !prev_repeatable) { ADD_FAILURE() << FormatRegexSyntaxError(regex, i) << "'" << ch << "' can only follow a repeatable token."; is_valid = false; } prev_repeatable = !IsInSet(ch, "^$?*+"); } } return is_valid; } // Matches a repeated regex atom followed by a valid simple regular // expression. The regex atom is defined as c if escaped is false, // or \c otherwise. repeat is the repetition meta character (?, *, // or +). The behavior is undefined if str contains too many // characters to be indexable by size_t, in which case the test will // probably time out anyway. We are fine with this limitation as // std::string has it too. bool MatchRepetitionAndRegexAtHead( bool escaped, char c, char repeat, const char* regex, const char* str) { const size_t min_count = (repeat == '+') ? 1 : 0; const size_t max_count = (repeat == '?') ? 1 : static_cast(-1) - 1; // We cannot call numeric_limits::max() as it conflicts with the // max() macro on Windows. for (size_t i = 0; i <= max_count; ++i) { // We know that the atom matches each of the first i characters in str. if (i >= min_count && MatchRegexAtHead(regex, str + i)) { // We have enough matches at the head, and the tail matches too. // Since we only care about *whether* the pattern matches str // (as opposed to *how* it matches), there is no need to find a // greedy match. return true; } if (str[i] == '\0' || !AtomMatchesChar(escaped, c, str[i])) return false; } return false; } // Returns true iff regex matches a prefix of str. regex must be a // valid simple regular expression and not start with "^", or the // result is undefined. bool MatchRegexAtHead(const char* regex, const char* str) { if (*regex == '\0') // An empty regex matches a prefix of anything. return true; // "$" only matches the end of a string. 
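// A few behavior examples for this simple-RE dialect (they apply when
// GTEST_USES_SIMPLE_RE is selected; under GTEST_USES_POSIX_RE the full
// POSIX Extended syntax is available instead):
//   MatchRegexAnywhere("a?b", "xxbxx");  // true: 'a?' matches "" before 'b'
//   MatchRegexAnywhere("^ab", "xab");    // false: '^' anchors to the start
//   MatchRegexAnywhere("ab$", "abc");    // false: '$' anchors to the end
//   MatchRegexAnywhere("a\\db", "a7b");  // true: '\\d' matches one digit
//   ValidateRegex("a(b)");               // reports a failure: '(' unsupported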
// Note that regex being // valid guarantees that there's nothing after "$" in it. if (*regex == '$') return *str == '\0'; // Is the first thing in regex an escape sequence? const bool escaped = *regex == '\\'; if (escaped) ++regex; if (IsRepeat(regex[1])) { // MatchRepetitionAndRegexAtHead() calls MatchRegexAtHead(), so // here's an indirect recursion. It terminates as the regex gets // shorter in each recursion. return MatchRepetitionAndRegexAtHead( escaped, regex[0], regex[1], regex + 2, str); } else { // regex isn't empty, isn't "$", and doesn't start with a // repetition. We match the first atom of regex with the first // character of str and recurse. return (*str != '\0') && AtomMatchesChar(escaped, *regex, *str) && MatchRegexAtHead(regex + 1, str + 1); } } // Returns true iff regex matches any substring of str. regex must be // a valid simple regular expression, or the result is undefined. // // The algorithm is recursive, but the recursion depth doesn't exceed // the regex length, so we won't need to worry about running out of // stack space normally. In rare cases the time complexity can be // exponential with respect to the regex length + the string length, // but usually it's much faster (often close to linear). bool MatchRegexAnywhere(const char* regex, const char* str) { if (regex == NULL || str == NULL) return false; if (*regex == '^') return MatchRegexAtHead(regex + 1, str); // A successful match can be anywhere in str. do { if (MatchRegexAtHead(regex, str)) return true; } while (*str++ != '\0'); return false; } // Implements the RE class. RE::~RE() { free(const_cast<char*>(pattern_)); free(const_cast<char*>(full_pattern_)); } // Returns true iff regular expression re matches the entire str. bool RE::FullMatch(const char* str, const RE& re) { return re.is_valid_ && MatchRegexAnywhere(re.full_pattern_, str); } // Returns true iff regular expression re matches a substring of str // (including str itself). bool RE::PartialMatch(const char* str, const RE& re) { return re.is_valid_ && MatchRegexAnywhere(re.pattern_, str); } // Initializes an RE from its string representation. void RE::Init(const char* regex) { pattern_ = full_pattern_ = NULL; if (regex != NULL) { pattern_ = posix::StrDup(regex); } is_valid_ = ValidateRegex(regex); if (!is_valid_) { // No need to calculate the full pattern when the regex is invalid. return; } const size_t len = strlen(regex); // Reserves enough bytes to hold the regular expression used for a // full match: we need space to prepend a '^', append a '$', and // terminate the string with '\0'. char* buffer = static_cast<char*>(malloc(len + 3)); full_pattern_ = buffer; if (*regex != '^') *buffer++ = '^'; // Makes sure full_pattern_ starts with '^'. // We don't use snprintf or strncpy, as they trigger a warning when // compiled with VC++ 8.0. memcpy(buffer, regex, len); buffer += len; if (len == 0 || regex[len - 1] != '$') *buffer++ = '$'; // Makes sure full_pattern_ ends with '$'. *buffer = '\0'; } #endif // GTEST_USES_POSIX_RE const char kUnknownFile[] = "unknown file"; // Formats a source file path and a line number as they would appear // in an error message from the compiler used to compile this code. GTEST_API_ ::std::string FormatFileLocation(const char* file, int line) { const std::string file_name(file == NULL ?
kUnknownFile : file); if (line < 0) { return file_name + ":"; } #ifdef _MSC_VER return file_name + "(" + StreamableToString(line) + "):"; #else return file_name + ":" + StreamableToString(line) + ":"; #endif // _MSC_VER } // Formats a file location for compiler-independent XML output. // Although this function is not platform dependent, we put it next to // FormatFileLocation in order to contrast the two functions. // Note that FormatCompilerIndependentFileLocation() does NOT append colon // to the file location it produces, unlike FormatFileLocation(). GTEST_API_ ::std::string FormatCompilerIndependentFileLocation( const char* file, int line) { const std::string file_name(file == NULL ? kUnknownFile : file); if (line < 0) return file_name; else return file_name + ":" + StreamableToString(line); } GTestLog::GTestLog(GTestLogSeverity severity, const char* file, int line) : severity_(severity) { const char* const marker = severity == GTEST_INFO ? "[ INFO ]" : severity == GTEST_WARNING ? "[WARNING]" : severity == GTEST_ERROR ? "[ ERROR ]" : "[ FATAL ]"; GetStream() << ::std::endl << marker << " " << FormatFileLocation(file, line).c_str() << ": "; } // Flushes the buffers and, if severity is GTEST_FATAL, aborts the program. GTestLog::~GTestLog() { GetStream() << ::std::endl; if (severity_ == GTEST_FATAL) { fflush(stderr); posix::Abort(); } } // Disable Microsoft deprecation warnings for POSIX functions called from // this class (creat, dup, dup2, and close) GTEST_DISABLE_MSC_DEPRECATED_PUSH_() #if GTEST_HAS_STREAM_REDIRECTION // Object that captures an output stream (stdout/stderr). class CapturedStream { public: // The ctor redirects the stream to a temporary file. explicit CapturedStream(int fd) : fd_(fd), uncaptured_fd_(dup(fd)) { # if GTEST_OS_WINDOWS char temp_dir_path[MAX_PATH + 1] = { '\0' }; // NOLINT char temp_file_path[MAX_PATH + 1] = { '\0' }; // NOLINT ::GetTempPathA(sizeof(temp_dir_path), temp_dir_path); const UINT success = ::GetTempFileNameA(temp_dir_path, "gtest_redir", 0, // Generate unique file name. temp_file_path); GTEST_CHECK_(success != 0) << "Unable to create a temporary file in " << temp_dir_path; const int captured_fd = creat(temp_file_path, _S_IREAD | _S_IWRITE); GTEST_CHECK_(captured_fd != -1) << "Unable to open temporary file " << temp_file_path; filename_ = temp_file_path; # else // There's no guarantee that a test has write access to the current // directory, so we create the temporary file in the /tmp directory // instead. We use /tmp on most systems, and /sdcard on Android. // That's because Android doesn't have /tmp. # if GTEST_OS_LINUX_ANDROID // Note: Android applications are expected to call the framework's // Context.getExternalStorageDirectory() method through JNI to get // the location of the world-writable SD Card directory. However, // this requires a Context handle, which cannot be retrieved // globally from native code. Doing so also precludes running the // code as part of a regular standalone executable, which doesn't // run in a Dalvik process (e.g. when running it through 'adb shell'). // // The location /sdcard is directly accessible from native code // and is the only location (unofficially) supported by the Android // team. It's generally a symlink to the real SD Card mount point // which can be /mnt/sdcard, /mnt/sdcard0, /system/media/sdcard, or // other OEM-customized locations. Never rely on these, and always // use /sdcard. 
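// The redirection below is plain POSIX fd plumbing (sketch, assuming
// POSIX dup()/dup2() semantics): uncaptured_fd_ = dup(fd) keeps a
// handle on the real stream, dup2(captured_fd, fd) points fd at the
// temporary file, and GetCapturedString() later dup2()s the saved
// handle back.  In isolation:
//
//   int saved = dup(1);    // remember the real stdout
//   dup2(tmp_fd, 1);       // stdout now writes to the temp file
//   /* ... write output ... */
//   dup2(saved, 1);        // restore stdout
//   close(saved);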
char name_template[] = "/sdcard/gtest_captured_stream.XXXXXX"; # else char name_template[] = "/tmp/captured_stream.XXXXXX"; # endif // GTEST_OS_LINUX_ANDROID const int captured_fd = mkstemp(name_template); filename_ = name_template; # endif // GTEST_OS_WINDOWS fflush(NULL); dup2(captured_fd, fd_); close(captured_fd); } ~CapturedStream() { remove(filename_.c_str()); } std::string GetCapturedString() { if (uncaptured_fd_ != -1) { // Restores the original stream. fflush(NULL); dup2(uncaptured_fd_, fd_); close(uncaptured_fd_); uncaptured_fd_ = -1; } FILE* const file = posix::FOpen(filename_.c_str(), "r"); const std::string content = ReadEntireFile(file); posix::FClose(file); return content; } private: const int fd_; // A stream to capture. int uncaptured_fd_; // Name of the temporary file holding the stderr output. ::std::string filename_; GTEST_DISALLOW_COPY_AND_ASSIGN_(CapturedStream); }; GTEST_DISABLE_MSC_DEPRECATED_POP_() static CapturedStream* g_captured_stderr = NULL; static CapturedStream* g_captured_stdout = NULL; // Starts capturing an output stream (stdout/stderr). static void CaptureStream(int fd, const char* stream_name, CapturedStream** stream) { if (*stream != NULL) { GTEST_LOG_(FATAL) << "Only one " << stream_name << " capturer can exist at a time."; } *stream = new CapturedStream(fd); } // Stops capturing the output stream and returns the captured string. static std::string GetCapturedStream(CapturedStream** captured_stream) { const std::string content = (*captured_stream)->GetCapturedString(); delete *captured_stream; *captured_stream = NULL; return content; } // Starts capturing stdout. void CaptureStdout() { CaptureStream(kStdOutFileno, "stdout", &g_captured_stdout); } // Starts capturing stderr. void CaptureStderr() { CaptureStream(kStdErrFileno, "stderr", &g_captured_stderr); } // Stops capturing stdout and returns the captured string. std::string GetCapturedStdout() { return GetCapturedStream(&g_captured_stdout); } // Stops capturing stderr and returns the captured string. std::string GetCapturedStderr() { return GetCapturedStream(&g_captured_stderr); } #endif // GTEST_HAS_STREAM_REDIRECTION size_t GetFileSize(FILE* file) { fseek(file, 0, SEEK_END); return static_cast(ftell(file)); } std::string ReadEntireFile(FILE* file) { const size_t file_size = GetFileSize(file); char* const buffer = new char[file_size]; size_t bytes_last_read = 0; // # of bytes read in the last fread() size_t bytes_read = 0; // # of bytes read so far fseek(file, 0, SEEK_SET); // Keeps reading the file until we cannot read further or the // pre-determined file size is reached. do { bytes_last_read = fread(buffer+bytes_read, 1, file_size-bytes_read, file); bytes_read += bytes_last_read; } while (bytes_last_read > 0 && bytes_read < file_size); const std::string content(buffer, bytes_read); delete[] buffer; return content; } #if GTEST_HAS_DEATH_TEST static const std::vector* g_injected_test_argvs = NULL; // Owned. 
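// The capture machinery above is typically driven from a test body
// like this (illustrative; these are internal APIs):
//
//   testing::internal::CaptureStdout();
//   printf("hello\n");  // redirected to the temporary file
//   const std::string out = testing::internal::GetCapturedStdout();
//   EXPECT_EQ("hello\n", out);
//
// Only one capturer per stream may exist at a time; a second
// CaptureStdout() without an intervening GetCapturedStdout() is fatal.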
std::vector GetInjectableArgvs() { if (g_injected_test_argvs != NULL) { return *g_injected_test_argvs; } return GetArgvs(); } void SetInjectableArgvs(const std::vector* new_argvs) { if (g_injected_test_argvs != new_argvs) delete g_injected_test_argvs; g_injected_test_argvs = new_argvs; } void SetInjectableArgvs(const std::vector& new_argvs) { SetInjectableArgvs( new std::vector(new_argvs.begin(), new_argvs.end())); } #if GTEST_HAS_GLOBAL_STRING void SetInjectableArgvs(const std::vector< ::string>& new_argvs) { SetInjectableArgvs( new std::vector(new_argvs.begin(), new_argvs.end())); } #endif // GTEST_HAS_GLOBAL_STRING void ClearInjectableArgvs() { delete g_injected_test_argvs; g_injected_test_argvs = NULL; } #endif // GTEST_HAS_DEATH_TEST #if GTEST_OS_WINDOWS_MOBILE namespace posix { void Abort() { DebugBreak(); TerminateProcess(GetCurrentProcess(), 1); } } // namespace posix #endif // GTEST_OS_WINDOWS_MOBILE // Returns the name of the environment variable corresponding to the // given flag. For example, FlagToEnvVar("foo") will return // "GTEST_FOO" in the open-source version. static std::string FlagToEnvVar(const char* flag) { const std::string full_flag = (Message() << GTEST_FLAG_PREFIX_ << flag).GetString(); Message env_var; for (size_t i = 0; i != full_flag.length(); i++) { env_var << ToUpper(full_flag.c_str()[i]); } return env_var.GetString(); } // Parses 'str' for a 32-bit signed integer. If successful, writes // the result to *value and returns true; otherwise leaves *value // unchanged and returns false. bool ParseInt32(const Message& src_text, const char* str, Int32* value) { // Parses the environment variable as a decimal integer. char* end = NULL; const long long_value = strtol(str, &end, 10); // NOLINT // Has strtol() consumed all characters in the string? if (*end != '\0') { // No - an invalid character was encountered. Message msg; msg << "WARNING: " << src_text << " is expected to be a 32-bit integer, but actually" << " has value \"" << str << "\".\n"; printf("%s", msg.GetString().c_str()); fflush(stdout); return false; } // Is the parsed value in the range of an Int32? const Int32 result = static_cast(long_value); if (long_value == LONG_MAX || long_value == LONG_MIN || // The parsed value overflows as a long. (strtol() returns // LONG_MAX or LONG_MIN when the input overflows.) result != long_value // The parsed value overflows as an Int32. ) { Message msg; msg << "WARNING: " << src_text << " is expected to be a 32-bit integer, but actually" << " has value " << str << ", which overflows.\n"; printf("%s", msg.GetString().c_str()); fflush(stdout); return false; } *value = result; return true; } // Reads and returns the Boolean environment variable corresponding to // the given flag; if it's not set, returns default_value. // // The value is considered true iff it's not "0". bool BoolFromGTestEnv(const char* flag, bool default_value) { #if defined(GTEST_GET_BOOL_FROM_ENV_) return GTEST_GET_BOOL_FROM_ENV_(flag, default_value); #else const std::string env_var = FlagToEnvVar(flag); const char* const string_value = posix::GetEnv(env_var.c_str()); return string_value == NULL ? default_value : strcmp(string_value, "0") != 0; #endif // defined(GTEST_GET_BOOL_FROM_ENV_) } // Reads and returns a 32-bit integer stored in the environment // variable corresponding to the given flag; if it isn't set or // doesn't represent a valid 32-bit integer, returns default_value. 
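// For example (illustrative, using the open-source "GTEST_" prefix):
//
//   setenv("GTEST_REPEAT", "3", 1 /* overwrite */);
//   // Int32FromGTestEnv("repeat", 1) now returns 3.  If GTEST_REPEAT
//   // were unset, the default 1 would be returned; if it were
//   // malformed, a warning would be printed and 1 returned as well.
//
// FlagToEnvVar() above derives the "GTEST_REPEAT" name by upper-casing
// the prefixed flag name.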
Int32 Int32FromGTestEnv(const char* flag, Int32 default_value) { #if defined(GTEST_GET_INT32_FROM_ENV_) return GTEST_GET_INT32_FROM_ENV_(flag, default_value); #else const std::string env_var = FlagToEnvVar(flag); const char* const string_value = posix::GetEnv(env_var.c_str()); if (string_value == NULL) { // The environment variable is not set. return default_value; } Int32 result = default_value; if (!ParseInt32(Message() << "Environment variable " << env_var, string_value, &result)) { printf("The default value %s is used.\n", (Message() << default_value).GetString().c_str()); fflush(stdout); return default_value; } return result; #endif // defined(GTEST_GET_INT32_FROM_ENV_) } // As a special case for the 'output' flag, if GTEST_OUTPUT is not // set, we look for XML_OUTPUT_FILE, which is set by the Bazel build // system. The value of XML_OUTPUT_FILE is a filename without the // "xml:" prefix of GTEST_OUTPUT. // Note that this is meant to be called at the call site so it does // not check that the flag is 'output' // In essence this checks an env variable called XML_OUTPUT_FILE // and if it is set we prepend "xml:" to its value, if it not set we return "" std::string OutputFlagAlsoCheckEnvVar(){ std::string default_value_for_output_flag = ""; const char* xml_output_file_env = posix::GetEnv("XML_OUTPUT_FILE"); if (NULL != xml_output_file_env) { default_value_for_output_flag = std::string("xml:") + xml_output_file_env; } return default_value_for_output_flag; } // Reads and returns the string environment variable corresponding to // the given flag; if it's not set, returns default_value. const char* StringFromGTestEnv(const char* flag, const char* default_value) { #if defined(GTEST_GET_STRING_FROM_ENV_) return GTEST_GET_STRING_FROM_ENV_(flag, default_value); #else const std::string env_var = FlagToEnvVar(flag); const char* const value = posix::GetEnv(env_var.c_str()); return value == NULL ? default_value : value; #endif // defined(GTEST_GET_STRING_FROM_ENV_) } } // namespace internal } // namespace testing libvpx-1.8.2/third_party/googletest/src/src/gtest-printers.cc000066400000000000000000000354611357355204000244270ustar00rootroot00000000000000// Copyright 2007, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // Google Test - The Google C++ Testing and Mocking Framework // // This file implements a universal value printer that can print a // value of any type T: // // void ::testing::internal::UniversalPrinter::Print(value, ostream_ptr); // // It uses the << operator when possible, and prints the bytes in the // object otherwise. A user can override its behavior for a class // type Foo by defining either operator<<(::std::ostream&, const Foo&) // or void PrintTo(const Foo&, ::std::ostream*) in the namespace that // defines Foo. #include "gtest/gtest-printers.h" #include #include #include #include // NOLINT #include #include "gtest/internal/gtest-port.h" #include "src/gtest-internal-inl.h" namespace testing { namespace { using ::std::ostream; // Prints a segment of bytes in the given object. GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ void PrintByteSegmentInObjectTo(const unsigned char* obj_bytes, size_t start, size_t count, ostream* os) { char text[5] = ""; for (size_t i = 0; i != count; i++) { const size_t j = start + i; if (i != 0) { // Organizes the bytes into groups of 2 for easy parsing by // human. if ((j % 2) == 0) *os << ' '; else *os << '-'; } GTEST_SNPRINTF_(text, sizeof(text), "%02X", obj_bytes[j]); *os << text; } } // Prints the bytes in the given value to the given ostream. void PrintBytesInObjectToImpl(const unsigned char* obj_bytes, size_t count, ostream* os) { // Tells the user how big the object is. *os << count << "-byte object <"; const size_t kThreshold = 132; const size_t kChunkSize = 64; // If the object size is bigger than kThreshold, we'll have to omit // some details by printing only the first and the last kChunkSize // bytes. // FIXME: let the user control the threshold using a flag. if (count < kThreshold) { PrintByteSegmentInObjectTo(obj_bytes, 0, count, os); } else { PrintByteSegmentInObjectTo(obj_bytes, 0, kChunkSize, os); *os << " ... "; // Rounds up to 2-byte boundary. const size_t resume_pos = (count - kChunkSize + 1)/2*2; PrintByteSegmentInObjectTo(obj_bytes, resume_pos, count - resume_pos, os); } *os << ">"; } } // namespace namespace internal2 { // Delegates to PrintBytesInObjectToImpl() to print the bytes in the // given object. The delegation simplifies the implementation, which // uses the << operator and thus is easier done outside of the // ::testing::internal namespace, which contains a << operator that // sometimes conflicts with the one in STL. void PrintBytesInObjectTo(const unsigned char* obj_bytes, size_t count, ostream* os) { PrintBytesInObjectToImpl(obj_bytes, count, os); } } // namespace internal2 namespace internal { // Depending on the value of a char (or wchar_t), we print it in one // of three formats: // - as is if it's a printable ASCII (e.g. 'a', '2', ' '), // - as a hexadecimal escape sequence (e.g. '\x7F'), or // - as a special escape sequence (e.g. '\r', '\n'). 
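// For example (illustrative), PrintCharAndCodeTo() defined below
// renders
//   'a'     as  'a' (97, 0x61)
//   '\n'    as  '\n' (10, 0xA)
//   '\x7F'  as  '\x7F' (127)
// where the trailing hex code is omitted once the literal is already
// in '\x..' form or the code is in [1, 9].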
enum CharFormat { kAsIs, kHexEscape, kSpecialEscape }; // Returns true if c is a printable ASCII character. We test the // value of c directly instead of calling isprint(), which is buggy on // Windows Mobile. inline bool IsPrintableAscii(wchar_t c) { return 0x20 <= c && c <= 0x7E; } // Prints a wide or narrow char c as a character literal without the // quotes, escaping it when necessary; returns how c was formatted. // The template argument UnsignedChar is the unsigned version of Char, // which is the type of c. template static CharFormat PrintAsCharLiteralTo(Char c, ostream* os) { switch (static_cast(c)) { case L'\0': *os << "\\0"; break; case L'\'': *os << "\\'"; break; case L'\\': *os << "\\\\"; break; case L'\a': *os << "\\a"; break; case L'\b': *os << "\\b"; break; case L'\f': *os << "\\f"; break; case L'\n': *os << "\\n"; break; case L'\r': *os << "\\r"; break; case L'\t': *os << "\\t"; break; case L'\v': *os << "\\v"; break; default: if (IsPrintableAscii(c)) { *os << static_cast(c); return kAsIs; } else { ostream::fmtflags flags = os->flags(); *os << "\\x" << std::hex << std::uppercase << static_cast(static_cast(c)); os->flags(flags); return kHexEscape; } } return kSpecialEscape; } // Prints a wchar_t c as if it's part of a string literal, escaping it when // necessary; returns how c was formatted. static CharFormat PrintAsStringLiteralTo(wchar_t c, ostream* os) { switch (c) { case L'\'': *os << "'"; return kAsIs; case L'"': *os << "\\\""; return kSpecialEscape; default: return PrintAsCharLiteralTo(c, os); } } // Prints a char c as if it's part of a string literal, escaping it when // necessary; returns how c was formatted. static CharFormat PrintAsStringLiteralTo(char c, ostream* os) { return PrintAsStringLiteralTo( static_cast(static_cast(c)), os); } // Prints a wide or narrow character c and its code. '\0' is printed // as "'\\0'", other unprintable characters are also properly escaped // using the standard C++ escape sequence. The template argument // UnsignedChar is the unsigned version of Char, which is the type of c. template void PrintCharAndCodeTo(Char c, ostream* os) { // First, print c as a literal in the most readable form we can find. *os << ((sizeof(c) > 1) ? "L'" : "'"); const CharFormat format = PrintAsCharLiteralTo(c, os); *os << "'"; // To aid user debugging, we also print c's code in decimal, unless // it's 0 (in which case c was printed as '\\0', making the code // obvious). if (c == 0) return; *os << " (" << static_cast(c); // For more convenience, we print c's code again in hexadecimal, // unless c was already printed in the form '\x##' or the code is in // [1, 9]. if (format == kHexEscape || (1 <= c && c <= 9)) { // Do nothing. } else { *os << ", 0x" << String::FormatHexInt(static_cast(c)); } *os << ")"; } void PrintTo(unsigned char c, ::std::ostream* os) { PrintCharAndCodeTo(c, os); } void PrintTo(signed char c, ::std::ostream* os) { PrintCharAndCodeTo(c, os); } // Prints a wchar_t as a symbol if it is printable or as its internal // code otherwise and also as its code. L'\0' is printed as "L'\\0'". void PrintTo(wchar_t wc, ostream* os) { PrintCharAndCodeTo(wc, os); } // Prints the given array of characters to the ostream. CharType must be either // char or wchar_t. // The array starts at begin, the length is len, it may include '\0' characters // and may not be NUL-terminated. 
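// For example (illustrative): the array {'\x1', '2', '3'} must not be
// printed as "\x123"; when a hex escape is followed by another hex
// digit, the loop below closes and reopens the literal, yielding
// "\x1" "23", so the escape cannot absorb the digits after it.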
template GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ static CharFormat PrintCharsAsStringTo( const CharType* begin, size_t len, ostream* os) { const char* const kQuoteBegin = sizeof(CharType) == 1 ? "\"" : "L\""; *os << kQuoteBegin; bool is_previous_hex = false; CharFormat print_format = kAsIs; for (size_t index = 0; index < len; ++index) { const CharType cur = begin[index]; if (is_previous_hex && IsXDigit(cur)) { // Previous character is of '\x..' form and this character can be // interpreted as another hexadecimal digit in its number. Break string to // disambiguate. *os << "\" " << kQuoteBegin; } is_previous_hex = PrintAsStringLiteralTo(cur, os) == kHexEscape; // Remember if any characters required hex escaping. if (is_previous_hex) { print_format = kHexEscape; } } *os << "\""; return print_format; } // Prints a (const) char/wchar_t array of 'len' elements, starting at address // 'begin'. CharType must be either char or wchar_t. template GTEST_ATTRIBUTE_NO_SANITIZE_MEMORY_ GTEST_ATTRIBUTE_NO_SANITIZE_ADDRESS_ GTEST_ATTRIBUTE_NO_SANITIZE_THREAD_ static void UniversalPrintCharArray( const CharType* begin, size_t len, ostream* os) { // The code // const char kFoo[] = "foo"; // generates an array of 4, not 3, elements, with the last one being '\0'. // // Therefore when printing a char array, we don't print the last element if // it's '\0', such that the output matches the string literal as it's // written in the source code. if (len > 0 && begin[len - 1] == '\0') { PrintCharsAsStringTo(begin, len - 1, os); return; } // If, however, the last element in the array is not '\0', e.g. // const char kFoo[] = { 'f', 'o', 'o' }; // we must print the entire array. We also print a message to indicate // that the array is not NUL-terminated. PrintCharsAsStringTo(begin, len, os); *os << " (no terminating NUL)"; } // Prints a (const) char array of 'len' elements, starting at address 'begin'. void UniversalPrintArray(const char* begin, size_t len, ostream* os) { UniversalPrintCharArray(begin, len, os); } // Prints a (const) wchar_t array of 'len' elements, starting at address // 'begin'. void UniversalPrintArray(const wchar_t* begin, size_t len, ostream* os) { UniversalPrintCharArray(begin, len, os); } // Prints the given C string to the ostream. void PrintTo(const char* s, ostream* os) { if (s == NULL) { *os << "NULL"; } else { *os << ImplicitCast_(s) << " pointing to "; PrintCharsAsStringTo(s, strlen(s), os); } } // MSVC compiler can be configured to define whar_t as a typedef // of unsigned short. Defining an overload for const wchar_t* in that case // would cause pointers to unsigned shorts be printed as wide strings, // possibly accessing more memory than intended and causing invalid // memory accesses. MSVC defines _NATIVE_WCHAR_T_DEFINED symbol when // wchar_t is implemented as a native type. #if !defined(_MSC_VER) || defined(_NATIVE_WCHAR_T_DEFINED) // Prints the given wide C string to the ostream. 
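// For example (illustrative), with the array printers above:
//
//   const char kFoo[] = "foo";              // 4 elements, incl. '\0'
//   UniversalPrintArray(kFoo, 4, &os);      // prints "foo"
//   const char kBar[] = { 'b', 'a', 'r' };  // no terminator
//   UniversalPrintArray(kBar, 3, &os);      // prints "bar" (no terminating NUL)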
void PrintTo(const wchar_t* s, ostream* os) { if (s == NULL) { *os << "NULL"; } else { *os << ImplicitCast_(s) << " pointing to "; PrintCharsAsStringTo(s, wcslen(s), os); } } #endif // wchar_t is native namespace { bool ContainsUnprintableControlCodes(const char* str, size_t length) { const unsigned char *s = reinterpret_cast(str); for (size_t i = 0; i < length; i++) { unsigned char ch = *s++; if (std::iscntrl(ch)) { switch (ch) { case '\t': case '\n': case '\r': break; default: return true; } } } return false; } bool IsUTF8TrailByte(unsigned char t) { return 0x80 <= t && t<= 0xbf; } bool IsValidUTF8(const char* str, size_t length) { const unsigned char *s = reinterpret_cast(str); for (size_t i = 0; i < length;) { unsigned char lead = s[i++]; if (lead <= 0x7f) { continue; // single-byte character (ASCII) 0..7F } if (lead < 0xc2) { return false; // trail byte or non-shortest form } else if (lead <= 0xdf && (i + 1) <= length && IsUTF8TrailByte(s[i])) { ++i; // 2-byte character } else if (0xe0 <= lead && lead <= 0xef && (i + 2) <= length && IsUTF8TrailByte(s[i]) && IsUTF8TrailByte(s[i + 1]) && // check for non-shortest form and surrogate (lead != 0xe0 || s[i] >= 0xa0) && (lead != 0xed || s[i] < 0xa0)) { i += 2; // 3-byte character } else if (0xf0 <= lead && lead <= 0xf4 && (i + 3) <= length && IsUTF8TrailByte(s[i]) && IsUTF8TrailByte(s[i + 1]) && IsUTF8TrailByte(s[i + 2]) && // check for non-shortest form (lead != 0xf0 || s[i] >= 0x90) && (lead != 0xf4 || s[i] < 0x90)) { i += 3; // 4-byte character } else { return false; } } return true; } void ConditionalPrintAsText(const char* str, size_t length, ostream* os) { if (!ContainsUnprintableControlCodes(str, length) && IsValidUTF8(str, length)) { *os << "\n As Text: \"" << str << "\""; } } } // anonymous namespace // Prints a ::string object. #if GTEST_HAS_GLOBAL_STRING void PrintStringTo(const ::string& s, ostream* os) { if (PrintCharsAsStringTo(s.data(), s.size(), os) == kHexEscape) { if (GTEST_FLAG(print_utf8)) { ConditionalPrintAsText(s.data(), s.size(), os); } } } #endif // GTEST_HAS_GLOBAL_STRING void PrintStringTo(const ::std::string& s, ostream* os) { if (PrintCharsAsStringTo(s.data(), s.size(), os) == kHexEscape) { if (GTEST_FLAG(print_utf8)) { ConditionalPrintAsText(s.data(), s.size(), os); } } } // Prints a ::wstring object. #if GTEST_HAS_GLOBAL_WSTRING void PrintWideStringTo(const ::wstring& s, ostream* os) { PrintCharsAsStringTo(s.data(), s.size(), os); } #endif // GTEST_HAS_GLOBAL_WSTRING #if GTEST_HAS_STD_WSTRING void PrintWideStringTo(const ::std::wstring& s, ostream* os) { PrintCharsAsStringTo(s.data(), s.size(), os); } #endif // GTEST_HAS_STD_WSTRING } // namespace internal } // namespace testing libvpx-1.8.2/third_party/googletest/src/src/gtest-test-part.cc000066400000000000000000000073111357355204000244750ustar00rootroot00000000000000// Copyright 2008, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. 
nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // The Google C++ Testing and Mocking Framework (Google Test) #include "gtest/gtest-test-part.h" #include "src/gtest-internal-inl.h" namespace testing { using internal::GetUnitTestImpl; // Gets the summary of the failure message by omitting the stack trace // in it. std::string TestPartResult::ExtractSummary(const char* message) { const char* const stack_trace = strstr(message, internal::kStackTraceMarker); return stack_trace == NULL ? message : std::string(message, stack_trace); } // Prints a TestPartResult object. std::ostream& operator<<(std::ostream& os, const TestPartResult& result) { return os << result.file_name() << ":" << result.line_number() << ": " << (result.type() == TestPartResult::kSuccess ? "Success" : result.type() == TestPartResult::kFatalFailure ? "Fatal failure" : "Non-fatal failure") << ":\n" << result.message() << std::endl; } // Appends a TestPartResult to the array. void TestPartResultArray::Append(const TestPartResult& result) { array_.push_back(result); } // Returns the TestPartResult at the given index (0-based). const TestPartResult& TestPartResultArray::GetTestPartResult(int index) const { if (index < 0 || index >= size()) { printf("\nInvalid index (%d) into TestPartResultArray.\n", index); internal::posix::Abort(); } return array_[index]; } // Returns the number of TestPartResult objects in the array. int TestPartResultArray::size() const { return static_cast(array_.size()); } namespace internal { HasNewFatalFailureHelper::HasNewFatalFailureHelper() : has_new_fatal_failure_(false), original_reporter_(GetUnitTestImpl()-> GetTestPartResultReporterForCurrentThread()) { GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread(this); } HasNewFatalFailureHelper::~HasNewFatalFailureHelper() { GetUnitTestImpl()->SetTestPartResultReporterForCurrentThread( original_reporter_); } void HasNewFatalFailureHelper::ReportTestPartResult( const TestPartResult& result) { if (result.fatally_failed()) has_new_fatal_failure_ = true; original_reporter_->ReportTestPartResult(result); } } // namespace internal } // namespace testing libvpx-1.8.2/third_party/googletest/src/src/gtest-typed-test.cc000066400000000000000000000075161357355204000246630ustar00rootroot00000000000000// Copyright 2008 Google Inc. // All Rights Reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. 
// * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "gtest/gtest-typed-test.h" #include "gtest/gtest.h" namespace testing { namespace internal { #if GTEST_HAS_TYPED_TEST_P // Skips to the first non-space char in str. Returns an empty string if str // contains only whitespace characters. static const char* SkipSpaces(const char* str) { while (IsSpace(*str)) str++; return str; } static std::vector SplitIntoTestNames(const char* src) { std::vector name_vec; src = SkipSpaces(src); for (; src != NULL; src = SkipComma(src)) { name_vec.push_back(StripTrailingSpaces(GetPrefixUntilComma(src))); } return name_vec; } // Verifies that registered_tests match the test names in // registered_tests_; returns registered_tests if successful, or // aborts the program otherwise. const char* TypedTestCasePState::VerifyRegisteredTestNames( const char* file, int line, const char* registered_tests) { typedef RegisteredTestsMap::const_iterator RegisteredTestIter; registered_ = true; std::vector name_vec = SplitIntoTestNames(registered_tests); Message errors; std::set tests; for (std::vector::const_iterator name_it = name_vec.begin(); name_it != name_vec.end(); ++name_it) { const std::string& name = *name_it; if (tests.count(name) != 0) { errors << "Test " << name << " is listed more than once.\n"; continue; } bool found = false; for (RegisteredTestIter it = registered_tests_.begin(); it != registered_tests_.end(); ++it) { if (name == it->first) { found = true; break; } } if (found) { tests.insert(name); } else { errors << "No test named " << name << " can be found in this test case.\n"; } } for (RegisteredTestIter it = registered_tests_.begin(); it != registered_tests_.end(); ++it) { if (tests.count(it->first) == 0) { errors << "You forgot to list test " << it->first << ".\n"; } } const std::string& errors_str = errors.GetString(); if (errors_str != "") { fprintf(stderr, "%s %s", FormatFileLocation(file, line).c_str(), errors_str.c_str()); fflush(stderr); posix::Abort(); } return registered_tests; } #endif // GTEST_HAS_TYPED_TEST_P } // namespace internal } // namespace testing libvpx-1.8.2/third_party/googletest/src/src/gtest.cc000066400000000000000000006513251357355204000225660ustar00rootroot00000000000000// Copyright 2005, Google Inc. // All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. // // The Google C++ Testing and Mocking Framework (Google Test) #include "gtest/gtest.h" #include "gtest/internal/custom/gtest.h" #include "gtest/gtest-spi.h" #include #include #include #include #include #include #include #include #include #include #include #include #include #include // NOLINT #include #include #if GTEST_OS_LINUX // FIXME: Use autoconf to detect availability of // gettimeofday(). # define GTEST_HAS_GETTIMEOFDAY_ 1 # include // NOLINT # include // NOLINT # include // NOLINT // Declares vsnprintf(). This header is not available on Windows. # include // NOLINT # include // NOLINT # include // NOLINT # include // NOLINT # include #elif GTEST_OS_SYMBIAN # define GTEST_HAS_GETTIMEOFDAY_ 1 # include // NOLINT #elif GTEST_OS_ZOS # define GTEST_HAS_GETTIMEOFDAY_ 1 # include // NOLINT // On z/OS we additionally need strings.h for strcasecmp. # include // NOLINT #elif GTEST_OS_WINDOWS_MOBILE // We are on Windows CE. # include // NOLINT # undef min #elif GTEST_OS_WINDOWS // We are on Windows proper. # include // NOLINT # include // NOLINT # include // NOLINT # include // NOLINT # if GTEST_OS_WINDOWS_MINGW // MinGW has gettimeofday() but not _ftime64(). // FIXME: Use autoconf to detect availability of // gettimeofday(). // FIXME: There are other ways to get the time on // Windows, like GetTickCount() or GetSystemTimeAsFileTime(). MinGW // supports these. consider using them instead. # define GTEST_HAS_GETTIMEOFDAY_ 1 # include // NOLINT # endif // GTEST_OS_WINDOWS_MINGW // cpplint thinks that the header is already included, so we want to // silence it. # include // NOLINT # undef min #else // Assume other platforms have gettimeofday(). // FIXME: Use autoconf to detect availability of // gettimeofday(). # define GTEST_HAS_GETTIMEOFDAY_ 1 // cpplint thinks that the header is already included, so we want to // silence it. 
# include // NOLINT # include // NOLINT #endif // GTEST_OS_LINUX #if GTEST_HAS_EXCEPTIONS # include #endif #if GTEST_CAN_STREAM_RESULTS_ # include // NOLINT # include // NOLINT # include // NOLINT # include // NOLINT #endif #include "src/gtest-internal-inl.h" #if GTEST_OS_WINDOWS # define vsnprintf _vsnprintf #endif // GTEST_OS_WINDOWS #if GTEST_OS_MAC #ifndef GTEST_OS_IOS #include #endif #endif #if GTEST_HAS_ABSL #include "absl/debugging/failure_signal_handler.h" #include "absl/debugging/stacktrace.h" #include "absl/debugging/symbolize.h" #include "absl/strings/str_cat.h" #endif // GTEST_HAS_ABSL namespace testing { using internal::CountIf; using internal::ForEach; using internal::GetElementOr; using internal::Shuffle; // Constants. // A test whose test case name or test name matches this filter is // disabled and not run. static const char kDisableTestFilter[] = "DISABLED_*:*/DISABLED_*"; // A test case whose name matches this filter is considered a death // test case and will be run before test cases whose name doesn't // match this filter. static const char kDeathTestCaseFilter[] = "*DeathTest:*DeathTest/*"; // A test filter that matches everything. static const char kUniversalFilter[] = "*"; // The default output format. static const char kDefaultOutputFormat[] = "xml"; // The default output file. static const char kDefaultOutputFile[] = "test_detail"; // The environment variable name for the test shard index. static const char kTestShardIndex[] = "GTEST_SHARD_INDEX"; // The environment variable name for the total number of test shards. static const char kTestTotalShards[] = "GTEST_TOTAL_SHARDS"; // The environment variable name for the test shard status file. static const char kTestShardStatusFile[] = "GTEST_SHARD_STATUS_FILE"; namespace internal { // The text used in failure messages to indicate the start of the // stack trace. const char kStackTraceMarker[] = "\nStack trace:\n"; // g_help_flag is true iff the --help flag or an equivalent form is // specified on the command line. bool g_help_flag = false; // Utilty function to Open File for Writing static FILE* OpenFileForWriting(const std::string& output_file) { FILE* fileout = NULL; FilePath output_file_path(output_file); FilePath output_dir(output_file_path.RemoveFileName()); if (output_dir.CreateDirectoriesRecursively()) { fileout = posix::FOpen(output_file.c_str(), "w"); } if (fileout == NULL) { GTEST_LOG_(FATAL) << "Unable to open file \"" << output_file << "\""; } return fileout; } } // namespace internal // Bazel passes in the argument to '--test_filter' via the TESTBRIDGE_TEST_ONLY // environment variable. static const char* GetDefaultFilter() { const char* const testbridge_test_only = internal::posix::GetEnv("TESTBRIDGE_TEST_ONLY"); if (testbridge_test_only != NULL) { return testbridge_test_only; } return kUniversalFilter; } GTEST_DEFINE_bool_( also_run_disabled_tests, internal::BoolFromGTestEnv("also_run_disabled_tests", false), "Run disabled tests too, in addition to the tests normally being run."); GTEST_DEFINE_bool_( break_on_failure, internal::BoolFromGTestEnv("break_on_failure", false), "True iff a failed assertion should be a debugger break-point."); GTEST_DEFINE_bool_( catch_exceptions, internal::BoolFromGTestEnv("catch_exceptions", true), "True iff " GTEST_NAME_ " should catch exceptions and treat them as test failures."); GTEST_DEFINE_string_( color, internal::StringFromGTestEnv("color", "auto"), "Whether to use colors in the output. Valid values: yes, no, " "and auto. 
'auto' means to use colors if the output is " "being sent to a terminal and the TERM environment variable " "is set to a terminal type that supports colors."); GTEST_DEFINE_string_( filter, internal::StringFromGTestEnv("filter", GetDefaultFilter()), "A colon-separated list of glob (not regex) patterns " "for filtering the tests to run, optionally followed by a " "'-' and a : separated list of negative patterns (tests to " "exclude). A test is run if it matches one of the positive " "patterns and does not match any of the negative patterns."); GTEST_DEFINE_bool_( install_failure_signal_handler, internal::BoolFromGTestEnv("install_failure_signal_handler", false), "If true and supported on the current platform, " GTEST_NAME_ " should " "install a signal handler that dumps debugging information when fatal " "signals are raised."); GTEST_DEFINE_bool_(list_tests, false, "List all tests without running them."); // The net priority order after flag processing is thus: // --gtest_output command line flag // GTEST_OUTPUT environment variable // XML_OUTPUT_FILE environment variable // '' GTEST_DEFINE_string_( output, internal::StringFromGTestEnv("output", internal::OutputFlagAlsoCheckEnvVar().c_str()), "A format (defaults to \"xml\" but can be specified to be \"json\"), " "optionally followed by a colon and an output file name or directory. " "A directory is indicated by a trailing pathname separator. " "Examples: \"xml:filename.xml\", \"xml::directoryname/\". " "If a directory is specified, output files will be created " "within that directory, with file-names based on the test " "executable's name and, if necessary, made unique by adding " "digits."); GTEST_DEFINE_bool_( print_time, internal::BoolFromGTestEnv("print_time", true), "True iff " GTEST_NAME_ " should display elapsed time in text output."); GTEST_DEFINE_bool_( print_utf8, internal::BoolFromGTestEnv("print_utf8", true), "True iff " GTEST_NAME_ " prints UTF8 characters as text."); GTEST_DEFINE_int32_( random_seed, internal::Int32FromGTestEnv("random_seed", 0), "Random number seed to use when shuffling test orders. Must be in range " "[1, 99999], or 0 to use a seed based on the current time."); GTEST_DEFINE_int32_( repeat, internal::Int32FromGTestEnv("repeat", 1), "How many times to repeat each test. Specify a negative number " "for repeating forever. Useful for shaking out flaky tests."); GTEST_DEFINE_bool_( show_internal_stack_frames, false, "True iff " GTEST_NAME_ " should include internal stack frames when " "printing test failure stack traces."); GTEST_DEFINE_bool_( shuffle, internal::BoolFromGTestEnv("shuffle", false), "True iff " GTEST_NAME_ " should randomize tests' order on every run."); GTEST_DEFINE_int32_( stack_trace_depth, internal::Int32FromGTestEnv("stack_trace_depth", kMaxStackTraceDepth), "The maximum number of stack frames to print when an " "assertion fails. The valid range is 0 through 100, inclusive."); GTEST_DEFINE_string_( stream_result_to, internal::StringFromGTestEnv("stream_result_to", ""), "This flag specifies the host name and the port number on which to stream " "test results. Example: \"localhost:555\". The flag is effective only on " "Linux."); GTEST_DEFINE_bool_( throw_on_failure, internal::BoolFromGTestEnv("throw_on_failure", false), "When this flag is specified, a failed assertion will throw an exception " "if exceptions are enabled or exit the program with a non-zero code " "otherwise. 
For use with an external test framework."); #if GTEST_USE_OWN_FLAGFILE_FLAG_ GTEST_DEFINE_string_( flagfile, internal::StringFromGTestEnv("flagfile", ""), "This flag specifies the flagfile to read command-line flags from."); #endif // GTEST_USE_OWN_FLAGFILE_FLAG_ namespace internal { // Generates a random number from [0, range), using a Linear // Congruential Generator (LCG). Crashes if 'range' is 0 or greater // than kMaxRange. UInt32 Random::Generate(UInt32 range) { // These constants are the same as are used in glibc's rand(3). // Use wider types than necessary to prevent unsigned overflow diagnostics. state_ = static_cast(1103515245ULL*state_ + 12345U) % kMaxRange; GTEST_CHECK_(range > 0) << "Cannot generate a number in the range [0, 0)."; GTEST_CHECK_(range <= kMaxRange) << "Generation of a number in [0, " << range << ") was requested, " << "but this can only generate numbers in [0, " << kMaxRange << ")."; // Converting via modulus introduces a bit of downward bias, but // it's simple, and a linear congruential generator isn't too good // to begin with. return state_ % range; } // GTestIsInitialized() returns true iff the user has initialized // Google Test. Useful for catching the user mistake of not initializing // Google Test before calling RUN_ALL_TESTS(). static bool GTestIsInitialized() { return GetArgvs().size() > 0; } // Iterates over a vector of TestCases, keeping a running sum of the // results of calling a given int-returning method on each. // Returns the sum. static int SumOverTestCaseList(const std::vector& case_list, int (TestCase::*method)() const) { int sum = 0; for (size_t i = 0; i < case_list.size(); i++) { sum += (case_list[i]->*method)(); } return sum; } // Returns true iff the test case passed. static bool TestCasePassed(const TestCase* test_case) { return test_case->should_run() && test_case->Passed(); } // Returns true iff the test case failed. static bool TestCaseFailed(const TestCase* test_case) { return test_case->should_run() && test_case->Failed(); } // Returns true iff test_case contains at least one test that should // run. static bool ShouldRunTestCase(const TestCase* test_case) { return test_case->should_run(); } // AssertHelper constructor. AssertHelper::AssertHelper(TestPartResult::Type type, const char* file, int line, const char* message) : data_(new AssertHelperData(type, file, line, message)) { } AssertHelper::~AssertHelper() { delete data_; } // Message assignment, for assertion streaming support. void AssertHelper::operator=(const Message& message) const { UnitTest::GetInstance()-> AddTestPartResult(data_->type, data_->file, data_->line, AppendUserMessage(data_->message, message), UnitTest::GetInstance()->impl() ->CurrentOsStackTraceExceptTop(1) // Skips the stack frame for this function itself. ); // NOLINT } // Mutex for linked pointers. GTEST_API_ GTEST_DEFINE_STATIC_MUTEX_(g_linked_ptr_mutex); // A copy of all command line arguments. Set by InitGoogleTest(). static ::std::vector g_argvs; ::std::vector GetArgvs() { #if defined(GTEST_CUSTOM_GET_ARGVS_) // GTEST_CUSTOM_GET_ARGVS_() may return a container of std::string or // ::string. This code converts it to the appropriate type. const auto& custom = GTEST_CUSTOM_GET_ARGVS_(); return ::std::vector(custom.begin(), custom.end()); #else // defined(GTEST_CUSTOM_GET_ARGVS_) return g_argvs; #endif // defined(GTEST_CUSTOM_GET_ARGVS_) } // Returns the current application's name, removing directory path if that // is present. 
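// Worked example for Random::Generate() above (illustrative): with the
// glibc constants and a seed of 1,
//
//   state_ = (1103515245 * 1 + 12345) % 2^31 = 1103527590
//
// so Generate(6) returns 1103527590 % 6 = 0, and the next call starts
// from the updated state.  The modulus introduces the slight downward
// bias noted above, which is acceptable for shuffling tests.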
FilePath GetCurrentExecutableName() { FilePath result; #if GTEST_OS_WINDOWS result.Set(FilePath(GetArgvs()[0]).RemoveExtension("exe")); #else result.Set(FilePath(GetArgvs()[0])); #endif // GTEST_OS_WINDOWS return result.RemoveDirectoryName(); } // Functions for processing the gtest_output flag. // Returns the output format, or "" for normal printed output. std::string UnitTestOptions::GetOutputFormat() { const char* const gtest_output_flag = GTEST_FLAG(output).c_str(); const char* const colon = strchr(gtest_output_flag, ':'); return (colon == NULL) ? std::string(gtest_output_flag) : std::string(gtest_output_flag, colon - gtest_output_flag); } // Returns the name of the requested output file, or the default if none // was explicitly specified. std::string UnitTestOptions::GetAbsolutePathToOutputFile() { const char* const gtest_output_flag = GTEST_FLAG(output).c_str(); std::string format = GetOutputFormat(); if (format.empty()) format = std::string(kDefaultOutputFormat); const char* const colon = strchr(gtest_output_flag, ':'); if (colon == NULL) return internal::FilePath::MakeFileName( internal::FilePath( UnitTest::GetInstance()->original_working_dir()), internal::FilePath(kDefaultOutputFile), 0, format.c_str()).string(); internal::FilePath output_name(colon + 1); if (!output_name.IsAbsolutePath()) // FIXME: on Windows \some\path is not an absolute // path (as its meaning depends on the current drive), yet the // following logic for turning it into an absolute path is wrong. // Fix it. output_name = internal::FilePath::ConcatPaths( internal::FilePath(UnitTest::GetInstance()->original_working_dir()), internal::FilePath(colon + 1)); if (!output_name.IsDirectory()) return output_name.string(); internal::FilePath result(internal::FilePath::GenerateUniqueFileName( output_name, internal::GetCurrentExecutableName(), GetOutputFormat().c_str())); return result.string(); } // Returns true iff the wildcard pattern matches the string. The // first ':' or '\0' character in pattern marks the end of it. // // This recursive algorithm isn't very efficient, but is clear and // works well enough for matching test names, which are short. bool UnitTestOptions::PatternMatchesString(const char *pattern, const char *str) { switch (*pattern) { case '\0': case ':': // Either ':' or '\0' marks the end of the pattern. return *str == '\0'; case '?': // Matches any single character. return *str != '\0' && PatternMatchesString(pattern + 1, str + 1); case '*': // Matches any string (possibly empty) of characters. return (*str != '\0' && PatternMatchesString(pattern, str + 1)) || PatternMatchesString(pattern + 1, str); default: // Non-special character. Matches itself. return *pattern == *str && PatternMatchesString(pattern + 1, str + 1); } } bool UnitTestOptions::MatchesFilter( const std::string& name, const char* filter) { const char *cur_pattern = filter; for (;;) { if (PatternMatchesString(cur_pattern, name.c_str())) { return true; } // Finds the next pattern in the filter. cur_pattern = strchr(cur_pattern, ':'); // Returns if no more pattern can be found. if (cur_pattern == NULL) { return false; } // Skips the pattern separater (the ':' character). cur_pattern++; } } // Returns true iff the user-specified filter matches the test case // name and the test name. bool UnitTestOptions::FilterMatchesTest(const std::string &test_case_name, const std::string &test_name) { const std::string& full_name = test_case_name + "." 
+ test_name.c_str(); // Split --gtest_filter at '-', if there is one, to separate into // positive filter and negative filter portions const char* const p = GTEST_FLAG(filter).c_str(); const char* const dash = strchr(p, '-'); std::string positive; std::string negative; if (dash == NULL) { positive = GTEST_FLAG(filter).c_str(); // Whole string is a positive filter negative = ""; } else { positive = std::string(p, dash); // Everything up to the dash negative = std::string(dash + 1); // Everything after the dash if (positive.empty()) { // Treat '-test1' as the same as '*-test1' positive = kUniversalFilter; } } // A filter is a colon-separated list of patterns. It matches a // test if any pattern in it matches the test. return (MatchesFilter(full_name, positive.c_str()) && !MatchesFilter(full_name, negative.c_str())); } #if GTEST_HAS_SEH // Returns EXCEPTION_EXECUTE_HANDLER if Google Test should handle the // given SEH exception, or EXCEPTION_CONTINUE_SEARCH otherwise. // This function is useful as an __except condition. int UnitTestOptions::GTestShouldProcessSEH(DWORD exception_code) { // Google Test should handle a SEH exception if: // 1. the user wants it to, AND // 2. this is not a breakpoint exception, AND // 3. this is not a C++ exception (VC++ implements them via SEH, // apparently). // // SEH exception code for C++ exceptions. // (see http://support.microsoft.com/kb/185294 for more information). const DWORD kCxxExceptionCode = 0xe06d7363; bool should_handle = true; if (!GTEST_FLAG(catch_exceptions)) should_handle = false; else if (exception_code == EXCEPTION_BREAKPOINT) should_handle = false; else if (exception_code == kCxxExceptionCode) should_handle = false; return should_handle ? EXCEPTION_EXECUTE_HANDLER : EXCEPTION_CONTINUE_SEARCH; } #endif // GTEST_HAS_SEH } // namespace internal // The c'tor sets this object as the test part result reporter used by // Google Test. The 'result' parameter specifies where to report the // results. Intercepts only failures from the current thread. ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( TestPartResultArray* result) : intercept_mode_(INTERCEPT_ONLY_CURRENT_THREAD), result_(result) { Init(); } // The c'tor sets this object as the test part result reporter used by // Google Test. The 'result' parameter specifies where to report the // results. ScopedFakeTestPartResultReporter::ScopedFakeTestPartResultReporter( InterceptMode intercept_mode, TestPartResultArray* result) : intercept_mode_(intercept_mode), result_(result) { Init(); } void ScopedFakeTestPartResultReporter::Init() { internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); if (intercept_mode_ == INTERCEPT_ALL_THREADS) { old_reporter_ = impl->GetGlobalTestPartResultReporter(); impl->SetGlobalTestPartResultReporter(this); } else { old_reporter_ = impl->GetTestPartResultReporterForCurrentThread(); impl->SetTestPartResultReporterForCurrentThread(this); } } // The d'tor restores the test part result reporter used by Google Test // before. ScopedFakeTestPartResultReporter::~ScopedFakeTestPartResultReporter() { internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); if (intercept_mode_ == INTERCEPT_ALL_THREADS) { impl->SetGlobalTestPartResultReporter(old_reporter_); } else { impl->SetTestPartResultReporterForCurrentThread(old_reporter_); } } // Increments the test part result count and remembers the result. // This method is from the TestPartResultReporterInterface interface. 
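// For example (illustrative): with --gtest_filter=Foo.*:Bar.*-Foo.Baz
// the positive patterns are "Foo.*" and "Bar.*" and the negative
// pattern is "Foo.Baz", so FilterMatchesTest() above yields
//
//   FilterMatchesTest("Foo", "Qux")  -> true   (matches "Foo.*")
//   FilterMatchesTest("Foo", "Baz")  -> false  (excluded by "Foo.Baz")
//   FilterMatchesTest("Baz", "Qux")  -> false  (no positive match)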
void ScopedFakeTestPartResultReporter::ReportTestPartResult( const TestPartResult& result) { result_->Append(result); } namespace internal { // Returns the type ID of ::testing::Test. We should always call this // instead of GetTypeId< ::testing::Test>() to get the type ID of // testing::Test. This is to work around a suspected linker bug when // using Google Test as a framework on Mac OS X. The bug causes // GetTypeId< ::testing::Test>() to return different values depending // on whether the call is from the Google Test framework itself or // from user test code. GetTestTypeId() is guaranteed to always // return the same value, as it always calls GetTypeId<>() from the // gtest.cc, which is within the Google Test framework. TypeId GetTestTypeId() { return GetTypeId(); } // The value of GetTestTypeId() as seen from within the Google Test // library. This is solely for testing GetTestTypeId(). extern const TypeId kTestTypeIdInGoogleTest = GetTestTypeId(); // This predicate-formatter checks that 'results' contains a test part // failure of the given type and that the failure message contains the // given substring. static AssertionResult HasOneFailure(const char* /* results_expr */, const char* /* type_expr */, const char* /* substr_expr */, const TestPartResultArray& results, TestPartResult::Type type, const std::string& substr) { const std::string expected(type == TestPartResult::kFatalFailure ? "1 fatal failure" : "1 non-fatal failure"); Message msg; if (results.size() != 1) { msg << "Expected: " << expected << "\n" << " Actual: " << results.size() << " failures"; for (int i = 0; i < results.size(); i++) { msg << "\n" << results.GetTestPartResult(i); } return AssertionFailure() << msg; } const TestPartResult& r = results.GetTestPartResult(0); if (r.type() != type) { return AssertionFailure() << "Expected: " << expected << "\n" << " Actual:\n" << r; } if (strstr(r.message(), substr.c_str()) == NULL) { return AssertionFailure() << "Expected: " << expected << " containing \"" << substr << "\"\n" << " Actual:\n" << r; } return AssertionSuccess(); } // The constructor of SingleFailureChecker remembers where to look up // test part results, what type of failure we expect, and what // substring the failure message should contain. SingleFailureChecker::SingleFailureChecker(const TestPartResultArray* results, TestPartResult::Type type, const std::string& substr) : results_(results), type_(type), substr_(substr) {} // The destructor of SingleFailureChecker verifies that the given // TestPartResultArray contains exactly one failure that has the given // type and contains the given substring. If that's not the case, a // non-fatal failure will be generated. SingleFailureChecker::~SingleFailureChecker() { EXPECT_PRED_FORMAT3(HasOneFailure, *results_, type_, substr_); } DefaultGlobalTestPartResultReporter::DefaultGlobalTestPartResultReporter( UnitTestImpl* unit_test) : unit_test_(unit_test) {} void DefaultGlobalTestPartResultReporter::ReportTestPartResult( const TestPartResult& result) { unit_test_->current_test_result()->AddTestPartResult(result); unit_test_->listeners()->repeater()->OnTestPartResult(result); } DefaultPerThreadTestPartResultReporter::DefaultPerThreadTestPartResultReporter( UnitTestImpl* unit_test) : unit_test_(unit_test) {} void DefaultPerThreadTestPartResultReporter::ReportTestPartResult( const TestPartResult& result) { unit_test_->GetGlobalTestPartResultReporter()->ReportTestPartResult(result); } // Returns the global test part result reporter. 
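// SingleFailureChecker above is the engine behind the gtest-spi.h
// macros (illustrative):
//
//   EXPECT_FATAL_FAILURE(ASSERT_TRUE(false), "Value of: false");
//
// passes iff the wrapped statement yields exactly one fatal failure
// whose message contains the given substring.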
TestPartResultReporterInterface*
UnitTestImpl::GetGlobalTestPartResultReporter() {
  internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
  return global_test_part_result_repoter_;
}

// Sets the global test part result reporter.
void UnitTestImpl::SetGlobalTestPartResultReporter(
    TestPartResultReporterInterface* reporter) {
  internal::MutexLock lock(&global_test_part_result_reporter_mutex_);
  global_test_part_result_repoter_ = reporter;
}

// Returns the test part result reporter for the current thread.
TestPartResultReporterInterface*
UnitTestImpl::GetTestPartResultReporterForCurrentThread() {
  return per_thread_test_part_result_reporter_.get();
}

// Sets the test part result reporter for the current thread.
void UnitTestImpl::SetTestPartResultReporterForCurrentThread(
    TestPartResultReporterInterface* reporter) {
  per_thread_test_part_result_reporter_.set(reporter);
}

// Gets the number of successful test cases.
int UnitTestImpl::successful_test_case_count() const {
  return CountIf(test_cases_, TestCasePassed);
}

// Gets the number of failed test cases.
int UnitTestImpl::failed_test_case_count() const {
  return CountIf(test_cases_, TestCaseFailed);
}

// Gets the number of all test cases.
int UnitTestImpl::total_test_case_count() const {
  return static_cast<int>(test_cases_.size());
}

// Gets the number of all test cases that contain at least one test
// that should run.
int UnitTestImpl::test_case_to_run_count() const {
  return CountIf(test_cases_, ShouldRunTestCase);
}

// Gets the number of successful tests.
int UnitTestImpl::successful_test_count() const {
  return SumOverTestCaseList(test_cases_, &TestCase::successful_test_count);
}

// Gets the number of failed tests.
int UnitTestImpl::failed_test_count() const {
  return SumOverTestCaseList(test_cases_, &TestCase::failed_test_count);
}

// Gets the number of disabled tests that will be reported in the XML report.
int UnitTestImpl::reportable_disabled_test_count() const {
  return SumOverTestCaseList(test_cases_,
                             &TestCase::reportable_disabled_test_count);
}

// Gets the number of disabled tests.
int UnitTestImpl::disabled_test_count() const {
  return SumOverTestCaseList(test_cases_, &TestCase::disabled_test_count);
}

// Gets the number of tests to be printed in the XML report.
int UnitTestImpl::reportable_test_count() const {
  return SumOverTestCaseList(test_cases_, &TestCase::reportable_test_count);
}

// Gets the number of all tests.
int UnitTestImpl::total_test_count() const {
  return SumOverTestCaseList(test_cases_, &TestCase::total_test_count);
}

// Gets the number of tests that should run.
int UnitTestImpl::test_to_run_count() const {
  return SumOverTestCaseList(test_cases_, &TestCase::test_to_run_count);
}

// Returns the current OS stack trace as an std::string.
//
// The maximum number of stack frames to be included is specified by
// the gtest_stack_trace_depth flag.  The skip_count parameter
// specifies the number of top frames to be skipped, which doesn't
// count against the number of frames to be included.
//
// For example, if Foo() calls Bar(), which in turn calls
// CurrentOsStackTraceExceptTop(1), Foo() will be included in the
// trace but Bar() and CurrentOsStackTraceExceptTop() won't.
std::string UnitTestImpl::CurrentOsStackTraceExceptTop(int skip_count) {
  return os_stack_trace_getter()->CurrentStackTrace(
      static_cast<int>(GTEST_FLAG(stack_trace_depth)),
      skip_count + 1
      // Skips the user-specified number of frames plus this function
      // itself.
      );  // NOLINT
}

// Returns the current time in milliseconds.
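// On the Windows CE / Borland path below this requires re-basing the
// 1601-01-01 FILETIME epoch onto the 1970-01-01 Unix epoch; the two differ
// by 116444736 * 100000 = 11,644,473,600,000 ms (11,644,473,600 seconds).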
TimeInMillis GetTimeInMillis() {
#if GTEST_OS_WINDOWS_MOBILE || defined(__BORLANDC__)
  // Difference between 1970-01-01 and 1601-01-01 in milliseconds.
  // http://analogous.blogspot.com/2005/04/epoch.html
  const TimeInMillis kJavaEpochToWinFileTimeDelta =
    static_cast<TimeInMillis>(116444736UL) * 100000UL;
  const DWORD kTenthMicrosInMilliSecond = 10000;

  SYSTEMTIME now_systime;
  FILETIME now_filetime;
  ULARGE_INTEGER now_int64;
  // FIXME: Shouldn't this just use GetSystemTimeAsFileTime()?
  GetSystemTime(&now_systime);
  if (SystemTimeToFileTime(&now_systime, &now_filetime)) {
    now_int64.LowPart = now_filetime.dwLowDateTime;
    now_int64.HighPart = now_filetime.dwHighDateTime;
    now_int64.QuadPart = (now_int64.QuadPart / kTenthMicrosInMilliSecond) -
      kJavaEpochToWinFileTimeDelta;
    return now_int64.QuadPart;
  }
  return 0;
#elif GTEST_OS_WINDOWS && !GTEST_HAS_GETTIMEOFDAY_
  __timeb64 now;

  // MSVC 8 deprecates _ftime64(), so we want to suppress warning 4996
  // (deprecated function) there.
  // FIXME: Use GetTickCount()?  Or use SystemTimeToFileTime()
  GTEST_DISABLE_MSC_DEPRECATED_PUSH_()
  _ftime64(&now);
  GTEST_DISABLE_MSC_DEPRECATED_POP_()

  return static_cast<TimeInMillis>(now.time) * 1000 + now.millitm;
#elif GTEST_HAS_GETTIMEOFDAY_
  struct timeval now;
  gettimeofday(&now, NULL);
  return static_cast<TimeInMillis>(now.tv_sec) * 1000 + now.tv_usec / 1000;
#else
# error "Don't know how to get the current time on your system."
#endif
}

// Utilities

// class String.

#if GTEST_OS_WINDOWS_MOBILE
// Creates a UTF-16 wide string from the given ANSI string, allocating
// memory using new. The caller is responsible for deleting the return
// value using delete[]. Returns the wide string, or NULL if the
// input is NULL.
LPCWSTR String::AnsiToUtf16(const char* ansi) {
  if (!ansi) return NULL;
  const int length = strlen(ansi);
  const int unicode_length =
      MultiByteToWideChar(CP_ACP, 0, ansi, length, NULL, 0);
  WCHAR* unicode = new WCHAR[unicode_length + 1];
  MultiByteToWideChar(CP_ACP, 0, ansi, length,
                      unicode, unicode_length);
  unicode[unicode_length] = 0;
  return unicode;
}

// Creates an ANSI string from the given wide string, allocating
// memory using new. The caller is responsible for deleting the return
// value using delete[]. Returns the ANSI string, or NULL if the
// input is NULL.
const char* String::Utf16ToAnsi(LPCWSTR utf16_str)  {
  if (!utf16_str) return NULL;
  const int ansi_length =
      WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, NULL, 0, NULL, NULL);
  char* ansi = new char[ansi_length + 1];
  WideCharToMultiByte(CP_ACP, 0, utf16_str, -1, ansi, ansi_length, NULL, NULL);
  ansi[ansi_length] = 0;
  return ansi;
}

#endif  // GTEST_OS_WINDOWS_MOBILE

// Compares two C strings.  Returns true iff they have the same content.
//
// Unlike strcmp(), this function can handle NULL argument(s).  A NULL
// C string is considered different to any non-NULL C string,
// including the empty string.
bool String::CStringEquals(const char * lhs, const char * rhs) {
  if ( lhs == NULL ) return rhs == NULL;

  if ( rhs == NULL ) return false;

  return strcmp(lhs, rhs) == 0;
}

#if GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING

// Converts an array of wide chars to a narrow string using the UTF-8
// encoding, and streams the result to the given Message object.
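// Note that embedded L'\0' characters are preserved rather than terminating
// the output: for example, the three-character sequence L"a\0b" streams as
// 'a', a literal NUL, then 'b'.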
static void StreamWideCharsToMessage(const wchar_t* wstr, size_t length,
                                     Message* msg) {
  for (size_t i = 0; i != length; ) {  // NOLINT
    if (wstr[i] != L'\0') {
      *msg << WideStringToUtf8(wstr + i, static_cast<int>(length - i));
      while (i != length && wstr[i] != L'\0')
        i++;
    } else {
      *msg << '\0';
      i++;
    }
  }
}

#endif  // GTEST_HAS_STD_WSTRING || GTEST_HAS_GLOBAL_WSTRING

void SplitString(const ::std::string& str, char delimiter,
                 ::std::vector< ::std::string>* dest) {
  ::std::vector< ::std::string> parsed;
  ::std::string::size_type pos = 0;
  while (::testing::internal::AlwaysTrue()) {
    const ::std::string::size_type colon = str.find(delimiter, pos);
    if (colon == ::std::string::npos) {
      parsed.push_back(str.substr(pos));
      break;
    } else {
      parsed.push_back(str.substr(pos, colon - pos));
      pos = colon + 1;
    }
  }
  dest->swap(parsed);
}

}  // namespace internal

// Constructs an empty Message.
// We allocate the stringstream separately because otherwise each use of
// ASSERT/EXPECT in a procedure adds over 200 bytes to the procedure's
// stack frame leading to huge stack frames in some cases; gcc does not reuse
// the stack space.
Message::Message() : ss_(new ::std::stringstream) {
  // By default, we want there to be enough precision when printing
  // a double to a Message.
  *ss_ << std::setprecision(std::numeric_limits<double>::digits10 + 2);
}

// These two overloads allow streaming a wide C string to a Message
// using the UTF-8 encoding.
Message& Message::operator <<(const wchar_t* wide_c_str) {
  return *this << internal::String::ShowWideCString(wide_c_str);
}
Message& Message::operator <<(wchar_t* wide_c_str) {
  return *this << internal::String::ShowWideCString(wide_c_str);
}

#if GTEST_HAS_STD_WSTRING
// Converts the given wide string to a narrow string using the UTF-8
// encoding, and streams the result to this Message object.
Message& Message::operator <<(const ::std::wstring& wstr) {
  internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this);
  return *this;
}
#endif  // GTEST_HAS_STD_WSTRING

#if GTEST_HAS_GLOBAL_WSTRING
// Converts the given wide string to a narrow string using the UTF-8
// encoding, and streams the result to this Message object.
Message& Message::operator <<(const ::wstring& wstr) {
  internal::StreamWideCharsToMessage(wstr.c_str(), wstr.length(), this);
  return *this;
}
#endif  // GTEST_HAS_GLOBAL_WSTRING

// Gets the text streamed to this object so far as an std::string.
// Each '\0' character in the buffer is replaced with "\\0".
std::string Message::GetString() const {
  return internal::StringStreamToString(ss_.get());
}

// AssertionResult constructors.
// Used in EXPECT_TRUE/FALSE(assertion_result).
AssertionResult::AssertionResult(const AssertionResult& other)
    : success_(other.success_),
      message_(other.message_.get() != NULL ?
               new ::std::string(*other.message_) :
               static_cast< ::std::string*>(NULL)) {
}

// Swaps two AssertionResults.
void AssertionResult::swap(AssertionResult& other) {
  using std::swap;
  swap(success_, other.success_);
  swap(message_, other.message_);
}

// Returns the assertion's negation. Used with EXPECT/ASSERT_FALSE.
AssertionResult AssertionResult::operator!() const {
  AssertionResult negation(!success_);
  if (message_.get() != NULL)
    negation << *message_;
  return negation;
}

// Makes a successful assertion result.
AssertionResult AssertionSuccess() {
  return AssertionResult(true);
}

// Makes a failed assertion result.
AssertionResult AssertionFailure() {
  return AssertionResult(false);
}

// Makes a failed assertion result with the given failure message.
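// (The next overload is kept for backward compatibility; new code should
// prefer the streaming form, e.g.:
//
//   return AssertionFailure() << "unexpected value: " << actual;
//
// where 'actual' stands for whatever is being checked.)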
// Deprecated; use AssertionFailure() << message.
AssertionResult AssertionFailure(const Message& message) {
  return AssertionFailure() << message;
}

namespace internal {

namespace edit_distance {
std::vector<EditType> CalculateOptimalEdits(const std::vector<size_t>& left,
                                            const std::vector<size_t>& right) {
  std::vector<std::vector<double> > costs(
      left.size() + 1, std::vector<double>(right.size() + 1));
  std::vector<std::vector<EditType> > best_move(
      left.size() + 1, std::vector<EditType>(right.size() + 1));

  // Populate for empty right.
  for (size_t l_i = 0; l_i < costs.size(); ++l_i) {
    costs[l_i][0] = static_cast<double>(l_i);
    best_move[l_i][0] = kRemove;
  }
  // Populate for empty left.
  for (size_t r_i = 1; r_i < costs[0].size(); ++r_i) {
    costs[0][r_i] = static_cast<double>(r_i);
    best_move[0][r_i] = kAdd;
  }

  for (size_t l_i = 0; l_i < left.size(); ++l_i) {
    for (size_t r_i = 0; r_i < right.size(); ++r_i) {
      if (left[l_i] == right[r_i]) {
        // Found a match. Consume it.
        costs[l_i + 1][r_i + 1] = costs[l_i][r_i];
        best_move[l_i + 1][r_i + 1] = kMatch;
        continue;
      }

      const double add = costs[l_i + 1][r_i];
      const double remove = costs[l_i][r_i + 1];
      const double replace = costs[l_i][r_i];
      if (add < remove && add < replace) {
        costs[l_i + 1][r_i + 1] = add + 1;
        best_move[l_i + 1][r_i + 1] = kAdd;
      } else if (remove < add && remove < replace) {
        costs[l_i + 1][r_i + 1] = remove + 1;
        best_move[l_i + 1][r_i + 1] = kRemove;
      } else {
        // We make replace a little more expensive than add/remove to lower
        // their priority.
        costs[l_i + 1][r_i + 1] = replace + 1.00001;
        best_move[l_i + 1][r_i + 1] = kReplace;
      }
    }
  }

  // Reconstruct the best path. We do it in reverse order.
  std::vector<EditType> best_path;
  for (size_t l_i = left.size(), r_i = right.size(); l_i > 0 || r_i > 0;) {
    EditType move = best_move[l_i][r_i];
    best_path.push_back(move);
    l_i -= move != kAdd;
    r_i -= move != kRemove;
  }
  std::reverse(best_path.begin(), best_path.end());
  return best_path;
}

namespace {

// Helper class to convert string into ids with deduplication.
class InternalStrings {
 public:
  size_t GetId(const std::string& str) {
    IdMap::iterator it = ids_.find(str);
    if (it != ids_.end()) return it->second;
    size_t id = ids_.size();
    return ids_[str] = id;
  }

 private:
  typedef std::map<std::string, size_t> IdMap;
  IdMap ids_;
};

}  // namespace

std::vector<EditType> CalculateOptimalEdits(
    const std::vector<std::string>& left,
    const std::vector<std::string>& right) {
  std::vector<size_t> left_ids, right_ids;
  {
    InternalStrings intern_table;
    for (size_t i = 0; i < left.size(); ++i) {
      left_ids.push_back(intern_table.GetId(left[i]));
    }
    for (size_t i = 0; i < right.size(); ++i) {
      right_ids.push_back(intern_table.GetId(right[i]));
    }
  }
  return CalculateOptimalEdits(left_ids, right_ids);
}

namespace {

// Helper class that holds the state for one hunk and prints it out to the
// stream.
// It reorders adds/removes when possible to group all removes before all
// adds. It also adds the hunk header before printing into the stream.
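// For example, diffing the line lists {"1", "2", "3"} and {"1", "3", "4"}
// yields a single hunk, with the one remove grouped before the one add:
//
//   @@ -1,3 +1,3 @@
//    1
//   -2
//    3
//   +4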
class Hunk {
 public:
  Hunk(size_t left_start, size_t right_start)
      : left_start_(left_start),
        right_start_(right_start),
        adds_(),
        removes_(),
        common_() {}

  void PushLine(char edit, const char* line) {
    switch (edit) {
      case ' ':
        ++common_;
        FlushEdits();
        hunk_.push_back(std::make_pair(' ', line));
        break;
      case '-':
        ++removes_;
        hunk_removes_.push_back(std::make_pair('-', line));
        break;
      case '+':
        ++adds_;
        hunk_adds_.push_back(std::make_pair('+', line));
        break;
    }
  }

  void PrintTo(std::ostream* os) {
    PrintHeader(os);
    FlushEdits();
    for (std::list<std::pair<char, const char*> >::const_iterator it =
             hunk_.begin();
         it != hunk_.end(); ++it) {
      *os << it->first << it->second << "\n";
    }
  }

  bool has_edits() const { return adds_ || removes_; }

 private:
  void FlushEdits() {
    hunk_.splice(hunk_.end(), hunk_removes_);
    hunk_.splice(hunk_.end(), hunk_adds_);
  }

  // Print a unified diff header for one hunk.
  // The format is
  //   "@@ -<left_start>,<left_length> +<right_start>,<right_length> @@"
  // where the left/right parts are omitted if unnecessary.
  void PrintHeader(std::ostream* ss) const {
    *ss << "@@ ";
    if (removes_) {
      *ss << "-" << left_start_ << "," << (removes_ + common_);
    }
    if (removes_ && adds_) {
      *ss << " ";
    }
    if (adds_) {
      *ss << "+" << right_start_ << "," << (adds_ + common_);
    }
    *ss << " @@\n";
  }

  size_t left_start_, right_start_;
  size_t adds_, removes_, common_;
  std::list<std::pair<char, const char*> > hunk_, hunk_adds_, hunk_removes_;
};

}  // namespace

// Create a list of diff hunks in Unified diff format.
// Each hunk has a header generated by PrintHeader above plus a body with
// lines prefixed with ' ' for no change, '-' for deletion and '+' for
// addition.
// 'context' represents the desired unchanged prefix/suffix around the diff.
// If two hunks are close enough that their contexts overlap, then they are
// joined into one hunk.
std::string CreateUnifiedDiff(const std::vector<std::string>& left,
                              const std::vector<std::string>& right,
                              size_t context) {
  const std::vector<EditType> edits = CalculateOptimalEdits(left, right);

  size_t l_i = 0, r_i = 0, edit_i = 0;
  std::stringstream ss;
  while (edit_i < edits.size()) {
    // Find first edit.
    while (edit_i < edits.size() && edits[edit_i] == kMatch) {
      ++l_i;
      ++r_i;
      ++edit_i;
    }

    // Find the first line to include in the hunk.
    const size_t prefix_context = std::min(l_i, context);
    Hunk hunk(l_i - prefix_context + 1, r_i - prefix_context + 1);
    for (size_t i = prefix_context; i > 0; --i) {
      hunk.PushLine(' ', left[l_i - i].c_str());
    }

    // Iterate the edits until we found enough suffix for the hunk or the input
    // is over.
    size_t n_suffix = 0;
    for (; edit_i < edits.size(); ++edit_i) {
      if (n_suffix >= context) {
        // Continue only if the next hunk is very close.
        std::vector<EditType>::const_iterator it = edits.begin() + edit_i;
        while (it != edits.end() && *it == kMatch) ++it;
        if (it == edits.end() || (it - edits.begin()) - edit_i >= context) {
          // There is no next edit or it is too far away.
          break;
        }
      }

      EditType edit = edits[edit_i];
      // Reset count when a non match is found.
      n_suffix = edit == kMatch ? n_suffix + 1 : 0;

      if (edit == kMatch || edit == kRemove || edit == kReplace) {
        hunk.PushLine(edit == kMatch ? ' ' : '-', left[l_i].c_str());
      }
      if (edit == kAdd || edit == kReplace) {
        hunk.PushLine('+', right[r_i].c_str());
      }

      // Advance indices, depending on edit type.
      l_i += edit != kAdd;
      r_i += edit != kRemove;
    }

    if (!hunk.has_edits()) {
      // We are done. We don't want this hunk.
      break;
    }

    hunk.PrintTo(&ss);
  }
  return ss.str();
}

}  // namespace edit_distance

namespace {

// The string representation of the values received in EqFailure() are already
// escaped. Split them on escaped '\n' boundaries.
// Leave all other escaped characters the same.
std::vector<std::string> SplitEscapedString(const std::string& str) {
  std::vector<std::string> lines;
  size_t start = 0, end = str.size();
  if (end > 2 && str[0] == '"' && str[end - 1] == '"') {
    ++start;
    --end;
  }
  bool escaped = false;
  for (size_t i = start; i + 1 < end; ++i) {
    if (escaped) {
      escaped = false;
      if (str[i] == 'n') {
        lines.push_back(str.substr(start, i - start - 1));
        start = i + 1;
      }
    } else {
      escaped = str[i] == '\\';
    }
  }
  lines.push_back(str.substr(start, end - start));
  return lines;
}

}  // namespace

// Constructs and returns the message for an equality assertion
// (e.g. ASSERT_EQ, EXPECT_STREQ, etc) failure.
//
// The first four parameters are the expressions used in the assertion
// and their values, as strings.  For example, for ASSERT_EQ(foo, bar)
// where foo is 5 and bar is 6, we have:
//
//   lhs_expression: "foo"
//   rhs_expression: "bar"
//   lhs_value:      "5"
//   rhs_value:      "6"
//
// The ignoring_case parameter is true iff the assertion is a
// *_STRCASEEQ*.  When it's true, the string "Ignoring case" will
// be inserted into the message.
AssertionResult EqFailure(const char* lhs_expression,
                          const char* rhs_expression,
                          const std::string& lhs_value,
                          const std::string& rhs_value,
                          bool ignoring_case) {
  Message msg;
  msg << "Expected equality of these values:";
  msg << "\n  " << lhs_expression;
  if (lhs_value != lhs_expression) {
    msg << "\n    Which is: " << lhs_value;
  }
  msg << "\n  " << rhs_expression;
  if (rhs_value != rhs_expression) {
    msg << "\n    Which is: " << rhs_value;
  }

  if (ignoring_case) {
    msg << "\nIgnoring case";
  }

  if (!lhs_value.empty() && !rhs_value.empty()) {
    const std::vector<std::string> lhs_lines = SplitEscapedString(lhs_value);
    const std::vector<std::string> rhs_lines = SplitEscapedString(rhs_value);
    if (lhs_lines.size() > 1 || rhs_lines.size() > 1) {
      msg << "\nWith diff:\n"
          << edit_distance::CreateUnifiedDiff(lhs_lines, rhs_lines);
    }
  }

  return AssertionFailure() << msg;
}

// Constructs a failure message for Boolean assertions such as EXPECT_TRUE.
std::string GetBoolAssertionFailureMessage(
    const AssertionResult& assertion_result,
    const char* expression_text,
    const char* actual_predicate_value,
    const char* expected_predicate_value) {
  const char* actual_message = assertion_result.message();
  Message msg;
  msg << "Value of: " << expression_text
      << "\n  Actual: " << actual_predicate_value;
  if (actual_message[0] != '\0')
    msg << " (" << actual_message << ")";
  msg << "\nExpected: " << expected_predicate_value;
  return msg.GetString();
}

// Helper function for implementing ASSERT_NEAR.
AssertionResult DoubleNearPredFormat(const char* expr1,
                                     const char* expr2,
                                     const char* abs_error_expr,
                                     double val1,
                                     double val2,
                                     double abs_error) {
  const double diff = fabs(val1 - val2);
  if (diff <= abs_error) return AssertionSuccess();

  // FIXME: do not print the value of an expression if it's
  // already a literal.
  return AssertionFailure()
      << "The difference between " << expr1 << " and " << expr2
      << " is " << diff << ", which exceeds " << abs_error_expr << ", where\n"
      << expr1 << " evaluates to " << val1 << ",\n"
      << expr2 << " evaluates to " << val2 << ", and\n"
      << abs_error_expr << " evaluates to " << abs_error << ".";
}

// Helper template for implementing FloatLE() and DoubleLE().
template <typename RawType>
AssertionResult FloatingPointLE(const char* expr1,
                                const char* expr2,
                                RawType val1,
                                RawType val2) {
  // Returns success if val1 is less than val2,
  if (val1 < val2) {
    return AssertionSuccess();
  }

  // or if val1 is almost equal to val2.
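  // (FloatingPoint<RawType>::AlmostEquals() declares two values equal when
  // they are within a small number of ULPs -- 4 in this implementation --
  // of each other; see gtest-internal.h for the details.)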
  const FloatingPoint<RawType> lhs(val1), rhs(val2);
  if (lhs.AlmostEquals(rhs)) {
    return AssertionSuccess();
  }

  // Note that the above two checks will both fail if either val1 or
  // val2 is NaN, as the IEEE floating-point standard requires that
  // any predicate involving a NaN must return false.

  ::std::stringstream val1_ss;
  val1_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
          << val1;

  ::std::stringstream val2_ss;
  val2_ss << std::setprecision(std::numeric_limits<RawType>::digits10 + 2)
          << val2;

  return AssertionFailure()
      << "Expected: (" << expr1 << ") <= (" << expr2 << ")\n"
      << "  Actual: " << StringStreamToString(&val1_ss) << " vs "
      << StringStreamToString(&val2_ss);
}

}  // namespace internal

// Asserts that val1 is less than, or almost equal to, val2.  Fails
// otherwise.  In particular, it fails if either val1 or val2 is NaN.
AssertionResult FloatLE(const char* expr1, const char* expr2,
                        float val1, float val2) {
  return internal::FloatingPointLE<float>(expr1, expr2, val1, val2);
}

// Asserts that val1 is less than, or almost equal to, val2.  Fails
// otherwise.  In particular, it fails if either val1 or val2 is NaN.
AssertionResult DoubleLE(const char* expr1, const char* expr2,
                         double val1, double val2) {
  return internal::FloatingPointLE<double>(expr1, expr2, val1, val2);
}

namespace internal {

// The helper function for {ASSERT|EXPECT}_EQ with int or enum
// arguments.
AssertionResult CmpHelperEQ(const char* lhs_expression,
                            const char* rhs_expression,
                            BiggestInt lhs,
                            BiggestInt rhs) {
  if (lhs == rhs) {
    return AssertionSuccess();
  }

  return EqFailure(lhs_expression,
                   rhs_expression,
                   FormatForComparisonFailureMessage(lhs, rhs),
                   FormatForComparisonFailureMessage(rhs, lhs),
                   false);
}

// A macro for implementing the helper functions needed to implement
// ASSERT_?? and EXPECT_?? with integer or enum arguments.  It is here
// just to avoid copy-and-paste of similar code.
#define GTEST_IMPL_CMP_HELPER_(op_name, op)\
AssertionResult CmpHelper##op_name(const char* expr1, const char* expr2, \
                                   BiggestInt val1, BiggestInt val2) {\
  if (val1 op val2) {\
    return AssertionSuccess();\
  } else {\
    return AssertionFailure() \
        << "Expected: (" << expr1 << ") " #op " (" << expr2\
        << "), actual: " << FormatForComparisonFailureMessage(val1, val2)\
        << " vs " << FormatForComparisonFailureMessage(val2, val1);\
  }\
}

// Implements the helper function for {ASSERT|EXPECT}_NE with int or
// enum arguments.
GTEST_IMPL_CMP_HELPER_(NE, !=)
// Implements the helper function for {ASSERT|EXPECT}_LE with int or
// enum arguments.
GTEST_IMPL_CMP_HELPER_(LE, <=)
// Implements the helper function for {ASSERT|EXPECT}_LT with int or
// enum arguments.
GTEST_IMPL_CMP_HELPER_(LT, < )
// Implements the helper function for {ASSERT|EXPECT}_GE with int or
// enum arguments.
GTEST_IMPL_CMP_HELPER_(GE, >=)
// Implements the helper function for {ASSERT|EXPECT}_GT with int or
// enum arguments.
GTEST_IMPL_CMP_HELPER_(GT, > )

#undef GTEST_IMPL_CMP_HELPER_

// The helper function for {ASSERT|EXPECT}_STREQ.
AssertionResult CmpHelperSTREQ(const char* lhs_expression,
                               const char* rhs_expression,
                               const char* lhs,
                               const char* rhs) {
  if (String::CStringEquals(lhs, rhs)) {
    return AssertionSuccess();
  }

  return EqFailure(lhs_expression,
                   rhs_expression,
                   PrintToString(lhs),
                   PrintToString(rhs),
                   false);
}

// The helper function for {ASSERT|EXPECT}_STRCASEEQ.
AssertionResult CmpHelperSTRCASEEQ(const char* lhs_expression,
                                   const char* rhs_expression,
                                   const char* lhs,
                                   const char* rhs) {
  if (String::CaseInsensitiveCStringEquals(lhs, rhs)) {
    return AssertionSuccess();
  }

  return EqFailure(lhs_expression,
                   rhs_expression,
                   PrintToString(lhs),
                   PrintToString(rhs),
                   true);
}

// The helper function for {ASSERT|EXPECT}_STRNE.
AssertionResult CmpHelperSTRNE(const char* s1_expression,
                               const char* s2_expression,
                               const char* s1,
                               const char* s2) {
  if (!String::CStringEquals(s1, s2)) {
    return AssertionSuccess();
  } else {
    return AssertionFailure() << "Expected: (" << s1_expression << ") != ("
                              << s2_expression << "), actual: \""
                              << s1 << "\" vs \"" << s2 << "\"";
  }
}

// The helper function for {ASSERT|EXPECT}_STRCASENE.
AssertionResult CmpHelperSTRCASENE(const char* s1_expression,
                                   const char* s2_expression,
                                   const char* s1,
                                   const char* s2) {
  if (!String::CaseInsensitiveCStringEquals(s1, s2)) {
    return AssertionSuccess();
  } else {
    return AssertionFailure()
        << "Expected: (" << s1_expression << ") != ("
        << s2_expression << ") (ignoring case), actual: \""
        << s1 << "\" vs \"" << s2 << "\"";
  }
}

}  // namespace internal

namespace {

// Helper functions for implementing IsSubString() and IsNotSubstring().

// This group of overloaded functions return true iff needle is a
// substring of haystack.  NULL is considered a substring of
// itself only.
bool IsSubstringPred(const char* needle, const char* haystack) {
  if (needle == NULL || haystack == NULL)
    return needle == haystack;

  return strstr(haystack, needle) != NULL;
}

bool IsSubstringPred(const wchar_t* needle, const wchar_t* haystack) {
  if (needle == NULL || haystack == NULL)
    return needle == haystack;

  return wcsstr(haystack, needle) != NULL;
}

// StringType here can be either ::std::string or ::std::wstring.
template <typename StringType>
bool IsSubstringPred(const StringType& needle,
                     const StringType& haystack) {
  return haystack.find(needle) != StringType::npos;
}

// This function implements either IsSubstring() or IsNotSubstring(),
// depending on the value of the expected_to_be_substring parameter.
// StringType here can be const char*, const wchar_t*,
// ::std::string, or ::std::wstring.
template <typename StringType>
AssertionResult IsSubstringImpl(
    bool expected_to_be_substring,
    const char* needle_expr, const char* haystack_expr,
    const StringType& needle, const StringType& haystack) {
  if (IsSubstringPred(needle, haystack) == expected_to_be_substring)
    return AssertionSuccess();

  const bool is_wide_string = sizeof(needle[0]) > 1;
  const char* const begin_string_quote = is_wide_string ? "L\"" : "\"";
  return AssertionFailure()
      << "Value of: " << needle_expr << "\n"
      << "  Actual: " << begin_string_quote << needle << "\"\n"
      << "Expected: " << (expected_to_be_substring ? "" : "not ")
      << "a substring of " << haystack_expr << "\n"
      << "Which is: " << begin_string_quote << haystack << "\"";
}

}  // namespace

// IsSubstring() and IsNotSubstring() check whether needle is a
// substring of haystack (NULL is considered a substring of itself
// only), and return an appropriate error message when they fail.
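// They are designed to be used with the predicate-formatter assertions,
// for example:
//
//   EXPECT_PRED_FORMAT2(testing::IsSubstring, "needle", "two needles");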
AssertionResult IsSubstring( const char* needle_expr, const char* haystack_expr, const char* needle, const char* haystack) { return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); } AssertionResult IsSubstring( const char* needle_expr, const char* haystack_expr, const wchar_t* needle, const wchar_t* haystack) { return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); } AssertionResult IsNotSubstring( const char* needle_expr, const char* haystack_expr, const char* needle, const char* haystack) { return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); } AssertionResult IsNotSubstring( const char* needle_expr, const char* haystack_expr, const wchar_t* needle, const wchar_t* haystack) { return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); } AssertionResult IsSubstring( const char* needle_expr, const char* haystack_expr, const ::std::string& needle, const ::std::string& haystack) { return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); } AssertionResult IsNotSubstring( const char* needle_expr, const char* haystack_expr, const ::std::string& needle, const ::std::string& haystack) { return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); } #if GTEST_HAS_STD_WSTRING AssertionResult IsSubstring( const char* needle_expr, const char* haystack_expr, const ::std::wstring& needle, const ::std::wstring& haystack) { return IsSubstringImpl(true, needle_expr, haystack_expr, needle, haystack); } AssertionResult IsNotSubstring( const char* needle_expr, const char* haystack_expr, const ::std::wstring& needle, const ::std::wstring& haystack) { return IsSubstringImpl(false, needle_expr, haystack_expr, needle, haystack); } #endif // GTEST_HAS_STD_WSTRING namespace internal { #if GTEST_OS_WINDOWS namespace { // Helper function for IsHRESULT{SuccessFailure} predicates AssertionResult HRESULTFailureHelper(const char* expr, const char* expected, long hr) { // NOLINT # if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_WINDOWS_TV_TITLE // Windows CE doesn't support FormatMessage. const char error_text[] = ""; # else // Looks up the human-readable system message for the HRESULT code // and since we're not passing any params to FormatMessage, we don't // want inserts expanded. const DWORD kFlags = FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS; const DWORD kBufSize = 4096; // Gets the system's human readable message string for this HRESULT. 
  char error_text[kBufSize] = { '\0' };
  DWORD message_length = ::FormatMessageA(kFlags,
                                          0,   // no source, we're asking system
                                          hr,  // the error
                                          0,   // no line width restrictions
                                          error_text,  // output buffer
                                          kBufSize,    // buf size
                                          NULL);  // no arguments for inserts
  // Trims trailing white space (FormatMessage leaves a trailing CR-LF)
  for (; message_length && IsSpace(error_text[message_length - 1]);
          --message_length) {
    error_text[message_length - 1] = '\0';
  }

# endif  // GTEST_OS_WINDOWS_MOBILE

  const std::string error_hex("0x" + String::FormatHexInt(hr));
  return ::testing::AssertionFailure()
      << "Expected: " << expr << " " << expected << ".\n"
      << "  Actual: " << error_hex << " " << error_text << "\n";
}

}  // namespace

AssertionResult IsHRESULTSuccess(const char* expr, long hr) {  // NOLINT
  if (SUCCEEDED(hr)) {
    return AssertionSuccess();
  }
  return HRESULTFailureHelper(expr, "succeeds", hr);
}

AssertionResult IsHRESULTFailure(const char* expr, long hr) {  // NOLINT
  if (FAILED(hr)) {
    return AssertionSuccess();
  }
  return HRESULTFailureHelper(expr, "fails", hr);
}

#endif  // GTEST_OS_WINDOWS

// Utility functions for encoding Unicode text (wide strings) in
// UTF-8.

// A Unicode code-point can have up to 21 bits, and is encoded in UTF-8
// like this:
//
// Code-point length   Encoding
//   0 -  7 bits       0xxxxxxx
//   8 - 11 bits       110xxxxx 10xxxxxx
//  12 - 16 bits       1110xxxx 10xxxxxx 10xxxxxx
//  17 - 21 bits       11110xxx 10xxxxxx 10xxxxxx 10xxxxxx

// The maximum code-point a one-byte UTF-8 sequence can represent.
const UInt32 kMaxCodePoint1 = (static_cast<UInt32>(1) <<  7) - 1;

// The maximum code-point a two-byte UTF-8 sequence can represent.
const UInt32 kMaxCodePoint2 = (static_cast<UInt32>(1) << (5 + 6)) - 1;

// The maximum code-point a three-byte UTF-8 sequence can represent.
const UInt32 kMaxCodePoint3 = (static_cast<UInt32>(1) << (4 + 2*6)) - 1;

// The maximum code-point a four-byte UTF-8 sequence can represent.
const UInt32 kMaxCodePoint4 = (static_cast<UInt32>(1) << (3 + 3*6)) - 1;

// Chops off the n lowest bits from a bit pattern.  Returns the n
// lowest bits.  As a side effect, the original bit pattern will be
// shifted to the right by n bits.
inline UInt32 ChopLowBits(UInt32* bits, int n) {
  const UInt32 low_bits = *bits & ((static_cast<UInt32>(1) << n) - 1);
  *bits >>= n;
  return low_bits;
}

// Converts a Unicode code point to a narrow string in UTF-8 encoding.
// code_point parameter is of type UInt32 because wchar_t may not be
// wide enough to contain a code point.
// If the code_point is not a valid Unicode code point
// (i.e. outside of Unicode range U+0 to U+10FFFF) it will be converted
// to "(Invalid Unicode 0xXXXXXXXX)".
std::string CodePointToUtf8(UInt32 code_point) {
  if (code_point > kMaxCodePoint4) {
    return "(Invalid Unicode 0x" + String::FormatHexInt(code_point) + ")";
  }

  char str[5];  // Big enough for the largest valid code point.
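  // For example, U+20AC (the euro sign) has 14 significant bits and thus
  // takes the three-byte form below: 0xE2 0x82 0xAC.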
  if (code_point <= kMaxCodePoint1) {
    str[1] = '\0';
    str[0] = static_cast<char>(code_point);                          // 0xxxxxxx
  } else if (code_point <= kMaxCodePoint2) {
    str[2] = '\0';
    str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
    str[0] = static_cast<char>(0xC0 | code_point);                   // 110xxxxx
  } else if (code_point <= kMaxCodePoint3) {
    str[3] = '\0';
    str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
    str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
    str[0] = static_cast<char>(0xE0 | code_point);                   // 1110xxxx
  } else {  // code_point <= kMaxCodePoint4
    str[4] = '\0';
    str[3] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
    str[2] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
    str[1] = static_cast<char>(0x80 | ChopLowBits(&code_point, 6));  // 10xxxxxx
    str[0] = static_cast<char>(0xF0 | code_point);                   // 11110xxx
  }
  return str;
}

// The following two functions only make sense if the system
// uses UTF-16 for wide string encoding. All supported systems
// with 16 bit wchar_t (Windows, Cygwin, Symbian OS) do use UTF-16.

// Determines if the arguments constitute UTF-16 surrogate pair
// and thus should be combined into a single Unicode code point
// using CreateCodePointFromUtf16SurrogatePair.
inline bool IsUtf16SurrogatePair(wchar_t first, wchar_t second) {
  return sizeof(wchar_t) == 2 &&
      (first & 0xFC00) == 0xD800 && (second & 0xFC00) == 0xDC00;
}

// Creates a Unicode code point from UTF16 surrogate pair.
inline UInt32 CreateCodePointFromUtf16SurrogatePair(wchar_t first,
                                                    wchar_t second) {
  const UInt32 mask = (1 << 10) - 1;
  return (sizeof(wchar_t) == 2) ?
      (((first & mask) << 10) | (second & mask)) + 0x10000 :
      // This function should not be called when the condition is
      // false, but we provide a sensible default in case it is.
      static_cast<UInt32>(first);
}

// Converts a wide string to a narrow string in UTF-8 encoding.
// The wide string is assumed to have the following encoding:
//   UTF-16 if sizeof(wchar_t) == 2 (on Windows, Cygwin, Symbian OS)
//   UTF-32 if sizeof(wchar_t) == 4 (on Linux)
// Parameter str points to a null-terminated wide string.
// Parameter num_chars may additionally limit the number
// of wchar_t characters processed.  -1 is used when the entire string
// should be processed.
// If the string contains code points that are not valid Unicode code points
// (i.e. outside of Unicode range U+0 to U+10FFFF) they will be output
// as '(Invalid Unicode 0xXXXXXXXX)'. If the string is in UTF16 encoding
// and contains invalid UTF-16 surrogate pairs, values in those pairs
// will be encoded as individual Unicode characters from Basic Multilingual
// Plane.
std::string WideStringToUtf8(const wchar_t* str, int num_chars) {
  if (num_chars == -1)
    num_chars = static_cast<int>(wcslen(str));

  ::std::stringstream stream;
  for (int i = 0; i < num_chars; ++i) {
    UInt32 unicode_code_point;

    if (str[i] == L'\0') {
      break;
    } else if (i + 1 < num_chars && IsUtf16SurrogatePair(str[i], str[i + 1])) {
      unicode_code_point = CreateCodePointFromUtf16SurrogatePair(str[i],
                                                                 str[i + 1]);
      i++;
    } else {
      unicode_code_point = static_cast<UInt32>(str[i]);
    }

    stream << CodePointToUtf8(unicode_code_point);
  }
  return StringStreamToString(&stream);
}

// Converts a wide C string to an std::string using the UTF-8 encoding.
// NULL will be converted to "(null)".
std::string String::ShowWideCString(const wchar_t * wide_c_str) {
  if (wide_c_str == NULL)  return "(null)";

  return internal::WideStringToUtf8(wide_c_str, -1);
}

// Compares two wide C strings.  Returns true iff they have the same
// content.
// // Unlike wcscmp(), this function can handle NULL argument(s). A NULL // C string is considered different to any non-NULL C string, // including the empty string. bool String::WideCStringEquals(const wchar_t * lhs, const wchar_t * rhs) { if (lhs == NULL) return rhs == NULL; if (rhs == NULL) return false; return wcscmp(lhs, rhs) == 0; } // Helper function for *_STREQ on wide strings. AssertionResult CmpHelperSTREQ(const char* lhs_expression, const char* rhs_expression, const wchar_t* lhs, const wchar_t* rhs) { if (String::WideCStringEquals(lhs, rhs)) { return AssertionSuccess(); } return EqFailure(lhs_expression, rhs_expression, PrintToString(lhs), PrintToString(rhs), false); } // Helper function for *_STRNE on wide strings. AssertionResult CmpHelperSTRNE(const char* s1_expression, const char* s2_expression, const wchar_t* s1, const wchar_t* s2) { if (!String::WideCStringEquals(s1, s2)) { return AssertionSuccess(); } return AssertionFailure() << "Expected: (" << s1_expression << ") != (" << s2_expression << "), actual: " << PrintToString(s1) << " vs " << PrintToString(s2); } // Compares two C strings, ignoring case. Returns true iff they have // the same content. // // Unlike strcasecmp(), this function can handle NULL argument(s). A // NULL C string is considered different to any non-NULL C string, // including the empty string. bool String::CaseInsensitiveCStringEquals(const char * lhs, const char * rhs) { if (lhs == NULL) return rhs == NULL; if (rhs == NULL) return false; return posix::StrCaseCmp(lhs, rhs) == 0; } // Compares two wide C strings, ignoring case. Returns true iff they // have the same content. // // Unlike wcscasecmp(), this function can handle NULL argument(s). // A NULL C string is considered different to any non-NULL wide C string, // including the empty string. // NB: The implementations on different platforms slightly differ. // On windows, this method uses _wcsicmp which compares according to LC_CTYPE // environment variable. On GNU platform this method uses wcscasecmp // which compares according to LC_CTYPE category of the current locale. // On MacOS X, it uses towlower, which also uses LC_CTYPE category of the // current locale. bool String::CaseInsensitiveWideCStringEquals(const wchar_t* lhs, const wchar_t* rhs) { if (lhs == NULL) return rhs == NULL; if (rhs == NULL) return false; #if GTEST_OS_WINDOWS return _wcsicmp(lhs, rhs) == 0; #elif GTEST_OS_LINUX && !GTEST_OS_LINUX_ANDROID return wcscasecmp(lhs, rhs) == 0; #else // Android, Mac OS X and Cygwin don't define wcscasecmp. // Other unknown OSes may not define it either. wint_t left, right; do { left = towlower(*lhs++); right = towlower(*rhs++); } while (left && left == right); return left == right; #endif // OS selector } // Returns true iff str ends with the given suffix, ignoring case. // Any string is considered to end with an empty suffix. bool String::EndsWithCaseInsensitive( const std::string& str, const std::string& suffix) { const size_t str_len = str.length(); const size_t suffix_len = suffix.length(); return (str_len >= suffix_len) && CaseInsensitiveCStringEquals(str.c_str() + str_len - suffix_len, suffix.c_str()); } // Formats an int value as "%02d". std::string String::FormatIntWidth2(int value) { std::stringstream ss; ss << std::setfill('0') << std::setw(2) << value; return ss.str(); } // Formats an int value as "%X". std::string String::FormatHexInt(int value) { std::stringstream ss; ss << std::hex << std::uppercase << value; return ss.str(); } // Formats a byte as "%02X". 
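// For example, FormatByte(10) returns "0A", while FormatHexInt(10) above
// returns "A" and FormatIntWidth2(7) returns "07".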
std::string String::FormatByte(unsigned char value) {
  std::stringstream ss;
  ss << std::setfill('0') << std::setw(2) << std::hex << std::uppercase
     << static_cast<unsigned int>(value);
  return ss.str();
}

// Converts the buffer in a stringstream to an std::string, converting NUL
// bytes to "\\0" along the way.
std::string StringStreamToString(::std::stringstream* ss) {
  const ::std::string& str = ss->str();
  const char* const start = str.c_str();
  const char* const end = start + str.length();

  std::string result;
  result.reserve(2 * (end - start));
  for (const char* ch = start; ch != end; ++ch) {
    if (*ch == '\0') {
      result += "\\0";  // Replaces NUL with "\\0";
    } else {
      result += *ch;
    }
  }

  return result;
}

// Appends the user-supplied message to the Google-Test-generated message.
std::string AppendUserMessage(const std::string& gtest_msg,
                              const Message& user_msg) {
  // Appends the user message if it's non-empty.
  const std::string user_msg_string = user_msg.GetString();
  if (user_msg_string.empty()) {
    return gtest_msg;
  }

  return gtest_msg + "\n" + user_msg_string;
}

}  // namespace internal

// class TestResult

// Creates an empty TestResult.
TestResult::TestResult()
    : death_test_count_(0),
      elapsed_time_(0) {
}

// D'tor.
TestResult::~TestResult() {
}

// Returns the i-th test part result among all the results. i can
// range from 0 to total_part_count() - 1. If i is not in that range,
// aborts the program.
const TestPartResult& TestResult::GetTestPartResult(int i) const {
  if (i < 0 || i >= total_part_count())
    internal::posix::Abort();
  return test_part_results_.at(i);
}

// Returns the i-th test property. i can range from 0 to
// test_property_count() - 1. If i is not in that range, aborts the
// program.
const TestProperty& TestResult::GetTestProperty(int i) const {
  if (i < 0 || i >= test_property_count())
    internal::posix::Abort();
  return test_properties_.at(i);
}

// Clears the test part results.
void TestResult::ClearTestPartResults() {
  test_part_results_.clear();
}

// Adds a test part result to the list.
void TestResult::AddTestPartResult(const TestPartResult& test_part_result) {
  test_part_results_.push_back(test_part_result);
}

// Adds a test property to the list. If a property with the same key as the
// supplied property is already represented, the value of this test_property
// replaces the old value for that key.
void TestResult::RecordProperty(const std::string& xml_element,
                                const TestProperty& test_property) {
  if (!ValidateTestProperty(xml_element, test_property)) {
    return;
  }
  internal::MutexLock lock(&test_properites_mutex_);
  const std::vector<TestProperty>::iterator property_with_matching_key =
      std::find_if(test_properties_.begin(), test_properties_.end(),
                   internal::TestPropertyKeyIs(test_property.key()));
  if (property_with_matching_key == test_properties_.end()) {
    test_properties_.push_back(test_property);
    return;
  }
  property_with_matching_key->SetValue(test_property.value());
}

// The list of reserved attributes used in the <testsuites> element of XML
// output.
static const char* const kReservedTestSuitesAttributes[] = {
  "disabled",
  "errors",
  "failures",
  "name",
  "random_seed",
  "tests",
  "time",
  "timestamp"
};

// The list of reserved attributes used in the <testsuite> element of XML
// output.
static const char* const kReservedTestSuiteAttributes[] = {
  "disabled",
  "errors",
  "failures",
  "name",
  "tests",
  "time"
};

// The list of reserved attributes used in the <testcase> element of XML
// output.
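// (For example, RecordProperty("MaxMemoryUsed", 1024) from within a test --
// the key name is hypothetical -- adds MaxMemoryUsed="1024" to that test's
// <testcase> element, which is why keys such as "name" or "status" below
// must be rejected.)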
static const char* const kReservedTestCaseAttributes[] = {
  "classname",  "name", "status", "time",
  "type_param", "value_param", "file", "line"};

template <int kSize>
std::vector<std::string> ArrayAsVector(const char* const (&array)[kSize]) {
  return std::vector<std::string>(array, array + kSize);
}

static std::vector<std::string> GetReservedAttributesForElement(
    const std::string& xml_element) {
  if (xml_element == "testsuites") {
    return ArrayAsVector(kReservedTestSuitesAttributes);
  } else if (xml_element == "testsuite") {
    return ArrayAsVector(kReservedTestSuiteAttributes);
  } else if (xml_element == "testcase") {
    return ArrayAsVector(kReservedTestCaseAttributes);
  } else {
    GTEST_CHECK_(false) << "Unrecognized xml_element provided: " << xml_element;
  }
  // This code is unreachable but some compilers may not realize that.
  return std::vector<std::string>();
}

static std::string FormatWordList(const std::vector<std::string>& words) {
  Message word_list;
  for (size_t i = 0; i < words.size(); ++i) {
    if (i > 0 && words.size() > 2) {
      word_list << ", ";
    }
    if (i == words.size() - 1) {
      word_list << "and ";
    }
    word_list << "'" << words[i] << "'";
  }
  return word_list.GetString();
}

static bool ValidateTestPropertyName(
    const std::string& property_name,
    const std::vector<std::string>& reserved_names) {
  if (std::find(reserved_names.begin(), reserved_names.end(), property_name) !=
          reserved_names.end()) {
    ADD_FAILURE() << "Reserved key used in RecordProperty(): " << property_name
                  << " (" << FormatWordList(reserved_names)
                  << " are reserved by " << GTEST_NAME_ << ")";
    return false;
  }
  return true;
}

// Adds a failure if the key is a reserved attribute of the element named
// xml_element.  Returns true if the property is valid.
bool TestResult::ValidateTestProperty(const std::string& xml_element,
                                      const TestProperty& test_property) {
  return ValidateTestPropertyName(test_property.key(),
                                  GetReservedAttributesForElement(xml_element));
}

// Clears the object.
void TestResult::Clear() {
  test_part_results_.clear();
  test_properties_.clear();
  death_test_count_ = 0;
  elapsed_time_ = 0;
}

// Returns true iff the test failed.
bool TestResult::Failed() const {
  for (int i = 0; i < total_part_count(); ++i) {
    if (GetTestPartResult(i).failed())
      return true;
  }
  return false;
}

// Returns true iff the test part fatally failed.
static bool TestPartFatallyFailed(const TestPartResult& result) {
  return result.fatally_failed();
}

// Returns true iff the test fatally failed.
bool TestResult::HasFatalFailure() const {
  return CountIf(test_part_results_, TestPartFatallyFailed) > 0;
}

// Returns true iff the test part non-fatally failed.
static bool TestPartNonfatallyFailed(const TestPartResult& result) {
  return result.nonfatally_failed();
}

// Returns true iff the test has a non-fatal failure.
bool TestResult::HasNonfatalFailure() const {
  return CountIf(test_part_results_, TestPartNonfatallyFailed) > 0;
}

// Gets the number of all test parts.  This is the sum of the number
// of successful test parts and the number of failed test parts.
int TestResult::total_part_count() const {
  return static_cast<int>(test_part_results_.size());
}

// Returns the number of the test properties.
int TestResult::test_property_count() const {
  return static_cast<int>(test_properties_.size());
}

// class Test

// Creates a Test object.

// The c'tor saves the states of all flags.
Test::Test()
    : gtest_flag_saver_(new GTEST_FLAG_SAVER_) {
}

// The d'tor restores the states of all flags.  The actual work is
// done by the d'tor of the gtest_flag_saver_ field, and thus not
// visible here.
Test::~Test() {
}

// Sets up the test fixture.
//
// A sub-class may override this.
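// For example (a sketch of a user-defined fixture; FooTest is a
// hypothetical name):
//
//   class FooTest : public testing::Test {
//    protected:
//     virtual void SetUp() { /* acquire shared resources here */ }
//     virtual void TearDown() { /* release them here */ }
//   };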
void Test::SetUp() {
}

// Tears down the test fixture.
//
// A sub-class may override this.
void Test::TearDown() {
}

// Allows user supplied key value pairs to be recorded for later output.
void Test::RecordProperty(const std::string& key, const std::string& value) {
  UnitTest::GetInstance()->RecordProperty(key, value);
}

// Allows user supplied key value pairs to be recorded for later output.
void Test::RecordProperty(const std::string& key, int value) {
  Message value_message;
  value_message << value;
  RecordProperty(key, value_message.GetString().c_str());
}

namespace internal {

void ReportFailureInUnknownLocation(TestPartResult::Type result_type,
                                    const std::string& message) {
  // This function is a friend of UnitTest and as such has access to
  // AddTestPartResult.
  UnitTest::GetInstance()->AddTestPartResult(
      result_type,
      NULL,  // No info about the source file where the exception occurred.
      -1,    // We have no info on which line caused the exception.
      message,
      "");   // No stack trace, either.
}

}  // namespace internal

// Google Test requires all tests in the same test case to use the same test
// fixture class.  This function checks if the current test has the
// same fixture class as the first test in the current test case.  If
// yes, it returns true; otherwise it generates a Google Test failure and
// returns false.
bool Test::HasSameFixtureClass() {
  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  const TestCase* const test_case = impl->current_test_case();

  // Info about the first test in the current test case.
  const TestInfo* const first_test_info = test_case->test_info_list()[0];
  const internal::TypeId first_fixture_id = first_test_info->fixture_class_id_;
  const char* const first_test_name = first_test_info->name();

  // Info about the current test.
  const TestInfo* const this_test_info = impl->current_test_info();
  const internal::TypeId this_fixture_id = this_test_info->fixture_class_id_;
  const char* const this_test_name = this_test_info->name();

  if (this_fixture_id != first_fixture_id) {
    // Is the first test defined using TEST?
    const bool first_is_TEST = first_fixture_id == internal::GetTestTypeId();
    // Is this test defined using TEST?
    const bool this_is_TEST = this_fixture_id == internal::GetTestTypeId();

    if (first_is_TEST || this_is_TEST) {
      // Both TEST and TEST_F appear in same test case, which is incorrect.
      // Tell the user how to fix this.

      // Gets the name of the TEST and the name of the TEST_F.  Note
      // that first_is_TEST and this_is_TEST cannot both be true, as
      // the fixture IDs are different for the two tests.
      const char* const TEST_name =
          first_is_TEST ? first_test_name : this_test_name;
      const char* const TEST_F_name =
          first_is_TEST ? this_test_name : first_test_name;

      ADD_FAILURE()
          << "All tests in the same test case must use the same test fixture\n"
          << "class, so mixing TEST_F and TEST in the same test case is\n"
          << "illegal.  In test case " << this_test_info->test_case_name()
          << ",\n"
          << "test " << TEST_F_name << " is defined using TEST_F but\n"
          << "test " << TEST_name << " is defined using TEST.  You probably\n"
          << "want to change the TEST to TEST_F or move it to another test\n"
          << "case.";
    } else {
      // Two fixture classes with the same name appear in two different
      // namespaces, which is not allowed. Tell the user how to fix this.
      ADD_FAILURE()
          << "All tests in the same test case must use the same test fixture\n"
          << "class.  However, in test case "
          << this_test_info->test_case_name() << ",\n"
          << "you defined test " << first_test_name << " and test "
          << this_test_name << "\n"
          << "using two different test fixture classes.  This can happen if\n"
          << "the two classes are from different namespaces or translation\n"
          << "units and have the same name.  You should probably rename one\n"
          << "of the classes to put the tests into different test cases.";
    }
    return false;
  }

  return true;
}

#if GTEST_HAS_SEH

// Adds an "exception thrown" fatal failure to the current test.  This
// function returns its result via an output parameter pointer because VC++
// prohibits creation of objects with destructors on stack in functions
// using __try (see error C2712).
static std::string* FormatSehExceptionMessage(DWORD exception_code,
                                              const char* location) {
  Message message;
  message << "SEH exception with code 0x" << std::setbase(16) <<
    exception_code << std::setbase(10) << " thrown in " << location << ".";

  return new std::string(message.GetString());
}

#endif  // GTEST_HAS_SEH

namespace internal {

#if GTEST_HAS_EXCEPTIONS

// Adds an "exception thrown" fatal failure to the current test.
static std::string FormatCxxExceptionMessage(const char* description,
                                             const char* location) {
  Message message;
  if (description != NULL) {
    message << "C++ exception with description \"" << description << "\"";
  } else {
    message << "Unknown C++ exception";
  }
  message << " thrown in " << location << ".";

  return message.GetString();
}

static std::string PrintTestPartResultToString(
    const TestPartResult& test_part_result);

GoogleTestFailureException::GoogleTestFailureException(
    const TestPartResult& failure)
    : ::std::runtime_error(PrintTestPartResultToString(failure).c_str()) {}

#endif  // GTEST_HAS_EXCEPTIONS

// We put these helper functions in the internal namespace as IBM's xlC
// compiler rejects the code if they were declared static.

// Runs the given method and handles SEH exceptions it throws, when
// SEH is supported; returns the 0-value for type Result in case of an
// SEH exception.  (Microsoft compilers cannot handle SEH and C++
// exceptions in the same function.  Therefore, we provide a separate
// wrapper function for handling SEH exceptions.)
template <class T, typename Result>
Result HandleSehExceptionsInMethodIfSupported(
    T* object, Result (T::*method)(), const char* location) {
#if GTEST_HAS_SEH
  __try {
    return (object->*method)();
  } __except (internal::UnitTestOptions::GTestShouldProcessSEH(  // NOLINT
      GetExceptionCode())) {
    // We create the exception message on the heap because VC++ prohibits
    // creation of objects with destructors on stack in functions using __try
    // (see error C2712).
    std::string* exception_message = FormatSehExceptionMessage(
        GetExceptionCode(), location);
    internal::ReportFailureInUnknownLocation(TestPartResult::kFatalFailure,
                                             *exception_message);
    delete exception_message;
    return static_cast<Result>(0);
  }
#else
  (void)location;
  return (object->*method)();
#endif  // GTEST_HAS_SEH
}

// Runs the given method and catches and reports C++ and/or SEH-style
// exceptions, if they are supported; returns the 0-value for type
// Result in case of an SEH exception.
template <class T, typename Result>
Result HandleExceptionsInMethodIfSupported(
    T* object, Result (T::*method)(), const char* location) {
  // NOTE: The user code can affect the way in which Google Test handles
  // exceptions by setting GTEST_FLAG(catch_exceptions), but only before
  // RUN_ALL_TESTS() starts.  It is technically possible to check the flag
  // after the exception is caught and either report or re-throw the
  // exception based on the flag's value:
  //
  // try {
  //   // Perform the test method.
  // } catch (...) {
  //   if (GTEST_FLAG(catch_exceptions))
  //     // Report the exception as failure.
  //   else
  //     throw;  // Re-throws the original exception.
  // }
  //
  // However, the purpose of this flag is to allow the program to drop into
  // the debugger when the exception is thrown.  On most platforms, once the
  // control enters the catch block, the exception origin information is
  // lost and the debugger will stop the program at the point of the
  // re-throw in this function -- instead of at the point of the original
  // throw statement in the code under test.  For this reason, we perform
  // the check early, sacrificing the ability to affect Google Test's
  // exception handling in the method where the exception is thrown.
  if (internal::GetUnitTestImpl()->catch_exceptions()) {
#if GTEST_HAS_EXCEPTIONS
    try {
      return HandleSehExceptionsInMethodIfSupported(object, method, location);
    } catch (const AssertionException&) {  // NOLINT
      // This failure was reported already.
    } catch (const internal::GoogleTestFailureException&) {  // NOLINT
      // This exception type can only be thrown by a failed Google
      // Test assertion with the intention of letting another testing
      // framework catch it.  Therefore we just re-throw it.
      throw;
    } catch (const std::exception& e) {  // NOLINT
      internal::ReportFailureInUnknownLocation(
          TestPartResult::kFatalFailure,
          FormatCxxExceptionMessage(e.what(), location));
    } catch (...) {  // NOLINT
      internal::ReportFailureInUnknownLocation(
          TestPartResult::kFatalFailure,
          FormatCxxExceptionMessage(NULL, location));
    }

    return static_cast<Result>(0);
#else
    return HandleSehExceptionsInMethodIfSupported(object, method, location);
#endif  // GTEST_HAS_EXCEPTIONS
  } else {
    return (object->*method)();
  }
}

}  // namespace internal

// Runs the test and updates the test result.
void Test::Run() {
  if (!HasSameFixtureClass()) return;

  internal::UnitTestImpl* const impl = internal::GetUnitTestImpl();
  impl->os_stack_trace_getter()->UponLeavingGTest();
  internal::HandleExceptionsInMethodIfSupported(this, &Test::SetUp, "SetUp()");
  // We will run the test only if SetUp() was successful.
  if (!HasFatalFailure()) {
    impl->os_stack_trace_getter()->UponLeavingGTest();
    internal::HandleExceptionsInMethodIfSupported(
        this, &Test::TestBody, "the test body");
  }

  // However, we want to clean up as much as possible.  Hence we will
  // always call TearDown(), even if SetUp() or the test body has
  // failed.
  impl->os_stack_trace_getter()->UponLeavingGTest();
  internal::HandleExceptionsInMethodIfSupported(
      this, &Test::TearDown, "TearDown()");
}

// Returns true iff the current test has a fatal failure.
bool Test::HasFatalFailure() {
  return internal::GetUnitTestImpl()->current_test_result()->HasFatalFailure();
}

// Returns true iff the current test has a non-fatal failure.
bool Test::HasNonfatalFailure() {
  return internal::GetUnitTestImpl()->current_test_result()->
      HasNonfatalFailure();
}

// class TestInfo

// Constructs a TestInfo object. It assumes ownership of the test factory
// object.
TestInfo::TestInfo(const std::string& a_test_case_name,
                   const std::string& a_name,
                   const char* a_type_param,
                   const char* a_value_param,
                   internal::CodeLocation a_code_location,
                   internal::TypeId fixture_class_id,
                   internal::TestFactoryBase* factory)
    : test_case_name_(a_test_case_name),
      name_(a_name),
      type_param_(a_type_param ?
new std::string(a_type_param) : NULL), value_param_(a_value_param ? new std::string(a_value_param) : NULL), location_(a_code_location), fixture_class_id_(fixture_class_id), should_run_(false), is_disabled_(false), matches_filter_(false), factory_(factory), result_() {} // Destructs a TestInfo object. TestInfo::~TestInfo() { delete factory_; } namespace internal { // Creates a new TestInfo object and registers it with Google Test; // returns the created object. // // Arguments: // // test_case_name: name of the test case // name: name of the test // type_param: the name of the test's type parameter, or NULL if // this is not a typed or a type-parameterized test. // value_param: text representation of the test's value parameter, // or NULL if this is not a value-parameterized test. // code_location: code location where the test is defined // fixture_class_id: ID of the test fixture class // set_up_tc: pointer to the function that sets up the test case // tear_down_tc: pointer to the function that tears down the test case // factory: pointer to the factory that creates a test object. // The newly created TestInfo instance will assume // ownership of the factory object. TestInfo* MakeAndRegisterTestInfo( const char* test_case_name, const char* name, const char* type_param, const char* value_param, CodeLocation code_location, TypeId fixture_class_id, SetUpTestCaseFunc set_up_tc, TearDownTestCaseFunc tear_down_tc, TestFactoryBase* factory) { TestInfo* const test_info = new TestInfo(test_case_name, name, type_param, value_param, code_location, fixture_class_id, factory); GetUnitTestImpl()->AddTestInfo(set_up_tc, tear_down_tc, test_info); return test_info; } void ReportInvalidTestCaseType(const char* test_case_name, CodeLocation code_location) { Message errors; errors << "Attempted redefinition of test case " << test_case_name << ".\n" << "All tests in the same test case must use the same test fixture\n" << "class. However, in test case " << test_case_name << ", you tried\n" << "to define a test using a fixture class different from the one\n" << "used earlier. This can happen if the two fixture classes are\n" << "from different namespaces and have the same name. You should\n" << "probably rename one of the classes to put the tests into different\n" << "test cases."; GTEST_LOG_(ERROR) << FormatFileLocation(code_location.file.c_str(), code_location.line) << " " << errors.GetString(); } } // namespace internal namespace { // A predicate that checks the test name of a TestInfo against a known // value. // // This is used for implementation of the TestCase class only. We put // it in the anonymous namespace to prevent polluting the outer // namespace. // // TestNameIs is copyable. class TestNameIs { public: // Constructor. // // TestNameIs has NO default constructor. explicit TestNameIs(const char* name) : name_(name) {} // Returns true iff the test name of test_info matches name_. bool operator()(const TestInfo * test_info) const { return test_info && test_info->name() == name_; } private: std::string name_; }; } // namespace namespace internal { // This method expands all parameterized tests registered with macros TEST_P // and INSTANTIATE_TEST_CASE_P into regular tests and registers those. // This will be done just once during the program runtime. 
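// For example (a sketch; the names are hypothetical):
//
//   class MyParamTest : public testing::TestWithParam<int> {};
//
//   TEST_P(MyParamTest, IsPositive) {
//     EXPECT_GT(GetParam(), 0);
//   }
//
//   INSTANTIATE_TEST_CASE_P(Ints, MyParamTest, testing::Values(1, 2, 3));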
void UnitTestImpl::RegisterParameterizedTests() { if (!parameterized_tests_registered_) { parameterized_test_registry_.RegisterTests(); parameterized_tests_registered_ = true; } } } // namespace internal // Creates the test object, runs it, records its result, and then // deletes it. void TestInfo::Run() { if (!should_run_) return; // Tells UnitTest where to store test result. internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); impl->set_current_test_info(this); TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater(); // Notifies the unit test event listeners that a test is about to start. repeater->OnTestStart(*this); const TimeInMillis start = internal::GetTimeInMillis(); impl->os_stack_trace_getter()->UponLeavingGTest(); // Creates the test object. Test* const test = internal::HandleExceptionsInMethodIfSupported( factory_, &internal::TestFactoryBase::CreateTest, "the test fixture's constructor"); // Runs the test if the constructor didn't generate a fatal failure. // Note that the object will not be null if (!Test::HasFatalFailure()) { // This doesn't throw as all user code that can throw are wrapped into // exception handling code. test->Run(); } // Deletes the test object. impl->os_stack_trace_getter()->UponLeavingGTest(); internal::HandleExceptionsInMethodIfSupported( test, &Test::DeleteSelf_, "the test fixture's destructor"); result_.set_elapsed_time(internal::GetTimeInMillis() - start); // Notifies the unit test event listener that a test has just finished. repeater->OnTestEnd(*this); // Tells UnitTest to stop associating assertion results to this // test. impl->set_current_test_info(NULL); } // class TestCase // Gets the number of successful tests in this test case. int TestCase::successful_test_count() const { return CountIf(test_info_list_, TestPassed); } // Gets the number of failed tests in this test case. int TestCase::failed_test_count() const { return CountIf(test_info_list_, TestFailed); } // Gets the number of disabled tests that will be reported in the XML report. int TestCase::reportable_disabled_test_count() const { return CountIf(test_info_list_, TestReportableDisabled); } // Gets the number of disabled tests in this test case. int TestCase::disabled_test_count() const { return CountIf(test_info_list_, TestDisabled); } // Gets the number of tests to be printed in the XML report. int TestCase::reportable_test_count() const { return CountIf(test_info_list_, TestReportable); } // Get the number of tests in this test case that should run. int TestCase::test_to_run_count() const { return CountIf(test_info_list_, ShouldRunTest); } // Gets the number of all tests. int TestCase::total_test_count() const { return static_cast(test_info_list_.size()); } // Creates a TestCase with the given name. // // Arguments: // // name: name of the test case // a_type_param: the name of the test case's type parameter, or NULL if // this is not a typed or a type-parameterized test case. // set_up_tc: pointer to the function that sets up the test case // tear_down_tc: pointer to the function that tears down the test case TestCase::TestCase(const char* a_name, const char* a_type_param, Test::SetUpTestCaseFunc set_up_tc, Test::TearDownTestCaseFunc tear_down_tc) : name_(a_name), type_param_(a_type_param ? new std::string(a_type_param) : NULL), set_up_tc_(set_up_tc), tear_down_tc_(tear_down_tc), should_run_(false), elapsed_time_(0) { } // Destructor of TestCase. TestCase::~TestCase() { // Deletes every Test in the collection. 
ForEach(test_info_list_, internal::Delete); } // Returns the i-th test among all the tests. i can range from 0 to // total_test_count() - 1. If i is not in that range, returns NULL. const TestInfo* TestCase::GetTestInfo(int i) const { const int index = GetElementOr(test_indices_, i, -1); return index < 0 ? NULL : test_info_list_[index]; } // Returns the i-th test among all the tests. i can range from 0 to // total_test_count() - 1. If i is not in that range, returns NULL. TestInfo* TestCase::GetMutableTestInfo(int i) { const int index = GetElementOr(test_indices_, i, -1); return index < 0 ? NULL : test_info_list_[index]; } // Adds a test to this test case. Will delete the test upon // destruction of the TestCase object. void TestCase::AddTestInfo(TestInfo * test_info) { test_info_list_.push_back(test_info); test_indices_.push_back(static_cast(test_indices_.size())); } // Runs every test in this TestCase. void TestCase::Run() { if (!should_run_) return; internal::UnitTestImpl* const impl = internal::GetUnitTestImpl(); impl->set_current_test_case(this); TestEventListener* repeater = UnitTest::GetInstance()->listeners().repeater(); repeater->OnTestCaseStart(*this); impl->os_stack_trace_getter()->UponLeavingGTest(); internal::HandleExceptionsInMethodIfSupported( this, &TestCase::RunSetUpTestCase, "SetUpTestCase()"); const internal::TimeInMillis start = internal::GetTimeInMillis(); for (int i = 0; i < total_test_count(); i++) { GetMutableTestInfo(i)->Run(); } elapsed_time_ = internal::GetTimeInMillis() - start; impl->os_stack_trace_getter()->UponLeavingGTest(); internal::HandleExceptionsInMethodIfSupported( this, &TestCase::RunTearDownTestCase, "TearDownTestCase()"); repeater->OnTestCaseEnd(*this); impl->set_current_test_case(NULL); } // Clears the results of all tests in this test case. void TestCase::ClearResult() { ad_hoc_test_result_.Clear(); ForEach(test_info_list_, TestInfo::ClearTestResult); } // Shuffles the tests in this test case. void TestCase::ShuffleTests(internal::Random* random) { Shuffle(random, &test_indices_); } // Restores the test order to before the first shuffle. void TestCase::UnshuffleTests() { for (size_t i = 0; i < test_indices_.size(); i++) { test_indices_[i] = static_cast(i); } } // Formats a countable noun. Depending on its quantity, either the // singular form or the plural form is used. e.g. // // FormatCountableNoun(1, "formula", "formuli") returns "1 formula". // FormatCountableNoun(5, "book", "books") returns "5 books". static std::string FormatCountableNoun(int count, const char * singular_form, const char * plural_form) { return internal::StreamableToString(count) + " " + (count == 1 ? singular_form : plural_form); } // Formats the count of tests. static std::string FormatTestCount(int test_count) { return FormatCountableNoun(test_count, "test", "tests"); } // Formats the count of test cases. static std::string FormatTestCaseCount(int test_case_count) { return FormatCountableNoun(test_case_count, "test case", "test cases"); } // Converts a TestPartResult::Type enum to human-friendly string // representation. Both kNonFatalFailure and kFatalFailure are translated // to "Failure", as the user usually doesn't care about the difference // between the two when viewing the test result. 
static const char * TestPartResultTypeToString(TestPartResult::Type type) { switch (type) { case TestPartResult::kSuccess: return "Success"; case TestPartResult::kNonFatalFailure: case TestPartResult::kFatalFailure: #ifdef _MSC_VER return "error: "; #else return "Failure\n"; #endif default: return "Unknown result type"; } } namespace internal { // Prints a TestPartResult to an std::string. static std::string PrintTestPartResultToString( const TestPartResult& test_part_result) { return (Message() << internal::FormatFileLocation(test_part_result.file_name(), test_part_result.line_number()) << " " << TestPartResultTypeToString(test_part_result.type()) << test_part_result.message()).GetString(); } // Prints a TestPartResult. static void PrintTestPartResult(const TestPartResult& test_part_result) { const std::string& result = PrintTestPartResultToString(test_part_result); printf("%s\n", result.c_str()); fflush(stdout); // If the test program runs in Visual Studio or a debugger, the // following statements add the test part result message to the Output // window such that the user can double-click on it to jump to the // corresponding source code location; otherwise they do nothing. #if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE // We don't call OutputDebugString*() on Windows Mobile, as printing // to stdout is done by OutputDebugString() there already - we don't // want the same message printed twice. ::OutputDebugStringA(result.c_str()); ::OutputDebugStringA("\n"); #endif } // class PrettyUnitTestResultPrinter enum GTestColor { COLOR_DEFAULT, COLOR_RED, COLOR_GREEN, COLOR_YELLOW }; #if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE && \ !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT && !GTEST_OS_WINDOWS_MINGW // Returns the character attribute for the given color. static WORD GetColorAttribute(GTestColor color) { switch (color) { case COLOR_RED: return FOREGROUND_RED; case COLOR_GREEN: return FOREGROUND_GREEN; case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN; default: return 0; } } static int GetBitOffset(WORD color_mask) { if (color_mask == 0) return 0; int bitOffset = 0; while ((color_mask & 1) == 0) { color_mask >>= 1; ++bitOffset; } return bitOffset; } static WORD GetNewColor(GTestColor color, WORD old_color_attrs) { // Let's reuse the BG static const WORD background_mask = BACKGROUND_BLUE | BACKGROUND_GREEN | BACKGROUND_RED | BACKGROUND_INTENSITY; static const WORD foreground_mask = FOREGROUND_BLUE | FOREGROUND_GREEN | FOREGROUND_RED | FOREGROUND_INTENSITY; const WORD existing_bg = old_color_attrs & background_mask; WORD new_color = GetColorAttribute(color) | existing_bg | FOREGROUND_INTENSITY; static const int bg_bitOffset = GetBitOffset(background_mask); static const int fg_bitOffset = GetBitOffset(foreground_mask); if (((new_color & background_mask) >> bg_bitOffset) == ((new_color & foreground_mask) >> fg_bitOffset)) { new_color ^= FOREGROUND_INTENSITY; // invert intensity } return new_color; } #else // Returns the ANSI color code for the given color. COLOR_DEFAULT is // an invalid input. static const char* GetAnsiColorCode(GTestColor color) { switch (color) { case COLOR_RED: return "1"; case COLOR_GREEN: return "2"; case COLOR_YELLOW: return "3"; default: return NULL; }; } #endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE // Returns true iff Google Test should use colors in the output. 
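// For example (editor's note), the decision below combines the --gtest_color
// flag with the environment:
//
//   ./foo_test --gtest_color=yes    // always colorize
//   ./foo_test --gtest_color=no     // never colorize
//   ./foo_test --gtest_color=auto   // colorize iff stdout is a TTY (and, on
//                                   // non-Windows platforms, TERM is one of
//                                   // the recognized values listed below)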
bool ShouldUseColor(bool stdout_is_tty) { const char* const gtest_color = GTEST_FLAG(color).c_str(); if (String::CaseInsensitiveCStringEquals(gtest_color, "auto")) { #if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MINGW // On Windows the TERM variable is usually not set, but the // console there does support colors. return stdout_is_tty; #else // On non-Windows platforms, we rely on the TERM variable. const char* const term = posix::GetEnv("TERM"); const bool term_supports_color = String::CStringEquals(term, "xterm") || String::CStringEquals(term, "xterm-color") || String::CStringEquals(term, "xterm-256color") || String::CStringEquals(term, "screen") || String::CStringEquals(term, "screen-256color") || String::CStringEquals(term, "tmux") || String::CStringEquals(term, "tmux-256color") || String::CStringEquals(term, "rxvt-unicode") || String::CStringEquals(term, "rxvt-unicode-256color") || String::CStringEquals(term, "linux") || String::CStringEquals(term, "cygwin"); return stdout_is_tty && term_supports_color; #endif // GTEST_OS_WINDOWS } return String::CaseInsensitiveCStringEquals(gtest_color, "yes") || String::CaseInsensitiveCStringEquals(gtest_color, "true") || String::CaseInsensitiveCStringEquals(gtest_color, "t") || String::CStringEquals(gtest_color, "1"); // We take "yes", "true", "t", and "1" as meaning "yes". If the // value is neither one of these nor "auto", we treat it as "no" to // be conservative. } // Helpers for printing colored strings to stdout. Note that on Windows, we // cannot simply emit special characters and have the terminal change colors. // This routine must actually emit the characters rather than return a string // that would be colored when printed, as can be done on Linux. static void ColoredPrintf(GTestColor color, const char* fmt, ...) { va_list args; va_start(args, fmt); #if GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS || \ GTEST_OS_IOS || GTEST_OS_WINDOWS_PHONE || GTEST_OS_WINDOWS_RT const bool use_color = AlwaysFalse(); #else static const bool in_color_mode = ShouldUseColor(posix::IsATTY(posix::FileNo(stdout)) != 0); const bool use_color = in_color_mode && (color != COLOR_DEFAULT); #endif // GTEST_OS_WINDOWS_MOBILE || GTEST_OS_SYMBIAN || GTEST_OS_ZOS // The '!= 0' comparison is necessary to satisfy MSVC 7.1. if (!use_color) { vprintf(fmt, args); va_end(args); return; } #if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE && \ !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT && !GTEST_OS_WINDOWS_MINGW const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE); // Gets the current text color. CONSOLE_SCREEN_BUFFER_INFO buffer_info; GetConsoleScreenBufferInfo(stdout_handle, &buffer_info); const WORD old_color_attrs = buffer_info.wAttributes; const WORD new_color = GetNewColor(color, old_color_attrs); // We need to flush the stream buffers into the console before each // SetConsoleTextAttribute call lest it affect the text that is already // printed but has not yet reached the console. fflush(stdout); SetConsoleTextAttribute(stdout_handle, new_color); vprintf(fmt, args); fflush(stdout); // Restores the text color. SetConsoleTextAttribute(stdout_handle, old_color_attrs); #else printf("\033[0;3%sm", GetAnsiColorCode(color)); vprintf(fmt, args); printf("\033[m"); // Resets the terminal to default. #endif // GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_MOBILE va_end(args); } // Text printed in Google Test's text output and --gtest_list_tests // output to label the type parameter and value parameter for a test. 
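// For a parameterized test, PrintFullTestCommentIfPresent() below appends the
// parameter info to the test name, e.g. (editor's example; FooTest is a
// hypothetical fixture):
//
//   [  FAILED  ] Small/FooTest.IsNonNegative/1, where GetParam() = 2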
static const char kTypeParamLabel[] = "TypeParam"; static const char kValueParamLabel[] = "GetParam()"; static void PrintFullTestCommentIfPresent(const TestInfo& test_info) { const char* const type_param = test_info.type_param(); const char* const value_param = test_info.value_param(); if (type_param != NULL || value_param != NULL) { printf(", where "); if (type_param != NULL) { printf("%s = %s", kTypeParamLabel, type_param); if (value_param != NULL) printf(" and "); } if (value_param != NULL) { printf("%s = %s", kValueParamLabel, value_param); } } } // This class implements the TestEventListener interface. // // Class PrettyUnitTestResultPrinter is copyable. class PrettyUnitTestResultPrinter : public TestEventListener { public: PrettyUnitTestResultPrinter() {} static void PrintTestName(const char * test_case, const char * test) { printf("%s.%s", test_case, test); } // The following methods override what's in the TestEventListener class. virtual void OnTestProgramStart(const UnitTest& /*unit_test*/) {} virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration); virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test); virtual void OnEnvironmentsSetUpEnd(const UnitTest& /*unit_test*/) {} virtual void OnTestCaseStart(const TestCase& test_case); virtual void OnTestStart(const TestInfo& test_info); virtual void OnTestPartResult(const TestPartResult& result); virtual void OnTestEnd(const TestInfo& test_info); virtual void OnTestCaseEnd(const TestCase& test_case); virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test); virtual void OnEnvironmentsTearDownEnd(const UnitTest& /*unit_test*/) {} virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); virtual void OnTestProgramEnd(const UnitTest& /*unit_test*/) {} private: static void PrintFailedTests(const UnitTest& unit_test); }; // Fired before each iteration of tests starts. void PrettyUnitTestResultPrinter::OnTestIterationStart( const UnitTest& unit_test, int iteration) { if (GTEST_FLAG(repeat) != 1) printf("\nRepeating all tests (iteration %d) . . .\n\n", iteration + 1); const char* const filter = GTEST_FLAG(filter).c_str(); // Prints the filter if it's not *. This reminds the user that some // tests may be skipped. 
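  // e.g. (editor's example) running with --gtest_filter=FooTest.* prints:
  //
  //   Note: Google Test filter = FooTest.*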
if (!String::CStringEquals(filter, kUniversalFilter)) { ColoredPrintf(COLOR_YELLOW, "Note: %s filter = %s\n", GTEST_NAME_, filter); } if (internal::ShouldShard(kTestTotalShards, kTestShardIndex, false)) { const Int32 shard_index = Int32FromEnvOrDie(kTestShardIndex, -1); ColoredPrintf(COLOR_YELLOW, "Note: This is test shard %d of %s.\n", static_cast(shard_index) + 1, internal::posix::GetEnv(kTestTotalShards)); } if (GTEST_FLAG(shuffle)) { ColoredPrintf(COLOR_YELLOW, "Note: Randomizing tests' orders with a seed of %d .\n", unit_test.random_seed()); } ColoredPrintf(COLOR_GREEN, "[==========] "); printf("Running %s from %s.\n", FormatTestCount(unit_test.test_to_run_count()).c_str(), FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str()); fflush(stdout); } void PrettyUnitTestResultPrinter::OnEnvironmentsSetUpStart( const UnitTest& /*unit_test*/) { ColoredPrintf(COLOR_GREEN, "[----------] "); printf("Global test environment set-up.\n"); fflush(stdout); } void PrettyUnitTestResultPrinter::OnTestCaseStart(const TestCase& test_case) { const std::string counts = FormatCountableNoun(test_case.test_to_run_count(), "test", "tests"); ColoredPrintf(COLOR_GREEN, "[----------] "); printf("%s from %s", counts.c_str(), test_case.name()); if (test_case.type_param() == NULL) { printf("\n"); } else { printf(", where %s = %s\n", kTypeParamLabel, test_case.type_param()); } fflush(stdout); } void PrettyUnitTestResultPrinter::OnTestStart(const TestInfo& test_info) { ColoredPrintf(COLOR_GREEN, "[ RUN ] "); PrintTestName(test_info.test_case_name(), test_info.name()); printf("\n"); fflush(stdout); } // Called after an assertion failure. void PrettyUnitTestResultPrinter::OnTestPartResult( const TestPartResult& result) { // If the test part succeeded, we don't need to do anything. if (result.type() == TestPartResult::kSuccess) return; // Print failure message from the assertion (e.g. expected this and got that). PrintTestPartResult(result); fflush(stdout); } void PrettyUnitTestResultPrinter::OnTestEnd(const TestInfo& test_info) { if (test_info.result()->Passed()) { ColoredPrintf(COLOR_GREEN, "[ OK ] "); } else { ColoredPrintf(COLOR_RED, "[ FAILED ] "); } PrintTestName(test_info.test_case_name(), test_info.name()); if (test_info.result()->Failed()) PrintFullTestCommentIfPresent(test_info); if (GTEST_FLAG(print_time)) { printf(" (%s ms)\n", internal::StreamableToString( test_info.result()->elapsed_time()).c_str()); } else { printf("\n"); } fflush(stdout); } void PrettyUnitTestResultPrinter::OnTestCaseEnd(const TestCase& test_case) { if (!GTEST_FLAG(print_time)) return; const std::string counts = FormatCountableNoun(test_case.test_to_run_count(), "test", "tests"); ColoredPrintf(COLOR_GREEN, "[----------] "); printf("%s from %s (%s ms total)\n\n", counts.c_str(), test_case.name(), internal::StreamableToString(test_case.elapsed_time()).c_str()); fflush(stdout); } void PrettyUnitTestResultPrinter::OnEnvironmentsTearDownStart( const UnitTest& /*unit_test*/) { ColoredPrintf(COLOR_GREEN, "[----------] "); printf("Global test environment tear-down\n"); fflush(stdout); } // Internal helper for printing the list of failed tests. 
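// With two failures, the iteration summary printed below ends like this
// (editor's example):
//
//   [  FAILED  ] FooTest.Bar
//   [  FAILED  ] Small/FooTest.IsNonNegative/1, where GetParam() = 2
//
//    2 FAILED TESTS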
void PrettyUnitTestResultPrinter::PrintFailedTests(const UnitTest& unit_test) { const int failed_test_count = unit_test.failed_test_count(); if (failed_test_count == 0) { return; } for (int i = 0; i < unit_test.total_test_case_count(); ++i) { const TestCase& test_case = *unit_test.GetTestCase(i); if (!test_case.should_run() || (test_case.failed_test_count() == 0)) { continue; } for (int j = 0; j < test_case.total_test_count(); ++j) { const TestInfo& test_info = *test_case.GetTestInfo(j); if (!test_info.should_run() || test_info.result()->Passed()) { continue; } ColoredPrintf(COLOR_RED, "[ FAILED ] "); printf("%s.%s", test_case.name(), test_info.name()); PrintFullTestCommentIfPresent(test_info); printf("\n"); } } } void PrettyUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, int /*iteration*/) { ColoredPrintf(COLOR_GREEN, "[==========] "); printf("%s from %s ran.", FormatTestCount(unit_test.test_to_run_count()).c_str(), FormatTestCaseCount(unit_test.test_case_to_run_count()).c_str()); if (GTEST_FLAG(print_time)) { printf(" (%s ms total)", internal::StreamableToString(unit_test.elapsed_time()).c_str()); } printf("\n"); ColoredPrintf(COLOR_GREEN, "[ PASSED ] "); printf("%s.\n", FormatTestCount(unit_test.successful_test_count()).c_str()); int num_failures = unit_test.failed_test_count(); if (!unit_test.Passed()) { const int failed_test_count = unit_test.failed_test_count(); ColoredPrintf(COLOR_RED, "[ FAILED ] "); printf("%s, listed below:\n", FormatTestCount(failed_test_count).c_str()); PrintFailedTests(unit_test); printf("\n%2d FAILED %s\n", num_failures, num_failures == 1 ? "TEST" : "TESTS"); } int num_disabled = unit_test.reportable_disabled_test_count(); if (num_disabled && !GTEST_FLAG(also_run_disabled_tests)) { if (!num_failures) { printf("\n"); // Add a spacer if no FAILURE banner is displayed. } ColoredPrintf(COLOR_YELLOW, " YOU HAVE %d DISABLED %s\n\n", num_disabled, num_disabled == 1 ? "TEST" : "TESTS"); } // Ensure that Google Test output is printed before, e.g., heapchecker output. fflush(stdout); } // End PrettyUnitTestResultPrinter // class TestEventRepeater // // This class forwards events to other event listeners. class TestEventRepeater : public TestEventListener { public: TestEventRepeater() : forwarding_enabled_(true) {} virtual ~TestEventRepeater(); void Append(TestEventListener *listener); TestEventListener* Release(TestEventListener* listener); // Controls whether events will be forwarded to listeners_. Set to false // in death test child processes. bool forwarding_enabled() const { return forwarding_enabled_; } void set_forwarding_enabled(bool enable) { forwarding_enabled_ = enable; } virtual void OnTestProgramStart(const UnitTest& unit_test); virtual void OnTestIterationStart(const UnitTest& unit_test, int iteration); virtual void OnEnvironmentsSetUpStart(const UnitTest& unit_test); virtual void OnEnvironmentsSetUpEnd(const UnitTest& unit_test); virtual void OnTestCaseStart(const TestCase& test_case); virtual void OnTestStart(const TestInfo& test_info); virtual void OnTestPartResult(const TestPartResult& result); virtual void OnTestEnd(const TestInfo& test_info); virtual void OnTestCaseEnd(const TestCase& test_case); virtual void OnEnvironmentsTearDownStart(const UnitTest& unit_test); virtual void OnEnvironmentsTearDownEnd(const UnitTest& unit_test); virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); virtual void OnTestProgramEnd(const UnitTest& unit_test); private: // Controls whether events will be forwarded to listeners_. 
Set to false // in death test child processes. bool forwarding_enabled_; // The list of listeners that receive events. std::vector listeners_; GTEST_DISALLOW_COPY_AND_ASSIGN_(TestEventRepeater); }; TestEventRepeater::~TestEventRepeater() { ForEach(listeners_, Delete); } void TestEventRepeater::Append(TestEventListener *listener) { listeners_.push_back(listener); } // FIXME: Factor the search functionality into Vector::Find. TestEventListener* TestEventRepeater::Release(TestEventListener *listener) { for (size_t i = 0; i < listeners_.size(); ++i) { if (listeners_[i] == listener) { listeners_.erase(listeners_.begin() + i); return listener; } } return NULL; } // Since most methods are very similar, use macros to reduce boilerplate. // This defines a member that forwards the call to all listeners. #define GTEST_REPEATER_METHOD_(Name, Type) \ void TestEventRepeater::Name(const Type& parameter) { \ if (forwarding_enabled_) { \ for (size_t i = 0; i < listeners_.size(); i++) { \ listeners_[i]->Name(parameter); \ } \ } \ } // This defines a member that forwards the call to all listeners in reverse // order. #define GTEST_REVERSE_REPEATER_METHOD_(Name, Type) \ void TestEventRepeater::Name(const Type& parameter) { \ if (forwarding_enabled_) { \ for (int i = static_cast(listeners_.size()) - 1; i >= 0; i--) { \ listeners_[i]->Name(parameter); \ } \ } \ } GTEST_REPEATER_METHOD_(OnTestProgramStart, UnitTest) GTEST_REPEATER_METHOD_(OnEnvironmentsSetUpStart, UnitTest) GTEST_REPEATER_METHOD_(OnTestCaseStart, TestCase) GTEST_REPEATER_METHOD_(OnTestStart, TestInfo) GTEST_REPEATER_METHOD_(OnTestPartResult, TestPartResult) GTEST_REPEATER_METHOD_(OnEnvironmentsTearDownStart, UnitTest) GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsSetUpEnd, UnitTest) GTEST_REVERSE_REPEATER_METHOD_(OnEnvironmentsTearDownEnd, UnitTest) GTEST_REVERSE_REPEATER_METHOD_(OnTestEnd, TestInfo) GTEST_REVERSE_REPEATER_METHOD_(OnTestCaseEnd, TestCase) GTEST_REVERSE_REPEATER_METHOD_(OnTestProgramEnd, UnitTest) #undef GTEST_REPEATER_METHOD_ #undef GTEST_REVERSE_REPEATER_METHOD_ void TestEventRepeater::OnTestIterationStart(const UnitTest& unit_test, int iteration) { if (forwarding_enabled_) { for (size_t i = 0; i < listeners_.size(); i++) { listeners_[i]->OnTestIterationStart(unit_test, iteration); } } } void TestEventRepeater::OnTestIterationEnd(const UnitTest& unit_test, int iteration) { if (forwarding_enabled_) { for (int i = static_cast(listeners_.size()) - 1; i >= 0; i--) { listeners_[i]->OnTestIterationEnd(unit_test, iteration); } } } // End TestEventRepeater // This class generates an XML output file. class XmlUnitTestResultPrinter : public EmptyTestEventListener { public: explicit XmlUnitTestResultPrinter(const char* output_file); virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration); void ListTestsMatchingFilter(const std::vector& test_cases); // Prints an XML summary of all unit tests. static void PrintXmlTestsList(std::ostream* stream, const std::vector& test_cases); private: // Is c a whitespace character that is normalized to a space character // when it appears in an XML attribute value? static bool IsNormalizableWhitespace(char c) { return c == 0x9 || c == 0xA || c == 0xD; } // May c appear in a well-formed XML document? static bool IsValidXmlCharacter(char c) { return IsNormalizableWhitespace(c) || c >= 0x20; } // Returns an XML-escaped copy of the input string str. 
If // is_attribute is true, the text is meant to appear as an attribute // value, and normalizable whitespace is preserved by replacing it // with character references. static std::string EscapeXml(const std::string& str, bool is_attribute); // Returns the given string with all characters invalid in XML removed. static std::string RemoveInvalidXmlCharacters(const std::string& str); // Convenience wrapper around EscapeXml when str is an attribute value. static std::string EscapeXmlAttribute(const std::string& str) { return EscapeXml(str, true); } // Convenience wrapper around EscapeXml when str is not an attribute value. static std::string EscapeXmlText(const char* str) { return EscapeXml(str, false); } // Verifies that the given attribute belongs to the given element and // streams the attribute as XML. static void OutputXmlAttribute(std::ostream* stream, const std::string& element_name, const std::string& name, const std::string& value); // Streams an XML CDATA section, escaping invalid CDATA sequences as needed. static void OutputXmlCDataSection(::std::ostream* stream, const char* data); // Streams an XML representation of a TestInfo object. static void OutputXmlTestInfo(::std::ostream* stream, const char* test_case_name, const TestInfo& test_info); // Prints an XML representation of a TestCase object static void PrintXmlTestCase(::std::ostream* stream, const TestCase& test_case); // Prints an XML summary of unit_test to output stream out. static void PrintXmlUnitTest(::std::ostream* stream, const UnitTest& unit_test); // Produces a string representing the test properties in a result as space // delimited XML attributes based on the property key="value" pairs. // When the std::string is not empty, it includes a space at the beginning, // to delimit this attribute from prior attributes. static std::string TestPropertiesAsXmlAttributes(const TestResult& result); // Streams an XML representation of the test properties of a TestResult // object. static void OutputXmlTestProperties(std::ostream* stream, const TestResult& result); // The output file. const std::string output_file_; GTEST_DISALLOW_COPY_AND_ASSIGN_(XmlUnitTestResultPrinter); }; // Creates a new XmlUnitTestResultPrinter. XmlUnitTestResultPrinter::XmlUnitTestResultPrinter(const char* output_file) : output_file_(output_file) { if (output_file_.empty()) { GTEST_LOG_(FATAL) << "XML output file may not be null"; } } // Called after the unit test ends. void XmlUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, int /*iteration*/) { FILE* xmlout = OpenFileForWriting(output_file_); std::stringstream stream; PrintXmlUnitTest(&stream, unit_test); fprintf(xmlout, "%s", StringStreamToString(&stream).c_str()); fclose(xmlout); } void XmlUnitTestResultPrinter::ListTestsMatchingFilter( const std::vector& test_cases) { FILE* xmlout = OpenFileForWriting(output_file_); std::stringstream stream; PrintXmlTestsList(&stream, test_cases); fprintf(xmlout, "%s", StringStreamToString(&stream).c_str()); fclose(xmlout); } // Returns an XML-escaped copy of the input string str. If is_attribute // is true, the text is meant to appear as an attribute value, and // normalizable whitespace is preserved by replacing it with character // references. // // Invalid XML characters in str, if any, are stripped from the output. // It is expected that most, if not all, of the text processed by this // module will consist of ordinary English text. 
// If this module is ever modified to produce version 1.1 XML output,
// most invalid characters can be retained using character references.
// FIXME: It might be nice to have a minimally invasive, human-readable
// escaping scheme for invalid characters, rather than dropping them.
std::string XmlUnitTestResultPrinter::EscapeXml(
    const std::string& str, bool is_attribute) {
  Message m;

  for (size_t i = 0; i < str.size(); ++i) {
    const char ch = str[i];
    switch (ch) {
      case '<':
        m << "&lt;";
        break;
      case '>':
        m << "&gt;";
        break;
      case '&':
        m << "&amp;";
        break;
      case '\'':
        if (is_attribute)
          m << "&apos;";
        else
          m << '\'';
        break;
      case '"':
        if (is_attribute)
          m << "&quot;";
        else
          m << '"';
        break;
      default:
        if (IsValidXmlCharacter(ch)) {
          if (is_attribute && IsNormalizableWhitespace(ch))
            m << "&#x" << String::FormatByte(static_cast<unsigned char>(ch))
              << ";";
          else
            m << ch;
        }
        break;
    }
  }

  return m.GetString();
}

// Returns the given string with all characters invalid in XML removed.
// Currently invalid characters are dropped from the string. An
// alternative is to replace them with certain characters such as . or ?.
std::string XmlUnitTestResultPrinter::RemoveInvalidXmlCharacters(
    const std::string& str) {
  std::string output;
  output.reserve(str.size());
  for (std::string::const_iterator it = str.begin(); it != str.end(); ++it)
    if (IsValidXmlCharacter(*it))
      output.push_back(*it);

  return output;
}

// The following routines generate an XML representation of a UnitTest
// object.
// GOOGLETEST_CM0009 DO NOT DELETE
//
// This is how Google Test concepts map to the DTD:
//
// <testsuites name="AllTests">        <-- corresponds to a UnitTest object
//   <testsuite name="testcase-name">  <-- corresponds to a TestCase object
//     <testcase name="test-name">     <-- corresponds to a TestInfo object
//       <failure message="...">...</failure>
//       <failure message="...">...</failure>
//       <failure message="...">...</failure>
//                                     <-- individual assertion failures
//     </testcase>
//   </testsuite>
// </testsuites>

// Formats the given time in milliseconds as seconds.
std::string FormatTimeInMillisAsSeconds(TimeInMillis ms) {
  ::std::stringstream ss;
  ss << (static_cast<double>(ms) * 1e-3);
  return ss.str();
}

static bool PortableLocaltime(time_t seconds, struct tm* out) {
#if defined(_MSC_VER)
  return localtime_s(out, &seconds) == 0;
#elif defined(__MINGW32__) || defined(__MINGW64__)
  // MINGW <time.h> provides neither localtime_r nor localtime_s, but uses
  // Windows' localtime(), which has a thread-local tm buffer.
  struct tm* tm_ptr = localtime(&seconds);  // NOLINT
  if (tm_ptr == NULL)
    return false;
  *out = *tm_ptr;
  return true;
#else
  return localtime_r(&seconds, out) != NULL;
#endif
}

// Converts the given epoch time in milliseconds to a date string in the ISO
// 8601 format, without the timezone information.
std::string FormatEpochTimeInMillisAsIso8601(TimeInMillis ms) {
  struct tm time_struct;
  if (!PortableLocaltime(static_cast<time_t>(ms / 1000), &time_struct))
    return "";
  // YYYY-MM-DDThh:mm:ss
  return StreamableToString(time_struct.tm_year + 1900) + "-" +
      String::FormatIntWidth2(time_struct.tm_mon + 1) + "-" +
      String::FormatIntWidth2(time_struct.tm_mday) + "T" +
      String::FormatIntWidth2(time_struct.tm_hour) + ":" +
      String::FormatIntWidth2(time_struct.tm_min) + ":" +
      String::FormatIntWidth2(time_struct.tm_sec);
}

// Streams an XML CDATA section, escaping invalid CDATA sequences as needed.
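// A literal "]]>" in the data would otherwise terminate the CDATA section
// early, so it is split across two sections (editor's example):
//
//   input :  abc]]>def
//   output:  <![CDATA[abc]]>]]&gt;<![CDATA[def]]>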
void XmlUnitTestResultPrinter::OutputXmlCDataSection(::std::ostream* stream,
                                                     const char* data) {
  const char* segment = data;
  *stream << "<![CDATA[";
  for (;;) {
    const char* const next_segment = strstr(segment, "]]>");
    if (next_segment != NULL) {
      stream->write(
          segment, static_cast<std::streamsize>(next_segment - segment));
      *stream << "]]>]]&gt;<![CDATA[";
      segment = next_segment + strlen("]]>");
    } else {
      *stream << segment;
      break;
    }
  }
  *stream << "]]>";
}

void XmlUnitTestResultPrinter::OutputXmlAttribute(
    std::ostream* stream,
    const std::string& element_name,
    const std::string& name,
    const std::string& value) {
  const std::vector<std::string>& allowed_names =
      GetReservedAttributesForElement(element_name);

  GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) !=
                   allowed_names.end())
      << "Attribute " << name << " is not allowed for element <"
      << element_name << ">.";

  *stream << " " << name << "=\"" << EscapeXmlAttribute(value) << "\"";
}

// Prints an XML representation of a TestInfo object.
// FIXME: There is also value in printing properties with the plain printer.
void XmlUnitTestResultPrinter::OutputXmlTestInfo(::std::ostream* stream,
                                                 const char* test_case_name,
                                                 const TestInfo& test_info) {
  const TestResult& result = *test_info.result();
  const std::string kTestcase = "testcase";

  if (test_info.is_in_another_shard()) {
    return;
  }

  *stream << "    <testcase";
  OutputXmlAttribute(stream, kTestcase, "name", test_info.name());

  if (test_info.value_param() != NULL) {
    OutputXmlAttribute(stream, kTestcase, "value_param",
                       test_info.value_param());
  }
  if (test_info.type_param() != NULL) {
    OutputXmlAttribute(stream, kTestcase, "type_param",
                       test_info.type_param());
  }
  if (GTEST_FLAG(list_tests)) {
    OutputXmlAttribute(stream, kTestcase, "file", test_info.file());
    OutputXmlAttribute(stream, kTestcase, "line",
                       StreamableToString(test_info.line()));
    *stream << " />\n";
    return;
  }

  OutputXmlAttribute(stream, kTestcase, "status",
                     test_info.should_run() ? "run" : "notrun");
  OutputXmlAttribute(stream, kTestcase, "time",
                     FormatTimeInMillisAsSeconds(result.elapsed_time()));
  OutputXmlAttribute(stream, kTestcase, "classname", test_case_name);

  int failures = 0;
  for (int i = 0; i < result.total_part_count(); ++i) {
    const TestPartResult& part = result.GetTestPartResult(i);
    if (part.failed()) {
      if (++failures == 1) {
        *stream << ">\n";
      }
      const std::string location =
          internal::FormatCompilerIndependentFileLocation(part.file_name(),
                                                          part.line_number());
      const std::string summary = location + "\n" + part.summary();
      *stream << "      <failure message=\"" << EscapeXmlAttribute(summary)
              << "\" type=\"\">";
      const std::string detail = location + "\n" + part.message();
      OutputXmlCDataSection(stream, RemoveInvalidXmlCharacters(detail).c_str());
      *stream << "</failure>\n";
    }
  }

  if (failures == 0 && result.test_property_count() == 0) {
    *stream << " />\n";
  } else {
    if (failures == 0) {
      *stream << ">\n";
    }
    OutputXmlTestProperties(stream, result);
    *stream << "    </" << kTestcase << ">\n";
  }
}

// Prints an XML representation of a TestCase object
void XmlUnitTestResultPrinter::PrintXmlTestCase(std::ostream* stream,
                                                const TestCase& test_case) {
  const std::string kTestsuite = "testsuite";
  *stream << "  <" << kTestsuite;
  OutputXmlAttribute(stream, kTestsuite, "name", test_case.name());
  OutputXmlAttribute(stream, kTestsuite, "tests",
                     StreamableToString(test_case.reportable_test_count()));
  if (!GTEST_FLAG(list_tests)) {
    OutputXmlAttribute(stream, kTestsuite, "failures",
                       StreamableToString(test_case.failed_test_count()));
    OutputXmlAttribute(
        stream, kTestsuite, "disabled",
        StreamableToString(test_case.reportable_disabled_test_count()));
    OutputXmlAttribute(stream, kTestsuite, "errors", "0");
    OutputXmlAttribute(stream, kTestsuite, "time",
                       FormatTimeInMillisAsSeconds(test_case.elapsed_time()));
    *stream << TestPropertiesAsXmlAttributes(test_case.ad_hoc_test_result());
  }
  *stream << ">\n";
  for (int i = 0; i < test_case.total_test_count(); ++i) {
    if (test_case.GetTestInfo(i)->is_reportable())
      OutputXmlTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i));
  }
  *stream << "  </" << kTestsuite << ">\n";
}

// Prints an XML summary of unit_test to output stream out.
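// (Editor's note) This printer is normally installed in response to the
// command line flag
//
//   ./foo_test --gtest_output=xml:report.xml
//
// after which OnTestIterationEnd() serializes the whole run through this
// function into report.xml.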
void XmlUnitTestResultPrinter::PrintXmlUnitTest(std::ostream* stream,
                                                const UnitTest& unit_test) {
  const std::string kTestsuites = "testsuites";

  *stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
  *stream << "<" << kTestsuites;

  OutputXmlAttribute(stream, kTestsuites, "tests",
                     StreamableToString(unit_test.reportable_test_count()));
  OutputXmlAttribute(stream, kTestsuites, "failures",
                     StreamableToString(unit_test.failed_test_count()));
  OutputXmlAttribute(
      stream, kTestsuites, "disabled",
      StreamableToString(unit_test.reportable_disabled_test_count()));
  OutputXmlAttribute(stream, kTestsuites, "errors", "0");
  OutputXmlAttribute(
      stream, kTestsuites, "timestamp",
      FormatEpochTimeInMillisAsIso8601(unit_test.start_timestamp()));
  OutputXmlAttribute(stream, kTestsuites, "time",
                     FormatTimeInMillisAsSeconds(unit_test.elapsed_time()));

  if (GTEST_FLAG(shuffle)) {
    OutputXmlAttribute(stream, kTestsuites, "random_seed",
                       StreamableToString(unit_test.random_seed()));
  }
  *stream << TestPropertiesAsXmlAttributes(unit_test.ad_hoc_test_result());

  OutputXmlAttribute(stream, kTestsuites, "name", "AllTests");
  *stream << ">\n";

  for (int i = 0; i < unit_test.total_test_case_count(); ++i) {
    if (unit_test.GetTestCase(i)->reportable_test_count() > 0)
      PrintXmlTestCase(stream, *unit_test.GetTestCase(i));
  }
  *stream << "</" << kTestsuites << ">\n";
}

void XmlUnitTestResultPrinter::PrintXmlTestsList(
    std::ostream* stream, const std::vector<TestCase*>& test_cases) {
  const std::string kTestsuites = "testsuites";

  *stream << "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n";
  *stream << "<" << kTestsuites;

  int total_tests = 0;
  for (size_t i = 0; i < test_cases.size(); ++i) {
    total_tests += test_cases[i]->total_test_count();
  }
  OutputXmlAttribute(stream, kTestsuites, "tests",
                     StreamableToString(total_tests));
  OutputXmlAttribute(stream, kTestsuites, "name", "AllTests");
  *stream << ">\n";

  for (size_t i = 0; i < test_cases.size(); ++i) {
    PrintXmlTestCase(stream, *test_cases[i]);
  }
  *stream << "</" << kTestsuites << ">\n";
}

// Produces a string representing the test properties in a result as space
// delimited XML attributes based on the property key="value" pairs.
std::string XmlUnitTestResultPrinter::TestPropertiesAsXmlAttributes(
    const TestResult& result) {
  Message attributes;
  for (int i = 0; i < result.test_property_count(); ++i) {
    const TestProperty& property = result.GetTestProperty(i);
    attributes << " " << property.key() << "="
        << "\"" << EscapeXmlAttribute(property.value()) << "\"";
  }
  return attributes.GetString();
}

void XmlUnitTestResultPrinter::OutputXmlTestProperties(
    std::ostream* stream, const TestResult& result) {
  const std::string kProperties = "properties";
  const std::string kProperty = "property";

  if (result.test_property_count() <= 0) {
    return;
  }

  *stream << "<" << kProperties << ">\n";
  for (int i = 0; i < result.test_property_count(); ++i) {
    const TestProperty& property = result.GetTestProperty(i);
    *stream << "<" << kProperty;
    *stream << " name=\"" << EscapeXmlAttribute(property.key()) << "\"";
    *stream << " value=\"" << EscapeXmlAttribute(property.value()) << "\"";
    *stream << "/>\n";
  }
  *stream << "</" << kProperties << ">\n";
}

// End XmlUnitTestResultPrinter

// This class generates a JSON output file.
class JsonUnitTestResultPrinter : public EmptyTestEventListener {
 public:
  explicit JsonUnitTestResultPrinter(const char* output_file);

  virtual void OnTestIterationEnd(const UnitTest& unit_test, int iteration);

  // Prints a JSON summary of all unit tests.
  static void PrintJsonTestList(::std::ostream* stream,
                                const std::vector<TestCase*>& test_cases);

 private:
  // Returns a JSON-escaped copy of the input string str.
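  // e.g. (editor's example) EscapeJson("a\"b\nc") returns "a\\\"b\\nc".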
static std::string EscapeJson(const std::string& str); //// Verifies that the given attribute belongs to the given element and //// streams the attribute as JSON. static void OutputJsonKey(std::ostream* stream, const std::string& element_name, const std::string& name, const std::string& value, const std::string& indent, bool comma = true); static void OutputJsonKey(std::ostream* stream, const std::string& element_name, const std::string& name, int value, const std::string& indent, bool comma = true); // Streams a JSON representation of a TestInfo object. static void OutputJsonTestInfo(::std::ostream* stream, const char* test_case_name, const TestInfo& test_info); // Prints a JSON representation of a TestCase object static void PrintJsonTestCase(::std::ostream* stream, const TestCase& test_case); // Prints a JSON summary of unit_test to output stream out. static void PrintJsonUnitTest(::std::ostream* stream, const UnitTest& unit_test); // Produces a string representing the test properties in a result as // a JSON dictionary. static std::string TestPropertiesAsJson(const TestResult& result, const std::string& indent); // The output file. const std::string output_file_; GTEST_DISALLOW_COPY_AND_ASSIGN_(JsonUnitTestResultPrinter); }; // Creates a new JsonUnitTestResultPrinter. JsonUnitTestResultPrinter::JsonUnitTestResultPrinter(const char* output_file) : output_file_(output_file) { if (output_file_.empty()) { GTEST_LOG_(FATAL) << "JSON output file may not be null"; } } void JsonUnitTestResultPrinter::OnTestIterationEnd(const UnitTest& unit_test, int /*iteration*/) { FILE* jsonout = OpenFileForWriting(output_file_); std::stringstream stream; PrintJsonUnitTest(&stream, unit_test); fprintf(jsonout, "%s", StringStreamToString(&stream).c_str()); fclose(jsonout); } // Returns an JSON-escaped copy of the input string str. std::string JsonUnitTestResultPrinter::EscapeJson(const std::string& str) { Message m; for (size_t i = 0; i < str.size(); ++i) { const char ch = str[i]; switch (ch) { case '\\': case '"': case '/': m << '\\' << ch; break; case '\b': m << "\\b"; break; case '\t': m << "\\t"; break; case '\n': m << "\\n"; break; case '\f': m << "\\f"; break; case '\r': m << "\\r"; break; default: if (ch < ' ') { m << "\\u00" << String::FormatByte(static_cast(ch)); } else { m << ch; } break; } } return m.GetString(); } // The following routines generate an JSON representation of a UnitTest // object. // Formats the given time in milliseconds as seconds. static std::string FormatTimeInMillisAsDuration(TimeInMillis ms) { ::std::stringstream ss; ss << (static_cast(ms) * 1e-3) << "s"; return ss.str(); } // Converts the given epoch time in milliseconds to a date string in the // RFC3339 format, without the timezone information. 
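// e.g. (editor's example, assuming TZ=UTC) an input of 1546300800500 yields
// "2019-01-01T00:00:00Z"; note that the conversion below uses local time and
// truncates the milliseconds.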
static std::string FormatEpochTimeInMillisAsRFC3339(TimeInMillis ms) { struct tm time_struct; if (!PortableLocaltime(static_cast(ms / 1000), &time_struct)) return ""; // YYYY-MM-DDThh:mm:ss return StreamableToString(time_struct.tm_year + 1900) + "-" + String::FormatIntWidth2(time_struct.tm_mon + 1) + "-" + String::FormatIntWidth2(time_struct.tm_mday) + "T" + String::FormatIntWidth2(time_struct.tm_hour) + ":" + String::FormatIntWidth2(time_struct.tm_min) + ":" + String::FormatIntWidth2(time_struct.tm_sec) + "Z"; } static inline std::string Indent(int width) { return std::string(width, ' '); } void JsonUnitTestResultPrinter::OutputJsonKey( std::ostream* stream, const std::string& element_name, const std::string& name, const std::string& value, const std::string& indent, bool comma) { const std::vector& allowed_names = GetReservedAttributesForElement(element_name); GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) != allowed_names.end()) << "Key \"" << name << "\" is not allowed for value \"" << element_name << "\"."; *stream << indent << "\"" << name << "\": \"" << EscapeJson(value) << "\""; if (comma) *stream << ",\n"; } void JsonUnitTestResultPrinter::OutputJsonKey( std::ostream* stream, const std::string& element_name, const std::string& name, int value, const std::string& indent, bool comma) { const std::vector& allowed_names = GetReservedAttributesForElement(element_name); GTEST_CHECK_(std::find(allowed_names.begin(), allowed_names.end(), name) != allowed_names.end()) << "Key \"" << name << "\" is not allowed for value \"" << element_name << "\"."; *stream << indent << "\"" << name << "\": " << StreamableToString(value); if (comma) *stream << ",\n"; } // Prints a JSON representation of a TestInfo object. void JsonUnitTestResultPrinter::OutputJsonTestInfo(::std::ostream* stream, const char* test_case_name, const TestInfo& test_info) { const TestResult& result = *test_info.result(); const std::string kTestcase = "testcase"; const std::string kIndent = Indent(10); *stream << Indent(8) << "{\n"; OutputJsonKey(stream, kTestcase, "name", test_info.name(), kIndent); if (test_info.value_param() != NULL) { OutputJsonKey(stream, kTestcase, "value_param", test_info.value_param(), kIndent); } if (test_info.type_param() != NULL) { OutputJsonKey(stream, kTestcase, "type_param", test_info.type_param(), kIndent); } if (GTEST_FLAG(list_tests)) { OutputJsonKey(stream, kTestcase, "file", test_info.file(), kIndent); OutputJsonKey(stream, kTestcase, "line", test_info.line(), kIndent, false); *stream << "\n" << Indent(8) << "}"; return; } OutputJsonKey(stream, kTestcase, "status", test_info.should_run() ? 
"RUN" : "NOTRUN", kIndent); OutputJsonKey(stream, kTestcase, "time", FormatTimeInMillisAsDuration(result.elapsed_time()), kIndent); OutputJsonKey(stream, kTestcase, "classname", test_case_name, kIndent, false); *stream << TestPropertiesAsJson(result, kIndent); int failures = 0; for (int i = 0; i < result.total_part_count(); ++i) { const TestPartResult& part = result.GetTestPartResult(i); if (part.failed()) { *stream << ",\n"; if (++failures == 1) { *stream << kIndent << "\"" << "failures" << "\": [\n"; } const std::string location = internal::FormatCompilerIndependentFileLocation(part.file_name(), part.line_number()); const std::string message = EscapeJson(location + "\n" + part.message()); *stream << kIndent << " {\n" << kIndent << " \"failure\": \"" << message << "\",\n" << kIndent << " \"type\": \"\"\n" << kIndent << " }"; } } if (failures > 0) *stream << "\n" << kIndent << "]"; *stream << "\n" << Indent(8) << "}"; } // Prints an JSON representation of a TestCase object void JsonUnitTestResultPrinter::PrintJsonTestCase(std::ostream* stream, const TestCase& test_case) { const std::string kTestsuite = "testsuite"; const std::string kIndent = Indent(6); *stream << Indent(4) << "{\n"; OutputJsonKey(stream, kTestsuite, "name", test_case.name(), kIndent); OutputJsonKey(stream, kTestsuite, "tests", test_case.reportable_test_count(), kIndent); if (!GTEST_FLAG(list_tests)) { OutputJsonKey(stream, kTestsuite, "failures", test_case.failed_test_count(), kIndent); OutputJsonKey(stream, kTestsuite, "disabled", test_case.reportable_disabled_test_count(), kIndent); OutputJsonKey(stream, kTestsuite, "errors", 0, kIndent); OutputJsonKey(stream, kTestsuite, "time", FormatTimeInMillisAsDuration(test_case.elapsed_time()), kIndent, false); *stream << TestPropertiesAsJson(test_case.ad_hoc_test_result(), kIndent) << ",\n"; } *stream << kIndent << "\"" << kTestsuite << "\": [\n"; bool comma = false; for (int i = 0; i < test_case.total_test_count(); ++i) { if (test_case.GetTestInfo(i)->is_reportable()) { if (comma) { *stream << ",\n"; } else { comma = true; } OutputJsonTestInfo(stream, test_case.name(), *test_case.GetTestInfo(i)); } } *stream << "\n" << kIndent << "]\n" << Indent(4) << "}"; } // Prints a JSON summary of unit_test to output stream out. 
void JsonUnitTestResultPrinter::PrintJsonUnitTest(std::ostream* stream, const UnitTest& unit_test) { const std::string kTestsuites = "testsuites"; const std::string kIndent = Indent(2); *stream << "{\n"; OutputJsonKey(stream, kTestsuites, "tests", unit_test.reportable_test_count(), kIndent); OutputJsonKey(stream, kTestsuites, "failures", unit_test.failed_test_count(), kIndent); OutputJsonKey(stream, kTestsuites, "disabled", unit_test.reportable_disabled_test_count(), kIndent); OutputJsonKey(stream, kTestsuites, "errors", 0, kIndent); if (GTEST_FLAG(shuffle)) { OutputJsonKey(stream, kTestsuites, "random_seed", unit_test.random_seed(), kIndent); } OutputJsonKey(stream, kTestsuites, "timestamp", FormatEpochTimeInMillisAsRFC3339(unit_test.start_timestamp()), kIndent); OutputJsonKey(stream, kTestsuites, "time", FormatTimeInMillisAsDuration(unit_test.elapsed_time()), kIndent, false); *stream << TestPropertiesAsJson(unit_test.ad_hoc_test_result(), kIndent) << ",\n"; OutputJsonKey(stream, kTestsuites, "name", "AllTests", kIndent); *stream << kIndent << "\"" << kTestsuites << "\": [\n"; bool comma = false; for (int i = 0; i < unit_test.total_test_case_count(); ++i) { if (unit_test.GetTestCase(i)->reportable_test_count() > 0) { if (comma) { *stream << ",\n"; } else { comma = true; } PrintJsonTestCase(stream, *unit_test.GetTestCase(i)); } } *stream << "\n" << kIndent << "]\n" << "}\n"; } void JsonUnitTestResultPrinter::PrintJsonTestList( std::ostream* stream, const std::vector& test_cases) { const std::string kTestsuites = "testsuites"; const std::string kIndent = Indent(2); *stream << "{\n"; int total_tests = 0; for (size_t i = 0; i < test_cases.size(); ++i) { total_tests += test_cases[i]->total_test_count(); } OutputJsonKey(stream, kTestsuites, "tests", total_tests, kIndent); OutputJsonKey(stream, kTestsuites, "name", "AllTests", kIndent); *stream << kIndent << "\"" << kTestsuites << "\": [\n"; for (size_t i = 0; i < test_cases.size(); ++i) { if (i != 0) { *stream << ",\n"; } PrintJsonTestCase(stream, *test_cases[i]); } *stream << "\n" << kIndent << "]\n" << "}\n"; } // Produces a string representing the test properties in a result as // a JSON dictionary. std::string JsonUnitTestResultPrinter::TestPropertiesAsJson( const TestResult& result, const std::string& indent) { Message attributes; for (int i = 0; i < result.test_property_count(); ++i) { const TestProperty& property = result.GetTestProperty(i); attributes << ",\n" << indent << "\"" << property.key() << "\": " << "\"" << EscapeJson(property.value()) << "\""; } return attributes.GetString(); } // End JsonUnitTestResultPrinter #if GTEST_CAN_STREAM_RESULTS_ // Checks if str contains '=', '&', '%' or '\n' characters. If yes, // replaces them by "%xx" where xx is their hexadecimal value. For // example, replaces "=" with "%3D". This algorithm is O(strlen(str)) // in both time and space -- important as the input str may contain an // arbitrarily long test failure message and stack trace. 
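// e.g. (editor's example) UrlEncode("name=value&x") returns
// "name%3Dvalue%26x".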
std::string StreamingListener::UrlEncode(const char* str) { std::string result; result.reserve(strlen(str) + 1); for (char ch = *str; ch != '\0'; ch = *++str) { switch (ch) { case '%': case '=': case '&': case '\n': result.append("%" + String::FormatByte(static_cast(ch))); break; default: result.push_back(ch); break; } } return result; } void StreamingListener::SocketWriter::MakeConnection() { GTEST_CHECK_(sockfd_ == -1) << "MakeConnection() can't be called when there is already a connection."; addrinfo hints; memset(&hints, 0, sizeof(hints)); hints.ai_family = AF_UNSPEC; // To allow both IPv4 and IPv6 addresses. hints.ai_socktype = SOCK_STREAM; addrinfo* servinfo = NULL; // Use the getaddrinfo() to get a linked list of IP addresses for // the given host name. const int error_num = getaddrinfo( host_name_.c_str(), port_num_.c_str(), &hints, &servinfo); if (error_num != 0) { GTEST_LOG_(WARNING) << "stream_result_to: getaddrinfo() failed: " << gai_strerror(error_num); } // Loop through all the results and connect to the first we can. for (addrinfo* cur_addr = servinfo; sockfd_ == -1 && cur_addr != NULL; cur_addr = cur_addr->ai_next) { sockfd_ = socket( cur_addr->ai_family, cur_addr->ai_socktype, cur_addr->ai_protocol); if (sockfd_ != -1) { // Connect the client socket to the server socket. if (connect(sockfd_, cur_addr->ai_addr, cur_addr->ai_addrlen) == -1) { close(sockfd_); sockfd_ = -1; } } } freeaddrinfo(servinfo); // all done with this structure if (sockfd_ == -1) { GTEST_LOG_(WARNING) << "stream_result_to: failed to connect to " << host_name_ << ":" << port_num_; } } // End of class Streaming Listener #endif // GTEST_CAN_STREAM_RESULTS__ // class OsStackTraceGetter const char* const OsStackTraceGetterInterface::kElidedFramesMarker = "... " GTEST_NAME_ " internal frames ..."; std::string OsStackTraceGetter::CurrentStackTrace(int max_depth, int skip_count) GTEST_LOCK_EXCLUDED_(mutex_) { #if GTEST_HAS_ABSL std::string result; if (max_depth <= 0) { return result; } max_depth = std::min(max_depth, kMaxStackTraceDepth); std::vector raw_stack(max_depth); // Skips the frames requested by the caller, plus this function. const int raw_stack_size = absl::GetStackTrace(&raw_stack[0], max_depth, skip_count + 1); void* caller_frame = nullptr; { MutexLock lock(&mutex_); caller_frame = caller_frame_; } for (int i = 0; i < raw_stack_size; ++i) { if (raw_stack[i] == caller_frame && !GTEST_FLAG(show_internal_stack_frames)) { // Add a marker to the trace and stop adding frames. absl::StrAppend(&result, kElidedFramesMarker, "\n"); break; } char tmp[1024]; const char* symbol = "(unknown)"; if (absl::Symbolize(raw_stack[i], tmp, sizeof(tmp))) { symbol = tmp; } char line[1024]; snprintf(line, sizeof(line), " %p: %s\n", raw_stack[i], symbol); result += line; } return result; #else // !GTEST_HAS_ABSL static_cast(max_depth); static_cast(skip_count); return ""; #endif // GTEST_HAS_ABSL } void OsStackTraceGetter::UponLeavingGTest() GTEST_LOCK_EXCLUDED_(mutex_) { #if GTEST_HAS_ABSL void* caller_frame = nullptr; if (absl::GetStackTrace(&caller_frame, 1, 3) <= 0) { caller_frame = nullptr; } MutexLock lock(&mutex_); caller_frame_ = caller_frame; #endif // GTEST_HAS_ABSL } // A helper class that creates the premature-exit file in its // constructor and deletes the file in its destructor. class ScopedPrematureExitFile { public: explicit ScopedPrematureExitFile(const char* premature_exit_filepath) : premature_exit_filepath_(premature_exit_filepath ? 
premature_exit_filepath : "") { // If a path to the premature-exit file is specified... if (!premature_exit_filepath_.empty()) { // create the file with a single "0" character in it. I/O // errors are ignored as there's nothing better we can do and we // don't want to fail the test because of this. FILE* pfile = posix::FOpen(premature_exit_filepath, "w"); fwrite("0", 1, 1, pfile); fclose(pfile); } } ~ScopedPrematureExitFile() { if (!premature_exit_filepath_.empty()) { int retval = remove(premature_exit_filepath_.c_str()); if (retval) { GTEST_LOG_(ERROR) << "Failed to remove premature exit filepath \"" << premature_exit_filepath_ << "\" with error " << retval; } } } private: const std::string premature_exit_filepath_; GTEST_DISALLOW_COPY_AND_ASSIGN_(ScopedPrematureExitFile); }; } // namespace internal // class TestEventListeners TestEventListeners::TestEventListeners() : repeater_(new internal::TestEventRepeater()), default_result_printer_(NULL), default_xml_generator_(NULL) { } TestEventListeners::~TestEventListeners() { delete repeater_; } // Returns the standard listener responsible for the default console // output. Can be removed from the listeners list to shut down default // console output. Note that removing this object from the listener list // with Release transfers its ownership to the user. void TestEventListeners::Append(TestEventListener* listener) { repeater_->Append(listener); } // Removes the given event listener from the list and returns it. It then // becomes the caller's responsibility to delete the listener. Returns // NULL if the listener is not found in the list. TestEventListener* TestEventListeners::Release(TestEventListener* listener) { if (listener == default_result_printer_) default_result_printer_ = NULL; else if (listener == default_xml_generator_) default_xml_generator_ = NULL; return repeater_->Release(listener); } // Returns repeater that broadcasts the TestEventListener events to all // subscribers. TestEventListener* TestEventListeners::repeater() { return repeater_; } // Sets the default_result_printer attribute to the provided listener. // The listener is also added to the listener list and previous // default_result_printer is removed from it and deleted. The listener can // also be NULL in which case it will not be added to the list. Does // nothing if the previous and the current listener objects are the same. void TestEventListeners::SetDefaultResultPrinter(TestEventListener* listener) { if (default_result_printer_ != listener) { // It is an error to pass this method a listener that is already in the // list. delete Release(default_result_printer_); default_result_printer_ = listener; if (listener != NULL) Append(listener); } } // Sets the default_xml_generator attribute to the provided listener. The // listener is also added to the listener list and previous // default_xml_generator is removed from it and deleted. The listener can // also be NULL in which case it will not be added to the list. Does // nothing if the previous and the current listener objects are the same. void TestEventListeners::SetDefaultXmlGenerator(TestEventListener* listener) { if (default_xml_generator_ != listener) { // It is an error to pass this method a listener that is already in the // list. delete Release(default_xml_generator_); default_xml_generator_ = listener; if (listener != NULL) Append(listener); } } // Controls whether events will be forwarded by the repeater to the // listeners in the list. 
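// Death-test child processes, for example, call SuppressEventForwarding() so
// that only the parent process prints results (editor's note).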
bool TestEventListeners::EventForwardingEnabled() const { return repeater_->forwarding_enabled(); } void TestEventListeners::SuppressEventForwarding() { repeater_->set_forwarding_enabled(false); } // class UnitTest // Gets the singleton UnitTest object. The first time this method is // called, a UnitTest object is constructed and returned. Consecutive // calls will return the same object. // // We don't protect this under mutex_ as a user is not supposed to // call this before main() starts, from which point on the return // value will never change. UnitTest* UnitTest::GetInstance() { // When compiled with MSVC 7.1 in optimized mode, destroying the // UnitTest object upon exiting the program messes up the exit code, // causing successful tests to appear failed. We have to use a // different implementation in this case to bypass the compiler bug. // This implementation makes the compiler happy, at the cost of // leaking the UnitTest object. // CodeGear C++Builder insists on a public destructor for the // default implementation. Use this implementation to keep good OO // design with private destructor. #if (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__) static UnitTest* const instance = new UnitTest; return instance; #else static UnitTest instance; return &instance; #endif // (_MSC_VER == 1310 && !defined(_DEBUG)) || defined(__BORLANDC__) } // Gets the number of successful test cases. int UnitTest::successful_test_case_count() const { return impl()->successful_test_case_count(); } // Gets the number of failed test cases. int UnitTest::failed_test_case_count() const { return impl()->failed_test_case_count(); } // Gets the number of all test cases. int UnitTest::total_test_case_count() const { return impl()->total_test_case_count(); } // Gets the number of all test cases that contain at least one test // that should run. int UnitTest::test_case_to_run_count() const { return impl()->test_case_to_run_count(); } // Gets the number of successful tests. int UnitTest::successful_test_count() const { return impl()->successful_test_count(); } // Gets the number of failed tests. int UnitTest::failed_test_count() const { return impl()->failed_test_count(); } // Gets the number of disabled tests that will be reported in the XML report. int UnitTest::reportable_disabled_test_count() const { return impl()->reportable_disabled_test_count(); } // Gets the number of disabled tests. int UnitTest::disabled_test_count() const { return impl()->disabled_test_count(); } // Gets the number of tests to be printed in the XML report. int UnitTest::reportable_test_count() const { return impl()->reportable_test_count(); } // Gets the number of all tests. int UnitTest::total_test_count() const { return impl()->total_test_count(); } // Gets the number of tests that should run. int UnitTest::test_to_run_count() const { return impl()->test_to_run_count(); } // Gets the time of the test program start, in ms from the start of the // UNIX epoch. internal::TimeInMillis UnitTest::start_timestamp() const { return impl()->start_timestamp(); } // Gets the elapsed time, in milliseconds. internal::TimeInMillis UnitTest::elapsed_time() const { return impl()->elapsed_time(); } // Returns true iff the unit test passed (i.e. all test cases passed). bool UnitTest::Passed() const { return impl()->Passed(); } // Returns true iff the unit test failed (i.e. some test case failed // or something outside of all tests failed). 
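//
// Illustrative sketch (editorial addition): querying the accessors above
// after a run. Every call here is public API implemented in this file;
// only the surrounding main() scaffolding is assumed.
//
//   testing::InitGoogleTest(&argc, argv);
//   const int rc = RUN_ALL_TESTS();
//   const testing::UnitTest* unit_test = testing::UnitTest::GetInstance();
//   printf("%d of %d tests passed (exit code %d)\n",
//          unit_test->successful_test_count(),
//          unit_test->test_to_run_count(), rc);
//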
bool UnitTest::Failed() const { return impl()->Failed(); } // Gets the i-th test case among all the test cases. i can range from 0 to // total_test_case_count() - 1. If i is not in that range, returns NULL. const TestCase* UnitTest::GetTestCase(int i) const { return impl()->GetTestCase(i); } // Returns the TestResult containing information on test failures and // properties logged outside of individual test cases. const TestResult& UnitTest::ad_hoc_test_result() const { return *impl()->ad_hoc_test_result(); } // Gets the i-th test case among all the test cases. i can range from 0 to // total_test_case_count() - 1. If i is not in that range, returns NULL. TestCase* UnitTest::GetMutableTestCase(int i) { return impl()->GetMutableTestCase(i); } // Returns the list of event listeners that can be used to track events // inside Google Test. TestEventListeners& UnitTest::listeners() { return *impl()->listeners(); } // Registers and returns a global test environment. When a test // program is run, all global test environments will be set-up in the // order they were registered. After all tests in the program have // finished, all global test environments will be torn-down in the // *reverse* order they were registered. // // The UnitTest object takes ownership of the given environment. // // We don't protect this under mutex_, as we only support calling it // from the main thread. Environment* UnitTest::AddEnvironment(Environment* env) { if (env == NULL) { return NULL; } impl_->environments().push_back(env); return env; } // Adds a TestPartResult to the current TestResult object. All Google Test // assertion macros (e.g. ASSERT_TRUE, EXPECT_EQ, etc) eventually call // this to report their results. The user code should use the // assertion macros instead of calling this directly. void UnitTest::AddTestPartResult( TestPartResult::Type result_type, const char* file_name, int line_number, const std::string& message, const std::string& os_stack_trace) GTEST_LOCK_EXCLUDED_(mutex_) { Message msg; msg << message; internal::MutexLock lock(&mutex_); if (impl_->gtest_trace_stack().size() > 0) { msg << "\n" << GTEST_NAME_ << " trace:"; for (int i = static_cast<int>(impl_->gtest_trace_stack().size()); i > 0; --i) { const internal::TraceInfo& trace = impl_->gtest_trace_stack()[i - 1]; msg << "\n" << internal::FormatFileLocation(trace.file, trace.line) << " " << trace.message; } } if (os_stack_trace.c_str() != NULL && !os_stack_trace.empty()) { msg << internal::kStackTraceMarker << os_stack_trace; } const TestPartResult result = TestPartResult(result_type, file_name, line_number, msg.GetString().c_str()); impl_->GetTestPartResultReporterForCurrentThread()-> ReportTestPartResult(result); if (result_type != TestPartResult::kSuccess) { // gtest_break_on_failure takes precedence over // gtest_throw_on_failure. This allows a user to set the latter // in the code (perhaps in order to use Google Test assertions // with another testing framework) and specify the former on the // command line for debugging. if (GTEST_FLAG(break_on_failure)) { #if GTEST_OS_WINDOWS && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT // Using DebugBreak on Windows allows gtest to still break into a debugger // when a failure happens and both the --gtest_break_on_failure and // the --gtest_catch_exceptions flags are specified.
DebugBreak(); #elif (!defined(__native_client__)) && \ ((defined(__clang__) || defined(__GNUC__)) && \ (defined(__x86_64__) || defined(__i386__))) // with clang/gcc we can achieve the same effect on x86 by invoking int3 asm("int3"); #else // Dereference NULL through a volatile pointer to prevent the compiler // from removing. We use this rather than abort() or __builtin_trap() for // portability: Symbian doesn't implement abort() well, and some debuggers // don't correctly trap abort(). *static_cast<volatile int*>(NULL) = 1; #endif // GTEST_OS_WINDOWS } else if (GTEST_FLAG(throw_on_failure)) { #if GTEST_HAS_EXCEPTIONS throw internal::GoogleTestFailureException(result); #else // We cannot call abort() as it generates a pop-up in debug mode // that cannot be suppressed in VC 7.1 or below. exit(1); #endif } } } // Adds a TestProperty to the current TestResult object when invoked from // inside a test, to current TestCase's ad_hoc_test_result_ when invoked // from SetUpTestCase or TearDownTestCase, or to the global property set // when invoked elsewhere. If the result already contains a property with // the same key, the value will be updated. void UnitTest::RecordProperty(const std::string& key, const std::string& value) { impl_->RecordProperty(TestProperty(key, value)); } // Runs all tests in this UnitTest object and prints the result. // Returns 0 if successful, or 1 otherwise. // // We don't protect this under mutex_, as we only support calling it // from the main thread. int UnitTest::Run() { const bool in_death_test_child_process = internal::GTEST_FLAG(internal_run_death_test).length() > 0; // Google Test implements this protocol for catching that a test // program exits before returning control to Google Test: // // 1. Upon start, Google Test creates a file whose absolute path // is specified by the environment variable // TEST_PREMATURE_EXIT_FILE. // 2. When Google Test has finished its work, it deletes the file. // // This allows a test runner to set TEST_PREMATURE_EXIT_FILE before // running a Google-Test-based test program and check the existence // of the file at the end of the test execution to see if it has // exited prematurely. // If we are in the child process of a death test, don't // create/delete the premature exit file, as doing so is unnecessary // and will confuse the parent process. Otherwise, create/delete // the file upon entering/leaving this function. If the program // somehow exits before this function has a chance to return, the // premature-exit file will be left undeleted, causing a test runner // that understands the premature-exit-file protocol to report the // test as having failed. const internal::ScopedPrematureExitFile premature_exit_file( in_death_test_child_process ? NULL : internal::posix::GetEnv("TEST_PREMATURE_EXIT_FILE")); // Captures the value of GTEST_FLAG(catch_exceptions). This value will be // used for the duration of the program. impl()->set_catch_exceptions(GTEST_FLAG(catch_exceptions)); #if GTEST_OS_WINDOWS // Either the user wants Google Test to catch exceptions thrown by the // tests or this is executing in the context of death test child // process. In either case the user does not want to see pop-up dialogs // about crashes - they are expected. if (impl()->catch_exceptions() || in_death_test_child_process) { # if !GTEST_OS_WINDOWS_MOBILE && !GTEST_OS_WINDOWS_PHONE && !GTEST_OS_WINDOWS_RT // SetErrorMode doesn't exist on CE.
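// Editorial aside (an illustrative sketch, not part of the original
// source): a runner driving the premature-exit-file protocol described
// in Run() above might look roughly like this; the file path is
// hypothetical.
//
//   setenv("TEST_PREMATURE_EXIT_FILE", "/tmp/gtest_premature_exit", 1);
//   ... spawn the test binary and wait for it to finish ...
//   if (access("/tmp/gtest_premature_exit", F_OK) == 0) {
//     // The file survived: the binary exited before RUN_ALL_TESTS()
//     // returned, so report the run as failed.
//   }
//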
SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOALIGNMENTFAULTEXCEPT | SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX); # endif // !GTEST_OS_WINDOWS_MOBILE # if (defined(_MSC_VER) || GTEST_OS_WINDOWS_MINGW) && !GTEST_OS_WINDOWS_MOBILE // Death test children can be terminated with _abort(). On Windows, // _abort() can show a dialog with a warning message. This forces the // abort message to go to stderr instead. _set_error_mode(_OUT_TO_STDERR); # endif # if _MSC_VER >= 1400 && !GTEST_OS_WINDOWS_MOBILE // In the debug version, Visual Studio pops up a separate dialog // offering a choice to debug the aborted program. We need to suppress // this dialog or it will pop up for every EXPECT/ASSERT_DEATH statement // executed. Google Test will notify the user of any unexpected // failure via stderr. // // VC++ doesn't define _set_abort_behavior() prior to the version 8.0. // Users of prior VC versions shall suffer the agony and pain of // clicking through the countless debug dialogs. // FIXME: find a way to suppress the abort dialog() in the // debug mode when compiled with VC 7.1 or lower. if (!GTEST_FLAG(break_on_failure)) _set_abort_behavior( 0x0, // Clear the following flags: _WRITE_ABORT_MSG | _CALL_REPORTFAULT); // pop-up window, core dump. # endif } #endif // GTEST_OS_WINDOWS return internal::HandleExceptionsInMethodIfSupported( impl(), &internal::UnitTestImpl::RunAllTests, "auxiliary test code (environments or event listeners)") ? 0 : 1; } // Returns the working directory when the first TEST() or TEST_F() was // executed. const char* UnitTest::original_working_dir() const { return impl_->original_working_dir_.c_str(); } // Returns the TestCase object for the test that's currently running, // or NULL if no test is running. const TestCase* UnitTest::current_test_case() const GTEST_LOCK_EXCLUDED_(mutex_) { internal::MutexLock lock(&mutex_); return impl_->current_test_case(); } // Returns the TestInfo object for the test that's currently running, // or NULL if no test is running. const TestInfo* UnitTest::current_test_info() const GTEST_LOCK_EXCLUDED_(mutex_) { internal::MutexLock lock(&mutex_); return impl_->current_test_info(); } // Returns the random seed used at the start of the current test run. int UnitTest::random_seed() const { return impl_->random_seed(); } // Returns ParameterizedTestCaseRegistry object used to keep track of // value-parameterized tests and instantiate and register them. internal::ParameterizedTestCaseRegistry& UnitTest::parameterized_test_registry() GTEST_LOCK_EXCLUDED_(mutex_) { return impl_->parameterized_test_registry(); } // Creates an empty UnitTest. UnitTest::UnitTest() { impl_ = new internal::UnitTestImpl(this); } // Destructor of UnitTest. UnitTest::~UnitTest() { delete impl_; } // Pushes a trace defined by SCOPED_TRACE() on to the per-thread // Google Test trace stack. void UnitTest::PushGTestTrace(const internal::TraceInfo& trace) GTEST_LOCK_EXCLUDED_(mutex_) { internal::MutexLock lock(&mutex_); impl_->gtest_trace_stack().push_back(trace); } // Pops a trace from the per-thread Google Test trace stack. 
void UnitTest::PopGTestTrace() GTEST_LOCK_EXCLUDED_(mutex_) { internal::MutexLock lock(&mutex_); impl_->gtest_trace_stack().pop_back(); } namespace internal { UnitTestImpl::UnitTestImpl(UnitTest* parent) : parent_(parent), GTEST_DISABLE_MSC_WARNINGS_PUSH_(4355 /* using this in initializer */) default_global_test_part_result_reporter_(this), default_per_thread_test_part_result_reporter_(this), GTEST_DISABLE_MSC_WARNINGS_POP_() global_test_part_result_repoter_( &default_global_test_part_result_reporter_), per_thread_test_part_result_reporter_( &default_per_thread_test_part_result_reporter_), parameterized_test_registry_(), parameterized_tests_registered_(false), last_death_test_case_(-1), current_test_case_(NULL), current_test_info_(NULL), ad_hoc_test_result_(), os_stack_trace_getter_(NULL), post_flag_parse_init_performed_(false), random_seed_(0), // Will be overridden by the flag before first use. random_(0), // Will be reseeded before first use. start_timestamp_(0), elapsed_time_(0), #if GTEST_HAS_DEATH_TEST death_test_factory_(new DefaultDeathTestFactory), #endif // Will be overridden by the flag before first use. catch_exceptions_(false) { listeners()->SetDefaultResultPrinter(new PrettyUnitTestResultPrinter); } UnitTestImpl::~UnitTestImpl() { // Deletes every TestCase. ForEach(test_cases_, internal::Delete<TestCase>); // Deletes every Environment. ForEach(environments_, internal::Delete<Environment>); delete os_stack_trace_getter_; } // Adds a TestProperty to the current TestResult object when invoked in a // context of a test, to current test case's ad_hoc_test_result when invoked // from SetUpTestCase/TearDownTestCase, or to the global property set // otherwise. If the result already contains a property with the same key, // the value will be updated. void UnitTestImpl::RecordProperty(const TestProperty& test_property) { std::string xml_element; TestResult* test_result; // TestResult appropriate for property recording. if (current_test_info_ != NULL) { xml_element = "testcase"; test_result = &(current_test_info_->result_); } else if (current_test_case_ != NULL) { xml_element = "testsuite"; test_result = &(current_test_case_->ad_hoc_test_result_); } else { xml_element = "testsuites"; test_result = &ad_hoc_test_result_; } test_result->RecordProperty(xml_element, test_property); } #if GTEST_HAS_DEATH_TEST // Disables event forwarding if the control is currently in a death test // subprocess. Must not be called before InitGoogleTest. void UnitTestImpl::SuppressTestEventsIfInSubprocess() { if (internal_run_death_test_flag_.get() != NULL) listeners()->SuppressEventForwarding(); } #endif // GTEST_HAS_DEATH_TEST // Initializes event listeners performing XML output as specified by // UnitTestOptions. Must not be called before InitGoogleTest. void UnitTestImpl::ConfigureXmlOutput() { const std::string& output_format = UnitTestOptions::GetOutputFormat(); if (output_format == "xml") { listeners()->SetDefaultXmlGenerator(new XmlUnitTestResultPrinter( UnitTestOptions::GetAbsolutePathToOutputFile().c_str())); } else if (output_format == "json") { listeners()->SetDefaultXmlGenerator(new JsonUnitTestResultPrinter( UnitTestOptions::GetAbsolutePathToOutputFile().c_str())); } else if (output_format != "") { GTEST_LOG_(WARNING) << "WARNING: unrecognized output format \"" << output_format << "\" ignored."; } } #if GTEST_CAN_STREAM_RESULTS_ // Initializes event listeners for streaming test results in string form. // Must not be called before InitGoogleTest.
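//
// Editorial note (illustrative, not from the original source): the flag
// consumed below has the form HOST:PORT, invoked for example as
//
//   ./my_test --gtest_stream_result_to=localhost:9000
//
// where my_test is a hypothetical test binary; results are then written
// over the socket opened by StreamingListener::SocketWriter::MakeConnection().
//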
void UnitTestImpl::ConfigureStreamingOutput() { const std::string& target = GTEST_FLAG(stream_result_to); if (!target.empty()) { const size_t pos = target.find(':'); if (pos != std::string::npos) { listeners()->Append(new StreamingListener(target.substr(0, pos), target.substr(pos+1))); } else { GTEST_LOG_(WARNING) << "unrecognized streaming target \"" << target << "\" ignored."; } } } #endif // GTEST_CAN_STREAM_RESULTS_ // Performs initialization dependent upon flag values obtained in // ParseGoogleTestFlagsOnly. Is called from InitGoogleTest after the call to // ParseGoogleTestFlagsOnly. In case a user neglects to call InitGoogleTest // this function is also called from RunAllTests. Since this function can be // called more than once, it has to be idempotent. void UnitTestImpl::PostFlagParsingInit() { // Ensures that this function does not execute more than once. if (!post_flag_parse_init_performed_) { post_flag_parse_init_performed_ = true; #if defined(GTEST_CUSTOM_TEST_EVENT_LISTENER_) // Register to send notifications about key process state changes. listeners()->Append(new GTEST_CUSTOM_TEST_EVENT_LISTENER_()); #endif // defined(GTEST_CUSTOM_TEST_EVENT_LISTENER_) #if GTEST_HAS_DEATH_TEST InitDeathTestSubprocessControlInfo(); SuppressTestEventsIfInSubprocess(); #endif // GTEST_HAS_DEATH_TEST // Registers parameterized tests. This makes parameterized tests // available to the UnitTest reflection API without running // RUN_ALL_TESTS. RegisterParameterizedTests(); // Configures listeners for XML output. This makes it possible for users // to shut down the default XML output before invoking RUN_ALL_TESTS. ConfigureXmlOutput(); #if GTEST_CAN_STREAM_RESULTS_ // Configures listeners for streaming test results to the specified server. ConfigureStreamingOutput(); #endif // GTEST_CAN_STREAM_RESULTS_ #if GTEST_HAS_ABSL if (GTEST_FLAG(install_failure_signal_handler)) { absl::FailureSignalHandlerOptions options; absl::InstallFailureSignalHandler(options); } #endif // GTEST_HAS_ABSL } } // A predicate that checks the name of a TestCase against a known // value. // // This is used for implementation of the UnitTest class only. We put // it in the anonymous namespace to prevent polluting the outer // namespace. // // TestCaseNameIs is copyable. class TestCaseNameIs { public: // Constructor. explicit TestCaseNameIs(const std::string& name) : name_(name) {} // Returns true iff the name of test_case matches name_. bool operator()(const TestCase* test_case) const { return test_case != NULL && strcmp(test_case->name(), name_.c_str()) == 0; } private: std::string name_; }; // Finds and returns a TestCase with the given name. If one doesn't // exist, creates one and returns it. It's the CALLER'S // RESPONSIBILITY to ensure that this function is only called WHEN THE // TESTS ARE NOT SHUFFLED. // // Arguments: // // test_case_name: name of the test case // type_param: the name of the test case's type parameter, or NULL if // this is not a typed or a type-parameterized test case. // set_up_tc: pointer to the function that sets up the test case // tear_down_tc: pointer to the function that tears down the test case TestCase* UnitTestImpl::GetTestCase(const char* test_case_name, const char* type_param, Test::SetUpTestCaseFunc set_up_tc, Test::TearDownTestCaseFunc tear_down_tc) { // Can we find a TestCase with the given name? 
const std::vector<TestCase*>::const_reverse_iterator test_case = std::find_if(test_cases_.rbegin(), test_cases_.rend(), TestCaseNameIs(test_case_name)); if (test_case != test_cases_.rend()) return *test_case; // No. Let's create one. TestCase* const new_test_case = new TestCase(test_case_name, type_param, set_up_tc, tear_down_tc); // Is this a death test case? if (internal::UnitTestOptions::MatchesFilter(test_case_name, kDeathTestCaseFilter)) { // Yes. Inserts the test case after the last death test case // defined so far. This only works when the test cases haven't // been shuffled. Otherwise we may end up running a death test // after a non-death test. ++last_death_test_case_; test_cases_.insert(test_cases_.begin() + last_death_test_case_, new_test_case); } else { // No. Appends to the end of the list. test_cases_.push_back(new_test_case); } test_case_indices_.push_back(static_cast<int>(test_case_indices_.size())); return new_test_case; } // Helpers for setting up / tearing down the given environment. They // are for use in the ForEach() function. static void SetUpEnvironment(Environment* env) { env->SetUp(); } static void TearDownEnvironment(Environment* env) { env->TearDown(); } // Runs all tests in this UnitTest object, prints the result, and // returns true if all tests are successful. If any exception is // thrown during a test, the test is considered to be failed, but the // rest of the tests will still be run. // // When parameterized tests are enabled, it expands and registers // parameterized tests first in RegisterParameterizedTests(). // All other functions called from RunAllTests() may safely assume that // parameterized tests are ready to be counted and run. bool UnitTestImpl::RunAllTests() { // True iff Google Test is initialized before RUN_ALL_TESTS() is called. const bool gtest_is_initialized_before_run_all_tests = GTestIsInitialized(); // Do not run any test if the --help flag was specified. if (g_help_flag) return true; // Repeats the call to the post-flag parsing initialization in case the // user didn't call InitGoogleTest. PostFlagParsingInit(); // Even if sharding is not on, test runners may want to use the // GTEST_SHARD_STATUS_FILE to query whether the test supports the sharding // protocol. internal::WriteToShardStatusFileIfNeeded(); // True iff we are in a subprocess for running a thread-safe-style // death test. bool in_subprocess_for_death_test = false; #if GTEST_HAS_DEATH_TEST in_subprocess_for_death_test = (internal_run_death_test_flag_.get() != NULL); # if defined(GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_) if (in_subprocess_for_death_test) { GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_(); } # endif // defined(GTEST_EXTRA_DEATH_TEST_CHILD_SETUP_) #endif // GTEST_HAS_DEATH_TEST const bool should_shard = ShouldShard(kTestTotalShards, kTestShardIndex, in_subprocess_for_death_test); // Compares the full test names with the filter to decide which // tests to run. const bool has_tests_to_run = FilterTests(should_shard ? HONOR_SHARDING_PROTOCOL : IGNORE_SHARDING_PROTOCOL) > 0; // Lists the tests and exits if the --gtest_list_tests flag was specified. if (GTEST_FLAG(list_tests)) { // This must be called *after* FilterTests() has been called. ListTestsMatchingFilter(); return true; } random_seed_ = GTEST_FLAG(shuffle) ? GetRandomSeedFromFlag(GTEST_FLAG(random_seed)) : 0; // True iff at least one test has failed.
bool failed = false; TestEventListener* repeater = listeners()->repeater(); start_timestamp_ = GetTimeInMillis(); repeater->OnTestProgramStart(*parent_); // How many times to repeat the tests? We don't want to repeat them // when we are inside the subprocess of a death test. const int repeat = in_subprocess_for_death_test ? 1 : GTEST_FLAG(repeat); // Repeats forever if the repeat count is negative. const bool forever = repeat < 0; for (int i = 0; forever || i != repeat; i++) { // We want to preserve failures generated by ad-hoc test // assertions executed before RUN_ALL_TESTS(). ClearNonAdHocTestResult(); const TimeInMillis start = GetTimeInMillis(); // Shuffles test cases and tests if requested. if (has_tests_to_run && GTEST_FLAG(shuffle)) { random()->Reseed(random_seed_); // This should be done before calling OnTestIterationStart(), // such that a test event listener can see the actual test order // in the event. ShuffleTests(); } // Tells the unit test event listeners that the tests are about to start. repeater->OnTestIterationStart(*parent_, i); // Runs each test case if there is at least one test to run. if (has_tests_to_run) { // Sets up all environments beforehand. repeater->OnEnvironmentsSetUpStart(*parent_); ForEach(environments_, SetUpEnvironment); repeater->OnEnvironmentsSetUpEnd(*parent_); // Runs the tests only if there was no fatal failure during global // set-up. if (!Test::HasFatalFailure()) { for (int test_index = 0; test_index < total_test_case_count(); test_index++) { GetMutableTestCase(test_index)->Run(); } } // Tears down all environments in reverse order afterwards. repeater->OnEnvironmentsTearDownStart(*parent_); std::for_each(environments_.rbegin(), environments_.rend(), TearDownEnvironment); repeater->OnEnvironmentsTearDownEnd(*parent_); } elapsed_time_ = GetTimeInMillis() - start; // Tells the unit test event listener that the tests have just finished. repeater->OnTestIterationEnd(*parent_, i); // Gets the result and clears it. if (!Passed()) { failed = true; } // Restores the original test order after the iteration. This // allows the user to quickly repro a failure that happens in the // N-th iteration without repeating the first (N - 1) iterations. // This is not enclosed in "if (GTEST_FLAG(shuffle)) { ... }", in // case the user somehow changes the value of the flag somewhere // (it's always safe to unshuffle the tests). UnshuffleTests(); if (GTEST_FLAG(shuffle)) { // Picks a new random seed for each iteration. random_seed_ = GetNextRandomSeed(random_seed_); } } repeater->OnTestProgramEnd(*parent_); if (!gtest_is_initialized_before_run_all_tests) { ColoredPrintf( COLOR_RED, "\nIMPORTANT NOTICE - DO NOT IGNORE:\n" "This test program did NOT call " GTEST_INIT_GOOGLE_TEST_NAME_ "() before calling RUN_ALL_TESTS(). This is INVALID. Soon " GTEST_NAME_ " will start to enforce the valid usage. " "Please fix it ASAP, or IT WILL START TO FAIL.\n"); // NOLINT #if GTEST_FOR_GOOGLE_ ColoredPrintf(COLOR_RED, "For more details, see http://wiki/Main/ValidGUnitMain.\n"); #endif // GTEST_FOR_GOOGLE_ } return !failed; } // Reads the GTEST_SHARD_STATUS_FILE environment variable, and creates the file // if the variable is present. If a file already exists at this location, this // function will write over it. If the variable is present, but the file cannot // be created, prints an error and exits. 
void WriteToShardStatusFileIfNeeded() { const char* const test_shard_file = posix::GetEnv(kTestShardStatusFile); if (test_shard_file != NULL) { FILE* const file = posix::FOpen(test_shard_file, "w"); if (file == NULL) { ColoredPrintf(COLOR_RED, "Could not write to the test shard status file \"%s\" " "specified by the %s environment variable.\n", test_shard_file, kTestShardStatusFile); fflush(stdout); exit(EXIT_FAILURE); } fclose(file); } } // Checks whether sharding is enabled by examining the relevant // environment variable values. If the variables are present, // but inconsistent (i.e., shard_index >= total_shards), prints // an error and exits. If in_subprocess_for_death_test, sharding is // disabled because it must only be applied to the original test // process. Otherwise, we could filter out death tests we intended to execute. bool ShouldShard(const char* total_shards_env, const char* shard_index_env, bool in_subprocess_for_death_test) { if (in_subprocess_for_death_test) { return false; } const Int32 total_shards = Int32FromEnvOrDie(total_shards_env, -1); const Int32 shard_index = Int32FromEnvOrDie(shard_index_env, -1); if (total_shards == -1 && shard_index == -1) { return false; } else if (total_shards == -1 && shard_index != -1) { const Message msg = Message() << "Invalid environment variables: you have " << kTestShardIndex << " = " << shard_index << ", but have left " << kTestTotalShards << " unset.\n"; ColoredPrintf(COLOR_RED, msg.GetString().c_str()); fflush(stdout); exit(EXIT_FAILURE); } else if (total_shards != -1 && shard_index == -1) { const Message msg = Message() << "Invalid environment variables: you have " << kTestTotalShards << " = " << total_shards << ", but have left " << kTestShardIndex << " unset.\n"; ColoredPrintf(COLOR_RED, msg.GetString().c_str()); fflush(stdout); exit(EXIT_FAILURE); } else if (shard_index < 0 || shard_index >= total_shards) { const Message msg = Message() << "Invalid environment variables: we require 0 <= " << kTestShardIndex << " < " << kTestTotalShards << ", but you have " << kTestShardIndex << "=" << shard_index << ", " << kTestTotalShards << "=" << total_shards << ".\n"; ColoredPrintf(COLOR_RED, msg.GetString().c_str()); fflush(stdout); exit(EXIT_FAILURE); } return total_shards > 1; } // Parses the environment variable var as an Int32. If it is unset, // returns default_val. If it is not an Int32, prints an error // and aborts. Int32 Int32FromEnvOrDie(const char* var, Int32 default_val) { const char* str_val = posix::GetEnv(var); if (str_val == NULL) { return default_val; } Int32 result; if (!ParseInt32(Message() << "The value of environment variable " << var, str_val, &result)) { exit(EXIT_FAILURE); } return result; } // Given the total number of shards, the shard index, and the test id, // returns true iff the test should be run on this shard. The test id is // some arbitrary but unique non-negative integer assigned to each test // method. Assumes that 0 <= shard_index < total_shards. bool ShouldRunTestOnShard(int total_shards, int shard_index, int test_id) { return (test_id % total_shards) == shard_index; } // Compares the name of each test with the user-specified filter to // decide whether the test should be run, then records the result in // each TestCase and TestInfo object. // If shard_tests == true, further filters tests based on sharding // variables in the environment - see // https://github.com/google/googletest/blob/master/googletest/docs/advanced.md // . Returns the number of tests that should run. 
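//
// Editorial worked example (not part of the original source): with
// GTEST_TOTAL_SHARDS=3, ShouldRunTestOnShard() above distributes test
// ids round-robin across the shards:
//
//   shard 0 runs tests 0, 3, 6, ...   (test_id % 3 == 0)
//   shard 1 runs tests 1, 4, 7, ...   (test_id % 3 == 1)
//   shard 2 runs tests 2, 5, 8, ...   (test_id % 3 == 2)
//
// so FilterTests() below selects a disjoint subset on each shard.
//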
int UnitTestImpl::FilterTests(ReactionToSharding shard_tests) { const Int32 total_shards = shard_tests == HONOR_SHARDING_PROTOCOL ? Int32FromEnvOrDie(kTestTotalShards, -1) : -1; const Int32 shard_index = shard_tests == HONOR_SHARDING_PROTOCOL ? Int32FromEnvOrDie(kTestShardIndex, -1) : -1; // num_runnable_tests are the number of tests that will // run across all shards (i.e., match filter and are not disabled). // num_selected_tests are the number of tests to be run on // this shard. int num_runnable_tests = 0; int num_selected_tests = 0; for (size_t i = 0; i < test_cases_.size(); i++) { TestCase* const test_case = test_cases_[i]; const std::string &test_case_name = test_case->name(); test_case->set_should_run(false); for (size_t j = 0; j < test_case->test_info_list().size(); j++) { TestInfo* const test_info = test_case->test_info_list()[j]; const std::string test_name(test_info->name()); // A test is disabled if test case name or test name matches // kDisableTestFilter. const bool is_disabled = internal::UnitTestOptions::MatchesFilter(test_case_name, kDisableTestFilter) || internal::UnitTestOptions::MatchesFilter(test_name, kDisableTestFilter); test_info->is_disabled_ = is_disabled; const bool matches_filter = internal::UnitTestOptions::FilterMatchesTest(test_case_name, test_name); test_info->matches_filter_ = matches_filter; const bool is_runnable = (GTEST_FLAG(also_run_disabled_tests) || !is_disabled) && matches_filter; const bool is_in_another_shard = shard_tests != IGNORE_SHARDING_PROTOCOL && !ShouldRunTestOnShard(total_shards, shard_index, num_runnable_tests); test_info->is_in_another_shard_ = is_in_another_shard; const bool is_selected = is_runnable && !is_in_another_shard; num_runnable_tests += is_runnable; num_selected_tests += is_selected; test_info->should_run_ = is_selected; test_case->set_should_run(test_case->should_run() || is_selected); } } return num_selected_tests; } // Prints the given C-string on a single line by replacing all '\n' // characters with string "\\n". If the output takes more than // max_length characters, only prints the first max_length characters // and "...". static void PrintOnOneLine(const char* str, int max_length) { if (str != NULL) { for (int i = 0; *str != '\0'; ++str) { if (i >= max_length) { printf("..."); break; } if (*str == '\n') { printf("\\n"); i += 2; } else { printf("%c", *str); ++i; } } } } // Prints the names of the tests matching the user-specified filter flag. void UnitTestImpl::ListTestsMatchingFilter() { // Print at most this many characters for each type/value parameter. const int kMaxParamLength = 250; for (size_t i = 0; i < test_cases_.size(); i++) { const TestCase* const test_case = test_cases_[i]; bool printed_test_case_name = false; for (size_t j = 0; j < test_case->test_info_list().size(); j++) { const TestInfo* const test_info = test_case->test_info_list()[j]; if (test_info->matches_filter_) { if (!printed_test_case_name) { printed_test_case_name = true; printf("%s.", test_case->name()); if (test_case->type_param() != NULL) { printf(" # %s = ", kTypeParamLabel); // We print the type parameter on a single line to make // the output easy to parse by a program. PrintOnOneLine(test_case->type_param(), kMaxParamLength); } printf("\n"); } printf(" %s", test_info->name()); if (test_info->value_param() != NULL) { printf(" # %s = ", kValueParamLabel); // We print the value parameter on a single line to make the // output easy to parse by a program. 
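//
// Editorial aside (illustrative output, not from the original source):
// this function prints something like the following, where the test and
// parameter names are hypothetical:
//
//   FooTest.
//     HandlesInput
//     RejectsInput  # GetParam() = 42
//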
PrintOnOneLine(test_info->value_param(), kMaxParamLength); } printf("\n"); } } } fflush(stdout); const std::string& output_format = UnitTestOptions::GetOutputFormat(); if (output_format == "xml" || output_format == "json") { FILE* fileout = OpenFileForWriting( UnitTestOptions::GetAbsolutePathToOutputFile().c_str()); std::stringstream stream; if (output_format == "xml") { XmlUnitTestResultPrinter( UnitTestOptions::GetAbsolutePathToOutputFile().c_str()) .PrintXmlTestsList(&stream, test_cases_); } else if (output_format == "json") { JsonUnitTestResultPrinter( UnitTestOptions::GetAbsolutePathToOutputFile().c_str()) .PrintJsonTestList(&stream, test_cases_); } fprintf(fileout, "%s", StringStreamToString(&stream).c_str()); fclose(fileout); } } // Sets the OS stack trace getter. // // Does nothing if the input and the current OS stack trace getter are // the same; otherwise, deletes the old getter and makes the input the // current getter. void UnitTestImpl::set_os_stack_trace_getter( OsStackTraceGetterInterface* getter) { if (os_stack_trace_getter_ != getter) { delete os_stack_trace_getter_; os_stack_trace_getter_ = getter; } } // Returns the current OS stack trace getter if it is not NULL; // otherwise, creates an OsStackTraceGetter, makes it the current // getter, and returns it. OsStackTraceGetterInterface* UnitTestImpl::os_stack_trace_getter() { if (os_stack_trace_getter_ == NULL) { #ifdef GTEST_OS_STACK_TRACE_GETTER_ os_stack_trace_getter_ = new GTEST_OS_STACK_TRACE_GETTER_; #else os_stack_trace_getter_ = new OsStackTraceGetter; #endif // GTEST_OS_STACK_TRACE_GETTER_ } return os_stack_trace_getter_; } // Returns the most specific TestResult currently running. TestResult* UnitTestImpl::current_test_result() { if (current_test_info_ != NULL) { return &current_test_info_->result_; } if (current_test_case_ != NULL) { return &current_test_case_->ad_hoc_test_result_; } return &ad_hoc_test_result_; } // Shuffles all test cases, and the tests within each test case, // making sure that death tests are still run first. void UnitTestImpl::ShuffleTests() { // Shuffles the death test cases. ShuffleRange(random(), 0, last_death_test_case_ + 1, &test_case_indices_); // Shuffles the non-death test cases. ShuffleRange(random(), last_death_test_case_ + 1, static_cast<int>(test_cases_.size()), &test_case_indices_); // Shuffles the tests inside each test case. for (size_t i = 0; i < test_cases_.size(); i++) { test_cases_[i]->ShuffleTests(random()); } } // Restores the test cases and tests to their order before the first shuffle. void UnitTestImpl::UnshuffleTests() { for (size_t i = 0; i < test_cases_.size(); i++) { // Unshuffles the tests in each test case. test_cases_[i]->UnshuffleTests(); // Resets the index of each test case. test_case_indices_[i] = static_cast<int>(i); } } // Returns the current OS stack trace as an std::string. // // The maximum number of stack frames to be included is specified by // the gtest_stack_trace_depth flag. The skip_count parameter // specifies the number of top frames to be skipped, which doesn't // count against the number of frames to be included. // // For example, if Foo() calls Bar(), which in turn calls // GetCurrentOsStackTraceExceptTop(..., 1), Foo() will be included in // the trace but Bar() and GetCurrentOsStackTraceExceptTop() won't. std::string GetCurrentOsStackTraceExceptTop(UnitTest* /*unit_test*/, int skip_count) { // We pass skip_count + 1 to skip this wrapper function in addition // to what the user really wants to skip.
return GetUnitTestImpl()->CurrentOsStackTraceExceptTop(skip_count + 1); } // Used by the GTEST_SUPPRESS_UNREACHABLE_CODE_WARNING_BELOW_ macro to // suppress unreachable code warnings. namespace { class ClassUniqueToAlwaysTrue {}; } bool IsTrue(bool condition) { return condition; } bool AlwaysTrue() { #if GTEST_HAS_EXCEPTIONS // This condition is always false so AlwaysTrue() never actually throws, // but it makes the compiler think that it may throw. if (IsTrue(false)) throw ClassUniqueToAlwaysTrue(); #endif // GTEST_HAS_EXCEPTIONS return true; } // If *pstr starts with the given prefix, modifies *pstr to be right // past the prefix and returns true; otherwise leaves *pstr unchanged // and returns false. None of pstr, *pstr, and prefix can be NULL. bool SkipPrefix(const char* prefix, const char** pstr) { const size_t prefix_len = strlen(prefix); if (strncmp(*pstr, prefix, prefix_len) == 0) { *pstr += prefix_len; return true; } return false; } // Parses a string as a command line flag. The string should have // the format "--flag=value". When def_optional is true, the "=value" // part can be omitted. // // Returns the value of the flag, or NULL if the parsing failed. static const char* ParseFlagValue(const char* str, const char* flag, bool def_optional) { // str and flag must not be NULL. if (str == NULL || flag == NULL) return NULL; // The flag must start with "--" followed by GTEST_FLAG_PREFIX_. const std::string flag_str = std::string("--") + GTEST_FLAG_PREFIX_ + flag; const size_t flag_len = flag_str.length(); if (strncmp(str, flag_str.c_str(), flag_len) != 0) return NULL; // Skips the flag name. const char* flag_end = str + flag_len; // When def_optional is true, it's OK to not have a "=value" part. if (def_optional && (flag_end[0] == '\0')) { return flag_end; } // If def_optional is true and there are more characters after the // flag name, or if def_optional is false, there must be a '=' after // the flag name. if (flag_end[0] != '=') return NULL; // Returns the string after "=". return flag_end + 1; } // Parses a string for a bool flag, in the form of either // "--flag=value" or "--flag". // // In the former case, the value is taken as true as long as it does // not start with '0', 'f', or 'F'. // // In the latter case, the value is taken as true. // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. static bool ParseBoolFlag(const char* str, const char* flag, bool* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, true); // Aborts if the parsing failed. if (value_str == NULL) return false; // Converts the string value to a bool. *value = !(*value_str == '0' || *value_str == 'f' || *value_str == 'F'); return true; } // Parses a string for an Int32 flag, in the form of // "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. On failure, returns false without changing *value. bool ParseInt32Flag(const char* str, const char* flag, Int32* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, false); // Aborts if the parsing failed. if (value_str == NULL) return false; // Sets *value to the value of the flag. return ParseInt32(Message() << "The value of flag --" << flag, value_str, value); } // Parses a string for a string flag, in the form of // "--flag=value". // // On success, stores the value of the flag in *value, and returns // true. 
On failure, returns false without changing *value. template <typename String> static bool ParseStringFlag(const char* str, const char* flag, String* value) { // Gets the value of the flag as a string. const char* const value_str = ParseFlagValue(str, flag, false); // Aborts if the parsing failed. if (value_str == NULL) return false; // Sets *value to the value of the flag. *value = value_str; return true; } // Determines whether a string has a prefix that Google Test uses for its // flags, i.e., starts with GTEST_FLAG_PREFIX_ or GTEST_FLAG_PREFIX_DASH_. // If Google Test detects that a command line flag has its prefix but is not // recognized, it will print its help message. Flags starting with // GTEST_INTERNAL_PREFIX_ followed by "internal_" are considered Google Test // internal flags and do not trigger the help message. static bool HasGoogleTestFlagPrefix(const char* str) { return (SkipPrefix("--", &str) || SkipPrefix("-", &str) || SkipPrefix("/", &str)) && !SkipPrefix(GTEST_FLAG_PREFIX_ "internal_", &str) && (SkipPrefix(GTEST_FLAG_PREFIX_, &str) || SkipPrefix(GTEST_FLAG_PREFIX_DASH_, &str)); } // Prints a string containing code-encoded text. The following escape // sequences can be used in the string to control the text color: // // @@ prints a single '@' character. // @R changes the color to red. // @G changes the color to green. // @Y changes the color to yellow. // @D changes to the default terminal text color. // // FIXME: Write tests for this once we add stdout // capturing to Google Test. static void PrintColorEncoded(const char* str) { GTestColor color = COLOR_DEFAULT; // The current color. // Conceptually, we split the string into segments divided by escape // sequences. Then we print one segment at a time. At the end of // each iteration, the str pointer advances to the beginning of the // next segment. for (;;) { const char* p = strchr(str, '@'); if (p == NULL) { ColoredPrintf(color, "%s", str); return; } ColoredPrintf(color, "%s", std::string(str, p).c_str()); const char ch = p[1]; str = p + 2; if (ch == '@') { ColoredPrintf(color, "@"); } else if (ch == 'D') { color = COLOR_DEFAULT; } else if (ch == 'R') { color = COLOR_RED; } else if (ch == 'G') { color = COLOR_GREEN; } else if (ch == 'Y') { color = COLOR_YELLOW; } else { --str; } } } static const char kColorEncodedHelpMessage[] = "This program contains tests written using " GTEST_NAME_ ". You can use the\n" "following command line flags to control its behavior:\n" "\n" "Test Selection:\n" " @G--" GTEST_FLAG_PREFIX_ "list_tests@D\n" " List the names of all tests instead of running them. The name of\n" " TEST(Foo, Bar) is \"Foo.Bar\".\n" " @G--" GTEST_FLAG_PREFIX_ "filter=@YPOSITIVE_PATTERNS" "[@G-@YNEGATIVE_PATTERNS]@D\n" " Run only the tests whose name matches one of the positive patterns but\n" " none of the negative patterns. '?'
matches any single character; '*'\n" " matches any substring; ':' separates two patterns.\n" " @G--" GTEST_FLAG_PREFIX_ "also_run_disabled_tests@D\n" " Run all disabled tests too.\n" "\n" "Test Execution:\n" " @G--" GTEST_FLAG_PREFIX_ "repeat=@Y[COUNT]@D\n" " Run the tests repeatedly; use a negative count to repeat forever.\n" " @G--" GTEST_FLAG_PREFIX_ "shuffle@D\n" " Randomize tests' orders on every iteration.\n" " @G--" GTEST_FLAG_PREFIX_ "random_seed=@Y[NUMBER]@D\n" " Random number seed to use for shuffling test orders (between 1 and\n" " 99999, or 0 to use a seed based on the current time).\n" "\n" "Test Output:\n" " @G--" GTEST_FLAG_PREFIX_ "color=@Y(@Gyes@Y|@Gno@Y|@Gauto@Y)@D\n" " Enable/disable colored output. The default is @Gauto@D.\n" " -@G-" GTEST_FLAG_PREFIX_ "print_time=0@D\n" " Don't print the elapsed time of each test.\n" " @G--" GTEST_FLAG_PREFIX_ "output=@Y(@Gjson@Y|@Gxml@Y)[@G:@YDIRECTORY_PATH@G" GTEST_PATH_SEP_ "@Y|@G:@YFILE_PATH]@D\n" " Generate a JSON or XML report in the given directory or with the given\n" " file name. @YFILE_PATH@D defaults to @Gtest_details.xml@D.\n" # if GTEST_CAN_STREAM_RESULTS_ " @G--" GTEST_FLAG_PREFIX_ "stream_result_to=@YHOST@G:@YPORT@D\n" " Stream test results to the given server.\n" # endif // GTEST_CAN_STREAM_RESULTS_ "\n" "Assertion Behavior:\n" # if GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS " @G--" GTEST_FLAG_PREFIX_ "death_test_style=@Y(@Gfast@Y|@Gthreadsafe@Y)@D\n" " Set the default death test style.\n" # endif // GTEST_HAS_DEATH_TEST && !GTEST_OS_WINDOWS " @G--" GTEST_FLAG_PREFIX_ "break_on_failure@D\n" " Turn assertion failures into debugger break-points.\n" " @G--" GTEST_FLAG_PREFIX_ "throw_on_failure@D\n" " Turn assertion failures into C++ exceptions for use by an external\n" " test framework.\n" " @G--" GTEST_FLAG_PREFIX_ "catch_exceptions=0@D\n" " Do not report exceptions as test failures. Instead, allow them\n" " to crash the program or throw a pop-up (on Windows).\n" "\n" "Except for @G--" GTEST_FLAG_PREFIX_ "list_tests@D, you can alternatively set " "the corresponding\n" "environment variable of a flag (all letters in upper-case). For example, to\n" "disable colored text output, you can either specify @G--" GTEST_FLAG_PREFIX_ "color=no@D or set\n" "the @G" GTEST_FLAG_PREFIX_UPPER_ "COLOR@D environment variable to @Gno@D.\n" "\n" "For more information, please read the " GTEST_NAME_ " documentation at\n" "@G" GTEST_PROJECT_URL_ "@D. 
If you find a bug in " GTEST_NAME_ "\n" "(not one in your own code or tests), please report it to\n" "@G<" GTEST_DEV_EMAIL_ ">@D.\n"; static bool ParseGoogleTestFlag(const char* const arg) { return ParseBoolFlag(arg, kAlsoRunDisabledTestsFlag, &GTEST_FLAG(also_run_disabled_tests)) || ParseBoolFlag(arg, kBreakOnFailureFlag, &GTEST_FLAG(break_on_failure)) || ParseBoolFlag(arg, kCatchExceptionsFlag, &GTEST_FLAG(catch_exceptions)) || ParseStringFlag(arg, kColorFlag, &GTEST_FLAG(color)) || ParseStringFlag(arg, kDeathTestStyleFlag, &GTEST_FLAG(death_test_style)) || ParseBoolFlag(arg, kDeathTestUseFork, &GTEST_FLAG(death_test_use_fork)) || ParseStringFlag(arg, kFilterFlag, &GTEST_FLAG(filter)) || ParseStringFlag(arg, kInternalRunDeathTestFlag, &GTEST_FLAG(internal_run_death_test)) || ParseBoolFlag(arg, kListTestsFlag, &GTEST_FLAG(list_tests)) || ParseStringFlag(arg, kOutputFlag, &GTEST_FLAG(output)) || ParseBoolFlag(arg, kPrintTimeFlag, &GTEST_FLAG(print_time)) || ParseBoolFlag(arg, kPrintUTF8Flag, &GTEST_FLAG(print_utf8)) || ParseInt32Flag(arg, kRandomSeedFlag, &GTEST_FLAG(random_seed)) || ParseInt32Flag(arg, kRepeatFlag, &GTEST_FLAG(repeat)) || ParseBoolFlag(arg, kShuffleFlag, &GTEST_FLAG(shuffle)) || ParseInt32Flag(arg, kStackTraceDepthFlag, &GTEST_FLAG(stack_trace_depth)) || ParseStringFlag(arg, kStreamResultToFlag, &GTEST_FLAG(stream_result_to)) || ParseBoolFlag(arg, kThrowOnFailureFlag, &GTEST_FLAG(throw_on_failure)); } #if GTEST_USE_OWN_FLAGFILE_FLAG_ static void LoadFlagsFromFile(const std::string& path) { FILE* flagfile = posix::FOpen(path.c_str(), "r"); if (!flagfile) { GTEST_LOG_(FATAL) << "Unable to open file \"" << GTEST_FLAG(flagfile) << "\""; } std::string contents(ReadEntireFile(flagfile)); posix::FClose(flagfile); std::vector<std::string> lines; SplitString(contents, '\n', &lines); for (size_t i = 0; i < lines.size(); ++i) { if (lines[i].empty()) continue; if (!ParseGoogleTestFlag(lines[i].c_str())) g_help_flag = true; } } #endif // GTEST_USE_OWN_FLAGFILE_FLAG_ // Parses the command line for Google Test flags, without initializing // other parts of Google Test. The type parameter CharType can be // instantiated to either char or wchar_t. template <typename CharType> void ParseGoogleTestFlagsOnlyImpl(int* argc, CharType** argv) { for (int i = 1; i < *argc; i++) { const std::string arg_string = StreamableToString(argv[i]); const char* const arg = arg_string.c_str(); using internal::ParseBoolFlag; using internal::ParseInt32Flag; using internal::ParseStringFlag; bool remove_flag = false; if (ParseGoogleTestFlag(arg)) { remove_flag = true; #if GTEST_USE_OWN_FLAGFILE_FLAG_ } else if (ParseStringFlag(arg, kFlagfileFlag, &GTEST_FLAG(flagfile))) { LoadFlagsFromFile(GTEST_FLAG(flagfile)); remove_flag = true; #endif // GTEST_USE_OWN_FLAGFILE_FLAG_ } else if (arg_string == "--help" || arg_string == "-h" || arg_string == "-?" || arg_string == "/?" || HasGoogleTestFlagPrefix(arg)) { // Both help flag and unrecognized Google Test flags (excluding // internal ones) trigger help display. g_help_flag = true; } if (remove_flag) { // Shift the remainder of the argv list left by one. Note // that argv has (*argc + 1) elements, the last one always being // NULL. The following loop moves the trailing NULL element as // well. for (int j = i; j != *argc; j++) { argv[j] = argv[j + 1]; } // Decrements the argument count. (*argc)--; // We also need to decrement the iterator as we just removed // an element.
i--; } } if (g_help_flag) { // We print the help here instead of in RUN_ALL_TESTS(), as the // latter may not be called at all if the user is using Google // Test with another testing framework. PrintColorEncoded(kColorEncodedHelpMessage); } } // Parses the command line for Google Test flags, without initializing // other parts of Google Test. void ParseGoogleTestFlagsOnly(int* argc, char** argv) { ParseGoogleTestFlagsOnlyImpl(argc, argv); // Fix the value of *_NSGetArgc() on macOS, but iff // *_NSGetArgv() == argv // Only applicable to char** version of argv #if GTEST_OS_MAC #ifndef GTEST_OS_IOS if (*_NSGetArgv() == argv) { *_NSGetArgc() = *argc; } #endif #endif } void ParseGoogleTestFlagsOnly(int* argc, wchar_t** argv) { ParseGoogleTestFlagsOnlyImpl(argc, argv); } // The internal implementation of InitGoogleTest(). // // The type parameter CharType can be instantiated to either char or // wchar_t. template <typename CharType> void InitGoogleTestImpl(int* argc, CharType** argv) { // We don't want to run the initialization code twice. if (GTestIsInitialized()) return; if (*argc <= 0) return; g_argvs.clear(); for (int i = 0; i != *argc; i++) { g_argvs.push_back(StreamableToString(argv[i])); } #if GTEST_HAS_ABSL absl::InitializeSymbolizer(g_argvs[0].c_str()); #endif // GTEST_HAS_ABSL ParseGoogleTestFlagsOnly(argc, argv); GetUnitTestImpl()->PostFlagParsingInit(); } } // namespace internal // Initializes Google Test. This must be called before calling // RUN_ALL_TESTS(). In particular, it parses a command line for the // flags that Google Test recognizes. Whenever a Google Test flag is // seen, it is removed from argv, and *argc is decremented. // // No value is returned. Instead, the Google Test flag variables are // updated. // // Calling the function for the second time has no user-visible effect. void InitGoogleTest(int* argc, char** argv) { #if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_) GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(argc, argv); #else // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_) internal::InitGoogleTestImpl(argc, argv); #endif // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_) } // This overloaded version can be used in Windows programs compiled in // UNICODE mode. void InitGoogleTest(int* argc, wchar_t** argv) { #if defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_) GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_(argc, argv); #else // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_) internal::InitGoogleTestImpl(argc, argv); #endif // defined(GTEST_CUSTOM_INIT_GOOGLE_TEST_FUNCTION_) } std::string TempDir() { #if defined(GTEST_CUSTOM_TEMPDIR_FUNCTION_) return GTEST_CUSTOM_TEMPDIR_FUNCTION_(); #endif #if GTEST_OS_WINDOWS_MOBILE return "\\temp\\"; #elif GTEST_OS_WINDOWS const char* temp_dir = internal::posix::GetEnv("TEMP"); if (temp_dir == NULL || temp_dir[0] == '\0') return "\\temp\\"; else if (temp_dir[strlen(temp_dir) - 1] == '\\') return temp_dir; else return std::string(temp_dir) + "\\"; #elif GTEST_OS_LINUX_ANDROID return "/sdcard/"; #else return "/tmp/"; #endif // GTEST_OS_WINDOWS_MOBILE } // Class ScopedTrace // Pushes the given source file location and message onto a per-thread // trace stack maintained by Google Test. void ScopedTrace::PushTrace(const char* file, int line, std::string message) { internal::TraceInfo trace; trace.file = file; trace.line = line; trace.message.swap(message); UnitTest::GetInstance()->PushGTestTrace(trace); } // Pops the info pushed by the c'tor.
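//
// Illustrative sketch (editorial addition, not part of the original
// source): typical use of the trace stack pushed and popped here. The
// test names are hypothetical; SCOPED_TRACE is the real macro built on
// this class.
//
//   void CheckPositive(int n) { EXPECT_GT(n, 0); }
//   TEST(TraceTest, AnnotatesFailures) {
//     SCOPED_TRACE("first input");  // pushed via PushTrace() above
//     CheckPositive(0);             // this failure carries the trace
//   }                               // popped by ~ScopedTrace() below
//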
ScopedTrace::~ScopedTrace() GTEST_LOCK_EXCLUDED_(&UnitTest::mutex_) { UnitTest::GetInstance()->PopGTestTrace(); } } // namespace testing libvpx-1.8.2/third_party/googletest/src/src/gtest_main.cc000066400000000000000000000033431357355204000235610ustar00rootroot00000000000000// Copyright 2006, Google Inc. // All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: // // * Redistributions of source code must retain the above copyright // notice, this list of conditions and the following disclaimer. // * Redistributions in binary form must reproduce the above // copyright notice, this list of conditions and the following disclaimer // in the documentation and/or other materials provided with the // distribution. // * Neither the name of Google Inc. nor the names of its // contributors may be used to endorse or promote products derived from // this software without specific prior written permission. // // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include <cstdio> #include "gtest/gtest.h" GTEST_API_ int main(int argc, char **argv) { printf("Running main() from %s\n", __FILE__); testing::InitGoogleTest(&argc, argv); return RUN_ALL_TESTS(); } libvpx-1.8.2/third_party/libwebm/000077500000000000000000000000001357355204000170045ustar00rootroot00000000000000libvpx-1.8.2/third_party/libwebm/AUTHORS.TXT000066400000000000000000000001421357355204000205270ustar00rootroot00000000000000# Names should be added to this file like so: # Name or Organization Google Inc. libvpx-1.8.2/third_party/libwebm/Android.mk000066400000000000000000000011101357355204000207100ustar00rootroot00000000000000LOCAL_PATH:= $(call my-dir) include $(CLEAR_VARS) LOCAL_MODULE:= libwebm LOCAL_CPPFLAGS:=-D__STDC_CONSTANT_MACROS -D__STDC_FORMAT_MACROS LOCAL_CPPFLAGS+=-D__STDC_LIMIT_MACROS -std=c++11 LOCAL_C_INCLUDES:= $(LOCAL_PATH) LOCAL_EXPORT_C_INCLUDES:= $(LOCAL_PATH) LOCAL_SRC_FILES:= common/file_util.cc \ common/hdr_util.cc \ mkvparser/mkvparser.cc \ mkvparser/mkvreader.cc \ mkvmuxer/mkvmuxer.cc \ mkvmuxer/mkvmuxerutil.cc \ mkvmuxer/mkvwriter.cc include $(BUILD_STATIC_LIBRARY) libvpx-1.8.2/third_party/libwebm/LICENSE.TXT000066400000000000000000000027301357355204000204710ustar00rootroot00000000000000Copyright (c) 2010, Google Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. libvpx-1.8.2/third_party/libwebm/PATENTS.TXT000066400000000000000000000026341357355204000205300ustar00rootroot00000000000000Additional IP Rights Grant (Patents) ------------------------------------ "These implementations" means the copyrightable works that implement the WebM codecs distributed by Google as part of the WebM Project. Google hereby grants to you a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, transfer, and otherwise run, modify and propagate the contents of these implementations of WebM, where such license applies only to those patent claims, both currently owned by Google and acquired in the future, licensable by Google that are necessarily infringed by these implementations of WebM. This grant does not include claims that would be infringed only as a consequence of further modification of these implementations. If you or your agent or exclusive licensee institute or order or agree to the institution of patent litigation or any other patent enforcement activity against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that any of these implementations of WebM or any code incorporated within any of these implementations of WebM constitute direct or contributory patent infringement, or inducement of patent infringement, then any patent rights granted to you under this License for these implementations of WebM shall terminate as of the date such litigation is filed. libvpx-1.8.2/third_party/libwebm/README.libvpx000066400000000000000000000005701357355204000211710ustar00rootroot00000000000000URL: https://chromium.googlesource.com/webm/libwebm Version: 37d9b860ebbf40cb0f6dcb7a6fef452d798062da License: BSD License File: LICENSE.txt Description: libwebm is used to handle WebM container I/O. Local Changes: Only keep: - Android.mk - AUTHORS.TXT - common/ file_util.cc/h hdr_util.cc/h webmids.h - LICENSE.TXT - mkvmuxer/ - mkvparser/ - PATENTS.TXT libvpx-1.8.2/third_party/libwebm/common/000077500000000000000000000000001357355204000202745ustar00rootroot00000000000000libvpx-1.8.2/third_party/libwebm/common/file_util.cc000066400000000000000000000052351357355204000225640ustar00rootroot00000000000000// Copyright (c) 2016 The WebM project authors. All Rights Reserved. 
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.

#include "common/file_util.h"

#include <sys/stat.h>
#ifndef _MSC_VER
#include <unistd.h>  // close()
#endif

#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <ios>

namespace libwebm {

std::string GetTempFileName() {
#if !defined _MSC_VER && !defined __MINGW32__
  std::string temp_file_name_template_str =
      std::string(std::getenv("TEST_TMPDIR") ? std::getenv("TEST_TMPDIR")
                                             : ".") +
      "/libwebm_temp.XXXXXX";
  char* temp_file_name_template =
      new char[temp_file_name_template_str.length() + 1];
  memset(temp_file_name_template, 0,
         temp_file_name_template_str.length() + 1);
  temp_file_name_template_str.copy(temp_file_name_template,
                                   temp_file_name_template_str.length(), 0);
  int fd = mkstemp(temp_file_name_template);
  std::string temp_file_name =
      (fd != -1) ? std::string(temp_file_name_template) : std::string();
  delete[] temp_file_name_template;
  if (fd != -1) {
    close(fd);
  }
  return temp_file_name;
#else
  char tmp_file_name[_MAX_PATH];
#if defined _MSC_VER || defined MINGW_HAS_SECURE_API
  errno_t err = tmpnam_s(tmp_file_name);
#else
  char* fname_pointer = tmpnam(tmp_file_name);
  int err = (fname_pointer == &tmp_file_name[0]) ? 0 : -1;
#endif
  if (err == 0) {
    return std::string(tmp_file_name);
  }
  return std::string();
#endif
}

uint64_t GetFileSize(const std::string& file_name) {
  uint64_t file_size = 0;
#ifndef _MSC_VER
  struct stat st;
  st.st_size = 0;
  if (stat(file_name.c_str(), &st) == 0) {
#else
  struct _stat st;
  st.st_size = 0;
  if (_stat(file_name.c_str(), &st) == 0) {
#endif
    file_size = st.st_size;
  }
  return file_size;
}

bool GetFileContents(const std::string& file_name, std::string* contents) {
  std::ifstream file(file_name.c_str());
  *contents = std::string(static_cast<size_t>(GetFileSize(file_name)), 0);
  if (file.good() && contents->size()) {
    file.read(&(*contents)[0], contents->size());
  }
  return !file.fail();
}

TempFileDeleter::TempFileDeleter() { file_name_ = GetTempFileName(); }

TempFileDeleter::~TempFileDeleter() {
  std::ifstream file(file_name_.c_str());
  if (file.good()) {
    file.close();
    std::remove(file_name_.c_str());
  }
}

}  // namespace libwebm
libvpx-1.8.2/third_party/libwebm/common/file_util.h000066400000000000000000000026031357355204000224220ustar00rootroot00000000000000
// Copyright (c) 2016 The WebM project authors. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.

#ifndef LIBWEBM_COMMON_FILE_UTIL_H_
#define LIBWEBM_COMMON_FILE_UTIL_H_

#include <stdint.h>

#include <string>

#include "mkvmuxer/mkvmuxertypes.h"  // LIBWEBM_DISALLOW_COPY_AND_ASSIGN()

namespace libwebm {

// Returns a temporary file name.
std::string GetTempFileName();

// Returns size of file specified by |file_name|, or 0 upon failure.
uint64_t GetFileSize(const std::string& file_name);

// Gets the contents of |file_name| as a string. Returns false on error.
bool GetFileContents(const std::string& file_name, std::string* contents);

// Manages life of temporary file specified at time of construction. Deletes
// file upon destruction.
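//
// Usage sketch (editor's illustration, not part of libwebm; DemoFileUtil is
// a hypothetical name). The class declared just below ties cleanup of the
// temporary file to scope exit and composes with the helpers above:
//
//   void DemoFileUtil() {
//     libwebm::TempFileDeleter tmp;  // Reserves a temp file name; the file
//                                    // is removed when |tmp| is destroyed.
//     std::string contents;
//     if (libwebm::GetFileContents(tmp.name(), &contents) &&
//         contents.size() == libwebm::GetFileSize(tmp.name())) {
//       // The whole file is now held in |contents|.
//     }
//   }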
class TempFileDeleter { public: TempFileDeleter(); explicit TempFileDeleter(std::string file_name) : file_name_(file_name) {} ~TempFileDeleter(); const std::string& name() const { return file_name_; } private: std::string file_name_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TempFileDeleter); }; } // namespace libwebm #endif // LIBWEBM_COMMON_FILE_UTIL_H_ libvpx-1.8.2/third_party/libwebm/common/hdr_util.cc000066400000000000000000000200661357355204000224210ustar00rootroot00000000000000// Copyright (c) 2016 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. #include "hdr_util.h" #include #include #include #include "mkvparser/mkvparser.h" namespace libwebm { const int Vp9CodecFeatures::kValueNotPresent = INT_MAX; bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc, PrimaryChromaticityPtr* muxer_pc) { muxer_pc->reset(new (std::nothrow) mkvmuxer::PrimaryChromaticity(parser_pc.x, parser_pc.y)); if (!muxer_pc->get()) return false; return true; } bool MasteringMetadataValuePresent(double value) { return value != mkvparser::MasteringMetadata::kValueNotPresent; } bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm, mkvmuxer::MasteringMetadata* muxer_mm) { if (MasteringMetadataValuePresent(parser_mm.luminance_max)) muxer_mm->set_luminance_max(parser_mm.luminance_max); if (MasteringMetadataValuePresent(parser_mm.luminance_min)) muxer_mm->set_luminance_min(parser_mm.luminance_min); PrimaryChromaticityPtr r_ptr(nullptr); PrimaryChromaticityPtr g_ptr(nullptr); PrimaryChromaticityPtr b_ptr(nullptr); PrimaryChromaticityPtr wp_ptr(nullptr); if (parser_mm.r) { if (!CopyPrimaryChromaticity(*parser_mm.r, &r_ptr)) return false; } if (parser_mm.g) { if (!CopyPrimaryChromaticity(*parser_mm.g, &g_ptr)) return false; } if (parser_mm.b) { if (!CopyPrimaryChromaticity(*parser_mm.b, &b_ptr)) return false; } if (parser_mm.white_point) { if (!CopyPrimaryChromaticity(*parser_mm.white_point, &wp_ptr)) return false; } if (!muxer_mm->SetChromaticity(r_ptr.get(), g_ptr.get(), b_ptr.get(), wp_ptr.get())) { return false; } return true; } bool ColourValuePresent(long long value) { return value != mkvparser::Colour::kValueNotPresent; } bool CopyColour(const mkvparser::Colour& parser_colour, mkvmuxer::Colour* muxer_colour) { if (!muxer_colour) return false; if (ColourValuePresent(parser_colour.matrix_coefficients)) muxer_colour->set_matrix_coefficients(parser_colour.matrix_coefficients); if (ColourValuePresent(parser_colour.bits_per_channel)) muxer_colour->set_bits_per_channel(parser_colour.bits_per_channel); if (ColourValuePresent(parser_colour.chroma_subsampling_horz)) { muxer_colour->set_chroma_subsampling_horz( parser_colour.chroma_subsampling_horz); } if (ColourValuePresent(parser_colour.chroma_subsampling_vert)) { muxer_colour->set_chroma_subsampling_vert( parser_colour.chroma_subsampling_vert); } if (ColourValuePresent(parser_colour.cb_subsampling_horz)) muxer_colour->set_cb_subsampling_horz(parser_colour.cb_subsampling_horz); if (ColourValuePresent(parser_colour.cb_subsampling_vert)) muxer_colour->set_cb_subsampling_vert(parser_colour.cb_subsampling_vert); if (ColourValuePresent(parser_colour.chroma_siting_horz)) 
muxer_colour->set_chroma_siting_horz(parser_colour.chroma_siting_horz); if (ColourValuePresent(parser_colour.chroma_siting_vert)) muxer_colour->set_chroma_siting_vert(parser_colour.chroma_siting_vert); if (ColourValuePresent(parser_colour.range)) muxer_colour->set_range(parser_colour.range); if (ColourValuePresent(parser_colour.transfer_characteristics)) { muxer_colour->set_transfer_characteristics( parser_colour.transfer_characteristics); } if (ColourValuePresent(parser_colour.primaries)) muxer_colour->set_primaries(parser_colour.primaries); if (ColourValuePresent(parser_colour.max_cll)) muxer_colour->set_max_cll(parser_colour.max_cll); if (ColourValuePresent(parser_colour.max_fall)) muxer_colour->set_max_fall(parser_colour.max_fall); if (parser_colour.mastering_metadata) { mkvmuxer::MasteringMetadata muxer_mm; if (!CopyMasteringMetadata(*parser_colour.mastering_metadata, &muxer_mm)) return false; if (!muxer_colour->SetMasteringMetadata(muxer_mm)) return false; } return true; } // Format of VPx private data: // // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // | ID Byte | Length | | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ | // | | // : Bytes 1..Length of Codec Feature : // | | // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ // // ID Byte Format // ID byte is an unsigned byte. // 0 1 2 3 4 5 6 7 // +-+-+-+-+-+-+-+-+ // |X| ID | // +-+-+-+-+-+-+-+-+ // // The X bit is reserved. // // See the following link for more information: // http://www.webmproject.org/vp9/profiles/ bool ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length, Vp9CodecFeatures* features) { const int kVpxCodecPrivateMinLength = 3; if (!private_data || !features || length < kVpxCodecPrivateMinLength) return false; const uint8_t kVp9ProfileId = 1; const uint8_t kVp9LevelId = 2; const uint8_t kVp9BitDepthId = 3; const uint8_t kVp9ChromaSubsamplingId = 4; const int kVpxFeatureLength = 1; int offset = 0; // Set features to not set. 
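// (kValueNotPresent is INT_MAX. Note that the parse loop below also rejects
// a repeated feature ID whose value disagrees with the one already parsed,
// so each field ends up either absent or set to a single consistent value.)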
features->profile = Vp9CodecFeatures::kValueNotPresent; features->level = Vp9CodecFeatures::kValueNotPresent; features->bit_depth = Vp9CodecFeatures::kValueNotPresent; features->chroma_subsampling = Vp9CodecFeatures::kValueNotPresent; do { const uint8_t id_byte = private_data[offset++]; const uint8_t length_byte = private_data[offset++]; if (length_byte != kVpxFeatureLength) return false; if (id_byte == kVp9ProfileId) { const int priv_profile = static_cast(private_data[offset++]); if (priv_profile < 0 || priv_profile > 3) return false; if (features->profile != Vp9CodecFeatures::kValueNotPresent && features->profile != priv_profile) { return false; } features->profile = priv_profile; } else if (id_byte == kVp9LevelId) { const int priv_level = static_cast(private_data[offset++]); const int kNumLevels = 14; const int levels[kNumLevels] = {10, 11, 20, 21, 30, 31, 40, 41, 50, 51, 52, 60, 61, 62}; for (int i = 0; i < kNumLevels; ++i) { if (priv_level == levels[i]) { if (features->level != Vp9CodecFeatures::kValueNotPresent && features->level != priv_level) { return false; } features->level = priv_level; break; } } if (features->level == Vp9CodecFeatures::kValueNotPresent) return false; } else if (id_byte == kVp9BitDepthId) { const int priv_profile = static_cast(private_data[offset++]); if (priv_profile != 8 && priv_profile != 10 && priv_profile != 12) return false; if (features->bit_depth != Vp9CodecFeatures::kValueNotPresent && features->bit_depth != priv_profile) { return false; } features->bit_depth = priv_profile; } else if (id_byte == kVp9ChromaSubsamplingId) { const int priv_profile = static_cast(private_data[offset++]); if (priv_profile != 0 && priv_profile != 2 && priv_profile != 3) return false; if (features->chroma_subsampling != Vp9CodecFeatures::kValueNotPresent && features->chroma_subsampling != priv_profile) { return false; } features->chroma_subsampling = priv_profile; } else { // Invalid ID. return false; } } while (offset + kVpxCodecPrivateMinLength <= length); return true; } } // namespace libwebm libvpx-1.8.2/third_party/libwebm/common/hdr_util.h000066400000000000000000000044131357355204000222610ustar00rootroot00000000000000// Copyright (c) 2016 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. #ifndef LIBWEBM_COMMON_HDR_UTIL_H_ #define LIBWEBM_COMMON_HDR_UTIL_H_ #include #include #include "mkvmuxer/mkvmuxer.h" namespace mkvparser { struct Colour; struct MasteringMetadata; struct PrimaryChromaticity; } // namespace mkvparser namespace libwebm { // Utility types and functions for working with the Colour element and its // children. Copiers return true upon success. Presence functions return true // when the specified element is present. // TODO(tomfinegan): These should be moved to libwebm_utils once c++11 is // required by libwebm. // Features of the VP9 codec that may be set in the CodecPrivate of a VP9 video // stream. A value of kValueNotPresent represents that the value was not set in // the CodecPrivate. 
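//
// Worked example (editor's illustration, not part of libwebm): a CodecPrivate
// carrying profile 2 and bit depth 10 is the six bytes
//   0x01 0x01 0x02  (ID 1 = profile,   length 1, value 2)
//   0x03 0x01 0x0A  (ID 3 = bit depth, length 1, value 10)
// and would be consumed as sketched below; |private_data| is a hypothetical
// buffer name.
//
//   const uint8_t private_data[] = {0x01, 0x01, 0x02, 0x03, 0x01, 0x0A};
//   libwebm::Vp9CodecFeatures features;
//   if (libwebm::ParseVpxCodecPrivate(private_data, 6, &features)) {
//     // features.profile == 2 and features.bit_depth == 10; level and
//     // chroma_subsampling remain kValueNotPresent.
//   }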
struct Vp9CodecFeatures { static const int kValueNotPresent; Vp9CodecFeatures() : profile(kValueNotPresent), level(kValueNotPresent), bit_depth(kValueNotPresent), chroma_subsampling(kValueNotPresent) {} ~Vp9CodecFeatures() {} int profile; int level; int bit_depth; int chroma_subsampling; }; typedef std::unique_ptr PrimaryChromaticityPtr; bool CopyPrimaryChromaticity(const mkvparser::PrimaryChromaticity& parser_pc, PrimaryChromaticityPtr* muxer_pc); bool MasteringMetadataValuePresent(double value); bool CopyMasteringMetadata(const mkvparser::MasteringMetadata& parser_mm, mkvmuxer::MasteringMetadata* muxer_mm); bool ColourValuePresent(long long value); bool CopyColour(const mkvparser::Colour& parser_colour, mkvmuxer::Colour* muxer_colour); // Returns true if |features| is set to one or more valid values. bool ParseVpxCodecPrivate(const uint8_t* private_data, int32_t length, Vp9CodecFeatures* features); } // namespace libwebm #endif // LIBWEBM_COMMON_HDR_UTIL_H_ libvpx-1.8.2/third_party/libwebm/common/webmids.h000066400000000000000000000124761357355204000221110ustar00rootroot00000000000000// Copyright (c) 2012 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. #ifndef COMMON_WEBMIDS_H_ #define COMMON_WEBMIDS_H_ namespace libwebm { enum MkvId { kMkvEBML = 0x1A45DFA3, kMkvEBMLVersion = 0x4286, kMkvEBMLReadVersion = 0x42F7, kMkvEBMLMaxIDLength = 0x42F2, kMkvEBMLMaxSizeLength = 0x42F3, kMkvDocType = 0x4282, kMkvDocTypeVersion = 0x4287, kMkvDocTypeReadVersion = 0x4285, kMkvVoid = 0xEC, kMkvSignatureSlot = 0x1B538667, kMkvSignatureAlgo = 0x7E8A, kMkvSignatureHash = 0x7E9A, kMkvSignaturePublicKey = 0x7EA5, kMkvSignature = 0x7EB5, kMkvSignatureElements = 0x7E5B, kMkvSignatureElementList = 0x7E7B, kMkvSignedElement = 0x6532, // segment kMkvSegment = 0x18538067, // Meta Seek Information kMkvSeekHead = 0x114D9B74, kMkvSeek = 0x4DBB, kMkvSeekID = 0x53AB, kMkvSeekPosition = 0x53AC, // Segment Information kMkvInfo = 0x1549A966, kMkvTimecodeScale = 0x2AD7B1, kMkvDuration = 0x4489, kMkvDateUTC = 0x4461, kMkvTitle = 0x7BA9, kMkvMuxingApp = 0x4D80, kMkvWritingApp = 0x5741, // Cluster kMkvCluster = 0x1F43B675, kMkvTimecode = 0xE7, kMkvPrevSize = 0xAB, kMkvBlockGroup = 0xA0, kMkvBlock = 0xA1, kMkvBlockDuration = 0x9B, kMkvReferenceBlock = 0xFB, kMkvLaceNumber = 0xCC, kMkvSimpleBlock = 0xA3, kMkvBlockAdditions = 0x75A1, kMkvBlockMore = 0xA6, kMkvBlockAddID = 0xEE, kMkvBlockAdditional = 0xA5, kMkvDiscardPadding = 0x75A2, // Track kMkvTracks = 0x1654AE6B, kMkvTrackEntry = 0xAE, kMkvTrackNumber = 0xD7, kMkvTrackUID = 0x73C5, kMkvTrackType = 0x83, kMkvFlagEnabled = 0xB9, kMkvFlagDefault = 0x88, kMkvFlagForced = 0x55AA, kMkvFlagLacing = 0x9C, kMkvDefaultDuration = 0x23E383, kMkvMaxBlockAdditionID = 0x55EE, kMkvName = 0x536E, kMkvLanguage = 0x22B59C, kMkvCodecID = 0x86, kMkvCodecPrivate = 0x63A2, kMkvCodecName = 0x258688, kMkvCodecDelay = 0x56AA, kMkvSeekPreRoll = 0x56BB, // video kMkvVideo = 0xE0, kMkvFlagInterlaced = 0x9A, kMkvStereoMode = 0x53B8, kMkvAlphaMode = 0x53C0, kMkvPixelWidth = 0xB0, kMkvPixelHeight = 0xBA, kMkvPixelCropBottom = 0x54AA, kMkvPixelCropTop = 0x54BB, kMkvPixelCropLeft = 0x54CC, kMkvPixelCropRight = 0x54DD, kMkvDisplayWidth = 0x54B0, kMkvDisplayHeight = 0x54BA, kMkvDisplayUnit 
= 0x54B2, kMkvAspectRatioType = 0x54B3, kMkvColourSpace = 0x2EB524, kMkvFrameRate = 0x2383E3, // end video // colour kMkvColour = 0x55B0, kMkvMatrixCoefficients = 0x55B1, kMkvBitsPerChannel = 0x55B2, kMkvChromaSubsamplingHorz = 0x55B3, kMkvChromaSubsamplingVert = 0x55B4, kMkvCbSubsamplingHorz = 0x55B5, kMkvCbSubsamplingVert = 0x55B6, kMkvChromaSitingHorz = 0x55B7, kMkvChromaSitingVert = 0x55B8, kMkvRange = 0x55B9, kMkvTransferCharacteristics = 0x55BA, kMkvPrimaries = 0x55BB, kMkvMaxCLL = 0x55BC, kMkvMaxFALL = 0x55BD, // mastering metadata kMkvMasteringMetadata = 0x55D0, kMkvPrimaryRChromaticityX = 0x55D1, kMkvPrimaryRChromaticityY = 0x55D2, kMkvPrimaryGChromaticityX = 0x55D3, kMkvPrimaryGChromaticityY = 0x55D4, kMkvPrimaryBChromaticityX = 0x55D5, kMkvPrimaryBChromaticityY = 0x55D6, kMkvWhitePointChromaticityX = 0x55D7, kMkvWhitePointChromaticityY = 0x55D8, kMkvLuminanceMax = 0x55D9, kMkvLuminanceMin = 0x55DA, // end mastering metadata // end colour // projection kMkvProjection = 0x7670, kMkvProjectionType = 0x7671, kMkvProjectionPrivate = 0x7672, kMkvProjectionPoseYaw = 0x7673, kMkvProjectionPosePitch = 0x7674, kMkvProjectionPoseRoll = 0x7675, // end projection // audio kMkvAudio = 0xE1, kMkvSamplingFrequency = 0xB5, kMkvOutputSamplingFrequency = 0x78B5, kMkvChannels = 0x9F, kMkvBitDepth = 0x6264, // end audio // ContentEncodings kMkvContentEncodings = 0x6D80, kMkvContentEncoding = 0x6240, kMkvContentEncodingOrder = 0x5031, kMkvContentEncodingScope = 0x5032, kMkvContentEncodingType = 0x5033, kMkvContentCompression = 0x5034, kMkvContentCompAlgo = 0x4254, kMkvContentCompSettings = 0x4255, kMkvContentEncryption = 0x5035, kMkvContentEncAlgo = 0x47E1, kMkvContentEncKeyID = 0x47E2, kMkvContentSignature = 0x47E3, kMkvContentSigKeyID = 0x47E4, kMkvContentSigAlgo = 0x47E5, kMkvContentSigHashAlgo = 0x47E6, kMkvContentEncAESSettings = 0x47E7, kMkvAESSettingsCipherMode = 0x47E8, kMkvAESSettingsCipherInitData = 0x47E9, // end ContentEncodings // Cueing Data kMkvCues = 0x1C53BB6B, kMkvCuePoint = 0xBB, kMkvCueTime = 0xB3, kMkvCueTrackPositions = 0xB7, kMkvCueTrack = 0xF7, kMkvCueClusterPosition = 0xF1, kMkvCueBlockNumber = 0x5378, // Chapters kMkvChapters = 0x1043A770, kMkvEditionEntry = 0x45B9, kMkvChapterAtom = 0xB6, kMkvChapterUID = 0x73C4, kMkvChapterStringUID = 0x5654, kMkvChapterTimeStart = 0x91, kMkvChapterTimeEnd = 0x92, kMkvChapterDisplay = 0x80, kMkvChapString = 0x85, kMkvChapLanguage = 0x437C, kMkvChapCountry = 0x437E, // Tags kMkvTags = 0x1254C367, kMkvTag = 0x7373, kMkvSimpleTag = 0x67C8, kMkvTagName = 0x45A3, kMkvTagString = 0x4487 }; } // namespace libwebm #endif // COMMON_WEBMIDS_H_ libvpx-1.8.2/third_party/libwebm/mkvmuxer/000077500000000000000000000000001357355204000206625ustar00rootroot00000000000000libvpx-1.8.2/third_party/libwebm/mkvmuxer/mkvmuxer.cc000066400000000000000000003562231357355204000230620ustar00rootroot00000000000000// Copyright (c) 2012 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. 
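//
// Editor's note on the element IDs used throughout this file: the MkvId
// constants from common/webmids.h already contain the EBML length-descriptor
// bits, so the muxer emits them verbatim as big-endian bytes (kMkvEBML,
// 0x1A45DFA3, becomes 1A 45 DF A3 on disk). A minimal sketch of that
// serialization; WriteIdBigEndian is a hypothetical name, not libwebm API:
//
//   void WriteIdBigEndian(uint32_t id, uint8_t* out, int* num_bytes) {
//     int bytes = 1;  // An EBML ID occupies 1..4 bytes.
//     while (bytes < 4 && (id >> (8 * bytes)) != 0) ++bytes;
//     for (int i = 0; i < bytes; ++i)  // Most significant byte first.
//       out[i] = static_cast<uint8_t>(id >> (8 * (bytes - 1 - i)));
//     *num_bytes = bytes;
//   }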
#include "mkvmuxer/mkvmuxer.h" #include #include #include #include #include #include #include #include #include #include #include #include "common/webmids.h" #include "mkvmuxer/mkvmuxerutil.h" #include "mkvmuxer/mkvwriter.h" #include "mkvparser/mkvparser.h" namespace mkvmuxer { const float PrimaryChromaticity::kChromaticityMin = 0.0f; const float PrimaryChromaticity::kChromaticityMax = 1.0f; const float MasteringMetadata::kMinLuminance = 0.0f; const float MasteringMetadata::kMinLuminanceMax = 999.99f; const float MasteringMetadata::kMaxLuminanceMax = 9999.99f; const float MasteringMetadata::kValueNotPresent = FLT_MAX; const uint64_t Colour::kValueNotPresent = UINT64_MAX; namespace { const char kDocTypeWebm[] = "webm"; const char kDocTypeMatroska[] = "matroska"; // Deallocate the string designated by |dst|, and then copy the |src| // string to |dst|. The caller owns both the |src| string and the // |dst| copy (hence the caller is responsible for eventually // deallocating the strings, either directly, or indirectly via // StrCpy). Returns true if the source string was successfully copied // to the destination. bool StrCpy(const char* src, char** dst_ptr) { if (dst_ptr == NULL) return false; char*& dst = *dst_ptr; delete[] dst; dst = NULL; if (src == NULL) return true; const size_t size = strlen(src) + 1; dst = new (std::nothrow) char[size]; // NOLINT if (dst == NULL) return false; strcpy(dst, src); // NOLINT return true; } typedef std::unique_ptr PrimaryChromaticityPtr; bool CopyChromaticity(const PrimaryChromaticity* src, PrimaryChromaticityPtr* dst) { if (!dst) return false; dst->reset(new (std::nothrow) PrimaryChromaticity(src->x(), src->y())); if (!dst->get()) return false; return true; } } // namespace /////////////////////////////////////////////////////////////// // // IMkvWriter Class IMkvWriter::IMkvWriter() {} IMkvWriter::~IMkvWriter() {} bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version, const char* const doc_type) { // Level 0 uint64_t size = EbmlElementSize(libwebm::kMkvEBMLVersion, static_cast(1)); size += EbmlElementSize(libwebm::kMkvEBMLReadVersion, static_cast(1)); size += EbmlElementSize(libwebm::kMkvEBMLMaxIDLength, static_cast(4)); size += EbmlElementSize(libwebm::kMkvEBMLMaxSizeLength, static_cast(8)); size += EbmlElementSize(libwebm::kMkvDocType, doc_type); size += EbmlElementSize(libwebm::kMkvDocTypeVersion, static_cast(doc_type_version)); size += EbmlElementSize(libwebm::kMkvDocTypeReadVersion, static_cast(2)); if (!WriteEbmlMasterElement(writer, libwebm::kMkvEBML, size)) return false; if (!WriteEbmlElement(writer, libwebm::kMkvEBMLVersion, static_cast(1))) { return false; } if (!WriteEbmlElement(writer, libwebm::kMkvEBMLReadVersion, static_cast(1))) { return false; } if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxIDLength, static_cast(4))) { return false; } if (!WriteEbmlElement(writer, libwebm::kMkvEBMLMaxSizeLength, static_cast(8))) { return false; } if (!WriteEbmlElement(writer, libwebm::kMkvDocType, doc_type)) return false; if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeVersion, static_cast(doc_type_version))) { return false; } if (!WriteEbmlElement(writer, libwebm::kMkvDocTypeReadVersion, static_cast(2))) { return false; } return true; } bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version) { return WriteEbmlHeader(writer, doc_type_version, kDocTypeWebm); } bool WriteEbmlHeader(IMkvWriter* writer) { return WriteEbmlHeader(writer, mkvmuxer::Segment::kDefaultDocTypeVersion); } bool ChunkedCopy(mkvparser::IMkvReader* source, 
mkvmuxer::IMkvWriter* dst, int64_t start, int64_t size) { // TODO(vigneshv): Check if this is a reasonable value. const uint32_t kBufSize = 2048; uint8_t* buf = new uint8_t[kBufSize]; int64_t offset = start; while (size > 0) { const int64_t read_len = (size > kBufSize) ? kBufSize : size; if (source->Read(offset, static_cast(read_len), buf)) return false; dst->Write(buf, static_cast(read_len)); offset += read_len; size -= read_len; } delete[] buf; return true; } /////////////////////////////////////////////////////////////// // // Frame Class Frame::Frame() : add_id_(0), additional_(NULL), additional_length_(0), duration_(0), duration_set_(false), frame_(NULL), is_key_(false), length_(0), track_number_(0), timestamp_(0), discard_padding_(0), reference_block_timestamp_(0), reference_block_timestamp_set_(false) {} Frame::~Frame() { delete[] frame_; delete[] additional_; } bool Frame::CopyFrom(const Frame& frame) { delete[] frame_; frame_ = NULL; length_ = 0; if (frame.length() > 0 && frame.frame() != NULL && !Init(frame.frame(), frame.length())) { return false; } add_id_ = 0; delete[] additional_; additional_ = NULL; additional_length_ = 0; if (frame.additional_length() > 0 && frame.additional() != NULL && !AddAdditionalData(frame.additional(), frame.additional_length(), frame.add_id())) { return false; } duration_ = frame.duration(); duration_set_ = frame.duration_set(); is_key_ = frame.is_key(); track_number_ = frame.track_number(); timestamp_ = frame.timestamp(); discard_padding_ = frame.discard_padding(); reference_block_timestamp_ = frame.reference_block_timestamp(); reference_block_timestamp_set_ = frame.reference_block_timestamp_set(); return true; } bool Frame::Init(const uint8_t* frame, uint64_t length) { uint8_t* const data = new (std::nothrow) uint8_t[static_cast(length)]; // NOLINT if (!data) return false; delete[] frame_; frame_ = data; length_ = length; memcpy(frame_, frame, static_cast(length_)); return true; } bool Frame::AddAdditionalData(const uint8_t* additional, uint64_t length, uint64_t add_id) { uint8_t* const data = new (std::nothrow) uint8_t[static_cast(length)]; // NOLINT if (!data) return false; delete[] additional_; additional_ = data; additional_length_ = length; add_id_ = add_id; memcpy(additional_, additional, static_cast(additional_length_)); return true; } bool Frame::IsValid() const { if (length_ == 0 || !frame_) { return false; } if ((additional_length_ != 0 && !additional_) || (additional_ != NULL && additional_length_ == 0)) { return false; } if (track_number_ == 0 || track_number_ > kMaxTrackNumber) { return false; } if (!CanBeSimpleBlock() && !is_key_ && !reference_block_timestamp_set_) { return false; } return true; } bool Frame::CanBeSimpleBlock() const { return additional_ == NULL && discard_padding_ == 0 && duration_ == 0; } void Frame::set_duration(uint64_t duration) { duration_ = duration; duration_set_ = true; } void Frame::set_reference_block_timestamp(int64_t reference_block_timestamp) { reference_block_timestamp_ = reference_block_timestamp; reference_block_timestamp_set_ = true; } /////////////////////////////////////////////////////////////// // // CuePoint Class CuePoint::CuePoint() : time_(0), track_(0), cluster_pos_(0), block_number_(1), output_block_number_(true) {} CuePoint::~CuePoint() {} bool CuePoint::Write(IMkvWriter* writer) const { if (!writer || track_ < 1 || cluster_pos_ < 1) return false; uint64_t size = EbmlElementSize(libwebm::kMkvCueClusterPosition, static_cast(cluster_pos_)); size += EbmlElementSize(libwebm::kMkvCueTrack, 
static_cast(track_)); if (output_block_number_ && block_number_ > 1) size += EbmlElementSize(libwebm::kMkvCueBlockNumber, static_cast(block_number_)); const uint64_t track_pos_size = EbmlMasterElementSize(libwebm::kMkvCueTrackPositions, size) + size; const uint64_t payload_size = EbmlElementSize(libwebm::kMkvCueTime, static_cast(time_)) + track_pos_size; if (!WriteEbmlMasterElement(writer, libwebm::kMkvCuePoint, payload_size)) return false; const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; if (!WriteEbmlElement(writer, libwebm::kMkvCueTime, static_cast(time_))) { return false; } if (!WriteEbmlMasterElement(writer, libwebm::kMkvCueTrackPositions, size)) return false; if (!WriteEbmlElement(writer, libwebm::kMkvCueTrack, static_cast(track_))) { return false; } if (!WriteEbmlElement(writer, libwebm::kMkvCueClusterPosition, static_cast(cluster_pos_))) { return false; } if (output_block_number_ && block_number_ > 1) { if (!WriteEbmlElement(writer, libwebm::kMkvCueBlockNumber, static_cast(block_number_))) { return false; } } const int64_t stop_position = writer->Position(); if (stop_position < 0) return false; if (stop_position - payload_position != static_cast(payload_size)) return false; return true; } uint64_t CuePoint::PayloadSize() const { uint64_t size = EbmlElementSize(libwebm::kMkvCueClusterPosition, static_cast(cluster_pos_)); size += EbmlElementSize(libwebm::kMkvCueTrack, static_cast(track_)); if (output_block_number_ && block_number_ > 1) size += EbmlElementSize(libwebm::kMkvCueBlockNumber, static_cast(block_number_)); const uint64_t track_pos_size = EbmlMasterElementSize(libwebm::kMkvCueTrackPositions, size) + size; const uint64_t payload_size = EbmlElementSize(libwebm::kMkvCueTime, static_cast(time_)) + track_pos_size; return payload_size; } uint64_t CuePoint::Size() const { const uint64_t payload_size = PayloadSize(); return EbmlMasterElementSize(libwebm::kMkvCuePoint, payload_size) + payload_size; } /////////////////////////////////////////////////////////////// // // Cues Class Cues::Cues() : cue_entries_capacity_(0), cue_entries_size_(0), cue_entries_(NULL), output_block_number_(true) {} Cues::~Cues() { if (cue_entries_) { for (int32_t i = 0; i < cue_entries_size_; ++i) { CuePoint* const cue = cue_entries_[i]; delete cue; } delete[] cue_entries_; } } bool Cues::AddCue(CuePoint* cue) { if (!cue) return false; if ((cue_entries_size_ + 1) > cue_entries_capacity_) { // Add more CuePoints. const int32_t new_capacity = (!cue_entries_capacity_) ? 
2 : cue_entries_capacity_ * 2; if (new_capacity < 1) return false; CuePoint** const cues = new (std::nothrow) CuePoint*[new_capacity]; // NOLINT if (!cues) return false; for (int32_t i = 0; i < cue_entries_size_; ++i) { cues[i] = cue_entries_[i]; } delete[] cue_entries_; cue_entries_ = cues; cue_entries_capacity_ = new_capacity; } cue->set_output_block_number(output_block_number_); cue_entries_[cue_entries_size_++] = cue; return true; } CuePoint* Cues::GetCueByIndex(int32_t index) const { if (cue_entries_ == NULL) return NULL; if (index >= cue_entries_size_) return NULL; return cue_entries_[index]; } uint64_t Cues::Size() { uint64_t size = 0; for (int32_t i = 0; i < cue_entries_size_; ++i) size += GetCueByIndex(i)->Size(); size += EbmlMasterElementSize(libwebm::kMkvCues, size); return size; } bool Cues::Write(IMkvWriter* writer) const { if (!writer) return false; uint64_t size = 0; for (int32_t i = 0; i < cue_entries_size_; ++i) { const CuePoint* const cue = GetCueByIndex(i); if (!cue) return false; size += cue->Size(); } if (!WriteEbmlMasterElement(writer, libwebm::kMkvCues, size)) return false; const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; for (int32_t i = 0; i < cue_entries_size_; ++i) { const CuePoint* const cue = GetCueByIndex(i); if (!cue->Write(writer)) return false; } const int64_t stop_position = writer->Position(); if (stop_position < 0) return false; if (stop_position - payload_position != static_cast(size)) return false; return true; } /////////////////////////////////////////////////////////////// // // ContentEncAESSettings Class ContentEncAESSettings::ContentEncAESSettings() : cipher_mode_(kCTR) {} uint64_t ContentEncAESSettings::Size() const { const uint64_t payload = PayloadSize(); const uint64_t size = EbmlMasterElementSize(libwebm::kMkvContentEncAESSettings, payload) + payload; return size; } bool ContentEncAESSettings::Write(IMkvWriter* writer) const { const uint64_t payload = PayloadSize(); if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncAESSettings, payload)) return false; const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; if (!WriteEbmlElement(writer, libwebm::kMkvAESSettingsCipherMode, static_cast(cipher_mode_))) { return false; } const int64_t stop_position = writer->Position(); if (stop_position < 0 || stop_position - payload_position != static_cast(payload)) return false; return true; } uint64_t ContentEncAESSettings::PayloadSize() const { uint64_t size = EbmlElementSize(libwebm::kMkvAESSettingsCipherMode, static_cast(cipher_mode_)); return size; } /////////////////////////////////////////////////////////////// // // ContentEncoding Class ContentEncoding::ContentEncoding() : enc_algo_(5), enc_key_id_(NULL), encoding_order_(0), encoding_scope_(1), encoding_type_(1), enc_key_id_length_(0) {} ContentEncoding::~ContentEncoding() { delete[] enc_key_id_; } bool ContentEncoding::SetEncryptionID(const uint8_t* id, uint64_t length) { if (!id || length < 1) return false; delete[] enc_key_id_; enc_key_id_ = new (std::nothrow) uint8_t[static_cast(length)]; // NOLINT if (!enc_key_id_) return false; memcpy(enc_key_id_, id, static_cast(length)); enc_key_id_length_ = length; return true; } uint64_t ContentEncoding::Size() const { const uint64_t encryption_size = EncryptionSize(); const uint64_t encoding_size = EncodingSize(0, encryption_size); const uint64_t encodings_size = EbmlMasterElementSize(libwebm::kMkvContentEncoding, encoding_size) + encoding_size; return encodings_size; 
} bool ContentEncoding::Write(IMkvWriter* writer) const { const uint64_t encryption_size = EncryptionSize(); const uint64_t encoding_size = EncodingSize(0, encryption_size); const uint64_t size = EbmlMasterElementSize(libwebm::kMkvContentEncoding, encoding_size) + encoding_size; const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncoding, encoding_size)) return false; if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingOrder, static_cast(encoding_order_))) return false; if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingScope, static_cast(encoding_scope_))) return false; if (!WriteEbmlElement(writer, libwebm::kMkvContentEncodingType, static_cast(encoding_type_))) return false; if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncryption, encryption_size)) return false; if (!WriteEbmlElement(writer, libwebm::kMkvContentEncAlgo, static_cast(enc_algo_))) { return false; } if (!WriteEbmlElement(writer, libwebm::kMkvContentEncKeyID, enc_key_id_, enc_key_id_length_)) return false; if (!enc_aes_settings_.Write(writer)) return false; const int64_t stop_position = writer->Position(); if (stop_position < 0 || stop_position - payload_position != static_cast(size)) return false; return true; } uint64_t ContentEncoding::EncodingSize(uint64_t compresion_size, uint64_t encryption_size) const { // TODO(fgalligan): Add support for compression settings. if (compresion_size != 0) return 0; uint64_t encoding_size = 0; if (encryption_size > 0) { encoding_size += EbmlMasterElementSize(libwebm::kMkvContentEncryption, encryption_size) + encryption_size; } encoding_size += EbmlElementSize(libwebm::kMkvContentEncodingType, static_cast(encoding_type_)); encoding_size += EbmlElementSize(libwebm::kMkvContentEncodingScope, static_cast(encoding_scope_)); encoding_size += EbmlElementSize(libwebm::kMkvContentEncodingOrder, static_cast(encoding_order_)); return encoding_size; } uint64_t ContentEncoding::EncryptionSize() const { const uint64_t aes_size = enc_aes_settings_.Size(); uint64_t encryption_size = EbmlElementSize(libwebm::kMkvContentEncKeyID, enc_key_id_, enc_key_id_length_); encryption_size += EbmlElementSize(libwebm::kMkvContentEncAlgo, static_cast(enc_algo_)); return encryption_size + aes_size; } /////////////////////////////////////////////////////////////// // // Track Class Track::Track(unsigned int* seed) : codec_id_(NULL), codec_private_(NULL), language_(NULL), max_block_additional_id_(0), name_(NULL), number_(0), type_(0), uid_(MakeUID(seed)), codec_delay_(0), seek_pre_roll_(0), default_duration_(0), codec_private_length_(0), content_encoding_entries_(NULL), content_encoding_entries_size_(0) {} Track::~Track() { delete[] codec_id_; delete[] codec_private_; delete[] language_; delete[] name_; if (content_encoding_entries_) { for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) { ContentEncoding* const encoding = content_encoding_entries_[i]; delete encoding; } delete[] content_encoding_entries_; } } bool Track::AddContentEncoding() { const uint32_t count = content_encoding_entries_size_ + 1; ContentEncoding** const content_encoding_entries = new (std::nothrow) ContentEncoding*[count]; // NOLINT if (!content_encoding_entries) return false; ContentEncoding* const content_encoding = new (std::nothrow) ContentEncoding(); // NOLINT if (!content_encoding) { delete[] content_encoding_entries; return false; } for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) { 
content_encoding_entries[i] = content_encoding_entries_[i]; } delete[] content_encoding_entries_; content_encoding_entries_ = content_encoding_entries; content_encoding_entries_[content_encoding_entries_size_] = content_encoding; content_encoding_entries_size_ = count; return true; } ContentEncoding* Track::GetContentEncodingByIndex(uint32_t index) const { if (content_encoding_entries_ == NULL) return NULL; if (index >= content_encoding_entries_size_) return NULL; return content_encoding_entries_[index]; } uint64_t Track::PayloadSize() const { uint64_t size = EbmlElementSize(libwebm::kMkvTrackNumber, static_cast(number_)); size += EbmlElementSize(libwebm::kMkvTrackUID, static_cast(uid_)); size += EbmlElementSize(libwebm::kMkvTrackType, static_cast(type_)); if (codec_id_) size += EbmlElementSize(libwebm::kMkvCodecID, codec_id_); if (codec_private_) size += EbmlElementSize(libwebm::kMkvCodecPrivate, codec_private_, codec_private_length_); if (language_) size += EbmlElementSize(libwebm::kMkvLanguage, language_); if (name_) size += EbmlElementSize(libwebm::kMkvName, name_); if (max_block_additional_id_) { size += EbmlElementSize(libwebm::kMkvMaxBlockAdditionID, static_cast(max_block_additional_id_)); } if (codec_delay_) { size += EbmlElementSize(libwebm::kMkvCodecDelay, static_cast(codec_delay_)); } if (seek_pre_roll_) { size += EbmlElementSize(libwebm::kMkvSeekPreRoll, static_cast(seek_pre_roll_)); } if (default_duration_) { size += EbmlElementSize(libwebm::kMkvDefaultDuration, static_cast(default_duration_)); } if (content_encoding_entries_size_ > 0) { uint64_t content_encodings_size = 0; for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) { ContentEncoding* const encoding = content_encoding_entries_[i]; content_encodings_size += encoding->Size(); } size += EbmlMasterElementSize(libwebm::kMkvContentEncodings, content_encodings_size) + content_encodings_size; } return size; } uint64_t Track::Size() const { uint64_t size = PayloadSize(); size += EbmlMasterElementSize(libwebm::kMkvTrackEntry, size); return size; } bool Track::Write(IMkvWriter* writer) const { if (!writer) return false; // mandatory elements without a default value. if (!type_ || !codec_id_) return false; // AV1 tracks require a CodecPrivate. See // https://github.com/Matroska-Org/matroska-specification/blob/av1-mappin/codec/av1.md // TODO(tomfinegan): Update the above link to the AV1 Matroska mappings to // point to a stable version once it is finalized, or our own WebM mappings // page on webmproject.org should we decide to release them. if (!strcmp(codec_id_, Tracks::kAv1CodecId) && !codec_private_) return false; // |size| may be bigger than what is written out in this function because // derived classes may write out more data in the Track element. 
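  // (PayloadSize() is virtual: VideoTrack and AudioTrack append a Video or
  // Audio master element to the base fields, so |payload_size| is the full
  // TrackEntry payload, while the local |size| computed below counts only
  // the elements this base implementation writes itself.)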
const uint64_t payload_size = PayloadSize(); if (!WriteEbmlMasterElement(writer, libwebm::kMkvTrackEntry, payload_size)) return false; uint64_t size = EbmlElementSize(libwebm::kMkvTrackNumber, static_cast(number_)); size += EbmlElementSize(libwebm::kMkvTrackUID, static_cast(uid_)); size += EbmlElementSize(libwebm::kMkvTrackType, static_cast(type_)); if (codec_id_) size += EbmlElementSize(libwebm::kMkvCodecID, codec_id_); if (codec_private_) size += EbmlElementSize(libwebm::kMkvCodecPrivate, codec_private_, static_cast(codec_private_length_)); if (language_) size += EbmlElementSize(libwebm::kMkvLanguage, language_); if (name_) size += EbmlElementSize(libwebm::kMkvName, name_); if (max_block_additional_id_) size += EbmlElementSize(libwebm::kMkvMaxBlockAdditionID, static_cast(max_block_additional_id_)); if (codec_delay_) size += EbmlElementSize(libwebm::kMkvCodecDelay, static_cast(codec_delay_)); if (seek_pre_roll_) size += EbmlElementSize(libwebm::kMkvSeekPreRoll, static_cast(seek_pre_roll_)); if (default_duration_) size += EbmlElementSize(libwebm::kMkvDefaultDuration, static_cast(default_duration_)); const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; if (!WriteEbmlElement(writer, libwebm::kMkvTrackNumber, static_cast(number_))) return false; if (!WriteEbmlElement(writer, libwebm::kMkvTrackUID, static_cast(uid_))) return false; if (!WriteEbmlElement(writer, libwebm::kMkvTrackType, static_cast(type_))) return false; if (max_block_additional_id_) { if (!WriteEbmlElement(writer, libwebm::kMkvMaxBlockAdditionID, static_cast(max_block_additional_id_))) { return false; } } if (codec_delay_) { if (!WriteEbmlElement(writer, libwebm::kMkvCodecDelay, static_cast(codec_delay_))) return false; } if (seek_pre_roll_) { if (!WriteEbmlElement(writer, libwebm::kMkvSeekPreRoll, static_cast(seek_pre_roll_))) return false; } if (default_duration_) { if (!WriteEbmlElement(writer, libwebm::kMkvDefaultDuration, static_cast(default_duration_))) return false; } if (codec_id_) { if (!WriteEbmlElement(writer, libwebm::kMkvCodecID, codec_id_)) return false; } if (codec_private_) { if (!WriteEbmlElement(writer, libwebm::kMkvCodecPrivate, codec_private_, static_cast(codec_private_length_))) return false; } if (language_) { if (!WriteEbmlElement(writer, libwebm::kMkvLanguage, language_)) return false; } if (name_) { if (!WriteEbmlElement(writer, libwebm::kMkvName, name_)) return false; } int64_t stop_position = writer->Position(); if (stop_position < 0 || stop_position - payload_position != static_cast(size)) return false; if (content_encoding_entries_size_ > 0) { uint64_t content_encodings_size = 0; for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) { ContentEncoding* const encoding = content_encoding_entries_[i]; content_encodings_size += encoding->Size(); } if (!WriteEbmlMasterElement(writer, libwebm::kMkvContentEncodings, content_encodings_size)) return false; for (uint32_t i = 0; i < content_encoding_entries_size_; ++i) { ContentEncoding* const encoding = content_encoding_entries_[i]; if (!encoding->Write(writer)) return false; } } stop_position = writer->Position(); if (stop_position < 0) return false; return true; } bool Track::SetCodecPrivate(const uint8_t* codec_private, uint64_t length) { if (!codec_private || length < 1) return false; delete[] codec_private_; codec_private_ = new (std::nothrow) uint8_t[static_cast(length)]; // NOLINT if (!codec_private_) return false; memcpy(codec_private_, codec_private, static_cast(length)); codec_private_length_ = length; 
return true; } void Track::set_codec_id(const char* codec_id) { if (codec_id) { delete[] codec_id_; const size_t length = strlen(codec_id) + 1; codec_id_ = new (std::nothrow) char[length]; // NOLINT if (codec_id_) { #ifdef _MSC_VER strcpy_s(codec_id_, length, codec_id); #else strcpy(codec_id_, codec_id); #endif } } } // TODO(fgalligan): Vet the language parameter. void Track::set_language(const char* language) { if (language) { delete[] language_; const size_t length = strlen(language) + 1; language_ = new (std::nothrow) char[length]; // NOLINT if (language_) { #ifdef _MSC_VER strcpy_s(language_, length, language); #else strcpy(language_, language); #endif } } } void Track::set_name(const char* name) { if (name) { delete[] name_; const size_t length = strlen(name) + 1; name_ = new (std::nothrow) char[length]; // NOLINT if (name_) { #ifdef _MSC_VER strcpy_s(name_, length, name); #else strcpy(name_, name); #endif } } } /////////////////////////////////////////////////////////////// // // Colour and its child elements uint64_t PrimaryChromaticity::PrimaryChromaticitySize( libwebm::MkvId x_id, libwebm::MkvId y_id) const { return EbmlElementSize(x_id, x_) + EbmlElementSize(y_id, y_); } bool PrimaryChromaticity::Write(IMkvWriter* writer, libwebm::MkvId x_id, libwebm::MkvId y_id) const { if (!Valid()) { return false; } return WriteEbmlElement(writer, x_id, x_) && WriteEbmlElement(writer, y_id, y_); } bool PrimaryChromaticity::Valid() const { return (x_ >= kChromaticityMin && x_ <= kChromaticityMax && y_ >= kChromaticityMin && y_ <= kChromaticityMax); } uint64_t MasteringMetadata::MasteringMetadataSize() const { uint64_t size = PayloadSize(); if (size > 0) size += EbmlMasterElementSize(libwebm::kMkvMasteringMetadata, size); return size; } bool MasteringMetadata::Valid() const { if (luminance_min_ != kValueNotPresent) { if (luminance_min_ < kMinLuminance || luminance_min_ > kMinLuminanceMax || luminance_min_ > luminance_max_) { return false; } } if (luminance_max_ != kValueNotPresent) { if (luminance_max_ < kMinLuminance || luminance_max_ > kMaxLuminanceMax || luminance_max_ < luminance_min_) { return false; } } if (r_ && !r_->Valid()) return false; if (g_ && !g_->Valid()) return false; if (b_ && !b_->Valid()) return false; if (white_point_ && !white_point_->Valid()) return false; return true; } bool MasteringMetadata::Write(IMkvWriter* writer) const { const uint64_t size = PayloadSize(); // Don't write an empty element. 
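  // (PayloadSize() is zero when neither luminance bound nor any primary
  // chromaticity was set, in which case the optional MasteringMetadata
  // master element is simply omitted from the Colour payload.)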
if (size == 0) return true; if (!WriteEbmlMasterElement(writer, libwebm::kMkvMasteringMetadata, size)) return false; if (luminance_max_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvLuminanceMax, luminance_max_)) { return false; } if (luminance_min_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvLuminanceMin, luminance_min_)) { return false; } if (r_ && !r_->Write(writer, libwebm::kMkvPrimaryRChromaticityX, libwebm::kMkvPrimaryRChromaticityY)) { return false; } if (g_ && !g_->Write(writer, libwebm::kMkvPrimaryGChromaticityX, libwebm::kMkvPrimaryGChromaticityY)) { return false; } if (b_ && !b_->Write(writer, libwebm::kMkvPrimaryBChromaticityX, libwebm::kMkvPrimaryBChromaticityY)) { return false; } if (white_point_ && !white_point_->Write(writer, libwebm::kMkvWhitePointChromaticityX, libwebm::kMkvWhitePointChromaticityY)) { return false; } return true; } bool MasteringMetadata::SetChromaticity( const PrimaryChromaticity* r, const PrimaryChromaticity* g, const PrimaryChromaticity* b, const PrimaryChromaticity* white_point) { PrimaryChromaticityPtr r_ptr(nullptr); if (r) { if (!CopyChromaticity(r, &r_ptr)) return false; } PrimaryChromaticityPtr g_ptr(nullptr); if (g) { if (!CopyChromaticity(g, &g_ptr)) return false; } PrimaryChromaticityPtr b_ptr(nullptr); if (b) { if (!CopyChromaticity(b, &b_ptr)) return false; } PrimaryChromaticityPtr wp_ptr(nullptr); if (white_point) { if (!CopyChromaticity(white_point, &wp_ptr)) return false; } r_ = r_ptr.release(); g_ = g_ptr.release(); b_ = b_ptr.release(); white_point_ = wp_ptr.release(); return true; } uint64_t MasteringMetadata::PayloadSize() const { uint64_t size = 0; if (luminance_max_ != kValueNotPresent) size += EbmlElementSize(libwebm::kMkvLuminanceMax, luminance_max_); if (luminance_min_ != kValueNotPresent) size += EbmlElementSize(libwebm::kMkvLuminanceMin, luminance_min_); if (r_) { size += r_->PrimaryChromaticitySize(libwebm::kMkvPrimaryRChromaticityX, libwebm::kMkvPrimaryRChromaticityY); } if (g_) { size += g_->PrimaryChromaticitySize(libwebm::kMkvPrimaryGChromaticityX, libwebm::kMkvPrimaryGChromaticityY); } if (b_) { size += b_->PrimaryChromaticitySize(libwebm::kMkvPrimaryBChromaticityX, libwebm::kMkvPrimaryBChromaticityY); } if (white_point_) { size += white_point_->PrimaryChromaticitySize( libwebm::kMkvWhitePointChromaticityX, libwebm::kMkvWhitePointChromaticityY); } return size; } uint64_t Colour::ColourSize() const { uint64_t size = PayloadSize(); if (size > 0) size += EbmlMasterElementSize(libwebm::kMkvColour, size); return size; } bool Colour::Valid() const { if (mastering_metadata_ && !mastering_metadata_->Valid()) return false; if (matrix_coefficients_ != kValueNotPresent && !IsMatrixCoefficientsValueValid(matrix_coefficients_)) { return false; } if (chroma_siting_horz_ != kValueNotPresent && !IsChromaSitingHorzValueValid(chroma_siting_horz_)) { return false; } if (chroma_siting_vert_ != kValueNotPresent && !IsChromaSitingVertValueValid(chroma_siting_vert_)) { return false; } if (range_ != kValueNotPresent && !IsColourRangeValueValid(range_)) return false; if (transfer_characteristics_ != kValueNotPresent && !IsTransferCharacteristicsValueValid(transfer_characteristics_)) { return false; } if (primaries_ != kValueNotPresent && !IsPrimariesValueValid(primaries_)) return false; return true; } bool Colour::Write(IMkvWriter* writer) const { const uint64_t size = PayloadSize(); // Don't write an empty element. if (size == 0) return true; // Don't write an invalid element. 
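  // (Valid() range-checks the enumerated fields, such as MatrixCoefficients,
  // ChromaSiting, Range, TransferCharacteristics and Primaries, and defers
  // to MasteringMetadata::Valid() for the nested element.)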
if (!Valid()) return false; if (!WriteEbmlMasterElement(writer, libwebm::kMkvColour, size)) return false; if (matrix_coefficients_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvMatrixCoefficients, static_cast(matrix_coefficients_))) { return false; } if (bits_per_channel_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvBitsPerChannel, static_cast(bits_per_channel_))) { return false; } if (chroma_subsampling_horz_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvChromaSubsamplingHorz, static_cast(chroma_subsampling_horz_))) { return false; } if (chroma_subsampling_vert_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvChromaSubsamplingVert, static_cast(chroma_subsampling_vert_))) { return false; } if (cb_subsampling_horz_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvCbSubsamplingHorz, static_cast(cb_subsampling_horz_))) { return false; } if (cb_subsampling_vert_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvCbSubsamplingVert, static_cast(cb_subsampling_vert_))) { return false; } if (chroma_siting_horz_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvChromaSitingHorz, static_cast(chroma_siting_horz_))) { return false; } if (chroma_siting_vert_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvChromaSitingVert, static_cast(chroma_siting_vert_))) { return false; } if (range_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvRange, static_cast(range_))) { return false; } if (transfer_characteristics_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvTransferCharacteristics, static_cast(transfer_characteristics_))) { return false; } if (primaries_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvPrimaries, static_cast(primaries_))) { return false; } if (max_cll_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvMaxCLL, static_cast(max_cll_))) { return false; } if (max_fall_ != kValueNotPresent && !WriteEbmlElement(writer, libwebm::kMkvMaxFALL, static_cast(max_fall_))) { return false; } if (mastering_metadata_ && !mastering_metadata_->Write(writer)) return false; return true; } bool Colour::SetMasteringMetadata(const MasteringMetadata& mastering_metadata) { std::unique_ptr mm_ptr(new MasteringMetadata()); if (!mm_ptr.get()) return false; mm_ptr->set_luminance_max(mastering_metadata.luminance_max()); mm_ptr->set_luminance_min(mastering_metadata.luminance_min()); if (!mm_ptr->SetChromaticity(mastering_metadata.r(), mastering_metadata.g(), mastering_metadata.b(), mastering_metadata.white_point())) { return false; } delete mastering_metadata_; mastering_metadata_ = mm_ptr.release(); return true; } uint64_t Colour::PayloadSize() const { uint64_t size = 0; if (matrix_coefficients_ != kValueNotPresent) { size += EbmlElementSize(libwebm::kMkvMatrixCoefficients, static_cast(matrix_coefficients_)); } if (bits_per_channel_ != kValueNotPresent) { size += EbmlElementSize(libwebm::kMkvBitsPerChannel, static_cast(bits_per_channel_)); } if (chroma_subsampling_horz_ != kValueNotPresent) { size += EbmlElementSize(libwebm::kMkvChromaSubsamplingHorz, static_cast(chroma_subsampling_horz_)); } if (chroma_subsampling_vert_ != kValueNotPresent) { size += EbmlElementSize(libwebm::kMkvChromaSubsamplingVert, static_cast(chroma_subsampling_vert_)); } if (cb_subsampling_horz_ != kValueNotPresent) { size += EbmlElementSize(libwebm::kMkvCbSubsamplingHorz, static_cast(cb_subsampling_horz_)); } if (cb_subsampling_vert_ != kValueNotPresent) { size += 
EbmlElementSize(libwebm::kMkvCbSubsamplingVert, static_cast(cb_subsampling_vert_)); } if (chroma_siting_horz_ != kValueNotPresent) { size += EbmlElementSize(libwebm::kMkvChromaSitingHorz, static_cast(chroma_siting_horz_)); } if (chroma_siting_vert_ != kValueNotPresent) { size += EbmlElementSize(libwebm::kMkvChromaSitingVert, static_cast(chroma_siting_vert_)); } if (range_ != kValueNotPresent) { size += EbmlElementSize(libwebm::kMkvRange, static_cast(range_)); } if (transfer_characteristics_ != kValueNotPresent) { size += EbmlElementSize(libwebm::kMkvTransferCharacteristics, static_cast(transfer_characteristics_)); } if (primaries_ != kValueNotPresent) { size += EbmlElementSize(libwebm::kMkvPrimaries, static_cast(primaries_)); } if (max_cll_ != kValueNotPresent) { size += EbmlElementSize(libwebm::kMkvMaxCLL, static_cast(max_cll_)); } if (max_fall_ != kValueNotPresent) { size += EbmlElementSize(libwebm::kMkvMaxFALL, static_cast(max_fall_)); } if (mastering_metadata_) size += mastering_metadata_->MasteringMetadataSize(); return size; } /////////////////////////////////////////////////////////////// // // Projection element uint64_t Projection::ProjectionSize() const { uint64_t size = PayloadSize(); if (size > 0) size += EbmlMasterElementSize(libwebm::kMkvProjection, size); return size; } bool Projection::Write(IMkvWriter* writer) const { const uint64_t size = PayloadSize(); // Don't write an empty element. if (size == 0) return true; if (!WriteEbmlMasterElement(writer, libwebm::kMkvProjection, size)) return false; if (!WriteEbmlElement(writer, libwebm::kMkvProjectionType, static_cast(type_))) { return false; } if (private_data_length_ > 0 && private_data_ != NULL && !WriteEbmlElement(writer, libwebm::kMkvProjectionPrivate, private_data_, private_data_length_)) { return false; } if (!WriteEbmlElement(writer, libwebm::kMkvProjectionPoseYaw, pose_yaw_)) return false; if (!WriteEbmlElement(writer, libwebm::kMkvProjectionPosePitch, pose_pitch_)) { return false; } if (!WriteEbmlElement(writer, libwebm::kMkvProjectionPoseRoll, pose_roll_)) { return false; } return true; } bool Projection::SetProjectionPrivate(const uint8_t* data, uint64_t data_length) { if (data == NULL || data_length == 0) { return false; } if (data_length != static_cast(data_length)) { return false; } uint8_t* new_private_data = new (std::nothrow) uint8_t[static_cast(data_length)]; if (new_private_data == NULL) { return false; } delete[] private_data_; private_data_ = new_private_data; private_data_length_ = data_length; memcpy(private_data_, data, static_cast(data_length)); return true; } uint64_t Projection::PayloadSize() const { uint64_t size = EbmlElementSize(libwebm::kMkvProjection, static_cast(type_)); if (private_data_length_ > 0 && private_data_ != NULL) { size += EbmlElementSize(libwebm::kMkvProjectionPrivate, private_data_, private_data_length_); } size += EbmlElementSize(libwebm::kMkvProjectionPoseYaw, pose_yaw_); size += EbmlElementSize(libwebm::kMkvProjectionPosePitch, pose_pitch_); size += EbmlElementSize(libwebm::kMkvProjectionPoseRoll, pose_roll_); return size; } /////////////////////////////////////////////////////////////// // // VideoTrack Class VideoTrack::VideoTrack(unsigned int* seed) : Track(seed), display_height_(0), display_width_(0), pixel_height_(0), pixel_width_(0), crop_left_(0), crop_right_(0), crop_top_(0), crop_bottom_(0), frame_rate_(0.0), height_(0), stereo_mode_(0), alpha_mode_(0), width_(0), colour_space_(NULL), colour_(NULL), projection_(NULL) {} VideoTrack::~VideoTrack() { delete colour_; 
delete projection_; } bool VideoTrack::SetStereoMode(uint64_t stereo_mode) { if (stereo_mode != kMono && stereo_mode != kSideBySideLeftIsFirst && stereo_mode != kTopBottomRightIsFirst && stereo_mode != kTopBottomLeftIsFirst && stereo_mode != kSideBySideRightIsFirst) return false; stereo_mode_ = stereo_mode; return true; } bool VideoTrack::SetAlphaMode(uint64_t alpha_mode) { if (alpha_mode != kNoAlpha && alpha_mode != kAlpha) return false; alpha_mode_ = alpha_mode; return true; } uint64_t VideoTrack::PayloadSize() const { const uint64_t parent_size = Track::PayloadSize(); uint64_t size = VideoPayloadSize(); size += EbmlMasterElementSize(libwebm::kMkvVideo, size); return parent_size + size; } bool VideoTrack::Write(IMkvWriter* writer) const { if (!Track::Write(writer)) return false; const uint64_t size = VideoPayloadSize(); if (!WriteEbmlMasterElement(writer, libwebm::kMkvVideo, size)) return false; const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; if (!WriteEbmlElement( writer, libwebm::kMkvPixelWidth, static_cast((pixel_width_ > 0) ? pixel_width_ : width_))) return false; if (!WriteEbmlElement( writer, libwebm::kMkvPixelHeight, static_cast((pixel_height_ > 0) ? pixel_height_ : height_))) return false; if (display_width_ > 0) { if (!WriteEbmlElement(writer, libwebm::kMkvDisplayWidth, static_cast(display_width_))) return false; } if (display_height_ > 0) { if (!WriteEbmlElement(writer, libwebm::kMkvDisplayHeight, static_cast(display_height_))) return false; } if (crop_left_ > 0) { if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropLeft, static_cast(crop_left_))) return false; } if (crop_right_ > 0) { if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropRight, static_cast(crop_right_))) return false; } if (crop_top_ > 0) { if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropTop, static_cast(crop_top_))) return false; } if (crop_bottom_ > 0) { if (!WriteEbmlElement(writer, libwebm::kMkvPixelCropBottom, static_cast(crop_bottom_))) return false; } if (stereo_mode_ > kMono) { if (!WriteEbmlElement(writer, libwebm::kMkvStereoMode, static_cast(stereo_mode_))) return false; } if (alpha_mode_ > kNoAlpha) { if (!WriteEbmlElement(writer, libwebm::kMkvAlphaMode, static_cast(alpha_mode_))) return false; } if (colour_space_) { if (!WriteEbmlElement(writer, libwebm::kMkvColourSpace, colour_space_)) return false; } if (frame_rate_ > 0.0) { if (!WriteEbmlElement(writer, libwebm::kMkvFrameRate, static_cast(frame_rate_))) { return false; } } if (colour_) { if (!colour_->Write(writer)) return false; } if (projection_) { if (!projection_->Write(writer)) return false; } const int64_t stop_position = writer->Position(); if (stop_position < 0 || stop_position - payload_position != static_cast(size)) { return false; } return true; } void VideoTrack::set_colour_space(const char* colour_space) { if (colour_space) { delete[] colour_space_; const size_t length = strlen(colour_space) + 1; colour_space_ = new (std::nothrow) char[length]; // NOLINT if (colour_space_) { #ifdef _MSC_VER strcpy_s(colour_space_, length, colour_space); #else strcpy(colour_space_, colour_space); #endif } } } bool VideoTrack::SetColour(const Colour& colour) { std::unique_ptr colour_ptr(new Colour()); if (!colour_ptr.get()) return false; if (colour.mastering_metadata()) { if (!colour_ptr->SetMasteringMetadata(*colour.mastering_metadata())) return false; } colour_ptr->set_matrix_coefficients(colour.matrix_coefficients()); colour_ptr->set_bits_per_channel(colour.bits_per_channel()); 
void VideoTrack::set_colour_space(const char* colour_space) {
  if (colour_space) {
    delete[] colour_space_;

    const size_t length = strlen(colour_space) + 1;
    colour_space_ = new (std::nothrow) char[length];  // NOLINT
    if (colour_space_) {
#ifdef _MSC_VER
      strcpy_s(colour_space_, length, colour_space);
#else
      strcpy(colour_space_, colour_space);
#endif
    }
  }
}

bool VideoTrack::SetColour(const Colour& colour) {
  std::unique_ptr<Colour> colour_ptr(new Colour());
  if (!colour_ptr.get()) return false;

  if (colour.mastering_metadata()) {
    if (!colour_ptr->SetMasteringMetadata(*colour.mastering_metadata()))
      return false;
  }

  colour_ptr->set_matrix_coefficients(colour.matrix_coefficients());
  colour_ptr->set_bits_per_channel(colour.bits_per_channel());
  colour_ptr->set_chroma_subsampling_horz(colour.chroma_subsampling_horz());
  colour_ptr->set_chroma_subsampling_vert(colour.chroma_subsampling_vert());
  colour_ptr->set_cb_subsampling_horz(colour.cb_subsampling_horz());
  colour_ptr->set_cb_subsampling_vert(colour.cb_subsampling_vert());
  colour_ptr->set_chroma_siting_horz(colour.chroma_siting_horz());
  colour_ptr->set_chroma_siting_vert(colour.chroma_siting_vert());
  colour_ptr->set_range(colour.range());
  colour_ptr->set_transfer_characteristics(colour.transfer_characteristics());
  colour_ptr->set_primaries(colour.primaries());
  colour_ptr->set_max_cll(colour.max_cll());
  colour_ptr->set_max_fall(colour.max_fall());
  delete colour_;
  colour_ = colour_ptr.release();
  return true;
}

bool VideoTrack::SetProjection(const Projection& projection) {
  std::unique_ptr<Projection> projection_ptr(new Projection());
  if (!projection_ptr.get()) return false;

  if (projection.private_data()) {
    if (!projection_ptr->SetProjectionPrivate(
            projection.private_data(), projection.private_data_length())) {
      return false;
    }
  }

  projection_ptr->set_type(projection.type());
  projection_ptr->set_pose_yaw(projection.pose_yaw());
  projection_ptr->set_pose_pitch(projection.pose_pitch());
  projection_ptr->set_pose_roll(projection.pose_roll());
  delete projection_;
  projection_ = projection_ptr.release();
  return true;
}

uint64_t VideoTrack::VideoPayloadSize() const {
  uint64_t size = EbmlElementSize(
      libwebm::kMkvPixelWidth,
      static_cast<uint64>((pixel_width_ > 0) ? pixel_width_ : width_));
  size += EbmlElementSize(
      libwebm::kMkvPixelHeight,
      static_cast<uint64>((pixel_height_ > 0) ? pixel_height_ : height_));
  if (display_width_ > 0)
    size += EbmlElementSize(libwebm::kMkvDisplayWidth,
                            static_cast<uint64>(display_width_));
  if (display_height_ > 0)
    size += EbmlElementSize(libwebm::kMkvDisplayHeight,
                            static_cast<uint64>(display_height_));
  if (crop_left_ > 0)
    size += EbmlElementSize(libwebm::kMkvPixelCropLeft,
                            static_cast<uint64>(crop_left_));
  if (crop_right_ > 0)
    size += EbmlElementSize(libwebm::kMkvPixelCropRight,
                            static_cast<uint64>(crop_right_));
  if (crop_top_ > 0)
    size += EbmlElementSize(libwebm::kMkvPixelCropTop,
                            static_cast<uint64>(crop_top_));
  if (crop_bottom_ > 0)
    size += EbmlElementSize(libwebm::kMkvPixelCropBottom,
                            static_cast<uint64>(crop_bottom_));
  if (stereo_mode_ > kMono)
    size += EbmlElementSize(libwebm::kMkvStereoMode,
                            static_cast<uint64>(stereo_mode_));
  if (alpha_mode_ > kNoAlpha)
    size += EbmlElementSize(libwebm::kMkvAlphaMode,
                            static_cast<uint64>(alpha_mode_));
  if (frame_rate_ > 0.0)
    size += EbmlElementSize(libwebm::kMkvFrameRate,
                            static_cast<float>(frame_rate_));
  if (colour_space_)
    size += EbmlElementSize(libwebm::kMkvColourSpace, colour_space_);
  if (colour_) size += colour_->ColourSize();
  if (projection_) size += projection_->ProjectionSize();

  return size;
}

///////////////////////////////////////////////////////////////
//
// AudioTrack Class

AudioTrack::AudioTrack(unsigned int* seed)
    : Track(seed), bit_depth_(0), channels_(1), sample_rate_(0.0) {}

AudioTrack::~AudioTrack() {}

uint64_t AudioTrack::PayloadSize() const {
  const uint64_t parent_size = Track::PayloadSize();

  uint64_t size = EbmlElementSize(libwebm::kMkvSamplingFrequency,
                                  static_cast<float>(sample_rate_));
  size +=
      EbmlElementSize(libwebm::kMkvChannels, static_cast<uint64>(channels_));
  if (bit_depth_ > 0)
    size +=
        EbmlElementSize(libwebm::kMkvBitDepth, static_cast<uint64>(bit_depth_));
  size += EbmlMasterElementSize(libwebm::kMkvAudio, size);

  return parent_size + size;
}

bool AudioTrack::Write(IMkvWriter* writer) const {
  if (!Track::Write(writer)) return false;

  // Calculate AudioSettings size.
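  // (EBML master elements are length-prefixed: the Audio element is emitted
  // as its ID followed by a coded payload length, so the child sizes below
  // must be summed before WriteEbmlMasterElement() can be called. As an
  // illustrative layout, a 48 kHz stereo track serializes roughly as
  // [0xE1][coded length][SamplingFrequency][Channels][BitDepth].)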
uint64_t size = EbmlElementSize(libwebm::kMkvSamplingFrequency, static_cast(sample_rate_)); size += EbmlElementSize(libwebm::kMkvChannels, static_cast(channels_)); if (bit_depth_ > 0) size += EbmlElementSize(libwebm::kMkvBitDepth, static_cast(bit_depth_)); if (!WriteEbmlMasterElement(writer, libwebm::kMkvAudio, size)) return false; const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; if (!WriteEbmlElement(writer, libwebm::kMkvSamplingFrequency, static_cast(sample_rate_))) return false; if (!WriteEbmlElement(writer, libwebm::kMkvChannels, static_cast(channels_))) return false; if (bit_depth_ > 0) if (!WriteEbmlElement(writer, libwebm::kMkvBitDepth, static_cast(bit_depth_))) return false; const int64_t stop_position = writer->Position(); if (stop_position < 0 || stop_position - payload_position != static_cast(size)) return false; return true; } /////////////////////////////////////////////////////////////// // // Tracks Class const char Tracks::kOpusCodecId[] = "A_OPUS"; const char Tracks::kVorbisCodecId[] = "A_VORBIS"; const char Tracks::kAv1CodecId[] = "V_AV1"; const char Tracks::kVp8CodecId[] = "V_VP8"; const char Tracks::kVp9CodecId[] = "V_VP9"; const char Tracks::kWebVttCaptionsId[] = "D_WEBVTT/CAPTIONS"; const char Tracks::kWebVttDescriptionsId[] = "D_WEBVTT/DESCRIPTIONS"; const char Tracks::kWebVttMetadataId[] = "D_WEBVTT/METADATA"; const char Tracks::kWebVttSubtitlesId[] = "D_WEBVTT/SUBTITLES"; Tracks::Tracks() : track_entries_(NULL), track_entries_size_(0), wrote_tracks_(false) {} Tracks::~Tracks() { if (track_entries_) { for (uint32_t i = 0; i < track_entries_size_; ++i) { Track* const track = track_entries_[i]; delete track; } delete[] track_entries_; } } bool Tracks::AddTrack(Track* track, int32_t number) { if (number < 0 || wrote_tracks_) return false; // This muxer only supports track numbers in the range [1, 126], in // order to be able (to use Matroska integer representation) to // serialize the block header (of which the track number is a part) // for a frame using exactly 4 bytes. if (number > 0x7E) return false; uint32_t track_num = number; if (track_num > 0) { // Check to make sure a track does not already have |track_num|. for (uint32_t i = 0; i < track_entries_size_; ++i) { if (track_entries_[i]->number() == track_num) return false; } } const uint32_t count = track_entries_size_ + 1; Track** const track_entries = new (std::nothrow) Track*[count]; // NOLINT if (!track_entries) return false; for (uint32_t i = 0; i < track_entries_size_; ++i) { track_entries[i] = track_entries_[i]; } delete[] track_entries_; // Find the lowest availible track number > 0. if (track_num == 0) { track_num = count; // Check to make sure a track does not already have |track_num|. 
bool exit = false; do { exit = true; for (uint32_t i = 0; i < track_entries_size_; ++i) { if (track_entries[i]->number() == track_num) { track_num++; exit = false; break; } } } while (!exit); } track->set_number(track_num); track_entries_ = track_entries; track_entries_[track_entries_size_] = track; track_entries_size_ = count; return true; } const Track* Tracks::GetTrackByIndex(uint32_t index) const { if (track_entries_ == NULL) return NULL; if (index >= track_entries_size_) return NULL; return track_entries_[index]; } Track* Tracks::GetTrackByNumber(uint64_t track_number) const { const int32_t count = track_entries_size(); for (int32_t i = 0; i < count; ++i) { if (track_entries_[i]->number() == track_number) return track_entries_[i]; } return NULL; } bool Tracks::TrackIsAudio(uint64_t track_number) const { const Track* const track = GetTrackByNumber(track_number); if (track->type() == kAudio) return true; return false; } bool Tracks::TrackIsVideo(uint64_t track_number) const { const Track* const track = GetTrackByNumber(track_number); if (track->type() == kVideo) return true; return false; } bool Tracks::Write(IMkvWriter* writer) const { uint64_t size = 0; const int32_t count = track_entries_size(); for (int32_t i = 0; i < count; ++i) { const Track* const track = GetTrackByIndex(i); if (!track) return false; size += track->Size(); } if (!WriteEbmlMasterElement(writer, libwebm::kMkvTracks, size)) return false; const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; for (int32_t i = 0; i < count; ++i) { const Track* const track = GetTrackByIndex(i); if (!track->Write(writer)) return false; } const int64_t stop_position = writer->Position(); if (stop_position < 0 || stop_position - payload_position != static_cast(size)) return false; wrote_tracks_ = true; return true; } /////////////////////////////////////////////////////////////// // // Chapter Class bool Chapter::set_id(const char* id) { return StrCpy(id, &id_); } void Chapter::set_time(const Segment& segment, uint64_t start_ns, uint64_t end_ns) { const SegmentInfo* const info = segment.GetSegmentInfo(); const uint64_t timecode_scale = info->timecode_scale(); start_timecode_ = start_ns / timecode_scale; end_timecode_ = end_ns / timecode_scale; } bool Chapter::add_string(const char* title, const char* language, const char* country) { if (!ExpandDisplaysArray()) return false; Display& d = displays_[displays_count_++]; d.Init(); if (!d.set_title(title)) return false; if (!d.set_language(language)) return false; if (!d.set_country(country)) return false; return true; } Chapter::Chapter() { // This ctor only constructs the object. Proper initialization is // done in Init() (called in Chapters::AddChapter()). The only // reason we bother implementing this ctor is because we had to // declare it as private (along with the dtor), in order to prevent // clients from creating Chapter instances (a privelege we grant // only to the Chapters class). Doing no initialization here also // means that creating arrays of chapter objects is more efficient, // because we only initialize each new chapter object as it becomes // active on the array. 
} Chapter::~Chapter() {} void Chapter::Init(unsigned int* seed) { id_ = NULL; start_timecode_ = 0; end_timecode_ = 0; displays_ = NULL; displays_size_ = 0; displays_count_ = 0; uid_ = MakeUID(seed); } void Chapter::ShallowCopy(Chapter* dst) const { dst->id_ = id_; dst->start_timecode_ = start_timecode_; dst->end_timecode_ = end_timecode_; dst->uid_ = uid_; dst->displays_ = displays_; dst->displays_size_ = displays_size_; dst->displays_count_ = displays_count_; } void Chapter::Clear() { StrCpy(NULL, &id_); while (displays_count_ > 0) { Display& d = displays_[--displays_count_]; d.Clear(); } delete[] displays_; displays_ = NULL; displays_size_ = 0; } bool Chapter::ExpandDisplaysArray() { if (displays_size_ > displays_count_) return true; // nothing to do yet const int size = (displays_size_ == 0) ? 1 : 2 * displays_size_; Display* const displays = new (std::nothrow) Display[size]; // NOLINT if (displays == NULL) return false; for (int idx = 0; idx < displays_count_; ++idx) { displays[idx] = displays_[idx]; // shallow copy } delete[] displays_; displays_ = displays; displays_size_ = size; return true; } uint64_t Chapter::WriteAtom(IMkvWriter* writer) const { uint64_t payload_size = EbmlElementSize(libwebm::kMkvChapterStringUID, id_) + EbmlElementSize(libwebm::kMkvChapterUID, static_cast(uid_)) + EbmlElementSize(libwebm::kMkvChapterTimeStart, static_cast(start_timecode_)) + EbmlElementSize(libwebm::kMkvChapterTimeEnd, static_cast(end_timecode_)); for (int idx = 0; idx < displays_count_; ++idx) { const Display& d = displays_[idx]; payload_size += d.WriteDisplay(NULL); } const uint64_t atom_size = EbmlMasterElementSize(libwebm::kMkvChapterAtom, payload_size) + payload_size; if (writer == NULL) return atom_size; const int64_t start = writer->Position(); if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapterAtom, payload_size)) return 0; if (!WriteEbmlElement(writer, libwebm::kMkvChapterStringUID, id_)) return 0; if (!WriteEbmlElement(writer, libwebm::kMkvChapterUID, static_cast(uid_))) return 0; if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeStart, static_cast(start_timecode_))) return 0; if (!WriteEbmlElement(writer, libwebm::kMkvChapterTimeEnd, static_cast(end_timecode_))) return 0; for (int idx = 0; idx < displays_count_; ++idx) { const Display& d = displays_[idx]; if (!d.WriteDisplay(writer)) return 0; } const int64_t stop = writer->Position(); if (stop >= start && uint64_t(stop - start) != atom_size) return 0; return atom_size; } void Chapter::Display::Init() { title_ = NULL; language_ = NULL; country_ = NULL; } void Chapter::Display::Clear() { StrCpy(NULL, &title_); StrCpy(NULL, &language_); StrCpy(NULL, &country_); } bool Chapter::Display::set_title(const char* title) { return StrCpy(title, &title_); } bool Chapter::Display::set_language(const char* language) { return StrCpy(language, &language_); } bool Chapter::Display::set_country(const char* country) { return StrCpy(country, &country_); } uint64_t Chapter::Display::WriteDisplay(IMkvWriter* writer) const { uint64_t payload_size = EbmlElementSize(libwebm::kMkvChapString, title_); if (language_) payload_size += EbmlElementSize(libwebm::kMkvChapLanguage, language_); if (country_) payload_size += EbmlElementSize(libwebm::kMkvChapCountry, country_); const uint64_t display_size = EbmlMasterElementSize(libwebm::kMkvChapterDisplay, payload_size) + payload_size; if (writer == NULL) return display_size; const int64_t start = writer->Position(); if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapterDisplay, payload_size)) return 0; if 
(!WriteEbmlElement(writer, libwebm::kMkvChapString, title_)) return 0; if (language_) { if (!WriteEbmlElement(writer, libwebm::kMkvChapLanguage, language_)) return 0; } if (country_) { if (!WriteEbmlElement(writer, libwebm::kMkvChapCountry, country_)) return 0; } const int64_t stop = writer->Position(); if (stop >= start && uint64_t(stop - start) != display_size) return 0; return display_size; } /////////////////////////////////////////////////////////////// // // Chapters Class Chapters::Chapters() : chapters_size_(0), chapters_count_(0), chapters_(NULL) {} Chapters::~Chapters() { while (chapters_count_ > 0) { Chapter& chapter = chapters_[--chapters_count_]; chapter.Clear(); } delete[] chapters_; chapters_ = NULL; } int Chapters::Count() const { return chapters_count_; } Chapter* Chapters::AddChapter(unsigned int* seed) { if (!ExpandChaptersArray()) return NULL; Chapter& chapter = chapters_[chapters_count_++]; chapter.Init(seed); return &chapter; } bool Chapters::Write(IMkvWriter* writer) const { if (writer == NULL) return false; const uint64_t payload_size = WriteEdition(NULL); // return size only if (!WriteEbmlMasterElement(writer, libwebm::kMkvChapters, payload_size)) return false; const int64_t start = writer->Position(); if (WriteEdition(writer) == 0) // error return false; const int64_t stop = writer->Position(); if (stop >= start && uint64_t(stop - start) != payload_size) return false; return true; } bool Chapters::ExpandChaptersArray() { if (chapters_size_ > chapters_count_) return true; // nothing to do yet const int size = (chapters_size_ == 0) ? 1 : 2 * chapters_size_; Chapter* const chapters = new (std::nothrow) Chapter[size]; // NOLINT if (chapters == NULL) return false; for (int idx = 0; idx < chapters_count_; ++idx) { const Chapter& src = chapters_[idx]; Chapter* const dst = chapters + idx; src.ShallowCopy(dst); } delete[] chapters_; chapters_ = chapters; chapters_size_ = size; return true; } uint64_t Chapters::WriteEdition(IMkvWriter* writer) const { uint64_t payload_size = 0; for (int idx = 0; idx < chapters_count_; ++idx) { const Chapter& chapter = chapters_[idx]; payload_size += chapter.WriteAtom(NULL); } const uint64_t edition_size = EbmlMasterElementSize(libwebm::kMkvEditionEntry, payload_size) + payload_size; if (writer == NULL) // return size only return edition_size; const int64_t start = writer->Position(); if (!WriteEbmlMasterElement(writer, libwebm::kMkvEditionEntry, payload_size)) return 0; // error for (int idx = 0; idx < chapters_count_; ++idx) { const Chapter& chapter = chapters_[idx]; const uint64_t chapter_size = chapter.WriteAtom(writer); if (chapter_size == 0) // error return 0; } const int64_t stop = writer->Position(); if (stop >= start && uint64_t(stop - start) != edition_size) return 0; return edition_size; } // Tag Class bool Tag::add_simple_tag(const char* tag_name, const char* tag_string) { if (!ExpandSimpleTagsArray()) return false; SimpleTag& st = simple_tags_[simple_tags_count_++]; st.Init(); if (!st.set_tag_name(tag_name)) return false; if (!st.set_tag_string(tag_string)) return false; return true; } Tag::Tag() { simple_tags_ = NULL; simple_tags_size_ = 0; simple_tags_count_ = 0; } Tag::~Tag() {} void Tag::ShallowCopy(Tag* dst) const { dst->simple_tags_ = simple_tags_; dst->simple_tags_size_ = simple_tags_size_; dst->simple_tags_count_ = simple_tags_count_; } void Tag::Clear() { while (simple_tags_count_ > 0) { SimpleTag& st = simple_tags_[--simple_tags_count_]; st.Clear(); } delete[] simple_tags_; simple_tags_ = NULL; simple_tags_size_ = 0; } 
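// Illustrative usage sketch (not part of libwebm): chapters and tags are
// normally created through the owning Segment rather than constructed
// directly, e.g.:
//
//   mkvmuxer::Chapter* const chapter = segment.AddChapter();
//   chapter->set_id("chapter-1");
//   chapter->set_time(segment, 0, 5000000000ULL);  // [0 s, 5 s) in ns
//   chapter->add_string("Introduction", "eng", "us");
//
//   mkvmuxer::Tag* const tag = segment.AddTag();
//   tag->add_simple_tag("TITLE", "An example title");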
bool Tag::ExpandSimpleTagsArray() { if (simple_tags_size_ > simple_tags_count_) return true; // nothing to do yet const int size = (simple_tags_size_ == 0) ? 1 : 2 * simple_tags_size_; SimpleTag* const simple_tags = new (std::nothrow) SimpleTag[size]; // NOLINT if (simple_tags == NULL) return false; for (int idx = 0; idx < simple_tags_count_; ++idx) { simple_tags[idx] = simple_tags_[idx]; // shallow copy } delete[] simple_tags_; simple_tags_ = simple_tags; simple_tags_size_ = size; return true; } uint64_t Tag::Write(IMkvWriter* writer) const { uint64_t payload_size = 0; for (int idx = 0; idx < simple_tags_count_; ++idx) { const SimpleTag& st = simple_tags_[idx]; payload_size += st.Write(NULL); } const uint64_t tag_size = EbmlMasterElementSize(libwebm::kMkvTag, payload_size) + payload_size; if (writer == NULL) return tag_size; const int64_t start = writer->Position(); if (!WriteEbmlMasterElement(writer, libwebm::kMkvTag, payload_size)) return 0; for (int idx = 0; idx < simple_tags_count_; ++idx) { const SimpleTag& st = simple_tags_[idx]; if (!st.Write(writer)) return 0; } const int64_t stop = writer->Position(); if (stop >= start && uint64_t(stop - start) != tag_size) return 0; return tag_size; } // Tag::SimpleTag void Tag::SimpleTag::Init() { tag_name_ = NULL; tag_string_ = NULL; } void Tag::SimpleTag::Clear() { StrCpy(NULL, &tag_name_); StrCpy(NULL, &tag_string_); } bool Tag::SimpleTag::set_tag_name(const char* tag_name) { return StrCpy(tag_name, &tag_name_); } bool Tag::SimpleTag::set_tag_string(const char* tag_string) { return StrCpy(tag_string, &tag_string_); } uint64_t Tag::SimpleTag::Write(IMkvWriter* writer) const { uint64_t payload_size = EbmlElementSize(libwebm::kMkvTagName, tag_name_); payload_size += EbmlElementSize(libwebm::kMkvTagString, tag_string_); const uint64_t simple_tag_size = EbmlMasterElementSize(libwebm::kMkvSimpleTag, payload_size) + payload_size; if (writer == NULL) return simple_tag_size; const int64_t start = writer->Position(); if (!WriteEbmlMasterElement(writer, libwebm::kMkvSimpleTag, payload_size)) return 0; if (!WriteEbmlElement(writer, libwebm::kMkvTagName, tag_name_)) return 0; if (!WriteEbmlElement(writer, libwebm::kMkvTagString, tag_string_)) return 0; const int64_t stop = writer->Position(); if (stop >= start && uint64_t(stop - start) != simple_tag_size) return 0; return simple_tag_size; } // Tags Class Tags::Tags() : tags_size_(0), tags_count_(0), tags_(NULL) {} Tags::~Tags() { while (tags_count_ > 0) { Tag& tag = tags_[--tags_count_]; tag.Clear(); } delete[] tags_; tags_ = NULL; } int Tags::Count() const { return tags_count_; } Tag* Tags::AddTag() { if (!ExpandTagsArray()) return NULL; Tag& tag = tags_[tags_count_++]; return &tag; } bool Tags::Write(IMkvWriter* writer) const { if (writer == NULL) return false; uint64_t payload_size = 0; for (int idx = 0; idx < tags_count_; ++idx) { const Tag& tag = tags_[idx]; payload_size += tag.Write(NULL); } if (!WriteEbmlMasterElement(writer, libwebm::kMkvTags, payload_size)) return false; const int64_t start = writer->Position(); for (int idx = 0; idx < tags_count_; ++idx) { const Tag& tag = tags_[idx]; const uint64_t tag_size = tag.Write(writer); if (tag_size == 0) // error return 0; } const int64_t stop = writer->Position(); if (stop >= start && uint64_t(stop - start) != payload_size) return false; return true; } bool Tags::ExpandTagsArray() { if (tags_size_ > tags_count_) return true; // nothing to do yet const int size = (tags_size_ == 0) ? 
1 : 2 * tags_size_; Tag* const tags = new (std::nothrow) Tag[size]; // NOLINT if (tags == NULL) return false; for (int idx = 0; idx < tags_count_; ++idx) { const Tag& src = tags_[idx]; Tag* const dst = tags + idx; src.ShallowCopy(dst); } delete[] tags_; tags_ = tags; tags_size_ = size; return true; } /////////////////////////////////////////////////////////////// // // Cluster class Cluster::Cluster(uint64_t timecode, int64_t cues_pos, uint64_t timecode_scale, bool write_last_frame_with_duration, bool fixed_size_timecode) : blocks_added_(0), finalized_(false), fixed_size_timecode_(fixed_size_timecode), header_written_(false), payload_size_(0), position_for_cues_(cues_pos), size_position_(-1), timecode_(timecode), timecode_scale_(timecode_scale), write_last_frame_with_duration_(write_last_frame_with_duration), writer_(NULL) {} Cluster::~Cluster() { // Delete any stored frames that are left behind. This will happen if the // Cluster was not Finalized for whatever reason. while (!stored_frames_.empty()) { while (!stored_frames_.begin()->second.empty()) { delete stored_frames_.begin()->second.front(); stored_frames_.begin()->second.pop_front(); } stored_frames_.erase(stored_frames_.begin()->first); } } bool Cluster::Init(IMkvWriter* ptr_writer) { if (!ptr_writer) { return false; } writer_ = ptr_writer; return true; } bool Cluster::AddFrame(const Frame* const frame) { return QueueOrWriteFrame(frame); } bool Cluster::AddFrame(const uint8_t* data, uint64_t length, uint64_t track_number, uint64_t abs_timecode, bool is_key) { Frame frame; if (!frame.Init(data, length)) return false; frame.set_track_number(track_number); frame.set_timestamp(abs_timecode); frame.set_is_key(is_key); return QueueOrWriteFrame(&frame); } bool Cluster::AddFrameWithAdditional(const uint8_t* data, uint64_t length, const uint8_t* additional, uint64_t additional_length, uint64_t add_id, uint64_t track_number, uint64_t abs_timecode, bool is_key) { if (!additional || additional_length == 0) { return false; } Frame frame; if (!frame.Init(data, length) || !frame.AddAdditionalData(additional, additional_length, add_id)) { return false; } frame.set_track_number(track_number); frame.set_timestamp(abs_timecode); frame.set_is_key(is_key); return QueueOrWriteFrame(&frame); } bool Cluster::AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length, int64_t discard_padding, uint64_t track_number, uint64_t abs_timecode, bool is_key) { Frame frame; if (!frame.Init(data, length)) return false; frame.set_discard_padding(discard_padding); frame.set_track_number(track_number); frame.set_timestamp(abs_timecode); frame.set_is_key(is_key); return QueueOrWriteFrame(&frame); } bool Cluster::AddMetadata(const uint8_t* data, uint64_t length, uint64_t track_number, uint64_t abs_timecode, uint64_t duration_timecode) { Frame frame; if (!frame.Init(data, length)) return false; frame.set_track_number(track_number); frame.set_timestamp(abs_timecode); frame.set_duration(duration_timecode); frame.set_is_key(true); // All metadata blocks are keyframes. return QueueOrWriteFrame(&frame); } void Cluster::AddPayloadSize(uint64_t size) { payload_size_ += size; } bool Cluster::Finalize() { return !write_last_frame_with_duration_ && Finalize(false, 0); } bool Cluster::Finalize(bool set_last_frame_duration, uint64_t duration) { if (!writer_ || finalized_) return false; if (write_last_frame_with_duration_) { // Write out held back Frames. This essentially performs a k-way merge // across all tracks in the increasing order of timestamps. 
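    // For example (illustrative numbers): with track 1 holding frames at
    // timestamps {10, 40} and track 2 holding {20}, the loop below emits the
    // frames in the order 10, 20, 40, regardless of which track queued first.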
while (!stored_frames_.empty()) { Frame* frame = stored_frames_.begin()->second.front(); // Get the next frame to write (frame with least timestamp across all // tracks). for (FrameMapIterator frames_iterator = ++stored_frames_.begin(); frames_iterator != stored_frames_.end(); ++frames_iterator) { if (frames_iterator->second.front()->timestamp() < frame->timestamp()) { frame = frames_iterator->second.front(); } } // Set the duration if it's the last frame for the track. if (set_last_frame_duration && stored_frames_[frame->track_number()].size() == 1 && !frame->duration_set()) { frame->set_duration(duration - frame->timestamp()); if (!frame->is_key() && !frame->reference_block_timestamp_set()) { frame->set_reference_block_timestamp( last_block_timestamp_[frame->track_number()]); } } // Write the frame and remove it from |stored_frames_|. const bool wrote_frame = DoWriteFrame(frame); stored_frames_[frame->track_number()].pop_front(); if (stored_frames_[frame->track_number()].empty()) { stored_frames_.erase(frame->track_number()); } delete frame; if (!wrote_frame) return false; } } if (size_position_ == -1) return false; if (writer_->Seekable()) { const int64_t pos = writer_->Position(); if (writer_->Position(size_position_)) return false; if (WriteUIntSize(writer_, payload_size(), 8)) return false; if (writer_->Position(pos)) return false; } finalized_ = true; return true; } uint64_t Cluster::Size() const { const uint64_t element_size = EbmlMasterElementSize(libwebm::kMkvCluster, 0xFFFFFFFFFFFFFFFFULL) + payload_size_; return element_size; } bool Cluster::PreWriteBlock() { if (finalized_) return false; if (!header_written_) { if (!WriteClusterHeader()) return false; } return true; } void Cluster::PostWriteBlock(uint64_t element_size) { AddPayloadSize(element_size); ++blocks_added_; } int64_t Cluster::GetRelativeTimecode(int64_t abs_timecode) const { const int64_t cluster_timecode = this->Cluster::timecode(); const int64_t rel_timecode = static_cast(abs_timecode) - cluster_timecode; if (rel_timecode < 0 || rel_timecode > kMaxBlockTimecode) return -1; return rel_timecode; } bool Cluster::DoWriteFrame(const Frame* const frame) { if (!frame || !frame->IsValid()) return false; if (!PreWriteBlock()) return false; const uint64_t element_size = WriteFrame(writer_, frame, this); if (element_size == 0) return false; PostWriteBlock(element_size); last_block_timestamp_[frame->track_number()] = frame->timestamp(); return true; } bool Cluster::QueueOrWriteFrame(const Frame* const frame) { if (!frame || !frame->IsValid()) return false; // If |write_last_frame_with_duration_| is not set, then write the frame right // away. if (!write_last_frame_with_duration_) { return DoWriteFrame(frame); } // Queue the current frame. uint64_t track_number = frame->track_number(); Frame* const frame_to_store = new Frame(); frame_to_store->CopyFrom(*frame); stored_frames_[track_number].push_back(frame_to_store); // Iterate through all queued frames in the current track except the last one // and write it if it is okay to do so (i.e.) no other track has an held back // frame with timestamp <= the timestamp of the frame in question. 
std::vector<std::list<Frame*>::iterator> frames_to_erase;
  for (std::list<Frame*>::iterator
           current_track_iterator = stored_frames_[track_number].begin(),
           end = --stored_frames_[track_number].end();
       current_track_iterator != end; ++current_track_iterator) {
    const Frame* const frame_to_write = *current_track_iterator;
    bool okay_to_write = true;
    for (FrameMapIterator track_iterator = stored_frames_.begin();
         track_iterator != stored_frames_.end(); ++track_iterator) {
      if (track_iterator->first == track_number) {
        continue;
      }
      if (track_iterator->second.front()->timestamp() <
          frame_to_write->timestamp()) {
        okay_to_write = false;
        break;
      }
    }
    if (okay_to_write) {
      const bool wrote_frame = DoWriteFrame(frame_to_write);
      delete frame_to_write;
      if (!wrote_frame) return false;
      frames_to_erase.push_back(current_track_iterator);
    } else {
      break;
    }
  }
  for (std::vector<std::list<Frame*>::iterator>::iterator iterator =
           frames_to_erase.begin();
       iterator != frames_to_erase.end(); ++iterator) {
    stored_frames_[track_number].erase(*iterator);
  }
  return true;
}

bool Cluster::WriteClusterHeader() {
  if (finalized_) return false;

  if (WriteID(writer_, libwebm::kMkvCluster)) return false;

  // Save for later.
  size_position_ = writer_->Position();

  // Write "unknown" (EBML coded -1) as cluster size value. We need to write 8
  // bytes because we do not know how big our cluster will be.
  if (SerializeInt(writer_, kEbmlUnknownValue, 8)) return false;

  if (!WriteEbmlElement(writer_, libwebm::kMkvTimecode, timecode(),
                        fixed_size_timecode_ ? 8 : 0)) {
    return false;
  }
  AddPayloadSize(EbmlElementSize(libwebm::kMkvTimecode, timecode(),
                                 fixed_size_timecode_ ? 8 : 0));
  header_written_ = true;

  return true;
}

///////////////////////////////////////////////////////////////
//
// SeekHead Class

SeekHead::SeekHead() : start_pos_(0ULL) {
  for (int32_t i = 0; i < kSeekEntryCount; ++i) {
    seek_entry_id_[i] = 0;
    seek_entry_pos_[i] = 0;
  }
}

SeekHead::~SeekHead() {}

bool SeekHead::Finalize(IMkvWriter* writer) const {
  if (writer->Seekable()) {
    if (start_pos_ == -1) return false;

    uint64_t payload_size = 0;
    uint64_t entry_size[kSeekEntryCount];

    for (int32_t i = 0; i < kSeekEntryCount; ++i) {
      if (seek_entry_id_[i] != 0) {
        entry_size[i] = EbmlElementSize(
            libwebm::kMkvSeekID, static_cast<uint64>(seek_entry_id_[i]));
        entry_size[i] += EbmlElementSize(
            libwebm::kMkvSeekPosition, static_cast<uint64>(seek_entry_pos_[i]));

        payload_size +=
            EbmlMasterElementSize(libwebm::kMkvSeek, entry_size[i]) +
            entry_size[i];
      }
    }

    // No SeekHead elements
    if (payload_size == 0) return true;

    const int64_t pos = writer->Position();
    if (writer->Position(start_pos_)) return false;

    if (!WriteEbmlMasterElement(writer, libwebm::kMkvSeekHead, payload_size))
      return false;

    for (int32_t i = 0; i < kSeekEntryCount; ++i) {
      if (seek_entry_id_[i] != 0) {
        if (!WriteEbmlMasterElement(writer, libwebm::kMkvSeek, entry_size[i]))
          return false;

        if (!WriteEbmlElement(writer, libwebm::kMkvSeekID,
                              static_cast<uint64>(seek_entry_id_[i])))
          return false;

        if (!WriteEbmlElement(writer, libwebm::kMkvSeekPosition,
                              static_cast<uint64>(seek_entry_pos_[i])))
          return false;
      }
    }

    const uint64_t total_entry_size = kSeekEntryCount * MaxEntrySize();
    const uint64_t total_size =
        EbmlMasterElementSize(libwebm::kMkvSeekHead, total_entry_size) +
        total_entry_size;
    const int64_t size_left = total_size - (writer->Position() - start_pos_);

    const uint64_t bytes_written = WriteVoidElement(writer, size_left);
    if (!bytes_written) return false;

    if (writer->Position(pos)) return false;
  }

  return true;
}

bool SeekHead::Write(IMkvWriter* writer) {
  const uint64_t entry_size = kSeekEntryCount * MaxEntrySize();
  const
uint64_t size = EbmlMasterElementSize(libwebm::kMkvSeekHead, entry_size); start_pos_ = writer->Position(); const uint64_t bytes_written = WriteVoidElement(writer, size + entry_size); if (!bytes_written) return false; return true; } bool SeekHead::AddSeekEntry(uint32_t id, uint64_t pos) { for (int32_t i = 0; i < kSeekEntryCount; ++i) { if (seek_entry_id_[i] == 0) { seek_entry_id_[i] = id; seek_entry_pos_[i] = pos; return true; } } return false; } uint32_t SeekHead::GetId(int index) const { if (index < 0 || index >= kSeekEntryCount) return UINT_MAX; return seek_entry_id_[index]; } uint64_t SeekHead::GetPosition(int index) const { if (index < 0 || index >= kSeekEntryCount) return ULLONG_MAX; return seek_entry_pos_[index]; } bool SeekHead::SetSeekEntry(int index, uint32_t id, uint64_t position) { if (index < 0 || index >= kSeekEntryCount) return false; seek_entry_id_[index] = id; seek_entry_pos_[index] = position; return true; } uint64_t SeekHead::MaxEntrySize() const { const uint64_t max_entry_payload_size = EbmlElementSize(libwebm::kMkvSeekID, static_cast(UINT64_C(0xffffffff))) + EbmlElementSize(libwebm::kMkvSeekPosition, static_cast(UINT64_C(0xffffffffffffffff))); const uint64_t max_entry_size = EbmlMasterElementSize(libwebm::kMkvSeek, max_entry_payload_size) + max_entry_payload_size; return max_entry_size; } /////////////////////////////////////////////////////////////// // // SegmentInfo Class SegmentInfo::SegmentInfo() : duration_(-1.0), muxing_app_(NULL), timecode_scale_(1000000ULL), writing_app_(NULL), date_utc_(LLONG_MIN), duration_pos_(-1) {} SegmentInfo::~SegmentInfo() { delete[] muxing_app_; delete[] writing_app_; } bool SegmentInfo::Init() { int32_t major; int32_t minor; int32_t build; int32_t revision; GetVersion(&major, &minor, &build, &revision); char temp[256]; #ifdef _MSC_VER sprintf_s(temp, sizeof(temp) / sizeof(temp[0]), "libwebm-%d.%d.%d.%d", major, minor, build, revision); #else snprintf(temp, sizeof(temp) / sizeof(temp[0]), "libwebm-%d.%d.%d.%d", major, minor, build, revision); #endif const size_t app_len = strlen(temp) + 1; delete[] muxing_app_; muxing_app_ = new (std::nothrow) char[app_len]; // NOLINT if (!muxing_app_) return false; #ifdef _MSC_VER strcpy_s(muxing_app_, app_len, temp); #else strcpy(muxing_app_, temp); #endif set_writing_app(temp); if (!writing_app_) return false; return true; } bool SegmentInfo::Finalize(IMkvWriter* writer) const { if (!writer) return false; if (duration_ > 0.0) { if (writer->Seekable()) { if (duration_pos_ == -1) return false; const int64_t pos = writer->Position(); if (writer->Position(duration_pos_)) return false; if (!WriteEbmlElement(writer, libwebm::kMkvDuration, static_cast(duration_))) return false; if (writer->Position(pos)) return false; } } return true; } bool SegmentInfo::Write(IMkvWriter* writer) { if (!writer || !muxing_app_ || !writing_app_) return false; uint64_t size = EbmlElementSize(libwebm::kMkvTimecodeScale, static_cast(timecode_scale_)); if (duration_ > 0.0) size += EbmlElementSize(libwebm::kMkvDuration, static_cast(duration_)); if (date_utc_ != LLONG_MIN) size += EbmlDateElementSize(libwebm::kMkvDateUTC); size += EbmlElementSize(libwebm::kMkvMuxingApp, muxing_app_); size += EbmlElementSize(libwebm::kMkvWritingApp, writing_app_); if (!WriteEbmlMasterElement(writer, libwebm::kMkvInfo, size)) return false; const int64_t payload_position = writer->Position(); if (payload_position < 0) return false; if (!WriteEbmlElement(writer, libwebm::kMkvTimecodeScale, static_cast(timecode_scale_))) return false; if (duration_ > 
0.0) { // Save for later duration_pos_ = writer->Position(); if (!WriteEbmlElement(writer, libwebm::kMkvDuration, static_cast(duration_))) return false; } if (date_utc_ != LLONG_MIN) WriteEbmlDateElement(writer, libwebm::kMkvDateUTC, date_utc_); if (!WriteEbmlElement(writer, libwebm::kMkvMuxingApp, muxing_app_)) return false; if (!WriteEbmlElement(writer, libwebm::kMkvWritingApp, writing_app_)) return false; const int64_t stop_position = writer->Position(); if (stop_position < 0 || stop_position - payload_position != static_cast(size)) return false; return true; } void SegmentInfo::set_muxing_app(const char* app) { if (app) { const size_t length = strlen(app) + 1; char* temp_str = new (std::nothrow) char[length]; // NOLINT if (!temp_str) return; #ifdef _MSC_VER strcpy_s(temp_str, length, app); #else strcpy(temp_str, app); #endif delete[] muxing_app_; muxing_app_ = temp_str; } } void SegmentInfo::set_writing_app(const char* app) { if (app) { const size_t length = strlen(app) + 1; char* temp_str = new (std::nothrow) char[length]; // NOLINT if (!temp_str) return; #ifdef _MSC_VER strcpy_s(temp_str, length, app); #else strcpy(temp_str, app); #endif delete[] writing_app_; writing_app_ = temp_str; } } /////////////////////////////////////////////////////////////// // // Segment Class Segment::Segment() : chunk_count_(0), chunk_name_(NULL), chunk_writer_cluster_(NULL), chunk_writer_cues_(NULL), chunk_writer_header_(NULL), chunking_(false), chunking_base_name_(NULL), cluster_list_(NULL), cluster_list_capacity_(0), cluster_list_size_(0), cues_position_(kAfterClusters), cues_track_(0), force_new_cluster_(false), frames_(NULL), frames_capacity_(0), frames_size_(0), has_video_(false), header_written_(false), last_block_duration_(0), last_timestamp_(0), max_cluster_duration_(kDefaultMaxClusterDuration), max_cluster_size_(0), mode_(kFile), new_cuepoint_(false), output_cues_(true), accurate_cluster_duration_(false), fixed_size_cluster_timecode_(false), estimate_file_duration_(false), payload_pos_(0), size_position_(0), doc_type_version_(kDefaultDocTypeVersion), doc_type_version_written_(0), duration_(0.0), writer_cluster_(NULL), writer_cues_(NULL), writer_header_(NULL) { const time_t curr_time = time(NULL); seed_ = static_cast(curr_time); #ifdef _WIN32 srand(seed_); #endif } Segment::~Segment() { if (cluster_list_) { for (int32_t i = 0; i < cluster_list_size_; ++i) { Cluster* const cluster = cluster_list_[i]; delete cluster; } delete[] cluster_list_; } if (frames_) { for (int32_t i = 0; i < frames_size_; ++i) { Frame* const frame = frames_[i]; delete frame; } delete[] frames_; } delete[] chunk_name_; delete[] chunking_base_name_; if (chunk_writer_cluster_) { chunk_writer_cluster_->Close(); delete chunk_writer_cluster_; } if (chunk_writer_cues_) { chunk_writer_cues_->Close(); delete chunk_writer_cues_; } if (chunk_writer_header_) { chunk_writer_header_->Close(); delete chunk_writer_header_; } } void Segment::MoveCuesBeforeClustersHelper(uint64_t diff, int32_t index, uint64_t* cues_size) { CuePoint* const cue_point = cues_.GetCueByIndex(index); if (cue_point == NULL) return; const uint64_t old_cue_point_size = cue_point->Size(); const uint64_t cluster_pos = cue_point->cluster_pos() + diff; cue_point->set_cluster_pos(cluster_pos); // update the new cluster position // New size of the cue is computed as follows // Let a = current sum of size of all CuePoints // Let b = Increase in Cue Point's size due to this iteration // Let c = Increase in size of Cues Element's length due to this iteration // (This is 
computed as CodedSize(a + b) - CodedSize(a)) // Let d = b + c. Now d is the |diff| passed to the next recursive call. // Let e = a + b. Now e is the |cues_size| passed to the next recursive // call. const uint64_t cue_point_size_diff = cue_point->Size() - old_cue_point_size; const uint64_t cue_size_diff = GetCodedUIntSize(*cues_size + cue_point_size_diff) - GetCodedUIntSize(*cues_size); *cues_size += cue_point_size_diff; diff = cue_size_diff + cue_point_size_diff; if (diff > 0) { for (int32_t i = 0; i < cues_.cue_entries_size(); ++i) { MoveCuesBeforeClustersHelper(diff, i, cues_size); } } } void Segment::MoveCuesBeforeClusters() { const uint64_t current_cue_size = cues_.Size(); uint64_t cue_size = 0; for (int32_t i = 0; i < cues_.cue_entries_size(); ++i) cue_size += cues_.GetCueByIndex(i)->Size(); for (int32_t i = 0; i < cues_.cue_entries_size(); ++i) MoveCuesBeforeClustersHelper(current_cue_size, i, &cue_size); // Adjust the Seek Entry to reflect the change in position // of Cluster and Cues int32_t cluster_index = 0; int32_t cues_index = 0; for (int32_t i = 0; i < SeekHead::kSeekEntryCount; ++i) { if (seek_head_.GetId(i) == libwebm::kMkvCluster) cluster_index = i; if (seek_head_.GetId(i) == libwebm::kMkvCues) cues_index = i; } seek_head_.SetSeekEntry(cues_index, libwebm::kMkvCues, seek_head_.GetPosition(cluster_index)); seek_head_.SetSeekEntry(cluster_index, libwebm::kMkvCluster, cues_.Size() + seek_head_.GetPosition(cues_index)); } bool Segment::Init(IMkvWriter* ptr_writer) { if (!ptr_writer) { return false; } writer_cluster_ = ptr_writer; writer_cues_ = ptr_writer; writer_header_ = ptr_writer; memset(&track_frames_written_, 0, sizeof(track_frames_written_[0]) * kMaxTrackNumber); memset(&last_track_timestamp_, 0, sizeof(last_track_timestamp_[0]) * kMaxTrackNumber); return segment_info_.Init(); } bool Segment::CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader, IMkvWriter* writer) { if (!writer->Seekable() || chunking_) return false; const int64_t cluster_offset = cluster_list_[0]->size_position() - GetUIntSize(libwebm::kMkvCluster); // Copy the headers. if (!ChunkedCopy(reader, writer, 0, cluster_offset)) return false; // Recompute cue positions and seek entries. MoveCuesBeforeClusters(); // Write cues and seek entries. // TODO(vigneshv): As of now, it's safe to call seek_head_.Finalize() for the // second time with a different writer object. But the name Finalize() doesn't // indicate something we want to call more than once. So consider renaming it // to write() or some such. if (!cues_.Write(writer) || !seek_head_.Finalize(writer)) return false; // Copy the Clusters. if (!ChunkedCopy(reader, writer, cluster_offset, cluster_end_offset_ - cluster_offset)) return false; // Update the Segment size in case the Cues size has changed. const int64_t pos = writer->Position(); const int64_t segment_size = writer->Position() - payload_pos_; if (writer->Position(size_position_) || WriteUIntSize(writer, segment_size, 8) || writer->Position(pos)) return false; return true; } bool Segment::Finalize() { if (WriteFramesAll() < 0) return false; // In kLive mode, call Cluster::Finalize only if |accurate_cluster_duration_| // is set. In all other modes, always call Cluster::Finalize. if ((mode_ == kLive ? 
accurate_cluster_duration_ : true) && cluster_list_size_ > 0) { // Update last cluster's size Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1]; // For the last frame of the last Cluster, we don't write it as a BlockGroup // with Duration unless the frame itself has duration set explicitly. if (!old_cluster || !old_cluster->Finalize(false, 0)) return false; } if (mode_ == kFile) { if (chunking_ && chunk_writer_cluster_) { chunk_writer_cluster_->Close(); chunk_count_++; } double duration = (static_cast(last_timestamp_) + last_block_duration_) / segment_info_.timecode_scale(); if (duration_ > 0.0) { duration = duration_; } else { if (last_block_duration_ == 0 && estimate_file_duration_) { const int num_tracks = static_cast(tracks_.track_entries_size()); for (int i = 0; i < num_tracks; ++i) { if (track_frames_written_[i] < 2) continue; // Estimate the duration for the last block of a Track. const double nano_per_frame = static_cast(last_track_timestamp_[i]) / (track_frames_written_[i] - 1); const double track_duration = (last_track_timestamp_[i] + nano_per_frame) / segment_info_.timecode_scale(); if (track_duration > duration) duration = track_duration; } } } segment_info_.set_duration(duration); if (!segment_info_.Finalize(writer_header_)) return false; if (output_cues_) if (!seek_head_.AddSeekEntry(libwebm::kMkvCues, MaxOffset())) return false; if (chunking_) { if (!chunk_writer_cues_) return false; char* name = NULL; if (!UpdateChunkName("cues", &name)) return false; const bool cues_open = chunk_writer_cues_->Open(name); delete[] name; if (!cues_open) return false; } cluster_end_offset_ = writer_cluster_->Position(); // Write the seek headers and cues if (output_cues_) if (!cues_.Write(writer_cues_)) return false; if (!seek_head_.Finalize(writer_header_)) return false; if (writer_header_->Seekable()) { if (size_position_ == -1) return false; const int64_t segment_size = MaxOffset(); if (segment_size < 1) return false; const int64_t pos = writer_header_->Position(); UpdateDocTypeVersion(); if (doc_type_version_ != doc_type_version_written_) { if (writer_header_->Position(0)) return false; const char* const doc_type = DocTypeIsWebm() ? kDocTypeWebm : kDocTypeMatroska; if (!WriteEbmlHeader(writer_header_, doc_type_version_, doc_type)) return false; if (writer_header_->Position() != ebml_header_size_) return false; doc_type_version_written_ = doc_type_version_; } if (writer_header_->Position(size_position_)) return false; if (WriteUIntSize(writer_header_, segment_size, 8)) return false; if (writer_header_->Position(pos)) return false; } if (chunking_) { // Do not close any writers until the segment size has been written, // otherwise the size may be off. 
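    // (With chunking enabled, the output at this point consists of numbered
    // cluster chunks named via UpdateChunkName() -- e.g. "<base>_000000.chk"
    // -- plus a "<base>.hdr" header file and the "_NNNNNN.cues" chunk opened
    // above; see SetChunking() and UpdateChunkName() for the exact naming.)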
if (!chunk_writer_cues_ || !chunk_writer_header_) return false; chunk_writer_cues_->Close(); chunk_writer_header_->Close(); } } return true; } Track* Segment::AddTrack(int32_t number) { Track* const track = new (std::nothrow) Track(&seed_); // NOLINT if (!track) return NULL; if (!tracks_.AddTrack(track, number)) { delete track; return NULL; } return track; } Chapter* Segment::AddChapter() { return chapters_.AddChapter(&seed_); } Tag* Segment::AddTag() { return tags_.AddTag(); } uint64_t Segment::AddVideoTrack(int32_t width, int32_t height, int32_t number) { VideoTrack* const track = new (std::nothrow) VideoTrack(&seed_); // NOLINT if (!track) return 0; track->set_type(Tracks::kVideo); track->set_codec_id(Tracks::kVp8CodecId); track->set_width(width); track->set_height(height); if (!tracks_.AddTrack(track, number)) { delete track; return 0; } has_video_ = true; return track->number(); } bool Segment::AddCuePoint(uint64_t timestamp, uint64_t track) { if (cluster_list_size_ < 1) return false; const Cluster* const cluster = cluster_list_[cluster_list_size_ - 1]; if (!cluster) return false; CuePoint* const cue = new (std::nothrow) CuePoint(); // NOLINT if (!cue) return false; cue->set_time(timestamp / segment_info_.timecode_scale()); cue->set_block_number(cluster->blocks_added()); cue->set_cluster_pos(cluster->position_for_cues()); cue->set_track(track); if (!cues_.AddCue(cue)) { delete cue; return false; } new_cuepoint_ = false; return true; } uint64_t Segment::AddAudioTrack(int32_t sample_rate, int32_t channels, int32_t number) { AudioTrack* const track = new (std::nothrow) AudioTrack(&seed_); // NOLINT if (!track) return 0; track->set_type(Tracks::kAudio); track->set_codec_id(Tracks::kVorbisCodecId); track->set_sample_rate(sample_rate); track->set_channels(channels); if (!tracks_.AddTrack(track, number)) { delete track; return 0; } return track->number(); } bool Segment::AddFrame(const uint8_t* data, uint64_t length, uint64_t track_number, uint64_t timestamp, bool is_key) { if (!data) return false; Frame frame; if (!frame.Init(data, length)) return false; frame.set_track_number(track_number); frame.set_timestamp(timestamp); frame.set_is_key(is_key); return AddGenericFrame(&frame); } bool Segment::AddFrameWithAdditional(const uint8_t* data, uint64_t length, const uint8_t* additional, uint64_t additional_length, uint64_t add_id, uint64_t track_number, uint64_t timestamp, bool is_key) { if (!data || !additional) return false; Frame frame; if (!frame.Init(data, length) || !frame.AddAdditionalData(additional, additional_length, add_id)) { return false; } frame.set_track_number(track_number); frame.set_timestamp(timestamp); frame.set_is_key(is_key); return AddGenericFrame(&frame); } bool Segment::AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length, int64_t discard_padding, uint64_t track_number, uint64_t timestamp, bool is_key) { if (!data) return false; Frame frame; if (!frame.Init(data, length)) return false; frame.set_discard_padding(discard_padding); frame.set_track_number(track_number); frame.set_timestamp(timestamp); frame.set_is_key(is_key); return AddGenericFrame(&frame); } bool Segment::AddMetadata(const uint8_t* data, uint64_t length, uint64_t track_number, uint64_t timestamp_ns, uint64_t duration_ns) { if (!data) return false; Frame frame; if (!frame.Init(data, length)) return false; frame.set_track_number(track_number); frame.set_timestamp(timestamp_ns); frame.set_duration(duration_ns); frame.set_is_key(true); // All metadata blocks are keyframes. 
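  // Illustrative call (not part of libwebm): a WebVTT cue spanning one second
  // would be added as
  //   segment.AddMetadata(payload, payload_size, metadata_track_number,
  //                       0 /* start ns */, 1000000000ULL /* duration ns */);
  // where |metadata_track_number| is a caller-created metadata track
  // (hypothetical name).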
return AddGenericFrame(&frame); } bool Segment::AddGenericFrame(const Frame* frame) { if (!frame) return false; if (!CheckHeaderInfo()) return false; // Check for non-monotonically increasing timestamps. if (frame->timestamp() < last_timestamp_) return false; // Check if the track number is valid. if (!tracks_.GetTrackByNumber(frame->track_number())) return false; if (frame->discard_padding() != 0) doc_type_version_ = 4; if (cluster_list_size_ > 0) { const uint64_t timecode_scale = segment_info_.timecode_scale(); const uint64_t frame_timecode = frame->timestamp() / timecode_scale; const Cluster* const last_cluster = cluster_list_[cluster_list_size_ - 1]; const uint64_t last_cluster_timecode = last_cluster->timecode(); const uint64_t rel_timecode = frame_timecode - last_cluster_timecode; if (rel_timecode > kMaxBlockTimecode) { force_new_cluster_ = true; } } // If the segment has a video track hold onto audio frames to make sure the // audio that is associated with the start time of a video key-frame is // muxed into the same cluster. if (has_video_ && tracks_.TrackIsAudio(frame->track_number()) && !force_new_cluster_) { Frame* const new_frame = new (std::nothrow) Frame(); if (!new_frame || !new_frame->CopyFrom(*frame)) { delete new_frame; return false; } if (!QueueFrame(new_frame)) { delete new_frame; return false; } track_frames_written_[frame->track_number() - 1]++; return true; } if (!DoNewClusterProcessing(frame->track_number(), frame->timestamp(), frame->is_key())) { return false; } if (cluster_list_size_ < 1) return false; Cluster* const cluster = cluster_list_[cluster_list_size_ - 1]; if (!cluster) return false; // If the Frame is not a SimpleBlock, then set the reference_block_timestamp // if it is not set already. bool frame_created = false; if (!frame->CanBeSimpleBlock() && !frame->is_key() && !frame->reference_block_timestamp_set()) { Frame* const new_frame = new (std::nothrow) Frame(); if (!new_frame || !new_frame->CopyFrom(*frame)) { delete new_frame; return false; } new_frame->set_reference_block_timestamp( last_track_timestamp_[frame->track_number() - 1]); frame = new_frame; frame_created = true; } if (!cluster->AddFrame(frame)) return false; if (new_cuepoint_ && cues_track_ == frame->track_number()) { if (!AddCuePoint(frame->timestamp(), cues_track_)) return false; } last_timestamp_ = frame->timestamp(); last_track_timestamp_[frame->track_number() - 1] = frame->timestamp(); last_block_duration_ = frame->duration(); track_frames_written_[frame->track_number() - 1]++; if (frame_created) delete frame; return true; } void Segment::OutputCues(bool output_cues) { output_cues_ = output_cues; } void Segment::AccurateClusterDuration(bool accurate_cluster_duration) { accurate_cluster_duration_ = accurate_cluster_duration; } void Segment::UseFixedSizeClusterTimecode(bool fixed_size_cluster_timecode) { fixed_size_cluster_timecode_ = fixed_size_cluster_timecode; } bool Segment::SetChunking(bool chunking, const char* filename) { if (chunk_count_ > 0) return false; if (chunking) { if (!filename) return false; // Check if we are being set to what is already set. 
if (chunking_ && !strcmp(filename, chunking_base_name_)) return true; const size_t name_length = strlen(filename) + 1; char* const temp = new (std::nothrow) char[name_length]; // NOLINT if (!temp) return false; #ifdef _MSC_VER strcpy_s(temp, name_length, filename); #else strcpy(temp, filename); #endif delete[] chunking_base_name_; chunking_base_name_ = temp; if (!UpdateChunkName("chk", &chunk_name_)) return false; if (!chunk_writer_cluster_) { chunk_writer_cluster_ = new (std::nothrow) MkvWriter(); // NOLINT if (!chunk_writer_cluster_) return false; } if (!chunk_writer_cues_) { chunk_writer_cues_ = new (std::nothrow) MkvWriter(); // NOLINT if (!chunk_writer_cues_) return false; } if (!chunk_writer_header_) { chunk_writer_header_ = new (std::nothrow) MkvWriter(); // NOLINT if (!chunk_writer_header_) return false; } if (!chunk_writer_cluster_->Open(chunk_name_)) return false; const size_t header_length = strlen(filename) + strlen(".hdr") + 1; char* const header = new (std::nothrow) char[header_length]; // NOLINT if (!header) return false; #ifdef _MSC_VER strcpy_s(header, header_length - strlen(".hdr"), chunking_base_name_); strcat_s(header, header_length, ".hdr"); #else strcpy(header, chunking_base_name_); strcat(header, ".hdr"); #endif if (!chunk_writer_header_->Open(header)) { delete[] header; return false; } writer_cluster_ = chunk_writer_cluster_; writer_cues_ = chunk_writer_cues_; writer_header_ = chunk_writer_header_; delete[] header; } chunking_ = chunking; return true; } bool Segment::CuesTrack(uint64_t track_number) { const Track* const track = GetTrackByNumber(track_number); if (!track) return false; cues_track_ = track_number; return true; } void Segment::ForceNewClusterOnNextFrame() { force_new_cluster_ = true; } Track* Segment::GetTrackByNumber(uint64_t track_number) const { return tracks_.GetTrackByNumber(track_number); } bool Segment::WriteSegmentHeader() { UpdateDocTypeVersion(); const char* const doc_type = DocTypeIsWebm() ? kDocTypeWebm : kDocTypeMatroska; if (!WriteEbmlHeader(writer_header_, doc_type_version_, doc_type)) return false; doc_type_version_written_ = doc_type_version_; ebml_header_size_ = static_cast(writer_header_->Position()); // Write "unknown" (-1) as segment size value. If mode is kFile, Segment // will write over duration when the file is finalized. if (WriteID(writer_header_, libwebm::kMkvSegment)) return false; // Save for later. size_position_ = writer_header_->Position(); // Write "unknown" (EBML coded -1) as segment size value. We need to write 8 // bytes because if we are going to overwrite the segment size later we do // not know how big our segment will be. if (SerializeInt(writer_header_, kEbmlUnknownValue, 8)) return false; payload_pos_ = writer_header_->Position(); if (mode_ == kFile && writer_header_->Seekable()) { // Set the duration > 0.0 so SegmentInfo will write out the duration. When // the muxer is done writing we will set the correct duration and have // SegmentInfo upadte it. 
segment_info_.set_duration(1.0); if (!seek_head_.Write(writer_header_)) return false; } if (!seek_head_.AddSeekEntry(libwebm::kMkvInfo, MaxOffset())) return false; if (!segment_info_.Write(writer_header_)) return false; if (!seek_head_.AddSeekEntry(libwebm::kMkvTracks, MaxOffset())) return false; if (!tracks_.Write(writer_header_)) return false; if (chapters_.Count() > 0) { if (!seek_head_.AddSeekEntry(libwebm::kMkvChapters, MaxOffset())) return false; if (!chapters_.Write(writer_header_)) return false; } if (tags_.Count() > 0) { if (!seek_head_.AddSeekEntry(libwebm::kMkvTags, MaxOffset())) return false; if (!tags_.Write(writer_header_)) return false; } if (chunking_ && (mode_ == kLive || !writer_header_->Seekable())) { if (!chunk_writer_header_) return false; chunk_writer_header_->Close(); } header_written_ = true; return true; } // Here we are testing whether to create a new cluster, given a frame // having time frame_timestamp_ns. // int Segment::TestFrame(uint64_t track_number, uint64_t frame_timestamp_ns, bool is_key) const { if (force_new_cluster_) return 1; // If no clusters have been created yet, then create a new cluster // and write this frame immediately, in the new cluster. This path // should only be followed once, the first time we attempt to write // a frame. if (cluster_list_size_ <= 0) return 1; // There exists at least one cluster. We must compare the frame to // the last cluster, in order to determine whether the frame is // written to the existing cluster, or that a new cluster should be // created. const uint64_t timecode_scale = segment_info_.timecode_scale(); const uint64_t frame_timecode = frame_timestamp_ns / timecode_scale; const Cluster* const last_cluster = cluster_list_[cluster_list_size_ - 1]; const uint64_t last_cluster_timecode = last_cluster->timecode(); // For completeness we test for the case when the frame's timecode // is less than the cluster's timecode. Although in principle that // is allowed, this muxer doesn't actually write clusters like that, // so this indicates a bug somewhere in our algorithm. if (frame_timecode < last_cluster_timecode) // should never happen return -1; // If the frame has a timestamp significantly larger than the last // cluster (in Matroska, cluster-relative timestamps are serialized // using a 16-bit signed integer), then we cannot write this frame // to that cluster, and so we must create a new cluster. const int64_t delta_timecode = frame_timecode - last_cluster_timecode; if (delta_timecode > kMaxBlockTimecode) return 2; // We decide to create a new cluster when we have a video keyframe. // This will flush queued (audio) frames, and write the keyframe // immediately, in the newly-created cluster. if (is_key && tracks_.TrackIsVideo(track_number)) return 1; // Create a new cluster if we have accumulated too many frames // already, where "too many" is defined as "the total time of frames // in the cluster exceeds a threshold". const uint64_t delta_ns = delta_timecode * timecode_scale; if (max_cluster_duration_ > 0 && delta_ns >= max_cluster_duration_) return 1; // This is similar to the case above, with the difference that a new // cluster is created when the size of the current cluster exceeds a // threshold. const uint64_t cluster_size = last_cluster->payload_size(); if (max_cluster_size_ > 0 && cluster_size >= max_cluster_size_) return 1; // There's no need to create a new cluster, so emit this frame now. 
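  // Worked example (illustrative): with the default timecode scale of
  // 1,000,000 ns (1 ms per timecode tick), the 16-bit signed block timecode
  // limit caps a cluster at roughly 32.767 seconds of relative timestamps,
  // so the checks above return 2 or 1 well before a block timecode could
  // overflow.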
return 0; } bool Segment::MakeNewCluster(uint64_t frame_timestamp_ns) { const int32_t new_size = cluster_list_size_ + 1; if (new_size > cluster_list_capacity_) { // Add more clusters. const int32_t new_capacity = (cluster_list_capacity_ <= 0) ? 1 : cluster_list_capacity_ * 2; Cluster** const clusters = new (std::nothrow) Cluster*[new_capacity]; // NOLINT if (!clusters) return false; for (int32_t i = 0; i < cluster_list_size_; ++i) { clusters[i] = cluster_list_[i]; } delete[] cluster_list_; cluster_list_ = clusters; cluster_list_capacity_ = new_capacity; } if (!WriteFramesLessThan(frame_timestamp_ns)) return false; if (cluster_list_size_ > 0) { // Update old cluster's size Cluster* const old_cluster = cluster_list_[cluster_list_size_ - 1]; if (!old_cluster || !old_cluster->Finalize(true, frame_timestamp_ns)) return false; } if (output_cues_) new_cuepoint_ = true; if (chunking_ && cluster_list_size_ > 0) { chunk_writer_cluster_->Close(); chunk_count_++; if (!UpdateChunkName("chk", &chunk_name_)) return false; if (!chunk_writer_cluster_->Open(chunk_name_)) return false; } const uint64_t timecode_scale = segment_info_.timecode_scale(); const uint64_t frame_timecode = frame_timestamp_ns / timecode_scale; uint64_t cluster_timecode = frame_timecode; if (frames_size_ > 0) { const Frame* const f = frames_[0]; // earliest queued frame const uint64_t ns = f->timestamp(); const uint64_t tc = ns / timecode_scale; if (tc < cluster_timecode) cluster_timecode = tc; } Cluster*& cluster = cluster_list_[cluster_list_size_]; const int64_t offset = MaxOffset(); cluster = new (std::nothrow) Cluster(cluster_timecode, offset, segment_info_.timecode_scale(), accurate_cluster_duration_, fixed_size_cluster_timecode_); if (!cluster) return false; if (!cluster->Init(writer_cluster_)) return false; cluster_list_size_ = new_size; return true; } bool Segment::DoNewClusterProcessing(uint64_t track_number, uint64_t frame_timestamp_ns, bool is_key) { for (;;) { // Based on the characteristics of the current frame and current // cluster, decide whether to create a new cluster. const int result = TestFrame(track_number, frame_timestamp_ns, is_key); if (result < 0) // error return false; // Always set force_new_cluster_ to false after TestFrame. force_new_cluster_ = false; // A non-zero result means create a new cluster. if (result > 0 && !MakeNewCluster(frame_timestamp_ns)) return false; // Write queued (audio) frames. const int frame_count = WriteFramesAll(); if (frame_count < 0) // error return false; // Write the current frame to the current cluster (if TestFrame // returns 0) or to a newly created cluster (TestFrame returns 1). if (result <= 1) return true; // TestFrame returned 2, which means there was a large time // difference between the cluster and the frame itself. Do the // test again, comparing the frame to the new cluster. 
} } bool Segment::CheckHeaderInfo() { if (!header_written_) { if (!WriteSegmentHeader()) return false; if (!seek_head_.AddSeekEntry(libwebm::kMkvCluster, MaxOffset())) return false; if (output_cues_ && cues_track_ == 0) { // Check for a video track for (uint32_t i = 0; i < tracks_.track_entries_size(); ++i) { const Track* const track = tracks_.GetTrackByIndex(i); if (!track) return false; if (tracks_.TrackIsVideo(track->number())) { cues_track_ = track->number(); break; } } // Set first track found if (cues_track_ == 0) { const Track* const track = tracks_.GetTrackByIndex(0); if (!track) return false; cues_track_ = track->number(); } } } return true; } void Segment::UpdateDocTypeVersion() { for (uint32_t index = 0; index < tracks_.track_entries_size(); ++index) { const Track* track = tracks_.GetTrackByIndex(index); if (track == NULL) break; if ((track->codec_delay() || track->seek_pre_roll()) && doc_type_version_ < 4) { doc_type_version_ = 4; break; } } } bool Segment::UpdateChunkName(const char* ext, char** name) const { if (!name || !ext) return false; char ext_chk[64]; #ifdef _MSC_VER sprintf_s(ext_chk, sizeof(ext_chk), "_%06d.%s", chunk_count_, ext); #else snprintf(ext_chk, sizeof(ext_chk), "_%06d.%s", chunk_count_, ext); #endif const size_t length = strlen(chunking_base_name_) + strlen(ext_chk) + 1; char* const str = new (std::nothrow) char[length]; // NOLINT if (!str) return false; #ifdef _MSC_VER strcpy_s(str, length - strlen(ext_chk), chunking_base_name_); strcat_s(str, length, ext_chk); #else strcpy(str, chunking_base_name_); strcat(str, ext_chk); #endif delete[] * name; *name = str; return true; } int64_t Segment::MaxOffset() { if (!writer_header_) return -1; int64_t offset = writer_header_->Position() - payload_pos_; if (chunking_) { for (int32_t i = 0; i < cluster_list_size_; ++i) { Cluster* const cluster = cluster_list_[i]; offset += cluster->Size(); } if (writer_cues_) offset += writer_cues_->Position(); } return offset; } bool Segment::QueueFrame(Frame* frame) { const int32_t new_size = frames_size_ + 1; if (new_size > frames_capacity_) { // Add more frames. const int32_t new_capacity = (!frames_capacity_) ? 2 : frames_capacity_ * 2; if (new_capacity < 1) return false; Frame** const frames = new (std::nothrow) Frame*[new_capacity]; // NOLINT if (!frames) return false; for (int32_t i = 0; i < frames_size_; ++i) { frames[i] = frames_[i]; } delete[] frames_; frames_ = frames; frames_capacity_ = new_capacity; } frames_[frames_size_++] = frame; return true; } int Segment::WriteFramesAll() { if (frames_ == NULL) return 0; if (cluster_list_size_ < 1) return -1; Cluster* const cluster = cluster_list_[cluster_list_size_ - 1]; if (!cluster) return -1; for (int32_t i = 0; i < frames_size_; ++i) { Frame*& frame = frames_[i]; // TODO(jzern/vigneshv): using Segment::AddGenericFrame here would limit the // places where |doc_type_version_| needs to be updated. if (frame->discard_padding() != 0) doc_type_version_ = 4; if (!cluster->AddFrame(frame)) return -1; if (new_cuepoint_ && cues_track_ == frame->track_number()) { if (!AddCuePoint(frame->timestamp(), cues_track_)) return -1; } if (frame->timestamp() > last_timestamp_) { last_timestamp_ = frame->timestamp(); last_track_timestamp_[frame->track_number() - 1] = frame->timestamp(); } delete frame; frame = NULL; } const int result = frames_size_; frames_size_ = 0; return result; } bool Segment::WriteFramesLessThan(uint64_t timestamp) { // Check |cluster_list_size_| to see if this is the first cluster. 
If it is the first cluster, the audio frames that are less than the first video timestamp will be written in a later step.
  if (frames_size_ > 0 && cluster_list_size_ > 0) {
    if (!frames_) return false;

    Cluster* const cluster = cluster_list_[cluster_list_size_ - 1];
    if (!cluster) return false;

    int32_t shift_left = 0;

    // TODO(fgalligan): Change this to use the durations of frames instead of
    // the next frame's start time if the duration is accurate.
    for (int32_t i = 1; i < frames_size_; ++i) {
      const Frame* const frame_curr = frames_[i];

      if (frame_curr->timestamp() > timestamp) break;

      const Frame* const frame_prev = frames_[i - 1];
      if (frame_prev->discard_padding() != 0) doc_type_version_ = 4;
      if (!cluster->AddFrame(frame_prev)) return false;

      if (new_cuepoint_ && cues_track_ == frame_prev->track_number()) {
        if (!AddCuePoint(frame_prev->timestamp(), cues_track_)) return false;
      }

      ++shift_left;
      if (frame_prev->timestamp() > last_timestamp_) {
        last_timestamp_ = frame_prev->timestamp();
        last_track_timestamp_[frame_prev->track_number() - 1] =
            frame_prev->timestamp();
      }

      delete frame_prev;
    }

    if (shift_left > 0) {
      if (shift_left >= frames_size_) return false;

      const int32_t new_frames_size = frames_size_ - shift_left;
      for (int32_t i = 0; i < new_frames_size; ++i) {
        frames_[i] = frames_[i + shift_left];
      }

      frames_size_ = new_frames_size;
    }
  }

  return true;
}

bool Segment::DocTypeIsWebm() const {
  const int kNumCodecIds = 9;

  // TODO(vigneshv): Tweak .clang-format.
  const char* kWebmCodecIds[kNumCodecIds] = {
    Tracks::kOpusCodecId,          Tracks::kVorbisCodecId,
    Tracks::kAv1CodecId,           Tracks::kVp8CodecId,
    Tracks::kVp9CodecId,           Tracks::kWebVttCaptionsId,
    Tracks::kWebVttDescriptionsId, Tracks::kWebVttMetadataId,
    Tracks::kWebVttSubtitlesId
  };

  const int num_tracks = static_cast<int>(tracks_.track_entries_size());
  for (int track_index = 0; track_index < num_tracks; ++track_index) {
    const Track* const track = tracks_.GetTrackByIndex(track_index);
    const std::string codec_id = track->codec_id();
    bool id_is_webm = false;
    for (int id_index = 0; id_index < kNumCodecIds; ++id_index) {
      if (codec_id == kWebmCodecIds[id_index]) {
        id_is_webm = true;
        break;
      }
    }
    if (!id_is_webm) return false;
  }

  return true;
}

}  // namespace mkvmuxer
libvpx-1.8.2/third_party/libwebm/mkvmuxer/mkvmuxer.h000066400000000000000000002041361357355204000227170ustar00rootroot00000000000000
// Copyright (c) 2012 The WebM project authors. All Rights Reserved.
//
// Use of this source code is governed by a BSD-style license
// that can be found in the LICENSE file in the root of the source
// tree. An additional intellectual property rights grant can be found
// in the file PATENTS. All contributing project authors may
// be found in the AUTHORS file in the root of the source tree.

#ifndef MKVMUXER_MKVMUXER_H_
#define MKVMUXER_MKVMUXER_H_

#include <stdint.h>

#include <cstddef>
#include <list>
#include <map>

#include "common/webmids.h"
#include "mkvmuxer/mkvmuxertypes.h"

// For a description of the WebM elements see
// http://www.webmproject.org/code/specs/container/.

namespace mkvparser {
class IMkvReader;
}  // namespace mkvparser

namespace mkvmuxer {
class MkvWriter;
class Segment;

const uint64_t kMaxTrackNumber = 126;

///////////////////////////////////////////////////////////////
// Interface used by the mkvmuxer to write out the Mkv data.
class IMkvWriter {
 public:
  // Writes out |len| bytes of |buf|. Returns 0 on success.
  virtual int32 Write(const void* buf, uint32 len) = 0;

  // Returns the offset of the output position from the beginning of the
  // output.
  virtual int64 Position() const = 0;

  // Sets the current file position. Returns 0 on success.
  virtual int32 Position(int64 position) = 0;

  // Returns true if the writer is seekable.
  virtual bool Seekable() const = 0;

  // Element start notification. Called whenever an element identifier is about
  // to be written to the stream. |element_id| is the element identifier, and
  // |position| is the location in the WebM stream where the first octet of the
  // element identifier will be written.
  // Note: the |MkvId| enumeration in webmids.hpp defines element values.
  virtual void ElementStartNotify(uint64 element_id, int64 position) = 0;

 protected:
  IMkvWriter();
  virtual ~IMkvWriter();

 private:
  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(IMkvWriter);
};

// Writes out the EBML header for a WebM file, but allows caller to specify
// DocType. This function must be called before any other libwebm writing
// functions are called.
bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version,
                     const char* const doc_type);

// Writes out the EBML header for a WebM file. This function must be called
// before any other libwebm writing functions are called.
bool WriteEbmlHeader(IMkvWriter* writer, uint64_t doc_type_version);

// Deprecated. Writes out EBML header with doc_type_version as
// kDefaultDocTypeVersion. Exists for backward compatibility.
bool WriteEbmlHeader(IMkvWriter* writer);

// Copies data in chunks from |source| to |dst| between the given byte
// positions.
bool ChunkedCopy(mkvparser::IMkvReader* source, IMkvWriter* dst, int64_t start,
                 int64_t size);

///////////////////////////////////////////////////////////////
// Class to hold data that will be written to a block.
class Frame {
 public:
  Frame();
  ~Frame();

  // Sets this frame's contents based on |frame|. Returns true on success. On
  // failure, this frame's existing contents may be lost.
  bool CopyFrom(const Frame& frame);

  // Copies |frame| data into |frame_|. Returns true on success.
  bool Init(const uint8_t* frame, uint64_t length);

  // Copies |additional| data into |additional_|. Returns true on success.
  bool AddAdditionalData(const uint8_t* additional, uint64_t length,
                         uint64_t add_id);

  // Returns true if the frame has valid parameters.
  bool IsValid() const;

  // Returns true if the frame can be written as a SimpleBlock based on current
  // parameters.
  bool CanBeSimpleBlock() const;

  uint64_t add_id() const { return add_id_; }
  const uint8_t* additional() const { return additional_; }
  uint64_t additional_length() const { return additional_length_; }
  void set_duration(uint64_t duration);
  uint64_t duration() const { return duration_; }
  bool duration_set() const { return duration_set_; }
  const uint8_t* frame() const { return frame_; }
  void set_is_key(bool key) { is_key_ = key; }
  bool is_key() const { return is_key_; }
  uint64_t length() const { return length_; }
  void set_track_number(uint64_t track_number) { track_number_ = track_number; }
  uint64_t track_number() const { return track_number_; }
  void set_timestamp(uint64_t timestamp) { timestamp_ = timestamp; }
  uint64_t timestamp() const { return timestamp_; }
  void set_discard_padding(int64_t discard_padding) {
    discard_padding_ = discard_padding;
  }
  int64_t discard_padding() const { return discard_padding_; }
  void set_reference_block_timestamp(int64_t reference_block_timestamp);
  int64_t reference_block_timestamp() const {
    return reference_block_timestamp_;
  }
  bool reference_block_timestamp_set() const {
    return reference_block_timestamp_set_;
  }

 private:
  // Id of the Additional data.
uint64_t add_id_; // Pointer to additional data. Owned by this class. uint8_t* additional_; // Length of the additional data. uint64_t additional_length_; // Duration of the frame in nanoseconds. uint64_t duration_; // Flag indicating that |duration_| has been set. Setting duration causes the // frame to be written out as a Block with BlockDuration instead of as a // SimpleBlock. bool duration_set_; // Pointer to the data. Owned by this class. uint8_t* frame_; // Flag telling if the data should set the key flag of a block. bool is_key_; // Length of the data. uint64_t length_; // Mkv track number the data is associated with. uint64_t track_number_; // Timestamp of the data in nanoseconds. uint64_t timestamp_; // Discard padding for the frame. int64_t discard_padding_; // Reference block timestamp. int64_t reference_block_timestamp_; // Flag indicating if |reference_block_timestamp_| has been set. bool reference_block_timestamp_set_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Frame); }; /////////////////////////////////////////////////////////////// // Class to hold one cue point in a Cues element. class CuePoint { public: CuePoint(); ~CuePoint(); // Returns the size in bytes for the entire CuePoint element. uint64_t Size() const; // Output the CuePoint element to the writer. Returns true on success. bool Write(IMkvWriter* writer) const; void set_time(uint64_t time) { time_ = time; } uint64_t time() const { return time_; } void set_track(uint64_t track) { track_ = track; } uint64_t track() const { return track_; } void set_cluster_pos(uint64_t cluster_pos) { cluster_pos_ = cluster_pos; } uint64_t cluster_pos() const { return cluster_pos_; } void set_block_number(uint64_t block_number) { block_number_ = block_number; } uint64_t block_number() const { return block_number_; } void set_output_block_number(bool output_block_number) { output_block_number_ = output_block_number; } bool output_block_number() const { return output_block_number_; } private: // Returns the size in bytes for the payload of the CuePoint element. uint64_t PayloadSize() const; // Absolute timecode according to the segment time base. uint64_t time_; // The Track element associated with the CuePoint. uint64_t track_; // The position of the Cluster containing the Block. uint64_t cluster_pos_; // Number of the Block within the Cluster, starting from 1. uint64_t block_number_; // If true the muxer will write out the block number for the cue if the // block number is different than the default of 1. Default is set to true. bool output_block_number_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(CuePoint); }; /////////////////////////////////////////////////////////////// // Cues element. class Cues { public: Cues(); ~Cues(); // Adds a cue point to the Cues element. Returns true on success. bool AddCue(CuePoint* cue); // Returns the cue point by index. Returns NULL if there is no cue point // match. CuePoint* GetCueByIndex(int32_t index) const; // Returns the total size of the Cues element uint64_t Size(); // Output the Cues element to the writer. Returns true on success. bool Write(IMkvWriter* writer) const; int32_t cue_entries_size() const { return cue_entries_size_; } void set_output_block_number(bool output_block_number) { output_block_number_ = output_block_number; } bool output_block_number() const { return output_block_number_; } private: // Number of allocated elements in |cue_entries_|. int32_t cue_entries_capacity_; // Number of CuePoints in |cue_entries_|. int32_t cue_entries_size_; // CuePoint list. 
  CuePoint** cue_entries_;

  // If true the muxer will write out the block number for the cue if the
  // block number is different than the default of 1. Default is set to true.
  bool output_block_number_;

  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Cues);
};

///////////////////////////////////////////////////////////////
// ContentEncAESSettings element
class ContentEncAESSettings {
 public:
  enum { kCTR = 1 };

  ContentEncAESSettings();
  ~ContentEncAESSettings() {}

  // Returns the size in bytes for the ContentEncAESSettings element.
  uint64_t Size() const;

  // Writes out the ContentEncAESSettings element to |writer|. Returns true on
  // success.
  bool Write(IMkvWriter* writer) const;

  uint64_t cipher_mode() const { return cipher_mode_; }

 private:
  // Returns the size in bytes for the payload of the ContentEncAESSettings
  // element.
  uint64_t PayloadSize() const;

  // Sub elements
  uint64_t cipher_mode_;

  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncAESSettings);
};

///////////////////////////////////////////////////////////////
// ContentEncoding element
// Elements used to describe if the track data has been encrypted or
// compressed with zlib or header stripping.
// Currently only whole frames can be encrypted with AES. This dictates that
// ContentEncodingOrder will be 0, ContentEncodingScope will be 1,
// ContentEncodingType will be 1, and ContentEncAlgo will be 5.
class ContentEncoding {
 public:
  ContentEncoding();
  ~ContentEncoding();

  // Sets the content encryption id. Copies |length| bytes from |id| to
  // |enc_key_id_|. Returns true on success.
  bool SetEncryptionID(const uint8_t* id, uint64_t length);

  // Returns the size in bytes for the ContentEncoding element.
  uint64_t Size() const;

  // Writes out the ContentEncoding element to |writer|. Returns true on
  // success.
  bool Write(IMkvWriter* writer) const;

  uint64_t enc_algo() const { return enc_algo_; }
  uint64_t encoding_order() const { return encoding_order_; }
  uint64_t encoding_scope() const { return encoding_scope_; }
  uint64_t encoding_type() const { return encoding_type_; }
  ContentEncAESSettings* enc_aes_settings() { return &enc_aes_settings_; }

 private:
  // Returns the size in bytes for the encoding elements.
  uint64_t EncodingSize(uint64_t compression_size,
                        uint64_t encryption_size) const;

  // Returns the size in bytes for the encryption elements.
  uint64_t EncryptionSize() const;

  // Track element names
  uint64_t enc_algo_;
  uint8_t* enc_key_id_;
  uint64_t encoding_order_;
  uint64_t encoding_scope_;
  uint64_t encoding_type_;

  // ContentEncAESSettings element.
  ContentEncAESSettings enc_aes_settings_;

  // Size of the ContentEncKeyID data in bytes.
  uint64_t enc_key_id_length_;

  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding);
};

///////////////////////////////////////////////////////////////
// Colour element.
class PrimaryChromaticity {
 public:
  static const float kChromaticityMin;
  static const float kChromaticityMax;

  PrimaryChromaticity(float x_val, float y_val) : x_(x_val), y_(y_val) {}
  PrimaryChromaticity() : x_(0), y_(0) {}
  ~PrimaryChromaticity() {}

  // Returns sum of |x_id| and |y_id| element id sizes and payload sizes.
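  // (Illustrative note, not part of the declarations here: |x_| and |y_|
  // hold CIE 1931 xy coordinates, so a BT.709 red primary would be stored
  // as, e.g.,
  //   PrimaryChromaticity red(0.640f, 0.330f);
  // with both coordinates expected to fall within
  // [kChromaticityMin, kChromaticityMax] for Valid() to succeed.)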
uint64_t PrimaryChromaticitySize(libwebm::MkvId x_id, libwebm::MkvId y_id) const; bool Valid() const; bool Write(IMkvWriter* writer, libwebm::MkvId x_id, libwebm::MkvId y_id) const; float x() const { return x_; } void set_x(float new_x) { x_ = new_x; } float y() const { return y_; } void set_y(float new_y) { y_ = new_y; } private: float x_; float y_; }; class MasteringMetadata { public: static const float kValueNotPresent; static const float kMinLuminance; static const float kMinLuminanceMax; static const float kMaxLuminanceMax; MasteringMetadata() : luminance_max_(kValueNotPresent), luminance_min_(kValueNotPresent), r_(NULL), g_(NULL), b_(NULL), white_point_(NULL) {} ~MasteringMetadata() { delete r_; delete g_; delete b_; delete white_point_; } // Returns total size of the MasteringMetadata element. uint64_t MasteringMetadataSize() const; bool Valid() const; bool Write(IMkvWriter* writer) const; // Copies non-null chromaticity. bool SetChromaticity(const PrimaryChromaticity* r, const PrimaryChromaticity* g, const PrimaryChromaticity* b, const PrimaryChromaticity* white_point); const PrimaryChromaticity* r() const { return r_; } const PrimaryChromaticity* g() const { return g_; } const PrimaryChromaticity* b() const { return b_; } const PrimaryChromaticity* white_point() const { return white_point_; } float luminance_max() const { return luminance_max_; } void set_luminance_max(float luminance_max) { luminance_max_ = luminance_max; } float luminance_min() const { return luminance_min_; } void set_luminance_min(float luminance_min) { luminance_min_ = luminance_min; } private: // Returns size of MasteringMetadata child elements. uint64_t PayloadSize() const; float luminance_max_; float luminance_min_; PrimaryChromaticity* r_; PrimaryChromaticity* g_; PrimaryChromaticity* b_; PrimaryChromaticity* white_point_; }; class Colour { public: enum MatrixCoefficients { kGbr = 0, kBt709 = 1, kUnspecifiedMc = 2, kReserved = 3, kFcc = 4, kBt470bg = 5, kSmpte170MMc = 6, kSmpte240MMc = 7, kYcocg = 8, kBt2020NonConstantLuminance = 9, kBt2020ConstantLuminance = 10, }; enum ChromaSitingHorz { kUnspecifiedCsh = 0, kLeftCollocated = 1, kHalfCsh = 2, }; enum ChromaSitingVert { kUnspecifiedCsv = 0, kTopCollocated = 1, kHalfCsv = 2, }; enum Range { kUnspecifiedCr = 0, kBroadcastRange = 1, kFullRange = 2, kMcTcDefined = 3, // Defined by MatrixCoefficients/TransferCharacteristics. 
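  // A sketch of typical usage (illustrative only; the setters referenced
  // here are declared below in this class): a broadcast-range BT.709 stream
  // could be described as
  //   Colour colour;
  //   colour.set_matrix_coefficients(Colour::kBt709);
  //   colour.set_transfer_characteristics(Colour::kIturBt709Tc);
  //   colour.set_primaries(Colour::kIturBt709P);
  //   colour.set_range(Colour::kBroadcastRange);
  // after which VideoTrack::SetColour(colour) deep-copies it into the track.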
}; enum TransferCharacteristics { kIturBt709Tc = 1, kUnspecifiedTc = 2, kReservedTc = 3, kGamma22Curve = 4, kGamma28Curve = 5, kSmpte170MTc = 6, kSmpte240MTc = 7, kLinear = 8, kLog = 9, kLogSqrt = 10, kIec6196624 = 11, kIturBt1361ExtendedColourGamut = 12, kIec6196621 = 13, kIturBt202010bit = 14, kIturBt202012bit = 15, kSmpteSt2084 = 16, kSmpteSt4281Tc = 17, kAribStdB67Hlg = 18, }; enum Primaries { kReservedP0 = 0, kIturBt709P = 1, kUnspecifiedP = 2, kReservedP3 = 3, kIturBt470M = 4, kIturBt470Bg = 5, kSmpte170MP = 6, kSmpte240MP = 7, kFilm = 8, kIturBt2020 = 9, kSmpteSt4281P = 10, kJedecP22Phosphors = 22, }; static const uint64_t kValueNotPresent; Colour() : matrix_coefficients_(kValueNotPresent), bits_per_channel_(kValueNotPresent), chroma_subsampling_horz_(kValueNotPresent), chroma_subsampling_vert_(kValueNotPresent), cb_subsampling_horz_(kValueNotPresent), cb_subsampling_vert_(kValueNotPresent), chroma_siting_horz_(kValueNotPresent), chroma_siting_vert_(kValueNotPresent), range_(kValueNotPresent), transfer_characteristics_(kValueNotPresent), primaries_(kValueNotPresent), max_cll_(kValueNotPresent), max_fall_(kValueNotPresent), mastering_metadata_(NULL) {} ~Colour() { delete mastering_metadata_; } // Returns total size of the Colour element. uint64_t ColourSize() const; bool Valid() const; bool Write(IMkvWriter* writer) const; // Deep copies |mastering_metadata|. bool SetMasteringMetadata(const MasteringMetadata& mastering_metadata); const MasteringMetadata* mastering_metadata() const { return mastering_metadata_; } uint64_t matrix_coefficients() const { return matrix_coefficients_; } void set_matrix_coefficients(uint64_t matrix_coefficients) { matrix_coefficients_ = matrix_coefficients; } uint64_t bits_per_channel() const { return bits_per_channel_; } void set_bits_per_channel(uint64_t bits_per_channel) { bits_per_channel_ = bits_per_channel; } uint64_t chroma_subsampling_horz() const { return chroma_subsampling_horz_; } void set_chroma_subsampling_horz(uint64_t chroma_subsampling_horz) { chroma_subsampling_horz_ = chroma_subsampling_horz; } uint64_t chroma_subsampling_vert() const { return chroma_subsampling_vert_; } void set_chroma_subsampling_vert(uint64_t chroma_subsampling_vert) { chroma_subsampling_vert_ = chroma_subsampling_vert; } uint64_t cb_subsampling_horz() const { return cb_subsampling_horz_; } void set_cb_subsampling_horz(uint64_t cb_subsampling_horz) { cb_subsampling_horz_ = cb_subsampling_horz; } uint64_t cb_subsampling_vert() const { return cb_subsampling_vert_; } void set_cb_subsampling_vert(uint64_t cb_subsampling_vert) { cb_subsampling_vert_ = cb_subsampling_vert; } uint64_t chroma_siting_horz() const { return chroma_siting_horz_; } void set_chroma_siting_horz(uint64_t chroma_siting_horz) { chroma_siting_horz_ = chroma_siting_horz; } uint64_t chroma_siting_vert() const { return chroma_siting_vert_; } void set_chroma_siting_vert(uint64_t chroma_siting_vert) { chroma_siting_vert_ = chroma_siting_vert; } uint64_t range() const { return range_; } void set_range(uint64_t range) { range_ = range; } uint64_t transfer_characteristics() const { return transfer_characteristics_; } void set_transfer_characteristics(uint64_t transfer_characteristics) { transfer_characteristics_ = transfer_characteristics; } uint64_t primaries() const { return primaries_; } void set_primaries(uint64_t primaries) { primaries_ = primaries; } uint64_t max_cll() const { return max_cll_; } void set_max_cll(uint64_t max_cll) { max_cll_ = max_cll; } uint64_t max_fall() const { return max_fall_; } void 
set_max_fall(uint64_t max_fall) { max_fall_ = max_fall; } private: // Returns size of Colour child elements. uint64_t PayloadSize() const; uint64_t matrix_coefficients_; uint64_t bits_per_channel_; uint64_t chroma_subsampling_horz_; uint64_t chroma_subsampling_vert_; uint64_t cb_subsampling_horz_; uint64_t cb_subsampling_vert_; uint64_t chroma_siting_horz_; uint64_t chroma_siting_vert_; uint64_t range_; uint64_t transfer_characteristics_; uint64_t primaries_; uint64_t max_cll_; uint64_t max_fall_; MasteringMetadata* mastering_metadata_; }; /////////////////////////////////////////////////////////////// // Projection element. class Projection { public: enum ProjectionType { kTypeNotPresent = -1, kRectangular = 0, kEquirectangular = 1, kCubeMap = 2, kMesh = 3, }; static const uint64_t kValueNotPresent; Projection() : type_(kRectangular), pose_yaw_(0.0), pose_pitch_(0.0), pose_roll_(0.0), private_data_(NULL), private_data_length_(0) {} ~Projection() { delete[] private_data_; } uint64_t ProjectionSize() const; bool Write(IMkvWriter* writer) const; bool SetProjectionPrivate(const uint8_t* private_data, uint64_t private_data_length); ProjectionType type() const { return type_; } void set_type(ProjectionType type) { type_ = type; } float pose_yaw() const { return pose_yaw_; } void set_pose_yaw(float pose_yaw) { pose_yaw_ = pose_yaw; } float pose_pitch() const { return pose_pitch_; } void set_pose_pitch(float pose_pitch) { pose_pitch_ = pose_pitch; } float pose_roll() const { return pose_roll_; } void set_pose_roll(float pose_roll) { pose_roll_ = pose_roll; } uint8_t* private_data() const { return private_data_; } uint64_t private_data_length() const { return private_data_length_; } private: // Returns size of VideoProjection child elements. uint64_t PayloadSize() const; ProjectionType type_; float pose_yaw_; float pose_pitch_; float pose_roll_; uint8_t* private_data_; uint64_t private_data_length_; }; /////////////////////////////////////////////////////////////// // Track element. class Track { public: // The |seed| parameter is used to synthesize a UID for the track. explicit Track(unsigned int* seed); virtual ~Track(); // Adds a ContentEncoding element to the Track. Returns true on success. virtual bool AddContentEncoding(); // Returns the ContentEncoding by index. Returns NULL if there is no // ContentEncoding match. ContentEncoding* GetContentEncodingByIndex(uint32_t index) const; // Returns the size in bytes for the payload of the Track element. virtual uint64_t PayloadSize() const; // Returns the size in bytes of the Track element. virtual uint64_t Size() const; // Output the Track element to the writer. Returns true on success. virtual bool Write(IMkvWriter* writer) const; // Sets the CodecPrivate element of the Track element. Copies |length| // bytes from |codec_private| to |codec_private_|. Returns true on success. 
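  // (Illustrative example, not mandated by this header: for an Opus audio
  // track the bytes passed here would be the codec's OpusHead setup payload,
  // e.g.
  //   track->SetCodecPrivate(opus_head, opus_head_size);
  // where |opus_head| and |opus_head_size| are hypothetical caller-supplied
  // values.)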
bool SetCodecPrivate(const uint8_t* codec_private, uint64_t length); void set_codec_id(const char* codec_id); const char* codec_id() const { return codec_id_; } const uint8_t* codec_private() const { return codec_private_; } void set_language(const char* language); const char* language() const { return language_; } void set_max_block_additional_id(uint64_t max_block_additional_id) { max_block_additional_id_ = max_block_additional_id; } uint64_t max_block_additional_id() const { return max_block_additional_id_; } void set_name(const char* name); const char* name() const { return name_; } void set_number(uint64_t number) { number_ = number; } uint64_t number() const { return number_; } void set_type(uint64_t type) { type_ = type; } uint64_t type() const { return type_; } void set_uid(uint64_t uid) { uid_ = uid; } uint64_t uid() const { return uid_; } void set_codec_delay(uint64_t codec_delay) { codec_delay_ = codec_delay; } uint64_t codec_delay() const { return codec_delay_; } void set_seek_pre_roll(uint64_t seek_pre_roll) { seek_pre_roll_ = seek_pre_roll; } uint64_t seek_pre_roll() const { return seek_pre_roll_; } void set_default_duration(uint64_t default_duration) { default_duration_ = default_duration; } uint64_t default_duration() const { return default_duration_; } uint64_t codec_private_length() const { return codec_private_length_; } uint32_t content_encoding_entries_size() const { return content_encoding_entries_size_; } private: // Track element names. char* codec_id_; uint8_t* codec_private_; char* language_; uint64_t max_block_additional_id_; char* name_; uint64_t number_; uint64_t type_; uint64_t uid_; uint64_t codec_delay_; uint64_t seek_pre_roll_; uint64_t default_duration_; // Size of the CodecPrivate data in bytes. uint64_t codec_private_length_; // ContentEncoding element list. ContentEncoding** content_encoding_entries_; // Number of ContentEncoding elements added. uint32_t content_encoding_entries_size_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Track); }; /////////////////////////////////////////////////////////////// // Track that has video specific elements. class VideoTrack : public Track { public: // Supported modes for stereo 3D. enum StereoMode { kMono = 0, kSideBySideLeftIsFirst = 1, kTopBottomRightIsFirst = 2, kTopBottomLeftIsFirst = 3, kSideBySideRightIsFirst = 11 }; enum AlphaMode { kNoAlpha = 0, kAlpha = 1 }; // The |seed| parameter is used to synthesize a UID for the track. explicit VideoTrack(unsigned int* seed); virtual ~VideoTrack(); // Returns the size in bytes for the payload of the Track element plus the // video specific elements. virtual uint64_t PayloadSize() const; // Output the VideoTrack element to the writer. Returns true on success. virtual bool Write(IMkvWriter* writer) const; // Sets the video's stereo mode. Returns true on success. bool SetStereoMode(uint64_t stereo_mode); // Sets the video's alpha mode. Returns true on success. 
bool SetAlphaMode(uint64_t alpha_mode); void set_display_height(uint64_t height) { display_height_ = height; } uint64_t display_height() const { return display_height_; } void set_display_width(uint64_t width) { display_width_ = width; } uint64_t display_width() const { return display_width_; } void set_pixel_height(uint64_t height) { pixel_height_ = height; } uint64_t pixel_height() const { return pixel_height_; } void set_pixel_width(uint64_t width) { pixel_width_ = width; } uint64_t pixel_width() const { return pixel_width_; } void set_crop_left(uint64_t crop_left) { crop_left_ = crop_left; } uint64_t crop_left() const { return crop_left_; } void set_crop_right(uint64_t crop_right) { crop_right_ = crop_right; } uint64_t crop_right() const { return crop_right_; } void set_crop_top(uint64_t crop_top) { crop_top_ = crop_top; } uint64_t crop_top() const { return crop_top_; } void set_crop_bottom(uint64_t crop_bottom) { crop_bottom_ = crop_bottom; } uint64_t crop_bottom() const { return crop_bottom_; } void set_frame_rate(double frame_rate) { frame_rate_ = frame_rate; } double frame_rate() const { return frame_rate_; } void set_height(uint64_t height) { height_ = height; } uint64_t height() const { return height_; } uint64_t stereo_mode() { return stereo_mode_; } uint64_t alpha_mode() { return alpha_mode_; } void set_width(uint64_t width) { width_ = width; } uint64_t width() const { return width_; } void set_colour_space(const char* colour_space); const char* colour_space() const { return colour_space_; } Colour* colour() { return colour_; } // Deep copies |colour|. bool SetColour(const Colour& colour); Projection* projection() { return projection_; } // Deep copies |projection|. bool SetProjection(const Projection& projection); private: // Returns the size in bytes of the Video element. uint64_t VideoPayloadSize() const; // Video track element names. uint64_t display_height_; uint64_t display_width_; uint64_t pixel_height_; uint64_t pixel_width_; uint64_t crop_left_; uint64_t crop_right_; uint64_t crop_top_; uint64_t crop_bottom_; double frame_rate_; uint64_t height_; uint64_t stereo_mode_; uint64_t alpha_mode_; uint64_t width_; char* colour_space_; Colour* colour_; Projection* projection_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(VideoTrack); }; /////////////////////////////////////////////////////////////// // Track that has audio specific elements. class AudioTrack : public Track { public: // The |seed| parameter is used to synthesize a UID for the track. explicit AudioTrack(unsigned int* seed); virtual ~AudioTrack(); // Returns the size in bytes for the payload of the Track element plus the // audio specific elements. virtual uint64_t PayloadSize() const; // Output the AudioTrack element to the writer. Returns true on success. virtual bool Write(IMkvWriter* writer) const; void set_bit_depth(uint64_t bit_depth) { bit_depth_ = bit_depth; } uint64_t bit_depth() const { return bit_depth_; } void set_channels(uint64_t channels) { channels_ = channels; } uint64_t channels() const { return channels_; } void set_sample_rate(double sample_rate) { sample_rate_ = sample_rate; } double sample_rate() const { return sample_rate_; } private: // Audio track element names. uint64_t bit_depth_; uint64_t channels_; double sample_rate_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(AudioTrack); }; /////////////////////////////////////////////////////////////// // Tracks element class Tracks { public: // Audio and video type defined by the Matroska specs. 
enum { kVideo = 0x1, kAudio = 0x2 }; static const char kOpusCodecId[]; static const char kVorbisCodecId[]; static const char kAv1CodecId[]; static const char kVp8CodecId[]; static const char kVp9CodecId[]; static const char kWebVttCaptionsId[]; static const char kWebVttDescriptionsId[]; static const char kWebVttMetadataId[]; static const char kWebVttSubtitlesId[]; Tracks(); ~Tracks(); // Adds a Track element to the Tracks object. |track| will be owned and // deleted by the Tracks object. Returns true on success. |number| is the // number to use for the track. |number| must be >= 0. If |number| == 0 // then the muxer will decide on the track number. bool AddTrack(Track* track, int32_t number); // Returns the track by index. Returns NULL if there is no track match. const Track* GetTrackByIndex(uint32_t idx) const; // Search the Tracks and return the track that matches |tn|. Returns NULL // if there is no track match. Track* GetTrackByNumber(uint64_t track_number) const; // Returns true if the track number is an audio track. bool TrackIsAudio(uint64_t track_number) const; // Returns true if the track number is a video track. bool TrackIsVideo(uint64_t track_number) const; // Output the Tracks element to the writer. Returns true on success. bool Write(IMkvWriter* writer) const; uint32_t track_entries_size() const { return track_entries_size_; } private: // Track element list. Track** track_entries_; // Number of Track elements added. uint32_t track_entries_size_; // Whether or not Tracks element has already been written via IMkvWriter. mutable bool wrote_tracks_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tracks); }; /////////////////////////////////////////////////////////////// // Chapter element // class Chapter { public: // Set the identifier for this chapter. (This corresponds to the // Cue Identifier line in WebVTT.) // TODO(matthewjheaney): the actual serialization of this item in // MKV is pending. bool set_id(const char* id); // Converts the nanosecond start and stop times of this chapter to // their corresponding timecode values, and stores them that way. void set_time(const Segment& segment, uint64_t start_time_ns, uint64_t end_time_ns); // Sets the uid for this chapter. Primarily used to enable // deterministic output from the muxer. void set_uid(const uint64_t uid) { uid_ = uid; } // Add a title string to this chapter, per the semantics described // here: // http://www.matroska.org/technical/specs/index.html // // The title ("chapter string") is a UTF-8 string. // // The language has ISO 639-2 representation, described here: // http://www.loc.gov/standards/iso639-2/englangn.html // http://www.loc.gov/standards/iso639-2/php/English_list.php // If you specify NULL as the language value, this implies // English ("eng"). // // The country value corresponds to the codes listed here: // http://www.iana.org/domains/root/db/ // // The function returns false if the string could not be allocated. bool add_string(const char* title, const char* language, const char* country); private: friend class Chapters; // For storage of chapter titles that differ by language. class Display { public: // Establish representation invariant for new Display object. void Init(); // Reclaim resources, in anticipation of destruction. void Clear(); // Copies the title to the |title_| member. Returns false on // error. bool set_title(const char* title); // Copies the language to the |language_| member. Returns false // on error. bool set_language(const char* language); // Copies the country to the |country_| member. 
Returns false on // error. bool set_country(const char* country); // If |writer| is non-NULL, serialize the Display sub-element of // the Atom into the stream. Returns the Display element size on // success, 0 if error. uint64_t WriteDisplay(IMkvWriter* writer) const; private: char* title_; char* language_; char* country_; }; Chapter(); ~Chapter(); // Establish the representation invariant for a newly-created // Chapter object. The |seed| parameter is used to create the UID // for this chapter atom. void Init(unsigned int* seed); // Copies this Chapter object to a different one. This is used when // expanding a plain array of Chapter objects (see Chapters). void ShallowCopy(Chapter* dst) const; // Reclaim resources used by this Chapter object, pending its // destruction. void Clear(); // If there is no storage remaining on the |displays_| array for a // new display object, creates a new, longer array and copies the // existing Display objects to the new array. Returns false if the // array cannot be expanded. bool ExpandDisplaysArray(); // If |writer| is non-NULL, serialize the Atom sub-element into the // stream. Returns the total size of the element on success, 0 if // error. uint64_t WriteAtom(IMkvWriter* writer) const; // The string identifier for this chapter (corresponds to WebVTT cue // identifier). char* id_; // Start timecode of the chapter. uint64_t start_timecode_; // Stop timecode of the chapter. uint64_t end_timecode_; // The binary identifier for this chapter. uint64_t uid_; // The Atom element can contain multiple Display sub-elements, as // the same logical title can be rendered in different languages. Display* displays_; // The physical length (total size) of the |displays_| array. int displays_size_; // The logical length (number of active elements) on the |displays_| // array. int displays_count_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Chapter); }; /////////////////////////////////////////////////////////////// // Chapters element // class Chapters { public: Chapters(); ~Chapters(); Chapter* AddChapter(unsigned int* seed); // Returns the number of chapters that have been added. int Count() const; // Output the Chapters element to the writer. Returns true on success. bool Write(IMkvWriter* writer) const; private: // Expands the chapters_ array if there is not enough space to contain // another chapter object. Returns true on success. bool ExpandChaptersArray(); // If |writer| is non-NULL, serialize the Edition sub-element of the // Chapters element into the stream. Returns the Edition element // size on success, 0 if error. uint64_t WriteEdition(IMkvWriter* writer) const; // Total length of the chapters_ array. int chapters_size_; // Number of active chapters on the chapters_ array. int chapters_count_; // Array for storage of chapter objects. Chapter* chapters_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Chapters); }; /////////////////////////////////////////////////////////////// // Tag element // class Tag { public: bool add_simple_tag(const char* tag_name, const char* tag_string); private: // Tags calls Clear and the destructor of Tag friend class Tags; // For storage of simple tags class SimpleTag { public: // Establish representation invariant for new SimpleTag object. void Init(); // Reclaim resources, in anticipation of destruction. void Clear(); // Copies the title to the |tag_name_| member. Returns false on // error. bool set_tag_name(const char* tag_name); // Copies the language to the |tag_string_| member. Returns false // on error. 
bool set_tag_string(const char* tag_string); // If |writer| is non-NULL, serialize the SimpleTag sub-element of // the Atom into the stream. Returns the SimpleTag element size on // success, 0 if error. uint64_t Write(IMkvWriter* writer) const; private: char* tag_name_; char* tag_string_; }; Tag(); ~Tag(); // Copies this Tag object to a different one. This is used when // expanding a plain array of Tag objects (see Tags). void ShallowCopy(Tag* dst) const; // Reclaim resources used by this Tag object, pending its // destruction. void Clear(); // If there is no storage remaining on the |simple_tags_| array for a // new display object, creates a new, longer array and copies the // existing SimpleTag objects to the new array. Returns false if the // array cannot be expanded. bool ExpandSimpleTagsArray(); // If |writer| is non-NULL, serialize the Tag sub-element into the // stream. Returns the total size of the element on success, 0 if // error. uint64_t Write(IMkvWriter* writer) const; // The Atom element can contain multiple SimpleTag sub-elements SimpleTag* simple_tags_; // The physical length (total size) of the |simple_tags_| array. int simple_tags_size_; // The logical length (number of active elements) on the |simple_tags_| // array. int simple_tags_count_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tag); }; /////////////////////////////////////////////////////////////// // Tags element // class Tags { public: Tags(); ~Tags(); Tag* AddTag(); // Returns the number of tags that have been added. int Count() const; // Output the Tags element to the writer. Returns true on success. bool Write(IMkvWriter* writer) const; private: // Expands the tags_ array if there is not enough space to contain // another tag object. Returns true on success. bool ExpandTagsArray(); // Total length of the tags_ array. int tags_size_; // Number of active tags on the tags_ array. int tags_count_; // Array for storage of tag objects. Tag* tags_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Tags); }; /////////////////////////////////////////////////////////////// // Cluster element // // Notes: // |Init| must be called before any other method in this class. class Cluster { public: // |timecode| is the absolute timecode of the cluster. |cues_pos| is the // position for the cluster within the segment that should be written in // the cues element. |timecode_scale| is the timecode scale of the segment. Cluster(uint64_t timecode, int64_t cues_pos, uint64_t timecode_scale, bool write_last_frame_with_duration = false, bool fixed_size_timecode = false); ~Cluster(); bool Init(IMkvWriter* ptr_writer); // Adds a frame to be output in the file. The frame is written out through // |writer_| if successful. Returns true on success. bool AddFrame(const Frame* frame); // Adds a frame to be output in the file. The frame is written out through // |writer_| if successful. Returns true on success. // Inputs: // data: Pointer to the data // length: Length of the data // track_number: Track to add the data to. Value returned by Add track // functions. The range of allowed values is [1, 126]. // timecode: Absolute (not relative to cluster) timestamp of the // frame, expressed in timecode units. // is_key: Flag telling whether or not this frame is a key frame. bool AddFrame(const uint8_t* data, uint64_t length, uint64_t track_number, uint64_t timecode, // timecode units (absolute) bool is_key); // Adds a frame to be output in the file. The frame is written out through // |writer_| if successful. Returns true on success. 
// Inputs: // data: Pointer to the data // length: Length of the data // additional: Pointer to the additional data // additional_length: Length of the additional data // add_id: Value of BlockAddID element // track_number: Track to add the data to. Value returned by Add track // functions. The range of allowed values is [1, 126]. // abs_timecode: Absolute (not relative to cluster) timestamp of the // frame, expressed in timecode units. // is_key: Flag telling whether or not this frame is a key frame. bool AddFrameWithAdditional(const uint8_t* data, uint64_t length, const uint8_t* additional, uint64_t additional_length, uint64_t add_id, uint64_t track_number, uint64_t abs_timecode, bool is_key); // Adds a frame to be output in the file. The frame is written out through // |writer_| if successful. Returns true on success. // Inputs: // data: Pointer to the data. // length: Length of the data. // discard_padding: DiscardPadding element value. // track_number: Track to add the data to. Value returned by Add track // functions. The range of allowed values is [1, 126]. // abs_timecode: Absolute (not relative to cluster) timestamp of the // frame, expressed in timecode units. // is_key: Flag telling whether or not this frame is a key frame. bool AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length, int64_t discard_padding, uint64_t track_number, uint64_t abs_timecode, bool is_key); // Writes a frame of metadata to the output medium; returns true on // success. // Inputs: // data: Pointer to the data // length: Length of the data // track_number: Track to add the data to. Value returned by Add track // functions. The range of allowed values is [1, 126]. // timecode: Absolute (not relative to cluster) timestamp of the // metadata frame, expressed in timecode units. // duration: Duration of metadata frame, in timecode units. // // The metadata frame is written as a block group, with a duration // sub-element but no reference time sub-elements (indicating that // it is considered a keyframe, per Matroska semantics). bool AddMetadata(const uint8_t* data, uint64_t length, uint64_t track_number, uint64_t timecode, uint64_t duration); // Increments the size of the cluster's data in bytes. void AddPayloadSize(uint64_t size); // Closes the cluster so no more data can be written to it. Will update the // cluster's size if |writer_| is seekable. Returns true on success. This // variant of Finalize() fails when |write_last_frame_with_duration_| is set // to true. bool Finalize(); // Closes the cluster so no more data can be written to it. Will update the // cluster's size if |writer_| is seekable. Returns true on success. // Inputs: // set_last_frame_duration: Boolean indicating whether or not the duration // of the last frame should be set. If set to // false, the |duration| value is ignored and // |write_last_frame_with_duration_| will not be // honored. // duration: Duration of the Cluster in timecode scale. bool Finalize(bool set_last_frame_duration, uint64_t duration); // Returns the size in bytes for the entire Cluster element. uint64_t Size() const; // Given |abs_timecode|, calculates timecode relative to most recent timecode. // Returns -1 on failure, or a relative timecode. 
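  // (Illustrative numbers: for a cluster whose timecode() is 4000, an
  // |abs_timecode| of 4250 yields 250; an |abs_timecode| earlier than the
  // cluster timecode, or too far ahead to fit the signed 16-bit block
  // timecode, yields -1.)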
  int64_t GetRelativeTimecode(int64_t abs_timecode) const;

  int64_t size_position() const { return size_position_; }
  int32_t blocks_added() const { return blocks_added_; }
  uint64_t payload_size() const { return payload_size_; }
  int64_t position_for_cues() const { return position_for_cues_; }
  uint64_t timecode() const { return timecode_; }
  uint64_t timecode_scale() const { return timecode_scale_; }
  void set_write_last_frame_with_duration(bool write_last_frame_with_duration) {
    write_last_frame_with_duration_ = write_last_frame_with_duration;
  }
  bool write_last_frame_with_duration() const {
    return write_last_frame_with_duration_;
  }

 private:
  // Iterator type for the |stored_frames_| map.
  typedef std::map<uint64_t, std::list<Frame*> >::iterator FrameMapIterator;

  // Utility method that confirms that blocks can still be added, and that the
  // cluster header has been written. Used by |DoWriteFrame*|. Returns true
  // when successful.
  bool PreWriteBlock();

  // Utility method used by the |DoWriteFrame*| methods that handles the
  // bookkeeping required after each block is written.
  void PostWriteBlock(uint64_t element_size);

  // Does some verification and calls WriteFrame.
  bool DoWriteFrame(const Frame* const frame);

  // Either holds back the given frame, or writes it out depending on whether
  // or not |write_last_frame_with_duration_| is set.
  bool QueueOrWriteFrame(const Frame* const frame);

  // Outputs the Cluster header to |writer_|. Returns true on success.
  bool WriteClusterHeader();

  // Number of blocks added to the cluster.
  int32_t blocks_added_;

  // Flag telling if the cluster has been closed.
  bool finalized_;

  // Flag indicating whether the cluster's timecode will always be written out
  // using 8 bytes.
  bool fixed_size_timecode_;

  // Flag telling if the cluster's header has been written.
  bool header_written_;

  // The size of the cluster elements in bytes.
  uint64_t payload_size_;

  // The file position used for cue points.
  const int64_t position_for_cues_;

  // The file position of the cluster's size element.
  int64_t size_position_;

  // The absolute timecode of the cluster.
  const uint64_t timecode_;

  // The timecode scale of the Segment containing the cluster.
  const uint64_t timecode_scale_;

  // Flag indicating whether the last frame of the cluster should be written as
  // a Block with Duration. If set to true, then it will result in holding back
  // of frames and the parameterized version of Finalize() must be called to
  // finish writing the Cluster.
  bool write_last_frame_with_duration_;

  // Map used to hold back frames, if required. Track number is the key.
  std::map<uint64_t, std::list<Frame*> > stored_frames_;

  // Map from track number to the timestamp of the last block written for that
  // track.
  std::map<uint64_t, uint64_t> last_block_timestamp_;

  // Pointer to the writer object. Not owned by this class.
  IMkvWriter* writer_;

  LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Cluster);
};

///////////////////////////////////////////////////////////////
// SeekHead element
class SeekHead {
 public:
  SeekHead();
  ~SeekHead();

  // TODO(fgalligan): Change this to reserve a certain size. Then check how
  // big the seek entry to be added is as not every seek entry will be the
  // maximum size it could be.
  // Adds a seek entry to be written out when the element is finalized. |id|
  // must be the coded mkv element id. |pos| is the file position of the
  // element. Returns true on success.
  bool AddSeekEntry(uint32_t id, uint64_t pos);

  // Writes out SeekHead and SeekEntry elements. Returns true on success.
  bool Finalize(IMkvWriter* writer) const;

  // Returns the id of the Seek Entry at the given index.
Returns -1 if index is // out of range. uint32_t GetId(int index) const; // Returns the position of the Seek Entry at the given index. Returns -1 if // index is out of range. uint64_t GetPosition(int index) const; // Sets the Seek Entry id and position at given index. // Returns true on success. bool SetSeekEntry(int index, uint32_t id, uint64_t position); // Reserves space by writing out a Void element which will be updated with // a SeekHead element later. Returns true on success. bool Write(IMkvWriter* writer); // We are going to put a cap on the number of Seek Entries. const static int32_t kSeekEntryCount = 5; private: // Returns the maximum size in bytes of one seek entry. uint64_t MaxEntrySize() const; // Seek entry id element list. uint32_t seek_entry_id_[kSeekEntryCount]; // Seek entry pos element list. uint64_t seek_entry_pos_[kSeekEntryCount]; // The file position of SeekHead element. int64_t start_pos_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SeekHead); }; /////////////////////////////////////////////////////////////// // Segment Information element class SegmentInfo { public: SegmentInfo(); ~SegmentInfo(); // Will update the duration if |duration_| is > 0.0. Returns true on success. bool Finalize(IMkvWriter* writer) const; // Sets |muxing_app_| and |writing_app_|. bool Init(); // Output the Segment Information element to the writer. Returns true on // success. bool Write(IMkvWriter* writer); void set_duration(double duration) { duration_ = duration; } double duration() const { return duration_; } void set_muxing_app(const char* app); const char* muxing_app() const { return muxing_app_; } void set_timecode_scale(uint64_t scale) { timecode_scale_ = scale; } uint64_t timecode_scale() const { return timecode_scale_; } void set_writing_app(const char* app); const char* writing_app() const { return writing_app_; } void set_date_utc(int64_t date_utc) { date_utc_ = date_utc; } int64_t date_utc() const { return date_utc_; } private: // Segment Information element names. // Initially set to -1 to signify that a duration has not been set and should // not be written out. double duration_; // Set to libwebm-%d.%d.%d.%d, major, minor, build, revision. char* muxing_app_; uint64_t timecode_scale_; // Initially set to libwebm-%d.%d.%d.%d, major, minor, build, revision. char* writing_app_; // LLONG_MIN when DateUTC is not set. int64_t date_utc_; // The file position of the duration element. int64_t duration_pos_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(SegmentInfo); }; /////////////////////////////////////////////////////////////// // This class represents the main segment in a WebM file. Currently only // supports one Segment element. // // Notes: // |Init| must be called before any other method in this class. class Segment { public: enum Mode { kLive = 0x1, kFile = 0x2 }; enum CuesPosition { kAfterClusters = 0x0, // Position Cues after Clusters - Default kBeforeClusters = 0x1 // Position Cues before Clusters }; static const uint32_t kDefaultDocTypeVersion = 4; static const uint64_t kDefaultMaxClusterDuration = 30000000000ULL; Segment(); ~Segment(); // Initializes |SegmentInfo| and returns result. Always returns false when // |ptr_writer| is NULL. bool Init(IMkvWriter* ptr_writer); // Adds a generic track to the segment. Returns the newly-allocated // track object (which is owned by the segment) on success, NULL on // error. |number| is the number to use for the track. |number| // must be >= 0. If |number| == 0 then the muxer will decide on the // track number. 
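  // A minimal end-to-end sketch (illustrative; |writer|, |frame_data|,
  // |frame_size|, |timestamp_ns| and |is_key| are assumed to be supplied by
  // the caller):
  //   mkvmuxer::Segment segment;
  //   segment.Init(&writer);
  //   const uint64_t video = segment.AddVideoTrack(640, 360, 0);
  //   segment.AddFrame(frame_data, frame_size, video, timestamp_ns, is_key);
  //   segment.Finalize();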
Track* AddTrack(int32_t number); // Adds a Vorbis audio track to the segment. Returns the number of the track // on success, 0 on error. |number| is the number to use for the audio track. // |number| must be >= 0. If |number| == 0 then the muxer will decide on // the track number. uint64_t AddAudioTrack(int32_t sample_rate, int32_t channels, int32_t number); // Adds an empty chapter to the chapters of this segment. Returns // non-NULL on success. After adding the chapter, the caller should // populate its fields via the Chapter member functions. Chapter* AddChapter(); // Adds an empty tag to the tags of this segment. Returns // non-NULL on success. After adding the tag, the caller should // populate its fields via the Tag member functions. Tag* AddTag(); // Adds a cue point to the Cues element. |timestamp| is the time in // nanoseconds of the cue's time. |track| is the Track of the Cue. This // function must be called after AddFrame to calculate the correct // BlockNumber for the CuePoint. Returns true on success. bool AddCuePoint(uint64_t timestamp, uint64_t track); // Adds a frame to be output in the file. Returns true on success. // Inputs: // data: Pointer to the data // length: Length of the data // track_number: Track to add the data to. Value returned by Add track // functions. // timestamp: Timestamp of the frame in nanoseconds from 0. // is_key: Flag telling whether or not this frame is a key frame. bool AddFrame(const uint8_t* data, uint64_t length, uint64_t track_number, uint64_t timestamp_ns, bool is_key); // Writes a frame of metadata to the output medium; returns true on // success. // Inputs: // data: Pointer to the data // length: Length of the data // track_number: Track to add the data to. Value returned by Add track // functions. // timecode: Absolute timestamp of the metadata frame, expressed // in nanosecond units. // duration: Duration of metadata frame, in nanosecond units. // // The metadata frame is written as a block group, with a duration // sub-element but no reference time sub-elements (indicating that // it is considered a keyframe, per Matroska semantics). bool AddMetadata(const uint8_t* data, uint64_t length, uint64_t track_number, uint64_t timestamp_ns, uint64_t duration_ns); // Writes a frame with additional data to the output medium; returns true on // success. // Inputs: // data: Pointer to the data. // length: Length of the data. // additional: Pointer to additional data. // additional_length: Length of additional data. // add_id: Additional ID which identifies the type of additional data. // track_number: Track to add the data to. Value returned by Add track // functions. // timestamp: Absolute timestamp of the frame, expressed in nanosecond // units. // is_key: Flag telling whether or not this frame is a key frame. bool AddFrameWithAdditional(const uint8_t* data, uint64_t length, const uint8_t* additional, uint64_t additional_length, uint64_t add_id, uint64_t track_number, uint64_t timestamp, bool is_key); // Writes a frame with DiscardPadding to the output medium; returns true on // success. // Inputs: // data: Pointer to the data. // length: Length of the data. // discard_padding: DiscardPadding element value. // track_number: Track to add the data to. Value returned by Add track // functions. // timestamp: Absolute timestamp of the frame, expressed in nanosecond // units. // is_key: Flag telling whether or not this frame is a key frame. 
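  // (Illustrative: DiscardPadding is expressed in nanoseconds, so trimming
  // 6.5 ms of decoder output from the end of a final Opus packet would pass
  // discard_padding = 6500000.)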
  bool AddFrameWithDiscardPadding(const uint8_t* data, uint64_t length,
                                  int64_t discard_padding,
                                  uint64_t track_number, uint64_t timestamp,
                                  bool is_key);

  // Writes a Frame to the output medium. Chooses the correct way of writing
  // the frame (Block vs SimpleBlock) based on the parameters passed.
  // Inputs:
  //   frame: frame object
  bool AddGenericFrame(const Frame* frame);

  // Adds a VP8 video track to the segment. Returns the number of the track on
  // success, 0 on error. |number| is the number to use for the video track.
  // |number| must be >= 0. If |number| == 0 then the muxer will decide on
  // the track number.
  uint64_t AddVideoTrack(int32_t width, int32_t height, int32_t number);

  // This function must be called after Finalize() if you need a copy of the
  // output with Cues written before the Clusters. It will return false if the
  // writer is not seekable or if chunking is set to true.
  // Input parameters:
  // reader - an IMkvReader object created with the same underlying file as
  //          the current writer object. Make sure to close the existing writer
  //          object before creating this so that all the data is properly
  //          flushed and available for reading.
  // writer - an IMkvWriter object pointing to a *different* file than the one
  //          pointed to by the current writer object. This file will contain
  //          the Cues element before the Clusters.
  bool CopyAndMoveCuesBeforeClusters(mkvparser::IMkvReader* reader,
                                     IMkvWriter* writer);

  // Sets which track to use for the Cues element. Must have added the track
  // before calling this function. Returns true on success. |track_number| is
  // returned by the Add track functions.
  bool CuesTrack(uint64_t track_number);

  // This will force the muxer to create a new Cluster when the next frame is
  // added.
  void ForceNewClusterOnNextFrame();

  // Writes out any frames that have not been written out. Finalizes the last
  // cluster. May update the size and duration of the segment. May output the
  // Cues element. May finalize the SeekHead element. Returns true on success.
  bool Finalize();

  // Returns the Cues object.
  Cues* GetCues() { return &cues_; }

  // Returns the Segment Information object.
  const SegmentInfo* GetSegmentInfo() const { return &segment_info_; }
  SegmentInfo* GetSegmentInfo() { return &segment_info_; }

  // Search the Tracks and return the track that matches |track_number|.
  // Returns NULL if there is no track match.
  Track* GetTrackByNumber(uint64_t track_number) const;

  // Toggles whether to output a cues element.
  void OutputCues(bool output_cues);

  // Toggles whether to write the last frame in each Cluster with Duration.
  void AccurateClusterDuration(bool accurate_cluster_duration);

  // Toggles whether to write the Cluster Timecode using exactly 8 bytes.
  void UseFixedSizeClusterTimecode(bool fixed_size_cluster_timecode);

  // Sets if the muxer will output files in chunks or not. |chunking| is a
  // flag telling whether or not to turn on chunking. |filename| is the base
  // filename for the chunk files. The header chunk file will be named
  // |filename|.hdr and the data chunks will be named
  // |filename|_XXXXXX.chk. Chunking implies that the muxer will be writing
  // to files so the muxer will use the default MkvWriter class to control
  // what data is written to what files. Returns true on success.
  // TODO: Should we change the IMkvWriter Interface to add Open and Close?
  // That will force the interface to be dependent on files.
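  // (Illustrative: SetChunking(true, "out") would produce "out.hdr" for the
  // header chunk plus "out_000000.chk", "out_000001.chk", ... for successive
  // data chunks, following the naming pattern described above.)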
bool SetChunking(bool chunking, const char* filename); bool chunking() const { return chunking_; } uint64_t cues_track() const { return cues_track_; } void set_max_cluster_duration(uint64_t max_cluster_duration) { max_cluster_duration_ = max_cluster_duration; } uint64_t max_cluster_duration() const { return max_cluster_duration_; } void set_max_cluster_size(uint64_t max_cluster_size) { max_cluster_size_ = max_cluster_size; } uint64_t max_cluster_size() const { return max_cluster_size_; } void set_mode(Mode mode) { mode_ = mode; } Mode mode() const { return mode_; } CuesPosition cues_position() const { return cues_position_; } bool output_cues() const { return output_cues_; } void set_estimate_file_duration(bool estimate_duration) { estimate_file_duration_ = estimate_duration; } bool estimate_file_duration() const { return estimate_file_duration_; } const SegmentInfo* segment_info() const { return &segment_info_; } void set_duration(double duration) { duration_ = duration; } double duration() const { return duration_; } // Returns true when codec IDs are valid for WebM. bool DocTypeIsWebm() const; private: // Checks if header information has been output and initialized. If not it // will output the Segment element and initialize the SeekHead element and // Cues elements. bool CheckHeaderInfo(); // Sets |doc_type_version_| based on the current element requirements. void UpdateDocTypeVersion(); // Sets |name| according to how many chunks have been written. |ext| is the // file extension. |name| must be deleted by the calling app. Returns true // on success. bool UpdateChunkName(const char* ext, char** name) const; // Returns the maximum offset within the segment's payload. When chunking // this function is needed to determine offsets of elements within the // chunked files. Returns -1 on error. int64_t MaxOffset(); // Adds the frame to our frame array. bool QueueFrame(Frame* frame); // Output all frames that are queued. Returns -1 on error, otherwise // it returns the number of frames written. int WriteFramesAll(); // Output all frames that are queued that have an end time that is less // than |timestamp|. Returns true on success and if there are no frames // queued. bool WriteFramesLessThan(uint64_t timestamp); // Outputs the segment header, Segment Information element, SeekHead element, // and Tracks element to |writer_|. bool WriteSegmentHeader(); // Given a frame with the specified timestamp (nanosecond units) and // keyframe status, determine whether a new cluster should be // created, before writing enqueued frames and the frame itself. The // function returns one of the following values: // -1 = error: an out-of-order frame was detected // 0 = do not create a new cluster, and write frame to the existing cluster // 1 = create a new cluster, and write frame to that new cluster // 2 = create a new cluster, and re-run test int TestFrame(uint64_t track_num, uint64_t timestamp_ns, bool key) const; // Create a new cluster, using the earlier of the first enqueued // frame, or the indicated time. Returns true on success. bool MakeNewCluster(uint64_t timestamp_ns); // Checks whether a new cluster needs to be created, and if so // creates a new cluster. Returns false if creation of a new cluster // was necessary but creation was not successful. bool DoNewClusterProcessing(uint64_t track_num, uint64_t timestamp_ns, bool key); // Adjusts Cue Point values (to place Cues before Clusters) so that they // reflect the correct offsets.
void MoveCuesBeforeClusters(); // This function recursively computes the correct cluster offsets (this is // done to move the Cues before Clusters). It recursively updates the change // in size (which indicates a change in cluster offset) until no sizes change. // Parameters: // diff - indicates the difference in size of the Cues element that needs to // be accounted for. // index - index in the list of Cues which is currently being adjusted. // cue_size - sum of size of all the CuePoint elements. void MoveCuesBeforeClustersHelper(uint64_t diff, int index, uint64_t* cue_size); // Seeds the random number generator used to make UIDs. unsigned int seed_; // WebM elements Cues cues_; SeekHead seek_head_; SegmentInfo segment_info_; Tracks tracks_; Chapters chapters_; Tags tags_; // Number of chunks written. int chunk_count_; // Current chunk filename. char* chunk_name_; // Default MkvWriter object created by this class used for writing clusters // out in separate files. MkvWriter* chunk_writer_cluster_; // Default MkvWriter object created by this class used for writing Cues // element out to a file. MkvWriter* chunk_writer_cues_; // Default MkvWriter object created by this class used for writing the // Matroska header out to a file. MkvWriter* chunk_writer_header_; // Flag telling whether or not the muxer is chunking output to multiple // files. bool chunking_; // Base filename for the chunked files. char* chunking_base_name_; // File position offset where the Clusters end. int64_t cluster_end_offset_; // List of clusters. Cluster** cluster_list_; // Number of cluster pointers allocated in the cluster list. int32_t cluster_list_capacity_; // Number of clusters in the cluster list. int32_t cluster_list_size_; // Indicates whether Cues should be written before or after Clusters. CuesPosition cues_position_; // Track number that is associated with the cues element for this segment. uint64_t cues_track_; // Tells the muxer to force a new cluster on the next Block. bool force_new_cluster_; // List of stored audio frames. These variables are used to store frames so // the muxer can follow the guideline "Audio blocks that contain the video // key frame's timecode should be in the same cluster as the video key frame // block." Frame** frames_; // Number of frame pointers allocated in the frame list. int32_t frames_capacity_; // Number of frames in the frame list. int32_t frames_size_; // Flag telling if a video track has been added to the segment. bool has_video_; // Flag telling if the segment's header has been written. bool header_written_; // Duration of the last block in nanoseconds. uint64_t last_block_duration_; // Last timestamp in nanoseconds added to a cluster. uint64_t last_timestamp_; // Last timestamp in nanoseconds by track number added to a cluster. uint64_t last_track_timestamp_[kMaxTrackNumber]; // Number of frames written per track. uint64_t track_frames_written_[kMaxTrackNumber]; // Maximum time in nanoseconds for a cluster duration. This variable is a // guideline and some clusters may have a longer duration. Default is 30 // seconds. uint64_t max_cluster_duration_; // Maximum size in bytes for a cluster. This variable is a guideline and // some clusters may have a larger size. Default is 0 which signifies that // the muxer will decide the size. uint64_t max_cluster_size_; // The mode that the segment is in. If set to |kLive| the writer must not // seek backwards. Mode mode_; // Flag telling the muxer that a new cue point should be added.
bool new_cuepoint_; // TODO(fgalligan): Should we add support for more than one Cues element? // Flag whether or not the muxer should output a Cues element. bool output_cues_; // Flag whether or not the last frame in each Cluster will have a Duration // element in it. bool accurate_cluster_duration_; // Flag whether or not to write the Cluster Timecode using exactly 8 bytes. bool fixed_size_cluster_timecode_; // Flag whether or not to estimate the file duration. bool estimate_file_duration_; // The size of the EBML header, used to validate the header if // WriteEbmlHeader() is called more than once. int32_t ebml_header_size_; // The file position of the segment's payload. int64_t payload_pos_; // The file position of the element's size. int64_t size_position_; // Current DocTypeVersion (|doc_type_version_|) and that written in // WriteSegmentHeader(). // WriteEbmlHeader() will be called from Finalize() if |doc_type_version_| // differs from |doc_type_version_written_|. uint32_t doc_type_version_; uint32_t doc_type_version_written_; // If |duration_| is > 0, then explicitly set the duration of the segment. double duration_; // Pointer to the writer objects. Not owned by this class. IMkvWriter* writer_cluster_; IMkvWriter* writer_cues_; IMkvWriter* writer_header_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(Segment); }; } // namespace mkvmuxer #endif // MKVMUXER_MKVMUXER_H_ libvpx-1.8.2/third_party/libwebm/mkvmuxer/mkvmuxertypes.h000066400000000000000000000017761357355204000240070ustar00rootroot00000000000000// Copyright (c) 2012 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. #ifndef MKVMUXER_MKVMUXERTYPES_H_ #define MKVMUXER_MKVMUXERTYPES_H_ namespace mkvmuxer { typedef unsigned char uint8; typedef short int16; typedef int int32; typedef unsigned int uint32; typedef long long int64; typedef unsigned long long uint64; } // namespace mkvmuxer // Copied from Chromium basictypes.h // A macro to disallow the copy constructor and operator= functions // This should be used in the private: declarations for a class #define LIBWEBM_DISALLOW_COPY_AND_ASSIGN(TypeName) \ TypeName(const TypeName&); \ void operator=(const TypeName&) #endif // MKVMUXER_MKVMUXERTYPES_H_ libvpx-1.8.2/third_party/libwebm/mkvmuxer/mkvmuxerutil.cc000066400000000000000000000442141357355204000237520ustar00rootroot00000000000000// Copyright (c) 2012 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. #include "mkvmuxer/mkvmuxerutil.h" #ifdef __ANDROID__ #include <fcntl.h> #include <unistd.h> #endif #include <cassert> #include <cmath> #include <cstdio> #include <cstdlib> #include <cstring> #include <ctime> #include <new> #include "common/webmids.h" #include "mkvmuxer/mkvmuxer.h" #include "mkvmuxer/mkvwriter.h" namespace mkvmuxer { namespace { // Date elements are always 8 octets in size.
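// (A Matroska Date holds a signed 64-bit count of nanoseconds relative to
// 2001-01-01 00:00:00 UTC, hence the fixed 8-octet payload.)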
const int kDateElementSize = 8; uint64 WriteBlock(IMkvWriter* writer, const Frame* const frame, int64 timecode, uint64 timecode_scale) { uint64 block_additional_elem_size = 0; uint64 block_addid_elem_size = 0; uint64 block_more_payload_size = 0; uint64 block_more_elem_size = 0; uint64 block_additions_payload_size = 0; uint64 block_additions_elem_size = 0; if (frame->additional()) { block_additional_elem_size = EbmlElementSize(libwebm::kMkvBlockAdditional, frame->additional(), frame->additional_length()); block_addid_elem_size = EbmlElementSize( libwebm::kMkvBlockAddID, static_cast<uint64>(frame->add_id())); block_more_payload_size = block_addid_elem_size + block_additional_elem_size; block_more_elem_size = EbmlMasterElementSize(libwebm::kMkvBlockMore, block_more_payload_size) + block_more_payload_size; block_additions_payload_size = block_more_elem_size; block_additions_elem_size = EbmlMasterElementSize(libwebm::kMkvBlockAdditions, block_additions_payload_size) + block_additions_payload_size; } uint64 discard_padding_elem_size = 0; if (frame->discard_padding() != 0) { discard_padding_elem_size = EbmlElementSize(libwebm::kMkvDiscardPadding, static_cast<int64>(frame->discard_padding())); } const uint64 reference_block_timestamp = frame->reference_block_timestamp() / timecode_scale; uint64 reference_block_elem_size = 0; if (!frame->is_key()) { reference_block_elem_size = EbmlElementSize(libwebm::kMkvReferenceBlock, reference_block_timestamp); } const uint64 duration = frame->duration() / timecode_scale; uint64 block_duration_elem_size = 0; if (duration > 0) block_duration_elem_size = EbmlElementSize(libwebm::kMkvBlockDuration, duration); const uint64 block_payload_size = 4 + frame->length(); const uint64 block_elem_size = EbmlMasterElementSize(libwebm::kMkvBlock, block_payload_size) + block_payload_size; const uint64 block_group_payload_size = block_elem_size + block_additions_elem_size + block_duration_elem_size + discard_padding_elem_size + reference_block_elem_size; if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockGroup, block_group_payload_size)) { return 0; } if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlock, block_payload_size)) return 0; if (WriteUInt(writer, frame->track_number())) return 0; if (SerializeInt(writer, timecode, 2)) return 0; // For a Block, flags is always 0.
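// As a worked example (illustrative): for track number 1 and a relative
// timecode of 0x0012, the four bytes written before the frame payload are
// 0x81 0x00 0x12 0x00 -- the EBML-coded track number, the big-endian 16-bit
// timecode, and the zero flags byte (hence block_payload_size is
// 4 + frame->length() above).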
if (SerializeInt(writer, 0, 1)) return 0; if (writer->Write(frame->frame(), static_cast<uint32>(frame->length()))) return 0; if (frame->additional()) { if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockAdditions, block_additions_payload_size)) { return 0; } if (!WriteEbmlMasterElement(writer, libwebm::kMkvBlockMore, block_more_payload_size)) return 0; if (!WriteEbmlElement(writer, libwebm::kMkvBlockAddID, static_cast<uint64>(frame->add_id()))) return 0; if (!WriteEbmlElement(writer, libwebm::kMkvBlockAdditional, frame->additional(), frame->additional_length())) { return 0; } } if (frame->discard_padding() != 0 && !WriteEbmlElement(writer, libwebm::kMkvDiscardPadding, static_cast<int64>(frame->discard_padding()))) { return 0; } if (!frame->is_key() && !WriteEbmlElement(writer, libwebm::kMkvReferenceBlock, reference_block_timestamp)) { return 0; } if (duration > 0 && !WriteEbmlElement(writer, libwebm::kMkvBlockDuration, duration)) { return 0; } return EbmlMasterElementSize(libwebm::kMkvBlockGroup, block_group_payload_size) + block_group_payload_size; } uint64 WriteSimpleBlock(IMkvWriter* writer, const Frame* const frame, int64 timecode) { if (WriteID(writer, libwebm::kMkvSimpleBlock)) return 0; const int32 size = static_cast<int32>(frame->length()) + 4; if (WriteUInt(writer, size)) return 0; if (WriteUInt(writer, static_cast<uint64>(frame->track_number()))) return 0; if (SerializeInt(writer, timecode, 2)) return 0; uint64 flags = 0; if (frame->is_key()) flags |= 0x80; if (SerializeInt(writer, flags, 1)) return 0; if (writer->Write(frame->frame(), static_cast<uint32>(frame->length()))) return 0; return GetUIntSize(libwebm::kMkvSimpleBlock) + GetCodedUIntSize(size) + 4 + frame->length(); } } // namespace int32 GetCodedUIntSize(uint64 value) { if (value < 0x000000000000007FULL) return 1; else if (value < 0x0000000000003FFFULL) return 2; else if (value < 0x00000000001FFFFFULL) return 3; else if (value < 0x000000000FFFFFFFULL) return 4; else if (value < 0x00000007FFFFFFFFULL) return 5; else if (value < 0x000003FFFFFFFFFFULL) return 6; else if (value < 0x0001FFFFFFFFFFFFULL) return 7; return 8; } int32 GetUIntSize(uint64 value) { if (value < 0x0000000000000100ULL) return 1; else if (value < 0x0000000000010000ULL) return 2; else if (value < 0x0000000001000000ULL) return 3; else if (value < 0x0000000100000000ULL) return 4; else if (value < 0x0000010000000000ULL) return 5; else if (value < 0x0001000000000000ULL) return 6; else if (value < 0x0100000000000000ULL) return 7; return 8; } int32 GetIntSize(int64 value) { // Doubling the requested value ensures positive values with their high bit // set are written with 0-padding to avoid flipping the signedness. const uint64 v = (value < 0) ? value ^ -1LL : value; return GetUIntSize(2 * v); } uint64 EbmlMasterElementSize(uint64 type, uint64 value) { // Size of EBML ID int32 ebml_size = GetUIntSize(type); // Datasize ebml_size += GetCodedUIntSize(value); return ebml_size; } uint64 EbmlElementSize(uint64 type, int64 value) { // Size of EBML ID int32 ebml_size = GetUIntSize(type); // Datasize ebml_size += GetIntSize(value); // Size of Datasize ebml_size++; return ebml_size; } uint64 EbmlElementSize(uint64 type, uint64 value) { return EbmlElementSize(type, value, 0); } uint64 EbmlElementSize(uint64 type, uint64 value, uint64 fixed_size) { // Size of EBML ID uint64 ebml_size = GetUIntSize(type); // Datasize ebml_size += (fixed_size > 0) ?
fixed_size : GetUIntSize(value); // Size of Datasize ebml_size++; return ebml_size; } uint64 EbmlElementSize(uint64 type, float /* value */) { // Size of EBML ID uint64 ebml_size = GetUIntSize(type); // Datasize ebml_size += sizeof(float); // Size of Datasize ebml_size++; return ebml_size; } uint64 EbmlElementSize(uint64 type, const char* value) { if (!value) return 0; // Size of EBML ID uint64 ebml_size = GetUIntSize(type); // Datasize ebml_size += strlen(value); // Size of Datasize ebml_size += GetCodedUIntSize(strlen(value)); return ebml_size; } uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size) { if (!value) return 0; // Size of EBML ID uint64 ebml_size = GetUIntSize(type); // Datasize ebml_size += size; // Size of Datasize ebml_size += GetCodedUIntSize(size); return ebml_size; } uint64 EbmlDateElementSize(uint64 type) { // Size of EBML ID uint64 ebml_size = GetUIntSize(type); // Datasize ebml_size += kDateElementSize; // Size of Datasize ebml_size++; return ebml_size; } int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size) { if (!writer || size < 1 || size > 8) return -1; for (int32 i = 1; i <= size; ++i) { const int32 byte_count = size - i; const int32 bit_count = byte_count * 8; const int64 bb = value >> bit_count; const uint8 b = static_cast<uint8>(bb); const int32 status = writer->Write(&b, 1); if (status < 0) return status; } return 0; } int32 SerializeFloat(IMkvWriter* writer, float f) { if (!writer) return -1; assert(sizeof(uint32) == sizeof(float)); // This union is merely used to avoid a reinterpret_cast from float& to // uint32& which will result in violation of strict aliasing. union U32 { uint32 u32; float f; } value; value.f = f; for (int32 i = 1; i <= 4; ++i) { const int32 byte_count = 4 - i; const int32 bit_count = byte_count * 8; const uint8 byte = static_cast<uint8>(value.u32 >> bit_count); const int32 status = writer->Write(&byte, 1); if (status < 0) return status; } return 0; } int32 WriteUInt(IMkvWriter* writer, uint64 value) { if (!writer) return -1; int32 size = GetCodedUIntSize(value); return WriteUIntSize(writer, value, size); } int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size) { if (!writer || size < 0 || size > 8) return -1; if (size > 0) { const uint64 bit = 1LL << (size * 7); if (value > (bit - 2)) return -1; value |= bit; } else { size = 1; int64 bit; for (;;) { bit = 1LL << (size * 7); const uint64 max = bit - 2; if (value <= max) break; ++size; } if (size > 8) return -1; value |= bit; } return SerializeInt(writer, value, size); } int32 WriteID(IMkvWriter* writer, uint64 type) { if (!writer) return -1; writer->ElementStartNotify(type, writer->Position()); const int32 size = GetUIntSize(type); return SerializeInt(writer, type, size); } bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 type, uint64 size) { if (!writer) return false; if (WriteID(writer, type)) return false; if (WriteUInt(writer, size)) return false; return true; } bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value) { return WriteEbmlElement(writer, type, value, 0); } bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value, uint64 fixed_size) { if (!writer) return false; if (WriteID(writer, type)) return false; uint64 size = GetUIntSize(value); if (fixed_size > 0) { if (size > fixed_size) return false; size = fixed_size; } if (WriteUInt(writer, size)) return false; if (SerializeInt(writer, value, static_cast<int32>(size))) return false; return true; } bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value) { if (!writer)
return false; if (WriteID(writer, type)) return false; const uint64 size = GetIntSize(value); if (WriteUInt(writer, size)) return false; if (SerializeInt(writer, value, static_cast<int32>(size))) return false; return true; } bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value) { if (!writer) return false; if (WriteID(writer, type)) return false; if (WriteUInt(writer, 4)) return false; if (SerializeFloat(writer, value)) return false; return true; } bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value) { if (!writer || !value) return false; if (WriteID(writer, type)) return false; const uint64 length = strlen(value); if (WriteUInt(writer, length)) return false; if (writer->Write(value, static_cast<uint32>(length))) return false; return true; } bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value, uint64 size) { if (!writer || !value || size < 1) return false; if (WriteID(writer, type)) return false; if (WriteUInt(writer, size)) return false; if (writer->Write(value, static_cast<uint32>(size))) return false; return true; } bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value) { if (!writer) return false; if (WriteID(writer, type)) return false; if (WriteUInt(writer, kDateElementSize)) return false; if (SerializeInt(writer, value, kDateElementSize)) return false; return true; } uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame, Cluster* cluster) { if (!writer || !frame || !frame->IsValid() || !cluster || !cluster->timecode_scale()) return 0; // Technically the timecode for a block can be less than the // timecode for the cluster itself (remember that block timecode // is a signed, 16-bit integer). However, as a simplification we // only permit non-negative cluster-relative timecodes for blocks. const int64 relative_timecode = cluster->GetRelativeTimecode( frame->timestamp() / cluster->timecode_scale()); if (relative_timecode < 0 || relative_timecode > kMaxBlockTimecode) return 0; return frame->CanBeSimpleBlock() ? WriteSimpleBlock(writer, frame, relative_timecode) : WriteBlock(writer, frame, relative_timecode, cluster->timecode_scale()); } uint64 WriteVoidElement(IMkvWriter* writer, uint64 size) { if (!writer) return 0; // Subtract one for the void ID and the coded size. uint64 void_entry_size = size - 1 - GetCodedUIntSize(size - 1); uint64 void_size = EbmlMasterElementSize(libwebm::kMkvVoid, void_entry_size) + void_entry_size; if (void_size != size) return 0; const int64 payload_position = writer->Position(); if (payload_position < 0) return 0; if (WriteID(writer, libwebm::kMkvVoid)) return 0; if (WriteUInt(writer, void_entry_size)) return 0; const uint8 value = 0; for (int32 i = 0; i < static_cast<int32>(void_entry_size); ++i) { if (writer->Write(&value, 1)) return 0; } const int64 stop_position = writer->Position(); if (stop_position < 0 || stop_position - payload_position != static_cast<int64>(void_size)) return 0; return void_size; } void GetVersion(int32* major, int32* minor, int32* build, int32* revision) { *major = 0; *minor = 2; *build = 1; *revision = 0; } uint64 MakeUID(unsigned int* seed) { uint64 uid = 0; #ifdef __MINGW32__ srand(*seed); #endif for (int i = 0; i < 7; ++i) { // avoid problems with 8-byte values uid <<= 8; // TODO(fgalligan): Move random number generation to platform specific code.
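// Note: seven one-byte iterations keep the UID below 2^56, so it always
// fits in a positive EBML unsigned integer (see the "avoid problems with
// 8-byte values" note above).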
#ifdef _MSC_VER (void)seed; const int32 nn = rand(); #elif __ANDROID__ (void)seed; int32 temp_num = 1; int fd = open("/dev/urandom", O_RDONLY); if (fd != -1) { read(fd, &temp_num, sizeof(temp_num)); close(fd); } const int32 nn = temp_num; #elif defined __MINGW32__ const int32 nn = rand(); #else const int32 nn = rand_r(seed); #endif const int32 n = 0xFF & (nn >> 4); // throw away low-order bits uid |= n; } return uid; } bool IsMatrixCoefficientsValueValid(uint64_t value) { switch (value) { case mkvmuxer::Colour::kGbr: case mkvmuxer::Colour::kBt709: case mkvmuxer::Colour::kUnspecifiedMc: case mkvmuxer::Colour::kReserved: case mkvmuxer::Colour::kFcc: case mkvmuxer::Colour::kBt470bg: case mkvmuxer::Colour::kSmpte170MMc: case mkvmuxer::Colour::kSmpte240MMc: case mkvmuxer::Colour::kYcocg: case mkvmuxer::Colour::kBt2020NonConstantLuminance: case mkvmuxer::Colour::kBt2020ConstantLuminance: return true; } return false; } bool IsChromaSitingHorzValueValid(uint64_t value) { switch (value) { case mkvmuxer::Colour::kUnspecifiedCsh: case mkvmuxer::Colour::kLeftCollocated: case mkvmuxer::Colour::kHalfCsh: return true; } return false; } bool IsChromaSitingVertValueValid(uint64_t value) { switch (value) { case mkvmuxer::Colour::kUnspecifiedCsv: case mkvmuxer::Colour::kTopCollocated: case mkvmuxer::Colour::kHalfCsv: return true; } return false; } bool IsColourRangeValueValid(uint64_t value) { switch (value) { case mkvmuxer::Colour::kUnspecifiedCr: case mkvmuxer::Colour::kBroadcastRange: case mkvmuxer::Colour::kFullRange: case mkvmuxer::Colour::kMcTcDefined: return true; } return false; } bool IsTransferCharacteristicsValueValid(uint64_t value) { switch (value) { case mkvmuxer::Colour::kIturBt709Tc: case mkvmuxer::Colour::kUnspecifiedTc: case mkvmuxer::Colour::kReservedTc: case mkvmuxer::Colour::kGamma22Curve: case mkvmuxer::Colour::kGamma28Curve: case mkvmuxer::Colour::kSmpte170MTc: case mkvmuxer::Colour::kSmpte240MTc: case mkvmuxer::Colour::kLinear: case mkvmuxer::Colour::kLog: case mkvmuxer::Colour::kLogSqrt: case mkvmuxer::Colour::kIec6196624: case mkvmuxer::Colour::kIturBt1361ExtendedColourGamut: case mkvmuxer::Colour::kIec6196621: case mkvmuxer::Colour::kIturBt202010bit: case mkvmuxer::Colour::kIturBt202012bit: case mkvmuxer::Colour::kSmpteSt2084: case mkvmuxer::Colour::kSmpteSt4281Tc: case mkvmuxer::Colour::kAribStdB67Hlg: return true; } return false; } bool IsPrimariesValueValid(uint64_t value) { switch (value) { case mkvmuxer::Colour::kReservedP0: case mkvmuxer::Colour::kIturBt709P: case mkvmuxer::Colour::kUnspecifiedP: case mkvmuxer::Colour::kReservedP3: case mkvmuxer::Colour::kIturBt470M: case mkvmuxer::Colour::kIturBt470Bg: case mkvmuxer::Colour::kSmpte170MP: case mkvmuxer::Colour::kSmpte240MP: case mkvmuxer::Colour::kFilm: case mkvmuxer::Colour::kIturBt2020: case mkvmuxer::Colour::kSmpteSt4281P: case mkvmuxer::Colour::kJedecP22Phosphors: return true; } return false; } } // namespace mkvmuxer libvpx-1.8.2/third_party/libwebm/mkvmuxer/mkvmuxerutil.h000066400000000000000000000121511357355204000236070ustar00rootroot00000000000000// Copyright (c) 2012 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. 
#ifndef MKVMUXER_MKVMUXERUTIL_H_ #define MKVMUXER_MKVMUXERUTIL_H_ #include "mkvmuxertypes.h" #include "stdint.h" namespace mkvmuxer { class Cluster; class Frame; class IMkvWriter; // TODO(tomfinegan): mkvmuxer:: integer types continue to be used here because // changing them causes pain for downstream projects. It would be nice to find // a solution that allows removal of the mkvmuxer:: integer types while // avoiding pain for downstream users of libwebm. Considering that // mkvmuxerutil.{cc,h} are really, for the great majority of cases, EBML size // calculation and writer functions, perhaps a more EBML focused utility would // be the way to go as a first step. const uint64 kEbmlUnknownValue = 0x01FFFFFFFFFFFFFFULL; const int64 kMaxBlockTimecode = 0x07FFFLL; // Writes out |value| in Big Endian order. Returns 0 on success. int32 SerializeInt(IMkvWriter* writer, int64 value, int32 size); // Writes out |f| in Big Endian order. Returns 0 on success. int32 SerializeFloat(IMkvWriter* writer, float f); // Returns the size in bytes of the element. int32 GetUIntSize(uint64 value); int32 GetIntSize(int64 value); int32 GetCodedUIntSize(uint64 value); uint64 EbmlMasterElementSize(uint64 type, uint64 value); uint64 EbmlElementSize(uint64 type, int64 value); uint64 EbmlElementSize(uint64 type, uint64 value); uint64 EbmlElementSize(uint64 type, float value); uint64 EbmlElementSize(uint64 type, const char* value); uint64 EbmlElementSize(uint64 type, const uint8* value, uint64 size); uint64 EbmlDateElementSize(uint64 type); // Returns the size in bytes of the element assuming that the element was // written using |fixed_size| bytes. If |fixed_size| is set to zero, then it // computes the necessary number of bytes based on |value|. uint64 EbmlElementSize(uint64 type, uint64 value, uint64 fixed_size); // Creates an EBML coded number from |value| and writes it out. The size of // the coded number is determined by the value of |value|. |value| must not // be in a coded form. Returns 0 on success. int32 WriteUInt(IMkvWriter* writer, uint64 value); // Creates an EBML coded number from |value| and writes it out. The size of // the coded number is determined by the value of |size|. |value| must not // be in a coded form. Returns 0 on success. int32 WriteUIntSize(IMkvWriter* writer, uint64 value, int32 size); // Output an Mkv master element. Returns true if the element was written. bool WriteEbmlMasterElement(IMkvWriter* writer, uint64 value, uint64 size); // Outputs an Mkv ID, calls |IMkvWriter::ElementStartNotify|, and passes the // ID to |SerializeInt|. Returns 0 on success. int32 WriteID(IMkvWriter* writer, uint64 type); // Output an Mkv non-master element. Returns true if the element was written. bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value); bool WriteEbmlElement(IMkvWriter* writer, uint64 type, int64 value); bool WriteEbmlElement(IMkvWriter* writer, uint64 type, float value); bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const char* value); bool WriteEbmlElement(IMkvWriter* writer, uint64 type, const uint8* value, uint64 size); bool WriteEbmlDateElement(IMkvWriter* writer, uint64 type, int64 value); // Output an Mkv non-master element using fixed size. The element will be // written out using exactly |fixed_size| bytes. If |fixed_size| is set to zero // then it computes the necessary number of bytes based on |value|. Returns true // if the element was written. bool WriteEbmlElement(IMkvWriter* writer, uint64 type, uint64 value, uint64 fixed_size); // Output a Mkv Frame.
It decides the correct element to write (Block vs // SimpleBlock) based on the parameters of the Frame. uint64 WriteFrame(IMkvWriter* writer, const Frame* const frame, Cluster* cluster); // Output a void element. |size| must be the entire size in bytes that will be // void. The function will calculate the size of the void header and subtract // it from |size|. uint64 WriteVoidElement(IMkvWriter* writer, uint64 size); // Returns the version number of the muxer in |major|, |minor|, |build|, // and |revision|. void GetVersion(int32* major, int32* minor, int32* build, int32* revision); // Returns a random number to be used for UID, using |seed| to seed // the random-number generator (see POSIX rand_r() for semantics). uint64 MakeUID(unsigned int* seed); // Colour field validation helpers. All return true when |value| is valid. bool IsMatrixCoefficientsValueValid(uint64_t value); bool IsChromaSitingHorzValueValid(uint64_t value); bool IsChromaSitingVertValueValid(uint64_t value); bool IsColourRangeValueValid(uint64_t value); bool IsTransferCharacteristicsValueValid(uint64_t value); bool IsPrimariesValueValid(uint64_t value); } // namespace mkvmuxer #endif // MKVMUXER_MKVMUXERUTIL_H_ libvpx-1.8.2/third_party/libwebm/mkvmuxer/mkvwriter.cc000066400000000000000000000036551357355204000232300ustar00rootroot00000000000000// Copyright (c) 2012 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. #include "mkvmuxer/mkvwriter.h" #include <sys/types.h> #ifdef _MSC_VER #include <share.h> // for _SH_DENYWR #endif namespace mkvmuxer { MkvWriter::MkvWriter() : file_(NULL), writer_owns_file_(true) {} MkvWriter::MkvWriter(FILE* fp) : file_(fp), writer_owns_file_(false) {} MkvWriter::~MkvWriter() { Close(); } int32 MkvWriter::Write(const void* buffer, uint32 length) { if (!file_) return -1; if (length == 0) return 0; if (buffer == NULL) return -1; const size_t bytes_written = fwrite(buffer, 1, length, file_); return (bytes_written == length) ? 0 : -1; } bool MkvWriter::Open(const char* filename) { if (filename == NULL) return false; if (file_) return false; #ifdef _MSC_VER file_ = _fsopen(filename, "wb", _SH_DENYWR); #else file_ = fopen(filename, "wb"); #endif if (file_ == NULL) return false; return true; } void MkvWriter::Close() { if (file_ && writer_owns_file_) { fclose(file_); } file_ = NULL; } int64 MkvWriter::Position() const { if (!file_) return 0; #ifdef _MSC_VER return _ftelli64(file_); #else return ftell(file_); #endif } int32 MkvWriter::Position(int64 position) { if (!file_) return -1; #ifdef _MSC_VER return _fseeki64(file_, position, SEEK_SET); #elif defined(_WIN32) return fseeko64(file_, static_cast<off64_t>(position), SEEK_SET); #else return fseeko(file_, static_cast<off_t>(position), SEEK_SET); #endif } bool MkvWriter::Seekable() const { return true; } void MkvWriter::ElementStartNotify(uint64, int64) {} } // namespace mkvmuxer libvpx-1.8.2/third_party/libwebm/mkvmuxer/mkvwriter.h000066400000000000000000000027021357355204000230660ustar00rootroot00000000000000// Copyright (c) 2012 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree.
An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. #ifndef MKVMUXER_MKVWRITER_H_ #define MKVMUXER_MKVWRITER_H_ #include <cstdio> #include "mkvmuxer/mkvmuxer.h" #include "mkvmuxer/mkvmuxertypes.h" namespace mkvmuxer { // Default implementation of the IMkvWriter interface on Windows. class MkvWriter : public IMkvWriter { public: MkvWriter(); explicit MkvWriter(FILE* fp); virtual ~MkvWriter(); // IMkvWriter interface virtual int64 Position() const; virtual int32 Position(int64 position); virtual bool Seekable() const; virtual int32 Write(const void* buffer, uint32 length); virtual void ElementStartNotify(uint64 element_id, int64 position); // Creates and opens a file for writing. |filename| is the name of the file // to open. This function will overwrite the contents of |filename|. Returns // true on success. bool Open(const char* filename); // Closes an opened file. void Close(); private: // File handle to output file. FILE* file_; bool writer_owns_file_; LIBWEBM_DISALLOW_COPY_AND_ASSIGN(MkvWriter); }; } // namespace mkvmuxer #endif // MKVMUXER_MKVWRITER_H_ libvpx-1.8.2/third_party/libwebm/mkvparser/000077500000000000000000000000001357355204000210165ustar00rootroot00000000000000libvpx-1.8.2/third_party/libwebm/mkvparser/mkvparser.cc000066400000000000000000005710331357355204000233470ustar00rootroot00000000000000// Copyright (c) 2012 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree.
#include "mkvparser/mkvparser.h" #if defined(_MSC_VER) && _MSC_VER < 1800 #include // _isnan() / _finite() #define MSC_COMPAT #endif #include #include #include #include #include #include #include #include "common/webmids.h" namespace mkvparser { const long long kStringElementSizeLimit = 20 * 1000 * 1000; const float MasteringMetadata::kValueNotPresent = FLT_MAX; const long long Colour::kValueNotPresent = LLONG_MAX; const float Projection::kValueNotPresent = FLT_MAX; #ifdef MSC_COMPAT inline bool isnan(double val) { return !!_isnan(val); } inline bool isinf(double val) { return !_finite(val); } #else inline bool isnan(double val) { return std::isnan(val); } inline bool isinf(double val) { return std::isinf(val); } #endif // MSC_COMPAT template Type* SafeArrayAlloc(unsigned long long num_elements, unsigned long long element_size) { if (num_elements == 0 || element_size == 0) return NULL; const size_t kMaxAllocSize = 0x80000000; // 2GiB const unsigned long long num_bytes = num_elements * element_size; if (element_size > (kMaxAllocSize / num_elements)) return NULL; if (num_bytes != static_cast(num_bytes)) return NULL; return new (std::nothrow) Type[static_cast(num_bytes)]; } void GetVersion(int& major, int& minor, int& build, int& revision) { major = 1; minor = 0; build = 0; revision = 30; } long long ReadUInt(IMkvReader* pReader, long long pos, long& len) { if (!pReader || pos < 0) return E_FILE_FORMAT_INVALID; len = 1; unsigned char b; int status = pReader->Read(pos, 1, &b); if (status < 0) // error or underflow return status; if (status > 0) // interpreted as "underflow" return E_BUFFER_NOT_FULL; if (b == 0) // we can't handle u-int values larger than 8 bytes return E_FILE_FORMAT_INVALID; unsigned char m = 0x80; while (!(b & m)) { m >>= 1; ++len; } long long result = b & (~m); ++pos; for (int i = 1; i < len; ++i) { status = pReader->Read(pos, 1, &b); if (status < 0) { len = 1; return status; } if (status > 0) { len = 1; return E_BUFFER_NOT_FULL; } result <<= 8; result |= b; ++pos; } return result; } // Reads an EBML ID and returns it. // An ID must at least 1 byte long, cannot exceed 4, and its value must be // greater than 0. // See known EBML values and EBMLMaxIDLength: // http://www.matroska.org/technical/specs/index.html // Returns the ID, or a value less than 0 to report an error while reading the // ID. long long ReadID(IMkvReader* pReader, long long pos, long& len) { if (pReader == NULL || pos < 0) return E_FILE_FORMAT_INVALID; // Read the first byte. The length in bytes of the ID is determined by // finding the first set bit in the first byte of the ID. unsigned char temp_byte = 0; int read_status = pReader->Read(pos, 1, &temp_byte); if (read_status < 0) return E_FILE_FORMAT_INVALID; else if (read_status > 0) // No data to read. return E_BUFFER_NOT_FULL; if (temp_byte == 0) // ID length > 8 bytes; invalid file. return E_FILE_FORMAT_INVALID; int bit_pos = 0; const int kMaxIdLengthInBytes = 4; const int kCheckByte = 0x80; // Find the first bit that's set. bool found_bit = false; for (; bit_pos < kMaxIdLengthInBytes; ++bit_pos) { if ((kCheckByte >> bit_pos) & temp_byte) { found_bit = true; break; } } if (!found_bit) { // The value is too large to be a valid ID. return E_FILE_FORMAT_INVALID; } // Read the remaining bytes of the ID (if any). 
const int id_length = bit_pos + 1; long long ebml_id = temp_byte; for (int i = 1; i < id_length; ++i) { ebml_id <<= 8; read_status = pReader->Read(pos + i, 1, &temp_byte); if (read_status < 0) return E_FILE_FORMAT_INVALID; else if (read_status > 0) return E_BUFFER_NOT_FULL; ebml_id |= temp_byte; } len = id_length; return ebml_id; } long long GetUIntLength(IMkvReader* pReader, long long pos, long& len) { if (!pReader || pos < 0) return E_FILE_FORMAT_INVALID; long long total, available; int status = pReader->Length(&total, &available); if (status < 0 || (total >= 0 && available > total)) return E_FILE_FORMAT_INVALID; len = 1; if (pos >= available) return pos; // too few bytes available unsigned char b; status = pReader->Read(pos, 1, &b); if (status != 0) return status; if (b == 0) // we can't handle u-int values larger than 8 bytes return E_FILE_FORMAT_INVALID; unsigned char m = 0x80; while (!(b & m)) { m >>= 1; ++len; } return 0; // success } // TODO(vigneshv): This function assumes that unsigned values never have their // high bit set. long long UnserializeUInt(IMkvReader* pReader, long long pos, long long size) { if (!pReader || pos < 0 || (size <= 0) || (size > 8)) return E_FILE_FORMAT_INVALID; long long result = 0; for (long long i = 0; i < size; ++i) { unsigned char b; const long status = pReader->Read(pos, 1, &b); if (status < 0) return status; result <<= 8; result |= b; ++pos; } return result; } long UnserializeFloat(IMkvReader* pReader, long long pos, long long size_, double& result) { if (!pReader || pos < 0 || ((size_ != 4) && (size_ != 8))) return E_FILE_FORMAT_INVALID; const long size = static_cast<long>(size_); unsigned char buf[8]; const int status = pReader->Read(pos, size, buf); if (status < 0) // error return status; if (size == 4) { union { float f; unsigned long ff; }; ff = 0; for (int i = 0;;) { ff |= buf[i]; if (++i >= 4) break; ff <<= 8; } result = f; } else { union { double d; unsigned long long dd; }; dd = 0; for (int i = 0;;) { dd |= buf[i]; if (++i >= 8) break; dd <<= 8; } result = d; } if (mkvparser::isinf(result) || mkvparser::isnan(result)) return E_FILE_FORMAT_INVALID; return 0; } long UnserializeInt(IMkvReader* pReader, long long pos, long long size, long long& result_ref) { if (!pReader || pos < 0 || size < 1 || size > 8) return E_FILE_FORMAT_INVALID; signed char first_byte = 0; const long status = pReader->Read(pos, 1, (unsigned char*)&first_byte); if (status < 0) return status; unsigned long long result = first_byte; ++pos; for (long i = 1; i < size; ++i) { unsigned char b; const long status = pReader->Read(pos, 1, &b); if (status < 0) return status; result <<= 8; result |= b; ++pos; } result_ref = static_cast<long long>(result); return 0; } long UnserializeString(IMkvReader* pReader, long long pos, long long size, char*& str) { delete[] str; str = NULL; if (size >= LONG_MAX || size < 0 || size > kStringElementSizeLimit) return E_FILE_FORMAT_INVALID; // +1 for '\0' terminator const long required_size = static_cast<long>(size) + 1; str = SafeArrayAlloc<char>(1, required_size); if (str == NULL) return E_FILE_FORMAT_INVALID; unsigned char* const buf = reinterpret_cast<unsigned char*>(str); const long status = pReader->Read(pos, static_cast<long>(size), buf); if (status) { delete[] str; str = NULL; return status; } str[required_size - 1] = '\0'; return 0; } long ParseElementHeader(IMkvReader* pReader, long long& pos, long long stop, long long& id, long long& size) { if (stop >= 0 && pos >= stop) return E_FILE_FORMAT_INVALID; long len; id = ReadID(pReader, pos, len); if (id < 0) return E_FILE_FORMAT_INVALID; pos
+= len; // consume id if (stop >= 0 && pos >= stop) return E_FILE_FORMAT_INVALID; size = ReadUInt(pReader, pos, len); if (size < 0 || len < 1 || len > 8) { // Invalid: Negative payload size, negative or 0 length integer, or integer // larger than 64 bits (libwebm cannot handle them). return E_FILE_FORMAT_INVALID; } // Avoid rolling over pos when very close to LLONG_MAX. const unsigned long long rollover_check = static_cast<unsigned long long>(pos) + len; if (rollover_check > LLONG_MAX) return E_FILE_FORMAT_INVALID; pos += len; // consume length of size // pos now designates payload if (stop >= 0 && pos > stop) return E_FILE_FORMAT_INVALID; return 0; // success } bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id, long long& val) { if (!pReader || pos < 0) return false; long long total = 0; long long available = 0; const long status = pReader->Length(&total, &available); if (status < 0 || (total >= 0 && available > total)) return false; long len = 0; const long long id = ReadID(pReader, pos, len); if (id < 0 || (available - pos) > len) return false; if (static_cast<unsigned long>(id) != expected_id) return false; pos += len; // consume id const long long size = ReadUInt(pReader, pos, len); if (size < 0 || size > 8 || len < 1 || len > 8 || (available - pos) > len) return false; pos += len; // consume length of size of payload val = UnserializeUInt(pReader, pos, size); if (val < 0) return false; pos += size; // consume size of payload return true; } bool Match(IMkvReader* pReader, long long& pos, unsigned long expected_id, unsigned char*& buf, size_t& buflen) { if (!pReader || pos < 0) return false; long long total = 0; long long available = 0; long status = pReader->Length(&total, &available); if (status < 0 || (total >= 0 && available > total)) return false; long len = 0; const long long id = ReadID(pReader, pos, len); if (id < 0 || (available - pos) > len) return false; if (static_cast<unsigned long>(id) != expected_id) return false; pos += len; // consume id const long long size = ReadUInt(pReader, pos, len); if (size < 0 || len <= 0 || len > 8 || (available - pos) > len) return false; unsigned long long rollover_check = static_cast<unsigned long long>(pos) + len; if (rollover_check > LLONG_MAX) return false; pos += len; // consume length of size of payload rollover_check = static_cast<unsigned long long>(pos) + size; if (rollover_check > LLONG_MAX) return false; if ((pos + size) > available) return false; if (size >= LONG_MAX) return false; const long buflen_ = static_cast<long>(size); buf = SafeArrayAlloc<unsigned char>(1, buflen_); if (!buf) return false; status = pReader->Read(pos, buflen_, buf); if (status != 0) return false; buflen = buflen_; pos += size; // consume size of payload return true; } EBMLHeader::EBMLHeader() : m_docType(NULL) { Init(); } EBMLHeader::~EBMLHeader() { delete[] m_docType; } void EBMLHeader::Init() { m_version = 1; m_readVersion = 1; m_maxIdLength = 4; m_maxSizeLength = 8; if (m_docType) { delete[] m_docType; m_docType = NULL; } m_docTypeVersion = 1; m_docTypeReadVersion = 1; } long long EBMLHeader::Parse(IMkvReader* pReader, long long& pos) { if (!pReader) return E_FILE_FORMAT_INVALID; long long total, available; long status = pReader->Length(&total, &available); if (status < 0) // error return status; pos = 0; // Scan until we find what looks like the first byte of the EBML header. const long long kMaxScanBytes = (available >= 1024) ?
1024 : available; const unsigned char kEbmlByte0 = 0x1A; unsigned char scan_byte = 0; while (pos < kMaxScanBytes) { status = pReader->Read(pos, 1, &scan_byte); if (status < 0) // error return status; else if (status > 0) return E_BUFFER_NOT_FULL; if (scan_byte == kEbmlByte0) break; ++pos; } long len = 0; const long long ebml_id = ReadID(pReader, pos, len); if (ebml_id == E_BUFFER_NOT_FULL) return E_BUFFER_NOT_FULL; if (len != 4 || ebml_id != libwebm::kMkvEBML) return E_FILE_FORMAT_INVALID; // Move read pos forward to the EBML header size field. pos += 4; // Read length of size field. long long result = GetUIntLength(pReader, pos, len); if (result < 0) // error return E_FILE_FORMAT_INVALID; else if (result > 0) // need more data return E_BUFFER_NOT_FULL; if (len < 1 || len > 8) return E_FILE_FORMAT_INVALID; if ((total >= 0) && ((total - pos) < len)) return E_FILE_FORMAT_INVALID; if ((available - pos) < len) return pos + len; // try again later // Read the EBML header size. result = ReadUInt(pReader, pos, len); if (result < 0) // error return result; pos += len; // consume size field // pos now designates start of payload if ((total >= 0) && ((total - pos) < result)) return E_FILE_FORMAT_INVALID; if ((available - pos) < result) return pos + result; const long long end = pos + result; Init(); while (pos < end) { long long id, size; status = ParseElementHeader(pReader, pos, end, id, size); if (status < 0) // error return status; if (size == 0) return E_FILE_FORMAT_INVALID; if (id == libwebm::kMkvEBMLVersion) { m_version = UnserializeUInt(pReader, pos, size); if (m_version <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvEBMLReadVersion) { m_readVersion = UnserializeUInt(pReader, pos, size); if (m_readVersion <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvEBMLMaxIDLength) { m_maxIdLength = UnserializeUInt(pReader, pos, size); if (m_maxIdLength <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvEBMLMaxSizeLength) { m_maxSizeLength = UnserializeUInt(pReader, pos, size); if (m_maxSizeLength <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvDocType) { if (m_docType) return E_FILE_FORMAT_INVALID; status = UnserializeString(pReader, pos, size, m_docType); if (status) // error return status; } else if (id == libwebm::kMkvDocTypeVersion) { m_docTypeVersion = UnserializeUInt(pReader, pos, size); if (m_docTypeVersion <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvDocTypeReadVersion) { m_docTypeReadVersion = UnserializeUInt(pReader, pos, size); if (m_docTypeReadVersion <= 0) return E_FILE_FORMAT_INVALID; } pos += size; } if (pos != end) return E_FILE_FORMAT_INVALID; // Make sure DocType, DocTypeReadVersion, and DocTypeVersion are valid. if (m_docType == NULL || m_docTypeReadVersion <= 0 || m_docTypeVersion <= 0) return E_FILE_FORMAT_INVALID; // Make sure EBMLMaxIDLength and EBMLMaxSizeLength are valid. 
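// (4 and 8 are also the EBML defaults; WebM does not permit larger limits,
// and this parser cannot read IDs wider than 4 bytes or size fields wider
// than 8 bytes.)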
if (m_maxIdLength <= 0 || m_maxIdLength > 4 || m_maxSizeLength <= 0 || m_maxSizeLength > 8) return E_FILE_FORMAT_INVALID; return 0; } Segment::Segment(IMkvReader* pReader, long long elem_start, // long long elem_size, long long start, long long size) : m_pReader(pReader), m_element_start(elem_start), // m_element_size(elem_size), m_start(start), m_size(size), m_pos(start), m_pUnknownSize(0), m_pSeekHead(NULL), m_pInfo(NULL), m_pTracks(NULL), m_pCues(NULL), m_pChapters(NULL), m_pTags(NULL), m_clusters(NULL), m_clusterCount(0), m_clusterPreloadCount(0), m_clusterSize(0) {} Segment::~Segment() { const long count = m_clusterCount + m_clusterPreloadCount; Cluster** i = m_clusters; Cluster** j = m_clusters + count; while (i != j) { Cluster* const p = *i++; delete p; } delete[] m_clusters; delete m_pTracks; delete m_pInfo; delete m_pCues; delete m_pChapters; delete m_pTags; delete m_pSeekHead; } long long Segment::CreateInstance(IMkvReader* pReader, long long pos, Segment*& pSegment) { if (pReader == NULL || pos < 0) return E_PARSE_FAILED; pSegment = NULL; long long total, available; const long status = pReader->Length(&total, &available); if (status < 0) // error return status; if (available < 0) return -1; if ((total >= 0) && (available > total)) return -1; // I would assume that in practice this loop would execute // exactly once, but we allow for other elements (e.g. Void) // to immediately follow the EBML header. This is fine for // the source filter case (since the entire file is available), // but in the splitter case over a network we should probably // just give up early. We could for example decide only to // execute this loop a maximum of, say, 10 times. // TODO: // There is an implied "give up early" by only parsing up // to the available limit. We do that, but only if the // total file size is unknown. We could decide to always // use what's available as our limit (irrespective of whether // we happen to know the total file length). This would have // as its sense "parse this much of the file before giving up", // which is a slightly different sense from "try to parse up to // 10 EBML elements before giving up". for (;;) { if ((total >= 0) && (pos >= total)) return E_FILE_FORMAT_INVALID; // Read ID long len; long long result = GetUIntLength(pReader, pos, len); if (result) // error, or too few available bytes return result; if ((total >= 0) && ((pos + len) > total)) return E_FILE_FORMAT_INVALID; if ((pos + len) > available) return pos + len; const long long idpos = pos; const long long id = ReadID(pReader, pos, len); if (id < 0) return E_FILE_FORMAT_INVALID; pos += len; // consume ID // Read Size result = GetUIntLength(pReader, pos, len); if (result) // error, or too few available bytes return result; if ((total >= 0) && ((pos + len) > total)) return E_FILE_FORMAT_INVALID; if ((pos + len) > available) return pos + len; long long size = ReadUInt(pReader, pos, len); if (size < 0) // error return size; pos += len; // consume length of size of element // Pos now points to start of payload // Handle "unknown size" for live streaming of webm files.
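// An EBML size field whose data bits are all ones denotes "unknown size";
// e.g. (illustrative) a one-byte size field 0xFF decodes to
// (1LL << 7) - 1 == 127, matching the unknown_size value computed below
// for len == 1.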
const long long unknown_size = (1LL << (7 * len)) - 1; if (id == libwebm::kMkvSegment) { if (size == unknown_size) size = -1; else if (total < 0) size = -1; else if ((pos + size) > total) size = -1; pSegment = new (std::nothrow) Segment(pReader, idpos, pos, size); if (pSegment == NULL) return E_PARSE_FAILED; return 0; // success } if (size == unknown_size) return E_FILE_FORMAT_INVALID; if ((total >= 0) && ((pos + size) > total)) return E_FILE_FORMAT_INVALID; if ((pos + size) > available) return pos + size; pos += size; // consume payload } } long long Segment::ParseHeaders() { // Outermost (level 0) segment object has been constructed, // and pos designates start of payload. We need to find the // inner (level 1) elements. long long total, available; const int status = m_pReader->Length(&total, &available); if (status < 0) // error return status; if (total > 0 && available > total) return E_FILE_FORMAT_INVALID; const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; if ((segment_stop >= 0 && total >= 0 && segment_stop > total) || (segment_stop >= 0 && m_pos > segment_stop)) { return E_FILE_FORMAT_INVALID; } for (;;) { if ((total >= 0) && (m_pos >= total)) break; if ((segment_stop >= 0) && (m_pos >= segment_stop)) break; long long pos = m_pos; const long long element_start = pos; // Avoid rolling over pos when very close to LLONG_MAX. unsigned long long rollover_check = pos + 1ULL; if (rollover_check > LLONG_MAX) return E_FILE_FORMAT_INVALID; if ((pos + 1) > available) return (pos + 1); long len; long long result = GetUIntLength(m_pReader, pos, len); if (result < 0) // error return result; if (result > 0) { // MkvReader doesn't have enough data to satisfy this read attempt. return (pos + 1); } if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > available) return pos + len; const long long idpos = pos; const long long id = ReadID(m_pReader, idpos, len); if (id < 0) return E_FILE_FORMAT_INVALID; if (id == libwebm::kMkvCluster) break; pos += len; // consume ID if ((pos + 1) > available) return (pos + 1); // Read Size result = GetUIntLength(m_pReader, pos, len); if (result < 0) // error return result; if (result > 0) { // MkvReader doesn't have enough data to satisfy this read attempt. return (pos + 1); } if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > available) return pos + len; const long long size = ReadUInt(m_pReader, pos, len); if (size < 0 || len < 1 || len > 8) { // TODO(tomfinegan): ReadUInt should return an error when len is < 1 or // len > 8 is true instead of checking this _everywhere_. return size; } pos += len; // consume length of size of element // Avoid rolling over pos when very close to LLONG_MAX. rollover_check = static_cast(pos) + size; if (rollover_check > LLONG_MAX) return E_FILE_FORMAT_INVALID; const long long element_size = size + pos - element_start; // Pos now points to start of payload if ((segment_stop >= 0) && ((pos + size) > segment_stop)) return E_FILE_FORMAT_INVALID; // We read EBML elements either in total or nothing at all. 
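// (A positive return value from this parser is the absolute file offset the
// reader must have buffered before the parse can be retried.)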
if ((pos + size) > available) return pos + size; if (id == libwebm::kMkvInfo) { if (m_pInfo) return E_FILE_FORMAT_INVALID; m_pInfo = new (std::nothrow) SegmentInfo(this, pos, size, element_start, element_size); if (m_pInfo == NULL) return -1; const long status = m_pInfo->Parse(); if (status) return status; } else if (id == libwebm::kMkvTracks) { if (m_pTracks) return E_FILE_FORMAT_INVALID; m_pTracks = new (std::nothrow) Tracks(this, pos, size, element_start, element_size); if (m_pTracks == NULL) return -1; const long status = m_pTracks->Parse(); if (status) return status; } else if (id == libwebm::kMkvCues) { if (m_pCues == NULL) { m_pCues = new (std::nothrow) Cues(this, pos, size, element_start, element_size); if (m_pCues == NULL) return -1; } } else if (id == libwebm::kMkvSeekHead) { if (m_pSeekHead == NULL) { m_pSeekHead = new (std::nothrow) SeekHead(this, pos, size, element_start, element_size); if (m_pSeekHead == NULL) return -1; const long status = m_pSeekHead->Parse(); if (status) return status; } } else if (id == libwebm::kMkvChapters) { if (m_pChapters == NULL) { m_pChapters = new (std::nothrow) Chapters(this, pos, size, element_start, element_size); if (m_pChapters == NULL) return -1; const long status = m_pChapters->Parse(); if (status) return status; } } else if (id == libwebm::kMkvTags) { if (m_pTags == NULL) { m_pTags = new (std::nothrow) Tags(this, pos, size, element_start, element_size); if (m_pTags == NULL) return -1; const long status = m_pTags->Parse(); if (status) return status; } } m_pos = pos + size; // consume payload } if (segment_stop >= 0 && m_pos > segment_stop) return E_FILE_FORMAT_INVALID; if (m_pInfo == NULL) // TODO: liberalize this behavior return E_FILE_FORMAT_INVALID; if (m_pTracks == NULL) return E_FILE_FORMAT_INVALID; return 0; // success } long Segment::LoadCluster(long long& pos, long& len) { for (;;) { const long result = DoLoadCluster(pos, len); if (result <= 1) return result; } } long Segment::DoLoadCluster(long long& pos, long& len) { if (m_pos < 0) return DoLoadClusterUnknownSize(pos, len); long long total, avail; long status = m_pReader->Length(&total, &avail); if (status < 0) // error return status; if (total >= 0 && avail > total) return E_FILE_FORMAT_INVALID; const long long segment_stop = (m_size < 0) ? 
-1 : m_start + m_size; long long cluster_off = -1; // offset relative to start of segment long long cluster_size = -1; // size of cluster payload for (;;) { if ((total >= 0) && (m_pos >= total)) return 1; // no more clusters if ((segment_stop >= 0) && (m_pos >= segment_stop)) return 1; // no more clusters pos = m_pos; // Read ID if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } long long result = GetUIntLength(m_pReader, pos, len); if (result < 0) // error return static_cast<long>(result); if (result > 0) return E_BUFFER_NOT_FULL; if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long idpos = pos; const long long id = ReadID(m_pReader, idpos, len); if (id < 0) return E_FILE_FORMAT_INVALID; pos += len; // consume ID // Read Size if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } result = GetUIntLength(m_pReader, pos, len); if (result < 0) // error return static_cast<long>(result); if (result > 0) return E_BUFFER_NOT_FULL; if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long size = ReadUInt(m_pReader, pos, len); if (size < 0) // error return static_cast<long>(size); pos += len; // consume length of size of element // pos now points to start of payload if (size == 0) { // Missing element payload: move on. m_pos = pos; continue; } const long long unknown_size = (1LL << (7 * len)) - 1; if ((segment_stop >= 0) && (size != unknown_size) && ((pos + size) > segment_stop)) { return E_FILE_FORMAT_INVALID; } if (id == libwebm::kMkvCues) { if (size == unknown_size) { // Cues element of unknown size: Not supported. return E_FILE_FORMAT_INVALID; } if (m_pCues == NULL) { const long long element_size = (pos - idpos) + size; m_pCues = new (std::nothrow) Cues(this, pos, size, idpos, element_size); if (m_pCues == NULL) return -1; } m_pos = pos + size; // consume payload continue; } if (id != libwebm::kMkvCluster) { // Besides the Segment, Libwebm allows only cluster elements of unknown // size. Fail the parse upon encountering a non-cluster element reporting // unknown size. if (size == unknown_size) return E_FILE_FORMAT_INVALID; m_pos = pos + size; // consume payload continue; } // We have a cluster. cluster_off = idpos - m_start; // relative pos if (size != unknown_size) cluster_size = size; break; } if (cluster_off < 0) { // No cluster, die. return E_FILE_FORMAT_INVALID; } long long pos_; long len_; status = Cluster::HasBlockEntries(this, cluster_off, pos_, len_); if (status < 0) { // error, or underflow pos = pos_; len = len_; return status; } // status == 0 means "no block entries found" // status > 0 means "found at least one block entry" // TODO: // The issue here is that the segment increments its own // pos ptr past the most recent cluster parsed, and then // starts from there to parse the next cluster. If we // don't know the size of the current cluster, then we // must either parse its payload (as we do below), looking // for the cluster (or cues) ID to terminate the parse. // This isn't really what we want: rather, we really need // a way to create the curr cluster object immediately. // The pity is that cluster::parse can determine its own // boundary, and we largely duplicate that same logic here. // // Maybe we need to get rid of our look-ahead preloading // in source::parse???
// // As we're parsing the blocks in the curr cluster //(in cluster::parse), we should have some way to signal // to the segment that we have determined the boundary, // so it can adjust its own segment::m_pos member. // // The problem is that we're asserting in asyncreadinit, // because we adjust the pos down to the curr seek pos, // and the resulting adjusted len is > 2GB. I'm suspicious // that this is even correct, but even if it is, we can't // be loading that much data in the cache anyway. const long idx = m_clusterCount; if (m_clusterPreloadCount > 0) { if (idx >= m_clusterSize) return E_FILE_FORMAT_INVALID; Cluster* const pCluster = m_clusters[idx]; if (pCluster == NULL || pCluster->m_index >= 0) return E_FILE_FORMAT_INVALID; const long long off = pCluster->GetPosition(); if (off < 0) return E_FILE_FORMAT_INVALID; if (off == cluster_off) { // preloaded already if (status == 0) // no entries found return E_FILE_FORMAT_INVALID; if (cluster_size >= 0) pos += cluster_size; else { const long long element_size = pCluster->GetElementSize(); if (element_size <= 0) return E_FILE_FORMAT_INVALID; // TODO: handle this case pos = pCluster->m_element_start + element_size; } pCluster->m_index = idx; // move from preloaded to loaded ++m_clusterCount; --m_clusterPreloadCount; m_pos = pos; // consume payload if (segment_stop >= 0 && m_pos > segment_stop) return E_FILE_FORMAT_INVALID; return 0; // success } } if (status == 0) { // no entries found if (cluster_size >= 0) pos += cluster_size; if ((total >= 0) && (pos >= total)) { m_pos = total; return 1; // no more clusters } if ((segment_stop >= 0) && (pos >= segment_stop)) { m_pos = segment_stop; return 1; // no more clusters } m_pos = pos; return 2; // try again } // status > 0 means we have an entry Cluster* const pCluster = Cluster::Create(this, idx, cluster_off); if (pCluster == NULL) return -1; if (!AppendCluster(pCluster)) { delete pCluster; return -1; } if (cluster_size >= 0) { pos += cluster_size; m_pos = pos; if (segment_stop > 0 && m_pos > segment_stop) return E_FILE_FORMAT_INVALID; return 0; } m_pUnknownSize = pCluster; m_pos = -pos; return 0; // partial success, since we have a new cluster // status == 0 means "no block entries found" // pos designates start of payload // m_pos has NOT been adjusted yet (in case we need to come back here) } long Segment::DoLoadClusterUnknownSize(long long& pos, long& len) { if (m_pos >= 0 || m_pUnknownSize == NULL) return E_PARSE_FAILED; const long status = m_pUnknownSize->Parse(pos, len); if (status < 0) // error or underflow return status; if (status == 0) // parsed a block return 2; // continue parsing const long long start = m_pUnknownSize->m_element_start; const long long size = m_pUnknownSize->GetElementSize(); if (size < 0) return E_FILE_FORMAT_INVALID; pos = start + size; m_pos = pos; m_pUnknownSize = 0; return 2; // continue parsing } bool Segment::AppendCluster(Cluster* pCluster) { if (pCluster == NULL || pCluster->m_index < 0) return false; const long count = m_clusterCount + m_clusterPreloadCount; long& size = m_clusterSize; const long idx = pCluster->m_index; if (size < count || idx != m_clusterCount) return false; if (count >= size) { const long n = (size <= 0) ? 
2048 : 2 * size; Cluster** const qq = new (std::nothrow) Cluster*[n]; if (qq == NULL) return false; Cluster** q = qq; Cluster** p = m_clusters; Cluster** const pp = p + count; while (p != pp) *q++ = *p++; delete[] m_clusters; m_clusters = qq; size = n; } if (m_clusterPreloadCount > 0) { Cluster** const p = m_clusters + m_clusterCount; if (*p == NULL || (*p)->m_index >= 0) return false; Cluster** q = p + m_clusterPreloadCount; if (q >= (m_clusters + size)) return false; for (;;) { Cluster** const qq = q - 1; if ((*qq)->m_index >= 0) return false; *q = *qq; q = qq; if (q == p) break; } } m_clusters[idx] = pCluster; ++m_clusterCount; return true; } bool Segment::PreloadCluster(Cluster* pCluster, ptrdiff_t idx) { if (pCluster == NULL || pCluster->m_index >= 0 || idx < m_clusterCount) return false; const long count = m_clusterCount + m_clusterPreloadCount; long& size = m_clusterSize; if (size < count) return false; if (count >= size) { const long n = (size <= 0) ? 2048 : 2 * size; Cluster** const qq = new (std::nothrow) Cluster*[n]; if (qq == NULL) return false; Cluster** q = qq; Cluster** p = m_clusters; Cluster** const pp = p + count; while (p != pp) *q++ = *p++; delete[] m_clusters; m_clusters = qq; size = n; } if (m_clusters == NULL) return false; Cluster** const p = m_clusters + idx; Cluster** q = m_clusters + count; if (q < p || q >= (m_clusters + size)) return false; while (q > p) { Cluster** const qq = q - 1; if ((*qq)->m_index >= 0) return false; *q = *qq; q = qq; } m_clusters[idx] = pCluster; ++m_clusterPreloadCount; return true; } long Segment::Load() { if (m_clusters != NULL || m_clusterSize != 0 || m_clusterCount != 0) return E_PARSE_FAILED; // Outermost (level 0) segment object has been constructed, // and pos designates start of payload. We need to find the // inner (level 1) elements. 
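// (Editorial sketch.)  Load() is the blocking, parse-everything entry
// point: it runs ParseHeaders() and then loops LoadCluster() to the end of
// the segment.  Streaming callers drive LoadCluster()/ParseNext() instead
// and retry on E_BUFFER_NOT_FULL.  A minimal, hypothetical driver, assuming
// the MkvReader file reader from mkvreader.h:
//
//   MkvReader reader;
//   if (reader.Open("input.webm")) { /* I/O error */ }
//   long long pos = 0;
//   EBMLHeader ebml_header;
//   if (ebml_header.Parse(&reader, pos) < 0) { /* not an EBML document */ }
//   Segment* segment = NULL;
//   if (Segment::CreateInstance(&reader, pos, segment)) { /* error */ }
//   if (segment->Load() < 0) { /* parse error */ }
//   delete segment;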
const long long header_status = ParseHeaders(); if (header_status < 0) // error return static_cast<long>(header_status); if (header_status > 0) // underflow return E_BUFFER_NOT_FULL; if (m_pInfo == NULL || m_pTracks == NULL) return E_FILE_FORMAT_INVALID; for (;;) { const long status = LoadCluster(); if (status < 0) // error return status; if (status >= 1) // no more clusters return 0; } } SeekHead::Entry::Entry() : id(0), pos(0), element_start(0), element_size(0) {} SeekHead::SeekHead(Segment* pSegment, long long start, long long size_, long long element_start, long long element_size) : m_pSegment(pSegment), m_start(start), m_size(size_), m_element_start(element_start), m_element_size(element_size), m_entries(0), m_entry_count(0), m_void_elements(0), m_void_element_count(0) {} SeekHead::~SeekHead() { delete[] m_entries; delete[] m_void_elements; } long SeekHead::Parse() { IMkvReader* const pReader = m_pSegment->m_pReader; long long pos = m_start; const long long stop = m_start + m_size; // first count the seek head entries int entry_count = 0; int void_element_count = 0; while (pos < stop) { long long id, size; const long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; if (id == libwebm::kMkvSeek) ++entry_count; else if (id == libwebm::kMkvVoid) ++void_element_count; pos += size; // consume payload if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; if (entry_count > 0) { m_entries = new (std::nothrow) Entry[entry_count]; if (m_entries == NULL) return -1; } if (void_element_count > 0) { m_void_elements = new (std::nothrow) VoidElement[void_element_count]; if (m_void_elements == NULL) return -1; } // now parse the entries and void elements Entry* pEntry = m_entries; VoidElement* pVoidElement = m_void_elements; pos = m_start; while (pos < stop) { const long long idpos = pos; long long id, size; const long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; if (id == libwebm::kMkvSeek && entry_count > 0) { if (ParseEntry(pReader, pos, size, pEntry)) { Entry& e = *pEntry++; e.element_start = idpos; e.element_size = (pos + size) - idpos; } } else if (id == libwebm::kMkvVoid && void_element_count > 0) { VoidElement& e = *pVoidElement++; e.element_start = idpos; e.element_size = (pos + size) - idpos; } pos += size; // consume payload if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; ptrdiff_t count_ = ptrdiff_t(pEntry - m_entries); assert(count_ >= 0); assert(count_ <= entry_count); m_entry_count = static_cast<int>(count_); count_ = ptrdiff_t(pVoidElement - m_void_elements); assert(count_ >= 0); assert(count_ <= void_element_count); m_void_element_count = static_cast<int>(count_); return 0; } int SeekHead::GetCount() const { return m_entry_count; } const SeekHead::Entry* SeekHead::GetEntry(int idx) const { if (idx < 0) return 0; if (idx >= m_entry_count) return 0; return m_entries + idx; } int SeekHead::GetVoidElementCount() const { return m_void_element_count; } const SeekHead::VoidElement* SeekHead::GetVoidElement(int idx) const { if (idx < 0) return 0; if (idx >= m_void_element_count) return 0; return m_void_elements + idx; } long Segment::ParseCues(long long off, long long& pos, long& len) { if (m_pCues) return 0; // success if (off < 0) return -1; long long total, avail; const int status = m_pReader->Length(&total, &avail); if (status < 0) // error return status; assert((total < 0) || (avail <= total)); pos = m_start + off;
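// (Editorial note.)  The off argument is segment-relative, as recorded in a
// SeekHead entry's SeekPosition, so the absolute file position of the Cues
// element is m_start (the first byte of the segment payload) plus off; e.g.
// a SeekPosition of 1000 with the payload starting at byte 53 puts the Cues
// ID at absolute byte 1053.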
if ((total < 0) || (pos >= total)) return 1; // don't bother parsing cues const long long element_start = pos; const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } long long result = GetUIntLength(m_pReader, pos, len); if (result < 0) // error return static_cast<long>(result); if (result > 0) // underflow (weird) { len = 1; return E_BUFFER_NOT_FULL; } if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long idpos = pos; const long long id = ReadID(m_pReader, idpos, len); if (id != libwebm::kMkvCues) return E_FILE_FORMAT_INVALID; pos += len; // consume ID assert((segment_stop < 0) || (pos <= segment_stop)); // Read Size if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } result = GetUIntLength(m_pReader, pos, len); if (result < 0) // error return static_cast<long>(result); if (result > 0) // underflow (weird) { len = 1; return E_BUFFER_NOT_FULL; } if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long size = ReadUInt(m_pReader, pos, len); if (size < 0) // error return static_cast<long>(size); if (size == 0) // weird, although technically not illegal return 1; // done pos += len; // consume length of size of element assert((segment_stop < 0) || (pos <= segment_stop)); // Pos now points to start of payload const long long element_stop = pos + size; if ((segment_stop >= 0) && (element_stop > segment_stop)) return E_FILE_FORMAT_INVALID; if ((total >= 0) && (element_stop > total)) return 1; // don't bother parsing anymore len = static_cast<long>(size); if (element_stop > avail) return E_BUFFER_NOT_FULL; const long long element_size = element_stop - element_start; m_pCues = new (std::nothrow) Cues(this, pos, size, element_start, element_size); if (m_pCues == NULL) return -1; return 0; // success } bool SeekHead::ParseEntry(IMkvReader* pReader, long long start, long long size_, Entry* pEntry) { if (size_ <= 0) return false; long long pos = start; const long long stop = start + size_; long len; // parse the container for the level-1 element ID const long long seekIdId = ReadID(pReader, pos, len); if (seekIdId < 0) return false; if (seekIdId != libwebm::kMkvSeekID) return false; if ((pos + len) > stop) return false; pos += len; // consume SeekID id const long long seekIdSize = ReadUInt(pReader, pos, len); if (seekIdSize <= 0) return false; if ((pos + len) > stop) return false; pos += len; // consume size of field if ((pos + seekIdSize) > stop) return false; pEntry->id = ReadID(pReader, pos, len); // payload if (pEntry->id <= 0) return false; if (len != seekIdSize) return false; pos += seekIdSize; // consume SeekID payload const long long seekPosId = ReadID(pReader, pos, len); if (seekPosId != libwebm::kMkvSeekPosition) return false; if ((pos + len) > stop) return false; pos += len; // consume id const long long seekPosSize = ReadUInt(pReader, pos, len); if (seekPosSize <= 0) return false; if ((pos + len) > stop) return false; pos += len; // consume size if ((pos + seekPosSize) > stop) return false; pEntry->pos = UnserializeUInt(pReader, pos, seekPosSize); if (pEntry->pos < 0) return false; pos += seekPosSize; // consume payload if (pos != stop) return false; return true; } Cues::Cues(Segment* pSegment, long long start_, long long size_, long long element_start, long long element_size) : m_pSegment(pSegment), m_start(start_), m_size(size_),
m_element_start(element_start), m_element_size(element_size), m_cue_points(NULL), m_count(0), m_preload_count(0), m_pos(start_) {} Cues::~Cues() { const long n = m_count + m_preload_count; CuePoint** p = m_cue_points; CuePoint** const q = p + n; while (p != q) { CuePoint* const pCP = *p++; assert(pCP); delete pCP; } delete[] m_cue_points; } long Cues::GetCount() const { if (m_cue_points == NULL) return -1; return m_count; // TODO: really ignore preload count? } bool Cues::DoneParsing() const { const long long stop = m_start + m_size; return (m_pos >= stop); } bool Cues::Init() const { if (m_cue_points) return true; if (m_count != 0 || m_preload_count != 0) return false; IMkvReader* const pReader = m_pSegment->m_pReader; const long long stop = m_start + m_size; long long pos = m_start; long cue_points_size = 0; while (pos < stop) { const long long idpos = pos; long len; const long long id = ReadID(pReader, pos, len); if (id < 0 || (pos + len) > stop) { return false; } pos += len; // consume ID const long long size = ReadUInt(pReader, pos, len); if (size < 0 || (pos + len > stop)) { return false; } pos += len; // consume Size field if (pos + size > stop) { return false; } if (id == libwebm::kMkvCuePoint) { if (!PreloadCuePoint(cue_points_size, idpos)) return false; } pos += size; // skip payload } return true; } bool Cues::PreloadCuePoint(long& cue_points_size, long long pos) const { if (m_count != 0) return false; if (m_preload_count >= cue_points_size) { const long n = (cue_points_size <= 0) ? 2048 : 2 * cue_points_size; CuePoint** const qq = new (std::nothrow) CuePoint*[n]; if (qq == NULL) return false; CuePoint** q = qq; // beginning of target CuePoint** p = m_cue_points; // beginning of source CuePoint** const pp = p + m_preload_count; // end of source while (p != pp) *q++ = *p++; delete[] m_cue_points; m_cue_points = qq; cue_points_size = n; } CuePoint* const pCP = new (std::nothrow) CuePoint(m_preload_count, pos); if (pCP == NULL) return false; m_cue_points[m_preload_count++] = pCP; return true; } bool Cues::LoadCuePoint() const { const long long stop = m_start + m_size; if (m_pos >= stop) return false; // nothing else to do if (!Init()) { m_pos = stop; return false; } IMkvReader* const pReader = m_pSegment->m_pReader; while (m_pos < stop) { const long long idpos = m_pos; long len; const long long id = ReadID(pReader, m_pos, len); if (id < 0 || (m_pos + len) > stop) return false; m_pos += len; // consume ID const long long size = ReadUInt(pReader, m_pos, len); if (size < 0 || (m_pos + len) > stop) return false; m_pos += len; // consume Size field if ((m_pos + size) > stop) return false; if (id != libwebm::kMkvCuePoint) { m_pos += size; // consume payload if (m_pos > stop) return false; continue; } if (m_preload_count < 1) return false; CuePoint* const pCP = m_cue_points[m_count]; if (!pCP || (pCP->GetTimeCode() < 0 && (-pCP->GetTimeCode() != idpos))) return false; if (!pCP->Load(pReader)) { m_pos = stop; return false; } ++m_count; --m_preload_count; m_pos += size; // consume payload if (m_pos > stop) return false; return true; // yes, we loaded a cue point } return false; // no, we did not load a cue point } bool Cues::Find(long long time_ns, const Track* pTrack, const CuePoint*& pCP, const CuePoint::TrackPosition*& pTP) const { if (time_ns < 0 || pTrack == NULL || m_cue_points == NULL || m_count == 0) return false; CuePoint** const ii = m_cue_points; CuePoint** i = ii; CuePoint** const jj = ii + m_count; CuePoint** j = jj; pCP = *i; if (pCP == NULL) return false; if (time_ns <= 
pCP->GetTime(m_pSegment)) { pTP = pCP->Find(pTrack); return (pTP != NULL); } while (i < j) { // INVARIANT: //[ii, i) <= time_ns //[i, j) ? //[j, jj) > time_ns CuePoint** const k = i + (j - i) / 2; if (k >= jj) return false; CuePoint* const pCP = *k; if (pCP == NULL) return false; const long long t = pCP->GetTime(m_pSegment); if (t <= time_ns) i = k + 1; else j = k; if (i > j) return false; } if (i != j || i > jj || i <= ii) return false; pCP = *--i; if (pCP == NULL || pCP->GetTime(m_pSegment) > time_ns) return false; // TODO: here and elsewhere, it's probably not correct to search // for the cue point with this time, and then search for a matching // track. In principle, the matching track could be on some earlier // cue point, and with our current algorithm, we'd miss it. To make // this bullet-proof, we'd need to create a secondary structure, // with a list of cue points that apply to a track, and then search // that track-based structure for a matching cue point. pTP = pCP->Find(pTrack); return (pTP != NULL); } const CuePoint* Cues::GetFirst() const { if (m_cue_points == NULL || m_count == 0) return NULL; CuePoint* const* const pp = m_cue_points; if (pp == NULL) return NULL; CuePoint* const pCP = pp[0]; if (pCP == NULL || pCP->GetTimeCode() < 0) return NULL; return pCP; } const CuePoint* Cues::GetLast() const { if (m_cue_points == NULL || m_count <= 0) return NULL; const long index = m_count - 1; CuePoint* const* const pp = m_cue_points; if (pp == NULL) return NULL; CuePoint* const pCP = pp[index]; if (pCP == NULL || pCP->GetTimeCode() < 0) return NULL; return pCP; } const CuePoint* Cues::GetNext(const CuePoint* pCurr) const { if (pCurr == NULL || pCurr->GetTimeCode() < 0 || m_cue_points == NULL || m_count < 1) { return NULL; } long index = pCurr->m_index; if (index >= m_count) return NULL; CuePoint* const* const pp = m_cue_points; if (pp == NULL || pp[index] != pCurr) return NULL; ++index; if (index >= m_count) return NULL; CuePoint* const pNext = pp[index]; if (pNext == NULL || pNext->GetTimeCode() < 0) return NULL; return pNext; } const BlockEntry* Cues::GetBlock(const CuePoint* pCP, const CuePoint::TrackPosition* pTP) const { if (pCP == NULL || pTP == NULL) return NULL; return m_pSegment->GetBlock(*pCP, *pTP); } const BlockEntry* Segment::GetBlock(const CuePoint& cp, const CuePoint::TrackPosition& tp) { Cluster** const ii = m_clusters; Cluster** i = ii; const long count = m_clusterCount + m_clusterPreloadCount; Cluster** const jj = ii + count; Cluster** j = jj; while (i < j) { // INVARIANT: //[ii, i) < pTP->m_pos //[i, j) ? //[j, jj) > pTP->m_pos Cluster** const k = i + (j - i) / 2; assert(k < jj); Cluster* const pCluster = *k; assert(pCluster); // const long long pos_ = pCluster->m_pos; // assert(pos_); // const long long pos = pos_ * ((pos_ < 0) ? 
-1 : 1); const long long pos = pCluster->GetPosition(); assert(pos >= 0); if (pos < tp.m_pos) i = k + 1; else if (pos > tp.m_pos) j = k; else return pCluster->GetEntry(cp, tp); } assert(i == j); // assert(Cluster::HasBlockEntries(this, tp.m_pos)); Cluster* const pCluster = Cluster::Create(this, -1, tp.m_pos); //, -1); if (pCluster == NULL) return NULL; const ptrdiff_t idx = i - m_clusters; if (!PreloadCluster(pCluster, idx)) { delete pCluster; return NULL; } assert(m_clusters); assert(m_clusterPreloadCount > 0); assert(m_clusters[idx] == pCluster); return pCluster->GetEntry(cp, tp); } const Cluster* Segment::FindOrPreloadCluster(long long requested_pos) { if (requested_pos < 0) return 0; Cluster** const ii = m_clusters; Cluster** i = ii; const long count = m_clusterCount + m_clusterPreloadCount; Cluster** const jj = ii + count; Cluster** j = jj; while (i < j) { // INVARIANT: //[ii, i) < pTP->m_pos //[i, j) ? //[j, jj) > pTP->m_pos Cluster** const k = i + (j - i) / 2; assert(k < jj); Cluster* const pCluster = *k; assert(pCluster); // const long long pos_ = pCluster->m_pos; // assert(pos_); // const long long pos = pos_ * ((pos_ < 0) ? -1 : 1); const long long pos = pCluster->GetPosition(); assert(pos >= 0); if (pos < requested_pos) i = k + 1; else if (pos > requested_pos) j = k; else return pCluster; } assert(i == j); // assert(Cluster::HasBlockEntries(this, tp.m_pos)); Cluster* const pCluster = Cluster::Create(this, -1, requested_pos); if (pCluster == NULL) return NULL; const ptrdiff_t idx = i - m_clusters; if (!PreloadCluster(pCluster, idx)) { delete pCluster; return NULL; } assert(m_clusters); assert(m_clusterPreloadCount > 0); assert(m_clusters[idx] == pCluster); return pCluster; } CuePoint::CuePoint(long idx, long long pos) : m_element_start(0), m_element_size(0), m_index(idx), m_timecode(-1 * pos), m_track_positions(NULL), m_track_positions_count(0) { assert(pos > 0); } CuePoint::~CuePoint() { delete[] m_track_positions; } bool CuePoint::Load(IMkvReader* pReader) { // odbgstream os; // os << "CuePoint::Load(begin): timecode=" << m_timecode << endl; if (m_timecode >= 0) // already loaded return true; assert(m_track_positions == NULL); assert(m_track_positions_count == 0); long long pos_ = -m_timecode; const long long element_start = pos_; long long stop; { long len; const long long id = ReadID(pReader, pos_, len); if (id != libwebm::kMkvCuePoint) return false; pos_ += len; // consume ID const long long size = ReadUInt(pReader, pos_, len); assert(size >= 0); pos_ += len; // consume Size field // pos_ now points to start of payload stop = pos_ + size; } const long long element_size = stop - element_start; long long pos = pos_; // First count number of track positions while (pos < stop) { long len; const long long id = ReadID(pReader, pos, len); if ((id < 0) || (pos + len > stop)) { return false; } pos += len; // consume ID const long long size = ReadUInt(pReader, pos, len); if ((size < 0) || (pos + len > stop)) { return false; } pos += len; // consume Size field if ((pos + size) > stop) { return false; } if (id == libwebm::kMkvCueTime) m_timecode = UnserializeUInt(pReader, pos, size); else if (id == libwebm::kMkvCueTrackPositions) ++m_track_positions_count; pos += size; // consume payload } if (m_timecode < 0 || m_track_positions_count <= 0) { return false; } // os << "CuePoint::Load(cont'd): idpos=" << idpos // << " timecode=" << m_timecode // << endl; m_track_positions = new (std::nothrow) TrackPosition[m_track_positions_count]; if (m_track_positions == NULL) return false; // Now parse 
track positions TrackPosition* p = m_track_positions; pos = pos_; while (pos < stop) { long len; const long long id = ReadID(pReader, pos, len); if (id < 0 || (pos + len) > stop) return false; pos += len; // consume ID const long long size = ReadUInt(pReader, pos, len); assert(size >= 0); assert((pos + len) <= stop); pos += len; // consume Size field assert((pos + size) <= stop); if (id == libwebm::kMkvCueTrackPositions) { TrackPosition& tp = *p++; if (!tp.Parse(pReader, pos, size)) { return false; } } pos += size; // consume payload if (pos > stop) return false; } assert(size_t(p - m_track_positions) == m_track_positions_count); m_element_start = element_start; m_element_size = element_size; return true; } bool CuePoint::TrackPosition::Parse(IMkvReader* pReader, long long start_, long long size_) { const long long stop = start_ + size_; long long pos = start_; m_track = -1; m_pos = -1; m_block = 1; // default while (pos < stop) { long len; const long long id = ReadID(pReader, pos, len); if ((id < 0) || ((pos + len) > stop)) { return false; } pos += len; // consume ID const long long size = ReadUInt(pReader, pos, len); if ((size < 0) || ((pos + len) > stop)) { return false; } pos += len; // consume Size field if ((pos + size) > stop) { return false; } if (id == libwebm::kMkvCueTrack) m_track = UnserializeUInt(pReader, pos, size); else if (id == libwebm::kMkvCueClusterPosition) m_pos = UnserializeUInt(pReader, pos, size); else if (id == libwebm::kMkvCueBlockNumber) m_block = UnserializeUInt(pReader, pos, size); pos += size; // consume payload } if ((m_pos < 0) || (m_track <= 0)) { return false; } return true; } const CuePoint::TrackPosition* CuePoint::Find(const Track* pTrack) const { if (pTrack == NULL) { return NULL; } const long long n = pTrack->GetNumber(); const TrackPosition* i = m_track_positions; const TrackPosition* const j = i + m_track_positions_count; while (i != j) { const TrackPosition& p = *i++; if (p.m_track == n) return &p; } return NULL; // no matching track number found } long long CuePoint::GetTimeCode() const { return m_timecode; } long long CuePoint::GetTime(const Segment* pSegment) const { assert(pSegment); assert(m_timecode >= 0); const SegmentInfo* const pInfo = pSegment->GetInfo(); assert(pInfo); const long long scale = pInfo->GetTimeCodeScale(); assert(scale >= 1); const long long time = scale * m_timecode; return time; } bool Segment::DoneParsing() const { if (m_size < 0) { long long total, avail; const int status = m_pReader->Length(&total, &avail); if (status < 0) // error return true; // must assume done if (total < 0) return false; // assume live stream return (m_pos >= total); } const long long stop = m_start + m_size; return (m_pos >= stop); } const Cluster* Segment::GetFirst() const { if ((m_clusters == NULL) || (m_clusterCount <= 0)) return &m_eos; Cluster* const pCluster = m_clusters[0]; assert(pCluster); return pCluster; } const Cluster* Segment::GetLast() const { if ((m_clusters == NULL) || (m_clusterCount <= 0)) return &m_eos; const long idx = m_clusterCount - 1; Cluster* const pCluster = m_clusters[idx]; assert(pCluster); return pCluster; } unsigned long Segment::GetCount() const { return m_clusterCount; } const Cluster* Segment::GetNext(const Cluster* pCurr) { assert(pCurr); assert(pCurr != &m_eos); assert(m_clusters); long idx = pCurr->m_index; if (idx >= 0) { assert(m_clusterCount > 0); assert(idx < m_clusterCount); assert(pCurr == m_clusters[idx]); ++idx; if (idx >= m_clusterCount) return &m_eos; // caller will LoadCluster as desired Cluster* 
const pNext = m_clusters[idx]; assert(pNext); assert(pNext->m_index >= 0); assert(pNext->m_index == idx); return pNext; } assert(m_clusterPreloadCount > 0); long long pos = pCurr->m_element_start; assert(m_size >= 0); // TODO const long long stop = m_start + m_size; // end of segment { long len; long long result = GetUIntLength(m_pReader, pos, len); assert(result == 0); assert((pos + len) <= stop); // TODO if (result != 0) return NULL; const long long id = ReadID(m_pReader, pos, len); if (id != libwebm::kMkvCluster) return NULL; pos += len; // consume ID // Read Size result = GetUIntLength(m_pReader, pos, len); assert(result == 0); // TODO assert((pos + len) <= stop); // TODO const long long size = ReadUInt(m_pReader, pos, len); assert(size > 0); // TODO // assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); pos += len; // consume length of size of element assert((pos + size) <= stop); // TODO // Pos now points to start of payload pos += size; // consume payload } long long off_next = 0; while (pos < stop) { long len; long long result = GetUIntLength(m_pReader, pos, len); assert(result == 0); assert((pos + len) <= stop); // TODO if (result != 0) return NULL; const long long idpos = pos; // pos of next (potential) cluster const long long id = ReadID(m_pReader, idpos, len); if (id < 0) return NULL; pos += len; // consume ID // Read Size result = GetUIntLength(m_pReader, pos, len); assert(result == 0); // TODO assert((pos + len) <= stop); // TODO const long long size = ReadUInt(m_pReader, pos, len); assert(size >= 0); // TODO pos += len; // consume length of size of element assert((pos + size) <= stop); // TODO // Pos now points to start of payload if (size == 0) // weird continue; if (id == libwebm::kMkvCluster) { const long long off_next_ = idpos - m_start; long long pos_; long len_; const long status = Cluster::HasBlockEntries(this, off_next_, pos_, len_); assert(status >= 0); if (status > 0) { off_next = off_next_; break; } } pos += size; // consume payload } if (off_next <= 0) return 0; Cluster** const ii = m_clusters + m_clusterCount; Cluster** i = ii; Cluster** const jj = ii + m_clusterPreloadCount; Cluster** j = jj; while (i < j) { // INVARIANT: //[0, i) < pos_next //[i, j) ? //[j, jj) > pos_next Cluster** const k = i + (j - i) / 2; assert(k < jj); Cluster* const pNext = *k; assert(pNext); assert(pNext->m_index < 0); // const long long pos_ = pNext->m_pos; // assert(pos_); // pos = pos_ * ((pos_ < 0) ? 
-1 : 1); pos = pNext->GetPosition(); if (pos < off_next) i = k + 1; else if (pos > off_next) j = k; else return pNext; } assert(i == j); Cluster* const pNext = Cluster::Create(this, -1, off_next); if (pNext == NULL) return NULL; const ptrdiff_t idx_next = i - m_clusters; // insertion position if (!PreloadCluster(pNext, idx_next)) { delete pNext; return NULL; } assert(m_clusters); assert(idx_next < m_clusterSize); assert(m_clusters[idx_next] == pNext); return pNext; } long Segment::ParseNext(const Cluster* pCurr, const Cluster*& pResult, long long& pos, long& len) { assert(pCurr); assert(!pCurr->EOS()); assert(m_clusters); pResult = 0; if (pCurr->m_index >= 0) { // loaded (not merely preloaded) assert(m_clusters[pCurr->m_index] == pCurr); const long next_idx = pCurr->m_index + 1; if (next_idx < m_clusterCount) { pResult = m_clusters[next_idx]; return 0; // success } // curr cluster is last among loaded const long result = LoadCluster(pos, len); if (result < 0) // error or underflow return result; if (result > 0) // no more clusters { // pResult = &m_eos; return 1; } pResult = GetLast(); return 0; // success } assert(m_pos > 0); long long total, avail; long status = m_pReader->Length(&total, &avail); if (status < 0) // error return status; assert((total < 0) || (avail <= total)); const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; // interrogate curr cluster pos = pCurr->m_element_start; if (pCurr->m_element_size >= 0) pos += pCurr->m_element_size; else { if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } long long result = GetUIntLength(m_pReader, pos, len); if (result < 0) // error return static_cast<long>(result); if (result > 0) // weird return E_BUFFER_NOT_FULL; if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long id = ReadUInt(m_pReader, pos, len); if (id != libwebm::kMkvCluster) return -1; pos += len; // consume ID // Read Size if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } result = GetUIntLength(m_pReader, pos, len); if (result < 0) // error return static_cast<long>(result); if (result > 0) // weird return E_BUFFER_NOT_FULL; if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long size = ReadUInt(m_pReader, pos, len); if (size < 0) // error return static_cast<long>(size); pos += len; // consume size field const long long unknown_size = (1LL << (7 * len)) - 1; if (size == unknown_size) // TODO: should never happen return E_FILE_FORMAT_INVALID; // TODO: resolve this // assert((pCurr->m_size <= 0) || (pCurr->m_size == size)); if ((segment_stop >= 0) && ((pos + size) > segment_stop)) return E_FILE_FORMAT_INVALID; // Pos now points to start of payload pos += size; // consume payload (that is, the current cluster) if (segment_stop >= 0 && pos > segment_stop) return E_FILE_FORMAT_INVALID; // By consuming the payload, we are assuming that the curr // cluster isn't interesting. That is, we don't bother checking // whether the payload of the curr cluster is less than what // happens to be available (obtained via IMkvReader::Length). // Presumably the caller has already dispensed with the current // cluster, and really does want the next cluster.
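// (Editorial sketch.)  Like the other incremental entry points, ParseNext()
// reports E_BUFFER_NOT_FULL with pos/len describing the unsatisfied read; a
// pull-style caller retries once more data is buffered (PopulateCache below
// is a hypothetical stand-in for the application's buffering):
//
//   const Cluster* next = NULL;
//   long long pos;
//   long len;
//   long status;
//   do {
//     status = segment->ParseNext(cluster, next, pos, len);
//     if (status == E_BUFFER_NOT_FULL) PopulateCache(pos, len);
//   } while (status == E_BUFFER_NOT_FULL);
//   // status == 0: next is valid; status == 1: no more clusters.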
} // pos now points to just beyond the last fully-loaded cluster for (;;) { const long status = DoParseNext(pResult, pos, len); if (status <= 1) return status; } } long Segment::DoParseNext(const Cluster*& pResult, long long& pos, long& len) { long long total, avail; long status = m_pReader->Length(&total, &avail); if (status < 0) // error return status; assert((total < 0) || (avail <= total)); const long long segment_stop = (m_size < 0) ? -1 : m_start + m_size; // Parse next cluster. This is strictly a parsing activity. // Creation of a new cluster object happens later, after the // parsing is done. long long off_next = 0; long long cluster_size = -1; for (;;) { if ((total >= 0) && (pos >= total)) return 1; // EOF if ((segment_stop >= 0) && (pos >= segment_stop)) return 1; // EOF if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } long long result = GetUIntLength(m_pReader, pos, len); if (result < 0) // error return static_cast<long>(result); if (result > 0) // weird return E_BUFFER_NOT_FULL; if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long idpos = pos; // absolute const long long idoff = pos - m_start; // relative const long long id = ReadID(m_pReader, idpos, len); // absolute if (id < 0) // error return static_cast<long>(id); if (id == 0) // weird return -1; // generic error pos += len; // consume ID // Read Size if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } result = GetUIntLength(m_pReader, pos, len); if (result < 0) // error return static_cast<long>(result); if (result > 0) // weird return E_BUFFER_NOT_FULL; if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long size = ReadUInt(m_pReader, pos, len); if (size < 0) // error return static_cast<long>(size); pos += len; // consume length of size of element // Pos now points to start of payload if (size == 0) // weird continue; const long long unknown_size = (1LL << (7 * len)) - 1; if ((segment_stop >= 0) && (size != unknown_size) && ((pos + size) > segment_stop)) { return E_FILE_FORMAT_INVALID; } if (id == libwebm::kMkvCues) { if (size == unknown_size) return E_FILE_FORMAT_INVALID; const long long element_stop = pos + size; if ((segment_stop >= 0) && (element_stop > segment_stop)) return E_FILE_FORMAT_INVALID; const long long element_start = idpos; const long long element_size = element_stop - element_start; if (m_pCues == NULL) { m_pCues = new (std::nothrow) Cues(this, pos, size, element_start, element_size); if (m_pCues == NULL) return -1; } pos += size; // consume payload if (segment_stop >= 0 && pos > segment_stop) return E_FILE_FORMAT_INVALID; continue; } if (id != libwebm::kMkvCluster) { // not a Cluster ID if (size == unknown_size) return E_FILE_FORMAT_INVALID; pos += size; // consume payload if (segment_stop >= 0 && pos > segment_stop) return E_FILE_FORMAT_INVALID; continue; } // We have a cluster. off_next = idoff; if (size != unknown_size) cluster_size = size; break; } assert(off_next > 0); // have cluster // We have parsed the next cluster. // We have not created a cluster object yet. What we need // to do now is determine whether it has already been preloaded //(in which case, an object for this cluster has already been // created), and if not, create a new cluster object.
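// (Editorial note.)  m_clusters holds the loaded run [0, m_clusterCount)
// followed by the preloaded run [m_clusterCount, m_clusterCount +
// m_clusterPreloadCount), each sorted by position, so the loop below is a
// binary search over just the preloaded run.  E.g. with preloaded
// positions {1200, 3400, 9000} and off_next == 3400, the probe lands on
// the existing object and we return it rather than allocate a duplicate.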
Cluster** const ii = m_clusters + m_clusterCount; Cluster** i = ii; Cluster** const jj = ii + m_clusterPreloadCount; Cluster** j = jj; while (i < j) { // INVARIANT: //[0, i) < pos_next //[i, j) ? //[j, jj) > pos_next Cluster** const k = i + (j - i) / 2; assert(k < jj); const Cluster* const pNext = *k; assert(pNext); assert(pNext->m_index < 0); pos = pNext->GetPosition(); assert(pos >= 0); if (pos < off_next) i = k + 1; else if (pos > off_next) j = k; else { pResult = pNext; return 0; // success } } assert(i == j); long long pos_; long len_; status = Cluster::HasBlockEntries(this, off_next, pos_, len_); if (status < 0) { // error or underflow pos = pos_; len = len_; return status; } if (status > 0) { // means "found at least one block entry" Cluster* const pNext = Cluster::Create(this, -1, // preloaded off_next); if (pNext == NULL) return -1; const ptrdiff_t idx_next = i - m_clusters; // insertion position if (!PreloadCluster(pNext, idx_next)) { delete pNext; return -1; } assert(m_clusters); assert(idx_next < m_clusterSize); assert(m_clusters[idx_next] == pNext); pResult = pNext; return 0; // success } // status == 0 means "no block entries found" if (cluster_size < 0) { // unknown size const long long payload_pos = pos; // absolute pos of cluster payload for (;;) { // determine cluster size if ((total >= 0) && (pos >= total)) break; if ((segment_stop >= 0) && (pos >= segment_stop)) break; // no more clusters // Read ID if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } long long result = GetUIntLength(m_pReader, pos, len); if (result < 0) // error return static_cast<long>(result); if (result > 0) // weird return E_BUFFER_NOT_FULL; if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long idpos = pos; const long long id = ReadID(m_pReader, idpos, len); if (id < 0) // error (or underflow) return static_cast<long>(id); // This is the distinguished set of IDs we use to determine // that we have exhausted the sub-elements inside the cluster // whose ID we parsed earlier.
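// (Editorial note.)  Matroska ends an unknown-size element at the first ID
// that cannot be its descendant; the parser approximates that rule with
// this two-element set, since Cluster and Cues are Segment-level elements
// that can never appear inside a Cluster's payload.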
if (id == libwebm::kMkvCluster || id == libwebm::kMkvCues) break; pos += len; // consume ID (of sub-element) // Read Size if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } result = GetUIntLength(m_pReader, pos, len); if (result < 0) // error return static_cast<long>(result); if (result > 0) // weird return E_BUFFER_NOT_FULL; if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long size = ReadUInt(m_pReader, pos, len); if (size < 0) // error return static_cast<long>(size); pos += len; // consume size field of element // pos now points to start of sub-element's payload if (size == 0) // weird continue; const long long unknown_size = (1LL << (7 * len)) - 1; if (size == unknown_size) return E_FILE_FORMAT_INVALID; // not allowed for sub-elements if ((segment_stop >= 0) && ((pos + size) > segment_stop)) // weird return E_FILE_FORMAT_INVALID; pos += size; // consume payload of sub-element if (segment_stop >= 0 && pos > segment_stop) return E_FILE_FORMAT_INVALID; } // determine cluster size cluster_size = pos - payload_pos; assert(cluster_size >= 0); // TODO: handle cluster_size = 0 pos = payload_pos; // reset and re-parse original cluster } pos += cluster_size; // consume payload if (segment_stop >= 0 && pos > segment_stop) return E_FILE_FORMAT_INVALID; return 2; // try to find a cluster that follows next } const Cluster* Segment::FindCluster(long long time_ns) const { if ((m_clusters == NULL) || (m_clusterCount <= 0)) return &m_eos; { Cluster* const pCluster = m_clusters[0]; assert(pCluster); assert(pCluster->m_index == 0); if (time_ns <= pCluster->GetTime()) return pCluster; } // Binary search of cluster array long i = 0; long j = m_clusterCount; while (i < j) { // INVARIANT: //[0, i) <= time_ns //[i, j) ?
//[j, m_clusterCount) > time_ns const long k = i + (j - i) / 2; assert(k < m_clusterCount); Cluster* const pCluster = m_clusters[k]; assert(pCluster); assert(pCluster->m_index == k); const long long t = pCluster->GetTime(); if (t <= time_ns) i = k + 1; else j = k; assert(i <= j); } assert(i == j); assert(i > 0); assert(i <= m_clusterCount); const long k = i - 1; Cluster* const pCluster = m_clusters[k]; assert(pCluster); assert(pCluster->m_index == k); assert(pCluster->GetTime() <= time_ns); return pCluster; } const Tracks* Segment::GetTracks() const { return m_pTracks; } const SegmentInfo* Segment::GetInfo() const { return m_pInfo; } const Cues* Segment::GetCues() const { return m_pCues; } const Chapters* Segment::GetChapters() const { return m_pChapters; } const Tags* Segment::GetTags() const { return m_pTags; } const SeekHead* Segment::GetSeekHead() const { return m_pSeekHead; } long long Segment::GetDuration() const { assert(m_pInfo); return m_pInfo->GetDuration(); } Chapters::Chapters(Segment* pSegment, long long payload_start, long long payload_size, long long element_start, long long element_size) : m_pSegment(pSegment), m_start(payload_start), m_size(payload_size), m_element_start(element_start), m_element_size(element_size), m_editions(NULL), m_editions_size(0), m_editions_count(0) {} Chapters::~Chapters() { while (m_editions_count > 0) { Edition& e = m_editions[--m_editions_count]; e.Clear(); } delete[] m_editions; } long Chapters::Parse() { IMkvReader* const pReader = m_pSegment->m_pReader; long long pos = m_start; // payload start const long long stop = pos + m_size; // payload stop while (pos < stop) { long long id, size; long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; if (size == 0) // weird continue; if (id == libwebm::kMkvEditionEntry) { status = ParseEdition(pos, size); if (status < 0) // error return status; } pos += size; if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; return 0; } int Chapters::GetEditionCount() const { return m_editions_count; } const Chapters::Edition* Chapters::GetEdition(int idx) const { if (idx < 0) return NULL; if (idx >= m_editions_count) return NULL; return m_editions + idx; } bool Chapters::ExpandEditionsArray() { if (m_editions_size > m_editions_count) return true; // nothing else to do const int size = (m_editions_size == 0) ? 
1 : 2 * m_editions_size; Edition* const editions = new (std::nothrow) Edition[size]; if (editions == NULL) return false; for (int idx = 0; idx < m_editions_count; ++idx) { m_editions[idx].ShallowCopy(editions[idx]); } delete[] m_editions; m_editions = editions; m_editions_size = size; return true; } long Chapters::ParseEdition(long long pos, long long size) { if (!ExpandEditionsArray()) return -1; Edition& e = m_editions[m_editions_count++]; e.Init(); return e.Parse(m_pSegment->m_pReader, pos, size); } Chapters::Edition::Edition() {} Chapters::Edition::~Edition() {} int Chapters::Edition::GetAtomCount() const { return m_atoms_count; } const Chapters::Atom* Chapters::Edition::GetAtom(int index) const { if (index < 0) return NULL; if (index >= m_atoms_count) return NULL; return m_atoms + index; } void Chapters::Edition::Init() { m_atoms = NULL; m_atoms_size = 0; m_atoms_count = 0; } void Chapters::Edition::ShallowCopy(Edition& rhs) const { rhs.m_atoms = m_atoms; rhs.m_atoms_size = m_atoms_size; rhs.m_atoms_count = m_atoms_count; } void Chapters::Edition::Clear() { while (m_atoms_count > 0) { Atom& a = m_atoms[--m_atoms_count]; a.Clear(); } delete[] m_atoms; m_atoms = NULL; m_atoms_size = 0; } long Chapters::Edition::Parse(IMkvReader* pReader, long long pos, long long size) { const long long stop = pos + size; while (pos < stop) { long long id, size; long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; if (size == 0) continue; if (id == libwebm::kMkvChapterAtom) { status = ParseAtom(pReader, pos, size); if (status < 0) // error return status; } pos += size; if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; return 0; } long Chapters::Edition::ParseAtom(IMkvReader* pReader, long long pos, long long size) { if (!ExpandAtomsArray()) return -1; Atom& a = m_atoms[m_atoms_count++]; a.Init(); return a.Parse(pReader, pos, size); } bool Chapters::Edition::ExpandAtomsArray() { if (m_atoms_size > m_atoms_count) return true; // nothing else to do const int size = (m_atoms_size == 0) ? 
1 : 2 * m_atoms_size; Atom* const atoms = new (std::nothrow) Atom[size]; if (atoms == NULL) return false; for (int idx = 0; idx < m_atoms_count; ++idx) { m_atoms[idx].ShallowCopy(atoms[idx]); } delete[] m_atoms; m_atoms = atoms; m_atoms_size = size; return true; } Chapters::Atom::Atom() {} Chapters::Atom::~Atom() {} unsigned long long Chapters::Atom::GetUID() const { return m_uid; } const char* Chapters::Atom::GetStringUID() const { return m_string_uid; } long long Chapters::Atom::GetStartTimecode() const { return m_start_timecode; } long long Chapters::Atom::GetStopTimecode() const { return m_stop_timecode; } long long Chapters::Atom::GetStartTime(const Chapters* pChapters) const { return GetTime(pChapters, m_start_timecode); } long long Chapters::Atom::GetStopTime(const Chapters* pChapters) const { return GetTime(pChapters, m_stop_timecode); } int Chapters::Atom::GetDisplayCount() const { return m_displays_count; } const Chapters::Display* Chapters::Atom::GetDisplay(int index) const { if (index < 0) return NULL; if (index >= m_displays_count) return NULL; return m_displays + index; } void Chapters::Atom::Init() { m_string_uid = NULL; m_uid = 0; m_start_timecode = -1; m_stop_timecode = -1; m_displays = NULL; m_displays_size = 0; m_displays_count = 0; } void Chapters::Atom::ShallowCopy(Atom& rhs) const { rhs.m_string_uid = m_string_uid; rhs.m_uid = m_uid; rhs.m_start_timecode = m_start_timecode; rhs.m_stop_timecode = m_stop_timecode; rhs.m_displays = m_displays; rhs.m_displays_size = m_displays_size; rhs.m_displays_count = m_displays_count; } void Chapters::Atom::Clear() { delete[] m_string_uid; m_string_uid = NULL; while (m_displays_count > 0) { Display& d = m_displays[--m_displays_count]; d.Clear(); } delete[] m_displays; m_displays = NULL; m_displays_size = 0; } long Chapters::Atom::Parse(IMkvReader* pReader, long long pos, long long size) { const long long stop = pos + size; while (pos < stop) { long long id, size; long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; if (size == 0) // 0 length payload, skip. 
continue; if (id == libwebm::kMkvChapterDisplay) { status = ParseDisplay(pReader, pos, size); if (status < 0) // error return status; } else if (id == libwebm::kMkvChapterStringUID) { status = UnserializeString(pReader, pos, size, m_string_uid); if (status < 0) // error return status; } else if (id == libwebm::kMkvChapterUID) { long long val; status = UnserializeInt(pReader, pos, size, val); if (status < 0) // error return status; m_uid = static_cast<unsigned long long>(val); } else if (id == libwebm::kMkvChapterTimeStart) { const long long val = UnserializeUInt(pReader, pos, size); if (val < 0) // error return static_cast<long>(val); m_start_timecode = val; } else if (id == libwebm::kMkvChapterTimeEnd) { const long long val = UnserializeUInt(pReader, pos, size); if (val < 0) // error return static_cast<long>(val); m_stop_timecode = val; } pos += size; if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; return 0; } long long Chapters::Atom::GetTime(const Chapters* pChapters, long long timecode) { if (pChapters == NULL) return -1; Segment* const pSegment = pChapters->m_pSegment; if (pSegment == NULL) // weird return -1; const SegmentInfo* const pInfo = pSegment->GetInfo(); if (pInfo == NULL) return -1; const long long timecode_scale = pInfo->GetTimeCodeScale(); if (timecode_scale < 1) // weird return -1; if (timecode < 0) return -1; const long long result = timecode_scale * timecode; return result; } long Chapters::Atom::ParseDisplay(IMkvReader* pReader, long long pos, long long size) { if (!ExpandDisplaysArray()) return -1; Display& d = m_displays[m_displays_count++]; d.Init(); return d.Parse(pReader, pos, size); } bool Chapters::Atom::ExpandDisplaysArray() { if (m_displays_size > m_displays_count) return true; // nothing else to do const int size = (m_displays_size == 0) ? 1 : 2 * m_displays_size; Display* const displays = new (std::nothrow) Display[size]; if (displays == NULL) return false; for (int idx = 0; idx < m_displays_count; ++idx) { m_displays[idx].ShallowCopy(displays[idx]); } delete[] m_displays; m_displays = displays; m_displays_size = size; return true; } Chapters::Display::Display() {} Chapters::Display::~Display() {} const char* Chapters::Display::GetString() const { return m_string; } const char* Chapters::Display::GetLanguage() const { return m_language; } const char* Chapters::Display::GetCountry() const { return m_country; } void Chapters::Display::Init() { m_string = NULL; m_language = NULL; m_country = NULL; } void Chapters::Display::ShallowCopy(Display& rhs) const { rhs.m_string = m_string; rhs.m_language = m_language; rhs.m_country = m_country; } void Chapters::Display::Clear() { delete[] m_string; m_string = NULL; delete[] m_language; m_language = NULL; delete[] m_country; m_country = NULL; } long Chapters::Display::Parse(IMkvReader* pReader, long long pos, long long size) { const long long stop = pos + size; while (pos < stop) { long long id, size; long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; if (size == 0) // No payload.
continue; if (id == libwebm::kMkvChapString) { status = UnserializeString(pReader, pos, size, m_string); if (status) return status; } else if (id == libwebm::kMkvChapLanguage) { status = UnserializeString(pReader, pos, size, m_language); if (status) return status; } else if (id == libwebm::kMkvChapCountry) { status = UnserializeString(pReader, pos, size, m_country); if (status) return status; } pos += size; if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; return 0; } Tags::Tags(Segment* pSegment, long long payload_start, long long payload_size, long long element_start, long long element_size) : m_pSegment(pSegment), m_start(payload_start), m_size(payload_size), m_element_start(element_start), m_element_size(element_size), m_tags(NULL), m_tags_size(0), m_tags_count(0) {} Tags::~Tags() { while (m_tags_count > 0) { Tag& t = m_tags[--m_tags_count]; t.Clear(); } delete[] m_tags; } long Tags::Parse() { IMkvReader* const pReader = m_pSegment->m_pReader; long long pos = m_start; // payload start const long long stop = pos + m_size; // payload stop while (pos < stop) { long long id, size; long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) return status; if (size == 0) // 0 length tag, read another continue; if (id == libwebm::kMkvTag) { status = ParseTag(pos, size); if (status < 0) return status; } pos += size; if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; return 0; } int Tags::GetTagCount() const { return m_tags_count; } const Tags::Tag* Tags::GetTag(int idx) const { if (idx < 0) return NULL; if (idx >= m_tags_count) return NULL; return m_tags + idx; } bool Tags::ExpandTagsArray() { if (m_tags_size > m_tags_count) return true; // nothing else to do const int size = (m_tags_size == 0) ? 
1 : 2 * m_tags_size; Tag* const tags = new (std::nothrow) Tag[size]; if (tags == NULL) return false; for (int idx = 0; idx < m_tags_count; ++idx) { m_tags[idx].ShallowCopy(tags[idx]); } delete[] m_tags; m_tags = tags; m_tags_size = size; return true; } long Tags::ParseTag(long long pos, long long size) { if (!ExpandTagsArray()) return -1; Tag& t = m_tags[m_tags_count++]; t.Init(); return t.Parse(m_pSegment->m_pReader, pos, size); } Tags::Tag::Tag() {} Tags::Tag::~Tag() {} int Tags::Tag::GetSimpleTagCount() const { return m_simple_tags_count; } const Tags::SimpleTag* Tags::Tag::GetSimpleTag(int index) const { if (index < 0) return NULL; if (index >= m_simple_tags_count) return NULL; return m_simple_tags + index; } void Tags::Tag::Init() { m_simple_tags = NULL; m_simple_tags_size = 0; m_simple_tags_count = 0; } void Tags::Tag::ShallowCopy(Tag& rhs) const { rhs.m_simple_tags = m_simple_tags; rhs.m_simple_tags_size = m_simple_tags_size; rhs.m_simple_tags_count = m_simple_tags_count; } void Tags::Tag::Clear() { while (m_simple_tags_count > 0) { SimpleTag& d = m_simple_tags[--m_simple_tags_count]; d.Clear(); } delete[] m_simple_tags; m_simple_tags = NULL; m_simple_tags_size = 0; } long Tags::Tag::Parse(IMkvReader* pReader, long long pos, long long size) { const long long stop = pos + size; while (pos < stop) { long long id, size; long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) return status; if (size == 0) // 0 length tag, read another continue; if (id == libwebm::kMkvSimpleTag) { status = ParseSimpleTag(pReader, pos, size); if (status < 0) return status; } pos += size; if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; return 0; } long Tags::Tag::ParseSimpleTag(IMkvReader* pReader, long long pos, long long size) { if (!ExpandSimpleTagsArray()) return -1; SimpleTag& st = m_simple_tags[m_simple_tags_count++]; st.Init(); return st.Parse(pReader, pos, size); } bool Tags::Tag::ExpandSimpleTagsArray() { if (m_simple_tags_size > m_simple_tags_count) return true; // nothing else to do const int size = (m_simple_tags_size == 0) ? 
1 : 2 * m_simple_tags_size; SimpleTag* const displays = new (std::nothrow) SimpleTag[size]; if (displays == NULL) return false; for (int idx = 0; idx < m_simple_tags_count; ++idx) { m_simple_tags[idx].ShallowCopy(displays[idx]); } delete[] m_simple_tags; m_simple_tags = displays; m_simple_tags_size = size; return true; } Tags::SimpleTag::SimpleTag() {} Tags::SimpleTag::~SimpleTag() {} const char* Tags::SimpleTag::GetTagName() const { return m_tag_name; } const char* Tags::SimpleTag::GetTagString() const { return m_tag_string; } void Tags::SimpleTag::Init() { m_tag_name = NULL; m_tag_string = NULL; } void Tags::SimpleTag::ShallowCopy(SimpleTag& rhs) const { rhs.m_tag_name = m_tag_name; rhs.m_tag_string = m_tag_string; } void Tags::SimpleTag::Clear() { delete[] m_tag_name; m_tag_name = NULL; delete[] m_tag_string; m_tag_string = NULL; } long Tags::SimpleTag::Parse(IMkvReader* pReader, long long pos, long long size) { const long long stop = pos + size; while (pos < stop) { long long id, size; long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; if (size == 0) // weird continue; if (id == libwebm::kMkvTagName) { status = UnserializeString(pReader, pos, size, m_tag_name); if (status) return status; } else if (id == libwebm::kMkvTagString) { status = UnserializeString(pReader, pos, size, m_tag_string); if (status) return status; } pos += size; if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; return 0; } SegmentInfo::SegmentInfo(Segment* pSegment, long long start, long long size_, long long element_start, long long element_size) : m_pSegment(pSegment), m_start(start), m_size(size_), m_element_start(element_start), m_element_size(element_size), m_pMuxingAppAsUTF8(NULL), m_pWritingAppAsUTF8(NULL), m_pTitleAsUTF8(NULL) {} SegmentInfo::~SegmentInfo() { delete[] m_pMuxingAppAsUTF8; m_pMuxingAppAsUTF8 = NULL; delete[] m_pWritingAppAsUTF8; m_pWritingAppAsUTF8 = NULL; delete[] m_pTitleAsUTF8; m_pTitleAsUTF8 = NULL; } long SegmentInfo::Parse() { assert(m_pMuxingAppAsUTF8 == NULL); assert(m_pWritingAppAsUTF8 == NULL); assert(m_pTitleAsUTF8 == NULL); IMkvReader* const pReader = m_pSegment->m_pReader; long long pos = m_start; const long long stop = m_start + m_size; m_timecodeScale = 1000000; m_duration = -1; while (pos < stop) { long long id, size; const long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; if (id == libwebm::kMkvTimecodeScale) { m_timecodeScale = UnserializeUInt(pReader, pos, size); if (m_timecodeScale <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvDuration) { const long status = UnserializeFloat(pReader, pos, size, m_duration); if (status < 0) return status; if (m_duration < 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvMuxingApp) { const long status = UnserializeString(pReader, pos, size, m_pMuxingAppAsUTF8); if (status) return status; } else if (id == libwebm::kMkvWritingApp) { const long status = UnserializeString(pReader, pos, size, m_pWritingAppAsUTF8); if (status) return status; } else if (id == libwebm::kMkvTitle) { const long status = UnserializeString(pReader, pos, size, m_pTitleAsUTF8); if (status) return status; } pos += size; if (pos > stop) return E_FILE_FORMAT_INVALID; } const double rollover_check = m_duration * m_timecodeScale; if (rollover_check > static_cast<double>(LLONG_MAX)) return E_FILE_FORMAT_INVALID; if (pos != stop) return E_FILE_FORMAT_INVALID; return 0; } long long SegmentInfo::GetTimeCodeScale() const { return m_timecodeScale; } long long SegmentInfo::GetDuration() const { if (m_duration < 0) return -1; assert(m_timecodeScale >= 1); const double dd = double(m_duration) * double(m_timecodeScale); const long long d = static_cast<long long>(dd); return d; } const char* SegmentInfo::GetMuxingAppAsUTF8() const { return m_pMuxingAppAsUTF8; } const char* SegmentInfo::GetWritingAppAsUTF8() const { return m_pWritingAppAsUTF8; } const char* SegmentInfo::GetTitleAsUTF8() const { return m_pTitleAsUTF8; } /////////////////////////////////////////////////////////////// // ContentEncoding element ContentEncoding::ContentCompression::ContentCompression() : algo(0), settings(NULL), settings_len(0) {} ContentEncoding::ContentCompression::~ContentCompression() { delete[] settings; } ContentEncoding::ContentEncryption::ContentEncryption() : algo(0), key_id(NULL), key_id_len(0), signature(NULL), signature_len(0), sig_key_id(NULL), sig_key_id_len(0), sig_algo(0), sig_hash_algo(0) {} ContentEncoding::ContentEncryption::~ContentEncryption() { delete[] key_id; delete[] signature; delete[] sig_key_id; } ContentEncoding::ContentEncoding() : compression_entries_(NULL), compression_entries_end_(NULL), encryption_entries_(NULL), encryption_entries_end_(NULL), encoding_order_(0), encoding_scope_(1), encoding_type_(0) {} ContentEncoding::~ContentEncoding() { ContentCompression** comp_i = compression_entries_; ContentCompression** const comp_j = compression_entries_end_; while (comp_i != comp_j) { ContentCompression* const comp = *comp_i++; delete comp; } delete[] compression_entries_; ContentEncryption** enc_i = encryption_entries_; ContentEncryption** const enc_j = encryption_entries_end_; while (enc_i != enc_j) { ContentEncryption* const enc = *enc_i++; delete enc; } delete[] encryption_entries_; } const ContentEncoding::ContentCompression* ContentEncoding::GetCompressionByIndex(unsigned long idx) const { const ptrdiff_t count = compression_entries_end_ - compression_entries_; assert(count >= 0); if (idx >= static_cast<unsigned long>(count)) return NULL; return compression_entries_[idx]; } unsigned long ContentEncoding::GetCompressionCount() const { const ptrdiff_t count = compression_entries_end_ - compression_entries_; assert(count >= 0); return static_cast<unsigned long>(count); } const ContentEncoding::ContentEncryption* ContentEncoding::GetEncryptionByIndex( unsigned long idx) const { const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; assert(count >= 0); if (idx >= static_cast<unsigned long>(count)) return NULL; return encryption_entries_[idx]; } unsigned long ContentEncoding::GetEncryptionCount() const { const ptrdiff_t count = encryption_entries_end_ - encryption_entries_; assert(count >= 0); return static_cast<unsigned long>(count); } long ContentEncoding::ParseContentEncAESSettingsEntry( long long start, long long size, IMkvReader* pReader, ContentEncAESSettings* aes) { assert(pReader); assert(aes); long long pos = start; const long long stop = start + size; while (pos < stop) { long long id, size; const long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; if (id == libwebm::kMkvAESSettingsCipherMode) { aes->cipher_mode = UnserializeUInt(pReader, pos, size); if (aes->cipher_mode != 1) return E_FILE_FORMAT_INVALID; } pos += size; // consume payload if (pos > stop) return E_FILE_FORMAT_INVALID; } return 0; } long ContentEncoding::ParseContentEncodingEntry(long long start, long long size, IMkvReader* pReader) { assert(pReader); long long pos = start; const long long stop =
      start + size;

  // Count ContentCompression and ContentEncryption elements.
  int compression_count = 0;
  int encryption_count = 0;

  while (pos < stop) {
    long long id, size;
    const long status = ParseElementHeader(pReader, pos, stop, id, size);

    if (status < 0)  // error
      return status;

    if (id == libwebm::kMkvContentCompression)
      ++compression_count;

    if (id == libwebm::kMkvContentEncryption)
      ++encryption_count;

    pos += size;  // consume payload
    if (pos > stop)
      return E_FILE_FORMAT_INVALID;
  }

  if (compression_count <= 0 && encryption_count <= 0)
    return -1;

  if (compression_count > 0) {
    compression_entries_ =
        new (std::nothrow) ContentCompression*[compression_count];

    if (!compression_entries_)
      return -1;

    compression_entries_end_ = compression_entries_;
  }

  if (encryption_count > 0) {
    encryption_entries_ =
        new (std::nothrow) ContentEncryption*[encryption_count];

    if (!encryption_entries_) {
      delete[] compression_entries_;
      compression_entries_ = NULL;
      return -1;
    }

    encryption_entries_end_ = encryption_entries_;
  }

  pos = start;
  while (pos < stop) {
    long long id, size;
    long status = ParseElementHeader(pReader, pos, stop, id, size);

    if (status < 0)  // error
      return status;

    if (id == libwebm::kMkvContentEncodingOrder) {
      encoding_order_ = UnserializeUInt(pReader, pos, size);
    } else if (id == libwebm::kMkvContentEncodingScope) {
      encoding_scope_ = UnserializeUInt(pReader, pos, size);

      if (encoding_scope_ < 1)
        return -1;
    } else if (id == libwebm::kMkvContentEncodingType) {
      encoding_type_ = UnserializeUInt(pReader, pos, size);
    } else if (id == libwebm::kMkvContentCompression) {
      ContentCompression* const compression =
          new (std::nothrow) ContentCompression();

      if (!compression)
        return -1;

      status = ParseCompressionEntry(pos, size, pReader, compression);
      if (status) {
        delete compression;
        return status;
      }

      assert(compression_count > 0);
      *compression_entries_end_++ = compression;
    } else if (id == libwebm::kMkvContentEncryption) {
      ContentEncryption* const encryption =
          new (std::nothrow) ContentEncryption();

      if (!encryption)
        return -1;

      status = ParseEncryptionEntry(pos, size, pReader, encryption);
      if (status) {
        delete encryption;
        return status;
      }

      assert(encryption_count > 0);
      *encryption_entries_end_++ = encryption;
    }

    pos += size;  // consume payload
    if (pos > stop)
      return E_FILE_FORMAT_INVALID;
  }

  if (pos != stop)
    return E_FILE_FORMAT_INVALID;

  return 0;
}
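// The entry parser above uses a two-pass scheme: the first pass over
// [start, stop) only counts the child elements so that exactly-sized arrays
// can be allocated up front, and the second pass re-reads the same byte range
// to populate them. A generic sketch of that pattern (Widget, kWidgetId and
// ParseWidget are hypothetical names, for illustration only):
#if 0
long ParseWidgets(IMkvReader* pReader, long long start, long long size,
                  Widget**& widgets, int& count) {
  long long pos = start;
  const long long stop = start + size;

  count = 0;
  while (pos < stop) {  // pass 1: count children
    long long id, payload;
    const long status = ParseElementHeader(pReader, pos, stop, id, payload);
    if (status < 0)
      return status;
    if (id == kWidgetId)
      ++count;
    pos += payload;
    if (pos > stop)
      return E_FILE_FORMAT_INVALID;
  }

  widgets = new (std::nothrow) Widget*[count];
  if (widgets == NULL)
    return -1;

  pos = start;  // pass 2: rewind and parse each child for real
  // ... same loop shape, calling ParseWidget() for each kWidgetId ...
  return 0;
}
#endif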
long ContentEncoding::ParseCompressionEntry(long long start, long long size,
                                            IMkvReader* pReader,
                                            ContentCompression* compression) {
  assert(pReader);
  assert(compression);

  long long pos = start;
  const long long stop = start + size;

  bool valid = false;

  while (pos < stop) {
    long long id, size;
    const long status = ParseElementHeader(pReader, pos, stop, id, size);

    if (status < 0)  // error
      return status;

    if (id == libwebm::kMkvContentCompAlgo) {
      long long algo = UnserializeUInt(pReader, pos, size);
      if (algo < 0)
        return E_FILE_FORMAT_INVALID;
      compression->algo = algo;
      valid = true;
    } else if (id == libwebm::kMkvContentCompSettings) {
      if (size <= 0)
        return E_FILE_FORMAT_INVALID;

      const size_t buflen = static_cast<size_t>(size);
      unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
      if (buf == NULL)
        return -1;

      const int read_status =
          pReader->Read(pos, static_cast<long>(buflen), buf);
      if (read_status) {  // propagate the read failure, not the header status
        delete[] buf;
        return read_status;
      }

      // There should be only one settings element per content compression.
      if (compression->settings != NULL) {
        delete[] buf;
        return E_FILE_FORMAT_INVALID;
      }

      compression->settings = buf;
      compression->settings_len = buflen;
    }

    pos += size;  // consume payload
    if (pos > stop)
      return E_FILE_FORMAT_INVALID;
  }

  // ContentCompAlgo is mandatory
  if (!valid)
    return E_FILE_FORMAT_INVALID;

  return 0;
}

long ContentEncoding::ParseEncryptionEntry(long long start, long long size,
                                           IMkvReader* pReader,
                                           ContentEncryption* encryption) {
  assert(pReader);
  assert(encryption);

  long long pos = start;
  const long long stop = start + size;

  while (pos < stop) {
    long long id, size;
    const long status = ParseElementHeader(pReader, pos, stop, id, size);

    if (status < 0)  // error
      return status;

    if (id == libwebm::kMkvContentEncAlgo) {
      encryption->algo = UnserializeUInt(pReader, pos, size);
      if (encryption->algo != 5)
        return E_FILE_FORMAT_INVALID;
    } else if (id == libwebm::kMkvContentEncKeyID) {
      delete[] encryption->key_id;
      encryption->key_id = NULL;
      encryption->key_id_len = 0;

      if (size <= 0)
        return E_FILE_FORMAT_INVALID;

      const size_t buflen = static_cast<size_t>(size);
      unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
      if (buf == NULL)
        return -1;

      const int read_status =
          pReader->Read(pos, static_cast<long>(buflen), buf);
      if (read_status) {
        delete[] buf;
        return read_status;
      }

      encryption->key_id = buf;
      encryption->key_id_len = buflen;
    } else if (id == libwebm::kMkvContentSignature) {
      delete[] encryption->signature;
      encryption->signature = NULL;
      encryption->signature_len = 0;

      if (size <= 0)
        return E_FILE_FORMAT_INVALID;

      const size_t buflen = static_cast<size_t>(size);
      unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
      if (buf == NULL)
        return -1;

      const int read_status =
          pReader->Read(pos, static_cast<long>(buflen), buf);
      if (read_status) {
        delete[] buf;
        return read_status;
      }

      encryption->signature = buf;
      encryption->signature_len = buflen;
    } else if (id == libwebm::kMkvContentSigKeyID) {
      delete[] encryption->sig_key_id;
      encryption->sig_key_id = NULL;
      encryption->sig_key_id_len = 0;

      if (size <= 0)
        return E_FILE_FORMAT_INVALID;

      const size_t buflen = static_cast<size_t>(size);
      unsigned char* buf = SafeArrayAlloc<unsigned char>(1, buflen);
      if (buf == NULL)
        return -1;

      const int read_status =
          pReader->Read(pos, static_cast<long>(buflen), buf);
      if (read_status) {
        delete[] buf;
        return read_status;
      }

      encryption->sig_key_id = buf;
      encryption->sig_key_id_len = buflen;
    } else if (id == libwebm::kMkvContentSigAlgo) {
      encryption->sig_algo = UnserializeUInt(pReader, pos, size);
    } else if (id == libwebm::kMkvContentSigHashAlgo) {
      encryption->sig_hash_algo = UnserializeUInt(pReader, pos, size);
    } else if (id == libwebm::kMkvContentEncAESSettings) {
      const long status = ParseContentEncAESSettingsEntry(
          pos, size, pReader, &encryption->aes_settings);
      if (status)
        return status;
    }

    pos += size;  // consume payload
    if (pos > stop)
      return E_FILE_FORMAT_INVALID;
  }

  return 0;
}

Track::Track(Segment* pSegment, long long element_start,
             long long element_size)
    : m_pSegment(pSegment),
      m_element_start(element_start),
      m_element_size(element_size),
      content_encoding_entries_(NULL),
      content_encoding_entries_end_(NULL) {}

Track::~Track() {
  Info& info = const_cast<Info&>(m_info);
  info.Clear();

  ContentEncoding** i = content_encoding_entries_;
  ContentEncoding** const j = content_encoding_entries_end_;

  while (i != j) {
    ContentEncoding* const encoding = *i++;
    delete encoding;
  }

  delete[] content_encoding_entries_;
}

long Track::Create(Segment* pSegment, const Info& info,
                   long long element_start, long long element_size,
                   Track*& pResult) {
  if (pResult)
    return -1;

  Track* const pTrack = new (std::nothrow) Track(pSegment, element_start,
element_size); if (pTrack == NULL) return -1; // generic error const int status = info.Copy(pTrack->m_info); if (status) { // error delete pTrack; return status; } pResult = pTrack; return 0; // success } Track::Info::Info() : uid(0), defaultDuration(0), codecDelay(0), seekPreRoll(0), nameAsUTF8(NULL), language(NULL), codecId(NULL), codecNameAsUTF8(NULL), codecPrivate(NULL), codecPrivateSize(0), lacing(false) {} Track::Info::~Info() { Clear(); } void Track::Info::Clear() { delete[] nameAsUTF8; nameAsUTF8 = NULL; delete[] language; language = NULL; delete[] codecId; codecId = NULL; delete[] codecPrivate; codecPrivate = NULL; codecPrivateSize = 0; delete[] codecNameAsUTF8; codecNameAsUTF8 = NULL; } int Track::Info::CopyStr(char* Info::*str, Info& dst_) const { if (str == static_cast(NULL)) return -1; char*& dst = dst_.*str; if (dst) // should be NULL already return -1; const char* const src = this->*str; if (src == NULL) return 0; const size_t len = strlen(src); dst = SafeArrayAlloc(1, len + 1); if (dst == NULL) return -1; strcpy(dst, src); return 0; } int Track::Info::Copy(Info& dst) const { if (&dst == this) return 0; dst.type = type; dst.number = number; dst.defaultDuration = defaultDuration; dst.codecDelay = codecDelay; dst.seekPreRoll = seekPreRoll; dst.uid = uid; dst.lacing = lacing; dst.settings = settings; // We now copy the string member variables from src to dst. // This involves memory allocation so in principle the operation // can fail (indeed, that's why we have Info::Copy), so we must // report this to the caller. An error return from this function // therefore implies that the copy was only partially successful. if (int status = CopyStr(&Info::nameAsUTF8, dst)) return status; if (int status = CopyStr(&Info::language, dst)) return status; if (int status = CopyStr(&Info::codecId, dst)) return status; if (int status = CopyStr(&Info::codecNameAsUTF8, dst)) return status; if (codecPrivateSize > 0) { if (codecPrivate == NULL) return -1; if (dst.codecPrivate) return -1; if (dst.codecPrivateSize != 0) return -1; dst.codecPrivate = SafeArrayAlloc(1, codecPrivateSize); if (dst.codecPrivate == NULL) return -1; memcpy(dst.codecPrivate, codecPrivate, codecPrivateSize); dst.codecPrivateSize = codecPrivateSize; } return 0; } const BlockEntry* Track::GetEOS() const { return &m_eos; } long Track::GetType() const { return m_info.type; } long Track::GetNumber() const { return m_info.number; } unsigned long long Track::GetUid() const { return m_info.uid; } const char* Track::GetNameAsUTF8() const { return m_info.nameAsUTF8; } const char* Track::GetLanguage() const { return m_info.language; } const char* Track::GetCodecNameAsUTF8() const { return m_info.codecNameAsUTF8; } const char* Track::GetCodecId() const { return m_info.codecId; } const unsigned char* Track::GetCodecPrivate(size_t& size) const { size = m_info.codecPrivateSize; return m_info.codecPrivate; } bool Track::GetLacing() const { return m_info.lacing; } unsigned long long Track::GetDefaultDuration() const { return m_info.defaultDuration; } unsigned long long Track::GetCodecDelay() const { return m_info.codecDelay; } unsigned long long Track::GetSeekPreRoll() const { return m_info.seekPreRoll; } long Track::GetFirst(const BlockEntry*& pBlockEntry) const { const Cluster* pCluster = m_pSegment->GetFirst(); for (int i = 0;;) { if (pCluster == NULL) { pBlockEntry = GetEOS(); return 1; } if (pCluster->EOS()) { if (m_pSegment->DoneParsing()) { pBlockEntry = GetEOS(); return 1; } pBlockEntry = 0; return E_BUFFER_NOT_FULL; } long status = 
        pCluster->GetFirst(pBlockEntry);

    if (status < 0)  // error
      return status;

    if (pBlockEntry == 0) {  // empty cluster
      pCluster = m_pSegment->GetNext(pCluster);
      continue;
    }

    for (;;) {
      const Block* const pBlock = pBlockEntry->GetBlock();
      assert(pBlock);

      const long long tn = pBlock->GetTrackNumber();

      if ((tn == m_info.number) && VetEntry(pBlockEntry))
        return 0;

      const BlockEntry* pNextEntry;

      status = pCluster->GetNext(pBlockEntry, pNextEntry);

      if (status < 0)  // error
        return status;

      if (pNextEntry == 0)
        break;

      pBlockEntry = pNextEntry;
    }

    ++i;

    if (i >= 100)
      break;

    pCluster = m_pSegment->GetNext(pCluster);
  }

  // NOTE: if we get here, it means that we didn't find a block with
  // a matching track number. We interpret that as an error (which
  // might be too conservative).

  pBlockEntry = GetEOS();  // so we can return a non-NULL value
  return 1;
}

long Track::GetNext(const BlockEntry* pCurrEntry,
                    const BlockEntry*& pNextEntry) const {
  assert(pCurrEntry);
  assert(!pCurrEntry->EOS());  //?

  const Block* const pCurrBlock = pCurrEntry->GetBlock();
  assert(pCurrBlock && pCurrBlock->GetTrackNumber() == m_info.number);
  if (!pCurrBlock || pCurrBlock->GetTrackNumber() != m_info.number)
    return -1;

  const Cluster* pCluster = pCurrEntry->GetCluster();
  assert(pCluster);
  assert(!pCluster->EOS());

  long status = pCluster->GetNext(pCurrEntry, pNextEntry);

  if (status < 0)  // error
    return status;

  for (int i = 0;;) {
    while (pNextEntry) {
      const Block* const pNextBlock = pNextEntry->GetBlock();
      assert(pNextBlock);

      if (pNextBlock->GetTrackNumber() == m_info.number)
        return 0;

      pCurrEntry = pNextEntry;

      status = pCluster->GetNext(pCurrEntry, pNextEntry);

      if (status < 0)  // error
        return status;
    }

    pCluster = m_pSegment->GetNext(pCluster);

    if (pCluster == NULL) {
      pNextEntry = GetEOS();
      return 1;
    }

    if (pCluster->EOS()) {
      if (m_pSegment->DoneParsing()) {
        pNextEntry = GetEOS();
        return 1;
      }

      // TODO: there is a potential O(n^2) problem here: we tell the
      // caller to (pre)load another cluster, which he does, but then he
      // calls GetNext again, which repeats the same search. This is
      // a pathological case, since the only way it can happen is if
      // there exists a long sequence of clusters none of which contain a
      // block from this track. One way around this problem is for the
      // caller to be smarter when he loads another cluster: don't call
      // us back until you have a cluster that contains a block from this
      // track. (Of course, that's not cheap either, since our caller
      // would have to scan each cluster as it's loaded, so that would
      // just push back the problem.)

      pNextEntry = NULL;
      return E_BUFFER_NOT_FULL;
    }

    status = pCluster->GetFirst(pNextEntry);

    if (status < 0)  // error
      return status;

    if (pNextEntry == NULL)  // empty cluster
      continue;

    ++i;

    if (i >= 100)
      break;
  }

  // NOTE: if we get here, it means that we didn't find a block with
  // a matching track number after lots of searching, so we give
  // up trying.

  pNextEntry = GetEOS();  // so we can return a non-NULL value
  return 1;
}

bool Track::VetEntry(const BlockEntry* pBlockEntry) const {
  assert(pBlockEntry);
  const Block* const pBlock = pBlockEntry->GetBlock();
  assert(pBlock);
  assert(pBlock->GetTrackNumber() == m_info.number);
  if (!pBlock || pBlock->GetTrackNumber() != m_info.number)
    return false;

  // This function is used during a seek to determine whether the
  // frame is a valid seek target. This default function simply
  // returns true, which means all frames are valid seek targets.
  // It gets overridden by the VideoTrack class, because only video
  // keyframes can be used as seek target.
return true; } long Track::Seek(long long time_ns, const BlockEntry*& pResult) const { const long status = GetFirst(pResult); if (status < 0) // buffer underflow, etc return status; assert(pResult); if (pResult->EOS()) return 0; const Cluster* pCluster = pResult->GetCluster(); assert(pCluster); assert(pCluster->GetIndex() >= 0); if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) return 0; Cluster** const clusters = m_pSegment->m_clusters; assert(clusters); const long count = m_pSegment->GetCount(); // loaded only, not preloaded assert(count > 0); Cluster** const i = clusters + pCluster->GetIndex(); assert(i); assert(*i == pCluster); assert(pCluster->GetTime() <= time_ns); Cluster** const j = clusters + count; Cluster** lo = i; Cluster** hi = j; while (lo < hi) { // INVARIANT: //[i, lo) <= time_ns //[lo, hi) ? //[hi, j) > time_ns Cluster** const mid = lo + (hi - lo) / 2; assert(mid < hi); pCluster = *mid; assert(pCluster); assert(pCluster->GetIndex() >= 0); assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); const long long t = pCluster->GetTime(); if (t <= time_ns) lo = mid + 1; else hi = mid; assert(lo <= hi); } assert(lo == hi); assert(lo > i); assert(lo <= j); while (lo > i) { pCluster = *--lo; assert(pCluster); assert(pCluster->GetTime() <= time_ns); pResult = pCluster->GetEntry(this); if ((pResult != 0) && !pResult->EOS()) return 0; // landed on empty cluster (no entries) } pResult = GetEOS(); // weird return 0; } const ContentEncoding* Track::GetContentEncodingByIndex( unsigned long idx) const { const ptrdiff_t count = content_encoding_entries_end_ - content_encoding_entries_; assert(count >= 0); if (idx >= static_cast(count)) return NULL; return content_encoding_entries_[idx]; } unsigned long Track::GetContentEncodingCount() const { const ptrdiff_t count = content_encoding_entries_end_ - content_encoding_entries_; assert(count >= 0); return static_cast(count); } long Track::ParseContentEncodingsEntry(long long start, long long size) { IMkvReader* const pReader = m_pSegment->m_pReader; assert(pReader); long long pos = start; const long long stop = start + size; // Count ContentEncoding elements. 
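  // (Illustrative note: every child-element loop in this parser follows the
  // same skeleton. ParseElementHeader() consumes the EBML ID and size fields,
  // advancing pos to the start of the payload, and the caller dispatches on
  // the id before consuming the payload itself:
  //
  //   while (pos < stop) {
  //     long long id, size;
  //     const long status = ParseElementHeader(pReader, pos, stop, id, size);
  //     if (status < 0)
  //       return status;               // I/O error or underflow
  //     // ... dispatch on id, reading payload bytes [pos, pos + size) ...
  //     pos += size;                   // consume payload
  //     if (pos > stop)
  //       return E_FILE_FORMAT_INVALID;
  //   }
  //
  // The counting pass below and the populating pass after it are both
  // instances of this skeleton.)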
int count = 0; while (pos < stop) { long long id, size; const long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; // pos now designates start of element if (id == libwebm::kMkvContentEncoding) ++count; pos += size; // consume payload if (pos > stop) return E_FILE_FORMAT_INVALID; } if (count <= 0) return -1; content_encoding_entries_ = new (std::nothrow) ContentEncoding*[count]; if (!content_encoding_entries_) return -1; content_encoding_entries_end_ = content_encoding_entries_; pos = start; while (pos < stop) { long long id, size; long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; // pos now designates start of element if (id == libwebm::kMkvContentEncoding) { ContentEncoding* const content_encoding = new (std::nothrow) ContentEncoding(); if (!content_encoding) return -1; status = content_encoding->ParseContentEncodingEntry(pos, size, pReader); if (status) { delete content_encoding; return status; } *content_encoding_entries_end_++ = content_encoding; } pos += size; // consume payload if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; return 0; } Track::EOSBlock::EOSBlock() : BlockEntry(NULL, LONG_MIN) {} BlockEntry::Kind Track::EOSBlock::GetKind() const { return kBlockEOS; } const Block* Track::EOSBlock::GetBlock() const { return NULL; } bool PrimaryChromaticity::Parse(IMkvReader* reader, long long read_pos, long long value_size, bool is_x, PrimaryChromaticity** chromaticity) { if (!reader) return false; if (!*chromaticity) *chromaticity = new PrimaryChromaticity(); if (!*chromaticity) return false; PrimaryChromaticity* pc = *chromaticity; float* value = is_x ? &pc->x : &pc->y; double parser_value = 0; const long long parse_status = UnserializeFloat(reader, read_pos, value_size, parser_value); // Valid range is [0, 1]. Make sure the double is representable as a float // before casting. 
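  // (Worked example of the guard below: parser_value = 1.0e-40 is a valid
  // double, but it is smaller than FLT_MIN (~1.17549e-38), so casting it to
  // float would yield a subnormal value; the check rejects it instead of
  // silently losing precision. Exactly 0.0 is still accepted, because the
  // subnormal guard only fires for values strictly greater than zero.)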
if (parse_status < 0 || parser_value < 0.0 || parser_value > 1.0 || (parser_value > 0.0 && parser_value < FLT_MIN)) return false; *value = static_cast(parser_value); return true; } bool MasteringMetadata::Parse(IMkvReader* reader, long long mm_start, long long mm_size, MasteringMetadata** mm) { if (!reader || *mm) return false; std::unique_ptr mm_ptr(new MasteringMetadata()); if (!mm_ptr.get()) return false; const long long mm_end = mm_start + mm_size; long long read_pos = mm_start; while (read_pos < mm_end) { long long child_id = 0; long long child_size = 0; const long long status = ParseElementHeader(reader, read_pos, mm_end, child_id, child_size); if (status < 0) return false; if (child_id == libwebm::kMkvLuminanceMax) { double value = 0; const long long value_parse_status = UnserializeFloat(reader, read_pos, child_size, value); if (value < -FLT_MAX || value > FLT_MAX || (value > 0.0 && value < FLT_MIN)) { return false; } mm_ptr->luminance_max = static_cast(value); if (value_parse_status < 0 || mm_ptr->luminance_max < 0.0 || mm_ptr->luminance_max > 9999.99) { return false; } } else if (child_id == libwebm::kMkvLuminanceMin) { double value = 0; const long long value_parse_status = UnserializeFloat(reader, read_pos, child_size, value); if (value < -FLT_MAX || value > FLT_MAX || (value > 0.0 && value < FLT_MIN)) { return false; } mm_ptr->luminance_min = static_cast(value); if (value_parse_status < 0 || mm_ptr->luminance_min < 0.0 || mm_ptr->luminance_min > 999.9999) { return false; } } else { bool is_x = false; PrimaryChromaticity** chromaticity; switch (child_id) { case libwebm::kMkvPrimaryRChromaticityX: case libwebm::kMkvPrimaryRChromaticityY: is_x = child_id == libwebm::kMkvPrimaryRChromaticityX; chromaticity = &mm_ptr->r; break; case libwebm::kMkvPrimaryGChromaticityX: case libwebm::kMkvPrimaryGChromaticityY: is_x = child_id == libwebm::kMkvPrimaryGChromaticityX; chromaticity = &mm_ptr->g; break; case libwebm::kMkvPrimaryBChromaticityX: case libwebm::kMkvPrimaryBChromaticityY: is_x = child_id == libwebm::kMkvPrimaryBChromaticityX; chromaticity = &mm_ptr->b; break; case libwebm::kMkvWhitePointChromaticityX: case libwebm::kMkvWhitePointChromaticityY: is_x = child_id == libwebm::kMkvWhitePointChromaticityX; chromaticity = &mm_ptr->white_point; break; default: return false; } const bool value_parse_status = PrimaryChromaticity::Parse( reader, read_pos, child_size, is_x, chromaticity); if (!value_parse_status) return false; } read_pos += child_size; if (read_pos > mm_end) return false; } *mm = mm_ptr.release(); return true; } bool Colour::Parse(IMkvReader* reader, long long colour_start, long long colour_size, Colour** colour) { if (!reader || *colour) return false; std::unique_ptr colour_ptr(new Colour()); if (!colour_ptr.get()) return false; const long long colour_end = colour_start + colour_size; long long read_pos = colour_start; while (read_pos < colour_end) { long long child_id = 0; long long child_size = 0; const long status = ParseElementHeader(reader, read_pos, colour_end, child_id, child_size); if (status < 0) return false; if (child_id == libwebm::kMkvMatrixCoefficients) { colour_ptr->matrix_coefficients = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->matrix_coefficients < 0) return false; } else if (child_id == libwebm::kMkvBitsPerChannel) { colour_ptr->bits_per_channel = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->bits_per_channel < 0) return false; } else if (child_id == libwebm::kMkvChromaSubsamplingHorz) { 
colour_ptr->chroma_subsampling_horz = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->chroma_subsampling_horz < 0) return false; } else if (child_id == libwebm::kMkvChromaSubsamplingVert) { colour_ptr->chroma_subsampling_vert = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->chroma_subsampling_vert < 0) return false; } else if (child_id == libwebm::kMkvCbSubsamplingHorz) { colour_ptr->cb_subsampling_horz = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->cb_subsampling_horz < 0) return false; } else if (child_id == libwebm::kMkvCbSubsamplingVert) { colour_ptr->cb_subsampling_vert = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->cb_subsampling_vert < 0) return false; } else if (child_id == libwebm::kMkvChromaSitingHorz) { colour_ptr->chroma_siting_horz = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->chroma_siting_horz < 0) return false; } else if (child_id == libwebm::kMkvChromaSitingVert) { colour_ptr->chroma_siting_vert = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->chroma_siting_vert < 0) return false; } else if (child_id == libwebm::kMkvRange) { colour_ptr->range = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->range < 0) return false; } else if (child_id == libwebm::kMkvTransferCharacteristics) { colour_ptr->transfer_characteristics = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->transfer_characteristics < 0) return false; } else if (child_id == libwebm::kMkvPrimaries) { colour_ptr->primaries = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->primaries < 0) return false; } else if (child_id == libwebm::kMkvMaxCLL) { colour_ptr->max_cll = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->max_cll < 0) return false; } else if (child_id == libwebm::kMkvMaxFALL) { colour_ptr->max_fall = UnserializeUInt(reader, read_pos, child_size); if (colour_ptr->max_fall < 0) return false; } else if (child_id == libwebm::kMkvMasteringMetadata) { if (!MasteringMetadata::Parse(reader, read_pos, child_size, &colour_ptr->mastering_metadata)) return false; } else { return false; } read_pos += child_size; if (read_pos > colour_end) return false; } *colour = colour_ptr.release(); return true; } bool Projection::Parse(IMkvReader* reader, long long start, long long size, Projection** projection) { if (!reader || *projection) return false; std::unique_ptr projection_ptr(new Projection()); if (!projection_ptr.get()) return false; const long long end = start + size; long long read_pos = start; while (read_pos < end) { long long child_id = 0; long long child_size = 0; const long long status = ParseElementHeader(reader, read_pos, end, child_id, child_size); if (status < 0) return false; if (child_id == libwebm::kMkvProjectionType) { long long projection_type = kTypeNotPresent; projection_type = UnserializeUInt(reader, read_pos, child_size); if (projection_type < 0) return false; projection_ptr->type = static_cast(projection_type); } else if (child_id == libwebm::kMkvProjectionPrivate) { unsigned char* data = SafeArrayAlloc(1, child_size); if (data == NULL) return false; const int status = reader->Read(read_pos, static_cast(child_size), data); if (status) { delete[] data; return false; } projection_ptr->private_data = data; projection_ptr->private_data_length = static_cast(child_size); } else { double value = 0; const long long value_parse_status = UnserializeFloat(reader, read_pos, child_size, value); // Make sure value is representable as a float before casting. 
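      // (Note: unlike the chromaticity coordinates above, which must lie in
      // [0, 1], the pose angles may legitimately be negative, so the guard
      // below only rejects values outside [-FLT_MAX, FLT_MAX] and positive
      // subnormals -- e.g. a pose_yaw of -180.0 passes, while 1.0e-40 is
      // rejected.)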
if (value_parse_status < 0 || value < -FLT_MAX || value > FLT_MAX || (value > 0.0 && value < FLT_MIN)) { return false; } switch (child_id) { case libwebm::kMkvProjectionPoseYaw: projection_ptr->pose_yaw = static_cast(value); break; case libwebm::kMkvProjectionPosePitch: projection_ptr->pose_pitch = static_cast(value); break; case libwebm::kMkvProjectionPoseRoll: projection_ptr->pose_roll = static_cast(value); break; default: return false; } } read_pos += child_size; if (read_pos > end) return false; } *projection = projection_ptr.release(); return true; } VideoTrack::VideoTrack(Segment* pSegment, long long element_start, long long element_size) : Track(pSegment, element_start, element_size), m_colour_space(NULL), m_colour(NULL), m_projection(NULL) {} VideoTrack::~VideoTrack() { delete m_colour; delete m_projection; } long VideoTrack::Parse(Segment* pSegment, const Info& info, long long element_start, long long element_size, VideoTrack*& pResult) { if (pResult) return -1; if (info.type != Track::kVideo) return -1; long long width = 0; long long height = 0; long long display_width = 0; long long display_height = 0; long long display_unit = 0; long long stereo_mode = 0; double rate = 0.0; char* colour_space = NULL; IMkvReader* const pReader = pSegment->m_pReader; const Settings& s = info.settings; assert(s.start >= 0); assert(s.size >= 0); long long pos = s.start; assert(pos >= 0); const long long stop = pos + s.size; std::unique_ptr colour_ptr; std::unique_ptr projection_ptr; while (pos < stop) { long long id, size; const long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; if (id == libwebm::kMkvPixelWidth) { width = UnserializeUInt(pReader, pos, size); if (width <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvPixelHeight) { height = UnserializeUInt(pReader, pos, size); if (height <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvDisplayWidth) { display_width = UnserializeUInt(pReader, pos, size); if (display_width <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvDisplayHeight) { display_height = UnserializeUInt(pReader, pos, size); if (display_height <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvDisplayUnit) { display_unit = UnserializeUInt(pReader, pos, size); if (display_unit < 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvStereoMode) { stereo_mode = UnserializeUInt(pReader, pos, size); if (stereo_mode < 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvFrameRate) { const long status = UnserializeFloat(pReader, pos, size, rate); if (status < 0) return status; if (rate <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvColour) { Colour* colour = NULL; if (!Colour::Parse(pReader, pos, size, &colour)) { return E_FILE_FORMAT_INVALID; } else { colour_ptr.reset(colour); } } else if (id == libwebm::kMkvProjection) { Projection* projection = NULL; if (!Projection::Parse(pReader, pos, size, &projection)) { return E_FILE_FORMAT_INVALID; } else { projection_ptr.reset(projection); } } else if (id == libwebm::kMkvColourSpace) { const long status = UnserializeString(pReader, pos, size, colour_space); if (status < 0) return status; } pos += size; // consume payload if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; VideoTrack* const pTrack = new (std::nothrow) VideoTrack(pSegment, element_start, element_size); if (pTrack == NULL) return -1; // generic error const int status = 
info.Copy(pTrack->m_info); if (status) { // error delete pTrack; return status; } pTrack->m_width = width; pTrack->m_height = height; pTrack->m_display_width = display_width; pTrack->m_display_height = display_height; pTrack->m_display_unit = display_unit; pTrack->m_stereo_mode = stereo_mode; pTrack->m_rate = rate; pTrack->m_colour = colour_ptr.release(); pTrack->m_colour_space = colour_space; pTrack->m_projection = projection_ptr.release(); pResult = pTrack; return 0; // success } bool VideoTrack::VetEntry(const BlockEntry* pBlockEntry) const { return Track::VetEntry(pBlockEntry) && pBlockEntry->GetBlock()->IsKey(); } long VideoTrack::Seek(long long time_ns, const BlockEntry*& pResult) const { const long status = GetFirst(pResult); if (status < 0) // buffer underflow, etc return status; assert(pResult); if (pResult->EOS()) return 0; const Cluster* pCluster = pResult->GetCluster(); assert(pCluster); assert(pCluster->GetIndex() >= 0); if (time_ns <= pResult->GetBlock()->GetTime(pCluster)) return 0; Cluster** const clusters = m_pSegment->m_clusters; assert(clusters); const long count = m_pSegment->GetCount(); // loaded only, not pre-loaded assert(count > 0); Cluster** const i = clusters + pCluster->GetIndex(); assert(i); assert(*i == pCluster); assert(pCluster->GetTime() <= time_ns); Cluster** const j = clusters + count; Cluster** lo = i; Cluster** hi = j; while (lo < hi) { // INVARIANT: //[i, lo) <= time_ns //[lo, hi) ? //[hi, j) > time_ns Cluster** const mid = lo + (hi - lo) / 2; assert(mid < hi); pCluster = *mid; assert(pCluster); assert(pCluster->GetIndex() >= 0); assert(pCluster->GetIndex() == long(mid - m_pSegment->m_clusters)); const long long t = pCluster->GetTime(); if (t <= time_ns) lo = mid + 1; else hi = mid; assert(lo <= hi); } assert(lo == hi); assert(lo > i); assert(lo <= j); pCluster = *--lo; assert(pCluster); assert(pCluster->GetTime() <= time_ns); pResult = pCluster->GetEntry(this, time_ns); if ((pResult != 0) && !pResult->EOS()) // found a keyframe return 0; while (lo != i) { pCluster = *--lo; assert(pCluster); assert(pCluster->GetTime() <= time_ns); pResult = pCluster->GetEntry(this, time_ns); if ((pResult != 0) && !pResult->EOS()) return 0; } // weird: we're on the first cluster, but no keyframe found // should never happen but we must return something anyway pResult = GetEOS(); return 0; } Colour* VideoTrack::GetColour() const { return m_colour; } Projection* VideoTrack::GetProjection() const { return m_projection; } long long VideoTrack::GetWidth() const { return m_width; } long long VideoTrack::GetHeight() const { return m_height; } long long VideoTrack::GetDisplayWidth() const { return m_display_width > 0 ? m_display_width : GetWidth(); } long long VideoTrack::GetDisplayHeight() const { return m_display_height > 0 ? 
m_display_height : GetHeight(); } long long VideoTrack::GetDisplayUnit() const { return m_display_unit; } long long VideoTrack::GetStereoMode() const { return m_stereo_mode; } double VideoTrack::GetFrameRate() const { return m_rate; } AudioTrack::AudioTrack(Segment* pSegment, long long element_start, long long element_size) : Track(pSegment, element_start, element_size) {} long AudioTrack::Parse(Segment* pSegment, const Info& info, long long element_start, long long element_size, AudioTrack*& pResult) { if (pResult) return -1; if (info.type != Track::kAudio) return -1; IMkvReader* const pReader = pSegment->m_pReader; const Settings& s = info.settings; assert(s.start >= 0); assert(s.size >= 0); long long pos = s.start; assert(pos >= 0); const long long stop = pos + s.size; double rate = 8000.0; // MKV default long long channels = 1; long long bit_depth = 0; while (pos < stop) { long long id, size; long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; if (id == libwebm::kMkvSamplingFrequency) { status = UnserializeFloat(pReader, pos, size, rate); if (status < 0) return status; if (rate <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvChannels) { channels = UnserializeUInt(pReader, pos, size); if (channels <= 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvBitDepth) { bit_depth = UnserializeUInt(pReader, pos, size); if (bit_depth <= 0) return E_FILE_FORMAT_INVALID; } pos += size; // consume payload if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; AudioTrack* const pTrack = new (std::nothrow) AudioTrack(pSegment, element_start, element_size); if (pTrack == NULL) return -1; // generic error const int status = info.Copy(pTrack->m_info); if (status) { delete pTrack; return status; } pTrack->m_rate = rate; pTrack->m_channels = channels; pTrack->m_bitDepth = bit_depth; pResult = pTrack; return 0; // success } double AudioTrack::GetSamplingRate() const { return m_rate; } long long AudioTrack::GetChannels() const { return m_channels; } long long AudioTrack::GetBitDepth() const { return m_bitDepth; } Tracks::Tracks(Segment* pSegment, long long start, long long size_, long long element_start, long long element_size) : m_pSegment(pSegment), m_start(start), m_size(size_), m_element_start(element_start), m_element_size(element_size), m_trackEntries(NULL), m_trackEntriesEnd(NULL) {} long Tracks::Parse() { assert(m_trackEntries == NULL); assert(m_trackEntriesEnd == NULL); const long long stop = m_start + m_size; IMkvReader* const pReader = m_pSegment->m_pReader; int count = 0; long long pos = m_start; while (pos < stop) { long long id, size; const long status = ParseElementHeader(pReader, pos, stop, id, size); if (status < 0) // error return status; if (size == 0) // weird continue; if (id == libwebm::kMkvTrackEntry) ++count; pos += size; // consume payload if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; if (count <= 0) return 0; // success m_trackEntries = new (std::nothrow) Track*[count]; if (m_trackEntries == NULL) return -1; m_trackEntriesEnd = m_trackEntries; pos = m_start; while (pos < stop) { const long long element_start = pos; long long id, payload_size; const long status = ParseElementHeader(pReader, pos, stop, id, payload_size); if (status < 0) // error return status; if (payload_size == 0) // weird continue; const long long payload_stop = pos + payload_size; assert(payload_stop <= stop); // checked in ParseElement const 
long long element_size = payload_stop - element_start; if (id == libwebm::kMkvTrackEntry) { Track*& pTrack = *m_trackEntriesEnd; pTrack = NULL; const long status = ParseTrackEntry(pos, payload_size, element_start, element_size, pTrack); if (status) return status; if (pTrack) ++m_trackEntriesEnd; } pos = payload_stop; if (pos > stop) return E_FILE_FORMAT_INVALID; } if (pos != stop) return E_FILE_FORMAT_INVALID; return 0; // success } unsigned long Tracks::GetTracksCount() const { const ptrdiff_t result = m_trackEntriesEnd - m_trackEntries; assert(result >= 0); return static_cast(result); } long Tracks::ParseTrackEntry(long long track_start, long long track_size, long long element_start, long long element_size, Track*& pResult) const { if (pResult) return -1; IMkvReader* const pReader = m_pSegment->m_pReader; long long pos = track_start; const long long track_stop = track_start + track_size; Track::Info info; info.type = 0; info.number = 0; info.uid = 0; info.defaultDuration = 0; Track::Settings v; v.start = -1; v.size = -1; Track::Settings a; a.start = -1; a.size = -1; Track::Settings e; // content_encodings_settings; e.start = -1; e.size = -1; long long lacing = 1; // default is true while (pos < track_stop) { long long id, size; const long status = ParseElementHeader(pReader, pos, track_stop, id, size); if (status < 0) // error return status; if (size < 0) return E_FILE_FORMAT_INVALID; const long long start = pos; if (id == libwebm::kMkvVideo) { v.start = start; v.size = size; } else if (id == libwebm::kMkvAudio) { a.start = start; a.size = size; } else if (id == libwebm::kMkvContentEncodings) { e.start = start; e.size = size; } else if (id == libwebm::kMkvTrackUID) { if (size > 8) return E_FILE_FORMAT_INVALID; info.uid = 0; long long pos_ = start; const long long pos_end = start + size; while (pos_ != pos_end) { unsigned char b; const int status = pReader->Read(pos_, 1, &b); if (status) return status; info.uid <<= 8; info.uid |= b; ++pos_; } } else if (id == libwebm::kMkvTrackNumber) { const long long num = UnserializeUInt(pReader, pos, size); if ((num <= 0) || (num > 127)) return E_FILE_FORMAT_INVALID; info.number = static_cast(num); } else if (id == libwebm::kMkvTrackType) { const long long type = UnserializeUInt(pReader, pos, size); if ((type <= 0) || (type > 254)) return E_FILE_FORMAT_INVALID; info.type = static_cast(type); } else if (id == libwebm::kMkvName) { const long status = UnserializeString(pReader, pos, size, info.nameAsUTF8); if (status) return status; } else if (id == libwebm::kMkvLanguage) { const long status = UnserializeString(pReader, pos, size, info.language); if (status) return status; } else if (id == libwebm::kMkvDefaultDuration) { const long long duration = UnserializeUInt(pReader, pos, size); if (duration < 0) return E_FILE_FORMAT_INVALID; info.defaultDuration = static_cast(duration); } else if (id == libwebm::kMkvCodecID) { const long status = UnserializeString(pReader, pos, size, info.codecId); if (status) return status; } else if (id == libwebm::kMkvFlagLacing) { lacing = UnserializeUInt(pReader, pos, size); if ((lacing < 0) || (lacing > 1)) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvCodecPrivate) { delete[] info.codecPrivate; info.codecPrivate = NULL; info.codecPrivateSize = 0; const size_t buflen = static_cast(size); if (buflen) { unsigned char* buf = SafeArrayAlloc(1, buflen); if (buf == NULL) return -1; const int status = pReader->Read(pos, static_cast(buflen), buf); if (status) { delete[] buf; return status; } info.codecPrivate = buf; 
info.codecPrivateSize = buflen; } } else if (id == libwebm::kMkvCodecName) { const long status = UnserializeString(pReader, pos, size, info.codecNameAsUTF8); if (status) return status; } else if (id == libwebm::kMkvCodecDelay) { info.codecDelay = UnserializeUInt(pReader, pos, size); } else if (id == libwebm::kMkvSeekPreRoll) { info.seekPreRoll = UnserializeUInt(pReader, pos, size); } pos += size; // consume payload if (pos > track_stop) return E_FILE_FORMAT_INVALID; } if (pos != track_stop) return E_FILE_FORMAT_INVALID; if (info.number <= 0) // not specified return E_FILE_FORMAT_INVALID; if (GetTrackByNumber(info.number)) return E_FILE_FORMAT_INVALID; if (info.type <= 0) // not specified return E_FILE_FORMAT_INVALID; info.lacing = (lacing > 0) ? true : false; if (info.type == Track::kVideo) { if (v.start < 0) return E_FILE_FORMAT_INVALID; if (a.start >= 0) return E_FILE_FORMAT_INVALID; info.settings = v; VideoTrack* pTrack = NULL; const long status = VideoTrack::Parse(m_pSegment, info, element_start, element_size, pTrack); if (status) return status; pResult = pTrack; assert(pResult); if (e.start >= 0) pResult->ParseContentEncodingsEntry(e.start, e.size); } else if (info.type == Track::kAudio) { if (a.start < 0) return E_FILE_FORMAT_INVALID; if (v.start >= 0) return E_FILE_FORMAT_INVALID; info.settings = a; AudioTrack* pTrack = NULL; const long status = AudioTrack::Parse(m_pSegment, info, element_start, element_size, pTrack); if (status) return status; pResult = pTrack; assert(pResult); if (e.start >= 0) pResult->ParseContentEncodingsEntry(e.start, e.size); } else { // neither video nor audio - probably metadata or subtitles if (a.start >= 0) return E_FILE_FORMAT_INVALID; if (v.start >= 0) return E_FILE_FORMAT_INVALID; if (info.type == Track::kMetadata && e.start >= 0) return E_FILE_FORMAT_INVALID; info.settings.start = -1; info.settings.size = 0; Track* pTrack = NULL; const long status = Track::Create(m_pSegment, info, element_start, element_size, pTrack); if (status) return status; pResult = pTrack; assert(pResult); } return 0; // success } Tracks::~Tracks() { Track** i = m_trackEntries; Track** const j = m_trackEntriesEnd; while (i != j) { Track* const pTrack = *i++; delete pTrack; } delete[] m_trackEntries; } const Track* Tracks::GetTrackByNumber(long tn) const { if (tn < 0) return NULL; Track** i = m_trackEntries; Track** const j = m_trackEntriesEnd; while (i != j) { Track* const pTrack = *i++; if (pTrack == NULL) continue; if (tn == pTrack->GetNumber()) return pTrack; } return NULL; // not found } const Track* Tracks::GetTrackByIndex(unsigned long idx) const { const ptrdiff_t count = m_trackEntriesEnd - m_trackEntries; if (idx >= static_cast(count)) return NULL; return m_trackEntries[idx]; } long Cluster::Load(long long& pos, long& len) const { if (m_pSegment == NULL) return E_PARSE_FAILED; if (m_timecode >= 0) // at least partially loaded return 0; if (m_pos != m_element_start || m_element_size >= 0) return E_PARSE_FAILED; IMkvReader* const pReader = m_pSegment->m_pReader; long long total, avail; const int status = pReader->Length(&total, &avail); if (status < 0) // error return status; if (total >= 0 && (avail > total || m_pos > total)) return E_FILE_FORMAT_INVALID; pos = m_pos; long long cluster_size = -1; if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } long long result = GetUIntLength(pReader, pos, len); if (result < 0) // error or underflow return static_cast(result); if (result > 0) return E_BUFFER_NOT_FULL; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const 
long long id_ = ReadID(pReader, pos, len); if (id_ < 0) // error return static_cast(id_); if (id_ != libwebm::kMkvCluster) return E_FILE_FORMAT_INVALID; pos += len; // consume id // read cluster size if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } result = GetUIntLength(pReader, pos, len); if (result < 0) // error return static_cast(result); if (result > 0) return E_BUFFER_NOT_FULL; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long size = ReadUInt(pReader, pos, len); if (size < 0) // error return static_cast(cluster_size); if (size == 0) return E_FILE_FORMAT_INVALID; pos += len; // consume length of size of element const long long unknown_size = (1LL << (7 * len)) - 1; if (size != unknown_size) cluster_size = size; // pos points to start of payload long long timecode = -1; long long new_pos = -1; bool bBlock = false; long long cluster_stop = (cluster_size < 0) ? -1 : pos + cluster_size; for (;;) { if ((cluster_stop >= 0) && (pos >= cluster_stop)) break; // Parse ID if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } long long result = GetUIntLength(pReader, pos, len); if (result < 0) // error return static_cast(result); if (result > 0) return E_BUFFER_NOT_FULL; if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long id = ReadID(pReader, pos, len); if (id < 0) // error return static_cast(id); if (id == 0) return E_FILE_FORMAT_INVALID; // This is the distinguished set of ID's we use to determine // that we have exhausted the sub-element's inside the cluster // whose ID we parsed earlier. if (id == libwebm::kMkvCluster) break; if (id == libwebm::kMkvCues) break; pos += len; // consume ID field // Parse Size if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } result = GetUIntLength(pReader, pos, len); if (result < 0) // error return static_cast(result); if (result > 0) return E_BUFFER_NOT_FULL; if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long size = ReadUInt(pReader, pos, len); if (size < 0) // error return static_cast(size); const long long unknown_size = (1LL << (7 * len)) - 1; if (size == unknown_size) return E_FILE_FORMAT_INVALID; pos += len; // consume size field if ((cluster_stop >= 0) && (pos > cluster_stop)) return E_FILE_FORMAT_INVALID; // pos now points to start of payload if (size == 0) continue; if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) return E_FILE_FORMAT_INVALID; if (id == libwebm::kMkvTimecode) { len = static_cast(size); if ((pos + size) > avail) return E_BUFFER_NOT_FULL; timecode = UnserializeUInt(pReader, pos, size); if (timecode < 0) // error (or underflow) return static_cast(timecode); new_pos = pos + size; if (bBlock) break; } else if (id == libwebm::kMkvBlockGroup) { bBlock = true; break; } else if (id == libwebm::kMkvSimpleBlock) { bBlock = true; break; } pos += size; // consume payload if (cluster_stop >= 0 && pos > cluster_stop) return E_FILE_FORMAT_INVALID; } if (cluster_stop >= 0 && pos > cluster_stop) return E_FILE_FORMAT_INVALID; if (timecode < 0) // no timecode found return E_FILE_FORMAT_INVALID; if (!bBlock) return E_FILE_FORMAT_INVALID; m_pos = new_pos; // designates position just beyond timecode payload m_timecode = timecode; // m_timecode >= 0 means we're partially loaded if (cluster_size >= 0) m_element_size = cluster_stop - m_element_start; return 0; } long Cluster::Parse(long long& pos, long& 
                          len) const {
  long status = Load(pos, len);

  if (status < 0)
    return status;

  if (m_pos < m_element_start || m_timecode < 0)
    return E_PARSE_FAILED;

  const long long cluster_stop =
      (m_element_size < 0) ? -1 : m_element_start + m_element_size;

  if ((cluster_stop >= 0) && (m_pos >= cluster_stop))
    return 1;  // nothing else to do

  IMkvReader* const pReader = m_pSegment->m_pReader;

  long long total, avail;

  status = pReader->Length(&total, &avail);

  if (status < 0)  // error
    return status;

  if (total >= 0 && avail > total)
    return E_FILE_FORMAT_INVALID;

  pos = m_pos;

  for (;;) {
    if ((cluster_stop >= 0) && (pos >= cluster_stop))
      break;

    if ((total >= 0) && (pos >= total)) {
      if (m_element_size < 0)
        m_element_size = pos - m_element_start;

      break;
    }

    // Parse ID

    if ((pos + 1) > avail) {
      len = 1;
      return E_BUFFER_NOT_FULL;
    }

    long long result = GetUIntLength(pReader, pos, len);

    if (result < 0)  // error
      return static_cast<long>(result);

    if (result > 0)
      return E_BUFFER_NOT_FULL;

    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
      return E_FILE_FORMAT_INVALID;

    if ((pos + len) > avail)
      return E_BUFFER_NOT_FULL;

    const long long id = ReadID(pReader, pos, len);

    if (id < 0)
      return E_FILE_FORMAT_INVALID;

    // This is the distinguished set of IDs we use to determine
    // that we have exhausted the sub-elements inside the cluster
    // whose ID we parsed earlier.

    if ((id == libwebm::kMkvCluster) || (id == libwebm::kMkvCues)) {
      if (m_element_size < 0)
        m_element_size = pos - m_element_start;

      break;
    }

    pos += len;  // consume ID field

    // Parse Size

    if ((pos + 1) > avail) {
      len = 1;
      return E_BUFFER_NOT_FULL;
    }

    result = GetUIntLength(pReader, pos, len);

    if (result < 0)  // error
      return static_cast<long>(result);

    if (result > 0)
      return E_BUFFER_NOT_FULL;

    if ((cluster_stop >= 0) && ((pos + len) > cluster_stop))
      return E_FILE_FORMAT_INVALID;

    if ((pos + len) > avail)
      return E_BUFFER_NOT_FULL;

    const long long size = ReadUInt(pReader, pos, len);

    if (size < 0)  // error
      return static_cast<long>(size);

    const long long unknown_size = (1LL << (7 * len)) - 1;

    if (size == unknown_size)
      return E_FILE_FORMAT_INVALID;

    pos += len;  // consume size field

    if ((cluster_stop >= 0) && (pos > cluster_stop))
      return E_FILE_FORMAT_INVALID;

    // pos now points to start of payload

    if (size == 0)
      continue;

    // const long long block_start = pos;
    const long long block_stop = pos + size;

    if (cluster_stop >= 0) {
      if (block_stop > cluster_stop) {
        if (id == libwebm::kMkvBlockGroup || id == libwebm::kMkvSimpleBlock) {
          return E_FILE_FORMAT_INVALID;
        }

        pos = cluster_stop;
        break;
      }
    } else if ((total >= 0) && (block_stop > total)) {
      m_element_size = total - m_element_start;
      pos = total;
      break;
    } else if (block_stop > avail) {
      len = static_cast<long>(size);
      return E_BUFFER_NOT_FULL;
    }

    Cluster* const this_ = const_cast<Cluster*>(this);

    if (id == libwebm::kMkvBlockGroup)
      return this_->ParseBlockGroup(size, pos, len);

    if (id == libwebm::kMkvSimpleBlock)
      return this_->ParseSimpleBlock(size, pos, len);

    pos += size;  // consume payload
    if (cluster_stop >= 0 && pos > cluster_stop)
      return E_FILE_FORMAT_INVALID;
  }

  if (m_element_size < 1)
    return E_FILE_FORMAT_INVALID;

  m_pos = pos;
  if (cluster_stop >= 0 && m_pos > cluster_stop)
    return E_FILE_FORMAT_INVALID;

  if (m_entries_count > 0) {
    const long idx = m_entries_count - 1;

    const BlockEntry* const pLast = m_entries[idx];
    if (pLast == NULL)
      return E_PARSE_FAILED;

    const Block* const pBlock = pLast->GetBlock();
    if (pBlock == NULL)
      return E_PARSE_FAILED;

    const long long start = pBlock->m_start;

    if ((total >= 0) && (start > total))
      return E_PARSE_FAILED;  // defend against truncated stream

    const long long size = pBlock->m_size;

    const long long stop = start + size;
    if (cluster_stop >= 0 && stop > cluster_stop)
      return E_FILE_FORMAT_INVALID;

    if ((total >= 0) && (stop > total))
      return E_PARSE_FAILED;  // defend against truncated stream
  }

  return 1;  // no more entries
}
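// Illustration (not part of the parser): Parse() above is resumable. When it
// returns E_BUFFER_NOT_FULL it reports, via its pos and len out-parameters,
// roughly where it stopped and how much more data it wanted, and it can
// simply be called again once the IMkvReader can satisfy the read. A minimal
// polling sketch for a growing file (WaitForMoreData is a hypothetical
// application callback, not part of libwebm):
#if 0
long ParseOneEntry(mkvparser::Cluster* pCluster) {
  long long pos;
  long len;

  for (;;) {
    const long status = pCluster->Parse(pos, len);

    if (status >= 0)
      return status;  // 0: entry parsed; 1: no more entries in this cluster

    if (status != mkvparser::E_BUFFER_NOT_FULL)
      return status;  // hard error, e.g. E_FILE_FORMAT_INVALID

    WaitForMoreData(pos, len);  // hypothetical: block until bytes arrive
  }
}
#endif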
long Cluster::ParseSimpleBlock(long long block_size, long long& pos,
                               long& len) {
  const long long block_start = pos;
  const long long block_stop = pos + block_size;

  IMkvReader* const pReader = m_pSegment->m_pReader;

  long long total, avail;

  long status = pReader->Length(&total, &avail);

  if (status < 0)  // error
    return status;

  assert((total < 0) || (avail <= total));

  // parse track number

  if ((pos + 1) > avail) {
    len = 1;
    return E_BUFFER_NOT_FULL;
  }

  long long result = GetUIntLength(pReader, pos, len);

  if (result < 0)  // error
    return static_cast<long>(result);

  if (result > 0)  // weird
    return E_BUFFER_NOT_FULL;

  if ((pos + len) > block_stop)
    return E_FILE_FORMAT_INVALID;

  if ((pos + len) > avail)
    return E_BUFFER_NOT_FULL;

  const long long track = ReadUInt(pReader, pos, len);

  if (track < 0)  // error
    return static_cast<long>(track);

  if (track == 0)
    return E_FILE_FORMAT_INVALID;

  pos += len;  // consume track number

  if ((pos + 2) > block_stop)
    return E_FILE_FORMAT_INVALID;

  if ((pos + 2) > avail) {
    len = 2;
    return E_BUFFER_NOT_FULL;
  }

  pos += 2;  // consume timecode

  if ((pos + 1) > block_stop)
    return E_FILE_FORMAT_INVALID;

  if ((pos + 1) > avail) {
    len = 1;
    return E_BUFFER_NOT_FULL;
  }

  unsigned char flags;

  status = pReader->Read(pos, 1, &flags);

  if (status < 0) {  // error or underflow
    len = 1;
    return status;
  }

  ++pos;  // consume flags byte
  assert(pos <= avail);

  if (pos >= block_stop)
    return E_FILE_FORMAT_INVALID;

  const int lacing = int(flags & 0x06) >> 1;

  if ((lacing != 0) && (block_stop > avail)) {
    len = static_cast<long>(block_stop - pos);
    return E_BUFFER_NOT_FULL;
  }

  status = CreateBlock(libwebm::kMkvSimpleBlock, block_start, block_size,
                       0);  // DiscardPadding

  if (status != 0)
    return status;

  m_pos = block_stop;

  return 0;  // success
}

long Cluster::ParseBlockGroup(long long payload_size, long long& pos,
                              long& len) {
  const long long payload_start = pos;
  const long long payload_stop = pos + payload_size;

  IMkvReader* const pReader = m_pSegment->m_pReader;

  long long total, avail;

  long status = pReader->Length(&total, &avail);

  if (status < 0)  // error
    return status;

  assert((total < 0) || (avail <= total));

  if ((total >= 0) && (payload_stop > total))
    return E_FILE_FORMAT_INVALID;

  if (payload_stop > avail) {
    len = static_cast<long>(payload_size);
    return E_BUFFER_NOT_FULL;
  }

  long long discard_padding = 0;

  while (pos < payload_stop) {
    // parse sub-block element ID

    if ((pos + 1) > avail) {
      len = 1;
      return E_BUFFER_NOT_FULL;
    }

    long long result = GetUIntLength(pReader, pos, len);

    if (result < 0)  // error
      return static_cast<long>(result);

    if (result > 0)  // weird
      return E_BUFFER_NOT_FULL;

    if ((pos + len) > payload_stop)
      return E_FILE_FORMAT_INVALID;

    if ((pos + len) > avail)
      return E_BUFFER_NOT_FULL;

    const long long id = ReadID(pReader, pos, len);

    if (id < 0)  // error
      return static_cast<long>(id);

    if (id == 0)  // not a valid ID
      return E_FILE_FORMAT_INVALID;

    pos += len;  // consume ID field

    // Parse Size

    if ((pos + 1) > avail) {
      len = 1;
      return E_BUFFER_NOT_FULL;
    }

    result = GetUIntLength(pReader, pos, len);

    if (result < 0)  // error
      return static_cast<long>(result);

    if (result > 0)  // weird
      return E_BUFFER_NOT_FULL;

    if ((pos + len) > payload_stop)
      return E_FILE_FORMAT_INVALID;

    if ((pos + len) > avail)
      return E_BUFFER_NOT_FULL;

    const long long size = ReadUInt(pReader, pos, len);

    if (size
< 0) // error return static_cast(size); pos += len; // consume size field // pos now points to start of sub-block group payload if (pos > payload_stop) return E_FILE_FORMAT_INVALID; if (size == 0) // weird continue; const long long unknown_size = (1LL << (7 * len)) - 1; if (size == unknown_size) return E_FILE_FORMAT_INVALID; if (id == libwebm::kMkvDiscardPadding) { status = UnserializeInt(pReader, pos, size, discard_padding); if (status < 0) // error return status; } if (id != libwebm::kMkvBlock) { pos += size; // consume sub-part of block group if (pos > payload_stop) return E_FILE_FORMAT_INVALID; continue; } const long long block_stop = pos + size; if (block_stop > payload_stop) return E_FILE_FORMAT_INVALID; // parse track number if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } result = GetUIntLength(pReader, pos, len); if (result < 0) // error return static_cast(result); if (result > 0) // weird return E_BUFFER_NOT_FULL; if ((pos + len) > block_stop) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long track = ReadUInt(pReader, pos, len); if (track < 0) // error return static_cast(track); if (track == 0) return E_FILE_FORMAT_INVALID; pos += len; // consume track number if ((pos + 2) > block_stop) return E_FILE_FORMAT_INVALID; if ((pos + 2) > avail) { len = 2; return E_BUFFER_NOT_FULL; } pos += 2; // consume timecode if ((pos + 1) > block_stop) return E_FILE_FORMAT_INVALID; if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } unsigned char flags; status = pReader->Read(pos, 1, &flags); if (status < 0) { // error or underflow len = 1; return status; } ++pos; // consume flags byte assert(pos <= avail); if (pos >= block_stop) return E_FILE_FORMAT_INVALID; const int lacing = int(flags & 0x06) >> 1; if ((lacing != 0) && (block_stop > avail)) { len = static_cast(block_stop - pos); return E_BUFFER_NOT_FULL; } pos = block_stop; // consume block-part of block group if (pos > payload_stop) return E_FILE_FORMAT_INVALID; } if (pos != payload_stop) return E_FILE_FORMAT_INVALID; status = CreateBlock(libwebm::kMkvBlockGroup, payload_start, payload_size, discard_padding); if (status != 0) return status; m_pos = payload_stop; return 0; // success } long Cluster::GetEntry(long index, const mkvparser::BlockEntry*& pEntry) const { assert(m_pos >= m_element_start); pEntry = NULL; if (index < 0) return -1; // generic error if (m_entries_count < 0) return E_BUFFER_NOT_FULL; assert(m_entries); assert(m_entries_size > 0); assert(m_entries_count <= m_entries_size); if (index < m_entries_count) { pEntry = m_entries[index]; assert(pEntry); return 1; // found entry } if (m_element_size < 0) // we don't know cluster end yet return E_BUFFER_NOT_FULL; // underflow const long long element_stop = m_element_start + m_element_size; if (m_pos >= element_stop) return 0; // nothing left to parse return E_BUFFER_NOT_FULL; // underflow, since more remains to be parsed } Cluster* Cluster::Create(Segment* pSegment, long idx, long long off) { if (!pSegment || off < 0) return NULL; const long long element_start = pSegment->m_start + off; Cluster* const pCluster = new (std::nothrow) Cluster(pSegment, idx, element_start); return pCluster; } Cluster::Cluster() : m_pSegment(NULL), m_element_start(0), m_index(0), m_pos(0), m_element_size(0), m_timecode(0), m_entries(NULL), m_entries_size(0), m_entries_count(0) // means "no entries" {} Cluster::Cluster(Segment* pSegment, long idx, long long element_start /* long long element_size */) : m_pSegment(pSegment), 
m_element_start(element_start), m_index(idx), m_pos(element_start), m_element_size(-1 /* element_size */), m_timecode(-1), m_entries(NULL), m_entries_size(0), m_entries_count(-1) // means "has not been parsed yet" {} Cluster::~Cluster() { if (m_entries_count <= 0) { delete[] m_entries; return; } BlockEntry** i = m_entries; BlockEntry** const j = m_entries + m_entries_count; while (i != j) { BlockEntry* p = *i++; assert(p); delete p; } delete[] m_entries; } bool Cluster::EOS() const { return (m_pSegment == NULL); } long Cluster::GetIndex() const { return m_index; } long long Cluster::GetPosition() const { const long long pos = m_element_start - m_pSegment->m_start; assert(pos >= 0); return pos; } long long Cluster::GetElementSize() const { return m_element_size; } long Cluster::HasBlockEntries( const Segment* pSegment, long long off, // relative to start of segment payload long long& pos, long& len) { assert(pSegment); assert(off >= 0); // relative to segment IMkvReader* const pReader = pSegment->m_pReader; long long total, avail; long status = pReader->Length(&total, &avail); if (status < 0) // error return status; assert((total < 0) || (avail <= total)); pos = pSegment->m_start + off; // absolute if ((total >= 0) && (pos >= total)) return 0; // we don't even have a complete cluster const long long segment_stop = (pSegment->m_size < 0) ? -1 : pSegment->m_start + pSegment->m_size; long long cluster_stop = -1; // interpreted later to mean "unknown size" { if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } long long result = GetUIntLength(pReader, pos, len); if (result < 0) // error return static_cast(result); if (result > 0) // need more data return E_BUFFER_NOT_FULL; if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((total >= 0) && ((pos + len) > total)) return 0; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long id = ReadID(pReader, pos, len); if (id < 0) // error return static_cast(id); if (id != libwebm::kMkvCluster) return E_PARSE_FAILED; pos += len; // consume Cluster ID field // read size field if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } result = GetUIntLength(pReader, pos, len); if (result < 0) // error return static_cast(result); if (result > 0) // weird return E_BUFFER_NOT_FULL; if ((segment_stop >= 0) && ((pos + len) > segment_stop)) return E_FILE_FORMAT_INVALID; if ((total >= 0) && ((pos + len) > total)) return 0; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long size = ReadUInt(pReader, pos, len); if (size < 0) // error return static_cast(size); if (size == 0) return 0; // cluster does not have entries pos += len; // consume size field // pos now points to start of payload const long long unknown_size = (1LL << (7 * len)) - 1; if (size != unknown_size) { cluster_stop = pos + size; assert(cluster_stop >= 0); if ((segment_stop >= 0) && (cluster_stop > segment_stop)) return E_FILE_FORMAT_INVALID; if ((total >= 0) && (cluster_stop > total)) // return E_FILE_FORMAT_INVALID; //too conservative return 0; // cluster does not have any entries } } for (;;) { if ((cluster_stop >= 0) && (pos >= cluster_stop)) return 0; // no entries detected if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } long long result = GetUIntLength(pReader, pos, len); if (result < 0) // error return static_cast(result); if (result > 0) // need more data return E_BUFFER_NOT_FULL; if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return 
E_BUFFER_NOT_FULL; const long long id = ReadID(pReader, pos, len); if (id < 0) // error return static_cast<long>(id); // This is the distinguished set of IDs we use to determine // that we have exhausted the sub-elements inside the cluster // whose ID we parsed earlier. if (id == libwebm::kMkvCluster) return 0; // no entries found if (id == libwebm::kMkvCues) return 0; // no entries found pos += len; // consume id field if ((cluster_stop >= 0) && (pos >= cluster_stop)) return E_FILE_FORMAT_INVALID; // read size field if ((pos + 1) > avail) { len = 1; return E_BUFFER_NOT_FULL; } result = GetUIntLength(pReader, pos, len); if (result < 0) // error return static_cast<long>(result); if (result > 0) // underflow return E_BUFFER_NOT_FULL; if ((cluster_stop >= 0) && ((pos + len) > cluster_stop)) return E_FILE_FORMAT_INVALID; if ((pos + len) > avail) return E_BUFFER_NOT_FULL; const long long size = ReadUInt(pReader, pos, len); if (size < 0) // error return static_cast<long>(size); pos += len; // consume size field // pos now points to start of payload if ((cluster_stop >= 0) && (pos > cluster_stop)) return E_FILE_FORMAT_INVALID; if (size == 0) // weird continue; const long long unknown_size = (1LL << (7 * len)) - 1; if (size == unknown_size) return E_FILE_FORMAT_INVALID; // not supported inside cluster if ((cluster_stop >= 0) && ((pos + size) > cluster_stop)) return E_FILE_FORMAT_INVALID; if (id == libwebm::kMkvBlockGroup) return 1; // have at least one entry if (id == libwebm::kMkvSimpleBlock) return 1; // have at least one entry pos += size; // consume payload if (cluster_stop >= 0 && pos > cluster_stop) return E_FILE_FORMAT_INVALID; } } long long Cluster::GetTimeCode() const { long long pos; long len; const long status = Load(pos, len); if (status < 0) // error return status; return m_timecode; } long long Cluster::GetTime() const { const long long tc = GetTimeCode(); if (tc < 0) return tc; const SegmentInfo* const pInfo = m_pSegment->GetInfo(); assert(pInfo); const long long scale = pInfo->GetTimeCodeScale(); assert(scale >= 1); const long long t = m_timecode * scale; return t; } long long Cluster::GetFirstTime() const { const BlockEntry* pEntry; const long status = GetFirst(pEntry); if (status < 0) // error return status; if (pEntry == NULL) // empty cluster return GetTime(); const Block* const pBlock = pEntry->GetBlock(); assert(pBlock); return pBlock->GetTime(this); } long long Cluster::GetLastTime() const { const BlockEntry* pEntry; const long status = GetLast(pEntry); if (status < 0) // error return status; if (pEntry == NULL) // empty cluster return GetTime(); const Block* const pBlock = pEntry->GetBlock(); assert(pBlock); return pBlock->GetTime(this); } long Cluster::CreateBlock(long long id, long long pos, // absolute pos of payload long long size, long long discard_padding) { if (id != libwebm::kMkvBlockGroup && id != libwebm::kMkvSimpleBlock) return E_PARSE_FAILED; if (m_entries_count < 0) { // haven't parsed anything yet assert(m_entries == NULL); assert(m_entries_size == 0); m_entries_size = 1024; m_entries = new (std::nothrow) BlockEntry*[m_entries_size]; if (m_entries == NULL) return -1; m_entries_count = 0; } else { assert(m_entries); assert(m_entries_size > 0); assert(m_entries_count <= m_entries_size); if (m_entries_count >= m_entries_size) { const long entries_size = 2 * m_entries_size; BlockEntry** const entries = new (std::nothrow) BlockEntry*[entries_size]; if (entries == NULL) return -1; BlockEntry** src = m_entries; BlockEntry** const src_end = src + m_entries_count; BlockEntry** dst =
entries; while (src != src_end) *dst++ = *src++; delete[] m_entries; m_entries = entries; m_entries_size = entries_size; } } if (id == libwebm::kMkvBlockGroup) return CreateBlockGroup(pos, size, discard_padding); else return CreateSimpleBlock(pos, size); } long Cluster::CreateBlockGroup(long long start_offset, long long size, long long discard_padding) { assert(m_entries); assert(m_entries_size > 0); assert(m_entries_count >= 0); assert(m_entries_count < m_entries_size); IMkvReader* const pReader = m_pSegment->m_pReader; long long pos = start_offset; const long long stop = start_offset + size; // For WebM files, there is a bias towards previous reference times // (in order to support alt-ref frames, which refer back to the previous // keyframe). Normally a 0 value is not possible, but here we tentatively // allow 0 as the value of a reference frame, with the interpretation // that this is a "previous" reference time. long long prev = 1; // nonce long long next = 0; // nonce long long duration = -1; // really, this is unsigned long long bpos = -1; long long bsize = -1; while (pos < stop) { long len; const long long id = ReadID(pReader, pos, len); if (id < 0 || (pos + len) > stop) return E_FILE_FORMAT_INVALID; pos += len; // consume ID const long long size = ReadUInt(pReader, pos, len); assert(size >= 0); // TODO assert((pos + len) <= stop); pos += len; // consume size if (id == libwebm::kMkvBlock) { if (bpos < 0) { // Block ID bpos = pos; bsize = size; } } else if (id == libwebm::kMkvBlockDuration) { if (size > 8) return E_FILE_FORMAT_INVALID; duration = UnserializeUInt(pReader, pos, size); if (duration < 0) return E_FILE_FORMAT_INVALID; } else if (id == libwebm::kMkvReferenceBlock) { if (size > 8 || size <= 0) return E_FILE_FORMAT_INVALID; const long size_ = static_cast<long>(size); long long time; long status = UnserializeInt(pReader, pos, size_, time); assert(status == 0); if (status != 0) return -1; if (time <= 0) // see note above prev = time; else next = time; } pos += size; // consume payload if (pos > stop) return E_FILE_FORMAT_INVALID; } if (bpos < 0) return E_FILE_FORMAT_INVALID; if (pos != stop) return E_FILE_FORMAT_INVALID; assert(bsize >= 0); const long idx = m_entries_count; BlockEntry** const ppEntry = m_entries + idx; BlockEntry*& pEntry = *ppEntry; pEntry = new (std::nothrow) BlockGroup(this, idx, bpos, bsize, prev, next, duration, discard_padding); if (pEntry == NULL) return -1; // generic error BlockGroup* const p = static_cast<BlockGroup*>(pEntry); const long status = p->Parse(); if (status == 0) { // success ++m_entries_count; return 0; } delete pEntry; pEntry = 0; return status; } long Cluster::CreateSimpleBlock(long long st, long long sz) { assert(m_entries); assert(m_entries_size > 0); assert(m_entries_count >= 0); assert(m_entries_count < m_entries_size); const long idx = m_entries_count; BlockEntry** const ppEntry = m_entries + idx; BlockEntry*& pEntry = *ppEntry; pEntry = new (std::nothrow) SimpleBlock(this, idx, st, sz); if (pEntry == NULL) return -1; // generic error SimpleBlock* const p = static_cast<SimpleBlock*>(pEntry); const long status = p->Parse(); if (status == 0) { ++m_entries_count; return 0; } delete pEntry; pEntry = 0; return status; } long Cluster::GetFirst(const BlockEntry*& pFirst) const { if (m_entries_count <= 0) { long long pos; long len; const long status = Parse(pos, len); if (status < 0) { // error pFirst = NULL; return status; } if (m_entries_count <= 0) { // empty cluster pFirst = NULL; return 0; } } assert(m_entries); pFirst = m_entries[0]; assert(pFirst); return 0; //
success } long Cluster::GetLast(const BlockEntry*& pLast) const { for (;;) { long long pos; long len; const long status = Parse(pos, len); if (status < 0) { // error pLast = NULL; return status; } if (status > 0) // no new block break; } if (m_entries_count <= 0) { pLast = NULL; return 0; } assert(m_entries); const long idx = m_entries_count - 1; pLast = m_entries[idx]; assert(pLast); return 0; } long Cluster::GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const { assert(pCurr); assert(m_entries); assert(m_entries_count > 0); size_t idx = pCurr->GetIndex(); assert(idx < size_t(m_entries_count)); assert(m_entries[idx] == pCurr); ++idx; if (idx >= size_t(m_entries_count)) { long long pos; long len; const long status = Parse(pos, len); if (status < 0) { // error pNext = NULL; return status; } if (status > 0) { pNext = NULL; return 0; } assert(m_entries); assert(m_entries_count > 0); assert(idx < size_t(m_entries_count)); } pNext = m_entries[idx]; assert(pNext); return 0; } long Cluster::GetEntryCount() const { return m_entries_count; } const BlockEntry* Cluster::GetEntry(const Track* pTrack, long long time_ns) const { assert(pTrack); if (m_pSegment == NULL) // this is the special EOS cluster return pTrack->GetEOS(); const BlockEntry* pResult = pTrack->GetEOS(); long index = 0; for (;;) { if (index >= m_entries_count) { long long pos; long len; const long status = Parse(pos, len); assert(status >= 0); if (status > 0) // completely parsed, and no more entries return pResult; if (status < 0) // should never happen return 0; assert(m_entries); assert(index < m_entries_count); } const BlockEntry* const pEntry = m_entries[index]; assert(pEntry); assert(!pEntry->EOS()); const Block* const pBlock = pEntry->GetBlock(); assert(pBlock); if (pBlock->GetTrackNumber() != pTrack->GetNumber()) { ++index; continue; } if (pTrack->VetEntry(pEntry)) { if (time_ns < 0) // just want first candidate block return pEntry; const long long ns = pBlock->GetTime(this); if (ns > time_ns) return pResult; pResult = pEntry; // have a candidate } else if (time_ns >= 0) { const long long ns = pBlock->GetTime(this); if (ns > time_ns) return pResult; } ++index; } } const BlockEntry* Cluster::GetEntry(const CuePoint& cp, const CuePoint::TrackPosition& tp) const { assert(m_pSegment); const long long tc = cp.GetTimeCode(); if (tp.m_block > 0) { const long block = static_cast(tp.m_block); const long index = block - 1; while (index >= m_entries_count) { long long pos; long len; const long status = Parse(pos, len); if (status < 0) // TODO: can this happen? return NULL; if (status > 0) // nothing remains to be parsed return NULL; } const BlockEntry* const pEntry = m_entries[index]; assert(pEntry); assert(!pEntry->EOS()); const Block* const pBlock = pEntry->GetBlock(); assert(pBlock); if ((pBlock->GetTrackNumber() == tp.m_track) && (pBlock->GetTimeCode(this) == tc)) { return pEntry; } } long index = 0; for (;;) { if (index >= m_entries_count) { long long pos; long len; const long status = Parse(pos, len); if (status < 0) // TODO: can this happen? 
return NULL; if (status > 0) // nothing remains to be parsed return NULL; assert(m_entries); assert(index < m_entries_count); } const BlockEntry* const pEntry = m_entries[index]; assert(pEntry); assert(!pEntry->EOS()); const Block* const pBlock = pEntry->GetBlock(); assert(pBlock); if (pBlock->GetTrackNumber() != tp.m_track) { ++index; continue; } const long long tc_ = pBlock->GetTimeCode(this); if (tc_ < tc) { ++index; continue; } if (tc_ > tc) return NULL; const Tracks* const pTracks = m_pSegment->GetTracks(); assert(pTracks); const long tn = static_cast(tp.m_track); const Track* const pTrack = pTracks->GetTrackByNumber(tn); if (pTrack == NULL) return NULL; const long long type = pTrack->GetType(); if (type == 2) // audio return pEntry; if (type != 1) // not video return NULL; if (!pBlock->IsKey()) return NULL; return pEntry; } } BlockEntry::BlockEntry(Cluster* p, long idx) : m_pCluster(p), m_index(idx) {} BlockEntry::~BlockEntry() {} const Cluster* BlockEntry::GetCluster() const { return m_pCluster; } long BlockEntry::GetIndex() const { return m_index; } SimpleBlock::SimpleBlock(Cluster* pCluster, long idx, long long start, long long size) : BlockEntry(pCluster, idx), m_block(start, size, 0) {} long SimpleBlock::Parse() { return m_block.Parse(m_pCluster); } BlockEntry::Kind SimpleBlock::GetKind() const { return kBlockSimple; } const Block* SimpleBlock::GetBlock() const { return &m_block; } BlockGroup::BlockGroup(Cluster* pCluster, long idx, long long block_start, long long block_size, long long prev, long long next, long long duration, long long discard_padding) : BlockEntry(pCluster, idx), m_block(block_start, block_size, discard_padding), m_prev(prev), m_next(next), m_duration(duration) {} long BlockGroup::Parse() { const long status = m_block.Parse(m_pCluster); if (status) return status; m_block.SetKey((m_prev > 0) && (m_next <= 0)); return 0; } BlockEntry::Kind BlockGroup::GetKind() const { return kBlockGroup; } const Block* BlockGroup::GetBlock() const { return &m_block; } long long BlockGroup::GetPrevTimeCode() const { return m_prev; } long long BlockGroup::GetNextTimeCode() const { return m_next; } long long BlockGroup::GetDurationTimeCode() const { return m_duration; } Block::Block(long long start, long long size_, long long discard_padding) : m_start(start), m_size(size_), m_track(0), m_timecode(-1), m_flags(0), m_frames(NULL), m_frame_count(-1), m_discard_padding(discard_padding) {} Block::~Block() { delete[] m_frames; } long Block::Parse(const Cluster* pCluster) { if (pCluster == NULL) return -1; if (pCluster->m_pSegment == NULL) return -1; assert(m_start >= 0); assert(m_size >= 0); assert(m_track <= 0); assert(m_frames == NULL); assert(m_frame_count <= 0); long long pos = m_start; const long long stop = m_start + m_size; long len; IMkvReader* const pReader = pCluster->m_pSegment->m_pReader; m_track = ReadUInt(pReader, pos, len); if (m_track <= 0) return E_FILE_FORMAT_INVALID; if ((pos + len) > stop) return E_FILE_FORMAT_INVALID; pos += len; // consume track number if ((stop - pos) < 2) return E_FILE_FORMAT_INVALID; long status; long long value; status = UnserializeInt(pReader, pos, 2, value); if (status) return E_FILE_FORMAT_INVALID; if (value < SHRT_MIN) return E_FILE_FORMAT_INVALID; if (value > SHRT_MAX) return E_FILE_FORMAT_INVALID; m_timecode = static_cast(value); pos += 2; if ((stop - pos) <= 0) return E_FILE_FORMAT_INVALID; status = pReader->Read(pos, 1, &m_flags); if (status) return E_FILE_FORMAT_INVALID; const int lacing = int(m_flags & 0x06) >> 1; ++pos; // consume 
flags byte if (lacing == 0) { // no lacing if (pos > stop) return E_FILE_FORMAT_INVALID; m_frame_count = 1; m_frames = new (std::nothrow) Frame[m_frame_count]; if (m_frames == NULL) return -1; Frame& f = m_frames[0]; f.pos = pos; const long long frame_size = stop - pos; if (frame_size > LONG_MAX || frame_size <= 0) return E_FILE_FORMAT_INVALID; f.len = static_cast(frame_size); return 0; // success } if (pos >= stop) return E_FILE_FORMAT_INVALID; unsigned char biased_count; status = pReader->Read(pos, 1, &biased_count); if (status) return E_FILE_FORMAT_INVALID; ++pos; // consume frame count if (pos > stop) return E_FILE_FORMAT_INVALID; m_frame_count = int(biased_count) + 1; m_frames = new (std::nothrow) Frame[m_frame_count]; if (m_frames == NULL) return -1; if (!m_frames) return E_FILE_FORMAT_INVALID; if (lacing == 1) { // Xiph Frame* pf = m_frames; Frame* const pf_end = pf + m_frame_count; long long size = 0; int frame_count = m_frame_count; while (frame_count > 1) { long frame_size = 0; for (;;) { unsigned char val; if (pos >= stop) return E_FILE_FORMAT_INVALID; status = pReader->Read(pos, 1, &val); if (status) return E_FILE_FORMAT_INVALID; ++pos; // consume xiph size byte frame_size += val; if (val < 255) break; } Frame& f = *pf++; assert(pf < pf_end); if (pf >= pf_end) return E_FILE_FORMAT_INVALID; f.pos = 0; // patch later if (frame_size <= 0) return E_FILE_FORMAT_INVALID; f.len = frame_size; size += frame_size; // contribution of this frame --frame_count; } if (pf >= pf_end || pos > stop) return E_FILE_FORMAT_INVALID; { Frame& f = *pf++; if (pf != pf_end) return E_FILE_FORMAT_INVALID; f.pos = 0; // patch later const long long total_size = stop - pos; if (total_size < size) return E_FILE_FORMAT_INVALID; const long long frame_size = total_size - size; if (frame_size > LONG_MAX || frame_size <= 0) return E_FILE_FORMAT_INVALID; f.len = static_cast(frame_size); } pf = m_frames; while (pf != pf_end) { Frame& f = *pf++; assert((pos + f.len) <= stop); if ((pos + f.len) > stop) return E_FILE_FORMAT_INVALID; f.pos = pos; pos += f.len; } assert(pos == stop); if (pos != stop) return E_FILE_FORMAT_INVALID; } else if (lacing == 2) { // fixed-size lacing if (pos >= stop) return E_FILE_FORMAT_INVALID; const long long total_size = stop - pos; if ((total_size % m_frame_count) != 0) return E_FILE_FORMAT_INVALID; const long long frame_size = total_size / m_frame_count; if (frame_size > LONG_MAX || frame_size <= 0) return E_FILE_FORMAT_INVALID; Frame* pf = m_frames; Frame* const pf_end = pf + m_frame_count; while (pf != pf_end) { assert((pos + frame_size) <= stop); if ((pos + frame_size) > stop) return E_FILE_FORMAT_INVALID; Frame& f = *pf++; f.pos = pos; f.len = static_cast(frame_size); pos += frame_size; } assert(pos == stop); if (pos != stop) return E_FILE_FORMAT_INVALID; } else { assert(lacing == 3); // EBML lacing if (pos >= stop) return E_FILE_FORMAT_INVALID; long long size = 0; int frame_count = m_frame_count; long long frame_size = ReadUInt(pReader, pos, len); if (frame_size <= 0) return E_FILE_FORMAT_INVALID; if (frame_size > LONG_MAX) return E_FILE_FORMAT_INVALID; if ((pos + len) > stop) return E_FILE_FORMAT_INVALID; pos += len; // consume length of size of first frame if ((pos + frame_size) > stop) return E_FILE_FORMAT_INVALID; Frame* pf = m_frames; Frame* const pf_end = pf + m_frame_count; { Frame& curr = *pf; curr.pos = 0; // patch later curr.len = static_cast(frame_size); size += curr.len; // contribution of this frame } --frame_count; while (frame_count > 1) { if (pos >= stop) return 
E_FILE_FORMAT_INVALID; assert(pf < pf_end); if (pf >= pf_end) return E_FILE_FORMAT_INVALID; const Frame& prev = *pf++; assert(prev.len == frame_size); if (prev.len != frame_size) return E_FILE_FORMAT_INVALID; assert(pf < pf_end); if (pf >= pf_end) return E_FILE_FORMAT_INVALID; Frame& curr = *pf; curr.pos = 0; // patch later const long long delta_size_ = ReadUInt(pReader, pos, len); if (delta_size_ < 0) return E_FILE_FORMAT_INVALID; if ((pos + len) > stop) return E_FILE_FORMAT_INVALID; pos += len; // consume length of (delta) size if (pos > stop) return E_FILE_FORMAT_INVALID; const long exp = 7 * len - 1; const long long bias = (1LL << exp) - 1LL; const long long delta_size = delta_size_ - bias; frame_size += delta_size; if (frame_size <= 0) return E_FILE_FORMAT_INVALID; if (frame_size > LONG_MAX) return E_FILE_FORMAT_INVALID; curr.len = static_cast(frame_size); // Check if size + curr.len could overflow. if (size > LLONG_MAX - curr.len) { return E_FILE_FORMAT_INVALID; } size += curr.len; // contribution of this frame --frame_count; } // parse last frame if (frame_count > 0) { if (pos > stop || pf >= pf_end) return E_FILE_FORMAT_INVALID; const Frame& prev = *pf++; assert(prev.len == frame_size); if (prev.len != frame_size) return E_FILE_FORMAT_INVALID; if (pf >= pf_end) return E_FILE_FORMAT_INVALID; Frame& curr = *pf++; if (pf != pf_end) return E_FILE_FORMAT_INVALID; curr.pos = 0; // patch later const long long total_size = stop - pos; if (total_size < size) return E_FILE_FORMAT_INVALID; frame_size = total_size - size; if (frame_size > LONG_MAX || frame_size <= 0) return E_FILE_FORMAT_INVALID; curr.len = static_cast(frame_size); } pf = m_frames; while (pf != pf_end) { Frame& f = *pf++; if ((pos + f.len) > stop) return E_FILE_FORMAT_INVALID; f.pos = pos; pos += f.len; } if (pos != stop) return E_FILE_FORMAT_INVALID; } return 0; // success } long long Block::GetTimeCode(const Cluster* pCluster) const { if (pCluster == 0) return m_timecode; const long long tc0 = pCluster->GetTimeCode(); assert(tc0 >= 0); // Check if tc0 + m_timecode would overflow. if (tc0 < 0 || LLONG_MAX - tc0 < m_timecode) { return -1; } const long long tc = tc0 + m_timecode; return tc; // unscaled timecode units } long long Block::GetTime(const Cluster* pCluster) const { assert(pCluster); const long long tc = GetTimeCode(pCluster); const Segment* const pSegment = pCluster->m_pSegment; const SegmentInfo* const pInfo = pSegment->GetInfo(); assert(pInfo); const long long scale = pInfo->GetTimeCodeScale(); assert(scale >= 1); // Check if tc * scale could overflow. 
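// (Added note: the guard below uses division so the test itself cannot
// overflow; with the default TimecodeScale of 1,000,000 ns per tick, any
// tc above LLONG_MAX / 1000000 -- about 9.2e12 ticks, i.e. roughly 292
// years of milliseconds -- would wrap, so -1 is returned instead.)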
if (tc != 0 && scale > LLONG_MAX / tc) { return -1; } const long long ns = tc * scale; return ns; } long long Block::GetTrackNumber() const { return m_track; } bool Block::IsKey() const { return ((m_flags & static_cast(1 << 7)) != 0); } void Block::SetKey(bool bKey) { if (bKey) m_flags |= static_cast(1 << 7); else m_flags &= 0x7F; } bool Block::IsInvisible() const { return bool(int(m_flags & 0x08) != 0); } Block::Lacing Block::GetLacing() const { const int value = int(m_flags & 0x06) >> 1; return static_cast(value); } int Block::GetFrameCount() const { return m_frame_count; } const Block::Frame& Block::GetFrame(int idx) const { assert(idx >= 0); assert(idx < m_frame_count); const Frame& f = m_frames[idx]; assert(f.pos > 0); assert(f.len > 0); return f; } long Block::Frame::Read(IMkvReader* pReader, unsigned char* buf) const { assert(pReader); assert(buf); const long status = pReader->Read(pos, len, buf); return status; } long long Block::GetDiscardPadding() const { return m_discard_padding; } } // namespace mkvparser libvpx-1.8.2/third_party/libwebm/mkvparser/mkvparser.h000066400000000000000000000737701357355204000232170ustar00rootroot00000000000000// Copyright (c) 2012 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. #ifndef MKVPARSER_MKVPARSER_H_ #define MKVPARSER_MKVPARSER_H_ #include namespace mkvparser { const int E_PARSE_FAILED = -1; const int E_FILE_FORMAT_INVALID = -2; const int E_BUFFER_NOT_FULL = -3; class IMkvReader { public: virtual int Read(long long pos, long len, unsigned char* buf) = 0; virtual int Length(long long* total, long long* available) = 0; protected: virtual ~IMkvReader() {} }; template Type* SafeArrayAlloc(unsigned long long num_elements, unsigned long long element_size); long long GetUIntLength(IMkvReader*, long long, long&); long long ReadUInt(IMkvReader*, long long, long&); long long ReadID(IMkvReader* pReader, long long pos, long& len); long long UnserializeUInt(IMkvReader*, long long pos, long long size); long UnserializeFloat(IMkvReader*, long long pos, long long size, double&); long UnserializeInt(IMkvReader*, long long pos, long long size, long long& result); long UnserializeString(IMkvReader*, long long pos, long long size, char*& str); long ParseElementHeader(IMkvReader* pReader, long long& pos, // consume id and size fields long long stop, // if you know size of element's parent long long& id, long long& size); bool Match(IMkvReader*, long long&, unsigned long, long long&); bool Match(IMkvReader*, long long&, unsigned long, unsigned char*&, size_t&); void GetVersion(int& major, int& minor, int& build, int& revision); struct EBMLHeader { EBMLHeader(); ~EBMLHeader(); long long m_version; long long m_readVersion; long long m_maxIdLength; long long m_maxSizeLength; char* m_docType; long long m_docTypeVersion; long long m_docTypeReadVersion; long long Parse(IMkvReader*, long long&); void Init(); }; class Segment; class Track; class Cluster; class Block { Block(const Block&); Block& operator=(const Block&); public: const long long m_start; const long long m_size; Block(long long start, long long size, long long discard_padding); ~Block(); long Parse(const Cluster*); long long GetTrackNumber() const; long long GetTimeCode(const Cluster*) 
const; // absolute, but not scaled long long GetTime(const Cluster*) const; // absolute, and scaled (ns) bool IsKey() const; void SetKey(bool); bool IsInvisible() const; enum Lacing { kLacingNone, kLacingXiph, kLacingFixed, kLacingEbml }; Lacing GetLacing() const; int GetFrameCount() const; // to index frames: [0, count) struct Frame { long long pos; // absolute offset long len; long Read(IMkvReader*, unsigned char*) const; }; const Frame& GetFrame(int frame_index) const; long long GetDiscardPadding() const; private: long long m_track; // Track::Number() short m_timecode; // relative to cluster unsigned char m_flags; Frame* m_frames; int m_frame_count; protected: const long long m_discard_padding; }; class BlockEntry { BlockEntry(const BlockEntry&); BlockEntry& operator=(const BlockEntry&); protected: BlockEntry(Cluster*, long index); public: virtual ~BlockEntry(); bool EOS() const { return (GetKind() == kBlockEOS); } const Cluster* GetCluster() const; long GetIndex() const; virtual const Block* GetBlock() const = 0; enum Kind { kBlockEOS, kBlockSimple, kBlockGroup }; virtual Kind GetKind() const = 0; protected: Cluster* const m_pCluster; const long m_index; }; class SimpleBlock : public BlockEntry { SimpleBlock(const SimpleBlock&); SimpleBlock& operator=(const SimpleBlock&); public: SimpleBlock(Cluster*, long index, long long start, long long size); long Parse(); Kind GetKind() const; const Block* GetBlock() const; protected: Block m_block; }; class BlockGroup : public BlockEntry { BlockGroup(const BlockGroup&); BlockGroup& operator=(const BlockGroup&); public: BlockGroup(Cluster*, long index, long long block_start, // absolute pos of block's payload long long block_size, // size of block's payload long long prev, long long next, long long duration, long long discard_padding); long Parse(); Kind GetKind() const; const Block* GetBlock() const; long long GetPrevTimeCode() const; // relative to block's time long long GetNextTimeCode() const; // as above long long GetDurationTimeCode() const; private: Block m_block; const long long m_prev; const long long m_next; const long long m_duration; }; /////////////////////////////////////////////////////////////// // ContentEncoding element // Elements used to describe if the track data has been encrypted or // compressed with zlib or header stripping. class ContentEncoding { public: enum { kCTR = 1 }; ContentEncoding(); ~ContentEncoding(); // ContentCompression element names struct ContentCompression { ContentCompression(); ~ContentCompression(); unsigned long long algo; unsigned char* settings; long long settings_len; }; // ContentEncAESSettings element names struct ContentEncAESSettings { ContentEncAESSettings() : cipher_mode(kCTR) {} ~ContentEncAESSettings() {} unsigned long long cipher_mode; }; // ContentEncryption element names struct ContentEncryption { ContentEncryption(); ~ContentEncryption(); unsigned long long algo; unsigned char* key_id; long long key_id_len; unsigned char* signature; long long signature_len; unsigned char* sig_key_id; long long sig_key_id_len; unsigned long long sig_algo; unsigned long long sig_hash_algo; ContentEncAESSettings aes_settings; }; // Returns ContentCompression represented by |idx|. Returns NULL if |idx| // is out of bounds. const ContentCompression* GetCompressionByIndex(unsigned long idx) const; // Returns number of ContentCompression elements in this ContentEncoding // element. unsigned long GetCompressionCount() const; // Parses the ContentCompression element from |pReader|. 
|start| is the // starting offset of the ContentCompression payload. |size| is the size in // bytes of the ContentCompression payload. |compression| is where the parsed // values will be stored. long ParseCompressionEntry(long long start, long long size, IMkvReader* pReader, ContentCompression* compression); // Returns ContentEncryption represented by |idx|. Returns NULL if |idx| // is out of bounds. const ContentEncryption* GetEncryptionByIndex(unsigned long idx) const; // Returns number of ContentEncryption elements in this ContentEncoding // element. unsigned long GetEncryptionCount() const; // Parses the ContentEncAESSettings element from |pReader|. |start| is the // starting offset of the ContentEncAESSettings payload. |size| is the // size in bytes of the ContentEncAESSettings payload. |encryption| is // where the parsed values will be stored. long ParseContentEncAESSettingsEntry(long long start, long long size, IMkvReader* pReader, ContentEncAESSettings* aes); // Parses the ContentEncoding element from |pReader|. |start| is the // starting offset of the ContentEncoding payload. |size| is the size in // bytes of the ContentEncoding payload. Returns true on success. long ParseContentEncodingEntry(long long start, long long size, IMkvReader* pReader); // Parses the ContentEncryption element from |pReader|. |start| is the // starting offset of the ContentEncryption payload. |size| is the size in // bytes of the ContentEncryption payload. |encryption| is where the parsed // values will be stored. long ParseEncryptionEntry(long long start, long long size, IMkvReader* pReader, ContentEncryption* encryption); unsigned long long encoding_order() const { return encoding_order_; } unsigned long long encoding_scope() const { return encoding_scope_; } unsigned long long encoding_type() const { return encoding_type_; } private: // Member variables for list of ContentCompression elements. ContentCompression** compression_entries_; ContentCompression** compression_entries_end_; // Member variables for list of ContentEncryption elements. 
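// (Added note, inferred from the matching compression list above: each
// list is held as a [begin, end) pointer range, so the count returned by
// GetEncryptionCount() is encryption_entries_end_ - encryption_entries_.)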
ContentEncryption** encryption_entries_; ContentEncryption** encryption_entries_end_; // ContentEncoding element names unsigned long long encoding_order_; unsigned long long encoding_scope_; unsigned long long encoding_type_; // LIBWEBM_DISALLOW_COPY_AND_ASSIGN(ContentEncoding); ContentEncoding(const ContentEncoding&); ContentEncoding& operator=(const ContentEncoding&); }; class Track { Track(const Track&); Track& operator=(const Track&); public: class Info; static long Create(Segment*, const Info&, long long element_start, long long element_size, Track*&); enum Type { kVideo = 1, kAudio = 2, kSubtitle = 0x11, kMetadata = 0x21 }; Segment* const m_pSegment; const long long m_element_start; const long long m_element_size; virtual ~Track(); long GetType() const; long GetNumber() const; unsigned long long GetUid() const; const char* GetNameAsUTF8() const; const char* GetLanguage() const; const char* GetCodecNameAsUTF8() const; const char* GetCodecId() const; const unsigned char* GetCodecPrivate(size_t&) const; bool GetLacing() const; unsigned long long GetDefaultDuration() const; unsigned long long GetCodecDelay() const; unsigned long long GetSeekPreRoll() const; const BlockEntry* GetEOS() const; struct Settings { long long start; long long size; }; class Info { public: Info(); ~Info(); int Copy(Info&) const; void Clear(); long type; long number; unsigned long long uid; unsigned long long defaultDuration; unsigned long long codecDelay; unsigned long long seekPreRoll; char* nameAsUTF8; char* language; char* codecId; char* codecNameAsUTF8; unsigned char* codecPrivate; size_t codecPrivateSize; bool lacing; Settings settings; private: Info(const Info&); Info& operator=(const Info&); int CopyStr(char* Info::*str, Info&) const; }; long GetFirst(const BlockEntry*&) const; long GetNext(const BlockEntry* pCurr, const BlockEntry*& pNext) const; virtual bool VetEntry(const BlockEntry*) const; virtual long Seek(long long time_ns, const BlockEntry*&) const; const ContentEncoding* GetContentEncodingByIndex(unsigned long idx) const; unsigned long GetContentEncodingCount() const; long ParseContentEncodingsEntry(long long start, long long size); protected: Track(Segment*, long long element_start, long long element_size); Info m_info; class EOSBlock : public BlockEntry { public: EOSBlock(); Kind GetKind() const; const Block* GetBlock() const; }; EOSBlock m_eos; private: ContentEncoding** content_encoding_entries_; ContentEncoding** content_encoding_entries_end_; }; struct PrimaryChromaticity { PrimaryChromaticity() : x(0), y(0) {} ~PrimaryChromaticity() {} static bool Parse(IMkvReader* reader, long long read_pos, long long value_size, bool is_x, PrimaryChromaticity** chromaticity); float x; float y; }; struct MasteringMetadata { static const float kValueNotPresent; MasteringMetadata() : r(NULL), g(NULL), b(NULL), white_point(NULL), luminance_max(kValueNotPresent), luminance_min(kValueNotPresent) {} ~MasteringMetadata() { delete r; delete g; delete b; delete white_point; } static bool Parse(IMkvReader* reader, long long element_start, long long element_size, MasteringMetadata** mastering_metadata); PrimaryChromaticity* r; PrimaryChromaticity* g; PrimaryChromaticity* b; PrimaryChromaticity* white_point; float luminance_max; float luminance_min; }; struct Colour { static const long long kValueNotPresent; // Unless otherwise noted all values assigned upon construction are the // equivalent of unspecified/default. 
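// (Added note: kValueNotPresent is the "element absent" sentinel, so a
// reader should compare each field against it before treating the value
// as a real colour parameter.)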
Colour() : matrix_coefficients(kValueNotPresent), bits_per_channel(kValueNotPresent), chroma_subsampling_horz(kValueNotPresent), chroma_subsampling_vert(kValueNotPresent), cb_subsampling_horz(kValueNotPresent), cb_subsampling_vert(kValueNotPresent), chroma_siting_horz(kValueNotPresent), chroma_siting_vert(kValueNotPresent), range(kValueNotPresent), transfer_characteristics(kValueNotPresent), primaries(kValueNotPresent), max_cll(kValueNotPresent), max_fall(kValueNotPresent), mastering_metadata(NULL) {} ~Colour() { delete mastering_metadata; mastering_metadata = NULL; } static bool Parse(IMkvReader* reader, long long element_start, long long element_size, Colour** colour); long long matrix_coefficients; long long bits_per_channel; long long chroma_subsampling_horz; long long chroma_subsampling_vert; long long cb_subsampling_horz; long long cb_subsampling_vert; long long chroma_siting_horz; long long chroma_siting_vert; long long range; long long transfer_characteristics; long long primaries; long long max_cll; long long max_fall; MasteringMetadata* mastering_metadata; }; struct Projection { enum ProjectionType { kTypeNotPresent = -1, kRectangular = 0, kEquirectangular = 1, kCubeMap = 2, kMesh = 3, }; static const float kValueNotPresent; Projection() : type(kTypeNotPresent), private_data(NULL), private_data_length(0), pose_yaw(kValueNotPresent), pose_pitch(kValueNotPresent), pose_roll(kValueNotPresent) {} ~Projection() { delete[] private_data; } static bool Parse(IMkvReader* reader, long long element_start, long long element_size, Projection** projection); ProjectionType type; unsigned char* private_data; size_t private_data_length; float pose_yaw; float pose_pitch; float pose_roll; }; class VideoTrack : public Track { VideoTrack(const VideoTrack&); VideoTrack& operator=(const VideoTrack&); VideoTrack(Segment*, long long element_start, long long element_size); public: virtual ~VideoTrack(); static long Parse(Segment*, const Info&, long long element_start, long long element_size, VideoTrack*&); long long GetWidth() const; long long GetHeight() const; long long GetDisplayWidth() const; long long GetDisplayHeight() const; long long GetDisplayUnit() const; long long GetStereoMode() const; double GetFrameRate() const; bool VetEntry(const BlockEntry*) const; long Seek(long long time_ns, const BlockEntry*&) const; Colour* GetColour() const; Projection* GetProjection() const; const char* GetColourSpace() const { return m_colour_space; } private: long long m_width; long long m_height; long long m_display_width; long long m_display_height; long long m_display_unit; long long m_stereo_mode; char* m_colour_space; double m_rate; Colour* m_colour; Projection* m_projection; }; class AudioTrack : public Track { AudioTrack(const AudioTrack&); AudioTrack& operator=(const AudioTrack&); AudioTrack(Segment*, long long element_start, long long element_size); public: static long Parse(Segment*, const Info&, long long element_start, long long element_size, AudioTrack*&); double GetSamplingRate() const; long long GetChannels() const; long long GetBitDepth() const; private: double m_rate; long long m_channels; long long m_bitDepth; }; class Tracks { Tracks(const Tracks&); Tracks& operator=(const Tracks&); public: Segment* const m_pSegment; const long long m_start; const long long m_size; const long long m_element_start; const long long m_element_size; Tracks(Segment*, long long start, long long size, long long element_start, long long element_size); ~Tracks(); long Parse(); unsigned long GetTracksCount() const; const 
Track* GetTrackByNumber(long tn) const; const Track* GetTrackByIndex(unsigned long idx) const; private: Track** m_trackEntries; Track** m_trackEntriesEnd; long ParseTrackEntry(long long payload_start, long long payload_size, long long element_start, long long element_size, Track*&) const; }; class Chapters { Chapters(const Chapters&); Chapters& operator=(const Chapters&); public: Segment* const m_pSegment; const long long m_start; const long long m_size; const long long m_element_start; const long long m_element_size; Chapters(Segment*, long long payload_start, long long payload_size, long long element_start, long long element_size); ~Chapters(); long Parse(); class Atom; class Edition; class Display { friend class Atom; Display(); Display(const Display&); ~Display(); Display& operator=(const Display&); public: const char* GetString() const; const char* GetLanguage() const; const char* GetCountry() const; private: void Init(); void ShallowCopy(Display&) const; void Clear(); long Parse(IMkvReader*, long long pos, long long size); char* m_string; char* m_language; char* m_country; }; class Atom { friend class Edition; Atom(); Atom(const Atom&); ~Atom(); Atom& operator=(const Atom&); public: unsigned long long GetUID() const; const char* GetStringUID() const; long long GetStartTimecode() const; long long GetStopTimecode() const; long long GetStartTime(const Chapters*) const; long long GetStopTime(const Chapters*) const; int GetDisplayCount() const; const Display* GetDisplay(int index) const; private: void Init(); void ShallowCopy(Atom&) const; void Clear(); long Parse(IMkvReader*, long long pos, long long size); static long long GetTime(const Chapters*, long long timecode); long ParseDisplay(IMkvReader*, long long pos, long long size); bool ExpandDisplaysArray(); char* m_string_uid; unsigned long long m_uid; long long m_start_timecode; long long m_stop_timecode; Display* m_displays; int m_displays_size; int m_displays_count; }; class Edition { friend class Chapters; Edition(); Edition(const Edition&); ~Edition(); Edition& operator=(const Edition&); public: int GetAtomCount() const; const Atom* GetAtom(int index) const; private: void Init(); void ShallowCopy(Edition&) const; void Clear(); long Parse(IMkvReader*, long long pos, long long size); long ParseAtom(IMkvReader*, long long pos, long long size); bool ExpandAtomsArray(); Atom* m_atoms; int m_atoms_size; int m_atoms_count; }; int GetEditionCount() const; const Edition* GetEdition(int index) const; private: long ParseEdition(long long pos, long long size); bool ExpandEditionsArray(); Edition* m_editions; int m_editions_size; int m_editions_count; }; class Tags { Tags(const Tags&); Tags& operator=(const Tags&); public: Segment* const m_pSegment; const long long m_start; const long long m_size; const long long m_element_start; const long long m_element_size; Tags(Segment*, long long payload_start, long long payload_size, long long element_start, long long element_size); ~Tags(); long Parse(); class Tag; class SimpleTag; class SimpleTag { friend class Tag; SimpleTag(); SimpleTag(const SimpleTag&); ~SimpleTag(); SimpleTag& operator=(const SimpleTag&); public: const char* GetTagName() const; const char* GetTagString() const; private: void Init(); void ShallowCopy(SimpleTag&) const; void Clear(); long Parse(IMkvReader*, long long pos, long long size); char* m_tag_name; char* m_tag_string; }; class Tag { friend class Tags; Tag(); Tag(const Tag&); ~Tag(); Tag& operator=(const Tag&); public: int GetSimpleTagCount() const; const SimpleTag* 
GetSimpleTag(int index) const; private: void Init(); void ShallowCopy(Tag&) const; void Clear(); long Parse(IMkvReader*, long long pos, long long size); long ParseSimpleTag(IMkvReader*, long long pos, long long size); bool ExpandSimpleTagsArray(); SimpleTag* m_simple_tags; int m_simple_tags_size; int m_simple_tags_count; }; int GetTagCount() const; const Tag* GetTag(int index) const; private: long ParseTag(long long pos, long long size); bool ExpandTagsArray(); Tag* m_tags; int m_tags_size; int m_tags_count; }; class SegmentInfo { SegmentInfo(const SegmentInfo&); SegmentInfo& operator=(const SegmentInfo&); public: Segment* const m_pSegment; const long long m_start; const long long m_size; const long long m_element_start; const long long m_element_size; SegmentInfo(Segment*, long long start, long long size, long long element_start, long long element_size); ~SegmentInfo(); long Parse(); long long GetTimeCodeScale() const; long long GetDuration() const; // scaled const char* GetMuxingAppAsUTF8() const; const char* GetWritingAppAsUTF8() const; const char* GetTitleAsUTF8() const; private: long long m_timecodeScale; double m_duration; char* m_pMuxingAppAsUTF8; char* m_pWritingAppAsUTF8; char* m_pTitleAsUTF8; }; class SeekHead { SeekHead(const SeekHead&); SeekHead& operator=(const SeekHead&); public: Segment* const m_pSegment; const long long m_start; const long long m_size; const long long m_element_start; const long long m_element_size; SeekHead(Segment*, long long start, long long size, long long element_start, long long element_size); ~SeekHead(); long Parse(); struct Entry { Entry(); // the SeekHead entry payload long long id; long long pos; // absolute pos of SeekEntry ID long long element_start; // SeekEntry ID size + size size + payload long long element_size; }; int GetCount() const; const Entry* GetEntry(int idx) const; struct VoidElement { // absolute pos of Void ID long long element_start; // ID size + size size + payload size long long element_size; }; int GetVoidElementCount() const; const VoidElement* GetVoidElement(int idx) const; private: Entry* m_entries; int m_entry_count; VoidElement* m_void_elements; int m_void_element_count; static bool ParseEntry(IMkvReader*, long long pos, // payload long long size, Entry*); }; class Cues; class CuePoint { friend class Cues; CuePoint(long, long long); ~CuePoint(); CuePoint(const CuePoint&); CuePoint& operator=(const CuePoint&); public: long long m_element_start; long long m_element_size; bool Load(IMkvReader*); long long GetTimeCode() const; // absolute but unscaled long long GetTime(const Segment*) const; // absolute and scaled (ns units) struct TrackPosition { long long m_track; long long m_pos; // of cluster long long m_block; // codec_state //defaults to 0 // reference = clusters containing req'd referenced blocks // reftime = timecode of the referenced block bool Parse(IMkvReader*, long long, long long); }; const TrackPosition* Find(const Track*) const; private: const long m_index; long long m_timecode; TrackPosition* m_track_positions; size_t m_track_positions_count; }; class Cues { friend class Segment; Cues(Segment*, long long start, long long size, long long element_start, long long element_size); ~Cues(); Cues(const Cues&); Cues& operator=(const Cues&); public: Segment* const m_pSegment; const long long m_start; const long long m_size; const long long m_element_start; const long long m_element_size; bool Find( // lower bound of time_ns long long time_ns, const Track*, const CuePoint*&, const CuePoint::TrackPosition*&) const; 
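// Added usage sketch (illustration only, not part of the original header;
// |pSegment|, |pTrack| and |time_ns| are assumed caller inputs). Cue
// points can be loaded incrementally and then searched via Find():
#if 0
const mkvparser::Cues* const pCues = pSegment->GetCues();
while (pCues && !pCues->DoneParsing())
  pCues->LoadCuePoint();  // parses one CuePoint per call
const mkvparser::CuePoint* pCP;
const mkvparser::CuePoint::TrackPosition* pTP;
if (pCues && pCues->Find(time_ns, pTrack, pCP, pTP)) {
  const mkvparser::BlockEntry* const pBE = pCues->GetBlock(pCP, pTP);
  // ... resume decoding from pBE's cluster/block ...
}
#endif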
const CuePoint* GetFirst() const; const CuePoint* GetLast() const; const CuePoint* GetNext(const CuePoint*) const; const BlockEntry* GetBlock(const CuePoint*, const CuePoint::TrackPosition*) const; bool LoadCuePoint() const; long GetCount() const; // loaded only // long GetTotal() const; //loaded + preloaded bool DoneParsing() const; private: bool Init() const; bool PreloadCuePoint(long&, long long) const; mutable CuePoint** m_cue_points; mutable long m_count; mutable long m_preload_count; mutable long long m_pos; }; class Cluster { friend class Segment; Cluster(const Cluster&); Cluster& operator=(const Cluster&); public: Segment* const m_pSegment; public: static Cluster* Create(Segment*, long index, // index in segment long long off); // offset relative to segment // long long element_size); Cluster(); // EndOfStream ~Cluster(); bool EOS() const; long long GetTimeCode() const; // absolute, but not scaled long long GetTime() const; // absolute, and scaled (nanosecond units) long long GetFirstTime() const; // time (ns) of first (earliest) block long long GetLastTime() const; // time (ns) of last (latest) block long GetFirst(const BlockEntry*&) const; long GetLast(const BlockEntry*&) const; long GetNext(const BlockEntry* curr, const BlockEntry*& next) const; const BlockEntry* GetEntry(const Track*, long long ns = -1) const; const BlockEntry* GetEntry(const CuePoint&, const CuePoint::TrackPosition&) const; // const BlockEntry* GetMaxKey(const VideoTrack*) const; // static bool HasBlockEntries(const Segment*, long long); static long HasBlockEntries(const Segment*, long long idoff, long long& pos, long& size); long GetEntryCount() const; long Load(long long& pos, long& size) const; long Parse(long long& pos, long& size) const; long GetEntry(long index, const mkvparser::BlockEntry*&) const; protected: Cluster(Segment*, long index, long long element_start); // long long element_size); public: const long long m_element_start; long long GetPosition() const; // offset relative to segment long GetIndex() const; long long GetElementSize() const; // long long GetPayloadSize() const; // long long Unparsed() const; private: long m_index; mutable long long m_pos; // mutable long long m_size; mutable long long m_element_size; mutable long long m_timecode; mutable BlockEntry** m_entries; mutable long m_entries_size; mutable long m_entries_count; long ParseSimpleBlock(long long, long long&, long&); long ParseBlockGroup(long long, long long&, long&); long CreateBlock(long long id, long long pos, long long size, long long discard_padding); long CreateBlockGroup(long long start_offset, long long size, long long discard_padding); long CreateSimpleBlock(long long, long long); }; class Segment { friend class Cues; friend class Track; friend class VideoTrack; Segment(const Segment&); Segment& operator=(const Segment&); private: Segment(IMkvReader*, long long elem_start, // long long elem_size, long long pos, long long size); public: IMkvReader* const m_pReader; const long long m_element_start; // const long long m_element_size; const long long m_start; // posn of segment payload const long long m_size; // size of segment payload Cluster m_eos; // TODO: make private? 
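// Added usage sketch (illustration only; |pReader| is any IMkvReader,
// e.g. an MkvReader -- see mkvreader.h). CreateInstance() expects the
// absolute position of the Segment element, which EBMLHeader::Parse()
// leaves in |pos|:
#if 0
long long pos = 0;
mkvparser::EBMLHeader ebml;
if (ebml.Parse(pReader, pos) >= 0) {  // pos now at the Segment element
  mkvparser::Segment* pSegment = NULL;
  if (mkvparser::Segment::CreateInstance(pReader, pos, pSegment) == 0 &&
      pSegment->Load() >= 0) {
    for (const mkvparser::Cluster* pC = pSegment->GetFirst();
         pC != NULL && !pC->EOS(); pC = pSegment->GetNext(pC)) {
      // ... walk block entries with pC->GetFirst()/GetNext() ...
    }
  }
  delete pSegment;
}
#endif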
static long long CreateInstance(IMkvReader*, long long, Segment*&); ~Segment(); long Load(); // loads headers and all clusters // for incremental loading // long long Unparsed() const; bool DoneParsing() const; long long ParseHeaders(); // stops when first cluster is found // long FindNextCluster(long long& pos, long& size) const; long LoadCluster(long long& pos, long& size); // load one cluster long LoadCluster(); long ParseNext(const Cluster* pCurr, const Cluster*& pNext, long long& pos, long& size); const SeekHead* GetSeekHead() const; const Tracks* GetTracks() const; const SegmentInfo* GetInfo() const; const Cues* GetCues() const; const Chapters* GetChapters() const; const Tags* GetTags() const; long long GetDuration() const; unsigned long GetCount() const; const Cluster* GetFirst() const; const Cluster* GetLast() const; const Cluster* GetNext(const Cluster*); const Cluster* FindCluster(long long time_nanoseconds) const; // const BlockEntry* Seek(long long time_nanoseconds, const Track*) const; const Cluster* FindOrPreloadCluster(long long pos); long ParseCues(long long cues_off, // offset relative to start of segment long long& parse_pos, long& parse_len); private: long long m_pos; // absolute file posn; what has been consumed so far Cluster* m_pUnknownSize; SeekHead* m_pSeekHead; SegmentInfo* m_pInfo; Tracks* m_pTracks; Cues* m_pCues; Chapters* m_pChapters; Tags* m_pTags; Cluster** m_clusters; long m_clusterCount; // number of entries for which m_index >= 0 long m_clusterPreloadCount; // number of entries for which m_index < 0 long m_clusterSize; // array size long DoLoadCluster(long long&, long&); long DoLoadClusterUnknownSize(long long&, long&); long DoParseNext(const Cluster*&, long long&, long&); bool AppendCluster(Cluster*); bool PreloadCluster(Cluster*, ptrdiff_t); // void ParseSeekHead(long long pos, long long size); // void ParseSeekEntry(long long pos, long long size); // void ParseCues(long long); const BlockEntry* GetBlock(const CuePoint&, const CuePoint::TrackPosition&); }; } // namespace mkvparser inline long mkvparser::Segment::LoadCluster() { long long pos; long size; return LoadCluster(pos, size); } #endif // MKVPARSER_MKVPARSER_H_ libvpx-1.8.2/third_party/libwebm/mkvparser/mkvreader.cc000066400000000000000000000047741357355204000233210ustar00rootroot00000000000000// Copyright (c) 2010 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. 
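// Added usage sketch (illustration only, not part of the original file).
// MkvReader implements IMkvReader over stdio: Open() returns 0 on success,
// Length() reports total == available == the file size for a local file,
// and Read() fills an exact byte range or fails:
#if 0
mkvparser::MkvReader reader;
if (reader.Open("input.webm") == 0) {  // hypothetical file name
  long long total, avail;
  reader.Length(&total, &avail);       // both equal the file size
  unsigned char id[4];
  if (reader.Read(0, 4, id) == 0) {
    // A WebM/Matroska file begins with the EBML ID 0x1A45DFA3.
  }
  reader.Close();
}
#endif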
#include "mkvparser/mkvreader.h" #include #include namespace mkvparser { MkvReader::MkvReader() : m_file(NULL), reader_owns_file_(true) {} MkvReader::MkvReader(FILE* fp) : m_file(fp), reader_owns_file_(false) { GetFileSize(); } MkvReader::~MkvReader() { if (reader_owns_file_) Close(); m_file = NULL; } int MkvReader::Open(const char* fileName) { if (fileName == NULL) return -1; if (m_file) return -1; #ifdef _MSC_VER const errno_t e = fopen_s(&m_file, fileName, "rb"); if (e) return -1; // error #else m_file = fopen(fileName, "rb"); if (m_file == NULL) return -1; #endif return !GetFileSize(); } bool MkvReader::GetFileSize() { if (m_file == NULL) return false; #ifdef _MSC_VER int status = _fseeki64(m_file, 0L, SEEK_END); if (status) return false; // error m_length = _ftelli64(m_file); #else fseek(m_file, 0L, SEEK_END); m_length = ftell(m_file); #endif assert(m_length >= 0); if (m_length < 0) return false; #ifdef _MSC_VER status = _fseeki64(m_file, 0L, SEEK_SET); if (status) return false; // error #else fseek(m_file, 0L, SEEK_SET); #endif return true; } void MkvReader::Close() { if (m_file != NULL) { fclose(m_file); m_file = NULL; } } int MkvReader::Length(long long* total, long long* available) { if (m_file == NULL) return -1; if (total) *total = m_length; if (available) *available = m_length; return 0; } int MkvReader::Read(long long offset, long len, unsigned char* buffer) { if (m_file == NULL) return -1; if (offset < 0) return -1; if (len < 0) return -1; if (len == 0) return 0; if (offset >= m_length) return -1; #ifdef _MSC_VER const int status = _fseeki64(m_file, offset, SEEK_SET); if (status) return -1; // error #elif defined(_WIN32) fseeko64(m_file, static_cast(offset), SEEK_SET); #else fseeko(m_file, static_cast(offset), SEEK_SET); #endif const size_t size = fread(buffer, 1, len, m_file); if (size < size_t(len)) return -1; // error return 0; // success } } // namespace mkvparser libvpx-1.8.2/third_party/libwebm/mkvparser/mkvreader.h000066400000000000000000000023301357355204000231450ustar00rootroot00000000000000// Copyright (c) 2010 The WebM project authors. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the LICENSE file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. #ifndef MKVPARSER_MKVREADER_H_ #define MKVPARSER_MKVREADER_H_ #include #include "mkvparser/mkvparser.h" namespace mkvparser { class MkvReader : public IMkvReader { public: MkvReader(); explicit MkvReader(FILE* fp); virtual ~MkvReader(); int Open(const char*); void Close(); virtual int Read(long long position, long length, unsigned char* buffer); virtual int Length(long long* total, long long* available); private: MkvReader(const MkvReader&); MkvReader& operator=(const MkvReader&); // Determines the size of the file. This is called either by the constructor // or by the Open function depending on file ownership. Returns true on // success. bool GetFileSize(); long long m_length; FILE* m_file; bool reader_owns_file_; }; } // namespace mkvparser #endif // MKVPARSER_MKVREADER_H_ libvpx-1.8.2/third_party/libyuv/000077500000000000000000000000001357355204000166755ustar00rootroot00000000000000libvpx-1.8.2/third_party/libyuv/LICENSE000066400000000000000000000027421357355204000177070ustar00rootroot00000000000000Copyright 2011 The LibYuv Project Authors. All rights reserved. 
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Google nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. libvpx-1.8.2/third_party/libyuv/README.libvpx000066400000000000000000000013271357355204000210630ustar00rootroot00000000000000Name: libyuv URL: https://chromium.googlesource.com/libyuv/libyuv Version: a37e7bfece9e0676ae90a1700b0ec85b0f4f22a1 License: BSD License File: LICENSE Description: libyuv is an open source project that includes YUV conversion and scaling functionality. The optimized scaler in libyuv is used in the multiple resolution encoder example which down-samples the original input video (f.g. 1280x720) a number of times in order to encode multiple resolution bit streams. Local Modifications: Disable ARGBToRGB24Row_AVX512VBMI due to build failure on Mac. rm libyuv/include/libyuv.h libyuv/include/libyuv/compare_row.h mv libyuv/include tmp/ mv libyuv/source tmp/ mv libyuv/LICENSE tmp/ rm -rf libyuv mv tmp/* third_party/libyuv/ libvpx-1.8.2/third_party/libyuv/include/000077500000000000000000000000001357355204000203205ustar00rootroot00000000000000libvpx-1.8.2/third_party/libyuv/include/libyuv/000077500000000000000000000000001357355204000216325ustar00rootroot00000000000000libvpx-1.8.2/third_party/libyuv/include/libyuv/basic_types.h000066400000000000000000000037661357355204000243240ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef INCLUDE_LIBYUV_BASIC_TYPES_H_ #define INCLUDE_LIBYUV_BASIC_TYPES_H_ #include <stddef.h>  // For size_t and NULL #if !defined(INT_TYPES_DEFINED) && !defined(GG_LONGLONG) #define INT_TYPES_DEFINED #if defined(_MSC_VER) && (_MSC_VER < 1600) #include <sys/types.h>  // for uintptr_t on x86 typedef unsigned __int64 uint64_t; typedef __int64 int64_t; typedef unsigned int uint32_t; typedef int int32_t; typedef unsigned short uint16_t; typedef short int16_t; typedef unsigned char uint8_t; typedef signed char int8_t; #else #include <stdint.h>  // for uintptr_t and C99 types #endif  // defined(_MSC_VER) && (_MSC_VER < 1600) typedef uint64_t uint64; typedef int64_t int64; typedef uint32_t uint32; typedef int32_t int32; typedef uint16_t uint16; typedef int16_t int16; typedef uint8_t uint8; typedef int8_t int8; #endif  // INT_TYPES_DEFINED #if !defined(LIBYUV_API) #if defined(_WIN32) || defined(__CYGWIN__) #if defined(LIBYUV_BUILDING_SHARED_LIBRARY) #define LIBYUV_API __declspec(dllexport) #elif defined(LIBYUV_USING_SHARED_LIBRARY) #define LIBYUV_API __declspec(dllimport) #else #define LIBYUV_API #endif  // LIBYUV_BUILDING_SHARED_LIBRARY #elif defined(__GNUC__) && (__GNUC__ >= 4) && !defined(__APPLE__) && \ (defined(LIBYUV_BUILDING_SHARED_LIBRARY) || \ defined(LIBYUV_USING_SHARED_LIBRARY)) #define LIBYUV_API __attribute__((visibility("default"))) #else #define LIBYUV_API #endif  // __GNUC__ #endif  // LIBYUV_API // TODO(fbarchard): Remove bool macros. #define LIBYUV_BOOL int #define LIBYUV_FALSE 0 #define LIBYUV_TRUE 1 #endif  // INCLUDE_LIBYUV_BASIC_TYPES_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/compare.h000066400000000000000000000065171357355204000234400ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_COMPARE_H_ #define INCLUDE_LIBYUV_COMPARE_H_ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Compute a hash for specified memory. Seed of 5381 recommended. LIBYUV_API uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed); // Hamming Distance LIBYUV_API uint64_t ComputeHammingDistance(const uint8_t* src_a, const uint8_t* src_b, int count); // Scan an opaque argb image and return fourcc based on alpha offset. // Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown. LIBYUV_API uint32_t ARGBDetect(const uint8_t* argb, int stride_argb, int width, int height); // Sum Square Error - used to compute Mean Square Error or PSNR.
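// Example (an illustrative usage sketch, not part of the upstream header;
// ref_y and test_y are placeholder buffers): PSNR of two same-size Y planes,
// using the declarations that follow.
//   uint64_t sse = ComputeSumSquareErrorPlane(ref_y, width, test_y, width,
//                                             width, height);
//   double psnr = SumSquareErrorToPsnr(sse, (uint64_t)width * height);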
LIBYUV_API uint64_t ComputeSumSquareError(const uint8_t* src_a, const uint8_t* src_b, int count); LIBYUV_API uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a, int stride_a, const uint8_t* src_b, int stride_b, int width, int height); static const int kMaxPsnr = 128; LIBYUV_API double SumSquareErrorToPsnr(uint64_t sse, uint64_t count); LIBYUV_API double CalcFramePsnr(const uint8_t* src_a, int stride_a, const uint8_t* src_b, int stride_b, int width, int height); LIBYUV_API double I420Psnr(const uint8_t* src_y_a, int stride_y_a, const uint8_t* src_u_a, int stride_u_a, const uint8_t* src_v_a, int stride_v_a, const uint8_t* src_y_b, int stride_y_b, const uint8_t* src_u_b, int stride_u_b, const uint8_t* src_v_b, int stride_v_b, int width, int height); LIBYUV_API double CalcFrameSsim(const uint8_t* src_a, int stride_a, const uint8_t* src_b, int stride_b, int width, int height); LIBYUV_API double I420Ssim(const uint8_t* src_y_a, int stride_y_a, const uint8_t* src_u_a, int stride_u_a, const uint8_t* src_v_a, int stride_v_a, const uint8_t* src_y_b, int stride_y_b, const uint8_t* src_u_b, int stride_u_b, const uint8_t* src_v_b, int stride_v_b, int width, int height); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_COMPARE_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/convert.h000066400000000000000000000305251357355204000234700ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_CONVERT_H_ #define INCLUDE_LIBYUV_CONVERT_H_ #include "libyuv/basic_types.h" #include "libyuv/rotate.h" // For enum RotationMode. // TODO(fbarchard): fix WebRTC source to include following libyuv headers: #include "libyuv/convert_argb.h" // For WebRTC I420ToARGB. b/620 #include "libyuv/convert_from.h" // For WebRTC ConvertFromI420. b/620 #include "libyuv/planar_functions.h" // For WebRTC I420Rect, CopyPlane. b/618 #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Convert I444 to I420. LIBYUV_API int I444ToI420(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert I422 to I420. LIBYUV_API int I422ToI420(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Copy I420 to I420. 
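// Example (illustrative sketch with placeholder buffers): a full-frame copy.
// I420 chroma planes are half width and half height, rounded up, so the
// chroma strides below are (width + 1) / 2.
//   int half_w = (width + 1) / 2;
//   I420Copy(src_y, width, src_u, half_w, src_v, half_w,
//            dst_y, width, dst_u, half_w, dst_v, half_w, width, height);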
#define I420ToI420 I420Copy LIBYUV_API int I420Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Copy I010 to I010 #define I010ToI010 I010Copy #define H010ToH010 I010Copy LIBYUV_API int I010Copy(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint16_t* dst_y, int dst_stride_y, uint16_t* dst_u, int dst_stride_u, uint16_t* dst_v, int dst_stride_v, int width, int height); // Convert 10 bit YUV to 8 bit #define H010ToH420 I010ToI420 LIBYUV_API int I010ToI420(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert I400 (grey) to I420. LIBYUV_API int I400ToI420(const uint8_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); #define J400ToJ420 I400ToI420 // Convert NV12 to I420. LIBYUV_API int NV12ToI420(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert NV21 to I420. LIBYUV_API int NV21ToI420(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, int src_stride_vu, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert YUY2 to I420. LIBYUV_API int YUY2ToI420(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert UYVY to I420. LIBYUV_API int UYVYToI420(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert M420 to I420. LIBYUV_API int M420ToI420(const uint8_t* src_m420, int src_stride_m420, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert Android420 to I420. LIBYUV_API int Android420ToI420(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // ARGB little endian (bgra in memory) to I420. LIBYUV_API int ARGBToI420(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // BGRA little endian (argb in memory) to I420. LIBYUV_API int BGRAToI420(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // ABGR little endian (rgba in memory) to I420. LIBYUV_API int ABGRToI420(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // RGBA little endian (abgr in memory) to I420. 
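// Note (added clarification): throughout libyuv the FOURCC name reads as a
// packed little-endian word, so "ARGB" is stored B,G,R,A in memory, as the
// comments above spell out. A typical call, with placeholder buffers and a
// tightly packed source (stride = width * 4):
//   ARGBToI420(argb, width * 4, dst_y, width,
//              dst_u, (width + 1) / 2, dst_v, (width + 1) / 2, width, height);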
LIBYUV_API int RGBAToI420(const uint8_t* src_rgba, int src_stride_rgba, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // RGB little endian (bgr in memory) to I420. LIBYUV_API int RGB24ToI420(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // RGB big endian (rgb in memory) to I420. LIBYUV_API int RAWToI420(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // RGB16 (RGBP fourcc) little endian to I420. LIBYUV_API int RGB565ToI420(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // RGB15 (RGBO fourcc) little endian to I420. LIBYUV_API int ARGB1555ToI420(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // RGB12 (R444 fourcc) little endian to I420. LIBYUV_API int ARGB4444ToI420(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); #ifdef HAVE_JPEG // src_width/height provided by capture. // dst_width/height for clipping determine final size. LIBYUV_API int MJPGToI420(const uint8_t* sample, size_t sample_size, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int src_width, int src_height, int dst_width, int dst_height); // Query size of MJPG in pixels. LIBYUV_API int MJPGSize(const uint8_t* sample, size_t sample_size, int* width, int* height); #endif // Convert camera sample to I420 with cropping, rotation and vertical flip. // "src_size" is needed to parse MJPG. // "dst_stride_y" number of bytes in a row of the dst_y plane. // Normally this would be the same as dst_width, with recommended alignment // to 16 bytes for better efficiency. // If rotation of 90 or 270 is used, stride is affected. The caller should // allocate the I420 buffer according to rotation. // "dst_stride_u" number of bytes in a row of the dst_u plane. // Normally this would be the same as (dst_width + 1) / 2, with // recommended alignment to 16 bytes for better efficiency. // If rotation of 90 or 270 is used, stride is affected. // "crop_x" and "crop_y" are starting position for cropping. // To center, crop_x = (src_width - dst_width) / 2 // crop_y = (src_height - dst_height) / 2 // "src_width" / "src_height" is size of src_frame in pixels. // "src_height" can be negative indicating a vertically flipped image source. // "crop_width" / "crop_height" is the size to crop the src to. // Must be less than or equal to src_width/src_height // Cropping parameters are pre-rotation. // "rotation" can be 0, 90, 180 or 270. // "fourcc" is a fourcc. ie 'I420', 'YUY2' // Returns 0 for successful; -1 for invalid parameter. Non-zero for failure. 
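// Example (illustrative sketch with placeholder buffers; FOURCC_YUY2 and
// kRotate0 are assumed to come from libyuv/video_common.h and
// libyuv/rotate.h): centered crop of a YUY2 capture, no rotation.
//   ConvertToI420(sample, sample_size, dst_y, crop_width,
//                 dst_u, (crop_width + 1) / 2, dst_v, (crop_width + 1) / 2,
//                 (src_width - crop_width) / 2,
//                 (src_height - crop_height) / 2,
//                 src_width, src_height, crop_width, crop_height,
//                 kRotate0, FOURCC_YUY2);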
LIBYUV_API int ConvertToI420(const uint8_t* sample, size_t sample_size, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int crop_x, int crop_y, int src_width, int src_height, int crop_width, int crop_height, enum RotationMode rotation, uint32_t fourcc); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_CONVERT_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/convert_argb.h000066400000000000000000000501731357355204000244640ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_CONVERT_ARGB_H_ #define INCLUDE_LIBYUV_CONVERT_ARGB_H_ #include "libyuv/basic_types.h" #include "libyuv/rotate.h" // For enum RotationMode. // TODO(fbarchard): This set of functions should exactly match convert.h // TODO(fbarchard): Add tests. Create random content of right size and convert // with C vs Opt and or to I420 and compare. // TODO(fbarchard): Some of these functions lack parameter setting. #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Alias. #define ARGBToARGB ARGBCopy // Copy ARGB to ARGB. LIBYUV_API int ARGBCopy(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I420 to ARGB. LIBYUV_API int I420ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Duplicate prototype for function in convert_from.h for remoting. LIBYUV_API int I420ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert I010 to ARGB. LIBYUV_API int I010ToARGB(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I010 to ARGB. LIBYUV_API int I010ToARGB(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I010 to ABGR. LIBYUV_API int I010ToABGR(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert H010 to ARGB. LIBYUV_API int H010ToARGB(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert H010 to ABGR. LIBYUV_API int H010ToABGR(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert I422 to ARGB. 
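// Example (illustrative sketch, placeholder buffers): I422 chroma is half
// width but full height, so only the chroma strides differ from the I420
// case; dst stride is width * 4 for packed ARGB.
//   I422ToARGB(src_y, width, src_u, (width + 1) / 2, src_v, (width + 1) / 2,
//              dst_argb, width * 4, width, height);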
LIBYUV_API int I422ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I444 to ARGB. LIBYUV_API int I444ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert J444 to ARGB. LIBYUV_API int J444ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I444 to ABGR. LIBYUV_API int I444ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert I420 with Alpha to preattenuated ARGB. LIBYUV_API int I420AlphaToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, const uint8_t* src_a, int src_stride_a, uint8_t* dst_argb, int dst_stride_argb, int width, int height, int attenuate); // Convert I420 with Alpha to preattenuated ABGR. LIBYUV_API int I420AlphaToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, const uint8_t* src_a, int src_stride_a, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height, int attenuate); // Convert I400 (grey) to ARGB. Reverse of ARGBToI400. LIBYUV_API int I400ToARGB(const uint8_t* src_y, int src_stride_y, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert J400 (jpeg grey) to ARGB. LIBYUV_API int J400ToARGB(const uint8_t* src_y, int src_stride_y, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Alias. #define YToARGB I400ToARGB // Convert NV12 to ARGB. LIBYUV_API int NV12ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert NV21 to ARGB. LIBYUV_API int NV21ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, int src_stride_vu, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert NV12 to ABGR. int NV12ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert NV21 to ABGR. LIBYUV_API int NV21ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, int src_stride_vu, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert NV12 to RGB24. LIBYUV_API int NV12ToRGB24(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height); // Convert NV21 to RGB24. LIBYUV_API int NV21ToRGB24(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, int src_stride_vu, uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height); // Convert M420 to ARGB. LIBYUV_API int M420ToARGB(const uint8_t* src_m420, int src_stride_m420, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert YUY2 to ARGB. LIBYUV_API int YUY2ToARGB(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert UYVY to ARGB. 
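// Example (illustrative sketch, placeholder buffers): UYVY packs each pixel
// pair into 4 bytes, so a tightly packed source of even width has
// stride = width * 2.
//   UYVYToARGB(src_uyvy, width * 2, dst_argb, width * 4, width, height);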
LIBYUV_API int UYVYToARGB(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert J420 to ARGB. LIBYUV_API int J420ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert J422 to ARGB. LIBYUV_API int J422ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert J420 to ABGR. LIBYUV_API int J420ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert J422 to ABGR. LIBYUV_API int J422ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert H420 to ARGB. LIBYUV_API int H420ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert H422 to ARGB. LIBYUV_API int H422ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert H420 to ABGR. LIBYUV_API int H420ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert H422 to ABGR. LIBYUV_API int H422ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert H010 to ARGB. LIBYUV_API int H010ToARGB(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I010 to AR30. LIBYUV_API int I010ToAR30(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_ar30, int dst_stride_ar30, int width, int height); // Convert H010 to AR30. LIBYUV_API int H010ToAR30(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_ar30, int dst_stride_ar30, int width, int height); // Convert I010 to AB30. LIBYUV_API int I010ToAB30(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_ab30, int dst_stride_ab30, int width, int height); // Convert H010 to AB30. LIBYUV_API int H010ToAB30(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_ab30, int dst_stride_ab30, int width, int height); // BGRA little endian (argb in memory) to ARGB. LIBYUV_API int BGRAToARGB(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // ABGR little endian (rgba in memory) to ARGB. 
LIBYUV_API int ABGRToARGB(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // RGBA little endian (abgr in memory) to ARGB. LIBYUV_API int RGBAToARGB(const uint8_t* src_rgba, int src_stride_rgba, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Deprecated function name. #define BG24ToARGB RGB24ToARGB // RGB little endian (bgr in memory) to ARGB. LIBYUV_API int RGB24ToARGB(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // RGB big endian (rgb in memory) to ARGB. LIBYUV_API int RAWToARGB(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // RGB16 (RGBP fourcc) little endian to ARGB. LIBYUV_API int RGB565ToARGB(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // RGB15 (RGBO fourcc) little endian to ARGB. LIBYUV_API int ARGB1555ToARGB(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // RGB12 (R444 fourcc) little endian to ARGB. LIBYUV_API int ARGB4444ToARGB(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Aliases #define AB30ToARGB AR30ToABGR #define AB30ToABGR AR30ToARGB #define AB30ToAR30 AR30ToAB30 // Convert AR30 To ARGB. LIBYUV_API int AR30ToARGB(const uint8_t* src_ar30, int src_stride_ar30, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert AR30 To ABGR. LIBYUV_API int AR30ToABGR(const uint8_t* src_ar30, int src_stride_ar30, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert AR30 To AB30. LIBYUV_API int AR30ToAB30(const uint8_t* src_ar30, int src_stride_ar30, uint8_t* dst_ab30, int dst_stride_ab30, int width, int height); #ifdef HAVE_JPEG // src_width/height provided by capture // dst_width/height for clipping determine final size. LIBYUV_API int MJPGToARGB(const uint8_t* sample, size_t sample_size, uint8_t* dst_argb, int dst_stride_argb, int src_width, int src_height, int dst_width, int dst_height); #endif // Convert Android420 to ARGB. LIBYUV_API int Android420ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert Android420 to ABGR. LIBYUV_API int Android420ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert camera sample to ARGB with cropping, rotation and vertical flip. // "sample_size" is needed to parse MJPG. // "dst_stride_argb" number of bytes in a row of the dst_argb plane. // Normally this would be the same as dst_width, with recommended alignment // to 16 bytes for better efficiency. // If rotation of 90 or 270 is used, stride is affected. The caller should // allocate the I420 buffer according to rotation. // "dst_stride_u" number of bytes in a row of the dst_u plane. // Normally this would be the same as (dst_width + 1) / 2, with // recommended alignment to 16 bytes for better efficiency. // If rotation of 90 or 270 is used, stride is affected. // "crop_x" and "crop_y" are starting position for cropping. 
// To center, crop_x = (src_width - dst_width) / 2 // crop_y = (src_height - dst_height) / 2 // "src_width" / "src_height" is size of src_frame in pixels. // "src_height" can be negative indicating a vertically flipped image source. // "crop_width" / "crop_height" is the size to crop the src to. // Must be less than or equal to src_width/src_height // Cropping parameters are pre-rotation. // "rotation" can be 0, 90, 180 or 270. // "fourcc" is a fourcc. ie 'I420', 'YUY2' // Returns 0 for successful; -1 for invalid parameter. Non-zero for failure. LIBYUV_API int ConvertToARGB(const uint8_t* sample, size_t sample_size, uint8_t* dst_argb, int dst_stride_argb, int crop_x, int crop_y, int src_width, int src_height, int crop_width, int crop_height, enum RotationMode rotation, uint32_t fourcc); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_CONVERT_ARGB_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/convert_from.h000066400000000000000000000240701357355204000245110ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_CONVERT_FROM_H_ #define INCLUDE_LIBYUV_CONVERT_FROM_H_ #include "libyuv/basic_types.h" #include "libyuv/rotate.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // See Also convert.h for conversions from formats to I420. // Convert 8 bit YUV to 10 bit. #define H420ToH010 I420ToI010 int I420ToI010(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint16_t* dst_y, int dst_stride_y, uint16_t* dst_u, int dst_stride_u, uint16_t* dst_v, int dst_stride_v, int width, int height); LIBYUV_API int I420ToI422(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); LIBYUV_API int I420ToI444(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Copy to I400. Source can be I420, I422, I444, I400, NV12 or NV21. 
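// Example (illustrative sketch, placeholder buffers): since only the Y
// plane is read, this doubles as a grayscale extractor for the formats
// listed above.
//   I400Copy(src_y, width, dst_y, width, width, height);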
LIBYUV_API int I400Copy(const uint8_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, int width, int height); LIBYUV_API int I420ToNV12(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv, int width, int height); LIBYUV_API int I420ToNV21(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_vu, int dst_stride_vu, int width, int height); LIBYUV_API int I420ToYUY2(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_yuy2, int dst_stride_yuy2, int width, int height); LIBYUV_API int I420ToUYVY(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_uyvy, int dst_stride_uyvy, int width, int height); LIBYUV_API int I420ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height); LIBYUV_API int I420ToBGRA(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_bgra, int dst_stride_bgra, int width, int height); LIBYUV_API int I420ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); LIBYUV_API int I420ToRGBA(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgba, int dst_stride_rgba, int width, int height); LIBYUV_API int I420ToRGB24(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height); LIBYUV_API int I420ToRAW(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_raw, int dst_stride_raw, int width, int height); LIBYUV_API int H420ToRGB24(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height); LIBYUV_API int H420ToRAW(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_raw, int dst_stride_raw, int width, int height); LIBYUV_API int I420ToRGB565(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height); LIBYUV_API int I422ToRGB565(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height); // Convert I420 To RGB565 with 4x4 dither matrix (16 bytes). // Values in dither matrix from 0 to 7 recommended. // The order of the dither matrix is first byte is upper left. 
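// Example (illustrative sketch): one possible 4x4 ordered-dither table in
// the recommended 0..7 range, laid out row by row from the upper left;
// buffers are placeholders and dst stride is width * 2 for packed RGB565.
//   static const uint8_t kDither4x4[16] = {0, 4, 1, 5, 6, 2, 7, 3,
//                                          1, 5, 0, 4, 7, 3, 6, 2};
//   I420ToRGB565Dither(src_y, width, src_u, (width + 1) / 2,
//                      src_v, (width + 1) / 2, dst_rgb565, width * 2,
//                      kDither4x4, width, height);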
LIBYUV_API int I420ToRGB565Dither(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgb565, int dst_stride_rgb565, const uint8_t* dither4x4, int width, int height); LIBYUV_API int I420ToARGB1555(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb1555, int dst_stride_argb1555, int width, int height); LIBYUV_API int I420ToARGB4444(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb4444, int dst_stride_argb4444, int width, int height); // Convert I420 to AR30. LIBYUV_API int I420ToAR30(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_ar30, int dst_stride_ar30, int width, int height); // Convert H420 to AR30. LIBYUV_API int H420ToAR30(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_ar30, int dst_stride_ar30, int width, int height); // Convert I420 to specified format. // "dst_sample_stride" is bytes in a row for the destination. Pass 0 if the // buffer has contiguous rows. Can be negative. A multiple of 16 is optimal. LIBYUV_API int ConvertFromI420(const uint8_t* y, int y_stride, const uint8_t* u, int u_stride, const uint8_t* v, int v_stride, uint8_t* dst_sample, int dst_sample_stride, int width, int height, uint32_t fourcc); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_CONVERT_FROM_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/convert_from_argb.h000066400000000000000000000175361357355204000255150ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ #define INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Copy ARGB to ARGB. #define ARGBToARGB ARGBCopy LIBYUV_API int ARGBCopy(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert ARGB To BGRA. LIBYUV_API int ARGBToBGRA(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_bgra, int dst_stride_bgra, int width, int height); // Convert ARGB To ABGR. LIBYUV_API int ARGBToABGR(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert ARGB To RGBA. LIBYUV_API int ARGBToRGBA(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_rgba, int dst_stride_rgba, int width, int height); // Aliases #define ARGBToAB30 ABGRToAR30 #define ABGRToAB30 ARGBToAR30 // Convert ABGR To AR30. LIBYUV_API int ABGRToAR30(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_ar30, int dst_stride_ar30, int width, int height); // Convert ARGB To AR30. LIBYUV_API int ARGBToAR30(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_ar30, int dst_stride_ar30, int width, int height); // Convert ARGB To RGB24. 
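// Example (illustrative sketch, placeholder buffers): RGB24 is 3 bytes per
// pixel, so a tightly packed destination has stride = width * 3.
//   ARGBToRGB24(src_argb, width * 4, dst_rgb24, width * 3, width, height);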
LIBYUV_API int ARGBToRGB24(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height); // Convert ARGB To RAW. LIBYUV_API int ARGBToRAW(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_raw, int dst_stride_raw, int width, int height); // Convert ARGB To RGB565. LIBYUV_API int ARGBToRGB565(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height); // Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes). // Values in dither matrix from 0 to 7 recommended. // The order of the dither matrix is first byte is upper left. // TODO(fbarchard): Consider pointer to 2d array for dither4x4. // const uint8_t(*dither)[4][4]; LIBYUV_API int ARGBToRGB565Dither(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_rgb565, int dst_stride_rgb565, const uint8_t* dither4x4, int width, int height); // Convert ARGB To ARGB1555. LIBYUV_API int ARGBToARGB1555(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb1555, int dst_stride_argb1555, int width, int height); // Convert ARGB To ARGB4444. LIBYUV_API int ARGBToARGB4444(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb4444, int dst_stride_argb4444, int width, int height); // Convert ARGB To I444. LIBYUV_API int ARGBToI444(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert ARGB To I422. LIBYUV_API int ARGBToI422(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert ARGB To I420. (also in convert.h) LIBYUV_API int ARGBToI420(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert ARGB to J420. (JPeg full range I420). LIBYUV_API int ARGBToJ420(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert ARGB to J422. LIBYUV_API int ARGBToJ422(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert ARGB to J400. (JPeg full range). LIBYUV_API int ARGBToJ400(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, int width, int height); // Convert ARGB to I400. LIBYUV_API int ARGBToI400(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, int width, int height); // Convert ARGB to G. (Reverse of J400toARGB, which replicates G back to ARGB) LIBYUV_API int ARGBToG(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_g, int dst_stride_g, int width, int height); // Convert ARGB To NV12. LIBYUV_API int ARGBToNV12(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv, int width, int height); // Convert ARGB To NV21. LIBYUV_API int ARGBToNV21(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_vu, int dst_stride_vu, int width, int height); // Convert ARGB To NV21. 
LIBYUV_API int ARGBToNV21(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_vu, int dst_stride_vu, int width, int height); // Convert ARGB To YUY2. LIBYUV_API int ARGBToYUY2(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yuy2, int dst_stride_yuy2, int width, int height); // Convert ARGB To UYVY. LIBYUV_API int ARGBToUYVY(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_uyvy, int dst_stride_uyvy, int width, int height); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_CONVERT_FROM_ARGB_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/cpu_id.h000066400000000000000000000101471357355204000232510ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_CPU_ID_H_ #define INCLUDE_LIBYUV_CPU_ID_H_ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Internal flag to indicate cpuid requires initialization. static const int kCpuInitialized = 0x1; // These flags are only valid on ARM processors. static const int kCpuHasARM = 0x2; static const int kCpuHasNEON = 0x4; // 0x8 reserved for future ARM flag. // These flags are only valid on x86 processors. static const int kCpuHasX86 = 0x10; static const int kCpuHasSSE2 = 0x20; static const int kCpuHasSSSE3 = 0x40; static const int kCpuHasSSE41 = 0x80; static const int kCpuHasSSE42 = 0x100; // unused at this time. static const int kCpuHasAVX = 0x200; static const int kCpuHasAVX2 = 0x400; static const int kCpuHasERMS = 0x800; static const int kCpuHasFMA3 = 0x1000; static const int kCpuHasF16C = 0x2000; static const int kCpuHasGFNI = 0x4000; static const int kCpuHasAVX512BW = 0x8000; static const int kCpuHasAVX512VL = 0x10000; static const int kCpuHasAVX512VBMI = 0x20000; static const int kCpuHasAVX512VBMI2 = 0x40000; static const int kCpuHasAVX512VBITALG = 0x80000; static const int kCpuHasAVX512VPOPCNTDQ = 0x100000; // These flags are only valid on MIPS processors. static const int kCpuHasMIPS = 0x200000; static const int kCpuHasMSA = 0x400000; // Optional init function. TestCpuFlag does an auto-init. // Returns cpu_info flags. LIBYUV_API int InitCpuFlags(void); // Detect CPU has SSE2 etc. // Test_flag parameter should be one of kCpuHas constants above. // Returns non-zero if instruction set is detected static __inline int TestCpuFlag(int test_flag) { LIBYUV_API extern int cpu_info_; #ifdef __ATOMIC_RELAXED int cpu_info = __atomic_load_n(&cpu_info_, __ATOMIC_RELAXED); #else int cpu_info = cpu_info_; #endif return (!cpu_info ? InitCpuFlags() : cpu_info) & test_flag; } // Internal function for parsing /proc/cpuinfo. LIBYUV_API int ArmCpuCaps(const char* cpuinfo_name); // For testing, allow CPU flags to be disabled. // ie MaskCpuFlags(~kCpuHasSSSE3) to disable SSSE3. // MaskCpuFlags(-1) to enable all cpu specific optimizations. // MaskCpuFlags(1) to disable all cpu specific optimizations. // MaskCpuFlags(0) to reset state so next call will auto init. // Returns cpu_info flags. LIBYUV_API int MaskCpuFlags(int enable_flags); // Sets the CPU flags to |cpu_flags|, bypassing the detection code. 
|cpu_flags| // should be a valid combination of the kCpuHas constants above and include // kCpuInitialized. Use this method when running in a sandboxed process where // the detection code might fail (as it might access /proc/cpuinfo). In such // cases the cpu_info can be obtained from a non sandboxed process by calling // InitCpuFlags() and passed to the sandboxed process (via command line // parameters, IPC...) which can then call this method to initialize the CPU // flags. // Notes: // - when specifying 0 for |cpu_flags|, the auto initialization is enabled // again. // - enabling CPU features that are not supported by the CPU will result in // undefined behavior. // TODO(fbarchard): consider writing a helper function that translates from // other library CPU info to libyuv CPU info and add a .md doc that explains // CPU detection. static __inline void SetCpuFlags(int cpu_flags) { LIBYUV_API extern int cpu_info_; #ifdef __ATOMIC_RELAXED __atomic_store_n(&cpu_info_, cpu_flags, __ATOMIC_RELAXED); #else cpu_info_ = cpu_flags; #endif } // Low level cpuid for X86. Returns zeros on other CPUs. // eax is the info type that you want. // ecx is typically the cpu number, and should normally be zero. LIBYUV_API void CpuId(int info_eax, int info_ecx, int* cpu_info); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_CPU_ID_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/macros_msa.h000066400000000000000000000247371357355204000241440ustar00rootroot00000000000000/* * Copyright 2016 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef INCLUDE_LIBYUV_MACROS_MSA_H_ #define INCLUDE_LIBYUV_MACROS_MSA_H_ #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #include <msa.h> #include <stdint.h> #if (__mips_isa_rev >= 6) #define LW(psrc) \ ({ \ const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \ uint32_t val_m; \ asm volatile("lw %[val_m], %[psrc_lw_m] \n" \ : [val_m] "=r"(val_m) \ : [psrc_lw_m] "m"(*psrc_lw_m)); \ val_m; \ }) #if (__mips == 64) #define LD(psrc) \ ({ \ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ uint64_t val_m = 0; \ asm volatile("ld %[val_m], %[psrc_ld_m] \n" \ : [val_m] "=r"(val_m) \ : [psrc_ld_m] "m"(*psrc_ld_m)); \ val_m; \ }) #else // !(__mips == 64) #define LD(psrc) \ ({ \ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ uint32_t val0_m, val1_m; \ uint64_t val_m = 0; \ val0_m = LW(psrc_ld_m); \ val1_m = LW(psrc_ld_m + 4); \ val_m = (uint64_t)(val1_m); /* NOLINT */ \ val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \ val_m; \ }) #endif // (__mips == 64) #define SW(val, pdst) \ ({ \ uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \ uint32_t val_m = (val); \ asm volatile("sw %[val_m], %[pdst_sw_m] \n" \ : [pdst_sw_m] "=m"(*pdst_sw_m) \ : [val_m] "r"(val_m)); \ }) #if (__mips == 64) #define SD(val, pdst) \ ({ \ uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ uint64_t val_m = (val); \ asm volatile("sd %[val_m], %[pdst_sd_m] \n" \ : [pdst_sd_m] "=m"(*pdst_sd_m) \ : [val_m] "r"(val_m)); \ }) #else // !(__mips == 64) #define SD(val, pdst) \ ({ \ uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ uint32_t val0_m, val1_m; \ val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ SW(val0_m, pdst_sd_m); \ SW(val1_m, pdst_sd_m + 4); \ }) #endif // !(__mips == 64) #else // !(__mips_isa_rev >= 6) #define LW(psrc) \ ({ \ const uint8_t* psrc_lw_m = (const uint8_t*)(psrc); \ uint32_t val_m; \ asm volatile("ulw %[val_m], %[psrc_lw_m] \n" \ : [val_m] "=r"(val_m) \ : [psrc_lw_m] "m"(*psrc_lw_m)); \ val_m; \ }) #if (__mips == 64) #define LD(psrc) \ ({ \ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ uint64_t val_m = 0; \ asm volatile("uld %[val_m], %[psrc_ld_m] \n" \ : [val_m] "=r"(val_m) \ : [psrc_ld_m] "m"(*psrc_ld_m)); \ val_m; \ }) #else // !(__mips == 64) #define LD(psrc) \ ({ \ const uint8_t* psrc_ld_m = (const uint8_t*)(psrc); \ uint32_t val0_m, val1_m; \ uint64_t val_m = 0; \ val0_m = LW(psrc_ld_m); \ val1_m = LW(psrc_ld_m + 4); \ val_m = (uint64_t)(val1_m); /* NOLINT */ \ val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); /* NOLINT */ \ val_m = (uint64_t)(val_m | (uint64_t)val0_m); /* NOLINT */ \ val_m; \ }) #endif // (__mips == 64) #define SW(val, pdst) \ ({ \ uint8_t* pdst_sw_m = (uint8_t*)(pdst); /* NOLINT */ \ uint32_t val_m = (val); \ asm volatile("usw %[val_m], %[pdst_sw_m] \n" \ : [pdst_sw_m] "=m"(*pdst_sw_m) \ : [val_m] "r"(val_m)); \ }) #define SD(val, pdst) \ ({ \ uint8_t* pdst_sd_m = (uint8_t*)(pdst); /* NOLINT */ \ uint32_t val0_m, val1_m; \ val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ SW(val0_m, pdst_sd_m); \ SW(val1_m, pdst_sd_m + 4); \ }) #endif // (__mips_isa_rev >= 6) // TODO(fbarchard): Consider removing __VAR_ARGS versions. #define LD_B(RTYPE, psrc) *((RTYPE*)(psrc)) /* NOLINT */ #define LD_UB(...) LD_B(const v16u8, __VA_ARGS__) #define ST_B(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ #define ST_UB(...)
ST_B(v16u8, __VA_ARGS__) #define ST_H(RTYPE, in, pdst) *((RTYPE*)(pdst)) = (in) /* NOLINT */ #define ST_UH(...) ST_H(v8u16, __VA_ARGS__) /* Description : Load two vectors with 16 'byte' sized elements Arguments : Inputs - psrc, stride Outputs - out0, out1 Return Type - as per RTYPE Details : Load 16 byte elements in 'out0' from (psrc) Load 16 byte elements in 'out1' from (psrc + stride) */ #define LD_B2(RTYPE, psrc, stride, out0, out1) \ { \ out0 = LD_B(RTYPE, (psrc)); \ out1 = LD_B(RTYPE, (psrc) + stride); \ } #define LD_UB2(...) LD_B2(const v16u8, __VA_ARGS__) #define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \ { \ LD_B2(RTYPE, (psrc), stride, out0, out1); \ LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \ } #define LD_UB4(...) LD_B4(const v16u8, __VA_ARGS__) /* Description : Store two vectors with stride each having 16 'byte' sized elements Arguments : Inputs - in0, in1, pdst, stride Details : Store 16 byte elements from 'in0' to (pdst) Store 16 byte elements from 'in1' to (pdst + stride) */ #define ST_B2(RTYPE, in0, in1, pdst, stride) \ { \ ST_B(RTYPE, in0, (pdst)); \ ST_B(RTYPE, in1, (pdst) + stride); \ } #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \ { \ ST_B2(RTYPE, in0, in1, (pdst), stride); \ ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ } #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) /* Description : Store vectors of 8 halfword elements with stride Arguments : Inputs - in0, in1, pdst, stride Details : Store 8 halfword elements from 'in0' to (pdst) Store 8 halfword elements from 'in1' to (pdst + stride) */ #define ST_H2(RTYPE, in0, in1, pdst, stride) \ { \ ST_H(RTYPE, in0, (pdst)); \ ST_H(RTYPE, in1, (pdst) + stride); \ } #define ST_UH2(...) ST_H2(v8u16, __VA_ARGS__) // TODO(fbarchard): Consider using __msa_vshf_b and __msa_ilvr_b directly. /* Description : Shuffle byte vector elements as per mask vector Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 Outputs - out0, out1 Return Type - as per RTYPE Details : Byte elements from 'in0' & 'in1' are copied selectively to 'out0' as per control vector 'mask0' */ #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ { \ out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \ out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \ } #define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) /* Description : Interleave both left and right half of input vectors Arguments : Inputs - in0, in1 Outputs - out0, out1 Return Type - as per RTYPE Details : Right half of byte elements from 'in0' and 'in1' are interleaved and written to 'out0' */ #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ } #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) #endif /* !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) */ #endif // INCLUDE_LIBYUV_MACROS_MSA_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/mjpeg_decoder.h000066400000000000000000000135031357355204000245740ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef INCLUDE_LIBYUV_MJPEG_DECODER_H_ #define INCLUDE_LIBYUV_MJPEG_DECODER_H_ #include "libyuv/basic_types.h" #ifdef __cplusplus // NOTE: For a simplified public API use convert.h MJPGToI420(). struct jpeg_common_struct; struct jpeg_decompress_struct; struct jpeg_source_mgr; namespace libyuv { #ifdef __cplusplus extern "C" { #endif LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size); #ifdef __cplusplus } // extern "C" #endif static const uint32_t kUnknownDataSize = 0xFFFFFFFF; enum JpegSubsamplingType { kJpegYuv420, kJpegYuv422, kJpegYuv444, kJpegYuv400, kJpegUnknown }; struct Buffer { const uint8_t* data; int len; }; struct BufferVector { Buffer* buffers; int len; int pos; }; struct SetJmpErrorMgr; // MJPEG ("Motion JPEG") is a pseudo-standard video codec where the frames are // simply independent JPEG images with a fixed huffman table (which is omitted). // It is rarely used in video transmission, but is common as a camera capture // format, especially in Logitech devices. This class implements a decoder for // MJPEG frames. // // See http://tools.ietf.org/html/rfc2435 class LIBYUV_API MJpegDecoder { public: typedef void (*CallbackFunction)(void* opaque, const uint8_t* const* data, const int* strides, int rows); static const int kColorSpaceUnknown; static const int kColorSpaceGrayscale; static const int kColorSpaceRgb; static const int kColorSpaceYCbCr; static const int kColorSpaceCMYK; static const int kColorSpaceYCCK; MJpegDecoder(); ~MJpegDecoder(); // Loads a new frame, reads its headers, and determines the uncompressed // image format. // Returns LIBYUV_TRUE if image looks valid and format is supported. // If return value is LIBYUV_TRUE, then the values for all the following // getters are populated. // src_len is the size of the compressed mjpeg frame in bytes. LIBYUV_BOOL LoadFrame(const uint8_t* src, size_t src_len); // Returns width of the last loaded frame in pixels. int GetWidth(); // Returns height of the last loaded frame in pixels. int GetHeight(); // Returns format of the last loaded frame. The return value is one of the // kColorSpace* constants. int GetColorSpace(); // Number of color components in the color space. int GetNumComponents(); // Sample factors of the n-th component. int GetHorizSampFactor(int component); int GetVertSampFactor(int component); int GetHorizSubSampFactor(int component); int GetVertSubSampFactor(int component); // Public for testability. int GetImageScanlinesPerImcuRow(); // Public for testability. int GetComponentScanlinesPerImcuRow(int component); // Width of a component in bytes. int GetComponentWidth(int component); // Height of a component. int GetComponentHeight(int component); // Width of a component in bytes with padding for DCTSIZE. Public for testing. int GetComponentStride(int component); // Size of a component in bytes. int GetComponentSize(int component); // Call this after LoadFrame() if you decide you don't want to decode it // after all. LIBYUV_BOOL UnloadFrame(); // Decodes the entire image into a one-buffer-per-color-component format. // dst_width must match exactly. dst_height must be <= to image height; if // less, the image is cropped. "planes" must have size equal to at least // GetNumComponents() and they must point to non-overlapping buffers of size // at least GetComponentSize(i). The pointers in planes are incremented // to point to after the end of the written data. // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded. 
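// Example (illustrative call sequence with placeholder buffers; each
// planes[i] must point to at least GetComponentSize(i) bytes):
//   MJpegDecoder decoder;
//   if (decoder.LoadFrame(mjpg_data, mjpg_size)) {
//     uint8_t* planes[3] = {plane_y, plane_u, plane_v};  // YCbCr case.
//     decoder.DecodeToBuffers(planes, decoder.GetWidth(),
//                             decoder.GetHeight());
//     decoder.UnloadFrame();
//   }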
LIBYUV_BOOL DecodeToBuffers(uint8_t** planes, int dst_width, int dst_height); // Decodes the entire image and passes the data via repeated calls to a // callback function. Each call will get the data for a whole number of // image scanlines. // TODO(fbarchard): Add dst_x, dst_y to allow specific rect to be decoded. LIBYUV_BOOL DecodeToCallback(CallbackFunction fn, void* opaque, int dst_width, int dst_height); // The helper function which recognizes the jpeg sub-sampling type. static JpegSubsamplingType JpegSubsamplingTypeHelper( int* subsample_x, int* subsample_y, int number_of_components); private: void AllocOutputBuffers(int num_outbufs); void DestroyOutputBuffers(); LIBYUV_BOOL StartDecode(); LIBYUV_BOOL FinishDecode(); void SetScanlinePointers(uint8_t** data); LIBYUV_BOOL DecodeImcuRow(); int GetComponentScanlinePadding(int component); // A buffer holding the input data for a frame. Buffer buf_; BufferVector buf_vec_; jpeg_decompress_struct* decompress_struct_; jpeg_source_mgr* source_mgr_; SetJmpErrorMgr* error_mgr_; // LIBYUV_TRUE iff at least one component has scanline padding. (i.e., // GetComponentScanlinePadding() != 0.) LIBYUV_BOOL has_scanline_padding_; // Temporaries used to point to scanline outputs. int num_outbufs_; // Outermost size of all arrays below. uint8_t*** scanlines_; int* scanlines_sizes_; // Temporary buffer used for decoding when we can't decode directly to the // output buffers. Large enough for just one iMCU row. uint8_t** databuf_; int* databuf_strides_; }; } // namespace libyuv #endif // __cplusplus #endif // INCLUDE_LIBYUV_MJPEG_DECODER_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/planar_functions.h000066400000000000000000000643221357355204000253570ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ #define INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ #include "libyuv/basic_types.h" // TODO(fbarchard): Remove the following headers includes. #include "libyuv/convert.h" #include "libyuv/convert_argb.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // TODO(fbarchard): Move cpu macros to row.h #if defined(__pnacl__) || defined(__CLR_VER) || \ (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif // The following are available on all x86 platforms: #if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) #define HAS_ARGBAFFINEROW_SSE2 #endif // Copy a plane of data. 
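// Example (illustrative sketch, placeholder buffers): duplicating a Y plane
// whose rows are tightly packed, i.e. both strides equal width.
//   CopyPlane(src_y, width, dst_y, width, width, height);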
LIBYUV_API void CopyPlane(const uint8_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, int width, int height); LIBYUV_API void CopyPlane_16(const uint16_t* src_y, int src_stride_y, uint16_t* dst_y, int dst_stride_y, int width, int height); LIBYUV_API void Convert16To8Plane(const uint16_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, int scale, // 16384 for 10 bits int width, int height); LIBYUV_API void Convert8To16Plane(const uint8_t* src_y, int src_stride_y, uint16_t* dst_y, int dst_stride_y, int scale, // 1024 for 10 bits int width, int height); // Set a plane of data to a 32 bit value. LIBYUV_API void SetPlane(uint8_t* dst_y, int dst_stride_y, int width, int height, uint32_t value); // Split interleaved UV plane into separate U and V planes. LIBYUV_API void SplitUVPlane(const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Merge separate U and V planes into one interleaved UV plane. LIBYUV_API void MergeUVPlane(const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_uv, int dst_stride_uv, int width, int height); // Split interleaved RGB plane into separate R, G and B planes. LIBYUV_API void SplitRGBPlane(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_r, int dst_stride_r, uint8_t* dst_g, int dst_stride_g, uint8_t* dst_b, int dst_stride_b, int width, int height); // Merge separate R, G and B planes into one interleaved RGB plane. LIBYUV_API void MergeRGBPlane(const uint8_t* src_r, int src_stride_r, const uint8_t* src_g, int src_stride_g, const uint8_t* src_b, int src_stride_b, uint8_t* dst_rgb, int dst_stride_rgb, int width, int height); // Copy I400. Supports inverting. LIBYUV_API int I400ToI400(const uint8_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, int width, int height); #define J400ToJ400 I400ToI400 // Copy I422 to I422. #define I422ToI422 I422Copy LIBYUV_API int I422Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Copy I444 to I444. #define I444ToI444 I444Copy LIBYUV_API int I444Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert YUY2 to I422. LIBYUV_API int YUY2ToI422(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Convert UYVY to I422. LIBYUV_API int UYVYToI422(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); LIBYUV_API int YUY2ToNV12(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv, int width, int height); LIBYUV_API int UYVYToNV12(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv, int width, int height); LIBYUV_API int YUY2ToY(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_y, int dst_stride_y, int width, int height); // Convert I420 to I400. (calls CopyPlane ignoring u/v). 
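// ---------------------------------------------------------------------------
// Added notes (not part of the original header):
// - With the documented scale of 16384, Convert16To8Plane (declared above)
//   effectively computes (v * 16384) >> 16 == v >> 2, mapping 10-bit samples
//   (max 1023) onto 8-bit ones (max 255). Convert8To16Plane with a scale of
//   1024 performs the matching 8-bit to 10-bit expansion per its comment.
// - I420ToI400, declared just below, is the cheap way to get a grayscale
//   image from I420: only the Y plane is copied. Placeholder names:
//
//   libyuv::I420ToI400(src_y, src_stride_y, src_u, src_stride_u,
//                      src_v, src_stride_v, dst_gray, dst_stride_gray,
//                      width, height);  // U and V are accepted but ignored.
// ---------------------------------------------------------------------------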
LIBYUV_API int I420ToI400(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, int width, int height); // Alias #define J420ToJ400 I420ToI400 #define I420ToI420Mirror I420Mirror // I420 mirror. LIBYUV_API int I420Mirror(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Alias #define I400ToI400Mirror I400Mirror // I400 mirror. A single plane is mirrored horizontally. // Pass negative height to achieve 180 degree rotation. LIBYUV_API int I400Mirror(const uint8_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, int width, int height); // Alias #define ARGBToARGBMirror ARGBMirror // ARGB mirror. LIBYUV_API int ARGBMirror(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert NV12 to RGB565. LIBYUV_API int NV12ToRGB565(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height); // I422ToARGB is in convert_argb.h // Convert I422 to BGRA. LIBYUV_API int I422ToBGRA(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_bgra, int dst_stride_bgra, int width, int height); // Convert I422 to ABGR. LIBYUV_API int I422ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height); // Convert I422 to RGBA. LIBYUV_API int I422ToRGBA(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgba, int dst_stride_rgba, int width, int height); // Alias #define RGB24ToRAW RAWToRGB24 LIBYUV_API int RAWToRGB24(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height); // Draw a rectangle into I420. LIBYUV_API int I420Rect(uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int x, int y, int width, int height, int value_y, int value_u, int value_v); // Draw a rectangle into ARGB. LIBYUV_API int ARGBRect(uint8_t* dst_argb, int dst_stride_argb, int dst_x, int dst_y, int width, int height, uint32_t value); // Convert ARGB to gray scale ARGB. LIBYUV_API int ARGBGrayTo(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Make a rectangle of ARGB gray scale. LIBYUV_API int ARGBGray(uint8_t* dst_argb, int dst_stride_argb, int dst_x, int dst_y, int width, int height); // Make a rectangle of ARGB Sepia tone. LIBYUV_API int ARGBSepia(uint8_t* dst_argb, int dst_stride_argb, int dst_x, int dst_y, int width, int height); // Apply a matrix rotation to each ARGB pixel. // matrix_argb is 4 signed ARGB values. -128 to 127 representing -2 to 2. // The first 4 coefficients apply to B, G, R, A and produce B of the output. // The next 4 coefficients apply to B, G, R, A and produce G of the output. // The next 4 coefficients apply to B, G, R, A and produce R of the output. // The last 4 coefficients apply to B, G, R, A and produce A of the output. 
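// ---------------------------------------------------------------------------
// Worked example (added, not part of the original header) for
// ARGBColorMatrix, declared next: coefficients are int8 values in
// [-128, 127] representing gains in [-2, 2], so 64 encodes a gain of 1.0
// and 32 a gain of 0.5. The identity transform therefore places 64 on the
// diagonal:
static const int8_t kARGBIdentityMatrixSketch[16] = {
  64, 0, 0, 0,  // B output = 1.0 * B input
  0, 64, 0, 0,  // G output = 1.0 * G input
  0, 0, 64, 0,  // R output = 1.0 * R input
  0, 0, 0, 64,  // A output = 1.0 * A input
};
// e.g. ARGBColorMatrix(src_argb, src_stride, dst_argb, dst_stride,
//                      kARGBIdentityMatrixSketch, width, height);
// ---------------------------------------------------------------------------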
LIBYUV_API int ARGBColorMatrix(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, const int8_t* matrix_argb, int width, int height); // Deprecated. Use ARGBColorMatrix instead. // Apply a matrix rotation to each ARGB pixel. // matrix_argb is 3 signed ARGB values. -128 to 127 representing -1 to 1. // The first 4 coefficients apply to B, G, R, A and produce B of the output. // The next 4 coefficients apply to B, G, R, A and produce G of the output. // The last 4 coefficients apply to B, G, R, A and produce R of the output. LIBYUV_API int RGBColorMatrix(uint8_t* dst_argb, int dst_stride_argb, const int8_t* matrix_rgb, int dst_x, int dst_y, int width, int height); // Apply a color table to each ARGB pixel. // Table contains 256 ARGB values. LIBYUV_API int ARGBColorTable(uint8_t* dst_argb, int dst_stride_argb, const uint8_t* table_argb, int dst_x, int dst_y, int width, int height); // Apply a color table to each ARGB pixel but preserve destination alpha. // Table contains 256 ARGB values. LIBYUV_API int RGBColorTable(uint8_t* dst_argb, int dst_stride_argb, const uint8_t* table_argb, int dst_x, int dst_y, int width, int height); // Apply a luma/color table to each ARGB pixel but preserve destination alpha. // Table contains 32768 values indexed by [Y][C] where Y is 7 bit luma from // RGB (YJ style) and C is an 8 bit color component (R, G or B). LIBYUV_API int ARGBLumaColorTable(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, const uint8_t* luma, int width, int height); // Apply a 3 term polynomial to ARGB values. // poly points to a 4x4 matrix. The first row is constants. The 2nd row is // coefficients for b, g, r and a. The 3rd row is coefficients for b squared, // g squared, r squared and a squared. The 4th row is coefficients for b to // the 3, g to the 3, r to the 3 and a to the 3. The values are summed and // result clamped to 0 to 255. // A polynomial approximation can be derived using software such as 'R'. LIBYUV_API int ARGBPolynomial(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, const float* poly, int width, int height); // Convert plane of 16 bit shorts to half floats. // Source values are multiplied by scale before storing as half float. LIBYUV_API int HalfFloatPlane(const uint16_t* src_y, int src_stride_y, uint16_t* dst_y, int dst_stride_y, float scale, int width, int height); // Convert a buffer of bytes to floats, scale the values and store as floats. LIBYUV_API int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width); // Quantize a rectangle of ARGB. Alpha unaffected. // scale is a 16 bit fractional fixed point scaler between 0 and 65535. // interval_size should be a value between 1 and 255. // interval_offset should be a value between 0 and 255. LIBYUV_API int ARGBQuantize(uint8_t* dst_argb, int dst_stride_argb, int scale, int interval_size, int interval_offset, int dst_x, int dst_y, int width, int height); // Copy ARGB to ARGB. LIBYUV_API int ARGBCopy(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Copy Alpha channel of ARGB to alpha of ARGB. LIBYUV_API int ARGBCopyAlpha(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Extract the alpha channel from ARGB. LIBYUV_API int ARGBExtractAlpha(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_a, int dst_stride_a, int width, int height);
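// ---------------------------------------------------------------------------
// Worked example (added, not part of the original header) for
// ARGBPolynomial, declared above: |poly| is 16 floats read as four rows of
// {b, g, r, a} coefficients for the constant, linear, squared and cubed
// terms. The identity transform is a zero constant, a linear gain of 1.0
// and no higher-order terms:
static const float kARGBIdentityPolySketch[16] = {
  0.f, 0.f, 0.f, 0.f,  // constants
  1.f, 1.f, 1.f, 1.f,  // linear b, g, r, a
  0.f, 0.f, 0.f, 0.f,  // squared terms
  0.f, 0.f, 0.f, 0.f,  // cubed terms
};
// A tone curve is expressed by fitting values for all four rows offline;
// per the comment above, such a fit can be derived with software like 'R'.
// ---------------------------------------------------------------------------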
// Copy Y channel to Alpha of ARGB. LIBYUV_API int ARGBCopyYToAlpha(const uint8_t* src_y, int src_stride_y, uint8_t* dst_argb, int dst_stride_argb, int width, int height); typedef void (*ARGBBlendRow)(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); // Get function to Alpha Blend ARGB pixels and store to destination. LIBYUV_API ARGBBlendRow GetARGBBlend(); // Alpha Blend ARGB images and store to destination. // Source is pre-multiplied by alpha using ARGBAttenuate. // Alpha of destination is set to 255. LIBYUV_API int ARGBBlend(const uint8_t* src_argb0, int src_stride_argb0, const uint8_t* src_argb1, int src_stride_argb1, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Alpha Blend plane and store to destination. // Source is not pre-multiplied by alpha. LIBYUV_API int BlendPlane(const uint8_t* src_y0, int src_stride_y0, const uint8_t* src_y1, int src_stride_y1, const uint8_t* alpha, int alpha_stride, uint8_t* dst_y, int dst_stride_y, int width, int height); // Alpha Blend YUV images and store to destination. // Source is not pre-multiplied by alpha. // Alpha is full width x height and subsampled to half size to apply to UV. LIBYUV_API int I420Blend(const uint8_t* src_y0, int src_stride_y0, const uint8_t* src_u0, int src_stride_u0, const uint8_t* src_v0, int src_stride_v0, const uint8_t* src_y1, int src_stride_y1, const uint8_t* src_u1, int src_stride_u1, const uint8_t* src_v1, int src_stride_v1, const uint8_t* alpha, int alpha_stride, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height); // Multiply ARGB image by ARGB image. Shifted down by 8. Saturates to 255. LIBYUV_API int ARGBMultiply(const uint8_t* src_argb0, int src_stride_argb0, const uint8_t* src_argb1, int src_stride_argb1, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Add ARGB image with ARGB image. Saturates to 255. LIBYUV_API int ARGBAdd(const uint8_t* src_argb0, int src_stride_argb0, const uint8_t* src_argb1, int src_stride_argb1, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Subtract ARGB image (argb1) from ARGB image (argb0). Saturates to 0. LIBYUV_API int ARGBSubtract(const uint8_t* src_argb0, int src_stride_argb0, const uint8_t* src_argb1, int src_stride_argb1, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert I422 to YUY2. LIBYUV_API int I422ToYUY2(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_yuy2, int dst_stride_yuy2, int width, int height); // Convert I422 to UYVY. LIBYUV_API int I422ToUYVY(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_uyvy, int dst_stride_uyvy, int width, int height); // Convert unattenuated ARGB to preattenuated ARGB. LIBYUV_API int ARGBAttenuate(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Convert preattenuated ARGB to unattenuated ARGB. LIBYUV_API int ARGBUnattenuate(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height);
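// ---------------------------------------------------------------------------
// Usage sketch (added, not part of the original header) combining
// ARGBAttenuate and ARGBBlend, both declared above: per the ARGBBlend
// comment, sources are pre-multiplied via ARGBAttenuate, so straight-alpha
// input is attenuated before compositing. Buffer names are placeholders and
// return codes are ignored for brevity.
static void BlendStraightAlphaSketch(const uint8_t* fg, int fg_stride,
                                     uint8_t* fg_premul, int fg_premul_stride,
                                     const uint8_t* bg_premul, int bg_stride,
                                     uint8_t* dst, int dst_stride,
                                     int width, int height) {
  // Premultiply the straight-alpha foreground: B, G and R are each scaled
  // by the pixel's alpha.
  ARGBAttenuate(fg, fg_stride, fg_premul, fg_premul_stride, width, height);
  // Composite; destination alpha is set to 255 per the ARGBBlend comment.
  ARGBBlend(fg_premul, fg_premul_stride, bg_premul, bg_stride, dst,
            dst_stride, width, height);
}
// ---------------------------------------------------------------------------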
// Internal function - do not call directly. // Computes table of cumulative sum for image where the value is the sum // of all values above and to the left of the entry. Used by ARGBBlur. LIBYUV_API int ARGBComputeCumulativeSum(const uint8_t* src_argb, int src_stride_argb, int32_t* dst_cumsum, int dst_stride32_cumsum, int width, int height); // Blur ARGB image. // dst_cumsum table of width * (height + 1) * 16 bytes aligned to // 16 byte boundary. // dst_stride32_cumsum is number of ints in a row (width * 4). // radius is number of pixels around the center. e.g. 1 = 3x3, 2 = 5x5. // Blur is optimized for radius of 5 (11x11) or less. LIBYUV_API int ARGBBlur(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int32_t* dst_cumsum, int dst_stride32_cumsum, int width, int height, int radius); // Multiply ARGB image by ARGB value. LIBYUV_API int ARGBShade(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height, uint32_t value); // Interpolate between two images using specified amount of interpolation // (0 to 255) and store to destination. // 'interpolation' is specified as 8 bit fraction where 0 means 100% src0 // and 255 means 1% src0 and 99% src1. LIBYUV_API int InterpolatePlane(const uint8_t* src0, int src_stride0, const uint8_t* src1, int src_stride1, uint8_t* dst, int dst_stride, int width, int height, int interpolation); // Interpolate between two ARGB images using specified amount of interpolation // Internally calls InterpolatePlane with width * 4 (bpp). LIBYUV_API int ARGBInterpolate(const uint8_t* src_argb0, int src_stride_argb0, const uint8_t* src_argb1, int src_stride_argb1, uint8_t* dst_argb, int dst_stride_argb, int width, int height, int interpolation); // Interpolate between two YUV images using specified amount of interpolation // Internally calls InterpolatePlane on each plane where the U and V planes // are half width and half height. LIBYUV_API int I420Interpolate(const uint8_t* src0_y, int src0_stride_y, const uint8_t* src0_u, int src0_stride_u, const uint8_t* src0_v, int src0_stride_v, const uint8_t* src1_y, int src1_stride_y, const uint8_t* src1_u, int src1_stride_u, const uint8_t* src1_v, int src1_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height, int interpolation); // Row function for copying pixels from a source with a slope to a row // of destination. Useful for scaling, rotation, mirror, texture mapping. LIBYUV_API void ARGBAffineRow_C(const uint8_t* src_argb, int src_argb_stride, uint8_t* dst_argb, const float* uv_dudv, int width); // TODO(fbarchard): Move ARGBAffineRow_SSE2 to row.h LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb, int src_argb_stride, uint8_t* dst_argb, const float* uv_dudv, int width); // Shuffle ARGB channel order. e.g. BGRA to ARGB. // shuffler is 16 bytes and must be aligned. LIBYUV_API int ARGBShuffle(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_argb, int dst_stride_argb, const uint8_t* shuffler, int width, int height); // Sobel ARGB effect with planar output. LIBYUV_API int ARGBSobelToPlane(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, int width, int height); // Sobel ARGB effect. LIBYUV_API int ARGBSobel(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height); // Sobel ARGB effect w/ Sobel X, Sobel, Sobel Y in ARGB.
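// ---------------------------------------------------------------------------
// Added sketch (not part of the original header): reading the comment above,
// ARGBSobelXY appears to pack Sobel X into R, the combined Sobel value into
// G and Sobel Y into B, with alpha forced opaque (bytes in B, G, R, A
// memory order). Per-pixel packing under that assumption:
static void SobelXYPixelSketch(uint8_t sobelx, uint8_t sobely,
                               uint8_t dst_argb[4]) {
  int sum = sobelx + sobely;
  dst_argb[0] = sobely;                            // B = Sobel Y
  dst_argb[1] = (uint8_t)(sum > 255 ? 255 : sum);  // G = clamped sum
  dst_argb[2] = sobelx;                            // R = Sobel X
  dst_argb[3] = 255;                               // A = opaque
}
// ---------------------------------------------------------------------------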
LIBYUV_API int ARGBSobelXY(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_PLANAR_FUNCTIONS_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/rotate.h000066400000000000000000000112171357355204000233030ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_ROTATE_H_ #define INCLUDE_LIBYUV_ROTATE_H_ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Supported rotation. typedef enum RotationMode { kRotate0 = 0, // No rotation. kRotate90 = 90, // Rotate 90 degrees clockwise. kRotate180 = 180, // Rotate 180 degrees. kRotate270 = 270, // Rotate 270 degrees clockwise. // Deprecated. kRotateNone = 0, kRotateClockwise = 90, kRotateCounterClockwise = 270, } RotationModeEnum; // Rotate I420 frame. LIBYUV_API int I420Rotate(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height, enum RotationMode mode); // Rotate NV12 input and store in I420. LIBYUV_API int NV12ToI420Rotate(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height, enum RotationMode mode); // Rotate a plane by 0, 90, 180, or 270. LIBYUV_API int RotatePlane(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height, enum RotationMode mode); // Rotate planes by 90, 180, 270. Deprecated. LIBYUV_API void RotatePlane90(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height); LIBYUV_API void RotatePlane180(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height); LIBYUV_API void RotatePlane270(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height); LIBYUV_API void RotateUV90(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width, int height); // Rotations for when U and V are interleaved. // These functions take one input pointer and // split the data into two buffers while // rotating them. Deprecated. LIBYUV_API void RotateUV180(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width, int height); LIBYUV_API void RotateUV270(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width, int height); // The 90 and 270 functions are based on transposes. // Doing a transpose with reversing the read/write // order will result in a rotation by +- 90 degrees. // Deprecated. 
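// ---------------------------------------------------------------------------
// Added sketch (not part of the original header): the transpose/rotation
// relationship described above, in index form. A clockwise 90 degree
// rotation of a width x height plane yields a height x width plane with
// dst(x, y) = src(y, height - 1 - x): a transpose plus a reversed read
// order. A naive reference version:
static void Rotate90Sketch(const uint8_t* src, int src_stride,
                           uint8_t* dst, int dst_stride,
                           int width, int height) {
  // The destination is height columns wide and width rows tall.
  for (int y = 0; y < width; ++y) {
    for (int x = 0; x < height; ++x) {
      dst[y * dst_stride + x] = src[(height - 1 - x) * src_stride + y];
    }
  }
}
// ---------------------------------------------------------------------------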
LIBYUV_API void TransposePlane(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height); LIBYUV_API void TransposeUV(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width, int height); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_ROTATE_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/rotate_argb.h000066400000000000000000000017671357355204000243070ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_ROTATE_ARGB_H_ #define INCLUDE_LIBYUV_ROTATE_ARGB_H_ #include "libyuv/basic_types.h" #include "libyuv/rotate.h" // For RotationMode. #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Rotate ARGB frame LIBYUV_API int ARGBRotate(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int src_width, int src_height, enum RotationMode mode); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_ROTATE_ARGB_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/rotate_row.h000066400000000000000000000155021357355204000241730ustar00rootroot00000000000000/* * Copyright 2013 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_ROTATE_ROW_H_ #define INCLUDE_LIBYUV_ROTATE_ROW_H_ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #if defined(__pnacl__) || defined(__CLR_VER) || \ (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif #if defined(__native_client__) #define LIBYUV_DISABLE_NEON #endif // MemorySanitizer does not support assembly code yet. 
http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif // The following are available for Visual C and clangcl 32 bit: #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) #define HAS_TRANSPOSEWX8_SSSE3 #define HAS_TRANSPOSEUVWX8_SSE2 #endif // The following are available for GCC 32 or 64 bit: #if !defined(LIBYUV_DISABLE_X86) && (defined(__i386__) || defined(__x86_64__)) #define HAS_TRANSPOSEWX8_SSSE3 #endif // The following are available for 64 bit GCC: #if !defined(LIBYUV_DISABLE_X86) && defined(__x86_64__) #define HAS_TRANSPOSEWX8_FAST_SSSE3 #define HAS_TRANSPOSEUVWX8_SSE2 #endif #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_TRANSPOSEWX8_NEON #define HAS_TRANSPOSEUVWX8_NEON #endif #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #define HAS_TRANSPOSEWX16_MSA #define HAS_TRANSPOSEUVWX16_MSA #endif void TransposeWxH_C(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height); void TransposeWx8_C(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width); void TransposeWx16_C(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width); void TransposeWx8_NEON(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width); void TransposeWx8_SSSE3(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width); void TransposeWx8_Fast_SSSE3(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width); void TransposeWx16_MSA(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width); void TransposeWx8_Any_NEON(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width); void TransposeWx8_Any_SSSE3(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width); void TransposeWx8_Fast_Any_SSSE3(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width); void TransposeWx16_Any_MSA(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width); void TransposeUVWxH_C(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width, int height); void TransposeUVWx8_C(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width); void TransposeUVWx16_C(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width); void TransposeUVWx8_SSE2(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width); void TransposeUVWx8_NEON(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width); void TransposeUVWx16_MSA(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width); void TransposeUVWx8_Any_SSE2(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width); void TransposeUVWx8_Any_NEON(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width); void TransposeUVWx16_Any_MSA(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_ROTATE_ROW_H_ 
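// ---------------------------------------------------------------------------
// Added sketch (not part of the original sources): how the HAS_* macros
// above are typically consumed. Each optimized row function exists only when
// its macro is defined, and a caller picks an implementation at runtime,
// keeping the portable _C version as the fallback. The CPU-feature booleans
// here are parameters rather than a real detection call:
#include "libyuv/rotate_row.h"

typedef void (*TransposeWx8Fn)(const uint8_t* src, int src_stride,
                               uint8_t* dst, int dst_stride, int width);

static TransposeWx8Fn SelectTransposeWx8Sketch(int cpu_has_neon,
                                               int cpu_has_ssse3) {
  TransposeWx8Fn fn = libyuv::TransposeWx8_C;  // Always-available fallback.
#if defined(HAS_TRANSPOSEWX8_SSSE3)
  if (cpu_has_ssse3) fn = libyuv::TransposeWx8_Any_SSSE3;
#endif
#if defined(HAS_TRANSPOSEWX8_NEON)
  if (cpu_has_neon) fn = libyuv::TransposeWx8_Any_NEON;  // Any width.
#endif
  return fn;
}
// ---------------------------------------------------------------------------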
libvpx-1.8.2/third_party/libyuv/include/libyuv/row.h000066400000000000000000004512341357355204000226230ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_ROW_H_ #define INCLUDE_LIBYUV_ROW_H_ #include <stdlib.h> // For malloc. #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #if defined(__pnacl__) || defined(__CLR_VER) || \ (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif #if defined(__native_client__) #define LIBYUV_DISABLE_NEON #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif // clang >= 3.5.0 required for Arm64. #if defined(__clang__) && defined(__aarch64__) && !defined(LIBYUV_DISABLE_NEON) #if (__clang_major__ < 3) || (__clang_major__ == 3 && (__clang_minor__ < 5)) #define LIBYUV_DISABLE_NEON #endif // clang >= 3.5 #endif // __clang__ // GCC >= 4.7.0 required for AVX2. #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) #define GCC_HAS_AVX2 1 #endif // GNUC >= 4.7 #endif // __GNUC__ // clang >= 3.4.0 required for AVX2. #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) #if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4)) #define CLANG_HAS_AVX2 1 #endif // clang >= 3.4 #endif // __clang__ // clang >= 6.0.0 required for AVX512. // TODO(fbarchard): fix xcode 9 ios b/789. #if 0 // Build fails in libvpx on Mac #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) #if (__clang_major__ >= 7) && !defined(__APPLE_EMBEDDED_SIMULATOR__) #define CLANG_HAS_AVX512 1 #endif // clang >= 7 #endif // __clang__ #endif // 0 // Visual C 2012 required for AVX2.
#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \ _MSC_VER >= 1700 #define VISUALC_HAS_AVX2 1 #endif // VisualStudio >= 2012 // The following are available on all x86 platforms: #if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) // Conversions: #define HAS_ABGRTOUVROW_SSSE3 #define HAS_ABGRTOYROW_SSSE3 #define HAS_ARGB1555TOARGBROW_SSE2 #define HAS_ARGB4444TOARGBROW_SSE2 #define HAS_ARGBEXTRACTALPHAROW_SSE2 #define HAS_ARGBSETROW_X86 #define HAS_ARGBSHUFFLEROW_SSSE3 #define HAS_ARGBTOARGB1555ROW_SSE2 #define HAS_ARGBTOARGB4444ROW_SSE2 #define HAS_ARGBTORAWROW_SSSE3 #define HAS_ARGBTORGB24ROW_SSSE3 #define HAS_ARGBTORGB565DITHERROW_SSE2 #define HAS_ARGBTORGB565ROW_SSE2 #define HAS_ARGBTOUV444ROW_SSSE3 #define HAS_ARGBTOUVJROW_SSSE3 #define HAS_ARGBTOUVROW_SSSE3 #define HAS_ARGBTOYJROW_SSSE3 #define HAS_ARGBTOYROW_SSSE3 #define HAS_BGRATOUVROW_SSSE3 #define HAS_BGRATOYROW_SSSE3 #define HAS_COPYROW_ERMS #define HAS_COPYROW_SSE2 #define HAS_H422TOARGBROW_SSSE3 #define HAS_HALFFLOATROW_SSE2 #define HAS_I400TOARGBROW_SSE2 #define HAS_I422TOARGB1555ROW_SSSE3 #define HAS_I422TOARGB4444ROW_SSSE3 #define HAS_I422TOARGBROW_SSSE3 #define HAS_I422TORGB24ROW_SSSE3 #define HAS_I422TORGB565ROW_SSSE3 #define HAS_I422TORGBAROW_SSSE3 #define HAS_I422TOUYVYROW_SSE2 #define HAS_I422TOYUY2ROW_SSE2 #define HAS_I444TOARGBROW_SSSE3 #define HAS_J400TOARGBROW_SSE2 #define HAS_J422TOARGBROW_SSSE3 #define HAS_MERGEUVROW_SSE2 #define HAS_MIRRORROW_SSSE3 #define HAS_MIRRORUVROW_SSSE3 #define HAS_NV12TOARGBROW_SSSE3 #define HAS_NV12TORGB24ROW_SSSE3 #define HAS_NV12TORGB565ROW_SSSE3 #define HAS_NV21TOARGBROW_SSSE3 #define HAS_NV21TORGB24ROW_SSSE3 #define HAS_RAWTOARGBROW_SSSE3 #define HAS_RAWTORGB24ROW_SSSE3 #define HAS_RAWTOYROW_SSSE3 #define HAS_RGB24TOARGBROW_SSSE3 #define HAS_RGB24TOYROW_SSSE3 #define HAS_RGB565TOARGBROW_SSE2 #define HAS_RGBATOUVROW_SSSE3 #define HAS_RGBATOYROW_SSSE3 #define HAS_SETROW_ERMS #define HAS_SETROW_X86 #define HAS_SPLITUVROW_SSE2 #define HAS_UYVYTOARGBROW_SSSE3 #define HAS_UYVYTOUV422ROW_SSE2 #define HAS_UYVYTOUVROW_SSE2 #define HAS_UYVYTOYROW_SSE2 #define HAS_YUY2TOARGBROW_SSSE3 #define HAS_YUY2TOUV422ROW_SSE2 #define HAS_YUY2TOUVROW_SSE2 #define HAS_YUY2TOYROW_SSE2 // Effects: #define HAS_ARGBADDROW_SSE2 #define HAS_ARGBAFFINEROW_SSE2 #define HAS_ARGBATTENUATEROW_SSSE3 #define HAS_ARGBBLENDROW_SSSE3 #define HAS_ARGBCOLORMATRIXROW_SSSE3 #define HAS_ARGBCOLORTABLEROW_X86 #define HAS_ARGBCOPYALPHAROW_SSE2 #define HAS_ARGBCOPYYTOALPHAROW_SSE2 #define HAS_ARGBGRAYROW_SSSE3 #define HAS_ARGBLUMACOLORTABLEROW_SSSE3 #define HAS_ARGBMIRRORROW_SSE2 #define HAS_ARGBMULTIPLYROW_SSE2 #define HAS_ARGBPOLYNOMIALROW_SSE2 #define HAS_ARGBQUANTIZEROW_SSE2 #define HAS_ARGBSEPIAROW_SSSE3 #define HAS_ARGBSHADEROW_SSE2 #define HAS_ARGBSUBTRACTROW_SSE2 #define HAS_ARGBUNATTENUATEROW_SSE2 #define HAS_BLENDPLANEROW_SSSE3 #define HAS_COMPUTECUMULATIVESUMROW_SSE2 #define HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 #define HAS_INTERPOLATEROW_SSSE3 #define HAS_RGBCOLORTABLEROW_X86 #define HAS_SOBELROW_SSE2 #define HAS_SOBELTOPLANEROW_SSE2 #define HAS_SOBELXROW_SSE2 #define HAS_SOBELXYROW_SSE2 #define HAS_SOBELYROW_SSE2 // The following functions fail on gcc/clang 32 bit with fpic and framepointer. // caveat: clangcl uses row_win.cc which works. 
#if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \ defined(_MSC_VER) // TODO(fbarchard): fix build error on android_full_debug=1 // https://code.google.com/p/libyuv/issues/detail?id=517 #define HAS_I422ALPHATOARGBROW_SSSE3 #endif #endif // The following are available on all x86 platforms, but // require VS2012, clang 3.4 or gcc 4.7. #if !defined(LIBYUV_DISABLE_X86) && \ (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \ defined(GCC_HAS_AVX2)) #define HAS_ARGBCOPYALPHAROW_AVX2 #define HAS_ARGBCOPYYTOALPHAROW_AVX2 #define HAS_ARGBEXTRACTALPHAROW_AVX2 #define HAS_ARGBMIRRORROW_AVX2 #define HAS_ARGBPOLYNOMIALROW_AVX2 #define HAS_ARGBSHUFFLEROW_AVX2 #define HAS_ARGBTORGB565DITHERROW_AVX2 #define HAS_ARGBTOUVJROW_AVX2 #define HAS_ARGBTOUVROW_AVX2 #define HAS_ARGBTOYJROW_AVX2 #define HAS_ARGBTOYROW_AVX2 #define HAS_COPYROW_AVX #define HAS_H422TOARGBROW_AVX2 #define HAS_HALFFLOATROW_AVX2 // #define HAS_HALFFLOATROW_F16C // Enable to test halffloat cast #define HAS_I400TOARGBROW_AVX2 #define HAS_I422TOARGB1555ROW_AVX2 #define HAS_I422TOARGB4444ROW_AVX2 #define HAS_I422TOARGBROW_AVX2 #define HAS_I422TORGB24ROW_AVX2 #define HAS_I422TORGB565ROW_AVX2 #define HAS_I422TORGBAROW_AVX2 #define HAS_I444TOARGBROW_AVX2 #define HAS_INTERPOLATEROW_AVX2 #define HAS_J422TOARGBROW_AVX2 #define HAS_MERGEUVROW_AVX2 #define HAS_MIRRORROW_AVX2 #define HAS_NV12TOARGBROW_AVX2 #define HAS_NV12TORGB24ROW_AVX2 #define HAS_NV12TORGB565ROW_AVX2 #define HAS_NV21TOARGBROW_AVX2 #define HAS_NV21TORGB24ROW_AVX2 #define HAS_SPLITUVROW_AVX2 #define HAS_UYVYTOARGBROW_AVX2 #define HAS_UYVYTOUV422ROW_AVX2 #define HAS_UYVYTOUVROW_AVX2 #define HAS_UYVYTOYROW_AVX2 #define HAS_YUY2TOARGBROW_AVX2 #define HAS_YUY2TOUV422ROW_AVX2 #define HAS_YUY2TOUVROW_AVX2 #define HAS_YUY2TOYROW_AVX2 // Effects: #define HAS_ARGBADDROW_AVX2 #define HAS_ARGBATTENUATEROW_AVX2 #define HAS_ARGBMULTIPLYROW_AVX2 #define HAS_ARGBSUBTRACTROW_AVX2 #define HAS_ARGBUNATTENUATEROW_AVX2 #define HAS_BLENDPLANEROW_AVX2 #if defined(__x86_64__) || !defined(__pic__) || defined(__clang__) || \ defined(_MSC_VER) // TODO(fbarchard): fix build error on android_full_debug=1 // https://code.google.com/p/libyuv/issues/detail?id=517 #define HAS_I422ALPHATOARGBROW_AVX2 #endif #endif // The following are available for AVX2 Visual C and clangcl 32 bit: // TODO(fbarchard): Port to gcc. #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) && \ (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2)) #define HAS_ARGB1555TOARGBROW_AVX2 #define HAS_ARGB4444TOARGBROW_AVX2 #define HAS_ARGBTOARGB1555ROW_AVX2 #define HAS_ARGBTOARGB4444ROW_AVX2 #define HAS_ARGBTORGB565ROW_AVX2 #define HAS_J400TOARGBROW_AVX2 #define HAS_RGB565TOARGBROW_AVX2 #endif // The following are also available on x64 Visual C. #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && defined(_M_X64) && \ (!defined(__clang__) || defined(__SSSE3__)) #define HAS_I422ALPHATOARGBROW_SSSE3 #define HAS_I422TOARGBROW_SSSE3 #endif // The following are available for gcc/clang x86 platforms: // TODO(fbarchard): Port to Visual C #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) #define HAS_ABGRTOAR30ROW_SSSE3 #define HAS_ARGBTOAR30ROW_SSSE3 #define HAS_CONVERT16TO8ROW_SSSE3 #define HAS_CONVERT8TO16ROW_SSE2 // I210 is for H010. 2 = 422. I for 601 vs H for 709. 
#define HAS_I210TOAR30ROW_SSSE3 #define HAS_I210TOARGBROW_SSSE3 #define HAS_I422TOAR30ROW_SSSE3 #define HAS_MERGERGBROW_SSSE3 #define HAS_SPLITRGBROW_SSSE3 #endif // The following are available for AVX2 gcc/clang x86 platforms: // TODO(fbarchard): Port to Visual C #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \ (defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2)) #define HAS_ABGRTOAR30ROW_AVX2 #define HAS_ARGBTOAR30ROW_AVX2 #define HAS_ARGBTORAWROW_AVX2 #define HAS_ARGBTORGB24ROW_AVX2 #define HAS_CONVERT16TO8ROW_AVX2 #define HAS_CONVERT8TO16ROW_AVX2 #define HAS_I210TOAR30ROW_AVX2 #define HAS_I210TOARGBROW_AVX2 #define HAS_I422TOAR30ROW_AVX2 #define HAS_I422TOUYVYROW_AVX2 #define HAS_I422TOYUY2ROW_AVX2 #define HAS_MERGEUVROW_16_AVX2 #define HAS_MULTIPLYROW_16_AVX2 #endif // The following are available for AVX512 clang x86 platforms: // TODO(fbarchard): Port to GCC and Visual C // TODO(fbarchard): re-enable HAS_ARGBTORGB24ROW_AVX512VBMI. Issue libyuv:789 #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) && \ (defined(CLANG_HAS_AVX512)) #define HAS_ARGBTORGB24ROW_AVX512VBMI #endif // The following are available on Neon platforms: #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__aarch64__) || defined(__ARM_NEON__) || defined(LIBYUV_NEON)) #define HAS_ABGRTOUVROW_NEON #define HAS_ABGRTOYROW_NEON #define HAS_ARGB1555TOARGBROW_NEON #define HAS_ARGB1555TOUVROW_NEON #define HAS_ARGB1555TOYROW_NEON #define HAS_ARGB4444TOARGBROW_NEON #define HAS_ARGB4444TOUVROW_NEON #define HAS_ARGB4444TOYROW_NEON #define HAS_ARGBEXTRACTALPHAROW_NEON #define HAS_ARGBSETROW_NEON #define HAS_ARGBTOARGB1555ROW_NEON #define HAS_ARGBTOARGB4444ROW_NEON #define HAS_ARGBTORAWROW_NEON #define HAS_ARGBTORGB24ROW_NEON #define HAS_ARGBTORGB565DITHERROW_NEON #define HAS_ARGBTORGB565ROW_NEON #define HAS_ARGBTOUV444ROW_NEON #define HAS_ARGBTOUVJROW_NEON #define HAS_ARGBTOUVROW_NEON #define HAS_ARGBTOYJROW_NEON #define HAS_ARGBTOYROW_NEON #define HAS_BGRATOUVROW_NEON #define HAS_BGRATOYROW_NEON #define HAS_BYTETOFLOATROW_NEON #define HAS_COPYROW_NEON #define HAS_HALFFLOATROW_NEON #define HAS_I400TOARGBROW_NEON #define HAS_I422ALPHATOARGBROW_NEON #define HAS_I422TOARGB1555ROW_NEON #define HAS_I422TOARGB4444ROW_NEON #define HAS_I422TOARGBROW_NEON #define HAS_I422TORGB24ROW_NEON #define HAS_I422TORGB565ROW_NEON #define HAS_I422TORGBAROW_NEON #define HAS_I422TOUYVYROW_NEON #define HAS_I422TOYUY2ROW_NEON #define HAS_I444TOARGBROW_NEON #define HAS_J400TOARGBROW_NEON #define HAS_MERGEUVROW_NEON #define HAS_MIRRORROW_NEON #define HAS_MIRRORUVROW_NEON #define HAS_NV12TOARGBROW_NEON #define HAS_NV12TORGB24ROW_NEON #define HAS_NV12TORGB565ROW_NEON #define HAS_NV21TOARGBROW_NEON #define HAS_NV21TORGB24ROW_NEON #define HAS_RAWTOARGBROW_NEON #define HAS_RAWTORGB24ROW_NEON #define HAS_RAWTOUVROW_NEON #define HAS_RAWTOYROW_NEON #define HAS_RGB24TOARGBROW_NEON #define HAS_RGB24TOUVROW_NEON #define HAS_RGB24TOYROW_NEON #define HAS_RGB565TOARGBROW_NEON #define HAS_RGB565TOUVROW_NEON #define HAS_RGB565TOYROW_NEON #define HAS_RGBATOUVROW_NEON #define HAS_RGBATOYROW_NEON #define HAS_SETROW_NEON #define HAS_SPLITRGBROW_NEON #define HAS_SPLITUVROW_NEON #define HAS_UYVYTOARGBROW_NEON #define HAS_UYVYTOUV422ROW_NEON #define HAS_UYVYTOUVROW_NEON #define HAS_UYVYTOYROW_NEON #define HAS_YUY2TOARGBROW_NEON #define HAS_YUY2TOUV422ROW_NEON #define HAS_YUY2TOUVROW_NEON #define HAS_YUY2TOYROW_NEON // Effects: #define HAS_ARGBADDROW_NEON #define 
HAS_ARGBATTENUATEROW_NEON #define HAS_ARGBBLENDROW_NEON #define HAS_ARGBCOLORMATRIXROW_NEON #define HAS_ARGBGRAYROW_NEON #define HAS_ARGBMIRRORROW_NEON #define HAS_ARGBMULTIPLYROW_NEON #define HAS_ARGBQUANTIZEROW_NEON #define HAS_ARGBSEPIAROW_NEON #define HAS_ARGBSHADEROW_NEON #define HAS_ARGBSHUFFLEROW_NEON #define HAS_ARGBSUBTRACTROW_NEON #define HAS_INTERPOLATEROW_NEON #define HAS_SOBELROW_NEON #define HAS_SOBELTOPLANEROW_NEON #define HAS_SOBELXROW_NEON #define HAS_SOBELXYROW_NEON #define HAS_SOBELYROW_NEON #endif // The following are available on AArch64 platforms: #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #define HAS_SCALESUMSAMPLES_NEON #endif #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #define HAS_ABGRTOUVROW_MSA #define HAS_ABGRTOYROW_MSA #define HAS_ARGB1555TOARGBROW_MSA #define HAS_ARGB1555TOUVROW_MSA #define HAS_ARGB1555TOYROW_MSA #define HAS_ARGB4444TOARGBROW_MSA #define HAS_ARGBADDROW_MSA #define HAS_ARGBATTENUATEROW_MSA #define HAS_ARGBBLENDROW_MSA #define HAS_ARGBCOLORMATRIXROW_MSA #define HAS_ARGBEXTRACTALPHAROW_MSA #define HAS_ARGBGRAYROW_MSA #define HAS_ARGBMIRRORROW_MSA #define HAS_ARGBMULTIPLYROW_MSA #define HAS_ARGBQUANTIZEROW_MSA #define HAS_ARGBSEPIAROW_MSA #define HAS_ARGBSETROW_MSA #define HAS_ARGBSHADEROW_MSA #define HAS_ARGBSHUFFLEROW_MSA #define HAS_ARGBSUBTRACTROW_MSA #define HAS_ARGBTOARGB1555ROW_MSA #define HAS_ARGBTOARGB4444ROW_MSA #define HAS_ARGBTORAWROW_MSA #define HAS_ARGBTORGB24ROW_MSA #define HAS_ARGBTORGB565DITHERROW_MSA #define HAS_ARGBTORGB565ROW_MSA #define HAS_ARGBTOUV444ROW_MSA #define HAS_ARGBTOUVJROW_MSA #define HAS_ARGBTOUVROW_MSA #define HAS_ARGBTOYJROW_MSA #define HAS_ARGBTOYROW_MSA #define HAS_BGRATOUVROW_MSA #define HAS_BGRATOYROW_MSA #define HAS_HALFFLOATROW_MSA #define HAS_I400TOARGBROW_MSA #define HAS_I422ALPHATOARGBROW_MSA #define HAS_I422TOARGBROW_MSA #define HAS_I422TORGB24ROW_MSA #define HAS_I422TORGBAROW_MSA #define HAS_I422TOUYVYROW_MSA #define HAS_I422TOYUY2ROW_MSA #define HAS_I444TOARGBROW_MSA #define HAS_INTERPOLATEROW_MSA #define HAS_J400TOARGBROW_MSA #define HAS_MERGEUVROW_MSA #define HAS_MIRRORROW_MSA #define HAS_MIRRORUVROW_MSA #define HAS_NV12TOARGBROW_MSA #define HAS_NV12TORGB565ROW_MSA #define HAS_NV21TOARGBROW_MSA #define HAS_RAWTOARGBROW_MSA #define HAS_RAWTORGB24ROW_MSA #define HAS_RAWTOUVROW_MSA #define HAS_RAWTOYROW_MSA #define HAS_RGB24TOARGBROW_MSA #define HAS_RGB24TOUVROW_MSA #define HAS_RGB24TOYROW_MSA #define HAS_RGB565TOARGBROW_MSA #define HAS_RGB565TOUVROW_MSA #define HAS_RGB565TOYROW_MSA #define HAS_RGBATOUVROW_MSA #define HAS_RGBATOYROW_MSA #define HAS_SETROW_MSA #define HAS_SOBELROW_MSA #define HAS_SOBELTOPLANEROW_MSA #define HAS_SOBELXROW_MSA #define HAS_SOBELXYROW_MSA #define HAS_SOBELYROW_MSA #define HAS_SPLITUVROW_MSA #define HAS_UYVYTOARGBROW_MSA #define HAS_UYVYTOUVROW_MSA #define HAS_UYVYTOYROW_MSA #define HAS_YUY2TOARGBROW_MSA #define HAS_YUY2TOUV422ROW_MSA #define HAS_YUY2TOUVROW_MSA #define HAS_YUY2TOYROW_MSA #endif #if defined(_MSC_VER) && !defined(__CLR_VER) && !defined(__clang__) #if defined(VISUALC_HAS_AVX2) #define SIMD_ALIGNED(var) __declspec(align(32)) var #else #define SIMD_ALIGNED(var) __declspec(align(16)) var #endif typedef __declspec(align(16)) int16_t vec16[8]; typedef __declspec(align(16)) int32_t vec32[4]; typedef __declspec(align(16)) int8_t vec8[16]; typedef __declspec(align(16)) uint16_t uvec16[8]; typedef __declspec(align(16)) uint32_t uvec32[4]; typedef __declspec(align(16)) uint8_t uvec8[16]; typedef __declspec(align(32)) int16_t lvec16[16]; 
typedef __declspec(align(32)) int32_t lvec32[8]; typedef __declspec(align(32)) int8_t lvec8[32]; typedef __declspec(align(32)) uint16_t ulvec16[16]; typedef __declspec(align(32)) uint32_t ulvec32[8]; typedef __declspec(align(32)) uint8_t ulvec8[32]; #elif !defined(__pnacl__) && (defined(__GNUC__) || defined(__clang__)) // Caveat GCC 4.2 to 4.7 have a known issue using vectors with const. #if defined(CLANG_HAS_AVX2) || defined(GCC_HAS_AVX2) #define SIMD_ALIGNED(var) var __attribute__((aligned(32))) #else #define SIMD_ALIGNED(var) var __attribute__((aligned(16))) #endif typedef int16_t __attribute__((vector_size(16))) vec16; typedef int32_t __attribute__((vector_size(16))) vec32; typedef int8_t __attribute__((vector_size(16))) vec8; typedef uint16_t __attribute__((vector_size(16))) uvec16; typedef uint32_t __attribute__((vector_size(16))) uvec32; typedef uint8_t __attribute__((vector_size(16))) uvec8; typedef int16_t __attribute__((vector_size(32))) lvec16; typedef int32_t __attribute__((vector_size(32))) lvec32; typedef int8_t __attribute__((vector_size(32))) lvec8; typedef uint16_t __attribute__((vector_size(32))) ulvec16; typedef uint32_t __attribute__((vector_size(32))) ulvec32; typedef uint8_t __attribute__((vector_size(32))) ulvec8; #else #define SIMD_ALIGNED(var) var typedef int16_t vec16[8]; typedef int32_t vec32[4]; typedef int8_t vec8[16]; typedef uint16_t uvec16[8]; typedef uint32_t uvec32[4]; typedef uint8_t uvec8[16]; typedef int16_t lvec16[16]; typedef int32_t lvec32[8]; typedef int8_t lvec8[32]; typedef uint16_t ulvec16[16]; typedef uint32_t ulvec32[8]; typedef uint8_t ulvec8[32]; #endif #if defined(__aarch64__) // This struct is for Arm64 color conversion. struct YuvConstants { uvec16 kUVToRB; uvec16 kUVToRB2; uvec16 kUVToG; uvec16 kUVToG2; vec16 kUVBiasBGR; vec32 kYToRgb; }; #elif defined(__arm__) // This struct is for ArmV7 color conversion. struct YuvConstants { uvec8 kUVToRB; uvec8 kUVToG; vec16 kUVBiasBGR; vec32 kYToRgb; }; #else // This struct is for Intel color conversion. struct YuvConstants { int8_t kUVToB[32]; int8_t kUVToG[32]; int8_t kUVToR[32]; int16_t kUVBiasB[16]; int16_t kUVBiasG[16]; int16_t kUVBiasR[16]; int16_t kYToRgb[16]; }; // Offsets into YuvConstants structure #define KUVTOB 0 #define KUVTOG 32 #define KUVTOR 64 #define KUVBIASB 96 #define KUVBIASG 128 #define KUVBIASR 160 #define KYTORGB 192 #endif // Conversion matrix for YUV to RGB extern const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants); // BT.601 extern const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants); // JPeg extern const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants); // BT.709 // Conversion matrix for YVU to BGR extern const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants); // BT.601 extern const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants); // JPeg extern const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants); // BT.709 #define IS_ALIGNED(p, a) (!((uintptr_t)(p) & ((a)-1))) #define align_buffer_64(var, size) \ uint8_t* var##_mem = (uint8_t*)(malloc((size) + 63)); /* NOLINT */ \ uint8_t* var = (uint8_t*)(((intptr_t)(var##_mem) + 63) & ~63) /* NOLINT */ #define free_aligned_buffer_64(var) \ free(var##_mem); \ var = 0 #if defined(__APPLE__) || defined(__x86_64__) || defined(__llvm__) #define OMITFP #else #define OMITFP __attribute__((optimize("omit-frame-pointer"))) #endif // NaCL macros for GCC x86 and x64. #if defined(__native_client__) #define LABELALIGN ".p2align 5\n" #else #define LABELALIGN #endif
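// ---------------------------------------------------------------------------
// Added sketch (not part of the original header): what the alignment helpers
// above expand to. align_buffer_64 over-allocates by 63 bytes and rounds the
// usable pointer up to the next 64-byte boundary, which IS_ALIGNED can then
// check:
static int AlignedBufferSketch(void) {
  align_buffer_64(row, 1024);  // Declares row_mem (raw) and row (aligned).
  int aligned = IS_ALIGNED(row, 64);  // True by construction.
  free_aligned_buffer_64(row);  // Frees row_mem and zeroes row.
  return aligned;
}
// ---------------------------------------------------------------------------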
// Intel Code Analyzer markers. Insert IACA_START IACA_END around code to be // measured and then run with iaca -64 libyuv_unittest. // IACA_ASM_START and IACA_ASM_END are equivalents that can be used within // inline assembly blocks. // example of iaca: // ~/iaca-lin64/bin/iaca.sh -64 -analysis LATENCY out/Release/libyuv_unittest #if defined(__x86_64__) || defined(__i386__) #define IACA_ASM_START \ ".byte 0x0F, 0x0B\n" \ " movl $111, %%ebx\n" \ ".byte 0x64, 0x67, 0x90\n" #define IACA_ASM_END \ " movl $222, %%ebx\n" \ ".byte 0x64, 0x67, 0x90\n" \ ".byte 0x0F, 0x0B\n" #define IACA_SSC_MARK(MARK_ID) \ __asm__ __volatile__("\n\t movl $" #MARK_ID \ ", %%ebx" \ "\n\t .byte 0x64, 0x67, 0x90" \ : \ : \ : "memory"); #define IACA_UD_BYTES __asm__ __volatile__("\n\t .byte 0x0F, 0x0B"); #else /* Visual C */ #define IACA_UD_BYTES \ { __asm _emit 0x0F __asm _emit 0x0B } #define IACA_SSC_MARK(x) \ { __asm mov ebx, x __asm _emit 0x64 __asm _emit 0x67 __asm _emit 0x90 } #define IACA_VC64_START __writegsbyte(111, 111); #define IACA_VC64_END __writegsbyte(222, 222); #endif #define IACA_START \ { \ IACA_UD_BYTES \ IACA_SSC_MARK(111) \ } #define IACA_END \ { \ IACA_SSC_MARK(222) \ IACA_UD_BYTES \ } void I444ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422AlphaToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, const uint8_t* src_a, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_NEON(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB24Row_NEON(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); void NV21ToRGB24Row_NEON(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_NEON(const uint8_t*
src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422AlphaToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, const uint8_t* src_a, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_MSA(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYJRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width); void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width); void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width); void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width); void RGB24ToYRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width); void RAWToYRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width); void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width); void ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToUV444Row_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVRow_NEON(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUV444Row_MSA(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVRow_MSA(const uint8_t* 
src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVJRow_NEON(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); void BGRAToUVRow_NEON(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_u, uint8_t* dst_v, int width); void ABGRToUVRow_NEON(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_u, uint8_t* dst_v, int width); void RGBAToUVRow_NEON(const uint8_t* src_rgba, int src_stride_rgba, uint8_t* dst_u, uint8_t* dst_v, int width); void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_u, uint8_t* dst_v, int width); void RAWToUVRow_NEON(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, uint8_t* dst_v, int width); void RGB565ToUVRow_NEON(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVJRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); void BGRAToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); void ABGRToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); void RGBAToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); void RGB24ToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); void RAWToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); void RGB565ToUVRow_MSA(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width); void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width); void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width); void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width); void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width); void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width); void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width); void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width); void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_y, int width); void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width); void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width); void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width); void ARGBToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ARGBToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); void BGRAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); void ABGRToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); void RGBAToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); void RGB24ToYRow_C(const uint8_t* 
src_argb0, uint8_t* dst_y, int width); void RAWToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width); void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width); void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width); void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width); void ARGBToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void BGRAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24ToYRow_Any_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_y, int width); void RAWToYRow_Any_SSSE3(const uint8_t* src_raw, uint8_t* dst_y, int width); void ARGBToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void BGRAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB565ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB1555ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB4444ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void BGRAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGBAToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYJRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB565ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB1555ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToUVRow_AVX2(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVJRow_AVX2(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width); void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0, int src_stride_bgra, uint8_t* dst_u, uint8_t* dst_v, int width); void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0, int src_stride_abgr, uint8_t* dst_u, uint8_t* dst_v, int width); void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0, int src_stride_rgba, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVRow_Any_AVX2(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVJRow_Any_AVX2(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVRow_Any_SSSE3(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); 
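// ---------------------------------------------------------------------------
// Added note (not part of the original header) on the suffix convention
// above: _C is the portable reference, _SSSE3/_AVX2/_NEON/_MSA variants
// assume SIMD-friendly widths, and the _Any_ wrappers accept any width.
// One simple way to realize that contract (the real ANY_* macro machinery
// in the sources differs) is to run the SIMD body on the bulk of the row
// and finish the remainder with the C version; 16-pixel granularity and
// 4 bytes per ARGB pixel are assumptions here:
static void ARGBToYRow_Any_Sketch(const uint8_t* src_argb, uint8_t* dst_y,
                                  int width,
                                  void (*simd16)(const uint8_t*, uint8_t*,
                                                 int),
                                  void (*row_c)(const uint8_t*, uint8_t*,
                                                int)) {
  int bulk = width & ~15;  // Largest multiple of 16 not exceeding width.
  if (bulk > 0) {
    simd16(src_argb, dst_y, bulk);
  }
  if (width > bulk) {  // 1 to 15 leftover pixels.
    row_c(src_argb + bulk * 4, dst_y + bulk, width - bulk);
  }
}
// ---------------------------------------------------------------------------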
void ARGBToUVJRow_Any_SSSE3(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void BGRAToUVRow_Any_SSSE3(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ABGRToUVRow_Any_SSSE3(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void RGBAToUVRow_Any_SSSE3(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUV444Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVRow_Any_NEON(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUV444Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVJRow_Any_NEON(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void BGRAToUVRow_Any_NEON(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ABGRToUVRow_Any_NEON(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void RGBAToUVRow_Any_NEON(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void RGB24ToUVRow_Any_NEON(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void RAWToUVRow_Any_NEON(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void RGB565ToUVRow_Any_NEON(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGB1555ToUVRow_Any_NEON(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGB4444ToUVRow_Any_NEON(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVJRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void BGRAToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ABGRToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void RGBAToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void RGB24ToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void RAWToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void RGB565ToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGB1555ToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); void BGRAToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); void ABGRToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); void RGBAToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t*
dst_u, uint8_t* dst_v, int width); void RGB24ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); void RAWToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width); void RGB565ToUVRow_C(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGB1555ToUVRow_C(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGB4444ToUVRow_C(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUV444Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUV444Row_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBToUV444Row_C(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width); void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_SSE2(const uint8_t* src, uint8_t* dst, int width); void MirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void MirrorUVRow_SSSE3(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v, int width); void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBMirrorRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBMirrorRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBMirrorRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBMirrorRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void SplitUVRow_C(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); void SplitUVRow_SSE2(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); void SplitUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); void SplitUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); void SplitUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width); void SplitUVRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void SplitUVRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void SplitUVRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void SplitUVRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void MergeUVRow_C(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width); void MergeUVRow_SSE2(const 
uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width); void MergeUVRow_AVX2(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width); void MergeUVRow_NEON(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width); void MergeUVRow_MSA(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width); void MergeUVRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void MergeUVRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void MergeUVRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void MergeUVRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void SplitRGBRow_C(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width); void SplitRGBRow_SSSE3(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width); void SplitRGBRow_NEON(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width); void SplitRGBRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width); void SplitRGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width); void MergeRGBRow_C(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width); void MergeRGBRow_SSSE3(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width); void MergeRGBRow_NEON(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width); void MergeRGBRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); void MergeRGBRow_Any_NEON(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width); void MergeUVRow_16_C(const uint16_t* src_u, const uint16_t* src_v, uint16_t* dst_uv, int scale, /* 64 for 10 bit */ int width); void MergeUVRow_16_AVX2(const uint16_t* src_u, const uint16_t* src_v, uint16_t* dst_uv, int scale, int width); void MultiplyRow_16_AVX2(const uint16_t* src_y, uint16_t* dst_y, int scale, int width); void MultiplyRow_16_C(const uint16_t* src_y, uint16_t* dst_y, int scale, int width); void Convert8To16Row_C(const uint8_t* src_y, uint16_t* dst_y, int scale, int width); void Convert8To16Row_SSE2(const uint8_t* src_y, uint16_t* dst_y, int scale, int width); void Convert8To16Row_AVX2(const uint8_t* src_y, uint16_t* dst_y, int scale, int width); void Convert8To16Row_Any_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int scale, int width); void Convert8To16Row_Any_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int scale, int width); void Convert16To8Row_C(const uint16_t* src_y, uint8_t* dst_y, int scale, int width); void Convert16To8Row_SSSE3(const uint16_t* src_y, uint8_t* dst_y, int scale, int width); void Convert16To8Row_AVX2(const uint16_t* src_y, uint8_t* dst_y, int scale, int width); void Convert16To8Row_Any_SSSE3(const uint16_t* src_ptr, uint8_t* dst_ptr, int scale, int width); void Convert16To8Row_Any_AVX2(const uint16_t* src_ptr, uint8_t* dst_ptr, int scale, int width); void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width); void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width); void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width); void CopyRow_MIPS(const uint8_t* src, uint8_t* dst, int count); void CopyRow_C(const uint8_t* src, 
uint8_t* dst, int count); void CopyRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void CopyRow_Any_AVX(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void CopyRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count); void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyAlphaRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBCopyAlphaRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width); void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, uint8_t* dst_a, int width); void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb, uint8_t* dst_a, int width); void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, uint8_t* dst_a, int width); void ARGBExtractAlphaRow_MSA(const uint8_t* src_argb, uint8_t* dst_a, int width); void ARGBExtractAlphaRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBExtractAlphaRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBExtractAlphaRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBExtractAlphaRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBCopyYToAlphaRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBCopyYToAlphaRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void SetRow_C(uint8_t* dst, uint8_t v8, int width); void SetRow_MSA(uint8_t* dst, uint8_t v8, int width); void SetRow_X86(uint8_t* dst, uint8_t v8, int width); void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width); void SetRow_NEON(uint8_t* dst, uint8_t v8, int width); void SetRow_Any_X86(uint8_t* dst_ptr, uint8_t v32, int width); void SetRow_Any_NEON(uint8_t* dst_ptr, uint8_t v32, int width); void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width); void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width); void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width); void ARGBSetRow_Any_NEON(uint8_t* dst_ptr, uint32_t v32, int width); void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width); void ARGBSetRow_Any_MSA(uint8_t* dst_ptr, uint32_t v32, int width); // ARGBShufflers for BGRAToARGB etc. 
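// A minimal sketch of the shuffler contract, assuming the 4-entry pixel
// mask the C kernel consumes (SIMD variants repeat it across a register);
// ExampleShufflePixel and the mask shown are illustrative, not libyuv API.
// Each output byte is fetched by index from within the same 4-byte pixel,
// dst[i] = src[shuffler[i]], so a mask of {3, 2, 1, 0} reverses BGRA into
// ARGB.
static inline void ExampleShufflePixel(const uint8_t src[4], uint8_t dst[4],
                                       const uint8_t shuffler[4]) {
  dst[0] = src[shuffler[0]];  // e.g. with {3, 2, 1, 0}, take byte 3 first
  dst[1] = src[shuffler[1]];
  dst[2] = src[shuffler[2]];
  dst[3] = src[shuffler[3]];
}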
void ARGBShuffleRow_C(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width); void ARGBShuffleRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width); void ARGBShuffleRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width); void ARGBShuffleRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width); void ARGBShuffleRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width); void ARGBShuffleRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, int width); void ARGBShuffleRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, int width); void ARGBShuffleRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, int width); void ARGBShuffleRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint8_t* param, int width); void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width); void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555, uint8_t* dst_argb, int width); void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444, uint8_t* dst_argb, int width); void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_argb, int width); void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_argb, int width); void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_argb, int width); void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444, uint8_t* dst_argb, int width); void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width); void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width); void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width); void RGB565ToARGBRow_C(const uint8_t* src_rgb565, uint8_t* dst_argb, int width); void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555, uint8_t* dst_argb, int width); void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444, uint8_t* dst_argb, int width); void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width); void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width); void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width); void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width); void RGB24ToARGBRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void 
RAWToARGBRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB565ToARGBRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB1555ToARGBRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB4444ToARGBRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB565ToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB1555ToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB4444ToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB24ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToRGB24Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RAWToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB565ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void RGB565ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB1555ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB1555ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB4444ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGB4444ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width); void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width); void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width); void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width); void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBToRGB24Row_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width); void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, int width); void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, uint8_t* dst, const uint32_t dither4, int width); void ARGBToRGB565DitherRow_AVX2(const uint8_t* src, uint8_t* dst, const uint32_t dither4, int width); void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width); void ARGBToRGB24Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb24, int width); void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width); void ARGBToRGB565Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb565, int width); void ARGBToARGB1555Row_NEON(const uint8_t* src_argb, uint8_t* dst_argb1555, int width); void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, uint8_t* dst_argb4444, int width); void ARGBToRGB565DitherRow_NEON(const 
uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, int width); void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, int width); void ARGBToRGBARow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width); void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width); void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width); void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width); void J400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void J400ToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void J400ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void J400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void I444ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void I422ToAR30Row_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void I210ToAR30Row_C(const uint16_t* src_y, const uint16_t* src_u, const uint16_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void I210ToARGBRow_C(const uint16_t* src_y, const uint16_t* src_u, const uint16_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void I422AlphaToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, const uint8_t* src_a, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_C(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB24Row_C(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void NV21ToRGB24Row_C(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* rgb_buf, const struct 
YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_C(const uint8_t* src_yuy2, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_C(const uint8_t* src_uyvy, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422ToAR30Row_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width); void I210ToAR30Row_SSSE3(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width); void I210ToARGBRow_SSSE3(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422ToAR30Row_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width); void I210ToARGBRow_AVX2(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I210ToAR30Row_AVX2(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width); void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422AlphaToARGBRow_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_argb, const struct
YuvConstants* yuvconstants, int width); void NV12ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB24Row_SSSE3(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); void NV21ToRGB24Row_SSSE3(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_SSSE3(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); void NV21ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_AVX2(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* vu_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_AVX2(const uint8_t* y_buf, const uint8_t* vu_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_SSSE3(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_SSSE3(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_SSSE3(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* 
v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToAR30Row_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I210ToAR30Row_Any_SSSE3(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I210ToARGBRow_Any_SSSE3(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToAR30Row_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I210ToARGBRow_Any_AVX2(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I210ToAR30Row_Any_AVX2(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422AlphaToARGBRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422AlphaToARGBRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV21ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB24Row_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV21ToRGB24Row_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_SSSE3(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct 
YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width); void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width); void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width); void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width); void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width); void I400ToARGBRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void I400ToARGBRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void I400ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void I400ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// ARGB preattenuated alpha blend.
void ARGBBlendRow_SSSE3(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBBlendRow_NEON(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBBlendRow_MSA(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBBlendRow_C(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width);
// Unattenuated planar alpha blend.
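// A minimal sketch of the per-pixel math behind BlendPlaneRow_*, assuming
// the usual alpha-weighted average of two single planes (exact rounding in
// the optimized kernels may differ slightly); ExampleBlendPlanePixel is
// illustrative, not libyuv API:
static inline uint8_t ExampleBlendPlanePixel(uint8_t s0, uint8_t s1,
                                             uint8_t a) {
  // dst = (s0 * a + s1 * (255 - a)) / 255, rounded to nearest.
  return (uint8_t)((s0 * a + s1 * (255 - a) + 127) / 255);
}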
void BlendPlaneRow_SSSE3(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, uint8_t* dst, int width); void BlendPlaneRow_Any_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); void BlendPlaneRow_AVX2(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, uint8_t* dst, int width); void BlendPlaneRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); void BlendPlaneRow_C(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, uint8_t* dst, int width);
// ARGB multiply images. Same API as Blend, but these require
// pointer and width alignment for SSE2.
void ARGBMultiplyRow_C(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBMultiplyRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBMultiplyRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void ARGBMultiplyRow_NEON(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBMultiplyRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void ARGBMultiplyRow_MSA(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBMultiplyRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width);
// ARGB add images.
void ARGBAddRow_C(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBAddRow_SSE2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBAddRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void ARGBAddRow_AVX2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBAddRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void ARGBAddRow_NEON(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBAddRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void ARGBAddRow_MSA(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBAddRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width);
// ARGB subtract images. Same API as Blend, but these require
// pointer and width alignment for SSE2.
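// The add and subtract kernels work per byte channel with unsigned
// saturation. A minimal sketch of that clamping, assuming the same
// behavior as the C reference kernels; the Example* names are
// illustrative, not libyuv API:
static inline uint8_t ExampleAddChannel(uint8_t a, uint8_t b) {
  int sum = a + b;
  return (uint8_t)(sum > 255 ? 255 : sum);  // saturate high
}
static inline uint8_t ExampleSubtractChannel(uint8_t a, uint8_t b) {
  int diff = a - b;
  return (uint8_t)(diff < 0 ? 0 : diff);  // saturate low
}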
void ARGBSubtractRow_C(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBSubtractRow_SSE2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBSubtractRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void ARGBSubtractRow_AVX2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBSubtractRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void ARGBSubtractRow_NEON(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBSubtractRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void ARGBSubtractRow_MSA(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width); void ARGBSubtractRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void ARGBToRGB24Row_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRAWRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRGB565Row_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToARGB1555Row_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToARGB4444Row_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToAR30Row_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToAR30Row_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRAWRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRGB24Row_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRGB24Row_Any_AVX512VBMI(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRGB565DitherRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint32_t param, int width); void ARGBToRGB565DitherRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint32_t param, int width); void ARGBToRGB565Row_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToARGB1555Row_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToARGB4444Row_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ABGRToAR30Row_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToAR30Row_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRGB24Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRAWRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRGB565Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToARGB1555Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToARGB4444Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRGB565DitherRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint32_t param, int width); void ARGBToRGB24Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRAWRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRGB565Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToARGB1555Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToARGB4444Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBToRGB565DitherRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, const uint32_t param, int width); void I444ToARGBRow_Any_NEON(const uint8_t* y_buf, 
const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422AlphaToARGBRow_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB24Row_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV21ToRGB24Row_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I444ToARGBRow_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToARGBRow_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToRGBARow_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422AlphaToARGBRow_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToRGB24Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToRGB565Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToARGB4444Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void I422ToARGB1555Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* 
v_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV12ToARGBRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV12ToRGB565Row_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void NV21ToARGBRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void YUY2ToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void UYVYToARGBRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, int width); void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width); void YUY2ToUVRow_C(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToUV422Row_C(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToUVRow_Any_AVX2(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToUV422Row_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToUVRow_Any_SSE2(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToUV422Row_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToUVRow_Any_NEON(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToUV422Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void YUY2ToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void YUY2ToUV422Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, int stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int 
width); void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToUVRow_SSE2(const uint8_t* src_uyvy, int stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToUVRow_NEON(const uint8_t* src_uyvy, int stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToUVRow_MSA(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width); void UYVYToUVRow_C(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToUV422Row_C(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToYRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToUVRow_Any_AVX2(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToUV422Row_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToYRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToUVRow_Any_SSE2(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToUV422Row_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToYRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToUVRow_Any_NEON(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToUV422Row_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToYRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void UYVYToUVRow_Any_MSA(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void UYVYToUV422Row_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, int width); void I422ToYUY2Row_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_frame, int width); void I422ToUYVYRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_frame, int width); void I422ToYUY2Row_SSE2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width); void I422ToUYVYRow_SSE2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width); void I422ToYUY2Row_Any_SSE2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); void I422ToUYVYRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); void I422ToYUY2Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width); void I422ToUYVYRow_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width); void I422ToYUY2Row_Any_AVX2(const uint8_t*
y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); void I422ToUYVYRow_Any_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); void I422ToYUY2Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width); void I422ToUYVYRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width); void I422ToYUY2Row_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); void I422ToUYVYRow_Any_NEON(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); void I422ToYUY2Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width); void I422ToUYVYRow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width); void I422ToYUY2Row_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width); void I422ToUYVYRow_Any_MSA(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ptr, int width);
// Effects related row functions.
void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBAttenuateRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBAttenuateRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBAttenuateRow_Any_SSSE3(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBAttenuateRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBAttenuateRow_Any_NEON(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBAttenuateRow_Any_MSA(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);
// Inverse table for unattenuate, shared by C and SSE2.
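// Attenuation premultiplies each color channel by its alpha
// (dst = c * a / 255); unattenuation reverses that using a fixed-point
// reciprocal of alpha, which is what the table declared below caches.
// A minimal sketch of the forward step, assuming round-to-nearest (the
// optimized kernels may round slightly differently);
// ExampleAttenuateChannel is illustrative, not libyuv API:
static inline uint8_t ExampleAttenuateChannel(uint8_t c, uint8_t a) {
  return (uint8_t)((c * a + 127) / 255);
}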
extern const uint32_t fixed_invtbl8[256]; void ARGBUnattenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBUnattenuateRow_Any_SSE2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBUnattenuateRow_Any_AVX2(const uint8_t* src_ptr, uint8_t* dst_ptr, int width); void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width); void ARGBSepiaRow_C(uint8_t* dst_argb, int width); void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width); void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width); void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width); void ARGBColorMatrixRow_C(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width); void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width); void ARGBColorMatrixRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width); void ARGBColorMatrixRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width); void ARGBColorTableRow_C(uint8_t* dst_argb, const uint8_t* table_argb, int width); void ARGBColorTableRow_X86(uint8_t* dst_argb, const uint8_t* table_argb, int width); void RGBColorTableRow_C(uint8_t* dst_argb, const uint8_t* table_argb, int width); void RGBColorTableRow_X86(uint8_t* dst_argb, const uint8_t* table_argb, int width); void ARGBQuantizeRow_C(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width); void ARGBQuantizeRow_SSE2(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width); void ARGBQuantizeRow_NEON(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width); void ARGBQuantizeRow_MSA(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width); void ARGBShadeRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value); void ARGBShadeRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value); void ARGBShadeRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value); void ARGBShadeRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value);
// Used for blur.
void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, const int32_t* botleft, int width, int area, uint8_t* dst, int count); void ComputeCumulativeSumRow_SSE2(const uint8_t* row, int32_t* cumsum, const int32_t* previous_cumsum, int width); void CumulativeSumToAverageRow_C(const int32_t* tl, const int32_t* bl, int w, int area, uint8_t* dst, int count); void ComputeCumulativeSumRow_C(const uint8_t* row, int32_t* cumsum, const int32_t* previous_cumsum, int width); LIBYUV_API void ARGBAffineRow_C(const uint8_t* src_argb, int src_argb_stride, uint8_t* dst_argb, const float* uv_dudv, int width); LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb, int src_argb_stride, uint8_t* dst_argb, const float* src_dudv, int width);
// Used for I420Scale, ARGBScale, and ARGBInterpolate.
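// The InterpolateRow_* kernels below blend a row with the row src_stride
// bytes after it, weighted by source_y_fraction (0 copies the first row).
// A minimal per-byte sketch, assuming an 8-bit fraction as in the C
// reference (rounding details vary by kernel); ExampleInterpolatePixel is
// illustrative, not libyuv API:
static inline uint8_t ExampleInterpolatePixel(uint8_t row0, uint8_t row1,
                                              int source_y_fraction) {
  int f1 = source_y_fraction;  // weight of the second row, 0..255
  int f0 = 256 - f1;           // weight of the first row
  return (uint8_t)((row0 * f0 + row1 * f1 + 128) >> 8);
}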
void InterpolateRow_C(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int width, int source_y_fraction); void InterpolateRow_SSSE3(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction); void InterpolateRow_AVX2(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction); void InterpolateRow_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction); void InterpolateRow_MSA(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int width, int source_y_fraction); void InterpolateRow_Any_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); void InterpolateRow_Any_SSSE3(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); void InterpolateRow_Any_AVX2(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); void InterpolateRow_Any_MSA(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride_ptr, int width, int source_y_fraction); void InterpolateRow_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, int width, int source_y_fraction); // Sobel images. void SobelXRow_C(const uint8_t* src_y0, const uint8_t* src_y1, const uint8_t* src_y2, uint8_t* dst_sobelx, int width); void SobelXRow_SSE2(const uint8_t* src_y0, const uint8_t* src_y1, const uint8_t* src_y2, uint8_t* dst_sobelx, int width); void SobelXRow_NEON(const uint8_t* src_y0, const uint8_t* src_y1, const uint8_t* src_y2, uint8_t* dst_sobelx, int width); void SobelXRow_MSA(const uint8_t* src_y0, const uint8_t* src_y1, const uint8_t* src_y2, uint8_t* dst_sobelx, int width); void SobelYRow_C(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width); void SobelYRow_SSE2(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width); void SobelYRow_NEON(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width); void SobelYRow_MSA(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width); void SobelRow_C(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); void SobelRow_SSE2(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); void SobelRow_NEON(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); void SobelRow_MSA(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); void SobelToPlaneRow_C(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width); void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width); void SobelToPlaneRow_NEON(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width); void SobelToPlaneRow_MSA(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width); void SobelXYRow_C(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); void SobelXYRow_SSE2(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); void SobelXYRow_NEON(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); void SobelXYRow_MSA(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width); void SobelRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void 
SobelRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void SobelRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void SobelToPlaneRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void SobelToPlaneRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void SobelToPlaneRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void SobelXYRow_Any_SSE2(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void SobelXYRow_Any_NEON(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void SobelXYRow_Any_MSA(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, int width); void ARGBPolynomialRow_C(const uint8_t* src_argb, uint8_t* dst_argb, const float* poly, int width); void ARGBPolynomialRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, const float* poly, int width); void ARGBPolynomialRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, const float* poly, int width); // Scale and convert to half float. void HalfFloatRow_C(const uint16_t* src, uint16_t* dst, float scale, int width); void HalfFloatRow_SSE2(const uint16_t* src, uint16_t* dst, float scale, int width); void HalfFloatRow_Any_SSE2(const uint16_t* src_ptr, uint16_t* dst_ptr, float param, int width); void HalfFloatRow_AVX2(const uint16_t* src, uint16_t* dst, float scale, int width); void HalfFloatRow_Any_AVX2(const uint16_t* src_ptr, uint16_t* dst_ptr, float param, int width); void HalfFloatRow_F16C(const uint16_t* src, uint16_t* dst, float scale, int width); void HalfFloatRow_Any_F16C(const uint16_t* src, uint16_t* dst, float scale, int width); void HalfFloat1Row_F16C(const uint16_t* src, uint16_t* dst, float scale, int width); void HalfFloat1Row_Any_F16C(const uint16_t* src, uint16_t* dst, float scale, int width); void HalfFloatRow_NEON(const uint16_t* src, uint16_t* dst, float scale, int width); void HalfFloatRow_Any_NEON(const uint16_t* src_ptr, uint16_t* dst_ptr, float param, int width); void HalfFloat1Row_NEON(const uint16_t* src, uint16_t* dst, float scale, int width); void HalfFloat1Row_Any_NEON(const uint16_t* src_ptr, uint16_t* dst_ptr, float param, int width); void HalfFloatRow_MSA(const uint16_t* src, uint16_t* dst, float scale, int width); void HalfFloatRow_Any_MSA(const uint16_t* src_ptr, uint16_t* dst_ptr, float param, int width); void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width); void ByteToFloatRow_NEON(const uint8_t* src, float* dst, float scale, int width); void ByteToFloatRow_Any_NEON(const uint8_t* src_ptr, float* dst_ptr, float param, int width); void ARGBLumaColorTableRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width, const uint8_t* luma, uint32_t lumacoeff); void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width, const uint8_t* luma, uint32_t lumacoeff); float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width); float ScaleMaxSamples_NEON(const float* src, float* dst, float scale, int width); float ScaleSumSamples_C(const float* src, float* dst, float scale, int width); float ScaleSumSamples_NEON(const float* src, float* dst, float scale, int width); void ScaleSamples_C(const float* src, float* dst, float scale, int width); void ScaleSamples_NEON(const float* src, float* dst, float scale, int width); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_ROW_H_ 
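// ---------------------------------------------------------------------------
// Editor's illustrative sketch -- not part of the upstream header. It shows
// how the kernels declared above are meant to be combined: detect CPU
// features once, prefer the widest kernel, and fall back to the _Any_
// wrapper (which copes with widths that are not a multiple of the SIMD step)
// or to the _C reference. The row functions, the HAS_* guard, and
// TestCpuFlag() are real names from this tree; AttenuatePlane itself is a
// hypothetical helper, and the selection order mirrors the pattern used by
// libyuv's planar_functions.cc.

#include "libyuv/cpu_id.h"
#include "libyuv/row.h"

static void AttenuatePlane(const uint8_t* src_argb, int src_stride_argb,
                           uint8_t* dst_argb, int dst_stride_argb, int width,
                           int height) {
  int y;
  // Start from the portable reference row and upgrade when SIMD is present.
  void (*AttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) =
      ARGBAttenuateRow_C;
#if defined(HAS_ARGBATTENUATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    AttenuateRow = ARGBAttenuateRow_Any_SSSE3;  // Handles any width.
    if (width % 4 == 0) {
      AttenuateRow = ARGBAttenuateRow_SSSE3;  // Whole-register fast path.
    }
  }
#endif
  for (y = 0; y < height; ++y) {
    AttenuateRow(src_argb, dst_argb, width);
    src_argb += src_stride_argb;
    dst_argb += dst_stride_argb;
  }
}
// ---------------------------------------------------------------------------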
libvpx-1.8.2/third_party/libyuv/include/libyuv/scale.h000066400000000000000000000076231357355204000231020ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_SCALE_H_ #define INCLUDE_LIBYUV_SCALE_H_ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Supported filtering. typedef enum FilterMode { kFilterNone = 0, // Point sample; Fastest. kFilterLinear = 1, // Filter horizontally only. kFilterBilinear = 2, // Faster than box, but lower quality scaling down. kFilterBox = 3 // Highest quality. } FilterModeEnum; // Scale a YUV plane. LIBYUV_API void ScalePlane(const uint8_t* src, int src_stride, int src_width, int src_height, uint8_t* dst, int dst_stride, int dst_width, int dst_height, enum FilterMode filtering); LIBYUV_API void ScalePlane_16(const uint16_t* src, int src_stride, int src_width, int src_height, uint16_t* dst, int dst_stride, int dst_width, int dst_height, enum FilterMode filtering); // Scales a YUV 4:2:0 image from the src width and height to the // dst width and height. // If filtering is kFilterNone, a simple nearest-neighbor algorithm is // used. This produces basic (blocky) quality at the fastest speed. // If filtering is kFilterBilinear, interpolation is used to produce a better // quality image, at the expense of speed. // If filtering is kFilterBox, averaging is used to produce an even better // quality image, at further expense of speed. // Returns 0 if successful. LIBYUV_API int I420Scale(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, int src_width, int src_height, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int dst_width, int dst_height, enum FilterMode filtering); LIBYUV_API int I420Scale_16(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, int src_width, int src_height, uint16_t* dst_y, int dst_stride_y, uint16_t* dst_u, int dst_stride_u, uint16_t* dst_v, int dst_stride_v, int dst_width, int dst_height, enum FilterMode filtering); #ifdef __cplusplus // Legacy API. Deprecated. LIBYUV_API int Scale(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, int src_stride_y, int src_stride_u, int src_stride_v, int src_width, int src_height, uint8_t* dst_y, uint8_t* dst_u, uint8_t* dst_v, int dst_stride_y, int dst_stride_u, int dst_stride_v, int dst_width, int dst_height, LIBYUV_BOOL interpolate); // For testing, allow disabling of specialized scalers. LIBYUV_API void SetUseReferenceImpl(LIBYUV_BOOL use); #endif // __cplusplus #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_SCALE_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/scale_argb.h000066400000000000000000000046141357355204000240720ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree.
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_SCALE_ARGB_H_ #define INCLUDE_LIBYUV_SCALE_ARGB_H_ #include "libyuv/basic_types.h" #include "libyuv/scale.h" // For FilterMode #ifdef __cplusplus namespace libyuv { extern "C" { #endif LIBYUV_API int ARGBScale(const uint8_t* src_argb, int src_stride_argb, int src_width, int src_height, uint8_t* dst_argb, int dst_stride_argb, int dst_width, int dst_height, enum FilterMode filtering); // Clipped scale takes destination rectangle coordinates for clip values. LIBYUV_API int ARGBScaleClip(const uint8_t* src_argb, int src_stride_argb, int src_width, int src_height, uint8_t* dst_argb, int dst_stride_argb, int dst_width, int dst_height, int clip_x, int clip_y, int clip_width, int clip_height, enum FilterMode filtering); // Scale with YUV conversion to ARGB and clipping. LIBYUV_API int YUVToARGBScaleClip(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint32_t src_fourcc, int src_width, int src_height, uint8_t* dst_argb, int dst_stride_argb, uint32_t dst_fourcc, int dst_width, int dst_height, int clip_x, int clip_y, int clip_width, int clip_height, enum FilterMode filtering); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_SCALE_ARGB_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/scale_row.h000066400000000000000000001207441357355204000237710ustar00rootroot00000000000000/* * Copyright 2013 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_SCALE_ROW_H_ #define INCLUDE_LIBYUV_SCALE_ROW_H_ #include "libyuv/basic_types.h" #include "libyuv/scale.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #if defined(__pnacl__) || defined(__CLR_VER) || \ (defined(__native_client__) && defined(__x86_64__)) || \ (defined(__i386__) && !defined(__SSE__) && !defined(__clang__)) #define LIBYUV_DISABLE_X86 #endif #if defined(__native_client__) #define LIBYUV_DISABLE_NEON #endif // MemorySanitizer does not support assembly code yet. http://crbug.com/344505 #if defined(__has_feature) #if __has_feature(memory_sanitizer) #define LIBYUV_DISABLE_X86 #endif #endif // GCC >= 4.7.0 required for AVX2. #if defined(__GNUC__) && (defined(__x86_64__) || defined(__i386__)) #if (__GNUC__ > 4) || (__GNUC__ == 4 && (__GNUC_MINOR__ >= 7)) #define GCC_HAS_AVX2 1 #endif // GNUC >= 4.7 #endif // __GNUC__ // clang >= 3.4.0 required for AVX2. #if defined(__clang__) && (defined(__x86_64__) || defined(__i386__)) #if (__clang_major__ > 3) || (__clang_major__ == 3 && (__clang_minor__ >= 4)) #define CLANG_HAS_AVX2 1 #endif // clang >= 3.4 #endif // __clang__ // Visual C 2012 required for AVX2. 
#if defined(_M_IX86) && !defined(__clang__) && defined(_MSC_VER) && \ _MSC_VER >= 1700 #define VISUALC_HAS_AVX2 1 #endif // VisualStudio >= 2012 // The following are available on all x86 platforms: #if !defined(LIBYUV_DISABLE_X86) && \ (defined(_M_IX86) || defined(__x86_64__) || defined(__i386__)) #define HAS_FIXEDDIV1_X86 #define HAS_FIXEDDIV_X86 #define HAS_SCALEARGBCOLS_SSE2 #define HAS_SCALEARGBCOLSUP2_SSE2 #define HAS_SCALEARGBFILTERCOLS_SSSE3 #define HAS_SCALEARGBROWDOWN2_SSE2 #define HAS_SCALEARGBROWDOWNEVEN_SSE2 #define HAS_SCALECOLSUP2_SSE2 #define HAS_SCALEFILTERCOLS_SSSE3 #define HAS_SCALEROWDOWN2_SSSE3 #define HAS_SCALEROWDOWN34_SSSE3 #define HAS_SCALEROWDOWN38_SSSE3 #define HAS_SCALEROWDOWN4_SSSE3 #define HAS_SCALEADDROW_SSE2 #endif // The following are available on all x86 platforms, but // require VS2012, clang 3.4 or gcc 4.7. // The code supports NaCL but requires a new compiler and validator. #if !defined(LIBYUV_DISABLE_X86) && \ (defined(VISUALC_HAS_AVX2) || defined(CLANG_HAS_AVX2) || \ defined(GCC_HAS_AVX2)) #define HAS_SCALEADDROW_AVX2 #define HAS_SCALEROWDOWN2_AVX2 #define HAS_SCALEROWDOWN4_AVX2 #endif // The following are available on Neon platforms: #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(LIBYUV_NEON) || defined(__aarch64__)) #define HAS_SCALEARGBCOLS_NEON #define HAS_SCALEARGBROWDOWN2_NEON #define HAS_SCALEARGBROWDOWNEVEN_NEON #define HAS_SCALEFILTERCOLS_NEON #define HAS_SCALEROWDOWN2_NEON #define HAS_SCALEROWDOWN34_NEON #define HAS_SCALEROWDOWN38_NEON #define HAS_SCALEROWDOWN4_NEON #define HAS_SCALEARGBFILTERCOLS_NEON #endif #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #define HAS_SCALEADDROW_MSA #define HAS_SCALEARGBCOLS_MSA #define HAS_SCALEARGBFILTERCOLS_MSA #define HAS_SCALEARGBROWDOWN2_MSA #define HAS_SCALEARGBROWDOWNEVEN_MSA #define HAS_SCALEFILTERCOLS_MSA #define HAS_SCALEROWDOWN2_MSA #define HAS_SCALEROWDOWN34_MSA #define HAS_SCALEROWDOWN38_MSA #define HAS_SCALEROWDOWN4_MSA #endif // Scale ARGB vertically with bilinear interpolation. void ScalePlaneVertical(int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, int y, int dy, int bpp, enum FilterMode filtering); void ScalePlaneVertical_16(int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint16_t* src_argb, uint16_t* dst_argb, int x, int y, int dy, int wpp, enum FilterMode filtering); // Simplify the filtering based on scale factors. enum FilterMode ScaleFilterReduce(int src_width, int src_height, int dst_width, int dst_height, enum FilterMode filtering); // Divide num by div and return as 16.16 fixed point result. int FixedDiv_C(int num, int div); int FixedDiv_X86(int num, int div); // Divide num - 1 by div - 1 and return as 16.16 fixed point result. int FixedDiv1_C(int num, int div); int FixedDiv1_X86(int num, int div); #ifdef HAS_FIXEDDIV_X86 #define FixedDiv FixedDiv_X86 #define FixedDiv1 FixedDiv1_X86 #else #define FixedDiv FixedDiv_C #define FixedDiv1 FixedDiv1_C #endif // Compute slope values for stepping. 
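// (Editor's note:) Scaling walks source coordinates in 16.16 fixed point.
// ScaleSlope chooses the starting offsets (x, y) and the per-output-pixel
// steps (dx, dy) for the given filter mode; dx is essentially
// FixedDiv(src_width, dst_width), i.e. (src_width << 16) / dst_width, with
// the start typically nudged by half a step in the filtered modes so that
// sample points are centered on the source pixels.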
void ScaleSlope(int src_width, int src_height, int dst_width, int dst_height, enum FilterMode filtering, int* x, int* y, int* dx, int* dy); void ScaleRowDown2_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown2_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width); void ScaleRowDown2Linear_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width); void ScaleRowDown2Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown2Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width); void ScaleRowDown4_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown4_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width); void ScaleRowDown4Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown4Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width); void ScaleRowDown34_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown34_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width); void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* d, int dst_width); void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* d, int dst_width); void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* d, int dst_width); void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* d, int dst_width); void ScaleCols_C(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx); void ScaleCols_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx); void ScaleColsUp2_C(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int, int); void ScaleColsUp2_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int, int); void ScaleFilterCols_C(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx); void ScaleFilterCols_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx); void ScaleFilterCols64_C(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x32, int dx); void ScaleFilterCols64_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x32, int dx); void ScaleRowDown38_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown38_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width); void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width); void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width); void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); void ScaleAddRow_16_C(const uint16_t* src_ptr, uint32_t* dst_ptr, int src_width); void ScaleARGBRowDown2_C(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int 
dst_width); void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDown2Box_C(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDownEven_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width); void ScaleARGBCols_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx); void ScaleARGBCols64_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x32, int dx); void ScaleARGBColsUp2_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int, int); void ScaleARGBFilterCols_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx); void ScaleARGBFilterCols64_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x32, int dx); // Specialized scalers for x86. void ScaleRowDown2_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Linear_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Box_Odd_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2_Any_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Linear_Any_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Box_Any_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void 
ScaleRowDown2Box_Odd_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4_Any_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4Box_Any_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_1_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_0_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_3_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_2_Box_Any_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); void ScaleAddRow_Any_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); void ScaleAddRow_Any_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); void ScaleFilterCols_SSSE3(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx); void ScaleColsUp2_SSE2(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx); // ARGB Column functions void ScaleARGBCols_SSE2(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx); void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx); void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx); void ScaleARGBFilterCols_NEON(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx); void ScaleARGBCols_NEON(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx); void ScaleARGBFilterCols_Any_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx); void ScaleARGBCols_Any_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx); void ScaleARGBFilterCols_MSA(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx); void ScaleARGBCols_MSA(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx); void ScaleARGBFilterCols_Any_MSA(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx); void ScaleARGBCols_Any_MSA(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx); // ARGB Row functions void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, 
ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleARGBRowDown2_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDown2_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDown2Linear_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDown2Box_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDown2_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDown2Linear_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDown2Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDown2_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDown2Linear_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDown2Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, int32_t src_stepx, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width); void ScaleARGBRowDownEven_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDownEvenBox_Any_SSE2(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDownEven_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDownEvenBox_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDownEven_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, int32_t src_stepx, uint8_t* dst_ptr, int dst_width); void ScaleARGBRowDownEvenBox_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_ptr, int dst_width); // ScaleRowDown2Box also used by planar functions // NEON downscalers with interpolation. // Note - not static due to reuse in convert for 444 to 420. 
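// (Editor's note:) The three RowDown2 flavors make the same speed/quality
// trade in every backend: ScaleRowDown2 keeps one pixel of each horizontal
// pair, ScaleRowDown2Linear replaces the pair with its rounded average, and
// ScaleRowDown2Box reads a second row via src_stride and emits the rounded
// mean of the 2x2 block, (a + b + c + d + 2) >> 2.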
void ScaleRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown4_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); // Down scale from 4 to 3 pixels. Use the NEON multilane read/write // to load every 4th pixel into 4 different registers. // Point samples 32 pixels to 24 pixels. void ScaleRowDown34_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); // 32 -> 12 void ScaleRowDown38_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); // 32x3 -> 12x1 void ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); // 32x2 -> 12x1 void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Linear_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Box_Odd_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_0_Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_1_Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); // 32 -> 12 void ScaleRowDown38_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); // 32x3 -> 12x1 void ScaleRowDown38_3_Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); // 32x2 -> 12x1 void ScaleRowDown38_2_Box_Any_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleAddRow_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); void ScaleAddRow_Any_NEON(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); void ScaleFilterCols_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx); void ScaleFilterCols_Any_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx); void ScaleRowDown2_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown2Linear_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown2Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown4_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void
ScaleRowDown4Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown38_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown38_2_Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); void ScaleFilterCols_MSA(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx); void ScaleRowDown34_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width); void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* d, int dst_width); void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* d, int dst_width); void ScaleRowDown2_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Linear_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown2Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown4Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_2_Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown38_3_Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleAddRow_Any_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width); void ScaleFilterCols_Any_MSA(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx); void ScaleRowDown34_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_0_Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void ScaleRowDown34_1_Box_Any_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_SCALE_ROW_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/version.h000066400000000000000000000010401357355204000234630ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef INCLUDE_LIBYUV_VERSION_H_ #define INCLUDE_LIBYUV_VERSION_H_ #define LIBYUV_VERSION 1711 #endif // INCLUDE_LIBYUV_VERSION_H_ libvpx-1.8.2/third_party/libyuv/include/libyuv/video_common.h000066400000000000000000000154711357355204000244710ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ // Common definitions for video, including fourcc and VideoFormat. #ifndef INCLUDE_LIBYUV_VIDEO_COMMON_H_ #define INCLUDE_LIBYUV_VIDEO_COMMON_H_ #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif ////////////////////////////////////////////////////////////////////////////// // Definition of FourCC codes ////////////////////////////////////////////////////////////////////////////// // Convert four characters to a FourCC code. // Needs to be a macro otherwise the OS X compiler complains when the kFormat* // constants are used in a switch. #ifdef __cplusplus #define FOURCC(a, b, c, d) \ ((static_cast<uint32_t>(a)) | (static_cast<uint32_t>(b) << 8) | \ (static_cast<uint32_t>(c) << 16) | (static_cast<uint32_t>(d) << 24)) #else #define FOURCC(a, b, c, d) \ (((uint32_t)(a)) | ((uint32_t)(b) << 8) | /* NOLINT */ \ ((uint32_t)(c) << 16) | ((uint32_t)(d) << 24)) /* NOLINT */ #endif // Some pages discussing FourCC codes: // http://www.fourcc.org/yuv.php // http://v4l2spec.bytesex.org/spec/book1.htm // http://developer.apple.com/quicktime/icefloe/dispatch020.html // http://msdn.microsoft.com/library/windows/desktop/dd206750.aspx#nv12 // http://people.xiph.org/~xiphmont/containers/nut/nut4cc.txt // FourCC codes grouped according to implementation efficiency. // Primary formats should convert in 1 efficient step. // Secondary formats are converted in 2 steps. // Auxiliary formats call primary converters. enum FourCC { // 9 Primary YUV formats: 5 planar, 2 biplanar, 2 packed. FOURCC_I420 = FOURCC('I', '4', '2', '0'), FOURCC_I422 = FOURCC('I', '4', '2', '2'), FOURCC_I444 = FOURCC('I', '4', '4', '4'), FOURCC_I400 = FOURCC('I', '4', '0', '0'), FOURCC_NV21 = FOURCC('N', 'V', '2', '1'), FOURCC_NV12 = FOURCC('N', 'V', '1', '2'), FOURCC_YUY2 = FOURCC('Y', 'U', 'Y', '2'), FOURCC_UYVY = FOURCC('U', 'Y', 'V', 'Y'), FOURCC_H010 = FOURCC('H', '0', '1', '0'), // unofficial fourcc. 10 bit lsb // 1 Secondary YUV format: row biplanar. FOURCC_M420 = FOURCC('M', '4', '2', '0'), // 11 Primary RGB formats: 4 32 bpp, 2 24 bpp, 3 16 bpp, 1 10 bpc FOURCC_ARGB = FOURCC('A', 'R', 'G', 'B'), FOURCC_BGRA = FOURCC('B', 'G', 'R', 'A'), FOURCC_ABGR = FOURCC('A', 'B', 'G', 'R'), FOURCC_AR30 = FOURCC('A', 'R', '3', '0'), // 10 bit per channel. 2101010. FOURCC_AB30 = FOURCC('A', 'B', '3', '0'), // ABGR version of 10 bit FOURCC_24BG = FOURCC('2', '4', 'B', 'G'), FOURCC_RAW = FOURCC('r', 'a', 'w', ' '), FOURCC_RGBA = FOURCC('R', 'G', 'B', 'A'), FOURCC_RGBP = FOURCC('R', 'G', 'B', 'P'), // rgb565 LE. FOURCC_RGBO = FOURCC('R', 'G', 'B', 'O'), // argb1555 LE. FOURCC_R444 = FOURCC('R', '4', '4', '4'), // argb4444 LE. // 1 Primary Compressed YUV format. FOURCC_MJPG = FOURCC('M', 'J', 'P', 'G'), // 7 Auxiliary YUV variations: 3 with U and V planes are swapped, 1 Alias. FOURCC_YV12 = FOURCC('Y', 'V', '1', '2'), FOURCC_YV16 = FOURCC('Y', 'V', '1', '6'), FOURCC_YV24 = FOURCC('Y', 'V', '2', '4'), FOURCC_YU12 = FOURCC('Y', 'U', '1', '2'), // Linux version of I420. FOURCC_J420 = FOURCC('J', '4', '2', '0'), FOURCC_J400 = FOURCC('J', '4', '0', '0'), // unofficial fourcc FOURCC_H420 = FOURCC('H', '4', '2', '0'), // unofficial fourcc // 14 Auxiliary aliases. CanonicalFourCC() maps these to canonical fourcc. FOURCC_IYUV = FOURCC('I', 'Y', 'U', 'V'), // Alias for I420. FOURCC_YU16 = FOURCC('Y', 'U', '1', '6'), // Alias for I422. FOURCC_YU24 = FOURCC('Y', 'U', '2', '4'), // Alias for I444. FOURCC_YUYV = FOURCC('Y', 'U', 'Y', 'V'), // Alias for YUY2.
FOURCC_YUVS = FOURCC('y', 'u', 'v', 's'), // Alias for YUY2 on Mac. FOURCC_HDYC = FOURCC('H', 'D', 'Y', 'C'), // Alias for UYVY. FOURCC_2VUY = FOURCC('2', 'v', 'u', 'y'), // Alias for UYVY on Mac. FOURCC_JPEG = FOURCC('J', 'P', 'E', 'G'), // Alias for MJPG. FOURCC_DMB1 = FOURCC('d', 'm', 'b', '1'), // Alias for MJPG on Mac. FOURCC_BA81 = FOURCC('B', 'A', '8', '1'), // Alias for BGGR. FOURCC_RGB3 = FOURCC('R', 'G', 'B', '3'), // Alias for RAW. FOURCC_BGR3 = FOURCC('B', 'G', 'R', '3'), // Alias for 24BG. FOURCC_CM32 = FOURCC(0, 0, 0, 32), // Alias for BGRA kCMPixelFormat_32ARGB FOURCC_CM24 = FOURCC(0, 0, 0, 24), // Alias for RAW kCMPixelFormat_24RGB FOURCC_L555 = FOURCC('L', '5', '5', '5'), // Alias for RGBO. FOURCC_L565 = FOURCC('L', '5', '6', '5'), // Alias for RGBP. FOURCC_5551 = FOURCC('5', '5', '5', '1'), // Alias for RGBO. // deprecated formats. Not supported, but defined for backward compatibility. FOURCC_I411 = FOURCC('I', '4', '1', '1'), FOURCC_Q420 = FOURCC('Q', '4', '2', '0'), FOURCC_RGGB = FOURCC('R', 'G', 'G', 'B'), FOURCC_BGGR = FOURCC('B', 'G', 'G', 'R'), FOURCC_GRBG = FOURCC('G', 'R', 'B', 'G'), FOURCC_GBRG = FOURCC('G', 'B', 'R', 'G'), FOURCC_H264 = FOURCC('H', '2', '6', '4'), // Match any fourcc. FOURCC_ANY = -1, }; enum FourCCBpp { // Canonical fourcc codes used in our code. FOURCC_BPP_I420 = 12, FOURCC_BPP_I422 = 16, FOURCC_BPP_I444 = 24, FOURCC_BPP_I411 = 12, FOURCC_BPP_I400 = 8, FOURCC_BPP_NV21 = 12, FOURCC_BPP_NV12 = 12, FOURCC_BPP_YUY2 = 16, FOURCC_BPP_UYVY = 16, FOURCC_BPP_M420 = 12, FOURCC_BPP_Q420 = 12, FOURCC_BPP_ARGB = 32, FOURCC_BPP_BGRA = 32, FOURCC_BPP_ABGR = 32, FOURCC_BPP_RGBA = 32, FOURCC_BPP_AR30 = 32, FOURCC_BPP_AB30 = 32, FOURCC_BPP_24BG = 24, FOURCC_BPP_RAW = 24, FOURCC_BPP_RGBP = 16, FOURCC_BPP_RGBO = 16, FOURCC_BPP_R444 = 16, FOURCC_BPP_RGGB = 8, FOURCC_BPP_BGGR = 8, FOURCC_BPP_GRBG = 8, FOURCC_BPP_GBRG = 8, FOURCC_BPP_YV12 = 12, FOURCC_BPP_YV16 = 16, FOURCC_BPP_YV24 = 24, FOURCC_BPP_YU12 = 12, FOURCC_BPP_J420 = 12, FOURCC_BPP_J400 = 8, FOURCC_BPP_H420 = 12, FOURCC_BPP_H010 = 24, FOURCC_BPP_MJPG = 0, // 0 means unknown. FOURCC_BPP_H264 = 0, FOURCC_BPP_IYUV = 12, FOURCC_BPP_YU16 = 16, FOURCC_BPP_YU24 = 24, FOURCC_BPP_YUYV = 16, FOURCC_BPP_YUVS = 16, FOURCC_BPP_HDYC = 16, FOURCC_BPP_2VUY = 16, FOURCC_BPP_JPEG = 1, FOURCC_BPP_DMB1 = 1, FOURCC_BPP_BA81 = 8, FOURCC_BPP_RGB3 = 24, FOURCC_BPP_BGR3 = 24, FOURCC_BPP_CM32 = 32, FOURCC_BPP_CM24 = 24, // Match any fourcc. FOURCC_BPP_ANY = 0, // 0 means unknown. }; // Converts fourcc aliases into canonical ones. LIBYUV_API uint32_t CanonicalFourCC(uint32_t fourcc); #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // INCLUDE_LIBYUV_VIDEO_COMMON_H_ libvpx-1.8.2/third_party/libyuv/source/000077500000000000000000000000001357355204000201755ustar00rootroot00000000000000libvpx-1.8.2/third_party/libyuv/source/compare.cc000066400000000000000000000306651357355204000221440ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "libyuv/compare.h" #include <float.h> #include <math.h> #ifdef _OPENMP #include <omp.h> #endif #include "libyuv/basic_types.h" #include "libyuv/compare_row.h" #include "libyuv/cpu_id.h" #include "libyuv/row.h" #include "libyuv/video_common.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // hash seed of 5381 recommended. LIBYUV_API uint32_t HashDjb2(const uint8_t* src, uint64_t count, uint32_t seed) { const int kBlockSize = 1 << 15; // 32768; int remainder; uint32_t (*HashDjb2_SSE)(const uint8_t* src, int count, uint32_t seed) = HashDjb2_C; #if defined(HAS_HASHDJB2_SSE41) if (TestCpuFlag(kCpuHasSSE41)) { HashDjb2_SSE = HashDjb2_SSE41; } #endif #if defined(HAS_HASHDJB2_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { HashDjb2_SSE = HashDjb2_AVX2; } #endif while (count >= (uint64_t)(kBlockSize)) { seed = HashDjb2_SSE(src, kBlockSize, seed); src += kBlockSize; count -= kBlockSize; } remainder = (int)count & ~15; if (remainder) { seed = HashDjb2_SSE(src, remainder, seed); src += remainder; count -= remainder; } remainder = (int)count & 15; if (remainder) { seed = HashDjb2_C(src, remainder, seed); } return seed; } static uint32_t ARGBDetectRow_C(const uint8_t* argb, int width) { int x; for (x = 0; x < width - 1; x += 2) { if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB. return FOURCC_BGRA; } if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA. return FOURCC_ARGB; } if (argb[4] != 255) { // Second pixel first byte is not Alpha of 255. return FOURCC_BGRA; } if (argb[7] != 255) { // Second pixel 4th byte is not Alpha of 255. return FOURCC_ARGB; } argb += 8; } if (width & 1) { if (argb[0] != 255) { // First byte is not Alpha of 255, so not ARGB. return FOURCC_BGRA; } if (argb[3] != 255) { // 4th byte is not Alpha of 255, so not BGRA. return FOURCC_ARGB; } } return 0; } // Scan an opaque argb image and return fourcc based on alpha offset. // Returns FOURCC_ARGB, FOURCC_BGRA, or 0 if unknown. LIBYUV_API uint32_t ARGBDetect(const uint8_t* argb, int stride_argb, int width, int height) { uint32_t fourcc = 0; int h; // Coalesce rows. if (stride_argb == width * 4) { width *= height; height = 1; stride_argb = 0; } for (h = 0; h < height && fourcc == 0; ++h) { fourcc = ARGBDetectRow_C(argb, width); argb += stride_argb; } return fourcc; } // NEON version accumulates in 16 bit shorts which overflow at 65536 bytes. // So actual maximum is 1 less loop, which is 65536 - 32 bytes.
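// (Editor's note:) The distance computed here is the number of differing
// bits, i.e. the population count of src_a[i] ^ src_b[i] summed over the
// buffer. The loop below hands the selected kernel 32768-byte blocks, which
// keeps the NEON 16-bit accumulators from overflowing, and the OpenMP pragma
// (when enabled) parallelizes across those blocks.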
LIBYUV_API uint64_t ComputeHammingDistance(const uint8_t* src_a, const uint8_t* src_b, int count) { const int kBlockSize = 1 << 15; // 32768; const int kSimdSize = 64; // SIMD for multiple of 64, and C for remainder int remainder = count & (kBlockSize - 1) & ~(kSimdSize - 1); uint64_t diff = 0; int i; uint32_t (*HammingDistance)(const uint8_t* src_a, const uint8_t* src_b, int count) = HammingDistance_C; #if defined(HAS_HAMMINGDISTANCE_NEON) if (TestCpuFlag(kCpuHasNEON)) { HammingDistance = HammingDistance_NEON; } #endif #if defined(HAS_HAMMINGDISTANCE_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { HammingDistance = HammingDistance_SSSE3; } #endif #if defined(HAS_HAMMINGDISTANCE_SSE42) if (TestCpuFlag(kCpuHasSSE42)) { HammingDistance = HammingDistance_SSE42; } #endif #if defined(HAS_HAMMINGDISTANCE_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { HammingDistance = HammingDistance_AVX2; } #endif #if defined(HAS_HAMMINGDISTANCE_MSA) if (TestCpuFlag(kCpuHasMSA)) { HammingDistance = HammingDistance_MSA; } #endif #ifdef _OPENMP #pragma omp parallel for reduction(+ : diff) #endif for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { diff += HammingDistance(src_a + i, src_b + i, kBlockSize); } src_a += count & ~(kBlockSize - 1); src_b += count & ~(kBlockSize - 1); if (remainder) { diff += HammingDistance(src_a, src_b, remainder); src_a += remainder; src_b += remainder; } remainder = count & (kSimdSize - 1); if (remainder) { diff += HammingDistance_C(src_a, src_b, remainder); } return diff; } // TODO(fbarchard): Refactor into row function. LIBYUV_API uint64_t ComputeSumSquareError(const uint8_t* src_a, const uint8_t* src_b, int count) { // SumSquareError returns values 0 to 65535 for each squared difference. // Up to 65536 of those can be summed and remain within a uint32_t. // After each block of 65536 pixels, accumulate into a uint64_t. const int kBlockSize = 65536; int remainder = count & (kBlockSize - 1) & ~31; uint64_t sse = 0; int i; uint32_t (*SumSquareError)(const uint8_t* src_a, const uint8_t* src_b, int count) = SumSquareError_C; #if defined(HAS_SUMSQUAREERROR_NEON) if (TestCpuFlag(kCpuHasNEON)) { SumSquareError = SumSquareError_NEON; } #endif #if defined(HAS_SUMSQUAREERROR_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { // Note only used for multiples of 16 so count is not checked. SumSquareError = SumSquareError_SSE2; } #endif #if defined(HAS_SUMSQUAREERROR_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { // Note only used for multiples of 32 so count is not checked. SumSquareError = SumSquareError_AVX2; } #endif #if defined(HAS_SUMSQUAREERROR_MSA) if (TestCpuFlag(kCpuHasMSA)) { SumSquareError = SumSquareError_MSA; } #endif #ifdef _OPENMP #pragma omp parallel for reduction(+ : sse) #endif for (i = 0; i < (count - (kBlockSize - 1)); i += kBlockSize) { sse += SumSquareError(src_a + i, src_b + i, kBlockSize); } src_a += count & ~(kBlockSize - 1); src_b += count & ~(kBlockSize - 1); if (remainder) { sse += SumSquareError(src_a, src_b, remainder); src_a += remainder; src_b += remainder; } remainder = count & 31; if (remainder) { sse += SumSquareError_C(src_a, src_b, remainder); } return sse; } LIBYUV_API uint64_t ComputeSumSquareErrorPlane(const uint8_t* src_a, int stride_a, const uint8_t* src_b, int stride_b, int width, int height) { uint64_t sse = 0; int h; // Coalesce rows. 
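// (Editor's note:) If neither plane has row padding, the rows abut in
// memory, so the plane can be treated as one long row of width * height
// bytes; the coalesced shape below runs the SIMD kernel in a single pass
// instead of once per row.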
if (stride_a == width && stride_b == width) { width *= height; height = 1; stride_a = stride_b = 0; } for (h = 0; h < height; ++h) { sse += ComputeSumSquareError(src_a, src_b, width); src_a += stride_a; src_b += stride_b; } return sse; } LIBYUV_API double SumSquareErrorToPsnr(uint64_t sse, uint64_t count) { double psnr; if (sse > 0) { double mse = (double)count / (double)sse; psnr = 10.0 * log10(255.0 * 255.0 * mse); } else { psnr = kMaxPsnr; // Limit to prevent divide by 0 } if (psnr > kMaxPsnr) { psnr = kMaxPsnr; } return psnr; } LIBYUV_API double CalcFramePsnr(const uint8_t* src_a, int stride_a, const uint8_t* src_b, int stride_b, int width, int height) { const uint64_t samples = (uint64_t)width * (uint64_t)height; const uint64_t sse = ComputeSumSquareErrorPlane(src_a, stride_a, src_b, stride_b, width, height); return SumSquareErrorToPsnr(sse, samples); } LIBYUV_API double I420Psnr(const uint8_t* src_y_a, int stride_y_a, const uint8_t* src_u_a, int stride_u_a, const uint8_t* src_v_a, int stride_v_a, const uint8_t* src_y_b, int stride_y_b, const uint8_t* src_u_b, int stride_u_b, const uint8_t* src_v_b, int stride_v_b, int width, int height) { const uint64_t sse_y = ComputeSumSquareErrorPlane( src_y_a, stride_y_a, src_y_b, stride_y_b, width, height); const int width_uv = (width + 1) >> 1; const int height_uv = (height + 1) >> 1; const uint64_t sse_u = ComputeSumSquareErrorPlane( src_u_a, stride_u_a, src_u_b, stride_u_b, width_uv, height_uv); const uint64_t sse_v = ComputeSumSquareErrorPlane( src_v_a, stride_v_a, src_v_b, stride_v_b, width_uv, height_uv); const uint64_t samples = (uint64_t)width * (uint64_t)height + 2 * ((uint64_t)width_uv * (uint64_t)height_uv); const uint64_t sse = sse_y + sse_u + sse_v; return SumSquareErrorToPsnr(sse, samples); } static const int64_t cc1 = 26634; // (64^2*(.01*255)^2 static const int64_t cc2 = 239708; // (64^2*(.03*255)^2 static double Ssim8x8_C(const uint8_t* src_a, int stride_a, const uint8_t* src_b, int stride_b) { int64_t sum_a = 0; int64_t sum_b = 0; int64_t sum_sq_a = 0; int64_t sum_sq_b = 0; int64_t sum_axb = 0; int i; for (i = 0; i < 8; ++i) { int j; for (j = 0; j < 8; ++j) { sum_a += src_a[j]; sum_b += src_b[j]; sum_sq_a += src_a[j] * src_a[j]; sum_sq_b += src_b[j] * src_b[j]; sum_axb += src_a[j] * src_b[j]; } src_a += stride_a; src_b += stride_b; } { const int64_t count = 64; // scale the constants by number of pixels const int64_t c1 = (cc1 * count * count) >> 12; const int64_t c2 = (cc2 * count * count) >> 12; const int64_t sum_a_x_sum_b = sum_a * sum_b; const int64_t ssim_n = (2 * sum_a_x_sum_b + c1) * (2 * count * sum_axb - 2 * sum_a_x_sum_b + c2); const int64_t sum_a_sq = sum_a * sum_a; const int64_t sum_b_sq = sum_b * sum_b; const int64_t ssim_d = (sum_a_sq + sum_b_sq + c1) * (count * sum_sq_a - sum_a_sq + count * sum_sq_b - sum_b_sq + c2); if (ssim_d == 0.0) { return DBL_MAX; } return ssim_n * 1.0 / ssim_d; } } // We are using a 8x8 moving window with starting location of each 8x8 window // on the 4x4 pixel grid. Such arrangement allows the windows to overlap // block boundaries to penalize blocking artifacts. 
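// (Editor's note:) Each window is scored with the standard SSIM ratio,
//   ssim = ((2*mu_a*mu_b + c1) * (2*cov_ab + c2)) /
//          ((mu_a^2 + mu_b^2 + c1) * (var_a + var_b + c2)),
// which Ssim8x8_C above evaluates using raw integer sums; the cc1/cc2
// constants are pre-scaled by the 64-pixel count so no per-window division
// or floating point is needed until the final ratio.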
LIBYUV_API double CalcFrameSsim(const uint8_t* src_a, int stride_a, const uint8_t* src_b, int stride_b, int width, int height) { int samples = 0; double ssim_total = 0; double (*Ssim8x8)(const uint8_t* src_a, int stride_a, const uint8_t* src_b, int stride_b) = Ssim8x8_C; // sample point start with each 4x4 location int i; for (i = 0; i < height - 8; i += 4) { int j; for (j = 0; j < width - 8; j += 4) { ssim_total += Ssim8x8(src_a + j, stride_a, src_b + j, stride_b); samples++; } src_a += stride_a * 4; src_b += stride_b * 4; } ssim_total /= samples; return ssim_total; } LIBYUV_API double I420Ssim(const uint8_t* src_y_a, int stride_y_a, const uint8_t* src_u_a, int stride_u_a, const uint8_t* src_v_a, int stride_v_a, const uint8_t* src_y_b, int stride_y_b, const uint8_t* src_u_b, int stride_u_b, const uint8_t* src_v_b, int stride_v_b, int width, int height) { const double ssim_y = CalcFrameSsim(src_y_a, stride_y_a, src_y_b, stride_y_b, width, height); const int width_uv = (width + 1) >> 1; const int height_uv = (height + 1) >> 1; const double ssim_u = CalcFrameSsim(src_u_a, stride_u_a, src_u_b, stride_u_b, width_uv, height_uv); const double ssim_v = CalcFrameSsim(src_v_a, stride_v_a, src_v_b, stride_v_b, width_uv, height_uv); return ssim_y * 0.8 + 0.1 * (ssim_u + ssim_v); } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/compare_common.cc000066400000000000000000000046311357355204000235060ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/basic_types.h" #include "libyuv/compare_row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #if ORIGINAL_OPT uint32_t HammingDistance_C1(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t diff = 0u; int i; for (i = 0; i < count; ++i) { int x = src_a[i] ^ src_b[i]; if (x & 1) ++diff; if (x & 2) ++diff; if (x & 4) ++diff; if (x & 8) ++diff; if (x & 16) ++diff; if (x & 32) ++diff; if (x & 64) ++diff; if (x & 128) ++diff; } return diff; } #endif // Hakmem method for hamming distance. uint32_t HammingDistance_C(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t diff = 0u; int i; for (i = 0; i < count - 3; i += 4) { uint32_t x = *((const uint32_t*)src_a) ^ *((const uint32_t*)src_b); uint32_t u = x - ((x >> 1) & 0x55555555); u = ((u >> 2) & 0x33333333) + (u & 0x33333333); diff += ((((u + (u >> 4)) & 0x0f0f0f0f) * 0x01010101) >> 24); src_a += 4; src_b += 4; } for (; i < count; ++i) { uint32_t x = *src_a ^ *src_b; uint32_t u = x - ((x >> 1) & 0x55); u = ((u >> 2) & 0x33) + (u & 0x33); diff += (u + (u >> 4)) & 0x0f; src_a += 1; src_b += 1; } return diff; } uint32_t SumSquareError_C(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t sse = 0u; int i; for (i = 0; i < count; ++i) { int diff = src_a[i] - src_b[i]; sse += (uint32_t)(diff * diff); } return sse; } // hash seed of 5381 recommended. // Internal C version of HashDjb2 with int sized count for efficiency. 
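// (Editor's note:) The update below is the classic djb2 recurrence,
// hash = hash * 33 + byte, written as hash += (hash << 5) + src[i]. The SIMD
// versions in compare_gcc.cc rely on the linearity of that recurrence:
// kHash16x33 (33^16) advances the running hash sixteen bytes at a time while
// the kHashMul tables supply 33^15 .. 33^0 as weights for the sixteen new
// bytes.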
uint32_t HashDjb2_C(const uint8_t* src, int count, uint32_t seed) { uint32_t hash = seed; int i; for (i = 0; i < count; ++i) { hash += (hash << 5) + src[i]; } return hash; } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/compare_gcc.cc000066400000000000000000000336661357355204000227640ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/basic_types.h" #include "libyuv/compare_row.h" #include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // This module is for GCC x86 and x64. #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) #if defined(__x86_64__) uint32_t HammingDistance_SSE42(const uint8_t* src_a, const uint8_t* src_b, int count) { uint64_t diff = 0u; asm volatile( "xor %3,%3 \n" "xor %%r8,%%r8 \n" "xor %%r9,%%r9 \n" "xor %%r10,%%r10 \n" // Process 32 bytes per loop. LABELALIGN "1: \n" "mov (%0),%%rcx \n" "mov 0x8(%0),%%rdx \n" "xor (%1),%%rcx \n" "xor 0x8(%1),%%rdx \n" "popcnt %%rcx,%%rcx \n" "popcnt %%rdx,%%rdx \n" "mov 0x10(%0),%%rsi \n" "mov 0x18(%0),%%rdi \n" "xor 0x10(%1),%%rsi \n" "xor 0x18(%1),%%rdi \n" "popcnt %%rsi,%%rsi \n" "popcnt %%rdi,%%rdi \n" "add $0x20,%0 \n" "add $0x20,%1 \n" "add %%rcx,%3 \n" "add %%rdx,%%r8 \n" "add %%rsi,%%r9 \n" "add %%rdi,%%r10 \n" "sub $0x20,%2 \n" "jg 1b \n" "add %%r8, %3 \n" "add %%r9, %3 \n" "add %%r10, %3 \n" : "+r"(src_a), // %0 "+r"(src_b), // %1 "+r"(count), // %2 "=r"(diff) // %3 : : "memory", "cc", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10"); return static_cast<uint32_t>(diff); } #else uint32_t HammingDistance_SSE42(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t diff = 0u; asm volatile( // Process 16 bytes per loop.
LABELALIGN "1: \n" "mov (%0),%%ecx \n" "mov 0x4(%0),%%edx \n" "xor (%1),%%ecx \n" "xor 0x4(%1),%%edx \n" "popcnt %%ecx,%%ecx \n" "add %%ecx,%3 \n" "popcnt %%edx,%%edx \n" "add %%edx,%3 \n" "mov 0x8(%0),%%ecx \n" "mov 0xc(%0),%%edx \n" "xor 0x8(%1),%%ecx \n" "xor 0xc(%1),%%edx \n" "popcnt %%ecx,%%ecx \n" "add %%ecx,%3 \n" "popcnt %%edx,%%edx \n" "add %%edx,%3 \n" "add $0x10,%0 \n" "add $0x10,%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_a), // %0 "+r"(src_b), // %1 "+r"(count), // %2 "+r"(diff) // %3 : : "memory", "cc", "ecx", "edx"); return diff; } #endif static const vec8 kNibbleMask = {15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15}; static const vec8 kBitCount = {0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4}; uint32_t HammingDistance_SSSE3(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t diff = 0u; asm volatile( "movdqa %4,%%xmm2 \n" "movdqa %5,%%xmm3 \n" "pxor %%xmm0,%%xmm0 \n" "pxor %%xmm1,%%xmm1 \n" "sub %0,%1 \n" LABELALIGN "1: \n" "movdqa (%0),%%xmm4 \n" "movdqa 0x10(%0), %%xmm5 \n" "pxor (%0,%1), %%xmm4 \n" "movdqa %%xmm4,%%xmm6 \n" "pand %%xmm2,%%xmm6 \n" "psrlw $0x4,%%xmm4 \n" "movdqa %%xmm3,%%xmm7 \n" "pshufb %%xmm6,%%xmm7 \n" "pand %%xmm2,%%xmm4 \n" "movdqa %%xmm3,%%xmm6 \n" "pshufb %%xmm4,%%xmm6 \n" "paddb %%xmm7,%%xmm6 \n" "pxor 0x10(%0,%1),%%xmm5 \n" "add $0x20,%0 \n" "movdqa %%xmm5,%%xmm4 \n" "pand %%xmm2,%%xmm5 \n" "psrlw $0x4,%%xmm4 \n" "movdqa %%xmm3,%%xmm7 \n" "pshufb %%xmm5,%%xmm7 \n" "pand %%xmm2,%%xmm4 \n" "movdqa %%xmm3,%%xmm5 \n" "pshufb %%xmm4,%%xmm5 \n" "paddb %%xmm7,%%xmm5 \n" "paddb %%xmm5,%%xmm6 \n" "psadbw %%xmm1,%%xmm6 \n" "paddd %%xmm6,%%xmm0 \n" "sub $0x20,%2 \n" "jg 1b \n" "pshufd $0xaa,%%xmm0,%%xmm1 \n" "paddd %%xmm1,%%xmm0 \n" "movd %%xmm0, %3 \n" : "+r"(src_a), // %0 "+r"(src_b), // %1 "+r"(count), // %2 "=r"(diff) // %3 : "m"(kNibbleMask), // %4 "m"(kBitCount) // %5 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); return diff; } #ifdef HAS_HAMMINGDISTANCE_AVX2 uint32_t HammingDistance_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t diff = 0u; asm volatile( "vbroadcastf128 %4,%%ymm2 \n" "vbroadcastf128 %5,%%ymm3 \n" "vpxor %%ymm0,%%ymm0,%%ymm0 \n" "vpxor %%ymm1,%%ymm1,%%ymm1 \n" "sub %0,%1 \n" LABELALIGN "1: \n" "vmovdqa (%0),%%ymm4 \n" "vmovdqa 0x20(%0), %%ymm5 \n" "vpxor (%0,%1), %%ymm4, %%ymm4 \n" "vpand %%ymm2,%%ymm4,%%ymm6 \n" "vpsrlw $0x4,%%ymm4,%%ymm4 \n" "vpshufb %%ymm6,%%ymm3,%%ymm6 \n" "vpand %%ymm2,%%ymm4,%%ymm4 \n" "vpshufb %%ymm4,%%ymm3,%%ymm4 \n" "vpaddb %%ymm4,%%ymm6,%%ymm6 \n" "vpxor 0x20(%0,%1),%%ymm5,%%ymm4 \n" "add $0x40,%0 \n" "vpand %%ymm2,%%ymm4,%%ymm5 \n" "vpsrlw $0x4,%%ymm4,%%ymm4 \n" "vpshufb %%ymm5,%%ymm3,%%ymm5 \n" "vpand %%ymm2,%%ymm4,%%ymm4 \n" "vpshufb %%ymm4,%%ymm3,%%ymm4 \n" "vpaddb %%ymm5,%%ymm4,%%ymm4 \n" "vpaddb %%ymm6,%%ymm4,%%ymm4 \n" "vpsadbw %%ymm1,%%ymm4,%%ymm4 \n" "vpaddd %%ymm0,%%ymm4,%%ymm0 \n" "sub $0x40,%2 \n" "jg 1b \n" "vpermq $0xb1,%%ymm0,%%ymm1 \n" "vpaddd %%ymm1,%%ymm0,%%ymm0 \n" "vpermq $0xaa,%%ymm0,%%ymm1 \n" "vpaddd %%ymm1,%%ymm0,%%ymm0 \n" "vmovd %%xmm0, %3 \n" "vzeroupper \n" : "+r"(src_a), // %0 "+r"(src_b), // %1 "+r"(count), // %2 "=r"(diff) // %3 : "m"(kNibbleMask), // %4 "m"(kBitCount) // %5 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); return diff; } #endif // HAS_HAMMINGDISTANCE_AVX2 uint32_t SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t sse; asm volatile( "pxor %%xmm0,%%xmm0 \n" "pxor %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" "movdqu 
(%0),%%xmm1 \n" "lea 0x10(%0),%0 \n" "movdqu (%1),%%xmm2 \n" "lea 0x10(%1),%1 \n" "movdqa %%xmm1,%%xmm3 \n" "psubusb %%xmm2,%%xmm1 \n" "psubusb %%xmm3,%%xmm2 \n" "por %%xmm2,%%xmm1 \n" "movdqa %%xmm1,%%xmm2 \n" "punpcklbw %%xmm5,%%xmm1 \n" "punpckhbw %%xmm5,%%xmm2 \n" "pmaddwd %%xmm1,%%xmm1 \n" "pmaddwd %%xmm2,%%xmm2 \n" "paddd %%xmm1,%%xmm0 \n" "paddd %%xmm2,%%xmm0 \n" "sub $0x10,%2 \n" "jg 1b \n" "pshufd $0xee,%%xmm0,%%xmm1 \n" "paddd %%xmm1,%%xmm0 \n" "pshufd $0x1,%%xmm0,%%xmm1 \n" "paddd %%xmm1,%%xmm0 \n" "movd %%xmm0,%3 \n" : "+r"(src_a), // %0 "+r"(src_b), // %1 "+r"(count), // %2 "=g"(sse) // %3 ::"memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); return sse; } static const uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16 static const uvec32 kHashMul0 = { 0x0c3525e1, // 33 ^ 15 0xa3476dc1, // 33 ^ 14 0x3b4039a1, // 33 ^ 13 0x4f5f0981, // 33 ^ 12 }; static const uvec32 kHashMul1 = { 0x30f35d61, // 33 ^ 11 0x855cb541, // 33 ^ 10 0x040a9121, // 33 ^ 9 0x747c7101, // 33 ^ 8 }; static const uvec32 kHashMul2 = { 0xec41d4e1, // 33 ^ 7 0x4cfa3cc1, // 33 ^ 6 0x025528a1, // 33 ^ 5 0x00121881, // 33 ^ 4 }; static const uvec32 kHashMul3 = { 0x00008c61, // 33 ^ 3 0x00000441, // 33 ^ 2 0x00000021, // 33 ^ 1 0x00000001, // 33 ^ 0 }; uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) { uint32_t hash; asm volatile( "movd %2,%%xmm0 \n" "pxor %%xmm7,%%xmm7 \n" "movdqa %4,%%xmm6 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm1 \n" "lea 0x10(%0),%0 \n" "pmulld %%xmm6,%%xmm0 \n" "movdqa %5,%%xmm5 \n" "movdqa %%xmm1,%%xmm2 \n" "punpcklbw %%xmm7,%%xmm2 \n" "movdqa %%xmm2,%%xmm3 \n" "punpcklwd %%xmm7,%%xmm3 \n" "pmulld %%xmm5,%%xmm3 \n" "movdqa %6,%%xmm5 \n" "movdqa %%xmm2,%%xmm4 \n" "punpckhwd %%xmm7,%%xmm4 \n" "pmulld %%xmm5,%%xmm4 \n" "movdqa %7,%%xmm5 \n" "punpckhbw %%xmm7,%%xmm1 \n" "movdqa %%xmm1,%%xmm2 \n" "punpcklwd %%xmm7,%%xmm2 \n" "pmulld %%xmm5,%%xmm2 \n" "movdqa %8,%%xmm5 \n" "punpckhwd %%xmm7,%%xmm1 \n" "pmulld %%xmm5,%%xmm1 \n" "paddd %%xmm4,%%xmm3 \n" "paddd %%xmm2,%%xmm1 \n" "paddd %%xmm3,%%xmm1 \n" "pshufd $0xe,%%xmm1,%%xmm2 \n" "paddd %%xmm2,%%xmm1 \n" "pshufd $0x1,%%xmm1,%%xmm2 \n" "paddd %%xmm2,%%xmm1 \n" "paddd %%xmm1,%%xmm0 \n" "sub $0x10,%1 \n" "jg 1b \n" "movd %%xmm0,%3 \n" : "+r"(src), // %0 "+r"(count), // %1 "+rm"(seed), // %2 "=g"(hash) // %3 : "m"(kHash16x33), // %4 "m"(kHashMul0), // %5 "m"(kHashMul1), // %6 "m"(kHashMul2), // %7 "m"(kHashMul3) // %8 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); return hash; } #endif // defined(__x86_64__) || (defined(__i386__) && !defined(__pic__))) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/compare_msa.cc000066400000000000000000000056341357355204000230020ustar00rootroot00000000000000/* * Copyright 2017 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "libyuv/basic_types.h" #include "libyuv/compare_row.h" #include "libyuv/row.h" // This module is for GCC MSA #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #include "libyuv/macros_msa.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif uint32_t HammingDistance_MSA(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t diff = 0u; int i; v16u8 src0, src1, src2, src3; v2i64 vec0 = {0}, vec1 = {0}; for (i = 0; i < count; i += 32) { src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0); src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16); src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0); src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16); src0 ^= src2; src1 ^= src3; vec0 += __msa_pcnt_d((v2i64)src0); vec1 += __msa_pcnt_d((v2i64)src1); src_a += 32; src_b += 32; } vec0 += vec1; diff = (uint32_t)__msa_copy_u_w((v4i32)vec0, 0); diff += (uint32_t)__msa_copy_u_w((v4i32)vec0, 2); return diff; } uint32_t SumSquareError_MSA(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t sse = 0u; int i; v16u8 src0, src1, src2, src3; v8i16 vec0, vec1, vec2, vec3; v4i32 reg0 = {0}, reg1 = {0}, reg2 = {0}, reg3 = {0}; v2i64 tmp0; for (i = 0; i < count; i += 32) { src0 = (v16u8)__msa_ld_b((v16i8*)src_a, 0); src1 = (v16u8)__msa_ld_b((v16i8*)src_a, 16); src2 = (v16u8)__msa_ld_b((v16i8*)src_b, 0); src3 = (v16u8)__msa_ld_b((v16i8*)src_b, 16); vec0 = (v8i16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); vec1 = (v8i16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); vec2 = (v8i16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); vec3 = (v8i16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); vec0 = __msa_hsub_u_h((v16u8)vec0, (v16u8)vec0); vec1 = __msa_hsub_u_h((v16u8)vec1, (v16u8)vec1); vec2 = __msa_hsub_u_h((v16u8)vec2, (v16u8)vec2); vec3 = __msa_hsub_u_h((v16u8)vec3, (v16u8)vec3); reg0 = __msa_dpadd_s_w(reg0, vec0, vec0); reg1 = __msa_dpadd_s_w(reg1, vec1, vec1); reg2 = __msa_dpadd_s_w(reg2, vec2, vec2); reg3 = __msa_dpadd_s_w(reg3, vec3, vec3); src_a += 32; src_b += 32; } reg0 += reg1; reg2 += reg3; reg0 += reg2; tmp0 = __msa_hadd_s_d(reg0, reg0); sse = (uint32_t)__msa_copy_u_w((v4i32)tmp0, 0); sse += (uint32_t)__msa_copy_u_w((v4i32)tmp0, 2); return sse; } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) libvpx-1.8.2/third_party/libyuv/source/compare_neon.cc000066400000000000000000000067021357355204000231560ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/basic_types.h" #include "libyuv/compare_row.h" #include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ !defined(__aarch64__) // 256 bits at a time // uses short accumulator which restricts count to 131 KB uint32_t HammingDistance_NEON(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t diff; asm volatile( "vmov.u16 q4, #0 \n" // accumulator "1: \n" "vld1.8 {q0, q1}, [%0]! \n" "vld1.8 {q2, q3}, [%1]! 
\n" "veor.32 q0, q0, q2 \n" "veor.32 q1, q1, q3 \n" "vcnt.i8 q0, q0 \n" "vcnt.i8 q1, q1 \n" "subs %2, %2, #32 \n" "vadd.u8 q0, q0, q1 \n" // 16 byte counts "vpadal.u8 q4, q0 \n" // 8 shorts "bgt 1b \n" "vpaddl.u16 q0, q4 \n" // 4 ints "vpadd.u32 d0, d0, d1 \n" "vpadd.u32 d0, d0, d0 \n" "vmov.32 %3, d0[0] \n" : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff) : : "cc", "q0", "q1", "q2", "q3", "q4"); return diff; } uint32_t SumSquareError_NEON(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t sse; asm volatile( "vmov.u8 q8, #0 \n" "vmov.u8 q10, #0 \n" "vmov.u8 q9, #0 \n" "vmov.u8 q11, #0 \n" "1: \n" "vld1.8 {q0}, [%0]! \n" "vld1.8 {q1}, [%1]! \n" "subs %2, %2, #16 \n" "vsubl.u8 q2, d0, d2 \n" "vsubl.u8 q3, d1, d3 \n" "vmlal.s16 q8, d4, d4 \n" "vmlal.s16 q9, d6, d6 \n" "vmlal.s16 q10, d5, d5 \n" "vmlal.s16 q11, d7, d7 \n" "bgt 1b \n" "vadd.u32 q8, q8, q9 \n" "vadd.u32 q10, q10, q11 \n" "vadd.u32 q11, q8, q10 \n" "vpaddl.u32 q1, q11 \n" "vadd.u64 d0, d2, d3 \n" "vmov.32 %3, d0[0] \n" : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse) : : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); return sse; } #endif // defined(__ARM_NEON__) && !defined(__aarch64__) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/compare_neon64.cc000066400000000000000000000063051357355204000233270ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "libyuv/basic_types.h" #include "libyuv/compare_row.h" #include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) // 256 bits at a time // uses short accumulator which restricts count to 131 KB uint32_t HammingDistance_NEON(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t diff; asm volatile( "movi v4.8h, #0 \n" "1: \n" "ld1 {v0.16b, v1.16b}, [%0], #32 \n" "ld1 {v2.16b, v3.16b}, [%1], #32 \n" "eor v0.16b, v0.16b, v2.16b \n" "eor v1.16b, v1.16b, v3.16b \n" "cnt v0.16b, v0.16b \n" "cnt v1.16b, v1.16b \n" "subs %w2, %w2, #32 \n" "add v0.16b, v0.16b, v1.16b \n" "uadalp v4.8h, v0.16b \n" "b.gt 1b \n" "uaddlv s4, v4.8h \n" "fmov %w3, s4 \n" : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(diff) : : "cc", "v0", "v1", "v2", "v3", "v4"); return diff; } uint32_t SumSquareError_NEON(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t sse; asm volatile( "eor v16.16b, v16.16b, v16.16b \n" "eor v18.16b, v18.16b, v18.16b \n" "eor v17.16b, v17.16b, v17.16b \n" "eor v19.16b, v19.16b, v19.16b \n" "1: \n" "ld1 {v0.16b}, [%0], #16 \n" "ld1 {v1.16b}, [%1], #16 \n" "subs %w2, %w2, #16 \n" "usubl v2.8h, v0.8b, v1.8b \n" "usubl2 v3.8h, v0.16b, v1.16b \n" "smlal v16.4s, v2.4h, v2.4h \n" "smlal v17.4s, v3.4h, v3.4h \n" "smlal2 v18.4s, v2.8h, v2.8h \n" "smlal2 v19.4s, v3.8h, v3.8h \n" "b.gt 1b \n" "add v16.4s, v16.4s, v17.4s \n" "add v18.4s, v18.4s, v19.4s \n" "add v19.4s, v16.4s, v18.4s \n" "addv s0, v19.4s \n" "fmov %w3, s0 \n" : "+r"(src_a), "+r"(src_b), "+r"(count), "=r"(sse) : : "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); return sse; } #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/compare_win.cc000066400000000000000000000151631357355204000230150ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "libyuv/basic_types.h" #include "libyuv/compare_row.h" #include "libyuv/row.h" #if defined(_MSC_VER) #include // For __popcnt #endif #ifdef __cplusplus namespace libyuv { extern "C" { #endif // This module is for 32 bit Visual C x86 and clangcl #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) uint32_t HammingDistance_SSE42(const uint8_t* src_a, const uint8_t* src_b, int count) { uint32_t diff = 0u; int i; for (i = 0; i < count - 3; i += 4) { uint32_t x = *((uint32_t*)src_a) ^ *((uint32_t*)src_b); // NOLINT src_a += 4; src_b += 4; diff += __popcnt(x); } return diff; } __declspec(naked) uint32_t SumSquareError_SSE2(const uint8_t* src_a, const uint8_t* src_b, int count) { __asm { mov eax, [esp + 4] // src_a mov edx, [esp + 8] // src_b mov ecx, [esp + 12] // count pxor xmm0, xmm0 pxor xmm5, xmm5 wloop: movdqu xmm1, [eax] lea eax, [eax + 16] movdqu xmm2, [edx] lea edx, [edx + 16] movdqa xmm3, xmm1 // abs trick psubusb xmm1, xmm2 psubusb xmm2, xmm3 por xmm1, xmm2 movdqa xmm2, xmm1 punpcklbw xmm1, xmm5 punpckhbw xmm2, xmm5 pmaddwd xmm1, xmm1 pmaddwd xmm2, xmm2 paddd xmm0, xmm1 paddd xmm0, xmm2 sub ecx, 16 jg wloop pshufd xmm1, xmm0, 0xee paddd xmm0, xmm1 pshufd xmm1, xmm0, 0x01 paddd xmm0, xmm1 movd eax, xmm0 ret } } // Visual C 2012 required for AVX2. #if _MSC_VER >= 1700 // C4752: found Intel(R) Advanced Vector Extensions; consider using /arch:AVX. #pragma warning(disable : 4752) __declspec(naked) uint32_t SumSquareError_AVX2(const uint8_t* src_a, const uint8_t* src_b, int count) { __asm { mov eax, [esp + 4] // src_a mov edx, [esp + 8] // src_b mov ecx, [esp + 12] // count vpxor ymm0, ymm0, ymm0 // sum vpxor ymm5, ymm5, ymm5 // constant 0 for unpck sub edx, eax wloop: vmovdqu ymm1, [eax] vmovdqu ymm2, [eax + edx] lea eax, [eax + 32] vpsubusb ymm3, ymm1, ymm2 // abs difference trick vpsubusb ymm2, ymm2, ymm1 vpor ymm1, ymm2, ymm3 vpunpcklbw ymm2, ymm1, ymm5 // u16. mutates order. vpunpckhbw ymm1, ymm1, ymm5 vpmaddwd ymm2, ymm2, ymm2 // square + hadd to u32. vpmaddwd ymm1, ymm1, ymm1 vpaddd ymm0, ymm0, ymm1 vpaddd ymm0, ymm0, ymm2 sub ecx, 32 jg wloop vpshufd ymm1, ymm0, 0xee // 3, 2 + 1, 0 both lanes. vpaddd ymm0, ymm0, ymm1 vpshufd ymm1, ymm0, 0x01 // 1 + 0 both lanes. vpaddd ymm0, ymm0, ymm1 vpermq ymm1, ymm0, 0x02 // high + low lane. 
vpaddd ymm0, ymm0, ymm1 vmovd eax, xmm0 vzeroupper ret } } #endif // _MSC_VER >= 1700 uvec32 kHash16x33 = {0x92d9e201, 0, 0, 0}; // 33 ^ 16 uvec32 kHashMul0 = { 0x0c3525e1, // 33 ^ 15 0xa3476dc1, // 33 ^ 14 0x3b4039a1, // 33 ^ 13 0x4f5f0981, // 33 ^ 12 }; uvec32 kHashMul1 = { 0x30f35d61, // 33 ^ 11 0x855cb541, // 33 ^ 10 0x040a9121, // 33 ^ 9 0x747c7101, // 33 ^ 8 }; uvec32 kHashMul2 = { 0xec41d4e1, // 33 ^ 7 0x4cfa3cc1, // 33 ^ 6 0x025528a1, // 33 ^ 5 0x00121881, // 33 ^ 4 }; uvec32 kHashMul3 = { 0x00008c61, // 33 ^ 3 0x00000441, // 33 ^ 2 0x00000021, // 33 ^ 1 0x00000001, // 33 ^ 0 }; __declspec(naked) uint32_t HashDjb2_SSE41(const uint8_t* src, int count, uint32_t seed) { __asm { mov eax, [esp + 4] // src mov ecx, [esp + 8] // count movd xmm0, [esp + 12] // seed pxor xmm7, xmm7 // constant 0 for unpck movdqa xmm6, xmmword ptr kHash16x33 wloop: movdqu xmm1, [eax] // src[0-15] lea eax, [eax + 16] pmulld xmm0, xmm6 // hash *= 33 ^ 16 movdqa xmm5, xmmword ptr kHashMul0 movdqa xmm2, xmm1 punpcklbw xmm2, xmm7 // src[0-7] movdqa xmm3, xmm2 punpcklwd xmm3, xmm7 // src[0-3] pmulld xmm3, xmm5 movdqa xmm5, xmmword ptr kHashMul1 movdqa xmm4, xmm2 punpckhwd xmm4, xmm7 // src[4-7] pmulld xmm4, xmm5 movdqa xmm5, xmmword ptr kHashMul2 punpckhbw xmm1, xmm7 // src[8-15] movdqa xmm2, xmm1 punpcklwd xmm2, xmm7 // src[8-11] pmulld xmm2, xmm5 movdqa xmm5, xmmword ptr kHashMul3 punpckhwd xmm1, xmm7 // src[12-15] pmulld xmm1, xmm5 paddd xmm3, xmm4 // add 16 results paddd xmm1, xmm2 paddd xmm1, xmm3 pshufd xmm2, xmm1, 0x0e // upper 2 dwords paddd xmm1, xmm2 pshufd xmm2, xmm1, 0x01 paddd xmm1, xmm2 paddd xmm0, xmm1 sub ecx, 16 jg wloop movd eax, xmm0 // return hash ret } } // Visual C 2012 required for AVX2. #if _MSC_VER >= 1700 __declspec(naked) uint32_t HashDjb2_AVX2(const uint8_t* src, int count, uint32_t seed) { __asm { mov eax, [esp + 4] // src mov ecx, [esp + 8] // count vmovd xmm0, [esp + 12] // seed wloop: vpmovzxbd xmm3, [eax] // src[0-3] vpmulld xmm0, xmm0, xmmword ptr kHash16x33 // hash *= 33 ^ 16 vpmovzxbd xmm4, [eax + 4] // src[4-7] vpmulld xmm3, xmm3, xmmword ptr kHashMul0 vpmovzxbd xmm2, [eax + 8] // src[8-11] vpmulld xmm4, xmm4, xmmword ptr kHashMul1 vpmovzxbd xmm1, [eax + 12] // src[12-15] vpmulld xmm2, xmm2, xmmword ptr kHashMul2 lea eax, [eax + 16] vpmulld xmm1, xmm1, xmmword ptr kHashMul3 vpaddd xmm3, xmm3, xmm4 // add 16 results vpaddd xmm1, xmm1, xmm2 vpaddd xmm1, xmm1, xmm3 vpshufd xmm2, xmm1, 0x0e // upper 2 dwords vpaddd xmm1, xmm1,xmm2 vpshufd xmm2, xmm1, 0x01 vpaddd xmm1, xmm1, xmm2 vpaddd xmm0, xmm0, xmm1 sub ecx, 16 jg wloop vmovd eax, xmm0 // return hash vzeroupper ret } } #endif // _MSC_VER >= 1700 #endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/convert.cc000066400000000000000000001531041357355204000221700ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "libyuv/convert.h" #include "libyuv/basic_types.h" #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "libyuv/row.h" #include "libyuv/scale.h" // For ScalePlane() #ifdef __cplusplus namespace libyuv { extern "C" { #endif #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) static __inline int Abs(int v) { return v >= 0 ? v : -v; } // Any I4xx To I420 format with mirroring. static int I4xxToI420(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int src_y_width, int src_y_height, int src_uv_width, int src_uv_height) { const int dst_y_width = Abs(src_y_width); const int dst_y_height = Abs(src_y_height); const int dst_uv_width = SUBSAMPLE(dst_y_width, 1, 1); const int dst_uv_height = SUBSAMPLE(dst_y_height, 1, 1); if (src_uv_width == 0 || src_uv_height == 0) { return -1; } if (dst_y) { ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); } ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); return 0; } // Copy I420 with optional flipping. // TODO(fbarchard): Use Scale plane which supports mirroring, but ensure // is does row coalescing. LIBYUV_API int I420Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; src_y = src_y + (height - 1) * src_stride_y; src_u = src_u + (halfheight - 1) * src_stride_u; src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } // Copy UV planes. CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); return 0; } // Copy I010 with optional flipping. LIBYUV_API int I010Copy(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint16_t* dst_y, int dst_stride_y, uint16_t* dst_u, int dst_stride_u, uint16_t* dst_v, int dst_stride_v, int width, int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; src_y = src_y + (height - 1) * src_stride_y; src_u = src_u + (halfheight - 1) * src_stride_u; src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } if (dst_y) { CopyPlane_16(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } // Copy UV planes. 
CopyPlane_16(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); CopyPlane_16(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); return 0; } // Convert 10 bit YUV to 8 bit. LIBYUV_API int I010ToI420(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; src_y = src_y + (height - 1) * src_stride_y; src_u = src_u + (halfheight - 1) * src_stride_u; src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } // Convert Y plane. Convert16To8Plane(src_y, src_stride_y, dst_y, dst_stride_y, 16384, width, height); // Convert UV planes. Convert16To8Plane(src_u, src_stride_u, dst_u, dst_stride_u, 16384, halfwidth, halfheight); Convert16To8Plane(src_v, src_stride_v, dst_v, dst_stride_v, 16384, halfwidth, halfheight); return 0; } // 422 chroma is 1/2 width, 1x height // 420 chroma is 1/2 width, 1/2 height LIBYUV_API int I422ToI420(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { const int src_uv_width = SUBSAMPLE(width, 1, 1); return I4xxToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, width, height, src_uv_width, height); } // 444 chroma is 1x width, 1x height // 420 chroma is 1/2 width, 1/2 height LIBYUV_API int I444ToI420(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { return I4xxToI420(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, width, height, width, height); } // I400 is greyscale typically used in MJPG LIBYUV_API int I400ToI420(const uint8_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } SetPlane(dst_u, dst_stride_u, halfwidth, halfheight, 128); SetPlane(dst_v, dst_stride_v, halfwidth, halfheight, 128); return 0; } static void CopyPlane2(const uint8_t* src, int src_stride_0, int src_stride_1, uint8_t* dst, int dst_stride, int width, int height) { int y; void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { CopyRow = IS_ALIGNED(width, 32) ? 
CopyRow_SSE2 : CopyRow_Any_SSE2; } #endif #if defined(HAS_COPYROW_AVX) if (TestCpuFlag(kCpuHasAVX)) { CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX; } #endif #if defined(HAS_COPYROW_ERMS) if (TestCpuFlag(kCpuHasERMS)) { CopyRow = CopyRow_ERMS; } #endif #if defined(HAS_COPYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif // Copy plane for (y = 0; y < height - 1; y += 2) { CopyRow(src, dst, width); CopyRow(src + src_stride_0, dst + dst_stride, width); src += src_stride_0 + src_stride_1; dst += dst_stride * 2; } if (height & 1) { CopyRow(src, dst, width); } } // Support converting from FOURCC_M420 // Useful for bandwidth constrained transports like USB 1.0 and 2.0 and for // easy conversion to I420. // M420 format description: // M420 is row biplanar 420: 2 rows of Y and 1 row of UV. // Chroma is half width / half height. (420) // src_stride_m420 is row planar. Normally this will be the width in pixels. // The UV plane is half width, but 2 values, so src_stride_m420 applies to // this as well as the two Y planes. static int X420ToI420(const uint8_t* src_y, int src_stride_y0, int src_stride_y1, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!src_uv || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; if (dst_y) { dst_y = dst_y + (height - 1) * dst_stride_y; } dst_u = dst_u + (halfheight - 1) * dst_stride_u; dst_v = dst_v + (halfheight - 1) * dst_stride_v; dst_stride_y = -dst_stride_y; dst_stride_u = -dst_stride_u; dst_stride_v = -dst_stride_v; } // Coalesce rows. if (src_stride_y0 == width && src_stride_y1 == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y0 = src_stride_y1 = dst_stride_y = 0; } // Coalesce rows. if (src_stride_uv == halfwidth * 2 && dst_stride_u == halfwidth && dst_stride_v == halfwidth) { halfwidth *= halfheight; halfheight = 1; src_stride_uv = dst_stride_u = dst_stride_v = 0; } if (dst_y) { if (src_stride_y0 == src_stride_y1) { CopyPlane(src_y, src_stride_y0, dst_y, dst_stride_y, width, height); } else { CopyPlane2(src_y, src_stride_y0, src_stride_y1, dst_y, dst_stride_y, width, height); } } // Split UV plane - NV12 / NV21 SplitUVPlane(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v, dst_stride_v, halfwidth, halfheight); return 0; } // Convert NV12 to I420. LIBYUV_API int NV12ToI420(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { return X420ToI420(src_y, src_stride_y, src_stride_y, src_uv, src_stride_uv, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, width, height); } // Convert NV21 to I420. Same as NV12 but u and v pointers swapped. LIBYUV_API int NV21ToI420(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, int src_stride_vu, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { return X420ToI420(src_y, src_stride_y, src_stride_y, src_vu, src_stride_vu, dst_y, dst_stride_y, dst_v, dst_stride_v, dst_u, dst_stride_u, width, height); } // Convert M420 to I420. 
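// M420 interleaves rows as (Y, Y, UV) groups, each row src_stride_m420 bytes
// wide. That is why the call below passes src_stride_m420 as the stride
// between the two Y rows of a group, src_stride_m420 * 2 as the step from a
// group's second Y row over the UV row to the next group, an offset of
// src_stride_m420 * 2 for the start of the UV data, and src_stride_m420 * 3
// as the stride between consecutive UV rows.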
LIBYUV_API int M420ToI420(const uint8_t* src_m420, int src_stride_m420, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { return X420ToI420(src_m420, src_stride_m420, src_stride_m420 * 2, src_m420 + src_stride_m420 * 2, src_stride_m420 * 3, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, width, height); } // Convert YUY2 to I420. LIBYUV_API int YUY2ToI420(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*YUY2ToUVRow)(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) = YUY2ToUVRow_C; void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = YUY2ToYRow_C; // Negative height means invert the image. if (height < 0) { height = -height; src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2; } #if defined(HAS_YUY2TOYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { YUY2ToUVRow = YUY2ToUVRow_Any_SSE2; YUY2ToYRow = YUY2ToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { YUY2ToUVRow = YUY2ToUVRow_SSE2; YUY2ToYRow = YUY2ToYRow_SSE2; } } #endif #if defined(HAS_YUY2TOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { YUY2ToUVRow = YUY2ToUVRow_Any_AVX2; YUY2ToYRow = YUY2ToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { YUY2ToUVRow = YUY2ToUVRow_AVX2; YUY2ToYRow = YUY2ToYRow_AVX2; } } #endif #if defined(HAS_YUY2TOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { YUY2ToYRow = YUY2ToYRow_Any_NEON; YUY2ToUVRow = YUY2ToUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { YUY2ToYRow = YUY2ToYRow_NEON; YUY2ToUVRow = YUY2ToUVRow_NEON; } } #endif #if defined(HAS_YUY2TOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { YUY2ToYRow = YUY2ToYRow_Any_MSA; YUY2ToUVRow = YUY2ToUVRow_Any_MSA; if (IS_ALIGNED(width, 32)) { YUY2ToYRow = YUY2ToYRow_MSA; YUY2ToUVRow = YUY2ToUVRow_MSA; } } #endif for (y = 0; y < height - 1; y += 2) { YUY2ToUVRow(src_yuy2, src_stride_yuy2, dst_u, dst_v, width); YUY2ToYRow(src_yuy2, dst_y, width); YUY2ToYRow(src_yuy2 + src_stride_yuy2, dst_y + dst_stride_y, width); src_yuy2 += src_stride_yuy2 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { YUY2ToUVRow(src_yuy2, 0, dst_u, dst_v, width); YUY2ToYRow(src_yuy2, dst_y, width); } return 0; } // Convert UYVY to I420. LIBYUV_API int UYVYToI420(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*UYVYToUVRow)(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) = UYVYToUVRow_C; void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) = UYVYToYRow_C; // Negative height means invert the image. 
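// (Inversion is done by pointing src at the last row and negating the
// stride, so the row loop below can still walk forward.)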
if (height < 0) { height = -height; src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; src_stride_uyvy = -src_stride_uyvy; } #if defined(HAS_UYVYTOYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { UYVYToUVRow = UYVYToUVRow_Any_SSE2; UYVYToYRow = UYVYToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { UYVYToUVRow = UYVYToUVRow_SSE2; UYVYToYRow = UYVYToYRow_SSE2; } } #endif #if defined(HAS_UYVYTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { UYVYToUVRow = UYVYToUVRow_Any_AVX2; UYVYToYRow = UYVYToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { UYVYToUVRow = UYVYToUVRow_AVX2; UYVYToYRow = UYVYToYRow_AVX2; } } #endif #if defined(HAS_UYVYTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { UYVYToYRow = UYVYToYRow_Any_NEON; UYVYToUVRow = UYVYToUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { UYVYToYRow = UYVYToYRow_NEON; UYVYToUVRow = UYVYToUVRow_NEON; } } #endif #if defined(HAS_UYVYTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { UYVYToYRow = UYVYToYRow_Any_MSA; UYVYToUVRow = UYVYToUVRow_Any_MSA; if (IS_ALIGNED(width, 32)) { UYVYToYRow = UYVYToYRow_MSA; UYVYToUVRow = UYVYToUVRow_MSA; } } #endif for (y = 0; y < height - 1; y += 2) { UYVYToUVRow(src_uyvy, src_stride_uyvy, dst_u, dst_v, width); UYVYToYRow(src_uyvy, dst_y, width); UYVYToYRow(src_uyvy + src_stride_uyvy, dst_y + dst_stride_y, width); src_uyvy += src_stride_uyvy * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { UYVYToUVRow(src_uyvy, 0, dst_u, dst_v, width); UYVYToYRow(src_uyvy, dst_y, width); } return 0; } // Convert ARGB to I420. LIBYUV_API int ARGBToI420(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif #if defined(HAS_ARGBTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } } #endif #if defined(HAS_ARGBTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVRow = ARGBToUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_NEON; } } #endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_MSA; } } #endif #if defined(HAS_ARGBTOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUVRow = ARGBToUVRow_Any_MSA; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_MSA; } } #endif for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, dst_u, dst_v, width); ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); src_argb += src_stride_argb * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); ARGBToYRow(src_argb, dst_y, width); } return 0; } // Convert BGRA to I420. LIBYUV_API int BGRAToI420(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*BGRAToUVRow)(const uint8_t* src_bgra0, int src_stride_bgra, uint8_t* dst_u, uint8_t* dst_v, int width) = BGRAToUVRow_C; void (*BGRAToYRow)(const uint8_t* src_bgra, uint8_t* dst_y, int width) = BGRAToYRow_C; if (!src_bgra || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; src_bgra = src_bgra + (height - 1) * src_stride_bgra; src_stride_bgra = -src_stride_bgra; } #if defined(HAS_BGRATOYROW_SSSE3) && defined(HAS_BGRATOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { BGRAToUVRow = BGRAToUVRow_Any_SSSE3; BGRAToYRow = BGRAToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { BGRAToUVRow = BGRAToUVRow_SSSE3; BGRAToYRow = BGRAToYRow_SSSE3; } } #endif #if defined(HAS_BGRATOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { BGRAToYRow = BGRAToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { BGRAToYRow = BGRAToYRow_NEON; } } #endif #if defined(HAS_BGRATOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { BGRAToUVRow = BGRAToUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { BGRAToUVRow = BGRAToUVRow_NEON; } } #endif #if defined(HAS_BGRATOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { BGRAToYRow = BGRAToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { BGRAToYRow = BGRAToYRow_MSA; } } #endif #if defined(HAS_BGRATOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { BGRAToUVRow = BGRAToUVRow_Any_MSA; if (IS_ALIGNED(width, 16)) { BGRAToUVRow = BGRAToUVRow_MSA; } } #endif for (y = 0; y < height - 1; y += 2) { BGRAToUVRow(src_bgra, src_stride_bgra, dst_u, dst_v, width); BGRAToYRow(src_bgra, dst_y, width); BGRAToYRow(src_bgra + src_stride_bgra, dst_y + dst_stride_y, width); src_bgra += src_stride_bgra * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { BGRAToUVRow(src_bgra, 0, dst_u, dst_v, width); BGRAToYRow(src_bgra, dst_y, width); } return 0; } // Convert ABGR to I420. LIBYUV_API int ABGRToI420(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*ABGRToUVRow)(const uint8_t* src_abgr0, int src_stride_abgr, uint8_t* dst_u, uint8_t* dst_v, int width) = ABGRToUVRow_C; void (*ABGRToYRow)(const uint8_t* src_abgr, uint8_t* dst_y, int width) = ABGRToYRow_C; if (!src_abgr || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; src_abgr = src_abgr + (height - 1) * src_stride_abgr; src_stride_abgr = -src_stride_abgr; } #if defined(HAS_ABGRTOYROW_SSSE3) && defined(HAS_ABGRTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ABGRToUVRow = ABGRToUVRow_Any_SSSE3; ABGRToYRow = ABGRToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ABGRToUVRow = ABGRToUVRow_SSSE3; ABGRToYRow = ABGRToYRow_SSSE3; } } #endif #if defined(HAS_ABGRTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ABGRToYRow = ABGRToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ABGRToYRow = ABGRToYRow_NEON; } } #endif #if defined(HAS_ABGRTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ABGRToUVRow = ABGRToUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { ABGRToUVRow = ABGRToUVRow_NEON; } } #endif #if defined(HAS_ABGRTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ABGRToYRow = ABGRToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ABGRToYRow = ABGRToYRow_MSA; } } #endif #if defined(HAS_ABGRTOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ABGRToUVRow = ABGRToUVRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ABGRToUVRow = ABGRToUVRow_MSA; } } #endif for (y = 0; y < height - 1; y += 2) { ABGRToUVRow(src_abgr, src_stride_abgr, dst_u, dst_v, width); ABGRToYRow(src_abgr, dst_y, width); ABGRToYRow(src_abgr + src_stride_abgr, dst_y + dst_stride_y, width); src_abgr += src_stride_abgr * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { ABGRToUVRow(src_abgr, 0, dst_u, dst_v, width); ABGRToYRow(src_abgr, dst_y, width); } return 0; } // Convert RGBA to I420. LIBYUV_API int RGBAToI420(const uint8_t* src_rgba, int src_stride_rgba, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*RGBAToUVRow)(const uint8_t* src_rgba0, int src_stride_rgba, uint8_t* dst_u, uint8_t* dst_v, int width) = RGBAToUVRow_C; void (*RGBAToYRow)(const uint8_t* src_rgba, uint8_t* dst_y, int width) = RGBAToYRow_C; if (!src_rgba || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; src_rgba = src_rgba + (height - 1) * src_stride_rgba; src_stride_rgba = -src_stride_rgba; } #if defined(HAS_RGBATOYROW_SSSE3) && defined(HAS_RGBATOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RGBAToUVRow = RGBAToUVRow_Any_SSSE3; RGBAToYRow = RGBAToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { RGBAToUVRow = RGBAToUVRow_SSSE3; RGBAToYRow = RGBAToYRow_SSSE3; } } #endif #if defined(HAS_RGBATOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGBAToYRow = RGBAToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { RGBAToYRow = RGBAToYRow_NEON; } } #endif #if defined(HAS_RGBATOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGBAToUVRow = RGBAToUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { RGBAToUVRow = RGBAToUVRow_NEON; } } #endif #if defined(HAS_RGBATOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGBAToYRow = RGBAToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { RGBAToYRow = RGBAToYRow_MSA; } } #endif #if defined(HAS_RGBATOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGBAToUVRow = RGBAToUVRow_Any_MSA; if (IS_ALIGNED(width, 16)) { RGBAToUVRow = RGBAToUVRow_MSA; } } #endif for (y = 0; y < height - 1; y += 2) { RGBAToUVRow(src_rgba, src_stride_rgba, dst_u, dst_v, width); RGBAToYRow(src_rgba, dst_y, width); RGBAToYRow(src_rgba + src_stride_rgba, dst_y + dst_stride_y, width); src_rgba += src_stride_rgba * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { RGBAToUVRow(src_rgba, 0, dst_u, dst_v, width); RGBAToYRow(src_rgba, dst_y, width); } return 0; } // Convert RGB24 to I420. LIBYUV_API int RGB24ToI420(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; #if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) void (*RGB24ToUVRow)(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_u, uint8_t* dst_v, int width) = RGB24ToUVRow_C; void (*RGB24ToYRow)(const uint8_t* src_rgb24, uint8_t* dst_y, int width) = RGB24ToYRow_C; #else void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RGB24ToARGBRow_C; void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif if (!src_rgb24 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; src_stride_rgb24 = -src_stride_rgb24; } // Neon version does direct RGB24 to YUV. #if defined(HAS_RGB24TOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGB24ToUVRow = RGB24ToUVRow_Any_NEON; RGB24ToYRow = RGB24ToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { RGB24ToYRow = RGB24ToYRow_NEON; if (IS_ALIGNED(width, 16)) { RGB24ToUVRow = RGB24ToUVRow_NEON; } } } #elif defined(HAS_RGB24TOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB24ToUVRow = RGB24ToUVRow_Any_MSA; RGB24ToYRow = RGB24ToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { RGB24ToYRow = RGB24ToYRow_MSA; RGB24ToUVRow = RGB24ToUVRow_MSA; } } // Other platforms do intermediate conversion from RGB24 to ARGB. 
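// The fallback below stages two RGB24 rows at a time in a temporary ARGB
// buffer and reuses the ARGB row kernels for the Y and UV extraction.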
#else #if defined(HAS_RGB24TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif #endif { #if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) RGB24ToUVRow(src_rgb24, src_stride_rgb24, dst_u, dst_v, width); RGB24ToYRow(src_rgb24, dst_y, width); RGB24ToYRow(src_rgb24 + src_stride_rgb24, dst_y + dst_stride_y, width); #else RGB24ToARGBRow(src_rgb24, row, width); RGB24ToARGBRow(src_rgb24 + src_stride_rgb24, row + kRowSize, width); ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); #endif src_rgb24 += src_stride_rgb24 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { #if (defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) RGB24ToUVRow(src_rgb24, 0, dst_u, dst_v, width); RGB24ToYRow(src_rgb24, dst_y, width); #else RGB24ToARGBRow(src_rgb24, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); #endif } #if !(defined(HAS_RGB24TOYROW_NEON) || defined(HAS_RGB24TOYROW_MSA)) free_aligned_buffer_64(row); #endif } return 0; } // Convert RAW to I420. LIBYUV_API int RAWToI420(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; #if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) void (*RAWToUVRow)(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, uint8_t* dst_v, int width) = RAWToUVRow_C; void (*RAWToYRow)(const uint8_t* src_raw, uint8_t* dst_y, int width) = RAWToYRow_C; #else void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif if (!src_raw || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_raw = src_raw + (height - 1) * src_stride_raw; src_stride_raw = -src_stride_raw; } // Neon version does direct RAW to YUV. 
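// (RAW is the byte-reversed sibling of RGB24: r,g,b in memory instead of
// b,g,r, so this conversion mirrors the RGB24 path above.)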
#if defined(HAS_RAWTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RAWToUVRow = RAWToUVRow_Any_NEON; RAWToYRow = RAWToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { RAWToYRow = RAWToYRow_NEON; if (IS_ALIGNED(width, 16)) { RAWToUVRow = RAWToUVRow_NEON; } } } #elif defined(HAS_RAWTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RAWToUVRow = RAWToUVRow_Any_MSA; RAWToYRow = RAWToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { RAWToYRow = RAWToYRow_MSA; RAWToUVRow = RAWToUVRow_MSA; } } // Other platforms do intermediate conversion from RAW to ARGB. #else #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { RAWToARGBRow = RAWToARGBRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif #endif { #if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) RAWToUVRow(src_raw, src_stride_raw, dst_u, dst_v, width); RAWToYRow(src_raw, dst_y, width); RAWToYRow(src_raw + src_stride_raw, dst_y + dst_stride_y, width); #else RAWToARGBRow(src_raw, row, width); RAWToARGBRow(src_raw + src_stride_raw, row + kRowSize, width); ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); #endif src_raw += src_stride_raw * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { #if (defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) RAWToUVRow(src_raw, 0, dst_u, dst_v, width); RAWToYRow(src_raw, dst_y, width); #else RAWToARGBRow(src_raw, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); #endif } #if !(defined(HAS_RAWTOYROW_NEON) || defined(HAS_RAWTOYROW_MSA)) free_aligned_buffer_64(row); #endif } return 0; } // Convert RGB565 to I420. LIBYUV_API int RGB565ToI420(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) void (*RGB565ToUVRow)(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width) = RGB565ToUVRow_C; void (*RGB565ToYRow)(const uint8_t* src_rgb565, uint8_t* dst_y, int width) = RGB565ToYRow_C; #else void (*RGB565ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RGB565ToARGBRow_C; void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif if (!src_rgb565 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565; src_stride_rgb565 = -src_stride_rgb565; } // Neon version does direct RGB565 to YUV. #if defined(HAS_RGB565TOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGB565ToUVRow = RGB565ToUVRow_Any_NEON; RGB565ToYRow = RGB565ToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { RGB565ToYRow = RGB565ToYRow_NEON; if (IS_ALIGNED(width, 16)) { RGB565ToUVRow = RGB565ToUVRow_NEON; } } } #elif defined(HAS_RGB565TOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB565ToUVRow = RGB565ToUVRow_Any_MSA; RGB565ToYRow = RGB565ToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { RGB565ToYRow = RGB565ToYRow_MSA; RGB565ToUVRow = RGB565ToUVRow_MSA; } } // Other platforms do intermediate conversion from RGB565 to ARGB. #else #if defined(HAS_RGB565TOARGBROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; if (IS_ALIGNED(width, 8)) { RGB565ToARGBRow = RGB565ToARGBRow_SSE2; } } #endif #if defined(HAS_RGB565TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { RGB565ToARGBRow = RGB565ToARGBRow_AVX2; } } #endif #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif #endif { #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) RGB565ToUVRow(src_rgb565, src_stride_rgb565, dst_u, dst_v, width); RGB565ToYRow(src_rgb565, dst_y, width); RGB565ToYRow(src_rgb565 + src_stride_rgb565, dst_y + dst_stride_y, width); #else RGB565ToARGBRow(src_rgb565, row, width); RGB565ToARGBRow(src_rgb565 + src_stride_rgb565, row + kRowSize, width); ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); #endif src_rgb565 += src_stride_rgb565 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { #if (defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) RGB565ToUVRow(src_rgb565, 0, dst_u, dst_v, width); RGB565ToYRow(src_rgb565, dst_y, width); #else RGB565ToARGBRow(src_rgb565, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); #endif } #if !(defined(HAS_RGB565TOYROW_NEON) || defined(HAS_RGB565TOYROW_MSA)) free_aligned_buffer_64(row); #endif } return 0; } // Convert ARGB1555 to I420. 
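// ARGB1555 packs a pixel into 16 bits: 1 alpha bit and 5 bits per color
// channel. The alpha bit is dropped when converting to I420.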
LIBYUV_API int ARGB1555ToI420(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) void (*ARGB1555ToUVRow)(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGB1555ToUVRow_C; void (*ARGB1555ToYRow)(const uint8_t* src_argb1555, uint8_t* dst_y, int width) = ARGB1555ToYRow_C; #else void (*ARGB1555ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = ARGB1555ToARGBRow_C; void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif if (!src_argb1555 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555; src_stride_argb1555 = -src_stride_argb1555; } // Neon version does direct ARGB1555 to YUV. #if defined(HAS_ARGB1555TOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGB1555ToUVRow = ARGB1555ToUVRow_Any_NEON; ARGB1555ToYRow = ARGB1555ToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGB1555ToYRow = ARGB1555ToYRow_NEON; if (IS_ALIGNED(width, 16)) { ARGB1555ToUVRow = ARGB1555ToUVRow_NEON; } } } #elif defined(HAS_ARGB1555TOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGB1555ToUVRow = ARGB1555ToUVRow_Any_MSA; ARGB1555ToYRow = ARGB1555ToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGB1555ToYRow = ARGB1555ToYRow_MSA; ARGB1555ToUVRow = ARGB1555ToUVRow_MSA; } } // Other platforms do intermediate conversion from ARGB1555 to ARGB. #else #if defined(HAS_ARGB1555TOARGBROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2; if (IS_ALIGNED(width, 8)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; } } #endif #if defined(HAS_ARGB1555TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2; } } #endif #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif #endif { #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) // Allocate 2 rows of ARGB. 
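// kRowSize below rounds a width * 4 byte ARGB row up to a multiple of
// 32 bytes so the second staged row stays 32-byte aligned for the SIMD
// kernels; e.g. width 100 gives 400 bytes, rounded up to 416.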
const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) ARGB1555ToUVRow(src_argb1555, src_stride_argb1555, dst_u, dst_v, width); ARGB1555ToYRow(src_argb1555, dst_y, width); ARGB1555ToYRow(src_argb1555 + src_stride_argb1555, dst_y + dst_stride_y, width); #else ARGB1555ToARGBRow(src_argb1555, row, width); ARGB1555ToARGBRow(src_argb1555 + src_stride_argb1555, row + kRowSize, width); ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); #endif src_argb1555 += src_stride_argb1555 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { #if (defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) ARGB1555ToUVRow(src_argb1555, 0, dst_u, dst_v, width); ARGB1555ToYRow(src_argb1555, dst_y, width); #else ARGB1555ToARGBRow(src_argb1555, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); #endif } #if !(defined(HAS_ARGB1555TOYROW_NEON) || defined(HAS_ARGB1555TOYROW_MSA)) free_aligned_buffer_64(row); #endif } return 0; } // Convert ARGB4444 to I420. LIBYUV_API int ARGB4444ToI420(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; #if defined(HAS_ARGB4444TOYROW_NEON) void (*ARGB4444ToUVRow)(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGB4444ToUVRow_C; void (*ARGB4444ToYRow)(const uint8_t* src_argb4444, uint8_t* dst_y, int width) = ARGB4444ToYRow_C; #else void (*ARGB4444ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = ARGB4444ToARGBRow_C; void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; #endif if (!src_argb4444 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444; src_stride_argb4444 = -src_stride_argb4444; } // Neon version does direct ARGB4444 to YUV. #if defined(HAS_ARGB4444TOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGB4444ToUVRow = ARGB4444ToUVRow_Any_NEON; ARGB4444ToYRow = ARGB4444ToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGB4444ToYRow = ARGB4444ToYRow_NEON; if (IS_ALIGNED(width, 16)) { ARGB4444ToUVRow = ARGB4444ToUVRow_NEON; } } } // Other platforms do intermediate conversion from ARGB4444 to ARGB. 
#else #if defined(HAS_ARGB4444TOARGBROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2; if (IS_ALIGNED(width, 8)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; } } #endif #if defined(HAS_ARGB4444TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2; } } #endif #if defined(HAS_ARGB4444TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_MSA; } } #endif #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUVRow = ARGBToUVRow_Any_MSA; ARGBToYRow = ARGBToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_MSA; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_MSA; } } } #endif #endif { #if !defined(HAS_ARGB4444TOYROW_NEON) // Allocate 2 rows of ARGB. const int kRowSize = (width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); #endif for (y = 0; y < height - 1; y += 2) { #if defined(HAS_ARGB4444TOYROW_NEON) ARGB4444ToUVRow(src_argb4444, src_stride_argb4444, dst_u, dst_v, width); ARGB4444ToYRow(src_argb4444, dst_y, width); ARGB4444ToYRow(src_argb4444 + src_stride_argb4444, dst_y + dst_stride_y, width); #else ARGB4444ToARGBRow(src_argb4444, row, width); ARGB4444ToARGBRow(src_argb4444 + src_stride_argb4444, row + kRowSize, width); ARGBToUVRow(row, kRowSize, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); ARGBToYRow(row + kRowSize, dst_y + dst_stride_y, width); #endif src_argb4444 += src_stride_argb4444 * 2; dst_y += dst_stride_y * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { #if defined(HAS_ARGB4444TOYROW_NEON) ARGB4444ToUVRow(src_argb4444, 0, dst_u, dst_v, width); ARGB4444ToYRow(src_argb4444, dst_y, width); #else ARGB4444ToARGBRow(src_argb4444, row, width); ARGBToUVRow(row, 0, dst_u, dst_v, width); ARGBToYRow(row, dst_y, width); #endif } #if !defined(HAS_ARGB4444TOYROW_NEON) free_aligned_buffer_64(row); #endif } return 0; } static void SplitPixels(const uint8_t* src_u, int src_pixel_stride_uv, uint8_t* dst_u, int width) { int i; for (i = 0; i < width; ++i) { *dst_u = *src_u; ++dst_u; src_u += src_pixel_stride_uv; } } // Convert Android420 to I420. LIBYUV_API int Android420ToI420(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; const ptrdiff_t vu_off = src_v - src_u; int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
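// (Clarifying note added; not in the original source.) Callers request a
// vertical flip by passing a negative height, e.g. height = -720 for a
// 720-row frame. The block below normalizes height, points each source
// plane at its last row, and negates the strides so the planes are read
// bottom-up.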
if (height < 0) { height = -height; halfheight = (height + 1) >> 1; src_y = src_y + (height - 1) * src_stride_y; src_u = src_u + (halfheight - 1) * src_stride_u; src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } // Copy UV planes as is - I420 if (src_pixel_stride_uv == 1) { CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); return 0; // Split UV planes - NV21 } if (src_pixel_stride_uv == 2 && vu_off == -1 && src_stride_u == src_stride_v) { SplitUVPlane(src_v, src_stride_v, dst_v, dst_stride_v, dst_u, dst_stride_u, halfwidth, halfheight); return 0; // Split UV planes - NV12 } if (src_pixel_stride_uv == 2 && vu_off == 1 && src_stride_u == src_stride_v) { SplitUVPlane(src_u, src_stride_u, dst_u, dst_stride_u, dst_v, dst_stride_v, halfwidth, halfheight); return 0; } for (y = 0; y < halfheight; ++y) { SplitPixels(src_u, src_pixel_stride_uv, dst_u, halfwidth); SplitPixels(src_v, src_pixel_stride_uv, dst_v, halfwidth); src_u += src_stride_u; src_v += src_stride_v; dst_u += dst_stride_u; dst_v += dst_stride_v; } return 0; } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/convert_argb.cc000066400000000000000000002105401357355204000231610ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/convert_argb.h" #include "libyuv/cpu_id.h" #ifdef HAVE_JPEG #include "libyuv/mjpeg_decoder.h" #endif #include "libyuv/planar_functions.h" // For CopyPlane and ARGBShuffle. #include "libyuv/rotate_argb.h" #include "libyuv/row.h" #include "libyuv/video_common.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Copy ARGB with optional flipping LIBYUV_API int ARGBCopy(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } CopyPlane(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width * 4, height); return 0; } // Convert I420 to ARGB with matrix static int I420ToARGBMatrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
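// (Clarifying note added; not in the original source.) In the YUV-to-RGB
// paths the flip is applied to the destination instead: dst_argb starts
// at the last output row with a negated stride, so ARGB rows are written
// bottom-up while the YUV source is still read top-down.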
if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToARGBRow = I422ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I422ToARGBRow = I422ToARGBRow_SSSE3; } } #endif #if defined(HAS_I422TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToARGBRow = I422ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { I422ToARGBRow = I422ToARGBRow_AVX2; } } #endif #if defined(HAS_I422TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToARGBRow = I422ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { I422ToARGBRow = I422ToARGBRow_NEON; } } #endif #if defined(HAS_I422TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGBRow = I422ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 8)) { I422ToARGBRow = I422ToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { src_u += src_stride_u; src_v += src_stride_v; } } return 0; } // Convert I420 to ARGB. LIBYUV_API int I420ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, &kYuvI601Constants, width, height); } // Convert I420 to ABGR. LIBYUV_API int I420ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { return I420ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuI601Constants, // Use Yvu matrix width, height); } // Convert J420 to ARGB. LIBYUV_API int J420ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, &kYuvJPEGConstants, width, height); } // Convert J420 to ABGR. LIBYUV_API int J420ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { return I420ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuJPEGConstants, // Use Yvu matrix width, height); } // Convert H420 to ARGB. LIBYUV_API int H420ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, &kYuvH709Constants, width, height); } // Convert H420 to ABGR. 
LIBYUV_API int H420ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { return I420ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuH709Constants, // Use Yvu matrix width, height); } // Convert I422 to ARGB with matrix static int I422ToARGBMatrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. if (src_stride_y == width && src_stride_u * 2 == width && src_stride_v * 2 == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; } #if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToARGBRow = I422ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I422ToARGBRow = I422ToARGBRow_SSSE3; } } #endif #if defined(HAS_I422TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToARGBRow = I422ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { I422ToARGBRow = I422ToARGBRow_AVX2; } } #endif #if defined(HAS_I422TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToARGBRow = I422ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { I422ToARGBRow = I422ToARGBRow_NEON; } } #endif #if defined(HAS_I422TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGBRow = I422ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 8)) { I422ToARGBRow = I422ToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; src_u += src_stride_u; src_v += src_stride_v; } return 0; } // Convert I422 to ARGB. LIBYUV_API int I422ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return I422ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, &kYuvI601Constants, width, height); } // Convert I422 to ABGR. LIBYUV_API int I422ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { return I422ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuI601Constants, // Use Yvu matrix width, height); } // Convert J422 to ARGB. LIBYUV_API int J422ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return I422ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, &kYuvJPEGConstants, width, height); } // Convert J422 to ABGR. 
LIBYUV_API int J422ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { return I422ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuJPEGConstants, // Use Yvu matrix width, height); } // Convert H422 to ARGB. LIBYUV_API int H422ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return I422ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, &kYuvH709Constants, width, height); } // Convert H422 to ABGR. LIBYUV_API int H422ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { return I422ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuH709Constants, // Use Yvu matrix width, height); } // Convert 10 bit YUV to ARGB with matrix // TODO(fbarchard): Consider passing scale multiplier to I210ToARGB to // multiply 10 bit yuv into high bits to allow any number of bits. static int I010ToAR30Matrix(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_ar30, int dst_stride_ar30, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*I210ToAR30Row)(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToAR30Row_C; if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } #if defined(HAS_I210TOAR30ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I210ToAR30Row = I210ToAR30Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I210ToAR30Row = I210ToAR30Row_SSSE3; } } #endif #if defined(HAS_I210TOAR30ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I210ToAR30Row = I210ToAR30Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { I210ToAR30Row = I210ToAR30Row_AVX2; } } #endif for (y = 0; y < height; ++y) { I210ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; if (y & 1) { src_u += src_stride_u; src_v += src_stride_v; } } return 0; } // Convert I010 to AR30. LIBYUV_API int I010ToAR30(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_ar30, int dst_stride_ar30, int width, int height) { return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, &kYuvI601Constants, width, height); } // Convert H010 to AR30. LIBYUV_API int H010ToAR30(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_ar30, int dst_stride_ar30, int width, int height) { return I010ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, &kYuvH709Constants, width, height); } // Convert I010 to AB30. 
LIBYUV_API int I010ToAB30(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_ab30, int dst_stride_ab30, int width, int height) { return I010ToAR30Matrix(src_y, src_stride_y, src_v, src_stride_v, src_u, src_stride_u, dst_ab30, dst_stride_ab30, &kYvuI601Constants, width, height); } // Convert H010 to AB30. LIBYUV_API int H010ToAB30(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_ab30, int dst_stride_ab30, int width, int height) { return I010ToAR30Matrix(src_y, src_stride_y, src_v, src_stride_v, src_u, src_stride_u, dst_ab30, dst_stride_ab30, &kYvuH709Constants, width, height); } // Convert 10 bit YUV to ARGB with matrix static int I010ToARGBMatrix(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*I210ToARGBRow)(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I210ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I210TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I210ToARGBRow = I210ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I210ToARGBRow = I210ToARGBRow_SSSE3; } } #endif #if defined(HAS_I210TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I210ToARGBRow = I210ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { I210ToARGBRow = I210ToARGBRow_AVX2; } } #endif for (y = 0; y < height; ++y) { I210ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { src_u += src_stride_u; src_v += src_stride_v; } } return 0; } // Convert I010 to ARGB. LIBYUV_API int I010ToARGB(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, &kYuvI601Constants, width, height); } // Convert I010 to ABGR. LIBYUV_API int I010ToABGR(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { return I010ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuI601Constants, // Use Yvu matrix width, height); } // Convert H010 to ARGB. LIBYUV_API int H010ToARGB(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return I010ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, &kYuvH709Constants, width, height); } // Convert H010 to ABGR. 
LIBYUV_API int H010ToABGR(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { return I010ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuH709Constants, // Use Yvu matrix width, height); } // Convert I444 to ARGB with matrix static int I444ToARGBMatrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*I444ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I444ToARGBRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. if (src_stride_y == width && src_stride_u == width && src_stride_v == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_argb = 0; } #if defined(HAS_I444TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I444ToARGBRow = I444ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I444ToARGBRow = I444ToARGBRow_SSSE3; } } #endif #if defined(HAS_I444TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I444ToARGBRow = I444ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { I444ToARGBRow = I444ToARGBRow_AVX2; } } #endif #if defined(HAS_I444TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I444ToARGBRow = I444ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { I444ToARGBRow = I444ToARGBRow_NEON; } } #endif #if defined(HAS_I444TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I444ToARGBRow = I444ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 8)) { I444ToARGBRow = I444ToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { I444ToARGBRow(src_y, src_u, src_v, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; src_u += src_stride_u; src_v += src_stride_v; } return 0; } // Convert I444 to ARGB. LIBYUV_API int I444ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, &kYuvI601Constants, width, height); } // Convert I444 to ABGR. LIBYUV_API int I444ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { return I444ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_abgr, dst_stride_abgr, &kYvuI601Constants, // Use Yvu matrix width, height); } // Convert J444 to ARGB. LIBYUV_API int J444ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return I444ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, &kYuvJPEGConstants, width, height); } // Convert I420 with Alpha to preattenuated ARGB. 
static int I420AlphaToARGBMatrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, const uint8_t* src_a, int src_stride_a, uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height, int attenuate) { int y; void (*I422AlphaToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) = I422AlphaToARGBRow_C; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_I422ALPHATOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422AlphaToARGBRow = I422AlphaToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I422AlphaToARGBRow = I422AlphaToARGBRow_SSSE3; } } #endif #if defined(HAS_I422ALPHATOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422AlphaToARGBRow = I422AlphaToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { I422AlphaToARGBRow = I422AlphaToARGBRow_AVX2; } } #endif #if defined(HAS_I422ALPHATOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422AlphaToARGBRow = I422AlphaToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { I422AlphaToARGBRow = I422AlphaToARGBRow_NEON; } } #endif #if defined(HAS_I422ALPHATOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422AlphaToARGBRow = I422AlphaToARGBRow_Any_MSA; if (IS_ALIGNED(width, 8)) { I422AlphaToARGBRow = I422AlphaToARGBRow_MSA; } } #endif #if defined(HAS_ARGBATTENUATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; if (IS_ALIGNED(width, 4)) { ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; } } #endif #if defined(HAS_ARGBATTENUATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBAttenuateRow = ARGBAttenuateRow_AVX2; } } #endif #if defined(HAS_ARGBATTENUATEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBAttenuateRow = ARGBAttenuateRow_NEON; } } #endif #if defined(HAS_ARGBATTENUATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; if (IS_ALIGNED(width, 8)) { ARGBAttenuateRow = ARGBAttenuateRow_MSA; } } #endif for (y = 0; y < height; ++y) { I422AlphaToARGBRow(src_y, src_u, src_v, src_a, dst_argb, yuvconstants, width); if (attenuate) { ARGBAttenuateRow(dst_argb, dst_argb, width); } dst_argb += dst_stride_argb; src_a += src_stride_a; src_y += src_stride_y; if (y & 1) { src_u += src_stride_u; src_v += src_stride_v; } } return 0; } // Convert I420 with Alpha to ARGB. LIBYUV_API int I420AlphaToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, const uint8_t* src_a, int src_stride_a, uint8_t* dst_argb, int dst_stride_argb, int width, int height, int attenuate) { return I420AlphaToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_a, src_stride_a, dst_argb, dst_stride_argb, &kYuvI601Constants, width, height, attenuate); } // Convert I420 with Alpha to ABGR. 
LIBYUV_API int I420AlphaToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, const uint8_t* src_a, int src_stride_a, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height, int attenuate) { return I420AlphaToARGBMatrix( src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, src_a, src_stride_a, dst_abgr, dst_stride_abgr, &kYvuI601Constants, // Use Yvu matrix width, height, attenuate); } // Convert I400 to ARGB. LIBYUV_API int I400ToARGB(const uint8_t* src_y, int src_stride_y, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*I400ToARGBRow)(const uint8_t* y_buf, uint8_t* rgb_buf, int width) = I400ToARGBRow_C; if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. if (src_stride_y == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = dst_stride_argb = 0; } #if defined(HAS_I400TOARGBROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I400ToARGBRow = I400ToARGBRow_Any_SSE2; if (IS_ALIGNED(width, 8)) { I400ToARGBRow = I400ToARGBRow_SSE2; } } #endif #if defined(HAS_I400TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I400ToARGBRow = I400ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { I400ToARGBRow = I400ToARGBRow_AVX2; } } #endif #if defined(HAS_I400TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I400ToARGBRow = I400ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { I400ToARGBRow = I400ToARGBRow_NEON; } } #endif #if defined(HAS_I400TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I400ToARGBRow = I400ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 16)) { I400ToARGBRow = I400ToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { I400ToARGBRow(src_y, dst_argb, width); dst_argb += dst_stride_argb; src_y += src_stride_y; } return 0; } // Convert J400 to ARGB. LIBYUV_API int J400ToARGB(const uint8_t* src_y, int src_stride_y, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*J400ToARGBRow)(const uint8_t* src_y, uint8_t* dst_argb, int width) = J400ToARGBRow_C; if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } // Coalesce rows. if (src_stride_y == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = dst_stride_argb = 0; } #if defined(HAS_J400TOARGBROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { J400ToARGBRow = J400ToARGBRow_Any_SSE2; if (IS_ALIGNED(width, 8)) { J400ToARGBRow = J400ToARGBRow_SSE2; } } #endif #if defined(HAS_J400TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { J400ToARGBRow = J400ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { J400ToARGBRow = J400ToARGBRow_AVX2; } } #endif #if defined(HAS_J400TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { J400ToARGBRow = J400ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { J400ToARGBRow = J400ToARGBRow_NEON; } } #endif #if defined(HAS_J400TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { J400ToARGBRow = J400ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 16)) { J400ToARGBRow = J400ToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { J400ToARGBRow(src_y, dst_argb, width); src_y += src_stride_y; dst_argb += dst_stride_argb; } return 0; } // Shuffle table for converting BGRA to ARGB. 
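// (Clarifying note added; not in the original source.) Each 16-entry mask
// below lists, for every output byte, which input byte within the same
// 16-byte (4-pixel) block to copy, so a single byte shuffle reorders four
// pixels at once; e.g. {3u, 2u, 1u, 0u, ...} reverses the 4 bytes of each
// pixel.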
static const uvec8 kShuffleMaskBGRAToARGB = { 3u, 2u, 1u, 0u, 7u, 6u, 5u, 4u, 11u, 10u, 9u, 8u, 15u, 14u, 13u, 12u}; // Shuffle table for converting ABGR to ARGB. static const uvec8 kShuffleMaskABGRToARGB = { 2u, 1u, 0u, 3u, 6u, 5u, 4u, 7u, 10u, 9u, 8u, 11u, 14u, 13u, 12u, 15u}; // Shuffle table for converting RGBA to ARGB. static const uvec8 kShuffleMaskRGBAToARGB = { 1u, 2u, 3u, 0u, 5u, 6u, 7u, 4u, 9u, 10u, 11u, 8u, 13u, 14u, 15u, 12u}; // Convert BGRA to ARGB. LIBYUV_API int BGRAToARGB(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, (const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height); } // Convert ARGB to BGRA (same as BGRAToARGB). LIBYUV_API int ARGBToBGRA(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return ARGBShuffle(src_bgra, src_stride_bgra, dst_argb, dst_stride_argb, (const uint8_t*)(&kShuffleMaskBGRAToARGB), width, height); } // Convert ABGR to ARGB. LIBYUV_API int ABGRToARGB(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, (const uint8_t*)(&kShuffleMaskABGRToARGB), width, height); } // Convert ARGB to ABGR (same as ABGRToARGB). LIBYUV_API int ARGBToABGR(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return ARGBShuffle(src_abgr, src_stride_abgr, dst_argb, dst_stride_argb, (const uint8_t*)(&kShuffleMaskABGRToARGB), width, height); } // Convert RGBA to ARGB. LIBYUV_API int RGBAToARGB(const uint8_t* src_rgba, int src_stride_rgba, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return ARGBShuffle(src_rgba, src_stride_rgba, dst_argb, dst_stride_argb, (const uint8_t*)(&kShuffleMaskRGBAToARGB), width, height); } // Convert RGB24 to ARGB. LIBYUV_API int RGB24ToARGB(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*RGB24ToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RGB24ToARGBRow_C; if (!src_rgb24 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_rgb24 = src_rgb24 + (height - 1) * src_stride_rgb24; src_stride_rgb24 = -src_stride_rgb24; } // Coalesce rows. if (src_stride_rgb24 == width * 3 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_rgb24 = dst_stride_argb = 0; } #if defined(HAS_RGB24TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RGB24ToARGBRow = RGB24ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { RGB24ToARGBRow = RGB24ToARGBRow_SSSE3; } } #endif #if defined(HAS_RGB24TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGB24ToARGBRow = RGB24ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { RGB24ToARGBRow = RGB24ToARGBRow_NEON; } } #endif #if defined(HAS_RGB24TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB24ToARGBRow = RGB24ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 16)) { RGB24ToARGBRow = RGB24ToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { RGB24ToARGBRow(src_rgb24, dst_argb, width); src_rgb24 += src_stride_rgb24; dst_argb += dst_stride_argb; } return 0; } // Convert RAW to ARGB.
LIBYUV_API int RAWToARGB(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*RAWToARGBRow)(const uint8_t* src_rgb, uint8_t* dst_argb, int width) = RAWToARGBRow_C; if (!src_raw || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_raw = src_raw + (height - 1) * src_stride_raw; src_stride_raw = -src_stride_raw; } // Coalesce rows. if (src_stride_raw == width * 3 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_raw = dst_stride_argb = 0; } #if defined(HAS_RAWTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToARGBRow = RAWToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { RAWToARGBRow = RAWToARGBRow_SSSE3; } } #endif #if defined(HAS_RAWTOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RAWToARGBRow = RAWToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { RAWToARGBRow = RAWToARGBRow_NEON; } } #endif #if defined(HAS_RAWTOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RAWToARGBRow = RAWToARGBRow_Any_MSA; if (IS_ALIGNED(width, 16)) { RAWToARGBRow = RAWToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { RAWToARGBRow(src_raw, dst_argb, width); src_raw += src_stride_raw; dst_argb += dst_stride_argb; } return 0; } // Convert RGB565 to ARGB. LIBYUV_API int RGB565ToARGB(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*RGB565ToARGBRow)(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) = RGB565ToARGBRow_C; if (!src_rgb565 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_rgb565 = src_rgb565 + (height - 1) * src_stride_rgb565; src_stride_rgb565 = -src_stride_rgb565; } // Coalesce rows. if (src_stride_rgb565 == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_rgb565 = dst_stride_argb = 0; } #if defined(HAS_RGB565TOARGBROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { RGB565ToARGBRow = RGB565ToARGBRow_Any_SSE2; if (IS_ALIGNED(width, 8)) { RGB565ToARGBRow = RGB565ToARGBRow_SSE2; } } #endif #if defined(HAS_RGB565TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { RGB565ToARGBRow = RGB565ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { RGB565ToARGBRow = RGB565ToARGBRow_AVX2; } } #endif #if defined(HAS_RGB565TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RGB565ToARGBRow = RGB565ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { RGB565ToARGBRow = RGB565ToARGBRow_NEON; } } #endif #if defined(HAS_RGB565TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RGB565ToARGBRow = RGB565ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 16)) { RGB565ToARGBRow = RGB565ToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { RGB565ToARGBRow(src_rgb565, dst_argb, width); src_rgb565 += src_stride_rgb565; dst_argb += dst_stride_argb; } return 0; } // Convert ARGB1555 to ARGB. LIBYUV_API int ARGB1555ToARGB(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*ARGB1555ToARGBRow)(const uint8_t* src_argb1555, uint8_t* dst_argb, int width) = ARGB1555ToARGBRow_C; if (!src_argb1555 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb1555 = src_argb1555 + (height - 1) * src_stride_argb1555; src_stride_argb1555 = -src_stride_argb1555; } // Coalesce rows. 
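// (Clarifying note added; not in the original source.) When both strides
// match the exact row byte widths (2 bytes/pixel in, 4 bytes/pixel out)
// there is no padding between rows, so the whole image can be processed
// as a single row of width * height pixels; the strides are then unused
// and zeroed.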
if (src_stride_argb1555 == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb1555 = dst_stride_argb = 0; } #if defined(HAS_ARGB1555TOARGBROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_SSE2; if (IS_ALIGNED(width, 8)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_SSE2; } } #endif #if defined(HAS_ARGB1555TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_AVX2; } } #endif #if defined(HAS_ARGB1555TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_NEON; } } #endif #if defined(HAS_ARGB1555TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGB1555ToARGBRow = ARGB1555ToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { ARGB1555ToARGBRow(src_argb1555, dst_argb, width); src_argb1555 += src_stride_argb1555; dst_argb += dst_stride_argb; } return 0; } // Convert ARGB4444 to ARGB. LIBYUV_API int ARGB4444ToARGB(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*ARGB4444ToARGBRow)(const uint8_t* src_argb4444, uint8_t* dst_argb, int width) = ARGB4444ToARGBRow_C; if (!src_argb4444 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb4444 = src_argb4444 + (height - 1) * src_stride_argb4444; src_stride_argb4444 = -src_stride_argb4444; } // Coalesce rows. if (src_stride_argb4444 == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb4444 = dst_stride_argb = 0; } #if defined(HAS_ARGB4444TOARGBROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_SSE2; if (IS_ALIGNED(width, 8)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_SSE2; } } #endif #if defined(HAS_ARGB4444TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_AVX2; } } #endif #if defined(HAS_ARGB4444TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_NEON; } } #endif #if defined(HAS_ARGB4444TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGB4444ToARGBRow = ARGB4444ToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { ARGB4444ToARGBRow(src_argb4444, dst_argb, width); src_argb4444 += src_stride_argb4444; dst_argb += dst_stride_argb; } return 0; } // Convert AR30 to ARGB. LIBYUV_API int AR30ToARGB(const uint8_t* src_ar30, int src_stride_ar30, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; if (!src_ar30 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_ar30 = src_ar30 + (height - 1) * src_stride_ar30; src_stride_ar30 = -src_stride_ar30; } // Coalesce rows. if (src_stride_ar30 == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_ar30 = dst_stride_argb = 0; } for (y = 0; y < height; ++y) { AR30ToARGBRow_C(src_ar30, dst_argb, width); src_ar30 += src_stride_ar30; dst_argb += dst_stride_argb; } return 0; } // Convert AR30 to ABGR. 
LIBYUV_API int AR30ToABGR(const uint8_t* src_ar30, int src_stride_ar30, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { int y; if (!src_ar30 || !dst_abgr || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_ar30 = src_ar30 + (height - 1) * src_stride_ar30; src_stride_ar30 = -src_stride_ar30; } // Coalesce rows. if (src_stride_ar30 == width * 4 && dst_stride_abgr == width * 4) { width *= height; height = 1; src_stride_ar30 = dst_stride_abgr = 0; } for (y = 0; y < height; ++y) { AR30ToABGRRow_C(src_ar30, dst_abgr, width); src_ar30 += src_stride_ar30; dst_abgr += dst_stride_abgr; } return 0; } // Convert AR30 to AB30. LIBYUV_API int AR30ToAB30(const uint8_t* src_ar30, int src_stride_ar30, uint8_t* dst_ab30, int dst_stride_ab30, int width, int height) { int y; if (!src_ar30 || !dst_ab30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_ar30 = src_ar30 + (height - 1) * src_stride_ar30; src_stride_ar30 = -src_stride_ar30; } // Coalesce rows. if (src_stride_ar30 == width * 4 && dst_stride_ab30 == width * 4) { width *= height; height = 1; src_stride_ar30 = dst_stride_ab30 = 0; } for (y = 0; y < height; ++y) { AR30ToAB30Row_C(src_ar30, dst_ab30, width); src_ar30 += src_stride_ar30; dst_ab30 += dst_stride_ab30; } return 0; } // Convert NV12 to ARGB with matrix static int NV12ToARGBMatrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*NV12ToARGBRow)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; if (!src_y || !src_uv || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_NV12TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { NV12ToARGBRow = NV12ToARGBRow_SSSE3; } } #endif #if defined(HAS_NV12TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { NV12ToARGBRow = NV12ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { NV12ToARGBRow = NV12ToARGBRow_AVX2; } } #endif #if defined(HAS_NV12TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { NV12ToARGBRow = NV12ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { NV12ToARGBRow = NV12ToARGBRow_NEON; } } #endif #if defined(HAS_NV12TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { NV12ToARGBRow = NV12ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 8)) { NV12ToARGBRow = NV12ToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { NV12ToARGBRow(src_y, src_uv, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { src_uv += src_stride_uv; } } return 0; } // Convert NV21 to ARGB with matrix static int NV21ToARGBMatrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, int src_stride_vu, uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*NV21ToARGBRow)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV21ToARGBRow_C; if (!src_y || !src_vu || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_NV21TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { NV21ToARGBRow = NV21ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { NV21ToARGBRow = NV21ToARGBRow_SSSE3; } } #endif #if defined(HAS_NV21TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { NV21ToARGBRow = NV21ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { NV21ToARGBRow = NV21ToARGBRow_AVX2; } } #endif #if defined(HAS_NV21TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { NV21ToARGBRow = NV21ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { NV21ToARGBRow = NV21ToARGBRow_NEON; } } #endif #if defined(HAS_NV21TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { NV21ToARGBRow = NV21ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 8)) { NV21ToARGBRow = NV21ToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { NV21ToARGBRow(src_y, src_vu, dst_argb, yuvconstants, width); dst_argb += dst_stride_argb; src_y += src_stride_y; if (y & 1) { src_vu += src_stride_vu; } } return 0; } // Convert NV12 to ARGB. LIBYUV_API int NV12ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return NV12ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_argb, dst_stride_argb, &kYuvI601Constants, width, height); } // Convert NV21 to ARGB. LIBYUV_API int NV21ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, int src_stride_vu, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return NV21ToARGBMatrix(src_y, src_stride_y, src_vu, src_stride_vu, dst_argb, dst_stride_argb, &kYuvI601Constants, width, height); } // Convert NV12 to ABGR. // To output ABGR instead of ARGB swap the UV and use a mirrored yuv matrix. // To swap the UV use NV12 instead of NV21. LIBYUV_API int NV12ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { return NV21ToARGBMatrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_abgr, dst_stride_abgr, &kYvuI601Constants, width, height); } // Convert NV21 to ABGR. LIBYUV_API int NV21ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, int src_stride_vu, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { return NV12ToARGBMatrix(src_y, src_stride_y, src_vu, src_stride_vu, dst_abgr, dst_stride_abgr, &kYvuI601Constants, width, height); } // TODO(fbarchard): Consider SSSE3 2 step conversion. // Convert NV12 to RGB24 with matrix static int NV12ToRGB24Matrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_rgb24, int dst_stride_rgb24, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*NV12ToRGB24Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB24Row_C; if (!src_y || !src_uv || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image.
if (height < 0) { height = -height; dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } #if defined(HAS_NV12TORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { NV12ToRGB24Row = NV12ToRGB24Row_Any_NEON; if (IS_ALIGNED(width, 8)) { NV12ToRGB24Row = NV12ToRGB24Row_NEON; } } #endif #if defined(HAS_NV12TORGB24ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { NV12ToRGB24Row = NV12ToRGB24Row_Any_SSSE3; if (IS_ALIGNED(width, 16)) { NV12ToRGB24Row = NV12ToRGB24Row_SSSE3; } } #endif #if defined(HAS_NV12TORGB24ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { NV12ToRGB24Row = NV12ToRGB24Row_Any_AVX2; if (IS_ALIGNED(width, 32)) { NV12ToRGB24Row = NV12ToRGB24Row_AVX2; } } #endif for (y = 0; y < height; ++y) { NV12ToRGB24Row(src_y, src_uv, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; if (y & 1) { src_uv += src_stride_uv; } } return 0; } // Convert NV21 to RGB24 with matrix static int NV21ToRGB24Matrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, int src_stride_vu, uint8_t* dst_rgb24, int dst_stride_rgb24, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*NV21ToRGB24Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV21ToRGB24Row_C; if (!src_y || !src_vu || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } #if defined(HAS_NV21TORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { NV21ToRGB24Row = NV21ToRGB24Row_Any_NEON; if (IS_ALIGNED(width, 8)) { NV21ToRGB24Row = NV21ToRGB24Row_NEON; } } #endif #if defined(HAS_NV21TORGB24ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { NV21ToRGB24Row = NV21ToRGB24Row_Any_SSSE3; if (IS_ALIGNED(width, 16)) { NV21ToRGB24Row = NV21ToRGB24Row_SSSE3; } } #endif #if defined(HAS_NV21TORGB24ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { NV21ToRGB24Row = NV21ToRGB24Row_Any_AVX2; if (IS_ALIGNED(width, 32)) { NV21ToRGB24Row = NV21ToRGB24Row_AVX2; } } #endif for (y = 0; y < height; ++y) { NV21ToRGB24Row(src_y, src_vu, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; if (y & 1) { src_vu += src_stride_vu; } } return 0; } // TODO(fbarchard): NV12ToRAW can be implemented by mirrored matrix. // Convert NV12 to RGB24. LIBYUV_API int NV12ToRGB24(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height) { return NV12ToRGB24Matrix(src_y, src_stride_y, src_uv, src_stride_uv, dst_rgb24, dst_stride_rgb24, &kYuvI601Constants, width, height); } // Convert NV21 to RGB24. LIBYUV_API int NV21ToRGB24(const uint8_t* src_y, int src_stride_y, const uint8_t* src_vu, int src_stride_vu, uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height) { return NV21ToRGB24Matrix(src_y, src_stride_y, src_vu, src_stride_vu, dst_rgb24, dst_stride_rgb24, &kYuvI601Constants, width, height); } // Convert M420 to ARGB. LIBYUV_API int M420ToARGB(const uint8_t* src_m420, int src_stride_m420, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*NV12ToARGBRow)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToARGBRow_C; if (!src_m420 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
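// (Clarifying note added; not in the original source.) M420 stores two
// rows of Y followed by one interleaved UV row. The row loop further
// below therefore passes src_m420 + 2 * src_stride_m420 as the UV pointer
// for each pair of output rows, then advances src_m420 by 3 * stride.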
if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } #if defined(HAS_NV12TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { NV12ToARGBRow = NV12ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { NV12ToARGBRow = NV12ToARGBRow_SSSE3; } } #endif #if defined(HAS_NV12TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { NV12ToARGBRow = NV12ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { NV12ToARGBRow = NV12ToARGBRow_AVX2; } } #endif #if defined(HAS_NV12TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { NV12ToARGBRow = NV12ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { NV12ToARGBRow = NV12ToARGBRow_NEON; } } #endif #if defined(HAS_NV12TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { NV12ToARGBRow = NV12ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 8)) { NV12ToARGBRow = NV12ToARGBRow_MSA; } } #endif for (y = 0; y < height - 1; y += 2) { NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, &kYuvI601Constants, width); NV12ToARGBRow(src_m420 + src_stride_m420, src_m420 + src_stride_m420 * 2, dst_argb + dst_stride_argb, &kYuvI601Constants, width); dst_argb += dst_stride_argb * 2; src_m420 += src_stride_m420 * 3; } if (height & 1) { NV12ToARGBRow(src_m420, src_m420 + src_stride_m420 * 2, dst_argb, &kYuvI601Constants, width); } return 0; } // Convert YUY2 to ARGB. LIBYUV_API int YUY2ToARGB(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*YUY2ToARGBRow)(const uint8_t* src_yuy2, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) = YUY2ToARGBRow_C; if (!src_yuy2 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2; } // Coalesce rows. if (src_stride_yuy2 == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_yuy2 = dst_stride_argb = 0; } #if defined(HAS_YUY2TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { YUY2ToARGBRow = YUY2ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { YUY2ToARGBRow = YUY2ToARGBRow_SSSE3; } } #endif #if defined(HAS_YUY2TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { YUY2ToARGBRow = YUY2ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { YUY2ToARGBRow = YUY2ToARGBRow_AVX2; } } #endif #if defined(HAS_YUY2TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { YUY2ToARGBRow = YUY2ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { YUY2ToARGBRow = YUY2ToARGBRow_NEON; } } #endif #if defined(HAS_YUY2TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { YUY2ToARGBRow = YUY2ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 8)) { YUY2ToARGBRow = YUY2ToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { YUY2ToARGBRow(src_yuy2, dst_argb, &kYuvI601Constants, width); src_yuy2 += src_stride_yuy2; dst_argb += dst_stride_argb; } return 0; } // Convert UYVY to ARGB. LIBYUV_API int UYVYToARGB(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*UYVYToARGBRow)(const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) = UYVYToARGBRow_C; if (!src_uyvy || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; src_stride_uyvy = -src_stride_uyvy; } // Coalesce rows. 
if (src_stride_uyvy == width * 2 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_uyvy = dst_stride_argb = 0; } #if defined(HAS_UYVYTOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { UYVYToARGBRow = UYVYToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { UYVYToARGBRow = UYVYToARGBRow_SSSE3; } } #endif #if defined(HAS_UYVYTOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { UYVYToARGBRow = UYVYToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { UYVYToARGBRow = UYVYToARGBRow_AVX2; } } #endif #if defined(HAS_UYVYTOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { UYVYToARGBRow = UYVYToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { UYVYToARGBRow = UYVYToARGBRow_NEON; } } #endif #if defined(HAS_UYVYTOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { UYVYToARGBRow = UYVYToARGBRow_Any_MSA; if (IS_ALIGNED(width, 8)) { UYVYToARGBRow = UYVYToARGBRow_MSA; } } #endif for (y = 0; y < height; ++y) { UYVYToARGBRow(src_uyvy, dst_argb, &kYuvI601Constants, width); src_uyvy += src_stride_uyvy; dst_argb += dst_stride_argb; } return 0; } static void WeavePixels(const uint8_t* src_u, const uint8_t* src_v, int src_pixel_stride_uv, uint8_t* dst_uv, int width) { int i; for (i = 0; i < width; ++i) { dst_uv[0] = *src_u; dst_uv[1] = *src_v; dst_uv += 2; src_u += src_pixel_stride_uv; src_v += src_pixel_stride_uv; } } // Convert Android420 to ARGB. LIBYUV_API int Android420ToARGBMatrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, uint8_t* dst_argb, int dst_stride_argb, const struct YuvConstants* yuvconstants, int width, int height) { int y; uint8_t* dst_uv; const ptrdiff_t vu_off = src_v - src_u; int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!src_y || !src_u || !src_v || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // I420 if (src_pixel_stride_uv == 1) { return I420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_argb, dst_stride_argb, yuvconstants, width, height); // NV21 } if (src_pixel_stride_uv == 2 && vu_off == -1 && src_stride_u == src_stride_v) { return NV21ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, dst_argb, dst_stride_argb, yuvconstants, width, height); // NV12 } if (src_pixel_stride_uv == 2 && vu_off == 1 && src_stride_u == src_stride_v) { return NV12ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, dst_argb, dst_stride_argb, yuvconstants, width, height); } // General case fallback creates NV12 align_buffer_64(plane_uv, halfwidth * 2 * halfheight); dst_uv = plane_uv; for (y = 0; y < halfheight; ++y) { WeavePixels(src_u, src_v, src_pixel_stride_uv, dst_uv, halfwidth); src_u += src_stride_u; src_v += src_stride_v; dst_uv += halfwidth * 2; } NV12ToARGBMatrix(src_y, src_stride_y, plane_uv, halfwidth * 2, dst_argb, dst_stride_argb, yuvconstants, width, height); free_aligned_buffer_64(plane_uv); return 0; } // Convert Android420 to ARGB. 
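// Android420 describes the android.media.Image 420 layouts, which carry an
// explicit pixel stride between chroma samples. Android420ToARGBMatrix above
// routes pixel stride 1 to the planar I420 path and pixel stride 2 to NV21
// or NV12, depending on whether V sits one byte before or after U in memory
// (vu_off of -1 or +1); any other spacing falls back to weaving the chroma
// into a temporary interleaved UV plane first.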
LIBYUV_API int Android420ToARGB(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { return Android420ToARGBMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_pixel_stride_uv, dst_argb, dst_stride_argb, &kYuvI601Constants, width, height); } // Convert Android420 to ABGR. LIBYUV_API int Android420ToABGR(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, int src_pixel_stride_uv, uint8_t* dst_abgr, int dst_stride_abgr, int width, int height) { return Android420ToARGBMatrix(src_y, src_stride_y, src_v, src_stride_v, src_u, src_stride_u, src_pixel_stride_uv, dst_abgr, dst_stride_abgr, &kYvuI601Constants, width, height); } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/convert_from.cc000066400000000000000000001314021357355204000232100ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/convert_from.h" #include "libyuv/basic_types.h" #include "libyuv/convert.h" // For I420Copy #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" #include "libyuv/rotate.h" #include "libyuv/row.h" #include "libyuv/scale.h" // For ScalePlane() #include "libyuv/video_common.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) static __inline int Abs(int v) { return v >= 0 ? v : -v; } // I420 To any I4xx YUV format with mirroring. static int I420ToI4xx(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int src_y_width, int src_y_height, int dst_uv_width, int dst_uv_height) { const int dst_y_width = Abs(src_y_width); const int dst_y_height = Abs(src_y_height); const int src_uv_width = SUBSAMPLE(src_y_width, 1, 1); const int src_uv_height = SUBSAMPLE(src_y_height, 1, 1); if (src_y_width == 0 || src_y_height == 0 || dst_uv_width <= 0 || dst_uv_height <= 0) { return -1; } if (dst_y) { ScalePlane(src_y, src_stride_y, src_y_width, src_y_height, dst_y, dst_stride_y, dst_y_width, dst_y_height, kFilterBilinear); } ScalePlane(src_u, src_stride_u, src_uv_width, src_uv_height, dst_u, dst_stride_u, dst_uv_width, dst_uv_height, kFilterBilinear); ScalePlane(src_v, src_stride_v, src_uv_width, src_uv_height, dst_v, dst_stride_v, dst_uv_width, dst_uv_height, kFilterBilinear); return 0; } // Convert 8 bit YUV to 10 bit. LIBYUV_API int I420ToI010(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint16_t* dst_y, int dst_stride_y, uint16_t* dst_u, int dst_stride_u, uint16_t* dst_v, int dst_stride_v, int width, int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
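// (The 8-to-10 bit conversion below is a straight precision shift: each
// 8-bit sample is scaled up by 4 into the 10-bit range, which is what the
// scale argument of 1024 passed to Convert8To16Plane corresponds to; e.g.
// video-range white Y = 235 maps to 940.)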
if (height < 0) { height = -height; halfheight = (height + 1) >> 1; src_y = src_y + (height - 1) * src_stride_y; src_u = src_u + (halfheight - 1) * src_stride_u; src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } // Convert Y plane. Convert8To16Plane(src_y, src_stride_y, dst_y, dst_stride_y, 1024, width, height); // Convert UV planes. Convert8To16Plane(src_u, src_stride_u, dst_u, dst_stride_u, 1024, halfwidth, halfheight); Convert8To16Plane(src_v, src_stride_v, dst_v, dst_stride_v, 1024, halfwidth, halfheight); return 0; } // 420 chroma is 1/2 width, 1/2 height // 422 chroma is 1/2 width, 1x height LIBYUV_API int I420ToI422(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { const int dst_uv_width = (Abs(width) + 1) >> 1; const int dst_uv_height = Abs(height); return I420ToI4xx(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, width, height, dst_uv_width, dst_uv_height); } // 420 chroma is 1/2 width, 1/2 height // 444 chroma is 1x width, 1x height LIBYUV_API int I420ToI444(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { const int dst_uv_width = Abs(width); const int dst_uv_height = Abs(height); return I420ToI4xx(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, width, height, dst_uv_width, dst_uv_height); } // Copy to I400. Source can be I420,422,444,400,NV12,NV21 LIBYUV_API int I400Copy(const uint8_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, int width, int height) { if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; } LIBYUV_API int I422ToYUY2(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_yuy2, int dst_stride_yuy2, int width, int height) { int y; void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width) = I422ToYUY2Row_C; if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; dst_stride_yuy2 = -dst_stride_yuy2; } // Coalesce rows. 
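// (For I422 input the chroma planes are half width, so the packed-buffer
// test below is src_stride_u * 2 == width and src_stride_v * 2 == width;
// only when Y, U, V and the YUY2 output are all contiguous can the rows be
// coalesced. The I420 variant further down has no coalesce step at all,
// because there each chroma row is shared by two luma rows.)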
if (src_stride_y == width && src_stride_u * 2 == width && src_stride_v * 2 == width && dst_stride_yuy2 == width * 2) { width *= height; height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_yuy2 = 0; } #if defined(HAS_I422TOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToYUY2Row = I422ToYUY2Row_SSE2; } } #endif #if defined(HAS_I422TOYUY2ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToYUY2Row = I422ToYUY2Row_Any_AVX2; if (IS_ALIGNED(width, 32)) { I422ToYUY2Row = I422ToYUY2Row_AVX2; } } #endif #if defined(HAS_I422TOYUY2ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToYUY2Row = I422ToYUY2Row_Any_NEON; if (IS_ALIGNED(width, 16)) { I422ToYUY2Row = I422ToYUY2Row_NEON; } } #endif for (y = 0; y < height; ++y) { I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); src_y += src_stride_y; src_u += src_stride_u; src_v += src_stride_v; dst_yuy2 += dst_stride_yuy2; } return 0; } LIBYUV_API int I420ToYUY2(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_yuy2, int dst_stride_yuy2, int width, int height) { int y; void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width) = I422ToYUY2Row_C; if (!src_y || !src_u || !src_v || !dst_yuy2 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; dst_stride_yuy2 = -dst_stride_yuy2; } #if defined(HAS_I422TOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToYUY2Row = I422ToYUY2Row_SSE2; } } #endif #if defined(HAS_I422TOYUY2ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToYUY2Row = I422ToYUY2Row_Any_AVX2; if (IS_ALIGNED(width, 32)) { I422ToYUY2Row = I422ToYUY2Row_AVX2; } } #endif #if defined(HAS_I422TOYUY2ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToYUY2Row = I422ToYUY2Row_Any_NEON; if (IS_ALIGNED(width, 16)) { I422ToYUY2Row = I422ToYUY2Row_NEON; } } #endif #if defined(HAS_I422TOYUY2ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToYUY2Row = I422ToYUY2Row_Any_MSA; if (IS_ALIGNED(width, 32)) { I422ToYUY2Row = I422ToYUY2Row_MSA; } } #endif for (y = 0; y < height - 1; y += 2) { I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); I422ToYUY2Row(src_y + src_stride_y, src_u, src_v, dst_yuy2 + dst_stride_yuy2, width); src_y += src_stride_y * 2; src_u += src_stride_u; src_v += src_stride_v; dst_yuy2 += dst_stride_yuy2 * 2; } if (height & 1) { I422ToYUY2Row(src_y, src_u, src_v, dst_yuy2, width); } return 0; } LIBYUV_API int I422ToUYVY(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_uyvy, int dst_stride_uyvy, int width, int height) { int y; void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width) = I422ToUYVYRow_C; if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; dst_stride_uyvy = -dst_stride_uyvy; } // Coalesce rows. 
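// (YUY2 and UYVY are both packed 4:2:2 at 2 bytes per pixel and differ only
// in byte order: YUY2 stores Y0, U, Y1, V per 2-pixel group while UYVY
// stores U, Y0, V, Y1, so the same coalescing and dispatch logic applies to
// both.)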
if (src_stride_y == width && src_stride_u * 2 == width && src_stride_v * 2 == width && dst_stride_uyvy == width * 2) { width *= height; height = 1; src_stride_y = src_stride_u = src_stride_v = dst_stride_uyvy = 0; } #if defined(HAS_I422TOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToUYVYRow = I422ToUYVYRow_SSE2; } } #endif #if defined(HAS_I422TOUYVYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToUYVYRow = I422ToUYVYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { I422ToUYVYRow = I422ToUYVYRow_AVX2; } } #endif #if defined(HAS_I422TOUYVYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToUYVYRow = I422ToUYVYRow_Any_NEON; if (IS_ALIGNED(width, 16)) { I422ToUYVYRow = I422ToUYVYRow_NEON; } } #endif #if defined(HAS_I422TOUYVYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToUYVYRow = I422ToUYVYRow_Any_MSA; if (IS_ALIGNED(width, 32)) { I422ToUYVYRow = I422ToUYVYRow_MSA; } } #endif for (y = 0; y < height; ++y) { I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); src_y += src_stride_y; src_u += src_stride_u; src_v += src_stride_v; dst_uyvy += dst_stride_uyvy; } return 0; } LIBYUV_API int I420ToUYVY(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_uyvy, int dst_stride_uyvy, int width, int height) { int y; void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width) = I422ToUYVYRow_C; if (!src_y || !src_u || !src_v || !dst_uyvy || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; dst_stride_uyvy = -dst_stride_uyvy; } #if defined(HAS_I422TOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToUYVYRow = I422ToUYVYRow_SSE2; } } #endif #if defined(HAS_I422TOUYVYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToUYVYRow = I422ToUYVYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { I422ToUYVYRow = I422ToUYVYRow_AVX2; } } #endif #if defined(HAS_I422TOUYVYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToUYVYRow = I422ToUYVYRow_Any_NEON; if (IS_ALIGNED(width, 16)) { I422ToUYVYRow = I422ToUYVYRow_NEON; } } #endif #if defined(HAS_I422TOUYVYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToUYVYRow = I422ToUYVYRow_Any_MSA; if (IS_ALIGNED(width, 32)) { I422ToUYVYRow = I422ToUYVYRow_MSA; } } #endif for (y = 0; y < height - 1; y += 2) { I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); I422ToUYVYRow(src_y + src_stride_y, src_u, src_v, dst_uyvy + dst_stride_uyvy, width); src_y += src_stride_y * 2; src_u += src_stride_u; src_v += src_stride_v; dst_uyvy += dst_stride_uyvy * 2; } if (height & 1) { I422ToUYVYRow(src_y, src_u, src_v, dst_uyvy, width); } return 0; } // TODO(fbarchard): test negative height for invert. LIBYUV_API int I420ToNV12(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv, int width, int height) { if (!src_y || !src_u || !src_v || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } int halfwidth = (width + 1) / 2; int halfheight = height > 0 ? 
(height + 1) / 2 : (height - 1) / 2; if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } MergeUVPlane(src_u, src_stride_u, src_v, src_stride_v, dst_uv, dst_stride_uv, halfwidth, halfheight); return 0; } LIBYUV_API int I420ToNV21(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_vu, int dst_stride_vu, int width, int height) { return I420ToNV12(src_y, src_stride_y, src_v, src_stride_v, src_u, src_stride_u, dst_y, dst_stride_y, dst_vu, dst_stride_vu, width, height); } // Convert I422 to RGBA with matrix static int I420ToRGBAMatrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgba, int dst_stride_rgba, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*I422ToRGBARow)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; dst_stride_rgba = -dst_stride_rgba; } #if defined(HAS_I422TORGBAROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToRGBARow = I422ToRGBARow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I422ToRGBARow = I422ToRGBARow_SSSE3; } } #endif #if defined(HAS_I422TORGBAROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToRGBARow = I422ToRGBARow_Any_AVX2; if (IS_ALIGNED(width, 16)) { I422ToRGBARow = I422ToRGBARow_AVX2; } } #endif #if defined(HAS_I422TORGBAROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToRGBARow = I422ToRGBARow_Any_NEON; if (IS_ALIGNED(width, 8)) { I422ToRGBARow = I422ToRGBARow_NEON; } } #endif #if defined(HAS_I422TORGBAROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToRGBARow = I422ToRGBARow_Any_MSA; if (IS_ALIGNED(width, 8)) { I422ToRGBARow = I422ToRGBARow_MSA; } } #endif for (y = 0; y < height; ++y) { I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width); dst_rgba += dst_stride_rgba; src_y += src_stride_y; if (y & 1) { src_u += src_stride_u; src_v += src_stride_v; } } return 0; } // Convert I420 to RGBA. LIBYUV_API int I420ToRGBA(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgba, int dst_stride_rgba, int width, int height) { return I420ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_rgba, dst_stride_rgba, &kYuvI601Constants, width, height); } // Convert I420 to BGRA. 
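// I420ToBGRA below reuses the RGBA path: swapping the U and V plane
// arguments together with the YVU constants swaps the computed R and B
// channels, which turns the RGBA byte order into BGRA without a dedicated
// row kernel. I420ToRAW further down plays the same trick on the RGB24 path.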
LIBYUV_API int I420ToBGRA(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_bgra, int dst_stride_bgra, int width, int height) { return I420ToRGBAMatrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_bgra, dst_stride_bgra, &kYvuI601Constants, // Use Yvu matrix width, height); } // Convert I420 to RGB24 with matrix static int I420ToRGB24Matrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgb24, int dst_stride_rgb24, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*I422ToRGB24Row)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB24Row_C; if (!src_y || !src_u || !src_v || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_rgb24 = dst_rgb24 + (height - 1) * dst_stride_rgb24; dst_stride_rgb24 = -dst_stride_rgb24; } #if defined(HAS_I422TORGB24ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToRGB24Row = I422ToRGB24Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I422ToRGB24Row = I422ToRGB24Row_SSSE3; } } #endif #if defined(HAS_I422TORGB24ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToRGB24Row = I422ToRGB24Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { I422ToRGB24Row = I422ToRGB24Row_AVX2; } } #endif #if defined(HAS_I422TORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToRGB24Row = I422ToRGB24Row_Any_NEON; if (IS_ALIGNED(width, 8)) { I422ToRGB24Row = I422ToRGB24Row_NEON; } } #endif #if defined(HAS_I422TORGB24ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToRGB24Row = I422ToRGB24Row_Any_MSA; if (IS_ALIGNED(width, 16)) { I422ToRGB24Row = I422ToRGB24Row_MSA; } } #endif for (y = 0; y < height; ++y) { I422ToRGB24Row(src_y, src_u, src_v, dst_rgb24, yuvconstants, width); dst_rgb24 += dst_stride_rgb24; src_y += src_stride_y; if (y & 1) { src_u += src_stride_u; src_v += src_stride_v; } } return 0; } // Convert I420 to RGB24. LIBYUV_API int I420ToRGB24(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height) { return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_rgb24, dst_stride_rgb24, &kYuvI601Constants, width, height); } // Convert I420 to RAW. LIBYUV_API int I420ToRAW(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_raw, int dst_stride_raw, int width, int height) { return I420ToRGB24Matrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_raw, dst_stride_raw, &kYvuI601Constants, // Use Yvu matrix width, height); } // Convert H420 to RGB24. LIBYUV_API int H420ToRGB24(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height) { return I420ToRGB24Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_rgb24, dst_stride_rgb24, &kYuvH709Constants, width, height); } // Convert H420 to RAW. 
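// The H-prefixed entry points are identical to their I-prefixed versions
// except for the color matrix: H420 selects the BT.709 (HD) constants where
// I420 uses BT.601, so only the YuvConstants argument differs.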
LIBYUV_API int H420ToRAW(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_raw, int dst_stride_raw, int width, int height) { return I420ToRGB24Matrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_raw, dst_stride_raw, &kYvuH709Constants, // Use Yvu matrix width, height); } // Convert I420 to ARGB1555. LIBYUV_API int I420ToARGB1555(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb1555, int dst_stride_argb1555, int width, int height) { int y; void (*I422ToARGB1555Row)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGB1555Row_C; if (!src_y || !src_u || !src_v || !dst_argb1555 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_argb1555 = dst_argb1555 + (height - 1) * dst_stride_argb1555; dst_stride_argb1555 = -dst_stride_argb1555; } #if defined(HAS_I422TOARGB1555ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToARGB1555Row = I422ToARGB1555Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I422ToARGB1555Row = I422ToARGB1555Row_SSSE3; } } #endif #if defined(HAS_I422TOARGB1555ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToARGB1555Row = I422ToARGB1555Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { I422ToARGB1555Row = I422ToARGB1555Row_AVX2; } } #endif #if defined(HAS_I422TOARGB1555ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToARGB1555Row = I422ToARGB1555Row_Any_NEON; if (IS_ALIGNED(width, 8)) { I422ToARGB1555Row = I422ToARGB1555Row_NEON; } } #endif #if defined(HAS_I422TOARGB1555ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGB1555Row = I422ToARGB1555Row_Any_MSA; if (IS_ALIGNED(width, 8)) { I422ToARGB1555Row = I422ToARGB1555Row_MSA; } } #endif for (y = 0; y < height; ++y) { I422ToARGB1555Row(src_y, src_u, src_v, dst_argb1555, &kYuvI601Constants, width); dst_argb1555 += dst_stride_argb1555; src_y += src_stride_y; if (y & 1) { src_u += src_stride_u; src_v += src_stride_v; } } return 0; } // Convert I420 to ARGB4444. LIBYUV_API int I420ToARGB4444(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_argb4444, int dst_stride_argb4444, int width, int height) { int y; void (*I422ToARGB4444Row)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGB4444Row_C; if (!src_y || !src_u || !src_v || !dst_argb4444 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
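// (ARGB4444 packs each pixel into 16 bits with 4 bits per channel; together
// with ARGB1555 above -- 1 bit of alpha and 5 bits per color -- these are
// the two low-depth alpha formats, and both simply drop the low source bits
// on conversion.)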
if (height < 0) { height = -height; dst_argb4444 = dst_argb4444 + (height - 1) * dst_stride_argb4444; dst_stride_argb4444 = -dst_stride_argb4444; } #if defined(HAS_I422TOARGB4444ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToARGB4444Row = I422ToARGB4444Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I422ToARGB4444Row = I422ToARGB4444Row_SSSE3; } } #endif #if defined(HAS_I422TOARGB4444ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToARGB4444Row = I422ToARGB4444Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { I422ToARGB4444Row = I422ToARGB4444Row_AVX2; } } #endif #if defined(HAS_I422TOARGB4444ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToARGB4444Row = I422ToARGB4444Row_Any_NEON; if (IS_ALIGNED(width, 8)) { I422ToARGB4444Row = I422ToARGB4444Row_NEON; } } #endif #if defined(HAS_I422TOARGB4444ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGB4444Row = I422ToARGB4444Row_Any_MSA; if (IS_ALIGNED(width, 8)) { I422ToARGB4444Row = I422ToARGB4444Row_MSA; } } #endif for (y = 0; y < height; ++y) { I422ToARGB4444Row(src_y, src_u, src_v, dst_argb4444, &kYuvI601Constants, width); dst_argb4444 += dst_stride_argb4444; src_y += src_stride_y; if (y & 1) { src_u += src_stride_u; src_v += src_stride_v; } } return 0; } // Convert I420 to RGB565. LIBYUV_API int I420ToRGB565(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height) { int y; void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; dst_stride_rgb565 = -dst_stride_rgb565; } #if defined(HAS_I422TORGB565ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I422ToRGB565Row = I422ToRGB565Row_SSSE3; } } #endif #if defined(HAS_I422TORGB565ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToRGB565Row = I422ToRGB565Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { I422ToRGB565Row = I422ToRGB565Row_AVX2; } } #endif #if defined(HAS_I422TORGB565ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToRGB565Row = I422ToRGB565Row_Any_NEON; if (IS_ALIGNED(width, 8)) { I422ToRGB565Row = I422ToRGB565Row_NEON; } } #endif #if defined(HAS_I422TORGB565ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToRGB565Row = I422ToRGB565Row_Any_MSA; if (IS_ALIGNED(width, 8)) { I422ToRGB565Row = I422ToRGB565Row_MSA; } } #endif for (y = 0; y < height; ++y) { I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width); dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; if (y & 1) { src_u += src_stride_u; src_v += src_stride_v; } } return 0; } // Convert I422 to RGB565. LIBYUV_API int I422ToRGB565(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height) { int y; void (*I422ToRGB565Row)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGB565Row_C; if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
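// (Unlike the I420 version above, which advances src_u and src_v only after
// odd rows, the I422 loop below steps the chroma pointers on every row,
// since 4:2:2 carries a full chroma row per luma row.)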
if (height < 0) { height = -height; dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; dst_stride_rgb565 = -dst_stride_rgb565; } #if defined(HAS_I422TORGB565ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToRGB565Row = I422ToRGB565Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I422ToRGB565Row = I422ToRGB565Row_SSSE3; } } #endif #if defined(HAS_I422TORGB565ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToRGB565Row = I422ToRGB565Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { I422ToRGB565Row = I422ToRGB565Row_AVX2; } } #endif #if defined(HAS_I422TORGB565ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToRGB565Row = I422ToRGB565Row_Any_NEON; if (IS_ALIGNED(width, 8)) { I422ToRGB565Row = I422ToRGB565Row_NEON; } } #endif #if defined(HAS_I422TORGB565ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToRGB565Row = I422ToRGB565Row_Any_MSA; if (IS_ALIGNED(width, 8)) { I422ToRGB565Row = I422ToRGB565Row_MSA; } } #endif for (y = 0; y < height; ++y) { I422ToRGB565Row(src_y, src_u, src_v, dst_rgb565, &kYuvI601Constants, width); dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; src_u += src_stride_u; src_v += src_stride_v; } return 0; } // Ordered 8x8 dither for 888 to 565. Values from 0 to 7. static const uint8_t kDither565_4x4[16] = { 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2, }; // Convert I420 to RGB565 with dithering. LIBYUV_API int I420ToRGB565Dither(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgb565, int dst_stride_rgb565, const uint8_t* dither4x4, int width, int height) { int y; void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToARGBRow_C; void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, int width) = ARGBToRGB565DitherRow_C; if (!src_y || !src_u || !src_v || !dst_rgb565 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
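// (Ordered dithering adds a small position-dependent bias, 0..7 from the
// 4x4 table above, to each 8-bit channel before truncating to 5:6:5 bits,
// trading banding for a fixed noise pattern -- roughly:
//   pix565 = ((b + d) >> 3) | (((g + d) >> 2) << 5) | (((r + d) >> 3) << 11)
// with channels clamped to 255 first. The table row is selected below with
// (y & 3) << 2 and handed to the row function as one 4-byte constant, one
// bias byte per pixel column.)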
if (height < 0) { height = -height; dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; dst_stride_rgb565 = -dst_stride_rgb565; } if (!dither4x4) { dither4x4 = kDither565_4x4; } #if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToARGBRow = I422ToARGBRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I422ToARGBRow = I422ToARGBRow_SSSE3; } } #endif #if defined(HAS_I422TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToARGBRow = I422ToARGBRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { I422ToARGBRow = I422ToARGBRow_AVX2; } } #endif #if defined(HAS_I422TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToARGBRow = I422ToARGBRow_Any_NEON; if (IS_ALIGNED(width, 8)) { I422ToARGBRow = I422ToARGBRow_NEON; } } #endif #if defined(HAS_I422TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGBRow = I422ToARGBRow_Any_MSA; if (IS_ALIGNED(width, 8)) { I422ToARGBRow = I422ToARGBRow_MSA; } } #endif #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2; } } #endif #if defined(HAS_ARGBTORGB565DITHERROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2; } } #endif #if defined(HAS_ARGBTORGB565DITHERROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON; } } #endif #if defined(HAS_ARGBTORGB565DITHERROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_MSA; if (IS_ALIGNED(width, 8)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MSA; } } #endif { // Allocate a row of argb. align_buffer_64(row_argb, width * 4); for (y = 0; y < height; ++y) { I422ToARGBRow(src_y, src_u, src_v, row_argb, &kYuvI601Constants, width); ARGBToRGB565DitherRow(row_argb, dst_rgb565, *(const uint32_t*)(dither4x4 + ((y & 3) << 2)), width); dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; if (y & 1) { src_u += src_stride_u; src_v += src_stride_v; } } free_aligned_buffer_64(row_argb); } return 0; } // Convert I420 to AR30 with matrix static int I420ToAR30Matrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_ar30, int dst_stride_ar30, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*I422ToAR30Row)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToAR30Row_C; if (!src_y || !src_u || !src_v || !dst_ar30 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
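// (AR30 is a 32-bit format with 2 bits of alpha and 10 bits per color
// channel, so the AR30 row kernels produce 10-bit color from the 8-bit
// source; note the dispatch below only provides SSSE3 and AVX2 variants.)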
if (height < 0) { height = -height; dst_ar30 = dst_ar30 + (height - 1) * dst_stride_ar30; dst_stride_ar30 = -dst_stride_ar30; } #if defined(HAS_I422TOAR30ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToAR30Row = I422ToAR30Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I422ToAR30Row = I422ToAR30Row_SSSE3; } } #endif #if defined(HAS_I422TOAR30ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToAR30Row = I422ToAR30Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { I422ToAR30Row = I422ToAR30Row_AVX2; } } #endif for (y = 0; y < height; ++y) { I422ToAR30Row(src_y, src_u, src_v, dst_ar30, yuvconstants, width); dst_ar30 += dst_stride_ar30; src_y += src_stride_y; if (y & 1) { src_u += src_stride_u; src_v += src_stride_v; } } return 0; } // Convert I420 to AR30. LIBYUV_API int I420ToAR30(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_ar30, int dst_stride_ar30, int width, int height) { return I420ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, &kYuvI601Constants, width, height); } // Convert H420 to AR30. LIBYUV_API int H420ToAR30(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_ar30, int dst_stride_ar30, int width, int height) { return I420ToAR30Matrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_ar30, dst_stride_ar30, &kYvuH709Constants, width, height); } // Convert I420 to specified format LIBYUV_API int ConvertFromI420(const uint8_t* y, int y_stride, const uint8_t* u, int u_stride, const uint8_t* v, int v_stride, uint8_t* dst_sample, int dst_sample_stride, int width, int height, uint32_t fourcc) { uint32_t format = CanonicalFourCC(fourcc); int r = 0; if (!y || !u || !v || !dst_sample || width <= 0 || height == 0) { return -1; } switch (format) { // Single plane formats case FOURCC_YUY2: r = I420ToYUY2(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width * 2, width, height); break; case FOURCC_UYVY: r = I420ToUYVY(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width * 2, width, height); break; case FOURCC_RGBP: r = I420ToRGB565(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width * 2, width, height); break; case FOURCC_RGBO: r = I420ToARGB1555(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width * 2, width, height); break; case FOURCC_R444: r = I420ToARGB4444(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width * 2, width, height); break; case FOURCC_24BG: r = I420ToRGB24(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width * 3, width, height); break; case FOURCC_RAW: r = I420ToRAW(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width * 3, width, height); break; case FOURCC_ARGB: r = I420ToARGB(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width * 4, width, height); break; case FOURCC_BGRA: r = I420ToBGRA(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width * 4, width, height); break; case FOURCC_ABGR: r = I420ToABGR(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? 
dst_sample_stride : width * 4, width, height); break; case FOURCC_RGBA: r = I420ToRGBA(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width * 4, width, height); break; case FOURCC_AR30: r = I420ToAR30(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width * 4, width, height); break; case FOURCC_I400: r = I400Copy(y, y_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width, width, height); break; case FOURCC_NV12: { uint8_t* dst_uv = dst_sample + width * height; r = I420ToNV12(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width, dst_uv, dst_sample_stride ? dst_sample_stride : width, width, height); break; } case FOURCC_NV21: { uint8_t* dst_vu = dst_sample + width * height; r = I420ToNV21(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride ? dst_sample_stride : width, dst_vu, dst_sample_stride ? dst_sample_stride : width, width, height); break; } // TODO(fbarchard): Add M420. // Triplanar formats case FOURCC_I420: case FOURCC_YV12: { dst_sample_stride = dst_sample_stride ? dst_sample_stride : width; int halfstride = (dst_sample_stride + 1) / 2; int halfheight = (height + 1) / 2; uint8_t* dst_u; uint8_t* dst_v; if (format == FOURCC_YV12) { dst_v = dst_sample + dst_sample_stride * height; dst_u = dst_v + halfstride * halfheight; } else { dst_u = dst_sample + dst_sample_stride * height; dst_v = dst_u + halfstride * halfheight; } r = I420Copy(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride, dst_u, halfstride, dst_v, halfstride, width, height); break; } case FOURCC_I422: case FOURCC_YV16: { dst_sample_stride = dst_sample_stride ? dst_sample_stride : width; int halfstride = (dst_sample_stride + 1) / 2; uint8_t* dst_u; uint8_t* dst_v; if (format == FOURCC_YV16) { dst_v = dst_sample + dst_sample_stride * height; dst_u = dst_v + halfstride * height; } else { dst_u = dst_sample + dst_sample_stride * height; dst_v = dst_u + halfstride * height; } r = I420ToI422(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride, dst_u, halfstride, dst_v, halfstride, width, height); break; } case FOURCC_I444: case FOURCC_YV24: { dst_sample_stride = dst_sample_stride ? dst_sample_stride : width; uint8_t* dst_u; uint8_t* dst_v; if (format == FOURCC_YV24) { dst_v = dst_sample + dst_sample_stride * height; dst_u = dst_v + dst_sample_stride * height; } else { dst_u = dst_sample + dst_sample_stride * height; dst_v = dst_u + dst_sample_stride * height; } r = I420ToI444(y, y_stride, u, u_stride, v, v_stride, dst_sample, dst_sample_stride, dst_u, dst_sample_stride, dst_v, dst_sample_stride, width, height); break; } // Formats not supported - MJPG, biplanar, some rgb formats. default: return -1; // unknown fourcc - return failure code. } return r; } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/convert_from_argb.cc000066400000000000000000001305711357355204000242110ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "libyuv/convert_from_argb.h" #include "libyuv/basic_types.h" #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" #include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // ARGB little endian (bgra in memory) to I444 LIBYUV_API int ARGBToI444(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; void (*ARGBToUV444Row)(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUV444Row_C; if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_y == width && dst_stride_u == width && dst_stride_v == width) { width *= height; height = 1; src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; } #if defined(HAS_ARGBTOUV444ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUV444Row = ARGBToUV444Row_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUV444Row = ARGBToUV444Row_SSSE3; } } #endif #if defined(HAS_ARGBTOUV444ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUV444Row = ARGBToUV444Row_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToUV444Row = ARGBToUV444Row_NEON; } } #endif #if defined(HAS_ARGBTOUV444ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUV444Row = ARGBToUV444Row_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToUV444Row = ARGBToUV444Row_MSA; } } #endif #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToYRow = ARGBToYRow_AVX2; } } #endif #if defined(HAS_ARGBTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } } #endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBToUV444Row(src_argb, dst_u, dst_v, width); ARGBToYRow(src_argb, dst_y, width); src_argb += src_stride_argb; dst_y += dst_stride_y; dst_u += dst_stride_u; dst_v += dst_stride_v; } return 0; } // ARGB little endian (bgra in memory) to I422 LIBYUV_API int ARGBToI422(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; if (!src_argb || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
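// (ARGBToUVRow takes the stride of a second row so it can average chroma
// vertically; the 4:2:2 loop below passes 0 for that stride, sampling chroma
// from a single row and subsampling horizontally only.)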
if (src_stride_argb == width * 4 && dst_stride_y == width && dst_stride_u * 2 == width && dst_stride_v * 2 == width) { width *= height; height = 1; src_stride_argb = dst_stride_y = dst_stride_u = dst_stride_v = 0; } #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif #if defined(HAS_ARGBTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } } #endif #if defined(HAS_ARGBTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVRow = ARGBToUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_NEON; } } #endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_MSA; } } #endif #if defined(HAS_ARGBTOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUVRow = ARGBToUVRow_Any_MSA; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, dst_u, dst_v, width); ARGBToYRow(src_argb, dst_y, width); src_argb += src_stride_argb; dst_y += dst_stride_y; dst_u += dst_stride_u; dst_v += dst_stride_v; } return 0; } LIBYUV_API int ARGBToNV12(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv, int width, int height) { int y; int halfwidth = (width + 1) >> 1; void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; if (!src_argb || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
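// (For 4:2:0 output the loop below hands ARGBToUVRow the real stride so
// chroma is averaged across each pair of rows, writes U and V into two
// temporary rows rounded up to 32 bytes, and then interleaves them into the
// NV12 UV plane with MergeUVRow_; an odd final row is converted with a
// stride of 0.)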
if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif #if defined(HAS_ARGBTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } } #endif #if defined(HAS_ARGBTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVRow = ARGBToUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_NEON; } } #endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_MSA; } } #endif #if defined(HAS_ARGBTOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUVRow = ARGBToUVRow_Any_MSA; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_MSA; } } #endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_SSE2; } } #endif #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_ = MergeUVRow_Any_AVX2; if (IS_ALIGNED(halfwidth, 32)) { MergeUVRow_ = MergeUVRow_AVX2; } } #endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow_ = MergeUVRow_Any_NEON; if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_NEON; } } #endif #if defined(HAS_MERGEUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MergeUVRow_ = MergeUVRow_Any_MSA; if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_MSA; } } #endif { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); MergeUVRow_(row_u, row_v, dst_uv, halfwidth); ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); src_argb += src_stride_argb * 2; dst_y += dst_stride_y * 2; dst_uv += dst_stride_uv; } if (height & 1) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); MergeUVRow_(row_u, row_v, dst_uv, halfwidth); ARGBToYRow(src_argb, dst_y, width); } free_aligned_buffer_64(row_u); } return 0; } // Same as NV12 but U and V swapped. LIBYUV_API int ARGBToNV21(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_vu, int dst_stride_vu, int width, int height) { int y; int halfwidth = (width + 1) >> 1; void (*ARGBToUVRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; void (*MergeUVRow_)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_vu, int width) = MergeUVRow_C; if (!src_argb || !dst_y || !dst_vu || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
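// (NV21 differs from NV12 only in chroma order; the conversion below is
// identical except that MergeUVRow_ is called with row_v first, producing
// interleaved V,U pairs.)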
if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif #if defined(HAS_ARGBTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } } #endif #if defined(HAS_ARGBTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVRow = ARGBToUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_NEON; } } #endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_MSA; } } #endif #if defined(HAS_ARGBTOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUVRow = ARGBToUVRow_Any_MSA; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_MSA; } } #endif #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow_ = MergeUVRow_Any_SSE2; if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_SSE2; } } #endif #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow_ = MergeUVRow_Any_AVX2; if (IS_ALIGNED(halfwidth, 32)) { MergeUVRow_ = MergeUVRow_AVX2; } } #endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow_ = MergeUVRow_Any_NEON; if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_NEON; } } #endif #if defined(HAS_MERGEUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MergeUVRow_ = MergeUVRow_Any_MSA; if (IS_ALIGNED(halfwidth, 16)) { MergeUVRow_ = MergeUVRow_MSA; } } #endif { // Allocate a rows of uv. align_buffer_64(row_u, ((halfwidth + 31) & ~31) * 2); uint8_t* row_v = row_u + ((halfwidth + 31) & ~31); for (y = 0; y < height - 1; y += 2) { ARGBToUVRow(src_argb, src_stride_argb, row_u, row_v, width); MergeUVRow_(row_v, row_u, dst_vu, halfwidth); ARGBToYRow(src_argb, dst_y, width); ARGBToYRow(src_argb + src_stride_argb, dst_y + dst_stride_y, width); src_argb += src_stride_argb * 2; dst_y += dst_stride_y * 2; dst_vu += dst_stride_vu; } if (height & 1) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); MergeUVRow_(row_v, row_u, dst_vu, halfwidth); ARGBToYRow(src_argb, dst_y, width); } free_aligned_buffer_64(row_u); } return 0; } // Convert ARGB to YUY2. LIBYUV_API int ARGBToYUY2(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yuy2, int dst_stride_yuy2, int width, int height) { int y; void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; void (*I422ToYUY2Row)(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width) = I422ToYUY2Row_C; if (!src_argb || !dst_yuy2 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_yuy2 = dst_yuy2 + (height - 1) * dst_stride_yuy2; dst_stride_yuy2 = -dst_stride_yuy2; } // Coalesce rows. 
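// (ARGB to packed 4:2:2 is done in three row passes through temporary
// buffers: ARGBToUVRow with a 0 stride for single-row chroma, ARGBToYRow
// for luma, then I422ToYUY2Row to interleave Y, U and V into YUY2.)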
if (src_stride_argb == width * 4 && dst_stride_yuy2 == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_yuy2 = 0; } #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif #if defined(HAS_ARGBTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } } #endif #if defined(HAS_ARGBTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVRow = ARGBToUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_NEON; } } #endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_MSA; } } #endif #if defined(HAS_ARGBTOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUVRow = ARGBToUVRow_Any_MSA; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_MSA; } } #endif #if defined(HAS_I422TOYUY2ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToYUY2Row = I422ToYUY2Row_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToYUY2Row = I422ToYUY2Row_SSE2; } } #endif #if defined(HAS_I422TOYUY2ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToYUY2Row = I422ToYUY2Row_Any_AVX2; if (IS_ALIGNED(width, 32)) { I422ToYUY2Row = I422ToYUY2Row_AVX2; } } #endif #if defined(HAS_I422TOYUY2ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToYUY2Row = I422ToYUY2Row_Any_NEON; if (IS_ALIGNED(width, 16)) { I422ToYUY2Row = I422ToYUY2Row_NEON; } } #endif #if defined(HAS_I422TOYUY2ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToYUY2Row = I422ToYUY2Row_Any_MSA; if (IS_ALIGNED(width, 32)) { I422ToYUY2Row = I422ToYUY2Row_MSA; } } #endif { // Allocate a rows of yuv. align_buffer_64(row_y, ((width + 63) & ~63) * 2); uint8_t* row_u = row_y + ((width + 63) & ~63); uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); ARGBToYRow(src_argb, row_y, width); I422ToYUY2Row(row_y, row_u, row_v, dst_yuy2, width); src_argb += src_stride_argb; dst_yuy2 += dst_stride_yuy2; } free_aligned_buffer_64(row_y); } return 0; } // Convert ARGB to UYVY. LIBYUV_API int ARGBToUYVY(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_uyvy, int dst_stride_uyvy, int width, int height) { int y; void (*ARGBToUVRow)(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVRow_C; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; void (*I422ToUYVYRow)(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width) = I422ToUYVYRow_C; if (!src_argb || !dst_uyvy || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_uyvy = dst_uyvy + (height - 1) * dst_stride_uyvy; dst_stride_uyvy = -dst_stride_uyvy; } // Coalesce rows. 
if (src_stride_argb == width * 4 && dst_stride_uyvy == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_uyvy = 0; } #if defined(HAS_ARGBTOYROW_SSSE3) && defined(HAS_ARGBTOUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVRow = ARGBToUVRow_Any_SSSE3; ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_SSSE3; ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) && defined(HAS_ARGBTOUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToUVRow = ARGBToUVRow_Any_AVX2; ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_AVX2; ARGBToYRow = ARGBToYRow_AVX2; } } #endif #if defined(HAS_ARGBTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } } #endif #if defined(HAS_ARGBTOUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVRow = ARGBToUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { ARGBToUVRow = ARGBToUVRow_NEON; } } #endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_MSA; } } #endif #if defined(HAS_ARGBTOUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUVRow = ARGBToUVRow_Any_MSA; if (IS_ALIGNED(width, 32)) { ARGBToUVRow = ARGBToUVRow_MSA; } } #endif #if defined(HAS_I422TOUYVYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { I422ToUYVYRow = I422ToUYVYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { I422ToUYVYRow = I422ToUYVYRow_SSE2; } } #endif #if defined(HAS_I422TOUYVYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToUYVYRow = I422ToUYVYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { I422ToUYVYRow = I422ToUYVYRow_AVX2; } } #endif #if defined(HAS_I422TOUYVYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToUYVYRow = I422ToUYVYRow_Any_NEON; if (IS_ALIGNED(width, 16)) { I422ToUYVYRow = I422ToUYVYRow_NEON; } } #endif #if defined(HAS_I422TOUYVYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToUYVYRow = I422ToUYVYRow_Any_MSA; if (IS_ALIGNED(width, 32)) { I422ToUYVYRow = I422ToUYVYRow_MSA; } } #endif { // Allocate a rows of yuv. align_buffer_64(row_y, ((width + 63) & ~63) * 2); uint8_t* row_u = row_y + ((width + 63) & ~63); uint8_t* row_v = row_u + ((width + 63) & ~63) / 2; for (y = 0; y < height; ++y) { ARGBToUVRow(src_argb, 0, row_u, row_v, width); ARGBToYRow(src_argb, row_y, width); I422ToUYVYRow(row_y, row_u, row_v, dst_uyvy, width); src_argb += src_stride_argb; dst_uyvy += dst_stride_uyvy; } free_aligned_buffer_64(row_y); } return 0; } // Convert ARGB to I400. LIBYUV_API int ARGBToI400(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, int width, int height) { int y; void (*ARGBToYRow)(const uint8_t* src_argb, uint8_t* dst_y, int width) = ARGBToYRow_C; if (!src_argb || !dst_y || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
if (src_stride_argb == width * 4 && dst_stride_y == width) { width *= height; height = 1; src_stride_argb = dst_stride_y = 0; } #if defined(HAS_ARGBTOYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYRow = ARGBToYRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToYRow = ARGBToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToYRow = ARGBToYRow_AVX2; } } #endif #if defined(HAS_ARGBTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYRow = ARGBToYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYRow = ARGBToYRow_NEON; } } #endif #if defined(HAS_ARGBTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYRow = ARGBToYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYRow = ARGBToYRow_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBToYRow(src_argb, dst_y, width); src_argb += src_stride_argb; dst_y += dst_stride_y; } return 0; } // Shuffle table for converting ARGB to RGBA. static const uvec8 kShuffleMaskARGBToRGBA = { 3u, 0u, 1u, 2u, 7u, 4u, 5u, 6u, 11u, 8u, 9u, 10u, 15u, 12u, 13u, 14u}; // Convert ARGB to RGBA. LIBYUV_API int ARGBToRGBA(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_rgba, int dst_stride_rgba, int width, int height) { return ARGBShuffle(src_argb, src_stride_argb, dst_rgba, dst_stride_rgba, (const uint8_t*)(&kShuffleMaskARGBToRGBA), width, height); } // Convert ARGB To RGB24. LIBYUV_API int ARGBToRGB24(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height) { int y; void (*ARGBToRGB24Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToRGB24Row_C; if (!src_argb || !dst_rgb24 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_rgb24 == width * 3) { width *= height; height = 1; src_stride_argb = dst_stride_rgb24 = 0; } #if defined(HAS_ARGBTORGB24ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToRGB24Row = ARGBToRGB24Row_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToRGB24Row = ARGBToRGB24Row_SSSE3; } } #endif #if defined(HAS_ARGBTORGB24ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToRGB24Row = ARGBToRGB24Row_AVX2; } } #endif #if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) if (TestCpuFlag(kCpuHasAVX512VBMI)) { ARGBToRGB24Row = ARGBToRGB24Row_Any_AVX512VBMI; if (IS_ALIGNED(width, 32)) { ARGBToRGB24Row = ARGBToRGB24Row_AVX512VBMI; } } #endif #if defined(HAS_ARGBTORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToRGB24Row = ARGBToRGB24Row_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToRGB24Row = ARGBToRGB24Row_NEON; } } #endif #if defined(HAS_ARGBTORGB24ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToRGB24Row = ARGBToRGB24Row_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToRGB24Row = ARGBToRGB24Row_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBToRGB24Row(src_argb, dst_rgb24, width); src_argb += src_stride_argb; dst_rgb24 += dst_stride_rgb24; } return 0; } // Convert ARGB To RAW. 
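// RAW is RGB24 with the red and blue channels swapped. Usage sketch
// (illustrative, not part of the library; kWidth and kHeight stand for the
// caller's dimensions with tightly packed buffers):
//   uint8_t argb[kWidth * kHeight * 4];
//   uint8_t raw[kWidth * kHeight * 3];
//   ARGBToRAW(argb, kWidth * 4, raw, kWidth * 3, kWidth, kHeight);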
LIBYUV_API int ARGBToRAW(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_raw, int dst_stride_raw, int width, int height) { int y; void (*ARGBToRAWRow)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToRAWRow_C; if (!src_argb || !dst_raw || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_raw == width * 3) { width *= height; height = 1; src_stride_argb = dst_stride_raw = 0; } #if defined(HAS_ARGBTORAWROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToRAWRow = ARGBToRAWRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToRAWRow = ARGBToRAWRow_SSSE3; } } #endif #if defined(HAS_ARGBTORAWROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToRAWRow = ARGBToRAWRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToRAWRow = ARGBToRAWRow_AVX2; } } #endif #if defined(HAS_ARGBTORAWROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToRAWRow = ARGBToRAWRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToRAWRow = ARGBToRAWRow_NEON; } } #endif #if defined(HAS_ARGBTORAWROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToRAWRow = ARGBToRAWRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToRAWRow = ARGBToRAWRow_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBToRAWRow(src_argb, dst_raw, width); src_argb += src_stride_argb; dst_raw += dst_stride_raw; } return 0; } // Ordered 8x8 dither for 888 to 565. Values from 0 to 7. static const uint8_t kDither565_4x4[16] = { 0, 4, 1, 5, 6, 2, 7, 3, 1, 5, 0, 4, 7, 3, 6, 2, }; // Convert ARGB To RGB565 with 4x4 dither matrix (16 bytes). LIBYUV_API int ARGBToRGB565Dither(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_rgb565, int dst_stride_rgb565, const uint8_t* dither4x4, int width, int height) { int y; void (*ARGBToRGB565DitherRow)(const uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, int width) = ARGBToRGB565DitherRow_C; if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } if (!dither4x4) { dither4x4 = kDither565_4x4; } #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_SSE2; } } #endif #if defined(HAS_ARGBTORGB565DITHERROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_AVX2; } } #endif #if defined(HAS_ARGBTORGB565DITHERROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_NEON; } } #endif #if defined(HAS_ARGBTORGB565DITHERROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_Any_MSA; if (IS_ALIGNED(width, 8)) { ARGBToRGB565DitherRow = ARGBToRGB565DitherRow_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBToRGB565DitherRow(src_argb, dst_rgb565, *(const uint32_t*)(dither4x4 + ((y & 3) << 2)), width); src_argb += src_stride_argb; dst_rgb565 += dst_stride_rgb565; } return 0; } // Convert ARGB To RGB565. // TODO(fbarchard): Consider using dither function low level with zeros. 
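// RGB565 packs each pixel into 16 bits (5 red, 6 green, 5 blue), dropping
// alpha entirely. The dithered variant above selects a table row with
// ((y & 3) << 2), so the 4x4 matrix repeats every four scanlines.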
LIBYUV_API int ARGBToRGB565(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height) { int y; void (*ARGBToRGB565Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToRGB565Row_C; if (!src_argb || !dst_rgb565 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_rgb565 == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_rgb565 = 0; } #if defined(HAS_ARGBTORGB565ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBToRGB565Row = ARGBToRGB565Row_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBToRGB565Row = ARGBToRGB565Row_SSE2; } } #endif #if defined(HAS_ARGBTORGB565ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToRGB565Row = ARGBToRGB565Row_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBToRGB565Row = ARGBToRGB565Row_AVX2; } } #endif #if defined(HAS_ARGBTORGB565ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToRGB565Row = ARGBToRGB565Row_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToRGB565Row = ARGBToRGB565Row_NEON; } } #endif #if defined(HAS_ARGBTORGB565ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToRGB565Row = ARGBToRGB565Row_Any_MSA; if (IS_ALIGNED(width, 8)) { ARGBToRGB565Row = ARGBToRGB565Row_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBToRGB565Row(src_argb, dst_rgb565, width); src_argb += src_stride_argb; dst_rgb565 += dst_stride_rgb565; } return 0; } // Convert ARGB To ARGB1555. LIBYUV_API int ARGBToARGB1555(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb1555, int dst_stride_argb1555, int width, int height) { int y; void (*ARGBToARGB1555Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToARGB1555Row_C; if (!src_argb || !dst_argb1555 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_argb1555 == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_argb1555 = 0; } #if defined(HAS_ARGBTOARGB1555ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBToARGB1555Row = ARGBToARGB1555Row_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBToARGB1555Row = ARGBToARGB1555Row_SSE2; } } #endif #if defined(HAS_ARGBTOARGB1555ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToARGB1555Row = ARGBToARGB1555Row_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBToARGB1555Row = ARGBToARGB1555Row_AVX2; } } #endif #if defined(HAS_ARGBTOARGB1555ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToARGB1555Row = ARGBToARGB1555Row_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToARGB1555Row = ARGBToARGB1555Row_NEON; } } #endif #if defined(HAS_ARGBTOARGB1555ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToARGB1555Row = ARGBToARGB1555Row_Any_MSA; if (IS_ALIGNED(width, 8)) { ARGBToARGB1555Row = ARGBToARGB1555Row_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBToARGB1555Row(src_argb, dst_argb1555, width); src_argb += src_stride_argb; dst_argb1555 += dst_stride_argb1555; } return 0; } // Convert ARGB To ARGB4444. 
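// ARGB1555 above keeps 1 bit of alpha and 5 bits per color channel, while
// ARGB4444 below keeps 4 bits for each of alpha, red, green and blue. Both
// need width * 2 bytes per tightly packed output row.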
LIBYUV_API int ARGBToARGB4444(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb4444, int dst_stride_argb4444, int width, int height) { int y; void (*ARGBToARGB4444Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToARGB4444Row_C; if (!src_argb || !dst_argb4444 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_argb4444 == width * 2) { width *= height; height = 1; src_stride_argb = dst_stride_argb4444 = 0; } #if defined(HAS_ARGBTOARGB4444ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBToARGB4444Row = ARGBToARGB4444Row_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBToARGB4444Row = ARGBToARGB4444Row_SSE2; } } #endif #if defined(HAS_ARGBTOARGB4444ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToARGB4444Row = ARGBToARGB4444Row_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBToARGB4444Row = ARGBToARGB4444Row_AVX2; } } #endif #if defined(HAS_ARGBTOARGB4444ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToARGB4444Row = ARGBToARGB4444Row_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToARGB4444Row = ARGBToARGB4444Row_NEON; } } #endif #if defined(HAS_ARGBTOARGB4444ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToARGB4444Row = ARGBToARGB4444Row_Any_MSA; if (IS_ALIGNED(width, 8)) { ARGBToARGB4444Row = ARGBToARGB4444Row_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBToARGB4444Row(src_argb, dst_argb4444, width); src_argb += src_stride_argb; dst_argb4444 += dst_stride_argb4444; } return 0; } // Convert ABGR To AR30. LIBYUV_API int ABGRToAR30(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_ar30, int dst_stride_ar30, int width, int height) { int y; void (*ABGRToAR30Row)(const uint8_t* src_abgr, uint8_t* dst_rgb, int width) = ABGRToAR30Row_C; if (!src_abgr || !dst_ar30 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_abgr = src_abgr + (height - 1) * src_stride_abgr; src_stride_abgr = -src_stride_abgr; } // Coalesce rows. if (src_stride_abgr == width * 4 && dst_stride_ar30 == width * 4) { width *= height; height = 1; src_stride_abgr = dst_stride_ar30 = 0; } #if defined(HAS_ABGRTOAR30ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ABGRToAR30Row = ABGRToAR30Row_Any_SSSE3; if (IS_ALIGNED(width, 4)) { ABGRToAR30Row = ABGRToAR30Row_SSSE3; } } #endif #if defined(HAS_ABGRTOAR30ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ABGRToAR30Row = ABGRToAR30Row_Any_AVX2; if (IS_ALIGNED(width, 8)) { ABGRToAR30Row = ABGRToAR30Row_AVX2; } } #endif for (y = 0; y < height; ++y) { ABGRToAR30Row(src_abgr, dst_ar30, width); src_abgr += src_stride_abgr; dst_ar30 += dst_stride_ar30; } return 0; } // Convert ARGB To AR30. LIBYUV_API int ARGBToAR30(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_ar30, int dst_stride_ar30, int width, int height) { int y; void (*ARGBToAR30Row)(const uint8_t* src_argb, uint8_t* dst_rgb, int width) = ARGBToAR30Row_C; if (!src_argb || !dst_ar30 || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
if (src_stride_argb == width * 4 && dst_stride_ar30 == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_ar30 = 0; } #if defined(HAS_ARGBTOAR30ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToAR30Row = ARGBToAR30Row_Any_SSSE3; if (IS_ALIGNED(width, 4)) { ARGBToAR30Row = ARGBToAR30Row_SSSE3; } } #endif #if defined(HAS_ARGBTOAR30ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToAR30Row = ARGBToAR30Row_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBToAR30Row = ARGBToAR30Row_AVX2; } } #endif for (y = 0; y < height; ++y) { ARGBToAR30Row(src_argb, dst_ar30, width); src_argb += src_stride_argb; dst_ar30 += dst_stride_ar30; } return 0; } // Convert ARGB to J420. (JPeg full range I420). LIBYUV_API int ARGBToJ420(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } #if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; ARGBToYJRow = ARGBToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVJRow = ARGBToUVJRow_SSSE3; ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToYJRow = ARGBToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToYJRow = ARGBToYJRow_AVX2; } } #endif #if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYJRow = ARGBToYJRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYJRow = ARGBToYJRow_NEON; } } #endif #if defined(HAS_ARGBTOUVJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVJRow = ARGBToUVJRow_Any_NEON; if (IS_ALIGNED(width, 16)) { ARGBToUVJRow = ARGBToUVJRow_NEON; } } #endif #if defined(HAS_ARGBTOYJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYJRow = ARGBToYJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYJRow = ARGBToYJRow_MSA; } } #endif #if defined(HAS_ARGBTOUVJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUVJRow = ARGBToUVJRow_Any_MSA; if (IS_ALIGNED(width, 32)) { ARGBToUVJRow = ARGBToUVJRow_MSA; } } #endif for (y = 0; y < height - 1; y += 2) { ARGBToUVJRow(src_argb, src_stride_argb, dst_u, dst_v, width); ARGBToYJRow(src_argb, dst_yj, width); ARGBToYJRow(src_argb + src_stride_argb, dst_yj + dst_stride_yj, width); src_argb += src_stride_argb * 2; dst_yj += dst_stride_yj * 2; dst_u += dst_stride_u; dst_v += dst_stride_v; } if (height & 1) { ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); ARGBToYJRow(src_argb, dst_yj, width); } return 0; } // Convert ARGB to J422. (JPeg full range I422). 
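// The J-suffixed converters produce JPEG (full range, 0..255) YUV rather
// than the limited 16..235 range of the plain I4xx converters, which is why
// they dispatch to the ARGBToYJRow/ARGBToUVJRow kernels.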
LIBYUV_API int ARGBToJ422(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*ARGBToUVJRow)(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) = ARGBToUVJRow_C; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; if (!src_argb || !dst_yj || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_yj == width && dst_stride_u * 2 == width && dst_stride_v * 2 == width) { width *= height; height = 1; src_stride_argb = dst_stride_yj = dst_stride_u = dst_stride_v = 0; } #if defined(HAS_ARGBTOYJROW_SSSE3) && defined(HAS_ARGBTOUVJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToUVJRow = ARGBToUVJRow_Any_SSSE3; ARGBToYJRow = ARGBToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToUVJRow = ARGBToUVJRow_SSSE3; ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToYJRow = ARGBToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToYJRow = ARGBToYJRow_AVX2; } } #endif #if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYJRow = ARGBToYJRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYJRow = ARGBToYJRow_NEON; } } #endif #if defined(HAS_ARGBTOUVJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToUVJRow = ARGBToUVJRow_Any_NEON; if (IS_ALIGNED(width, 16)) { ARGBToUVJRow = ARGBToUVJRow_NEON; } } #endif #if defined(HAS_ARGBTOYJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYJRow = ARGBToYJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYJRow = ARGBToYJRow_MSA; } } #endif #if defined(HAS_ARGBTOUVJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToUVJRow = ARGBToUVJRow_Any_MSA; if (IS_ALIGNED(width, 32)) { ARGBToUVJRow = ARGBToUVJRow_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBToUVJRow(src_argb, 0, dst_u, dst_v, width); ARGBToYJRow(src_argb, dst_yj, width); src_argb += src_stride_argb; dst_yj += dst_stride_yj; dst_u += dst_stride_u; dst_v += dst_stride_v; } return 0; } // Convert ARGB to J400. LIBYUV_API int ARGBToJ400(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_yj, int dst_stride_yj, int width, int height) { int y; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_yj, int width) = ARGBToYJRow_C; if (!src_argb || !dst_yj || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
if (src_stride_argb == width * 4 && dst_stride_yj == width) { width *= height; height = 1; src_stride_argb = dst_stride_yj = 0; } #if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYJRow = ARGBToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToYJRow = ARGBToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToYJRow = ARGBToYJRow_AVX2; } } #endif #if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYJRow = ARGBToYJRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYJRow = ARGBToYJRow_NEON; } } #endif #if defined(HAS_ARGBTOYJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYJRow = ARGBToYJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYJRow = ARGBToYJRow_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBToYJRow(src_argb, dst_yj, width); src_argb += src_stride_argb; dst_yj += dst_stride_yj; } return 0; } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/convert_jpeg.cc000066400000000000000000000305321357355204000231740ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/convert.h" #include "libyuv/convert_argb.h" #ifdef HAVE_JPEG #include "libyuv/mjpeg_decoder.h" #endif #ifdef __cplusplus namespace libyuv { extern "C" { #endif #ifdef HAVE_JPEG struct I420Buffers { uint8_t* y; int y_stride; uint8_t* u; int u_stride; uint8_t* v; int v_stride; int w; int h; }; static void JpegCopyI420(void* opaque, const uint8_t* const* data, const int* strides, int rows) { I420Buffers* dest = (I420Buffers*)(opaque); I420Copy(data[0], strides[0], data[1], strides[1], data[2], strides[2], dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v, dest->v_stride, dest->w, rows); dest->y += rows * dest->y_stride; dest->u += ((rows + 1) >> 1) * dest->u_stride; dest->v += ((rows + 1) >> 1) * dest->v_stride; dest->h -= rows; } static void JpegI422ToI420(void* opaque, const uint8_t* const* data, const int* strides, int rows) { I420Buffers* dest = (I420Buffers*)(opaque); I422ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2], dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v, dest->v_stride, dest->w, rows); dest->y += rows * dest->y_stride; dest->u += ((rows + 1) >> 1) * dest->u_stride; dest->v += ((rows + 1) >> 1) * dest->v_stride; dest->h -= rows; } static void JpegI444ToI420(void* opaque, const uint8_t* const* data, const int* strides, int rows) { I420Buffers* dest = (I420Buffers*)(opaque); I444ToI420(data[0], strides[0], data[1], strides[1], data[2], strides[2], dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v, dest->v_stride, dest->w, rows); dest->y += rows * dest->y_stride; dest->u += ((rows + 1) >> 1) * dest->u_stride; dest->v += ((rows + 1) >> 1) * dest->v_stride; dest->h -= rows; } static void JpegI400ToI420(void* opaque, const uint8_t* const* data, const int* strides, int rows) { I420Buffers* dest = (I420Buffers*)(opaque); I400ToI420(data[0], strides[0], dest->y, dest->y_stride, dest->u, dest->u_stride, dest->v, dest->v_stride, dest->w, rows); dest->y += rows * dest->y_stride; dest->u += ((rows + 
1) >> 1) * dest->u_stride; dest->v += ((rows + 1) >> 1) * dest->v_stride; dest->h -= rows; } // Query size of MJPG in pixels. LIBYUV_API int MJPGSize(const uint8_t* sample, size_t sample_size, int* width, int* height) { MJpegDecoder mjpeg_decoder; LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); if (ret) { *width = mjpeg_decoder.GetWidth(); *height = mjpeg_decoder.GetHeight(); } mjpeg_decoder.UnloadFrame(); return ret ? 0 : -1; // -1 for runtime failure. } // MJPG (Motion JPeg) to I420 // TODO(fbarchard): review src_width and src_height requirement. dst_width and // dst_height may be enough. LIBYUV_API int MJPGToI420(const uint8_t* sample, size_t sample_size, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int src_width, int src_height, int dst_width, int dst_height) { if (sample_size == kUnknownDataSize) { // ERROR: MJPEG frame size unknown return -1; } // TODO(fbarchard): Port MJpeg to C. MJpegDecoder mjpeg_decoder; LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); if (ret && (mjpeg_decoder.GetWidth() != src_width || mjpeg_decoder.GetHeight() != src_height)) { // ERROR: MJPEG frame has unexpected dimensions mjpeg_decoder.UnloadFrame(); return 1; // runtime failure } if (ret) { I420Buffers bufs = {dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width, dst_height}; // YUV420 if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && mjpeg_decoder.GetNumComponents() == 3 && mjpeg_decoder.GetVertSampFactor(0) == 2 && mjpeg_decoder.GetHorizSampFactor(0) == 2 && mjpeg_decoder.GetVertSampFactor(1) == 1 && mjpeg_decoder.GetHorizSampFactor(1) == 1 && mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { ret = mjpeg_decoder.DecodeToCallback(&JpegCopyI420, &bufs, dst_width, dst_height); // YUV422 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && mjpeg_decoder.GetNumComponents() == 3 && mjpeg_decoder.GetVertSampFactor(0) == 1 && mjpeg_decoder.GetHorizSampFactor(0) == 2 && mjpeg_decoder.GetVertSampFactor(1) == 1 && mjpeg_decoder.GetHorizSampFactor(1) == 1 && mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToI420, &bufs, dst_width, dst_height); // YUV444 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && mjpeg_decoder.GetNumComponents() == 3 && mjpeg_decoder.GetVertSampFactor(0) == 1 && mjpeg_decoder.GetHorizSampFactor(0) == 1 && mjpeg_decoder.GetVertSampFactor(1) == 1 && mjpeg_decoder.GetHorizSampFactor(1) == 1 && mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToI420, &bufs, dst_width, dst_height); // YUV400 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceGrayscale && mjpeg_decoder.GetNumComponents() == 1 && mjpeg_decoder.GetVertSampFactor(0) == 1 && mjpeg_decoder.GetHorizSampFactor(0) == 1) { ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToI420, &bufs, dst_width, dst_height); } else { // TODO(fbarchard): Implement conversion for any other colorspace/sample // factors that occur in practice. // ERROR: Unable to convert MJPEG frame because format is not supported mjpeg_decoder.UnloadFrame(); return 1; } } return ret ? 
0 : 1; } #ifdef HAVE_JPEG struct ARGBBuffers { uint8_t* argb; int argb_stride; int w; int h; }; static void JpegI420ToARGB(void* opaque, const uint8_t* const* data, const int* strides, int rows) { ARGBBuffers* dest = (ARGBBuffers*)(opaque); I420ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2], dest->argb, dest->argb_stride, dest->w, rows); dest->argb += rows * dest->argb_stride; dest->h -= rows; } static void JpegI422ToARGB(void* opaque, const uint8_t* const* data, const int* strides, int rows) { ARGBBuffers* dest = (ARGBBuffers*)(opaque); I422ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2], dest->argb, dest->argb_stride, dest->w, rows); dest->argb += rows * dest->argb_stride; dest->h -= rows; } static void JpegI444ToARGB(void* opaque, const uint8_t* const* data, const int* strides, int rows) { ARGBBuffers* dest = (ARGBBuffers*)(opaque); I444ToARGB(data[0], strides[0], data[1], strides[1], data[2], strides[2], dest->argb, dest->argb_stride, dest->w, rows); dest->argb += rows * dest->argb_stride; dest->h -= rows; } static void JpegI400ToARGB(void* opaque, const uint8_t* const* data, const int* strides, int rows) { ARGBBuffers* dest = (ARGBBuffers*)(opaque); I400ToARGB(data[0], strides[0], dest->argb, dest->argb_stride, dest->w, rows); dest->argb += rows * dest->argb_stride; dest->h -= rows; } // MJPG (Motion JPeg) to ARGB // TODO(fbarchard): review src_width and src_height requirement. dst_width and // dst_height may be enough. LIBYUV_API int MJPGToARGB(const uint8_t* sample, size_t sample_size, uint8_t* dst_argb, int dst_stride_argb, int src_width, int src_height, int dst_width, int dst_height) { if (sample_size == kUnknownDataSize) { // ERROR: MJPEG frame size unknown return -1; } // TODO(fbarchard): Port MJpeg to C. 
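  // The decode path below mirrors MJPGToI420: load and validate the frame,
  // then dispatch on the decoded colorspace and sampling factors.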
MJpegDecoder mjpeg_decoder; LIBYUV_BOOL ret = mjpeg_decoder.LoadFrame(sample, sample_size); if (ret && (mjpeg_decoder.GetWidth() != src_width || mjpeg_decoder.GetHeight() != src_height)) { // ERROR: MJPEG frame has unexpected dimensions mjpeg_decoder.UnloadFrame(); return 1; // runtime failure } if (ret) { ARGBBuffers bufs = {dst_argb, dst_stride_argb, dst_width, dst_height}; // YUV420 if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && mjpeg_decoder.GetNumComponents() == 3 && mjpeg_decoder.GetVertSampFactor(0) == 2 && mjpeg_decoder.GetHorizSampFactor(0) == 2 && mjpeg_decoder.GetVertSampFactor(1) == 1 && mjpeg_decoder.GetHorizSampFactor(1) == 1 && mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { ret = mjpeg_decoder.DecodeToCallback(&JpegI420ToARGB, &bufs, dst_width, dst_height); // YUV422 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && mjpeg_decoder.GetNumComponents() == 3 && mjpeg_decoder.GetVertSampFactor(0) == 1 && mjpeg_decoder.GetHorizSampFactor(0) == 2 && mjpeg_decoder.GetVertSampFactor(1) == 1 && mjpeg_decoder.GetHorizSampFactor(1) == 1 && mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { ret = mjpeg_decoder.DecodeToCallback(&JpegI422ToARGB, &bufs, dst_width, dst_height); // YUV444 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceYCbCr && mjpeg_decoder.GetNumComponents() == 3 && mjpeg_decoder.GetVertSampFactor(0) == 1 && mjpeg_decoder.GetHorizSampFactor(0) == 1 && mjpeg_decoder.GetVertSampFactor(1) == 1 && mjpeg_decoder.GetHorizSampFactor(1) == 1 && mjpeg_decoder.GetVertSampFactor(2) == 1 && mjpeg_decoder.GetHorizSampFactor(2) == 1) { ret = mjpeg_decoder.DecodeToCallback(&JpegI444ToARGB, &bufs, dst_width, dst_height); // YUV400 } else if (mjpeg_decoder.GetColorSpace() == MJpegDecoder::kColorSpaceGrayscale && mjpeg_decoder.GetNumComponents() == 1 && mjpeg_decoder.GetVertSampFactor(0) == 1 && mjpeg_decoder.GetHorizSampFactor(0) == 1) { ret = mjpeg_decoder.DecodeToCallback(&JpegI400ToARGB, &bufs, dst_width, dst_height); } else { // TODO(fbarchard): Implement conversion for any other colorspace/sample // factors that occur in practice. // ERROR: Unable to convert MJPEG frame because format is not supported mjpeg_decoder.UnloadFrame(); return 1; } } return ret ? 0 : 1; } #endif #endif #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/convert_to_argb.cc000066400000000000000000000253161357355204000236700ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/convert_argb.h" #include "libyuv/cpu_id.h" #ifdef HAVE_JPEG #include "libyuv/mjpeg_decoder.h" #endif #include "libyuv/rotate_argb.h" #include "libyuv/row.h" #include "libyuv/video_common.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Convert camera sample to ARGB with cropping, rotation and vertical flip. // src_width is used for source stride computation // src_height is used to compute location of planes, and indicate inversion // sample_size is measured in bytes and is the size of the frame. // With MJPEG it is the compressed size of the frame. 
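// Usage sketch (illustrative, not part of the library): convert a packed
// YUY2 frame to ARGB with no crop and no rotation; kW and kH stand for the
// caller's frame dimensions.
//   ConvertToARGB(frame, 2 * kW * kH, dst_argb, kW * 4, 0, 0, kW, kH, kW,
//                 kH, kRotate0, FOURCC_YUY2);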
// TODO(fbarchard): Add the following: // H010ToARGB // H420ToARGB // H422ToARGB // I010ToARGB // J400ToARGB // J422ToARGB // J444ToARGB LIBYUV_API int ConvertToARGB(const uint8_t* sample, size_t sample_size, uint8_t* dst_argb, int dst_stride_argb, int crop_x, int crop_y, int src_width, int src_height, int crop_width, int crop_height, enum RotationMode rotation, uint32_t fourcc) { uint32_t format = CanonicalFourCC(fourcc); int aligned_src_width = (src_width + 1) & ~1; const uint8_t* src; const uint8_t* src_uv; int abs_src_height = (src_height < 0) ? -src_height : src_height; int inv_crop_height = (crop_height < 0) ? -crop_height : crop_height; int r = 0; // One pass rotation is available for some formats. For the rest, convert // to ARGB (with optional vertical flipping) into a temporary ARGB buffer, // and then rotate the ARGB to the final destination buffer. // For in-place conversion, if destination dst_argb is same as source sample, // also enable temporary buffer. LIBYUV_BOOL need_buf = (rotation && format != FOURCC_ARGB) || dst_argb == sample; uint8_t* dest_argb = dst_argb; int dest_dst_stride_argb = dst_stride_argb; uint8_t* rotate_buffer = NULL; int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height; if (dst_argb == NULL || sample == NULL || src_width <= 0 || crop_width <= 0 || src_height == 0 || crop_height == 0) { return -1; } if (src_height < 0) { inv_crop_height = -inv_crop_height; } if (need_buf) { int argb_size = crop_width * 4 * abs_crop_height; rotate_buffer = (uint8_t*)malloc(argb_size); /* NOLINT */ if (!rotate_buffer) { return 1; // Out of memory runtime error. } dst_argb = rotate_buffer; dst_stride_argb = crop_width * 4; } switch (format) { // Single plane formats case FOURCC_YUY2: src = sample + (aligned_src_width * crop_y + crop_x) * 2; r = YUY2ToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_UYVY: src = sample + (aligned_src_width * crop_y + crop_x) * 2; r = UYVYToARGB(src, aligned_src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_24BG: src = sample + (src_width * crop_y + crop_x) * 3; r = RGB24ToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_RAW: src = sample + (src_width * crop_y + crop_x) * 3; r = RAWToARGB(src, src_width * 3, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_ARGB: if (!need_buf && !rotation) { src = sample + (src_width * crop_y + crop_x) * 4; r = ARGBToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); } break; case FOURCC_BGRA: src = sample + (src_width * crop_y + crop_x) * 4; r = BGRAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_ABGR: src = sample + (src_width * crop_y + crop_x) * 4; r = ABGRToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_RGBA: src = sample + (src_width * crop_y + crop_x) * 4; r = RGBAToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_AR30: src = sample + (src_width * crop_y + crop_x) * 4; r = AR30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_AB30: src = sample + (src_width * crop_y + crop_x) * 4; r = AB30ToARGB(src, src_width * 4, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_RGBP: src = sample + (src_width * crop_y + crop_x) * 2; r = RGB565ToARGB(src, 
src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_RGBO: src = sample + (src_width * crop_y + crop_x) * 2; r = ARGB1555ToARGB(src, src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_R444: src = sample + (src_width * crop_y + crop_x) * 2; r = ARGB4444ToARGB(src, src_width * 2, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_I400: src = sample + src_width * crop_y + crop_x; r = I400ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; // Biplanar formats case FOURCC_NV12: src = sample + (src_width * crop_y + crop_x); src_uv = sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x; r = NV12ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_NV21: src = sample + (src_width * crop_y + crop_x); src_uv = sample + aligned_src_width * (abs_src_height + crop_y / 2) + crop_x; // Call NV12 but with u and v parameters swapped. r = NV21ToARGB(src, src_width, src_uv, aligned_src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; case FOURCC_M420: src = sample + (src_width * crop_y) * 12 / 8 + crop_x; r = M420ToARGB(src, src_width, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; // Triplanar formats case FOURCC_I420: case FOURCC_YV12: { const uint8_t* src_y = sample + (src_width * crop_y + crop_x); const uint8_t* src_u; const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; if (format == FOURCC_YV12) { src_v = sample + src_width * abs_src_height + (halfwidth * crop_y + crop_x) / 2; src_u = sample + src_width * abs_src_height + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; } else { src_u = sample + src_width * abs_src_height + (halfwidth * crop_y + crop_x) / 2; src_v = sample + src_width * abs_src_height + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; } r = I420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } case FOURCC_J420: { const uint8_t* src_y = sample + (src_width * crop_y + crop_x); const uint8_t* src_u; const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; src_u = sample + src_width * abs_src_height + (halfwidth * crop_y + crop_x) / 2; src_v = sample + src_width * abs_src_height + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; r = J420ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } case FOURCC_I422: case FOURCC_YV16: { const uint8_t* src_y = sample + src_width * crop_y + crop_x; const uint8_t* src_u; const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; if (format == FOURCC_YV16) { src_v = sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2; src_u = sample + src_width * abs_src_height + halfwidth * (abs_src_height + crop_y) + crop_x / 2; } else { src_u = sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2; src_v = sample + src_width * abs_src_height + halfwidth * (abs_src_height + crop_y) + crop_x / 2; } r = I422ToARGB(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_argb, dst_stride_argb, crop_width, inv_crop_height); break; } case FOURCC_I444: case FOURCC_YV24: { const uint8_t* src_y = sample + src_width * crop_y + crop_x; const uint8_t* src_u; const uint8_t* src_v; if (format == FOURCC_YV24) { src_v = sample + src_width * 
(abs_src_height + crop_y) + crop_x;
        src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      } else {
        src_u = sample + src_width * (abs_src_height + crop_y) + crop_x;
        src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x;
      }
      r = I444ToARGB(src_y, src_width, src_u, src_width, src_v, src_width,
                     dst_argb, dst_stride_argb, crop_width, inv_crop_height);
      break;
    }
#ifdef HAVE_JPEG
    case FOURCC_MJPG:
      r = MJPGToARGB(sample, sample_size, dst_argb, dst_stride_argb, src_width,
                     abs_src_height, crop_width, inv_crop_height);
      break;
#endif
    default:
      r = -1;  // unknown fourcc - return failure code.
  }

  if (need_buf) {
    if (!r) {
      r = ARGBRotate(dst_argb, dst_stride_argb, dest_argb,
                     dest_dst_stride_argb, crop_width, abs_crop_height,
                     rotation);
    }
    free(rotate_buffer);
  } else if (rotation) {
    src = sample + (src_width * crop_y + crop_x) * 4;
    r = ARGBRotate(src, src_width * 4, dst_argb, dst_stride_argb, crop_width,
                   inv_crop_height, rotation);
  }

  return r;
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

libvpx-1.8.2/third_party/libyuv/source/convert_to_i420.cc

/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdlib.h>

#include "libyuv/convert.h"
#include "libyuv/video_common.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// Convert camera sample to I420 with cropping, rotation and vertical flip.
// src_width is used for source stride computation
// src_height is used to compute location of planes, and indicate inversion
// sample_size is measured in bytes and is the size of the frame.
//   With MJPEG it is the compressed size of the frame.
LIBYUV_API
int ConvertToI420(const uint8_t* sample,
                  size_t sample_size,
                  uint8_t* dst_y,
                  int dst_stride_y,
                  uint8_t* dst_u,
                  int dst_stride_u,
                  uint8_t* dst_v,
                  int dst_stride_v,
                  int crop_x,
                  int crop_y,
                  int src_width,
                  int src_height,
                  int crop_width,
                  int crop_height,
                  enum RotationMode rotation,
                  uint32_t fourcc) {
  uint32_t format = CanonicalFourCC(fourcc);
  int aligned_src_width = (src_width + 1) & ~1;
  const uint8_t* src;
  const uint8_t* src_uv;
  const int abs_src_height = (src_height < 0) ? -src_height : src_height;
  // TODO(nisse): Why allow crop_height < 0?
  const int abs_crop_height = (crop_height < 0) ? -crop_height : crop_height;
  int r = 0;
  LIBYUV_BOOL need_buf =
      (rotation && format != FOURCC_I420 && format != FOURCC_NV12 &&
       format != FOURCC_NV21 && format != FOURCC_YV12) ||
      dst_y == sample;
  uint8_t* tmp_y = dst_y;
  uint8_t* tmp_u = dst_u;
  uint8_t* tmp_v = dst_v;
  int tmp_y_stride = dst_stride_y;
  int tmp_u_stride = dst_stride_u;
  int tmp_v_stride = dst_stride_v;
  uint8_t* rotate_buffer = NULL;
  const int inv_crop_height =
      (src_height < 0) ? -abs_crop_height : abs_crop_height;

  if (!dst_y || !dst_u || !dst_v || !sample || src_width <= 0 ||
      crop_width <= 0 || src_height == 0 || crop_height == 0) {
    return -1;
  }

  // One pass rotation is available for some formats. For the rest, convert
  // to I420 (with optional vertical flipping) into a temporary I420 buffer,
  // and then rotate the I420 to the final destination buffer.
  // For in-place conversion, if destination dst_y is same as source sample,
  // also enable temporary buffer.
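  // The temporary buffer holds one full I420 frame: a crop_width x
  // abs_crop_height Y plane followed by two rounded-up half-width,
  // half-height chroma planes.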
if (need_buf) { int y_size = crop_width * abs_crop_height; int uv_size = ((crop_width + 1) / 2) * ((abs_crop_height + 1) / 2); rotate_buffer = (uint8_t*)malloc(y_size + uv_size * 2); /* NOLINT */ if (!rotate_buffer) { return 1; // Out of memory runtime error. } dst_y = rotate_buffer; dst_u = dst_y + y_size; dst_v = dst_u + uv_size; dst_stride_y = crop_width; dst_stride_u = dst_stride_v = ((crop_width + 1) / 2); } switch (format) { // Single plane formats case FOURCC_YUY2: src = sample + (aligned_src_width * crop_y + crop_x) * 2; r = YUY2ToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_UYVY: src = sample + (aligned_src_width * crop_y + crop_x) * 2; r = UYVYToI420(src, aligned_src_width * 2, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_RGBP: src = sample + (src_width * crop_y + crop_x) * 2; r = RGB565ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_RGBO: src = sample + (src_width * crop_y + crop_x) * 2; r = ARGB1555ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_R444: src = sample + (src_width * crop_y + crop_x) * 2; r = ARGB4444ToI420(src, src_width * 2, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_24BG: src = sample + (src_width * crop_y + crop_x) * 3; r = RGB24ToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_RAW: src = sample + (src_width * crop_y + crop_x) * 3; r = RAWToI420(src, src_width * 3, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_ARGB: src = sample + (src_width * crop_y + crop_x) * 4; r = ARGBToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_BGRA: src = sample + (src_width * crop_y + crop_x) * 4; r = BGRAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_ABGR: src = sample + (src_width * crop_y + crop_x) * 4; r = ABGRToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; case FOURCC_RGBA: src = sample + (src_width * crop_y + crop_x) * 4; r = RGBAToI420(src, src_width * 4, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; // TODO(fbarchard): Add AR30 and AB30 case FOURCC_I400: src = sample + src_width * crop_y + crop_x; r = I400ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; // Biplanar formats case FOURCC_NV12: src = sample + (src_width * crop_y + crop_x); src_uv = sample + (src_width * abs_src_height) + ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2); r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height, rotation); break; case FOURCC_NV21: src = sample + (src_width * crop_y + crop_x); src_uv = sample + (src_width * abs_src_height) + ((crop_y / 2) * aligned_src_width) + ((crop_x / 2) * 2); // Call NV12 but with dst_u and dst_v parameters 
swapped. r = NV12ToI420Rotate(src, src_width, src_uv, aligned_src_width, dst_y, dst_stride_y, dst_v, dst_stride_v, dst_u, dst_stride_u, crop_width, inv_crop_height, rotation); break; case FOURCC_M420: src = sample + (src_width * crop_y) * 12 / 8 + crop_x; r = M420ToI420(src, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; // Triplanar formats case FOURCC_I420: case FOURCC_YV12: { const uint8_t* src_y = sample + (src_width * crop_y + crop_x); const uint8_t* src_u; const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; int halfheight = (abs_src_height + 1) / 2; if (format == FOURCC_YV12) { src_v = sample + src_width * abs_src_height + (halfwidth * crop_y + crop_x) / 2; src_u = sample + src_width * abs_src_height + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; } else { src_u = sample + src_width * abs_src_height + (halfwidth * crop_y + crop_x) / 2; src_v = sample + src_width * abs_src_height + halfwidth * (halfheight + crop_y / 2) + crop_x / 2; } r = I420Rotate(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height, rotation); break; } case FOURCC_I422: case FOURCC_YV16: { const uint8_t* src_y = sample + src_width * crop_y + crop_x; const uint8_t* src_u; const uint8_t* src_v; int halfwidth = (src_width + 1) / 2; if (format == FOURCC_YV16) { src_v = sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2; src_u = sample + src_width * abs_src_height + halfwidth * (abs_src_height + crop_y) + crop_x / 2; } else { src_u = sample + src_width * abs_src_height + halfwidth * crop_y + crop_x / 2; src_v = sample + src_width * abs_src_height + halfwidth * (abs_src_height + crop_y) + crop_x / 2; } r = I422ToI420(src_y, src_width, src_u, halfwidth, src_v, halfwidth, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; } case FOURCC_I444: case FOURCC_YV24: { const uint8_t* src_y = sample + src_width * crop_y + crop_x; const uint8_t* src_u; const uint8_t* src_v; if (format == FOURCC_YV24) { src_v = sample + src_width * (abs_src_height + crop_y) + crop_x; src_u = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; } else { src_u = sample + src_width * (abs_src_height + crop_y) + crop_x; src_v = sample + src_width * (abs_src_height * 2 + crop_y) + crop_x; } r = I444ToI420(src_y, src_width, src_u, src_width, src_v, src_width, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, crop_width, inv_crop_height); break; } #ifdef HAVE_JPEG case FOURCC_MJPG: r = MJPGToI420(sample, sample_size, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, src_width, abs_src_height, crop_width, inv_crop_height); break; #endif default: r = -1; // unknown fourcc - return failure code. } if (need_buf) { if (!r) { r = I420Rotate(dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, tmp_y, tmp_y_stride, tmp_u, tmp_u_stride, tmp_v, tmp_v_stride, crop_width, abs_crop_height, rotation); } free(rotate_buffer); } return r; } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/cpu_id.cc000066400000000000000000000214731357355204000217560ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/cpu_id.h"

#if defined(_MSC_VER)
#include <intrin.h>  // For __cpuidex()
#endif
#if !defined(__pnacl__) && !defined(__CLR_VER) && \
    !defined(__native_client__) && (defined(_M_IX86) || defined(_M_X64)) && \
    defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
#include <immintrin.h>  // For _xgetbv()
#endif

// For ArmCpuCaps() but unittested on all platforms
#include <stdio.h>
#include <string.h>

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// For functions that use the stack and have runtime checks for overflow,
// use SAFEBUFFERS to avoid additional check.
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219) && \
    !defined(__clang__)
#define SAFEBUFFERS __declspec(safebuffers)
#else
#define SAFEBUFFERS
#endif

// cpu_info_ variable for SIMD instruction sets detected.
LIBYUV_API int cpu_info_ = 0;

// TODO(fbarchard): Consider using int for cpuid so casting is not needed.
// Low level cpuid for X86.
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
     defined(__x86_64__)) && \
    !defined(__pnacl__) && !defined(__CLR_VER)
LIBYUV_API
void CpuId(int info_eax, int info_ecx, int* cpu_info) {
#if defined(_MSC_VER)
// Visual C version uses intrinsic or inline x86 assembly.
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
  __cpuidex(cpu_info, info_eax, info_ecx);
#elif defined(_M_IX86)
  __asm {
    mov eax, info_eax
    mov ecx, info_ecx
    mov edi, cpu_info
    cpuid
    mov [edi], eax
    mov [edi + 4], ebx
    mov [edi + 8], ecx
    mov [edi + 12], edx
  }
#else  // Visual C but not x86
  if (info_ecx == 0) {
    __cpuid(cpu_info, info_eax);
  } else {
    cpu_info[3] = cpu_info[2] = cpu_info[1] = cpu_info[0] = 0u;
  }
#endif
// GCC version uses inline x86 assembly.
#else  // defined(_MSC_VER)
  int info_ebx, info_edx;
  asm volatile(
#if defined(__i386__) && defined(__PIC__)
      // Preserve ebx for fpic 32 bit.
      "mov %%ebx, %%edi \n"
      "cpuid \n"
      "xchg %%edi, %%ebx \n"
      : "=D"(info_ebx),
#else
      "cpuid \n"
      : "=b"(info_ebx),
#endif  // defined( __i386__) && defined(__PIC__)
        "+a"(info_eax), "+c"(info_ecx), "=d"(info_edx));
  cpu_info[0] = info_eax;
  cpu_info[1] = info_ebx;
  cpu_info[2] = info_ecx;
  cpu_info[3] = info_edx;
#endif  // defined(_MSC_VER)
}
#else  // (defined(_M_IX86) || defined(_M_X64) ...
LIBYUV_API
void CpuId(int eax, int ecx, int* cpu_info) {
  (void)eax;
  (void)ecx;
  cpu_info[0] = cpu_info[1] = cpu_info[2] = cpu_info[3] = 0;
}
#endif

// For VS2010 and earlier emit can be used:
//   _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0  // For VS2010 and earlier.
// __asm {
//   xor ecx, ecx  // xcr 0
//   xgetbv
//   mov xcr0, eax
// }
// For VS2013 and earlier 32 bit, the _xgetbv(0) optimizer produces bad code.
// https://code.google.com/p/libyuv/issues/detail?id=529
#if defined(_M_IX86) && (_MSC_VER < 1900)
#pragma optimize("g", off)
#endif
#if (defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || \
     defined(__x86_64__)) && \
    !defined(__pnacl__) && !defined(__CLR_VER) && !defined(__native_client__)
// X86 CPUs have xgetbv to detect OS saves high parts of ymm registers.
int GetXCR0() {
  int xcr0 = 0;
#if defined(_MSC_FULL_VER) && (_MSC_FULL_VER >= 160040219)
  xcr0 = (int)_xgetbv(0);  // VS2010 SP1 required.  NOLINT
#elif defined(__i386__) || defined(__x86_64__)
  asm(".byte 0x0f, 0x01, 0xd0" : "=a"(xcr0) : "c"(0) : "%edx");
#endif  // defined(__i386__) || defined(__x86_64__)
  return xcr0;
}
#else
// xgetbv unavailable to query for OSSave support. Return 0.
#define GetXCR0() 0 #endif // defined(_M_IX86) || defined(_M_X64) .. // Return optimization to previous setting. #if defined(_M_IX86) && (_MSC_VER < 1900) #pragma optimize("g", on) #endif // based on libvpx arm_cpudetect.c // For Arm, but public to allow testing on any CPU LIBYUV_API SAFEBUFFERS int ArmCpuCaps(const char* cpuinfo_name) { char cpuinfo_line[512]; FILE* f = fopen(cpuinfo_name, "r"); if (!f) { // Assume Neon if /proc/cpuinfo is unavailable. // This will occur for Chrome sandbox for Pepper or Render process. return kCpuHasNEON; } while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) { if (memcmp(cpuinfo_line, "Features", 8) == 0) { char* p = strstr(cpuinfo_line, " neon"); if (p && (p[5] == ' ' || p[5] == '\n')) { fclose(f); return kCpuHasNEON; } // aarch64 uses asimd for Neon. p = strstr(cpuinfo_line, " asimd"); if (p) { fclose(f); return kCpuHasNEON; } } } fclose(f); return 0; } // TODO(fbarchard): Consider read_msa_ir(). // TODO(fbarchard): Add unittest. LIBYUV_API SAFEBUFFERS int MipsCpuCaps(const char* cpuinfo_name, const char ase[]) { char cpuinfo_line[512]; FILE* f = fopen(cpuinfo_name, "r"); if (!f) { // ase enabled if /proc/cpuinfo is unavailable. if (strcmp(ase, " msa") == 0) { return kCpuHasMSA; } return 0; } while (fgets(cpuinfo_line, sizeof(cpuinfo_line) - 1, f)) { if (memcmp(cpuinfo_line, "ASEs implemented", 16) == 0) { char* p = strstr(cpuinfo_line, ase); if (p) { fclose(f); if (strcmp(ase, " msa") == 0) { return kCpuHasMSA; } return 0; } } } fclose(f); return 0; } static SAFEBUFFERS int GetCpuFlags(void) { int cpu_info = 0; #if !defined(__pnacl__) && !defined(__CLR_VER) && \ (defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ defined(_M_IX86)) int cpu_info0[4] = {0, 0, 0, 0}; int cpu_info1[4] = {0, 0, 0, 0}; int cpu_info7[4] = {0, 0, 0, 0}; CpuId(0, 0, cpu_info0); CpuId(1, 0, cpu_info1); if (cpu_info0[0] >= 7) { CpuId(7, 0, cpu_info7); } cpu_info = kCpuHasX86 | ((cpu_info1[3] & 0x04000000) ? kCpuHasSSE2 : 0) | ((cpu_info1[2] & 0x00000200) ? kCpuHasSSSE3 : 0) | ((cpu_info1[2] & 0x00080000) ? kCpuHasSSE41 : 0) | ((cpu_info1[2] & 0x00100000) ? kCpuHasSSE42 : 0) | ((cpu_info7[1] & 0x00000200) ? kCpuHasERMS : 0); // AVX requires OS saves YMM registers. if (((cpu_info1[2] & 0x1c000000) == 0x1c000000) && // AVX and OSXSave ((GetXCR0() & 6) == 6)) { // Test OS saves YMM registers cpu_info |= kCpuHasAVX | ((cpu_info7[1] & 0x00000020) ? kCpuHasAVX2 : 0) | ((cpu_info1[2] & 0x00001000) ? kCpuHasFMA3 : 0) | ((cpu_info1[2] & 0x20000000) ? kCpuHasF16C : 0); // Detect AVX512bw if ((GetXCR0() & 0xe0) == 0xe0) { cpu_info |= (cpu_info7[1] & 0x40000000) ? kCpuHasAVX512BW : 0; cpu_info |= (cpu_info7[1] & 0x80000000) ? kCpuHasAVX512VL : 0; cpu_info |= (cpu_info7[2] & 0x00000002) ? kCpuHasAVX512VBMI : 0; cpu_info |= (cpu_info7[2] & 0x00000040) ? kCpuHasAVX512VBMI2 : 0; cpu_info |= (cpu_info7[2] & 0x00001000) ? kCpuHasAVX512VBITALG : 0; cpu_info |= (cpu_info7[2] & 0x00004000) ? kCpuHasAVX512VPOPCNTDQ : 0; cpu_info |= (cpu_info7[2] & 0x00000100) ? kCpuHasGFNI : 0; } } #endif #if defined(__mips__) && defined(__linux__) #if defined(__mips_msa) cpu_info = MipsCpuCaps("/proc/cpuinfo", " msa"); #endif cpu_info |= kCpuHasMIPS; #endif #if defined(__arm__) || defined(__aarch64__) // gcc -mfpu=neon defines __ARM_NEON__ // __ARM_NEON__ generates code that requires Neon. NaCL also requires Neon. // For Linux, /proc/cpuinfo can be tested but without that assume Neon. 
#if defined(__ARM_NEON__) || defined(__native_client__) || !defined(__linux__)
  cpu_info = kCpuHasNEON;
// For aarch64(arm64), /proc/cpuinfo's feature is not complete, e.g. no neon
// flag in it.
// So for aarch64, neon enabling is hard coded here.
#endif
#if defined(__aarch64__)
  cpu_info = kCpuHasNEON;
#else
  // Linux arm parse text file for neon detect.
  cpu_info = ArmCpuCaps("/proc/cpuinfo");
#endif
  cpu_info |= kCpuHasARM;
#endif  // __arm__

  cpu_info |= kCpuInitialized;
  return cpu_info;
}

// Note that use of this function is not thread safe.
LIBYUV_API
int MaskCpuFlags(int enable_flags) {
  int cpu_info = GetCpuFlags() & enable_flags;
  SetCpuFlags(cpu_info);
  return cpu_info;
}

LIBYUV_API
int InitCpuFlags(void) {
  return MaskCpuFlags(-1);
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

libvpx-1.8.2/third_party/libyuv/source/mjpeg_decoder.cc

/*
 *  Copyright 2012 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/mjpeg_decoder.h"

#ifdef HAVE_JPEG
#include <assert.h>

#if !defined(__pnacl__) && !defined(__CLR_VER) && \
    !defined(COVERAGE_ENABLED) && !defined(TARGET_IPHONE_SIMULATOR)
// Must be included before jpeglib.
#include <setjmp.h>
#define HAVE_SETJMP

#if defined(_MSC_VER)
// disable warning 4324: structure was padded due to __declspec(align())
#pragma warning(disable : 4324)
#endif
#endif

struct FILE;  // For jpeglib.h.

// C++ build requires extern C for jpeg internals.
#ifdef __cplusplus
extern "C" {
#endif

#include <jpeglib.h>

#ifdef __cplusplus
}  // extern "C"
#endif

#include "libyuv/planar_functions.h"  // For CopyPlane().

namespace libyuv {

#ifdef HAVE_SETJMP
struct SetJmpErrorMgr {
  jpeg_error_mgr base;  // Must be at the top
  jmp_buf setjmp_buffer;
};
#endif

const int MJpegDecoder::kColorSpaceUnknown = JCS_UNKNOWN;
const int MJpegDecoder::kColorSpaceGrayscale = JCS_GRAYSCALE;
const int MJpegDecoder::kColorSpaceRgb = JCS_RGB;
const int MJpegDecoder::kColorSpaceYCbCr = JCS_YCbCr;
const int MJpegDecoder::kColorSpaceCMYK = JCS_CMYK;
const int MJpegDecoder::kColorSpaceYCCK = JCS_YCCK;

// Methods that are passed to jpeglib.
boolean fill_input_buffer(jpeg_decompress_struct* cinfo);
void init_source(jpeg_decompress_struct* cinfo);
void skip_input_data(jpeg_decompress_struct* cinfo, long num_bytes);  // NOLINT
void term_source(jpeg_decompress_struct* cinfo);
void ErrorHandler(jpeg_common_struct* cinfo);
void OutputHandler(jpeg_common_struct* cinfo);

MJpegDecoder::MJpegDecoder()
    : has_scanline_padding_(LIBYUV_FALSE),
      num_outbufs_(0),
      scanlines_(NULL),
      scanlines_sizes_(NULL),
      databuf_(NULL),
      databuf_strides_(NULL) {
  decompress_struct_ = new jpeg_decompress_struct;
  source_mgr_ = new jpeg_source_mgr;
#ifdef HAVE_SETJMP
  error_mgr_ = new SetJmpErrorMgr;
  decompress_struct_->err = jpeg_std_error(&error_mgr_->base);
  // Override standard exit()-based error handler.
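  // Without this override jpeglib would call exit() on a decode error; the
  // replacement handler longjmp()s back to the setjmp() in LoadFrame() or
  // the decode functions so failures surface as normal return values.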
error_mgr_->base.error_exit = &ErrorHandler; error_mgr_->base.output_message = &OutputHandler; #endif decompress_struct_->client_data = NULL; source_mgr_->init_source = &init_source; source_mgr_->fill_input_buffer = &fill_input_buffer; source_mgr_->skip_input_data = &skip_input_data; source_mgr_->resync_to_restart = &jpeg_resync_to_restart; source_mgr_->term_source = &term_source; jpeg_create_decompress(decompress_struct_); decompress_struct_->src = source_mgr_; buf_vec_.buffers = &buf_; buf_vec_.len = 1; } MJpegDecoder::~MJpegDecoder() { jpeg_destroy_decompress(decompress_struct_); delete decompress_struct_; delete source_mgr_; #ifdef HAVE_SETJMP delete error_mgr_; #endif DestroyOutputBuffers(); } LIBYUV_BOOL MJpegDecoder::LoadFrame(const uint8_t* src, size_t src_len) { if (!ValidateJpeg(src, src_len)) { return LIBYUV_FALSE; } buf_.data = src; buf_.len = static_cast(src_len); buf_vec_.pos = 0; decompress_struct_->client_data = &buf_vec_; #ifdef HAVE_SETJMP if (setjmp(error_mgr_->setjmp_buffer)) { // We called jpeg_read_header, it experienced an error, and we called // longjmp() and rewound the stack to here. Return error. return LIBYUV_FALSE; } #endif if (jpeg_read_header(decompress_struct_, TRUE) != JPEG_HEADER_OK) { // ERROR: Bad MJPEG header return LIBYUV_FALSE; } AllocOutputBuffers(GetNumComponents()); for (int i = 0; i < num_outbufs_; ++i) { int scanlines_size = GetComponentScanlinesPerImcuRow(i); if (scanlines_sizes_[i] != scanlines_size) { if (scanlines_[i]) { delete scanlines_[i]; } scanlines_[i] = new uint8_t*[scanlines_size]; scanlines_sizes_[i] = scanlines_size; } // We allocate padding for the final scanline to pad it up to DCTSIZE bytes // to avoid memory errors, since jpeglib only reads full MCUs blocks. For // the preceding scanlines, the padding is not needed/wanted because the // following addresses will already be valid (they are the initial bytes of // the next scanline) and will be overwritten when jpeglib writes out that // next scanline. int databuf_stride = GetComponentStride(i); int databuf_size = scanlines_size * databuf_stride; if (databuf_strides_[i] != databuf_stride) { if (databuf_[i]) { delete databuf_[i]; } databuf_[i] = new uint8_t[databuf_size]; databuf_strides_[i] = databuf_stride; } if (GetComponentStride(i) != GetComponentWidth(i)) { has_scanline_padding_ = LIBYUV_TRUE; } } return LIBYUV_TRUE; } static int DivideAndRoundUp(int numerator, int denominator) { return (numerator + denominator - 1) / denominator; } static int DivideAndRoundDown(int numerator, int denominator) { return numerator / denominator; } // Returns width of the last loaded frame. int MJpegDecoder::GetWidth() { return decompress_struct_->image_width; } // Returns height of the last loaded frame. int MJpegDecoder::GetHeight() { return decompress_struct_->image_height; } // Returns format of the last loaded frame. The return value is one of the // kColorSpace* constants. int MJpegDecoder::GetColorSpace() { return decompress_struct_->jpeg_color_space; } // Number of color components in the color space. int MJpegDecoder::GetNumComponents() { return decompress_struct_->num_components; } // Sample factors of the n-th component. 
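// Example: a 4:2:0 JPEG carries h/v sampling factors of 2x2 for Y and 1x1
// for Cb/Cr, so the accessors below report a subsample factor (max factor /
// component factor) of 1 for luma and 2 for each chroma plane.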
int MJpegDecoder::GetHorizSampFactor(int component) { return decompress_struct_->comp_info[component].h_samp_factor; } int MJpegDecoder::GetVertSampFactor(int component) { return decompress_struct_->comp_info[component].v_samp_factor; } int MJpegDecoder::GetHorizSubSampFactor(int component) { return decompress_struct_->max_h_samp_factor / GetHorizSampFactor(component); } int MJpegDecoder::GetVertSubSampFactor(int component) { return decompress_struct_->max_v_samp_factor / GetVertSampFactor(component); } int MJpegDecoder::GetImageScanlinesPerImcuRow() { return decompress_struct_->max_v_samp_factor * DCTSIZE; } int MJpegDecoder::GetComponentScanlinesPerImcuRow(int component) { int vs = GetVertSubSampFactor(component); return DivideAndRoundUp(GetImageScanlinesPerImcuRow(), vs); } int MJpegDecoder::GetComponentWidth(int component) { int hs = GetHorizSubSampFactor(component); return DivideAndRoundUp(GetWidth(), hs); } int MJpegDecoder::GetComponentHeight(int component) { int vs = GetVertSubSampFactor(component); return DivideAndRoundUp(GetHeight(), vs); } // Get width in bytes padded out to a multiple of DCTSIZE int MJpegDecoder::GetComponentStride(int component) { return (GetComponentWidth(component) + DCTSIZE - 1) & ~(DCTSIZE - 1); } int MJpegDecoder::GetComponentSize(int component) { return GetComponentWidth(component) * GetComponentHeight(component); } LIBYUV_BOOL MJpegDecoder::UnloadFrame() { #ifdef HAVE_SETJMP if (setjmp(error_mgr_->setjmp_buffer)) { // We called jpeg_abort_decompress, it experienced an error, and we called // longjmp() and rewound the stack to here. Return error. return LIBYUV_FALSE; } #endif jpeg_abort_decompress(decompress_struct_); return LIBYUV_TRUE; } // TODO(fbarchard): Allow rectangle to be specified: x, y, width, height. LIBYUV_BOOL MJpegDecoder::DecodeToBuffers(uint8_t** planes, int dst_width, int dst_height) { if (dst_width != GetWidth() || dst_height > GetHeight()) { // ERROR: Bad dimensions return LIBYUV_FALSE; } #ifdef HAVE_SETJMP if (setjmp(error_mgr_->setjmp_buffer)) { // We called into jpeglib, it experienced an error sometime during this // function call, and we called longjmp() and rewound the stack to here. // Return error. return LIBYUV_FALSE; } #endif if (!StartDecode()) { return LIBYUV_FALSE; } SetScanlinePointers(databuf_); int lines_left = dst_height; // Compute amount of lines to skip to implement vertical crop. // TODO(fbarchard): Ensure skip is a multiple of maximum component // subsample. ie 2 int skip = (GetHeight() - dst_height) / 2; if (skip > 0) { // There is no API to skip lines in the output data, so we read them // into the temp buffer. while (skip >= GetImageScanlinesPerImcuRow()) { if (!DecodeImcuRow()) { FinishDecode(); return LIBYUV_FALSE; } skip -= GetImageScanlinesPerImcuRow(); } if (skip > 0) { // Have a partial iMCU row left over to skip. Must read it and then // copy the parts we want into the destination. 
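// Worked example (4:2:0, 16-line iMCU row, skip == 2 left over): luma skips
// 2 of its 16 scanlines and copies 14, while each 2x-subsampled chroma plane
// skips 2/2 == 1 of its 8 scanlines and copies 7. Hence the assert below,
// which requires skip to divide evenly by each component's vertical factor.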
if (!DecodeImcuRow()) { FinishDecode(); return LIBYUV_FALSE; } for (int i = 0; i < num_outbufs_; ++i) { // TODO(fbarchard): Compute skip to avoid this assert(skip % GetVertSubSampFactor(i) == 0); int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i)); int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i) - rows_to_skip; int data_to_skip = rows_to_skip * GetComponentStride(i); CopyPlane(databuf_[i] + data_to_skip, GetComponentStride(i), planes[i], GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy); planes[i] += scanlines_to_copy * GetComponentWidth(i); } lines_left -= (GetImageScanlinesPerImcuRow() - skip); } } // Read full MCUs but cropped horizontally for (; lines_left > GetImageScanlinesPerImcuRow(); lines_left -= GetImageScanlinesPerImcuRow()) { if (!DecodeImcuRow()) { FinishDecode(); return LIBYUV_FALSE; } for (int i = 0; i < num_outbufs_; ++i) { int scanlines_to_copy = GetComponentScanlinesPerImcuRow(i); CopyPlane(databuf_[i], GetComponentStride(i), planes[i], GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy); planes[i] += scanlines_to_copy * GetComponentWidth(i); } } if (lines_left > 0) { // Have a partial iMCU row left over to decode. if (!DecodeImcuRow()) { FinishDecode(); return LIBYUV_FALSE; } for (int i = 0; i < num_outbufs_; ++i) { int scanlines_to_copy = DivideAndRoundUp(lines_left, GetVertSubSampFactor(i)); CopyPlane(databuf_[i], GetComponentStride(i), planes[i], GetComponentWidth(i), GetComponentWidth(i), scanlines_to_copy); planes[i] += scanlines_to_copy * GetComponentWidth(i); } } return FinishDecode(); } LIBYUV_BOOL MJpegDecoder::DecodeToCallback(CallbackFunction fn, void* opaque, int dst_width, int dst_height) { if (dst_width != GetWidth() || dst_height > GetHeight()) { // ERROR: Bad dimensions return LIBYUV_FALSE; } #ifdef HAVE_SETJMP if (setjmp(error_mgr_->setjmp_buffer)) { // We called into jpeglib, it experienced an error sometime during this // function call, and we called longjmp() and rewound the stack to here. // Return error. return LIBYUV_FALSE; } #endif if (!StartDecode()) { return LIBYUV_FALSE; } SetScanlinePointers(databuf_); int lines_left = dst_height; // TODO(fbarchard): Compute amount of lines to skip to implement vertical crop int skip = (GetHeight() - dst_height) / 2; if (skip > 0) { while (skip >= GetImageScanlinesPerImcuRow()) { if (!DecodeImcuRow()) { FinishDecode(); return LIBYUV_FALSE; } skip -= GetImageScanlinesPerImcuRow(); } if (skip > 0) { // Have a partial iMCU row left over to skip. if (!DecodeImcuRow()) { FinishDecode(); return LIBYUV_FALSE; } for (int i = 0; i < num_outbufs_; ++i) { // TODO(fbarchard): Compute skip to avoid this assert(skip % GetVertSubSampFactor(i) == 0); int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i)); int data_to_skip = rows_to_skip * GetComponentStride(i); // Change our own data buffer pointers so we can pass them to the // callback. databuf_[i] += data_to_skip; } int scanlines_to_copy = GetImageScanlinesPerImcuRow() - skip; (*fn)(opaque, databuf_, databuf_strides_, scanlines_to_copy); // Now change them back. for (int i = 0; i < num_outbufs_; ++i) { int rows_to_skip = DivideAndRoundDown(skip, GetVertSubSampFactor(i)); int data_to_skip = rows_to_skip * GetComponentStride(i); databuf_[i] -= data_to_skip; } lines_left -= scanlines_to_copy; } } // Read full MCUs until we get to the crop point. 
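// Each (*fn)() invocation below hands the caller the decoder's internal
// databuf_ pointers and strides; those buffers are reused for the next iMCU
// row, so a callback must consume or copy the scanlines before it returns.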
for (; lines_left >= GetImageScanlinesPerImcuRow(); lines_left -= GetImageScanlinesPerImcuRow()) { if (!DecodeImcuRow()) { FinishDecode(); return LIBYUV_FALSE; } (*fn)(opaque, databuf_, databuf_strides_, GetImageScanlinesPerImcuRow()); } if (lines_left > 0) { // Have a partial iMCU row left over to decode. if (!DecodeImcuRow()) { FinishDecode(); return LIBYUV_FALSE; } (*fn)(opaque, databuf_, databuf_strides_, lines_left); } return FinishDecode(); } void init_source(j_decompress_ptr cinfo) { fill_input_buffer(cinfo); } boolean fill_input_buffer(j_decompress_ptr cinfo) { BufferVector* buf_vec = reinterpret_cast(cinfo->client_data); if (buf_vec->pos >= buf_vec->len) { assert(0 && "No more data"); // ERROR: No more data return FALSE; } cinfo->src->next_input_byte = buf_vec->buffers[buf_vec->pos].data; cinfo->src->bytes_in_buffer = buf_vec->buffers[buf_vec->pos].len; ++buf_vec->pos; return TRUE; } void skip_input_data(j_decompress_ptr cinfo, long num_bytes) { // NOLINT cinfo->src->next_input_byte += num_bytes; } void term_source(j_decompress_ptr cinfo) { (void)cinfo; // Nothing to do. } #ifdef HAVE_SETJMP void ErrorHandler(j_common_ptr cinfo) { // This is called when a jpeglib command experiences an error. Unfortunately // jpeglib's error handling model is not very flexible, because it expects the // error handler to not return--i.e., it wants the program to terminate. To // recover from errors we use setjmp() as shown in their example. setjmp() is // C's implementation for the "call with current continuation" functionality // seen in some functional programming languages. // A formatted message can be output, but is unsafe for release. #ifdef DEBUG char buf[JMSG_LENGTH_MAX]; (*cinfo->err->format_message)(cinfo, buf); // ERROR: Error in jpeglib: buf #endif SetJmpErrorMgr* mgr = reinterpret_cast(cinfo->err); // This rewinds the call stack to the point of the corresponding setjmp() // and causes it to return (for a second time) with value 1. longjmp(mgr->setjmp_buffer, 1); } // Suppress fprintf warnings. void OutputHandler(j_common_ptr cinfo) { (void)cinfo; } #endif // HAVE_SETJMP void MJpegDecoder::AllocOutputBuffers(int num_outbufs) { if (num_outbufs != num_outbufs_) { // We could perhaps optimize this case to resize the output buffers without // necessarily having to delete and recreate each one, but it's not worth // it. DestroyOutputBuffers(); scanlines_ = new uint8_t**[num_outbufs]; scanlines_sizes_ = new int[num_outbufs]; databuf_ = new uint8_t*[num_outbufs]; databuf_strides_ = new int[num_outbufs]; for (int i = 0; i < num_outbufs; ++i) { scanlines_[i] = NULL; scanlines_sizes_[i] = 0; databuf_[i] = NULL; databuf_strides_[i] = 0; } num_outbufs_ = num_outbufs; } } void MJpegDecoder::DestroyOutputBuffers() { for (int i = 0; i < num_outbufs_; ++i) { delete[] scanlines_[i]; delete[] databuf_[i]; } delete[] scanlines_; delete[] databuf_; delete[] scanlines_sizes_; delete[] databuf_strides_; scanlines_ = NULL; databuf_ = NULL; scanlines_sizes_ = NULL; databuf_strides_ = NULL; num_outbufs_ = 0; } // JDCT_IFAST and do_block_smoothing improve performance substantially. 
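// (JDCT_IFAST below trades a little DCT accuracy for speed versus the
// JDCT_ISLOW default, and fancy upsampling / block smoothing are left off
// because raw planar output does not use them.)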
LIBYUV_BOOL MJpegDecoder::StartDecode() { decompress_struct_->raw_data_out = TRUE; decompress_struct_->dct_method = JDCT_IFAST; // JDCT_ISLOW is default decompress_struct_->dither_mode = JDITHER_NONE; // Not applicable to 'raw': decompress_struct_->do_fancy_upsampling = (boolean)(LIBYUV_FALSE); // Only for buffered mode: decompress_struct_->enable_2pass_quant = (boolean)(LIBYUV_FALSE); // Blocky but fast: decompress_struct_->do_block_smoothing = (boolean)(LIBYUV_FALSE); if (!jpeg_start_decompress(decompress_struct_)) { // ERROR: Couldn't start JPEG decompressor"; return LIBYUV_FALSE; } return LIBYUV_TRUE; } LIBYUV_BOOL MJpegDecoder::FinishDecode() { // jpeglib considers it an error if we finish without decoding the whole // image, so we call "abort" rather than "finish". jpeg_abort_decompress(decompress_struct_); return LIBYUV_TRUE; } void MJpegDecoder::SetScanlinePointers(uint8_t** data) { for (int i = 0; i < num_outbufs_; ++i) { uint8_t* data_i = data[i]; for (int j = 0; j < scanlines_sizes_[i]; ++j) { scanlines_[i][j] = data_i; data_i += GetComponentStride(i); } } } inline LIBYUV_BOOL MJpegDecoder::DecodeImcuRow() { return (unsigned int)(GetImageScanlinesPerImcuRow()) == jpeg_read_raw_data(decompress_struct_, scanlines_, GetImageScanlinesPerImcuRow()); } // The helper function which recognizes the jpeg sub-sampling type. JpegSubsamplingType MJpegDecoder::JpegSubsamplingTypeHelper( int* subsample_x, int* subsample_y, int number_of_components) { if (number_of_components == 3) { // Color images. if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 && subsample_y[1] == 2 && subsample_x[2] == 2 && subsample_y[2] == 2) { return kJpegYuv420; } if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 2 && subsample_y[1] == 1 && subsample_x[2] == 2 && subsample_y[2] == 1) { return kJpegYuv422; } if (subsample_x[0] == 1 && subsample_y[0] == 1 && subsample_x[1] == 1 && subsample_y[1] == 1 && subsample_x[2] == 1 && subsample_y[2] == 1) { return kJpegYuv444; } } else if (number_of_components == 1) { // Grey-scale images. if (subsample_x[0] == 1 && subsample_y[0] == 1) { return kJpegYuv400; } } return kJpegUnknown; } } // namespace libyuv #endif // HAVE_JPEG libvpx-1.8.2/third_party/libyuv/source/mjpeg_validate.cc000066400000000000000000000042701357355204000234620ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/mjpeg_decoder.h" #include // For memchr. #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Helper function to scan for EOI marker (0xff 0xd9). static LIBYUV_BOOL ScanEOI(const uint8_t* sample, size_t sample_size) { if (sample_size >= 2) { const uint8_t* end = sample + sample_size - 1; const uint8_t* it = sample; while (it < end) { // TODO(fbarchard): scan for 0xd9 instead. it = (const uint8_t*)(memchr(it, 0xff, end - it)); if (it == NULL) { break; } if (it[1] == 0xd9) { return LIBYUV_TRUE; // Success: Valid jpeg. } ++it; // Skip over current 0xff. } } // ERROR: Invalid jpeg end code not found. Size sample_size return LIBYUV_FALSE; } // Helper function to validate the jpeg appears intact. 
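// A well-formed JPEG begins with the SOI marker 0xff 0xd8 and ends with the
// EOI marker 0xff 0xd9. ValidateJpeg() below checks the leading marker, then
// looks for EOI in the last kBackSearchSize bytes before falling back to a
// forward scan. Typical use (hypothetical names):
//   if (!ValidateJpeg(frame, frame_size)) return;  // reject truncated data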
LIBYUV_BOOL ValidateJpeg(const uint8_t* sample, size_t sample_size) { // Maximum size that ValidateJpeg will consider valid. const size_t kMaxJpegSize = 0x7fffffffull; const size_t kBackSearchSize = 1024; if (sample_size < 64 || sample_size > kMaxJpegSize || !sample) { // ERROR: Invalid jpeg size: sample_size return LIBYUV_FALSE; } if (sample[0] != 0xff || sample[1] != 0xd8) { // SOI marker // ERROR: Invalid jpeg initial start code return LIBYUV_FALSE; } // Look for the End Of Image (EOI) marker near the end of the buffer. if (sample_size > kBackSearchSize) { if (ScanEOI(sample + sample_size - kBackSearchSize, kBackSearchSize)) { return LIBYUV_TRUE; // Success: Valid jpeg. } // Reduce search size for forward search. sample_size = sample_size - kBackSearchSize + 1; } // Step over SOI marker and scan for EOI. return ScanEOI(sample + 2, sample_size - 2); } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/planar_functions.cc000066400000000000000000003153541357355204000240640ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/planar_functions.h" #include // for memset() #include "libyuv/cpu_id.h" #ifdef HAVE_JPEG #include "libyuv/mjpeg_decoder.h" #endif #include "libyuv/row.h" #include "libyuv/scale_row.h" // for ScaleRowDown2 #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Copy a plane of data LIBYUV_API void CopyPlane(const uint8_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, int width, int height) { int y; void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; // Negative height means invert the image. if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows. if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; } // Nothing to do. if (src_y == dst_y && src_stride_y == dst_stride_y) { return; } #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; } #endif #if defined(HAS_COPYROW_AVX) if (TestCpuFlag(kCpuHasAVX)) { CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX; } #endif #if defined(HAS_COPYROW_ERMS) if (TestCpuFlag(kCpuHasERMS)) { CopyRow = CopyRow_ERMS; } #endif #if defined(HAS_COPYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif // Copy plane for (y = 0; y < height; ++y) { CopyRow(src_y, dst_y, width); src_y += src_stride_y; dst_y += dst_stride_y; } } // TODO(fbarchard): Consider support for negative height. // TODO(fbarchard): Consider stride measured in bytes. LIBYUV_API void CopyPlane_16(const uint16_t* src_y, int src_stride_y, uint16_t* dst_y, int dst_stride_y, int width, int height) { int y; void (*CopyRow)(const uint16_t* src, uint16_t* dst, int width) = CopyRow_16_C; // Coalesce rows. 
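// Row coalescing: when every stride equals the row width the plane is
// contiguous, so the 2-D loop can collapse to a single long row. E.g. a
// 640x480 plane with 640-element strides is copied below as one row of
// 307200 elements.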
if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; } #if defined(HAS_COPYROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 32)) { CopyRow = CopyRow_16_SSE2; } #endif #if defined(HAS_COPYROW_16_ERMS) if (TestCpuFlag(kCpuHasERMS)) { CopyRow = CopyRow_16_ERMS; } #endif #if defined(HAS_COPYROW_16_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 32)) { CopyRow = CopyRow_16_NEON; } #endif // Copy plane for (y = 0; y < height; ++y) { CopyRow(src_y, dst_y, width); src_y += src_stride_y; dst_y += dst_stride_y; } } // Convert a plane of 16 bit data to 8 bit LIBYUV_API void Convert16To8Plane(const uint16_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, int scale, // 16384 for 10 bits int width, int height) { int y; void (*Convert16To8Row)(const uint16_t* src_y, uint8_t* dst_y, int scale, int width) = Convert16To8Row_C; // Negative height means invert the image. if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows. if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; } #if defined(HAS_CONVERT16TO8ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { Convert16To8Row = Convert16To8Row_Any_SSSE3; if (IS_ALIGNED(width, 16)) { Convert16To8Row = Convert16To8Row_SSSE3; } } #endif #if defined(HAS_CONVERT16TO8ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { Convert16To8Row = Convert16To8Row_Any_AVX2; if (IS_ALIGNED(width, 32)) { Convert16To8Row = Convert16To8Row_AVX2; } } #endif // Convert plane for (y = 0; y < height; ++y) { Convert16To8Row(src_y, dst_y, scale, width); src_y += src_stride_y; dst_y += dst_stride_y; } } // Convert a plane of 8 bit data to 16 bit LIBYUV_API void Convert8To16Plane(const uint8_t* src_y, int src_stride_y, uint16_t* dst_y, int dst_stride_y, int scale, // 16384 for 10 bits int width, int height) { int y; void (*Convert8To16Row)(const uint8_t* src_y, uint16_t* dst_y, int scale, int width) = Convert8To16Row_C; // Negative height means invert the image. if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows. if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; } #if defined(HAS_CONVERT8TO16ROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { Convert8To16Row = Convert8To16Row_Any_SSE2; if (IS_ALIGNED(width, 16)) { Convert8To16Row = Convert8To16Row_SSE2; } } #endif #if defined(HAS_CONVERT8TO16ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { Convert8To16Row = Convert8To16Row_Any_AVX2; if (IS_ALIGNED(width, 32)) { Convert8To16Row = Convert8To16Row_AVX2; } } #endif // Convert plane for (y = 0; y < height; ++y) { Convert8To16Row(src_y, dst_y, scale, width); src_y += src_stride_y; dst_y += dst_stride_y; } } // Copy I422. LIBYUV_API int I422Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int halfwidth = (width + 1) >> 1; if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
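// The negative-height idiom used throughout this file: flip the sign, point
// at the last row, and negate the stride, so for height == -4 the source
// rows are visited as 3, 2, 1, 0 and the copy doubles as a vertical flip.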
if (height < 0) { height = -height; src_y = src_y + (height - 1) * src_stride_y; src_u = src_u + (height - 1) * src_stride_u; src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, height); CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, height); return 0; } // Copy I444. LIBYUV_API int I444Copy(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { if (!src_u || !src_v || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_y = src_y + (height - 1) * src_stride_y; src_u = src_u + (height - 1) * src_stride_u; src_v = src_v + (height - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } if (dst_y) { CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } CopyPlane(src_u, src_stride_u, dst_u, dst_stride_u, width, height); CopyPlane(src_v, src_stride_v, dst_v, dst_stride_v, width, height); return 0; } // Copy I400. LIBYUV_API int I400ToI400(const uint8_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, int width, int height) { if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; } // Convert I420 to I400. LIBYUV_API int I420ToI400(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, int width, int height) { (void)src_u; (void)src_stride_u; (void)src_v; (void)src_stride_v; if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } CopyPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; } // Support function for NV12 etc UV channels. // Width and height are plane sizes (typically half pixel width). LIBYUV_API void SplitUVPlane(const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; // Negative height means invert the image. if (height < 0) { height = -height; dst_u = dst_u + (height - 1) * dst_stride_u; dst_v = dst_v + (height - 1) * dst_stride_v; dst_stride_u = -dst_stride_u; dst_stride_v = -dst_stride_v; } // Coalesce rows. 
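// The interleaved UV plane packs a U and a V byte per chroma sample
// (UVUVUV...), so its row is twice the output plane width; hence the
// width * 2 test below, e.g. a 320-wide half plane coalesces only when
// src_stride_uv == 640 and both destination strides are 320.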
if (src_stride_uv == width * 2 && dst_stride_u == width && dst_stride_v == width) { width *= height; height = 1; src_stride_uv = dst_stride_u = dst_stride_v = 0; } #if defined(HAS_SPLITUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SplitUVRow = SplitUVRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { SplitUVRow = SplitUVRow_SSE2; } } #endif #if defined(HAS_SPLITUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { SplitUVRow = SplitUVRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { SplitUVRow = SplitUVRow_AVX2; } } #endif #if defined(HAS_SPLITUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { SplitUVRow = SplitUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { SplitUVRow = SplitUVRow_NEON; } } #endif #if defined(HAS_SPLITUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { SplitUVRow = SplitUVRow_Any_MSA; if (IS_ALIGNED(width, 32)) { SplitUVRow = SplitUVRow_MSA; } } #endif for (y = 0; y < height; ++y) { // Copy a row of UV. SplitUVRow(src_uv, dst_u, dst_v, width); dst_u += dst_stride_u; dst_v += dst_stride_v; src_uv += src_stride_uv; } } LIBYUV_API void MergeUVPlane(const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_uv, int dst_stride_uv, int width, int height) { int y; void (*MergeUVRow)(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) = MergeUVRow_C; // Coalesce rows. // Negative height means invert the image. if (height < 0) { height = -height; dst_uv = dst_uv + (height - 1) * dst_stride_uv; dst_stride_uv = -dst_stride_uv; } // Coalesce rows. if (src_stride_u == width && src_stride_v == width && dst_stride_uv == width * 2) { width *= height; height = 1; src_stride_u = src_stride_v = dst_stride_uv = 0; } #if defined(HAS_MERGEUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { MergeUVRow = MergeUVRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { MergeUVRow = MergeUVRow_SSE2; } } #endif #if defined(HAS_MERGEUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MergeUVRow = MergeUVRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { MergeUVRow = MergeUVRow_AVX2; } } #endif #if defined(HAS_MERGEUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeUVRow = MergeUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { MergeUVRow = MergeUVRow_NEON; } } #endif #if defined(HAS_MERGEUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MergeUVRow = MergeUVRow_Any_MSA; if (IS_ALIGNED(width, 16)) { MergeUVRow = MergeUVRow_MSA; } } #endif for (y = 0; y < height; ++y) { // Merge a row of U and V into a row of UV. MergeUVRow(src_u, src_v, dst_uv, width); src_u += src_stride_u; src_v += src_stride_v; dst_uv += dst_stride_uv; } } // Support function for NV12 etc RGB channels. // Width and height are plane sizes (typically half pixel width). LIBYUV_API void SplitRGBPlane(const uint8_t* src_rgb, int src_stride_rgb, uint8_t* dst_r, int dst_stride_r, uint8_t* dst_g, int dst_stride_g, uint8_t* dst_b, int dst_stride_b, int width, int height) { int y; void (*SplitRGBRow)(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) = SplitRGBRow_C; // Negative height means invert the image. if (height < 0) { height = -height; dst_r = dst_r + (height - 1) * dst_stride_r; dst_g = dst_g + (height - 1) * dst_stride_g; dst_b = dst_b + (height - 1) * dst_stride_b; dst_stride_r = -dst_stride_r; dst_stride_g = -dst_stride_g; dst_stride_b = -dst_stride_b; } // Coalesce rows. 
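// What follows is the standard prologue used by most routines in this file:
// coalesce contiguous rows, then replace the C row function with a SIMD one.
// The _Any_ variants handle arbitrary widths; the plain variants require the
// width to be a multiple of the vector step (16 pixels here for SSSE3 and
// NEON).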
if (src_stride_rgb == width * 3 && dst_stride_r == width && dst_stride_g == width && dst_stride_b == width) { width *= height; height = 1; src_stride_rgb = dst_stride_r = dst_stride_g = dst_stride_b = 0; } #if defined(HAS_SPLITRGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { SplitRGBRow = SplitRGBRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { SplitRGBRow = SplitRGBRow_SSSE3; } } #endif #if defined(HAS_SPLITRGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { SplitRGBRow = SplitRGBRow_Any_NEON; if (IS_ALIGNED(width, 16)) { SplitRGBRow = SplitRGBRow_NEON; } } #endif for (y = 0; y < height; ++y) { // Copy a row of RGB. SplitRGBRow(src_rgb, dst_r, dst_g, dst_b, width); dst_r += dst_stride_r; dst_g += dst_stride_g; dst_b += dst_stride_b; src_rgb += src_stride_rgb; } } LIBYUV_API void MergeRGBPlane(const uint8_t* src_r, int src_stride_r, const uint8_t* src_g, int src_stride_g, const uint8_t* src_b, int src_stride_b, uint8_t* dst_rgb, int dst_stride_rgb, int width, int height) { int y; void (*MergeRGBRow)(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width) = MergeRGBRow_C; // Coalesce rows. // Negative height means invert the image. if (height < 0) { height = -height; dst_rgb = dst_rgb + (height - 1) * dst_stride_rgb; dst_stride_rgb = -dst_stride_rgb; } // Coalesce rows. if (src_stride_r == width && src_stride_g == width && src_stride_b == width && dst_stride_rgb == width * 3) { width *= height; height = 1; src_stride_r = src_stride_g = src_stride_b = dst_stride_rgb = 0; } #if defined(HAS_MERGERGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { MergeRGBRow = MergeRGBRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { MergeRGBRow = MergeRGBRow_SSSE3; } } #endif #if defined(HAS_MERGERGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MergeRGBRow = MergeRGBRow_Any_NEON; if (IS_ALIGNED(width, 16)) { MergeRGBRow = MergeRGBRow_NEON; } } #endif for (y = 0; y < height; ++y) { // Merge a row of U and V into a row of RGB. MergeRGBRow(src_r, src_g, src_b, dst_rgb, width); src_r += src_stride_r; src_g += src_stride_g; src_b += src_stride_b; dst_rgb += dst_stride_rgb; } } // Mirror a plane of data. void MirrorPlane(const uint8_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, int width, int height) { int y; void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C; // Negative height means invert the image. if (height < 0) { height = -height; src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } #if defined(HAS_MIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MirrorRow = MirrorRow_Any_NEON; if (IS_ALIGNED(width, 16)) { MirrorRow = MirrorRow_NEON; } } #endif #if defined(HAS_MIRRORROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { MirrorRow = MirrorRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { MirrorRow = MirrorRow_SSSE3; } } #endif #if defined(HAS_MIRRORROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MirrorRow = MirrorRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { MirrorRow = MirrorRow_AVX2; } } #endif #if defined(HAS_MIRRORROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MirrorRow = MirrorRow_Any_MSA; if (IS_ALIGNED(width, 64)) { MirrorRow = MirrorRow_MSA; } } #endif // Mirror plane for (y = 0; y < height; ++y) { MirrorRow(src_y, dst_y, width); src_y += src_stride_y; dst_y += dst_stride_y; } } // Convert YUY2 to I422. 
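// YUY2 packs two pixels into four bytes as Y0 U Y1 V: a full-resolution luma
// sample per pixel but one U/V pair per two pixels, which is why the
// converter below emits a full-width Y plane and half-width U and V planes.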
LIBYUV_API int YUY2ToI422(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*YUY2ToUV422Row)(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) = YUY2ToUV422Row_C; void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = YUY2ToYRow_C; if (!src_yuy2 || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2; } // Coalesce rows. if (src_stride_yuy2 == width * 2 && dst_stride_y == width && dst_stride_u * 2 == width && dst_stride_v * 2 == width && width * height <= 32768) { width *= height; height = 1; src_stride_yuy2 = dst_stride_y = dst_stride_u = dst_stride_v = 0; } #if defined(HAS_YUY2TOYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { YUY2ToUV422Row = YUY2ToUV422Row_Any_SSE2; YUY2ToYRow = YUY2ToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { YUY2ToUV422Row = YUY2ToUV422Row_SSE2; YUY2ToYRow = YUY2ToYRow_SSE2; } } #endif #if defined(HAS_YUY2TOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { YUY2ToUV422Row = YUY2ToUV422Row_Any_AVX2; YUY2ToYRow = YUY2ToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { YUY2ToUV422Row = YUY2ToUV422Row_AVX2; YUY2ToYRow = YUY2ToYRow_AVX2; } } #endif #if defined(HAS_YUY2TOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { YUY2ToYRow = YUY2ToYRow_Any_NEON; YUY2ToUV422Row = YUY2ToUV422Row_Any_NEON; if (IS_ALIGNED(width, 16)) { YUY2ToYRow = YUY2ToYRow_NEON; YUY2ToUV422Row = YUY2ToUV422Row_NEON; } } #endif #if defined(HAS_YUY2TOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { YUY2ToYRow = YUY2ToYRow_Any_MSA; YUY2ToUV422Row = YUY2ToUV422Row_Any_MSA; if (IS_ALIGNED(width, 32)) { YUY2ToYRow = YUY2ToYRow_MSA; YUY2ToUV422Row = YUY2ToUV422Row_MSA; } } #endif for (y = 0; y < height; ++y) { YUY2ToUV422Row(src_yuy2, dst_u, dst_v, width); YUY2ToYRow(src_yuy2, dst_y, width); src_yuy2 += src_stride_yuy2; dst_y += dst_stride_y; dst_u += dst_stride_u; dst_v += dst_stride_v; } return 0; } // Convert UYVY to I422. LIBYUV_API int UYVYToI422(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; void (*UYVYToUV422Row)(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) = UYVYToUV422Row_C; void (*UYVYToYRow)(const uint8_t* src_uyvy, uint8_t* dst_y, int width) = UYVYToYRow_C; if (!src_uyvy || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; src_stride_uyvy = -src_stride_uyvy; } // Coalesce rows. 
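// UYVY is the byte-swapped sibling of YUY2: four bytes carry U Y0 V Y1, with
// chroma leading each pair, but the packed row is still width * 2 bytes, as
// the coalesce test below assumes.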
if (src_stride_uyvy == width * 2 && dst_stride_y == width && dst_stride_u * 2 == width && dst_stride_v * 2 == width && width * height <= 32768) { width *= height; height = 1; src_stride_uyvy = dst_stride_y = dst_stride_u = dst_stride_v = 0; } #if defined(HAS_UYVYTOYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { UYVYToUV422Row = UYVYToUV422Row_Any_SSE2; UYVYToYRow = UYVYToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { UYVYToUV422Row = UYVYToUV422Row_SSE2; UYVYToYRow = UYVYToYRow_SSE2; } } #endif #if defined(HAS_UYVYTOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { UYVYToUV422Row = UYVYToUV422Row_Any_AVX2; UYVYToYRow = UYVYToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { UYVYToUV422Row = UYVYToUV422Row_AVX2; UYVYToYRow = UYVYToYRow_AVX2; } } #endif #if defined(HAS_UYVYTOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { UYVYToYRow = UYVYToYRow_Any_NEON; UYVYToUV422Row = UYVYToUV422Row_Any_NEON; if (IS_ALIGNED(width, 16)) { UYVYToYRow = UYVYToYRow_NEON; UYVYToUV422Row = UYVYToUV422Row_NEON; } } #endif #if defined(HAS_UYVYTOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { UYVYToYRow = UYVYToYRow_Any_MSA; UYVYToUV422Row = UYVYToUV422Row_Any_MSA; if (IS_ALIGNED(width, 32)) { UYVYToYRow = UYVYToYRow_MSA; UYVYToUV422Row = UYVYToUV422Row_MSA; } } #endif for (y = 0; y < height; ++y) { UYVYToUV422Row(src_uyvy, dst_u, dst_v, width); UYVYToYRow(src_uyvy, dst_y, width); src_uyvy += src_stride_uyvy; dst_y += dst_stride_y; dst_u += dst_stride_u; dst_v += dst_stride_v; } return 0; } // Convert YUY2 to Y. LIBYUV_API int YUY2ToY(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_y, int dst_stride_y, int width, int height) { int y; void (*YUY2ToYRow)(const uint8_t* src_yuy2, uint8_t* dst_y, int width) = YUY2ToYRow_C; if (!src_yuy2 || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2; } // Coalesce rows. if (src_stride_yuy2 == width * 2 && dst_stride_y == width) { width *= height; height = 1; src_stride_yuy2 = dst_stride_y = 0; } #if defined(HAS_YUY2TOYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { YUY2ToYRow = YUY2ToYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { YUY2ToYRow = YUY2ToYRow_SSE2; } } #endif #if defined(HAS_YUY2TOYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { YUY2ToYRow = YUY2ToYRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { YUY2ToYRow = YUY2ToYRow_AVX2; } } #endif #if defined(HAS_YUY2TOYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { YUY2ToYRow = YUY2ToYRow_Any_NEON; if (IS_ALIGNED(width, 16)) { YUY2ToYRow = YUY2ToYRow_NEON; } } #endif #if defined(HAS_YUY2TOYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { YUY2ToYRow = YUY2ToYRow_Any_MSA; if (IS_ALIGNED(width, 32)) { YUY2ToYRow = YUY2ToYRow_MSA; } } #endif for (y = 0; y < height; ++y) { YUY2ToYRow(src_yuy2, dst_y, width); src_yuy2 += src_stride_yuy2; dst_y += dst_stride_y; } return 0; } // Mirror I400 with optional flipping LIBYUV_API int I400Mirror(const uint8_t* src_y, int src_stride_y, uint8_t* dst_y, int dst_stride_y, int width, int height) { if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
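// Note mirroring reverses pixels within each row (a horizontal flip);
// combining it with a negative height, handled below, yields a 180-degree
// rotation.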
if (height < 0) { height = -height; src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); return 0; } // Mirror I420 with optional flipping LIBYUV_API int I420Mirror(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!src_y || !src_u || !src_v || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; src_y = src_y + (height - 1) * src_stride_y; src_u = src_u + (halfheight - 1) * src_stride_u; src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } if (dst_y) { MirrorPlane(src_y, src_stride_y, dst_y, dst_stride_y, width, height); } MirrorPlane(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); MirrorPlane(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); return 0; } // ARGB mirror. LIBYUV_API int ARGBMirror(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*ARGBMirrorRow)(const uint8_t* src, uint8_t* dst, int width) = ARGBMirrorRow_C; if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } #if defined(HAS_ARGBMIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBMirrorRow = ARGBMirrorRow_Any_NEON; if (IS_ALIGNED(width, 4)) { ARGBMirrorRow = ARGBMirrorRow_NEON; } } #endif #if defined(HAS_ARGBMIRRORROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBMirrorRow = ARGBMirrorRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBMirrorRow = ARGBMirrorRow_SSE2; } } #endif #if defined(HAS_ARGBMIRRORROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBMirrorRow = ARGBMirrorRow_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBMirrorRow = ARGBMirrorRow_AVX2; } } #endif #if defined(HAS_ARGBMIRRORROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBMirrorRow = ARGBMirrorRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBMirrorRow = ARGBMirrorRow_MSA; } } #endif // Mirror plane for (y = 0; y < height; ++y) { ARGBMirrorRow(src_argb, dst_argb, width); src_argb += src_stride_argb; dst_argb += dst_stride_argb; } return 0; } // Get a blender that optimized for the CPU and pixel count. // As there are 6 blenders to choose from, the caller should try to use // the same blend function for all pixels if possible. LIBYUV_API ARGBBlendRow GetARGBBlend() { void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width) = ARGBBlendRow_C; #if defined(HAS_ARGBBLENDROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBBlendRow = ARGBBlendRow_SSSE3; return ARGBBlendRow; } #endif #if defined(HAS_ARGBBLENDROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBBlendRow = ARGBBlendRow_NEON; } #endif #if defined(HAS_ARGBBLENDROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBBlendRow = ARGBBlendRow_MSA; } #endif return ARGBBlendRow; } // Alpha Blend 2 ARGB images and store to destination. 
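// A caller that blends many rows can fetch the row function once and reuse
// it. Minimal sketch (hypothetical buffers):
//   ARGBBlendRow blend = GetARGBBlend();
//   blend(fg_row, bg_row, dst_row, width);  // blends one row of pixels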
LIBYUV_API int ARGBBlend(const uint8_t* src_argb0, int src_stride_argb0, const uint8_t* src_argb1, int src_stride_argb1, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*ARGBBlendRow)(const uint8_t* src_argb, const uint8_t* src_argb1, uint8_t* dst_argb, int width) = GetARGBBlend(); if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; } for (y = 0; y < height; ++y) { ARGBBlendRow(src_argb0, src_argb1, dst_argb, width); src_argb0 += src_stride_argb0; src_argb1 += src_stride_argb1; dst_argb += dst_stride_argb; } return 0; } // Alpha Blend plane and store to destination. LIBYUV_API int BlendPlane(const uint8_t* src_y0, int src_stride_y0, const uint8_t* src_y1, int src_stride_y1, const uint8_t* alpha, int alpha_stride, uint8_t* dst_y, int dst_stride_y, int width, int height) { int y; void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, uint8_t* dst, int width) = BlendPlaneRow_C; if (!src_y0 || !src_y1 || !alpha || !dst_y || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows for Y plane. if (src_stride_y0 == width && src_stride_y1 == width && alpha_stride == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y0 = src_stride_y1 = alpha_stride = dst_stride_y = 0; } #if defined(HAS_BLENDPLANEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { BlendPlaneRow = BlendPlaneRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { BlendPlaneRow = BlendPlaneRow_SSSE3; } } #endif #if defined(HAS_BLENDPLANEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { BlendPlaneRow = BlendPlaneRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { BlendPlaneRow = BlendPlaneRow_AVX2; } } #endif for (y = 0; y < height; ++y) { BlendPlaneRow(src_y0, src_y1, alpha, dst_y, width); src_y0 += src_stride_y0; src_y1 += src_stride_y1; alpha += alpha_stride; dst_y += dst_stride_y; } return 0; } #define MAXTWIDTH 2048 // Alpha Blend YUV images and store to destination. LIBYUV_API int I420Blend(const uint8_t* src_y0, int src_stride_y0, const uint8_t* src_u0, int src_stride_u0, const uint8_t* src_v0, int src_stride_v0, const uint8_t* src_y1, int src_stride_y1, const uint8_t* src_u1, int src_stride_u1, const uint8_t* src_v1, int src_stride_v1, const uint8_t* alpha, int alpha_stride, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height) { int y; // Half width/height for UV. int halfwidth = (width + 1) >> 1; void (*BlendPlaneRow)(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, uint8_t* dst, int width) = BlendPlaneRow_C; void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) = ScaleRowDown2Box_C; if (!src_y0 || !src_u0 || !src_v0 || !src_y1 || !src_u1 || !src_v1 || !alpha || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
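// Further below, the full-resolution alpha plane is box-filtered 2x2 per row
// pair (ScaleRowDown2Box) into the temporary `halfalpha` buffer, so each
// half-resolution U/V blend sees the average coverage of the four
// luma-resolution alpha values it spans.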
if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Blend Y plane. BlendPlane(src_y0, src_stride_y0, src_y1, src_stride_y1, alpha, alpha_stride, dst_y, dst_stride_y, width, height); #if defined(HAS_BLENDPLANEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { BlendPlaneRow = BlendPlaneRow_Any_SSSE3; if (IS_ALIGNED(halfwidth, 8)) { BlendPlaneRow = BlendPlaneRow_SSSE3; } } #endif #if defined(HAS_BLENDPLANEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { BlendPlaneRow = BlendPlaneRow_Any_AVX2; if (IS_ALIGNED(halfwidth, 32)) { BlendPlaneRow = BlendPlaneRow_AVX2; } } #endif if (!IS_ALIGNED(width, 2)) { ScaleRowDown2 = ScaleRowDown2Box_Odd_C; } #if defined(HAS_SCALEROWDOWN2_NEON) if (TestCpuFlag(kCpuHasNEON)) { ScaleRowDown2 = ScaleRowDown2Box_Odd_NEON; if (IS_ALIGNED(width, 2)) { ScaleRowDown2 = ScaleRowDown2Box_Any_NEON; if (IS_ALIGNED(halfwidth, 16)) { ScaleRowDown2 = ScaleRowDown2Box_NEON; } } } #endif #if defined(HAS_SCALEROWDOWN2_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ScaleRowDown2 = ScaleRowDown2Box_Odd_SSSE3; if (IS_ALIGNED(width, 2)) { ScaleRowDown2 = ScaleRowDown2Box_Any_SSSE3; if (IS_ALIGNED(halfwidth, 16)) { ScaleRowDown2 = ScaleRowDown2Box_SSSE3; } } } #endif #if defined(HAS_SCALEROWDOWN2_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ScaleRowDown2 = ScaleRowDown2Box_Odd_AVX2; if (IS_ALIGNED(width, 2)) { ScaleRowDown2 = ScaleRowDown2Box_Any_AVX2; if (IS_ALIGNED(halfwidth, 32)) { ScaleRowDown2 = ScaleRowDown2Box_AVX2; } } } #endif // Row buffer for intermediate alpha pixels. align_buffer_64(halfalpha, halfwidth); for (y = 0; y < height; y += 2) { // last row of odd height image use 1 row of alpha instead of 2. if (y == (height - 1)) { alpha_stride = 0; } // Subsample 2 rows of UV to half width and half height. ScaleRowDown2(alpha, alpha_stride, halfalpha, halfwidth); alpha += alpha_stride * 2; BlendPlaneRow(src_u0, src_u1, halfalpha, dst_u, halfwidth); BlendPlaneRow(src_v0, src_v1, halfalpha, dst_v, halfwidth); src_u0 += src_stride_u0; src_u1 += src_stride_u1; dst_u += dst_stride_u; src_v0 += src_stride_v0; src_v1 += src_stride_v1; dst_v += dst_stride_v; } free_aligned_buffer_64(halfalpha); return 0; } // Multiply 2 ARGB images and store to destination. LIBYUV_API int ARGBMultiply(const uint8_t* src_argb0, int src_stride_argb0, const uint8_t* src_argb1, int src_stride_argb1, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*ARGBMultiplyRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst, int width) = ARGBMultiplyRow_C; if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. 
if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; } #if defined(HAS_ARGBMULTIPLYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBMultiplyRow = ARGBMultiplyRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBMultiplyRow = ARGBMultiplyRow_SSE2; } } #endif #if defined(HAS_ARGBMULTIPLYROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBMultiplyRow = ARGBMultiplyRow_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBMultiplyRow = ARGBMultiplyRow_AVX2; } } #endif #if defined(HAS_ARGBMULTIPLYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBMultiplyRow = ARGBMultiplyRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBMultiplyRow = ARGBMultiplyRow_NEON; } } #endif #if defined(HAS_ARGBMULTIPLYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBMultiplyRow = ARGBMultiplyRow_Any_MSA; if (IS_ALIGNED(width, 4)) { ARGBMultiplyRow = ARGBMultiplyRow_MSA; } } #endif // Multiply plane for (y = 0; y < height; ++y) { ARGBMultiplyRow(src_argb0, src_argb1, dst_argb, width); src_argb0 += src_stride_argb0; src_argb1 += src_stride_argb1; dst_argb += dst_stride_argb; } return 0; } // Add 2 ARGB images and store to destination. LIBYUV_API int ARGBAdd(const uint8_t* src_argb0, int src_stride_argb0, const uint8_t* src_argb1, int src_stride_argb1, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*ARGBAddRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst, int width) = ARGBAddRow_C; if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; } #if defined(HAS_ARGBADDROW_SSE2) && (defined(_MSC_VER) && !defined(__clang__)) if (TestCpuFlag(kCpuHasSSE2)) { ARGBAddRow = ARGBAddRow_SSE2; } #endif #if defined(HAS_ARGBADDROW_SSE2) && !(defined(_MSC_VER) && !defined(__clang__)) if (TestCpuFlag(kCpuHasSSE2)) { ARGBAddRow = ARGBAddRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBAddRow = ARGBAddRow_SSE2; } } #endif #if defined(HAS_ARGBADDROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBAddRow = ARGBAddRow_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBAddRow = ARGBAddRow_AVX2; } } #endif #if defined(HAS_ARGBADDROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBAddRow = ARGBAddRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBAddRow = ARGBAddRow_NEON; } } #endif #if defined(HAS_ARGBADDROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBAddRow = ARGBAddRow_Any_MSA; if (IS_ALIGNED(width, 8)) { ARGBAddRow = ARGBAddRow_MSA; } } #endif // Add plane for (y = 0; y < height; ++y) { ARGBAddRow(src_argb0, src_argb1, dst_argb, width); src_argb0 += src_stride_argb0; src_argb1 += src_stride_argb1; dst_argb += dst_stride_argb; } return 0; } // Subtract 2 ARGB images and store to destination. LIBYUV_API int ARGBSubtract(const uint8_t* src_argb0, int src_stride_argb0, const uint8_t* src_argb1, int src_stride_argb1, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*ARGBSubtractRow)(const uint8_t* src0, const uint8_t* src1, uint8_t* dst, int width) = ARGBSubtractRow_C; if (!src_argb0 || !src_argb1 || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. 
if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } // Coalesce rows. if (src_stride_argb0 == width * 4 && src_stride_argb1 == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb0 = src_stride_argb1 = dst_stride_argb = 0; } #if defined(HAS_ARGBSUBTRACTROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBSubtractRow = ARGBSubtractRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBSubtractRow = ARGBSubtractRow_SSE2; } } #endif #if defined(HAS_ARGBSUBTRACTROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBSubtractRow = ARGBSubtractRow_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBSubtractRow = ARGBSubtractRow_AVX2; } } #endif #if defined(HAS_ARGBSUBTRACTROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBSubtractRow = ARGBSubtractRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBSubtractRow = ARGBSubtractRow_NEON; } } #endif #if defined(HAS_ARGBSUBTRACTROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBSubtractRow = ARGBSubtractRow_Any_MSA; if (IS_ALIGNED(width, 8)) { ARGBSubtractRow = ARGBSubtractRow_MSA; } } #endif // Subtract plane for (y = 0; y < height; ++y) { ARGBSubtractRow(src_argb0, src_argb1, dst_argb, width); src_argb0 += src_stride_argb0; src_argb1 += src_stride_argb1; dst_argb += dst_stride_argb; } return 0; } // Convert I422 to RGBA with matrix static int I422ToRGBAMatrix(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgba, int dst_stride_rgba, const struct YuvConstants* yuvconstants, int width, int height) { int y; void (*I422ToRGBARow)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = I422ToRGBARow_C; if (!src_y || !src_u || !src_v || !dst_rgba || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_rgba = dst_rgba + (height - 1) * dst_stride_rgba; dst_stride_rgba = -dst_stride_rgba; } #if defined(HAS_I422TORGBAROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToRGBARow = I422ToRGBARow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { I422ToRGBARow = I422ToRGBARow_SSSE3; } } #endif #if defined(HAS_I422TORGBAROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToRGBARow = I422ToRGBARow_Any_AVX2; if (IS_ALIGNED(width, 16)) { I422ToRGBARow = I422ToRGBARow_AVX2; } } #endif #if defined(HAS_I422TORGBAROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToRGBARow = I422ToRGBARow_Any_NEON; if (IS_ALIGNED(width, 8)) { I422ToRGBARow = I422ToRGBARow_NEON; } } #endif #if defined(HAS_I422TORGBAROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToRGBARow = I422ToRGBARow_Any_MSA; if (IS_ALIGNED(width, 8)) { I422ToRGBARow = I422ToRGBARow_MSA; } } #endif for (y = 0; y < height; ++y) { I422ToRGBARow(src_y, src_u, src_v, dst_rgba, yuvconstants, width); dst_rgba += dst_stride_rgba; src_y += src_stride_y; src_u += src_stride_u; src_v += src_stride_v; } return 0; } // Convert I422 to RGBA. LIBYUV_API int I422ToRGBA(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_rgba, int dst_stride_rgba, int width, int height) { return I422ToRGBAMatrix(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_rgba, dst_stride_rgba, &kYuvI601Constants, width, height); } // Convert I422 to BGRA. 
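// BGRA output below reuses the RGBA path: swapping the U and V plane
// arguments and selecting the mirrored kYvuI601Constants matrix flips the
// channel order without needing a dedicated BGRA row kernel.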
LIBYUV_API int I422ToBGRA(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_bgra, int dst_stride_bgra, int width, int height) { return I422ToRGBAMatrix(src_y, src_stride_y, src_v, src_stride_v, // Swap U and V src_u, src_stride_u, dst_bgra, dst_stride_bgra, &kYvuI601Constants, // Use Yvu matrix width, height); } // Convert NV12 to RGB565. LIBYUV_API int NV12ToRGB565(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_rgb565, int dst_stride_rgb565, int width, int height) { int y; void (*NV12ToRGB565Row)( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) = NV12ToRGB565Row_C; if (!src_y || !src_uv || !dst_rgb565 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst_rgb565 = dst_rgb565 + (height - 1) * dst_stride_rgb565; dst_stride_rgb565 = -dst_stride_rgb565; } #if defined(HAS_NV12TORGB565ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { NV12ToRGB565Row = NV12ToRGB565Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { NV12ToRGB565Row = NV12ToRGB565Row_SSSE3; } } #endif #if defined(HAS_NV12TORGB565ROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { NV12ToRGB565Row = NV12ToRGB565Row_Any_AVX2; if (IS_ALIGNED(width, 16)) { NV12ToRGB565Row = NV12ToRGB565Row_AVX2; } } #endif #if defined(HAS_NV12TORGB565ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { NV12ToRGB565Row = NV12ToRGB565Row_Any_NEON; if (IS_ALIGNED(width, 8)) { NV12ToRGB565Row = NV12ToRGB565Row_NEON; } } #endif #if defined(HAS_NV12TORGB565ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { NV12ToRGB565Row = NV12ToRGB565Row_Any_MSA; if (IS_ALIGNED(width, 8)) { NV12ToRGB565Row = NV12ToRGB565Row_MSA; } } #endif for (y = 0; y < height; ++y) { NV12ToRGB565Row(src_y, src_uv, dst_rgb565, &kYuvI601Constants, width); dst_rgb565 += dst_stride_rgb565; src_y += src_stride_y; if (y & 1) { src_uv += src_stride_uv; } } return 0; } // Convert RAW to RGB24. LIBYUV_API int RAWToRGB24(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_rgb24, int dst_stride_rgb24, int width, int height) { int y; void (*RAWToRGB24Row)(const uint8_t* src_rgb, uint8_t* dst_rgb24, int width) = RAWToRGB24Row_C; if (!src_raw || !dst_rgb24 || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_raw = src_raw + (height - 1) * src_stride_raw; src_stride_raw = -src_stride_raw; } // Coalesce rows. 
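// RAW is 3 bytes per pixel like RGB24 but with the opposite channel order in
// memory (R,G,B versus B,G,R), so the row kernels reverse each triplet; both
// strides are tested against width * 3 below.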
if (src_stride_raw == width * 3 && dst_stride_rgb24 == width * 3) { width *= height; height = 1; src_stride_raw = dst_stride_rgb24 = 0; } #if defined(HAS_RAWTORGB24ROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { RAWToRGB24Row = RAWToRGB24Row_Any_SSSE3; if (IS_ALIGNED(width, 8)) { RAWToRGB24Row = RAWToRGB24Row_SSSE3; } } #endif #if defined(HAS_RAWTORGB24ROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { RAWToRGB24Row = RAWToRGB24Row_Any_NEON; if (IS_ALIGNED(width, 8)) { RAWToRGB24Row = RAWToRGB24Row_NEON; } } #endif #if defined(HAS_RAWTORGB24ROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { RAWToRGB24Row = RAWToRGB24Row_Any_MSA; if (IS_ALIGNED(width, 16)) { RAWToRGB24Row = RAWToRGB24Row_MSA; } } #endif for (y = 0; y < height; ++y) { RAWToRGB24Row(src_raw, dst_rgb24, width); src_raw += src_stride_raw; dst_rgb24 += dst_stride_rgb24; } return 0; } LIBYUV_API void SetPlane(uint8_t* dst_y, int dst_stride_y, int width, int height, uint32_t value) { int y; void (*SetRow)(uint8_t * dst, uint8_t value, int width) = SetRow_C; if (height < 0) { height = -height; dst_y = dst_y + (height - 1) * dst_stride_y; dst_stride_y = -dst_stride_y; } // Coalesce rows. if (dst_stride_y == width) { width *= height; height = 1; dst_stride_y = 0; } #if defined(HAS_SETROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { SetRow = SetRow_Any_NEON; if (IS_ALIGNED(width, 16)) { SetRow = SetRow_NEON; } } #endif #if defined(HAS_SETROW_X86) if (TestCpuFlag(kCpuHasX86)) { SetRow = SetRow_Any_X86; if (IS_ALIGNED(width, 4)) { SetRow = SetRow_X86; } } #endif #if defined(HAS_SETROW_ERMS) if (TestCpuFlag(kCpuHasERMS)) { SetRow = SetRow_ERMS; } #endif #if defined(HAS_SETROW_MSA) if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 16)) { SetRow = SetRow_MSA; } #endif // Set plane for (y = 0; y < height; ++y) { SetRow(dst_y, value, width); dst_y += dst_stride_y; } } // Draw a rectangle into I420 LIBYUV_API int I420Rect(uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int x, int y, int width, int height, int value_y, int value_u, int value_v) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; uint8_t* start_y = dst_y + y * dst_stride_y + x; uint8_t* start_u = dst_u + (y / 2) * dst_stride_u + (x / 2); uint8_t* start_v = dst_v + (y / 2) * dst_stride_v + (x / 2); if (!dst_y || !dst_u || !dst_v || width <= 0 || height == 0 || x < 0 || y < 0 || value_y < 0 || value_y > 255 || value_u < 0 || value_u > 255 || value_v < 0 || value_v > 255) { return -1; } SetPlane(start_y, dst_stride_y, width, height, value_y); SetPlane(start_u, dst_stride_u, halfwidth, halfheight, value_u); SetPlane(start_v, dst_stride_v, halfwidth, halfheight, value_v); return 0; } // Draw a rectangle into ARGB LIBYUV_API int ARGBRect(uint8_t* dst_argb, int dst_stride_argb, int dst_x, int dst_y, int width, int height, uint32_t value) { int y; void (*ARGBSetRow)(uint8_t * dst_argb, uint32_t value, int width) = ARGBSetRow_C; if (!dst_argb || width <= 0 || height == 0 || dst_x < 0 || dst_y < 0) { return -1; } if (height < 0) { height = -height; dst_argb = dst_argb + (height - 1) * dst_stride_argb; dst_stride_argb = -dst_stride_argb; } dst_argb += dst_y * dst_stride_argb + dst_x * 4; // Coalesce rows. 
if (dst_stride_argb == width * 4) { width *= height; height = 1; dst_stride_argb = 0; } #if defined(HAS_ARGBSETROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBSetRow = ARGBSetRow_Any_NEON; if (IS_ALIGNED(width, 4)) { ARGBSetRow = ARGBSetRow_NEON; } } #endif #if defined(HAS_ARGBSETROW_X86) if (TestCpuFlag(kCpuHasX86)) { ARGBSetRow = ARGBSetRow_X86; } #endif #if defined(HAS_ARGBSETROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBSetRow = ARGBSetRow_Any_MSA; if (IS_ALIGNED(width, 4)) { ARGBSetRow = ARGBSetRow_MSA; } } #endif // Set plane for (y = 0; y < height; ++y) { ARGBSetRow(dst_argb, value, width); dst_argb += dst_stride_argb; } return 0; } // Convert unattenuated ARGB to preattenuated ARGB. // An unattenuated ARGB alpha blend uses the formula // p = a * f + (1 - a) * b // where // p is output pixel // f is foreground pixel // b is background pixel // a is alpha value from foreground pixel // A preattenuated ARGB alpha blend uses the formula // p = f + (1 - a) * b // where // f is foreground pixel premultiplied by alpha LIBYUV_API int ARGBAttenuate(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*ARGBAttenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBAttenuateRow_C; if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; } #if defined(HAS_ARGBATTENUATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_SSSE3; if (IS_ALIGNED(width, 4)) { ARGBAttenuateRow = ARGBAttenuateRow_SSSE3; } } #endif #if defined(HAS_ARGBATTENUATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBAttenuateRow = ARGBAttenuateRow_AVX2; } } #endif #if defined(HAS_ARGBATTENUATEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBAttenuateRow = ARGBAttenuateRow_NEON; } } #endif #if defined(HAS_ARGBATTENUATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBAttenuateRow = ARGBAttenuateRow_Any_MSA; if (IS_ALIGNED(width, 8)) { ARGBAttenuateRow = ARGBAttenuateRow_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBAttenuateRow(src_argb, dst_argb, width); src_argb += src_stride_argb; dst_argb += dst_stride_argb; } return 0; } // Convert preattenuated ARGB to unattenuated ARGB. LIBYUV_API int ARGBUnattenuate(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*ARGBUnattenuateRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBUnattenuateRow_C; if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows.
if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; } #if defined(HAS_ARGBUNATTENUATEROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBUnattenuateRow = ARGBUnattenuateRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBUnattenuateRow = ARGBUnattenuateRow_SSE2; } } #endif #if defined(HAS_ARGBUNATTENUATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBUnattenuateRow = ARGBUnattenuateRow_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBUnattenuateRow = ARGBUnattenuateRow_AVX2; } } #endif // TODO(fbarchard): Neon version. for (y = 0; y < height; ++y) { ARGBUnattenuateRow(src_argb, dst_argb, width); src_argb += src_stride_argb; dst_argb += dst_stride_argb; } return 0; } // Convert ARGB to Grayed ARGB. LIBYUV_API int ARGBGrayTo(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBGrayRow_C; if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; } #if defined(HAS_ARGBGRAYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { ARGBGrayRow = ARGBGrayRow_SSSE3; } #endif #if defined(HAS_ARGBGRAYROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { ARGBGrayRow = ARGBGrayRow_NEON; } #endif #if defined(HAS_ARGBGRAYROW_MSA) if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) { ARGBGrayRow = ARGBGrayRow_MSA; } #endif for (y = 0; y < height; ++y) { ARGBGrayRow(src_argb, dst_argb, width); src_argb += src_stride_argb; dst_argb += dst_stride_argb; } return 0; } // Make a rectangle of ARGB gray scale. LIBYUV_API int ARGBGray(uint8_t* dst_argb, int dst_stride_argb, int dst_x, int dst_y, int width, int height) { int y; void (*ARGBGrayRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBGrayRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; } // Coalesce rows. if (dst_stride_argb == width * 4) { width *= height; height = 1; dst_stride_argb = 0; } #if defined(HAS_ARGBGRAYROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { ARGBGrayRow = ARGBGrayRow_SSSE3; } #endif #if defined(HAS_ARGBGRAYROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { ARGBGrayRow = ARGBGrayRow_NEON; } #endif #if defined(HAS_ARGBGRAYROW_MSA) if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) { ARGBGrayRow = ARGBGrayRow_MSA; } #endif for (y = 0; y < height; ++y) { ARGBGrayRow(dst, dst, width); dst += dst_stride_argb; } return 0; } // Make a rectangle of ARGB Sepia tone. LIBYUV_API int ARGBSepia(uint8_t* dst_argb, int dst_stride_argb, int dst_x, int dst_y, int width, int height) { int y; void (*ARGBSepiaRow)(uint8_t * dst_argb, int width) = ARGBSepiaRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; } // Coalesce rows. 
if (dst_stride_argb == width * 4) { width *= height; height = 1; dst_stride_argb = 0; } #if defined(HAS_ARGBSEPIAROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { ARGBSepiaRow = ARGBSepiaRow_SSSE3; } #endif #if defined(HAS_ARGBSEPIAROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { ARGBSepiaRow = ARGBSepiaRow_NEON; } #endif #if defined(HAS_ARGBSEPIAROW_MSA) if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) { ARGBSepiaRow = ARGBSepiaRow_MSA; } #endif for (y = 0; y < height; ++y) { ARGBSepiaRow(dst, width); dst += dst_stride_argb; } return 0; } // Apply a 4x4 matrix to each ARGB pixel. // Note: Normally for shading, but can be used to swizzle or invert. LIBYUV_API int ARGBColorMatrix(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, const int8_t* matrix_argb, int width, int height) { int y; void (*ARGBColorMatrixRow)(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width) = ARGBColorMatrixRow_C; if (!src_argb || !dst_argb || !matrix_argb || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; } #if defined(HAS_ARGBCOLORMATRIXROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 8)) { ARGBColorMatrixRow = ARGBColorMatrixRow_SSSE3; } #endif #if defined(HAS_ARGBCOLORMATRIXROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { ARGBColorMatrixRow = ARGBColorMatrixRow_NEON; } #endif #if defined(HAS_ARGBCOLORMATRIXROW_MSA) if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) { ARGBColorMatrixRow = ARGBColorMatrixRow_MSA; } #endif for (y = 0; y < height; ++y) { ARGBColorMatrixRow(src_argb, dst_argb, matrix_argb, width); src_argb += src_stride_argb; dst_argb += dst_stride_argb; } return 0; } // Apply a 4x3 matrix to each ARGB pixel. // Deprecated. LIBYUV_API int RGBColorMatrix(uint8_t* dst_argb, int dst_stride_argb, const int8_t* matrix_rgb, int dst_x, int dst_y, int width, int height) { SIMD_ALIGNED(int8_t matrix_argb[16]); uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || !matrix_rgb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; } // Convert 4x3 7 bit matrix to 4x4 6 bit matrix. matrix_argb[0] = matrix_rgb[0] / 2; matrix_argb[1] = matrix_rgb[1] / 2; matrix_argb[2] = matrix_rgb[2] / 2; matrix_argb[3] = matrix_rgb[3] / 2; matrix_argb[4] = matrix_rgb[4] / 2; matrix_argb[5] = matrix_rgb[5] / 2; matrix_argb[6] = matrix_rgb[6] / 2; matrix_argb[7] = matrix_rgb[7] / 2; matrix_argb[8] = matrix_rgb[8] / 2; matrix_argb[9] = matrix_rgb[9] / 2; matrix_argb[10] = matrix_rgb[10] / 2; matrix_argb[11] = matrix_rgb[11] / 2; matrix_argb[14] = matrix_argb[13] = matrix_argb[12] = 0; matrix_argb[15] = 64; // 1.0 return ARGBColorMatrix((const uint8_t*)(dst), dst_stride_argb, dst, dst_stride_argb, &matrix_argb[0], width, height); } // Apply a color table each ARGB pixel. // Table contains 256 ARGB values. 
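// As a scalar sketch of the intended lookup (an assumption for illustration,
// not a quote of the row function): each channel indexes the 256-entry table
// by its own value and reads back its own component. For one pixel p in
// byte order B, G, R, A:
//   p[0] = table_argb[p[0] * 4 + 0];  // B
//   p[1] = table_argb[p[1] * 4 + 1];  // G
//   p[2] = table_argb[p[2] * 4 + 2];  // R
//   p[3] = table_argb[p[3] * 4 + 3];  // A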
LIBYUV_API int ARGBColorTable(uint8_t* dst_argb, int dst_stride_argb, const uint8_t* table_argb, int dst_x, int dst_y, int width, int height) { int y; void (*ARGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb, int width) = ARGBColorTableRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; } // Coalesce rows. if (dst_stride_argb == width * 4) { width *= height; height = 1; dst_stride_argb = 0; } #if defined(HAS_ARGBCOLORTABLEROW_X86) if (TestCpuFlag(kCpuHasX86)) { ARGBColorTableRow = ARGBColorTableRow_X86; } #endif for (y = 0; y < height; ++y) { ARGBColorTableRow(dst, table_argb, width); dst += dst_stride_argb; } return 0; } // Apply a color table to each ARGB pixel but preserve destination alpha. // Table contains 256 ARGB values. LIBYUV_API int RGBColorTable(uint8_t* dst_argb, int dst_stride_argb, const uint8_t* table_argb, int dst_x, int dst_y, int width, int height) { int y; void (*RGBColorTableRow)(uint8_t * dst_argb, const uint8_t* table_argb, int width) = RGBColorTableRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || !table_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0) { return -1; } // Coalesce rows. if (dst_stride_argb == width * 4) { width *= height; height = 1; dst_stride_argb = 0; } #if defined(HAS_RGBCOLORTABLEROW_X86) if (TestCpuFlag(kCpuHasX86)) { RGBColorTableRow = RGBColorTableRow_X86; } #endif for (y = 0; y < height; ++y) { RGBColorTableRow(dst, table_argb, width); dst += dst_stride_argb; } return 0; } // ARGBQuantize is used to posterize art. // e.g. rgb / qvalue * qvalue + qvalue / 2 // But the low levels implement it efficiently with 3 parameters, and could be // used for other high level operations. // dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; // where scale is 1 / interval_size as a fixed point value. // The divide is replaced with a multiply by a fixed point reciprocal. // Caveat - although SSE2 saturates, the C function does not and should be used // with care if doing anything but quantization. LIBYUV_API int ARGBQuantize(uint8_t* dst_argb, int dst_stride_argb, int scale, int interval_size, int interval_offset, int dst_x, int dst_y, int width, int height) { int y; void (*ARGBQuantizeRow)(uint8_t * dst_argb, int scale, int interval_size, int interval_offset, int width) = ARGBQuantizeRow_C; uint8_t* dst = dst_argb + dst_y * dst_stride_argb + dst_x * 4; if (!dst_argb || width <= 0 || height <= 0 || dst_x < 0 || dst_y < 0 || interval_size < 1 || interval_size > 255) { return -1; } // Coalesce rows. if (dst_stride_argb == width * 4) { width *= height; height = 1; dst_stride_argb = 0; } #if defined(HAS_ARGBQUANTIZEROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) { ARGBQuantizeRow = ARGBQuantizeRow_SSE2; } #endif #if defined(HAS_ARGBQUANTIZEROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { ARGBQuantizeRow = ARGBQuantizeRow_NEON; } #endif #if defined(HAS_ARGBQUANTIZEROW_MSA) if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 8)) { ARGBQuantizeRow = ARGBQuantizeRow_MSA; } #endif for (y = 0; y < height; ++y) { ARGBQuantizeRow(dst, scale, interval_size, interval_offset, width); dst += dst_stride_argb; } return 0; } // Computes table of cumulative sum for image where the value is the sum // of all values above and to the left of the entry. Used by ARGBBlur.
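// With such a summed-area table S, where S[y][x] is the sum of all entries
// at or above-left of (y, x), any axis-aligned box sum over columns
// [x0, x1) and rows [y0, y1) needs only four table reads, independent of
// box size:
//   box_sum = S[y1][x1] - S[y0][x1] - S[y1][x0] + S[y0][x0]
// ARGBBlur below uses exactly this to average a box around each pixel in
// constant time per pixel.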
LIBYUV_API int ARGBComputeCumulativeSum(const uint8_t* src_argb, int src_stride_argb, int32_t* dst_cumsum, int dst_stride32_cumsum, int width, int height) { int y; void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum, const int32_t* previous_cumsum, int width) = ComputeCumulativeSumRow_C; int32_t* previous_cumsum = dst_cumsum; if (!dst_cumsum || !src_argb || width <= 0 || height <= 0) { return -1; } #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; } #endif memset(dst_cumsum, 0, width * sizeof(dst_cumsum[0]) * 4); // 4 int per pixel. for (y = 0; y < height; ++y) { ComputeCumulativeSumRow(src_argb, dst_cumsum, previous_cumsum, width); previous_cumsum = dst_cumsum; dst_cumsum += dst_stride32_cumsum; src_argb += src_stride_argb; } return 0; } // Blur ARGB image. // Caller should allocate CumulativeSum table of width * height * 16 bytes // aligned to 16 byte boundary. height can be radius * 2 + 2 to save memory // as the buffer is treated as circular. LIBYUV_API int ARGBBlur(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int32_t* dst_cumsum, int dst_stride32_cumsum, int width, int height, int radius) { int y; void (*ComputeCumulativeSumRow)(const uint8_t* row, int32_t* cumsum, const int32_t* previous_cumsum, int width) = ComputeCumulativeSumRow_C; void (*CumulativeSumToAverageRow)( const int32_t* topleft, const int32_t* botleft, int width, int area, uint8_t* dst, int count) = CumulativeSumToAverageRow_C; int32_t* cumsum_bot_row; int32_t* max_cumsum_bot_row; int32_t* cumsum_top_row; if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } if (radius > height) { radius = height; } if (radius > (width / 2 - 1)) { radius = width / 2 - 1; } if (radius <= 0) { return -1; } #if defined(HAS_CUMULATIVESUMTOAVERAGEROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ComputeCumulativeSumRow = ComputeCumulativeSumRow_SSE2; CumulativeSumToAverageRow = CumulativeSumToAverageRow_SSE2; } #endif // Compute enough CumulativeSum for first row to be blurred. After this // one row of CumulativeSum is updated at a time. ARGBComputeCumulativeSum(src_argb, src_stride_argb, dst_cumsum, dst_stride32_cumsum, width, radius); src_argb = src_argb + radius * src_stride_argb; cumsum_bot_row = &dst_cumsum[(radius - 1) * dst_stride32_cumsum]; max_cumsum_bot_row = &dst_cumsum[(radius * 2 + 2) * dst_stride32_cumsum]; cumsum_top_row = &dst_cumsum[0]; for (y = 0; y < height; ++y) { int top_y = ((y - radius - 1) >= 0) ? (y - radius - 1) : 0; int bot_y = ((y + radius) < height) ? (y + radius) : (height - 1); int area = radius * (bot_y - top_y); int boxwidth = radius * 4; int x; int n; // Increment cumsum_top_row pointer with circular buffer wrap around. if (top_y) { cumsum_top_row += dst_stride32_cumsum; if (cumsum_top_row >= max_cumsum_bot_row) { cumsum_top_row = dst_cumsum; } } // Increment cumsum_bot_row pointer with circular buffer wrap around and // then fill in a row of CumulativeSum. if ((y + radius) < height) { const int32_t* prev_cumsum_bot_row = cumsum_bot_row; cumsum_bot_row += dst_stride32_cumsum; if (cumsum_bot_row >= max_cumsum_bot_row) { cumsum_bot_row = dst_cumsum; } ComputeCumulativeSumRow(src_argb, cumsum_bot_row, prev_cumsum_bot_row, width); src_argb += src_stride_argb; } // Left clipped. 
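// Near the left edge the averaging box would reach past column 0, so it is
// clipped: the window starts radius + 1 columns wide and grows by one
// column per output pixel (boxwidth += 4 bytes, area += box height) until
// the full box fits. The right-edge loop below shrinks it symmetrically.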
for (x = 0; x < radius + 1; ++x) { CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area, &dst_argb[x * 4], 1); area += (bot_y - top_y); boxwidth += 4; } // Middle unclipped. n = (width - 1) - radius - x + 1; CumulativeSumToAverageRow(cumsum_top_row, cumsum_bot_row, boxwidth, area, &dst_argb[x * 4], n); // Right clipped. for (x += n; x <= width - 1; ++x) { area -= (bot_y - top_y); boxwidth -= 4; CumulativeSumToAverageRow(cumsum_top_row + (x - radius - 1) * 4, cumsum_bot_row + (x - radius - 1) * 4, boxwidth, area, &dst_argb[x * 4], 1); } dst_argb += dst_stride_argb; } return 0; } // Multiply ARGB image by a specified ARGB value. LIBYUV_API int ARGBShade(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height, uint32_t value) { int y; void (*ARGBShadeRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value) = ARGBShadeRow_C; if (!src_argb || !dst_argb || width <= 0 || height == 0 || value == 0u) { return -1; } if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; } #if defined(HAS_ARGBSHADEROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 4)) { ARGBShadeRow = ARGBShadeRow_SSE2; } #endif #if defined(HAS_ARGBSHADEROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { ARGBShadeRow = ARGBShadeRow_NEON; } #endif #if defined(HAS_ARGBSHADEROW_MSA) if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 4)) { ARGBShadeRow = ARGBShadeRow_MSA; } #endif for (y = 0; y < height; ++y) { ARGBShadeRow(src_argb, dst_argb, width, value); src_argb += src_stride_argb; dst_argb += dst_stride_argb; } return 0; } // Interpolate 2 planes by specified amount (0 to 255). LIBYUV_API int InterpolatePlane(const uint8_t* src0, int src_stride0, const uint8_t* src1, int src_stride1, uint8_t* dst, int dst_stride, int width, int height, int interpolation) { int y; void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; if (!src0 || !src1 || !dst || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; dst = dst + (height - 1) * dst_stride; dst_stride = -dst_stride; } // Coalesce rows. 
if (src_stride0 == width && src_stride1 == width && dst_stride == width) { width *= height; height = 1; src_stride0 = src_stride1 = dst_stride = 0; } #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { InterpolateRow = InterpolateRow_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { InterpolateRow = InterpolateRow_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { InterpolateRow = InterpolateRow_Any_NEON; if (IS_ALIGNED(width, 16)) { InterpolateRow = InterpolateRow_NEON; } } #endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; if (IS_ALIGNED(width, 32)) { InterpolateRow = InterpolateRow_MSA; } } #endif for (y = 0; y < height; ++y) { InterpolateRow(dst, src0, src1 - src0, width, interpolation); src0 += src_stride0; src1 += src_stride1; dst += dst_stride; } return 0; } // Interpolate 2 ARGB images by specified amount (0 to 255). LIBYUV_API int ARGBInterpolate(const uint8_t* src_argb0, int src_stride_argb0, const uint8_t* src_argb1, int src_stride_argb1, uint8_t* dst_argb, int dst_stride_argb, int width, int height, int interpolation) { return InterpolatePlane(src_argb0, src_stride_argb0, src_argb1, src_stride_argb1, dst_argb, dst_stride_argb, width * 4, height, interpolation); } // Interpolate 2 YUV images by specified amount (0 to 255). LIBYUV_API int I420Interpolate(const uint8_t* src0_y, int src0_stride_y, const uint8_t* src0_u, int src0_stride_u, const uint8_t* src0_v, int src0_stride_v, const uint8_t* src1_y, int src1_stride_y, const uint8_t* src1_u, int src1_stride_u, const uint8_t* src1_v, int src1_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height, int interpolation) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!src0_y || !src0_u || !src0_v || !src1_y || !src1_u || !src1_v || !dst_y || !dst_u || !dst_v || width <= 0 || height == 0) { return -1; } InterpolatePlane(src0_y, src0_stride_y, src1_y, src1_stride_y, dst_y, dst_stride_y, width, height, interpolation); InterpolatePlane(src0_u, src0_stride_u, src1_u, src1_stride_u, dst_u, dst_stride_u, halfwidth, halfheight, interpolation); InterpolatePlane(src0_v, src0_stride_v, src1_v, src1_stride_v, dst_v, dst_stride_v, halfwidth, halfheight, interpolation); return 0; } // Shuffle ARGB channel order. e.g. BGRA to ARGB. LIBYUV_API int ARGBShuffle(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_argb, int dst_stride_argb, const uint8_t* shuffler, int width, int height) { int y; void (*ARGBShuffleRow)(const uint8_t* src_bgra, uint8_t* dst_argb, const uint8_t* shuffler, int width) = ARGBShuffleRow_C; if (!src_bgra || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_bgra = src_bgra + (height - 1) * src_stride_bgra; src_stride_bgra = -src_stride_bgra; } // Coalesce rows. 
if (src_stride_bgra == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_bgra = dst_stride_argb = 0; } #if defined(HAS_ARGBSHUFFLEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBShuffleRow = ARGBShuffleRow_Any_SSSE3; if (IS_ALIGNED(width, 8)) { ARGBShuffleRow = ARGBShuffleRow_SSSE3; } } #endif #if defined(HAS_ARGBSHUFFLEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBShuffleRow = ARGBShuffleRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { ARGBShuffleRow = ARGBShuffleRow_AVX2; } } #endif #if defined(HAS_ARGBSHUFFLEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBShuffleRow = ARGBShuffleRow_Any_NEON; if (IS_ALIGNED(width, 4)) { ARGBShuffleRow = ARGBShuffleRow_NEON; } } #endif #if defined(HAS_ARGBSHUFFLEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBShuffleRow = ARGBShuffleRow_Any_MSA; if (IS_ALIGNED(width, 8)) { ARGBShuffleRow = ARGBShuffleRow_MSA; } } #endif for (y = 0; y < height; ++y) { ARGBShuffleRow(src_bgra, dst_argb, shuffler, width); src_bgra += src_stride_bgra; dst_argb += dst_stride_argb; } return 0; } // Sobel ARGB effect. static int ARGBSobelize(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height, void (*SobelRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst, int width)) { int y; void (*ARGBToYJRow)(const uint8_t* src_argb, uint8_t* dst_g, int width) = ARGBToYJRow_C; void (*SobelYRow)(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width) = SobelYRow_C; void (*SobelXRow)(const uint8_t* src_y0, const uint8_t* src_y1, const uint8_t* src_y2, uint8_t* dst_sobely, int width) = SobelXRow_C; const int kEdge = 16; // Extra pixels at start of row for extrude/align. if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } #if defined(HAS_ARGBTOYJROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ARGBToYJRow = ARGBToYJRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { ARGBToYJRow = ARGBToYJRow_SSSE3; } } #endif #if defined(HAS_ARGBTOYJROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBToYJRow = ARGBToYJRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { ARGBToYJRow = ARGBToYJRow_AVX2; } } #endif #if defined(HAS_ARGBTOYJROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBToYJRow = ARGBToYJRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ARGBToYJRow = ARGBToYJRow_NEON; } } #endif #if defined(HAS_ARGBTOYJROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBToYJRow = ARGBToYJRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBToYJRow = ARGBToYJRow_MSA; } } #endif #if defined(HAS_SOBELYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelYRow = SobelYRow_SSE2; } #endif #if defined(HAS_SOBELYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { SobelYRow = SobelYRow_NEON; } #endif #if defined(HAS_SOBELYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { SobelYRow = SobelYRow_MSA; } #endif #if defined(HAS_SOBELXROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelXRow = SobelXRow_SSE2; } #endif #if defined(HAS_SOBELXROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { SobelXRow = SobelXRow_NEON; } #endif #if defined(HAS_SOBELXROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { SobelXRow = SobelXRow_MSA; } #endif { // 3 rows with edges before/after. const int kRowSize = (width + kEdge + 31) & ~31; align_buffer_64(rows, kRowSize * 2 + (kEdge + kRowSize * 3 + kEdge)); uint8_t* row_sobelx = rows; uint8_t* row_sobely = rows + kRowSize; uint8_t* row_y = rows + kRowSize * 2; // Convert first row. 
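// Sobel needs a 3x3 neighborhood, so three luma rows (previous, current,
// next) are kept in a small ring; each iteration converts one new ARGB row
// to luma and rotates the three row pointers instead of copying data. The
// kEdge padding makes reads at row[-1] and row[width] safe once the edge
// pixels are extruded (replicated) into it.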
uint8_t* row_y0 = row_y + kEdge; uint8_t* row_y1 = row_y0 + kRowSize; uint8_t* row_y2 = row_y1 + kRowSize; ARGBToYJRow(src_argb, row_y0, width); row_y0[-1] = row_y0[0]; memset(row_y0 + width, row_y0[width - 1], 16); // Extrude 16 for valgrind. ARGBToYJRow(src_argb, row_y1, width); row_y1[-1] = row_y1[0]; memset(row_y1 + width, row_y1[width - 1], 16); memset(row_y2 + width, 0, 16); for (y = 0; y < height; ++y) { // Convert next row of ARGB to G. if (y < (height - 1)) { src_argb += src_stride_argb; } ARGBToYJRow(src_argb, row_y2, width); row_y2[-1] = row_y2[0]; row_y2[width] = row_y2[width - 1]; SobelXRow(row_y0 - 1, row_y1 - 1, row_y2 - 1, row_sobelx, width); SobelYRow(row_y0 - 1, row_y2 - 1, row_sobely, width); SobelRow(row_sobelx, row_sobely, dst_argb, width); // Cycle thru circular queue of 3 row_y buffers. { uint8_t* row_yt = row_y0; row_y0 = row_y1; row_y1 = row_y2; row_y2 = row_yt; } dst_argb += dst_stride_argb; } free_aligned_buffer_64(rows); } return 0; } // Sobel ARGB effect. LIBYUV_API int ARGBSobel(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { void (*SobelRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) = SobelRow_C; #if defined(HAS_SOBELROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelRow = SobelRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { SobelRow = SobelRow_SSE2; } } #endif #if defined(HAS_SOBELROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { SobelRow = SobelRow_Any_NEON; if (IS_ALIGNED(width, 8)) { SobelRow = SobelRow_NEON; } } #endif #if defined(HAS_SOBELROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { SobelRow = SobelRow_Any_MSA; if (IS_ALIGNED(width, 16)) { SobelRow = SobelRow_MSA; } } #endif return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height, SobelRow); } // Sobel ARGB effect with planar output. LIBYUV_API int ARGBSobelToPlane(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_y, int dst_stride_y, int width, int height) { void (*SobelToPlaneRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_, int width) = SobelToPlaneRow_C; #if defined(HAS_SOBELTOPLANEROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelToPlaneRow = SobelToPlaneRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { SobelToPlaneRow = SobelToPlaneRow_SSE2; } } #endif #if defined(HAS_SOBELTOPLANEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { SobelToPlaneRow = SobelToPlaneRow_Any_NEON; if (IS_ALIGNED(width, 16)) { SobelToPlaneRow = SobelToPlaneRow_NEON; } } #endif #if defined(HAS_SOBELTOPLANEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { SobelToPlaneRow = SobelToPlaneRow_Any_MSA; if (IS_ALIGNED(width, 32)) { SobelToPlaneRow = SobelToPlaneRow_MSA; } } #endif return ARGBSobelize(src_argb, src_stride_argb, dst_y, dst_stride_y, width, height, SobelToPlaneRow); } // SobelXY ARGB effect. // Similar to Sobel, but also stores Sobel X in R and Sobel Y in B. G = Sobel. 
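// Per-pixel packing this implies, as an illustrative sketch (A = 255 is an
// assumption; the channel mapping is stated above): with sx = Sobel X,
// sy = Sobel Y and s the combined (clamped) Sobel magnitude,
//   dst[0] = sy;   // B = Sobel Y
//   dst[1] = s;    // G = combined Sobel
//   dst[2] = sx;   // R = Sobel X
//   dst[3] = 255;  // A = opaque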
LIBYUV_API int ARGBSobelXY(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { void (*SobelXYRow)(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) = SobelXYRow_C; #if defined(HAS_SOBELXYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SobelXYRow = SobelXYRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { SobelXYRow = SobelXYRow_SSE2; } } #endif #if defined(HAS_SOBELXYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { SobelXYRow = SobelXYRow_Any_NEON; if (IS_ALIGNED(width, 8)) { SobelXYRow = SobelXYRow_NEON; } } #endif #if defined(HAS_SOBELXYROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { SobelXYRow = SobelXYRow_Any_MSA; if (IS_ALIGNED(width, 16)) { SobelXYRow = SobelXYRow_MSA; } } #endif return ARGBSobelize(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height, SobelXYRow); } // Apply a 4x4 polynomial to each ARGB pixel. LIBYUV_API int ARGBPolynomial(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, const float* poly, int width, int height) { int y; void (*ARGBPolynomialRow)(const uint8_t* src_argb, uint8_t* dst_argb, const float* poly, int width) = ARGBPolynomialRow_C; if (!src_argb || !dst_argb || !poly || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; } #if defined(HAS_ARGBPOLYNOMIALROW_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(width, 2)) { ARGBPolynomialRow = ARGBPolynomialRow_SSE2; } #endif #if defined(HAS_ARGBPOLYNOMIALROW_AVX2) if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasFMA3) && IS_ALIGNED(width, 2)) { ARGBPolynomialRow = ARGBPolynomialRow_AVX2; } #endif for (y = 0; y < height; ++y) { ARGBPolynomialRow(src_argb, dst_argb, poly, width); src_argb += src_stride_argb; dst_argb += dst_stride_argb; } return 0; } // Convert plane of 16 bit shorts to half floats. // Source values are multiplied by scale before storing as half float. LIBYUV_API int HalfFloatPlane(const uint16_t* src_y, int src_stride_y, uint16_t* dst_y, int dst_stride_y, float scale, int width, int height) { int y; void (*HalfFloatRow)(const uint16_t* src, uint16_t* dst, float scale, int width) = HalfFloatRow_C; if (!src_y || !dst_y || width <= 0 || height == 0) { return -1; } src_stride_y >>= 1; dst_stride_y >>= 1; // Negative height means invert the image. if (height < 0) { height = -height; src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } // Coalesce rows. if (src_stride_y == width && dst_stride_y == width) { width *= height; height = 1; src_stride_y = dst_stride_y = 0; } #if defined(HAS_HALFFLOATROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { HalfFloatRow = HalfFloatRow_Any_SSE2; if (IS_ALIGNED(width, 8)) { HalfFloatRow = HalfFloatRow_SSE2; } } #endif #if defined(HAS_HALFFLOATROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { HalfFloatRow = HalfFloatRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { HalfFloatRow = HalfFloatRow_AVX2; } } #endif #if defined(HAS_HALFFLOATROW_F16C) if (TestCpuFlag(kCpuHasAVX2) && TestCpuFlag(kCpuHasF16C)) { HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_Any_F16C : HalfFloatRow_Any_F16C; if (IS_ALIGNED(width, 16)) { HalfFloatRow = (scale == 1.0f) ? 
HalfFloat1Row_F16C : HalfFloatRow_F16C; } } #endif #if defined(HAS_HALFFLOATROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_Any_NEON : HalfFloatRow_Any_NEON; if (IS_ALIGNED(width, 8)) { HalfFloatRow = (scale == 1.0f) ? HalfFloat1Row_NEON : HalfFloatRow_NEON; } } #endif #if defined(HAS_HALFFLOATROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { HalfFloatRow = HalfFloatRow_Any_MSA; if (IS_ALIGNED(width, 32)) { HalfFloatRow = HalfFloatRow_MSA; } } #endif for (y = 0; y < height; ++y) { HalfFloatRow(src_y, dst_y, scale, width); src_y += src_stride_y; dst_y += dst_stride_y; } return 0; } // Convert a buffer of bytes to floats, scale the values and store as floats. LIBYUV_API int ByteToFloat(const uint8_t* src_y, float* dst_y, float scale, int width) { void (*ByteToFloatRow)(const uint8_t* src, float* dst, float scale, int width) = ByteToFloatRow_C; if (!src_y || !dst_y || width <= 0) { return -1; } #if defined(HAS_BYTETOFLOATROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ByteToFloatRow = ByteToFloatRow_Any_NEON; if (IS_ALIGNED(width, 8)) { ByteToFloatRow = ByteToFloatRow_NEON; } } #endif ByteToFloatRow(src_y, dst_y, scale, width); return 0; } // Apply a lumacolortable to each ARGB pixel. LIBYUV_API int ARGBLumaColorTable(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, const uint8_t* luma, int width, int height) { int y; void (*ARGBLumaColorTableRow)( const uint8_t* src_argb, uint8_t* dst_argb, int width, const uint8_t* luma, const uint32_t lumacoeff) = ARGBLumaColorTableRow_C; if (!src_argb || !dst_argb || !luma || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; } #if defined(HAS_ARGBLUMACOLORTABLEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 4)) { ARGBLumaColorTableRow = ARGBLumaColorTableRow_SSSE3; } #endif for (y = 0; y < height; ++y) { ARGBLumaColorTableRow(src_argb, dst_argb, width, luma, 0x00264b0f); src_argb += src_stride_argb; dst_argb += dst_stride_argb; } return 0; } // Copy Alpha from one ARGB image to another. LIBYUV_API int ARGBCopyAlpha(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*ARGBCopyAlphaRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBCopyAlphaRow_C; if (!src_argb || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. 
if (src_stride_argb == width * 4 && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_argb = dst_stride_argb = 0; } #if defined(HAS_ARGBCOPYALPHAROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_SSE2; if (IS_ALIGNED(width, 8)) { ARGBCopyAlphaRow = ARGBCopyAlphaRow_SSE2; } } #endif #if defined(HAS_ARGBCOPYALPHAROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBCopyAlphaRow = ARGBCopyAlphaRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { ARGBCopyAlphaRow = ARGBCopyAlphaRow_AVX2; } } #endif for (y = 0; y < height; ++y) { ARGBCopyAlphaRow(src_argb, dst_argb, width); src_argb += src_stride_argb; dst_argb += dst_stride_argb; } return 0; } // Extract just the alpha channel from ARGB. LIBYUV_API int ARGBExtractAlpha(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_a, int dst_stride_a, int width, int height) { if (!src_argb || !dst_a || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb += (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } // Coalesce rows. if (src_stride_argb == width * 4 && dst_stride_a == width) { width *= height; height = 1; src_stride_argb = dst_stride_a = 0; } void (*ARGBExtractAlphaRow)(const uint8_t* src_argb, uint8_t* dst_a, int width) = ARGBExtractAlphaRow_C; #if defined(HAS_ARGBEXTRACTALPHAROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBExtractAlphaRow = IS_ALIGNED(width, 8) ? ARGBExtractAlphaRow_SSE2 : ARGBExtractAlphaRow_Any_SSE2; } #endif #if defined(HAS_ARGBEXTRACTALPHAROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBExtractAlphaRow = IS_ALIGNED(width, 32) ? ARGBExtractAlphaRow_AVX2 : ARGBExtractAlphaRow_Any_AVX2; } #endif #if defined(HAS_ARGBEXTRACTALPHAROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_NEON : ARGBExtractAlphaRow_Any_NEON; } #endif #if defined(HAS_ARGBEXTRACTALPHAROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBExtractAlphaRow = IS_ALIGNED(width, 16) ? ARGBExtractAlphaRow_MSA : ARGBExtractAlphaRow_Any_MSA; } #endif for (int y = 0; y < height; ++y) { ARGBExtractAlphaRow(src_argb, dst_a, width); src_argb += src_stride_argb; dst_a += dst_stride_a; } return 0; } // Copy a planar Y channel to the alpha channel of a destination ARGB image. LIBYUV_API int ARGBCopyYToAlpha(const uint8_t* src_y, int src_stride_y, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int y; void (*ARGBCopyYToAlphaRow)(const uint8_t* src_y, uint8_t* dst_argb, int width) = ARGBCopyYToAlphaRow_C; if (!src_y || !dst_argb || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_y = src_y + (height - 1) * src_stride_y; src_stride_y = -src_stride_y; } // Coalesce rows. 
if (src_stride_y == width && dst_stride_argb == width * 4) { width *= height; height = 1; src_stride_y = dst_stride_argb = 0; } #if defined(HAS_ARGBCOPYYTOALPHAROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_SSE2; if (IS_ALIGNED(width, 8)) { ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_SSE2; } } #endif #if defined(HAS_ARGBCOPYYTOALPHAROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_Any_AVX2; if (IS_ALIGNED(width, 16)) { ARGBCopyYToAlphaRow = ARGBCopyYToAlphaRow_AVX2; } } #endif for (y = 0; y < height; ++y) { ARGBCopyYToAlphaRow(src_y, dst_argb, width); src_y += src_stride_y; dst_argb += dst_stride_argb; } return 0; } // TODO(fbarchard): Consider if width is even Y channel can be split // directly. A SplitUVRow_Odd function could copy the remaining chroma. LIBYUV_API int YUY2ToNV12(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv, int width, int height) { int y; int halfwidth = (width + 1) >> 1; void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; if (!src_yuy2 || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_yuy2 = src_yuy2 + (height - 1) * src_stride_yuy2; src_stride_yuy2 = -src_stride_yuy2; } #if defined(HAS_SPLITUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SplitUVRow = SplitUVRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { SplitUVRow = SplitUVRow_SSE2; } } #endif #if defined(HAS_SPLITUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { SplitUVRow = SplitUVRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { SplitUVRow = SplitUVRow_AVX2; } } #endif #if defined(HAS_SPLITUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { SplitUVRow = SplitUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { SplitUVRow = SplitUVRow_NEON; } } #endif #if defined(HAS_SPLITUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { SplitUVRow = SplitUVRow_Any_MSA; if (IS_ALIGNED(width, 32)) { SplitUVRow = SplitUVRow_MSA; } } #endif #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { InterpolateRow = InterpolateRow_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { InterpolateRow = InterpolateRow_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { InterpolateRow = InterpolateRow_Any_NEON; if (IS_ALIGNED(width, 16)) { InterpolateRow = InterpolateRow_NEON; } } #endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; if (IS_ALIGNED(width, 32)) { InterpolateRow = InterpolateRow_MSA; } } #endif { int awidth = halfwidth * 2; // row of y and 2 rows of uv align_buffer_64(rows, awidth * 3); for (y = 0; y < height - 1; y += 2) { // Split Y from UV. SplitUVRow(src_yuy2, rows, rows + awidth, awidth); memcpy(dst_y, rows, width); SplitUVRow(src_yuy2 + src_stride_yuy2, rows, rows + awidth * 2, awidth); memcpy(dst_y + dst_stride_y, rows, width); InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128); src_yuy2 += src_stride_yuy2 * 2; dst_y += dst_stride_y * 2; dst_uv += dst_stride_uv; } if (height & 1) { // Split Y from UV. 
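// Odd height: the last row has no partner row to blend with, so its UV
// samples go straight to dst_uv instead of through the 50/50 InterpolateRow
// blend (fraction 128 of 256) used for the row pairs above.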
SplitUVRow(src_yuy2, rows, dst_uv, awidth); memcpy(dst_y, rows, width); } free_aligned_buffer_64(rows); } return 0; } LIBYUV_API int UYVYToNV12(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_uv, int dst_stride_uv, int width, int height) { int y; int halfwidth = (width + 1) >> 1; void (*SplitUVRow)(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) = SplitUVRow_C; void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; if (!src_uyvy || !dst_y || !dst_uv || width <= 0 || height == 0) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_uyvy = src_uyvy + (height - 1) * src_stride_uyvy; src_stride_uyvy = -src_stride_uyvy; } #if defined(HAS_SPLITUVROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { SplitUVRow = SplitUVRow_Any_SSE2; if (IS_ALIGNED(width, 16)) { SplitUVRow = SplitUVRow_SSE2; } } #endif #if defined(HAS_SPLITUVROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { SplitUVRow = SplitUVRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { SplitUVRow = SplitUVRow_AVX2; } } #endif #if defined(HAS_SPLITUVROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { SplitUVRow = SplitUVRow_Any_NEON; if (IS_ALIGNED(width, 16)) { SplitUVRow = SplitUVRow_NEON; } } #endif #if defined(HAS_SPLITUVROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { SplitUVRow = SplitUVRow_Any_MSA; if (IS_ALIGNED(width, 32)) { SplitUVRow = SplitUVRow_MSA; } } #endif #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { InterpolateRow = InterpolateRow_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { InterpolateRow = InterpolateRow_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { InterpolateRow = InterpolateRow_Any_NEON; if (IS_ALIGNED(width, 16)) { InterpolateRow = InterpolateRow_NEON; } } #endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; if (IS_ALIGNED(width, 32)) { InterpolateRow = InterpolateRow_MSA; } } #endif { int awidth = halfwidth * 2; // row of y and 2 rows of uv align_buffer_64(rows, awidth * 3); for (y = 0; y < height - 1; y += 2) { // Split Y from UV. SplitUVRow(src_uyvy, rows + awidth, rows, awidth); memcpy(dst_y, rows, width); SplitUVRow(src_uyvy + src_stride_uyvy, rows + awidth * 2, rows, awidth); memcpy(dst_y + dst_stride_y, rows, width); InterpolateRow(dst_uv, rows + awidth, awidth, awidth, 128); src_uyvy += src_stride_uyvy * 2; dst_y += dst_stride_y * 2; dst_uv += dst_stride_uv; } if (height & 1) { // Split Y from UV. SplitUVRow(src_uyvy, dst_uv, rows, awidth); memcpy(dst_y, rows, width); } free_aligned_buffer_64(rows); } return 0; } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/rotate.cc000066400000000000000000000365731357355204000220200ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "libyuv/rotate.h" #include "libyuv/convert.h" #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" #include "libyuv/rotate_row.h" #include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif LIBYUV_API void TransposePlane(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height) { int i = height; #if defined(HAS_TRANSPOSEWX16_MSA) void (*TransposeWx16)(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) = TransposeWx16_C; #else void (*TransposeWx8)(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) = TransposeWx8_C; #endif #if defined(HAS_TRANSPOSEWX8_NEON) if (TestCpuFlag(kCpuHasNEON)) { TransposeWx8 = TransposeWx8_NEON; } #endif #if defined(HAS_TRANSPOSEWX8_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { TransposeWx8 = TransposeWx8_Any_SSSE3; if (IS_ALIGNED(width, 8)) { TransposeWx8 = TransposeWx8_SSSE3; } } #endif #if defined(HAS_TRANSPOSEWX8_FAST_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { TransposeWx8 = TransposeWx8_Fast_Any_SSSE3; if (IS_ALIGNED(width, 16)) { TransposeWx8 = TransposeWx8_Fast_SSSE3; } } #endif #if defined(HAS_TRANSPOSEWX16_MSA) if (TestCpuFlag(kCpuHasMSA)) { TransposeWx16 = TransposeWx16_Any_MSA; if (IS_ALIGNED(width, 16)) { TransposeWx16 = TransposeWx16_MSA; } } #endif #if defined(HAS_TRANSPOSEWX16_MSA) // Work across the source in 16x16 tiles while (i >= 16) { TransposeWx16(src, src_stride, dst, dst_stride, width); src += 16 * src_stride; // Go down 16 rows. dst += 16; // Move over 16 columns. i -= 16; } #else // Work across the source in 8x8 tiles while (i >= 8) { TransposeWx8(src, src_stride, dst, dst_stride, width); src += 8 * src_stride; // Go down 8 rows. dst += 8; // Move over 8 columns. i -= 8; } #endif if (i > 0) { TransposeWxH_C(src, src_stride, dst, dst_stride, width, i); } } LIBYUV_API void RotatePlane90(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height) { // Rotate by 90 is a transpose with the source read // from bottom to top. So set the source pointer to the end // of the buffer and flip the sign of the source stride. src += src_stride * (height - 1); src_stride = -src_stride; TransposePlane(src, src_stride, dst, dst_stride, width, height); } LIBYUV_API void RotatePlane270(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height) { // Rotate by 270 is a transpose with the destination written // from bottom to top. So set the destination pointer to the end // of the buffer and flip the sign of the destination stride. dst += dst_stride * (width - 1); dst_stride = -dst_stride; TransposePlane(src, src_stride, dst, dst_stride, width, height); } LIBYUV_API void RotatePlane180(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height) { // Swap first and last row and mirror the content. Uses a temporary row. 
align_buffer_64(row, width); const uint8_t* src_bot = src + src_stride * (height - 1); uint8_t* dst_bot = dst + dst_stride * (height - 1); int half_height = (height + 1) >> 1; int y; void (*MirrorRow)(const uint8_t* src, uint8_t* dst, int width) = MirrorRow_C; void (*CopyRow)(const uint8_t* src, uint8_t* dst, int width) = CopyRow_C; #if defined(HAS_MIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { MirrorRow = MirrorRow_Any_NEON; if (IS_ALIGNED(width, 16)) { MirrorRow = MirrorRow_NEON; } } #endif #if defined(HAS_MIRRORROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { MirrorRow = MirrorRow_Any_SSSE3; if (IS_ALIGNED(width, 16)) { MirrorRow = MirrorRow_SSSE3; } } #endif #if defined(HAS_MIRRORROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { MirrorRow = MirrorRow_Any_AVX2; if (IS_ALIGNED(width, 32)) { MirrorRow = MirrorRow_AVX2; } } #endif #if defined(HAS_MIRRORROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { MirrorRow = MirrorRow_Any_MSA; if (IS_ALIGNED(width, 64)) { MirrorRow = MirrorRow_MSA; } } #endif #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { CopyRow = IS_ALIGNED(width, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; } #endif #if defined(HAS_COPYROW_AVX) if (TestCpuFlag(kCpuHasAVX)) { CopyRow = IS_ALIGNED(width, 64) ? CopyRow_AVX : CopyRow_Any_AVX; } #endif #if defined(HAS_COPYROW_ERMS) if (TestCpuFlag(kCpuHasERMS)) { CopyRow = CopyRow_ERMS; } #endif #if defined(HAS_COPYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { CopyRow = IS_ALIGNED(width, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif // Odd height will harmlessly mirror the middle row twice. for (y = 0; y < half_height; ++y) { MirrorRow(src, row, width); // Mirror first row into a buffer src += src_stride; MirrorRow(src_bot, dst, width); // Mirror last row into first row dst += dst_stride; CopyRow(row, dst_bot, width); // Copy first mirrored row into last src_bot -= src_stride; dst_bot -= dst_stride; } free_aligned_buffer_64(row); } LIBYUV_API void TransposeUV(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width, int height) { int i = height; #if defined(HAS_TRANSPOSEUVWX16_MSA) void (*TransposeUVWx16)(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) = TransposeUVWx16_C; #else void (*TransposeUVWx8)(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) = TransposeUVWx8_C; #endif #if defined(HAS_TRANSPOSEUVWX8_NEON) if (TestCpuFlag(kCpuHasNEON)) { TransposeUVWx8 = TransposeUVWx8_NEON; } #endif #if defined(HAS_TRANSPOSEUVWX8_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { TransposeUVWx8 = TransposeUVWx8_Any_SSE2; if (IS_ALIGNED(width, 8)) { TransposeUVWx8 = TransposeUVWx8_SSE2; } } #endif #if defined(HAS_TRANSPOSEUVWX16_MSA) if (TestCpuFlag(kCpuHasMSA)) { TransposeUVWx16 = TransposeUVWx16_Any_MSA; if (IS_ALIGNED(width, 8)) { TransposeUVWx16 = TransposeUVWx16_MSA; } } #endif #if defined(HAS_TRANSPOSEUVWX16_MSA) // Work through the source in 8x8 tiles. while (i >= 16) { TransposeUVWx16(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width); src += 16 * src_stride; // Go down 16 rows. dst_a += 16; // Move over 8 columns. dst_b += 16; // Move over 8 columns. i -= 16; } #else // Work through the source in 8x8 tiles. while (i >= 8) { TransposeUVWx8(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width); src += 8 * src_stride; // Go down 8 rows. dst_a += 8; // Move over 8 columns. dst_b += 8; // Move over 8 columns. 
i -= 8; } #endif if (i > 0) { TransposeUVWxH_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width, i); } } LIBYUV_API void RotateUV90(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width, int height) { src += src_stride * (height - 1); src_stride = -src_stride; TransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width, height); } LIBYUV_API void RotateUV270(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width, int height) { dst_a += dst_stride_a * (width - 1); dst_b += dst_stride_b * (width - 1); dst_stride_a = -dst_stride_a; dst_stride_b = -dst_stride_b; TransposeUV(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width, height); } // Rotate 180 is a horizontal and vertical flip. LIBYUV_API void RotateUV180(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width, int height) { int i; void (*MirrorUVRow)(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v, int width) = MirrorUVRow_C; #if defined(HAS_MIRRORUVROW_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(width, 8)) { MirrorUVRow = MirrorUVRow_NEON; } #endif #if defined(HAS_MIRRORUVROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && IS_ALIGNED(width, 16)) { MirrorUVRow = MirrorUVRow_SSSE3; } #endif #if defined(HAS_MIRRORUVROW_MSA) if (TestCpuFlag(kCpuHasMSA) && IS_ALIGNED(width, 32)) { MirrorUVRow = MirrorUVRow_MSA; } #endif dst_a += dst_stride_a * (height - 1); dst_b += dst_stride_b * (height - 1); for (i = 0; i < height; ++i) { MirrorUVRow(src, dst_a, dst_b, width); src += src_stride; dst_a -= dst_stride_a; dst_b -= dst_stride_b; } } LIBYUV_API int RotatePlane(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height, enum RotationMode mode) { if (!src || width <= 0 || height == 0 || !dst) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src = src + (height - 1) * src_stride; src_stride = -src_stride; } switch (mode) { case kRotate0: // copy frame CopyPlane(src, src_stride, dst, dst_stride, width, height); return 0; case kRotate90: RotatePlane90(src, src_stride, dst, dst_stride, width, height); return 0; case kRotate270: RotatePlane270(src, src_stride, dst, dst_stride, width, height); return 0; case kRotate180: RotatePlane180(src, src_stride, dst, dst_stride, width, height); return 0; default: break; } return -1; } LIBYUV_API int I420Rotate(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height, enum RotationMode mode) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!src_y || !src_u || !src_v || width <= 0 || height == 0 || !dst_y || !dst_u || !dst_v) { return -1; } // Negative height means invert the image. 
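// The inversion points each source plane at its last row and negates the
// stride so reads walk upward. The chroma planes use halfheight because
// I420 subsamples U and V by 2 vertically.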
if (height < 0) { height = -height; halfheight = (height + 1) >> 1; src_y = src_y + (height - 1) * src_stride_y; src_u = src_u + (halfheight - 1) * src_stride_u; src_v = src_v + (halfheight - 1) * src_stride_v; src_stride_y = -src_stride_y; src_stride_u = -src_stride_u; src_stride_v = -src_stride_v; } switch (mode) { case kRotate0: // copy frame return I420Copy(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, width, height); case kRotate90: RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height); RotatePlane90(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); RotatePlane90(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); return 0; case kRotate270: RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height); RotatePlane270(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); RotatePlane270(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); return 0; case kRotate180: RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height); RotatePlane180(src_u, src_stride_u, dst_u, dst_stride_u, halfwidth, halfheight); RotatePlane180(src_v, src_stride_v, dst_v, dst_stride_v, halfwidth, halfheight); return 0; default: break; } return -1; } LIBYUV_API int NV12ToI420Rotate(const uint8_t* src_y, int src_stride_y, const uint8_t* src_uv, int src_stride_uv, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int width, int height, enum RotationMode mode) { int halfwidth = (width + 1) >> 1; int halfheight = (height + 1) >> 1; if (!src_y || !src_uv || width <= 0 || height == 0 || !dst_y || !dst_u || !dst_v) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; halfheight = (height + 1) >> 1; src_y = src_y + (height - 1) * src_stride_y; src_uv = src_uv + (halfheight - 1) * src_stride_uv; src_stride_y = -src_stride_y; src_stride_uv = -src_stride_uv; } switch (mode) { case kRotate0: // copy frame return NV12ToI420(src_y, src_stride_y, src_uv, src_stride_uv, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, width, height); case kRotate90: RotatePlane90(src_y, src_stride_y, dst_y, dst_stride_y, width, height); RotateUV90(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v, dst_stride_v, halfwidth, halfheight); return 0; case kRotate270: RotatePlane270(src_y, src_stride_y, dst_y, dst_stride_y, width, height); RotateUV270(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v, dst_stride_v, halfwidth, halfheight); return 0; case kRotate180: RotatePlane180(src_y, src_stride_y, dst_y, dst_stride_y, width, height); RotateUV180(src_uv, src_stride_uv, dst_u, dst_stride_u, dst_v, dst_stride_v, halfwidth, halfheight); return 0; default: break; } return -1; } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/rotate_any.cc000066400000000000000000000055541357355204000226620ustar00rootroot00000000000000/* * Copyright 2015 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "libyuv/rotate.h" #include "libyuv/rotate_row.h" #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #define TANY(NAMEANY, TPOS_SIMD, MASK) \ void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst, \ int dst_stride, int width) { \ int r = width & MASK; \ int n = width - r; \ if (n > 0) { \ TPOS_SIMD(src, src_stride, dst, dst_stride, n); \ } \ TransposeWx8_C(src + n, src_stride, dst + n * dst_stride, dst_stride, r); \ } #ifdef HAS_TRANSPOSEWX8_NEON TANY(TransposeWx8_Any_NEON, TransposeWx8_NEON, 7) #endif #ifdef HAS_TRANSPOSEWX8_SSSE3 TANY(TransposeWx8_Any_SSSE3, TransposeWx8_SSSE3, 7) #endif #ifdef HAS_TRANSPOSEWX8_FAST_SSSE3 TANY(TransposeWx8_Fast_Any_SSSE3, TransposeWx8_Fast_SSSE3, 15) #endif #ifdef HAS_TRANSPOSEWX16_MSA TANY(TransposeWx16_Any_MSA, TransposeWx16_MSA, 15) #endif #undef TANY #define TUVANY(NAMEANY, TPOS_SIMD, MASK) \ void NAMEANY(const uint8_t* src, int src_stride, uint8_t* dst_a, \ int dst_stride_a, uint8_t* dst_b, int dst_stride_b, \ int width) { \ int r = width & MASK; \ int n = width - r; \ if (n > 0) { \ TPOS_SIMD(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, n); \ } \ TransposeUVWx8_C(src + n * 2, src_stride, dst_a + n * dst_stride_a, \ dst_stride_a, dst_b + n * dst_stride_b, dst_stride_b, r); \ } #ifdef HAS_TRANSPOSEUVWX8_NEON TUVANY(TransposeUVWx8_Any_NEON, TransposeUVWx8_NEON, 7) #endif #ifdef HAS_TRANSPOSEUVWX8_SSE2 TUVANY(TransposeUVWx8_Any_SSE2, TransposeUVWx8_SSE2, 7) #endif #ifdef HAS_TRANSPOSEUVWX16_MSA TUVANY(TransposeUVWx16_Any_MSA, TransposeUVWx16_MSA, 7) #endif #undef TUVANY #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/rotate_argb.cc000066400000000000000000000160001357355204000227720ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/rotate.h" #include "libyuv/convert.h" #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" #include "libyuv/row.h" #include "libyuv/scale_row.h" /* for ScaleARGBRowDownEven_ */ #ifdef __cplusplus namespace libyuv { extern "C" { #endif static void ARGBTranspose(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { int i; int src_pixel_step = src_stride_argb >> 2; void (*ScaleARGBRowDownEven)( const uint8_t* src_argb, ptrdiff_t src_stride_argb, int src_step, uint8_t* dst_argb, int dst_width) = ScaleARGBRowDownEven_C; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_SSE2; if (IS_ALIGNED(height, 4)) { // Width of dest. ScaleARGBRowDownEven = ScaleARGBRowDownEven_SSE2; } } #endif #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON) if (TestCpuFlag(kCpuHasNEON)) { ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_NEON; if (IS_ALIGNED(height, 4)) { // Width of dest. ScaleARGBRowDownEven = ScaleARGBRowDownEven_NEON; } } #endif #if defined(HAS_SCALEARGBROWDOWNEVEN_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleARGBRowDownEven = ScaleARGBRowDownEven_Any_MSA; if (IS_ALIGNED(height, 4)) { // Width of dest. 
ScaleARGBRowDownEven = ScaleARGBRowDownEven_MSA; } } #endif for (i = 0; i < width; ++i) { // column of source to row of dest. ScaleARGBRowDownEven(src_argb, 0, src_pixel_step, dst_argb, height); dst_argb += dst_stride_argb; src_argb += 4; } } void ARGBRotate90(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { // Rotate by 90 is a ARGBTranspose with the source read // from bottom to top. So set the source pointer to the end // of the buffer and flip the sign of the source stride. src_argb += src_stride_argb * (height - 1); src_stride_argb = -src_stride_argb; ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height); } void ARGBRotate270(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { // Rotate by 270 is a ARGBTranspose with the destination written // from bottom to top. So set the destination pointer to the end // of the buffer and flip the sign of the destination stride. dst_argb += dst_stride_argb * (width - 1); dst_stride_argb = -dst_stride_argb; ARGBTranspose(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height); } void ARGBRotate180(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height) { // Swap first and last row and mirror the content. Uses a temporary row. align_buffer_64(row, width * 4); const uint8_t* src_bot = src_argb + src_stride_argb * (height - 1); uint8_t* dst_bot = dst_argb + dst_stride_argb * (height - 1); int half_height = (height + 1) >> 1; int y; void (*ARGBMirrorRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = ARGBMirrorRow_C; void (*CopyRow)(const uint8_t* src_argb, uint8_t* dst_argb, int width) = CopyRow_C; #if defined(HAS_ARGBMIRRORROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ARGBMirrorRow = ARGBMirrorRow_Any_NEON; if (IS_ALIGNED(width, 4)) { ARGBMirrorRow = ARGBMirrorRow_NEON; } } #endif #if defined(HAS_ARGBMIRRORROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ARGBMirrorRow = ARGBMirrorRow_Any_SSE2; if (IS_ALIGNED(width, 4)) { ARGBMirrorRow = ARGBMirrorRow_SSE2; } } #endif #if defined(HAS_ARGBMIRRORROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ARGBMirrorRow = ARGBMirrorRow_Any_AVX2; if (IS_ALIGNED(width, 8)) { ARGBMirrorRow = ARGBMirrorRow_AVX2; } } #endif #if defined(HAS_ARGBMIRRORROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ARGBMirrorRow = ARGBMirrorRow_Any_MSA; if (IS_ALIGNED(width, 16)) { ARGBMirrorRow = ARGBMirrorRow_MSA; } } #endif #if defined(HAS_COPYROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_SSE2 : CopyRow_Any_SSE2; } #endif #if defined(HAS_COPYROW_AVX) if (TestCpuFlag(kCpuHasAVX)) { CopyRow = IS_ALIGNED(width * 4, 64) ? CopyRow_AVX : CopyRow_Any_AVX; } #endif #if defined(HAS_COPYROW_ERMS) if (TestCpuFlag(kCpuHasERMS)) { CopyRow = CopyRow_ERMS; } #endif #if defined(HAS_COPYROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { CopyRow = IS_ALIGNED(width * 4, 32) ? CopyRow_NEON : CopyRow_Any_NEON; } #endif // Odd height will harmlessly mirror the middle row twice. 
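// Each pass settles one top/bottom row pair: the top source row is staged
// into 'row' before the bottom source row is mirrored over the top
// destination row, and only then does the staged copy land in the bottom
// destination row, so the top row is always read before it can be clobbered.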
for (y = 0; y < half_height; ++y) { ARGBMirrorRow(src_argb, row, width); // Mirror first row into a buffer ARGBMirrorRow(src_bot, dst_argb, width); // Mirror last row into first row CopyRow(row, dst_bot, width * 4); // Copy first mirrored row into last src_argb += src_stride_argb; dst_argb += dst_stride_argb; src_bot -= src_stride_argb; dst_bot -= dst_stride_argb; } free_aligned_buffer_64(row); } LIBYUV_API int ARGBRotate(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_argb, int dst_stride_argb, int width, int height, enum RotationMode mode) { if (!src_argb || width <= 0 || height == 0 || !dst_argb) { return -1; } // Negative height means invert the image. if (height < 0) { height = -height; src_argb = src_argb + (height - 1) * src_stride_argb; src_stride_argb = -src_stride_argb; } switch (mode) { case kRotate0: // copy frame return ARGBCopy(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height); case kRotate90: ARGBRotate90(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height); return 0; case kRotate270: ARGBRotate270(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height); return 0; case kRotate180: ARGBRotate180(src_argb, src_stride_argb, dst_argb, dst_stride_argb, width, height); return 0; default: break; } return -1; } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/rotate_common.cc000066400000000000000000000060061357355204000233540ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "libyuv/rotate_row.h" #include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif void TransposeWx8_C(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) { int i; for (i = 0; i < width; ++i) { dst[0] = src[0 * src_stride]; dst[1] = src[1 * src_stride]; dst[2] = src[2 * src_stride]; dst[3] = src[3 * src_stride]; dst[4] = src[4 * src_stride]; dst[5] = src[5 * src_stride]; dst[6] = src[6 * src_stride]; dst[7] = src[7 * src_stride]; ++src; dst += dst_stride; } } void TransposeUVWx8_C(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) { int i; for (i = 0; i < width; ++i) { dst_a[0] = src[0 * src_stride + 0]; dst_b[0] = src[0 * src_stride + 1]; dst_a[1] = src[1 * src_stride + 0]; dst_b[1] = src[1 * src_stride + 1]; dst_a[2] = src[2 * src_stride + 0]; dst_b[2] = src[2 * src_stride + 1]; dst_a[3] = src[3 * src_stride + 0]; dst_b[3] = src[3 * src_stride + 1]; dst_a[4] = src[4 * src_stride + 0]; dst_b[4] = src[4 * src_stride + 1]; dst_a[5] = src[5 * src_stride + 0]; dst_b[5] = src[5 * src_stride + 1]; dst_a[6] = src[6 * src_stride + 0]; dst_b[6] = src[6 * src_stride + 1]; dst_a[7] = src[7 * src_stride + 0]; dst_b[7] = src[7 * src_stride + 1]; src += 2; dst_a += dst_stride_a; dst_b += dst_stride_b; } } void TransposeWxH_C(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width, int height) { int i; for (i = 0; i < width; ++i) { int j; for (j = 0; j < height; ++j) { dst[i * dst_stride + j] = src[j * src_stride + i]; } } } void TransposeUVWxH_C(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width, int height) { int i; for (i = 0; i < width * 2; i += 2) { int j; for (j = 0; j < height; ++j) { dst_a[j + ((i >> 1) * dst_stride_a)] = src[i + (j * src_stride)]; dst_b[j + ((i >> 1) * dst_stride_b)] = src[i + (j * src_stride) + 1]; } } } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/rotate_gcc.cc000066400000000000000000000442661357355204000226320ustar00rootroot00000000000000/* * Copyright 2015 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/rotate_row.h" #include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // This module is for GCC x86 and x64. #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) // Transpose 8x8. 32 or 64 bit, but not NaCL for 64 bit. #if defined(HAS_TRANSPOSEWX8_SSSE3) void TransposeWx8_SSSE3(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) { asm volatile( // Read in the data from the source pointer. // First round of bit swap. 
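// (The 8x8 transpose below is three interleave rounds: punpcklbw on
// bytes, punpcklwd on 16-bit pairs, then punpckldq on 32-bit quads;
// each round doubles the element size until rows have become columns.)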
LABELALIGN "1: \n" "movq (%0),%%xmm0 \n" "movq (%0,%3),%%xmm1 \n" "lea (%0,%3,2),%0 \n" "punpcklbw %%xmm1,%%xmm0 \n" "movq (%0),%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" "palignr $0x8,%%xmm1,%%xmm1 \n" "movq (%0,%3),%%xmm3 \n" "lea (%0,%3,2),%0 \n" "punpcklbw %%xmm3,%%xmm2 \n" "movdqa %%xmm2,%%xmm3 \n" "movq (%0),%%xmm4 \n" "palignr $0x8,%%xmm3,%%xmm3 \n" "movq (%0,%3),%%xmm5 \n" "lea (%0,%3,2),%0 \n" "punpcklbw %%xmm5,%%xmm4 \n" "movdqa %%xmm4,%%xmm5 \n" "movq (%0),%%xmm6 \n" "palignr $0x8,%%xmm5,%%xmm5 \n" "movq (%0,%3),%%xmm7 \n" "lea (%0,%3,2),%0 \n" "punpcklbw %%xmm7,%%xmm6 \n" "neg %3 \n" "movdqa %%xmm6,%%xmm7 \n" "lea 0x8(%0,%3,8),%0 \n" "palignr $0x8,%%xmm7,%%xmm7 \n" "neg %3 \n" // Second round of bit swap. "punpcklwd %%xmm2,%%xmm0 \n" "punpcklwd %%xmm3,%%xmm1 \n" "movdqa %%xmm0,%%xmm2 \n" "movdqa %%xmm1,%%xmm3 \n" "palignr $0x8,%%xmm2,%%xmm2 \n" "palignr $0x8,%%xmm3,%%xmm3 \n" "punpcklwd %%xmm6,%%xmm4 \n" "punpcklwd %%xmm7,%%xmm5 \n" "movdqa %%xmm4,%%xmm6 \n" "movdqa %%xmm5,%%xmm7 \n" "palignr $0x8,%%xmm6,%%xmm6 \n" "palignr $0x8,%%xmm7,%%xmm7 \n" // Third round of bit swap. // Write to the destination pointer. "punpckldq %%xmm4,%%xmm0 \n" "movq %%xmm0,(%1) \n" "movdqa %%xmm0,%%xmm4 \n" "palignr $0x8,%%xmm4,%%xmm4 \n" "movq %%xmm4,(%1,%4) \n" "lea (%1,%4,2),%1 \n" "punpckldq %%xmm6,%%xmm2 \n" "movdqa %%xmm2,%%xmm6 \n" "movq %%xmm2,(%1) \n" "palignr $0x8,%%xmm6,%%xmm6 \n" "punpckldq %%xmm5,%%xmm1 \n" "movq %%xmm6,(%1,%4) \n" "lea (%1,%4,2),%1 \n" "movdqa %%xmm1,%%xmm5 \n" "movq %%xmm1,(%1) \n" "palignr $0x8,%%xmm5,%%xmm5 \n" "movq %%xmm5,(%1,%4) \n" "lea (%1,%4,2),%1 \n" "punpckldq %%xmm7,%%xmm3 \n" "movq %%xmm3,(%1) \n" "movdqa %%xmm3,%%xmm7 \n" "palignr $0x8,%%xmm7,%%xmm7 \n" "sub $0x8,%2 \n" "movq %%xmm7,(%1,%4) \n" "lea (%1,%4,2),%1 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "r"((intptr_t)(src_stride)), // %3 "r"((intptr_t)(dst_stride)) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // defined(HAS_TRANSPOSEWX8_SSSE3) // Transpose 16x8. 64 bit #if defined(HAS_TRANSPOSEWX8_FAST_SSSE3) void TransposeWx8_Fast_SSSE3(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) { asm volatile( // Read in the data from the source pointer. // First round of bit swap. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu (%0,%3),%%xmm1 \n" "lea (%0,%3,2),%0 \n" "movdqa %%xmm0,%%xmm8 \n" "punpcklbw %%xmm1,%%xmm0 \n" "punpckhbw %%xmm1,%%xmm8 \n" "movdqu (%0),%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm8,%%xmm9 \n" "palignr $0x8,%%xmm1,%%xmm1 \n" "palignr $0x8,%%xmm9,%%xmm9 \n" "movdqu (%0,%3),%%xmm3 \n" "lea (%0,%3,2),%0 \n" "movdqa %%xmm2,%%xmm10 \n" "punpcklbw %%xmm3,%%xmm2 \n" "punpckhbw %%xmm3,%%xmm10 \n" "movdqa %%xmm2,%%xmm3 \n" "movdqa %%xmm10,%%xmm11 \n" "movdqu (%0),%%xmm4 \n" "palignr $0x8,%%xmm3,%%xmm3 \n" "palignr $0x8,%%xmm11,%%xmm11 \n" "movdqu (%0,%3),%%xmm5 \n" "lea (%0,%3,2),%0 \n" "movdqa %%xmm4,%%xmm12 \n" "punpcklbw %%xmm5,%%xmm4 \n" "punpckhbw %%xmm5,%%xmm12 \n" "movdqa %%xmm4,%%xmm5 \n" "movdqa %%xmm12,%%xmm13 \n" "movdqu (%0),%%xmm6 \n" "palignr $0x8,%%xmm5,%%xmm5 \n" "palignr $0x8,%%xmm13,%%xmm13 \n" "movdqu (%0,%3),%%xmm7 \n" "lea (%0,%3,2),%0 \n" "movdqa %%xmm6,%%xmm14 \n" "punpcklbw %%xmm7,%%xmm6 \n" "punpckhbw %%xmm7,%%xmm14 \n" "neg %3 \n" "movdqa %%xmm6,%%xmm7 \n" "movdqa %%xmm14,%%xmm15 \n" "lea 0x10(%0,%3,8),%0 \n" "palignr $0x8,%%xmm7,%%xmm7 \n" "palignr $0x8,%%xmm15,%%xmm15 \n" "neg %3 \n" // Second round of bit swap. 
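// (The Fast variant is x86-64 only: xmm8-xmm15 carry a second 8x8
// block alongside xmm0-xmm7, so each pass transposes a 16-byte-wide
// swath instead of 8 - note the "sub $0x10" loop decrement.)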
"punpcklwd %%xmm2,%%xmm0 \n" "punpcklwd %%xmm3,%%xmm1 \n" "movdqa %%xmm0,%%xmm2 \n" "movdqa %%xmm1,%%xmm3 \n" "palignr $0x8,%%xmm2,%%xmm2 \n" "palignr $0x8,%%xmm3,%%xmm3 \n" "punpcklwd %%xmm6,%%xmm4 \n" "punpcklwd %%xmm7,%%xmm5 \n" "movdqa %%xmm4,%%xmm6 \n" "movdqa %%xmm5,%%xmm7 \n" "palignr $0x8,%%xmm6,%%xmm6 \n" "palignr $0x8,%%xmm7,%%xmm7 \n" "punpcklwd %%xmm10,%%xmm8 \n" "punpcklwd %%xmm11,%%xmm9 \n" "movdqa %%xmm8,%%xmm10 \n" "movdqa %%xmm9,%%xmm11 \n" "palignr $0x8,%%xmm10,%%xmm10 \n" "palignr $0x8,%%xmm11,%%xmm11 \n" "punpcklwd %%xmm14,%%xmm12 \n" "punpcklwd %%xmm15,%%xmm13 \n" "movdqa %%xmm12,%%xmm14 \n" "movdqa %%xmm13,%%xmm15 \n" "palignr $0x8,%%xmm14,%%xmm14 \n" "palignr $0x8,%%xmm15,%%xmm15 \n" // Third round of bit swap. // Write to the destination pointer. "punpckldq %%xmm4,%%xmm0 \n" "movq %%xmm0,(%1) \n" "movdqa %%xmm0,%%xmm4 \n" "palignr $0x8,%%xmm4,%%xmm4 \n" "movq %%xmm4,(%1,%4) \n" "lea (%1,%4,2),%1 \n" "punpckldq %%xmm6,%%xmm2 \n" "movdqa %%xmm2,%%xmm6 \n" "movq %%xmm2,(%1) \n" "palignr $0x8,%%xmm6,%%xmm6 \n" "punpckldq %%xmm5,%%xmm1 \n" "movq %%xmm6,(%1,%4) \n" "lea (%1,%4,2),%1 \n" "movdqa %%xmm1,%%xmm5 \n" "movq %%xmm1,(%1) \n" "palignr $0x8,%%xmm5,%%xmm5 \n" "movq %%xmm5,(%1,%4) \n" "lea (%1,%4,2),%1 \n" "punpckldq %%xmm7,%%xmm3 \n" "movq %%xmm3,(%1) \n" "movdqa %%xmm3,%%xmm7 \n" "palignr $0x8,%%xmm7,%%xmm7 \n" "movq %%xmm7,(%1,%4) \n" "lea (%1,%4,2),%1 \n" "punpckldq %%xmm12,%%xmm8 \n" "movq %%xmm8,(%1) \n" "movdqa %%xmm8,%%xmm12 \n" "palignr $0x8,%%xmm12,%%xmm12 \n" "movq %%xmm12,(%1,%4) \n" "lea (%1,%4,2),%1 \n" "punpckldq %%xmm14,%%xmm10 \n" "movdqa %%xmm10,%%xmm14 \n" "movq %%xmm10,(%1) \n" "palignr $0x8,%%xmm14,%%xmm14 \n" "punpckldq %%xmm13,%%xmm9 \n" "movq %%xmm14,(%1,%4) \n" "lea (%1,%4,2),%1 \n" "movdqa %%xmm9,%%xmm13 \n" "movq %%xmm9,(%1) \n" "palignr $0x8,%%xmm13,%%xmm13 \n" "movq %%xmm13,(%1,%4) \n" "lea (%1,%4,2),%1 \n" "punpckldq %%xmm15,%%xmm11 \n" "movq %%xmm11,(%1) \n" "movdqa %%xmm11,%%xmm15 \n" "palignr $0x8,%%xmm15,%%xmm15 \n" "sub $0x10,%2 \n" "movq %%xmm15,(%1,%4) \n" "lea (%1,%4,2),%1 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "r"((intptr_t)(src_stride)), // %3 "r"((intptr_t)(dst_stride)) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", "xmm15"); } #endif // defined(HAS_TRANSPOSEWX8_FAST_SSSE3) // Transpose UV 8x8. 64 bit. #if defined(HAS_TRANSPOSEUVWX8_SSE2) void TransposeUVWx8_SSE2(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) { asm volatile( // Read in the data from the source pointer. // First round of bit swap. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu (%0,%4),%%xmm1 \n" "lea (%0,%4,2),%0 \n" "movdqa %%xmm0,%%xmm8 \n" "punpcklbw %%xmm1,%%xmm0 \n" "punpckhbw %%xmm1,%%xmm8 \n" "movdqa %%xmm8,%%xmm1 \n" "movdqu (%0),%%xmm2 \n" "movdqu (%0,%4),%%xmm3 \n" "lea (%0,%4,2),%0 \n" "movdqa %%xmm2,%%xmm8 \n" "punpcklbw %%xmm3,%%xmm2 \n" "punpckhbw %%xmm3,%%xmm8 \n" "movdqa %%xmm8,%%xmm3 \n" "movdqu (%0),%%xmm4 \n" "movdqu (%0,%4),%%xmm5 \n" "lea (%0,%4,2),%0 \n" "movdqa %%xmm4,%%xmm8 \n" "punpcklbw %%xmm5,%%xmm4 \n" "punpckhbw %%xmm5,%%xmm8 \n" "movdqa %%xmm8,%%xmm5 \n" "movdqu (%0),%%xmm6 \n" "movdqu (%0,%4),%%xmm7 \n" "lea (%0,%4,2),%0 \n" "movdqa %%xmm6,%%xmm8 \n" "punpcklbw %%xmm7,%%xmm6 \n" "neg %4 \n" "lea 0x10(%0,%4,8),%0 \n" "punpckhbw %%xmm7,%%xmm8 \n" "movdqa %%xmm8,%%xmm7 \n" "neg %4 \n" // Second round of bit swap. 
"movdqa %%xmm0,%%xmm8 \n" "movdqa %%xmm1,%%xmm9 \n" "punpckhwd %%xmm2,%%xmm8 \n" "punpckhwd %%xmm3,%%xmm9 \n" "punpcklwd %%xmm2,%%xmm0 \n" "punpcklwd %%xmm3,%%xmm1 \n" "movdqa %%xmm8,%%xmm2 \n" "movdqa %%xmm9,%%xmm3 \n" "movdqa %%xmm4,%%xmm8 \n" "movdqa %%xmm5,%%xmm9 \n" "punpckhwd %%xmm6,%%xmm8 \n" "punpckhwd %%xmm7,%%xmm9 \n" "punpcklwd %%xmm6,%%xmm4 \n" "punpcklwd %%xmm7,%%xmm5 \n" "movdqa %%xmm8,%%xmm6 \n" "movdqa %%xmm9,%%xmm7 \n" // Third round of bit swap. // Write to the destination pointer. "movdqa %%xmm0,%%xmm8 \n" "punpckldq %%xmm4,%%xmm0 \n" "movlpd %%xmm0,(%1) \n" // Write back U channel "movhpd %%xmm0,(%2) \n" // Write back V channel "punpckhdq %%xmm4,%%xmm8 \n" "movlpd %%xmm8,(%1,%5) \n" "lea (%1,%5,2),%1 \n" "movhpd %%xmm8,(%2,%6) \n" "lea (%2,%6,2),%2 \n" "movdqa %%xmm2,%%xmm8 \n" "punpckldq %%xmm6,%%xmm2 \n" "movlpd %%xmm2,(%1) \n" "movhpd %%xmm2,(%2) \n" "punpckhdq %%xmm6,%%xmm8 \n" "movlpd %%xmm8,(%1,%5) \n" "lea (%1,%5,2),%1 \n" "movhpd %%xmm8,(%2,%6) \n" "lea (%2,%6,2),%2 \n" "movdqa %%xmm1,%%xmm8 \n" "punpckldq %%xmm5,%%xmm1 \n" "movlpd %%xmm1,(%1) \n" "movhpd %%xmm1,(%2) \n" "punpckhdq %%xmm5,%%xmm8 \n" "movlpd %%xmm8,(%1,%5) \n" "lea (%1,%5,2),%1 \n" "movhpd %%xmm8,(%2,%6) \n" "lea (%2,%6,2),%2 \n" "movdqa %%xmm3,%%xmm8 \n" "punpckldq %%xmm7,%%xmm3 \n" "movlpd %%xmm3,(%1) \n" "movhpd %%xmm3,(%2) \n" "punpckhdq %%xmm7,%%xmm8 \n" "sub $0x8,%3 \n" "movlpd %%xmm8,(%1,%5) \n" "lea (%1,%5,2),%1 \n" "movhpd %%xmm8,(%2,%6) \n" "lea (%2,%6,2),%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst_a), // %1 "+r"(dst_b), // %2 "+r"(width) // %3 : "r"((intptr_t)(src_stride)), // %4 "r"((intptr_t)(dst_stride_a)), // %5 "r"((intptr_t)(dst_stride_b)) // %6 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7", "xmm8", "xmm9"); } #endif // defined(HAS_TRANSPOSEUVWX8_SSE2) #endif // defined(__x86_64__) || defined(__i386__) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/rotate_msa.cc000066400000000000000000000232101357355204000226400ustar00rootroot00000000000000/* * Copyright 2016 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "libyuv/rotate_row.h" // This module is for GCC MSA #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #include "libyuv/macros_msa.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #define ILVRL_B(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ out0 = (v16u8)__msa_ilvr_b((v16i8)in1, (v16i8)in0); \ out1 = (v16u8)__msa_ilvl_b((v16i8)in1, (v16i8)in0); \ out2 = (v16u8)__msa_ilvr_b((v16i8)in3, (v16i8)in2); \ out3 = (v16u8)__msa_ilvl_b((v16i8)in3, (v16i8)in2); \ } #define ILVRL_H(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ out0 = (v16u8)__msa_ilvr_h((v8i16)in1, (v8i16)in0); \ out1 = (v16u8)__msa_ilvl_h((v8i16)in1, (v8i16)in0); \ out2 = (v16u8)__msa_ilvr_h((v8i16)in3, (v8i16)in2); \ out3 = (v16u8)__msa_ilvl_h((v8i16)in3, (v8i16)in2); \ } #define ILVRL_W(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ out0 = (v16u8)__msa_ilvr_w((v4i32)in1, (v4i32)in0); \ out1 = (v16u8)__msa_ilvl_w((v4i32)in1, (v4i32)in0); \ out2 = (v16u8)__msa_ilvr_w((v4i32)in3, (v4i32)in2); \ out3 = (v16u8)__msa_ilvl_w((v4i32)in3, (v4i32)in2); \ } #define ILVRL_D(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ out0 = (v16u8)__msa_ilvr_d((v2i64)in1, (v2i64)in0); \ out1 = (v16u8)__msa_ilvl_d((v2i64)in1, (v2i64)in0); \ out2 = (v16u8)__msa_ilvr_d((v2i64)in3, (v2i64)in2); \ out3 = (v16u8)__msa_ilvl_d((v2i64)in3, (v2i64)in2); \ } void TransposeWx16_C(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) { TransposeWx8_C(src, src_stride, dst, dst_stride, width); TransposeWx8_C((src + 8 * src_stride), src_stride, (dst + 8), dst_stride, width); } void TransposeUVWx16_C(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) { TransposeUVWx8_C(src, src_stride, dst_a, dst_stride_a, dst_b, dst_stride_b, width); TransposeUVWx8_C((src + 8 * src_stride), src_stride, (dst_a + 8), dst_stride_a, (dst_b + 8), dst_stride_b, width); } void TransposeWx16_MSA(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) { int x; const uint8_t* s; v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3; v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9; for (x = 0; x < width; x += 16) { s = src; src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3); src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7); ILVRL_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3); ILVRL_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7); src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3); src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src2 
= (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7); res8 = (v16u8)__msa_ilvr_w((v4i32)reg4, (v4i32)reg0); res9 = (v16u8)__msa_ilvl_w((v4i32)reg4, (v4i32)reg0); ILVRL_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3); ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride); dst += dst_stride * 4; res8 = (v16u8)__msa_ilvr_w((v4i32)reg5, (v4i32)reg1); res9 = (v16u8)__msa_ilvl_w((v4i32)reg5, (v4i32)reg1); ILVRL_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3); ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride); dst += dst_stride * 4; res8 = (v16u8)__msa_ilvr_w((v4i32)reg6, (v4i32)reg2); res9 = (v16u8)__msa_ilvl_w((v4i32)reg6, (v4i32)reg2); ILVRL_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3); ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride); dst += dst_stride * 4; res8 = (v16u8)__msa_ilvr_w((v4i32)reg7, (v4i32)reg3); res9 = (v16u8)__msa_ilvl_w((v4i32)reg7, (v4i32)reg3); ILVRL_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3); ST_UB4(dst0, dst1, dst2, dst3, dst, dst_stride); src += 16; dst += dst_stride * 4; } } void TransposeUVWx16_MSA(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) { int x; const uint8_t* s; v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3, vec0, vec1, vec2, vec3; v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; v16u8 res0, res1, res2, res3, res4, res5, res6, res7, res8, res9; for (x = 0; x < width; x += 8) { s = src; src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3); src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7); ILVRL_W(reg0, reg4, reg1, reg5, res0, res1, res2, res3); ILVRL_W(reg2, reg6, reg3, reg7, res4, res5, res6, res7); src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); ILVRL_H(vec0, vec2, vec1, vec3, reg0, reg1, reg2, reg3); src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src1 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src2 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; src3 = (v16u8)__msa_ld_b((v16i8*)s, 0); s += src_stride; ILVRL_B(src0, src1, src2, src3, vec0, vec1, vec2, vec3); ILVRL_H(vec0, vec2, vec1, vec3, reg4, reg5, reg6, reg7); res8 = (v16u8)__msa_ilvr_w((v4i32)reg4, (v4i32)reg0); res9 = (v16u8)__msa_ilvl_w((v4i32)reg4, (v4i32)reg0); ILVRL_D(res0, res8, res1, res9, dst0, dst1, dst2, dst3); ST_UB2(dst0, dst2, dst_a, dst_stride_a); ST_UB2(dst1, dst3, dst_b, dst_stride_b); dst_a += dst_stride_a * 2; dst_b += dst_stride_b * 2; res8 = (v16u8)__msa_ilvr_w((v4i32)reg5, (v4i32)reg1); res9 = (v16u8)__msa_ilvl_w((v4i32)reg5, (v4i32)reg1); ILVRL_D(res2, res8, res3, res9, dst0, dst1, dst2, dst3); ST_UB2(dst0, 
dst2, dst_a, dst_stride_a); ST_UB2(dst1, dst3, dst_b, dst_stride_b); dst_a += dst_stride_a * 2; dst_b += dst_stride_b * 2; res8 = (v16u8)__msa_ilvr_w((v4i32)reg6, (v4i32)reg2); res9 = (v16u8)__msa_ilvl_w((v4i32)reg6, (v4i32)reg2); ILVRL_D(res4, res8, res5, res9, dst0, dst1, dst2, dst3); ST_UB2(dst0, dst2, dst_a, dst_stride_a); ST_UB2(dst1, dst3, dst_b, dst_stride_b); dst_a += dst_stride_a * 2; dst_b += dst_stride_b * 2; res8 = (v16u8)__msa_ilvr_w((v4i32)reg7, (v4i32)reg3); res9 = (v16u8)__msa_ilvl_w((v4i32)reg7, (v4i32)reg3); ILVRL_D(res6, res8, res7, res9, dst0, dst1, dst2, dst3); ST_UB2(dst0, dst2, dst_a, dst_stride_a); ST_UB2(dst1, dst3, dst_b, dst_stride_b); src += 16; dst_a += dst_stride_a * 2; dst_b += dst_stride_b * 2; } } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) libvpx-1.8.2/third_party/libyuv/source/rotate_neon.cc000066400000000000000000000414321357355204000230250ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/rotate_row.h" #include "libyuv/row.h" #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ !defined(__aarch64__) static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}; void TransposeWx8_NEON(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) { const uint8_t* src_temp; asm volatile( // loops are on blocks of 8. loop will stop when // counter gets to or below 0. starting the counter // at w-8 allow for this "sub %5, #8 \n" // handle 8x8 blocks. this should be the majority of the plane "1: \n" "mov %0, %1 \n" "vld1.8 {d0}, [%0], %2 \n" "vld1.8 {d1}, [%0], %2 \n" "vld1.8 {d2}, [%0], %2 \n" "vld1.8 {d3}, [%0], %2 \n" "vld1.8 {d4}, [%0], %2 \n" "vld1.8 {d5}, [%0], %2 \n" "vld1.8 {d6}, [%0], %2 \n" "vld1.8 {d7}, [%0] \n" "vtrn.8 d1, d0 \n" "vtrn.8 d3, d2 \n" "vtrn.8 d5, d4 \n" "vtrn.8 d7, d6 \n" "vtrn.16 d1, d3 \n" "vtrn.16 d0, d2 \n" "vtrn.16 d5, d7 \n" "vtrn.16 d4, d6 \n" "vtrn.32 d1, d5 \n" "vtrn.32 d0, d4 \n" "vtrn.32 d3, d7 \n" "vtrn.32 d2, d6 \n" "vrev16.8 q0, q0 \n" "vrev16.8 q1, q1 \n" "vrev16.8 q2, q2 \n" "vrev16.8 q3, q3 \n" "mov %0, %3 \n" "vst1.8 {d1}, [%0], %4 \n" "vst1.8 {d0}, [%0], %4 \n" "vst1.8 {d3}, [%0], %4 \n" "vst1.8 {d2}, [%0], %4 \n" "vst1.8 {d5}, [%0], %4 \n" "vst1.8 {d4}, [%0], %4 \n" "vst1.8 {d7}, [%0], %4 \n" "vst1.8 {d6}, [%0] \n" "add %1, #8 \n" // src += 8 "add %3, %3, %4, lsl #3 \n" // dst += 8 * dst_stride "subs %5, #8 \n" // w -= 8 "bge 1b \n" // add 8 back to counter. if the result is 0 there are // no residuals. 
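// (Widths that are not a multiple of 8 fall through to the narrower
// 4x8, 2x8 and 1x8 blocks below.)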
"adds %5, #8 \n" "beq 4f \n" // some residual, so between 1 and 7 lines left to transpose "cmp %5, #2 \n" "blt 3f \n" "cmp %5, #4 \n" "blt 2f \n" // 4x8 block "mov %0, %1 \n" "vld1.32 {d0[0]}, [%0], %2 \n" "vld1.32 {d0[1]}, [%0], %2 \n" "vld1.32 {d1[0]}, [%0], %2 \n" "vld1.32 {d1[1]}, [%0], %2 \n" "vld1.32 {d2[0]}, [%0], %2 \n" "vld1.32 {d2[1]}, [%0], %2 \n" "vld1.32 {d3[0]}, [%0], %2 \n" "vld1.32 {d3[1]}, [%0] \n" "mov %0, %3 \n" "vld1.8 {q3}, [%6] \n" "vtbl.8 d4, {d0, d1}, d6 \n" "vtbl.8 d5, {d0, d1}, d7 \n" "vtbl.8 d0, {d2, d3}, d6 \n" "vtbl.8 d1, {d2, d3}, d7 \n" // TODO(frkoenig): Rework shuffle above to // write out with 4 instead of 8 writes. "vst1.32 {d4[0]}, [%0], %4 \n" "vst1.32 {d4[1]}, [%0], %4 \n" "vst1.32 {d5[0]}, [%0], %4 \n" "vst1.32 {d5[1]}, [%0] \n" "add %0, %3, #4 \n" "vst1.32 {d0[0]}, [%0], %4 \n" "vst1.32 {d0[1]}, [%0], %4 \n" "vst1.32 {d1[0]}, [%0], %4 \n" "vst1.32 {d1[1]}, [%0] \n" "add %1, #4 \n" // src += 4 "add %3, %3, %4, lsl #2 \n" // dst += 4 * dst_stride "subs %5, #4 \n" // w -= 4 "beq 4f \n" // some residual, check to see if it includes a 2x8 block, // or less "cmp %5, #2 \n" "blt 3f \n" // 2x8 block "2: \n" "mov %0, %1 \n" "vld1.16 {d0[0]}, [%0], %2 \n" "vld1.16 {d1[0]}, [%0], %2 \n" "vld1.16 {d0[1]}, [%0], %2 \n" "vld1.16 {d1[1]}, [%0], %2 \n" "vld1.16 {d0[2]}, [%0], %2 \n" "vld1.16 {d1[2]}, [%0], %2 \n" "vld1.16 {d0[3]}, [%0], %2 \n" "vld1.16 {d1[3]}, [%0] \n" "vtrn.8 d0, d1 \n" "mov %0, %3 \n" "vst1.64 {d0}, [%0], %4 \n" "vst1.64 {d1}, [%0] \n" "add %1, #2 \n" // src += 2 "add %3, %3, %4, lsl #1 \n" // dst += 2 * dst_stride "subs %5, #2 \n" // w -= 2 "beq 4f \n" // 1x8 block "3: \n" "vld1.8 {d0[0]}, [%1], %2 \n" "vld1.8 {d0[1]}, [%1], %2 \n" "vld1.8 {d0[2]}, [%1], %2 \n" "vld1.8 {d0[3]}, [%1], %2 \n" "vld1.8 {d0[4]}, [%1], %2 \n" "vld1.8 {d0[5]}, [%1], %2 \n" "vld1.8 {d0[6]}, [%1], %2 \n" "vld1.8 {d0[7]}, [%1] \n" "vst1.64 {d0}, [%3] \n" "4: \n" : "=&r"(src_temp), // %0 "+r"(src), // %1 "+r"(src_stride), // %2 "+r"(dst), // %3 "+r"(dst_stride), // %4 "+r"(width) // %5 : "r"(&kVTbl4x4Transpose) // %6 : "memory", "cc", "q0", "q1", "q2", "q3"); } static const uvec8 kVTbl4x4TransposeDi = {0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15}; void TransposeUVWx8_NEON(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) { const uint8_t* src_temp; asm volatile( // loops are on blocks of 8. loop will stop when // counter gets to or below 0. starting the counter // at w-8 allow for this "sub %7, #8 \n" // handle 8x8 blocks. 
this should be the majority of the plane "1: \n" "mov %0, %1 \n" "vld2.8 {d0, d1}, [%0], %2 \n" "vld2.8 {d2, d3}, [%0], %2 \n" "vld2.8 {d4, d5}, [%0], %2 \n" "vld2.8 {d6, d7}, [%0], %2 \n" "vld2.8 {d16, d17}, [%0], %2 \n" "vld2.8 {d18, d19}, [%0], %2 \n" "vld2.8 {d20, d21}, [%0], %2 \n" "vld2.8 {d22, d23}, [%0] \n" "vtrn.8 q1, q0 \n" "vtrn.8 q3, q2 \n" "vtrn.8 q9, q8 \n" "vtrn.8 q11, q10 \n" "vtrn.16 q1, q3 \n" "vtrn.16 q0, q2 \n" "vtrn.16 q9, q11 \n" "vtrn.16 q8, q10 \n" "vtrn.32 q1, q9 \n" "vtrn.32 q0, q8 \n" "vtrn.32 q3, q11 \n" "vtrn.32 q2, q10 \n" "vrev16.8 q0, q0 \n" "vrev16.8 q1, q1 \n" "vrev16.8 q2, q2 \n" "vrev16.8 q3, q3 \n" "vrev16.8 q8, q8 \n" "vrev16.8 q9, q9 \n" "vrev16.8 q10, q10 \n" "vrev16.8 q11, q11 \n" "mov %0, %3 \n" "vst1.8 {d2}, [%0], %4 \n" "vst1.8 {d0}, [%0], %4 \n" "vst1.8 {d6}, [%0], %4 \n" "vst1.8 {d4}, [%0], %4 \n" "vst1.8 {d18}, [%0], %4 \n" "vst1.8 {d16}, [%0], %4 \n" "vst1.8 {d22}, [%0], %4 \n" "vst1.8 {d20}, [%0] \n" "mov %0, %5 \n" "vst1.8 {d3}, [%0], %6 \n" "vst1.8 {d1}, [%0], %6 \n" "vst1.8 {d7}, [%0], %6 \n" "vst1.8 {d5}, [%0], %6 \n" "vst1.8 {d19}, [%0], %6 \n" "vst1.8 {d17}, [%0], %6 \n" "vst1.8 {d23}, [%0], %6 \n" "vst1.8 {d21}, [%0] \n" "add %1, #8*2 \n" // src += 8*2 "add %3, %3, %4, lsl #3 \n" // dst_a += 8 * dst_stride_a "add %5, %5, %6, lsl #3 \n" // dst_b += 8 * dst_stride_b "subs %7, #8 \n" // w -= 8 "bge 1b \n" // add 8 back to counter. if the result is 0 there are // no residuals. "adds %7, #8 \n" "beq 4f \n" // some residual, so between 1 and 7 lines left to transpose "cmp %7, #2 \n" "blt 3f \n" "cmp %7, #4 \n" "blt 2f \n" // TODO(frkoenig): Clean this up // 4x8 block "mov %0, %1 \n" "vld1.64 {d0}, [%0], %2 \n" "vld1.64 {d1}, [%0], %2 \n" "vld1.64 {d2}, [%0], %2 \n" "vld1.64 {d3}, [%0], %2 \n" "vld1.64 {d4}, [%0], %2 \n" "vld1.64 {d5}, [%0], %2 \n" "vld1.64 {d6}, [%0], %2 \n" "vld1.64 {d7}, [%0] \n" "vld1.8 {q15}, [%8] \n" "vtrn.8 q0, q1 \n" "vtrn.8 q2, q3 \n" "vtbl.8 d16, {d0, d1}, d30 \n" "vtbl.8 d17, {d0, d1}, d31 \n" "vtbl.8 d18, {d2, d3}, d30 \n" "vtbl.8 d19, {d2, d3}, d31 \n" "vtbl.8 d20, {d4, d5}, d30 \n" "vtbl.8 d21, {d4, d5}, d31 \n" "vtbl.8 d22, {d6, d7}, d30 \n" "vtbl.8 d23, {d6, d7}, d31 \n" "mov %0, %3 \n" "vst1.32 {d16[0]}, [%0], %4 \n" "vst1.32 {d16[1]}, [%0], %4 \n" "vst1.32 {d17[0]}, [%0], %4 \n" "vst1.32 {d17[1]}, [%0], %4 \n" "add %0, %3, #4 \n" "vst1.32 {d20[0]}, [%0], %4 \n" "vst1.32 {d20[1]}, [%0], %4 \n" "vst1.32 {d21[0]}, [%0], %4 \n" "vst1.32 {d21[1]}, [%0] \n" "mov %0, %5 \n" "vst1.32 {d18[0]}, [%0], %6 \n" "vst1.32 {d18[1]}, [%0], %6 \n" "vst1.32 {d19[0]}, [%0], %6 \n" "vst1.32 {d19[1]}, [%0], %6 \n" "add %0, %5, #4 \n" "vst1.32 {d22[0]}, [%0], %6 \n" "vst1.32 {d22[1]}, [%0], %6 \n" "vst1.32 {d23[0]}, [%0], %6 \n" "vst1.32 {d23[1]}, [%0] \n" "add %1, #4*2 \n" // src += 4 * 2 "add %3, %3, %4, lsl #2 \n" // dst_a += 4 * // dst_stride_a "add %5, %5, %6, lsl #2 \n" // dst_b += 4 * // dst_stride_b "subs %7, #4 \n" // w -= 4 "beq 4f \n" // some residual, check to see if it includes a 2x8 block, // or less "cmp %7, #2 \n" "blt 3f \n" // 2x8 block "2: \n" "mov %0, %1 \n" "vld2.16 {d0[0], d2[0]}, [%0], %2 \n" "vld2.16 {d1[0], d3[0]}, [%0], %2 \n" "vld2.16 {d0[1], d2[1]}, [%0], %2 \n" "vld2.16 {d1[1], d3[1]}, [%0], %2 \n" "vld2.16 {d0[2], d2[2]}, [%0], %2 \n" "vld2.16 {d1[2], d3[2]}, [%0], %2 \n" "vld2.16 {d0[3], d2[3]}, [%0], %2 \n" "vld2.16 {d1[3], d3[3]}, [%0] \n" "vtrn.8 d0, d1 \n" "vtrn.8 d2, d3 \n" "mov %0, %3 \n" "vst1.64 {d0}, [%0], %4 \n" "vst1.64 {d2}, [%0] \n" "mov %0, %5 \n" "vst1.64 {d1}, [%0], %6 \n" "vst1.64 
{d3}, [%0] \n" "add %1, #2*2 \n" // src += 2 * 2 "add %3, %3, %4, lsl #1 \n" // dst_a += 2 * // dst_stride_a "add %5, %5, %6, lsl #1 \n" // dst_b += 2 * // dst_stride_b "subs %7, #2 \n" // w -= 2 "beq 4f \n" // 1x8 block "3: \n" "vld2.8 {d0[0], d1[0]}, [%1], %2 \n" "vld2.8 {d0[1], d1[1]}, [%1], %2 \n" "vld2.8 {d0[2], d1[2]}, [%1], %2 \n" "vld2.8 {d0[3], d1[3]}, [%1], %2 \n" "vld2.8 {d0[4], d1[4]}, [%1], %2 \n" "vld2.8 {d0[5], d1[5]}, [%1], %2 \n" "vld2.8 {d0[6], d1[6]}, [%1], %2 \n" "vld2.8 {d0[7], d1[7]}, [%1] \n" "vst1.64 {d0}, [%3] \n" "vst1.64 {d1}, [%5] \n" "4: \n" : "=&r"(src_temp), // %0 "+r"(src), // %1 "+r"(src_stride), // %2 "+r"(dst_a), // %3 "+r"(dst_stride_a), // %4 "+r"(dst_b), // %5 "+r"(dst_stride_b), // %6 "+r"(width) // %7 : "r"(&kVTbl4x4TransposeDi) // %8 : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); } #endif // defined(__ARM_NEON__) && !defined(__aarch64__) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/rotate_neon64.cc000066400000000000000000000442511357355204000232010ustar00rootroot00000000000000/* * Copyright 2014 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/rotate_row.h" #include "libyuv/row.h" #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // This module is for GCC Neon armv8 64 bit. #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) static const uvec8 kVTbl4x4Transpose = {0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15}; void TransposeWx8_NEON(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) { const uint8_t* src_temp; asm volatile( // loops are on blocks of 8. loop will stop when // counter gets to or below 0. starting the counter // at w-8 allow for this "sub %w3, %w3, #8 \n" // handle 8x8 blocks. 
this should be the majority of the plane "1: \n" "mov %0, %1 \n" "ld1 {v0.8b}, [%0], %5 \n" "ld1 {v1.8b}, [%0], %5 \n" "ld1 {v2.8b}, [%0], %5 \n" "ld1 {v3.8b}, [%0], %5 \n" "ld1 {v4.8b}, [%0], %5 \n" "ld1 {v5.8b}, [%0], %5 \n" "ld1 {v6.8b}, [%0], %5 \n" "ld1 {v7.8b}, [%0] \n" "trn2 v16.8b, v0.8b, v1.8b \n" "trn1 v17.8b, v0.8b, v1.8b \n" "trn2 v18.8b, v2.8b, v3.8b \n" "trn1 v19.8b, v2.8b, v3.8b \n" "trn2 v20.8b, v4.8b, v5.8b \n" "trn1 v21.8b, v4.8b, v5.8b \n" "trn2 v22.8b, v6.8b, v7.8b \n" "trn1 v23.8b, v6.8b, v7.8b \n" "trn2 v3.4h, v17.4h, v19.4h \n" "trn1 v1.4h, v17.4h, v19.4h \n" "trn2 v2.4h, v16.4h, v18.4h \n" "trn1 v0.4h, v16.4h, v18.4h \n" "trn2 v7.4h, v21.4h, v23.4h \n" "trn1 v5.4h, v21.4h, v23.4h \n" "trn2 v6.4h, v20.4h, v22.4h \n" "trn1 v4.4h, v20.4h, v22.4h \n" "trn2 v21.2s, v1.2s, v5.2s \n" "trn1 v17.2s, v1.2s, v5.2s \n" "trn2 v20.2s, v0.2s, v4.2s \n" "trn1 v16.2s, v0.2s, v4.2s \n" "trn2 v23.2s, v3.2s, v7.2s \n" "trn1 v19.2s, v3.2s, v7.2s \n" "trn2 v22.2s, v2.2s, v6.2s \n" "trn1 v18.2s, v2.2s, v6.2s \n" "mov %0, %2 \n" "st1 {v17.8b}, [%0], %6 \n" "st1 {v16.8b}, [%0], %6 \n" "st1 {v19.8b}, [%0], %6 \n" "st1 {v18.8b}, [%0], %6 \n" "st1 {v21.8b}, [%0], %6 \n" "st1 {v20.8b}, [%0], %6 \n" "st1 {v23.8b}, [%0], %6 \n" "st1 {v22.8b}, [%0] \n" "add %1, %1, #8 \n" // src += 8 "add %2, %2, %6, lsl #3 \n" // dst += 8 * dst_stride "subs %w3, %w3, #8 \n" // w -= 8 "b.ge 1b \n" // add 8 back to counter. if the result is 0 there are // no residuals. "adds %w3, %w3, #8 \n" "b.eq 4f \n" // some residual, so between 1 and 7 lines left to transpose "cmp %w3, #2 \n" "b.lt 3f \n" "cmp %w3, #4 \n" "b.lt 2f \n" // 4x8 block "mov %0, %1 \n" "ld1 {v0.s}[0], [%0], %5 \n" "ld1 {v0.s}[1], [%0], %5 \n" "ld1 {v0.s}[2], [%0], %5 \n" "ld1 {v0.s}[3], [%0], %5 \n" "ld1 {v1.s}[0], [%0], %5 \n" "ld1 {v1.s}[1], [%0], %5 \n" "ld1 {v1.s}[2], [%0], %5 \n" "ld1 {v1.s}[3], [%0] \n" "mov %0, %2 \n" "ld1 {v2.16b}, [%4] \n" "tbl v3.16b, {v0.16b}, v2.16b \n" "tbl v0.16b, {v1.16b}, v2.16b \n" // TODO(frkoenig): Rework shuffle above to // write out with 4 instead of 8 writes. 
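// (kVTbl4x4Transpose gathers every 4th byte, so after the tbl lookups
// v3 and v0 each hold four transposed 4-byte columns, ready for the
// 32-bit lane stores below.)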
"st1 {v3.s}[0], [%0], %6 \n" "st1 {v3.s}[1], [%0], %6 \n" "st1 {v3.s}[2], [%0], %6 \n" "st1 {v3.s}[3], [%0] \n" "add %0, %2, #4 \n" "st1 {v0.s}[0], [%0], %6 \n" "st1 {v0.s}[1], [%0], %6 \n" "st1 {v0.s}[2], [%0], %6 \n" "st1 {v0.s}[3], [%0] \n" "add %1, %1, #4 \n" // src += 4 "add %2, %2, %6, lsl #2 \n" // dst += 4 * dst_stride "subs %w3, %w3, #4 \n" // w -= 4 "b.eq 4f \n" // some residual, check to see if it includes a 2x8 block, // or less "cmp %w3, #2 \n" "b.lt 3f \n" // 2x8 block "2: \n" "mov %0, %1 \n" "ld1 {v0.h}[0], [%0], %5 \n" "ld1 {v1.h}[0], [%0], %5 \n" "ld1 {v0.h}[1], [%0], %5 \n" "ld1 {v1.h}[1], [%0], %5 \n" "ld1 {v0.h}[2], [%0], %5 \n" "ld1 {v1.h}[2], [%0], %5 \n" "ld1 {v0.h}[3], [%0], %5 \n" "ld1 {v1.h}[3], [%0] \n" "trn2 v2.8b, v0.8b, v1.8b \n" "trn1 v3.8b, v0.8b, v1.8b \n" "mov %0, %2 \n" "st1 {v3.8b}, [%0], %6 \n" "st1 {v2.8b}, [%0] \n" "add %1, %1, #2 \n" // src += 2 "add %2, %2, %6, lsl #1 \n" // dst += 2 * dst_stride "subs %w3, %w3, #2 \n" // w -= 2 "b.eq 4f \n" // 1x8 block "3: \n" "ld1 {v0.b}[0], [%1], %5 \n" "ld1 {v0.b}[1], [%1], %5 \n" "ld1 {v0.b}[2], [%1], %5 \n" "ld1 {v0.b}[3], [%1], %5 \n" "ld1 {v0.b}[4], [%1], %5 \n" "ld1 {v0.b}[5], [%1], %5 \n" "ld1 {v0.b}[6], [%1], %5 \n" "ld1 {v0.b}[7], [%1] \n" "st1 {v0.8b}, [%2] \n" "4: \n" : "=&r"(src_temp), // %0 "+r"(src), // %1 "+r"(dst), // %2 "+r"(width) // %3 : "r"(&kVTbl4x4Transpose), // %4 "r"(static_cast(src_stride)), // %5 "r"(static_cast(dst_stride)) // %6 : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23"); } static const uint8_t kVTbl4x4TransposeDi[32] = { 0, 16, 32, 48, 2, 18, 34, 50, 4, 20, 36, 52, 6, 22, 38, 54, 1, 17, 33, 49, 3, 19, 35, 51, 5, 21, 37, 53, 7, 23, 39, 55}; void TransposeUVWx8_NEON(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int width) { const uint8_t* src_temp; asm volatile( // loops are on blocks of 8. loop will stop when // counter gets to or below 0. starting the counter // at w-8 allow for this "sub %w4, %w4, #8 \n" // handle 8x8 blocks. 
this should be the majority of the plane "1: \n" "mov %0, %1 \n" "ld1 {v0.16b}, [%0], %5 \n" "ld1 {v1.16b}, [%0], %5 \n" "ld1 {v2.16b}, [%0], %5 \n" "ld1 {v3.16b}, [%0], %5 \n" "ld1 {v4.16b}, [%0], %5 \n" "ld1 {v5.16b}, [%0], %5 \n" "ld1 {v6.16b}, [%0], %5 \n" "ld1 {v7.16b}, [%0] \n" "trn1 v16.16b, v0.16b, v1.16b \n" "trn2 v17.16b, v0.16b, v1.16b \n" "trn1 v18.16b, v2.16b, v3.16b \n" "trn2 v19.16b, v2.16b, v3.16b \n" "trn1 v20.16b, v4.16b, v5.16b \n" "trn2 v21.16b, v4.16b, v5.16b \n" "trn1 v22.16b, v6.16b, v7.16b \n" "trn2 v23.16b, v6.16b, v7.16b \n" "trn1 v0.8h, v16.8h, v18.8h \n" "trn2 v1.8h, v16.8h, v18.8h \n" "trn1 v2.8h, v20.8h, v22.8h \n" "trn2 v3.8h, v20.8h, v22.8h \n" "trn1 v4.8h, v17.8h, v19.8h \n" "trn2 v5.8h, v17.8h, v19.8h \n" "trn1 v6.8h, v21.8h, v23.8h \n" "trn2 v7.8h, v21.8h, v23.8h \n" "trn1 v16.4s, v0.4s, v2.4s \n" "trn2 v17.4s, v0.4s, v2.4s \n" "trn1 v18.4s, v1.4s, v3.4s \n" "trn2 v19.4s, v1.4s, v3.4s \n" "trn1 v20.4s, v4.4s, v6.4s \n" "trn2 v21.4s, v4.4s, v6.4s \n" "trn1 v22.4s, v5.4s, v7.4s \n" "trn2 v23.4s, v5.4s, v7.4s \n" "mov %0, %2 \n" "st1 {v16.d}[0], [%0], %6 \n" "st1 {v18.d}[0], [%0], %6 \n" "st1 {v17.d}[0], [%0], %6 \n" "st1 {v19.d}[0], [%0], %6 \n" "st1 {v16.d}[1], [%0], %6 \n" "st1 {v18.d}[1], [%0], %6 \n" "st1 {v17.d}[1], [%0], %6 \n" "st1 {v19.d}[1], [%0] \n" "mov %0, %3 \n" "st1 {v20.d}[0], [%0], %7 \n" "st1 {v22.d}[0], [%0], %7 \n" "st1 {v21.d}[0], [%0], %7 \n" "st1 {v23.d}[0], [%0], %7 \n" "st1 {v20.d}[1], [%0], %7 \n" "st1 {v22.d}[1], [%0], %7 \n" "st1 {v21.d}[1], [%0], %7 \n" "st1 {v23.d}[1], [%0] \n" "add %1, %1, #16 \n" // src += 8*2 "add %2, %2, %6, lsl #3 \n" // dst_a += 8 * // dst_stride_a "add %3, %3, %7, lsl #3 \n" // dst_b += 8 * // dst_stride_b "subs %w4, %w4, #8 \n" // w -= 8 "b.ge 1b \n" // add 8 back to counter. if the result is 0 there are // no residuals. 
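// (As in the 32-bit NEON path, leftover widths are finished by the
// 4x8, 2x8 and 1x8 blocks that follow.)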
"adds %w4, %w4, #8 \n" "b.eq 4f \n" // some residual, so between 1 and 7 lines left to transpose "cmp %w4, #2 \n" "b.lt 3f \n" "cmp %w4, #4 \n" "b.lt 2f \n" // TODO(frkoenig): Clean this up // 4x8 block "mov %0, %1 \n" "ld1 {v0.8b}, [%0], %5 \n" "ld1 {v1.8b}, [%0], %5 \n" "ld1 {v2.8b}, [%0], %5 \n" "ld1 {v3.8b}, [%0], %5 \n" "ld1 {v4.8b}, [%0], %5 \n" "ld1 {v5.8b}, [%0], %5 \n" "ld1 {v6.8b}, [%0], %5 \n" "ld1 {v7.8b}, [%0] \n" "ld1 {v30.16b}, [%8], #16 \n" "ld1 {v31.16b}, [%8] \n" "tbl v16.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v30.16b \n" "tbl v17.16b, {v0.16b, v1.16b, v2.16b, v3.16b}, v31.16b \n" "tbl v18.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v30.16b \n" "tbl v19.16b, {v4.16b, v5.16b, v6.16b, v7.16b}, v31.16b \n" "mov %0, %2 \n" "st1 {v16.s}[0], [%0], %6 \n" "st1 {v16.s}[1], [%0], %6 \n" "st1 {v16.s}[2], [%0], %6 \n" "st1 {v16.s}[3], [%0], %6 \n" "add %0, %2, #4 \n" "st1 {v18.s}[0], [%0], %6 \n" "st1 {v18.s}[1], [%0], %6 \n" "st1 {v18.s}[2], [%0], %6 \n" "st1 {v18.s}[3], [%0] \n" "mov %0, %3 \n" "st1 {v17.s}[0], [%0], %7 \n" "st1 {v17.s}[1], [%0], %7 \n" "st1 {v17.s}[2], [%0], %7 \n" "st1 {v17.s}[3], [%0], %7 \n" "add %0, %3, #4 \n" "st1 {v19.s}[0], [%0], %7 \n" "st1 {v19.s}[1], [%0], %7 \n" "st1 {v19.s}[2], [%0], %7 \n" "st1 {v19.s}[3], [%0] \n" "add %1, %1, #8 \n" // src += 4 * 2 "add %2, %2, %6, lsl #2 \n" // dst_a += 4 * // dst_stride_a "add %3, %3, %7, lsl #2 \n" // dst_b += 4 * // dst_stride_b "subs %w4, %w4, #4 \n" // w -= 4 "b.eq 4f \n" // some residual, check to see if it includes a 2x8 block, // or less "cmp %w4, #2 \n" "b.lt 3f \n" // 2x8 block "2: \n" "mov %0, %1 \n" "ld2 {v0.h, v1.h}[0], [%0], %5 \n" "ld2 {v2.h, v3.h}[0], [%0], %5 \n" "ld2 {v0.h, v1.h}[1], [%0], %5 \n" "ld2 {v2.h, v3.h}[1], [%0], %5 \n" "ld2 {v0.h, v1.h}[2], [%0], %5 \n" "ld2 {v2.h, v3.h}[2], [%0], %5 \n" "ld2 {v0.h, v1.h}[3], [%0], %5 \n" "ld2 {v2.h, v3.h}[3], [%0] \n" "trn1 v4.8b, v0.8b, v2.8b \n" "trn2 v5.8b, v0.8b, v2.8b \n" "trn1 v6.8b, v1.8b, v3.8b \n" "trn2 v7.8b, v1.8b, v3.8b \n" "mov %0, %2 \n" "st1 {v4.d}[0], [%0], %6 \n" "st1 {v6.d}[0], [%0] \n" "mov %0, %3 \n" "st1 {v5.d}[0], [%0], %7 \n" "st1 {v7.d}[0], [%0] \n" "add %1, %1, #4 \n" // src += 2 * 2 "add %2, %2, %6, lsl #1 \n" // dst_a += 2 * // dst_stride_a "add %3, %3, %7, lsl #1 \n" // dst_b += 2 * // dst_stride_b "subs %w4, %w4, #2 \n" // w -= 2 "b.eq 4f \n" // 1x8 block "3: \n" "ld2 {v0.b, v1.b}[0], [%1], %5 \n" "ld2 {v0.b, v1.b}[1], [%1], %5 \n" "ld2 {v0.b, v1.b}[2], [%1], %5 \n" "ld2 {v0.b, v1.b}[3], [%1], %5 \n" "ld2 {v0.b, v1.b}[4], [%1], %5 \n" "ld2 {v0.b, v1.b}[5], [%1], %5 \n" "ld2 {v0.b, v1.b}[6], [%1], %5 \n" "ld2 {v0.b, v1.b}[7], [%1] \n" "st1 {v0.d}[0], [%2] \n" "st1 {v1.d}[0], [%3] \n" "4: \n" : "=&r"(src_temp), // %0 "+r"(src), // %1 "+r"(dst_a), // %2 "+r"(dst_b), // %3 "+r"(width) // %4 : "r"(static_cast(src_stride)), // %5 "r"(static_cast(dst_stride_a)), // %6 "r"(static_cast(dst_stride_b)), // %7 "r"(&kVTbl4x4TransposeDi) // %8 : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v30", "v31"); } #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/rotate_win.cc000066400000000000000000000170521357355204000226640ustar00rootroot00000000000000/* * Copyright 2013 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/rotate_row.h" #include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // This module is for 32 bit Visual C x86 and clangcl #if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER) __declspec(naked) void TransposeWx8_SSSE3(const uint8_t* src, int src_stride, uint8_t* dst, int dst_stride, int width) { __asm { push edi push esi push ebp mov eax, [esp + 12 + 4] // src mov edi, [esp + 12 + 8] // src_stride mov edx, [esp + 12 + 12] // dst mov esi, [esp + 12 + 16] // dst_stride mov ecx, [esp + 12 + 20] // width // Read in the data from the source pointer. // First round of bit swap. align 4 convertloop: movq xmm0, qword ptr [eax] lea ebp, [eax + 8] movq xmm1, qword ptr [eax + edi] lea eax, [eax + 2 * edi] punpcklbw xmm0, xmm1 movq xmm2, qword ptr [eax] movdqa xmm1, xmm0 palignr xmm1, xmm1, 8 movq xmm3, qword ptr [eax + edi] lea eax, [eax + 2 * edi] punpcklbw xmm2, xmm3 movdqa xmm3, xmm2 movq xmm4, qword ptr [eax] palignr xmm3, xmm3, 8 movq xmm5, qword ptr [eax + edi] punpcklbw xmm4, xmm5 lea eax, [eax + 2 * edi] movdqa xmm5, xmm4 movq xmm6, qword ptr [eax] palignr xmm5, xmm5, 8 movq xmm7, qword ptr [eax + edi] punpcklbw xmm6, xmm7 mov eax, ebp movdqa xmm7, xmm6 palignr xmm7, xmm7, 8 // Second round of bit swap. punpcklwd xmm0, xmm2 punpcklwd xmm1, xmm3 movdqa xmm2, xmm0 movdqa xmm3, xmm1 palignr xmm2, xmm2, 8 palignr xmm3, xmm3, 8 punpcklwd xmm4, xmm6 punpcklwd xmm5, xmm7 movdqa xmm6, xmm4 movdqa xmm7, xmm5 palignr xmm6, xmm6, 8 palignr xmm7, xmm7, 8 // Third round of bit swap. // Write to the destination pointer. punpckldq xmm0, xmm4 movq qword ptr [edx], xmm0 movdqa xmm4, xmm0 palignr xmm4, xmm4, 8 movq qword ptr [edx + esi], xmm4 lea edx, [edx + 2 * esi] punpckldq xmm2, xmm6 movdqa xmm6, xmm2 palignr xmm6, xmm6, 8 movq qword ptr [edx], xmm2 punpckldq xmm1, xmm5 movq qword ptr [edx + esi], xmm6 lea edx, [edx + 2 * esi] movdqa xmm5, xmm1 movq qword ptr [edx], xmm1 palignr xmm5, xmm5, 8 punpckldq xmm3, xmm7 movq qword ptr [edx + esi], xmm5 lea edx, [edx + 2 * esi] movq qword ptr [edx], xmm3 movdqa xmm7, xmm3 palignr xmm7, xmm7, 8 sub ecx, 8 movq qword ptr [edx + esi], xmm7 lea edx, [edx + 2 * esi] jg convertloop pop ebp pop esi pop edi ret } } __declspec(naked) void TransposeUVWx8_SSE2(const uint8_t* src, int src_stride, uint8_t* dst_a, int dst_stride_a, uint8_t* dst_b, int dst_stride_b, int w) { __asm { push ebx push esi push edi push ebp mov eax, [esp + 16 + 4] // src mov edi, [esp + 16 + 8] // src_stride mov edx, [esp + 16 + 12] // dst_a mov esi, [esp + 16 + 16] // dst_stride_a mov ebx, [esp + 16 + 20] // dst_b mov ebp, [esp + 16 + 24] // dst_stride_b mov ecx, esp sub esp, 4 + 16 and esp, ~15 mov [esp + 16], ecx mov ecx, [ecx + 16 + 28] // w align 4 // Read in the data from the source pointer. // First round of bit swap. convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + edi] lea eax, [eax + 2 * edi] movdqa xmm7, xmm0 // use xmm7 as temp register. 
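// (32-bit x86 exposes only xmm0-xmm7, so xmm7 plus the 16-byte stack
// slot reserved above stand in for the extra registers that the
// 64-bit GCC version of this routine uses.)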
punpcklbw xmm0, xmm1 punpckhbw xmm7, xmm1 movdqa xmm1, xmm7 movdqu xmm2, [eax] movdqu xmm3, [eax + edi] lea eax, [eax + 2 * edi] movdqa xmm7, xmm2 punpcklbw xmm2, xmm3 punpckhbw xmm7, xmm3 movdqa xmm3, xmm7 movdqu xmm4, [eax] movdqu xmm5, [eax + edi] lea eax, [eax + 2 * edi] movdqa xmm7, xmm4 punpcklbw xmm4, xmm5 punpckhbw xmm7, xmm5 movdqa xmm5, xmm7 movdqu xmm6, [eax] movdqu xmm7, [eax + edi] lea eax, [eax + 2 * edi] movdqu [esp], xmm5 // backup xmm5 neg edi movdqa xmm5, xmm6 // use xmm5 as temp register. punpcklbw xmm6, xmm7 punpckhbw xmm5, xmm7 movdqa xmm7, xmm5 lea eax, [eax + 8 * edi + 16] neg edi // Second round of bit swap. movdqa xmm5, xmm0 punpcklwd xmm0, xmm2 punpckhwd xmm5, xmm2 movdqa xmm2, xmm5 movdqa xmm5, xmm1 punpcklwd xmm1, xmm3 punpckhwd xmm5, xmm3 movdqa xmm3, xmm5 movdqa xmm5, xmm4 punpcklwd xmm4, xmm6 punpckhwd xmm5, xmm6 movdqa xmm6, xmm5 movdqu xmm5, [esp] // restore xmm5 movdqu [esp], xmm6 // backup xmm6 movdqa xmm6, xmm5 // use xmm6 as temp register. punpcklwd xmm5, xmm7 punpckhwd xmm6, xmm7 movdqa xmm7, xmm6 // Third round of bit swap. // Write to the destination pointer. movdqa xmm6, xmm0 punpckldq xmm0, xmm4 punpckhdq xmm6, xmm4 movdqa xmm4, xmm6 movdqu xmm6, [esp] // restore xmm6 movlpd qword ptr [edx], xmm0 movhpd qword ptr [ebx], xmm0 movlpd qword ptr [edx + esi], xmm4 lea edx, [edx + 2 * esi] movhpd qword ptr [ebx + ebp], xmm4 lea ebx, [ebx + 2 * ebp] movdqa xmm0, xmm2 // use xmm0 as the temp register. punpckldq xmm2, xmm6 movlpd qword ptr [edx], xmm2 movhpd qword ptr [ebx], xmm2 punpckhdq xmm0, xmm6 movlpd qword ptr [edx + esi], xmm0 lea edx, [edx + 2 * esi] movhpd qword ptr [ebx + ebp], xmm0 lea ebx, [ebx + 2 * ebp] movdqa xmm0, xmm1 // use xmm0 as the temp register. punpckldq xmm1, xmm5 movlpd qword ptr [edx], xmm1 movhpd qword ptr [ebx], xmm1 punpckhdq xmm0, xmm5 movlpd qword ptr [edx + esi], xmm0 lea edx, [edx + 2 * esi] movhpd qword ptr [ebx + ebp], xmm0 lea ebx, [ebx + 2 * ebp] movdqa xmm0, xmm3 // use xmm0 as the temp register. punpckldq xmm3, xmm7 movlpd qword ptr [edx], xmm3 movhpd qword ptr [ebx], xmm3 punpckhdq xmm0, xmm7 sub ecx, 8 movlpd qword ptr [edx + esi], xmm0 lea edx, [edx + 2 * esi] movhpd qword ptr [ebx + ebp], xmm0 lea ebx, [ebx + 2 * ebp] jg convertloop mov esp, [esp + 16] pop ebp pop edi pop esi pop ebx ret } } #endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/row_any.cc000066400000000000000000001433131357355204000221670ustar00rootroot00000000000000/* * Copyright 2012 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/row.h" #include // For memset. #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // memset for temp is meant to clear the source buffer (not dest) so that // SIMD that reads full multiple of 16 bytes will not trigger msan errors. // memset is not needed for production, as the garbage values are processed but // not used, although there may be edge cases for subsampling. // The size of the buffer is based on the largest read, which can be inferred // by the source type (e.g. 
// by the source type (e.g. ARGB) and the mask (last parameter), or by
// examining the source code for how much the source pointers are advanced.

// Subsampled source needs to be increased by 1 if not even.
#define SS(width, shift) (((width) + (1 << (shift)) - 1) >> (shift))

// Any 4 planes to 1 with yuvconstants
#define ANY41C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
               const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_ptr, \
               const struct YuvConstants* yuvconstants, int width) { \
    SIMD_ALIGNED(uint8_t temp[64 * 5]); \
    memset(temp, 0, 64 * 4); /* for msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, a_buf, dst_ptr, yuvconstants, n); \
    } \
    memcpy(temp, y_buf + n, r); \
    memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
    memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
    memcpy(temp + 192, a_buf + n, r); \
    ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, temp + 256, \
             yuvconstants, MASK + 1); \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 256, \
           SS(r, DUVSHIFT) * BPP); \
  }

#ifdef HAS_I422ALPHATOARGBROW_SSSE3
ANY41C(I422AlphaToARGBRow_Any_SSSE3, I422AlphaToARGBRow_SSSE3, 1, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_AVX2
ANY41C(I422AlphaToARGBRow_Any_AVX2, I422AlphaToARGBRow_AVX2, 1, 0, 4, 15)
#endif
#ifdef HAS_I422ALPHATOARGBROW_NEON
ANY41C(I422AlphaToARGBRow_Any_NEON, I422AlphaToARGBRow_NEON, 1, 0, 4, 7)
#endif
#ifdef HAS_I422ALPHATOARGBROW_MSA
ANY41C(I422AlphaToARGBRow_Any_MSA, I422AlphaToARGBRow_MSA, 1, 0, 4, 7)
#endif
#undef ANY41C

// Any 3 planes to 1.
#define ANY31(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \
  void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \
               const uint8_t* v_buf, uint8_t* dst_ptr, int width) { \
    SIMD_ALIGNED(uint8_t temp[64 * 4]); \
    memset(temp, 0, 64 * 3); /* for YUY2 and msan */ \
    int r = width & MASK; \
    int n = width & ~MASK; \
    if (n > 0) { \
      ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, n); \
    } \
    memcpy(temp, y_buf + n, r); \
    memcpy(temp + 64, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
    memcpy(temp + 128, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \
    ANY_SIMD(temp, temp + 64, temp + 128, temp + 192, MASK + 1); \
    memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 192, \
           SS(r, DUVSHIFT) * BPP); \
  }

// Merge functions.
#ifdef HAS_MERGERGBROW_SSSE3
ANY31(MergeRGBRow_Any_SSSE3, MergeRGBRow_SSSE3, 0, 0, 3, 15)
#endif
#ifdef HAS_MERGERGBROW_NEON
ANY31(MergeRGBRow_Any_NEON, MergeRGBRow_NEON, 0, 0, 3, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_SSE2
ANY31(I422ToYUY2Row_Any_SSE2, I422ToYUY2Row_SSE2, 1, 1, 4, 15)
ANY31(I422ToUYVYRow_Any_SSE2, I422ToUYVYRow_SSE2, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_AVX2
ANY31(I422ToYUY2Row_Any_AVX2, I422ToYUY2Row_AVX2, 1, 1, 4, 31)
ANY31(I422ToUYVYRow_Any_AVX2, I422ToUYVYRow_AVX2, 1, 1, 4, 31)
#endif
#ifdef HAS_I422TOYUY2ROW_NEON
ANY31(I422ToYUY2Row_Any_NEON, I422ToYUY2Row_NEON, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOYUY2ROW_MSA
ANY31(I422ToYUY2Row_Any_MSA, I422ToYUY2Row_MSA, 1, 1, 4, 31)
#endif
#ifdef HAS_I422TOUYVYROW_NEON
ANY31(I422ToUYVYRow_Any_NEON, I422ToUYVYRow_NEON, 1, 1, 4, 15)
#endif
#ifdef HAS_I422TOUYVYROW_MSA
ANY31(I422ToUYVYRow_Any_MSA, I422ToUYVYRow_MSA, 1, 1, 4, 31)
#endif
#ifdef HAS_BLENDPLANEROW_AVX2
ANY31(BlendPlaneRow_Any_AVX2, BlendPlaneRow_AVX2, 0, 0, 1, 31)
#endif
#ifdef HAS_BLENDPLANEROW_SSSE3
ANY31(BlendPlaneRow_Any_SSSE3, BlendPlaneRow_SSSE3, 0, 0, 1, 7)
#endif
#undef ANY31

// Note that odd width replication includes 444 due to implementation
// on arm that subsamples 444 to 422 internally.
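// Illustration, not part of the library (fenced off with #if 0): the shape
// every ANY* macro above and below expands to, shown for a hypothetical
// one-plane kernel Foo_SIMD that processes 16 pixels per call (MASK = 15).
// The bulk of the row goes straight to the kernel; the r leftover pixels are
// staged in a zeroed, aligned temp buffer so the kernel can read one full
// 16-pixel group without running past the end of the caller's row.
#if 0
void Foo_SIMD(const uint8_t* src_ptr, uint8_t* dst_ptr, int width);

static void FooRow_Any_Sketch(const uint8_t* src_ptr, uint8_t* dst_ptr,
                              int width) {
  SIMD_ALIGNED(uint8_t temp[64 * 2]);
  memset(temp, 0, 64);  /* pad the tail so msan sees initialized source */
  int r = width & 15;   /* pixels left after the last full group */
  int n = width & ~15;  /* bulk width, a multiple of 16 */
  if (n > 0) {
    Foo_SIMD(src_ptr, dst_ptr, n);    /* fast path over the bulk */
  }
  memcpy(temp, src_ptr + n, r);       /* stage the partial group */
  Foo_SIMD(temp, temp + 64, 16);      /* one full, padded group */
  memcpy(dst_ptr + n, temp + 64, r);  /* commit only the r real pixels */
}
#endif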
// Any 3 planes to 1 with yuvconstants #define ANY31C(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, BPP, MASK) \ void NAMEANY(const uint8_t* y_buf, const uint8_t* u_buf, \ const uint8_t* v_buf, uint8_t* dst_ptr, \ const struct YuvConstants* yuvconstants, int width) { \ SIMD_ALIGNED(uint8_t temp[128 * 4]); \ memset(temp, 0, 128 * 3); /* for YUY2 and msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ } \ memcpy(temp, y_buf + n, r); \ memcpy(temp + 128, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ memcpy(temp + 256, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT)); \ if (width & 1) { \ temp[128 + SS(r, UVSHIFT)] = temp[128 + SS(r, UVSHIFT) - 1]; \ temp[256 + SS(r, UVSHIFT)] = temp[256 + SS(r, UVSHIFT) - 1]; \ } \ ANY_SIMD(temp, temp + 128, temp + 256, temp + 384, yuvconstants, \ MASK + 1); \ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, temp + 384, \ SS(r, DUVSHIFT) * BPP); \ } #ifdef HAS_I422TOARGBROW_SSSE3 ANY31C(I422ToARGBRow_Any_SSSE3, I422ToARGBRow_SSSE3, 1, 0, 4, 7) #endif #ifdef HAS_I422TOAR30ROW_SSSE3 ANY31C(I422ToAR30Row_Any_SSSE3, I422ToAR30Row_SSSE3, 1, 0, 4, 7) #endif #ifdef HAS_I422TOAR30ROW_AVX2 ANY31C(I422ToAR30Row_Any_AVX2, I422ToAR30Row_AVX2, 1, 0, 4, 15) #endif #ifdef HAS_I444TOARGBROW_SSSE3 ANY31C(I444ToARGBRow_Any_SSSE3, I444ToARGBRow_SSSE3, 0, 0, 4, 7) ANY31C(I422ToRGBARow_Any_SSSE3, I422ToRGBARow_SSSE3, 1, 0, 4, 7) ANY31C(I422ToARGB4444Row_Any_SSSE3, I422ToARGB4444Row_SSSE3, 1, 0, 2, 7) ANY31C(I422ToARGB1555Row_Any_SSSE3, I422ToARGB1555Row_SSSE3, 1, 0, 2, 7) ANY31C(I422ToRGB565Row_Any_SSSE3, I422ToRGB565Row_SSSE3, 1, 0, 2, 7) ANY31C(I422ToRGB24Row_Any_SSSE3, I422ToRGB24Row_SSSE3, 1, 0, 3, 15) #endif // HAS_I444TOARGBROW_SSSE3 #ifdef HAS_I422TORGB24ROW_AVX2 ANY31C(I422ToRGB24Row_Any_AVX2, I422ToRGB24Row_AVX2, 1, 0, 3, 31) #endif #ifdef HAS_I422TOARGBROW_AVX2 ANY31C(I422ToARGBRow_Any_AVX2, I422ToARGBRow_AVX2, 1, 0, 4, 15) #endif #ifdef HAS_I422TORGBAROW_AVX2 ANY31C(I422ToRGBARow_Any_AVX2, I422ToRGBARow_AVX2, 1, 0, 4, 15) #endif #ifdef HAS_I444TOARGBROW_AVX2 ANY31C(I444ToARGBRow_Any_AVX2, I444ToARGBRow_AVX2, 0, 0, 4, 15) #endif #ifdef HAS_I422TOARGB4444ROW_AVX2 ANY31C(I422ToARGB4444Row_Any_AVX2, I422ToARGB4444Row_AVX2, 1, 0, 2, 15) #endif #ifdef HAS_I422TOARGB1555ROW_AVX2 ANY31C(I422ToARGB1555Row_Any_AVX2, I422ToARGB1555Row_AVX2, 1, 0, 2, 15) #endif #ifdef HAS_I422TORGB565ROW_AVX2 ANY31C(I422ToRGB565Row_Any_AVX2, I422ToRGB565Row_AVX2, 1, 0, 2, 15) #endif #ifdef HAS_I422TOARGBROW_NEON ANY31C(I444ToARGBRow_Any_NEON, I444ToARGBRow_NEON, 0, 0, 4, 7) ANY31C(I422ToARGBRow_Any_NEON, I422ToARGBRow_NEON, 1, 0, 4, 7) ANY31C(I422ToRGBARow_Any_NEON, I422ToRGBARow_NEON, 1, 0, 4, 7) ANY31C(I422ToRGB24Row_Any_NEON, I422ToRGB24Row_NEON, 1, 0, 3, 7) ANY31C(I422ToARGB4444Row_Any_NEON, I422ToARGB4444Row_NEON, 1, 0, 2, 7) ANY31C(I422ToARGB1555Row_Any_NEON, I422ToARGB1555Row_NEON, 1, 0, 2, 7) ANY31C(I422ToRGB565Row_Any_NEON, I422ToRGB565Row_NEON, 1, 0, 2, 7) #endif #ifdef HAS_I422TOARGBROW_MSA ANY31C(I444ToARGBRow_Any_MSA, I444ToARGBRow_MSA, 0, 0, 4, 7) ANY31C(I422ToARGBRow_Any_MSA, I422ToARGBRow_MSA, 1, 0, 4, 7) ANY31C(I422ToRGBARow_Any_MSA, I422ToRGBARow_MSA, 1, 0, 4, 7) ANY31C(I422ToRGB24Row_Any_MSA, I422ToRGB24Row_MSA, 1, 0, 3, 15) ANY31C(I422ToARGB4444Row_Any_MSA, I422ToARGB4444Row_MSA, 1, 0, 2, 7) ANY31C(I422ToARGB1555Row_Any_MSA, I422ToARGB1555Row_MSA, 1, 0, 2, 7) ANY31C(I422ToRGB565Row_Any_MSA, I422ToRGB565Row_MSA, 1, 0, 2, 7) #endif #undef ANY31C // Any 3 planes of 16 bit to 1 with yuvconstants // TODO(fbarchard): consider 
sharing this code with ANY31C #define ANY31CT(NAMEANY, ANY_SIMD, UVSHIFT, DUVSHIFT, T, SBPP, BPP, MASK) \ void NAMEANY(const T* y_buf, const T* u_buf, const T* v_buf, \ uint8_t* dst_ptr, const struct YuvConstants* yuvconstants, \ int width) { \ SIMD_ALIGNED(T temp[16 * 3]); \ SIMD_ALIGNED(uint8_t out[64]); \ memset(temp, 0, 16 * 3 * SBPP); /* for YUY2 and msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(y_buf, u_buf, v_buf, dst_ptr, yuvconstants, n); \ } \ memcpy(temp, y_buf + n, r * SBPP); \ memcpy(temp + 16, u_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ memcpy(temp + 32, v_buf + (n >> UVSHIFT), SS(r, UVSHIFT) * SBPP); \ ANY_SIMD(temp, temp + 16, temp + 32, out, yuvconstants, MASK + 1); \ memcpy(dst_ptr + (n >> DUVSHIFT) * BPP, out, SS(r, DUVSHIFT) * BPP); \ } #ifdef HAS_I210TOAR30ROW_SSSE3 ANY31CT(I210ToAR30Row_Any_SSSE3, I210ToAR30Row_SSSE3, 1, 0, uint16_t, 2, 4, 7) #endif #ifdef HAS_I210TOARGBROW_SSSE3 ANY31CT(I210ToARGBRow_Any_SSSE3, I210ToARGBRow_SSSE3, 1, 0, uint16_t, 2, 4, 7) #endif #ifdef HAS_I210TOARGBROW_AVX2 ANY31CT(I210ToARGBRow_Any_AVX2, I210ToARGBRow_AVX2, 1, 0, uint16_t, 2, 4, 15) #endif #ifdef HAS_I210TOAR30ROW_AVX2 ANY31CT(I210ToAR30Row_Any_AVX2, I210ToAR30Row_AVX2, 1, 0, uint16_t, 2, 4, 15) #endif #undef ANY31CT // Any 2 planes to 1. #define ANY21(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \ int width) { \ SIMD_ALIGNED(uint8_t temp[64 * 3]); \ memset(temp, 0, 64 * 2); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(y_buf, uv_buf, dst_ptr, n); \ } \ memcpy(temp, y_buf + n * SBPP, r * SBPP); \ memcpy(temp + 64, uv_buf + (n >> UVSHIFT) * SBPP2, \ SS(r, UVSHIFT) * SBPP2); \ ANY_SIMD(temp, temp + 64, temp + 128, MASK + 1); \ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ } // Merge functions. #ifdef HAS_MERGEUVROW_SSE2 ANY21(MergeUVRow_Any_SSE2, MergeUVRow_SSE2, 0, 1, 1, 2, 15) #endif #ifdef HAS_MERGEUVROW_AVX2 ANY21(MergeUVRow_Any_AVX2, MergeUVRow_AVX2, 0, 1, 1, 2, 31) #endif #ifdef HAS_MERGEUVROW_NEON ANY21(MergeUVRow_Any_NEON, MergeUVRow_NEON, 0, 1, 1, 2, 15) #endif #ifdef HAS_MERGEUVROW_MSA ANY21(MergeUVRow_Any_MSA, MergeUVRow_MSA, 0, 1, 1, 2, 15) #endif // Math functions. 
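// For the wrappers below, MASK + 1 is the kernel's pixel group size. For
// example, ARGBMultiplyRow_Any_SSE2 (MASK = 3, four ARGB pixels per SSE2
// pass) called with width = 10 runs the kernel on n = 10 & ~3 = 8 pixels in
// place, then reprocesses the r = 10 & 3 = 2 tail pixels via the temp buffer.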
#ifdef HAS_ARGBMULTIPLYROW_SSE2 ANY21(ARGBMultiplyRow_Any_SSE2, ARGBMultiplyRow_SSE2, 0, 4, 4, 4, 3) #endif #ifdef HAS_ARGBADDROW_SSE2 ANY21(ARGBAddRow_Any_SSE2, ARGBAddRow_SSE2, 0, 4, 4, 4, 3) #endif #ifdef HAS_ARGBSUBTRACTROW_SSE2 ANY21(ARGBSubtractRow_Any_SSE2, ARGBSubtractRow_SSE2, 0, 4, 4, 4, 3) #endif #ifdef HAS_ARGBMULTIPLYROW_AVX2 ANY21(ARGBMultiplyRow_Any_AVX2, ARGBMultiplyRow_AVX2, 0, 4, 4, 4, 7) #endif #ifdef HAS_ARGBADDROW_AVX2 ANY21(ARGBAddRow_Any_AVX2, ARGBAddRow_AVX2, 0, 4, 4, 4, 7) #endif #ifdef HAS_ARGBSUBTRACTROW_AVX2 ANY21(ARGBSubtractRow_Any_AVX2, ARGBSubtractRow_AVX2, 0, 4, 4, 4, 7) #endif #ifdef HAS_ARGBMULTIPLYROW_NEON ANY21(ARGBMultiplyRow_Any_NEON, ARGBMultiplyRow_NEON, 0, 4, 4, 4, 7) #endif #ifdef HAS_ARGBADDROW_NEON ANY21(ARGBAddRow_Any_NEON, ARGBAddRow_NEON, 0, 4, 4, 4, 7) #endif #ifdef HAS_ARGBSUBTRACTROW_NEON ANY21(ARGBSubtractRow_Any_NEON, ARGBSubtractRow_NEON, 0, 4, 4, 4, 7) #endif #ifdef HAS_ARGBMULTIPLYROW_MSA ANY21(ARGBMultiplyRow_Any_MSA, ARGBMultiplyRow_MSA, 0, 4, 4, 4, 3) #endif #ifdef HAS_ARGBADDROW_MSA ANY21(ARGBAddRow_Any_MSA, ARGBAddRow_MSA, 0, 4, 4, 4, 7) #endif #ifdef HAS_ARGBSUBTRACTROW_MSA ANY21(ARGBSubtractRow_Any_MSA, ARGBSubtractRow_MSA, 0, 4, 4, 4, 7) #endif #ifdef HAS_SOBELROW_SSE2 ANY21(SobelRow_Any_SSE2, SobelRow_SSE2, 0, 1, 1, 4, 15) #endif #ifdef HAS_SOBELROW_NEON ANY21(SobelRow_Any_NEON, SobelRow_NEON, 0, 1, 1, 4, 7) #endif #ifdef HAS_SOBELROW_MSA ANY21(SobelRow_Any_MSA, SobelRow_MSA, 0, 1, 1, 4, 15) #endif #ifdef HAS_SOBELTOPLANEROW_SSE2 ANY21(SobelToPlaneRow_Any_SSE2, SobelToPlaneRow_SSE2, 0, 1, 1, 1, 15) #endif #ifdef HAS_SOBELTOPLANEROW_NEON ANY21(SobelToPlaneRow_Any_NEON, SobelToPlaneRow_NEON, 0, 1, 1, 1, 15) #endif #ifdef HAS_SOBELTOPLANEROW_MSA ANY21(SobelToPlaneRow_Any_MSA, SobelToPlaneRow_MSA, 0, 1, 1, 1, 31) #endif #ifdef HAS_SOBELXYROW_SSE2 ANY21(SobelXYRow_Any_SSE2, SobelXYRow_SSE2, 0, 1, 1, 4, 15) #endif #ifdef HAS_SOBELXYROW_NEON ANY21(SobelXYRow_Any_NEON, SobelXYRow_NEON, 0, 1, 1, 4, 7) #endif #ifdef HAS_SOBELXYROW_MSA ANY21(SobelXYRow_Any_MSA, SobelXYRow_MSA, 0, 1, 1, 4, 15) #endif #undef ANY21 // Any 2 planes to 1 with yuvconstants #define ANY21C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, SBPP2, BPP, MASK) \ void NAMEANY(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_ptr, \ const struct YuvConstants* yuvconstants, int width) { \ SIMD_ALIGNED(uint8_t temp[128 * 3]); \ memset(temp, 0, 128 * 2); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(y_buf, uv_buf, dst_ptr, yuvconstants, n); \ } \ memcpy(temp, y_buf + n * SBPP, r * SBPP); \ memcpy(temp + 128, uv_buf + (n >> UVSHIFT) * SBPP2, \ SS(r, UVSHIFT) * SBPP2); \ ANY_SIMD(temp, temp + 128, temp + 256, yuvconstants, MASK + 1); \ memcpy(dst_ptr + n * BPP, temp + 256, r * BPP); \ } // Biplanar to RGB. 
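// Biplanar sources such as NV12 carry one interleaved UV byte pair per two
// pixels, which is why the instantiations below use UVSHIFT = 1 (halve the
// x offset into the chroma plane) and SBPP2 = 2 (two chroma bytes per step).
// A width-17 NV12ToARGBRow_Any_NEON call (MASK = 7) therefore runs the
// kernel on 16 pixels, then copies 1 luma byte and SS(1, 1) = 1 UV pair
// into temp for the padded tail pass.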
#ifdef HAS_NV12TOARGBROW_SSSE3 ANY21C(NV12ToARGBRow_Any_SSSE3, NV12ToARGBRow_SSSE3, 1, 1, 2, 4, 7) #endif #ifdef HAS_NV12TOARGBROW_AVX2 ANY21C(NV12ToARGBRow_Any_AVX2, NV12ToARGBRow_AVX2, 1, 1, 2, 4, 15) #endif #ifdef HAS_NV12TOARGBROW_NEON ANY21C(NV12ToARGBRow_Any_NEON, NV12ToARGBRow_NEON, 1, 1, 2, 4, 7) #endif #ifdef HAS_NV12TOARGBROW_MSA ANY21C(NV12ToARGBRow_Any_MSA, NV12ToARGBRow_MSA, 1, 1, 2, 4, 7) #endif #ifdef HAS_NV21TOARGBROW_SSSE3 ANY21C(NV21ToARGBRow_Any_SSSE3, NV21ToARGBRow_SSSE3, 1, 1, 2, 4, 7) #endif #ifdef HAS_NV21TOARGBROW_AVX2 ANY21C(NV21ToARGBRow_Any_AVX2, NV21ToARGBRow_AVX2, 1, 1, 2, 4, 15) #endif #ifdef HAS_NV21TOARGBROW_NEON ANY21C(NV21ToARGBRow_Any_NEON, NV21ToARGBRow_NEON, 1, 1, 2, 4, 7) #endif #ifdef HAS_NV21TOARGBROW_MSA ANY21C(NV21ToARGBRow_Any_MSA, NV21ToARGBRow_MSA, 1, 1, 2, 4, 7) #endif #ifdef HAS_NV12TORGB24ROW_NEON ANY21C(NV12ToRGB24Row_Any_NEON, NV12ToRGB24Row_NEON, 1, 1, 2, 3, 7) #endif #ifdef HAS_NV21TORGB24ROW_NEON ANY21C(NV21ToRGB24Row_Any_NEON, NV21ToRGB24Row_NEON, 1, 1, 2, 3, 7) #endif #ifdef HAS_NV12TORGB24ROW_SSSE3 ANY21C(NV12ToRGB24Row_Any_SSSE3, NV12ToRGB24Row_SSSE3, 1, 1, 2, 3, 15) #endif #ifdef HAS_NV21TORGB24ROW_SSSE3 ANY21C(NV21ToRGB24Row_Any_SSSE3, NV21ToRGB24Row_SSSE3, 1, 1, 2, 3, 15) #endif #ifdef HAS_NV12TORGB24ROW_AVX2 ANY21C(NV12ToRGB24Row_Any_AVX2, NV12ToRGB24Row_AVX2, 1, 1, 2, 3, 31) #endif #ifdef HAS_NV21TORGB24ROW_AVX2 ANY21C(NV21ToRGB24Row_Any_AVX2, NV21ToRGB24Row_AVX2, 1, 1, 2, 3, 31) #endif #ifdef HAS_NV12TORGB565ROW_SSSE3 ANY21C(NV12ToRGB565Row_Any_SSSE3, NV12ToRGB565Row_SSSE3, 1, 1, 2, 2, 7) #endif #ifdef HAS_NV12TORGB565ROW_AVX2 ANY21C(NV12ToRGB565Row_Any_AVX2, NV12ToRGB565Row_AVX2, 1, 1, 2, 2, 15) #endif #ifdef HAS_NV12TORGB565ROW_NEON ANY21C(NV12ToRGB565Row_Any_NEON, NV12ToRGB565Row_NEON, 1, 1, 2, 2, 7) #endif #ifdef HAS_NV12TORGB565ROW_MSA ANY21C(NV12ToRGB565Row_Any_MSA, NV12ToRGB565Row_MSA, 1, 1, 2, 2, 7) #endif #undef ANY21C // Any 1 to 1. 
#define ANY11(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ SIMD_ALIGNED(uint8_t temp[128 * 2]); \ memset(temp, 0, 128); /* for YUY2 and msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, n); \ } \ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ ANY_SIMD(temp, temp + 128, MASK + 1); \ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ } #ifdef HAS_COPYROW_AVX ANY11(CopyRow_Any_AVX, CopyRow_AVX, 0, 1, 1, 63) #endif #ifdef HAS_COPYROW_SSE2 ANY11(CopyRow_Any_SSE2, CopyRow_SSE2, 0, 1, 1, 31) #endif #ifdef HAS_COPYROW_NEON ANY11(CopyRow_Any_NEON, CopyRow_NEON, 0, 1, 1, 31) #endif #if defined(HAS_ARGBTORGB24ROW_SSSE3) ANY11(ARGBToRGB24Row_Any_SSSE3, ARGBToRGB24Row_SSSE3, 0, 4, 3, 15) ANY11(ARGBToRAWRow_Any_SSSE3, ARGBToRAWRow_SSSE3, 0, 4, 3, 15) ANY11(ARGBToRGB565Row_Any_SSE2, ARGBToRGB565Row_SSE2, 0, 4, 2, 3) ANY11(ARGBToARGB1555Row_Any_SSE2, ARGBToARGB1555Row_SSE2, 0, 4, 2, 3) ANY11(ARGBToARGB4444Row_Any_SSE2, ARGBToARGB4444Row_SSE2, 0, 4, 2, 3) #endif #if defined(HAS_ARGBTORGB24ROW_AVX2) ANY11(ARGBToRGB24Row_Any_AVX2, ARGBToRGB24Row_AVX2, 0, 4, 3, 31) #endif #if defined(HAS_ARGBTORGB24ROW_AVX512VBMI) ANY11(ARGBToRGB24Row_Any_AVX512VBMI, ARGBToRGB24Row_AVX512VBMI, 0, 4, 3, 31) #endif #if defined(HAS_ARGBTORAWROW_AVX2) ANY11(ARGBToRAWRow_Any_AVX2, ARGBToRAWRow_AVX2, 0, 4, 3, 31) #endif #if defined(HAS_ARGBTORGB565ROW_AVX2) ANY11(ARGBToRGB565Row_Any_AVX2, ARGBToRGB565Row_AVX2, 0, 4, 2, 7) #endif #if defined(HAS_ARGBTOARGB4444ROW_AVX2) ANY11(ARGBToARGB1555Row_Any_AVX2, ARGBToARGB1555Row_AVX2, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_AVX2, ARGBToARGB4444Row_AVX2, 0, 4, 2, 7) #endif #if defined(HAS_ABGRTOAR30ROW_SSSE3) ANY11(ABGRToAR30Row_Any_SSSE3, ABGRToAR30Row_SSSE3, 0, 4, 4, 3) #endif #if defined(HAS_ARGBTOAR30ROW_SSSE3) ANY11(ARGBToAR30Row_Any_SSSE3, ARGBToAR30Row_SSSE3, 0, 4, 4, 3) #endif #if defined(HAS_ABGRTOAR30ROW_AVX2) ANY11(ABGRToAR30Row_Any_AVX2, ABGRToAR30Row_AVX2, 0, 4, 4, 7) #endif #if defined(HAS_ARGBTOAR30ROW_AVX2) ANY11(ARGBToAR30Row_Any_AVX2, ARGBToAR30Row_AVX2, 0, 4, 4, 7) #endif #if defined(HAS_J400TOARGBROW_SSE2) ANY11(J400ToARGBRow_Any_SSE2, J400ToARGBRow_SSE2, 0, 1, 4, 7) #endif #if defined(HAS_J400TOARGBROW_AVX2) ANY11(J400ToARGBRow_Any_AVX2, J400ToARGBRow_AVX2, 0, 1, 4, 15) #endif #if defined(HAS_I400TOARGBROW_SSE2) ANY11(I400ToARGBRow_Any_SSE2, I400ToARGBRow_SSE2, 0, 1, 4, 7) #endif #if defined(HAS_I400TOARGBROW_AVX2) ANY11(I400ToARGBRow_Any_AVX2, I400ToARGBRow_AVX2, 0, 1, 4, 15) #endif #if defined(HAS_RGB24TOARGBROW_SSSE3) ANY11(RGB24ToARGBRow_Any_SSSE3, RGB24ToARGBRow_SSSE3, 0, 3, 4, 15) ANY11(RAWToARGBRow_Any_SSSE3, RAWToARGBRow_SSSE3, 0, 3, 4, 15) ANY11(RGB565ToARGBRow_Any_SSE2, RGB565ToARGBRow_SSE2, 0, 2, 4, 7) ANY11(ARGB1555ToARGBRow_Any_SSE2, ARGB1555ToARGBRow_SSE2, 0, 2, 4, 7) ANY11(ARGB4444ToARGBRow_Any_SSE2, ARGB4444ToARGBRow_SSE2, 0, 2, 4, 7) #endif #if defined(HAS_RAWTORGB24ROW_SSSE3) ANY11(RAWToRGB24Row_Any_SSSE3, RAWToRGB24Row_SSSE3, 0, 3, 3, 7) #endif #if defined(HAS_RGB565TOARGBROW_AVX2) ANY11(RGB565ToARGBRow_Any_AVX2, RGB565ToARGBRow_AVX2, 0, 2, 4, 15) #endif #if defined(HAS_ARGB1555TOARGBROW_AVX2) ANY11(ARGB1555ToARGBRow_Any_AVX2, ARGB1555ToARGBRow_AVX2, 0, 2, 4, 15) #endif #if defined(HAS_ARGB4444TOARGBROW_AVX2) ANY11(ARGB4444ToARGBRow_Any_AVX2, ARGB4444ToARGBRow_AVX2, 0, 2, 4, 15) #endif #if defined(HAS_ARGBTORGB24ROW_NEON) ANY11(ARGBToRGB24Row_Any_NEON, ARGBToRGB24Row_NEON, 0, 4, 3, 7) 
ANY11(ARGBToRAWRow_Any_NEON, ARGBToRAWRow_NEON, 0, 4, 3, 7) ANY11(ARGBToRGB565Row_Any_NEON, ARGBToRGB565Row_NEON, 0, 4, 2, 7) ANY11(ARGBToARGB1555Row_Any_NEON, ARGBToARGB1555Row_NEON, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_NEON, ARGBToARGB4444Row_NEON, 0, 4, 2, 7) ANY11(J400ToARGBRow_Any_NEON, J400ToARGBRow_NEON, 0, 1, 4, 7) ANY11(I400ToARGBRow_Any_NEON, I400ToARGBRow_NEON, 0, 1, 4, 7) #endif #if defined(HAS_ARGBTORGB24ROW_MSA) ANY11(ARGBToRGB24Row_Any_MSA, ARGBToRGB24Row_MSA, 0, 4, 3, 15) ANY11(ARGBToRAWRow_Any_MSA, ARGBToRAWRow_MSA, 0, 4, 3, 15) ANY11(ARGBToRGB565Row_Any_MSA, ARGBToRGB565Row_MSA, 0, 4, 2, 7) ANY11(ARGBToARGB1555Row_Any_MSA, ARGBToARGB1555Row_MSA, 0, 4, 2, 7) ANY11(ARGBToARGB4444Row_Any_MSA, ARGBToARGB4444Row_MSA, 0, 4, 2, 7) ANY11(J400ToARGBRow_Any_MSA, J400ToARGBRow_MSA, 0, 1, 4, 15) ANY11(I400ToARGBRow_Any_MSA, I400ToARGBRow_MSA, 0, 1, 4, 15) #endif #if defined(HAS_RAWTORGB24ROW_NEON) ANY11(RAWToRGB24Row_Any_NEON, RAWToRGB24Row_NEON, 0, 3, 3, 7) #endif #if defined(HAS_RAWTORGB24ROW_MSA) ANY11(RAWToRGB24Row_Any_MSA, RAWToRGB24Row_MSA, 0, 3, 3, 15) #endif #ifdef HAS_ARGBTOYROW_AVX2 ANY11(ARGBToYRow_Any_AVX2, ARGBToYRow_AVX2, 0, 4, 1, 31) #endif #ifdef HAS_ARGBTOYJROW_AVX2 ANY11(ARGBToYJRow_Any_AVX2, ARGBToYJRow_AVX2, 0, 4, 1, 31) #endif #ifdef HAS_UYVYTOYROW_AVX2 ANY11(UYVYToYRow_Any_AVX2, UYVYToYRow_AVX2, 0, 2, 1, 31) #endif #ifdef HAS_YUY2TOYROW_AVX2 ANY11(YUY2ToYRow_Any_AVX2, YUY2ToYRow_AVX2, 1, 4, 1, 31) #endif #ifdef HAS_ARGBTOYROW_SSSE3 ANY11(ARGBToYRow_Any_SSSE3, ARGBToYRow_SSSE3, 0, 4, 1, 15) #endif #ifdef HAS_BGRATOYROW_SSSE3 ANY11(BGRAToYRow_Any_SSSE3, BGRAToYRow_SSSE3, 0, 4, 1, 15) ANY11(ABGRToYRow_Any_SSSE3, ABGRToYRow_SSSE3, 0, 4, 1, 15) ANY11(RGBAToYRow_Any_SSSE3, RGBAToYRow_SSSE3, 0, 4, 1, 15) ANY11(YUY2ToYRow_Any_SSE2, YUY2ToYRow_SSE2, 1, 4, 1, 15) ANY11(UYVYToYRow_Any_SSE2, UYVYToYRow_SSE2, 1, 4, 1, 15) #endif #ifdef HAS_ARGBTOYJROW_SSSE3 ANY11(ARGBToYJRow_Any_SSSE3, ARGBToYJRow_SSSE3, 0, 4, 1, 15) #endif #ifdef HAS_ARGBTOYROW_NEON ANY11(ARGBToYRow_Any_NEON, ARGBToYRow_NEON, 0, 4, 1, 7) #endif #ifdef HAS_ARGBTOYROW_MSA ANY11(ARGBToYRow_Any_MSA, ARGBToYRow_MSA, 0, 4, 1, 15) #endif #ifdef HAS_ARGBTOYJROW_NEON ANY11(ARGBToYJRow_Any_NEON, ARGBToYJRow_NEON, 0, 4, 1, 7) #endif #ifdef HAS_ARGBTOYJROW_MSA ANY11(ARGBToYJRow_Any_MSA, ARGBToYJRow_MSA, 0, 4, 1, 15) #endif #ifdef HAS_BGRATOYROW_NEON ANY11(BGRAToYRow_Any_NEON, BGRAToYRow_NEON, 0, 4, 1, 7) #endif #ifdef HAS_BGRATOYROW_MSA ANY11(BGRAToYRow_Any_MSA, BGRAToYRow_MSA, 0, 4, 1, 15) #endif #ifdef HAS_ABGRTOYROW_NEON ANY11(ABGRToYRow_Any_NEON, ABGRToYRow_NEON, 0, 4, 1, 7) #endif #ifdef HAS_ABGRTOYROW_MSA ANY11(ABGRToYRow_Any_MSA, ABGRToYRow_MSA, 0, 4, 1, 7) #endif #ifdef HAS_RGBATOYROW_NEON ANY11(RGBAToYRow_Any_NEON, RGBAToYRow_NEON, 0, 4, 1, 7) #endif #ifdef HAS_RGBATOYROW_MSA ANY11(RGBAToYRow_Any_MSA, RGBAToYRow_MSA, 0, 4, 1, 15) #endif #ifdef HAS_RGB24TOYROW_NEON ANY11(RGB24ToYRow_Any_NEON, RGB24ToYRow_NEON, 0, 3, 1, 7) #endif #ifdef HAS_RGB24TOYROW_MSA ANY11(RGB24ToYRow_Any_MSA, RGB24ToYRow_MSA, 0, 3, 1, 15) #endif #ifdef HAS_RAWTOYROW_NEON ANY11(RAWToYRow_Any_NEON, RAWToYRow_NEON, 0, 3, 1, 7) #endif #ifdef HAS_RAWTOYROW_MSA ANY11(RAWToYRow_Any_MSA, RAWToYRow_MSA, 0, 3, 1, 15) #endif #ifdef HAS_RGB565TOYROW_NEON ANY11(RGB565ToYRow_Any_NEON, RGB565ToYRow_NEON, 0, 2, 1, 7) #endif #ifdef HAS_RGB565TOYROW_MSA ANY11(RGB565ToYRow_Any_MSA, RGB565ToYRow_MSA, 0, 2, 1, 15) #endif #ifdef HAS_ARGB1555TOYROW_NEON ANY11(ARGB1555ToYRow_Any_NEON, ARGB1555ToYRow_NEON, 0, 2, 1, 7) #endif #ifdef HAS_ARGB1555TOYROW_MSA 
ANY11(ARGB1555ToYRow_Any_MSA, ARGB1555ToYRow_MSA, 0, 2, 1, 15) #endif #ifdef HAS_ARGB4444TOYROW_NEON ANY11(ARGB4444ToYRow_Any_NEON, ARGB4444ToYRow_NEON, 0, 2, 1, 7) #endif #ifdef HAS_YUY2TOYROW_NEON ANY11(YUY2ToYRow_Any_NEON, YUY2ToYRow_NEON, 1, 4, 1, 15) #endif #ifdef HAS_UYVYTOYROW_NEON ANY11(UYVYToYRow_Any_NEON, UYVYToYRow_NEON, 1, 4, 1, 15) #endif #ifdef HAS_YUY2TOYROW_MSA ANY11(YUY2ToYRow_Any_MSA, YUY2ToYRow_MSA, 1, 4, 1, 31) #endif #ifdef HAS_UYVYTOYROW_MSA ANY11(UYVYToYRow_Any_MSA, UYVYToYRow_MSA, 1, 4, 1, 31) #endif #ifdef HAS_RGB24TOARGBROW_NEON ANY11(RGB24ToARGBRow_Any_NEON, RGB24ToARGBRow_NEON, 0, 3, 4, 7) #endif #ifdef HAS_RGB24TOARGBROW_MSA ANY11(RGB24ToARGBRow_Any_MSA, RGB24ToARGBRow_MSA, 0, 3, 4, 15) #endif #ifdef HAS_RAWTOARGBROW_NEON ANY11(RAWToARGBRow_Any_NEON, RAWToARGBRow_NEON, 0, 3, 4, 7) #endif #ifdef HAS_RAWTOARGBROW_MSA ANY11(RAWToARGBRow_Any_MSA, RAWToARGBRow_MSA, 0, 3, 4, 15) #endif #ifdef HAS_RGB565TOARGBROW_NEON ANY11(RGB565ToARGBRow_Any_NEON, RGB565ToARGBRow_NEON, 0, 2, 4, 7) #endif #ifdef HAS_RGB565TOARGBROW_MSA ANY11(RGB565ToARGBRow_Any_MSA, RGB565ToARGBRow_MSA, 0, 2, 4, 15) #endif #ifdef HAS_ARGB1555TOARGBROW_NEON ANY11(ARGB1555ToARGBRow_Any_NEON, ARGB1555ToARGBRow_NEON, 0, 2, 4, 7) #endif #ifdef HAS_ARGB1555TOARGBROW_MSA ANY11(ARGB1555ToARGBRow_Any_MSA, ARGB1555ToARGBRow_MSA, 0, 2, 4, 15) #endif #ifdef HAS_ARGB4444TOARGBROW_NEON ANY11(ARGB4444ToARGBRow_Any_NEON, ARGB4444ToARGBRow_NEON, 0, 2, 4, 7) #endif #ifdef HAS_ARGB4444TOARGBROW_MSA ANY11(ARGB4444ToARGBRow_Any_MSA, ARGB4444ToARGBRow_MSA, 0, 2, 4, 15) #endif #ifdef HAS_ARGBATTENUATEROW_SSSE3 ANY11(ARGBAttenuateRow_Any_SSSE3, ARGBAttenuateRow_SSSE3, 0, 4, 4, 3) #endif #ifdef HAS_ARGBUNATTENUATEROW_SSE2 ANY11(ARGBUnattenuateRow_Any_SSE2, ARGBUnattenuateRow_SSE2, 0, 4, 4, 3) #endif #ifdef HAS_ARGBATTENUATEROW_AVX2 ANY11(ARGBAttenuateRow_Any_AVX2, ARGBAttenuateRow_AVX2, 0, 4, 4, 7) #endif #ifdef HAS_ARGBUNATTENUATEROW_AVX2 ANY11(ARGBUnattenuateRow_Any_AVX2, ARGBUnattenuateRow_AVX2, 0, 4, 4, 7) #endif #ifdef HAS_ARGBATTENUATEROW_NEON ANY11(ARGBAttenuateRow_Any_NEON, ARGBAttenuateRow_NEON, 0, 4, 4, 7) #endif #ifdef HAS_ARGBATTENUATEROW_MSA ANY11(ARGBAttenuateRow_Any_MSA, ARGBAttenuateRow_MSA, 0, 4, 4, 7) #endif #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 ANY11(ARGBExtractAlphaRow_Any_SSE2, ARGBExtractAlphaRow_SSE2, 0, 4, 1, 7) #endif #ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 ANY11(ARGBExtractAlphaRow_Any_AVX2, ARGBExtractAlphaRow_AVX2, 0, 4, 1, 31) #endif #ifdef HAS_ARGBEXTRACTALPHAROW_NEON ANY11(ARGBExtractAlphaRow_Any_NEON, ARGBExtractAlphaRow_NEON, 0, 4, 1, 15) #endif #ifdef HAS_ARGBEXTRACTALPHAROW_MSA ANY11(ARGBExtractAlphaRow_Any_MSA, ARGBExtractAlphaRow_MSA, 0, 4, 1, 15) #endif #undef ANY11 // Any 1 to 1 blended. Destination is read, modify, write. 
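// Because the kernel reads the destination as a blend input, the tail pass
// below stages both buffers: the last r source pixels go to temp and the
// current r destination pixels to temp + 64, the padded group is blended in
// place on the staged destination, and only the r real results are copied
// back.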
#define ANY11B(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ SIMD_ALIGNED(uint8_t temp[64 * 2]); \ memset(temp, 0, 64 * 2); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, n); \ } \ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ memcpy(temp + 64, dst_ptr + n * BPP, r * BPP); \ ANY_SIMD(temp, temp + 64, MASK + 1); \ memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ } #ifdef HAS_ARGBCOPYALPHAROW_AVX2 ANY11B(ARGBCopyAlphaRow_Any_AVX2, ARGBCopyAlphaRow_AVX2, 0, 4, 4, 15) #endif #ifdef HAS_ARGBCOPYALPHAROW_SSE2 ANY11B(ARGBCopyAlphaRow_Any_SSE2, ARGBCopyAlphaRow_SSE2, 0, 4, 4, 7) #endif #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 ANY11B(ARGBCopyYToAlphaRow_Any_AVX2, ARGBCopyYToAlphaRow_AVX2, 0, 1, 4, 15) #endif #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 ANY11B(ARGBCopyYToAlphaRow_Any_SSE2, ARGBCopyYToAlphaRow_SSE2, 0, 1, 4, 7) #endif #undef ANY11B // Any 1 to 1 with parameter. #define ANY11P(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, T param, int width) { \ SIMD_ALIGNED(uint8_t temp[64 * 2]); \ memset(temp, 0, 64); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, param, n); \ } \ memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ ANY_SIMD(temp, temp + 64, param, MASK + 1); \ memcpy(dst_ptr + n * BPP, temp + 64, r * BPP); \ } #if defined(HAS_ARGBTORGB565DITHERROW_SSE2) ANY11P(ARGBToRGB565DitherRow_Any_SSE2, ARGBToRGB565DitherRow_SSE2, const uint32_t, 4, 2, 3) #endif #if defined(HAS_ARGBTORGB565DITHERROW_AVX2) ANY11P(ARGBToRGB565DitherRow_Any_AVX2, ARGBToRGB565DitherRow_AVX2, const uint32_t, 4, 2, 7) #endif #if defined(HAS_ARGBTORGB565DITHERROW_NEON) ANY11P(ARGBToRGB565DitherRow_Any_NEON, ARGBToRGB565DitherRow_NEON, const uint32_t, 4, 2, 7) #endif #if defined(HAS_ARGBTORGB565DITHERROW_MSA) ANY11P(ARGBToRGB565DitherRow_Any_MSA, ARGBToRGB565DitherRow_MSA, const uint32_t, 4, 2, 7) #endif #ifdef HAS_ARGBSHUFFLEROW_SSSE3 ANY11P(ARGBShuffleRow_Any_SSSE3, ARGBShuffleRow_SSSE3, const uint8_t*, 4, 4, 7) #endif #ifdef HAS_ARGBSHUFFLEROW_AVX2 ANY11P(ARGBShuffleRow_Any_AVX2, ARGBShuffleRow_AVX2, const uint8_t*, 4, 4, 15) #endif #ifdef HAS_ARGBSHUFFLEROW_NEON ANY11P(ARGBShuffleRow_Any_NEON, ARGBShuffleRow_NEON, const uint8_t*, 4, 4, 3) #endif #ifdef HAS_ARGBSHUFFLEROW_MSA ANY11P(ARGBShuffleRow_Any_MSA, ARGBShuffleRow_MSA, const uint8_t*, 4, 4, 7) #endif #undef ANY11P // Any 1 to 1 with parameter and shorts. BPP measures in shorts. 
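// SBPP and BPP are element sizes here (sizeof(uint16_t) = 2 for the 16-bit
// rows below), so 'r * SBPP' in the memcpy counts bytes while 'src_ptr + n'
// advances in elements. The scale parameter is forwarded to the kernel
// untouched, the same way the dither word and shuffle table were above.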
#define ANY11C(NAMEANY, ANY_SIMD, SBPP, BPP, STYPE, DTYPE, MASK) \ void NAMEANY(const STYPE* src_ptr, DTYPE* dst_ptr, int scale, int width) { \ SIMD_ALIGNED(STYPE temp[32]); \ SIMD_ALIGNED(DTYPE out[32]); \ memset(temp, 0, 32 * SBPP); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, scale, n); \ } \ memcpy(temp, src_ptr + n, r * SBPP); \ ANY_SIMD(temp, out, scale, MASK + 1); \ memcpy(dst_ptr + n, out, r * BPP); \ } #ifdef HAS_CONVERT16TO8ROW_SSSE3 ANY11C(Convert16To8Row_Any_SSSE3, Convert16To8Row_SSSE3, 2, 1, uint16_t, uint8_t, 15) #endif #ifdef HAS_CONVERT16TO8ROW_AVX2 ANY11C(Convert16To8Row_Any_AVX2, Convert16To8Row_AVX2, 2, 1, uint16_t, uint8_t, 31) #endif #ifdef HAS_CONVERT8TO16ROW_SSE2 ANY11C(Convert8To16Row_Any_SSE2, Convert8To16Row_SSE2, 1, 2, uint8_t, uint16_t, 15) #endif #ifdef HAS_CONVERT8TO16ROW_AVX2 ANY11C(Convert8To16Row_Any_AVX2, Convert8To16Row_AVX2, 1, 2, uint8_t, uint16_t, 31) #endif #undef ANY11C // Any 1 to 1 with parameter and shorts to byte. BPP measures in shorts. #define ANY11P16(NAMEANY, ANY_SIMD, ST, T, SBPP, BPP, MASK) \ void NAMEANY(const ST* src_ptr, T* dst_ptr, float param, int width) { \ SIMD_ALIGNED(ST temp[32]); \ SIMD_ALIGNED(T out[32]); \ memset(temp, 0, SBPP * 32); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, param, n); \ } \ memcpy(temp, src_ptr + n, r * SBPP); \ ANY_SIMD(temp, out, param, MASK + 1); \ memcpy(dst_ptr + n, out, r * BPP); \ } #ifdef HAS_HALFFLOATROW_SSE2 ANY11P16(HalfFloatRow_Any_SSE2, HalfFloatRow_SSE2, uint16_t, uint16_t, 2, 2, 7) #endif #ifdef HAS_HALFFLOATROW_AVX2 ANY11P16(HalfFloatRow_Any_AVX2, HalfFloatRow_AVX2, uint16_t, uint16_t, 2, 2, 15) #endif #ifdef HAS_HALFFLOATROW_F16C ANY11P16(HalfFloatRow_Any_F16C, HalfFloatRow_F16C, uint16_t, uint16_t, 2, 2, 15) ANY11P16(HalfFloat1Row_Any_F16C, HalfFloat1Row_F16C, uint16_t, uint16_t, 2, 2, 15) #endif #ifdef HAS_HALFFLOATROW_NEON ANY11P16(HalfFloatRow_Any_NEON, HalfFloatRow_NEON, uint16_t, uint16_t, 2, 2, 7) ANY11P16(HalfFloat1Row_Any_NEON, HalfFloat1Row_NEON, uint16_t, uint16_t, 2, 2, 7) #endif #ifdef HAS_HALFFLOATROW_MSA ANY11P16(HalfFloatRow_Any_MSA, HalfFloatRow_MSA, uint16_t, uint16_t, 2, 2, 31) #endif #ifdef HAS_BYTETOFLOATROW_NEON ANY11P16(ByteToFloatRow_Any_NEON, ByteToFloatRow_NEON, uint8_t, float, 1, 3, 7) #endif #undef ANY11P16 // Any 1 to 1 with yuvconstants #define ANY11C(NAMEANY, ANY_SIMD, UVSHIFT, SBPP, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, \ const struct YuvConstants* yuvconstants, int width) { \ SIMD_ALIGNED(uint8_t temp[128 * 2]); \ memset(temp, 0, 128); /* for YUY2 and msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_ptr, yuvconstants, n); \ } \ memcpy(temp, src_ptr + (n >> UVSHIFT) * SBPP, SS(r, UVSHIFT) * SBPP); \ ANY_SIMD(temp, temp + 128, yuvconstants, MASK + 1); \ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ } #if defined(HAS_YUY2TOARGBROW_SSSE3) ANY11C(YUY2ToARGBRow_Any_SSSE3, YUY2ToARGBRow_SSSE3, 1, 4, 4, 15) ANY11C(UYVYToARGBRow_Any_SSSE3, UYVYToARGBRow_SSSE3, 1, 4, 4, 15) #endif #if defined(HAS_YUY2TOARGBROW_AVX2) ANY11C(YUY2ToARGBRow_Any_AVX2, YUY2ToARGBRow_AVX2, 1, 4, 4, 31) ANY11C(UYVYToARGBRow_Any_AVX2, UYVYToARGBRow_AVX2, 1, 4, 4, 31) #endif #if defined(HAS_YUY2TOARGBROW_NEON) ANY11C(YUY2ToARGBRow_Any_NEON, YUY2ToARGBRow_NEON, 1, 4, 4, 7) ANY11C(UYVYToARGBRow_Any_NEON, UYVYToARGBRow_NEON, 1, 4, 4, 7) #endif #if defined(HAS_YUY2TOARGBROW_MSA) 
ANY11C(YUY2ToARGBRow_Any_MSA, YUY2ToARGBRow_MSA, 1, 4, 4, 7) ANY11C(UYVYToARGBRow_Any_MSA, UYVYToARGBRow_MSA, 1, 4, 4, 7) #endif #undef ANY11C // Any 1 to 1 interpolate. Takes 2 rows of source via stride. #define ANY11T(NAMEANY, ANY_SIMD, SBPP, BPP, MASK) \ void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, \ ptrdiff_t src_stride_ptr, int width, int source_y_fraction) { \ SIMD_ALIGNED(uint8_t temp[64 * 3]); \ memset(temp, 0, 64 * 2); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(dst_ptr, src_ptr, src_stride_ptr, n, source_y_fraction); \ } \ memcpy(temp, src_ptr + n * SBPP, r * SBPP); \ memcpy(temp + 64, src_ptr + src_stride_ptr + n * SBPP, r * SBPP); \ ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction); \ memcpy(dst_ptr + n * BPP, temp + 128, r * BPP); \ } #ifdef HAS_INTERPOLATEROW_AVX2 ANY11T(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, 1, 1, 31) #endif #ifdef HAS_INTERPOLATEROW_SSSE3 ANY11T(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, 1, 1, 15) #endif #ifdef HAS_INTERPOLATEROW_NEON ANY11T(InterpolateRow_Any_NEON, InterpolateRow_NEON, 1, 1, 15) #endif #ifdef HAS_INTERPOLATEROW_MSA ANY11T(InterpolateRow_Any_MSA, InterpolateRow_MSA, 1, 1, 31) #endif #undef ANY11T // Any 1 to 1 mirror. #define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_ptr, int width) { \ SIMD_ALIGNED(uint8_t temp[64 * 2]); \ memset(temp, 0, 64); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr + r * BPP, dst_ptr, n); \ } \ memcpy(temp, src_ptr, r* BPP); \ ANY_SIMD(temp, temp + 64, MASK + 1); \ memcpy(dst_ptr + n * BPP, temp + 64 + (MASK + 1 - r) * BPP, r * BPP); \ } #ifdef HAS_MIRRORROW_AVX2 ANY11M(MirrorRow_Any_AVX2, MirrorRow_AVX2, 1, 31) #endif #ifdef HAS_MIRRORROW_SSSE3 ANY11M(MirrorRow_Any_SSSE3, MirrorRow_SSSE3, 1, 15) #endif #ifdef HAS_MIRRORROW_NEON ANY11M(MirrorRow_Any_NEON, MirrorRow_NEON, 1, 15) #endif #ifdef HAS_MIRRORROW_MSA ANY11M(MirrorRow_Any_MSA, MirrorRow_MSA, 1, 63) #endif #ifdef HAS_ARGBMIRRORROW_AVX2 ANY11M(ARGBMirrorRow_Any_AVX2, ARGBMirrorRow_AVX2, 4, 7) #endif #ifdef HAS_ARGBMIRRORROW_SSE2 ANY11M(ARGBMirrorRow_Any_SSE2, ARGBMirrorRow_SSE2, 4, 3) #endif #ifdef HAS_ARGBMIRRORROW_NEON ANY11M(ARGBMirrorRow_Any_NEON, ARGBMirrorRow_NEON, 4, 3) #endif #ifdef HAS_ARGBMIRRORROW_MSA ANY11M(ARGBMirrorRow_Any_MSA, ARGBMirrorRow_MSA, 4, 15) #endif #undef ANY11M // Any 1 plane. (memset) #define ANY1(NAMEANY, ANY_SIMD, T, BPP, MASK) \ void NAMEANY(uint8_t* dst_ptr, T v32, int width) { \ SIMD_ALIGNED(uint8_t temp[64]); \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(dst_ptr, v32, n); \ } \ ANY_SIMD(temp, v32, MASK + 1); \ memcpy(dst_ptr + n * BPP, temp, r * BPP); \ } #ifdef HAS_SETROW_X86 ANY1(SetRow_Any_X86, SetRow_X86, uint8_t, 1, 3) #endif #ifdef HAS_SETROW_NEON ANY1(SetRow_Any_NEON, SetRow_NEON, uint8_t, 1, 15) #endif #ifdef HAS_ARGBSETROW_NEON ANY1(ARGBSetRow_Any_NEON, ARGBSetRow_NEON, uint32_t, 4, 3) #endif #ifdef HAS_ARGBSETROW_MSA ANY1(ARGBSetRow_Any_MSA, ARGBSetRow_MSA, uint32_t, 4, 3) #endif #undef ANY1 // Any 1 to 2. Outputs UV planes. 
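// These wrappers deinterleave one packed row into two planes, e.g.
// SplitUVRow below turns UVUVUV... chroma (NV12 style) into separate U and
// V rows. The YUY2/UYVY forms use DUVSHIFT = 1 because each 4-byte
// macropixel yields one U and one V sample for two Y samples.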
#define ANY12(NAMEANY, ANY_SIMD, UVSHIFT, BPP, DUVSHIFT, MASK) \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_u, uint8_t* dst_v, \ int width) { \ SIMD_ALIGNED(uint8_t temp[128 * 3]); \ memset(temp, 0, 128); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_u, dst_v, n); \ } \ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ ANY_SIMD(temp, temp + 128, temp + 256, MASK + 1); \ memcpy(dst_u + (n >> DUVSHIFT), temp + 128, SS(r, DUVSHIFT)); \ memcpy(dst_v + (n >> DUVSHIFT), temp + 256, SS(r, DUVSHIFT)); \ } #ifdef HAS_SPLITUVROW_SSE2 ANY12(SplitUVRow_Any_SSE2, SplitUVRow_SSE2, 0, 2, 0, 15) #endif #ifdef HAS_SPLITUVROW_AVX2 ANY12(SplitUVRow_Any_AVX2, SplitUVRow_AVX2, 0, 2, 0, 31) #endif #ifdef HAS_SPLITUVROW_NEON ANY12(SplitUVRow_Any_NEON, SplitUVRow_NEON, 0, 2, 0, 15) #endif #ifdef HAS_SPLITUVROW_MSA ANY12(SplitUVRow_Any_MSA, SplitUVRow_MSA, 0, 2, 0, 31) #endif #ifdef HAS_ARGBTOUV444ROW_SSSE3 ANY12(ARGBToUV444Row_Any_SSSE3, ARGBToUV444Row_SSSE3, 0, 4, 0, 15) #endif #ifdef HAS_YUY2TOUV422ROW_AVX2 ANY12(YUY2ToUV422Row_Any_AVX2, YUY2ToUV422Row_AVX2, 1, 4, 1, 31) ANY12(UYVYToUV422Row_Any_AVX2, UYVYToUV422Row_AVX2, 1, 4, 1, 31) #endif #ifdef HAS_YUY2TOUV422ROW_SSE2 ANY12(YUY2ToUV422Row_Any_SSE2, YUY2ToUV422Row_SSE2, 1, 4, 1, 15) ANY12(UYVYToUV422Row_Any_SSE2, UYVYToUV422Row_SSE2, 1, 4, 1, 15) #endif #ifdef HAS_YUY2TOUV422ROW_NEON ANY12(ARGBToUV444Row_Any_NEON, ARGBToUV444Row_NEON, 0, 4, 0, 7) ANY12(YUY2ToUV422Row_Any_NEON, YUY2ToUV422Row_NEON, 1, 4, 1, 15) ANY12(UYVYToUV422Row_Any_NEON, UYVYToUV422Row_NEON, 1, 4, 1, 15) #endif #ifdef HAS_YUY2TOUV422ROW_MSA ANY12(ARGBToUV444Row_Any_MSA, ARGBToUV444Row_MSA, 0, 4, 0, 15) ANY12(YUY2ToUV422Row_Any_MSA, YUY2ToUV422Row_MSA, 1, 4, 1, 31) ANY12(UYVYToUV422Row_Any_MSA, UYVYToUV422Row_MSA, 1, 4, 1, 31) #endif #undef ANY12 // Any 1 to 3. Outputs RGB planes. #define ANY13(NAMEANY, ANY_SIMD, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, uint8_t* dst_r, uint8_t* dst_g, \ uint8_t* dst_b, int width) { \ SIMD_ALIGNED(uint8_t temp[16 * 6]); \ memset(temp, 0, 16 * 3); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, dst_r, dst_g, dst_b, n); \ } \ memcpy(temp, src_ptr + n * BPP, r * BPP); \ ANY_SIMD(temp, temp + 16 * 3, temp + 16 * 4, temp + 16 * 5, MASK + 1); \ memcpy(dst_r + n, temp + 16 * 3, r); \ memcpy(dst_g + n, temp + 16 * 4, r); \ memcpy(dst_b + n, temp + 16 * 5, r); \ } #ifdef HAS_SPLITRGBROW_SSSE3 ANY13(SplitRGBRow_Any_SSSE3, SplitRGBRow_SSSE3, 3, 15) #endif #ifdef HAS_SPLITRGBROW_NEON ANY13(SplitRGBRow_Any_NEON, SplitRGBRow_NEON, 3, 15) #endif // Any 1 to 2 with source stride (2 rows of source). Outputs UV planes. // 128 byte row allows for 32 avx ARGB pixels. 
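// The stride variant below reads two source rows so 4:2:0 chroma can
// average a 2x2 pixel block per U/V output. For odd widths with
// UVSHIFT == 0, the last pixel of each staged row is replicated so the
// final 2x1 block still has a right-hand neighbour to average with.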
#define ANY12S(NAMEANY, ANY_SIMD, UVSHIFT, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, int src_stride_ptr, uint8_t* dst_u, \ uint8_t* dst_v, int width) { \ SIMD_ALIGNED(uint8_t temp[128 * 4]); \ memset(temp, 0, 128 * 2); /* for msan */ \ int r = width & MASK; \ int n = width & ~MASK; \ if (n > 0) { \ ANY_SIMD(src_ptr, src_stride_ptr, dst_u, dst_v, n); \ } \ memcpy(temp, src_ptr + (n >> UVSHIFT) * BPP, SS(r, UVSHIFT) * BPP); \ memcpy(temp + 128, src_ptr + src_stride_ptr + (n >> UVSHIFT) * BPP, \ SS(r, UVSHIFT) * BPP); \ if ((width & 1) && UVSHIFT == 0) { /* repeat last pixel for subsample */ \ memcpy(temp + SS(r, UVSHIFT) * BPP, temp + SS(r, UVSHIFT) * BPP - BPP, \ BPP); \ memcpy(temp + 128 + SS(r, UVSHIFT) * BPP, \ temp + 128 + SS(r, UVSHIFT) * BPP - BPP, BPP); \ } \ ANY_SIMD(temp, 128, temp + 256, temp + 384, MASK + 1); \ memcpy(dst_u + (n >> 1), temp + 256, SS(r, 1)); \ memcpy(dst_v + (n >> 1), temp + 384, SS(r, 1)); \ } #ifdef HAS_ARGBTOUVROW_AVX2 ANY12S(ARGBToUVRow_Any_AVX2, ARGBToUVRow_AVX2, 0, 4, 31) #endif #ifdef HAS_ARGBTOUVJROW_AVX2 ANY12S(ARGBToUVJRow_Any_AVX2, ARGBToUVJRow_AVX2, 0, 4, 31) #endif #ifdef HAS_ARGBTOUVROW_SSSE3 ANY12S(ARGBToUVRow_Any_SSSE3, ARGBToUVRow_SSSE3, 0, 4, 15) ANY12S(ARGBToUVJRow_Any_SSSE3, ARGBToUVJRow_SSSE3, 0, 4, 15) ANY12S(BGRAToUVRow_Any_SSSE3, BGRAToUVRow_SSSE3, 0, 4, 15) ANY12S(ABGRToUVRow_Any_SSSE3, ABGRToUVRow_SSSE3, 0, 4, 15) ANY12S(RGBAToUVRow_Any_SSSE3, RGBAToUVRow_SSSE3, 0, 4, 15) #endif #ifdef HAS_YUY2TOUVROW_AVX2 ANY12S(YUY2ToUVRow_Any_AVX2, YUY2ToUVRow_AVX2, 1, 4, 31) ANY12S(UYVYToUVRow_Any_AVX2, UYVYToUVRow_AVX2, 1, 4, 31) #endif #ifdef HAS_YUY2TOUVROW_SSE2 ANY12S(YUY2ToUVRow_Any_SSE2, YUY2ToUVRow_SSE2, 1, 4, 15) ANY12S(UYVYToUVRow_Any_SSE2, UYVYToUVRow_SSE2, 1, 4, 15) #endif #ifdef HAS_ARGBTOUVROW_NEON ANY12S(ARGBToUVRow_Any_NEON, ARGBToUVRow_NEON, 0, 4, 15) #endif #ifdef HAS_ARGBTOUVROW_MSA ANY12S(ARGBToUVRow_Any_MSA, ARGBToUVRow_MSA, 0, 4, 31) #endif #ifdef HAS_ARGBTOUVJROW_NEON ANY12S(ARGBToUVJRow_Any_NEON, ARGBToUVJRow_NEON, 0, 4, 15) #endif #ifdef HAS_ARGBTOUVJROW_MSA ANY12S(ARGBToUVJRow_Any_MSA, ARGBToUVJRow_MSA, 0, 4, 31) #endif #ifdef HAS_BGRATOUVROW_NEON ANY12S(BGRAToUVRow_Any_NEON, BGRAToUVRow_NEON, 0, 4, 15) #endif #ifdef HAS_BGRATOUVROW_MSA ANY12S(BGRAToUVRow_Any_MSA, BGRAToUVRow_MSA, 0, 4, 31) #endif #ifdef HAS_ABGRTOUVROW_NEON ANY12S(ABGRToUVRow_Any_NEON, ABGRToUVRow_NEON, 0, 4, 15) #endif #ifdef HAS_ABGRTOUVROW_MSA ANY12S(ABGRToUVRow_Any_MSA, ABGRToUVRow_MSA, 0, 4, 31) #endif #ifdef HAS_RGBATOUVROW_NEON ANY12S(RGBAToUVRow_Any_NEON, RGBAToUVRow_NEON, 0, 4, 15) #endif #ifdef HAS_RGBATOUVROW_MSA ANY12S(RGBAToUVRow_Any_MSA, RGBAToUVRow_MSA, 0, 4, 31) #endif #ifdef HAS_RGB24TOUVROW_NEON ANY12S(RGB24ToUVRow_Any_NEON, RGB24ToUVRow_NEON, 0, 3, 15) #endif #ifdef HAS_RGB24TOUVROW_MSA ANY12S(RGB24ToUVRow_Any_MSA, RGB24ToUVRow_MSA, 0, 3, 15) #endif #ifdef HAS_RAWTOUVROW_NEON ANY12S(RAWToUVRow_Any_NEON, RAWToUVRow_NEON, 0, 3, 15) #endif #ifdef HAS_RAWTOUVROW_MSA ANY12S(RAWToUVRow_Any_MSA, RAWToUVRow_MSA, 0, 3, 15) #endif #ifdef HAS_RGB565TOUVROW_NEON ANY12S(RGB565ToUVRow_Any_NEON, RGB565ToUVRow_NEON, 0, 2, 15) #endif #ifdef HAS_RGB565TOUVROW_MSA ANY12S(RGB565ToUVRow_Any_MSA, RGB565ToUVRow_MSA, 0, 2, 15) #endif #ifdef HAS_ARGB1555TOUVROW_NEON ANY12S(ARGB1555ToUVRow_Any_NEON, ARGB1555ToUVRow_NEON, 0, 2, 15) #endif #ifdef HAS_ARGB1555TOUVROW_MSA ANY12S(ARGB1555ToUVRow_Any_MSA, ARGB1555ToUVRow_MSA, 0, 2, 15) #endif #ifdef HAS_ARGB4444TOUVROW_NEON ANY12S(ARGB4444ToUVRow_Any_NEON, ARGB4444ToUVRow_NEON, 0, 2, 15) #endif #ifdef 
HAS_YUY2TOUVROW_NEON
ANY12S(YUY2ToUVRow_Any_NEON, YUY2ToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_UYVYTOUVROW_NEON
ANY12S(UYVYToUVRow_Any_NEON, UYVYToUVRow_NEON, 1, 4, 15)
#endif
#ifdef HAS_YUY2TOUVROW_MSA
ANY12S(YUY2ToUVRow_Any_MSA, YUY2ToUVRow_MSA, 1, 4, 31)
#endif
#ifdef HAS_UYVYTOUVROW_MSA
ANY12S(UYVYToUVRow_Any_MSA, UYVYToUVRow_MSA, 1, 4, 31)
#endif
#undef ANY12S

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
libvpx-1.8.2/third_party/libyuv/source/row_common.cc000066400000000000000000003170061357355204000226720ustar00rootroot00000000000000/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"

#include <stdio.h>
#include <string.h>  // For memcpy and memset.

#include "libyuv/basic_types.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// llvm x86 is poor at the ternary operator, so use branchless min/max.
#define USE_BRANCHLESS 1
#if USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
  return ((-(v) >> 31) & (v));
}

static __inline int32_t clamp255(int32_t v) {
  return (((255 - (v)) >> 31) | (v)) & 255;
}

static __inline int32_t clamp1023(int32_t v) {
  return (((1023 - (v)) >> 31) | (v)) & 1023;
}

static __inline uint32_t Abs(int32_t v) {
  int m = v >> 31;
  return (v + m) ^ m;
}
#else   // USE_BRANCHLESS
static __inline int32_t clamp0(int32_t v) {
  return (v < 0) ? 0 : v;
}

static __inline int32_t clamp255(int32_t v) {
  return (v > 255) ? 255 : v;
}

static __inline int32_t clamp1023(int32_t v) {
  return (v > 1023) ? 1023 : v;
}

static __inline uint32_t Abs(int32_t v) {
  return (v < 0) ?
-v : v; } #endif // USE_BRANCHLESS static __inline uint32_t Clamp(int32_t val) { int v = clamp0(val); return (uint32_t)(clamp255(v)); } static __inline uint32_t Clamp10(int32_t val) { int v = clamp0(val); return (uint32_t)(clamp1023(v)); } // Little Endian #if defined(__x86_64__) || defined(_M_X64) || defined(__i386__) || \ defined(_M_IX86) || defined(__arm__) || defined(_M_ARM) || \ (defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) #define WRITEWORD(p, v) *(uint32_t*)(p) = v #else static inline void WRITEWORD(uint8_t* p, uint32_t v) { p[0] = (uint8_t)(v & 255); p[1] = (uint8_t)((v >> 8) & 255); p[2] = (uint8_t)((v >> 16) & 255); p[3] = (uint8_t)((v >> 24) & 255); } #endif void RGB24ToARGBRow_C(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_rgb24[0]; uint8_t g = src_rgb24[1]; uint8_t r = src_rgb24[2]; dst_argb[0] = b; dst_argb[1] = g; dst_argb[2] = r; dst_argb[3] = 255u; dst_argb += 4; src_rgb24 += 3; } } void RAWToARGBRow_C(const uint8_t* src_raw, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { uint8_t r = src_raw[0]; uint8_t g = src_raw[1]; uint8_t b = src_raw[2]; dst_argb[0] = b; dst_argb[1] = g; dst_argb[2] = r; dst_argb[3] = 255u; dst_argb += 4; src_raw += 3; } } void RAWToRGB24Row_C(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { int x; for (x = 0; x < width; ++x) { uint8_t r = src_raw[0]; uint8_t g = src_raw[1]; uint8_t b = src_raw[2]; dst_rgb24[0] = b; dst_rgb24[1] = g; dst_rgb24[2] = r; dst_rgb24 += 3; src_raw += 3; } } void RGB565ToARGBRow_C(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_rgb565[0] & 0x1f; uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); uint8_t r = src_rgb565[1] >> 3; dst_argb[0] = (b << 3) | (b >> 2); dst_argb[1] = (g << 2) | (g >> 4); dst_argb[2] = (r << 3) | (r >> 2); dst_argb[3] = 255u; dst_argb += 4; src_rgb565 += 2; } } void ARGB1555ToARGBRow_C(const uint8_t* src_argb1555, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_argb1555[0] & 0x1f; uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); uint8_t r = (src_argb1555[1] & 0x7c) >> 2; uint8_t a = src_argb1555[1] >> 7; dst_argb[0] = (b << 3) | (b >> 2); dst_argb[1] = (g << 3) | (g >> 2); dst_argb[2] = (r << 3) | (r >> 2); dst_argb[3] = -a; dst_argb += 4; src_argb1555 += 2; } } void ARGB4444ToARGBRow_C(const uint8_t* src_argb4444, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_argb4444[0] & 0x0f; uint8_t g = src_argb4444[0] >> 4; uint8_t r = src_argb4444[1] & 0x0f; uint8_t a = src_argb4444[1] >> 4; dst_argb[0] = (b << 4) | b; dst_argb[1] = (g << 4) | g; dst_argb[2] = (r << 4) | r; dst_argb[3] = (a << 4) | a; dst_argb += 4; src_argb4444 += 2; } } void AR30ToARGBRow_C(const uint8_t* src_ar30, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { uint32_t ar30 = *(const uint32_t*)src_ar30; uint32_t b = (ar30 >> 2) & 0xff; uint32_t g = (ar30 >> 12) & 0xff; uint32_t r = (ar30 >> 22) & 0xff; uint32_t a = (ar30 >> 30) * 0x55; // Replicate 2 bits to 8 bits. 
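// AR30 packs a:2, r:10, g:10, b:10 from the top bit down; the shifts above
// keep the top 8 bits of each 10-bit channel, and the little-endian store
// below writes bytes B, G, R, A, which is libyuv's ARGB memory order.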
    *(uint32_t*)(dst_argb) = b | (g << 8) | (r << 16) | (a << 24);
    dst_argb += 4;
    src_ar30 += 4;
  }
}

void AR30ToABGRRow_C(const uint8_t* src_ar30, uint8_t* dst_abgr, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30 = *(const uint32_t*)src_ar30;
    uint32_t b = (ar30 >> 2) & 0xff;
    uint32_t g = (ar30 >> 12) & 0xff;
    uint32_t r = (ar30 >> 22) & 0xff;
    uint32_t a = (ar30 >> 30) * 0x55;  // Replicate 2 bits to 8 bits.
    *(uint32_t*)(dst_abgr) = r | (g << 8) | (b << 16) | (a << 24);
    dst_abgr += 4;
    src_ar30 += 4;
  }
}

void AR30ToAB30Row_C(const uint8_t* src_ar30, uint8_t* dst_ab30, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint32_t ar30 = *(const uint32_t*)src_ar30;
    uint32_t b = ar30 & 0x3ff;
    uint32_t ga = ar30 & 0xc00ffc00;
    uint32_t r = (ar30 >> 20) & 0x3ff;
    *(uint32_t*)(dst_ab30) = r | ga | (b << 20);
    dst_ab30 += 4;
    src_ar30 += 4;
  }
}

void ARGBToRGB24Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    dst_rgb[0] = b;
    dst_rgb[1] = g;
    dst_rgb[2] = r;
    dst_rgb += 3;
    src_argb += 4;
  }
}

void ARGBToRAWRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width; ++x) {
    uint8_t b = src_argb[0];
    uint8_t g = src_argb[1];
    uint8_t r = src_argb[2];
    dst_rgb[0] = r;
    dst_rgb[1] = g;
    dst_rgb[2] = b;
    dst_rgb += 3;
    src_argb += 4;
  }
}

void ARGBToRGB565Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) {
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 2;
    uint8_t r0 = src_argb[2] >> 3;
    uint8_t b1 = src_argb[4] >> 3;
    uint8_t g1 = src_argb[5] >> 2;
    uint8_t r1 = src_argb[6] >> 3;
    WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) |
                           (r1 << 27));
    dst_rgb += 4;
    src_argb += 8;
  }
  if (width & 1) {
    uint8_t b0 = src_argb[0] >> 3;
    uint8_t g0 = src_argb[1] >> 2;
    uint8_t r0 = src_argb[2] >> 3;
    *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11);
  }
}

// dither4 is a row of 4 values from a 4x4 dither matrix.
// The 4x4 matrix contains values to increase RGB. When converting to
// fewer bits (565) this provides an ordered dither.
// The first byte of dither4 is the upper-left entry of the 4x4 matrix.
// The 4 values are passed as an int, then referenced as an array, so
// endian will not affect the order of the original matrix. But dither4
// will contain the first pixel in the lower byte for little endian
// or the upper byte for big endian.
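// Worked example (the values are hypothetical, not a matrix libyuv ships):
// with dither4 = 0x00204060 on a little-endian machine, pixel x adds
// ((uint8_t*)&dither4)[x & 3], i.e. +0x60, +0x40, +0x20, +0x00 repeating
// across the row, to each of B, G and R before the 8-to-5/6-bit truncation
// below; clamp255 keeps the biased values from wrapping.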
void ARGBToRGB565DitherRow_C(const uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, int width) { int x; for (x = 0; x < width - 1; x += 2) { int dither0 = ((const unsigned char*)(&dither4))[x & 3]; int dither1 = ((const unsigned char*)(&dither4))[(x + 1) & 3]; uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3; uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2; uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3; uint8_t b1 = clamp255(src_argb[4] + dither1) >> 3; uint8_t g1 = clamp255(src_argb[5] + dither1) >> 2; uint8_t r1 = clamp255(src_argb[6] + dither1) >> 3; WRITEWORD(dst_rgb, b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27)); dst_rgb += 4; src_argb += 8; } if (width & 1) { int dither0 = ((const unsigned char*)(&dither4))[(width - 1) & 3]; uint8_t b0 = clamp255(src_argb[0] + dither0) >> 3; uint8_t g0 = clamp255(src_argb[1] + dither0) >> 2; uint8_t r0 = clamp255(src_argb[2] + dither0) >> 3; *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 11); } } void ARGBToARGB1555Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width - 1; x += 2) { uint8_t b0 = src_argb[0] >> 3; uint8_t g0 = src_argb[1] >> 3; uint8_t r0 = src_argb[2] >> 3; uint8_t a0 = src_argb[3] >> 7; uint8_t b1 = src_argb[4] >> 3; uint8_t g1 = src_argb[5] >> 3; uint8_t r1 = src_argb[6] >> 3; uint8_t a1 = src_argb[7] >> 7; *(uint32_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15) | (b1 << 16) | (g1 << 21) | (r1 << 26) | (a1 << 31); dst_rgb += 4; src_argb += 8; } if (width & 1) { uint8_t b0 = src_argb[0] >> 3; uint8_t g0 = src_argb[1] >> 3; uint8_t r0 = src_argb[2] >> 3; uint8_t a0 = src_argb[3] >> 7; *(uint16_t*)(dst_rgb) = b0 | (g0 << 5) | (r0 << 10) | (a0 << 15); } } void ARGBToARGB4444Row_C(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width - 1; x += 2) { uint8_t b0 = src_argb[0] >> 4; uint8_t g0 = src_argb[1] >> 4; uint8_t r0 = src_argb[2] >> 4; uint8_t a0 = src_argb[3] >> 4; uint8_t b1 = src_argb[4] >> 4; uint8_t g1 = src_argb[5] >> 4; uint8_t r1 = src_argb[6] >> 4; uint8_t a1 = src_argb[7] >> 4; *(uint32_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12) | (b1 << 16) | (g1 << 20) | (r1 << 24) | (a1 << 28); dst_rgb += 4; src_argb += 8; } if (width & 1) { uint8_t b0 = src_argb[0] >> 4; uint8_t g0 = src_argb[1] >> 4; uint8_t r0 = src_argb[2] >> 4; uint8_t a0 = src_argb[3] >> 4; *(uint16_t*)(dst_rgb) = b0 | (g0 << 4) | (r0 << 8) | (a0 << 12); } } void ABGRToAR30Row_C(const uint8_t* src_abgr, uint8_t* dst_ar30, int width) { int x; for (x = 0; x < width; ++x) { uint32_t b0 = (src_abgr[0] >> 6) | ((uint32_t)(src_abgr[0]) << 2); uint32_t g0 = (src_abgr[1] >> 6) | ((uint32_t)(src_abgr[1]) << 2); uint32_t r0 = (src_abgr[2] >> 6) | ((uint32_t)(src_abgr[2]) << 2); uint32_t a0 = (src_abgr[3] >> 6); *(uint32_t*)(dst_ar30) = r0 | (g0 << 10) | (b0 << 20) | (a0 << 30); dst_ar30 += 4; src_abgr += 4; } } void ARGBToAR30Row_C(const uint8_t* src_argb, uint8_t* dst_ar30, int width) { int x; for (x = 0; x < width; ++x) { uint32_t b0 = (src_argb[0] >> 6) | ((uint32_t)(src_argb[0]) << 2); uint32_t g0 = (src_argb[1] >> 6) | ((uint32_t)(src_argb[1]) << 2); uint32_t r0 = (src_argb[2] >> 6) | ((uint32_t)(src_argb[2]) << 2); uint32_t a0 = (src_argb[3] >> 6); *(uint32_t*)(dst_ar30) = b0 | (g0 << 10) | (r0 << 20) | (a0 << 30); dst_ar30 += 4; src_argb += 4; } } static __inline int RGBToY(uint8_t r, uint8_t g, uint8_t b) { return (66 * r + 129 * g + 25 * b + 0x1080) >> 8; } static __inline int RGBToU(uint8_t r, uint8_t g, uint8_t b) { return (112 * b - 
74 * g - 38 * r + 0x8080) >> 8; } static __inline int RGBToV(uint8_t r, uint8_t g, uint8_t b) { return (112 * r - 94 * g - 18 * b + 0x8080) >> 8; } // ARGBToY_C and ARGBToUV_C #define MAKEROWY(NAME, R, G, B, BPP) \ void NAME##ToYRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \ int x; \ for (x = 0; x < width; ++x) { \ dst_y[0] = RGBToY(src_argb0[R], src_argb0[G], src_argb0[B]); \ src_argb0 += BPP; \ dst_y += 1; \ } \ } \ void NAME##ToUVRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \ uint8_t* dst_u, uint8_t* dst_v, int width) { \ const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \ int x; \ for (x = 0; x < width - 1; x += 2) { \ uint8_t ab = (src_rgb0[B] + src_rgb0[B + BPP] + src_rgb1[B] + \ src_rgb1[B + BPP]) >> \ 2; \ uint8_t ag = (src_rgb0[G] + src_rgb0[G + BPP] + src_rgb1[G] + \ src_rgb1[G + BPP]) >> \ 2; \ uint8_t ar = (src_rgb0[R] + src_rgb0[R + BPP] + src_rgb1[R] + \ src_rgb1[R + BPP]) >> \ 2; \ dst_u[0] = RGBToU(ar, ag, ab); \ dst_v[0] = RGBToV(ar, ag, ab); \ src_rgb0 += BPP * 2; \ src_rgb1 += BPP * 2; \ dst_u += 1; \ dst_v += 1; \ } \ if (width & 1) { \ uint8_t ab = (src_rgb0[B] + src_rgb1[B]) >> 1; \ uint8_t ag = (src_rgb0[G] + src_rgb1[G]) >> 1; \ uint8_t ar = (src_rgb0[R] + src_rgb1[R]) >> 1; \ dst_u[0] = RGBToU(ar, ag, ab); \ dst_v[0] = RGBToV(ar, ag, ab); \ } \ } MAKEROWY(ARGB, 2, 1, 0, 4) MAKEROWY(BGRA, 1, 2, 3, 4) MAKEROWY(ABGR, 0, 1, 2, 4) MAKEROWY(RGBA, 3, 2, 1, 4) MAKEROWY(RGB24, 2, 1, 0, 3) MAKEROWY(RAW, 0, 1, 2, 3) #undef MAKEROWY // JPeg uses a variation on BT.601-1 full range // y = 0.29900 * r + 0.58700 * g + 0.11400 * b // u = -0.16874 * r - 0.33126 * g + 0.50000 * b + center // v = 0.50000 * r - 0.41869 * g - 0.08131 * b + center // BT.601 Mpeg range uses: // b 0.1016 * 255 = 25.908 = 25 // g 0.5078 * 255 = 129.489 = 129 // r 0.2578 * 255 = 65.739 = 66 // JPeg 8 bit Y (not used): // b 0.11400 * 256 = 29.184 = 29 // g 0.58700 * 256 = 150.272 = 150 // r 0.29900 * 256 = 76.544 = 77 // JPeg 7 bit Y: // b 0.11400 * 128 = 14.592 = 15 // g 0.58700 * 128 = 75.136 = 75 // r 0.29900 * 128 = 38.272 = 38 // JPeg 8 bit U: // b 0.50000 * 255 = 127.5 = 127 // g -0.33126 * 255 = -84.4713 = -84 // r -0.16874 * 255 = -43.0287 = -43 // JPeg 8 bit V: // b -0.08131 * 255 = -20.73405 = -20 // g -0.41869 * 255 = -106.76595 = -107 // r 0.50000 * 255 = 127.5 = 127 static __inline int RGBToYJ(uint8_t r, uint8_t g, uint8_t b) { return (38 * r + 75 * g + 15 * b + 64) >> 7; } static __inline int RGBToUJ(uint8_t r, uint8_t g, uint8_t b) { return (127 * b - 84 * g - 43 * r + 0x8080) >> 8; } static __inline int RGBToVJ(uint8_t r, uint8_t g, uint8_t b) { return (127 * r - 107 * g - 20 * b + 0x8080) >> 8; } #define AVGB(a, b) (((a) + (b) + 1) >> 1) // ARGBToYJ_C and ARGBToUVJ_C #define MAKEROWYJ(NAME, R, G, B, BPP) \ void NAME##ToYJRow_C(const uint8_t* src_argb0, uint8_t* dst_y, int width) { \ int x; \ for (x = 0; x < width; ++x) { \ dst_y[0] = RGBToYJ(src_argb0[R], src_argb0[G], src_argb0[B]); \ src_argb0 += BPP; \ dst_y += 1; \ } \ } \ void NAME##ToUVJRow_C(const uint8_t* src_rgb0, int src_stride_rgb, \ uint8_t* dst_u, uint8_t* dst_v, int width) { \ const uint8_t* src_rgb1 = src_rgb0 + src_stride_rgb; \ int x; \ for (x = 0; x < width - 1; x += 2) { \ uint8_t ab = AVGB(AVGB(src_rgb0[B], src_rgb1[B]), \ AVGB(src_rgb0[B + BPP], src_rgb1[B + BPP])); \ uint8_t ag = AVGB(AVGB(src_rgb0[G], src_rgb1[G]), \ AVGB(src_rgb0[G + BPP], src_rgb1[G + BPP])); \ uint8_t ar = AVGB(AVGB(src_rgb0[R], src_rgb1[R]), \ AVGB(src_rgb0[R + BPP], src_rgb1[R + BPP])); \ dst_u[0] = RGBToUJ(ar, ag, ab); \ 
dst_v[0] = RGBToVJ(ar, ag, ab); \ src_rgb0 += BPP * 2; \ src_rgb1 += BPP * 2; \ dst_u += 1; \ dst_v += 1; \ } \ if (width & 1) { \ uint8_t ab = AVGB(src_rgb0[B], src_rgb1[B]); \ uint8_t ag = AVGB(src_rgb0[G], src_rgb1[G]); \ uint8_t ar = AVGB(src_rgb0[R], src_rgb1[R]); \ dst_u[0] = RGBToUJ(ar, ag, ab); \ dst_v[0] = RGBToVJ(ar, ag, ab); \ } \ } MAKEROWYJ(ARGB, 2, 1, 0, 4) #undef MAKEROWYJ void RGB565ToYRow_C(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_rgb565[0] & 0x1f; uint8_t g = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); uint8_t r = src_rgb565[1] >> 3; b = (b << 3) | (b >> 2); g = (g << 2) | (g >> 4); r = (r << 3) | (r >> 2); dst_y[0] = RGBToY(r, g, b); src_rgb565 += 2; dst_y += 1; } } void ARGB1555ToYRow_C(const uint8_t* src_argb1555, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_argb1555[0] & 0x1f; uint8_t g = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3); uint8_t r = (src_argb1555[1] & 0x7c) >> 2; b = (b << 3) | (b >> 2); g = (g << 3) | (g >> 2); r = (r << 3) | (r >> 2); dst_y[0] = RGBToY(r, g, b); src_argb1555 += 2; dst_y += 1; } } void ARGB4444ToYRow_C(const uint8_t* src_argb4444, uint8_t* dst_y, int width) { int x; for (x = 0; x < width; ++x) { uint8_t b = src_argb4444[0] & 0x0f; uint8_t g = src_argb4444[0] >> 4; uint8_t r = src_argb4444[1] & 0x0f; b = (b << 4) | b; g = (g << 4) | g; r = (r << 4) | r; dst_y[0] = RGBToY(r, g, b); src_argb4444 += 2; dst_y += 1; } } void RGB565ToUVRow_C(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* next_rgb565 = src_rgb565 + src_stride_rgb565; int x; for (x = 0; x < width - 1; x += 2) { uint8_t b0 = src_rgb565[0] & 0x1f; uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); uint8_t r0 = src_rgb565[1] >> 3; uint8_t b1 = src_rgb565[2] & 0x1f; uint8_t g1 = (src_rgb565[2] >> 5) | ((src_rgb565[3] & 0x07) << 3); uint8_t r1 = src_rgb565[3] >> 3; uint8_t b2 = next_rgb565[0] & 0x1f; uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); uint8_t r2 = next_rgb565[1] >> 3; uint8_t b3 = next_rgb565[2] & 0x1f; uint8_t g3 = (next_rgb565[2] >> 5) | ((next_rgb565[3] & 0x07) << 3); uint8_t r3 = next_rgb565[3] >> 3; uint8_t b = (b0 + b1 + b2 + b3); // 565 * 4 = 787. uint8_t g = (g0 + g1 + g2 + g3); uint8_t r = (r0 + r1 + r2 + r3); b = (b << 1) | (b >> 6); // 787 -> 888. r = (r << 1) | (r >> 6); dst_u[0] = RGBToU(r, g, b); dst_v[0] = RGBToV(r, g, b); src_rgb565 += 4; next_rgb565 += 4; dst_u += 1; dst_v += 1; } if (width & 1) { uint8_t b0 = src_rgb565[0] & 0x1f; uint8_t g0 = (src_rgb565[0] >> 5) | ((src_rgb565[1] & 0x07) << 3); uint8_t r0 = src_rgb565[1] >> 3; uint8_t b2 = next_rgb565[0] & 0x1f; uint8_t g2 = (next_rgb565[0] >> 5) | ((next_rgb565[1] & 0x07) << 3); uint8_t r2 = next_rgb565[1] >> 3; uint8_t b = (b0 + b2); // 565 * 2 = 676. 
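    // The digit triplets track per-channel bit widths: summing two 5:6:5
    // pixels widens each channel by one bit to 6:7:6, and the shifts below
    // then expand those sums back to 8 bits per channel.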
uint8_t g = (g0 + g2);
    uint8_t r = (r0 + r2);
    b = (b << 2) | (b >> 4);  // 676 -> 888
    g = (g << 1) | (g >> 6);
    r = (r << 2) | (r >> 4);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}

void ARGB1555ToUVRow_C(const uint8_t* src_argb1555, int src_stride_argb1555,
                       uint8_t* dst_u, uint8_t* dst_v, int width) {
  const uint8_t* next_argb1555 = src_argb1555 + src_stride_argb1555;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b1 = src_argb1555[2] & 0x1f;
    uint8_t g1 = (src_argb1555[2] >> 5) | ((src_argb1555[3] & 0x03) << 3);
    uint8_t r1 = (src_argb1555[3] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8_t b3 = next_argb1555[2] & 0x1f;
    uint8_t g3 = (next_argb1555[2] >> 5) | ((next_argb1555[3] & 0x03) << 3);
    uint8_t r3 = (next_argb1555[3] & 0x7c) >> 2;
    uint8_t b = (b0 + b1 + b2 + b3);  // 555 * 4 = 777.
    uint8_t g = (g0 + g1 + g2 + g3);
    uint8_t r = (r0 + r1 + r2 + r3);
    b = (b << 1) | (b >> 6);  // 777 -> 888.
    g = (g << 1) | (g >> 6);
    r = (r << 1) | (r >> 6);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_argb1555 += 4;
    next_argb1555 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = src_argb1555[0] & 0x1f;
    uint8_t g0 = (src_argb1555[0] >> 5) | ((src_argb1555[1] & 0x03) << 3);
    uint8_t r0 = (src_argb1555[1] & 0x7c) >> 2;
    uint8_t b2 = next_argb1555[0] & 0x1f;
    uint8_t g2 = (next_argb1555[0] >> 5) | ((next_argb1555[1] & 0x03) << 3);
    uint8_t r2 = (next_argb1555[1] & 0x7c) >> 2;
    uint8_t b = (b0 + b2);  // 555 * 2 = 666.
    uint8_t g = (g0 + g2);
    uint8_t r = (r0 + r2);
    b = (b << 2) | (b >> 4);  // 666 -> 888.
    g = (g << 2) | (g >> 4);
    r = (r << 2) | (r >> 4);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
  }
}

void ARGB4444ToUVRow_C(const uint8_t* src_argb4444, int src_stride_argb4444,
                       uint8_t* dst_u, uint8_t* dst_v, int width) {
  const uint8_t* next_argb4444 = src_argb4444 + src_stride_argb4444;
  int x;
  for (x = 0; x < width - 1; x += 2) {
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b1 = src_argb4444[2] & 0x0f;
    uint8_t g1 = src_argb4444[2] >> 4;
    uint8_t r1 = src_argb4444[3] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;
    uint8_t b3 = next_argb4444[2] & 0x0f;
    uint8_t g3 = next_argb4444[2] >> 4;
    uint8_t r3 = next_argb4444[3] & 0x0f;
    uint8_t b = (b0 + b1 + b2 + b3);  // 444 * 4 = 666.
    uint8_t g = (g0 + g1 + g2 + g3);
    uint8_t r = (r0 + r1 + r2 + r3);
    b = (b << 2) | (b >> 4);  // 666 -> 888.
    g = (g << 2) | (g >> 4);
    r = (r << 2) | (r >> 4);
    dst_u[0] = RGBToU(r, g, b);
    dst_v[0] = RGBToV(r, g, b);
    src_argb4444 += 4;
    next_argb4444 += 4;
    dst_u += 1;
    dst_v += 1;
  }
  if (width & 1) {
    uint8_t b0 = src_argb4444[0] & 0x0f;
    uint8_t g0 = src_argb4444[0] >> 4;
    uint8_t r0 = src_argb4444[1] & 0x0f;
    uint8_t b2 = next_argb4444[0] & 0x0f;
    uint8_t g2 = next_argb4444[0] >> 4;
    uint8_t r2 = next_argb4444[1] & 0x0f;
    uint8_t b = (b0 + b2);  // 444 * 2 = 555.
    uint8_t g = (g0 + g2);
    uint8_t r = (r0 + r2);
    b = (b << 3) | (b >> 2);  // 555 -> 888.
g = (g << 3) | (g >> 2); r = (r << 3) | (r >> 2); dst_u[0] = RGBToU(r, g, b); dst_v[0] = RGBToV(r, g, b); } } void ARGBToUV444Row_C(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; for (x = 0; x < width; ++x) { uint8_t ab = src_argb[0]; uint8_t ag = src_argb[1]; uint8_t ar = src_argb[2]; dst_u[0] = RGBToU(ar, ag, ab); dst_v[0] = RGBToV(ar, ag, ab); src_argb += 4; dst_u += 1; dst_v += 1; } } void ARGBGrayRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { uint8_t y = RGBToYJ(src_argb[2], src_argb[1], src_argb[0]); dst_argb[2] = dst_argb[1] = dst_argb[0] = y; dst_argb[3] = src_argb[3]; dst_argb += 4; src_argb += 4; } } // Convert a row of image to Sepia tone. void ARGBSepiaRow_C(uint8_t* dst_argb, int width) { int x; for (x = 0; x < width; ++x) { int b = dst_argb[0]; int g = dst_argb[1]; int r = dst_argb[2]; int sb = (b * 17 + g * 68 + r * 35) >> 7; int sg = (b * 22 + g * 88 + r * 45) >> 7; int sr = (b * 24 + g * 98 + r * 50) >> 7; // b does not over flow. a is preserved from original. dst_argb[0] = sb; dst_argb[1] = clamp255(sg); dst_argb[2] = clamp255(sr); dst_argb += 4; } } // Apply color matrix to a row of image. Matrix is signed. // TODO(fbarchard): Consider adding rounding (+32). void ARGBColorMatrixRow_C(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width) { int x; for (x = 0; x < width; ++x) { int b = src_argb[0]; int g = src_argb[1]; int r = src_argb[2]; int a = src_argb[3]; int sb = (b * matrix_argb[0] + g * matrix_argb[1] + r * matrix_argb[2] + a * matrix_argb[3]) >> 6; int sg = (b * matrix_argb[4] + g * matrix_argb[5] + r * matrix_argb[6] + a * matrix_argb[7]) >> 6; int sr = (b * matrix_argb[8] + g * matrix_argb[9] + r * matrix_argb[10] + a * matrix_argb[11]) >> 6; int sa = (b * matrix_argb[12] + g * matrix_argb[13] + r * matrix_argb[14] + a * matrix_argb[15]) >> 6; dst_argb[0] = Clamp(sb); dst_argb[1] = Clamp(sg); dst_argb[2] = Clamp(sr); dst_argb[3] = Clamp(sa); src_argb += 4; dst_argb += 4; } } // Apply color table to a row of image. void ARGBColorTableRow_C(uint8_t* dst_argb, const uint8_t* table_argb, int width) { int x; for (x = 0; x < width; ++x) { int b = dst_argb[0]; int g = dst_argb[1]; int r = dst_argb[2]; int a = dst_argb[3]; dst_argb[0] = table_argb[b * 4 + 0]; dst_argb[1] = table_argb[g * 4 + 1]; dst_argb[2] = table_argb[r * 4 + 2]; dst_argb[3] = table_argb[a * 4 + 3]; dst_argb += 4; } } // Apply color table to a row of image. 
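// Illustrative sketch (not from the library itself) of how the 256-entry,
// 4-bytes-per-entry table consumed by ARGBColorTableRow_C above can be
// populated. The 2.2 gamma value and the use of pow() (from <math.h>) are
// assumptions for the example only.
static void BuildGammaTableExample(uint8_t table_argb[256 * 4]) {
  int v;
  for (v = 0; v < 256; ++v) {
    uint8_t e = (uint8_t)(255.0 * pow(v / 255.0, 1.0 / 2.2) + 0.5);
    table_argb[v * 4 + 0] = e;           // B entry.
    table_argb[v * 4 + 1] = e;           // G entry.
    table_argb[v * 4 + 2] = e;           // R entry.
    table_argb[v * 4 + 3] = (uint8_t)v;  // Leave alpha linear.
  }
}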
void RGBColorTableRow_C(uint8_t* dst_argb, const uint8_t* table_argb, int width) { int x; for (x = 0; x < width; ++x) { int b = dst_argb[0]; int g = dst_argb[1]; int r = dst_argb[2]; dst_argb[0] = table_argb[b * 4 + 0]; dst_argb[1] = table_argb[g * 4 + 1]; dst_argb[2] = table_argb[r * 4 + 2]; dst_argb += 4; } } void ARGBQuantizeRow_C(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width) { int x; for (x = 0; x < width; ++x) { int b = dst_argb[0]; int g = dst_argb[1]; int r = dst_argb[2]; dst_argb[0] = (b * scale >> 16) * interval_size + interval_offset; dst_argb[1] = (g * scale >> 16) * interval_size + interval_offset; dst_argb[2] = (r * scale >> 16) * interval_size + interval_offset; dst_argb += 4; } } #define REPEAT8(v) (v) | ((v) << 8) #define SHADE(f, v) v* f >> 24 void ARGBShadeRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value) { const uint32_t b_scale = REPEAT8(value & 0xff); const uint32_t g_scale = REPEAT8((value >> 8) & 0xff); const uint32_t r_scale = REPEAT8((value >> 16) & 0xff); const uint32_t a_scale = REPEAT8(value >> 24); int i; for (i = 0; i < width; ++i) { const uint32_t b = REPEAT8(src_argb[0]); const uint32_t g = REPEAT8(src_argb[1]); const uint32_t r = REPEAT8(src_argb[2]); const uint32_t a = REPEAT8(src_argb[3]); dst_argb[0] = SHADE(b, b_scale); dst_argb[1] = SHADE(g, g_scale); dst_argb[2] = SHADE(r, r_scale); dst_argb[3] = SHADE(a, a_scale); src_argb += 4; dst_argb += 4; } } #undef REPEAT8 #undef SHADE #define REPEAT8(v) (v) | ((v) << 8) #define SHADE(f, v) v* f >> 16 void ARGBMultiplyRow_C(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { int i; for (i = 0; i < width; ++i) { const uint32_t b = REPEAT8(src_argb0[0]); const uint32_t g = REPEAT8(src_argb0[1]); const uint32_t r = REPEAT8(src_argb0[2]); const uint32_t a = REPEAT8(src_argb0[3]); const uint32_t b_scale = src_argb1[0]; const uint32_t g_scale = src_argb1[1]; const uint32_t r_scale = src_argb1[2]; const uint32_t a_scale = src_argb1[3]; dst_argb[0] = SHADE(b, b_scale); dst_argb[1] = SHADE(g, g_scale); dst_argb[2] = SHADE(r, r_scale); dst_argb[3] = SHADE(a, a_scale); src_argb0 += 4; src_argb1 += 4; dst_argb += 4; } } #undef REPEAT8 #undef SHADE #define SHADE(f, v) clamp255(v + f) void ARGBAddRow_C(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { int i; for (i = 0; i < width; ++i) { const int b = src_argb0[0]; const int g = src_argb0[1]; const int r = src_argb0[2]; const int a = src_argb0[3]; const int b_add = src_argb1[0]; const int g_add = src_argb1[1]; const int r_add = src_argb1[2]; const int a_add = src_argb1[3]; dst_argb[0] = SHADE(b, b_add); dst_argb[1] = SHADE(g, g_add); dst_argb[2] = SHADE(r, r_add); dst_argb[3] = SHADE(a, a_add); src_argb0 += 4; src_argb1 += 4; dst_argb += 4; } } #undef SHADE #define SHADE(f, v) clamp0(f - v) void ARGBSubtractRow_C(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { int i; for (i = 0; i < width; ++i) { const int b = src_argb0[0]; const int g = src_argb0[1]; const int r = src_argb0[2]; const int a = src_argb0[3]; const int b_sub = src_argb1[0]; const int g_sub = src_argb1[1]; const int r_sub = src_argb1[2]; const int a_sub = src_argb1[3]; dst_argb[0] = SHADE(b, b_sub); dst_argb[1] = SHADE(g, g_sub); dst_argb[2] = SHADE(r, r_sub); dst_argb[3] = SHADE(a, a_sub); src_argb0 += 4; src_argb1 += 4; dst_argb += 4; } } #undef SHADE // Sobel functions which mimics SSSE3. 
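// Illustrative sketch: the row functions below evaluate the classic 3x3
// Sobel kernels one output at a time. For a single pixel the X pass is
// equivalent to the following, where `src` points at the top-left of a 3x3
// window and `stride` is the row pitch (both names are assumptions for the
// example):
//   [ 1 0 -1 ]
//   [ 2 0 -2 ]
//   [ 1 0 -1 ]
static int SobelXAtPixelExample(const uint8_t* src, int stride) {
  int gx = (src[0] - src[2]) + 2 * (src[stride] - src[stride + 2]) +
           (src[2 * stride] - src[2 * stride + 2]);
  return gx < 0 ? -gx : gx;  // SobelXRow_C additionally clamps to 255.
}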
void SobelXRow_C(const uint8_t* src_y0, const uint8_t* src_y1, const uint8_t* src_y2, uint8_t* dst_sobelx, int width) { int i; for (i = 0; i < width; ++i) { int a = src_y0[i]; int b = src_y1[i]; int c = src_y2[i]; int a_sub = src_y0[i + 2]; int b_sub = src_y1[i + 2]; int c_sub = src_y2[i + 2]; int a_diff = a - a_sub; int b_diff = b - b_sub; int c_diff = c - c_sub; int sobel = Abs(a_diff + b_diff * 2 + c_diff); dst_sobelx[i] = (uint8_t)(clamp255(sobel)); } } void SobelYRow_C(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width) { int i; for (i = 0; i < width; ++i) { int a = src_y0[i + 0]; int b = src_y0[i + 1]; int c = src_y0[i + 2]; int a_sub = src_y1[i + 0]; int b_sub = src_y1[i + 1]; int c_sub = src_y1[i + 2]; int a_diff = a - a_sub; int b_diff = b - b_sub; int c_diff = c - c_sub; int sobel = Abs(a_diff + b_diff * 2 + c_diff); dst_sobely[i] = (uint8_t)(clamp255(sobel)); } } void SobelRow_C(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) { int i; for (i = 0; i < width; ++i) { int r = src_sobelx[i]; int b = src_sobely[i]; int s = clamp255(r + b); dst_argb[0] = (uint8_t)(s); dst_argb[1] = (uint8_t)(s); dst_argb[2] = (uint8_t)(s); dst_argb[3] = (uint8_t)(255u); dst_argb += 4; } } void SobelToPlaneRow_C(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width) { int i; for (i = 0; i < width; ++i) { int r = src_sobelx[i]; int b = src_sobely[i]; int s = clamp255(r + b); dst_y[i] = (uint8_t)(s); } } void SobelXYRow_C(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) { int i; for (i = 0; i < width; ++i) { int r = src_sobelx[i]; int b = src_sobely[i]; int g = clamp255(r + b); dst_argb[0] = (uint8_t)(b); dst_argb[1] = (uint8_t)(g); dst_argb[2] = (uint8_t)(r); dst_argb[3] = (uint8_t)(255u); dst_argb += 4; } } void J400ToARGBRow_C(const uint8_t* src_y, uint8_t* dst_argb, int width) { // Copy a Y to RGB. int x; for (x = 0; x < width; ++x) { uint8_t y = src_y[0]; dst_argb[2] = dst_argb[1] = dst_argb[0] = y; dst_argb[3] = 255u; dst_argb += 4; ++src_y; } } // TODO(fbarchard): Unify these structures to be platform independent. // TODO(fbarchard): Generate SIMD structures from float matrix. // BT.601 YUV to RGB reference // R = (Y - 16) * 1.164 - V * -1.596 // G = (Y - 16) * 1.164 - U * 0.391 - V * 0.813 // B = (Y - 16) * 1.164 - U * -2.018 // Y contribution to R,G,B. Scale and bias. #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ // U and V contributions to R,G,B. #define UB -128 /* max(-128, round(-2.018 * 64)) */ #define UG 25 /* round(0.391 * 64) */ #define VG 52 /* round(0.813 * 64) */ #define VR -102 /* round(-1.596 * 64) */ // Bias values to subtract 16 from Y and 128 from U and V. 
#define BB (UB * 128 + YGB) #define BG (UG * 128 + VG * 128 + YGB) #define BR (VR * 128 + YGB) #if defined(__aarch64__) // 64 bit arm const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = { {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, {UG, VG, UG, VG, UG, VG, UG, VG}, {UG, VG, UG, VG, UG, VG, UG, VG}, {BB, BG, BR, 0, 0, 0, 0, 0}, {0x0101 * YG, 0, 0, 0}}; const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = { {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, {VG, UG, VG, UG, VG, UG, VG, UG}, {VG, UG, VG, UG, VG, UG, VG, UG}, {BR, BG, BB, 0, 0, 0, 0, 0}, {0x0101 * YG, 0, 0, 0}}; #elif defined(__arm__) // 32 bit arm const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = { {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0}, {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, {BB, BG, BR, 0, 0, 0, 0, 0}, {0x0101 * YG, 0, 0, 0}}; const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = { {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0}, {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, {BR, BG, BB, 0, 0, 0, 0, 0}, {0x0101 * YG, 0, 0, 0}}; #else const struct YuvConstants SIMD_ALIGNED(kYuvI601Constants) = { {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}}; const struct YuvConstants SIMD_ALIGNED(kYvuI601Constants) = { {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0}, {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB}, {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}}; #endif #undef BB #undef BG #undef BR #undef YGB #undef UB #undef UG #undef VG #undef VR #undef YG // JPEG YUV to RGB reference // * R = Y - V * -1.40200 // * G = Y - U * 0.34414 - V * 0.71414 // * B = Y - U * -1.77200 // Y contribution to R,G,B. Scale and bias. #define YG 16320 /* round(1.000 * 64 * 256 * 256 / 257) */ #define YGB 32 /* 64 / 2 */ // U and V contributions to R,G,B. #define UB -113 /* round(-1.77200 * 64) */ #define UG 22 /* round(0.34414 * 64) */ #define VG 46 /* round(0.71414 * 64) */ #define VR -90 /* round(-1.40200 * 64) */ // Bias values to round, and subtract 128 from U and V. 
#define BB (UB * 128 + YGB) #define BG (UG * 128 + VG * 128 + YGB) #define BR (VR * 128 + YGB) #if defined(__aarch64__) const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = { {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, {UG, VG, UG, VG, UG, VG, UG, VG}, {UG, VG, UG, VG, UG, VG, UG, VG}, {BB, BG, BR, 0, 0, 0, 0, 0}, {0x0101 * YG, 0, 0, 0}}; const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, {VG, UG, VG, UG, VG, UG, VG, UG}, {VG, UG, VG, UG, VG, UG, VG, UG}, {BR, BG, BB, 0, 0, 0, 0, 0}, {0x0101 * YG, 0, 0, 0}}; #elif defined(__arm__) const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = { {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0}, {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, {BB, BG, BR, 0, 0, 0, 0, 0}, {0x0101 * YG, 0, 0, 0}}; const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0}, {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, {BR, BG, BB, 0, 0, 0, 0, 0}, {0x0101 * YG, 0, 0, 0}}; #else const struct YuvConstants SIMD_ALIGNED(kYuvJPEGConstants) = { {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}}; const struct YuvConstants SIMD_ALIGNED(kYvuJPEGConstants) = { {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0}, {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB}, {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}}; #endif #undef BB #undef BG #undef BR #undef YGB #undef UB #undef UG #undef VG #undef VR #undef YG // BT.709 YUV to RGB reference // R = (Y - 16) * 1.164 - V * -1.793 // G = (Y - 16) * 1.164 - U * 0.213 - V * 0.533 // B = (Y - 16) * 1.164 - U * -2.112 // See also http://www.equasys.de/colorconversion.html // Y contribution to R,G,B. Scale and bias. #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ // TODO(fbarchard): Find way to express 2.112 instead of 2.0. // U and V contributions to R,G,B. #define UB -128 /* max(-128, round(-2.112 * 64)) */ #define UG 14 /* round(0.213 * 64) */ #define VG 34 /* round(0.533 * 64) */ #define VR -115 /* round(-1.793 * 64) */ // Bias values to round, and subtract 128 from U and V. 
#define BB (UB * 128 + YGB) #define BG (UG * 128 + VG * 128 + YGB) #define BR (VR * 128 + YGB) #if defined(__aarch64__) const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = { {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, {-UB, -VR, -UB, -VR, -UB, -VR, -UB, -VR}, {UG, VG, UG, VG, UG, VG, UG, VG}, {UG, VG, UG, VG, UG, VG, UG, VG}, {BB, BG, BR, 0, 0, 0, 0, 0}, {0x0101 * YG, 0, 0, 0}}; const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, {-VR, -UB, -VR, -UB, -VR, -UB, -VR, -UB}, {VG, UG, VG, UG, VG, UG, VG, UG}, {VG, UG, VG, UG, VG, UG, VG, UG}, {BR, BG, BB, 0, 0, 0, 0, 0}, {0x0101 * YG, 0, 0, 0}}; #elif defined(__arm__) const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = { {-UB, -UB, -UB, -UB, -VR, -VR, -VR, -VR, 0, 0, 0, 0, 0, 0, 0, 0}, {UG, UG, UG, UG, VG, VG, VG, VG, 0, 0, 0, 0, 0, 0, 0, 0}, {BB, BG, BR, 0, 0, 0, 0, 0}, {0x0101 * YG, 0, 0, 0}}; const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { {-VR, -VR, -VR, -VR, -UB, -UB, -UB, -UB, 0, 0, 0, 0, 0, 0, 0, 0}, {VG, VG, VG, VG, UG, UG, UG, UG, 0, 0, 0, 0, 0, 0, 0, 0}, {BR, BG, BB, 0, 0, 0, 0, 0}, {0x0101 * YG, 0, 0, 0}}; #else const struct YuvConstants SIMD_ALIGNED(kYuvH709Constants) = { {UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0}, {UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG}, {0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR}, {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}}; const struct YuvConstants SIMD_ALIGNED(kYvuH709Constants) = { {VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0, VR, 0}, {VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG, VG, UG}, {0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB, 0, UB}, {BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR, BR}, {BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG, BG}, {BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB, BB}, {YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG, YG}}; #endif #undef BB #undef BG #undef BR #undef YGB #undef UB #undef UG #undef VG #undef VR #undef YG // C reference code that mimics the YUV assembly. // Reads 8 bit YUV and leaves result as 16 bit. 
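// Illustrative worked example of the fixed-point path below, using assumed
// studio-range white (Y=235, U=V=128) and the BT.601 constants defined
// earlier (YG=18997, YGB=-1160, UB=-128):
//   y1 = (235 * 0x0101 * 18997) >> 16 = 17506
//   b  = (-(128 * -128) + y1 + (UB * 128 + YGB)) >> 6
//      = (16384 + 17506 - 17544) >> 6 = 255
// so studio-range white decodes to 255 in each channel, as expected.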
static __inline void YuvPixel(uint8_t y, uint8_t u, uint8_t v, uint8_t* b, uint8_t* g, uint8_t* r, const struct YuvConstants* yuvconstants) { #if defined(__aarch64__) int ub = -yuvconstants->kUVToRB[0]; int ug = yuvconstants->kUVToG[0]; int vg = yuvconstants->kUVToG[1]; int vr = -yuvconstants->kUVToRB[1]; int bb = yuvconstants->kUVBiasBGR[0]; int bg = yuvconstants->kUVBiasBGR[1]; int br = yuvconstants->kUVBiasBGR[2]; int yg = yuvconstants->kYToRgb[0] / 0x0101; #elif defined(__arm__) int ub = -yuvconstants->kUVToRB[0]; int ug = yuvconstants->kUVToG[0]; int vg = yuvconstants->kUVToG[4]; int vr = -yuvconstants->kUVToRB[4]; int bb = yuvconstants->kUVBiasBGR[0]; int bg = yuvconstants->kUVBiasBGR[1]; int br = yuvconstants->kUVBiasBGR[2]; int yg = yuvconstants->kYToRgb[0] / 0x0101; #else int ub = yuvconstants->kUVToB[0]; int ug = yuvconstants->kUVToG[0]; int vg = yuvconstants->kUVToG[1]; int vr = yuvconstants->kUVToR[1]; int bb = yuvconstants->kUVBiasB[0]; int bg = yuvconstants->kUVBiasG[0]; int br = yuvconstants->kUVBiasR[0]; int yg = yuvconstants->kYToRgb[0]; #endif uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; *b = Clamp((int32_t)(-(u * ub) + y1 + bb) >> 6); *g = Clamp((int32_t)(-(u * ug + v * vg) + y1 + bg) >> 6); *r = Clamp((int32_t)(-(v * vr) + y1 + br) >> 6); } // Reads 8 bit YUV and leaves result as 16 bit. static __inline void YuvPixel8_16(uint8_t y, uint8_t u, uint8_t v, int* b, int* g, int* r, const struct YuvConstants* yuvconstants) { #if defined(__aarch64__) int ub = -yuvconstants->kUVToRB[0]; int ug = yuvconstants->kUVToG[0]; int vg = yuvconstants->kUVToG[1]; int vr = -yuvconstants->kUVToRB[1]; int bb = yuvconstants->kUVBiasBGR[0]; int bg = yuvconstants->kUVBiasBGR[1]; int br = yuvconstants->kUVBiasBGR[2]; int yg = yuvconstants->kYToRgb[0] / 0x0101; #elif defined(__arm__) int ub = -yuvconstants->kUVToRB[0]; int ug = yuvconstants->kUVToG[0]; int vg = yuvconstants->kUVToG[4]; int vr = -yuvconstants->kUVToRB[4]; int bb = yuvconstants->kUVBiasBGR[0]; int bg = yuvconstants->kUVBiasBGR[1]; int br = yuvconstants->kUVBiasBGR[2]; int yg = yuvconstants->kYToRgb[0] / 0x0101; #else int ub = yuvconstants->kUVToB[0]; int ug = yuvconstants->kUVToG[0]; int vg = yuvconstants->kUVToG[1]; int vr = yuvconstants->kUVToR[1]; int bb = yuvconstants->kUVBiasB[0]; int bg = yuvconstants->kUVBiasG[0]; int br = yuvconstants->kUVBiasR[0]; int yg = yuvconstants->kYToRgb[0]; #endif uint32_t y1 = (uint32_t)(y * 0x0101 * yg) >> 16; *b = (int)(-(u * ub) + y1 + bb); *g = (int)(-(u * ug + v * vg) + y1 + bg); *r = (int)(-(v * vr) + y1 + br); } // C reference code that mimics the YUV 16 bit assembly. // Reads 10 bit YUV and leaves result as 16 bit. 
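// Illustrative note on the 10/16-bit path below: Y is scaled with (y << 6)
// so a 10-bit sample spans the same 16-bit range the 8-bit path gets from
// y * 0x0101 (1023 << 6 = 65472 vs 255 * 0x0101 = 65535), and U/V are
// simply reduced to 8 bits with >> 2 before the same fixed-point math runs.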
static __inline void YuvPixel16(int16_t y, int16_t u, int16_t v, int* b, int* g, int* r, const struct YuvConstants* yuvconstants) { #if defined(__aarch64__) int ub = -yuvconstants->kUVToRB[0]; int ug = yuvconstants->kUVToG[0]; int vg = yuvconstants->kUVToG[1]; int vr = -yuvconstants->kUVToRB[1]; int bb = yuvconstants->kUVBiasBGR[0]; int bg = yuvconstants->kUVBiasBGR[1]; int br = yuvconstants->kUVBiasBGR[2]; int yg = yuvconstants->kYToRgb[0] / 0x0101; #elif defined(__arm__) int ub = -yuvconstants->kUVToRB[0]; int ug = yuvconstants->kUVToG[0]; int vg = yuvconstants->kUVToG[4]; int vr = -yuvconstants->kUVToRB[4]; int bb = yuvconstants->kUVBiasBGR[0]; int bg = yuvconstants->kUVBiasBGR[1]; int br = yuvconstants->kUVBiasBGR[2]; int yg = yuvconstants->kYToRgb[0] / 0x0101; #else int ub = yuvconstants->kUVToB[0]; int ug = yuvconstants->kUVToG[0]; int vg = yuvconstants->kUVToG[1]; int vr = yuvconstants->kUVToR[1]; int bb = yuvconstants->kUVBiasB[0]; int bg = yuvconstants->kUVBiasG[0]; int br = yuvconstants->kUVBiasR[0]; int yg = yuvconstants->kYToRgb[0]; #endif uint32_t y1 = (uint32_t)((y << 6) * yg) >> 16; u = clamp255(u >> 2); v = clamp255(v >> 2); *b = (int)(-(u * ub) + y1 + bb); *g = (int)(-(u * ug + v * vg) + y1 + bg); *r = (int)(-(v * vr) + y1 + br); } // C reference code that mimics the YUV 10 bit assembly. // Reads 10 bit YUV and clamps down to 8 bit RGB. static __inline void YuvPixel10(uint16_t y, uint16_t u, uint16_t v, uint8_t* b, uint8_t* g, uint8_t* r, const struct YuvConstants* yuvconstants) { int b16; int g16; int r16; YuvPixel16(y, u, v, &b16, &g16, &r16, yuvconstants); *b = Clamp(b16 >> 6); *g = Clamp(g16 >> 6); *r = Clamp(r16 >> 6); } // Y contribution to R,G,B. Scale and bias. #define YG 18997 /* round(1.164 * 64 * 256 * 256 / 257) */ #define YGB -1160 /* 1.164 * 64 * -16 + 64 / 2 */ // C reference code that mimics the YUV assembly. static __inline void YPixel(uint8_t y, uint8_t* b, uint8_t* g, uint8_t* r) { uint32_t y1 = (uint32_t)(y * 0x0101 * YG) >> 16; *b = Clamp((int32_t)(y1 + YGB) >> 6); *g = Clamp((int32_t)(y1 + YGB) >> 6); *r = Clamp((int32_t)(y1 + YGB) >> 6); } #undef YG #undef YGB #if !defined(LIBYUV_DISABLE_NEON) && \ (defined(__ARM_NEON__) || defined(__aarch64__) || defined(LIBYUV_NEON)) // C mimic assembly. // TODO(fbarchard): Remove subsampling from Neon. void I444ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { uint8_t u = (src_u[0] + src_u[1] + 1) >> 1; uint8_t v = (src_v[0] + src_v[1] + 1) >> 1; YuvPixel(src_y[0], u, v, rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], u, v, rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_u += 2; src_v += 2; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } #else void I444ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width; ++x) { YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; src_y += 1; src_u += 1; src_v += 1; rgb_buf += 4; // Advance 1 pixel. 
} } #endif // Also used for 420 void I422ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_u += 1; src_v += 1; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } // 10 bit YUV to ARGB void I210ToARGBRow_C(const uint16_t* src_y, const uint16_t* src_u, const uint16_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel10(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_u += 1; src_v += 1; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel10(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } static void StoreAR30(uint8_t* rgb_buf, int b, int g, int r) { uint32_t ar30; b = b >> 4; // convert 10.6 to 10 bit. g = g >> 4; r = r >> 4; b = Clamp10(b); g = Clamp10(g); r = Clamp10(r); ar30 = b | ((uint32_t)g << 10) | ((uint32_t)r << 20) | 0xc0000000; (*(uint32_t*)rgb_buf) = ar30; } // 10 bit YUV to 10 bit AR30 void I210ToAR30Row_C(const uint16_t* src_y, const uint16_t* src_u, const uint16_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; int b; int g; int r; for (x = 0; x < width - 1; x += 2) { YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants); StoreAR30(rgb_buf, b, g, r); YuvPixel16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants); StoreAR30(rgb_buf + 4, b, g, r); src_y += 2; src_u += 1; src_v += 1; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants); StoreAR30(rgb_buf, b, g, r); } } // 8 bit YUV to 10 bit AR30 // Uses same code as 10 bit YUV bit shifts the 8 bit values up to 10 bits. void I422ToAR30Row_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; int b; int g; int r; for (x = 0; x < width - 1; x += 2) { YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants); StoreAR30(rgb_buf, b, g, r); YuvPixel8_16(src_y[1], src_u[0], src_v[0], &b, &g, &r, yuvconstants); StoreAR30(rgb_buf + 4, b, g, r); src_y += 2; src_u += 1; src_v += 1; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel8_16(src_y[0], src_u[0], src_v[0], &b, &g, &r, yuvconstants); StoreAR30(rgb_buf, b, g, r); } } void I422AlphaToARGBRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, const uint8_t* src_a, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = src_a[0]; YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = src_a[1]; src_y += 2; src_u += 1; src_v += 1; src_a += 2; rgb_buf += 8; // Advance 2 pixels. 
} if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = src_a[0]; } } void I422ToRGB24Row_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 3, rgb_buf + 4, rgb_buf + 5, yuvconstants); src_y += 2; src_u += 1; src_v += 1; rgb_buf += 6; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); } } void I422ToARGB4444Row_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { uint8_t b0; uint8_t g0; uint8_t r0; uint8_t b1; uint8_t g1; uint8_t r1; int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; b1 = b1 >> 4; g1 = g1 >> 4; r1 = r1 >> 4; *(uint32_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | (b1 << 16) | (g1 << 20) | (r1 << 24) | 0xf000f000; src_y += 2; src_u += 1; src_v += 1; dst_argb4444 += 4; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); b0 = b0 >> 4; g0 = g0 >> 4; r0 = r0 >> 4; *(uint16_t*)(dst_argb4444) = b0 | (g0 << 4) | (r0 << 8) | 0xf000; } } void I422ToARGB1555Row_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { uint8_t b0; uint8_t g0; uint8_t r0; uint8_t b1; uint8_t g1; uint8_t r1; int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; b1 = b1 >> 3; g1 = g1 >> 3; r1 = r1 >> 3; *(uint32_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | (b1 << 16) | (g1 << 21) | (r1 << 26) | 0x80008000; src_y += 2; src_u += 1; src_v += 1; dst_argb1555 += 4; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 3; r0 = r0 >> 3; *(uint16_t*)(dst_argb1555) = b0 | (g0 << 5) | (r0 << 10) | 0x8000; } } void I422ToRGB565Row_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { uint8_t b0; uint8_t g0; uint8_t r0; uint8_t b1; uint8_t g1; uint8_t r1; int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); YuvPixel(src_y[1], src_u[0], src_v[0], &b1, &g1, &r1, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; b1 = b1 >> 3; g1 = g1 >> 2; r1 = r1 >> 3; *(uint32_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); src_y += 2; src_u += 1; src_v += 1; dst_rgb565 += 4; // Advance 2 pixels. 
} if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], &b0, &g0, &r0, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); } } void NV12ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_uv += 2; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } void NV21ToARGBRow_C(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_y += 2; src_vu += 2; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } void NV12ToRGB24Row_C(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); YuvPixel(src_y[1], src_uv[0], src_uv[1], rgb_buf + 3, rgb_buf + 4, rgb_buf + 5, yuvconstants); src_y += 2; src_uv += 2; rgb_buf += 6; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_y[0], src_uv[0], src_uv[1], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); } } void NV21ToRGB24Row_C(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); YuvPixel(src_y[1], src_vu[1], src_vu[0], rgb_buf + 3, rgb_buf + 4, rgb_buf + 5, yuvconstants); src_y += 2; src_vu += 2; rgb_buf += 6; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_y[0], src_vu[1], src_vu[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); } } void NV12ToRGB565Row_C(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { uint8_t b0; uint8_t g0; uint8_t r0; uint8_t b1; uint8_t g1; uint8_t r1; int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); YuvPixel(src_y[1], src_uv[0], src_uv[1], &b1, &g1, &r1, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; b1 = b1 >> 3; g1 = g1 >> 2; r1 = r1 >> 3; *(uint32_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11) | (b1 << 16) | (g1 << 21) | (r1 << 27); src_y += 2; src_uv += 2; dst_rgb565 += 4; // Advance 2 pixels. 
} if (width & 1) { YuvPixel(src_y[0], src_uv[0], src_uv[1], &b0, &g0, &r0, yuvconstants); b0 = b0 >> 3; g0 = g0 >> 2; r0 = r0 >> 3; *(uint16_t*)(dst_rgb565) = b0 | (g0 << 5) | (r0 << 11); } } void YUY2ToARGBRow_C(const uint8_t* src_yuy2, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_yuy2[2], src_yuy2[1], src_yuy2[3], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_yuy2 += 4; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_yuy2[0], src_yuy2[1], src_yuy2[3], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } void UYVYToARGBRow_C(const uint8_t* src_uyvy, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; YuvPixel(src_uyvy[3], src_uyvy[0], src_uyvy[2], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6, yuvconstants); rgb_buf[7] = 255; src_uyvy += 4; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_uyvy[1], src_uyvy[0], src_uyvy[2], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2, yuvconstants); rgb_buf[3] = 255; } } void I422ToRGBARow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* rgb_buf, const struct YuvConstants* yuvconstants, int width) { int x; for (x = 0; x < width - 1; x += 2) { YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants); rgb_buf[0] = 255; YuvPixel(src_y[1], src_u[0], src_v[0], rgb_buf + 5, rgb_buf + 6, rgb_buf + 7, yuvconstants); rgb_buf[4] = 255; src_y += 2; src_u += 1; src_v += 1; rgb_buf += 8; // Advance 2 pixels. } if (width & 1) { YuvPixel(src_y[0], src_u[0], src_v[0], rgb_buf + 1, rgb_buf + 2, rgb_buf + 3, yuvconstants); rgb_buf[0] = 255; } } void I400ToARGBRow_C(const uint8_t* src_y, uint8_t* rgb_buf, int width) { int x; for (x = 0; x < width - 1; x += 2) { YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); rgb_buf[3] = 255; YPixel(src_y[1], rgb_buf + 4, rgb_buf + 5, rgb_buf + 6); rgb_buf[7] = 255; src_y += 2; rgb_buf += 8; // Advance 2 pixels. 
} if (width & 1) { YPixel(src_y[0], rgb_buf + 0, rgb_buf + 1, rgb_buf + 2); rgb_buf[3] = 255; } } void MirrorRow_C(const uint8_t* src, uint8_t* dst, int width) { int x; src += width - 1; for (x = 0; x < width - 1; x += 2) { dst[x] = src[0]; dst[x + 1] = src[-1]; src -= 2; } if (width & 1) { dst[width - 1] = src[0]; } } void MirrorUVRow_C(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; src_uv += (width - 1) << 1; for (x = 0; x < width - 1; x += 2) { dst_u[x] = src_uv[0]; dst_u[x + 1] = src_uv[-2]; dst_v[x] = src_uv[1]; dst_v[x + 1] = src_uv[-2 + 1]; src_uv -= 4; } if (width & 1) { dst_u[width - 1] = src_uv[0]; dst_v[width - 1] = src_uv[1]; } } void ARGBMirrorRow_C(const uint8_t* src, uint8_t* dst, int width) { int x; const uint32_t* src32 = (const uint32_t*)(src); uint32_t* dst32 = (uint32_t*)(dst); src32 += width - 1; for (x = 0; x < width - 1; x += 2) { dst32[x] = src32[0]; dst32[x + 1] = src32[-1]; src32 -= 2; } if (width & 1) { dst32[width - 1] = src32[0]; } } void SplitUVRow_C(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; for (x = 0; x < width - 1; x += 2) { dst_u[x] = src_uv[0]; dst_u[x + 1] = src_uv[2]; dst_v[x] = src_uv[1]; dst_v[x + 1] = src_uv[3]; src_uv += 4; } if (width & 1) { dst_u[width - 1] = src_uv[0]; dst_v[width - 1] = src_uv[1]; } } void MergeUVRow_C(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { int x; for (x = 0; x < width - 1; x += 2) { dst_uv[0] = src_u[x]; dst_uv[1] = src_v[x]; dst_uv[2] = src_u[x + 1]; dst_uv[3] = src_v[x + 1]; dst_uv += 4; } if (width & 1) { dst_uv[0] = src_u[width - 1]; dst_uv[1] = src_v[width - 1]; } } void SplitRGBRow_C(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) { int x; for (x = 0; x < width; ++x) { dst_r[x] = src_rgb[0]; dst_g[x] = src_rgb[1]; dst_b[x] = src_rgb[2]; src_rgb += 3; } } void MergeRGBRow_C(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width) { int x; for (x = 0; x < width; ++x) { dst_rgb[0] = src_r[x]; dst_rgb[1] = src_g[x]; dst_rgb[2] = src_b[x]; dst_rgb += 3; } } // Use scale to convert lsb formats to msb, depending how many bits there are: // 128 = 9 bits // 64 = 10 bits // 16 = 12 bits // 1 = 16 bits void MergeUVRow_16_C(const uint16_t* src_u, const uint16_t* src_v, uint16_t* dst_uv, int scale, int width) { int x; for (x = 0; x < width - 1; x += 2) { dst_uv[0] = src_u[x] * scale; dst_uv[1] = src_v[x] * scale; dst_uv[2] = src_u[x + 1] * scale; dst_uv[3] = src_v[x + 1] * scale; dst_uv += 4; } if (width & 1) { dst_uv[0] = src_u[width - 1] * scale; dst_uv[1] = src_v[width - 1] * scale; } } void MultiplyRow_16_C(const uint16_t* src_y, uint16_t* dst_y, int scale, int width) { int x; for (x = 0; x < width; ++x) { dst_y[x] = src_y[x] * scale; } } // Use scale to convert lsb formats to msb, depending how many bits there are: // 32768 = 9 bits // 16384 = 10 bits // 4096 = 12 bits // 256 = 16 bits void Convert16To8Row_C(const uint16_t* src_y, uint8_t* dst_y, int scale, int width) { int x; for (x = 0; x < width; ++x) { dst_y[x] = clamp255((src_y[x] * scale) >> 16); } } // Use scale to convert lsb formats to msb, depending how many bits there are: // 1024 = 10 bits void Convert8To16Row_C(const uint8_t* src_y, uint16_t* dst_y, int scale, int width) { int x; scale *= 0x0101; // replicates the byte. 
for (x = 0; x < width; ++x) { dst_y[x] = (src_y[x] * scale) >> 16; } } void CopyRow_C(const uint8_t* src, uint8_t* dst, int count) { memcpy(dst, src, count); } void CopyRow_16_C(const uint16_t* src, uint16_t* dst, int count) { memcpy(dst, src, count * 2); } void SetRow_C(uint8_t* dst, uint8_t v8, int width) { memset(dst, v8, width); } void ARGBSetRow_C(uint8_t* dst_argb, uint32_t v32, int width) { uint32_t* d = (uint32_t*)(dst_argb); int x; for (x = 0; x < width; ++x) { d[x] = v32; } } // Filter 2 rows of YUY2 UV's (422) into U and V (420). void YUY2ToUVRow_C(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { // Output a row of UV values, filtering 2 rows of YUY2. int x; for (x = 0; x < width; x += 2) { dst_u[0] = (src_yuy2[1] + src_yuy2[src_stride_yuy2 + 1] + 1) >> 1; dst_v[0] = (src_yuy2[3] + src_yuy2[src_stride_yuy2 + 3] + 1) >> 1; src_yuy2 += 4; dst_u += 1; dst_v += 1; } } // Copy row of YUY2 UV's (422) into U and V (422). void YUY2ToUV422Row_C(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { // Output a row of UV values. int x; for (x = 0; x < width; x += 2) { dst_u[0] = src_yuy2[1]; dst_v[0] = src_yuy2[3]; src_yuy2 += 4; dst_u += 1; dst_v += 1; } } // Copy row of YUY2 Y's (422) into Y (420/422). void YUY2ToYRow_C(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { // Output a row of Y values. int x; for (x = 0; x < width - 1; x += 2) { dst_y[x] = src_yuy2[0]; dst_y[x + 1] = src_yuy2[2]; src_yuy2 += 4; } if (width & 1) { dst_y[width - 1] = src_yuy2[0]; } } // Filter 2 rows of UYVY UV's (422) into U and V (420). void UYVYToUVRow_C(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { // Output a row of UV values. int x; for (x = 0; x < width; x += 2) { dst_u[0] = (src_uyvy[0] + src_uyvy[src_stride_uyvy + 0] + 1) >> 1; dst_v[0] = (src_uyvy[2] + src_uyvy[src_stride_uyvy + 2] + 1) >> 1; src_uyvy += 4; dst_u += 1; dst_v += 1; } } // Copy row of UYVY UV's (422) into U and V (422). void UYVYToUV422Row_C(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { // Output a row of UV values. int x; for (x = 0; x < width; x += 2) { dst_u[0] = src_uyvy[0]; dst_v[0] = src_uyvy[2]; src_uyvy += 4; dst_u += 1; dst_v += 1; } } // Copy row of UYVY Y's (422) into Y (420/422). void UYVYToYRow_C(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { // Output a row of Y values. int x; for (x = 0; x < width - 1; x += 2) { dst_y[x] = src_uyvy[1]; dst_y[x + 1] = src_uyvy[3]; src_uyvy += 4; } if (width & 1) { dst_y[width - 1] = src_uyvy[1]; } } #define BLEND(f, b, a) (((256 - a) * b) >> 8) + f // Blend src_argb0 over src_argb1 and store to dst_argb. // dst_argb may be src_argb0 or src_argb1. // This code mimics the SSSE3 version for better testability. 
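// Illustrative worked example of BLEND above on one channel, with assumed
// values f = 128 (already multiplied by its alpha, as ARGBAttenuateRow_C
// further below does), b = 64, a = 128:
//   BLEND(128, 64, 128) = ((256 - 128) * 64 >> 8) + 128 = 32 + 128 = 160
// Adding the foreground term unscaled only composites correctly because f
// carries its alpha already.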
void ARGBBlendRow_C(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { int x; for (x = 0; x < width - 1; x += 2) { uint32_t fb = src_argb0[0]; uint32_t fg = src_argb0[1]; uint32_t fr = src_argb0[2]; uint32_t a = src_argb0[3]; uint32_t bb = src_argb1[0]; uint32_t bg = src_argb1[1]; uint32_t br = src_argb1[2]; dst_argb[0] = BLEND(fb, bb, a); dst_argb[1] = BLEND(fg, bg, a); dst_argb[2] = BLEND(fr, br, a); dst_argb[3] = 255u; fb = src_argb0[4 + 0]; fg = src_argb0[4 + 1]; fr = src_argb0[4 + 2]; a = src_argb0[4 + 3]; bb = src_argb1[4 + 0]; bg = src_argb1[4 + 1]; br = src_argb1[4 + 2]; dst_argb[4 + 0] = BLEND(fb, bb, a); dst_argb[4 + 1] = BLEND(fg, bg, a); dst_argb[4 + 2] = BLEND(fr, br, a); dst_argb[4 + 3] = 255u; src_argb0 += 8; src_argb1 += 8; dst_argb += 8; } if (width & 1) { uint32_t fb = src_argb0[0]; uint32_t fg = src_argb0[1]; uint32_t fr = src_argb0[2]; uint32_t a = src_argb0[3]; uint32_t bb = src_argb1[0]; uint32_t bg = src_argb1[1]; uint32_t br = src_argb1[2]; dst_argb[0] = BLEND(fb, bb, a); dst_argb[1] = BLEND(fg, bg, a); dst_argb[2] = BLEND(fr, br, a); dst_argb[3] = 255u; } } #undef BLEND #define UBLEND(f, b, a) (((a)*f) + ((255 - a) * b) + 255) >> 8 void BlendPlaneRow_C(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, uint8_t* dst, int width) { int x; for (x = 0; x < width - 1; x += 2) { dst[0] = UBLEND(src0[0], src1[0], alpha[0]); dst[1] = UBLEND(src0[1], src1[1], alpha[1]); src0 += 2; src1 += 2; alpha += 2; dst += 2; } if (width & 1) { dst[0] = UBLEND(src0[0], src1[0], alpha[0]); } } #undef UBLEND #define ATTENUATE(f, a) (a | (a << 8)) * (f | (f << 8)) >> 24 // Multiply source RGB by alpha and store to destination. // This code mimics the SSSE3 version for better testability. void ARGBAttenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { int i; for (i = 0; i < width - 1; i += 2) { uint32_t b = src_argb[0]; uint32_t g = src_argb[1]; uint32_t r = src_argb[2]; uint32_t a = src_argb[3]; dst_argb[0] = ATTENUATE(b, a); dst_argb[1] = ATTENUATE(g, a); dst_argb[2] = ATTENUATE(r, a); dst_argb[3] = a; b = src_argb[4]; g = src_argb[5]; r = src_argb[6]; a = src_argb[7]; dst_argb[4] = ATTENUATE(b, a); dst_argb[5] = ATTENUATE(g, a); dst_argb[6] = ATTENUATE(r, a); dst_argb[7] = a; src_argb += 8; dst_argb += 8; } if (width & 1) { const uint32_t b = src_argb[0]; const uint32_t g = src_argb[1]; const uint32_t r = src_argb[2]; const uint32_t a = src_argb[3]; dst_argb[0] = ATTENUATE(b, a); dst_argb[1] = ATTENUATE(g, a); dst_argb[2] = ATTENUATE(r, a); dst_argb[3] = a; } } #undef ATTENUATE // Divide source RGB by alpha and store to destination. // b = (b * 255 + (a / 2)) / a; // g = (g * 255 + (a / 2)) / a; // r = (r * 255 + (a / 2)) / a; // Reciprocal method is off by 1 on some values. ie 125 // 8.8 fixed point inverse table with 1.0 in upper short and 1 / a in lower. 
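// Illustrative worked example of the 8.8 reciprocal trick defined below:
// each table entry packs 1.0 (0x0100) in the upper 16 bits and 0x10000 / a
// in the lower 16, so the unattenuate row computes (b * (0x10000 / a)) >> 8,
// roughly b * 256 / a. For a = 128, b = 100 that gives (100 * 512) >> 8 =
// 200, while the exact divide (100 * 255 + 64) / 128 = 199 -- the
// off-by-one the comment above mentions.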
#define T(a) 0x01000000 + (0x10000 / a) const uint32_t fixed_invtbl8[256] = { 0x01000000, 0x0100ffff, T(0x02), T(0x03), T(0x04), T(0x05), T(0x06), T(0x07), T(0x08), T(0x09), T(0x0a), T(0x0b), T(0x0c), T(0x0d), T(0x0e), T(0x0f), T(0x10), T(0x11), T(0x12), T(0x13), T(0x14), T(0x15), T(0x16), T(0x17), T(0x18), T(0x19), T(0x1a), T(0x1b), T(0x1c), T(0x1d), T(0x1e), T(0x1f), T(0x20), T(0x21), T(0x22), T(0x23), T(0x24), T(0x25), T(0x26), T(0x27), T(0x28), T(0x29), T(0x2a), T(0x2b), T(0x2c), T(0x2d), T(0x2e), T(0x2f), T(0x30), T(0x31), T(0x32), T(0x33), T(0x34), T(0x35), T(0x36), T(0x37), T(0x38), T(0x39), T(0x3a), T(0x3b), T(0x3c), T(0x3d), T(0x3e), T(0x3f), T(0x40), T(0x41), T(0x42), T(0x43), T(0x44), T(0x45), T(0x46), T(0x47), T(0x48), T(0x49), T(0x4a), T(0x4b), T(0x4c), T(0x4d), T(0x4e), T(0x4f), T(0x50), T(0x51), T(0x52), T(0x53), T(0x54), T(0x55), T(0x56), T(0x57), T(0x58), T(0x59), T(0x5a), T(0x5b), T(0x5c), T(0x5d), T(0x5e), T(0x5f), T(0x60), T(0x61), T(0x62), T(0x63), T(0x64), T(0x65), T(0x66), T(0x67), T(0x68), T(0x69), T(0x6a), T(0x6b), T(0x6c), T(0x6d), T(0x6e), T(0x6f), T(0x70), T(0x71), T(0x72), T(0x73), T(0x74), T(0x75), T(0x76), T(0x77), T(0x78), T(0x79), T(0x7a), T(0x7b), T(0x7c), T(0x7d), T(0x7e), T(0x7f), T(0x80), T(0x81), T(0x82), T(0x83), T(0x84), T(0x85), T(0x86), T(0x87), T(0x88), T(0x89), T(0x8a), T(0x8b), T(0x8c), T(0x8d), T(0x8e), T(0x8f), T(0x90), T(0x91), T(0x92), T(0x93), T(0x94), T(0x95), T(0x96), T(0x97), T(0x98), T(0x99), T(0x9a), T(0x9b), T(0x9c), T(0x9d), T(0x9e), T(0x9f), T(0xa0), T(0xa1), T(0xa2), T(0xa3), T(0xa4), T(0xa5), T(0xa6), T(0xa7), T(0xa8), T(0xa9), T(0xaa), T(0xab), T(0xac), T(0xad), T(0xae), T(0xaf), T(0xb0), T(0xb1), T(0xb2), T(0xb3), T(0xb4), T(0xb5), T(0xb6), T(0xb7), T(0xb8), T(0xb9), T(0xba), T(0xbb), T(0xbc), T(0xbd), T(0xbe), T(0xbf), T(0xc0), T(0xc1), T(0xc2), T(0xc3), T(0xc4), T(0xc5), T(0xc6), T(0xc7), T(0xc8), T(0xc9), T(0xca), T(0xcb), T(0xcc), T(0xcd), T(0xce), T(0xcf), T(0xd0), T(0xd1), T(0xd2), T(0xd3), T(0xd4), T(0xd5), T(0xd6), T(0xd7), T(0xd8), T(0xd9), T(0xda), T(0xdb), T(0xdc), T(0xdd), T(0xde), T(0xdf), T(0xe0), T(0xe1), T(0xe2), T(0xe3), T(0xe4), T(0xe5), T(0xe6), T(0xe7), T(0xe8), T(0xe9), T(0xea), T(0xeb), T(0xec), T(0xed), T(0xee), T(0xef), T(0xf0), T(0xf1), T(0xf2), T(0xf3), T(0xf4), T(0xf5), T(0xf6), T(0xf7), T(0xf8), T(0xf9), T(0xfa), T(0xfb), T(0xfc), T(0xfd), T(0xfe), 0x01000100}; #undef T void ARGBUnattenuateRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width) { int i; for (i = 0; i < width; ++i) { uint32_t b = src_argb[0]; uint32_t g = src_argb[1]; uint32_t r = src_argb[2]; const uint32_t a = src_argb[3]; const uint32_t ia = fixed_invtbl8[a] & 0xffff; // 8.8 fixed point b = (b * ia) >> 8; g = (g * ia) >> 8; r = (r * ia) >> 8; // Clamping should not be necessary but is free in assembly. 
dst_argb[0] = clamp255(b); dst_argb[1] = clamp255(g); dst_argb[2] = clamp255(r); dst_argb[3] = a; src_argb += 4; dst_argb += 4; } } void ComputeCumulativeSumRow_C(const uint8_t* row, int32_t* cumsum, const int32_t* previous_cumsum, int width) { int32_t row_sum[4] = {0, 0, 0, 0}; int x; for (x = 0; x < width; ++x) { row_sum[0] += row[x * 4 + 0]; row_sum[1] += row[x * 4 + 1]; row_sum[2] += row[x * 4 + 2]; row_sum[3] += row[x * 4 + 3]; cumsum[x * 4 + 0] = row_sum[0] + previous_cumsum[x * 4 + 0]; cumsum[x * 4 + 1] = row_sum[1] + previous_cumsum[x * 4 + 1]; cumsum[x * 4 + 2] = row_sum[2] + previous_cumsum[x * 4 + 2]; cumsum[x * 4 + 3] = row_sum[3] + previous_cumsum[x * 4 + 3]; } } void CumulativeSumToAverageRow_C(const int32_t* tl, const int32_t* bl, int w, int area, uint8_t* dst, int count) { float ooa = 1.0f / area; int i; for (i = 0; i < count; ++i) { dst[0] = (uint8_t)((bl[w + 0] + tl[0] - bl[0] - tl[w + 0]) * ooa); dst[1] = (uint8_t)((bl[w + 1] + tl[1] - bl[1] - tl[w + 1]) * ooa); dst[2] = (uint8_t)((bl[w + 2] + tl[2] - bl[2] - tl[w + 2]) * ooa); dst[3] = (uint8_t)((bl[w + 3] + tl[3] - bl[3] - tl[w + 3]) * ooa); dst += 4; tl += 4; bl += 4; } } // Copy pixels from rotated source to destination row with a slope. LIBYUV_API void ARGBAffineRow_C(const uint8_t* src_argb, int src_argb_stride, uint8_t* dst_argb, const float* uv_dudv, int width) { int i; // Render a row of pixels from source into a buffer. float uv[2]; uv[0] = uv_dudv[0]; uv[1] = uv_dudv[1]; for (i = 0; i < width; ++i) { int x = (int)(uv[0]); int y = (int)(uv[1]); *(uint32_t*)(dst_argb) = *(const uint32_t*)(src_argb + y * src_argb_stride + x * 4); dst_argb += 4; uv[0] += uv_dudv[2]; uv[1] += uv_dudv[3]; } } // Blend 2 rows into 1. static void HalfRow_C(const uint8_t* src_uv, ptrdiff_t src_uv_stride, uint8_t* dst_uv, int width) { int x; for (x = 0; x < width; ++x) { dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; } } static void HalfRow_16_C(const uint16_t* src_uv, ptrdiff_t src_uv_stride, uint16_t* dst_uv, int width) { int x; for (x = 0; x < width; ++x) { dst_uv[x] = (src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1; } } // C version 2x2 -> 2x1. 
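// Illustrative worked example for the interpolator below, with assumed
// inputs: source_y_fraction is in [0, 256), so 64 means 1/4 of the way
// toward the next row. For src = 100 and src1 = 200:
//   (100 * 192 + 200 * 64 + 128) >> 8 = (19200 + 12800 + 128) >> 8 = 125
// The fractions 0 and 128 are special-cased to a memcpy and HalfRow_C.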
void InterpolateRow_C(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int width, int source_y_fraction) { int y1_fraction = source_y_fraction; int y0_fraction = 256 - y1_fraction; const uint8_t* src_ptr1 = src_ptr + src_stride; int x; if (y1_fraction == 0) { memcpy(dst_ptr, src_ptr, width); return; } if (y1_fraction == 128) { HalfRow_C(src_ptr, src_stride, dst_ptr, width); return; } for (x = 0; x < width - 1; x += 2) { dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8; dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8; src_ptr += 2; src_ptr1 += 2; dst_ptr += 2; } if (width & 1) { dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8; } } void InterpolateRow_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, int width, int source_y_fraction) { int y1_fraction = source_y_fraction; int y0_fraction = 256 - y1_fraction; const uint16_t* src_ptr1 = src_ptr + src_stride; int x; if (source_y_fraction == 0) { memcpy(dst_ptr, src_ptr, width * 2); return; } if (source_y_fraction == 128) { HalfRow_16_C(src_ptr, src_stride, dst_ptr, width); return; } for (x = 0; x < width - 1; x += 2) { dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8; src_ptr += 2; src_ptr1 += 2; dst_ptr += 2; } if (width & 1) { dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8; } } // Use first 4 shuffler values to reorder ARGB channels. void ARGBShuffleRow_C(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width) { int index0 = shuffler[0]; int index1 = shuffler[1]; int index2 = shuffler[2]; int index3 = shuffler[3]; // Shuffle a row of ARGB. int x; for (x = 0; x < width; ++x) { // To support in-place conversion. 
uint8_t b = src_argb[index0]; uint8_t g = src_argb[index1]; uint8_t r = src_argb[index2]; uint8_t a = src_argb[index3]; dst_argb[0] = b; dst_argb[1] = g; dst_argb[2] = r; dst_argb[3] = a; src_argb += 4; dst_argb += 4; } } void I422ToYUY2Row_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_frame, int width) { int x; for (x = 0; x < width - 1; x += 2) { dst_frame[0] = src_y[0]; dst_frame[1] = src_u[0]; dst_frame[2] = src_y[1]; dst_frame[3] = src_v[0]; dst_frame += 4; src_y += 2; src_u += 1; src_v += 1; } if (width & 1) { dst_frame[0] = src_y[0]; dst_frame[1] = src_u[0]; dst_frame[2] = 0; dst_frame[3] = src_v[0]; } } void I422ToUYVYRow_C(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_frame, int width) { int x; for (x = 0; x < width - 1; x += 2) { dst_frame[0] = src_u[0]; dst_frame[1] = src_y[0]; dst_frame[2] = src_v[0]; dst_frame[3] = src_y[1]; dst_frame += 4; src_y += 2; src_u += 1; src_v += 1; } if (width & 1) { dst_frame[0] = src_u[0]; dst_frame[1] = src_y[0]; dst_frame[2] = src_v[0]; dst_frame[3] = 0; } } void ARGBPolynomialRow_C(const uint8_t* src_argb, uint8_t* dst_argb, const float* poly, int width) { int i; for (i = 0; i < width; ++i) { float b = (float)(src_argb[0]); float g = (float)(src_argb[1]); float r = (float)(src_argb[2]); float a = (float)(src_argb[3]); float b2 = b * b; float g2 = g * g; float r2 = r * r; float a2 = a * a; float db = poly[0] + poly[4] * b; float dg = poly[1] + poly[5] * g; float dr = poly[2] + poly[6] * r; float da = poly[3] + poly[7] * a; float b3 = b2 * b; float g3 = g2 * g; float r3 = r2 * r; float a3 = a2 * a; db += poly[8] * b2; dg += poly[9] * g2; dr += poly[10] * r2; da += poly[11] * a2; db += poly[12] * b3; dg += poly[13] * g3; dr += poly[14] * r3; da += poly[15] * a3; dst_argb[0] = Clamp((int32_t)(db)); dst_argb[1] = Clamp((int32_t)(dg)); dst_argb[2] = Clamp((int32_t)(dr)); dst_argb[3] = Clamp((int32_t)(da)); src_argb += 4; dst_argb += 4; } } // Samples assumed to be unsigned in low 9, 10 or 12 bits. Scale factor // adjust the source integer range to the half float range desired. // This magic constant is 2^-112. Multiplying by this // is the same as subtracting 112 from the exponent, which // is the difference in exponent bias between 32-bit and // 16-bit floats. Once we've done this subtraction, we can // simply extract the low bits of the exponent and the high // bits of the mantissa from our float and we're done. // Work around GCC 7 punning warning -Wstrict-aliasing #if defined(__GNUC__) typedef uint32_t __attribute__((__may_alias__)) uint32_alias_t; #else typedef uint32_t uint32_alias_t; #endif void HalfFloatRow_C(const uint16_t* src, uint16_t* dst, float scale, int width) { int i; float mult = 1.9259299444e-34f * scale; for (i = 0; i < width; ++i) { float value = src[i] * mult; dst[i] = (uint16_t)((*(const uint32_alias_t*)&value) >> 13); } } void ByteToFloatRow_C(const uint8_t* src, float* dst, float scale, int width) { int i; for (i = 0; i < width; ++i) { float value = src[i] * scale; dst[i] = value; } } void ARGBLumaColorTableRow_C(const uint8_t* src_argb, uint8_t* dst_argb, int width, const uint8_t* luma, uint32_t lumacoeff) { uint32_t bc = lumacoeff & 0xff; uint32_t gc = (lumacoeff >> 8) & 0xff; uint32_t rc = (lumacoeff >> 16) & 0xff; int i; for (i = 0; i < width - 1; i += 2) { // Luminance in rows, color values in columns. 
const uint8_t* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) + luma; const uint8_t* luma1; dst_argb[0] = luma0[src_argb[0]]; dst_argb[1] = luma0[src_argb[1]]; dst_argb[2] = luma0[src_argb[2]]; dst_argb[3] = src_argb[3]; luma1 = ((src_argb[4] * bc + src_argb[5] * gc + src_argb[6] * rc) & 0x7F00u) + luma; dst_argb[4] = luma1[src_argb[4]]; dst_argb[5] = luma1[src_argb[5]]; dst_argb[6] = luma1[src_argb[6]]; dst_argb[7] = src_argb[7]; src_argb += 8; dst_argb += 8; } if (width & 1) { // Luminance in rows, color values in columns. const uint8_t* luma0 = ((src_argb[0] * bc + src_argb[1] * gc + src_argb[2] * rc) & 0x7F00u) + luma; dst_argb[0] = luma0[src_argb[0]]; dst_argb[1] = luma0[src_argb[1]]; dst_argb[2] = luma0[src_argb[2]]; dst_argb[3] = src_argb[3]; } } void ARGBCopyAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) { int i; for (i = 0; i < width - 1; i += 2) { dst[3] = src[3]; dst[7] = src[7]; dst += 8; src += 8; } if (width & 1) { dst[3] = src[3]; } } void ARGBExtractAlphaRow_C(const uint8_t* src_argb, uint8_t* dst_a, int width) { int i; for (i = 0; i < width - 1; i += 2) { dst_a[0] = src_argb[3]; dst_a[1] = src_argb[7]; dst_a += 2; src_argb += 8; } if (width & 1) { dst_a[0] = src_argb[3]; } } void ARGBCopyYToAlphaRow_C(const uint8_t* src, uint8_t* dst, int width) { int i; for (i = 0; i < width - 1; i += 2) { dst[3] = src[0]; dst[7] = src[1]; dst += 8; src += 2; } if (width & 1) { dst[3] = src[0]; } } // Maximum temporary width for wrappers to process at a time, in pixels. #define MAXTWIDTH 2048 #if !(defined(_MSC_VER) && defined(_M_IX86)) && \ defined(HAS_I422TORGB565ROW_SSSE3) // row_win.cc has asm version, but GCC uses 2 step wrapper. void I422ToRGB565Row_SSSE3(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); src_y += twidth; src_u += twidth / 2; src_v += twidth / 2; dst_rgb565 += twidth * 2; width -= twidth; } } #endif #if defined(HAS_I422TOARGB1555ROW_SSSE3) void I422ToARGB1555Row_SSSE3(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth); src_y += twidth; src_u += twidth / 2; src_v += twidth / 2; dst_argb1555 += twidth * 2; width -= twidth; } } #endif #if defined(HAS_I422TOARGB4444ROW_SSSE3) void I422ToARGB4444Row_SSSE3(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; I422ToARGBRow_SSSE3(src_y, src_u, src_v, row, yuvconstants, twidth); ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth); src_y += twidth; src_u += twidth / 2; src_v += twidth / 2; dst_argb4444 += twidth * 2; width -= twidth; } } #endif #if defined(HAS_NV12TORGB565ROW_SSSE3) void NV12ToRGB565Row_SSSE3(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth); ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); src_y += twidth; src_uv += twidth; dst_rgb565 += twidth * 2; width -= twidth; } } #endif #if defined(HAS_NV12TORGB24ROW_SSSE3) void NV12ToRGB24Row_SSSE3(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; NV12ToARGBRow_SSSE3(src_y, src_uv, row, yuvconstants, twidth); ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); src_y += twidth; src_uv += twidth; dst_rgb24 += twidth * 3; width -= twidth; } } #endif #if defined(HAS_NV21TORGB24ROW_SSSE3) void NV21ToRGB24Row_SSSE3(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; NV21ToARGBRow_SSSE3(src_y, src_vu, row, yuvconstants, twidth); ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); src_y += twidth; src_vu += twidth; dst_rgb24 += twidth * 3; width -= twidth; } } #endif #if defined(HAS_NV12TORGB24ROW_AVX2) void NV12ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth); #if defined(HAS_ARGBTORGB24ROW_AVX2) ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); #else ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); #endif src_y += twidth; src_uv += twidth; dst_rgb24 += twidth * 3; width -= twidth; } } #endif #if defined(HAS_NV21TORGB24ROW_AVX2) void NV21ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; NV21ToARGBRow_AVX2(src_y, src_vu, row, yuvconstants, twidth); #if defined(HAS_ARGBTORGB24ROW_AVX2) ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); #else ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); #endif src_y += twidth; src_vu += twidth; dst_rgb24 += twidth * 3; width -= twidth; } } #endif #if defined(HAS_I422TORGB565ROW_AVX2) void I422ToRGB565Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? 
MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); #if defined(HAS_ARGBTORGB565ROW_AVX2) ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); #else ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); #endif src_y += twidth; src_u += twidth / 2; src_v += twidth / 2; dst_rgb565 += twidth * 2; width -= twidth; } } #endif #if defined(HAS_I422TOARGB1555ROW_AVX2) void I422ToARGB1555Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); #if defined(HAS_ARGBTOARGB1555ROW_AVX2) ARGBToARGB1555Row_AVX2(row, dst_argb1555, twidth); #else ARGBToARGB1555Row_SSE2(row, dst_argb1555, twidth); #endif src_y += twidth; src_u += twidth / 2; src_v += twidth / 2; dst_argb1555 += twidth * 2; width -= twidth; } } #endif #if defined(HAS_I422TOARGB4444ROW_AVX2) void I422ToARGB4444Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); #if defined(HAS_ARGBTOARGB4444ROW_AVX2) ARGBToARGB4444Row_AVX2(row, dst_argb4444, twidth); #else ARGBToARGB4444Row_SSE2(row, dst_argb4444, twidth); #endif src_y += twidth; src_u += twidth / 2; src_v += twidth / 2; dst_argb4444 += twidth * 2; width -= twidth; } } #endif #if defined(HAS_I422TORGB24ROW_AVX2) void I422ToRGB24Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; I422ToARGBRow_AVX2(src_y, src_u, src_v, row, yuvconstants, twidth); #if defined(HAS_ARGBTORGB24ROW_AVX2) ARGBToRGB24Row_AVX2(row, dst_rgb24, twidth); #else ARGBToRGB24Row_SSSE3(row, dst_rgb24, twidth); #endif src_y += twidth; src_u += twidth / 2; src_v += twidth / 2; dst_rgb24 += twidth * 3; width -= twidth; } } #endif #if defined(HAS_NV12TORGB565ROW_AVX2) void NV12ToRGB565Row_AVX2(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { // Row buffer for intermediate ARGB pixels. SIMD_ALIGNED(uint8_t row[MAXTWIDTH * 4]); while (width > 0) { int twidth = width > MAXTWIDTH ? MAXTWIDTH : width; NV12ToARGBRow_AVX2(src_y, src_uv, row, yuvconstants, twidth); #if defined(HAS_ARGBTORGB565ROW_AVX2) ARGBToRGB565Row_AVX2(row, dst_rgb565, twidth); #else ARGBToRGB565Row_SSE2(row, dst_rgb565, twidth); #endif src_y += twidth; src_uv += twidth; dst_rgb565 += twidth * 2; width -= twidth; } } #endif float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) { float fsum = 0.f; int i; #if defined(__clang__) #pragma clang loop vectorize_width(4) #endif for (i = 0; i < width; ++i) { float v = *src++; fsum += v * v; *dst++ = v * scale; } return fsum; } float ScaleMaxSamples_C(const float* src, float* dst, float scale, int width) { float fmax = 0.f; int i; for (i = 0; i < width; ++i) { float v = *src++; float vs = v * scale; fmax = (v > fmax) ? 
v : fmax; *dst++ = vs; } return fmax; } void ScaleSamples_C(const float* src, float* dst, float scale, int width) { int i; for (i = 0; i < width; ++i) { *dst++ = *src++ * scale; } } void GaussRow_C(const uint32_t* src, uint16_t* dst, int width) { int i; for (i = 0; i < width; ++i) { *dst++ = (src[0] + src[1] * 4 + src[2] * 6 + src[3] * 4 + src[4] + 128) >> 8; ++src; } } // filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row. void GaussCol_C(const uint16_t* src0, const uint16_t* src1, const uint16_t* src2, const uint16_t* src3, const uint16_t* src4, uint32_t* dst, int width) { int i; for (i = 0; i < width; ++i) { *dst++ = *src0++ + *src1++ * 4 + *src2++ * 6 + *src3++ * 4 + *src4++; } } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/row_gcc.cc000066400000000000000000010714041357355204000221360ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // This module is for GCC x86 and x64. #if !defined(LIBYUV_DISABLE_X86) && \ (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER))) #if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3) // Constants for ARGB static const vec8 kARGBToY = {13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0}; // JPeg full range. static const vec8 kARGBToYJ = {15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0}; #endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_ARGBGRAYROW_SSSE3) #if defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3) static const vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0}; static const vec8 kARGBToUJ = {127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0}; static const vec8 kARGBToV = {-18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0}; static const vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0}; // Constants for BGRA static const vec8 kBGRAToY = {0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13}; static const vec8 kBGRAToU = {0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112}; static const vec8 kBGRAToV = {0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18}; // Constants for ABGR static const vec8 kABGRToY = {33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0}; static const vec8 kABGRToU = {-38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0}; static const vec8 kABGRToV = {112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0}; // Constants for RGBA. static const vec8 kRGBAToY = {0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33}; static const vec8 kRGBAToU = {0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38}; static const vec8 kRGBAToV = {0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112}; static const uvec8 kAddY16 = {16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u}; // 7 bit fixed point 0.5. 
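// (adding 64 before the >> 7 shift in the YJ paths rounds to nearest instead of truncating)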
static const vec16 kAddYJ64 = {64, 64, 64, 64, 64, 64, 64, 64}; static const uvec8 kAddUV128 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; static const uvec16 kAddUVJ128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u}; #endif // defined(HAS_ARGBTOYROW_SSSE3) || defined(HAS_I422TOARGBROW_SSSE3) #ifdef HAS_RGB24TOARGBROW_SSSE3 // Shuffle table for converting RGB24 to ARGB. static const uvec8 kShuffleMaskRGB24ToARGB = { 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u}; // Shuffle table for converting RAW to ARGB. static const uvec8 kShuffleMaskRAWToARGB = {2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u}; // Shuffle table for converting RAW to RGB24. First 8. static const uvec8 kShuffleMaskRAWToRGB24_0 = { 2u, 1u, 0u, 5u, 4u, 3u, 8u, 7u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting RAW to RGB24. Middle 8. static const uvec8 kShuffleMaskRAWToRGB24_1 = { 2u, 7u, 6u, 5u, 10u, 9u, 8u, 13u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting RAW to RGB24. Last 8. static const uvec8 kShuffleMaskRAWToRGB24_2 = { 8u, 7u, 12u, 11u, 10u, 15u, 14u, 13u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGB to RGB24. static const uvec8 kShuffleMaskARGBToRGB24 = { 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGB to RAW. static const uvec8 kShuffleMaskARGBToRAW = { 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGBToRGB24 for I422ToRGB24. First 8 + next 4 static const uvec8 kShuffleMaskARGBToRGB24_0 = { 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u}; // YUY2 shuf 16 Y to 32 Y. static const lvec8 kShuffleYUY2Y = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}; // YUY2 shuf 8 UV to 16 UV. static const lvec8 kShuffleYUY2UV = {1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15, 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15}; // UYVY shuf 16 Y to 32 Y. static const lvec8 kShuffleUYVYY = {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}; // UYVY shuf 8 UV to 16 UV. static const lvec8 kShuffleUYVYUV = {0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14, 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14}; // NV21 shuf 8 VU to 16 UV. 
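// Each V,U byte pair is swapped to U,V and repeated so one pair covers two Y samples.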
static const lvec8 kShuffleNV21 = { 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, }; #endif // HAS_RGB24TOARGBROW_SSSE3 #ifdef HAS_J400TOARGBROW_SSE2 void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width) { asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "pslld $0x18,%%xmm5 \n" LABELALIGN "1: \n" "movq (%0),%%xmm0 \n" "lea 0x8(%0),%0 \n" "punpcklbw %%xmm0,%%xmm0 \n" "movdqa %%xmm0,%%xmm1 \n" "punpcklwd %%xmm0,%%xmm0 \n" "punpckhwd %%xmm1,%%xmm1 \n" "por %%xmm5,%%xmm0 \n" "por %%xmm5,%%xmm1 \n" "movdqu %%xmm0,(%1) \n" "movdqu %%xmm1,0x10(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src_y), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 ::"memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_J400TOARGBROW_SSE2 #ifdef HAS_RGB24TOARGBROW_SSSE3 void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) { asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" // 0xff000000 "pslld $0x18,%%xmm5 \n" "movdqa %3,%%xmm4 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm3 \n" "lea 0x30(%0),%0 \n" "movdqa %%xmm3,%%xmm2 \n" "palignr $0x8,%%xmm1,%%xmm2 \n" "pshufb %%xmm4,%%xmm2 \n" "por %%xmm5,%%xmm2 \n" "palignr $0xc,%%xmm0,%%xmm1 \n" "pshufb %%xmm4,%%xmm0 \n" "movdqu %%xmm2,0x20(%1) \n" "por %%xmm5,%%xmm0 \n" "pshufb %%xmm4,%%xmm1 \n" "movdqu %%xmm0,(%1) \n" "por %%xmm5,%%xmm1 \n" "palignr $0x4,%%xmm3,%%xmm3 \n" "pshufb %%xmm4,%%xmm3 \n" "movdqu %%xmm1,0x10(%1) \n" "por %%xmm5,%%xmm3 \n" "movdqu %%xmm3,0x30(%1) \n" "lea 0x40(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_rgb24), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "m"(kShuffleMaskRGB24ToARGB) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width) { asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" // 0xff000000 "pslld $0x18,%%xmm5 \n" "movdqa %3,%%xmm4 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm3 \n" "lea 0x30(%0),%0 \n" "movdqa %%xmm3,%%xmm2 \n" "palignr $0x8,%%xmm1,%%xmm2 \n" "pshufb %%xmm4,%%xmm2 \n" "por %%xmm5,%%xmm2 \n" "palignr $0xc,%%xmm0,%%xmm1 \n" "pshufb %%xmm4,%%xmm0 \n" "movdqu %%xmm2,0x20(%1) \n" "por %%xmm5,%%xmm0 \n" "pshufb %%xmm4,%%xmm1 \n" "movdqu %%xmm0,(%1) \n" "por %%xmm5,%%xmm1 \n" "palignr $0x4,%%xmm3,%%xmm3 \n" "pshufb %%xmm4,%%xmm3 \n" "movdqu %%xmm1,0x10(%1) \n" "por %%xmm5,%%xmm3 \n" "movdqu %%xmm3,0x30(%1) \n" "lea 0x40(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_raw), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "m"(kShuffleMaskRAWToARGB) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { asm volatile( "movdqa %3,%%xmm3 \n" "movdqa %4,%%xmm4 \n" "movdqa %5,%%xmm5 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x4(%0),%%xmm1 \n" "movdqu 0x8(%0),%%xmm2 \n" "lea 0x18(%0),%0 \n" "pshufb %%xmm3,%%xmm0 \n" "pshufb %%xmm4,%%xmm1 \n" "pshufb %%xmm5,%%xmm2 \n" "movq %%xmm0,(%1) \n" "movq %%xmm1,0x8(%1) \n" "movq %%xmm2,0x10(%1) \n" "lea 0x18(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src_raw), // %0 "+r"(dst_rgb24), // %1 "+r"(width) // %2 : "m"(kShuffleMaskRAWToRGB24_0), // %3 "m"(kShuffleMaskRAWToRGB24_1), // %4 "m"(kShuffleMaskRAWToRGB24_2) // %5 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } void RGB565ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "mov 
$0x1080108,%%eax \n" "movd %%eax,%%xmm5 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" "mov $0x20802080,%%eax \n" "movd %%eax,%%xmm6 \n" "pshufd $0x0,%%xmm6,%%xmm6 \n" "pcmpeqb %%xmm3,%%xmm3 \n" "psllw $0xb,%%xmm3 \n" "pcmpeqb %%xmm4,%%xmm4 \n" "psllw $0xa,%%xmm4 \n" "psrlw $0x5,%%xmm4 \n" "pcmpeqb %%xmm7,%%xmm7 \n" "psllw $0x8,%%xmm7 \n" "sub %0,%1 \n" "sub %0,%1 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm0,%%xmm2 \n" "pand %%xmm3,%%xmm1 \n" "psllw $0xb,%%xmm2 \n" "pmulhuw %%xmm5,%%xmm1 \n" "pmulhuw %%xmm5,%%xmm2 \n" "psllw $0x8,%%xmm1 \n" "por %%xmm2,%%xmm1 \n" "pand %%xmm4,%%xmm0 \n" "pmulhuw %%xmm6,%%xmm0 \n" "por %%xmm7,%%xmm0 \n" "movdqa %%xmm1,%%xmm2 \n" "punpcklbw %%xmm0,%%xmm1 \n" "punpckhbw %%xmm0,%%xmm2 \n" "movdqu %%xmm1,0x00(%1,%0,2) \n" "movdqu %%xmm2,0x10(%1,%0,2) \n" "lea 0x10(%0),%0 \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } void ARGB1555ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "mov $0x1080108,%%eax \n" "movd %%eax,%%xmm5 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" "mov $0x42004200,%%eax \n" "movd %%eax,%%xmm6 \n" "pshufd $0x0,%%xmm6,%%xmm6 \n" "pcmpeqb %%xmm3,%%xmm3 \n" "psllw $0xb,%%xmm3 \n" "movdqa %%xmm3,%%xmm4 \n" "psrlw $0x6,%%xmm4 \n" "pcmpeqb %%xmm7,%%xmm7 \n" "psllw $0x8,%%xmm7 \n" "sub %0,%1 \n" "sub %0,%1 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm0,%%xmm2 \n" "psllw $0x1,%%xmm1 \n" "psllw $0xb,%%xmm2 \n" "pand %%xmm3,%%xmm1 \n" "pmulhuw %%xmm5,%%xmm2 \n" "pmulhuw %%xmm5,%%xmm1 \n" "psllw $0x8,%%xmm1 \n" "por %%xmm2,%%xmm1 \n" "movdqa %%xmm0,%%xmm2 \n" "pand %%xmm4,%%xmm0 \n" "psraw $0x8,%%xmm2 \n" "pmulhuw %%xmm6,%%xmm0 \n" "pand %%xmm7,%%xmm2 \n" "por %%xmm2,%%xmm0 \n" "movdqa %%xmm1,%%xmm2 \n" "punpcklbw %%xmm0,%%xmm1 \n" "punpckhbw %%xmm0,%%xmm2 \n" "movdqu %%xmm1,0x00(%1,%0,2) \n" "movdqu %%xmm2,0x10(%1,%0,2) \n" "lea 0x10(%0),%0 \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } void ARGB4444ToARGBRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "mov $0xf0f0f0f,%%eax \n" "movd %%eax,%%xmm4 \n" "pshufd $0x0,%%xmm4,%%xmm4 \n" "movdqa %%xmm4,%%xmm5 \n" "pslld $0x4,%%xmm5 \n" "sub %0,%1 \n" "sub %0,%1 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqa %%xmm0,%%xmm2 \n" "pand %%xmm4,%%xmm0 \n" "pand %%xmm5,%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm2,%%xmm3 \n" "psllw $0x4,%%xmm1 \n" "psrlw $0x4,%%xmm3 \n" "por %%xmm1,%%xmm0 \n" "por %%xmm3,%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" "punpcklbw %%xmm2,%%xmm0 \n" "punpckhbw %%xmm2,%%xmm1 \n" "movdqu %%xmm0,0x00(%1,%0,2) \n" "movdqu %%xmm1,0x10(%1,%0,2) \n" "lea 0x10(%0),%0 \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } void ARGBToRGB24Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "movdqa %3,%%xmm6 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x30(%0),%%xmm3 \n" "lea 0x40(%0),%0 \n" "pshufb %%xmm6,%%xmm0 \n" "pshufb %%xmm6,%%xmm1 \n" "pshufb %%xmm6,%%xmm2 \n" "pshufb %%xmm6,%%xmm3 \n" "movdqa %%xmm1,%%xmm4 \n" "psrldq $0x4,%%xmm1 \n" "pslldq $0xc,%%xmm4 \n" "movdqa %%xmm2,%%xmm5 \n" "por %%xmm4,%%xmm0 \n" "pslldq $0x8,%%xmm5 
\n" "movdqu %%xmm0,(%1) \n" "por %%xmm5,%%xmm1 \n" "psrldq $0x8,%%xmm2 \n" "pslldq $0x4,%%xmm3 \n" "por %%xmm3,%%xmm2 \n" "movdqu %%xmm1,0x10(%1) \n" "movdqu %%xmm2,0x20(%1) \n" "lea 0x30(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "m"(kShuffleMaskARGBToRGB24) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } void ARGBToRAWRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "movdqa %3,%%xmm6 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x30(%0),%%xmm3 \n" "lea 0x40(%0),%0 \n" "pshufb %%xmm6,%%xmm0 \n" "pshufb %%xmm6,%%xmm1 \n" "pshufb %%xmm6,%%xmm2 \n" "pshufb %%xmm6,%%xmm3 \n" "movdqa %%xmm1,%%xmm4 \n" "psrldq $0x4,%%xmm1 \n" "pslldq $0xc,%%xmm4 \n" "movdqa %%xmm2,%%xmm5 \n" "por %%xmm4,%%xmm0 \n" "pslldq $0x8,%%xmm5 \n" "movdqu %%xmm0,(%1) \n" "por %%xmm5,%%xmm1 \n" "psrldq $0x8,%%xmm2 \n" "pslldq $0x4,%%xmm3 \n" "por %%xmm3,%%xmm2 \n" "movdqu %%xmm1,0x10(%1) \n" "movdqu %%xmm2,0x20(%1) \n" "lea 0x30(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "m"(kShuffleMaskARGBToRAW) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #ifdef HAS_ARGBTORGB24ROW_AVX2 // vpermd for 12+12 to 24 static const lvec32 kPermdRGB24_AVX = {0, 1, 2, 4, 5, 6, 3, 7}; void ARGBToRGB24Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "vbroadcastf128 %3,%%ymm6 \n" "vmovdqa %4,%%ymm7 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x40(%0),%%ymm2 \n" "vmovdqu 0x60(%0),%%ymm3 \n" "lea 0x80(%0),%0 \n" "vpshufb %%ymm6,%%ymm0,%%ymm0 \n" // xxx0yyy0 "vpshufb %%ymm6,%%ymm1,%%ymm1 \n" "vpshufb %%ymm6,%%ymm2,%%ymm2 \n" "vpshufb %%ymm6,%%ymm3,%%ymm3 \n" "vpermd %%ymm0,%%ymm7,%%ymm0 \n" // pack to 24 bytes "vpermd %%ymm1,%%ymm7,%%ymm1 \n" "vpermd %%ymm2,%%ymm7,%%ymm2 \n" "vpermd %%ymm3,%%ymm7,%%ymm3 \n" "vpermq $0x3f,%%ymm1,%%ymm4 \n" // combine 24 + 8 "vpor %%ymm4,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,(%1) \n" "vpermq $0xf9,%%ymm1,%%ymm1 \n" // combine 16 + 16 "vpermq $0x4f,%%ymm2,%%ymm4 \n" "vpor %%ymm4,%%ymm1,%%ymm1 \n" "vmovdqu %%ymm1,0x20(%1) \n" "vpermq $0xfe,%%ymm2,%%ymm2 \n" // combine 8 + 24 "vpermq $0x93,%%ymm3,%%ymm3 \n" "vpor %%ymm3,%%ymm2,%%ymm2 \n" "vmovdqu %%ymm2,0x40(%1) \n" "lea 0x60(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "m"(kShuffleMaskARGBToRGB24), // %3 "m"(kPermdRGB24_AVX) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif #ifdef HAS_ARGBTORGB24ROW_AVX512VBMI // Shuffle table for converting ARGBToRGB24 static const ulvec8 kPermARGBToRGB24_0 = { 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 16u, 17u, 18u, 20u, 21u, 22u, 24u, 25u, 26u, 28u, 29u, 30u, 32u, 33u, 34u, 36u, 37u, 38u, 40u, 41u}; static const ulvec8 kPermARGBToRGB24_1 = { 10u, 12u, 13u, 14u, 16u, 17u, 18u, 20u, 21u, 22u, 24u, 25u, 26u, 28u, 29u, 30u, 32u, 33u, 34u, 36u, 37u, 38u, 40u, 41u, 42u, 44u, 45u, 46u, 48u, 49u, 50u, 52u}; static const ulvec8 kPermARGBToRGB24_2 = { 21u, 22u, 24u, 25u, 26u, 28u, 29u, 30u, 32u, 33u, 34u, 36u, 37u, 38u, 40u, 41u, 42u, 44u, 45u, 46u, 48u, 49u, 50u, 52u, 53u, 54u, 56u, 57u, 58u, 60u, 61u, 62u}; void ARGBToRGB24Row_AVX512VBMI(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "vmovdqa %3,%%ymm5 \n" "vmovdqa %4,%%ymm6 \n" "vmovdqa %5,%%ymm7 \n" LABELALIGN "1: \n" "vmovdqu 
(%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x40(%0),%%ymm2 \n" "vmovdqu 0x60(%0),%%ymm3 \n" "lea 0x80(%0),%0 \n" "vpermt2b %%ymm1,%%ymm5,%%ymm0 \n" "vpermt2b %%ymm2,%%ymm6,%%ymm1 \n" "vpermt2b %%ymm3,%%ymm7,%%ymm2 \n" "vmovdqu %%ymm0,(%1) \n" "vmovdqu %%ymm1,0x20(%1) \n" "vmovdqu %%ymm2,0x40(%1) \n" "lea 0x60(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "m"(kPermARGBToRGB24_0), // %3 "m"(kPermARGBToRGB24_1), // %4 "m"(kPermARGBToRGB24_2) // %5 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5", "xmm6", "xmm7"); } #endif #ifdef HAS_ARGBTORAWROW_AVX2 void ARGBToRAWRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "vbroadcastf128 %3,%%ymm6 \n" "vmovdqa %4,%%ymm7 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x40(%0),%%ymm2 \n" "vmovdqu 0x60(%0),%%ymm3 \n" "lea 0x80(%0),%0 \n" "vpshufb %%ymm6,%%ymm0,%%ymm0 \n" // xxx0yyy0 "vpshufb %%ymm6,%%ymm1,%%ymm1 \n" "vpshufb %%ymm6,%%ymm2,%%ymm2 \n" "vpshufb %%ymm6,%%ymm3,%%ymm3 \n" "vpermd %%ymm0,%%ymm7,%%ymm0 \n" // pack to 24 bytes "vpermd %%ymm1,%%ymm7,%%ymm1 \n" "vpermd %%ymm2,%%ymm7,%%ymm2 \n" "vpermd %%ymm3,%%ymm7,%%ymm3 \n" "vpermq $0x3f,%%ymm1,%%ymm4 \n" // combine 24 + 8 "vpor %%ymm4,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,(%1) \n" "vpermq $0xf9,%%ymm1,%%ymm1 \n" // combine 16 + 16 "vpermq $0x4f,%%ymm2,%%ymm4 \n" "vpor %%ymm4,%%ymm1,%%ymm1 \n" "vmovdqu %%ymm1,0x20(%1) \n" "vpermq $0xfe,%%ymm2,%%ymm2 \n" // combine 8 + 24 "vpermq $0x93,%%ymm3,%%ymm3 \n" "vpor %%ymm3,%%ymm2,%%ymm2 \n" "vmovdqu %%ymm2,0x40(%1) \n" "lea 0x60(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "m"(kShuffleMaskARGBToRAW), // %3 "m"(kPermdRGB24_AVX) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif void ARGBToRGB565Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "pcmpeqb %%xmm3,%%xmm3 \n" "psrld $0x1b,%%xmm3 \n" "pcmpeqb %%xmm4,%%xmm4 \n" "psrld $0x1a,%%xmm4 \n" "pslld $0x5,%%xmm4 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "pslld $0xb,%%xmm5 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm0,%%xmm2 \n" "pslld $0x8,%%xmm0 \n" "psrld $0x3,%%xmm1 \n" "psrld $0x5,%%xmm2 \n" "psrad $0x10,%%xmm0 \n" "pand %%xmm3,%%xmm1 \n" "pand %%xmm4,%%xmm2 \n" "pand %%xmm5,%%xmm0 \n" "por %%xmm2,%%xmm1 \n" "por %%xmm1,%%xmm0 \n" "packssdw %%xmm0,%%xmm0 \n" "lea 0x10(%0),%0 \n" "movq %%xmm0,(%1) \n" "lea 0x8(%1),%1 \n" "sub $0x4,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 ::"memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } void ARGBToRGB565DitherRow_SSE2(const uint8_t* src, uint8_t* dst, const uint32_t dither4, int width) { asm volatile( "movd %3,%%xmm6 \n" "punpcklbw %%xmm6,%%xmm6 \n" "movdqa %%xmm6,%%xmm7 \n" "punpcklwd %%xmm6,%%xmm6 \n" "punpckhwd %%xmm7,%%xmm7 \n" "pcmpeqb %%xmm3,%%xmm3 \n" "psrld $0x1b,%%xmm3 \n" "pcmpeqb %%xmm4,%%xmm4 \n" "psrld $0x1a,%%xmm4 \n" "pslld $0x5,%%xmm4 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "pslld $0xb,%%xmm5 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "paddusb %%xmm6,%%xmm0 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm0,%%xmm2 \n" "pslld $0x8,%%xmm0 \n" "psrld $0x3,%%xmm1 \n" "psrld $0x5,%%xmm2 \n" "psrad $0x10,%%xmm0 \n" "pand %%xmm3,%%xmm1 \n" "pand %%xmm4,%%xmm2 \n" "pand %%xmm5,%%xmm0 \n" "por %%xmm2,%%xmm1 \n" "por %%xmm1,%%xmm0 \n" "packssdw %%xmm0,%%xmm0 \n" "lea 0x10(%0),%0 \n" "movq %%xmm0,(%1) \n" "lea 0x8(%1),%1 \n" 
"sub $0x4,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "m"(dither4) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #ifdef HAS_ARGBTORGB565DITHERROW_AVX2 void ARGBToRGB565DitherRow_AVX2(const uint8_t* src, uint8_t* dst, const uint32_t dither4, int width) { asm volatile( "vbroadcastss %3,%%xmm6 \n" "vpunpcklbw %%xmm6,%%xmm6,%%xmm6 \n" "vpermq $0xd8,%%ymm6,%%ymm6 \n" "vpunpcklwd %%ymm6,%%ymm6,%%ymm6 \n" "vpcmpeqb %%ymm3,%%ymm3,%%ymm3 \n" "vpsrld $0x1b,%%ymm3,%%ymm3 \n" "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" "vpsrld $0x1a,%%ymm4,%%ymm4 \n" "vpslld $0x5,%%ymm4,%%ymm4 \n" "vpslld $0xb,%%ymm3,%%ymm5 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vpaddusb %%ymm6,%%ymm0,%%ymm0 \n" "vpsrld $0x5,%%ymm0,%%ymm2 \n" "vpsrld $0x3,%%ymm0,%%ymm1 \n" "vpsrld $0x8,%%ymm0,%%ymm0 \n" "vpand %%ymm4,%%ymm2,%%ymm2 \n" "vpand %%ymm3,%%ymm1,%%ymm1 \n" "vpand %%ymm5,%%ymm0,%%ymm0 \n" "vpor %%ymm2,%%ymm1,%%ymm1 \n" "vpor %%ymm1,%%ymm0,%%ymm0 \n" "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "lea 0x20(%0),%0 \n" "vmovdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "m"(dither4) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // HAS_ARGBTORGB565DITHERROW_AVX2 void ARGBToARGB1555Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "pcmpeqb %%xmm4,%%xmm4 \n" "psrld $0x1b,%%xmm4 \n" "movdqa %%xmm4,%%xmm5 \n" "pslld $0x5,%%xmm5 \n" "movdqa %%xmm4,%%xmm6 \n" "pslld $0xa,%%xmm6 \n" "pcmpeqb %%xmm7,%%xmm7 \n" "pslld $0xf,%%xmm7 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm0,%%xmm2 \n" "movdqa %%xmm0,%%xmm3 \n" "psrad $0x10,%%xmm0 \n" "psrld $0x3,%%xmm1 \n" "psrld $0x6,%%xmm2 \n" "psrld $0x9,%%xmm3 \n" "pand %%xmm7,%%xmm0 \n" "pand %%xmm4,%%xmm1 \n" "pand %%xmm5,%%xmm2 \n" "pand %%xmm6,%%xmm3 \n" "por %%xmm1,%%xmm0 \n" "por %%xmm3,%%xmm2 \n" "por %%xmm2,%%xmm0 \n" "packssdw %%xmm0,%%xmm0 \n" "lea 0x10(%0),%0 \n" "movq %%xmm0,(%1) \n" "lea 0x8(%1),%1 \n" "sub $0x4,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 ::"memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } void ARGBToARGB4444Row_SSE2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "pcmpeqb %%xmm4,%%xmm4 \n" "psllw $0xc,%%xmm4 \n" "movdqa %%xmm4,%%xmm3 \n" "psrlw $0x8,%%xmm3 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqa %%xmm0,%%xmm1 \n" "pand %%xmm3,%%xmm0 \n" "pand %%xmm4,%%xmm1 \n" "psrlq $0x4,%%xmm0 \n" "psrlq $0x8,%%xmm1 \n" "por %%xmm1,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "lea 0x10(%0),%0 \n" "movq %%xmm0,(%1) \n" "lea 0x8(%1),%1 \n" "sub $0x4,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 ::"memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); } #endif // HAS_RGB24TOARGBROW_SSSE3 /* ARGBToAR30Row: Red Blue With the 8 bit value in the upper bits of a short, vpmulhuw by (1024+4) will produce a 10 bit value in the low 10 bits of each 16 bit value. This is whats wanted for the blue channel. The red needs to be shifted 4 left, so multiply by (1024+4)*16 for red. Alpha Green Alpha and Green are already in the high bits so vpand can zero out the other bits, keeping just 2 upper bits of alpha and 8 bit green. The same multiplier could be used for Green - (1024+4) putting the 10 bit green in the lsb. Alpha would be a simple multiplier to shift it into position. 
Alpha wants a gap of 10 bits above the green. Green is 10 bits, so there are 6 bits in the low short. 4 more are needed, so a multiplier of 4 gets the 2 bits into the upper 16 bits, and then a shift of 4 is a multiply of 16, so (4*16) = 64. Then shift the result left 10 to position the A and G channels. */ // Shuffle tables to isolate the R and B channels of ARGB / ABGR into 16 bit lanes. static const uvec8 kShuffleRB30 = {128u, 0u, 128u, 2u, 128u, 4u, 128u, 6u, 128u, 8u, 128u, 10u, 128u, 12u, 128u, 14u}; static const uvec8 kShuffleBR30 = {128u, 2u, 128u, 0u, 128u, 6u, 128u, 4u, 128u, 10u, 128u, 8u, 128u, 14u, 128u, 12u}; static const uint32_t kMulRB10 = 1028 * 16 * 65536 + 1028; static const uint32_t kMaskRB10 = 0x3ff003ff; static const uint32_t kMaskAG10 = 0xc000ff00; static const uint32_t kMulAG10 = 64 * 65536 + 1028; void ARGBToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "movdqa %3,%%xmm2 \n" // shuffler for RB "movd %4,%%xmm3 \n" // multiplier for RB "movd %5,%%xmm4 \n" // mask for R10 B10 "movd %6,%%xmm5 \n" // mask for AG "movd %7,%%xmm6 \n" // multiplier for AG "pshufd $0x0,%%xmm3,%%xmm3 \n" "pshufd $0x0,%%xmm4,%%xmm4 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" "pshufd $0x0,%%xmm6,%%xmm6 \n" "sub %0,%1 \n" "1: \n" "movdqu (%0),%%xmm0 \n" // fetch 4 ARGB pixels "movdqa %%xmm0,%%xmm1 \n" "pshufb %%xmm2,%%xmm1 \n" // R0B0 "pand %%xmm5,%%xmm0 \n" // A0G0 "pmulhuw %%xmm3,%%xmm1 \n" // X2 R16 X4 B10 "pmulhuw %%xmm6,%%xmm0 \n" // X10 A2 X10 G10 "pand %%xmm4,%%xmm1 \n" // X2 R10 X10 B10 "pslld $10,%%xmm0 \n" // A2 x10 G10 x10 "por %%xmm1,%%xmm0 \n" // A2 R10 G10 B10 "movdqu %%xmm0,(%1,%0) \n" // store 4 AR30 pixels "add $0x10,%0 \n" "sub $0x4,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "m"(kShuffleRB30), // %3 "m"(kMulRB10), // %4 "m"(kMaskRB10), // %5 "m"(kMaskAG10), // %6 "m"(kMulAG10) // %7 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } void ABGRToAR30Row_SSSE3(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "movdqa %3,%%xmm2 \n" // shuffler for RB "movd %4,%%xmm3 \n" // multiplier for RB "movd %5,%%xmm4 \n" // mask for R10 B10 "movd %6,%%xmm5 \n" // mask for AG "movd %7,%%xmm6 \n" // multiplier for AG "pshufd $0x0,%%xmm3,%%xmm3 \n" "pshufd $0x0,%%xmm4,%%xmm4 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" "pshufd $0x0,%%xmm6,%%xmm6 \n" "sub %0,%1 \n" "1: \n" "movdqu (%0),%%xmm0 \n" // fetch 4 ABGR pixels "movdqa %%xmm0,%%xmm1 \n" "pshufb %%xmm2,%%xmm1 \n" // R0B0 "pand %%xmm5,%%xmm0 \n" // A0G0 "pmulhuw %%xmm3,%%xmm1 \n" // X2 R16 X4 B10 "pmulhuw %%xmm6,%%xmm0 \n" // X10 A2 X10 G10 "pand %%xmm4,%%xmm1 \n" // X2 R10 X10 B10 "pslld $10,%%xmm0 \n" // A2 x10 G10 x10 "por %%xmm1,%%xmm0 \n" // A2 R10 G10 B10 "movdqu %%xmm0,(%1,%0) \n" // store 4 AR30 pixels "add $0x10,%0 \n" "sub $0x4,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "m"(kShuffleBR30), // %3 reversed shuffler "m"(kMulRB10), // %4 "m"(kMaskRB10), // %5 "m"(kMaskAG10), // %6 "m"(kMulAG10) // %7 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #ifdef HAS_ARGBTOAR30ROW_AVX2 void ARGBToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB "vbroadcastss %4,%%ymm3 \n" // multiplier for RB "vbroadcastss %5,%%ymm4 \n" // mask for R10 B10 "vbroadcastss %6,%%ymm5 \n" // mask for AG "vbroadcastss %7,%%ymm6 \n" // multiplier for AG "sub %0,%1 \n" "1: \n" "vmovdqu (%0),%%ymm0 \n" // fetch 8 ARGB pixels "vpshufb %%ymm2,%%ymm0,%%ymm1 \n" // R0B0 "vpand %%ymm5,%%ymm0,%%ymm0 \n" // A0G0
"vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" // X2 R16 X4 B10 "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n" // X10 A2 X10 G10 "vpand %%ymm4,%%ymm1,%%ymm1 \n" // X2 R10 X10 B10 "vpslld $10,%%ymm0,%%ymm0 \n" // A2 x10 G10 x10 "vpor %%ymm1,%%ymm0,%%ymm0 \n" // A2 R10 G10 B10 "vmovdqu %%ymm0,(%1,%0) \n" // store 8 AR30 pixels "add $0x20,%0 \n" "sub $0x8,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "m"(kShuffleRB30), // %3 "m"(kMulRB10), // %4 "m"(kMaskRB10), // %5 "m"(kMaskAG10), // %6 "m"(kMulAG10) // %7 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif #ifdef HAS_ABGRTOAR30ROW_AVX2 void ABGRToAR30Row_AVX2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "vbroadcastf128 %3,%%ymm2 \n" // shuffler for RB "vbroadcastss %4,%%ymm3 \n" // multipler for RB "vbroadcastss %5,%%ymm4 \n" // mask for R10 B10 "vbroadcastss %6,%%ymm5 \n" // mask for AG "vbroadcastss %7,%%ymm6 \n" // multipler for AG "sub %0,%1 \n" "1: \n" "vmovdqu (%0),%%ymm0 \n" // fetch 8 ABGR pixels "vpshufb %%ymm2,%%ymm0,%%ymm1 \n" // R0B0 "vpand %%ymm5,%%ymm0,%%ymm0 \n" // A0G0 "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" // X2 R16 X4 B10 "vpmulhuw %%ymm6,%%ymm0,%%ymm0 \n" // X10 A2 X10 G10 "vpand %%ymm4,%%ymm1,%%ymm1 \n" // X2 R10 X10 B10 "vpslld $10,%%ymm0,%%ymm0 \n" // A2 x10 G10 x10 "vpor %%ymm1,%%ymm0,%%ymm0 \n" // A2 R10 G10 B10 "vmovdqu %%ymm0,(%1,%0) \n" // store 8 AR30 pixels "add $0x20,%0 \n" "sub $0x8,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "m"(kShuffleBR30), // %3 reversed shuffler "m"(kMulRB10), // %4 "m"(kMaskRB10), // %5 "m"(kMaskAG10), // %6 "m"(kMulAG10) // %7 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif #ifdef HAS_ARGBTOYROW_SSSE3 // Convert 16 ARGB pixels (64 bytes) to 16 Y values. void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "movdqa %3,%%xmm4 \n" "movdqa %4,%%xmm5 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x30(%0),%%xmm3 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm1 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm4,%%xmm3 \n" "lea 0x40(%0),%0 \n" "phaddw %%xmm1,%%xmm0 \n" "phaddw %%xmm3,%%xmm2 \n" "psrlw $0x7,%%xmm0 \n" "psrlw $0x7,%%xmm2 \n" "packuswb %%xmm2,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "m"(kARGBToY), // %3 "m"(kAddY16) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBTOYROW_SSSE3 #ifdef HAS_ARGBTOYJROW_SSSE3 // Convert 16 ARGB pixels (64 bytes) to 16 YJ values. // Same as ARGBToYRow but different coefficients, no add 16, but do rounding. 
void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "movdqa %3,%%xmm4 \n" "movdqa %4,%%xmm5 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x30(%0),%%xmm3 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm1 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm4,%%xmm3 \n" "lea 0x40(%0),%0 \n" "phaddw %%xmm1,%%xmm0 \n" "phaddw %%xmm3,%%xmm2 \n" "paddw %%xmm5,%%xmm0 \n" "paddw %%xmm5,%%xmm2 \n" "psrlw $0x7,%%xmm0 \n" "psrlw $0x7,%%xmm2 \n" "packuswb %%xmm2,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "m"(kARGBToYJ), // %3 "m"(kAddYJ64) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBTOYJROW_SSSE3 #ifdef HAS_ARGBTOYROW_AVX2 // vpermd for vphaddw + vpackuswb vpermd. static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7}; // Convert 32 ARGB pixels (128 bytes) to 32 Y values. void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "vbroadcastf128 %3,%%ymm4 \n" "vbroadcastf128 %4,%%ymm5 \n" "vmovdqu %5,%%ymm6 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x40(%0),%%ymm2 \n" "vmovdqu 0x60(%0),%%ymm3 \n" "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" "lea 0x80(%0),%0 \n" "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates. "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" "vpsrlw $0x7,%%ymm0,%%ymm0 \n" "vpsrlw $0x7,%%ymm2,%%ymm2 \n" "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate. "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" // add 16 for Y "vmovdqu %%ymm0,(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "m"(kARGBToY), // %3 "m"(kAddY16), // %4 "m"(kPermdARGBToY_AVX) // %5 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBTOYROW_AVX2 #ifdef HAS_ARGBTOYJROW_AVX2 // Convert 32 ARGB pixels (128 bytes) to 32 Y values. void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "vbroadcastf128 %3,%%ymm4 \n" "vbroadcastf128 %4,%%ymm5 \n" "vmovdqu %5,%%ymm6 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x40(%0),%%ymm2 \n" "vmovdqu 0x60(%0),%%ymm3 \n" "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" "lea 0x80(%0),%0 \n" "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" // mutates. "vphaddw %%ymm3,%%ymm2,%%ymm2 \n" "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" // Add .5 for rounding. "vpaddw %%ymm5,%%ymm2,%%ymm2 \n" "vpsrlw $0x7,%%ymm0,%%ymm0 \n" "vpsrlw $0x7,%%ymm2,%%ymm2 \n" "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. "vpermd %%ymm0,%%ymm6,%%ymm0 \n" // unmutate. 
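// ymm6 holds kPermdARGBToY_AVX, which reorders the lane-interleaved output of vphaddw/vpackuswb back into sequential pixel order.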
"vmovdqu %%ymm0,(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "m"(kARGBToYJ), // %3 "m"(kAddYJ64), // %4 "m"(kPermdARGBToY_AVX) // %5 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBTOYJROW_AVX2 #ifdef HAS_ARGBTOUVROW_SSSE3 void ARGBToUVRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "movdqa %5,%%xmm3 \n" "movdqa %6,%%xmm4 \n" "movdqa %7,%%xmm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x00(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x10(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x20(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm2 \n" "movdqu 0x30(%0),%%xmm6 \n" "movdqu 0x30(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm6 \n" "lea 0x40(%0),%0 \n" "movdqa %%xmm0,%%xmm7 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" "shufps $0xdd,%%xmm1,%%xmm7 \n" "pavgb %%xmm7,%%xmm0 \n" "movdqa %%xmm2,%%xmm7 \n" "shufps $0x88,%%xmm6,%%xmm2 \n" "shufps $0xdd,%%xmm6,%%xmm7 \n" "pavgb %%xmm7,%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm2,%%xmm6 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm3,%%xmm1 \n" "pmaddubsw %%xmm3,%%xmm6 \n" "phaddw %%xmm2,%%xmm0 \n" "phaddw %%xmm6,%%xmm1 \n" "psraw $0x8,%%xmm0 \n" "psraw $0x8,%%xmm1 \n" "packsswb %%xmm1,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n" "movlps %%xmm0,(%1) \n" "movhps %%xmm0,0x00(%1,%2,1) \n" "lea 0x8(%1),%1 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_argb0), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+rm"(width) // %3 : "r"((intptr_t)(src_stride_argb)), // %4 "m"(kARGBToV), // %5 "m"(kARGBToU), // %6 "m"(kAddUV128) // %7 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } #endif // HAS_ARGBTOUVROW_SSSE3 #ifdef HAS_ARGBTOUVROW_AVX2 // vpshufb for vphaddw + vpackuswb packed to shorts. 
static const lvec8 kShufARGBToUV_AVX = { 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15}; void ARGBToUVRow_AVX2(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "vbroadcastf128 %5,%%ymm5 \n" "vbroadcastf128 %6,%%ymm6 \n" "vbroadcastf128 %7,%%ymm7 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x40(%0),%%ymm2 \n" "vmovdqu 0x60(%0),%%ymm3 \n" "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n" "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n" "lea 0x80(%0),%0 \n" "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n" "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n" "vpavgb %%ymm4,%%ymm0,%%ymm0 \n" "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n" "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n" "vpavgb %%ymm4,%%ymm2,%%ymm2 \n" "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n" "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n" "vphaddw %%ymm3,%%ymm1,%%ymm1 \n" "vphaddw %%ymm2,%%ymm0,%%ymm0 \n" "vpsraw $0x8,%%ymm1,%%ymm1 \n" "vpsraw $0x8,%%ymm0,%%ymm0 \n" "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpshufb %8,%%ymm0,%%ymm0 \n" "vpaddb %%ymm5,%%ymm0,%%ymm0 \n" "vextractf128 $0x0,%%ymm0,(%1) \n" "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n" "lea 0x10(%1),%1 \n" "sub $0x20,%3 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_argb0), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+rm"(width) // %3 : "r"((intptr_t)(src_stride_argb)), // %4 "m"(kAddUV128), // %5 "m"(kARGBToV), // %6 "m"(kARGBToU), // %7 "m"(kShufARGBToUV_AVX) // %8 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // HAS_ARGBTOUVROW_AVX2 #ifdef HAS_ARGBTOUVJROW_AVX2 void ARGBToUVJRow_AVX2(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "vbroadcastf128 %5,%%ymm5 \n" "vbroadcastf128 %6,%%ymm6 \n" "vbroadcastf128 %7,%%ymm7 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x40(%0),%%ymm2 \n" "vmovdqu 0x60(%0),%%ymm3 \n" "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" "vpavgb 0x40(%0,%4,1),%%ymm2,%%ymm2 \n" "vpavgb 0x60(%0,%4,1),%%ymm3,%%ymm3 \n" "lea 0x80(%0),%0 \n" "vshufps $0x88,%%ymm1,%%ymm0,%%ymm4 \n" "vshufps $0xdd,%%ymm1,%%ymm0,%%ymm0 \n" "vpavgb %%ymm4,%%ymm0,%%ymm0 \n" "vshufps $0x88,%%ymm3,%%ymm2,%%ymm4 \n" "vshufps $0xdd,%%ymm3,%%ymm2,%%ymm2 \n" "vpavgb %%ymm4,%%ymm2,%%ymm2 \n" "vpmaddubsw %%ymm7,%%ymm0,%%ymm1 \n" "vpmaddubsw %%ymm7,%%ymm2,%%ymm3 \n" "vpmaddubsw %%ymm6,%%ymm0,%%ymm0 \n" "vpmaddubsw %%ymm6,%%ymm2,%%ymm2 \n" "vphaddw %%ymm3,%%ymm1,%%ymm1 \n" "vphaddw %%ymm2,%%ymm0,%%ymm0 \n" "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" "vpaddw %%ymm5,%%ymm1,%%ymm1 \n" "vpsraw $0x8,%%ymm1,%%ymm1 \n" "vpsraw $0x8,%%ymm0,%%ymm0 \n" "vpacksswb %%ymm0,%%ymm1,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpshufb %8,%%ymm0,%%ymm0 \n" "vextractf128 $0x0,%%ymm0,(%1) \n" "vextractf128 $0x1,%%ymm0,0x0(%1,%2,1) \n" "lea 0x10(%1),%1 \n" "sub $0x20,%3 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_argb0), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+rm"(width) // %3 : "r"((intptr_t)(src_stride_argb)), // %4 "m"(kAddUVJ128), // %5 "m"(kARGBToVJ), // %6 "m"(kARGBToUJ), // %7 "m"(kShufARGBToUV_AVX) // %8 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // HAS_ARGBTOUVJROW_AVX2 #ifdef 
HAS_ARGBTOUVJROW_SSSE3 void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "movdqa %5,%%xmm3 \n" "movdqa %6,%%xmm4 \n" "movdqa %7,%%xmm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x00(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x10(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x20(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm2 \n" "movdqu 0x30(%0),%%xmm6 \n" "movdqu 0x30(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm6 \n" "lea 0x40(%0),%0 \n" "movdqa %%xmm0,%%xmm7 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" "shufps $0xdd,%%xmm1,%%xmm7 \n" "pavgb %%xmm7,%%xmm0 \n" "movdqa %%xmm2,%%xmm7 \n" "shufps $0x88,%%xmm6,%%xmm2 \n" "shufps $0xdd,%%xmm6,%%xmm7 \n" "pavgb %%xmm7,%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm2,%%xmm6 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm3,%%xmm1 \n" "pmaddubsw %%xmm3,%%xmm6 \n" "phaddw %%xmm2,%%xmm0 \n" "phaddw %%xmm6,%%xmm1 \n" "paddw %%xmm5,%%xmm0 \n" "paddw %%xmm5,%%xmm1 \n" "psraw $0x8,%%xmm0 \n" "psraw $0x8,%%xmm1 \n" "packsswb %%xmm1,%%xmm0 \n" "movlps %%xmm0,(%1) \n" "movhps %%xmm0,0x00(%1,%2,1) \n" "lea 0x8(%1),%1 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_argb0), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+rm"(width) // %3 : "r"((intptr_t)(src_stride_argb)), // %4 "m"(kARGBToVJ), // %5 "m"(kARGBToUJ), // %6 "m"(kAddUVJ128) // %7 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } #endif // HAS_ARGBTOUVJROW_SSSE3 #ifdef HAS_ARGBTOUV444ROW_SSSE3 void ARGBToUV444Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "movdqa %4,%%xmm3 \n" "movdqa %5,%%xmm4 \n" "movdqa %6,%%xmm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x30(%0),%%xmm6 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm1 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm4,%%xmm6 \n" "phaddw %%xmm1,%%xmm0 \n" "phaddw %%xmm6,%%xmm2 \n" "psraw $0x8,%%xmm0 \n" "psraw $0x8,%%xmm2 \n" "packsswb %%xmm2,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x30(%0),%%xmm6 \n" "pmaddubsw %%xmm3,%%xmm0 \n" "pmaddubsw %%xmm3,%%xmm1 \n" "pmaddubsw %%xmm3,%%xmm2 \n" "pmaddubsw %%xmm3,%%xmm6 \n" "phaddw %%xmm1,%%xmm0 \n" "phaddw %%xmm6,%%xmm2 \n" "psraw $0x8,%%xmm0 \n" "psraw $0x8,%%xmm2 \n" "packsswb %%xmm2,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n" "lea 0x40(%0),%0 \n" "movdqu %%xmm0,0x00(%1,%2,1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+rm"(width) // %3 : "m"(kARGBToV), // %4 "m"(kARGBToU), // %5 "m"(kAddUV128) // %6 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6"); } #endif // HAS_ARGBTOUV444ROW_SSSE3 void BGRAToYRow_SSSE3(const uint8_t* src_bgra, uint8_t* dst_y, int width) { asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x30(%0),%%xmm3 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm1 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm4,%%xmm3 \n" "lea 0x40(%0),%0 \n" "phaddw %%xmm1,%%xmm0 \n" "phaddw %%xmm3,%%xmm2 \n" "psrlw $0x7,%%xmm0 \n" "psrlw $0x7,%%xmm2 \n" "packuswb %%xmm2,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" 
: "+r"(src_bgra), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "m"(kBGRAToY), // %3 "m"(kAddY16) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } void BGRAToUVRow_SSSE3(const uint8_t* src_bgra0, int src_stride_bgra, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "movdqa %5,%%xmm3 \n" "movdqa %6,%%xmm4 \n" "movdqa %7,%%xmm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x00(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x10(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x20(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm2 \n" "movdqu 0x30(%0),%%xmm6 \n" "movdqu 0x30(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm6 \n" "lea 0x40(%0),%0 \n" "movdqa %%xmm0,%%xmm7 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" "shufps $0xdd,%%xmm1,%%xmm7 \n" "pavgb %%xmm7,%%xmm0 \n" "movdqa %%xmm2,%%xmm7 \n" "shufps $0x88,%%xmm6,%%xmm2 \n" "shufps $0xdd,%%xmm6,%%xmm7 \n" "pavgb %%xmm7,%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm2,%%xmm6 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm3,%%xmm1 \n" "pmaddubsw %%xmm3,%%xmm6 \n" "phaddw %%xmm2,%%xmm0 \n" "phaddw %%xmm6,%%xmm1 \n" "psraw $0x8,%%xmm0 \n" "psraw $0x8,%%xmm1 \n" "packsswb %%xmm1,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n" "movlps %%xmm0,(%1) \n" "movhps %%xmm0,0x00(%1,%2,1) \n" "lea 0x8(%1),%1 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_bgra0), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+rm"(width) // %3 : "r"((intptr_t)(src_stride_bgra)), // %4 "m"(kBGRAToV), // %5 "m"(kBGRAToU), // %6 "m"(kAddUV128) // %7 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } void ABGRToYRow_SSSE3(const uint8_t* src_abgr, uint8_t* dst_y, int width) { asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x30(%0),%%xmm3 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm1 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm4,%%xmm3 \n" "lea 0x40(%0),%0 \n" "phaddw %%xmm1,%%xmm0 \n" "phaddw %%xmm3,%%xmm2 \n" "psrlw $0x7,%%xmm0 \n" "psrlw $0x7,%%xmm2 \n" "packuswb %%xmm2,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_abgr), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "m"(kABGRToY), // %3 "m"(kAddY16) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } void RGBAToYRow_SSSE3(const uint8_t* src_rgba, uint8_t* dst_y, int width) { asm volatile( "movdqa %4,%%xmm5 \n" "movdqa %3,%%xmm4 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x30(%0),%%xmm3 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm1 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm4,%%xmm3 \n" "lea 0x40(%0),%0 \n" "phaddw %%xmm1,%%xmm0 \n" "phaddw %%xmm3,%%xmm2 \n" "psrlw $0x7,%%xmm0 \n" "psrlw $0x7,%%xmm2 \n" "packuswb %%xmm2,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_rgba), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "m"(kRGBAToY), // %3 "m"(kAddY16) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } void ABGRToUVRow_SSSE3(const uint8_t* src_abgr0, int src_stride_abgr, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "movdqa %5,%%xmm3 \n" "movdqa %6,%%xmm4 \n" "movdqa %7,%%xmm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x00(%0,%4,1),%%xmm7 \n" "pavgb 
%%xmm7,%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x10(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x20(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm2 \n" "movdqu 0x30(%0),%%xmm6 \n" "movdqu 0x30(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm6 \n" "lea 0x40(%0),%0 \n" "movdqa %%xmm0,%%xmm7 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" "shufps $0xdd,%%xmm1,%%xmm7 \n" "pavgb %%xmm7,%%xmm0 \n" "movdqa %%xmm2,%%xmm7 \n" "shufps $0x88,%%xmm6,%%xmm2 \n" "shufps $0xdd,%%xmm6,%%xmm7 \n" "pavgb %%xmm7,%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm2,%%xmm6 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm3,%%xmm1 \n" "pmaddubsw %%xmm3,%%xmm6 \n" "phaddw %%xmm2,%%xmm0 \n" "phaddw %%xmm6,%%xmm1 \n" "psraw $0x8,%%xmm0 \n" "psraw $0x8,%%xmm1 \n" "packsswb %%xmm1,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n" "movlps %%xmm0,(%1) \n" "movhps %%xmm0,0x00(%1,%2,1) \n" "lea 0x8(%1),%1 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_abgr0), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+rm"(width) // %3 : "r"((intptr_t)(src_stride_abgr)), // %4 "m"(kABGRToV), // %5 "m"(kABGRToU), // %6 "m"(kAddUV128) // %7 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } void RGBAToUVRow_SSSE3(const uint8_t* src_rgba0, int src_stride_rgba, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "movdqa %5,%%xmm3 \n" "movdqa %6,%%xmm4 \n" "movdqa %7,%%xmm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x00(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x10(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x20(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm2 \n" "movdqu 0x30(%0),%%xmm6 \n" "movdqu 0x30(%0,%4,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm6 \n" "lea 0x40(%0),%0 \n" "movdqa %%xmm0,%%xmm7 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" "shufps $0xdd,%%xmm1,%%xmm7 \n" "pavgb %%xmm7,%%xmm0 \n" "movdqa %%xmm2,%%xmm7 \n" "shufps $0x88,%%xmm6,%%xmm2 \n" "shufps $0xdd,%%xmm6,%%xmm7 \n" "pavgb %%xmm7,%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm2,%%xmm6 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm3,%%xmm1 \n" "pmaddubsw %%xmm3,%%xmm6 \n" "phaddw %%xmm2,%%xmm0 \n" "phaddw %%xmm6,%%xmm1 \n" "psraw $0x8,%%xmm0 \n" "psraw $0x8,%%xmm1 \n" "packsswb %%xmm1,%%xmm0 \n" "paddb %%xmm5,%%xmm0 \n" "movlps %%xmm0,(%1) \n" "movhps %%xmm0,0x00(%1,%2,1) \n" "lea 0x8(%1),%1 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_rgba0), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+rm"(width) // %3 : "r"((intptr_t)(src_stride_rgba)), // %4 "m"(kRGBAToV), // %5 "m"(kRGBAToU), // %6 "m"(kAddUV128) // %7 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm6", "xmm7"); } #if defined(HAS_I422TOARGBROW_SSSE3) || defined(HAS_I422TOARGBROW_AVX2) // Read 8 UV from 444 #define READYUV444 \ "movq (%[u_buf]),%%xmm0 \n" \ "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ "lea 0x8(%[u_buf]),%[u_buf] \n" \ "punpcklbw %%xmm1,%%xmm0 \n" \ "movq (%[y_buf]),%%xmm4 \n" \ "punpcklbw %%xmm4,%%xmm4 \n" \ "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 UV from 422, upsample to 8 UV #define READYUV422 \ "movd (%[u_buf]),%%xmm0 \n" \ "movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ "lea 0x4(%[u_buf]),%[u_buf] \n" \ "punpcklbw %%xmm1,%%xmm0 \n" \ "punpcklwd %%xmm0,%%xmm0 \n" \ "movq (%[y_buf]),%%xmm4 \n" \ "punpcklbw %%xmm4,%%xmm4 \n" \ "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 UV from 422 10 bit, upsample to 8 UV // TODO(fbarchard): Consider shufb to replace pack/unpack // TODO(fbarchard): Consider pmulhuw to replace psraw // TODO(fbarchard): Consider 
pmullw to replace psllw and allow different bits. #define READYUV210 \ "movq (%[u_buf]),%%xmm0 \n" \ "movq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ "lea 0x8(%[u_buf]),%[u_buf] \n" \ "punpcklwd %%xmm1,%%xmm0 \n" \ "psraw $0x2,%%xmm0 \n" \ "packuswb %%xmm0,%%xmm0 \n" \ "punpcklwd %%xmm0,%%xmm0 \n" \ "movdqu (%[y_buf]),%%xmm4 \n" \ "psllw $0x6,%%xmm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. #define READYUVA422 \ "movd (%[u_buf]),%%xmm0 \n" \ "movd 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ "lea 0x4(%[u_buf]),%[u_buf] \n" \ "punpcklbw %%xmm1,%%xmm0 \n" \ "punpcklwd %%xmm0,%%xmm0 \n" \ "movq (%[y_buf]),%%xmm4 \n" \ "punpcklbw %%xmm4,%%xmm4 \n" \ "lea 0x8(%[y_buf]),%[y_buf] \n" \ "movq (%[a_buf]),%%xmm5 \n" \ "lea 0x8(%[a_buf]),%[a_buf] \n" // Read 4 UV from NV12, upsample to 8 UV #define READNV12 \ "movq (%[uv_buf]),%%xmm0 \n" \ "lea 0x8(%[uv_buf]),%[uv_buf] \n" \ "punpcklwd %%xmm0,%%xmm0 \n" \ "movq (%[y_buf]),%%xmm4 \n" \ "punpcklbw %%xmm4,%%xmm4 \n" \ "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 VU from NV21, upsample to 8 UV #define READNV21 \ "movq (%[vu_buf]),%%xmm0 \n" \ "lea 0x8(%[vu_buf]),%[vu_buf] \n" \ "pshufb %[kShuffleNV21], %%xmm0 \n" \ "movq (%[y_buf]),%%xmm4 \n" \ "punpcklbw %%xmm4,%%xmm4 \n" \ "lea 0x8(%[y_buf]),%[y_buf] \n" // Read 4 YUY2 with 8 Y and update 4 UV to 8 UV. #define READYUY2 \ "movdqu (%[yuy2_buf]),%%xmm4 \n" \ "pshufb %[kShuffleYUY2Y], %%xmm4 \n" \ "movdqu (%[yuy2_buf]),%%xmm0 \n" \ "pshufb %[kShuffleYUY2UV], %%xmm0 \n" \ "lea 0x10(%[yuy2_buf]),%[yuy2_buf] \n" // Read 4 UYVY with 8 Y and update 4 UV to 8 UV. #define READUYVY \ "movdqu (%[uyvy_buf]),%%xmm4 \n" \ "pshufb %[kShuffleUYVYY], %%xmm4 \n" \ "movdqu (%[uyvy_buf]),%%xmm0 \n" \ "pshufb %[kShuffleUYVYUV], %%xmm0 \n" \ "lea 0x10(%[uyvy_buf]),%[uyvy_buf] \n" #if defined(__x86_64__) #define YUVTORGB_SETUP(yuvconstants) \ "movdqa (%[yuvconstants]),%%xmm8 \n" \ "movdqa 32(%[yuvconstants]),%%xmm9 \n" \ "movdqa 64(%[yuvconstants]),%%xmm10 \n" \ "movdqa 96(%[yuvconstants]),%%xmm11 \n" \ "movdqa 128(%[yuvconstants]),%%xmm12 \n" \ "movdqa 160(%[yuvconstants]),%%xmm13 \n" \ "movdqa 192(%[yuvconstants]),%%xmm14 \n" // Convert 8 pixels: 8 UV and 8 Y #define YUVTORGB16(yuvconstants) \ "movdqa %%xmm0,%%xmm1 \n" \ "movdqa %%xmm0,%%xmm2 \n" \ "movdqa %%xmm0,%%xmm3 \n" \ "movdqa %%xmm11,%%xmm0 \n" \ "pmaddubsw %%xmm8,%%xmm1 \n" \ "psubw %%xmm1,%%xmm0 \n" \ "movdqa %%xmm12,%%xmm1 \n" \ "pmaddubsw %%xmm9,%%xmm2 \n" \ "psubw %%xmm2,%%xmm1 \n" \ "movdqa %%xmm13,%%xmm2 \n" \ "pmaddubsw %%xmm10,%%xmm3 \n" \ "psubw %%xmm3,%%xmm2 \n" \ "pmulhuw %%xmm14,%%xmm4 \n" \ "paddsw %%xmm4,%%xmm0 \n" \ "paddsw %%xmm4,%%xmm1 \n" \ "paddsw %%xmm4,%%xmm2 \n" #define YUVTORGB_REGS \ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", #else #define YUVTORGB_SETUP(yuvconstants) // Convert 8 pixels: 8 UV and 8 Y #define YUVTORGB16(yuvconstants) \ "movdqa %%xmm0,%%xmm1 \n" \ "movdqa %%xmm0,%%xmm2 \n" \ "movdqa %%xmm0,%%xmm3 \n" \ "movdqa 96(%[yuvconstants]),%%xmm0 \n" \ "pmaddubsw (%[yuvconstants]),%%xmm1 \n" \ "psubw %%xmm1,%%xmm0 \n" \ "movdqa 128(%[yuvconstants]),%%xmm1 \n" \ "pmaddubsw 32(%[yuvconstants]),%%xmm2 \n" \ "psubw %%xmm2,%%xmm1 \n" \ "movdqa 160(%[yuvconstants]),%%xmm2 \n" \ "pmaddubsw 64(%[yuvconstants]),%%xmm3 \n" \ "psubw %%xmm3,%%xmm2 \n" \ "pmulhuw 192(%[yuvconstants]),%%xmm4 \n" \ "paddsw %%xmm4,%%xmm0 \n" \ "paddsw %%xmm4,%%xmm1 \n" \ "paddsw %%xmm4,%%xmm2 \n" #define YUVTORGB_REGS #endif #define YUVTORGB(yuvconstants) \ YUVTORGB16(yuvconstants) \ "psraw $0x6,%%xmm0 \n" \ 
"psraw $0x6,%%xmm1 \n" \ "psraw $0x6,%%xmm2 \n" \ "packuswb %%xmm0,%%xmm0 \n" \ "packuswb %%xmm1,%%xmm1 \n" \ "packuswb %%xmm2,%%xmm2 \n" // Store 8 ARGB values. #define STOREARGB \ "punpcklbw %%xmm1,%%xmm0 \n" \ "punpcklbw %%xmm5,%%xmm2 \n" \ "movdqa %%xmm0,%%xmm1 \n" \ "punpcklwd %%xmm2,%%xmm0 \n" \ "punpckhwd %%xmm2,%%xmm1 \n" \ "movdqu %%xmm0,(%[dst_argb]) \n" \ "movdqu %%xmm1,0x10(%[dst_argb]) \n" \ "lea 0x20(%[dst_argb]), %[dst_argb] \n" // Store 8 RGBA values. #define STORERGBA \ "pcmpeqb %%xmm5,%%xmm5 \n" \ "punpcklbw %%xmm2,%%xmm1 \n" \ "punpcklbw %%xmm0,%%xmm5 \n" \ "movdqa %%xmm5,%%xmm0 \n" \ "punpcklwd %%xmm1,%%xmm5 \n" \ "punpckhwd %%xmm1,%%xmm0 \n" \ "movdqu %%xmm5,(%[dst_rgba]) \n" \ "movdqu %%xmm0,0x10(%[dst_rgba]) \n" \ "lea 0x20(%[dst_rgba]),%[dst_rgba] \n" // Store 8 AR30 values. #define STOREAR30 \ "psraw $0x4,%%xmm0 \n" \ "psraw $0x4,%%xmm1 \n" \ "psraw $0x4,%%xmm2 \n" \ "pminsw %%xmm7,%%xmm0 \n" \ "pminsw %%xmm7,%%xmm1 \n" \ "pminsw %%xmm7,%%xmm2 \n" \ "pmaxsw %%xmm6,%%xmm0 \n" \ "pmaxsw %%xmm6,%%xmm1 \n" \ "pmaxsw %%xmm6,%%xmm2 \n" \ "psllw $0x4,%%xmm2 \n" \ "movdqa %%xmm0,%%xmm3 \n" \ "punpcklwd %%xmm2,%%xmm0 \n" \ "punpckhwd %%xmm2,%%xmm3 \n" \ "movdqa %%xmm1,%%xmm2 \n" \ "punpcklwd %%xmm5,%%xmm1 \n" \ "punpckhwd %%xmm5,%%xmm2 \n" \ "pslld $0xa,%%xmm1 \n" \ "pslld $0xa,%%xmm2 \n" \ "por %%xmm1,%%xmm0 \n" \ "por %%xmm2,%%xmm3 \n" \ "movdqu %%xmm0,(%[dst_ar30]) \n" \ "movdqu %%xmm3,0x10(%[dst_ar30]) \n" \ "lea 0x20(%[dst_ar30]), %[dst_ar30] \n" void OMITFP I444ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV444 YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } void OMITFP I422ToRGB24Row_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP(yuvconstants) "movdqa %[kShuffleMaskARGBToRGB24_0],%%xmm5 \n" "movdqa %[kShuffleMaskARGBToRGB24],%%xmm6 \n" "sub %[u_buf],%[v_buf] \n" LABELALIGN "1: \n" READYUV422 YUVTORGB(yuvconstants) "punpcklbw %%xmm1,%%xmm0 \n" "punpcklbw %%xmm2,%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" "punpcklwd %%xmm2,%%xmm0 \n" "punpckhwd %%xmm2,%%xmm1 \n" "pshufb %%xmm5,%%xmm0 \n" "pshufb %%xmm6,%%xmm1 \n" "palignr $0xc,%%xmm0,%%xmm1 \n" "movq %%xmm0,(%[dst_rgb24]) \n" "movdqu %%xmm1,0x8(%[dst_rgb24]) \n" "lea 0x18(%[dst_rgb24]),%[dst_rgb24] \n" "subl $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_rgb24]"+r"(dst_rgb24), // %[dst_rgb24] #if defined(__i386__) [width]"+m"(width) // %[width] #else [width]"+rm"(width) // %[width] #endif : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleMaskARGBToRGB24_0]"m"(kShuffleMaskARGBToRGB24_0), [kShuffleMaskARGBToRGB24]"m"(kShuffleMaskARGBToRGB24) : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6" ); } void OMITFP I422ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const 
struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV422 YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } void OMITFP I422ToAR30Row_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" // AR30 constants "psrlw $14,%%xmm5 \n" "psllw $4,%%xmm5 \n" // 2 alpha bits "pxor %%xmm6,%%xmm6 \n" "pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min "psrlw $6,%%xmm7 \n" // 1023 for max LABELALIGN "1: \n" READYUV422 YUVTORGB16(yuvconstants) STOREAR30 "sub $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_ar30]"+r"(dst_ar30), // %[dst_ar30] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" ); } // 10 bit YUV to ARGB void OMITFP I210ToARGBRow_SSSE3(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV210 YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } // 10 bit YUV to AR30 void OMITFP I210ToAR30Row_SSSE3(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $14,%%xmm5 \n" "psllw $4,%%xmm5 \n" // 2 alpha bits "pxor %%xmm6,%%xmm6 \n" "pcmpeqb %%xmm7,%%xmm7 \n" // 0 for min "psrlw $6,%%xmm7 \n" // 1023 for max LABELALIGN "1: \n" READYUV210 YUVTORGB16(yuvconstants) STOREAR30 "sub $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_ar30]"+r"(dst_ar30), // %[dst_ar30] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" ); } #ifdef HAS_I422ALPHATOARGBROW_SSSE3 void OMITFP I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" LABELALIGN "1: \n" READYUVA422 YUVTORGB(yuvconstants) STOREARGB "subl $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [a_buf]"+r"(a_buf), // %[a_buf] 
[dst_argb]"+r"(dst_argb), // %[dst_argb] #if defined(__i386__) [width]"+m"(width) // %[width] #else [width]"+rm"(width) // %[width] #endif : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on } #endif // HAS_I422ALPHATOARGBROW_SSSE3 void OMITFP NV12ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off asm volatile ( YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READNV12 YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] [uv_buf]"+r"(uv_buf), // %[uv_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on } void OMITFP NV21ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* vu_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off asm volatile ( YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READNV21 YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] [vu_buf]"+r"(vu_buf), // %[vu_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleNV21]"m"(kShuffleNV21) : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on } void OMITFP YUY2ToARGBRow_SSSE3(const uint8_t* yuy2_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off asm volatile ( YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUY2 YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" : [yuy2_buf]"+r"(yuy2_buf), // %[yuy2_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleYUY2Y]"m"(kShuffleYUY2Y), [kShuffleYUY2UV]"m"(kShuffleYUY2UV) : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on } void OMITFP UYVYToARGBRow_SSSE3(const uint8_t* uyvy_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off asm volatile ( YUVTORGB_SETUP(yuvconstants) "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READUYVY YUVTORGB(yuvconstants) STOREARGB "sub $0x8,%[width] \n" "jg 1b \n" : [uyvy_buf]"+r"(uyvy_buf), // %[uyvy_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants), // %[yuvconstants] [kShuffleUYVYY]"m"(kShuffleUYVYY), [kShuffleUYVYUV]"m"(kShuffleUYVYUV) : "memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on } void OMITFP I422ToRGBARow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP(yuvconstants) "sub %[u_buf],%[v_buf] \n" "pcmpeqb %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" READYUV422 YUVTORGB(yuvconstants) STORERGBA "sub $0x8,%[width] \n" "jg 1b \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_rgba]"+r"(dst_rgba), // %[dst_rgba] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : 
"memory", "cc", YUVTORGB_REGS "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_I422TOARGBROW_SSSE3 // Read 16 UV from 444 #define READYUV444_AVX2 \ "vmovdqu (%[u_buf]),%%xmm0 \n" \ "vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ "lea 0x10(%[u_buf]),%[u_buf] \n" \ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ "vmovdqu (%[y_buf]),%%xmm4 \n" \ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 8 UV from 422, upsample to 16 UV. #define READYUV422_AVX2 \ "vmovq (%[u_buf]),%%xmm0 \n" \ "vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ "lea 0x8(%[u_buf]),%[u_buf] \n" \ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ "vmovdqu (%[y_buf]),%%xmm4 \n" \ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 8 UV from 210 10 bit, upsample to 16 UV // TODO(fbarchard): Consider vshufb to replace pack/unpack // TODO(fbarchard): Consider vunpcklpd to combine the 2 registers into 1. #define READYUV210_AVX2 \ "vmovdqu (%[u_buf]),%%xmm0 \n" \ "vmovdqu 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ "lea 0x10(%[u_buf]),%[u_buf] \n" \ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ "vpunpcklwd %%ymm1,%%ymm0,%%ymm0 \n" \ "vpsraw $0x2,%%ymm0,%%ymm0 \n" \ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ "vmovdqu (%[y_buf]),%%ymm4 \n" \ "vpsllw $0x6,%%ymm4,%%ymm4 \n" \ "lea 0x20(%[y_buf]),%[y_buf] \n" // Read 8 UV from 422, upsample to 16 UV. With 16 Alpha. #define READYUVA422_AVX2 \ "vmovq (%[u_buf]),%%xmm0 \n" \ "vmovq 0x00(%[u_buf],%[v_buf],1),%%xmm1 \n" \ "lea 0x8(%[u_buf]),%[u_buf] \n" \ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ "vmovdqu (%[y_buf]),%%xmm4 \n" \ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" \ "vmovdqu (%[a_buf]),%%xmm5 \n" \ "vpermq $0xd8,%%ymm5,%%ymm5 \n" \ "lea 0x10(%[a_buf]),%[a_buf] \n" // Read 8 UV from NV12, upsample to 16 UV. #define READNV12_AVX2 \ "vmovdqu (%[uv_buf]),%%xmm0 \n" \ "lea 0x10(%[uv_buf]),%[uv_buf] \n" \ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ "vpunpcklwd %%ymm0,%%ymm0,%%ymm0 \n" \ "vmovdqu (%[y_buf]),%%xmm4 \n" \ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 8 VU from NV21, upsample to 16 UV. #define READNV21_AVX2 \ "vmovdqu (%[vu_buf]),%%xmm0 \n" \ "lea 0x10(%[vu_buf]),%[vu_buf] \n" \ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ "vpshufb %[kShuffleNV21], %%ymm0, %%ymm0 \n" \ "vmovdqu (%[y_buf]),%%xmm4 \n" \ "vpermq $0xd8,%%ymm4,%%ymm4 \n" \ "vpunpcklbw %%ymm4,%%ymm4,%%ymm4 \n" \ "lea 0x10(%[y_buf]),%[y_buf] \n" // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV. #define READYUY2_AVX2 \ "vmovdqu (%[yuy2_buf]),%%ymm4 \n" \ "vpshufb %[kShuffleYUY2Y], %%ymm4, %%ymm4 \n" \ "vmovdqu (%[yuy2_buf]),%%ymm0 \n" \ "vpshufb %[kShuffleYUY2UV], %%ymm0, %%ymm0 \n" \ "lea 0x20(%[yuy2_buf]),%[yuy2_buf] \n" // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV. 
#define READUYVY_AVX2 \ "vmovdqu (%[uyvy_buf]),%%ymm4 \n" \ "vpshufb %[kShuffleUYVYY], %%ymm4, %%ymm4 \n" \ "vmovdqu (%[uyvy_buf]),%%ymm0 \n" \ "vpshufb %[kShuffleUYVYUV], %%ymm0, %%ymm0 \n" \ "lea 0x20(%[uyvy_buf]),%[uyvy_buf] \n" #if defined(__x86_64__) #define YUVTORGB_SETUP_AVX2(yuvconstants) \ "vmovdqa (%[yuvconstants]),%%ymm8 \n" \ "vmovdqa 32(%[yuvconstants]),%%ymm9 \n" \ "vmovdqa 64(%[yuvconstants]),%%ymm10 \n" \ "vmovdqa 96(%[yuvconstants]),%%ymm11 \n" \ "vmovdqa 128(%[yuvconstants]),%%ymm12 \n" \ "vmovdqa 160(%[yuvconstants]),%%ymm13 \n" \ "vmovdqa 192(%[yuvconstants]),%%ymm14 \n" #define YUVTORGB16_AVX2(yuvconstants) \ "vpmaddubsw %%ymm10,%%ymm0,%%ymm2 \n" \ "vpmaddubsw %%ymm9,%%ymm0,%%ymm1 \n" \ "vpmaddubsw %%ymm8,%%ymm0,%%ymm0 \n" \ "vpsubw %%ymm2,%%ymm13,%%ymm2 \n" \ "vpsubw %%ymm1,%%ymm12,%%ymm1 \n" \ "vpsubw %%ymm0,%%ymm11,%%ymm0 \n" \ "vpmulhuw %%ymm14,%%ymm4,%%ymm4 \n" \ "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \ "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \ "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" #define YUVTORGB_REGS_AVX2 \ "xmm8", "xmm9", "xmm10", "xmm11", "xmm12", "xmm13", "xmm14", #else // Convert 16 pixels: 16 UV and 16 Y. #define YUVTORGB_SETUP_AVX2(yuvconstants) #define YUVTORGB16_AVX2(yuvconstants) \ "vpmaddubsw 64(%[yuvconstants]),%%ymm0,%%ymm2 \n" \ "vpmaddubsw 32(%[yuvconstants]),%%ymm0,%%ymm1 \n" \ "vpmaddubsw (%[yuvconstants]),%%ymm0,%%ymm0 \n" \ "vmovdqu 160(%[yuvconstants]),%%ymm3 \n" \ "vpsubw %%ymm2,%%ymm3,%%ymm2 \n" \ "vmovdqu 128(%[yuvconstants]),%%ymm3 \n" \ "vpsubw %%ymm1,%%ymm3,%%ymm1 \n" \ "vmovdqu 96(%[yuvconstants]),%%ymm3 \n" \ "vpsubw %%ymm0,%%ymm3,%%ymm0 \n" \ "vpmulhuw 192(%[yuvconstants]),%%ymm4,%%ymm4 \n" \ "vpaddsw %%ymm4,%%ymm0,%%ymm0 \n" \ "vpaddsw %%ymm4,%%ymm1,%%ymm1 \n" \ "vpaddsw %%ymm4,%%ymm2,%%ymm2 \n" #define YUVTORGB_REGS_AVX2 #endif #define YUVTORGB_AVX2(yuvconstants) \ YUVTORGB16_AVX2(yuvconstants) \ "vpsraw $0x6,%%ymm0,%%ymm0 \n" \ "vpsraw $0x6,%%ymm1,%%ymm1 \n" \ "vpsraw $0x6,%%ymm2,%%ymm2 \n" \ "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" \ "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" \ "vpackuswb %%ymm2,%%ymm2,%%ymm2 \n" // Store 16 ARGB values. #define STOREARGB_AVX2 \ "vpunpcklbw %%ymm1,%%ymm0,%%ymm0 \n" \ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ "vpunpcklbw %%ymm5,%%ymm2,%%ymm2 \n" \ "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ "vpunpcklwd %%ymm2,%%ymm0,%%ymm1 \n" \ "vpunpckhwd %%ymm2,%%ymm0,%%ymm0 \n" \ "vmovdqu %%ymm1,(%[dst_argb]) \n" \ "vmovdqu %%ymm0,0x20(%[dst_argb]) \n" \ "lea 0x40(%[dst_argb]), %[dst_argb] \n" // Store 16 AR30 values. #define STOREAR30_AVX2 \ "vpsraw $0x4,%%ymm0,%%ymm0 \n" \ "vpsraw $0x4,%%ymm1,%%ymm1 \n" \ "vpsraw $0x4,%%ymm2,%%ymm2 \n" \ "vpminsw %%ymm7,%%ymm0,%%ymm0 \n" \ "vpminsw %%ymm7,%%ymm1,%%ymm1 \n" \ "vpminsw %%ymm7,%%ymm2,%%ymm2 \n" \ "vpmaxsw %%ymm6,%%ymm0,%%ymm0 \n" \ "vpmaxsw %%ymm6,%%ymm1,%%ymm1 \n" \ "vpmaxsw %%ymm6,%%ymm2,%%ymm2 \n" \ "vpsllw $0x4,%%ymm2,%%ymm2 \n" \ "vpermq $0xd8,%%ymm0,%%ymm0 \n" \ "vpermq $0xd8,%%ymm1,%%ymm1 \n" \ "vpermq $0xd8,%%ymm2,%%ymm2 \n" \ "vpunpckhwd %%ymm2,%%ymm0,%%ymm3 \n" \ "vpunpcklwd %%ymm2,%%ymm0,%%ymm0 \n" \ "vpunpckhwd %%ymm5,%%ymm1,%%ymm2 \n" \ "vpunpcklwd %%ymm5,%%ymm1,%%ymm1 \n" \ "vpslld $0xa,%%ymm1,%%ymm1 \n" \ "vpslld $0xa,%%ymm2,%%ymm2 \n" \ "vpor %%ymm1,%%ymm0,%%ymm0 \n" \ "vpor %%ymm2,%%ymm3,%%ymm3 \n" \ "vmovdqu %%ymm0,(%[dst_ar30]) \n" \ "vmovdqu %%ymm3,0x20(%[dst_ar30]) \n" \ "lea 0x40(%[dst_ar30]), %[dst_ar30] \n" #ifdef HAS_I444TOARGBROW_AVX2 // 16 pixels // 16 UV values with 16 Y producing 16 ARGB (64 bytes). 
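// Usage sketch for the row functions in this block (assumed caller, not part
// of this file): each call converts exactly one row, and planar callers
// advance their pointers by the plane strides:
//   for (int yrow = 0; yrow < height; ++yrow) {
//     I444ToARGBRow_AVX2(src_y, src_u, src_v, dst_argb, yuvconstants, width);
//     src_y += stride_y;
//     src_u += stride_u;
//     src_v += stride_v;
//     dst_argb += stride_argb;
//   }
// width must be a multiple of 16 for this entry point; remainders go through
// the _Any_ wrappers generated in row_any.cc.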
void OMITFP I444ToARGBRow_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUV444_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 "sub $0x10,%[width] \n" "jg 1b \n" "vzeroupper \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_I444TOARGBROW_AVX2 #if defined(HAS_I422TOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). void OMITFP I422ToARGBRow_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUV422_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 "sub $0x10,%[width] \n" "jg 1b \n" "vzeroupper \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_I422TOARGBROW_AVX2 #if defined(HAS_I422TOAR30ROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes). void OMITFP I422ToAR30Row_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants "vpsrlw $14,%%ymm5,%%ymm5 \n" "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max "vpsrlw $6,%%ymm7,%%ymm7 \n" LABELALIGN "1: \n" READYUV422_AVX2 YUVTORGB16_AVX2(yuvconstants) STOREAR30_AVX2 "sub $0x10,%[width] \n" "jg 1b \n" "vzeroupper \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_ar30]"+r"(dst_ar30), // %[dst_ar30] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7" ); } #endif // HAS_I422TOAR30ROW_AVX2 #if defined(HAS_I210TOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 
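// I210 keeps 10-bit samples in the low bits of each uint16_t.  READYUV210_AVX2
// (above) adapts them to the 8-bit YUVTORGB math: UV is narrowed with
// vpsraw $2 and Y is shifted up with vpsllw $6 so the value fills the high
// bits that vpmulhuw expects.  Scalar equivalent (illustrative):
//   uv8 = (uint8_t)(uv10 >> 2);   // 10-bit UV -> 8-bit UV
//   y16 = (uint16_t)(y10 << 6);   // 10-bit Y -> 16-bit fixed point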
void OMITFP I210ToARGBRow_AVX2(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" READYUV210_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 "sub $0x10,%[width] \n" "jg 1b \n" "vzeroupper \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_I210TOARGBROW_AVX2 #if defined(HAS_I210TOAR30ROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 AR30 (64 bytes). void OMITFP I210ToAR30Row_AVX2(const uint16_t* y_buf, const uint16_t* u_buf, const uint16_t* v_buf, uint8_t* dst_ar30, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" // AR30 constants "vpsrlw $14,%%ymm5,%%ymm5 \n" "vpsllw $4,%%ymm5,%%ymm5 \n" // 2 alpha bits "vpxor %%ymm6,%%ymm6,%%ymm6 \n" // 0 for min "vpcmpeqb %%ymm7,%%ymm7,%%ymm7 \n" // 1023 for max "vpsrlw $6,%%ymm7,%%ymm7 \n" LABELALIGN "1: \n" READYUV210_AVX2 YUVTORGB16_AVX2(yuvconstants) STOREAR30_AVX2 "sub $0x10,%[width] \n" "jg 1b \n" "vzeroupper \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [dst_ar30]"+r"(dst_ar30), // %[dst_ar30] [width]"+rm"(width) // %[width] : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); } #endif // HAS_I210TOAR30ROW_AVX2 #if defined(HAS_I422ALPHATOARGBROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. void OMITFP I422AlphaToARGBRow_AVX2(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { // clang-format off asm volatile ( YUVTORGB_SETUP_AVX2(yuvconstants) "sub %[u_buf],%[v_buf] \n" LABELALIGN "1: \n" READYUVA422_AVX2 YUVTORGB_AVX2(yuvconstants) STOREARGB_AVX2 "subl $0x10,%[width] \n" "jg 1b \n" "vzeroupper \n" : [y_buf]"+r"(y_buf), // %[y_buf] [u_buf]"+r"(u_buf), // %[u_buf] [v_buf]"+r"(v_buf), // %[v_buf] [a_buf]"+r"(a_buf), // %[a_buf] [dst_argb]"+r"(dst_argb), // %[dst_argb] #if defined(__i386__) [width]"+m"(width) // %[width] #else [width]"+rm"(width) // %[width] #endif : [yuvconstants]"r"(yuvconstants) // %[yuvconstants] : "memory", "cc", YUVTORGB_REGS_AVX2 "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5" ); // clang-format on } #endif // HAS_I422ALPHATOARGBROW_AVX2 #if defined(HAS_I422TORGBAROW_AVX2) // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). 
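// The RGBA store differs from STOREARGB_AVX2 only in weave order: the
// constant alpha register (ymm5) is interleaved ahead of B rather than after
// R, producing byte order A,B,G,R per pixel in memory (illustrative):
//   dst[0] = 0xff; dst[1] = B; dst[2] = G; dst[3] = R;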
void OMITFP I422ToRGBARow_AVX2(const uint8_t* y_buf,
                               const uint8_t* u_buf,
                               const uint8_t* v_buf,
                               uint8_t* dst_argb,
                               const struct YuvConstants* yuvconstants,
                               int width) {
  asm volatile (
    YUVTORGB_SETUP_AVX2(yuvconstants)
    "sub         %[u_buf],%[v_buf]             \n"
    "vpcmpeqb    %%ymm5,%%ymm5,%%ymm5          \n"

    LABELALIGN
    "1:                                        \n"
    READYUV422_AVX2
    YUVTORGB_AVX2(yuvconstants)

    // Step 3: Weave into RGBA
    "vpunpcklbw  %%ymm2,%%ymm1,%%ymm1          \n"
    "vpermq      $0xd8,%%ymm1,%%ymm1           \n"
    "vpunpcklbw  %%ymm0,%%ymm5,%%ymm2          \n"
    "vpermq      $0xd8,%%ymm2,%%ymm2           \n"
    "vpunpcklwd  %%ymm1,%%ymm2,%%ymm0          \n"
    "vpunpckhwd  %%ymm1,%%ymm2,%%ymm1          \n"
    "vmovdqu     %%ymm0,(%[dst_argb])          \n"
    "vmovdqu     %%ymm1,0x20(%[dst_argb])      \n"
    "lea         0x40(%[dst_argb]),%[dst_argb] \n"
    "sub         $0x10,%[width]                \n"
    "jg          1b                            \n"
    "vzeroupper                                \n"
  : [y_buf]"+r"(y_buf),        // %[y_buf]
    [u_buf]"+r"(u_buf),        // %[u_buf]
    [v_buf]"+r"(v_buf),        // %[v_buf]
    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
    [width]"+rm"(width)        // %[width]
  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", YUVTORGB_REGS_AVX2
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
  );
}
#endif  // HAS_I422TORGBAROW_AVX2

#if defined(HAS_NV12TOARGBROW_AVX2)
// 16 pixels.
// 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
void OMITFP NV12ToARGBRow_AVX2(const uint8_t* y_buf,
                               const uint8_t* uv_buf,
                               uint8_t* dst_argb,
                               const struct YuvConstants* yuvconstants,
                               int width) {
  // clang-format off
  asm volatile (
    YUVTORGB_SETUP_AVX2(yuvconstants)
    "vpcmpeqb    %%ymm5,%%ymm5,%%ymm5          \n"

    LABELALIGN
    "1:                                        \n"
    READNV12_AVX2
    YUVTORGB_AVX2(yuvconstants)
    STOREARGB_AVX2
    "sub         $0x10,%[width]                \n"
    "jg          1b                            \n"
    "vzeroupper                                \n"
  : [y_buf]"+r"(y_buf),        // %[y_buf]
    [uv_buf]"+r"(uv_buf),      // %[uv_buf]
    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
    [width]"+rm"(width)        // %[width]
  : [yuvconstants]"r"(yuvconstants)  // %[yuvconstants]
  : "memory", "cc", YUVTORGB_REGS_AVX2
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
  );
  // clang-format on
}
#endif  // HAS_NV12TOARGBROW_AVX2

#if defined(HAS_NV21TOARGBROW_AVX2)
// 16 pixels.
// 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes).
void OMITFP NV21ToARGBRow_AVX2(const uint8_t* y_buf,
                               const uint8_t* vu_buf,
                               uint8_t* dst_argb,
                               const struct YuvConstants* yuvconstants,
                               int width) {
  // clang-format off
  asm volatile (
    YUVTORGB_SETUP_AVX2(yuvconstants)
    "vpcmpeqb    %%ymm5,%%ymm5,%%ymm5          \n"

    LABELALIGN
    "1:                                        \n"
    READNV21_AVX2
    YUVTORGB_AVX2(yuvconstants)
    STOREARGB_AVX2
    "sub         $0x10,%[width]                \n"
    "jg          1b                            \n"
    "vzeroupper                                \n"
  : [y_buf]"+r"(y_buf),        // %[y_buf]
    [vu_buf]"+r"(vu_buf),      // %[vu_buf]
    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
    [width]"+rm"(width)        // %[width]
  : [yuvconstants]"r"(yuvconstants),  // %[yuvconstants]
    [kShuffleNV21]"m"(kShuffleNV21)
  : "memory", "cc", YUVTORGB_REGS_AVX2
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
  );
  // clang-format on
}
#endif  // HAS_NV21TOARGBROW_AVX2

#if defined(HAS_YUY2TOARGBROW_AVX2)
// 16 pixels.
// 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
void OMITFP YUY2ToARGBRow_AVX2(const uint8_t* yuy2_buf,
                               uint8_t* dst_argb,
                               const struct YuvConstants* yuvconstants,
                               int width) {
  // clang-format off
  asm volatile (
    YUVTORGB_SETUP_AVX2(yuvconstants)
    "vpcmpeqb    %%ymm5,%%ymm5,%%ymm5          \n"

    LABELALIGN
    "1:                                        \n"
    READYUY2_AVX2
    YUVTORGB_AVX2(yuvconstants)
    STOREARGB_AVX2
    "sub         $0x10,%[width]                \n"
    "jg          1b                            \n"
    "vzeroupper                                \n"
  : [yuy2_buf]"+r"(yuy2_buf),  // %[yuy2_buf]
    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
    [width]"+rm"(width)        // %[width]
  : [yuvconstants]"r"(yuvconstants),  // %[yuvconstants]
    [kShuffleYUY2Y]"m"(kShuffleYUY2Y),
    [kShuffleYUY2UV]"m"(kShuffleYUY2UV)
  : "memory", "cc", YUVTORGB_REGS_AVX2
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
  );
  // clang-format on
}
#endif  // HAS_YUY2TOARGBROW_AVX2

#if defined(HAS_UYVYTOARGBROW_AVX2)
// 16 pixels.
// 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes).
void OMITFP UYVYToARGBRow_AVX2(const uint8_t* uyvy_buf,
                               uint8_t* dst_argb,
                               const struct YuvConstants* yuvconstants,
                               int width) {
  // clang-format off
  asm volatile (
    YUVTORGB_SETUP_AVX2(yuvconstants)
    "vpcmpeqb    %%ymm5,%%ymm5,%%ymm5          \n"

    LABELALIGN
    "1:                                        \n"
    READUYVY_AVX2
    YUVTORGB_AVX2(yuvconstants)
    STOREARGB_AVX2
    "sub         $0x10,%[width]                \n"
    "jg          1b                            \n"
    "vzeroupper                                \n"
  : [uyvy_buf]"+r"(uyvy_buf),  // %[uyvy_buf]
    [dst_argb]"+r"(dst_argb),  // %[dst_argb]
    [width]"+rm"(width)        // %[width]
  : [yuvconstants]"r"(yuvconstants),  // %[yuvconstants]
    [kShuffleUYVYY]"m"(kShuffleUYVYY),
    [kShuffleUYVYUV]"m"(kShuffleUYVYUV)
  : "memory", "cc", YUVTORGB_REGS_AVX2
    "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"
  );
  // clang-format on
}
#endif  // HAS_UYVYTOARGBROW_AVX2

#ifdef HAS_I400TOARGBROW_SSE2
void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
  asm volatile(
      "mov        $0x4a354a35,%%eax            \n"  // 4a35 = 18997 = 1.164
      "movd       %%eax,%%xmm2                 \n"
      "pshufd     $0x0,%%xmm2,%%xmm2           \n"
      "mov        $0x04880488,%%eax            \n"  // 0488 = 1160 = 1.164 * 16
      "movd       %%eax,%%xmm3                 \n"
      "pshufd     $0x0,%%xmm3,%%xmm3           \n"
      "pcmpeqb    %%xmm4,%%xmm4                \n"
      "pslld      $0x18,%%xmm4                 \n"

      LABELALIGN
      "1:                                      \n"
      // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164
      "movq       (%0),%%xmm0                  \n"
      "lea        0x8(%0),%0                   \n"
      "punpcklbw  %%xmm0,%%xmm0                \n"
      "pmulhuw    %%xmm2,%%xmm0                \n"
      "psubusw    %%xmm3,%%xmm0                \n"
      "psrlw      $6, %%xmm0                   \n"
      "packuswb   %%xmm0,%%xmm0                \n"
      // Step 2: Weave into ARGB
      "punpcklbw  %%xmm0,%%xmm0                \n"
      "movdqa     %%xmm0,%%xmm1                \n"
      "punpcklwd  %%xmm0,%%xmm0                \n"
      "punpckhwd  %%xmm1,%%xmm1                \n"
      "por        %%xmm4,%%xmm0                \n"
      "por        %%xmm4,%%xmm1                \n"
      "movdqu     %%xmm0,(%1)                  \n"
      "movdqu     %%xmm1,0x10(%1)              \n"
      "lea        0x20(%1),%1                  \n"
      "sub        $0x8,%2                      \n"
      "jg         1b                           \n"
      : "+r"(y_buf),     // %0
        "+r"(dst_argb),  // %1
        "+rm"(width)     // %2
      :
      : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4");
}
#endif  // HAS_I400TOARGBROW_SSE2

#ifdef HAS_I400TOARGBROW_AVX2
// 16 pixels of Y converted to 16 pixels of ARGB (64 bytes).
// note: vpunpcklbw mutates and vpackuswb unmutates.
void I400ToARGBRow_AVX2(const uint8_t* y_buf, uint8_t* dst_argb, int width) {
  asm volatile(
      "mov          $0x4a354a35,%%eax          \n"  // 4a35 = 18997 = 1.164
      "vmovd        %%eax,%%xmm2               \n"
      "vbroadcastss %%xmm2,%%ymm2              \n"
      "mov          $0x4880488,%%eax           \n"  // 0488 = 1160 = 1.164 * 16
      "vmovd        %%eax,%%xmm3               \n"
      "vbroadcastss %%xmm3,%%ymm3              \n"
      "vpcmpeqb     %%ymm4,%%ymm4,%%ymm4       \n"
      "vpslld       $0x18,%%ymm4,%%ymm4        \n"

      LABELALIGN
      "1:                                      \n"
      // Step 1: Scale Y contribution to 16 G values.
G = (y - 16) * 1.164 "vmovdqu (%0),%%xmm0 \n" "lea 0x10(%0),%0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" "vpsubusw %%ymm3,%%ymm0,%%ymm0 \n" "vpsrlw $0x6,%%ymm0,%%ymm0 \n" "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" "vpunpcklbw %%ymm0,%%ymm0,%%ymm1 \n" "vpermq $0xd8,%%ymm1,%%ymm1 \n" "vpunpcklwd %%ymm1,%%ymm1,%%ymm0 \n" "vpunpckhwd %%ymm1,%%ymm1,%%ymm1 \n" "vpor %%ymm4,%%ymm0,%%ymm0 \n" "vpor %%ymm4,%%ymm1,%%ymm1 \n" "vmovdqu %%ymm0,(%1) \n" "vmovdqu %%ymm1,0x20(%1) \n" "lea 0x40(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(y_buf), // %0 "+r"(dst_argb), // %1 "+rm"(width) // %2 : : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); } #endif // HAS_I400TOARGBROW_AVX2 #ifdef HAS_MIRRORROW_SSSE3 // Shuffle table for reversing the bytes. static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u, 7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; void MirrorRow_SSSE3(const uint8_t* src, uint8_t* dst, int width) { intptr_t temp_width = (intptr_t)(width); asm volatile( "movdqa %3,%%xmm5 \n" LABELALIGN "1: \n" "movdqu -0x10(%0,%2,1),%%xmm0 \n" "pshufb %%xmm5,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(temp_width) // %2 : "m"(kShuffleMirror) // %3 : "memory", "cc", "xmm0", "xmm5"); } #endif // HAS_MIRRORROW_SSSE3 #ifdef HAS_MIRRORROW_AVX2 void MirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { intptr_t temp_width = (intptr_t)(width); asm volatile( "vbroadcastf128 %3,%%ymm5 \n" LABELALIGN "1: \n" "vmovdqu -0x20(%0,%2,1),%%ymm0 \n" "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" "vpermq $0x4e,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(temp_width) // %2 : "m"(kShuffleMirror) // %3 : "memory", "cc", "xmm0", "xmm5"); } #endif // HAS_MIRRORROW_AVX2 #ifdef HAS_MIRRORUVROW_SSSE3 // Shuffle table for reversing the bytes of UV channels. static const uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u, 15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u}; void MirrorUVRow_SSSE3(const uint8_t* src, uint8_t* dst_u, uint8_t* dst_v, int width) { intptr_t temp_width = (intptr_t)(width); asm volatile( "movdqa %4,%%xmm1 \n" "lea -0x10(%0,%3,2),%0 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "lea -0x10(%0),%0 \n" "pshufb %%xmm1,%%xmm0 \n" "movlpd %%xmm0,(%1) \n" "movhpd %%xmm0,0x00(%1,%2,1) \n" "lea 0x8(%1),%1 \n" "sub $8,%3 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(temp_width) // %3 : "m"(kShuffleMirrorUV) // %4 : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_MIRRORUVROW_SSSE3 #ifdef HAS_ARGBMIRRORROW_SSE2 void ARGBMirrorRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { intptr_t temp_width = (intptr_t)(width); asm volatile( "lea -0x10(%0,%2,4),%0 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "pshufd $0x1b,%%xmm0,%%xmm0 \n" "lea -0x10(%0),%0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x4,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(temp_width) // %2 : : "memory", "cc", "xmm0"); } #endif // HAS_ARGBMIRRORROW_SSE2 #ifdef HAS_ARGBMIRRORROW_AVX2 // Shuffle table for reversing the bytes. 
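// Mirroring ARGB is a whole-dword reversal, so one cross-lane vpermd with the
// index vector {7,6,5,4,3,2,1,0} (below) reverses 8 pixels per 32-byte load
// while the loop walks the source backwards.  Scalar equivalent
// (illustrative):
//   const uint32_t* s = (const uint32_t*)src;
//   uint32_t* d = (uint32_t*)dst;
//   for (int x = 0; x < width; ++x) d[x] = s[width - 1 - x];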
static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { intptr_t temp_width = (intptr_t)(width); asm volatile( "vmovdqu %3,%%ymm5 \n" LABELALIGN "1: \n" "vpermd -0x20(%0,%2,4),%%ymm5,%%ymm0 \n" "vmovdqu %%ymm0,(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(temp_width) // %2 : "m"(kARGBShuffleMirror_AVX2) // %3 : "memory", "cc", "xmm0", "xmm5"); } #endif // HAS_ARGBMIRRORROW_AVX2 #ifdef HAS_SPLITUVROW_AVX2 void SplitUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpsrlw $0x8,%%ymm5,%%ymm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "lea 0x40(%0),%0 \n" "vpsrlw $0x8,%%ymm0,%%ymm2 \n" "vpsrlw $0x8,%%ymm1,%%ymm3 \n" "vpand %%ymm5,%%ymm0,%%ymm0 \n" "vpand %%ymm5,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vpackuswb %%ymm3,%%ymm2,%%ymm2 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm2,%%ymm2 \n" "vmovdqu %%ymm0,(%1) \n" "vmovdqu %%ymm2,0x00(%1,%2,1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%3 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_uv), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SPLITUVROW_AVX2 #ifdef HAS_SPLITUVROW_SSE2 void SplitUVRow_SSE2(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "movdqa %%xmm0,%%xmm2 \n" "movdqa %%xmm1,%%xmm3 \n" "pand %%xmm5,%%xmm0 \n" "pand %%xmm5,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "psrlw $0x8,%%xmm2 \n" "psrlw $0x8,%%xmm3 \n" "packuswb %%xmm3,%%xmm2 \n" "movdqu %%xmm0,(%1) \n" "movdqu %%xmm2,0x00(%1,%2,1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_uv), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SPLITUVROW_SSE2 #ifdef HAS_MERGEUVROW_AVX2 void MergeUVRow_AVX2(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { asm volatile( "sub %0,%1 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x00(%0,%1,1),%%ymm1 \n" "lea 0x20(%0),%0 \n" "vpunpcklbw %%ymm1,%%ymm0,%%ymm2 \n" "vpunpckhbw %%ymm1,%%ymm0,%%ymm0 \n" "vextractf128 $0x0,%%ymm2,(%2) \n" "vextractf128 $0x0,%%ymm0,0x10(%2) \n" "vextractf128 $0x1,%%ymm2,0x20(%2) \n" "vextractf128 $0x1,%%ymm0,0x30(%2) \n" "lea 0x40(%2),%2 \n" "sub $0x20,%3 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_u), // %0 "+r"(src_v), // %1 "+r"(dst_uv), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_MERGEUVROW_AVX2 #ifdef HAS_MERGEUVROW_SSE2 void MergeUVRow_SSE2(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { asm volatile( "sub %0,%1 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x00(%0,%1,1),%%xmm1 \n" "lea 0x10(%0),%0 \n" "movdqa %%xmm0,%%xmm2 \n" "punpcklbw %%xmm1,%%xmm0 \n" "punpckhbw %%xmm1,%%xmm2 \n" "movdqu %%xmm0,(%2) \n" "movdqu %%xmm2,0x10(%2) \n" "lea 0x20(%2),%2 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_u), // %0 "+r"(src_v), // %1 "+r"(dst_uv), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_MERGEUVROW_SSE2 // Use scale to convert lsb formats to msb, depending how many bits there are: // 128 = 9 
bits // 64 = 10 bits // 16 = 12 bits // 1 = 16 bits #ifdef HAS_MERGEUVROW_16_AVX2 void MergeUVRow_16_AVX2(const uint16_t* src_u, const uint16_t* src_v, uint16_t* dst_uv, int scale, int width) { // clang-format off asm volatile ( "vmovd %4,%%xmm3 \n" "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n" "vbroadcastss %%xmm3,%%ymm3 \n" "sub %0,%1 \n" // 16 pixels per loop. LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu (%0,%1,1),%%ymm1 \n" "add $0x20,%0 \n" "vpmullw %%ymm3,%%ymm0,%%ymm0 \n" "vpmullw %%ymm3,%%ymm1,%%ymm1 \n" "vpunpcklwd %%ymm1,%%ymm0,%%ymm2 \n" // mutates "vpunpckhwd %%ymm1,%%ymm0,%%ymm0 \n" "vextractf128 $0x0,%%ymm2,(%2) \n" "vextractf128 $0x0,%%ymm0,0x10(%2) \n" "vextractf128 $0x1,%%ymm2,0x20(%2) \n" "vextractf128 $0x1,%%ymm0,0x30(%2) \n" "add $0x40,%2 \n" "sub $0x10,%3 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_u), // %0 "+r"(src_v), // %1 "+r"(dst_uv), // %2 "+r"(width) // %3 : "r"(scale) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3"); // clang-format on } #endif // HAS_MERGEUVROW_AVX2 // Use scale to convert lsb formats to msb, depending how many bits there are: // 128 = 9 bits // 64 = 10 bits // 16 = 12 bits // 1 = 16 bits #ifdef HAS_MULTIPLYROW_16_AVX2 void MultiplyRow_16_AVX2(const uint16_t* src_y, uint16_t* dst_y, int scale, int width) { // clang-format off asm volatile ( "vmovd %3,%%xmm3 \n" "vpunpcklwd %%xmm3,%%xmm3,%%xmm3 \n" "vbroadcastss %%xmm3,%%ymm3 \n" "sub %0,%1 \n" // 16 pixels per loop. LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "vpmullw %%ymm3,%%ymm0,%%ymm0 \n" "vpmullw %%ymm3,%%ymm1,%%ymm1 \n" "vmovdqu %%ymm0,(%0,%1) \n" "vmovdqu %%ymm1,0x20(%0,%1) \n" "add $0x40,%0 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_y), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "r"(scale) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm3"); // clang-format on } #endif // HAS_MULTIPLYROW_16_AVX2 // Use scale to convert lsb formats to msb, depending how many bits there are: // 32768 = 9 bits // 16384 = 10 bits // 4096 = 12 bits // 256 = 16 bits void Convert16To8Row_SSSE3(const uint16_t* src_y, uint8_t* dst_y, int scale, int width) { // clang-format off asm volatile ( "movd %3,%%xmm2 \n" "punpcklwd %%xmm2,%%xmm2 \n" "pshufd $0x0,%%xmm2,%%xmm2 \n" // 32 pixels per loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "add $0x20,%0 \n" "pmulhuw %%xmm2,%%xmm0 \n" "pmulhuw %%xmm2,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "add $0x10,%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_y), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "r"(scale) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2"); // clang-format on } #ifdef HAS_CONVERT16TO8ROW_AVX2 void Convert16To8Row_AVX2(const uint16_t* src_y, uint8_t* dst_y, int scale, int width) { // clang-format off asm volatile ( "vmovd %3,%%xmm2 \n" "vpunpcklwd %%xmm2,%%xmm2,%%xmm2 \n" "vbroadcastss %%xmm2,%%ymm2 \n" // 32 pixels per loop. 
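// vpmulhuw computes (sample * scale) >> 16, so with the scale table above
// (e.g. 16384 for 10-bit input) the loop below reduces each 16-bit sample to
// 8 bits before vpackuswb saturates and packs.  Scalar equivalent
// (illustrative): dst = (uint8_t)((src * scale) >> 16).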
LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "add $0x40,%0 \n" "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" "vpmulhuw %%ymm2,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" // mutates "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,(%1) \n" "add $0x20,%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_y), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "r"(scale) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2"); // clang-format on } #endif // HAS_CONVERT16TO8ROW_AVX2 // Use scale to convert to lsb formats depending how many bits there are: // 512 = 9 bits // 1024 = 10 bits // 4096 = 12 bits // TODO(fbarchard): reduce to SSE2 void Convert8To16Row_SSE2(const uint8_t* src_y, uint16_t* dst_y, int scale, int width) { // clang-format off asm volatile ( "movd %3,%%xmm2 \n" "punpcklwd %%xmm2,%%xmm2 \n" "pshufd $0x0,%%xmm2,%%xmm2 \n" // 32 pixels per loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqa %%xmm0,%%xmm1 \n" "punpcklbw %%xmm0,%%xmm0 \n" "punpckhbw %%xmm1,%%xmm1 \n" "add $0x10,%0 \n" "pmulhuw %%xmm2,%%xmm0 \n" "pmulhuw %%xmm2,%%xmm1 \n" "movdqu %%xmm0,(%1) \n" "movdqu %%xmm1,0x10(%1) \n" "add $0x20,%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_y), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "r"(scale) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2"); // clang-format on } #ifdef HAS_CONVERT8TO16ROW_AVX2 void Convert8To16Row_AVX2(const uint8_t* src_y, uint16_t* dst_y, int scale, int width) { // clang-format off asm volatile ( "vmovd %3,%%xmm2 \n" "vpunpcklwd %%xmm2,%%xmm2,%%xmm2 \n" "vbroadcastss %%xmm2,%%ymm2 \n" // 32 pixels per loop. LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "add $0x20,%0 \n" "vpunpckhbw %%ymm0,%%ymm0,%%ymm1 \n" "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" "vpmulhuw %%ymm2,%%ymm1,%%ymm1 \n" "vmovdqu %%ymm0,(%1) \n" "vmovdqu %%ymm1,0x20(%1) \n" "add $0x40,%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_y), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : "r"(scale) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2"); // clang-format on } #endif // HAS_CONVERT8TO16ROW_AVX2 #ifdef HAS_SPLITRGBROW_SSSE3 // Shuffle table for converting RGB to Planar. 
static const uvec8 kShuffleMaskRGBToR0 = {0u, 3u, 6u, 9u, 12u, 15u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; static const uvec8 kShuffleMaskRGBToR1 = {128u, 128u, 128u, 128u, 128u, 128u, 2u, 5u, 8u, 11u, 14u, 128u, 128u, 128u, 128u, 128u}; static const uvec8 kShuffleMaskRGBToR2 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 1u, 4u, 7u, 10u, 13u}; static const uvec8 kShuffleMaskRGBToG0 = {1u, 4u, 7u, 10u, 13u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; static const uvec8 kShuffleMaskRGBToG1 = {128u, 128u, 128u, 128u, 128u, 0u, 3u, 6u, 9u, 12u, 15u, 128u, 128u, 128u, 128u, 128u}; static const uvec8 kShuffleMaskRGBToG2 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 2u, 5u, 8u, 11u, 14u}; static const uvec8 kShuffleMaskRGBToB0 = {2u, 5u, 8u, 11u, 14u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; static const uvec8 kShuffleMaskRGBToB1 = {128u, 128u, 128u, 128u, 128u, 1u, 4u, 7u, 10u, 13u, 128u, 128u, 128u, 128u, 128u, 128u}; static const uvec8 kShuffleMaskRGBToB2 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 0u, 3u, 6u, 9u, 12u, 15u}; void SplitRGBRow_SSSE3(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) { asm volatile( LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "pshufb %5, %%xmm0 \n" "pshufb %6, %%xmm1 \n" "pshufb %7, %%xmm2 \n" "por %%xmm1,%%xmm0 \n" "por %%xmm2,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "pshufb %8, %%xmm0 \n" "pshufb %9, %%xmm1 \n" "pshufb %10, %%xmm2 \n" "por %%xmm1,%%xmm0 \n" "por %%xmm2,%%xmm0 \n" "movdqu %%xmm0,(%2) \n" "lea 0x10(%2),%2 \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "pshufb %11, %%xmm0 \n" "pshufb %12, %%xmm1 \n" "pshufb %13, %%xmm2 \n" "por %%xmm1,%%xmm0 \n" "por %%xmm2,%%xmm0 \n" "movdqu %%xmm0,(%3) \n" "lea 0x10(%3),%3 \n" "lea 0x30(%0),%0 \n" "sub $0x10,%4 \n" "jg 1b \n" : "+r"(src_rgb), // %0 "+r"(dst_r), // %1 "+r"(dst_g), // %2 "+r"(dst_b), // %3 "+r"(width) // %4 : "m"(kShuffleMaskRGBToR0), // %5 "m"(kShuffleMaskRGBToR1), // %6 "m"(kShuffleMaskRGBToR2), // %7 "m"(kShuffleMaskRGBToG0), // %8 "m"(kShuffleMaskRGBToG1), // %9 "m"(kShuffleMaskRGBToG2), // %10 "m"(kShuffleMaskRGBToB0), // %11 "m"(kShuffleMaskRGBToB1), // %12 "m"(kShuffleMaskRGBToB2) // %13 : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_SPLITRGBROW_SSSE3 #ifdef HAS_MERGERGBROW_SSSE3 // Shuffle table for converting RGB to Planar. 
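// These masks run the scatter in the opposite direction (planar back to
// packed): each 16-byte R, G or B input supplies every third byte of three
// 16-byte outputs, again with 0x80 lanes zeroed so the three pshufb results
// can simply be or'ed together.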
static const uvec8 kShuffleMaskRToRGB0 = {0u, 128u, 128u, 1u, 128u, 128u, 2u, 128u, 128u, 3u, 128u, 128u, 4u, 128u, 128u, 5u}; static const uvec8 kShuffleMaskGToRGB0 = {128u, 0u, 128u, 128u, 1u, 128u, 128u, 2u, 128u, 128u, 3u, 128u, 128u, 4u, 128u, 128u}; static const uvec8 kShuffleMaskBToRGB0 = {128u, 128u, 0u, 128u, 128u, 1u, 128u, 128u, 2u, 128u, 128u, 3u, 128u, 128u, 4u, 128u}; static const uvec8 kShuffleMaskGToRGB1 = {5u, 128u, 128u, 6u, 128u, 128u, 7u, 128u, 128u, 8u, 128u, 128u, 9u, 128u, 128u, 10u}; static const uvec8 kShuffleMaskBToRGB1 = {128u, 5u, 128u, 128u, 6u, 128u, 128u, 7u, 128u, 128u, 8u, 128u, 128u, 9u, 128u, 128u}; static const uvec8 kShuffleMaskRToRGB1 = {128u, 128u, 6u, 128u, 128u, 7u, 128u, 128u, 8u, 128u, 128u, 9u, 128u, 128u, 10u, 128u}; static const uvec8 kShuffleMaskBToRGB2 = {10u, 128u, 128u, 11u, 128u, 128u, 12u, 128u, 128u, 13u, 128u, 128u, 14u, 128u, 128u, 15u}; static const uvec8 kShuffleMaskRToRGB2 = {128u, 11u, 128u, 128u, 12u, 128u, 128u, 13u, 128u, 128u, 14u, 128u, 128u, 15u, 128u, 128u}; static const uvec8 kShuffleMaskGToRGB2 = {128u, 128u, 11u, 128u, 128u, 12u, 128u, 128u, 13u, 128u, 128u, 14u, 128u, 128u, 15u, 128u}; void MergeRGBRow_SSSE3(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width) { asm volatile( LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu (%1),%%xmm1 \n" "movdqu (%2),%%xmm2 \n" "pshufb %5, %%xmm0 \n" "pshufb %6, %%xmm1 \n" "pshufb %7, %%xmm2 \n" "por %%xmm1,%%xmm0 \n" "por %%xmm2,%%xmm0 \n" "movdqu %%xmm0,(%3) \n" "movdqu (%0),%%xmm0 \n" "movdqu (%1),%%xmm1 \n" "movdqu (%2),%%xmm2 \n" "pshufb %8, %%xmm0 \n" "pshufb %9, %%xmm1 \n" "pshufb %10, %%xmm2 \n" "por %%xmm1,%%xmm0 \n" "por %%xmm2,%%xmm0 \n" "movdqu %%xmm0,16(%3) \n" "movdqu (%0),%%xmm0 \n" "movdqu (%1),%%xmm1 \n" "movdqu (%2),%%xmm2 \n" "pshufb %11, %%xmm0 \n" "pshufb %12, %%xmm1 \n" "pshufb %13, %%xmm2 \n" "por %%xmm1,%%xmm0 \n" "por %%xmm2,%%xmm0 \n" "movdqu %%xmm0,32(%3) \n" "lea 0x10(%0),%0 \n" "lea 0x10(%1),%1 \n" "lea 0x10(%2),%2 \n" "lea 0x30(%3),%3 \n" "sub $0x10,%4 \n" "jg 1b \n" : "+r"(src_r), // %0 "+r"(src_g), // %1 "+r"(src_b), // %2 "+r"(dst_rgb), // %3 "+r"(width) // %4 : "m"(kShuffleMaskRToRGB0), // %5 "m"(kShuffleMaskGToRGB0), // %6 "m"(kShuffleMaskBToRGB0), // %7 "m"(kShuffleMaskRToRGB1), // %8 "m"(kShuffleMaskGToRGB1), // %9 "m"(kShuffleMaskBToRGB1), // %10 "m"(kShuffleMaskRToRGB2), // %11 "m"(kShuffleMaskGToRGB2), // %12 "m"(kShuffleMaskBToRGB2) // %13 : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_MERGERGBROW_SSSE3 #ifdef HAS_COPYROW_SSE2 void CopyRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "test $0xf,%0 \n" "jne 2f \n" "test $0xf,%1 \n" "jne 2f \n" LABELALIGN "1: \n" "movdqa (%0),%%xmm0 \n" "movdqa 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "movdqa %%xmm0,(%1) \n" "movdqa %%xmm1,0x10(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "jmp 9f \n" LABELALIGN "2: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "movdqu %%xmm0,(%1) \n" "movdqu %%xmm1,0x10(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%2 \n" "jg 2b \n" LABELALIGN "9: \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_COPYROW_SSE2 #ifdef HAS_COPYROW_AVX void CopyRow_AVX(const uint8_t* src, uint8_t* dst, int width) { asm volatile( LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "lea 0x40(%0),%0 \n" "vmovdqu %%ymm0,(%1) \n" "vmovdqu %%ymm1,0x20(%1) \n" "lea 0x40(%1),%1 \n" "sub $0x40,%2 \n" "jg 1b \n" : 
"+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_COPYROW_AVX #ifdef HAS_COPYROW_ERMS // Multiple of 1. void CopyRow_ERMS(const uint8_t* src, uint8_t* dst, int width) { size_t width_tmp = (size_t)(width); asm volatile( "rep movsb \n" : "+S"(src), // %0 "+D"(dst), // %1 "+c"(width_tmp) // %2 : : "memory", "cc"); } #endif // HAS_COPYROW_ERMS #ifdef HAS_ARGBCOPYALPHAROW_SSE2 // width in pixels void ARGBCopyAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "pcmpeqb %%xmm0,%%xmm0 \n" "pslld $0x18,%%xmm0 \n" "pcmpeqb %%xmm1,%%xmm1 \n" "psrld $0x8,%%xmm1 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm2 \n" "movdqu 0x10(%0),%%xmm3 \n" "lea 0x20(%0),%0 \n" "movdqu (%1),%%xmm4 \n" "movdqu 0x10(%1),%%xmm5 \n" "pand %%xmm0,%%xmm2 \n" "pand %%xmm0,%%xmm3 \n" "pand %%xmm1,%%xmm4 \n" "pand %%xmm1,%%xmm5 \n" "por %%xmm4,%%xmm2 \n" "por %%xmm5,%%xmm3 \n" "movdqu %%xmm2,(%1) \n" "movdqu %%xmm3,0x10(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBCOPYALPHAROW_SSE2 #ifdef HAS_ARGBCOPYALPHAROW_AVX2 // width in pixels void ARGBCopyAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" "vpsrld $0x8,%%ymm0,%%ymm0 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm1 \n" "vmovdqu 0x20(%0),%%ymm2 \n" "lea 0x40(%0),%0 \n" "vpblendvb %%ymm0,(%1),%%ymm1,%%ymm1 \n" "vpblendvb %%ymm0,0x20(%1),%%ymm2,%%ymm2 \n" "vmovdqu %%ymm1,(%1) \n" "vmovdqu %%ymm2,0x20(%1) \n" "lea 0x40(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_ARGBCOPYALPHAROW_AVX2 #ifdef HAS_ARGBEXTRACTALPHAROW_SSE2 // width in pixels void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb, uint8_t* dst_a, int width) { asm volatile( LABELALIGN "1: \n" "movdqu (%0), %%xmm0 \n" "movdqu 0x10(%0), %%xmm1 \n" "lea 0x20(%0), %0 \n" "psrld $0x18, %%xmm0 \n" "psrld $0x18, %%xmm1 \n" "packssdw %%xmm1, %%xmm0 \n" "packuswb %%xmm0, %%xmm0 \n" "movq %%xmm0,(%1) \n" "lea 0x8(%1), %1 \n" "sub $0x8, %2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_a), // %1 "+rm"(width) // %2 : : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_ARGBEXTRACTALPHAROW_SSE2 #ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 static const uvec8 kShuffleAlphaShort_AVX2 = { 3u, 128u, 128u, 128u, 7u, 128u, 128u, 128u, 11u, 128u, 128u, 128u, 15u, 128u, 128u, 128u}; void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb, uint8_t* dst_a, int width) { asm volatile( "vmovdqa %3,%%ymm4 \n" "vbroadcastf128 %4,%%ymm5 \n" LABELALIGN "1: \n" "vmovdqu (%0), %%ymm0 \n" "vmovdqu 0x20(%0), %%ymm1 \n" "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" // vpsrld $0x18, %%ymm0 "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" "vmovdqu 0x40(%0), %%ymm2 \n" "vmovdqu 0x60(%0), %%ymm3 \n" "lea 0x80(%0), %0 \n" "vpackssdw %%ymm1, %%ymm0, %%ymm0 \n" // mutates "vpshufb %%ymm5,%%ymm2,%%ymm2 \n" "vpshufb %%ymm5,%%ymm3,%%ymm3 \n" "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // mutates "vpackuswb %%ymm2,%%ymm0,%%ymm0 \n" // mutates. "vpermd %%ymm0,%%ymm4,%%ymm0 \n" // unmutate. 
"vmovdqu %%ymm0,(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x20, %2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_a), // %1 "+rm"(width) // %2 : "m"(kPermdARGBToY_AVX), // %3 "m"(kShuffleAlphaShort_AVX2) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBEXTRACTALPHAROW_AVX2 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 // width in pixels void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "pcmpeqb %%xmm0,%%xmm0 \n" "pslld $0x18,%%xmm0 \n" "pcmpeqb %%xmm1,%%xmm1 \n" "psrld $0x8,%%xmm1 \n" LABELALIGN "1: \n" "movq (%0),%%xmm2 \n" "lea 0x8(%0),%0 \n" "punpcklbw %%xmm2,%%xmm2 \n" "punpckhwd %%xmm2,%%xmm3 \n" "punpcklwd %%xmm2,%%xmm2 \n" "movdqu (%1),%%xmm4 \n" "movdqu 0x10(%1),%%xmm5 \n" "pand %%xmm0,%%xmm2 \n" "pand %%xmm0,%%xmm3 \n" "pand %%xmm1,%%xmm4 \n" "pand %%xmm1,%%xmm5 \n" "por %%xmm4,%%xmm2 \n" "por %%xmm5,%%xmm3 \n" "movdqu %%xmm2,(%1) \n" "movdqu %%xmm3,0x10(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBCOPYYTOALPHAROW_SSE2 #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 // width in pixels void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "vpcmpeqb %%ymm0,%%ymm0,%%ymm0 \n" "vpsrld $0x8,%%ymm0,%%ymm0 \n" LABELALIGN "1: \n" "vpmovzxbd (%0),%%ymm1 \n" "vpmovzxbd 0x8(%0),%%ymm2 \n" "lea 0x10(%0),%0 \n" "vpslld $0x18,%%ymm1,%%ymm1 \n" "vpslld $0x18,%%ymm2,%%ymm2 \n" "vpblendvb %%ymm0,(%1),%%ymm1,%%ymm1 \n" "vpblendvb %%ymm0,0x20(%1),%%ymm2,%%ymm2 \n" "vmovdqu %%ymm1,(%1) \n" "vmovdqu %%ymm2,0x20(%1) \n" "lea 0x40(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_ARGBCOPYYTOALPHAROW_AVX2 #ifdef HAS_SETROW_X86 void SetRow_X86(uint8_t* dst, uint8_t v8, int width) { size_t width_tmp = (size_t)(width >> 2); const uint32_t v32 = v8 * 0x01010101u; // Duplicate byte to all bytes. 
asm volatile( "rep stosl \n" : "+D"(dst), // %0 "+c"(width_tmp) // %1 : "a"(v32) // %2 : "memory", "cc"); } void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) { size_t width_tmp = (size_t)(width); asm volatile( "rep stosb \n" : "+D"(dst), // %0 "+c"(width_tmp) // %1 : "a"(v8) // %2 : "memory", "cc"); } void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width) { size_t width_tmp = (size_t)(width); asm volatile( "rep stosl \n" : "+D"(dst_argb), // %0 "+c"(width_tmp) // %1 : "a"(v32) // %2 : "memory", "cc"); } #endif // HAS_SETROW_X86 #ifdef HAS_YUY2TOYROW_SSE2 void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "pand %%xmm5,%%xmm0 \n" "pand %%xmm5,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_yuy2), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "memory", "cc", "xmm0", "xmm1", "xmm5"); } void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x00(%0,%4,1),%%xmm2 \n" "movdqu 0x10(%0,%4,1),%%xmm3 \n" "lea 0x20(%0),%0 \n" "pavgb %%xmm2,%%xmm0 \n" "pavgb %%xmm3,%%xmm1 \n" "psrlw $0x8,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqa %%xmm0,%%xmm1 \n" "pand %%xmm5,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm1 \n" "movq %%xmm0,(%1) \n" "movq %%xmm1,0x00(%1,%2,1) \n" "lea 0x8(%1),%1 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_yuy2), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : "r"((intptr_t)(stride_yuy2)) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "psrlw $0x8,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqa %%xmm0,%%xmm1 \n" "pand %%xmm5,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm1 \n" "movq %%xmm0,(%1) \n" "movq %%xmm1,0x00(%1,%2,1) \n" "lea 0x8(%1),%1 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_yuy2), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm5"); } void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { asm volatile( LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "psrlw $0x8,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_uyvy), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "memory", "cc", "xmm0", "xmm1"); } void UYVYToUVRow_SSE2(const uint8_t* src_uyvy, int stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x00(%0,%4,1),%%xmm2 \n" "movdqu 0x10(%0,%4,1),%%xmm3 \n" "lea 0x20(%0),%0 \n" "pavgb %%xmm2,%%xmm0 \n" "pavgb %%xmm3,%%xmm1 \n" "pand %%xmm5,%%xmm0 \n" "pand %%xmm5,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqa 
%%xmm0,%%xmm1 \n" "pand %%xmm5,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm1 \n" "movq %%xmm0,(%1) \n" "movq %%xmm1,0x00(%1,%2,1) \n" "lea 0x8(%1),%1 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_uyvy), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : "r"((intptr_t)(stride_uyvy)) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrlw $0x8,%%xmm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "pand %%xmm5,%%xmm0 \n" "pand %%xmm5,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqa %%xmm0,%%xmm1 \n" "pand %%xmm5,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm1 \n" "movq %%xmm0,(%1) \n" "movq %%xmm1,0x00(%1,%2,1) \n" "lea 0x8(%1),%1 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_uyvy), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_YUY2TOYROW_SSE2 #ifdef HAS_YUY2TOYROW_AVX2 void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { asm volatile( "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpsrlw $0x8,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "lea 0x40(%0),%0 \n" "vpand %%ymm5,%%ymm0,%%ymm0 \n" "vpand %%ymm5,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_yuy2), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "memory", "cc", "xmm0", "xmm1", "xmm5"); } void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpsrlw $0x8,%%ymm5,%%ymm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" "lea 0x40(%0),%0 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpsrlw $0x8,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpand %%ymm5,%%ymm0,%%ymm1 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm1,%%ymm1 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vextractf128 $0x0,%%ymm1,(%1) \n" "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n" "lea 0x10(%1),%1 \n" "sub $0x20,%3 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_yuy2), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : "r"((intptr_t)(stride_yuy2)) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm5"); } void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpsrlw $0x8,%%ymm5,%%ymm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "lea 0x40(%0),%0 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpsrlw $0x8,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpand %%ymm5,%%ymm0,%%ymm1 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm1,%%ymm1 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vextractf128 $0x0,%%ymm1,(%1) \n" "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n" "lea 0x10(%1),%1 \n" "sub $0x20,%3 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_yuy2), // %0 "+r"(dst_u), // %1 
"+r"(dst_v), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm5"); } void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { asm volatile( LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "lea 0x40(%0),%0 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpsrlw $0x8,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_uyvy), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "memory", "cc", "xmm0", "xmm1", "xmm5"); } void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, int stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpsrlw $0x8,%%ymm5,%%ymm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "vpavgb 0x00(%0,%4,1),%%ymm0,%%ymm0 \n" "vpavgb 0x20(%0,%4,1),%%ymm1,%%ymm1 \n" "lea 0x40(%0),%0 \n" "vpand %%ymm5,%%ymm0,%%ymm0 \n" "vpand %%ymm5,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpand %%ymm5,%%ymm0,%%ymm1 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm1,%%ymm1 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vextractf128 $0x0,%%ymm1,(%1) \n" "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n" "lea 0x10(%1),%1 \n" "sub $0x20,%3 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_uyvy), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : "r"((intptr_t)(stride_uyvy)) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm5"); } void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpsrlw $0x8,%%ymm5,%%ymm5 \n" "sub %1,%2 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "lea 0x40(%0),%0 \n" "vpand %%ymm5,%%ymm0,%%ymm0 \n" "vpand %%ymm5,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpand %%ymm5,%%ymm0,%%ymm1 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpackuswb %%ymm1,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm1,%%ymm1 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vextractf128 $0x0,%%ymm1,(%1) \n" "vextractf128 $0x0,%%ymm0,0x00(%1,%2,1) \n" "lea 0x10(%1),%1 \n" "sub $0x20,%3 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_uyvy), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_YUY2TOYROW_AVX2 #ifdef HAS_ARGBBLENDROW_SSSE3 // Shuffle table for isolating alpha. static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80}; // Blend 8 pixels at a time void ARGBBlendRow_SSSE3(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( "pcmpeqb %%xmm7,%%xmm7 \n" "psrlw $0xf,%%xmm7 \n" "pcmpeqb %%xmm6,%%xmm6 \n" "psrlw $0x8,%%xmm6 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "psllw $0x8,%%xmm5 \n" "pcmpeqb %%xmm4,%%xmm4 \n" "pslld $0x18,%%xmm4 \n" "sub $0x4,%3 \n" "jl 49f \n" // 4 pixel loop. 
LABELALIGN "40: \n" "movdqu (%0),%%xmm3 \n" "lea 0x10(%0),%0 \n" "movdqa %%xmm3,%%xmm0 \n" "pxor %%xmm4,%%xmm3 \n" "movdqu (%1),%%xmm2 \n" "pshufb %4,%%xmm3 \n" "pand %%xmm6,%%xmm2 \n" "paddw %%xmm7,%%xmm3 \n" "pmullw %%xmm3,%%xmm2 \n" "movdqu (%1),%%xmm1 \n" "lea 0x10(%1),%1 \n" "psrlw $0x8,%%xmm1 \n" "por %%xmm4,%%xmm0 \n" "pmullw %%xmm3,%%xmm1 \n" "psrlw $0x8,%%xmm2 \n" "paddusb %%xmm2,%%xmm0 \n" "pand %%xmm5,%%xmm1 \n" "paddusb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%2) \n" "lea 0x10(%2),%2 \n" "sub $0x4,%3 \n" "jge 40b \n" "49: \n" "add $0x3,%3 \n" "jl 99f \n" // 1 pixel loop. "91: \n" "movd (%0),%%xmm3 \n" "lea 0x4(%0),%0 \n" "movdqa %%xmm3,%%xmm0 \n" "pxor %%xmm4,%%xmm3 \n" "movd (%1),%%xmm2 \n" "pshufb %4,%%xmm3 \n" "pand %%xmm6,%%xmm2 \n" "paddw %%xmm7,%%xmm3 \n" "pmullw %%xmm3,%%xmm2 \n" "movd (%1),%%xmm1 \n" "lea 0x4(%1),%1 \n" "psrlw $0x8,%%xmm1 \n" "por %%xmm4,%%xmm0 \n" "pmullw %%xmm3,%%xmm1 \n" "psrlw $0x8,%%xmm2 \n" "paddusb %%xmm2,%%xmm0 \n" "pand %%xmm5,%%xmm1 \n" "paddusb %%xmm1,%%xmm0 \n" "movd %%xmm0,(%2) \n" "lea 0x4(%2),%2 \n" "sub $0x1,%3 \n" "jge 91b \n" "99: \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : "m"(kShuffleAlpha) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // HAS_ARGBBLENDROW_SSSE3 #ifdef HAS_BLENDPLANEROW_SSSE3 // Blend 8 pixels at a time. // unsigned version of math // =((A2*C2)+(B2*(255-C2))+255)/256 // signed version of math // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256 void BlendPlaneRow_SSSE3(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, uint8_t* dst, int width) { asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psllw $0x8,%%xmm5 \n" "mov $0x80808080,%%eax \n" "movd %%eax,%%xmm6 \n" "pshufd $0x0,%%xmm6,%%xmm6 \n" "mov $0x807f807f,%%eax \n" "movd %%eax,%%xmm7 \n" "pshufd $0x0,%%xmm7,%%xmm7 \n" "sub %2,%0 \n" "sub %2,%1 \n" "sub %2,%3 \n" // 8 pixel loop. LABELALIGN "1: \n" "movq (%2),%%xmm0 \n" "punpcklbw %%xmm0,%%xmm0 \n" "pxor %%xmm5,%%xmm0 \n" "movq (%0,%2,1),%%xmm1 \n" "movq (%1,%2,1),%%xmm2 \n" "punpcklbw %%xmm2,%%xmm1 \n" "psubb %%xmm6,%%xmm1 \n" "pmaddubsw %%xmm1,%%xmm0 \n" "paddw %%xmm7,%%xmm0 \n" "psrlw $0x8,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "movq %%xmm0,(%3,%2,1) \n" "lea 0x8(%2),%2 \n" "sub $0x8,%4 \n" "jg 1b \n" : "+r"(src0), // %0 "+r"(src1), // %1 "+r"(alpha), // %2 "+r"(dst), // %3 "+rm"(width) // %4 ::"memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm5", "xmm6", "xmm7"); } #endif // HAS_BLENDPLANEROW_SSSE3 #ifdef HAS_BLENDPLANEROW_AVX2 // Blend 32 pixels at a time. // unsigned version of math // =((A2*C2)+(B2*(255-C2))+255)/256 // signed version of math // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256 void BlendPlaneRow_AVX2(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, uint8_t* dst, int width) { asm volatile( "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpsllw $0x8,%%ymm5,%%ymm5 \n" "mov $0x80808080,%%eax \n" "vmovd %%eax,%%xmm6 \n" "vbroadcastss %%xmm6,%%ymm6 \n" "mov $0x807f807f,%%eax \n" "vmovd %%eax,%%xmm7 \n" "vbroadcastss %%xmm7,%%ymm7 \n" "sub %2,%0 \n" "sub %2,%1 \n" "sub %2,%3 \n" // 32 pixel loop. 
LABELALIGN "1: \n" "vmovdqu (%2),%%ymm0 \n" "vpunpckhbw %%ymm0,%%ymm0,%%ymm3 \n" "vpunpcklbw %%ymm0,%%ymm0,%%ymm0 \n" "vpxor %%ymm5,%%ymm3,%%ymm3 \n" "vpxor %%ymm5,%%ymm0,%%ymm0 \n" "vmovdqu (%0,%2,1),%%ymm1 \n" "vmovdqu (%1,%2,1),%%ymm2 \n" "vpunpckhbw %%ymm2,%%ymm1,%%ymm4 \n" "vpunpcklbw %%ymm2,%%ymm1,%%ymm1 \n" "vpsubb %%ymm6,%%ymm4,%%ymm4 \n" "vpsubb %%ymm6,%%ymm1,%%ymm1 \n" "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" "vpmaddubsw %%ymm1,%%ymm0,%%ymm0 \n" "vpaddw %%ymm7,%%ymm3,%%ymm3 \n" "vpaddw %%ymm7,%%ymm0,%%ymm0 \n" "vpsrlw $0x8,%%ymm3,%%ymm3 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpackuswb %%ymm3,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,(%3,%2,1) \n" "lea 0x20(%2),%2 \n" "sub $0x20,%4 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src0), // %0 "+r"(src1), // %1 "+r"(alpha), // %2 "+r"(dst), // %3 "+rm"(width) // %4 ::"memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // HAS_BLENDPLANEROW_AVX2 #ifdef HAS_ARGBATTENUATEROW_SSSE3 // Shuffle table duplicating alpha static const uvec8 kShuffleAlpha0 = {3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u}; static const uvec8 kShuffleAlpha1 = {11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u}; // Attenuate 4 pixels at a time. void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( "pcmpeqb %%xmm3,%%xmm3 \n" "pslld $0x18,%%xmm3 \n" "movdqa %3,%%xmm4 \n" "movdqa %4,%%xmm5 \n" // 4 pixel loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "pshufb %%xmm4,%%xmm0 \n" "movdqu (%0),%%xmm1 \n" "punpcklbw %%xmm1,%%xmm1 \n" "pmulhuw %%xmm1,%%xmm0 \n" "movdqu (%0),%%xmm1 \n" "pshufb %%xmm5,%%xmm1 \n" "movdqu (%0),%%xmm2 \n" "punpckhbw %%xmm2,%%xmm2 \n" "pmulhuw %%xmm2,%%xmm1 \n" "movdqu (%0),%%xmm2 \n" "lea 0x10(%0),%0 \n" "pand %%xmm3,%%xmm2 \n" "psrlw $0x8,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "por %%xmm2,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x4,%2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "m"(kShuffleAlpha0), // %3 "m"(kShuffleAlpha1) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBATTENUATEROW_SSSE3 #ifdef HAS_ARGBATTENUATEROW_AVX2 // Shuffle table duplicating alpha. static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u}; // Attenuate 8 pixels at a time. void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( "vbroadcastf128 %3,%%ymm4 \n" "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpslld $0x18,%%ymm5,%%ymm5 \n" "sub %0,%1 \n" // 8 pixel loop. LABELALIGN "1: \n" "vmovdqu (%0),%%ymm6 \n" "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n" "vpshufb %%ymm4,%%ymm0,%%ymm2 \n" "vpshufb %%ymm4,%%ymm1,%%ymm3 \n" "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" "vpand %%ymm5,%%ymm6,%%ymm6 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpsrlw $0x8,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vpor %%ymm6,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,0x00(%0,%1,1) \n" "lea 0x20(%0),%0 \n" "sub $0x8,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "m"(kShuffleAlpha_AVX2) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBATTENUATEROW_AVX2 #ifdef HAS_ARGBUNATTENUATEROW_SSE2 // Unattenuate 4 pixels at a time. 
void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int width) { uintptr_t alpha; asm volatile( // 4 pixel loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movzb 0x03(%0),%3 \n" "punpcklbw %%xmm0,%%xmm0 \n" "movd 0x00(%4,%3,4),%%xmm2 \n" "movzb 0x07(%0),%3 \n" "movd 0x00(%4,%3,4),%%xmm3 \n" "pshuflw $0x40,%%xmm2,%%xmm2 \n" "pshuflw $0x40,%%xmm3,%%xmm3 \n" "movlhps %%xmm3,%%xmm2 \n" "pmulhuw %%xmm2,%%xmm0 \n" "movdqu (%0),%%xmm1 \n" "movzb 0x0b(%0),%3 \n" "punpckhbw %%xmm1,%%xmm1 \n" "movd 0x00(%4,%3,4),%%xmm2 \n" "movzb 0x0f(%0),%3 \n" "movd 0x00(%4,%3,4),%%xmm3 \n" "pshuflw $0x40,%%xmm2,%%xmm2 \n" "pshuflw $0x40,%%xmm3,%%xmm3 \n" "movlhps %%xmm3,%%xmm2 \n" "pmulhuw %%xmm2,%%xmm1 \n" "lea 0x10(%0),%0 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x4,%2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width), // %2 "=&r"(alpha) // %3 : "r"(fixed_invtbl8) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBUNATTENUATEROW_SSE2 #ifdef HAS_ARGBUNATTENUATEROW_AVX2 // Shuffle table duplicating alpha. static const uvec8 kUnattenShuffleAlpha_AVX2 = { 0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u}; // Unattenuate 8 pixels at a time. void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width) { uintptr_t alpha; asm volatile( "sub %0,%1 \n" "vbroadcastf128 %5,%%ymm5 \n" // 8 pixel loop. LABELALIGN "1: \n" // replace VPGATHER "movzb 0x03(%0),%3 \n" "vmovd 0x00(%4,%3,4),%%xmm0 \n" "movzb 0x07(%0),%3 \n" "vmovd 0x00(%4,%3,4),%%xmm1 \n" "movzb 0x0b(%0),%3 \n" "vpunpckldq %%xmm1,%%xmm0,%%xmm6 \n" "vmovd 0x00(%4,%3,4),%%xmm2 \n" "movzb 0x0f(%0),%3 \n" "vmovd 0x00(%4,%3,4),%%xmm3 \n" "movzb 0x13(%0),%3 \n" "vpunpckldq %%xmm3,%%xmm2,%%xmm7 \n" "vmovd 0x00(%4,%3,4),%%xmm0 \n" "movzb 0x17(%0),%3 \n" "vmovd 0x00(%4,%3,4),%%xmm1 \n" "movzb 0x1b(%0),%3 \n" "vpunpckldq %%xmm1,%%xmm0,%%xmm0 \n" "vmovd 0x00(%4,%3,4),%%xmm2 \n" "movzb 0x1f(%0),%3 \n" "vmovd 0x00(%4,%3,4),%%xmm3 \n" "vpunpckldq %%xmm3,%%xmm2,%%xmm2 \n" "vpunpcklqdq %%xmm7,%%xmm6,%%xmm3 \n" "vpunpcklqdq %%xmm2,%%xmm0,%%xmm0 \n" "vinserti128 $0x1,%%xmm0,%%ymm3,%%ymm3 \n" // end of VPGATHER "vmovdqu (%0),%%ymm6 \n" "vpunpcklbw %%ymm6,%%ymm6,%%ymm0 \n" "vpunpckhbw %%ymm6,%%ymm6,%%ymm1 \n" "vpunpcklwd %%ymm3,%%ymm3,%%ymm2 \n" "vpunpckhwd %%ymm3,%%ymm3,%%ymm3 \n" "vpshufb %%ymm5,%%ymm2,%%ymm2 \n" "vpshufb %%ymm5,%%ymm3,%%ymm3 \n" "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,0x00(%0,%1,1) \n" "lea 0x20(%0),%0 \n" "sub $0x8,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width), // %2 "=&r"(alpha) // %3 : "r"(fixed_invtbl8), // %4 "m"(kUnattenShuffleAlpha_AVX2) // %5 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // HAS_ARGBUNATTENUATEROW_AVX2 #ifdef HAS_ARGBGRAYROW_SSSE3 // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels void ARGBGrayRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( "movdqa %3,%%xmm4 \n" "movdqa %4,%%xmm5 \n" // 8 pixel loop. 
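      // Gray is the full-range BT.601 luma of B,G,R (the kARGBToYJ weights
      // with kAddYJ64 rounding, >> 7), then replicated into B, G and R while
      // the original alpha channel is carried through unchanged.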
LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm1 \n" "phaddw %%xmm1,%%xmm0 \n" "paddw %%xmm5,%%xmm0 \n" "psrlw $0x7,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "movdqu (%0),%%xmm2 \n" "movdqu 0x10(%0),%%xmm3 \n" "lea 0x20(%0),%0 \n" "psrld $0x18,%%xmm2 \n" "psrld $0x18,%%xmm3 \n" "packuswb %%xmm3,%%xmm2 \n" "packuswb %%xmm2,%%xmm2 \n" "movdqa %%xmm0,%%xmm3 \n" "punpcklbw %%xmm0,%%xmm0 \n" "punpcklbw %%xmm2,%%xmm3 \n" "movdqa %%xmm0,%%xmm1 \n" "punpcklwd %%xmm3,%%xmm0 \n" "punpckhwd %%xmm3,%%xmm1 \n" "movdqu %%xmm0,(%1) \n" "movdqu %%xmm1,0x10(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "m"(kARGBToYJ), // %3 "m"(kAddYJ64) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBGRAYROW_SSSE3 #ifdef HAS_ARGBSEPIAROW_SSSE3 // b = (r * 35 + g * 68 + b * 17) >> 7 // g = (r * 45 + g * 88 + b * 22) >> 7 // r = (r * 50 + g * 98 + b * 24) >> 7 // Constant for ARGB color to sepia tone static const vec8 kARGBToSepiaB = {17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0, 17, 68, 35, 0}; static const vec8 kARGBToSepiaG = {22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0, 22, 88, 45, 0}; static const vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0, 24, 98, 50, 0}; // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width) { asm volatile( "movdqa %2,%%xmm2 \n" "movdqa %3,%%xmm3 \n" "movdqa %4,%%xmm4 \n" // 8 pixel loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm6 \n" "pmaddubsw %%xmm2,%%xmm0 \n" "pmaddubsw %%xmm2,%%xmm6 \n" "phaddw %%xmm6,%%xmm0 \n" "psrlw $0x7,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "movdqu (%0),%%xmm5 \n" "movdqu 0x10(%0),%%xmm1 \n" "pmaddubsw %%xmm3,%%xmm5 \n" "pmaddubsw %%xmm3,%%xmm1 \n" "phaddw %%xmm1,%%xmm5 \n" "psrlw $0x7,%%xmm5 \n" "packuswb %%xmm5,%%xmm5 \n" "punpcklbw %%xmm5,%%xmm0 \n" "movdqu (%0),%%xmm5 \n" "movdqu 0x10(%0),%%xmm1 \n" "pmaddubsw %%xmm4,%%xmm5 \n" "pmaddubsw %%xmm4,%%xmm1 \n" "phaddw %%xmm1,%%xmm5 \n" "psrlw $0x7,%%xmm5 \n" "packuswb %%xmm5,%%xmm5 \n" "movdqu (%0),%%xmm6 \n" "movdqu 0x10(%0),%%xmm1 \n" "psrld $0x18,%%xmm6 \n" "psrld $0x18,%%xmm1 \n" "packuswb %%xmm1,%%xmm6 \n" "packuswb %%xmm6,%%xmm6 \n" "punpcklbw %%xmm6,%%xmm5 \n" "movdqa %%xmm0,%%xmm1 \n" "punpcklwd %%xmm5,%%xmm0 \n" "punpckhwd %%xmm5,%%xmm1 \n" "movdqu %%xmm0,(%0) \n" "movdqu %%xmm1,0x10(%0) \n" "lea 0x20(%0),%0 \n" "sub $0x8,%1 \n" "jg 1b \n" : "+r"(dst_argb), // %0 "+r"(width) // %1 : "m"(kARGBToSepiaB), // %2 "m"(kARGBToSepiaG), // %3 "m"(kARGBToSepiaR) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBSEPIAROW_SSSE3 #ifdef HAS_ARGBCOLORMATRIXROW_SSSE3 // Tranform 8 ARGB pixels (32 bytes) with color matrix. // Same as Sepia except matrix is provided. void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width) { asm volatile( "movdqu (%3),%%xmm5 \n" "pshufd $0x00,%%xmm5,%%xmm2 \n" "pshufd $0x55,%%xmm5,%%xmm3 \n" "pshufd $0xaa,%%xmm5,%%xmm4 \n" "pshufd $0xff,%%xmm5,%%xmm5 \n" // 8 pixel loop. 
LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm7 \n" "pmaddubsw %%xmm2,%%xmm0 \n" "pmaddubsw %%xmm2,%%xmm7 \n" "movdqu (%0),%%xmm6 \n" "movdqu 0x10(%0),%%xmm1 \n" "pmaddubsw %%xmm3,%%xmm6 \n" "pmaddubsw %%xmm3,%%xmm1 \n" "phaddsw %%xmm7,%%xmm0 \n" "phaddsw %%xmm1,%%xmm6 \n" "psraw $0x6,%%xmm0 \n" "psraw $0x6,%%xmm6 \n" "packuswb %%xmm0,%%xmm0 \n" "packuswb %%xmm6,%%xmm6 \n" "punpcklbw %%xmm6,%%xmm0 \n" "movdqu (%0),%%xmm1 \n" "movdqu 0x10(%0),%%xmm7 \n" "pmaddubsw %%xmm4,%%xmm1 \n" "pmaddubsw %%xmm4,%%xmm7 \n" "phaddsw %%xmm7,%%xmm1 \n" "movdqu (%0),%%xmm6 \n" "movdqu 0x10(%0),%%xmm7 \n" "pmaddubsw %%xmm5,%%xmm6 \n" "pmaddubsw %%xmm5,%%xmm7 \n" "phaddsw %%xmm7,%%xmm6 \n" "psraw $0x6,%%xmm1 \n" "psraw $0x6,%%xmm6 \n" "packuswb %%xmm1,%%xmm1 \n" "packuswb %%xmm6,%%xmm6 \n" "punpcklbw %%xmm6,%%xmm1 \n" "movdqa %%xmm0,%%xmm6 \n" "punpcklwd %%xmm1,%%xmm0 \n" "punpckhwd %%xmm1,%%xmm6 \n" "movdqu %%xmm0,(%1) \n" "movdqu %%xmm6,0x10(%1) \n" "lea 0x20(%0),%0 \n" "lea 0x20(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "r"(matrix_argb) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // HAS_ARGBCOLORMATRIXROW_SSSE3 #ifdef HAS_ARGBQUANTIZEROW_SSE2 // Quantize 4 ARGB pixels (16 bytes). void ARGBQuantizeRow_SSE2(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width) { asm volatile( "movd %2,%%xmm2 \n" "movd %3,%%xmm3 \n" "movd %4,%%xmm4 \n" "pshuflw $0x40,%%xmm2,%%xmm2 \n" "pshufd $0x44,%%xmm2,%%xmm2 \n" "pshuflw $0x40,%%xmm3,%%xmm3 \n" "pshufd $0x44,%%xmm3,%%xmm3 \n" "pshuflw $0x40,%%xmm4,%%xmm4 \n" "pshufd $0x44,%%xmm4,%%xmm4 \n" "pxor %%xmm5,%%xmm5 \n" "pcmpeqb %%xmm6,%%xmm6 \n" "pslld $0x18,%%xmm6 \n" // 4 pixel loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "punpcklbw %%xmm5,%%xmm0 \n" "pmulhuw %%xmm2,%%xmm0 \n" "movdqu (%0),%%xmm1 \n" "punpckhbw %%xmm5,%%xmm1 \n" "pmulhuw %%xmm2,%%xmm1 \n" "pmullw %%xmm3,%%xmm0 \n" "movdqu (%0),%%xmm7 \n" "pmullw %%xmm3,%%xmm1 \n" "pand %%xmm6,%%xmm7 \n" "paddw %%xmm4,%%xmm0 \n" "paddw %%xmm4,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "por %%xmm7,%%xmm0 \n" "movdqu %%xmm0,(%0) \n" "lea 0x10(%0),%0 \n" "sub $0x4,%1 \n" "jg 1b \n" : "+r"(dst_argb), // %0 "+r"(width) // %1 : "r"(scale), // %2 "r"(interval_size), // %3 "r"(interval_offset) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // HAS_ARGBQUANTIZEROW_SSE2 #ifdef HAS_ARGBSHADEROW_SSE2 // Shade 4 pixels at a time by specified value. void ARGBShadeRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value) { asm volatile( "movd %3,%%xmm2 \n" "punpcklbw %%xmm2,%%xmm2 \n" "punpcklqdq %%xmm2,%%xmm2 \n" // 4 pixel loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "lea 0x10(%0),%0 \n" "movdqa %%xmm0,%%xmm1 \n" "punpcklbw %%xmm0,%%xmm0 \n" "punpckhbw %%xmm1,%%xmm1 \n" "pmulhuw %%xmm2,%%xmm0 \n" "pmulhuw %%xmm2,%%xmm1 \n" "psrlw $0x8,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x4,%2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "r"(value) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_ARGBSHADEROW_SSE2 #ifdef HAS_ARGBMULTIPLYROW_SSE2 // Multiply 2 rows of ARGB pixels together, 4 pixels at a time. void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( "pxor %%xmm5,%%xmm5 \n" // 4 pixel loop. 
LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "lea 0x10(%0),%0 \n" "movdqu (%1),%%xmm2 \n" "lea 0x10(%1),%1 \n" "movdqu %%xmm0,%%xmm1 \n" "movdqu %%xmm2,%%xmm3 \n" "punpcklbw %%xmm0,%%xmm0 \n" "punpckhbw %%xmm1,%%xmm1 \n" "punpcklbw %%xmm5,%%xmm2 \n" "punpckhbw %%xmm5,%%xmm3 \n" "pmulhuw %%xmm2,%%xmm0 \n" "pmulhuw %%xmm3,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%2) \n" "lea 0x10(%2),%2 \n" "sub $0x4,%3 \n" "jg 1b \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_ARGBMULTIPLYROW_SSE2 #ifdef HAS_ARGBMULTIPLYROW_AVX2 // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( "vpxor %%ymm5,%%ymm5,%%ymm5 \n" // 4 pixel loop. LABELALIGN "1: \n" "vmovdqu (%0),%%ymm1 \n" "lea 0x20(%0),%0 \n" "vmovdqu (%1),%%ymm3 \n" "lea 0x20(%1),%1 \n" "vpunpcklbw %%ymm1,%%ymm1,%%ymm0 \n" "vpunpckhbw %%ymm1,%%ymm1,%%ymm1 \n" "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" "vpmulhuw %%ymm2,%%ymm0,%%ymm0 \n" "vpmulhuw %%ymm3,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,(%2) \n" "lea 0x20(%2),%2 \n" "sub $0x8,%3 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "memory", "cc" #if defined(__AVX2__) , "xmm0", "xmm1", "xmm2", "xmm3", "xmm5" #endif ); } #endif // HAS_ARGBMULTIPLYROW_AVX2 #ifdef HAS_ARGBADDROW_SSE2 // Add 2 rows of ARGB pixels together, 4 pixels at a time. void ARGBAddRow_SSE2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( // 4 pixel loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "lea 0x10(%0),%0 \n" "movdqu (%1),%%xmm1 \n" "lea 0x10(%1),%1 \n" "paddusb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%2) \n" "lea 0x10(%2),%2 \n" "sub $0x4,%3 \n" "jg 1b \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_ARGBADDROW_SSE2 #ifdef HAS_ARGBADDROW_AVX2 // Add 2 rows of ARGB pixels together, 4 pixels at a time. void ARGBAddRow_AVX2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( // 4 pixel loop. LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "lea 0x20(%0),%0 \n" "vpaddusb (%1),%%ymm0,%%ymm0 \n" "lea 0x20(%1),%1 \n" "vmovdqu %%ymm0,(%2) \n" "lea 0x20(%2),%2 \n" "sub $0x8,%3 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0"); } #endif // HAS_ARGBADDROW_AVX2 #ifdef HAS_ARGBSUBTRACTROW_SSE2 // Subtract 2 rows of ARGB pixels, 4 pixels at a time. void ARGBSubtractRow_SSE2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( // 4 pixel loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "lea 0x10(%0),%0 \n" "movdqu (%1),%%xmm1 \n" "lea 0x10(%1),%1 \n" "psubusb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%2) \n" "lea 0x10(%2),%2 \n" "sub $0x4,%3 \n" "jg 1b \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_ARGBSUBTRACTROW_SSE2 #ifdef HAS_ARGBSUBTRACTROW_AVX2 // Subtract 2 rows of ARGB pixels, 8 pixels at a time. void ARGBSubtractRow_AVX2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( // 4 pixel loop. 
LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "lea 0x20(%0),%0 \n" "vpsubusb (%1),%%ymm0,%%ymm0 \n" "lea 0x20(%1),%1 \n" "vmovdqu %%ymm0,(%2) \n" "lea 0x20(%2),%2 \n" "sub $0x8,%3 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0"); } #endif // HAS_ARGBSUBTRACTROW_AVX2 #ifdef HAS_SOBELXROW_SSE2 // SobelX as a matrix is // -1 0 1 // -2 0 2 // -1 0 1 void SobelXRow_SSE2(const uint8_t* src_y0, const uint8_t* src_y1, const uint8_t* src_y2, uint8_t* dst_sobelx, int width) { asm volatile( "sub %0,%1 \n" "sub %0,%2 \n" "sub %0,%3 \n" "pxor %%xmm5,%%xmm5 \n" // 8 pixel loop. LABELALIGN "1: \n" "movq (%0),%%xmm0 \n" "movq 0x2(%0),%%xmm1 \n" "punpcklbw %%xmm5,%%xmm0 \n" "punpcklbw %%xmm5,%%xmm1 \n" "psubw %%xmm1,%%xmm0 \n" "movq 0x00(%0,%1,1),%%xmm1 \n" "movq 0x02(%0,%1,1),%%xmm2 \n" "punpcklbw %%xmm5,%%xmm1 \n" "punpcklbw %%xmm5,%%xmm2 \n" "psubw %%xmm2,%%xmm1 \n" "movq 0x00(%0,%2,1),%%xmm2 \n" "movq 0x02(%0,%2,1),%%xmm3 \n" "punpcklbw %%xmm5,%%xmm2 \n" "punpcklbw %%xmm5,%%xmm3 \n" "psubw %%xmm3,%%xmm2 \n" "paddw %%xmm2,%%xmm0 \n" "paddw %%xmm1,%%xmm0 \n" "paddw %%xmm1,%%xmm0 \n" "pxor %%xmm1,%%xmm1 \n" "psubw %%xmm0,%%xmm1 \n" "pmaxsw %%xmm1,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "movq %%xmm0,0x00(%0,%3,1) \n" "lea 0x8(%0),%0 \n" "sub $0x8,%4 \n" "jg 1b \n" : "+r"(src_y0), // %0 "+r"(src_y1), // %1 "+r"(src_y2), // %2 "+r"(dst_sobelx), // %3 "+r"(width) // %4 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SOBELXROW_SSE2 #ifdef HAS_SOBELYROW_SSE2 // SobelY as a matrix is // -1 -2 -1 // 0 0 0 // 1 2 1 void SobelYRow_SSE2(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width) { asm volatile( "sub %0,%1 \n" "sub %0,%2 \n" "pxor %%xmm5,%%xmm5 \n" // 8 pixel loop. LABELALIGN "1: \n" "movq (%0),%%xmm0 \n" "movq 0x00(%0,%1,1),%%xmm1 \n" "punpcklbw %%xmm5,%%xmm0 \n" "punpcklbw %%xmm5,%%xmm1 \n" "psubw %%xmm1,%%xmm0 \n" "movq 0x1(%0),%%xmm1 \n" "movq 0x01(%0,%1,1),%%xmm2 \n" "punpcklbw %%xmm5,%%xmm1 \n" "punpcklbw %%xmm5,%%xmm2 \n" "psubw %%xmm2,%%xmm1 \n" "movq 0x2(%0),%%xmm2 \n" "movq 0x02(%0,%1,1),%%xmm3 \n" "punpcklbw %%xmm5,%%xmm2 \n" "punpcklbw %%xmm5,%%xmm3 \n" "psubw %%xmm3,%%xmm2 \n" "paddw %%xmm2,%%xmm0 \n" "paddw %%xmm1,%%xmm0 \n" "paddw %%xmm1,%%xmm0 \n" "pxor %%xmm1,%%xmm1 \n" "psubw %%xmm0,%%xmm1 \n" "pmaxsw %%xmm1,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "movq %%xmm0,0x00(%0,%2,1) \n" "lea 0x8(%0),%0 \n" "sub $0x8,%3 \n" "jg 1b \n" : "+r"(src_y0), // %0 "+r"(src_y1), // %1 "+r"(dst_sobely), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SOBELYROW_SSE2 #ifdef HAS_SOBELROW_SSE2 // Adds Sobel X and Sobel Y and stores Sobel into ARGB. // A = 255 // R = Sobel // G = Sobel // B = Sobel void SobelRow_SSE2(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) { asm volatile( "sub %0,%1 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "pslld $0x18,%%xmm5 \n" // 8 pixel loop. 
LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x00(%0,%1,1),%%xmm1 \n" "lea 0x10(%0),%0 \n" "paddusb %%xmm1,%%xmm0 \n" "movdqa %%xmm0,%%xmm2 \n" "punpcklbw %%xmm0,%%xmm2 \n" "punpckhbw %%xmm0,%%xmm0 \n" "movdqa %%xmm2,%%xmm1 \n" "punpcklwd %%xmm2,%%xmm1 \n" "punpckhwd %%xmm2,%%xmm2 \n" "por %%xmm5,%%xmm1 \n" "por %%xmm5,%%xmm2 \n" "movdqa %%xmm0,%%xmm3 \n" "punpcklwd %%xmm0,%%xmm3 \n" "punpckhwd %%xmm0,%%xmm0 \n" "por %%xmm5,%%xmm3 \n" "por %%xmm5,%%xmm0 \n" "movdqu %%xmm1,(%2) \n" "movdqu %%xmm2,0x10(%2) \n" "movdqu %%xmm3,0x20(%2) \n" "movdqu %%xmm0,0x30(%2) \n" "lea 0x40(%2),%2 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_sobelx), // %0 "+r"(src_sobely), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SOBELROW_SSE2 #ifdef HAS_SOBELTOPLANEROW_SSE2 // Adds Sobel X and Sobel Y and stores Sobel into a plane. void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width) { asm volatile( "sub %0,%1 \n" "pcmpeqb %%xmm5,%%xmm5 \n" "pslld $0x18,%%xmm5 \n" // 8 pixel loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x00(%0,%1,1),%%xmm1 \n" "lea 0x10(%0),%0 \n" "paddusb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%2) \n" "lea 0x10(%2),%2 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_sobelx), // %0 "+r"(src_sobely), // %1 "+r"(dst_y), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1"); } #endif // HAS_SOBELTOPLANEROW_SSE2 #ifdef HAS_SOBELXYROW_SSE2 // Mixes Sobel X, Sobel Y and Sobel into ARGB. // A = 255 // R = Sobel X // G = Sobel // B = Sobel Y void SobelXYRow_SSE2(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) { asm volatile( "sub %0,%1 \n" "pcmpeqb %%xmm5,%%xmm5 \n" // 8 pixel loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x00(%0,%1,1),%%xmm1 \n" "lea 0x10(%0),%0 \n" "movdqa %%xmm0,%%xmm2 \n" "paddusb %%xmm1,%%xmm2 \n" "movdqa %%xmm0,%%xmm3 \n" "punpcklbw %%xmm5,%%xmm3 \n" "punpckhbw %%xmm5,%%xmm0 \n" "movdqa %%xmm1,%%xmm4 \n" "punpcklbw %%xmm2,%%xmm4 \n" "punpckhbw %%xmm2,%%xmm1 \n" "movdqa %%xmm4,%%xmm6 \n" "punpcklwd %%xmm3,%%xmm6 \n" "punpckhwd %%xmm3,%%xmm4 \n" "movdqa %%xmm1,%%xmm7 \n" "punpcklwd %%xmm0,%%xmm7 \n" "punpckhwd %%xmm0,%%xmm1 \n" "movdqu %%xmm6,(%2) \n" "movdqu %%xmm4,0x10(%2) \n" "movdqu %%xmm7,0x20(%2) \n" "movdqu %%xmm1,0x30(%2) \n" "lea 0x40(%2),%2 \n" "sub $0x10,%3 \n" "jg 1b \n" : "+r"(src_sobelx), // %0 "+r"(src_sobely), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // HAS_SOBELXYROW_SSE2 #ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2 // Creates a table of cumulative sums where each value is a sum of all values // above and to the left of the value, inclusive of the value. void ComputeCumulativeSumRow_SSE2(const uint8_t* row, int32_t* cumsum, const int32_t* previous_cumsum, int width) { asm volatile( "pxor %%xmm0,%%xmm0 \n" "pxor %%xmm1,%%xmm1 \n" "sub $0x4,%3 \n" "jl 49f \n" "test $0xf,%1 \n" "jne 49f \n" // 4 pixel loop. 
LABELALIGN "40: \n" "movdqu (%0),%%xmm2 \n" "lea 0x10(%0),%0 \n" "movdqa %%xmm2,%%xmm4 \n" "punpcklbw %%xmm1,%%xmm2 \n" "movdqa %%xmm2,%%xmm3 \n" "punpcklwd %%xmm1,%%xmm2 \n" "punpckhwd %%xmm1,%%xmm3 \n" "punpckhbw %%xmm1,%%xmm4 \n" "movdqa %%xmm4,%%xmm5 \n" "punpcklwd %%xmm1,%%xmm4 \n" "punpckhwd %%xmm1,%%xmm5 \n" "paddd %%xmm2,%%xmm0 \n" "movdqu (%2),%%xmm2 \n" "paddd %%xmm0,%%xmm2 \n" "paddd %%xmm3,%%xmm0 \n" "movdqu 0x10(%2),%%xmm3 \n" "paddd %%xmm0,%%xmm3 \n" "paddd %%xmm4,%%xmm0 \n" "movdqu 0x20(%2),%%xmm4 \n" "paddd %%xmm0,%%xmm4 \n" "paddd %%xmm5,%%xmm0 \n" "movdqu 0x30(%2),%%xmm5 \n" "lea 0x40(%2),%2 \n" "paddd %%xmm0,%%xmm5 \n" "movdqu %%xmm2,(%1) \n" "movdqu %%xmm3,0x10(%1) \n" "movdqu %%xmm4,0x20(%1) \n" "movdqu %%xmm5,0x30(%1) \n" "lea 0x40(%1),%1 \n" "sub $0x4,%3 \n" "jge 40b \n" "49: \n" "add $0x3,%3 \n" "jl 19f \n" // 1 pixel loop. LABELALIGN "10: \n" "movd (%0),%%xmm2 \n" "lea 0x4(%0),%0 \n" "punpcklbw %%xmm1,%%xmm2 \n" "punpcklwd %%xmm1,%%xmm2 \n" "paddd %%xmm2,%%xmm0 \n" "movdqu (%2),%%xmm2 \n" "lea 0x10(%2),%2 \n" "paddd %%xmm0,%%xmm2 \n" "movdqu %%xmm2,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x1,%3 \n" "jge 10b \n" "19: \n" : "+r"(row), // %0 "+r"(cumsum), // %1 "+r"(previous_cumsum), // %2 "+r"(width) // %3 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_COMPUTECUMULATIVESUMROW_SSE2 #ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, const int32_t* botleft, int width, int area, uint8_t* dst, int count) { asm volatile( "movd %5,%%xmm5 \n" "cvtdq2ps %%xmm5,%%xmm5 \n" "rcpss %%xmm5,%%xmm4 \n" "pshufd $0x0,%%xmm4,%%xmm4 \n" "sub $0x4,%3 \n" "jl 49f \n" "cmpl $0x80,%5 \n" "ja 40f \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" "pcmpeqb %%xmm6,%%xmm6 \n" "psrld $0x10,%%xmm6 \n" "cvtdq2ps %%xmm6,%%xmm6 \n" "addps %%xmm6,%%xmm5 \n" "mulps %%xmm4,%%xmm5 \n" "cvtps2dq %%xmm5,%%xmm5 \n" "packssdw %%xmm5,%%xmm5 \n" // 4 pixel small loop. 
LABELALIGN "4: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x30(%0),%%xmm3 \n" "psubd 0x00(%0,%4,4),%%xmm0 \n" "psubd 0x10(%0,%4,4),%%xmm1 \n" "psubd 0x20(%0,%4,4),%%xmm2 \n" "psubd 0x30(%0,%4,4),%%xmm3 \n" "lea 0x40(%0),%0 \n" "psubd (%1),%%xmm0 \n" "psubd 0x10(%1),%%xmm1 \n" "psubd 0x20(%1),%%xmm2 \n" "psubd 0x30(%1),%%xmm3 \n" "paddd 0x00(%1,%4,4),%%xmm0 \n" "paddd 0x10(%1,%4,4),%%xmm1 \n" "paddd 0x20(%1,%4,4),%%xmm2 \n" "paddd 0x30(%1,%4,4),%%xmm3 \n" "lea 0x40(%1),%1 \n" "packssdw %%xmm1,%%xmm0 \n" "packssdw %%xmm3,%%xmm2 \n" "pmulhuw %%xmm5,%%xmm0 \n" "pmulhuw %%xmm5,%%xmm2 \n" "packuswb %%xmm2,%%xmm0 \n" "movdqu %%xmm0,(%2) \n" "lea 0x10(%2),%2 \n" "sub $0x4,%3 \n" "jge 4b \n" "jmp 49f \n" // 4 pixel loop LABELALIGN "40: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x20(%0),%%xmm2 \n" "movdqu 0x30(%0),%%xmm3 \n" "psubd 0x00(%0,%4,4),%%xmm0 \n" "psubd 0x10(%0,%4,4),%%xmm1 \n" "psubd 0x20(%0,%4,4),%%xmm2 \n" "psubd 0x30(%0,%4,4),%%xmm3 \n" "lea 0x40(%0),%0 \n" "psubd (%1),%%xmm0 \n" "psubd 0x10(%1),%%xmm1 \n" "psubd 0x20(%1),%%xmm2 \n" "psubd 0x30(%1),%%xmm3 \n" "paddd 0x00(%1,%4,4),%%xmm0 \n" "paddd 0x10(%1,%4,4),%%xmm1 \n" "paddd 0x20(%1,%4,4),%%xmm2 \n" "paddd 0x30(%1,%4,4),%%xmm3 \n" "lea 0x40(%1),%1 \n" "cvtdq2ps %%xmm0,%%xmm0 \n" "cvtdq2ps %%xmm1,%%xmm1 \n" "mulps %%xmm4,%%xmm0 \n" "mulps %%xmm4,%%xmm1 \n" "cvtdq2ps %%xmm2,%%xmm2 \n" "cvtdq2ps %%xmm3,%%xmm3 \n" "mulps %%xmm4,%%xmm2 \n" "mulps %%xmm4,%%xmm3 \n" "cvtps2dq %%xmm0,%%xmm0 \n" "cvtps2dq %%xmm1,%%xmm1 \n" "cvtps2dq %%xmm2,%%xmm2 \n" "cvtps2dq %%xmm3,%%xmm3 \n" "packssdw %%xmm1,%%xmm0 \n" "packssdw %%xmm3,%%xmm2 \n" "packuswb %%xmm2,%%xmm0 \n" "movdqu %%xmm0,(%2) \n" "lea 0x10(%2),%2 \n" "sub $0x4,%3 \n" "jge 40b \n" "49: \n" "add $0x3,%3 \n" "jl 19f \n" // 1 pixel loop LABELALIGN "10: \n" "movdqu (%0),%%xmm0 \n" "psubd 0x00(%0,%4,4),%%xmm0 \n" "lea 0x10(%0),%0 \n" "psubd (%1),%%xmm0 \n" "paddd 0x00(%1,%4,4),%%xmm0 \n" "lea 0x10(%1),%1 \n" "cvtdq2ps %%xmm0,%%xmm0 \n" "mulps %%xmm4,%%xmm0 \n" "cvtps2dq %%xmm0,%%xmm0 \n" "packssdw %%xmm0,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "movd %%xmm0,(%2) \n" "lea 0x4(%2),%2 \n" "sub $0x1,%3 \n" "jge 10b \n" "19: \n" : "+r"(topleft), // %0 "+r"(botleft), // %1 "+r"(dst), // %2 "+rm"(count) // %3 : "r"((intptr_t)(width)), // %4 "rm"(area) // %5 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 #ifdef HAS_ARGBAFFINEROW_SSE2 // Copy ARGB pixels from source image with slope to a row of destination. 
LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb, int src_argb_stride, uint8_t* dst_argb, const float* src_dudv, int width) { intptr_t src_argb_stride_temp = src_argb_stride; intptr_t temp; asm volatile( "movq (%3),%%xmm2 \n" "movq 0x08(%3),%%xmm7 \n" "shl $0x10,%1 \n" "add $0x4,%1 \n" "movd %1,%%xmm5 \n" "sub $0x4,%4 \n" "jl 49f \n" "pshufd $0x44,%%xmm7,%%xmm7 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" "movdqa %%xmm2,%%xmm0 \n" "addps %%xmm7,%%xmm0 \n" "movlhps %%xmm0,%%xmm2 \n" "movdqa %%xmm7,%%xmm4 \n" "addps %%xmm4,%%xmm4 \n" "movdqa %%xmm2,%%xmm3 \n" "addps %%xmm4,%%xmm3 \n" "addps %%xmm4,%%xmm4 \n" // 4 pixel loop LABELALIGN "40: \n" "cvttps2dq %%xmm2,%%xmm0 \n" // x,y float->int first 2 "cvttps2dq %%xmm3,%%xmm1 \n" // x,y float->int next 2 "packssdw %%xmm1,%%xmm0 \n" // x, y as 8 shorts "pmaddwd %%xmm5,%%xmm0 \n" // off = x*4 + y*stride "movd %%xmm0,%k1 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n" "movd %%xmm0,%k5 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n" "movd 0x00(%0,%1,1),%%xmm1 \n" "movd 0x00(%0,%5,1),%%xmm6 \n" "punpckldq %%xmm6,%%xmm1 \n" "addps %%xmm4,%%xmm2 \n" "movq %%xmm1,(%2) \n" "movd %%xmm0,%k1 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n" "movd %%xmm0,%k5 \n" "movd 0x00(%0,%1,1),%%xmm0 \n" "movd 0x00(%0,%5,1),%%xmm6 \n" "punpckldq %%xmm6,%%xmm0 \n" "addps %%xmm4,%%xmm3 \n" "movq %%xmm0,0x08(%2) \n" "lea 0x10(%2),%2 \n" "sub $0x4,%4 \n" "jge 40b \n" "49: \n" "add $0x3,%4 \n" "jl 19f \n" // 1 pixel loop LABELALIGN "10: \n" "cvttps2dq %%xmm2,%%xmm0 \n" "packssdw %%xmm0,%%xmm0 \n" "pmaddwd %%xmm5,%%xmm0 \n" "addps %%xmm7,%%xmm2 \n" "movd %%xmm0,%k1 \n" "movd 0x00(%0,%1,1),%%xmm0 \n" "movd %%xmm0,(%2) \n" "lea 0x04(%2),%2 \n" "sub $0x1,%4 \n" "jge 10b \n" "19: \n" : "+r"(src_argb), // %0 "+r"(src_argb_stride_temp), // %1 "+r"(dst_argb), // %2 "+r"(src_dudv), // %3 "+rm"(width), // %4 "=&r"(temp) // %5 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // HAS_ARGBAFFINEROW_SSE2 #ifdef HAS_INTERPOLATEROW_SSSE3 // Bilinear filter 16x2 -> 16x1 void InterpolateRow_SSSE3(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { asm volatile( "sub %1,%0 \n" "cmp $0x0,%3 \n" "je 100f \n" "cmp $0x80,%3 \n" "je 50f \n" "movd %3,%%xmm0 \n" "neg %3 \n" "add $0x100,%3 \n" "movd %3,%%xmm5 \n" "punpcklbw %%xmm0,%%xmm5 \n" "punpcklwd %%xmm5,%%xmm5 \n" "pshufd $0x0,%%xmm5,%%xmm5 \n" "mov $0x80808080,%%eax \n" "movd %%eax,%%xmm4 \n" "pshufd $0x0,%%xmm4,%%xmm4 \n" // General purpose row blend. LABELALIGN "1: \n" "movdqu (%1),%%xmm0 \n" "movdqu 0x00(%1,%4,1),%%xmm2 \n" "movdqa %%xmm0,%%xmm1 \n" "punpcklbw %%xmm2,%%xmm0 \n" "punpckhbw %%xmm2,%%xmm1 \n" "psubb %%xmm4,%%xmm0 \n" "psubb %%xmm4,%%xmm1 \n" "movdqa %%xmm5,%%xmm2 \n" "movdqa %%xmm5,%%xmm3 \n" "pmaddubsw %%xmm0,%%xmm2 \n" "pmaddubsw %%xmm1,%%xmm3 \n" "paddw %%xmm4,%%xmm2 \n" "paddw %%xmm4,%%xmm3 \n" "psrlw $0x8,%%xmm2 \n" "psrlw $0x8,%%xmm3 \n" "packuswb %%xmm3,%%xmm2 \n" "movdqu %%xmm2,0x00(%1,%0,1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" "jmp 99f \n" // Blend 50 / 50. LABELALIGN "50: \n" "movdqu (%1),%%xmm0 \n" "movdqu 0x00(%1,%4,1),%%xmm1 \n" "pavgb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,0x00(%1,%0,1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 50b \n" "jmp 99f \n" // Blend 100 / 0 - Copy row unchanged. 
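      // Fraction 0 falls through to this copy loop; 0x80 used the pavgb path
      // above, and any other fraction used the general blend, which computes
      // dst = (src * (256 - f) + src_next * f + rounding) >> 8.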
LABELALIGN "100: \n" "movdqu (%1),%%xmm0 \n" "movdqu %%xmm0,0x00(%1,%0,1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 100b \n" "99: \n" : "+r"(dst_ptr), // %0 "+r"(src_ptr), // %1 "+rm"(dst_width), // %2 "+r"(source_y_fraction) // %3 : "r"((intptr_t)(src_stride)) // %4 : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_INTERPOLATEROW_SSSE3 #ifdef HAS_INTERPOLATEROW_AVX2 // Bilinear filter 32x2 -> 32x1 void InterpolateRow_AVX2(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { asm volatile( "cmp $0x0,%3 \n" "je 100f \n" "sub %1,%0 \n" "cmp $0x80,%3 \n" "je 50f \n" "vmovd %3,%%xmm0 \n" "neg %3 \n" "add $0x100,%3 \n" "vmovd %3,%%xmm5 \n" "vpunpcklbw %%xmm0,%%xmm5,%%xmm5 \n" "vpunpcklwd %%xmm5,%%xmm5,%%xmm5 \n" "vbroadcastss %%xmm5,%%ymm5 \n" "mov $0x80808080,%%eax \n" "vmovd %%eax,%%xmm4 \n" "vbroadcastss %%xmm4,%%ymm4 \n" // General purpose row blend. LABELALIGN "1: \n" "vmovdqu (%1),%%ymm0 \n" "vmovdqu 0x00(%1,%4,1),%%ymm2 \n" "vpunpckhbw %%ymm2,%%ymm0,%%ymm1 \n" "vpunpcklbw %%ymm2,%%ymm0,%%ymm0 \n" "vpsubb %%ymm4,%%ymm1,%%ymm1 \n" "vpsubb %%ymm4,%%ymm0,%%ymm0 \n" "vpmaddubsw %%ymm1,%%ymm5,%%ymm1 \n" "vpmaddubsw %%ymm0,%%ymm5,%%ymm0 \n" "vpaddw %%ymm4,%%ymm1,%%ymm1 \n" "vpaddw %%ymm4,%%ymm0,%%ymm0 \n" "vpsrlw $0x8,%%ymm1,%%ymm1 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,0x00(%1,%0,1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "jmp 99f \n" // Blend 50 / 50. LABELALIGN "50: \n" "vmovdqu (%1),%%ymm0 \n" "vpavgb 0x00(%1,%4,1),%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,0x00(%1,%0,1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%2 \n" "jg 50b \n" "jmp 99f \n" // Blend 100 / 0 - Copy row unchanged. LABELALIGN "100: \n" "rep movsb \n" "jmp 999f \n" "99: \n" "vzeroupper \n" "999: \n" : "+D"(dst_ptr), // %0 "+S"(src_ptr), // %1 "+cm"(dst_width), // %2 "+r"(source_y_fraction) // %3 : "r"((intptr_t)(src_stride)) // %4 : "memory", "cc", "eax", "xmm0", "xmm1", "xmm2", "xmm4", "xmm5"); } #endif // HAS_INTERPOLATEROW_AVX2 #ifdef HAS_ARGBSHUFFLEROW_SSSE3 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. void ARGBShuffleRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width) { asm volatile( "movdqu (%3),%%xmm5 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "pshufb %%xmm5,%%xmm0 \n" "pshufb %%xmm5,%%xmm1 \n" "movdqu %%xmm0,(%1) \n" "movdqu %%xmm1,0x10(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "r"(shuffler) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_ARGBSHUFFLEROW_SSSE3 #ifdef HAS_ARGBSHUFFLEROW_AVX2 // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
void ARGBShuffleRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width) { asm volatile( "vbroadcastf128 (%3),%%ymm5 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "lea 0x40(%0),%0 \n" "vpshufb %%ymm5,%%ymm0,%%ymm0 \n" "vpshufb %%ymm5,%%ymm1,%%ymm1 \n" "vmovdqu %%ymm0,(%1) \n" "vmovdqu %%ymm1,0x20(%1) \n" "lea 0x40(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "r"(shuffler) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm5"); } #endif // HAS_ARGBSHUFFLEROW_AVX2 #ifdef HAS_I422TOYUY2ROW_SSE2 void I422ToYUY2Row_SSE2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width) { asm volatile( "sub %1,%2 \n" LABELALIGN "1: \n" "movq (%1),%%xmm2 \n" "movq 0x00(%1,%2,1),%%xmm1 \n" "add $0x8,%1 \n" "punpcklbw %%xmm1,%%xmm2 \n" "movdqu (%0),%%xmm0 \n" "add $0x10,%0 \n" "movdqa %%xmm0,%%xmm1 \n" "punpcklbw %%xmm2,%%xmm0 \n" "punpckhbw %%xmm2,%%xmm1 \n" "movdqu %%xmm0,(%3) \n" "movdqu %%xmm1,0x10(%3) \n" "lea 0x20(%3),%3 \n" "sub $0x10,%4 \n" "jg 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_yuy2), // %3 "+rm"(width) // %4 : : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_I422TOYUY2ROW_SSE2 #ifdef HAS_I422TOUYVYROW_SSE2 void I422ToUYVYRow_SSE2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width) { asm volatile( "sub %1,%2 \n" LABELALIGN "1: \n" "movq (%1),%%xmm2 \n" "movq 0x00(%1,%2,1),%%xmm1 \n" "add $0x8,%1 \n" "punpcklbw %%xmm1,%%xmm2 \n" "movdqu (%0),%%xmm0 \n" "movdqa %%xmm2,%%xmm1 \n" "add $0x10,%0 \n" "punpcklbw %%xmm0,%%xmm1 \n" "punpckhbw %%xmm0,%%xmm2 \n" "movdqu %%xmm1,(%3) \n" "movdqu %%xmm2,0x10(%3) \n" "lea 0x20(%3),%3 \n" "sub $0x10,%4 \n" "jg 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_uyvy), // %3 "+rm"(width) // %4 : : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_I422TOUYVYROW_SSE2 #ifdef HAS_I422TOYUY2ROW_AVX2 void I422ToYUY2Row_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width) { asm volatile( "sub %1,%2 \n" LABELALIGN "1: \n" "vpmovzxbw (%1),%%ymm1 \n" "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" "add $0x10,%1 \n" "vpsllw $0x8,%%ymm2,%%ymm2 \n" "vpor %%ymm1,%%ymm2,%%ymm2 \n" "vmovdqu (%0),%%ymm0 \n" "add $0x20,%0 \n" "vpunpcklbw %%ymm2,%%ymm0,%%ymm1 \n" "vpunpckhbw %%ymm2,%%ymm0,%%ymm2 \n" "vextractf128 $0x0,%%ymm1,(%3) \n" "vextractf128 $0x0,%%ymm2,0x10(%3) \n" "vextractf128 $0x1,%%ymm1,0x20(%3) \n" "vextractf128 $0x1,%%ymm2,0x30(%3) \n" "lea 0x40(%3),%3 \n" "sub $0x20,%4 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_yuy2), // %3 "+rm"(width) // %4 : : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_I422TOYUY2ROW_AVX2 #ifdef HAS_I422TOUYVYROW_AVX2 void I422ToUYVYRow_AVX2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width) { asm volatile( "sub %1,%2 \n" LABELALIGN "1: \n" "vpmovzxbw (%1),%%ymm1 \n" "vpmovzxbw 0x00(%1,%2,1),%%ymm2 \n" "add $0x10,%1 \n" "vpsllw $0x8,%%ymm2,%%ymm2 \n" "vpor %%ymm1,%%ymm2,%%ymm2 \n" "vmovdqu (%0),%%ymm0 \n" "add $0x20,%0 \n" "vpunpcklbw %%ymm0,%%ymm2,%%ymm1 \n" "vpunpckhbw %%ymm0,%%ymm2,%%ymm2 \n" "vextractf128 $0x0,%%ymm1,(%3) \n" "vextractf128 $0x0,%%ymm2,0x10(%3) \n" "vextractf128 $0x1,%%ymm1,0x20(%3) \n" "vextractf128 $0x1,%%ymm2,0x30(%3) \n" "lea 0x40(%3),%3 \n" "sub $0x20,%4 \n" "jg 1b \n" "vzeroupper \n" : 
"+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_uyvy), // %3 "+rm"(width) // %4 : : "memory", "cc", "xmm0", "xmm1", "xmm2"); } #endif // HAS_I422TOUYVYROW_AVX2 #ifdef HAS_ARGBPOLYNOMIALROW_SSE2 void ARGBPolynomialRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, const float* poly, int width) { asm volatile( "pxor %%xmm3,%%xmm3 \n" // 2 pixel loop. LABELALIGN "1: \n" "movq (%0),%%xmm0 \n" "lea 0x8(%0),%0 \n" "punpcklbw %%xmm3,%%xmm0 \n" "movdqa %%xmm0,%%xmm4 \n" "punpcklwd %%xmm3,%%xmm0 \n" "punpckhwd %%xmm3,%%xmm4 \n" "cvtdq2ps %%xmm0,%%xmm0 \n" "cvtdq2ps %%xmm4,%%xmm4 \n" "movdqa %%xmm0,%%xmm1 \n" "movdqa %%xmm4,%%xmm5 \n" "mulps 0x10(%3),%%xmm0 \n" "mulps 0x10(%3),%%xmm4 \n" "addps (%3),%%xmm0 \n" "addps (%3),%%xmm4 \n" "movdqa %%xmm1,%%xmm2 \n" "movdqa %%xmm5,%%xmm6 \n" "mulps %%xmm1,%%xmm2 \n" "mulps %%xmm5,%%xmm6 \n" "mulps %%xmm2,%%xmm1 \n" "mulps %%xmm6,%%xmm5 \n" "mulps 0x20(%3),%%xmm2 \n" "mulps 0x20(%3),%%xmm6 \n" "mulps 0x30(%3),%%xmm1 \n" "mulps 0x30(%3),%%xmm5 \n" "addps %%xmm2,%%xmm0 \n" "addps %%xmm6,%%xmm4 \n" "addps %%xmm1,%%xmm0 \n" "addps %%xmm5,%%xmm4 \n" "cvttps2dq %%xmm0,%%xmm0 \n" "cvttps2dq %%xmm4,%%xmm4 \n" "packuswb %%xmm4,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "movq %%xmm0,(%1) \n" "lea 0x8(%1),%1 \n" "sub $0x2,%2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "r"(poly) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } #endif // HAS_ARGBPOLYNOMIALROW_SSE2 #ifdef HAS_ARGBPOLYNOMIALROW_AVX2 void ARGBPolynomialRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, const float* poly, int width) { asm volatile( "vbroadcastf128 (%3),%%ymm4 \n" "vbroadcastf128 0x10(%3),%%ymm5 \n" "vbroadcastf128 0x20(%3),%%ymm6 \n" "vbroadcastf128 0x30(%3),%%ymm7 \n" // 2 pixel loop. LABELALIGN "1: \n" "vpmovzxbd (%0),%%ymm0 \n" // 2 ARGB pixels "lea 0x8(%0),%0 \n" "vcvtdq2ps %%ymm0,%%ymm0 \n" // X 8 floats "vmulps %%ymm0,%%ymm0,%%ymm2 \n" // X * X "vmulps %%ymm7,%%ymm0,%%ymm3 \n" // C3 * X "vfmadd132ps %%ymm5,%%ymm4,%%ymm0 \n" // result = C0 + C1 * X "vfmadd231ps %%ymm6,%%ymm2,%%ymm0 \n" // result += C2 * X * X "vfmadd231ps %%ymm3,%%ymm2,%%ymm0 \n" // result += C3 * X * X * // X "vcvttps2dq %%ymm0,%%ymm0 \n" "vpackusdw %%ymm0,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpackuswb %%xmm0,%%xmm0,%%xmm0 \n" "vmovq %%xmm0,(%1) \n" "lea 0x8(%1),%1 \n" "sub $0x2,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "r"(poly) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } #endif // HAS_ARGBPOLYNOMIALROW_AVX2 #ifdef HAS_HALFFLOATROW_SSE2 static float kScaleBias = 1.9259299444e-34f; void HalfFloatRow_SSE2(const uint16_t* src, uint16_t* dst, float scale, int width) { scale *= kScaleBias; asm volatile( "movd %3,%%xmm4 \n" "pshufd $0x0,%%xmm4,%%xmm4 \n" "pxor %%xmm5,%%xmm5 \n" "sub %0,%1 \n" // 16 pixel loop. 
LABELALIGN "1: \n" "movdqu (%0),%%xmm2 \n" // 8 shorts "add $0x10,%0 \n" "movdqa %%xmm2,%%xmm3 \n" "punpcklwd %%xmm5,%%xmm2 \n" // 8 ints in xmm2/1 "cvtdq2ps %%xmm2,%%xmm2 \n" // 8 floats "punpckhwd %%xmm5,%%xmm3 \n" "cvtdq2ps %%xmm3,%%xmm3 \n" "mulps %%xmm4,%%xmm2 \n" "mulps %%xmm4,%%xmm3 \n" "psrld $0xd,%%xmm2 \n" "psrld $0xd,%%xmm3 \n" "packssdw %%xmm3,%%xmm2 \n" "movdqu %%xmm2,-0x10(%0,%1,1) \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "m"(scale) // %3 : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_HALFFLOATROW_SSE2 #ifdef HAS_HALFFLOATROW_AVX2 void HalfFloatRow_AVX2(const uint16_t* src, uint16_t* dst, float scale, int width) { scale *= kScaleBias; asm volatile( "vbroadcastss %3, %%ymm4 \n" "vpxor %%ymm5,%%ymm5,%%ymm5 \n" "sub %0,%1 \n" // 16 pixel loop. LABELALIGN "1: \n" "vmovdqu (%0),%%ymm2 \n" // 16 shorts "add $0x20,%0 \n" "vpunpckhwd %%ymm5,%%ymm2,%%ymm3 \n" // mutates "vpunpcklwd %%ymm5,%%ymm2,%%ymm2 \n" "vcvtdq2ps %%ymm3,%%ymm3 \n" "vcvtdq2ps %%ymm2,%%ymm2 \n" "vmulps %%ymm3,%%ymm4,%%ymm3 \n" "vmulps %%ymm2,%%ymm4,%%ymm2 \n" "vpsrld $0xd,%%ymm3,%%ymm3 \n" "vpsrld $0xd,%%ymm2,%%ymm2 \n" "vpackssdw %%ymm3, %%ymm2, %%ymm2 \n" // unmutates "vmovdqu %%ymm2,-0x20(%0,%1,1) \n" "sub $0x10,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 #if defined(__x86_64__) : "x"(scale) // %3 #else : "m"(scale) // %3 #endif : "memory", "cc", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_HALFFLOATROW_AVX2 #ifdef HAS_HALFFLOATROW_F16C void HalfFloatRow_F16C(const uint16_t* src, uint16_t* dst, float scale, int width) { asm volatile( "vbroadcastss %3, %%ymm4 \n" "sub %0,%1 \n" // 16 pixel loop. LABELALIGN "1: \n" "vpmovzxwd (%0),%%ymm2 \n" // 16 shorts -> 16 ints "vpmovzxwd 0x10(%0),%%ymm3 \n" "vcvtdq2ps %%ymm2,%%ymm2 \n" "vcvtdq2ps %%ymm3,%%ymm3 \n" "vmulps %%ymm2,%%ymm4,%%ymm2 \n" "vmulps %%ymm3,%%ymm4,%%ymm3 \n" "vcvtps2ph $3, %%ymm2, %%xmm2 \n" "vcvtps2ph $3, %%ymm3, %%xmm3 \n" "vmovdqu %%xmm2,0x00(%0,%1,1) \n" "vmovdqu %%xmm3,0x10(%0,%1,1) \n" "add $0x20,%0 \n" "sub $0x10,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 #if defined(__x86_64__) : "x"(scale) // %3 #else : "m"(scale) // %3 #endif : "memory", "cc", "xmm2", "xmm3", "xmm4"); } #endif // HAS_HALFFLOATROW_F16C #ifdef HAS_HALFFLOATROW_F16C void HalfFloat1Row_F16C(const uint16_t* src, uint16_t* dst, float, int width) { asm volatile( "sub %0,%1 \n" // 16 pixel loop. LABELALIGN "1: \n" "vpmovzxwd (%0),%%ymm2 \n" // 16 shorts -> 16 ints "vpmovzxwd 0x10(%0),%%ymm3 \n" "vcvtdq2ps %%ymm2,%%ymm2 \n" "vcvtdq2ps %%ymm3,%%ymm3 \n" "vcvtps2ph $3, %%ymm2, %%xmm2 \n" "vcvtps2ph $3, %%ymm3, %%xmm3 \n" "vmovdqu %%xmm2,0x00(%0,%1,1) \n" "vmovdqu %%xmm3,0x10(%0,%1,1) \n" "add $0x20,%0 \n" "sub $0x10,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "memory", "cc", "xmm2", "xmm3"); } #endif // HAS_HALFFLOATROW_F16C #ifdef HAS_ARGBCOLORTABLEROW_X86 // Tranform ARGB pixels with color table. void ARGBColorTableRow_X86(uint8_t* dst_argb, const uint8_t* table_argb, int width) { uintptr_t pixel_temp; asm volatile( // 1 pixel loop. 
LABELALIGN "1: \n" "movzb (%0),%1 \n" "lea 0x4(%0),%0 \n" "movzb 0x00(%3,%1,4),%1 \n" "mov %b1,-0x4(%0) \n" "movzb -0x3(%0),%1 \n" "movzb 0x01(%3,%1,4),%1 \n" "mov %b1,-0x3(%0) \n" "movzb -0x2(%0),%1 \n" "movzb 0x02(%3,%1,4),%1 \n" "mov %b1,-0x2(%0) \n" "movzb -0x1(%0),%1 \n" "movzb 0x03(%3,%1,4),%1 \n" "mov %b1,-0x1(%0) \n" "dec %2 \n" "jg 1b \n" : "+r"(dst_argb), // %0 "=&d"(pixel_temp), // %1 "+r"(width) // %2 : "r"(table_argb) // %3 : "memory", "cc"); } #endif // HAS_ARGBCOLORTABLEROW_X86 #ifdef HAS_RGBCOLORTABLEROW_X86 // Tranform RGB pixels with color table. void RGBColorTableRow_X86(uint8_t* dst_argb, const uint8_t* table_argb, int width) { uintptr_t pixel_temp; asm volatile( // 1 pixel loop. LABELALIGN "1: \n" "movzb (%0),%1 \n" "lea 0x4(%0),%0 \n" "movzb 0x00(%3,%1,4),%1 \n" "mov %b1,-0x4(%0) \n" "movzb -0x3(%0),%1 \n" "movzb 0x01(%3,%1,4),%1 \n" "mov %b1,-0x3(%0) \n" "movzb -0x2(%0),%1 \n" "movzb 0x02(%3,%1,4),%1 \n" "mov %b1,-0x2(%0) \n" "dec %2 \n" "jg 1b \n" : "+r"(dst_argb), // %0 "=&d"(pixel_temp), // %1 "+r"(width) // %2 : "r"(table_argb) // %3 : "memory", "cc"); } #endif // HAS_RGBCOLORTABLEROW_X86 #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 // Tranform RGB pixels with luma table. void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width, const uint8_t* luma, uint32_t lumacoeff) { uintptr_t pixel_temp; uintptr_t table_temp; asm volatile( "movd %6,%%xmm3 \n" "pshufd $0x0,%%xmm3,%%xmm3 \n" "pcmpeqb %%xmm4,%%xmm4 \n" "psllw $0x8,%%xmm4 \n" "pxor %%xmm5,%%xmm5 \n" // 4 pixel loop. LABELALIGN "1: \n" "movdqu (%2),%%xmm0 \n" "pmaddubsw %%xmm3,%%xmm0 \n" "phaddw %%xmm0,%%xmm0 \n" "pand %%xmm4,%%xmm0 \n" "punpcklwd %%xmm5,%%xmm0 \n" "movd %%xmm0,%k1 \n" // 32 bit offset "add %5,%1 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n" "movzb (%2),%0 \n" "movzb 0x00(%1,%0,1),%0 \n" "mov %b0,(%3) \n" "movzb 0x1(%2),%0 \n" "movzb 0x00(%1,%0,1),%0 \n" "mov %b0,0x1(%3) \n" "movzb 0x2(%2),%0 \n" "movzb 0x00(%1,%0,1),%0 \n" "mov %b0,0x2(%3) \n" "movzb 0x3(%2),%0 \n" "mov %b0,0x3(%3) \n" "movd %%xmm0,%k1 \n" // 32 bit offset "add %5,%1 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n" "movzb 0x4(%2),%0 \n" "movzb 0x00(%1,%0,1),%0 \n" "mov %b0,0x4(%3) \n" "movzb 0x5(%2),%0 \n" "movzb 0x00(%1,%0,1),%0 \n" "mov %b0,0x5(%3) \n" "movzb 0x6(%2),%0 \n" "movzb 0x00(%1,%0,1),%0 \n" "mov %b0,0x6(%3) \n" "movzb 0x7(%2),%0 \n" "mov %b0,0x7(%3) \n" "movd %%xmm0,%k1 \n" // 32 bit offset "add %5,%1 \n" "pshufd $0x39,%%xmm0,%%xmm0 \n" "movzb 0x8(%2),%0 \n" "movzb 0x00(%1,%0,1),%0 \n" "mov %b0,0x8(%3) \n" "movzb 0x9(%2),%0 \n" "movzb 0x00(%1,%0,1),%0 \n" "mov %b0,0x9(%3) \n" "movzb 0xa(%2),%0 \n" "movzb 0x00(%1,%0,1),%0 \n" "mov %b0,0xa(%3) \n" "movzb 0xb(%2),%0 \n" "mov %b0,0xb(%3) \n" "movd %%xmm0,%k1 \n" // 32 bit offset "add %5,%1 \n" "movzb 0xc(%2),%0 \n" "movzb 0x00(%1,%0,1),%0 \n" "mov %b0,0xc(%3) \n" "movzb 0xd(%2),%0 \n" "movzb 0x00(%1,%0,1),%0 \n" "mov %b0,0xd(%3) \n" "movzb 0xe(%2),%0 \n" "movzb 0x00(%1,%0,1),%0 \n" "mov %b0,0xe(%3) \n" "movzb 0xf(%2),%0 \n" "mov %b0,0xf(%3) \n" "lea 0x10(%2),%2 \n" "lea 0x10(%3),%3 \n" "sub $0x4,%4 \n" "jg 1b \n" : "=&d"(pixel_temp), // %0 "=&a"(table_temp), // %1 "+r"(src_argb), // %2 "+r"(dst_argb), // %3 "+rm"(width) // %4 : "r"(luma), // %5 "rm"(lumacoeff) // %6 : "memory", "cc", "xmm0", "xmm3", "xmm4", "xmm5"); } #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 #endif // defined(__x86_64__) || defined(__i386__) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif 
libvpx-1.8.2/third_party/libyuv/source/row_msa.cc000066400000000000000000004270141357355204000221630ustar00rootroot00000000000000/* * Copyright 2016 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <string.h> #include "libyuv/row.h" // This module is for GCC MSA #if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) #include "libyuv/macros_msa.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif #define ALPHA_VAL (-1) // Fill YUV -> RGB conversion constants into vectors #define YUVTORGB_SETUP(yuvconst, ub, vr, ug, vg, bb, bg, br, yg) \ { \ ub = __msa_fill_w(yuvconst->kUVToB[0]); \ vr = __msa_fill_w(yuvconst->kUVToR[1]); \ ug = __msa_fill_w(yuvconst->kUVToG[0]); \ vg = __msa_fill_w(yuvconst->kUVToG[1]); \ bb = __msa_fill_w(yuvconst->kUVBiasB[0]); \ bg = __msa_fill_w(yuvconst->kUVBiasG[0]); \ br = __msa_fill_w(yuvconst->kUVBiasR[0]); \ yg = __msa_fill_w(yuvconst->kYToRgb[0]); \ } // Load YUV 422 pixel data #define READYUV422(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \ { \ uint64_t y_m; \ uint32_t u_m, v_m; \ v4i32 zero_m = {0}; \ y_m = LD(psrc_y); \ u_m = LW(psrc_u); \ v_m = LW(psrc_v); \ out_y = (v16u8)__msa_insert_d((v2i64)zero_m, 0, (int64_t)y_m); \ out_u = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)u_m); \ out_v = (v16u8)__msa_insert_w(zero_m, 0, (int32_t)v_m); \ } // Clip input vector elements between 0 and 255 #define CLIP_0TO255(in0, in1, in2, in3, in4, in5) \ { \ v4i32 max_m = __msa_ldi_w(0xFF); \ \ in0 = __msa_maxi_s_w(in0, 0); \ in1 = __msa_maxi_s_w(in1, 0); \ in2 = __msa_maxi_s_w(in2, 0); \ in3 = __msa_maxi_s_w(in3, 0); \ in4 = __msa_maxi_s_w(in4, 0); \ in5 = __msa_maxi_s_w(in5, 0); \ in0 = __msa_min_s_w(max_m, in0); \ in1 = __msa_min_s_w(max_m, in1); \ in2 = __msa_min_s_w(max_m, in2); \ in3 = __msa_min_s_w(max_m, in3); \ in4 = __msa_min_s_w(max_m, in4); \ in5 = __msa_min_s_w(max_m, in5); \ } // Convert 8 pixels of YUV 420 to RGB.
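// In outline, per pixel: B = clip((((Y * yg) >> 16) + bb - U * ub) >> 6),
// with G and R formed the same way from the (ug, vg) dot product and the vr
// term; the coefficients are the fixed-point values loaded by YUVTORGB_SETUP.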
#define YUVTORGB(in_y, in_uv, ubvr, ugvg, bb, bg, br, yg, out_b, out_g, out_r) \
  {                                                                \
    v8i16 vec0_m, vec1_m;                                          \
    v4i32 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m;                  \
    v4i32 reg5_m, reg6_m, reg7_m;                                  \
    v16i8 zero_m = {0};                                            \
                                                                   \
    vec0_m = (v8i16)__msa_ilvr_b((v16i8)in_y, (v16i8)in_y);        \
    vec1_m = (v8i16)__msa_ilvr_b((v16i8)zero_m, (v16i8)in_uv);     \
    reg0_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec0_m);    \
    reg1_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec0_m);    \
    reg2_m = (v4i32)__msa_ilvr_h((v8i16)zero_m, (v8i16)vec1_m);    \
    reg3_m = (v4i32)__msa_ilvl_h((v8i16)zero_m, (v8i16)vec1_m);    \
    reg0_m *= yg;                                                  \
    reg1_m *= yg;                                                  \
    reg2_m *= ubvr;                                                \
    reg3_m *= ubvr;                                                \
    reg0_m = __msa_srai_w(reg0_m, 16);                             \
    reg1_m = __msa_srai_w(reg1_m, 16);                             \
    reg4_m = __msa_dotp_s_w((v8i16)vec1_m, (v8i16)ugvg);           \
    reg5_m = __msa_ilvev_w(reg2_m, reg2_m);                        \
    reg6_m = __msa_ilvev_w(reg3_m, reg3_m);                        \
    reg7_m = __msa_ilvr_w(reg4_m, reg4_m);                         \
    reg2_m = __msa_ilvod_w(reg2_m, reg2_m);                        \
    reg3_m = __msa_ilvod_w(reg3_m, reg3_m);                        \
    reg4_m = __msa_ilvl_w(reg4_m, reg4_m);                         \
    reg5_m = reg0_m - reg5_m;                                      \
    reg6_m = reg1_m - reg6_m;                                      \
    reg2_m = reg0_m - reg2_m;                                      \
    reg3_m = reg1_m - reg3_m;                                      \
    reg7_m = reg0_m - reg7_m;                                      \
    reg4_m = reg1_m - reg4_m;                                      \
    reg5_m += bb;                                                  \
    reg6_m += bb;                                                  \
    reg7_m += bg;                                                  \
    reg4_m += bg;                                                  \
    reg2_m += br;                                                  \
    reg3_m += br;                                                  \
    reg5_m = __msa_srai_w(reg5_m, 6);                              \
    reg6_m = __msa_srai_w(reg6_m, 6);                              \
    reg7_m = __msa_srai_w(reg7_m, 6);                              \
    reg4_m = __msa_srai_w(reg4_m, 6);                              \
    reg2_m = __msa_srai_w(reg2_m, 6);                              \
    reg3_m = __msa_srai_w(reg3_m, 6);                              \
    CLIP_0TO255(reg5_m, reg6_m, reg7_m, reg4_m, reg2_m, reg3_m);   \
    out_b = __msa_pckev_h((v8i16)reg6_m, (v8i16)reg5_m);           \
    out_g = __msa_pckev_h((v8i16)reg4_m, (v8i16)reg7_m);           \
    out_r = __msa_pckev_h((v8i16)reg3_m, (v8i16)reg2_m);           \
  }

// Pack and Store 8 ARGB values.
#define STOREARGB(in0, in1, in2, in3, pdst_argb)             \
  {                                                          \
    v8i16 vec0_m, vec1_m;                                    \
    v16u8 dst0_m, dst1_m;                                    \
    vec0_m = (v8i16)__msa_ilvev_b((v16i8)in1, (v16i8)in0);   \
    vec1_m = (v8i16)__msa_ilvev_b((v16i8)in3, (v16i8)in2);   \
    dst0_m = (v16u8)__msa_ilvr_h(vec1_m, vec0_m);            \
    dst1_m = (v16u8)__msa_ilvl_h(vec1_m, vec0_m);            \
    ST_UB2(dst0_m, dst1_m, pdst_argb, 16);                   \
  }

// Takes ARGB input and calculates Y.
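// const0 and const1 pack the per-byte weights for the even (bytes 0-1) and
// odd (bytes 2-3) halves of each 4-byte pixel, and const2 is the rounding
// bias, so per pixel the macro evaluates (scalar model, reference only):
//   y = (p0 * w0 + p1 * w1 + p2 * w2 + p3 * w3 + const2) >> shift
// where p0..p3 are the pixel's four bytes and w0..w3 the weights packed
// into const0/const1.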
#define ARGBTOY(argb0, argb1, argb2, argb3, const0, const1, const2, shift, \ y_out) \ { \ v16u8 vec0_m, vec1_m, vec2_m, vec3_m; \ v8u16 reg0_m, reg1_m; \ \ vec0_m = (v16u8)__msa_pckev_h((v8i16)argb1, (v8i16)argb0); \ vec1_m = (v16u8)__msa_pckev_h((v8i16)argb3, (v8i16)argb2); \ vec2_m = (v16u8)__msa_pckod_h((v8i16)argb1, (v8i16)argb0); \ vec3_m = (v16u8)__msa_pckod_h((v8i16)argb3, (v8i16)argb2); \ reg0_m = __msa_dotp_u_h(vec0_m, const0); \ reg1_m = __msa_dotp_u_h(vec1_m, const0); \ reg0_m = __msa_dpadd_u_h(reg0_m, vec2_m, const1); \ reg1_m = __msa_dpadd_u_h(reg1_m, vec3_m, const1); \ reg0_m += const2; \ reg1_m += const2; \ reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, shift); \ reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, shift); \ y_out = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \ } // Loads current and next row of ARGB input and averages it to calculate U and V #define READ_ARGB(s_ptr, t_ptr, argb0, argb1, argb2, argb3) \ { \ v16u8 src0_m, src1_m, src2_m, src3_m, src4_m, src5_m, src6_m, src7_m; \ v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ v16u8 vec8_m, vec9_m; \ v8u16 reg0_m, reg1_m, reg2_m, reg3_m, reg4_m, reg5_m, reg6_m, reg7_m; \ v8u16 reg8_m, reg9_m; \ \ src0_m = (v16u8)__msa_ld_b((v16i8*)s, 0); \ src1_m = (v16u8)__msa_ld_b((v16i8*)s, 16); \ src2_m = (v16u8)__msa_ld_b((v16i8*)s, 32); \ src3_m = (v16u8)__msa_ld_b((v16i8*)s, 48); \ src4_m = (v16u8)__msa_ld_b((v16i8*)t, 0); \ src5_m = (v16u8)__msa_ld_b((v16i8*)t, 16); \ src6_m = (v16u8)__msa_ld_b((v16i8*)t, 32); \ src7_m = (v16u8)__msa_ld_b((v16i8*)t, 48); \ vec0_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \ vec1_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \ vec2_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \ vec3_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \ vec4_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \ vec5_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \ vec6_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \ vec7_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \ reg0_m = __msa_hadd_u_h(vec0_m, vec0_m); \ reg1_m = __msa_hadd_u_h(vec1_m, vec1_m); \ reg2_m = __msa_hadd_u_h(vec2_m, vec2_m); \ reg3_m = __msa_hadd_u_h(vec3_m, vec3_m); \ reg4_m = __msa_hadd_u_h(vec4_m, vec4_m); \ reg5_m = __msa_hadd_u_h(vec5_m, vec5_m); \ reg6_m = __msa_hadd_u_h(vec6_m, vec6_m); \ reg7_m = __msa_hadd_u_h(vec7_m, vec7_m); \ reg8_m = (v8u16)__msa_pckev_d((v2i64)reg4_m, (v2i64)reg0_m); \ reg9_m = (v8u16)__msa_pckev_d((v2i64)reg5_m, (v2i64)reg1_m); \ reg8_m += (v8u16)__msa_pckod_d((v2i64)reg4_m, (v2i64)reg0_m); \ reg9_m += (v8u16)__msa_pckod_d((v2i64)reg5_m, (v2i64)reg1_m); \ reg0_m = (v8u16)__msa_pckev_d((v2i64)reg6_m, (v2i64)reg2_m); \ reg1_m = (v8u16)__msa_pckev_d((v2i64)reg7_m, (v2i64)reg3_m); \ reg0_m += (v8u16)__msa_pckod_d((v2i64)reg6_m, (v2i64)reg2_m); \ reg1_m += (v8u16)__msa_pckod_d((v2i64)reg7_m, (v2i64)reg3_m); \ reg8_m = (v8u16)__msa_srai_h((v8i16)reg8_m, 2); \ reg9_m = (v8u16)__msa_srai_h((v8i16)reg9_m, 2); \ reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, 2); \ reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, 2); \ argb0 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \ argb1 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \ src0_m = (v16u8)__msa_ld_b((v16i8*)s, 64); \ src1_m = (v16u8)__msa_ld_b((v16i8*)s, 80); \ src2_m = (v16u8)__msa_ld_b((v16i8*)s, 96); \ src3_m = (v16u8)__msa_ld_b((v16i8*)s, 112); \ src4_m = (v16u8)__msa_ld_b((v16i8*)t, 64); \ src5_m = (v16u8)__msa_ld_b((v16i8*)t, 80); \ src6_m = (v16u8)__msa_ld_b((v16i8*)t, 
96); \ src7_m = (v16u8)__msa_ld_b((v16i8*)t, 112); \ vec2_m = (v16u8)__msa_ilvr_b((v16i8)src0_m, (v16i8)src4_m); \ vec3_m = (v16u8)__msa_ilvr_b((v16i8)src1_m, (v16i8)src5_m); \ vec4_m = (v16u8)__msa_ilvr_b((v16i8)src2_m, (v16i8)src6_m); \ vec5_m = (v16u8)__msa_ilvr_b((v16i8)src3_m, (v16i8)src7_m); \ vec6_m = (v16u8)__msa_ilvl_b((v16i8)src0_m, (v16i8)src4_m); \ vec7_m = (v16u8)__msa_ilvl_b((v16i8)src1_m, (v16i8)src5_m); \ vec8_m = (v16u8)__msa_ilvl_b((v16i8)src2_m, (v16i8)src6_m); \ vec9_m = (v16u8)__msa_ilvl_b((v16i8)src3_m, (v16i8)src7_m); \ reg0_m = __msa_hadd_u_h(vec2_m, vec2_m); \ reg1_m = __msa_hadd_u_h(vec3_m, vec3_m); \ reg2_m = __msa_hadd_u_h(vec4_m, vec4_m); \ reg3_m = __msa_hadd_u_h(vec5_m, vec5_m); \ reg4_m = __msa_hadd_u_h(vec6_m, vec6_m); \ reg5_m = __msa_hadd_u_h(vec7_m, vec7_m); \ reg6_m = __msa_hadd_u_h(vec8_m, vec8_m); \ reg7_m = __msa_hadd_u_h(vec9_m, vec9_m); \ reg8_m = (v8u16)__msa_pckev_d((v2i64)reg4_m, (v2i64)reg0_m); \ reg9_m = (v8u16)__msa_pckev_d((v2i64)reg5_m, (v2i64)reg1_m); \ reg8_m += (v8u16)__msa_pckod_d((v2i64)reg4_m, (v2i64)reg0_m); \ reg9_m += (v8u16)__msa_pckod_d((v2i64)reg5_m, (v2i64)reg1_m); \ reg0_m = (v8u16)__msa_pckev_d((v2i64)reg6_m, (v2i64)reg2_m); \ reg1_m = (v8u16)__msa_pckev_d((v2i64)reg7_m, (v2i64)reg3_m); \ reg0_m += (v8u16)__msa_pckod_d((v2i64)reg6_m, (v2i64)reg2_m); \ reg1_m += (v8u16)__msa_pckod_d((v2i64)reg7_m, (v2i64)reg3_m); \ reg8_m = (v8u16)__msa_srai_h((v8i16)reg8_m, 2); \ reg9_m = (v8u16)__msa_srai_h((v8i16)reg9_m, 2); \ reg0_m = (v8u16)__msa_srai_h((v8i16)reg0_m, 2); \ reg1_m = (v8u16)__msa_srai_h((v8i16)reg1_m, 2); \ argb2 = (v16u8)__msa_pckev_b((v16i8)reg9_m, (v16i8)reg8_m); \ argb3 = (v16u8)__msa_pckev_b((v16i8)reg1_m, (v16i8)reg0_m); \ } // Takes ARGB input and calculates U and V. #define ARGBTOUV(argb0, argb1, argb2, argb3, const0, const1, const2, const3, \ shf0, shf1, shf2, shf3, v_out, u_out) \ { \ v16u8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ v8u16 reg0_m, reg1_m, reg2_m, reg3_m; \ \ vec0_m = (v16u8)__msa_vshf_b(shf0, (v16i8)argb1, (v16i8)argb0); \ vec1_m = (v16u8)__msa_vshf_b(shf0, (v16i8)argb3, (v16i8)argb2); \ vec2_m = (v16u8)__msa_vshf_b(shf1, (v16i8)argb1, (v16i8)argb0); \ vec3_m = (v16u8)__msa_vshf_b(shf1, (v16i8)argb3, (v16i8)argb2); \ vec4_m = (v16u8)__msa_vshf_b(shf2, (v16i8)argb1, (v16i8)argb0); \ vec5_m = (v16u8)__msa_vshf_b(shf2, (v16i8)argb3, (v16i8)argb2); \ vec6_m = (v16u8)__msa_vshf_b(shf3, (v16i8)argb1, (v16i8)argb0); \ vec7_m = (v16u8)__msa_vshf_b(shf3, (v16i8)argb3, (v16i8)argb2); \ reg0_m = __msa_dotp_u_h(vec0_m, const1); \ reg1_m = __msa_dotp_u_h(vec1_m, const1); \ reg2_m = __msa_dotp_u_h(vec4_m, const1); \ reg3_m = __msa_dotp_u_h(vec5_m, const1); \ reg0_m += const3; \ reg1_m += const3; \ reg2_m += const3; \ reg3_m += const3; \ reg0_m -= __msa_dotp_u_h(vec2_m, const0); \ reg1_m -= __msa_dotp_u_h(vec3_m, const0); \ reg2_m -= __msa_dotp_u_h(vec6_m, const2); \ reg3_m -= __msa_dotp_u_h(vec7_m, const2); \ v_out = (v16u8)__msa_pckod_b((v16i8)reg1_m, (v16i8)reg0_m); \ u_out = (v16u8)__msa_pckod_b((v16i8)reg3_m, (v16i8)reg2_m); \ } // Load I444 pixel data #define READI444(psrc_y, psrc_u, psrc_v, out_y, out_u, out_v) \ { \ uint64_t y_m, u_m, v_m; \ v2i64 zero_m = {0}; \ y_m = LD(psrc_y); \ u_m = LD(psrc_u); \ v_m = LD(psrc_v); \ out_y = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)y_m); \ out_u = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)u_m); \ out_v = (v16u8)__msa_insert_d(zero_m, 0, (int64_t)v_m); \ } void MirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) { int x; v16u8 src0, 
src1, src2, src3; v16u8 dst0, dst1, dst2, dst3; v16i8 shuffler = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0}; src += width - 64; for (x = 0; x < width; x += 64) { LD_UB4(src, 16, src3, src2, src1, src0); VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0); ST_UB4(dst0, dst1, dst2, dst3, dst, 16); dst += 64; src -= 64; } } void ARGBMirrorRow_MSA(const uint8_t* src, uint8_t* dst, int width) { int x; v16u8 src0, src1, src2, src3; v16u8 dst0, dst1, dst2, dst3; v16i8 shuffler = {12, 13, 14, 15, 8, 9, 10, 11, 4, 5, 6, 7, 0, 1, 2, 3}; src += width * 4 - 64; for (x = 0; x < width; x += 16) { LD_UB4(src, 16, src3, src2, src1, src0); VSHF_B2_UB(src3, src3, src2, src2, shuffler, shuffler, dst3, dst2); VSHF_B2_UB(src1, src1, src0, src0, shuffler, shuffler, dst1, dst0); ST_UB4(dst0, dst1, dst2, dst3, dst, 16); dst += 64; src -= 64; } } void I422ToYUY2Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width) { int x; v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1; v16u8 dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3; for (x = 0; x < width; x += 32) { src_u0 = LD_UB(src_u); src_v0 = LD_UB(src_v); LD_UB2(src_y, 16, src_y0, src_y1); ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1); ILVRL_B2_UB(vec_uv0, src_y0, dst_yuy2_0, dst_yuy2_1); ILVRL_B2_UB(vec_uv1, src_y1, dst_yuy2_2, dst_yuy2_3); ST_UB4(dst_yuy2_0, dst_yuy2_1, dst_yuy2_2, dst_yuy2_3, dst_yuy2, 16); src_u += 16; src_v += 16; src_y += 32; dst_yuy2 += 64; } } void I422ToUYVYRow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width) { int x; v16u8 src_u0, src_v0, src_y0, src_y1, vec_uv0, vec_uv1; v16u8 dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3; for (x = 0; x < width; x += 32) { src_u0 = LD_UB(src_u); src_v0 = LD_UB(src_v); LD_UB2(src_y, 16, src_y0, src_y1); ILVRL_B2_UB(src_v0, src_u0, vec_uv0, vec_uv1); ILVRL_B2_UB(src_y0, vec_uv0, dst_uyvy0, dst_uyvy1); ILVRL_B2_UB(src_y1, vec_uv1, dst_uyvy2, dst_uyvy3); ST_UB4(dst_uyvy0, dst_uyvy1, dst_uyvy2, dst_uyvy3, dst_uyvy, 16); src_u += 16; src_v += 16; src_y += 32; dst_uyvy += 64; } } void I422ToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; v16u8 src0, src1, src2; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v4i32 vec_ubvr, vec_ugvg; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); for (x = 0; x < width; x += 8) { READYUV422(src_y, src_u, src_v, src0, src1, src2); src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); STOREARGB(vec0, vec1, vec2, alpha, dst_argb); src_y += 8; src_u += 4; src_v += 4; dst_argb += 32; } } void I422ToRGBARow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; v16u8 src0, src1, src2; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v4i32 vec_ubvr, vec_ugvg; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); vec_ubvr = 
__msa_ilvr_w(vec_vr, vec_ub); vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); for (x = 0; x < width; x += 8) { READYUV422(src_y, src_u, src_v, src0, src1, src2); src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); STOREARGB(alpha, vec0, vec1, vec2, dst_argb); src_y += 8; src_u += 4; src_v += 4; dst_argb += 32; } } void I422AlphaToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, const uint8_t* src_a, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; int64_t data_a; v16u8 src0, src1, src2, src3; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v4i32 vec_ubvr, vec_ugvg; v4i32 zero = {0}; YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); for (x = 0; x < width; x += 8) { data_a = LD(src_a); READYUV422(src_y, src_u, src_v, src0, src1, src2); src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); src3 = (v16u8)__msa_insert_d((v2i64)zero, 0, data_a); YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); src3 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src3); STOREARGB(vec0, vec1, vec2, src3, dst_argb); src_y += 8; src_u += 4; src_v += 4; src_a += 8; dst_argb += 32; } } void I422ToRGB24Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int32_t width) { int x; int64_t data_u, data_v; v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2; v8i16 vec0, vec1, vec2, vec3, vec4, vec5; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v4i32 vec_ubvr, vec_ugvg; v16u8 reg0, reg1, reg2, reg3; v2i64 zero = {0}; v16i8 shuffler0 = {0, 1, 16, 2, 3, 17, 4, 5, 18, 6, 7, 19, 8, 9, 20, 10}; v16i8 shuffler1 = {0, 21, 1, 2, 22, 3, 4, 23, 5, 6, 24, 7, 8, 25, 9, 10}; v16i8 shuffler2 = {26, 6, 7, 27, 8, 9, 28, 10, 11, 29, 12, 13, 30, 14, 15, 31}; YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((v16u8*)src_y, 0); data_u = LD(src_u); data_v = LD(src_v); src1 = (v16u8)__msa_insert_d(zero, 0, data_u); src2 = (v16u8)__msa_insert_d(zero, 0, data_v); src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); src3 = (v16u8)__msa_sldi_b((v16i8)src0, (v16i8)src0, 8); src4 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src1, 8); YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); YUVTORGB(src3, src4, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec3, vec4, vec5); reg0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); reg2 = (v16u8)__msa_ilvev_b((v16i8)vec4, (v16i8)vec3); reg3 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec2); reg1 = (v16u8)__msa_sldi_b((v16i8)reg2, (v16i8)reg0, 11); dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)reg3, (v16i8)reg0); dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)reg3, (v16i8)reg1); dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)reg3, (v16i8)reg2); ST_UB2(dst0, dst1, dst_argb, 16); ST_UB(dst2, (dst_argb + 32)); src_y += 16; src_u += 8; src_v += 8; dst_argb += 48; } } // TODO(fbarchard): Consider AND instead of shift to isolate 5 upper bits of R. 
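// RGB565 stores R in bits 15:11, G in bits 10:5 and B in bits 4:0, so the
// conversions below keep the top 5/6/5 bits of each 8-bit channel (shift
// right by 3/2/3) and move R and G into position (shift left by 11 and 5)
// before OR-ing the three fields together. A scalar sketch of the packing
// (hypothetical helper, not part of this file):
//   static inline uint16_t PackRGB565(uint8_t r, uint8_t g, uint8_t b) {
//     return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
//   }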
void I422ToRGB565Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { int x; v16u8 src0, src1, src2, dst0; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v4i32 vec_ubvr, vec_ugvg; YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); for (x = 0; x < width; x += 8) { READYUV422(src_y, src_u, src_v, src0, src1, src2); src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec2, vec1); vec0 = __msa_srai_h(vec0, 3); vec1 = __msa_srai_h(vec1, 3); vec2 = __msa_srai_h(vec2, 2); vec1 = __msa_slli_h(vec1, 11); vec2 = __msa_slli_h(vec2, 5); vec0 |= vec1; dst0 = (v16u8)(vec2 | vec0); ST_UB(dst0, dst_rgb565); src_y += 8; src_u += 4; src_v += 4; dst_rgb565 += 16; } } // TODO(fbarchard): Consider AND instead of shift to isolate 4 upper bits of G. void I422ToARGB4444Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { int x; v16u8 src0, src1, src2, dst0; v8i16 vec0, vec1, vec2; v8u16 reg0, reg1, reg2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v4i32 vec_ubvr, vec_ugvg; v8u16 const_0xF000 = (v8u16)__msa_fill_h(0xF000); YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); for (x = 0; x < width; x += 8) { READYUV422(src_y, src_u, src_v, src0, src1, src2); src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); reg0 = (v8u16)__msa_srai_h(vec0, 4); reg1 = (v8u16)__msa_srai_h(vec1, 4); reg2 = (v8u16)__msa_srai_h(vec2, 4); reg1 = (v8u16)__msa_slli_h((v8i16)reg1, 4); reg2 = (v8u16)__msa_slli_h((v8i16)reg2, 8); reg1 |= const_0xF000; reg0 |= reg2; dst0 = (v16u8)(reg1 | reg0); ST_UB(dst0, dst_argb4444); src_y += 8; src_u += 4; src_v += 4; dst_argb4444 += 16; } } void I422ToARGB1555Row_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { int x; v16u8 src0, src1, src2, dst0; v8i16 vec0, vec1, vec2; v8u16 reg0, reg1, reg2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v4i32 vec_ubvr, vec_ugvg; v8u16 const_0x8000 = (v8u16)__msa_fill_h(0x8000); YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); for (x = 0; x < width; x += 8) { READYUV422(src_y, src_u, src_v, src0, src1, src2); src1 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src1); YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); reg0 = (v8u16)__msa_srai_h(vec0, 3); reg1 = (v8u16)__msa_srai_h(vec1, 3); reg2 = (v8u16)__msa_srai_h(vec2, 3); reg1 = (v8u16)__msa_slli_h((v8i16)reg1, 5); reg2 = (v8u16)__msa_slli_h((v8i16)reg2, 10); reg1 |= const_0x8000; reg0 |= reg2; dst0 = (v16u8)(reg1 | reg0); ST_UB(dst0, dst_argb1555); src_y += 8; src_u += 4; src_v += 4; dst_argb1555 += 16; } } void YUY2ToYRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { int x; 
v16u8 src0, src1, src2, src3, dst0, dst1; for (x = 0; x < width; x += 32) { LD_UB4(src_yuy2, 16, src0, src1, src2, src3); dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); ST_UB2(dst0, dst1, dst_y, 16); src_yuy2 += 64; dst_y += 32; } } void YUY2ToUVRow_MSA(const uint8_t* src_yuy2, int src_stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_yuy2_next = src_yuy2 + src_stride_yuy2; int x; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 vec0, vec1, dst0, dst1; for (x = 0; x < width; x += 32) { LD_UB4(src_yuy2, 16, src0, src1, src2, src3); LD_UB4(src_yuy2_next, 16, src4, src5, src6, src7); src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); src2 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4); src3 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6); vec0 = __msa_aver_u_b(src0, src2); vec1 = __msa_aver_u_b(src1, src3); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst_u); ST_UB(dst1, dst_v); src_yuy2 += 64; src_yuy2_next += 64; dst_u += 16; dst_v += 16; } } void YUY2ToUV422Row_MSA(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; for (x = 0; x < width; x += 32) { LD_UB4(src_yuy2, 16, src0, src1, src2, src3); src0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); src1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); dst1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); ST_UB(dst0, dst_u); ST_UB(dst1, dst_v); src_yuy2 += 64; dst_u += 16; dst_v += 16; } } void UYVYToYRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; for (x = 0; x < width; x += 32) { LD_UB4(src_uyvy, 16, src0, src1, src2, src3); dst0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); dst1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); ST_UB2(dst0, dst1, dst_y, 16); src_uyvy += 64; dst_y += 32; } } void UYVYToUVRow_MSA(const uint8_t* src_uyvy, int src_stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_uyvy_next = src_uyvy + src_stride_uyvy; int x; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 vec0, vec1, dst0, dst1; for (x = 0; x < width; x += 32) { LD_UB4(src_uyvy, 16, src0, src1, src2, src3); LD_UB4(src_uyvy_next, 16, src4, src5, src6, src7); src0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); src1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); src2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4); src3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6); vec0 = __msa_aver_u_b(src0, src2); vec1 = __msa_aver_u_b(src1, src3); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); dst1 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst_u); ST_UB(dst1, dst_v); src_uyvy += 64; src_uyvy_next += 64; dst_u += 16; dst_v += 16; } } void UYVYToUV422Row_MSA(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; for (x = 0; x < width; x += 32) { LD_UB4(src_uyvy, 16, src0, src1, src2, src3); src0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); src1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); dst1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); ST_UB(dst0, dst_u); ST_UB(dst1, dst_v); src_uyvy += 64; dst_u += 16; dst_v += 16; } } void 
ARGBToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0; v8u16 reg0, reg1, reg2, reg3, reg4, reg5; v16i8 zero = {0}; v8u16 const_0x19 = (v8u16)__msa_ldi_h(0x19); v8u16 const_0x81 = (v8u16)__msa_ldi_h(0x81); v8u16 const_0x42 = (v8u16)__msa_ldi_h(0x42); v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16); src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32); src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48); vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); vec2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); vec3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); reg0 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec0); reg1 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec1); reg2 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec2); reg3 = (v8u16)__msa_ilvev_b(zero, (v16i8)vec3); reg4 = (v8u16)__msa_ilvod_b(zero, (v16i8)vec0); reg5 = (v8u16)__msa_ilvod_b(zero, (v16i8)vec1); reg0 *= const_0x19; reg1 *= const_0x19; reg2 *= const_0x81; reg3 *= const_0x81; reg4 *= const_0x42; reg5 *= const_0x42; reg0 += reg2; reg1 += reg3; reg0 += reg4; reg1 += reg5; reg0 += const_0x1080; reg1 += const_0x1080; reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8); reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8); dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); ST_UB(dst0, dst_y); src_argb0 += 64; dst_y += 16; } } void ARGBToUVRow_MSA(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; const uint8_t* src_argb0_next = src_argb0 + src_stride_argb; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9; v16u8 dst0, dst1; v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x70); v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A); v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26); v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E); v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12); v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); for (x = 0; x < width; x += 32) { src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 16); src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 32); src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 48); src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 64); src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 80); src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 96); src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0, 112); vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4); vec3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6); vec4 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); vec5 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); vec6 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4); vec7 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6); vec8 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); vec9 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); vec4 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); vec5 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); vec0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); vec1 = (v16u8)__msa_pckod_b((v16i8)vec3, (v16i8)vec2); reg0 = __msa_hadd_u_h(vec8, vec8); reg1 = __msa_hadd_u_h(vec9, vec9); reg2 = __msa_hadd_u_h(vec4, vec4); reg3 = __msa_hadd_u_h(vec5, vec5); reg4 = 
__msa_hadd_u_h(vec0, vec0); reg5 = __msa_hadd_u_h(vec1, vec1); src0 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 0); src1 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 16); src2 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 32); src3 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 48); src4 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 64); src5 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 80); src6 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 96); src7 = (v16u8)__msa_ld_b((v16u8*)src_argb0_next, 112); vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); vec2 = (v16u8)__msa_pckev_b((v16i8)src5, (v16i8)src4); vec3 = (v16u8)__msa_pckev_b((v16i8)src7, (v16i8)src6); vec4 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); vec5 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); vec6 = (v16u8)__msa_pckod_b((v16i8)src5, (v16i8)src4); vec7 = (v16u8)__msa_pckod_b((v16i8)src7, (v16i8)src6); vec8 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); vec9 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); vec4 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); vec5 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); vec0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); vec1 = (v16u8)__msa_pckod_b((v16i8)vec3, (v16i8)vec2); reg0 += __msa_hadd_u_h(vec8, vec8); reg1 += __msa_hadd_u_h(vec9, vec9); reg2 += __msa_hadd_u_h(vec4, vec4); reg3 += __msa_hadd_u_h(vec5, vec5); reg4 += __msa_hadd_u_h(vec0, vec0); reg5 += __msa_hadd_u_h(vec1, vec1); reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 2); reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 2); reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 2); reg3 = (v8u16)__msa_srai_h((v8i16)reg3, 2); reg4 = (v8u16)__msa_srai_h((v8i16)reg4, 2); reg5 = (v8u16)__msa_srai_h((v8i16)reg5, 2); reg6 = reg0 * const_0x70; reg7 = reg1 * const_0x70; reg8 = reg2 * const_0x4A; reg9 = reg3 * const_0x4A; reg6 += const_0x8080; reg7 += const_0x8080; reg8 += reg4 * const_0x26; reg9 += reg5 * const_0x26; reg0 *= const_0x12; reg1 *= const_0x12; reg2 *= const_0x5E; reg3 *= const_0x5E; reg4 *= const_0x70; reg5 *= const_0x70; reg2 += reg0; reg3 += reg1; reg4 += const_0x8080; reg5 += const_0x8080; reg6 -= reg8; reg7 -= reg9; reg4 -= reg2; reg5 -= reg3; reg6 = (v8u16)__msa_srai_h((v8i16)reg6, 8); reg7 = (v8u16)__msa_srai_h((v8i16)reg7, 8); reg4 = (v8u16)__msa_srai_h((v8i16)reg4, 8); reg5 = (v8u16)__msa_srai_h((v8i16)reg5, 8); dst0 = (v16u8)__msa_pckev_b((v16i8)reg7, (v16i8)reg6); dst1 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); ST_UB(dst0, dst_u); ST_UB(dst1, dst_v); src_argb0 += 128; src_argb0_next += 128; dst_u += 16; dst_v += 16; } } void ARGBToRGB24Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1, dst2; v16i8 shuffler0 = {0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, 20}; v16i8 shuffler1 = {5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, 20, 21, 22, 24, 25}; v16i8 shuffler2 = {10, 12, 13, 14, 16, 17, 18, 20, 21, 22, 24, 25, 26, 28, 29, 30}; for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32); src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48); dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); ST_UB2(dst0, dst1, dst_rgb, 16); ST_UB(dst2, (dst_rgb + 32)); src_argb += 64; dst_rgb += 48; } } void ARGBToRAWRow_MSA(const uint8_t* src_argb, 
uint8_t* dst_rgb, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1, dst2; v16i8 shuffler0 = {2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, 18, 17, 16, 22}; v16i8 shuffler1 = {5, 4, 10, 9, 8, 14, 13, 12, 18, 17, 16, 22, 21, 20, 26, 25}; v16i8 shuffler2 = {8, 14, 13, 12, 18, 17, 16, 22, 21, 20, 26, 25, 24, 30, 29, 28}; for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32); src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48); dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src2, (v16i8)src1); dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src3, (v16i8)src2); ST_UB2(dst0, dst1, dst_rgb, 16); ST_UB(dst2, (dst_rgb + 32)); src_argb += 64; dst_rgb += 48; } } void ARGBToRGB565Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; v16u8 src0, src1, dst0; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v16i8 zero = {0}; for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16); vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3); vec1 = (v16u8)__msa_slli_b((v16i8)src0, 3); vec2 = (v16u8)__msa_srai_b((v16i8)src0, 5); vec4 = (v16u8)__msa_srai_b((v16i8)src1, 3); vec5 = (v16u8)__msa_slli_b((v16i8)src1, 3); vec6 = (v16u8)__msa_srai_b((v16i8)src1, 5); vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1); vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1); vec5 = (v16u8)__msa_sldi_b(zero, (v16i8)vec5, 1); vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1); vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 2); vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 2); vec0 = __msa_binsli_b(vec0, vec1, 2); vec1 = __msa_binsli_b(vec2, vec3, 4); vec4 = __msa_binsli_b(vec4, vec5, 2); vec5 = __msa_binsli_b(vec6, vec7, 4); vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); vec4 = (v16u8)__msa_ilvev_b((v16i8)vec5, (v16i8)vec4); dst0 = (v16u8)__msa_pckev_h((v8i16)vec4, (v8i16)vec0); ST_UB(dst0, dst_rgb); src_argb += 32; dst_rgb += 16; } } void ARGBToARGB1555Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; v16u8 src0, src1, dst0; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; v16i8 zero = {0}; for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16); vec0 = (v16u8)__msa_srai_b((v16i8)src0, 3); vec1 = (v16u8)__msa_slli_b((v16i8)src0, 2); vec2 = (v16u8)__msa_srai_b((v16i8)vec0, 3); vec1 = (v16u8)__msa_sldi_b(zero, (v16i8)vec1, 1); vec2 = (v16u8)__msa_sldi_b(zero, (v16i8)vec2, 1); vec3 = (v16u8)__msa_srai_b((v16i8)src0, 1); vec5 = (v16u8)__msa_srai_b((v16i8)src1, 3); vec6 = (v16u8)__msa_slli_b((v16i8)src1, 2); vec7 = (v16u8)__msa_srai_b((v16i8)vec5, 3); vec6 = (v16u8)__msa_sldi_b(zero, (v16i8)vec6, 1); vec7 = (v16u8)__msa_sldi_b(zero, (v16i8)vec7, 1); vec8 = (v16u8)__msa_srai_b((v16i8)src1, 1); vec3 = (v16u8)__msa_sldi_b(zero, (v16i8)vec3, 2); vec8 = (v16u8)__msa_sldi_b(zero, (v16i8)vec8, 2); vec4 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 3); vec9 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 3); vec0 = __msa_binsli_b(vec0, vec1, 2); vec5 = __msa_binsli_b(vec5, vec6, 2); vec1 = __msa_binsli_b(vec2, vec3, 5); vec6 = __msa_binsli_b(vec7, vec8, 5); vec1 = __msa_binsli_b(vec1, vec4, 0); vec6 = __msa_binsli_b(vec6, vec9, 0); vec0 = (v16u8)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); vec1 = (v16u8)__msa_ilvev_b((v16i8)vec6, 
(v16i8)vec5); dst0 = (v16u8)__msa_pckev_h((v8i16)vec1, (v8i16)vec0); ST_UB(dst0, dst_rgb); src_argb += 32; dst_rgb += 16; } } void ARGBToARGB4444Row_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { int x; v16u8 src0, src1; v16u8 vec0, vec1; v16u8 dst0; v16i8 zero = {0}; for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16); vec0 = (v16u8)__msa_srai_b((v16i8)src0, 4); vec1 = (v16u8)__msa_srai_b((v16i8)src1, 4); src0 = (v16u8)__msa_sldi_b(zero, (v16i8)src0, 1); src1 = (v16u8)__msa_sldi_b(zero, (v16i8)src1, 1); vec0 = __msa_binsli_b(vec0, src0, 3); vec1 = __msa_binsli_b(vec1, src1, 3); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst_rgb); src_argb += 32; dst_rgb += 16; } } void ARGBToUV444Row_MSA(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int32_t width) { int32_t x; v16u8 src0, src1, src2, src3, reg0, reg1, reg2, reg3, dst0, dst1; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 vec8, vec9, vec10, vec11; v8u16 const_112 = (v8u16)__msa_ldi_h(112); v8u16 const_74 = (v8u16)__msa_ldi_h(74); v8u16 const_38 = (v8u16)__msa_ldi_h(38); v8u16 const_94 = (v8u16)__msa_ldi_h(94); v8u16 const_18 = (v8u16)__msa_ldi_h(18); v8u16 const_32896 = (v8u16)__msa_fill_h(32896); v16i8 zero = {0}; for (x = width; x > 0; x -= 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32); src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48); reg0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); reg1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); reg2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); reg3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); src0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); src1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2); src2 = (v16u8)__msa_pckod_b((v16i8)reg1, (v16i8)reg0); vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0); vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0); vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1); vec3 = (v8u16)__msa_ilvl_b(zero, (v16i8)src1); vec4 = (v8u16)__msa_ilvr_b(zero, (v16i8)src2); vec5 = (v8u16)__msa_ilvl_b(zero, (v16i8)src2); vec10 = vec0 * const_18; vec11 = vec1 * const_18; vec8 = vec2 * const_94; vec9 = vec3 * const_94; vec6 = vec4 * const_112; vec7 = vec5 * const_112; vec0 *= const_112; vec1 *= const_112; vec2 *= const_74; vec3 *= const_74; vec4 *= const_38; vec5 *= const_38; vec8 += vec10; vec9 += vec11; vec6 += const_32896; vec7 += const_32896; vec0 += const_32896; vec1 += const_32896; vec2 += vec4; vec3 += vec5; vec0 -= vec2; vec1 -= vec3; vec6 -= vec8; vec7 -= vec9; vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8); vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8); vec6 = (v8u16)__msa_srai_h((v8i16)vec6, 8); vec7 = (v8u16)__msa_srai_h((v8i16)vec7, 8); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); dst1 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); ST_UB(dst0, dst_u); ST_UB(dst1, dst_v); src_argb += 64; dst_u += 16; dst_v += 16; } } void ARGBMultiplyRow_MSA(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, dst0; v8u16 vec0, vec1, vec2, vec3; v4u32 reg0, reg1, reg2, reg3; v8i16 zero = {0}; for (x = 0; x < width; x += 4) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0); vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); vec2 = 
(v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1); vec3 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src1); reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0); reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0); reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1); reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1); reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2); reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2); reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3); reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3); reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 16); reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 16); reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 16); reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 16); vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst_argb); src_argb0 += 16; src_argb1 += 16; dst_argb += 16; } } void ARGBAddRow_MSA(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0); src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 16); dst0 = __msa_adds_u_b(src0, src2); dst1 = __msa_adds_u_b(src1, src3); ST_UB2(dst0, dst1, dst_argb, 16); src_argb0 += 32; src_argb1 += 32; dst_argb += 32; } } void ARGBSubtractRow_MSA(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0); src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 16); dst0 = __msa_subs_u_b(src0, src2); dst1 = __msa_subs_u_b(src1, src3); ST_UB2(dst0, dst1, dst_argb, 16); src_argb0 += 32; src_argb1 += 32; dst_argb += 32; } } void ARGBAttenuateRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, dst0, dst1; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; v4u32 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; v8i16 zero = {0}; v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255}; for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16); vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); vec2 = (v8u16)__msa_ilvr_b((v16i8)src1, (v16i8)src1); vec3 = (v8u16)__msa_ilvl_b((v16i8)src1, (v16i8)src1); vec4 = (v8u16)__msa_fill_h(vec0[3]); vec5 = (v8u16)__msa_fill_h(vec0[7]); vec6 = (v8u16)__msa_fill_h(vec1[3]); vec7 = (v8u16)__msa_fill_h(vec1[7]); vec4 = (v8u16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4); vec5 = (v8u16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); vec6 = (v8u16)__msa_fill_h(vec2[3]); vec7 = (v8u16)__msa_fill_h(vec2[7]); vec8 = (v8u16)__msa_fill_h(vec3[3]); vec9 = (v8u16)__msa_fill_h(vec3[7]); vec6 = (v8u16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); vec7 = (v8u16)__msa_pckev_d((v2i64)vec9, (v2i64)vec8); reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec4); reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec4); reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec5); reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec5); reg4 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec6); reg5 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec6); reg6 = (v4u32)__msa_ilvr_h(zero, 
(v8i16)vec7); reg7 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec7); reg0 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec0); reg1 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec0); reg2 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec1); reg3 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec1); reg4 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec2); reg5 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec2); reg6 *= (v4u32)__msa_ilvr_h(zero, (v8i16)vec3); reg7 *= (v4u32)__msa_ilvl_h(zero, (v8i16)vec3); reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 24); reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 24); reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 24); reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 24); reg4 = (v4u32)__msa_srai_w((v4i32)reg4, 24); reg5 = (v4u32)__msa_srai_w((v4i32)reg5, 24); reg6 = (v4u32)__msa_srai_w((v4i32)reg6, 24); reg7 = (v4u32)__msa_srai_w((v4i32)reg7, 24); vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); vec2 = (v8u16)__msa_pckev_h((v8i16)reg5, (v8i16)reg4); vec3 = (v8u16)__msa_pckev_h((v8i16)reg7, (v8i16)reg6); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); dst0 = __msa_bmnz_v(dst0, src0, mask); dst1 = __msa_bmnz_v(dst1, src1, mask); ST_UB2(dst0, dst1, dst_argb, 16); src_argb += 32; dst_argb += 32; } } void ARGBToRGB565DitherRow_MSA(const uint8_t* src_argb, uint8_t* dst_rgb, uint32_t dither4, int width) { int x; v16u8 src0, src1, dst0, vec0, vec1; v8i16 vec_d0; v8i16 reg0, reg1, reg2; v16i8 zero = {0}; v8i16 max = __msa_ldi_h(0xFF); vec_d0 = (v8i16)__msa_fill_w(dither4); vec_d0 = (v8i16)__msa_ilvr_b(zero, (v16i8)vec_d0); for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16); vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); reg0 = (v8i16)__msa_ilvev_b(zero, (v16i8)vec0); reg1 = (v8i16)__msa_ilvev_b(zero, (v16i8)vec1); reg2 = (v8i16)__msa_ilvod_b(zero, (v16i8)vec0); reg0 += vec_d0; reg1 += vec_d0; reg2 += vec_d0; reg0 = __msa_maxi_s_h((v8i16)reg0, 0); reg1 = __msa_maxi_s_h((v8i16)reg1, 0); reg2 = __msa_maxi_s_h((v8i16)reg2, 0); reg0 = __msa_min_s_h((v8i16)max, (v8i16)reg0); reg1 = __msa_min_s_h((v8i16)max, (v8i16)reg1); reg2 = __msa_min_s_h((v8i16)max, (v8i16)reg2); reg0 = __msa_srai_h(reg0, 3); reg2 = __msa_srai_h(reg2, 3); reg1 = __msa_srai_h(reg1, 2); reg2 = __msa_slli_h(reg2, 11); reg1 = __msa_slli_h(reg1, 5); reg0 |= reg1; dst0 = (v16u8)(reg0 | reg2); ST_UB(dst0, dst_rgb); src_argb += 32; dst_rgb += 16; } } void ARGBShuffleRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width) { int x; v16u8 src0, src1, dst0, dst1; v16i8 vec0; v16i8 shuffler_vec = {0, 0, 0, 0, 4, 4, 4, 4, 8, 8, 8, 8, 12, 12, 12, 12}; int32_t val = LW((int32_t*)shuffler); vec0 = (v16i8)__msa_fill_w(val); shuffler_vec += vec0; for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 0); src1 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 16); dst0 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src0, (v16i8)src0); dst1 = (v16u8)__msa_vshf_b(shuffler_vec, (v16i8)src1, (v16i8)src1); ST_UB2(dst0, dst1, dst_argb, 16); src_argb += 32; dst_argb += 32; } } void ARGBShadeRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value) { int x; v16u8 src0, dst0; v8u16 vec0, vec1; v4u32 reg0, reg1, reg2, reg3, rgba_scale; v8i16 zero = {0}; rgba_scale[0] = value; rgba_scale = (v4u32)__msa_ilvr_b((v16i8)rgba_scale, (v16i8)rgba_scale); rgba_scale = 
(v4u32)__msa_ilvr_h(zero, (v8i16)rgba_scale); for (x = 0; x < width; x += 4) { src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 0); vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); vec1 = (v8u16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); reg0 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec0); reg1 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec0); reg2 = (v4u32)__msa_ilvr_h(zero, (v8i16)vec1); reg3 = (v4u32)__msa_ilvl_h(zero, (v8i16)vec1); reg0 *= rgba_scale; reg1 *= rgba_scale; reg2 *= rgba_scale; reg3 *= rgba_scale; reg0 = (v4u32)__msa_srai_w((v4i32)reg0, 24); reg1 = (v4u32)__msa_srai_w((v4i32)reg1, 24); reg2 = (v4u32)__msa_srai_w((v4i32)reg2, 24); reg3 = (v4u32)__msa_srai_w((v4i32)reg3, 24); vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst_argb); src_argb += 16; dst_argb += 16; } } void ARGBGrayRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, vec0, vec1, dst0, dst1; v8u16 reg0; v16u8 const_0x26 = (v16u8)__msa_ldi_h(0x26); v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F); for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 0); src1 = (v16u8)__msa_ld_b((const v16u8*)src_argb, 16); vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); reg0 = __msa_dotp_u_h(vec0, const_0x4B0F); reg0 = __msa_dpadd_u_h(reg0, vec1, const_0x26); reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 7); vec0 = (v16u8)__msa_ilvev_b((v16i8)reg0, (v16i8)reg0); vec1 = (v16u8)__msa_ilvod_b((v16i8)vec1, (v16i8)vec0); dst0 = (v16u8)__msa_ilvr_b((v16i8)vec1, (v16i8)vec0); dst1 = (v16u8)__msa_ilvl_b((v16i8)vec1, (v16i8)vec0); ST_UB2(dst0, dst1, dst_argb, 16); src_argb += 32; dst_argb += 32; } } void ARGBSepiaRow_MSA(uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, dst0, dst1, vec0, vec1, vec2, vec3, vec4, vec5; v8u16 reg0, reg1, reg2; v16u8 const_0x4411 = (v16u8)__msa_fill_h(0x4411); v16u8 const_0x23 = (v16u8)__msa_ldi_h(0x23); v16u8 const_0x5816 = (v16u8)__msa_fill_h(0x5816); v16u8 const_0x2D = (v16u8)__msa_ldi_h(0x2D); v16u8 const_0x6218 = (v16u8)__msa_fill_h(0x6218); v16u8 const_0x32 = (v16u8)__msa_ldi_h(0x32); v8u16 const_0xFF = (v8u16)__msa_ldi_h(0xFF); for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 0); src1 = (v16u8)__msa_ld_b((v16u8*)dst_argb, 16); vec0 = (v16u8)__msa_pckev_h((v8i16)src1, (v8i16)src0); vec1 = (v16u8)__msa_pckod_h((v8i16)src1, (v8i16)src0); vec3 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec1); reg0 = (v8u16)__msa_dotp_u_h(vec0, const_0x4411); reg1 = (v8u16)__msa_dotp_u_h(vec0, const_0x5816); reg2 = (v8u16)__msa_dotp_u_h(vec0, const_0x6218); reg0 = (v8u16)__msa_dpadd_u_h(reg0, vec1, const_0x23); reg1 = (v8u16)__msa_dpadd_u_h(reg1, vec1, const_0x2D); reg2 = (v8u16)__msa_dpadd_u_h(reg2, vec1, const_0x32); reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 7); reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 7); reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 7); reg1 = (v8u16)__msa_min_u_h((v8u16)reg1, const_0xFF); reg2 = (v8u16)__msa_min_u_h((v8u16)reg2, const_0xFF); vec0 = (v16u8)__msa_pckev_b((v16i8)reg0, (v16i8)reg0); vec1 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg1); vec2 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg2); vec4 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); vec5 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); dst0 = (v16u8)__msa_ilvr_b((v16i8)vec5, (v16i8)vec4); dst1 = (v16u8)__msa_ilvl_b((v16i8)vec5, (v16i8)vec4); ST_UB2(dst0, dst1, 
dst_argb, 16); dst_argb += 32; } } void ARGB4444ToARGBRow_MSA(const uint8_t* src_argb4444, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1; v8u16 vec0, vec1, vec2, vec3; v16u8 dst0, dst1, dst2, dst3; for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16u8*)src_argb4444, 0); src1 = (v16u8)__msa_ld_b((const v16u8*)src_argb4444, 16); vec0 = (v8u16)__msa_andi_b(src0, 0x0F); vec1 = (v8u16)__msa_andi_b(src1, 0x0F); vec2 = (v8u16)__msa_andi_b(src0, 0xF0); vec3 = (v8u16)__msa_andi_b(src1, 0xF0); vec0 |= (v8u16)__msa_slli_b((v16i8)vec0, 4); vec1 |= (v8u16)__msa_slli_b((v16i8)vec1, 4); vec2 |= (v8u16)__msa_srli_b((v16i8)vec2, 4); vec3 |= (v8u16)__msa_srli_b((v16i8)vec3, 4); dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0); dst2 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1); ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); src_argb4444 += 32; dst_argb += 64; } } void ARGB1555ToARGBRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_argb, int width) { int x; v8u16 src0, src1; v8u16 vec0, vec1, vec2, vec3, vec4, vec5; v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6; v16u8 dst0, dst1, dst2, dst3; v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); for (x = 0; x < width; x += 16) { src0 = (v8u16)__msa_ld_h((const v8u16*)src_argb1555, 0); src1 = (v8u16)__msa_ld_h((const v8u16*)src_argb1555, 16); vec0 = src0 & const_0x1F; vec1 = src1 & const_0x1F; src0 = (v8u16)__msa_srli_h((v8i16)src0, 5); src1 = (v8u16)__msa_srli_h((v8i16)src1, 5); vec2 = src0 & const_0x1F; vec3 = src1 & const_0x1F; src0 = (v8u16)__msa_srli_h((v8i16)src0, 5); src1 = (v8u16)__msa_srli_h((v8i16)src1, 5); vec4 = src0 & const_0x1F; vec5 = src1 & const_0x1F; src0 = (v8u16)__msa_srli_h((v8i16)src0, 5); src1 = (v8u16)__msa_srli_h((v8i16)src1, 5); reg0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); reg1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); reg2 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); reg3 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); reg4 = (v16u8)__msa_slli_b((v16i8)reg0, 3); reg5 = (v16u8)__msa_slli_b((v16i8)reg1, 3); reg6 = (v16u8)__msa_slli_b((v16i8)reg2, 3); reg4 |= (v16u8)__msa_srai_b((v16i8)reg0, 2); reg5 |= (v16u8)__msa_srai_b((v16i8)reg1, 2); reg6 |= (v16u8)__msa_srai_b((v16i8)reg2, 2); reg3 = -reg3; reg0 = (v16u8)__msa_ilvr_b((v16i8)reg6, (v16i8)reg4); reg1 = (v16u8)__msa_ilvl_b((v16i8)reg6, (v16i8)reg4); reg2 = (v16u8)__msa_ilvr_b((v16i8)reg3, (v16i8)reg5); reg3 = (v16u8)__msa_ilvl_b((v16i8)reg3, (v16i8)reg5); dst0 = (v16u8)__msa_ilvr_b((v16i8)reg2, (v16i8)reg0); dst1 = (v16u8)__msa_ilvl_b((v16i8)reg2, (v16i8)reg0); dst2 = (v16u8)__msa_ilvr_b((v16i8)reg3, (v16i8)reg1); dst3 = (v16u8)__msa_ilvl_b((v16i8)reg3, (v16i8)reg1); ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); src_argb1555 += 32; dst_argb += 64; } } void RGB565ToARGBRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) { int x; v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5; v8u16 reg0, reg1, reg2, reg3, reg4, reg5; v16u8 res0, res1, res2, res3, dst0, dst1, dst2, dst3; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); v8u16 const_0x7E0 = (v8u16)__msa_fill_h(0x7E0); v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800); for (x = 0; x < width; x += 16) { src0 = (v8u16)__msa_ld_h((const v8u16*)src_rgb565, 0); src1 = (v8u16)__msa_ld_h((const v8u16*)src_rgb565, 16); vec0 = src0 & const_0x1F; vec1 = src0 & const_0x7E0; vec2 = src0 & const_0xF800; vec3 = src1 & const_0x1F; vec4 = 
src1 & const_0x7E0; vec5 = src1 & const_0xF800; reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3); reg1 = (v8u16)__msa_srli_h((v8i16)vec1, 3); reg2 = (v8u16)__msa_srli_h((v8i16)vec2, 8); reg3 = (v8u16)__msa_slli_h((v8i16)vec3, 3); reg4 = (v8u16)__msa_srli_h((v8i16)vec4, 3); reg5 = (v8u16)__msa_srli_h((v8i16)vec5, 8); reg0 |= (v8u16)__msa_srli_h((v8i16)vec0, 2); reg1 |= (v8u16)__msa_srli_h((v8i16)vec1, 9); reg2 |= (v8u16)__msa_srli_h((v8i16)vec2, 13); reg3 |= (v8u16)__msa_srli_h((v8i16)vec3, 2); reg4 |= (v8u16)__msa_srli_h((v8i16)vec4, 9); reg5 |= (v8u16)__msa_srli_h((v8i16)vec5, 13); res0 = (v16u8)__msa_ilvev_b((v16i8)reg2, (v16i8)reg0); res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)reg1); res2 = (v16u8)__msa_ilvev_b((v16i8)reg5, (v16i8)reg3); res3 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)reg4); dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); dst2 = (v16u8)__msa_ilvr_b((v16i8)res3, (v16i8)res2); dst3 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res2); ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); src_rgb565 += 32; dst_argb += 64; } } void RGB24ToARGBRow_MSA(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, src2; v16u8 vec0, vec1, vec2; v16u8 dst0, dst1, dst2, dst3; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); v16i8 shuffler = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19}; for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_rgb24, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_rgb24, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_rgb24, 32); vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12); vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4); dst0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)src0); dst1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec0); dst2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec1); dst3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)alpha, (v16i8)vec2); ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); src_rgb24 += 48; dst_argb += 64; } } void RAWToARGBRow_MSA(const uint8_t* src_raw, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, src2; v16u8 vec0, vec1, vec2; v16u8 dst0, dst1, dst2, dst3; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); v16i8 mask = {2, 1, 0, 16, 5, 4, 3, 17, 8, 7, 6, 18, 11, 10, 9, 19}; for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 32); vec0 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 12); vec1 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); vec2 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src2, 4); dst0 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)src0); dst1 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec0); dst2 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec1); dst3 = (v16u8)__msa_vshf_b(mask, (v16i8)alpha, (v16i8)vec2); ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); src_raw += 48; dst_argb += 64; } } void ARGB1555ToYRow_MSA(const uint8_t* src_argb1555, uint8_t* dst_y, int width) { int x; v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5; v8u16 reg0, reg1, reg2, reg3, reg4, reg5; v16u8 dst0; v8u16 const_0x19 = (v8u16)__msa_ldi_h(0x19); v8u16 const_0x81 = (v8u16)__msa_ldi_h(0x81); v8u16 const_0x42 = (v8u16)__msa_ldi_h(0x42); v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); for (x = 0; x < width; x += 16) { src0 = 
(v8u16)__msa_ld_b((const v8i16*)src_argb1555, 0); src1 = (v8u16)__msa_ld_b((const v8i16*)src_argb1555, 16); vec0 = src0 & const_0x1F; vec1 = src1 & const_0x1F; src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); vec2 = src0 & const_0x1F; vec3 = src1 & const_0x1F; src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); vec4 = src0 & const_0x1F; vec5 = src1 & const_0x1F; reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3); reg1 = (v8u16)__msa_slli_h((v8i16)vec1, 3); reg0 |= (v8u16)__msa_srai_h((v8i16)vec0, 2); reg1 |= (v8u16)__msa_srai_h((v8i16)vec1, 2); reg2 = (v8u16)__msa_slli_h((v8i16)vec2, 3); reg3 = (v8u16)__msa_slli_h((v8i16)vec3, 3); reg2 |= (v8u16)__msa_srai_h((v8i16)vec2, 2); reg3 |= (v8u16)__msa_srai_h((v8i16)vec3, 2); reg4 = (v8u16)__msa_slli_h((v8i16)vec4, 3); reg5 = (v8u16)__msa_slli_h((v8i16)vec5, 3); reg4 |= (v8u16)__msa_srai_h((v8i16)vec4, 2); reg5 |= (v8u16)__msa_srai_h((v8i16)vec5, 2); reg0 *= const_0x19; reg1 *= const_0x19; reg2 *= const_0x81; reg3 *= const_0x81; reg4 *= const_0x42; reg5 *= const_0x42; reg0 += reg2; reg1 += reg3; reg0 += reg4; reg1 += reg5; reg0 += const_0x1080; reg1 += const_0x1080; reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8); reg1 = (v8u16)__msa_srai_h((v8i16)reg1, 8); dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); ST_UB(dst0, dst_y); src_argb1555 += 32; dst_y += 16; } } void RGB565ToYRow_MSA(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { int x; v8u16 src0, src1, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 reg0, reg1, reg2, reg3, reg4, reg5; v4u32 res0, res1, res2, res3; v16u8 dst0; v4u32 const_0x810019 = (v4u32)__msa_fill_w(0x810019); v4u32 const_0x010042 = (v4u32)__msa_fill_w(0x010042); v8i16 const_0x1080 = __msa_fill_h(0x1080); v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); v8u16 const_0x7E0 = (v8u16)__msa_fill_h(0x7E0); v8u16 const_0xF800 = (v8u16)__msa_fill_h(0xF800); for (x = 0; x < width; x += 16) { src0 = (v8u16)__msa_ld_b((const v8i16*)src_rgb565, 0); src1 = (v8u16)__msa_ld_b((const v8i16*)src_rgb565, 16); vec0 = src0 & const_0x1F; vec1 = src0 & const_0x7E0; vec2 = src0 & const_0xF800; vec3 = src1 & const_0x1F; vec4 = src1 & const_0x7E0; vec5 = src1 & const_0xF800; reg0 = (v8u16)__msa_slli_h((v8i16)vec0, 3); reg1 = (v8u16)__msa_srli_h((v8i16)vec1, 3); reg2 = (v8u16)__msa_srli_h((v8i16)vec2, 8); reg3 = (v8u16)__msa_slli_h((v8i16)vec3, 3); reg4 = (v8u16)__msa_srli_h((v8i16)vec4, 3); reg5 = (v8u16)__msa_srli_h((v8i16)vec5, 8); reg0 |= (v8u16)__msa_srli_h((v8i16)vec0, 2); reg1 |= (v8u16)__msa_srli_h((v8i16)vec1, 9); reg2 |= (v8u16)__msa_srli_h((v8i16)vec2, 13); reg3 |= (v8u16)__msa_srli_h((v8i16)vec3, 2); reg4 |= (v8u16)__msa_srli_h((v8i16)vec4, 9); reg5 |= (v8u16)__msa_srli_h((v8i16)vec5, 13); vec0 = (v8u16)__msa_ilvr_h((v8i16)reg1, (v8i16)reg0); vec1 = (v8u16)__msa_ilvl_h((v8i16)reg1, (v8i16)reg0); vec2 = (v8u16)__msa_ilvr_h((v8i16)reg4, (v8i16)reg3); vec3 = (v8u16)__msa_ilvl_h((v8i16)reg4, (v8i16)reg3); vec4 = (v8u16)__msa_ilvr_h(const_0x1080, (v8i16)reg2); vec5 = (v8u16)__msa_ilvl_h(const_0x1080, (v8i16)reg2); vec6 = (v8u16)__msa_ilvr_h(const_0x1080, (v8i16)reg5); vec7 = (v8u16)__msa_ilvl_h(const_0x1080, (v8i16)reg5); res0 = __msa_dotp_u_w(vec0, (v8u16)const_0x810019); res1 = __msa_dotp_u_w(vec1, (v8u16)const_0x810019); res2 = __msa_dotp_u_w(vec2, (v8u16)const_0x810019); res3 = __msa_dotp_u_w(vec3, (v8u16)const_0x810019); res0 = __msa_dpadd_u_w(res0, vec4, (v8u16)const_0x010042); res1 = __msa_dpadd_u_w(res1, vec5, (v8u16)const_0x010042); res2 = 
__msa_dpadd_u_w(res2, vec6, (v8u16)const_0x010042); res3 = __msa_dpadd_u_w(res3, vec7, (v8u16)const_0x010042); res0 = (v4u32)__msa_srai_w((v4i32)res0, 8); res1 = (v4u32)__msa_srai_w((v4i32)res1, 8); res2 = (v4u32)__msa_srai_w((v4i32)res2, 8); res3 = (v4u32)__msa_srai_w((v4i32)res3, 8); vec0 = (v8u16)__msa_pckev_h((v8i16)res1, (v8i16)res0); vec1 = (v8u16)__msa_pckev_h((v8i16)res3, (v8i16)res2); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst_y); src_rgb565 += 32; dst_y += 16; } } void RGB24ToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0; v8u16 vec0, vec1, vec2, vec3; v8u16 const_0x8119 = (v8u16)__msa_fill_h(0x8119); v8u16 const_0x42 = (v8u16)__msa_fill_h(0x42); v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); v16i8 mask0 = {0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 11, 12}; v16i8 mask1 = {12, 13, 14, 15, 15, 16, 17, 18, 18, 19, 20, 21, 21, 22, 23, 24}; v16i8 mask2 = {8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 20}; v16i8 mask3 = {4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15, 16}; v16i8 zero = {0}; for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32); reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0); reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1); reg3 = (v16u8)__msa_vshf_b(mask3, zero, (v16i8)src2); vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); vec2 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0); vec3 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2); vec0 = __msa_dotp_u_h((v16u8)vec0, (v16u8)const_0x8119); vec1 = __msa_dotp_u_h((v16u8)vec1, (v16u8)const_0x8119); vec0 = __msa_dpadd_u_h(vec0, (v16u8)vec2, (v16u8)const_0x42); vec1 = __msa_dpadd_u_h(vec1, (v16u8)vec3, (v16u8)const_0x42); vec0 += const_0x1080; vec1 += const_0x1080; vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8); vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst_y); src_argb0 += 48; dst_y += 16; } } void RAWToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, reg0, reg1, reg2, reg3, dst0; v8u16 vec0, vec1, vec2, vec3; v8u16 const_0x8142 = (v8u16)__msa_fill_h(0x8142); v8u16 const_0x19 = (v8u16)__msa_fill_h(0x19); v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); v16i8 mask0 = {0, 1, 2, 3, 3, 4, 5, 6, 6, 7, 8, 9, 9, 10, 11, 12}; v16i8 mask1 = {12, 13, 14, 15, 15, 16, 17, 18, 18, 19, 20, 21, 21, 22, 23, 24}; v16i8 mask2 = {8, 9, 10, 11, 11, 12, 13, 14, 14, 15, 16, 17, 17, 18, 19, 20}; v16i8 mask3 = {4, 5, 6, 7, 7, 8, 9, 10, 10, 11, 12, 13, 13, 14, 15, 16}; v16i8 zero = {0}; for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32); reg0 = (v16u8)__msa_vshf_b(mask0, zero, (v16i8)src0); reg1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); reg2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src2, (v16i8)src1); reg3 = (v16u8)__msa_vshf_b(mask3, zero, (v16i8)src2); vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); vec2 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0); vec3 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2); vec0 = 
__msa_dotp_u_h((v16u8)vec0, (v16u8)const_0x8142); vec1 = __msa_dotp_u_h((v16u8)vec1, (v16u8)const_0x8142); vec0 = __msa_dpadd_u_h(vec0, (v16u8)vec2, (v16u8)const_0x19); vec1 = __msa_dpadd_u_h(vec1, (v16u8)vec3, (v16u8)const_0x19); vec0 += const_0x1080; vec1 += const_0x1080; vec0 = (v8u16)__msa_srai_h((v8i16)vec0, 8); vec1 = (v8u16)__msa_srai_h((v8i16)vec1, 8); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst_y); src_argb0 += 48; dst_y += 16; } } void ARGB1555ToUVRow_MSA(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; const uint16_t* s = (const uint16_t*)src_argb1555; const uint16_t* t = (const uint16_t*)(src_argb1555 + src_stride_argb1555); int64_t res0, res1; v8u16 src0, src1, src2, src3, reg0, reg1, reg2, reg3; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6; v16u8 dst0; v8u16 const_0x70 = (v8u16)__msa_ldi_h(0x70); v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A); v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26); v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E); v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12); v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); for (x = 0; x < width; x += 16) { src0 = (v8u16)__msa_ld_b((v8i16*)s, 0); src1 = (v8u16)__msa_ld_b((v8i16*)s, 16); src2 = (v8u16)__msa_ld_b((v8i16*)t, 0); src3 = (v8u16)__msa_ld_b((v8i16*)t, 16); vec0 = src0 & const_0x1F; vec1 = src1 & const_0x1F; vec0 += src2 & const_0x1F; vec1 += src3 & const_0x1F; vec0 = (v8u16)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); src2 = (v8u16)__msa_srai_h((v8i16)src2, 5); src3 = (v8u16)__msa_srai_h((v8i16)src3, 5); vec2 = src0 & const_0x1F; vec3 = src1 & const_0x1F; vec2 += src2 & const_0x1F; vec3 += src3 & const_0x1F; vec2 = (v8u16)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); src2 = (v8u16)__msa_srai_h((v8i16)src2, 5); src3 = (v8u16)__msa_srai_h((v8i16)src3, 5); vec4 = src0 & const_0x1F; vec5 = src1 & const_0x1F; vec4 += src2 & const_0x1F; vec5 += src3 & const_0x1F; vec4 = (v8u16)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); vec4 = __msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); vec6 = (v8u16)__msa_slli_h((v8i16)vec0, 1); vec6 |= (v8u16)__msa_srai_h((v8i16)vec0, 6); vec0 = (v8u16)__msa_slli_h((v8i16)vec2, 1); vec0 |= (v8u16)__msa_srai_h((v8i16)vec2, 6); vec2 = (v8u16)__msa_slli_h((v8i16)vec4, 1); vec2 |= (v8u16)__msa_srai_h((v8i16)vec4, 6); reg0 = vec6 * const_0x70; reg1 = vec0 * const_0x4A; reg2 = vec2 * const_0x70; reg3 = vec0 * const_0x5E; reg0 += const_0x8080; reg1 += vec2 * const_0x26; reg2 += const_0x8080; reg3 += vec6 * const_0x12; reg0 -= reg1; reg2 -= reg3; reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8); reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 8); dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); res0 = __msa_copy_u_d((v2i64)dst0, 0); res1 = __msa_copy_u_d((v2i64)dst0, 1); SD(res0, dst_u); SD(res1, dst_v); s += 16; t += 16; dst_u += 8; dst_v += 8; } } void RGB565ToUVRow_MSA(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; const uint16_t* s = (const uint16_t*)src_rgb565; const uint16_t* t = (const uint16_t*)(src_rgb565 + src_stride_rgb565); int64_t res0, res1; v8u16 src0, src1, src2, src3, reg0, reg1, reg2, reg3; v8u16 vec0, vec1, vec2, vec3, vec4, vec5; v16u8 dst0; v8u16 const_0x70 = 
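/* Chroma weights shared by these ToUV rows: U = (112 * B - 74 * G - 38 * R + 0x8080) >> 8 and V = (112 * R - 94 * G - 18 * B + 0x8080) >> 8 (0x70 = 112, 0x4A = 74, 0x26 = 38, 0x5E = 94, 0x12 = 18). */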
(v8u16)__msa_ldi_h(0x70); v8u16 const_0x4A = (v8u16)__msa_ldi_h(0x4A); v8u16 const_0x26 = (v8u16)__msa_ldi_h(0x26); v8u16 const_0x5E = (v8u16)__msa_ldi_h(0x5E); v8u16 const_0x12 = (v8u16)__msa_ldi_h(0x12); v8u16 const_32896 = (v8u16)__msa_fill_h(0x8080); v8u16 const_0x1F = (v8u16)__msa_ldi_h(0x1F); v8u16 const_0x3F = (v8u16)__msa_fill_h(0x3F); for (x = 0; x < width; x += 16) { src0 = (v8u16)__msa_ld_b((v8i16*)s, 0); src1 = (v8u16)__msa_ld_b((v8i16*)s, 16); src2 = (v8u16)__msa_ld_b((v8i16*)t, 0); src3 = (v8u16)__msa_ld_b((v8i16*)t, 16); vec0 = src0 & const_0x1F; vec1 = src1 & const_0x1F; vec0 += src2 & const_0x1F; vec1 += src3 & const_0x1F; vec0 = (v8u16)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); src0 = (v8u16)__msa_srai_h((v8i16)src0, 5); src1 = (v8u16)__msa_srai_h((v8i16)src1, 5); src2 = (v8u16)__msa_srai_h((v8i16)src2, 5); src3 = (v8u16)__msa_srai_h((v8i16)src3, 5); vec2 = src0 & const_0x3F; vec3 = src1 & const_0x3F; vec2 += src2 & const_0x3F; vec3 += src3 & const_0x3F; vec1 = (v8u16)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); src0 = (v8u16)__msa_srai_h((v8i16)src0, 6); src1 = (v8u16)__msa_srai_h((v8i16)src1, 6); src2 = (v8u16)__msa_srai_h((v8i16)src2, 6); src3 = (v8u16)__msa_srai_h((v8i16)src3, 6); vec4 = src0 & const_0x1F; vec5 = src1 & const_0x1F; vec4 += src2 & const_0x1F; vec5 += src3 & const_0x1F; vec2 = (v8u16)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); vec3 = (v8u16)__msa_slli_h((v8i16)vec0, 1); vec3 |= (v8u16)__msa_srai_h((v8i16)vec0, 6); vec4 = (v8u16)__msa_slli_h((v8i16)vec2, 1); vec4 |= (v8u16)__msa_srai_h((v8i16)vec2, 6); reg0 = vec3 * const_0x70; reg1 = vec1 * const_0x4A; reg2 = vec4 * const_0x70; reg3 = vec1 * const_0x5E; reg0 += const_32896; reg1 += vec4 * const_0x26; reg2 += const_32896; reg3 += vec3 * const_0x12; reg0 -= reg1; reg2 -= reg3; reg0 = (v8u16)__msa_srai_h((v8i16)reg0, 8); reg2 = (v8u16)__msa_srai_h((v8i16)reg2, 8); dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); res0 = __msa_copy_u_d((v2i64)dst0, 0); res1 = __msa_copy_u_d((v2i64)dst0, 1); SD(res0, dst_u); SD(res1, dst_v); s += 16; t += 16; dst_u += 8; dst_v += 8; } } void RGB24ToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; const uint8_t* s = src_rgb0; const uint8_t* t = src_rgb0 + src_stride_rgb; int64_t res0, res1; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 inp0, inp1, inp2, inp3, inp4, inp5; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8i16 reg0, reg1, reg2, reg3; v16u8 dst0; v8u16 const_0x70 = (v8u16)__msa_fill_h(0x70); v8u16 const_0x4A = (v8u16)__msa_fill_h(0x4A); v8u16 const_0x26 = (v8u16)__msa_fill_h(0x26); v8u16 const_0x5E = (v8u16)__msa_fill_h(0x5E); v8u16 const_0x12 = (v8u16)__msa_fill_h(0x12); v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); v16i8 mask = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19}; v16i8 zero = {0}; for (x = 0; x < width; x += 16) { inp0 = (v16u8)__msa_ld_b((const v16i8*)s, 0); inp1 = (v16u8)__msa_ld_b((const v16i8*)s, 16); inp2 = (v16u8)__msa_ld_b((const v16i8*)s, 32); inp3 = (v16u8)__msa_ld_b((const v16i8*)t, 0); inp4 = (v16u8)__msa_ld_b((const v16i8*)t, 16); inp5 = (v16u8)__msa_ld_b((const v16i8*)t, 32); src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12); src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12); src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8); src6 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp4, 8); src3 = 
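/* RGB24ToUV: the sldi_b shifts realign the three packed source vectors on 12-byte (four-pixel) boundaries, and the vshf mask widens each 3-byte pixel to a 4-byte lane before the two rows are averaged. */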
(v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp2, 4); src7 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp5, 4); src0 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp0); src1 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src1); src2 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src2); src3 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src3); src4 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp3); src5 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src5); src6 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src6); src7 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src7); vec0 = (v8u16)__msa_ilvr_b((v16i8)src4, (v16i8)src0); vec1 = (v8u16)__msa_ilvl_b((v16i8)src4, (v16i8)src0); vec2 = (v8u16)__msa_ilvr_b((v16i8)src5, (v16i8)src1); vec3 = (v8u16)__msa_ilvl_b((v16i8)src5, (v16i8)src1); vec4 = (v8u16)__msa_ilvr_b((v16i8)src6, (v16i8)src2); vec5 = (v8u16)__msa_ilvl_b((v16i8)src6, (v16i8)src2); vec6 = (v8u16)__msa_ilvr_b((v16i8)src7, (v16i8)src3); vec7 = (v8u16)__msa_ilvl_b((v16i8)src7, (v16i8)src3); vec0 = (v8u16)__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); vec1 = (v8u16)__msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); vec2 = (v8u16)__msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); vec3 = (v8u16)__msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); vec4 = (v8u16)__msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); vec5 = (v8u16)__msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); vec6 = (v8u16)__msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); vec7 = (v8u16)__msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); reg0 = (v8i16)__msa_pckev_d((v2i64)vec1, (v2i64)vec0); reg1 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec2); reg2 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4); reg3 = (v8i16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); reg0 += (v8i16)__msa_pckod_d((v2i64)vec1, (v2i64)vec0); reg1 += (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec2); reg2 += (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec4); reg3 += (v8i16)__msa_pckod_d((v2i64)vec7, (v2i64)vec6); reg0 = __msa_srai_h((v8i16)reg0, 2); reg1 = __msa_srai_h((v8i16)reg1, 2); reg2 = __msa_srai_h((v8i16)reg2, 2); reg3 = __msa_srai_h((v8i16)reg3, 2); vec4 = (v8u16)__msa_pckev_h(reg1, reg0); vec5 = (v8u16)__msa_pckev_h(reg3, reg2); vec6 = (v8u16)__msa_pckod_h(reg1, reg0); vec7 = (v8u16)__msa_pckod_h(reg3, reg2); vec0 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4); vec1 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6); vec2 = (v8u16)__msa_pckod_h((v8i16)vec5, (v8i16)vec4); vec3 = vec0 * const_0x70; vec4 = vec1 * const_0x4A; vec5 = vec2 * const_0x26; vec2 *= const_0x70; vec1 *= const_0x5E; vec0 *= const_0x12; reg0 = __msa_subv_h((v8i16)vec3, (v8i16)vec4); reg1 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec5); reg2 = __msa_subv_h((v8i16)vec2, (v8i16)vec1); reg3 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec0); reg0 += reg1; reg2 += reg3; reg0 = __msa_srai_h(reg0, 8); reg2 = __msa_srai_h(reg2, 8); dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); res0 = __msa_copy_u_d((v2i64)dst0, 0); res1 = __msa_copy_u_d((v2i64)dst0, 1); SD(res0, dst_u); SD(res1, dst_v); t += 48; s += 48; dst_u += 8; dst_v += 8; } } void RAWToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; const uint8_t* s = src_rgb0; const uint8_t* t = src_rgb0 + src_stride_rgb; int64_t res0, res1; v16u8 inp0, inp1, inp2, inp3, inp4, inp5; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8i16 reg0, reg1, reg2, reg3; v16u8 dst0; v8u16 const_0x70 = (v8u16)__msa_fill_h(0x70); v8u16 const_0x4A = (v8u16)__msa_fill_h(0x4A); v8u16 const_0x26 = 
(v8u16)__msa_fill_h(0x26); v8u16 const_0x5E = (v8u16)__msa_fill_h(0x5E); v8u16 const_0x12 = (v8u16)__msa_fill_h(0x12); v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); v16i8 mask = {0, 1, 2, 16, 3, 4, 5, 17, 6, 7, 8, 18, 9, 10, 11, 19}; v16i8 zero = {0}; for (x = 0; x < width; x += 16) { inp0 = (v16u8)__msa_ld_b((const v16i8*)s, 0); inp1 = (v16u8)__msa_ld_b((const v16i8*)s, 16); inp2 = (v16u8)__msa_ld_b((const v16i8*)s, 32); inp3 = (v16u8)__msa_ld_b((const v16i8*)t, 0); inp4 = (v16u8)__msa_ld_b((const v16i8*)t, 16); inp5 = (v16u8)__msa_ld_b((const v16i8*)t, 32); src1 = (v16u8)__msa_sldi_b((v16i8)inp1, (v16i8)inp0, 12); src5 = (v16u8)__msa_sldi_b((v16i8)inp4, (v16i8)inp3, 12); src2 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp1, 8); src6 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp4, 8); src3 = (v16u8)__msa_sldi_b((v16i8)inp2, (v16i8)inp2, 4); src7 = (v16u8)__msa_sldi_b((v16i8)inp5, (v16i8)inp5, 4); src0 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp0); src1 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src1); src2 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src2); src3 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src3); src4 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)inp3); src5 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src5); src6 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src6); src7 = (v16u8)__msa_vshf_b(mask, (v16i8)zero, (v16i8)src7); vec0 = (v8u16)__msa_ilvr_b((v16i8)src4, (v16i8)src0); vec1 = (v8u16)__msa_ilvl_b((v16i8)src4, (v16i8)src0); vec2 = (v8u16)__msa_ilvr_b((v16i8)src5, (v16i8)src1); vec3 = (v8u16)__msa_ilvl_b((v16i8)src5, (v16i8)src1); vec4 = (v8u16)__msa_ilvr_b((v16i8)src6, (v16i8)src2); vec5 = (v8u16)__msa_ilvl_b((v16i8)src6, (v16i8)src2); vec6 = (v8u16)__msa_ilvr_b((v16i8)src7, (v16i8)src3); vec7 = (v8u16)__msa_ilvl_b((v16i8)src7, (v16i8)src3); vec0 = (v8u16)__msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); vec1 = (v8u16)__msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); vec2 = (v8u16)__msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); vec3 = (v8u16)__msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); vec4 = (v8u16)__msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); vec5 = (v8u16)__msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); vec6 = (v8u16)__msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); vec7 = (v8u16)__msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); reg0 = (v8i16)__msa_pckev_d((v2i64)vec1, (v2i64)vec0); reg1 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec2); reg2 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec4); reg3 = (v8i16)__msa_pckev_d((v2i64)vec7, (v2i64)vec6); reg0 += (v8i16)__msa_pckod_d((v2i64)vec1, (v2i64)vec0); reg1 += (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec2); reg2 += (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec4); reg3 += (v8i16)__msa_pckod_d((v2i64)vec7, (v2i64)vec6); reg0 = __msa_srai_h(reg0, 2); reg1 = __msa_srai_h(reg1, 2); reg2 = __msa_srai_h(reg2, 2); reg3 = __msa_srai_h(reg3, 2); vec4 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); vec5 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); vec6 = (v8u16)__msa_pckod_h((v8i16)reg1, (v8i16)reg0); vec7 = (v8u16)__msa_pckod_h((v8i16)reg3, (v8i16)reg2); vec0 = (v8u16)__msa_pckod_h((v8i16)vec5, (v8i16)vec4); vec1 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6); vec2 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4); vec3 = vec0 * const_0x70; vec4 = vec1 * const_0x4A; vec5 = vec2 * const_0x26; vec2 *= const_0x70; vec1 *= const_0x5E; vec0 *= const_0x12; reg0 = __msa_subv_h((v8i16)vec3, (v8i16)vec4); reg1 = __msa_subv_h((v8i16)const_0x8080, (v8i16)vec5); reg2 = __msa_subv_h((v8i16)vec2, (v8i16)vec1); reg3 = __msa_subv_h((v8i16)const_0x8080, 
(v8i16)vec0); reg0 += reg1; reg2 += reg3; reg0 = __msa_srai_h(reg0, 8); reg2 = __msa_srai_h(reg2, 8); dst0 = (v16u8)__msa_pckev_b((v16i8)reg2, (v16i8)reg0); res0 = __msa_copy_u_d((v2i64)dst0, 0); res1 = __msa_copy_u_d((v2i64)dst0, 1); SD(res0, dst_u); SD(res1, dst_v); t += 48; s += 48; dst_u += 8; dst_v += 8; } } void NV12ToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; uint64_t val0, val1; v16u8 src0, src1, res0, res1, dst0, dst1; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v4i32 vec_ubvr, vec_ugvg; v16u8 zero = {0}; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); for (x = 0; x < width; x += 8) { val0 = LD(src_y); val1 = LD(src_uv); src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); res0 = (v16u8)__msa_ilvev_b((v16i8)vec2, (v16i8)vec0); res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1); dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); ST_UB2(dst0, dst1, dst_argb, 16); src_y += 8; src_uv += 8; dst_argb += 32; } } void NV12ToRGB565Row_MSA(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { int x; uint64_t val0, val1; v16u8 src0, src1, dst0; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v4i32 vec_ubvr, vec_ugvg; v16u8 zero = {0}; YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); for (x = 0; x < width; x += 8) { val0 = LD(src_y); val1 = LD(src_uv); src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); vec0 = vec0 >> 3; vec1 = (vec1 >> 2) << 5; vec2 = (vec2 >> 3) << 11; dst0 = (v16u8)(vec0 | vec1 | vec2); ST_UB(dst0, dst_rgb565); src_y += 8; src_uv += 8; dst_rgb565 += 16; } } void NV21ToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; uint64_t val0, val1; v16u8 src0, src1, res0, res1, dst0, dst1; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v4i32 vec_ubvr, vec_ugvg; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); v16u8 zero = {0}; v16i8 shuffler = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14}; YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); for (x = 0; x < width; x += 8) { val0 = LD(src_y); val1 = LD(src_vu); src0 = (v16u8)__msa_insert_d((v2i64)zero, 0, val0); src1 = (v16u8)__msa_insert_d((v2i64)zero, 0, val1); src1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1); YUVTORGB(src0, src1, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); res0 = (v16u8)__msa_ilvev_b((v16i8)vec2, (v16i8)vec0); res1 = (v16u8)__msa_ilvev_b((v16i8)alpha, (v16i8)vec1); 
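/* res0 holds interleaved B,R byte pairs and res1 holds G,alpha pairs; the ilvr/ilvl below weave them into B,G,R,A memory order, i.e. little-endian ARGB, eight pixels per iteration. */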
dst0 = (v16u8)__msa_ilvr_b((v16i8)res1, (v16i8)res0); dst1 = (v16u8)__msa_ilvl_b((v16i8)res1, (v16i8)res0); ST_UB2(dst0, dst1, dst_argb, 16); src_y += 8; src_vu += 8; dst_argb += 32; } } void SobelRow_MSA(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, vec0, dst0, dst1, dst2, dst3; v16i8 mask0 = {0, 0, 0, 16, 1, 1, 1, 16, 2, 2, 2, 16, 3, 3, 3, 16}; v16i8 const_0x4 = __msa_ldi_b(0x4); v16i8 mask1 = mask0 + const_0x4; v16i8 mask2 = mask1 + const_0x4; v16i8 mask3 = mask2 + const_0x4; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 0); vec0 = __msa_adds_u_b(src0, src1); dst0 = (v16u8)__msa_vshf_b(mask0, (v16i8)alpha, (v16i8)vec0); dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)alpha, (v16i8)vec0); dst2 = (v16u8)__msa_vshf_b(mask2, (v16i8)alpha, (v16i8)vec0); dst3 = (v16u8)__msa_vshf_b(mask3, (v16i8)alpha, (v16i8)vec0); ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); src_sobelx += 16; src_sobely += 16; dst_argb += 64; } } void SobelToPlaneRow_MSA(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; for (x = 0; x < width; x += 32) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 0); src3 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 16); dst0 = __msa_adds_u_b(src0, src2); dst1 = __msa_adds_u_b(src1, src3); ST_UB2(dst0, dst1, dst_y, 16); src_sobelx += 32; src_sobely += 32; dst_y += 32; } } void SobelXYRow_MSA(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, vec0, vec1, vec2; v16u8 reg0, reg1, dst0, dst1, dst2, dst3; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_sobelx, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_sobely, 0); vec0 = __msa_adds_u_b(src0, src1); vec1 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src1); vec2 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src1); reg0 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)vec0); reg1 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)vec0); dst0 = (v16u8)__msa_ilvr_b((v16i8)reg0, (v16i8)vec1); dst1 = (v16u8)__msa_ilvl_b((v16i8)reg0, (v16i8)vec1); dst2 = (v16u8)__msa_ilvr_b((v16i8)reg1, (v16i8)vec2); dst3 = (v16u8)__msa_ilvl_b((v16i8)reg1, (v16i8)vec2); ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); src_sobelx += 16; src_sobely += 16; dst_argb += 64; } } void ARGBToYJRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0; v16u8 const_0x4B0F = (v16u8)__msa_fill_h(0x4B0F); v16u8 const_0x26 = (v16u8)__msa_fill_h(0x26); v8u16 const_0x40 = (v8u16)__msa_fill_h(0x40); for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32); src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48); ARGBTOY(src0, src1, src2, src3, const_0x4B0F, const_0x26, const_0x40, 7, dst0); ST_UB(dst0, dst_y); src_argb0 += 64; dst_y += 16; } } void BGRAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0; v16u8 const_0x4200 = (v16u8)__msa_fill_h(0x4200); v16u8 const_0x1981 = (v16u8)__msa_fill_h(0x1981); v8u16 const_0x1080 = 
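/* BGRAToY packs two byte weights per 16-bit lane (0x42,0x00 and 0x19,0x81) so that the dot products inside the ARGBTOY helper weight the A,R byte pairs by 0 and 66 and the G,B pairs by 129 and 25 before the 0x1080 bias is added. */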
(v8u16)__msa_fill_h(0x1080); for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32); src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48); ARGBTOY(src0, src1, src2, src3, const_0x4200, const_0x1981, const_0x1080, 8, dst0); ST_UB(dst0, dst_y); src_argb0 += 64; dst_y += 16; } } void ABGRToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0; v16u8 const_0x8142 = (v16u8)__msa_fill_h(0x8142); v16u8 const_0x19 = (v16u8)__msa_fill_h(0x19); v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32); src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48); ARGBTOY(src0, src1, src2, src3, const_0x8142, const_0x19, const_0x1080, 8, dst0); ST_UB(dst0, dst_y); src_argb0 += 64; dst_y += 16; } } void RGBAToYRow_MSA(const uint8_t* src_argb0, uint8_t* dst_y, int width) { int x; v16u8 src0, src1, src2, src3, dst0; v16u8 const_0x1900 = (v16u8)__msa_fill_h(0x1900); v16u8 const_0x4281 = (v16u8)__msa_fill_h(0x4281); v8u16 const_0x1080 = (v8u16)__msa_fill_h(0x1080); for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 32); src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 48); ARGBTOY(src0, src1, src2, src3, const_0x1900, const_0x4281, const_0x1080, 8, dst0); ST_UB(dst0, dst_y); src_argb0 += 64; dst_y += 16; } } void ARGBToUVJRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; const uint8_t* s = src_rgb0; const uint8_t* t = src_rgb0 + src_stride_rgb; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 vec0, vec1, vec2, vec3; v16u8 dst0, dst1; v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}; v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31}; v16i8 shuffler3 = {1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 30}; v16u8 const_0x7F = (v16u8)__msa_fill_h(0x7F); v16u8 const_0x6B14 = (v16u8)__msa_fill_h(0x6B14); v16u8 const_0x2B54 = (v16u8)__msa_fill_h(0x2B54); v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); for (x = 0; x < width; x += 32) { src0 = (v16u8)__msa_ld_b((const v16i8*)s, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)s, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)s, 32); src3 = (v16u8)__msa_ld_b((const v16i8*)s, 48); src4 = (v16u8)__msa_ld_b((const v16i8*)t, 0); src5 = (v16u8)__msa_ld_b((const v16i8*)t, 16); src6 = (v16u8)__msa_ld_b((const v16i8*)t, 32); src7 = (v16u8)__msa_ld_b((const v16i8*)t, 48); src0 = __msa_aver_u_b(src0, src4); src1 = __msa_aver_u_b(src1, src5); src2 = __msa_aver_u_b(src2, src6); src3 = __msa_aver_u_b(src3, src7); src4 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0); src5 = (v16u8)__msa_pckev_w((v4i32)src3, (v4i32)src2); src6 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2); vec0 = __msa_aver_u_b(src4, src6); vec1 = __msa_aver_u_b(src5, src7); src0 = (v16u8)__msa_ld_b((v16i8*)s, 64); src1 = (v16u8)__msa_ld_b((v16i8*)s, 80); src2 = (v16u8)__msa_ld_b((v16i8*)s, 96); src3 = (v16u8)__msa_ld_b((v16i8*)s, 112); src4 = 
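/* ARGBToUVJ subsamples with a 2x2 box filter: aver_u_b averages the two rows vertically, then the pckev_w/pckod_w plus aver_u_b sequence averages horizontally adjacent pixels; the same steps repeat below for the second 64 bytes of each row. */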
(v16u8)__msa_ld_b((v16i8*)t, 64); src5 = (v16u8)__msa_ld_b((v16i8*)t, 80); src6 = (v16u8)__msa_ld_b((v16i8*)t, 96); src7 = (v16u8)__msa_ld_b((v16i8*)t, 112); src0 = __msa_aver_u_b(src0, src4); src1 = __msa_aver_u_b(src1, src5); src2 = __msa_aver_u_b(src2, src6); src3 = __msa_aver_u_b(src3, src7); src4 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0); src5 = (v16u8)__msa_pckev_w((v4i32)src3, (v4i32)src2); src6 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0); src7 = (v16u8)__msa_pckod_w((v4i32)src3, (v4i32)src2); vec2 = __msa_aver_u_b(src4, src6); vec3 = __msa_aver_u_b(src5, src7); ARGBTOUV(vec0, vec1, vec2, vec3, const_0x6B14, const_0x7F, const_0x2B54, const_0x8080, shuffler1, shuffler0, shuffler2, shuffler3, dst0, dst1); ST_UB(dst0, dst_v); ST_UB(dst1, dst_u); s += 128; t += 128; dst_v += 16; dst_u += 16; } } void BGRAToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; const uint8_t* s = src_rgb0; const uint8_t* t = src_rgb0 + src_stride_rgb; v16u8 dst0, dst1, vec0, vec1, vec2, vec3; v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}; v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31}; v16i8 shuffler3 = {2, 1, 6, 5, 10, 9, 14, 13, 18, 17, 22, 21, 26, 25, 30, 29}; v16u8 const_0x125E = (v16u8)__msa_fill_h(0x125E); v16u8 const_0x7000 = (v16u8)__msa_fill_h(0x7000); v16u8 const_0x264A = (v16u8)__msa_fill_h(0x264A); v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); for (x = 0; x < width; x += 32) { READ_ARGB(s, t, vec0, vec1, vec2, vec3); ARGBTOUV(vec0, vec1, vec2, vec3, const_0x125E, const_0x7000, const_0x264A, const_0x8080, shuffler0, shuffler1, shuffler2, shuffler3, dst0, dst1); ST_UB(dst0, dst_v); ST_UB(dst1, dst_u); s += 128; t += 128; dst_v += 16; dst_u += 16; } } void ABGRToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; const uint8_t* s = src_rgb0; const uint8_t* t = src_rgb0 + src_stride_rgb; v16u8 src0, src1, src2, src3; v16u8 dst0, dst1; v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}; v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31}; v16i8 shuffler3 = {1, 2, 5, 6, 9, 10, 13, 14, 17, 18, 21, 22, 25, 26, 29, 30}; v16u8 const_0x4A26 = (v16u8)__msa_fill_h(0x4A26); v16u8 const_0x0070 = (v16u8)__msa_fill_h(0x0070); v16u8 const_0x125E = (v16u8)__msa_fill_h(0x125E); v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); for (x = 0; x < width; x += 32) { READ_ARGB(s, t, src0, src1, src2, src3); ARGBTOUV(src0, src1, src2, src3, const_0x4A26, const_0x0070, const_0x125E, const_0x8080, shuffler1, shuffler0, shuffler2, shuffler3, dst0, dst1); ST_UB(dst0, dst_u); ST_UB(dst1, dst_v); s += 128; t += 128; dst_u += 16; dst_v += 16; } } void RGBAToUVRow_MSA(const uint8_t* src_rgb0, int src_stride_rgb, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; const uint8_t* s = src_rgb0; const uint8_t* t = src_rgb0 + src_stride_rgb; v16u8 dst0, dst1, vec0, vec1, vec2, vec3; v16i8 shuffler0 = {0, 1, 4, 5, 8, 9, 12, 13, 16, 17, 20, 21, 24, 25, 28, 29}; v16i8 shuffler1 = {2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31}; v16i8 shuffler2 = {0, 3, 4, 7, 8, 11, 12, 15, 16, 19, 20, 23, 24, 27, 28, 31}; v16i8 shuffler3 = {2, 1, 6, 5, 10, 9, 14, 13, 18, 17, 22, 21, 26, 25, 30, 29}; v16u8 const_0x125E = (v16u8)__msa_fill_h(0x264A); 
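/* Note: the names and fill values of const_0x125E and const_0x264A are crossed here (const_0x125E holds 0x264A and vice versa), apparently so the shared ARGBTOUV macro receives the coefficients in the slot order RGBA's byte layout requires; the arithmetic matches the other ToUV rows, only the names mislead. */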
v16u8 const_0x7000 = (v16u8)__msa_fill_h(0x7000); v16u8 const_0x264A = (v16u8)__msa_fill_h(0x125E); v8u16 const_0x8080 = (v8u16)__msa_fill_h(0x8080); for (x = 0; x < width; x += 32) { READ_ARGB(s, t, vec0, vec1, vec2, vec3); ARGBTOUV(vec0, vec1, vec2, vec3, const_0x125E, const_0x7000, const_0x264A, const_0x8080, shuffler0, shuffler1, shuffler2, shuffler3, dst0, dst1); ST_UB(dst0, dst_u); ST_UB(dst1, dst_v); s += 128; t += 128; dst_u += 16; dst_v += 16; } } void I444ToARGBRow_MSA(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; v16u8 src0, src1, src2, dst0, dst1; v8u16 vec0, vec1, vec2; v4i32 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg8, reg9; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); v8i16 zero = {0}; YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); for (x = 0; x < width; x += 8) { READI444(src_y, src_u, src_v, src0, src1, src2); vec0 = (v8u16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); reg0 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0); reg1 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0); reg0 *= vec_yg; reg1 *= vec_yg; reg0 = __msa_srai_w(reg0, 16); reg1 = __msa_srai_w(reg1, 16); reg4 = reg0 + vec_br; reg5 = reg1 + vec_br; reg2 = reg0 + vec_bg; reg3 = reg1 + vec_bg; reg0 += vec_bb; reg1 += vec_bb; vec0 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src1); vec1 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src2); reg6 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0); reg7 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0); reg8 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec1); reg9 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec1); reg0 -= reg6 * vec_ub; reg1 -= reg7 * vec_ub; reg2 -= reg6 * vec_ug; reg3 -= reg7 * vec_ug; reg4 -= reg8 * vec_vr; reg5 -= reg9 * vec_vr; reg2 -= reg8 * vec_vg; reg3 -= reg9 * vec_vg; reg0 = __msa_srai_w(reg0, 6); reg1 = __msa_srai_w(reg1, 6); reg2 = __msa_srai_w(reg2, 6); reg3 = __msa_srai_w(reg3, 6); reg4 = __msa_srai_w(reg4, 6); reg5 = __msa_srai_w(reg5, 6); CLIP_0TO255(reg0, reg1, reg2, reg3, reg4, reg5); vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); vec2 = (v8u16)__msa_pckev_h((v8i16)reg5, (v8i16)reg4); vec0 = (v8u16)__msa_ilvev_b((v16i8)vec1, (v16i8)vec0); vec1 = (v8u16)__msa_ilvev_b((v16i8)alpha, (v16i8)vec2); dst0 = (v16u8)__msa_ilvr_h((v8i16)vec1, (v8i16)vec0); dst1 = (v16u8)__msa_ilvl_h((v8i16)vec1, (v8i16)vec0); ST_UB2(dst0, dst1, dst_argb, 16); src_y += 8; src_u += 8; src_v += 8; dst_argb += 32; } } void I400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) { int x; v16u8 src0, res0, res1, res2, res3, res4, dst0, dst1, dst2, dst3; v8i16 vec0, vec1; v4i32 reg0, reg1, reg2, reg3; v4i32 vec_yg = __msa_fill_w(0x4A35); v8i16 vec_ygb = __msa_fill_h(0xFB78); v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); v8i16 max = __msa_ldi_h(0xFF); v8i16 zero = {0}; for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_y, 0); vec0 = (v8i16)__msa_ilvr_b((v16i8)src0, (v16i8)src0); vec1 = (v8i16)__msa_ilvl_b((v16i8)src0, (v16i8)src0); reg0 = (v4i32)__msa_ilvr_h(zero, vec0); reg1 = (v4i32)__msa_ilvl_h(zero, vec0); reg2 = (v4i32)__msa_ilvr_h(zero, vec1); reg3 = (v4i32)__msa_ilvl_h(zero, vec1); reg0 *= vec_yg; reg1 *= vec_yg; reg2 *= vec_yg; reg3 *= vec_yg; reg0 = __msa_srai_w(reg0, 16); reg1 = __msa_srai_w(reg1, 16); reg2 = __msa_srai_w(reg2, 16); reg3 = __msa_srai_w(reg3, 
16); vec0 = (v8i16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); vec1 = (v8i16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); vec0 += vec_ygb; vec1 += vec_ygb; vec0 = __msa_srai_h(vec0, 6); vec1 = __msa_srai_h(vec1, 6); vec0 = __msa_maxi_s_h(vec0, 0); vec1 = __msa_maxi_s_h(vec1, 0); vec0 = __msa_min_s_h(max, vec0); vec1 = __msa_min_s_h(max, vec1); res0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); res1 = (v16u8)__msa_ilvr_b((v16i8)res0, (v16i8)res0); res2 = (v16u8)__msa_ilvl_b((v16i8)res0, (v16i8)res0); res3 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)res0); res4 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)res0); dst0 = (v16u8)__msa_ilvr_b((v16i8)res3, (v16i8)res1); dst1 = (v16u8)__msa_ilvl_b((v16i8)res3, (v16i8)res1); dst2 = (v16u8)__msa_ilvr_b((v16i8)res4, (v16i8)res2); dst3 = (v16u8)__msa_ilvl_b((v16i8)res4, (v16i8)res2); ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); src_y += 16; dst_argb += 64; } } void J400ToARGBRow_MSA(const uint8_t* src_y, uint8_t* dst_argb, int width) { int x; v16u8 src0, vec0, vec1, vec2, vec3, dst0, dst1, dst2, dst3; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_y, 0); vec0 = (v16u8)__msa_ilvr_b((v16i8)src0, (v16i8)src0); vec1 = (v16u8)__msa_ilvl_b((v16i8)src0, (v16i8)src0); vec2 = (v16u8)__msa_ilvr_b((v16i8)alpha, (v16i8)src0); vec3 = (v16u8)__msa_ilvl_b((v16i8)alpha, (v16i8)src0); dst0 = (v16u8)__msa_ilvr_b((v16i8)vec2, (v16i8)vec0); dst1 = (v16u8)__msa_ilvl_b((v16i8)vec2, (v16i8)vec0); dst2 = (v16u8)__msa_ilvr_b((v16i8)vec3, (v16i8)vec1); dst3 = (v16u8)__msa_ilvl_b((v16i8)vec3, (v16i8)vec1); ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); src_y += 16; dst_argb += 64; } } void YUY2ToARGBRow_MSA(const uint8_t* src_yuy2, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; v16u8 src0, src1, src2; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v4i32 vec_ubvr, vec_ugvg; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_yuy2, 0); src1 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); src2 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); STOREARGB(vec0, vec1, vec2, alpha, dst_argb); src_yuy2 += 16; dst_argb += 32; } } void UYVYToARGBRow_MSA(const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { int x; v16u8 src0, src1, src2; v8i16 vec0, vec1, vec2; v4i32 vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg; v4i32 vec_ubvr, vec_ugvg; v16u8 alpha = (v16u8)__msa_ldi_b(ALPHA_VAL); YUVTORGB_SETUP(yuvconstants, vec_ub, vec_vr, vec_ug, vec_vg, vec_bb, vec_bg, vec_br, vec_yg); vec_ubvr = __msa_ilvr_w(vec_vr, vec_ub); vec_ugvg = (v4i32)__msa_ilvev_h((v8i16)vec_vg, (v8i16)vec_ug); for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_uyvy, 0); src1 = (v16u8)__msa_pckod_b((v16i8)src0, (v16i8)src0); src2 = (v16u8)__msa_pckev_b((v16i8)src0, (v16i8)src0); YUVTORGB(src1, src2, vec_ubvr, vec_ugvg, vec_bb, vec_bg, vec_br, vec_yg, vec0, vec1, vec2); STOREARGB(vec0, vec1, vec2, alpha, dst_argb); src_uyvy += 16; dst_argb += 32; } } void InterpolateRow_MSA(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int 
width, int32_t source_y_fraction) { int32_t y1_fraction = source_y_fraction; int32_t y0_fraction = 256 - y1_fraction; uint16_t y_fractions; const uint8_t* s = src_ptr; const uint8_t* t = src_ptr + src_stride; int x; v16u8 src0, src1, src2, src3, dst0, dst1; v8u16 vec0, vec1, vec2, vec3, y_frac; if (0 == y1_fraction) { memcpy(dst_ptr, src_ptr, width); return; } if (128 == y1_fraction) { for (x = 0; x < width; x += 32) { src0 = (v16u8)__msa_ld_b((const v16i8*)s, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)s, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)t, 0); src3 = (v16u8)__msa_ld_b((const v16i8*)t, 16); dst0 = __msa_aver_u_b(src0, src2); dst1 = __msa_aver_u_b(src1, src3); ST_UB2(dst0, dst1, dst_ptr, 16); s += 32; t += 32; dst_ptr += 32; } return; } y_fractions = (uint16_t)(y0_fraction + (y1_fraction << 8)); y_frac = (v8u16)__msa_fill_h(y_fractions); for (x = 0; x < width; x += 32) { src0 = (v16u8)__msa_ld_b((const v16i8*)s, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)s, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)t, 0); src3 = (v16u8)__msa_ld_b((const v16i8*)t, 16); vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); vec0 = (v8u16)__msa_dotp_u_h((v16u8)vec0, (v16u8)y_frac); vec1 = (v8u16)__msa_dotp_u_h((v16u8)vec1, (v16u8)y_frac); vec2 = (v8u16)__msa_dotp_u_h((v16u8)vec2, (v16u8)y_frac); vec3 = (v8u16)__msa_dotp_u_h((v16u8)vec3, (v16u8)y_frac); vec0 = (v8u16)__msa_srari_h((v8i16)vec0, 8); vec1 = (v8u16)__msa_srari_h((v8i16)vec1, 8); vec2 = (v8u16)__msa_srari_h((v8i16)vec2, 8); vec3 = (v8u16)__msa_srari_h((v8i16)vec3, 8); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); ST_UB2(dst0, dst1, dst_ptr, 16); s += 32; t += 32; dst_ptr += 32; } } void ARGBSetRow_MSA(uint8_t* dst_argb, uint32_t v32, int width) { int x; v4i32 dst0 = __builtin_msa_fill_w(v32); for (x = 0; x < width; x += 4) { ST_UB(dst0, dst_argb); dst_argb += 16; } } void RAWToRGB24Row_MSA(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { int x; v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2; v16i8 shuffler0 = {2, 1, 0, 5, 4, 3, 8, 7, 6, 11, 10, 9, 14, 13, 12, 17}; v16i8 shuffler1 = {8, 7, 12, 11, 10, 15, 14, 13, 18, 17, 16, 21, 20, 19, 24, 23}; v16i8 shuffler2 = {14, 19, 18, 17, 22, 21, 20, 25, 24, 23, 28, 27, 26, 31, 30, 29}; for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_raw, 32); src3 = (v16u8)__msa_sldi_b((v16i8)src1, (v16i8)src0, 8); src4 = (v16u8)__msa_sldi_b((v16i8)src2, (v16i8)src1, 8); dst0 = (v16u8)__msa_vshf_b(shuffler0, (v16i8)src1, (v16i8)src0); dst1 = (v16u8)__msa_vshf_b(shuffler1, (v16i8)src4, (v16i8)src3); dst2 = (v16u8)__msa_vshf_b(shuffler2, (v16i8)src2, (v16i8)src1); ST_UB2(dst0, dst1, dst_rgb24, 16); ST_UB(dst2, (dst_rgb24 + 32)); src_raw += 48; dst_rgb24 += 48; } } void MergeUVRow_MSA(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { int x; v16u8 src0, src1, dst0, dst1; for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_u, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_v, 0); dst0 = (v16u8)__msa_ilvr_b((v16i8)src1, (v16i8)src0); dst1 = (v16u8)__msa_ilvl_b((v16i8)src1, (v16i8)src0); ST_UB2(dst0, dst1, dst_uv, 16); src_u += 16; src_v += 16; dst_uv += 32; } } void ARGBExtractAlphaRow_MSA(const uint8_t* 
src_argb, uint8_t* dst_a, int width) { int i; v16u8 src0, src1, src2, src3, vec0, vec1, dst0; for (i = 0; i < width; i += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 32); src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 48); vec0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); vec1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); dst0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst_a); src_argb += 64; dst_a += 16; } } void ARGBBlendRow_MSA(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 vec8, vec9, vec10, vec11, vec12, vec13; v8u16 const_256 = (v8u16)__msa_ldi_h(256); v16u8 const_255 = (v16u8)__msa_ldi_b(255); v16u8 mask = {0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255, 0, 0, 0, 255}; v16i8 zero = {0}; for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb0, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 0); src3 = (v16u8)__msa_ld_b((const v16i8*)src_argb1, 16); vec0 = (v8u16)__msa_ilvr_b(zero, (v16i8)src0); vec1 = (v8u16)__msa_ilvl_b(zero, (v16i8)src0); vec2 = (v8u16)__msa_ilvr_b(zero, (v16i8)src1); vec3 = (v8u16)__msa_ilvl_b(zero, (v16i8)src1); vec4 = (v8u16)__msa_ilvr_b(zero, (v16i8)src2); vec5 = (v8u16)__msa_ilvl_b(zero, (v16i8)src2); vec6 = (v8u16)__msa_ilvr_b(zero, (v16i8)src3); vec7 = (v8u16)__msa_ilvl_b(zero, (v16i8)src3); vec8 = (v8u16)__msa_fill_h(vec0[3]); vec9 = (v8u16)__msa_fill_h(vec0[7]); vec10 = (v8u16)__msa_fill_h(vec1[3]); vec11 = (v8u16)__msa_fill_h(vec1[7]); vec8 = (v8u16)__msa_pckev_d((v2i64)vec9, (v2i64)vec8); vec9 = (v8u16)__msa_pckev_d((v2i64)vec11, (v2i64)vec10); vec10 = (v8u16)__msa_fill_h(vec2[3]); vec11 = (v8u16)__msa_fill_h(vec2[7]); vec12 = (v8u16)__msa_fill_h(vec3[3]); vec13 = (v8u16)__msa_fill_h(vec3[7]); vec10 = (v8u16)__msa_pckev_d((v2i64)vec11, (v2i64)vec10); vec11 = (v8u16)__msa_pckev_d((v2i64)vec13, (v2i64)vec12); vec8 = const_256 - vec8; vec9 = const_256 - vec9; vec10 = const_256 - vec10; vec11 = const_256 - vec11; vec8 *= vec4; vec9 *= vec5; vec10 *= vec6; vec11 *= vec7; vec8 = (v8u16)__msa_srai_h((v8i16)vec8, 8); vec9 = (v8u16)__msa_srai_h((v8i16)vec9, 8); vec10 = (v8u16)__msa_srai_h((v8i16)vec10, 8); vec11 = (v8u16)__msa_srai_h((v8i16)vec11, 8); vec0 += vec8; vec1 += vec9; vec2 += vec10; vec3 += vec11; dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); dst0 = __msa_bmnz_v(dst0, const_255, mask); dst1 = __msa_bmnz_v(dst1, const_255, mask); ST_UB2(dst0, dst1, dst_argb, 16); src_argb0 += 32; src_argb1 += 32; dst_argb += 32; } } void ARGBQuantizeRow_MSA(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; v4i32 tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; v4i32 vec_scale = __msa_fill_w(scale); v16u8 vec_int_sz = (v16u8)__msa_fill_b(interval_size); v16u8 vec_int_ofst = (v16u8)__msa_fill_b(interval_offset); v16i8 mask = {0, 1, 2, 19, 4, 5, 6, 23, 8, 9, 10, 27, 12, 13, 14, 31}; v16i8 zero = {0}; for (x = 0; x < width; x += 8) { src0 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 0); src1 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 16); src2 = 
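/* ARGBQuantize computes v = (v * scale >> 16) * interval_size + interval_offset per color channel; the final vshf mask (indices 19, 23, 27, 31) copies the untouched alpha bytes back from the original srcN vectors. */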
(v16u8)__msa_ld_b((v16i8*)dst_argb, 32); src3 = (v16u8)__msa_ld_b((v16i8*)dst_argb, 48); vec0 = (v8i16)__msa_ilvr_b(zero, (v16i8)src0); vec1 = (v8i16)__msa_ilvl_b(zero, (v16i8)src0); vec2 = (v8i16)__msa_ilvr_b(zero, (v16i8)src1); vec3 = (v8i16)__msa_ilvl_b(zero, (v16i8)src1); vec4 = (v8i16)__msa_ilvr_b(zero, (v16i8)src2); vec5 = (v8i16)__msa_ilvl_b(zero, (v16i8)src2); vec6 = (v8i16)__msa_ilvr_b(zero, (v16i8)src3); vec7 = (v8i16)__msa_ilvl_b(zero, (v16i8)src3); tmp0 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec0); tmp1 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec0); tmp2 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec1); tmp3 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec1); tmp4 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec2); tmp5 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec2); tmp6 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec3); tmp7 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec3); tmp8 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec4); tmp9 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec4); tmp10 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec5); tmp11 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec5); tmp12 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec6); tmp13 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec6); tmp14 = (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)vec7); tmp15 = (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)vec7); tmp0 *= vec_scale; tmp1 *= vec_scale; tmp2 *= vec_scale; tmp3 *= vec_scale; tmp4 *= vec_scale; tmp5 *= vec_scale; tmp6 *= vec_scale; tmp7 *= vec_scale; tmp8 *= vec_scale; tmp9 *= vec_scale; tmp10 *= vec_scale; tmp11 *= vec_scale; tmp12 *= vec_scale; tmp13 *= vec_scale; tmp14 *= vec_scale; tmp15 *= vec_scale; tmp0 >>= 16; tmp1 >>= 16; tmp2 >>= 16; tmp3 >>= 16; tmp4 >>= 16; tmp5 >>= 16; tmp6 >>= 16; tmp7 >>= 16; tmp8 >>= 16; tmp9 >>= 16; tmp10 >>= 16; tmp11 >>= 16; tmp12 >>= 16; tmp13 >>= 16; tmp14 >>= 16; tmp15 >>= 16; vec0 = (v8i16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); vec1 = (v8i16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); vec2 = (v8i16)__msa_pckev_h((v8i16)tmp5, (v8i16)tmp4); vec3 = (v8i16)__msa_pckev_h((v8i16)tmp7, (v8i16)tmp6); vec4 = (v8i16)__msa_pckev_h((v8i16)tmp9, (v8i16)tmp8); vec5 = (v8i16)__msa_pckev_h((v8i16)tmp11, (v8i16)tmp10); vec6 = (v8i16)__msa_pckev_h((v8i16)tmp13, (v8i16)tmp12); vec7 = (v8i16)__msa_pckev_h((v8i16)tmp15, (v8i16)tmp14); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); dst2 = (v16u8)__msa_pckev_b((v16i8)vec5, (v16i8)vec4); dst3 = (v16u8)__msa_pckev_b((v16i8)vec7, (v16i8)vec6); dst0 *= vec_int_sz; dst1 *= vec_int_sz; dst2 *= vec_int_sz; dst3 *= vec_int_sz; dst0 += vec_int_ofst; dst1 += vec_int_ofst; dst2 += vec_int_ofst; dst3 += vec_int_ofst; dst0 = (v16u8)__msa_vshf_b(mask, (v16i8)src0, (v16i8)dst0); dst1 = (v16u8)__msa_vshf_b(mask, (v16i8)src1, (v16i8)dst1); dst2 = (v16u8)__msa_vshf_b(mask, (v16i8)src2, (v16i8)dst2); dst3 = (v16u8)__msa_vshf_b(mask, (v16i8)src3, (v16i8)dst3); ST_UB4(dst0, dst1, dst2, dst3, dst_argb, 16); dst_argb += 64; } } void ARGBColorMatrixRow_MSA(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width) { int32_t x; v16i8 src0; v16u8 src1, src2, dst0, dst1; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; v8i16 vec10, vec11, vec12, vec13, vec14, vec15, vec16, vec17; v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; v4i32 tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; v16i8 zero = {0}; v8i16 max = __msa_ldi_h(255); src0 = __msa_ld_b((v16i8*)matrix_argb, 0); vec0 = (v8i16)__msa_ilvr_b(zero, src0); vec1 = 
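/* ARGBColorMatrix: each output channel is the four-term dot product of a pixel's B,G,R,A bytes with one row of the signed int8 matrix, shifted right by 6 and clamped to [0, 255]; vec0/vec1 hold the matrix widened to 16 bits. */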
(v8i16)__msa_ilvl_b(zero, src0); for (x = 0; x < width; x += 8) { src1 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 0); src2 = (v16u8)__msa_ld_b((const v16i8*)src_argb, 16); vec2 = (v8i16)__msa_ilvr_b(zero, (v16i8)src1); vec3 = (v8i16)__msa_ilvl_b(zero, (v16i8)src1); vec4 = (v8i16)__msa_ilvr_b(zero, (v16i8)src2); vec5 = (v8i16)__msa_ilvl_b(zero, (v16i8)src2); vec6 = (v8i16)__msa_pckod_d((v2i64)vec2, (v2i64)vec2); vec7 = (v8i16)__msa_pckod_d((v2i64)vec3, (v2i64)vec3); vec8 = (v8i16)__msa_pckod_d((v2i64)vec4, (v2i64)vec4); vec9 = (v8i16)__msa_pckod_d((v2i64)vec5, (v2i64)vec5); vec2 = (v8i16)__msa_pckev_d((v2i64)vec2, (v2i64)vec2); vec3 = (v8i16)__msa_pckev_d((v2i64)vec3, (v2i64)vec3); vec4 = (v8i16)__msa_pckev_d((v2i64)vec4, (v2i64)vec4); vec5 = (v8i16)__msa_pckev_d((v2i64)vec5, (v2i64)vec5); vec10 = vec2 * vec0; vec11 = vec2 * vec1; vec12 = vec6 * vec0; vec13 = vec6 * vec1; tmp0 = __msa_hadd_s_w(vec10, vec10); tmp1 = __msa_hadd_s_w(vec11, vec11); tmp2 = __msa_hadd_s_w(vec12, vec12); tmp3 = __msa_hadd_s_w(vec13, vec13); vec14 = vec3 * vec0; vec15 = vec3 * vec1; vec16 = vec7 * vec0; vec17 = vec7 * vec1; tmp4 = __msa_hadd_s_w(vec14, vec14); tmp5 = __msa_hadd_s_w(vec15, vec15); tmp6 = __msa_hadd_s_w(vec16, vec16); tmp7 = __msa_hadd_s_w(vec17, vec17); vec10 = __msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); vec11 = __msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); vec12 = __msa_pckev_h((v8i16)tmp5, (v8i16)tmp4); vec13 = __msa_pckev_h((v8i16)tmp7, (v8i16)tmp6); tmp0 = __msa_hadd_s_w(vec10, vec10); tmp1 = __msa_hadd_s_w(vec11, vec11); tmp2 = __msa_hadd_s_w(vec12, vec12); tmp3 = __msa_hadd_s_w(vec13, vec13); tmp0 = __msa_srai_w(tmp0, 6); tmp1 = __msa_srai_w(tmp1, 6); tmp2 = __msa_srai_w(tmp2, 6); tmp3 = __msa_srai_w(tmp3, 6); vec2 = vec4 * vec0; vec6 = vec4 * vec1; vec3 = vec8 * vec0; vec7 = vec8 * vec1; tmp8 = __msa_hadd_s_w(vec2, vec2); tmp9 = __msa_hadd_s_w(vec6, vec6); tmp10 = __msa_hadd_s_w(vec3, vec3); tmp11 = __msa_hadd_s_w(vec7, vec7); vec4 = vec5 * vec0; vec8 = vec5 * vec1; vec5 = vec9 * vec0; vec9 = vec9 * vec1; tmp12 = __msa_hadd_s_w(vec4, vec4); tmp13 = __msa_hadd_s_w(vec8, vec8); tmp14 = __msa_hadd_s_w(vec5, vec5); tmp15 = __msa_hadd_s_w(vec9, vec9); vec14 = __msa_pckev_h((v8i16)tmp9, (v8i16)tmp8); vec15 = __msa_pckev_h((v8i16)tmp11, (v8i16)tmp10); vec16 = __msa_pckev_h((v8i16)tmp13, (v8i16)tmp12); vec17 = __msa_pckev_h((v8i16)tmp15, (v8i16)tmp14); tmp4 = __msa_hadd_s_w(vec14, vec14); tmp5 = __msa_hadd_s_w(vec15, vec15); tmp6 = __msa_hadd_s_w(vec16, vec16); tmp7 = __msa_hadd_s_w(vec17, vec17); tmp4 = __msa_srai_w(tmp4, 6); tmp5 = __msa_srai_w(tmp5, 6); tmp6 = __msa_srai_w(tmp6, 6); tmp7 = __msa_srai_w(tmp7, 6); vec10 = __msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); vec11 = __msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); vec12 = __msa_pckev_h((v8i16)tmp5, (v8i16)tmp4); vec13 = __msa_pckev_h((v8i16)tmp7, (v8i16)tmp6); vec10 = __msa_maxi_s_h(vec10, 0); vec11 = __msa_maxi_s_h(vec11, 0); vec12 = __msa_maxi_s_h(vec12, 0); vec13 = __msa_maxi_s_h(vec13, 0); vec10 = __msa_min_s_h(vec10, max); vec11 = __msa_min_s_h(vec11, max); vec12 = __msa_min_s_h(vec12, max); vec13 = __msa_min_s_h(vec13, max); dst0 = (v16u8)__msa_pckev_b((v16i8)vec11, (v16i8)vec10); dst1 = (v16u8)__msa_pckev_b((v16i8)vec13, (v16i8)vec12); ST_UB2(dst0, dst1, dst_argb, 16); src_argb += 32; dst_argb += 32; } } void SplitUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; for (x = 0; x < width; x += 32) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 0); src1 = 
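/* SplitUV de-interleaves with pckev_b (even bytes -> the U plane) and pckod_b (odd bytes -> the V plane), 32 UV pairs per iteration. */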
(v16u8)__msa_ld_b((const v16i8*)src_uv, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 32); src3 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 48); dst0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); dst1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); dst2 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); dst3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); ST_UB2(dst0, dst1, dst_u, 16); ST_UB2(dst2, dst3, dst_v, 16); src_uv += 64; dst_u += 32; dst_v += 32; } } void SetRow_MSA(uint8_t* dst, uint8_t v8, int width) { int x; v16u8 dst0 = (v16u8)__msa_fill_b(v8); for (x = 0; x < width; x += 16) { ST_UB(dst0, dst); dst += 16; } } void MirrorUVRow_MSA(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) { int x; v16u8 src0, src1, src2, src3; v16u8 dst0, dst1, dst2, dst3; v16i8 mask0 = {30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0}; v16i8 mask1 = {31, 29, 27, 25, 23, 21, 19, 17, 15, 13, 11, 9, 7, 5, 3, 1}; src_uv += (2 * width); for (x = 0; x < width; x += 32) { src_uv -= 64; src2 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 0); src3 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 16); src0 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 32); src1 = (v16u8)__msa_ld_b((const v16i8*)src_uv, 48); dst0 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); dst1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); dst2 = (v16u8)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0); dst3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src3, (v16i8)src2); ST_UB2(dst0, dst1, dst_v, 16); ST_UB2(dst2, dst3, dst_u, 16); dst_u += 32; dst_v += 32; } } void SobelXRow_MSA(const uint8_t* src_y0, const uint8_t* src_y1, const uint8_t* src_y2, uint8_t* dst_sobelx, int32_t width) { int x; v16u8 src0, src1, src2, src3, src4, src5, dst0; v8i16 vec0, vec1, vec2, vec3, vec4, vec5; v16i8 mask0 = {0, 2, 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9}; v16i8 tmp = __msa_ldi_b(8); v16i8 mask1 = mask0 + tmp; v8i16 zero = {0}; v8i16 max = __msa_ldi_h(255); for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_y0, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_y0, 16); src2 = (v16u8)__msa_ld_b((const v16i8*)src_y1, 0); src3 = (v16u8)__msa_ld_b((const v16i8*)src_y1, 16); src4 = (v16u8)__msa_ld_b((const v16i8*)src_y2, 0); src5 = (v16u8)__msa_ld_b((const v16i8*)src_y2, 16); vec0 = (v8i16)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0); vec1 = (v8i16)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); vec2 = (v8i16)__msa_vshf_b(mask0, (v16i8)src3, (v16i8)src2); vec3 = (v8i16)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); vec4 = (v8i16)__msa_vshf_b(mask0, (v16i8)src5, (v16i8)src4); vec5 = (v8i16)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4); vec0 = (v8i16)__msa_hsub_u_h((v16u8)vec0, (v16u8)vec0); vec1 = (v8i16)__msa_hsub_u_h((v16u8)vec1, (v16u8)vec1); vec2 = (v8i16)__msa_hsub_u_h((v16u8)vec2, (v16u8)vec2); vec3 = (v8i16)__msa_hsub_u_h((v16u8)vec3, (v16u8)vec3); vec4 = (v8i16)__msa_hsub_u_h((v16u8)vec4, (v16u8)vec4); vec5 = (v8i16)__msa_hsub_u_h((v16u8)vec5, (v16u8)vec5); vec0 += vec2; vec1 += vec3; vec4 += vec2; vec5 += vec3; vec0 += vec4; vec1 += vec5; vec0 = __msa_add_a_h(zero, vec0); vec1 = __msa_add_a_h(zero, vec1); vec0 = __msa_maxi_s_h(vec0, 0); vec1 = __msa_maxi_s_h(vec1, 0); vec0 = __msa_min_s_h(max, vec0); vec1 = __msa_min_s_h(max, vec1); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst_sobelx); src_y0 += 16; src_y1 += 16; src_y2 += 16; dst_sobelx += 16; } } void SobelYRow_MSA(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int32_t width) { int x; v16u8 src0, src1, 
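/* SobelY accumulates |d0 + 2 * d1 + d2| clamped to 255, where dN is the src_y0 - src_y1 row difference at column offsets 0, 1 and 2 (produced by the sldi_b shifts). */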
dst0; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6; v8i16 zero = {0}; v8i16 max = __msa_ldi_h(255); for (x = 0; x < width; x += 16) { src0 = (v16u8)__msa_ld_b((const v16i8*)src_y0, 0); src1 = (v16u8)__msa_ld_b((const v16i8*)src_y1, 0); vec0 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)src0); vec1 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)src0); vec2 = (v8i16)__msa_ilvr_b((v16i8)zero, (v16i8)src1); vec3 = (v8i16)__msa_ilvl_b((v16i8)zero, (v16i8)src1); vec0 -= vec2; vec1 -= vec3; vec6[0] = src_y0[16] - src_y1[16]; vec6[1] = src_y0[17] - src_y1[17]; vec2 = (v8i16)__msa_sldi_b((v16i8)vec1, (v16i8)vec0, 2); vec3 = (v8i16)__msa_sldi_b((v16i8)vec6, (v16i8)vec1, 2); vec4 = (v8i16)__msa_sldi_b((v16i8)vec1, (v16i8)vec0, 4); vec5 = (v8i16)__msa_sldi_b((v16i8)vec6, (v16i8)vec1, 4); vec0 += vec2; vec1 += vec3; vec4 += vec2; vec5 += vec3; vec0 += vec4; vec1 += vec5; vec0 = __msa_add_a_h(zero, vec0); vec1 = __msa_add_a_h(zero, vec1); vec0 = __msa_maxi_s_h(vec0, 0); vec1 = __msa_maxi_s_h(vec1, 0); vec0 = __msa_min_s_h(max, vec0); vec1 = __msa_min_s_h(max, vec1); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst_sobely); src_y0 += 16; src_y1 += 16; dst_sobely += 16; } } void HalfFloatRow_MSA(const uint16_t* src, uint16_t* dst, float scale, int width) { int i; v8u16 src0, src1, src2, src3, dst0, dst1, dst2, dst3; v4u32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v4f32 fvec0, fvec1, fvec2, fvec3, fvec4, fvec5, fvec6, fvec7; v4f32 mult_vec; v8i16 zero = {0}; /* 1.9259299444e-34f is 2^-112: pre-scaling by it re-biases the float exponent so that the raw bits shifted right by 13 below form an IEEE half-float. */ mult_vec[0] = 1.9259299444e-34f * scale; mult_vec = (v4f32)__msa_splati_w((v4i32)mult_vec, 0); for (i = 0; i < width; i += 32) { src0 = (v8u16)__msa_ld_h((v8i16*)src, 0); src1 = (v8u16)__msa_ld_h((v8i16*)src, 16); src2 = (v8u16)__msa_ld_h((v8i16*)src, 32); src3 = (v8u16)__msa_ld_h((v8i16*)src, 48); vec0 = (v4u32)__msa_ilvr_h(zero, (v8i16)src0); vec1 = (v4u32)__msa_ilvl_h(zero, (v8i16)src0); vec2 = (v4u32)__msa_ilvr_h(zero, (v8i16)src1); vec3 = (v4u32)__msa_ilvl_h(zero, (v8i16)src1); vec4 = (v4u32)__msa_ilvr_h(zero, (v8i16)src2); vec5 = (v4u32)__msa_ilvl_h(zero, (v8i16)src2); vec6 = (v4u32)__msa_ilvr_h(zero, (v8i16)src3); vec7 = (v4u32)__msa_ilvl_h(zero, (v8i16)src3); fvec0 = __msa_ffint_u_w(vec0); fvec1 = __msa_ffint_u_w(vec1); fvec2 = __msa_ffint_u_w(vec2); fvec3 = __msa_ffint_u_w(vec3); fvec4 = __msa_ffint_u_w(vec4); fvec5 = __msa_ffint_u_w(vec5); fvec6 = __msa_ffint_u_w(vec6); fvec7 = __msa_ffint_u_w(vec7); fvec0 *= mult_vec; fvec1 *= mult_vec; fvec2 *= mult_vec; fvec3 *= mult_vec; fvec4 *= mult_vec; fvec5 *= mult_vec; fvec6 *= mult_vec; fvec7 *= mult_vec; vec0 = ((v4u32)fvec0) >> 13; vec1 = ((v4u32)fvec1) >> 13; vec2 = ((v4u32)fvec2) >> 13; vec3 = ((v4u32)fvec3) >> 13; vec4 = ((v4u32)fvec4) >> 13; vec5 = ((v4u32)fvec5) >> 13; vec6 = ((v4u32)fvec6) >> 13; vec7 = ((v4u32)fvec7) >> 13; dst0 = (v8u16)__msa_pckev_h((v8i16)vec1, (v8i16)vec0); dst1 = (v8u16)__msa_pckev_h((v8i16)vec3, (v8i16)vec2); dst2 = (v8u16)__msa_pckev_h((v8i16)vec5, (v8i16)vec4); dst3 = (v8u16)__msa_pckev_h((v8i16)vec7, (v8i16)vec6); ST_UH2(dst0, dst1, dst, 8); ST_UH2(dst2, dst3, dst + 16, 8); src += 32; dst += 32; } }
#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
#endif  // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)
libvpx-1.8.2/third_party/libyuv/source/row_neon.cc
/* * Copyright 2011 The LibYuv Project Authors. All rights reserved.
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/row.h" #include #ifdef __cplusplus namespace libyuv { extern "C" { #endif // This module is for GCC Neon #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ !defined(__aarch64__) // Read 8 Y, 4 U and 4 V from 422 #define READYUV422 \ "vld1.8 {d0}, [%0]! \n" \ "vld1.32 {d2[0]}, [%1]! \n" \ "vld1.32 {d2[1]}, [%2]! \n" // Read 8 Y, 8 U and 8 V from 444 #define READYUV444 \ "vld1.8 {d0}, [%0]! \n" \ "vld1.8 {d2}, [%1]! \n" \ "vld1.8 {d3}, [%2]! \n" \ "vpaddl.u8 q1, q1 \n" \ "vrshrn.u16 d2, q1, #1 \n" // Read 8 Y, and set 4 U and 4 V to 128 #define READYUV400 \ "vld1.8 {d0}, [%0]! \n" \ "vmov.u8 d2, #128 \n" // Read 8 Y and 4 UV from NV12 #define READNV12 \ "vld1.8 {d0}, [%0]! \n" \ "vld1.8 {d2}, [%1]! \n" \ "vmov.u8 d3, d2 \n" /* split odd/even uv apart */ \ "vuzp.u8 d2, d3 \n" \ "vtrn.u32 d2, d3 \n" // Read 8 Y and 4 VU from NV21 #define READNV21 \ "vld1.8 {d0}, [%0]! \n" \ "vld1.8 {d2}, [%1]! \n" \ "vmov.u8 d3, d2 \n" /* split odd/even uv apart */ \ "vuzp.u8 d3, d2 \n" \ "vtrn.u32 d2, d3 \n" // Read 8 YUY2 #define READYUY2 \ "vld2.8 {d0, d2}, [%0]! \n" \ "vmov.u8 d3, d2 \n" \ "vuzp.u8 d2, d3 \n" \ "vtrn.u32 d2, d3 \n" // Read 8 UYVY #define READUYVY \ "vld2.8 {d2, d3}, [%0]! \n" \ "vmov.u8 d0, d3 \n" \ "vmov.u8 d3, d2 \n" \ "vuzp.u8 d2, d3 \n" \ "vtrn.u32 d2, d3 \n" #define YUVTORGB_SETUP \ "vld1.8 {d24}, [%[kUVToRB]] \n" \ "vld1.8 {d25}, [%[kUVToG]] \n" \ "vld1.16 {d26[], d27[]}, [%[kUVBiasBGR]]! \n" \ "vld1.16 {d8[], d9[]}, [%[kUVBiasBGR]]! \n" \ "vld1.16 {d28[], d29[]}, [%[kUVBiasBGR]] \n" \ "vld1.32 {d30[], d31[]}, [%[kYToRgb]] \n" #define YUVTORGB \ "vmull.u8 q8, d2, d24 \n" /* u/v B/R component */ \ "vmull.u8 q9, d2, d25 \n" /* u/v G component */ \ "vmovl.u8 q0, d0 \n" /* Y */ \ "vmovl.s16 q10, d1 \n" \ "vmovl.s16 q0, d0 \n" \ "vmul.s32 q10, q10, q15 \n" \ "vmul.s32 q0, q0, q15 \n" \ "vqshrun.s32 d0, q0, #16 \n" \ "vqshrun.s32 d1, q10, #16 \n" /* Y */ \ "vadd.s16 d18, d19 \n" \ "vshll.u16 q1, d16, #16 \n" /* Replicate u * UB */ \ "vshll.u16 q10, d17, #16 \n" /* Replicate v * VR */ \ "vshll.u16 q3, d18, #16 \n" /* Replicate (v*VG + u*UG)*/ \ "vaddw.u16 q1, q1, d16 \n" \ "vaddw.u16 q10, q10, d17 \n" \ "vaddw.u16 q3, q3, d18 \n" \ "vqadd.s16 q8, q0, q13 \n" /* B */ \ "vqadd.s16 q9, q0, q14 \n" /* R */ \ "vqadd.s16 q0, q0, q4 \n" /* G */ \ "vqadd.s16 q8, q8, q1 \n" /* B */ \ "vqadd.s16 q9, q9, q10 \n" /* R */ \ "vqsub.s16 q0, q0, q3 \n" /* G */ \ "vqshrun.s16 d20, q8, #6 \n" /* B */ \ "vqshrun.s16 d22, q9, #6 \n" /* R */ \ "vqshrun.s16 d21, q0, #6 \n" /* G */ void I444ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "vmov.u8 d23, #255 \n" "1: \n" READYUV444 YUVTORGB "subs %4, %4, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%3]! 
\n" "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "vmov.u8 d23, #255 \n" "1: \n" READYUV422 YUVTORGB "subs %4, %4, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%3]! \n" "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void I422AlphaToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, const uint8_t* src_a, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "1: \n" READYUV422 YUVTORGB "subs %5, %5, #8 \n" "vld1.8 {d23}, [%3]! \n" "vst4.8 {d20, d21, d22, d23}, [%4]! \n" "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(src_a), // %3 "+r"(dst_argb), // %4 "+r"(width) // %5 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void I422ToRGBARow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "1: \n" READYUV422 YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d19, #255 \n" // YUVTORGB modified d19 "vst4.8 {d19, d20, d21, d22}, [%3]! \n" "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_rgba), // %3 "+r"(width) // %4 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void I422ToRGB24Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "1: \n" READYUV422 YUVTORGB "subs %4, %4, #8 \n" "vst3.8 {d20, d21, d22}, [%3]! 
\n" "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_rgb24), // %3 "+r"(width) // %4 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } #define ARGBTORGB565 \ "vshll.u8 q0, d22, #8 \n" /* R */ \ "vshll.u8 q8, d21, #8 \n" /* G */ \ "vshll.u8 q9, d20, #8 \n" /* B */ \ "vsri.16 q0, q8, #5 \n" /* RG */ \ "vsri.16 q0, q9, #11 \n" /* RGB */ void I422ToRGB565Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "1: \n" READYUV422 YUVTORGB "subs %4, %4, #8 \n" ARGBTORGB565 "vst1.8 {q0}, [%3]! \n" // store 8 pixels RGB565. "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_rgb565), // %3 "+r"(width) // %4 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } #define ARGBTOARGB1555 \ "vshll.u8 q0, d23, #8 \n" /* A */ \ "vshll.u8 q8, d22, #8 \n" /* R */ \ "vshll.u8 q9, d21, #8 \n" /* G */ \ "vshll.u8 q10, d20, #8 \n" /* B */ \ "vsri.16 q0, q8, #1 \n" /* AR */ \ "vsri.16 q0, q9, #6 \n" /* ARG */ \ "vsri.16 q0, q10, #11 \n" /* ARGB */ void I422ToARGB1555Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "1: \n" READYUV422 YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d23, #255 \n" ARGBTOARGB1555 "vst1.8 {q0}, [%3]! \n" // store 8 pixels "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_argb1555), // %3 "+r"(width) // %4 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } #define ARGBTOARGB4444 \ "vshr.u8 d20, d20, #4 \n" /* B */ \ "vbic.32 d21, d21, d4 \n" /* G */ \ "vshr.u8 d22, d22, #4 \n" /* R */ \ "vbic.32 d23, d23, d4 \n" /* A */ \ "vorr d0, d20, d21 \n" /* BG */ \ "vorr d1, d22, d23 \n" /* RA */ \ "vzip.u8 d0, d1 \n" /* BGRA */ void I422ToARGB4444Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "vmov.u8 d4, #0x0f \n" // vbic bits to clear "1: \n" READYUV422 YUVTORGB "subs %4, %4, #8 \n" "vmov.u8 d23, #255 \n" ARGBTOARGB4444 "vst1.8 {q0}, [%3]! \n" // store 8 pixels "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_argb4444), // %3 "+r"(width) // %4 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { asm volatile( YUVTORGB_SETUP "vmov.u8 d23, #255 \n" "1: \n" READYUV400 YUVTORGB "subs %2, %2, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%1]! 
\n" "bgt 1b \n" : "+r"(src_y), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : [kUVToRB] "r"(&kYuvI601Constants.kUVToRB), [kUVToG] "r"(&kYuvI601Constants.kUVToG), [kUVBiasBGR] "r"(&kYuvI601Constants.kUVBiasBGR), [kYToRgb] "r"(&kYuvI601Constants.kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d23, #255 \n" "1: \n" "vld1.8 {d20}, [%0]! \n" "vmov d21, d20 \n" "vmov d22, d20 \n" "subs %2, %2, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%1]! \n" "bgt 1b \n" : "+r"(src_y), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "d20", "d21", "d22", "d23"); } void NV12ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP "vmov.u8 d23, #255 \n" "1: \n" READNV12 YUVTORGB "subs %3, %3, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%2]! \n" "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void NV21ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP "vmov.u8 d23, #255 \n" "1: \n" READNV21 YUVTORGB "subs %3, %3, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%2]! \n" "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_vu), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void NV12ToRGB24Row_NEON(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "1: \n" READNV12 YUVTORGB "subs %3, %3, #8 \n" "vst3.8 {d20, d21, d22}, [%2]! \n" "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_rgb24), // %2 "+r"(width) // %3 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void NV21ToRGB24Row_NEON(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "1: \n" READNV21 YUVTORGB "subs %3, %3, #8 \n" "vst3.8 {d20, d21, d22}, [%2]! \n" "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_vu), // %1 "+r"(dst_rgb24), // %2 "+r"(width) // %3 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void NV12ToRGB565Row_NEON(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "1: \n" READNV12 YUVTORGB "subs %3, %3, #8 \n" ARGBTORGB565 "vst1.8 {q0}, [%2]! \n" // store 8 pixels RGB565. 
"bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_rgb565), // %2 "+r"(width) // %3 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP "vmov.u8 d23, #255 \n" "1: \n" READYUY2 YUVTORGB "subs %2, %2, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%1]! \n" "bgt 1b \n" : "+r"(src_yuy2), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile(YUVTORGB_SETUP "vmov.u8 d23, #255 \n" "1: \n" READUYVY YUVTORGB "subs %2, %2, #8 \n" "vst4.8 {d20, d21, d22, d23}, [%1]! \n" "bgt 1b \n" : "+r"(src_uyvy), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. void SplitUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "1: \n" "vld2.8 {q0, q1}, [%0]! \n" // load 16 pairs of UV "subs %3, %3, #16 \n" // 16 processed per loop "vst1.8 {q0}, [%1]! \n" // store U "vst1.8 {q1}, [%2]! \n" // store V "bgt 1b \n" : "+r"(src_uv), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 // Output registers : // Input registers : "cc", "memory", "q0", "q1" // Clobber List ); } // Reads 16 U's and V's and writes out 16 pairs of UV. void MergeUVRow_NEON(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { asm volatile( "1: \n" "vld1.8 {q0}, [%0]! \n" // load U "vld1.8 {q1}, [%1]! \n" // load V "subs %3, %3, #16 \n" // 16 processed per loop "vst2.8 {q0, q1}, [%2]! \n" // store 16 pairs of UV "bgt 1b \n" : "+r"(src_u), // %0 "+r"(src_v), // %1 "+r"(dst_uv), // %2 "+r"(width) // %3 // Output registers : // Input registers : "cc", "memory", "q0", "q1" // Clobber List ); } // Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b. void SplitRGBRow_NEON(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) { asm volatile( "1: \n" "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB "vld3.8 {d1, d3, d5}, [%0]! \n" // next 8 RGB "subs %4, %4, #16 \n" // 16 processed per loop "vst1.8 {q0}, [%1]! \n" // store R "vst1.8 {q1}, [%2]! \n" // store G "vst1.8 {q2}, [%3]! \n" // store B "bgt 1b \n" : "+r"(src_rgb), // %0 "+r"(dst_r), // %1 "+r"(dst_g), // %2 "+r"(dst_b), // %3 "+r"(width) // %4 : // Input registers : "cc", "memory", "d0", "d1", "d2" // Clobber List ); } // Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time void MergeRGBRow_NEON(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width) { asm volatile( "1: \n" "vld1.8 {q0}, [%0]! \n" // load R "vld1.8 {q1}, [%1]! \n" // load G "vld1.8 {q2}, [%2]! 
\n" // load B "subs %4, %4, #16 \n" // 16 processed per loop "vst3.8 {d0, d2, d4}, [%3]! \n" // store 8 RGB "vst3.8 {d1, d3, d5}, [%3]! \n" // next 8 RGB "bgt 1b \n" : "+r"(src_r), // %0 "+r"(src_g), // %1 "+r"(src_b), // %2 "+r"(dst_rgb), // %3 "+r"(width) // %4 : // Input registers : "cc", "memory", "q0", "q1", "q2" // Clobber List ); } // Copy multiple of 32. vld4.8 allow unaligned and is fastest on a15. void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "1: \n" "vld1.8 {d0, d1, d2, d3}, [%0]! \n" // load 32 "subs %2, %2, #32 \n" // 32 processed per loop "vst1.8 {d0, d1, d2, d3}, [%1]! \n" // store 32 "bgt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 // Output registers : // Input registers : "cc", "memory", "q0", "q1" // Clobber List ); } // SetRow writes 'width' bytes using an 8 bit value repeated. void SetRow_NEON(uint8_t* dst, uint8_t v8, int width) { asm volatile( "vdup.8 q0, %2 \n" // duplicate 16 bytes "1: \n" "subs %1, %1, #16 \n" // 16 bytes per loop "vst1.8 {q0}, [%0]! \n" // store "bgt 1b \n" : "+r"(dst), // %0 "+r"(width) // %1 : "r"(v8) // %2 : "cc", "memory", "q0"); } // ARGBSetRow writes 'width' pixels using an 32 bit value repeated. void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) { asm volatile( "vdup.u32 q0, %2 \n" // duplicate 4 ints "1: \n" "subs %1, %1, #4 \n" // 4 pixels per loop "vst1.8 {q0}, [%0]! \n" // store "bgt 1b \n" : "+r"(dst), // %0 "+r"(width) // %1 : "r"(v32) // %2 : "cc", "memory", "q0"); } void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) { asm volatile( // Start at end of source row. "mov r3, #-16 \n" "add %0, %0, %2 \n" "sub %0, #16 \n" "1: \n" "vld1.8 {q0}, [%0], r3 \n" // src -= 16 "subs %2, #16 \n" // 16 pixels per loop. "vrev64.8 q0, q0 \n" "vst1.8 {d1}, [%1]! \n" // dst += 16 "vst1.8 {d0}, [%1]! \n" "bgt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "cc", "memory", "r3", "q0"); } void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( // Start at end of source row. "mov r12, #-16 \n" "add %0, %0, %3, lsl #1 \n" "sub %0, #16 \n" "1: \n" "vld2.8 {d0, d1}, [%0], r12 \n" // src -= 16 "subs %3, #8 \n" // 8 pixels per loop. "vrev64.8 q0, q0 \n" "vst1.8 {d0}, [%1]! \n" // dst += 8 "vst1.8 {d1}, [%2]! \n" "bgt 1b \n" : "+r"(src_uv), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : : "cc", "memory", "r12", "q0"); } void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) { asm volatile( // Start at end of source row. "mov r3, #-16 \n" "add %0, %0, %2, lsl #2 \n" "sub %0, #16 \n" "1: \n" "vld1.8 {q0}, [%0], r3 \n" // src -= 16 "subs %2, #4 \n" // 4 pixels per loop. "vrev64.32 q0, q0 \n" "vst1.8 {d1}, [%1]! \n" // dst += 16 "vst1.8 {d0}, [%1]! \n" "bgt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "cc", "memory", "r3", "q0"); } void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d4, #255 \n" // Alpha "1: \n" "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RGB24. "subs %2, %2, #8 \n" // 8 processed per loop. "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. "bgt 1b \n" : "+r"(src_rgb24), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List ); } void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d4, #255 \n" // Alpha "1: \n" "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW. 
"subs %2, %2, #8 \n" // 8 processed per loop. "vswp.u8 d1, d3 \n" // swap R, B "vst4.8 {d1, d2, d3, d4}, [%1]! \n" // store 8 pixels of ARGB. "bgt 1b \n" : "+r"(src_raw), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List ); } void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { asm volatile( "1: \n" "vld3.8 {d1, d2, d3}, [%0]! \n" // load 8 pixels of RAW. "subs %2, %2, #8 \n" // 8 processed per loop. "vswp.u8 d1, d3 \n" // swap R, B "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of // RGB24. "bgt 1b \n" : "+r"(src_raw), // %0 "+r"(dst_rgb24), // %1 "+r"(width) // %2 : : "cc", "memory", "d1", "d2", "d3" // Clobber List ); } #define RGB565TOARGB \ "vshrn.u16 d6, q0, #5 \n" /* G xxGGGGGG */ \ "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB RRRRRxxx */ \ "vshl.u8 d6, d6, #2 \n" /* G GGGGGG00 upper 6 */ \ "vshr.u8 d1, d1, #3 \n" /* R 000RRRRR lower 5 */ \ "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ "vorr.u8 d0, d0, d4 \n" /* B */ \ "vshr.u8 d4, d6, #6 \n" /* G 000000GG lower 2 */ \ "vorr.u8 d2, d1, d5 \n" /* R */ \ "vorr.u8 d1, d4, d6 \n" /* G */ void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d3, #255 \n" // Alpha "1: \n" "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. "subs %2, %2, #8 \n" // 8 processed per loop. RGB565TOARGB "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. "bgt 1b \n" : "+r"(src_rgb565), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List ); } #define ARGB1555TOARGB \ "vshrn.u16 d7, q0, #8 \n" /* A Arrrrrxx */ \ "vshr.u8 d6, d7, #2 \n" /* R xxxRRRRR */ \ "vshrn.u16 d5, q0, #5 \n" /* G xxxGGGGG */ \ "vmovn.u16 d4, q0 \n" /* B xxxBBBBB */ \ "vshr.u8 d7, d7, #7 \n" /* A 0000000A */ \ "vneg.s8 d7, d7 \n" /* A AAAAAAAA upper 8 */ \ "vshl.u8 d6, d6, #3 \n" /* R RRRRR000 upper 5 */ \ "vshr.u8 q1, q3, #5 \n" /* R,A 00000RRR lower 3 */ \ "vshl.u8 q0, q2, #3 \n" /* B,G BBBBB000 upper 5 */ \ "vshr.u8 q2, q0, #5 \n" /* B,G 00000BBB lower 3 */ \ "vorr.u8 q1, q1, q3 \n" /* R,A */ \ "vorr.u8 q0, q0, q2 \n" /* B,G */ // RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha. #define RGB555TOARGB \ "vshrn.u16 d6, q0, #5 \n" /* G xxxGGGGG */ \ "vuzp.u8 d0, d1 \n" /* d0 xxxBBBBB xRRRRRxx */ \ "vshl.u8 d6, d6, #3 \n" /* G GGGGG000 upper 5 */ \ "vshr.u8 d1, d1, #2 \n" /* R 00xRRRRR lower 5 */ \ "vshl.u8 q0, q0, #3 \n" /* B,R BBBBB000 upper 5 */ \ "vshr.u8 q2, q0, #5 \n" /* B,R 00000BBB lower 3 */ \ "vorr.u8 d0, d0, d4 \n" /* B */ \ "vshr.u8 d4, d6, #5 \n" /* G 00000GGG lower 3 */ \ "vorr.u8 d2, d1, d5 \n" /* R */ \ "vorr.u8 d1, d4, d6 \n" /* G */ void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d3, #255 \n" // Alpha "1: \n" "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. "subs %2, %2, #8 \n" // 8 processed per loop. ARGB1555TOARGB "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. 
"bgt 1b \n" : "+r"(src_argb1555), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List ); } #define ARGB4444TOARGB \ "vuzp.u8 d0, d1 \n" /* d0 BG, d1 RA */ \ "vshl.u8 q2, q0, #4 \n" /* B,R BBBB0000 */ \ "vshr.u8 q1, q0, #4 \n" /* G,A 0000GGGG */ \ "vshr.u8 q0, q2, #4 \n" /* B,R 0000BBBB */ \ "vorr.u8 q0, q0, q2 \n" /* B,R BBBBBBBB */ \ "vshl.u8 q2, q1, #4 \n" /* G,A GGGG0000 */ \ "vorr.u8 q1, q1, q2 \n" /* G,A GGGGGGGG */ \ "vswp.u8 d1, d2 \n" /* B,R,G,A -> B,G,R,A */ void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d3, #255 \n" // Alpha "1: \n" "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. "subs %2, %2, #8 \n" // 8 processed per loop. ARGB4444TOARGB "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. "bgt 1b \n" : "+r"(src_argb4444), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2" // Clobber List ); } void ARGBToRGB24Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb24, int width) { asm volatile( "1: \n" "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. "subs %2, %2, #8 \n" // 8 processed per loop. "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of // RGB24. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_rgb24), // %1 "+r"(width) // %2 : : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List ); } void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) { asm volatile( "1: \n" "vld4.8 {d1, d2, d3, d4}, [%0]! \n" // load 8 pixels of ARGB. "subs %2, %2, #8 \n" // 8 processed per loop. "vswp.u8 d1, d3 \n" // swap R, B "vst3.8 {d1, d2, d3}, [%1]! \n" // store 8 pixels of RAW. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_raw), // %1 "+r"(width) // %2 : : "cc", "memory", "d1", "d2", "d3", "d4" // Clobber List ); } void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { asm volatile( "1: \n" "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of YUY2. "subs %2, %2, #16 \n" // 16 processed per loop. "vst1.8 {q0}, [%1]! \n" // store 16 pixels of Y. "bgt 1b \n" : "+r"(src_yuy2), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q1" // Clobber List ); } void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { asm volatile( "1: \n" "vld2.8 {q0, q1}, [%0]! \n" // load 16 pixels of UYVY. "subs %2, %2, #16 \n" // 16 processed per loop. "vst1.8 {q1}, [%1]! \n" // store 16 pixels of Y. "bgt 1b \n" : "+r"(src_uyvy), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q1" // Clobber List ); } void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. "vst1.8 {d1}, [%1]! \n" // store 8 U. "vst1.8 {d3}, [%2]! \n" // store 8 V. "bgt 1b \n" : "+r"(src_yuy2), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List ); } void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. "subs %3, %3, #16 \n" // 16 pixels = 8 UVs. "vst1.8 {d0}, [%1]! \n" // store 8 U. "vst1.8 {d2}, [%2]! \n" // store 8 V. 
"bgt 1b \n" : "+r"(src_uyvy), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : : "cc", "memory", "d0", "d1", "d2", "d3" // Clobber List ); } void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "add %1, %0, %1 \n" // stride + src_yuy2 "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of YUY2. "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row YUY2. "vrhadd.u8 d1, d1, d5 \n" // average rows of U "vrhadd.u8 d3, d3, d7 \n" // average rows of V "vst1.8 {d1}, [%2]! \n" // store 8 U. "vst1.8 {d3}, [%3]! \n" // store 8 V. "bgt 1b \n" : "+r"(src_yuy2), // %0 "+r"(stride_yuy2), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List ); } void UYVYToUVRow_NEON(const uint8_t* src_uyvy, int stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "add %1, %0, %1 \n" // stride + src_uyvy "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 16 pixels of UYVY. "subs %4, %4, #16 \n" // 16 pixels = 8 UVs. "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load next row UYVY. "vrhadd.u8 d0, d0, d4 \n" // average rows of U "vrhadd.u8 d2, d2, d6 \n" // average rows of V "vst1.8 {d0}, [%2]! \n" // store 8 U. "vst1.8 {d2}, [%3]! \n" // store 8 V. "bgt 1b \n" : "+r"(src_uyvy), // %0 "+r"(stride_uyvy), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7" // Clobber List ); } // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. void ARGBShuffleRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width) { asm volatile( "vld1.8 {q2}, [%3] \n" // shuffler "1: \n" "vld1.8 {q0}, [%0]! \n" // load 4 pixels. "subs %2, %2, #4 \n" // 4 processed per loop "vtbl.8 d2, {d0, d1}, d4 \n" // look up 2 first pixels "vtbl.8 d3, {d0, d1}, d5 \n" // look up 2 next pixels "vst1.8 {q1}, [%1]! \n" // store 4. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "r"(shuffler) // %3 : "cc", "memory", "q0", "q1", "q2" // Clobber List ); } void I422ToYUY2Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width) { asm volatile( "1: \n" "vld2.8 {d0, d2}, [%0]! \n" // load 16 Ys "vld1.8 {d1}, [%1]! \n" // load 8 Us "vld1.8 {d3}, [%2]! \n" // load 8 Vs "subs %4, %4, #16 \n" // 16 pixels "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 YUY2/16 pixels. "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_yuy2), // %3 "+r"(width) // %4 : : "cc", "memory", "d0", "d1", "d2", "d3"); } void I422ToUYVYRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width) { asm volatile( "1: \n" "vld2.8 {d1, d3}, [%0]! \n" // load 16 Ys "vld1.8 {d0}, [%1]! \n" // load 8 Us "vld1.8 {d2}, [%2]! \n" // load 8 Vs "subs %4, %4, #16 \n" // 16 pixels "vst4.8 {d0, d1, d2, d3}, [%3]! \n" // Store 8 UYVY/16 pixels. "bgt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_uyvy), // %3 "+r"(width) // %4 : : "cc", "memory", "d0", "d1", "d2", "d3"); } void ARGBToRGB565Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb565, int width) { asm volatile( "1: \n" "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. "subs %2, %2, #8 \n" // 8 processed per loop. ARGBTORGB565 "vst1.8 {q0}, [%1]! \n" // store 8 pixels RGB565. 
"bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_rgb565), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q8", "q9", "q10", "q11"); } void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, int width) { asm volatile( "vdup.32 d2, %2 \n" // dither4 "1: \n" "vld4.8 {d20, d21, d22, d23}, [%1]! \n" // load 8 pixels of ARGB. "subs %3, %3, #8 \n" // 8 processed per loop. "vqadd.u8 d20, d20, d2 \n" "vqadd.u8 d21, d21, d2 \n" "vqadd.u8 d22, d22, d2 \n" // add for dither ARGBTORGB565 "vst1.8 {q0}, [%0]! \n" // store 8 RGB565. "bgt 1b \n" : "+r"(dst_rgb) // %0 : "r"(src_argb), // %1 "r"(dither4), // %2 "r"(width) // %3 : "cc", "memory", "q0", "q1", "q8", "q9", "q10", "q11"); } void ARGBToARGB1555Row_NEON(const uint8_t* src_argb, uint8_t* dst_argb1555, int width) { asm volatile( "1: \n" "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. "subs %2, %2, #8 \n" // 8 processed per loop. ARGBTOARGB1555 "vst1.8 {q0}, [%1]! \n" // store 8 ARGB1555. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb1555), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q8", "q9", "q10", "q11"); } void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, uint8_t* dst_argb4444, int width) { asm volatile( "vmov.u8 d4, #0x0f \n" // bits to clear with // vbic. "1: \n" "vld4.8 {d20, d21, d22, d23}, [%0]! \n" // load 8 pixels of ARGB. "subs %2, %2, #8 \n" // 8 processed per loop. ARGBTOARGB4444 "vst1.8 {q0}, [%1]! \n" // store 8 ARGB4444. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb4444), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q8", "q9", "q10", "q11"); } void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient "vmov.u8 d27, #16 \n" // Add 16 constant "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. "subs %2, %2, #8 \n" // 8 processed per loop. "vmull.u8 q2, d0, d24 \n" // B "vmlal.u8 q2, d1, d25 \n" // G "vmlal.u8 q2, d2, d26 \n" // R "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y "vqadd.u8 d0, d27 \n" "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2", "q12", "q13"); } void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, uint8_t* dst_a, int width) { asm volatile( "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels "subs %2, %2, #16 \n" // 16 processed per loop "vst1.8 {q3}, [%1]! \n" // store 16 A's. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_a), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2", "q3" // Clobber List ); } void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d24, #15 \n" // B * 0.11400 coefficient "vmov.u8 d25, #75 \n" // G * 0.58700 coefficient "vmov.u8 d26, #38 \n" // R * 0.29900 coefficient "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. "subs %2, %2, #8 \n" // 8 processed per loop. "vmull.u8 q2, d0, d24 \n" // B "vmlal.u8 q2, d1, d25 \n" // G "vmlal.u8 q2, d2, d26 \n" // R "vqrshrun.s16 d0, q2, #7 \n" // 15 bit to 8 bit Y "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2", "q12", "q13"); } // 8x1 pixels. 
void ARGBToUV444Row_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "vmov.u8 d24, #112 \n" // UB / VR 0.875 // coefficient "vmov.u8 d25, #74 \n" // UG -0.5781 coefficient "vmov.u8 d26, #38 \n" // UR -0.2969 coefficient "vmov.u8 d27, #18 \n" // VB -0.1406 coefficient "vmov.u8 d28, #94 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. "subs %3, %3, #8 \n" // 8 processed per loop. "vmull.u8 q2, d0, d24 \n" // B "vmlsl.u8 q2, d1, d25 \n" // G "vmlsl.u8 q2, d2, d26 \n" // R "vadd.u16 q2, q2, q15 \n" // +128 -> unsigned "vmull.u8 q3, d2, d24 \n" // R "vmlsl.u8 q3, d1, d28 \n" // G "vmlsl.u8 q3, d0, d27 \n" // B "vadd.u16 q3, q3, q15 \n" // +128 -> unsigned "vqshrn.u16 d0, q2, #8 \n" // 16 bit to 8 bit U "vqshrn.u16 d1, q3, #8 \n" // 16 bit to 8 bit V "vst1.8 {d0}, [%1]! \n" // store 8 pixels U. "vst1.8 {d1}, [%2]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q12", "q13", "q14", "q15"); } // clang-format off // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. #define RGBTOUV(QB, QG, QR) \ "vmul.s16 q8, " #QB ", q10 \n" /* B */ \ "vmls.s16 q8, " #QG ", q11 \n" /* G */ \ "vmls.s16 q8, " #QR ", q12 \n" /* R */ \ "vadd.u16 q8, q8, q15 \n" /* +128 -> unsigned */ \ "vmul.s16 q9, " #QR ", q10 \n" /* R */ \ "vmls.s16 q9, " #QG ", q14 \n" /* G */ \ "vmls.s16 q9, " #QB ", q13 \n" /* B */ \ "vadd.u16 q9, q9, q15 \n" /* +128 -> unsigned */ \ "vqshrn.u16 d0, q8, #8 \n" /* 16 bit to 8 bit U */ \ "vqshrn.u16 d1, q9, #8 \n" /* 16 bit to 8 bit V */ // clang-format on // TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr. void ARGBToUVRow_NEON(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_argb "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels. "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. "vrshr.u16 q0, q0, #1 \n" // 2x average "vrshr.u16 q1, q1, #1 \n" "vrshr.u16 q2, q2, #1 \n" "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q0, q1, q2) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(src_stride_argb), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } // TODO(fbarchard): Subsample match C code. 
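// The J (JPEG full-range) variant below has the same structure as
// ARGBToUVRow_NEON; only the coefficients differ (127/84/43 and 20/107
// instead of 112/74/38 and 18/94). In both, vpaddl/vpadal sum each 2x2
// block and vrshr #1 halves that sum, so each lane holds twice the
// rounded average; the halved constants (#127 / 2, etc.) supply the
// remaining factor of 2. Illustrative scalar sketch (helper name is
// not libyuv API):
static inline int SumToHalf2x2_Sketch(int p00, int p01, int p10, int p11) {
  // Matches vpaddl + vpadal + vrshr #1: 2x the rounded 2x2 average.
  return (p00 + p01 + p10 + p11 + 1) >> 1;
}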
void ARGBToUVJRow_NEON(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_argb "vmov.s16 q10, #127 / 2 \n" // UB / VR 0.500 coefficient "vmov.s16 q11, #84 / 2 \n" // UG -0.33126 coefficient "vmov.s16 q12, #43 / 2 \n" // UR -0.16874 coefficient "vmov.s16 q13, #20 / 2 \n" // VB -0.08131 coefficient "vmov.s16 q14, #107 / 2 \n" // VG -0.41869 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB pixels. "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ARGB pixels. "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ARGB pixels. "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. "vrshr.u16 q0, q0, #1 \n" // 2x average "vrshr.u16 q1, q1, #1 \n" "vrshr.u16 q2, q2, #1 \n" "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q0, q1, q2) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(src_stride_argb), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } void BGRAToUVRow_NEON(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_bgra "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 BGRA pixels. "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 BGRA pixels. "vpaddl.u8 q3, q3 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q2, q2 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // R 16 bytes -> 8 shorts. "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more BGRA pixels. "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 BGRA pixels. "vpadal.u8 q3, q7 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q2, q6 \n" // G 16 bytes -> 8 shorts. "vpadal.u8 q1, q5 \n" // R 16 bytes -> 8 shorts. "vrshr.u16 q1, q1, #1 \n" // 2x average "vrshr.u16 q2, q2, #1 \n" "vrshr.u16 q3, q3, #1 \n" "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q3, q2, q1) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
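// BGRA bytes are A, R, G, B in memory, so vld4 above lands A in q0,
// R in q1, G in q2 and B in q3; RGBTOUV(q3, q2, q1) passes them back
// in B, G, R order.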
"bgt 1b \n" : "+r"(src_bgra), // %0 "+r"(src_stride_bgra), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } void ABGRToUVRow_NEON(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_abgr "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ABGR pixels. "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ABGR pixels. "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more ABGR pixels. "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 ABGR pixels. "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts. "vrshr.u16 q0, q0, #1 \n" // 2x average "vrshr.u16 q1, q1, #1 \n" "vrshr.u16 q2, q2, #1 \n" "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q2, q1, q0) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_abgr), // %0 "+r"(src_stride_abgr), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } void RGBAToUVRow_NEON(const uint8_t* src_rgba, int src_stride_rgba, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_rgba "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 RGBA pixels. "vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 RGBA pixels. "vpaddl.u8 q0, q1 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q2 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q2, q3 \n" // R 16 bytes -> 8 shorts. "vld4.8 {d8, d10, d12, d14}, [%1]! \n" // load 8 more RGBA pixels. "vld4.8 {d9, d11, d13, d15}, [%1]! \n" // load last 8 RGBA pixels. "vpadal.u8 q0, q5 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q1, q6 \n" // G 16 bytes -> 8 shorts. "vpadal.u8 q2, q7 \n" // R 16 bytes -> 8 shorts. "vrshr.u16 q0, q0, #1 \n" // 2x average "vrshr.u16 q1, q1, #1 \n" "vrshr.u16 q2, q2, #1 \n" "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q0, q1, q2) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. 
"bgt 1b \n" : "+r"(src_rgba), // %0 "+r"(src_stride_rgba), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_rgb24 "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 "1: \n" "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RGB24 pixels. "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RGB24 pixels. "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RGB24 pixels. "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RGB24 pixels. "vpadal.u8 q0, q4 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. "vpadal.u8 q2, q6 \n" // R 16 bytes -> 8 shorts. "vrshr.u16 q0, q0, #1 \n" // 2x average "vrshr.u16 q1, q1, #1 \n" "vrshr.u16 q2, q2, #1 \n" "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q0, q1, q2) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_rgb24), // %0 "+r"(src_stride_rgb24), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } void RAWToUVRow_NEON(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile ( "add %1, %0, %1 \n" // src_stride + src_raw "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 coefficient "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 "1: \n" "vld3.8 {d0, d2, d4}, [%0]! \n" // load 8 RAW pixels. "vld3.8 {d1, d3, d5}, [%0]! \n" // load next 8 RAW pixels. "vpaddl.u8 q2, q2 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q0, q0 \n" // R 16 bytes -> 8 shorts. "vld3.8 {d8, d10, d12}, [%1]! \n" // load 8 more RAW pixels. "vld3.8 {d9, d11, d13}, [%1]! \n" // load last 8 RAW pixels. "vpadal.u8 q2, q6 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q1, q5 \n" // G 16 bytes -> 8 shorts. "vpadal.u8 q0, q4 \n" // R 16 bytes -> 8 shorts. "vrshr.u16 q0, q0, #1 \n" // 2x average "vrshr.u16 q1, q1, #1 \n" "vrshr.u16 q2, q2, #1 \n" "subs %4, %4, #16 \n" // 32 processed per loop. RGBTOUV(q2, q1, q0) "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_raw), // %0 "+r"(src_stride_raw), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. 
void RGB565ToUVRow_NEON(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "add %1, %0, %1 \n" // src_stride + src_argb "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 // coefficient "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 "1: \n" "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. RGB565TOARGB "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. "vld1.8 {q0}, [%0]! \n" // next 8 RGB565 pixels. RGB565TOARGB "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. "vld1.8 {q0}, [%1]! \n" // load 8 RGB565 pixels. RGB565TOARGB "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. "vld1.8 {q0}, [%1]! \n" // next 8 RGB565 pixels. RGB565TOARGB "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. "vrshr.u16 q4, q4, #1 \n" // 2x average "vrshr.u16 q5, q5, #1 \n" "vrshr.u16 q6, q6, #1 \n" "subs %4, %4, #16 \n" // 16 processed per loop. "vmul.s16 q8, q4, q10 \n" // B "vmls.s16 q8, q5, q11 \n" // G "vmls.s16 q8, q6, q12 \n" // R "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned "vmul.s16 q9, q6, q10 \n" // R "vmls.s16 q9, q5, q14 \n" // G "vmls.s16 q9, q4, q13 \n" // B "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_rgb565), // %0 "+r"(src_stride_rgb565), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "add %1, %0, %1 \n" // src_stride + src_argb "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 // coefficient "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 "1: \n" "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. RGB555TOARGB "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. "vld1.8 {q0}, [%0]! \n" // next 8 ARGB1555 pixels. RGB555TOARGB "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. "vld1.8 {q0}, [%1]! \n" // load 8 ARGB1555 pixels. RGB555TOARGB "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. "vld1.8 {q0}, [%1]! \n" // next 8 ARGB1555 pixels. RGB555TOARGB "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. 
"vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. "vrshr.u16 q4, q4, #1 \n" // 2x average "vrshr.u16 q5, q5, #1 \n" "vrshr.u16 q6, q6, #1 \n" "subs %4, %4, #16 \n" // 16 processed per loop. "vmul.s16 q8, q4, q10 \n" // B "vmls.s16 q8, q5, q11 \n" // G "vmls.s16 q8, q6, q12 \n" // R "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned "vmul.s16 q9, q6, q10 \n" // R "vmls.s16 q9, q5, q14 \n" // G "vmls.s16 q9, q4, q13 \n" // B "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_argb1555), // %0 "+r"(src_stride_argb1555), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "add %1, %0, %1 \n" // src_stride + src_argb "vmov.s16 q10, #112 / 2 \n" // UB / VR 0.875 // coefficient "vmov.s16 q11, #74 / 2 \n" // UG -0.5781 coefficient "vmov.s16 q12, #38 / 2 \n" // UR -0.2969 coefficient "vmov.s16 q13, #18 / 2 \n" // VB -0.1406 coefficient "vmov.s16 q14, #94 / 2 \n" // VG -0.7344 coefficient "vmov.u16 q15, #0x8080 \n" // 128.5 "1: \n" "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. ARGB4444TOARGB "vpaddl.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. "vpaddl.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. "vpaddl.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. "vld1.8 {q0}, [%0]! \n" // next 8 ARGB4444 pixels. ARGB4444TOARGB "vpaddl.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. "vpaddl.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. "vpaddl.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. "vld1.8 {q0}, [%1]! \n" // load 8 ARGB4444 pixels. ARGB4444TOARGB "vpadal.u8 d8, d0 \n" // B 8 bytes -> 4 shorts. "vpadal.u8 d10, d1 \n" // G 8 bytes -> 4 shorts. "vpadal.u8 d12, d2 \n" // R 8 bytes -> 4 shorts. "vld1.8 {q0}, [%1]! \n" // next 8 ARGB4444 pixels. ARGB4444TOARGB "vpadal.u8 d9, d0 \n" // B 8 bytes -> 4 shorts. "vpadal.u8 d11, d1 \n" // G 8 bytes -> 4 shorts. "vpadal.u8 d13, d2 \n" // R 8 bytes -> 4 shorts. "vrshr.u16 q4, q4, #1 \n" // 2x average "vrshr.u16 q5, q5, #1 \n" "vrshr.u16 q6, q6, #1 \n" "subs %4, %4, #16 \n" // 16 processed per loop. "vmul.s16 q8, q4, q10 \n" // B "vmls.s16 q8, q5, q11 \n" // G "vmls.s16 q8, q6, q12 \n" // R "vadd.u16 q8, q8, q15 \n" // +128 -> unsigned "vmul.s16 q9, q6, q10 \n" // R "vmls.s16 q9, q5, q14 \n" // G "vmls.s16 q9, q4, q13 \n" // B "vadd.u16 q9, q9, q15 \n" // +128 -> unsigned "vqshrn.u16 d0, q8, #8 \n" // 16 bit to 8 bit U "vqshrn.u16 d1, q9, #8 \n" // 16 bit to 8 bit V "vst1.8 {d0}, [%2]! \n" // store 8 pixels U. "vst1.8 {d1}, [%3]! \n" // store 8 pixels V. "bgt 1b \n" : "+r"(src_argb4444), // %0 "+r"(src_stride_argb4444), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient "vmov.u8 d27, #16 \n" // Add 16 constant "1: \n" "vld1.8 {q0}, [%0]! \n" // load 8 RGB565 pixels. 
"subs %2, %2, #8 \n" // 8 processed per loop. RGB565TOARGB "vmull.u8 q2, d0, d24 \n" // B "vmlal.u8 q2, d1, d25 \n" // G "vmlal.u8 q2, d2, d26 \n" // R "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y "vqadd.u8 d0, d27 \n" "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. "bgt 1b \n" : "+r"(src_rgb565), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"); } void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient "vmov.u8 d27, #16 \n" // Add 16 constant "1: \n" "vld1.8 {q0}, [%0]! \n" // load 8 ARGB1555 pixels. "subs %2, %2, #8 \n" // 8 processed per loop. ARGB1555TOARGB "vmull.u8 q2, d0, d24 \n" // B "vmlal.u8 q2, d1, d25 \n" // G "vmlal.u8 q2, d2, d26 \n" // R "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y "vqadd.u8 d0, d27 \n" "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. "bgt 1b \n" : "+r"(src_argb1555), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"); } void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d24, #13 \n" // B * 0.1016 coefficient "vmov.u8 d25, #65 \n" // G * 0.5078 coefficient "vmov.u8 d26, #33 \n" // R * 0.2578 coefficient "vmov.u8 d27, #16 \n" // Add 16 constant "1: \n" "vld1.8 {q0}, [%0]! \n" // load 8 ARGB4444 pixels. "subs %2, %2, #8 \n" // 8 processed per loop. ARGB4444TOARGB "vmull.u8 q2, d0, d24 \n" // B "vmlal.u8 q2, d1, d25 \n" // G "vmlal.u8 q2, d2, d26 \n" // R "vqrshrun.s16 d0, q2, #7 \n" // 16 bit to 8 bit Y "vqadd.u8 d0, d27 \n" "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. "bgt 1b \n" : "+r"(src_argb4444), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q2", "q3", "q12", "q13"); } void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient "vmov.u8 d7, #16 \n" // Add 16 constant "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of BGRA. "subs %2, %2, #8 \n" // 8 processed per loop. "vmull.u8 q8, d1, d4 \n" // R "vmlal.u8 q8, d2, d5 \n" // G "vmlal.u8 q8, d3, d6 \n" // B "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y "vqadd.u8 d0, d7 \n" "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. "bgt 1b \n" : "+r"(src_bgra), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient "vmov.u8 d7, #16 \n" // Add 16 constant "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ABGR. "subs %2, %2, #8 \n" // 8 processed per loop. "vmull.u8 q8, d0, d4 \n" // R "vmlal.u8 q8, d1, d5 \n" // G "vmlal.u8 q8, d2, d6 \n" // B "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y "vqadd.u8 d0, d7 \n" "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. 
"bgt 1b \n" : "+r"(src_abgr), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient "vmov.u8 d7, #16 \n" // Add 16 constant "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of RGBA. "subs %2, %2, #8 \n" // 8 processed per loop. "vmull.u8 q8, d1, d4 \n" // B "vmlal.u8 q8, d2, d5 \n" // G "vmlal.u8 q8, d3, d6 \n" // R "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y "vqadd.u8 d0, d7 \n" "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. "bgt 1b \n" : "+r"(src_rgba), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d4, #13 \n" // B * 0.1016 coefficient "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient "vmov.u8 d6, #33 \n" // R * 0.2578 coefficient "vmov.u8 d7, #16 \n" // Add 16 constant "1: \n" "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RGB24. "subs %2, %2, #8 \n" // 8 processed per loop. "vmull.u8 q8, d0, d4 \n" // B "vmlal.u8 q8, d1, d5 \n" // G "vmlal.u8 q8, d2, d6 \n" // R "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y "vqadd.u8 d0, d7 \n" "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. "bgt 1b \n" : "+r"(src_rgb24), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) { asm volatile( "vmov.u8 d4, #33 \n" // R * 0.2578 coefficient "vmov.u8 d5, #65 \n" // G * 0.5078 coefficient "vmov.u8 d6, #13 \n" // B * 0.1016 coefficient "vmov.u8 d7, #16 \n" // Add 16 constant "1: \n" "vld3.8 {d0, d1, d2}, [%0]! \n" // load 8 pixels of RAW. "subs %2, %2, #8 \n" // 8 processed per loop. "vmull.u8 q8, d0, d4 \n" // B "vmlal.u8 q8, d1, d5 \n" // G "vmlal.u8 q8, d2, d6 \n" // R "vqrshrun.s16 d0, q8, #7 \n" // 16 bit to 8 bit Y "vqadd.u8 d0, d7 \n" "vst1.8 {d0}, [%1]! \n" // store 8 pixels Y. "bgt 1b \n" : "+r"(src_raw), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "q8"); } // Bilinear filter 16x2 -> 16x1 void InterpolateRow_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { int y1_fraction = source_y_fraction; asm volatile( "cmp %4, #0 \n" "beq 100f \n" "add %2, %1 \n" "cmp %4, #128 \n" "beq 50f \n" "vdup.8 d5, %4 \n" "rsb %4, #256 \n" "vdup.8 d4, %4 \n" // General purpose row blend. "1: \n" "vld1.8 {q0}, [%1]! \n" "vld1.8 {q1}, [%2]! \n" "subs %3, %3, #16 \n" "vmull.u8 q13, d0, d4 \n" "vmull.u8 q14, d1, d4 \n" "vmlal.u8 q13, d2, d5 \n" "vmlal.u8 q14, d3, d5 \n" "vrshrn.u16 d0, q13, #8 \n" "vrshrn.u16 d1, q14, #8 \n" "vst1.8 {q0}, [%0]! \n" "bgt 1b \n" "b 99f \n" // Blend 50 / 50. "50: \n" "vld1.8 {q0}, [%1]! \n" "vld1.8 {q1}, [%2]! \n" "subs %3, %3, #16 \n" "vrhadd.u8 q0, q1 \n" "vst1.8 {q0}, [%0]! \n" "bgt 50b \n" "b 99f \n" // Blend 100 / 0 - Copy row unchanged. "100: \n" "vld1.8 {q0}, [%1]! \n" "subs %3, %3, #16 \n" "vst1.8 {q0}, [%0]! 
\n" "bgt 100b \n" "99: \n" : "+r"(dst_ptr), // %0 "+r"(src_ptr), // %1 "+r"(src_stride), // %2 "+r"(dst_width), // %3 "+r"(y1_fraction) // %4 : : "cc", "memory", "q0", "q1", "d4", "d5", "q13", "q14"); } // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr void ARGBBlendRow_NEON(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( "subs %3, #8 \n" "blt 89f \n" // Blend 8 pixels. "8: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB0. "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 pixels of ARGB1. "subs %3, %3, #8 \n" // 8 processed per loop. "vmull.u8 q10, d4, d3 \n" // db * a "vmull.u8 q11, d5, d3 \n" // dg * a "vmull.u8 q12, d6, d3 \n" // dr * a "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 "vqadd.u8 q0, q0, q2 \n" // + sbg "vqadd.u8 d2, d2, d6 \n" // + sr "vmov.u8 d3, #255 \n" // a = 255 "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 pixels of ARGB. "bge 8b \n" "89: \n" "adds %3, #8-1 \n" "blt 99f \n" // Blend 1 pixels. "1: \n" "vld4.8 {d0[0],d1[0],d2[0],d3[0]}, [%0]! \n" // load 1 pixel ARGB0. "vld4.8 {d4[0],d5[0],d6[0],d7[0]}, [%1]! \n" // load 1 pixel ARGB1. "subs %3, %3, #1 \n" // 1 processed per loop. "vmull.u8 q10, d4, d3 \n" // db * a "vmull.u8 q11, d5, d3 \n" // dg * a "vmull.u8 q12, d6, d3 \n" // dr * a "vqrshrn.u16 d20, q10, #8 \n" // db >>= 8 "vqrshrn.u16 d21, q11, #8 \n" // dg >>= 8 "vqrshrn.u16 d22, q12, #8 \n" // dr >>= 8 "vqsub.u8 q2, q2, q10 \n" // dbg - dbg * a / 256 "vqsub.u8 d6, d6, d22 \n" // dr - dr * a / 256 "vqadd.u8 q0, q0, q2 \n" // + sbg "vqadd.u8 d2, d2, d6 \n" // + sr "vmov.u8 d3, #255 \n" // a = 255 "vst4.8 {d0[0],d1[0],d2[0],d3[0]}, [%2]! \n" // store 1 pixel. "bge 1b \n" "99: \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12"); } // Attenuate 8 pixels at a time. void ARGBAttenuateRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( // Attenuate 8 pixels. "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 pixels of ARGB. "subs %2, %2, #8 \n" // 8 processed per loop. "vmull.u8 q10, d0, d3 \n" // b * a "vmull.u8 q11, d1, d3 \n" // g * a "vmull.u8 q12, d2, d3 \n" // r * a "vqrshrn.u16 d0, q10, #8 \n" // b >>= 8 "vqrshrn.u16 d1, q11, #8 \n" // g >>= 8 "vqrshrn.u16 d2, q12, #8 \n" // r >>= 8 "vst4.8 {d0, d1, d2, d3}, [%1]! \n" // store 8 pixels of ARGB. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "q0", "q1", "q10", "q11", "q12"); } // Quantize 8 ARGB pixels (32 bytes). // dst = (dst * scale >> 16) * interval_size + interval_offset; void ARGBQuantizeRow_NEON(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width) { asm volatile( "vdup.u16 q8, %2 \n" "vshr.u16 q8, q8, #1 \n" // scale >>= 1 "vdup.u16 q9, %3 \n" // interval multiply. "vdup.u16 q10, %4 \n" // interval add // 8 pixel loop. "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0] \n" // load 8 pixels of ARGB. "subs %1, %1, #8 \n" // 8 processed per loop. "vmovl.u8 q0, d0 \n" // b (0 .. 
255)
      "vmovl.u8 q1, d2 \n"
      "vmovl.u8 q2, d4 \n"
      "vqdmulh.s16 q0, q0, q8 \n"  // b * scale
      "vqdmulh.s16 q1, q1, q8 \n"  // g
      "vqdmulh.s16 q2, q2, q8 \n"  // r
      "vmul.u16 q0, q0, q9 \n"  // b * interval_size
      "vmul.u16 q1, q1, q9 \n"  // g
      "vmul.u16 q2, q2, q9 \n"  // r
      "vadd.u16 q0, q0, q10 \n"  // b + interval_offset
      "vadd.u16 q1, q1, q10 \n"  // g
      "vadd.u16 q2, q2, q10 \n"  // r
      "vqmovn.u16 d0, q0 \n"
      "vqmovn.u16 d2, q1 \n"
      "vqmovn.u16 d4, q2 \n"
      "vst4.8 {d0, d2, d4, d6}, [%0]! \n"  // store 8 pixels of ARGB.
      "bgt 1b \n"
      : "+r"(dst_argb),  // %0
        "+r"(width)  // %1
      : "r"(scale),  // %2
        "r"(interval_size),  // %3
        "r"(interval_offset)  // %4
      : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10");
}

// Shade 8 pixels at a time by specified value.
// NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scalar register from 0 to 8.
// Rounding in vqrdmulh does +1 to high if high bit of low s16 is set.
void ARGBShadeRow_NEON(const uint8_t* src_argb,
                       uint8_t* dst_argb,
                       int width,
                       uint32_t value) {
  asm volatile(
      "vdup.u32 q0, %3 \n"  // duplicate scale value.
      "vzip.u8 d0, d1 \n"  // d0 aarrggbb.
      "vshr.u16 q0, q0, #1 \n"  // scale / 2.
      // 8 pixel loop.
      "1: \n"
      "vld4.8 {d20, d22, d24, d26}, [%0]! \n"  // load 8 pixels of ARGB.
      "subs %2, %2, #8 \n"  // 8 processed per loop.
      "vmovl.u8 q10, d20 \n"  // b (0 .. 255)
      "vmovl.u8 q11, d22 \n"
      "vmovl.u8 q12, d24 \n"
      "vmovl.u8 q13, d26 \n"
      "vqrdmulh.s16 q10, q10, d0[0] \n"  // b * scale * 2
      "vqrdmulh.s16 q11, q11, d0[1] \n"  // g
      "vqrdmulh.s16 q12, q12, d0[2] \n"  // r
      "vqrdmulh.s16 q13, q13, d0[3] \n"  // a
      "vqmovn.u16 d20, q10 \n"
      "vqmovn.u16 d22, q11 \n"
      "vqmovn.u16 d24, q12 \n"
      "vqmovn.u16 d26, q13 \n"
      "vst4.8 {d20, d22, d24, d26}, [%1]! \n"  // store 8 pixels of ARGB.
      "bgt 1b \n"
      : "+r"(src_argb),  // %0
        "+r"(dst_argb),  // %1
        "+r"(width)  // %2
      : "r"(value)  // %3
      : "cc", "memory", "q0", "q10", "q11", "q12", "q13");
}

// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels
// Similar to ARGBToYJ but stores ARGB.
// C code is (15 * b + 75 * g + 38 * r + 64) >> 7;
void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) {
  asm volatile(
      "vmov.u8 d24, #15 \n"  // B * 0.11400 coefficient
      "vmov.u8 d25, #75 \n"  // G * 0.58700 coefficient
      "vmov.u8 d26, #38 \n"  // R * 0.29900 coefficient
      "1: \n"
      "vld4.8 {d0, d1, d2, d3}, [%0]! \n"  // load 8 ARGB pixels.
      "subs %2, %2, #8 \n"  // 8 processed per loop.
      "vmull.u8 q2, d0, d24 \n"  // B
      "vmlal.u8 q2, d1, d25 \n"  // G
      "vmlal.u8 q2, d2, d26 \n"  // R
      "vqrshrun.s16 d0, q2, #7 \n"  // 15 bit to 8 bit B
      "vmov d1, d0 \n"  // G
      "vmov d2, d0 \n"  // R
      "vst4.8 {d0, d1, d2, d3}, [%1]! \n"  // store 8 ARGB pixels.
      "bgt 1b \n"
      : "+r"(src_argb),  // %0
        "+r"(dst_argb),  // %1
        "+r"(width)  // %2
      :
      : "cc", "memory", "q0", "q1", "q2", "q12", "q13");
}

// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
// b = (r * 35 + g * 68 + b * 17) >> 7
// g = (r * 45 + g * 88 + b * 22) >> 7
// r = (r * 50 + g * 98 + b * 24) >> 7
void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) {
  asm volatile(
      "vmov.u8 d20, #17 \n"  // BB coefficient
      "vmov.u8 d21, #68 \n"  // BG coefficient
      "vmov.u8 d22, #35 \n"  // BR coefficient
      "vmov.u8 d24, #22 \n"  // GB coefficient
      "vmov.u8 d25, #88 \n"  // GG coefficient
      "vmov.u8 d26, #45 \n"  // GR coefficient
      "vmov.u8 d28, #24 \n"  // RB coefficient
      "vmov.u8 d29, #98 \n"  // RG coefficient
      "vmov.u8 d30, #50 \n"  // RR coefficient
      "1: \n"
      "vld4.8 {d0, d1, d2, d3}, [%0] \n"  // load 8 ARGB pixels.
      "subs %1, %1, #8 \n"  // 8 processed per loop.
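      // Scalar reference for the sepia kernel below (a sketch only, matching
      // the matrix in the comment above; not part of the libyuv API):
      //   nb = min(255, (17 * b + 68 * g + 35 * r) >> 7);
      //   ng = min(255, (22 * b + 88 * g + 45 * r) >> 7);
      //   nr = min(255, (24 * b + 98 * g + 50 * r) >> 7);
      // vqshrn.u16 #7 provides the shift with unsigned saturation; alpha in
      // d3 passes through unchanged.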
"vmull.u8 q2, d0, d20 \n" // B to Sepia B "vmlal.u8 q2, d1, d21 \n" // G "vmlal.u8 q2, d2, d22 \n" // R "vmull.u8 q3, d0, d24 \n" // B to Sepia G "vmlal.u8 q3, d1, d25 \n" // G "vmlal.u8 q3, d2, d26 \n" // R "vmull.u8 q8, d0, d28 \n" // B to Sepia R "vmlal.u8 q8, d1, d29 \n" // G "vmlal.u8 q8, d2, d30 \n" // R "vqshrn.u16 d0, q2, #7 \n" // 16 bit to 8 bit B "vqshrn.u16 d1, q3, #7 \n" // 16 bit to 8 bit G "vqshrn.u16 d2, q8, #7 \n" // 16 bit to 8 bit R "vst4.8 {d0, d1, d2, d3}, [%0]! \n" // store 8 ARGB pixels. "bgt 1b \n" : "+r"(dst_argb), // %0 "+r"(width) // %1 : : "cc", "memory", "q0", "q1", "q2", "q3", "q10", "q11", "q12", "q13", "q14", "q15"); } // Tranform 8 ARGB pixels (32 bytes) with color matrix. // TODO(fbarchard): Was same as Sepia except matrix is provided. This function // needs to saturate. Consider doing a non-saturating version. void ARGBColorMatrixRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width) { asm volatile( "vld1.8 {q2}, [%3] \n" // load 3 ARGB vectors. "vmovl.s8 q0, d4 \n" // B,G coefficients s16. "vmovl.s8 q1, d5 \n" // R,A coefficients s16. "1: \n" "vld4.8 {d16, d18, d20, d22}, [%0]! \n" // load 8 ARGB pixels. "subs %2, %2, #8 \n" // 8 processed per loop. "vmovl.u8 q8, d16 \n" // b (0 .. 255) 16 bit "vmovl.u8 q9, d18 \n" // g "vmovl.u8 q10, d20 \n" // r "vmovl.u8 q11, d22 \n" // a "vmul.s16 q12, q8, d0[0] \n" // B = B * Matrix B "vmul.s16 q13, q8, d1[0] \n" // G = B * Matrix G "vmul.s16 q14, q8, d2[0] \n" // R = B * Matrix R "vmul.s16 q15, q8, d3[0] \n" // A = B * Matrix A "vmul.s16 q4, q9, d0[1] \n" // B += G * Matrix B "vmul.s16 q5, q9, d1[1] \n" // G += G * Matrix G "vmul.s16 q6, q9, d2[1] \n" // R += G * Matrix R "vmul.s16 q7, q9, d3[1] \n" // A += G * Matrix A "vqadd.s16 q12, q12, q4 \n" // Accumulate B "vqadd.s16 q13, q13, q5 \n" // Accumulate G "vqadd.s16 q14, q14, q6 \n" // Accumulate R "vqadd.s16 q15, q15, q7 \n" // Accumulate A "vmul.s16 q4, q10, d0[2] \n" // B += R * Matrix B "vmul.s16 q5, q10, d1[2] \n" // G += R * Matrix G "vmul.s16 q6, q10, d2[2] \n" // R += R * Matrix R "vmul.s16 q7, q10, d3[2] \n" // A += R * Matrix A "vqadd.s16 q12, q12, q4 \n" // Accumulate B "vqadd.s16 q13, q13, q5 \n" // Accumulate G "vqadd.s16 q14, q14, q6 \n" // Accumulate R "vqadd.s16 q15, q15, q7 \n" // Accumulate A "vmul.s16 q4, q11, d0[3] \n" // B += A * Matrix B "vmul.s16 q5, q11, d1[3] \n" // G += A * Matrix G "vmul.s16 q6, q11, d2[3] \n" // R += A * Matrix R "vmul.s16 q7, q11, d3[3] \n" // A += A * Matrix A "vqadd.s16 q12, q12, q4 \n" // Accumulate B "vqadd.s16 q13, q13, q5 \n" // Accumulate G "vqadd.s16 q14, q14, q6 \n" // Accumulate R "vqadd.s16 q15, q15, q7 \n" // Accumulate A "vqshrun.s16 d16, q12, #6 \n" // 16 bit to 8 bit B "vqshrun.s16 d18, q13, #6 \n" // 16 bit to 8 bit G "vqshrun.s16 d20, q14, #6 \n" // 16 bit to 8 bit R "vqshrun.s16 d22, q15, #6 \n" // 16 bit to 8 bit A "vst4.8 {d16, d18, d20, d22}, [%1]! \n" // store 8 ARGB pixels. "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "r"(matrix_argb) // %3 : "cc", "memory", "q0", "q1", "q2", "q4", "q5", "q6", "q7", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15"); } // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. void ARGBMultiplyRow_NEON(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( // 8 pixel loop. "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. "vld4.8 {d1, d3, d5, d7}, [%1]! \n" // load 8 more ARGB "subs %3, %3, #8 \n" // 8 processed per loop. 
"vmull.u8 q0, d0, d1 \n" // multiply B "vmull.u8 q1, d2, d3 \n" // multiply G "vmull.u8 q2, d4, d5 \n" // multiply R "vmull.u8 q3, d6, d7 \n" // multiply A "vrshrn.u16 d0, q0, #8 \n" // 16 bit to 8 bit B "vrshrn.u16 d1, q1, #8 \n" // 16 bit to 8 bit G "vrshrn.u16 d2, q2, #8 \n" // 16 bit to 8 bit R "vrshrn.u16 d3, q3, #8 \n" // 16 bit to 8 bit A "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. "bgt 1b \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "cc", "memory", "q0", "q1", "q2", "q3"); } // Add 2 rows of ARGB pixels together, 8 pixels at a time. void ARGBAddRow_NEON(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( // 8 pixel loop. "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB "subs %3, %3, #8 \n" // 8 processed per loop. "vqadd.u8 q0, q0, q2 \n" // add B, G "vqadd.u8 q1, q1, q3 \n" // add R, A "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. "bgt 1b \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "cc", "memory", "q0", "q1", "q2", "q3"); } // Subtract 2 rows of ARGB pixels, 8 pixels at a time. void ARGBSubtractRow_NEON(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( // 8 pixel loop. "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // load 8 ARGB pixels. "vld4.8 {d4, d5, d6, d7}, [%1]! \n" // load 8 more ARGB "subs %3, %3, #8 \n" // 8 processed per loop. "vqsub.u8 q0, q0, q2 \n" // subtract B, G "vqsub.u8 q1, q1, q3 \n" // subtract R, A "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. "bgt 1b \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "cc", "memory", "q0", "q1", "q2", "q3"); } // Adds Sobel X and Sobel Y and stores Sobel into ARGB. // A = 255 // R = Sobel // G = Sobel // B = Sobel void SobelRow_NEON(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d3, #255 \n" // alpha // 8 pixel loop. "1: \n" "vld1.8 {d0}, [%0]! \n" // load 8 sobelx. "vld1.8 {d1}, [%1]! \n" // load 8 sobely. "subs %3, %3, #8 \n" // 8 processed per loop. "vqadd.u8 d0, d0, d1 \n" // add "vmov.u8 d1, d0 \n" "vmov.u8 d2, d0 \n" "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. "bgt 1b \n" : "+r"(src_sobelx), // %0 "+r"(src_sobely), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "cc", "memory", "q0", "q1"); } // Adds Sobel X and Sobel Y and stores Sobel into plane. void SobelToPlaneRow_NEON(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width) { asm volatile( // 16 pixel loop. "1: \n" "vld1.8 {q0}, [%0]! \n" // load 16 sobelx. "vld1.8 {q1}, [%1]! \n" // load 16 sobely. "subs %3, %3, #16 \n" // 16 processed per loop. "vqadd.u8 q0, q0, q1 \n" // add "vst1.8 {q0}, [%2]! \n" // store 16 pixels. "bgt 1b \n" : "+r"(src_sobelx), // %0 "+r"(src_sobely), // %1 "+r"(dst_y), // %2 "+r"(width) // %3 : : "cc", "memory", "q0", "q1"); } // Mixes Sobel X, Sobel Y and Sobel into ARGB. // A = 255 // R = Sobel X // G = Sobel // B = Sobel Y void SobelXYRow_NEON(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) { asm volatile( "vmov.u8 d3, #255 \n" // alpha // 8 pixel loop. "1: \n" "vld1.8 {d2}, [%0]! \n" // load 8 sobelx. "vld1.8 {d0}, [%1]! \n" // load 8 sobely. "subs %3, %3, #8 \n" // 8 processed per loop. 
"vqadd.u8 d1, d0, d2 \n" // add "vst4.8 {d0, d1, d2, d3}, [%2]! \n" // store 8 ARGB pixels. "bgt 1b \n" : "+r"(src_sobelx), // %0 "+r"(src_sobely), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "cc", "memory", "q0", "q1"); } // SobelX as a matrix is // -1 0 1 // -2 0 2 // -1 0 1 void SobelXRow_NEON(const uint8_t* src_y0, const uint8_t* src_y1, const uint8_t* src_y2, uint8_t* dst_sobelx, int width) { asm volatile( "1: \n" "vld1.8 {d0}, [%0],%5 \n" // top "vld1.8 {d1}, [%0],%6 \n" "vsubl.u8 q0, d0, d1 \n" "vld1.8 {d2}, [%1],%5 \n" // center * 2 "vld1.8 {d3}, [%1],%6 \n" "vsubl.u8 q1, d2, d3 \n" "vadd.s16 q0, q0, q1 \n" "vadd.s16 q0, q0, q1 \n" "vld1.8 {d2}, [%2],%5 \n" // bottom "vld1.8 {d3}, [%2],%6 \n" "subs %4, %4, #8 \n" // 8 pixels "vsubl.u8 q1, d2, d3 \n" "vadd.s16 q0, q0, q1 \n" "vabs.s16 q0, q0 \n" "vqmovn.u16 d0, q0 \n" "vst1.8 {d0}, [%3]! \n" // store 8 sobelx "bgt 1b \n" : "+r"(src_y0), // %0 "+r"(src_y1), // %1 "+r"(src_y2), // %2 "+r"(dst_sobelx), // %3 "+r"(width) // %4 : "r"(2), // %5 "r"(6) // %6 : "cc", "memory", "q0", "q1" // Clobber List ); } // SobelY as a matrix is // -1 -2 -1 // 0 0 0 // 1 2 1 void SobelYRow_NEON(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width) { asm volatile( "1: \n" "vld1.8 {d0}, [%0],%4 \n" // left "vld1.8 {d1}, [%1],%4 \n" "vsubl.u8 q0, d0, d1 \n" "vld1.8 {d2}, [%0],%4 \n" // center * 2 "vld1.8 {d3}, [%1],%4 \n" "vsubl.u8 q1, d2, d3 \n" "vadd.s16 q0, q0, q1 \n" "vadd.s16 q0, q0, q1 \n" "vld1.8 {d2}, [%0],%5 \n" // right "vld1.8 {d3}, [%1],%5 \n" "subs %3, %3, #8 \n" // 8 pixels "vsubl.u8 q1, d2, d3 \n" "vadd.s16 q0, q0, q1 \n" "vabs.s16 q0, q0 \n" "vqmovn.u16 d0, q0 \n" "vst1.8 {d0}, [%2]! \n" // store 8 sobely "bgt 1b \n" : "+r"(src_y0), // %0 "+r"(src_y1), // %1 "+r"(dst_sobely), // %2 "+r"(width) // %3 : "r"(1), // %4 "r"(6) // %5 : "cc", "memory", "q0", "q1" // Clobber List ); } // %y passes a float as a scalar vector for vector * scalar multiply. // the regoster must be d0 to d15 and indexed with [0] or [1] to access // the float in the first or second float of the d-reg void HalfFloat1Row_NEON(const uint16_t* src, uint16_t* dst, float /*unused*/, int width) { asm volatile( "1: \n" "vld1.8 {q1}, [%0]! \n" // load 8 shorts "subs %2, %2, #8 \n" // 8 pixels per loop "vmovl.u16 q2, d2 \n" // 8 int's "vmovl.u16 q3, d3 \n" "vcvt.f32.u32 q2, q2 \n" // 8 floats "vcvt.f32.u32 q3, q3 \n" "vmul.f32 q2, q2, %y3 \n" // adjust exponent "vmul.f32 q3, q3, %y3 \n" "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat "vqshrn.u32 d3, q3, #13 \n" "vst1.8 {q1}, [%1]! \n" "bgt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "w"(1.9259299444e-34f) // %3 : "cc", "memory", "q1", "q2", "q3"); } void HalfFloatRow_NEON(const uint16_t* src, uint16_t* dst, float scale, int width) { asm volatile( "1: \n" "vld1.8 {q1}, [%0]! \n" // load 8 shorts "subs %2, %2, #8 \n" // 8 pixels per loop "vmovl.u16 q2, d2 \n" // 8 int's "vmovl.u16 q3, d3 \n" "vcvt.f32.u32 q2, q2 \n" // 8 floats "vcvt.f32.u32 q3, q3 \n" "vmul.f32 q2, q2, %y3 \n" // adjust exponent "vmul.f32 q3, q3, %y3 \n" "vqshrn.u32 d2, q2, #13 \n" // isolate halffloat "vqshrn.u32 d3, q3, #13 \n" "vst1.8 {q1}, [%1]! \n" "bgt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "w"(scale * 1.9259299444e-34f) // %3 : "cc", "memory", "q1", "q2", "q3"); } void ByteToFloatRow_NEON(const uint8_t* src, float* dst, float scale, int width) { asm volatile( "1: \n" "vld1.8 {d2}, [%0]! 
\n" // load 8 bytes "subs %2, %2, #8 \n" // 8 pixels per loop "vmovl.u8 q1, d2 \n" // 8 shorts "vmovl.u16 q2, d2 \n" // 8 ints "vmovl.u16 q3, d3 \n" "vcvt.f32.u32 q2, q2 \n" // 8 floats "vcvt.f32.u32 q3, q3 \n" "vmul.f32 q2, q2, %y3 \n" // scale "vmul.f32 q3, q3, %y3 \n" "vst1.8 {q2, q3}, [%1]! \n" // store 8 floats "bgt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "w"(scale) // %3 : "cc", "memory", "q1", "q2", "q3"); } #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__).. #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/row_neon64.cc000066400000000000000000004112121357355204000225050ustar00rootroot00000000000000/* * Copyright 2014 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // This module is for GCC Neon armv8 64 bit. #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) // Read 8 Y, 4 U and 4 V from 422 #define READYUV422 \ "ld1 {v0.8b}, [%0], #8 \n" \ "ld1 {v1.s}[0], [%1], #4 \n" \ "ld1 {v1.s}[1], [%2], #4 \n" // Read 8 Y, 8 U and 8 V from 444 #define READYUV444 \ "ld1 {v0.8b}, [%0], #8 \n" \ "ld1 {v1.d}[0], [%1], #8 \n" \ "ld1 {v1.d}[1], [%2], #8 \n" \ "uaddlp v1.8h, v1.16b \n" \ "rshrn v1.8b, v1.8h, #1 \n" // Read 8 Y, and set 4 U and 4 V to 128 #define READYUV400 \ "ld1 {v0.8b}, [%0], #8 \n" \ "movi v1.8b , #128 \n" // Read 8 Y and 4 UV from NV12 #define READNV12 \ "ld1 {v0.8b}, [%0], #8 \n" \ "ld1 {v2.8b}, [%1], #8 \n" \ "uzp1 v1.8b, v2.8b, v2.8b \n" \ "uzp2 v3.8b, v2.8b, v2.8b \n" \ "ins v1.s[1], v3.s[0] \n" // Read 8 Y and 4 VU from NV21 #define READNV21 \ "ld1 {v0.8b}, [%0], #8 \n" \ "ld1 {v2.8b}, [%1], #8 \n" \ "uzp1 v3.8b, v2.8b, v2.8b \n" \ "uzp2 v1.8b, v2.8b, v2.8b \n" \ "ins v1.s[1], v3.s[0] \n" // Read 8 YUY2 #define READYUY2 \ "ld2 {v0.8b, v1.8b}, [%0], #16 \n" \ "uzp2 v3.8b, v1.8b, v1.8b \n" \ "uzp1 v1.8b, v1.8b, v1.8b \n" \ "ins v1.s[1], v3.s[0] \n" // Read 8 UYVY #define READUYVY \ "ld2 {v2.8b, v3.8b}, [%0], #16 \n" \ "orr v0.8b, v3.8b, v3.8b \n" \ "uzp1 v1.8b, v2.8b, v2.8b \n" \ "uzp2 v3.8b, v2.8b, v2.8b \n" \ "ins v1.s[1], v3.s[0] \n" #define YUVTORGB_SETUP \ "ld1r {v24.8h}, [%[kUVBiasBGR]], #2 \n" \ "ld1r {v25.8h}, [%[kUVBiasBGR]], #2 \n" \ "ld1r {v26.8h}, [%[kUVBiasBGR]] \n" \ "ld1r {v31.4s}, [%[kYToRgb]] \n" \ "ld2 {v27.8h, v28.8h}, [%[kUVToRB]] \n" \ "ld2 {v29.8h, v30.8h}, [%[kUVToG]] \n" #define YUVTORGB(vR, vG, vB) \ "uxtl v0.8h, v0.8b \n" /* Extract Y */ \ "shll v2.8h, v1.8b, #8 \n" /* Replicate UV */ \ "ushll2 v3.4s, v0.8h, #0 \n" /* Y */ \ "ushll v0.4s, v0.4h, #0 \n" \ "mul v3.4s, v3.4s, v31.4s \n" \ "mul v0.4s, v0.4s, v31.4s \n" \ "sqshrun v0.4h, v0.4s, #16 \n" \ "sqshrun2 v0.8h, v3.4s, #16 \n" /* Y */ \ "uaddw v1.8h, v2.8h, v1.8b \n" /* Replicate UV */ \ "mov v2.d[0], v1.d[1] \n" /* Extract V */ \ "uxtl v2.8h, v2.8b \n" \ "uxtl v1.8h, v1.8b \n" /* Extract U */ \ "mul v3.8h, v1.8h, v27.8h \n" \ "mul v5.8h, v1.8h, v29.8h \n" \ "mul v6.8h, v2.8h, v30.8h \n" \ "mul v7.8h, v2.8h, v28.8h \n" \ "sqadd v6.8h, v6.8h, v5.8h \n" \ "sqadd " #vB \ ".8h, v24.8h, v0.8h \n" /* B */ \ "sqadd " #vG \ ".8h, v25.8h, v0.8h \n" /* G */ \ "sqadd " #vR \ ".8h, v26.8h, v0.8h \n" /* R */ \ 
"sqadd " #vB ".8h, " #vB \ ".8h, v3.8h \n" /* B */ \ "sqsub " #vG ".8h, " #vG \ ".8h, v6.8h \n" /* G */ \ "sqadd " #vR ".8h, " #vR \ ".8h, v7.8h \n" /* R */ \ "sqshrun " #vB ".8b, " #vB \ ".8h, #6 \n" /* B */ \ "sqshrun " #vG ".8b, " #vG \ ".8h, #6 \n" /* G */ \ "sqshrun " #vR ".8b, " #vR ".8h, #6 \n" /* R */ void I444ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP "movi v23.8b, #255 \n" /* A */ "1: \n" READYUV444 YUVTORGB(v22, v21, v20) "subs %w4, %w4, #8 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } void I422ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP "movi v23.8b, #255 \n" /* A */ "1: \n" READYUV422 YUVTORGB(v22, v21, v20) "subs %w4, %w4, #8 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_argb), // %3 "+r"(width) // %4 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } void I422AlphaToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, const uint8_t* src_a, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP "1: \n" READYUV422 YUVTORGB(v22, v21, v20) "ld1 {v23.8b}, [%3], #8 \n" "subs %w5, %w5, #8 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%4], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(src_a), // %3 "+r"(dst_argb), // %4 "+r"(width) // %5 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } void I422ToRGBARow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP "movi v20.8b, #255 \n" /* A */ "1: \n" READYUV422 YUVTORGB(v23, v22, v21) "subs %w4, %w4, #8 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%3], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_rgba), // %3 "+r"(width) // %4 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } void I422ToRGB24Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { asm 
volatile ( YUVTORGB_SETUP "1: \n" READYUV422 YUVTORGB(v22, v21, v20) "subs %w4, %w4, #8 \n" "st3 {v20.8b,v21.8b,v22.8b}, [%3], #24 \n" "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_rgb24), // %3 "+r"(width) // %4 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } #define ARGBTORGB565 \ "shll v0.8h, v22.8b, #8 \n" /* R */ \ "shll v21.8h, v21.8b, #8 \n" /* G */ \ "shll v20.8h, v20.8b, #8 \n" /* B */ \ "sri v0.8h, v21.8h, #5 \n" /* RG */ \ "sri v0.8h, v20.8h, #11 \n" /* RGB */ void I422ToRGB565Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "1: \n" READYUV422 YUVTORGB( v22, v21, v20) "subs %w4, %w4, #8 \n" ARGBTORGB565 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels // RGB565. "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_rgb565), // %3 "+r"(width) // %4 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"); } #define ARGBTOARGB1555 \ "shll v0.8h, v23.8b, #8 \n" /* A */ \ "shll v22.8h, v22.8b, #8 \n" /* R */ \ "shll v21.8h, v21.8b, #8 \n" /* G */ \ "shll v20.8h, v20.8b, #8 \n" /* B */ \ "sri v0.8h, v22.8h, #1 \n" /* AR */ \ "sri v0.8h, v21.8h, #6 \n" /* ARG */ \ "sri v0.8h, v20.8h, #11 \n" /* ARGB */ void I422ToARGB1555Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb1555, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "movi v23.8b, #255 \n" "1: \n" READYUV422 YUVTORGB( v22, v21, v20) "subs %w4, %w4, #8 \n" ARGBTOARGB1555 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels // RGB565. "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_argb1555), // %3 "+r"(width) // %4 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"); } #define ARGBTOARGB4444 \ /* Input v20.8b<=B, v21.8b<=G, v22.8b<=R, v23.8b<=A, v4.8b<=0x0f */ \ "ushr v20.8b, v20.8b, #4 \n" /* B */ \ "bic v21.8b, v21.8b, v4.8b \n" /* G */ \ "ushr v22.8b, v22.8b, #4 \n" /* R */ \ "bic v23.8b, v23.8b, v4.8b \n" /* A */ \ "orr v0.8b, v20.8b, v21.8b \n" /* BG */ \ "orr v1.8b, v22.8b, v23.8b \n" /* RA */ \ "zip1 v0.16b, v0.16b, v1.16b \n" /* BGRA */ void I422ToARGB4444Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb4444, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP "movi v4.16b, #0x0f \n" // bits to clear with vbic. "1: \n" READYUV422 YUVTORGB(v22, v21, v20) "subs %w4, %w4, #8 \n" "movi v23.8b, #255 \n" ARGBTOARGB4444 "st1 {v0.8h}, [%3], #16 \n" // store 8 pixels ARGB4444. 
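      // ARGB4444 keeps the high nibble of each channel. Scalar sketch of the
      // ARGBTOARGB4444 packing above (illustrative only):
      //   uint16_t p = (uint16_t)((b >> 4) | (g & 0xf0) |
      //                           ((r >> 4) << 8) | ((a & 0xf0) << 8));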
"b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_argb4444), // %3 "+r"(width) // %4 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } void I400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { asm volatile ( YUVTORGB_SETUP "movi v23.8b, #255 \n" "1: \n" READYUV400 YUVTORGB(v22, v21, v20) "subs %w2, %w2, #8 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : [kUVToRB]"r"(&kYuvI601Constants.kUVToRB), [kUVToG]"r"(&kYuvI601Constants.kUVToG), [kUVBiasBGR]"r"(&kYuvI601Constants.kUVBiasBGR), [kYToRgb]"r"(&kYuvI601Constants.kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } void J400ToARGBRow_NEON(const uint8_t* src_y, uint8_t* dst_argb, int width) { asm volatile( "movi v23.8b, #255 \n" "1: \n" "ld1 {v20.8b}, [%0], #8 \n" "orr v21.8b, v20.8b, v20.8b \n" "orr v22.8b, v20.8b, v20.8b \n" "subs %w2, %w2, #8 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "v20", "v21", "v22", "v23"); } void NV12ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP "movi v23.8b, #255 \n" "1: \n" READNV12 YUVTORGB(v22, v21, v20) "subs %w3, %w3, #8 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } void NV21ToARGBRow_NEON(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP "movi v23.8b, #255 \n" "1: \n" READNV21 YUVTORGB(v22, v21, v20) "subs %w3, %w3, #8 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%2], #32 \n" "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_vu), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } void NV12ToRGB24Row_NEON(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP "1: \n" READNV12 YUVTORGB(v22, v21, v20) "subs %w3, %w3, #8 \n" "st3 {v20.8b,v21.8b,v22.8b}, [%2], #24 \n" "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_rgb24), // %2 "+r"(width) // %3 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", 
"v29", "v30" ); } void NV21ToRGB24Row_NEON(const uint8_t* src_y, const uint8_t* src_vu, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP "1: \n" READNV21 YUVTORGB(v22, v21, v20) "subs %w3, %w3, #8 \n" "st3 {v20.8b,v21.8b,v22.8b}, [%2], #24 \n" "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_vu), // %1 "+r"(dst_rgb24), // %2 "+r"(width) // %3 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } void NV12ToRGB565Row_NEON(const uint8_t* src_y, const uint8_t* src_uv, uint8_t* dst_rgb565, const struct YuvConstants* yuvconstants, int width) { asm volatile( YUVTORGB_SETUP "1: \n" READNV12 YUVTORGB( v22, v21, v20) "subs %w3, %w3, #8 \n" ARGBTORGB565 "st1 {v0.8h}, [%2], 16 \n" // store 8 pixels // RGB565. "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_uv), // %1 "+r"(dst_rgb565), // %2 "+r"(width) // %3 : [kUVToRB] "r"(&yuvconstants->kUVToRB), [kUVToG] "r"(&yuvconstants->kUVToG), [kUVBiasBGR] "r"(&yuvconstants->kUVBiasBGR), [kYToRgb] "r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30"); } void YUY2ToARGBRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP "movi v23.8b, #255 \n" "1: \n" READYUY2 YUVTORGB(v22, v21, v20) "subs %w2, %w2, #8 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" "b.gt 1b \n" : "+r"(src_yuy2), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } void UYVYToARGBRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { asm volatile ( YUVTORGB_SETUP "movi v23.8b, #255 \n" "1: \n" READUYVY YUVTORGB(v22, v21, v20) "subs %w2, %w2, #8 \n" "st4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], 32 \n" "b.gt 1b \n" : "+r"(src_uyvy), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : [kUVToRB]"r"(&yuvconstants->kUVToRB), [kUVToG]"r"(&yuvconstants->kUVToG), [kUVBiasBGR]"r"(&yuvconstants->kUVBiasBGR), [kYToRgb]"r"(&yuvconstants->kYToRgb) : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30" ); } // Reads 16 pairs of UV and write even values to dst_u and odd to dst_v. void SplitUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "1: \n" "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pairs of UV "subs %w3, %w3, #16 \n" // 16 processed per loop "st1 {v0.16b}, [%1], #16 \n" // store U "st1 {v1.16b}, [%2], #16 \n" // store V "b.gt 1b \n" : "+r"(src_uv), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 // Output registers : // Input registers : "cc", "memory", "v0", "v1" // Clobber List ); } // Reads 16 U's and V's and writes out 16 pairs of UV. 
void MergeUVRow_NEON(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { asm volatile( "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load U "ld1 {v1.16b}, [%1], #16 \n" // load V "subs %w3, %w3, #16 \n" // 16 processed per loop "st2 {v0.16b,v1.16b}, [%2], #32 \n" // store 16 pairs of UV "b.gt 1b \n" : "+r"(src_u), // %0 "+r"(src_v), // %1 "+r"(dst_uv), // %2 "+r"(width) // %3 // Output registers : // Input registers : "cc", "memory", "v0", "v1" // Clobber List ); } // Reads 16 packed RGB and write to planar dst_r, dst_g, dst_b. void SplitRGBRow_NEON(const uint8_t* src_rgb, uint8_t* dst_r, uint8_t* dst_g, uint8_t* dst_b, int width) { asm volatile( "1: \n" "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 RGB "subs %w4, %w4, #16 \n" // 16 processed per loop "st1 {v0.16b}, [%1], #16 \n" // store R "st1 {v1.16b}, [%2], #16 \n" // store G "st1 {v2.16b}, [%3], #16 \n" // store B "b.gt 1b \n" : "+r"(src_rgb), // %0 "+r"(dst_r), // %1 "+r"(dst_g), // %2 "+r"(dst_b), // %3 "+r"(width) // %4 : // Input registers : "cc", "memory", "v0", "v1", "v2" // Clobber List ); } // Reads 16 planar R's, G's and B's and writes out 16 packed RGB at a time void MergeRGBRow_NEON(const uint8_t* src_r, const uint8_t* src_g, const uint8_t* src_b, uint8_t* dst_rgb, int width) { asm volatile( "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load R "ld1 {v1.16b}, [%1], #16 \n" // load G "ld1 {v2.16b}, [%2], #16 \n" // load B "subs %w4, %w4, #16 \n" // 16 processed per loop "st3 {v0.16b,v1.16b,v2.16b}, [%3], #48 \n" // store 16 RGB "b.gt 1b \n" : "+r"(src_r), // %0 "+r"(src_g), // %1 "+r"(src_b), // %2 "+r"(dst_rgb), // %3 "+r"(width) // %4 : // Input registers : "cc", "memory", "v0", "v1", "v2" // Clobber List ); } // Copy multiple of 32. void CopyRow_NEON(const uint8_t* src, uint8_t* dst, int width) { asm volatile( "1: \n" "ldp q0, q1, [%0], #32 \n" "subs %w2, %w2, #32 \n" // 32 processed per loop "stp q0, q1, [%1], #32 \n" "b.gt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 // Output registers : // Input registers : "cc", "memory", "v0", "v1" // Clobber List ); } // SetRow writes 'width' bytes using an 8 bit value repeated. void SetRow_NEON(uint8_t* dst, uint8_t v8, int width) { asm volatile( "dup v0.16b, %w2 \n" // duplicate 16 bytes "1: \n" "subs %w1, %w1, #16 \n" // 16 bytes per loop "st1 {v0.16b}, [%0], #16 \n" // store "b.gt 1b \n" : "+r"(dst), // %0 "+r"(width) // %1 : "r"(v8) // %2 : "cc", "memory", "v0"); } void ARGBSetRow_NEON(uint8_t* dst, uint32_t v32, int width) { asm volatile( "dup v0.4s, %w2 \n" // duplicate 4 ints "1: \n" "subs %w1, %w1, #4 \n" // 4 ints per loop "st1 {v0.16b}, [%0], #16 \n" // store "b.gt 1b \n" : "+r"(dst), // %0 "+r"(width) // %1 : "r"(v32) // %2 : "cc", "memory", "v0"); } void MirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) { asm volatile( // Start at end of source row. "add %0, %0, %w2, sxtw \n" "sub %0, %0, #16 \n" "1: \n" "ld1 {v0.16b}, [%0], %3 \n" // src -= 16 "subs %w2, %w2, #16 \n" // 16 pixels per loop. "rev64 v0.16b, v0.16b \n" "st1 {v0.D}[1], [%1], #8 \n" // dst += 16 "st1 {v0.D}[0], [%1], #8 \n" "b.gt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "r"((ptrdiff_t)-16) // %3 : "cc", "memory", "v0"); } void MirrorUVRow_NEON(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( // Start at end of source row. "add %0, %0, %w3, sxtw #1 \n" "sub %0, %0, #16 \n" "1: \n" "ld2 {v0.8b, v1.8b}, [%0], %4 \n" // src -= 16 "subs %w3, %w3, #8 \n" // 8 pixels per loop. 
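      // Mirroring sketch: %0 starts at the end of the row and %4 == -16 steps
      // it backwards; ld2 deinterleaves U/V and rev64 flips byte order, so:
      //   dst_u[x] = src_uv[2 * (width - 1 - x) + 0];  // illustrative scalar
      //   dst_v[x] = src_uv[2 * (width - 1 - x) + 1];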
"rev64 v0.8b, v0.8b \n" "rev64 v1.8b, v1.8b \n" "st1 {v0.8b}, [%1], #8 \n" // dst += 8 "st1 {v1.8b}, [%2], #8 \n" "b.gt 1b \n" : "+r"(src_uv), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : "r"((ptrdiff_t)-16) // %4 : "cc", "memory", "v0", "v1"); } void ARGBMirrorRow_NEON(const uint8_t* src, uint8_t* dst, int width) { asm volatile( // Start at end of source row. "add %0, %0, %w2, sxtw #2 \n" "sub %0, %0, #16 \n" "1: \n" "ld1 {v0.16b}, [%0], %3 \n" // src -= 16 "subs %w2, %w2, #4 \n" // 4 pixels per loop. "rev64 v0.4s, v0.4s \n" "st1 {v0.D}[1], [%1], #8 \n" // dst += 16 "st1 {v0.D}[0], [%1], #8 \n" "b.gt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "r"((ptrdiff_t)-16) // %3 : "cc", "memory", "v0"); } void RGB24ToARGBRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) { asm volatile( "movi v4.8b, #255 \n" // Alpha "1: \n" "ld3 {v1.8b,v2.8b,v3.8b}, [%0], #24 \n" // load 8 pixels of RGB24. "subs %w2, %w2, #8 \n" // 8 processed per loop. "st4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%1], #32 \n" // store 8 ARGB "b.gt 1b \n" : "+r"(src_rgb24), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List ); } void RAWToARGBRow_NEON(const uint8_t* src_raw, uint8_t* dst_argb, int width) { asm volatile( "movi v5.8b, #255 \n" // Alpha "1: \n" "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b "subs %w2, %w2, #8 \n" // 8 processed per loop. "orr v3.8b, v1.8b, v1.8b \n" // move g "orr v4.8b, v0.8b, v0.8b \n" // move r "st4 {v2.8b,v3.8b,v4.8b,v5.8b}, [%1], #32 \n" // store b g r a "b.gt 1b \n" : "+r"(src_raw), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5" // Clobber List ); } void RAWToRGB24Row_NEON(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { asm volatile( "1: \n" "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // read r g b "subs %w2, %w2, #8 \n" // 8 processed per loop. "orr v3.8b, v1.8b, v1.8b \n" // move g "orr v4.8b, v0.8b, v0.8b \n" // move r "st3 {v2.8b,v3.8b,v4.8b}, [%1], #24 \n" // store b g r "b.gt 1b \n" : "+r"(src_raw), // %0 "+r"(dst_rgb24), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List ); } #define RGB565TOARGB \ "shrn v6.8b, v0.8h, #5 \n" /* G xxGGGGGG */ \ "shl v6.8b, v6.8b, #2 \n" /* G GGGGGG00 upper 6 */ \ "ushr v4.8b, v6.8b, #6 \n" /* G 000000GG lower 2 */ \ "orr v1.8b, v4.8b, v6.8b \n" /* G */ \ "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \ "ushr v0.8h, v0.8h, #11 \n" /* R 000RRRRR */ \ "xtn2 v2.16b,v0.8h \n" /* R in upper part */ \ "shl v2.16b, v2.16b, #3 \n" /* R,B BBBBB000 upper 5 */ \ "ushr v0.16b, v2.16b, #5 \n" /* R,B 00000BBB lower 3 */ \ "orr v0.16b, v0.16b, v2.16b \n" /* R,B */ \ "dup v2.2D, v0.D[1] \n" /* R */ void RGB565ToARGBRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) { asm volatile( "movi v3.8b, #255 \n" // Alpha "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels. "subs %w2, %w2, #8 \n" // 8 processed per loop. 
RGB565TOARGB "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB "b.gt 1b \n" : "+r"(src_rgb565), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6" // Clobber List ); } #define ARGB1555TOARGB \ "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \ "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \ "xtn v3.8b, v2.8h \n" /* RRRRR000 AAAAAAAA */ \ \ "sshr v2.8h, v0.8h, #15 \n" /* A AAAAAAAA */ \ "xtn2 v3.16b, v2.8h \n" \ \ "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \ "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \ \ "ushr v1.16b, v3.16b, #5 \n" /* R,A 00000RRR lower 3 */ \ "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \ "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \ \ "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \ "orr v2.16b, v1.16b, v3.16b \n" /* R,A */ \ "dup v1.2D, v0.D[1] \n" \ "dup v3.2D, v2.D[1] \n" // RGB555TOARGB is same as ARGB1555TOARGB but ignores alpha. #define RGB555TOARGB \ "ushr v2.8h, v0.8h, #10 \n" /* R xxxRRRRR */ \ "shl v2.8h, v2.8h, #3 \n" /* R RRRRR000 upper 5 */ \ "xtn v3.8b, v2.8h \n" /* RRRRR000 */ \ \ "xtn v2.8b, v0.8h \n" /* B xxxBBBBB */ \ "shrn2 v2.16b,v0.8h, #5 \n" /* G xxxGGGGG */ \ \ "ushr v1.16b, v3.16b, #5 \n" /* R 00000RRR lower 3 */ \ "shl v0.16b, v2.16b, #3 \n" /* B,G BBBBB000 upper 5 */ \ "ushr v2.16b, v0.16b, #5 \n" /* B,G 00000BBB lower 3 */ \ \ "orr v0.16b, v0.16b, v2.16b \n" /* B,G */ \ "orr v2.16b, v1.16b, v3.16b \n" /* R */ \ "dup v1.2D, v0.D[1] \n" /* G */ void ARGB1555ToARGBRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_argb, int width) { asm volatile( "movi v3.8b, #255 \n" // Alpha "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels. "subs %w2, %w2, #8 \n" // 8 processed per loop. ARGB1555TOARGB "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB // pixels "b.gt 1b \n" : "+r"(src_argb1555), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } #define ARGB4444TOARGB \ "shrn v1.8b, v0.8h, #8 \n" /* v1(l) AR */ \ "xtn2 v1.16b, v0.8h \n" /* v1(h) GB */ \ "shl v2.16b, v1.16b, #4 \n" /* B,R BBBB0000 */ \ "ushr v3.16b, v1.16b, #4 \n" /* G,A 0000GGGG */ \ "ushr v0.16b, v2.16b, #4 \n" /* B,R 0000BBBB */ \ "shl v1.16b, v3.16b, #4 \n" /* G,A GGGG0000 */ \ "orr v2.16b, v0.16b, v2.16b \n" /* B,R BBBBBBBB */ \ "orr v3.16b, v1.16b, v3.16b \n" /* G,A GGGGGGGG */ \ "dup v0.2D, v2.D[1] \n" \ "dup v1.2D, v3.D[1] \n" void ARGB4444ToARGBRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_argb, int width) { asm volatile( "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels. "subs %w2, %w2, #8 \n" // 8 processed per loop. ARGB4444TOARGB "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB // pixels "b.gt 1b \n" : "+r"(src_argb4444), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4" // Clobber List ); } void ARGBToRGB24Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb24, int width) { asm volatile( "1: \n" "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load 8 ARGB "subs %w2, %w2, #8 \n" // 8 processed per loop. "st3 {v1.8b,v2.8b,v3.8b}, [%1], #24 \n" // store 8 pixels of // RGB24. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_rgb24), // %1 "+r"(width) // %2 : : "cc", "memory", "v1", "v2", "v3", "v4" // Clobber List ); } void ARGBToRAWRow_NEON(const uint8_t* src_argb, uint8_t* dst_raw, int width) { asm volatile( "1: \n" "ld4 {v1.8b,v2.8b,v3.8b,v4.8b}, [%0], #32 \n" // load b g r a "subs %w2, %w2, #8 \n" // 8 processed per loop. 
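      // RAW is 3 bytes per pixel in R,G,B memory order; the orr moves below
      // stage G and B so st3 emits {r, g, b} per pixel (scalar sketch:
      // dst[0] = r; dst[1] = g; dst[2] = b;).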
"orr v4.8b, v2.8b, v2.8b \n" // mov g "orr v5.8b, v1.8b, v1.8b \n" // mov b "st3 {v3.8b,v4.8b,v5.8b}, [%1], #24 \n" // store r g b "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_raw), // %1 "+r"(width) // %2 : : "cc", "memory", "v1", "v2", "v3", "v4", "v5" // Clobber List ); } void YUY2ToYRow_NEON(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { asm volatile( "1: \n" "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of YUY2. "subs %w2, %w2, #16 \n" // 16 processed per loop. "st1 {v0.16b}, [%1], #16 \n" // store 16 pixels of Y. "b.gt 1b \n" : "+r"(src_yuy2), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1" // Clobber List ); } void UYVYToYRow_NEON(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { asm volatile( "1: \n" "ld2 {v0.16b,v1.16b}, [%0], #32 \n" // load 16 pixels of UYVY. "subs %w2, %w2, #16 \n" // 16 processed per loop. "st1 {v1.16b}, [%1], #16 \n" // store 16 pixels of Y. "b.gt 1b \n" : "+r"(src_uyvy), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1" // Clobber List ); } void YUY2ToUV422Row_NEON(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 YUY2 "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs. "st1 {v1.8b}, [%1], #8 \n" // store 8 U. "st1 {v3.8b}, [%2], #8 \n" // store 8 V. "b.gt 1b \n" : "+r"(src_yuy2), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } void UYVYToUV422Row_NEON(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 UYVY "subs %w3, %w3, #16 \n" // 16 pixels = 8 UVs. "st1 {v0.8b}, [%1], #8 \n" // store 8 U. "st1 {v2.8b}, [%2], #8 \n" // store 8 V. "b.gt 1b \n" : "+r"(src_uyvy), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } void YUY2ToUVRow_NEON(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_yuy2b = src_yuy2 + stride_yuy2; asm volatile( "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs. "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row "urhadd v1.8b, v1.8b, v5.8b \n" // average rows of U "urhadd v3.8b, v3.8b, v7.8b \n" // average rows of V "st1 {v1.8b}, [%2], #8 \n" // store 8 U. "st1 {v3.8b}, [%3], #8 \n" // store 8 V. "b.gt 1b \n" : "+r"(src_yuy2), // %0 "+r"(src_yuy2b), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List ); } void UYVYToUVRow_NEON(const uint8_t* src_uyvy, int stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_uyvyb = src_uyvy + stride_uyvy; asm volatile( "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 16 pixels "subs %w4, %w4, #16 \n" // 16 pixels = 8 UVs. "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load next row "urhadd v0.8b, v0.8b, v4.8b \n" // average rows of U "urhadd v2.8b, v2.8b, v6.8b \n" // average rows of V "st1 {v0.8b}, [%2], #8 \n" // store 8 U. "st1 {v2.8b}, [%3], #8 \n" // store 8 V. "b.gt 1b \n" : "+r"(src_uyvy), // %0 "+r"(src_uyvyb), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7" // Clobber List ); } // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
void ARGBShuffleRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width) { asm volatile( "ld1 {v2.16b}, [%3] \n" // shuffler "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load 4 pixels. "subs %w2, %w2, #4 \n" // 4 processed per loop "tbl v1.16b, {v0.16b}, v2.16b \n" // look up 4 pixels "st1 {v1.16b}, [%1], #16 \n" // store 4. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "r"(shuffler) // %3 : "cc", "memory", "v0", "v1", "v2" // Clobber List ); } void I422ToYUY2Row_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_yuy2, int width) { asm volatile( "1: \n" "ld2 {v0.8b, v1.8b}, [%0], #16 \n" // load 16 Ys "orr v2.8b, v1.8b, v1.8b \n" "ld1 {v1.8b}, [%1], #8 \n" // load 8 Us "ld1 {v3.8b}, [%2], #8 \n" // load 8 Vs "subs %w4, %w4, #16 \n" // 16 pixels "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels. "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_yuy2), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3"); } void I422ToUYVYRow_NEON(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uyvy, int width) { asm volatile( "1: \n" "ld2 {v1.8b,v2.8b}, [%0], #16 \n" // load 16 Ys "orr v3.8b, v2.8b, v2.8b \n" "ld1 {v0.8b}, [%1], #8 \n" // load 8 Us "ld1 {v2.8b}, [%2], #8 \n" // load 8 Vs "subs %w4, %w4, #16 \n" // 16 pixels "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%3], #32 \n" // Store 16 pixels. "b.gt 1b \n" : "+r"(src_y), // %0 "+r"(src_u), // %1 "+r"(src_v), // %2 "+r"(dst_uyvy), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3"); } void ARGBToRGB565Row_NEON(const uint8_t* src_argb, uint8_t* dst_rgb565, int width) { asm volatile( "1: \n" "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels "subs %w2, %w2, #8 \n" // 8 processed per loop. ARGBTORGB565 "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels RGB565. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_rgb565), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v20", "v21", "v22", "v23"); } void ARGBToRGB565DitherRow_NEON(const uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, int width) { asm volatile( "dup v1.4s, %w2 \n" // dither4 "1: \n" "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%1], #32 \n" // load 8 pixels "subs %w3, %w3, #8 \n" // 8 processed per loop. "uqadd v20.8b, v20.8b, v1.8b \n" "uqadd v21.8b, v21.8b, v1.8b \n" "uqadd v22.8b, v22.8b, v1.8b \n" ARGBTORGB565 "st1 {v0.16b}, [%0], #16 \n" // store 8 pixels RGB565. "b.gt 1b \n" : "+r"(dst_rgb) // %0 : "r"(src_argb), // %1 "r"(dither4), // %2 "r"(width) // %3 : "cc", "memory", "v0", "v1", "v20", "v21", "v22", "v23"); } void ARGBToARGB1555Row_NEON(const uint8_t* src_argb, uint8_t* dst_argb1555, int width) { asm volatile( "1: \n" "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels "subs %w2, %w2, #8 \n" // 8 processed per loop. ARGBTOARGB1555 "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels // ARGB1555. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb1555), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v20", "v21", "v22", "v23"); } void ARGBToARGB4444Row_NEON(const uint8_t* src_argb, uint8_t* dst_argb4444, int width) { asm volatile( "movi v4.16b, #0x0f \n" // bits to clear with // vbic. "1: \n" "ld4 {v20.8b,v21.8b,v22.8b,v23.8b}, [%0], #32 \n" // load 8 pixels "subs %w2, %w2, #8 \n" // 8 processed per loop. ARGBTOARGB4444 "st1 {v0.16b}, [%1], #16 \n" // store 8 pixels // ARGB4444. 
"b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb4444), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v4", "v20", "v21", "v22", "v23"); } void ARGBToYRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #13 \n" // B * 0.1016 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient "movi v6.8b, #33 \n" // R * 0.2578 coefficient "movi v7.8b, #16 \n" // Add 16 constant "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB "subs %w2, %w2, #8 \n" // 8 processed per loop. "umull v3.8h, v0.8b, v4.8b \n" // B "umlal v3.8h, v1.8b, v5.8b \n" // G "umlal v3.8h, v2.8b, v6.8b \n" // R "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y "uqadd v0.8b, v0.8b, v7.8b \n" "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } void ARGBExtractAlphaRow_NEON(const uint8_t* src_argb, uint8_t* dst_a, int width) { asm volatile( "1: \n" "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load row 16 // pixels "subs %w2, %w2, #16 \n" // 16 processed per loop "st1 {v3.16b}, [%1], #16 \n" // store 16 A's. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_a), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } void ARGBToYJRow_NEON(const uint8_t* src_argb, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #15 \n" // B * 0.11400 coefficient "movi v5.8b, #75 \n" // G * 0.58700 coefficient "movi v6.8b, #38 \n" // R * 0.29900 coefficient "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB "subs %w2, %w2, #8 \n" // 8 processed per loop. "umull v3.8h, v0.8b, v4.8b \n" // B "umlal v3.8h, v1.8b, v5.8b \n" // G "umlal v3.8h, v2.8b, v6.8b \n" // R "sqrshrun v0.8b, v3.8h, #7 \n" // 15 bit to 8 bit Y "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"); } // 8x1 pixels. void ARGBToUV444Row_NEON(const uint8_t* src_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { asm volatile( "movi v24.8b, #112 \n" // UB / VR 0.875 // coefficient "movi v25.8b, #74 \n" // UG -0.5781 coefficient "movi v26.8b, #38 \n" // UR -0.2969 coefficient "movi v27.8b, #18 \n" // VB -0.1406 coefficient "movi v28.8b, #94 \n" // VG -0.7344 coefficient "movi v29.16b,#0x80 \n" // 128.5 "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB // pixels. "subs %w3, %w3, #8 \n" // 8 processed per loop. "umull v4.8h, v0.8b, v24.8b \n" // B "umlsl v4.8h, v1.8b, v25.8b \n" // G "umlsl v4.8h, v2.8b, v26.8b \n" // R "add v4.8h, v4.8h, v29.8h \n" // +128 -> unsigned "umull v3.8h, v2.8b, v24.8b \n" // R "umlsl v3.8h, v1.8b, v28.8b \n" // G "umlsl v3.8h, v0.8b, v27.8b \n" // B "add v3.8h, v3.8h, v29.8h \n" // +128 -> unsigned "uqshrn v0.8b, v4.8h, #8 \n" // 16 bit to 8 bit U "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%2], #8 \n" // store 8 pixels V. 
"b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_u), // %1 "+r"(dst_v), // %2 "+r"(width) // %3 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26", "v27", "v28", "v29"); } #define RGBTOUV_SETUP_REG \ "movi v20.8h, #56, lsl #0 \n" /* UB/VR coefficient (0.875) / 2 */ \ "movi v21.8h, #37, lsl #0 \n" /* UG coefficient (-0.5781) / 2 */ \ "movi v22.8h, #19, lsl #0 \n" /* UR coefficient (-0.2969) / 2 */ \ "movi v23.8h, #9, lsl #0 \n" /* VB coefficient (-0.1406) / 2 */ \ "movi v24.8h, #47, lsl #0 \n" /* VG coefficient (-0.7344) / 2 */ \ "movi v25.16b, #0x80 \n" /* 128.5 (0x8080 in 16-bit) */ // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. // clang-format off #define RGBTOUV(QB, QG, QR) \ "mul v3.8h, " #QB ",v20.8h \n" /* B */ \ "mul v4.8h, " #QR ",v20.8h \n" /* R */ \ "mls v3.8h, " #QG ",v21.8h \n" /* G */ \ "mls v4.8h, " #QG ",v24.8h \n" /* G */ \ "mls v3.8h, " #QR ",v22.8h \n" /* R */ \ "mls v4.8h, " #QB ",v23.8h \n" /* B */ \ "add v3.8h, v3.8h, v25.8h \n" /* +128 -> unsigned */ \ "add v4.8h, v4.8h, v25.8h \n" /* +128 -> unsigned */ \ "uqshrn v0.8b, v3.8h, #8 \n" /* 16 bit to 8 bit U */ \ "uqshrn v1.8b, v4.8h, #8 \n" /* 16 bit to 8 bit V */ // clang-format on // TODO(fbarchard): Consider vhadd vertical, then vpaddl horizontal, avoid shr. // TODO(fbarchard): consider ptrdiff_t for all strides. void ARGBToUVRow_NEON(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_argb_1 = src_argb + src_stride_argb; asm volatile ( RGBTOUV_SETUP_REG "1: \n" "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts. "urshr v0.8h, v0.8h, #1 \n" // 2x average "urshr v1.8h, v1.8h, #1 \n" "urshr v2.8h, v2.8h, #1 \n" "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v0.8h, v1.8h, v2.8h) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(src_argb_1), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25" ); } // TODO(fbarchard): Subsample match C code. void ARGBToUVJRow_NEON(const uint8_t* src_argb, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_argb_1 = src_argb + src_stride_argb; asm volatile ( "movi v20.8h, #63, lsl #0 \n" // UB/VR coeff (0.500) / 2 "movi v21.8h, #42, lsl #0 \n" // UG coeff (-0.33126) / 2 "movi v22.8h, #21, lsl #0 \n" // UR coeff (-0.16874) / 2 "movi v23.8h, #10, lsl #0 \n" // VB coeff (-0.08131) / 2 "movi v24.8h, #53, lsl #0 \n" // VG coeff (-0.41869) / 2 "movi v25.16b, #0x80 \n" // 128.5 (0x8080 in 16-bit) "1: \n" "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load next 16 "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts. 
"urshr v0.8h, v0.8h, #1 \n" // 2x average "urshr v1.8h, v1.8h, #1 \n" "urshr v2.8h, v2.8h, #1 \n" "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v0.8h, v1.8h, v2.8h) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(src_argb_1), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25" ); } void BGRAToUVRow_NEON(const uint8_t* src_bgra, int src_stride_bgra, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_bgra_1 = src_bgra + src_stride_bgra; asm volatile ( RGBTOUV_SETUP_REG "1: \n" "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. "uaddlp v0.8h, v3.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v3.8h, v2.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v1.16b \n" // R 16 bytes -> 8 shorts. "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more "uadalp v0.8h, v7.16b \n" // B 16 bytes -> 8 shorts. "uadalp v3.8h, v6.16b \n" // G 16 bytes -> 8 shorts. "uadalp v2.8h, v5.16b \n" // R 16 bytes -> 8 shorts. "urshr v0.8h, v0.8h, #1 \n" // 2x average "urshr v1.8h, v3.8h, #1 \n" "urshr v2.8h, v2.8h, #1 \n" "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v0.8h, v1.8h, v2.8h) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_bgra), // %0 "+r"(src_bgra_1), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25" ); } void ABGRToUVRow_NEON(const uint8_t* src_abgr, int src_stride_abgr, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_abgr_1 = src_abgr + src_stride_abgr; asm volatile ( RGBTOUV_SETUP_REG "1: \n" "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. "uaddlp v3.8h, v2.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v2.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v1.8h, v0.16b \n" // R 16 bytes -> 8 shorts. "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more. "uadalp v3.8h, v6.16b \n" // B 16 bytes -> 8 shorts. "uadalp v2.8h, v5.16b \n" // G 16 bytes -> 8 shorts. "uadalp v1.8h, v4.16b \n" // R 16 bytes -> 8 shorts. "urshr v0.8h, v3.8h, #1 \n" // 2x average "urshr v2.8h, v2.8h, #1 \n" "urshr v1.8h, v1.8h, #1 \n" "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v0.8h, v2.8h, v1.8h) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_abgr), // %0 "+r"(src_abgr_1), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25" ); } void RGBAToUVRow_NEON(const uint8_t* src_rgba, int src_stride_rgba, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_rgba_1 = src_rgba + src_stride_rgba; asm volatile ( RGBTOUV_SETUP_REG "1: \n" "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 16 pixels. "uaddlp v0.8h, v1.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v1.8h, v2.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v3.16b \n" // R 16 bytes -> 8 shorts. "ld4 {v4.16b,v5.16b,v6.16b,v7.16b}, [%1], #64 \n" // load 16 more. "uadalp v0.8h, v5.16b \n" // B 16 bytes -> 8 shorts. "uadalp v1.8h, v6.16b \n" // G 16 bytes -> 8 shorts. "uadalp v2.8h, v7.16b \n" // R 16 bytes -> 8 shorts. 
"urshr v0.8h, v0.8h, #1 \n" // 2x average "urshr v1.8h, v1.8h, #1 \n" "urshr v2.8h, v2.8h, #1 \n" "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v0.8h, v1.8h, v2.8h) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_rgba), // %0 "+r"(src_rgba_1), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25" ); } void RGB24ToUVRow_NEON(const uint8_t* src_rgb24, int src_stride_rgb24, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_rgb24_1 = src_rgb24 + src_stride_rgb24; asm volatile ( RGBTOUV_SETUP_REG "1: \n" "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 16 pixels. "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. "ld3 {v4.16b,v5.16b,v6.16b}, [%1], #48 \n" // load 16 more. "uadalp v0.8h, v4.16b \n" // B 16 bytes -> 8 shorts. "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. "uadalp v2.8h, v6.16b \n" // R 16 bytes -> 8 shorts. "urshr v0.8h, v0.8h, #1 \n" // 2x average "urshr v1.8h, v1.8h, #1 \n" "urshr v2.8h, v2.8h, #1 \n" "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v0.8h, v1.8h, v2.8h) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_rgb24), // %0 "+r"(src_rgb24_1), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25" ); } void RAWToUVRow_NEON(const uint8_t* src_raw, int src_stride_raw, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_raw_1 = src_raw + src_stride_raw; asm volatile ( RGBTOUV_SETUP_REG "1: \n" "ld3 {v0.16b,v1.16b,v2.16b}, [%0], #48 \n" // load 8 RAW pixels. "uaddlp v2.8h, v2.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v0.8h, v0.16b \n" // R 16 bytes -> 8 shorts. "ld3 {v4.16b,v5.16b,v6.16b}, [%1], #48 \n" // load 8 more RAW pixels "uadalp v2.8h, v6.16b \n" // B 16 bytes -> 8 shorts. "uadalp v1.8h, v5.16b \n" // G 16 bytes -> 8 shorts. "uadalp v0.8h, v4.16b \n" // R 16 bytes -> 8 shorts. "urshr v2.8h, v2.8h, #1 \n" // 2x average "urshr v1.8h, v1.8h, #1 \n" "urshr v0.8h, v0.8h, #1 \n" "subs %w4, %w4, #16 \n" // 32 processed per loop. RGBTOUV(v2.8h, v1.8h, v0.8h) "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_raw), // %0 "+r"(src_raw_1), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v23", "v24", "v25" ); } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. void RGB565ToUVRow_NEON(const uint8_t* src_rgb565, int src_stride_rgb565, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_rgb565_1 = src_rgb565 + src_stride_rgb565; asm volatile( "movi v22.8h, #56, lsl #0 \n" // UB / VR coeff (0.875) / // 2 "movi v23.8h, #37, lsl #0 \n" // UG coeff (-0.5781) / 2 "movi v24.8h, #19, lsl #0 \n" // UR coeff (-0.2969) / 2 "movi v25.8h, #9 , lsl #0 \n" // VB coeff (-0.1406) / 2 "movi v26.8h, #47, lsl #0 \n" // VG coeff (-0.7344) / 2 "movi v27.16b, #0x80 \n" // 128.5 0x8080 in 16bit "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels. RGB565TOARGB "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. 
"uaddlp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts. "uaddlp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts. "ld1 {v0.16b}, [%0], #16 \n" // next 8 RGB565 pixels. RGB565TOARGB "uaddlp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts. "uaddlp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts. "uaddlp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts. "ld1 {v0.16b}, [%1], #16 \n" // load 8 RGB565 pixels. RGB565TOARGB "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. "uadalp v18.4h, v1.8b \n" // G 8 bytes -> 4 shorts. "uadalp v20.4h, v2.8b \n" // R 8 bytes -> 4 shorts. "ld1 {v0.16b}, [%1], #16 \n" // next 8 RGB565 pixels. RGB565TOARGB "uadalp v17.4h, v0.8b \n" // B 8 bytes -> 4 shorts. "uadalp v19.4h, v1.8b \n" // G 8 bytes -> 4 shorts. "uadalp v21.4h, v2.8b \n" // R 8 bytes -> 4 shorts. "ins v16.D[1], v17.D[0] \n" "ins v18.D[1], v19.D[0] \n" "ins v20.D[1], v21.D[0] \n" "urshr v4.8h, v16.8h, #1 \n" // 2x average "urshr v5.8h, v18.8h, #1 \n" "urshr v6.8h, v20.8h, #1 \n" "subs %w4, %w4, #16 \n" // 16 processed per loop. "mul v16.8h, v4.8h, v22.8h \n" // B "mls v16.8h, v5.8h, v23.8h \n" // G "mls v16.8h, v6.8h, v24.8h \n" // R "add v16.8h, v16.8h, v27.8h \n" // +128 -> unsigned "mul v17.8h, v6.8h, v22.8h \n" // R "mls v17.8h, v5.8h, v26.8h \n" // G "mls v17.8h, v4.8h, v25.8h \n" // B "add v17.8h, v17.8h, v27.8h \n" // +128 -> unsigned "uqshrn v0.8b, v16.8h, #8 \n" // 16 bit to 8 bit U "uqshrn v1.8b, v17.8h, #8 \n" // 16 bit to 8 bit V "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_rgb565), // %0 "+r"(src_rgb565_1), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27"); } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. void ARGB1555ToUVRow_NEON(const uint8_t* src_argb1555, int src_stride_argb1555, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_argb1555_1 = src_argb1555 + src_stride_argb1555; asm volatile( RGBTOUV_SETUP_REG "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels. RGB555TOARGB "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB1555 pixels. RGB555TOARGB "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB1555 pixels. RGB555TOARGB "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. "uadalp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. "uadalp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB1555 pixels. RGB555TOARGB "uadalp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. "uadalp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. "uadalp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. "ins v16.D[1], v26.D[0] \n" "ins v17.D[1], v27.D[0] \n" "ins v18.D[1], v28.D[0] \n" "urshr v4.8h, v16.8h, #1 \n" // 2x average "urshr v5.8h, v17.8h, #1 \n" "urshr v6.8h, v18.8h, #1 \n" "subs %w4, %w4, #16 \n" // 16 processed per loop. 
"mul v2.8h, v4.8h, v20.8h \n" // B "mls v2.8h, v5.8h, v21.8h \n" // G "mls v2.8h, v6.8h, v22.8h \n" // R "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned "mul v3.8h, v6.8h, v20.8h \n" // R "mls v3.8h, v5.8h, v24.8h \n" // G "mls v3.8h, v4.8h, v23.8h \n" // B "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_argb1555), // %0 "+r"(src_argb1555_1), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28"); } // 16x2 pixels -> 8x1. width is number of argb pixels. e.g. 16. void ARGB4444ToUVRow_NEON(const uint8_t* src_argb4444, int src_stride_argb4444, uint8_t* dst_u, uint8_t* dst_v, int width) { const uint8_t* src_argb4444_1 = src_argb4444 + src_stride_argb4444; asm volatile( RGBTOUV_SETUP_REG "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels. ARGB4444TOARGB "uaddlp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. "uaddlp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. "uaddlp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. "ld1 {v0.16b}, [%0], #16 \n" // next 8 ARGB4444 pixels. ARGB4444TOARGB "uaddlp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. "uaddlp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. "uaddlp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. "ld1 {v0.16b}, [%1], #16 \n" // load 8 ARGB4444 pixels. ARGB4444TOARGB "uadalp v16.4h, v0.8b \n" // B 8 bytes -> 4 shorts. "uadalp v17.4h, v1.8b \n" // G 8 bytes -> 4 shorts. "uadalp v18.4h, v2.8b \n" // R 8 bytes -> 4 shorts. "ld1 {v0.16b}, [%1], #16 \n" // next 8 ARGB4444 pixels. ARGB4444TOARGB "uadalp v26.4h, v0.8b \n" // B 8 bytes -> 4 shorts. "uadalp v27.4h, v1.8b \n" // G 8 bytes -> 4 shorts. "uadalp v28.4h, v2.8b \n" // R 8 bytes -> 4 shorts. "ins v16.D[1], v26.D[0] \n" "ins v17.D[1], v27.D[0] \n" "ins v18.D[1], v28.D[0] \n" "urshr v4.8h, v16.8h, #1 \n" // 2x average "urshr v5.8h, v17.8h, #1 \n" "urshr v6.8h, v18.8h, #1 \n" "subs %w4, %w4, #16 \n" // 16 processed per loop. "mul v2.8h, v4.8h, v20.8h \n" // B "mls v2.8h, v5.8h, v21.8h \n" // G "mls v2.8h, v6.8h, v22.8h \n" // R "add v2.8h, v2.8h, v25.8h \n" // +128 -> unsigned "mul v3.8h, v6.8h, v20.8h \n" // R "mls v3.8h, v5.8h, v24.8h \n" // G "mls v3.8h, v4.8h, v23.8h \n" // B "add v3.8h, v3.8h, v25.8h \n" // +128 -> unsigned "uqshrn v0.8b, v2.8h, #8 \n" // 16 bit to 8 bit U "uqshrn v1.8b, v3.8h, #8 \n" // 16 bit to 8 bit V "st1 {v0.8b}, [%2], #8 \n" // store 8 pixels U. "st1 {v1.8b}, [%3], #8 \n" // store 8 pixels V. "b.gt 1b \n" : "+r"(src_argb4444), // %0 "+r"(src_argb4444_1), // %1 "+r"(dst_u), // %2 "+r"(dst_v), // %3 "+r"(width) // %4 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28" ); } void RGB565ToYRow_NEON(const uint8_t* src_rgb565, uint8_t* dst_y, int width) { asm volatile( "movi v24.8b, #13 \n" // B * 0.1016 coefficient "movi v25.8b, #65 \n" // G * 0.5078 coefficient "movi v26.8b, #33 \n" // R * 0.2578 coefficient "movi v27.8b, #16 \n" // Add 16 constant "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load 8 RGB565 pixels. "subs %w2, %w2, #8 \n" // 8 processed per loop. 
RGB565TOARGB "umull v3.8h, v0.8b, v24.8b \n" // B "umlal v3.8h, v1.8b, v25.8b \n" // G "umlal v3.8h, v2.8b, v26.8b \n" // R "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y "uqadd v0.8b, v0.8b, v27.8b \n" "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "b.gt 1b \n" : "+r"(src_rgb565), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v6", "v24", "v25", "v26", "v27"); } void ARGB1555ToYRow_NEON(const uint8_t* src_argb1555, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #13 \n" // B * 0.1016 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient "movi v6.8b, #33 \n" // R * 0.2578 coefficient "movi v7.8b, #16 \n" // Add 16 constant "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB1555 pixels. "subs %w2, %w2, #8 \n" // 8 processed per loop. ARGB1555TOARGB "umull v3.8h, v0.8b, v4.8b \n" // B "umlal v3.8h, v1.8b, v5.8b \n" // G "umlal v3.8h, v2.8b, v6.8b \n" // R "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y "uqadd v0.8b, v0.8b, v7.8b \n" "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "b.gt 1b \n" : "+r"(src_argb1555), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } void ARGB4444ToYRow_NEON(const uint8_t* src_argb4444, uint8_t* dst_y, int width) { asm volatile( "movi v24.8b, #13 \n" // B * 0.1016 coefficient "movi v25.8b, #65 \n" // G * 0.5078 coefficient "movi v26.8b, #33 \n" // R * 0.2578 coefficient "movi v27.8b, #16 \n" // Add 16 constant "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load 8 ARGB4444 pixels. "subs %w2, %w2, #8 \n" // 8 processed per loop. ARGB4444TOARGB "umull v3.8h, v0.8b, v24.8b \n" // B "umlal v3.8h, v1.8b, v25.8b \n" // G "umlal v3.8h, v2.8b, v26.8b \n" // R "sqrshrun v0.8b, v3.8h, #7 \n" // 16 bit to 8 bit Y "uqadd v0.8b, v0.8b, v27.8b \n" "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "b.gt 1b \n" : "+r"(src_argb4444), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v24", "v25", "v26", "v27"); } void BGRAToYRow_NEON(const uint8_t* src_bgra, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #33 \n" // R * 0.2578 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient "movi v6.8b, #13 \n" // B * 0.1016 coefficient "movi v7.8b, #16 \n" // Add 16 constant "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels. "subs %w2, %w2, #8 \n" // 8 processed per loop. "umull v16.8h, v1.8b, v4.8b \n" // R "umlal v16.8h, v2.8b, v5.8b \n" // G "umlal v16.8h, v3.8b, v6.8b \n" // B "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y "uqadd v0.8b, v0.8b, v7.8b \n" "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "b.gt 1b \n" : "+r"(src_bgra), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } void ABGRToYRow_NEON(const uint8_t* src_abgr, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #33 \n" // R * 0.2578 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient "movi v6.8b, #13 \n" // B * 0.1016 coefficient "movi v7.8b, #16 \n" // Add 16 constant "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels. "subs %w2, %w2, #8 \n" // 8 processed per loop. "umull v16.8h, v0.8b, v4.8b \n" // R "umlal v16.8h, v1.8b, v5.8b \n" // G "umlal v16.8h, v2.8b, v6.8b \n" // B "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y "uqadd v0.8b, v0.8b, v7.8b \n" "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. 
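// Editor's note (illustrative, not part of the original source): this
// kernel, like the other *ToYRow variants above, evaluates
//   Y = ((13*B + 65*G + 33*R + 64) >> 7) + 16
// per pixel; sqrshrun supplies the +64 rounding and uqadd saturates the
// +16 offset.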
"b.gt 1b \n" : "+r"(src_abgr), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } void RGBAToYRow_NEON(const uint8_t* src_rgba, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #13 \n" // B * 0.1016 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient "movi v6.8b, #33 \n" // R * 0.2578 coefficient "movi v7.8b, #16 \n" // Add 16 constant "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 pixels. "subs %w2, %w2, #8 \n" // 8 processed per loop. "umull v16.8h, v1.8b, v4.8b \n" // B "umlal v16.8h, v2.8b, v5.8b \n" // G "umlal v16.8h, v3.8b, v6.8b \n" // R "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y "uqadd v0.8b, v0.8b, v7.8b \n" "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "b.gt 1b \n" : "+r"(src_rgba), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } void RGB24ToYRow_NEON(const uint8_t* src_rgb24, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #13 \n" // B * 0.1016 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient "movi v6.8b, #33 \n" // R * 0.2578 coefficient "movi v7.8b, #16 \n" // Add 16 constant "1: \n" "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels. "subs %w2, %w2, #8 \n" // 8 processed per loop. "umull v16.8h, v0.8b, v4.8b \n" // B "umlal v16.8h, v1.8b, v5.8b \n" // G "umlal v16.8h, v2.8b, v6.8b \n" // R "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y "uqadd v0.8b, v0.8b, v7.8b \n" "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "b.gt 1b \n" : "+r"(src_rgb24), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } void RAWToYRow_NEON(const uint8_t* src_raw, uint8_t* dst_y, int width) { asm volatile( "movi v4.8b, #33 \n" // R * 0.2578 coefficient "movi v5.8b, #65 \n" // G * 0.5078 coefficient "movi v6.8b, #13 \n" // B * 0.1016 coefficient "movi v7.8b, #16 \n" // Add 16 constant "1: \n" "ld3 {v0.8b,v1.8b,v2.8b}, [%0], #24 \n" // load 8 pixels. "subs %w2, %w2, #8 \n" // 8 processed per loop. "umull v16.8h, v0.8b, v4.8b \n" // B "umlal v16.8h, v1.8b, v5.8b \n" // G "umlal v16.8h, v2.8b, v6.8b \n" // R "sqrshrun v0.8b, v16.8h, #7 \n" // 16 bit to 8 bit Y "uqadd v0.8b, v0.8b, v7.8b \n" "st1 {v0.8b}, [%1], #8 \n" // store 8 pixels Y. "b.gt 1b \n" : "+r"(src_raw), // %0 "+r"(dst_y), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } // Bilinear filter 16x2 -> 16x1 void InterpolateRow_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { int y1_fraction = source_y_fraction; int y0_fraction = 256 - y1_fraction; const uint8_t* src_ptr1 = src_ptr + src_stride; asm volatile( "cmp %w4, #0 \n" "b.eq 100f \n" "cmp %w4, #128 \n" "b.eq 50f \n" "dup v5.16b, %w4 \n" "dup v4.16b, %w5 \n" // General purpose row blend. "1: \n" "ld1 {v0.16b}, [%1], #16 \n" "ld1 {v1.16b}, [%2], #16 \n" "subs %w3, %w3, #16 \n" "umull v2.8h, v0.8b, v4.8b \n" "umull2 v3.8h, v0.16b, v4.16b \n" "umlal v2.8h, v1.8b, v5.8b \n" "umlal2 v3.8h, v1.16b, v5.16b \n" "rshrn v0.8b, v2.8h, #8 \n" "rshrn2 v0.16b, v3.8h, #8 \n" "st1 {v0.16b}, [%0], #16 \n" "b.gt 1b \n" "b 99f \n" // Blend 50 / 50. "50: \n" "ld1 {v0.16b}, [%1], #16 \n" "ld1 {v1.16b}, [%2], #16 \n" "subs %w3, %w3, #16 \n" "urhadd v0.16b, v0.16b, v1.16b \n" "st1 {v0.16b}, [%0], #16 \n" "b.gt 50b \n" "b 99f \n" // Blend 100 / 0 - Copy row unchanged. 
"100: \n" "ld1 {v0.16b}, [%1], #16 \n" "subs %w3, %w3, #16 \n" "st1 {v0.16b}, [%0], #16 \n" "b.gt 100b \n" "99: \n" : "+r"(dst_ptr), // %0 "+r"(src_ptr), // %1 "+r"(src_ptr1), // %2 "+r"(dst_width), // %3 "+r"(y1_fraction), // %4 "+r"(y0_fraction) // %5 : : "cc", "memory", "v0", "v1", "v3", "v4", "v5"); } // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr void ARGBBlendRow_NEON(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( "subs %w3, %w3, #8 \n" "b.lt 89f \n" // Blend 8 pixels. "8: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB0 // pixels "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 ARGB1 // pixels "subs %w3, %w3, #8 \n" // 8 processed per loop. "umull v16.8h, v4.8b, v3.8b \n" // db * a "umull v17.8h, v5.8b, v3.8b \n" // dg * a "umull v18.8h, v6.8b, v3.8b \n" // dr * a "uqrshrn v16.8b, v16.8h, #8 \n" // db >>= 8 "uqrshrn v17.8b, v17.8h, #8 \n" // dg >>= 8 "uqrshrn v18.8b, v18.8h, #8 \n" // dr >>= 8 "uqsub v4.8b, v4.8b, v16.8b \n" // db - (db * a / 256) "uqsub v5.8b, v5.8b, v17.8b \n" // dg - (dg * a / 256) "uqsub v6.8b, v6.8b, v18.8b \n" // dr - (dr * a / 256) "uqadd v0.8b, v0.8b, v4.8b \n" // + sb "uqadd v1.8b, v1.8b, v5.8b \n" // + sg "uqadd v2.8b, v2.8b, v6.8b \n" // + sr "movi v3.8b, #255 \n" // a = 255 "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB // pixels "b.ge 8b \n" "89: \n" "adds %w3, %w3, #8-1 \n" "b.lt 99f \n" // Blend 1 pixels. "1: \n" "ld4 {v0.b,v1.b,v2.b,v3.b}[0], [%0], #4 \n" // load 1 pixel ARGB0. "ld4 {v4.b,v5.b,v6.b,v7.b}[0], [%1], #4 \n" // load 1 pixel ARGB1. "subs %w3, %w3, #1 \n" // 1 processed per loop. "umull v16.8h, v4.8b, v3.8b \n" // db * a "umull v17.8h, v5.8b, v3.8b \n" // dg * a "umull v18.8h, v6.8b, v3.8b \n" // dr * a "uqrshrn v16.8b, v16.8h, #8 \n" // db >>= 8 "uqrshrn v17.8b, v17.8h, #8 \n" // dg >>= 8 "uqrshrn v18.8b, v18.8h, #8 \n" // dr >>= 8 "uqsub v4.8b, v4.8b, v16.8b \n" // db - (db * a / 256) "uqsub v5.8b, v5.8b, v17.8b \n" // dg - (dg * a / 256) "uqsub v6.8b, v6.8b, v18.8b \n" // dr - (dr * a / 256) "uqadd v0.8b, v0.8b, v4.8b \n" // + sb "uqadd v1.8b, v1.8b, v5.8b \n" // + sg "uqadd v2.8b, v2.8b, v6.8b \n" // + sr "movi v3.8b, #255 \n" // a = 255 "st4 {v0.b,v1.b,v2.b,v3.b}[0], [%2], #4 \n" // store 1 pixel. "b.ge 1b \n" "99: \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18"); } // Attenuate 8 pixels at a time. void ARGBAttenuateRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( // Attenuate 8 pixels. "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB "subs %w2, %w2, #8 \n" // 8 processed per loop. "umull v4.8h, v0.8b, v3.8b \n" // b * a "umull v5.8h, v1.8b, v3.8b \n" // g * a "umull v6.8h, v2.8b, v3.8b \n" // r * a "uqrshrn v0.8b, v4.8h, #8 \n" // b >>= 8 "uqrshrn v1.8b, v5.8h, #8 \n" // g >>= 8 "uqrshrn v2.8b, v6.8h, #8 \n" // r >>= 8 "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 ARGB // pixels "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"); } // Quantize 8 ARGB pixels (32 bytes). // dst = (dst * scale >> 16) * interval_size + interval_offset; void ARGBQuantizeRow_NEON(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width) { asm volatile( "dup v4.8h, %w2 \n" "ushr v4.8h, v4.8h, #1 \n" // scale >>= 1 "dup v5.8h, %w3 \n" // interval multiply. 
"dup v6.8h, %w4 \n" // interval add // 8 pixel loop. "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB. "subs %w1, %w1, #8 \n" // 8 processed per loop. "uxtl v0.8h, v0.8b \n" // b (0 .. 255) "uxtl v1.8h, v1.8b \n" "uxtl v2.8h, v2.8b \n" "sqdmulh v0.8h, v0.8h, v4.8h \n" // b * scale "sqdmulh v1.8h, v1.8h, v4.8h \n" // g "sqdmulh v2.8h, v2.8h, v4.8h \n" // r "mul v0.8h, v0.8h, v5.8h \n" // b * interval_size "mul v1.8h, v1.8h, v5.8h \n" // g "mul v2.8h, v2.8h, v5.8h \n" // r "add v0.8h, v0.8h, v6.8h \n" // b + interval_offset "add v1.8h, v1.8h, v6.8h \n" // g "add v2.8h, v2.8h, v6.8h \n" // r "uqxtn v0.8b, v0.8h \n" "uqxtn v1.8b, v1.8h \n" "uqxtn v2.8b, v2.8h \n" "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 ARGB "b.gt 1b \n" : "+r"(dst_argb), // %0 "+r"(width) // %1 : "r"(scale), // %2 "r"(interval_size), // %3 "r"(interval_offset) // %4 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6"); } // Shade 8 pixels at a time by specified value. // NOTE vqrdmulh.s16 q10, q10, d0[0] must use a scaler register from 0 to 8. // Rounding in vqrdmulh does +1 to high if high bit of low s16 is set. void ARGBShadeRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value) { asm volatile( "dup v0.4s, %w3 \n" // duplicate scale value. "zip1 v0.8b, v0.8b, v0.8b \n" // v0.8b aarrggbb. "ushr v0.8h, v0.8h, #1 \n" // scale / 2. // 8 pixel loop. "1: \n" "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%0], #32 \n" // load 8 ARGB "subs %w2, %w2, #8 \n" // 8 processed per loop. "uxtl v4.8h, v4.8b \n" // b (0 .. 255) "uxtl v5.8h, v5.8b \n" "uxtl v6.8h, v6.8b \n" "uxtl v7.8h, v7.8b \n" "sqrdmulh v4.8h, v4.8h, v0.h[0] \n" // b * scale * 2 "sqrdmulh v5.8h, v5.8h, v0.h[1] \n" // g "sqrdmulh v6.8h, v6.8h, v0.h[2] \n" // r "sqrdmulh v7.8h, v7.8h, v0.h[3] \n" // a "uqxtn v4.8b, v4.8h \n" "uqxtn v5.8b, v5.8h \n" "uqxtn v6.8b, v6.8h \n" "uqxtn v7.8b, v7.8h \n" "st4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // store 8 ARGB "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "r"(value) // %3 : "cc", "memory", "v0", "v4", "v5", "v6", "v7"); } // Convert 8 ARGB pixels (64 bytes) to 8 Gray ARGB pixels // Similar to ARGBToYJ but stores ARGB. // C code is (15 * b + 75 * g + 38 * r + 64) >> 7; void ARGBGrayRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, int width) { asm volatile( "movi v24.8b, #15 \n" // B * 0.11400 coefficient "movi v25.8b, #75 \n" // G * 0.58700 coefficient "movi v26.8b, #38 \n" // R * 0.29900 coefficient "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB "subs %w2, %w2, #8 \n" // 8 processed per loop. "umull v4.8h, v0.8b, v24.8b \n" // B "umlal v4.8h, v1.8b, v25.8b \n" // G "umlal v4.8h, v2.8b, v26.8b \n" // R "sqrshrun v0.8b, v4.8h, #7 \n" // 15 bit to 8 bit B "orr v1.8b, v0.8b, v0.8b \n" // G "orr v2.8b, v0.8b, v0.8b \n" // R "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%1], #32 \n" // store 8 pixels. "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v24", "v25", "v26"); } // Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels. 
// b = (r * 35 + g * 68 + b * 17) >> 7 // g = (r * 45 + g * 88 + b * 22) >> 7 // r = (r * 50 + g * 98 + b * 24) >> 7 void ARGBSepiaRow_NEON(uint8_t* dst_argb, int width) { asm volatile( "movi v20.8b, #17 \n" // BB coefficient "movi v21.8b, #68 \n" // BG coefficient "movi v22.8b, #35 \n" // BR coefficient "movi v24.8b, #22 \n" // GB coefficient "movi v25.8b, #88 \n" // GG coefficient "movi v26.8b, #45 \n" // GR coefficient "movi v28.8b, #24 \n" // RB coefficient "movi v29.8b, #98 \n" // RG coefficient "movi v30.8b, #50 \n" // RR coefficient "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0] \n" // load 8 ARGB pixels. "subs %w1, %w1, #8 \n" // 8 processed per loop. "umull v4.8h, v0.8b, v20.8b \n" // B to Sepia B "umlal v4.8h, v1.8b, v21.8b \n" // G "umlal v4.8h, v2.8b, v22.8b \n" // R "umull v5.8h, v0.8b, v24.8b \n" // B to Sepia G "umlal v5.8h, v1.8b, v25.8b \n" // G "umlal v5.8h, v2.8b, v26.8b \n" // R "umull v6.8h, v0.8b, v28.8b \n" // B to Sepia R "umlal v6.8h, v1.8b, v29.8b \n" // G "umlal v6.8h, v2.8b, v30.8b \n" // R "uqshrn v0.8b, v4.8h, #7 \n" // 16 bit to 8 bit B "uqshrn v1.8b, v5.8h, #7 \n" // 16 bit to 8 bit G "uqshrn v2.8b, v6.8h, #7 \n" // 16 bit to 8 bit R "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // store 8 pixels. "b.gt 1b \n" : "+r"(dst_argb), // %0 "+r"(width) // %1 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "v21", "v22", "v24", "v25", "v26", "v28", "v29", "v30"); } // Transform 8 ARGB pixels (32 bytes) with color matrix. // TODO(fbarchard): Was same as Sepia except matrix is provided. This function // needs to saturate. Consider doing a non-saturating version. void ARGBColorMatrixRow_NEON(const uint8_t* src_argb, uint8_t* dst_argb, const int8_t* matrix_argb, int width) { asm volatile( "ld1 {v2.16b}, [%3] \n" // load 16 matrix coefficients. "sxtl v0.8h, v2.8b \n" // B,G coefficients s16. "sxtl2 v1.8h, v2.16b \n" // R,A coefficients s16. "1: \n" "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%0], #32 \n" // load 8 ARGB "subs %w2, %w2, #8 \n" // 8 processed per loop. "uxtl v16.8h, v16.8b \n" // b (0 ..
255) 16 bit "uxtl v17.8h, v17.8b \n" // g "uxtl v18.8h, v18.8b \n" // r "uxtl v19.8h, v19.8b \n" // a "mul v22.8h, v16.8h, v0.h[0] \n" // B = B * Matrix B "mul v23.8h, v16.8h, v0.h[4] \n" // G = B * Matrix G "mul v24.8h, v16.8h, v1.h[0] \n" // R = B * Matrix R "mul v25.8h, v16.8h, v1.h[4] \n" // A = B * Matrix A "mul v4.8h, v17.8h, v0.h[1] \n" // B += G * Matrix B "mul v5.8h, v17.8h, v0.h[5] \n" // G += G * Matrix G "mul v6.8h, v17.8h, v1.h[1] \n" // R += G * Matrix R "mul v7.8h, v17.8h, v1.h[5] \n" // A += G * Matrix A "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A "mul v4.8h, v18.8h, v0.h[2] \n" // B += R * Matrix B "mul v5.8h, v18.8h, v0.h[6] \n" // G += R * Matrix G "mul v6.8h, v18.8h, v1.h[2] \n" // R += R * Matrix R "mul v7.8h, v18.8h, v1.h[6] \n" // A += R * Matrix A "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A "mul v4.8h, v19.8h, v0.h[3] \n" // B += A * Matrix B "mul v5.8h, v19.8h, v0.h[7] \n" // G += A * Matrix G "mul v6.8h, v19.8h, v1.h[3] \n" // R += A * Matrix R "mul v7.8h, v19.8h, v1.h[7] \n" // A += A * Matrix A "sqadd v22.8h, v22.8h, v4.8h \n" // Accumulate B "sqadd v23.8h, v23.8h, v5.8h \n" // Accumulate G "sqadd v24.8h, v24.8h, v6.8h \n" // Accumulate R "sqadd v25.8h, v25.8h, v7.8h \n" // Accumulate A "sqshrun v16.8b, v22.8h, #6 \n" // 16 bit to 8 bit B "sqshrun v17.8b, v23.8h, #6 \n" // 16 bit to 8 bit G "sqshrun v18.8b, v24.8h, #6 \n" // 16 bit to 8 bit R "sqshrun v19.8b, v25.8h, #6 \n" // 16 bit to 8 bit A "st4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%1], #32 \n" // store 8 ARGB "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(width) // %2 : "r"(matrix_argb) // %3 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v22", "v23", "v24", "v25"); } // TODO(fbarchard): fix vqshrun in ARGBMultiplyRow_NEON and reenable. // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. void ARGBMultiplyRow_NEON(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( // 8 pixel loop. "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more "subs %w3, %w3, #8 \n" // 8 processed per loop. "umull v0.8h, v0.8b, v4.8b \n" // multiply B "umull v1.8h, v1.8b, v5.8b \n" // multiply G "umull v2.8h, v2.8b, v6.8b \n" // multiply R "umull v3.8h, v3.8b, v7.8b \n" // multiply A "rshrn v0.8b, v0.8h, #8 \n" // 16 bit to 8 bit B "rshrn v1.8b, v1.8h, #8 \n" // 16 bit to 8 bit G "rshrn v2.8b, v2.8h, #8 \n" // 16 bit to 8 bit R "rshrn v3.8b, v3.8h, #8 \n" // 16 bit to 8 bit A "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB "b.gt 1b \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } // Add 2 rows of ARGB pixels together, 8 pixels at a time. void ARGBAddRow_NEON(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( // 8 pixel loop. "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more "subs %w3, %w3, #8 \n" // 8 processed per loop. 
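// Editor's note (illustrative, not part of the original source): uqadd
// below is a per-byte saturating add, i.e. dst = min(a + b, 255) for each
// channel independently, so no carries can spill between channels.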
"uqadd v0.8b, v0.8b, v4.8b \n" "uqadd v1.8b, v1.8b, v5.8b \n" "uqadd v2.8b, v2.8b, v6.8b \n" "uqadd v3.8b, v3.8b, v7.8b \n" "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB "b.gt 1b \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } // Subtract 2 rows of ARGB pixels, 8 pixels at a time. void ARGBSubtractRow_NEON(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { asm volatile( // 8 pixel loop. "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // load 8 ARGB "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%1], #32 \n" // load 8 more "subs %w3, %w3, #8 \n" // 8 processed per loop. "uqsub v0.8b, v0.8b, v4.8b \n" "uqsub v1.8b, v1.8b, v5.8b \n" "uqsub v2.8b, v2.8b, v6.8b \n" "uqsub v3.8b, v3.8b, v7.8b \n" "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB "b.gt 1b \n" : "+r"(src_argb0), // %0 "+r"(src_argb1), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } // Adds Sobel X and Sobel Y and stores Sobel into ARGB. // A = 255 // R = Sobel // G = Sobel // B = Sobel void SobelRow_NEON(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) { asm volatile( "movi v3.8b, #255 \n" // alpha // 8 pixel loop. "1: \n" "ld1 {v0.8b}, [%0], #8 \n" // load 8 sobelx. "ld1 {v1.8b}, [%1], #8 \n" // load 8 sobely. "subs %w3, %w3, #8 \n" // 8 processed per loop. "uqadd v0.8b, v0.8b, v1.8b \n" // add "orr v1.8b, v0.8b, v0.8b \n" "orr v2.8b, v0.8b, v0.8b \n" "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB "b.gt 1b \n" : "+r"(src_sobelx), // %0 "+r"(src_sobely), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "cc", "memory", "v0", "v1", "v2", "v3"); } // Adds Sobel X and Sobel Y and stores Sobel into plane. void SobelToPlaneRow_NEON(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width) { asm volatile( // 16 pixel loop. "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load 16 sobelx. "ld1 {v1.16b}, [%1], #16 \n" // load 16 sobely. "subs %w3, %w3, #16 \n" // 16 processed per loop. "uqadd v0.16b, v0.16b, v1.16b \n" // add "st1 {v0.16b}, [%2], #16 \n" // store 16 pixels. "b.gt 1b \n" : "+r"(src_sobelx), // %0 "+r"(src_sobely), // %1 "+r"(dst_y), // %2 "+r"(width) // %3 : : "cc", "memory", "v0", "v1"); } // Mixes Sobel X, Sobel Y and Sobel into ARGB. // A = 255 // R = Sobel X // G = Sobel // B = Sobel Y void SobelXYRow_NEON(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) { asm volatile( "movi v3.8b, #255 \n" // alpha // 8 pixel loop. "1: \n" "ld1 {v2.8b}, [%0], #8 \n" // load 8 sobelx. "ld1 {v0.8b}, [%1], #8 \n" // load 8 sobely. "subs %w3, %w3, #8 \n" // 8 processed per loop. 
"uqadd v1.8b, v0.8b, v2.8b \n" // add "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" // store 8 ARGB "b.gt 1b \n" : "+r"(src_sobelx), // %0 "+r"(src_sobely), // %1 "+r"(dst_argb), // %2 "+r"(width) // %3 : : "cc", "memory", "v0", "v1", "v2", "v3"); } // SobelX as a matrix is // -1 0 1 // -2 0 2 // -1 0 1 void SobelXRow_NEON(const uint8_t* src_y0, const uint8_t* src_y1, const uint8_t* src_y2, uint8_t* dst_sobelx, int width) { asm volatile( "1: \n" "ld1 {v0.8b}, [%0],%5 \n" // top "ld1 {v1.8b}, [%0],%6 \n" "usubl v0.8h, v0.8b, v1.8b \n" "ld1 {v2.8b}, [%1],%5 \n" // center * 2 "ld1 {v3.8b}, [%1],%6 \n" "usubl v1.8h, v2.8b, v3.8b \n" "add v0.8h, v0.8h, v1.8h \n" "add v0.8h, v0.8h, v1.8h \n" "ld1 {v2.8b}, [%2],%5 \n" // bottom "ld1 {v3.8b}, [%2],%6 \n" "subs %w4, %w4, #8 \n" // 8 pixels "usubl v1.8h, v2.8b, v3.8b \n" "add v0.8h, v0.8h, v1.8h \n" "abs v0.8h, v0.8h \n" "uqxtn v0.8b, v0.8h \n" "st1 {v0.8b}, [%3], #8 \n" // store 8 sobelx "b.gt 1b \n" : "+r"(src_y0), // %0 "+r"(src_y1), // %1 "+r"(src_y2), // %2 "+r"(dst_sobelx), // %3 "+r"(width) // %4 : "r"(2LL), // %5 "r"(6LL) // %6 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } // SobelY as a matrix is // -1 -2 -1 // 0 0 0 // 1 2 1 void SobelYRow_NEON(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width) { asm volatile( "1: \n" "ld1 {v0.8b}, [%0],%4 \n" // left "ld1 {v1.8b}, [%1],%4 \n" "usubl v0.8h, v0.8b, v1.8b \n" "ld1 {v2.8b}, [%0],%4 \n" // center * 2 "ld1 {v3.8b}, [%1],%4 \n" "usubl v1.8h, v2.8b, v3.8b \n" "add v0.8h, v0.8h, v1.8h \n" "add v0.8h, v0.8h, v1.8h \n" "ld1 {v2.8b}, [%0],%5 \n" // right "ld1 {v3.8b}, [%1],%5 \n" "subs %w3, %w3, #8 \n" // 8 pixels "usubl v1.8h, v2.8b, v3.8b \n" "add v0.8h, v0.8h, v1.8h \n" "abs v0.8h, v0.8h \n" "uqxtn v0.8b, v0.8h \n" "st1 {v0.8b}, [%2], #8 \n" // store 8 sobely "b.gt 1b \n" : "+r"(src_y0), // %0 "+r"(src_y1), // %1 "+r"(dst_sobely), // %2 "+r"(width) // %3 : "r"(1LL), // %4 "r"(6LL) // %5 : "cc", "memory", "v0", "v1", "v2", "v3" // Clobber List ); } // Caveat - rounds float to half float whereas scaling version truncates. 
void HalfFloat1Row_NEON(const uint16_t* src, uint16_t* dst, float /*unused*/, int width) { asm volatile( "1: \n" "ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts "subs %w2, %w2, #8 \n" // 8 pixels per loop "uxtl v2.4s, v1.4h \n" // 8 int's "uxtl2 v3.4s, v1.8h \n" "scvtf v2.4s, v2.4s \n" // 8 floats "scvtf v3.4s, v3.4s \n" "fcvtn v1.4h, v2.4s \n" // 8 half floats "fcvtn2 v1.8h, v3.4s \n" "st1 {v1.16b}, [%1], #16 \n" // store 8 shorts "b.gt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : : "cc", "memory", "v1", "v2", "v3"); } void HalfFloatRow_NEON(const uint16_t* src, uint16_t* dst, float scale, int width) { asm volatile( "1: \n" "ld1 {v1.16b}, [%0], #16 \n" // load 8 shorts "subs %w2, %w2, #8 \n" // 8 pixels per loop "uxtl v2.4s, v1.4h \n" // 8 int's "uxtl2 v3.4s, v1.8h \n" "scvtf v2.4s, v2.4s \n" // 8 floats "scvtf v3.4s, v3.4s \n" "fmul v2.4s, v2.4s, %3.s[0] \n" // adjust exponent "fmul v3.4s, v3.4s, %3.s[0] \n" "uqshrn v1.4h, v2.4s, #13 \n" // isolate halffloat "uqshrn2 v1.8h, v3.4s, #13 \n" "st1 {v1.16b}, [%1], #16 \n" // store 8 shorts "b.gt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "w"(scale * 1.9259299444e-34f) // %3 : "cc", "memory", "v1", "v2", "v3"); } void ByteToFloatRow_NEON(const uint8_t* src, float* dst, float scale, int width) { asm volatile( "1: \n" "ld1 {v1.8b}, [%0], #8 \n" // load 8 bytes "subs %w2, %w2, #8 \n" // 8 pixels per loop "uxtl v1.8h, v1.8b \n" // 8 shorts "uxtl v2.4s, v1.4h \n" // 8 ints "uxtl2 v3.4s, v1.8h \n" "scvtf v2.4s, v2.4s \n" // 8 floats "scvtf v3.4s, v3.4s \n" "fmul v2.4s, v2.4s, %3.s[0] \n" // scale "fmul v3.4s, v3.4s, %3.s[0] \n" "st1 {v2.16b, v3.16b}, [%1], #32 \n" // store 8 floats "b.gt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "w"(scale) // %3 : "cc", "memory", "v1", "v2", "v3"); } float ScaleMaxSamples_NEON(const float* src, float* dst, float scale, int width) { float fmax; asm volatile( "movi v5.4s, #0 \n" // max "movi v6.4s, #0 \n" "1: \n" "ld1 {v1.4s, v2.4s}, [%0], #32 \n" // load 8 samples "subs %w2, %w2, #8 \n" // 8 processed per loop "fmul v3.4s, v1.4s, %4.s[0] \n" // scale "fmul v4.4s, v2.4s, %4.s[0] \n" // scale "fmax v5.4s, v5.4s, v1.4s \n" // max "fmax v6.4s, v6.4s, v2.4s \n" "st1 {v3.4s, v4.4s}, [%1], #32 \n" // store 8 samples "b.gt 1b \n" "fmax v5.4s, v5.4s, v6.4s \n" // max "fmaxv %s3, v5.4s \n" // signed max acculator : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width), // %2 "=w"(fmax) // %3 : "w"(scale) // %4 : "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6"); return fmax; } float ScaleSumSamples_NEON(const float* src, float* dst, float scale, int width) { float fsum; asm volatile( "movi v5.4s, #0 \n" // max "movi v6.4s, #0 \n" // max "1: \n" "ld1 {v1.4s, v2.4s}, [%0], #32 \n" // load 8 samples "subs %w2, %w2, #8 \n" // 8 processed per loop "fmul v3.4s, v1.4s, %4.s[0] \n" // scale "fmul v4.4s, v2.4s, %4.s[0] \n" "fmla v5.4s, v1.4s, v1.4s \n" // sum of squares "fmla v6.4s, v2.4s, v2.4s \n" "st1 {v3.4s, v4.4s}, [%1], #32 \n" // store 8 samples "b.gt 1b \n" "faddp v5.4s, v5.4s, v6.4s \n" "faddp v5.4s, v5.4s, v5.4s \n" "faddp %3.4s, v5.4s, v5.4s \n" // sum : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width), // %2 "=w"(fsum) // %3 : "w"(scale) // %4 : "cc", "memory", "v1", "v2", "v3", "v4", "v5", "v6"); return fsum; } void ScaleSamples_NEON(const float* src, float* dst, float scale, int width) { asm volatile( "1: \n" "ld1 {v1.4s, v2.4s}, [%0], #32 \n" // load 8 samples "subs %w2, %w2, #8 \n" // 8 processed per loop "fmul v1.4s, v1.4s, %3.s[0] \n" // scale "fmul v2.4s, v2.4s, %3.s[0] 
\n" // scale "st1 {v1.4s, v2.4s}, [%1], #32 \n" // store 8 samples "b.gt 1b \n" : "+r"(src), // %0 "+r"(dst), // %1 "+r"(width) // %2 : "w"(scale) // %3 : "cc", "memory", "v1", "v2"); } // filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row. void GaussCol_NEON(const uint16_t* src0, const uint16_t* src1, const uint16_t* src2, const uint16_t* src3, const uint16_t* src4, uint32_t* dst, int width) { asm volatile( "movi v6.8h, #4 \n" // constant 4 "movi v7.8h, #6 \n" // constant 6 "1: \n" "ld1 {v1.8h}, [%0], #16 \n" // load 8 samples, 5 rows "ld1 {v2.8h}, [%4], #16 \n" "uaddl v0.4s, v1.4h, v2.4h \n" // * 1 "uaddl2 v1.4s, v1.8h, v2.8h \n" // * 1 "ld1 {v2.8h}, [%1], #16 \n" "umlal v0.4s, v2.4h, v6.4h \n" // * 4 "umlal2 v1.4s, v2.8h, v6.8h \n" // * 4 "ld1 {v2.8h}, [%2], #16 \n" "umlal v0.4s, v2.4h, v7.4h \n" // * 6 "umlal2 v1.4s, v2.8h, v7.8h \n" // * 6 "ld1 {v2.8h}, [%3], #16 \n" "umlal v0.4s, v2.4h, v6.4h \n" // * 4 "umlal2 v1.4s, v2.8h, v6.8h \n" // * 4 "subs %w6, %w6, #8 \n" // 8 processed per loop "st1 {v0.4s,v1.4s}, [%5], #32 \n" // store 8 samples "b.gt 1b \n" : "+r"(src0), // %0 "+r"(src1), // %1 "+r"(src2), // %2 "+r"(src3), // %3 "+r"(src4), // %4 "+r"(dst), // %5 "+r"(width) // %6 : : "cc", "memory", "v0", "v1", "v2", "v6", "v7"); } // filter 5 rows with 1, 4, 6, 4, 1 coefficients to produce 1 row. void GaussRow_NEON(const uint32_t* src, uint16_t* dst, int width) { const uint32_t* src1 = src + 1; const uint32_t* src2 = src + 2; const uint32_t* src3 = src + 3; asm volatile( "movi v6.4s, #4 \n" // constant 4 "movi v7.4s, #6 \n" // constant 6 "1: \n" "ld1 {v0.4s,v1.4s,v2.4s}, [%0], %6 \n" // load 12 source samples "add v0.4s, v0.4s, v1.4s \n" // * 1 "add v1.4s, v1.4s, v2.4s \n" // * 1 "ld1 {v2.4s,v3.4s}, [%2], #32 \n" "mla v0.4s, v2.4s, v7.4s \n" // * 6 "mla v1.4s, v3.4s, v7.4s \n" // * 6 "ld1 {v2.4s,v3.4s}, [%1], #32 \n" "ld1 {v4.4s,v5.4s}, [%3], #32 \n" "add v2.4s, v2.4s, v4.4s \n" // add rows for * 4 "add v3.4s, v3.4s, v5.4s \n" "mla v0.4s, v2.4s, v6.4s \n" // * 4 "mla v1.4s, v3.4s, v6.4s \n" // * 4 "subs %w5, %w5, #8 \n" // 8 processed per loop "uqrshrn v0.4h, v0.4s, #8 \n" // round and pack "uqrshrn2 v0.8h, v1.4s, #8 \n" "st1 {v0.8h}, [%4], #16 \n" // store 8 samples "b.gt 1b \n" : "+r"(src), // %0 "+r"(src1), // %1 "+r"(src2), // %2 "+r"(src3), // %3 "+r"(dst), // %4 "+r"(width) // %5 : "r"(32LL) // %6 : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7"); } #endif // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/row_win.cc000066400000000000000000006235131357355204000222020ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/row.h" // This module is for Visual C 32/64 bit and clangcl 32 bit #if !defined(LIBYUV_DISABLE_X86) && defined(_MSC_VER) && \ (defined(_M_IX86) || (defined(_M_X64) && !defined(__clang__))) #if defined(_M_X64) #include #include // For _mm_maddubs_epi16 #endif #ifdef __cplusplus namespace libyuv { extern "C" { #endif // 64 bit #if defined(_M_X64) // Read 4 UV from 422, upsample to 8 UV. 
#define READYUV422 \ xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf); \ xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \ xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ u_buf += 4; \ xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ y_buf += 8; // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. #define READYUVA422 \ xmm0 = _mm_cvtsi32_si128(*(uint32_t*)u_buf); \ xmm1 = _mm_cvtsi32_si128(*(uint32_t*)(u_buf + offset)); \ xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ xmm0 = _mm_unpacklo_epi16(xmm0, xmm0); \ u_buf += 4; \ xmm4 = _mm_loadl_epi64((__m128i*)y_buf); \ xmm4 = _mm_unpacklo_epi8(xmm4, xmm4); \ y_buf += 8; \ xmm5 = _mm_loadl_epi64((__m128i*)a_buf); \ a_buf += 8; // Convert 8 pixels: 8 UV and 8 Y. #define YUVTORGB(yuvconstants) \ xmm1 = _mm_loadu_si128(&xmm0); \ xmm2 = _mm_loadu_si128(&xmm0); \ xmm0 = _mm_maddubs_epi16(xmm0, *(__m128i*)yuvconstants->kUVToB); \ xmm1 = _mm_maddubs_epi16(xmm1, *(__m128i*)yuvconstants->kUVToG); \ xmm2 = _mm_maddubs_epi16(xmm2, *(__m128i*)yuvconstants->kUVToR); \ xmm0 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasB, xmm0); \ xmm1 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasG, xmm1); \ xmm2 = _mm_sub_epi16(*(__m128i*)yuvconstants->kUVBiasR, xmm2); \ xmm4 = _mm_mulhi_epu16(xmm4, *(__m128i*)yuvconstants->kYToRgb); \ xmm0 = _mm_adds_epi16(xmm0, xmm4); \ xmm1 = _mm_adds_epi16(xmm1, xmm4); \ xmm2 = _mm_adds_epi16(xmm2, xmm4); \ xmm0 = _mm_srai_epi16(xmm0, 6); \ xmm1 = _mm_srai_epi16(xmm1, 6); \ xmm2 = _mm_srai_epi16(xmm2, 6); \ xmm0 = _mm_packus_epi16(xmm0, xmm0); \ xmm1 = _mm_packus_epi16(xmm1, xmm1); \ xmm2 = _mm_packus_epi16(xmm2, xmm2); // Store 8 ARGB values. #define STOREARGB \ xmm0 = _mm_unpacklo_epi8(xmm0, xmm1); \ xmm2 = _mm_unpacklo_epi8(xmm2, xmm5); \ xmm1 = _mm_loadu_si128(&xmm0); \ xmm0 = _mm_unpacklo_epi16(xmm0, xmm2); \ xmm1 = _mm_unpackhi_epi16(xmm1, xmm2); \ _mm_storeu_si128((__m128i*)dst_argb, xmm0); \ _mm_storeu_si128((__m128i*)(dst_argb + 16), xmm1); \ dst_argb += 32; #if defined(HAS_I422TOARGBROW_SSSE3) void I422ToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __m128i xmm0, xmm1, xmm2, xmm4; const __m128i xmm5 = _mm_set1_epi8(-1); const ptrdiff_t offset = (uint8_t*)v_buf - (uint8_t*)u_buf; while (width > 0) { READYUV422 YUVTORGB(yuvconstants) STOREARGB width -= 8; } } #endif #if defined(HAS_I422ALPHATOARGBROW_SSSE3) void I422AlphaToARGBRow_SSSE3(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __m128i xmm0, xmm1, xmm2, xmm4, xmm5; const ptrdiff_t offset = (uint8_t*)v_buf - (uint8_t*)u_buf; while (width > 0) { READYUVA422 YUVTORGB(yuvconstants) STOREARGB width -= 8; } } #endif // 32 bit #else // defined(_M_X64) #ifdef HAS_ARGBTOYROW_SSSE3 // Constants for ARGB. static const vec8 kARGBToY = {13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0}; // JPeg full range. 
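// Editor's note (illustrative, not part of the original source): these
// JPEG-range weights sum to exactly 128 (15 + 75 + 38), so
// (15*B + 75*G + 38*R + 64) >> 7 maps 0..255 onto the full 0..255 range
// with no offset, unlike kARGBToY above, whose weights sum to 111 and get
// the BT.601-style +16 bias (kAddY16) added afterwards.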
static const vec8 kARGBToYJ = {15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0, 15, 75, 38, 0}; static const vec8 kARGBToU = {112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0}; static const vec8 kARGBToUJ = {127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0, 127, -84, -43, 0}; static const vec8 kARGBToV = { -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, }; static const vec8 kARGBToVJ = {-20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0, -20, -107, 127, 0}; // vpshufb for vphaddw + vpackuswb packed to shorts. static const lvec8 kShufARGBToUV_AVX = { 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15, 0, 1, 8, 9, 2, 3, 10, 11, 4, 5, 12, 13, 6, 7, 14, 15}; // Constants for BGRA. static const vec8 kBGRAToY = {0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13}; static const vec8 kBGRAToU = {0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112}; static const vec8 kBGRAToV = {0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18}; // Constants for ABGR. static const vec8 kABGRToY = {33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0, 33, 65, 13, 0}; static const vec8 kABGRToU = {-38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0, -38, -74, 112, 0}; static const vec8 kABGRToV = {112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0, 112, -94, -18, 0}; // Constants for RGBA. static const vec8 kRGBAToY = {0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33, 0, 13, 65, 33}; static const vec8 kRGBAToU = {0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38, 0, 112, -74, -38}; static const vec8 kRGBAToV = {0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112, 0, -18, -94, 112}; static const uvec8 kAddY16 = {16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u, 16u}; // 7 bit fixed point 0.5. static const vec16 kAddYJ64 = {64, 64, 64, 64, 64, 64, 64, 64}; static const uvec8 kAddUV128 = {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; static const uvec16 kAddUVJ128 = {0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u, 0x8080u}; // Shuffle table for converting RGB24 to ARGB. static const uvec8 kShuffleMaskRGB24ToARGB = { 0u, 1u, 2u, 12u, 3u, 4u, 5u, 13u, 6u, 7u, 8u, 14u, 9u, 10u, 11u, 15u}; // Shuffle table for converting RAW to ARGB. static const uvec8 kShuffleMaskRAWToARGB = {2u, 1u, 0u, 12u, 5u, 4u, 3u, 13u, 8u, 7u, 6u, 14u, 11u, 10u, 9u, 15u}; // Shuffle table for converting RAW to RGB24. First 8. static const uvec8 kShuffleMaskRAWToRGB24_0 = { 2u, 1u, 0u, 5u, 4u, 3u, 8u, 7u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting RAW to RGB24. Middle 8. static const uvec8 kShuffleMaskRAWToRGB24_1 = { 2u, 7u, 6u, 5u, 10u, 9u, 8u, 13u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting RAW to RGB24. Last 8. static const uvec8 kShuffleMaskRAWToRGB24_2 = { 8u, 7u, 12u, 11u, 10u, 15u, 14u, 13u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGB to RGB24. static const uvec8 kShuffleMaskARGBToRGB24 = { 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 10u, 12u, 13u, 14u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGB to RAW. static const uvec8 kShuffleMaskARGBToRAW = { 2u, 1u, 0u, 6u, 5u, 4u, 10u, 9u, 8u, 14u, 13u, 12u, 128u, 128u, 128u, 128u}; // Shuffle table for converting ARGBToRGB24 for I422ToRGB24. 
First 8 + next 4 static const uvec8 kShuffleMaskARGBToRGB24_0 = { 0u, 1u, 2u, 4u, 5u, 6u, 8u, 9u, 128u, 128u, 128u, 128u, 10u, 12u, 13u, 14u}; // YUY2 shuf 16 Y to 32 Y. static const lvec8 kShuffleYUY2Y = {0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, 0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14}; // YUY2 shuf 8 UV to 16 UV. static const lvec8 kShuffleYUY2UV = {1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15, 1, 3, 1, 3, 5, 7, 5, 7, 9, 11, 9, 11, 13, 15, 13, 15}; // UYVY shuf 16 Y to 32 Y. static const lvec8 kShuffleUYVYY = {1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15, 1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15}; // UYVY shuf 8 UV to 16 UV. static const lvec8 kShuffleUYVYUV = {0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14, 0, 2, 0, 2, 4, 6, 4, 6, 8, 10, 8, 10, 12, 14, 12, 14}; // NV21 shuf 8 VU to 16 UV. static const lvec8 kShuffleNV21 = { 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, 1, 0, 1, 0, 3, 2, 3, 2, 5, 4, 5, 4, 7, 6, 7, 6, }; // Duplicates gray value 3 times and fills in alpha opaque. __declspec(naked) void J400ToARGBRow_SSE2(const uint8_t* src_y, uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_y mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width pcmpeqb xmm5, xmm5 // generate mask 0xff000000 pslld xmm5, 24 convertloop: movq xmm0, qword ptr [eax] lea eax, [eax + 8] punpcklbw xmm0, xmm0 movdqa xmm1, xmm0 punpcklwd xmm0, xmm0 punpckhwd xmm1, xmm1 por xmm0, xmm5 por xmm1, xmm5 movdqu [edx], xmm0 movdqu [edx + 16], xmm1 lea edx, [edx + 32] sub ecx, 8 jg convertloop ret } } #ifdef HAS_J400TOARGBROW_AVX2 // Duplicates gray value 3 times and fills in alpha opaque. __declspec(naked) void J400ToARGBRow_AVX2(const uint8_t* src_y, uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_y mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000 vpslld ymm5, ymm5, 24 convertloop: vmovdqu xmm0, [eax] lea eax, [eax + 16] vpermq ymm0, ymm0, 0xd8 vpunpcklbw ymm0, ymm0, ymm0 vpermq ymm0, ymm0, 0xd8 vpunpckhwd ymm1, ymm0, ymm0 vpunpcklwd ymm0, ymm0, ymm0 vpor ymm0, ymm0, ymm5 vpor ymm1, ymm1, ymm5 vmovdqu [edx], ymm0 vmovdqu [edx + 32], ymm1 lea edx, [edx + 64] sub ecx, 16 jg convertloop vzeroupper ret } } #endif // HAS_J400TOARGBROW_AVX2 __declspec(naked) void RGB24ToARGBRow_SSSE3(const uint8_t* src_rgb24, uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_rgb24 mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width pcmpeqb xmm5, xmm5 // generate mask 0xff000000 pslld xmm5, 24 movdqa xmm4, xmmword ptr kShuffleMaskRGB24ToARGB convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm3, [eax + 32] lea eax, [eax + 48] movdqa xmm2, xmm3 palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]} pshufb xmm2, xmm4 por xmm2, xmm5 palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]} pshufb xmm0, xmm4 movdqu [edx + 32], xmm2 por xmm0, xmm5 pshufb xmm1, xmm4 movdqu [edx], xmm0 por xmm1, xmm5 palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]} pshufb xmm3, xmm4 movdqu [edx + 16], xmm1 por xmm3, xmm5 movdqu [edx + 48], xmm3 lea edx, [edx + 64] sub ecx, 16 jg convertloop ret } } __declspec(naked) void RAWToARGBRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_raw mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width pcmpeqb xmm5, xmm5 // generate mask 0xff000000 pslld xmm5, 24 movdqa xmm4, xmmword ptr kShuffleMaskRAWToARGB convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm3, 
[eax + 32] lea eax, [eax + 48] movdqa xmm2, xmm3 palignr xmm2, xmm1, 8 // xmm2 = { xmm3[0:3] xmm1[8:15]} pshufb xmm2, xmm4 por xmm2, xmm5 palignr xmm1, xmm0, 12 // xmm1 = { xmm3[0:7] xmm0[12:15]} pshufb xmm0, xmm4 movdqu [edx + 32], xmm2 por xmm0, xmm5 pshufb xmm1, xmm4 movdqu [edx], xmm0 por xmm1, xmm5 palignr xmm3, xmm3, 4 // xmm3 = { xmm3[4:15]} pshufb xmm3, xmm4 movdqu [edx + 16], xmm1 por xmm3, xmm5 movdqu [edx + 48], xmm3 lea edx, [edx + 64] sub ecx, 16 jg convertloop ret } } __declspec(naked) void RAWToRGB24Row_SSSE3(const uint8_t* src_raw, uint8_t* dst_rgb24, int width) { __asm { mov eax, [esp + 4] // src_raw mov edx, [esp + 8] // dst_rgb24 mov ecx, [esp + 12] // width movdqa xmm3, xmmword ptr kShuffleMaskRAWToRGB24_0 movdqa xmm4, xmmword ptr kShuffleMaskRAWToRGB24_1 movdqa xmm5, xmmword ptr kShuffleMaskRAWToRGB24_2 convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 4] movdqu xmm2, [eax + 8] lea eax, [eax + 24] pshufb xmm0, xmm3 pshufb xmm1, xmm4 pshufb xmm2, xmm5 movq qword ptr [edx], xmm0 movq qword ptr [edx + 8], xmm1 movq qword ptr [edx + 16], xmm2 lea edx, [edx + 24] sub ecx, 8 jg convertloop ret } } // pmul method to replicate bits. // Math to replicate bits: // (v << 8) | (v << 3) // v * 256 + v * 8 // v * (256 + 8) // G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3 // 20 instructions. __declspec(naked) void RGB565ToARGBRow_SSE2(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) { __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits movd xmm5, eax pshufd xmm5, xmm5, 0 mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits movd xmm6, eax pshufd xmm6, xmm6, 0 pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red psllw xmm3, 11 pcmpeqb xmm4, xmm4 // generate mask 0x07e007e0 for Green psllw xmm4, 10 psrlw xmm4, 5 pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha psllw xmm7, 8 mov eax, [esp + 4] // src_rgb565 mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax sub edx, eax convertloop: movdqu xmm0, [eax] // fetch 8 pixels of bgr565 movdqa xmm1, xmm0 movdqa xmm2, xmm0 pand xmm1, xmm3 // R in upper 5 bits psllw xmm2, 11 // B in upper 5 bits pmulhuw xmm1, xmm5 // * (256 + 8) pmulhuw xmm2, xmm5 // * (256 + 8) psllw xmm1, 8 por xmm1, xmm2 // RB pand xmm0, xmm4 // G in middle 6 bits pmulhuw xmm0, xmm6 // << 5 * (256 + 4) por xmm0, xmm7 // AG movdqa xmm2, xmm1 punpcklbw xmm1, xmm0 punpckhbw xmm2, xmm0 movdqu [eax * 2 + edx], xmm1 // store 4 pixels of ARGB movdqu [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB lea eax, [eax + 16] sub ecx, 8 jg convertloop ret } } #ifdef HAS_RGB565TOARGBROW_AVX2 // pmul method to replicate bits. 
// Math to replicate bits: // (v << 8) | (v << 3) // v * 256 + v * 8 // v * (256 + 8) // G shift of 5 is incorporated, so shift is 5 + 8 and 5 + 3 __declspec(naked) void RGB565ToARGBRow_AVX2(const uint8_t* src_rgb565, uint8_t* dst_argb, int width) { __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits vmovd xmm5, eax vbroadcastss ymm5, xmm5 mov eax, 0x20802080 // multiplier shift by 5 and then repeat 6 bits vmovd xmm6, eax vbroadcastss ymm6, xmm6 vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red vpsllw ymm3, ymm3, 11 vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x07e007e0 for Green vpsllw ymm4, ymm4, 10 vpsrlw ymm4, ymm4, 5 vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xff00ff00 for Alpha vpsllw ymm7, ymm7, 8 mov eax, [esp + 4] // src_rgb565 mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax sub edx, eax convertloop: vmovdqu ymm0, [eax] // fetch 16 pixels of bgr565 vpand ymm1, ymm0, ymm3 // R in upper 5 bits vpsllw ymm2, ymm0, 11 // B in upper 5 bits vpmulhuw ymm1, ymm1, ymm5 // * (256 + 8) vpmulhuw ymm2, ymm2, ymm5 // * (256 + 8) vpsllw ymm1, ymm1, 8 vpor ymm1, ymm1, ymm2 // RB vpand ymm0, ymm0, ymm4 // G in middle 6 bits vpmulhuw ymm0, ymm0, ymm6 // << 5 * (256 + 4) vpor ymm0, ymm0, ymm7 // AG vpermq ymm0, ymm0, 0xd8 // mutate for unpack vpermq ymm1, ymm1, 0xd8 vpunpckhbw ymm2, ymm1, ymm0 vpunpcklbw ymm1, ymm1, ymm0 vmovdqu [eax * 2 + edx], ymm1 // store 8 pixels of ARGB vmovdqu [eax * 2 + edx + 32], ymm2 // store next 8 pixels of ARGB lea eax, [eax + 32] sub ecx, 16 jg convertloop vzeroupper ret } } #endif // HAS_RGB565TOARGBROW_AVX2 #ifdef HAS_ARGB1555TOARGBROW_AVX2 __declspec(naked) void ARGB1555ToARGBRow_AVX2(const uint8_t* src_argb1555, uint8_t* dst_argb, int width) { __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits vmovd xmm5, eax vbroadcastss ymm5, xmm5 mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits vmovd xmm6, eax vbroadcastss ymm6, xmm6 vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0xf800f800 for Red vpsllw ymm3, ymm3, 11 vpsrlw ymm4, ymm3, 6 // generate mask 0x03e003e0 for Green vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xff00ff00 for Alpha vpsllw ymm7, ymm7, 8 mov eax, [esp + 4] // src_argb1555 mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax sub edx, eax convertloop: vmovdqu ymm0, [eax] // fetch 16 pixels of 1555 vpsllw ymm1, ymm0, 1 // R in upper 5 bits vpsllw ymm2, ymm0, 11 // B in upper 5 bits vpand ymm1, ymm1, ymm3 vpmulhuw ymm2, ymm2, ymm5 // * (256 + 8) vpmulhuw ymm1, ymm1, ymm5 // * (256 + 8) vpsllw ymm1, ymm1, 8 vpor ymm1, ymm1, ymm2 // RB vpsraw ymm2, ymm0, 8 // A vpand ymm0, ymm0, ymm4 // G in middle 5 bits vpmulhuw ymm0, ymm0, ymm6 // << 6 * (256 + 8) vpand ymm2, ymm2, ymm7 vpor ymm0, ymm0, ymm2 // AG vpermq ymm0, ymm0, 0xd8 // mutate for unpack vpermq ymm1, ymm1, 0xd8 vpunpckhbw ymm2, ymm1, ymm0 vpunpcklbw ymm1, ymm1, ymm0 vmovdqu [eax * 2 + edx], ymm1 // store 8 pixels of ARGB vmovdqu [eax * 2 + edx + 32], ymm2 // store next 8 pixels of ARGB lea eax, [eax + 32] sub ecx, 16 jg convertloop vzeroupper ret } } #endif // HAS_ARGB1555TOARGBROW_AVX2 #ifdef HAS_ARGB4444TOARGBROW_AVX2 __declspec(naked) void ARGB4444ToARGBRow_AVX2(const uint8_t* src_argb4444, uint8_t* dst_argb, int width) { __asm { mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f vmovd xmm4, eax vbroadcastss ymm4, xmm4 vpslld ymm5, ymm4, 4 // 0xf0f0f0f0 for high nibbles mov eax, [esp + 4] // src_argb4444 mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax sub edx, eax convertloop:
vmovdqu ymm0, [eax] // fetch 16 pixels of bgra4444 vpand ymm2, ymm0, ymm5 // mask high nibbles vpand ymm0, ymm0, ymm4 // mask low nibbles vpsrlw ymm3, ymm2, 4 vpsllw ymm1, ymm0, 4 vpor ymm2, ymm2, ymm3 vpor ymm0, ymm0, ymm1 vpermq ymm0, ymm0, 0xd8 // mutate for unpack vpermq ymm2, ymm2, 0xd8 vpunpckhbw ymm1, ymm0, ymm2 vpunpcklbw ymm0, ymm0, ymm2 vmovdqu [eax * 2 + edx], ymm0 // store 8 pixels of ARGB vmovdqu [eax * 2 + edx + 32], ymm1 // store next 8 pixels of ARGB lea eax, [eax + 32] sub ecx, 16 jg convertloop vzeroupper ret } } #endif // HAS_ARGB4444TOARGBROW_AVX2 // 24 instructions __declspec(naked) void ARGB1555ToARGBRow_SSE2(const uint8_t* src_argb1555, uint8_t* dst_argb, int width) { __asm { mov eax, 0x01080108 // generate multiplier to repeat 5 bits movd xmm5, eax pshufd xmm5, xmm5, 0 mov eax, 0x42004200 // multiplier shift by 6 and then repeat 5 bits movd xmm6, eax pshufd xmm6, xmm6, 0 pcmpeqb xmm3, xmm3 // generate mask 0xf800f800 for Red psllw xmm3, 11 movdqa xmm4, xmm3 // generate mask 0x03e003e0 for Green psrlw xmm4, 6 pcmpeqb xmm7, xmm7 // generate mask 0xff00ff00 for Alpha psllw xmm7, 8 mov eax, [esp + 4] // src_argb1555 mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax sub edx, eax convertloop: movdqu xmm0, [eax] // fetch 8 pixels of 1555 movdqa xmm1, xmm0 movdqa xmm2, xmm0 psllw xmm1, 1 // R in upper 5 bits psllw xmm2, 11 // B in upper 5 bits pand xmm1, xmm3 pmulhuw xmm2, xmm5 // * (256 + 8) pmulhuw xmm1, xmm5 // * (256 + 8) psllw xmm1, 8 por xmm1, xmm2 // RB movdqa xmm2, xmm0 pand xmm0, xmm4 // G in middle 5 bits psraw xmm2, 8 // A pmulhuw xmm0, xmm6 // << 6 * (256 + 8) pand xmm2, xmm7 por xmm0, xmm2 // AG movdqa xmm2, xmm1 punpcklbw xmm1, xmm0 punpckhbw xmm2, xmm0 movdqu [eax * 2 + edx], xmm1 // store 4 pixels of ARGB movdqu [eax * 2 + edx + 16], xmm2 // store next 4 pixels of ARGB lea eax, [eax + 16] sub ecx, 8 jg convertloop ret } } // 18 instructions. 
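// Scalar model (editor's sketch, not a libyuv function; the *Ref name is
// hypothetical) of the pmulhuw bit-replication used by RGB565ToARGBRow and
// ARGB1555ToARGBRow above.  Expanding a 5-bit channel v to 8 bits as
// (v << 3) | (v >> 2) equals ((v << 8) | (v << 3)) >> 5, i.e. v * (256 + 8)
// with a final >> 5; the row code gets that shift for free by keeping the
// channel in the top bits of each word and taking pmulhuw's high 16 bits.
// A 6-bit channel g likewise becomes (g << 2) | (g >> 4) via the 0x2080
// multiplier.
static uint32_t RGB565PixelToARGBRef(uint16_t pix) {
  uint32_t b = pix & 0x1f;          // bits 0..4
  uint32_t g = (pix >> 5) & 0x3f;   // bits 5..10
  uint32_t r = (pix >> 11) & 0x1f;  // bits 11..15
  b = (b << 3) | (b >> 2);  // replicate 5 bits to 8
  g = (g << 2) | (g >> 4);  // replicate 6 bits to 8
  r = (r << 3) | (r >> 2);
  return 0xff000000u | (r << 16) | (g << 8) | b;  // bytes B,G,R,A in memory
}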
__declspec(naked) void ARGB4444ToARGBRow_SSE2(const uint8_t* src_argb4444, uint8_t* dst_argb, int width) { __asm { mov eax, 0x0f0f0f0f // generate mask 0x0f0f0f0f movd xmm4, eax pshufd xmm4, xmm4, 0 movdqa xmm5, xmm4 // 0xf0f0f0f0 for high nibbles pslld xmm5, 4 mov eax, [esp + 4] // src_argb4444 mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax sub edx, eax convertloop: movdqu xmm0, [eax] // fetch 8 pixels of bgra4444 movdqa xmm2, xmm0 pand xmm0, xmm4 // mask low nibbles pand xmm2, xmm5 // mask high nibbles movdqa xmm1, xmm0 movdqa xmm3, xmm2 psllw xmm1, 4 psrlw xmm3, 4 por xmm0, xmm1 por xmm2, xmm3 movdqa xmm1, xmm0 punpcklbw xmm0, xmm2 punpckhbw xmm1, xmm2 movdqu [eax * 2 + edx], xmm0 // store 4 pixels of ARGB movdqu [eax * 2 + edx + 16], xmm1 // store next 4 pixels of ARGB lea eax, [eax + 16] sub ecx, 8 jg convertloop ret } } __declspec(naked) void ARGBToRGB24Row_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24 convertloop: movdqu xmm0, [eax] // fetch 16 pixels of argb movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] lea eax, [eax + 64] pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB pshufb xmm1, xmm6 pshufb xmm2, xmm6 pshufb xmm3, xmm6 movdqa xmm4, xmm1 // 4 bytes from 1 for 0 psrldq xmm1, 4 // 8 bytes from 1 pslldq xmm4, 12 // 4 bytes from 1 for 0 movdqa xmm5, xmm2 // 8 bytes from 2 for 1 por xmm0, xmm4 // 4 bytes from 1 for 0 pslldq xmm5, 8 // 8 bytes from 2 for 1 movdqu [edx], xmm0 // store 0 por xmm1, xmm5 // 8 bytes from 2 for 1 psrldq xmm2, 8 // 4 bytes from 2 pslldq xmm3, 4 // 12 bytes from 3 for 2 por xmm2, xmm3 // 12 bytes from 3 for 2 movdqu [edx + 16], xmm1 // store 1 movdqu [edx + 32], xmm2 // store 2 lea edx, [edx + 48] sub ecx, 16 jg convertloop ret } } __declspec(naked) void ARGBToRAWRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width movdqa xmm6, xmmword ptr kShuffleMaskARGBToRAW convertloop: movdqu xmm0, [eax] // fetch 16 pixels of argb movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] lea eax, [eax + 64] pshufb xmm0, xmm6 // pack 16 bytes of ARGB to 12 bytes of RGB pshufb xmm1, xmm6 pshufb xmm2, xmm6 pshufb xmm3, xmm6 movdqa xmm4, xmm1 // 4 bytes from 1 for 0 psrldq xmm1, 4 // 8 bytes from 1 pslldq xmm4, 12 // 4 bytes from 1 for 0 movdqa xmm5, xmm2 // 8 bytes from 2 for 1 por xmm0, xmm4 // 4 bytes from 1 for 0 pslldq xmm5, 8 // 8 bytes from 2 for 1 movdqu [edx], xmm0 // store 0 por xmm1, xmm5 // 8 bytes from 2 for 1 psrldq xmm2, 8 // 4 bytes from 2 pslldq xmm3, 4 // 12 bytes from 3 for 2 por xmm2, xmm3 // 12 bytes from 3 for 2 movdqu [edx + 16], xmm1 // store 1 movdqu [edx + 32], xmm2 // store 2 lea edx, [edx + 48] sub ecx, 16 jg convertloop ret } } __declspec(naked) void ARGBToRGB565Row_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width pcmpeqb xmm3, xmm3 // generate mask 0x0000001f psrld xmm3, 27 pcmpeqb xmm4, xmm4 // generate mask 0x000007e0 psrld xmm4, 26 pslld xmm4, 5 pcmpeqb xmm5, xmm5 // generate mask 0xfffff800 pslld xmm5, 11 convertloop: movdqu xmm0, [eax] // fetch 4 pixels of argb movdqa xmm1, xmm0 // B movdqa xmm2, xmm0 // G pslld xmm0, 8 // R psrld xmm1, 3 // B psrld xmm2, 5 // G psrad xmm0, 16 // R 
pand xmm1, xmm3 // B pand xmm2, xmm4 // G pand xmm0, xmm5 // R por xmm1, xmm2 // BG por xmm0, xmm1 // BGR packssdw xmm0, xmm0 lea eax, [eax + 16] movq qword ptr [edx], xmm0 // store 4 pixels of RGB565 lea edx, [edx + 8] sub ecx, 4 jg convertloop ret } } __declspec(naked) void ARGBToRGB565DitherRow_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_rgb movd xmm6, [esp + 12] // dither4 mov ecx, [esp + 16] // width punpcklbw xmm6, xmm6 // make dither 16 bytes movdqa xmm7, xmm6 punpcklwd xmm6, xmm6 punpckhwd xmm7, xmm7 pcmpeqb xmm3, xmm3 // generate mask 0x0000001f psrld xmm3, 27 pcmpeqb xmm4, xmm4 // generate mask 0x000007e0 psrld xmm4, 26 pslld xmm4, 5 pcmpeqb xmm5, xmm5 // generate mask 0xfffff800 pslld xmm5, 11 convertloop: movdqu xmm0, [eax] // fetch 4 pixels of argb paddusb xmm0, xmm6 // add dither movdqa xmm1, xmm0 // B movdqa xmm2, xmm0 // G pslld xmm0, 8 // R psrld xmm1, 3 // B psrld xmm2, 5 // G psrad xmm0, 16 // R pand xmm1, xmm3 // B pand xmm2, xmm4 // G pand xmm0, xmm5 // R por xmm1, xmm2 // BG por xmm0, xmm1 // BGR packssdw xmm0, xmm0 lea eax, [eax + 16] movq qword ptr [edx], xmm0 // store 4 pixels of RGB565 lea edx, [edx + 8] sub ecx, 4 jg convertloop ret } } #ifdef HAS_ARGBTORGB565DITHERROW_AVX2 __declspec(naked) void ARGBToRGB565DitherRow_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, const uint32_t dither4, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_rgb vbroadcastss xmm6, [esp + 12] // dither4 mov ecx, [esp + 16] // width vpunpcklbw xmm6, xmm6, xmm6 // make dither 32 bytes vpermq ymm6, ymm6, 0xd8 vpunpcklwd ymm6, ymm6, ymm6 vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f vpsrld ymm3, ymm3, 27 vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0 vpsrld ymm4, ymm4, 26 vpslld ymm4, ymm4, 5 vpslld ymm5, ymm3, 11 // generate mask 0x0000f800 convertloop: vmovdqu ymm0, [eax] // fetch 8 pixels of argb vpaddusb ymm0, ymm0, ymm6 // add dither vpsrld ymm2, ymm0, 5 // G vpsrld ymm1, ymm0, 3 // B vpsrld ymm0, ymm0, 8 // R vpand ymm2, ymm2, ymm4 // G vpand ymm1, ymm1, ymm3 // B vpand ymm0, ymm0, ymm5 // R vpor ymm1, ymm1, ymm2 // BG vpor ymm0, ymm0, ymm1 // BGR vpackusdw ymm0, ymm0, ymm0 vpermq ymm0, ymm0, 0xd8 lea eax, [eax + 32] vmovdqu [edx], xmm0 // store 8 pixels of RGB565 lea edx, [edx + 16] sub ecx, 8 jg convertloop vzeroupper ret } } #endif // HAS_ARGBTORGB565DITHERROW_AVX2 // TODO(fbarchard): Improve sign extension/packing. 
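// Scalar model (sketch; ARGBPixelToRGB565DitherRef is a hypothetical name,
// not libyuv API) of the ordered-dither path in ARGBToRGB565DitherRow above:
// one byte of dither4 is added to every channel of a pixel with unsigned
// saturation (paddusb) before each channel is truncated to 5/6/5 bits.
static uint16_t ARGBPixelToRGB565DitherRef(uint32_t argb, uint8_t dither) {
  uint32_t b = (argb >> 0) & 0xff;
  uint32_t g = (argb >> 8) & 0xff;
  uint32_t r = (argb >> 16) & 0xff;
  b += dither; if (b > 255) b = 255;  // paddusb saturates at 255
  g += dither; if (g > 255) g = 255;
  r += dither; if (r > 255) r = 255;
  return (uint16_t)(((r >> 3) << 11) | ((g >> 2) << 5) | (b >> 3));
}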
__declspec(naked) void ARGBToARGB1555Row_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width pcmpeqb xmm4, xmm4 // generate mask 0x0000001f psrld xmm4, 27 movdqa xmm5, xmm4 // generate mask 0x000003e0 pslld xmm5, 5 movdqa xmm6, xmm4 // generate mask 0x00007c00 pslld xmm6, 10 pcmpeqb xmm7, xmm7 // generate mask 0xffff8000 pslld xmm7, 15 convertloop: movdqu xmm0, [eax] // fetch 4 pixels of argb movdqa xmm1, xmm0 // B movdqa xmm2, xmm0 // G movdqa xmm3, xmm0 // R psrad xmm0, 16 // A psrld xmm1, 3 // B psrld xmm2, 6 // G psrld xmm3, 9 // R pand xmm0, xmm7 // A pand xmm1, xmm4 // B pand xmm2, xmm5 // G pand xmm3, xmm6 // R por xmm0, xmm1 // BA por xmm2, xmm3 // GR por xmm0, xmm2 // BGRA packssdw xmm0, xmm0 lea eax, [eax + 16] movq qword ptr [edx], xmm0 // store 4 pixels of ARGB1555 lea edx, [edx + 8] sub ecx, 4 jg convertloop ret } } __declspec(naked) void ARGBToARGB4444Row_SSE2(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width pcmpeqb xmm4, xmm4 // generate mask 0xf000f000 psllw xmm4, 12 movdqa xmm3, xmm4 // generate mask 0x00f000f0 psrlw xmm3, 8 convertloop: movdqu xmm0, [eax] // fetch 4 pixels of argb movdqa xmm1, xmm0 pand xmm0, xmm3 // low nibble pand xmm1, xmm4 // high nibble psrld xmm0, 4 psrld xmm1, 8 por xmm0, xmm1 packuswb xmm0, xmm0 lea eax, [eax + 16] movq qword ptr [edx], xmm0 // store 4 pixels of ARGB4444 lea edx, [edx + 8] sub ecx, 4 jg convertloop ret } } #ifdef HAS_ARGBTORGB565ROW_AVX2 __declspec(naked) void ARGBToRGB565Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width vpcmpeqb ymm3, ymm3, ymm3 // generate mask 0x0000001f vpsrld ymm3, ymm3, 27 vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0x000007e0 vpsrld ymm4, ymm4, 26 vpslld ymm4, ymm4, 5 vpslld ymm5, ymm3, 11 // generate mask 0x0000f800 convertloop: vmovdqu ymm0, [eax] // fetch 8 pixels of argb vpsrld ymm2, ymm0, 5 // G vpsrld ymm1, ymm0, 3 // B vpsrld ymm0, ymm0, 8 // R vpand ymm2, ymm2, ymm4 // G vpand ymm1, ymm1, ymm3 // B vpand ymm0, ymm0, ymm5 // R vpor ymm1, ymm1, ymm2 // BG vpor ymm0, ymm0, ymm1 // BGR vpackusdw ymm0, ymm0, ymm0 vpermq ymm0, ymm0, 0xd8 lea eax, [eax + 32] vmovdqu [edx], xmm0 // store 8 pixels of RGB565 lea edx, [edx + 16] sub ecx, 8 jg convertloop vzeroupper ret } } #endif // HAS_ARGBTORGB565ROW_AVX2 #ifdef HAS_ARGBTOARGB1555ROW_AVX2 __declspec(naked) void ARGBToARGB1555Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width vpcmpeqb ymm4, ymm4, ymm4 vpsrld ymm4, ymm4, 27 // generate mask 0x0000001f vpslld ymm5, ymm4, 5 // generate mask 0x000003e0 vpslld ymm6, ymm4, 10 // generate mask 0x00007c00 vpcmpeqb ymm7, ymm7, ymm7 // generate mask 0xffff8000 vpslld ymm7, ymm7, 15 convertloop: vmovdqu ymm0, [eax] // fetch 8 pixels of argb vpsrld ymm3, ymm0, 9 // R vpsrld ymm2, ymm0, 6 // G vpsrld ymm1, ymm0, 3 // B vpsrad ymm0, ymm0, 16 // A vpand ymm3, ymm3, ymm6 // R vpand ymm2, ymm2, ymm5 // G vpand ymm1, ymm1, ymm4 // B vpand ymm0, ymm0, ymm7 // A vpor ymm0, ymm0, ymm1 // BA vpor ymm2, ymm2, ymm3 // GR vpor ymm0, ymm0, ymm2 // BGRA vpackssdw ymm0, ymm0, ymm0 vpermq ymm0, ymm0, 0xd8 lea eax, [eax + 32] vmovdqu [edx], xmm0 // store 8 pixels of ARGB1555 lea edx, [edx + 16] sub ecx, 8 jg 
convertloop vzeroupper ret } } #endif // HAS_ARGBTOARGB1555ROW_AVX2 #ifdef HAS_ARGBTOARGB4444ROW_AVX2 __declspec(naked) void ARGBToARGB4444Row_AVX2(const uint8_t* src_argb, uint8_t* dst_rgb, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_rgb mov ecx, [esp + 12] // width vpcmpeqb ymm4, ymm4, ymm4 // generate mask 0xf000f000 vpsllw ymm4, ymm4, 12 vpsrlw ymm3, ymm4, 8 // generate mask 0x00f000f0 convertloop: vmovdqu ymm0, [eax] // fetch 8 pixels of argb vpand ymm1, ymm0, ymm4 // high nibble vpand ymm0, ymm0, ymm3 // low nibble vpsrld ymm1, ymm1, 8 vpsrld ymm0, ymm0, 4 vpor ymm0, ymm0, ymm1 vpackuswb ymm0, ymm0, ymm0 vpermq ymm0, ymm0, 0xd8 lea eax, [eax + 32] vmovdqu [edx], xmm0 // store 8 pixels of ARGB4444 lea edx, [edx + 16] sub ecx, 8 jg convertloop vzeroupper ret } } #endif // HAS_ARGBTOARGB4444ROW_AVX2 // Convert 16 ARGB pixels (64 bytes) to 16 Y values. __declspec(naked) void ARGBToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ mov edx, [esp + 8] /* dst_y */ mov ecx, [esp + 12] /* width */ movdqa xmm4, xmmword ptr kARGBToY movdqa xmm5, xmmword ptr kAddY16 convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] pmaddubsw xmm0, xmm4 pmaddubsw xmm1, xmm4 pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 lea eax, [eax + 64] phaddw xmm0, xmm1 phaddw xmm2, xmm3 psrlw xmm0, 7 psrlw xmm2, 7 packuswb xmm0, xmm2 paddb xmm0, xmm5 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 jg convertloop ret } } // Convert 16 ARGB pixels (64 bytes) to 16 YJ values. // Same as ARGBToYRow but different coefficients, no add 16, but do rounding. __declspec(naked) void ARGBToYJRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ mov edx, [esp + 8] /* dst_y */ mov ecx, [esp + 12] /* width */ movdqa xmm4, xmmword ptr kARGBToYJ movdqa xmm5, xmmword ptr kAddYJ64 convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] pmaddubsw xmm0, xmm4 pmaddubsw xmm1, xmm4 pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 lea eax, [eax + 64] phaddw xmm0, xmm1 phaddw xmm2, xmm3 paddw xmm0, xmm5 // Add .5 for rounding. paddw xmm2, xmm5 psrlw xmm0, 7 psrlw xmm2, 7 packuswb xmm0, xmm2 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 jg convertloop ret } } #ifdef HAS_ARGBTOYROW_AVX2 // vpermd for vphaddw + vpackuswb vpermd. static const lvec32 kPermdARGBToY_AVX = {0, 4, 1, 5, 2, 6, 3, 7}; // Convert 32 ARGB pixels (128 bytes) to 32 Y values. __declspec(naked) void ARGBToYRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ mov edx, [esp + 8] /* dst_y */ mov ecx, [esp + 12] /* width */ vbroadcastf128 ymm4, xmmword ptr kARGBToY vbroadcastf128 ymm5, xmmword ptr kAddY16 vmovdqu ymm6, ymmword ptr kPermdARGBToY_AVX convertloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] vmovdqu ymm2, [eax + 64] vmovdqu ymm3, [eax + 96] vpmaddubsw ymm0, ymm0, ymm4 vpmaddubsw ymm1, ymm1, ymm4 vpmaddubsw ymm2, ymm2, ymm4 vpmaddubsw ymm3, ymm3, ymm4 lea eax, [eax + 128] vphaddw ymm0, ymm0, ymm1 // mutates. vphaddw ymm2, ymm2, ymm3 vpsrlw ymm0, ymm0, 7 vpsrlw ymm2, ymm2, 7 vpackuswb ymm0, ymm0, ymm2 // mutates. vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation. 
vpaddb ymm0, ymm0, ymm5 // add 16 for Y vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 jg convertloop vzeroupper ret } } #endif // HAS_ARGBTOYROW_AVX2 #ifdef HAS_ARGBTOYJROW_AVX2 // Convert 32 ARGB pixels (128 bytes) to 32 Y values. __declspec(naked) void ARGBToYJRow_AVX2(const uint8_t* src_argb, uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ mov edx, [esp + 8] /* dst_y */ mov ecx, [esp + 12] /* width */ vbroadcastf128 ymm4, xmmword ptr kARGBToYJ vbroadcastf128 ymm5, xmmword ptr kAddYJ64 vmovdqu ymm6, ymmword ptr kPermdARGBToY_AVX convertloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] vmovdqu ymm2, [eax + 64] vmovdqu ymm3, [eax + 96] vpmaddubsw ymm0, ymm0, ymm4 vpmaddubsw ymm1, ymm1, ymm4 vpmaddubsw ymm2, ymm2, ymm4 vpmaddubsw ymm3, ymm3, ymm4 lea eax, [eax + 128] vphaddw ymm0, ymm0, ymm1 // mutates. vphaddw ymm2, ymm2, ymm3 vpaddw ymm0, ymm0, ymm5 // Add .5 for rounding. vpaddw ymm2, ymm2, ymm5 vpsrlw ymm0, ymm0, 7 vpsrlw ymm2, ymm2, 7 vpackuswb ymm0, ymm0, ymm2 // mutates. vpermd ymm0, ymm6, ymm0 // For vphaddw + vpackuswb mutation. vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 jg convertloop vzeroupper ret } } #endif // HAS_ARGBTOYJROW_AVX2 __declspec(naked) void BGRAToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ mov edx, [esp + 8] /* dst_y */ mov ecx, [esp + 12] /* width */ movdqa xmm4, xmmword ptr kBGRAToY movdqa xmm5, xmmword ptr kAddY16 convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] pmaddubsw xmm0, xmm4 pmaddubsw xmm1, xmm4 pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 lea eax, [eax + 64] phaddw xmm0, xmm1 phaddw xmm2, xmm3 psrlw xmm0, 7 psrlw xmm2, 7 packuswb xmm0, xmm2 paddb xmm0, xmm5 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 jg convertloop ret } } __declspec(naked) void ABGRToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ mov edx, [esp + 8] /* dst_y */ mov ecx, [esp + 12] /* width */ movdqa xmm4, xmmword ptr kABGRToY movdqa xmm5, xmmword ptr kAddY16 convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] pmaddubsw xmm0, xmm4 pmaddubsw xmm1, xmm4 pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 lea eax, [eax + 64] phaddw xmm0, xmm1 phaddw xmm2, xmm3 psrlw xmm0, 7 psrlw xmm2, 7 packuswb xmm0, xmm2 paddb xmm0, xmm5 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 jg convertloop ret } } __declspec(naked) void RGBAToYRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] /* src_argb */ mov edx, [esp + 8] /* dst_y */ mov ecx, [esp + 12] /* width */ movdqa xmm4, xmmword ptr kRGBAToY movdqa xmm5, xmmword ptr kAddY16 convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] pmaddubsw xmm0, xmm4 pmaddubsw xmm1, xmm4 pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 lea eax, [eax + 64] phaddw xmm0, xmm1 phaddw xmm2, xmm3 psrlw xmm0, 7 psrlw xmm2, 7 packuswb xmm0, xmm2 paddb xmm0, xmm5 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 jg convertloop ret } } __declspec(naked) void ARGBToUVRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_argb mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width movdqa xmm5, xmmword ptr kAddUV128 movdqa xmm6, xmmword ptr kARGBToV 
movdqa xmm7, xmmword ptr kARGBToU sub edi, edx // stride from u to v convertloop: /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] movdqu xmm4, [eax + esi] pavgb xmm0, xmm4 movdqu xmm1, [eax + 16] movdqu xmm4, [eax + esi + 16] pavgb xmm1, xmm4 movdqu xmm2, [eax + 32] movdqu xmm4, [eax + esi + 32] pavgb xmm2, xmm4 movdqu xmm3, [eax + 48] movdqu xmm4, [eax + esi + 48] pavgb xmm3, xmm4 lea eax, [eax + 64] movdqa xmm4, xmm0 shufps xmm0, xmm1, 0x88 shufps xmm4, xmm1, 0xdd pavgb xmm0, xmm4 movdqa xmm4, xmm2 shufps xmm2, xmm3, 0x88 shufps xmm4, xmm3, 0xdd pavgb xmm2, xmm4 // step 2 - convert to U and V // from here down is very similar to Y code except // instead of 16 different pixels, its 8 pixels of U and 8 of V movdqa xmm1, xmm0 movdqa xmm3, xmm2 pmaddubsw xmm0, xmm7 // U pmaddubsw xmm2, xmm7 pmaddubsw xmm1, xmm6 // V pmaddubsw xmm3, xmm6 phaddw xmm0, xmm2 phaddw xmm1, xmm3 psraw xmm0, 8 psraw xmm1, 8 packsswb xmm0, xmm1 paddb xmm0, xmm5 // -> unsigned // step 3 - store 8 U and 8 V values movlps qword ptr [edx], xmm0 // U movhps qword ptr [edx + edi], xmm0 // V lea edx, [edx + 8] sub ecx, 16 jg convertloop pop edi pop esi ret } } __declspec(naked) void ARGBToUVJRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_argb mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width movdqa xmm5, xmmword ptr kAddUVJ128 movdqa xmm6, xmmword ptr kARGBToVJ movdqa xmm7, xmmword ptr kARGBToUJ sub edi, edx // stride from u to v convertloop: /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] movdqu xmm4, [eax + esi] pavgb xmm0, xmm4 movdqu xmm1, [eax + 16] movdqu xmm4, [eax + esi + 16] pavgb xmm1, xmm4 movdqu xmm2, [eax + 32] movdqu xmm4, [eax + esi + 32] pavgb xmm2, xmm4 movdqu xmm3, [eax + 48] movdqu xmm4, [eax + esi + 48] pavgb xmm3, xmm4 lea eax, [eax + 64] movdqa xmm4, xmm0 shufps xmm0, xmm1, 0x88 shufps xmm4, xmm1, 0xdd pavgb xmm0, xmm4 movdqa xmm4, xmm2 shufps xmm2, xmm3, 0x88 shufps xmm4, xmm3, 0xdd pavgb xmm2, xmm4 // step 2 - convert to U and V // from here down is very similar to Y code except // instead of 16 different pixels, its 8 pixels of U and 8 of V movdqa xmm1, xmm0 movdqa xmm3, xmm2 pmaddubsw xmm0, xmm7 // U pmaddubsw xmm2, xmm7 pmaddubsw xmm1, xmm6 // V pmaddubsw xmm3, xmm6 phaddw xmm0, xmm2 phaddw xmm1, xmm3 paddw xmm0, xmm5 // +.5 rounding -> unsigned paddw xmm1, xmm5 psraw xmm0, 8 psraw xmm1, 8 packsswb xmm0, xmm1 // step 3 - store 8 U and 8 V values movlps qword ptr [edx], xmm0 // U movhps qword ptr [edx + edi], xmm0 // V lea edx, [edx + 8] sub ecx, 16 jg convertloop pop edi pop esi ret } } #ifdef HAS_ARGBTOUVROW_AVX2 __declspec(naked) void ARGBToUVRow_AVX2(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_argb mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width vbroadcastf128 ymm5, xmmword ptr kAddUV128 vbroadcastf128 ymm6, xmmword ptr kARGBToV vbroadcastf128 ymm7, xmmword ptr kARGBToU sub edi, edx // stride from u to v convertloop: /* step 1 - subsample 32x2 argb pixels to 16x1 */ vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] vmovdqu ymm2, [eax + 64] vmovdqu ymm3, [eax + 96] vpavgb ymm0, ymm0, [eax + esi] vpavgb ymm1, ymm1, [eax + esi + 32] vpavgb ymm2, ymm2, [eax + esi + 64] 
vpavgb ymm3, ymm3, [eax + esi + 96] lea eax, [eax + 128] vshufps ymm4, ymm0, ymm1, 0x88 vshufps ymm0, ymm0, ymm1, 0xdd vpavgb ymm0, ymm0, ymm4 // mutated by vshufps vshufps ymm4, ymm2, ymm3, 0x88 vshufps ymm2, ymm2, ymm3, 0xdd vpavgb ymm2, ymm2, ymm4 // mutated by vshufps // step 2 - convert to U and V // from here down is very similar to Y code except // instead of 32 different pixels, its 16 pixels of U and 16 of V vpmaddubsw ymm1, ymm0, ymm7 // U vpmaddubsw ymm3, ymm2, ymm7 vpmaddubsw ymm0, ymm0, ymm6 // V vpmaddubsw ymm2, ymm2, ymm6 vphaddw ymm1, ymm1, ymm3 // mutates vphaddw ymm0, ymm0, ymm2 vpsraw ymm1, ymm1, 8 vpsraw ymm0, ymm0, 8 vpacksswb ymm0, ymm1, ymm0 // mutates vpermq ymm0, ymm0, 0xd8 // For vpacksswb vpshufb ymm0, ymm0, ymmword ptr kShufARGBToUV_AVX // for vshufps/vphaddw vpaddb ymm0, ymm0, ymm5 // -> unsigned // step 3 - store 16 U and 16 V values vextractf128 [edx], ymm0, 0 // U vextractf128 [edx + edi], ymm0, 1 // V lea edx, [edx + 16] sub ecx, 32 jg convertloop pop edi pop esi vzeroupper ret } } #endif // HAS_ARGBTOUVROW_AVX2 #ifdef HAS_ARGBTOUVJROW_AVX2 __declspec(naked) void ARGBToUVJRow_AVX2(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_argb mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width vbroadcastf128 ymm5, xmmword ptr kAddUVJ128 vbroadcastf128 ymm6, xmmword ptr kARGBToVJ vbroadcastf128 ymm7, xmmword ptr kARGBToUJ sub edi, edx // stride from u to v convertloop: /* step 1 - subsample 32x2 argb pixels to 16x1 */ vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] vmovdqu ymm2, [eax + 64] vmovdqu ymm3, [eax + 96] vpavgb ymm0, ymm0, [eax + esi] vpavgb ymm1, ymm1, [eax + esi + 32] vpavgb ymm2, ymm2, [eax + esi + 64] vpavgb ymm3, ymm3, [eax + esi + 96] lea eax, [eax + 128] vshufps ymm4, ymm0, ymm1, 0x88 vshufps ymm0, ymm0, ymm1, 0xdd vpavgb ymm0, ymm0, ymm4 // mutated by vshufps vshufps ymm4, ymm2, ymm3, 0x88 vshufps ymm2, ymm2, ymm3, 0xdd vpavgb ymm2, ymm2, ymm4 // mutated by vshufps // step 2 - convert to U and V // from here down is very similar to Y code except // instead of 32 different pixels, its 16 pixels of U and 16 of V vpmaddubsw ymm1, ymm0, ymm7 // U vpmaddubsw ymm3, ymm2, ymm7 vpmaddubsw ymm0, ymm0, ymm6 // V vpmaddubsw ymm2, ymm2, ymm6 vphaddw ymm1, ymm1, ymm3 // mutates vphaddw ymm0, ymm0, ymm2 vpaddw ymm1, ymm1, ymm5 // +.5 rounding -> unsigned vpaddw ymm0, ymm0, ymm5 vpsraw ymm1, ymm1, 8 vpsraw ymm0, ymm0, 8 vpacksswb ymm0, ymm1, ymm0 // mutates vpermq ymm0, ymm0, 0xd8 // For vpacksswb vpshufb ymm0, ymm0, ymmword ptr kShufARGBToUV_AVX // for vshufps/vphaddw // step 3 - store 16 U and 16 V values vextractf128 [edx], ymm0, 0 // U vextractf128 [edx + edi], ymm0, 1 // V lea edx, [edx + 16] sub ecx, 32 jg convertloop pop edi pop esi vzeroupper ret } } #endif // HAS_ARGBTOUVJROW_AVX2 __declspec(naked) void ARGBToUV444Row_SSSE3(const uint8_t* src_argb0, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push edi mov eax, [esp + 4 + 4] // src_argb mov edx, [esp + 4 + 8] // dst_u mov edi, [esp + 4 + 12] // dst_v mov ecx, [esp + 4 + 16] // width movdqa xmm5, xmmword ptr kAddUV128 movdqa xmm6, xmmword ptr kARGBToV movdqa xmm7, xmmword ptr kARGBToU sub edi, edx // stride from u to v convertloop: /* convert to U and V */ movdqu xmm0, [eax] // U movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] pmaddubsw xmm0, xmm7 pmaddubsw xmm1, xmm7 pmaddubsw xmm2, xmm7
pmaddubsw xmm3, xmm7 phaddw xmm0, xmm1 phaddw xmm2, xmm3 psraw xmm0, 8 psraw xmm2, 8 packsswb xmm0, xmm2 paddb xmm0, xmm5 movdqu [edx], xmm0 movdqu xmm0, [eax] // V movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] pmaddubsw xmm0, xmm6 pmaddubsw xmm1, xmm6 pmaddubsw xmm2, xmm6 pmaddubsw xmm3, xmm6 phaddw xmm0, xmm1 phaddw xmm2, xmm3 psraw xmm0, 8 psraw xmm2, 8 packsswb xmm0, xmm2 paddb xmm0, xmm5 lea eax, [eax + 64] movdqu [edx + edi], xmm0 lea edx, [edx + 16] sub ecx, 16 jg convertloop pop edi ret } } __declspec(naked) void BGRAToUVRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_argb mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width movdqa xmm5, xmmword ptr kAddUV128 movdqa xmm6, xmmword ptr kBGRAToV movdqa xmm7, xmmword ptr kBGRAToU sub edi, edx // stride from u to v convertloop: /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] movdqu xmm4, [eax + esi] pavgb xmm0, xmm4 movdqu xmm1, [eax + 16] movdqu xmm4, [eax + esi + 16] pavgb xmm1, xmm4 movdqu xmm2, [eax + 32] movdqu xmm4, [eax + esi + 32] pavgb xmm2, xmm4 movdqu xmm3, [eax + 48] movdqu xmm4, [eax + esi + 48] pavgb xmm3, xmm4 lea eax, [eax + 64] movdqa xmm4, xmm0 shufps xmm0, xmm1, 0x88 shufps xmm4, xmm1, 0xdd pavgb xmm0, xmm4 movdqa xmm4, xmm2 shufps xmm2, xmm3, 0x88 shufps xmm4, xmm3, 0xdd pavgb xmm2, xmm4 // step 2 - convert to U and V // from here down is very similar to Y code except // instead of 16 different pixels, its 8 pixels of U and 8 of V movdqa xmm1, xmm0 movdqa xmm3, xmm2 pmaddubsw xmm0, xmm7 // U pmaddubsw xmm2, xmm7 pmaddubsw xmm1, xmm6 // V pmaddubsw xmm3, xmm6 phaddw xmm0, xmm2 phaddw xmm1, xmm3 psraw xmm0, 8 psraw xmm1, 8 packsswb xmm0, xmm1 paddb xmm0, xmm5 // -> unsigned // step 3 - store 8 U and 8 V values movlps qword ptr [edx], xmm0 // U movhps qword ptr [edx + edi], xmm0 // V lea edx, [edx + 8] sub ecx, 16 jg convertloop pop edi pop esi ret } } __declspec(naked) void ABGRToUVRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_argb mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width movdqa xmm5, xmmword ptr kAddUV128 movdqa xmm6, xmmword ptr kABGRToV movdqa xmm7, xmmword ptr kABGRToU sub edi, edx // stride from u to v convertloop: /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] movdqu xmm4, [eax + esi] pavgb xmm0, xmm4 movdqu xmm1, [eax + 16] movdqu xmm4, [eax + esi + 16] pavgb xmm1, xmm4 movdqu xmm2, [eax + 32] movdqu xmm4, [eax + esi + 32] pavgb xmm2, xmm4 movdqu xmm3, [eax + 48] movdqu xmm4, [eax + esi + 48] pavgb xmm3, xmm4 lea eax, [eax + 64] movdqa xmm4, xmm0 shufps xmm0, xmm1, 0x88 shufps xmm4, xmm1, 0xdd pavgb xmm0, xmm4 movdqa xmm4, xmm2 shufps xmm2, xmm3, 0x88 shufps xmm4, xmm3, 0xdd pavgb xmm2, xmm4 // step 2 - convert to U and V // from here down is very similar to Y code except // instead of 16 different pixels, its 8 pixels of U and 8 of V movdqa xmm1, xmm0 movdqa xmm3, xmm2 pmaddubsw xmm0, xmm7 // U pmaddubsw xmm2, xmm7 pmaddubsw xmm1, xmm6 // V pmaddubsw xmm3, xmm6 phaddw xmm0, xmm2 phaddw xmm1, xmm3 psraw xmm0, 8 psraw xmm1, 8 packsswb xmm0, xmm1 paddb xmm0, xmm5 // -> unsigned // step 3 - store 8 U and 8 V values movlps qword ptr [edx], xmm0 // 
U movhps qword ptr [edx + edi], xmm0 // V lea edx, [edx + 8] sub ecx, 16 jg convertloop pop edi pop esi ret } } __declspec(naked) void RGBAToUVRow_SSSE3(const uint8_t* src_argb0, int src_stride_argb, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_argb mov esi, [esp + 8 + 8] // src_stride_argb mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width movdqa xmm5, xmmword ptr kAddUV128 movdqa xmm6, xmmword ptr kRGBAToV movdqa xmm7, xmmword ptr kRGBAToU sub edi, edx // stride from u to v convertloop: /* step 1 - subsample 16x2 argb pixels to 8x1 */ movdqu xmm0, [eax] movdqu xmm4, [eax + esi] pavgb xmm0, xmm4 movdqu xmm1, [eax + 16] movdqu xmm4, [eax + esi + 16] pavgb xmm1, xmm4 movdqu xmm2, [eax + 32] movdqu xmm4, [eax + esi + 32] pavgb xmm2, xmm4 movdqu xmm3, [eax + 48] movdqu xmm4, [eax + esi + 48] pavgb xmm3, xmm4 lea eax, [eax + 64] movdqa xmm4, xmm0 shufps xmm0, xmm1, 0x88 shufps xmm4, xmm1, 0xdd pavgb xmm0, xmm4 movdqa xmm4, xmm2 shufps xmm2, xmm3, 0x88 shufps xmm4, xmm3, 0xdd pavgb xmm2, xmm4 // step 2 - convert to U and V // from here down is very similar to Y code except // instead of 16 different pixels, its 8 pixels of U and 8 of V movdqa xmm1, xmm0 movdqa xmm3, xmm2 pmaddubsw xmm0, xmm7 // U pmaddubsw xmm2, xmm7 pmaddubsw xmm1, xmm6 // V pmaddubsw xmm3, xmm6 phaddw xmm0, xmm2 phaddw xmm1, xmm3 psraw xmm0, 8 psraw xmm1, 8 packsswb xmm0, xmm1 paddb xmm0, xmm5 // -> unsigned // step 3 - store 8 U and 8 V values movlps qword ptr [edx], xmm0 // U movhps qword ptr [edx + edi], xmm0 // V lea edx, [edx + 8] sub ecx, 16 jg convertloop pop edi pop esi ret } } #endif // HAS_ARGBTOYROW_SSSE3 // Read 16 UV from 444 #define READYUV444_AVX2 \ __asm { \ __asm vmovdqu xmm0, [esi] /* U */ \ __asm vmovdqu xmm1, [esi + edi] /* V */ \ __asm lea esi, [esi + 16] \ __asm vpermq ymm0, ymm0, 0xd8 \ __asm vpermq ymm1, ymm1, 0xd8 \ __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ __asm vmovdqu xmm4, [eax] /* Y */ \ __asm vpermq ymm4, ymm4, 0xd8 \ __asm vpunpcklbw ymm4, ymm4, ymm4 \ __asm lea eax, [eax + 16]} // Read 8 UV from 422, upsample to 16 UV. #define READYUV422_AVX2 \ __asm { \ __asm vmovq xmm0, qword ptr [esi] /* U */ \ __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \ __asm lea esi, [esi + 8] \ __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ __asm vpermq ymm0, ymm0, 0xd8 \ __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ __asm vmovdqu xmm4, [eax] /* Y */ \ __asm vpermq ymm4, ymm4, 0xd8 \ __asm vpunpcklbw ymm4, ymm4, ymm4 \ __asm lea eax, [eax + 16]} // Read 8 UV from 422, upsample to 16 UV. With 16 Alpha. #define READYUVA422_AVX2 \ __asm { \ __asm vmovq xmm0, qword ptr [esi] /* U */ \ __asm vmovq xmm1, qword ptr [esi + edi] /* V */ \ __asm lea esi, [esi + 8] \ __asm vpunpcklbw ymm0, ymm0, ymm1 /* UV */ \ __asm vpermq ymm0, ymm0, 0xd8 \ __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ __asm vmovdqu xmm4, [eax] /* Y */ \ __asm vpermq ymm4, ymm4, 0xd8 \ __asm vpunpcklbw ymm4, ymm4, ymm4 \ __asm lea eax, [eax + 16] \ __asm vmovdqu xmm5, [ebp] /* A */ \ __asm vpermq ymm5, ymm5, 0xd8 \ __asm lea ebp, [ebp + 16]} // Read 8 UV from NV12, upsample to 16 UV. 
#define READNV12_AVX2 \ __asm { \ __asm vmovdqu xmm0, [esi] /* UV */ \ __asm lea esi, [esi + 16] \ __asm vpermq ymm0, ymm0, 0xd8 \ __asm vpunpcklwd ymm0, ymm0, ymm0 /* UVUV (upsample) */ \ __asm vmovdqu xmm4, [eax] /* Y */ \ __asm vpermq ymm4, ymm4, 0xd8 \ __asm vpunpcklbw ymm4, ymm4, ymm4 \ __asm lea eax, [eax + 16]} // Read 8 UV from NV21, upsample to 16 UV. #define READNV21_AVX2 \ __asm { \ __asm vmovdqu xmm0, [esi] /* UV */ \ __asm lea esi, [esi + 16] \ __asm vpermq ymm0, ymm0, 0xd8 \ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleNV21 \ __asm vmovdqu xmm4, [eax] /* Y */ \ __asm vpermq ymm4, ymm4, 0xd8 \ __asm vpunpcklbw ymm4, ymm4, ymm4 \ __asm lea eax, [eax + 16]} // Read 8 YUY2 with 16 Y and upsample 8 UV to 16 UV. #define READYUY2_AVX2 \ __asm { \ __asm vmovdqu ymm4, [eax] /* YUY2 */ \ __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleYUY2Y \ __asm vmovdqu ymm0, [eax] /* UV */ \ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleYUY2UV \ __asm lea eax, [eax + 32]} // Read 8 UYVY with 16 Y and upsample 8 UV to 16 UV. #define READUYVY_AVX2 \ __asm { \ __asm vmovdqu ymm4, [eax] /* UYVY */ \ __asm vpshufb ymm4, ymm4, ymmword ptr kShuffleUYVYY \ __asm vmovdqu ymm0, [eax] /* UV */ \ __asm vpshufb ymm0, ymm0, ymmword ptr kShuffleUYVYUV \ __asm lea eax, [eax + 32]} // Convert 16 pixels: 16 UV and 16 Y. #define YUVTORGB_AVX2(YuvConstants) \ __asm { \ __asm vpmaddubsw ymm2, ymm0, ymmword ptr [YuvConstants + KUVTOR] /* R UV */\ __asm vpmaddubsw ymm1, ymm0, ymmword ptr [YuvConstants + KUVTOG] /* G UV */\ __asm vpmaddubsw ymm0, ymm0, ymmword ptr [YuvConstants + KUVTOB] /* B UV */\ __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASR] \ __asm vpsubw ymm2, ymm3, ymm2 \ __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASG] \ __asm vpsubw ymm1, ymm3, ymm1 \ __asm vmovdqu ymm3, ymmword ptr [YuvConstants + KUVBIASB] \ __asm vpsubw ymm0, ymm3, ymm0 /* Step 2: Find Y contribution to 16 R,G,B values */ \ __asm vpmulhuw ymm4, ymm4, ymmword ptr [YuvConstants + KYTORGB] \ __asm vpaddsw ymm0, ymm0, ymm4 /* B += Y */ \ __asm vpaddsw ymm1, ymm1, ymm4 /* G += Y */ \ __asm vpaddsw ymm2, ymm2, ymm4 /* R += Y */ \ __asm vpsraw ymm0, ymm0, 6 \ __asm vpsraw ymm1, ymm1, 6 \ __asm vpsraw ymm2, ymm2, 6 \ __asm vpackuswb ymm0, ymm0, ymm0 /* B */ \ __asm vpackuswb ymm1, ymm1, ymm1 /* G */ \ __asm vpackuswb ymm2, ymm2, ymm2 /* R */ \ } // Store 16 ARGB values. #define STOREARGB_AVX2 \ __asm { \ __asm vpunpcklbw ymm0, ymm0, ymm1 /* BG */ \ __asm vpermq ymm0, ymm0, 0xd8 \ __asm vpunpcklbw ymm2, ymm2, ymm5 /* RA */ \ __asm vpermq ymm2, ymm2, 0xd8 \ __asm vpunpcklwd ymm1, ymm0, ymm2 /* BGRA first 8 pixels */ \ __asm vpunpckhwd ymm0, ymm0, ymm2 /* BGRA next 8 pixels */ \ __asm vmovdqu 0[edx], ymm1 \ __asm vmovdqu 32[edx], ymm0 \ __asm lea edx, [edx + 64]} // Store 16 RGBA values. #define STORERGBA_AVX2 \ __asm { \ __asm vpunpcklbw ymm1, ymm1, ymm2 /* GR */ \ __asm vpermq ymm1, ymm1, 0xd8 \ __asm vpunpcklbw ymm2, ymm5, ymm0 /* AB */ \ __asm vpermq ymm2, ymm2, 0xd8 \ __asm vpunpcklwd ymm0, ymm2, ymm1 /* ABGR first 8 pixels */ \ __asm vpunpckhwd ymm1, ymm2, ymm1 /* ABGR next 8 pixels */ \ __asm vmovdqu [edx], ymm0 \ __asm vmovdqu [edx + 32], ymm1 \ __asm lea edx, [edx + 64]} #ifdef HAS_I422TOARGBROW_AVX2 // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 
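// What YUVTORGB_AVX2 above computes, as scalar float math (editor's sketch;
// I601PixelToARGBRef and ClampToByteRef are hypothetical names, not libyuv
// API).  The row code folds the coefficients into the YuvConstants tables
// and works in fixed point (the vpsraw ..., 6 and vpackuswb clamp), but for
// the BT.601 studio-range table the arithmetic is the textbook conversion
// below; exact libyuv constants may round slightly differently.
static uint8_t ClampToByteRef(int v) {
  return (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
}
static void I601PixelToARGBRef(int y, int u, int v,
                               uint8_t* b, uint8_t* g, uint8_t* r) {
  float c = 1.164f * (float)(y - 16);  // Y contribution (pmulhuw by KYTORGB)
  *b = ClampToByteRef((int)(c + 2.018f * (float)(u - 128) + 0.5f));
  *g = ClampToByteRef((int)(c - 0.391f * (float)(u - 128) -
                            0.813f * (float)(v - 128) + 0.5f));
  *r = ClampToByteRef((int)(c + 1.596f * (float)(v - 128) + 0.5f));
}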
__declspec(naked) void I422ToARGBRow_AVX2( const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push edi push ebx mov eax, [esp + 12 + 4] // Y mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READYUV422_AVX2 YUVTORGB_AVX2(ebx) STOREARGB_AVX2 sub ecx, 16 jg convertloop pop ebx pop edi pop esi vzeroupper ret } } #endif // HAS_I422TOARGBROW_AVX2 #ifdef HAS_I422ALPHATOARGBROW_AVX2 // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y and 16 A producing 16 ARGB. __declspec(naked) void I422AlphaToARGBRow_AVX2( const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push edi push ebx push ebp mov eax, [esp + 16 + 4] // Y mov esi, [esp + 16 + 8] // U mov edi, [esp + 16 + 12] // V mov ebp, [esp + 16 + 16] // A mov edx, [esp + 16 + 20] // argb mov ebx, [esp + 16 + 24] // yuvconstants mov ecx, [esp + 16 + 28] // width sub edi, esi convertloop: READYUVA422_AVX2 YUVTORGB_AVX2(ebx) STOREARGB_AVX2 sub ecx, 16 jg convertloop pop ebp pop ebx pop edi pop esi vzeroupper ret } } #endif // HAS_I422ALPHATOARGBROW_AVX2 #ifdef HAS_I444TOARGBROW_AVX2 // 16 pixels // 16 UV values with 16 Y producing 16 ARGB (64 bytes). __declspec(naked) void I444ToARGBRow_AVX2( const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push edi push ebx mov eax, [esp + 12 + 4] // Y mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READYUV444_AVX2 YUVTORGB_AVX2(ebx) STOREARGB_AVX2 sub ecx, 16 jg convertloop pop ebx pop edi pop esi vzeroupper ret } } #endif // HAS_I444TOARGBROW_AVX2 #ifdef HAS_NV12TOARGBROW_AVX2 // 16 pixels. // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). __declspec(naked) void NV12ToARGBRow_AVX2( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push ebx mov eax, [esp + 8 + 4] // Y mov esi, [esp + 8 + 8] // UV mov edx, [esp + 8 + 12] // argb mov ebx, [esp + 8 + 16] // yuvconstants mov ecx, [esp + 8 + 20] // width vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READNV12_AVX2 YUVTORGB_AVX2(ebx) STOREARGB_AVX2 sub ecx, 16 jg convertloop pop ebx pop esi vzeroupper ret } } #endif // HAS_NV12TOARGBROW_AVX2 #ifdef HAS_NV21TOARGBROW_AVX2 // 16 pixels. // 8 VU values upsampled to 16 UV, mixed with 16 Y producing 16 ARGB (64 bytes). 
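// READNV21_AVX2 above relies on kShuffleNV21 to both swap each V,U byte
// pair into U,V order and duplicate it across two pixels.  A scalar
// equivalent (sketch; NV21ToUV422RowRef is a hypothetical name):
static void NV21ToUV422RowRef(const uint8_t* src_vu, uint8_t* dst_uvuv,
                              int pairs) {
  int i;
  for (i = 0; i < pairs; ++i) {
    uint8_t v = src_vu[2 * i + 0];  // NV21 stores V first
    uint8_t u = src_vu[2 * i + 1];
    dst_uvuv[4 * i + 0] = u;  // even pixel U
    dst_uvuv[4 * i + 1] = v;  // even pixel V
    dst_uvuv[4 * i + 2] = u;  // odd pixel reuses the same chroma sample
    dst_uvuv[4 * i + 3] = v;
  }
}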
__declspec(naked) void NV21ToARGBRow_AVX2( const uint8_t* y_buf, const uint8_t* vu_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push ebx mov eax, [esp + 8 + 4] // Y mov esi, [esp + 8 + 8] // VU mov edx, [esp + 8 + 12] // argb mov ebx, [esp + 8 + 16] // yuvconstants mov ecx, [esp + 8 + 20] // width vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READNV21_AVX2 YUVTORGB_AVX2(ebx) STOREARGB_AVX2 sub ecx, 16 jg convertloop pop ebx pop esi vzeroupper ret } } #endif // HAS_NV21TOARGBROW_AVX2 #ifdef HAS_YUY2TOARGBROW_AVX2 // 16 pixels. // 8 YUY2 values with 16 Y and 8 UV producing 16 ARGB (64 bytes). __declspec(naked) void YUY2ToARGBRow_AVX2( const uint8_t* src_yuy2, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push ebx mov eax, [esp + 4 + 4] // yuy2 mov edx, [esp + 4 + 8] // argb mov ebx, [esp + 4 + 12] // yuvconstants mov ecx, [esp + 4 + 16] // width vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READYUY2_AVX2 YUVTORGB_AVX2(ebx) STOREARGB_AVX2 sub ecx, 16 jg convertloop pop ebx vzeroupper ret } } #endif // HAS_YUY2TOARGBROW_AVX2 #ifdef HAS_UYVYTOARGBROW_AVX2 // 16 pixels. // 8 UYVY values with 16 Y and 8 UV producing 16 ARGB (64 bytes). __declspec(naked) void UYVYToARGBRow_AVX2( const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push ebx mov eax, [esp + 4 + 4] // uyvy mov edx, [esp + 4 + 8] // argb mov ebx, [esp + 4 + 12] // yuvconstants mov ecx, [esp + 4 + 16] // width vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READUYVY_AVX2 YUVTORGB_AVX2(ebx) STOREARGB_AVX2 sub ecx, 16 jg convertloop pop ebx vzeroupper ret } } #endif // HAS_UYVYTOARGBROW_AVX2 #ifdef HAS_I422TORGBAROW_AVX2 // 16 pixels // 8 UV values upsampled to 16 UV, mixed with 16 Y producing 16 RGBA (64 bytes). __declspec(naked) void I422ToRGBARow_AVX2( const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push edi push ebx mov eax, [esp + 12 + 4] // Y mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // abgr mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi vpcmpeqb ymm5, ymm5, ymm5 // generate 0xffffffffffffffff for alpha convertloop: READYUV422_AVX2 YUVTORGB_AVX2(ebx) STORERGBA_AVX2 sub ecx, 16 jg convertloop pop ebx pop edi pop esi vzeroupper ret } } #endif // HAS_I422TORGBAROW_AVX2 #if defined(HAS_I422TOARGBROW_SSSE3) // TODO(fbarchard): Read that does half size on Y and treats 420 as 444. // Allows a conversion with half size scaling. // Read 8 UV from 444. #define READYUV444 \ __asm { \ __asm movq xmm0, qword ptr [esi] /* U */ \ __asm movq xmm1, qword ptr [esi + edi] /* V */ \ __asm lea esi, [esi + 8] \ __asm punpcklbw xmm0, xmm1 /* UV */ \ __asm movq xmm4, qword ptr [eax] \ __asm punpcklbw xmm4, xmm4 \ __asm lea eax, [eax + 8]} // Read 4 UV from 422, upsample to 8 UV. #define READYUV422 \ __asm { \ __asm movd xmm0, [esi] /* U */ \ __asm movd xmm1, [esi + edi] /* V */ \ __asm lea esi, [esi + 4] \ __asm punpcklbw xmm0, xmm1 /* UV */ \ __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ __asm movq xmm4, qword ptr [eax] \ __asm punpcklbw xmm4, xmm4 \ __asm lea eax, [eax + 8]} // Read 4 UV from 422, upsample to 8 UV. With 8 Alpha. 
#define READYUVA422 \ __asm { \ __asm movd xmm0, [esi] /* U */ \ __asm movd xmm1, [esi + edi] /* V */ \ __asm lea esi, [esi + 4] \ __asm punpcklbw xmm0, xmm1 /* UV */ \ __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ __asm movq xmm4, qword ptr [eax] /* Y */ \ __asm punpcklbw xmm4, xmm4 \ __asm lea eax, [eax + 8] \ __asm movq xmm5, qword ptr [ebp] /* A */ \ __asm lea ebp, [ebp + 8]} // Read 4 UV from NV12, upsample to 8 UV. #define READNV12 \ __asm { \ __asm movq xmm0, qword ptr [esi] /* UV */ \ __asm lea esi, [esi + 8] \ __asm punpcklwd xmm0, xmm0 /* UVUV (upsample) */ \ __asm movq xmm4, qword ptr [eax] \ __asm punpcklbw xmm4, xmm4 \ __asm lea eax, [eax + 8]} // Read 4 VU from NV21, upsample to 8 UV. #define READNV21 \ __asm { \ __asm movq xmm0, qword ptr [esi] /* UV */ \ __asm lea esi, [esi + 8] \ __asm pshufb xmm0, xmmword ptr kShuffleNV21 \ __asm movq xmm4, qword ptr [eax] \ __asm punpcklbw xmm4, xmm4 \ __asm lea eax, [eax + 8]} // Read 4 YUY2 with 8 Y and upsample 4 UV to 8 UV. #define READYUY2 \ __asm { \ __asm movdqu xmm4, [eax] /* YUY2 */ \ __asm pshufb xmm4, xmmword ptr kShuffleYUY2Y \ __asm movdqu xmm0, [eax] /* UV */ \ __asm pshufb xmm0, xmmword ptr kShuffleYUY2UV \ __asm lea eax, [eax + 16]} // Read 4 UYVY with 8 Y and upsample 4 UV to 8 UV. #define READUYVY \ __asm { \ __asm movdqu xmm4, [eax] /* UYVY */ \ __asm pshufb xmm4, xmmword ptr kShuffleUYVYY \ __asm movdqu xmm0, [eax] /* UV */ \ __asm pshufb xmm0, xmmword ptr kShuffleUYVYUV \ __asm lea eax, [eax + 16]} // Convert 8 pixels: 8 UV and 8 Y. #define YUVTORGB(YuvConstants) \ __asm { \ __asm movdqa xmm1, xmm0 \ __asm movdqa xmm2, xmm0 \ __asm movdqa xmm3, xmm0 \ __asm movdqa xmm0, xmmword ptr [YuvConstants + KUVBIASB] \ __asm pmaddubsw xmm1, xmmword ptr [YuvConstants + KUVTOB] \ __asm psubw xmm0, xmm1 \ __asm movdqa xmm1, xmmword ptr [YuvConstants + KUVBIASG] \ __asm pmaddubsw xmm2, xmmword ptr [YuvConstants + KUVTOG] \ __asm psubw xmm1, xmm2 \ __asm movdqa xmm2, xmmword ptr [YuvConstants + KUVBIASR] \ __asm pmaddubsw xmm3, xmmword ptr [YuvConstants + KUVTOR] \ __asm psubw xmm2, xmm3 \ __asm pmulhuw xmm4, xmmword ptr [YuvConstants + KYTORGB] \ __asm paddsw xmm0, xmm4 /* B += Y */ \ __asm paddsw xmm1, xmm4 /* G += Y */ \ __asm paddsw xmm2, xmm4 /* R += Y */ \ __asm psraw xmm0, 6 \ __asm psraw xmm1, 6 \ __asm psraw xmm2, 6 \ __asm packuswb xmm0, xmm0 /* B */ \ __asm packuswb xmm1, xmm1 /* G */ \ __asm packuswb xmm2, xmm2 /* R */ \ } // Store 8 ARGB values. #define STOREARGB \ __asm { \ __asm punpcklbw xmm0, xmm1 /* BG */ \ __asm punpcklbw xmm2, xmm5 /* RA */ \ __asm movdqa xmm1, xmm0 \ __asm punpcklwd xmm0, xmm2 /* BGRA first 4 pixels */ \ __asm punpckhwd xmm1, xmm2 /* BGRA next 4 pixels */ \ __asm movdqu 0[edx], xmm0 \ __asm movdqu 16[edx], xmm1 \ __asm lea edx, [edx + 32]} // Store 8 BGRA values. #define STOREBGRA \ __asm { \ __asm pcmpeqb xmm5, xmm5 /* generate 0xffffffff for alpha */ \ __asm punpcklbw xmm1, xmm0 /* GB */ \ __asm punpcklbw xmm5, xmm2 /* AR */ \ __asm movdqa xmm0, xmm5 \ __asm punpcklwd xmm5, xmm1 /* BGRA first 4 pixels */ \ __asm punpckhwd xmm0, xmm1 /* BGRA next 4 pixels */ \ __asm movdqu 0[edx], xmm5 \ __asm movdqu 16[edx], xmm0 \ __asm lea edx, [edx + 32]} // Store 8 RGBA values. 
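// Memory byte order produced by the STORE* macros (editor's sketch;
// StorePixelRef is a hypothetical illustration, not libyuv API).  libyuv's
// format names describe the little-endian packed word, so "ARGB" is bytes
// B,G,R,A in memory (what STOREARGB above weaves), "BGRA" is bytes A,R,G,B,
// and "RGBA" (stored by STORERGBA, defined just below) is bytes A,B,G,R.
static void StorePixelRef(uint8_t b, uint8_t g, uint8_t r, uint8_t a,
                          uint8_t* dst_argb, uint8_t* dst_bgra,
                          uint8_t* dst_rgba) {
  dst_argb[0] = b; dst_argb[1] = g; dst_argb[2] = r; dst_argb[3] = a;
  dst_bgra[0] = a; dst_bgra[1] = r; dst_bgra[2] = g; dst_bgra[3] = b;
  dst_rgba[0] = a; dst_rgba[1] = b; dst_rgba[2] = g; dst_rgba[3] = r;
}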
#define STORERGBA \ __asm { \ __asm pcmpeqb xmm5, xmm5 /* generate 0xffffffff for alpha */ \ __asm punpcklbw xmm1, xmm2 /* GR */ \ __asm punpcklbw xmm5, xmm0 /* AB */ \ __asm movdqa xmm0, xmm5 \ __asm punpcklwd xmm5, xmm1 /* RGBA first 4 pixels */ \ __asm punpckhwd xmm0, xmm1 /* RGBA next 4 pixels */ \ __asm movdqu 0[edx], xmm5 \ __asm movdqu 16[edx], xmm0 \ __asm lea edx, [edx + 32]} // Store 8 RGB24 values. #define STORERGB24 \ __asm {/* Weave into RRGB */ \ __asm punpcklbw xmm0, xmm1 /* BG */ \ __asm punpcklbw xmm2, xmm2 /* RR */ \ __asm movdqa xmm1, xmm0 \ __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \ __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ /* RRGB -> RGB24 */ \ __asm pshufb xmm0, xmm5 /* Pack first 8 and last 4 bytes. */ \ __asm pshufb xmm1, xmm6 /* Pack first 12 bytes. */ \ __asm palignr xmm1, xmm0, 12 /* last 4 bytes of xmm0 + 12 xmm1 */ \ __asm movq qword ptr 0[edx], xmm0 /* First 8 bytes */ \ __asm movdqu 8[edx], xmm1 /* Last 16 bytes */ \ __asm lea edx, [edx + 24]} // Store 8 RGB565 values. #define STORERGB565 \ __asm {/* Weave into RRGB */ \ __asm punpcklbw xmm0, xmm1 /* BG */ \ __asm punpcklbw xmm2, xmm2 /* RR */ \ __asm movdqa xmm1, xmm0 \ __asm punpcklwd xmm0, xmm2 /* BGRR first 4 pixels */ \ __asm punpckhwd xmm1, xmm2 /* BGRR next 4 pixels */ /* RRGB -> RGB565 */ \ __asm movdqa xmm3, xmm0 /* B first 4 pixels of argb */ \ __asm movdqa xmm2, xmm0 /* G */ \ __asm pslld xmm0, 8 /* R */ \ __asm psrld xmm3, 3 /* B */ \ __asm psrld xmm2, 5 /* G */ \ __asm psrad xmm0, 16 /* R */ \ __asm pand xmm3, xmm5 /* B */ \ __asm pand xmm2, xmm6 /* G */ \ __asm pand xmm0, xmm7 /* R */ \ __asm por xmm3, xmm2 /* BG */ \ __asm por xmm0, xmm3 /* BGR */ \ __asm movdqa xmm3, xmm1 /* B next 4 pixels of argb */ \ __asm movdqa xmm2, xmm1 /* G */ \ __asm pslld xmm1, 8 /* R */ \ __asm psrld xmm3, 3 /* B */ \ __asm psrld xmm2, 5 /* G */ \ __asm psrad xmm1, 16 /* R */ \ __asm pand xmm3, xmm5 /* B */ \ __asm pand xmm2, xmm6 /* G */ \ __asm pand xmm1, xmm7 /* R */ \ __asm por xmm3, xmm2 /* BG */ \ __asm por xmm1, xmm3 /* BGR */ \ __asm packssdw xmm0, xmm1 \ __asm movdqu 0[edx], xmm0 /* store 8 pixels of RGB565 */ \ __asm lea edx, [edx + 16]} // 8 pixels. // 8 UV values, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) void I444ToARGBRow_SSSE3( const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push edi push ebx mov eax, [esp + 12 + 4] // Y mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha convertloop: READYUV444 YUVTORGB(ebx) STOREARGB sub ecx, 8 jg convertloop pop ebx pop edi pop esi ret } } // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB24 (24 bytes). 
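// The READYUV422 macros above upsample chroma by duplication: punpcklwd
// doubles each U,V pair so one chroma sample covers two pixels, with no
// interpolation.  Scalar equivalent (sketch; UpsampleUV422RowRef is a
// hypothetical name):
static void UpsampleUV422RowRef(const uint8_t* src_u, const uint8_t* src_v,
                                uint8_t* dst_uvuv, int pairs) {
  int i;
  for (i = 0; i < pairs; ++i) {
    dst_uvuv[4 * i + 0] = src_u[i];  // even pixel
    dst_uvuv[4 * i + 1] = src_v[i];
    dst_uvuv[4 * i + 2] = src_u[i];  // odd pixel: same sample
    dst_uvuv[4 * i + 3] = src_v[i];
  }
}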
__declspec(naked) void I422ToRGB24Row_SSSE3( const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_rgb24, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push edi push ebx mov eax, [esp + 12 + 4] // Y mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi movdqa xmm5, xmmword ptr kShuffleMaskARGBToRGB24_0 movdqa xmm6, xmmword ptr kShuffleMaskARGBToRGB24 convertloop: READYUV422 YUVTORGB(ebx) STORERGB24 sub ecx, 8 jg convertloop pop ebx pop edi pop esi ret } } // 8 pixels // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 RGB565 (16 bytes). __declspec(naked) void I422ToRGB565Row_SSSE3( const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb565_buf, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push edi push ebx mov eax, [esp + 12 + 4] // Y mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi pcmpeqb xmm5, xmm5 // generate mask 0x0000001f psrld xmm5, 27 pcmpeqb xmm6, xmm6 // generate mask 0x000007e0 psrld xmm6, 26 pslld xmm6, 5 pcmpeqb xmm7, xmm7 // generate mask 0xfffff800 pslld xmm7, 11 convertloop: READYUV422 YUVTORGB(ebx) STORERGB565 sub ecx, 8 jg convertloop pop ebx pop edi pop esi ret } } // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) void I422ToARGBRow_SSSE3( const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push edi push ebx mov eax, [esp + 12 + 4] // Y mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha convertloop: READYUV422 YUVTORGB(ebx) STOREARGB sub ecx, 8 jg convertloop pop ebx pop edi pop esi ret } } // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y and 8 A producing 8 ARGB. __declspec(naked) void I422AlphaToARGBRow_SSSE3( const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, const uint8_t* a_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push edi push ebx push ebp mov eax, [esp + 16 + 4] // Y mov esi, [esp + 16 + 8] // U mov edi, [esp + 16 + 12] // V mov ebp, [esp + 16 + 16] // A mov edx, [esp + 16 + 20] // argb mov ebx, [esp + 16 + 24] // yuvconstants mov ecx, [esp + 16 + 28] // width sub edi, esi convertloop: READYUVA422 YUVTORGB(ebx) STOREARGB sub ecx, 8 jg convertloop pop ebp pop ebx pop edi pop esi ret } } // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). __declspec(naked) void NV12ToARGBRow_SSSE3( const uint8_t* y_buf, const uint8_t* uv_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push ebx mov eax, [esp + 8 + 4] // Y mov esi, [esp + 8 + 8] // UV mov edx, [esp + 8 + 12] // argb mov ebx, [esp + 8 + 16] // yuvconstants mov ecx, [esp + 8 + 20] // width pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha convertloop: READNV12 YUVTORGB(ebx) STOREARGB sub ecx, 8 jg convertloop pop ebx pop esi ret } } // 8 pixels. // 4 UV values upsampled to 8 UV, mixed with 8 Y producing 8 ARGB (32 bytes). 
__declspec(naked) void NV21ToARGBRow_SSSE3( const uint8_t* y_buf, const uint8_t* vu_buf, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push ebx mov eax, [esp + 8 + 4] // Y mov esi, [esp + 8 + 8] // VU mov edx, [esp + 8 + 12] // argb mov ebx, [esp + 8 + 16] // yuvconstants mov ecx, [esp + 8 + 20] // width pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha convertloop: READNV21 YUVTORGB(ebx) STOREARGB sub ecx, 8 jg convertloop pop ebx pop esi ret } } // 8 pixels. // 4 YUY2 values with 8 Y and 4 UV producing 8 ARGB (32 bytes). __declspec(naked) void YUY2ToARGBRow_SSSE3( const uint8_t* src_yuy2, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push ebx mov eax, [esp + 4 + 4] // yuy2 mov edx, [esp + 4 + 8] // argb mov ebx, [esp + 4 + 12] // yuvconstants mov ecx, [esp + 4 + 16] // width pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha convertloop: READYUY2 YUVTORGB(ebx) STOREARGB sub ecx, 8 jg convertloop pop ebx ret } } // 8 pixels. // 4 UYVY values with 8 Y and 4 UV producing 8 ARGB (32 bytes). __declspec(naked) void UYVYToARGBRow_SSSE3( const uint8_t* src_uyvy, uint8_t* dst_argb, const struct YuvConstants* yuvconstants, int width) { __asm { push ebx mov eax, [esp + 4 + 4] // uyvy mov edx, [esp + 4 + 8] // argb mov ebx, [esp + 4 + 12] // yuvconstants mov ecx, [esp + 4 + 16] // width pcmpeqb xmm5, xmm5 // generate 0xffffffff for alpha convertloop: READUYVY YUVTORGB(ebx) STOREARGB sub ecx, 8 jg convertloop pop ebx ret } } __declspec(naked) void I422ToRGBARow_SSSE3( const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* dst_rgba, const struct YuvConstants* yuvconstants, int width) { __asm { push esi push edi push ebx mov eax, [esp + 12 + 4] // Y mov esi, [esp + 12 + 8] // U mov edi, [esp + 12 + 12] // V mov edx, [esp + 12 + 16] // argb mov ebx, [esp + 12 + 20] // yuvconstants mov ecx, [esp + 12 + 24] // width sub edi, esi convertloop: READYUV422 YUVTORGB(ebx) STORERGBA sub ecx, 8 jg convertloop pop ebx pop edi pop esi ret } } #endif // HAS_I422TOARGBROW_SSSE3 #ifdef HAS_I400TOARGBROW_SSE2 // 8 pixels of Y converted to 8 pixels of ARGB (32 bytes). __declspec(naked) void I400ToARGBRow_SSE2(const uint8_t* y_buf, uint8_t* rgb_buf, int width) { __asm { mov eax, 0x4a354a35 // 4a35 = 18997 = round(1.164 * 64 * 256) movd xmm2, eax pshufd xmm2, xmm2,0 mov eax, 0x04880488 // 0488 = 1160 = round(1.164 * 64 * 16) movd xmm3, eax pshufd xmm3, xmm3, 0 pcmpeqb xmm4, xmm4 // generate mask 0xff000000 pslld xmm4, 24 mov eax, [esp + 4] // Y mov edx, [esp + 8] // rgb mov ecx, [esp + 12] // width convertloop: // Step 1: Scale Y contribution to 8 G values. G = (y - 16) * 1.164 movq xmm0, qword ptr [eax] lea eax, [eax + 8] punpcklbw xmm0, xmm0 // Y.Y pmulhuw xmm0, xmm2 psubusw xmm0, xmm3 psrlw xmm0, 6 packuswb xmm0, xmm0 // G // Step 2: Weave into ARGB punpcklbw xmm0, xmm0 // GG movdqa xmm1, xmm0 punpcklwd xmm0, xmm0 // BGRA first 4 pixels punpckhwd xmm1, xmm1 // BGRA next 4 pixels por xmm0, xmm4 por xmm1, xmm4 movdqu [edx], xmm0 movdqu [edx + 16], xmm1 lea edx, [edx + 32] sub ecx, 8 jg convertloop ret } } #endif // HAS_I400TOARGBROW_SSE2 #ifdef HAS_I400TOARGBROW_AVX2 // 16 pixels of Y converted to 16 pixels of ARGB (64 bytes). // note: vpunpcklbw mutates and vpackuswb unmutates. 
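// Scalar model of I400ToARGBRow (editor's sketch; I400PixelToARGBRef is a
// hypothetical name): expand studio-range luma to a gray ARGB pixel,
// G = clamp(1.164 * (Y - 16)), replicated into B, G and R with opaque
// alpha.  The row code does the same scale in fixed point
// (pmulhuw / psubusw / psrlw 6).
static uint32_t I400PixelToARGBRef(uint8_t y) {
  int g = (int)(1.164f * (float)(y - 16) + 0.5f);
  if (g < 0) g = 0;
  if (g > 255) g = 255;
  return 0xff000000u | ((uint32_t)g << 16) | ((uint32_t)g << 8) | (uint32_t)g;
}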
__declspec(naked) void I400ToARGBRow_AVX2(const uint8_t* y_buf,
                                          uint8_t* rgb_buf,
                                          int width) {
  __asm {
    mov        eax, 0x4a354a35  // 4a35 = 18997 = round(1.164 * 64 * 256)
    vmovd      xmm2, eax
    vbroadcastss ymm2, xmm2
    mov        eax, 0x04880488  // 0488 = 1160 = round(1.164 * 64 * 16)
    vmovd      xmm3, eax
    vbroadcastss ymm3, xmm3
    vpcmpeqb   ymm4, ymm4, ymm4  // generate mask 0xff000000
    vpslld     ymm4, ymm4, 24
    mov        eax, [esp + 4]   // Y
    mov        edx, [esp + 8]   // rgb
    mov        ecx, [esp + 12]  // width

  convertloop:
    // Step 1: Scale Y contribution to 16 G values. G = (y - 16) * 1.164
    vmovdqu    xmm0, [eax]
    lea        eax, [eax + 16]
    vpermq     ymm0, ymm0, 0xd8  // vpunpcklbw mutates
    vpunpcklbw ymm0, ymm0, ymm0  // Y.Y
    vpmulhuw   ymm0, ymm0, ymm2
    vpsubusw   ymm0, ymm0, ymm3
    vpsrlw     ymm0, ymm0, 6
    vpackuswb  ymm0, ymm0, ymm0  // G.  still mutated: 3120

    // TODO(fbarchard): Weave alpha with unpack.
    // Step 2: Weave into ARGB
    vpunpcklbw ymm1, ymm0, ymm0  // GG - mutates
    vpermq     ymm1, ymm1, 0xd8
    vpunpcklwd ymm0, ymm1, ymm1  // GGGG first 8 pixels
    vpunpckhwd ymm1, ymm1, ymm1  // GGGG next 8 pixels
    vpor       ymm0, ymm0, ymm4
    vpor       ymm1, ymm1, ymm4
    vmovdqu    [edx], ymm0
    vmovdqu    [edx + 32], ymm1
    lea        edx, [edx + 64]
    sub        ecx, 16
    jg         convertloop
    vzeroupper
    ret
  }
}
#endif  // HAS_I400TOARGBROW_AVX2

#ifdef HAS_MIRRORROW_SSSE3
// Shuffle table for reversing the bytes.
static const uvec8 kShuffleMirror = {15u, 14u, 13u, 12u, 11u, 10u, 9u, 8u,
                                     7u,  6u,  5u,  4u,  3u,  2u,  1u, 0u};

// TODO(fbarchard): Replace lea with -16 offset.
__declspec(naked) void MirrorRow_SSSE3(const uint8_t* src,
                                       uint8_t* dst,
                                       int width) {
  __asm {
    mov        eax, [esp + 4]   // src
    mov        edx, [esp + 8]   // dst
    mov        ecx, [esp + 12]  // width
    movdqa     xmm5, xmmword ptr kShuffleMirror

  convertloop:
    movdqu     xmm0, [eax - 16 + ecx]
    pshufb     xmm0, xmm5
    movdqu     [edx], xmm0
    lea        edx, [edx + 16]
    sub        ecx, 16
    jg         convertloop
    ret
  }
}
#endif  // HAS_MIRRORROW_SSSE3

#ifdef HAS_MIRRORROW_AVX2
__declspec(naked) void MirrorRow_AVX2(const uint8_t* src,
                                      uint8_t* dst,
                                      int width) {
  __asm {
    mov        eax, [esp + 4]   // src
    mov        edx, [esp + 8]   // dst
    mov        ecx, [esp + 12]  // width
    vbroadcastf128 ymm5, xmmword ptr kShuffleMirror

  convertloop:
    vmovdqu    ymm0, [eax - 32 + ecx]
    vpshufb    ymm0, ymm0, ymm5
    vpermq     ymm0, ymm0, 0x4e  // swap high and low halves
    vmovdqu    [edx], ymm0
    lea        edx, [edx + 32]
    sub        ecx, 32
    jg         convertloop
    vzeroupper
    ret
  }
}
#endif  // HAS_MIRRORROW_AVX2

#ifdef HAS_MIRRORUVROW_SSSE3
// Shuffle table for reversing the bytes of UV channels.
static const uvec8 kShuffleMirrorUV = {14u, 12u, 10u, 8u, 6u, 4u, 2u, 0u,
                                       15u, 13u, 11u, 9u, 7u, 5u, 3u, 1u};

__declspec(naked) void MirrorUVRow_SSSE3(const uint8_t* src,
                                         uint8_t* dst_u,
                                         uint8_t* dst_v,
                                         int width) {
  __asm {
    push       edi
    mov        eax, [esp + 4 + 4]   // src
    mov        edx, [esp + 4 + 8]   // dst_u
    mov        edi, [esp + 4 + 12]  // dst_v
    mov        ecx, [esp + 4 + 16]  // width
    movdqa     xmm1, xmmword ptr kShuffleMirrorUV
    lea        eax, [eax + ecx * 2 - 16]
    sub        edi, edx

  convertloop:
    movdqu     xmm0, [eax]
    lea        eax, [eax - 16]
    pshufb     xmm0, xmm1
    movlpd     qword ptr [edx], xmm0
    movhpd     qword ptr [edx + edi], xmm0
    lea        edx, [edx + 8]
    sub        ecx, 8
    jg         convertloop

    pop        edi
    ret
  }
}
#endif  // HAS_MIRRORUVROW_SSSE3

#ifdef HAS_ARGBMIRRORROW_SSE2
__declspec(naked) void ARGBMirrorRow_SSE2(const uint8_t* src,
                                          uint8_t* dst,
                                          int width) {
  __asm {
    mov        eax, [esp + 4]   // src
    mov        edx, [esp + 8]   // dst
    mov        ecx, [esp + 12]  // width
    lea        eax, [eax - 16 + ecx * 4]  // last 4 pixels.
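    // Walk the source backwards 16 bytes per iteration; the pshufd with 0x1b
    // in the loop reverses the 4 pixels (dwords) within each load.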
convertloop: movdqu xmm0, [eax] lea eax, [eax - 16] pshufd xmm0, xmm0, 0x1b movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jg convertloop ret } } #endif // HAS_ARGBMIRRORROW_SSE2 #ifdef HAS_ARGBMIRRORROW_AVX2 // Shuffle table for reversing the bytes. static const ulvec32 kARGBShuffleMirror_AVX2 = {7u, 6u, 5u, 4u, 3u, 2u, 1u, 0u}; __declspec(naked) void ARGBMirrorRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { __asm { mov eax, [esp + 4] // src mov edx, [esp + 8] // dst mov ecx, [esp + 12] // width vmovdqu ymm5, ymmword ptr kARGBShuffleMirror_AVX2 convertloop: vpermd ymm0, ymm5, [eax - 32 + ecx * 4] // permute dword order vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 8 jg convertloop vzeroupper ret } } #endif // HAS_ARGBMIRRORROW_AVX2 #ifdef HAS_SPLITUVROW_SSE2 __declspec(naked) void SplitUVRow_SSE2(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push edi mov eax, [esp + 4 + 4] // src_uv mov edx, [esp + 4 + 8] // dst_u mov edi, [esp + 4 + 12] // dst_v mov ecx, [esp + 4 + 16] // width pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff psrlw xmm5, 8 sub edi, edx convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] movdqa xmm2, xmm0 movdqa xmm3, xmm1 pand xmm0, xmm5 // even bytes pand xmm1, xmm5 packuswb xmm0, xmm1 psrlw xmm2, 8 // odd bytes psrlw xmm3, 8 packuswb xmm2, xmm3 movdqu [edx], xmm0 movdqu [edx + edi], xmm2 lea edx, [edx + 16] sub ecx, 16 jg convertloop pop edi ret } } #endif // HAS_SPLITUVROW_SSE2 #ifdef HAS_SPLITUVROW_AVX2 __declspec(naked) void SplitUVRow_AVX2(const uint8_t* src_uv, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push edi mov eax, [esp + 4 + 4] // src_uv mov edx, [esp + 4 + 8] // dst_u mov edi, [esp + 4 + 12] // dst_v mov ecx, [esp + 4 + 16] // width vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff vpsrlw ymm5, ymm5, 8 sub edi, edx convertloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] vpsrlw ymm2, ymm0, 8 // odd bytes vpsrlw ymm3, ymm1, 8 vpand ymm0, ymm0, ymm5 // even bytes vpand ymm1, ymm1, ymm5 vpackuswb ymm0, ymm0, ymm1 vpackuswb ymm2, ymm2, ymm3 vpermq ymm0, ymm0, 0xd8 vpermq ymm2, ymm2, 0xd8 vmovdqu [edx], ymm0 vmovdqu [edx + edi], ymm2 lea edx, [edx + 32] sub ecx, 32 jg convertloop pop edi vzeroupper ret } } #endif // HAS_SPLITUVROW_AVX2 #ifdef HAS_MERGEUVROW_SSE2 __declspec(naked) void MergeUVRow_SSE2(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { __asm { push edi mov eax, [esp + 4 + 4] // src_u mov edx, [esp + 4 + 8] // src_v mov edi, [esp + 4 + 12] // dst_uv mov ecx, [esp + 4 + 16] // width sub edx, eax convertloop: movdqu xmm0, [eax] // read 16 U's movdqu xmm1, [eax + edx] // and 16 V's lea eax, [eax + 16] movdqa xmm2, xmm0 punpcklbw xmm0, xmm1 // first 8 UV pairs punpckhbw xmm2, xmm1 // next 8 UV pairs movdqu [edi], xmm0 movdqu [edi + 16], xmm2 lea edi, [edi + 32] sub ecx, 16 jg convertloop pop edi ret } } #endif // HAS_MERGEUVROW_SSE2 #ifdef HAS_MERGEUVROW_AVX2 __declspec(naked) void MergeUVRow_AVX2(const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_uv, int width) { __asm { push edi mov eax, [esp + 4 + 4] // src_u mov edx, [esp + 4 + 8] // src_v mov edi, [esp + 4 + 12] // dst_uv mov ecx, [esp + 4 + 16] // width sub edx, eax convertloop: vmovdqu ymm0, [eax] // read 32 U's vmovdqu ymm1, [eax + edx] // and 32 V's lea eax, [eax + 32] vpunpcklbw ymm2, ymm0, ymm1 // low 16 UV pairs. mutated qqword 0,2 vpunpckhbw ymm0, ymm0, ymm1 // high 16 UV pairs. 
    // mutated qqword 1,3
    vextractf128 [edi], ymm2, 0       // bytes 0..15
    vextractf128 [edi + 16], ymm0, 0  // bytes 16..31
    vextractf128 [edi + 32], ymm2, 1  // bytes 32..47
    vextractf128 [edi + 48], ymm0, 1  // bytes 48..63
    lea        edi, [edi + 64]
    sub        ecx, 32
    jg         convertloop

    pop        edi
    vzeroupper
    ret
  }
}
#endif  // HAS_MERGEUVROW_AVX2

#ifdef HAS_COPYROW_SSE2
// CopyRow copies 'width' bytes using a 16 byte load/store, 32 bytes at a
// time.
__declspec(naked) void CopyRow_SSE2(const uint8_t* src,
                                    uint8_t* dst,
                                    int width) {
  __asm {
    mov        eax, [esp + 4]   // src
    mov        edx, [esp + 8]   // dst
    mov        ecx, [esp + 12]  // width
    test       eax, 15
    jne        convertloopu
    test       edx, 15
    jne        convertloopu

  convertloopa:
    movdqa     xmm0, [eax]
    movdqa     xmm1, [eax + 16]
    lea        eax, [eax + 32]
    movdqa     [edx], xmm0
    movdqa     [edx + 16], xmm1
    lea        edx, [edx + 32]
    sub        ecx, 32
    jg         convertloopa
    ret

  convertloopu:
    movdqu     xmm0, [eax]
    movdqu     xmm1, [eax + 16]
    lea        eax, [eax + 32]
    movdqu     [edx], xmm0
    movdqu     [edx + 16], xmm1
    lea        edx, [edx + 32]
    sub        ecx, 32
    jg         convertloopu
    ret
  }
}
#endif  // HAS_COPYROW_SSE2

#ifdef HAS_COPYROW_AVX
// CopyRow copies 'width' bytes using a 32 byte load/store, 64 bytes at a
// time.
__declspec(naked) void CopyRow_AVX(const uint8_t* src,
                                   uint8_t* dst,
                                   int width) {
  __asm {
    mov        eax, [esp + 4]   // src
    mov        edx, [esp + 8]   // dst
    mov        ecx, [esp + 12]  // width

  convertloop:
    vmovdqu    ymm0, [eax]
    vmovdqu    ymm1, [eax + 32]
    lea        eax, [eax + 64]
    vmovdqu    [edx], ymm0
    vmovdqu    [edx + 32], ymm1
    lea        edx, [edx + 64]
    sub        ecx, 64
    jg         convertloop
    vzeroupper
    ret
  }
}
#endif  // HAS_COPYROW_AVX

// Multiple of 1.
__declspec(naked) void CopyRow_ERMS(const uint8_t* src,
                                    uint8_t* dst,
                                    int width) {
  __asm {
    mov        eax, esi
    mov        edx, edi
    mov        esi, [esp + 4]   // src
    mov        edi, [esp + 8]   // dst
    mov        ecx, [esp + 12]  // width
    rep movsb
    mov        edi, edx
    mov        esi, eax
    ret
  }
}

#ifdef HAS_ARGBCOPYALPHAROW_SSE2
// width in pixels
__declspec(naked) void ARGBCopyAlphaRow_SSE2(const uint8_t* src,
                                             uint8_t* dst,
                                             int width) {
  __asm {
    mov        eax, [esp + 4]   // src
    mov        edx, [esp + 8]   // dst
    mov        ecx, [esp + 12]  // width
    pcmpeqb    xmm0, xmm0  // generate mask 0xff000000
    pslld      xmm0, 24
    pcmpeqb    xmm1, xmm1  // generate mask 0x00ffffff
    psrld      xmm1, 8

  convertloop:
    movdqu     xmm2, [eax]
    movdqu     xmm3, [eax + 16]
    lea        eax, [eax + 32]
    movdqu     xmm4, [edx]
    movdqu     xmm5, [edx + 16]
    pand       xmm2, xmm0
    pand       xmm3, xmm0
    pand       xmm4, xmm1
    pand       xmm5, xmm1
    por        xmm2, xmm4
    por        xmm3, xmm5
    movdqu     [edx], xmm2
    movdqu     [edx + 16], xmm3
    lea        edx, [edx + 32]
    sub        ecx, 8
    jg         convertloop
    ret
  }
}
#endif  // HAS_ARGBCOPYALPHAROW_SSE2

#ifdef HAS_ARGBCOPYALPHAROW_AVX2
// width in pixels
__declspec(naked) void ARGBCopyAlphaRow_AVX2(const uint8_t* src,
                                             uint8_t* dst,
                                             int width) {
  __asm {
    mov        eax, [esp + 4]   // src
    mov        edx, [esp + 8]   // dst
    mov        ecx, [esp + 12]  // width
    vpcmpeqb   ymm0, ymm0, ymm0
    vpsrld     ymm0, ymm0, 8  // generate mask 0x00ffffff

  convertloop:
    vmovdqu    ymm1, [eax]
    vmovdqu    ymm2, [eax + 32]
    lea        eax, [eax + 64]
    vpblendvb  ymm1, ymm1, [edx], ymm0
    vpblendvb  ymm2, ymm2, [edx + 32], ymm0
    vmovdqu    [edx], ymm1
    vmovdqu    [edx + 32], ymm2
    lea        edx, [edx + 64]
    sub        ecx, 16
    jg         convertloop
    vzeroupper
    ret
  }
}
#endif  // HAS_ARGBCOPYALPHAROW_AVX2

#ifdef HAS_ARGBEXTRACTALPHAROW_SSE2
// width in pixels
__declspec(naked) void ARGBExtractAlphaRow_SSE2(const uint8_t* src_argb,
                                                uint8_t* dst_a,
                                                int width) {
  __asm {
    mov        eax, [esp + 4]   // src_argb
    mov        edx, [esp + 8]   // dst_a
    mov        ecx, [esp + 12]  // width

  extractloop:
    movdqu     xmm0, [eax]
    movdqu     xmm1, [eax + 16]
    lea        eax, [eax + 32]
    psrld      xmm0, 24
    psrld      xmm1, 24
    packssdw   xmm0, xmm1
    packuswb   xmm0, xmm0
    movq       qword ptr [edx], xmm0
    lea        edx, [edx + 8]
    sub        ecx, 8
    jg         extractloop
    ret
  }
}
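// A scalar model of the row above (illustrative only; not part of the
// library's API): alpha is the 4th byte of each little-endian ARGB pixel.
static __inline void ARGBExtractAlphaRow_C_Sketch(const uint8_t* src_argb,
                                                  uint8_t* dst_a,
                                                  int width) {
  int i;
  for (i = 0; i < width; ++i) {
    dst_a[i] = src_argb[i * 4 + 3];
  }
}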
#endif // HAS_ARGBEXTRACTALPHAROW_SSE2 #ifdef HAS_ARGBEXTRACTALPHAROW_AVX2 // width in pixels __declspec(naked) void ARGBExtractAlphaRow_AVX2(const uint8_t* src_argb, uint8_t* dst_a, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_a mov ecx, [esp + 12] // width vmovdqa ymm4, ymmword ptr kPermdARGBToY_AVX extractloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] vpsrld ymm0, ymm0, 24 vpsrld ymm1, ymm1, 24 vmovdqu ymm2, [eax + 64] vmovdqu ymm3, [eax + 96] lea eax, [eax + 128] vpackssdw ymm0, ymm0, ymm1 // mutates vpsrld ymm2, ymm2, 24 vpsrld ymm3, ymm3, 24 vpackssdw ymm2, ymm2, ymm3 // mutates vpackuswb ymm0, ymm0, ymm2 // mutates vpermd ymm0, ymm4, ymm0 // unmutate vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 jg extractloop vzeroupper ret } } #endif // HAS_ARGBEXTRACTALPHAROW_AVX2 #ifdef HAS_ARGBCOPYYTOALPHAROW_SSE2 // width in pixels __declspec(naked) void ARGBCopyYToAlphaRow_SSE2(const uint8_t* src, uint8_t* dst, int width) { __asm { mov eax, [esp + 4] // src mov edx, [esp + 8] // dst mov ecx, [esp + 12] // width pcmpeqb xmm0, xmm0 // generate mask 0xff000000 pslld xmm0, 24 pcmpeqb xmm1, xmm1 // generate mask 0x00ffffff psrld xmm1, 8 convertloop: movq xmm2, qword ptr [eax] // 8 Y's lea eax, [eax + 8] punpcklbw xmm2, xmm2 punpckhwd xmm3, xmm2 punpcklwd xmm2, xmm2 movdqu xmm4, [edx] movdqu xmm5, [edx + 16] pand xmm2, xmm0 pand xmm3, xmm0 pand xmm4, xmm1 pand xmm5, xmm1 por xmm2, xmm4 por xmm3, xmm5 movdqu [edx], xmm2 movdqu [edx + 16], xmm3 lea edx, [edx + 32] sub ecx, 8 jg convertloop ret } } #endif // HAS_ARGBCOPYYTOALPHAROW_SSE2 #ifdef HAS_ARGBCOPYYTOALPHAROW_AVX2 // width in pixels __declspec(naked) void ARGBCopyYToAlphaRow_AVX2(const uint8_t* src, uint8_t* dst, int width) { __asm { mov eax, [esp + 4] // src mov edx, [esp + 8] // dst mov ecx, [esp + 12] // width vpcmpeqb ymm0, ymm0, ymm0 vpsrld ymm0, ymm0, 8 // generate mask 0x00ffffff convertloop: vpmovzxbd ymm1, qword ptr [eax] vpmovzxbd ymm2, qword ptr [eax + 8] lea eax, [eax + 16] vpslld ymm1, ymm1, 24 vpslld ymm2, ymm2, 24 vpblendvb ymm1, ymm1, [edx], ymm0 vpblendvb ymm2, ymm2, [edx + 32], ymm0 vmovdqu [edx], ymm1 vmovdqu [edx + 32], ymm2 lea edx, [edx + 64] sub ecx, 16 jg convertloop vzeroupper ret } } #endif // HAS_ARGBCOPYYTOALPHAROW_AVX2 #ifdef HAS_SETROW_X86 // Write 'width' bytes using an 8 bit value repeated. // width should be multiple of 4. __declspec(naked) void SetRow_X86(uint8_t* dst, uint8_t v8, int width) { __asm { movzx eax, byte ptr [esp + 8] // v8 mov edx, 0x01010101 // Duplicate byte to all bytes. mul edx // overwrites edx with upper part of result. mov edx, edi mov edi, [esp + 4] // dst mov ecx, [esp + 12] // width shr ecx, 2 rep stosd mov edi, edx ret } } // Write 'width' bytes using an 8 bit value repeated. __declspec(naked) void SetRow_ERMS(uint8_t* dst, uint8_t v8, int width) { __asm { mov edx, edi mov edi, [esp + 4] // dst mov eax, [esp + 8] // v8 mov ecx, [esp + 12] // width rep stosb mov edi, edx ret } } // Write 'width' 32 bit values. 
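// (SetRow_X86 above splats v8 into a dword with the 0x01010101 multiply:
// v8 * 0x01010101, e.g. 0x7f -> 0x7f7f7f7f. It then uses the same rep stosd
// store loop that ARGBSetRow_X86 below uses for a caller-provided v32.)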
__declspec(naked) void ARGBSetRow_X86(uint8_t* dst_argb, uint32_t v32, int width) { __asm { mov edx, edi mov edi, [esp + 4] // dst mov eax, [esp + 8] // v32 mov ecx, [esp + 12] // width rep stosd mov edi, edx ret } } #endif // HAS_SETROW_X86 #ifdef HAS_YUY2TOYROW_AVX2 __declspec(naked) void YUY2ToYRow_AVX2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] // src_yuy2 mov edx, [esp + 8] // dst_y mov ecx, [esp + 12] // width vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff vpsrlw ymm5, ymm5, 8 convertloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] vpand ymm0, ymm0, ymm5 // even bytes are Y vpand ymm1, ymm1, ymm5 vpackuswb ymm0, ymm0, ymm1 // mutates. vpermq ymm0, ymm0, 0xd8 vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 jg convertloop vzeroupper ret } } __declspec(naked) void YUY2ToUVRow_AVX2(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_yuy2 mov esi, [esp + 8 + 8] // stride_yuy2 mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff vpsrlw ymm5, ymm5, 8 sub edi, edx convertloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] vpavgb ymm0, ymm0, [eax + esi] vpavgb ymm1, ymm1, [eax + esi + 32] lea eax, [eax + 64] vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV vpsrlw ymm1, ymm1, 8 vpackuswb ymm0, ymm0, ymm1 // mutates. vpermq ymm0, ymm0, 0xd8 vpand ymm1, ymm0, ymm5 // U vpsrlw ymm0, ymm0, 8 // V vpackuswb ymm1, ymm1, ymm1 // mutates. vpackuswb ymm0, ymm0, ymm0 // mutates. vpermq ymm1, ymm1, 0xd8 vpermq ymm0, ymm0, 0xd8 vextractf128 [edx], ymm1, 0 // U vextractf128 [edx + edi], ymm0, 0 // V lea edx, [edx + 16] sub ecx, 32 jg convertloop pop edi pop esi vzeroupper ret } } __declspec(naked) void YUY2ToUV422Row_AVX2(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push edi mov eax, [esp + 4 + 4] // src_yuy2 mov edx, [esp + 4 + 8] // dst_u mov edi, [esp + 4 + 12] // dst_v mov ecx, [esp + 4 + 16] // width vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff vpsrlw ymm5, ymm5, 8 sub edi, edx convertloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] vpsrlw ymm0, ymm0, 8 // YUYV -> UVUV vpsrlw ymm1, ymm1, 8 vpackuswb ymm0, ymm0, ymm1 // mutates. vpermq ymm0, ymm0, 0xd8 vpand ymm1, ymm0, ymm5 // U vpsrlw ymm0, ymm0, 8 // V vpackuswb ymm1, ymm1, ymm1 // mutates. vpackuswb ymm0, ymm0, ymm0 // mutates. vpermq ymm1, ymm1, 0xd8 vpermq ymm0, ymm0, 0xd8 vextractf128 [edx], ymm1, 0 // U vextractf128 [edx + edi], ymm0, 0 // V lea edx, [edx + 16] sub ecx, 32 jg convertloop pop edi vzeroupper ret } } __declspec(naked) void UYVYToYRow_AVX2(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] // src_uyvy mov edx, [esp + 8] // dst_y mov ecx, [esp + 12] // width convertloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] vpsrlw ymm0, ymm0, 8 // odd bytes are Y vpsrlw ymm1, ymm1, 8 vpackuswb ymm0, ymm0, ymm1 // mutates. 
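    // AVX2 pack instructions operate within each 128 bit lane, so the result
    // is interleaved; the vpermq with 0xd8 below restores linear order.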
vpermq ymm0, ymm0, 0xd8 vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 jg convertloop vzeroupper ret } } __declspec(naked) void UYVYToUVRow_AVX2(const uint8_t* src_uyvy, int stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_yuy2 mov esi, [esp + 8 + 8] // stride_yuy2 mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff vpsrlw ymm5, ymm5, 8 sub edi, edx convertloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] vpavgb ymm0, ymm0, [eax + esi] vpavgb ymm1, ymm1, [eax + esi + 32] lea eax, [eax + 64] vpand ymm0, ymm0, ymm5 // UYVY -> UVUV vpand ymm1, ymm1, ymm5 vpackuswb ymm0, ymm0, ymm1 // mutates. vpermq ymm0, ymm0, 0xd8 vpand ymm1, ymm0, ymm5 // U vpsrlw ymm0, ymm0, 8 // V vpackuswb ymm1, ymm1, ymm1 // mutates. vpackuswb ymm0, ymm0, ymm0 // mutates. vpermq ymm1, ymm1, 0xd8 vpermq ymm0, ymm0, 0xd8 vextractf128 [edx], ymm1, 0 // U vextractf128 [edx + edi], ymm0, 0 // V lea edx, [edx + 16] sub ecx, 32 jg convertloop pop edi pop esi vzeroupper ret } } __declspec(naked) void UYVYToUV422Row_AVX2(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push edi mov eax, [esp + 4 + 4] // src_yuy2 mov edx, [esp + 4 + 8] // dst_u mov edi, [esp + 4 + 12] // dst_v mov ecx, [esp + 4 + 16] // width vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff00ff vpsrlw ymm5, ymm5, 8 sub edi, edx convertloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] vpand ymm0, ymm0, ymm5 // UYVY -> UVUV vpand ymm1, ymm1, ymm5 vpackuswb ymm0, ymm0, ymm1 // mutates. vpermq ymm0, ymm0, 0xd8 vpand ymm1, ymm0, ymm5 // U vpsrlw ymm0, ymm0, 8 // V vpackuswb ymm1, ymm1, ymm1 // mutates. vpackuswb ymm0, ymm0, ymm0 // mutates. 
vpermq ymm1, ymm1, 0xd8 vpermq ymm0, ymm0, 0xd8 vextractf128 [edx], ymm1, 0 // U vextractf128 [edx + edi], ymm0, 0 // V lea edx, [edx + 16] sub ecx, 32 jg convertloop pop edi vzeroupper ret } } #endif // HAS_YUY2TOYROW_AVX2 #ifdef HAS_YUY2TOYROW_SSE2 __declspec(naked) void YUY2ToYRow_SSE2(const uint8_t* src_yuy2, uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] // src_yuy2 mov edx, [esp + 8] // dst_y mov ecx, [esp + 12] // width pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff psrlw xmm5, 8 convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] pand xmm0, xmm5 // even bytes are Y pand xmm1, xmm5 packuswb xmm0, xmm1 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 jg convertloop ret } } __declspec(naked) void YUY2ToUVRow_SSE2(const uint8_t* src_yuy2, int stride_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_yuy2 mov esi, [esp + 8 + 8] // stride_yuy2 mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff psrlw xmm5, 8 sub edi, edx convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + esi] movdqu xmm3, [eax + esi + 16] lea eax, [eax + 32] pavgb xmm0, xmm2 pavgb xmm1, xmm3 psrlw xmm0, 8 // YUYV -> UVUV psrlw xmm1, 8 packuswb xmm0, xmm1 movdqa xmm1, xmm0 pand xmm0, xmm5 // U packuswb xmm0, xmm0 psrlw xmm1, 8 // V packuswb xmm1, xmm1 movq qword ptr [edx], xmm0 movq qword ptr [edx + edi], xmm1 lea edx, [edx + 8] sub ecx, 16 jg convertloop pop edi pop esi ret } } __declspec(naked) void YUY2ToUV422Row_SSE2(const uint8_t* src_yuy2, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push edi mov eax, [esp + 4 + 4] // src_yuy2 mov edx, [esp + 4 + 8] // dst_u mov edi, [esp + 4 + 12] // dst_v mov ecx, [esp + 4 + 16] // width pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff psrlw xmm5, 8 sub edi, edx convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] psrlw xmm0, 8 // YUYV -> UVUV psrlw xmm1, 8 packuswb xmm0, xmm1 movdqa xmm1, xmm0 pand xmm0, xmm5 // U packuswb xmm0, xmm0 psrlw xmm1, 8 // V packuswb xmm1, xmm1 movq qword ptr [edx], xmm0 movq qword ptr [edx + edi], xmm1 lea edx, [edx + 8] sub ecx, 16 jg convertloop pop edi ret } } __declspec(naked) void UYVYToYRow_SSE2(const uint8_t* src_uyvy, uint8_t* dst_y, int width) { __asm { mov eax, [esp + 4] // src_uyvy mov edx, [esp + 8] // dst_y mov ecx, [esp + 12] // width convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] psrlw xmm0, 8 // odd bytes are Y psrlw xmm1, 8 packuswb xmm0, xmm1 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 jg convertloop ret } } __declspec(naked) void UYVYToUVRow_SSE2(const uint8_t* src_uyvy, int stride_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_yuy2 mov esi, [esp + 8 + 8] // stride_yuy2 mov edx, [esp + 8 + 12] // dst_u mov edi, [esp + 8 + 16] // dst_v mov ecx, [esp + 8 + 20] // width pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff psrlw xmm5, 8 sub edi, edx convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + esi] movdqu xmm3, [eax + esi + 16] lea eax, [eax + 32] pavgb xmm0, xmm2 pavgb xmm1, xmm3 pand xmm0, xmm5 // UYVY -> UVUV pand xmm1, xmm5 packuswb xmm0, xmm1 movdqa xmm1, xmm0 pand xmm0, xmm5 // U packuswb xmm0, xmm0 psrlw xmm1, 8 // V packuswb xmm1, xmm1 movq qword ptr [edx], xmm0 movq qword ptr [edx + edi], xmm1 lea edx, [edx + 8] sub ecx, 16 jg convertloop pop edi pop esi ret } } 
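// A scalar model of the two-row UV extraction above (illustrative only; not
// part of the library's API). UYVY stores U0 Y0 V0 Y1; the ToUV rows average
// chroma vertically across src and src + stride, matching pavgb's rounded
// average, before splitting into planar U and V:
static __inline void UYVYToUVPair_C_Sketch(const uint8_t* top,
                                           const uint8_t* bot,
                                           uint8_t* u,
                                           uint8_t* v) {
  *u = (uint8_t)((top[0] + bot[0] + 1) >> 1);  // rounded average, like pavgb
  *v = (uint8_t)((top[2] + bot[2] + 1) >> 1);
}
// The UV422 variants below take a single row and skip the vertical average.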
__declspec(naked) void UYVYToUV422Row_SSE2(const uint8_t* src_uyvy, uint8_t* dst_u, uint8_t* dst_v, int width) { __asm { push edi mov eax, [esp + 4 + 4] // src_yuy2 mov edx, [esp + 4 + 8] // dst_u mov edi, [esp + 4 + 12] // dst_v mov ecx, [esp + 4 + 16] // width pcmpeqb xmm5, xmm5 // generate mask 0x00ff00ff psrlw xmm5, 8 sub edi, edx convertloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] pand xmm0, xmm5 // UYVY -> UVUV pand xmm1, xmm5 packuswb xmm0, xmm1 movdqa xmm1, xmm0 pand xmm0, xmm5 // U packuswb xmm0, xmm0 psrlw xmm1, 8 // V packuswb xmm1, xmm1 movq qword ptr [edx], xmm0 movq qword ptr [edx + edi], xmm1 lea edx, [edx + 8] sub ecx, 16 jg convertloop pop edi ret } } #endif // HAS_YUY2TOYROW_SSE2 #ifdef HAS_BLENDPLANEROW_SSSE3 // Blend 8 pixels at a time. // unsigned version of math // =((A2*C2)+(B2*(255-C2))+255)/256 // signed version of math // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256 __declspec(naked) void BlendPlaneRow_SSSE3(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, uint8_t* dst, int width) { __asm { push esi push edi pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00 psllw xmm5, 8 mov eax, 0x80808080 // 128 for biasing image to signed. movd xmm6, eax pshufd xmm6, xmm6, 0x00 mov eax, 0x807f807f // 32768 + 127 for unbias and round. movd xmm7, eax pshufd xmm7, xmm7, 0x00 mov eax, [esp + 8 + 4] // src0 mov edx, [esp + 8 + 8] // src1 mov esi, [esp + 8 + 12] // alpha mov edi, [esp + 8 + 16] // dst mov ecx, [esp + 8 + 20] // width sub eax, esi sub edx, esi sub edi, esi // 8 pixel loop. convertloop8: movq xmm0, qword ptr [esi] // alpha punpcklbw xmm0, xmm0 pxor xmm0, xmm5 // a, 255-a movq xmm1, qword ptr [eax + esi] // src0 movq xmm2, qword ptr [edx + esi] // src1 punpcklbw xmm1, xmm2 psubb xmm1, xmm6 // bias src0/1 - 128 pmaddubsw xmm0, xmm1 paddw xmm0, xmm7 // unbias result - 32768 and round. psrlw xmm0, 8 packuswb xmm0, xmm0 movq qword ptr [edi + esi], xmm0 lea esi, [esi + 8] sub ecx, 8 jg convertloop8 pop edi pop esi ret } } #endif // HAS_BLENDPLANEROW_SSSE3 #ifdef HAS_BLENDPLANEROW_AVX2 // Blend 32 pixels at a time. // unsigned version of math // =((A2*C2)+(B2*(255-C2))+255)/256 // signed version of math // =(((A2-128)*C2)+((B2-128)*(255-C2))+32768+127)/256 __declspec(naked) void BlendPlaneRow_AVX2(const uint8_t* src0, const uint8_t* src1, const uint8_t* alpha, uint8_t* dst, int width) { __asm { push esi push edi vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff00ff00 vpsllw ymm5, ymm5, 8 mov eax, 0x80808080 // 128 for biasing image to signed. vmovd xmm6, eax vbroadcastss ymm6, xmm6 mov eax, 0x807f807f // 32768 + 127 for unbias and round. vmovd xmm7, eax vbroadcastss ymm7, xmm7 mov eax, [esp + 8 + 4] // src0 mov edx, [esp + 8 + 8] // src1 mov esi, [esp + 8 + 12] // alpha mov edi, [esp + 8 + 16] // dst mov ecx, [esp + 8 + 20] // width sub eax, esi sub edx, esi sub edi, esi // 32 pixel loop. convertloop32: vmovdqu ymm0, [esi] // alpha vpunpckhbw ymm3, ymm0, ymm0 // 8..15, 24..31 vpunpcklbw ymm0, ymm0, ymm0 // 0..7, 16..23 vpxor ymm3, ymm3, ymm5 // a, 255-a vpxor ymm0, ymm0, ymm5 // a, 255-a vmovdqu ymm1, [eax + esi] // src0 vmovdqu ymm2, [edx + esi] // src1 vpunpckhbw ymm4, ymm1, ymm2 vpunpcklbw ymm1, ymm1, ymm2 vpsubb ymm4, ymm4, ymm6 // bias src0/1 - 128 vpsubb ymm1, ymm1, ymm6 // bias src0/1 - 128 vpmaddubsw ymm3, ymm3, ymm4 vpmaddubsw ymm0, ymm0, ymm1 vpaddw ymm3, ymm3, ymm7 // unbias result - 32768 and round. vpaddw ymm0, ymm0, ymm7 // unbias result - 32768 and round. 
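    // Scalar form of this signed-bias trick (illustrative):
    //   dst = (((s0 - 128) * a + (s1 - 128) * (255 - a)) + 32768 + 127) >> 8
    // which matches the unsigned ((s0 * a + s1 * (255 - a)) + 255) / 256.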
vpsrlw ymm3, ymm3, 8 vpsrlw ymm0, ymm0, 8 vpackuswb ymm0, ymm0, ymm3 vmovdqu [edi + esi], ymm0 lea esi, [esi + 32] sub ecx, 32 jg convertloop32 pop edi pop esi vzeroupper ret } } #endif // HAS_BLENDPLANEROW_AVX2 #ifdef HAS_ARGBBLENDROW_SSSE3 // Shuffle table for isolating alpha. static const uvec8 kShuffleAlpha = {3u, 0x80, 3u, 0x80, 7u, 0x80, 7u, 0x80, 11u, 0x80, 11u, 0x80, 15u, 0x80, 15u, 0x80}; // Blend 8 pixels at a time. __declspec(naked) void ARGBBlendRow_SSSE3(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { __asm { push esi mov eax, [esp + 4 + 4] // src_argb0 mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width pcmpeqb xmm7, xmm7 // generate constant 0x0001 psrlw xmm7, 15 pcmpeqb xmm6, xmm6 // generate mask 0x00ff00ff psrlw xmm6, 8 pcmpeqb xmm5, xmm5 // generate mask 0xff00ff00 psllw xmm5, 8 pcmpeqb xmm4, xmm4 // generate mask 0xff000000 pslld xmm4, 24 sub ecx, 4 jl convertloop4b // less than 4 pixels? // 4 pixel loop. convertloop4: movdqu xmm3, [eax] // src argb lea eax, [eax + 16] movdqa xmm0, xmm3 // src argb pxor xmm3, xmm4 // ~alpha movdqu xmm2, [esi] // _r_b pshufb xmm3, xmmword ptr kShuffleAlpha // alpha pand xmm2, xmm6 // _r_b paddw xmm3, xmm7 // 256 - alpha pmullw xmm2, xmm3 // _r_b * alpha movdqu xmm1, [esi] // _a_g lea esi, [esi + 16] psrlw xmm1, 8 // _a_g por xmm0, xmm4 // set alpha to 255 pmullw xmm1, xmm3 // _a_g * alpha psrlw xmm2, 8 // _r_b convert to 8 bits again paddusb xmm0, xmm2 // + src argb pand xmm1, xmm5 // a_g_ convert to 8 bits again paddusb xmm0, xmm1 // + src argb movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jge convertloop4 convertloop4b: add ecx, 4 - 1 jl convertloop1b // 1 pixel loop. convertloop1: movd xmm3, [eax] // src argb lea eax, [eax + 4] movdqa xmm0, xmm3 // src argb pxor xmm3, xmm4 // ~alpha movd xmm2, [esi] // _r_b pshufb xmm3, xmmword ptr kShuffleAlpha // alpha pand xmm2, xmm6 // _r_b paddw xmm3, xmm7 // 256 - alpha pmullw xmm2, xmm3 // _r_b * alpha movd xmm1, [esi] // _a_g lea esi, [esi + 4] psrlw xmm1, 8 // _a_g por xmm0, xmm4 // set alpha to 255 pmullw xmm1, xmm3 // _a_g * alpha psrlw xmm2, 8 // _r_b convert to 8 bits again paddusb xmm0, xmm2 // + src argb pand xmm1, xmm5 // a_g_ convert to 8 bits again paddusb xmm0, xmm1 // + src argb movd [edx], xmm0 lea edx, [edx + 4] sub ecx, 1 jge convertloop1 convertloop1b: pop esi ret } } #endif // HAS_ARGBBLENDROW_SSSE3 #ifdef HAS_ARGBATTENUATEROW_SSSE3 // Shuffle table duplicating alpha. 
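// Attenuation premultiplies each color channel by alpha. A scalar model of
// what the SSSE3 row below computes (illustrative only; the SIMD path
// approximates the divide with a multiply-high by the duplicated alpha):
//   uint32_t a = src[3];
//   dst[0] = (uint8_t)(src[0] * a / 255);  // B
//   dst[1] = (uint8_t)(src[1] * a / 255);  // G
//   dst[2] = (uint8_t)(src[2] * a / 255);  // R
//   dst[3] = (uint8_t)a;                   // alpha is copied through
// The tables below broadcast each pixel's alpha across its channel words for
// that multiply.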
static const uvec8 kShuffleAlpha0 = { 3u, 3u, 3u, 3u, 3u, 3u, 128u, 128u, 7u, 7u, 7u, 7u, 7u, 7u, 128u, 128u, }; static const uvec8 kShuffleAlpha1 = { 11u, 11u, 11u, 11u, 11u, 11u, 128u, 128u, 15u, 15u, 15u, 15u, 15u, 15u, 128u, 128u, }; __declspec(naked) void ARGBAttenuateRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_argb0 mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width pcmpeqb xmm3, xmm3 // generate mask 0xff000000 pslld xmm3, 24 movdqa xmm4, xmmword ptr kShuffleAlpha0 movdqa xmm5, xmmword ptr kShuffleAlpha1 convertloop: movdqu xmm0, [eax] // read 4 pixels pshufb xmm0, xmm4 // isolate first 2 alphas movdqu xmm1, [eax] // read 4 pixels punpcklbw xmm1, xmm1 // first 2 pixel rgbs pmulhuw xmm0, xmm1 // rgb * a movdqu xmm1, [eax] // read 4 pixels pshufb xmm1, xmm5 // isolate next 2 alphas movdqu xmm2, [eax] // read 4 pixels punpckhbw xmm2, xmm2 // next 2 pixel rgbs pmulhuw xmm1, xmm2 // rgb * a movdqu xmm2, [eax] // mask original alpha lea eax, [eax + 16] pand xmm2, xmm3 psrlw xmm0, 8 psrlw xmm1, 8 packuswb xmm0, xmm1 por xmm0, xmm2 // copy original alpha movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jg convertloop ret } } #endif // HAS_ARGBATTENUATEROW_SSSE3 #ifdef HAS_ARGBATTENUATEROW_AVX2 // Shuffle table duplicating alpha. static const uvec8 kShuffleAlpha_AVX2 = {6u, 7u, 6u, 7u, 6u, 7u, 128u, 128u, 14u, 15u, 14u, 15u, 14u, 15u, 128u, 128u}; __declspec(naked) void ARGBAttenuateRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, int width) { __asm { mov eax, [esp + 4] // src_argb0 mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width sub edx, eax vbroadcastf128 ymm4, xmmword ptr kShuffleAlpha_AVX2 vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0xff000000 vpslld ymm5, ymm5, 24 convertloop: vmovdqu ymm6, [eax] // read 8 pixels. vpunpcklbw ymm0, ymm6, ymm6 // low 4 pixels. mutated. vpunpckhbw ymm1, ymm6, ymm6 // high 4 pixels. mutated. vpshufb ymm2, ymm0, ymm4 // low 4 alphas vpshufb ymm3, ymm1, ymm4 // high 4 alphas vpmulhuw ymm0, ymm0, ymm2 // rgb * a vpmulhuw ymm1, ymm1, ymm3 // rgb * a vpand ymm6, ymm6, ymm5 // isolate alpha vpsrlw ymm0, ymm0, 8 vpsrlw ymm1, ymm1, 8 vpackuswb ymm0, ymm0, ymm1 // unmutated. vpor ymm0, ymm0, ymm6 // copy original alpha vmovdqu [eax + edx], ymm0 lea eax, [eax + 32] sub ecx, 8 jg convertloop vzeroupper ret } } #endif // HAS_ARGBATTENUATEROW_AVX2 #ifdef HAS_ARGBUNATTENUATEROW_SSE2 // Unattenuate 4 pixels at a time. __declspec(naked) void ARGBUnattenuateRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int width) { __asm { push ebx push esi push edi mov eax, [esp + 12 + 4] // src_argb mov edx, [esp + 12 + 8] // dst_argb mov ecx, [esp + 12 + 12] // width lea ebx, fixed_invtbl8 convertloop: movdqu xmm0, [eax] // read 4 pixels movzx esi, byte ptr [eax + 3] // first alpha movzx edi, byte ptr [eax + 7] // second alpha punpcklbw xmm0, xmm0 // first 2 movd xmm2, dword ptr [ebx + esi * 4] movd xmm3, dword ptr [ebx + edi * 4] pshuflw xmm2, xmm2, 040h // first 4 inv_alpha words. 
    // 1, a, a, a
    pshuflw    xmm3, xmm3, 040h  // next 4 inv_alpha words
    movlhps    xmm2, xmm3
    pmulhuw    xmm0, xmm2  // rgb * a
    movdqu     xmm1, [eax]  // read 4 pixels
    movzx      esi, byte ptr [eax + 11]  // third alpha
    movzx      edi, byte ptr [eax + 15]  // fourth alpha
    punpckhbw  xmm1, xmm1  // next 2
    movd       xmm2, dword ptr [ebx + esi * 4]
    movd       xmm3, dword ptr [ebx + edi * 4]
    pshuflw    xmm2, xmm2, 040h  // first 4 inv_alpha words
    pshuflw    xmm3, xmm3, 040h  // next 4 inv_alpha words
    movlhps    xmm2, xmm3
    pmulhuw    xmm1, xmm2  // rgb * a
    lea        eax, [eax + 16]
    packuswb   xmm0, xmm1
    movdqu     [edx], xmm0
    lea        edx, [edx + 16]
    sub        ecx, 4
    jg         convertloop

    pop        edi
    pop        esi
    pop        ebx
    ret
  }
}
#endif  // HAS_ARGBUNATTENUATEROW_SSE2

#ifdef HAS_ARGBUNATTENUATEROW_AVX2
// Shuffle table duplicating alpha.
static const uvec8 kUnattenShuffleAlpha_AVX2 = {
    0u, 1u, 0u, 1u, 0u, 1u, 6u, 7u, 8u, 9u, 8u, 9u, 8u, 9u, 14u, 15u};
// TODO(fbarchard): Enable USE_GATHER for future hardware if faster.
// USE_GATHER is not on by default, due to being a slow instruction.
#ifdef USE_GATHER
__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
                                               uint8_t* dst_argb,
                                               int width) {
  __asm {
    mov        eax, [esp + 4]   // src_argb
    mov        edx, [esp + 8]   // dst_argb
    mov        ecx, [esp + 12]  // width
    sub        edx, eax
    vbroadcastf128 ymm4, xmmword ptr kUnattenShuffleAlpha_AVX2

  convertloop:
    vmovdqu    ymm6, [eax]  // read 8 pixels.
    vpcmpeqb   ymm5, ymm5, ymm5  // generate mask 0xffffffff for gather.
    vpsrld     ymm2, ymm6, 24  // alpha in low 8 bits.
    vpunpcklbw ymm0, ymm6, ymm6  // low 4 pixels. mutated.
    vpunpckhbw ymm1, ymm6, ymm6  // high 4 pixels. mutated.
    vpgatherdd ymm3, [ymm2 * 4 + fixed_invtbl8], ymm5  // ymm5 cleared. 1, a
    vpunpcklwd ymm2, ymm3, ymm3  // low 4 inverted alphas. mutated. 1, 1, a, a
    vpunpckhwd ymm3, ymm3, ymm3  // high 4 inverted alphas. mutated.
    vpshufb    ymm2, ymm2, ymm4  // replicate low 4 alphas. 1, a, a, a
    vpshufb    ymm3, ymm3, ymm4  // replicate high 4 alphas
    vpmulhuw   ymm0, ymm0, ymm2  // rgb * ia
    vpmulhuw   ymm1, ymm1, ymm3  // rgb * ia
    vpackuswb  ymm0, ymm0, ymm1  // unmutated.
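    // Net effect (illustrative): each color channel is scaled by a fixed
    // point reciprocal of its alpha from fixed_invtbl8, undoing the
    // premultiplication; packuswb saturates any result above 255.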
    vmovdqu    [eax + edx], ymm0
    lea        eax, [eax + 32]
    sub        ecx, 8
    jg         convertloop
    vzeroupper
    ret
  }
}
#else  // USE_GATHER
__declspec(naked) void ARGBUnattenuateRow_AVX2(const uint8_t* src_argb,
                                               uint8_t* dst_argb,
                                               int width) {
  __asm {
    push       ebx
    push       esi
    push       edi
    mov        eax, [esp + 12 + 4]   // src_argb
    mov        edx, [esp + 12 + 8]   // dst_argb
    mov        ecx, [esp + 12 + 12]  // width
    sub        edx, eax
    lea        ebx, fixed_invtbl8
    vbroadcastf128 ymm5, xmmword ptr kUnattenShuffleAlpha_AVX2

  convertloop:
    // replace VPGATHER
    movzx      esi, byte ptr [eax + 3]  // alpha0
    movzx      edi, byte ptr [eax + 7]  // alpha1
    vmovd      xmm0, dword ptr [ebx + esi * 4]  // [1,a0]
    vmovd      xmm1, dword ptr [ebx + edi * 4]  // [1,a1]
    movzx      esi, byte ptr [eax + 11]  // alpha2
    movzx      edi, byte ptr [eax + 15]  // alpha3
    vpunpckldq xmm6, xmm0, xmm1  // [1,a1,1,a0]
    vmovd      xmm2, dword ptr [ebx + esi * 4]  // [1,a2]
    vmovd      xmm3, dword ptr [ebx + edi * 4]  // [1,a3]
    movzx      esi, byte ptr [eax + 19]  // alpha4
    movzx      edi, byte ptr [eax + 23]  // alpha5
    vpunpckldq xmm7, xmm2, xmm3  // [1,a3,1,a2]
    vmovd      xmm0, dword ptr [ebx + esi * 4]  // [1,a4]
    vmovd      xmm1, dword ptr [ebx + edi * 4]  // [1,a5]
    movzx      esi, byte ptr [eax + 27]  // alpha6
    movzx      edi, byte ptr [eax + 31]  // alpha7
    vpunpckldq xmm0, xmm0, xmm1  // [1,a5,1,a4]
    vmovd      xmm2, dword ptr [ebx + esi * 4]  // [1,a6]
    vmovd      xmm3, dword ptr [ebx + edi * 4]  // [1,a7]
    vpunpckldq xmm2, xmm2, xmm3  // [1,a7,1,a6]
    vpunpcklqdq xmm3, xmm6, xmm7  // [1,a3,1,a2,1,a1,1,a0]
    vpunpcklqdq xmm0, xmm0, xmm2  // [1,a7,1,a6,1,a5,1,a4]
    vinserti128 ymm3, ymm3, xmm0, 1  // [1,a7,1,a6,1,a5,1,a4,1,a3,1,a2,1,a1,1,a0]
    // end of VPGATHER

    vmovdqu    ymm6, [eax]  // read 8 pixels.
    vpunpcklbw ymm0, ymm6, ymm6  // low 4 pixels. mutated.
    vpunpckhbw ymm1, ymm6, ymm6  // high 4 pixels. mutated.
    vpunpcklwd ymm2, ymm3, ymm3  // low 4 inverted alphas. mutated. 1, 1, a, a
    vpunpckhwd ymm3, ymm3, ymm3  // high 4 inverted alphas. mutated.
    vpshufb    ymm2, ymm2, ymm5  // replicate low 4 alphas. 1, a, a, a
    vpshufb    ymm3, ymm3, ymm5  // replicate high 4 alphas
    vpmulhuw   ymm0, ymm0, ymm2  // rgb * ia
    vpmulhuw   ymm1, ymm1, ymm3  // rgb * ia
    vpackuswb  ymm0, ymm0, ymm1  // unmutated.
    vmovdqu    [eax + edx], ymm0
    lea        eax, [eax + 32]
    sub        ecx, 8
    jg         convertloop

    pop        edi
    pop        esi
    pop        ebx
    vzeroupper
    ret
  }
}
#endif  // USE_GATHER
#endif  // HAS_ARGBUNATTENUATEROW_AVX2

#ifdef HAS_ARGBGRAYROW_SSSE3
// Convert 8 ARGB pixels (32 bytes) to 8 Gray ARGB pixels.
__declspec(naked) void ARGBGrayRow_SSSE3(const uint8_t* src_argb,
                                         uint8_t* dst_argb,
                                         int width) {
  __asm {
    mov        eax, [esp + 4] /* src_argb */
    mov        edx, [esp + 8] /* dst_argb */
    mov        ecx, [esp + 12] /* width */
    movdqa     xmm4, xmmword ptr kARGBToYJ
    movdqa     xmm5, xmmword ptr kAddYJ64

  convertloop:
    movdqu     xmm0, [eax]  // G
    movdqu     xmm1, [eax + 16]
    pmaddubsw  xmm0, xmm4
    pmaddubsw  xmm1, xmm4
    phaddw     xmm0, xmm1
    paddw      xmm0, xmm5  // Add .5 for rounding.
    psrlw      xmm0, 7
    packuswb   xmm0, xmm0  // 8 G bytes
    movdqu     xmm2, [eax]  // A
    movdqu     xmm3, [eax + 16]
    lea        eax, [eax + 32]
    psrld      xmm2, 24
    psrld      xmm3, 24
    packuswb   xmm2, xmm3
    packuswb   xmm2, xmm2  // 8 A bytes
    movdqa     xmm3, xmm0  // Weave into GG, GA, then GGGA
    punpcklbw  xmm0, xmm0  // 8 GG words
    punpcklbw  xmm3, xmm2  // 8 GA words
    movdqa     xmm1, xmm0
    punpcklwd  xmm0, xmm3  // GGGA first 4
    punpckhwd  xmm1, xmm3  // GGGA next 4
    movdqu     [edx], xmm0
    movdqu     [edx + 16], xmm1
    lea        edx, [edx + 32]
    sub        ecx, 8
    jg         convertloop
    ret
  }
}
#endif  // HAS_ARGBGRAYROW_SSSE3

#ifdef HAS_ARGBSEPIAROW_SSSE3
// b = (r * 35 + g * 68 + b * 17) >> 7
// g = (r * 45 + g * 88 + b * 22) >> 7
// r = (r * 50 + g * 98 + b * 24) >> 7
// Constant for ARGB color to sepia tone.
static const vec8 kARGBToSepiaB = {17, 68, 35, 0, 17, 68, 35, 0,
                                   17, 68, 35, 0, 17, 68, 35, 0};

static const vec8 kARGBToSepiaG = {22, 88, 45, 0, 22, 88, 45, 0,
                                   22, 88, 45, 0, 22, 88, 45, 0};

static const vec8 kARGBToSepiaR = {24, 98, 50, 0, 24, 98, 50, 0,
                                   24, 98, 50, 0, 24, 98, 50, 0};

// Convert 8 ARGB pixels (32 bytes) to 8 Sepia ARGB pixels.
__declspec(naked) void ARGBSepiaRow_SSSE3(uint8_t* dst_argb, int width) {
  __asm {
    mov        eax, [esp + 4] /* dst_argb */
    mov        ecx, [esp + 8] /* width */
    movdqa     xmm2, xmmword ptr kARGBToSepiaB
    movdqa     xmm3, xmmword ptr kARGBToSepiaG
    movdqa     xmm4, xmmword ptr kARGBToSepiaR

  convertloop:
    movdqu     xmm0, [eax]  // B
    movdqu     xmm6, [eax + 16]
    pmaddubsw  xmm0, xmm2
    pmaddubsw  xmm6, xmm2
    phaddw     xmm0, xmm6
    psrlw      xmm0, 7
    packuswb   xmm0, xmm0  // 8 B values
    movdqu     xmm5, [eax]  // G
    movdqu     xmm1, [eax + 16]
    pmaddubsw  xmm5, xmm3
    pmaddubsw  xmm1, xmm3
    phaddw     xmm5, xmm1
    psrlw      xmm5, 7
    packuswb   xmm5, xmm5  // 8 G values
    punpcklbw  xmm0, xmm5  // 8 BG values
    movdqu     xmm5, [eax]  // R
    movdqu     xmm1, [eax + 16]
    pmaddubsw  xmm5, xmm4
    pmaddubsw  xmm1, xmm4
    phaddw     xmm5, xmm1
    psrlw      xmm5, 7
    packuswb   xmm5, xmm5  // 8 R values
    movdqu     xmm6, [eax]  // A
    movdqu     xmm1, [eax + 16]
    psrld      xmm6, 24
    psrld      xmm1, 24
    packuswb   xmm6, xmm1
    packuswb   xmm6, xmm6  // 8 A values
    punpcklbw  xmm5, xmm6  // 8 RA values
    movdqa     xmm1, xmm0  // Weave BG, RA together
    punpcklwd  xmm0, xmm5  // BGRA first 4
    punpckhwd  xmm1, xmm5  // BGRA next 4
    movdqu     [eax], xmm0
    movdqu     [eax + 16], xmm1
    lea        eax, [eax + 32]
    sub        ecx, 8
    jg         convertloop
    ret
  }
}
#endif  // HAS_ARGBSEPIAROW_SSSE3

#ifdef HAS_ARGBCOLORMATRIXROW_SSSE3
// Transform 8 ARGB pixels (32 bytes) with color matrix.
// Same as Sepia except matrix is provided.
// TODO(fbarchard): packuswbs only use half of the reg. To make RGBA, combine R
// and B into a high and low, then G/A, unpackl/hbw and then unpckl/hwd.
__declspec(naked) void ARGBColorMatrixRow_SSSE3(const uint8_t* src_argb,
                                                uint8_t* dst_argb,
                                                const int8_t* matrix_argb,
                                                int width) {
  __asm {
    mov        eax, [esp + 4] /* src_argb */
    mov        edx, [esp + 8] /* dst_argb */
    mov        ecx, [esp + 12] /* matrix_argb */
    movdqu     xmm5, [ecx]
    pshufd     xmm2, xmm5, 0x00
    pshufd     xmm3, xmm5, 0x55
    pshufd     xmm4, xmm5, 0xaa
    pshufd     xmm5, xmm5, 0xff
    mov        ecx, [esp + 16] /* width */

  convertloop:
    movdqu     xmm0, [eax]  // B
    movdqu     xmm7, [eax + 16]
    pmaddubsw  xmm0, xmm2
    pmaddubsw  xmm7, xmm2
    movdqu     xmm6, [eax]  // G
    movdqu     xmm1, [eax + 16]
    pmaddubsw  xmm6, xmm3
    pmaddubsw  xmm1, xmm3
    phaddsw    xmm0, xmm7  // B
    phaddsw    xmm6, xmm1  // G
    psraw      xmm0, 6  // B
    psraw      xmm6, 6  // G
    packuswb   xmm0, xmm0  // 8 B values
    packuswb   xmm6, xmm6  // 8 G values
    punpcklbw  xmm0, xmm6  // 8 BG values
    movdqu     xmm1, [eax]  // R
    movdqu     xmm7, [eax + 16]
    pmaddubsw  xmm1, xmm4
    pmaddubsw  xmm7, xmm4
    phaddsw    xmm1, xmm7  // R
    movdqu     xmm6, [eax]  // A
    movdqu     xmm7, [eax + 16]
    pmaddubsw  xmm6, xmm5
    pmaddubsw  xmm7, xmm5
    phaddsw    xmm6, xmm7  // A
    psraw      xmm1, 6  // R
    psraw      xmm6, 6  // A
    packuswb   xmm1, xmm1  // 8 R values
    packuswb   xmm6, xmm6  // 8 A values
    punpcklbw  xmm1, xmm6  // 8 RA values
    movdqa     xmm6, xmm0  // Weave BG, RA together
    punpcklwd  xmm0, xmm1  // BGRA first 4
    punpckhwd  xmm6, xmm1  // BGRA next 4
    movdqu     [edx], xmm0
    movdqu     [edx + 16], xmm6
    lea        eax, [eax + 32]
    lea        edx, [edx + 32]
    sub        ecx, 8
    jg         convertloop
    ret
  }
}
#endif  // HAS_ARGBCOLORMATRIXROW_SSSE3

#ifdef HAS_ARGBQUANTIZEROW_SSE2
// Quantize 4 ARGB pixels (16 bytes).
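// A scalar model of the quantize step (illustrative only), applied to B, G
// and R while alpha is preserved, where scale is a 16.16 fixed point factor
// and interval_offset is typically interval_size / 2:
//   v = (v * scale >> 16) * interval_size + interval_offset;
// pmulhuw computes the v * scale >> 16 part directly on channels widened to
// 16 bit words.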
__declspec(naked) void ARGBQuantizeRow_SSE2(uint8_t* dst_argb, int scale, int interval_size, int interval_offset, int width) { __asm { mov eax, [esp + 4] /* dst_argb */ movd xmm2, [esp + 8] /* scale */ movd xmm3, [esp + 12] /* interval_size */ movd xmm4, [esp + 16] /* interval_offset */ mov ecx, [esp + 20] /* width */ pshuflw xmm2, xmm2, 040h pshufd xmm2, xmm2, 044h pshuflw xmm3, xmm3, 040h pshufd xmm3, xmm3, 044h pshuflw xmm4, xmm4, 040h pshufd xmm4, xmm4, 044h pxor xmm5, xmm5 // constant 0 pcmpeqb xmm6, xmm6 // generate mask 0xff000000 pslld xmm6, 24 convertloop: movdqu xmm0, [eax] // read 4 pixels punpcklbw xmm0, xmm5 // first 2 pixels pmulhuw xmm0, xmm2 // pixel * scale >> 16 movdqu xmm1, [eax] // read 4 pixels punpckhbw xmm1, xmm5 // next 2 pixels pmulhuw xmm1, xmm2 pmullw xmm0, xmm3 // * interval_size movdqu xmm7, [eax] // read 4 pixels pmullw xmm1, xmm3 pand xmm7, xmm6 // mask alpha paddw xmm0, xmm4 // + interval_size / 2 paddw xmm1, xmm4 packuswb xmm0, xmm1 por xmm0, xmm7 movdqu [eax], xmm0 lea eax, [eax + 16] sub ecx, 4 jg convertloop ret } } #endif // HAS_ARGBQUANTIZEROW_SSE2 #ifdef HAS_ARGBSHADEROW_SSE2 // Shade 4 pixels at a time by specified value. __declspec(naked) void ARGBShadeRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, int width, uint32_t value) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // width movd xmm2, [esp + 16] // value punpcklbw xmm2, xmm2 punpcklqdq xmm2, xmm2 convertloop: movdqu xmm0, [eax] // read 4 pixels lea eax, [eax + 16] movdqa xmm1, xmm0 punpcklbw xmm0, xmm0 // first 2 punpckhbw xmm1, xmm1 // next 2 pmulhuw xmm0, xmm2 // argb * value pmulhuw xmm1, xmm2 // argb * value psrlw xmm0, 8 psrlw xmm1, 8 packuswb xmm0, xmm1 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jg convertloop ret } } #endif // HAS_ARGBSHADEROW_SSE2 #ifdef HAS_ARGBMULTIPLYROW_SSE2 // Multiply 2 rows of ARGB pixels together, 4 pixels at a time. __declspec(naked) void ARGBMultiplyRow_SSE2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { __asm { push esi mov eax, [esp + 4 + 4] // src_argb0 mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width pxor xmm5, xmm5 // constant 0 convertloop: movdqu xmm0, [eax] // read 4 pixels from src_argb0 movdqu xmm2, [esi] // read 4 pixels from src_argb1 movdqu xmm1, xmm0 movdqu xmm3, xmm2 punpcklbw xmm0, xmm0 // first 2 punpckhbw xmm1, xmm1 // next 2 punpcklbw xmm2, xmm5 // first 2 punpckhbw xmm3, xmm5 // next 2 pmulhuw xmm0, xmm2 // src_argb0 * src_argb1 first 2 pmulhuw xmm1, xmm3 // src_argb0 * src_argb1 next 2 lea eax, [eax + 16] lea esi, [esi + 16] packuswb xmm0, xmm1 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jg convertloop pop esi ret } } #endif // HAS_ARGBMULTIPLYROW_SSE2 #ifdef HAS_ARGBADDROW_SSE2 // Add 2 rows of ARGB pixels together, 4 pixels at a time. // TODO(fbarchard): Port this to posix, neon and other math functions. 
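// Scalar models for this group of row ops (illustrative only):
//   Multiply: dst[i] ~= src0[i] * src1[i] / 255   (pmulhuw approximation)
//   Add:      dst[i] = min(src0[i] + src1[i], 255)  (paddusb saturates)
//   Subtract: dst[i] = max(src0[i] - src1[i], 0)    (psubusb saturates)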
__declspec(naked) void ARGBAddRow_SSE2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { __asm { push esi mov eax, [esp + 4 + 4] // src_argb0 mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width sub ecx, 4 jl convertloop49 convertloop4: movdqu xmm0, [eax] // read 4 pixels from src_argb0 lea eax, [eax + 16] movdqu xmm1, [esi] // read 4 pixels from src_argb1 lea esi, [esi + 16] paddusb xmm0, xmm1 // src_argb0 + src_argb1 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jge convertloop4 convertloop49: add ecx, 4 - 1 jl convertloop19 convertloop1: movd xmm0, [eax] // read 1 pixels from src_argb0 lea eax, [eax + 4] movd xmm1, [esi] // read 1 pixels from src_argb1 lea esi, [esi + 4] paddusb xmm0, xmm1 // src_argb0 + src_argb1 movd [edx], xmm0 lea edx, [edx + 4] sub ecx, 1 jge convertloop1 convertloop19: pop esi ret } } #endif // HAS_ARGBADDROW_SSE2 #ifdef HAS_ARGBSUBTRACTROW_SSE2 // Subtract 2 rows of ARGB pixels together, 4 pixels at a time. __declspec(naked) void ARGBSubtractRow_SSE2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { __asm { push esi mov eax, [esp + 4 + 4] // src_argb0 mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width convertloop: movdqu xmm0, [eax] // read 4 pixels from src_argb0 lea eax, [eax + 16] movdqu xmm1, [esi] // read 4 pixels from src_argb1 lea esi, [esi + 16] psubusb xmm0, xmm1 // src_argb0 - src_argb1 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jg convertloop pop esi ret } } #endif // HAS_ARGBSUBTRACTROW_SSE2 #ifdef HAS_ARGBMULTIPLYROW_AVX2 // Multiply 2 rows of ARGB pixels together, 8 pixels at a time. __declspec(naked) void ARGBMultiplyRow_AVX2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { __asm { push esi mov eax, [esp + 4 + 4] // src_argb0 mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width vpxor ymm5, ymm5, ymm5 // constant 0 convertloop: vmovdqu ymm1, [eax] // read 8 pixels from src_argb0 lea eax, [eax + 32] vmovdqu ymm3, [esi] // read 8 pixels from src_argb1 lea esi, [esi + 32] vpunpcklbw ymm0, ymm1, ymm1 // low 4 vpunpckhbw ymm1, ymm1, ymm1 // high 4 vpunpcklbw ymm2, ymm3, ymm5 // low 4 vpunpckhbw ymm3, ymm3, ymm5 // high 4 vpmulhuw ymm0, ymm0, ymm2 // src_argb0 * src_argb1 low 4 vpmulhuw ymm1, ymm1, ymm3 // src_argb0 * src_argb1 high 4 vpackuswb ymm0, ymm0, ymm1 vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 8 jg convertloop pop esi vzeroupper ret } } #endif // HAS_ARGBMULTIPLYROW_AVX2 #ifdef HAS_ARGBADDROW_AVX2 // Add 2 rows of ARGB pixels together, 8 pixels at a time. __declspec(naked) void ARGBAddRow_AVX2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { __asm { push esi mov eax, [esp + 4 + 4] // src_argb0 mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width convertloop: vmovdqu ymm0, [eax] // read 8 pixels from src_argb0 lea eax, [eax + 32] vpaddusb ymm0, ymm0, [esi] // add 8 pixels from src_argb1 lea esi, [esi + 32] vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 8 jg convertloop pop esi vzeroupper ret } } #endif // HAS_ARGBADDROW_AVX2 #ifdef HAS_ARGBSUBTRACTROW_AVX2 // Subtract 2 rows of ARGB pixels together, 8 pixels at a time. 
__declspec(naked) void ARGBSubtractRow_AVX2(const uint8_t* src_argb0, const uint8_t* src_argb1, uint8_t* dst_argb, int width) { __asm { push esi mov eax, [esp + 4 + 4] // src_argb0 mov esi, [esp + 4 + 8] // src_argb1 mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width convertloop: vmovdqu ymm0, [eax] // read 8 pixels from src_argb0 lea eax, [eax + 32] vpsubusb ymm0, ymm0, [esi] // src_argb0 - src_argb1 lea esi, [esi + 32] vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 8 jg convertloop pop esi vzeroupper ret } } #endif // HAS_ARGBSUBTRACTROW_AVX2 #ifdef HAS_SOBELXROW_SSE2 // SobelX as a matrix is // -1 0 1 // -2 0 2 // -1 0 1 __declspec(naked) void SobelXRow_SSE2(const uint8_t* src_y0, const uint8_t* src_y1, const uint8_t* src_y2, uint8_t* dst_sobelx, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_y0 mov esi, [esp + 8 + 8] // src_y1 mov edi, [esp + 8 + 12] // src_y2 mov edx, [esp + 8 + 16] // dst_sobelx mov ecx, [esp + 8 + 20] // width sub esi, eax sub edi, eax sub edx, eax pxor xmm5, xmm5 // constant 0 convertloop: movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0] movq xmm1, qword ptr [eax + 2] // read 8 pixels from src_y0[2] punpcklbw xmm0, xmm5 punpcklbw xmm1, xmm5 psubw xmm0, xmm1 movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0] movq xmm2, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2] punpcklbw xmm1, xmm5 punpcklbw xmm2, xmm5 psubw xmm1, xmm2 movq xmm2, qword ptr [eax + edi] // read 8 pixels from src_y2[0] movq xmm3, qword ptr [eax + edi + 2] // read 8 pixels from src_y2[2] punpcklbw xmm2, xmm5 punpcklbw xmm3, xmm5 psubw xmm2, xmm3 paddw xmm0, xmm2 paddw xmm0, xmm1 paddw xmm0, xmm1 pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw psubw xmm1, xmm0 pmaxsw xmm0, xmm1 packuswb xmm0, xmm0 movq qword ptr [eax + edx], xmm0 lea eax, [eax + 8] sub ecx, 8 jg convertloop pop edi pop esi ret } } #endif // HAS_SOBELXROW_SSE2 #ifdef HAS_SOBELYROW_SSE2 // SobelY as a matrix is // -1 -2 -1 // 0 0 0 // 1 2 1 __declspec(naked) void SobelYRow_SSE2(const uint8_t* src_y0, const uint8_t* src_y1, uint8_t* dst_sobely, int width) { __asm { push esi mov eax, [esp + 4 + 4] // src_y0 mov esi, [esp + 4 + 8] // src_y1 mov edx, [esp + 4 + 12] // dst_sobely mov ecx, [esp + 4 + 16] // width sub esi, eax sub edx, eax pxor xmm5, xmm5 // constant 0 convertloop: movq xmm0, qword ptr [eax] // read 8 pixels from src_y0[0] movq xmm1, qword ptr [eax + esi] // read 8 pixels from src_y1[0] punpcklbw xmm0, xmm5 punpcklbw xmm1, xmm5 psubw xmm0, xmm1 movq xmm1, qword ptr [eax + 1] // read 8 pixels from src_y0[1] movq xmm2, qword ptr [eax + esi + 1] // read 8 pixels from src_y1[1] punpcklbw xmm1, xmm5 punpcklbw xmm2, xmm5 psubw xmm1, xmm2 movq xmm2, qword ptr [eax + 2] // read 8 pixels from src_y0[2] movq xmm3, qword ptr [eax + esi + 2] // read 8 pixels from src_y1[2] punpcklbw xmm2, xmm5 punpcklbw xmm3, xmm5 psubw xmm2, xmm3 paddw xmm0, xmm2 paddw xmm0, xmm1 paddw xmm0, xmm1 pxor xmm1, xmm1 // abs = max(xmm0, -xmm0). SSSE3 could use pabsw psubw xmm1, xmm0 pmaxsw xmm0, xmm1 packuswb xmm0, xmm0 movq qword ptr [eax + edx], xmm0 lea eax, [eax + 8] sub ecx, 8 jg convertloop pop esi ret } } #endif // HAS_SOBELYROW_SSE2 #ifdef HAS_SOBELROW_SSE2 // Adds Sobel X and Sobel Y and stores Sobel into ARGB. 
// A = 255 // R = Sobel // G = Sobel // B = Sobel __declspec(naked) void SobelRow_SSE2(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) { __asm { push esi mov eax, [esp + 4 + 4] // src_sobelx mov esi, [esp + 4 + 8] // src_sobely mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width sub esi, eax pcmpeqb xmm5, xmm5 // alpha 255 pslld xmm5, 24 // 0xff000000 convertloop: movdqu xmm0, [eax] // read 16 pixels src_sobelx movdqu xmm1, [eax + esi] // read 16 pixels src_sobely lea eax, [eax + 16] paddusb xmm0, xmm1 // sobel = sobelx + sobely movdqa xmm2, xmm0 // GG punpcklbw xmm2, xmm0 // First 8 punpckhbw xmm0, xmm0 // Next 8 movdqa xmm1, xmm2 // GGGG punpcklwd xmm1, xmm2 // First 4 punpckhwd xmm2, xmm2 // Next 4 por xmm1, xmm5 // GGGA por xmm2, xmm5 movdqa xmm3, xmm0 // GGGG punpcklwd xmm3, xmm0 // Next 4 punpckhwd xmm0, xmm0 // Last 4 por xmm3, xmm5 // GGGA por xmm0, xmm5 movdqu [edx], xmm1 movdqu [edx + 16], xmm2 movdqu [edx + 32], xmm3 movdqu [edx + 48], xmm0 lea edx, [edx + 64] sub ecx, 16 jg convertloop pop esi ret } } #endif // HAS_SOBELROW_SSE2 #ifdef HAS_SOBELTOPLANEROW_SSE2 // Adds Sobel X and Sobel Y and stores Sobel into a plane. __declspec(naked) void SobelToPlaneRow_SSE2(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_y, int width) { __asm { push esi mov eax, [esp + 4 + 4] // src_sobelx mov esi, [esp + 4 + 8] // src_sobely mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width sub esi, eax convertloop: movdqu xmm0, [eax] // read 16 pixels src_sobelx movdqu xmm1, [eax + esi] // read 16 pixels src_sobely lea eax, [eax + 16] paddusb xmm0, xmm1 // sobel = sobelx + sobely movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 jg convertloop pop esi ret } } #endif // HAS_SOBELTOPLANEROW_SSE2 #ifdef HAS_SOBELXYROW_SSE2 // Mixes Sobel X, Sobel Y and Sobel into ARGB. // A = 255 // R = Sobel X // G = Sobel // B = Sobel Y __declspec(naked) void SobelXYRow_SSE2(const uint8_t* src_sobelx, const uint8_t* src_sobely, uint8_t* dst_argb, int width) { __asm { push esi mov eax, [esp + 4 + 4] // src_sobelx mov esi, [esp + 4 + 8] // src_sobely mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // width sub esi, eax pcmpeqb xmm5, xmm5 // alpha 255 convertloop: movdqu xmm0, [eax] // read 16 pixels src_sobelx movdqu xmm1, [eax + esi] // read 16 pixels src_sobely lea eax, [eax + 16] movdqa xmm2, xmm0 paddusb xmm2, xmm1 // sobel = sobelx + sobely movdqa xmm3, xmm0 // XA punpcklbw xmm3, xmm5 punpckhbw xmm0, xmm5 movdqa xmm4, xmm1 // YS punpcklbw xmm4, xmm2 punpckhbw xmm1, xmm2 movdqa xmm6, xmm4 // YSXA punpcklwd xmm6, xmm3 // First 4 punpckhwd xmm4, xmm3 // Next 4 movdqa xmm7, xmm1 // YSXA punpcklwd xmm7, xmm0 // Next 4 punpckhwd xmm1, xmm0 // Last 4 movdqu [edx], xmm6 movdqu [edx + 16], xmm4 movdqu [edx + 32], xmm7 movdqu [edx + 48], xmm1 lea edx, [edx + 64] sub ecx, 16 jg convertloop pop esi ret } } #endif // HAS_SOBELXYROW_SSE2 #ifdef HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 // Consider float CumulativeSum. // Consider calling CumulativeSum one row at time as needed. // Consider circular CumulativeSum buffer of radius * 2 + 1 height. // Convert cumulative sum for an area to an average for 1 pixel. // topleft is pointer to top left of CumulativeSum buffer for area. // botleft is pointer to bottom left of CumulativeSum buffer. // width is offset from left to right of area in CumulativeSum buffer measured // in number of ints. // area is the number of pixels in the area being averaged. // dst points to pixel to store result to. 
// count is number of averaged pixels to produce. // Does 4 pixels at a time. // This function requires alignment on accumulation buffer pointers. void CumulativeSumToAverageRow_SSE2(const int32_t* topleft, const int32_t* botleft, int width, int area, uint8_t* dst, int count) { __asm { mov eax, topleft // eax topleft mov esi, botleft // esi botleft mov edx, width movd xmm5, area mov edi, dst mov ecx, count cvtdq2ps xmm5, xmm5 rcpss xmm4, xmm5 // 1.0f / area pshufd xmm4, xmm4, 0 sub ecx, 4 jl l4b cmp area, 128 // 128 pixels will not overflow 15 bits. ja l4 pshufd xmm5, xmm5, 0 // area pcmpeqb xmm6, xmm6 // constant of 65536.0 - 1 = 65535.0 psrld xmm6, 16 cvtdq2ps xmm6, xmm6 addps xmm5, xmm6 // (65536.0 + area - 1) mulps xmm5, xmm4 // (65536.0 + area - 1) * 1 / area cvtps2dq xmm5, xmm5 // 0.16 fixed point packssdw xmm5, xmm5 // 16 bit shorts // 4 pixel loop small blocks. s4: // top left movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] // - top right psubd xmm0, [eax + edx * 4] psubd xmm1, [eax + edx * 4 + 16] psubd xmm2, [eax + edx * 4 + 32] psubd xmm3, [eax + edx * 4 + 48] lea eax, [eax + 64] // - bottom left psubd xmm0, [esi] psubd xmm1, [esi + 16] psubd xmm2, [esi + 32] psubd xmm3, [esi + 48] // + bottom right paddd xmm0, [esi + edx * 4] paddd xmm1, [esi + edx * 4 + 16] paddd xmm2, [esi + edx * 4 + 32] paddd xmm3, [esi + edx * 4 + 48] lea esi, [esi + 64] packssdw xmm0, xmm1 // pack 4 pixels into 2 registers packssdw xmm2, xmm3 pmulhuw xmm0, xmm5 pmulhuw xmm2, xmm5 packuswb xmm0, xmm2 movdqu [edi], xmm0 lea edi, [edi + 16] sub ecx, 4 jge s4 jmp l4b // 4 pixel loop l4: // top left movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + 32] movdqu xmm3, [eax + 48] // - top right psubd xmm0, [eax + edx * 4] psubd xmm1, [eax + edx * 4 + 16] psubd xmm2, [eax + edx * 4 + 32] psubd xmm3, [eax + edx * 4 + 48] lea eax, [eax + 64] // - bottom left psubd xmm0, [esi] psubd xmm1, [esi + 16] psubd xmm2, [esi + 32] psubd xmm3, [esi + 48] // + bottom right paddd xmm0, [esi + edx * 4] paddd xmm1, [esi + edx * 4 + 16] paddd xmm2, [esi + edx * 4 + 32] paddd xmm3, [esi + edx * 4 + 48] lea esi, [esi + 64] cvtdq2ps xmm0, xmm0 // Average = Sum * 1 / Area cvtdq2ps xmm1, xmm1 mulps xmm0, xmm4 mulps xmm1, xmm4 cvtdq2ps xmm2, xmm2 cvtdq2ps xmm3, xmm3 mulps xmm2, xmm4 mulps xmm3, xmm4 cvtps2dq xmm0, xmm0 cvtps2dq xmm1, xmm1 cvtps2dq xmm2, xmm2 cvtps2dq xmm3, xmm3 packssdw xmm0, xmm1 packssdw xmm2, xmm3 packuswb xmm0, xmm2 movdqu [edi], xmm0 lea edi, [edi + 16] sub ecx, 4 jge l4 l4b: add ecx, 4 - 1 jl l1b // 1 pixel loop l1: movdqu xmm0, [eax] psubd xmm0, [eax + edx * 4] lea eax, [eax + 16] psubd xmm0, [esi] paddd xmm0, [esi + edx * 4] lea esi, [esi + 16] cvtdq2ps xmm0, xmm0 mulps xmm0, xmm4 cvtps2dq xmm0, xmm0 packssdw xmm0, xmm0 packuswb xmm0, xmm0 movd dword ptr [edi], xmm0 lea edi, [edi + 4] sub ecx, 1 jge l1 l1b: } } #endif // HAS_CUMULATIVESUMTOAVERAGEROW_SSE2 #ifdef HAS_COMPUTECUMULATIVESUMROW_SSE2 // Creates a table of cumulative sums where each value is a sum of all values // above and to the left of the value. void ComputeCumulativeSumRow_SSE2(const uint8_t* row, int32_t* cumsum, const int32_t* previous_cumsum, int width) { __asm { mov eax, row mov edx, cumsum mov esi, previous_cumsum mov ecx, width pxor xmm0, xmm0 pxor xmm1, xmm1 sub ecx, 4 jl l4b test edx, 15 jne l4b // 4 pixel loop l4: movdqu xmm2, [eax] // 4 argb pixels 16 bytes. 
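    // Each iteration widens 16 bytes to sixteen 32 bit lanes, keeps a
    // running per-channel sum of this row in xmm0, and adds the previous
    // row's cumulative sums, so every output dword is a 2-D prefix sum (an
    // integral image) at that pixel.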
lea eax, [eax + 16] movdqa xmm4, xmm2 punpcklbw xmm2, xmm1 movdqa xmm3, xmm2 punpcklwd xmm2, xmm1 punpckhwd xmm3, xmm1 punpckhbw xmm4, xmm1 movdqa xmm5, xmm4 punpcklwd xmm4, xmm1 punpckhwd xmm5, xmm1 paddd xmm0, xmm2 movdqu xmm2, [esi] // previous row above. paddd xmm2, xmm0 paddd xmm0, xmm3 movdqu xmm3, [esi + 16] paddd xmm3, xmm0 paddd xmm0, xmm4 movdqu xmm4, [esi + 32] paddd xmm4, xmm0 paddd xmm0, xmm5 movdqu xmm5, [esi + 48] lea esi, [esi + 64] paddd xmm5, xmm0 movdqu [edx], xmm2 movdqu [edx + 16], xmm3 movdqu [edx + 32], xmm4 movdqu [edx + 48], xmm5 lea edx, [edx + 64] sub ecx, 4 jge l4 l4b: add ecx, 4 - 1 jl l1b // 1 pixel loop l1: movd xmm2, dword ptr [eax] // 1 argb pixel 4 bytes. lea eax, [eax + 4] punpcklbw xmm2, xmm1 punpcklwd xmm2, xmm1 paddd xmm0, xmm2 movdqu xmm2, [esi] lea esi, [esi + 16] paddd xmm2, xmm0 movdqu [edx], xmm2 lea edx, [edx + 16] sub ecx, 1 jge l1 l1b: } } #endif // HAS_COMPUTECUMULATIVESUMROW_SSE2 #ifdef HAS_ARGBAFFINEROW_SSE2 // Copy ARGB pixels from source image with slope to a row of destination. __declspec(naked) LIBYUV_API void ARGBAffineRow_SSE2(const uint8_t* src_argb, int src_argb_stride, uint8_t* dst_argb, const float* uv_dudv, int width) { __asm { push esi push edi mov eax, [esp + 12] // src_argb mov esi, [esp + 16] // stride mov edx, [esp + 20] // dst_argb mov ecx, [esp + 24] // pointer to uv_dudv movq xmm2, qword ptr [ecx] // uv movq xmm7, qword ptr [ecx + 8] // dudv mov ecx, [esp + 28] // width shl esi, 16 // 4, stride add esi, 4 movd xmm5, esi sub ecx, 4 jl l4b // setup for 4 pixel loop pshufd xmm7, xmm7, 0x44 // dup dudv pshufd xmm5, xmm5, 0 // dup 4, stride movdqa xmm0, xmm2 // x0, y0, x1, y1 addps xmm0, xmm7 movlhps xmm2, xmm0 movdqa xmm4, xmm7 addps xmm4, xmm4 // dudv *= 2 movdqa xmm3, xmm2 // x2, y2, x3, y3 addps xmm3, xmm4 addps xmm4, xmm4 // dudv *= 4 // 4 pixel loop l4: cvttps2dq xmm0, xmm2 // x, y float to int first 2 cvttps2dq xmm1, xmm3 // x, y float to int next 2 packssdw xmm0, xmm1 // x, y as 8 shorts pmaddwd xmm0, xmm5 // offsets = x * 4 + y * stride. movd esi, xmm0 pshufd xmm0, xmm0, 0x39 // shift right movd edi, xmm0 pshufd xmm0, xmm0, 0x39 // shift right movd xmm1, [eax + esi] // read pixel 0 movd xmm6, [eax + edi] // read pixel 1 punpckldq xmm1, xmm6 // combine pixel 0 and 1 addps xmm2, xmm4 // x, y += dx, dy first 2 movq qword ptr [edx], xmm1 movd esi, xmm0 pshufd xmm0, xmm0, 0x39 // shift right movd edi, xmm0 movd xmm6, [eax + esi] // read pixel 2 movd xmm0, [eax + edi] // read pixel 3 punpckldq xmm6, xmm0 // combine pixel 2 and 3 addps xmm3, xmm4 // x, y += dx, dy next 2 movq qword ptr 8[edx], xmm6 lea edx, [edx + 16] sub ecx, 4 jge l4 l4b: add ecx, 4 - 1 jl l1b // 1 pixel loop l1: cvttps2dq xmm0, xmm2 // x, y float to int packssdw xmm0, xmm0 // x, y as shorts pmaddwd xmm0, xmm5 // offset = x * 4 + y * stride addps xmm2, xmm7 // x, y += dx, dy movd esi, xmm0 movd xmm0, [eax + esi] // copy a pixel movd [edx], xmm0 lea edx, [edx + 4] sub ecx, 1 jge l1 l1b: pop edi pop esi ret } } #endif // HAS_ARGBAFFINEROW_SSE2 #ifdef HAS_INTERPOLATEROW_AVX2 // Bilinear filter 32x2 -> 32x1 __declspec(naked) void InterpolateRow_AVX2(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { __asm { push esi push edi mov edi, [esp + 8 + 4] // dst_ptr mov esi, [esp + 8 + 8] // src_ptr mov edx, [esp + 8 + 12] // src_stride mov ecx, [esp + 8 + 16] // dst_width mov eax, [esp + 8 + 20] // source_y_fraction (0..255) // Dispatch to specialized filters if applicable. cmp eax, 0 je xloop100 // 0 / 256. 
Blend 100 / 0. sub edi, esi cmp eax, 128 je xloop50 // 128 /256 is 0.50. Blend 50 / 50. vmovd xmm0, eax // high fraction 0..255 neg eax add eax, 256 vmovd xmm5, eax // low fraction 256..1 vpunpcklbw xmm5, xmm5, xmm0 vpunpcklwd xmm5, xmm5, xmm5 vbroadcastss ymm5, xmm5 mov eax, 0x80808080 // 128b for bias and rounding. vmovd xmm4, eax vbroadcastss ymm4, xmm4 xloop: vmovdqu ymm0, [esi] vmovdqu ymm2, [esi + edx] vpunpckhbw ymm1, ymm0, ymm2 // mutates vpunpcklbw ymm0, ymm0, ymm2 vpsubb ymm1, ymm1, ymm4 // bias to signed image vpsubb ymm0, ymm0, ymm4 vpmaddubsw ymm1, ymm5, ymm1 vpmaddubsw ymm0, ymm5, ymm0 vpaddw ymm1, ymm1, ymm4 // unbias and round vpaddw ymm0, ymm0, ymm4 vpsrlw ymm1, ymm1, 8 vpsrlw ymm0, ymm0, 8 vpackuswb ymm0, ymm0, ymm1 // unmutates vmovdqu [esi + edi], ymm0 lea esi, [esi + 32] sub ecx, 32 jg xloop jmp xloop99 // Blend 50 / 50. xloop50: vmovdqu ymm0, [esi] vpavgb ymm0, ymm0, [esi + edx] vmovdqu [esi + edi], ymm0 lea esi, [esi + 32] sub ecx, 32 jg xloop50 jmp xloop99 // Blend 100 / 0 - Copy row unchanged. xloop100: rep movsb xloop99: pop edi pop esi vzeroupper ret } } #endif // HAS_INTERPOLATEROW_AVX2 // Bilinear filter 16x2 -> 16x1 // TODO(fbarchard): Consider allowing 256 using memcpy. __declspec(naked) void InterpolateRow_SSSE3(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { __asm { push esi push edi mov edi, [esp + 8 + 4] // dst_ptr mov esi, [esp + 8 + 8] // src_ptr mov edx, [esp + 8 + 12] // src_stride mov ecx, [esp + 8 + 16] // dst_width mov eax, [esp + 8 + 20] // source_y_fraction (0..255) sub edi, esi // Dispatch to specialized filters if applicable. cmp eax, 0 je xloop100 // 0 /256. Blend 100 / 0. cmp eax, 128 je xloop50 // 128 / 256 is 0.50. Blend 50 / 50. movd xmm0, eax // high fraction 0..255 neg eax add eax, 256 movd xmm5, eax // low fraction 255..1 punpcklbw xmm5, xmm0 punpcklwd xmm5, xmm5 pshufd xmm5, xmm5, 0 mov eax, 0x80808080 // 128 for biasing image to signed. movd xmm4, eax pshufd xmm4, xmm4, 0x00 xloop: movdqu xmm0, [esi] movdqu xmm2, [esi + edx] movdqu xmm1, xmm0 punpcklbw xmm0, xmm2 punpckhbw xmm1, xmm2 psubb xmm0, xmm4 // bias image by -128 psubb xmm1, xmm4 movdqa xmm2, xmm5 movdqa xmm3, xmm5 pmaddubsw xmm2, xmm0 pmaddubsw xmm3, xmm1 paddw xmm2, xmm4 paddw xmm3, xmm4 psrlw xmm2, 8 psrlw xmm3, 8 packuswb xmm2, xmm3 movdqu [esi + edi], xmm2 lea esi, [esi + 16] sub ecx, 16 jg xloop jmp xloop99 // Blend 50 / 50. xloop50: movdqu xmm0, [esi] movdqu xmm1, [esi + edx] pavgb xmm0, xmm1 movdqu [esi + edi], xmm0 lea esi, [esi + 16] sub ecx, 16 jg xloop50 jmp xloop99 // Blend 100 / 0 - Copy row unchanged. xloop100: movdqu xmm0, [esi] movdqu [esi + edi], xmm0 lea esi, [esi + 16] sub ecx, 16 jg xloop100 xloop99: pop edi pop esi ret } } // For BGRAToARGB, ABGRToARGB, RGBAToARGB, and ARGBToRGBA. 
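// The shuffler argument is a 16-byte pshufb control vector: each destination
// byte selects one source byte (0-15). As an illustrative example (not
// necessarily one of the predefined tables), the control vector
// {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12} reverses the byte
// order within every 4-byte pixel, turning B,G,R,A storage into A,R,G,B.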
__declspec(naked) void ARGBShuffleRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // shuffler movdqu xmm5, [ecx] mov ecx, [esp + 16] // width wloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] pshufb xmm0, xmm5 pshufb xmm1, xmm5 movdqu [edx], xmm0 movdqu [edx + 16], xmm1 lea edx, [edx + 32] sub ecx, 8 jg wloop ret } } #ifdef HAS_ARGBSHUFFLEROW_AVX2 __declspec(naked) void ARGBShuffleRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, const uint8_t* shuffler, int width) { __asm { mov eax, [esp + 4] // src_argb mov edx, [esp + 8] // dst_argb mov ecx, [esp + 12] // shuffler vbroadcastf128 ymm5, [ecx] // same shuffle in high as low. mov ecx, [esp + 16] // width wloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] vpshufb ymm0, ymm0, ymm5 vpshufb ymm1, ymm1, ymm5 vmovdqu [edx], ymm0 vmovdqu [edx + 32], ymm1 lea edx, [edx + 64] sub ecx, 16 jg wloop vzeroupper ret } } #endif // HAS_ARGBSHUFFLEROW_AVX2 // YUY2 - Macro-pixel = 2 image pixels // Y0U0Y1V0....Y2U2Y3V2...Y4U4Y5V4.... // UYVY - Macro-pixel = 2 image pixels // U0Y0V0Y1 __declspec(naked) void I422ToYUY2Row_SSE2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_frame, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_y mov esi, [esp + 8 + 8] // src_u mov edx, [esp + 8 + 12] // src_v mov edi, [esp + 8 + 16] // dst_frame mov ecx, [esp + 8 + 20] // width sub edx, esi convertloop: movq xmm2, qword ptr [esi] // U movq xmm3, qword ptr [esi + edx] // V lea esi, [esi + 8] punpcklbw xmm2, xmm3 // UV movdqu xmm0, [eax] // Y lea eax, [eax + 16] movdqa xmm1, xmm0 punpcklbw xmm0, xmm2 // YUYV punpckhbw xmm1, xmm2 movdqu [edi], xmm0 movdqu [edi + 16], xmm1 lea edi, [edi + 32] sub ecx, 16 jg convertloop pop edi pop esi ret } } __declspec(naked) void I422ToUYVYRow_SSE2(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_frame, int width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_y mov esi, [esp + 8 + 8] // src_u mov edx, [esp + 8 + 12] // src_v mov edi, [esp + 8 + 16] // dst_frame mov ecx, [esp + 8 + 20] // width sub edx, esi convertloop: movq xmm2, qword ptr [esi] // U movq xmm3, qword ptr [esi + edx] // V lea esi, [esi + 8] punpcklbw xmm2, xmm3 // UV movdqu xmm0, [eax] // Y movdqa xmm1, xmm2 lea eax, [eax + 16] punpcklbw xmm1, xmm0 // UYVY punpckhbw xmm2, xmm0 movdqu [edi], xmm1 movdqu [edi + 16], xmm2 lea edi, [edi + 32] sub ecx, 16 jg convertloop pop edi pop esi ret } } #ifdef HAS_ARGBPOLYNOMIALROW_SSE2 __declspec(naked) void ARGBPolynomialRow_SSE2(const uint8_t* src_argb, uint8_t* dst_argb, const float* poly, int width) { __asm { push esi mov eax, [esp + 4 + 4] /* src_argb */ mov edx, [esp + 4 + 8] /* dst_argb */ mov esi, [esp + 4 + 12] /* poly */ mov ecx, [esp + 4 + 16] /* width */ pxor xmm3, xmm3 // 0 constant for zero extending bytes to ints. // 2 pixel loop. 
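// Each channel byte X is widened to float and run through the cubic
// result = C0 + C1 * X + C2 * X * X + C3 * X * X * X, where C0..C3 are the
// four consecutive 4-float coefficient groups loaded from [esi], [esi + 16],
// [esi + 32] and [esi + 48] below.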
convertloop: // pmovzxbd xmm0, dword ptr [eax] // BGRA pixel // pmovzxbd xmm4, dword ptr [eax + 4] // BGRA pixel movq xmm0, qword ptr [eax] // BGRABGRA lea eax, [eax + 8] punpcklbw xmm0, xmm3 movdqa xmm4, xmm0 punpcklwd xmm0, xmm3 // pixel 0 punpckhwd xmm4, xmm3 // pixel 1 cvtdq2ps xmm0, xmm0 // 4 floats cvtdq2ps xmm4, xmm4 movdqa xmm1, xmm0 // X movdqa xmm5, xmm4 mulps xmm0, [esi + 16] // C1 * X mulps xmm4, [esi + 16] addps xmm0, [esi] // result = C0 + C1 * X addps xmm4, [esi] movdqa xmm2, xmm1 movdqa xmm6, xmm5 mulps xmm2, xmm1 // X * X mulps xmm6, xmm5 mulps xmm1, xmm2 // X * X * X mulps xmm5, xmm6 mulps xmm2, [esi + 32] // C2 * X * X mulps xmm6, [esi + 32] mulps xmm1, [esi + 48] // C3 * X * X * X mulps xmm5, [esi + 48] addps xmm0, xmm2 // result += C2 * X * X addps xmm4, xmm6 addps xmm0, xmm1 // result += C3 * X * X * X addps xmm4, xmm5 cvttps2dq xmm0, xmm0 cvttps2dq xmm4, xmm4 packuswb xmm0, xmm4 packuswb xmm0, xmm0 movq qword ptr [edx], xmm0 lea edx, [edx + 8] sub ecx, 2 jg convertloop pop esi ret } } #endif // HAS_ARGBPOLYNOMIALROW_SSE2 #ifdef HAS_ARGBPOLYNOMIALROW_AVX2 __declspec(naked) void ARGBPolynomialRow_AVX2(const uint8_t* src_argb, uint8_t* dst_argb, const float* poly, int width) { __asm { mov eax, [esp + 4] /* src_argb */ mov edx, [esp + 8] /* dst_argb */ mov ecx, [esp + 12] /* poly */ vbroadcastf128 ymm4, [ecx] // C0 vbroadcastf128 ymm5, [ecx + 16] // C1 vbroadcastf128 ymm6, [ecx + 32] // C2 vbroadcastf128 ymm7, [ecx + 48] // C3 mov ecx, [esp + 16] /* width */ // 2 pixel loop. convertloop: vpmovzxbd ymm0, qword ptr [eax] // 2 BGRA pixels lea eax, [eax + 8] vcvtdq2ps ymm0, ymm0 // X 8 floats vmulps ymm2, ymm0, ymm0 // X * X vmulps ymm3, ymm0, ymm7 // C3 * X vfmadd132ps ymm0, ymm4, ymm5 // result = C0 + C1 * X vfmadd231ps ymm0, ymm2, ymm6 // result += C2 * X * X vfmadd231ps ymm0, ymm2, ymm3 // result += C3 * X * X * X vcvttps2dq ymm0, ymm0 vpackusdw ymm0, ymm0, ymm0 // b0g0r0a0_00000000_b0g0r0a0_00000000 vpermq ymm0, ymm0, 0xd8 // b0g0r0a0_b0g0r0a0_00000000_00000000 vpackuswb xmm0, xmm0, xmm0 // bgrabgra_00000000_00000000_00000000 vmovq qword ptr [edx], xmm0 lea edx, [edx + 8] sub ecx, 2 jg convertloop vzeroupper ret } } #endif // HAS_ARGBPOLYNOMIALROW_AVX2 #ifdef HAS_HALFFLOATROW_SSE2 static float kExpBias = 1.9259299444e-34f; __declspec(naked) void HalfFloatRow_SSE2(const uint16_t* src, uint16_t* dst, float scale, int width) { __asm { mov eax, [esp + 4] /* src */ mov edx, [esp + 8] /* dst */ movd xmm4, dword ptr [esp + 12] /* scale */ mov ecx, [esp + 16] /* width */ mulss xmm4, kExpBias pshufd xmm4, xmm4, 0 pxor xmm5, xmm5 sub edx, eax // 8 pixel loop. convertloop: movdqu xmm2, xmmword ptr [eax] // 8 shorts add eax, 16 movdqa xmm3, xmm2 punpcklwd xmm2, xmm5 cvtdq2ps xmm2, xmm2 // convert 8 ints to floats punpckhwd xmm3, xmm5 cvtdq2ps xmm3, xmm3 mulps xmm2, xmm4 mulps xmm3, xmm4 psrld xmm2, 13 psrld xmm3, 13 packssdw xmm2, xmm3 movdqu [eax + edx - 16], xmm2 sub ecx, 8 jg convertloop ret } } #endif // HAS_HALFFLOATROW_SSE2 #ifdef HAS_HALFFLOATROW_AVX2 __declspec(naked) void HalfFloatRow_AVX2(const uint16_t* src, uint16_t* dst, float scale, int width) { __asm { mov eax, [esp + 4] /* src */ mov edx, [esp + 8] /* dst */ movd xmm4, dword ptr [esp + 12] /* scale */ mov ecx, [esp + 16] /* width */ vmulss xmm4, xmm4, kExpBias vbroadcastss ymm4, xmm4 vpxor ymm5, ymm5, ymm5 sub edx, eax // 16 pixel loop. 
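// Note on the scaling trick (shared with HalfFloatRow_SSE2 above): kExpBias
// is 2^-112 (1.9259299444e-34f), so multiplying by scale * 2^-112 rebiases
// the float exponent from the single-precision bias of 127 toward the
// half-precision bias of 15; shifting the float bit pattern right by 13 then
// drops the surplus mantissa bits and leaves an IEEE half float in the low
// 16 bits of each lane.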
convertloop: vmovdqu ymm2, [eax] // 16 shorts add eax, 32 vpunpckhwd ymm3, ymm2, ymm5 // convert 16 shorts to 16 ints vpunpcklwd ymm2, ymm2, ymm5 vcvtdq2ps ymm3, ymm3 // convert 16 ints to floats vcvtdq2ps ymm2, ymm2 vmulps ymm3, ymm3, ymm4 // scale to adjust exponent for 5 bit range. vmulps ymm2, ymm2, ymm4 vpsrld ymm3, ymm3, 13 // float convert to 8 half floats truncate vpsrld ymm2, ymm2, 13 vpackssdw ymm2, ymm2, ymm3 vmovdqu [eax + edx - 32], ymm2 sub ecx, 16 jg convertloop vzeroupper ret } } #endif // HAS_HALFFLOATROW_AVX2 #ifdef HAS_HALFFLOATROW_F16C __declspec(naked) void HalfFloatRow_F16C(const uint16_t* src, uint16_t* dst, float scale, int width) { __asm { mov eax, [esp + 4] /* src */ mov edx, [esp + 8] /* dst */ vbroadcastss ymm4, [esp + 12] /* scale */ mov ecx, [esp + 16] /* width */ sub edx, eax // 16 pixel loop. convertloop: vpmovzxwd ymm2, xmmword ptr [eax] // 8 shorts -> 8 ints vpmovzxwd ymm3, xmmword ptr [eax + 16] // 8 more shorts add eax, 32 vcvtdq2ps ymm2, ymm2 // convert 8 ints to floats vcvtdq2ps ymm3, ymm3 vmulps ymm2, ymm2, ymm4 // scale to normalized range 0 to 1 vmulps ymm3, ymm3, ymm4 vcvtps2ph xmm2, ymm2, 3 // float convert to 8 half floats truncate vcvtps2ph xmm3, ymm3, 3 vmovdqu [eax + edx - 32], xmm2 vmovdqu [eax + edx - 32 + 16], xmm3 sub ecx, 16 jg convertloop vzeroupper ret } } #endif // HAS_HALFFLOATROW_F16C #ifdef HAS_ARGBCOLORTABLEROW_X86 // Transform ARGB pixels with color table. __declspec(naked) void ARGBColorTableRow_X86(uint8_t* dst_argb, const uint8_t* table_argb, int width) { __asm { push esi mov eax, [esp + 4 + 4] /* dst_argb */ mov esi, [esp + 4 + 8] /* table_argb */ mov ecx, [esp + 4 + 12] /* width */ // 1 pixel loop. convertloop: movzx edx, byte ptr [eax] lea eax, [eax + 4] movzx edx, byte ptr [esi + edx * 4] mov byte ptr [eax - 4], dl movzx edx, byte ptr [eax - 4 + 1] movzx edx, byte ptr [esi + edx * 4 + 1] mov byte ptr [eax - 4 + 1], dl movzx edx, byte ptr [eax - 4 + 2] movzx edx, byte ptr [esi + edx * 4 + 2] mov byte ptr [eax - 4 + 2], dl movzx edx, byte ptr [eax - 4 + 3] movzx edx, byte ptr [esi + edx * 4 + 3] mov byte ptr [eax - 4 + 3], dl dec ecx jg convertloop pop esi ret } } #endif // HAS_ARGBCOLORTABLEROW_X86 #ifdef HAS_RGBCOLORTABLEROW_X86 // Transform RGB pixels with color table. __declspec(naked) void RGBColorTableRow_X86(uint8_t* dst_argb, const uint8_t* table_argb, int width) { __asm { push esi mov eax, [esp + 4 + 4] /* dst_argb */ mov esi, [esp + 4 + 8] /* table_argb */ mov ecx, [esp + 4 + 12] /* width */ // 1 pixel loop. convertloop: movzx edx, byte ptr [eax] lea eax, [eax + 4] movzx edx, byte ptr [esi + edx * 4] mov byte ptr [eax - 4], dl movzx edx, byte ptr [eax - 4 + 1] movzx edx, byte ptr [esi + edx * 4 + 1] mov byte ptr [eax - 4 + 1], dl movzx edx, byte ptr [eax - 4 + 2] movzx edx, byte ptr [esi + edx * 4 + 2] mov byte ptr [eax - 4 + 2], dl dec ecx jg convertloop pop esi ret } } #endif // HAS_RGBCOLORTABLEROW_X86 #ifdef HAS_ARGBLUMACOLORTABLEROW_SSSE3 // Transform RGB pixels with luma table. __declspec(naked) void ARGBLumaColorTableRow_SSSE3(const uint8_t* src_argb, uint8_t* dst_argb, int width, const uint8_t* luma, uint32_t lumacoeff) { __asm { push esi push edi mov eax, [esp + 8 + 4] /* src_argb */ mov edi, [esp + 8 + 8] /* dst_argb */ mov ecx, [esp + 8 + 12] /* width */ movd xmm2, dword ptr [esp + 8 + 16] // luma table movd xmm3, dword ptr [esp + 8 + 20] // lumacoeff pshufd xmm2, xmm2, 0 pshufd xmm3, xmm3, 0 pcmpeqb xmm4, xmm4 // generate mask 0xff00ff00 psllw xmm4, 8 pxor xmm5, xmm5 // 4 pixel loop.
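// Shape of the loop below: pmaddubsw/phaddw against lumacoeff computes a
// 16-bit weighted luma per pixel, the 0xff00 mask quantizes it to a multiple
// of 256, and adding the luma table base yields a pointer to a 256-byte
// sub-table; B, G and R are then looked up byte-by-byte in that sub-table
// while alpha is copied through unchanged.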
convertloop: movdqu xmm0, xmmword ptr [eax] // generate luma ptr pmaddubsw xmm0, xmm3 phaddw xmm0, xmm0 pand xmm0, xmm4 // mask out low bits punpcklwd xmm0, xmm5 paddd xmm0, xmm2 // add table base movd esi, xmm0 pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32 movzx edx, byte ptr [eax] movzx edx, byte ptr [esi + edx] mov byte ptr [edi], dl movzx edx, byte ptr [eax + 1] movzx edx, byte ptr [esi + edx] mov byte ptr [edi + 1], dl movzx edx, byte ptr [eax + 2] movzx edx, byte ptr [esi + edx] mov byte ptr [edi + 2], dl movzx edx, byte ptr [eax + 3] // copy alpha. mov byte ptr [edi + 3], dl movd esi, xmm0 pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32 movzx edx, byte ptr [eax + 4] movzx edx, byte ptr [esi + edx] mov byte ptr [edi + 4], dl movzx edx, byte ptr [eax + 5] movzx edx, byte ptr [esi + edx] mov byte ptr [edi + 5], dl movzx edx, byte ptr [eax + 6] movzx edx, byte ptr [esi + edx] mov byte ptr [edi + 6], dl movzx edx, byte ptr [eax + 7] // copy alpha. mov byte ptr [edi + 7], dl movd esi, xmm0 pshufd xmm0, xmm0, 0x39 // 00111001 to rotate right 32 movzx edx, byte ptr [eax + 8] movzx edx, byte ptr [esi + edx] mov byte ptr [edi + 8], dl movzx edx, byte ptr [eax + 9] movzx edx, byte ptr [esi + edx] mov byte ptr [edi + 9], dl movzx edx, byte ptr [eax + 10] movzx edx, byte ptr [esi + edx] mov byte ptr [edi + 10], dl movzx edx, byte ptr [eax + 11] // copy alpha. mov byte ptr [edi + 11], dl movd esi, xmm0 movzx edx, byte ptr [eax + 12] movzx edx, byte ptr [esi + edx] mov byte ptr [edi + 12], dl movzx edx, byte ptr [eax + 13] movzx edx, byte ptr [esi + edx] mov byte ptr [edi + 13], dl movzx edx, byte ptr [eax + 14] movzx edx, byte ptr [esi + edx] mov byte ptr [edi + 14], dl movzx edx, byte ptr [eax + 15] // copy alpha. mov byte ptr [edi + 15], dl lea eax, [eax + 16] lea edi, [edi + 16] sub ecx, 4 jg convertloop pop edi pop esi ret } } #endif // HAS_ARGBLUMACOLORTABLEROW_SSSE3 #endif // defined(_M_X64) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // !defined(LIBYUV_DISABLE_X86) && (defined(_M_IX86) || defined(_M_X64))
libvpx-1.8.2/third_party/libyuv/source/scale.cc
/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/scale.h" #include <assert.h> #include <string.h> #include "libyuv/cpu_id.h" #include "libyuv/planar_functions.h" // For CopyPlane #include "libyuv/row.h" #include "libyuv/scale_row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif static __inline int Abs(int v) { return v >= 0 ? v : -v; } #define SUBSAMPLE(v, a, s) (v < 0) ? (-((-v + a) >> s)) : ((v + a) >> s) // Scale plane, 1/2 // This is an optimized version for scaling down a plane to 1/2 of // its original size. static void ScalePlaneDown2(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { int y; void (*ScaleRowDown2)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) = filtering == kFilterNone ? ScaleRowDown2_C : (filtering == kFilterLinear ?
ScaleRowDown2Linear_C : ScaleRowDown2Box_C); int row_stride = src_stride << 1; (void)src_width; (void)src_height; if (!filtering) { src_ptr += src_stride; // Point to odd rows. src_stride = 0; } #if defined(HAS_SCALEROWDOWN2_NEON) if (TestCpuFlag(kCpuHasNEON)) { ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_NEON : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_NEON : ScaleRowDown2Box_Any_NEON); if (IS_ALIGNED(dst_width, 16)) { ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_NEON : (filtering == kFilterLinear ? ScaleRowDown2Linear_NEON : ScaleRowDown2Box_NEON); } } #endif #if defined(HAS_SCALEROWDOWN2_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_SSSE3 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_SSSE3 : ScaleRowDown2Box_Any_SSSE3); if (IS_ALIGNED(dst_width, 16)) { ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_SSSE3 : (filtering == kFilterLinear ? ScaleRowDown2Linear_SSSE3 : ScaleRowDown2Box_SSSE3); } } #endif #if defined(HAS_SCALEROWDOWN2_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_AVX2 : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_AVX2 : ScaleRowDown2Box_Any_AVX2); if (IS_ALIGNED(dst_width, 32)) { ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_AVX2 : (filtering == kFilterLinear ? ScaleRowDown2Linear_AVX2 : ScaleRowDown2Box_AVX2); } } #endif #if defined(HAS_SCALEROWDOWN2_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_Any_MSA : (filtering == kFilterLinear ? ScaleRowDown2Linear_Any_MSA : ScaleRowDown2Box_Any_MSA); if (IS_ALIGNED(dst_width, 32)) { ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_MSA : (filtering == kFilterLinear ? ScaleRowDown2Linear_MSA : ScaleRowDown2Box_MSA); } } #endif if (filtering == kFilterLinear) { src_stride = 0; } // TODO(fbarchard): Loop through source height to allow odd height. for (y = 0; y < dst_height; ++y) { ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); src_ptr += row_stride; dst_ptr += dst_stride; } } static void ScalePlaneDown2_16(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { int y; void (*ScaleRowDown2)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width) = filtering == kFilterNone ? ScaleRowDown2_16_C : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_C : ScaleRowDown2Box_16_C); int row_stride = src_stride << 1; (void)src_width; (void)src_height; if (!filtering) { src_ptr += src_stride; // Point to odd rows. src_stride = 0; } #if defined(HAS_SCALEROWDOWN2_16_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 16)) { ScaleRowDown2 = filtering ? ScaleRowDown2Box_16_NEON : ScaleRowDown2_16_NEON; } #endif #if defined(HAS_SCALEROWDOWN2_16_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 16)) { ScaleRowDown2 = filtering == kFilterNone ? ScaleRowDown2_16_SSE2 : (filtering == kFilterLinear ? ScaleRowDown2Linear_16_SSE2 : ScaleRowDown2Box_16_SSE2); } #endif if (filtering == kFilterLinear) { src_stride = 0; } // TODO(fbarchard): Loop through source height to allow odd height. for (y = 0; y < dst_height; ++y) { ScaleRowDown2(src_ptr, src_stride, dst_ptr, dst_width); src_ptr += row_stride; dst_ptr += dst_stride; } } // Scale plane, 1/4 // This is an optimized version for scaling down a plane to 1/4 of // its original size. 
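// As with the 1/2 scaler above, the row function is chosen in two steps:
// TestCpuFlag() first selects an _Any_ variant that works for any dst_width,
// then an IS_ALIGNED() check upgrades to the fully unrolled SIMD kernel when
// dst_width is a multiple of the vector width. The _Any_ wrappers (see the
// SDANY/CANY macros in scale_any.cc, later in this tree) run the SIMD kernel
// on the aligned portion of the row and fall back to C for the remainder.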
static void ScalePlaneDown4(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { int y; void (*ScaleRowDown4)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) = filtering ? ScaleRowDown4Box_C : ScaleRowDown4_C; int row_stride = src_stride << 2; (void)src_width; (void)src_height; if (!filtering) { src_ptr += src_stride * 2; // Point to row 2. src_stride = 0; } #if defined(HAS_SCALEROWDOWN4_NEON) if (TestCpuFlag(kCpuHasNEON)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_Any_NEON : ScaleRowDown4_Any_NEON; if (IS_ALIGNED(dst_width, 8)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_NEON : ScaleRowDown4_NEON; } } #endif #if defined(HAS_SCALEROWDOWN4_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_Any_SSSE3 : ScaleRowDown4_Any_SSSE3; if (IS_ALIGNED(dst_width, 8)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_SSSE3 : ScaleRowDown4_SSSE3; } } #endif #if defined(HAS_SCALEROWDOWN4_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_Any_AVX2 : ScaleRowDown4_Any_AVX2; if (IS_ALIGNED(dst_width, 16)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_AVX2 : ScaleRowDown4_AVX2; } } #endif #if defined(HAS_SCALEROWDOWN4_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_Any_MSA : ScaleRowDown4_Any_MSA; if (IS_ALIGNED(dst_width, 16)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_MSA : ScaleRowDown4_MSA; } } #endif if (filtering == kFilterLinear) { src_stride = 0; } for (y = 0; y < dst_height; ++y) { ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); src_ptr += row_stride; dst_ptr += dst_stride; } } static void ScalePlaneDown4_16(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { int y; void (*ScaleRowDown4)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width) = filtering ? ScaleRowDown4Box_16_C : ScaleRowDown4_16_C; int row_stride = src_stride << 2; (void)src_width; (void)src_height; if (!filtering) { src_ptr += src_stride * 2; // Point to row 2. src_stride = 0; } #if defined(HAS_SCALEROWDOWN4_16_NEON) if (TestCpuFlag(kCpuHasNEON) && IS_ALIGNED(dst_width, 8)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_NEON : ScaleRowDown4_16_NEON; } #endif #if defined(HAS_SCALEROWDOWN4_16_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { ScaleRowDown4 = filtering ? ScaleRowDown4Box_16_SSE2 : ScaleRowDown4_16_SSE2; } #endif if (filtering == kFilterLinear) { src_stride = 0; } for (y = 0; y < dst_height; ++y) { ScaleRowDown4(src_ptr, src_stride, dst_ptr, dst_width); src_ptr += row_stride; dst_ptr += dst_stride; } } // Scale plane down, 3/4 static void ScalePlaneDown34(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { int y; void (*ScaleRowDown34_0)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void (*ScaleRowDown34_1)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); const int filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; (void)src_width; (void)src_height; assert(dst_width % 3 == 0); if (!filtering) { ScaleRowDown34_0 = ScaleRowDown34_C; ScaleRowDown34_1 = ScaleRowDown34_C; } else { ScaleRowDown34_0 = ScaleRowDown34_0_Box_C; ScaleRowDown34_1 = ScaleRowDown34_1_Box_C; } #if defined(HAS_SCALEROWDOWN34_NEON) if (TestCpuFlag(kCpuHasNEON)) { if (!filtering) { ScaleRowDown34_0 = ScaleRowDown34_Any_NEON; ScaleRowDown34_1 = ScaleRowDown34_Any_NEON; } else { ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_NEON; ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_NEON; } if (dst_width % 24 == 0) { if (!filtering) { ScaleRowDown34_0 = ScaleRowDown34_NEON; ScaleRowDown34_1 = ScaleRowDown34_NEON; } else { ScaleRowDown34_0 = ScaleRowDown34_0_Box_NEON; ScaleRowDown34_1 = ScaleRowDown34_1_Box_NEON; } } } #endif #if defined(HAS_SCALEROWDOWN34_MSA) if (TestCpuFlag(kCpuHasMSA)) { if (!filtering) { ScaleRowDown34_0 = ScaleRowDown34_Any_MSA; ScaleRowDown34_1 = ScaleRowDown34_Any_MSA; } else { ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_MSA; ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_MSA; } if (dst_width % 48 == 0) { if (!filtering) { ScaleRowDown34_0 = ScaleRowDown34_MSA; ScaleRowDown34_1 = ScaleRowDown34_MSA; } else { ScaleRowDown34_0 = ScaleRowDown34_0_Box_MSA; ScaleRowDown34_1 = ScaleRowDown34_1_Box_MSA; } } } #endif #if defined(HAS_SCALEROWDOWN34_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { if (!filtering) { ScaleRowDown34_0 = ScaleRowDown34_Any_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_Any_SSSE3; } else { ScaleRowDown34_0 = ScaleRowDown34_0_Box_Any_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_1_Box_Any_SSSE3; } if (dst_width % 24 == 0) { if (!filtering) { ScaleRowDown34_0 = ScaleRowDown34_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_SSSE3; } else { ScaleRowDown34_0 = ScaleRowDown34_0_Box_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_1_Box_SSSE3; } } } #endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride; dst_ptr += dst_stride; ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride; dst_ptr += dst_stride; ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 2; dst_ptr += dst_stride; } // Remainder 1 or 2 rows with last row vertically unfiltered if ((dst_height % 3) == 2) { ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride; dst_ptr += dst_stride; ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width); } else if ((dst_height % 3) == 1) { ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width); } } static void ScalePlaneDown34_16(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { int y; void (*ScaleRowDown34_0)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width); void (*ScaleRowDown34_1)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width); const int filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; (void)src_width; (void)src_height; assert(dst_width % 3 == 0); if (!filtering) { ScaleRowDown34_0 = ScaleRowDown34_16_C; ScaleRowDown34_1 = ScaleRowDown34_16_C; } else { ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_C; ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_C; } #if defined(HAS_SCALEROWDOWN34_16_NEON) if (TestCpuFlag(kCpuHasNEON) && (dst_width % 24 == 0)) { if (!filtering) { ScaleRowDown34_0 = ScaleRowDown34_16_NEON; ScaleRowDown34_1 = ScaleRowDown34_16_NEON; } else { ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_NEON; ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_NEON; } } #endif #if defined(HAS_SCALEROWDOWN34_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) { if (!filtering) { ScaleRowDown34_0 = ScaleRowDown34_16_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_16_SSSE3; } else { ScaleRowDown34_0 = ScaleRowDown34_0_Box_16_SSSE3; ScaleRowDown34_1 = ScaleRowDown34_1_Box_16_SSSE3; } } #endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride; dst_ptr += dst_stride; ScaleRowDown34_1(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride; dst_ptr += dst_stride; ScaleRowDown34_0(src_ptr + src_stride, -filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 2; dst_ptr += dst_stride; } // Remainder 1 or 2 rows with last row vertically unfiltered if ((dst_height % 3) == 2) { ScaleRowDown34_0(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride; dst_ptr += dst_stride; ScaleRowDown34_1(src_ptr, 0, dst_ptr, dst_width); } else if ((dst_height % 3) == 1) { ScaleRowDown34_0(src_ptr, 0, dst_ptr, dst_width); } } // Scale plane, 3/8 // This is an optimized version for scaling down a plane to 3/8 // of its original size. // // Uses box filter arranged like this // aaabbbcc -> abc // aaabbbcc def // aaabbbcc ghi // dddeeeff // dddeeeff // dddeeeff // ggghhhii // ggghhhii // Boxes are 3x3, 2x3, 3x2 and 2x2 static void ScalePlaneDown38(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { int y; void (*ScaleRowDown38_3)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); void (*ScaleRowDown38_2)(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width); const int filter_stride = (filtering == kFilterLinear) ?
0 : src_stride; assert(dst_width % 3 == 0); (void)src_width; (void)src_height; if (!filtering) { ScaleRowDown38_3 = ScaleRowDown38_C; ScaleRowDown38_2 = ScaleRowDown38_C; } else { ScaleRowDown38_3 = ScaleRowDown38_3_Box_C; ScaleRowDown38_2 = ScaleRowDown38_2_Box_C; } #if defined(HAS_SCALEROWDOWN38_NEON) if (TestCpuFlag(kCpuHasNEON)) { if (!filtering) { ScaleRowDown38_3 = ScaleRowDown38_Any_NEON; ScaleRowDown38_2 = ScaleRowDown38_Any_NEON; } else { ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_NEON; ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_NEON; } if (dst_width % 12 == 0) { if (!filtering) { ScaleRowDown38_3 = ScaleRowDown38_NEON; ScaleRowDown38_2 = ScaleRowDown38_NEON; } else { ScaleRowDown38_3 = ScaleRowDown38_3_Box_NEON; ScaleRowDown38_2 = ScaleRowDown38_2_Box_NEON; } } } #endif #if defined(HAS_SCALEROWDOWN38_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { if (!filtering) { ScaleRowDown38_3 = ScaleRowDown38_Any_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_Any_SSSE3; } else { ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_SSSE3; } if (dst_width % 12 == 0 && !filtering) { ScaleRowDown38_3 = ScaleRowDown38_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_SSSE3; } if (dst_width % 6 == 0 && filtering) { ScaleRowDown38_3 = ScaleRowDown38_3_Box_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_2_Box_SSSE3; } } #endif #if defined(HAS_SCALEROWDOWN38_MSA) if (TestCpuFlag(kCpuHasMSA)) { if (!filtering) { ScaleRowDown38_3 = ScaleRowDown38_Any_MSA; ScaleRowDown38_2 = ScaleRowDown38_Any_MSA; } else { ScaleRowDown38_3 = ScaleRowDown38_3_Box_Any_MSA; ScaleRowDown38_2 = ScaleRowDown38_2_Box_Any_MSA; } if (dst_width % 12 == 0) { if (!filtering) { ScaleRowDown38_3 = ScaleRowDown38_MSA; ScaleRowDown38_2 = ScaleRowDown38_MSA; } else { ScaleRowDown38_3 = ScaleRowDown38_3_Box_MSA; ScaleRowDown38_2 = ScaleRowDown38_2_Box_MSA; } } } #endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 3; dst_ptr += dst_stride; ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 3; dst_ptr += dst_stride; ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 2; dst_ptr += dst_stride; } // Remainder 1 or 2 rows with last row vertically unfiltered if ((dst_height % 3) == 2) { ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 3; dst_ptr += dst_stride; ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); } else if ((dst_height % 3) == 1) { ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); } } static void ScalePlaneDown38_16(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { int y; void (*ScaleRowDown38_3)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width); void (*ScaleRowDown38_2)(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width); const int filter_stride = (filtering == kFilterLinear) ? 
0 : src_stride; (void)src_width; (void)src_height; assert(dst_width % 3 == 0); if (!filtering) { ScaleRowDown38_3 = ScaleRowDown38_16_C; ScaleRowDown38_2 = ScaleRowDown38_16_C; } else { ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_C; ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_C; } #if defined(HAS_SCALEROWDOWN38_16_NEON) if (TestCpuFlag(kCpuHasNEON) && (dst_width % 12 == 0)) { if (!filtering) { ScaleRowDown38_3 = ScaleRowDown38_16_NEON; ScaleRowDown38_2 = ScaleRowDown38_16_NEON; } else { ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_NEON; ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_NEON; } } #endif #if defined(HAS_SCALEROWDOWN38_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && (dst_width % 24 == 0)) { if (!filtering) { ScaleRowDown38_3 = ScaleRowDown38_16_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_16_SSSE3; } else { ScaleRowDown38_3 = ScaleRowDown38_3_Box_16_SSSE3; ScaleRowDown38_2 = ScaleRowDown38_2_Box_16_SSSE3; } } #endif for (y = 0; y < dst_height - 2; y += 3) { ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 3; dst_ptr += dst_stride; ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 3; dst_ptr += dst_stride; ScaleRowDown38_2(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 2; dst_ptr += dst_stride; } // Remainder 1 or 2 rows with last row vertically unfiltered if ((dst_height % 3) == 2) { ScaleRowDown38_3(src_ptr, filter_stride, dst_ptr, dst_width); src_ptr += src_stride * 3; dst_ptr += dst_stride; ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); } else if ((dst_height % 3) == 1) { ScaleRowDown38_3(src_ptr, 0, dst_ptr, dst_width); } } #define MIN1(x) ((x) < 1 ? 1 : (x)) static __inline uint32_t SumPixels(int iboxwidth, const uint16_t* src_ptr) { uint32_t sum = 0u; int x; assert(iboxwidth > 0); for (x = 0; x < iboxwidth; ++x) { sum += src_ptr[x]; } return sum; } static __inline uint32_t SumPixels_16(int iboxwidth, const uint32_t* src_ptr) { uint32_t sum = 0u; int x; assert(iboxwidth > 0); for (x = 0; x < iboxwidth; ++x) { sum += src_ptr[x]; } return sum; } static void ScaleAddCols2_C(int dst_width, int boxheight, int x, int dx, const uint16_t* src_ptr, uint8_t* dst_ptr) { int i; int scaletbl[2]; int minboxwidth = dx >> 16; int boxwidth; scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight); scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight); for (i = 0; i < dst_width; ++i) { int ix = x >> 16; x += dx; boxwidth = MIN1((x >> 16) - ix); *dst_ptr++ = SumPixels(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >> 16; } } static void ScaleAddCols2_16_C(int dst_width, int boxheight, int x, int dx, const uint32_t* src_ptr, uint16_t* dst_ptr) { int i; int scaletbl[2]; int minboxwidth = dx >> 16; int boxwidth; scaletbl[0] = 65536 / (MIN1(minboxwidth) * boxheight); scaletbl[1] = 65536 / (MIN1(minboxwidth + 1) * boxheight); for (i = 0; i < dst_width; ++i) { int ix = x >> 16; x += dx; boxwidth = MIN1((x >> 16) - ix); *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + ix) * scaletbl[boxwidth - minboxwidth] >> 16; } } static void ScaleAddCols0_C(int dst_width, int boxheight, int x, int dx, const uint16_t* src_ptr, uint8_t* dst_ptr) { int scaleval = 65536 / boxheight; int i; (void)dx; src_ptr += (x >> 16); for (i = 0; i < dst_width; ++i) { *dst_ptr++ = src_ptr[i] * scaleval >> 16; } } static void ScaleAddCols1_C(int dst_width, int boxheight, int x, int dx, const uint16_t* src_ptr, uint8_t* dst_ptr) { int boxwidth = MIN1(dx >> 16); int scaleval = 65536 / (boxwidth * boxheight); int i; x >>= 16; for (i = 0; i < 
dst_width; ++i) { *dst_ptr++ = SumPixels(boxwidth, src_ptr + x) * scaleval >> 16; x += boxwidth; } } static void ScaleAddCols1_16_C(int dst_width, int boxheight, int x, int dx, const uint32_t* src_ptr, uint16_t* dst_ptr) { int boxwidth = MIN1(dx >> 16); int scaleval = 65536 / (boxwidth * boxheight); int i; x >>= 16; for (i = 0; i < dst_width; ++i) { *dst_ptr++ = SumPixels_16(boxwidth, src_ptr + x) * scaleval >> 16; x += boxwidth; } } // Scale plane down to any dimensions, with interpolation. // (boxfilter). // // Same method as SimpleScale, which is fixed point, outputting // one pixel of destination using fixed point (16.16) to step // through source, sampling a box of pixels with simple // averaging. static void ScalePlaneBox(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr) { int j, k; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; int dx = 0; int dy = 0; const int max_y = (src_height << 16); ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y, &dx, &dy); src_width = Abs(src_width); { // Allocate a row buffer of uint16_t. align_buffer_64(row16, src_width * 2); void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, const uint16_t* src_ptr, uint8_t* dst_ptr) = (dx & 0xffff) ? ScaleAddCols2_C : ((dx != 0x10000) ? ScaleAddCols1_C : ScaleAddCols0_C); void (*ScaleAddRow)(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) = ScaleAddRow_C; #if defined(HAS_SCALEADDROW_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleAddRow = ScaleAddRow_Any_SSE2; if (IS_ALIGNED(src_width, 16)) { ScaleAddRow = ScaleAddRow_SSE2; } } #endif #if defined(HAS_SCALEADDROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { ScaleAddRow = ScaleAddRow_Any_AVX2; if (IS_ALIGNED(src_width, 32)) { ScaleAddRow = ScaleAddRow_AVX2; } } #endif #if defined(HAS_SCALEADDROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { ScaleAddRow = ScaleAddRow_Any_NEON; if (IS_ALIGNED(src_width, 16)) { ScaleAddRow = ScaleAddRow_NEON; } } #endif #if defined(HAS_SCALEADDROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleAddRow = ScaleAddRow_Any_MSA; if (IS_ALIGNED(src_width, 16)) { ScaleAddRow = ScaleAddRow_MSA; } } #endif for (j = 0; j < dst_height; ++j) { int boxheight; int iy = y >> 16; const uint8_t* src = src_ptr + iy * src_stride; y += dy; if (y > max_y) { y = max_y; } boxheight = MIN1((y >> 16) - iy); memset(row16, 0, src_width * 2); for (k = 0; k < boxheight; ++k) { ScaleAddRow(src, (uint16_t*)(row16), src_width); src += src_stride; } ScaleAddCols(dst_width, boxheight, x, dx, (uint16_t*)(row16), dst_ptr); dst_ptr += dst_stride; } free_aligned_buffer_64(row16); } } static void ScalePlaneBox_16(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr) { int j, k; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; int dx = 0; int dy = 0; const int max_y = (src_height << 16); ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterBox, &x, &y, &dx, &dy); src_width = Abs(src_width); { // Allocate a row buffer of uint32_t. align_buffer_64(row32, src_width * 4); void (*ScaleAddCols)(int dst_width, int boxheight, int x, int dx, const uint32_t* src_ptr, uint16_t* dst_ptr) = (dx & 0xffff) ?
ScaleAddCols2_16_C : ScaleAddCols1_16_C; void (*ScaleAddRow)(const uint16_t* src_ptr, uint32_t* dst_ptr, int src_width) = ScaleAddRow_16_C; #if defined(HAS_SCALEADDROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(src_width, 16)) { ScaleAddRow = ScaleAddRow_16_SSE2; } #endif for (j = 0; j < dst_height; ++j) { int boxheight; int iy = y >> 16; const uint16_t* src = src_ptr + iy * src_stride; y += dy; if (y > max_y) { y = max_y; } boxheight = MIN1((y >> 16) - iy); memset(row32, 0, src_width * 4); for (k = 0; k < boxheight; ++k) { ScaleAddRow(src, (uint32_t*)(row32), src_width); src += src_stride; } ScaleAddCols(dst_width, boxheight, x, dx, (uint32_t*)(row32), dst_ptr); dst_ptr += dst_stride; } free_aligned_buffer_64(row32); } } // Scale plane down with bilinear interpolation. void ScalePlaneBilinearDown(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; int dx = 0; int dy = 0; // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // Allocate a row buffer. align_buffer_64(row, src_width); const int max_y = (src_height - 1) << 16; int j; void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleFilterCols64_C : ScaleFilterCols_C; void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, &dx, &dy); src_width = Abs(src_width); #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; if (IS_ALIGNED(src_width, 32)) { InterpolateRow = InterpolateRow_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { InterpolateRow = InterpolateRow_Any_NEON; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_NEON; } } #endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; if (IS_ALIGNED(src_width, 32)) { InterpolateRow = InterpolateRow_MSA; } } #endif #if defined(HAS_SCALEFILTERCOLS_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleFilterCols = ScaleFilterCols_SSSE3; } #endif #if defined(HAS_SCALEFILTERCOLS_NEON) if (TestCpuFlag(kCpuHasNEON) && src_width < 32768) { ScaleFilterCols = ScaleFilterCols_Any_NEON; if (IS_ALIGNED(dst_width, 8)) { ScaleFilterCols = ScaleFilterCols_NEON; } } #endif #if defined(HAS_SCALEFILTERCOLS_MSA) if (TestCpuFlag(kCpuHasMSA) && src_width < 32768) { ScaleFilterCols = ScaleFilterCols_Any_MSA; if (IS_ALIGNED(dst_width, 16)) { ScaleFilterCols = ScaleFilterCols_MSA; } } #endif if (y > max_y) { y = max_y; } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; const uint8_t* src = src_ptr + yi * src_stride; if (filtering == kFilterLinear) { ScaleFilterCols(dst_ptr, src, dst_width, x, dx); } else { int yf = (y >> 8) & 255; InterpolateRow(row, src, src_stride, src_width, yf); ScaleFilterCols(dst_ptr, row, dst_width, x, dx); } dst_ptr += dst_stride; y += dy; if (y > max_y) { y = max_y; } } free_aligned_buffer_64(row); } void ScalePlaneBilinearDown_16(int src_width, int 
src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; int dx = 0; int dy = 0; // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // Allocate a row buffer. align_buffer_64(row, src_width * 2); const int max_y = (src_height - 1) << 16; int j; void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleFilterCols64_16_C : ScaleFilterCols_16_C; void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, &dx, &dy); src_width = Abs(src_width); #if defined(HAS_INTERPOLATEROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { InterpolateRow = InterpolateRow_Any_16_SSE2; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_16_SSE2; } } #endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_16_SSSE3; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_16_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_16_AVX2; if (IS_ALIGNED(src_width, 32)) { InterpolateRow = InterpolateRow_16_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_16_NEON) if (TestCpuFlag(kCpuHasNEON)) { InterpolateRow = InterpolateRow_Any_16_NEON; if (IS_ALIGNED(src_width, 16)) { InterpolateRow = InterpolateRow_16_NEON; } } #endif #if defined(HAS_SCALEFILTERCOLS_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleFilterCols = ScaleFilterCols_16_SSSE3; } #endif if (y > max_y) { y = max_y; } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; const uint16_t* src = src_ptr + yi * src_stride; if (filtering == kFilterLinear) { ScaleFilterCols(dst_ptr, src, dst_width, x, dx); } else { int yf = (y >> 8) & 255; InterpolateRow((uint16_t*)row, src, src_stride, src_width, yf); ScaleFilterCols(dst_ptr, (uint16_t*)row, dst_width, x, dx); } dst_ptr += dst_stride; y += dy; if (y > max_y) { y = max_y; } } free_aligned_buffer_64(row); } // Scale plane up vertically (horizontally it may scale either way) with bilinear interpolation. void ScalePlaneBilinearUp(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr, enum FilterMode filtering) { int j; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; int dx = 0; int dy = 0; const int max_y = (src_height - 1) << 16; void (*InterpolateRow)(uint8_t * dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; void (*ScaleFilterCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) = filtering ?
ScaleFilterCols_C : ScaleCols_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, &dx, &dy); src_width = Abs(src_width); #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; if (IS_ALIGNED(dst_width, 32)) { InterpolateRow = InterpolateRow_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { InterpolateRow = InterpolateRow_Any_NEON; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_NEON; } } #endif if (filtering && src_width >= 32768) { ScaleFilterCols = ScaleFilterCols64_C; } #if defined(HAS_SCALEFILTERCOLS_SSSE3) if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleFilterCols = ScaleFilterCols_SSSE3; } #endif #if defined(HAS_SCALEFILTERCOLS_NEON) if (filtering && TestCpuFlag(kCpuHasNEON) && src_width < 32768) { ScaleFilterCols = ScaleFilterCols_Any_NEON; if (IS_ALIGNED(dst_width, 8)) { ScaleFilterCols = ScaleFilterCols_NEON; } } #endif #if defined(HAS_SCALEFILTERCOLS_MSA) if (filtering && TestCpuFlag(kCpuHasMSA) && src_width < 32768) { ScaleFilterCols = ScaleFilterCols_Any_MSA; if (IS_ALIGNED(dst_width, 16)) { ScaleFilterCols = ScaleFilterCols_MSA; } } #endif if (!filtering && src_width * 2 == dst_width && x < 0x8000) { ScaleFilterCols = ScaleColsUp2_C; #if defined(HAS_SCALECOLS_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { ScaleFilterCols = ScaleColsUp2_SSE2; } #endif } if (y > max_y) { y = max_y; } { int yi = y >> 16; const uint8_t* src = src_ptr + yi * src_stride; // Allocate 2 row buffers. const int kRowSize = (dst_width + 31) & ~31; align_buffer_64(row, kRowSize * 2); uint8_t* rowptr = row; int rowstride = kRowSize; int lasty = yi; ScaleFilterCols(rowptr, src, dst_width, x, dx); if (src_height > 1) { src += src_stride; } ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx); src += src_stride; for (j = 0; j < dst_height; ++j) { yi = y >> 16; if (yi != lasty) { if (y > max_y) { y = max_y; yi = y >> 16; src = src_ptr + yi * src_stride; } if (yi != lasty) { ScaleFilterCols(rowptr, src, dst_width, x, dx); rowptr += rowstride; rowstride = -rowstride; lasty = yi; src += src_stride; } } if (filtering == kFilterLinear) { InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0); } else { int yf = (y >> 8) & 255; InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf); } dst_ptr += dst_stride; y += dy; } free_aligned_buffer_64(row); } } void ScalePlaneBilinearUp_16(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr, enum FilterMode filtering) { int j; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; int dx = 0; int dy = 0; const int max_y = (src_height - 1) << 16; void (*InterpolateRow)(uint16_t * dst_ptr, const uint16_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_16_C; void (*ScaleFilterCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) = filtering ? 
ScaleFilterCols_16_C : ScaleCols_16_C; ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, &dx, &dy); src_width = Abs(src_width); #if defined(HAS_INTERPOLATEROW_16_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { InterpolateRow = InterpolateRow_Any_16_SSE2; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_16_SSE2; } } #endif #if defined(HAS_INTERPOLATEROW_16_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_16_SSSE3; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_16_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_16_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_16_AVX2; if (IS_ALIGNED(dst_width, 32)) { InterpolateRow = InterpolateRow_16_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_16_NEON) if (TestCpuFlag(kCpuHasNEON)) { InterpolateRow = InterpolateRow_Any_16_NEON; if (IS_ALIGNED(dst_width, 16)) { InterpolateRow = InterpolateRow_16_NEON; } } #endif if (filtering && src_width >= 32768) { ScaleFilterCols = ScaleFilterCols64_16_C; } #if defined(HAS_SCALEFILTERCOLS_16_SSSE3) if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleFilterCols = ScaleFilterCols_16_SSSE3; } #endif if (!filtering && src_width * 2 == dst_width && x < 0x8000) { ScaleFilterCols = ScaleColsUp2_16_C; #if defined(HAS_SCALECOLS_16_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { ScaleFilterCols = ScaleColsUp2_16_SSE2; } #endif } if (y > max_y) { y = max_y; } { int yi = y >> 16; const uint16_t* src = src_ptr + yi * src_stride; // Allocate 2 row buffers. const int kRowSize = (dst_width + 31) & ~31; align_buffer_64(row, kRowSize * 4); uint16_t* rowptr = (uint16_t*)row; int rowstride = kRowSize; int lasty = yi; ScaleFilterCols(rowptr, src, dst_width, x, dx); if (src_height > 1) { src += src_stride; } ScaleFilterCols(rowptr + rowstride, src, dst_width, x, dx); src += src_stride; for (j = 0; j < dst_height; ++j) { yi = y >> 16; if (yi != lasty) { if (y > max_y) { y = max_y; yi = y >> 16; src = src_ptr + yi * src_stride; } if (yi != lasty) { ScaleFilterCols(rowptr, src, dst_width, x, dx); rowptr += rowstride; rowstride = -rowstride; lasty = yi; src += src_stride; } } if (filtering == kFilterLinear) { InterpolateRow(dst_ptr, rowptr, 0, dst_width, 0); } else { int yf = (y >> 8) & 255; InterpolateRow(dst_ptr, rowptr, rowstride, dst_width, yf); } dst_ptr += dst_stride; y += dy; } free_aligned_buffer_64(row); } } // Scale Plane to/from any dimensions, without interpolation. // Fixed point math is used for performance: The upper 16 bits // of x and dx is the integer part of the source position and // the lower 16 bits are the fixed decimal part. static void ScalePlaneSimple(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_ptr, uint8_t* dst_ptr) { int i; void (*ScaleCols)(uint8_t * dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) = ScaleCols_C; // Initial source x/y coordinate and step values as 16.16 fixed point. 
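// Illustrative example: scaling a 640-pixel row down to 480 pixels gives
// dx = (640 << 16) / 480 = 87381, i.e. roughly 1.3333 source pixels per
// destination pixel; each output pixel reads source column (x >> 16) and
// then advances x += dx.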
int x = 0; int y = 0; int dx = 0; int dy = 0; ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y, &dx, &dy); src_width = Abs(src_width); if (src_width * 2 == dst_width && x < 0x8000) { ScaleCols = ScaleColsUp2_C; #if defined(HAS_SCALECOLS_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { ScaleCols = ScaleColsUp2_SSE2; } #endif } for (i = 0; i < dst_height; ++i) { ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); dst_ptr += dst_stride; y += dy; } } static void ScalePlaneSimple_16(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint16_t* src_ptr, uint16_t* dst_ptr) { int i; void (*ScaleCols)(uint16_t * dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) = ScaleCols_16_C; // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; int dx = 0; int dy = 0; ScaleSlope(src_width, src_height, dst_width, dst_height, kFilterNone, &x, &y, &dx, &dy); src_width = Abs(src_width); if (src_width * 2 == dst_width && x < 0x8000) { ScaleCols = ScaleColsUp2_16_C; #if defined(HAS_SCALECOLS_16_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { ScaleCols = ScaleColsUp2_16_SSE2; } #endif } for (i = 0; i < dst_height; ++i) { ScaleCols(dst_ptr, src_ptr + (y >> 16) * src_stride, dst_width, x, dx); dst_ptr += dst_stride; y += dy; } } // Scale a plane. // This function dispatches to a specialized scaler based on scale factor. LIBYUV_API void ScalePlane(const uint8_t* src, int src_stride, int src_width, int src_height, uint8_t* dst, int dst_stride, int dst_width, int dst_height, enum FilterMode filtering) { // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); // Negative height means invert the image. if (src_height < 0) { src_height = -src_height; src = src + (src_height - 1) * src_stride; src_stride = -src_stride; } // Use specialized scales to improve performance for common resolutions. // For example, all the 1/2 scalings will use ScalePlaneDown2() if (dst_width == src_width && dst_height == src_height) { // Straight copy. CopyPlane(src, src_stride, dst, dst_stride, dst_width, dst_height); return; } if (dst_width == src_width && filtering != kFilterBox) { int dy = FixedDiv(src_height, dst_height); // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical(src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, 0, 0, dy, 1, filtering); return; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { // Scale down. if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) { // optimized, 3/4 ScalePlaneDown34(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } if (2 * dst_width == src_width && 2 * dst_height == src_height) { // optimized, 1/2 ScalePlaneDown2(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } // 3/8 rounded up for odd sized chroma height. 
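// For example, 1280x720 -> 480x270 is an exact 3/8 scale (1280 * 3 / 8 == 480
// and 720 * 3 / 8 == 270), and the matching I420 chroma scaling,
// 640x360 -> 240x135, satisfies the same ratio, so both planes take this
// branch.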
if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { // optimized, 3/8 ScalePlaneDown38(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } if (4 * dst_width == src_width && 4 * dst_height == src_height && (filtering == kFilterBox || filtering == kFilterNone)) { // optimized, 1/4 ScalePlaneDown4(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } } if (filtering == kFilterBox && dst_height * 2 < src_height) { ScalePlaneBox(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); return; } if (filtering && dst_height > src_height) { ScalePlaneBilinearUp(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } if (filtering) { ScalePlaneBilinearDown(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } ScalePlaneSimple(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); } LIBYUV_API void ScalePlane_16(const uint16_t* src, int src_stride, int src_width, int src_height, uint16_t* dst, int dst_stride, int dst_width, int dst_height, enum FilterMode filtering) { // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); // Negative height means invert the image. if (src_height < 0) { src_height = -src_height; src = src + (src_height - 1) * src_stride; src_stride = -src_stride; } // Use specialized scales to improve performance for common resolutions. // For example, all the 1/2 scalings will use ScalePlaneDown2() if (dst_width == src_width && dst_height == src_height) { // Straight copy. CopyPlane_16(src, src_stride, dst, dst_stride, dst_width, dst_height); return; } if (dst_width == src_width && filtering != kFilterBox) { int dy = FixedDiv(src_height, dst_height); // Arbitrary scale vertically, but unscaled horizontally. ScalePlaneVertical_16(src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, 0, 0, dy, 1, filtering); return; } if (dst_width <= Abs(src_width) && dst_height <= src_height) { // Scale down. if (4 * dst_width == 3 * src_width && 4 * dst_height == 3 * src_height) { // optimized, 3/4 ScalePlaneDown34_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } if (2 * dst_width == src_width && 2 * dst_height == src_height) { // optimized, 1/2 ScalePlaneDown2_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } // 3/8 rounded up for odd sized chroma height.
if (8 * dst_width == 3 * src_width && 8 * dst_height == 3 * src_height) { // optimized, 3/8 ScalePlaneDown38_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } if (4 * dst_width == src_width && 4 * dst_height == src_height && (filtering == kFilterBox || filtering == kFilterNone)) { // optimized, 1/4 ScalePlaneDown4_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } } if (filtering == kFilterBox && dst_height * 2 < src_height) { ScalePlaneBox_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); return; } if (filtering && dst_height > src_height) { ScalePlaneBilinearUp_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } if (filtering) { ScalePlaneBilinearDown_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst, filtering); return; } ScalePlaneSimple_16(src_width, src_height, dst_width, dst_height, src_stride, dst_stride, src, dst); } // Scale an I420 image. // This function in turn calls a scaling function for each plane. LIBYUV_API int I420Scale(const uint8_t* src_y, int src_stride_y, const uint8_t* src_u, int src_stride_u, const uint8_t* src_v, int src_stride_v, int src_width, int src_height, uint8_t* dst_y, int dst_stride_y, uint8_t* dst_u, int dst_stride_u, uint8_t* dst_v, int dst_stride_v, int dst_width, int dst_height, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { return -1; } ScalePlane(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, dst_width, dst_height, filtering); ScalePlane(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, filtering); ScalePlane(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, filtering); return 0; } LIBYUV_API int I420Scale_16(const uint16_t* src_y, int src_stride_y, const uint16_t* src_u, int src_stride_u, const uint16_t* src_v, int src_stride_v, int src_width, int src_height, uint16_t* dst_y, int dst_stride_y, uint16_t* dst_u, int dst_stride_u, uint16_t* dst_v, int dst_stride_v, int dst_width, int dst_height, enum FilterMode filtering) { int src_halfwidth = SUBSAMPLE(src_width, 1, 1); int src_halfheight = SUBSAMPLE(src_height, 1, 1); int dst_halfwidth = SUBSAMPLE(dst_width, 1, 1); int dst_halfheight = SUBSAMPLE(dst_height, 1, 1); if (!src_y || !src_u || !src_v || src_width == 0 || src_height == 0 || src_width > 32768 || src_height > 32768 || !dst_y || !dst_u || !dst_v || dst_width <= 0 || dst_height <= 0) { return -1; } ScalePlane_16(src_y, src_stride_y, src_width, src_height, dst_y, dst_stride_y, dst_width, dst_height, filtering); ScalePlane_16(src_u, src_stride_u, src_halfwidth, src_halfheight, dst_u, dst_stride_u, dst_halfwidth, dst_halfheight, filtering); ScalePlane_16(src_v, src_stride_v, src_halfwidth, src_halfheight, dst_v, dst_stride_v, dst_halfwidth, dst_halfheight, filtering); return 0; } // Deprecated api LIBYUV_API int Scale(const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, int src_stride_y, int src_stride_u, int src_stride_v, int 
src_width, int src_height, uint8_t* dst_y, uint8_t* dst_u, uint8_t* dst_v, int dst_stride_y, int dst_stride_u, int dst_stride_v, int dst_width, int dst_height, LIBYUV_BOOL interpolate) { return I420Scale(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_width, src_height, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v, dst_width, dst_height, interpolate ? kFilterBox : kFilterNone); } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/scale_any.cc000066400000000000000000000316641357355204000224540ustar00rootroot00000000000000/* * Copyright 2015 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/scale.h" #include "libyuv/scale_row.h" #include "libyuv/basic_types.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // Definition for ScaleFilterCols, ScaleARGBCols and ScaleARGBFilterCols #define CANY(NAMEANY, TERP_SIMD, TERP_C, BPP, MASK) \ void NAMEANY(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, \ int dx) { \ int r = dst_width & MASK; \ int n = dst_width & ~MASK; \ if (n > 0) { \ TERP_SIMD(dst_ptr, src_ptr, n, x, dx); \ } \ TERP_C(dst_ptr + n * BPP, src_ptr, r, x + n * dx, dx); \ } #ifdef HAS_SCALEFILTERCOLS_NEON CANY(ScaleFilterCols_Any_NEON, ScaleFilterCols_NEON, ScaleFilterCols_C, 1, 7) #endif #ifdef HAS_SCALEFILTERCOLS_MSA CANY(ScaleFilterCols_Any_MSA, ScaleFilterCols_MSA, ScaleFilterCols_C, 1, 15) #endif #ifdef HAS_SCALEARGBCOLS_NEON CANY(ScaleARGBCols_Any_NEON, ScaleARGBCols_NEON, ScaleARGBCols_C, 4, 7) #endif #ifdef HAS_SCALEARGBCOLS_MSA CANY(ScaleARGBCols_Any_MSA, ScaleARGBCols_MSA, ScaleARGBCols_C, 4, 3) #endif #ifdef HAS_SCALEARGBFILTERCOLS_NEON CANY(ScaleARGBFilterCols_Any_NEON, ScaleARGBFilterCols_NEON, ScaleARGBFilterCols_C, 4, 3) #endif #ifdef HAS_SCALEARGBFILTERCOLS_MSA CANY(ScaleARGBFilterCols_Any_MSA, ScaleARGBFilterCols_MSA, ScaleARGBFilterCols_C, 4, 7) #endif #undef CANY // Fixed scale down. // Mask may be non-power of 2, so use MOD #define SDANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ int dst_width) { \ int r = (int)((unsigned int)dst_width % (MASK + 1)); /* NOLINT */ \ int n = dst_width - r; \ if (n > 0) { \ SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ } \ SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ dst_ptr + n * BPP, r); \ } // Fixed scale down for odd source width. Used by I420Blend subsampling. // Since dst_width is (width + 1) / 2, this function scales one less pixel // and copies the last pixel. 
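// Illustrative sketch (not part of libyuv): how the SDODD split below lands
// for a hypothetical odd-width plane, assuming a 16-pixel SIMD row function
// (MASK = 15). Example only; not compiled.
#if 0
static void ExampleOddSplit(void) {
  int dst_width = 25;                  // hypothetical odd destination width
  int r = (dst_width - 1) % (15 + 1);  // 8 leftover pixels
  int n = (dst_width - 1) - r;         // 16 pixels go to the SIMD function
  // The C fallback then handles r + 1 == 9 pixels, including the final
  // column, which is copied rather than scaled.
  (void)n;
}
#endif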
#define SDODD(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, FACTOR, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, \ int dst_width) { \ int r = (int)((unsigned int)(dst_width - 1) % (MASK + 1)); /* NOLINT */ \ int n = (dst_width - 1) - r; \ if (n > 0) { \ SCALEROWDOWN_SIMD(src_ptr, src_stride, dst_ptr, n); \ } \ SCALEROWDOWN_C(src_ptr + (n * FACTOR) * BPP, src_stride, \ dst_ptr + n * BPP, r + 1); \ } #ifdef HAS_SCALEROWDOWN2_SSSE3 SDANY(ScaleRowDown2_Any_SSSE3, ScaleRowDown2_SSSE3, ScaleRowDown2_C, 2, 1, 15) SDANY(ScaleRowDown2Linear_Any_SSSE3, ScaleRowDown2Linear_SSSE3, ScaleRowDown2Linear_C, 2, 1, 15) SDANY(ScaleRowDown2Box_Any_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_C, 2, 1, 15) SDODD(ScaleRowDown2Box_Odd_SSSE3, ScaleRowDown2Box_SSSE3, ScaleRowDown2Box_Odd_C, 2, 1, 15) #endif #ifdef HAS_SCALEROWDOWN2_AVX2 SDANY(ScaleRowDown2_Any_AVX2, ScaleRowDown2_AVX2, ScaleRowDown2_C, 2, 1, 31) SDANY(ScaleRowDown2Linear_Any_AVX2, ScaleRowDown2Linear_AVX2, ScaleRowDown2Linear_C, 2, 1, 31) SDANY(ScaleRowDown2Box_Any_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_C, 2, 1, 31) SDODD(ScaleRowDown2Box_Odd_AVX2, ScaleRowDown2Box_AVX2, ScaleRowDown2Box_Odd_C, 2, 1, 31) #endif #ifdef HAS_SCALEROWDOWN2_NEON SDANY(ScaleRowDown2_Any_NEON, ScaleRowDown2_NEON, ScaleRowDown2_C, 2, 1, 15) SDANY(ScaleRowDown2Linear_Any_NEON, ScaleRowDown2Linear_NEON, ScaleRowDown2Linear_C, 2, 1, 15) SDANY(ScaleRowDown2Box_Any_NEON, ScaleRowDown2Box_NEON, ScaleRowDown2Box_C, 2, 1, 15) SDODD(ScaleRowDown2Box_Odd_NEON, ScaleRowDown2Box_NEON, ScaleRowDown2Box_Odd_C, 2, 1, 15) #endif #ifdef HAS_SCALEROWDOWN2_MSA SDANY(ScaleRowDown2_Any_MSA, ScaleRowDown2_MSA, ScaleRowDown2_C, 2, 1, 31) SDANY(ScaleRowDown2Linear_Any_MSA, ScaleRowDown2Linear_MSA, ScaleRowDown2Linear_C, 2, 1, 31) SDANY(ScaleRowDown2Box_Any_MSA, ScaleRowDown2Box_MSA, ScaleRowDown2Box_C, 2, 1, 31) #endif #ifdef HAS_SCALEROWDOWN4_SSSE3 SDANY(ScaleRowDown4_Any_SSSE3, ScaleRowDown4_SSSE3, ScaleRowDown4_C, 4, 1, 7) SDANY(ScaleRowDown4Box_Any_SSSE3, ScaleRowDown4Box_SSSE3, ScaleRowDown4Box_C, 4, 1, 7) #endif #ifdef HAS_SCALEROWDOWN4_AVX2 SDANY(ScaleRowDown4_Any_AVX2, ScaleRowDown4_AVX2, ScaleRowDown4_C, 4, 1, 15) SDANY(ScaleRowDown4Box_Any_AVX2, ScaleRowDown4Box_AVX2, ScaleRowDown4Box_C, 4, 1, 15) #endif #ifdef HAS_SCALEROWDOWN4_NEON SDANY(ScaleRowDown4_Any_NEON, ScaleRowDown4_NEON, ScaleRowDown4_C, 4, 1, 7) SDANY(ScaleRowDown4Box_Any_NEON, ScaleRowDown4Box_NEON, ScaleRowDown4Box_C, 4, 1, 7) #endif #ifdef HAS_SCALEROWDOWN4_MSA SDANY(ScaleRowDown4_Any_MSA, ScaleRowDown4_MSA, ScaleRowDown4_C, 4, 1, 15) SDANY(ScaleRowDown4Box_Any_MSA, ScaleRowDown4Box_MSA, ScaleRowDown4Box_C, 4, 1, 15) #endif #ifdef HAS_SCALEROWDOWN34_SSSE3 SDANY(ScaleRowDown34_Any_SSSE3, ScaleRowDown34_SSSE3, ScaleRowDown34_C, 4 / 3, 1, 23) SDANY(ScaleRowDown34_0_Box_Any_SSSE3, ScaleRowDown34_0_Box_SSSE3, ScaleRowDown34_0_Box_C, 4 / 3, 1, 23) SDANY(ScaleRowDown34_1_Box_Any_SSSE3, ScaleRowDown34_1_Box_SSSE3, ScaleRowDown34_1_Box_C, 4 / 3, 1, 23) #endif #ifdef HAS_SCALEROWDOWN34_NEON SDANY(ScaleRowDown34_Any_NEON, ScaleRowDown34_NEON, ScaleRowDown34_C, 4 / 3, 1, 23) SDANY(ScaleRowDown34_0_Box_Any_NEON, ScaleRowDown34_0_Box_NEON, ScaleRowDown34_0_Box_C, 4 / 3, 1, 23) SDANY(ScaleRowDown34_1_Box_Any_NEON, ScaleRowDown34_1_Box_NEON, ScaleRowDown34_1_Box_C, 4 / 3, 1, 23) #endif #ifdef HAS_SCALEROWDOWN34_MSA SDANY(ScaleRowDown34_Any_MSA, ScaleRowDown34_MSA, ScaleRowDown34_C, 4 / 3, 1, 47) SDANY(ScaleRowDown34_0_Box_Any_MSA, ScaleRowDown34_0_Box_MSA, ScaleRowDown34_0_Box_C, 4 / 
3, 1, 47) SDANY(ScaleRowDown34_1_Box_Any_MSA, ScaleRowDown34_1_Box_MSA, ScaleRowDown34_1_Box_C, 4 / 3, 1, 47) #endif #ifdef HAS_SCALEROWDOWN38_SSSE3 SDANY(ScaleRowDown38_Any_SSSE3, ScaleRowDown38_SSSE3, ScaleRowDown38_C, 8 / 3, 1, 11) SDANY(ScaleRowDown38_3_Box_Any_SSSE3, ScaleRowDown38_3_Box_SSSE3, ScaleRowDown38_3_Box_C, 8 / 3, 1, 5) SDANY(ScaleRowDown38_2_Box_Any_SSSE3, ScaleRowDown38_2_Box_SSSE3, ScaleRowDown38_2_Box_C, 8 / 3, 1, 5) #endif #ifdef HAS_SCALEROWDOWN38_NEON SDANY(ScaleRowDown38_Any_NEON, ScaleRowDown38_NEON, ScaleRowDown38_C, 8 / 3, 1, 11) SDANY(ScaleRowDown38_3_Box_Any_NEON, ScaleRowDown38_3_Box_NEON, ScaleRowDown38_3_Box_C, 8 / 3, 1, 11) SDANY(ScaleRowDown38_2_Box_Any_NEON, ScaleRowDown38_2_Box_NEON, ScaleRowDown38_2_Box_C, 8 / 3, 1, 11) #endif #ifdef HAS_SCALEROWDOWN38_MSA SDANY(ScaleRowDown38_Any_MSA, ScaleRowDown38_MSA, ScaleRowDown38_C, 8 / 3, 1, 11) SDANY(ScaleRowDown38_3_Box_Any_MSA, ScaleRowDown38_3_Box_MSA, ScaleRowDown38_3_Box_C, 8 / 3, 1, 11) SDANY(ScaleRowDown38_2_Box_Any_MSA, ScaleRowDown38_2_Box_MSA, ScaleRowDown38_2_Box_C, 8 / 3, 1, 11) #endif #ifdef HAS_SCALEARGBROWDOWN2_SSE2 SDANY(ScaleARGBRowDown2_Any_SSE2, ScaleARGBRowDown2_SSE2, ScaleARGBRowDown2_C, 2, 4, 3) SDANY(ScaleARGBRowDown2Linear_Any_SSE2, ScaleARGBRowDown2Linear_SSE2, ScaleARGBRowDown2Linear_C, 2, 4, 3) SDANY(ScaleARGBRowDown2Box_Any_SSE2, ScaleARGBRowDown2Box_SSE2, ScaleARGBRowDown2Box_C, 2, 4, 3) #endif #ifdef HAS_SCALEARGBROWDOWN2_NEON SDANY(ScaleARGBRowDown2_Any_NEON, ScaleARGBRowDown2_NEON, ScaleARGBRowDown2_C, 2, 4, 7) SDANY(ScaleARGBRowDown2Linear_Any_NEON, ScaleARGBRowDown2Linear_NEON, ScaleARGBRowDown2Linear_C, 2, 4, 7) SDANY(ScaleARGBRowDown2Box_Any_NEON, ScaleARGBRowDown2Box_NEON, ScaleARGBRowDown2Box_C, 2, 4, 7) #endif #ifdef HAS_SCALEARGBROWDOWN2_MSA SDANY(ScaleARGBRowDown2_Any_MSA, ScaleARGBRowDown2_MSA, ScaleARGBRowDown2_C, 2, 4, 3) SDANY(ScaleARGBRowDown2Linear_Any_MSA, ScaleARGBRowDown2Linear_MSA, ScaleARGBRowDown2Linear_C, 2, 4, 3) SDANY(ScaleARGBRowDown2Box_Any_MSA, ScaleARGBRowDown2Box_MSA, ScaleARGBRowDown2Box_C, 2, 4, 3) #endif #undef SDANY // Scale down by even scale factor. #define SDAANY(NAMEANY, SCALEROWDOWN_SIMD, SCALEROWDOWN_C, BPP, MASK) \ void NAMEANY(const uint8_t* src_ptr, ptrdiff_t src_stride, int src_stepx, \ uint8_t* dst_ptr, int dst_width) { \ int r = dst_width & MASK; \ int n = dst_width & ~MASK; \ if (n > 0) { \ SCALEROWDOWN_SIMD(src_ptr, src_stride, src_stepx, dst_ptr, n); \ } \ SCALEROWDOWN_C(src_ptr + (n * src_stepx) * BPP, src_stride, src_stepx, \ dst_ptr + n * BPP, r); \ } #ifdef HAS_SCALEARGBROWDOWNEVEN_SSE2 SDAANY(ScaleARGBRowDownEven_Any_SSE2, ScaleARGBRowDownEven_SSE2, ScaleARGBRowDownEven_C, 4, 3) SDAANY(ScaleARGBRowDownEvenBox_Any_SSE2, ScaleARGBRowDownEvenBox_SSE2, ScaleARGBRowDownEvenBox_C, 4, 3) #endif #ifdef HAS_SCALEARGBROWDOWNEVEN_NEON SDAANY(ScaleARGBRowDownEven_Any_NEON, ScaleARGBRowDownEven_NEON, ScaleARGBRowDownEven_C, 4, 3) SDAANY(ScaleARGBRowDownEvenBox_Any_NEON, ScaleARGBRowDownEvenBox_NEON, ScaleARGBRowDownEvenBox_C, 4, 3) #endif #ifdef HAS_SCALEARGBROWDOWNEVEN_MSA SDAANY(ScaleARGBRowDownEven_Any_MSA, ScaleARGBRowDownEven_MSA, ScaleARGBRowDownEven_C, 4, 3) SDAANY(ScaleARGBRowDownEvenBox_Any_MSA, ScaleARGBRowDownEvenBox_MSA, ScaleARGBRowDownEvenBox_C, 4, 3) #endif // Add rows box filter scale down. 
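// Illustrative sketch (not part of libyuv): the SAANY splitter below works
// on the *source* width and accumulates 8-bit pixels into a 16-bit sum row
// for the box filter. Example arithmetic, assuming MASK = 15. Example only;
// not compiled.
#if 0
static void ExampleAddRowSplit(void) {
  int src_width = 100;
  int n = src_width & ~15;  // 96 pixels for the SIMD accumulator
  int r = src_width & 15;   // 4 pixels handled by ScaleAddRow_C
  (void)n;
  (void)r;
}
#endif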
#define SAANY(NAMEANY, SCALEADDROW_SIMD, SCALEADDROW_C, MASK)              \
  void NAMEANY(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { \
    int n = src_width & ~MASK;                                             \
    if (n > 0) {                                                           \
      SCALEADDROW_SIMD(src_ptr, dst_ptr, n);                               \
    }                                                                      \
    SCALEADDROW_C(src_ptr + n, dst_ptr + n, src_width & MASK);             \
  }

#ifdef HAS_SCALEADDROW_SSE2
SAANY(ScaleAddRow_Any_SSE2, ScaleAddRow_SSE2, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_AVX2
SAANY(ScaleAddRow_Any_AVX2, ScaleAddRow_AVX2, ScaleAddRow_C, 31)
#endif
#ifdef HAS_SCALEADDROW_NEON
SAANY(ScaleAddRow_Any_NEON, ScaleAddRow_NEON, ScaleAddRow_C, 15)
#endif
#ifdef HAS_SCALEADDROW_MSA
SAANY(ScaleAddRow_Any_MSA, ScaleAddRow_MSA, ScaleAddRow_C, 15)
#endif
#undef SAANY

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
libvpx-1.8.2/third_party/libyuv/source/scale_argb.cc000066400000000000000000001024471357355204000225760ustar00rootroot00000000000000/*
 *  Copyright 2011 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}

// ScaleARGB ARGB, 1/2
// This is an optimized version for scaling down an ARGB to 1/2 of
// its original size.
static void ScaleARGBDown2(int src_width,
                           int src_height,
                           int dst_width,
                           int dst_height,
                           int src_stride,
                           int dst_stride,
                           const uint8_t* src_argb,
                           uint8_t* dst_argb,
                           int x,
                           int dx,
                           int y,
                           int dy,
                           enum FilterMode filtering) {
  int j;
  int row_stride = src_stride * (dy >> 16);
  void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride,
                            uint8_t* dst_argb, int dst_width) =
      filtering == kFilterNone
          ? ScaleARGBRowDown2_C
          : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_C
                                        : ScaleARGBRowDown2Box_C);
  (void)src_width;
  (void)src_height;
  (void)dx;
  assert(dx == 65536 * 2);      // Test scale factor of 2.
  assert((dy & 0x1ffff) == 0);  // Test vertical scale is multiple of 2.
  // Advance to odd row, even column.
  if (filtering == kFilterBilinear) {
    src_argb += (y >> 16) * src_stride + (x >> 16) * 4;
  } else {
    src_argb += (y >> 16) * src_stride + ((x >> 16) - 1) * 4;
  }

#if defined(HAS_SCALEARGBROWDOWN2_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    ScaleARGBRowDown2 =
        filtering == kFilterNone
            ? ScaleARGBRowDown2_Any_SSE2
            : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_SSE2
                                          : ScaleARGBRowDown2Box_Any_SSE2);
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBRowDown2 =
          filtering == kFilterNone
              ? ScaleARGBRowDown2_SSE2
              : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_SSE2
                                            : ScaleARGBRowDown2Box_SSE2);
    }
  }
#endif
#if defined(HAS_SCALEARGBROWDOWN2_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBRowDown2 =
        filtering == kFilterNone
            ? ScaleARGBRowDown2_Any_NEON
            : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_NEON
                                          : ScaleARGBRowDown2Box_Any_NEON);
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBRowDown2 =
          filtering == kFilterNone ? ScaleARGBRowDown2_NEON
                                   : (filtering == kFilterLinear ?
ScaleARGBRowDown2Linear_NEON : ScaleARGBRowDown2Box_NEON); } } #endif #if defined(HAS_SCALEARGBROWDOWN2_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_Any_MSA : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_Any_MSA : ScaleARGBRowDown2Box_Any_MSA); if (IS_ALIGNED(dst_width, 4)) { ScaleARGBRowDown2 = filtering == kFilterNone ? ScaleARGBRowDown2_MSA : (filtering == kFilterLinear ? ScaleARGBRowDown2Linear_MSA : ScaleARGBRowDown2Box_MSA); } } #endif if (filtering == kFilterLinear) { src_stride = 0; } for (j = 0; j < dst_height; ++j) { ScaleARGBRowDown2(src_argb, src_stride, dst_argb, dst_width); src_argb += row_stride; dst_argb += dst_stride; } } // ScaleARGB ARGB, 1/4 // This is an optimized version for scaling down a ARGB to 1/4 of // its original size. static void ScaleARGBDown4Box(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, int dx, int y, int dy) { int j; // Allocate 2 rows of ARGB. const int kRowSize = (dst_width * 2 * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); int row_stride = src_stride * (dy >> 16); void (*ScaleARGBRowDown2)(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) = ScaleARGBRowDown2Box_C; // Advance to odd row, even column. src_argb += (y >> 16) * src_stride + (x >> 16) * 4; (void)src_width; (void)src_height; (void)dx; assert(dx == 65536 * 4); // Test scale factor of 4. assert((dy & 0x3ffff) == 0); // Test vertical scale is multiple of 4. #if defined(HAS_SCALEARGBROWDOWN2_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_SSE2; if (IS_ALIGNED(dst_width, 4)) { ScaleARGBRowDown2 = ScaleARGBRowDown2Box_SSE2; } } #endif #if defined(HAS_SCALEARGBROWDOWN2_NEON) if (TestCpuFlag(kCpuHasNEON)) { ScaleARGBRowDown2 = ScaleARGBRowDown2Box_Any_NEON; if (IS_ALIGNED(dst_width, 8)) { ScaleARGBRowDown2 = ScaleARGBRowDown2Box_NEON; } } #endif for (j = 0; j < dst_height; ++j) { ScaleARGBRowDown2(src_argb, src_stride, row, dst_width * 2); ScaleARGBRowDown2(src_argb + src_stride * 2, src_stride, row + kRowSize, dst_width * 2); ScaleARGBRowDown2(row, kRowSize, dst_argb, dst_width); src_argb += row_stride; dst_argb += dst_stride; } free_aligned_buffer_64(row); } // ScaleARGB ARGB Even // This is an optimized version for scaling down a ARGB to even // multiple of its original size. static void ScaleARGBDownEven(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, int dx, int y, int dy, enum FilterMode filtering) { int j; int col_step = dx >> 16; int row_stride = (dy >> 16) * src_stride; void (*ScaleARGBRowDownEven)(const uint8_t* src_argb, ptrdiff_t src_stride, int src_step, uint8_t* dst_argb, int dst_width) = filtering ? ScaleARGBRowDownEvenBox_C : ScaleARGBRowDownEven_C; (void)src_width; (void)src_height; assert(IS_ALIGNED(src_width, 2)); assert(IS_ALIGNED(src_height, 2)); src_argb += (y >> 16) * src_stride + (x >> 16) * 4; #if defined(HAS_SCALEARGBROWDOWNEVEN_SSE2) if (TestCpuFlag(kCpuHasSSE2)) { ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_SSE2 : ScaleARGBRowDownEven_Any_SSE2; if (IS_ALIGNED(dst_width, 4)) { ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_SSE2 : ScaleARGBRowDownEven_SSE2; } } #endif #if defined(HAS_SCALEARGBROWDOWNEVEN_NEON) if (TestCpuFlag(kCpuHasNEON)) { ScaleARGBRowDownEven = filtering ? 
ScaleARGBRowDownEvenBox_Any_NEON : ScaleARGBRowDownEven_Any_NEON; if (IS_ALIGNED(dst_width, 4)) { ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_NEON : ScaleARGBRowDownEven_NEON; } } #endif #if defined(HAS_SCALEARGBROWDOWNEVEN_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_Any_MSA : ScaleARGBRowDownEven_Any_MSA; if (IS_ALIGNED(dst_width, 4)) { ScaleARGBRowDownEven = filtering ? ScaleARGBRowDownEvenBox_MSA : ScaleARGBRowDownEven_MSA; } } #endif if (filtering == kFilterLinear) { src_stride = 0; } for (j = 0; j < dst_height; ++j) { ScaleARGBRowDownEven(src_argb, src_stride, col_step, dst_argb, dst_width); src_argb += row_stride; dst_argb += dst_stride; } } // Scale ARGB down with bilinear interpolation. static void ScaleARGBBilinearDown(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, int dx, int y, int dy, enum FilterMode filtering) { int j; void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleARGBFilterCols64_C : ScaleARGBFilterCols_C; int64_t xlast = x + (int64_t)(dst_width - 1) * dx; int64_t xl = (dx >= 0) ? x : xlast; int64_t xr = (dx >= 0) ? xlast : x; int clip_src_width; xl = (xl >> 16) & ~3; // Left edge aligned. xr = (xr >> 16) + 1; // Right most pixel used. Bilinear uses 2 pixels. xr = (xr + 1 + 3) & ~3; // 1 beyond 4 pixel aligned right most pixel. if (xr > src_width) { xr = src_width; } clip_src_width = (int)(xr - xl) * 4; // Width aligned to 4. src_argb += xl * 4; x -= (int)(xl << 16); #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; if (IS_ALIGNED(clip_src_width, 16)) { InterpolateRow = InterpolateRow_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; if (IS_ALIGNED(clip_src_width, 32)) { InterpolateRow = InterpolateRow_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { InterpolateRow = InterpolateRow_Any_NEON; if (IS_ALIGNED(clip_src_width, 16)) { InterpolateRow = InterpolateRow_NEON; } } #endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; if (IS_ALIGNED(clip_src_width, 32)) { InterpolateRow = InterpolateRow_MSA; } } #endif #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) if (TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; } #endif #if defined(HAS_SCALEARGBFILTERCOLS_NEON) if (TestCpuFlag(kCpuHasNEON)) { ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON; if (IS_ALIGNED(dst_width, 4)) { ScaleARGBFilterCols = ScaleARGBFilterCols_NEON; } } #endif #if defined(HAS_SCALEARGBFILTERCOLS_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA; if (IS_ALIGNED(dst_width, 8)) { ScaleARGBFilterCols = ScaleARGBFilterCols_MSA; } } #endif // TODO(fbarchard): Consider not allocating row buffer for kFilterLinear. // Allocate a row of ARGB. 
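  // Illustrative note (not in the original source): in the loop below, the
  // 16.16 fixed-point row position y splits into an integer source row and
  // an 8-bit blend fraction for InterpolateRow:
  //   int yi = y >> 16;         // source row index
  //   int yf = (y >> 8) & 255;  // 0 = row yi only, 128 = 50/50 blend
  // e.g. y = 0x00018000 gives yi = 1 and yf = 128.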
{ align_buffer_64(row, clip_src_width * 4); const int max_y = (src_height - 1) << 16; if (y > max_y) { y = max_y; } for (j = 0; j < dst_height; ++j) { int yi = y >> 16; const uint8_t* src = src_argb + yi * src_stride; if (filtering == kFilterLinear) { ScaleARGBFilterCols(dst_argb, src, dst_width, x, dx); } else { int yf = (y >> 8) & 255; InterpolateRow(row, src, src_stride, clip_src_width, yf); ScaleARGBFilterCols(dst_argb, row, dst_width, x, dx); } dst_argb += dst_stride; y += dy; if (y > max_y) { y = max_y; } } free_aligned_buffer_64(row); } } // Scale ARGB up with bilinear interpolation. static void ScaleARGBBilinearUp(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, int dx, int y, int dy, enum FilterMode filtering) { int j; void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) = filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; const int max_y = (src_height - 1) << 16; #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; if (IS_ALIGNED(dst_width, 4)) { InterpolateRow = InterpolateRow_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; if (IS_ALIGNED(dst_width, 8)) { InterpolateRow = InterpolateRow_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { InterpolateRow = InterpolateRow_Any_NEON; if (IS_ALIGNED(dst_width, 4)) { InterpolateRow = InterpolateRow_NEON; } } #endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; if (IS_ALIGNED(dst_width, 8)) { InterpolateRow = InterpolateRow_MSA; } } #endif if (src_width >= 32768) { ScaleARGBFilterCols = filtering ? 
ScaleARGBFilterCols64_C : ScaleARGBCols64_C; } #if defined(HAS_SCALEARGBFILTERCOLS_SSSE3) if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) { ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3; } #endif #if defined(HAS_SCALEARGBFILTERCOLS_NEON) if (filtering && TestCpuFlag(kCpuHasNEON)) { ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON; if (IS_ALIGNED(dst_width, 4)) { ScaleARGBFilterCols = ScaleARGBFilterCols_NEON; } } #endif #if defined(HAS_SCALEARGBFILTERCOLS_MSA) if (filtering && TestCpuFlag(kCpuHasMSA)) { ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA; if (IS_ALIGNED(dst_width, 8)) { ScaleARGBFilterCols = ScaleARGBFilterCols_MSA; } } #endif #if defined(HAS_SCALEARGBCOLS_SSE2) if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { ScaleARGBFilterCols = ScaleARGBCols_SSE2; } #endif #if defined(HAS_SCALEARGBCOLS_NEON) if (!filtering && TestCpuFlag(kCpuHasNEON)) { ScaleARGBFilterCols = ScaleARGBCols_Any_NEON; if (IS_ALIGNED(dst_width, 8)) { ScaleARGBFilterCols = ScaleARGBCols_NEON; } } #endif #if defined(HAS_SCALEARGBCOLS_MSA) if (!filtering && TestCpuFlag(kCpuHasMSA)) { ScaleARGBFilterCols = ScaleARGBCols_Any_MSA; if (IS_ALIGNED(dst_width, 4)) { ScaleARGBFilterCols = ScaleARGBCols_MSA; } } #endif if (!filtering && src_width * 2 == dst_width && x < 0x8000) { ScaleARGBFilterCols = ScaleARGBColsUp2_C; #if defined(HAS_SCALEARGBCOLSUP2_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2; } #endif } if (y > max_y) { y = max_y; } { int yi = y >> 16; const uint8_t* src = src_argb + yi * src_stride; // Allocate 2 rows of ARGB. const int kRowSize = (dst_width * 4 + 31) & ~31; align_buffer_64(row, kRowSize * 2); uint8_t* rowptr = row; int rowstride = kRowSize; int lasty = yi; ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); if (src_height > 1) { src += src_stride; } ScaleARGBFilterCols(rowptr + rowstride, src, dst_width, x, dx); src += src_stride; for (j = 0; j < dst_height; ++j) { yi = y >> 16; if (yi != lasty) { if (y > max_y) { y = max_y; yi = y >> 16; src = src_argb + yi * src_stride; } if (yi != lasty) { ScaleARGBFilterCols(rowptr, src, dst_width, x, dx); rowptr += rowstride; rowstride = -rowstride; lasty = yi; src += src_stride; } } if (filtering == kFilterLinear) { InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0); } else { int yf = (y >> 8) & 255; InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf); } dst_argb += dst_stride; y += dy; } free_aligned_buffer_64(row); } } #ifdef YUVSCALEUP // Scale YUV to ARGB up with bilinear interpolation. 
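// Illustrative sketch (not part of libyuv): both bilinear-up paths keep two
// scaled rows in one allocation and ping-pong between them by negating the
// stride, so advancing one source row only rescales a single row.
// Example only; not compiled.
#if 0
static void ExampleRowPingPong(uint8_t* row, int row_size) {
  uint8_t* rowptr = row;     // current top row
  int rowstride = row_size;  // top + rowstride is the bottom row
  // On reaching a new source row, the stale row is overwritten in place:
  //   ScaleARGBFilterCols(rowptr, src, dst_width, x, dx);
  rowptr += rowstride;     // the other buffered row becomes the new top
  rowstride = -rowstride;  // so top + rowstride now names the fresh row
  (void)rowptr;
  (void)rowstride;
}
#endif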
static void ScaleYUVToARGBBilinearUp(int src_width, int src_height, int dst_width, int dst_height, int src_stride_y, int src_stride_u, int src_stride_v, int dst_stride_argb, const uint8_t* src_y, const uint8_t* src_u, const uint8_t* src_v, uint8_t* dst_argb, int x, int dx, int y, int dy, enum FilterMode filtering) { int j; void (*I422ToARGBRow)(const uint8_t* y_buf, const uint8_t* u_buf, const uint8_t* v_buf, uint8_t* rgb_buf, int width) = I422ToARGBRow_C; #if defined(HAS_I422TOARGBROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { I422ToARGBRow = I422ToARGBRow_Any_SSSE3; if (IS_ALIGNED(src_width, 8)) { I422ToARGBRow = I422ToARGBRow_SSSE3; } } #endif #if defined(HAS_I422TOARGBROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { I422ToARGBRow = I422ToARGBRow_Any_AVX2; if (IS_ALIGNED(src_width, 16)) { I422ToARGBRow = I422ToARGBRow_AVX2; } } #endif #if defined(HAS_I422TOARGBROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { I422ToARGBRow = I422ToARGBRow_Any_NEON; if (IS_ALIGNED(src_width, 8)) { I422ToARGBRow = I422ToARGBRow_NEON; } } #endif #if defined(HAS_I422TOARGBROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { I422ToARGBRow = I422ToARGBRow_Any_MSA; if (IS_ALIGNED(src_width, 8)) { I422ToARGBRow = I422ToARGBRow_MSA; } } #endif void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; #if defined(HAS_INTERPOLATEROW_SSSE3) if (TestCpuFlag(kCpuHasSSSE3)) { InterpolateRow = InterpolateRow_Any_SSSE3; if (IS_ALIGNED(dst_width, 4)) { InterpolateRow = InterpolateRow_SSSE3; } } #endif #if defined(HAS_INTERPOLATEROW_AVX2) if (TestCpuFlag(kCpuHasAVX2)) { InterpolateRow = InterpolateRow_Any_AVX2; if (IS_ALIGNED(dst_width, 8)) { InterpolateRow = InterpolateRow_AVX2; } } #endif #if defined(HAS_INTERPOLATEROW_NEON) if (TestCpuFlag(kCpuHasNEON)) { InterpolateRow = InterpolateRow_Any_NEON; if (IS_ALIGNED(dst_width, 4)) { InterpolateRow = InterpolateRow_NEON; } } #endif #if defined(HAS_INTERPOLATEROW_MSA) if (TestCpuFlag(kCpuHasMSA)) { InterpolateRow = InterpolateRow_Any_MSA; if (IS_ALIGNED(dst_width, 8)) { InterpolateRow = InterpolateRow_MSA; } } #endif void (*ScaleARGBFilterCols)(uint8_t * dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) = filtering ? ScaleARGBFilterCols_C : ScaleARGBCols_C; if (src_width >= 32768) { ScaleARGBFilterCols = filtering ? 
                              ScaleARGBFilterCols64_C : ScaleARGBCols64_C;
  }
#if defined(HAS_SCALEARGBFILTERCOLS_SSSE3)
  if (filtering && TestCpuFlag(kCpuHasSSSE3) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_SSSE3;
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_NEON)
  if (filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBFILTERCOLS_MSA)
  if (filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBFilterCols = ScaleARGBFilterCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBFilterCols_MSA;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_SSE2)
  if (!filtering && TestCpuFlag(kCpuHasSSE2) && src_width < 32768) {
    ScaleARGBFilterCols = ScaleARGBCols_SSE2;
  }
#endif
#if defined(HAS_SCALEARGBCOLS_NEON)
  if (!filtering && TestCpuFlag(kCpuHasNEON)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_NEON;
    if (IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBCols_NEON;
    }
  }
#endif
#if defined(HAS_SCALEARGBCOLS_MSA)
  if (!filtering && TestCpuFlag(kCpuHasMSA)) {
    ScaleARGBFilterCols = ScaleARGBCols_Any_MSA;
    if (IS_ALIGNED(dst_width, 4)) {
      ScaleARGBFilterCols = ScaleARGBCols_MSA;
    }
  }
#endif
  if (!filtering && src_width * 2 == dst_width && x < 0x8000) {
    ScaleARGBFilterCols = ScaleARGBColsUp2_C;
#if defined(HAS_SCALEARGBCOLSUP2_SSE2)
    if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) {
      ScaleARGBFilterCols = ScaleARGBColsUp2_SSE2;
    }
#endif
  }

  const int max_y = (src_height - 1) << 16;
  if (y > max_y) {
    y = max_y;
  }
  const int kYShift = 1;  // Shift Y by 1 to convert Y plane to UV coordinate.
  int yi = y >> 16;
  int uv_yi = yi >> kYShift;
  const uint8_t* src_row_y = src_y + yi * src_stride_y;
  const uint8_t* src_row_u = src_u + uv_yi * src_stride_u;
  const uint8_t* src_row_v = src_v + uv_yi * src_stride_v;

  // Allocate 2 rows of ARGB.
  const int kRowSize = (dst_width * 4 + 31) & ~31;
  align_buffer_64(row, kRowSize * 2);

  // Allocate 1 row of ARGB for source conversion.
  align_buffer_64(argb_row, src_width * 4);

  uint8_t* rowptr = row;
  int rowstride = kRowSize;
  int lasty = yi;

  // TODO(fbarchard): Convert first 2 rows of YUV to ARGB.
  ScaleARGBFilterCols(rowptr, src_row_y, dst_width, x, dx);
  if (src_height > 1) {
    src_row_y += src_stride_y;
    if (yi & 1) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }
  ScaleARGBFilterCols(rowptr + rowstride, src_row_y, dst_width, x, dx);
  if (src_height > 2) {
    src_row_y += src_stride_y;
    if (!(yi & 1)) {
      src_row_u += src_stride_u;
      src_row_v += src_stride_v;
    }
  }

  for (j = 0; j < dst_height; ++j) {
    yi = y >> 16;
    if (yi != lasty) {
      if (y > max_y) {
        y = max_y;
        yi = y >> 16;
        uv_yi = yi >> kYShift;
        src_row_y = src_y + yi * src_stride_y;
        src_row_u = src_u + uv_yi * src_stride_u;
        src_row_v = src_v + uv_yi * src_stride_v;
      }
      if (yi != lasty) {
        // TODO(fbarchard): Convert the clipped region of row.
        I422ToARGBRow(src_row_y, src_row_u, src_row_v, argb_row, src_width);
        ScaleARGBFilterCols(rowptr, argb_row, dst_width, x, dx);
        rowptr += rowstride;
        rowstride = -rowstride;
        lasty = yi;
        src_row_y += src_stride_y;
        if (yi & 1) {
          src_row_u += src_stride_u;
          src_row_v += src_stride_v;
        }
      }
    }
    if (filtering == kFilterLinear) {
      InterpolateRow(dst_argb, rowptr, 0, dst_width * 4, 0);
    } else {
      int yf = (y >> 8) & 255;
      InterpolateRow(dst_argb, rowptr, rowstride, dst_width * 4, yf);
    }
    dst_argb += dst_stride_argb;
    y += dy;
  }
  free_aligned_buffer_64(row);
  free_aligned_buffer_64(argb_row);
}
#endif

// Scale ARGB to/from any dimensions, without interpolation.
// Fixed point math is used for performance: The upper 16 bits // of x and dx is the integer part of the source position and // the lower 16 bits are the fixed decimal part. static void ScaleARGBSimple(int src_width, int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, int dx, int y, int dy) { int j; void (*ScaleARGBCols)(uint8_t * dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) = (src_width >= 32768) ? ScaleARGBCols64_C : ScaleARGBCols_C; (void)src_height; #if defined(HAS_SCALEARGBCOLS_SSE2) if (TestCpuFlag(kCpuHasSSE2) && src_width < 32768) { ScaleARGBCols = ScaleARGBCols_SSE2; } #endif #if defined(HAS_SCALEARGBCOLS_NEON) if (TestCpuFlag(kCpuHasNEON)) { ScaleARGBCols = ScaleARGBCols_Any_NEON; if (IS_ALIGNED(dst_width, 8)) { ScaleARGBCols = ScaleARGBCols_NEON; } } #endif #if defined(HAS_SCALEARGBCOLS_MSA) if (TestCpuFlag(kCpuHasMSA)) { ScaleARGBCols = ScaleARGBCols_Any_MSA; if (IS_ALIGNED(dst_width, 4)) { ScaleARGBCols = ScaleARGBCols_MSA; } } #endif if (src_width * 2 == dst_width && x < 0x8000) { ScaleARGBCols = ScaleARGBColsUp2_C; #if defined(HAS_SCALEARGBCOLSUP2_SSE2) if (TestCpuFlag(kCpuHasSSE2) && IS_ALIGNED(dst_width, 8)) { ScaleARGBCols = ScaleARGBColsUp2_SSE2; } #endif } for (j = 0; j < dst_height; ++j) { ScaleARGBCols(dst_argb, src_argb + (y >> 16) * src_stride, dst_width, x, dx); dst_argb += dst_stride; y += dy; } } // ScaleARGB a ARGB. // This function in turn calls a scaling function // suitable for handling the desired resolutions. static void ScaleARGB(const uint8_t* src, int src_stride, int src_width, int src_height, uint8_t* dst, int dst_stride, int dst_width, int dst_height, int clip_x, int clip_y, int clip_width, int clip_height, enum FilterMode filtering) { // Initial source x/y coordinate and step values as 16.16 fixed point. int x = 0; int y = 0; int dx = 0; int dy = 0; // ARGB does not support box filter yet, but allow the user to pass it. // Simplify filtering when possible. filtering = ScaleFilterReduce(src_width, src_height, dst_width, dst_height, filtering); // Negative src_height means invert the image. if (src_height < 0) { src_height = -src_height; src = src + (src_height - 1) * src_stride; src_stride = -src_stride; } ScaleSlope(src_width, src_height, dst_width, dst_height, filtering, &x, &y, &dx, &dy); src_width = Abs(src_width); if (clip_x) { int64_t clipf = (int64_t)(clip_x)*dx; x += (clipf & 0xffff); src += (clipf >> 16) * 4; dst += clip_x * 4; } if (clip_y) { int64_t clipf = (int64_t)(clip_y)*dy; y += (clipf & 0xffff); src += (clipf >> 16) * src_stride; dst += clip_y * dst_stride; } // Special case for integer step values. if (((dx | dy) & 0xffff) == 0) { if (!dx || !dy) { // 1 pixel wide and/or tall. filtering = kFilterNone; } else { // Optimized even scale down. ie 2, 4, 6, 8, 10x. if (!(dx & 0x10000) && !(dy & 0x10000)) { if (dx == 0x20000) { // Optimized 1/2 downsample. ScaleARGBDown2(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); return; } if (dx == 0x40000 && filtering == kFilterBox) { // Optimized 1/4 box downsample. ScaleARGBDown4Box(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy); return; } ScaleARGBDownEven(src_width, src_height, clip_width, clip_height, src_stride, dst_stride, src, dst, x, dx, y, dy, filtering); return; } // Optimized odd scale down. ie 3, 5, 7, 9x. 
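      // Illustrative note (not in the original source): in 16.16 fixed point
      // an integer scale step has no fraction bits, and bit 16 tells odd from
      // even factors: dx = 0x30000 (a 1/3 downscale) has (dx & 0xffff) == 0
      // and (dx & 0x10000) != 0, so it takes the point-sampling path below.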
      if ((dx & 0x10000) && (dy & 0x10000)) {
        filtering = kFilterNone;
        if (dx == 0x10000 && dy == 0x10000) {
          // Straight copy.
          ARGBCopy(src + (y >> 16) * src_stride + (x >> 16) * 4, src_stride,
                   dst, dst_stride, clip_width, clip_height);
          return;
        }
      }
    }
  }
  if (dx == 0x10000 && (x & 0xffff) == 0) {
    // Arbitrary scale vertically, but unscaled horizontally.
    ScalePlaneVertical(src_height, clip_width, clip_height, src_stride,
                       dst_stride, src, dst, x, y, dy, 4, filtering);
    return;
  }
  if (filtering && dy < 65536) {
    ScaleARGBBilinearUp(src_width, src_height, clip_width, clip_height,
                        src_stride, dst_stride, src, dst, x, dx, y, dy,
                        filtering);
    return;
  }
  if (filtering) {
    ScaleARGBBilinearDown(src_width, src_height, clip_width, clip_height,
                          src_stride, dst_stride, src, dst, x, dx, y, dy,
                          filtering);
    return;
  }
  ScaleARGBSimple(src_width, src_height, clip_width, clip_height, src_stride,
                  dst_stride, src, dst, x, dx, y, dy);
}

LIBYUV_API
int ARGBScaleClip(const uint8_t* src_argb,
                  int src_stride_argb,
                  int src_width,
                  int src_height,
                  uint8_t* dst_argb,
                  int dst_stride_argb,
                  int dst_width,
                  int dst_height,
                  int clip_x,
                  int clip_y,
                  int clip_width,
                  int clip_height,
                  enum FilterMode filtering) {
  if (!src_argb || src_width == 0 || src_height == 0 || !dst_argb ||
      dst_width <= 0 || dst_height <= 0 || clip_x < 0 || clip_y < 0 ||
      clip_width > 32768 || clip_height > 32768 ||
      (clip_x + clip_width) > dst_width ||
      (clip_y + clip_height) > dst_height) {
    return -1;
  }
  ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
            dst_stride_argb, dst_width, dst_height, clip_x, clip_y, clip_width,
            clip_height, filtering);
  return 0;
}

// Scale an ARGB image.
LIBYUV_API
int ARGBScale(const uint8_t* src_argb,
              int src_stride_argb,
              int src_width,
              int src_height,
              uint8_t* dst_argb,
              int dst_stride_argb,
              int dst_width,
              int dst_height,
              enum FilterMode filtering) {
  if (!src_argb || src_width == 0 || src_height == 0 || src_width > 32768 ||
      src_height > 32768 || !dst_argb || dst_width <= 0 || dst_height <= 0) {
    return -1;
  }
  ScaleARGB(src_argb, src_stride_argb, src_width, src_height, dst_argb,
            dst_stride_argb, dst_width, dst_height, 0, 0, dst_width,
            dst_height, filtering);
  return 0;
}

// Scale with YUV conversion to ARGB and clipping.
LIBYUV_API
int YUVToARGBScaleClip(const uint8_t* src_y,
                       int src_stride_y,
                       const uint8_t* src_u,
                       int src_stride_u,
                       const uint8_t* src_v,
                       int src_stride_v,
                       uint32_t src_fourcc,
                       int src_width,
                       int src_height,
                       uint8_t* dst_argb,
                       int dst_stride_argb,
                       uint32_t dst_fourcc,
                       int dst_width,
                       int dst_height,
                       int clip_x,
                       int clip_y,
                       int clip_width,
                       int clip_height,
                       enum FilterMode filtering) {
  uint8_t* argb_buffer = (uint8_t*)malloc(src_width * src_height * 4);
  int r;
  (void)src_fourcc;  // TODO(fbarchard): implement and/or assert.
  (void)dst_fourcc;
  I420ToARGB(src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
             argb_buffer, src_width * 4, src_width, src_height);

  r = ARGBScaleClip(argb_buffer, src_width * 4, src_width, src_height,
                    dst_argb, dst_stride_argb, dst_width, dst_height, clip_x,
                    clip_y, clip_width, clip_height, filtering);
  free(argb_buffer);
  return r;
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
libvpx-1.8.2/third_party/libyuv/source/scale_common.cc000066400000000000000000001157561357355204000231540ustar00rootroot00000000000000/*
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree.
 *  An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/scale.h"

#include <assert.h>
#include <string.h>

#include "libyuv/cpu_id.h"
#include "libyuv/planar_functions.h"  // For CopyARGB
#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

static __inline int Abs(int v) {
  return v >= 0 ? v : -v;
}

// CPU agnostic row functions
void ScaleRowDown2_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    dst += 2;
    src_ptr += 4;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[1];
  }
}

void ScaleRowDown2_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[1];
    dst[1] = src_ptr[3];
    dst += 2;
    src_ptr += 4;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[1];
  }
}

void ScaleRowDown2Linear_C(const uint8_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint8_t* dst,
                           int dst_width) {
  const uint8_t* s = src_ptr;
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
    dst[1] = (s[2] + s[3] + 1) >> 1;
    dst += 2;
    s += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
  }
}

void ScaleRowDown2Linear_16_C(const uint16_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint16_t* dst,
                              int dst_width) {
  const uint16_t* s = src_ptr;
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
    dst[1] = (s[2] + s[3] + 1) >> 1;
    dst += 2;
    s += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + 1) >> 1;
  }
}

void ScaleRowDown2Box_C(const uint8_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint8_t* dst,
                        int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  }
}

void ScaleRowDown2Box_Odd_C(const uint8_t* src_ptr,
                            ptrdiff_t src_stride,
                            uint8_t* dst,
                            int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;
  int x;
  dst_width -= 1;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst += 1;
    s += 2;
    t += 2;
  }
  dst[0] = (s[0] + t[0] + 1) >> 1;
}

void ScaleRowDown2Box_16_C(const uint16_t* src_ptr,
                           ptrdiff_t src_stride,
                           uint16_t* dst,
                           int dst_width) {
  const uint16_t* s = src_ptr;
  const uint16_t* t = src_ptr + src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
    dst[1] = (s[2] + s[3] + t[2] + t[3] + 2) >> 2;
    dst += 2;
    s += 4;
    t += 4;
  }
  if (dst_width & 1) {
    dst[0] = (s[0] + s[1] + t[0] + t[1] + 2) >> 2;
  }
}

void ScaleRowDown4_C(const uint8_t* src_ptr,
                     ptrdiff_t src_stride,
                     uint8_t* dst,
                     int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[2];
    dst[1] = src_ptr[6];
    dst += 2;
    src_ptr += 8;
  }
  if (dst_width & 1) {
    dst[0] = src_ptr[2];
  }
}

void ScaleRowDown4_16_C(const uint16_t* src_ptr,
                        ptrdiff_t src_stride,
                        uint16_t* dst,
                        int dst_width) {
  int x;
  (void)src_stride;
  for (x = 0; x < dst_width - 1; x += 2) {
    dst[0] = src_ptr[2];
dst[1] = src_ptr[6]; dst += 2; src_ptr += 8; } if (dst_width & 1) { dst[0] = src_ptr[2]; } } void ScaleRowDown4Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { intptr_t stride = src_stride; int x; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride + 3] + src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + 8) >> 4; dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] + src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] + src_ptr[stride + 7] + src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] + 8) >> 4; dst += 2; src_ptr += 8; } if (dst_width & 1) { dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride + 3] + src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + 8) >> 4; } } void ScaleRowDown4Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width) { intptr_t stride = src_stride; int x; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride + 3] + src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + 8) >> 4; dst[1] = (src_ptr[4] + src_ptr[5] + src_ptr[6] + src_ptr[7] + src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride + 6] + src_ptr[stride + 7] + src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5] + src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7] + src_ptr[stride * 3 + 4] + src_ptr[stride * 3 + 5] + src_ptr[stride * 3 + 6] + src_ptr[stride * 3 + 7] + 8) >> 4; dst += 2; src_ptr += 8; } if (dst_width & 1) { dst[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[3] + src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride + 3] + src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2] + src_ptr[stride * 2 + 3] + src_ptr[stride * 3 + 0] + src_ptr[stride * 3 + 1] + src_ptr[stride * 3 + 2] + src_ptr[stride * 3 + 3] + 8) >> 4; } } void ScaleRowDown34_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { int x; (void)src_stride; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { dst[0] = src_ptr[0]; dst[1] = src_ptr[1]; dst[2] = src_ptr[3]; dst += 3; src_ptr += 4; } } void ScaleRowDown34_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst, int dst_width) { int x; (void)src_stride; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { dst[0] = src_ptr[0]; dst[1] = src_ptr[1]; dst[2] = src_ptr[3]; dst += 3; src_ptr += 4; } } // Filter rows 0 and 1 together, 3 : 1 void ScaleRowDown34_0_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* d, int dst_width) { const uint8_t* s = src_ptr; const uint8_t* t 
= src_ptr + src_stride; int x; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; d[0] = (a0 * 3 + b0 + 2) >> 2; d[1] = (a1 * 3 + b1 + 2) >> 2; d[2] = (a2 * 3 + b2 + 2) >> 2; d += 3; s += 4; t += 4; } } void ScaleRowDown34_0_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* d, int dst_width) { const uint16_t* s = src_ptr; const uint16_t* t = src_ptr + src_stride; int x; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; d[0] = (a0 * 3 + b0 + 2) >> 2; d[1] = (a1 * 3 + b1 + 2) >> 2; d[2] = (a2 * 3 + b2 + 2) >> 2; d += 3; s += 4; t += 4; } } // Filter rows 1 and 2 together, 1 : 1 void ScaleRowDown34_1_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* d, int dst_width) { const uint8_t* s = src_ptr; const uint8_t* t = src_ptr + src_stride; int x; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { uint8_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; uint8_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; uint8_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; uint8_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; uint8_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; uint8_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; d[0] = (a0 + b0 + 1) >> 1; d[1] = (a1 + b1 + 1) >> 1; d[2] = (a2 + b2 + 1) >> 1; d += 3; s += 4; t += 4; } } void ScaleRowDown34_1_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* d, int dst_width) { const uint16_t* s = src_ptr; const uint16_t* t = src_ptr + src_stride; int x; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 3) { uint16_t a0 = (s[0] * 3 + s[1] * 1 + 2) >> 2; uint16_t a1 = (s[1] * 1 + s[2] * 1 + 1) >> 1; uint16_t a2 = (s[2] * 1 + s[3] * 3 + 2) >> 2; uint16_t b0 = (t[0] * 3 + t[1] * 1 + 2) >> 2; uint16_t b1 = (t[1] * 1 + t[2] * 1 + 1) >> 1; uint16_t b2 = (t[2] * 1 + t[3] * 3 + 2) >> 2; d[0] = (a0 + b0 + 1) >> 1; d[1] = (a1 + b1 + 1) >> 1; d[2] = (a2 + b2 + 1) >> 1; d += 3; s += 4; t += 4; } } // Scales a single row of pixels using point sampling. void ScaleCols_C(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) { int j; for (j = 0; j < dst_width - 1; j += 2) { dst_ptr[0] = src_ptr[x >> 16]; x += dx; dst_ptr[1] = src_ptr[x >> 16]; x += dx; dst_ptr += 2; } if (dst_width & 1) { dst_ptr[0] = src_ptr[x >> 16]; } } void ScaleCols_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) { int j; for (j = 0; j < dst_width - 1; j += 2) { dst_ptr[0] = src_ptr[x >> 16]; x += dx; dst_ptr[1] = src_ptr[x >> 16]; x += dx; dst_ptr += 2; } if (dst_width & 1) { dst_ptr[0] = src_ptr[x >> 16]; } } // Scales a single row of pixels up by 2x using point sampling. 
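// Illustrative sketch (not part of libyuv): the Up2 path below can ignore
// x/dx because every source pixel is emitted exactly twice; callers only
// select it when src_width * 2 == dst_width and the initial phase x is below
// 0x8000. Example only; not compiled.
#if 0
static void ExampleColsUp2(const uint8_t* src, uint8_t* dst, int dst_width) {
  int j;
  for (j = 0; j < dst_width - 1; j += 2) {
    dst[j] = dst[j + 1] = src[j >> 1];  // each source pixel appears twice
  }
  if (dst_width & 1) {
    dst[dst_width - 1] = src[(dst_width - 1) >> 1];
  }
}
#endif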
void ScaleColsUp2_C(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) { int j; (void)x; (void)dx; for (j = 0; j < dst_width - 1; j += 2) { dst_ptr[1] = dst_ptr[0] = src_ptr[0]; src_ptr += 1; dst_ptr += 2; } if (dst_width & 1) { dst_ptr[0] = src_ptr[0]; } } void ScaleColsUp2_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) { int j; (void)x; (void)dx; for (j = 0; j < dst_width - 1; j += 2) { dst_ptr[1] = dst_ptr[0] = src_ptr[0]; src_ptr += 1; dst_ptr += 2; } if (dst_width & 1) { dst_ptr[0] = src_ptr[0]; } } // (1-f)a + fb can be replaced with a + f(b-a) #if defined(__arm__) || defined(__aarch64__) #define BLENDER(a, b, f) \ (uint8_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) #else // Intel uses 7 bit math with rounding. #define BLENDER(a, b, f) \ (uint8_t)((int)(a) + (((int)((f) >> 9) * ((int)(b) - (int)(a)) + 0x40) >> 7)) #endif void ScaleFilterCols_C(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) { int j; for (j = 0; j < dst_width - 1; j += 2) { int xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); x += dx; xi = x >> 16; a = src_ptr[xi]; b = src_ptr[xi + 1]; dst_ptr[1] = BLENDER(a, b, x & 0xffff); x += dx; dst_ptr += 2; } if (dst_width & 1) { int xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); } } void ScaleFilterCols64_C(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x32, int dx) { int64_t x = (int64_t)(x32); int j; for (j = 0; j < dst_width - 1; j += 2) { int64_t xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); x += dx; xi = x >> 16; a = src_ptr[xi]; b = src_ptr[xi + 1]; dst_ptr[1] = BLENDER(a, b, x & 0xffff); x += dx; dst_ptr += 2; } if (dst_width & 1) { int64_t xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); } } #undef BLENDER // Same as 8 bit arm blender but return is cast to uint16_t #define BLENDER(a, b, f) \ (uint16_t)((int)(a) + ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) void ScaleFilterCols_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x, int dx) { int j; for (j = 0; j < dst_width - 1; j += 2) { int xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); x += dx; xi = x >> 16; a = src_ptr[xi]; b = src_ptr[xi + 1]; dst_ptr[1] = BLENDER(a, b, x & 0xffff); x += dx; dst_ptr += 2; } if (dst_width & 1) { int xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); } } void ScaleFilterCols64_16_C(uint16_t* dst_ptr, const uint16_t* src_ptr, int dst_width, int x32, int dx) { int64_t x = (int64_t)(x32); int j; for (j = 0; j < dst_width - 1; j += 2) { int64_t xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); x += dx; xi = x >> 16; a = src_ptr[xi]; b = src_ptr[xi + 1]; dst_ptr[1] = BLENDER(a, b, x & 0xffff); x += dx; dst_ptr += 2; } if (dst_width & 1) { int64_t xi = x >> 16; int a = src_ptr[xi]; int b = src_ptr[xi + 1]; dst_ptr[0] = BLENDER(a, b, x & 0xffff); } } #undef BLENDER void ScaleRowDown38_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { int x; (void)src_stride; assert(dst_width % 3 == 0); for (x = 0; x < dst_width; x += 3) { dst[0] = src_ptr[0]; dst[1] = src_ptr[3]; dst[2] = src_ptr[6]; dst += 3; src_ptr += 8; } } void ScaleRowDown38_16_C(const uint16_t* src_ptr, 
ptrdiff_t src_stride, uint16_t* dst, int dst_width) { int x; (void)src_stride; assert(dst_width % 3 == 0); for (x = 0; x < dst_width; x += 3) { dst[0] = src_ptr[0]; dst[1] = src_ptr[3]; dst[2] = src_ptr[6]; dst += 3; src_ptr += 8; } } // 8x3 -> 3x1 void ScaleRowDown38_3_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { intptr_t stride = src_stride; int i; assert((dst_width % 3 == 0) && (dst_width > 0)); for (i = 0; i < dst_width; i += 3) { dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * (65536 / 9) >> 16; dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] + src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] + src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * (65536 / 9) >> 16; dst_ptr[2] = (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] + src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * (65536 / 6) >> 16; src_ptr += 8; dst_ptr += 3; } } void ScaleRowDown38_3_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width) { intptr_t stride = src_stride; int i; assert((dst_width % 3 == 0) && (dst_width > 0)); for (i = 0; i < dst_width; i += 3) { dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2] + src_ptr[stride * 2 + 0] + src_ptr[stride * 2 + 1] + src_ptr[stride * 2 + 2]) * (65536 / 9) >> 16; dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] + src_ptr[stride + 4] + src_ptr[stride + 5] + src_ptr[stride * 2 + 3] + src_ptr[stride * 2 + 4] + src_ptr[stride * 2 + 5]) * (65536 / 9) >> 16; dst_ptr[2] = (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7] + src_ptr[stride * 2 + 6] + src_ptr[stride * 2 + 7]) * (65536 / 6) >> 16; src_ptr += 8; dst_ptr += 3; } } // 8x2 -> 3x1 void ScaleRowDown38_2_Box_C(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { intptr_t stride = src_stride; int i; assert((dst_width % 3 == 0) && (dst_width > 0)); for (i = 0; i < dst_width; i += 3) { dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2]) * (65536 / 6) >> 16; dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] + src_ptr[stride + 4] + src_ptr[stride + 5]) * (65536 / 6) >> 16; dst_ptr[2] = (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) * (65536 / 4) >> 16; src_ptr += 8; dst_ptr += 3; } } void ScaleRowDown38_2_Box_16_C(const uint16_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int dst_width) { intptr_t stride = src_stride; int i; assert((dst_width % 3 == 0) && (dst_width > 0)); for (i = 0; i < dst_width; i += 3) { dst_ptr[0] = (src_ptr[0] + src_ptr[1] + src_ptr[2] + src_ptr[stride + 0] + src_ptr[stride + 1] + src_ptr[stride + 2]) * (65536 / 6) >> 16; dst_ptr[1] = (src_ptr[3] + src_ptr[4] + src_ptr[5] + src_ptr[stride + 3] + src_ptr[stride + 4] + src_ptr[stride + 5]) * (65536 / 6) >> 16; dst_ptr[2] = (src_ptr[6] + src_ptr[7] + src_ptr[stride + 6] + src_ptr[stride + 7]) * (65536 / 4) >> 16; src_ptr += 8; dst_ptr += 3; } } void ScaleAddRow_C(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { int x; assert(src_width > 0); for (x = 0; x < src_width - 1; x += 2) { dst_ptr[0] += src_ptr[0]; dst_ptr[1] += src_ptr[1]; src_ptr += 2; dst_ptr += 2; } if (src_width & 1) { dst_ptr[0] += src_ptr[0]; } 
} void ScaleAddRow_16_C(const uint16_t* src_ptr, uint32_t* dst_ptr, int src_width) { int x; assert(src_width > 0); for (x = 0; x < src_width - 1; x += 2) { dst_ptr[0] += src_ptr[0]; dst_ptr[1] += src_ptr[1]; src_ptr += 2; dst_ptr += 2; } if (src_width & 1) { dst_ptr[0] += src_ptr[0]; } } void ScaleARGBRowDown2_C(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) { const uint32_t* src = (const uint32_t*)(src_argb); uint32_t* dst = (uint32_t*)(dst_argb); int x; (void)src_stride; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = src[1]; dst[1] = src[3]; src += 4; dst += 2; } if (dst_width & 1) { dst[0] = src[1]; } } void ScaleARGBRowDown2Linear_C(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) { int x; (void)src_stride; for (x = 0; x < dst_width; ++x) { dst_argb[0] = (src_argb[0] + src_argb[4] + 1) >> 1; dst_argb[1] = (src_argb[1] + src_argb[5] + 1) >> 1; dst_argb[2] = (src_argb[2] + src_argb[6] + 1) >> 1; dst_argb[3] = (src_argb[3] + src_argb[7] + 1) >> 1; src_argb += 8; dst_argb += 4; } } void ScaleARGBRowDown2Box_C(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) { int x; for (x = 0; x < dst_width; ++x) { dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2; dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2; dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2; dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2; src_argb += 8; dst_argb += 4; } } void ScaleARGBRowDownEven_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width) { const uint32_t* src = (const uint32_t*)(src_argb); uint32_t* dst = (uint32_t*)(dst_argb); (void)src_stride; int x; for (x = 0; x < dst_width - 1; x += 2) { dst[0] = src[0]; dst[1] = src[src_stepx]; src += src_stepx * 2; dst += 2; } if (dst_width & 1) { dst[0] = src[0]; } } void ScaleARGBRowDownEvenBox_C(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width) { int x; for (x = 0; x < dst_width; ++x) { dst_argb[0] = (src_argb[0] + src_argb[4] + src_argb[src_stride] + src_argb[src_stride + 4] + 2) >> 2; dst_argb[1] = (src_argb[1] + src_argb[5] + src_argb[src_stride + 1] + src_argb[src_stride + 5] + 2) >> 2; dst_argb[2] = (src_argb[2] + src_argb[6] + src_argb[src_stride + 2] + src_argb[src_stride + 6] + 2) >> 2; dst_argb[3] = (src_argb[3] + src_argb[7] + src_argb[src_stride + 3] + src_argb[src_stride + 7] + 2) >> 2; src_argb += src_stepx * 4; dst_argb += 4; } } // Scales a single row of pixels using point sampling. 
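//
// Illustrative sketch (annotation, not part of upstream libyuv): point
// sampling steps through the source in 16.16 fixed point and copies the
// nearest pixel, discarding the fraction. For a single byte plane the same
// idea, with a hypothetical name, reads:
//
//   static void SketchPointSampleCols(uint8_t* dst, const uint8_t* src,
//                                     int dst_width, int x, int dx) {
//     int j;
//     for (j = 0; j < dst_width; ++j) {
//       dst[j] = src[x >> 16];  // integer part selects the source column
//       x += dx;                // e.g. dx = 0x20000 samples every 2nd pixel
//     }
//   }
//
// ScaleARGBCols_C below is the same loop over whole uint32_t ARGB pixels,
// unrolled by two.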
void ScaleARGBCols_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { const uint32_t* src = (const uint32_t*)(src_argb); uint32_t* dst = (uint32_t*)(dst_argb); int j; for (j = 0; j < dst_width - 1; j += 2) { dst[0] = src[x >> 16]; x += dx; dst[1] = src[x >> 16]; x += dx; dst += 2; } if (dst_width & 1) { dst[0] = src[x >> 16]; } } void ScaleARGBCols64_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x32, int dx) { int64_t x = (int64_t)(x32); const uint32_t* src = (const uint32_t*)(src_argb); uint32_t* dst = (uint32_t*)(dst_argb); int j; for (j = 0; j < dst_width - 1; j += 2) { dst[0] = src[x >> 16]; x += dx; dst[1] = src[x >> 16]; x += dx; dst += 2; } if (dst_width & 1) { dst[0] = src[x >> 16]; } } // Scales a single row of pixels up by 2x using point sampling. void ScaleARGBColsUp2_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { const uint32_t* src = (const uint32_t*)(src_argb); uint32_t* dst = (uint32_t*)(dst_argb); int j; (void)x; (void)dx; for (j = 0; j < dst_width - 1; j += 2) { dst[1] = dst[0] = src[0]; src += 1; dst += 2; } if (dst_width & 1) { dst[0] = src[0]; } } // TODO(fbarchard): Replace 0x7f ^ f with 128-f. bug=607. // Mimics SSSE3 blender #define BLENDER1(a, b, f) ((a) * (0x7f ^ f) + (b)*f) >> 7 #define BLENDERC(a, b, f, s) \ (uint32_t)(BLENDER1(((a) >> s) & 255, ((b) >> s) & 255, f) << s) #define BLENDER(a, b, f) \ BLENDERC(a, b, f, 24) | BLENDERC(a, b, f, 16) | BLENDERC(a, b, f, 8) | \ BLENDERC(a, b, f, 0) void ScaleARGBFilterCols_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { const uint32_t* src = (const uint32_t*)(src_argb); uint32_t* dst = (uint32_t*)(dst_argb); int j; for (j = 0; j < dst_width - 1; j += 2) { int xi = x >> 16; int xf = (x >> 9) & 0x7f; uint32_t a = src[xi]; uint32_t b = src[xi + 1]; dst[0] = BLENDER(a, b, xf); x += dx; xi = x >> 16; xf = (x >> 9) & 0x7f; a = src[xi]; b = src[xi + 1]; dst[1] = BLENDER(a, b, xf); x += dx; dst += 2; } if (dst_width & 1) { int xi = x >> 16; int xf = (x >> 9) & 0x7f; uint32_t a = src[xi]; uint32_t b = src[xi + 1]; dst[0] = BLENDER(a, b, xf); } } void ScaleARGBFilterCols64_C(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x32, int dx) { int64_t x = (int64_t)(x32); const uint32_t* src = (const uint32_t*)(src_argb); uint32_t* dst = (uint32_t*)(dst_argb); int j; for (j = 0; j < dst_width - 1; j += 2) { int64_t xi = x >> 16; int xf = (x >> 9) & 0x7f; uint32_t a = src[xi]; uint32_t b = src[xi + 1]; dst[0] = BLENDER(a, b, xf); x += dx; xi = x >> 16; xf = (x >> 9) & 0x7f; a = src[xi]; b = src[xi + 1]; dst[1] = BLENDER(a, b, xf); x += dx; dst += 2; } if (dst_width & 1) { int64_t xi = x >> 16; int xf = (x >> 9) & 0x7f; uint32_t a = src[xi]; uint32_t b = src[xi + 1]; dst[0] = BLENDER(a, b, xf); } } #undef BLENDER1 #undef BLENDERC #undef BLENDER // Scale plane vertically with bilinear interpolation. void ScalePlaneVertical(int src_height, int dst_width, int dst_height, int src_stride, int dst_stride, const uint8_t* src_argb, uint8_t* dst_argb, int x, int y, int dy, int bpp, enum FilterMode filtering) { // TODO(fbarchard): Allow higher bpp. int dst_width_bytes = dst_width * bpp; void (*InterpolateRow)(uint8_t * dst_argb, const uint8_t* src_argb, ptrdiff_t src_stride, int dst_width, int source_y_fraction) = InterpolateRow_C; const int max_y = (src_height > 1) ? 
((src_height - 1) << 16) - 1 : 0;
  int j;
  assert(bpp >= 1 && bpp <= 4);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  src_argb += (x >> 16) * bpp;
#if defined(HAS_INTERPOLATEROW_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_SSSE3;
    if (IS_ALIGNED(dst_width_bytes, 16)) {
      InterpolateRow = InterpolateRow_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_AVX2;
    if (IS_ALIGNED(dst_width_bytes, 32)) {
      InterpolateRow = InterpolateRow_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_NEON;
    if (IS_ALIGNED(dst_width_bytes, 16)) {
      InterpolateRow = InterpolateRow_NEON;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_MSA)
  if (TestCpuFlag(kCpuHasMSA)) {
    InterpolateRow = InterpolateRow_Any_MSA;
    if (IS_ALIGNED(dst_width_bytes, 32)) {
      InterpolateRow = InterpolateRow_MSA;
    }
  }
#endif
  for (j = 0; j < dst_height; ++j) {
    int yi;
    int yf;
    if (y > max_y) {
      y = max_y;
    }
    yi = y >> 16;
    yf = filtering ? ((y >> 8) & 255) : 0;
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
                   dst_width_bytes, yf);
    dst_argb += dst_stride;
    y += dy;
  }
}

void ScalePlaneVertical_16(int src_height,
                           int dst_width,
                           int dst_height,
                           int src_stride,
                           int dst_stride,
                           const uint16_t* src_argb,
                           uint16_t* dst_argb,
                           int x,
                           int y,
                           int dy,
                           int wpp,
                           enum FilterMode filtering) {
  // TODO(fbarchard): Allow higher wpp.
  int dst_width_words = dst_width * wpp;
  void (*InterpolateRow)(uint16_t * dst_argb, const uint16_t* src_argb,
                         ptrdiff_t src_stride, int dst_width,
                         int source_y_fraction) = InterpolateRow_16_C;
  const int max_y = (src_height > 1) ? ((src_height - 1) << 16) - 1 : 0;
  int j;
  assert(wpp >= 1 && wpp <= 2);
  assert(src_height != 0);
  assert(dst_width > 0);
  assert(dst_height > 0);
  src_argb += (x >> 16) * wpp;
#if defined(HAS_INTERPOLATEROW_16_SSE2)
  if (TestCpuFlag(kCpuHasSSE2)) {
    InterpolateRow = InterpolateRow_Any_16_SSE2;
    if (IS_ALIGNED(dst_width_words, 16)) {
      InterpolateRow = InterpolateRow_16_SSE2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_SSSE3)
  if (TestCpuFlag(kCpuHasSSSE3)) {
    InterpolateRow = InterpolateRow_Any_16_SSSE3;
    if (IS_ALIGNED(dst_width_words, 16)) {
      InterpolateRow = InterpolateRow_16_SSSE3;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_AVX2)
  if (TestCpuFlag(kCpuHasAVX2)) {
    InterpolateRow = InterpolateRow_Any_16_AVX2;
    if (IS_ALIGNED(dst_width_words, 32)) {
      InterpolateRow = InterpolateRow_16_AVX2;
    }
  }
#endif
#if defined(HAS_INTERPOLATEROW_16_NEON)
  if (TestCpuFlag(kCpuHasNEON)) {
    InterpolateRow = InterpolateRow_Any_16_NEON;
    if (IS_ALIGNED(dst_width_words, 16)) {
      InterpolateRow = InterpolateRow_16_NEON;
    }
  }
#endif
  for (j = 0; j < dst_height; ++j) {
    int yi;
    int yf;
    if (y > max_y) {
      y = max_y;
    }
    yi = y >> 16;
    yf = filtering ? ((y >> 8) & 255) : 0;
    InterpolateRow(dst_argb, src_argb + yi * src_stride, src_stride,
                   dst_width_words, yf);
    dst_argb += dst_stride;
    y += dy;
  }
}

// Simplify the filtering based on scale factors.
enum FilterMode ScaleFilterReduce(int src_width,
                                  int src_height,
                                  int dst_width,
                                  int dst_height,
                                  enum FilterMode filtering) {
  if (src_width < 0) {
    src_width = -src_width;
  }
  if (src_height < 0) {
    src_height = -src_height;
  }
  if (filtering == kFilterBox) {
    // If scaling both axes to 0.5 or larger, switch from Box to Bilinear.
if (dst_width * 2 >= src_width && dst_height * 2 >= src_height) { filtering = kFilterBilinear; } } if (filtering == kFilterBilinear) { if (src_height == 1) { filtering = kFilterLinear; } // TODO(fbarchard): Detect any odd scale factor and reduce to Linear. if (dst_height == src_height || dst_height * 3 == src_height) { filtering = kFilterLinear; } // TODO(fbarchard): Remove 1 pixel wide filter restriction, which is to // avoid reading 2 pixels horizontally that causes memory exception. if (src_width == 1) { filtering = kFilterNone; } } if (filtering == kFilterLinear) { if (src_width == 1) { filtering = kFilterNone; } // TODO(fbarchard): Detect any odd scale factor and reduce to None. if (dst_width == src_width || dst_width * 3 == src_width) { filtering = kFilterNone; } } return filtering; } // Divide num by div and return as 16.16 fixed point result. int FixedDiv_C(int num, int div) { return (int)(((int64_t)(num) << 16) / div); } // Divide num by div and return as 16.16 fixed point result. int FixedDiv1_C(int num, int div) { return (int)((((int64_t)(num) << 16) - 0x00010001) / (div - 1)); } #define CENTERSTART(dx, s) (dx < 0) ? -((-dx >> 1) + s) : ((dx >> 1) + s) // Compute slope values for stepping. void ScaleSlope(int src_width, int src_height, int dst_width, int dst_height, enum FilterMode filtering, int* x, int* y, int* dx, int* dy) { assert(x != NULL); assert(y != NULL); assert(dx != NULL); assert(dy != NULL); assert(src_width != 0); assert(src_height != 0); assert(dst_width > 0); assert(dst_height > 0); // Check for 1 pixel and avoid FixedDiv overflow. if (dst_width == 1 && src_width >= 32768) { dst_width = src_width; } if (dst_height == 1 && src_height >= 32768) { dst_height = src_height; } if (filtering == kFilterBox) { // Scale step for point sampling duplicates all pixels equally. *dx = FixedDiv(Abs(src_width), dst_width); *dy = FixedDiv(src_height, dst_height); *x = 0; *y = 0; } else if (filtering == kFilterBilinear) { // Scale step for bilinear sampling renders last pixel once for upsample. if (dst_width <= Abs(src_width)) { *dx = FixedDiv(Abs(src_width), dst_width); *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. } else if (dst_width > 1) { *dx = FixedDiv1(Abs(src_width), dst_width); *x = 0; } if (dst_height <= src_height) { *dy = FixedDiv(src_height, dst_height); *y = CENTERSTART(*dy, -32768); // Subtract 0.5 (32768) to center filter. } else if (dst_height > 1) { *dy = FixedDiv1(src_height, dst_height); *y = 0; } } else if (filtering == kFilterLinear) { // Scale step for bilinear sampling renders last pixel once for upsample. if (dst_width <= Abs(src_width)) { *dx = FixedDiv(Abs(src_width), dst_width); *x = CENTERSTART(*dx, -32768); // Subtract 0.5 (32768) to center filter. } else if (dst_width > 1) { *dx = FixedDiv1(Abs(src_width), dst_width); *x = 0; } *dy = FixedDiv(src_height, dst_height); *y = *dy >> 1; } else { // Scale step for point sampling duplicates all pixels equally. *dx = FixedDiv(Abs(src_width), dst_width); *dy = FixedDiv(src_height, dst_height); *x = CENTERSTART(*dx, 0); *y = CENTERSTART(*dy, 0); } // Negative src_width means horizontally mirror. if (src_width < 0) { *x += (dst_width - 1) * *dx; *dx = -*dx; // src_width = -src_width; // Caller must do this. } } #undef CENTERSTART // Read 8x2 upsample with filtering and write 16x1. // actually reads an extra pixel, so 9x2. 
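// NOTE (illustrative annotation, not part of upstream libyuv): the 9, 3, 3, 1
// weights below are bilinear interpolation at the quarter-pixel centres of a
// 2x upsample. Each output pixel sits 1/4 pixel from its nearest source
// column and row, so the weights are the products of (3/4, 1/4) per axis:
//   (3/4)*(3/4) = 9/16, (3/4)*(1/4) = 3/16, (1/4)*(3/4) = 3/16,
//   (1/4)*(1/4) = 1/16,
// scaled by 16 with +8 added for rounding before the >> 4.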
void ScaleRowUp2_16_C(const uint16_t* src_ptr,
                      ptrdiff_t src_stride,
                      uint16_t* dst,
                      int dst_width) {
  const uint16_t* src2 = src_ptr + src_stride;
  int x;
  for (x = 0; x < dst_width - 1; x += 2) {
    uint16_t p0 = src_ptr[0];
    uint16_t p1 = src_ptr[1];
    uint16_t p2 = src2[0];
    uint16_t p3 = src2[1];
    dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
    dst[1] = (p0 * 3 + p1 * 9 + p2 + p3 * 3 + 8) >> 4;
    ++src_ptr;
    ++src2;
    dst += 2;
  }
  if (dst_width & 1) {
    uint16_t p0 = src_ptr[0];
    uint16_t p1 = src_ptr[1];
    uint16_t p2 = src2[0];
    uint16_t p3 = src2[1];
    dst[0] = (p0 * 9 + p1 * 3 + p2 * 3 + p3 + 8) >> 4;
  }
}

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

libvpx-1.8.2/third_party/libyuv/source/scale_gcc.cc

/*
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for GCC x86 and x64.
#if !defined(LIBYUV_DISABLE_X86) && \
    (defined(__x86_64__) || (defined(__i386__) && !defined(_MSC_VER)))

// Offsets for source bytes 0 to 9
static const uvec8 kShuf0 = {0,   1,   3,   4,   5,   7,   8,   9,
                             128, 128, 128, 128, 128, 128, 128, 128};

// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
static const uvec8 kShuf1 = {3,   4,   5,   7,   8,   9,   11,  12,
                             128, 128, 128, 128, 128, 128, 128, 128};

// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
static const uvec8 kShuf2 = {5,   7,   8,   9,   11,  12,  13,  15,
                             128, 128, 128, 128, 128, 128, 128, 128};

// Offsets for source bytes 0 to 10
static const uvec8 kShuf01 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10};

// Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13.
static const uvec8 kShuf11 = {2, 3, 4,  5,  5,  6,  6,  7,
                              8, 9, 9, 10, 10, 11, 12, 13};

// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
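// NOTE (illustrative annotation, not part of upstream libyuv): in these
// shuffle tables a value of 128 has its top bit set, which pshufb treats as
// "write zero to this output byte"; values 0-15 select a byte from the
// source register. The tables therefore both reorder pixels and clear the
// lanes that the 3/4 and 3/8 scalers do not produce.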
static const uvec8 kShuf21 = {5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15}; // Coefficients for source bytes 0 to 10 static const uvec8 kMadd01 = {3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2}; // Coefficients for source bytes 10 to 21 static const uvec8 kMadd11 = {1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1}; // Coefficients for source bytes 21 to 31 static const uvec8 kMadd21 = {2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3}; // Coefficients for source bytes 21 to 31 static const vec16 kRound34 = {2, 2, 2, 2, 2, 2, 2, 2}; static const uvec8 kShuf38a = {0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}; static const uvec8 kShuf38b = {128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128}; // Arrange words 0,3,6 into 0,1,2 static const uvec8 kShufAc = {0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}; // Arrange words 0,3,6 into 3,4,5 static const uvec8 kShufAc3 = {128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128}; // Scaling values for boxes of 3x3 and 2x3 static const uvec16 kScaleAc33 = {65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0}; // Arrange first value for pixels 0,1,2,3,4,5 static const uvec8 kShufAb0 = {0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128}; // Arrange second value for pixels 0,1,2,3,4,5 static const uvec8 kShufAb1 = {1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128}; // Arrange third value for pixels 0,1,2,3,4,5 static const uvec8 kShufAb2 = {2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128}; // Scaling values for boxes of 3x2 and 2x2 static const uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0}; // GCC versions of row functions are verbatim conversions from Visual C. // Generated using gcc disassembly on Visual C object file: // objdump -D yuvscaler.obj >yuvscaler.txt void ScaleRowDown2_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( // 16 pixel loop. 
LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "psrlw $0x8,%%xmm0 \n" "psrlw $0x8,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 ::"memory", "cc", "xmm0", "xmm1"); } void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( "pcmpeqb %%xmm4,%%xmm4 \n" "psrlw $0xf,%%xmm4 \n" "packuswb %%xmm4,%%xmm4 \n" "pxor %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm1 \n" "pavgw %%xmm5,%%xmm0 \n" "pavgw %%xmm5,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 ::"memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"); } void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { asm volatile( "pcmpeqb %%xmm4,%%xmm4 \n" "psrlw $0xf,%%xmm4 \n" "packuswb %%xmm4,%%xmm4 \n" "pxor %%xmm5,%%xmm5 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x00(%0,%3,1),%%xmm2 \n" "movdqu 0x10(%0,%3,1),%%xmm3 \n" "lea 0x20(%0),%0 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm1 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm4,%%xmm3 \n" "paddw %%xmm2,%%xmm0 \n" "paddw %%xmm3,%%xmm1 \n" "psrlw $0x1,%%xmm0 \n" "psrlw $0x1,%%xmm1 \n" "pavgw %%xmm5,%%xmm0 \n" "pavgw %%xmm5,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : "r"((intptr_t)(src_stride)) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #ifdef HAS_SCALEROWDOWN2_AVX2 void ScaleRowDown2_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "lea 0x40(%0),%0 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpsrlw $0x8,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 ::"memory", "cc", "xmm0", "xmm1"); } void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" "vpsrlw $0xf,%%ymm4,%%ymm4 \n" "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" "vpxor %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "lea 0x40(%0),%0 \n" "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" "vpavgw %%ymm5,%%ymm0,%%ymm0 \n" "vpavgw %%ymm5,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 ::"memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"); } void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { asm volatile( "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" "vpsrlw $0xf,%%ymm4,%%ymm4 \n" "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" "vpxor %%ymm5,%%ymm5,%%ymm5 \n" 
LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x00(%0,%3,1),%%ymm2 \n" "vmovdqu 0x20(%0,%3,1),%%ymm3 \n" "lea 0x40(%0),%0 \n" "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" "vpsrlw $0x1,%%ymm0,%%ymm0 \n" "vpsrlw $0x1,%%ymm1,%%ymm1 \n" "vpavgw %%ymm5,%%ymm0,%%ymm0 \n" "vpavgw %%ymm5,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vmovdqu %%ymm0,(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : "r"((intptr_t)(src_stride)) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SCALEROWDOWN2_AVX2 void ScaleRowDown4_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( "pcmpeqb %%xmm5,%%xmm5 \n" "psrld $0x18,%%xmm5 \n" "pslld $0x10,%%xmm5 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "pand %%xmm5,%%xmm0 \n" "pand %%xmm5,%%xmm1 \n" "packuswb %%xmm1,%%xmm0 \n" "psrlw $0x8,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "movq %%xmm0,(%1) \n" "lea 0x8(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 ::"memory", "cc", "xmm0", "xmm1", "xmm5"); } void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { intptr_t stridex3; asm volatile( "pcmpeqb %%xmm4,%%xmm4 \n" "psrlw $0xf,%%xmm4 \n" "movdqa %%xmm4,%%xmm5 \n" "packuswb %%xmm4,%%xmm4 \n" "psllw $0x3,%%xmm5 \n" "lea 0x00(%4,%4,2),%3 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x00(%0,%4,1),%%xmm2 \n" "movdqu 0x10(%0,%4,1),%%xmm3 \n" "pmaddubsw %%xmm4,%%xmm0 \n" "pmaddubsw %%xmm4,%%xmm1 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm4,%%xmm3 \n" "paddw %%xmm2,%%xmm0 \n" "paddw %%xmm3,%%xmm1 \n" "movdqu 0x00(%0,%4,2),%%xmm2 \n" "movdqu 0x10(%0,%4,2),%%xmm3 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm4,%%xmm3 \n" "paddw %%xmm2,%%xmm0 \n" "paddw %%xmm3,%%xmm1 \n" "movdqu 0x00(%0,%3,1),%%xmm2 \n" "movdqu 0x10(%0,%3,1),%%xmm3 \n" "lea 0x20(%0),%0 \n" "pmaddubsw %%xmm4,%%xmm2 \n" "pmaddubsw %%xmm4,%%xmm3 \n" "paddw %%xmm2,%%xmm0 \n" "paddw %%xmm3,%%xmm1 \n" "phaddw %%xmm1,%%xmm0 \n" "paddw %%xmm5,%%xmm0 \n" "psrlw $0x4,%%xmm0 \n" "packuswb %%xmm0,%%xmm0 \n" "movq %%xmm0,(%1) \n" "lea 0x8(%1),%1 \n" "sub $0x8,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width), // %2 "=&r"(stridex3) // %3 : "r"((intptr_t)(src_stride)) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #ifdef HAS_SCALEROWDOWN4_AVX2 void ScaleRowDown4_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( "vpcmpeqb %%ymm5,%%ymm5,%%ymm5 \n" "vpsrld $0x18,%%ymm5,%%ymm5 \n" "vpslld $0x10,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "lea 0x40(%0),%0 \n" "vpand %%ymm5,%%ymm0,%%ymm0 \n" "vpand %%ymm5,%%ymm1,%%ymm1 \n" "vpackuswb %%ymm1,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpsrlw $0x8,%%ymm0,%%ymm0 \n" "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vmovdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 
::"memory", "cc", "xmm0", "xmm1", "xmm5"); } void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { asm volatile( "vpcmpeqb %%ymm4,%%ymm4,%%ymm4 \n" "vpsrlw $0xf,%%ymm4,%%ymm4 \n" "vpsllw $0x3,%%ymm4,%%ymm5 \n" "vpackuswb %%ymm4,%%ymm4,%%ymm4 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm0 \n" "vmovdqu 0x20(%0),%%ymm1 \n" "vmovdqu 0x00(%0,%3,1),%%ymm2 \n" "vmovdqu 0x20(%0,%3,1),%%ymm3 \n" "vpmaddubsw %%ymm4,%%ymm0,%%ymm0 \n" "vpmaddubsw %%ymm4,%%ymm1,%%ymm1 \n" "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" "vmovdqu 0x00(%0,%3,2),%%ymm2 \n" "vmovdqu 0x20(%0,%3,2),%%ymm3 \n" "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" "vmovdqu 0x00(%0,%4,1),%%ymm2 \n" "vmovdqu 0x20(%0,%4,1),%%ymm3 \n" "lea 0x40(%0),%0 \n" "vpmaddubsw %%ymm4,%%ymm2,%%ymm2 \n" "vpmaddubsw %%ymm4,%%ymm3,%%ymm3 \n" "vpaddw %%ymm2,%%ymm0,%%ymm0 \n" "vpaddw %%ymm3,%%ymm1,%%ymm1 \n" "vphaddw %%ymm1,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vpaddw %%ymm5,%%ymm0,%%ymm0 \n" "vpsrlw $0x4,%%ymm0,%%ymm0 \n" "vpackuswb %%ymm0,%%ymm0,%%ymm0 \n" "vpermq $0xd8,%%ymm0,%%ymm0 \n" "vmovdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : "r"((intptr_t)(src_stride)), // %3 "r"((intptr_t)(src_stride * 3)) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } #endif // HAS_SCALEROWDOWN4_AVX2 void ScaleRowDown34_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( "movdqa %0,%%xmm3 \n" "movdqa %1,%%xmm4 \n" "movdqa %2,%%xmm5 \n" : : "m"(kShuf0), // %0 "m"(kShuf1), // %1 "m"(kShuf2) // %2 ); asm volatile( LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm2 \n" "lea 0x20(%0),%0 \n" "movdqa %%xmm2,%%xmm1 \n" "palignr $0x8,%%xmm0,%%xmm1 \n" "pshufb %%xmm3,%%xmm0 \n" "pshufb %%xmm4,%%xmm1 \n" "pshufb %%xmm5,%%xmm2 \n" "movq %%xmm0,(%1) \n" "movq %%xmm1,0x8(%1) \n" "movq %%xmm2,0x10(%1) \n" "lea 0x18(%1),%1 \n" "sub $0x18,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 ::"memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5"); } void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { asm volatile( "movdqa %0,%%xmm2 \n" // kShuf01 "movdqa %1,%%xmm3 \n" // kShuf11 "movdqa %2,%%xmm4 \n" // kShuf21 : : "m"(kShuf01), // %0 "m"(kShuf11), // %1 "m"(kShuf21) // %2 ); asm volatile( "movdqa %0,%%xmm5 \n" // kMadd01 "movdqa %1,%%xmm0 \n" // kMadd11 "movdqa %2,%%xmm1 \n" // kRound34 : : "m"(kMadd01), // %0 "m"(kMadd11), // %1 "m"(kRound34) // %2 ); asm volatile( LABELALIGN "1: \n" "movdqu (%0),%%xmm6 \n" "movdqu 0x00(%0,%3,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm6 \n" "pshufb %%xmm2,%%xmm6 \n" "pmaddubsw %%xmm5,%%xmm6 \n" "paddsw %%xmm1,%%xmm6 \n" "psrlw $0x2,%%xmm6 \n" "packuswb %%xmm6,%%xmm6 \n" "movq %%xmm6,(%1) \n" "movdqu 0x8(%0),%%xmm6 \n" "movdqu 0x8(%0,%3,1),%%xmm7 \n" "pavgb %%xmm7,%%xmm6 \n" "pshufb %%xmm3,%%xmm6 \n" "pmaddubsw %%xmm0,%%xmm6 \n" "paddsw %%xmm1,%%xmm6 \n" "psrlw $0x2,%%xmm6 \n" "packuswb %%xmm6,%%xmm6 \n" "movq %%xmm6,0x8(%1) \n" "movdqu 0x10(%0),%%xmm6 \n" "movdqu 0x10(%0,%3,1),%%xmm7 \n" "lea 0x20(%0),%0 \n" "pavgb %%xmm7,%%xmm6 \n" "pshufb %%xmm4,%%xmm6 \n" "pmaddubsw %4,%%xmm6 \n" "paddsw %%xmm1,%%xmm6 
\n" "psrlw $0x2,%%xmm6 \n" "packuswb %%xmm6,%%xmm6 \n" "movq %%xmm6,0x10(%1) \n" "lea 0x18(%1),%1 \n" "sub $0x18,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : "r"((intptr_t)(src_stride)), // %3 "m"(kMadd21) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { asm volatile( "movdqa %0,%%xmm2 \n" // kShuf01 "movdqa %1,%%xmm3 \n" // kShuf11 "movdqa %2,%%xmm4 \n" // kShuf21 : : "m"(kShuf01), // %0 "m"(kShuf11), // %1 "m"(kShuf21) // %2 ); asm volatile( "movdqa %0,%%xmm5 \n" // kMadd01 "movdqa %1,%%xmm0 \n" // kMadd11 "movdqa %2,%%xmm1 \n" // kRound34 : : "m"(kMadd01), // %0 "m"(kMadd11), // %1 "m"(kRound34) // %2 ); asm volatile( LABELALIGN "1: \n" "movdqu (%0),%%xmm6 \n" "movdqu 0x00(%0,%3,1),%%xmm7 \n" "pavgb %%xmm6,%%xmm7 \n" "pavgb %%xmm7,%%xmm6 \n" "pshufb %%xmm2,%%xmm6 \n" "pmaddubsw %%xmm5,%%xmm6 \n" "paddsw %%xmm1,%%xmm6 \n" "psrlw $0x2,%%xmm6 \n" "packuswb %%xmm6,%%xmm6 \n" "movq %%xmm6,(%1) \n" "movdqu 0x8(%0),%%xmm6 \n" "movdqu 0x8(%0,%3,1),%%xmm7 \n" "pavgb %%xmm6,%%xmm7 \n" "pavgb %%xmm7,%%xmm6 \n" "pshufb %%xmm3,%%xmm6 \n" "pmaddubsw %%xmm0,%%xmm6 \n" "paddsw %%xmm1,%%xmm6 \n" "psrlw $0x2,%%xmm6 \n" "packuswb %%xmm6,%%xmm6 \n" "movq %%xmm6,0x8(%1) \n" "movdqu 0x10(%0),%%xmm6 \n" "movdqu 0x10(%0,%3,1),%%xmm7 \n" "lea 0x20(%0),%0 \n" "pavgb %%xmm6,%%xmm7 \n" "pavgb %%xmm7,%%xmm6 \n" "pshufb %%xmm4,%%xmm6 \n" "pmaddubsw %4,%%xmm6 \n" "paddsw %%xmm1,%%xmm6 \n" "psrlw $0x2,%%xmm6 \n" "packuswb %%xmm6,%%xmm6 \n" "movq %%xmm6,0x10(%1) \n" "lea 0x18(%1),%1 \n" "sub $0x18,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : "r"((intptr_t)(src_stride)), // %3 "m"(kMadd21) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } void ScaleRowDown38_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( "movdqa %3,%%xmm4 \n" "movdqa %4,%%xmm5 \n" LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "pshufb %%xmm4,%%xmm0 \n" "pshufb %%xmm5,%%xmm1 \n" "paddusb %%xmm1,%%xmm0 \n" "movq %%xmm0,(%1) \n" "movhlps %%xmm0,%%xmm1 \n" "movd %%xmm1,0x8(%1) \n" "lea 0xc(%1),%1 \n" "sub $0xc,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : "m"(kShuf38a), // %3 "m"(kShuf38b) // %4 : "memory", "cc", "xmm0", "xmm1", "xmm4", "xmm5"); } void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { asm volatile( "movdqa %0,%%xmm2 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm4 \n" "movdqa %3,%%xmm5 \n" : : "m"(kShufAb0), // %0 "m"(kShufAb1), // %1 "m"(kShufAb2), // %2 "m"(kScaleAb2) // %3 ); asm volatile( LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x00(%0,%3,1),%%xmm1 \n" "lea 0x10(%0),%0 \n" "pavgb %%xmm1,%%xmm0 \n" "movdqa %%xmm0,%%xmm1 \n" "pshufb %%xmm2,%%xmm1 \n" "movdqa %%xmm0,%%xmm6 \n" "pshufb %%xmm3,%%xmm6 \n" "paddusw %%xmm6,%%xmm1 \n" "pshufb %%xmm4,%%xmm0 \n" "paddusw %%xmm0,%%xmm1 \n" "pmulhuw %%xmm5,%%xmm1 \n" "packuswb %%xmm1,%%xmm1 \n" "movd %%xmm1,(%1) \n" "psrlq $0x10,%%xmm1 \n" "movd %%xmm1,0x2(%1) \n" "lea 0x6(%1),%1 \n" "sub $0x6,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : "r"((intptr_t)(src_stride)) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6"); } void 
ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { asm volatile( "movdqa %0,%%xmm2 \n" "movdqa %1,%%xmm3 \n" "movdqa %2,%%xmm4 \n" "pxor %%xmm5,%%xmm5 \n" : : "m"(kShufAc), // %0 "m"(kShufAc3), // %1 "m"(kScaleAc33) // %2 ); asm volatile( LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x00(%0,%3,1),%%xmm6 \n" "movhlps %%xmm0,%%xmm1 \n" "movhlps %%xmm6,%%xmm7 \n" "punpcklbw %%xmm5,%%xmm0 \n" "punpcklbw %%xmm5,%%xmm1 \n" "punpcklbw %%xmm5,%%xmm6 \n" "punpcklbw %%xmm5,%%xmm7 \n" "paddusw %%xmm6,%%xmm0 \n" "paddusw %%xmm7,%%xmm1 \n" "movdqu 0x00(%0,%3,2),%%xmm6 \n" "lea 0x10(%0),%0 \n" "movhlps %%xmm6,%%xmm7 \n" "punpcklbw %%xmm5,%%xmm6 \n" "punpcklbw %%xmm5,%%xmm7 \n" "paddusw %%xmm6,%%xmm0 \n" "paddusw %%xmm7,%%xmm1 \n" "movdqa %%xmm0,%%xmm6 \n" "psrldq $0x2,%%xmm0 \n" "paddusw %%xmm0,%%xmm6 \n" "psrldq $0x2,%%xmm0 \n" "paddusw %%xmm0,%%xmm6 \n" "pshufb %%xmm2,%%xmm6 \n" "movdqa %%xmm1,%%xmm7 \n" "psrldq $0x2,%%xmm1 \n" "paddusw %%xmm1,%%xmm7 \n" "psrldq $0x2,%%xmm1 \n" "paddusw %%xmm1,%%xmm7 \n" "pshufb %%xmm3,%%xmm7 \n" "paddusw %%xmm7,%%xmm6 \n" "pmulhuw %%xmm4,%%xmm6 \n" "packuswb %%xmm6,%%xmm6 \n" "movd %%xmm6,(%1) \n" "psrlq $0x10,%%xmm6 \n" "movd %%xmm6,0x2(%1) \n" "lea 0x6(%1),%1 \n" "sub $0x6,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : "r"((intptr_t)(src_stride)) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } // Reads 16xN bytes and produces 16 shorts at a time. void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { asm volatile( "pxor %%xmm5,%%xmm5 \n" // 16 pixel loop. LABELALIGN "1: \n" "movdqu (%0),%%xmm3 \n" "lea 0x10(%0),%0 \n" // src_ptr += 16 "movdqu (%1),%%xmm0 \n" "movdqu 0x10(%1),%%xmm1 \n" "movdqa %%xmm3,%%xmm2 \n" "punpcklbw %%xmm5,%%xmm2 \n" "punpckhbw %%xmm5,%%xmm3 \n" "paddusw %%xmm2,%%xmm0 \n" "paddusw %%xmm3,%%xmm1 \n" "movdqu %%xmm0,(%1) \n" "movdqu %%xmm1,0x10(%1) \n" "lea 0x20(%1),%1 \n" "sub $0x10,%2 \n" "jg 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(src_width) // %2 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #ifdef HAS_SCALEADDROW_AVX2 // Reads 32 bytes and accumulates to 32 shorts at a time. void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { asm volatile( "vpxor %%ymm5,%%ymm5,%%ymm5 \n" LABELALIGN "1: \n" "vmovdqu (%0),%%ymm3 \n" "lea 0x20(%0),%0 \n" // src_ptr += 32 "vpermq $0xd8,%%ymm3,%%ymm3 \n" "vpunpcklbw %%ymm5,%%ymm3,%%ymm2 \n" "vpunpckhbw %%ymm5,%%ymm3,%%ymm3 \n" "vpaddusw (%1),%%ymm2,%%ymm0 \n" "vpaddusw 0x20(%1),%%ymm3,%%ymm1 \n" "vmovdqu %%ymm0,(%1) \n" "vmovdqu %%ymm1,0x20(%1) \n" "lea 0x40(%1),%1 \n" "sub $0x20,%2 \n" "jg 1b \n" "vzeroupper \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(src_width) // %2 : : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm5"); } #endif // HAS_SCALEADDROW_AVX2 // Constant for making pixels signed to avoid pmaddubsw // saturation. static const uvec8 kFsub80 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; // Constant for making pixels unsigned and adding .5 for rounding. static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040}; // Bilinear column filtering. SSSE3 version. 
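// NOTE (illustrative sketch, not part of upstream libyuv): a scalar model of
// the 7 bit bilinear blend that ScaleFilterCols_SSSE3 performs with
// pmaddubsw. f is the top 7 bits of the 16.16 fraction, and the kFadd40
// constant supplies the +64 rounding before the final shift:
//
//   static uint8_t SketchBlend7(uint8_t a, uint8_t b, int x) {
//     int f = (x >> 9) & 0x7f;  // 7 bit fraction of the 16.16 position
//     return (uint8_t)((a * (128 - f) + b * f + 64) >> 7);
//   }
//
// The asm builds 128 - f as (f ^ 0x7f) + 1, which is the same value.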
void ScaleFilterCols_SSSE3(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) { intptr_t x0, x1, temp_pixel; asm volatile( "movd %6,%%xmm2 \n" "movd %7,%%xmm3 \n" "movl $0x04040000,%k2 \n" "movd %k2,%%xmm5 \n" "pcmpeqb %%xmm6,%%xmm6 \n" "psrlw $0x9,%%xmm6 \n" // 0x007f007f "pcmpeqb %%xmm7,%%xmm7 \n" "psrlw $15,%%xmm7 \n" // 0x00010001 "pextrw $0x1,%%xmm2,%k3 \n" "subl $0x2,%5 \n" "jl 29f \n" "movdqa %%xmm2,%%xmm0 \n" "paddd %%xmm3,%%xmm0 \n" "punpckldq %%xmm0,%%xmm2 \n" "punpckldq %%xmm3,%%xmm3 \n" "paddd %%xmm3,%%xmm3 \n" "pextrw $0x3,%%xmm2,%k4 \n" LABELALIGN "2: \n" "movdqa %%xmm2,%%xmm1 \n" "paddd %%xmm3,%%xmm2 \n" "movzwl 0x00(%1,%3,1),%k2 \n" "movd %k2,%%xmm0 \n" "psrlw $0x9,%%xmm1 \n" "movzwl 0x00(%1,%4,1),%k2 \n" "movd %k2,%%xmm4 \n" "pshufb %%xmm5,%%xmm1 \n" "punpcklwd %%xmm4,%%xmm0 \n" "psubb %8,%%xmm0 \n" // make pixels signed. "pxor %%xmm6,%%xmm1 \n" // 128 - f = (f ^ 127 ) + // 1 "paddusb %%xmm7,%%xmm1 \n" "pmaddubsw %%xmm0,%%xmm1 \n" "pextrw $0x1,%%xmm2,%k3 \n" "pextrw $0x3,%%xmm2,%k4 \n" "paddw %9,%%xmm1 \n" // make pixels unsigned. "psrlw $0x7,%%xmm1 \n" "packuswb %%xmm1,%%xmm1 \n" "movd %%xmm1,%k2 \n" "mov %w2,(%0) \n" "lea 0x2(%0),%0 \n" "subl $0x2,%5 \n" "jge 2b \n" LABELALIGN "29: \n" "addl $0x1,%5 \n" "jl 99f \n" "movzwl 0x00(%1,%3,1),%k2 \n" "movd %k2,%%xmm0 \n" "psrlw $0x9,%%xmm2 \n" "pshufb %%xmm5,%%xmm2 \n" "psubb %8,%%xmm0 \n" // make pixels signed. "pxor %%xmm6,%%xmm2 \n" "paddusb %%xmm7,%%xmm2 \n" "pmaddubsw %%xmm0,%%xmm2 \n" "paddw %9,%%xmm2 \n" // make pixels unsigned. "psrlw $0x7,%%xmm2 \n" "packuswb %%xmm2,%%xmm2 \n" "movd %%xmm2,%k2 \n" "mov %b2,(%0) \n" "99: \n" : "+r"(dst_ptr), // %0 "+r"(src_ptr), // %1 "=&a"(temp_pixel), // %2 "=&r"(x0), // %3 "=&r"(x1), // %4 #if defined(__x86_64__) "+rm"(dst_width) // %5 #else "+m"(dst_width) // %5 #endif : "rm"(x), // %6 "rm"(dx), // %7 #if defined(__x86_64__) "x"(kFsub80), // %8 "x"(kFadd40) // %9 #else "m"(kFsub80), // %8 "m"(kFadd40) // %9 #endif : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"); } // Reads 4 pixels, duplicates them and writes 8 pixels. // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. 
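// NOTE (illustrative annotation, not part of upstream libyuv): the
// "punpcklbw %%xmm0,%%xmm0" below interleaves the low eight bytes of xmm0
// with themselves, turning a,b,c,d,... into a,a,b,b,c,c,d,d,..., which is
// exactly the 2x nearest-neighbour column duplication. The loads and stores
// use movdqu, so the 16 byte alignment mentioned above is beneficial but not
// strictly required.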
void ScaleColsUp2_SSE2(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) { (void)x; (void)dx; asm volatile( LABELALIGN "1: \n" "movdqu (%1),%%xmm0 \n" "lea 0x10(%1),%1 \n" "movdqa %%xmm0,%%xmm1 \n" "punpcklbw %%xmm0,%%xmm0 \n" "punpckhbw %%xmm1,%%xmm1 \n" "movdqu %%xmm0,(%0) \n" "movdqu %%xmm1,0x10(%0) \n" "lea 0x20(%0),%0 \n" "sub $0x20,%2 \n" "jg 1b \n" : "+r"(dst_ptr), // %0 "+r"(src_ptr), // %1 "+r"(dst_width) // %2 ::"memory", "cc", "xmm0", "xmm1"); } void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) { (void)src_stride; asm volatile( LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "shufps $0xdd,%%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x4,%2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(dst_width) // %2 ::"memory", "cc", "xmm0", "xmm1"); } void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) { (void)src_stride; asm volatile( LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "lea 0x20(%0),%0 \n" "movdqa %%xmm0,%%xmm2 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" "shufps $0xdd,%%xmm1,%%xmm2 \n" "pavgb %%xmm2,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x4,%2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(dst_width) // %2 ::"memory", "cc", "xmm0", "xmm1"); } void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) { asm volatile( LABELALIGN "1: \n" "movdqu (%0),%%xmm0 \n" "movdqu 0x10(%0),%%xmm1 \n" "movdqu 0x00(%0,%3,1),%%xmm2 \n" "movdqu 0x10(%0,%3,1),%%xmm3 \n" "lea 0x20(%0),%0 \n" "pavgb %%xmm2,%%xmm0 \n" "pavgb %%xmm3,%%xmm1 \n" "movdqa %%xmm0,%%xmm2 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" "shufps $0xdd,%%xmm1,%%xmm2 \n" "pavgb %%xmm2,%%xmm0 \n" "movdqu %%xmm0,(%1) \n" "lea 0x10(%1),%1 \n" "sub $0x4,%2 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(dst_width) // %2 : "r"((intptr_t)(src_stride)) // %3 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3"); } // Reads 4 pixels at a time. // Alignment requirement: dst_argb 16 byte aligned. void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width) { intptr_t src_stepx_x4 = (intptr_t)(src_stepx); intptr_t src_stepx_x12; (void)src_stride; asm volatile( "lea 0x00(,%1,4),%1 \n" "lea 0x00(%1,%1,2),%4 \n" LABELALIGN "1: \n" "movd (%0),%%xmm0 \n" "movd 0x00(%0,%1,1),%%xmm1 \n" "punpckldq %%xmm1,%%xmm0 \n" "movd 0x00(%0,%1,2),%%xmm2 \n" "movd 0x00(%0,%4,1),%%xmm3 \n" "lea 0x00(%0,%1,4),%0 \n" "punpckldq %%xmm3,%%xmm2 \n" "punpcklqdq %%xmm2,%%xmm0 \n" "movdqu %%xmm0,(%2) \n" "lea 0x10(%2),%2 \n" "sub $0x4,%3 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(src_stepx_x4), // %1 "+r"(dst_argb), // %2 "+r"(dst_width), // %3 "=&r"(src_stepx_x12) // %4 ::"memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3"); } // Blends four 2x2 to 4x1. // Alignment requirement: dst_argb 16 byte aligned. 
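// NOTE (illustrative annotation, not part of upstream libyuv): the 2x2 box
// below is built from three pavgb steps, pavgb(pavgb(a, c), pavgb(b, d)),
// and pavgb rounds up: (x + y + 1) >> 1. The result can therefore differ by
// one low bit from the C reference (a + b + c + d + 2) >> 2.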
void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width) { intptr_t src_stepx_x4 = (intptr_t)(src_stepx); intptr_t src_stepx_x12; intptr_t row1 = (intptr_t)(src_stride); asm volatile( "lea 0x00(,%1,4),%1 \n" "lea 0x00(%1,%1,2),%4 \n" "lea 0x00(%0,%5,1),%5 \n" LABELALIGN "1: \n" "movq (%0),%%xmm0 \n" "movhps 0x00(%0,%1,1),%%xmm0 \n" "movq 0x00(%0,%1,2),%%xmm1 \n" "movhps 0x00(%0,%4,1),%%xmm1 \n" "lea 0x00(%0,%1,4),%0 \n" "movq (%5),%%xmm2 \n" "movhps 0x00(%5,%1,1),%%xmm2 \n" "movq 0x00(%5,%1,2),%%xmm3 \n" "movhps 0x00(%5,%4,1),%%xmm3 \n" "lea 0x00(%5,%1,4),%5 \n" "pavgb %%xmm2,%%xmm0 \n" "pavgb %%xmm3,%%xmm1 \n" "movdqa %%xmm0,%%xmm2 \n" "shufps $0x88,%%xmm1,%%xmm0 \n" "shufps $0xdd,%%xmm1,%%xmm2 \n" "pavgb %%xmm2,%%xmm0 \n" "movdqu %%xmm0,(%2) \n" "lea 0x10(%2),%2 \n" "sub $0x4,%3 \n" "jg 1b \n" : "+r"(src_argb), // %0 "+r"(src_stepx_x4), // %1 "+r"(dst_argb), // %2 "+rm"(dst_width), // %3 "=&r"(src_stepx_x12), // %4 "+r"(row1) // %5 ::"memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3"); } void ScaleARGBCols_SSE2(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { intptr_t x0, x1; asm volatile( "movd %5,%%xmm2 \n" "movd %6,%%xmm3 \n" "pshufd $0x0,%%xmm2,%%xmm2 \n" "pshufd $0x11,%%xmm3,%%xmm0 \n" "paddd %%xmm0,%%xmm2 \n" "paddd %%xmm3,%%xmm3 \n" "pshufd $0x5,%%xmm3,%%xmm0 \n" "paddd %%xmm0,%%xmm2 \n" "paddd %%xmm3,%%xmm3 \n" "pshufd $0x0,%%xmm3,%%xmm3 \n" "pextrw $0x1,%%xmm2,%k0 \n" "pextrw $0x3,%%xmm2,%k1 \n" "cmp $0x0,%4 \n" "jl 99f \n" "sub $0x4,%4 \n" "jl 49f \n" LABELALIGN "40: \n" "movd 0x00(%3,%0,4),%%xmm0 \n" "movd 0x00(%3,%1,4),%%xmm1 \n" "pextrw $0x5,%%xmm2,%k0 \n" "pextrw $0x7,%%xmm2,%k1 \n" "paddd %%xmm3,%%xmm2 \n" "punpckldq %%xmm1,%%xmm0 \n" "movd 0x00(%3,%0,4),%%xmm1 \n" "movd 0x00(%3,%1,4),%%xmm4 \n" "pextrw $0x1,%%xmm2,%k0 \n" "pextrw $0x3,%%xmm2,%k1 \n" "punpckldq %%xmm4,%%xmm1 \n" "punpcklqdq %%xmm1,%%xmm0 \n" "movdqu %%xmm0,(%2) \n" "lea 0x10(%2),%2 \n" "sub $0x4,%4 \n" "jge 40b \n" "49: \n" "test $0x2,%4 \n" "je 29f \n" "movd 0x00(%3,%0,4),%%xmm0 \n" "movd 0x00(%3,%1,4),%%xmm1 \n" "pextrw $0x5,%%xmm2,%k0 \n" "punpckldq %%xmm1,%%xmm0 \n" "movq %%xmm0,(%2) \n" "lea 0x8(%2),%2 \n" "29: \n" "test $0x1,%4 \n" "je 99f \n" "movd 0x00(%3,%0,4),%%xmm0 \n" "movd %%xmm0,(%2) \n" "99: \n" : "=&a"(x0), // %0 "=&d"(x1), // %1 "+r"(dst_argb), // %2 "+r"(src_argb), // %3 "+r"(dst_width) // %4 : "rm"(x), // %5 "rm"(dx) // %6 : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4"); } // Reads 4 pixels, duplicates them and writes 8 pixels. // Alignment requirement: src_argb 16 byte aligned, dst_argb 16 byte aligned. 
void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb,
                           const uint8_t* src_argb,
                           int dst_width,
                           int x,
                           int dx) {
  (void)x;
  (void)dx;
  asm volatile(
      LABELALIGN
      "1: \n"
      "movdqu (%1),%%xmm0 \n"
      "lea 0x10(%1),%1 \n"
      "movdqa %%xmm0,%%xmm1 \n"
      "punpckldq %%xmm0,%%xmm0 \n"
      "punpckhdq %%xmm1,%%xmm1 \n"
      "movdqu %%xmm0,(%0) \n"
      "movdqu %%xmm1,0x10(%0) \n"
      "lea 0x20(%0),%0 \n"
      "sub $0x8,%2 \n"
      "jg 1b \n"
      : "+r"(dst_argb),  // %0
        "+r"(src_argb),  // %1
        "+r"(dst_width)  // %2
      ::"memory", "cc", "xmm0", "xmm1");
}

// Shuffle table for arranging 2 pixels into pairs for pmaddubsw
static const uvec8 kShuffleColARGB = {
    0u, 4u,  1u, 5u,  2u,  6u,  3u,  7u,  // bbggrraa 1st pixel
    8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u  // bbggrraa 2nd pixel
};

// Shuffle table for duplicating 2 fractions into 8 bytes each
static const uvec8 kShuffleFractions = {
    0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u,
};

// Bilinear row filtering combines 4x2 -> 4x1. SSSE3 version
void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb,
                               const uint8_t* src_argb,
                               int dst_width,
                               int x,
                               int dx) {
  intptr_t x0, x1;
  asm volatile(
      "movdqa %0,%%xmm4 \n"
      "movdqa %1,%%xmm5 \n"
      :
      : "m"(kShuffleColARGB),   // %0
        "m"(kShuffleFractions)  // %1
  );
  asm volatile(
      "movd %5,%%xmm2 \n"
      "movd %6,%%xmm3 \n"
      "pcmpeqb %%xmm6,%%xmm6 \n"
      "psrlw $0x9,%%xmm6 \n"
      "pextrw $0x1,%%xmm2,%k3 \n"
      "sub $0x2,%2 \n"
      "jl 29f \n"
      "movdqa %%xmm2,%%xmm0 \n"
      "paddd %%xmm3,%%xmm0 \n"
      "punpckldq %%xmm0,%%xmm2 \n"
      "punpckldq %%xmm3,%%xmm3 \n"
      "paddd %%xmm3,%%xmm3 \n"
      "pextrw $0x3,%%xmm2,%k4 \n"

      LABELALIGN
      "2: \n"
      "movdqa %%xmm2,%%xmm1 \n"
      "paddd %%xmm3,%%xmm2 \n"
      "movq 0x00(%1,%3,4),%%xmm0 \n"
      "psrlw $0x9,%%xmm1 \n"
      "movhps 0x00(%1,%4,4),%%xmm0 \n"
      "pshufb %%xmm5,%%xmm1 \n"
      "pshufb %%xmm4,%%xmm0 \n"
      "pxor %%xmm6,%%xmm1 \n"
      "pmaddubsw %%xmm1,%%xmm0 \n"
      "psrlw $0x7,%%xmm0 \n"
      "pextrw $0x1,%%xmm2,%k3 \n"
      "pextrw $0x3,%%xmm2,%k4 \n"
      "packuswb %%xmm0,%%xmm0 \n"
      "movq %%xmm0,(%0) \n"
      "lea 0x8(%0),%0 \n"
      "sub $0x2,%2 \n"
      "jge 2b \n"

      LABELALIGN
      "29: \n"
      "add $0x1,%2 \n"
      "jl 99f \n"
      "psrlw $0x9,%%xmm2 \n"
      "movq 0x00(%1,%3,4),%%xmm0 \n"
      "pshufb %%xmm5,%%xmm2 \n"
      "pshufb %%xmm4,%%xmm0 \n"
      "pxor %%xmm6,%%xmm2 \n"
      "pmaddubsw %%xmm2,%%xmm0 \n"
      "psrlw $0x7,%%xmm0 \n"
      "packuswb %%xmm0,%%xmm0 \n"
      "movd %%xmm0,(%0) \n"

      LABELALIGN
      "99: \n"  // clang-format error.

      : "+r"(dst_argb),    // %0
        "+r"(src_argb),    // %1
        "+rm"(dst_width),  // %2
        "=&r"(x0),         // %3
        "=&r"(x1)          // %4
      : "rm"(x),   // %5
        "rm"(dx)   // %6
      : "memory", "cc", "xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5",
        "xmm6");
}

// Divide num by div and return as 16.16 fixed point result.
int FixedDiv_X86(int num, int div) {
  asm volatile(
      "cdq \n"
      "shld $0x10,%%eax,%%edx \n"
      "shl $0x10,%%eax \n"
      "idiv %1 \n"
      "mov %0, %%eax \n"
      : "+a"(num)  // %0
      : "c"(div)   // %1
      : "memory", "cc", "edx");
  return num;
}

// Divide num - 1 by div - 1 and return as 16.16 fixed point result.
int FixedDiv1_X86(int num, int div) {
  asm volatile(
      "cdq \n"
      "shld $0x10,%%eax,%%edx \n"
      "shl $0x10,%%eax \n"
      "sub $0x10001,%%eax \n"
      "sbb $0x0,%%edx \n"
      "sub $0x1,%1 \n"
      "idiv %1 \n"
      "mov %0, %%eax \n"
      : "+a"(num)  // %0
      : "c"(div)   // %1
      : "memory", "cc", "edx");
  return num;
}

#endif  // defined(__x86_64__) || defined(__i386__)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif

libvpx-1.8.2/third_party/libyuv/source/scale_msa.cc

/*
 *  Copyright 2016 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "libyuv/scale_row.h"

// This module is for GCC MSA
#if !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa)

#include "libyuv/macros_msa.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

#define LOAD_INDEXED_DATA(srcp, indx0, out0) \
  {                                          \
    out0[0] = srcp[indx0[0]];                \
    out0[1] = srcp[indx0[1]];                \
    out0[2] = srcp[indx0[2]];                \
    out0[3] = srcp[indx0[3]];                \
  }

void ScaleARGBRowDown2_MSA(const uint8_t* src_argb,
                           ptrdiff_t src_stride,
                           uint8_t* dst_argb,
                           int dst_width) {
  int x;
  v16u8 src0, src1, dst0;
  (void)src_stride;
  for (x = 0; x < dst_width; x += 4) {
    src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
    dst0 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0);
    ST_UB(dst0, dst_argb);
    src_argb += 32;
    dst_argb += 16;
  }
}

void ScaleARGBRowDown2Linear_MSA(const uint8_t* src_argb,
                                 ptrdiff_t src_stride,
                                 uint8_t* dst_argb,
                                 int dst_width) {
  int x;
  v16u8 src0, src1, vec0, vec1, dst0;
  (void)src_stride;
  for (x = 0; x < dst_width; x += 4) {
    src0 = (v16u8)__msa_ld_b((v16i8*)src_argb, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)src_argb, 16);
    vec0 = (v16u8)__msa_pckev_w((v4i32)src1, (v4i32)src0);
    vec1 = (v16u8)__msa_pckod_w((v4i32)src1, (v4i32)src0);
    dst0 = (v16u8)__msa_aver_u_b((v16u8)vec0, (v16u8)vec1);
    ST_UB(dst0, dst_argb);
    src_argb += 32;
    dst_argb += 16;
  }
}

void ScaleARGBRowDown2Box_MSA(const uint8_t* src_argb,
                              ptrdiff_t src_stride,
                              uint8_t* dst_argb,
                              int dst_width) {
  int x;
  const uint8_t* s = src_argb;
  const uint8_t* t = src_argb + src_stride;
  v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0;
  v8u16 reg0, reg1, reg2, reg3;
  v16i8 shuffler = {0, 4, 1, 5, 2, 6, 3, 7, 8, 12, 9, 13, 10, 14, 11, 15};
  for (x = 0; x < dst_width; x += 4) {
    src0 = (v16u8)__msa_ld_b((v16i8*)s, 0);
    src1 = (v16u8)__msa_ld_b((v16i8*)s, 16);
    src2 = (v16u8)__msa_ld_b((v16i8*)t, 0);
    src3 = (v16u8)__msa_ld_b((v16i8*)t, 16);
    vec0 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src0, (v16i8)src0);
    vec1 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src1, (v16i8)src1);
    vec2 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src2, (v16i8)src2);
    vec3 = (v16u8)__msa_vshf_b(shuffler, (v16i8)src3, (v16i8)src3);
    reg0 = __msa_hadd_u_h(vec0, vec0);
    reg1 = __msa_hadd_u_h(vec1, vec1);
    reg2 = __msa_hadd_u_h(vec2, vec2);
    reg3 = __msa_hadd_u_h(vec3, vec3);
    reg0 += reg2;
    reg1 += reg3;
    reg0 = (v8u16)__msa_srari_h((v8i16)reg0, 2);
    reg1 = (v8u16)__msa_srari_h((v8i16)reg1, 2);
    dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0);
    ST_UB(dst0, dst_argb);
    s += 32;
    t += 32;
    dst_argb += 16;
  }
}

void ScaleARGBRowDownEven_MSA(const uint8_t* src_argb,
                              ptrdiff_t src_stride,
                              int32_t src_stepx,
                              uint8_t* dst_argb,
                              int dst_width) {
  int x;
  int32_t stepx = src_stepx * 4;
  int32_t data0, data1, data2, data3;
  (void)src_stride;
  for (x = 0; x < dst_width; x += 4) {
    data0 = LW(src_argb);
    data1 = LW(src_argb + stepx);
    data2 = LW(src_argb + stepx * 2);
    data3 = LW(src_argb + stepx * 3);
    SW(data0, dst_argb);
    SW(data1, dst_argb + 4);
    SW(data2, dst_argb + 8);
    SW(data3, dst_argb + 12);
    src_argb += stepx * 4;
    dst_argb += 16;
  }
}

void ScaleARGBRowDownEvenBox_MSA(const uint8_t* src_argb,
                                 ptrdiff_t src_stride,
                                 int src_stepx,
                                 uint8_t* dst_argb,
                                 int dst_width) {
  int x;
  const uint8_t* nxt_argb = src_argb + src_stride;
  int32_t stepx =
src_stepx * 4; int64_t data0, data1, data2, data3; v16u8 src0 = {0}, src1 = {0}, src2 = {0}, src3 = {0}; v16u8 vec0, vec1, vec2, vec3; v8u16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; v16u8 dst0; for (x = 0; x < dst_width; x += 4) { data0 = LD(src_argb); data1 = LD(src_argb + stepx); data2 = LD(src_argb + stepx * 2); data3 = LD(src_argb + stepx * 3); src0 = (v16u8)__msa_insert_d((v2i64)src0, 0, data0); src0 = (v16u8)__msa_insert_d((v2i64)src0, 1, data1); src1 = (v16u8)__msa_insert_d((v2i64)src1, 0, data2); src1 = (v16u8)__msa_insert_d((v2i64)src1, 1, data3); data0 = LD(nxt_argb); data1 = LD(nxt_argb + stepx); data2 = LD(nxt_argb + stepx * 2); data3 = LD(nxt_argb + stepx * 3); src2 = (v16u8)__msa_insert_d((v2i64)src2, 0, data0); src2 = (v16u8)__msa_insert_d((v2i64)src2, 1, data1); src3 = (v16u8)__msa_insert_d((v2i64)src3, 0, data2); src3 = (v16u8)__msa_insert_d((v2i64)src3, 1, data3); vec0 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src0); vec1 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src1); vec2 = (v16u8)__msa_ilvl_b((v16i8)src2, (v16i8)src0); vec3 = (v16u8)__msa_ilvl_b((v16i8)src3, (v16i8)src1); reg0 = __msa_hadd_u_h(vec0, vec0); reg1 = __msa_hadd_u_h(vec1, vec1); reg2 = __msa_hadd_u_h(vec2, vec2); reg3 = __msa_hadd_u_h(vec3, vec3); reg4 = (v8u16)__msa_pckev_d((v2i64)reg2, (v2i64)reg0); reg5 = (v8u16)__msa_pckev_d((v2i64)reg3, (v2i64)reg1); reg6 = (v8u16)__msa_pckod_d((v2i64)reg2, (v2i64)reg0); reg7 = (v8u16)__msa_pckod_d((v2i64)reg3, (v2i64)reg1); reg4 += reg6; reg5 += reg7; reg4 = (v8u16)__msa_srari_h((v8i16)reg4, 2); reg5 = (v8u16)__msa_srari_h((v8i16)reg5, 2); dst0 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); ST_UB(dst0, dst_argb); src_argb += stepx * 4; nxt_argb += stepx * 4; dst_argb += 16; } } void ScaleRowDown2_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { int x; v16u8 src0, src1, src2, src3, dst0, dst1; (void)src_stride; for (x = 0; x < dst_width; x += 32) { src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32); src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48); dst0 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); dst1 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); ST_UB2(dst0, dst1, dst, 16); src_ptr += 64; dst += 32; } } void ScaleRowDown2Linear_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { int x; v16u8 src0, src1, src2, src3, vec0, vec1, vec2, vec3, dst0, dst1; (void)src_stride; for (x = 0; x < dst_width; x += 32) { src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32); src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48); vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); vec2 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); vec1 = (v16u8)__msa_pckod_b((v16i8)src1, (v16i8)src0); vec3 = (v16u8)__msa_pckod_b((v16i8)src3, (v16i8)src2); dst0 = __msa_aver_u_b(vec1, vec0); dst1 = __msa_aver_u_b(vec3, vec2); ST_UB2(dst0, dst1, dst, 16); src_ptr += 64; dst += 32; } } void ScaleRowDown2Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { int x; const uint8_t* s = src_ptr; const uint8_t* t = src_ptr + src_stride; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1; v8u16 vec0, vec1, vec2, vec3; for (x = 0; x < dst_width; x += 32) { src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); src4 = 
(v16u8)__msa_ld_b((v16i8*)t, 0); src5 = (v16u8)__msa_ld_b((v16i8*)t, 16); src6 = (v16u8)__msa_ld_b((v16i8*)t, 32); src7 = (v16u8)__msa_ld_b((v16i8*)t, 48); vec0 = __msa_hadd_u_h(src0, src0); vec1 = __msa_hadd_u_h(src1, src1); vec2 = __msa_hadd_u_h(src2, src2); vec3 = __msa_hadd_u_h(src3, src3); vec0 += __msa_hadd_u_h(src4, src4); vec1 += __msa_hadd_u_h(src5, src5); vec2 += __msa_hadd_u_h(src6, src6); vec3 += __msa_hadd_u_h(src7, src7); vec0 = (v8u16)__msa_srari_h((v8i16)vec0, 2); vec1 = (v8u16)__msa_srari_h((v8i16)vec1, 2); vec2 = (v8u16)__msa_srari_h((v8i16)vec2, 2); vec3 = (v8u16)__msa_srari_h((v8i16)vec3, 2); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); dst1 = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); ST_UB2(dst0, dst1, dst, 16); s += 64; t += 64; dst += 32; } } void ScaleRowDown4_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { int x; v16u8 src0, src1, src2, src3, vec0, vec1, dst0; (void)src_stride; for (x = 0; x < dst_width; x += 16) { src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32); src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48); vec0 = (v16u8)__msa_pckev_b((v16i8)src1, (v16i8)src0); vec1 = (v16u8)__msa_pckev_b((v16i8)src3, (v16i8)src2); dst0 = (v16u8)__msa_pckod_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst); src_ptr += 64; dst += 16; } } void ScaleRowDown4Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { int x; const uint8_t* s = src_ptr; const uint8_t* t0 = s + src_stride; const uint8_t* t1 = s + src_stride * 2; const uint8_t* t2 = s + src_stride * 3; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0; v8u16 vec0, vec1, vec2, vec3; v4u32 reg0, reg1, reg2, reg3; for (x = 0; x < dst_width; x += 16) { src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); src4 = (v16u8)__msa_ld_b((v16i8*)t0, 0); src5 = (v16u8)__msa_ld_b((v16i8*)t0, 16); src6 = (v16u8)__msa_ld_b((v16i8*)t0, 32); src7 = (v16u8)__msa_ld_b((v16i8*)t0, 48); vec0 = __msa_hadd_u_h(src0, src0); vec1 = __msa_hadd_u_h(src1, src1); vec2 = __msa_hadd_u_h(src2, src2); vec3 = __msa_hadd_u_h(src3, src3); vec0 += __msa_hadd_u_h(src4, src4); vec1 += __msa_hadd_u_h(src5, src5); vec2 += __msa_hadd_u_h(src6, src6); vec3 += __msa_hadd_u_h(src7, src7); src0 = (v16u8)__msa_ld_b((v16i8*)t1, 0); src1 = (v16u8)__msa_ld_b((v16i8*)t1, 16); src2 = (v16u8)__msa_ld_b((v16i8*)t1, 32); src3 = (v16u8)__msa_ld_b((v16i8*)t1, 48); src4 = (v16u8)__msa_ld_b((v16i8*)t2, 0); src5 = (v16u8)__msa_ld_b((v16i8*)t2, 16); src6 = (v16u8)__msa_ld_b((v16i8*)t2, 32); src7 = (v16u8)__msa_ld_b((v16i8*)t2, 48); vec0 += __msa_hadd_u_h(src0, src0); vec1 += __msa_hadd_u_h(src1, src1); vec2 += __msa_hadd_u_h(src2, src2); vec3 += __msa_hadd_u_h(src3, src3); vec0 += __msa_hadd_u_h(src4, src4); vec1 += __msa_hadd_u_h(src5, src5); vec2 += __msa_hadd_u_h(src6, src6); vec3 += __msa_hadd_u_h(src7, src7); reg0 = __msa_hadd_u_w(vec0, vec0); reg1 = __msa_hadd_u_w(vec1, vec1); reg2 = __msa_hadd_u_w(vec2, vec2); reg3 = __msa_hadd_u_w(vec3, vec3); reg0 = (v4u32)__msa_srari_w((v4i32)reg0, 4); reg1 = (v4u32)__msa_srari_w((v4i32)reg1, 4); reg2 = (v4u32)__msa_srari_w((v4i32)reg2, 4); reg3 = (v4u32)__msa_srari_w((v4i32)reg3, 4); vec0 = (v8u16)__msa_pckev_h((v8i16)reg1, (v8i16)reg0); vec1 = (v8u16)__msa_pckev_h((v8i16)reg3, (v8i16)reg2); dst0 = (v16u8)__msa_pckev_b((v16i8)vec1, (v16i8)vec0); ST_UB(dst0, dst); 
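    // NOTE (illustrative annotation, not part of upstream libyuv):
    // __msa_srari_w above is a rounding arithmetic shift,
    // (v + (1 << (s - 1))) >> s, so srari_w(v, 4) computes (v + 8) >> 4:
    // each output byte is the rounded mean of the 16 pixels in one 4x4 box.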
s += 64; t0 += 64; t1 += 64; t2 += 64; dst += 16; } } void ScaleRowDown38_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { int x, width; uint64_t dst0; uint32_t dst1; v16u8 src0, src1, vec0; v16i8 mask = {0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0}; (void)src_stride; assert(dst_width % 3 == 0); width = dst_width / 3; for (x = 0; x < width; x += 4) { src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); vec0 = (v16u8)__msa_vshf_b(mask, (v16i8)src1, (v16i8)src0); dst0 = __msa_copy_u_d((v2i64)vec0, 0); dst1 = __msa_copy_u_w((v4i32)vec0, 2); SD(dst0, dst); SW(dst1, dst + 8); src_ptr += 32; dst += 12; } } void ScaleRowDown38_2_Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { int x, width; const uint8_t* s = src_ptr; const uint8_t* t = src_ptr + src_stride; uint64_t dst0; uint32_t dst1; v16u8 src0, src1, src2, src3, out; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v4u32 tmp0, tmp1, tmp2, tmp3, tmp4; v8i16 zero = {0}; v8i16 mask = {0, 1, 2, 8, 3, 4, 5, 9}; v16i8 dst_mask = {0, 2, 16, 4, 6, 18, 8, 10, 20, 12, 14, 22, 0, 0, 0, 0}; v4u32 const_0x2AAA = (v4u32)__msa_fill_w(0x2AAA); v4u32 const_0x4000 = (v4u32)__msa_fill_w(0x4000); assert((dst_width % 3 == 0) && (dst_width > 0)); width = dst_width / 3; for (x = 0; x < width; x += 4) { src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); src2 = (v16u8)__msa_ld_b((v16i8*)t, 0); src3 = (v16u8)__msa_ld_b((v16i8*)t, 16); vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); vec3 = __msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); vec4 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec0); vec5 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec1); vec6 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec2); vec7 = (v8u16)__msa_vshf_h(mask, zero, (v8i16)vec3); vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0); vec1 = (v8u16)__msa_pckod_w((v4i32)vec3, (v4i32)vec2); vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0); tmp0 = __msa_hadd_u_w(vec4, vec4); tmp1 = __msa_hadd_u_w(vec5, vec5); tmp2 = __msa_hadd_u_w(vec6, vec6); tmp3 = __msa_hadd_u_w(vec7, vec7); tmp4 = __msa_hadd_u_w(vec0, vec0); vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); vec1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); tmp0 = __msa_hadd_u_w(vec0, vec0); tmp1 = __msa_hadd_u_w(vec1, vec1); tmp0 *= const_0x2AAA; tmp1 *= const_0x2AAA; tmp4 *= const_0x4000; tmp0 = (v4u32)__msa_srai_w((v4i32)tmp0, 16); tmp1 = (v4u32)__msa_srai_w((v4i32)tmp1, 16); tmp4 = (v4u32)__msa_srai_w((v4i32)tmp4, 16); vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); vec1 = (v8u16)__msa_pckev_h((v8i16)tmp4, (v8i16)tmp4); out = (v16u8)__msa_vshf_b(dst_mask, (v16i8)vec1, (v16i8)vec0); dst0 = __msa_copy_u_d((v2i64)out, 0); dst1 = __msa_copy_u_w((v4i32)out, 2); SD(dst0, dst_ptr); SW(dst1, dst_ptr + 8); s += 32; t += 32; dst_ptr += 12; } } void ScaleRowDown38_3_Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { int x, width; const uint8_t* s = src_ptr; const uint8_t* t0 = s + src_stride; const uint8_t* t1 = s + src_stride * 2; uint64_t dst0; uint32_t dst1; v16u8 src0, src1, src2, src3, src4, src5, out; v8u16 vec0, vec1, vec2, vec3, vec4, 
vec5, vec6, vec7; v4u32 tmp0, tmp1, tmp2, tmp3, tmp4; v8u16 zero = {0}; v8i16 mask = {0, 1, 2, 8, 3, 4, 5, 9}; v16i8 dst_mask = {0, 2, 16, 4, 6, 18, 8, 10, 20, 12, 14, 22, 0, 0, 0, 0}; v4u32 const_0x1C71 = (v4u32)__msa_fill_w(0x1C71); v4u32 const_0x2AAA = (v4u32)__msa_fill_w(0x2AAA); assert((dst_width % 3 == 0) && (dst_width > 0)); width = dst_width / 3; for (x = 0; x < width; x += 4) { src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); src2 = (v16u8)__msa_ld_b((v16i8*)t0, 0); src3 = (v16u8)__msa_ld_b((v16i8*)t0, 16); src4 = (v16u8)__msa_ld_b((v16i8*)t1, 0); src5 = (v16u8)__msa_ld_b((v16i8*)t1, 16); vec0 = (v8u16)__msa_ilvr_b((v16i8)src2, (v16i8)src0); vec1 = (v8u16)__msa_ilvl_b((v16i8)src2, (v16i8)src0); vec2 = (v8u16)__msa_ilvr_b((v16i8)src3, (v16i8)src1); vec3 = (v8u16)__msa_ilvl_b((v16i8)src3, (v16i8)src1); vec4 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src4); vec5 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src4); vec6 = (v8u16)__msa_ilvr_b((v16i8)zero, (v16i8)src5); vec7 = (v8u16)__msa_ilvl_b((v16i8)zero, (v16i8)src5); vec0 = __msa_hadd_u_h((v16u8)vec0, (v16u8)vec0); vec1 = __msa_hadd_u_h((v16u8)vec1, (v16u8)vec1); vec2 = __msa_hadd_u_h((v16u8)vec2, (v16u8)vec2); vec3 = __msa_hadd_u_h((v16u8)vec3, (v16u8)vec3); vec0 += __msa_hadd_u_h((v16u8)vec4, (v16u8)vec4); vec1 += __msa_hadd_u_h((v16u8)vec5, (v16u8)vec5); vec2 += __msa_hadd_u_h((v16u8)vec6, (v16u8)vec6); vec3 += __msa_hadd_u_h((v16u8)vec7, (v16u8)vec7); vec4 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec0); vec5 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec1); vec6 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec2); vec7 = (v8u16)__msa_vshf_h(mask, (v8i16)zero, (v8i16)vec3); vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0); vec1 = (v8u16)__msa_pckod_w((v4i32)vec3, (v4i32)vec2); vec0 = (v8u16)__msa_pckod_w((v4i32)vec1, (v4i32)vec0); tmp0 = __msa_hadd_u_w(vec4, vec4); tmp1 = __msa_hadd_u_w(vec5, vec5); tmp2 = __msa_hadd_u_w(vec6, vec6); tmp3 = __msa_hadd_u_w(vec7, vec7); tmp4 = __msa_hadd_u_w(vec0, vec0); vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); vec1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); tmp0 = __msa_hadd_u_w(vec0, vec0); tmp1 = __msa_hadd_u_w(vec1, vec1); tmp0 *= const_0x1C71; tmp1 *= const_0x1C71; tmp4 *= const_0x2AAA; tmp0 = (v4u32)__msa_srai_w((v4i32)tmp0, 16); tmp1 = (v4u32)__msa_srai_w((v4i32)tmp1, 16); tmp4 = (v4u32)__msa_srai_w((v4i32)tmp4, 16); vec0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); vec1 = (v8u16)__msa_pckev_h((v8i16)tmp4, (v8i16)tmp4); out = (v16u8)__msa_vshf_b(dst_mask, (v16i8)vec1, (v16i8)vec0); dst0 = __msa_copy_u_d((v2i64)out, 0); dst1 = __msa_copy_u_w((v4i32)out, 2); SD(dst0, dst_ptr); SW(dst1, dst_ptr + 8); s += 32; t0 += 32; t1 += 32; dst_ptr += 12; } } void ScaleAddRow_MSA(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { int x; v16u8 src0; v8u16 dst0, dst1; v16i8 zero = {0}; assert(src_width > 0); for (x = 0; x < src_width; x += 16) { src0 = LD_UB(src_ptr); dst0 = (v8u16)__msa_ld_h((v8i16*)dst_ptr, 0); dst1 = (v8u16)__msa_ld_h((v8i16*)dst_ptr, 16); dst0 += (v8u16)__msa_ilvr_b(zero, (v16i8)src0); dst1 += (v8u16)__msa_ilvl_b(zero, (v16i8)src0); ST_UH2(dst0, dst1, dst_ptr, 8); src_ptr += 16; dst_ptr += 16; } } void ScaleFilterCols_MSA(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) { int j; v4i32 vec_x = __msa_fill_w(x); v4i32 vec_dx = __msa_fill_w(dx); v4i32 vec_const = {0, 1, 2, 3}; v4i32 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, vec8, vec9; v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, 
tmp6, tmp7; v8u16 reg0, reg1; v16u8 dst0; v4i32 const_0xFFFF = __msa_fill_w(0xFFFF); v4i32 const_0x40 = __msa_fill_w(0x40); vec0 = vec_dx * vec_const; vec1 = vec_dx * 4; vec_x += vec0; for (j = 0; j < dst_width - 1; j += 16) { vec2 = vec_x >> 16; vec6 = vec_x & const_0xFFFF; vec_x += vec1; vec3 = vec_x >> 16; vec7 = vec_x & const_0xFFFF; vec_x += vec1; vec4 = vec_x >> 16; vec8 = vec_x & const_0xFFFF; vec_x += vec1; vec5 = vec_x >> 16; vec9 = vec_x & const_0xFFFF; vec_x += vec1; vec6 >>= 9; vec7 >>= 9; vec8 >>= 9; vec9 >>= 9; LOAD_INDEXED_DATA(src_ptr, vec2, tmp0); LOAD_INDEXED_DATA(src_ptr, vec3, tmp1); LOAD_INDEXED_DATA(src_ptr, vec4, tmp2); LOAD_INDEXED_DATA(src_ptr, vec5, tmp3); vec2 += 1; vec3 += 1; vec4 += 1; vec5 += 1; LOAD_INDEXED_DATA(src_ptr, vec2, tmp4); LOAD_INDEXED_DATA(src_ptr, vec3, tmp5); LOAD_INDEXED_DATA(src_ptr, vec4, tmp6); LOAD_INDEXED_DATA(src_ptr, vec5, tmp7); tmp4 -= tmp0; tmp5 -= tmp1; tmp6 -= tmp2; tmp7 -= tmp3; tmp4 *= vec6; tmp5 *= vec7; tmp6 *= vec8; tmp7 *= vec9; tmp4 += const_0x40; tmp5 += const_0x40; tmp6 += const_0x40; tmp7 += const_0x40; tmp4 >>= 7; tmp5 >>= 7; tmp6 >>= 7; tmp7 >>= 7; tmp0 += tmp4; tmp1 += tmp5; tmp2 += tmp6; tmp3 += tmp7; reg0 = (v8u16)__msa_pckev_h((v8i16)tmp1, (v8i16)tmp0); reg1 = (v8u16)__msa_pckev_h((v8i16)tmp3, (v8i16)tmp2); dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); __msa_st_b(dst0, dst_ptr, 0); dst_ptr += 16; } } void ScaleARGBCols_MSA(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { const uint32_t* src = (const uint32_t*)(src_argb); uint32_t* dst = (uint32_t*)(dst_argb); int j; v4i32 x_vec = __msa_fill_w(x); v4i32 dx_vec = __msa_fill_w(dx); v4i32 const_vec = {0, 1, 2, 3}; v4i32 vec0, vec1, vec2; v4i32 dst0; vec0 = dx_vec * const_vec; vec1 = dx_vec * 4; x_vec += vec0; for (j = 0; j < dst_width; j += 4) { vec2 = x_vec >> 16; x_vec += vec1; LOAD_INDEXED_DATA(src, vec2, dst0); __msa_st_w(dst0, dst, 0); dst += 4; } } void ScaleARGBFilterCols_MSA(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { const uint32_t* src = (const uint32_t*)(src_argb); int j; v4u32 src0, src1, src2, src3; v4u32 vec0, vec1, vec2, vec3; v16u8 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; v16u8 mult0, mult1, mult2, mult3; v8u16 tmp0, tmp1, tmp2, tmp3; v16u8 dst0, dst1; v4u32 vec_x = (v4u32)__msa_fill_w(x); v4u32 vec_dx = (v4u32)__msa_fill_w(dx); v4u32 vec_const = {0, 1, 2, 3}; v16u8 const_0x7f = (v16u8)__msa_fill_b(0x7f); vec0 = vec_dx * vec_const; vec1 = vec_dx * 4; vec_x += vec0; for (j = 0; j < dst_width - 1; j += 8) { vec2 = vec_x >> 16; reg0 = (v16u8)(vec_x >> 9); vec_x += vec1; vec3 = vec_x >> 16; reg1 = (v16u8)(vec_x >> 9); vec_x += vec1; reg0 = reg0 & const_0x7f; reg1 = reg1 & const_0x7f; reg0 = (v16u8)__msa_shf_b((v16i8)reg0, 0); reg1 = (v16u8)__msa_shf_b((v16i8)reg1, 0); reg2 = reg0 ^ const_0x7f; reg3 = reg1 ^ const_0x7f; mult0 = (v16u8)__msa_ilvr_b((v16i8)reg0, (v16i8)reg2); mult1 = (v16u8)__msa_ilvl_b((v16i8)reg0, (v16i8)reg2); mult2 = (v16u8)__msa_ilvr_b((v16i8)reg1, (v16i8)reg3); mult3 = (v16u8)__msa_ilvl_b((v16i8)reg1, (v16i8)reg3); LOAD_INDEXED_DATA(src, vec2, src0); LOAD_INDEXED_DATA(src, vec3, src1); vec2 += 1; vec3 += 1; LOAD_INDEXED_DATA(src, vec2, src2); LOAD_INDEXED_DATA(src, vec3, src3); reg4 = (v16u8)__msa_ilvr_b((v16i8)src2, (v16i8)src0); reg5 = (v16u8)__msa_ilvl_b((v16i8)src2, (v16i8)src0); reg6 = (v16u8)__msa_ilvr_b((v16i8)src3, (v16i8)src1); reg7 = (v16u8)__msa_ilvl_b((v16i8)src3, (v16i8)src1); tmp0 = __msa_dotp_u_h(reg4, mult0); tmp1 = __msa_dotp_u_h(reg5, mult1); tmp2 
= __msa_dotp_u_h(reg6, mult2); tmp3 = __msa_dotp_u_h(reg7, mult3); tmp0 >>= 7; tmp1 >>= 7; tmp2 >>= 7; tmp3 >>= 7; dst0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); dst1 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2); __msa_st_b(dst0, dst_argb, 0); __msa_st_b(dst1, dst_argb, 16); dst_argb += 32; } } void ScaleRowDown34_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { int x; (void)src_stride; v16u8 src0, src1, src2, src3; v16u8 vec0, vec1, vec2; v16i8 mask0 = {0, 1, 3, 4, 5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20}; v16i8 mask1 = {5, 7, 8, 9, 11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25}; v16i8 mask2 = {11, 12, 13, 15, 16, 17, 19, 20, 21, 23, 24, 25, 27, 28, 29, 31}; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 48) { src0 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 0); src1 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 16); src2 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 32); src3 = (v16u8)__msa_ld_b((v16i8*)src_ptr, 48); vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src1, (v16i8)src0); vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src2, (v16i8)src1); vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src2); __msa_st_b((v16i8)vec0, dst, 0); __msa_st_b((v16i8)vec1, dst, 16); __msa_st_b((v16i8)vec2, dst, 32); src_ptr += 64; dst += 48; } } void ScaleRowDown34_0_Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* d, int dst_width) { const uint8_t* s = src_ptr; const uint8_t* t = src_ptr + src_stride; int x; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2; v16u8 vec0, vec1, vec2, vec3, vec4, vec5; v16u8 vec6, vec7, vec8, vec9, vec10, vec11; v8i16 reg0, reg1, reg2, reg3, reg4, reg5; v8i16 reg6, reg7, reg8, reg9, reg10, reg11; v16u8 const0 = {3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1}; v16u8 const1 = {1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1}; v16u8 const2 = {1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3}; v16i8 mask0 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; v16i8 mask1 = {10, 11, 12, 13, 13, 14, 14, 15, 16, 17, 17, 18, 18, 19, 20, 21}; v16i8 mask2 = {5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15}; v8i16 shft0 = {2, 1, 2, 2, 1, 2, 2, 1}; v8i16 shft1 = {2, 2, 1, 2, 2, 1, 2, 2}; v8i16 shft2 = {1, 2, 2, 1, 2, 2, 1, 2}; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 48) { src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); src4 = (v16u8)__msa_ld_b((v16i8*)t, 0); src5 = (v16u8)__msa_ld_b((v16i8*)t, 16); src6 = (v16u8)__msa_ld_b((v16i8*)t, 32); src7 = (v16u8)__msa_ld_b((v16i8*)t, 48); vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src0, (v16i8)src0); vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src1, (v16i8)src1); vec3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src2, (v16i8)src2); vec4 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); vec5 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src3); vec6 = (v16u8)__msa_vshf_b(mask0, (v16i8)src4, (v16i8)src4); vec7 = (v16u8)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4); vec8 = (v16u8)__msa_vshf_b(mask2, (v16i8)src5, (v16i8)src5); vec9 = (v16u8)__msa_vshf_b(mask0, (v16i8)src6, (v16i8)src6); vec10 = (v16u8)__msa_vshf_b(mask1, (v16i8)src7, (v16i8)src6); vec11 = (v16u8)__msa_vshf_b(mask2, (v16i8)src7, (v16i8)src7); reg0 = (v8i16)__msa_dotp_u_h(vec0, const0); reg1 = (v8i16)__msa_dotp_u_h(vec1, const1); reg2 = (v8i16)__msa_dotp_u_h(vec2, const2); reg3 = 
(v8i16)__msa_dotp_u_h(vec3, const0); reg4 = (v8i16)__msa_dotp_u_h(vec4, const1); reg5 = (v8i16)__msa_dotp_u_h(vec5, const2); reg6 = (v8i16)__msa_dotp_u_h(vec6, const0); reg7 = (v8i16)__msa_dotp_u_h(vec7, const1); reg8 = (v8i16)__msa_dotp_u_h(vec8, const2); reg9 = (v8i16)__msa_dotp_u_h(vec9, const0); reg10 = (v8i16)__msa_dotp_u_h(vec10, const1); reg11 = (v8i16)__msa_dotp_u_h(vec11, const2); reg0 = __msa_srar_h(reg0, shft0); reg1 = __msa_srar_h(reg1, shft1); reg2 = __msa_srar_h(reg2, shft2); reg3 = __msa_srar_h(reg3, shft0); reg4 = __msa_srar_h(reg4, shft1); reg5 = __msa_srar_h(reg5, shft2); reg6 = __msa_srar_h(reg6, shft0); reg7 = __msa_srar_h(reg7, shft1); reg8 = __msa_srar_h(reg8, shft2); reg9 = __msa_srar_h(reg9, shft0); reg10 = __msa_srar_h(reg10, shft1); reg11 = __msa_srar_h(reg11, shft2); reg0 = reg0 * 3 + reg6; reg1 = reg1 * 3 + reg7; reg2 = reg2 * 3 + reg8; reg3 = reg3 * 3 + reg9; reg4 = reg4 * 3 + reg10; reg5 = reg5 * 3 + reg11; reg0 = __msa_srari_h(reg0, 2); reg1 = __msa_srari_h(reg1, 2); reg2 = __msa_srari_h(reg2, 2); reg3 = __msa_srari_h(reg3, 2); reg4 = __msa_srari_h(reg4, 2); reg5 = __msa_srari_h(reg5, 2); dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); dst1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2); dst2 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); __msa_st_b((v16i8)dst0, d, 0); __msa_st_b((v16i8)dst1, d, 16); __msa_st_b((v16i8)dst2, d, 32); s += 64; t += 64; d += 48; } } void ScaleRowDown34_1_Box_MSA(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* d, int dst_width) { const uint8_t* s = src_ptr; const uint8_t* t = src_ptr + src_stride; int x; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, dst0, dst1, dst2; v16u8 vec0, vec1, vec2, vec3, vec4, vec5; v16u8 vec6, vec7, vec8, vec9, vec10, vec11; v8i16 reg0, reg1, reg2, reg3, reg4, reg5; v8i16 reg6, reg7, reg8, reg9, reg10, reg11; v16u8 const0 = {3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1}; v16u8 const1 = {1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1}; v16u8 const2 = {1, 1, 1, 3, 3, 1, 1, 1, 1, 3, 3, 1, 1, 1, 1, 3}; v16i8 mask0 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; v16i8 mask1 = {10, 11, 12, 13, 13, 14, 14, 15, 16, 17, 17, 18, 18, 19, 20, 21}; v16i8 mask2 = {5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15}; v8i16 shft0 = {2, 1, 2, 2, 1, 2, 2, 1}; v8i16 shft1 = {2, 2, 1, 2, 2, 1, 2, 2}; v8i16 shft2 = {1, 2, 2, 1, 2, 2, 1, 2}; assert((dst_width % 3 == 0) && (dst_width > 0)); for (x = 0; x < dst_width; x += 48) { src0 = (v16u8)__msa_ld_b((v16i8*)s, 0); src1 = (v16u8)__msa_ld_b((v16i8*)s, 16); src2 = (v16u8)__msa_ld_b((v16i8*)s, 32); src3 = (v16u8)__msa_ld_b((v16i8*)s, 48); src4 = (v16u8)__msa_ld_b((v16i8*)t, 0); src5 = (v16u8)__msa_ld_b((v16i8*)t, 16); src6 = (v16u8)__msa_ld_b((v16i8*)t, 32); src7 = (v16u8)__msa_ld_b((v16i8*)t, 48); vec0 = (v16u8)__msa_vshf_b(mask0, (v16i8)src0, (v16i8)src0); vec1 = (v16u8)__msa_vshf_b(mask1, (v16i8)src1, (v16i8)src0); vec2 = (v16u8)__msa_vshf_b(mask2, (v16i8)src1, (v16i8)src1); vec3 = (v16u8)__msa_vshf_b(mask0, (v16i8)src2, (v16i8)src2); vec4 = (v16u8)__msa_vshf_b(mask1, (v16i8)src3, (v16i8)src2); vec5 = (v16u8)__msa_vshf_b(mask2, (v16i8)src3, (v16i8)src3); vec6 = (v16u8)__msa_vshf_b(mask0, (v16i8)src4, (v16i8)src4); vec7 = (v16u8)__msa_vshf_b(mask1, (v16i8)src5, (v16i8)src4); vec8 = (v16u8)__msa_vshf_b(mask2, (v16i8)src5, (v16i8)src5); vec9 = (v16u8)__msa_vshf_b(mask0, (v16i8)src6, (v16i8)src6); vec10 = (v16u8)__msa_vshf_b(mask1, (v16i8)src7, (v16i8)src6); vec11 = (v16u8)__msa_vshf_b(mask2, (v16i8)src7, (v16i8)src7); reg0 = 
(v8i16)__msa_dotp_u_h(vec0, const0); reg1 = (v8i16)__msa_dotp_u_h(vec1, const1); reg2 = (v8i16)__msa_dotp_u_h(vec2, const2); reg3 = (v8i16)__msa_dotp_u_h(vec3, const0); reg4 = (v8i16)__msa_dotp_u_h(vec4, const1); reg5 = (v8i16)__msa_dotp_u_h(vec5, const2); reg6 = (v8i16)__msa_dotp_u_h(vec6, const0); reg7 = (v8i16)__msa_dotp_u_h(vec7, const1); reg8 = (v8i16)__msa_dotp_u_h(vec8, const2); reg9 = (v8i16)__msa_dotp_u_h(vec9, const0); reg10 = (v8i16)__msa_dotp_u_h(vec10, const1); reg11 = (v8i16)__msa_dotp_u_h(vec11, const2); reg0 = __msa_srar_h(reg0, shft0); reg1 = __msa_srar_h(reg1, shft1); reg2 = __msa_srar_h(reg2, shft2); reg3 = __msa_srar_h(reg3, shft0); reg4 = __msa_srar_h(reg4, shft1); reg5 = __msa_srar_h(reg5, shft2); reg6 = __msa_srar_h(reg6, shft0); reg7 = __msa_srar_h(reg7, shft1); reg8 = __msa_srar_h(reg8, shft2); reg9 = __msa_srar_h(reg9, shft0); reg10 = __msa_srar_h(reg10, shft1); reg11 = __msa_srar_h(reg11, shft2); reg0 += reg6; reg1 += reg7; reg2 += reg8; reg3 += reg9; reg4 += reg10; reg5 += reg11; reg0 = __msa_srari_h(reg0, 1); reg1 = __msa_srari_h(reg1, 1); reg2 = __msa_srari_h(reg2, 1); reg3 = __msa_srari_h(reg3, 1); reg4 = __msa_srari_h(reg4, 1); reg5 = __msa_srari_h(reg5, 1); dst0 = (v16u8)__msa_pckev_b((v16i8)reg1, (v16i8)reg0); dst1 = (v16u8)__msa_pckev_b((v16i8)reg3, (v16i8)reg2); dst2 = (v16u8)__msa_pckev_b((v16i8)reg5, (v16i8)reg4); __msa_st_b((v16i8)dst0, d, 0); __msa_st_b((v16i8)dst1, d, 16); __msa_st_b((v16i8)dst2, d, 32); s += 64; t += 64; d += 48; } } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif #endif // !defined(LIBYUV_DISABLE_MSA) && defined(__mips_msa) libvpx-1.8.2/third_party/libyuv/source/scale_neon.cc000066400000000000000000001161511357355204000226170ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // This module is for GCC Neon. #if !defined(LIBYUV_DISABLE_NEON) && defined(__ARM_NEON__) && \ !defined(__aarch64__) // NEON downscalers with interpolation. // Provided by Fritz Koenig // Read 32x1 throw away even pixels, and write 16x1. void ScaleRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { (void)src_stride; asm volatile( "1: \n" // load even pixels into q0, odd into q1 "vld2.8 {q0, q1}, [%0]! \n" "subs %2, %2, #16 \n" // 16 processed per loop "vst1.8 {q1}, [%1]! \n" // store odd pixels "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst), // %1 "+r"(dst_width) // %2 : : "q0", "q1" // Clobber List ); } // Read 32x1 average down and write 16x1. void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { (void)src_stride; asm volatile( "1: \n" "vld2.8 {q0, q1}, [%0]! \n" // load 32 pixels "subs %2, %2, #16 \n" // 16 processed per loop "vrhadd.u8 q0, q0, q1 \n" // rounding half add "vst1.8 {q0}, [%1]! \n" "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst), // %1 "+r"(dst_width) // %2 : : "q0", "q1" // Clobber List ); } // Read 32x2 average down and write 16x1. 
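// As a scalar model of the 2x2 box filter below (an illustrative sketch only;
// the helper name is ours, and libyuv keeps its portable row functions in a
// separate file): each output byte is the rounded average of a 2x2 block.
#if 0  // reference sketch, not compiled
static void ScaleRowDown2Box_Sketch(const uint8_t* src_ptr,
                                    ptrdiff_t src_stride, uint8_t* dst,
                                    int dst_width) {
  const uint8_t* s = src_ptr;
  const uint8_t* t = src_ptr + src_stride;  // second source row
  int x;
  for (x = 0; x < dst_width; ++x) {
    dst[x] = (uint8_t)((s[0] + s[1] + t[0] + t[1] + 2) >> 2);  // round ties up
    s += 2;
    t += 2;
  }
}
#endif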
void ScaleRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { asm volatile( // change the stride to row 2 pointer "add %1, %0 \n" "1: \n" "vld1.8 {q0, q1}, [%0]! \n" // load row 1 and post inc "vld1.8 {q2, q3}, [%1]! \n" // load row 2 and post inc "subs %3, %3, #16 \n" // 16 processed per loop "vpaddl.u8 q0, q0 \n" // row 1 add adjacent "vpaddl.u8 q1, q1 \n" "vpadal.u8 q0, q2 \n" // row 2 add adjacent + // row1 "vpadal.u8 q1, q3 \n" "vrshrn.u16 d0, q0, #2 \n" // downshift, round and // pack "vrshrn.u16 d1, q1, #2 \n" "vst1.8 {q0}, [%2]! \n" "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(src_stride), // %1 "+r"(dst), // %2 "+r"(dst_width) // %3 : : "q0", "q1", "q2", "q3" // Clobber List ); } void ScaleRowDown4_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 "subs %2, %2, #8 \n" // 8 processed per loop "vst1.8 {d2}, [%1]! \n" "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : : "q0", "q1", "memory", "cc"); } void ScaleRowDown4Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { const uint8_t* src_ptr1 = src_ptr + src_stride; const uint8_t* src_ptr2 = src_ptr + src_stride * 2; const uint8_t* src_ptr3 = src_ptr + src_stride * 3; asm volatile( "1: \n" "vld1.8 {q0}, [%0]! \n" // load up 16x4 "vld1.8 {q1}, [%3]! \n" "vld1.8 {q2}, [%4]! \n" "vld1.8 {q3}, [%5]! \n" "subs %2, %2, #4 \n" "vpaddl.u8 q0, q0 \n" "vpadal.u8 q0, q1 \n" "vpadal.u8 q0, q2 \n" "vpadal.u8 q0, q3 \n" "vpaddl.u16 q0, q0 \n" "vrshrn.u32 d0, q0, #4 \n" // divide by 16 w/rounding "vmovn.u16 d0, q0 \n" "vst1.32 {d0[0]}, [%1]! \n" "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width), // %2 "+r"(src_ptr1), // %3 "+r"(src_ptr2), // %4 "+r"(src_ptr3) // %5 : : "q0", "q1", "q2", "q3", "memory", "cc"); } // Down scale from 4 to 3 pixels. Use the neon multilane read/write // to load up the every 4th pixel into a 4 different registers. // Point samples 32 pixels to 24 pixels. void ScaleRowDown34_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 "subs %2, %2, #24 \n" "vmov d2, d3 \n" // order d0, d1, d2 "vst3.8 {d0, d1, d2}, [%1]! \n" "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : : "d0", "d1", "d2", "d3", "memory", "cc"); } void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { asm volatile( "vmov.u8 d24, #3 \n" "add %3, %0 \n" "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 "vld4.8 {d4, d5, d6, d7}, [%3]! 
\n" // src line 1 "subs %2, %2, #24 \n" // filter src line 0 with src line 1 // expand chars to shorts to allow for room // when adding lines together "vmovl.u8 q8, d4 \n" "vmovl.u8 q9, d5 \n" "vmovl.u8 q10, d6 \n" "vmovl.u8 q11, d7 \n" // 3 * line_0 + line_1 "vmlal.u8 q8, d0, d24 \n" "vmlal.u8 q9, d1, d24 \n" "vmlal.u8 q10, d2, d24 \n" "vmlal.u8 q11, d3, d24 \n" // (3 * line_0 + line_1) >> 2 "vqrshrn.u16 d0, q8, #2 \n" "vqrshrn.u16 d1, q9, #2 \n" "vqrshrn.u16 d2, q10, #2 \n" "vqrshrn.u16 d3, q11, #2 \n" // a0 = (src[0] * 3 + s[1] * 1) >> 2 "vmovl.u8 q8, d1 \n" "vmlal.u8 q8, d0, d24 \n" "vqrshrn.u16 d0, q8, #2 \n" // a1 = (src[1] * 1 + s[2] * 1) >> 1 "vrhadd.u8 d1, d1, d2 \n" // a2 = (src[2] * 1 + s[3] * 3) >> 2 "vmovl.u8 q8, d2 \n" "vmlal.u8 q8, d3, d24 \n" "vqrshrn.u16 d2, q8, #2 \n" "vst3.8 {d0, d1, d2}, [%1]! \n" "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width), // %2 "+r"(src_stride) // %3 : : "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "d24", "memory", "cc"); } void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { asm volatile( "vmov.u8 d24, #3 \n" "add %3, %0 \n" "1: \n" "vld4.8 {d0, d1, d2, d3}, [%0]! \n" // src line 0 "vld4.8 {d4, d5, d6, d7}, [%3]! \n" // src line 1 "subs %2, %2, #24 \n" // average src line 0 with src line 1 "vrhadd.u8 q0, q0, q2 \n" "vrhadd.u8 q1, q1, q3 \n" // a0 = (src[0] * 3 + s[1] * 1) >> 2 "vmovl.u8 q3, d1 \n" "vmlal.u8 q3, d0, d24 \n" "vqrshrn.u16 d0, q3, #2 \n" // a1 = (src[1] * 1 + s[2] * 1) >> 1 "vrhadd.u8 d1, d1, d2 \n" // a2 = (src[2] * 1 + s[3] * 3) >> 2 "vmovl.u8 q3, d2 \n" "vmlal.u8 q3, d3, d24 \n" "vqrshrn.u16 d2, q3, #2 \n" "vst3.8 {d0, d1, d2}, [%1]! \n" "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width), // %2 "+r"(src_stride) // %3 : : "r4", "q0", "q1", "q2", "q3", "d24", "memory", "cc"); } #define HAS_SCALEROWDOWN38_NEON static const uvec8 kShuf38 = {0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0}; static const uvec8 kShuf38_2 = {0, 8, 16, 2, 10, 17, 4, 12, 18, 6, 14, 19, 0, 0, 0, 0}; static const vec16 kMult38_Div6 = {65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12}; static const vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18}; // 32 -> 12 void ScaleRowDown38_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( "vld1.8 {q3}, [%3] \n" "1: \n" "vld1.8 {d0, d1, d2, d3}, [%0]! \n" "subs %2, %2, #12 \n" "vtbl.u8 d4, {d0, d1, d2, d3}, d6 \n" "vtbl.u8 d5, {d0, d1, d2, d3}, d7 \n" "vst1.8 {d4}, [%1]! \n" "vst1.32 {d5[0]}, [%1]! \n" "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : "r"(&kShuf38) // %3 : "d0", "d1", "d2", "d3", "d4", "d5", "memory", "cc"); } // 32x3 -> 12x1 void OMITFP ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { const uint8_t* src_ptr1 = src_ptr + src_stride * 2; asm volatile( "vld1.16 {q13}, [%5] \n" "vld1.8 {q14}, [%6] \n" "vld1.8 {q15}, [%7] \n" "add %3, %0 \n" "1: \n" // d0 = 00 40 01 41 02 42 03 43 // d1 = 10 50 11 51 12 52 13 53 // d2 = 20 60 21 61 22 62 23 63 // d3 = 30 70 31 71 32 72 33 73 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" "vld4.8 {d4, d5, d6, d7}, [%3]! \n" "vld4.8 {d16, d17, d18, d19}, [%4]! \n" "subs %2, %2, #12 \n" // Shuffle the input data around to get align the data // so adjacent data can be added. 
0,1 - 2,3 - 4,5 - 6,7 // d0 = 00 10 01 11 02 12 03 13 // d1 = 40 50 41 51 42 52 43 53 "vtrn.u8 d0, d1 \n" "vtrn.u8 d4, d5 \n" "vtrn.u8 d16, d17 \n" // d2 = 20 30 21 31 22 32 23 33 // d3 = 60 70 61 71 62 72 63 73 "vtrn.u8 d2, d3 \n" "vtrn.u8 d6, d7 \n" "vtrn.u8 d18, d19 \n" // d0 = 00+10 01+11 02+12 03+13 // d2 = 40+50 41+51 42+52 43+53 "vpaddl.u8 q0, q0 \n" "vpaddl.u8 q2, q2 \n" "vpaddl.u8 q8, q8 \n" // d3 = 60+70 61+71 62+72 63+73 "vpaddl.u8 d3, d3 \n" "vpaddl.u8 d7, d7 \n" "vpaddl.u8 d19, d19 \n" // combine source lines "vadd.u16 q0, q2 \n" "vadd.u16 q0, q8 \n" "vadd.u16 d4, d3, d7 \n" "vadd.u16 d4, d19 \n" // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0] // + s[6 + st * 1] + s[7 + st * 1] // + s[6 + st * 2] + s[7 + st * 2]) / 6 "vqrdmulh.s16 q2, q2, q13 \n" "vmovn.u16 d4, q2 \n" // Shuffle 2,3 reg around so that 2 can be added to the // 0,1 reg and 3 can be added to the 4,5 reg. This // requires expanding from u8 to u16 as the 0,1 and 4,5 // registers are already expanded. Then do transposes // to get aligned. // q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 "vmovl.u8 q1, d2 \n" "vmovl.u8 q3, d6 \n" "vmovl.u8 q9, d18 \n" // combine source lines "vadd.u16 q1, q3 \n" "vadd.u16 q1, q9 \n" // d4 = xx 20 xx 30 xx 22 xx 32 // d5 = xx 21 xx 31 xx 23 xx 33 "vtrn.u32 d2, d3 \n" // d4 = xx 20 xx 21 xx 22 xx 23 // d5 = xx 30 xx 31 xx 32 xx 33 "vtrn.u16 d2, d3 \n" // 0+1+2, 3+4+5 "vadd.u16 q0, q1 \n" // Need to divide, but can't downshift as the the value // isn't a power of 2. So multiply by 65536 / n // and take the upper 16 bits. "vqrdmulh.s16 q0, q0, q15 \n" // Align for table lookup, vtbl requires registers to // be adjacent "vmov.u8 d2, d4 \n" "vtbl.u8 d3, {d0, d1, d2}, d28 \n" "vtbl.u8 d4, {d0, d1, d2}, d29 \n" "vst1.8 {d3}, [%1]! \n" "vst1.32 {d4[0]}, [%1]! \n" "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width), // %2 "+r"(src_stride), // %3 "+r"(src_ptr1) // %4 : "r"(&kMult38_Div6), // %5 "r"(&kShuf38_2), // %6 "r"(&kMult38_Div9) // %7 : "q0", "q1", "q2", "q3", "q8", "q9", "q13", "q14", "q15", "memory", "cc"); } // 32x2 -> 12x1 void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { asm volatile( "vld1.16 {q13}, [%4] \n" "vld1.8 {q14}, [%5] \n" "add %3, %0 \n" "1: \n" // d0 = 00 40 01 41 02 42 03 43 // d1 = 10 50 11 51 12 52 13 53 // d2 = 20 60 21 61 22 62 23 63 // d3 = 30 70 31 71 32 72 33 73 "vld4.8 {d0, d1, d2, d3}, [%0]! \n" "vld4.8 {d4, d5, d6, d7}, [%3]! \n" "subs %2, %2, #12 \n" // Shuffle the input data around to get align the data // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 // d0 = 00 10 01 11 02 12 03 13 // d1 = 40 50 41 51 42 52 43 53 "vtrn.u8 d0, d1 \n" "vtrn.u8 d4, d5 \n" // d2 = 20 30 21 31 22 32 23 33 // d3 = 60 70 61 71 62 72 63 73 "vtrn.u8 d2, d3 \n" "vtrn.u8 d6, d7 \n" // d0 = 00+10 01+11 02+12 03+13 // d2 = 40+50 41+51 42+52 43+53 "vpaddl.u8 q0, q0 \n" "vpaddl.u8 q2, q2 \n" // d3 = 60+70 61+71 62+72 63+73 "vpaddl.u8 d3, d3 \n" "vpaddl.u8 d7, d7 \n" // combine source lines "vadd.u16 q0, q2 \n" "vadd.u16 d4, d3, d7 \n" // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4 "vqrshrn.u16 d4, q2, #2 \n" // Shuffle 2,3 reg around so that 2 can be added to the // 0,1 reg and 3 can be added to the 4,5 reg. This // requires expanding from u8 to u16 as the 0,1 and 4,5 // registers are already expanded. Then do transposes // to get aligned. 
// q2 = xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 "vmovl.u8 q1, d2 \n" "vmovl.u8 q3, d6 \n" // combine source lines "vadd.u16 q1, q3 \n" // d4 = xx 20 xx 30 xx 22 xx 32 // d5 = xx 21 xx 31 xx 23 xx 33 "vtrn.u32 d2, d3 \n" // d4 = xx 20 xx 21 xx 22 xx 23 // d5 = xx 30 xx 31 xx 32 xx 33 "vtrn.u16 d2, d3 \n" // 0+1+2, 3+4+5 "vadd.u16 q0, q1 \n" // Need to divide, but can't downshift as the the value // isn't a power of 2. So multiply by 65536 / n // and take the upper 16 bits. "vqrdmulh.s16 q0, q0, q13 \n" // Align for table lookup, vtbl requires registers to // be adjacent "vmov.u8 d2, d4 \n" "vtbl.u8 d3, {d0, d1, d2}, d28 \n" "vtbl.u8 d4, {d0, d1, d2}, d29 \n" "vst1.8 {d3}, [%1]! \n" "vst1.32 {d4[0]}, [%1]! \n" "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width), // %2 "+r"(src_stride) // %3 : "r"(&kMult38_Div6), // %4 "r"(&kShuf38_2) // %5 : "q0", "q1", "q2", "q3", "q13", "q14", "memory", "cc"); } void ScaleAddRows_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int src_width, int src_height) { const uint8_t* src_tmp; asm volatile( "1: \n" "mov %0, %1 \n" "mov r12, %5 \n" "veor q2, q2, q2 \n" "veor q3, q3, q3 \n" "2: \n" // load 16 pixels into q0 "vld1.8 {q0}, [%0], %3 \n" "vaddw.u8 q3, q3, d1 \n" "vaddw.u8 q2, q2, d0 \n" "subs r12, r12, #1 \n" "bgt 2b \n" "vst1.16 {q2, q3}, [%2]! \n" // store pixels "add %1, %1, #16 \n" "subs %4, %4, #16 \n" // 16 processed per loop "bgt 1b \n" : "=&r"(src_tmp), // %0 "+r"(src_ptr), // %1 "+r"(dst_ptr), // %2 "+r"(src_stride), // %3 "+r"(src_width), // %4 "+r"(src_height) // %5 : : "memory", "cc", "r12", "q0", "q1", "q2", "q3" // Clobber List ); } // TODO(Yang Zhang): Investigate less load instructions for // the x/dx stepping #define LOAD2_DATA8_LANE(n) \ "lsr %5, %3, #16 \n" \ "add %6, %1, %5 \n" \ "add %3, %3, %4 \n" \ "vld2.8 {d6[" #n "], d7[" #n "]}, [%6] \n" // The NEON version mimics this formula (from row_common.cc): // #define BLENDER(a, b, f) (uint8_t)((int)(a) + // ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) void ScaleFilterCols_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) { int dx_offset[4] = {0, 1, 2, 3}; int* tmp = dx_offset; const uint8_t* src_tmp = src_ptr; asm volatile ( "vdup.32 q0, %3 \n" // x "vdup.32 q1, %4 \n" // dx "vld1.32 {q2}, [%5] \n" // 0 1 2 3 "vshl.i32 q3, q1, #2 \n" // 4 * dx "vmul.s32 q1, q1, q2 \n" // x , x + 1 * dx, x + 2 * dx, x + 3 * dx "vadd.s32 q1, q1, q0 \n" // x + 4 * dx, x + 5 * dx, x + 6 * dx, x + 7 * dx "vadd.s32 q2, q1, q3 \n" "vshl.i32 q0, q3, #1 \n" // 8 * dx "1: \n" LOAD2_DATA8_LANE(0) LOAD2_DATA8_LANE(1) LOAD2_DATA8_LANE(2) LOAD2_DATA8_LANE(3) LOAD2_DATA8_LANE(4) LOAD2_DATA8_LANE(5) LOAD2_DATA8_LANE(6) LOAD2_DATA8_LANE(7) "vmov q10, q1 \n" "vmov q11, q2 \n" "vuzp.16 q10, q11 \n" "vmovl.u8 q8, d6 \n" "vmovl.u8 q9, d7 \n" "vsubl.s16 q11, d18, d16 \n" "vsubl.s16 q12, d19, d17 \n" "vmovl.u16 q13, d20 \n" "vmovl.u16 q10, d21 \n" "vmul.s32 q11, q11, q13 \n" "vmul.s32 q12, q12, q10 \n" "vrshrn.s32 d18, q11, #16 \n" "vrshrn.s32 d19, q12, #16 \n" "vadd.s16 q8, q8, q9 \n" "vmovn.s16 d6, q8 \n" "vst1.8 {d6}, [%0]! 
\n" // store pixels "vadd.s32 q1, q1, q0 \n" "vadd.s32 q2, q2, q0 \n" "subs %2, %2, #8 \n" // 8 processed per loop "bgt 1b \n" : "+r"(dst_ptr), // %0 "+r"(src_ptr), // %1 "+r"(dst_width), // %2 "+r"(x), // %3 "+r"(dx), // %4 "+r"(tmp), // %5 "+r"(src_tmp) // %6 : : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13" ); } #undef LOAD2_DATA8_LANE // 16x2 -> 16x1 void ScaleFilterRows_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { asm volatile( "cmp %4, #0 \n" "beq 100f \n" "add %2, %1 \n" "cmp %4, #64 \n" "beq 75f \n" "cmp %4, #128 \n" "beq 50f \n" "cmp %4, #192 \n" "beq 25f \n" "vdup.8 d5, %4 \n" "rsb %4, #256 \n" "vdup.8 d4, %4 \n" // General purpose row blend. "1: \n" "vld1.8 {q0}, [%1]! \n" "vld1.8 {q1}, [%2]! \n" "subs %3, %3, #16 \n" "vmull.u8 q13, d0, d4 \n" "vmull.u8 q14, d1, d4 \n" "vmlal.u8 q13, d2, d5 \n" "vmlal.u8 q14, d3, d5 \n" "vrshrn.u16 d0, q13, #8 \n" "vrshrn.u16 d1, q14, #8 \n" "vst1.8 {q0}, [%0]! \n" "bgt 1b \n" "b 99f \n" // Blend 25 / 75. "25: \n" "vld1.8 {q0}, [%1]! \n" "vld1.8 {q1}, [%2]! \n" "subs %3, %3, #16 \n" "vrhadd.u8 q0, q1 \n" "vrhadd.u8 q0, q1 \n" "vst1.8 {q0}, [%0]! \n" "bgt 25b \n" "b 99f \n" // Blend 50 / 50. "50: \n" "vld1.8 {q0}, [%1]! \n" "vld1.8 {q1}, [%2]! \n" "subs %3, %3, #16 \n" "vrhadd.u8 q0, q1 \n" "vst1.8 {q0}, [%0]! \n" "bgt 50b \n" "b 99f \n" // Blend 75 / 25. "75: \n" "vld1.8 {q1}, [%1]! \n" "vld1.8 {q0}, [%2]! \n" "subs %3, %3, #16 \n" "vrhadd.u8 q0, q1 \n" "vrhadd.u8 q0, q1 \n" "vst1.8 {q0}, [%0]! \n" "bgt 75b \n" "b 99f \n" // Blend 100 / 0 - Copy row unchanged. "100: \n" "vld1.8 {q0}, [%1]! \n" "subs %3, %3, #16 \n" "vst1.8 {q0}, [%0]! \n" "bgt 100b \n" "99: \n" "vst1.8 {d1[7]}, [%0] \n" : "+r"(dst_ptr), // %0 "+r"(src_ptr), // %1 "+r"(src_stride), // %2 "+r"(dst_width), // %3 "+r"(source_y_fraction) // %4 : : "q0", "q1", "d4", "d5", "q13", "q14", "memory", "cc"); } void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { (void)src_stride; asm volatile( "1: \n" "vld4.32 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. "vld4.32 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB "subs %2, %2, #8 \n" // 8 processed per loop "vmov q2, q1 \n" // load next 8 ARGB "vst2.32 {q2, q3}, [%1]! \n" // store odd pixels "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst), // %1 "+r"(dst_width) // %2 : : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List ); } // 46: f964 018d vld4.32 {d16,d18,d20,d22}, [r4]! // 4a: 3e04 subs r6, #4 // 4c: f964 118d vld4.32 {d17,d19,d21,d23}, [r4]! // 50: ef64 21f4 vorr q9, q10, q10 // 54: f942 038d vst2.32 {d16-d19}, [r2]! // 58: d1f5 bne.n 46 void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) { (void)src_stride; asm volatile( "1: \n" "vld4.32 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. "vld4.32 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB "subs %2, %2, #8 \n" // 8 processed per loop "vrhadd.u8 q0, q0, q1 \n" // rounding half add "vrhadd.u8 q1, q2, q3 \n" // rounding half add "vst2.32 {q0, q1}, [%1]! \n" "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(dst_width) // %2 : : "memory", "cc", "q0", "q1", "q2", "q3" // Clobber List ); } void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { asm volatile( // change the stride to row 2 pointer "add %1, %1, %0 \n" "1: \n" "vld4.8 {d0, d2, d4, d6}, [%0]! \n" // load 8 ARGB pixels. 
"vld4.8 {d1, d3, d5, d7}, [%0]! \n" // load next 8 ARGB "subs %3, %3, #8 \n" // 8 processed per loop. "vpaddl.u8 q0, q0 \n" // B 16 bytes -> 8 shorts. "vpaddl.u8 q1, q1 \n" // G 16 bytes -> 8 shorts. "vpaddl.u8 q2, q2 \n" // R 16 bytes -> 8 shorts. "vpaddl.u8 q3, q3 \n" // A 16 bytes -> 8 shorts. "vld4.8 {d16, d18, d20, d22}, [%1]! \n" // load 8 more ARGB "vld4.8 {d17, d19, d21, d23}, [%1]! \n" // load last 8 ARGB "vpadal.u8 q0, q8 \n" // B 16 bytes -> 8 shorts. "vpadal.u8 q1, q9 \n" // G 16 bytes -> 8 shorts. "vpadal.u8 q2, q10 \n" // R 16 bytes -> 8 shorts. "vpadal.u8 q3, q11 \n" // A 16 bytes -> 8 shorts. "vrshrn.u16 d0, q0, #2 \n" // round and pack to bytes "vrshrn.u16 d1, q1, #2 \n" "vrshrn.u16 d2, q2, #2 \n" "vrshrn.u16 d3, q3, #2 \n" "vst4.8 {d0, d1, d2, d3}, [%2]! \n" "bgt 1b \n" : "+r"(src_ptr), // %0 "+r"(src_stride), // %1 "+r"(dst), // %2 "+r"(dst_width) // %3 : : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11"); } // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width) { (void)src_stride; asm volatile( "mov r12, %3, lsl #2 \n" "1: \n" "vld1.32 {d0[0]}, [%0], r12 \n" "vld1.32 {d0[1]}, [%0], r12 \n" "vld1.32 {d1[0]}, [%0], r12 \n" "vld1.32 {d1[1]}, [%0], r12 \n" "subs %2, %2, #4 \n" // 4 pixels per loop. "vst1.8 {q0}, [%1]! \n" "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(dst_width) // %2 : "r"(src_stepx) // %3 : "memory", "cc", "r12", "q0"); } // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width) { asm volatile( "mov r12, %4, lsl #2 \n" "add %1, %1, %0 \n" "1: \n" "vld1.8 {d0}, [%0], r12 \n" // 4 2x2 blocks -> 2x1 "vld1.8 {d1}, [%1], r12 \n" "vld1.8 {d2}, [%0], r12 \n" "vld1.8 {d3}, [%1], r12 \n" "vld1.8 {d4}, [%0], r12 \n" "vld1.8 {d5}, [%1], r12 \n" "vld1.8 {d6}, [%0], r12 \n" "vld1.8 {d7}, [%1], r12 \n" "vaddl.u8 q0, d0, d1 \n" "vaddl.u8 q1, d2, d3 \n" "vaddl.u8 q2, d4, d5 \n" "vaddl.u8 q3, d6, d7 \n" "vswp.8 d1, d2 \n" // ab_cd -> ac_bd "vswp.8 d5, d6 \n" // ef_gh -> eg_fh "vadd.u16 q0, q0, q1 \n" // (a+b)_(c+d) "vadd.u16 q2, q2, q3 \n" // (e+f)_(g+h) "vrshrn.u16 d0, q0, #2 \n" // first 2 pixels. "vrshrn.u16 d1, q2, #2 \n" // next 2 pixels. "subs %3, %3, #4 \n" // 4 pixels per loop. "vst1.8 {q0}, [%2]! \n" "bgt 1b \n" : "+r"(src_argb), // %0 "+r"(src_stride), // %1 "+r"(dst_argb), // %2 "+r"(dst_width) // %3 : "r"(src_stepx) // %4 : "memory", "cc", "r12", "q0", "q1", "q2", "q3"); } // TODO(Yang Zhang): Investigate less load instructions for // the x/dx stepping #define LOAD1_DATA32_LANE(dn, n) \ "lsr %5, %3, #16 \n" \ "add %6, %1, %5, lsl #2 \n" \ "add %3, %3, %4 \n" \ "vld1.32 {" #dn "[" #n "]}, [%6] \n" void ScaleARGBCols_NEON(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { int tmp; const uint8_t* src_tmp = src_argb; asm volatile( "1: \n" // clang-format off LOAD1_DATA32_LANE(d0, 0) LOAD1_DATA32_LANE(d0, 1) LOAD1_DATA32_LANE(d1, 0) LOAD1_DATA32_LANE(d1, 1) LOAD1_DATA32_LANE(d2, 0) LOAD1_DATA32_LANE(d2, 1) LOAD1_DATA32_LANE(d3, 0) LOAD1_DATA32_LANE(d3, 1) // clang-format on "vst1.32 {q0, q1}, [%0]! 
\n" // store pixels "subs %2, %2, #8 \n" // 8 processed per loop "bgt 1b \n" : "+r"(dst_argb), // %0 "+r"(src_argb), // %1 "+r"(dst_width), // %2 "+r"(x), // %3 "+r"(dx), // %4 "=&r"(tmp), // %5 "+r"(src_tmp) // %6 : : "memory", "cc", "q0", "q1"); } #undef LOAD1_DATA32_LANE // TODO(Yang Zhang): Investigate less load instructions for // the x/dx stepping #define LOAD2_DATA32_LANE(dn1, dn2, n) \ "lsr %5, %3, #16 \n" \ "add %6, %1, %5, lsl #2 \n" \ "add %3, %3, %4 \n" \ "vld2.32 {" #dn1 "[" #n "], " #dn2 "[" #n "]}, [%6] \n" void ScaleARGBFilterCols_NEON(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { int dx_offset[4] = {0, 1, 2, 3}; int* tmp = dx_offset; const uint8_t* src_tmp = src_argb; asm volatile ( "vdup.32 q0, %3 \n" // x "vdup.32 q1, %4 \n" // dx "vld1.32 {q2}, [%5] \n" // 0 1 2 3 "vshl.i32 q9, q1, #2 \n" // 4 * dx "vmul.s32 q1, q1, q2 \n" "vmov.i8 q3, #0x7f \n" // 0x7F "vmov.i16 q15, #0x7f \n" // 0x7F // x , x + 1 * dx, x + 2 * dx, x + 3 * dx "vadd.s32 q8, q1, q0 \n" "1: \n" // d0, d1: a // d2, d3: b LOAD2_DATA32_LANE(d0, d2, 0) LOAD2_DATA32_LANE(d0, d2, 1) LOAD2_DATA32_LANE(d1, d3, 0) LOAD2_DATA32_LANE(d1, d3, 1) "vshrn.i32 d22, q8, #9 \n" "vand.16 d22, d22, d30 \n" "vdup.8 d24, d22[0] \n" "vdup.8 d25, d22[2] \n" "vdup.8 d26, d22[4] \n" "vdup.8 d27, d22[6] \n" "vext.8 d4, d24, d25, #4 \n" "vext.8 d5, d26, d27, #4 \n" // f "veor.8 q10, q2, q3 \n" // 0x7f ^ f "vmull.u8 q11, d0, d20 \n" "vmull.u8 q12, d1, d21 \n" "vmull.u8 q13, d2, d4 \n" "vmull.u8 q14, d3, d5 \n" "vadd.i16 q11, q11, q13 \n" "vadd.i16 q12, q12, q14 \n" "vshrn.i16 d0, q11, #7 \n" "vshrn.i16 d1, q12, #7 \n" "vst1.32 {d0, d1}, [%0]! \n" // store pixels "vadd.s32 q8, q8, q9 \n" "subs %2, %2, #4 \n" // 4 processed per loop "bgt 1b \n" : "+r"(dst_argb), // %0 "+r"(src_argb), // %1 "+r"(dst_width), // %2 "+r"(x), // %3 "+r"(dx), // %4 "+r"(tmp), // %5 "+r"(src_tmp) // %6 : : "memory", "cc", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "q13", "q14", "q15" ); } #undef LOAD2_DATA32_LANE #endif // defined(__ARM_NEON__) && !defined(__aarch64__) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/scale_neon64.cc000066400000000000000000001317741357355204000230010ustar00rootroot00000000000000/* * Copyright 2014 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/row.h" #include "libyuv/scale.h" #include "libyuv/scale_row.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif // This module is for GCC Neon armv8 64 bit. #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__) // Read 32x1 throw away even pixels, and write 16x1. void ScaleRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { (void)src_stride; asm volatile( "1: \n" // load even pixels into v0, odd into v1 "ld2 {v0.16b,v1.16b}, [%0], #32 \n" "subs %w2, %w2, #16 \n" // 16 processed per loop "st1 {v1.16b}, [%1], #16 \n" // store odd pixels "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst), // %1 "+r"(dst_width) // %2 : : "v0", "v1" // Clobber List ); } // Read 32x1 average down and write 16x1. 
void ScaleRowDown2Linear_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { (void)src_stride; asm volatile( "1: \n" // load even pixels into v0, odd into v1 "ld2 {v0.16b,v1.16b}, [%0], #32 \n" "subs %w2, %w2, #16 \n" // 16 processed per loop "urhadd v0.16b, v0.16b, v1.16b \n" // rounding half add "st1 {v0.16b}, [%1], #16 \n" "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst), // %1 "+r"(dst_width) // %2 : : "v0", "v1" // Clobber List ); } // Read 32x2 average down and write 16x1. void ScaleRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { asm volatile( // change the stride to row 2 pointer "add %1, %1, %0 \n" "1: \n" "ld1 {v0.16b, v1.16b}, [%0], #32 \n" // load row 1 and post inc "ld1 {v2.16b, v3.16b}, [%1], #32 \n" // load row 2 and post inc "subs %w3, %w3, #16 \n" // 16 processed per loop "uaddlp v0.8h, v0.16b \n" // row 1 add adjacent "uaddlp v1.8h, v1.16b \n" "uadalp v0.8h, v2.16b \n" // += row 2 add adjacent "uadalp v1.8h, v3.16b \n" "rshrn v0.8b, v0.8h, #2 \n" // round and pack "rshrn2 v0.16b, v1.8h, #2 \n" "st1 {v0.16b}, [%2], #16 \n" "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(src_stride), // %1 "+r"(dst), // %2 "+r"(dst_width) // %3 : : "v0", "v1", "v2", "v3" // Clobber List ); } void ScaleRowDown4_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0 "subs %w2, %w2, #8 \n" // 8 processed per loop "st1 {v2.8b}, [%1], #8 \n" "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : : "v0", "v1", "v2", "v3", "memory", "cc"); } void ScaleRowDown4Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { const uint8_t* src_ptr1 = src_ptr + src_stride; const uint8_t* src_ptr2 = src_ptr + src_stride * 2; const uint8_t* src_ptr3 = src_ptr + src_stride * 3; asm volatile( "1: \n" "ld1 {v0.16b}, [%0], #16 \n" // load up 16x4 "ld1 {v1.16b}, [%2], #16 \n" "ld1 {v2.16b}, [%3], #16 \n" "ld1 {v3.16b}, [%4], #16 \n" "subs %w5, %w5, #4 \n" "uaddlp v0.8h, v0.16b \n" "uadalp v0.8h, v1.16b \n" "uadalp v0.8h, v2.16b \n" "uadalp v0.8h, v3.16b \n" "addp v0.8h, v0.8h, v0.8h \n" "rshrn v0.8b, v0.8h, #4 \n" // divide by 16 w/rounding "st1 {v0.s}[0], [%1], #4 \n" "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(src_ptr1), // %2 "+r"(src_ptr2), // %3 "+r"(src_ptr3), // %4 "+r"(dst_width) // %5 : : "v0", "v1", "v2", "v3", "memory", "cc"); } // Down scale from 4 to 3 pixels. Use the neon multilane read/write // to load up the every 4th pixel into a 4 different registers. // Point samples 32 pixels to 24 pixels. 
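// Scalar model of the point sampling below (illustrative sketch; the helper
// name is ours): of every four source pixels, pixels 0, 1 and 3 are kept,
// which is what the ld4 de-interleave plus the v2 := v3 copy implements.
#if 0  // reference sketch, not compiled; dst_width is a multiple of 3
static void ScaleRowDown34_Sketch(const uint8_t* s, uint8_t* d, int dst_width) {
  int x;
  for (x = 0; x < dst_width; x += 3) {
    d[0] = s[0];
    d[1] = s[1];
    d[2] = s[3];  // pixel 2 of each group of 4 is dropped
    d += 3;
    s += 4;
  }
}
#endif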
void ScaleRowDown34_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0 "subs %w2, %w2, #24 \n" "orr v2.16b, v3.16b, v3.16b \n" // order v0,v1,v2 "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n" "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : : "v0", "v1", "v2", "v3", "memory", "cc"); } void ScaleRowDown34_0_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { asm volatile( "movi v20.8b, #3 \n" "add %3, %3, %0 \n" "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0 "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1 "subs %w2, %w2, #24 \n" // filter src line 0 with src line 1 // expand chars to shorts to allow for room // when adding lines together "ushll v16.8h, v4.8b, #0 \n" "ushll v17.8h, v5.8b, #0 \n" "ushll v18.8h, v6.8b, #0 \n" "ushll v19.8h, v7.8b, #0 \n" // 3 * line_0 + line_1 "umlal v16.8h, v0.8b, v20.8b \n" "umlal v17.8h, v1.8b, v20.8b \n" "umlal v18.8h, v2.8b, v20.8b \n" "umlal v19.8h, v3.8b, v20.8b \n" // (3 * line_0 + line_1) >> 2 "uqrshrn v0.8b, v16.8h, #2 \n" "uqrshrn v1.8b, v17.8h, #2 \n" "uqrshrn v2.8b, v18.8h, #2 \n" "uqrshrn v3.8b, v19.8h, #2 \n" // a0 = (src[0] * 3 + s[1] * 1) >> 2 "ushll v16.8h, v1.8b, #0 \n" "umlal v16.8h, v0.8b, v20.8b \n" "uqrshrn v0.8b, v16.8h, #2 \n" // a1 = (src[1] * 1 + s[2] * 1) >> 1 "urhadd v1.8b, v1.8b, v2.8b \n" // a2 = (src[2] * 1 + s[3] * 3) >> 2 "ushll v16.8h, v2.8b, #0 \n" "umlal v16.8h, v3.8b, v20.8b \n" "uqrshrn v2.8b, v16.8h, #2 \n" "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n" "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width), // %2 "+r"(src_stride) // %3 : : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "memory", "cc"); } void ScaleRowDown34_1_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { asm volatile( "movi v20.8b, #3 \n" "add %3, %3, %0 \n" "1: \n" "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" // src line 0 "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%3], #32 \n" // src line 1 "subs %w2, %w2, #24 \n" // average src line 0 with src line 1 "urhadd v0.8b, v0.8b, v4.8b \n" "urhadd v1.8b, v1.8b, v5.8b \n" "urhadd v2.8b, v2.8b, v6.8b \n" "urhadd v3.8b, v3.8b, v7.8b \n" // a0 = (src[0] * 3 + s[1] * 1) >> 2 "ushll v4.8h, v1.8b, #0 \n" "umlal v4.8h, v0.8b, v20.8b \n" "uqrshrn v0.8b, v4.8h, #2 \n" // a1 = (src[1] * 1 + s[2] * 1) >> 1 "urhadd v1.8b, v1.8b, v2.8b \n" // a2 = (src[2] * 1 + s[3] * 3) >> 2 "ushll v4.8h, v2.8b, #0 \n" "umlal v4.8h, v3.8b, v20.8b \n" "uqrshrn v2.8b, v4.8h, #2 \n" "st3 {v0.8b,v1.8b,v2.8b}, [%1], #24 \n" "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width), // %2 "+r"(src_stride) // %3 : : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v20", "memory", "cc"); } static const uvec8 kShuf38 = {0, 3, 6, 8, 11, 14, 16, 19, 22, 24, 27, 30, 0, 0, 0, 0}; static const uvec8 kShuf38_2 = {0, 16, 32, 2, 18, 33, 4, 20, 34, 6, 22, 35, 0, 0, 0, 0}; static const vec16 kMult38_Div6 = {65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12, 65536 / 12}; static const vec16 kMult38_Div9 = {65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18, 65536 / 18}; // 32 -> 12 void ScaleRowDown38_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { (void)src_stride; asm volatile( "ld1 {v3.16b}, [%3] \n" "1: \n" "ld1 {v0.16b,v1.16b}, 
[%0], #32 \n" "subs %w2, %w2, #12 \n" "tbl v2.16b, {v0.16b,v1.16b}, v3.16b \n" "st1 {v2.8b}, [%1], #8 \n" "st1 {v2.s}[2], [%1], #4 \n" "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(dst_width) // %2 : "r"(&kShuf38) // %3 : "v0", "v1", "v2", "v3", "memory", "cc"); } // 32x3 -> 12x1 void OMITFP ScaleRowDown38_3_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { const uint8_t* src_ptr1 = src_ptr + src_stride * 2; ptrdiff_t tmp_src_stride = src_stride; asm volatile( "ld1 {v29.8h}, [%5] \n" "ld1 {v30.16b}, [%6] \n" "ld1 {v31.8h}, [%7] \n" "add %2, %2, %0 \n" "1: \n" // 00 40 01 41 02 42 03 43 // 10 50 11 51 12 52 13 53 // 20 60 21 61 22 62 23 63 // 30 70 31 71 32 72 33 73 "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n" "ld4 {v16.8b,v17.8b,v18.8b,v19.8b}, [%3], #32 \n" "subs %w4, %w4, #12 \n" // Shuffle the input data around to get align the data // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 // 00 10 01 11 02 12 03 13 // 40 50 41 51 42 52 43 53 "trn1 v20.8b, v0.8b, v1.8b \n" "trn2 v21.8b, v0.8b, v1.8b \n" "trn1 v22.8b, v4.8b, v5.8b \n" "trn2 v23.8b, v4.8b, v5.8b \n" "trn1 v24.8b, v16.8b, v17.8b \n" "trn2 v25.8b, v16.8b, v17.8b \n" // 20 30 21 31 22 32 23 33 // 60 70 61 71 62 72 63 73 "trn1 v0.8b, v2.8b, v3.8b \n" "trn2 v1.8b, v2.8b, v3.8b \n" "trn1 v4.8b, v6.8b, v7.8b \n" "trn2 v5.8b, v6.8b, v7.8b \n" "trn1 v16.8b, v18.8b, v19.8b \n" "trn2 v17.8b, v18.8b, v19.8b \n" // 00+10 01+11 02+12 03+13 // 40+50 41+51 42+52 43+53 "uaddlp v20.4h, v20.8b \n" "uaddlp v21.4h, v21.8b \n" "uaddlp v22.4h, v22.8b \n" "uaddlp v23.4h, v23.8b \n" "uaddlp v24.4h, v24.8b \n" "uaddlp v25.4h, v25.8b \n" // 60+70 61+71 62+72 63+73 "uaddlp v1.4h, v1.8b \n" "uaddlp v5.4h, v5.8b \n" "uaddlp v17.4h, v17.8b \n" // combine source lines "add v20.4h, v20.4h, v22.4h \n" "add v21.4h, v21.4h, v23.4h \n" "add v20.4h, v20.4h, v24.4h \n" "add v21.4h, v21.4h, v25.4h \n" "add v2.4h, v1.4h, v5.4h \n" "add v2.4h, v2.4h, v17.4h \n" // dst_ptr[3] = (s[6 + st * 0] + s[7 + st * 0] // + s[6 + st * 1] + s[7 + st * 1] // + s[6 + st * 2] + s[7 + st * 2]) / 6 "sqrdmulh v2.8h, v2.8h, v29.8h \n" "xtn v2.8b, v2.8h \n" // Shuffle 2,3 reg around so that 2 can be added to the // 0,1 reg and 3 can be added to the 4,5 reg. This // requires expanding from u8 to u16 as the 0,1 and 4,5 // registers are already expanded. Then do transposes // to get aligned. // xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 "ushll v16.8h, v16.8b, #0 \n" "uaddl v0.8h, v0.8b, v4.8b \n" // combine source lines "add v0.8h, v0.8h, v16.8h \n" // xx 20 xx 21 xx 22 xx 23 // xx 30 xx 31 xx 32 xx 33 "trn1 v1.8h, v0.8h, v0.8h \n" "trn2 v4.8h, v0.8h, v0.8h \n" "xtn v0.4h, v1.4s \n" "xtn v4.4h, v4.4s \n" // 0+1+2, 3+4+5 "add v20.8h, v20.8h, v0.8h \n" "add v21.8h, v21.8h, v4.8h \n" // Need to divide, but can't downshift as the the value // isn't a power of 2. So multiply by 65536 / n // and take the upper 16 bits. 
"sqrdmulh v0.8h, v20.8h, v31.8h \n" "sqrdmulh v1.8h, v21.8h, v31.8h \n" // Align for table lookup, vtbl requires registers to be adjacent "tbl v3.16b, {v0.16b, v1.16b, v2.16b}, v30.16b \n" "st1 {v3.8b}, [%1], #8 \n" "st1 {v3.s}[2], [%1], #4 \n" "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(tmp_src_stride), // %2 "+r"(src_ptr1), // %3 "+r"(dst_width) // %4 : "r"(&kMult38_Div6), // %5 "r"(&kShuf38_2), // %6 "r"(&kMult38_Div9) // %7 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v29", "v30", "v31", "memory", "cc"); } // 32x2 -> 12x1 void ScaleRowDown38_2_Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { // TODO(fbarchard): use src_stride directly for clang 3.5+. ptrdiff_t tmp_src_stride = src_stride; asm volatile( "ld1 {v30.8h}, [%4] \n" "ld1 {v31.16b}, [%5] \n" "add %2, %2, %0 \n" "1: \n" // 00 40 01 41 02 42 03 43 // 10 50 11 51 12 52 13 53 // 20 60 21 61 22 62 23 63 // 30 70 31 71 32 72 33 73 "ld4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%0], #32 \n" "ld4 {v4.8b,v5.8b,v6.8b,v7.8b}, [%2], #32 \n" "subs %w3, %w3, #12 \n" // Shuffle the input data around to get align the data // so adjacent data can be added. 0,1 - 2,3 - 4,5 - 6,7 // 00 10 01 11 02 12 03 13 // 40 50 41 51 42 52 43 53 "trn1 v16.8b, v0.8b, v1.8b \n" "trn2 v17.8b, v0.8b, v1.8b \n" "trn1 v18.8b, v4.8b, v5.8b \n" "trn2 v19.8b, v4.8b, v5.8b \n" // 20 30 21 31 22 32 23 33 // 60 70 61 71 62 72 63 73 "trn1 v0.8b, v2.8b, v3.8b \n" "trn2 v1.8b, v2.8b, v3.8b \n" "trn1 v4.8b, v6.8b, v7.8b \n" "trn2 v5.8b, v6.8b, v7.8b \n" // 00+10 01+11 02+12 03+13 // 40+50 41+51 42+52 43+53 "uaddlp v16.4h, v16.8b \n" "uaddlp v17.4h, v17.8b \n" "uaddlp v18.4h, v18.8b \n" "uaddlp v19.4h, v19.8b \n" // 60+70 61+71 62+72 63+73 "uaddlp v1.4h, v1.8b \n" "uaddlp v5.4h, v5.8b \n" // combine source lines "add v16.4h, v16.4h, v18.4h \n" "add v17.4h, v17.4h, v19.4h \n" "add v2.4h, v1.4h, v5.4h \n" // dst_ptr[3] = (s[6] + s[7] + s[6+st] + s[7+st]) / 4 "uqrshrn v2.8b, v2.8h, #2 \n" // Shuffle 2,3 reg around so that 2 can be added to the // 0,1 reg and 3 can be added to the 4,5 reg. This // requires expanding from u8 to u16 as the 0,1 and 4,5 // registers are already expanded. Then do transposes // to get aligned. // xx 20 xx 30 xx 21 xx 31 xx 22 xx 32 xx 23 xx 33 // combine source lines "uaddl v0.8h, v0.8b, v4.8b \n" // xx 20 xx 21 xx 22 xx 23 // xx 30 xx 31 xx 32 xx 33 "trn1 v1.8h, v0.8h, v0.8h \n" "trn2 v4.8h, v0.8h, v0.8h \n" "xtn v0.4h, v1.4s \n" "xtn v4.4h, v4.4s \n" // 0+1+2, 3+4+5 "add v16.8h, v16.8h, v0.8h \n" "add v17.8h, v17.8h, v4.8h \n" // Need to divide, but can't downshift as the the value // isn't a power of 2. So multiply by 65536 / n // and take the upper 16 bits. 
"sqrdmulh v0.8h, v16.8h, v30.8h \n" "sqrdmulh v1.8h, v17.8h, v30.8h \n" // Align for table lookup, vtbl requires registers to // be adjacent "tbl v3.16b, {v0.16b, v1.16b, v2.16b}, v31.16b \n" "st1 {v3.8b}, [%1], #8 \n" "st1 {v3.s}[2], [%1], #4 \n" "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst_ptr), // %1 "+r"(tmp_src_stride), // %2 "+r"(dst_width) // %3 : "r"(&kMult38_Div6), // %4 "r"(&kShuf38_2) // %5 : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19", "v30", "v31", "memory", "cc"); } void ScaleAddRows_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint16_t* dst_ptr, int src_width, int src_height) { const uint8_t* src_tmp; asm volatile( "1: \n" "mov %0, %1 \n" "mov w12, %w5 \n" "eor v2.16b, v2.16b, v2.16b \n" "eor v3.16b, v3.16b, v3.16b \n" "2: \n" // load 16 pixels into q0 "ld1 {v0.16b}, [%0], %3 \n" "uaddw2 v3.8h, v3.8h, v0.16b \n" "uaddw v2.8h, v2.8h, v0.8b \n" "subs w12, w12, #1 \n" "b.gt 2b \n" "st1 {v2.8h, v3.8h}, [%2], #32 \n" // store pixels "add %1, %1, #16 \n" "subs %w4, %w4, #16 \n" // 16 processed per loop "b.gt 1b \n" : "=&r"(src_tmp), // %0 "+r"(src_ptr), // %1 "+r"(dst_ptr), // %2 "+r"(src_stride), // %3 "+r"(src_width), // %4 "+r"(src_height) // %5 : : "memory", "cc", "w12", "v0", "v1", "v2", "v3" // Clobber List ); } // TODO(Yang Zhang): Investigate less load instructions for // the x/dx stepping #define LOAD2_DATA8_LANE(n) \ "lsr %5, %3, #16 \n" \ "add %6, %1, %5 \n" \ "add %3, %3, %4 \n" \ "ld2 {v4.b, v5.b}[" #n "], [%6] \n" // The NEON version mimics this formula (from row_common.cc): // #define BLENDER(a, b, f) (uint8_t)((int)(a) + // ((((int)((f)) * ((int)(b) - (int)(a))) + 0x8000) >> 16)) void ScaleFilterCols_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) { int dx_offset[4] = {0, 1, 2, 3}; int* tmp = dx_offset; const uint8_t* src_tmp = src_ptr; int64_t x64 = (int64_t)x; // NOLINT int64_t dx64 = (int64_t)dx; // NOLINT asm volatile ( "dup v0.4s, %w3 \n" // x "dup v1.4s, %w4 \n" // dx "ld1 {v2.4s}, [%5] \n" // 0 1 2 3 "shl v3.4s, v1.4s, #2 \n" // 4 * dx "mul v1.4s, v1.4s, v2.4s \n" // x , x + 1 * dx, x + 2 * dx, x + 3 * dx "add v1.4s, v1.4s, v0.4s \n" // x + 4 * dx, x + 5 * dx, x + 6 * dx, x + 7 * dx "add v2.4s, v1.4s, v3.4s \n" "shl v0.4s, v3.4s, #1 \n" // 8 * dx "1: \n" LOAD2_DATA8_LANE(0) LOAD2_DATA8_LANE(1) LOAD2_DATA8_LANE(2) LOAD2_DATA8_LANE(3) LOAD2_DATA8_LANE(4) LOAD2_DATA8_LANE(5) LOAD2_DATA8_LANE(6) LOAD2_DATA8_LANE(7) "mov v6.16b, v1.16b \n" "mov v7.16b, v2.16b \n" "uzp1 v6.8h, v6.8h, v7.8h \n" "ushll v4.8h, v4.8b, #0 \n" "ushll v5.8h, v5.8b, #0 \n" "ssubl v16.4s, v5.4h, v4.4h \n" "ssubl2 v17.4s, v5.8h, v4.8h \n" "ushll v7.4s, v6.4h, #0 \n" "ushll2 v6.4s, v6.8h, #0 \n" "mul v16.4s, v16.4s, v7.4s \n" "mul v17.4s, v17.4s, v6.4s \n" "rshrn v6.4h, v16.4s, #16 \n" "rshrn2 v6.8h, v17.4s, #16 \n" "add v4.8h, v4.8h, v6.8h \n" "xtn v4.8b, v4.8h \n" "st1 {v4.8b}, [%0], #8 \n" // store pixels "add v1.4s, v1.4s, v0.4s \n" "add v2.4s, v2.4s, v0.4s \n" "subs %w2, %w2, #8 \n" // 8 processed per loop "b.gt 1b \n" : "+r"(dst_ptr), // %0 "+r"(src_ptr), // %1 "+r"(dst_width), // %2 "+r"(x64), // %3 "+r"(dx64), // %4 "+r"(tmp), // %5 "+r"(src_tmp) // %6 : : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17" ); } #undef LOAD2_DATA8_LANE // 16x2 -> 16x1 void ScaleFilterRows_NEON(uint8_t* dst_ptr, const uint8_t* src_ptr, ptrdiff_t src_stride, int dst_width, int source_y_fraction) { int y_fraction = 256 - source_y_fraction; asm volatile( "cmp %w4, #0 \n" "b.eq 100f \n" "add %2, %2, %1 \n" "cmp %w4, 
#64 \n" "b.eq 75f \n" "cmp %w4, #128 \n" "b.eq 50f \n" "cmp %w4, #192 \n" "b.eq 25f \n" "dup v5.8b, %w4 \n" "dup v4.8b, %w5 \n" // General purpose row blend. "1: \n" "ld1 {v0.16b}, [%1], #16 \n" "ld1 {v1.16b}, [%2], #16 \n" "subs %w3, %w3, #16 \n" "umull v6.8h, v0.8b, v4.8b \n" "umull2 v7.8h, v0.16b, v4.16b \n" "umlal v6.8h, v1.8b, v5.8b \n" "umlal2 v7.8h, v1.16b, v5.16b \n" "rshrn v0.8b, v6.8h, #8 \n" "rshrn2 v0.16b, v7.8h, #8 \n" "st1 {v0.16b}, [%0], #16 \n" "b.gt 1b \n" "b 99f \n" // Blend 25 / 75. "25: \n" "ld1 {v0.16b}, [%1], #16 \n" "ld1 {v1.16b}, [%2], #16 \n" "subs %w3, %w3, #16 \n" "urhadd v0.16b, v0.16b, v1.16b \n" "urhadd v0.16b, v0.16b, v1.16b \n" "st1 {v0.16b}, [%0], #16 \n" "b.gt 25b \n" "b 99f \n" // Blend 50 / 50. "50: \n" "ld1 {v0.16b}, [%1], #16 \n" "ld1 {v1.16b}, [%2], #16 \n" "subs %w3, %w3, #16 \n" "urhadd v0.16b, v0.16b, v1.16b \n" "st1 {v0.16b}, [%0], #16 \n" "b.gt 50b \n" "b 99f \n" // Blend 75 / 25. "75: \n" "ld1 {v1.16b}, [%1], #16 \n" "ld1 {v0.16b}, [%2], #16 \n" "subs %w3, %w3, #16 \n" "urhadd v0.16b, v0.16b, v1.16b \n" "urhadd v0.16b, v0.16b, v1.16b \n" "st1 {v0.16b}, [%0], #16 \n" "b.gt 75b \n" "b 99f \n" // Blend 100 / 0 - Copy row unchanged. "100: \n" "ld1 {v0.16b}, [%1], #16 \n" "subs %w3, %w3, #16 \n" "st1 {v0.16b}, [%0], #16 \n" "b.gt 100b \n" "99: \n" "st1 {v0.b}[15], [%0] \n" : "+r"(dst_ptr), // %0 "+r"(src_ptr), // %1 "+r"(src_stride), // %2 "+r"(dst_width), // %3 "+r"(source_y_fraction), // %4 "+r"(y_fraction) // %5 : : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "memory", "cc"); } void ScaleARGBRowDown2_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { (void)src_stride; asm volatile( "1: \n" // load 16 ARGB pixels with even pixels into q0/q2, odd into q1/q3 "ld4 {v0.4s,v1.4s,v2.4s,v3.4s}, [%0], #64 \n" "subs %w2, %w2, #8 \n" // 8 processed per loop "mov v2.16b, v3.16b \n" "st2 {v1.4s,v2.4s}, [%1], #32 \n" // store 8 odd pixels "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(dst), // %1 "+r"(dst_width) // %2 : : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List ); } void ScaleARGBRowDown2Linear_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) { (void)src_stride; asm volatile( "1: \n" // load 16 ARGB pixels with even pixels into q0/q2, odd into q1/q3 "ld4 {v0.4s,v1.4s,v2.4s,v3.4s}, [%0], #64 \n" "subs %w2, %w2, #8 \n" // 8 processed per loop "urhadd v0.16b, v0.16b, v1.16b \n" // rounding half add "urhadd v1.16b, v2.16b, v3.16b \n" "st2 {v0.4s,v1.4s}, [%1], #32 \n" // store 8 pixels "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(dst_width) // %2 : : "memory", "cc", "v0", "v1", "v2", "v3" // Clobber List ); } void ScaleARGBRowDown2Box_NEON(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst, int dst_width) { asm volatile( // change the stride to row 2 pointer "add %1, %1, %0 \n" "1: \n" "ld4 {v0.16b,v1.16b,v2.16b,v3.16b}, [%0], #64 \n" // load 8 ARGB "subs %w3, %w3, #8 \n" // 8 processed per loop. "uaddlp v0.8h, v0.16b \n" // B 16 bytes -> 8 shorts. "uaddlp v1.8h, v1.16b \n" // G 16 bytes -> 8 shorts. "uaddlp v2.8h, v2.16b \n" // R 16 bytes -> 8 shorts. "uaddlp v3.8h, v3.16b \n" // A 16 bytes -> 8 shorts. "ld4 {v16.16b,v17.16b,v18.16b,v19.16b}, [%1], #64 \n" // load 8 "uadalp v0.8h, v16.16b \n" // B 16 bytes -> 8 shorts. "uadalp v1.8h, v17.16b \n" // G 16 bytes -> 8 shorts. "uadalp v2.8h, v18.16b \n" // R 16 bytes -> 8 shorts. "uadalp v3.8h, v19.16b \n" // A 16 bytes -> 8 shorts. 
"rshrn v0.8b, v0.8h, #2 \n" // round and pack "rshrn v1.8b, v1.8h, #2 \n" "rshrn v2.8b, v2.8h, #2 \n" "rshrn v3.8b, v3.8h, #2 \n" "st4 {v0.8b,v1.8b,v2.8b,v3.8b}, [%2], #32 \n" "b.gt 1b \n" : "+r"(src_ptr), // %0 "+r"(src_stride), // %1 "+r"(dst), // %2 "+r"(dst_width) // %3 : : "memory", "cc", "v0", "v1", "v2", "v3", "v16", "v17", "v18", "v19"); } // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. void ScaleARGBRowDownEven_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width) { (void)src_stride; asm volatile( "1: \n" "ld1 {v0.s}[0], [%0], %3 \n" "ld1 {v0.s}[1], [%0], %3 \n" "ld1 {v0.s}[2], [%0], %3 \n" "ld1 {v0.s}[3], [%0], %3 \n" "subs %w2, %w2, #4 \n" // 4 pixels per loop. "st1 {v0.16b}, [%1], #16 \n" "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(dst_argb), // %1 "+r"(dst_width) // %2 : "r"((int64_t)(src_stepx * 4)) // %3 : "memory", "cc", "v0"); } // Reads 4 pixels at a time. // Alignment requirement: src_argb 4 byte aligned. // TODO(Yang Zhang): Might be worth another optimization pass in future. // It could be upgraded to 8 pixels at a time to start with. void ScaleARGBRowDownEvenBox_NEON(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width) { asm volatile( "add %1, %1, %0 \n" "1: \n" "ld1 {v0.8b}, [%0], %4 \n" // Read 4 2x2 -> 2x1 "ld1 {v1.8b}, [%1], %4 \n" "ld1 {v2.8b}, [%0], %4 \n" "ld1 {v3.8b}, [%1], %4 \n" "ld1 {v4.8b}, [%0], %4 \n" "ld1 {v5.8b}, [%1], %4 \n" "ld1 {v6.8b}, [%0], %4 \n" "ld1 {v7.8b}, [%1], %4 \n" "uaddl v0.8h, v0.8b, v1.8b \n" "uaddl v2.8h, v2.8b, v3.8b \n" "uaddl v4.8h, v4.8b, v5.8b \n" "uaddl v6.8h, v6.8b, v7.8b \n" "mov v16.d[1], v0.d[1] \n" // ab_cd -> ac_bd "mov v0.d[1], v2.d[0] \n" "mov v2.d[0], v16.d[1] \n" "mov v16.d[1], v4.d[1] \n" // ef_gh -> eg_fh "mov v4.d[1], v6.d[0] \n" "mov v6.d[0], v16.d[1] \n" "add v0.8h, v0.8h, v2.8h \n" // (a+b)_(c+d) "add v4.8h, v4.8h, v6.8h \n" // (e+f)_(g+h) "rshrn v0.8b, v0.8h, #2 \n" // first 2 pixels. "rshrn2 v0.16b, v4.8h, #2 \n" // next 2 pixels. "subs %w3, %w3, #4 \n" // 4 pixels per loop. 
"st1 {v0.16b}, [%2], #16 \n" "b.gt 1b \n" : "+r"(src_argb), // %0 "+r"(src_stride), // %1 "+r"(dst_argb), // %2 "+r"(dst_width) // %3 : "r"((int64_t)(src_stepx * 4)) // %4 : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16"); } // TODO(Yang Zhang): Investigate less load instructions for // the x/dx stepping #define LOAD1_DATA32_LANE(vn, n) \ "lsr %5, %3, #16 \n" \ "add %6, %1, %5, lsl #2 \n" \ "add %3, %3, %4 \n" \ "ld1 {" #vn ".s}[" #n "], [%6] \n" void ScaleARGBCols_NEON(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { const uint8_t* src_tmp = src_argb; int64_t x64 = (int64_t)x; // NOLINT int64_t dx64 = (int64_t)dx; // NOLINT int64_t tmp64; asm volatile( "1: \n" // clang-format off LOAD1_DATA32_LANE(v0, 0) LOAD1_DATA32_LANE(v0, 1) LOAD1_DATA32_LANE(v0, 2) LOAD1_DATA32_LANE(v0, 3) LOAD1_DATA32_LANE(v1, 0) LOAD1_DATA32_LANE(v1, 1) LOAD1_DATA32_LANE(v1, 2) LOAD1_DATA32_LANE(v1, 3) // clang-format on "st1 {v0.4s, v1.4s}, [%0], #32 \n" // store pixels "subs %w2, %w2, #8 \n" // 8 processed per loop "b.gt 1b \n" : "+r"(dst_argb), // %0 "+r"(src_argb), // %1 "+r"(dst_width), // %2 "+r"(x64), // %3 "+r"(dx64), // %4 "=&r"(tmp64), // %5 "+r"(src_tmp) // %6 : : "memory", "cc", "v0", "v1"); } #undef LOAD1_DATA32_LANE // TODO(Yang Zhang): Investigate less load instructions for // the x/dx stepping #define LOAD2_DATA32_LANE(vn1, vn2, n) \ "lsr %5, %3, #16 \n" \ "add %6, %1, %5, lsl #2 \n" \ "add %3, %3, %4 \n" \ "ld2 {" #vn1 ".s, " #vn2 ".s}[" #n "], [%6] \n" void ScaleARGBFilterCols_NEON(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { int dx_offset[4] = {0, 1, 2, 3}; int* tmp = dx_offset; const uint8_t* src_tmp = src_argb; int64_t x64 = (int64_t)x; // NOLINT int64_t dx64 = (int64_t)dx; // NOLINT asm volatile ( "dup v0.4s, %w3 \n" // x "dup v1.4s, %w4 \n" // dx "ld1 {v2.4s}, [%5] \n" // 0 1 2 3 "shl v6.4s, v1.4s, #2 \n" // 4 * dx "mul v1.4s, v1.4s, v2.4s \n" "movi v3.16b, #0x7f \n" // 0x7F "movi v4.8h, #0x7f \n" // 0x7F // x , x + 1 * dx, x + 2 * dx, x + 3 * dx "add v5.4s, v1.4s, v0.4s \n" "1: \n" // d0, d1: a // d2, d3: b LOAD2_DATA32_LANE(v0, v1, 0) LOAD2_DATA32_LANE(v0, v1, 1) LOAD2_DATA32_LANE(v0, v1, 2) LOAD2_DATA32_LANE(v0, v1, 3) "shrn v2.4h, v5.4s, #9 \n" "and v2.8b, v2.8b, v4.8b \n" "dup v16.8b, v2.b[0] \n" "dup v17.8b, v2.b[2] \n" "dup v18.8b, v2.b[4] \n" "dup v19.8b, v2.b[6] \n" "ext v2.8b, v16.8b, v17.8b, #4 \n" "ext v17.8b, v18.8b, v19.8b, #4 \n" "ins v2.d[1], v17.d[0] \n" // f "eor v7.16b, v2.16b, v3.16b \n" // 0x7f ^ f "umull v16.8h, v0.8b, v7.8b \n" "umull2 v17.8h, v0.16b, v7.16b \n" "umull v18.8h, v1.8b, v2.8b \n" "umull2 v19.8h, v1.16b, v2.16b \n" "add v16.8h, v16.8h, v18.8h \n" "add v17.8h, v17.8h, v19.8h \n" "shrn v0.8b, v16.8h, #7 \n" "shrn2 v0.16b, v17.8h, #7 \n" "st1 {v0.4s}, [%0], #16 \n" // store pixels "add v5.4s, v5.4s, v6.4s \n" "subs %w2, %w2, #4 \n" // 4 processed per loop "b.gt 1b \n" : "+r"(dst_argb), // %0 "+r"(src_argb), // %1 "+r"(dst_width), // %2 "+r"(x64), // %3 "+r"(dx64), // %4 "+r"(tmp), // %5 "+r"(src_tmp) // %6 : : "memory", "cc", "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18", "v19" ); } #undef LOAD2_DATA32_LANE // Read 16x2 average down and write 8x1. 
void ScaleRowDown2Box_16_NEON(const uint16_t* src_ptr,
                              ptrdiff_t src_stride,
                              uint16_t* dst,
                              int dst_width) {
  asm volatile(
      // change the stride to row 2 pointer
      "add        %1, %0, %1, lsl #1             \n"  // ptr + stride * 2
      "1:                                        \n"
      "ld1        {v0.8h, v1.8h}, [%0], #32      \n"  // load row 1 and post inc
      "ld1        {v2.8h, v3.8h}, [%1], #32      \n"  // load row 2 and post inc
      "subs       %w3, %w3, #8                   \n"  // 8 processed per loop
      "uaddlp     v0.4s, v0.8h                   \n"  // row 1 add adjacent
      "uaddlp     v1.4s, v1.8h                   \n"
      "uadalp     v0.4s, v2.8h                   \n"  // +row 2 add adjacent
      "uadalp     v1.4s, v3.8h                   \n"
      "rshrn      v0.4h, v0.4s, #2               \n"  // round and pack
      "rshrn2     v0.8h, v1.4s, #2               \n"
      "st1        {v0.8h}, [%2], #16             \n"
      "b.gt       1b                             \n"
      : "+r"(src_ptr),     // %0
        "+r"(src_stride),  // %1
        "+r"(dst),         // %2
        "+r"(dst_width)    // %3
      :
      : "v0", "v1", "v2", "v3"  // Clobber List
  );
}

// Read 8x2 upsample with filtering and write 16x1.
// Actually reads an extra pixel, so 9x2.
void ScaleRowUp2_16_NEON(const uint16_t* src_ptr,
                         ptrdiff_t src_stride,
                         uint16_t* dst,
                         int dst_width) {
  asm volatile(
      "add        %1, %0, %1, lsl #1             \n"  // ptr + stride * 2
      "movi       v0.8h, #9                      \n"  // constants
      "movi       v1.4s, #3                      \n"
      "1:                                        \n"
      "ld1        {v3.8h}, [%0], %4              \n"  // TL read first 8
      "ld1        {v4.8h}, [%0], %5              \n"  // TR read 8 offset by 1
      "ld1        {v5.8h}, [%1], %4              \n"  // BL read 8 from next row
      "ld1        {v6.8h}, [%1], %5              \n"  // BR offset by 1
      "subs       %w3, %w3, #16                  \n"  // 16 dst pixels per loop
      "umull      v16.4s, v3.4h, v0.4h           \n"
      "umull2     v7.4s, v3.8h, v0.8h            \n"
      "umull      v18.4s, v4.4h, v0.4h           \n"
      "umull2     v17.4s, v4.8h, v0.8h           \n"
      "uaddw      v16.4s, v16.4s, v6.4h          \n"
      "uaddl2     v19.4s, v6.8h, v3.8h           \n"
      "uaddl      v3.4s, v6.4h, v3.4h            \n"
      "uaddw2     v6.4s, v7.4s, v6.8h            \n"
      "uaddl2     v7.4s, v5.8h, v4.8h            \n"
      "uaddl      v4.4s, v5.4h, v4.4h            \n"
      "uaddw      v18.4s, v18.4s, v5.4h          \n"
      "mla        v16.4s, v4.4s, v1.4s           \n"
      "mla        v18.4s, v3.4s, v1.4s           \n"
      "mla        v6.4s, v7.4s, v1.4s            \n"
      "uaddw2     v4.4s, v17.4s, v5.8h           \n"
      "uqrshrn    v16.4h, v16.4s, #4             \n"
      "mla        v4.4s, v19.4s, v1.4s           \n"
      "uqrshrn2   v16.8h, v6.4s, #4              \n"
      "uqrshrn    v17.4h, v18.4s, #4             \n"
      "uqrshrn2   v17.8h, v4.4s, #4              \n"
      "st2        {v16.8h-v17.8h}, [%2], #32     \n"
      "b.gt       1b                             \n"
      : "+r"(src_ptr),     // %0
        "+r"(src_stride),  // %1
        "+r"(dst),         // %2
        "+r"(dst_width)    // %3
      : "r"(2LL),          // %4
        "r"(14LL)          // %5
      : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v7", "v16", "v17", "v18",
        "v19"  // Clobber List
  );
}

#endif  // !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)

#ifdef __cplusplus
}  // extern "C"
}  // namespace libyuv
#endif
libvpx-1.8.2/third_party/libyuv/source/scale_win.cc000066400000000000000000001277401357355204000224610ustar00rootroot00000000000000/*
 *  Copyright 2013 The LibYuv Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "libyuv/row.h"
#include "libyuv/scale_row.h"

#ifdef __cplusplus
namespace libyuv {
extern "C" {
#endif

// This module is for 32 bit Visual C x86 and clangcl
#if !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) && defined(_MSC_VER)

// Offsets for source bytes 0 to 9
static const uvec8 kShuf0 = {0,   1,   3,   4,   5,   7,   8,   9,
                             128, 128, 128, 128, 128, 128, 128, 128};

// Offsets for source bytes 11 to 20 with 8 subtracted = 3 to 12.
static const uvec8 kShuf1 = {3,   4,   5,   7,   8,   9,   11,  12,
                             128, 128, 128, 128, 128, 128, 128, 128};

// Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31.
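// In these shuffle tables an index of 128 has bit 7 set, which makes
// pshufb write a zero into that destination byte instead of copying a
// source byte.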
static const uvec8 kShuf2 = {5, 7, 8, 9, 11, 12, 13, 15, 128, 128, 128, 128, 128, 128, 128, 128}; // Offsets for source bytes 0 to 10 static const uvec8 kShuf01 = {0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10}; // Offsets for source bytes 10 to 21 with 8 subtracted = 3 to 13. static const uvec8 kShuf11 = {2, 3, 4, 5, 5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13}; // Offsets for source bytes 21 to 31 with 16 subtracted = 5 to 31. static const uvec8 kShuf21 = {5, 6, 6, 7, 8, 9, 9, 10, 10, 11, 12, 13, 13, 14, 14, 15}; // Coefficients for source bytes 0 to 10 static const uvec8 kMadd01 = {3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2}; // Coefficients for source bytes 10 to 21 static const uvec8 kMadd11 = {1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1}; // Coefficients for source bytes 21 to 31 static const uvec8 kMadd21 = {2, 2, 1, 3, 3, 1, 2, 2, 1, 3, 3, 1, 2, 2, 1, 3}; // Coefficients for source bytes 21 to 31 static const vec16 kRound34 = {2, 2, 2, 2, 2, 2, 2, 2}; static const uvec8 kShuf38a = {0, 3, 6, 8, 11, 14, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}; static const uvec8 kShuf38b = {128, 128, 128, 128, 128, 128, 0, 3, 6, 8, 11, 14, 128, 128, 128, 128}; // Arrange words 0,3,6 into 0,1,2 static const uvec8 kShufAc = {0, 1, 6, 7, 12, 13, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128}; // Arrange words 0,3,6 into 3,4,5 static const uvec8 kShufAc3 = {128, 128, 128, 128, 128, 128, 0, 1, 6, 7, 12, 13, 128, 128, 128, 128}; // Scaling values for boxes of 3x3 and 2x3 static const uvec16 kScaleAc33 = {65536 / 9, 65536 / 9, 65536 / 6, 65536 / 9, 65536 / 9, 65536 / 6, 0, 0}; // Arrange first value for pixels 0,1,2,3,4,5 static const uvec8 kShufAb0 = {0, 128, 3, 128, 6, 128, 8, 128, 11, 128, 14, 128, 128, 128, 128, 128}; // Arrange second value for pixels 0,1,2,3,4,5 static const uvec8 kShufAb1 = {1, 128, 4, 128, 7, 128, 9, 128, 12, 128, 15, 128, 128, 128, 128, 128}; // Arrange third value for pixels 0,1,2,3,4,5 static const uvec8 kShufAb2 = {2, 128, 5, 128, 128, 128, 10, 128, 13, 128, 128, 128, 128, 128, 128, 128}; // Scaling values for boxes of 3x2 and 2x2 static const uvec16 kScaleAb2 = {65536 / 3, 65536 / 3, 65536 / 2, 65536 / 3, 65536 / 3, 65536 / 2, 0, 0}; // Reads 32 pixels, throws half away and writes 16 pixels. __declspec(naked) void ScaleRowDown2_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr // src_stride ignored mov edx, [esp + 12] // dst_ptr mov ecx, [esp + 16] // dst_width wloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] psrlw xmm0, 8 // isolate odd pixels. psrlw xmm1, 8 packuswb xmm0, xmm1 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 jg wloop ret } } // Blends 32x1 rectangle to 16x1. __declspec(naked) void ScaleRowDown2Linear_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr // src_stride mov edx, [esp + 12] // dst_ptr mov ecx, [esp + 16] // dst_width pcmpeqb xmm4, xmm4 // constant 0x0101 psrlw xmm4, 15 packuswb xmm4, xmm4 pxor xmm5, xmm5 // constant 0 wloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] pmaddubsw xmm0, xmm4 // horizontal add pmaddubsw xmm1, xmm4 pavgw xmm0, xmm5 // (x + 1) / 2 pavgw xmm1, xmm5 packuswb xmm0, xmm1 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 jg wloop ret } } // Blends 32x2 rectangle to 16x1. 
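// Each output pixel is the rounded average of a 2x2 block (sketch only):
//   dst[x] = (r0[2 * x] + r0[2 * x + 1] + r1[2 * x] + r1[2 * x + 1] + 2) >> 2
// The pmaddubsw/psrlw/pavgw sequence below computes exactly this, using
// the identity (sum + 2) / 4 == average(sum >> 1, 0).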
__declspec(naked) void ScaleRowDown2Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { push esi mov eax, [esp + 4 + 4] // src_ptr mov esi, [esp + 4 + 8] // src_stride mov edx, [esp + 4 + 12] // dst_ptr mov ecx, [esp + 4 + 16] // dst_width pcmpeqb xmm4, xmm4 // constant 0x0101 psrlw xmm4, 15 packuswb xmm4, xmm4 pxor xmm5, xmm5 // constant 0 wloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + esi] movdqu xmm3, [eax + esi + 16] lea eax, [eax + 32] pmaddubsw xmm0, xmm4 // horizontal add pmaddubsw xmm1, xmm4 pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 paddw xmm0, xmm2 // vertical add paddw xmm1, xmm3 psrlw xmm0, 1 psrlw xmm1, 1 pavgw xmm0, xmm5 // (x + 1) / 2 pavgw xmm1, xmm5 packuswb xmm0, xmm1 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 jg wloop pop esi ret } } #ifdef HAS_SCALEROWDOWN2_AVX2 // Reads 64 pixels, throws half away and writes 32 pixels. __declspec(naked) void ScaleRowDown2_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr // src_stride ignored mov edx, [esp + 12] // dst_ptr mov ecx, [esp + 16] // dst_width wloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] vpsrlw ymm0, ymm0, 8 // isolate odd pixels. vpsrlw ymm1, ymm1, 8 vpackuswb ymm0, ymm0, ymm1 vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 jg wloop vzeroupper ret } } // Blends 64x1 rectangle to 32x1. __declspec(naked) void ScaleRowDown2Linear_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr // src_stride mov edx, [esp + 12] // dst_ptr mov ecx, [esp + 16] // dst_width vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b vpsrlw ymm4, ymm4, 15 vpackuswb ymm4, ymm4, ymm4 vpxor ymm5, ymm5, ymm5 // constant 0 wloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] vpmaddubsw ymm0, ymm0, ymm4 // horizontal add vpmaddubsw ymm1, ymm1, ymm4 vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2 vpavgw ymm1, ymm1, ymm5 vpackuswb ymm0, ymm0, ymm1 vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 jg wloop vzeroupper ret } } // For rounding, average = (sum + 2) / 4 // becomes average((sum >> 1), 0) // Blends 64x2 rectangle to 32x1. __declspec(naked) void ScaleRowDown2Box_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { push esi mov eax, [esp + 4 + 4] // src_ptr mov esi, [esp + 4 + 8] // src_stride mov edx, [esp + 4 + 12] // dst_ptr mov ecx, [esp + 4 + 16] // dst_width vpcmpeqb ymm4, ymm4, ymm4 // '1' constant, 8b vpsrlw ymm4, ymm4, 15 vpackuswb ymm4, ymm4, ymm4 vpxor ymm5, ymm5, ymm5 // constant 0 wloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] vmovdqu ymm2, [eax + esi] vmovdqu ymm3, [eax + esi + 32] lea eax, [eax + 64] vpmaddubsw ymm0, ymm0, ymm4 // horizontal add vpmaddubsw ymm1, ymm1, ymm4 vpmaddubsw ymm2, ymm2, ymm4 vpmaddubsw ymm3, ymm3, ymm4 vpaddw ymm0, ymm0, ymm2 // vertical add vpaddw ymm1, ymm1, ymm3 vpsrlw ymm0, ymm0, 1 // (x + 2) / 4 = (x / 2 + 1) / 2 vpsrlw ymm1, ymm1, 1 vpavgw ymm0, ymm0, ymm5 // (x + 1) / 2 vpavgw ymm1, ymm1, ymm5 vpackuswb ymm0, ymm0, ymm1 vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vmovdqu [edx], ymm0 lea edx, [edx + 32] sub ecx, 32 jg wloop pop esi vzeroupper ret } } #endif // HAS_SCALEROWDOWN2_AVX2 // Point samples 32 pixels to 8 pixels. 
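// The mask built below (0x00ff0000 in each dword) keeps byte 2 of every
// 4-byte group, i.e. it selects every 4th source pixel (2, 6, 10, ...).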
__declspec(naked) void ScaleRowDown4_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr // src_stride ignored mov edx, [esp + 12] // dst_ptr mov ecx, [esp + 16] // dst_width pcmpeqb xmm5, xmm5 // generate mask 0x00ff0000 psrld xmm5, 24 pslld xmm5, 16 wloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] pand xmm0, xmm5 pand xmm1, xmm5 packuswb xmm0, xmm1 psrlw xmm0, 8 packuswb xmm0, xmm0 movq qword ptr [edx], xmm0 lea edx, [edx + 8] sub ecx, 8 jg wloop ret } } // Blends 32x4 rectangle to 8x1. __declspec(naked) void ScaleRowDown4Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { push esi push edi mov eax, [esp + 8 + 4] // src_ptr mov esi, [esp + 8 + 8] // src_stride mov edx, [esp + 8 + 12] // dst_ptr mov ecx, [esp + 8 + 16] // dst_width lea edi, [esi + esi * 2] // src_stride * 3 pcmpeqb xmm4, xmm4 // constant 0x0101 psrlw xmm4, 15 movdqa xmm5, xmm4 packuswb xmm4, xmm4 psllw xmm5, 3 // constant 0x0008 wloop: movdqu xmm0, [eax] // average rows movdqu xmm1, [eax + 16] movdqu xmm2, [eax + esi] movdqu xmm3, [eax + esi + 16] pmaddubsw xmm0, xmm4 // horizontal add pmaddubsw xmm1, xmm4 pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 paddw xmm0, xmm2 // vertical add rows 0, 1 paddw xmm1, xmm3 movdqu xmm2, [eax + esi * 2] movdqu xmm3, [eax + esi * 2 + 16] pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 paddw xmm0, xmm2 // add row 2 paddw xmm1, xmm3 movdqu xmm2, [eax + edi] movdqu xmm3, [eax + edi + 16] lea eax, [eax + 32] pmaddubsw xmm2, xmm4 pmaddubsw xmm3, xmm4 paddw xmm0, xmm2 // add row 3 paddw xmm1, xmm3 phaddw xmm0, xmm1 paddw xmm0, xmm5 // + 8 for round psrlw xmm0, 4 // /16 for average of 4 * 4 packuswb xmm0, xmm0 movq qword ptr [edx], xmm0 lea edx, [edx + 8] sub ecx, 8 jg wloop pop edi pop esi ret } } #ifdef HAS_SCALEROWDOWN4_AVX2 // Point samples 64 pixels to 16 pixels. __declspec(naked) void ScaleRowDown4_AVX2(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr // src_stride ignored mov edx, [esp + 12] // dst_ptr mov ecx, [esp + 16] // dst_width vpcmpeqb ymm5, ymm5, ymm5 // generate mask 0x00ff0000 vpsrld ymm5, ymm5, 24 vpslld ymm5, ymm5, 16 wloop: vmovdqu ymm0, [eax] vmovdqu ymm1, [eax + 32] lea eax, [eax + 64] vpand ymm0, ymm0, ymm5 vpand ymm1, ymm1, ymm5 vpackuswb ymm0, ymm0, ymm1 vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vpsrlw ymm0, ymm0, 8 vpackuswb ymm0, ymm0, ymm0 vpermq ymm0, ymm0, 0xd8 // unmutate vpackuswb vmovdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 16 jg wloop vzeroupper ret } } // Blends 64x4 rectangle to 16x1. 
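// Each output pixel is the rounded average of a 4x4 input block:
//   dst[x] = (sum of 16 source bytes + 8) >> 4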
__declspec(naked) void ScaleRowDown4Box_AVX2(const uint8_t* src_ptr,
                                             ptrdiff_t src_stride,
                                             uint8_t* dst_ptr,
                                             int dst_width) {
  __asm {
    push       esi
    push       edi
    mov        eax, [esp + 8 + 4]  // src_ptr
    mov        esi, [esp + 8 + 8]  // src_stride
    mov        edx, [esp + 8 + 12]  // dst_ptr
    mov        ecx, [esp + 8 + 16]  // dst_width
    lea        edi, [esi + esi * 2]  // src_stride * 3
    vpcmpeqb   ymm4, ymm4, ymm4  // constant 0x0101
    vpsrlw     ymm4, ymm4, 15
    vpsllw     ymm5, ymm4, 3  // constant 0x0008
    vpackuswb  ymm4, ymm4, ymm4

  wloop:
    vmovdqu    ymm0, [eax]  // average rows
    vmovdqu    ymm1, [eax + 32]
    vmovdqu    ymm2, [eax + esi]
    vmovdqu    ymm3, [eax + esi + 32]
    vpmaddubsw ymm0, ymm0, ymm4  // horizontal add
    vpmaddubsw ymm1, ymm1, ymm4
    vpmaddubsw ymm2, ymm2, ymm4
    vpmaddubsw ymm3, ymm3, ymm4
    vpaddw     ymm0, ymm0, ymm2  // vertical add rows 0, 1
    vpaddw     ymm1, ymm1, ymm3
    vmovdqu    ymm2, [eax + esi * 2]
    vmovdqu    ymm3, [eax + esi * 2 + 32]
    vpmaddubsw ymm2, ymm2, ymm4
    vpmaddubsw ymm3, ymm3, ymm4
    vpaddw     ymm0, ymm0, ymm2  // add row 2
    vpaddw     ymm1, ymm1, ymm3
    vmovdqu    ymm2, [eax + edi]
    vmovdqu    ymm3, [eax + edi + 32]
    lea        eax, [eax + 64]
    vpmaddubsw ymm2, ymm2, ymm4
    vpmaddubsw ymm3, ymm3, ymm4
    vpaddw     ymm0, ymm0, ymm2  // add row 3
    vpaddw     ymm1, ymm1, ymm3
    vphaddw    ymm0, ymm0, ymm1  // mutates
    vpermq     ymm0, ymm0, 0xd8  // unmutate vphaddw
    vpaddw     ymm0, ymm0, ymm5  // + 8 for round
    vpsrlw     ymm0, ymm0, 4  // /16 for average of 4 * 4
    vpackuswb  ymm0, ymm0, ymm0
    vpermq     ymm0, ymm0, 0xd8  // unmutate vpackuswb
    vmovdqu    [edx], xmm0
    lea        edx, [edx + 16]
    sub        ecx, 16
    jg         wloop

    pop        edi
    pop        esi
    vzeroupper
    ret
  }
}
#endif  // HAS_SCALEROWDOWN4_AVX2

// Point samples 32 pixels to 24 pixels.
// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
// Then shuffled to do the scaling.
__declspec(naked) void ScaleRowDown34_SSSE3(const uint8_t* src_ptr,
                                            ptrdiff_t src_stride,
                                            uint8_t* dst_ptr,
                                            int dst_width) {
  __asm {
    mov        eax, [esp + 4]  // src_ptr
    // src_stride ignored
    mov        edx, [esp + 12]  // dst_ptr
    mov        ecx, [esp + 16]  // dst_width
    movdqa     xmm3, xmmword ptr kShuf0
    movdqa     xmm4, xmmword ptr kShuf1
    movdqa     xmm5, xmmword ptr kShuf2

  wloop:
    movdqu     xmm0, [eax]
    movdqu     xmm1, [eax + 16]
    lea        eax, [eax + 32]
    movdqa     xmm2, xmm1
    palignr    xmm1, xmm0, 8
    pshufb     xmm0, xmm3
    pshufb     xmm1, xmm4
    pshufb     xmm2, xmm5
    movq       qword ptr [edx], xmm0
    movq       qword ptr [edx + 8], xmm1
    movq       qword ptr [edx + 16], xmm2
    lea        edx, [edx + 24]
    sub        ecx, 24
    jg         wloop

    ret
  }
}

// Blends 32x2 rectangle to 24x1
// Produces three 8 byte values. For each 8 bytes, 16 bytes are read.
// Then shuffled to do the scaling.

// Register usage:
// xmm0 src_row 0
// xmm1 src_row 1
// xmm2 shuf 0
// xmm3 shuf 1
// xmm4 shuf 2
// xmm5 madd 0
// xmm6 madd 1
// xmm7 kRound34

// Note that movdqa+palign may be better than movdqu.
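// Of the two 3/4 box filters below, the _1_ variant blends the two source
// rows evenly (one pavgb, (row0 + row1 + 1) / 2), while the _0_ variant
// applies pavgb twice to weight the rows roughly 3:1 before the same
// column filtering.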
__declspec(naked) void ScaleRowDown34_1_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { push esi mov eax, [esp + 4 + 4] // src_ptr mov esi, [esp + 4 + 8] // src_stride mov edx, [esp + 4 + 12] // dst_ptr mov ecx, [esp + 4 + 16] // dst_width movdqa xmm2, xmmword ptr kShuf01 movdqa xmm3, xmmword ptr kShuf11 movdqa xmm4, xmmword ptr kShuf21 movdqa xmm5, xmmword ptr kMadd01 movdqa xmm6, xmmword ptr kMadd11 movdqa xmm7, xmmword ptr kRound34 wloop: movdqu xmm0, [eax] // pixels 0..7 movdqu xmm1, [eax + esi] pavgb xmm0, xmm1 pshufb xmm0, xmm2 pmaddubsw xmm0, xmm5 paddsw xmm0, xmm7 psrlw xmm0, 2 packuswb xmm0, xmm0 movq qword ptr [edx], xmm0 movdqu xmm0, [eax + 8] // pixels 8..15 movdqu xmm1, [eax + esi + 8] pavgb xmm0, xmm1 pshufb xmm0, xmm3 pmaddubsw xmm0, xmm6 paddsw xmm0, xmm7 psrlw xmm0, 2 packuswb xmm0, xmm0 movq qword ptr [edx + 8], xmm0 movdqu xmm0, [eax + 16] // pixels 16..23 movdqu xmm1, [eax + esi + 16] lea eax, [eax + 32] pavgb xmm0, xmm1 pshufb xmm0, xmm4 movdqa xmm1, xmmword ptr kMadd21 pmaddubsw xmm0, xmm1 paddsw xmm0, xmm7 psrlw xmm0, 2 packuswb xmm0, xmm0 movq qword ptr [edx + 16], xmm0 lea edx, [edx + 24] sub ecx, 24 jg wloop pop esi ret } } // Note that movdqa+palign may be better than movdqu. __declspec(naked) void ScaleRowDown34_0_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { push esi mov eax, [esp + 4 + 4] // src_ptr mov esi, [esp + 4 + 8] // src_stride mov edx, [esp + 4 + 12] // dst_ptr mov ecx, [esp + 4 + 16] // dst_width movdqa xmm2, xmmword ptr kShuf01 movdqa xmm3, xmmword ptr kShuf11 movdqa xmm4, xmmword ptr kShuf21 movdqa xmm5, xmmword ptr kMadd01 movdqa xmm6, xmmword ptr kMadd11 movdqa xmm7, xmmword ptr kRound34 wloop: movdqu xmm0, [eax] // pixels 0..7 movdqu xmm1, [eax + esi] pavgb xmm1, xmm0 pavgb xmm0, xmm1 pshufb xmm0, xmm2 pmaddubsw xmm0, xmm5 paddsw xmm0, xmm7 psrlw xmm0, 2 packuswb xmm0, xmm0 movq qword ptr [edx], xmm0 movdqu xmm0, [eax + 8] // pixels 8..15 movdqu xmm1, [eax + esi + 8] pavgb xmm1, xmm0 pavgb xmm0, xmm1 pshufb xmm0, xmm3 pmaddubsw xmm0, xmm6 paddsw xmm0, xmm7 psrlw xmm0, 2 packuswb xmm0, xmm0 movq qword ptr [edx + 8], xmm0 movdqu xmm0, [eax + 16] // pixels 16..23 movdqu xmm1, [eax + esi + 16] lea eax, [eax + 32] pavgb xmm1, xmm0 pavgb xmm0, xmm1 pshufb xmm0, xmm4 movdqa xmm1, xmmword ptr kMadd21 pmaddubsw xmm0, xmm1 paddsw xmm0, xmm7 psrlw xmm0, 2 packuswb xmm0, xmm0 movq qword ptr [edx + 16], xmm0 lea edx, [edx+24] sub ecx, 24 jg wloop pop esi ret } } // 3/8 point sampler // Scale 32 pixels to 12 __declspec(naked) void ScaleRowDown38_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { mov eax, [esp + 4] // src_ptr // src_stride ignored mov edx, [esp + 12] // dst_ptr mov ecx, [esp + 16] // dst_width movdqa xmm4, xmmword ptr kShuf38a movdqa xmm5, xmmword ptr kShuf38b xloop: movdqu xmm0, [eax] // 16 pixels -> 0,1,2,3,4,5 movdqu xmm1, [eax + 16] // 16 pixels -> 6,7,8,9,10,11 lea eax, [eax + 32] pshufb xmm0, xmm4 pshufb xmm1, xmm5 paddusb xmm0, xmm1 movq qword ptr [edx], xmm0 // write 12 pixels movhlps xmm1, xmm0 movd [edx + 8], xmm1 lea edx, [edx + 12] sub ecx, 12 jg xloop ret } } // Scale 16x3 pixels to 6x1 with interpolation __declspec(naked) void ScaleRowDown38_3_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { push esi mov eax, [esp + 4 + 4] // src_ptr mov esi, [esp + 4 + 8] // src_stride mov edx, [esp + 4 + 12] // dst_ptr mov ecx, [esp + 4 + 16] // 
dst_width movdqa xmm2, xmmword ptr kShufAc movdqa xmm3, xmmword ptr kShufAc3 movdqa xmm4, xmmword ptr kScaleAc33 pxor xmm5, xmm5 xloop: movdqu xmm0, [eax] // sum up 3 rows into xmm0/1 movdqu xmm6, [eax + esi] movhlps xmm1, xmm0 movhlps xmm7, xmm6 punpcklbw xmm0, xmm5 punpcklbw xmm1, xmm5 punpcklbw xmm6, xmm5 punpcklbw xmm7, xmm5 paddusw xmm0, xmm6 paddusw xmm1, xmm7 movdqu xmm6, [eax + esi * 2] lea eax, [eax + 16] movhlps xmm7, xmm6 punpcklbw xmm6, xmm5 punpcklbw xmm7, xmm5 paddusw xmm0, xmm6 paddusw xmm1, xmm7 movdqa xmm6, xmm0 // 8 pixels -> 0,1,2 of xmm6 psrldq xmm0, 2 paddusw xmm6, xmm0 psrldq xmm0, 2 paddusw xmm6, xmm0 pshufb xmm6, xmm2 movdqa xmm7, xmm1 // 8 pixels -> 3,4,5 of xmm6 psrldq xmm1, 2 paddusw xmm7, xmm1 psrldq xmm1, 2 paddusw xmm7, xmm1 pshufb xmm7, xmm3 paddusw xmm6, xmm7 pmulhuw xmm6, xmm4 // divide by 9,9,6, 9,9,6 packuswb xmm6, xmm6 movd [edx], xmm6 // write 6 pixels psrlq xmm6, 16 movd [edx + 2], xmm6 lea edx, [edx + 6] sub ecx, 6 jg xloop pop esi ret } } // Scale 16x2 pixels to 6x1 with interpolation __declspec(naked) void ScaleRowDown38_2_Box_SSSE3(const uint8_t* src_ptr, ptrdiff_t src_stride, uint8_t* dst_ptr, int dst_width) { __asm { push esi mov eax, [esp + 4 + 4] // src_ptr mov esi, [esp + 4 + 8] // src_stride mov edx, [esp + 4 + 12] // dst_ptr mov ecx, [esp + 4 + 16] // dst_width movdqa xmm2, xmmword ptr kShufAb0 movdqa xmm3, xmmword ptr kShufAb1 movdqa xmm4, xmmword ptr kShufAb2 movdqa xmm5, xmmword ptr kScaleAb2 xloop: movdqu xmm0, [eax] // average 2 rows into xmm0 movdqu xmm1, [eax + esi] lea eax, [eax + 16] pavgb xmm0, xmm1 movdqa xmm1, xmm0 // 16 pixels -> 0,1,2,3,4,5 of xmm1 pshufb xmm1, xmm2 movdqa xmm6, xmm0 pshufb xmm6, xmm3 paddusw xmm1, xmm6 pshufb xmm0, xmm4 paddusw xmm1, xmm0 pmulhuw xmm1, xmm5 // divide by 3,3,2, 3,3,2 packuswb xmm1, xmm1 movd [edx], xmm1 // write 6 pixels psrlq xmm1, 16 movd [edx + 2], xmm1 lea edx, [edx + 6] sub ecx, 6 jg xloop pop esi ret } } // Reads 16 bytes and accumulates to 16 shorts at a time. __declspec(naked) void ScaleAddRow_SSE2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { __asm { mov eax, [esp + 4] // src_ptr mov edx, [esp + 8] // dst_ptr mov ecx, [esp + 12] // src_width pxor xmm5, xmm5 // sum rows xloop: movdqu xmm3, [eax] // read 16 bytes lea eax, [eax + 16] movdqu xmm0, [edx] // read 16 words from destination movdqu xmm1, [edx + 16] movdqa xmm2, xmm3 punpcklbw xmm2, xmm5 punpckhbw xmm3, xmm5 paddusw xmm0, xmm2 // sum 16 words paddusw xmm1, xmm3 movdqu [edx], xmm0 // write 16 words to destination movdqu [edx + 16], xmm1 lea edx, [edx + 32] sub ecx, 16 jg xloop ret } } #ifdef HAS_SCALEADDROW_AVX2 // Reads 32 bytes and accumulates to 32 shorts at a time. __declspec(naked) void ScaleAddRow_AVX2(const uint8_t* src_ptr, uint16_t* dst_ptr, int src_width) { __asm { mov eax, [esp + 4] // src_ptr mov edx, [esp + 8] // dst_ptr mov ecx, [esp + 12] // src_width vpxor ymm5, ymm5, ymm5 // sum rows xloop: vmovdqu ymm3, [eax] // read 32 bytes lea eax, [eax + 32] vpermq ymm3, ymm3, 0xd8 // unmutate for vpunpck vpunpcklbw ymm2, ymm3, ymm5 vpunpckhbw ymm3, ymm3, ymm5 vpaddusw ymm0, ymm2, [edx] // sum 16 words vpaddusw ymm1, ymm3, [edx + 32] vmovdqu [edx], ymm0 // write 32 words to destination vmovdqu [edx + 32], ymm1 lea edx, [edx + 64] sub ecx, 32 jg xloop vzeroupper ret } } #endif // HAS_SCALEADDROW_AVX2 // Constant for making pixels signed to avoid pmaddubsw // saturation. 
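// pmaddubsw multiplies unsigned bytes by signed bytes and saturates the
// signed 16-bit sums. Biasing the pixels by -0x80 keeps the products in
// range; kFadd40 below restores that bias (0x80 * 0x80 = 0x4000) and adds
// 0x40 so the final shift right by 7 rounds to nearest.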
static const uvec8 kFsub80 = {0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}; // Constant for making pixels unsigned and adding .5 for rounding. static const uvec16 kFadd40 = {0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040, 0x4040}; // Bilinear column filtering. SSSE3 version. __declspec(naked) void ScaleFilterCols_SSSE3(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) { __asm { push ebx push esi push edi mov edi, [esp + 12 + 4] // dst_ptr mov esi, [esp + 12 + 8] // src_ptr mov ecx, [esp + 12 + 12] // dst_width movd xmm2, [esp + 12 + 16] // x movd xmm3, [esp + 12 + 20] // dx mov eax, 0x04040000 // shuffle to line up fractions with pixel. movd xmm5, eax pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction. psrlw xmm6, 9 pcmpeqb xmm7, xmm7 // generate 0x0001 psrlw xmm7, 15 pextrw eax, xmm2, 1 // get x0 integer. preroll sub ecx, 2 jl xloop29 movdqa xmm0, xmm2 // x1 = x0 + dx paddd xmm0, xmm3 punpckldq xmm2, xmm0 // x0 x1 punpckldq xmm3, xmm3 // dx dx paddd xmm3, xmm3 // dx * 2, dx * 2 pextrw edx, xmm2, 3 // get x1 integer. preroll // 2 Pixel loop. xloop2: movdqa xmm1, xmm2 // x0, x1 fractions. paddd xmm2, xmm3 // x += dx movzx ebx, word ptr [esi + eax] // 2 source x0 pixels movd xmm0, ebx psrlw xmm1, 9 // 7 bit fractions. movzx ebx, word ptr [esi + edx] // 2 source x1 pixels movd xmm4, ebx pshufb xmm1, xmm5 // 0011 punpcklwd xmm0, xmm4 psubb xmm0, xmmword ptr kFsub80 // make pixels signed. pxor xmm1, xmm6 // 0..7f and 7f..0 paddusb xmm1, xmm7 // +1 so 0..7f and 80..1 pmaddubsw xmm1, xmm0 // 16 bit, 2 pixels. pextrw eax, xmm2, 1 // get x0 integer. next iteration. pextrw edx, xmm2, 3 // get x1 integer. next iteration. paddw xmm1, xmmword ptr kFadd40 // make pixels unsigned and round. psrlw xmm1, 7 // 8.7 fixed point to low 8 bits. packuswb xmm1, xmm1 // 8 bits, 2 pixels. movd ebx, xmm1 mov [edi], bx lea edi, [edi + 2] sub ecx, 2 // 2 pixels jge xloop2 xloop29: add ecx, 2 - 1 jl xloop99 // 1 pixel remainder movzx ebx, word ptr [esi + eax] // 2 source x0 pixels movd xmm0, ebx psrlw xmm2, 9 // 7 bit fractions. pshufb xmm2, xmm5 // 0011 psubb xmm0, xmmword ptr kFsub80 // make pixels signed. pxor xmm2, xmm6 // 0..7f and 7f..0 paddusb xmm2, xmm7 // +1 so 0..7f and 80..1 pmaddubsw xmm2, xmm0 // 16 bit paddw xmm2, xmmword ptr kFadd40 // make pixels unsigned and round. psrlw xmm2, 7 // 8.7 fixed point to low 8 bits. packuswb xmm2, xmm2 // 8 bits movd ebx, xmm2 mov [edi], bl xloop99: pop edi pop esi pop ebx ret } } // Reads 16 pixels, duplicates them and writes 32 pixels. __declspec(naked) void ScaleColsUp2_SSE2(uint8_t* dst_ptr, const uint8_t* src_ptr, int dst_width, int x, int dx) { __asm { mov edx, [esp + 4] // dst_ptr mov eax, [esp + 8] // src_ptr mov ecx, [esp + 12] // dst_width wloop: movdqu xmm0, [eax] lea eax, [eax + 16] movdqa xmm1, xmm0 punpcklbw xmm0, xmm0 punpckhbw xmm1, xmm1 movdqu [edx], xmm0 movdqu [edx + 16], xmm1 lea edx, [edx + 32] sub ecx, 32 jg wloop ret } } // Reads 8 pixels, throws half away and writes 4 even pixels (0, 2, 4, 6) __declspec(naked) void ScaleARGBRowDown2_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) { __asm { mov eax, [esp + 4] // src_argb // src_stride ignored mov edx, [esp + 12] // dst_argb mov ecx, [esp + 16] // dst_width wloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] shufps xmm0, xmm1, 0xdd movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jg wloop ret } } // Blends 8x1 rectangle to 4x1. 
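// Averages each pair of adjacent ARGB pixels, per channel:
//   dst[x] = avg(src[2 * x], src[2 * x + 1])
// where avg(a, b) = (a + b + 1) >> 1, as computed by pavgb.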
__declspec(naked) void ScaleARGBRowDown2Linear_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) { __asm { mov eax, [esp + 4] // src_argb // src_stride ignored mov edx, [esp + 12] // dst_argb mov ecx, [esp + 16] // dst_width wloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] lea eax, [eax + 32] movdqa xmm2, xmm0 shufps xmm0, xmm1, 0x88 // even pixels shufps xmm2, xmm1, 0xdd // odd pixels pavgb xmm0, xmm2 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jg wloop ret } } // Blends 8x2 rectangle to 4x1. __declspec(naked) void ScaleARGBRowDown2Box_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, uint8_t* dst_argb, int dst_width) { __asm { push esi mov eax, [esp + 4 + 4] // src_argb mov esi, [esp + 4 + 8] // src_stride mov edx, [esp + 4 + 12] // dst_argb mov ecx, [esp + 4 + 16] // dst_width wloop: movdqu xmm0, [eax] movdqu xmm1, [eax + 16] movdqu xmm2, [eax + esi] movdqu xmm3, [eax + esi + 16] lea eax, [eax + 32] pavgb xmm0, xmm2 // average rows pavgb xmm1, xmm3 movdqa xmm2, xmm0 // average columns (8 to 4 pixels) shufps xmm0, xmm1, 0x88 // even pixels shufps xmm2, xmm1, 0xdd // odd pixels pavgb xmm0, xmm2 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jg wloop pop esi ret } } // Reads 4 pixels at a time. __declspec(naked) void ScaleARGBRowDownEven_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width) { __asm { push ebx push edi mov eax, [esp + 8 + 4] // src_argb // src_stride ignored mov ebx, [esp + 8 + 12] // src_stepx mov edx, [esp + 8 + 16] // dst_argb mov ecx, [esp + 8 + 20] // dst_width lea ebx, [ebx * 4] lea edi, [ebx + ebx * 2] wloop: movd xmm0, [eax] movd xmm1, [eax + ebx] punpckldq xmm0, xmm1 movd xmm2, [eax + ebx * 2] movd xmm3, [eax + edi] lea eax, [eax + ebx * 4] punpckldq xmm2, xmm3 punpcklqdq xmm0, xmm2 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jg wloop pop edi pop ebx ret } } // Blends four 2x2 to 4x1. __declspec(naked) void ScaleARGBRowDownEvenBox_SSE2(const uint8_t* src_argb, ptrdiff_t src_stride, int src_stepx, uint8_t* dst_argb, int dst_width) { __asm { push ebx push esi push edi mov eax, [esp + 12 + 4] // src_argb mov esi, [esp + 12 + 8] // src_stride mov ebx, [esp + 12 + 12] // src_stepx mov edx, [esp + 12 + 16] // dst_argb mov ecx, [esp + 12 + 20] // dst_width lea esi, [eax + esi] // row1 pointer lea ebx, [ebx * 4] lea edi, [ebx + ebx * 2] wloop: movq xmm0, qword ptr [eax] // row0 4 pairs movhps xmm0, qword ptr [eax + ebx] movq xmm1, qword ptr [eax + ebx * 2] movhps xmm1, qword ptr [eax + edi] lea eax, [eax + ebx * 4] movq xmm2, qword ptr [esi] // row1 4 pairs movhps xmm2, qword ptr [esi + ebx] movq xmm3, qword ptr [esi + ebx * 2] movhps xmm3, qword ptr [esi + edi] lea esi, [esi + ebx * 4] pavgb xmm0, xmm2 // average rows pavgb xmm1, xmm3 movdqa xmm2, xmm0 // average columns (8 to 4 pixels) shufps xmm0, xmm1, 0x88 // even pixels shufps xmm2, xmm1, 0xdd // odd pixels pavgb xmm0, xmm2 movdqu [edx], xmm0 lea edx, [edx + 16] sub ecx, 4 jg wloop pop edi pop esi pop ebx ret } } // Column scaling unfiltered. SSE2 version. 
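// Nearest-neighbor column sampling in 16.16 fixed point. A scalar sketch
// (illustrative only; src and dst are treated as 32-bit ARGB pixels):
//   for (int i = 0; i < dst_width; ++i, x += dx)
//     dst[i] = src[x >> 16];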
__declspec(naked) void ScaleARGBCols_SSE2(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { __asm { push edi push esi mov edi, [esp + 8 + 4] // dst_argb mov esi, [esp + 8 + 8] // src_argb mov ecx, [esp + 8 + 12] // dst_width movd xmm2, [esp + 8 + 16] // x movd xmm3, [esp + 8 + 20] // dx pshufd xmm2, xmm2, 0 // x0 x0 x0 x0 pshufd xmm0, xmm3, 0x11 // dx 0 dx 0 paddd xmm2, xmm0 paddd xmm3, xmm3 // 0, 0, 0, dx * 2 pshufd xmm0, xmm3, 0x05 // dx * 2, dx * 2, 0, 0 paddd xmm2, xmm0 // x3 x2 x1 x0 paddd xmm3, xmm3 // 0, 0, 0, dx * 4 pshufd xmm3, xmm3, 0 // dx * 4, dx * 4, dx * 4, dx * 4 pextrw eax, xmm2, 1 // get x0 integer. pextrw edx, xmm2, 3 // get x1 integer. cmp ecx, 0 jle xloop99 sub ecx, 4 jl xloop49 // 4 Pixel loop. xloop4: movd xmm0, [esi + eax * 4] // 1 source x0 pixels movd xmm1, [esi + edx * 4] // 1 source x1 pixels pextrw eax, xmm2, 5 // get x2 integer. pextrw edx, xmm2, 7 // get x3 integer. paddd xmm2, xmm3 // x += dx punpckldq xmm0, xmm1 // x0 x1 movd xmm1, [esi + eax * 4] // 1 source x2 pixels movd xmm4, [esi + edx * 4] // 1 source x3 pixels pextrw eax, xmm2, 1 // get x0 integer. next iteration. pextrw edx, xmm2, 3 // get x1 integer. next iteration. punpckldq xmm1, xmm4 // x2 x3 punpcklqdq xmm0, xmm1 // x0 x1 x2 x3 movdqu [edi], xmm0 lea edi, [edi + 16] sub ecx, 4 // 4 pixels jge xloop4 xloop49: test ecx, 2 je xloop29 // 2 Pixels. movd xmm0, [esi + eax * 4] // 1 source x0 pixels movd xmm1, [esi + edx * 4] // 1 source x1 pixels pextrw eax, xmm2, 5 // get x2 integer. punpckldq xmm0, xmm1 // x0 x1 movq qword ptr [edi], xmm0 lea edi, [edi + 8] xloop29: test ecx, 1 je xloop99 // 1 Pixels. movd xmm0, [esi + eax * 4] // 1 source x2 pixels movd dword ptr [edi], xmm0 xloop99: pop esi pop edi ret } } // Bilinear row filtering combines 2x1 -> 1x1. SSSE3 version. // TODO(fbarchard): Port to Neon // Shuffle table for arranging 2 pixels into pairs for pmaddubsw static const uvec8 kShuffleColARGB = { 0u, 4u, 1u, 5u, 2u, 6u, 3u, 7u, // bbggrraa 1st pixel 8u, 12u, 9u, 13u, 10u, 14u, 11u, 15u // bbggrraa 2nd pixel }; // Shuffle table for duplicating 2 fractions into 8 bytes each static const uvec8 kShuffleFractions = { 0u, 0u, 0u, 0u, 0u, 0u, 0u, 0u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, 4u, }; __declspec(naked) void ScaleARGBFilterCols_SSSE3(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { __asm { push esi push edi mov edi, [esp + 8 + 4] // dst_argb mov esi, [esp + 8 + 8] // src_argb mov ecx, [esp + 8 + 12] // dst_width movd xmm2, [esp + 8 + 16] // x movd xmm3, [esp + 8 + 20] // dx movdqa xmm4, xmmword ptr kShuffleColARGB movdqa xmm5, xmmword ptr kShuffleFractions pcmpeqb xmm6, xmm6 // generate 0x007f for inverting fraction. psrlw xmm6, 9 pextrw eax, xmm2, 1 // get x0 integer. preroll sub ecx, 2 jl xloop29 movdqa xmm0, xmm2 // x1 = x0 + dx paddd xmm0, xmm3 punpckldq xmm2, xmm0 // x0 x1 punpckldq xmm3, xmm3 // dx dx paddd xmm3, xmm3 // dx * 2, dx * 2 pextrw edx, xmm2, 3 // get x1 integer. preroll // 2 Pixel loop. xloop2: movdqa xmm1, xmm2 // x0, x1 fractions. paddd xmm2, xmm3 // x += dx movq xmm0, qword ptr [esi + eax * 4] // 2 source x0 pixels psrlw xmm1, 9 // 7 bit fractions. movhps xmm0, qword ptr [esi + edx * 4] // 2 source x1 pixels pshufb xmm1, xmm5 // 0000000011111111 pshufb xmm0, xmm4 // arrange pixels into pairs pxor xmm1, xmm6 // 0..7f and 7f..0 pmaddubsw xmm0, xmm1 // argb_argb 16 bit, 2 pixels. pextrw eax, xmm2, 1 // get x0 integer. next iteration. pextrw edx, xmm2, 3 // get x1 integer. next iteration. 
psrlw xmm0, 7 // argb 8.7 fixed point to low 8 bits. packuswb xmm0, xmm0 // argb_argb 8 bits, 2 pixels. movq qword ptr [edi], xmm0 lea edi, [edi + 8] sub ecx, 2 // 2 pixels jge xloop2 xloop29: add ecx, 2 - 1 jl xloop99 // 1 pixel remainder psrlw xmm2, 9 // 7 bit fractions. movq xmm0, qword ptr [esi + eax * 4] // 2 source x0 pixels pshufb xmm2, xmm5 // 00000000 pshufb xmm0, xmm4 // arrange pixels into pairs pxor xmm2, xmm6 // 0..7f and 7f..0 pmaddubsw xmm0, xmm2 // argb 16 bit, 1 pixel. psrlw xmm0, 7 packuswb xmm0, xmm0 // argb 8 bits, 1 pixel. movd [edi], xmm0 xloop99: pop edi pop esi ret } } // Reads 4 pixels, duplicates them and writes 8 pixels. __declspec(naked) void ScaleARGBColsUp2_SSE2(uint8_t* dst_argb, const uint8_t* src_argb, int dst_width, int x, int dx) { __asm { mov edx, [esp + 4] // dst_argb mov eax, [esp + 8] // src_argb mov ecx, [esp + 12] // dst_width wloop: movdqu xmm0, [eax] lea eax, [eax + 16] movdqa xmm1, xmm0 punpckldq xmm0, xmm0 punpckhdq xmm1, xmm1 movdqu [edx], xmm0 movdqu [edx + 16], xmm1 lea edx, [edx + 32] sub ecx, 8 jg wloop ret } } // Divide num by div and return as 16.16 fixed point result. __declspec(naked) int FixedDiv_X86(int num, int div) { __asm { mov eax, [esp + 4] // num cdq // extend num to 64 bits shld edx, eax, 16 // 32.16 shl eax, 16 idiv dword ptr [esp + 8] ret } } // Divide num by div and return as 16.16 fixed point result. __declspec(naked) int FixedDiv1_X86(int num, int div) { __asm { mov eax, [esp + 4] // num mov ecx, [esp + 8] // denom cdq // extend num to 64 bits shld edx, eax, 16 // 32.16 shl eax, 16 sub eax, 0x00010001 sbb edx, 0 sub ecx, 1 idiv ecx ret } } #endif // !defined(LIBYUV_DISABLE_X86) && defined(_M_IX86) #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/libyuv/source/video_common.cc000066400000000000000000000036461357355204000231730ustar00rootroot00000000000000/* * Copyright 2011 The LibYuv Project Authors. All rights reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "libyuv/video_common.h" #ifdef __cplusplus namespace libyuv { extern "C" { #endif struct FourCCAliasEntry { uint32_t alias; uint32_t canonical; }; #define NUM_ALIASES 18 static const struct FourCCAliasEntry kFourCCAliases[NUM_ALIASES] = { {FOURCC_IYUV, FOURCC_I420}, {FOURCC_YU12, FOURCC_I420}, {FOURCC_YU16, FOURCC_I422}, {FOURCC_YU24, FOURCC_I444}, {FOURCC_YUYV, FOURCC_YUY2}, {FOURCC_YUVS, FOURCC_YUY2}, // kCMPixelFormat_422YpCbCr8_yuvs {FOURCC_HDYC, FOURCC_UYVY}, {FOURCC_2VUY, FOURCC_UYVY}, // kCMPixelFormat_422YpCbCr8 {FOURCC_JPEG, FOURCC_MJPG}, // Note: JPEG has DHT while MJPG does not. {FOURCC_DMB1, FOURCC_MJPG}, {FOURCC_BA81, FOURCC_BGGR}, // deprecated. {FOURCC_RGB3, FOURCC_RAW}, {FOURCC_BGR3, FOURCC_24BG}, {FOURCC_CM32, FOURCC_BGRA}, // kCMPixelFormat_32ARGB {FOURCC_CM24, FOURCC_RAW}, // kCMPixelFormat_24RGB {FOURCC_L555, FOURCC_RGBO}, // kCMPixelFormat_16LE555 {FOURCC_L565, FOURCC_RGBP}, // kCMPixelFormat_16LE565 {FOURCC_5551, FOURCC_RGBO}, // kCMPixelFormat_16LE5551 }; // TODO(fbarchard): Consider mapping kCMPixelFormat_32BGRA to FOURCC_ARGB. 
// {FOURCC_BGRA, FOURCC_ARGB}, // kCMPixelFormat_32BGRA LIBYUV_API uint32_t CanonicalFourCC(uint32_t fourcc) { int i; for (i = 0; i < NUM_ALIASES; ++i) { if (kFourCCAliases[i].alias == fourcc) { return kFourCCAliases[i].canonical; } } // Not an alias, so return it as-is. return fourcc; } #ifdef __cplusplus } // extern "C" } // namespace libyuv #endif libvpx-1.8.2/third_party/x86inc/000077500000000000000000000000001357355204000165025ustar00rootroot00000000000000libvpx-1.8.2/third_party/x86inc/LICENSE000066400000000000000000000016501357355204000175110ustar00rootroot00000000000000Copyright (C) 2005-2012 x264 project Authors: Loren Merritt Anton Mitrofanov Jason Garrett-Glaser Henrik Gramner Permission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. libvpx-1.8.2/third_party/x86inc/README.libvpx000066400000000000000000000014731357355204000206720ustar00rootroot00000000000000URL: https://git.videolan.org/git/x264.git Version: d23d18655249944c1ca894b451e2c82c7a584c62 License: ISC License File: LICENSE Description: x264/libav's framework for x86 assembly. Contains a variety of macros and defines that help automatically allow assembly to work cross-platform. Local Modifications: Get configuration from vpx_config.asm. Prefix functions with vpx by default. Manage name mangling (prefixing with '_') manually because 'PREFIX' does not exist in libvpx. Expand PIC default to macho64 and respect CONFIG_PIC from libvpx Set 'private_extern' visibility for macho targets. Copy PIC 'GLOBAL' macros from x86_abi_support.asm Use .text instead of .rodata on macho to avoid broken tables in PIC mode. Use .text with no alignment for aout Only use 'hidden' visibility with Chromium Prefix ARCH_* with VPX_. libvpx-1.8.2/third_party/x86inc/x86inc.asm000066400000000000000000001336141357355204000203330ustar00rootroot00000000000000;***************************************************************************** ;* x86inc.asm: x264asm abstraction layer ;***************************************************************************** ;* Copyright (C) 2005-2016 x264 project ;* ;* Authors: Loren Merritt ;* Anton Mitrofanov ;* Fiona Glaser ;* Henrik Gramner ;* ;* Permission to use, copy, modify, and/or distribute this software for any ;* purpose with or without fee is hereby granted, provided that the above ;* copyright notice and this permission notice appear in all copies. ;* ;* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES ;* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF ;* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ;* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES ;* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ;* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF ;* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
;***************************************************************************** ; This is a header file for the x264ASM assembly language, which uses ; NASM/YASM syntax combined with a large number of macros to provide easy ; abstraction between different calling conventions (x86_32, win64, linux64). ; It also has various other useful features to simplify writing the kind of ; DSP functions that are most often used in x264. ; Unlike the rest of x264, this file is available under an ISC license, as it ; has significant usefulness outside of x264 and we want it to be available ; to the largest audience possible. Of course, if you modify it for your own ; purposes to add a new feature, we strongly encourage contributing a patch ; as this feature might be useful for others as well. Send patches or ideas ; to x264-devel@videolan.org . %include "vpx_config.asm" %ifndef private_prefix %define private_prefix vpx %endif %ifndef public_prefix %define public_prefix private_prefix %endif %ifndef STACK_ALIGNMENT %if VPX_ARCH_X86_64 %define STACK_ALIGNMENT 16 %else %define STACK_ALIGNMENT 4 %endif %endif %define WIN64 0 %define UNIX64 0 %if VPX_ARCH_X86_64 %ifidn __OUTPUT_FORMAT__,win32 %define WIN64 1 %elifidn __OUTPUT_FORMAT__,win64 %define WIN64 1 %elifidn __OUTPUT_FORMAT__,x64 %define WIN64 1 %else %define UNIX64 1 %endif %endif %define FORMAT_ELF 0 %ifidn __OUTPUT_FORMAT__,elf %define FORMAT_ELF 1 %elifidn __OUTPUT_FORMAT__,elf32 %define FORMAT_ELF 1 %elifidn __OUTPUT_FORMAT__,elf64 %define FORMAT_ELF 1 %endif %define FORMAT_MACHO 0 %ifidn __OUTPUT_FORMAT__,macho32 %define FORMAT_MACHO 1 %elifidn __OUTPUT_FORMAT__,macho64 %define FORMAT_MACHO 1 %endif ; Set PREFIX for libvpx builds. %if FORMAT_ELF %undef PREFIX %elif WIN64 %undef PREFIX %else %define PREFIX %endif %ifdef PREFIX %define mangle(x) _ %+ x %else %define mangle(x) x %endif ; In some instances macho32 tables get misaligned when using .rodata. ; When looking at the disassembly it appears that the offset is either ; correct or consistently off by 90. Placing them in the .text section ; works around the issue. It appears to be specific to the way libvpx ; handles the tables. %macro SECTION_RODATA 0-1 16 %ifidn __OUTPUT_FORMAT__,macho32 SECTION .text align=%1 fakegot: %elifidn __OUTPUT_FORMAT__,aout SECTION .text %else SECTION .rodata align=%1 %endif %endmacro ; PIC macros are copied from vpx_ports/x86_abi_support.asm. The "define PIC" ; from original code is added in for 64bit. 
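; Typical 32-bit PIC usage of the macros below (illustrative sketch only;
; "some_table" is a placeholder label):
;     GET_GOT     ebx                          ; materialize GOT pointer
;     movdqa      xmm0, [GLOBAL(some_table)]
;     RESTORE_GOT
; On 64-bit targets GET_GOT is a no-op and GLOBAL(x) expands to "rel x",
; which relies on RIP-relative addressing enabled by "default rel".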
%ifidn __OUTPUT_FORMAT__,elf32 %define ABI_IS_32BIT 1 %elifidn __OUTPUT_FORMAT__,macho32 %define ABI_IS_32BIT 1 %elifidn __OUTPUT_FORMAT__,win32 %define ABI_IS_32BIT 1 %elifidn __OUTPUT_FORMAT__,aout %define ABI_IS_32BIT 1 %else %define ABI_IS_32BIT 0 %endif %if ABI_IS_32BIT %if CONFIG_PIC=1 %ifidn __OUTPUT_FORMAT__,elf32 %define GET_GOT_DEFINED 1 %define WRT_PLT wrt ..plt %macro GET_GOT 1 extern _GLOBAL_OFFSET_TABLE_ push %1 call %%get_got %%sub_offset: jmp %%exitGG %%get_got: mov %1, [esp] add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc ret %%exitGG: %undef GLOBAL %define GLOBAL(x) x + %1 wrt ..gotoff %undef RESTORE_GOT %define RESTORE_GOT pop %1 %endmacro %elifidn __OUTPUT_FORMAT__,macho32 %define GET_GOT_DEFINED 1 %macro GET_GOT 1 push %1 call %%get_got %%get_got: pop %1 %undef GLOBAL %define GLOBAL(x) x + %1 - %%get_got %undef RESTORE_GOT %define RESTORE_GOT pop %1 %endmacro %else %define GET_GOT_DEFINED 0 %endif %endif %if VPX_ARCH_X86_64 == 0 %undef PIC %endif %else %macro GET_GOT 1 %endmacro %define GLOBAL(x) rel x %define WRT_PLT wrt ..plt %if WIN64 %define PIC %elifidn __OUTPUT_FORMAT__,macho64 %define PIC %elif CONFIG_PIC %define PIC %endif %endif %ifnmacro GET_GOT %macro GET_GOT 1 %endmacro %define GLOBAL(x) x %endif %ifndef RESTORE_GOT %define RESTORE_GOT %endif %ifndef WRT_PLT %define WRT_PLT %endif %ifdef PIC default rel %endif %ifndef GET_GOT_DEFINED %define GET_GOT_DEFINED 0 %endif ; Done with PIC macros %ifdef __NASM_VER__ %use smartalign %endif ; Macros to eliminate most code duplication between x86_32 and x86_64: ; Currently this works only for leaf functions which load all their arguments ; into registers at the start, and make no other use of the stack. Luckily that ; covers most of x264's asm. ; PROLOGUE: ; %1 = number of arguments. loads them from stack if needed. ; %2 = number of registers used. pushes callee-saved regs if needed. ; %3 = number of xmm registers used. pushes callee-saved xmm regs if needed. ; %4 = (optional) stack size to be allocated. The stack will be aligned before ; allocating the specified stack size. If the required stack alignment is ; larger than the known stack alignment the stack will be manually aligned ; and an extra register will be allocated to hold the original stack ; pointer (to not invalidate r0m etc.). To prevent the use of an extra ; register as stack pointer, request a negative stack size. ; %4+/%5+ = list of names to define to registers ; PROLOGUE can also be invoked by adding the same options to cglobal ; e.g. ; cglobal foo, 2,3,7,0x40, dst, src, tmp ; declares a function (foo) that automatically loads two arguments (dst and ; src) into registers, uses one additional register (tmp) plus 7 vector ; registers (m0-m6) and allocates 0x40 bytes of stack space. ; TODO Some functions can use some args directly from the stack. If they're the ; last args then you can just not declare them, but if they're in the middle ; we need more flexible macro. ; RET: ; Pops anything that was pushed by PROLOGUE, and returns. ; REP_RET: ; Use this instead of RET if it's a branch target. 
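; (A loop that falls through into the epilogue should therefore end with
; REP_RET rather than a bare ret; see the REP_RET/AUTO_REP_RET definitions
; further down for the AMD branch-target rationale.)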
; registers: ; rN and rNq are the native-size register holding function argument N ; rNd, rNw, rNb are dword, word, and byte size ; rNh is the high 8 bits of the word size ; rNm is the original location of arg N (a register or on the stack), dword ; rNmp is native size %macro DECLARE_REG 2-3 %define r%1q %2 %define r%1d %2d %define r%1w %2w %define r%1b %2b %define r%1h %2h %define %2q %2 %if %0 == 2 %define r%1m %2d %define r%1mp %2 %elif VPX_ARCH_X86_64 ; memory %define r%1m [rstk + stack_offset + %3] %define r%1mp qword r %+ %1 %+ m %else %define r%1m [rstk + stack_offset + %3] %define r%1mp dword r %+ %1 %+ m %endif %define r%1 %2 %endmacro %macro DECLARE_REG_SIZE 3 %define r%1q r%1 %define e%1q r%1 %define r%1d e%1 %define e%1d e%1 %define r%1w %1 %define e%1w %1 %define r%1h %3 %define e%1h %3 %define r%1b %2 %define e%1b %2 %if VPX_ARCH_X86_64 == 0 %define r%1 e%1 %endif %endmacro DECLARE_REG_SIZE ax, al, ah DECLARE_REG_SIZE bx, bl, bh DECLARE_REG_SIZE cx, cl, ch DECLARE_REG_SIZE dx, dl, dh DECLARE_REG_SIZE si, sil, null DECLARE_REG_SIZE di, dil, null DECLARE_REG_SIZE bp, bpl, null ; t# defines for when per-arch register allocation is more complex than just function arguments %macro DECLARE_REG_TMP 1-* %assign %%i 0 %rep %0 CAT_XDEFINE t, %%i, r%1 %assign %%i %%i+1 %rotate 1 %endrep %endmacro %macro DECLARE_REG_TMP_SIZE 0-* %rep %0 %define t%1q t%1 %+ q %define t%1d t%1 %+ d %define t%1w t%1 %+ w %define t%1h t%1 %+ h %define t%1b t%1 %+ b %rotate 1 %endrep %endmacro DECLARE_REG_TMP_SIZE 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14 %if VPX_ARCH_X86_64 %define gprsize 8 %else %define gprsize 4 %endif %macro PUSH 1 push %1 %ifidn rstk, rsp %assign stack_offset stack_offset+gprsize %endif %endmacro %macro POP 1 pop %1 %ifidn rstk, rsp %assign stack_offset stack_offset-gprsize %endif %endmacro %macro PUSH_IF_USED 1-* %rep %0 %if %1 < regs_used PUSH r%1 %endif %rotate 1 %endrep %endmacro %macro POP_IF_USED 1-* %rep %0 %if %1 < regs_used pop r%1 %endif %rotate 1 %endrep %endmacro %macro LOAD_IF_USED 1-* %rep %0 %if %1 < num_args mov r%1, r %+ %1 %+ mp %endif %rotate 1 %endrep %endmacro %macro SUB 2 sub %1, %2 %ifidn %1, rstk %assign stack_offset stack_offset+(%2) %endif %endmacro %macro ADD 2 add %1, %2 %ifidn %1, rstk %assign stack_offset stack_offset-(%2) %endif %endmacro %macro movifnidn 2 %ifnidn %1, %2 mov %1, %2 %endif %endmacro %macro movsxdifnidn 2 %ifnidn %1, %2 movsxd %1, %2 %endif %endmacro %macro ASSERT 1 %if (%1) == 0 %error assertion ``%1'' failed %endif %endmacro %macro DEFINE_ARGS 0-* %ifdef n_arg_names %assign %%i 0 %rep n_arg_names CAT_UNDEF arg_name %+ %%i, q CAT_UNDEF arg_name %+ %%i, d CAT_UNDEF arg_name %+ %%i, w CAT_UNDEF arg_name %+ %%i, h CAT_UNDEF arg_name %+ %%i, b CAT_UNDEF arg_name %+ %%i, m CAT_UNDEF arg_name %+ %%i, mp CAT_UNDEF arg_name, %%i %assign %%i %%i+1 %endrep %endif %xdefine %%stack_offset stack_offset %undef stack_offset ; so that the current value of stack_offset doesn't get baked in by xdefine %assign %%i 0 %rep %0 %xdefine %1q r %+ %%i %+ q %xdefine %1d r %+ %%i %+ d %xdefine %1w r %+ %%i %+ w %xdefine %1h r %+ %%i %+ h %xdefine %1b r %+ %%i %+ b %xdefine %1m r %+ %%i %+ m %xdefine %1mp r %+ %%i %+ mp CAT_XDEFINE arg_name, %%i, %1 %assign %%i %%i+1 %rotate 1 %endrep %xdefine stack_offset %%stack_offset %assign n_arg_names %0 %endmacro %define required_stack_alignment ((mmsize + 15) & ~15) %macro ALLOC_STACK 1-2 0 ; stack_size, n_xmm_regs (for win64 only) %ifnum %1 %if %1 != 0 %assign %%pad 0 %assign stack_size %1 %if stack_size < 0 %assign stack_size 
-stack_size %endif %if WIN64 %assign %%pad %%pad + 32 ; shadow space %if mmsize != 8 %assign xmm_regs_used %2 %if xmm_regs_used > 8 %assign %%pad %%pad + (xmm_regs_used-8)*16 ; callee-saved xmm registers %endif %endif %endif %if required_stack_alignment <= STACK_ALIGNMENT ; maintain the current stack alignment %assign stack_size_padded stack_size + %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1)) SUB rsp, stack_size_padded %else %assign %%reg_num (regs_used - 1) %xdefine rstk r %+ %%reg_num ; align stack, and save original stack location directly above ; it, i.e. in [rsp+stack_size_padded], so we can restore the ; stack in a single instruction (i.e. mov rsp, rstk or mov ; rsp, [rsp+stack_size_padded]) %if %1 < 0 ; need to store rsp on stack %xdefine rstkm [rsp + stack_size + %%pad] %assign %%pad %%pad + gprsize %else ; can keep rsp in rstk during whole function %xdefine rstkm rstk %endif %assign stack_size_padded stack_size + ((%%pad + required_stack_alignment-1) & ~(required_stack_alignment-1)) mov rstk, rsp and rsp, ~(required_stack_alignment-1) sub rsp, stack_size_padded movifnidn rstkm, rstk %endif WIN64_PUSH_XMM %endif %endif %endmacro %macro SETUP_STACK_POINTER 1 %ifnum %1 %if %1 != 0 && required_stack_alignment > STACK_ALIGNMENT %if %1 > 0 %assign regs_used (regs_used + 1) %endif %if VPX_ARCH_X86_64 && regs_used < 5 + UNIX64 * 3 ; Ensure that we don't clobber any registers containing arguments %assign regs_used 5 + UNIX64 * 3 %endif %endif %endif %endmacro %macro DEFINE_ARGS_INTERNAL 3+ %ifnum %2 DEFINE_ARGS %3 %elif %1 == 4 DEFINE_ARGS %2 %elif %1 > 4 DEFINE_ARGS %2, %3 %endif %endmacro %if WIN64 ; Windows x64 ;================================================= DECLARE_REG 0, rcx DECLARE_REG 1, rdx DECLARE_REG 2, R8 DECLARE_REG 3, R9 DECLARE_REG 4, R10, 40 DECLARE_REG 5, R11, 48 DECLARE_REG 6, rax, 56 DECLARE_REG 7, rdi, 64 DECLARE_REG 8, rsi, 72 DECLARE_REG 9, rbx, 80 DECLARE_REG 10, rbp, 88 DECLARE_REG 11, R12, 96 DECLARE_REG 12, R13, 104 DECLARE_REG 13, R14, 112 DECLARE_REG 14, R15, 120 %macro PROLOGUE 2-5+ 0 ; #args, #regs, #xmm_regs, [stack_size,] arg_names... %assign num_args %1 %assign regs_used %2 ASSERT regs_used >= num_args SETUP_STACK_POINTER %4 ASSERT regs_used <= 15 PUSH_IF_USED 7, 8, 9, 10, 11, 12, 13, 14 ALLOC_STACK %4, %3 %if mmsize != 8 && stack_size == 0 WIN64_SPILL_XMM %3 %endif LOAD_IF_USED 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14 DEFINE_ARGS_INTERNAL %0, %4, %5 %endmacro %macro WIN64_PUSH_XMM 0 ; Use the shadow space to store XMM6 and XMM7, the rest needs stack space allocated. %if xmm_regs_used > 6 movaps [rstk + stack_offset + 8], xmm6 %endif %if xmm_regs_used > 7 movaps [rstk + stack_offset + 24], xmm7 %endif %if xmm_regs_used > 8 %assign %%i 8 %rep xmm_regs_used-8 movaps [rsp + (%%i-8)*16 + stack_size + 32], xmm %+ %%i %assign %%i %%i+1 %endrep %endif %endmacro %macro WIN64_SPILL_XMM 1 %assign xmm_regs_used %1 ASSERT xmm_regs_used <= 16 %if xmm_regs_used > 8 ; Allocate stack space for callee-saved xmm registers plus shadow space and align the stack. 
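    ; xmm6/xmm7 fit in the caller's 32-byte shadow space (see
    ; WIN64_PUSH_XMM); xmm8..xmm15 are the remaining callee-saved xmm
    ; registers on Win64 and need 16 bytes of locally allocated stack each.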
%assign %%pad (xmm_regs_used-8)*16 + 32 %assign stack_size_padded %%pad + ((-%%pad-stack_offset-gprsize) & (STACK_ALIGNMENT-1)) SUB rsp, stack_size_padded %endif WIN64_PUSH_XMM %endmacro %macro WIN64_RESTORE_XMM_INTERNAL 1 %assign %%pad_size 0 %if xmm_regs_used > 8 %assign %%i xmm_regs_used %rep xmm_regs_used-8 %assign %%i %%i-1 movaps xmm %+ %%i, [%1 + (%%i-8)*16 + stack_size + 32] %endrep %endif %if stack_size_padded > 0 %if stack_size > 0 && required_stack_alignment > STACK_ALIGNMENT mov rsp, rstkm %else add %1, stack_size_padded %assign %%pad_size stack_size_padded %endif %endif %if xmm_regs_used > 7 movaps xmm7, [%1 + stack_offset - %%pad_size + 24] %endif %if xmm_regs_used > 6 movaps xmm6, [%1 + stack_offset - %%pad_size + 8] %endif %endmacro %macro WIN64_RESTORE_XMM 1 WIN64_RESTORE_XMM_INTERNAL %1 %assign stack_offset (stack_offset-stack_size_padded) %assign xmm_regs_used 0 %endmacro %define has_epilogue regs_used > 7 || xmm_regs_used > 6 || mmsize == 32 || stack_size > 0 %macro RET 0 WIN64_RESTORE_XMM_INTERNAL rsp POP_IF_USED 14, 13, 12, 11, 10, 9, 8, 7 %if mmsize == 32 vzeroupper %endif AUTO_REP_RET %endmacro %elif VPX_ARCH_X86_64 ; *nix x64 ;============================================= DECLARE_REG 0, rdi DECLARE_REG 1, rsi DECLARE_REG 2, rdx DECLARE_REG 3, rcx DECLARE_REG 4, R8 DECLARE_REG 5, R9 DECLARE_REG 6, rax, 8 DECLARE_REG 7, R10, 16 DECLARE_REG 8, R11, 24 DECLARE_REG 9, rbx, 32 DECLARE_REG 10, rbp, 40 DECLARE_REG 11, R12, 48 DECLARE_REG 12, R13, 56 DECLARE_REG 13, R14, 64 DECLARE_REG 14, R15, 72 %macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names... %assign num_args %1 %assign regs_used %2 ASSERT regs_used >= num_args SETUP_STACK_POINTER %4 ASSERT regs_used <= 15 PUSH_IF_USED 9, 10, 11, 12, 13, 14 ALLOC_STACK %4 LOAD_IF_USED 6, 7, 8, 9, 10, 11, 12, 13, 14 DEFINE_ARGS_INTERNAL %0, %4, %5 %endmacro %define has_epilogue regs_used > 9 || mmsize == 32 || stack_size > 0 %macro RET 0 %if stack_size_padded > 0 %if required_stack_alignment > STACK_ALIGNMENT mov rsp, rstkm %else add rsp, stack_size_padded %endif %endif POP_IF_USED 14, 13, 12, 11, 10, 9 %if mmsize == 32 vzeroupper %endif AUTO_REP_RET %endmacro %else ; X86_32 ;============================================================== DECLARE_REG 0, eax, 4 DECLARE_REG 1, ecx, 8 DECLARE_REG 2, edx, 12 DECLARE_REG 3, ebx, 16 DECLARE_REG 4, esi, 20 DECLARE_REG 5, edi, 24 DECLARE_REG 6, ebp, 28 %define rsp esp %macro DECLARE_ARG 1-* %rep %0 %define r%1m [rstk + stack_offset + 4*%1 + 4] %define r%1mp dword r%1m %rotate 1 %endrep %endmacro DECLARE_ARG 7, 8, 9, 10, 11, 12, 13, 14 %macro PROLOGUE 2-5+ ; #args, #regs, #xmm_regs, [stack_size,] arg_names... 
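; On x86_32 every argument arrives on the stack and only 7 general-purpose
; registers are addressable, so num_args and regs_used are clamped to 7
; below and LOAD_IF_USED copies the first num_args stack slots into
; registers. Illustrative sketch (hypothetical function):
;   cglobal foo, 2, 3, 0, dst, src
; loads dstq <- [esp+4] and srcq <- [esp+8] (no callee-saved register
; needs pushing until regs_used exceeds 3), after which the body can use
; the same argument names it would use on x86-64.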
%assign num_args %1 %assign regs_used %2 ASSERT regs_used >= num_args %if num_args > 7 %assign num_args 7 %endif %if regs_used > 7 %assign regs_used 7 %endif SETUP_STACK_POINTER %4 ASSERT regs_used <= 7 PUSH_IF_USED 3, 4, 5, 6 ALLOC_STACK %4 LOAD_IF_USED 0, 1, 2, 3, 4, 5, 6 DEFINE_ARGS_INTERNAL %0, %4, %5 %endmacro %define has_epilogue regs_used > 3 || mmsize == 32 || stack_size > 0 %macro RET 0 %if stack_size_padded > 0 %if required_stack_alignment > STACK_ALIGNMENT mov rsp, rstkm %else add rsp, stack_size_padded %endif %endif POP_IF_USED 6, 5, 4, 3 %if mmsize == 32 vzeroupper %endif AUTO_REP_RET %endmacro %endif ;====================================================================== %if WIN64 == 0 %macro WIN64_SPILL_XMM 1 %endmacro %macro WIN64_RESTORE_XMM 1 %endmacro %macro WIN64_PUSH_XMM 0 %endmacro %endif ; On AMD cpus <=K10, an ordinary ret is slow if it immediately follows either ; a branch or a branch target. So switch to a 2-byte form of ret in that case. ; We can automatically detect "follows a branch", but not a branch target. ; (SSSE3 is a sufficient condition to know that your cpu doesn't have this problem.) %macro REP_RET 0 %if has_epilogue RET %else rep ret %endif annotate_function_size %endmacro %define last_branch_adr $$ %macro AUTO_REP_RET 0 %if notcpuflag(ssse3) times ((last_branch_adr-$)>>31)+1 rep ; times 1 iff $ == last_branch_adr. %endif ret annotate_function_size %endmacro %macro BRANCH_INSTR 0-* %rep %0 %macro %1 1-2 %1 %2 %1 %if notcpuflag(ssse3) %%branch_instr equ $ %xdefine last_branch_adr %%branch_instr %endif %endmacro %rotate 1 %endrep %endmacro BRANCH_INSTR jz, je, jnz, jne, jl, jle, jnl, jnle, jg, jge, jng, jnge, ja, jae, jna, jnae, jb, jbe, jnb, jnbe, jc, jnc, js, jns, jo, jno, jp, jnp %macro TAIL_CALL 2 ; callee, is_nonadjacent %if has_epilogue call %1 RET %elif %2 jmp %1 %endif annotate_function_size %endmacro ;============================================================================= ; arch-independent part ;============================================================================= %assign function_align 16 ; Begin a function. ; Applies any symbol mangling needed for C linkage, and sets up a define such that ; subsequent uses of the function name automatically refer to the mangled version. ; Appends cpuflags to the function name if cpuflags has been specified. ; The "" empty default parameter is a workaround for nasm, which fails if SUFFIX ; is empty and we call cglobal_internal with just %1 %+ SUFFIX (without %2). %macro cglobal 1-2+ "" ; name, [PROLOGUE args] cglobal_internal 1, %1 %+ SUFFIX, %2 %endmacro %macro cvisible 1-2+ "" ; name, [PROLOGUE args] cglobal_internal 0, %1 %+ SUFFIX, %2 %endmacro %macro cglobal_internal 2-3+ annotate_function_size %if %1 %xdefine %%FUNCTION_PREFIX private_prefix ; libvpx explicitly sets visibility in shared object builds. Avoid ; setting visibility to hidden as it may break builds that split ; sources on e.g., directory boundaries. 
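; (CHROMIUM is expected to be defined by Chromium's own build files; with
; it set, internal symbols get hidden visibility so they never leak out
; of the component build, while all other builds leave the annotation
; empty and rely on libvpx's version script / linker defaults instead.)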
%ifdef CHROMIUM %xdefine %%VISIBILITY hidden %else %xdefine %%VISIBILITY %endif %else %xdefine %%FUNCTION_PREFIX public_prefix %xdefine %%VISIBILITY %endif %ifndef cglobaled_%2 %xdefine %2 mangle(%%FUNCTION_PREFIX %+ _ %+ %2) %xdefine %2.skip_prologue %2 %+ .skip_prologue CAT_XDEFINE cglobaled_, %2, 1 %endif %xdefine current_function %2 %xdefine current_function_section __SECT__ %if FORMAT_ELF global %2:function %%VISIBILITY %elif FORMAT_MACHO %ifdef __NASM_VER__ global %2 %else global %2:private_extern %endif %else global %2 %endif align function_align %2: RESET_MM_PERMUTATION ; needed for x86-64, also makes disassembly somewhat nicer %xdefine rstk rsp ; copy of the original stack pointer, used when greater alignment than the known stack alignment is required %assign stack_offset 0 ; stack pointer offset relative to the return address %assign stack_size 0 ; amount of stack space that can be freely used inside a function %assign stack_size_padded 0 ; total amount of allocated stack space, including space for callee-saved xmm registers on WIN64 and alignment padding %assign xmm_regs_used 0 ; number of XMM registers requested, used for dealing with callee-saved registers on WIN64 %ifnidn %3, "" PROLOGUE %3 %endif %endmacro %macro cextern 1 %xdefine %1 mangle(private_prefix %+ _ %+ %1) CAT_XDEFINE cglobaled_, %1, 1 extern %1 %endmacro ; like cextern, but without the prefix %macro cextern_naked 1 %ifdef PREFIX %xdefine %1 mangle(%1) %endif CAT_XDEFINE cglobaled_, %1, 1 extern %1 %endmacro %macro const 1-2+ %xdefine %1 mangle(private_prefix %+ _ %+ %1) %if FORMAT_ELF global %1:data hidden %else global %1 %endif %1: %2 %endmacro ; This is needed for ELF, otherwise the GNU linker assumes the stack is executable by default. %if FORMAT_ELF [SECTION .note.GNU-stack noalloc noexec nowrite progbits] %endif ; Tell debuggers how large the function was. ; This may be invoked multiple times per function; we rely on later instances overriding earlier ones. ; This is invoked by RET and similar macros, and also cglobal does it for the previous function, ; but if the last function in a source file doesn't use any of the standard macros for its epilogue, ; then its size might be unspecified. 
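; Illustrative expansion sketch for the cglobal machinery above
; (hypothetical function name; the exact output depends on the ABI,
; object format and build flags). On x86-64 ELF,
;   cglobal average, 2, 3, 4
; roughly becomes:
;   global vpx_average:function
;   align 16
;   vpx_average:
; with RESET_MM_PERMUTATION applied, the stack bookkeeping
; (stack_offset/stack_size/stack_size_padded/xmm_regs_used) zeroed, and
; PROLOGUE 2, 3, 4 then pushing and loading registers as defined above.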
%macro annotate_function_size 0 %ifdef __YASM_VER__ %ifdef current_function %if FORMAT_ELF current_function_section %%ecf equ $ size current_function %%ecf - current_function __SECT__ %endif %endif %endif %endmacro ; cpuflags %assign cpuflags_mmx (1<<0) %assign cpuflags_mmx2 (1<<1) | cpuflags_mmx %assign cpuflags_3dnow (1<<2) | cpuflags_mmx %assign cpuflags_3dnowext (1<<3) | cpuflags_3dnow %assign cpuflags_sse (1<<4) | cpuflags_mmx2 %assign cpuflags_sse2 (1<<5) | cpuflags_sse %assign cpuflags_sse2slow (1<<6) | cpuflags_sse2 %assign cpuflags_sse3 (1<<7) | cpuflags_sse2 %assign cpuflags_ssse3 (1<<8) | cpuflags_sse3 %assign cpuflags_sse4 (1<<9) | cpuflags_ssse3 %assign cpuflags_sse42 (1<<10)| cpuflags_sse4 %assign cpuflags_avx (1<<11)| cpuflags_sse42 %assign cpuflags_xop (1<<12)| cpuflags_avx %assign cpuflags_fma4 (1<<13)| cpuflags_avx %assign cpuflags_fma3 (1<<14)| cpuflags_avx %assign cpuflags_avx2 (1<<15)| cpuflags_fma3 %assign cpuflags_cache32 (1<<16) %assign cpuflags_cache64 (1<<17) %assign cpuflags_slowctz (1<<18) %assign cpuflags_lzcnt (1<<19) %assign cpuflags_aligned (1<<20) ; not a cpu feature, but a function variant %assign cpuflags_atom (1<<21) %assign cpuflags_bmi1 (1<<22)|cpuflags_lzcnt %assign cpuflags_bmi2 (1<<23)|cpuflags_bmi1 ; Returns a boolean value expressing whether or not the specified cpuflag is enabled. %define cpuflag(x) (((((cpuflags & (cpuflags_ %+ x)) ^ (cpuflags_ %+ x)) - 1) >> 31) & 1) %define notcpuflag(x) (cpuflag(x) ^ 1) ; Takes an arbitrary number of cpuflags from the above list. ; All subsequent functions (up to the next INIT_CPUFLAGS) is built for the specified cpu. ; You shouldn't need to invoke this macro directly, it's a subroutine for INIT_MMX &co. %macro INIT_CPUFLAGS 0-* %xdefine SUFFIX %undef cpuname %assign cpuflags 0 %if %0 >= 1 %rep %0 %ifdef cpuname %xdefine cpuname cpuname %+ _%1 %else %xdefine cpuname %1 %endif %assign cpuflags cpuflags | cpuflags_%1 %rotate 1 %endrep %xdefine SUFFIX _ %+ cpuname %if cpuflag(avx) %assign avx_enabled 1 %endif %if (mmsize == 16 && notcpuflag(sse2)) || (mmsize == 32 && notcpuflag(avx2)) %define mova movaps %define movu movups %define movnta movntps %endif %if cpuflag(aligned) %define movu mova %elif cpuflag(sse3) && notcpuflag(ssse3) %define movu lddqu %endif %endif %if VPX_ARCH_X86_64 || cpuflag(sse2) %ifdef __NASM_VER__ ALIGNMODE k8 %else CPU amdnop %endif %else %ifdef __NASM_VER__ ALIGNMODE nop %else CPU basicnop %endif %endif %endmacro ; Merge mmx and sse* ; m# is a simd register of the currently selected size ; xm# is the corresponding xmm register if mmsize >= 16, otherwise the same as m# ; ym# is the corresponding ymm register if mmsize >= 32, otherwise the same as m# ; (All 3 remain in sync through SWAP.) 
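; Illustrative sketch (hypothetical two-instruction body): the same source
;   mova  m0, [r0]
;   paddw m0, m0, m1
; assembles as movdqa + paddw on xmm0/xmm1 after INIT_XMM sse2 (the AVX
; layer defined further below emulates the 3-operand form on SSE,
; inserting a mov only when dst differs from src1), and as
; vmovdqa + vpaddw on ymm0/ymm1 after INIT_YMM avx2; in either case xm0
; names the xmm view of m0, which is what the cast macros below provide.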
%macro CAT_XDEFINE 3 %xdefine %1%2 %3 %endmacro %macro CAT_UNDEF 2 %undef %1%2 %endmacro %macro INIT_MMX 0-1+ %assign avx_enabled 0 %define RESET_MM_PERMUTATION INIT_MMX %1 %define mmsize 8 %define num_mmregs 8 %define mova movq %define movu movq %define movh movd %define movnta movntq %assign %%i 0 %rep 8 CAT_XDEFINE m, %%i, mm %+ %%i CAT_XDEFINE nnmm, %%i, %%i %assign %%i %%i+1 %endrep %rep 8 CAT_UNDEF m, %%i CAT_UNDEF nnmm, %%i %assign %%i %%i+1 %endrep INIT_CPUFLAGS %1 %endmacro %macro INIT_XMM 0-1+ %assign avx_enabled 0 %define RESET_MM_PERMUTATION INIT_XMM %1 %define mmsize 16 %define num_mmregs 8 %if VPX_ARCH_X86_64 %define num_mmregs 16 %endif %define mova movdqa %define movu movdqu %define movh movq %define movnta movntdq %assign %%i 0 %rep num_mmregs CAT_XDEFINE m, %%i, xmm %+ %%i CAT_XDEFINE nnxmm, %%i, %%i %assign %%i %%i+1 %endrep INIT_CPUFLAGS %1 %endmacro %macro INIT_YMM 0-1+ %assign avx_enabled 1 %define RESET_MM_PERMUTATION INIT_YMM %1 %define mmsize 32 %define num_mmregs 8 %if VPX_ARCH_X86_64 %define num_mmregs 16 %endif %define mova movdqa %define movu movdqu %undef movh %define movnta movntdq %assign %%i 0 %rep num_mmregs CAT_XDEFINE m, %%i, ymm %+ %%i CAT_XDEFINE nnymm, %%i, %%i %assign %%i %%i+1 %endrep INIT_CPUFLAGS %1 %endmacro INIT_XMM %macro DECLARE_MMCAST 1 %define mmmm%1 mm%1 %define mmxmm%1 mm%1 %define mmymm%1 mm%1 %define xmmmm%1 mm%1 %define xmmxmm%1 xmm%1 %define xmmymm%1 xmm%1 %define ymmmm%1 mm%1 %define ymmxmm%1 xmm%1 %define ymmymm%1 ymm%1 %define xm%1 xmm %+ m%1 %define ym%1 ymm %+ m%1 %endmacro %assign i 0 %rep 16 DECLARE_MMCAST i %assign i i+1 %endrep ; I often want to use macros that permute their arguments. e.g. there's no ; efficient way to implement butterfly or transpose or dct without swapping some ; arguments. ; ; I would like to not have to manually keep track of the permutations: ; If I insert a permutation in the middle of a function, it should automatically ; change everything that follows. For more complex macros I may also have multiple ; implementations, e.g. the SSE2 and SSSE3 versions may have different permutations. ; ; Hence these macros. Insert a PERMUTE or some SWAPs at the end of a macro that ; permutes its arguments. It's equivalent to exchanging the contents of the ; registers, except that this way you exchange the register names instead, so it ; doesn't cost any cycles. %macro PERMUTE 2-* ; takes a list of pairs to swap %rep %0/2 %xdefine %%tmp%2 m%2 %rotate 2 %endrep %rep %0/2 %xdefine m%1 %%tmp%2 CAT_XDEFINE nn, m%1, %1 %rotate 2 %endrep %endmacro %macro SWAP 2+ ; swaps a single chain (sometimes more concise than pairs) %ifnum %1 ; SWAP 0, 1, ... SWAP_INTERNAL_NUM %1, %2 %else ; SWAP m0, m1, ... SWAP_INTERNAL_NAME %1, %2 %endif %endmacro %macro SWAP_INTERNAL_NUM 2-* %rep %0-1 %xdefine %%tmp m%1 %xdefine m%1 m%2 %xdefine m%2 %%tmp CAT_XDEFINE nn, m%1, %1 CAT_XDEFINE nn, m%2, %2 %rotate 1 %endrep %endmacro %macro SWAP_INTERNAL_NAME 2-* %xdefine %%args nn %+ %1 %rep %0-1 %xdefine %%args %%args, nn %+ %2 %rotate 1 %endrep SWAP_INTERNAL_NUM %%args %endmacro ; If SAVE_MM_PERMUTATION is placed at the end of a function, then any later ; calls to that function will automatically load the permutation, so values can ; be returned in mmregs. 
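; Illustrative sketch (hypothetical registers): after
;   SWAP 0, 2
; the names m0 and m2 are exchanged at assembly time, so a following
;   mova [r0], m0
; stores what the preceding code produced in m2 -- no move instruction is
; emitted. The two macros below make such renames persist across calls: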
%macro SAVE_MM_PERMUTATION 0-1 %if %0 %xdefine %%f %1_m %else %xdefine %%f current_function %+ _m %endif %assign %%i 0 %rep num_mmregs CAT_XDEFINE %%f, %%i, m %+ %%i %assign %%i %%i+1 %endrep %endmacro %macro LOAD_MM_PERMUTATION 1 ; name to load from %ifdef %1_m0 %assign %%i 0 %rep num_mmregs CAT_XDEFINE m, %%i, %1_m %+ %%i CAT_XDEFINE nn, m %+ %%i, %%i %assign %%i %%i+1 %endrep %endif %endmacro ; Append cpuflags to the callee's name iff the appended name is known and the plain name isn't %macro call 1 call_internal %1 %+ SUFFIX, %1 %endmacro %macro call_internal 2 %xdefine %%i %2 %ifndef cglobaled_%2 %ifdef cglobaled_%1 %xdefine %%i %1 %endif %endif call %%i LOAD_MM_PERMUTATION %%i %endmacro ; Substitutions that reduce instruction size but are functionally equivalent %macro add 2 %ifnum %2 %if %2==128 sub %1, -128 %else add %1, %2 %endif %else add %1, %2 %endif %endmacro %macro sub 2 %ifnum %2 %if %2==128 add %1, -128 %else sub %1, %2 %endif %else sub %1, %2 %endif %endmacro ;============================================================================= ; AVX abstraction layer ;============================================================================= %assign i 0 %rep 16 %if i < 8 CAT_XDEFINE sizeofmm, i, 8 %endif CAT_XDEFINE sizeofxmm, i, 16 CAT_XDEFINE sizeofymm, i, 32 %assign i i+1 %endrep %undef i %macro CHECK_AVX_INSTR_EMU 3-* %xdefine %%opcode %1 %xdefine %%dst %2 %rep %0-2 %ifidn %%dst, %3 %error non-avx emulation of ``%%opcode'' is not supported %endif %rotate 1 %endrep %endmacro ;%1 == instruction ;%2 == minimal instruction set ;%3 == 1 if float, 0 if int ;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise ;%5 == 1 if commutative (i.e. doesn't matter which src arg is which), 0 if not ;%6+: operands %macro RUN_AVX_INSTR 6-9+ %ifnum sizeof%7 %assign __sizeofreg sizeof%7 %elifnum sizeof%6 %assign __sizeofreg sizeof%6 %else %assign __sizeofreg mmsize %endif %assign __emulate_avx 0 %if avx_enabled && __sizeofreg >= 16 %xdefine __instr v%1 %else %xdefine __instr %1 %if %0 >= 8+%4 %assign __emulate_avx 1 %endif %endif %ifnidn %2, fnord %ifdef cpuname %if notcpuflag(%2) %error use of ``%1'' %2 instruction in cpuname function: current_function %elif cpuflags_%2 < cpuflags_sse && notcpuflag(sse2) && __sizeofreg > 8 %error use of ``%1'' sse2 instruction in cpuname function: current_function %endif %endif %endif %if __emulate_avx %xdefine __src1 %7 %xdefine __src2 %8 %ifnidn %6, %7 %if %0 >= 9 CHECK_AVX_INSTR_EMU {%1 %6, %7, %8, %9}, %6, %8, %9 %else CHECK_AVX_INSTR_EMU {%1 %6, %7, %8}, %6, %8 %endif %if %5 && %4 == 0 %ifnid %8 ; 3-operand AVX instructions with a memory arg can only have it in src2, ; whereas SSE emulation prefers to have it in src1 (i.e. the mov). ; So, if the instruction is commutative with a memory arg, swap them. %xdefine __src1 %8 %xdefine __src2 %7 %endif %endif %if __sizeofreg == 8 MOVQ %6, __src1 %elif %3 MOVAPS %6, __src1 %else MOVDQA %6, __src1 %endif %endif %if %0 >= 9 %1 %6, __src2, %9 %else %1 %6, __src2 %endif %elif %0 >= 9 __instr %6, %7, %8, %9 %elif %0 == 8 __instr %6, %7, %8 %elif %0 == 7 __instr %6, %7 %else __instr %6 %endif %endmacro ;%1 == instruction ;%2 == minimal instruction set ;%3 == 1 if float, 0 if int ;%4 == 1 if non-destructive or 4-operand (xmm, xmm, xmm, imm), 0 otherwise ;%5 == 1 if commutative (i.e. 
doesn't matter which src arg is which), 0 if not %macro AVX_INSTR 1-5 fnord, 0, 1, 0 %macro %1 1-10 fnord, fnord, fnord, fnord, %1, %2, %3, %4, %5 %ifidn %2, fnord RUN_AVX_INSTR %6, %7, %8, %9, %10, %1 %elifidn %3, fnord RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2 %elifidn %4, fnord RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3 %elifidn %5, fnord RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4 %else RUN_AVX_INSTR %6, %7, %8, %9, %10, %1, %2, %3, %4, %5 %endif %endmacro %endmacro ; Instructions with both VEX and non-VEX encodings ; Non-destructive instructions are written without parameters AVX_INSTR addpd, sse2, 1, 0, 1 AVX_INSTR addps, sse, 1, 0, 1 AVX_INSTR addsd, sse2, 1, 0, 1 AVX_INSTR addss, sse, 1, 0, 1 AVX_INSTR addsubpd, sse3, 1, 0, 0 AVX_INSTR addsubps, sse3, 1, 0, 0 AVX_INSTR aesdec, fnord, 0, 0, 0 AVX_INSTR aesdeclast, fnord, 0, 0, 0 AVX_INSTR aesenc, fnord, 0, 0, 0 AVX_INSTR aesenclast, fnord, 0, 0, 0 AVX_INSTR aesimc AVX_INSTR aeskeygenassist AVX_INSTR andnpd, sse2, 1, 0, 0 AVX_INSTR andnps, sse, 1, 0, 0 AVX_INSTR andpd, sse2, 1, 0, 1 AVX_INSTR andps, sse, 1, 0, 1 AVX_INSTR blendpd, sse4, 1, 0, 0 AVX_INSTR blendps, sse4, 1, 0, 0 AVX_INSTR blendvpd, sse4, 1, 0, 0 AVX_INSTR blendvps, sse4, 1, 0, 0 AVX_INSTR cmppd, sse2, 1, 1, 0 AVX_INSTR cmpps, sse, 1, 1, 0 AVX_INSTR cmpsd, sse2, 1, 1, 0 AVX_INSTR cmpss, sse, 1, 1, 0 AVX_INSTR comisd, sse2 AVX_INSTR comiss, sse AVX_INSTR cvtdq2pd, sse2 AVX_INSTR cvtdq2ps, sse2 AVX_INSTR cvtpd2dq, sse2 AVX_INSTR cvtpd2ps, sse2 AVX_INSTR cvtps2dq, sse2 AVX_INSTR cvtps2pd, sse2 AVX_INSTR cvtsd2si, sse2 AVX_INSTR cvtsd2ss, sse2 AVX_INSTR cvtsi2sd, sse2 AVX_INSTR cvtsi2ss, sse AVX_INSTR cvtss2sd, sse2 AVX_INSTR cvtss2si, sse AVX_INSTR cvttpd2dq, sse2 AVX_INSTR cvttps2dq, sse2 AVX_INSTR cvttsd2si, sse2 AVX_INSTR cvttss2si, sse AVX_INSTR divpd, sse2, 1, 0, 0 AVX_INSTR divps, sse, 1, 0, 0 AVX_INSTR divsd, sse2, 1, 0, 0 AVX_INSTR divss, sse, 1, 0, 0 AVX_INSTR dppd, sse4, 1, 1, 0 AVX_INSTR dpps, sse4, 1, 1, 0 AVX_INSTR extractps, sse4 AVX_INSTR haddpd, sse3, 1, 0, 0 AVX_INSTR haddps, sse3, 1, 0, 0 AVX_INSTR hsubpd, sse3, 1, 0, 0 AVX_INSTR hsubps, sse3, 1, 0, 0 AVX_INSTR insertps, sse4, 1, 1, 0 AVX_INSTR lddqu, sse3 AVX_INSTR ldmxcsr, sse AVX_INSTR maskmovdqu, sse2 AVX_INSTR maxpd, sse2, 1, 0, 1 AVX_INSTR maxps, sse, 1, 0, 1 AVX_INSTR maxsd, sse2, 1, 0, 1 AVX_INSTR maxss, sse, 1, 0, 1 AVX_INSTR minpd, sse2, 1, 0, 1 AVX_INSTR minps, sse, 1, 0, 1 AVX_INSTR minsd, sse2, 1, 0, 1 AVX_INSTR minss, sse, 1, 0, 1 AVX_INSTR movapd, sse2 AVX_INSTR movaps, sse AVX_INSTR movd, mmx AVX_INSTR movddup, sse3 AVX_INSTR movdqa, sse2 AVX_INSTR movdqu, sse2 AVX_INSTR movhlps, sse, 1, 0, 0 AVX_INSTR movhpd, sse2, 1, 0, 0 AVX_INSTR movhps, sse, 1, 0, 0 AVX_INSTR movlhps, sse, 1, 0, 0 AVX_INSTR movlpd, sse2, 1, 0, 0 AVX_INSTR movlps, sse, 1, 0, 0 AVX_INSTR movmskpd, sse2 AVX_INSTR movmskps, sse AVX_INSTR movntdq, sse2 AVX_INSTR movntdqa, sse4 AVX_INSTR movntpd, sse2 AVX_INSTR movntps, sse AVX_INSTR movq, mmx AVX_INSTR movsd, sse2, 1, 0, 0 AVX_INSTR movshdup, sse3 AVX_INSTR movsldup, sse3 AVX_INSTR movss, sse, 1, 0, 0 AVX_INSTR movupd, sse2 AVX_INSTR movups, sse AVX_INSTR mpsadbw, sse4 AVX_INSTR mulpd, sse2, 1, 0, 1 AVX_INSTR mulps, sse, 1, 0, 1 AVX_INSTR mulsd, sse2, 1, 0, 1 AVX_INSTR mulss, sse, 1, 0, 1 AVX_INSTR orpd, sse2, 1, 0, 1 AVX_INSTR orps, sse, 1, 0, 1 AVX_INSTR pabsb, ssse3 AVX_INSTR pabsd, ssse3 AVX_INSTR pabsw, ssse3 AVX_INSTR packsswb, mmx, 0, 0, 0 AVX_INSTR packssdw, mmx, 0, 0, 0 AVX_INSTR packuswb, mmx, 0, 0, 0 AVX_INSTR packusdw, sse4, 0, 0, 0 AVX_INSTR paddb, 
mmx, 0, 0, 1 AVX_INSTR paddw, mmx, 0, 0, 1 AVX_INSTR paddd, mmx, 0, 0, 1 AVX_INSTR paddq, sse2, 0, 0, 1 AVX_INSTR paddsb, mmx, 0, 0, 1 AVX_INSTR paddsw, mmx, 0, 0, 1 AVX_INSTR paddusb, mmx, 0, 0, 1 AVX_INSTR paddusw, mmx, 0, 0, 1 AVX_INSTR palignr, ssse3 AVX_INSTR pand, mmx, 0, 0, 1 AVX_INSTR pandn, mmx, 0, 0, 0 AVX_INSTR pavgb, mmx2, 0, 0, 1 AVX_INSTR pavgw, mmx2, 0, 0, 1 AVX_INSTR pblendvb, sse4, 0, 0, 0 AVX_INSTR pblendw, sse4 AVX_INSTR pclmulqdq AVX_INSTR pcmpestri, sse42 AVX_INSTR pcmpestrm, sse42 AVX_INSTR pcmpistri, sse42 AVX_INSTR pcmpistrm, sse42 AVX_INSTR pcmpeqb, mmx, 0, 0, 1 AVX_INSTR pcmpeqw, mmx, 0, 0, 1 AVX_INSTR pcmpeqd, mmx, 0, 0, 1 AVX_INSTR pcmpeqq, sse4, 0, 0, 1 AVX_INSTR pcmpgtb, mmx, 0, 0, 0 AVX_INSTR pcmpgtw, mmx, 0, 0, 0 AVX_INSTR pcmpgtd, mmx, 0, 0, 0 AVX_INSTR pcmpgtq, sse42, 0, 0, 0 AVX_INSTR pextrb, sse4 AVX_INSTR pextrd, sse4 AVX_INSTR pextrq, sse4 AVX_INSTR pextrw, mmx2 AVX_INSTR phaddw, ssse3, 0, 0, 0 AVX_INSTR phaddd, ssse3, 0, 0, 0 AVX_INSTR phaddsw, ssse3, 0, 0, 0 AVX_INSTR phminposuw, sse4 AVX_INSTR phsubw, ssse3, 0, 0, 0 AVX_INSTR phsubd, ssse3, 0, 0, 0 AVX_INSTR phsubsw, ssse3, 0, 0, 0 AVX_INSTR pinsrb, sse4 AVX_INSTR pinsrd, sse4 AVX_INSTR pinsrq, sse4 AVX_INSTR pinsrw, mmx2 AVX_INSTR pmaddwd, mmx, 0, 0, 1 AVX_INSTR pmaddubsw, ssse3, 0, 0, 0 AVX_INSTR pmaxsb, sse4, 0, 0, 1 AVX_INSTR pmaxsw, mmx2, 0, 0, 1 AVX_INSTR pmaxsd, sse4, 0, 0, 1 AVX_INSTR pmaxub, mmx2, 0, 0, 1 AVX_INSTR pmaxuw, sse4, 0, 0, 1 AVX_INSTR pmaxud, sse4, 0, 0, 1 AVX_INSTR pminsb, sse4, 0, 0, 1 AVX_INSTR pminsw, mmx2, 0, 0, 1 AVX_INSTR pminsd, sse4, 0, 0, 1 AVX_INSTR pminub, mmx2, 0, 0, 1 AVX_INSTR pminuw, sse4, 0, 0, 1 AVX_INSTR pminud, sse4, 0, 0, 1 AVX_INSTR pmovmskb, mmx2 AVX_INSTR pmovsxbw, sse4 AVX_INSTR pmovsxbd, sse4 AVX_INSTR pmovsxbq, sse4 AVX_INSTR pmovsxwd, sse4 AVX_INSTR pmovsxwq, sse4 AVX_INSTR pmovsxdq, sse4 AVX_INSTR pmovzxbw, sse4 AVX_INSTR pmovzxbd, sse4 AVX_INSTR pmovzxbq, sse4 AVX_INSTR pmovzxwd, sse4 AVX_INSTR pmovzxwq, sse4 AVX_INSTR pmovzxdq, sse4 AVX_INSTR pmuldq, sse4, 0, 0, 1 AVX_INSTR pmulhrsw, ssse3, 0, 0, 1 AVX_INSTR pmulhuw, mmx2, 0, 0, 1 AVX_INSTR pmulhw, mmx, 0, 0, 1 AVX_INSTR pmullw, mmx, 0, 0, 1 AVX_INSTR pmulld, sse4, 0, 0, 1 AVX_INSTR pmuludq, sse2, 0, 0, 1 AVX_INSTR por, mmx, 0, 0, 1 AVX_INSTR psadbw, mmx2, 0, 0, 1 AVX_INSTR pshufb, ssse3, 0, 0, 0 AVX_INSTR pshufd, sse2 AVX_INSTR pshufhw, sse2 AVX_INSTR pshuflw, sse2 AVX_INSTR psignb, ssse3, 0, 0, 0 AVX_INSTR psignw, ssse3, 0, 0, 0 AVX_INSTR psignd, ssse3, 0, 0, 0 AVX_INSTR psllw, mmx, 0, 0, 0 AVX_INSTR pslld, mmx, 0, 0, 0 AVX_INSTR psllq, mmx, 0, 0, 0 AVX_INSTR pslldq, sse2, 0, 0, 0 AVX_INSTR psraw, mmx, 0, 0, 0 AVX_INSTR psrad, mmx, 0, 0, 0 AVX_INSTR psrlw, mmx, 0, 0, 0 AVX_INSTR psrld, mmx, 0, 0, 0 AVX_INSTR psrlq, mmx, 0, 0, 0 AVX_INSTR psrldq, sse2, 0, 0, 0 AVX_INSTR psubb, mmx, 0, 0, 0 AVX_INSTR psubw, mmx, 0, 0, 0 AVX_INSTR psubd, mmx, 0, 0, 0 AVX_INSTR psubq, sse2, 0, 0, 0 AVX_INSTR psubsb, mmx, 0, 0, 0 AVX_INSTR psubsw, mmx, 0, 0, 0 AVX_INSTR psubusb, mmx, 0, 0, 0 AVX_INSTR psubusw, mmx, 0, 0, 0 AVX_INSTR ptest, sse4 AVX_INSTR punpckhbw, mmx, 0, 0, 0 AVX_INSTR punpckhwd, mmx, 0, 0, 0 AVX_INSTR punpckhdq, mmx, 0, 0, 0 AVX_INSTR punpckhqdq, sse2, 0, 0, 0 AVX_INSTR punpcklbw, mmx, 0, 0, 0 AVX_INSTR punpcklwd, mmx, 0, 0, 0 AVX_INSTR punpckldq, mmx, 0, 0, 0 AVX_INSTR punpcklqdq, sse2, 0, 0, 0 AVX_INSTR pxor, mmx, 0, 0, 1 AVX_INSTR rcpps, sse, 1, 0, 0 AVX_INSTR rcpss, sse, 1, 0, 0 AVX_INSTR roundpd, sse4 AVX_INSTR roundps, sse4 AVX_INSTR roundsd, sse4 AVX_INSTR roundss, sse4 AVX_INSTR rsqrtps, 
sse, 1, 0, 0 AVX_INSTR rsqrtss, sse, 1, 0, 0 AVX_INSTR shufpd, sse2, 1, 1, 0 AVX_INSTR shufps, sse, 1, 1, 0 AVX_INSTR sqrtpd, sse2, 1, 0, 0 AVX_INSTR sqrtps, sse, 1, 0, 0 AVX_INSTR sqrtsd, sse2, 1, 0, 0 AVX_INSTR sqrtss, sse, 1, 0, 0 AVX_INSTR stmxcsr, sse AVX_INSTR subpd, sse2, 1, 0, 0 AVX_INSTR subps, sse, 1, 0, 0 AVX_INSTR subsd, sse2, 1, 0, 0 AVX_INSTR subss, sse, 1, 0, 0 AVX_INSTR ucomisd, sse2 AVX_INSTR ucomiss, sse AVX_INSTR unpckhpd, sse2, 1, 0, 0 AVX_INSTR unpckhps, sse, 1, 0, 0 AVX_INSTR unpcklpd, sse2, 1, 0, 0 AVX_INSTR unpcklps, sse, 1, 0, 0 AVX_INSTR xorpd, sse2, 1, 0, 1 AVX_INSTR xorps, sse, 1, 0, 1 ; 3DNow instructions, for sharing code between AVX, SSE and 3DN AVX_INSTR pfadd, 3dnow, 1, 0, 1 AVX_INSTR pfsub, 3dnow, 1, 0, 0 AVX_INSTR pfmul, 3dnow, 1, 0, 1 ; base-4 constants for shuffles %assign i 0 %rep 256 %assign j ((i>>6)&3)*1000 + ((i>>4)&3)*100 + ((i>>2)&3)*10 + (i&3) %if j < 10 CAT_XDEFINE q000, j, i %elif j < 100 CAT_XDEFINE q00, j, i %elif j < 1000 CAT_XDEFINE q0, j, i %else CAT_XDEFINE q, j, i %endif %assign i i+1 %endrep %undef i %undef j %macro FMA_INSTR 3 %macro %1 4-7 %1, %2, %3 %if cpuflag(xop) v%5 %1, %2, %3, %4 %elifnidn %1, %4 %6 %1, %2, %3 %7 %1, %4 %else %error non-xop emulation of ``%5 %1, %2, %3, %4'' is not supported %endif %endmacro %endmacro FMA_INSTR pmacsww, pmullw, paddw FMA_INSTR pmacsdd, pmulld, paddd ; sse4 emulation FMA_INSTR pmacsdql, pmuldq, paddq ; sse4 emulation FMA_INSTR pmadcswd, pmaddwd, paddd ; Macros for consolidating FMA3 and FMA4 using 4-operand (dst, src1, src2, src3) syntax. ; FMA3 is only possible if dst is the same as one of the src registers. ; Either src2 or src3 can be a memory operand. %macro FMA4_INSTR 2-* %push fma4_instr %xdefine %$prefix %1 %rep %0 - 1 %macro %$prefix%2 4-6 %$prefix, %2 %if notcpuflag(fma3) && notcpuflag(fma4) %error use of ``%5%6'' fma instruction in cpuname function: current_function %elif cpuflag(fma4) v%5%6 %1, %2, %3, %4 %elifidn %1, %2 ; If %3 or %4 is a memory operand it needs to be encoded as the last operand. %ifid %3 v%{5}213%6 %2, %3, %4 %else v%{5}132%6 %2, %4, %3 %endif %elifidn %1, %3 v%{5}213%6 %3, %2, %4 %elifidn %1, %4 v%{5}231%6 %4, %2, %3 %else %error fma3 emulation of ``%5%6 %1, %2, %3, %4'' is not supported %endif %endmacro %rotate 1 %endrep %pop %endmacro FMA4_INSTR fmadd, pd, ps, sd, ss FMA4_INSTR fmaddsub, pd, ps FMA4_INSTR fmsub, pd, ps, sd, ss FMA4_INSTR fmsubadd, pd, ps FMA4_INSTR fnmadd, pd, ps, sd, ss FMA4_INSTR fnmsub, pd, ps, sd, ss ; workaround: vpbroadcastq is broken in x86_32 due to a yasm bug (fixed in 1.3.0) %ifdef __YASM_VER__ %if __YASM_VERSION_ID__ < 0x01030000 && VPX_ARCH_X86_64 == 0 %macro vpbroadcastq 2 %if sizeof%1 == 16 movddup %1, %2 %else vbroadcastsd %1, %2 %endif %endmacro %endif %endif libvpx-1.8.2/tools.mk000066400000000000000000000105751357355204000145330ustar00rootroot00000000000000## ## Copyright (c) 2016 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## # List of tools to build. 
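# Each tool is declared by appending its main source file to TOOLS-yes and
# attaching per-tool metadata: <name>.SRCS lists additional sources and
# headers, <name>.GUID seeds the generated MSVS project, and
# <name>.DESCRIPTION feeds the documentation rules at the bottom of this
# file, as tiny_ssim demonstrates below.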
TOOLS-yes += tiny_ssim.c tiny_ssim.SRCS += vpx/vpx_integer.h y4minput.c y4minput.h \ vpx/vpx_codec.h vpx/src/vpx_image.c tiny_ssim.SRCS += vpx_mem/vpx_mem.c vpx_mem/vpx_mem.h tiny_ssim.SRCS += vpx_dsp/ssim.h vpx_scale/yv12config.h tiny_ssim.SRCS += vpx_ports/mem.h vpx_ports/mem.h tiny_ssim.SRCS += vpx_mem/include/vpx_mem_intrnl.h tiny_ssim.GUID = 3afa9b05-940b-4d68-b5aa-55157d8ed7b4 tiny_ssim.DESCRIPTION = Generate SSIM/PSNR from raw .yuv files # # End of specified files. The rest of the build rules should happen # automagically from here. # # Expand list of selected tools to build (as specified above) TOOLS = $(addprefix tools/,$(call enabled,TOOLS)) ALL_SRCS = $(foreach ex,$(TOOLS),$($(notdir $(ex:.c=)).SRCS)) CFLAGS += -I../include ifneq ($(CONFIG_CODEC_SRCS), yes) CFLAGS += -I../include/vpx endif # Expand all tools sources into a variable containing all sources # for that tools (not just them main one specified in TOOLS) # and add this file to the list (for MSVS workspace generation) $(foreach ex,$(TOOLS),$(eval $(notdir $(ex:.c=)).SRCS += $(ex) tools.mk)) # Create build/install dependencies for all tools. The common case # is handled here. The MSVS case is handled below. NOT_MSVS = $(if $(CONFIG_MSVS),,yes) DIST-BINS-$(NOT_MSVS) += $(addprefix bin/,$(TOOLS:.c=$(EXE_SFX))) DIST-SRCS-yes += $(ALL_SRCS) OBJS-$(NOT_MSVS) += $(call objs,$(ALL_SRCS)) BINS-$(NOT_MSVS) += $(addprefix $(BUILD_PFX),$(TOOLS:.c=$(EXE_SFX))) # Instantiate linker template for all tools. $(foreach bin,$(BINS-yes),\ $(eval $(bin):)\ $(eval $(call linker_template,$(bin),\ $(call objs,$($(notdir $(bin:$(EXE_SFX)=)).SRCS)) -lm))) # The following pairs define a mapping of locations in the distribution # tree to locations in the source/build trees. INSTALL_MAPS += src/%.c %.c INSTALL_MAPS += src/% $(SRC_PATH_BARE)/% INSTALL_MAPS += bin/% % INSTALL_MAPS += % % # Build Visual Studio Projects. We use a template here to instantiate # explicit rules rather than using an implicit rule because we want to # leverage make's VPATH searching rather than specifying the paths on # each file in TOOLS. This has the unfortunate side effect that # touching the source files trigger a rebuild of the project files # even though there is no real dependency there (the dependency is on # the makefiles). We may want to revisit this. 
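# For reference, the INSTALL_MAPS pairs above read as
# "<distribution-tree pattern> <source/build-tree pattern>"; e.g. the
# "bin/% %" pair resolves a distribution path such as bin/tiny_ssim to
# the build-tree binary tiny_ssim (a sketch of the intended mapping,
# assuming the shared install rules elsewhere in the build consume the
# pairs in that order).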
define vcproj_template $(1): $($(1:.$(VCPROJ_SFX)=).SRCS) vpx.$(VCPROJ_SFX) $(if $(quiet),@echo " [vcproj] $$@") $(qexec)$$(GEN_VCPROJ)\ --exe\ --target=$$(TOOLCHAIN)\ --name=$$(@:.$(VCPROJ_SFX)=)\ --ver=$$(CONFIG_VS_VERSION)\ --proj-guid=$$($$(@:.$(VCPROJ_SFX)=).GUID)\ --src-path-bare="$(SRC_PATH_BARE)" \ $$(if $$(CONFIG_STATIC_MSVCRT),--static-crt) \ --out=$$@ $$(INTERNAL_CFLAGS) $$(CFLAGS) \ $$(INTERNAL_LDFLAGS) $$(LDFLAGS) $$^ endef TOOLS_BASENAME := $(notdir $(TOOLS)) PROJECTS-$(CONFIG_MSVS) += $(TOOLS_BASENAME:.c=.$(VCPROJ_SFX)) INSTALL-BINS-$(CONFIG_MSVS) += $(foreach p,$(VS_PLATFORMS),\ $(addprefix bin/$(p)/,$(TOOLS_BASENAME:.c=.exe))) $(foreach proj,$(call enabled,PROJECTS),\ $(eval $(call vcproj_template,$(proj)))) # # Documentation Rules # %.dox: %.c @echo " [DOXY] $@" @mkdir -p $(dir $@) @echo "/*!\page tools_$(@F:.dox=) $(@F:.dox=)" > $@ @echo " \includelineno $(> $@ @echo "*/" >> $@ tools.dox: tools.mk @echo " [DOXY] $@" @echo "/*!\page tools Tools" > $@ @echo " This SDK includes a number of tools/utilities."\ "The following tools are included: ">>$@ @$(foreach ex,$(sort $(notdir $(TOOLS:.c=))),\ echo " - \subpage tools_$(ex) $($(ex).DESCRIPTION)" >> $@;) @echo "*/" >> $@ CLEAN-OBJS += tools.doxy tools.dox $(TOOLS:.c=.dox) DOCS-yes += tools.doxy tools.dox tools.doxy: tools.dox $(TOOLS:.c=.dox) @echo "INPUT += $^" > $@ libvpx-1.8.2/tools/000077500000000000000000000000001357355204000141725ustar00rootroot00000000000000libvpx-1.8.2/tools/3D-Reconstruction/000077500000000000000000000000001357355204000174575ustar00rootroot00000000000000libvpx-1.8.2/tools/3D-Reconstruction/MotionEST/000077500000000000000000000000001357355204000213005ustar00rootroot00000000000000libvpx-1.8.2/tools/3D-Reconstruction/MotionEST/Anandan.py000066400000000000000000000137651357355204000232260ustar00rootroot00000000000000#!/usr/bin/env python # coding: utf-8 import numpy as np import numpy.linalg as LA from scipy.ndimage.filters import gaussian_filter from scipy.sparse import csc_matrix from scipy.sparse.linalg import inv from MotionEST import MotionEST """Anandan Model""" class Anandan(MotionEST): """ constructor: cur_f: current frame ref_f: reference frame blk_sz: block size beta: smooth constrain weight k1,k2,k3: confidence coefficients max_iter: maximum number of iterations """ def __init__(self, cur_f, ref_f, blk_sz, beta, k1, k2, k3, max_iter=100): super(Anandan, self).__init__(cur_f, ref_f, blk_sz) self.levels = int(np.log2(blk_sz)) self.intensity_hierarchy() self.c_maxs = [] self.c_mins = [] self.e_maxs = [] self.e_mins = [] for l in xrange(self.levels + 1): c_max, c_min, e_max, e_min = self.get_curvature(self.cur_Is[l]) self.c_maxs.append(c_max) self.c_mins.append(c_min) self.e_maxs.append(e_max) self.e_mins.append(e_min) self.beta = beta self.k1, self.k2, self.k3 = k1, k2, k3 self.max_iter = max_iter """ build intensity hierarchy """ def intensity_hierarchy(self): level = 0 self.cur_Is = [] self.ref_Is = [] #build each level itensity by using gaussian filters while level <= self.levels: cur_I = gaussian_filter(self.cur_yuv[:, :, 0], sigma=(2**level) * 0.56) ref_I = gaussian_filter(self.ref_yuv[:, :, 0], sigma=(2**level) * 0.56) self.ref_Is.append(ref_I) self.cur_Is.append(cur_I) level += 1 """ get curvature of each block """ def get_curvature(self, I): c_max = np.zeros((self.num_row, self.num_col)) c_min = np.zeros((self.num_row, self.num_col)) e_max = np.zeros((self.num_row, self.num_col, 2)) e_min = np.zeros((self.num_row, self.num_col, 2)) for r in xrange(self.num_row): for c in 
xrange(self.num_col): h11, h12, h21, h22 = 0, 0, 0, 0 for i in xrange(r * self.blk_sz, r * self.blk_sz + self.blk_sz): for j in xrange(c * self.blk_sz, c * self.blk_sz + self.blk_sz): if 0 <= i < self.height - 1 and 0 <= j < self.width - 1: Ix = I[i][j + 1] - I[i][j] Iy = I[i + 1][j] - I[i][j] h11 += Iy * Iy h12 += Ix * Iy h21 += Ix * Iy h22 += Ix * Ix U, S, _ = LA.svd(np.array([[h11, h12], [h21, h22]])) c_max[r, c], c_min[r, c] = S[0], S[1] e_max[r, c] = U[:, 0] e_min[r, c] = U[:, 1] return c_max, c_min, e_max, e_min """ get ssd of motion vector: cur_I: current intensity ref_I: reference intensity center: current position mv: motion vector """ def get_ssd(self, cur_I, ref_I, center, mv): ssd = 0 for r in xrange(int(center[0]), int(center[0]) + self.blk_sz): for c in xrange(int(center[1]), int(center[1]) + self.blk_sz): if 0 <= r < self.height and 0 <= c < self.width: tr, tc = r + int(mv[0]), c + int(mv[1]) if 0 <= tr < self.height and 0 <= tc < self.width: ssd += (ref_I[tr, tc] - cur_I[r, c])**2 else: ssd += cur_I[r, c]**2 return ssd """ get region match of level l l: current level last_mvs: matchine results of last level radius: movenment radius """ def region_match(self, l, last_mvs, radius): mvs = np.zeros((self.num_row, self.num_col, 2)) min_ssds = np.zeros((self.num_row, self.num_col)) for r in xrange(self.num_row): for c in xrange(self.num_col): center = np.array([r * self.blk_sz, c * self.blk_sz]) #use overlap hierarchy policy init_mvs = [] if last_mvs is None: init_mvs = [np.array([0, 0])] else: for i, j in {(r, c), (r, c + 1), (r + 1, c), (r + 1, c + 1)}: if 0 <= i < last_mvs.shape[0] and 0 <= j < last_mvs.shape[1]: init_mvs.append(last_mvs[i, j]) #use last matching results as the start postion as current level min_ssd = None min_mv = None for init_mv in init_mvs: for i in xrange(-2, 3): for j in xrange(-2, 3): mv = init_mv + np.array([i, j]) * radius ssd = self.get_ssd(self.cur_Is[l], self.ref_Is[l], center, mv) if min_ssd is None or ssd < min_ssd: min_ssd = ssd min_mv = mv min_ssds[r, c] = min_ssd mvs[r, c] = min_mv return mvs, min_ssds """ smooth motion field based on neighbor constraint uvs: current estimation mvs: matching results min_ssds: minimum ssd of matching results l: current level """ def smooth(self, uvs, mvs, min_ssds, l): sm_uvs = np.zeros((self.num_row, self.num_col, 2)) c_max = self.c_maxs[l] c_min = self.c_mins[l] e_max = self.e_maxs[l] e_min = self.e_mins[l] for r in xrange(self.num_row): for c in xrange(self.num_col): w_max = c_max[r, c] / ( self.k1 + self.k2 * min_ssds[r, c] + self.k3 * c_max[r, c]) w_min = c_min[r, c] / ( self.k1 + self.k2 * min_ssds[r, c] + self.k3 * c_min[r, c]) w = w_max * w_min / (w_max + w_min + 1e-6) if w < 0: w = 0 avg_uv = np.array([0.0, 0.0]) for i, j in {(r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1)}: if 0 <= i < self.num_row and 0 <= j < self.num_col: avg_uv += 0.25 * uvs[i, j] sm_uvs[r, c] = (w * w * mvs[r, c] + self.beta * avg_uv) / ( self.beta + w * w) return sm_uvs """ motion field estimation """ def motion_field_estimation(self): last_mvs = None for l in xrange(self.levels, -1, -1): mvs, min_ssds = self.region_match(l, last_mvs, 2**l) uvs = np.zeros(mvs.shape) for _ in xrange(self.max_iter): uvs = self.smooth(uvs, mvs, min_ssds, l) last_mvs = uvs for r in xrange(self.num_row): for c in xrange(self.num_col): self.mf[r, c] = uvs[r, c] libvpx-1.8.2/tools/3D-Reconstruction/MotionEST/Exhaust.py000066400000000000000000000177711357355204000233100ustar00rootroot00000000000000#!/usr/bin/env python # coding: utf-8 import numpy as np 
import numpy.linalg as LA from Util import MSE from MotionEST import MotionEST """Exhaust Search:""" class Exhaust(MotionEST): """ Constructor: cur_f: current frame ref_f: reference frame blk_sz: block size wnd_size: search window size metric: metric to compare the blocks distrotion """ def __init__(self, cur_f, ref_f, blk_size, wnd_size, metric=MSE): self.name = 'exhaust' self.wnd_sz = wnd_size self.metric = metric super(Exhaust, self).__init__(cur_f, ref_f, blk_size) """ search method: cur_r: start row cur_c: start column """ def search(self, cur_r, cur_c): min_loss = self.block_dist(cur_r, cur_c, [0, 0], self.metric) cur_x = cur_c * self.blk_sz cur_y = cur_r * self.blk_sz ref_x = cur_x ref_y = cur_y #search all validate positions and select the one with minimum distortion for y in xrange(cur_y - self.wnd_sz, cur_y + self.wnd_sz): for x in xrange(cur_x - self.wnd_sz, cur_x + self.wnd_sz): if 0 <= x < self.width - self.blk_sz and 0 <= y < self.height - self.blk_sz: loss = self.block_dist(cur_r, cur_c, [y - cur_y, x - cur_x], self.metric) if loss < min_loss: min_loss = loss ref_x = x ref_y = y return ref_x, ref_y def motion_field_estimation(self): for i in xrange(self.num_row): for j in xrange(self.num_col): ref_x, ref_y = self.search(i, j) self.mf[i, j] = np.array( [ref_y - i * self.blk_sz, ref_x - j * self.blk_sz]) """Exhaust with Neighbor Constraint""" class ExhaustNeighbor(MotionEST): """ Constructor: cur_f: current frame ref_f: reference frame blk_sz: block size wnd_size: search window size beta: neigbor loss weight metric: metric to compare the blocks distrotion """ def __init__(self, cur_f, ref_f, blk_size, wnd_size, beta, metric=MSE): self.name = 'exhaust + neighbor' self.wnd_sz = wnd_size self.beta = beta self.metric = metric super(ExhaustNeighbor, self).__init__(cur_f, ref_f, blk_size) self.assign = np.zeros((self.num_row, self.num_col), dtype=np.bool) """ estimate neighbor loss: cur_r: current row cur_c: current column mv: current motion vector """ def neighborLoss(self, cur_r, cur_c, mv): loss = 0 #accumulate difference between current block's motion vector with neighbors' for i, j in {(-1, 0), (1, 0), (0, 1), (0, -1)}: nb_r = cur_r + i nb_c = cur_c + j if 0 <= nb_r < self.num_row and 0 <= nb_c < self.num_col and self.assign[ nb_r, nb_c]: loss += LA.norm(mv - self.mf[nb_r, nb_c]) return loss """ search method: cur_r: start row cur_c: start column """ def search(self, cur_r, cur_c): dist_loss = self.block_dist(cur_r, cur_c, [0, 0], self.metric) nb_loss = self.neighborLoss(cur_r, cur_c, np.array([0, 0])) min_loss = dist_loss + self.beta * nb_loss cur_x = cur_c * self.blk_sz cur_y = cur_r * self.blk_sz ref_x = cur_x ref_y = cur_y #search all validate positions and select the one with minimum distortion # as well as weighted neighbor loss for y in xrange(cur_y - self.wnd_sz, cur_y + self.wnd_sz): for x in xrange(cur_x - self.wnd_sz, cur_x + self.wnd_sz): if 0 <= x < self.width - self.blk_sz and 0 <= y < self.height - self.blk_sz: dist_loss = self.block_dist(cur_r, cur_c, [y - cur_y, x - cur_x], self.metric) nb_loss = self.neighborLoss(cur_r, cur_c, [y - cur_y, x - cur_x]) loss = dist_loss + self.beta * nb_loss if loss < min_loss: min_loss = loss ref_x = x ref_y = y return ref_x, ref_y def motion_field_estimation(self): for i in xrange(self.num_row): for j in xrange(self.num_col): ref_x, ref_y = self.search(i, j) self.mf[i, j] = np.array( [ref_y - i * self.blk_sz, ref_x - j * self.blk_sz]) self.assign[i, j] = True """Exhaust with Neighbor Constraint and Feature Score""" class 
ExhaustNeighborFeatureScore(MotionEST): """ Constructor: cur_f: current frame ref_f: reference frame blk_sz: block size wnd_size: search window size beta: neigbor loss weight max_iter: maximum number of iterations metric: metric to compare the blocks distrotion """ def __init__(self, cur_f, ref_f, blk_size, wnd_size, beta=1, max_iter=100, metric=MSE): self.name = 'exhaust + neighbor+feature score' self.wnd_sz = wnd_size self.beta = beta self.metric = metric self.max_iter = max_iter super(ExhaustNeighborFeatureScore, self).__init__(cur_f, ref_f, blk_size) self.fs = self.getFeatureScore() """ get feature score of each block """ def getFeatureScore(self): fs = np.zeros((self.num_row, self.num_col)) for r in xrange(self.num_row): for c in xrange(self.num_col): IxIx = 0 IyIy = 0 IxIy = 0 #get ssd surface for x in xrange(self.blk_sz - 1): for y in xrange(self.blk_sz - 1): ox = c * self.blk_sz + x oy = r * self.blk_sz + y Ix = self.cur_yuv[oy, ox + 1, 0] - self.cur_yuv[oy, ox, 0] Iy = self.cur_yuv[oy + 1, ox, 0] - self.cur_yuv[oy, ox, 0] IxIx += Ix * Ix IyIy += Iy * Iy IxIy += Ix * Iy #get maximum and minimum eigenvalues lambda_max = 0.5 * ((IxIx + IyIy) + np.sqrt(4 * IxIy * IxIy + (IxIx - IyIy)**2)) lambda_min = 0.5 * ((IxIx + IyIy) - np.sqrt(4 * IxIy * IxIy + (IxIx - IyIy)**2)) fs[r, c] = lambda_max * lambda_min / (1e-6 + lambda_max + lambda_min) if fs[r, c] < 0: fs[r, c] = 0 return fs """ do exhaust search """ def search(self, cur_r, cur_c): min_loss = self.block_dist(cur_r, cur_c, [0, 0], self.metric) cur_x = cur_c * self.blk_sz cur_y = cur_r * self.blk_sz ref_x = cur_x ref_y = cur_y #search all validate positions and select the one with minimum distortion for y in xrange(cur_y - self.wnd_sz, cur_y + self.wnd_sz): for x in xrange(cur_x - self.wnd_sz, cur_x + self.wnd_sz): if 0 <= x < self.width - self.blk_sz and 0 <= y < self.height - self.blk_sz: loss = self.block_dist(cur_r, cur_c, [y - cur_y, x - cur_x], self.metric) if loss < min_loss: min_loss = loss ref_x = x ref_y = y return ref_x, ref_y """ add smooth constraint """ def smooth(self, uvs, mvs): sm_uvs = np.zeros(uvs.shape) for r in xrange(self.num_row): for c in xrange(self.num_col): avg_uv = np.array([0.0, 0.0]) for i, j in {(r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1)}: if 0 <= i < self.num_row and 0 <= j < self.num_col: avg_uv += uvs[i, j] / 6.0 for i, j in {(r - 1, c - 1), (r - 1, c + 1), (r + 1, c - 1), (r + 1, c + 1)}: if 0 <= i < self.num_row and 0 <= j < self.num_col: avg_uv += uvs[i, j] / 12.0 sm_uvs[r, c] = (self.fs[r, c] * mvs[r, c] + self.beta * avg_uv) / ( self.beta + self.fs[r, c]) return sm_uvs def motion_field_estimation(self): #get matching results mvs = np.zeros(self.mf.shape) for r in xrange(self.num_row): for c in xrange(self.num_col): ref_x, ref_y = self.search(r, c) mvs[r, c] = np.array([ref_y - r * self.blk_sz, ref_x - c * self.blk_sz]) #add smoothness constraint uvs = np.zeros(self.mf.shape) for _ in xrange(self.max_iter): uvs = self.smooth(uvs, mvs) self.mf = uvs libvpx-1.8.2/tools/3D-Reconstruction/MotionEST/GroundTruth.py000066400000000000000000000022311357355204000241350ustar00rootroot00000000000000#!/ usr / bin / env python #coding : utf - 8 import numpy as np import numpy.linalg as LA from MotionEST import MotionEST """Ground Truth: Load in ground truth motion field and mask """ class GroundTruth(MotionEST): """constructor: cur_f:current frame ref_f:reference frame blk_sz:block size gt_path:ground truth motion field file path """ def __init__(self, cur_f, ref_f, blk_sz, gt_path, mf=None, mask=None): 
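# Ground-truth file format, as parsed below: one text line per block row;
# blocks within a row are separated by ';' and each block holds 'x,y'
# (column shift, row shift) or '-,-' for "no ground-truth vector", which
# only marks the block in the mask. The file's column order is mirrored
# relative to the frame, hence the [-j - 1] indexing and the sign flip
# on x.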
self.name = 'ground truth' super(GroundTruth, self).__init__(cur_f, ref_f, blk_sz) self.mask = np.zeros((self.num_row, self.num_col), dtype=np.bool) if gt_path: with open(gt_path) as gt_file: lines = gt_file.readlines() for i in xrange(len(lines)): info = lines[i].split(';') for j in xrange(len(info)): x, y = info[j].split(',') #-, - stands for nothing if x == '-' or y == '-': self.mask[i, -j - 1] = True continue #the order of original file is flipped on the x axis self.mf[i, -j - 1] = np.array([float(y), -float(x)], dtype=np.int) else: self.mf = mf self.mask = mask libvpx-1.8.2/tools/3D-Reconstruction/MotionEST/HornSchunck.py000066400000000000000000000154141357355204000241040ustar00rootroot00000000000000#!/usr/bin/env python # coding: utf-8 import numpy as np import numpy.linalg as LA from scipy.ndimage.filters import gaussian_filter from scipy.sparse import csc_matrix from scipy.sparse.linalg import inv from MotionEST import MotionEST """Horn & Schunck Model""" class HornSchunck(MotionEST): """ constructor: cur_f: current frame ref_f: reference frame blk_sz: block size alpha: smooth constrain weight sigma: gaussian blur parameter """ def __init__(self, cur_f, ref_f, blk_sz, alpha, sigma, max_iter=100): super(HornSchunck, self).__init__(cur_f, ref_f, blk_sz) self.cur_I, self.ref_I = self.getIntensity() #perform gaussian blur to smooth the intensity self.cur_I = gaussian_filter(self.cur_I, sigma=sigma) self.ref_I = gaussian_filter(self.ref_I, sigma=sigma) self.alpha = alpha self.max_iter = max_iter self.Ix, self.Iy, self.It = self.intensityDiff() """ Build Frame Intensity """ def getIntensity(self): cur_I = np.zeros((self.num_row, self.num_col)) ref_I = np.zeros((self.num_row, self.num_col)) #use average intensity as block's intensity for i in xrange(self.num_row): for j in xrange(self.num_col): r = i * self.blk_sz c = j * self.blk_sz cur_I[i, j] = np.mean(self.cur_yuv[r:r + self.blk_sz, c:c + self.blk_sz, 0]) ref_I[i, j] = np.mean(self.ref_yuv[r:r + self.blk_sz, c:c + self.blk_sz, 0]) return cur_I, ref_I """ Get First Order Derivative """ def intensityDiff(self): Ix = np.zeros((self.num_row, self.num_col)) Iy = np.zeros((self.num_row, self.num_col)) It = np.zeros((self.num_row, self.num_col)) sz = self.blk_sz for i in xrange(self.num_row - 1): for j in xrange(self.num_col - 1): """ Ix: (i ,j) <--- (i ,j+1) (i+1,j) <--- (i+1,j+1) """ count = 0 for r, c in {(i, j + 1), (i + 1, j + 1)}: if 0 <= r < self.num_row and 0 < c < self.num_col: Ix[i, j] += ( self.cur_I[r, c] - self.cur_I[r, c - 1] + self.ref_I[r, c] - self.ref_I[r, c - 1]) count += 2 Ix[i, j] /= count """ Iy: (i ,j) (i ,j+1) ^ ^ | | (i+1,j) (i+1,j+1) """ count = 0 for r, c in {(i + 1, j), (i + 1, j + 1)}: if 0 < r < self.num_row and 0 <= c < self.num_col: Iy[i, j] += ( self.cur_I[r, c] - self.cur_I[r - 1, c] + self.ref_I[r, c] - self.ref_I[r - 1, c]) count += 2 Iy[i, j] /= count count = 0 #It: for r in xrange(i, i + 2): for c in xrange(j, j + 2): if 0 <= r < self.num_row and 0 <= c < self.num_col: It[i, j] += (self.ref_I[r, c] - self.cur_I[r, c]) count += 1 It[i, j] /= count return Ix, Iy, It """ Get weighted average of neighbor motion vectors for evaluation of laplacian """ def averageMV(self): avg = np.zeros((self.num_row, self.num_col, 2)) """ 1/12 --- 1/6 --- 1/12 | | | 1/6 --- -1/8 --- 1/6 | | | 1/12 --- 1/6 --- 1/12 """ for i in xrange(self.num_row): for j in xrange(self.num_col): for r, c in {(-1, 0), (1, 0), (0, -1), (0, 1)}: if 0 <= i + r < self.num_row and 0 <= j + c < self.num_col: avg[i, j] += self.mf[i + r, j + c] / 6.0 
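# Diagonal neighbors get half the weight of the 4-connected ones
# (1/12 vs 1/6), matching the 3x3 Laplacian stencil drawn in the
# docstring above; `avg` is the local mean flow that the Horn-Schunck
# update below pulls each vector toward.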
for r, c in {(-1, -1), (-1, 1), (1, -1), (1, 1)}: if 0 <= i + r < self.num_row and 0 <= j + c < self.num_col: avg[i, j] += self.mf[i + r, j + c] / 12.0 return avg def motion_field_estimation(self): count = 0 """ u_{n+1} = ~u_n - Ix(Ix.~u_n+Iy.~v+It)/(IxIx+IyIy+alpha^2) v_{n+1} = ~v_n - Iy(Ix.~u_n+Iy.~v+It)/(IxIx+IyIy+alpha^2) """ denom = self.alpha**2 + np.power(self.Ix, 2) + np.power(self.Iy, 2) while count < self.max_iter: avg = self.averageMV() self.mf[:, :, 1] = avg[:, :, 1] - self.Ix * ( self.Ix * avg[:, :, 1] + self.Iy * avg[:, :, 0] + self.It) / denom self.mf[:, :, 0] = avg[:, :, 0] - self.Iy * ( self.Ix * avg[:, :, 1] + self.Iy * avg[:, :, 0] + self.It) / denom count += 1 self.mf *= self.blk_sz def motion_field_estimation_mat(self): row_idx = [] col_idx = [] data = [] N = 2 * self.num_row * self.num_col b = np.zeros((N, 1)) for i in xrange(self.num_row): for j in xrange(self.num_col): """(IxIx+alpha^2)u+IxIy.v-alpha^2~u IxIy.u+(IyIy+alpha^2)v-alpha^2~v""" u_idx = i * 2 * self.num_col + 2 * j v_idx = u_idx + 1 b[u_idx, 0] = -self.Ix[i, j] * self.It[i, j] b[v_idx, 0] = -self.Iy[i, j] * self.It[i, j] #u: (IxIx+alpha^2)u row_idx.append(u_idx) col_idx.append(u_idx) data.append(self.Ix[i, j] * self.Ix[i, j] + self.alpha**2) #IxIy.v row_idx.append(u_idx) col_idx.append(v_idx) data.append(self.Ix[i, j] * self.Iy[i, j]) #v: IxIy.u row_idx.append(v_idx) col_idx.append(u_idx) data.append(self.Ix[i, j] * self.Iy[i, j]) #(IyIy+alpha^2)v row_idx.append(v_idx) col_idx.append(v_idx) data.append(self.Iy[i, j] * self.Iy[i, j] + self.alpha**2) #-alpha^2~u #-alpha^2~v for r, c in {(-1, 0), (1, 0), (0, -1), (0, 1)}: if 0 <= i + r < self.num_row and 0 <= j + c < self.num_col: u_nb = (i + r) * 2 * self.num_col + 2 * (j + c) v_nb = u_nb + 1 row_idx.append(u_idx) col_idx.append(u_nb) data.append(-1 * self.alpha**2 / 6.0) row_idx.append(v_idx) col_idx.append(v_nb) data.append(-1 * self.alpha**2 / 6.0) for r, c in {(-1, -1), (-1, 1), (1, -1), (1, 1)}: if 0 <= i + r < self.num_row and 0 <= j + c < self.num_col: u_nb = (i + r) * 2 * self.num_col + 2 * (j + c) v_nb = u_nb + 1 row_idx.append(u_idx) col_idx.append(u_nb) data.append(-1 * self.alpha**2 / 12.0) row_idx.append(v_idx) col_idx.append(v_nb) data.append(-1 * self.alpha**2 / 12.0) M = csc_matrix((data, (row_idx, col_idx)), shape=(N, N)) M_inv = inv(M) uv = M_inv.dot(b) for i in xrange(self.num_row): for j in xrange(self.num_col): self.mf[i, j, 0] = uv[i * 2 * self.num_col + 2 * j + 1, 0] * self.blk_sz self.mf[i, j, 1] = uv[i * 2 * self.num_col + 2 * j, 0] * self.blk_sz libvpx-1.8.2/tools/3D-Reconstruction/MotionEST/MotionEST.py000066400000000000000000000060001357355204000234670ustar00rootroot00000000000000#!/ usr / bin / env python #coding : utf - 8 import numpy as np import numpy.linalg as LA import matplotlib.pyplot as plt from Util import drawMF, MSE """The Base Class of Estimators""" class MotionEST(object): """ constructor: cur_f: current frame ref_f: reference frame blk_sz: block size """ def __init__(self, cur_f, ref_f, blk_sz): self.cur_f = cur_f self.ref_f = ref_f self.blk_sz = blk_sz #convert RGB to YUV self.cur_yuv = np.array(self.cur_f.convert('YCbCr'), dtype=np.int) self.ref_yuv = np.array(self.ref_f.convert('YCbCr'), dtype=np.int) #frame size self.width = self.cur_f.size[0] self.height = self.cur_f.size[1] #motion field size self.num_row = self.height // self.blk_sz self.num_col = self.width // self.blk_sz #initialize motion field self.mf = np.zeros((self.num_row, self.num_col, 2)) """estimation function Override by child classes""" def 
motion_field_estimation(self): pass """ distortion of a block: cur_r: current row cur_c: current column mv: motion vector metric: distortion metric """ def block_dist(self, cur_r, cur_c, mv, metric=MSE): cur_x = cur_c * self.blk_sz cur_y = cur_r * self.blk_sz h = min(self.blk_sz, self.height - cur_y) w = min(self.blk_sz, self.width - cur_x) cur_blk = self.cur_yuv[cur_y:cur_y + h, cur_x:cur_x + w, :] ref_x = int(cur_x + mv[1]) ref_y = int(cur_y + mv[0]) if 0 <= ref_x < self.width - w and 0 <= ref_y < self.height - h: ref_blk = self.ref_yuv[ref_y:ref_y + h, ref_x:ref_x + w, :] else: ref_blk = np.zeros((h, w, 3)) return metric(cur_blk, ref_blk) """ distortion of motion field """ def distortion(self, mask=None, metric=MSE): loss = 0 count = 0 for i in xrange(self.num_row): for j in xrange(self.num_col): if mask is not None and mask[i, j]: continue loss += self.block_dist(i, j, self.mf[i, j], metric) count += 1 return loss / count """evaluation compare the difference with ground truth""" def motion_field_evaluation(self, ground_truth): loss = 0 count = 0 gt = ground_truth.mf mask = ground_truth.mask for i in xrange(self.num_row): for j in xrange(self.num_col): if mask is not None and mask[i][j]: continue loss += LA.norm(gt[i, j] - self.mf[i, j]) count += 1 return loss / count """render the motion field""" def show(self, ground_truth=None, size=10): cur_mf = drawMF(self.cur_f, self.blk_sz, self.mf) if ground_truth is None: n_row = 1 else: gt_mf = drawMF(self.cur_f, self.blk_sz, ground_truth) n_row = 2 plt.figure(figsize=(n_row * size, size * self.height / self.width)) plt.subplot(1, n_row, 1) plt.imshow(cur_mf) plt.title('Estimated Motion Field') if ground_truth is not None: plt.subplot(1, n_row, 2) plt.imshow(gt_mf) plt.title('Ground Truth') plt.tight_layout() plt.show() libvpx-1.8.2/tools/3D-Reconstruction/MotionEST/SearchSmooth.py000066400000000000000000000162171357355204000242600ustar00rootroot00000000000000#!/usr/bin/env python # coding: utf-8 import numpy as np import numpy.linalg as LA from Util import MSE from MotionEST import MotionEST """Search & Smooth Model with Adapt Weights""" class SearchSmoothAdapt(MotionEST): """ Constructor: cur_f: current frame ref_f: reference frame blk_sz: block size wnd_size: search window size beta: neigbor loss weight max_iter: maximum number of iterations metric: metric to compare the blocks distrotion """ def __init__(self, cur_f, ref_f, blk_size, search, max_iter=100): self.search = search self.max_iter = max_iter super(SearchSmoothAdapt, self).__init__(cur_f, ref_f, blk_size) """ get local diffiencial of refernce """ def getRefLocalDiff(self, mvs): m, n = self.num_row, self.num_col localDiff = [[] for _ in xrange(m)] blk_sz = self.blk_sz for r in xrange(m): for c in xrange(n): I_row = 0 I_col = 0 #get ssd surface count = 0 center = self.cur_yuv[r * blk_sz:(r + 1) * blk_sz, c * blk_sz:(c + 1) * blk_sz, 0] ty = np.clip(r * blk_sz + int(mvs[r, c, 0]), 0, self.height - blk_sz) tx = np.clip(c * blk_sz + int(mvs[r, c, 1]), 0, self.width - blk_sz) target = self.ref_yuv[ty:ty + blk_sz, tx:tx + blk_sz, 0] for y, x in {(ty - blk_sz, tx), (ty + blk_sz, tx)}: if 0 <= y < self.height - blk_sz and 0 <= x < self.width - blk_sz: nb = self.ref_yuv[y:y + blk_sz, x:x + blk_sz, 0] I_row += np.sum(np.abs(nb - center)) - np.sum( np.abs(target - center)) count += 1 I_row //= (count * blk_sz * blk_sz) count = 0 for y, x in {(ty, tx - blk_sz), (ty, tx + blk_sz)}: if 0 <= y < self.height - blk_sz and 0 <= x < self.width - blk_sz: nb = self.ref_yuv[y:y + blk_sz, x:x + blk_sz, 
0] I_col += np.sum(np.abs(nb - center)) - np.sum( np.abs(target - center)) count += 1 I_col //= (count * blk_sz * blk_sz) localDiff[r].append( np.array([[I_row * I_row, I_row * I_col], [I_col * I_row, I_col * I_col]])) return localDiff """ add smooth constraint """ def smooth(self, uvs, mvs): sm_uvs = np.zeros(uvs.shape) blk_sz = self.blk_sz for r in xrange(self.num_row): for c in xrange(self.num_col): nb_uv = np.array([0.0, 0.0]) for i, j in {(r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1)}: if 0 <= i < self.num_row and 0 <= j < self.num_col: nb_uv += uvs[i, j] / 6.0 else: nb_uv += uvs[r, c] / 6.0 for i, j in {(r - 1, c - 1), (r - 1, c + 1), (r + 1, c - 1), (r + 1, c + 1)}: if 0 <= i < self.num_row and 0 <= j < self.num_col: nb_uv += uvs[i, j] / 12.0 else: nb_uv += uvs[r, c] / 12.0 ssd_nb = self.block_dist(r, c, self.blk_sz * nb_uv) mv = mvs[r, c] ssd_mv = self.block_dist(r, c, mv) alpha = (ssd_nb - ssd_mv) / (ssd_mv + 1e-6) M = alpha * self.localDiff[r][c] P = M + np.identity(2) inv_P = LA.inv(P) sm_uvs[r, c] = np.dot(inv_P, nb_uv) + np.dot( np.matmul(inv_P, M), mv / blk_sz) return sm_uvs def block_matching(self): self.search.motion_field_estimation() def motion_field_estimation(self): self.localDiff = self.getRefLocalDiff(self.search.mf) #get matching results mvs = self.search.mf #add smoothness constraint uvs = mvs / self.blk_sz for _ in xrange(self.max_iter): uvs = self.smooth(uvs, mvs) self.mf = uvs * self.blk_sz """Search & Smooth Model with Fixed Weights""" class SearchSmoothFix(MotionEST): """ Constructor: cur_f: current frame ref_f: reference frame blk_sz: block size wnd_size: search window size beta: neigbor loss weight max_iter: maximum number of iterations metric: metric to compare the blocks distrotion """ def __init__(self, cur_f, ref_f, blk_size, search, beta, max_iter=100): self.search = search self.max_iter = max_iter self.beta = beta super(SearchSmoothFix, self).__init__(cur_f, ref_f, blk_size) """ get local diffiencial of refernce """ def getRefLocalDiff(self, mvs): m, n = self.num_row, self.num_col localDiff = [[] for _ in xrange(m)] blk_sz = self.blk_sz for r in xrange(m): for c in xrange(n): I_row = 0 I_col = 0 #get ssd surface count = 0 center = self.cur_yuv[r * blk_sz:(r + 1) * blk_sz, c * blk_sz:(c + 1) * blk_sz, 0] ty = np.clip(r * blk_sz + int(mvs[r, c, 0]), 0, self.height - blk_sz) tx = np.clip(c * blk_sz + int(mvs[r, c, 1]), 0, self.width - blk_sz) target = self.ref_yuv[ty:ty + blk_sz, tx:tx + blk_sz, 0] for y, x in {(ty - blk_sz, tx), (ty + blk_sz, tx)}: if 0 <= y < self.height - blk_sz and 0 <= x < self.width - blk_sz: nb = self.ref_yuv[y:y + blk_sz, x:x + blk_sz, 0] I_row += np.sum(np.abs(nb - center)) - np.sum( np.abs(target - center)) count += 1 I_row //= (count * blk_sz * blk_sz) count = 0 for y, x in {(ty, tx - blk_sz), (ty, tx + blk_sz)}: if 0 <= y < self.height - blk_sz and 0 <= x < self.width - blk_sz: nb = self.ref_yuv[y:y + blk_sz, x:x + blk_sz, 0] I_col += np.sum(np.abs(nb - center)) - np.sum( np.abs(target - center)) count += 1 I_col //= (count * blk_sz * blk_sz) localDiff[r].append( np.array([[I_row * I_row, I_row * I_col], [I_col * I_row, I_col * I_col]])) return localDiff """ add smooth constraint """ def smooth(self, uvs, mvs): sm_uvs = np.zeros(uvs.shape) blk_sz = self.blk_sz for r in xrange(self.num_row): for c in xrange(self.num_col): nb_uv = np.array([0.0, 0.0]) for i, j in {(r - 1, c), (r + 1, c), (r, c - 1), (r, c + 1)}: if 0 <= i < self.num_row and 0 <= j < self.num_col: nb_uv += uvs[i, j] / 6.0 else: nb_uv += uvs[r, c] / 6.0 for i, j 
in {(r - 1, c - 1), (r - 1, c + 1), (r + 1, c - 1), (r + 1, c + 1)}: if 0 <= i < self.num_row and 0 <= j < self.num_col: nb_uv += uvs[i, j] / 12.0 else: nb_uv += uvs[r, c] / 12.0 mv = mvs[r, c] / blk_sz M = self.localDiff[r][c] P = M + self.beta * np.identity(2) inv_P = LA.inv(P) sm_uvs[r, c] = np.dot(inv_P, self.beta * nb_uv) + np.dot( np.matmul(inv_P, M), mv) return sm_uvs def block_matching(self): self.search.motion_field_estimation() def motion_field_estimation(self): #get local structure self.localDiff = self.getRefLocalDiff(self.search.mf) #get matching results mvs = self.search.mf #add smoothness constraint uvs = mvs / self.blk_sz for _ in xrange(self.max_iter): uvs = self.smooth(uvs, mvs) self.mf = uvs * self.blk_sz libvpx-1.8.2/tools/3D-Reconstruction/MotionEST/Util.py000066400000000000000000000024601357355204000225710ustar00rootroot00000000000000#!/usr/bin/env python # coding: utf-8 import numpy as np import numpy.linalg as LA import matplotlib.pyplot as plt from scipy.ndimage import filters from PIL import Image, ImageDraw def MSE(blk1, blk2): return np.mean( LA.norm( np.array(blk1, dtype=np.int) - np.array(blk2, dtype=np.int), axis=2)) def drawMF(img, blk_sz, mf): img_rgba = img.convert('RGBA') mf_layer = Image.new(mode='RGBA', size=img_rgba.size, color=(0, 0, 0, 0)) draw = ImageDraw.Draw(mf_layer) width = img_rgba.size[0] height = img_rgba.size[1] num_row = height // blk_sz num_col = width // blk_sz for i in xrange(num_row): left = (0, i * blk_sz) right = (width, i * blk_sz) draw.line([left, right], fill=(0, 0, 255, 255)) for j in xrange(num_col): up = (j * blk_sz, 0) down = (j * blk_sz, height) draw.line([up, down], fill=(0, 0, 255, 255)) for i in xrange(num_row): for j in xrange(num_col): center = (j * blk_sz + 0.5 * blk_sz, i * blk_sz + 0.5 * blk_sz) """mf[i,j][0] is the row shift and mf[i,j][1] is the column shift In PIL coordinates, head[0] is x (column shift) and head[1] is y (row shift).""" head = (center[0] + mf[i, j][1], center[1] + mf[i, j][0]) draw.line([center, head], fill=(255, 0, 0, 255)) return Image.alpha_composite(img_rgba, mf_layer) libvpx-1.8.2/tools/3D-Reconstruction/genY4M/000077500000000000000000000000001357355204000205625ustar00rootroot00000000000000libvpx-1.8.2/tools/3D-Reconstruction/genY4M/genY4M.py000066400000000000000000000052611357355204000222430ustar00rootroot00000000000000import argparse from os import listdir, path from PIL import Image import sys parser = argparse.ArgumentParser() parser.add_argument("--frame_path", default="../data/frame/", type=str) parser.add_argument("--frame_rate", default="25:1", type=str) parser.add_argument("--interlacing", default="Ip", type=str) parser.add_argument("--pix_ratio", default="0:0", type=str) parser.add_argument("--color_space", default="4:2:0", type=str) parser.add_argument("--output", default="output.y4m", type=str) def generate(args, frames): if len(frames) == 0: return #sort the frames based on the frame index frames = sorted(frames, key=lambda x: x[0]) #convert the frames to YUV form frames = [f.convert("YCbCr") for _, f in frames] #write the header header = "YUV4MPEG2 W%d H%d F%s %s A%s" % (frames[0].width, frames[0].height, args.frame_rate, args.interlacing, args.pix_ratio) cs = args.color_space.split(":") header += " C%s%s%s\n" % (cs[0], cs[1], cs[2]) #estimate the sample step based on subsample value subsamples = [int(c) for c in cs] r_step = [1, int(subsamples[2] == 0) + 1, int(subsamples[2] == 0) + 1] c_step = [1, 4 // subsamples[1], 4 // subsamples[1]] #write in frames with open(args.output, 
"wb") as y4m: y4m.write(header) for f in frames: y4m.write("FRAME\n") px = f.load() for k in xrange(3): for i in xrange(0, f.height, r_step[k]): for j in xrange(0, f.width, c_step[k]): yuv = px[j, i] y4m.write(chr(yuv[k])) if __name__ == "__main__": args = parser.parse_args() frames = [] frames_mv = [] for filename in listdir(args.frame_path): name, ext = filename.split(".") if ext == "png": name_parse = name.split("_") idx = int(name_parse[-1]) img = Image.open(path.join(args.frame_path, filename)) if name_parse[-2] == "mv": frames_mv.append((idx, img)) else: frames.append((idx, img)) if len(frames) == 0: print("No frames in directory: " + args.frame_path) sys.exit() print("----------------------Y4M Info----------------------") print("width: %d" % frames[0][1].width) print("height: %d" % frames[0][1].height) print("#frame: %d" % len(frames)) print("frame rate: %s" % args.frame_rate) print("interlacing: %s" % args.interlacing) print("pixel ratio: %s" % args.pix_ratio) print("color space: %s" % args.color_space) print("----------------------------------------------------") print("Generating ...") generate(args, frames) if len(frames_mv) != 0: args.output = args.output.replace(".y4m", "_mv.y4m") generate(args, frames_mv) libvpx-1.8.2/tools/3D-Reconstruction/sketch_3D_reconstruction/000077500000000000000000000000001357355204000244275ustar00rootroot00000000000000libvpx-1.8.2/tools/3D-Reconstruction/sketch_3D_reconstruction/BVH.pde000066400000000000000000000112601357355204000255400ustar00rootroot00000000000000/* *AABB bounding box *Bouding Volume Hierarchy */ class BoundingBox { float min_x, min_y, min_z, max_x, max_y, max_z; PVector center; BoundingBox() { min_x = Float.POSITIVE_INFINITY; min_y = Float.POSITIVE_INFINITY; min_z = Float.POSITIVE_INFINITY; max_x = Float.NEGATIVE_INFINITY; max_y = Float.NEGATIVE_INFINITY; max_z = Float.NEGATIVE_INFINITY; center = new PVector(); } // build a bounding box for a triangle void create(Triangle t) { min_x = min(t.p1.x, min(t.p2.x, t.p3.x)); max_x = max(t.p1.x, max(t.p2.x, t.p3.x)); min_y = min(t.p1.y, min(t.p2.y, t.p3.y)); max_y = max(t.p1.y, max(t.p2.y, t.p3.y)); min_z = min(t.p1.z, min(t.p2.z, t.p3.z)); max_z = max(t.p1.z, max(t.p2.z, t.p3.z)); center.x = (max_x + min_x) / 2; center.y = (max_y + min_y) / 2; center.z = (max_z + min_z) / 2; } // merge two bounding boxes void add(BoundingBox bbx) { min_x = min(min_x, bbx.min_x); min_y = min(min_y, bbx.min_y); min_z = min(min_z, bbx.min_z); max_x = max(max_x, bbx.max_x); max_y = max(max_y, bbx.max_y); max_z = max(max_z, bbx.max_z); center.x = (max_x + min_x) / 2; center.y = (max_y + min_y) / 2; center.z = (max_z + min_z) / 2; } // get bounding box center axis value float getCenterAxisValue(int axis) { if (axis == 1) { return center.x; } else if (axis == 2) { return center.y; } // when axis == 3 return center.z; } // check if a ray is intersected with the bounding box boolean intersect(Ray r) { float tmin, tmax; if (r.dir.x >= 0) { tmin = (min_x - r.ori.x) * (1.0f / r.dir.x); tmax = (max_x - r.ori.x) * (1.0f / r.dir.x); } else { tmin = (max_x - r.ori.x) * (1.0f / r.dir.x); tmax = (min_x - r.ori.x) * (1.0f / r.dir.x); } float tymin, tymax; if (r.dir.y >= 0) { tymin = (min_y - r.ori.y) * (1.0f / r.dir.y); tymax = (max_y - r.ori.y) * (1.0f / r.dir.y); } else { tymin = (max_y - r.ori.y) * (1.0f / r.dir.y); tymax = (min_y - r.ori.y) * (1.0f / r.dir.y); } if (tmax < tymin || tymax < tmin) { return false; } tmin = tmin < tymin ? tymin : tmin; tmax = tmax > tymax ? 
tymax : tmax; float tzmin, tzmax; if (r.dir.z >= 0) { tzmin = (min_z - r.ori.z) * (1.0f / r.dir.z); tzmax = (max_z - r.ori.z) * (1.0f / r.dir.z); } else { tzmin = (max_z - r.ori.z) * (1.0f / r.dir.z); tzmax = (min_z - r.ori.z) * (1.0f / r.dir.z); } if (tmax < tzmin || tmin > tzmax) { return false; } return true; } } // Bounding Volume Hierarchy class BVH { // Binary Tree BVH left, right; BoundingBox overall_bbx; ArrayList mesh; BVH(ArrayList mesh) { this.mesh = mesh; overall_bbx = new BoundingBox(); left = null; right = null; int mesh_size = this.mesh.size(); if (mesh_size <= 1) { return; } // random select an axis int axis = int(random(100)) % 3 + 1; // build bounding box and save the selected center component float[] axis_values = new float[mesh_size]; for (int i = 0; i < mesh_size; i++) { Triangle t = this.mesh.get(i); overall_bbx.add(t.bbx); axis_values[i] = t.bbx.getCenterAxisValue(axis); } // find the median value of selected center component as pivot axis_values = sort(axis_values); float pivot; if (mesh_size % 2 == 1) { pivot = axis_values[mesh_size / 2]; } else { pivot = 0.5f * (axis_values[mesh_size / 2 - 1] + axis_values[mesh_size / 2]); } // Build left node and right node by partitioning the mesh based on triangle // bounding box center component value ArrayList left_mesh = new ArrayList(); ArrayList right_mesh = new ArrayList(); for (int i = 0; i < mesh_size; i++) { Triangle t = this.mesh.get(i); if (t.bbx.getCenterAxisValue(axis) < pivot) { left_mesh.add(t); } else if (t.bbx.getCenterAxisValue(axis) > pivot) { right_mesh.add(t); } else if (left_mesh.size() < right_mesh.size()) { left_mesh.add(t); } else { right_mesh.add(t); } } left = new BVH(left_mesh); right = new BVH(right_mesh); } // check if a ray intersect with current volume boolean intersect(Ray r, float[] param) { if (mesh.size() == 0) { return false; } if (mesh.size() == 1) { Triangle t = mesh.get(0); return t.intersect(r, param); } if (!overall_bbx.intersect(r)) { return false; } boolean left_res = left.intersect(r, param); boolean right_res = right.intersect(r, param); return left_res || right_res; } } libvpx-1.8.2/tools/3D-Reconstruction/sketch_3D_reconstruction/Camera.pde000066400000000000000000000103401357355204000263070ustar00rootroot00000000000000class Camera { // camera's field of view float fov; // camera's position, look at point and axis PVector pos, center, axis; PVector init_pos, init_center, init_axis; float move_speed; float rot_speed; Camera(float fov, PVector pos, PVector center, PVector axis) { this.fov = fov; this.pos = pos; this.center = center; this.axis = axis; this.axis.normalize(); move_speed = 0.001; rot_speed = 0.01 * PI; init_pos = pos.copy(); init_center = center.copy(); init_axis = axis.copy(); } Camera copy() { Camera cam = new Camera(fov, pos.copy(), center.copy(), axis.copy()); return cam; } PVector project(PVector pos) { PVector proj = MatxVec3(getCameraMat(), PVector.sub(pos, this.pos)); proj.x = (float)height / 2.0 * proj.x / proj.z / tan(fov / 2.0f); proj.y = (float)height / 2.0 * proj.y / proj.z / tan(fov / 2.0f); proj.z = proj.z; return proj; } float[] getCameraMat() { float[] mat = new float[9]; PVector dir = PVector.sub(center, pos); dir.normalize(); PVector left = dir.cross(axis); left.normalize(); // processing camera system does not follow right hand rule mat[0] = -left.x; mat[1] = -left.y; mat[2] = -left.z; mat[3] = axis.x; mat[4] = axis.y; mat[5] = axis.z; mat[6] = dir.x; mat[7] = dir.y; mat[8] = dir.z; return mat; } void run() { PVector dir, left; if (mousePressed) { 
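      // Orbit mode: map mouseX/mouseY to a longitude/latitude pair, keep the
      // current distance from pos to center as the orbit radius, then rebuild
      // the camera position and up axis from the resulting spherical
      // coordinates.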
float angleX = (float)mouseX / width * PI - PI / 2; float angleY = (float)mouseY / height * PI - PI; PVector diff = PVector.sub(center, pos); float radius = diff.mag(); pos.x = radius * sin(angleY) * sin(angleX) + center.x; pos.y = radius * cos(angleY) + center.y; pos.z = radius * sin(angleY) * cos(angleX) + center.z; dir = PVector.sub(center, pos); dir.normalize(); PVector up = new PVector(0, 1, 0); left = up.cross(dir); left.normalize(); axis = dir.cross(left); axis.normalize(); } if (keyPressed) { switch (key) { case 'w': dir = PVector.sub(center, pos); dir.normalize(); pos = PVector.add(pos, PVector.mult(dir, move_speed)); center = PVector.add(center, PVector.mult(dir, move_speed)); break; case 's': dir = PVector.sub(center, pos); dir.normalize(); pos = PVector.sub(pos, PVector.mult(dir, move_speed)); center = PVector.sub(center, PVector.mult(dir, move_speed)); break; case 'a': dir = PVector.sub(center, pos); dir.normalize(); left = axis.cross(dir); left.normalize(); pos = PVector.add(pos, PVector.mult(left, move_speed)); center = PVector.add(center, PVector.mult(left, move_speed)); break; case 'd': dir = PVector.sub(center, pos); dir.normalize(); left = axis.cross(dir); left.normalize(); pos = PVector.sub(pos, PVector.mult(left, move_speed)); center = PVector.sub(center, PVector.mult(left, move_speed)); break; case 'r': dir = PVector.sub(center, pos); dir.normalize(); float[] mat = getRotationMat3x3(rot_speed, dir.x, dir.y, dir.z); axis = MatxVec3(mat, axis); axis.normalize(); break; case 'b': pos = init_pos.copy(); center = init_center.copy(); axis = init_axis.copy(); break; case '+': move_speed *= 2.0f; break; case '-': move_speed /= 2.0; break; case CODED: if (keyCode == UP) { pos = PVector.add(pos, PVector.mult(axis, move_speed)); center = PVector.add(center, PVector.mult(axis, move_speed)); } else if (keyCode == DOWN) { pos = PVector.sub(pos, PVector.mult(axis, move_speed)); center = PVector.sub(center, PVector.mult(axis, move_speed)); } } } } void open() { perspective(fov, float(width) / height, 1e-6, 1e5); camera(pos.x, pos.y, pos.z, center.x, center.y, center.z, axis.x, axis.y, axis.z); } void close() { ortho(-width, 0, -height, 0); camera(0, 0, 0, 0, 0, 1, 0, 1, 0); } } libvpx-1.8.2/tools/3D-Reconstruction/sketch_3D_reconstruction/MotionField.pde000066400000000000000000000070141357355204000273340ustar00rootroot00000000000000class MotionField { int block_size; ArrayList motion_field; MotionField(int block_size) { this.block_size = block_size; motion_field = new ArrayList(); } void update(Camera last_cam, Camera current_cam, PointCloud point_cloud, BVH bvh) { // clear motion field motion_field = new ArrayList(); int r_num = height / block_size, c_num = width / block_size; for (int i = 0; i < r_num * c_num; i++) motion_field.add(new PVector(0, 0, 0)); // estimate motion vector of each point in point cloud for (int i = 0; i < point_cloud.size(); i++) { PVector p = point_cloud.getPosition(i); PVector p0 = current_cam.project(p); PVector p1 = last_cam.project(p); int row = int((p0.y + height / 2.0f) / block_size); int col = int((p0.x + width / 2.0f) / block_size); if (row >= 0 && row < r_num && col >= 0 && col < c_num) { PVector accu = motion_field.get(row * c_num + col); accu.x += p1.x - p0.x; accu.y += p1.y - p0.y; accu.z += 1; } } // if some blocks do not have point, then use ray tracing to see if they are // in triangles for (int i = 0; i < r_num; i++) for (int j = 0; j < c_num; j++) { PVector accu = motion_field.get(i * c_num + j); if (accu.z > 0) { continue; } // use the 
center of the block to generate view ray float cx = j * block_size + block_size / 2.0f - width / 2.0f; float cy = i * block_size + block_size / 2.0f - height / 2.0f; float cz = 0.5f * height / tan(current_cam.fov / 2.0f); PVector dir = new PVector(cx, cy, cz); float[] camMat = current_cam.getCameraMat(); dir = MatxVec3(transpose3x3(camMat), dir); dir.normalize(); Ray r = new Ray(current_cam.pos, dir); // ray tracing float[] param = new float[4]; param[0] = Float.POSITIVE_INFINITY; if (bvh.intersect(r, param)) { PVector p = new PVector(param[1], param[2], param[3]); PVector p0 = current_cam.project(p); PVector p1 = last_cam.project(p); accu.x += p1.x - p0.x; accu.y += p1.y - p0.y; accu.z += 1; } } // estimate the motion vector of each block for (int i = 0; i < r_num * c_num; i++) { PVector mv = motion_field.get(i); if (mv.z > 0) { motion_field.set(i, new PVector(mv.x / mv.z, mv.y / mv.z, 0)); } else // there is nothing in the block, use -1 to mark it. { motion_field.set(i, new PVector(0.0, 0.0, -1)); } } } void render() { int r_num = height / block_size, c_num = width / block_size; for (int i = 0; i < r_num; i++) for (int j = 0; j < c_num; j++) { PVector mv = motion_field.get(i * c_num + j); float ox = j * block_size + 0.5f * block_size; float oy = i * block_size + 0.5f * block_size; stroke(255, 0, 0); line(ox, oy, ox + mv.x, oy + mv.y); } } void save(String path) { int r_num = height / block_size; int c_num = width / block_size; String[] mvs = new String[r_num]; for (int i = 0; i < r_num; i++) { mvs[i] = ""; for (int j = 0; j < c_num; j++) { PVector mv = motion_field.get(i * c_num + j); if (mv.z != -1) { mvs[i] += str(mv.x) + "," + str(mv.y); } else // there is nothing { mvs[i] += "-,-"; } if (j != c_num - 1) mvs[i] += ";"; } } saveStrings(path, mvs); } } libvpx-1.8.2/tools/3D-Reconstruction/sketch_3D_reconstruction/PointCloud.pde000066400000000000000000000101141357355204000271760ustar00rootroot00000000000000class PointCloud { ArrayList points; // array to save points IntList point_colors; // array to save points color PVector cloud_mass; float[] depth; boolean[] real; PointCloud() { // initialize points = new ArrayList(); point_colors = new IntList(); cloud_mass = new PVector(0, 0, 0); depth = new float[width * height]; real = new boolean[width * height]; } void generate(PImage rgb_img, PImage depth_img, Transform trans) { if (depth_img.width != width || depth_img.height != height || rgb_img.width != width || rgb_img.height != height) { println("rgb and depth file dimension should be same with window size"); exit(); } // clear depth and real for (int i = 0; i < width * height; i++) { depth[i] = 0; real[i] = false; } for (int v = 0; v < height; v++) for (int u = 0; u < width; u++) { // get depth value (red channel) color depth_px = depth_img.get(u, v); depth[v * width + u] = depth_px & 0x0000FFFF; if (int(depth[v * width + u]) != 0) { real[v * width + u] = true; } point_colors.append(rgb_img.get(u, v)); } for (int v = 0; v < height; v++) for (int u = 0; u < width; u++) { if (int(depth[v * width + u]) == 0) { interpolateDepth(v, u); } // add transformed pixel as well as pixel color to the list PVector pos = trans.transform(u, v, int(depth[v * width + u])); points.add(pos); // accumulate z value cloud_mass = PVector.add(cloud_mass, pos); } } void fillInDepthAlongPath(float d, Node node) { node = node.parent; while (node != null) { int i = node.row; int j = node.col; if (depth[i * width + j] == 0) { depth[i * width + j] = d; } node = node.parent; } } // interpolate void interpolateDepth(int 
row, int col) { if (row < 0 || row >= height || col < 0 || col >= width || int(depth[row * width + col]) != 0) { return; } ArrayList queue = new ArrayList(); queue.add(new Node(row, col, null)); boolean[] visited = new boolean[width * height]; for (int i = 0; i < width * height; i++) visited[i] = false; visited[row * width + col] = true; // Using BFS to Find the Nearest Neighbor while (queue.size() > 0) { // pop Node node = queue.get(0); queue.remove(0); int i = node.row; int j = node.col; // if current position have a real depth if (depth[i * width + j] != 0 && real[i * width + j]) { fillInDepthAlongPath(depth[i * width + j], node); break; } else { // search unvisited 8 neighbors for (int r = max(0, i - 1); r < min(height, i + 2); r++) { for (int c = max(0, j - 1); c < min(width, j + 2); c++) { if (!visited[r * width + c]) { visited[r * width + c] = true; queue.add(new Node(r, c, node)); } } } } } } // get point cloud size int size() { return points.size(); } // get ith position PVector getPosition(int i) { if (i >= points.size()) { println("point position: index " + str(i) + " exceeds"); exit(); } return points.get(i); } // get ith color color getColor(int i) { if (i >= point_colors.size()) { println("point color: index " + str(i) + " exceeds"); exit(); } return point_colors.get(i); } // get cloud center PVector getCloudCenter() { if (points.size() > 0) { return PVector.div(cloud_mass, points.size()); } return new PVector(0, 0, 0); } // merge two clouds void merge(PointCloud point_cloud) { for (int i = 0; i < point_cloud.size(); i++) { points.add(point_cloud.getPosition(i)); point_colors.append(point_cloud.getColor(i)); } cloud_mass = PVector.add(cloud_mass, point_cloud.cloud_mass); } } class Node { int row, col; Node parent; Node(int row, int col, Node parent) { this.row = row; this.col = col; this.parent = parent; } } libvpx-1.8.2/tools/3D-Reconstruction/sketch_3D_reconstruction/Ray_Tracing.pde000066400000000000000000000030631357355204000273250ustar00rootroot00000000000000// Triangle class Triangle { // position PVector p1, p2, p3; // color color c1, c2, c3; BoundingBox bbx; Triangle(PVector p1, PVector p2, PVector p3, color c1, color c2, color c3) { this.p1 = p1; this.p2 = p2; this.p3 = p3; this.c1 = c1; this.c2 = c2; this.c3 = c3; bbx = new BoundingBox(); bbx.create(this); } // check to see if a ray intersects with the triangle boolean intersect(Ray r, float[] param) { PVector p21 = PVector.sub(p2, p1); PVector p31 = PVector.sub(p3, p1); PVector po1 = PVector.sub(r.ori, p1); PVector dxp31 = r.dir.cross(p31); PVector po1xp21 = po1.cross(p21); float denom = p21.dot(dxp31); float t = p31.dot(po1xp21) / denom; float alpha = po1.dot(dxp31) / denom; float beta = r.dir.dot(po1xp21) / denom; boolean res = t > 0 && alpha > 0 && alpha < 1 && beta > 0 && beta < 1 && alpha + beta < 1; // depth test if (res && t < param[0]) { param[0] = t; param[1] = alpha * p1.x + beta * p2.x + (1 - alpha - beta) * p3.x; param[2] = alpha * p1.y + beta * p2.y + (1 - alpha - beta) * p3.y; param[3] = alpha * p1.z + beta * p2.z + (1 - alpha - beta) * p3.z; } return res; } void render() { beginShape(TRIANGLES); fill(c1); vertex(p1.x, p1.y, p1.z); fill(c2); vertex(p2.x, p2.y, p2.z); fill(c3); vertex(p3.x, p3.y, p3.z); endShape(); } } // Ray class Ray { // origin and direction PVector ori, dir; Ray(PVector ori, PVector dir) { this.ori = ori; this.dir = dir; } } libvpx-1.8.2/tools/3D-Reconstruction/sketch_3D_reconstruction/Scene.pde000066400000000000000000000034421357355204000261610ustar00rootroot00000000000000class 
Scene { PointCloud point_cloud; ArrayList mesh; BVH bvh; MotionField motion_field; Camera last_cam; Camera current_cam; int frame_count; Scene(Camera camera, PointCloud point_cloud, MotionField motion_field) { this.point_cloud = point_cloud; this.motion_field = motion_field; mesh = new ArrayList(); for (int v = 0; v < height - 1; v++) for (int u = 0; u < width - 1; u++) { PVector p1 = point_cloud.getPosition(v * width + u); PVector p2 = point_cloud.getPosition(v * width + u + 1); PVector p3 = point_cloud.getPosition((v + 1) * width + u + 1); PVector p4 = point_cloud.getPosition((v + 1) * width + u); color c1 = point_cloud.getColor(v * width + u); color c2 = point_cloud.getColor(v * width + u + 1); color c3 = point_cloud.getColor((v + 1) * width + u + 1); color c4 = point_cloud.getColor((v + 1) * width + u); mesh.add(new Triangle(p1, p2, p3, c1, c2, c3)); mesh.add(new Triangle(p3, p4, p1, c3, c4, c1)); } bvh = new BVH(mesh); last_cam = camera.copy(); current_cam = camera; frame_count = 0; } void run() { last_cam = current_cam.copy(); current_cam.run(); motion_field.update(last_cam, current_cam, point_cloud, bvh); frame_count += 1; } void render(boolean show_motion_field) { // build mesh current_cam.open(); noStroke(); for (int i = 0; i < mesh.size(); i++) { Triangle t = mesh.get(i); t.render(); } if (show_motion_field) { current_cam.close(); motion_field.render(); } } void save(String path) { saveFrame(path + "_" + str(frame_count) + ".png"); } void saveMotionField(String path) { motion_field.save(path + "_" + str(frame_count) + ".txt"); } } libvpx-1.8.2/tools/3D-Reconstruction/sketch_3D_reconstruction/Transform.pde000066400000000000000000000051501357355204000270750ustar00rootroot00000000000000class Transform { float[] inv_rot; // inverse of rotation matrix PVector inv_mov; // inverse of movement vector float focal; // the focal distacne of real camera int w, h; // the width and height of the frame float normalier; // nomalization factor of depth Transform(float tx, float ty, float tz, float qx, float qy, float qz, float qw, float fov, int w, int h, float normalier) { // currently, we did not use the info of real camera's position and // quaternion maybe we will use it in the future when combine all frames float[] rot = quaternion2Mat3x3(qx, qy, qz, qw); inv_rot = transpose3x3(rot); inv_mov = new PVector(-tx, -ty, -tz); this.focal = 0.5f * h / tan(fov / 2.0); this.w = w; this.h = h; this.normalier = normalier; } PVector transform(int i, int j, float d) { // transfer from camera view to world view float z = d / normalier; float x = (i - w / 2.0f) * z / focal; float y = (j - h / 2.0f) * z / focal; return new PVector(x, y, z); } } // get rotation matrix by using rotation axis and angle float[] getRotationMat3x3(float angle, float ax, float ay, float az) { float[] mat = new float[9]; float c = cos(angle); float s = sin(angle); mat[0] = c + ax * ax * (1 - c); mat[1] = ax * ay * (1 - c) - az * s; mat[2] = ax * az * (1 - c) + ay * s; mat[3] = ay * ax * (1 - c) + az * s; mat[4] = c + ay * ay * (1 - c); mat[5] = ay * az * (1 - c) - ax * s; mat[6] = az * ax * (1 - c) - ay * s; mat[7] = az * ay * (1 - c) + ax * s; mat[8] = c + az * az * (1 - c); return mat; } // get rotation matrix by using quaternion float[] quaternion2Mat3x3(float qx, float qy, float qz, float qw) { float[] mat = new float[9]; mat[0] = 1 - 2 * qy * qy - 2 * qz * qz; mat[1] = 2 * qx * qy - 2 * qz * qw; mat[2] = 2 * qx * qz + 2 * qy * qw; mat[3] = 2 * qx * qy + 2 * qz * qw; mat[4] = 1 - 2 * qx * qx - 2 * qz * qz; mat[5] = 2 * qy * qz 
- 2 * qx * qw; mat[6] = 2 * qx * qz - 2 * qy * qw; mat[7] = 2 * qy * qz + 2 * qx * qw; mat[8] = 1 - 2 * qx * qx - 2 * qy * qy; return mat; } // tranpose a 3x3 matrix float[] transpose3x3(float[] mat) { float[] Tmat = new float[9]; for (int i = 0; i < 3; i++) for (int j = 0; j < 3; j++) { Tmat[i * 3 + j] = mat[j * 3 + i]; } return Tmat; } // multiply a matrix with vector PVector MatxVec3(float[] mat, PVector v) { float[] vec = v.array(); float[] res = new float[3]; for (int i = 0; i < 3; i++) { res[i] = 0.0f; for (int j = 0; j < 3; j++) { res[i] += mat[i * 3 + j] * vec[j]; } } return new PVector(res[0], res[1], res[2]); } libvpx-1.8.2/tools/3D-Reconstruction/sketch_3D_reconstruction/Util.pde000066400000000000000000000020131357355204000260320ustar00rootroot00000000000000// show grids void showGrids(int block_size) { ortho(-width, 0, -height, 0); camera(0, 0, 0, 0, 0, 1, 0, 1, 0); stroke(0, 0, 255); for (int i = 0; i < height; i += block_size) { line(0, i, width, i); } for (int i = 0; i < width; i += block_size) { line(i, 0, i, height); } } // save the point clould information void savePointCloud(PointCloud point_cloud, String file_name) { String[] positions = new String[point_cloud.points.size()]; String[] colors = new String[point_cloud.points.size()]; for (int i = 0; i < point_cloud.points.size(); i++) { PVector point = point_cloud.getPosition(i); color point_color = point_cloud.getColor(i); positions[i] = str(point.x) + ' ' + str(point.y) + ' ' + str(point.z); colors[i] = str(((point_color >> 16) & 0xFF) / 255.0) + ' ' + str(((point_color >> 8) & 0xFF) / 255.0) + ' ' + str((point_color & 0xFF) / 255.0); } saveStrings(file_name + "_pos.txt", positions); saveStrings(file_name + "_color.txt", colors); } libvpx-1.8.2/tools/3D-Reconstruction/sketch_3D_reconstruction/sketch_3D_reconstruction.pde000066400000000000000000000047731357355204000321040ustar00rootroot00000000000000/*The dataset is from *Computer Vision Group *TUM Department of Informatics Technical *University of Munich *https://vision.in.tum.de/data/datasets/rgbd-dataset/download#freiburg1_xyz */ Scene scene; void setup() { size(640, 480, P3D); // default settings int frame_no = 0; // frame number float fov = PI / 3; // field of view int block_size = 8; // block size float normalizer = 5000.0f; // normalizer // initialize PointCloud point_cloud = new PointCloud(); // synchronized rgb, depth and ground truth String head = "../data/"; String[] rgb_depth_gt = loadStrings(head + "rgb_depth_groundtruth.txt"); // read in rgb and depth image file paths as well as corresponding camera // posiiton and quaternion String[] info = split(rgb_depth_gt[frame_no], ' '); String rgb_path = head + info[1]; String depth_path = head + info[3]; float tx = float(info[7]), ty = float(info[8]), tz = float(info[9]); // real camera position float qx = float(info[10]), qy = float(info[11]), qz = float(info[12]), qw = float(info[13]); // quaternion // build transformer Transform trans = new Transform(tx, ty, tz, qx, qy, qz, qw, fov, width, height, normalizer); PImage rgb = loadImage(rgb_path); PImage depth = loadImage(depth_path); // generate point cloud point_cloud.generate(rgb, depth, trans); // initialize camera Camera camera = new Camera(fov, new PVector(0, 0, 0), new PVector(0, 0, 1), new PVector(0, 1, 0)); // initialize motion field MotionField motion_field = new MotionField(block_size); // initialize scene scene = new Scene(camera, point_cloud, motion_field); } boolean inter = false; void draw() { background(0); // run camera dragged mouse to rotate camera 
// w: go forward // s: go backward // a: go left // d: go right // up arrow: go up // down arrow: go down //+ increase move speed //- decrease move speed // r: rotate the camera // b: reset to initial position scene.run(); // true: make interpolation; false: do not make // interpolation if (keyPressed && key == 'o') { inter = true; } scene.render( false); // true: turn on motion field; false: turn off motion field // save frame with no motion field scene.save("../data/frame/raw"); background(0); scene.render(true); showGrids(scene.motion_field.block_size); // save frame with motion field scene.save("../data/frame/raw_mv"); scene.saveMotionField("../data/frame/mv"); } libvpx-1.8.2/tools/cpplint.py000077500000000000000000005451161357355204000162340ustar00rootroot00000000000000#!/usr/bin/python # # Copyright (c) 2009 Google Inc. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following disclaimer # in the documentation and/or other materials provided with the # distribution. # * Neither the name of Google Inc. nor the names of its # contributors may be used to endorse or promote products derived from # this software without specific prior written permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """Does google-lint on c++ files. The goal of this script is to identify places in the code that *may* be in non-compliance with google style. It does not attempt to fix up these problems -- the point is to educate. It does also not attempt to find all problems, or to ensure that everything it does find is legitimately a problem. In particular, we can get very confused by /* and // inside strings! We do a small hack, which is to ignore //'s with "'s after them on the same line, but it is far from perfect (in either direction). """ import codecs import copy import getopt import math # for log import os import re import sre_compile import string import sys import unicodedata _USAGE = """ Syntax: cpplint.py [--verbose=#] [--output=vs7] [--filter=-x,+y,...] [--counting=total|toplevel|detailed] [--root=subdir] [--linelength=digits] [file] ... The style guidelines this tries to follow are those in http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml Every problem is given a confidence score from 1-5, with 5 meaning we are certain of the problem, and 1 meaning it could be a legitimate construct. This will miss some errors, and is not a substitute for a code review. 
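  For example, the following command checks two files (foo.cc and bar.h are
  placeholders), disables most whitespace checks, and prints per-category
  error counts:

    cpplint.py --filter=-whitespace,+whitespace/braces --counting=detailed \
        foo.cc bar.h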
To suppress false-positive errors of a certain category, add a 'NOLINT(category)' comment to the line. NOLINT or NOLINT(*) suppresses errors of all categories on that line. The files passed in will be linted; at least one file must be provided. Default linted extensions are .cc, .cpp, .cu, .cuh and .h. Change the extensions with the --extensions flag. Flags: output=vs7 By default, the output is formatted to ease emacs parsing. Visual Studio compatible output (vs7) may also be used. Other formats are unsupported. verbose=# Specify a number 0-5 to restrict errors to certain verbosity levels. filter=-x,+y,... Specify a comma-separated list of category-filters to apply: only error messages whose category names pass the filters will be printed. (Category names are printed with the message and look like "[whitespace/indent]".) Filters are evaluated left to right. "-FOO" and "FOO" means "do not print categories that start with FOO". "+FOO" means "do print categories that start with FOO". Examples: --filter=-whitespace,+whitespace/braces --filter=whitespace,runtime/printf,+runtime/printf_format --filter=-,+build/include_what_you_use To see a list of all the categories used in cpplint, pass no arg: --filter= counting=total|toplevel|detailed The total number of errors found is always printed. If 'toplevel' is provided, then the count of errors in each of the top-level categories like 'build' and 'whitespace' will also be printed. If 'detailed' is provided, then a count is provided for each category like 'build/class'. root=subdir The root directory used for deriving header guard CPP variable. By default, the header guard CPP variable is calculated as the relative path to the directory that contains .git, .hg, or .svn. When this flag is specified, the relative path is calculated from the specified directory. If the specified directory does not exist, this flag is ignored. Examples: Assuing that src/.git exists, the header guard CPP variables for src/chrome/browser/ui/browser.h are: No flag => CHROME_BROWSER_UI_BROWSER_H_ --root=chrome => BROWSER_UI_BROWSER_H_ --root=chrome/browser => UI_BROWSER_H_ linelength=digits This is the allowed line length for the project. The default value is 80 characters. Examples: --linelength=120 extensions=extension,extension,... The allowed file extensions that cpplint will check Examples: --extensions=hpp,cpp """ # We categorize each error message we print. Here are the categories. # We want an explicit list so we can list them all in cpplint --filter=. # If you add a new error message with a new category, add it to the list # here! cpplint_unittest.py should tell you if you forget to do this. 
_ERROR_CATEGORIES = [ 'build/class', 'build/deprecated', 'build/endif_comment', 'build/explicit_make_pair', 'build/forward_decl', 'build/header_guard', 'build/include', 'build/include_alpha', 'build/include_order', 'build/include_what_you_use', 'build/namespaces', 'build/printf_format', 'build/storage_class', 'legal/copyright', 'readability/alt_tokens', 'readability/braces', 'readability/casting', 'readability/check', 'readability/constructors', 'readability/fn_size', 'readability/function', 'readability/multiline_comment', 'readability/multiline_string', 'readability/namespace', 'readability/nolint', 'readability/nul', 'readability/streams', 'readability/todo', 'readability/utf8', 'runtime/arrays', 'runtime/casting', 'runtime/explicit', 'runtime/int', 'runtime/init', 'runtime/invalid_increment', 'runtime/member_string_references', 'runtime/memset', 'runtime/operator', 'runtime/printf', 'runtime/printf_format', 'runtime/references', 'runtime/sizeof', 'runtime/string', 'runtime/threadsafe_fn', 'runtime/vlog', 'whitespace/blank_line', 'whitespace/braces', 'whitespace/comma', 'whitespace/comments', 'whitespace/empty_conditional_body', 'whitespace/empty_loop_body', 'whitespace/end_of_line', 'whitespace/ending_newline', 'whitespace/forcolon', 'whitespace/indent', 'whitespace/line_length', 'whitespace/newline', 'whitespace/operators', 'whitespace/parens', 'whitespace/semicolon', 'whitespace/tab', 'whitespace/todo' ] # The default state of the category filter. This is overrided by the --filter= # flag. By default all errors are on, so only add here categories that should be # off by default (i.e., categories that must be enabled by the --filter= flags). # All entries here should start with a '-' or '+', as in the --filter= flag. _DEFAULT_FILTERS = ['-build/include_alpha'] # We used to check for high-bit characters, but after much discussion we # decided those were OK, as long as they were in UTF-8 and didn't represent # hard-coded international strings, which belong in a separate i18n file. 
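# The filter mechanics documented in _USAGE and seeded by _DEFAULT_FILTERS
# above can be summarized with a short self-contained sketch. The helper
# below is purely illustrative and is not used anywhere in cpplint (the real
# logic lives in _ShouldPrintError further down): filters are evaluated left
# to right, and the last '+'/'-' entry whose body is a prefix of the category
# wins.
def _filter_semantics_demo(category, filters):
  keep = True  # with no matching filter, every category is printed
  for f in filters:  # each entry is assumed to start with '+' or '-'
    sign, prefix = f[0], f[1:]
    if category.startswith(prefix):
      keep = (sign == '+')
  return keep
# e.g. _filter_semantics_demo('build/include_alpha',
#                             _DEFAULT_FILTERS + ['+build/include_alpha'])
# returns True: the user's '+' entry is appended after the default '-' entry
# and therefore wins, re-enabling the category.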
# C++ headers _CPP_HEADERS = frozenset([ # Legacy 'algobase.h', 'algo.h', 'alloc.h', 'builtinbuf.h', 'bvector.h', 'complex.h', 'defalloc.h', 'deque.h', 'editbuf.h', 'fstream.h', 'function.h', 'hash_map', 'hash_map.h', 'hash_set', 'hash_set.h', 'hashtable.h', 'heap.h', 'indstream.h', 'iomanip.h', 'iostream.h', 'istream.h', 'iterator.h', 'list.h', 'map.h', 'multimap.h', 'multiset.h', 'ostream.h', 'pair.h', 'parsestream.h', 'pfstream.h', 'procbuf.h', 'pthread_alloc', 'pthread_alloc.h', 'rope', 'rope.h', 'ropeimpl.h', 'set.h', 'slist', 'slist.h', 'stack.h', 'stdiostream.h', 'stl_alloc.h', 'stl_relops.h', 'streambuf.h', 'stream.h', 'strfile.h', 'strstream.h', 'tempbuf.h', 'tree.h', 'type_traits.h', 'vector.h', # 17.6.1.2 C++ library headers 'algorithm', 'array', 'atomic', 'bitset', 'chrono', 'codecvt', 'complex', 'condition_variable', 'deque', 'exception', 'forward_list', 'fstream', 'functional', 'future', 'initializer_list', 'iomanip', 'ios', 'iosfwd', 'iostream', 'istream', 'iterator', 'limits', 'list', 'locale', 'map', 'memory', 'mutex', 'new', 'numeric', 'ostream', 'queue', 'random', 'ratio', 'regex', 'set', 'sstream', 'stack', 'stdexcept', 'streambuf', 'string', 'strstream', 'system_error', 'thread', 'tuple', 'typeindex', 'typeinfo', 'type_traits', 'unordered_map', 'unordered_set', 'utility', 'valarray', 'vector', # 17.6.1.2 C++ headers for C library facilities 'cassert', 'ccomplex', 'cctype', 'cerrno', 'cfenv', 'cfloat', 'cinttypes', 'ciso646', 'climits', 'clocale', 'cmath', 'csetjmp', 'csignal', 'cstdalign', 'cstdarg', 'cstdbool', 'cstddef', 'cstdint', 'cstdio', 'cstdlib', 'cstring', 'ctgmath', 'ctime', 'cuchar', 'cwchar', 'cwctype', ]) # Assertion macros. These are defined in base/logging.h and # testing/base/gunit.h. Note that the _M versions need to come first # for substring matching to work. _CHECK_MACROS = [ 'DCHECK', 'CHECK', 'EXPECT_TRUE_M', 'EXPECT_TRUE', 'ASSERT_TRUE_M', 'ASSERT_TRUE', 'EXPECT_FALSE_M', 'EXPECT_FALSE', 'ASSERT_FALSE_M', 'ASSERT_FALSE', ] # Replacement macros for CHECK/DCHECK/EXPECT_TRUE/EXPECT_FALSE _CHECK_REPLACEMENT = dict([(m, {}) for m in _CHECK_MACROS]) for op, replacement in [('==', 'EQ'), ('!=', 'NE'), ('>=', 'GE'), ('>', 'GT'), ('<=', 'LE'), ('<', 'LT')]: _CHECK_REPLACEMENT['DCHECK'][op] = 'DCHECK_%s' % replacement _CHECK_REPLACEMENT['CHECK'][op] = 'CHECK_%s' % replacement _CHECK_REPLACEMENT['EXPECT_TRUE'][op] = 'EXPECT_%s' % replacement _CHECK_REPLACEMENT['ASSERT_TRUE'][op] = 'ASSERT_%s' % replacement _CHECK_REPLACEMENT['EXPECT_TRUE_M'][op] = 'EXPECT_%s_M' % replacement _CHECK_REPLACEMENT['ASSERT_TRUE_M'][op] = 'ASSERT_%s_M' % replacement for op, inv_replacement in [('==', 'NE'), ('!=', 'EQ'), ('>=', 'LT'), ('>', 'LE'), ('<=', 'GT'), ('<', 'GE')]: _CHECK_REPLACEMENT['EXPECT_FALSE'][op] = 'EXPECT_%s' % inv_replacement _CHECK_REPLACEMENT['ASSERT_FALSE'][op] = 'ASSERT_%s' % inv_replacement _CHECK_REPLACEMENT['EXPECT_FALSE_M'][op] = 'EXPECT_%s_M' % inv_replacement _CHECK_REPLACEMENT['ASSERT_FALSE_M'][op] = 'ASSERT_%s_M' % inv_replacement # Alternative tokens and their replacements. For full list, see section 2.5 # Alternative tokens [lex.digraph] in the C++ standard. # # Digraphs (such as '%:') are not included here since it's a mess to # match those on a word boundary. _ALT_TOKEN_REPLACEMENT = { 'and': '&&', 'bitor': '|', 'or': '||', 'xor': '^', 'compl': '~', 'bitand': '&', 'and_eq': '&=', 'or_eq': '|=', 'xor_eq': '^=', 'not': '!', 'not_eq': '!=' } # Compile regular expression that matches all the above keywords. 
The "[ =()]" # bit is meant to avoid matching these keywords outside of boolean expressions. # # False positives include C-style multi-line comments and multi-line strings # but those have always been troublesome for cpplint. _ALT_TOKEN_REPLACEMENT_PATTERN = re.compile( r'[ =()](' + ('|'.join(_ALT_TOKEN_REPLACEMENT.keys())) + r')(?=[ (]|$)') # These constants define types of headers for use with # _IncludeState.CheckNextIncludeOrder(). _C_SYS_HEADER = 1 _CPP_SYS_HEADER = 2 _LIKELY_MY_HEADER = 3 _POSSIBLE_MY_HEADER = 4 _OTHER_HEADER = 5 # These constants define the current inline assembly state _NO_ASM = 0 # Outside of inline assembly block _INSIDE_ASM = 1 # Inside inline assembly block _END_ASM = 2 # Last line of inline assembly block _BLOCK_ASM = 3 # The whole block is an inline assembly block # Match start of assembly blocks _MATCH_ASM = re.compile(r'^\s*(?:asm|_asm|__asm|__asm__)' r'(?:\s+(volatile|__volatile__))?' r'\s*[{(]') _regexp_compile_cache = {} # Finds occurrences of NOLINT or NOLINT(...). _RE_SUPPRESSION = re.compile(r'\bNOLINT\b(\([^)]*\))?') # {str, set(int)}: a map from error categories to sets of linenumbers # on which those errors are expected and should be suppressed. _error_suppressions = {} # The root directory used for deriving header guard CPP variable. # This is set by --root flag. _root = None # The allowed line length of files. # This is set by --linelength flag. _line_length = 80 # The allowed extensions for file names # This is set by --extensions flag. _valid_extensions = set(['cc', 'h', 'cpp', 'cu', 'cuh']) def ParseNolintSuppressions(filename, raw_line, linenum, error): """Updates the global list of error-suppressions. Parses any NOLINT comments on the current line, updating the global error_suppressions store. Reports an error if the NOLINT comment was malformed. Args: filename: str, the name of the input file. raw_line: str, the line of input text, with comments. linenum: int, the number of the current line. error: function, an error handler. """ # FIXME(adonovan): "NOLINT(" is misparsed as NOLINT(*). matched = _RE_SUPPRESSION.search(raw_line) if matched: category = matched.group(1) if category in (None, '(*)'): # => "suppress all" _error_suppressions.setdefault(None, set()).add(linenum) else: if category.startswith('(') and category.endswith(')'): category = category[1:-1] if category in _ERROR_CATEGORIES: _error_suppressions.setdefault(category, set()).add(linenum) else: error(filename, linenum, 'readability/nolint', 5, 'Unknown NOLINT error category: %s' % category) def ResetNolintSuppressions(): "Resets the set of NOLINT suppressions to empty." _error_suppressions.clear() def IsErrorSuppressedByNolint(category, linenum): """Returns true if the specified error category is suppressed on this line. Consults the global error_suppressions map populated by ParseNolintSuppressions/ResetNolintSuppressions. Args: category: str, the category of the error. linenum: int, the current line number. Returns: bool, True iff the error should be suppressed due to a NOLINT comment. """ return (linenum in _error_suppressions.get(category, set()) or linenum in _error_suppressions.get(None, set())) def Match(pattern, s): """Matches the string with the pattern, caching the compiled regexp.""" # The regexp compilation caching is inlined in both Match and Search for # performance reasons; factoring it out into a separate function turns out # to be noticeably expensive. 
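  # The cache maps the raw pattern string to its compiled form; Search() and
  # ReplaceAll() below share the same dict.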
if pattern not in _regexp_compile_cache: _regexp_compile_cache[pattern] = sre_compile.compile(pattern) return _regexp_compile_cache[pattern].match(s) def ReplaceAll(pattern, rep, s): """Replaces instances of pattern in a string with a replacement. The compiled regex is kept in a cache shared by Match and Search. Args: pattern: regex pattern rep: replacement text s: search string Returns: string with replacements made (or original string if no replacements) """ if pattern not in _regexp_compile_cache: _regexp_compile_cache[pattern] = sre_compile.compile(pattern) return _regexp_compile_cache[pattern].sub(rep, s) def Search(pattern, s): """Searches the string for the pattern, caching the compiled regexp.""" if pattern not in _regexp_compile_cache: _regexp_compile_cache[pattern] = sre_compile.compile(pattern) return _regexp_compile_cache[pattern].search(s) class _IncludeState(dict): """Tracks line numbers for includes, and the order in which includes appear. As a dict, an _IncludeState object serves as a mapping between include filename and line number on which that file was included. Call CheckNextIncludeOrder() once for each header in the file, passing in the type constants defined above. Calls in an illegal order will raise an _IncludeError with an appropriate error message. """ # self._section will move monotonically through this set. If it ever # needs to move backwards, CheckNextIncludeOrder will raise an error. _INITIAL_SECTION = 0 _MY_H_SECTION = 1 _C_SECTION = 2 _CPP_SECTION = 3 _OTHER_H_SECTION = 4 _TYPE_NAMES = { _C_SYS_HEADER: 'C system header', _CPP_SYS_HEADER: 'C++ system header', _LIKELY_MY_HEADER: 'header this file implements', _POSSIBLE_MY_HEADER: 'header this file may implement', _OTHER_HEADER: 'other header', } _SECTION_NAMES = { _INITIAL_SECTION: "... nothing. (This can't be an error.)", _MY_H_SECTION: 'a header this file implements', _C_SECTION: 'C system header', _CPP_SECTION: 'C++ system header', _OTHER_H_SECTION: 'other header', } def __init__(self): dict.__init__(self) self.ResetSection() def ResetSection(self): # The name of the current section. self._section = self._INITIAL_SECTION # The path of last found header. self._last_header = '' def SetLastHeader(self, header_path): self._last_header = header_path def CanonicalizeAlphabeticalOrder(self, header_path): """Returns a path canonicalized for alphabetical comparison. - replaces "-" with "_" so they both cmp the same. - removes '-inl' since we don't require them to be after the main header. - lowercase everything, just in case. Args: header_path: Path to be canonicalized. Returns: Canonicalized path. """ return header_path.replace('-inl.h', '.h').replace('-', '_').lower() def IsInAlphabeticalOrder(self, clean_lines, linenum, header_path): """Check if a header is in alphabetical order with the previous header. Args: clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. header_path: Canonicalized header to be checked. Returns: Returns true if the header is in alphabetical order. """ # If previous section is different from current section, _last_header will # be reset to empty string, so it's always less than current header. # # If previous line was a blank line, assume that the headers are # intentionally sorted the way they are. if (self._last_header > header_path and not Match(r'^\s*$', clean_lines.elided[linenum - 1])): return False return True def CheckNextIncludeOrder(self, header_type): """Returns a non-empty error message if the next header is out of order. 
This function also updates the internal state to be ready to check the next include. Args: header_type: One of the _XXX_HEADER constants defined above. Returns: The empty string if the header is in the right order, or an error message describing what's wrong. """ error_message = ('Found %s after %s' % (self._TYPE_NAMES[header_type], self._SECTION_NAMES[self._section])) last_section = self._section if header_type == _C_SYS_HEADER: if self._section <= self._C_SECTION: self._section = self._C_SECTION else: self._last_header = '' return error_message elif header_type == _CPP_SYS_HEADER: if self._section <= self._CPP_SECTION: self._section = self._CPP_SECTION else: self._last_header = '' return error_message elif header_type == _LIKELY_MY_HEADER: if self._section <= self._MY_H_SECTION: self._section = self._MY_H_SECTION else: self._section = self._OTHER_H_SECTION elif header_type == _POSSIBLE_MY_HEADER: if self._section <= self._MY_H_SECTION: self._section = self._MY_H_SECTION else: # This will always be the fallback because we're not sure # enough that the header is associated with this file. self._section = self._OTHER_H_SECTION else: assert header_type == _OTHER_HEADER self._section = self._OTHER_H_SECTION if last_section != self._section: self._last_header = '' return '' class _CppLintState(object): """Maintains module-wide state..""" def __init__(self): self.verbose_level = 1 # global setting. self.error_count = 0 # global count of reported errors # filters to apply when emitting error messages self.filters = _DEFAULT_FILTERS[:] self.counting = 'total' # In what way are we counting errors? self.errors_by_category = {} # string to int dict storing error counts # output format: # "emacs" - format that emacs can parse (default) # "vs7" - format that Microsoft Visual Studio 7 can parse self.output_format = 'emacs' def SetOutputFormat(self, output_format): """Sets the output format for errors.""" self.output_format = output_format def SetVerboseLevel(self, level): """Sets the module's verbosity, and returns the previous setting.""" last_verbose_level = self.verbose_level self.verbose_level = level return last_verbose_level def SetCountingStyle(self, counting_style): """Sets the module's counting options.""" self.counting = counting_style def SetFilters(self, filters): """Sets the error-message filters. These filters are applied when deciding whether to emit a given error message. Args: filters: A string of comma-separated filters (eg "+whitespace/indent"). Each filter should start with + or -; else we die. Raises: ValueError: The comma-separated filters did not all start with '+' or '-'. E.g. "-,+whitespace,-whitespace/indent,whitespace/badfilter" """ # Default filters always have less priority than the flag ones. 
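    # Slice-copy the defaults so that repeated SetFilters() calls never
    # mutate the module-level _DEFAULT_FILTERS list itself.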
self.filters = _DEFAULT_FILTERS[:] for filt in filters.split(','): clean_filt = filt.strip() if clean_filt: self.filters.append(clean_filt) for filt in self.filters: if not (filt.startswith('+') or filt.startswith('-')): raise ValueError('Every filter in --filters must start with + or -' ' (%s does not)' % filt) def ResetErrorCounts(self): """Sets the module's error statistic back to zero.""" self.error_count = 0 self.errors_by_category = {} def IncrementErrorCount(self, category): """Bumps the module's error statistic.""" self.error_count += 1 if self.counting in ('toplevel', 'detailed'): if self.counting != 'detailed': category = category.split('/')[0] if category not in self.errors_by_category: self.errors_by_category[category] = 0 self.errors_by_category[category] += 1 def PrintErrorCounts(self): """Print a summary of errors by category, and the total.""" for category, count in self.errors_by_category.iteritems(): sys.stderr.write('Category \'%s\' errors found: %d\n' % (category, count)) sys.stderr.write('Total errors found: %d\n' % self.error_count) _cpplint_state = _CppLintState() def _OutputFormat(): """Gets the module's output format.""" return _cpplint_state.output_format def _SetOutputFormat(output_format): """Sets the module's output format.""" _cpplint_state.SetOutputFormat(output_format) def _VerboseLevel(): """Returns the module's verbosity setting.""" return _cpplint_state.verbose_level def _SetVerboseLevel(level): """Sets the module's verbosity, and returns the previous setting.""" return _cpplint_state.SetVerboseLevel(level) def _SetCountingStyle(level): """Sets the module's counting options.""" _cpplint_state.SetCountingStyle(level) def _Filters(): """Returns the module's list of output filters, as a list.""" return _cpplint_state.filters def _SetFilters(filters): """Sets the module's error-message filters. These filters are applied when deciding whether to emit a given error message. Args: filters: A string of comma-separated filters (eg "whitespace/indent"). Each filter should start with + or -; else we die. """ _cpplint_state.SetFilters(filters) class _FunctionState(object): """Tracks current function name and the number of lines in its body.""" _NORMAL_TRIGGER = 250 # for --v=0, 500 for --v=1, etc. _TEST_TRIGGER = 400 # about 50% more than _NORMAL_TRIGGER. def __init__(self): self.in_a_function = False self.lines_in_function = 0 self.current_function = '' def Begin(self, function_name): """Start analyzing function body. Args: function_name: The name of the function being tracked. """ self.in_a_function = True self.lines_in_function = 0 self.current_function = function_name def Count(self): """Count line in current function body.""" if self.in_a_function: self.lines_in_function += 1 def Check(self, error, filename, linenum): """Report if too many lines in function body. Args: error: The function to call with any errors found. filename: The name of the current file. linenum: The number of the line to check. """ if Match(r'T(EST|est)', self.current_function): base_trigger = self._TEST_TRIGGER else: base_trigger = self._NORMAL_TRIGGER trigger = base_trigger * 2**_VerboseLevel() if self.lines_in_function > trigger: error_level = int(math.log(self.lines_in_function / base_trigger, 2)) # 50 => 0, 100 => 1, 200 => 2, 400 => 3, 800 => 4, 1600 => 5, ... if error_level > 5: error_level = 5 error(filename, linenum, 'readability/fn_size', error_level, 'Small and focused functions are preferred:' ' %s has %d non-comment lines' ' (error triggered by exceeding %d lines).' 
% ( self.current_function, self.lines_in_function, trigger)) def End(self): """Stop analyzing function body.""" self.in_a_function = False class _IncludeError(Exception): """Indicates a problem with the include order in a file.""" pass class FileInfo: """Provides utility functions for filenames. FileInfo provides easy access to the components of a file's path relative to the project root. """ def __init__(self, filename): self._filename = filename def FullName(self): """Make Windows paths like Unix.""" return os.path.abspath(self._filename).replace('\\', '/') def RepositoryName(self): """FullName after removing the local path to the repository. If we have a real absolute path name here we can try to do something smart: detecting the root of the checkout and truncating /path/to/checkout from the name so that we get header guards that don't include things like "C:\Documents and Settings\..." or "/home/username/..." in them and thus people on different computers who have checked the source out to different locations won't see bogus errors. """ fullname = self.FullName() if os.path.exists(fullname): project_dir = os.path.dirname(fullname) if os.path.exists(os.path.join(project_dir, ".svn")): # If there's a .svn file in the current directory, we recursively look # up the directory tree for the top of the SVN checkout root_dir = project_dir one_up_dir = os.path.dirname(root_dir) while os.path.exists(os.path.join(one_up_dir, ".svn")): root_dir = os.path.dirname(root_dir) one_up_dir = os.path.dirname(one_up_dir) prefix = os.path.commonprefix([root_dir, project_dir]) return fullname[len(prefix) + 1:] # Not SVN <= 1.6? Try to find a git, hg, or svn top level directory by # searching up from the current path. root_dir = os.path.dirname(fullname) while (root_dir != os.path.dirname(root_dir) and not os.path.exists(os.path.join(root_dir, ".git")) and not os.path.exists(os.path.join(root_dir, ".hg")) and not os.path.exists(os.path.join(root_dir, ".svn"))): root_dir = os.path.dirname(root_dir) if (os.path.exists(os.path.join(root_dir, ".git")) or os.path.exists(os.path.join(root_dir, ".hg")) or os.path.exists(os.path.join(root_dir, ".svn"))): prefix = os.path.commonprefix([root_dir, project_dir]) return fullname[len(prefix) + 1:] # Don't know what to do; header guard warnings may be wrong... return fullname def Split(self): """Splits the file into the directory, basename, and extension. For 'chrome/browser/browser.cc', Split() would return ('chrome/browser', 'browser', '.cc') Returns: A tuple of (directory, basename, extension). """ googlename = self.RepositoryName() project, rest = os.path.split(googlename) return (project,) + os.path.splitext(rest) def BaseName(self): """File base name - text after the final slash, before the final period.""" return self.Split()[1] def Extension(self): """File extension - text following the final period.""" return self.Split()[2] def NoExtension(self): """File has no source file extension.""" return '/'.join(self.Split()[0:2]) def IsSource(self): """File has a source file extension.""" return self.Extension()[1:] in ('c', 'cc', 'cpp', 'cxx') def _ShouldPrintError(category, confidence, linenum): """If confidence >= verbose, category passes filter and is not suppressed.""" # There are three ways we might decide not to print an error message: # a "NOLINT(category)" comment appears in the source, # the verbosity level isn't high enough, or the filters filter it out. 
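  # Each early return below corresponds to one of those three cases, checked
  # in that order.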
if IsErrorSuppressedByNolint(category, linenum): return False if confidence < _cpplint_state.verbose_level: return False is_filtered = False for one_filter in _Filters(): if one_filter.startswith('-'): if category.startswith(one_filter[1:]): is_filtered = True elif one_filter.startswith('+'): if category.startswith(one_filter[1:]): is_filtered = False else: assert False # should have been checked for in SetFilter. if is_filtered: return False return True def Error(filename, linenum, category, confidence, message): """Logs the fact we've found a lint error. We log where the error was found, and also our confidence in the error, that is, how certain we are this is a legitimate style regression, and not a misidentification or a use that's sometimes justified. False positives can be suppressed by the use of "cpplint(category)" comments on the offending line. These are parsed into _error_suppressions. Args: filename: The name of the file containing the error. linenum: The number of the line containing the error. category: A string used to describe the "category" this bug falls under: "whitespace", say, or "runtime". Categories may have a hierarchy separated by slashes: "whitespace/indent". confidence: A number from 1-5 representing a confidence score for the error, with 5 meaning that we are certain of the problem, and 1 meaning that it could be a legitimate construct. message: The error message. """ if _ShouldPrintError(category, confidence, linenum): _cpplint_state.IncrementErrorCount(category) if _cpplint_state.output_format == 'vs7': sys.stderr.write('%s(%s): %s [%s] [%d]\n' % ( filename, linenum, message, category, confidence)) elif _cpplint_state.output_format == 'eclipse': sys.stderr.write('%s:%s: warning: %s [%s] [%d]\n' % ( filename, linenum, message, category, confidence)) else: sys.stderr.write('%s:%s: %s [%s] [%d]\n' % ( filename, linenum, message, category, confidence)) # Matches standard C++ escape sequences per 2.13.2.3 of the C++ standard. _RE_PATTERN_CLEANSE_LINE_ESCAPES = re.compile( r'\\([abfnrtv?"\\\']|\d+|x[0-9a-fA-F]+)') # Matches strings. Escape codes should already be removed by ESCAPES. _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES = re.compile(r'"[^"]*"') # Matches characters. Escape codes should already be removed by ESCAPES. _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES = re.compile(r"'.'") # Matches multi-line C++ comments. # This RE is a little bit more complicated than one might expect, because we # have to take care of space removals tools so we can handle comments inside # statements better. # The current rule is: We only clear spaces from both sides when we're at the # end of the line. Otherwise, we try to remove spaces from the right side, # if this doesn't work we try on left side but only if there's a non-character # on the right. _RE_PATTERN_CLEANSE_LINE_C_COMMENTS = re.compile( r"""(\s*/\*.*\*/\s*$| /\*.*\*/\s+| \s+/\*.*\*/(?=\W)| /\*.*\*/)""", re.VERBOSE) def IsCppString(line): """Does line terminate so, that the next symbol is in string constant. This function does not consider single-line nor multi-line comments. Args: line: is a partial line of code starting from the 0..n. Returns: True, if next character appended to 'line' is inside a string constant. """ line = line.replace(r'\\', 'XX') # after this, \\" does not match to \" return ((line.count('"') - line.count(r'\"') - line.count("'\"'")) & 1) == 1 def CleanseRawStrings(raw_lines): """Removes C++11 raw strings from lines. 
Before: static const char kData[] = R"( multi-line string )"; After: static const char kData[] = "" (replaced by blank line) ""; Args: raw_lines: list of raw lines. Returns: list of lines with C++11 raw strings replaced by empty strings. """ delimiter = None lines_without_raw_strings = [] for line in raw_lines: if delimiter: # Inside a raw string, look for the end end = line.find(delimiter) if end >= 0: # Found the end of the string, match leading space for this # line and resume copying the original lines, and also insert # a "" on the last line. leading_space = Match(r'^(\s*)\S', line) line = leading_space.group(1) + '""' + line[end + len(delimiter):] delimiter = None else: # Haven't found the end yet, append a blank line. line = '' else: # Look for beginning of a raw string. # See 2.14.15 [lex.string] for syntax. matched = Match(r'^(.*)\b(?:R|u8R|uR|UR|LR)"([^\s\\()]*)\((.*)$', line) if matched: delimiter = ')' + matched.group(2) + '"' end = matched.group(3).find(delimiter) if end >= 0: # Raw string ended on same line line = (matched.group(1) + '""' + matched.group(3)[end + len(delimiter):]) delimiter = None else: # Start of a multi-line raw string line = matched.group(1) + '""' lines_without_raw_strings.append(line) # TODO(unknown): if delimiter is not None here, we might want to # emit a warning for unterminated string. return lines_without_raw_strings def FindNextMultiLineCommentStart(lines, lineix): """Find the beginning marker for a multiline comment.""" while lineix < len(lines): if lines[lineix].strip().startswith('/*'): # Only return this marker if the comment goes beyond this line if lines[lineix].strip().find('*/', 2) < 0: return lineix lineix += 1 return len(lines) def FindNextMultiLineCommentEnd(lines, lineix): """We are inside a comment, find the end marker.""" while lineix < len(lines): if lines[lineix].strip().endswith('*/'): return lineix lineix += 1 return len(lines) def RemoveMultiLineCommentsFromRange(lines, begin, end): """Clears a range of lines for multi-line comments.""" # Having // dummy comments makes the lines non-empty, so we will not get # unnecessary blank line warnings later in the code. for i in range(begin, end): lines[i] = '// dummy' def RemoveMultiLineComments(filename, lines, error): """Removes multiline (c-style) comments from lines.""" lineix = 0 while lineix < len(lines): lineix_begin = FindNextMultiLineCommentStart(lines, lineix) if lineix_begin >= len(lines): return lineix_end = FindNextMultiLineCommentEnd(lines, lineix_begin) if lineix_end >= len(lines): error(filename, lineix_begin + 1, 'readability/multiline_comment', 5, 'Could not find end of multi-line comment') return RemoveMultiLineCommentsFromRange(lines, lineix_begin, lineix_end + 1) lineix = lineix_end + 1 def CleanseComments(line): """Removes //-comments and single-line C-style /* */ comments. Args: line: A line of C++ source. Returns: The line with single-line comments removed. """ commentpos = line.find('//') if commentpos != -1 and not IsCppString(line[:commentpos]): line = line[:commentpos].rstrip() # get rid of /* ... */ return _RE_PATTERN_CLEANSE_LINE_C_COMMENTS.sub('', line) class CleansedLines(object): """Holds 3 copies of all lines with different preprocessing applied to them. 1) elided member contains lines without strings and comments, 2) lines member contains lines without comments, and 3) raw_lines member contains all the lines without processing. All these three members are of <type: list[str]>, and of the same length.
""" def __init__(self, lines): self.elided = [] self.lines = [] self.raw_lines = lines self.num_lines = len(lines) self.lines_without_raw_strings = CleanseRawStrings(lines) for linenum in range(len(self.lines_without_raw_strings)): self.lines.append(CleanseComments( self.lines_without_raw_strings[linenum])) elided = self._CollapseStrings(self.lines_without_raw_strings[linenum]) self.elided.append(CleanseComments(elided)) def NumLines(self): """Returns the number of lines represented.""" return self.num_lines @staticmethod def _CollapseStrings(elided): """Collapses strings and chars on a line to simple "" or '' blocks. We nix strings first so we're not fooled by text like '"http://"' Args: elided: The line being processed. Returns: The line with collapsed strings. """ if not _RE_PATTERN_INCLUDE.match(elided): # Remove escaped characters first to make quote/single quote collapsing # basic. Things that look like escaped characters shouldn't occur # outside of strings and chars. elided = _RE_PATTERN_CLEANSE_LINE_ESCAPES.sub('', elided) elided = _RE_PATTERN_CLEANSE_LINE_SINGLE_QUOTES.sub("''", elided) elided = _RE_PATTERN_CLEANSE_LINE_DOUBLE_QUOTES.sub('""', elided) return elided def FindEndOfExpressionInLine(line, startpos, depth, startchar, endchar): """Find the position just after the matching endchar. Args: line: a CleansedLines line. startpos: start searching at this position. depth: nesting level at startpos. startchar: expression opening character. endchar: expression closing character. Returns: On finding matching endchar: (index just after matching endchar, 0) Otherwise: (-1, new depth at end of this line) """ for i in xrange(startpos, len(line)): if line[i] == startchar: depth += 1 elif line[i] == endchar: depth -= 1 if depth == 0: return (i + 1, 0) return (-1, depth) def CloseExpression(clean_lines, linenum, pos): """If input points to ( or { or [ or <, finds the position that closes it. If lines[linenum][pos] points to a '(' or '{' or '[' or '<', finds the linenum/pos that correspond to the closing of the expression. Args: clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. pos: A position on the line. Returns: A tuple (line, linenum, pos) pointer *past* the closing brace, or (line, len(lines), -1) if we never find a close. Note we ignore strings and comments when matching; and the line we return is the 'cleansed' line at linenum. """ line = clean_lines.elided[linenum] startchar = line[pos] if startchar not in '({[<': return (line, clean_lines.NumLines(), -1) if startchar == '(': endchar = ')' if startchar == '[': endchar = ']' if startchar == '{': endchar = '}' if startchar == '<': endchar = '>' # Check first line (end_pos, num_open) = FindEndOfExpressionInLine( line, pos, 0, startchar, endchar) if end_pos > -1: return (line, linenum, end_pos) # Continue scanning forward while linenum < clean_lines.NumLines() - 1: linenum += 1 line = clean_lines.elided[linenum] (end_pos, num_open) = FindEndOfExpressionInLine( line, 0, num_open, startchar, endchar) if end_pos > -1: return (line, linenum, end_pos) # Did not find endchar before end of file, give up return (line, clean_lines.NumLines(), -1) def FindStartOfExpressionInLine(line, endpos, depth, startchar, endchar): """Find position at the matching startchar. This is almost the reverse of FindEndOfExpressionInLine, but note that the input position and returned position differs by 1. Args: line: a CleansedLines line. endpos: start searching at this position. depth: nesting level at endpos. 
startchar: expression opening character. endchar: expression closing character. Returns: On finding matching startchar: (index at matching startchar, 0) Otherwise: (-1, new depth at beginning of this line) """ for i in xrange(endpos, -1, -1): if line[i] == endchar: depth += 1 elif line[i] == startchar: depth -= 1 if depth == 0: return (i, 0) return (-1, depth) def ReverseCloseExpression(clean_lines, linenum, pos): """If input points to ) or } or ] or >, finds the position that opens it. If lines[linenum][pos] points to a ')' or '}' or ']' or '>', finds the linenum/pos that correspond to the opening of the expression. Args: clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. pos: A position on the line. Returns: A tuple (line, linenum, pos) pointer *at* the opening brace, or (line, 0, -1) if we never find the matching opening brace. Note we ignore strings and comments when matching; and the line we return is the 'cleansed' line at linenum. """ line = clean_lines.elided[linenum] endchar = line[pos] if endchar not in ')}]>': return (line, 0, -1) if endchar == ')': startchar = '(' if endchar == ']': startchar = '[' if endchar == '}': startchar = '{' if endchar == '>': startchar = '<' # Check last line (start_pos, num_open) = FindStartOfExpressionInLine( line, pos, 0, startchar, endchar) if start_pos > -1: return (line, linenum, start_pos) # Continue scanning backward while linenum > 0: linenum -= 1 line = clean_lines.elided[linenum] (start_pos, num_open) = FindStartOfExpressionInLine( line, len(line) - 1, num_open, startchar, endchar) if start_pos > -1: return (line, linenum, start_pos) # Did not find startchar before beginning of file, give up return (line, 0, -1) def CheckForCopyright(filename, lines, error): """Logs an error if no Copyright message appears at the top of the file.""" # We'll say it should occur by line 10. Don't forget there's a # dummy line at the front. for line in xrange(1, min(len(lines), 11)): if re.search(r'Copyright', lines[line], re.I): break else: # means no copyright line was found error(filename, 0, 'legal/copyright', 5, 'No copyright message found. ' 'You should have a line: "Copyright [year] <Copyright Owner>"') def GetHeaderGuardCPPVariable(filename): """Returns the CPP variable that should be used as a header guard. Args: filename: The name of a C++ header file. Returns: The CPP variable that should be used as a header guard in the named file. """ # Restores original filename in case that cpplint is invoked from Emacs's # flymake. filename = re.sub(r'_flymake\.h$', '.h', filename) filename = re.sub(r'/\.flymake/([^/]*)$', r'/\1', filename) fileinfo = FileInfo(filename) file_path_from_root = fileinfo.RepositoryName() if _root: file_path_from_root = re.sub('^' + _root + os.sep, '', file_path_from_root) return re.sub(r'[-./\s]', '_', file_path_from_root).upper() + '_' def CheckForHeaderGuard(filename, lines, error): """Checks that the file contains a header guard. Logs an error if no #ifndef header guard is present. For other headers, checks that the full pathname is used. Args: filename: The name of the C++ header file. lines: An array of strings, each representing a line of the file. error: The function to call with any errors found.
""" cppvar = GetHeaderGuardCPPVariable(filename) ifndef = None ifndef_linenum = 0 define = None endif = None endif_linenum = 0 for linenum, line in enumerate(lines): linesplit = line.split() if len(linesplit) >= 2: # find the first occurrence of #ifndef and #define, save arg if not ifndef and linesplit[0] == '#ifndef': # set ifndef to the header guard presented on the #ifndef line. ifndef = linesplit[1] ifndef_linenum = linenum if not define and linesplit[0] == '#define': define = linesplit[1] # find the last occurrence of #endif, save entire line if line.startswith('#endif'): endif = line endif_linenum = linenum if not ifndef: error(filename, 0, 'build/header_guard', 5, 'No #ifndef header guard found, suggested CPP variable is: %s' % cppvar) return if not define: error(filename, 0, 'build/header_guard', 5, 'No #define header guard found, suggested CPP variable is: %s' % cppvar) return # The guard should be PATH_FILE_H_, but we also allow PATH_FILE_H__ # for backward compatibility. if ifndef != cppvar: error_level = 0 if ifndef != cppvar + '_': error_level = 5 ParseNolintSuppressions(filename, lines[ifndef_linenum], ifndef_linenum, error) error(filename, ifndef_linenum, 'build/header_guard', error_level, '#ifndef header guard has wrong style, please use: %s' % cppvar) if define != ifndef: error(filename, 0, 'build/header_guard', 5, '#ifndef and #define don\'t match, suggested CPP variable is: %s' % cppvar) return if endif != ('#endif // %s' % cppvar): error_level = 0 if endif != ('#endif // %s' % (cppvar + '_')): error_level = 5 ParseNolintSuppressions(filename, lines[endif_linenum], endif_linenum, error) error(filename, endif_linenum, 'build/header_guard', error_level, '#endif line should be "#endif // %s"' % cppvar) def CheckForBadCharacters(filename, lines, error): """Logs an error for each line containing bad characters. Two kinds of bad characters: 1. Unicode replacement characters: These indicate that either the file contained invalid UTF-8 (likely) or Unicode replacement characters (which it shouldn't). Note that it's possible for this to throw off line numbering if the invalid UTF-8 occurred adjacent to a newline. 2. NUL bytes. These are problematic for some tools. Args: filename: The name of the current file. lines: An array of strings, each representing a line of the file. error: The function to call with any errors found. """ for linenum, line in enumerate(lines): if u'\ufffd' in line: error(filename, linenum, 'readability/utf8', 5, 'Line contains invalid UTF-8 (or Unicode replacement character).') if '\0' in line: error(filename, linenum, 'readability/nul', 5, 'Line contains NUL byte.') def CheckForNewlineAtEOF(filename, lines, error): """Logs an error if there is no newline char at the end of the file. Args: filename: The name of the current file. lines: An array of strings, each representing a line of the file. error: The function to call with any errors found. """ # The array lines() was created by adding two newlines to the # original file (go figure), then splitting on \n. # To verify that the file ends in \n, we just have to make sure the # last-but-two element of lines() exists and is empty. if len(lines) < 3 or lines[-2]: error(filename, len(lines) - 2, 'whitespace/ending_newline', 5, 'Could not find a newline character at the end of the file.') def CheckForMultilineCommentsAndStrings(filename, clean_lines, linenum, error): """Logs an error if we see /* ... */ or "..." that extend past one line. /* ... */ comments are legit inside macros, for one line. 
Otherwise, we prefer // comments, so it's ok to warn about the other. Likewise, it's ok for strings to extend across multiple lines, as long as a line continuation character (backslash) terminates each line. Although not currently prohibited by the C++ style guide, it's ugly and unnecessary. We don't do well with either in this lint program, so we warn about both. Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. error: The function to call with any errors found. """ line = clean_lines.elided[linenum] # Remove all \\ (escaped backslashes) from the line. They are OK, and the # second (escaped) slash may trigger later \" detection erroneously. line = line.replace('\\\\', '') if line.count('/*') > line.count('*/'): error(filename, linenum, 'readability/multiline_comment', 5, 'Complex multi-line /*...*/-style comment found. ' 'Lint may give bogus warnings. ' 'Consider replacing these with //-style comments, ' 'with #if 0...#endif, ' 'or with more clearly structured multi-line comments.') if (line.count('"') - line.count('\\"')) % 2: error(filename, linenum, 'readability/multiline_string', 5, 'Multi-line string ("...") found. This lint script doesn\'t ' 'do well with such strings, and may give bogus warnings. ' 'Use C++11 raw strings or concatenation instead.') threading_list = ( ('asctime(', 'asctime_r('), ('ctime(', 'ctime_r('), ('getgrgid(', 'getgrgid_r('), ('getgrnam(', 'getgrnam_r('), ('getlogin(', 'getlogin_r('), ('getpwnam(', 'getpwnam_r('), ('getpwuid(', 'getpwuid_r('), ('gmtime(', 'gmtime_r('), ('localtime(', 'localtime_r('), ('rand(', 'rand_r('), ('strtok(', 'strtok_r('), ('ttyname(', 'ttyname_r('), ) def CheckPosixThreading(filename, clean_lines, linenum, error): """Checks for calls to thread-unsafe functions. Much code has been originally written without consideration of multi-threading. Also, engineers are relying on their old experience; they have learned posix before threading extensions were added. These tests guide the engineers to use thread-safe functions (when using posix directly). Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. error: The function to call with any errors found. """ line = clean_lines.elided[linenum] for single_thread_function, multithread_safe_function in threading_list: ix = line.find(single_thread_function) # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison if ix >= 0 and (ix == 0 or (not line[ix - 1].isalnum() and line[ix - 1] not in ('_', '.', '>'))): error(filename, linenum, 'runtime/threadsafe_fn', 2, 'Consider using ' + multithread_safe_function + '...) instead of ' + single_thread_function + '...) for improved thread safety.') def CheckVlogArguments(filename, clean_lines, linenum, error): """Checks that VLOG() is only used for defining a logging level. For example, VLOG(2) is correct. VLOG(INFO), VLOG(WARNING), VLOG(ERROR), and VLOG(FATAL) are not. Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. error: The function to call with any errors found. """ line = clean_lines.elided[linenum] if Search(r'\bVLOG\((INFO|ERROR|WARNING|DFATAL|FATAL)\)', line): error(filename, linenum, 'runtime/vlog', 5, 'VLOG() should be used with numeric verbosity level. 
' 'Use LOG() if you want symbolic severity levels.') # Matches invalid increment: *count++, which moves pointer instead of # incrementing a value. _RE_PATTERN_INVALID_INCREMENT = re.compile( r'^\s*\*\w+(\+\+|--);') def CheckInvalidIncrement(filename, clean_lines, linenum, error): """Checks for invalid increment *count++. For example following function: void increment_counter(int* count) { *count++; } is invalid, because it effectively does count++, moving pointer, and should be replaced with ++*count, (*count)++ or *count += 1. Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. error: The function to call with any errors found. """ line = clean_lines.elided[linenum] if _RE_PATTERN_INVALID_INCREMENT.match(line): error(filename, linenum, 'runtime/invalid_increment', 5, 'Changing pointer instead of value (or unused value of operator*).') class _BlockInfo(object): """Stores information about a generic block of code.""" def __init__(self, seen_open_brace): self.seen_open_brace = seen_open_brace self.open_parentheses = 0 self.inline_asm = _NO_ASM def CheckBegin(self, filename, clean_lines, linenum, error): """Run checks that applies to text up to the opening brace. This is mostly for checking the text after the class identifier and the "{", usually where the base class is specified. For other blocks, there isn't much to check, so we always pass. Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. error: The function to call with any errors found. """ pass def CheckEnd(self, filename, clean_lines, linenum, error): """Run checks that applies to text after the closing brace. This is mostly used for checking end of namespace comments. Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. error: The function to call with any errors found. """ pass class _ClassInfo(_BlockInfo): """Stores information about a class.""" def __init__(self, name, class_or_struct, clean_lines, linenum): _BlockInfo.__init__(self, False) self.name = name self.starting_linenum = linenum self.is_derived = False if class_or_struct == 'struct': self.access = 'public' self.is_struct = True else: self.access = 'private' self.is_struct = False # Remember initial indentation level for this class. Using raw_lines here # instead of elided to account for leading comments. initial_indent = Match(r'^( *)\S', clean_lines.raw_lines[linenum]) if initial_indent: self.class_indent = len(initial_indent.group(1)) else: self.class_indent = 0 # Try to find the end of the class. This will be confused by things like: # class A { # } *x = { ... # # But it's still good enough for CheckSectionSpacing. self.last_line = 0 depth = 0 for i in range(linenum, clean_lines.NumLines()): line = clean_lines.elided[i] depth += line.count('{') - line.count('}') if not depth: self.last_line = i break def CheckBegin(self, filename, clean_lines, linenum, error): # Look for a bare ':' if Search('(^|[^:]):($|[^:])', clean_lines.elided[linenum]): self.is_derived = True def CheckEnd(self, filename, clean_lines, linenum, error): # Check that closing brace is aligned with beginning of the class. # Only do this if the closing brace is indented by only whitespaces. # This means we will not check single-line class definitions. 
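# Illustrative sketch (hypothetical, not part of cpplint): the alignment test
# below only fires when '}' is the first non-blank character on the line, so
# single-line class definitions are never checked.  A standalone equivalent:
def _closing_brace_aligned_sketch(close_line, class_indent):
  """True unless `close_line` starts with a misaligned closing brace."""
  import re
  m = re.match(r'^( *)\}', close_line)
  return m is None or len(m.group(1)) == class_indent
# e.g. _closing_brace_aligned_sketch('  }', 2)       -> True  (aligned)
#      _closing_brace_aligned_sketch('    }', 2)     -> False (flagged)
#      _closing_brace_aligned_sketch('int x; }', 2)  -> True  (not checked)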
indent = Match(r'^( *)\}', clean_lines.elided[linenum]) if indent and len(indent.group(1)) != self.class_indent: if self.is_struct: parent = 'struct ' + self.name else: parent = 'class ' + self.name error(filename, linenum, 'whitespace/indent', 3, 'Closing brace should be aligned with beginning of %s' % parent) class _NamespaceInfo(_BlockInfo): """Stores information about a namespace.""" def __init__(self, name, linenum): _BlockInfo.__init__(self, False) self.name = name or '' self.starting_linenum = linenum def CheckEnd(self, filename, clean_lines, linenum, error): """Check end of namespace comments.""" line = clean_lines.raw_lines[linenum] # Check how many lines are enclosed in this namespace. Don't issue # warning for missing namespace comments if there aren't enough # lines. However, do apply checks if there is already an end of # namespace comment and it's incorrect. # # TODO(unknown): We always want to check end of namespace comments # if a namespace is large, but sometimes we also want to apply the # check if a short namespace contained nontrivial things (something # other than forward declarations). There is currently no logic on # deciding what these nontrivial things are, so this check is # triggered by namespace size only, which works most of the time. if (linenum - self.starting_linenum < 10 and not Match(r'};*\s*(//|/\*).*\bnamespace\b', line)): return # Look for matching comment at end of namespace. # # Note that we accept C style "/* */" comments for terminating # namespaces, so that code that terminates namespaces inside # preprocessor macros can be cpplint clean. # # We also accept stuff like "// end of namespace <namespace name>." with the # period at the end. # # Besides these, we don't accept anything else, otherwise we might # get false negatives when existing comment is a substring of the # expected namespace. if self.name: # Named namespace if not Match((r'};*\s*(//|/\*).*\bnamespace\s+' + re.escape(self.name) + r'[\*/\.\\\s]*$'), line): error(filename, linenum, 'readability/namespace', 5, 'Namespace should be terminated with "// namespace %s"' % self.name) else: # Anonymous namespace if not Match(r'};*\s*(//|/\*).*\bnamespace[\*/\.\\\s]*$', line): error(filename, linenum, 'readability/namespace', 5, 'Namespace should be terminated with "// namespace"') class _PreprocessorInfo(object): """Stores checkpoints of nesting stacks when #if/#else is seen.""" def __init__(self, stack_before_if): # The entire nesting stack before #if self.stack_before_if = stack_before_if # The entire nesting stack up to #else self.stack_before_else = [] # Whether we have already seen #else or #elif self.seen_else = False class _NestingState(object): """Holds states related to parsing braces.""" def __init__(self): # Stack for tracking all braces. An object is pushed whenever we # see a "{", and popped when we see a "}". Only 3 types of # objects are possible: # - _ClassInfo: a class or struct. # - _NamespaceInfo: a namespace. # - _BlockInfo: some other type of block. self.stack = [] # Stack of _PreprocessorInfo objects. self.pp_stack = [] def SeenOpenBrace(self): """Check if we have seen the opening brace for the innermost block. Returns: True if we have seen the opening brace, False if the innermost block is still expecting an opening brace. """ return (not self.stack) or self.stack[-1].seen_open_brace def InNamespaceBody(self): """Check if we are currently one level inside a namespace body. Returns: True if top of the stack is a namespace block, False otherwise.
""" return self.stack and isinstance(self.stack[-1], _NamespaceInfo) def UpdatePreprocessor(self, line): """Update preprocessor stack. We need to handle preprocessors due to classes like this: #ifdef SWIG struct ResultDetailsPageElementExtensionPoint { #else struct ResultDetailsPageElementExtensionPoint : public Extension { #endif We make the following assumptions (good enough for most files): - Preprocessor condition evaluates to true from #if up to first #else/#elif/#endif. - Preprocessor condition evaluates to false from #else/#elif up to #endif. We still perform lint checks on these lines, but these do not affect nesting stack. Args: line: current line to check. """ if Match(r'^\s*#\s*(if|ifdef|ifndef)\b', line): # Beginning of #if block, save the nesting stack here. The saved # stack will allow us to restore the parsing state in the #else case. self.pp_stack.append(_PreprocessorInfo(copy.deepcopy(self.stack))) elif Match(r'^\s*#\s*(else|elif)\b', line): # Beginning of #else block if self.pp_stack: if not self.pp_stack[-1].seen_else: # This is the first #else or #elif block. Remember the # whole nesting stack up to this point. This is what we # keep after the #endif. self.pp_stack[-1].seen_else = True self.pp_stack[-1].stack_before_else = copy.deepcopy(self.stack) # Restore the stack to how it was before the #if self.stack = copy.deepcopy(self.pp_stack[-1].stack_before_if) else: # TODO(unknown): unexpected #else, issue warning? pass elif Match(r'^\s*#\s*endif\b', line): # End of #if or #else blocks. if self.pp_stack: # If we saw an #else, we will need to restore the nesting # stack to its former state before the #else, otherwise we # will just continue from where we left off. if self.pp_stack[-1].seen_else: # Here we can just use a shallow copy since we are the last # reference to it. self.stack = self.pp_stack[-1].stack_before_else # Drop the corresponding #if self.pp_stack.pop() else: # TODO(unknown): unexpected #endif, issue warning? pass def Update(self, filename, clean_lines, linenum, error): """Update nesting state with current line. Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. error: The function to call with any errors found. """ line = clean_lines.elided[linenum] # Update pp_stack first self.UpdatePreprocessor(line) # Count parentheses. This is to avoid adding struct arguments to # the nesting stack. if self.stack: inner_block = self.stack[-1] depth_change = line.count('(') - line.count(')') inner_block.open_parentheses += depth_change # Also check if we are starting or ending an inline assembly block. if inner_block.inline_asm in (_NO_ASM, _END_ASM): if (depth_change != 0 and inner_block.open_parentheses == 1 and _MATCH_ASM.match(line)): # Enter assembly block inner_block.inline_asm = _INSIDE_ASM else: # Not entering assembly block. If previous line was _END_ASM, # we will now shift to _NO_ASM state. inner_block.inline_asm = _NO_ASM elif (inner_block.inline_asm == _INSIDE_ASM and inner_block.open_parentheses == 0): # Exit assembly block inner_block.inline_asm = _END_ASM # Consume namespace declaration at the beginning of the line. Do # this in a loop so that we catch same line declarations like this: # namespace proto2 { namespace bridge { class MessageSet; } } while True: # Match start of namespace. The "\b\s*" below catches namespace # declarations even if it weren't followed by a whitespace, this # is so that we don't confuse our namespace checker. 
The # missing spaces will be flagged by CheckSpacing. namespace_decl_match = Match(r'^\s*namespace\b\s*([:\w]+)?(.*)$', line) if not namespace_decl_match: break new_namespace = _NamespaceInfo(namespace_decl_match.group(1), linenum) self.stack.append(new_namespace) line = namespace_decl_match.group(2) if line.find('{') != -1: new_namespace.seen_open_brace = True line = line[line.find('{') + 1:] # Look for a class declaration in whatever is left of the line # after parsing namespaces. The regexp accounts for decorated classes # such as in: # class LOCKABLE API Object { # }; # # Templates with class arguments may confuse the parser, for example: # template <class T # class Comparator = less<T>, # class Vector = vector<T> > # class HeapQueue { # # Because this parser has no nesting state about templates, by the # time it saw "class Comparator", it may think that it's a new class. # Nested templates have a similar problem: # template < # typename ExportedType, # typename TupleType, # template <typename SingleArgType> class ImplTemplate> # # To avoid these cases, we ignore classes that are followed by '=' or '>' class_decl_match = Match( r'\s*(template\s*<[\w\s<>,:]*>\s*)?' r'(class|struct)\s+([A-Z_]+\s+)*(\w+(?:::\w+)*)' r'(([^=>]|<[^<>]*>|<[^<>]*<[^<>]*>\s*>)*)$', line) if (class_decl_match and (not self.stack or self.stack[-1].open_parentheses == 0)): self.stack.append(_ClassInfo( class_decl_match.group(4), class_decl_match.group(2), clean_lines, linenum)) line = class_decl_match.group(5) # If we have not yet seen the opening brace for the innermost block, # run checks here. if not self.SeenOpenBrace(): self.stack[-1].CheckBegin(filename, clean_lines, linenum, error) # Update access control if we are inside a class/struct if self.stack and isinstance(self.stack[-1], _ClassInfo): classinfo = self.stack[-1] access_match = Match( r'^(.*)\b(public|private|protected|signals)(\s+(?:slots\s*)?)?' r':(?:[^:]|$)', line) if access_match: classinfo.access = access_match.group(2) # Check that access keywords are indented +1 space. Skip this # check if the keywords are not preceded by whitespaces. indent = access_match.group(1) if (len(indent) != classinfo.class_indent + 1 and Match(r'^\s*$', indent)): if classinfo.is_struct: parent = 'struct ' + classinfo.name else: parent = 'class ' + classinfo.name slots = '' if access_match.group(3): slots = access_match.group(3) error(filename, linenum, 'whitespace/indent', 3, '%s%s: should be indented +1 space inside %s' % ( access_match.group(2), slots, parent)) # Consume braces or semicolons from what's left of the line while True: # Match first brace, semicolon, or closed parenthesis. matched = Match(r'^[^{;)}]*([{;)}])(.*)$', line) if not matched: break token = matched.group(1) if token == '{': # If namespace or class hasn't seen an opening brace yet, mark # namespace/class head as complete. Push a new block onto the # stack otherwise. if not self.SeenOpenBrace(): self.stack[-1].seen_open_brace = True else: self.stack.append(_BlockInfo(True)) if _MATCH_ASM.match(line): self.stack[-1].inline_asm = _BLOCK_ASM elif token == ';' or token == ')': # If we haven't seen an opening brace yet, but we already saw # a semicolon, this is probably a forward declaration. Pop # the stack for these. # # Similarly, if we haven't seen an opening brace yet, but we # already saw a closing parenthesis, then these are probably # function arguments with extra "class" or "struct" keywords. # Also pop the stack for these. if not self.SeenOpenBrace(): self.stack.pop() else: # token == '}' # Perform end of block checks and pop the stack.
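# Illustrative sketch (hypothetical, not part of cpplint): the surrounding
# Update() method consumes '{', ';', ')' and '}' tokens one at a time to keep
# a stack of open blocks.  A heavily simplified standalone model that tracks
# only brace depth (the real code also classifies class/namespace blocks and
# rewinds across #if/#else):
def _brace_depth_sketch(lines):
  """Returns the running brace depth after each comment-free line."""
  depths, depth = [], 0
  for line in lines:
    depth += line.count('{') - line.count('}')
    depths.append(depth)
  return depths
# e.g. _brace_depth_sketch(['namespace a {', 'class B {', '};', '}'])
#      -> [1, 2, 1, 0]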
if self.stack: self.stack[-1].CheckEnd(filename, clean_lines, linenum, error) self.stack.pop() line = matched.group(2) def InnermostClass(self): """Get class info on the top of the stack. Returns: A _ClassInfo object if we are inside a class, or None otherwise. """ for i in range(len(self.stack), 0, -1): classinfo = self.stack[i - 1] if isinstance(classinfo, _ClassInfo): return classinfo return None def CheckCompletedBlocks(self, filename, error): """Checks that all classes and namespaces have been completely parsed. Call this when all lines in a file have been processed. Args: filename: The name of the current file. error: The function to call with any errors found. """ # Note: This test can result in false positives if #ifdef constructs # get in the way of brace matching. See the testBuildClass test in # cpplint_unittest.py for an example of this. for obj in self.stack: if isinstance(obj, _ClassInfo): error(filename, obj.starting_linenum, 'build/class', 5, 'Failed to find complete declaration of class %s' % obj.name) elif isinstance(obj, _NamespaceInfo): error(filename, obj.starting_linenum, 'build/namespaces', 5, 'Failed to find complete declaration of namespace %s' % obj.name) def CheckForNonStandardConstructs(filename, clean_lines, linenum, nesting_state, error): r"""Logs an error if we see certain non-ANSI constructs ignored by gcc-2. Complain about several constructs which gcc-2 accepts, but which are not standard C++. Warning about these in lint is one way to ease the transition to new compilers. - put storage class first (e.g. "static const" instead of "const static"). - "%lld" instead of "%qd" in printf-type functions. - "%1$d" is non-standard in printf-type functions. - "\%" is an undefined character escape sequence. - text after #endif is not allowed. - invalid inner-style forward declaration. - >? and <? operators, and their >?= and <?= cousins. Additionally, check for constructor/destructor style violations and reference members, as it is very convenient to do so while checking for gcc-2 compliance. Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. nesting_state: A _NestingState instance which maintains information about the current stack of nested blocks being parsed. error: A callable to which errors are reported, which takes 4 arguments: filename, line number, error level, and message """ # Remove comments from the line, but leave in strings for now. line = clean_lines.lines[linenum] if Search(r'printf\s*\(.*".*%[-+ ]?\d*q', line): error(filename, linenum, 'runtime/printf_format', 3, '%q in format strings is deprecated. Use %ll instead.') if Search(r'printf\s*\(.*".*%\d+\$', line): error(filename, linenum, 'runtime/printf_format', 2, '%N$ formats are unconventional. Try rewriting to avoid them.') # Remove escaped backslashes before looking for undefined escapes. line = line.replace('\\\\', '') if Search(r'("|\').*\\(%|\[|\(|{)', line): error(filename, linenum, 'build/printf_format', 3, '%, [, (, and { are undefined character escapes. Unescape them.') # For the rest, work with both comments and strings removed. line = clean_lines.elided[linenum] if Search(r'\b(const|volatile|void|char|short|int|long' r'|float|double|signed|unsigned' r'|schar|u?int8|u?int16|u?int32|u?int64)' r'\s+(register|static|extern|typedef)\b', line): error(filename, linenum, 'build/storage_class', 5, 'Storage class (static, extern, typedef, etc) should be first.') if Match(r'\s*#\s*endif\s*[^/\s]+', line): error(filename, linenum, 'build/endif_comment', 5, 'Uncommented text after #endif is non-standard. Use a comment.') if Match(r'\s*class\s+(\w+\s*::\s*)+\w+\s*;', line): error(filename, linenum, 'build/forward_decl', 5, 'Inner-style forward declarations are invalid. Remove this line.') if Search(r'(\w+|[+-]?\d+(\.\d*)?)\s*(<|>)\?=?\s*(\w+|[+-]?\d+)(\.\d*)?', line): error(filename, linenum, 'build/deprecated', 3, '>? and <? (max and min) operators are non-standard and deprecated.') if Search(r'^\s*const\s*string\s*&\s*\w+\s*;', line): # r'\s*const\s*' + type_name + '\s*&\s*\w+\s*;' error(filename, linenum, 'runtime/member_string_references', 2, 'const string& members are dangerous. It is much better to use ' 'alternatives, such as pointers or simple constants.') # Everything else in this function operates on class declarations. # Return early if the top of the nesting stack is not a class, or if # the class head is not completed yet. classinfo = nesting_state.InnermostClass() if not classinfo or not classinfo.seen_open_brace: return # The class may have been declared with namespace or classname qualifiers. # The constructor and destructor will not have those qualifiers. base_classname = classinfo.name.split('::')[-1] # Look for single-argument constructors that aren't marked explicit. # Technically a valid construct, but against style. args = Match(r'\s+(?:inline\s+)?%s\s*\(([^,()]+)\)' % re.escape(base_classname), line) if (args and args.group(1) != 'void' and not Match(r'(const\s+)?%s(\s+const)?\s*(?:<\w+>\s*)?&' % re.escape(base_classname), args.group(1).strip())): error(filename, linenum, 'runtime/explicit', 5, 'Single-argument constructors should be marked explicit.') def CheckSpacingForFunctionCall(filename, line, linenum, error): """Checks for the correctness of various spacing around function calls. Args: filename: The name of the current file. line: The text of the line to check. linenum: The number of the line to check. error: The function to call with any errors found.
""" # Since function calls often occur inside if/for/while/switch # expressions - which have their own, more liberal conventions - we # first see if we should be looking inside such an expression for a # function call, to which we can apply more strict standards. fncall = line # if there's no control flow construct, look at whole line for pattern in (r'\bif\s*\((.*)\)\s*{', r'\bfor\s*\((.*)\)\s*{', r'\bwhile\s*\((.*)\)\s*[{;]', r'\bswitch\s*\((.*)\)\s*{'): match = Search(pattern, line) if match: fncall = match.group(1) # look inside the parens for function calls break # Except in if/for/while/switch, there should never be space # immediately inside parens (eg "f( 3, 4 )"). We make an exception # for nested parens ( (a+b) + c ). Likewise, there should never be # a space before a ( when it's a function argument. I assume it's a # function argument when the char before the whitespace is legal in # a function name (alnum + _) and we're not starting a macro. Also ignore # pointers and references to arrays and functions coz they're too tricky: # we use a very simple way to recognize these: # " (something)(maybe-something)" or # " (something)(maybe-something," or # " (something)[something]" # Note that we assume the contents of [] to be short enough that # they'll never need to wrap. if ( # Ignore control structures. not Search(r'\b(if|for|while|switch|return|new|delete|catch|sizeof)\b', fncall) and # Ignore pointers/references to functions. not Search(r' \([^)]+\)\([^)]*(\)|,$)', fncall) and # Ignore pointers/references to arrays. not Search(r' \([^)]+\)\[[^\]]+\]', fncall)): if Search(r'\w\s*\(\s(?!\s*\\$)', fncall): # a ( used for a fn call error(filename, linenum, 'whitespace/parens', 4, 'Extra space after ( in function call') elif Search(r'\(\s+(?!(\s*\\)|\()', fncall): error(filename, linenum, 'whitespace/parens', 2, 'Extra space after (') if (Search(r'\w\s+\(', fncall) and not Search(r'#\s*define|typedef', fncall) and not Search(r'\w\s+\((\w+::)*\*\w+\)\(', fncall)): error(filename, linenum, 'whitespace/parens', 4, 'Extra space before ( in function call') # If the ) is followed only by a newline or a { + newline, assume it's # part of a control statement (if/while/etc), and don't complain if Search(r'[^)]\s+\)\s*[^{\s]', fncall): # If the closing parenthesis is preceded by only whitespaces, # try to give a more descriptive error message. if Search(r'^\s+\)', fncall): error(filename, linenum, 'whitespace/parens', 2, 'Closing ) should be moved to the previous line') else: error(filename, linenum, 'whitespace/parens', 2, 'Extra space before )') def IsBlankLine(line): """Returns true if the given line is blank. We consider a line to be blank if the line is empty or consists of only white spaces. Args: line: A line of a string. Returns: True, if the given line is blank. """ return not line or line.isspace() def CheckForFunctionLengths(filename, clean_lines, linenum, function_state, error): """Reports for long function bodies. For an overview why this is done, see: http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Write_Short_Functions Uses a simplistic algorithm assuming other style guidelines (especially spacing) are followed. Only checks unindented functions, so class members are unchecked. Trivial bodies are unchecked, so constructors with huge initializer lists may be missed. Blank/comment lines are not counted so as to avoid encouraging the removal of vertical space and comments just to get through a lint check. NOLINT *on the last line of a function* disables this check. 
Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. function_state: Current function name and lines in body so far. error: The function to call with any errors found. """ lines = clean_lines.lines line = lines[linenum] raw = clean_lines.raw_lines raw_line = raw[linenum] joined_line = '' starting_func = False regexp = r'(\w(\w|::|\*|\&|\s)*)\(' # decls * & space::name( ... match_result = Match(regexp, line) if match_result: # If the name is all caps and underscores, figure it's a macro and # ignore it, unless it's TEST or TEST_F. function_name = match_result.group(1).split()[-1] if function_name == 'TEST' or function_name == 'TEST_F' or ( not Match(r'[A-Z_]+$', function_name)): starting_func = True if starting_func: body_found = False for start_linenum in xrange(linenum, clean_lines.NumLines()): start_line = lines[start_linenum] joined_line += ' ' + start_line.lstrip() if Search(r'(;|})', start_line): # Declarations and trivial functions body_found = True break # ... ignore elif Search(r'{', start_line): body_found = True function = Search(r'((\w|:)*)\(', line).group(1) if Match(r'TEST', function): # Handle TEST... macros parameter_regexp = Search(r'(\(.*\))', joined_line) if parameter_regexp: # Ignore bad syntax function += parameter_regexp.group(1) else: function += '()' function_state.Begin(function) break if not body_found: # No body for the function (or evidence of a non-function) was found. error(filename, linenum, 'readability/fn_size', 5, 'Lint failed to find start of function body.') elif Match(r'^\}\s*$', line): # function end function_state.Check(error, filename, linenum) function_state.End() elif not Match(r'^\s*$', line): function_state.Count() # Count non-blank/non-comment lines. _RE_PATTERN_TODO = re.compile(r'^//(\s*)TODO(\(.+?\))?:?(\s|$)?') def CheckComment(comment, filename, linenum, error): """Checks for common mistakes in TODO comments. Args: comment: The text of the comment from the line in question. filename: The name of the current file. linenum: The number of the line to check. error: The function to call with any errors found. """ match = _RE_PATTERN_TODO.match(comment) if match: # One whitespace is correct; zero whitespace is handled elsewhere. leading_whitespace = match.group(1) if len(leading_whitespace) > 1: error(filename, linenum, 'whitespace/todo', 2, 'Too many spaces before TODO') username = match.group(2) if not username: error(filename, linenum, 'readability/todo', 2, 'Missing username in TODO; it should look like ' '"// TODO(my_username): Stuff."') middle_whitespace = match.group(3) # Comparisons made explicit for correctness -- pylint: disable=g-explicit-bool-comparison if middle_whitespace != ' ' and middle_whitespace != '': error(filename, linenum, 'whitespace/todo', 2, 'TODO(my_username) should be followed by a space') def CheckAccess(filename, clean_lines, linenum, nesting_state, error): """Checks for improper use of DISALLOW* macros. Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. nesting_state: A _NestingState instance which maintains information about the current stack of nested blocks being parsed. error: The function to call with any errors found. 
""" line = clean_lines.elided[linenum] # get rid of comments and strings matched = Match((r'\s*(DISALLOW_COPY_AND_ASSIGN|' r'DISALLOW_EVIL_CONSTRUCTORS|' r'DISALLOW_IMPLICIT_CONSTRUCTORS)'), line) if not matched: return if nesting_state.stack and isinstance(nesting_state.stack[-1], _ClassInfo): if nesting_state.stack[-1].access != 'private': error(filename, linenum, 'readability/constructors', 3, '%s must be in the private: section' % matched.group(1)) else: # Found DISALLOW* macro outside a class declaration, or perhaps it # was used inside a function when it should have been part of the # class declaration. We could issue a warning here, but it # probably resulted in a compiler error already. pass def FindNextMatchingAngleBracket(clean_lines, linenum, init_suffix): """Find the corresponding > to close a template. Args: clean_lines: A CleansedLines instance containing the file. linenum: Current line number. init_suffix: Remainder of the current line after the initial <. Returns: True if a matching bracket exists. """ line = init_suffix nesting_stack = ['<'] while True: # Find the next operator that can tell us whether < is used as an # opening bracket or as a less-than operator. We only want to # warn on the latter case. # # We could also check all other operators and terminate the search # early, e.g. if we got something like this "a(),;\[\]]*([<>(),;\[\]])(.*)$', line) if match: # Found an operator, update nesting stack operator = match.group(1) line = match.group(2) if nesting_stack[-1] == '<': # Expecting closing angle bracket if operator in ('<', '(', '['): nesting_stack.append(operator) elif operator == '>': nesting_stack.pop() if not nesting_stack: # Found matching angle bracket return True elif operator == ',': # Got a comma after a bracket, this is most likely a template # argument. We have not seen a closing angle bracket yet, but # it's probably a few lines later if we look for it, so just # return early here. return True else: # Got some other operator. return False else: # Expecting closing parenthesis or closing bracket if operator in ('<', '(', '['): nesting_stack.append(operator) elif operator in (')', ']'): # We don't bother checking for matching () or []. If we got # something like (] or [), it would have been a syntax error. nesting_stack.pop() else: # Scan the next line linenum += 1 if linenum >= len(clean_lines.elided): break line = clean_lines.elided[linenum] # Exhausted all remaining lines and still no matching angle bracket. # Most likely the input was incomplete, otherwise we should have # seen a semicolon and returned early. return True def FindPreviousMatchingAngleBracket(clean_lines, linenum, init_prefix): """Find the corresponding < that started a template. Args: clean_lines: A CleansedLines instance containing the file. linenum: Current line number. init_prefix: Part of the current line before the initial >. Returns: True if a matching bracket exists. """ line = init_prefix nesting_stack = ['>'] while True: # Find the previous operator match = Search(r'^(.*)([<>(),;\[\]])[^<>(),;\[\]]*$', line) if match: # Found an operator, update nesting stack operator = match.group(2) line = match.group(1) if nesting_stack[-1] == '>': # Expecting opening angle bracket if operator in ('>', ')', ']'): nesting_stack.append(operator) elif operator == '<': nesting_stack.pop() if not nesting_stack: # Found matching angle bracket return True elif operator == ',': # Got a comma before a bracket, this is most likely a # template argument. 
The opening angle bracket is probably # there if we look for it, so just return early here. return True else: # Got some other operator. return False else: # Expecting opening parenthesis or opening bracket if operator in ('>', ')', ']'): nesting_stack.append(operator) elif operator in ('(', '['): nesting_stack.pop() else: # Scan the previous line linenum -= 1 if linenum < 0: break line = clean_lines.elided[linenum] # Exhausted all earlier lines and still no matching angle bracket. return False def CheckSpacing(filename, clean_lines, linenum, nesting_state, error): """Checks for the correctness of various spacing issues in the code. Things we check for: spaces around operators, spaces after if/for/while/switch, no spaces around parens in function calls, two spaces between code and comment, don't start a block with a blank line, don't end a function with a blank line, don't add a blank line after public/protected/private, don't have too many blank lines in a row. Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. nesting_state: A _NestingState instance which maintains information about the current stack of nested blocks being parsed. error: The function to call with any errors found. """ # Don't use "elided" lines here, otherwise we can't check commented lines. # Don't want to use "raw" either, because we don't want to check inside C++11 # raw strings, raw = clean_lines.lines_without_raw_strings line = raw[linenum] # Before nixing comments, check if the line is blank for no good # reason. This includes the first line after a block is opened, and # blank lines at the end of a function (ie, right before a line like '}' # # Skip all the blank line checks if we are immediately inside a # namespace body. In other words, don't issue blank line warnings # for this block: # namespace { # # } # # A warning about missing end of namespace comments will be issued instead. if IsBlankLine(line) and not nesting_state.InNamespaceBody(): elided = clean_lines.elided prev_line = elided[linenum - 1] prevbrace = prev_line.rfind('{') # TODO(unknown): Don't complain if line before blank line, and line after, # both start with alnums and are indented the same amount. # This ignores whitespace at the start of a namespace block # because those are not usually indented. if prevbrace != -1 and prev_line[prevbrace:].find('}') == -1: # OK, we have a blank line at the start of a code block. Before we # complain, we check if it is an exception to the rule: The previous # non-empty line has the parameters of a function header that are indented # 4 spaces (because they did not fit in a 80 column line when placed on # the same line as the function name). We also check for the case where # the previous line is indented 6 spaces, which may happen when the # initializers of a constructor do not fit into a 80 column line. exception = False if Match(r' {6}\w', prev_line): # Initializer list? # We are looking for the opening column of initializer list, which # should be indented 4 spaces to cause 6 space indentation afterwards. search_position = linenum-2 while (search_position >= 0 and Match(r' {6}\w', elided[search_position])): search_position -= 1 exception = (search_position >= 0 and elided[search_position][:5] == ' :') else: # Search for the function arguments or an initializer list. 
We use a # simple heuristic here: If the line is indented 4 spaces; and we have a # closing paren, without the opening paren, followed by an opening brace # or colon (for initializer lists) we assume that it is the last line of # a function header. If we have a colon indented 4 spaces, it is an # initializer list. exception = (Match(r' {4}\w[^\(]*\)\s*(const\s*)?(\{\s*$|:)', prev_line) or Match(r' {4}:', prev_line)) if not exception: error(filename, linenum, 'whitespace/blank_line', 2, 'Redundant blank line at the start of a code block ' 'should be deleted.') # Ignore blank lines at the end of a block in a long if-else # chain, like this: # if (condition1) { # // Something followed by a blank line # # } else if (condition2) { # // Something else # } if linenum + 1 < clean_lines.NumLines(): next_line = raw[linenum + 1] if (next_line and Match(r'\s*}', next_line) and next_line.find('} else ') == -1): error(filename, linenum, 'whitespace/blank_line', 3, 'Redundant blank line at the end of a code block ' 'should be deleted.') matched = Match(r'\s*(public|protected|private):', prev_line) if matched: error(filename, linenum, 'whitespace/blank_line', 3, 'Do not leave a blank line after "%s:"' % matched.group(1)) # Next, we complain if there's a comment too near the text commentpos = line.find('//') if commentpos != -1: # Check if the // may be in quotes. If so, ignore it # Comparisons made explicit for clarity -- pylint: disable=g-explicit-bool-comparison if (line.count('"', 0, commentpos) - line.count('\\"', 0, commentpos)) % 2 == 0: # not in quotes # Allow one space for new scopes, two spaces otherwise: if (not Match(r'^\s*{ //', line) and ((commentpos >= 1 and line[commentpos-1] not in string.whitespace) or (commentpos >= 2 and line[commentpos-2] not in string.whitespace))): error(filename, linenum, 'whitespace/comments', 2, 'At least two spaces is best between code and comments') # There should always be a space between the // and the comment commentend = commentpos + 2 if commentend < len(line) and not line[commentend] == ' ': # but some lines are exceptions -- e.g. if they're big # comment delimiters like: # //---------------------------------------------------------- # or are an empty C++ style Doxygen comment, like: # /// # or C++ style Doxygen comments placed after the variable: # ///< Header comment # //!< Header comment # or they begin with multiple slashes followed by a space: # //////// Header comment match = (Search(r'[=/-]{4,}\s*$', line[commentend:]) or Search(r'^/$', line[commentend:]) or Search(r'^!< ', line[commentend:]) or Search(r'^/< ', line[commentend:]) or Search(r'^/+ ', line[commentend:])) if not match: error(filename, linenum, 'whitespace/comments', 4, 'Should have a space between // and comment') CheckComment(line[commentpos:], filename, linenum, error) line = clean_lines.elided[linenum] # get rid of comments and strings # Don't try to do spacing checks for operator methods line = re.sub(r'operator(==|!=|<|<<|<=|>=|>>|>)\(', 'operator\(', line) # We allow no-spaces around = within an if: "if ( (a=Foo()) == 0 )". # Otherwise not. Note we only check for non-spaces on *both* sides; # sometimes people put non-spaces on one side when aligning ='s among # many lines (not that this is behavior that I approve of...) 
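# Illustrative sketch (hypothetical, not part of cpplint): the '=' check just
# below requires a word or '.' character on *both* sides of '=', so aligned
# assignments with a space on one side only are tolerated, and if/while
# conditions are exempted entirely.  A standalone probe of the same regexes:
def _missing_eq_spaces_sketch(line):
  import re
  return (re.search(r'[\w.]=[\w.]', line) is not None
          and re.search(r'\b(if|while) ', line) is None)
# e.g. _missing_eq_spaces_sketch('x=1')                   -> True  (flagged)
#      _missing_eq_spaces_sketch('x = 1')                 -> False
#      _missing_eq_spaces_sketch('if ( (a=Foo()) == 0 )') -> False (allowed)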
if Search(r'[\w.]=[\w.]', line) and not Search(r'\b(if|while) ', line): error(filename, linenum, 'whitespace/operators', 4, 'Missing spaces around =') # It's ok not to have spaces around binary operators like + - * /, but if # there's too little whitespace, we get concerned. It's hard to tell, # though, so we punt on this one for now. TODO. # You should always have whitespace around binary operators. # # Check <= and >= first to avoid false positives with < and >, then # check non-include lines for spacing around < and >. match = Search(r'[^<>=!\s](==|!=|<=|>=)[^<>=!\s]', line) if match: error(filename, linenum, 'whitespace/operators', 3, 'Missing spaces around %s' % match.group(1)) # We allow no-spaces around << when used like this: 10<<20, but # not otherwise (particularly, not when used as streams) # Also ignore using ns::operator<<; match = Search(r'(operator|\S)(?:L|UL|ULL|l|ul|ull)?<<(\S)', line) if (match and not (match.group(1).isdigit() and match.group(2).isdigit()) and not (match.group(1) == 'operator' and match.group(2) == ';')): error(filename, linenum, 'whitespace/operators', 3, 'Missing spaces around <<') elif not Match(r'#.*include', line): # Avoid false positives on -> reduced_line = line.replace('->', '') # Look for < that is not surrounded by spaces. This is only # triggered if both sides are missing spaces, even though # technically we should flag it if at least one side is missing a # space. This is done to avoid some false positives with shifts. match = Search(r'[^\s<]<([^\s=<].*)', reduced_line) if (match and not FindNextMatchingAngleBracket(clean_lines, linenum, match.group(1))): error(filename, linenum, 'whitespace/operators', 3, 'Missing spaces around <') # Look for > that is not surrounded by spaces. Similar to the # above, we only trigger if both sides are missing spaces to avoid # false positives with shifts. match = Search(r'^(.*[^\s>])>[^\s=>]', reduced_line) if (match and not FindPreviousMatchingAngleBracket(clean_lines, linenum, match.group(1))): error(filename, linenum, 'whitespace/operators', 3, 'Missing spaces around >') # We allow no-spaces around >> for almost anything. This is because # C++11 allows ">>" to close nested templates, which accounts for # most cases when ">>" is not followed by a space. # # We still warn on ">>" followed by alpha character, because that is # likely due to ">>" being used for right shifts, e.g.: # value >> alpha # # When ">>" is used to close templates, the alphanumeric letter that # follows would be part of an identifier, and there should still be # a space separating the template type and the identifier. # type<type<type>> alpha match = Search(r'>>[a-zA-Z_]', line) if match: error(filename, linenum, 'whitespace/operators', 3, 'Missing spaces around >>') # There shouldn't be space around unary operators match = Search(r'(!\s|~\s|[\s]--[\s;]|[\s]\+\+[\s;])', line) if match: error(filename, linenum, 'whitespace/operators', 4, 'Extra space for operator %s' % match.group(1)) # A pet peeve of mine: no spaces after an if, while, switch, or for match = Search(r' (if\(|for\(|while\(|switch\()', line) if match: error(filename, linenum, 'whitespace/parens', 5, 'Missing space before ( in %s' % match.group(1)) # For if/for/while/switch, the left and right parens should be # consistent about how many spaces are inside the parens, and # there should either be zero or one spaces inside the parens. # We don't want: "if ( foo)" or "if ( foo )". # Exception: "for ( ; foo; bar)" and "for (foo; bar; )" are allowed.
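# Illustrative sketch (hypothetical, not part of cpplint): the pattern just
# below captures the runs of spaces inside '(' and ')' of a control statement
# so the two sides can be compared.  A standalone probe of that regex:
def _paren_spaces_sketch(line):
  import re
  m = re.search(r'\b(if|for|while|switch)\s*'
                r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', line)
  return (len(m.group(2)), len(m.group(4))) if m else None
# e.g. _paren_spaces_sketch('if (foo) {')   -> (0, 0)  consistent
#      _paren_spaces_sketch('if ( foo ) {') -> (1, 1)  consistent
#      _paren_spaces_sketch('if ( foo) {')  -> (1, 0)  mismatching spaces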
match = Search(r'\b(if|for|while|switch)\s*' r'\(([ ]*)(.).*[^ ]+([ ]*)\)\s*{\s*$', line) if match: if len(match.group(2)) != len(match.group(4)): if not (match.group(3) == ';' and len(match.group(2)) == 1 + len(match.group(4)) or not match.group(2) and Search(r'\bfor\s*\(.*; \)', line)): error(filename, linenum, 'whitespace/parens', 5, 'Mismatching spaces inside () in %s' % match.group(1)) if len(match.group(2)) not in [0, 1]: error(filename, linenum, 'whitespace/parens', 5, 'Should have zero or one spaces inside ( and ) in %s' % match.group(1)) # You should always have a space after a comma (either as fn arg or operator) # # This does not apply when the non-space character following the # comma is another comma, since the only time when that happens is # for empty macro arguments. # # We run this check in two passes: first pass on elided lines to # verify that lines contain missing whitespaces, second pass on raw # lines to confirm that those missing whitespaces are not due to # elided comments. if Search(r',[^,\s]', line) and Search(r',[^,\s]', raw[linenum]): error(filename, linenum, 'whitespace/comma', 3, 'Missing space after ,') # You should always have a space after a semicolon # except for few corner cases # TODO(unknown): clarify if 'if (1) { return 1;}' is requires one more # space after ; if Search(r';[^\s};\\)/]', line): error(filename, linenum, 'whitespace/semicolon', 3, 'Missing space after ;') # Next we will look for issues with function calls. CheckSpacingForFunctionCall(filename, line, linenum, error) # Except after an opening paren, or after another opening brace (in case of # an initializer list, for instance), you should have spaces before your # braces. And since you should never have braces at the beginning of a line, # this is an easy test. match = Match(r'^(.*[^ ({]){', line) if match: # Try a bit harder to check for brace initialization. This # happens in one of the following forms: # Constructor() : initializer_list_{} { ... } # Constructor{}.MemberFunction() # Type variable{}; # FunctionCall(type{}, ...); # LastArgument(..., type{}); # LOG(INFO) << type{} << " ..."; # map_of_type[{...}] = ...; # # We check for the character following the closing brace, and # silence the warning if it's one of those listed above, i.e. # "{.;,)<]". # # To account for nested initializer list, we allow any number of # closing braces up to "{;,)<". We can't simply silence the # warning on first sight of closing brace, because that would # cause false negatives for things that are not initializer lists. # Silence this: But not this: # Outer{ if (...) { # Inner{...} if (...){ // Missing space before { # }; } # # There is a false negative with this approach if people inserted # spurious semicolons, e.g. "if (cond){};", but we will catch the # spurious semicolon with a separate check. (endline, endlinenum, endpos) = CloseExpression( clean_lines, linenum, len(match.group(1))) trailing_text = '' if endpos > -1: trailing_text = endline[endpos:] for offset in xrange(endlinenum + 1, min(endlinenum + 3, clean_lines.NumLines() - 1)): trailing_text += clean_lines.elided[offset] if not Match(r'^[\s}]*[{.;,)<\]]', trailing_text): error(filename, linenum, 'whitespace/braces', 5, 'Missing space before {') # Make sure '} else {' has spaces. if Search(r'}else', line): error(filename, linenum, 'whitespace/braces', 5, 'Missing space before else') # You shouldn't have spaces before your brackets, except maybe after # 'delete []' or 'new char * []'. 
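# Illustrative sketch (hypothetical, not part of cpplint): the bracket check
# just below flags a space between an identifier and '[', with 'delete []' as
# the carve-out mentioned above.  A standalone form of the same two regexes:
def _space_before_bracket_sketch(line):
  import re
  return (re.search(r'\w\s+\[', line) is not None
          and re.search(r'delete\s+\[', line) is None)
# e.g. _space_before_bracket_sketch('int a [5];')   -> True  (flagged)
#      _space_before_bracket_sketch('int a[5];')    -> False
#      _space_before_bracket_sketch('delete [] p;') -> False (allowed)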
  if Search(r'\w\s+\[', line) and not Search(r'delete\s+\[', line):
    error(filename, linenum, 'whitespace/braces', 5,
          'Extra space before [')

  # You shouldn't have a space before a semicolon at the end of the line.
  # There's a special case for "for" since the style guide allows space before
  # the semicolon there.
  if Search(r':\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Semicolon defining empty statement. Use {} instead.')
  elif Search(r'^\s*;\s*$', line):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Line contains only semicolon. If this should be an empty statement, '
          'use {} instead.')
  elif (Search(r'\s+;\s*$', line) and
        not Search(r'\bfor\b', line)):
    error(filename, linenum, 'whitespace/semicolon', 5,
          'Extra space before last semicolon. If this should be an empty '
          'statement, use {} instead.')

  # In range-based for, we wanted spaces before and after the colon, but
  # not around "::" tokens that might appear.
  if (Search('for *\(.*[^:]:[^: ]', line) or
      Search('for *\(.*[^: ]:[^:]', line)):
    error(filename, linenum, 'whitespace/forcolon', 2,
          'Missing space around colon in range-based for loop')


def CheckSectionSpacing(filename, clean_lines, class_info, linenum, error):
  """Checks for additional blank line issues related to sections.

  Currently the only thing checked here is blank line before protected/private.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    class_info: A _ClassInfo object.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Skip checks if the class is small, where small means 25 lines or less.
  # 25 lines seems like a good cutoff since that's the usual height of
  # terminals, and any class that can't fit in one screen can't really
  # be considered "small".
  #
  # Also skip checks if we are on the first line. This accounts for
  # classes that look like
  #   class Foo { public: ... };
  #
  # If we didn't find the end of the class, last_line would be zero,
  # and the check will be skipped by the first condition.
  if (class_info.last_line - class_info.starting_linenum <= 24 or
      linenum <= class_info.starting_linenum):
    return

  matched = Match(r'\s*(public|protected|private):',
                  clean_lines.lines[linenum])
  if matched:
    # Issue warning if the line before public/protected/private was
    # not a blank line, but don't do this if the previous line contains
    # "class" or "struct". This can happen two ways:
    #  - We are at the beginning of the class.
    #  - We are forward-declaring an inner class that is semantically
    #    private, but needed to be public for implementation reasons.
    # Also ignores cases where the previous line ends with a backslash as can be
    # common when defining classes in C macros.
    prev_line = clean_lines.lines[linenum - 1]
    if (not IsBlankLine(prev_line) and
        not Search(r'\b(class|struct)\b', prev_line) and
        not Search(r'\\$', prev_line)):
      # Try a bit harder to find the beginning of the class. This is to
      # account for multi-line base-specifier lists, e.g.:
      #   class Derived
      #       : public Base {
      end_class_head = class_info.starting_linenum
      for i in range(class_info.starting_linenum, linenum):
        if Search(r'\{\s*$', clean_lines.lines[i]):
          end_class_head = i
          break
      if end_class_head < linenum - 1:
        error(filename, linenum, 'whitespace/blank_line', 3,
              '"%s:" should be preceded by a blank line' % matched.group(1))


def GetPreviousNonBlankLine(clean_lines, linenum):
  """Return the most recent non-blank line and its line number.
  Args:
    clean_lines: A CleansedLines instance containing the file contents.
    linenum: The number of the line to check.

  Returns:
    A tuple with two elements. The first element is the contents of the last
    non-blank line before the current line, or the empty string if this is the
    first non-blank line. The second is the line number of that line, or -1
    if this is the first non-blank line.
  """
  prevlinenum = linenum - 1
  while prevlinenum >= 0:
    prevline = clean_lines.elided[prevlinenum]
    if not IsBlankLine(prevline):     # if not a blank line...
      return (prevline, prevlinenum)
    prevlinenum -= 1
  return ('', -1)


def CheckBraces(filename, clean_lines, linenum, error):
  """Looks for misplaced braces (e.g. at the end of line).

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]        # get rid of comments and strings

  if Match(r'\s*{\s*$', line):
    # We allow an open brace to start a line in the case where someone is using
    # braces in a block to explicitly create a new scope, which is commonly used
    # to control the lifetime of stack-allocated variables. Braces are also
    # used for brace initializers inside function calls. We don't detect this
    # perfectly: we just don't complain if the last non-whitespace character on
    # the previous non-blank line is ',', ';', ':', '(', '{', or '}', or if the
    # previous line starts a preprocessor block.
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if (not Search(r'[,;:}{(]\s*$', prevline) and
        not Match(r'\s*#', prevline)):
      error(filename, linenum, 'whitespace/braces', 4,
            '{ should almost always be at the end of the previous line')

  # An else clause should be on the same line as the preceding closing brace.
  if Match(r'\s*else\s*', line):
    prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
    if Match(r'\s*}\s*$', prevline):
      error(filename, linenum, 'whitespace/newline', 4,
            'An else should appear on the same line as the preceding }')

  # If braces come on one side of an else, they should be on both.
  # However, we have to worry about "else if" that spans multiple lines!
  if Search(r'}\s*else[^{]*$', line) or Match(r'[^}]*else\s*{', line):
    if Search(r'}\s*else if([^{]*)$', line):       # could be multi-line if
      # find the ( after the if
      pos = line.find('else if')
      pos = line.find('(', pos)
      if pos > 0:
        (endline, _, endpos) = CloseExpression(clean_lines, linenum, pos)
        if endline[endpos:].find('{') == -1:    # must be brace after if
          error(filename, linenum, 'readability/braces', 5,
                'If an else has a brace on one side, it should have it on both')
    else:            # common case: else not followed by a multi-line if
      error(filename, linenum, 'readability/braces', 5,
            'If an else has a brace on one side, it should have it on both')

  # Likewise, an else should never have the else clause on the same line
  if Search(r'\belse [^\s{]', line) and not Search(r'\belse if\b', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'Else clause should never be on same line as else (use 2 lines)')

  # In the same way, a do/while should never be on one line
  if Match(r'\s*do [^\s{]', line):
    error(filename, linenum, 'whitespace/newline', 4,
          'do/while clauses should not be on a single line')

  # Block bodies should not be followed by a semicolon. Due to C++11
  # brace initialization, there are more places where semicolons are
  # required than not, so we use a whitelist approach to check these
  # rather than a blacklist.
  # These are the places where "};" should
  # be replaced by just "}":
  #  1. Some flavor of block following closing parenthesis:
  #     for (;;) {};
  #     while (...) {};
  #     switch (...) {};
  #     Function(...) {};
  #     if (...) {};
  #     if (...) else if (...) {};
  #
  #  2. else block:
  #     if (...) else {};
  #
  #  3. const member function:
  #     Function(...) const {};
  #
  #  4. Block following some statement:
  #     x = 42;
  #     {};
  #
  #  5. Block at the beginning of a function:
  #     Function(...) {
  #       {};
  #     }
  #
  #     Note that naively checking for the preceding "{" will also match
  #     braces inside multi-dimensional arrays, but this is fine since
  #     that expression will not contain semicolons.
  #
  #  6. Block following another block:
  #     while (true) {}
  #     {};
  #
  #  7. End of namespaces:
  #     namespace {};
  #
  #     These semicolons seem far more common than other kinds of
  #     redundant semicolons, possibly due to people converting classes
  #     to namespaces. For now we do not warn for this case.
  #
  # Try matching case 1 first.
  match = Match(r'^(.*\)\s*)\{', line)
  if match:
    # Matched closing parenthesis (case 1). Check the token before the
    # matching opening parenthesis, and don't warn if it looks like a
    # macro. This avoids these false positives:
    #  - macro that defines a base class
    #  - multi-line macro that defines a base class
    #  - macro that defines the whole class-head
    #
    # But we still issue warnings for macros that we know are safe to
    # warn, specifically:
    #  - TEST, TEST_F, TEST_P, MATCHER, MATCHER_P
    #  - TYPED_TEST
    #  - INTERFACE_DEF
    #  - EXCLUSIVE_LOCKS_REQUIRED, SHARED_LOCKS_REQUIRED, LOCKS_EXCLUDED:
    #
    # We implement a whitelist of safe macros instead of a blacklist of
    # unsafe macros, even though the latter appears less frequently in
    # google code and would have been easier to implement. This is because
    # the downside for getting the whitelist wrong means some extra
    # semicolons, while the downside for getting the blacklist wrong
    # would result in compile errors.
    #
    # In addition to macros, we also don't want to warn on compound
    # literals.
    closing_brace_pos = match.group(1).rfind(')')
    opening_parenthesis = ReverseCloseExpression(
        clean_lines, linenum, closing_brace_pos)
    if opening_parenthesis[2] > -1:
      line_prefix = opening_parenthesis[0][0:opening_parenthesis[2]]
      macro = Search(r'\b([A-Z_]+)\s*$', line_prefix)
      if ((macro and
           macro.group(1) not in (
               'TEST', 'TEST_F', 'MATCHER', 'MATCHER_P', 'TYPED_TEST',
               'EXCLUSIVE_LOCKS_REQUIRED', 'SHARED_LOCKS_REQUIRED',
               'LOCKS_EXCLUDED', 'INTERFACE_DEF')) or
          Search(r'\s+=\s*$', line_prefix)):
        match = None

  else:
    # Try matching cases 2-3.
    match = Match(r'^(.*(?:else|\)\s*const)\s*)\{', line)
    if not match:
      # Try matching cases 4-6. These are always matched on separate lines.
      #
      # Note that we can't simply concatenate the previous line to the
      # current line and do a single match, otherwise we may output
      # duplicate warnings for the blank line case:
      #   if (cond) {
      #     // blank line
      #   }
      prevline = GetPreviousNonBlankLine(clean_lines, linenum)[0]
      if prevline and Search(r'[;{}]\s*$', prevline):
        match = Match(r'^(\s*)\{', line)

  # Check matching closing brace
  if match:
    (endline, endlinenum, endpos) = CloseExpression(
        clean_lines, linenum, len(match.group(1)))
    if endpos > -1 and Match(r'^\s*;', endline[endpos:]):
      # Current {} pair is eligible for semicolon check, and we have found
      # the redundant semicolon, output warning here.
      #
      # Note: because we are scanning forward for opening braces, and
      # outputting warnings for the matching closing brace, if there are
      # nested blocks with trailing semicolons, we will get the error
      # messages in reversed order.
      error(filename, endlinenum, 'readability/braces', 4,
            "You don't need a ; after a }")


def CheckEmptyBlockBody(filename, clean_lines, linenum, error):
  """Look for empty loop/conditional body with only a single semicolon.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Search for loop keywords at the beginning of the line. Because only
  # whitespaces are allowed before the keywords, this will also ignore most
  # do-while-loops, since those lines should start with closing brace.
  #
  # We also check "if" blocks here, since an empty conditional block
  # is likely an error.
  line = clean_lines.elided[linenum]
  matched = Match(r'\s*(for|while|if)\s*\(', line)
  if matched:
    # Find the end of the conditional expression
    (end_line, end_linenum, end_pos) = CloseExpression(
        clean_lines, linenum, line.find('('))

    # Output warning if what follows the condition expression is a semicolon.
    # No warning for all other cases, including whitespace or newline, since we
    # have a separate check for semicolons preceded by whitespace.
    if end_pos >= 0 and Match(r';', end_line[end_pos:]):
      if matched.group(1) == 'if':
        error(filename, end_linenum, 'whitespace/empty_conditional_body', 5,
              'Empty conditional bodies should use {}')
      else:
        error(filename, end_linenum, 'whitespace/empty_loop_body', 5,
              'Empty loop bodies should use {} or continue')


def CheckCheck(filename, clean_lines, linenum, error):
  """Checks the use of CHECK and EXPECT macros.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  # Decide the set of replacement macros that should be suggested
  lines = clean_lines.elided
  check_macro = None
  start_pos = -1
  for macro in _CHECK_MACROS:
    i = lines[linenum].find(macro)
    if i >= 0:
      check_macro = macro

      # Find opening parenthesis. Do a regular expression match here
      # to make sure that we are matching the expected CHECK macro, as
      # opposed to some other macro that happens to contain the CHECK
      # substring.
      matched = Match(r'^(.*\b' + check_macro + r'\s*)\(', lines[linenum])
      if not matched:
        continue
      start_pos = len(matched.group(1))
      break
  if not check_macro or start_pos < 0:
    # Don't waste time here if line doesn't contain 'CHECK' or 'EXPECT'
    return

  # Find end of the boolean expression by matching parentheses
  (last_line, end_line, end_pos) = CloseExpression(
      clean_lines, linenum, start_pos)
  if end_pos < 0:
    return
  if linenum == end_line:
    expression = lines[linenum][start_pos + 1:end_pos - 1]
  else:
    expression = lines[linenum][start_pos + 1:]
    for i in xrange(linenum + 1, end_line):
      expression += lines[i]
    expression += last_line[0:end_pos - 1]

  # Parse expression so that we can take parentheses into account.
  # This avoids false positives for inputs like "CHECK((a < 4) == b)",
  # which is not replaceable by CHECK_LE.
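  #
  # As a worked example of the parsing loop below (an illustrative trace,
  # not part of the original comments): for CHECK(x == 42) the expression
  # is "x == 42". The else branch consumes "x " into lhs, the next pass
  # matches the relational token "==" and leaves " 42" in rhs, so after
  # stripping, rhs is the constant literal "42" and the error suggests
  # "Consider using CHECK_EQ instead of CHECK(a == b)".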
  lhs = ''
  rhs = ''
  operator = None
  while expression:
    matched = Match(r'^\s*(<<|<<=|>>|>>=|->\*|->|&&|\|\||'
                    r'==|!=|>=|>|<=|<|\()(.*)$', expression)
    if matched:
      token = matched.group(1)
      if token == '(':
        # Parenthesized operand
        expression = matched.group(2)
        (end, _) = FindEndOfExpressionInLine(expression, 0, 1, '(', ')')
        if end < 0:
          return  # Unmatched parenthesis
        lhs += '(' + expression[0:end]
        expression = expression[end:]
      elif token in ('&&', '||'):
        # Logical and/or operators. This means the expression
        # contains more than one term, for example:
        #   CHECK(42 < a && a < b);
        #
        # These are not replaceable with CHECK_LE, so bail out early.
        return
      elif token in ('<<', '<<=', '>>', '>>=', '->*', '->'):
        # Non-relational operator
        lhs += token
        expression = matched.group(2)
      else:
        # Relational operator
        operator = token
        rhs = matched.group(2)
        break
    else:
      # Unparenthesized operand. Instead of appending to lhs one character
      # at a time, we do another regular expression match to consume several
      # characters at once if possible. Trivial benchmark shows that this
      # is more efficient when the operands are longer than a single
      # character, which is generally the case.
      matched = Match(r'^([^-=!<>()&|]+)(.*)$', expression)
      if not matched:
        matched = Match(r'^(\s*\S)(.*)$', expression)
        if not matched:
          break
      lhs += matched.group(1)
      expression = matched.group(2)

  # Only apply checks if we got all parts of the boolean expression
  if not (lhs and operator and rhs):
    return

  # Check that rhs does not contain logical operators. We already know
  # that lhs is fine since the loop above parses out && and ||.
  if rhs.find('&&') > -1 or rhs.find('||') > -1:
    return

  # At least one of the operands must be a constant literal. This is
  # to avoid suggesting replacements for unprintable things like
  # CHECK(variable != iterator)
  #
  # The following pattern matches decimal, hex integers, strings, and
  # characters (in that order).
  lhs = lhs.strip()
  rhs = rhs.strip()
  match_constant = r'^([-+]?(\d+|0[xX][0-9a-fA-F]+)[lLuU]{0,3}|".*"|\'.*\')$'
  if Match(match_constant, lhs) or Match(match_constant, rhs):
    # Note: since we know both lhs and rhs, we can provide a more
    # descriptive error message like:
    #   Consider using CHECK_EQ(x, 42) instead of CHECK(x == 42)
    # Instead of:
    #   Consider using CHECK_EQ instead of CHECK(a == b)
    #
    # We are still keeping the less descriptive message because if lhs
    # or rhs gets long, the error message might become unreadable.
    error(filename, linenum, 'readability/check', 2,
          'Consider using %s instead of %s(a %s b)' % (
              _CHECK_REPLACEMENT[check_macro][operator],
              check_macro, operator))


def CheckAltTokens(filename, clean_lines, linenum, error):
  """Check alternative keywords being used in boolean expressions.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    error: The function to call with any errors found.
  """
  line = clean_lines.elided[linenum]

  # Avoid preprocessor lines
  if Match(r'^\s*#', line):
    return

  # Last ditch effort to avoid multi-line comments. This will not help
  # if the comment started before the current line or ended after the
  # current line, but it catches most of the false positives. At least,
  # it provides a way to workaround this warning for people who use
  # multi-line comments in preprocessor macros.
  #
  # TODO(unknown): remove this once cpplint has better support for
  # multi-line comments.
  if line.find('/*') >= 0 or line.find('*/') >= 0:
    return

  for match in _ALT_TOKEN_REPLACEMENT_PATTERN.finditer(line):
    error(filename, linenum, 'readability/alt_tokens', 2,
          'Use operator %s instead of %s' % (
              _ALT_TOKEN_REPLACEMENT[match.group(1)], match.group(1)))


def GetLineWidth(line):
  """Determines the width of the line in column positions.

  Args:
    line: A string, which may be a Unicode string.

  Returns:
    The width of the line in column positions, accounting for Unicode
    combining characters and wide characters.
  """
  if isinstance(line, unicode):
    width = 0
    for uc in unicodedata.normalize('NFC', line):
      if unicodedata.east_asian_width(uc) in ('W', 'F'):
        width += 2
      elif not unicodedata.combining(uc):
        width += 1
    return width
  else:
    return len(line)


def CheckStyle(filename, clean_lines, linenum, file_extension, nesting_state,
               error):
  """Checks rules from the 'C++ style rules' section of cppguide.html.

  Most of these rules are hard to test (naming, comment style), but we
  do what we can. In particular we check for 2-space indents, line lengths,
  tab usage, spaces inside code, etc.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # Don't use "elided" lines here, otherwise we can't check commented lines.
  # Don't want to use "raw" either, because we don't want to check inside C++11
  # raw strings.
  raw_lines = clean_lines.lines_without_raw_strings
  line = raw_lines[linenum]

  if line.find('\t') != -1:
    error(filename, linenum, 'whitespace/tab', 1,
          'Tab found; better to use spaces')

  # One or three blank spaces at the beginning of the line is weird; it's
  # hard to reconcile that with 2-space indents.
  # NOTE: here are the conditions rob pike used for his tests. Mine aren't
  # as sophisticated, but it may be worth becoming so: RLENGTH==initial_spaces
  # if(RLENGTH > 20) complain = 0;
  # if(match($0, " +(error|private|public|protected):")) complain = 0;
  # if(match(prev, "&& *$")) complain = 0;
  # if(match(prev, "\\|\\| *$")) complain = 0;
  # if(match(prev, "[\",=><] *$")) complain = 0;
  # if(match($0, " <<")) complain = 0;
  # if(match(prev, " +for \\(")) complain = 0;
  # if(prevodd && match(prevprev, " +for \\(")) complain = 0;
  initial_spaces = 0
  cleansed_line = clean_lines.elided[linenum]
  while initial_spaces < len(line) and line[initial_spaces] == ' ':
    initial_spaces += 1
  if line and line[-1].isspace():
    error(filename, linenum, 'whitespace/end_of_line', 4,
          'Line ends in whitespace. Consider deleting these extra spaces.')
  # There are certain situations we allow one space, notably for section labels
  elif ((initial_spaces == 1 or initial_spaces == 3) and
        not Match(r'\s*\w+\s*:\s*$', cleansed_line)):
    error(filename, linenum, 'whitespace/indent', 3,
          'Weird number of spaces at line-start. '
          'Are you using a 2-space indent?')

  # Check if the line is a header guard.
  is_header_guard = False
  if file_extension == 'h':
    cppvar = GetHeaderGuardCPPVariable(filename)
    if (line.startswith('#ifndef %s' % cppvar) or
        line.startswith('#define %s' % cppvar) or
        line.startswith('#endif  // %s' % cppvar)):
      is_header_guard = True
  # #include lines and header guards can be long, since there's no clean way to
  # split them.
  #
  # URLs can be long too. It's possible to split these, but it makes them
  # harder to cut&paste.
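  #
  # For scale (an illustrative note, assuming cpplint's default
  # _line_length of 80): extended_length below is int(80 * 1.25) == 100,
  # so widths of 81-100 columns get the level-2 warning and anything
  # over 100 columns gets the stronger level-4 "very rarely" warning.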
  #
  # The "$Id:...$" comment may also get very long without it being the
  # developer's fault.
  if (not line.startswith('#include') and not is_header_guard and
      not Match(r'^\s*//.*http(s?)://\S*$', line) and
      not Match(r'^// \$Id:.*#[0-9]+ \$$', line)):
    line_width = GetLineWidth(line)
    extended_length = int((_line_length * 1.25))
    if line_width > extended_length:
      error(filename, linenum, 'whitespace/line_length', 4,
            'Lines should very rarely be longer than %i characters' %
            extended_length)
    elif line_width > _line_length:
      error(filename, linenum, 'whitespace/line_length', 2,
            'Lines should be <= %i characters long' % _line_length)

  if (cleansed_line.count(';') > 1 and
      # for loops are allowed two ;'s (and may run over two lines).
      cleansed_line.find('for') == -1 and
      (GetPreviousNonBlankLine(clean_lines, linenum)[0].find('for') == -1 or
       GetPreviousNonBlankLine(clean_lines, linenum)[0].find(';') != -1) and
      # It's ok to have many commands in a switch case that fits in 1 line
      not ((cleansed_line.find('case ') != -1 or
            cleansed_line.find('default:') != -1) and
           cleansed_line.find('break;') != -1)):
    error(filename, linenum, 'whitespace/newline', 0,
          'More than one command on the same line')

  # Some more style checks
  CheckBraces(filename, clean_lines, linenum, error)
  CheckEmptyBlockBody(filename, clean_lines, linenum, error)
  CheckAccess(filename, clean_lines, linenum, nesting_state, error)
  CheckSpacing(filename, clean_lines, linenum, nesting_state, error)
  CheckCheck(filename, clean_lines, linenum, error)
  CheckAltTokens(filename, clean_lines, linenum, error)
  classinfo = nesting_state.InnermostClass()
  if classinfo:
    CheckSectionSpacing(filename, clean_lines, classinfo, linenum, error)


_RE_PATTERN_INCLUDE_NEW_STYLE = re.compile(r'#include +"[^/]+\.h"')
_RE_PATTERN_INCLUDE = re.compile(r'^\s*#\s*include\s*([<"])([^>"]*)[>"].*$')
# Matches the first component of a filename delimited by -s and _s. That is:
#  _RE_FIRST_COMPONENT.match('foo').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo-bar_baz.cc').group(0) == 'foo'
#  _RE_FIRST_COMPONENT.match('foo_bar-baz.cc').group(0) == 'foo'
_RE_FIRST_COMPONENT = re.compile(r'^[^-_.]+')


def _DropCommonSuffixes(filename):
  """Drops common suffixes like _test.cc or -inl.h from filename.

  For example:
    >>> _DropCommonSuffixes('foo/foo-inl.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/bar/foo.cc')
    'foo/bar/foo'
    >>> _DropCommonSuffixes('foo/foo_internal.h')
    'foo/foo'
    >>> _DropCommonSuffixes('foo/foo_unusualinternal.h')
    'foo/foo_unusualinternal'

  Args:
    filename: The input filename.

  Returns:
    The filename with the common suffix removed.
  """
  for suffix in ('test.cc', 'regtest.cc', 'unittest.cc',
                 'inl.h', 'impl.h', 'internal.h'):
    if (filename.endswith(suffix) and len(filename) > len(suffix) and
        filename[-len(suffix) - 1] in ('-', '_')):
      return filename[:-len(suffix) - 1]
  return os.path.splitext(filename)[0]


def _IsTestFilename(filename):
  """Determines if the given filename has a suffix that identifies it as a test.

  Args:
    filename: The input filename.

  Returns:
    True if 'filename' looks like a test, False otherwise.
  """
  if (filename.endswith('_test.cc') or
      filename.endswith('_unittest.cc') or
      filename.endswith('_regtest.cc')):
    return True
  else:
    return False


def _ClassifyInclude(fileinfo, include, is_system):
  """Figures out what kind of header 'include' is.

  Args:
    fileinfo: The current file cpplint is running over. A FileInfo instance.
    include: The path to a #included file.
    is_system: True if the #include used <> rather than "".
  Returns:
    One of the _XXX_HEADER constants.

  For example:
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'stdio.h', True)
    _C_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'string', True)
    _CPP_SYS_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/foo.h', False)
    _LIKELY_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo_unknown_extension.cc'),
    ...                  'bar/foo_other_ext.h', False)
    _POSSIBLE_MY_HEADER
    >>> _ClassifyInclude(FileInfo('foo/foo.cc'), 'foo/bar.h', False)
    _OTHER_HEADER
  """
  # This is a list of all standard c++ header files, except
  # those already checked for above.
  is_cpp_h = include in _CPP_HEADERS

  if is_system:
    if is_cpp_h:
      return _CPP_SYS_HEADER
    else:
      return _C_SYS_HEADER

  # If the target file and the include we're checking share a
  # basename when we drop common extensions, and the include
  # lives in . , then it's likely to be owned by the target file.
  target_dir, target_base = (
      os.path.split(_DropCommonSuffixes(fileinfo.RepositoryName())))
  include_dir, include_base = os.path.split(_DropCommonSuffixes(include))
  if target_base == include_base and (
      include_dir == target_dir or
      include_dir == os.path.normpath(target_dir + '/../public')):
    return _LIKELY_MY_HEADER

  # If the target and include share some initial basename
  # component, it's possible the target is implementing the
  # include, so it's allowed to be first, but we'll never
  # complain if it's not there.
  target_first_component = _RE_FIRST_COMPONENT.match(target_base)
  include_first_component = _RE_FIRST_COMPONENT.match(include_base)
  if (target_first_component and include_first_component and
      target_first_component.group(0) ==
      include_first_component.group(0)):
    return _POSSIBLE_MY_HEADER

  return _OTHER_HEADER


def CheckIncludeLine(filename, clean_lines, linenum, include_state, error):
  """Check rules that are applicable to #include lines.

  Strings on #include lines are NOT removed from elided line, to make
  certain tasks easier. However, to prevent false positives, checks
  applicable to #include lines in CheckLanguage must be put here.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    include_state: An _IncludeState instance in which the headers are inserted.
    error: The function to call with any errors found.
  """
  fileinfo = FileInfo(filename)

  line = clean_lines.lines[linenum]

  # "include" should use the new style "foo/bar.h" instead of just "bar.h"
  if _RE_PATTERN_INCLUDE_NEW_STYLE.search(line):
    error(filename, linenum, 'build/include', 4,
          'Include the directory when naming .h files')

  # we shouldn't include a file more than once. actually, there are a
  # handful of instances where doing so is okay, but in general it's
  # not.
  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    include = match.group(2)
    is_system = (match.group(1) == '<')
    if include in include_state:
      error(filename, linenum, 'build/include', 4,
            '"%s" already included at %s:%s' %
            (include, filename, include_state[include]))
    else:
      include_state[include] = linenum

      # We want to ensure that headers appear in the right order:
      #   1) for foo.cc, foo.h  (preferred location)
      #   2) c system files
      #   3) cpp system files
      #   4) for foo.cc, foo.h  (deprecated location)
      #   5) other google headers
      #
      # We classify each include statement as one of those 5 types
      # using a number of techniques. The include_state object keeps
      # track of the highest type seen, and complains if we see a
      # lower type after that.
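      #
      # For example (an illustrative ordering, not from the original
      # comments), a well-ordered foo.cc would #include, in order:
      #   "foo/foo.h", then <sys/types.h>, then <vector>, then any other
      #   project header such as "other/project.h".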
      error_message = include_state.CheckNextIncludeOrder(
          _ClassifyInclude(fileinfo, include, is_system))
      if error_message:
        error(filename, linenum, 'build/include_order', 4,
              '%s. Should be: %s.h, c system, c++ system, other.' %
              (error_message, fileinfo.BaseName()))
      canonical_include = include_state.CanonicalizeAlphabeticalOrder(include)
      if not include_state.IsInAlphabeticalOrder(
          clean_lines, linenum, canonical_include):
        error(filename, linenum, 'build/include_alpha', 4,
              'Include "%s" not in alphabetical order' % include)
      include_state.SetLastHeader(canonical_include)

  # Look for any of the stream classes that are part of standard C++.
  match = _RE_PATTERN_INCLUDE.match(line)
  if match:
    include = match.group(2)
    if Match(r'(f|ind|io|i|o|parse|pf|stdio|str|)?stream$', include):
      # Many unit tests use cout, so we exempt them.
      if not _IsTestFilename(filename):
        error(filename, linenum, 'readability/streams', 3,
              'Streams are highly discouraged.')


def _GetTextInside(text, start_pattern):
  r"""Retrieves all the text between matching open and close parentheses.

  Given a string of lines and a regular expression string, retrieve all the
  text following the expression and between opening punctuation symbols like
  (, [, or {, and the matching close-punctuation symbol. This properly handles
  nested occurrences of the punctuation, so for the text like
    printf(a(), b(c()));
  a call to _GetTextInside(text, r'printf\(') will return 'a(), b(c())'.
  start_pattern must match a string having an open punctuation symbol at the
  end.

  Args:
    text: The lines to extract text. Its comments and strings must be elided.
          It can be single line and can span multiple lines.
    start_pattern: The regexp string indicating where to start extracting
                   the text.

  Returns:
    The extracted text.
    None if either the opening string or ending punctuation could not be found.
  """
  # TODO(sugawarayu): Audit cpplint.py to see what places could be profitably
  # rewritten to use _GetTextInside (and use inferior regexp matching today).

  # Give opening punctuations to get the matching close-punctuations.
  matching_punctuation = {'(': ')', '{': '}', '[': ']'}
  closing_punctuation = set(matching_punctuation.itervalues())

  # Find the position to start extracting text.
  match = re.search(start_pattern, text, re.M)
  if not match:  # start_pattern not found in text.
    return None
  start_position = match.end(0)

  assert start_position > 0, (
      'start_pattern must end with an opening punctuation.')
  assert text[start_position - 1] in matching_punctuation, (
      'start_pattern must end with an opening punctuation.')
  # Stack of closing punctuations we expect to have in text after position.
  punctuation_stack = [matching_punctuation[text[start_position - 1]]]
  position = start_position
  while punctuation_stack and position < len(text):
    if text[position] == punctuation_stack[-1]:
      punctuation_stack.pop()
    elif text[position] in closing_punctuation:
      # A closing punctuation without matching opening punctuations.
      return None
    elif text[position] in matching_punctuation:
      punctuation_stack.append(matching_punctuation[text[position]])
    position += 1
  if punctuation_stack:
    # Opening punctuations left without matching close-punctuations.
    return None
  # punctuations match.
  return text[start_position:position - 1]


# Patterns for matching call-by-reference parameters.
#
# Supports nested templates up to 2 levels deep using this messy pattern:
#   < (?: < (?: < [^<>]*
#               >
#           |
#               [^<>] )*
#         >
#       |
#           [^<>] )*
#   >
_RE_PATTERN_IDENT = r'[_a-zA-Z]\w*'  # =~ [[:alpha:]][[:alnum:]]*
_RE_PATTERN_TYPE = (
    r'(?:const\s+)?(?:typename\s+|class\s+|struct\s+|union\s+|enum\s+)?'
    r'(?:\w|'
    r'\s*<(?:<(?:<[^<>]*>|[^<>])*>|[^<>])*>|'
    r'::)+')
# A call-by-reference parameter ends with '& identifier'.
_RE_PATTERN_REF_PARAM = re.compile(
    r'(' + _RE_PATTERN_TYPE + r'(?:\s*(?:\bconst\b|[*]))*\s*'
    r'&\s*' + _RE_PATTERN_IDENT + r')\s*(?:=[^,()]+)?[,)]')
# A call-by-const-reference parameter either ends with 'const& identifier'
# or looks like 'const type& identifier' when 'type' is atomic.
_RE_PATTERN_CONST_REF_PARAM = (
    r'(?:.*\s*\bconst\s*&\s*' + _RE_PATTERN_IDENT +
    r'|const\s+' + _RE_PATTERN_TYPE + r'\s*&\s*' + _RE_PATTERN_IDENT + r')')


def CheckLanguage(filename, clean_lines, linenum, file_extension,
                  include_state, nesting_state, error):
  """Checks rules from the 'C++ language rules' section of cppguide.html.

  Some of these rules are hard to test (function overloading, using
  uint32 inappropriately), but we do the best we can.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    file_extension: The extension (without the dot) of the filename.
    include_state: An _IncludeState instance in which the headers are inserted.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
  """
  # If the line is empty or consists of entirely a comment, no need to
  # check it.
  line = clean_lines.elided[linenum]
  if not line:
    return

  match = _RE_PATTERN_INCLUDE.search(line)
  if match:
    CheckIncludeLine(filename, clean_lines, linenum, include_state, error)
    return

  # Reset include state across preprocessor directives. This is meant
  # to silence warnings for conditional includes.
  if Match(r'^\s*#\s*(?:ifdef|elif|else|endif)\b', line):
    include_state.ResetSection()

  # Make Windows paths like Unix.
  fullname = os.path.abspath(filename).replace('\\', '/')

  # TODO(unknown): figure out if they're using default arguments in fn proto.

  # Check to see if they're using a conversion function cast.
  # I just try to capture the most common basic types, though there are more.
  # Parameterless conversion functions, such as bool(), are allowed as they are
  # probably a member operator declaration or default constructor.
  match = Search(
      r'(\bnew\s+)?\b'  # Grab 'new' operator, if it's there
      r'(int|float|double|bool|char|int32|uint32|int64|uint64)'
      r'(\([^)].*)', line)
  if match:
    matched_new = match.group(1)
    matched_type = match.group(2)
    matched_funcptr = match.group(3)

    # gMock methods are defined using some variant of MOCK_METHODx(name, type)
    # where type may be float(), int(string), etc. Without context they are
    # virtually indistinguishable from int(x) casts. Likewise, gMock's
    # MockCallback takes a template parameter of the form return_type(arg_type),
    # which looks much like the cast we're trying to detect.
    #
    # std::function<> wrapper has a similar problem.
    #
    # Return types for function pointers also look like casts if they
    # don't have an extra space.
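    #
    # Concretely (an illustrative note): "int(x)" should be flagged as an
    # old-style cast here, while "MOCK_METHOD1(name, int(string))",
    # "std::function<int(char)>" and the function-pointer declaration
    # "int(*fp)(void*)" should not be; the filters below tell these apart.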
    if (matched_new is None and  # If new operator, then this isn't a cast
        not (Match(r'^\s*MOCK_(CONST_)?METHOD\d+(_T)?\(', line) or
             Search(r'\bMockCallback<.*>', line) or
             Search(r'\bstd::function<.*>', line)) and
        not (matched_funcptr and
             Match(r'\((?:[^() ]+::\s*\*\s*)?[^() ]+\)\s*\(',
                   matched_funcptr))):
      # Try a bit harder to catch gmock lines: the only place where
      # something looks like an old-style cast is where we declare the
      # return type of the mocked method, and the only time when we
      # are missing context is if MOCK_METHOD was split across
      # multiple lines. The missing MOCK_METHOD is usually one or two
      # lines back, so scan back one or two lines.
      #
      # It's not possible for gmock macros to appear in the first 2
      # lines, since the class head + section name takes up 2 lines.
      if (linenum < 2 or
          not (Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\((?:\S+,)?\s*$',
                     clean_lines.elided[linenum - 1]) or
               Match(r'^\s*MOCK_(?:CONST_)?METHOD\d+(?:_T)?\(\s*$',
                     clean_lines.elided[linenum - 2]))):
        error(filename, linenum, 'readability/casting', 4,
              'Using deprecated casting style. '
              'Use static_cast<%s>(...) instead' % matched_type)

  CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                  'static_cast',
                  r'\((int|float|double|bool|char|u?int(16|32|64))\)', error)

  # This doesn't catch all cases. Consider (const char * const)"hello".
  #
  # (char *) "foo" should always be a const_cast (reinterpret_cast won't
  # compile).
  if CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                     'const_cast', r'\((char\s?\*+\s?)\)\s*"', error):
    pass
  else:
    # Check pointer casts for other than string constants
    CheckCStyleCast(filename, linenum, line, clean_lines.raw_lines[linenum],
                    'reinterpret_cast', r'\((\w+\s?\*+\s?)\)', error)

  # In addition, we look for people taking the address of a cast. This
  # is dangerous -- casts can assign to temporaries, so the pointer doesn't
  # point where you think.
  match = Search(
      r'(?:&\(([^)]+)\)[\w(])|'
      r'(?:&(static|dynamic|down|reinterpret)_cast\b)', line)
  if match and match.group(1) != '*':
    error(filename, linenum, 'runtime/casting', 4,
          ('Are you taking an address of a cast? '
           'This is dangerous: could be a temp var. '
           'Take the address before doing the cast, rather than after'))

  # Create an extended_line, which is the concatenation of the current and
  # next lines, for more effective checking of code that may span more than one
  # line.
  if linenum + 1 < clean_lines.NumLines():
    extended_line = line + clean_lines.elided[linenum + 1]
  else:
    extended_line = line

  # Check for people declaring static/global STL strings at the top level.
  # This is dangerous because the C++ language does not guarantee that
  # globals with constructors are initialized before the first access.
  match = Match(
      r'((?:|static +)(?:|const +))string +([a-zA-Z0-9_:]+)\b(.*)',
      line)
  # Make sure it's not a function.
  # Function template specialization looks like: "string foo<Type>(...".
  # Class template definitions look like: "string Foo<Type>::Method(...".
  #
  # Also ignore things that look like operators. These are matched separately
  # because operator names cross non-word boundaries. If we change the pattern
  # above, we would decrease the accuracy of matching identifiers.
  if (match and
      not Search(r'\boperator\W', line) and
      not Match(r'\s*(<.*>)?(::[a-zA-Z0-9_]+)?\s*\(([^"]|$)', match.group(3))):
    error(filename, linenum, 'runtime/string', 4,
          'For a static/global string constant, use a C style string instead: '
          '"%schar %s[]".'
          % (match.group(1), match.group(2)))

  if Search(r'\b([A-Za-z0-9_]*_)\(\1\)', line):
    error(filename, linenum, 'runtime/init', 4,
          'You seem to be initializing a member variable with itself.')

  if file_extension == 'h':
    # TODO(unknown): check that 1-arg constructors are explicit.
    #                How to tell it's a constructor?
    #                (handled in CheckForNonStandardConstructs for now)
    # TODO(unknown): check that classes have DISALLOW_EVIL_CONSTRUCTORS
    #                (level 1 error)
    pass

  # Check if people are using the verboten C basic types. The only exception
  # we regularly allow is "unsigned short port" for port.
  if Search(r'\bshort port\b', line):
    if not Search(r'\bunsigned short port\b', line):
      error(filename, linenum, 'runtime/int', 4,
            'Use "unsigned short" for ports, not "short"')
  else:
    match = Search(r'\b(short|long(?! +double)|long long)\b', line)
    if match:
      error(filename, linenum, 'runtime/int', 4,
            'Use int16/int64/etc, rather than the C type %s' % match.group(1))

  # When snprintf is used, the second argument shouldn't be a literal.
  match = Search(r'snprintf\s*\(([^,]*),\s*([0-9]*)\s*,', line)
  if match and match.group(2) != '0':
    # If 2nd arg is zero, snprintf is used to calculate size.
    error(filename, linenum, 'runtime/printf', 3,
          'If you can, use sizeof(%s) instead of %s as the 2nd arg '
          'to snprintf.' % (match.group(1), match.group(2)))

  # Check if some verboten C functions are being used.
  if Search(r'\bsprintf\b', line):
    error(filename, linenum, 'runtime/printf', 5,
          'Never use sprintf. Use snprintf instead.')
  match = Search(r'\b(strcpy|strcat)\b', line)
  if match:
    error(filename, linenum, 'runtime/printf', 4,
          'Almost always, snprintf is better than %s' % match.group(1))

  # Check if some verboten operator overloading is going on
  # TODO(unknown): catch out-of-line unary operator&:
  #   class X {};
  #   int operator&(const X& x) { return 42; }  // unary operator&
  # The trick is it's hard to tell apart from binary operator&:
  #   class Y { int operator&(const Y& x) { return 23; } };  // binary operator&
  if Search(r'\boperator\s*&\s*\(\s*\)', line):
    error(filename, linenum, 'runtime/operator', 4,
          'Unary operator& is dangerous. Do not use it.')

  # Check for suspicious usage of "if" like
  # } if (a == b) {
  if Search(r'\}\s*if\s*\(', line):
    error(filename, linenum, 'readability/braces', 4,
          'Did you mean "else if"? If not, start a new line for "if".')

  # Check for potential format string bugs like printf(foo).
  # We constrain the pattern not to pick things like DocidForPrintf(foo).
  # Not perfect but it can catch printf(foo.c_str()) and printf(foo->c_str())
  # TODO(sugawarayu): Catch the following case. Need to change the calling
  # convention of the whole function to process multiple line to handle it.
  #   printf(
  #       boy_this_is_a_really_long_variable_that_cannot_fit_on_the_prev_line);
  printf_args = _GetTextInside(line, r'(?i)\b(string)?printf\s*\(')
  if printf_args:
    match = Match(r'([\w.\->()]+)$', printf_args)
    if match and match.group(1) != '__VA_ARGS__':
      function_name = re.search(r'\b((?:string)?printf)\s*\(',
                                line, re.I).group(1)
      error(filename, linenum, 'runtime/printf', 4,
            'Potential format string bug. Do %s("%%s", %s) instead.'
            % (function_name, match.group(1)))

  # Check for potential memset bugs like memset(buf, sizeof(buf), 0).
  match = Search(r'memset\s*\(([^,]*),\s*([^,]*),\s*0\s*\)', line)
  if match and not Match(r"^''|-?[0-9]+|0x[0-9A-Fa-f]$", match.group(2)):
    error(filename, linenum, 'runtime/memset', 4,
          'Did you mean "memset(%s, 0, %s)"?'
          % (match.group(1), match.group(2)))

  if Search(r'\busing namespace\b', line):
    error(filename, linenum, 'build/namespaces', 5,
          'Do not use namespace using-directives. '
          'Use using-declarations instead.')

  # Detect variable-length arrays.
  match = Match(r'\s*(.+::)?(\w+) [a-z]\w*\[(.+)];', line)
  if (match and match.group(2) != 'return' and match.group(2) != 'delete' and
      match.group(3).find(']') == -1):
    # Split the size using space and arithmetic operators as delimiters.
    # If any of the resulting tokens are not compile time constants then
    # report the error.
    tokens = re.split(r'\s|\+|\-|\*|\/|<<|>>]', match.group(3))
    is_const = True
    skip_next = False
    for tok in tokens:
      if skip_next:
        skip_next = False
        continue

      if Search(r'sizeof\(.+\)', tok): continue
      if Search(r'arraysize\(\w+\)', tok): continue

      tok = tok.lstrip('(')
      tok = tok.rstrip(')')
      if not tok: continue
      if Match(r'\d+', tok): continue
      if Match(r'0[xX][0-9a-fA-F]+', tok): continue
      if Match(r'k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?k[A-Z0-9]\w*', tok): continue
      if Match(r'(.+::)?[A-Z][A-Z0-9_]*', tok): continue
      # A catch all for tricky sizeof cases, including 'sizeof expression',
      # 'sizeof(*type)', 'sizeof(const type)', 'sizeof(struct StructName)'
      # requires skipping the next token because we split on ' ' and '*'.
      if tok.startswith('sizeof'):
        skip_next = True
        continue
      is_const = False
      break
    if not is_const:
      error(filename, linenum, 'runtime/arrays', 1,
            'Do not use variable-length arrays. Use an appropriately named '
            "('k' followed by CamelCase) compile-time constant for the size.")

  # If DISALLOW_EVIL_CONSTRUCTORS, DISALLOW_COPY_AND_ASSIGN, or
  # DISALLOW_IMPLICIT_CONSTRUCTORS is present, then it should be the last thing
  # in the class declaration.
  match = Match(
      (r'\s*'
       r'(DISALLOW_(EVIL_CONSTRUCTORS|COPY_AND_ASSIGN|IMPLICIT_CONSTRUCTORS))'
       r'\(.*\);$'),
      line)
  if match and linenum + 1 < clean_lines.NumLines():
    next_line = clean_lines.elided[linenum + 1]
    # We allow some, but not all, declarations of variables to be present
    # in the statement that defines the class. The [\w\*,\s]* fragment of
    # the regular expression below allows users to declare instances of
    # the class or pointers to instances, but not less common types such
    # as function pointers or arrays. It's a tradeoff between allowing
    # reasonable code and avoiding trying to parse more C++ using regexps.
    if not Search(r'^\s*}[\w\*,\s]*;', next_line):
      error(filename, linenum, 'readability/constructors', 3,
            match.group(1) + ' should be the last thing in the class')

  # Check for use of unnamed namespaces in header files. Registration
  # macros are typically OK, so we allow use of "namespace {" on lines
  # that end with backslashes.
  if (file_extension == 'h'
      and Search(r'\bnamespace\s*{', line)
      and line[-1] != '\\'):
    error(filename, linenum, 'build/namespaces', 4,
          'Do not use unnamed namespaces in header files. See '
          'http://google-styleguide.googlecode.com/svn/trunk/cppguide.xml#Namespaces'
          ' for more information.')


def CheckForNonConstReference(filename, clean_lines, linenum,
                              nesting_state, error):
  """Check for non-const references.

  Separate from CheckLanguage since it scans backwards from current
  line, instead of scanning forward.

  Args:
    filename: The name of the current file.
    clean_lines: A CleansedLines instance containing the file.
    linenum: The number of the line to check.
    nesting_state: A _NestingState instance which maintains information about
                   the current stack of nested blocks being parsed.
    error: The function to call with any errors found.
""" # Do nothing if there is no '&' on current line. line = clean_lines.elided[linenum] if '&' not in line: return # Long type names may be broken across multiple lines, usually in one # of these forms: # LongType # ::LongTypeContinued &identifier # LongType:: # LongTypeContinued &identifier # LongType< # ...>::LongTypeContinued &identifier # # If we detected a type split across two lines, join the previous # line to current line so that we can match const references # accordingly. # # Note that this only scans back one line, since scanning back # arbitrary number of lines would be expensive. If you have a type # that spans more than 2 lines, please use a typedef. if linenum > 1: previous = None if Match(r'\s*::(?:[\w<>]|::)+\s*&\s*\S', line): # previous_line\n + ::current_line previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+[\w<>])\s*$', clean_lines.elided[linenum - 1]) elif Match(r'\s*[a-zA-Z_]([\w<>]|::)+\s*&\s*\S', line): # previous_line::\n + current_line previous = Search(r'\b((?:const\s*)?(?:[\w<>]|::)+::)\s*$', clean_lines.elided[linenum - 1]) if previous: line = previous.group(1) + line.lstrip() else: # Check for templated parameter that is split across multiple lines endpos = line.rfind('>') if endpos > -1: (_, startline, startpos) = ReverseCloseExpression( clean_lines, linenum, endpos) if startpos > -1 and startline < linenum: # Found the matching < on an earlier line, collect all # pieces up to current line. line = '' for i in xrange(startline, linenum + 1): line += clean_lines.elided[i].strip() # Check for non-const references in function parameters. A single '&' may # found in the following places: # inside expression: binary & for bitwise AND # inside expression: unary & for taking the address of something # inside declarators: reference parameter # We will exclude the first two cases by checking that we are not inside a # function body, including one that was just introduced by a trailing '{'. # TODO(unknwon): Doesn't account for preprocessor directives. # TODO(unknown): Doesn't account for 'catch(Exception& e)' [rare]. check_params = False if not nesting_state.stack: check_params = True # top level elif (isinstance(nesting_state.stack[-1], _ClassInfo) or isinstance(nesting_state.stack[-1], _NamespaceInfo)): check_params = True # within class or namespace elif Match(r'.*{\s*$', line): if (len(nesting_state.stack) == 1 or isinstance(nesting_state.stack[-2], _ClassInfo) or isinstance(nesting_state.stack[-2], _NamespaceInfo)): check_params = True # just opened global/class/namespace block # We allow non-const references in a few standard places, like functions # called "swap()" or iostream operators like "<<" or ">>". Do not check # those function parameters. # # We also accept & in static_assert, which looks like a function but # it's actually a declaration expression. whitelisted_functions = (r'(?:[sS]wap(?:<\w:+>)?|' r'operator\s*[<>][<>]|' r'static_assert|COMPILE_ASSERT' r')\s*\(') if Search(whitelisted_functions, line): check_params = False elif not Search(r'\S+\([^)]*$', line): # Don't see a whitelisted function on this line. Actually we # didn't see any function name on this line, so this is likely a # multi-line parameter list. Try a bit harder to catch this case. 
    for i in xrange(2):
      if (linenum > i and
          Search(whitelisted_functions, clean_lines.elided[linenum - i - 1])):
        check_params = False
        break

  if check_params:
    decls = ReplaceAll(r'{[^}]*}', ' ', line)  # exclude function body
    for parameter in re.findall(_RE_PATTERN_REF_PARAM, decls):
      if not Match(_RE_PATTERN_CONST_REF_PARAM, parameter):
        error(filename, linenum, 'runtime/references', 2,
              'Is this a non-const reference? '
              'If so, make const or use a pointer: ' +
              ReplaceAll(' *<', '<', parameter))


def CheckCStyleCast(filename, linenum, line, raw_line, cast_type, pattern,
                    error):
  """Checks for a C-style cast by looking for the pattern.

  Args:
    filename: The name of the current file.
    linenum: The number of the line to check.
    line: The line of code to check.
    raw_line: The raw line of code to check, with comments.
    cast_type: The string for the C++ cast to recommend. This is either
      reinterpret_cast, static_cast, or const_cast, depending.
    pattern: The regular expression used to find C-style casts.
    error: The function to call with any errors found.

  Returns:
    True if an error was emitted.
    False otherwise.
  """
  match = Search(pattern, line)
  if not match:
    return False

  # e.g., sizeof(int)
  sizeof_match = Match(r'.*sizeof\s*$', line[0:match.start(1) - 1])
  if sizeof_match:
    error(filename, linenum, 'runtime/sizeof', 1,
          'Using sizeof(type). Use sizeof(varname) instead if possible')
    return True

  # operator++(int) and operator--(int)
  if (line[0:match.start(1) - 1].endswith(' operator++') or
      line[0:match.start(1) - 1].endswith(' operator--')):
    return False

  # A single unnamed argument for a function tends to look like old
  # style cast. If we see those, don't issue warnings for deprecated
  # casts, instead issue warnings for unnamed arguments where
  # appropriate.
  #
  # These are things that we want warnings for, since the style guide
  # explicitly requires all parameters to be named:
  #   Function(int);
  #   Function(int) {
  #   ConstMember(int) const;
  #   ConstMember(int) const {
  #   ExceptionMember(int) throw (...);
  #   ExceptionMember(int) throw (...) {
  #   PureVirtual(int) = 0;
  #
  # These are functions of some sort, where the compiler would be fine
  # if they had named parameters, but people often omit those
  # identifiers to reduce clutter:
  #   (FunctionPointer)(int);
  #   (FunctionPointer)(int) = value;
  #   Function((function_pointer_arg)(int))
  #   <TemplateArgument(int)>;
  #   <(FunctionPointerTemplateArgument)(int)>;
  remainder = line[match.end(0):]
  if Match(r'^\s*(?:;|const\b|throw\b|=|>|\{|\))', remainder):
    # Looks like an unnamed parameter.

    # Don't warn on any kind of template arguments.
    if Match(r'^\s*>', remainder):
      return False

    # Don't warn on assignments to function pointers, but keep warnings for
    # unnamed parameters to pure virtual functions. Note that this pattern
    # will also pass on assignments of "0" to function pointers, but the
    # preferred values for those would be "nullptr" or "NULL".
    matched_zero = Match(r'^\s*=\s*(\S+)\s*;', remainder)
    if matched_zero and matched_zero.group(1) != '0':
      return False

    # Don't warn on function pointer declarations. For this we need
    # to check what came before the "(type)" string.
    if Match(r'.*\)\s*$', line[0:match.start(0)]):
      return False

    # Don't warn if the parameter is named with block comments, e.g.:
    #  Function(int /*unused_param*/);
    if '/*' in raw_line:
      return False

    # Passed all filters, issue warning here.
    error(filename, linenum, 'readability/function', 3,
          'All parameters should be named in a function')
    return True

  # At this point, all that should be left is actual casts.
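  # For example (an illustrative note): a line containing "(int)x" passes
  # none of the filters above when called with the static_cast pattern, so
  # it reaches the error below and produces the suggestion
  # "Using C-style cast. Use static_cast<int>(...) instead".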
  error(filename, linenum, 'readability/casting', 4,
        'Using C-style cast. Use %s<%s>(...) instead' %
        (cast_type, match.group(1)))

  return True


_HEADERS_CONTAINING_TEMPLATES = (
    ('<deque>', ('deque',)),
    ('<functional>', ('unary_function', 'binary_function',
                      'plus', 'minus', 'multiplies', 'divides', 'modulus',
                      'negate',
                      'equal_to', 'not_equal_to', 'greater', 'less',
                      'greater_equal', 'less_equal',
                      'logical_and', 'logical_or', 'logical_not',
                      'unary_negate', 'not1', 'binary_negate', 'not2',
                      'bind1st', 'bind2nd',
                      'pointer_to_unary_function',
                      'pointer_to_binary_function',
                      'ptr_fun',
                      'mem_fun_t', 'mem_fun', 'mem_fun1_t', 'mem_fun1_ref_t',
                      'mem_fun_ref_t',
                      'const_mem_fun_t', 'const_mem_fun1_t',
                      'const_mem_fun_ref_t', 'const_mem_fun1_ref_t',
                      'mem_fun_ref',
                     )),
    ('<limits>', ('numeric_limits',)),
    ('<list>', ('list',)),
    ('<map>', ('map', 'multimap',)),
    ('<memory>', ('allocator',)),
    ('<queue>', ('queue', 'priority_queue',)),
    ('<set>', ('set', 'multiset',)),
    ('<stack>', ('stack',)),
    ('<string>', ('char_traits', 'basic_string',)),
    ('<utility>', ('pair',)),
    ('<vector>', ('vector',)),

    # gcc extensions.
    # Note: std::hash is their hash, ::hash is our hash
    ('<hash_map>', ('hash_map', 'hash_multimap',)),
    ('<hash_set>', ('hash_set', 'hash_multiset',)),
    ('<slist>', ('slist',)),
    )

_RE_PATTERN_STRING = re.compile(r'\bstring\b')

_re_pattern_algorithm_header = []
for _template in ('copy', 'max', 'min', 'min_element', 'sort', 'swap',
                  'transform'):
  # Match max<type>(..., ...), max(..., ...), but not foo->max, foo.max or
  # type::max().
  _re_pattern_algorithm_header.append(
      (re.compile(r'[^>.]\b' + _template + r'(<.*?>)?\([^\)]'),
       _template,
       '<algorithm>'))

_re_pattern_templates = []
for _header, _templates in _HEADERS_CONTAINING_TEMPLATES:
  for _template in _templates:
    _re_pattern_templates.append(
        (re.compile(r'(\<|\b)' + _template + r'\s*\<'),
         _template + '<>',
         _header))


def FilesBelongToSameModule(filename_cc, filename_h):
  """Check if these two filenames belong to the same module.

  The concept of a 'module' here is as follows:
  foo.h, foo-inl.h, foo.cc, foo_test.cc and foo_unittest.cc belong to the
  same 'module' if they are in the same directory.
  some/path/public/xyzzy and some/path/internal/xyzzy are also considered
  to belong to the same module here.

  If the filename_cc contains a longer path than the filename_h, for example,
  '/absolute/path/to/base/sysinfo.cc', and this file would include
  'base/sysinfo.h', this function also produces the prefix needed to open the
  header. This is used by the caller of this function to more robustly open the
  header file. We don't have access to the real include paths in this context,
  so we need this guesswork here.

  Known bugs: tools/base/bar.cc and base/bar.h belong to the same module
  according to this implementation. Because of this, this function gives
  some false positives. This should be sufficiently rare in practice.

  Args:
    filename_cc: is the path for the .cc file
    filename_h: is the path for the header path

  Returns:
    Tuple with a bool and a string:
    bool: True if filename_cc and filename_h belong to the same module.
    string: the additional prefix needed to open the header file.
""" if not filename_cc.endswith('.cc'): return (False, '') filename_cc = filename_cc[:-len('.cc')] if filename_cc.endswith('_unittest'): filename_cc = filename_cc[:-len('_unittest')] elif filename_cc.endswith('_test'): filename_cc = filename_cc[:-len('_test')] filename_cc = filename_cc.replace('/public/', '/') filename_cc = filename_cc.replace('/internal/', '/') if not filename_h.endswith('.h'): return (False, '') filename_h = filename_h[:-len('.h')] if filename_h.endswith('-inl'): filename_h = filename_h[:-len('-inl')] filename_h = filename_h.replace('/public/', '/') filename_h = filename_h.replace('/internal/', '/') files_belong_to_same_module = filename_cc.endswith(filename_h) common_path = '' if files_belong_to_same_module: common_path = filename_cc[:-len(filename_h)] return files_belong_to_same_module, common_path def UpdateIncludeState(filename, include_state, io=codecs): """Fill up the include_state with new includes found from the file. Args: filename: the name of the header to read. include_state: an _IncludeState instance in which the headers are inserted. io: The io factory to use to read the file. Provided for testability. Returns: True if a header was succesfully added. False otherwise. """ headerfile = None try: headerfile = io.open(filename, 'r', 'utf8', 'replace') except IOError: return False linenum = 0 for line in headerfile: linenum += 1 clean_line = CleanseComments(line) match = _RE_PATTERN_INCLUDE.search(clean_line) if match: include = match.group(2) # The value formatting is cute, but not really used right now. # What matters here is that the key is in include_state. include_state.setdefault(include, '%s:%d' % (filename, linenum)) return True def CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error, io=codecs): """Reports for missing stl includes. This function will output warnings to make sure you are including the headers necessary for the stl containers and functions that you use. We only give one reason to include a header. For example, if you use both equal_to<> and less<> in a .h file, only one (the latter in the file) of these will be reported as a reason to include the . Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. include_state: An _IncludeState instance. error: The function to call with any errors found. io: The IO factory to use to read the header file. Provided for unittest injection. """ required = {} # A map of header name to linenumber and the template entity. # Example of required: { '': (1219, 'less<>') } for linenum in xrange(clean_lines.NumLines()): line = clean_lines.elided[linenum] if not line or line[0] == '#': continue # String is special -- it is a non-templatized type in STL. matched = _RE_PATTERN_STRING.search(line) if matched: # Don't warn about strings in non-STL namespaces: # (We check only the first match per line; good enough.) prefix = line[:matched.start()] if prefix.endswith('std::') or not prefix.endswith('::'): required[''] = (linenum, 'string') for pattern, template, header in _re_pattern_algorithm_header: if pattern.search(line): required[header] = (linenum, template) # The following function is just a speed up, no semantics are changed. if not '<' in line: # Reduces the cpu time usage by skipping lines. continue for pattern, template, header in _re_pattern_templates: if pattern.search(line): required[header] = (linenum, template) # The policy is that if you #include something in foo.h you don't need to # include it again in foo.cc. 
Here, we will look at possible includes. # Let's copy the include_state so it is only messed up within this function. include_state = include_state.copy() # Did we find the header for this file (if any) and successfully load it? header_found = False # Use the absolute path so that matching works properly. abs_filename = FileInfo(filename).FullName() # For Emacs's flymake. # If cpplint is invoked from Emacs's flymake, a temporary file is generated # by flymake and that file name might end with '_flymake.cc'. In that case, # restore original file name here so that the corresponding header file can be # found. # e.g. If the file name is 'foo_flymake.cc', we should search for 'foo.h' # instead of 'foo_flymake.h' abs_filename = re.sub(r'_flymake\.cc$', '.cc', abs_filename) # include_state is modified during iteration, so we iterate over a copy of # the keys. header_keys = include_state.keys() for header in header_keys: (same_module, common_path) = FilesBelongToSameModule(abs_filename, header) fullpath = common_path + header if same_module and UpdateIncludeState(fullpath, include_state, io): header_found = True # If we can't find the header file for a .cc, assume it's because we don't # know where to look. In that case we'll give up as we're not sure they # didn't include it in the .h file. # TODO(unknown): Do a better job of finding .h files so we are confident that # not having the .h file means there isn't one. if filename.endswith('.cc') and not header_found: return # All the lines have been processed, report the errors found. for required_header_unstripped in required: template = required[required_header_unstripped][1] if required_header_unstripped.strip('<>"') not in include_state: error(filename, required[required_header_unstripped][0], 'build/include_what_you_use', 4, 'Add #include ' + required_header_unstripped + ' for ' + template) _RE_PATTERN_EXPLICIT_MAKEPAIR = re.compile(r'\bmake_pair\s*<') def CheckMakePairUsesDeduction(filename, clean_lines, linenum, error): """Check that make_pair's template arguments are deduced. G++ 4.6 in C++0x mode fails badly if make_pair's template arguments are specified explicitly, and such use isn't intended in any case. Args: filename: The name of the current file. clean_lines: A CleansedLines instance containing the file. linenum: The number of the line to check. error: The function to call with any errors found. """ line = clean_lines.elided[linenum] match = _RE_PATTERN_EXPLICIT_MAKEPAIR.search(line) if match: error(filename, linenum, 'build/explicit_make_pair', 4, # 4 = high confidence 'For C++11-compatibility, omit template arguments from make_pair' ' OR use pair directly OR if appropriate, construct a pair directly') def ProcessLine(filename, file_extension, clean_lines, line, include_state, function_state, nesting_state, error, extra_check_functions=[]): """Processes a single line in the file. Args: filename: Filename of the file that is being processed. file_extension: The extension (dot not included) of the file. clean_lines: An array of strings, each representing a line of the file, with comments stripped. line: Number of line being processed. include_state: An _IncludeState instance in which the headers are inserted. function_state: A _FunctionState instance which counts function lines, etc. nesting_state: A _NestingState instance which maintains information about the current stack of nested blocks being parsed.
error: A callable to which errors are reported, which takes 4 arguments: filename, line number, error level, and message extra_check_functions: An array of additional check functions that will be run on each source line. Each function takes 4 arguments: filename, clean_lines, line, error """ raw_lines = clean_lines.raw_lines ParseNolintSuppressions(filename, raw_lines[line], line, error) nesting_state.Update(filename, clean_lines, line, error) if nesting_state.stack and nesting_state.stack[-1].inline_asm != _NO_ASM: return CheckForFunctionLengths(filename, clean_lines, line, function_state, error) CheckForMultilineCommentsAndStrings(filename, clean_lines, line, error) CheckStyle(filename, clean_lines, line, file_extension, nesting_state, error) CheckLanguage(filename, clean_lines, line, file_extension, include_state, nesting_state, error) CheckForNonConstReference(filename, clean_lines, line, nesting_state, error) CheckForNonStandardConstructs(filename, clean_lines, line, nesting_state, error) CheckVlogArguments(filename, clean_lines, line, error) CheckPosixThreading(filename, clean_lines, line, error) CheckInvalidIncrement(filename, clean_lines, line, error) CheckMakePairUsesDeduction(filename, clean_lines, line, error) for check_fn in extra_check_functions: check_fn(filename, clean_lines, line, error) def ProcessFileData(filename, file_extension, lines, error, extra_check_functions=[]): """Performs lint checks and reports any errors to the given error function. Args: filename: Filename of the file that is being processed. file_extension: The extension (dot not included) of the file. lines: An array of strings, each representing a line of the file, with the last element being empty if the file is terminated with a newline. error: A callable to which errors are reported, which takes 4 arguments: filename, line number, error level, and message extra_check_functions: An array of additional check functions that will be run on each source line. Each function takes 4 arguments: filename, clean_lines, line, error """ lines = (['// marker so line numbers and indices both start at 1'] + lines + ['// marker so line numbers end in a known way']) include_state = _IncludeState() function_state = _FunctionState() nesting_state = _NestingState() ResetNolintSuppressions() CheckForCopyright(filename, lines, error) if file_extension == 'h': CheckForHeaderGuard(filename, lines, error) RemoveMultiLineComments(filename, lines, error) clean_lines = CleansedLines(lines) for line in xrange(clean_lines.NumLines()): ProcessLine(filename, file_extension, clean_lines, line, include_state, function_state, nesting_state, error, extra_check_functions) nesting_state.CheckCompletedBlocks(filename, error) CheckForIncludeWhatYouUse(filename, clean_lines, include_state, error) # We check here rather than inside ProcessLine so that we see raw # lines rather than "cleaned" lines. CheckForBadCharacters(filename, lines, error) CheckForNewlineAtEOF(filename, lines, error) def ProcessFile(filename, vlevel, extra_check_functions=[]): """Does google-lint on a single file. Args: filename: The name of the file to parse. vlevel: The level of errors to report. Every error of confidence >= verbose_level will be reported. 0 is a good default. extra_check_functions: An array of additional check functions that will be run on each source line. Each function takes 4 arguments: filename, clean_lines, line, error """ _SetVerboseLevel(vlevel) try: # Support the UNIX convention of using "-" for stdin. 
Note that # we are not opening the file with universal newline support # (which codecs doesn't support anyway), so the resulting lines do # contain trailing '\r' characters if we are reading a file that # has CRLF endings. # If after the split a trailing '\r' is present, it is removed # below. If it is not expected to be present (i.e. os.linesep != # '\r\n' as in Windows), a warning is issued below if this file # is processed. if filename == '-': lines = codecs.StreamReaderWriter(sys.stdin, codecs.getreader('utf8'), codecs.getwriter('utf8'), 'replace').read().split('\n') else: lines = codecs.open(filename, 'r', 'utf8', 'replace').read().split('\n') carriage_return_found = False # Remove trailing '\r'. for linenum in range(len(lines)): if lines[linenum].endswith('\r'): lines[linenum] = lines[linenum].rstrip('\r') carriage_return_found = True except IOError: sys.stderr.write( "Skipping input '%s': Can't open for reading\n" % filename) return # Note, if no dot is found, this will give the entire filename as the ext. file_extension = filename[filename.rfind('.') + 1:] # When reading from stdin, the extension is unknown, so no cpplint tests # should rely on the extension. if filename != '-' and file_extension not in _valid_extensions: sys.stderr.write('Ignoring %s; not a valid file name ' '(%s)\n' % (filename, ', '.join(_valid_extensions))) else: ProcessFileData(filename, file_extension, lines, Error, extra_check_functions) if carriage_return_found and os.linesep != '\r\n': # Use 0 for linenum since outputting only one error for potentially # several lines. Error(filename, 0, 'whitespace/newline', 1, 'One or more unexpected \\r (^M) found; ' 'better to use only a \\n') sys.stderr.write('Done processing %s\n' % filename) def PrintUsage(message): """Prints a brief usage string and exits, optionally with an error message. Args: message: The optional error message. """ sys.stderr.write(_USAGE) if message: sys.exit('\nFATAL ERROR: ' + message) else: sys.exit(1) def PrintCategories(): """Prints a list of all the error-categories used by error messages. These are the categories used to filter messages via --filter. """ sys.stderr.write(''.join(' %s\n' % cat for cat in _ERROR_CATEGORIES)) sys.exit(0) def ParseArguments(args): """Parses the command line arguments. This may set the output format and verbosity level as side-effects. Args: args: The command line arguments. Returns: The list of filenames to lint.
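For example (illustrative): ParseArguments(['--filter=-build/include', 'foo.cc']) installs that filter as a side effect and returns ['foo.cc'].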
""" try: (opts, filenames) = getopt.getopt(args, '', ['help', 'output=', 'verbose=', 'counting=', 'filter=', 'root=', 'linelength=', 'extensions=']) except getopt.GetoptError: PrintUsage('Invalid arguments.') verbosity = _VerboseLevel() output_format = _OutputFormat() filters = '' counting_style = '' for (opt, val) in opts: if opt == '--help': PrintUsage(None) elif opt == '--output': if val not in ('emacs', 'vs7', 'eclipse'): PrintUsage('The only allowed output formats are emacs, vs7 and eclipse.') output_format = val elif opt == '--verbose': verbosity = int(val) elif opt == '--filter': filters = val if not filters: PrintCategories() elif opt == '--counting': if val not in ('total', 'toplevel', 'detailed'): PrintUsage('Valid counting options are total, toplevel, and detailed') counting_style = val elif opt == '--root': global _root _root = val elif opt == '--linelength': global _line_length try: _line_length = int(val) except ValueError: PrintUsage('Line length must be digits.') elif opt == '--extensions': global _valid_extensions try: _valid_extensions = set(val.split(',')) except ValueError: PrintUsage('Extensions must be comma seperated list.') if not filenames: PrintUsage('No files were specified.') _SetOutputFormat(output_format) _SetVerboseLevel(verbosity) _SetFilters(filters) _SetCountingStyle(counting_style) return filenames def main(): filenames = ParseArguments(sys.argv[1:]) # Change stderr to write with replacement characters so we don't die # if we try to print something containing non-ASCII characters. sys.stderr = codecs.StreamReaderWriter(sys.stderr, codecs.getreader('utf8'), codecs.getwriter('utf8'), 'replace') _cpplint_state.ResetErrorCounts() for filename in filenames: ProcessFile(filename, _cpplint_state.verbose_level) _cpplint_state.PrintErrorCounts() sys.exit(_cpplint_state.error_count > 0) if __name__ == '__main__': main() libvpx-1.8.2/tools/diff.py000066400000000000000000000101721357355204000154550ustar00rootroot00000000000000#!/usr/bin/env python ## Copyright (c) 2012 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. 
## """Classes for representing diff pieces.""" __author__ = "jkoleszar@google.com" import re class DiffLines(object): """A container for one half of a diff.""" def __init__(self, filename, offset, length): self.filename = filename self.offset = offset self.length = length self.lines = [] self.delta_line_nums = [] def Append(self, line): l = len(self.lines) if line[0] != " ": self.delta_line_nums.append(self.offset + l) self.lines.append(line[1:]) assert l+1 <= self.length def Complete(self): return len(self.lines) == self.length def __contains__(self, item): return item >= self.offset and item <= self.offset + self.length - 1 class DiffHunk(object): """A container for one diff hunk, consisting of two DiffLines.""" def __init__(self, header, file_a, file_b, start_a, len_a, start_b, len_b): self.header = header self.left = DiffLines(file_a, start_a, len_a) self.right = DiffLines(file_b, start_b, len_b) self.lines = [] def Append(self, line): """Adds a line to the DiffHunk and its DiffLines children.""" if line[0] == "-": self.left.Append(line) elif line[0] == "+": self.right.Append(line) elif line[0] == " ": self.left.Append(line) self.right.Append(line) elif line[0] == "\\": # Ignore newline messages from git diff. pass else: assert False, ("Unrecognized character at start of diff line " "%r" % line[0]) self.lines.append(line) def Complete(self): return self.left.Complete() and self.right.Complete() def __repr__(self): return "DiffHunk(%s, %s, len %d)" % ( self.left.filename, self.right.filename, max(self.left.length, self.right.length)) def ParseDiffHunks(stream): """Walk a file-like object, yielding DiffHunks as they're parsed.""" file_regex = re.compile(r"(\+\+\+|---) (\S+)") range_regex = re.compile(r"@@ -(\d+)(,(\d+))? \+(\d+)(,(\d+))?") hunk = None while True: line = stream.readline() if not line: break if hunk is None: # Parse file names diff_file = file_regex.match(line) if diff_file: if line.startswith("---"): a_line = line a = diff_file.group(2) continue if line.startswith("+++"): b_line = line b = diff_file.group(2) continue # Parse offset/lengths diffrange = range_regex.match(line) if diffrange: if diffrange.group(2): start_a = int(diffrange.group(1)) len_a = int(diffrange.group(3)) else: start_a = 1 len_a = int(diffrange.group(1)) if diffrange.group(5): start_b = int(diffrange.group(4)) len_b = int(diffrange.group(6)) else: start_b = 1 len_b = int(diffrange.group(4)) header = [a_line, b_line, line] hunk = DiffHunk(header, a, b, start_a, len_a, start_b, len_b) else: # Add the current line to the hunk hunk.Append(line) # See if the whole hunk has been parsed. If so, yield it and prepare # for the next hunk. if hunk.Complete(): yield hunk hunk = None # Partial hunks are a parse error assert hunk is None libvpx-1.8.2/tools/gen_authors.sh000077500000000000000000000004721357355204000170520ustar00rootroot00000000000000#!/bin/bash # Add organization names manually. cat <" | sort | uniq | grep -v corp.google \ | grep -v noreply) Google Inc. The Mozilla Foundation The Xiph.Org Foundation EOF libvpx-1.8.2/tools/intersect-diffs.py000077500000000000000000000044741357355204000176510ustar00rootroot00000000000000#!/usr/bin/env python ## Copyright (c) 2012 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. 
All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## """Calculates the "intersection" of two unified diffs. Given two diffs, A and B, it finds all hunks in B that had non-context lines in A and prints them to stdout. This is useful to determine the hunks in B that are relevant to A. The resulting file can be applied with patch(1) on top of A. """ __author__ = "jkoleszar@google.com" import sys import diff def FormatDiffHunks(hunks): """Re-serialize a list of DiffHunks.""" r = [] last_header = None for hunk in hunks: this_header = hunk.header[0:2] if last_header != this_header: r.extend(hunk.header) last_header = this_header else: r.extend(hunk.header[2]) r.extend(hunk.lines) r.append("\n") return "".join(r) def ZipHunks(rhs_hunks, lhs_hunks): """Join two hunk lists on filename.""" for rhs_hunk in rhs_hunks: rhs_file = rhs_hunk.right.filename.split("/")[1:] for lhs_hunk in lhs_hunks: lhs_file = lhs_hunk.left.filename.split("/")[1:] if lhs_file != rhs_file: continue yield (rhs_hunk, lhs_hunk) def main(): old_hunks = [x for x in diff.ParseDiffHunks(open(sys.argv[1], "r"))] new_hunks = [x for x in diff.ParseDiffHunks(open(sys.argv[2], "r"))] out_hunks = [] # Join the right hand side of the older diff with the left hand side of the # newer diff. for old_hunk, new_hunk in ZipHunks(old_hunks, new_hunks): if new_hunk in out_hunks: continue old_lines = old_hunk.right new_lines = new_hunk.left # Determine if this hunk overlaps any non-context line from the other for i in old_lines.delta_line_nums: if i in new_lines: out_hunks.append(new_hunk) break if out_hunks: print FormatDiffHunks(out_hunks) sys.exit(1) if __name__ == "__main__": main() libvpx-1.8.2/tools/lint-hunks.py000077500000000000000000000115471357355204000166530ustar00rootroot00000000000000#!/usr/bin/python ## Copyright (c) 2012 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. 
## """Performs style checking on each diff hunk.""" import getopt import os import StringIO import subprocess import sys import diff SHORT_OPTIONS = "h" LONG_OPTIONS = ["help"] TOPLEVEL_CMD = ["git", "rev-parse", "--show-toplevel"] DIFF_CMD = ["git", "diff"] DIFF_INDEX_CMD = ["git", "diff-index", "-u", "HEAD", "--"] SHOW_CMD = ["git", "show"] CPPLINT_FILTERS = ["-readability/casting"] class Usage(Exception): pass class SubprocessException(Exception): def __init__(self, args): msg = "Failed to execute '%s'"%(" ".join(args)) super(SubprocessException, self).__init__(msg) class Subprocess(subprocess.Popen): """Adds the notion of an expected returncode to Popen.""" def __init__(self, args, expected_returncode=0, **kwargs): self._args = args self._expected_returncode = expected_returncode super(Subprocess, self).__init__(args, **kwargs) def communicate(self, *args, **kwargs): result = super(Subprocess, self).communicate(*args, **kwargs) if self._expected_returncode is not None: try: ok = self.returncode in self._expected_returncode except TypeError: ok = self.returncode == self._expected_returncode if not ok: raise SubprocessException(self._args) return result def main(argv=None): if argv is None: argv = sys.argv try: try: opts, args = getopt.getopt(argv[1:], SHORT_OPTIONS, LONG_OPTIONS) except getopt.error, msg: raise Usage(msg) # process options for o, _ in opts: if o in ("-h", "--help"): print __doc__ sys.exit(0) if args and len(args) > 1: print __doc__ sys.exit(0) # Find the fully qualified path to the root of the tree tl = Subprocess(TOPLEVEL_CMD, stdout=subprocess.PIPE) tl = tl.communicate()[0].strip() # See if we're working on the index or not. if args: diff_cmd = DIFF_CMD + [args[0] + "^!"] else: diff_cmd = DIFF_INDEX_CMD # Build the command line to execute cpplint cpplint_cmd = [os.path.join(tl, "tools", "cpplint.py"), "--filter=" + ",".join(CPPLINT_FILTERS), "-"] # Get a list of all affected lines file_affected_line_map = {} p = Subprocess(diff_cmd, stdout=subprocess.PIPE) stdout = p.communicate()[0] for hunk in diff.ParseDiffHunks(StringIO.StringIO(stdout)): filename = hunk.right.filename[2:] if filename not in file_affected_line_map: file_affected_line_map[filename] = set() file_affected_line_map[filename].update(hunk.right.delta_line_nums) # Run each affected file through cpplint lint_failed = False for filename, affected_lines in file_affected_line_map.iteritems(): if filename.split(".")[-1] not in ("c", "h", "cc"): continue if args: # File contents come from git show_cmd = SHOW_CMD + [args[0] + ":" + filename] show = Subprocess(show_cmd, stdout=subprocess.PIPE) lint = Subprocess(cpplint_cmd, expected_returncode=(0, 1), stdin=show.stdout, stderr=subprocess.PIPE) lint_out = lint.communicate()[1] else: # File contents come from the working tree lint = Subprocess(cpplint_cmd, expected_returncode=(0, 1), stdin=subprocess.PIPE, stderr=subprocess.PIPE) stdin = open(os.path.join(tl, filename)).read() lint_out = lint.communicate(stdin)[1] for line in lint_out.split("\n"): fields = line.split(":") if fields[0] != "-": continue warning_line_num = int(fields[1]) if warning_line_num in affected_lines: print "%s:%d:%s"%(filename, warning_line_num, ":".join(fields[2:])) lint_failed = True # Set exit code if any relevant lint errors seen if lint_failed: return 1 except Usage, err: print >>sys.stderr, err print >>sys.stderr, "for help use --help" return 2 if __name__ == "__main__": sys.exit(main()) 
libvpx-1.8.2/tools/non_greedy_mv/000077500000000000000000000000001357355204000170255ustar00rootroot00000000000000libvpx-1.8.2/tools/non_greedy_mv/non_greedy_mv.py000066400000000000000000000133711357355204000222370ustar00rootroot00000000000000import sys import matplotlib.pyplot as plt from matplotlib.collections import LineCollection from matplotlib import colors as mcolors import numpy as np import math def draw_mv_ls(axis, mv_ls, mode=0): colors = np.array([(1., 0., 0., 1.)]) segs = np.array([ np.array([[ptr[0], ptr[1]], [ptr[0] + ptr[2], ptr[1] + ptr[3]]]) for ptr in mv_ls ]) line_segments = LineCollection( segs, linewidths=(1.,), colors=colors, linestyle='solid') axis.add_collection(line_segments) if mode == 0: axis.scatter(mv_ls[:, 0], mv_ls[:, 1], s=2, c='b') else: axis.scatter( mv_ls[:, 0] + mv_ls[:, 2], mv_ls[:, 1] + mv_ls[:, 3], s=2, c='b') def draw_pred_block_ls(axis, mv_ls, bs, mode=0): colors = np.array([(0., 0., 0., 1.)]) segs = [] for ptr in mv_ls: if mode == 0: x = ptr[0] y = ptr[1] else: x = ptr[0] + ptr[2] y = ptr[1] + ptr[3] x_ls = [x, x + bs, x + bs, x, x] y_ls = [y, y, y + bs, y + bs, y] segs.append(np.column_stack([x_ls, y_ls])) line_segments = LineCollection( segs, linewidths=(.5,), colors=colors, linestyle='solid') axis.add_collection(line_segments) def read_frame(fp, no_swap=0): plane = [None, None, None] for i in range(3): line = fp.readline() word_ls = line.split() word_ls = [int(item) for item in word_ls] rows = word_ls[0] cols = word_ls[1] line = fp.readline() word_ls = line.split() word_ls = [int(item) for item in word_ls] plane[i] = np.array(word_ls).reshape(rows, cols) if i > 0: plane[i] = plane[i].repeat(2, axis=0).repeat(2, axis=1) plane = np.array(plane) if no_swap == 0: plane = np.swapaxes(np.swapaxes(plane, 0, 1), 1, 2) return plane def yuv_to_rgb(yuv): #mat = np.array([ # [1.164, 0 , 1.596 ], # [1.164, -0.391, -0.813], # [1.164, 2.018 , 0 ] ] # ) #c = np.array([[ -16 , -16 , -16 ], # [ 0 , -128, -128 ], # [ -128, -128, 0 ]]) mat = np.array([[1, 0, 1.4075], [1, -0.3445, -0.7169], [1, 1.7790, 0]]) c = np.array([[0, 0, 0], [0, -128, -128], [-128, -128, 0]]) mat_c = np.dot(mat, c) v = np.array([mat_c[0, 0], mat_c[1, 1], mat_c[2, 2]]) mat = mat.transpose() rgb = np.dot(yuv, mat) + v rgb = rgb.astype(int) rgb = rgb.clip(0, 255) return rgb / 255. def read_feature_score(fp, mv_rows, mv_cols): line = fp.readline() word_ls = line.split() feature_score = np.array([math.log(float(v) + 1, 2) for v in word_ls]) feature_score = feature_score.reshape(mv_rows, mv_cols) return feature_score def read_mv_mode_arr(fp, mv_rows, mv_cols): line = fp.readline() word_ls = line.split() mv_mode_arr = np.array([int(v) for v in word_ls]) mv_mode_arr = mv_mode_arr.reshape(mv_rows, mv_cols) return mv_mode_arr def read_frame_dpl_stats(fp): line = fp.readline() word_ls = line.split() frame_idx = int(word_ls[1]) mi_rows = int(word_ls[3]) mi_cols = int(word_ls[5]) bs = int(word_ls[7]) ref_frame_idx = int(word_ls[9]) rf_idx = int(word_ls[11]) gf_frame_offset = int(word_ls[13]) ref_gf_frame_offset = int(word_ls[15]) mi_size = bs / 8 mv_ls = [] mv_rows = int((math.ceil(mi_rows * 1. / mi_size))) mv_cols = int((math.ceil(mi_cols * 1. / mi_size))) for i in range(mv_rows * mv_cols): line = fp.readline() word_ls = line.split() row = int(word_ls[0]) * 8. col = int(word_ls[1]) * 8. mv_row = int(word_ls[2]) / 8. mv_col = int(word_ls[3]) / 8. 
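# Note: row/col above were scaled from mi (8-pixel) units up to pixels, and # mv_row/mv_col from eighth-pel units down to whole pixels, so every entry # appended to mv_ls below is expressed in pixels.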
mv_ls.append([col, row, mv_col, mv_row]) mv_ls = np.array(mv_ls) feature_score = read_feature_score(fp, mv_rows, mv_cols) mv_mode_arr = read_mv_mode_arr(fp, mv_rows, mv_cols) img = yuv_to_rgb(read_frame(fp)) ref = yuv_to_rgb(read_frame(fp)) return rf_idx, frame_idx, ref_frame_idx, gf_frame_offset, ref_gf_frame_offset, mv_ls, img, ref, bs, feature_score, mv_mode_arr def read_dpl_stats_file(filename, frame_num=0): fp = open(filename) line = fp.readline() width = 0 height = 0 data_ls = [] while (line): if line[0] == '=': data_ls.append(read_frame_dpl_stats(fp)) line = fp.readline() if frame_num > 0 and len(data_ls) == frame_num: break return data_ls if __name__ == '__main__': filename = sys.argv[1] data_ls = read_dpl_stats_file(filename, frame_num=5) for rf_idx, frame_idx, ref_frame_idx, gf_frame_offset, ref_gf_frame_offset, mv_ls, img, ref, bs, feature_score, mv_mode_arr in data_ls: fig, axes = plt.subplots(2, 2) axes[0][0].imshow(img) draw_mv_ls(axes[0][0], mv_ls) draw_pred_block_ls(axes[0][0], mv_ls, bs, mode=0) #axes[0].grid(color='k', linestyle='-') axes[0][0].set_ylim(img.shape[0], 0) axes[0][0].set_xlim(0, img.shape[1]) if ref is not None: axes[0][1].imshow(ref) draw_mv_ls(axes[0][1], mv_ls, mode=1) draw_pred_block_ls(axes[0][1], mv_ls, bs, mode=1) #axes[1].grid(color='k', linestyle='-') axes[0][1].set_ylim(ref.shape[0], 0) axes[0][1].set_xlim(0, ref.shape[1]) axes[1][0].imshow(feature_score) #feature_score_arr = feature_score.flatten() #feature_score_max = feature_score_arr.max() #feature_score_min = feature_score_arr.min() #step = (feature_score_max - feature_score_min) / 20. #feature_score_bins = np.arange(feature_score_min, feature_score_max, step) #axes[1][1].hist(feature_score_arr, bins=feature_score_bins) im = axes[1][1].imshow(mv_mode_arr) #axes[1][1].figure.colorbar(im, ax=axes[1][1]) print rf_idx, frame_idx, ref_frame_idx, gf_frame_offset, ref_gf_frame_offset, len(mv_ls) flatten_mv_mode = mv_mode_arr.flatten() zero_mv_count = sum(flatten_mv_mode == 0); new_mv_count = sum(flatten_mv_mode == 1); ref_mv_count = sum(flatten_mv_mode == 2) + sum(flatten_mv_mode == 3); print zero_mv_count, new_mv_count, ref_mv_count plt.show() libvpx-1.8.2/tools/set_analyzer_env.sh000066400000000000000000000075361357355204000201070ustar00rootroot00000000000000## Copyright (c) 2018 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## ## Sourcing this file sets environment variables to simplify setting up ## sanitizer builds and testing. sanitizer="${1}" case "${sanitizer}" in address) ;; cfi) ;; integer) ;; memory) ;; thread) ;; undefined) ;; clear) echo "Clearing environment:" set -x unset CC CXX LD AR unset CFLAGS CXXFLAGS LDFLAGS unset ASAN_OPTIONS MSAN_OPTIONS TSAN_OPTIONS UBSAN_OPTIONS set +x return ;; *) echo "Usage: source set_analyzer_env.sh [<sanitizer>|clear]" echo " Supported sanitizers:" echo " address cfi integer memory thread undefined" return 1 ;; esac if [ ! $(which clang) ]; then # TODO(johannkoenig): Support gcc analyzers. echo "ERROR: 'clang' must be in your PATH" return 1 fi # Warnings.
if [ "${sanitizer}" = "undefined" -o "${sanitizer}" = "integer" ]; then echo "WARNING: When building the ${sanitizer} sanitizer for 32 bit targets" echo "you must run:" echo "export LDFLAGS=\"\${LDFLAGS} --rtlib=compiler-rt -lgcc_s\"" echo "See http://llvm.org/bugs/show_bug.cgi?id=17693 for details." fi if [ "${sanitizer}" = "undefined" ]; then major_version=$(clang --version | head -n 1 \ | grep -o -E "[[:digit:]]\.[[:digit:]]\.[[:digit:]]" | cut -f1 -d.) if [ ${major_version} -eq 5 ]; then echo "WARNING: clang v5 has a problem with vp9 x86_64 high bit depth" echo "configurations. It can take ~40 minutes to compile" echo "vpx_dsp/x86/fwd_txfm_sse2.c" echo "clang v4 did not have this issue." fi fi echo "It is recommended to configure with '--enable-debug' to improve stack" echo "traces. On mac builds, run 'dysmutil' on the output binaries (vpxenc," echo "test_libvpx, etc) to link the stack traces to source code lines." # Build configuration. cflags="-fsanitize=${sanitizer}" ldflags="-fsanitize=${sanitizer}" # http://code.google.com/p/webm/issues/detail?id=570 cflags="${cflags} -fno-strict-aliasing" # Useful backtraces. cflags="${cflags} -fno-omit-frame-pointer" # Exact backtraces. cflags="${cflags} -fno-optimize-sibling-calls" if [ "${sanitizer}" = "cfi" ]; then # https://clang.llvm.org/docs/ControlFlowIntegrity.html cflags="${cflags} -fno-sanitize-trap=cfi -flto -fvisibility=hidden" ldflags="${ldflags} -fno-sanitize-trap=cfi -flto -fuse-ld=gold" export AR="llvm-ar" fi set -x export CC="clang" export CXX="clang++" export LD="clang++" export CFLAGS="${cflags}" export CXXFLAGS="${cflags}" export LDFLAGS="${ldflags}" set +x # Execution configuration. sanitizer_options="" sanitizer_options="${sanitizer_options}:handle_segv=1" sanitizer_options="${sanitizer_options}:handle_abort=1" sanitizer_options="${sanitizer_options}:handle_sigfpe=1" sanitizer_options="${sanitizer_options}:fast_unwind_on_fatal=1" sanitizer_options="${sanitizer_options}:allocator_may_return_null=1" case "${sanitizer}" in address) sanitizer_options="${sanitizer_options}:detect_stack_use_after_return=1" sanitizer_options="${sanitizer_options}:max_uar_stack_size_log=17" set -x export ASAN_OPTIONS="${sanitizer_options}" set +x ;; cfi) # No environment settings ;; memory) set -x export MSAN_OPTIONS="${sanitizer_options}" set +x ;; thread) # The thread sanitizer uses an entirely independent set of options. set -x export TSAN_OPTIONS="halt_on_error=1" set +x ;; undefined|integer) sanitizer_options="${sanitizer_options}:print_stacktrace=1" set -x export UBSAN_OPTIONS="${sanitizer_options}" set +x ;; esac libvpx-1.8.2/tools/tiny_ssim.c000066400000000000000000000413721357355204000163630ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <assert.h> #include <errno.h> #include <math.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "vpx/vpx_codec.h" #include "vpx/vpx_integer.h" #include "./y4minput.h" #include "vpx_dsp/ssim.h" #include "vpx_ports/mem.h" static const int64_t cc1 = 26634; // (64^2*(.01*255)^2 static const int64_t cc2 = 239708; // (64^2*(.03*255)^2 static const int64_t cc1_10 = 428658; // (64^2*(.01*1023)^2 static const int64_t cc2_10 = 3857925; // (64^2*(.03*1023)^2 static const int64_t cc1_12 = 6868593; // (64^2*(.01*4095)^2 static const int64_t cc2_12 = 61817334; // (64^2*(.03*4095)^2 #if CONFIG_VP9_HIGHBITDEPTH static uint64_t calc_plane_error16(uint16_t *orig, int orig_stride, uint16_t *recon, int recon_stride, unsigned int cols, unsigned int rows) { unsigned int row, col; uint64_t total_sse = 0; int diff; if (orig == NULL || recon == NULL) { assert(0); return 0; } for (row = 0; row < rows; row++) { for (col = 0; col < cols; col++) { diff = orig[col] - recon[col]; total_sse += diff * diff; } orig += orig_stride; recon += recon_stride; } return total_sse; } #endif // CONFIG_VP9_HIGHBITDEPTH static uint64_t calc_plane_error(uint8_t *orig, int orig_stride, uint8_t *recon, int recon_stride, unsigned int cols, unsigned int rows) { unsigned int row, col; uint64_t total_sse = 0; int diff; if (orig == NULL || recon == NULL) { assert(0); return 0; } for (row = 0; row < rows; row++) { for (col = 0; col < cols; col++) { diff = orig[col] - recon[col]; total_sse += diff * diff; } orig += orig_stride; recon += recon_stride; } return total_sse; } #define MAX_PSNR 100 static double mse2psnr(double samples, double peak, double mse) { double psnr; if (mse > 0.0) psnr = 10.0 * log10(peak * peak * samples / mse); else psnr = MAX_PSNR; // Limit to prevent / 0 if (psnr > MAX_PSNR) psnr = MAX_PSNR; return psnr; } typedef enum { RAW_YUV, Y4M } input_file_type; typedef struct input_file { FILE *file; input_file_type type; unsigned char *buf; y4m_input y4m; vpx_image_t img; int w; int h; int bit_depth; int frame_size; } input_file_t; // Open a file and determine if it's y4m or raw. If y4m, get the header. static int open_input_file(const char *file_name, input_file_t *input, int w, int h, int bit_depth) { char y4m_buf[4]; input->w = w; input->h = h; input->bit_depth = bit_depth; input->type = RAW_YUV; input->buf = NULL; input->file = strcmp(file_name, "-") ? fopen(file_name, "rb") : stdin; if (input->file == NULL) return -1; if (fread(y4m_buf, 1, 4, input->file) != 4) return -1; if (memcmp(y4m_buf, "YUV4", 4) == 0) input->type = Y4M; switch (input->type) { case Y4M: y4m_input_open(&input->y4m, input->file, y4m_buf, 4, 0); input->w = input->y4m.pic_w; input->h = input->y4m.pic_h; input->bit_depth = input->y4m.bit_depth; // Y4M alloc's its own buf. Init this to avoid problems if we never // read frames.
memset(&input->img, 0, sizeof(input->img)); break; case RAW_YUV: fseek(input->file, 0, SEEK_SET); input->w = w; input->h = h; // handle odd frame sizes input->frame_size = w * h + ((w + 1) / 2) * ((h + 1) / 2) * 2; if (bit_depth > 8) { input->frame_size *= 2; } input->buf = malloc(input->frame_size); break; } return 0; } static void close_input_file(input_file_t *in) { if (in->file) fclose(in->file); if (in->type == Y4M) { vpx_img_free(&in->img); } else { free(in->buf); } } static size_t read_input_file(input_file_t *in, unsigned char **y, unsigned char **u, unsigned char **v, int bd) { size_t r1 = 0; switch (in->type) { case Y4M: r1 = y4m_input_fetch_frame(&in->y4m, in->file, &in->img); *y = in->img.planes[0]; *u = in->img.planes[1]; *v = in->img.planes[2]; break; case RAW_YUV: if (bd < 9) { r1 = fread(in->buf, in->frame_size, 1, in->file); *y = in->buf; *u = in->buf + in->w * in->h; *v = *u + ((1 + in->w) / 2) * ((1 + in->h) / 2); } else { r1 = fread(in->buf, in->frame_size, 1, in->file); *y = in->buf; *u = in->buf + (in->w * in->h) * 2; *v = *u + 2 * ((1 + in->w) / 2) * ((1 + in->h) / 2); } break; } return r1; } static void ssim_parms_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr) { int i, j; if (s == NULL || r == NULL || sum_s == NULL || sum_r == NULL || sum_sq_s == NULL || sum_sq_r == NULL || sum_sxr == NULL) { assert(0); return; } for (i = 0; i < 8; i++, s += sp, r += rp) { for (j = 0; j < 8; j++) { *sum_s += s[j]; *sum_r += r[j]; *sum_sq_s += s[j] * s[j]; *sum_sq_r += r[j] * r[j]; *sum_sxr += s[j] * r[j]; } } } #if CONFIG_VP9_HIGHBITDEPTH static void highbd_ssim_parms_8x8(const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s, uint32_t *sum_sq_r, uint32_t *sum_sxr) { int i, j; if (s == NULL || r == NULL || sum_s == NULL || sum_r == NULL || sum_sq_s == NULL || sum_sq_r == NULL || sum_sxr == NULL) { assert(0); return; } for (i = 0; i < 8; i++, s += sp, r += rp) { for (j = 0; j < 8; j++) { *sum_s += s[j]; *sum_r += r[j]; *sum_sq_s += s[j] * s[j]; *sum_sq_r += r[j] * r[j]; *sum_sxr += s[j] * r[j]; } } } #endif // CONFIG_VP9_HIGHBITDEPTH static double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s, uint32_t sum_sq_r, uint32_t sum_sxr, int count, uint32_t bd) { double ssim_n, ssim_d; int64_t c1 = 0, c2 = 0; if (bd == 8) { // scale the constants by number of pixels c1 = (cc1 * count * count) >> 12; c2 = (cc2 * count * count) >> 12; } else if (bd == 10) { c1 = (cc1_10 * count * count) >> 12; c2 = (cc2_10 * count * count) >> 12; } else if (bd == 12) { c1 = (cc1_12 * count * count) >> 12; c2 = (cc2_12 * count * count) >> 12; } else { assert(0); } ssim_n = (2.0 * sum_s * sum_r + c1) * (2.0 * count * sum_sxr - 2.0 * sum_s * sum_r + c2); ssim_d = ((double)sum_s * sum_s + (double)sum_r * sum_r + c1) * ((double)count * sum_sq_s - (double)sum_s * sum_s + (double)count * sum_sq_r - (double)sum_r * sum_r + c2); return ssim_n / ssim_d; } static double ssim_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp) { uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64, 8); } #if CONFIG_VP9_HIGHBITDEPTH static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t bd) { uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; 
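/* highbd_ssim_parms_8x8() accumulates sum_s, sum_r, sum_sq_s, sum_sq_r and sum_sxr over the 8x8 window; similarity() above then evaluates the SSIM ratio directly in sum form: ssim = ((2*sum_s*sum_r + c1) * (2*n*sum_sxr - 2*sum_s*sum_r + c2)) / ((sum_s^2 + sum_r^2 + c1) * (n*sum_sq_s - sum_s^2 + n*sum_sq_r - sum_r^2 + c2)) with n = 64 samples per window and c1, c2 scaled for the bit depth. */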
highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64, bd); } #endif // CONFIG_VP9_HIGHBITDEPTH // We are using an 8x8 moving window with starting location of each 8x8 window // on the 4x4 pixel grid. Such an arrangement allows the windows to overlap // block boundaries to penalize blocking artifacts. static double ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1, int stride_img2, int width, int height) { int i, j; int samples = 0; double ssim_total = 0; // sample points start at each 4x4 location for (i = 0; i <= height - 8; i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) { for (j = 0; j <= width - 8; j += 4) { double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2); ssim_total += v; samples++; } } ssim_total /= samples; return ssim_total; } #if CONFIG_VP9_HIGHBITDEPTH static double highbd_ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1, int stride_img2, int width, int height, uint32_t bd) { int i, j; int samples = 0; double ssim_total = 0; // sample points start at each 4x4 location for (i = 0; i <= height - 8; i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) { for (j = 0; j <= width - 8; j += 4) { double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1, CONVERT_TO_SHORTPTR(img2 + j), stride_img2, bd); ssim_total += v; samples++; } } ssim_total /= samples; return ssim_total; } #endif // CONFIG_VP9_HIGHBITDEPTH int main(int argc, char *argv[]) { FILE *framestats = NULL; int bit_depth = 8; int w = 0, h = 0, tl_skip = 0, tl_skips_remaining = 0; double ssimavg = 0, ssimyavg = 0, ssimuavg = 0, ssimvavg = 0; double psnrglb = 0, psnryglb = 0, psnruglb = 0, psnrvglb = 0; double psnravg = 0, psnryavg = 0, psnruavg = 0, psnrvavg = 0; double *ssimy = NULL, *ssimu = NULL, *ssimv = NULL; uint64_t *psnry = NULL, *psnru = NULL, *psnrv = NULL; size_t i, n_frames = 0, allocated_frames = 0; int return_value = 0; input_file_t in[2]; double peak = 255.0; memset(in, 0, sizeof(in)); if (argc < 2) { fprintf(stderr, "Usage: %s file1.{yuv|y4m} file2.{yuv|y4m}" " [WxH tl_skip={0,1,3} frame_stats_file bits]\n", argv[0]); return 1; } if (argc > 3) { sscanf(argv[3], "%dx%d", &w, &h); } if (argc > 6) { sscanf(argv[6], "%d", &bit_depth); } if (open_input_file(argv[1], &in[0], w, h, bit_depth) < 0) { fprintf(stderr, "File %s can't be opened or parsed!\n", argv[1]); goto clean_up; } if (w == 0 && h == 0) { // If a y4m is the first file and w, h is not set, grab from the first file. w = in[0].w; h = in[0].h; bit_depth = in[0].bit_depth; } if (bit_depth == 10) peak = 1023.0; if (bit_depth == 12) peak = 4095.0; if (open_input_file(argv[2], &in[1], w, h, bit_depth) < 0) { fprintf(stderr, "File %s can't be opened or parsed!\n", argv[2]); goto clean_up; } if (in[0].w != in[1].w || in[0].h != in[1].h || in[0].w != w || in[0].h != h || w == 0 || h == 0) { fprintf(stderr, "Failing: Image dimensions don't match or are unspecified!\n"); return_value = 1; goto clean_up; } if (in[0].bit_depth != in[1].bit_depth) { fprintf(stderr, "Failing: Image bit depths don't match or are unspecified!\n"); return_value = 1; goto clean_up; } bit_depth = in[0].bit_depth; // Number of frames to skip from file1.yuv for every frame used. Normal // values 0, 1 and 3 correspond to TL2, TL1 and TL0 respectively for a 3TL // encoding in mode 10. 7 would be reasonable for comparing TL0 of a 4-layer // encoding.
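// For example, with tl_skip=3, frames 0, 4, 8, ... of file1.yuv are compared // against frames 0, 1, 2, ... of file2.yuv.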
if (argc > 4) { sscanf(argv[4], "%d", &tl_skip); if (argc > 5) { framestats = fopen(argv[5], "w"); if (!framestats) { fprintf(stderr, "Could not open \"%s\" for writing: %s\n", argv[5], strerror(errno)); return_value = 1; goto clean_up; } } } while (1) { size_t r1, r2; unsigned char *y[2], *u[2], *v[2]; r1 = read_input_file(&in[0], &y[0], &u[0], &v[0], bit_depth); if (r1) { // Reading parts of file1.yuv that were not used in temporal layer. if (tl_skips_remaining > 0) { --tl_skips_remaining; continue; } // Use frame, but skip |tl_skip| after it. tl_skips_remaining = tl_skip; } r2 = read_input_file(&in[1], &y[1], &u[1], &v[1], bit_depth); if (r1 && r2 && r1 != r2) { fprintf(stderr, "Failed to read data: %s [%d/%d]\n", strerror(errno), (int)r1, (int)r2); return_value = 1; goto clean_up; } else if (r1 == 0 || r2 == 0) { break; } #if CONFIG_VP9_HIGHBITDEPTH #define psnr_and_ssim(ssim, psnr, buf0, buf1, w, h) \ if (bit_depth < 9) { \ ssim = ssim2(buf0, buf1, w, w, w, h); \ psnr = calc_plane_error(buf0, w, buf1, w, w, h); \ } else { \ ssim = highbd_ssim2(CONVERT_TO_BYTEPTR(buf0), CONVERT_TO_BYTEPTR(buf1), w, \ w, w, h, bit_depth); \ psnr = calc_plane_error16(CAST_TO_SHORTPTR(buf0), w, \ CAST_TO_SHORTPTR(buf1), w, w, h); \ } #else #define psnr_and_ssim(ssim, psnr, buf0, buf1, w, h) \ ssim = ssim2(buf0, buf1, w, w, w, h); \ psnr = calc_plane_error(buf0, w, buf1, w, w, h); #endif // CONFIG_VP9_HIGHBITDEPTH if (n_frames == allocated_frames) { allocated_frames = allocated_frames == 0 ? 1024 : allocated_frames * 2; ssimy = realloc(ssimy, allocated_frames * sizeof(*ssimy)); ssimu = realloc(ssimu, allocated_frames * sizeof(*ssimu)); ssimv = realloc(ssimv, allocated_frames * sizeof(*ssimv)); psnry = realloc(psnry, allocated_frames * sizeof(*psnry)); psnru = realloc(psnru, allocated_frames * sizeof(*psnru)); psnrv = realloc(psnrv, allocated_frames * sizeof(*psnrv)); } psnr_and_ssim(ssimy[n_frames], psnry[n_frames], y[0], y[1], w, h); psnr_and_ssim(ssimu[n_frames], psnru[n_frames], u[0], u[1], (w + 1) / 2, (h + 1) / 2); psnr_and_ssim(ssimv[n_frames], psnrv[n_frames], v[0], v[1], (w + 1) / 2, (h + 1) / 2); n_frames++; } if (framestats) { fprintf(framestats, "ssim,ssim-y,ssim-u,ssim-v,psnr,psnr-y,psnr-u,psnr-v\n"); } for (i = 0; i < n_frames; ++i) { double frame_ssim; double frame_psnr, frame_psnry, frame_psnru, frame_psnrv; frame_ssim = 0.8 * ssimy[i] + 0.1 * (ssimu[i] + ssimv[i]); ssimavg += frame_ssim; ssimyavg += ssimy[i]; ssimuavg += ssimu[i]; ssimvavg += ssimv[i]; frame_psnr = mse2psnr(w * h * 6 / 4, peak, (double)psnry[i] + psnru[i] + psnrv[i]); frame_psnry = mse2psnr(w * h * 4 / 4, peak, (double)psnry[i]); frame_psnru = mse2psnr(w * h * 1 / 4, peak, (double)psnru[i]); frame_psnrv = mse2psnr(w * h * 1 / 4, peak, (double)psnrv[i]); psnravg += frame_psnr; psnryavg += frame_psnry; psnruavg += frame_psnru; psnrvavg += frame_psnrv; psnryglb += psnry[i]; psnruglb += psnru[i]; psnrvglb += psnrv[i]; if (framestats) { fprintf(framestats, "%lf,%lf,%lf,%lf,%lf,%lf,%lf,%lf\n", frame_ssim, ssimy[i], ssimu[i], ssimv[i], frame_psnr, frame_psnry, frame_psnru, frame_psnrv); } } ssimavg /= n_frames; ssimyavg /= n_frames; ssimuavg /= n_frames; ssimvavg /= n_frames; printf("VpxSSIM: %lf\n", 100 * pow(ssimavg, 8.0)); printf("SSIM: %lf\n", ssimavg); printf("SSIM-Y: %lf\n", ssimyavg); printf("SSIM-U: %lf\n", ssimuavg); printf("SSIM-V: %lf\n", ssimvavg); puts(""); psnravg /= n_frames; psnryavg /= n_frames; psnruavg /= n_frames; psnrvavg /= n_frames; printf("AvgPSNR: %lf\n", psnravg); printf("AvgPSNR-Y: %lf\n", psnryavg); 
printf("AvgPSNR-U: %lf\n", psnruavg); printf("AvgPSNR-V: %lf\n", psnrvavg); puts(""); psnrglb = psnryglb + psnruglb + psnrvglb; psnrglb = mse2psnr((double)n_frames * w * h * 6 / 4, peak, psnrglb); psnryglb = mse2psnr((double)n_frames * w * h * 4 / 4, peak, psnryglb); psnruglb = mse2psnr((double)n_frames * w * h * 1 / 4, peak, psnruglb); psnrvglb = mse2psnr((double)n_frames * w * h * 1 / 4, peak, psnrvglb); printf("GlbPSNR: %lf\n", psnrglb); printf("GlbPSNR-Y: %lf\n", psnryglb); printf("GlbPSNR-U: %lf\n", psnruglb); printf("GlbPSNR-V: %lf\n", psnrvglb); puts(""); printf("Nframes: %d\n", (int)n_frames); clean_up: close_input_file(&in[0]); close_input_file(&in[1]); if (framestats) fclose(framestats); free(ssimy); free(ssimu); free(ssimv); free(psnry); free(psnru); free(psnrv); return return_value; } libvpx-1.8.2/tools/wrap-commit-msg.py000077500000000000000000000040761357355204000176010ustar00rootroot00000000000000#!/usr/bin/env python ## Copyright (c) 2012 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## """Wraps paragraphs of text, preserving manual formatting This is like fold(1), but has the special convention of not modifying lines that start with whitespace. This allows you to intersperse blocks with special formatting, like code blocks, with written prose. The prose will be wordwrapped, and the manual formatting will be preserved. * This won't handle the case of a bulleted (or ordered) list specially, so manual wrapping must be done. Occasionally it's useful to put something with explicit formatting that doesn't look at all like a block of text inline. indicator = has_leading_whitespace(line); if (indicator) preserve_formatting(line); The intent is that this docstring would make it through the transform and still be legible and presented as it is in the source. If additional cases are handled, update this doc to describe the effect. """ __author__ = "jkoleszar@google.com" import textwrap import sys def wrap(text): if text: return textwrap.fill(text, break_long_words=False) + '\n' return "" def main(fileobj): text = "" output = "" while True: line = fileobj.readline() if not line: break if line.lstrip() == line: text += line else: output += wrap(text) text="" output += line output += wrap(text) # Replace the file or write to stdout. if fileobj == sys.stdin: fileobj = sys.stdout else: fileobj.seek(0) fileobj.truncate(0) fileobj.write(output) if __name__ == "__main__": if len(sys.argv) > 1: main(open(sys.argv[1], "r+")) else: main(sys.stdin) libvpx-1.8.2/tools_common.c000066400000000000000000000571231357355204000157160ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <math.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "./tools_common.h" #if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER #include "vpx/vp8cx.h" #endif #if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER #include "vpx/vp8dx.h" #endif #if defined(_WIN32) || defined(__OS2__) #include <io.h> #include <fcntl.h> #ifdef __OS2__ #define _setmode setmode #define _fileno fileno #define _O_BINARY O_BINARY #endif #endif #define LOG_ERROR(label) \ do { \ const char *l = label; \ va_list ap; \ va_start(ap, fmt); \ if (l) fprintf(stderr, "%s: ", l); \ vfprintf(stderr, fmt, ap); \ fprintf(stderr, "\n"); \ va_end(ap); \ } while (0) #if CONFIG_ENCODERS /* Swallow warnings about unused results of fread/fwrite */ static size_t wrap_fread(void *ptr, size_t size, size_t nmemb, FILE *stream) { return fread(ptr, size, nmemb, stream); } #define fread wrap_fread #endif FILE *set_binary_mode(FILE *stream) { (void)stream; #if defined(_WIN32) || defined(__OS2__) _setmode(_fileno(stream), _O_BINARY); #endif return stream; } void die(const char *fmt, ...) { LOG_ERROR(NULL); usage_exit(); } void fatal(const char *fmt, ...) { LOG_ERROR("Fatal"); exit(EXIT_FAILURE); } void warn(const char *fmt, ...) { LOG_ERROR("Warning"); } void die_codec(vpx_codec_ctx_t *ctx, const char *s) { const char *detail = vpx_codec_error_detail(ctx); printf("%s: %s\n", s, vpx_codec_error(ctx)); if (detail) printf(" %s\n", detail); exit(EXIT_FAILURE); } int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame) { FILE *f = input_ctx->file; struct FileTypeDetectionBuffer *detect = &input_ctx->detect; int plane = 0; int shortread = 0; const int bytespp = (yuv_frame->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1; for (plane = 0; plane < 3; ++plane) { uint8_t *ptr; const int w = vpx_img_plane_width(yuv_frame, plane); const int h = vpx_img_plane_height(yuv_frame, plane); int r; /* Determine the correct plane based on the image format. The for-loop * always counts in Y,U,V order, but this may not match the order of * the data on disk. */ switch (plane) { case 1: ptr = yuv_frame->planes[yuv_frame->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_V : VPX_PLANE_U]; break; case 2: ptr = yuv_frame->planes[yuv_frame->fmt == VPX_IMG_FMT_YV12 ? VPX_PLANE_U : VPX_PLANE_V]; break; default: ptr = yuv_frame->planes[plane]; } for (r = 0; r < h; ++r) { size_t needed = w * bytespp; size_t buf_position = 0; const size_t left = detect->buf_read - detect->position; if (left > 0) { const size_t more = (left < needed) ?
left : needed; memcpy(ptr, detect->buf + detect->position, more); buf_position = more; needed -= more; detect->position += more; } if (needed > 0) { shortread |= (fread(ptr + buf_position, 1, needed, f) < needed); } ptr += yuv_frame->stride[plane]; } } return shortread; } #if CONFIG_ENCODERS static const VpxInterface vpx_encoders[] = { #if CONFIG_VP8_ENCODER { "vp8", VP8_FOURCC, &vpx_codec_vp8_cx }, #endif #if CONFIG_VP9_ENCODER { "vp9", VP9_FOURCC, &vpx_codec_vp9_cx }, #endif }; int get_vpx_encoder_count(void) { return sizeof(vpx_encoders) / sizeof(vpx_encoders[0]); } const VpxInterface *get_vpx_encoder_by_index(int i) { return &vpx_encoders[i]; } const VpxInterface *get_vpx_encoder_by_name(const char *name) { int i; for (i = 0; i < get_vpx_encoder_count(); ++i) { const VpxInterface *encoder = get_vpx_encoder_by_index(i); if (strcmp(encoder->name, name) == 0) return encoder; } return NULL; } #endif // CONFIG_ENCODERS #if CONFIG_DECODERS static const VpxInterface vpx_decoders[] = { #if CONFIG_VP8_DECODER { "vp8", VP8_FOURCC, &vpx_codec_vp8_dx }, #endif #if CONFIG_VP9_DECODER { "vp9", VP9_FOURCC, &vpx_codec_vp9_dx }, #endif }; int get_vpx_decoder_count(void) { return sizeof(vpx_decoders) / sizeof(vpx_decoders[0]); } const VpxInterface *get_vpx_decoder_by_index(int i) { return &vpx_decoders[i]; } const VpxInterface *get_vpx_decoder_by_name(const char *name) { int i; for (i = 0; i < get_vpx_decoder_count(); ++i) { const VpxInterface *const decoder = get_vpx_decoder_by_index(i); if (strcmp(decoder->name, name) == 0) return decoder; } return NULL; } const VpxInterface *get_vpx_decoder_by_fourcc(uint32_t fourcc) { int i; for (i = 0; i < get_vpx_decoder_count(); ++i) { const VpxInterface *const decoder = get_vpx_decoder_by_index(i); if (decoder->fourcc == fourcc) return decoder; } return NULL; } #endif // CONFIG_DECODERS int vpx_img_plane_width(const vpx_image_t *img, int plane) { if (plane > 0 && img->x_chroma_shift > 0) return (img->d_w + 1) >> img->x_chroma_shift; else return img->d_w; } int vpx_img_plane_height(const vpx_image_t *img, int plane) { if (plane > 0 && img->y_chroma_shift > 0) return (img->d_h + 1) >> img->y_chroma_shift; else return img->d_h; } void vpx_img_write(const vpx_image_t *img, FILE *file) { int plane; for (plane = 0; plane < 3; ++plane) { const unsigned char *buf = img->planes[plane]; const int stride = img->stride[plane]; const int w = vpx_img_plane_width(img, plane) * ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1); const int h = vpx_img_plane_height(img, plane); int y; for (y = 0; y < h; ++y) { fwrite(buf, 1, w, file); buf += stride; } } } int vpx_img_read(vpx_image_t *img, FILE *file) { int plane; for (plane = 0; plane < 3; ++plane) { unsigned char *buf = img->planes[plane]; const int stride = img->stride[plane]; const int w = vpx_img_plane_width(img, plane) * ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1); const int h = vpx_img_plane_height(img, plane); int y; for (y = 0; y < h; ++y) { if (fread(buf, 1, w, file) != (size_t)w) return 0; buf += stride; } } return 1; } // TODO(dkovalev) change sse_to_psnr signature: double -> int64_t double sse_to_psnr(double samples, double peak, double sse) { static const double kMaxPSNR = 100.0; if (sse > 0.0) { const double psnr = 10.0 * log10(samples * peak * peak / sse); return psnr > kMaxPSNR ? 
kMaxPSNR : psnr; } else { return kMaxPSNR; } } #if CONFIG_ENCODERS int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img) { FILE *f = input_ctx->file; y4m_input *y4m = &input_ctx->y4m; int shortread = 0; if (input_ctx->file_type == FILE_TYPE_Y4M) { if (y4m_input_fetch_frame(y4m, f, img) < 1) return 0; } else { shortread = read_yuv_frame(input_ctx, img); } return !shortread; } int file_is_y4m(const char detect[4]) { if (memcmp(detect, "YUV4", 4) == 0) { return 1; } return 0; } int fourcc_is_ivf(const char detect[4]) { if (memcmp(detect, "DKIF", 4) == 0) { return 1; } return 0; } void open_input_file(struct VpxInputContext *input) { /* Parse certain options from the input file, if possible */ input->file = strcmp(input->filename, "-") ? fopen(input->filename, "rb") : set_binary_mode(stdin); if (!input->file) fatal("Failed to open input file"); if (!fseeko(input->file, 0, SEEK_END)) { /* Input file is seekable. Figure out how long it is, so we can get * progress info. */ input->length = ftello(input->file); rewind(input->file); } /* Default to 1:1 pixel aspect ratio. */ input->pixel_aspect_ratio.numerator = 1; input->pixel_aspect_ratio.denominator = 1; /* For RAW input sources, these bytes will be applied on the first frame * in read_frame(). */ input->detect.buf_read = fread(input->detect.buf, 1, 4, input->file); input->detect.position = 0; if (input->detect.buf_read == 4 && file_is_y4m(input->detect.buf)) { if (y4m_input_open(&input->y4m, input->file, input->detect.buf, 4, input->only_i420) >= 0) { input->file_type = FILE_TYPE_Y4M; input->width = input->y4m.pic_w; input->height = input->y4m.pic_h; input->pixel_aspect_ratio.numerator = input->y4m.par_n; input->pixel_aspect_ratio.denominator = input->y4m.par_d; input->framerate.numerator = input->y4m.fps_n; input->framerate.denominator = input->y4m.fps_d; input->fmt = input->y4m.vpx_fmt; input->bit_depth = input->y4m.bit_depth; } else { fatal("Unsupported Y4M stream."); } } else if (input->detect.buf_read == 4 && fourcc_is_ivf(input->detect.buf)) { fatal("IVF is not supported as input."); } else { input->file_type = FILE_TYPE_RAW; } } void close_input_file(struct VpxInputContext *input) { fclose(input->file); if (input->file_type == FILE_TYPE_Y4M) y4m_input_close(&input->y4m); } #endif // TODO(debargha): Consolidate the functions below into a separate file. #if CONFIG_VP9_HIGHBITDEPTH static void highbd_img_upshift(vpx_image_t *dst, vpx_image_t *src, int input_shift) { // Note the offset is 1 less than half. const int offset = input_shift > 0 ?
(1 << (input_shift - 1)) - 1 : 0; int plane; if (dst->d_w != src->d_w || dst->d_h != src->d_h || dst->x_chroma_shift != src->x_chroma_shift || dst->y_chroma_shift != src->y_chroma_shift || dst->fmt != src->fmt || input_shift < 0) { fatal("Unsupported image conversion"); } switch (src->fmt) { case VPX_IMG_FMT_I42016: case VPX_IMG_FMT_I42216: case VPX_IMG_FMT_I44416: case VPX_IMG_FMT_I44016: break; default: fatal("Unsupported image conversion"); break; } for (plane = 0; plane < 3; plane++) { int w = src->d_w; int h = src->d_h; int x, y; if (plane) { w = (w + src->x_chroma_shift) >> src->x_chroma_shift; h = (h + src->y_chroma_shift) >> src->y_chroma_shift; } for (y = 0; y < h; y++) { uint16_t *p_src = (uint16_t *)(src->planes[plane] + y * src->stride[plane]); uint16_t *p_dst = (uint16_t *)(dst->planes[plane] + y * dst->stride[plane]); for (x = 0; x < w; x++) *p_dst++ = (*p_src++ << input_shift) + offset; } } } static void lowbd_img_upshift(vpx_image_t *dst, vpx_image_t *src, int input_shift) { // Note the offset is 1 less than half. const int offset = input_shift > 0 ? (1 << (input_shift - 1)) - 1 : 0; int plane; if (dst->d_w != src->d_w || dst->d_h != src->d_h || dst->x_chroma_shift != src->x_chroma_shift || dst->y_chroma_shift != src->y_chroma_shift || dst->fmt != src->fmt + VPX_IMG_FMT_HIGHBITDEPTH || input_shift < 0) { fatal("Unsupported image conversion"); } switch (src->fmt) { case VPX_IMG_FMT_I420: case VPX_IMG_FMT_I422: case VPX_IMG_FMT_I444: case VPX_IMG_FMT_I440: break; default: fatal("Unsupported image conversion"); break; } for (plane = 0; plane < 3; plane++) { int w = src->d_w; int h = src->d_h; int x, y; if (plane) { w = (w + src->x_chroma_shift) >> src->x_chroma_shift; h = (h + src->y_chroma_shift) >> src->y_chroma_shift; } for (y = 0; y < h; y++) { uint8_t *p_src = src->planes[plane] + y * src->stride[plane]; uint16_t *p_dst = (uint16_t *)(dst->planes[plane] + y * dst->stride[plane]); for (x = 0; x < w; x++) { *p_dst++ = (*p_src++ << input_shift) + offset; } } } } void vpx_img_upshift(vpx_image_t *dst, vpx_image_t *src, int input_shift) { if (src->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { highbd_img_upshift(dst, src, input_shift); } else { lowbd_img_upshift(dst, src, input_shift); } } void vpx_img_truncate_16_to_8(vpx_image_t *dst, vpx_image_t *src) { int plane; if (dst->fmt + VPX_IMG_FMT_HIGHBITDEPTH != src->fmt || dst->d_w != src->d_w || dst->d_h != src->d_h || dst->x_chroma_shift != src->x_chroma_shift || dst->y_chroma_shift != src->y_chroma_shift) { fatal("Unsupported image conversion"); } switch (dst->fmt) { case VPX_IMG_FMT_I420: case VPX_IMG_FMT_I422: case VPX_IMG_FMT_I444: case VPX_IMG_FMT_I440: break; default: fatal("Unsupported image conversion"); break; } for (plane = 0; plane < 3; plane++) { int w = src->d_w; int h = src->d_h; int x, y; if (plane) { w = (w + src->x_chroma_shift) >> src->x_chroma_shift; h = (h + src->y_chroma_shift) >> src->y_chroma_shift; } for (y = 0; y < h; y++) { uint16_t *p_src = (uint16_t *)(src->planes[plane] + y * src->stride[plane]); uint8_t *p_dst = dst->planes[plane] + y * dst->stride[plane]; for (x = 0; x < w; x++) { *p_dst++ = (uint8_t)(*p_src++); } } } } static void highbd_img_downshift(vpx_image_t *dst, vpx_image_t *src, int down_shift) { int plane; if (dst->d_w != src->d_w || dst->d_h != src->d_h || dst->x_chroma_shift != src->x_chroma_shift || dst->y_chroma_shift != src->y_chroma_shift || dst->fmt != src->fmt || down_shift < 0) { fatal("Unsupported image conversion"); } switch (src->fmt) { case VPX_IMG_FMT_I42016: case VPX_IMG_FMT_I42216: 
case VPX_IMG_FMT_I44416: case VPX_IMG_FMT_I44016: break; default: fatal("Unsupported image conversion"); break; } for (plane = 0; plane < 3; plane++) { int w = src->d_w; int h = src->d_h; int x, y; if (plane) { w = (w + src->x_chroma_shift) >> src->x_chroma_shift; h = (h + src->y_chroma_shift) >> src->y_chroma_shift; } for (y = 0; y < h; y++) { uint16_t *p_src = (uint16_t *)(src->planes[plane] + y * src->stride[plane]); uint16_t *p_dst = (uint16_t *)(dst->planes[plane] + y * dst->stride[plane]); for (x = 0; x < w; x++) *p_dst++ = *p_src++ >> down_shift; } } } static void lowbd_img_downshift(vpx_image_t *dst, vpx_image_t *src, int down_shift) { int plane; if (dst->d_w != src->d_w || dst->d_h != src->d_h || dst->x_chroma_shift != src->x_chroma_shift || dst->y_chroma_shift != src->y_chroma_shift || src->fmt != dst->fmt + VPX_IMG_FMT_HIGHBITDEPTH || down_shift < 0) { fatal("Unsupported image conversion"); } switch (dst->fmt) { case VPX_IMG_FMT_I420: case VPX_IMG_FMT_I422: case VPX_IMG_FMT_I444: case VPX_IMG_FMT_I440: break; default: fatal("Unsupported image conversion"); break; } for (plane = 0; plane < 3; plane++) { int w = src->d_w; int h = src->d_h; int x, y; if (plane) { w = (w + src->x_chroma_shift) >> src->x_chroma_shift; h = (h + src->y_chroma_shift) >> src->y_chroma_shift; } for (y = 0; y < h; y++) { uint16_t *p_src = (uint16_t *)(src->planes[plane] + y * src->stride[plane]); uint8_t *p_dst = dst->planes[plane] + y * dst->stride[plane]; for (x = 0; x < w; x++) { *p_dst++ = *p_src++ >> down_shift; } } } } void vpx_img_downshift(vpx_image_t *dst, vpx_image_t *src, int down_shift) { if (dst->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { highbd_img_downshift(dst, src, down_shift); } else { lowbd_img_downshift(dst, src, down_shift); } } #endif // CONFIG_VP9_HIGHBITDEPTH int compare_img(const vpx_image_t *const img1, const vpx_image_t *const img2) { uint32_t l_w = img1->d_w; uint32_t c_w = (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift; const uint32_t c_h = (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift; uint32_t i; int match = 1; match &= (img1->fmt == img2->fmt); match &= (img1->d_w == img2->d_w); match &= (img1->d_h == img2->d_h); #if CONFIG_VP9_HIGHBITDEPTH if (img1->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { l_w *= 2; c_w *= 2; } #endif for (i = 0; i < img1->d_h; ++i) match &= (memcmp(img1->planes[VPX_PLANE_Y] + i * img1->stride[VPX_PLANE_Y], img2->planes[VPX_PLANE_Y] + i * img2->stride[VPX_PLANE_Y], l_w) == 0); for (i = 0; i < c_h; ++i) match &= (memcmp(img1->planes[VPX_PLANE_U] + i * img1->stride[VPX_PLANE_U], img2->planes[VPX_PLANE_U] + i * img2->stride[VPX_PLANE_U], c_w) == 0); for (i = 0; i < c_h; ++i) match &= (memcmp(img1->planes[VPX_PLANE_V] + i * img1->stride[VPX_PLANE_V], img2->planes[VPX_PLANE_V] + i * img2->stride[VPX_PLANE_V], c_w) == 0); return match; } #define mmin(a, b) ((a) < (b) ? 
(a) : (b))

#if CONFIG_VP9_HIGHBITDEPTH
void find_mismatch_high(const vpx_image_t *const img1,
                        const vpx_image_t *const img2, int yloc[4],
                        int uloc[4], int vloc[4]) {
  uint16_t *plane1, *plane2;
  uint32_t stride1, stride2;
  const uint32_t bsize = 64;
  const uint32_t bsizey = bsize >> img1->y_chroma_shift;
  const uint32_t bsizex = bsize >> img1->x_chroma_shift;
  const uint32_t c_w =
      (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
  const uint32_t c_h =
      (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
  int match = 1;
  uint32_t i, j;
  yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1;
  plane1 = (uint16_t *)img1->planes[VPX_PLANE_Y];
  plane2 = (uint16_t *)img2->planes[VPX_PLANE_Y];
  stride1 = img1->stride[VPX_PLANE_Y] / 2;
  stride2 = img2->stride[VPX_PLANE_Y] / 2;
  for (i = 0, match = 1; match && i < img1->d_h; i += bsize) {
    for (j = 0; match && j < img1->d_w; j += bsize) {
      int k, l;
      const int si = mmin(i + bsize, img1->d_h) - i;
      const int sj = mmin(j + bsize, img1->d_w) - j;
      for (k = 0; match && k < si; ++k) {
        for (l = 0; match && l < sj; ++l) {
          if (*(plane1 + (i + k) * stride1 + j + l) !=
              *(plane2 + (i + k) * stride2 + j + l)) {
            yloc[0] = i + k;
            yloc[1] = j + l;
            yloc[2] = *(plane1 + (i + k) * stride1 + j + l);
            yloc[3] = *(plane2 + (i + k) * stride2 + j + l);
            match = 0;
            break;
          }
        }
      }
    }
  }

  uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1;
  plane1 = (uint16_t *)img1->planes[VPX_PLANE_U];
  plane2 = (uint16_t *)img2->planes[VPX_PLANE_U];
  stride1 = img1->stride[VPX_PLANE_U] / 2;
  stride2 = img2->stride[VPX_PLANE_U] / 2;
  for (i = 0, match = 1; match && i < c_h; i += bsizey) {
    for (j = 0; match && j < c_w; j += bsizex) {
      int k, l;
      const int si = mmin(i + bsizey, c_h) - i;
      const int sj = mmin(j + bsizex, c_w) - j;
      for (k = 0; match && k < si; ++k) {
        for (l = 0; match && l < sj; ++l) {
          if (*(plane1 + (i + k) * stride1 + j + l) !=
              *(plane2 + (i + k) * stride2 + j + l)) {
            uloc[0] = i + k;
            uloc[1] = j + l;
            uloc[2] = *(plane1 + (i + k) * stride1 + j + l);
            uloc[3] = *(plane2 + (i + k) * stride2 + j + l);
            match = 0;
            break;
          }
        }
      }
    }
  }

  vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1;
  plane1 = (uint16_t *)img1->planes[VPX_PLANE_V];
  plane2 = (uint16_t *)img2->planes[VPX_PLANE_V];
  stride1 = img1->stride[VPX_PLANE_V] / 2;
  stride2 = img2->stride[VPX_PLANE_V] / 2;
  for (i = 0, match = 1; match && i < c_h; i += bsizey) {
    for (j = 0; match && j < c_w; j += bsizex) {
      int k, l;
      const int si = mmin(i + bsizey, c_h) - i;
      const int sj = mmin(j + bsizex, c_w) - j;
      for (k = 0; match && k < si; ++k) {
        for (l = 0; match && l < sj; ++l) {
          if (*(plane1 + (i + k) * stride1 + j + l) !=
              *(plane2 + (i + k) * stride2 + j + l)) {
            vloc[0] = i + k;
            vloc[1] = j + l;
            vloc[2] = *(plane1 + (i + k) * stride1 + j + l);
            vloc[3] = *(plane2 + (i + k) * stride2 + j + l);
            match = 0;
            break;
          }
        }
      }
    }
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

void find_mismatch(const vpx_image_t *const img1,
                   const vpx_image_t *const img2, int yloc[4], int uloc[4],
                   int vloc[4]) {
  const uint32_t bsize = 64;
  const uint32_t bsizey = bsize >> img1->y_chroma_shift;
  const uint32_t bsizex = bsize >> img1->x_chroma_shift;
  const uint32_t c_w =
      (img1->d_w + img1->x_chroma_shift) >> img1->x_chroma_shift;
  const uint32_t c_h =
      (img1->d_h + img1->y_chroma_shift) >> img1->y_chroma_shift;
  int match = 1;
  uint32_t i, j;
  yloc[0] = yloc[1] = yloc[2] = yloc[3] = -1;
  for (i = 0, match = 1; match && i < img1->d_h; i += bsize) {
    for (j = 0; match && j < img1->d_w; j += bsize) {
      int k, l;
      const int si = mmin(i + bsize, img1->d_h) - i;
      const int sj = mmin(j + bsize, img1->d_w) - j;
      for (k = 0; match && k < si; ++k)
{
        for (l = 0; match && l < sj; ++l) {
          if (*(img1->planes[VPX_PLANE_Y] +
                (i + k) * img1->stride[VPX_PLANE_Y] + j + l) !=
              *(img2->planes[VPX_PLANE_Y] +
                (i + k) * img2->stride[VPX_PLANE_Y] + j + l)) {
            yloc[0] = i + k;
            yloc[1] = j + l;
            yloc[2] = *(img1->planes[VPX_PLANE_Y] +
                        (i + k) * img1->stride[VPX_PLANE_Y] + j + l);
            yloc[3] = *(img2->planes[VPX_PLANE_Y] +
                        (i + k) * img2->stride[VPX_PLANE_Y] + j + l);
            match = 0;
            break;
          }
        }
      }
    }
  }

  uloc[0] = uloc[1] = uloc[2] = uloc[3] = -1;
  for (i = 0, match = 1; match && i < c_h; i += bsizey) {
    for (j = 0; match && j < c_w; j += bsizex) {
      int k, l;
      const int si = mmin(i + bsizey, c_h) - i;
      const int sj = mmin(j + bsizex, c_w) - j;
      for (k = 0; match && k < si; ++k) {
        for (l = 0; match && l < sj; ++l) {
          if (*(img1->planes[VPX_PLANE_U] +
                (i + k) * img1->stride[VPX_PLANE_U] + j + l) !=
              *(img2->planes[VPX_PLANE_U] +
                (i + k) * img2->stride[VPX_PLANE_U] + j + l)) {
            uloc[0] = i + k;
            uloc[1] = j + l;
            uloc[2] = *(img1->planes[VPX_PLANE_U] +
                        (i + k) * img1->stride[VPX_PLANE_U] + j + l);
            uloc[3] = *(img2->planes[VPX_PLANE_U] +
                        (i + k) * img2->stride[VPX_PLANE_U] + j + l);
            match = 0;
            break;
          }
        }
      }
    }
  }

  vloc[0] = vloc[1] = vloc[2] = vloc[3] = -1;
  for (i = 0, match = 1; match && i < c_h; i += bsizey) {
    for (j = 0; match && j < c_w; j += bsizex) {
      int k, l;
      const int si = mmin(i + bsizey, c_h) - i;
      const int sj = mmin(j + bsizex, c_w) - j;
      for (k = 0; match && k < si; ++k) {
        for (l = 0; match && l < sj; ++l) {
          if (*(img1->planes[VPX_PLANE_V] +
                (i + k) * img1->stride[VPX_PLANE_V] + j + l) !=
              *(img2->planes[VPX_PLANE_V] +
                (i + k) * img2->stride[VPX_PLANE_V] + j + l)) {
            vloc[0] = i + k;
            vloc[1] = j + l;
            vloc[2] = *(img1->planes[VPX_PLANE_V] +
                        (i + k) * img1->stride[VPX_PLANE_V] + j + l);
            vloc[3] = *(img2->planes[VPX_PLANE_V] +
                        (i + k) * img2->stride[VPX_PLANE_V] + j + l);
            match = 0;
            break;
          }
        }
      }
    }
  }
}
libvpx-1.8.2/tools_common.h000066400000000000000000000114701357355204000157160ustar00rootroot00000000000000/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_TOOLS_COMMON_H_
#define VPX_TOOLS_COMMON_H_

#include <stdio.h>

#include "./vpx_config.h"
#include "vpx/vpx_codec.h"
#include "vpx/vpx_image.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/msvc.h"

#if CONFIG_ENCODERS
#include "./y4minput.h"
#endif

#if defined(_MSC_VER)
/* MSVS uses _f{seek,tell}i64. */
#define fseeko _fseeki64
#define ftello _ftelli64
typedef int64_t FileOffset;
#elif defined(_WIN32)
/* MinGW uses f{seek,tell}o64 for large files. */
#define fseeko fseeko64
#define ftello ftello64
typedef off64_t FileOffset;
#elif CONFIG_OS_SUPPORT
#include <sys/types.h> /* NOLINT */
typedef off_t FileOffset;
/* Use 32-bit file operations in WebM file format when building ARM
 * executables (.axf) with RVCT.
 */
#else
#define fseeko fseek
#define ftello ftell
typedef long FileOffset; /* NOLINT */
#endif /* CONFIG_OS_SUPPORT */

#if CONFIG_OS_SUPPORT
#if defined(_MSC_VER)
#include <io.h> /* NOLINT */
#define isatty _isatty
#define fileno _fileno
#else
#include <unistd.h> /* NOLINT */
#endif /* _MSC_VER */
#endif /* CONFIG_OS_SUPPORT */

#define LITERALU64(hi, lo) ((((uint64_t)hi) << 32) | lo)

#ifndef PATH_MAX
#define PATH_MAX 512
#endif

#define IVF_FRAME_HDR_SZ (4 + 8) /* 4 byte size + 8 byte timestamp */
#define IVF_FILE_HDR_SZ 32

#define RAW_FRAME_HDR_SZ sizeof(uint32_t)

#define VP8_FOURCC 0x30385056
#define VP9_FOURCC 0x30395056

enum VideoFileType {
  FILE_TYPE_RAW,
  FILE_TYPE_IVF,
  FILE_TYPE_Y4M,
  FILE_TYPE_WEBM
};

struct FileTypeDetectionBuffer {
  char buf[4];
  size_t buf_read;
  size_t position;
};

struct VpxRational {
  int numerator;
  int denominator;
};

struct VpxInputContext {
  const char *filename;
  FILE *file;
  int64_t length;
  struct FileTypeDetectionBuffer detect;
  enum VideoFileType file_type;
  uint32_t width;
  uint32_t height;
  struct VpxRational pixel_aspect_ratio;
  vpx_img_fmt_t fmt;
  vpx_bit_depth_t bit_depth;
  int only_i420;
  uint32_t fourcc;
  struct VpxRational framerate;
#if CONFIG_ENCODERS
  y4m_input y4m;
#endif
};

#ifdef __cplusplus
extern "C" {
#endif

#if defined(__GNUC__)
#define VPX_NO_RETURN __attribute__((noreturn))
#else
#define VPX_NO_RETURN
#endif

/* Sets a stdio stream into binary mode */
FILE *set_binary_mode(FILE *stream);

void die(const char *fmt, ...) VPX_NO_RETURN;
void fatal(const char *fmt, ...) VPX_NO_RETURN;
void warn(const char *fmt, ...);

void die_codec(vpx_codec_ctx_t *ctx, const char *s) VPX_NO_RETURN;

/* The tool including this file must define usage_exit() */
void usage_exit(void) VPX_NO_RETURN;

#undef VPX_NO_RETURN

int read_yuv_frame(struct VpxInputContext *input_ctx, vpx_image_t *yuv_frame);

typedef struct VpxInterface {
  const char *const name;
  const uint32_t fourcc;
  vpx_codec_iface_t *(*const codec_interface)();
} VpxInterface;

int get_vpx_encoder_count(void);
const VpxInterface *get_vpx_encoder_by_index(int i);
const VpxInterface *get_vpx_encoder_by_name(const char *name);

int get_vpx_decoder_count(void);
const VpxInterface *get_vpx_decoder_by_index(int i);
const VpxInterface *get_vpx_decoder_by_name(const char *name);
const VpxInterface *get_vpx_decoder_by_fourcc(uint32_t fourcc);

int vpx_img_plane_width(const vpx_image_t *img, int plane);
int vpx_img_plane_height(const vpx_image_t *img, int plane);
void vpx_img_write(const vpx_image_t *img, FILE *file);
int vpx_img_read(vpx_image_t *img, FILE *file);

double sse_to_psnr(double samples, double peak, double mse);

#if CONFIG_ENCODERS
int read_frame(struct VpxInputContext *input_ctx, vpx_image_t *img);
int file_is_y4m(const char detect[4]);
int fourcc_is_ivf(const char detect[4]);
void open_input_file(struct VpxInputContext *input);
void close_input_file(struct VpxInputContext *input);
#endif

#if CONFIG_VP9_HIGHBITDEPTH
void vpx_img_upshift(vpx_image_t *dst, vpx_image_t *src, int input_shift);
void vpx_img_downshift(vpx_image_t *dst, vpx_image_t *src, int down_shift);
void vpx_img_truncate_16_to_8(vpx_image_t *dst, vpx_image_t *src);
#endif

int compare_img(const vpx_image_t *const img1, const vpx_image_t *const img2);
#if CONFIG_VP9_HIGHBITDEPTH
void find_mismatch_high(const vpx_image_t *const img1,
                        const vpx_image_t *const img2, int yloc[4],
                        int uloc[4], int vloc[4]);
#endif
void find_mismatch(const vpx_image_t *const img1,
                   const vpx_image_t *const img2, int yloc[4], int uloc[4],
                   int vloc[4]);

#ifdef __cplusplus
} /* extern "C" */
#endif
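/* A minimal sketch of how the encoder-side input helpers above fit together
 * (illustrative only: it assumes an encoder-enabled build reading a raw
 * 640x480 I420 file, elides all error checking, and the file name and
 * frame geometry are placeholder values; vpx_img_alloc()/vpx_img_free()
 * come from vpx/vpx_image.h, which is included above):
 *
 *   struct VpxInputContext input = { 0 };
 *   vpx_image_t raw;
 *   input.filename = "input.yuv";
 *   input.width = 640;
 *   input.height = 480;
 *   input.fmt = VPX_IMG_FMT_I420;
 *   open_input_file(&input);
 *   vpx_img_alloc(&raw, input.fmt, input.width, input.height, 1);
 *   while (read_frame(&input, &raw)) {
 *     ... hand the frame in `raw` to the encoder ...
 *   }
 *   close_input_file(&input);
 *   vpx_img_free(&raw);
 */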
#endif  // VPX_TOOLS_COMMON_H_
libvpx-1.8.2/usage.dox000066400000000000000000000151121357355204000146520ustar00rootroot00000000000000/*!\page usage Usage

    The vpx multi-format codec SDK provides a unified interface amongst its
    supported codecs. This abstraction allows applications using this SDK to
    easily support multiple video formats with minimal code duplication or
    "special casing." This section describes the interface common to all
    codecs. For codec-specific details, see the \ref codecs page.

    The following sections are common to all codecs:
    - \ref usage_types
    - \ref usage_features
    - \ref usage_init
    - \ref usage_errors

    For more information on decoder and encoder specific usage, see the
    following pages:
    \if decoder
    \li \subpage usage_decode
    \endif
    \if encoder
    \li \subpage usage_encode
    \endif

    \section usage_types Important Data Types
    There are two important data structures to consider in this interface.

    \subsection usage_ctxs Contexts
    A context is a storage area allocated by the calling application that the
    codec may write into to store details about a single instance of that
    codec. Most of the context is implementation specific, and thus opaque to
    the application. The context structure as seen by the application is of
    fixed size, and thus can be allocated with automatic storage or
    dynamically on the heap.

    Most operations require an initialized codec context. Codec context
    instances are codec specific. That is, the codec to be used for the
    encoded video must be known at initialization time. See #vpx_codec_ctx_t
    for further information.

    \subsection usage_ifaces Interfaces
    A codec interface is an opaque structure that controls how function calls
    into the generic interface are dispatched to their codec-specific
    implementations. Applications \ref MUSTNOT attempt to examine or override
    this storage, as it contains internal implementation details likely to
    change from release to release.

    Each supported codec will expose an interface structure to the application
    as an extern reference to a structure of the incomplete type
    #vpx_codec_iface_t.

    \section usage_features Features
    Several "features" are defined that are optionally implemented by codec
    algorithms. Indeed, the same algorithm may support different features on
    different platforms. The purpose of defining these features is that when
    they are implemented, they conform to a common interface. The features, or
    capabilities, of an algorithm can be queried from its interface by using
    the vpx_codec_get_caps() method. Attempts to invoke features not supported
    by an algorithm will generally result in #VPX_CODEC_INCAPABLE.

    \if decoder
    Currently defined decoder features include:
    - \ref usage_cb
    - \ref usage_postproc
    \endif

    \section usage_init Initialization
    To initialize a codec instance, the address of the codec context and
    interface structures are passed to an initialization function. Depending
    on the \ref usage_features that the codec supports, the codec could be
    initialized in different modes.

    To prevent cases of confusion where the ABI of the library changes, the
    ABI is versioned. The ABI version number must be passed at initialization
    time to ensure the application is using a header file that matches the
    library. The current ABI version number is stored in the preprocessor
    macros #VPX_CODEC_ABI_VERSION, #VPX_ENCODER_ABI_VERSION, and
    #VPX_DECODER_ABI_VERSION. For convenience, each initialization function
    has a wrapper macro that inserts the correct version number. These macros
    are named like the initialization methods, but without the _ver suffix.
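    For example, a decoder-side context could be created through the wrapper
    macro as follows (an illustrative sketch only: vpx_codec_vp8_dx() stands
    in here for whichever codec interface the application actually links
    against, and error handling is abbreviated):

    \code
    vpx_codec_ctx_t codec;
    const vpx_codec_err_t res =
        vpx_codec_dec_init(&codec, vpx_codec_vp8_dx(), NULL, 0);
    if (res != VPX_CODEC_OK) {
      printf("Failed to initialize decoder: %s\n",
             vpx_codec_err_to_string(res));
    }
    \endcode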
    The available initialization methods are:
    \if encoder
    \li #vpx_codec_enc_init (calls vpx_codec_enc_init_ver())
    \li #vpx_codec_enc_init_multi (calls vpx_codec_enc_init_multi_ver())
    \endif
    \if decoder
    \li #vpx_codec_dec_init (calls vpx_codec_dec_init_ver())
    \endif

    \section usage_errors Error Handling
    Almost all codec functions return an error status of type
    #vpx_codec_err_t. The semantics of how each error condition should be
    processed are clearly defined in the definitions of each enumerated value.
    Error values can be converted into ASCII strings with the
    vpx_codec_error() and vpx_codec_err_to_string() methods. The difference
    between these two methods is that vpx_codec_error() returns the error
    state from an initialized context, whereas vpx_codec_err_to_string() can
    be used in cases where an error occurs outside any context. The enumerated
    value returned from the last call can be retrieved from the err member of
    the decoder context as well. Finally, more detailed error information may
    be obtained by using the vpx_codec_error_detail() method. Not all errors
    produce detailed error information.

    In addition to error information, the codec library's build configuration
    is available at runtime on some platforms. This information can be
    returned by calling vpx_codec_build_config(), and is formatted as a base64
    coded string (comprised of characters in the set [a-zA-Z0-9+/]). This
    information is not useful to an application at runtime, but may be of use
    to vpx for support.

    \section usage_deadline Deadline
    Both the encoding and decoding functions have a deadline parameter. This
    parameter indicates the amount of time, in microseconds (us), that the
    application wants the codec to spend processing before returning. This is
    a soft deadline -- that is, the semantics of the requested operation take
    precedence over meeting the deadline. If, for example, an application sets
    a deadline of 1000us, and the frame takes 2000us to decode, the call to
    vpx_codec_decode() will return after 2000us. In this case the deadline is
    not met, but the semantics of the function are preserved. If, for the same
    frame, an application instead sets a deadline of 5000us, the decoder will
    see that it has 3000us remaining in its time slice when decoding
    completes. It could then choose to run a set of \ref usage_postproc
    filters, and perhaps would return after 4000us (instead of the allocated
    5000us). In this case the deadline is met, and the semantics of the call
    are preserved, as before.

    The special value 0 is reserved to represent an infinite deadline. In this
    case, the codec will perform as much processing as possible to yield the
    highest quality frame. By convention, the value 1 is used to mean "return
    as fast as possible."
*/
libvpx-1.8.2/usage_cx.dox000066400000000000000000000007341357355204000153500ustar00rootroot00000000000000/*! \page usage_encode Encoding

    The vpx_codec_encode() function is at the core of the encode loop. It
    processes raw images passed by the application, producing packets of
    compressed data. The deadline parameter controls the amount of time in
    microseconds the encoder should spend working on the frame. For more
    information on the deadline parameter, see \ref usage_deadline.

    \if samples
    \ref samples
    \endif
*/
libvpx-1.8.2/usage_dx.dox000066400000000000000000000072201357355204000153460ustar00rootroot00000000000000/*! \page usage_decode Decoding

    The vpx_codec_decode() function is at the core of the decode loop.
It processes packets of compressed data passed by the application, producing decoded images. The decoder expects packets to comprise exactly one image frame of data. Packets \ref MUST be passed in decode order. If the application wishes to associate some data with the frame, the user_priv member may be set. The deadline parameter controls the amount of time in microseconds the decoder should spend working on the frame. This is typically used to support adaptive \ref usage_postproc based on the amount of free CPU time. For more information on the deadline parameter, see \ref usage_deadline. \if samples \ref samples \endif \section usage_cb Callback Based Decoding There are two methods for the application to access decoded frame data. Some codecs support asynchronous (callback-based) decoding \ref usage_features that allow the application to register a callback to be invoked by the decoder when decoded data becomes available. Decoders are not required to support this feature, however. Like all \ref usage_features, support can be determined by calling vpx_codec_get_caps(). Callbacks are available in both frame-based and slice-based variants. Frame based callbacks conform to the signature of #vpx_codec_put_frame_cb_fn_t and are invoked once the entire frame has been decoded. Slice based callbacks conform to the signature of #vpx_codec_put_slice_cb_fn_t and are invoked after a subsection of the frame is decoded. For example, a slice callback could be issued for each macroblock row. However, the number and size of slices to return is implementation specific. Also, the image data passed in a slice callback is not necessarily in the same memory segment as the data will be when it is assembled into a full frame. For this reason, the application \ref MUST examine the rectangles that describe what data is valid to access and what data has been updated in this call. For all their additional complexity, slice based decoding callbacks provide substantial speed gains to the overall application in some cases, due to improved cache behavior. \section usage_frame_iter Frame Iterator Based Decoding If the codec does not support callback based decoding, or the application chooses not to make use of that feature, decoded frames are made available through the vpx_codec_get_frame() iterator. The application initializes the iterator storage (of type #vpx_codec_iter_t) to NULL, then calls vpx_codec_get_frame repeatedly until it returns NULL, indicating that all images have been returned. This process may result in zero, one, or many frames that are ready for display, depending on the codec. \section usage_postproc Postprocessing Postprocessing is a process that is applied after a frame is decoded to enhance the image's appearance by removing artifacts introduced in the compression process. It is not required to properly decode the frame, and is generally done only when there is enough spare CPU time to execute the required filters. Codecs may support a number of different postprocessing filters, and the available filters may differ from platform to platform. Embedded devices often do not have enough CPU to implement postprocessing in software. The filter selection is generally handled automatically by the codec, depending on the amount of time remaining before hitting the user-specified \ref usage_deadline after decoding the frame. */ libvpx-1.8.2/video_common.h000066400000000000000000000012231357355204000156570ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VIDEO_COMMON_H_ #define VPX_VIDEO_COMMON_H_ #include "./tools_common.h" typedef struct { uint32_t codec_fourcc; int frame_width; int frame_height; struct VpxRational time_base; } VpxVideoInfo; #endif // VPX_VIDEO_COMMON_H_ libvpx-1.8.2/video_reader.c000066400000000000000000000051321357355204000156270ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./ivfdec.h" #include "./video_reader.h" #include "vpx_ports/mem_ops.h" static const char *const kIVFSignature = "DKIF"; struct VpxVideoReaderStruct { VpxVideoInfo info; FILE *file; uint8_t *buffer; size_t buffer_size; size_t frame_size; }; VpxVideoReader *vpx_video_reader_open(const char *filename) { char header[32]; VpxVideoReader *reader = NULL; FILE *const file = fopen(filename, "rb"); if (!file) { fprintf(stderr, "%s can't be opened.\n", filename); // Can't open file return NULL; } if (fread(header, 1, 32, file) != 32) { fprintf(stderr, "File header on %s can't be read.\n", filename); // Can't read file header return NULL; } if (memcmp(kIVFSignature, header, 4) != 0) { fprintf(stderr, "The IVF signature on %s is wrong.\n", filename); // Wrong IVF signature return NULL; } if (mem_get_le16(header + 4) != 0) { fprintf(stderr, "%s uses the wrong IVF version.\n", filename); // Wrong IVF version return NULL; } reader = calloc(1, sizeof(*reader)); if (!reader) { fprintf( stderr, "Can't allocate VpxVideoReader\n"); // Can't allocate VpxVideoReader return NULL; } reader->file = file; reader->info.codec_fourcc = mem_get_le32(header + 8); reader->info.frame_width = mem_get_le16(header + 12); reader->info.frame_height = mem_get_le16(header + 14); reader->info.time_base.numerator = mem_get_le32(header + 16); reader->info.time_base.denominator = mem_get_le32(header + 20); return reader; } void vpx_video_reader_close(VpxVideoReader *reader) { if (reader) { fclose(reader->file); free(reader->buffer); free(reader); } } int vpx_video_reader_read_frame(VpxVideoReader *reader) { return !ivf_read_frame(reader->file, &reader->buffer, &reader->frame_size, &reader->buffer_size); } const uint8_t *vpx_video_reader_get_frame(VpxVideoReader *reader, size_t *size) { if (size) *size = reader->frame_size; return reader->buffer; } const VpxVideoInfo *vpx_video_reader_get_info(VpxVideoReader *reader) { return &reader->info; } libvpx-1.8.2/video_reader.h000066400000000000000000000035551357355204000156430ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VIDEO_READER_H_ #define VPX_VIDEO_READER_H_ #include "./video_common.h" // The following code is work in progress. It is going to support transparent // reading of input files. Right now only IVF format is supported for // simplicity. The main goal the API is to be simple and easy to use in example // code and in vpxenc/vpxdec later. All low-level details like memory // buffer management are hidden from API users. struct VpxVideoReaderStruct; typedef struct VpxVideoReaderStruct VpxVideoReader; #ifdef __cplusplus extern "C" { #endif // Opens the input file for reading and inspects it to determine file type. // Returns an opaque VpxVideoReader* upon success, or NULL upon failure. // Right now only IVF format is supported. VpxVideoReader *vpx_video_reader_open(const char *filename); // Frees all resources associated with VpxVideoReader* returned from // vpx_video_reader_open() call. void vpx_video_reader_close(VpxVideoReader *reader); // Reads frame from the file and stores it in internal buffer. int vpx_video_reader_read_frame(VpxVideoReader *reader); // Returns the pointer to memory buffer with frame data read by last call to // vpx_video_reader_read_frame(). const uint8_t *vpx_video_reader_get_frame(VpxVideoReader *reader, size_t *size); // Fills VpxVideoInfo with information from opened video file. const VpxVideoInfo *vpx_video_reader_get_info(VpxVideoReader *reader); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VIDEO_READER_H_ libvpx-1.8.2/video_writer.c000066400000000000000000000044131357355204000157020ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./ivfenc.h" #include "./video_writer.h" #include "vpx/vpx_encoder.h" struct VpxVideoWriterStruct { VpxVideoInfo info; FILE *file; int frame_count; }; static void write_header(FILE *file, const VpxVideoInfo *info, int frame_count) { struct vpx_codec_enc_cfg cfg; cfg.g_w = info->frame_width; cfg.g_h = info->frame_height; cfg.g_timebase.num = info->time_base.numerator; cfg.g_timebase.den = info->time_base.denominator; ivf_write_file_header(file, &cfg, info->codec_fourcc, frame_count); } VpxVideoWriter *vpx_video_writer_open(const char *filename, VpxContainer container, const VpxVideoInfo *info) { if (container == kContainerIVF) { VpxVideoWriter *writer = NULL; FILE *const file = fopen(filename, "wb"); if (!file) { fprintf(stderr, "%s can't be written to.\n", filename); return NULL; } writer = malloc(sizeof(*writer)); if (!writer) { fprintf(stderr, "Can't allocate VpxVideoWriter.\n"); return NULL; } writer->frame_count = 0; writer->info = *info; writer->file = file; write_header(writer->file, info, 0); return writer; } fprintf(stderr, "VpxVideoWriter supports only IVF.\n"); return NULL; } void vpx_video_writer_close(VpxVideoWriter *writer) { if (writer) { // Rewriting frame header with real frame count rewind(writer->file); write_header(writer->file, &writer->info, writer->frame_count); fclose(writer->file); free(writer); } } int vpx_video_writer_write_frame(VpxVideoWriter *writer, const uint8_t *buffer, size_t size, int64_t pts) { ivf_write_frame_header(writer->file, pts, size); if (fwrite(buffer, 1, size, writer->file) != size) return 0; ++writer->frame_count; return 1; } libvpx-1.8.2/video_writer.h000066400000000000000000000026331357355204000157110ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VIDEO_WRITER_H_ #define VPX_VIDEO_WRITER_H_ #include "./video_common.h" typedef enum { kContainerIVF } VpxContainer; struct VpxVideoWriterStruct; typedef struct VpxVideoWriterStruct VpxVideoWriter; #ifdef __cplusplus extern "C" { #endif // Finds and opens writer for specified container format. // Returns an opaque VpxVideoWriter* upon success, or NULL upon failure. // Right now only IVF format is supported. VpxVideoWriter *vpx_video_writer_open(const char *filename, VpxContainer container, const VpxVideoInfo *info); // Frees all resources associated with VpxVideoWriter* returned from // vpx_video_writer_open() call. void vpx_video_writer_close(VpxVideoWriter *writer); // Writes frame bytes to the file. int vpx_video_writer_write_frame(VpxVideoWriter *writer, const uint8_t *buffer, size_t size, int64_t pts); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VIDEO_WRITER_H_ libvpx-1.8.2/vp8/000077500000000000000000000000001357355204000135475ustar00rootroot00000000000000libvpx-1.8.2/vp8/common/000077500000000000000000000000001357355204000150375ustar00rootroot00000000000000libvpx-1.8.2/vp8/common/alloccommon.c000066400000000000000000000121661357355204000175140ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" #include "alloccommon.h" #include "blockd.h" #include "vpx_mem/vpx_mem.h" #include "onyxc_int.h" #include "findnearmv.h" #include "entropymode.h" #include "systemdependent.h" void vp8_de_alloc_frame_buffers(VP8_COMMON *oci) { int i; for (i = 0; i < NUM_YV12_BUFFERS; ++i) { vp8_yv12_de_alloc_frame_buffer(&oci->yv12_fb[i]); } vp8_yv12_de_alloc_frame_buffer(&oci->temp_scale_frame); #if CONFIG_POSTPROC vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer); if (oci->post_proc_buffer_int_used) { vp8_yv12_de_alloc_frame_buffer(&oci->post_proc_buffer_int); } vpx_free(oci->pp_limits_buffer); oci->pp_limits_buffer = NULL; vpx_free(oci->postproc_state.generated_noise); oci->postproc_state.generated_noise = NULL; #endif vpx_free(oci->above_context); vpx_free(oci->mip); #if CONFIG_ERROR_CONCEALMENT vpx_free(oci->prev_mip); oci->prev_mip = NULL; #endif oci->above_context = NULL; oci->mip = NULL; } int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height) { int i; vp8_de_alloc_frame_buffers(oci); /* our internal buffers are always multiples of 16 */ if ((width & 0xf) != 0) width += 16 - (width & 0xf); if ((height & 0xf) != 0) height += 16 - (height & 0xf); for (i = 0; i < NUM_YV12_BUFFERS; ++i) { oci->fb_idx_ref_cnt[i] = 0; oci->yv12_fb[i].flags = 0; if (vp8_yv12_alloc_frame_buffer(&oci->yv12_fb[i], width, height, VP8BORDERINPIXELS) < 0) { goto allocation_fail; } } oci->new_fb_idx = 0; oci->lst_fb_idx = 1; oci->gld_fb_idx = 2; oci->alt_fb_idx = 3; oci->fb_idx_ref_cnt[0] = 1; oci->fb_idx_ref_cnt[1] = 1; oci->fb_idx_ref_cnt[2] = 1; oci->fb_idx_ref_cnt[3] = 1; if (vp8_yv12_alloc_frame_buffer(&oci->temp_scale_frame, width, 16, VP8BORDERINPIXELS) < 0) { goto allocation_fail; } oci->mb_rows = height >> 4; oci->mb_cols = width >> 4; oci->MBs = oci->mb_rows * oci->mb_cols; oci->mode_info_stride = oci->mb_cols + 1; oci->mip = vpx_calloc((oci->mb_cols + 1) * (oci->mb_rows + 1), sizeof(MODE_INFO)); if (!oci->mip) goto allocation_fail; oci->mi = oci->mip + oci->mode_info_stride + 1; /* Allocation of previous mode info will be done in vp8_decode_frame() * as it is a decoder only data */ oci->above_context = vpx_calloc(sizeof(ENTROPY_CONTEXT_PLANES) * oci->mb_cols, 1); if (!oci->above_context) goto allocation_fail; #if CONFIG_POSTPROC if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer, width, height, VP8BORDERINPIXELS) < 0) { goto allocation_fail; } oci->post_proc_buffer_int_used = 0; memset(&oci->postproc_state, 0, sizeof(oci->postproc_state)); memset(oci->post_proc_buffer.buffer_alloc, 128, oci->post_proc_buffer.frame_size); /* Allocate buffer to store post-processing filter coefficients. 
* * Note: Round up mb_cols to support SIMD reads */ oci->pp_limits_buffer = vpx_memalign(16, 24 * ((oci->mb_cols + 1) & ~1)); if (!oci->pp_limits_buffer) goto allocation_fail; #endif return 0; allocation_fail: vp8_de_alloc_frame_buffers(oci); return 1; } void vp8_setup_version(VP8_COMMON *cm) { switch (cm->version) { case 0: cm->no_lpf = 0; cm->filter_type = NORMAL_LOOPFILTER; cm->use_bilinear_mc_filter = 0; cm->full_pixel = 0; break; case 1: cm->no_lpf = 0; cm->filter_type = SIMPLE_LOOPFILTER; cm->use_bilinear_mc_filter = 1; cm->full_pixel = 0; break; case 2: cm->no_lpf = 1; cm->filter_type = NORMAL_LOOPFILTER; cm->use_bilinear_mc_filter = 1; cm->full_pixel = 0; break; case 3: cm->no_lpf = 1; cm->filter_type = SIMPLE_LOOPFILTER; cm->use_bilinear_mc_filter = 1; cm->full_pixel = 1; break; default: /*4,5,6,7 are reserved for future use*/ cm->no_lpf = 0; cm->filter_type = NORMAL_LOOPFILTER; cm->use_bilinear_mc_filter = 0; cm->full_pixel = 0; break; } } void vp8_create_common(VP8_COMMON *oci) { vp8_machine_specific_config(oci); vp8_init_mbmode_probs(oci); vp8_default_bmode_probs(oci->fc.bmode_prob); oci->mb_no_coeff_skip = 1; oci->no_lpf = 0; oci->filter_type = NORMAL_LOOPFILTER; oci->use_bilinear_mc_filter = 0; oci->full_pixel = 0; oci->multi_token_partition = ONE_PARTITION; oci->clamp_type = RECON_CLAMP_REQUIRED; /* Initialize reference frame sign bias structure to defaults */ memset(oci->ref_frame_sign_bias, 0, sizeof(oci->ref_frame_sign_bias)); /* Default disable buffer to buffer copying */ oci->copy_buffer_to_gf = 0; oci->copy_buffer_to_arf = 0; } void vp8_remove_common(VP8_COMMON *oci) { vp8_de_alloc_frame_buffers(oci); } libvpx-1.8.2/vp8/common/alloccommon.h000066400000000000000000000015601357355204000175150ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_ALLOCCOMMON_H_ #define VPX_VP8_COMMON_ALLOCCOMMON_H_ #include "onyxc_int.h" #ifdef __cplusplus extern "C" { #endif void vp8_create_common(VP8_COMMON *oci); void vp8_remove_common(VP8_COMMON *oci); void vp8_de_alloc_frame_buffers(VP8_COMMON *oci); int vp8_alloc_frame_buffers(VP8_COMMON *oci, int width, int height); void vp8_setup_version(VP8_COMMON *cm); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_ALLOCCOMMON_H_ libvpx-1.8.2/vp8/common/arm/000077500000000000000000000000001357355204000156165ustar00rootroot00000000000000libvpx-1.8.2/vp8/common/arm/loopfilter_arm.c000066400000000000000000000067751357355204000210170ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_config.h" #include "./vp8_rtcd.h" #include "vp8/common/arm/loopfilter_arm.h" #include "vp8/common/loopfilter.h" #include "vp8/common/onyxc_int.h" /* NEON loopfilter functions */ /* Horizontal MB filtering */ void vp8_loop_filter_mbh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { unsigned char mblim = *lfi->mblim; unsigned char lim = *lfi->lim; unsigned char hev_thr = *lfi->hev_thr; vp8_mbloop_filter_horizontal_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr); if (u_ptr) vp8_mbloop_filter_horizontal_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr); } /* Vertical MB Filtering */ void vp8_loop_filter_mbv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { unsigned char mblim = *lfi->mblim; unsigned char lim = *lfi->lim; unsigned char hev_thr = *lfi->hev_thr; vp8_mbloop_filter_vertical_edge_y_neon(y_ptr, y_stride, mblim, lim, hev_thr); if (u_ptr) vp8_mbloop_filter_vertical_edge_uv_neon(u_ptr, uv_stride, mblim, lim, hev_thr, v_ptr); } /* Horizontal B Filtering */ void vp8_loop_filter_bh_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { unsigned char blim = *lfi->blim; unsigned char lim = *lfi->lim; unsigned char hev_thr = *lfi->hev_thr; vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 4 * y_stride, y_stride, blim, lim, hev_thr); vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 8 * y_stride, y_stride, blim, lim, hev_thr); vp8_loop_filter_horizontal_edge_y_neon(y_ptr + 12 * y_stride, y_stride, blim, lim, hev_thr); if (u_ptr) vp8_loop_filter_horizontal_edge_uv_neon(u_ptr + 4 * uv_stride, uv_stride, blim, lim, hev_thr, v_ptr + 4 * uv_stride); } /* Vertical B Filtering */ void vp8_loop_filter_bv_neon(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { unsigned char blim = *lfi->blim; unsigned char lim = *lfi->lim; unsigned char hev_thr = *lfi->hev_thr; vp8_loop_filter_vertical_edge_y_neon(y_ptr + 4, y_stride, blim, lim, hev_thr); vp8_loop_filter_vertical_edge_y_neon(y_ptr + 8, y_stride, blim, lim, hev_thr); vp8_loop_filter_vertical_edge_y_neon(y_ptr + 12, y_stride, blim, lim, hev_thr); if (u_ptr) vp8_loop_filter_vertical_edge_uv_neon(u_ptr + 4, uv_stride, blim, lim, hev_thr, v_ptr + 4); } libvpx-1.8.2/vp8/common/arm/loopfilter_arm.h000066400000000000000000000026211357355204000210060ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP8_COMMON_ARM_LOOPFILTER_ARM_H_ #define VPX_VP8_COMMON_ARM_LOOPFILTER_ARM_H_ typedef void loopfilter_y_neon(unsigned char *src, int pitch, unsigned char blimit, unsigned char limit, unsigned char thresh); typedef void loopfilter_uv_neon(unsigned char *u, int pitch, unsigned char blimit, unsigned char limit, unsigned char thresh, unsigned char *v); loopfilter_y_neon vp8_loop_filter_horizontal_edge_y_neon; loopfilter_y_neon vp8_loop_filter_vertical_edge_y_neon; loopfilter_uv_neon vp8_loop_filter_horizontal_edge_uv_neon; loopfilter_uv_neon vp8_loop_filter_vertical_edge_uv_neon; loopfilter_y_neon vp8_mbloop_filter_horizontal_edge_y_neon; loopfilter_y_neon vp8_mbloop_filter_vertical_edge_y_neon; loopfilter_uv_neon vp8_mbloop_filter_horizontal_edge_uv_neon; loopfilter_uv_neon vp8_mbloop_filter_vertical_edge_uv_neon; #endif // VPX_VP8_COMMON_ARM_LOOPFILTER_ARM_H_ libvpx-1.8.2/vp8/common/arm/neon/000077500000000000000000000000001357355204000165555ustar00rootroot00000000000000libvpx-1.8.2/vp8/common/arm/neon/bilinearpredict_neon.c000066400000000000000000000614471357355204000231140ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_config.h" #include "./vp8_rtcd.h" #include "vpx_dsp/arm/mem_neon.h" static const uint8_t bifilter4_coeff[8][2] = { { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 }, { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 } }; static INLINE uint8x8_t load_and_shift(const unsigned char *a) { return vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(vld1_u8(a)), 32)); } void vp8_bilinear_predict4x4_neon(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { uint8x8_t e0, e1, e2; if (xoffset == 0) { // skip_1stpass_filter uint8x8_t a0, a1, a2, a3, a4; a0 = load_and_shift(src_ptr); src_ptr += src_pixels_per_line; a1 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; a2 = load_and_shift(src_ptr); src_ptr += src_pixels_per_line; a3 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; a4 = vld1_u8(src_ptr); e0 = vext_u8(a0, a1, 4); e1 = vext_u8(a2, a3, 4); e2 = a4; } else { uint8x8_t a0, a1, a2, a3, a4, b4; uint8x16_t a01, a23; uint8x16_t b01, b23; uint32x2x2_t c0, c1, c2, c3; uint16x8_t d0, d1, d2; const uint8x8_t filter0 = vdup_n_u8(bifilter4_coeff[xoffset][0]); const uint8x8_t filter1 = vdup_n_u8(bifilter4_coeff[xoffset][1]); a0 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; a1 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; a2 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; a3 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; a4 = vld1_u8(src_ptr); a01 = vcombine_u8(a0, a1); a23 = vcombine_u8(a2, a3); b01 = vreinterpretq_u8_u64(vshrq_n_u64(vreinterpretq_u64_u8(a01), 8)); b23 = vreinterpretq_u8_u64(vshrq_n_u64(vreinterpretq_u64_u8(a23), 8)); b4 = vreinterpret_u8_u64(vshr_n_u64(vreinterpret_u64_u8(a4), 8)); c0 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(a01)), vreinterpret_u32_u8(vget_high_u8(a01))); c1 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(a23)), vreinterpret_u32_u8(vget_high_u8(a23))); c2 = vzip_u32(vreinterpret_u32_u8(vget_low_u8(b01)), vreinterpret_u32_u8(vget_high_u8(b01))); c3 = 
vzip_u32(vreinterpret_u32_u8(vget_low_u8(b23)), vreinterpret_u32_u8(vget_high_u8(b23))); d0 = vmull_u8(vreinterpret_u8_u32(c0.val[0]), filter0); d1 = vmull_u8(vreinterpret_u8_u32(c1.val[0]), filter0); d2 = vmull_u8(a4, filter0); d0 = vmlal_u8(d0, vreinterpret_u8_u32(c2.val[0]), filter1); d1 = vmlal_u8(d1, vreinterpret_u8_u32(c3.val[0]), filter1); d2 = vmlal_u8(d2, b4, filter1); e0 = vqrshrn_n_u16(d0, 7); e1 = vqrshrn_n_u16(d1, 7); e2 = vqrshrn_n_u16(d2, 7); } // secondpass_filter if (yoffset == 0) { // skip_2ndpass_filter store_unaligned_u8q(dst_ptr, dst_pitch, vcombine_u8(e0, e1)); } else { uint8x8_t f0, f1; const uint8x8_t filter0 = vdup_n_u8(bifilter4_coeff[yoffset][0]); const uint8x8_t filter1 = vdup_n_u8(bifilter4_coeff[yoffset][1]); uint16x8_t b0 = vmull_u8(e0, filter0); uint16x8_t b1 = vmull_u8(e1, filter0); const uint8x8_t a0 = vext_u8(e0, e1, 4); const uint8x8_t a1 = vext_u8(e1, e2, 4); b0 = vmlal_u8(b0, a0, filter1); b1 = vmlal_u8(b1, a1, filter1); f0 = vqrshrn_n_u16(b0, 7); f1 = vqrshrn_n_u16(b1, 7); store_unaligned_u8q(dst_ptr, dst_pitch, vcombine_u8(f0, f1)); } } void vp8_bilinear_predict8x4_neon(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8; uint8x8_t d7u8, d9u8, d11u8, d22u8, d23u8, d24u8, d25u8, d26u8; uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8; uint16x8_t q1u16, q2u16, q3u16, q4u16; uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16; if (xoffset == 0) { // skip_1stpass_filter d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; d26u8 = vld1_u8(src_ptr); } else { q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q5u8 = vld1q_u8(src_ptr); d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); q6u16 = vmlal_u8(q6u16, d3u8, d1u8); q7u16 = vmlal_u8(q7u16, d5u8, d1u8); q8u16 = vmlal_u8(q8u16, d7u8, d1u8); q9u16 = vmlal_u8(q9u16, d9u8, d1u8); q10u16 = vmlal_u8(q10u16, d11u8, d1u8); d22u8 = vqrshrn_n_u16(q6u16, 7); d23u8 = vqrshrn_n_u16(q7u16, 7); d24u8 = vqrshrn_n_u16(q8u16, 7); d25u8 = vqrshrn_n_u16(q9u16, 7); d26u8 = vqrshrn_n_u16(q10u16, 7); } // secondpass_filter if (yoffset == 0) { // skip_2ndpass_filter vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d25u8); } else { d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); q1u16 = vmull_u8(d22u8, d0u8); q2u16 = vmull_u8(d23u8, d0u8); q3u16 = vmull_u8(d24u8, d0u8); q4u16 = vmull_u8(d25u8, d0u8); q1u16 = vmlal_u8(q1u16, d23u8, d1u8); q2u16 = 
vmlal_u8(q2u16, d24u8, d1u8); q3u16 = vmlal_u8(q3u16, d25u8, d1u8); q4u16 = vmlal_u8(q4u16, d26u8, d1u8); d2u8 = vqrshrn_n_u16(q1u16, 7); d3u8 = vqrshrn_n_u16(q2u16, 7); d4u8 = vqrshrn_n_u16(q3u16, 7); d5u8 = vqrshrn_n_u16(q4u16, 7); vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d5u8); } return; } void vp8_bilinear_predict8x8_neon(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8, d11u8; uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8, d27u8, d28u8, d29u8, d30u8; uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8; uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16; uint16x8_t q6u16, q7u16, q8u16, q9u16, q10u16; if (xoffset == 0) { // skip_1stpass_filter d22u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; d23u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; d24u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; d25u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; d26u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; d27u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; d28u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; d29u8 = vld1_u8(src_ptr); src_ptr += src_pixels_per_line; d30u8 = vld1_u8(src_ptr); } else { q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); q6u16 = vmlal_u8(q6u16, d3u8, d1u8); q7u16 = vmlal_u8(q7u16, d5u8, d1u8); q8u16 = vmlal_u8(q8u16, d7u8, d1u8); q9u16 = vmlal_u8(q9u16, d9u8, d1u8); d22u8 = vqrshrn_n_u16(q6u16, 7); d23u8 = vqrshrn_n_u16(q7u16, 7); d24u8 = vqrshrn_n_u16(q8u16, 7); d25u8 = vqrshrn_n_u16(q9u16, 7); // first_pass filtering on the rest 5-line data q1u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q2u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q3u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q4u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q5u8 = vld1q_u8(src_ptr); q6u16 = vmull_u8(vget_low_u8(q1u8), d0u8); q7u16 = vmull_u8(vget_low_u8(q2u8), d0u8); q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); d3u8 = vext_u8(vget_low_u8(q1u8), vget_high_u8(q1u8), 1); d5u8 = vext_u8(vget_low_u8(q2u8), vget_high_u8(q2u8), 1); d7u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); d9u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); d11u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); q6u16 = vmlal_u8(q6u16, d3u8, d1u8); q7u16 = vmlal_u8(q7u16, d5u8, d1u8); q8u16 = vmlal_u8(q8u16, d7u8, d1u8); q9u16 = vmlal_u8(q9u16, d9u8, d1u8); q10u16 = vmlal_u8(q10u16, d11u8, d1u8); d26u8 = vqrshrn_n_u16(q6u16, 7); d27u8 = vqrshrn_n_u16(q7u16, 7); d28u8 = vqrshrn_n_u16(q8u16, 7); d29u8 = 
vqrshrn_n_u16(q9u16, 7); d30u8 = vqrshrn_n_u16(q10u16, 7); } // secondpass_filter if (yoffset == 0) { // skip_2ndpass_filter vst1_u8((uint8_t *)dst_ptr, d22u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d23u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d24u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d25u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d26u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d27u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d28u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d29u8); } else { d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); q1u16 = vmull_u8(d22u8, d0u8); q2u16 = vmull_u8(d23u8, d0u8); q3u16 = vmull_u8(d24u8, d0u8); q4u16 = vmull_u8(d25u8, d0u8); q5u16 = vmull_u8(d26u8, d0u8); q6u16 = vmull_u8(d27u8, d0u8); q7u16 = vmull_u8(d28u8, d0u8); q8u16 = vmull_u8(d29u8, d0u8); q1u16 = vmlal_u8(q1u16, d23u8, d1u8); q2u16 = vmlal_u8(q2u16, d24u8, d1u8); q3u16 = vmlal_u8(q3u16, d25u8, d1u8); q4u16 = vmlal_u8(q4u16, d26u8, d1u8); q5u16 = vmlal_u8(q5u16, d27u8, d1u8); q6u16 = vmlal_u8(q6u16, d28u8, d1u8); q7u16 = vmlal_u8(q7u16, d29u8, d1u8); q8u16 = vmlal_u8(q8u16, d30u8, d1u8); d2u8 = vqrshrn_n_u16(q1u16, 7); d3u8 = vqrshrn_n_u16(q2u16, 7); d4u8 = vqrshrn_n_u16(q3u16, 7); d5u8 = vqrshrn_n_u16(q4u16, 7); d6u8 = vqrshrn_n_u16(q5u16, 7); d7u8 = vqrshrn_n_u16(q6u16, 7); d8u8 = vqrshrn_n_u16(q7u16, 7); d9u8 = vqrshrn_n_u16(q8u16, 7); vst1_u8((uint8_t *)dst_ptr, d2u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d3u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d4u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d5u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d6u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d7u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d8u8); dst_ptr += dst_pitch; vst1_u8((uint8_t *)dst_ptr, d9u8); } return; } void vp8_bilinear_predict16x16_neon(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { int i; unsigned char tmp[272]; unsigned char *tmpp; uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d16u8, d17u8, d18u8; uint8x8_t d19u8, d20u8, d21u8; uint8x16_t q1u8, q2u8, q3u8, q4u8, q5u8, q6u8, q7u8, q8u8, q9u8, q10u8; uint8x16_t q11u8, q12u8, q13u8, q14u8, q15u8; uint16x8_t q1u16, q2u16, q3u16, q4u16, q5u16, q6u16, q7u16, q8u16; uint16x8_t q9u16, q10u16, q11u16, q12u16, q13u16, q14u16; if (xoffset == 0) { // secondpass_bfilter16x16_only d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); q11u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; for (i = 4; i > 0; i--) { q12u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q13u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q14u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q15u8 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); q4u16 = vmlal_u8(q4u16, 
vget_high_u8(q13u8), d1u8); q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); d2u8 = vqrshrn_n_u16(q1u16, 7); d3u8 = vqrshrn_n_u16(q2u16, 7); d4u8 = vqrshrn_n_u16(q3u16, 7); d5u8 = vqrshrn_n_u16(q4u16, 7); d6u8 = vqrshrn_n_u16(q5u16, 7); d7u8 = vqrshrn_n_u16(q6u16, 7); d8u8 = vqrshrn_n_u16(q7u16, 7); d9u8 = vqrshrn_n_u16(q8u16, 7); q1u8 = vcombine_u8(d2u8, d3u8); q2u8 = vcombine_u8(d4u8, d5u8); q3u8 = vcombine_u8(d6u8, d7u8); q4u8 = vcombine_u8(d8u8, d9u8); q11u8 = q15u8; vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch; vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch; vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch; vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch; } return; } if (yoffset == 0) { // firstpass_bfilter16x16_only d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); for (i = 4; i > 0; i--) { d2u8 = vld1_u8(src_ptr); d3u8 = vld1_u8(src_ptr + 8); d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; d5u8 = vld1_u8(src_ptr); d6u8 = vld1_u8(src_ptr + 8); d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; d8u8 = vld1_u8(src_ptr); d9u8 = vld1_u8(src_ptr + 8); d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; d11u8 = vld1_u8(src_ptr); d12u8 = vld1_u8(src_ptr + 8); d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; q7u16 = vmull_u8(d2u8, d0u8); q8u16 = vmull_u8(d3u8, d0u8); q9u16 = vmull_u8(d5u8, d0u8); q10u16 = vmull_u8(d6u8, d0u8); q11u16 = vmull_u8(d8u8, d0u8); q12u16 = vmull_u8(d9u8, d0u8); q13u16 = vmull_u8(d11u8, d0u8); q14u16 = vmull_u8(d12u8, d0u8); d2u8 = vext_u8(d2u8, d3u8, 1); d5u8 = vext_u8(d5u8, d6u8, 1); d8u8 = vext_u8(d8u8, d9u8, 1); d11u8 = vext_u8(d11u8, d12u8, 1); q7u16 = vmlal_u8(q7u16, d2u8, d1u8); q9u16 = vmlal_u8(q9u16, d5u8, d1u8); q11u16 = vmlal_u8(q11u16, d8u8, d1u8); q13u16 = vmlal_u8(q13u16, d11u8, d1u8); d3u8 = vext_u8(d3u8, d4u8, 1); d6u8 = vext_u8(d6u8, d7u8, 1); d9u8 = vext_u8(d9u8, d10u8, 1); d12u8 = vext_u8(d12u8, d13u8, 1); q8u16 = vmlal_u8(q8u16, d3u8, d1u8); q10u16 = vmlal_u8(q10u16, d6u8, d1u8); q12u16 = vmlal_u8(q12u16, d9u8, d1u8); q14u16 = vmlal_u8(q14u16, d12u8, d1u8); d14u8 = vqrshrn_n_u16(q7u16, 7); d15u8 = vqrshrn_n_u16(q8u16, 7); d16u8 = vqrshrn_n_u16(q9u16, 7); d17u8 = vqrshrn_n_u16(q10u16, 7); d18u8 = vqrshrn_n_u16(q11u16, 7); d19u8 = vqrshrn_n_u16(q12u16, 7); d20u8 = vqrshrn_n_u16(q13u16, 7); d21u8 = vqrshrn_n_u16(q14u16, 7); q7u8 = vcombine_u8(d14u8, d15u8); q8u8 = vcombine_u8(d16u8, d17u8); q9u8 = vcombine_u8(d18u8, d19u8); q10u8 = vcombine_u8(d20u8, d21u8); vst1q_u8((uint8_t *)dst_ptr, q7u8); dst_ptr += dst_pitch; vst1q_u8((uint8_t *)dst_ptr, q8u8); dst_ptr += dst_pitch; vst1q_u8((uint8_t *)dst_ptr, q9u8); dst_ptr += dst_pitch; vst1q_u8((uint8_t *)dst_ptr, q10u8); dst_ptr += dst_pitch; } return; } d0u8 = vdup_n_u8(bifilter4_coeff[xoffset][0]); d1u8 = vdup_n_u8(bifilter4_coeff[xoffset][1]); d2u8 = vld1_u8(src_ptr); d3u8 = vld1_u8(src_ptr + 8); d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; d5u8 = vld1_u8(src_ptr); d6u8 = vld1_u8(src_ptr + 8); d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; d8u8 = vld1_u8(src_ptr); d9u8 = vld1_u8(src_ptr + 8); d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; d11u8 = vld1_u8(src_ptr); d12u8 = vld1_u8(src_ptr + 8); d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; // First Pass: 
output_height lines x output_width columns (17x16) tmpp = tmp; for (i = 3; i > 0; i--) { q7u16 = vmull_u8(d2u8, d0u8); q8u16 = vmull_u8(d3u8, d0u8); q9u16 = vmull_u8(d5u8, d0u8); q10u16 = vmull_u8(d6u8, d0u8); q11u16 = vmull_u8(d8u8, d0u8); q12u16 = vmull_u8(d9u8, d0u8); q13u16 = vmull_u8(d11u8, d0u8); q14u16 = vmull_u8(d12u8, d0u8); d2u8 = vext_u8(d2u8, d3u8, 1); d5u8 = vext_u8(d5u8, d6u8, 1); d8u8 = vext_u8(d8u8, d9u8, 1); d11u8 = vext_u8(d11u8, d12u8, 1); q7u16 = vmlal_u8(q7u16, d2u8, d1u8); q9u16 = vmlal_u8(q9u16, d5u8, d1u8); q11u16 = vmlal_u8(q11u16, d8u8, d1u8); q13u16 = vmlal_u8(q13u16, d11u8, d1u8); d3u8 = vext_u8(d3u8, d4u8, 1); d6u8 = vext_u8(d6u8, d7u8, 1); d9u8 = vext_u8(d9u8, d10u8, 1); d12u8 = vext_u8(d12u8, d13u8, 1); q8u16 = vmlal_u8(q8u16, d3u8, d1u8); q10u16 = vmlal_u8(q10u16, d6u8, d1u8); q12u16 = vmlal_u8(q12u16, d9u8, d1u8); q14u16 = vmlal_u8(q14u16, d12u8, d1u8); d14u8 = vqrshrn_n_u16(q7u16, 7); d15u8 = vqrshrn_n_u16(q8u16, 7); d16u8 = vqrshrn_n_u16(q9u16, 7); d17u8 = vqrshrn_n_u16(q10u16, 7); d18u8 = vqrshrn_n_u16(q11u16, 7); d19u8 = vqrshrn_n_u16(q12u16, 7); d20u8 = vqrshrn_n_u16(q13u16, 7); d21u8 = vqrshrn_n_u16(q14u16, 7); d2u8 = vld1_u8(src_ptr); d3u8 = vld1_u8(src_ptr + 8); d4u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; d5u8 = vld1_u8(src_ptr); d6u8 = vld1_u8(src_ptr + 8); d7u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; d8u8 = vld1_u8(src_ptr); d9u8 = vld1_u8(src_ptr + 8); d10u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; d11u8 = vld1_u8(src_ptr); d12u8 = vld1_u8(src_ptr + 8); d13u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; q7u8 = vcombine_u8(d14u8, d15u8); q8u8 = vcombine_u8(d16u8, d17u8); q9u8 = vcombine_u8(d18u8, d19u8); q10u8 = vcombine_u8(d20u8, d21u8); vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16; vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16; vst1q_u8((uint8_t *)tmpp, q9u8); tmpp += 16; vst1q_u8((uint8_t *)tmpp, q10u8); tmpp += 16; } // First-pass filtering for rest 5 lines d14u8 = vld1_u8(src_ptr); d15u8 = vld1_u8(src_ptr + 8); d16u8 = vld1_u8(src_ptr + 16); src_ptr += src_pixels_per_line; q9u16 = vmull_u8(d2u8, d0u8); q10u16 = vmull_u8(d3u8, d0u8); q11u16 = vmull_u8(d5u8, d0u8); q12u16 = vmull_u8(d6u8, d0u8); q13u16 = vmull_u8(d8u8, d0u8); q14u16 = vmull_u8(d9u8, d0u8); d2u8 = vext_u8(d2u8, d3u8, 1); d5u8 = vext_u8(d5u8, d6u8, 1); d8u8 = vext_u8(d8u8, d9u8, 1); q9u16 = vmlal_u8(q9u16, d2u8, d1u8); q11u16 = vmlal_u8(q11u16, d5u8, d1u8); q13u16 = vmlal_u8(q13u16, d8u8, d1u8); d3u8 = vext_u8(d3u8, d4u8, 1); d6u8 = vext_u8(d6u8, d7u8, 1); d9u8 = vext_u8(d9u8, d10u8, 1); q10u16 = vmlal_u8(q10u16, d3u8, d1u8); q12u16 = vmlal_u8(q12u16, d6u8, d1u8); q14u16 = vmlal_u8(q14u16, d9u8, d1u8); q1u16 = vmull_u8(d11u8, d0u8); q2u16 = vmull_u8(d12u8, d0u8); q3u16 = vmull_u8(d14u8, d0u8); q4u16 = vmull_u8(d15u8, d0u8); d11u8 = vext_u8(d11u8, d12u8, 1); d14u8 = vext_u8(d14u8, d15u8, 1); q1u16 = vmlal_u8(q1u16, d11u8, d1u8); q3u16 = vmlal_u8(q3u16, d14u8, d1u8); d12u8 = vext_u8(d12u8, d13u8, 1); d15u8 = vext_u8(d15u8, d16u8, 1); q2u16 = vmlal_u8(q2u16, d12u8, d1u8); q4u16 = vmlal_u8(q4u16, d15u8, d1u8); d10u8 = vqrshrn_n_u16(q9u16, 7); d11u8 = vqrshrn_n_u16(q10u16, 7); d12u8 = vqrshrn_n_u16(q11u16, 7); d13u8 = vqrshrn_n_u16(q12u16, 7); d14u8 = vqrshrn_n_u16(q13u16, 7); d15u8 = vqrshrn_n_u16(q14u16, 7); d16u8 = vqrshrn_n_u16(q1u16, 7); d17u8 = vqrshrn_n_u16(q2u16, 7); d18u8 = vqrshrn_n_u16(q3u16, 7); d19u8 = vqrshrn_n_u16(q4u16, 7); q5u8 = vcombine_u8(d10u8, d11u8); q6u8 = vcombine_u8(d12u8, d13u8); q7u8 = 
vcombine_u8(d14u8, d15u8); q8u8 = vcombine_u8(d16u8, d17u8); q9u8 = vcombine_u8(d18u8, d19u8); vst1q_u8((uint8_t *)tmpp, q5u8); tmpp += 16; vst1q_u8((uint8_t *)tmpp, q6u8); tmpp += 16; vst1q_u8((uint8_t *)tmpp, q7u8); tmpp += 16; vst1q_u8((uint8_t *)tmpp, q8u8); tmpp += 16; vst1q_u8((uint8_t *)tmpp, q9u8); // secondpass_filter d0u8 = vdup_n_u8(bifilter4_coeff[yoffset][0]); d1u8 = vdup_n_u8(bifilter4_coeff[yoffset][1]); tmpp = tmp; q11u8 = vld1q_u8(tmpp); tmpp += 16; for (i = 4; i > 0; i--) { q12u8 = vld1q_u8(tmpp); tmpp += 16; q13u8 = vld1q_u8(tmpp); tmpp += 16; q14u8 = vld1q_u8(tmpp); tmpp += 16; q15u8 = vld1q_u8(tmpp); tmpp += 16; q1u16 = vmull_u8(vget_low_u8(q11u8), d0u8); q2u16 = vmull_u8(vget_high_u8(q11u8), d0u8); q3u16 = vmull_u8(vget_low_u8(q12u8), d0u8); q4u16 = vmull_u8(vget_high_u8(q12u8), d0u8); q5u16 = vmull_u8(vget_low_u8(q13u8), d0u8); q6u16 = vmull_u8(vget_high_u8(q13u8), d0u8); q7u16 = vmull_u8(vget_low_u8(q14u8), d0u8); q8u16 = vmull_u8(vget_high_u8(q14u8), d0u8); q1u16 = vmlal_u8(q1u16, vget_low_u8(q12u8), d1u8); q2u16 = vmlal_u8(q2u16, vget_high_u8(q12u8), d1u8); q3u16 = vmlal_u8(q3u16, vget_low_u8(q13u8), d1u8); q4u16 = vmlal_u8(q4u16, vget_high_u8(q13u8), d1u8); q5u16 = vmlal_u8(q5u16, vget_low_u8(q14u8), d1u8); q6u16 = vmlal_u8(q6u16, vget_high_u8(q14u8), d1u8); q7u16 = vmlal_u8(q7u16, vget_low_u8(q15u8), d1u8); q8u16 = vmlal_u8(q8u16, vget_high_u8(q15u8), d1u8); d2u8 = vqrshrn_n_u16(q1u16, 7); d3u8 = vqrshrn_n_u16(q2u16, 7); d4u8 = vqrshrn_n_u16(q3u16, 7); d5u8 = vqrshrn_n_u16(q4u16, 7); d6u8 = vqrshrn_n_u16(q5u16, 7); d7u8 = vqrshrn_n_u16(q6u16, 7); d8u8 = vqrshrn_n_u16(q7u16, 7); d9u8 = vqrshrn_n_u16(q8u16, 7); q1u8 = vcombine_u8(d2u8, d3u8); q2u8 = vcombine_u8(d4u8, d5u8); q3u8 = vcombine_u8(d6u8, d7u8); q4u8 = vcombine_u8(d8u8, d9u8); q11u8 = q15u8; vst1q_u8((uint8_t *)dst_ptr, q1u8); dst_ptr += dst_pitch; vst1q_u8((uint8_t *)dst_ptr, q2u8); dst_ptr += dst_pitch; vst1q_u8((uint8_t *)dst_ptr, q3u8); dst_ptr += dst_pitch; vst1q_u8((uint8_t *)dst_ptr, q4u8); dst_ptr += dst_pitch; } return; } libvpx-1.8.2/vp8/common/arm/neon/copymem_neon.c000066400000000000000000000024471357355204000214200ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vp8_rtcd.h" void vp8_copy_mem8x4_neon(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) { uint8x8_t vtmp; int r; for (r = 0; r < 4; ++r) { vtmp = vld1_u8(src); vst1_u8(dst, vtmp); src += src_stride; dst += dst_stride; } } void vp8_copy_mem8x8_neon(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) { uint8x8_t vtmp; int r; for (r = 0; r < 8; ++r) { vtmp = vld1_u8(src); vst1_u8(dst, vtmp); src += src_stride; dst += dst_stride; } } void vp8_copy_mem16x16_neon(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) { int r; uint8x16_t qtmp; for (r = 0; r < 16; ++r) { qtmp = vld1q_u8(src); vst1q_u8(dst, qtmp); src += src_stride; dst += dst_stride; } } libvpx-1.8.2/vp8/common/arm/neon/dc_only_idct_add_neon.c000066400000000000000000000025101357355204000232000ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include "./vp8_rtcd.h" void vp8_dc_only_idct_add_neon(int16_t input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride) { int i; uint16_t a1 = ((input_dc + 4) >> 3); uint32x2_t d2u32 = vdup_n_u32(0); uint8x8_t d2u8; uint16x8_t q1u16; uint16x8_t qAdd; qAdd = vdupq_n_u16(a1); for (i = 0; i < 2; ++i) { d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 0); pred_ptr += pred_stride; d2u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d2u32, 1); pred_ptr += pred_stride; q1u16 = vaddw_u8(qAdd, vreinterpret_u8_u32(d2u32)); d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 0); dst_ptr += dst_stride; vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d2u8), 1); dst_ptr += dst_stride; } } libvpx-1.8.2/vp8/common/arm/neon/dequant_idct_neon.c000066400000000000000000000106231357355204000224060ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include "./vp8_rtcd.h" static const int16_t cospi8sqrt2minus1 = 20091; // 35468 exceeds INT16_MAX and gets converted to a negative number. Because of // the way it is used in vqdmulh, where the result is doubled, it can be divided // by 2 beforehand. This saves compensating for the negative value as well as // shifting the result.
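// The pre-halving described above is easiest to check in scalar form. The
// sketch below is illustrative only -- it is not part of the libvpx tree and
// the helper name is made up. vqdmulh is a doubling multiply that returns the
// high half, roughly (2 * a * b) >> 16, so passing the halved constant
// 35468 >> 1 = 17734 reproduces (a * 35468) >> 16 without ever storing a
// value that overflows int16_t.
#include <stdint.h>

static int16_t qdmulh_model(int16_t a, int16_t b) {
  const int64_t doubled = 2 * (int64_t)a * b;  // doubling multiply
  int32_t hi = (int32_t)(doubled >> 16);       // keep the high half
  if (hi > INT16_MAX) hi = INT16_MAX;  // vqdmulh saturates the one overflow
                                       // case, INT16_MIN * INT16_MIN
  return (int16_t)hi;
}
// qdmulh_model(x, 35468 >> 1) equals (x * 35468) >> 16 exactly, since
// 2 * 17734 == 35468.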
static const int16_t sinpi8sqrt2 = 35468 >> 1; void vp8_dequant_idct_add_neon(int16_t *input, int16_t *dq, unsigned char *dst, int stride) { unsigned char *dst0; int32x2_t d14, d15; int16x4_t d2, d3, d4, d5, d10, d11, d12, d13; int16x8_t q1, q2, q3, q4, q5, q6; int16x8_t qEmpty = vdupq_n_s16(0); int32x2x2_t d2tmp0, d2tmp1; int16x4x2_t d2tmp2, d2tmp3; d14 = d15 = vdup_n_s32(0); // load input q3 = vld1q_s16(input); vst1q_s16(input, qEmpty); input += 8; q4 = vld1q_s16(input); vst1q_s16(input, qEmpty); // load dq q5 = vld1q_s16(dq); dq += 8; q6 = vld1q_s16(dq); // load src from dst dst0 = dst; d14 = vld1_lane_s32((const int32_t *)dst0, d14, 0); dst0 += stride; d14 = vld1_lane_s32((const int32_t *)dst0, d14, 1); dst0 += stride; d15 = vld1_lane_s32((const int32_t *)dst0, d15, 0); dst0 += stride; d15 = vld1_lane_s32((const int32_t *)dst0, d15, 1); q1 = vreinterpretq_s16_u16( vmulq_u16(vreinterpretq_u16_s16(q3), vreinterpretq_u16_s16(q5))); q2 = vreinterpretq_s16_u16( vmulq_u16(vreinterpretq_u16_s16(q4), vreinterpretq_u16_s16(q6))); d12 = vqadd_s16(vget_low_s16(q1), vget_low_s16(q2)); d13 = vqsub_s16(vget_low_s16(q1), vget_low_s16(q2)); q2 = vcombine_s16(vget_high_s16(q1), vget_high_s16(q2)); q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); q4 = vshrq_n_s16(q4, 1); q4 = vqaddq_s16(q4, q2); d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); d2 = vqadd_s16(d12, d11); d3 = vqadd_s16(d13, d10); d4 = vqsub_s16(d13, d10); d5 = vqsub_s16(d12, d11); d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]), vreinterpret_s16_s32(d2tmp1.val[0])); d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]), vreinterpret_s16_s32(d2tmp1.val[1])); // loop 2 q2 = vcombine_s16(d2tmp2.val[1], d2tmp3.val[1]); q3 = vqdmulhq_n_s16(q2, sinpi8sqrt2); q4 = vqdmulhq_n_s16(q2, cospi8sqrt2minus1); d12 = vqadd_s16(d2tmp2.val[0], d2tmp3.val[0]); d13 = vqsub_s16(d2tmp2.val[0], d2tmp3.val[0]); q4 = vshrq_n_s16(q4, 1); q4 = vqaddq_s16(q4, q2); d10 = vqsub_s16(vget_low_s16(q3), vget_high_s16(q4)); d11 = vqadd_s16(vget_high_s16(q3), vget_low_s16(q4)); d2 = vqadd_s16(d12, d11); d3 = vqadd_s16(d13, d10); d4 = vqsub_s16(d13, d10); d5 = vqsub_s16(d12, d11); d2 = vrshr_n_s16(d2, 3); d3 = vrshr_n_s16(d3, 3); d4 = vrshr_n_s16(d4, 3); d5 = vrshr_n_s16(d5, 3); d2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); d2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); d2tmp2 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[0]), vreinterpret_s16_s32(d2tmp1.val[0])); d2tmp3 = vtrn_s16(vreinterpret_s16_s32(d2tmp0.val[1]), vreinterpret_s16_s32(d2tmp1.val[1])); q1 = vcombine_s16(d2tmp2.val[0], d2tmp2.val[1]); q2 = vcombine_s16(d2tmp3.val[0], d2tmp3.val[1]); q1 = vreinterpretq_s16_u16( vaddw_u8(vreinterpretq_u16_s16(q1), vreinterpret_u8_s32(d14))); q2 = vreinterpretq_s16_u16( vaddw_u8(vreinterpretq_u16_s16(q2), vreinterpret_u8_s32(d15))); d14 = vreinterpret_s32_u8(vqmovun_s16(q1)); d15 = vreinterpret_s32_u8(vqmovun_s16(q2)); dst0 = dst; vst1_lane_s32((int32_t *)dst0, d14, 0); dst0 += stride; vst1_lane_s32((int32_t *)dst0, d14, 1); dst0 += stride; vst1_lane_s32((int32_t *)dst0, d15, 0); dst0 += stride; vst1_lane_s32((int32_t *)dst0, d15, 1); return; } libvpx-1.8.2/vp8/common/arm/neon/dequantizeb_neon.c000066400000000000000000000013711357355204000222550ustar00rootroot00000000000000/* * Copyright (c) 
2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vp8_rtcd.h" #include "vp8/common/blockd.h" void vp8_dequantize_b_neon(BLOCKD *d, short *DQC) { int16x8x2_t qQ, qDQC, qDQ; qQ = vld2q_s16(d->qcoeff); qDQC = vld2q_s16(DQC); qDQ.val[0] = vmulq_s16(qQ.val[0], qDQC.val[0]); qDQ.val[1] = vmulq_s16(qQ.val[1], qDQC.val[1]); vst2q_s16(d->dqcoeff, qDQ); } libvpx-1.8.2/vp8/common/arm/neon/idct_blk_neon.c000066400000000000000000000212051357355204000215130ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vp8_rtcd.h" static void idct_dequant_0_2x_neon(int16_t *q, int16_t dq, unsigned char *dst, int stride) { unsigned char *dst0; int i, a0, a1; int16x8x2_t q2Add; int32x2_t d2s32 = vdup_n_s32(0), d4s32 = vdup_n_s32(0); uint8x8_t d2u8, d4u8; uint16x8_t q1u16, q2u16; a0 = ((q[0] * dq) + 4) >> 3; a1 = ((q[16] * dq) + 4) >> 3; q[0] = q[16] = 0; q2Add.val[0] = vdupq_n_s16((int16_t)a0); q2Add.val[1] = vdupq_n_s16((int16_t)a1); for (i = 0; i < 2; i++, dst += 4) { dst0 = dst; d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 0); dst0 += stride; d2s32 = vld1_lane_s32((const int32_t *)dst0, d2s32, 1); dst0 += stride; d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 0); dst0 += stride; d4s32 = vld1_lane_s32((const int32_t *)dst0, d4s32, 1); q1u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), vreinterpret_u8_s32(d2s32)); q2u16 = vaddw_u8(vreinterpretq_u16_s16(q2Add.val[i]), vreinterpret_u8_s32(d4s32)); d2u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); d4u8 = vqmovun_s16(vreinterpretq_s16_u16(q2u16)); d2s32 = vreinterpret_s32_u8(d2u8); d4s32 = vreinterpret_s32_u8(d4u8); dst0 = dst; vst1_lane_s32((int32_t *)dst0, d2s32, 0); dst0 += stride; vst1_lane_s32((int32_t *)dst0, d2s32, 1); dst0 += stride; vst1_lane_s32((int32_t *)dst0, d4s32, 0); dst0 += stride; vst1_lane_s32((int32_t *)dst0, d4s32, 1); } } static const int16_t cospi8sqrt2minus1 = 20091; static const int16_t sinpi8sqrt2 = 17734; // because the lowest bit in 0x8a8c is 0, we can pre-shift this static void idct_dequant_full_2x_neon(int16_t *q, int16_t *dq, unsigned char *dst, int stride) { unsigned char *dst0, *dst1; int32x2_t d28, d29, d30, d31; int16x8_t q0, q1, q2, q3, q4, q5, q6, q7, q8, q9, q10, q11; int16x8_t qEmpty = vdupq_n_s16(0); int32x4x2_t q2tmp0, q2tmp1; int16x8x2_t q2tmp2, q2tmp3; int16x4_t dLow0, dLow1, dHigh0, dHigh1; d28 = d29 = d30 = d31 = vdup_n_s32(0); // load dq q0 = vld1q_s16(dq); dq += 8; q1 = vld1q_s16(dq); // load q q2 = vld1q_s16(q); vst1q_s16(q, qEmpty); q += 8; q3 = vld1q_s16(q); vst1q_s16(q, qEmpty); q += 8; q4 = vld1q_s16(q); vst1q_s16(q, qEmpty); q += 8; q5 = vld1q_s16(q); vst1q_s16(q, qEmpty); // load src from dst dst0 = dst; dst1 = dst + 4; d28 = vld1_lane_s32((const int32_t *)dst0, d28, 0); dst0 += stride; d28 = vld1_lane_s32((const int32_t *)dst1, d28, 1); dst1 += stride; d29 = 
vld1_lane_s32((const int32_t *)dst0, d29, 0); dst0 += stride; d29 = vld1_lane_s32((const int32_t *)dst1, d29, 1); dst1 += stride; d30 = vld1_lane_s32((const int32_t *)dst0, d30, 0); dst0 += stride; d30 = vld1_lane_s32((const int32_t *)dst1, d30, 1); dst1 += stride; d31 = vld1_lane_s32((const int32_t *)dst0, d31, 0); d31 = vld1_lane_s32((const int32_t *)dst1, d31, 1); q2 = vmulq_s16(q2, q0); q3 = vmulq_s16(q3, q1); q4 = vmulq_s16(q4, q0); q5 = vmulq_s16(q5, q1); // vswp dLow0 = vget_low_s16(q2); dHigh0 = vget_high_s16(q2); dLow1 = vget_low_s16(q4); dHigh1 = vget_high_s16(q4); q2 = vcombine_s16(dLow0, dLow1); q4 = vcombine_s16(dHigh0, dHigh1); dLow0 = vget_low_s16(q3); dHigh0 = vget_high_s16(q3); dLow1 = vget_low_s16(q5); dHigh1 = vget_high_s16(q5); q3 = vcombine_s16(dLow0, dLow1); q5 = vcombine_s16(dHigh0, dHigh1); q6 = vqdmulhq_n_s16(q4, sinpi8sqrt2); q7 = vqdmulhq_n_s16(q5, sinpi8sqrt2); q8 = vqdmulhq_n_s16(q4, cospi8sqrt2minus1); q9 = vqdmulhq_n_s16(q5, cospi8sqrt2minus1); q10 = vqaddq_s16(q2, q3); q11 = vqsubq_s16(q2, q3); q8 = vshrq_n_s16(q8, 1); q9 = vshrq_n_s16(q9, 1); q4 = vqaddq_s16(q4, q8); q5 = vqaddq_s16(q5, q9); q2 = vqsubq_s16(q6, q5); q3 = vqaddq_s16(q7, q4); q4 = vqaddq_s16(q10, q3); q5 = vqaddq_s16(q11, q2); q6 = vqsubq_s16(q11, q2); q7 = vqsubq_s16(q10, q3); q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), vreinterpretq_s16_s32(q2tmp1.val[0])); q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), vreinterpretq_s16_s32(q2tmp1.val[1])); // loop 2 q8 = vqdmulhq_n_s16(q2tmp2.val[1], sinpi8sqrt2); q9 = vqdmulhq_n_s16(q2tmp3.val[1], sinpi8sqrt2); q10 = vqdmulhq_n_s16(q2tmp2.val[1], cospi8sqrt2minus1); q11 = vqdmulhq_n_s16(q2tmp3.val[1], cospi8sqrt2minus1); q2 = vqaddq_s16(q2tmp2.val[0], q2tmp3.val[0]); q3 = vqsubq_s16(q2tmp2.val[0], q2tmp3.val[0]); q10 = vshrq_n_s16(q10, 1); q11 = vshrq_n_s16(q11, 1); q10 = vqaddq_s16(q2tmp2.val[1], q10); q11 = vqaddq_s16(q2tmp3.val[1], q11); q8 = vqsubq_s16(q8, q11); q9 = vqaddq_s16(q9, q10); q4 = vqaddq_s16(q2, q9); q5 = vqaddq_s16(q3, q8); q6 = vqsubq_s16(q3, q8); q7 = vqsubq_s16(q2, q9); q4 = vrshrq_n_s16(q4, 3); q5 = vrshrq_n_s16(q5, 3); q6 = vrshrq_n_s16(q6, 3); q7 = vrshrq_n_s16(q7, 3); q2tmp0 = vtrnq_s32(vreinterpretq_s32_s16(q4), vreinterpretq_s32_s16(q6)); q2tmp1 = vtrnq_s32(vreinterpretq_s32_s16(q5), vreinterpretq_s32_s16(q7)); q2tmp2 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[0]), vreinterpretq_s16_s32(q2tmp1.val[0])); q2tmp3 = vtrnq_s16(vreinterpretq_s16_s32(q2tmp0.val[1]), vreinterpretq_s16_s32(q2tmp1.val[1])); q4 = vreinterpretq_s16_u16( vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[0]), vreinterpret_u8_s32(d28))); q5 = vreinterpretq_s16_u16( vaddw_u8(vreinterpretq_u16_s16(q2tmp2.val[1]), vreinterpret_u8_s32(d29))); q6 = vreinterpretq_s16_u16( vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[0]), vreinterpret_u8_s32(d30))); q7 = vreinterpretq_s16_u16( vaddw_u8(vreinterpretq_u16_s16(q2tmp3.val[1]), vreinterpret_u8_s32(d31))); d28 = vreinterpret_s32_u8(vqmovun_s16(q4)); d29 = vreinterpret_s32_u8(vqmovun_s16(q5)); d30 = vreinterpret_s32_u8(vqmovun_s16(q6)); d31 = vreinterpret_s32_u8(vqmovun_s16(q7)); dst0 = dst; dst1 = dst + 4; vst1_lane_s32((int32_t *)dst0, d28, 0); dst0 += stride; vst1_lane_s32((int32_t *)dst1, d28, 1); dst1 += stride; vst1_lane_s32((int32_t *)dst0, d29, 0); dst0 += stride; vst1_lane_s32((int32_t *)dst1, d29, 1); dst1 += stride; vst1_lane_s32((int32_t *)dst0, d30, 
0); dst0 += stride; vst1_lane_s32((int32_t *)dst1, d30, 1); dst1 += stride; vst1_lane_s32((int32_t *)dst0, d31, 0); vst1_lane_s32((int32_t *)dst1, d31, 1); } void vp8_dequant_idct_add_y_block_neon(short *q, short *dq, unsigned char *dst, int stride, char *eobs) { int i; for (i = 0; i < 4; ++i) { if (((short *)(eobs))[0]) { if (((short *)eobs)[0] & 0xfefe) idct_dequant_full_2x_neon(q, dq, dst, stride); else idct_dequant_0_2x_neon(q, dq[0], dst, stride); } if (((short *)(eobs))[1]) { if (((short *)eobs)[1] & 0xfefe) idct_dequant_full_2x_neon(q + 32, dq, dst + 8, stride); else idct_dequant_0_2x_neon(q + 32, dq[0], dst + 8, stride); } q += 64; dst += 4 * stride; eobs += 4; } } void vp8_dequant_idct_add_uv_block_neon(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs) { if (((short *)(eobs))[0]) { if (((short *)eobs)[0] & 0xfefe) idct_dequant_full_2x_neon(q, dq, dst_u, stride); else idct_dequant_0_2x_neon(q, dq[0], dst_u, stride); } q += 32; dst_u += 4 * stride; if (((short *)(eobs))[1]) { if (((short *)eobs)[1] & 0xfefe) idct_dequant_full_2x_neon(q, dq, dst_u, stride); else idct_dequant_0_2x_neon(q, dq[0], dst_u, stride); } q += 32; if (((short *)(eobs))[2]) { if (((short *)eobs)[2] & 0xfefe) idct_dequant_full_2x_neon(q, dq, dst_v, stride); else idct_dequant_0_2x_neon(q, dq[0], dst_v, stride); } q += 32; dst_v += 4 * stride; if (((short *)(eobs))[3]) { if (((short *)eobs)[3] & 0xfefe) idct_dequant_full_2x_neon(q, dq, dst_v, stride); else idct_dequant_0_2x_neon(q, dq[0], dst_v, stride); } } libvpx-1.8.2/vp8/common/arm/neon/iwalsh_neon.c000066400000000000000000000065001357355204000212300ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vp8_rtcd.h" void vp8_short_inv_walsh4x4_neon(int16_t *input, int16_t *mb_dqcoeff) { int16x8_t q0s16, q1s16, q2s16, q3s16; int16x4_t d4s16, d5s16, d6s16, d7s16; int16x4x2_t v2tmp0, v2tmp1; int32x2x2_t v2tmp2, v2tmp3; int16x8_t qAdd3; q0s16 = vld1q_s16(input); q1s16 = vld1q_s16(input + 8); // 1st for loop d4s16 = vadd_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); d6s16 = vadd_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); d5s16 = vsub_s16(vget_low_s16(q0s16), vget_high_s16(q1s16)); d7s16 = vsub_s16(vget_high_s16(q0s16), vget_low_s16(q1s16)); q2s16 = vcombine_s16(d4s16, d5s16); q3s16 = vcombine_s16(d6s16, d7s16); q0s16 = vaddq_s16(q2s16, q3s16); q1s16 = vsubq_s16(q2s16, q3s16); v2tmp2 = vtrn_s32(vreinterpret_s32_s16(vget_low_s16(q0s16)), vreinterpret_s32_s16(vget_low_s16(q1s16))); v2tmp3 = vtrn_s32(vreinterpret_s32_s16(vget_high_s16(q0s16)), vreinterpret_s32_s16(vget_high_s16(q1s16))); v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), vreinterpret_s16_s32(v2tmp3.val[0])); v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), vreinterpret_s16_s32(v2tmp3.val[1])); // 2nd for loop d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]); d6s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]); d5s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]); d7s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]); q2s16 = vcombine_s16(d4s16, d5s16); q3s16 = vcombine_s16(d6s16, d7s16); qAdd3 = vdupq_n_s16(3); q0s16 = vaddq_s16(q2s16, q3s16); q1s16 = vsubq_s16(q2s16, q3s16); q0s16 = vaddq_s16(q0s16, qAdd3); q1s16 = vaddq_s16(q1s16, qAdd3); q0s16 = vshrq_n_s16(q0s16, 3); q1s16 = vshrq_n_s16(q1s16, 3); // store vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 0); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 0); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 0); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 0); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 1); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 1); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 1); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 1); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 2); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 2); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 2); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 2); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_low_s16(q0s16), 3); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_high_s16(q0s16), 3); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_low_s16(q1s16), 3); mb_dqcoeff += 16; vst1_lane_s16(mb_dqcoeff, vget_high_s16(q1s16), 3); mb_dqcoeff += 16; return; } libvpx-1.8.2/vp8/common/arm/neon/loopfiltersimplehorizontaledge_neon.c000066400000000000000000000061351357355204000262750ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_config.h" #include "./vp8_rtcd.h" static INLINE void vp8_loop_filter_simple_horizontal_edge_neon( unsigned char *s, int p, const unsigned char *blimit) { uint8_t *sp; uint8x16_t qblimit, q0u8; uint8x16_t q5u8, q6u8, q7u8, q8u8, q9u8, q10u8, q14u8, q15u8; int16x8_t q2s16, q3s16, q13s16; int8x8_t d8s8, d9s8; int8x16_t q2s8, q3s8, q4s8, q10s8, q11s8, q14s8; qblimit = vdupq_n_u8(*blimit); sp = s - (p << 1); q5u8 = vld1q_u8(sp); sp += p; q6u8 = vld1q_u8(sp); sp += p; q7u8 = vld1q_u8(sp); sp += p; q8u8 = vld1q_u8(sp); q15u8 = vabdq_u8(q6u8, q7u8); q14u8 = vabdq_u8(q5u8, q8u8); q15u8 = vqaddq_u8(q15u8, q15u8); q14u8 = vshrq_n_u8(q14u8, 1); q0u8 = vdupq_n_u8(0x80); q13s16 = vdupq_n_s16(3); q15u8 = vqaddq_u8(q15u8, q14u8); q5u8 = veorq_u8(q5u8, q0u8); q6u8 = veorq_u8(q6u8, q0u8); q7u8 = veorq_u8(q7u8, q0u8); q8u8 = veorq_u8(q8u8, q0u8); q15u8 = vcgeq_u8(qblimit, q15u8); q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7u8)), vget_low_s8(vreinterpretq_s8_u8(q6u8))); q3s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7u8)), vget_high_s8(vreinterpretq_s8_u8(q6u8))); q4s8 = vqsubq_s8(vreinterpretq_s8_u8(q5u8), vreinterpretq_s8_u8(q8u8)); q2s16 = vmulq_s16(q2s16, q13s16); q3s16 = vmulq_s16(q3s16, q13s16); q10u8 = vdupq_n_u8(3); q9u8 = vdupq_n_u8(4); q2s16 = vaddw_s8(q2s16, vget_low_s8(q4s8)); q3s16 = vaddw_s8(q3s16, vget_high_s8(q4s8)); d8s8 = vqmovn_s16(q2s16); d9s8 = vqmovn_s16(q3s16); q4s8 = vcombine_s8(d8s8, d9s8); q14s8 = vandq_s8(q4s8, vreinterpretq_s8_u8(q15u8)); q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q10u8)); q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q9u8)); q2s8 = vshrq_n_s8(q2s8, 3); q3s8 = vshrq_n_s8(q3s8, 3); q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6u8), q2s8); q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7u8), q3s8); q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); vst1q_u8(s, q7u8); s -= p; vst1q_u8(s, q6u8); return; } void vp8_loop_filter_bhs_neon(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { y_ptr += y_stride * 4; vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); y_ptr += y_stride * 4; vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); y_ptr += y_stride * 4; vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); return; } void vp8_loop_filter_mbhs_neon(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { vp8_loop_filter_simple_horizontal_edge_neon(y_ptr, y_stride, blimit); return; } libvpx-1.8.2/vp8/common/arm/neon/loopfiltersimpleverticaledge_neon.c000066400000000000000000000206511357355204000257140ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_config.h" #include "./vp8_rtcd.h" #include "vpx_ports/arm.h" #ifdef VPX_INCOMPATIBLE_GCC static INLINE void write_2x4(unsigned char *dst, int pitch, const uint8x8x2_t result) { /* * uint8x8x2_t result 00 01 02 03 | 04 05 06 07 10 11 12 13 | 14 15 16 17 --- * after vtrn_u8 00 10 02 12 | 04 14 06 16 01 11 03 13 | 05 15 07 17 */ const uint8x8x2_t r01_u8 = vtrn_u8(result.val[0], result.val[1]); const uint16x4_t x_0_4 = vreinterpret_u16_u8(r01_u8.val[0]); const uint16x4_t x_1_5 = vreinterpret_u16_u8(r01_u8.val[1]); vst1_lane_u16((uint16_t *)dst, x_0_4, 0); dst += pitch; vst1_lane_u16((uint16_t *)dst, x_1_5, 0); dst += pitch; vst1_lane_u16((uint16_t *)dst, x_0_4, 1); dst += pitch; vst1_lane_u16((uint16_t *)dst, x_1_5, 1); dst += pitch; vst1_lane_u16((uint16_t *)dst, x_0_4, 2); dst += pitch; vst1_lane_u16((uint16_t *)dst, x_1_5, 2); dst += pitch; vst1_lane_u16((uint16_t *)dst, x_0_4, 3); dst += pitch; vst1_lane_u16((uint16_t *)dst, x_1_5, 3); } static INLINE void write_2x8(unsigned char *dst, int pitch, const uint8x8x2_t result, const uint8x8x2_t result2) { write_2x4(dst, pitch, result); dst += pitch * 8; write_2x4(dst, pitch, result2); } #else static INLINE void write_2x8(unsigned char *dst, int pitch, const uint8x8x2_t result, const uint8x8x2_t result2) { vst2_lane_u8(dst, result, 0); dst += pitch; vst2_lane_u8(dst, result, 1); dst += pitch; vst2_lane_u8(dst, result, 2); dst += pitch; vst2_lane_u8(dst, result, 3); dst += pitch; vst2_lane_u8(dst, result, 4); dst += pitch; vst2_lane_u8(dst, result, 5); dst += pitch; vst2_lane_u8(dst, result, 6); dst += pitch; vst2_lane_u8(dst, result, 7); dst += pitch; vst2_lane_u8(dst, result2, 0); dst += pitch; vst2_lane_u8(dst, result2, 1); dst += pitch; vst2_lane_u8(dst, result2, 2); dst += pitch; vst2_lane_u8(dst, result2, 3); dst += pitch; vst2_lane_u8(dst, result2, 4); dst += pitch; vst2_lane_u8(dst, result2, 5); dst += pitch; vst2_lane_u8(dst, result2, 6); dst += pitch; vst2_lane_u8(dst, result2, 7); } #endif // VPX_INCOMPATIBLE_GCC #ifdef VPX_INCOMPATIBLE_GCC static INLINE uint8x8x4_t read_4x8(unsigned char *src, int pitch) { uint8x8x4_t x; const uint8x8_t a = vld1_u8(src); const uint8x8_t b = vld1_u8(src + pitch * 1); const uint8x8_t c = vld1_u8(src + pitch * 2); const uint8x8_t d = vld1_u8(src + pitch * 3); const uint8x8_t e = vld1_u8(src + pitch * 4); const uint8x8_t f = vld1_u8(src + pitch * 5); const uint8x8_t g = vld1_u8(src + pitch * 6); const uint8x8_t h = vld1_u8(src + pitch * 7); const uint32x2x2_t r04_u32 = vtrn_u32(vreinterpret_u32_u8(a), vreinterpret_u32_u8(e)); const uint32x2x2_t r15_u32 = vtrn_u32(vreinterpret_u32_u8(b), vreinterpret_u32_u8(f)); const uint32x2x2_t r26_u32 = vtrn_u32(vreinterpret_u32_u8(c), vreinterpret_u32_u8(g)); const uint32x2x2_t r37_u32 = vtrn_u32(vreinterpret_u32_u8(d), vreinterpret_u32_u8(h)); const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u32(r04_u32.val[0]), vreinterpret_u16_u32(r26_u32.val[0])); const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u32(r15_u32.val[0]), vreinterpret_u16_u32(r37_u32.val[0])); const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), vreinterpret_u8_u16(r13_u16.val[0])); const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), vreinterpret_u8_u16(r13_u16.val[1])); /* * after vtrn_u32 00 01 02 03 | 40 41 42 43 10 11 12 13 | 50 51 52 53 20 21 22 23 | 60 61 62 63 30 31 32 33 | 70 71 72 73 --- * after vtrn_u16 00 01 20 21 | 40 41 60 61 02 03 22 23 | 42 43 62 63 10 11 30 31 | 50 51 70 71 12 13 32 33 | 52 52 72 73 00 01 20 
21 | 40 41 60 61 10 11 30 31 | 50 51 70 71 02 03 22 23 | 42 43 62 63 12 13 32 33 | 52 52 72 73 --- * after vtrn_u8 00 10 20 30 | 40 50 60 70 01 11 21 31 | 41 51 61 71 02 12 22 32 | 42 52 62 72 03 13 23 33 | 43 53 63 73 */ x.val[0] = r01_u8.val[0]; x.val[1] = r01_u8.val[1]; x.val[2] = r23_u8.val[0]; x.val[3] = r23_u8.val[1]; return x; } #else static INLINE uint8x8x4_t read_4x8(unsigned char *src, int pitch) { uint8x8x4_t x; x.val[0] = x.val[1] = x.val[2] = x.val[3] = vdup_n_u8(0); x = vld4_lane_u8(src, x, 0); src += pitch; x = vld4_lane_u8(src, x, 1); src += pitch; x = vld4_lane_u8(src, x, 2); src += pitch; x = vld4_lane_u8(src, x, 3); src += pitch; x = vld4_lane_u8(src, x, 4); src += pitch; x = vld4_lane_u8(src, x, 5); src += pitch; x = vld4_lane_u8(src, x, 6); src += pitch; x = vld4_lane_u8(src, x, 7); return x; } #endif // VPX_INCOMPATIBLE_GCC static INLINE void vp8_loop_filter_simple_vertical_edge_neon( unsigned char *s, int p, const unsigned char *blimit) { unsigned char *src1; uint8x16_t qblimit, q0u8; uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q11u8, q12u8, q14u8, q15u8; int16x8_t q2s16, q13s16, q11s16; int8x8_t d28s8, d29s8; int8x16_t q2s8, q3s8, q10s8, q11s8, q14s8; uint8x8x4_t d0u8x4; // d6, d7, d8, d9 uint8x8x4_t d1u8x4; // d10, d11, d12, d13 uint8x8x2_t d2u8x2; // d12, d13 uint8x8x2_t d3u8x2; // d14, d15 qblimit = vdupq_n_u8(*blimit); src1 = s - 2; d0u8x4 = read_4x8(src1, p); src1 += p * 8; d1u8x4 = read_4x8(src1, p); q3u8 = vcombine_u8(d0u8x4.val[0], d1u8x4.val[0]); // d6 d10 q4u8 = vcombine_u8(d0u8x4.val[2], d1u8x4.val[2]); // d8 d12 q5u8 = vcombine_u8(d0u8x4.val[1], d1u8x4.val[1]); // d7 d11 q6u8 = vcombine_u8(d0u8x4.val[3], d1u8x4.val[3]); // d9 d13 q15u8 = vabdq_u8(q5u8, q4u8); q14u8 = vabdq_u8(q3u8, q6u8); q15u8 = vqaddq_u8(q15u8, q15u8); q14u8 = vshrq_n_u8(q14u8, 1); q0u8 = vdupq_n_u8(0x80); q11s16 = vdupq_n_s16(3); q15u8 = vqaddq_u8(q15u8, q14u8); q3u8 = veorq_u8(q3u8, q0u8); q4u8 = veorq_u8(q4u8, q0u8); q5u8 = veorq_u8(q5u8, q0u8); q6u8 = veorq_u8(q6u8, q0u8); q15u8 = vcgeq_u8(qblimit, q15u8); q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q4u8)), vget_low_s8(vreinterpretq_s8_u8(q5u8))); q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q4u8)), vget_high_s8(vreinterpretq_s8_u8(q5u8))); q14s8 = vqsubq_s8(vreinterpretq_s8_u8(q3u8), vreinterpretq_s8_u8(q6u8)); q2s16 = vmulq_s16(q2s16, q11s16); q13s16 = vmulq_s16(q13s16, q11s16); q11u8 = vdupq_n_u8(3); q12u8 = vdupq_n_u8(4); q2s16 = vaddw_s8(q2s16, vget_low_s8(q14s8)); q13s16 = vaddw_s8(q13s16, vget_high_s8(q14s8)); d28s8 = vqmovn_s16(q2s16); d29s8 = vqmovn_s16(q13s16); q14s8 = vcombine_s8(d28s8, d29s8); q14s8 = vandq_s8(q14s8, vreinterpretq_s8_u8(q15u8)); q2s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q11u8)); q3s8 = vqaddq_s8(q14s8, vreinterpretq_s8_u8(q12u8)); q2s8 = vshrq_n_s8(q2s8, 3); q14s8 = vshrq_n_s8(q3s8, 3); q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q5u8), q2s8); q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q4u8), q14s8); q6u8 = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); q7u8 = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); d2u8x2.val[0] = vget_low_u8(q6u8); // d12 d2u8x2.val[1] = vget_low_u8(q7u8); // d14 d3u8x2.val[0] = vget_high_u8(q6u8); // d13 d3u8x2.val[1] = vget_high_u8(q7u8); // d15 src1 = s - 1; write_2x8(src1, p, d2u8x2, d3u8x2); } void vp8_loop_filter_bvs_neon(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { y_ptr += 4; vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); y_ptr += 4; vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); y_ptr += 4; 
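// y_ptr now points at column 12, the third and last internal vertical edge
// of the 16x16 luma macroblock (the simple filter runs at columns 4, 8 and
// 12).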
vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); return; } void vp8_loop_filter_mbvs_neon(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { vp8_loop_filter_simple_vertical_edge_neon(y_ptr, y_stride, blimit); return; } libvpx-1.8.2/vp8/common/arm/neon/mbloopfilter_neon.c000066400000000000000000000462551357355204000224520ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vpx_config.h" #include "vp8/common/arm/loopfilter_arm.h" static INLINE void vp8_mbloop_filter_neon(uint8x16_t qblimit, // mblimit uint8x16_t qlimit, // limit uint8x16_t qthresh, // thresh uint8x16_t q3, // p2 uint8x16_t q4, // p2 uint8x16_t q5, // p1 uint8x16_t q6, // p0 uint8x16_t q7, // q0 uint8x16_t q8, // q1 uint8x16_t q9, // q2 uint8x16_t q10, // q3 uint8x16_t *q4r, // p1 uint8x16_t *q5r, // p1 uint8x16_t *q6r, // p0 uint8x16_t *q7r, // q0 uint8x16_t *q8r, // q1 uint8x16_t *q9r) { // q1 uint8x16_t q0u8, q1u8, q11u8, q12u8, q13u8, q14u8, q15u8; int16x8_t q0s16, q2s16, q11s16, q12s16, q13s16, q14s16, q15s16; int8x16_t q1s8, q6s8, q7s8, q2s8, q11s8, q13s8; uint16x8_t q0u16, q11u16, q12u16, q13u16, q14u16, q15u16; int8x16_t q0s8, q12s8, q14s8, q15s8; int8x8_t d0, d1, d2, d3, d4, d5, d24, d25, d28, d29; q11u8 = vabdq_u8(q3, q4); q12u8 = vabdq_u8(q4, q5); q13u8 = vabdq_u8(q5, q6); q14u8 = vabdq_u8(q8, q7); q1u8 = vabdq_u8(q9, q8); q0u8 = vabdq_u8(q10, q9); q11u8 = vmaxq_u8(q11u8, q12u8); q12u8 = vmaxq_u8(q13u8, q14u8); q1u8 = vmaxq_u8(q1u8, q0u8); q15u8 = vmaxq_u8(q11u8, q12u8); q12u8 = vabdq_u8(q6, q7); // vp8_hevmask q13u8 = vcgtq_u8(q13u8, qthresh); q14u8 = vcgtq_u8(q14u8, qthresh); q15u8 = vmaxq_u8(q15u8, q1u8); q15u8 = vcgeq_u8(qlimit, q15u8); q1u8 = vabdq_u8(q5, q8); q12u8 = vqaddq_u8(q12u8, q12u8); // vp8_filter() function // convert to signed q0u8 = vdupq_n_u8(0x80); q9 = veorq_u8(q9, q0u8); q8 = veorq_u8(q8, q0u8); q7 = veorq_u8(q7, q0u8); q6 = veorq_u8(q6, q0u8); q5 = veorq_u8(q5, q0u8); q4 = veorq_u8(q4, q0u8); q1u8 = vshrq_n_u8(q1u8, 1); q12u8 = vqaddq_u8(q12u8, q1u8); q14u8 = vorrq_u8(q13u8, q14u8); q12u8 = vcgeq_u8(qblimit, q12u8); q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), vget_low_s8(vreinterpretq_s8_u8(q6))); q13s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), vget_high_s8(vreinterpretq_s8_u8(q6))); q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), vreinterpretq_s8_u8(q8)); q11s16 = vdupq_n_s16(3); q2s16 = vmulq_s16(q2s16, q11s16); q13s16 = vmulq_s16(q13s16, q11s16); q15u8 = vandq_u8(q15u8, q12u8); q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); q13s16 = vaddw_s8(q13s16, vget_high_s8(q1s8)); q12u8 = vdupq_n_u8(3); q11u8 = vdupq_n_u8(4); // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) d2 = vqmovn_s16(q2s16); d3 = vqmovn_s16(q13s16); q1s8 = vcombine_s8(d2, d3); q1s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q15u8)); q13s8 = vandq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); q2s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q11u8)); q13s8 = vqaddq_s8(q13s8, vreinterpretq_s8_u8(q12u8)); q2s8 = vshrq_n_s8(q2s8, 3); q13s8 = vshrq_n_s8(q13s8, 3); q7s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q2s8); q6s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q13s8); q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); q0u16 = 
q11u16 = q12u16 = q13u16 = q14u16 = q15u16 = vdupq_n_u16(63); d5 = vdup_n_s8(9); d4 = vdup_n_s8(18); q0s16 = vmlal_s8(vreinterpretq_s16_u16(q0u16), vget_low_s8(q1s8), d5); q11s16 = vmlal_s8(vreinterpretq_s16_u16(q11u16), vget_high_s8(q1s8), d5); d5 = vdup_n_s8(27); q12s16 = vmlal_s8(vreinterpretq_s16_u16(q12u16), vget_low_s8(q1s8), d4); q13s16 = vmlal_s8(vreinterpretq_s16_u16(q13u16), vget_high_s8(q1s8), d4); q14s16 = vmlal_s8(vreinterpretq_s16_u16(q14u16), vget_low_s8(q1s8), d5); q15s16 = vmlal_s8(vreinterpretq_s16_u16(q15u16), vget_high_s8(q1s8), d5); d0 = vqshrn_n_s16(q0s16, 7); d1 = vqshrn_n_s16(q11s16, 7); d24 = vqshrn_n_s16(q12s16, 7); d25 = vqshrn_n_s16(q13s16, 7); d28 = vqshrn_n_s16(q14s16, 7); d29 = vqshrn_n_s16(q15s16, 7); q0s8 = vcombine_s8(d0, d1); q12s8 = vcombine_s8(d24, d25); q14s8 = vcombine_s8(d28, d29); q11s8 = vqsubq_s8(vreinterpretq_s8_u8(q9), q0s8); q0s8 = vqaddq_s8(vreinterpretq_s8_u8(q4), q0s8); q13s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q12s8); q12s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q12s8); q15s8 = vqsubq_s8((q7s8), q14s8); q14s8 = vqaddq_s8((q6s8), q14s8); q1u8 = vdupq_n_u8(0x80); *q9r = veorq_u8(vreinterpretq_u8_s8(q11s8), q1u8); *q8r = veorq_u8(vreinterpretq_u8_s8(q13s8), q1u8); *q7r = veorq_u8(vreinterpretq_u8_s8(q15s8), q1u8); *q6r = veorq_u8(vreinterpretq_u8_s8(q14s8), q1u8); *q5r = veorq_u8(vreinterpretq_u8_s8(q12s8), q1u8); *q4r = veorq_u8(vreinterpretq_u8_s8(q0s8), q1u8); return; } void vp8_mbloop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch, unsigned char blimit, unsigned char limit, unsigned char thresh) { uint8x16_t qblimit, qlimit, qthresh, q3, q4; uint8x16_t q5, q6, q7, q8, q9, q10; qblimit = vdupq_n_u8(blimit); qlimit = vdupq_n_u8(limit); qthresh = vdupq_n_u8(thresh); src -= (pitch << 2); q3 = vld1q_u8(src); src += pitch; q4 = vld1q_u8(src); src += pitch; q5 = vld1q_u8(src); src += pitch; q6 = vld1q_u8(src); src += pitch; q7 = vld1q_u8(src); src += pitch; q8 = vld1q_u8(src); src += pitch; q9 = vld1q_u8(src); src += pitch; q10 = vld1q_u8(src); vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9, q10, &q4, &q5, &q6, &q7, &q8, &q9); src -= (pitch * 6); vst1q_u8(src, q4); src += pitch; vst1q_u8(src, q5); src += pitch; vst1q_u8(src, q6); src += pitch; vst1q_u8(src, q7); src += pitch; vst1q_u8(src, q8); src += pitch; vst1q_u8(src, q9); return; } void vp8_mbloop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch, unsigned char blimit, unsigned char limit, unsigned char thresh, unsigned char *v) { uint8x16_t qblimit, qlimit, qthresh, q3, q4; uint8x16_t q5, q6, q7, q8, q9, q10; uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; uint8x8_t d15, d16, d17, d18, d19, d20, d21; qblimit = vdupq_n_u8(blimit); qlimit = vdupq_n_u8(limit); qthresh = vdupq_n_u8(thresh); u -= (pitch << 2); v -= (pitch << 2); d6 = vld1_u8(u); u += pitch; d7 = vld1_u8(v); v += pitch; d8 = vld1_u8(u); u += pitch; d9 = vld1_u8(v); v += pitch; d10 = vld1_u8(u); u += pitch; d11 = vld1_u8(v); v += pitch; d12 = vld1_u8(u); u += pitch; d13 = vld1_u8(v); v += pitch; d14 = vld1_u8(u); u += pitch; d15 = vld1_u8(v); v += pitch; d16 = vld1_u8(u); u += pitch; d17 = vld1_u8(v); v += pitch; d18 = vld1_u8(u); u += pitch; d19 = vld1_u8(v); v += pitch; d20 = vld1_u8(u); d21 = vld1_u8(v); q3 = vcombine_u8(d6, d7); q4 = vcombine_u8(d8, d9); q5 = vcombine_u8(d10, d11); q6 = vcombine_u8(d12, d13); q7 = vcombine_u8(d14, d15); q8 = vcombine_u8(d16, d17); q9 = vcombine_u8(d18, d19); q10 = vcombine_u8(d20, d21); vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, 
q4, q5, q6, q7, q8, q9, q10, &q4, &q5, &q6, &q7, &q8, &q9); u -= (pitch * 6); v -= (pitch * 6); vst1_u8(u, vget_low_u8(q4)); u += pitch; vst1_u8(v, vget_high_u8(q4)); v += pitch; vst1_u8(u, vget_low_u8(q5)); u += pitch; vst1_u8(v, vget_high_u8(q5)); v += pitch; vst1_u8(u, vget_low_u8(q6)); u += pitch; vst1_u8(v, vget_high_u8(q6)); v += pitch; vst1_u8(u, vget_low_u8(q7)); u += pitch; vst1_u8(v, vget_high_u8(q7)); v += pitch; vst1_u8(u, vget_low_u8(q8)); u += pitch; vst1_u8(v, vget_high_u8(q8)); v += pitch; vst1_u8(u, vget_low_u8(q9)); vst1_u8(v, vget_high_u8(q9)); return; } void vp8_mbloop_filter_vertical_edge_y_neon(unsigned char *src, int pitch, unsigned char blimit, unsigned char limit, unsigned char thresh) { unsigned char *s1, *s2; uint8x16_t qblimit, qlimit, qthresh, q3, q4; uint8x16_t q5, q6, q7, q8, q9, q10; uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; uint8x8_t d15, d16, d17, d18, d19, d20, d21; uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; qblimit = vdupq_n_u8(blimit); qlimit = vdupq_n_u8(limit); qthresh = vdupq_n_u8(thresh); s1 = src - 4; s2 = s1 + 8 * pitch; d6 = vld1_u8(s1); s1 += pitch; d7 = vld1_u8(s2); s2 += pitch; d8 = vld1_u8(s1); s1 += pitch; d9 = vld1_u8(s2); s2 += pitch; d10 = vld1_u8(s1); s1 += pitch; d11 = vld1_u8(s2); s2 += pitch; d12 = vld1_u8(s1); s1 += pitch; d13 = vld1_u8(s2); s2 += pitch; d14 = vld1_u8(s1); s1 += pitch; d15 = vld1_u8(s2); s2 += pitch; d16 = vld1_u8(s1); s1 += pitch; d17 = vld1_u8(s2); s2 += pitch; d18 = vld1_u8(s1); s1 += pitch; d19 = vld1_u8(s2); s2 += pitch; d20 = vld1_u8(s1); d21 = vld1_u8(s2); q3 = vcombine_u8(d6, d7); q4 = vcombine_u8(d8, d9); q5 = vcombine_u8(d10, d11); q6 = vcombine_u8(d12, d13); q7 = vcombine_u8(d14, d15); q8 = vcombine_u8(d16, d17); q9 = vcombine_u8(d18, d19); q10 = vcombine_u8(d20, d21); q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), vreinterpretq_u16_u32(q2tmp2.val[0])); q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), vreinterpretq_u16_u32(q2tmp3.val[0])); q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), vreinterpretq_u16_u32(q2tmp2.val[1])); q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), vreinterpretq_u16_u32(q2tmp3.val[1])); q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), vreinterpretq_u8_u16(q2tmp5.val[0])); q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), vreinterpretq_u8_u16(q2tmp5.val[1])); q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), vreinterpretq_u8_u16(q2tmp7.val[0])); q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), vreinterpretq_u8_u16(q2tmp7.val[1])); q3 = q2tmp8.val[0]; q4 = q2tmp8.val[1]; q5 = q2tmp9.val[0]; q6 = q2tmp9.val[1]; q7 = q2tmp10.val[0]; q8 = q2tmp10.val[1]; q9 = q2tmp11.val[0]; q10 = q2tmp11.val[1]; vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9, q10, &q4, &q5, &q6, &q7, &q8, &q9); q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); q2tmp4 = 
vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), vreinterpretq_u16_u32(q2tmp2.val[0])); q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), vreinterpretq_u16_u32(q2tmp3.val[0])); q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), vreinterpretq_u16_u32(q2tmp2.val[1])); q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), vreinterpretq_u16_u32(q2tmp3.val[1])); q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), vreinterpretq_u8_u16(q2tmp5.val[0])); q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), vreinterpretq_u8_u16(q2tmp5.val[1])); q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), vreinterpretq_u8_u16(q2tmp7.val[0])); q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), vreinterpretq_u8_u16(q2tmp7.val[1])); q3 = q2tmp8.val[0]; q4 = q2tmp8.val[1]; q5 = q2tmp9.val[0]; q6 = q2tmp9.val[1]; q7 = q2tmp10.val[0]; q8 = q2tmp10.val[1]; q9 = q2tmp11.val[0]; q10 = q2tmp11.val[1]; s1 -= 7 * pitch; s2 -= 7 * pitch; vst1_u8(s1, vget_low_u8(q3)); s1 += pitch; vst1_u8(s2, vget_high_u8(q3)); s2 += pitch; vst1_u8(s1, vget_low_u8(q4)); s1 += pitch; vst1_u8(s2, vget_high_u8(q4)); s2 += pitch; vst1_u8(s1, vget_low_u8(q5)); s1 += pitch; vst1_u8(s2, vget_high_u8(q5)); s2 += pitch; vst1_u8(s1, vget_low_u8(q6)); s1 += pitch; vst1_u8(s2, vget_high_u8(q6)); s2 += pitch; vst1_u8(s1, vget_low_u8(q7)); s1 += pitch; vst1_u8(s2, vget_high_u8(q7)); s2 += pitch; vst1_u8(s1, vget_low_u8(q8)); s1 += pitch; vst1_u8(s2, vget_high_u8(q8)); s2 += pitch; vst1_u8(s1, vget_low_u8(q9)); s1 += pitch; vst1_u8(s2, vget_high_u8(q9)); s2 += pitch; vst1_u8(s1, vget_low_u8(q10)); vst1_u8(s2, vget_high_u8(q10)); return; } void vp8_mbloop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch, unsigned char blimit, unsigned char limit, unsigned char thresh, unsigned char *v) { unsigned char *us, *ud; unsigned char *vs, *vd; uint8x16_t qblimit, qlimit, qthresh, q3, q4; uint8x16_t q5, q6, q7, q8, q9, q10; uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; uint8x8_t d15, d16, d17, d18, d19, d20, d21; uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; qblimit = vdupq_n_u8(blimit); qlimit = vdupq_n_u8(limit); qthresh = vdupq_n_u8(thresh); us = u - 4; vs = v - 4; d6 = vld1_u8(us); us += pitch; d7 = vld1_u8(vs); vs += pitch; d8 = vld1_u8(us); us += pitch; d9 = vld1_u8(vs); vs += pitch; d10 = vld1_u8(us); us += pitch; d11 = vld1_u8(vs); vs += pitch; d12 = vld1_u8(us); us += pitch; d13 = vld1_u8(vs); vs += pitch; d14 = vld1_u8(us); us += pitch; d15 = vld1_u8(vs); vs += pitch; d16 = vld1_u8(us); us += pitch; d17 = vld1_u8(vs); vs += pitch; d18 = vld1_u8(us); us += pitch; d19 = vld1_u8(vs); vs += pitch; d20 = vld1_u8(us); d21 = vld1_u8(vs); q3 = vcombine_u8(d6, d7); q4 = vcombine_u8(d8, d9); q5 = vcombine_u8(d10, d11); q6 = vcombine_u8(d12, d13); q7 = vcombine_u8(d14, d15); q8 = vcombine_u8(d16, d17); q9 = vcombine_u8(d18, d19); q10 = vcombine_u8(d20, d21); q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), vreinterpretq_u16_u32(q2tmp2.val[0])); q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), vreinterpretq_u16_u32(q2tmp3.val[0])); q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), vreinterpretq_u16_u32(q2tmp2.val[1])); q2tmp7 = 
vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), vreinterpretq_u16_u32(q2tmp3.val[1])); q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), vreinterpretq_u8_u16(q2tmp5.val[0])); q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), vreinterpretq_u8_u16(q2tmp5.val[1])); q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), vreinterpretq_u8_u16(q2tmp7.val[0])); q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), vreinterpretq_u8_u16(q2tmp7.val[1])); q3 = q2tmp8.val[0]; q4 = q2tmp8.val[1]; q5 = q2tmp9.val[0]; q6 = q2tmp9.val[1]; q7 = q2tmp10.val[0]; q8 = q2tmp10.val[1]; q9 = q2tmp11.val[0]; q10 = q2tmp11.val[1]; vp8_mbloop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9, q10, &q4, &q5, &q6, &q7, &q8, &q9); q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), vreinterpretq_u16_u32(q2tmp2.val[0])); q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), vreinterpretq_u16_u32(q2tmp3.val[0])); q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), vreinterpretq_u16_u32(q2tmp2.val[1])); q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), vreinterpretq_u16_u32(q2tmp3.val[1])); q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), vreinterpretq_u8_u16(q2tmp5.val[0])); q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), vreinterpretq_u8_u16(q2tmp5.val[1])); q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), vreinterpretq_u8_u16(q2tmp7.val[0])); q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), vreinterpretq_u8_u16(q2tmp7.val[1])); q3 = q2tmp8.val[0]; q4 = q2tmp8.val[1]; q5 = q2tmp9.val[0]; q6 = q2tmp9.val[1]; q7 = q2tmp10.val[0]; q8 = q2tmp10.val[1]; q9 = q2tmp11.val[0]; q10 = q2tmp11.val[1]; ud = u - 4; vst1_u8(ud, vget_low_u8(q3)); ud += pitch; vst1_u8(ud, vget_low_u8(q4)); ud += pitch; vst1_u8(ud, vget_low_u8(q5)); ud += pitch; vst1_u8(ud, vget_low_u8(q6)); ud += pitch; vst1_u8(ud, vget_low_u8(q7)); ud += pitch; vst1_u8(ud, vget_low_u8(q8)); ud += pitch; vst1_u8(ud, vget_low_u8(q9)); ud += pitch; vst1_u8(ud, vget_low_u8(q10)); vd = v - 4; vst1_u8(vd, vget_high_u8(q3)); vd += pitch; vst1_u8(vd, vget_high_u8(q4)); vd += pitch; vst1_u8(vd, vget_high_u8(q5)); vd += pitch; vst1_u8(vd, vget_high_u8(q6)); vd += pitch; vst1_u8(vd, vget_high_u8(q7)); vd += pitch; vst1_u8(vd, vget_high_u8(q8)); vd += pitch; vst1_u8(vd, vget_high_u8(q9)); vd += pitch; vst1_u8(vd, vget_high_u8(q10)); return; } libvpx-1.8.2/vp8/common/arm/neon/shortidct4x4llm_neon.c000066400000000000000000000102641357355204000230130ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vp8_rtcd.h" static const int16_t cospi8sqrt2minus1 = 20091; // 35468 exceeds INT16_MAX and gets converted to a negative number. Because of // the way it is used in vqdmulh, where the result is doubled, it can be divided // by 2 beforehand. This saves compensating for the negative value as well as // shifting the result. 
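// For reference, the fixed-point identities behind these two constants, as a
// scalar sketch (illustrative only; helper names are made up and this is not
// code from the libvpx tree): cos(pi/8) * sqrt(2) ~= 1.30656 =
// 1 + 20091 / 65536, and sin(pi/8) * sqrt(2) ~= 0.54120 = 35468 / 65536.
#include <stdint.h>

// x * cos(pi/8) * sqrt(2): the integer part is exactly 1, so add x back in.
static int32_t mul_cospi8sqrt2(int32_t x) {
  return x + ((x * 20091) >> 16);
}

// x * sin(pi/8) * sqrt(2): a pure fraction, one multiply and one shift.
// Valid while |x| * 35468 fits in 32 bits, which covers VP8 coefficients.
static int32_t mul_sinpi8sqrt2(int32_t x) {
  return (x * 35468) >> 16;
}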
static const int16_t sinpi8sqrt2 = 35468 >> 1; void vp8_short_idct4x4llm_neon(int16_t *input, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride) { int i; uint32x2_t d6u32 = vdup_n_u32(0); uint8x8_t d1u8; int16x4_t d2, d3, d4, d5, d10, d11, d12, d13; uint16x8_t q1u16; int16x8_t q1s16, q2s16, q3s16, q4s16; int32x2x2_t v2tmp0, v2tmp1; int16x4x2_t v2tmp2, v2tmp3; d2 = vld1_s16(input); d3 = vld1_s16(input + 4); d4 = vld1_s16(input + 8); d5 = vld1_s16(input + 12); // 1st for loop q1s16 = vcombine_s16(d2, d4); // Swap d3 d4 here q2s16 = vcombine_s16(d3, d5); q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2); q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1); d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 q4s16 = vshrq_n_s16(q4s16, 1); q4s16 = vqaddq_s16(q4s16, q2s16); d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 d2 = vqadd_s16(d12, d11); d3 = vqadd_s16(d13, d10); d4 = vqsub_s16(d13, d10); d5 = vqsub_s16(d12, d11); v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]), vreinterpret_s16_s32(v2tmp1.val[0])); v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]), vreinterpret_s16_s32(v2tmp1.val[1])); // 2nd for loop q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp3.val[0]); q2s16 = vcombine_s16(v2tmp2.val[1], v2tmp3.val[1]); q3s16 = vqdmulhq_n_s16(q2s16, sinpi8sqrt2); q4s16 = vqdmulhq_n_s16(q2s16, cospi8sqrt2minus1); d12 = vqadd_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // a1 d13 = vqsub_s16(vget_low_s16(q1s16), vget_high_s16(q1s16)); // b1 q4s16 = vshrq_n_s16(q4s16, 1); q4s16 = vqaddq_s16(q4s16, q2s16); d10 = vqsub_s16(vget_low_s16(q3s16), vget_high_s16(q4s16)); // c1 d11 = vqadd_s16(vget_high_s16(q3s16), vget_low_s16(q4s16)); // d1 d2 = vqadd_s16(d12, d11); d3 = vqadd_s16(d13, d10); d4 = vqsub_s16(d13, d10); d5 = vqsub_s16(d12, d11); d2 = vrshr_n_s16(d2, 3); d3 = vrshr_n_s16(d3, 3); d4 = vrshr_n_s16(d4, 3); d5 = vrshr_n_s16(d5, 3); v2tmp0 = vtrn_s32(vreinterpret_s32_s16(d2), vreinterpret_s32_s16(d4)); v2tmp1 = vtrn_s32(vreinterpret_s32_s16(d3), vreinterpret_s32_s16(d5)); v2tmp2 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[0]), vreinterpret_s16_s32(v2tmp1.val[0])); v2tmp3 = vtrn_s16(vreinterpret_s16_s32(v2tmp0.val[1]), vreinterpret_s16_s32(v2tmp1.val[1])); q1s16 = vcombine_s16(v2tmp2.val[0], v2tmp2.val[1]); q2s16 = vcombine_s16(v2tmp3.val[0], v2tmp3.val[1]); // dc_only_idct_add for (i = 0; i < 2; i++, q1s16 = q2s16) { d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 0); pred_ptr += pred_stride; d6u32 = vld1_lane_u32((const uint32_t *)pred_ptr, d6u32, 1); pred_ptr += pred_stride; q1u16 = vaddw_u8(vreinterpretq_u16_s16(q1s16), vreinterpret_u8_u32(d6u32)); d1u8 = vqmovun_s16(vreinterpretq_s16_u16(q1u16)); vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 0); dst_ptr += dst_stride; vst1_lane_u32((uint32_t *)dst_ptr, vreinterpret_u32_u8(d1u8), 1); dst_ptr += dst_stride; } return; } libvpx-1.8.2/vp8/common/arm/neon/sixtappredict_neon.c000066400000000000000000001660771357355204000226440ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_config.h" #include "./vp8_rtcd.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_ports/mem.h" static const int8_t vp8_sub_pel_filters[8][8] = { { 0, 0, 128, 0, 0, 0, 0, 0 }, /* note that 1/8 pel positionyys are */ { 0, -6, 123, 12, -1, 0, 0, 0 }, /* just as per alpha -0.5 bicubic */ { 2, -11, 108, 36, -8, 1, 0, 0 }, /* New 1/4 pel 6 tap filter */ { 0, -9, 93, 50, -6, 0, 0, 0 }, { 3, -16, 77, 77, -16, 3, 0, 0 }, /* New 1/2 pel 6 tap filter */ { 0, -6, 50, 93, -9, 0, 0, 0 }, { 1, -8, 36, 108, -11, 2, 0, 0 }, /* New 1/4 pel 6 tap filter */ { 0, -1, 12, 123, -6, 0, 0, 0 }, }; // This table is derived from vp8/common/filter.c:vp8_sub_pel_filters. // Apply abs() to all the values. Elements 0, 2, 3, and 5 are always positive. // Elements 1 and 4 are either 0 or negative. The code accounts for this with // multiply/accumulates which either add or subtract as needed. The other // functions will be updated to use this table later. // It is also expanded to 8 elements to allow loading into 64 bit neon // registers. static const uint8_t abs_filters[8][8] = { { 0, 0, 128, 0, 0, 0, 0, 0 }, { 0, 6, 123, 12, 1, 0, 0, 0 }, { 2, 11, 108, 36, 8, 1, 0, 0 }, { 0, 9, 93, 50, 6, 0, 0, 0 }, { 3, 16, 77, 77, 16, 3, 0, 0 }, { 0, 6, 50, 93, 9, 0, 0, 0 }, { 1, 8, 36, 108, 11, 2, 0, 0 }, { 0, 1, 12, 123, 6, 0, 0, 0 }, }; static INLINE uint8x8_t load_and_shift(const unsigned char *a) { return vreinterpret_u8_u64(vshl_n_u64(vreinterpret_u64_u8(vld1_u8(a)), 32)); } static INLINE void filter_add_accumulate(const uint8x16_t a, const uint8x16_t b, const uint8x8_t filter, uint16x8_t *c, uint16x8_t *d) { const uint32x2x2_t a_shuf = vzip_u32(vreinterpret_u32_u8(vget_low_u8(a)), vreinterpret_u32_u8(vget_high_u8(a))); const uint32x2x2_t b_shuf = vzip_u32(vreinterpret_u32_u8(vget_low_u8(b)), vreinterpret_u32_u8(vget_high_u8(b))); *c = vmlal_u8(*c, vreinterpret_u8_u32(a_shuf.val[0]), filter); *d = vmlal_u8(*d, vreinterpret_u8_u32(b_shuf.val[0]), filter); } static INLINE void filter_sub_accumulate(const uint8x16_t a, const uint8x16_t b, const uint8x8_t filter, uint16x8_t *c, uint16x8_t *d) { const uint32x2x2_t a_shuf = vzip_u32(vreinterpret_u32_u8(vget_low_u8(a)), vreinterpret_u32_u8(vget_high_u8(a))); const uint32x2x2_t b_shuf = vzip_u32(vreinterpret_u32_u8(vget_low_u8(b)), vreinterpret_u32_u8(vget_high_u8(b))); *c = vmlsl_u8(*c, vreinterpret_u8_u32(a_shuf.val[0]), filter); *d = vmlsl_u8(*d, vreinterpret_u8_u32(b_shuf.val[0]), filter); } static INLINE void yonly4x4(const unsigned char *src, int src_stride, int filter_offset, unsigned char *dst, int dst_stride) { uint8x8_t a0, a1, a2, a3, a4, a5, a6, a7, a8; uint8x8_t b0, b1, b2, b3, b4, b5, b6, b7, b8; uint16x8_t c0, c1, c2, c3; int16x8_t d0, d1; uint8x8_t e0, e1; const uint8x8_t filter = vld1_u8(abs_filters[filter_offset]); const uint8x8_t filter0 = vdup_lane_u8(filter, 0); const uint8x8_t filter1 = vdup_lane_u8(filter, 1); const uint8x8_t filter2 = vdup_lane_u8(filter, 2); const uint8x8_t filter3 = vdup_lane_u8(filter, 3); const uint8x8_t filter4 = vdup_lane_u8(filter, 4); const uint8x8_t filter5 = vdup_lane_u8(filter, 5); src -= src_stride * 2; // Shift the even rows to allow using 'vext' to combine the vectors. armv8 // has vcopy_lane which would be interesting. 
This started as just a // horrible workaround for clang adding alignment hints to 32bit loads: // https://llvm.org/bugs/show_bug.cgi?id=24421 // But it turns out it's almost identical to casting the loads. a0 = load_and_shift(src); src += src_stride; a1 = vld1_u8(src); src += src_stride; a2 = load_and_shift(src); src += src_stride; a3 = vld1_u8(src); src += src_stride; a4 = load_and_shift(src); src += src_stride; a5 = vld1_u8(src); src += src_stride; a6 = load_and_shift(src); src += src_stride; a7 = vld1_u8(src); src += src_stride; a8 = vld1_u8(src); // Combine the rows so we can operate on 8 at a time. b0 = vext_u8(a0, a1, 4); b2 = vext_u8(a2, a3, 4); b4 = vext_u8(a4, a5, 4); b6 = vext_u8(a6, a7, 4); b8 = a8; // To keep with the 8-at-a-time theme, combine *alternate* rows. This // allows combining the odd rows with the even. b1 = vext_u8(b0, b2, 4); b3 = vext_u8(b2, b4, 4); b5 = vext_u8(b4, b6, 4); b7 = vext_u8(b6, b8, 4); // Multiply and expand to 16 bits. c0 = vmull_u8(b0, filter0); c1 = vmull_u8(b2, filter0); c2 = vmull_u8(b5, filter5); c3 = vmull_u8(b7, filter5); // Multiply, subtract and accumulate for filters 1 and 4 (the negative // ones). c0 = vmlsl_u8(c0, b4, filter4); c1 = vmlsl_u8(c1, b6, filter4); c2 = vmlsl_u8(c2, b1, filter1); c3 = vmlsl_u8(c3, b3, filter1); // Add more positive ones. vmlal should really return a signed type. // It's doing signed math internally, as evidenced by the fact we can do // subtractions followed by more additions. Ideally we could use // vqmlal/sl but that instruction doesn't exist. Might be able to // shoehorn vqdmlal/vqdmlsl in here but it would take some effort. c0 = vmlal_u8(c0, b2, filter2); c1 = vmlal_u8(c1, b4, filter2); c2 = vmlal_u8(c2, b3, filter3); c3 = vmlal_u8(c3, b5, filter3); // Use signed saturation math because vmlsl may have left some negative // numbers in there. d0 = vqaddq_s16(vreinterpretq_s16_u16(c2), vreinterpretq_s16_u16(c0)); d1 = vqaddq_s16(vreinterpretq_s16_u16(c3), vreinterpretq_s16_u16(c1)); // Use signed again because numbers like -200 need to be saturated to 0. e0 = vqrshrun_n_s16(d0, 7); e1 = vqrshrun_n_s16(d1, 7); store_unaligned_u8q(dst, dst_stride, vcombine_u8(e0, e1)); } void vp8_sixtap_predict4x4_neon(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { uint8x16_t s0, s1, s2, s3, s4; uint64x2_t s01, s23; // Variables to hold src[] elements for the given filter[] uint8x8_t s0_f5, s1_f5, s2_f5, s3_f5, s4_f5; uint8x8_t s4_f1, s4_f2, s4_f3, s4_f4; uint8x16_t s01_f0, s23_f0; uint64x2_t s01_f3, s23_f3; uint32x2x2_t s01_f3_q, s23_f3_q, s01_f5_q, s23_f5_q; // Accumulator variables. uint16x8_t d0123, d4567, d89; uint16x8_t d0123_a, d4567_a, d89_a; int16x8_t e0123, e4567, e89; // Second pass intermediates. uint8x8_t b0, b1, b2, b3, b4, b5, b6, b7, b8; uint16x8_t c0, c1, c2, c3; int16x8_t d0, d1; uint8x8_t e0, e1; uint8x8_t filter, filter0, filter1, filter2, filter3, filter4, filter5; if (xoffset == 0) { // Second pass only. yonly4x4(src_ptr, src_pixels_per_line, yoffset, dst_ptr, dst_pitch); return; } if (yoffset == 0) { // First pass only. src_ptr -= 2; } else { // Add context for the second pass. 2 extra lines on top.
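// The 6-tap kernel reads positions -2..+3 around each output pixel, so the
// first pass needs 2 columns of left context and the second pass needs 2
// rows above the block (the 3 rows below are covered by the extra loads
// further down).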
src_ptr -= 2 + (src_pixels_per_line * 2); } filter = vld1_u8(abs_filters[xoffset]); filter0 = vdup_lane_u8(filter, 0); filter1 = vdup_lane_u8(filter, 1); filter2 = vdup_lane_u8(filter, 2); filter3 = vdup_lane_u8(filter, 3); filter4 = vdup_lane_u8(filter, 4); filter5 = vdup_lane_u8(filter, 5); // 2 bytes of context, 4 bytes of src values, 3 bytes of context, 7 bytes of // garbage. So much effort for that last single bit. // The low values of each pair are for filter0. s0 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; s1 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; s2 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; s3 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; // Shift to extract values for filter[5] // If src[] is 0, this puts: // 3 4 5 6 7 8 9 10 in s0_f5 // Can't use vshr.u64 because it crosses the double word boundary. s0_f5 = vext_u8(vget_low_u8(s0), vget_high_u8(s0), 5); s1_f5 = vext_u8(vget_low_u8(s1), vget_high_u8(s1), 5); s2_f5 = vext_u8(vget_low_u8(s2), vget_high_u8(s2), 5); s3_f5 = vext_u8(vget_low_u8(s3), vget_high_u8(s3), 5); s01_f0 = vcombine_u8(vget_low_u8(s0), vget_low_u8(s1)); s23_f0 = vcombine_u8(vget_low_u8(s2), vget_low_u8(s3)); s01_f5_q = vzip_u32(vreinterpret_u32_u8(s0_f5), vreinterpret_u32_u8(s1_f5)); s23_f5_q = vzip_u32(vreinterpret_u32_u8(s2_f5), vreinterpret_u32_u8(s3_f5)); d0123 = vmull_u8(vreinterpret_u8_u32(s01_f5_q.val[0]), filter5); d4567 = vmull_u8(vreinterpret_u8_u32(s23_f5_q.val[0]), filter5); // Keep original src data as 64 bits to simplify shifting and extracting. s01 = vreinterpretq_u64_u8(s01_f0); s23 = vreinterpretq_u64_u8(s23_f0); // 3 4 5 6 * filter0 filter_add_accumulate(s01_f0, s23_f0, filter0, &d0123, &d4567); // Shift over one to use -1, 0, 1, 2 for filter1 // -1 0 1 2 * filter1 filter_sub_accumulate(vreinterpretq_u8_u64(vshrq_n_u64(s01, 8)), vreinterpretq_u8_u64(vshrq_n_u64(s23, 8)), filter1, &d0123, &d4567); // 2 3 4 5 * filter4 filter_sub_accumulate(vreinterpretq_u8_u64(vshrq_n_u64(s01, 32)), vreinterpretq_u8_u64(vshrq_n_u64(s23, 32)), filter4, &d0123, &d4567); // 0 1 2 3 * filter2 filter_add_accumulate(vreinterpretq_u8_u64(vshrq_n_u64(s01, 16)), vreinterpretq_u8_u64(vshrq_n_u64(s23, 16)), filter2, &d0123, &d4567); // 1 2 3 4 * filter3 s01_f3 = vshrq_n_u64(s01, 24); s23_f3 = vshrq_n_u64(s23, 24); s01_f3_q = vzip_u32(vreinterpret_u32_u64(vget_low_u64(s01_f3)), vreinterpret_u32_u64(vget_high_u64(s01_f3))); s23_f3_q = vzip_u32(vreinterpret_u32_u64(vget_low_u64(s23_f3)), vreinterpret_u32_u64(vget_high_u64(s23_f3))); // Accumulate into different registers so it can use saturated addition. d0123_a = vmull_u8(vreinterpret_u8_u32(s01_f3_q.val[0]), filter3); d4567_a = vmull_u8(vreinterpret_u8_u32(s23_f3_q.val[0]), filter3); e0123 = vqaddq_s16(vreinterpretq_s16_u16(d0123), vreinterpretq_s16_u16(d0123_a)); e4567 = vqaddq_s16(vreinterpretq_s16_u16(d4567), vreinterpretq_s16_u16(d4567_a)); // Shift and narrow. b0 = vqrshrun_n_s16(e0123, 7); b2 = vqrshrun_n_s16(e4567, 7); if (yoffset == 0) { // firstpass_filter4x4_only store_unaligned_u8q(dst_ptr, dst_pitch, vcombine_u8(b0, b2)); return; } // Load additional context when doing both filters. 
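// The first pass above produced rows 0-3 in b0/b2; filtering five more
// source rows (s0..s4) below supplies rows 4-8, giving the 9 horizontally
// filtered rows the 6-tap vertical pass needs for a 4-row output.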
s0 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; s1 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; s2 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; s3 = vld1q_u8(src_ptr); src_ptr += src_pixels_per_line; s4 = vld1q_u8(src_ptr); s0_f5 = vext_u8(vget_low_u8(s0), vget_high_u8(s0), 5); s1_f5 = vext_u8(vget_low_u8(s1), vget_high_u8(s1), 5); s2_f5 = vext_u8(vget_low_u8(s2), vget_high_u8(s2), 5); s3_f5 = vext_u8(vget_low_u8(s3), vget_high_u8(s3), 5); s4_f5 = vext_u8(vget_low_u8(s4), vget_high_u8(s4), 5); // 3 4 5 6 * filter0 s01_f0 = vcombine_u8(vget_low_u8(s0), vget_low_u8(s1)); s23_f0 = vcombine_u8(vget_low_u8(s2), vget_low_u8(s3)); s01_f5_q = vzip_u32(vreinterpret_u32_u8(s0_f5), vreinterpret_u32_u8(s1_f5)); s23_f5_q = vzip_u32(vreinterpret_u32_u8(s2_f5), vreinterpret_u32_u8(s3_f5)); // But this time instead of 16 pixels to filter, there are 20. So an extra // run with a doubleword register. d0123 = vmull_u8(vreinterpret_u8_u32(s01_f5_q.val[0]), filter5); d4567 = vmull_u8(vreinterpret_u8_u32(s23_f5_q.val[0]), filter5); d89 = vmull_u8(s4_f5, filter5); // Save a copy as u64 for shifting. s01 = vreinterpretq_u64_u8(s01_f0); s23 = vreinterpretq_u64_u8(s23_f0); filter_add_accumulate(s01_f0, s23_f0, filter0, &d0123, &d4567); d89 = vmlal_u8(d89, vget_low_u8(s4), filter0); filter_sub_accumulate(vreinterpretq_u8_u64(vshrq_n_u64(s01, 8)), vreinterpretq_u8_u64(vshrq_n_u64(s23, 8)), filter1, &d0123, &d4567); s4_f1 = vext_u8(vget_low_u8(s4), vget_high_u8(s4), 1); d89 = vmlsl_u8(d89, s4_f1, filter1); filter_sub_accumulate(vreinterpretq_u8_u64(vshrq_n_u64(s01, 32)), vreinterpretq_u8_u64(vshrq_n_u64(s23, 32)), filter4, &d0123, &d4567); s4_f4 = vext_u8(vget_low_u8(s4), vget_high_u8(s4), 4); d89 = vmlsl_u8(d89, s4_f4, filter4); filter_add_accumulate(vreinterpretq_u8_u64(vshrq_n_u64(s01, 16)), vreinterpretq_u8_u64(vshrq_n_u64(s23, 16)), filter2, &d0123, &d4567); s4_f2 = vext_u8(vget_low_u8(s4), vget_high_u8(s4), 2); d89 = vmlal_u8(d89, s4_f2, filter2); s01_f3 = vshrq_n_u64(s01, 24); s23_f3 = vshrq_n_u64(s23, 24); s01_f3_q = vzip_u32(vreinterpret_u32_u64(vget_low_u64(s01_f3)), vreinterpret_u32_u64(vget_high_u64(s01_f3))); s23_f3_q = vzip_u32(vreinterpret_u32_u64(vget_low_u64(s23_f3)), vreinterpret_u32_u64(vget_high_u64(s23_f3))); s4_f3 = vext_u8(vget_low_u8(s4), vget_high_u8(s4), 3); d0123_a = vmull_u8(vreinterpret_u8_u32(s01_f3_q.val[0]), filter3); d4567_a = vmull_u8(vreinterpret_u8_u32(s23_f3_q.val[0]), filter3); d89_a = vmull_u8(s4_f3, filter3); e0123 = vqaddq_s16(vreinterpretq_s16_u16(d0123), vreinterpretq_s16_u16(d0123_a)); e4567 = vqaddq_s16(vreinterpretq_s16_u16(d4567), vreinterpretq_s16_u16(d4567_a)); e89 = vqaddq_s16(vreinterpretq_s16_u16(d89), vreinterpretq_s16_u16(d89_a)); b4 = vqrshrun_n_s16(e0123, 7); b6 = vqrshrun_n_s16(e4567, 7); b8 = vqrshrun_n_s16(e89, 7); // Second pass: 4x4 filter = vld1_u8(abs_filters[yoffset]); filter0 = vdup_lane_u8(filter, 0); filter1 = vdup_lane_u8(filter, 1); filter2 = vdup_lane_u8(filter, 2); filter3 = vdup_lane_u8(filter, 3); filter4 = vdup_lane_u8(filter, 4); filter5 = vdup_lane_u8(filter, 5); b1 = vext_u8(b0, b2, 4); b3 = vext_u8(b2, b4, 4); b5 = vext_u8(b4, b6, 4); b7 = vext_u8(b6, b8, 4); c0 = vmull_u8(b0, filter0); c1 = vmull_u8(b2, filter0); c2 = vmull_u8(b5, filter5); c3 = vmull_u8(b7, filter5); c0 = vmlsl_u8(c0, b4, filter4); c1 = vmlsl_u8(c1, b6, filter4); c2 = vmlsl_u8(c2, b1, filter1); c3 = vmlsl_u8(c3, b3, filter1); c0 = vmlal_u8(c0, b2, filter2); c1 = vmlal_u8(c1, b4, filter2); c2 = vmlal_u8(c2, b3, filter3); c3 = vmlal_u8(c3, b5, 
filter3); d0 = vqaddq_s16(vreinterpretq_s16_u16(c2), vreinterpretq_s16_u16(c0)); d1 = vqaddq_s16(vreinterpretq_s16_u16(c3), vreinterpretq_s16_u16(c1)); e0 = vqrshrun_n_s16(d0, 7); e1 = vqrshrun_n_s16(d1, 7); store_unaligned_u8q(dst_ptr, dst_pitch, vcombine_u8(e0, e1)); } void vp8_sixtap_predict8x4_neon(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { unsigned char *src; uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; uint8x8_t d22u8, d23u8, d24u8, d25u8, d26u8; uint8x8_t d27u8, d28u8, d29u8, d30u8, d31u8; int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8; if (xoffset == 0) { // secondpass_filter8x4_only // load second_pass filter dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); d0s8 = vdup_lane_s8(dtmps8, 0); d1s8 = vdup_lane_s8(dtmps8, 1); d2s8 = vdup_lane_s8(dtmps8, 2); d3s8 = vdup_lane_s8(dtmps8, 3); d4s8 = vdup_lane_s8(dtmps8, 4); d5s8 = vdup_lane_s8(dtmps8, 5); d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); // load src data src = src_ptr - src_pixels_per_line * 2; d22u8 = vld1_u8(src); src += src_pixels_per_line; d23u8 = vld1_u8(src); src += src_pixels_per_line; d24u8 = vld1_u8(src); src += src_pixels_per_line; d25u8 = vld1_u8(src); src += src_pixels_per_line; d26u8 = vld1_u8(src); src += src_pixels_per_line; d27u8 = vld1_u8(src); src += src_pixels_per_line; d28u8 = vld1_u8(src); src += src_pixels_per_line; d29u8 = vld1_u8(src); src += src_pixels_per_line; d30u8 = vld1_u8(src); q3u16 = vmull_u8(d22u8, d0u8); q4u16 = vmull_u8(d23u8, d0u8); q5u16 = vmull_u8(d24u8, d0u8); q6u16 = vmull_u8(d25u8, d0u8); q3u16 = vmlsl_u8(q3u16, d23u8, d1u8); q4u16 = vmlsl_u8(q4u16, d24u8, d1u8); q5u16 = vmlsl_u8(q5u16, d25u8, d1u8); q6u16 = vmlsl_u8(q6u16, d26u8, d1u8); q3u16 = vmlsl_u8(q3u16, d26u8, d4u8); q4u16 = vmlsl_u8(q4u16, d27u8, d4u8); q5u16 = vmlsl_u8(q5u16, d28u8, d4u8); q6u16 = vmlsl_u8(q6u16, d29u8, d4u8); q3u16 = vmlal_u8(q3u16, d24u8, d2u8); q4u16 = vmlal_u8(q4u16, d25u8, d2u8); q5u16 = vmlal_u8(q5u16, d26u8, d2u8); q6u16 = vmlal_u8(q6u16, d27u8, d2u8); q3u16 = vmlal_u8(q3u16, d27u8, d5u8); q4u16 = vmlal_u8(q4u16, d28u8, d5u8); q5u16 = vmlal_u8(q5u16, d29u8, d5u8); q6u16 = vmlal_u8(q6u16, d30u8, d5u8); q7u16 = vmull_u8(d25u8, d3u8); q8u16 = vmull_u8(d26u8, d3u8); q9u16 = vmull_u8(d27u8, d3u8); q10u16 = vmull_u8(d28u8, d3u8); q3s16 = vreinterpretq_s16_u16(q3u16); q4s16 = vreinterpretq_s16_u16(q4u16); q5s16 = vreinterpretq_s16_u16(q5u16); q6s16 = vreinterpretq_s16_u16(q6u16); q7s16 = vreinterpretq_s16_u16(q7u16); q8s16 = vreinterpretq_s16_u16(q8u16); q9s16 = vreinterpretq_s16_u16(q9u16); q10s16 = vreinterpretq_s16_u16(q10u16); q7s16 = vqaddq_s16(q7s16, q3s16); q8s16 = vqaddq_s16(q8s16, q4s16); q9s16 = vqaddq_s16(q9s16, q5s16); q10s16 = vqaddq_s16(q10s16, q6s16); d6u8 = vqrshrun_n_s16(q7s16, 7); d7u8 = vqrshrun_n_s16(q8s16, 7); d8u8 = vqrshrun_n_s16(q9s16, 7); d9u8 = vqrshrun_n_s16(q10s16, 7); vst1_u8(dst_ptr, d6u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d7u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d8u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d9u8); return; } // load first_pass 
filter dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); d0s8 = vdup_lane_s8(dtmps8, 0); d1s8 = vdup_lane_s8(dtmps8, 1); d2s8 = vdup_lane_s8(dtmps8, 2); d3s8 = vdup_lane_s8(dtmps8, 3); d4s8 = vdup_lane_s8(dtmps8, 4); d5s8 = vdup_lane_s8(dtmps8, 5); d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); // First pass: output_height lines x output_width columns (9x4) if (yoffset == 0) // firstpass_filter4x4_only src = src_ptr - 2; else src = src_ptr - 2 - (src_pixels_per_line * 2); q3u8 = vld1q_u8(src); src += src_pixels_per_line; q4u8 = vld1q_u8(src); src += src_pixels_per_line; q5u8 = vld1q_u8(src); src += src_pixels_per_line; q6u8 = vld1q_u8(src); q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8); q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8); q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8); q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8); d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); q7u16 = vmlsl_u8(q7u16, d28u8, d1u8); q8u16 = vmlsl_u8(q8u16, d29u8, d1u8); q9u16 = vmlsl_u8(q9u16, d30u8, d1u8); q10u16 = vmlsl_u8(q10u16, d31u8, d1u8); d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); q7u16 = vmlsl_u8(q7u16, d28u8, d4u8); q8u16 = vmlsl_u8(q8u16, d29u8, d4u8); q9u16 = vmlsl_u8(q9u16, d30u8, d4u8); q10u16 = vmlsl_u8(q10u16, d31u8, d4u8); d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); q7u16 = vmlal_u8(q7u16, d28u8, d2u8); q8u16 = vmlal_u8(q8u16, d29u8, d2u8); q9u16 = vmlal_u8(q9u16, d30u8, d2u8); q10u16 = vmlal_u8(q10u16, d31u8, d2u8); d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); q7u16 = vmlal_u8(q7u16, d28u8, d5u8); q8u16 = vmlal_u8(q8u16, d29u8, d5u8); q9u16 = vmlal_u8(q9u16, d30u8, d5u8); q10u16 = vmlal_u8(q10u16, d31u8, d5u8); d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); q3u16 = vmull_u8(d28u8, d3u8); q4u16 = vmull_u8(d29u8, d3u8); q5u16 = vmull_u8(d30u8, d3u8); q6u16 = vmull_u8(d31u8, d3u8); q3s16 = vreinterpretq_s16_u16(q3u16); q4s16 = vreinterpretq_s16_u16(q4u16); q5s16 = vreinterpretq_s16_u16(q5u16); q6s16 = vreinterpretq_s16_u16(q6u16); q7s16 = vreinterpretq_s16_u16(q7u16); q8s16 = vreinterpretq_s16_u16(q8u16); q9s16 = vreinterpretq_s16_u16(q9u16); q10s16 = vreinterpretq_s16_u16(q10u16); q7s16 = vqaddq_s16(q7s16, q3s16); q8s16 = vqaddq_s16(q8s16, q4s16); q9s16 = vqaddq_s16(q9s16, q5s16); q10s16 = vqaddq_s16(q10s16, q6s16); d22u8 = vqrshrun_n_s16(q7s16, 7); d23u8 = vqrshrun_n_s16(q8s16, 7); d24u8 = vqrshrun_n_s16(q9s16, 7); d25u8 = vqrshrun_n_s16(q10s16, 7); if (yoffset == 0) { // 
firstpass_filter8x4_only vst1_u8(dst_ptr, d22u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d23u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d24u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d25u8); return; } // First Pass on rest 5-line data src += src_pixels_per_line; q3u8 = vld1q_u8(src); src += src_pixels_per_line; q4u8 = vld1q_u8(src); src += src_pixels_per_line; q5u8 = vld1q_u8(src); src += src_pixels_per_line; q6u8 = vld1q_u8(src); src += src_pixels_per_line; q7u8 = vld1q_u8(src); q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); q9u16 = vmull_u8(vget_low_u8(q4u8), d0u8); q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8); q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8); d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1); q8u16 = vmlsl_u8(q8u16, d27u8, d1u8); q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); q11u16 = vmlsl_u8(q11u16, d30u8, d1u8); q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4); q8u16 = vmlsl_u8(q8u16, d27u8, d4u8); q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); q11u16 = vmlsl_u8(q11u16, d30u8, d4u8); q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2); q8u16 = vmlal_u8(q8u16, d27u8, d2u8); q9u16 = vmlal_u8(q9u16, d28u8, d2u8); q10u16 = vmlal_u8(q10u16, d29u8, d2u8); q11u16 = vmlal_u8(q11u16, d30u8, d2u8); q12u16 = vmlal_u8(q12u16, d31u8, d2u8); d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5); q8u16 = vmlal_u8(q8u16, d27u8, d5u8); q9u16 = vmlal_u8(q9u16, d28u8, d5u8); q10u16 = vmlal_u8(q10u16, d29u8, d5u8); q11u16 = vmlal_u8(q11u16, d30u8, d5u8); q12u16 = vmlal_u8(q12u16, d31u8, d5u8); d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3); q3u16 = vmull_u8(d27u8, d3u8); q4u16 = vmull_u8(d28u8, d3u8); q5u16 = vmull_u8(d29u8, d3u8); q6u16 = vmull_u8(d30u8, d3u8); q7u16 = vmull_u8(d31u8, d3u8); q3s16 = vreinterpretq_s16_u16(q3u16); q4s16 = vreinterpretq_s16_u16(q4u16); q5s16 = vreinterpretq_s16_u16(q5u16); q6s16 = vreinterpretq_s16_u16(q6u16); q7s16 = vreinterpretq_s16_u16(q7u16); q8s16 = vreinterpretq_s16_u16(q8u16); q9s16 = vreinterpretq_s16_u16(q9u16); q10s16 = vreinterpretq_s16_u16(q10u16); q11s16 = vreinterpretq_s16_u16(q11u16); q12s16 = vreinterpretq_s16_u16(q12u16); q8s16 = vqaddq_s16(q8s16, q3s16); q9s16 = 
vqaddq_s16(q9s16, q4s16); q10s16 = vqaddq_s16(q10s16, q5s16); q11s16 = vqaddq_s16(q11s16, q6s16); q12s16 = vqaddq_s16(q12s16, q7s16); d26u8 = vqrshrun_n_s16(q8s16, 7); d27u8 = vqrshrun_n_s16(q9s16, 7); d28u8 = vqrshrun_n_s16(q10s16, 7); d29u8 = vqrshrun_n_s16(q11s16, 7); d30u8 = vqrshrun_n_s16(q12s16, 7); // Second pass: 8x4 dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); d0s8 = vdup_lane_s8(dtmps8, 0); d1s8 = vdup_lane_s8(dtmps8, 1); d2s8 = vdup_lane_s8(dtmps8, 2); d3s8 = vdup_lane_s8(dtmps8, 3); d4s8 = vdup_lane_s8(dtmps8, 4); d5s8 = vdup_lane_s8(dtmps8, 5); d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); q3u16 = vmull_u8(d22u8, d0u8); q4u16 = vmull_u8(d23u8, d0u8); q5u16 = vmull_u8(d24u8, d0u8); q6u16 = vmull_u8(d25u8, d0u8); q3u16 = vmlsl_u8(q3u16, d23u8, d1u8); q4u16 = vmlsl_u8(q4u16, d24u8, d1u8); q5u16 = vmlsl_u8(q5u16, d25u8, d1u8); q6u16 = vmlsl_u8(q6u16, d26u8, d1u8); q3u16 = vmlsl_u8(q3u16, d26u8, d4u8); q4u16 = vmlsl_u8(q4u16, d27u8, d4u8); q5u16 = vmlsl_u8(q5u16, d28u8, d4u8); q6u16 = vmlsl_u8(q6u16, d29u8, d4u8); q3u16 = vmlal_u8(q3u16, d24u8, d2u8); q4u16 = vmlal_u8(q4u16, d25u8, d2u8); q5u16 = vmlal_u8(q5u16, d26u8, d2u8); q6u16 = vmlal_u8(q6u16, d27u8, d2u8); q3u16 = vmlal_u8(q3u16, d27u8, d5u8); q4u16 = vmlal_u8(q4u16, d28u8, d5u8); q5u16 = vmlal_u8(q5u16, d29u8, d5u8); q6u16 = vmlal_u8(q6u16, d30u8, d5u8); q7u16 = vmull_u8(d25u8, d3u8); q8u16 = vmull_u8(d26u8, d3u8); q9u16 = vmull_u8(d27u8, d3u8); q10u16 = vmull_u8(d28u8, d3u8); q3s16 = vreinterpretq_s16_u16(q3u16); q4s16 = vreinterpretq_s16_u16(q4u16); q5s16 = vreinterpretq_s16_u16(q5u16); q6s16 = vreinterpretq_s16_u16(q6u16); q7s16 = vreinterpretq_s16_u16(q7u16); q8s16 = vreinterpretq_s16_u16(q8u16); q9s16 = vreinterpretq_s16_u16(q9u16); q10s16 = vreinterpretq_s16_u16(q10u16); q7s16 = vqaddq_s16(q7s16, q3s16); q8s16 = vqaddq_s16(q8s16, q4s16); q9s16 = vqaddq_s16(q9s16, q5s16); q10s16 = vqaddq_s16(q10s16, q6s16); d6u8 = vqrshrun_n_s16(q7s16, 7); d7u8 = vqrshrun_n_s16(q8s16, 7); d8u8 = vqrshrun_n_s16(q9s16, 7); d9u8 = vqrshrun_n_s16(q10s16, 7); vst1_u8(dst_ptr, d6u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d7u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d8u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d9u8); return; } void vp8_sixtap_predict8x8_neon(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { unsigned char *src, *tmpp; unsigned char tmp[64]; int i; uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; uint8x8_t d18u8, d19u8, d20u8, d21u8, d22u8, d23u8, d24u8, d25u8; uint8x8_t d26u8, d27u8, d28u8, d29u8, d30u8, d31u8; int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16; uint16x8_t q8u16, q9u16, q10u16, q11u16, q12u16; int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16; int16x8_t q8s16, q9s16, q10s16, q11s16, q12s16; uint8x16_t q3u8, q4u8, q5u8, q6u8, q7u8, q9u8, q10u8, q11u8, q12u8; if (xoffset == 0) { // secondpass_filter8x8_only // load second_pass filter dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); d0s8 = vdup_lane_s8(dtmps8, 0); d1s8 = vdup_lane_s8(dtmps8, 1); d2s8 = vdup_lane_s8(dtmps8, 2); d3s8 = vdup_lane_s8(dtmps8, 3); d4s8 = vdup_lane_s8(dtmps8, 4); d5s8 = vdup_lane_s8(dtmps8, 5); d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); d2u8 = 
vreinterpret_u8_s8(vabs_s8(d2s8)); d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); // load src data src = src_ptr - src_pixels_per_line * 2; d18u8 = vld1_u8(src); src += src_pixels_per_line; d19u8 = vld1_u8(src); src += src_pixels_per_line; d20u8 = vld1_u8(src); src += src_pixels_per_line; d21u8 = vld1_u8(src); src += src_pixels_per_line; d22u8 = vld1_u8(src); src += src_pixels_per_line; d23u8 = vld1_u8(src); src += src_pixels_per_line; d24u8 = vld1_u8(src); src += src_pixels_per_line; d25u8 = vld1_u8(src); src += src_pixels_per_line; d26u8 = vld1_u8(src); src += src_pixels_per_line; d27u8 = vld1_u8(src); src += src_pixels_per_line; d28u8 = vld1_u8(src); src += src_pixels_per_line; d29u8 = vld1_u8(src); src += src_pixels_per_line; d30u8 = vld1_u8(src); for (i = 2; i > 0; i--) { q3u16 = vmull_u8(d18u8, d0u8); q4u16 = vmull_u8(d19u8, d0u8); q5u16 = vmull_u8(d20u8, d0u8); q6u16 = vmull_u8(d21u8, d0u8); q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); q3u16 = vmlal_u8(q3u16, d20u8, d2u8); q4u16 = vmlal_u8(q4u16, d21u8, d2u8); q5u16 = vmlal_u8(q5u16, d22u8, d2u8); q6u16 = vmlal_u8(q6u16, d23u8, d2u8); q3u16 = vmlal_u8(q3u16, d23u8, d5u8); q4u16 = vmlal_u8(q4u16, d24u8, d5u8); q5u16 = vmlal_u8(q5u16, d25u8, d5u8); q6u16 = vmlal_u8(q6u16, d26u8, d5u8); q7u16 = vmull_u8(d21u8, d3u8); q8u16 = vmull_u8(d22u8, d3u8); q9u16 = vmull_u8(d23u8, d3u8); q10u16 = vmull_u8(d24u8, d3u8); q3s16 = vreinterpretq_s16_u16(q3u16); q4s16 = vreinterpretq_s16_u16(q4u16); q5s16 = vreinterpretq_s16_u16(q5u16); q6s16 = vreinterpretq_s16_u16(q6u16); q7s16 = vreinterpretq_s16_u16(q7u16); q8s16 = vreinterpretq_s16_u16(q8u16); q9s16 = vreinterpretq_s16_u16(q9u16); q10s16 = vreinterpretq_s16_u16(q10u16); q7s16 = vqaddq_s16(q7s16, q3s16); q8s16 = vqaddq_s16(q8s16, q4s16); q9s16 = vqaddq_s16(q9s16, q5s16); q10s16 = vqaddq_s16(q10s16, q6s16); d6u8 = vqrshrun_n_s16(q7s16, 7); d7u8 = vqrshrun_n_s16(q8s16, 7); d8u8 = vqrshrun_n_s16(q9s16, 7); d9u8 = vqrshrun_n_s16(q10s16, 7); d18u8 = d22u8; d19u8 = d23u8; d20u8 = d24u8; d21u8 = d25u8; d22u8 = d26u8; d23u8 = d27u8; d24u8 = d28u8; d25u8 = d29u8; d26u8 = d30u8; vst1_u8(dst_ptr, d6u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d7u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d8u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d9u8); dst_ptr += dst_pitch; } return; } // load first_pass filter dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); d0s8 = vdup_lane_s8(dtmps8, 0); d1s8 = vdup_lane_s8(dtmps8, 1); d2s8 = vdup_lane_s8(dtmps8, 2); d3s8 = vdup_lane_s8(dtmps8, 3); d4s8 = vdup_lane_s8(dtmps8, 4); d5s8 = vdup_lane_s8(dtmps8, 5); d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); // First pass: output_height lines x output_width columns (9x4) if (yoffset == 0) // firstpass_filter4x4_only src = src_ptr - 2; else src = src_ptr - 2 - (src_pixels_per_line * 2); tmpp = tmp; for (i = 2; i > 0; i--) { q3u8 = vld1q_u8(src); src += src_pixels_per_line; q4u8 = vld1q_u8(src); src += src_pixels_per_line; q5u8 = vld1q_u8(src); src += src_pixels_per_line; q6u8 = vld1q_u8(src); src += 
src_pixels_per_line; __builtin_prefetch(src); __builtin_prefetch(src + src_pixels_per_line); __builtin_prefetch(src + src_pixels_per_line * 2); q7u16 = vmull_u8(vget_low_u8(q3u8), d0u8); q8u16 = vmull_u8(vget_low_u8(q4u8), d0u8); q9u16 = vmull_u8(vget_low_u8(q5u8), d0u8); q10u16 = vmull_u8(vget_low_u8(q6u8), d0u8); d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); q7u16 = vmlsl_u8(q7u16, d28u8, d1u8); q8u16 = vmlsl_u8(q8u16, d29u8, d1u8); q9u16 = vmlsl_u8(q9u16, d30u8, d1u8); q10u16 = vmlsl_u8(q10u16, d31u8, d1u8); d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); q7u16 = vmlsl_u8(q7u16, d28u8, d4u8); q8u16 = vmlsl_u8(q8u16, d29u8, d4u8); q9u16 = vmlsl_u8(q9u16, d30u8, d4u8); q10u16 = vmlsl_u8(q10u16, d31u8, d4u8); d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); q7u16 = vmlal_u8(q7u16, d28u8, d2u8); q8u16 = vmlal_u8(q8u16, d29u8, d2u8); q9u16 = vmlal_u8(q9u16, d30u8, d2u8); q10u16 = vmlal_u8(q10u16, d31u8, d2u8); d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); q7u16 = vmlal_u8(q7u16, d28u8, d5u8); q8u16 = vmlal_u8(q8u16, d29u8, d5u8); q9u16 = vmlal_u8(q9u16, d30u8, d5u8); q10u16 = vmlal_u8(q10u16, d31u8, d5u8); d28u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); d29u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); d30u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); d31u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); q3u16 = vmull_u8(d28u8, d3u8); q4u16 = vmull_u8(d29u8, d3u8); q5u16 = vmull_u8(d30u8, d3u8); q6u16 = vmull_u8(d31u8, d3u8); q3s16 = vreinterpretq_s16_u16(q3u16); q4s16 = vreinterpretq_s16_u16(q4u16); q5s16 = vreinterpretq_s16_u16(q5u16); q6s16 = vreinterpretq_s16_u16(q6u16); q7s16 = vreinterpretq_s16_u16(q7u16); q8s16 = vreinterpretq_s16_u16(q8u16); q9s16 = vreinterpretq_s16_u16(q9u16); q10s16 = vreinterpretq_s16_u16(q10u16); q7s16 = vqaddq_s16(q7s16, q3s16); q8s16 = vqaddq_s16(q8s16, q4s16); q9s16 = vqaddq_s16(q9s16, q5s16); q10s16 = vqaddq_s16(q10s16, q6s16); d22u8 = vqrshrun_n_s16(q7s16, 7); d23u8 = vqrshrun_n_s16(q8s16, 7); d24u8 = vqrshrun_n_s16(q9s16, 7); d25u8 = vqrshrun_n_s16(q10s16, 7); if (yoffset == 0) { // firstpass_filter8x4_only vst1_u8(dst_ptr, d22u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d23u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d24u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d25u8); dst_ptr += dst_pitch; } else { vst1_u8(tmpp, d22u8); tmpp += 8; vst1_u8(tmpp, d23u8); tmpp += 8; vst1_u8(tmpp, d24u8); tmpp += 8; vst1_u8(tmpp, d25u8); tmpp += 8; } } if (yoffset == 0) return; // First Pass on rest 5-line data q3u8 = vld1q_u8(src); src += src_pixels_per_line; q4u8 = vld1q_u8(src); src += src_pixels_per_line; q5u8 = vld1q_u8(src); src += src_pixels_per_line; q6u8 = vld1q_u8(src); src += src_pixels_per_line; q7u8 = vld1q_u8(src); q8u16 = vmull_u8(vget_low_u8(q3u8), d0u8); q9u16 = 
vmull_u8(vget_low_u8(q4u8), d0u8); q10u16 = vmull_u8(vget_low_u8(q5u8), d0u8); q11u16 = vmull_u8(vget_low_u8(q6u8), d0u8); q12u16 = vmull_u8(vget_low_u8(q7u8), d0u8); d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 1); d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 1); d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 1); d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 1); d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 1); q8u16 = vmlsl_u8(q8u16, d27u8, d1u8); q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); q11u16 = vmlsl_u8(q11u16, d30u8, d1u8); q12u16 = vmlsl_u8(q12u16, d31u8, d1u8); d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 4); d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 4); d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 4); d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 4); d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 4); q8u16 = vmlsl_u8(q8u16, d27u8, d4u8); q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); q11u16 = vmlsl_u8(q11u16, d30u8, d4u8); q12u16 = vmlsl_u8(q12u16, d31u8, d4u8); d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 2); d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 2); d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 2); d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 2); d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 2); q8u16 = vmlal_u8(q8u16, d27u8, d2u8); q9u16 = vmlal_u8(q9u16, d28u8, d2u8); q10u16 = vmlal_u8(q10u16, d29u8, d2u8); q11u16 = vmlal_u8(q11u16, d30u8, d2u8); q12u16 = vmlal_u8(q12u16, d31u8, d2u8); d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 5); d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 5); d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 5); d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 5); d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 5); q8u16 = vmlal_u8(q8u16, d27u8, d5u8); q9u16 = vmlal_u8(q9u16, d28u8, d5u8); q10u16 = vmlal_u8(q10u16, d29u8, d5u8); q11u16 = vmlal_u8(q11u16, d30u8, d5u8); q12u16 = vmlal_u8(q12u16, d31u8, d5u8); d27u8 = vext_u8(vget_low_u8(q3u8), vget_high_u8(q3u8), 3); d28u8 = vext_u8(vget_low_u8(q4u8), vget_high_u8(q4u8), 3); d29u8 = vext_u8(vget_low_u8(q5u8), vget_high_u8(q5u8), 3); d30u8 = vext_u8(vget_low_u8(q6u8), vget_high_u8(q6u8), 3); d31u8 = vext_u8(vget_low_u8(q7u8), vget_high_u8(q7u8), 3); q3u16 = vmull_u8(d27u8, d3u8); q4u16 = vmull_u8(d28u8, d3u8); q5u16 = vmull_u8(d29u8, d3u8); q6u16 = vmull_u8(d30u8, d3u8); q7u16 = vmull_u8(d31u8, d3u8); q3s16 = vreinterpretq_s16_u16(q3u16); q4s16 = vreinterpretq_s16_u16(q4u16); q5s16 = vreinterpretq_s16_u16(q5u16); q6s16 = vreinterpretq_s16_u16(q6u16); q7s16 = vreinterpretq_s16_u16(q7u16); q8s16 = vreinterpretq_s16_u16(q8u16); q9s16 = vreinterpretq_s16_u16(q9u16); q10s16 = vreinterpretq_s16_u16(q10u16); q11s16 = vreinterpretq_s16_u16(q11u16); q12s16 = vreinterpretq_s16_u16(q12u16); q8s16 = vqaddq_s16(q8s16, q3s16); q9s16 = vqaddq_s16(q9s16, q4s16); q10s16 = vqaddq_s16(q10s16, q5s16); q11s16 = vqaddq_s16(q11s16, q6s16); q12s16 = vqaddq_s16(q12s16, q7s16); d26u8 = vqrshrun_n_s16(q8s16, 7); d27u8 = vqrshrun_n_s16(q9s16, 7); d28u8 = vqrshrun_n_s16(q10s16, 7); d29u8 = vqrshrun_n_s16(q11s16, 7); d30u8 = vqrshrun_n_s16(q12s16, 7); // Second pass: 8x8 dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); d0s8 = vdup_lane_s8(dtmps8, 0); d1s8 = vdup_lane_s8(dtmps8, 1); d2s8 = vdup_lane_s8(dtmps8, 2); d3s8 = vdup_lane_s8(dtmps8, 3); d4s8 = vdup_lane_s8(dtmps8, 4); d5s8 = 
vdup_lane_s8(dtmps8, 5); d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); tmpp = tmp; q9u8 = vld1q_u8(tmpp); tmpp += 16; q10u8 = vld1q_u8(tmpp); tmpp += 16; q11u8 = vld1q_u8(tmpp); tmpp += 16; q12u8 = vld1q_u8(tmpp); d18u8 = vget_low_u8(q9u8); d19u8 = vget_high_u8(q9u8); d20u8 = vget_low_u8(q10u8); d21u8 = vget_high_u8(q10u8); d22u8 = vget_low_u8(q11u8); d23u8 = vget_high_u8(q11u8); d24u8 = vget_low_u8(q12u8); d25u8 = vget_high_u8(q12u8); for (i = 2; i > 0; i--) { q3u16 = vmull_u8(d18u8, d0u8); q4u16 = vmull_u8(d19u8, d0u8); q5u16 = vmull_u8(d20u8, d0u8); q6u16 = vmull_u8(d21u8, d0u8); q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); q3u16 = vmlal_u8(q3u16, d20u8, d2u8); q4u16 = vmlal_u8(q4u16, d21u8, d2u8); q5u16 = vmlal_u8(q5u16, d22u8, d2u8); q6u16 = vmlal_u8(q6u16, d23u8, d2u8); q3u16 = vmlal_u8(q3u16, d23u8, d5u8); q4u16 = vmlal_u8(q4u16, d24u8, d5u8); q5u16 = vmlal_u8(q5u16, d25u8, d5u8); q6u16 = vmlal_u8(q6u16, d26u8, d5u8); q7u16 = vmull_u8(d21u8, d3u8); q8u16 = vmull_u8(d22u8, d3u8); q9u16 = vmull_u8(d23u8, d3u8); q10u16 = vmull_u8(d24u8, d3u8); q3s16 = vreinterpretq_s16_u16(q3u16); q4s16 = vreinterpretq_s16_u16(q4u16); q5s16 = vreinterpretq_s16_u16(q5u16); q6s16 = vreinterpretq_s16_u16(q6u16); q7s16 = vreinterpretq_s16_u16(q7u16); q8s16 = vreinterpretq_s16_u16(q8u16); q9s16 = vreinterpretq_s16_u16(q9u16); q10s16 = vreinterpretq_s16_u16(q10u16); q7s16 = vqaddq_s16(q7s16, q3s16); q8s16 = vqaddq_s16(q8s16, q4s16); q9s16 = vqaddq_s16(q9s16, q5s16); q10s16 = vqaddq_s16(q10s16, q6s16); d6u8 = vqrshrun_n_s16(q7s16, 7); d7u8 = vqrshrun_n_s16(q8s16, 7); d8u8 = vqrshrun_n_s16(q9s16, 7); d9u8 = vqrshrun_n_s16(q10s16, 7); d18u8 = d22u8; d19u8 = d23u8; d20u8 = d24u8; d21u8 = d25u8; d22u8 = d26u8; d23u8 = d27u8; d24u8 = d28u8; d25u8 = d29u8; d26u8 = d30u8; vst1_u8(dst_ptr, d6u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d7u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d8u8); dst_ptr += dst_pitch; vst1_u8(dst_ptr, d9u8); dst_ptr += dst_pitch; } return; } void vp8_sixtap_predict16x16_neon(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { unsigned char *src, *src_tmp, *dst, *tmpp; unsigned char tmp[336]; int i, j; uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8, d8u8, d9u8; uint8x8_t d10u8, d11u8, d12u8, d13u8, d14u8, d15u8, d18u8, d19u8; uint8x8_t d20u8, d21u8, d22u8, d23u8, d24u8, d25u8, d26u8, d27u8; uint8x8_t d28u8, d29u8, d30u8, d31u8; int8x8_t dtmps8, d0s8, d1s8, d2s8, d3s8, d4s8, d5s8; uint8x16_t q3u8, q4u8; uint16x8_t q3u16, q4u16, q5u16, q6u16, q7u16, q8u16, q9u16, q10u16; uint16x8_t q11u16, q12u16, q13u16, q15u16; int16x8_t q3s16, q4s16, q5s16, q6s16, q7s16, q8s16, q9s16, q10s16; int16x8_t q11s16, q12s16, q13s16, q15s16; if (xoffset == 0) { // secondpass_filter8x8_only // load second_pass filter dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); d0s8 = vdup_lane_s8(dtmps8, 0); d1s8 = vdup_lane_s8(dtmps8, 1); d2s8 = vdup_lane_s8(dtmps8, 2); d3s8 = vdup_lane_s8(dtmps8, 3); d4s8 = vdup_lane_s8(dtmps8, 4); d5s8 = vdup_lane_s8(dtmps8, 5); d0u8 = 
vreinterpret_u8_s8(vabs_s8(d0s8)); d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); // load src data src_tmp = src_ptr - src_pixels_per_line * 2; for (i = 0; i < 2; ++i) { src = src_tmp + i * 8; dst = dst_ptr + i * 8; d18u8 = vld1_u8(src); src += src_pixels_per_line; d19u8 = vld1_u8(src); src += src_pixels_per_line; d20u8 = vld1_u8(src); src += src_pixels_per_line; d21u8 = vld1_u8(src); src += src_pixels_per_line; d22u8 = vld1_u8(src); src += src_pixels_per_line; for (j = 0; j < 4; ++j) { d23u8 = vld1_u8(src); src += src_pixels_per_line; d24u8 = vld1_u8(src); src += src_pixels_per_line; d25u8 = vld1_u8(src); src += src_pixels_per_line; d26u8 = vld1_u8(src); src += src_pixels_per_line; q3u16 = vmull_u8(d18u8, d0u8); q4u16 = vmull_u8(d19u8, d0u8); q5u16 = vmull_u8(d20u8, d0u8); q6u16 = vmull_u8(d21u8, d0u8); q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); q3u16 = vmlal_u8(q3u16, d20u8, d2u8); q4u16 = vmlal_u8(q4u16, d21u8, d2u8); q5u16 = vmlal_u8(q5u16, d22u8, d2u8); q6u16 = vmlal_u8(q6u16, d23u8, d2u8); q3u16 = vmlal_u8(q3u16, d23u8, d5u8); q4u16 = vmlal_u8(q4u16, d24u8, d5u8); q5u16 = vmlal_u8(q5u16, d25u8, d5u8); q6u16 = vmlal_u8(q6u16, d26u8, d5u8); q7u16 = vmull_u8(d21u8, d3u8); q8u16 = vmull_u8(d22u8, d3u8); q9u16 = vmull_u8(d23u8, d3u8); q10u16 = vmull_u8(d24u8, d3u8); q3s16 = vreinterpretq_s16_u16(q3u16); q4s16 = vreinterpretq_s16_u16(q4u16); q5s16 = vreinterpretq_s16_u16(q5u16); q6s16 = vreinterpretq_s16_u16(q6u16); q7s16 = vreinterpretq_s16_u16(q7u16); q8s16 = vreinterpretq_s16_u16(q8u16); q9s16 = vreinterpretq_s16_u16(q9u16); q10s16 = vreinterpretq_s16_u16(q10u16); q7s16 = vqaddq_s16(q7s16, q3s16); q8s16 = vqaddq_s16(q8s16, q4s16); q9s16 = vqaddq_s16(q9s16, q5s16); q10s16 = vqaddq_s16(q10s16, q6s16); d6u8 = vqrshrun_n_s16(q7s16, 7); d7u8 = vqrshrun_n_s16(q8s16, 7); d8u8 = vqrshrun_n_s16(q9s16, 7); d9u8 = vqrshrun_n_s16(q10s16, 7); d18u8 = d22u8; d19u8 = d23u8; d20u8 = d24u8; d21u8 = d25u8; d22u8 = d26u8; vst1_u8(dst, d6u8); dst += dst_pitch; vst1_u8(dst, d7u8); dst += dst_pitch; vst1_u8(dst, d8u8); dst += dst_pitch; vst1_u8(dst, d9u8); dst += dst_pitch; } } return; } // load first_pass filter dtmps8 = vld1_s8(vp8_sub_pel_filters[xoffset]); d0s8 = vdup_lane_s8(dtmps8, 0); d1s8 = vdup_lane_s8(dtmps8, 1); d2s8 = vdup_lane_s8(dtmps8, 2); d3s8 = vdup_lane_s8(dtmps8, 3); d4s8 = vdup_lane_s8(dtmps8, 4); d5s8 = vdup_lane_s8(dtmps8, 5); d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); // First pass: output_height lines x output_width columns (9x4) if (yoffset == 0) { // firstpass_filter4x4_only src = src_ptr - 2; dst = dst_ptr; for (i = 0; i < 8; ++i) { d6u8 = vld1_u8(src); d7u8 = vld1_u8(src + 8); d8u8 = vld1_u8(src + 16); src += src_pixels_per_line; d9u8 = vld1_u8(src); d10u8 = vld1_u8(src + 8); d11u8 = vld1_u8(src + 16); src += src_pixels_per_line; __builtin_prefetch(src); __builtin_prefetch(src + src_pixels_per_line); q6u16 = vmull_u8(d6u8, d0u8); q7u16 = vmull_u8(d7u8, d0u8); 
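// Each row needs 21 source bytes (16 output pixels plus 5 of 6-tap
// context), hence the three 8-byte loads per row; q6/q7 accumulate the
// first row and q8/q9 the second.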
q8u16 = vmull_u8(d9u8, d0u8); q9u16 = vmull_u8(d10u8, d0u8); d20u8 = vext_u8(d6u8, d7u8, 1); d21u8 = vext_u8(d9u8, d10u8, 1); d22u8 = vext_u8(d7u8, d8u8, 1); d23u8 = vext_u8(d10u8, d11u8, 1); d24u8 = vext_u8(d6u8, d7u8, 4); d25u8 = vext_u8(d9u8, d10u8, 4); d26u8 = vext_u8(d7u8, d8u8, 4); d27u8 = vext_u8(d10u8, d11u8, 4); d28u8 = vext_u8(d6u8, d7u8, 5); d29u8 = vext_u8(d9u8, d10u8, 5); q6u16 = vmlsl_u8(q6u16, d20u8, d1u8); q8u16 = vmlsl_u8(q8u16, d21u8, d1u8); q7u16 = vmlsl_u8(q7u16, d22u8, d1u8); q9u16 = vmlsl_u8(q9u16, d23u8, d1u8); q6u16 = vmlsl_u8(q6u16, d24u8, d4u8); q8u16 = vmlsl_u8(q8u16, d25u8, d4u8); q7u16 = vmlsl_u8(q7u16, d26u8, d4u8); q9u16 = vmlsl_u8(q9u16, d27u8, d4u8); q6u16 = vmlal_u8(q6u16, d28u8, d5u8); q8u16 = vmlal_u8(q8u16, d29u8, d5u8); d20u8 = vext_u8(d7u8, d8u8, 5); d21u8 = vext_u8(d10u8, d11u8, 5); d22u8 = vext_u8(d6u8, d7u8, 2); d23u8 = vext_u8(d9u8, d10u8, 2); d24u8 = vext_u8(d7u8, d8u8, 2); d25u8 = vext_u8(d10u8, d11u8, 2); d26u8 = vext_u8(d6u8, d7u8, 3); d27u8 = vext_u8(d9u8, d10u8, 3); d28u8 = vext_u8(d7u8, d8u8, 3); d29u8 = vext_u8(d10u8, d11u8, 3); q7u16 = vmlal_u8(q7u16, d20u8, d5u8); q9u16 = vmlal_u8(q9u16, d21u8, d5u8); q6u16 = vmlal_u8(q6u16, d22u8, d2u8); q8u16 = vmlal_u8(q8u16, d23u8, d2u8); q7u16 = vmlal_u8(q7u16, d24u8, d2u8); q9u16 = vmlal_u8(q9u16, d25u8, d2u8); q10u16 = vmull_u8(d26u8, d3u8); q11u16 = vmull_u8(d27u8, d3u8); q12u16 = vmull_u8(d28u8, d3u8); q15u16 = vmull_u8(d29u8, d3u8); q6s16 = vreinterpretq_s16_u16(q6u16); q7s16 = vreinterpretq_s16_u16(q7u16); q8s16 = vreinterpretq_s16_u16(q8u16); q9s16 = vreinterpretq_s16_u16(q9u16); q10s16 = vreinterpretq_s16_u16(q10u16); q11s16 = vreinterpretq_s16_u16(q11u16); q12s16 = vreinterpretq_s16_u16(q12u16); q15s16 = vreinterpretq_s16_u16(q15u16); q6s16 = vqaddq_s16(q6s16, q10s16); q8s16 = vqaddq_s16(q8s16, q11s16); q7s16 = vqaddq_s16(q7s16, q12s16); q9s16 = vqaddq_s16(q9s16, q15s16); d6u8 = vqrshrun_n_s16(q6s16, 7); d7u8 = vqrshrun_n_s16(q7s16, 7); d8u8 = vqrshrun_n_s16(q8s16, 7); d9u8 = vqrshrun_n_s16(q9s16, 7); q3u8 = vcombine_u8(d6u8, d7u8); q4u8 = vcombine_u8(d8u8, d9u8); vst1q_u8(dst, q3u8); dst += dst_pitch; vst1q_u8(dst, q4u8); dst += dst_pitch; } return; } src = src_ptr - 2 - src_pixels_per_line * 2; tmpp = tmp; for (i = 0; i < 7; ++i) { d6u8 = vld1_u8(src); d7u8 = vld1_u8(src + 8); d8u8 = vld1_u8(src + 16); src += src_pixels_per_line; d9u8 = vld1_u8(src); d10u8 = vld1_u8(src + 8); d11u8 = vld1_u8(src + 16); src += src_pixels_per_line; d12u8 = vld1_u8(src); d13u8 = vld1_u8(src + 8); d14u8 = vld1_u8(src + 16); src += src_pixels_per_line; __builtin_prefetch(src); __builtin_prefetch(src + src_pixels_per_line); __builtin_prefetch(src + src_pixels_per_line * 2); q8u16 = vmull_u8(d6u8, d0u8); q9u16 = vmull_u8(d7u8, d0u8); q10u16 = vmull_u8(d9u8, d0u8); q11u16 = vmull_u8(d10u8, d0u8); q12u16 = vmull_u8(d12u8, d0u8); q13u16 = vmull_u8(d13u8, d0u8); d28u8 = vext_u8(d6u8, d7u8, 1); d29u8 = vext_u8(d9u8, d10u8, 1); d30u8 = vext_u8(d12u8, d13u8, 1); q8u16 = vmlsl_u8(q8u16, d28u8, d1u8); q10u16 = vmlsl_u8(q10u16, d29u8, d1u8); q12u16 = vmlsl_u8(q12u16, d30u8, d1u8); d28u8 = vext_u8(d7u8, d8u8, 1); d29u8 = vext_u8(d10u8, d11u8, 1); d30u8 = vext_u8(d13u8, d14u8, 1); q9u16 = vmlsl_u8(q9u16, d28u8, d1u8); q11u16 = vmlsl_u8(q11u16, d29u8, d1u8); q13u16 = vmlsl_u8(q13u16, d30u8, d1u8); d28u8 = vext_u8(d6u8, d7u8, 4); d29u8 = vext_u8(d9u8, d10u8, 4); d30u8 = vext_u8(d12u8, d13u8, 4); q8u16 = vmlsl_u8(q8u16, d28u8, d4u8); q10u16 = vmlsl_u8(q10u16, d29u8, d4u8); q12u16 = vmlsl_u8(q12u16, d30u8, d4u8); d28u8 = 
vext_u8(d7u8, d8u8, 4); d29u8 = vext_u8(d10u8, d11u8, 4); d30u8 = vext_u8(d13u8, d14u8, 4); q9u16 = vmlsl_u8(q9u16, d28u8, d4u8); q11u16 = vmlsl_u8(q11u16, d29u8, d4u8); q13u16 = vmlsl_u8(q13u16, d30u8, d4u8); d28u8 = vext_u8(d6u8, d7u8, 5); d29u8 = vext_u8(d9u8, d10u8, 5); d30u8 = vext_u8(d12u8, d13u8, 5); q8u16 = vmlal_u8(q8u16, d28u8, d5u8); q10u16 = vmlal_u8(q10u16, d29u8, d5u8); q12u16 = vmlal_u8(q12u16, d30u8, d5u8); d28u8 = vext_u8(d7u8, d8u8, 5); d29u8 = vext_u8(d10u8, d11u8, 5); d30u8 = vext_u8(d13u8, d14u8, 5); q9u16 = vmlal_u8(q9u16, d28u8, d5u8); q11u16 = vmlal_u8(q11u16, d29u8, d5u8); q13u16 = vmlal_u8(q13u16, d30u8, d5u8); d28u8 = vext_u8(d6u8, d7u8, 2); d29u8 = vext_u8(d9u8, d10u8, 2); d30u8 = vext_u8(d12u8, d13u8, 2); q8u16 = vmlal_u8(q8u16, d28u8, d2u8); q10u16 = vmlal_u8(q10u16, d29u8, d2u8); q12u16 = vmlal_u8(q12u16, d30u8, d2u8); d28u8 = vext_u8(d7u8, d8u8, 2); d29u8 = vext_u8(d10u8, d11u8, 2); d30u8 = vext_u8(d13u8, d14u8, 2); q9u16 = vmlal_u8(q9u16, d28u8, d2u8); q11u16 = vmlal_u8(q11u16, d29u8, d2u8); q13u16 = vmlal_u8(q13u16, d30u8, d2u8); d28u8 = vext_u8(d6u8, d7u8, 3); d29u8 = vext_u8(d9u8, d10u8, 3); d30u8 = vext_u8(d12u8, d13u8, 3); d15u8 = vext_u8(d7u8, d8u8, 3); d31u8 = vext_u8(d10u8, d11u8, 3); d6u8 = vext_u8(d13u8, d14u8, 3); q4u16 = vmull_u8(d28u8, d3u8); q5u16 = vmull_u8(d29u8, d3u8); q6u16 = vmull_u8(d30u8, d3u8); q4s16 = vreinterpretq_s16_u16(q4u16); q5s16 = vreinterpretq_s16_u16(q5u16); q6s16 = vreinterpretq_s16_u16(q6u16); q8s16 = vreinterpretq_s16_u16(q8u16); q10s16 = vreinterpretq_s16_u16(q10u16); q12s16 = vreinterpretq_s16_u16(q12u16); q8s16 = vqaddq_s16(q8s16, q4s16); q10s16 = vqaddq_s16(q10s16, q5s16); q12s16 = vqaddq_s16(q12s16, q6s16); q6u16 = vmull_u8(d15u8, d3u8); q7u16 = vmull_u8(d31u8, d3u8); q3u16 = vmull_u8(d6u8, d3u8); q3s16 = vreinterpretq_s16_u16(q3u16); q6s16 = vreinterpretq_s16_u16(q6u16); q7s16 = vreinterpretq_s16_u16(q7u16); q9s16 = vreinterpretq_s16_u16(q9u16); q11s16 = vreinterpretq_s16_u16(q11u16); q13s16 = vreinterpretq_s16_u16(q13u16); q9s16 = vqaddq_s16(q9s16, q6s16); q11s16 = vqaddq_s16(q11s16, q7s16); q13s16 = vqaddq_s16(q13s16, q3s16); d6u8 = vqrshrun_n_s16(q8s16, 7); d7u8 = vqrshrun_n_s16(q9s16, 7); d8u8 = vqrshrun_n_s16(q10s16, 7); d9u8 = vqrshrun_n_s16(q11s16, 7); d10u8 = vqrshrun_n_s16(q12s16, 7); d11u8 = vqrshrun_n_s16(q13s16, 7); vst1_u8(tmpp, d6u8); tmpp += 8; vst1_u8(tmpp, d7u8); tmpp += 8; vst1_u8(tmpp, d8u8); tmpp += 8; vst1_u8(tmpp, d9u8); tmpp += 8; vst1_u8(tmpp, d10u8); tmpp += 8; vst1_u8(tmpp, d11u8); tmpp += 8; } // Second pass: 16x16 dtmps8 = vld1_s8(vp8_sub_pel_filters[yoffset]); d0s8 = vdup_lane_s8(dtmps8, 0); d1s8 = vdup_lane_s8(dtmps8, 1); d2s8 = vdup_lane_s8(dtmps8, 2); d3s8 = vdup_lane_s8(dtmps8, 3); d4s8 = vdup_lane_s8(dtmps8, 4); d5s8 = vdup_lane_s8(dtmps8, 5); d0u8 = vreinterpret_u8_s8(vabs_s8(d0s8)); d1u8 = vreinterpret_u8_s8(vabs_s8(d1s8)); d2u8 = vreinterpret_u8_s8(vabs_s8(d2s8)); d3u8 = vreinterpret_u8_s8(vabs_s8(d3s8)); d4u8 = vreinterpret_u8_s8(vabs_s8(d4s8)); d5u8 = vreinterpret_u8_s8(vabs_s8(d5s8)); for (i = 0; i < 2; ++i) { dst = dst_ptr + 8 * i; tmpp = tmp + 8 * i; d18u8 = vld1_u8(tmpp); tmpp += 16; d19u8 = vld1_u8(tmpp); tmpp += 16; d20u8 = vld1_u8(tmpp); tmpp += 16; d21u8 = vld1_u8(tmpp); tmpp += 16; d22u8 = vld1_u8(tmpp); tmpp += 16; for (j = 0; j < 4; ++j) { d23u8 = vld1_u8(tmpp); tmpp += 16; d24u8 = vld1_u8(tmpp); tmpp += 16; d25u8 = vld1_u8(tmpp); tmpp += 16; d26u8 = vld1_u8(tmpp); tmpp += 16; q3u16 = vmull_u8(d18u8, d0u8); q4u16 = vmull_u8(d19u8, d0u8); q5u16 = vmull_u8(d20u8, 
d0u8); q6u16 = vmull_u8(d21u8, d0u8); q3u16 = vmlsl_u8(q3u16, d19u8, d1u8); q4u16 = vmlsl_u8(q4u16, d20u8, d1u8); q5u16 = vmlsl_u8(q5u16, d21u8, d1u8); q6u16 = vmlsl_u8(q6u16, d22u8, d1u8); q3u16 = vmlsl_u8(q3u16, d22u8, d4u8); q4u16 = vmlsl_u8(q4u16, d23u8, d4u8); q5u16 = vmlsl_u8(q5u16, d24u8, d4u8); q6u16 = vmlsl_u8(q6u16, d25u8, d4u8); q3u16 = vmlal_u8(q3u16, d20u8, d2u8); q4u16 = vmlal_u8(q4u16, d21u8, d2u8); q5u16 = vmlal_u8(q5u16, d22u8, d2u8); q6u16 = vmlal_u8(q6u16, d23u8, d2u8); q3u16 = vmlal_u8(q3u16, d23u8, d5u8); q4u16 = vmlal_u8(q4u16, d24u8, d5u8); q5u16 = vmlal_u8(q5u16, d25u8, d5u8); q6u16 = vmlal_u8(q6u16, d26u8, d5u8); q7u16 = vmull_u8(d21u8, d3u8); q8u16 = vmull_u8(d22u8, d3u8); q9u16 = vmull_u8(d23u8, d3u8); q10u16 = vmull_u8(d24u8, d3u8); q3s16 = vreinterpretq_s16_u16(q3u16); q4s16 = vreinterpretq_s16_u16(q4u16); q5s16 = vreinterpretq_s16_u16(q5u16); q6s16 = vreinterpretq_s16_u16(q6u16); q7s16 = vreinterpretq_s16_u16(q7u16); q8s16 = vreinterpretq_s16_u16(q8u16); q9s16 = vreinterpretq_s16_u16(q9u16); q10s16 = vreinterpretq_s16_u16(q10u16); q7s16 = vqaddq_s16(q7s16, q3s16); q8s16 = vqaddq_s16(q8s16, q4s16); q9s16 = vqaddq_s16(q9s16, q5s16); q10s16 = vqaddq_s16(q10s16, q6s16); d6u8 = vqrshrun_n_s16(q7s16, 7); d7u8 = vqrshrun_n_s16(q8s16, 7); d8u8 = vqrshrun_n_s16(q9s16, 7); d9u8 = vqrshrun_n_s16(q10s16, 7); d18u8 = d22u8; d19u8 = d23u8; d20u8 = d24u8; d21u8 = d25u8; d22u8 = d26u8; vst1_u8(dst, d6u8); dst += dst_pitch; vst1_u8(dst, d7u8); dst += dst_pitch; vst1_u8(dst, d8u8); dst += dst_pitch; vst1_u8(dst, d9u8); dst += dst_pitch; } } return; } libvpx-1.8.2/vp8/common/arm/neon/vp8_loopfilter_neon.c000066400000000000000000000405741357355204000227260ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <arm_neon.h> #include "./vpx_config.h" #include "vp8/common/arm/loopfilter_arm.h" #include "vpx_ports/arm.h" static INLINE void vp8_loop_filter_neon(uint8x16_t qblimit, // flimit uint8x16_t qlimit, // limit uint8x16_t qthresh, // thresh uint8x16_t q3, // p3 uint8x16_t q4, // p2 uint8x16_t q5, // p1 uint8x16_t q6, // p0 uint8x16_t q7, // q0 uint8x16_t q8, // q1 uint8x16_t q9, // q2 uint8x16_t q10, // q3 uint8x16_t *q5r, // p1 uint8x16_t *q6r, // p0 uint8x16_t *q7r, // q0 uint8x16_t *q8r) { // q1 uint8x16_t q0u8, q1u8, q2u8, q11u8, q12u8, q13u8, q14u8, q15u8; int16x8_t q2s16, q11s16; uint16x8_t q4u16; int8x16_t q1s8, q2s8, q10s8, q11s8, q12s8, q13s8; int8x8_t d2s8, d3s8; q11u8 = vabdq_u8(q3, q4); q12u8 = vabdq_u8(q4, q5); q13u8 = vabdq_u8(q5, q6); q14u8 = vabdq_u8(q8, q7); q3 = vabdq_u8(q9, q8); q4 = vabdq_u8(q10, q9); q11u8 = vmaxq_u8(q11u8, q12u8); q12u8 = vmaxq_u8(q13u8, q14u8); q3 = vmaxq_u8(q3, q4); q15u8 = vmaxq_u8(q11u8, q12u8); q9 = vabdq_u8(q6, q7); // vp8_hevmask q13u8 = vcgtq_u8(q13u8, qthresh); q14u8 = vcgtq_u8(q14u8, qthresh); q15u8 = vmaxq_u8(q15u8, q3); q2u8 = vabdq_u8(q5, q8); q9 = vqaddq_u8(q9, q9); q15u8 = vcgeq_u8(qlimit, q15u8); // vp8_filter() function // convert to signed q10 = vdupq_n_u8(0x80); q8 = veorq_u8(q8, q10); q7 = veorq_u8(q7, q10); q6 = veorq_u8(q6, q10); q5 = veorq_u8(q5, q10); q2u8 = vshrq_n_u8(q2u8, 1); q9 = vqaddq_u8(q9, q2u8); q10 = vdupq_n_u8(3); q2s16 = vsubl_s8(vget_low_s8(vreinterpretq_s8_u8(q7)), vget_low_s8(vreinterpretq_s8_u8(q6))); q11s16 = vsubl_s8(vget_high_s8(vreinterpretq_s8_u8(q7)), vget_high_s8(vreinterpretq_s8_u8(q6))); q9 = vcgeq_u8(qblimit, q9); q1s8 = vqsubq_s8(vreinterpretq_s8_u8(q5), vreinterpretq_s8_u8(q8)); q14u8 = vorrq_u8(q13u8, q14u8); q4u16 = vmovl_u8(vget_low_u8(q10)); q2s16 = vmulq_s16(q2s16, vreinterpretq_s16_u16(q4u16)); q11s16 = vmulq_s16(q11s16, vreinterpretq_s16_u16(q4u16)); q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q14u8); q15u8 = vandq_u8(q15u8, q9); q1s8 = vreinterpretq_s8_u8(q1u8); q2s16 = vaddw_s8(q2s16, vget_low_s8(q1s8)); q11s16 = vaddw_s8(q11s16, vget_high_s8(q1s8)); q9 = vdupq_n_u8(4); // vp8_filter = clamp(vp8_filter + 3 * ( qs0 - ps0)) d2s8 = vqmovn_s16(q2s16); d3s8 = vqmovn_s16(q11s16); q1s8 = vcombine_s8(d2s8, d3s8); q1u8 = vandq_u8(vreinterpretq_u8_s8(q1s8), q15u8); q1s8 = vreinterpretq_s8_u8(q1u8); q2s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q10)); q1s8 = vqaddq_s8(q1s8, vreinterpretq_s8_u8(q9)); q2s8 = vshrq_n_s8(q2s8, 3); q1s8 = vshrq_n_s8(q1s8, 3); q11s8 = vqaddq_s8(vreinterpretq_s8_u8(q6), q2s8); q10s8 = vqsubq_s8(vreinterpretq_s8_u8(q7), q1s8); q1s8 = vrshrq_n_s8(q1s8, 1); q1s8 = vbicq_s8(q1s8, vreinterpretq_s8_u8(q14u8)); q13s8 = vqaddq_s8(vreinterpretq_s8_u8(q5), q1s8); q12s8 = vqsubq_s8(vreinterpretq_s8_u8(q8), q1s8); q0u8 = vdupq_n_u8(0x80); *q8r = veorq_u8(vreinterpretq_u8_s8(q12s8), q0u8); *q7r = veorq_u8(vreinterpretq_u8_s8(q10s8), q0u8); *q6r = veorq_u8(vreinterpretq_u8_s8(q11s8), q0u8); *q5r = veorq_u8(vreinterpretq_u8_s8(q13s8), q0u8); return; } void vp8_loop_filter_horizontal_edge_y_neon(unsigned char *src, int pitch, unsigned char blimit, unsigned char limit, unsigned char thresh) { uint8x16_t qblimit, qlimit, qthresh, q3, q4; uint8x16_t q5, q6, q7, q8, q9, q10; qblimit = vdupq_n_u8(blimit); qlimit = vdupq_n_u8(limit); qthresh = vdupq_n_u8(thresh); src -= (pitch << 2); q3 = vld1q_u8(src); src += pitch; q4 = vld1q_u8(src); src += pitch; q5 = vld1q_u8(src); src += pitch; q6 = vld1q_u8(src); src += pitch; q7 = vld1q_u8(src); src += pitch; q8 = vld1q_u8(src); src += pitch; q9 = vld1q_u8(src);
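// One more load below completes the 8-row window p3..p0, q0..q3 that the
// filter reads (src was rewound by 4 rows above).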
src += pitch; q10 = vld1q_u8(src); vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9, q10, &q5, &q6, &q7, &q8); src -= (pitch * 5); vst1q_u8(src, q5); src += pitch; vst1q_u8(src, q6); src += pitch; vst1q_u8(src, q7); src += pitch; vst1q_u8(src, q8); return; } void vp8_loop_filter_horizontal_edge_uv_neon(unsigned char *u, int pitch, unsigned char blimit, unsigned char limit, unsigned char thresh, unsigned char *v) { uint8x16_t qblimit, qlimit, qthresh, q3, q4; uint8x16_t q5, q6, q7, q8, q9, q10; uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; uint8x8_t d15, d16, d17, d18, d19, d20, d21; qblimit = vdupq_n_u8(blimit); qlimit = vdupq_n_u8(limit); qthresh = vdupq_n_u8(thresh); u -= (pitch << 2); v -= (pitch << 2); d6 = vld1_u8(u); u += pitch; d7 = vld1_u8(v); v += pitch; d8 = vld1_u8(u); u += pitch; d9 = vld1_u8(v); v += pitch; d10 = vld1_u8(u); u += pitch; d11 = vld1_u8(v); v += pitch; d12 = vld1_u8(u); u += pitch; d13 = vld1_u8(v); v += pitch; d14 = vld1_u8(u); u += pitch; d15 = vld1_u8(v); v += pitch; d16 = vld1_u8(u); u += pitch; d17 = vld1_u8(v); v += pitch; d18 = vld1_u8(u); u += pitch; d19 = vld1_u8(v); v += pitch; d20 = vld1_u8(u); d21 = vld1_u8(v); q3 = vcombine_u8(d6, d7); q4 = vcombine_u8(d8, d9); q5 = vcombine_u8(d10, d11); q6 = vcombine_u8(d12, d13); q7 = vcombine_u8(d14, d15); q8 = vcombine_u8(d16, d17); q9 = vcombine_u8(d18, d19); q10 = vcombine_u8(d20, d21); vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9, q10, &q5, &q6, &q7, &q8); u -= (pitch * 5); vst1_u8(u, vget_low_u8(q5)); u += pitch; vst1_u8(u, vget_low_u8(q6)); u += pitch; vst1_u8(u, vget_low_u8(q7)); u += pitch; vst1_u8(u, vget_low_u8(q8)); v -= (pitch * 5); vst1_u8(v, vget_high_u8(q5)); v += pitch; vst1_u8(v, vget_high_u8(q6)); v += pitch; vst1_u8(v, vget_high_u8(q7)); v += pitch; vst1_u8(v, vget_high_u8(q8)); return; } static INLINE void write_4x8(unsigned char *dst, int pitch, const uint8x8x4_t result) { #ifdef VPX_INCOMPATIBLE_GCC
/*
 * uint8x8x4_t result
 00 01 02 03 | 04 05 06 07
 10 11 12 13 | 14 15 16 17
 20 21 22 23 | 24 25 26 27
 30 31 32 33 | 34 35 36 37
 ---
 * after vtrn_u16
 00 01 20 21 | 04 05 24 25
 02 03 22 23 | 06 07 26 27
 10 11 30 31 | 14 15 34 35
 12 13 32 33 | 16 17 36 37
 ---
 * after vtrn_u8
 00 10 20 30 | 04 14 24 34
 01 11 21 31 | 05 15 25 35
 02 12 22 32 | 06 16 26 36
 03 13 23 33 | 07 17 27 37
 */
const uint16x4x2_t r02_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[0]), vreinterpret_u16_u8(result.val[2])); const uint16x4x2_t r13_u16 = vtrn_u16(vreinterpret_u16_u8(result.val[1]), vreinterpret_u16_u8(result.val[3])); const uint8x8x2_t r01_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[0]), vreinterpret_u8_u16(r13_u16.val[0])); const uint8x8x2_t r23_u8 = vtrn_u8(vreinterpret_u8_u16(r02_u16.val[1]), vreinterpret_u8_u16(r13_u16.val[1])); const uint32x2_t x_0_4 = vreinterpret_u32_u8(r01_u8.val[0]); const uint32x2_t x_1_5 = vreinterpret_u32_u8(r01_u8.val[1]); const uint32x2_t x_2_6 = vreinterpret_u32_u8(r23_u8.val[0]); const uint32x2_t x_3_7 = vreinterpret_u32_u8(r23_u8.val[1]); vst1_lane_u32((uint32_t *)dst, x_0_4, 0); dst += pitch; vst1_lane_u32((uint32_t *)dst, x_1_5, 0); dst += pitch; vst1_lane_u32((uint32_t *)dst, x_2_6, 0); dst += pitch; vst1_lane_u32((uint32_t *)dst, x_3_7, 0); dst += pitch; vst1_lane_u32((uint32_t *)dst, x_0_4, 1); dst += pitch; vst1_lane_u32((uint32_t *)dst, x_1_5, 1); dst += pitch; vst1_lane_u32((uint32_t *)dst, x_2_6, 1); dst += pitch; vst1_lane_u32((uint32_t *)dst, x_3_7, 1); #else vst4_lane_u8(dst, result, 0); dst += pitch;
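// vst4_lane_u8 interleaves one byte from each of the four result vectors,
// writing the 4-byte group (p1 p0 q0 q1) for one row per lane.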
vst4_lane_u8(dst, result, 1); dst += pitch; vst4_lane_u8(dst, result, 2); dst += pitch; vst4_lane_u8(dst, result, 3); dst += pitch; vst4_lane_u8(dst, result, 4); dst += pitch; vst4_lane_u8(dst, result, 5); dst += pitch; vst4_lane_u8(dst, result, 6); dst += pitch; vst4_lane_u8(dst, result, 7); #endif // VPX_INCOMPATIBLE_GCC } void vp8_loop_filter_vertical_edge_y_neon(unsigned char *src, int pitch, unsigned char blimit, unsigned char limit, unsigned char thresh) { unsigned char *s, *d; uint8x16_t qblimit, qlimit, qthresh, q3, q4; uint8x16_t q5, q6, q7, q8, q9, q10; uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; uint8x8_t d15, d16, d17, d18, d19, d20, d21; uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; uint8x8x4_t q4ResultH, q4ResultL; qblimit = vdupq_n_u8(blimit); qlimit = vdupq_n_u8(limit); qthresh = vdupq_n_u8(thresh); s = src - 4; d6 = vld1_u8(s); s += pitch; d8 = vld1_u8(s); s += pitch; d10 = vld1_u8(s); s += pitch; d12 = vld1_u8(s); s += pitch; d14 = vld1_u8(s); s += pitch; d16 = vld1_u8(s); s += pitch; d18 = vld1_u8(s); s += pitch; d20 = vld1_u8(s); s += pitch; d7 = vld1_u8(s); s += pitch; d9 = vld1_u8(s); s += pitch; d11 = vld1_u8(s); s += pitch; d13 = vld1_u8(s); s += pitch; d15 = vld1_u8(s); s += pitch; d17 = vld1_u8(s); s += pitch; d19 = vld1_u8(s); s += pitch; d21 = vld1_u8(s); q3 = vcombine_u8(d6, d7); q4 = vcombine_u8(d8, d9); q5 = vcombine_u8(d10, d11); q6 = vcombine_u8(d12, d13); q7 = vcombine_u8(d14, d15); q8 = vcombine_u8(d16, d17); q9 = vcombine_u8(d18, d19); q10 = vcombine_u8(d20, d21); q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), vreinterpretq_u16_u32(q2tmp2.val[0])); q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), vreinterpretq_u16_u32(q2tmp3.val[0])); q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), vreinterpretq_u16_u32(q2tmp2.val[1])); q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), vreinterpretq_u16_u32(q2tmp3.val[1])); q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), vreinterpretq_u8_u16(q2tmp5.val[0])); q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), vreinterpretq_u8_u16(q2tmp5.val[1])); q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), vreinterpretq_u8_u16(q2tmp7.val[0])); q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), vreinterpretq_u8_u16(q2tmp7.val[1])); q3 = q2tmp8.val[0]; q4 = q2tmp8.val[1]; q5 = q2tmp9.val[0]; q6 = q2tmp9.val[1]; q7 = q2tmp10.val[0]; q8 = q2tmp10.val[1]; q9 = q2tmp11.val[0]; q10 = q2tmp11.val[1]; vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9, q10, &q5, &q6, &q7, &q8); q4ResultL.val[0] = vget_low_u8(q5); // d10 q4ResultL.val[1] = vget_low_u8(q6); // d12 q4ResultL.val[2] = vget_low_u8(q7); // d14 q4ResultL.val[3] = vget_low_u8(q8); // d16 q4ResultH.val[0] = vget_high_u8(q5); // d11 q4ResultH.val[1] = vget_high_u8(q6); // d13 q4ResultH.val[2] = vget_high_u8(q7); // d15 q4ResultH.val[3] = vget_high_u8(q8); // d17 d = src - 2; write_4x8(d, pitch, q4ResultL); d += pitch * 8; write_4x8(d, pitch, q4ResultH); } void vp8_loop_filter_vertical_edge_uv_neon(unsigned char *u, int pitch, unsigned char blimit, unsigned char limit, unsigned char thresh, unsigned char *v) { unsigned char *us, 
*ud; unsigned char *vs, *vd; uint8x16_t qblimit, qlimit, qthresh, q3, q4; uint8x16_t q5, q6, q7, q8, q9, q10; uint8x8_t d6, d7, d8, d9, d10, d11, d12, d13, d14; uint8x8_t d15, d16, d17, d18, d19, d20, d21; uint32x4x2_t q2tmp0, q2tmp1, q2tmp2, q2tmp3; uint16x8x2_t q2tmp4, q2tmp5, q2tmp6, q2tmp7; uint8x16x2_t q2tmp8, q2tmp9, q2tmp10, q2tmp11; uint8x8x4_t q4ResultH, q4ResultL; qblimit = vdupq_n_u8(blimit); qlimit = vdupq_n_u8(limit); qthresh = vdupq_n_u8(thresh); us = u - 4; d6 = vld1_u8(us); us += pitch; d8 = vld1_u8(us); us += pitch; d10 = vld1_u8(us); us += pitch; d12 = vld1_u8(us); us += pitch; d14 = vld1_u8(us); us += pitch; d16 = vld1_u8(us); us += pitch; d18 = vld1_u8(us); us += pitch; d20 = vld1_u8(us); vs = v - 4; d7 = vld1_u8(vs); vs += pitch; d9 = vld1_u8(vs); vs += pitch; d11 = vld1_u8(vs); vs += pitch; d13 = vld1_u8(vs); vs += pitch; d15 = vld1_u8(vs); vs += pitch; d17 = vld1_u8(vs); vs += pitch; d19 = vld1_u8(vs); vs += pitch; d21 = vld1_u8(vs); q3 = vcombine_u8(d6, d7); q4 = vcombine_u8(d8, d9); q5 = vcombine_u8(d10, d11); q6 = vcombine_u8(d12, d13); q7 = vcombine_u8(d14, d15); q8 = vcombine_u8(d16, d17); q9 = vcombine_u8(d18, d19); q10 = vcombine_u8(d20, d21); q2tmp0 = vtrnq_u32(vreinterpretq_u32_u8(q3), vreinterpretq_u32_u8(q7)); q2tmp1 = vtrnq_u32(vreinterpretq_u32_u8(q4), vreinterpretq_u32_u8(q8)); q2tmp2 = vtrnq_u32(vreinterpretq_u32_u8(q5), vreinterpretq_u32_u8(q9)); q2tmp3 = vtrnq_u32(vreinterpretq_u32_u8(q6), vreinterpretq_u32_u8(q10)); q2tmp4 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[0]), vreinterpretq_u16_u32(q2tmp2.val[0])); q2tmp5 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[0]), vreinterpretq_u16_u32(q2tmp3.val[0])); q2tmp6 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp0.val[1]), vreinterpretq_u16_u32(q2tmp2.val[1])); q2tmp7 = vtrnq_u16(vreinterpretq_u16_u32(q2tmp1.val[1]), vreinterpretq_u16_u32(q2tmp3.val[1])); q2tmp8 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[0]), vreinterpretq_u8_u16(q2tmp5.val[0])); q2tmp9 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp4.val[1]), vreinterpretq_u8_u16(q2tmp5.val[1])); q2tmp10 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[0]), vreinterpretq_u8_u16(q2tmp7.val[0])); q2tmp11 = vtrnq_u8(vreinterpretq_u8_u16(q2tmp6.val[1]), vreinterpretq_u8_u16(q2tmp7.val[1])); q3 = q2tmp8.val[0]; q4 = q2tmp8.val[1]; q5 = q2tmp9.val[0]; q6 = q2tmp9.val[1]; q7 = q2tmp10.val[0]; q8 = q2tmp10.val[1]; q9 = q2tmp11.val[0]; q10 = q2tmp11.val[1]; vp8_loop_filter_neon(qblimit, qlimit, qthresh, q3, q4, q5, q6, q7, q8, q9, q10, &q5, &q6, &q7, &q8); q4ResultL.val[0] = vget_low_u8(q5); // d10 q4ResultL.val[1] = vget_low_u8(q6); // d12 q4ResultL.val[2] = vget_low_u8(q7); // d14 q4ResultL.val[3] = vget_low_u8(q8); // d16 ud = u - 2; write_4x8(ud, pitch, q4ResultL); q4ResultH.val[0] = vget_high_u8(q5); // d11 q4ResultH.val[1] = vget_high_u8(q6); // d13 q4ResultH.val[2] = vget_high_u8(q7); // d15 q4ResultH.val[3] = vget_high_u8(q8); // d17 vd = v - 2; write_4x8(vd, pitch, q4ResultH); } libvpx-1.8.2/vp8/common/blockd.c000066400000000000000000000015551357355204000164470ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
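*/

/* A minimal illustrative sketch (not part of libvpx) of how the two tables
 * defined below are typically consumed: vp8_block2left and vp8_block2above
 * map each of the 25 per-macroblock blocks (16 Y, 4 U, 4 V, 1 Y2) to its
 * slot in ENTROPY_CONTEXT_PLANES, whose members y1[4], u[2], v[2], y2 are
 * laid out contiguously, so the table value is a direct element offset. */
#if 0 /* illustration only; the helper name is hypothetical */
static ENTROPY_CONTEXT *left_ctx(ENTROPY_CONTEXT_PLANES *l, int block) {
  return (ENTROPY_CONTEXT *)l + vp8_block2left[block];
}
#endif
/*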
*/ #include "blockd.h" #include "vpx_mem/vpx_mem.h" const unsigned char vp8_block2left[25] = { 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; const unsigned char vp8_block2above[25] = { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8 }; libvpx-1.8.2/vp8/common/blockd.h000066400000000000000000000206071357355204000164530ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_BLOCKD_H_ #define VPX_VP8_COMMON_BLOCKD_H_ void vpx_log(const char *format, ...); #include "vpx/internal/vpx_codec_internal.h" #include "vpx_config.h" #include "vpx_scale/yv12config.h" #include "mv.h" #include "treecoder.h" #include "vpx_ports/mem.h" #ifdef __cplusplus extern "C" { #endif /*#define DCPRED 1*/ #define DCPREDSIMTHRESH 0 #define DCPREDCNTTHRESH 3 #define MB_FEATURE_TREE_PROBS 3 #define MAX_MB_SEGMENTS 4 #define MAX_REF_LF_DELTAS 4 #define MAX_MODE_LF_DELTAS 4 /* Segment Feature Masks */ #define SEGMENT_DELTADATA 0 #define SEGMENT_ABSDATA 1 typedef struct { int r, c; } POS; #define PLANE_TYPE_Y_NO_DC 0 #define PLANE_TYPE_Y2 1 #define PLANE_TYPE_UV 2 #define PLANE_TYPE_Y_WITH_DC 3 typedef char ENTROPY_CONTEXT; typedef struct { ENTROPY_CONTEXT y1[4]; ENTROPY_CONTEXT u[2]; ENTROPY_CONTEXT v[2]; ENTROPY_CONTEXT y2; } ENTROPY_CONTEXT_PLANES; extern const unsigned char vp8_block2left[25]; extern const unsigned char vp8_block2above[25]; #define VP8_COMBINEENTROPYCONTEXTS(Dest, A, B) Dest = (A) + (B); typedef enum { KEY_FRAME = 0, INTER_FRAME = 1 } FRAME_TYPE; typedef enum { DC_PRED, /* average of above and left pixels */ V_PRED, /* vertical prediction */ H_PRED, /* horizontal prediction */ TM_PRED, /* Truemotion prediction */ B_PRED, /* block based prediction, each block has its own prediction mode */ NEARESTMV, NEARMV, ZEROMV, NEWMV, SPLITMV, MB_MODE_COUNT } MB_PREDICTION_MODE; /* Macroblock level features */ typedef enum { MB_LVL_ALT_Q = 0, /* Use alternate Quantizer .... */ MB_LVL_ALT_LF = 1, /* Use alternate loop filter value... */ MB_LVL_MAX = 2 /* Number of MB level features supported */ } MB_LVL_FEATURES; /* Segment Feature Masks */ #define SEGMENT_ALTQ 0x01 #define SEGMENT_ALT_LF 0x02 #define VP8_YMODES (B_PRED + 1) #define VP8_UV_MODES (TM_PRED + 1) #define VP8_MVREFS (1 + SPLITMV - NEARESTMV) typedef enum { B_DC_PRED, /* average of above and left pixels */ B_TM_PRED, B_VE_PRED, /* vertical prediction */ B_HE_PRED, /* horizontal prediction */ B_LD_PRED, B_RD_PRED, B_VR_PRED, B_VL_PRED, B_HD_PRED, B_HU_PRED, LEFT4X4, ABOVE4X4, ZERO4X4, NEW4X4, B_MODE_COUNT } B_PREDICTION_MODE; #define VP8_BINTRAMODES (B_HU_PRED + 1) /* 10 */ #define VP8_SUBMVREFS (1 + NEW4X4 - LEFT4X4) /* For keyframes, intra block modes are predicted by the (already decoded) modes for the Y blocks to the left and above us; for interframes, there is a single probability table. 
*/ union b_mode_info { B_PREDICTION_MODE as_mode; int_mv mv; }; typedef enum { INTRA_FRAME = 0, LAST_FRAME = 1, GOLDEN_FRAME = 2, ALTREF_FRAME = 3, MAX_REF_FRAMES = 4 } MV_REFERENCE_FRAME; typedef struct { uint8_t mode, uv_mode; uint8_t ref_frame; uint8_t is_4x4; int_mv mv; uint8_t partitioning; /* does this mb has coefficients at all, 1=no coefficients, 0=need decode tokens */ uint8_t mb_skip_coeff; uint8_t need_to_clamp_mvs; /* Which set of segmentation parameters should be used for this MB */ uint8_t segment_id; } MB_MODE_INFO; typedef struct modeinfo { MB_MODE_INFO mbmi; union b_mode_info bmi[16]; } MODE_INFO; #if CONFIG_MULTI_RES_ENCODING /* The mb-level information needed to be stored for higher-resolution encoder */ typedef struct { MB_PREDICTION_MODE mode; MV_REFERENCE_FRAME ref_frame; int_mv mv; int dissim; /* dissimilarity level of the macroblock */ } LOWER_RES_MB_INFO; /* The frame-level information needed to be stored for higher-resolution * encoder */ typedef struct { FRAME_TYPE frame_type; int is_frame_dropped; // If frame is dropped due to overshoot after encode_frame. This triggers a // drop and resets rate control with Q forced to max for following frame. // The check for this dropping due to overshoot is only done on lowest stream, // and if set will force drop on all spatial streams for that current frame. int is_frame_dropped_overshoot_maxqp; // The frame rate for the lowest resolution. double low_res_framerate; /* The frame number of each reference frames */ unsigned int low_res_ref_frames[MAX_REF_FRAMES]; // The video frame counter value for the key frame, for lowest resolution. unsigned int key_frame_counter_value; // Flags to signal skipped encoding of previous and base layer stream. unsigned int skip_encoding_prev_stream; unsigned int skip_encoding_base_stream; LOWER_RES_MB_INFO *mb_info; } LOWER_RES_FRAME_INFO; #endif typedef struct blockd { short *qcoeff; short *dqcoeff; unsigned char *predictor; short *dequant; int offset; char *eob; union b_mode_info bmi; } BLOCKD; typedef void (*vp8_subpix_fn_t)(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch); typedef struct macroblockd { DECLARE_ALIGNED(16, unsigned char, predictor[384]); DECLARE_ALIGNED(16, short, qcoeff[400]); DECLARE_ALIGNED(16, short, dqcoeff[400]); DECLARE_ALIGNED(16, char, eobs[25]); DECLARE_ALIGNED(16, short, dequant_y1[16]); DECLARE_ALIGNED(16, short, dequant_y1_dc[16]); DECLARE_ALIGNED(16, short, dequant_y2[16]); DECLARE_ALIGNED(16, short, dequant_uv[16]); /* 16 Y blocks, 4 U, 4 V, 1 DC 2nd order block, each with 16 entries. */ BLOCKD block[25]; int fullpixel_mask; YV12_BUFFER_CONFIG pre; /* Filtered copy of previous frame reconstruction */ YV12_BUFFER_CONFIG dst; MODE_INFO *mode_info_context; int mode_info_stride; FRAME_TYPE frame_type; int up_available; int left_available; unsigned char *recon_above[3]; unsigned char *recon_left[3]; int recon_left_stride[2]; /* Y,U,V,Y2 */ ENTROPY_CONTEXT_PLANES *above_context; ENTROPY_CONTEXT_PLANES *left_context; /* 0 indicates segmentation at MB level is not enabled. Otherwise the * individual bits indicate which features are active. */ unsigned char segmentation_enabled; /* 0 (do not update) 1 (update) the macroblock segmentation map. */ unsigned char update_mb_segmentation_map; /* 0 (do not update) 1 (update) the macroblock segmentation feature data. */ unsigned char update_mb_segmentation_data; /* 0 (do not update) 1 (update) the macroblock segmentation feature data. 
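(For mb_segement_abs_delta below, the stored value is SEGMENT_ABSDATA or
SEGMENT_DELTADATA, i.e. whether segment_feature_data holds absolute values
or deltas against the frame defaults.)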
*/ unsigned char mb_segement_abs_delta; /* Per frame flags that define which MB level features (such as quantizer or * loop filter level) */ /* are enabled and when enabled the proabilities used to decode the per MB * flags in MB_MODE_INFO */ /* Probability Tree used to code Segment number */ vp8_prob mb_segment_tree_probs[MB_FEATURE_TREE_PROBS]; /* Segment parameters */ signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; /* mode_based Loop filter adjustment */ unsigned char mode_ref_lf_delta_enabled; unsigned char mode_ref_lf_delta_update; /* Delta values have the range +/- MAX_LOOP_FILTER */ signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ signed char ref_lf_deltas[MAX_REF_LF_DELTAS]; /* 0 = Intra, Last, GF, ARF */ /* 0 = BPRED, ZERO_MV, MV, SPLIT */ signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; signed char mode_lf_deltas[MAX_MODE_LF_DELTAS]; /* 0 = BPRED, ZERO_MV, MV, SPLIT */ /* Distance of MB away from frame edges */ int mb_to_left_edge; int mb_to_right_edge; int mb_to_top_edge; int mb_to_bottom_edge; vp8_subpix_fn_t subpixel_predict; vp8_subpix_fn_t subpixel_predict8x4; vp8_subpix_fn_t subpixel_predict8x8; vp8_subpix_fn_t subpixel_predict16x16; void *current_bc; int corrupted; struct vpx_internal_error_info error_info; #if VPX_ARCH_X86 || VPX_ARCH_X86_64 /* This is an intermediate buffer currently used in sub-pixel motion search * to keep a copy of the reference area. This buffer can be used for other * purpose. */ DECLARE_ALIGNED(32, unsigned char, y_buf[22 * 32]); #endif } MACROBLOCKD; extern void vp8_build_block_doffsets(MACROBLOCKD *x); extern void vp8_setup_block_dptrs(MACROBLOCKD *x); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_BLOCKD_H_ libvpx-1.8.2/vp8/common/coefupdateprobs.h000066400000000000000000000210251357355204000203750ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_COEFUPDATEPROBS_H_ #define VPX_VP8_COMMON_COEFUPDATEPROBS_H_ #ifdef __cplusplus extern "C" { #endif /* Update probabilities for the nodes in the token entropy tree. 
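Each entry is the probability, coded with the boolean coder, that the
corresponding coefficient probability is replaced in the frame header;
the decoder-side loop (see vp8/decoder/decodeframe.c) is essentially:

  for each (block type i, band j, context k, node l)
    if (vp8_read(bc, vp8_coef_update_probs[i][j][k][l]))
      fc->coef_probs[i][j][k][l] = (vp8_prob)vp8_read_literal(bc, 8);
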
Generated file included by entropy.c */ const vp8_prob vp8_coef_update_probs [BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES] = { { { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 }, { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 }, { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, }, { { { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 }, { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 }, }, { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, }, { { { 186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 }, { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 }, { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 }, }, { { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 }, }, { { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 
255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, }, { { { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 }, { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 }, }, { { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, { 248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, { { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, }, }, }; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_COEFUPDATEPROBS_H_ libvpx-1.8.2/vp8/common/common.h000066400000000000000000000025061357355204000165030ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_COMMON_H_ #define VPX_VP8_COMMON_COMMON_H_ #include /* Interface header for common constant data structures and lookup tables */ #include "vpx_mem/vpx_mem.h" #ifdef __cplusplus extern "C" { #endif /* Only need this for fixed-size arrays, for structs just assign. */ #define vp8_copy(Dest, Src) \ { \ assert(sizeof(Dest) == sizeof(Src)); \ memcpy(Dest, Src, sizeof(Src)); \ } /* Use this for variably-sized arrays. 
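A hypothetical call, vp8_copy_array(dst_probs, src_probs, n), copies n
elements after asserting that the destination and source element sizes
match.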
*/ #define vp8_copy_array(Dest, Src, N) \ { \ assert(sizeof(*(Dest)) == sizeof(*(Src))); \ memcpy(Dest, Src, (N) * sizeof(*(Src))); \ } #define vp8_zero(Dest) memset(&(Dest), 0, sizeof(Dest)); #define vp8_zero_array(Dest, N) memset(Dest, 0, (N) * sizeof(*(Dest))); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_COMMON_H_ libvpx-1.8.2/vp8/common/context.c000066400000000000000000000423241357355204000166740ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "entropy.h" /* *** GENERATED FILE: DO NOT EDIT *** */ #if 0 int Contexts[vp8_coef_counter_dimen]; const int default_contexts[vp8_coef_counter_dimen] = { { // Block Type ( 0 ) { // Coeff Band ( 0 ) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, }, { // Coeff Band ( 1 ) {30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593,}, {26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987,}, {10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104,}, }, { // Coeff Band ( 2 ) {25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0,}, {9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294,}, {1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879,}, }, { // Coeff Band ( 3 ) {26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0,}, {8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302,}, { 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611,}, }, { // Coeff Band ( 4 ) {10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0,}, {2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073,}, { 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50,}, }, { // Coeff Band ( 5 ) {10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0,}, {2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362,}, { 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190,}, }, { // Coeff Band ( 6 ) {40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0,}, {6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164,}, { 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345,}, }, { // Coeff Band ( 7 ) { 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319,}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8,}, }, }, { // Block Type ( 1 ) { // Coeff Band ( 0 ) {3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289,}, {8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914,}, {9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620,}, }, { // Coeff Band ( 1 ) {12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0,}, {11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988,}, {7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136,}, }, { // Coeff Band ( 2 ) {15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0,}, {7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980,}, {1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429,}, }, { // Coeff Band ( 3 ) {19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0,}, {9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820,}, {1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679,}, }, { // Coeff Band ( 4 ) {12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0,}, {4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127,}, { 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101,}, }, { // Coeff Band ( 5 ) {12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0,}, {4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157,}, { 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198,}, }, { // Coeff 
Band ( 6 ) {61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0,}, {15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195,}, { 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507,}, }, { // Coeff Band ( 7 ) { 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 412, 13, 0, 0, 0, 0, 0, 0, 0, 0, 1641,}, { 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30,}, }, }, { // Block Type ( 2 ) { // Coeff Band ( 0 ) { 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798,}, {1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837,}, {1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122,}, }, { // Coeff Band ( 1 ) {1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0,}, {1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063,}, {1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047,}, }, { // Coeff Band ( 2 ) { 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0,}, { 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404,}, { 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236,}, }, { // Coeff Band ( 3 ) { 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157,}, { 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300,}, }, { // Coeff Band ( 4 ) { 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427,}, { 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7,}, }, { // Coeff Band ( 5 ) { 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652,}, { 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30,}, }, { // Coeff Band ( 6 ) { 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517,}, { 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3,}, }, { // Coeff Band ( 7 ) { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, }, }, { // Block Type ( 3 ) { // Coeff Band ( 0 ) {2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694,}, {8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572,}, {11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284,}, }, { // Coeff Band ( 1 ) {9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0,}, {12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280,}, {10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460,}, }, { // Coeff Band ( 2 ) {6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0,}, {6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539,}, {3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138,}, }, { // Coeff Band ( 3 ) {11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0,}, {9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181,}, {4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267,}, }, { // Coeff Band ( 4 ) {4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0,}, {3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401,}, {1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268,}, }, { // Coeff Band ( 5 ) {8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0,}, {3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811,}, {1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527,}, }, { // Coeff Band ( 6 ) {27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0,}, {5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954,}, {1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979,}, }, { // Coeff Band ( 7 ) { 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,}, { 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459,}, { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13,}, }, }, }; //Update probabilities for the nodes in the token entropy tree. 
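// (This dead #if 0 table appears to duplicate vp8_coef_update_probs in
// coefupdateprobs.h.)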
const vp8_prob tree_update_probs[vp8_coef_tree_dimen] = { { { {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, {249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, {234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255, }, {250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255, }, {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, }, { { {217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255, }, {234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255, }, }, { {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, }, { { {186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255, }, {234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255, }, {251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255, }, }, { {255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255, }, }, { {255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 254, 255, 255, 
255, 255, 255, 255, 255, 255, 255, }, {254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, }, { { {248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255, }, {248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, {246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, {252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255, }, }, { {255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255, }, {248, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, {253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, {252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, }, {250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, { {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, }, }, }, }; #endif libvpx-1.8.2/vp8/common/debugmodes.c000066400000000000000000000062131357355204000173230ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "blockd.h" void vp8_print_modes_and_motion_vectors(MODE_INFO *mi, int rows, int cols, int frame) { int mb_row; int mb_col; int mb_index = 0; FILE *mvs = fopen("mvs.stt", "a"); /* print out the macroblock Y modes */ mb_index = 0; fprintf(mvs, "Mb Modes for Frame %d\n", frame); for (mb_row = 0; mb_row < rows; ++mb_row) { for (mb_col = 0; mb_col < cols; ++mb_col) { fprintf(mvs, "%2d ", mi[mb_index].mbmi.mode); mb_index++; } fprintf(mvs, "\n"); mb_index++; } fprintf(mvs, "\n"); mb_index = 0; fprintf(mvs, "Mb mv ref for Frame %d\n", frame); for (mb_row = 0; mb_row < rows; ++mb_row) { for (mb_col = 0; mb_col < cols; ++mb_col) { fprintf(mvs, "%2d ", mi[mb_index].mbmi.ref_frame); mb_index++; } fprintf(mvs, "\n"); mb_index++; } fprintf(mvs, "\n"); /* print out the macroblock UV modes */ mb_index = 0; fprintf(mvs, "UV Modes for Frame %d\n", frame); for (mb_row = 0; mb_row < rows; ++mb_row) { for (mb_col = 0; mb_col < cols; ++mb_col) { fprintf(mvs, "%2d ", mi[mb_index].mbmi.uv_mode); mb_index++; } mb_index++; fprintf(mvs, "\n"); } fprintf(mvs, "\n"); /* print out the block modes */ fprintf(mvs, "Mbs for Frame %d\n", frame); { int b_row; for (b_row = 0; b_row < 4 * rows; ++b_row) { int b_col; int bindex; for (b_col = 0; b_col < 4 * cols; ++b_col) { mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2); bindex = (b_row & 3) * 4 + (b_col & 3); if (mi[mb_index].mbmi.mode == B_PRED) fprintf(mvs, "%2d ", mi[mb_index].bmi[bindex].as_mode); else fprintf(mvs, "xx "); } fprintf(mvs, "\n"); } } fprintf(mvs, "\n"); /* print out the macroblock mvs */ mb_index = 0; fprintf(mvs, "MVs for Frame %d\n", frame); for (mb_row = 0; mb_row < rows; ++mb_row) { for (mb_col = 0; mb_col < cols; ++mb_col) { fprintf(mvs, "%5d:%-5d", mi[mb_index].mbmi.mv.as_mv.row / 2, mi[mb_index].mbmi.mv.as_mv.col / 2); mb_index++; } mb_index++; fprintf(mvs, "\n"); } fprintf(mvs, "\n"); /* print out the block modes */ fprintf(mvs, "MVs for Frame %d\n", frame); { int b_row; for (b_row = 0; b_row < 4 * rows; ++b_row) { int b_col; int bindex; for (b_col = 0; b_col < 4 * cols; ++b_col) { mb_index = (b_row >> 2) * (cols + 1) + (b_col >> 2); bindex = (b_row & 3) * 4 + (b_col & 3); fprintf(mvs, "%3d:%-3d ", mi[mb_index].bmi[bindex].mv.as_mv.row, mi[mb_index].bmi[bindex].mv.as_mv.col); } fprintf(mvs, "\n"); } } fprintf(mvs, "\n"); fclose(mvs); } libvpx-1.8.2/vp8/common/default_coef_probs.h000066400000000000000000000205061357355204000210400ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP8_COMMON_DEFAULT_COEF_PROBS_H_ #define VPX_VP8_COMMON_DEFAULT_COEF_PROBS_H_ #ifdef __cplusplus extern "C" { #endif /*Generated file, included by entropy.c*/ static const vp8_prob default_coef_probs [BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES] = { { /* Block Type ( 0 ) */ { /* Coeff Band ( 0 )*/ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } }, { /* Coeff Band ( 1 )*/ { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 } }, { /* Coeff Band ( 2 )*/ { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 } }, { /* Coeff Band ( 3 )*/ { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 }, { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 } }, { /* Coeff Band ( 4 )*/ { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 } }, { /* Coeff Band ( 5 )*/ { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 } }, { /* Coeff Band ( 6 )*/ { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, { 177, 135, 243, 255, 234, 225, 128, 128, 128, 128, 128 }, { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 } }, { /* Coeff Band ( 7 )*/ { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } }, { /* Block Type ( 1 ) */ { /* Coeff Band ( 0 )*/ { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 } }, { /* Coeff Band ( 1 )*/ { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 } }, { /* Coeff Band ( 2 )*/ { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 } }, { /* Coeff Band ( 3 )*/ { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 } }, { /* Coeff Band ( 4 )*/ { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 }, { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 } }, { /* Coeff Band ( 5 )*/ { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 } }, { /* Coeff Band ( 6 )*/ { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 } }, { /* Coeff Band ( 7 )*/ { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 } } }, { /* Block Type ( 2 ) */ { /* 
Coeff Band ( 0 )*/ { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 } }, { /* Coeff Band ( 1 )*/ { 1, 95, 247, 253, 212, 183, 255, 255, 128, 128, 128 }, { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 } }, { /* Coeff Band ( 2 )*/ { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 } }, { /* Coeff Band ( 3 )*/ { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 }, { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 } }, { /* Coeff Band ( 4 )*/ { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } }, { /* Coeff Band ( 5 )*/ { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 } }, { /* Coeff Band ( 6 )*/ { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 } }, { /* Coeff Band ( 7 )*/ { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 } } }, { /* Block Type ( 3 ) */ { /* Coeff Band ( 0 )*/ { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 }, { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 } }, { /* Coeff Band ( 1 )*/ { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 } }, { /* Coeff Band ( 2 )*/ { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 }, { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 } }, { /* Coeff Band ( 3 )*/ { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, { 28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 } }, { /* Coeff Band ( 4 )*/ { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 } }, { /* Coeff Band ( 5 )*/ { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 } }, { /* Coeff Band ( 6 )*/ { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 } }, { /* Coeff Band ( 7 )*/ { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 } } } }; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_DEFAULT_COEF_PROBS_H_ libvpx-1.8.2/vp8/common/dequantize.c000066400000000000000000000017141357355204000173570ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" #include "vp8_rtcd.h" #include "vp8/common/blockd.h" #include "vpx_mem/vpx_mem.h" void vp8_dequantize_b_c(BLOCKD *d, short *DQC) { int i; short *DQ = d->dqcoeff; short *Q = d->qcoeff; for (i = 0; i < 16; ++i) { DQ[i] = Q[i] * DQC[i]; } } void vp8_dequant_idct_add_c(short *input, short *dq, unsigned char *dest, int stride) { int i; for (i = 0; i < 16; ++i) { input[i] = dq[i] * input[i]; } vp8_short_idct4x4llm_c(input, dest, stride, dest, stride); memset(input, 0, 32); } libvpx-1.8.2/vp8/common/entropy.c000066400000000000000000000127031357355204000167060ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "entropy.h" #include "blockd.h" #include "onyxc_int.h" #include "vpx_mem/vpx_mem.h" #include "coefupdateprobs.h" DECLARE_ALIGNED(16, const unsigned char, vp8_norm[256]) = { 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]) = { 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7 }; DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]) = { 0, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0 }; DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]) = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15, }; DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]) = { 1, 2, 6, 7, 3, 5, 8, 13, 4, 9, 12, 14, 10, 11, 15, 16 }; /* vp8_default_zig_zag_mask generated with: void vp8_init_scan_order_mask() { int i; for (i = 0; i < 16; ++i) { vp8_default_zig_zag_mask[vp8_default_zig_zag1d[i]] = 1 << i; } } */ DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]) = { 1, 2, 32, 64, 4, 16, 128, 4096, 8, 256, 2048, 8192, 512, 1024, 16384, -32768 }; const int vp8_mb_feature_data_bits[MB_LVL_MAX] = { 7, 6 }; /* Array indices are identical to previously-existing CONTEXT_NODE indices */ /* corresponding _CONTEXT_NODEs */ /* clang-format off */ const vp8_tree_index vp8_coef_tree[22] = { -DCT_EOB_TOKEN, 2, /* 0 = EOB */ -ZERO_TOKEN, 4, /* 1 = ZERO */ -ONE_TOKEN, 6, /* 2 = ONE */ 8, 12, /* 3 = LOW_VAL */ -TWO_TOKEN, 10, /* 4 = TWO */ -THREE_TOKEN, -FOUR_TOKEN, /* 5 = THREE */ 14, 16, /* 6 = HIGH_LOW */ 
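*/

/* Note on the scan tables defined in this file: vp8_default_inv_zig_zag is
 * the 1-based inverse of vp8_default_zig_zag1d, i.e. for every position i,
 * vp8_default_inv_zig_zag[vp8_default_zig_zag1d[i]] == i + 1. A tiny
 * self-check sketch (not built; assumes <assert.h> is available): */
#if 0
static void check_scan_tables(void) {
  int i;
  for (i = 0; i < 16; ++i)
    assert(vp8_default_inv_zig_zag[vp8_default_zig_zag1d[i]] == i + 1);
}
#endif
/*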
-DCT_VAL_CATEGORY1, -DCT_VAL_CATEGORY2, /* 7 = CAT_ONE */ 18, 20, /* 8 = CAT_THREEFOUR */ -DCT_VAL_CATEGORY3, -DCT_VAL_CATEGORY4, /* 9 = CAT_THREE */ -DCT_VAL_CATEGORY5, -DCT_VAL_CATEGORY6 /* 10 = CAT_FIVE */ }; /* clang-format on */ /* vp8_coef_encodings generated with: vp8_tokens_from_tree(vp8_coef_encodings, vp8_coef_tree); */ vp8_token vp8_coef_encodings[MAX_ENTROPY_TOKENS] = { { 2, 2 }, { 6, 3 }, { 28, 5 }, { 58, 6 }, { 59, 6 }, { 60, 6 }, { 61, 6 }, { 124, 7 }, { 125, 7 }, { 126, 7 }, { 127, 7 }, { 0, 1 } }; /* Trees for extra bits. Probabilities are constant and do not depend on previously encoded bits */ static const vp8_prob Pcat1[] = { 159 }; static const vp8_prob Pcat2[] = { 165, 145 }; static const vp8_prob Pcat3[] = { 173, 148, 140 }; static const vp8_prob Pcat4[] = { 176, 155, 140, 135 }; static const vp8_prob Pcat5[] = { 180, 157, 141, 134, 130 }; static const vp8_prob Pcat6[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129 }; /* tree index tables generated with: void init_bit_tree(vp8_tree_index *p, int n) { int i = 0; while (++i < n) { p[0] = p[1] = i << 1; p += 2; } p[0] = p[1] = 0; } void init_bit_trees() { init_bit_tree(cat1, 1); init_bit_tree(cat2, 2); init_bit_tree(cat3, 3); init_bit_tree(cat4, 4); init_bit_tree(cat5, 5); init_bit_tree(cat6, 11); } */ static const vp8_tree_index cat1[2] = { 0, 0 }; static const vp8_tree_index cat2[4] = { 2, 2, 0, 0 }; static const vp8_tree_index cat3[6] = { 2, 2, 4, 4, 0, 0 }; static const vp8_tree_index cat4[8] = { 2, 2, 4, 4, 6, 6, 0, 0 }; static const vp8_tree_index cat5[10] = { 2, 2, 4, 4, 6, 6, 8, 8, 0, 0 }; static const vp8_tree_index cat6[22] = { 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14, 16, 16, 18, 18, 20, 20, 0, 0 }; const vp8_extra_bit_struct vp8_extra_bits[12] = { { 0, 0, 0, 0 }, { 0, 0, 0, 1 }, { 0, 0, 0, 2 }, { 0, 0, 0, 3 }, { 0, 0, 0, 4 }, { cat1, Pcat1, 1, 5 }, { cat2, Pcat2, 2, 7 }, { cat3, Pcat3, 3, 11 }, { cat4, Pcat4, 4, 19 }, { cat5, Pcat5, 5, 35 }, { cat6, Pcat6, 11, 67 }, { 0, 0, 0, 0 } }; #include "default_coef_probs.h" void vp8_default_coef_probs(VP8_COMMON *pc) { memcpy(pc->fc.coef_probs, default_coef_probs, sizeof(default_coef_probs)); } libvpx-1.8.2/vp8/common/entropy.h000066400000000000000000000076261357355204000167230ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP8_COMMON_ENTROPY_H_ #define VPX_VP8_COMMON_ENTROPY_H_ #include "treecoder.h" #include "blockd.h" #ifdef __cplusplus extern "C" { #endif /* Coefficient token alphabet */ #define ZERO_TOKEN 0 /* 0 Extra Bits 0+0 */ #define ONE_TOKEN 1 /* 1 Extra Bits 0+1 */ #define TWO_TOKEN 2 /* 2 Extra Bits 0+1 */ #define THREE_TOKEN 3 /* 3 Extra Bits 0+1 */ #define FOUR_TOKEN 4 /* 4 Extra Bits 0+1 */ #define DCT_VAL_CATEGORY1 5 /* 5-6 Extra Bits 1+1 */ #define DCT_VAL_CATEGORY2 6 /* 7-10 Extra Bits 2+1 */ #define DCT_VAL_CATEGORY3 7 /* 11-18 Extra Bits 3+1 */ #define DCT_VAL_CATEGORY4 8 /* 19-34 Extra Bits 4+1 */ #define DCT_VAL_CATEGORY5 9 /* 35-66 Extra Bits 5+1 */ #define DCT_VAL_CATEGORY6 10 /* 67+ Extra Bits 11+1 */ #define DCT_EOB_TOKEN 11 /* EOB Extra Bits 0+0 */ #define MAX_ENTROPY_TOKENS 12 #define ENTROPY_NODES 11 extern const vp8_tree_index vp8_coef_tree[]; extern const struct vp8_token_struct vp8_coef_encodings[MAX_ENTROPY_TOKENS]; typedef struct { vp8_tree_p tree; const vp8_prob *prob; int Len; int base_val; } vp8_extra_bit_struct; extern const vp8_extra_bit_struct vp8_extra_bits[12]; /* indexed by token value */ #define PROB_UPDATE_BASELINE_COST 7 #define MAX_PROB 255 #define DCT_MAX_VALUE 2048 /* Coefficients are predicted via a 3-dimensional probability table. */ /* Outside dimension. 0 = Y no DC, 1 = Y2, 2 = UV, 3 = Y with DC */ #define BLOCK_TYPES 4 /* Middle dimension is a coarsening of the coefficient's position within the 4x4 DCT. */ #define COEF_BANDS 8 extern DECLARE_ALIGNED(16, const unsigned char, vp8_coef_bands[16]); /* Inside dimension is 3-valued measure of nearby complexity, that is, the extent to which nearby coefficients are nonzero. For the first coefficient (DC, unless block type is 0), we look at the (already encoded) blocks above and to the left of the current block. The context index is then the number (0,1,or 2) of these blocks having nonzero coefficients. After decoding a coefficient, the measure is roughly the size of the most recently decoded coefficient (0 for 0, 1 for 1, 2 for >1). Note that the intuitive meaning of this measure changes as coefficients are decoded, e.g., prior to the first token, a zero means that my neighbors are empty while, after the first token, because of the use of end-of-block, a zero means we just decoded a zero and hence guarantees that a non-zero coefficient will appear later in this block. However, this shift in meaning is perfectly OK because our context depends also on the coefficient band (and since zigzag positions 0, 1, and 2 are in distinct bands). */ /*# define DC_TOKEN_CONTEXTS 3*/ /* 00, 0!0, !0!0 */ #define PREV_COEF_CONTEXTS 3 extern DECLARE_ALIGNED(16, const unsigned char, vp8_prev_token_class[MAX_ENTROPY_TOKENS]); extern const vp8_prob vp8_coef_update_probs[BLOCK_TYPES][COEF_BANDS] [PREV_COEF_CONTEXTS][ENTROPY_NODES]; struct VP8Common; void vp8_default_coef_probs(struct VP8Common *); extern DECLARE_ALIGNED(16, const int, vp8_default_zig_zag1d[16]); extern DECLARE_ALIGNED(16, const short, vp8_default_inv_zig_zag[16]); extern DECLARE_ALIGNED(16, const short, vp8_default_zig_zag_mask[16]); extern const int vp8_mb_feature_data_bits[MB_LVL_MAX]; void vp8_coef_tree_initialize(void); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_ENTROPY_H_ libvpx-1.8.2/vp8/common/entropymode.c000066400000000000000000000071221357355204000175520ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #define USE_PREBUILT_TABLES #include "entropymode.h" #include "entropy.h" #include "vpx_mem/vpx_mem.h" #include "vp8_entropymodedata.h" int vp8_mv_cont(const int_mv *l, const int_mv *a) { int lez = (l->as_int == 0); int aez = (a->as_int == 0); int lea = (l->as_int == a->as_int); if (lea && lez) return SUBMVREF_LEFT_ABOVE_ZED; if (lea) return SUBMVREF_LEFT_ABOVE_SAME; if (aez) return SUBMVREF_ABOVE_ZED; if (lez) return SUBMVREF_LEFT_ZED; return SUBMVREF_NORMAL; } static const vp8_prob sub_mv_ref_prob[VP8_SUBMVREFS - 1] = { 180, 162, 25 }; const vp8_prob vp8_sub_mv_ref_prob2[SUBMVREF_COUNT][VP8_SUBMVREFS - 1] = { { 147, 136, 18 }, { 106, 145, 1 }, { 179, 121, 1 }, { 223, 1, 34 }, { 208, 1, 1 } }; const vp8_mbsplit vp8_mbsplits[VP8_NUMMBSPLITS] = { { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 }, { 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1 }, { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 }, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } }; const int vp8_mbsplit_count[VP8_NUMMBSPLITS] = { 2, 2, 4, 16 }; const vp8_prob vp8_mbsplit_probs[VP8_NUMMBSPLITS - 1] = { 110, 111, 150 }; /* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. */ const vp8_tree_index vp8_bmode_tree[18] = /* INTRAMODECONTEXTNODE value */ { -B_DC_PRED, 2, /* 0 = DC_NODE */ -B_TM_PRED, 4, /* 1 = TM_NODE */ -B_VE_PRED, 6, /* 2 = VE_NODE */ 8, 12, /* 3 = COM_NODE */ -B_HE_PRED, 10, /* 4 = HE_NODE */ -B_RD_PRED, -B_VR_PRED, /* 5 = RD_NODE */ -B_LD_PRED, 14, /* 6 = LD_NODE */ -B_VL_PRED, 16, /* 7 = VL_NODE */ -B_HD_PRED, -B_HU_PRED /* 8 = HD_NODE */ }; /* Again, these trees use the same probability indices as their explicitly-programmed predecessors. */ const vp8_tree_index vp8_ymode_tree[8] = { -DC_PRED, 2, 4, 6, -V_PRED, -H_PRED, -TM_PRED, -B_PRED }; const vp8_tree_index vp8_kf_ymode_tree[8] = { -B_PRED, 2, 4, 6, -DC_PRED, -V_PRED, -H_PRED, -TM_PRED }; const vp8_tree_index vp8_uv_mode_tree[6] = { -DC_PRED, 2, -V_PRED, 4, -H_PRED, -TM_PRED }; const vp8_tree_index vp8_mbsplit_tree[6] = { -3, 2, -2, 4, -0, -1 }; const vp8_tree_index vp8_mv_ref_tree[8] = { -ZEROMV, 2, -NEARESTMV, 4, -NEARMV, 6, -NEWMV, -SPLITMV }; const vp8_tree_index vp8_sub_mv_ref_tree[6] = { -LEFT4X4, 2, -ABOVE4X4, 4, -ZERO4X4, -NEW4X4 }; const vp8_tree_index vp8_small_mvtree[14] = { 2, 8, 4, 6, -0, -1, -2, -3, 10, 12, -4, -5, -6, -7 }; void vp8_init_mbmode_probs(VP8_COMMON *x) { memcpy(x->fc.ymode_prob, vp8_ymode_prob, sizeof(vp8_ymode_prob)); memcpy(x->fc.uv_mode_prob, vp8_uv_mode_prob, sizeof(vp8_uv_mode_prob)); memcpy(x->fc.sub_mv_ref_prob, sub_mv_ref_prob, sizeof(sub_mv_ref_prob)); } void vp8_default_bmode_probs(vp8_prob dest[VP8_BINTRAMODES - 1]) { memcpy(dest, vp8_bmode_prob, sizeof(vp8_bmode_prob)); } libvpx-1.8.2/vp8/common/entropymode.h000066400000000000000000000056361357355204000175670ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_ENTROPYMODE_H_ #define VPX_VP8_COMMON_ENTROPYMODE_H_ #include "onyxc_int.h" #include "treecoder.h" #ifdef __cplusplus extern "C" { #endif typedef enum { SUBMVREF_NORMAL, SUBMVREF_LEFT_ZED, SUBMVREF_ABOVE_ZED, SUBMVREF_LEFT_ABOVE_SAME, SUBMVREF_LEFT_ABOVE_ZED } sumvfref_t; typedef int vp8_mbsplit[16]; #define VP8_NUMMBSPLITS 4 extern const vp8_mbsplit vp8_mbsplits[VP8_NUMMBSPLITS]; extern const int vp8_mbsplit_count[VP8_NUMMBSPLITS]; /* # of subsets */ extern const vp8_prob vp8_mbsplit_probs[VP8_NUMMBSPLITS - 1]; extern int vp8_mv_cont(const int_mv *l, const int_mv *a); #define SUBMVREF_COUNT 5 extern const vp8_prob vp8_sub_mv_ref_prob2[SUBMVREF_COUNT][VP8_SUBMVREFS - 1]; extern const unsigned int vp8_kf_default_bmode_counts[VP8_BINTRAMODES] [VP8_BINTRAMODES] [VP8_BINTRAMODES]; extern const vp8_tree_index vp8_bmode_tree[]; extern const vp8_tree_index vp8_ymode_tree[]; extern const vp8_tree_index vp8_kf_ymode_tree[]; extern const vp8_tree_index vp8_uv_mode_tree[]; extern const vp8_tree_index vp8_mbsplit_tree[]; extern const vp8_tree_index vp8_mv_ref_tree[]; extern const vp8_tree_index vp8_sub_mv_ref_tree[]; extern const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES]; extern const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES]; extern const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES]; extern const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES]; extern const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS]; /* Inter mode values do not start at zero */ extern const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS]; extern const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS]; extern const vp8_tree_index vp8_small_mvtree[]; extern const struct vp8_token_struct vp8_small_mvencodings[8]; /* Key frame default mode probs */ extern const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES] [VP8_BINTRAMODES - 1]; extern const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES - 1]; extern const vp8_prob vp8_kf_ymode_prob[VP8_YMODES - 1]; void vp8_init_mbmode_probs(VP8_COMMON *x); void vp8_default_bmode_probs(vp8_prob dest[VP8_BINTRAMODES - 1]); void vp8_kf_default_bmode_probs( vp8_prob dest[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES - 1]); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_ENTROPYMODE_H_ libvpx-1.8.2/vp8/common/entropymv.c000066400000000000000000000026431357355204000172530ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "entropymv.h" /* clang-format off */ const MV_CONTEXT vp8_mv_update_probs[2] = { { { 237, 246, 253, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 250, 250, 252, 254, 254 } }, { { 231, 243, 245, 253, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 251, 251, 254, 254, 254 } } }; /* clang-format on */ const MV_CONTEXT vp8_default_mv_context[2] = { { { /* row */ 162, /* is short */ 128, /* sign */ 225, 146, 172, 147, 214, 39, 156, /* short tree */ 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 /* long bits */ } }, { { /* same for column */ 164, /* is short */ 128, /**/ 204, 170, 119, 235, 140, 230, 228, /**/ 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 /* long bits */ } } }; libvpx-1.8.2/vp8/common/entropymv.h000066400000000000000000000030451357355204000172550ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_ENTROPYMV_H_ #define VPX_VP8_COMMON_ENTROPYMV_H_ #include "treecoder.h" #ifdef __cplusplus extern "C" { #endif enum { mv_max = 1023, /* max absolute value of a MV component */ MVvals = (2 * mv_max) + 1, /* # possible values "" */ mvfp_max = 255, /* max absolute value of a full pixel MV component */ MVfpvals = (2 * mvfp_max) + 1, /* # possible full pixel MV values */ mvlong_width = 10, /* Large MVs have 9 bit magnitudes */ mvnum_short = 8, /* magnitudes 0 through 7 */ /* probability offsets for coding each MV component */ mvpis_short = 0, /* short (<= 7) vs long (>= 8) */ MVPsign, /* sign for non-zero */ MVPshort, /* 8 short values = 7-position tree */ MVPbits = MVPshort + mvnum_short - 1, /* mvlong_width long value bits */ MVPcount = MVPbits + mvlong_width /* (with independent probabilities) */ }; typedef struct mv_context { vp8_prob prob[MVPcount]; /* often come in row, col pairs */ } MV_CONTEXT; extern const MV_CONTEXT vp8_mv_update_probs[2], vp8_default_mv_context[2]; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_ENTROPYMV_H_ libvpx-1.8.2/vp8/common/extend.c000066400000000000000000000121501357355204000164710ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include "extend.h"
#include "vpx_mem/vpx_mem.h"

static void copy_and_extend_plane(unsigned char *s, /* source */
                                  int sp,           /* source pitch */
                                  unsigned char *d, /* destination */
                                  int dp,           /* destination pitch */
                                  int h,            /* height */
                                  int w,            /* width */
                                  int et,           /* extend top border */
                                  int el,           /* extend left border */
                                  int eb,           /* extend bottom border */
                                  int er) {         /* extend right border */
  int i;
  unsigned char *src_ptr1, *src_ptr2;
  unsigned char *dest_ptr1, *dest_ptr2;
  int linesize;

  /* copy the left and right most columns out */
  src_ptr1 = s;
  src_ptr2 = s + w - 1;
  dest_ptr1 = d - el;
  dest_ptr2 = d + w;

  for (i = 0; i < h; ++i) {
    memset(dest_ptr1, src_ptr1[0], el);
    memcpy(dest_ptr1 + el, src_ptr1, w);
    memset(dest_ptr2, src_ptr2[0], er);
    src_ptr1 += sp;
    src_ptr2 += sp;
    dest_ptr1 += dp;
    dest_ptr2 += dp;
  }

  /* Now copy the top and bottom lines into each line of the respective
   * borders
   */
  src_ptr1 = d - el;
  src_ptr2 = d + dp * (h - 1) - el;
  dest_ptr1 = d + dp * (-et) - el;
  dest_ptr2 = d + dp * (h)-el;
  linesize = el + er + w;

  for (i = 0; i < et; ++i) {
    memcpy(dest_ptr1, src_ptr1, linesize);
    dest_ptr1 += dp;
  }

  for (i = 0; i < eb; ++i) {
    memcpy(dest_ptr2, src_ptr2, linesize);
    dest_ptr2 += dp;
  }
}

void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src,
                               YV12_BUFFER_CONFIG *dst) {
  int et = dst->border;
  int el = dst->border;
  int eb = dst->border + dst->y_height - src->y_height;
  int er = dst->border + dst->y_width - src->y_width;

  copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer,
                        dst->y_stride, src->y_height, src->y_width, et, el, eb,
                        er);

  et = dst->border >> 1;
  el = dst->border >> 1;
  eb = (dst->border >> 1) + dst->uv_height - src->uv_height;
  er = (dst->border >> 1) + dst->uv_width - src->uv_width;

  copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer,
                        dst->uv_stride, src->uv_height, src->uv_width, et, el,
                        eb, er);

  copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer,
                        dst->uv_stride, src->uv_height, src->uv_width, et, el,
                        eb, er);
}

void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src,
                                         YV12_BUFFER_CONFIG *dst, int srcy,
                                         int srcx, int srch, int srcw) {
  int et = dst->border;
  int el = dst->border;
  int eb = dst->border + dst->y_height - src->y_height;
  int er = dst->border + dst->y_width - src->y_width;
  int src_y_offset = srcy * src->y_stride + srcx;
  int dst_y_offset = srcy * dst->y_stride + srcx;
  int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1);
  int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1);

  /* If the side is not touching the border then don't extend.
*/ if (srcy) et = 0; if (srcx) el = 0; if (srcy + srch != src->y_height) eb = 0; if (srcx + srcw != src->y_width) er = 0; copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride, dst->y_buffer + dst_y_offset, dst->y_stride, srch, srcw, et, el, eb, er); et = (et + 1) >> 1; el = (el + 1) >> 1; eb = (eb + 1) >> 1; er = (er + 1) >> 1; srch = (srch + 1) >> 1; srcw = (srcw + 1) >> 1; copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride, dst->u_buffer + dst_uv_offset, dst->uv_stride, srch, srcw, et, el, eb, er); copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride, dst->v_buffer + dst_uv_offset, dst->uv_stride, srch, srcw, et, el, eb, er); } /* note the extension is only for the last row, for intra prediction purpose */ void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr) { int i; YPtr += ybf->y_stride * 14; UPtr += ybf->uv_stride * 6; VPtr += ybf->uv_stride * 6; for (i = 0; i < 4; ++i) { YPtr[i] = YPtr[-1]; UPtr[i] = UPtr[-1]; VPtr[i] = VPtr[-1]; } YPtr += ybf->y_stride; UPtr += ybf->uv_stride; VPtr += ybf->uv_stride; for (i = 0; i < 4; ++i) { YPtr[i] = YPtr[-1]; UPtr[i] = UPtr[-1]; VPtr[i] = VPtr[-1]; } } libvpx-1.8.2/vp8/common/extend.h000066400000000000000000000021111357355204000164720ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_EXTEND_H_ #define VPX_VP8_COMMON_EXTEND_H_ #include "vpx_scale/yv12config.h" #ifdef __cplusplus extern "C" { #endif void vp8_extend_mb_row(YV12_BUFFER_CONFIG *ybf, unsigned char *YPtr, unsigned char *UPtr, unsigned char *VPtr); void vp8_copy_and_extend_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst); void vp8_copy_and_extend_frame_with_rect(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int srcy, int srcx, int srch, int srcw); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_EXTEND_H_ libvpx-1.8.2/vp8/common/filter.c000066400000000000000000000343061357355204000164760ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <assert.h>
#include "./vp8_rtcd.h"
#include "vp8/common/filter.h"

DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]) = {
  { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
  { 64, 64 }, { 48, 80 },  { 32, 96 }, { 16, 112 }
};

DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]) = {
  { 0, 0, 128, 0, 0, 0 },     /* note that 1/8 pel positions are just as per
                                 alpha -0.5 bicubic */
  { 0, -6, 123, 12, -1, 0 },
  { 2, -11, 108, 36, -8, 1 }, /* New 1/4 pel 6 tap filter */
  { 0, -9, 93, 50, -6, 0 },
  { 3, -16, 77, 77, -16, 3 }, /* New 1/2 pel 6 tap filter */
  { 0, -6, 50, 93, -9, 0 },
  { 1, -8, 36, 108, -11, 2 }, /* New 1/4 pel 6 tap filter */
  { 0, -1, 12, 123, -6, 0 },
};

static void filter_block2d_first_pass(unsigned char *src_ptr, int *output_ptr,
                                      unsigned int src_pixels_per_line,
                                      unsigned int pixel_step,
                                      unsigned int output_height,
                                      unsigned int output_width,
                                      const short *vp8_filter) {
  unsigned int i, j;
  int Temp;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
             ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
             ((int)src_ptr[0] * vp8_filter[2]) +
             ((int)src_ptr[pixel_step] * vp8_filter[3]) +
             ((int)src_ptr[2 * pixel_step] * vp8_filter[4]) +
             ((int)src_ptr[3 * pixel_step] * vp8_filter[5]) +
             (VP8_FILTER_WEIGHT >> 1); /* Rounding */

      /* Normalize back to 0-255 */
      Temp = Temp >> VP8_FILTER_SHIFT;

      if (Temp < 0) {
        Temp = 0;
      } else if (Temp > 255) {
        Temp = 255;
      }

      output_ptr[j] = Temp;
      src_ptr++;
    }

    /* Next row... */
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_width;
  }
}

static void filter_block2d_second_pass(int *src_ptr, unsigned char *output_ptr,
                                       int output_pitch,
                                       unsigned int src_pixels_per_line,
                                       unsigned int pixel_step,
                                       unsigned int output_height,
                                       unsigned int output_width,
                                       const short *vp8_filter) {
  unsigned int i, j;
  int Temp;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      /* Apply filter */
      Temp = ((int)src_ptr[-2 * (int)pixel_step] * vp8_filter[0]) +
             ((int)src_ptr[-1 * (int)pixel_step] * vp8_filter[1]) +
             ((int)src_ptr[0] * vp8_filter[2]) +
             ((int)src_ptr[pixel_step] * vp8_filter[3]) +
             ((int)src_ptr[2 * pixel_step] * vp8_filter[4]) +
             ((int)src_ptr[3 * pixel_step] * vp8_filter[5]) +
             (VP8_FILTER_WEIGHT >> 1); /* Rounding */

      /* Normalize back to 0-255 */
      Temp = Temp >> VP8_FILTER_SHIFT;

      if (Temp < 0) {
        Temp = 0;
      } else if (Temp > 255) {
        Temp = 255;
      }

      output_ptr[j] = (unsigned char)Temp;
      src_ptr++;
    }

    /* Start next row */
    src_ptr += src_pixels_per_line - output_width;
    output_ptr += output_pitch;
  }
}

static void filter_block2d(unsigned char *src_ptr, unsigned char *output_ptr,
                           unsigned int src_pixels_per_line, int output_pitch,
                           const short *HFilter, const short *VFilter) {
  int FData[9 * 4]; /* Temp data buffer used in filtering */

  /* First filter 1-D horizontally... */
  filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
                            src_pixels_per_line, 1, 9, 4, HFilter);

  /* then filter vertically...
   */
  filter_block2d_second_pass(FData + 8, output_ptr, output_pitch, 4, 4, 4, 4,
                             VFilter);
}

void vp8_sixtap_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line,
                             int xoffset, int yoffset, unsigned char *dst_ptr,
                             int dst_pitch) {
  const short *HFilter;
  const short *VFilter;

  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */

  filter_block2d(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter,
                 VFilter);
}

void vp8_sixtap_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line,
                             int xoffset, int yoffset, unsigned char *dst_ptr,
                             int dst_pitch) {
  const short *HFilter;
  const short *VFilter;
  int FData[13 * 16]; /* Temp data buffer used in filtering */

  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */

  /* First filter 1-D horizontally... */
  filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
                            src_pixels_per_line, 1, 13, 8, HFilter);

  /* then filter vertically... */
  filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 8, 8,
                             VFilter);
}

void vp8_sixtap_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line,
                             int xoffset, int yoffset, unsigned char *dst_ptr,
                             int dst_pitch) {
  const short *HFilter;
  const short *VFilter;
  int FData[13 * 16]; /* Temp data buffer used in filtering */

  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */

  /* First filter 1-D horizontally... */
  filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
                            src_pixels_per_line, 1, 9, 8, HFilter);

  /* then filter vertically... */
  filter_block2d_second_pass(FData + 16, dst_ptr, dst_pitch, 8, 8, 4, 8,
                             VFilter);
}

void vp8_sixtap_predict16x16_c(unsigned char *src_ptr, int src_pixels_per_line,
                               int xoffset, int yoffset,
                               unsigned char *dst_ptr, int dst_pitch) {
  const short *HFilter;
  const short *VFilter;
  int FData[21 * 24]; /* Temp data buffer used in filtering */

  HFilter = vp8_sub_pel_filters[xoffset]; /* 6 tap */
  VFilter = vp8_sub_pel_filters[yoffset]; /* 6 tap */

  /* First filter 1-D horizontally... */
  filter_block2d_first_pass(src_ptr - (2 * src_pixels_per_line), FData,
                            src_pixels_per_line, 1, 21, 16, HFilter);

  /* then filter vertically... */
  filter_block2d_second_pass(FData + 32, dst_ptr, dst_pitch, 16, 16, 16, 16,
                             VFilter);
}

/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil_first_pass
 *
 *  INPUTS        : UINT8  *src_ptr    : Pointer to source block.
 *                  UINT32  src_stride : Stride of source block.
 *                  UINT32  height     : Block height.
 *                  UINT32  width      : Block width.
 *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
 *
 *  OUTPUTS       : UINT16 *dst_ptr    : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
 *                  in the horizontal direction to produce the filtered output
 *                  block. Used to implement first-pass of 2-D separable
 *                  filter.
 *
 *  SPECIAL NOTES : Produces UINT16 output to retain precision for next pass.
 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
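 *
 *                  Example (taps from vp8_bilinear_filters above): at the
 *                  half-pel position the taps are { 64, 64 }, so each output
 *                  sample is (64 * a + 64 * b + 64) >> 7, i.e. the rounded
 *                  average of the two neighbouring source samples a and b.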
 *
 ****************************************************************************/
static void filter_block2d_bil_first_pass(
    unsigned char *src_ptr, unsigned short *dst_ptr, unsigned int src_stride,
    unsigned int height, unsigned int width, const short *vp8_filter) {
  unsigned int i, j;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      /* Apply bilinear filter */
      dst_ptr[j] =
          (((int)src_ptr[0] * vp8_filter[0]) +
           ((int)src_ptr[1] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2)) >>
          VP8_FILTER_SHIFT;
      src_ptr++;
    }

    /* Next row... */
    src_ptr += src_stride - width;
    dst_ptr += width;
  }
}

/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil_second_pass
 *
 *  INPUTS        : UINT16 *src_ptr    : Pointer to source block.
 *                  UINT32  dst_pitch  : Destination block pitch.
 *                  UINT32  height     : Block height.
 *                  UINT32  width      : Block width.
 *                  INT32  *vp8_filter : Array of 2 bi-linear filter taps.
 *
 *  OUTPUTS       : UINT8  *dst_ptr    : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : Applies a 1-D 2-tap bi-linear filter to the source block
 *                  in the vertical direction to produce the filtered output
 *                  block. Used to implement second-pass of 2-D separable
 *                  filter.
 *
 *  SPECIAL NOTES : Requires 16-bit input as produced by
 *                  filter_block2d_bil_first_pass.
 *                  Two filter taps should sum to VP8_FILTER_WEIGHT.
 *
 ****************************************************************************/
static void filter_block2d_bil_second_pass(unsigned short *src_ptr,
                                           unsigned char *dst_ptr,
                                           int dst_pitch, unsigned int height,
                                           unsigned int width,
                                           const short *vp8_filter) {
  unsigned int i, j;
  int Temp;

  for (i = 0; i < height; ++i) {
    for (j = 0; j < width; ++j) {
      /* Apply filter */
      Temp = ((int)src_ptr[0] * vp8_filter[0]) +
             ((int)src_ptr[width] * vp8_filter[1]) + (VP8_FILTER_WEIGHT / 2);
      dst_ptr[j] = (unsigned int)(Temp >> VP8_FILTER_SHIFT);
      src_ptr++;
    }

    /* Next row... */
    dst_ptr += dst_pitch;
  }
}

/****************************************************************************
 *
 *  ROUTINE       : filter_block2d_bil
 *
 *  INPUTS        : UINT8  *src_ptr   : Pointer to source block.
 *                  UINT32  src_pitch : Stride of source block.
 *                  UINT32  dst_pitch : Stride of destination block.
 *                  INT32  *HFilter   : Array of 2 horizontal filter
 *                                      taps.
 *                  INT32  *VFilter   : Array of 2 vertical filter taps.
 *                  INT32   Width     : Block width
 *                  INT32   Height    : Block height
 *
 *  OUTPUTS       : UINT16 *dst_ptr   : Pointer to filtered block.
 *
 *  RETURNS       : void
 *
 *  FUNCTION      : 2-D filters an input block by applying a 2-tap
 *                  bi-linear filter horizontally followed by a 2-tap
 *                  bi-linear filter vertically on the result.
 *
 *  SPECIAL NOTES : The largest block size that can be handled here is 16x16.
 *
 ****************************************************************************/
static void filter_block2d_bil(unsigned char *src_ptr, unsigned char *dst_ptr,
                               unsigned int src_pitch, unsigned int dst_pitch,
                               const short *HFilter, const short *VFilter,
                               int Width, int Height) {
  unsigned short FData[17 * 16]; /* Temp data buffer used in filtering */

  /* First filter 1-D horizontally... */
  filter_block2d_bil_first_pass(src_ptr, FData, src_pitch, Height + 1, Width,
                                HFilter);

  /* then 1-D vertically... */
  filter_block2d_bil_second_pass(FData, dst_ptr, dst_pitch, Height, Width,
                                 VFilter);
}

void vp8_bilinear_predict4x4_c(unsigned char *src_ptr, int src_pixels_per_line,
                               int xoffset, int yoffset,
                               unsigned char *dst_ptr, int dst_pitch) {
  const short *HFilter;
  const short *VFilter;
  // This represents a copy and is not required to be handled by optimizations.
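  // (Offset 0 selects the { 128, 0 } tap pair, for which the filter reduces
  // to (128 * src + 64) >> 7 == src, an identity; callers are expected to
  // use a plain copy for that case rather than calling this function.)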
assert((xoffset | yoffset) != 0); HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 4, 4); } void vp8_bilinear_predict8x8_c(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { const short *HFilter; const short *VFilter; assert((xoffset | yoffset) != 0); HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 8); } void vp8_bilinear_predict8x4_c(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { const short *HFilter; const short *VFilter; assert((xoffset | yoffset) != 0); HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 8, 4); } void vp8_bilinear_predict16x16_c(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { const short *HFilter; const short *VFilter; assert((xoffset | yoffset) != 0); HFilter = vp8_bilinear_filters[xoffset]; VFilter = vp8_bilinear_filters[yoffset]; filter_block2d_bil(src_ptr, dst_ptr, src_pixels_per_line, dst_pitch, HFilter, VFilter, 16, 16); } libvpx-1.8.2/vp8/common/filter.h000066400000000000000000000015241357355204000164770ustar00rootroot00000000000000/* * Copyright (c) 2011 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_FILTER_H_ #define VPX_VP8_COMMON_FILTER_H_ #include "vpx_ports/mem.h" #ifdef __cplusplus extern "C" { #endif #define BLOCK_HEIGHT_WIDTH 4 #define VP8_FILTER_WEIGHT 128 #define VP8_FILTER_SHIFT 7 extern DECLARE_ALIGNED(16, const short, vp8_bilinear_filters[8][2]); extern DECLARE_ALIGNED(16, const short, vp8_sub_pel_filters[8][6]); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_FILTER_H_ libvpx-1.8.2/vp8/common/findnearmv.c000066400000000000000000000125351357355204000173420ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "findnearmv.h" const unsigned char vp8_mbsplit_offset[4][16] = { { 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 2, 8, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } }; /* Predict motion vectors using those from already-decoded nearby blocks. Note that we only consider one 4x4 subblock from each candidate 16x16 macroblock. 
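   (Weighting used in the tallies below: the above and left neighbours each
   add 2 to the candidate counts, and the above-left neighbour adds 1.)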
*/ void vp8_find_near_mvs(MACROBLOCKD *xd, const MODE_INFO *here, int_mv *nearest, int_mv *nearby, int_mv *best_mv, int near_mv_ref_cnts[4], int refframe, int *ref_frame_sign_bias) { const MODE_INFO *above = here - xd->mode_info_stride; const MODE_INFO *left = here - 1; const MODE_INFO *aboveleft = above - 1; int_mv near_mvs[4]; int_mv *mv = near_mvs; int *cntx = near_mv_ref_cnts; enum { CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; /* Zero accumulators */ mv[0].as_int = mv[1].as_int = mv[2].as_int = 0; near_mv_ref_cnts[0] = near_mv_ref_cnts[1] = near_mv_ref_cnts[2] = near_mv_ref_cnts[3] = 0; /* Process above */ if (above->mbmi.ref_frame != INTRA_FRAME) { if (above->mbmi.mv.as_int) { (++mv)->as_int = above->mbmi.mv.as_int; mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, mv, ref_frame_sign_bias); ++cntx; } *cntx += 2; } /* Process left */ if (left->mbmi.ref_frame != INTRA_FRAME) { if (left->mbmi.mv.as_int) { int_mv this_mv; this_mv.as_int = left->mbmi.mv.as_int; mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias); if (this_mv.as_int != mv->as_int) { (++mv)->as_int = this_mv.as_int; ++cntx; } *cntx += 2; } else { near_mv_ref_cnts[CNT_INTRA] += 2; } } /* Process above left */ if (aboveleft->mbmi.ref_frame != INTRA_FRAME) { if (aboveleft->mbmi.mv.as_int) { int_mv this_mv; this_mv.as_int = aboveleft->mbmi.mv.as_int; mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &this_mv, ref_frame_sign_bias); if (this_mv.as_int != mv->as_int) { (++mv)->as_int = this_mv.as_int; ++cntx; } *cntx += 1; } else { near_mv_ref_cnts[CNT_INTRA] += 1; } } /* If we have three distinct MV's ... */ if (near_mv_ref_cnts[CNT_SPLITMV]) { /* See if above-left MV can be merged with NEAREST */ if (mv->as_int == near_mvs[CNT_NEAREST].as_int) near_mv_ref_cnts[CNT_NEAREST] += 1; } near_mv_ref_cnts[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV) + (left->mbmi.mode == SPLITMV)) * 2 + (aboveleft->mbmi.mode == SPLITMV); /* Swap near and nearest if necessary */ if (near_mv_ref_cnts[CNT_NEAR] > near_mv_ref_cnts[CNT_NEAREST]) { int tmp; tmp = near_mv_ref_cnts[CNT_NEAREST]; near_mv_ref_cnts[CNT_NEAREST] = near_mv_ref_cnts[CNT_NEAR]; near_mv_ref_cnts[CNT_NEAR] = tmp; tmp = near_mvs[CNT_NEAREST].as_int; near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int; near_mvs[CNT_NEAR].as_int = tmp; } /* Use near_mvs[0] to store the "best" MV */ if (near_mv_ref_cnts[CNT_NEAREST] >= near_mv_ref_cnts[CNT_INTRA]) { near_mvs[CNT_INTRA] = near_mvs[CNT_NEAREST]; } /* Set up return values */ best_mv->as_int = near_mvs[0].as_int; nearest->as_int = near_mvs[CNT_NEAREST].as_int; nearby->as_int = near_mvs[CNT_NEAR].as_int; } static void invert_and_clamp_mvs(int_mv *inv, int_mv *src, MACROBLOCKD *xd) { inv->as_mv.row = src->as_mv.row * -1; inv->as_mv.col = src->as_mv.col * -1; vp8_clamp_mv2(inv, xd); vp8_clamp_mv2(src, xd); } int vp8_find_near_mvs_bias(MACROBLOCKD *xd, const MODE_INFO *here, int_mv mode_mv_sb[2][MB_MODE_COUNT], int_mv best_mv_sb[2], int cnt[4], int refframe, int *ref_frame_sign_bias) { int sign_bias = ref_frame_sign_bias[refframe]; vp8_find_near_mvs(xd, here, &mode_mv_sb[sign_bias][NEARESTMV], &mode_mv_sb[sign_bias][NEARMV], &best_mv_sb[sign_bias], cnt, refframe, ref_frame_sign_bias); invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARESTMV], &mode_mv_sb[sign_bias][NEARESTMV], xd); invert_and_clamp_mvs(&mode_mv_sb[!sign_bias][NEARMV], &mode_mv_sb[sign_bias][NEARMV], xd); invert_and_clamp_mvs(&best_mv_sb[!sign_bias], &best_mv_sb[sign_bias], xd); return sign_bias; } vp8_prob 
*vp8_mv_ref_probs(vp8_prob p[VP8_MVREFS - 1], const int near_mv_ref_ct[4]) { p[0] = vp8_mode_contexts[near_mv_ref_ct[0]][0]; p[1] = vp8_mode_contexts[near_mv_ref_ct[1]][1]; p[2] = vp8_mode_contexts[near_mv_ref_ct[2]][2]; p[3] = vp8_mode_contexts[near_mv_ref_ct[3]][3]; /* p[3] = vp8_mode_contexts[near_mv_ref_ct[1] + near_mv_ref_ct[2] + near_mv_ref_ct[3]][3]; */ return p; } libvpx-1.8.2/vp8/common/findnearmv.h000066400000000000000000000121161357355204000173420ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_FINDNEARMV_H_ #define VPX_VP8_COMMON_FINDNEARMV_H_ #include "./vpx_config.h" #include "mv.h" #include "blockd.h" #include "modecont.h" #include "treecoder.h" #ifdef __cplusplus extern "C" { #endif static INLINE void mv_bias(int refmb_ref_frame_sign_bias, int refframe, int_mv *mvp, const int *ref_frame_sign_bias) { if (refmb_ref_frame_sign_bias != ref_frame_sign_bias[refframe]) { mvp->as_mv.row *= -1; mvp->as_mv.col *= -1; } } #define LEFT_TOP_MARGIN (16 << 3) #define RIGHT_BOTTOM_MARGIN (16 << 3) static INLINE void vp8_clamp_mv2(int_mv *mv, const MACROBLOCKD *xd) { if (mv->as_mv.col < (xd->mb_to_left_edge - LEFT_TOP_MARGIN)) { mv->as_mv.col = xd->mb_to_left_edge - LEFT_TOP_MARGIN; } else if (mv->as_mv.col > xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN) { mv->as_mv.col = xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN; } if (mv->as_mv.row < (xd->mb_to_top_edge - LEFT_TOP_MARGIN)) { mv->as_mv.row = xd->mb_to_top_edge - LEFT_TOP_MARGIN; } else if (mv->as_mv.row > xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN) { mv->as_mv.row = xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN; } } static INLINE void vp8_clamp_mv(int_mv *mv, int mb_to_left_edge, int mb_to_right_edge, int mb_to_top_edge, int mb_to_bottom_edge) { mv->as_mv.col = (mv->as_mv.col < mb_to_left_edge) ? mb_to_left_edge : mv->as_mv.col; mv->as_mv.col = (mv->as_mv.col > mb_to_right_edge) ? mb_to_right_edge : mv->as_mv.col; mv->as_mv.row = (mv->as_mv.row < mb_to_top_edge) ? mb_to_top_edge : mv->as_mv.row; mv->as_mv.row = (mv->as_mv.row > mb_to_bottom_edge) ? 
mb_to_bottom_edge : mv->as_mv.row; } static INLINE unsigned int vp8_check_mv_bounds(int_mv *mv, int mb_to_left_edge, int mb_to_right_edge, int mb_to_top_edge, int mb_to_bottom_edge) { unsigned int need_to_clamp; need_to_clamp = (mv->as_mv.col < mb_to_left_edge); need_to_clamp |= (mv->as_mv.col > mb_to_right_edge); need_to_clamp |= (mv->as_mv.row < mb_to_top_edge); need_to_clamp |= (mv->as_mv.row > mb_to_bottom_edge); return need_to_clamp; } void vp8_find_near_mvs(MACROBLOCKD *xd, const MODE_INFO *here, int_mv *nearest, int_mv *nearby, int_mv *best_mv, int near_mv_ref_cnts[4], int refframe, int *ref_frame_sign_bias); int vp8_find_near_mvs_bias(MACROBLOCKD *xd, const MODE_INFO *here, int_mv mode_mv_sb[2][MB_MODE_COUNT], int_mv best_mv_sb[2], int cnt[4], int refframe, int *ref_frame_sign_bias); vp8_prob *vp8_mv_ref_probs(vp8_prob p[VP8_MVREFS - 1], const int near_mv_ref_ct[4]); extern const unsigned char vp8_mbsplit_offset[4][16]; static INLINE uint32_t left_block_mv(const MODE_INFO *cur_mb, int b) { if (!(b & 3)) { /* On L edge, get from MB to left of us */ --cur_mb; if (cur_mb->mbmi.mode != SPLITMV) return cur_mb->mbmi.mv.as_int; b += 4; } return (cur_mb->bmi + b - 1)->mv.as_int; } static INLINE uint32_t above_block_mv(const MODE_INFO *cur_mb, int b, int mi_stride) { if (!(b >> 2)) { /* On top edge, get from MB above us */ cur_mb -= mi_stride; if (cur_mb->mbmi.mode != SPLITMV) return cur_mb->mbmi.mv.as_int; b += 16; } return (cur_mb->bmi + (b - 4))->mv.as_int; } static INLINE B_PREDICTION_MODE left_block_mode(const MODE_INFO *cur_mb, int b) { if (!(b & 3)) { /* On L edge, get from MB to left of us */ --cur_mb; switch (cur_mb->mbmi.mode) { case B_PRED: return (cur_mb->bmi + b + 3)->as_mode; case DC_PRED: return B_DC_PRED; case V_PRED: return B_VE_PRED; case H_PRED: return B_HE_PRED; case TM_PRED: return B_TM_PRED; default: return B_DC_PRED; } } return (cur_mb->bmi + b - 1)->as_mode; } static INLINE B_PREDICTION_MODE above_block_mode(const MODE_INFO *cur_mb, int b, int mi_stride) { if (!(b >> 2)) { /* On top edge, get from MB above us */ cur_mb -= mi_stride; switch (cur_mb->mbmi.mode) { case B_PRED: return (cur_mb->bmi + b + 12)->as_mode; case DC_PRED: return B_DC_PRED; case V_PRED: return B_VE_PRED; case H_PRED: return B_HE_PRED; case TM_PRED: return B_TM_PRED; default: return B_DC_PRED; } } return (cur_mb->bmi + b - 4)->as_mode; } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_FINDNEARMV_H_ libvpx-1.8.2/vp8/common/generic/000077500000000000000000000000001357355204000164535ustar00rootroot00000000000000libvpx-1.8.2/vp8/common/generic/systemdependent.c000066400000000000000000000046721357355204000220430ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include "vpx_config.h"
#include "vp8_rtcd.h"
#if VPX_ARCH_ARM
#include "vpx_ports/arm.h"
#elif VPX_ARCH_X86 || VPX_ARCH_X86_64
#include "vpx_ports/x86.h"
#elif VPX_ARCH_PPC
#include "vpx_ports/ppc.h"
#endif
#include "vp8/common/onyxc_int.h"
#include "vp8/common/systemdependent.h"

#if CONFIG_MULTITHREAD
#if HAVE_UNISTD_H && !defined(__OS2__)
#include <unistd.h>
#elif defined(_WIN32)
#include <windows.h>
typedef void(WINAPI *PGNSI)(LPSYSTEM_INFO);
#elif defined(__OS2__)
#define INCL_DOS
#define INCL_DOSSPINLOCK
#include <os2.h>
#endif
#endif

#if CONFIG_MULTITHREAD
static int get_cpu_count() {
  int core_count = 16;

#if HAVE_UNISTD_H && !defined(__OS2__)
#if defined(_SC_NPROCESSORS_ONLN)
  core_count = (int)sysconf(_SC_NPROCESSORS_ONLN);
#elif defined(_SC_NPROC_ONLN)
  core_count = (int)sysconf(_SC_NPROC_ONLN);
#endif
#elif defined(_WIN32)
  {
#if _WIN32_WINNT >= 0x0501
    SYSTEM_INFO sysinfo;
    GetNativeSystemInfo(&sysinfo);
#else
    PGNSI pGNSI;
    SYSTEM_INFO sysinfo;

    /* Call GetNativeSystemInfo if supported or
     * GetSystemInfo otherwise. */
    pGNSI = (PGNSI)GetProcAddress(GetModuleHandle(TEXT("kernel32.dll")),
                                  "GetNativeSystemInfo");
    if (pGNSI != NULL)
      pGNSI(&sysinfo);
    else
      GetSystemInfo(&sysinfo);
#endif

    core_count = (int)sysinfo.dwNumberOfProcessors;
  }
#elif defined(__OS2__)
  {
    ULONG proc_id;
    ULONG status;

    core_count = 0;
    for (proc_id = 1;; ++proc_id) {
      if (DosGetProcessorStatus(proc_id, &status)) break;

      if (status == PROC_ONLINE) core_count++;
    }
  }
#else
/* other platforms */
#endif

  return core_count > 0 ? core_count : 1;
}
#endif

void vp8_machine_specific_config(VP8_COMMON *ctx) {
#if CONFIG_MULTITHREAD
  ctx->processor_core_count = get_cpu_count();
#endif /* CONFIG_MULTITHREAD */

#if VPX_ARCH_ARM
  ctx->cpu_caps = arm_cpu_caps();
#elif VPX_ARCH_X86 || VPX_ARCH_X86_64
  ctx->cpu_caps = x86_simd_caps();
#elif VPX_ARCH_PPC
  ctx->cpu_caps = ppc_simd_caps();
#else
  // generic-gnu targets.
  ctx->cpu_caps = 0;
#endif
}
libvpx-1.8.2/vp8/common/header.h000066400000000000000000000021401357355204000164350ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP8_COMMON_HEADER_H_
#define VPX_VP8_COMMON_HEADER_H_

#ifdef __cplusplus
extern "C" {
#endif

/* 24 bits total */
typedef struct {
  unsigned int type : 1;
  unsigned int version : 3;
  unsigned int show_frame : 1;

  /* Allow 2^20 bytes = 8 megabits for first partition */
  unsigned int first_partition_length_in_bytes : 19;

#ifdef PACKET_TESTING
  unsigned int frame_number;
  unsigned int update_gold : 1;
  unsigned int uses_gold : 1;
  unsigned int update_last : 1;
  unsigned int uses_last : 1;
#endif
} VP8_HEADER;

#ifdef PACKET_TESTING
#define VP8_HEADER_SIZE 8
#else
#define VP8_HEADER_SIZE 3
#endif

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP8_COMMON_HEADER_H_
libvpx-1.8.2/vp8/common/idct_blk.c000066400000000000000000000035561357355204000167660ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.
 *  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "vpx_mem/vpx_mem.h"

void vp8_dequant_idct_add_y_block_c(short *q, short *dq, unsigned char *dst,
                                    int stride, char *eobs) {
  int i, j;

  for (i = 0; i < 4; ++i) {
    for (j = 0; j < 4; ++j) {
      if (*eobs++ > 1) {
        vp8_dequant_idct_add_c(q, dq, dst, stride);
      } else {
        vp8_dc_only_idct_add_c(q[0] * dq[0], dst, stride, dst, stride);
        memset(q, 0, 2 * sizeof(q[0]));
      }

      q += 16;
      dst += 4;
    }

    dst += 4 * stride - 16;
  }
}

void vp8_dequant_idct_add_uv_block_c(short *q, short *dq, unsigned char *dst_u,
                                     unsigned char *dst_v, int stride,
                                     char *eobs) {
  int i, j;

  for (i = 0; i < 2; ++i) {
    for (j = 0; j < 2; ++j) {
      if (*eobs++ > 1) {
        vp8_dequant_idct_add_c(q, dq, dst_u, stride);
      } else {
        vp8_dc_only_idct_add_c(q[0] * dq[0], dst_u, stride, dst_u, stride);
        memset(q, 0, 2 * sizeof(q[0]));
      }

      q += 16;
      dst_u += 4;
    }

    dst_u += 4 * stride - 8;
  }

  for (i = 0; i < 2; ++i) {
    for (j = 0; j < 2; ++j) {
      if (*eobs++ > 1) {
        vp8_dequant_idct_add_c(q, dq, dst_v, stride);
      } else {
        vp8_dc_only_idct_add_c(q[0] * dq[0], dst_v, stride, dst_v, stride);
        memset(q, 0, 2 * sizeof(q[0]));
      }

      q += 16;
      dst_v += 4;
    }

    dst_v += 4 * stride - 8;
  }
}
libvpx-1.8.2/vp8/common/idctllm.c000066400000000000000000000102411357355204000166310ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"

/****************************************************************************
 * Notes:
 *
 * This implementation makes use of a 16 bit fixed point version of two
 * multiply constants:
 *         1. sqrt(2) * cos (pi/8)
 *         2. sqrt(2) * sin (pi/8)
 * Because the first constant is bigger than 1, to maintain the same 16 bit
 * fixed point precision as the second one, we use a trick of
 *         x * a = x + x*(a-1)
 * so
 *         x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1).
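 *
 * In Q16 fixed point the two constants below work out to
 *         cospi8sqrt2minus1 = round((sqrt(2) * cos(pi/8) - 1) * 65536) = 20091
 *         sinpi8sqrt2       = round(sqrt(2) * sin(pi/8) * 65536)       = 35468
 * so, for example, x * sqrt(2) * cos(pi/8) is evaluated as
 *         x + ((x * 20091) >> 16).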
**************************************************************************/ static const int cospi8sqrt2minus1 = 20091; static const int sinpi8sqrt2 = 35468; void vp8_short_idct4x4llm_c(short *input, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride) { int i; int r, c; int a1, b1, c1, d1; short output[16]; short *ip = input; short *op = output; int temp1, temp2; int shortpitch = 4; for (i = 0; i < 4; ++i) { a1 = ip[0] + ip[8]; b1 = ip[0] - ip[8]; temp1 = (ip[4] * sinpi8sqrt2) >> 16; temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); c1 = temp1 - temp2; temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); temp2 = (ip[12] * sinpi8sqrt2) >> 16; d1 = temp1 + temp2; op[shortpitch * 0] = a1 + d1; op[shortpitch * 3] = a1 - d1; op[shortpitch * 1] = b1 + c1; op[shortpitch * 2] = b1 - c1; ip++; op++; } ip = output; op = output; for (i = 0; i < 4; ++i) { a1 = ip[0] + ip[2]; b1 = ip[0] - ip[2]; temp1 = (ip[1] * sinpi8sqrt2) >> 16; temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); c1 = temp1 - temp2; temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); temp2 = (ip[3] * sinpi8sqrt2) >> 16; d1 = temp1 + temp2; op[0] = (a1 + d1 + 4) >> 3; op[3] = (a1 - d1 + 4) >> 3; op[1] = (b1 + c1 + 4) >> 3; op[2] = (b1 - c1 + 4) >> 3; ip += shortpitch; op += shortpitch; } ip = output; for (r = 0; r < 4; ++r) { for (c = 0; c < 4; ++c) { int a = ip[c] + pred_ptr[c]; if (a < 0) a = 0; if (a > 255) a = 255; dst_ptr[c] = (unsigned char)a; } ip += 4; dst_ptr += dst_stride; pred_ptr += pred_stride; } } void vp8_dc_only_idct_add_c(short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride) { int a1 = ((input_dc + 4) >> 3); int r, c; for (r = 0; r < 4; ++r) { for (c = 0; c < 4; ++c) { int a = a1 + pred_ptr[c]; if (a < 0) a = 0; if (a > 255) a = 255; dst_ptr[c] = (unsigned char)a; } dst_ptr += dst_stride; pred_ptr += pred_stride; } } void vp8_short_inv_walsh4x4_c(short *input, short *mb_dqcoeff) { short output[16]; int i; int a1, b1, c1, d1; int a2, b2, c2, d2; short *ip = input; short *op = output; for (i = 0; i < 4; ++i) { a1 = ip[0] + ip[12]; b1 = ip[4] + ip[8]; c1 = ip[4] - ip[8]; d1 = ip[0] - ip[12]; op[0] = a1 + b1; op[4] = c1 + d1; op[8] = a1 - b1; op[12] = d1 - c1; ip++; op++; } ip = output; op = output; for (i = 0; i < 4; ++i) { a1 = ip[0] + ip[3]; b1 = ip[1] + ip[2]; c1 = ip[1] - ip[2]; d1 = ip[0] - ip[3]; a2 = a1 + b1; b2 = c1 + d1; c2 = a1 - b1; d2 = d1 - c1; op[0] = (a2 + 3) >> 3; op[1] = (b2 + 3) >> 3; op[2] = (c2 + 3) >> 3; op[3] = (d2 + 3) >> 3; ip += 4; op += 4; } for (i = 0; i < 16; ++i) { mb_dqcoeff[i * 16] = output[i]; } } void vp8_short_inv_walsh4x4_1_c(short *input, short *mb_dqcoeff) { int i; int a1; a1 = ((input[0] + 3) >> 3); for (i = 0; i < 16; ++i) { mb_dqcoeff[i * 16] = a1; } } libvpx-1.8.2/vp8/common/invtrans.h000066400000000000000000000030661357355204000170610ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP8_COMMON_INVTRANS_H_ #define VPX_VP8_COMMON_INVTRANS_H_ #include "./vpx_config.h" #include "vp8_rtcd.h" #include "blockd.h" #include "onyxc_int.h" #if CONFIG_MULTITHREAD #include "vpx_mem/vpx_mem.h" #endif #ifdef __cplusplus extern "C" { #endif static void eob_adjust(char *eobs, short *diff) { /* eob adjust.... the idct can only skip if both the dc and eob are zero */ int js; for (js = 0; js < 16; ++js) { if ((eobs[js] == 0) && (diff[0] != 0)) eobs[js]++; diff += 16; } } static INLINE void vp8_inverse_transform_mby(MACROBLOCKD *xd) { short *DQC = xd->dequant_y1; if (xd->mode_info_context->mbmi.mode != SPLITMV) { /* do 2nd order transform on the dc block */ if (xd->eobs[24] > 1) { vp8_short_inv_walsh4x4(&xd->block[24].dqcoeff[0], xd->qcoeff); } else { vp8_short_inv_walsh4x4_1(&xd->block[24].dqcoeff[0], xd->qcoeff); } eob_adjust(xd->eobs, xd->qcoeff); DQC = xd->dequant_y1_dc; } vp8_dequant_idct_add_y_block(xd->qcoeff, DQC, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_INVTRANS_H_ libvpx-1.8.2/vp8/common/loopfilter.h000066400000000000000000000070321357355204000173710ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_LOOPFILTER_H_ #define VPX_VP8_COMMON_LOOPFILTER_H_ #include "vpx_ports/mem.h" #include "vpx_config.h" #include "vp8_rtcd.h" #ifdef __cplusplus extern "C" { #endif #define MAX_LOOP_FILTER 63 /* fraction of total macroblock rows to be used in fast filter level picking */ /* has to be > 2 */ #define PARTIAL_FRAME_FRACTION 8 typedef enum { NORMAL_LOOPFILTER = 0, SIMPLE_LOOPFILTER = 1 } LOOPFILTERTYPE; #if VPX_ARCH_ARM #define SIMD_WIDTH 1 #else #define SIMD_WIDTH 16 #endif /* Need to align this structure so when it is declared and * passed it can be loaded into vector registers. 
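 * Each entry is replicated across all SIMD_WIDTH bytes of its row, so a
 * single aligned load (e.g. from mblim[filter_level]) fills a whole vector
 * register with identical threshold values.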
 */
typedef struct {
  DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
                  mblim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
  DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
                  blim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
  DECLARE_ALIGNED(SIMD_WIDTH, unsigned char,
                  lim[MAX_LOOP_FILTER + 1][SIMD_WIDTH]);
  DECLARE_ALIGNED(SIMD_WIDTH, unsigned char, hev_thr[4][SIMD_WIDTH]);
  unsigned char lvl[4][4][4];
  unsigned char hev_thr_lut[2][MAX_LOOP_FILTER + 1];
  unsigned char mode_lf_lut[10];
} loop_filter_info_n;

typedef struct loop_filter_info {
  const unsigned char *mblim;
  const unsigned char *blim;
  const unsigned char *lim;
  const unsigned char *hev_thr;
} loop_filter_info;

typedef void loop_filter_uvfunction(unsigned char *u, /* source pointer */
                                    int p,            /* pitch */
                                    const unsigned char *blimit,
                                    const unsigned char *limit,
                                    const unsigned char *thresh,
                                    unsigned char *v);

/* assorted loopfilter functions which get used elsewhere */
struct VP8Common;
struct macroblockd;
struct modeinfo;

void vp8_loop_filter_init(struct VP8Common *cm);

void vp8_loop_filter_frame_init(struct VP8Common *cm, struct macroblockd *mbd,
                                int default_filt_lvl);

void vp8_loop_filter_frame(struct VP8Common *cm, struct macroblockd *mbd,
                           int frame_type);

void vp8_loop_filter_partial_frame(struct VP8Common *cm,
                                   struct macroblockd *mbd,
                                   int default_filt_lvl);

void vp8_loop_filter_frame_yonly(struct VP8Common *cm, struct macroblockd *mbd,
                                 int default_filt_lvl);

void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
                                      int sharpness_lvl);

void vp8_loop_filter_row_normal(struct VP8Common *cm,
                                struct modeinfo *mode_info_context, int mb_row,
                                int post_ystride, int post_uvstride,
                                unsigned char *y_ptr, unsigned char *u_ptr,
                                unsigned char *v_ptr);

void vp8_loop_filter_row_simple(struct VP8Common *cm,
                                struct modeinfo *mode_info_context, int mb_row,
                                int post_ystride, unsigned char *y_ptr);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP8_COMMON_LOOPFILTER_H_
libvpx-1.8.2/vp8/common/loopfilter_filters.c000066400000000000000000000335701357355204000211200ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdlib.h>
#include "loopfilter.h"
#include "onyxc_int.h"

typedef unsigned char uc;

static signed char vp8_signed_char_clamp(int t) {
  t = (t < -128 ? -128 : t);
  t = (t > 127 ?
127 : t); return (signed char)t; } /* should we apply any filter at all ( 11111111 yes, 00000000 no) */ static signed char vp8_filter_mask(uc limit, uc blimit, uc p3, uc p2, uc p1, uc p0, uc q0, uc q1, uc q2, uc q3) { signed char mask = 0; mask |= (abs(p3 - p2) > limit); mask |= (abs(p2 - p1) > limit); mask |= (abs(p1 - p0) > limit); mask |= (abs(q1 - q0) > limit); mask |= (abs(q2 - q1) > limit); mask |= (abs(q3 - q2) > limit); mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit); return mask - 1; } /* is there high variance internal edge ( 11111111 yes, 00000000 no) */ static signed char vp8_hevmask(uc thresh, uc p1, uc p0, uc q0, uc q1) { signed char hev = 0; hev |= (abs(p1 - p0) > thresh) * -1; hev |= (abs(q1 - q0) > thresh) * -1; return hev; } static void vp8_filter(signed char mask, uc hev, uc *op1, uc *op0, uc *oq0, uc *oq1) { signed char ps0, qs0; signed char ps1, qs1; signed char filter_value, Filter1, Filter2; signed char u; ps1 = (signed char)*op1 ^ 0x80; ps0 = (signed char)*op0 ^ 0x80; qs0 = (signed char)*oq0 ^ 0x80; qs1 = (signed char)*oq1 ^ 0x80; /* add outer taps if we have high edge variance */ filter_value = vp8_signed_char_clamp(ps1 - qs1); filter_value &= hev; /* inner taps */ filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0)); filter_value &= mask; /* save bottom 3 bits so that we round one side +4 and the other +3 * if it equals 4 we'll set it to adjust by -1 to account for the fact * we'd round it by 3 the other way */ Filter1 = vp8_signed_char_clamp(filter_value + 4); Filter2 = vp8_signed_char_clamp(filter_value + 3); Filter1 >>= 3; Filter2 >>= 3; u = vp8_signed_char_clamp(qs0 - Filter1); *oq0 = u ^ 0x80; u = vp8_signed_char_clamp(ps0 + Filter2); *op0 = u ^ 0x80; filter_value = Filter1; /* outer tap adjustments */ filter_value += 1; filter_value >>= 1; filter_value &= ~hev; u = vp8_signed_char_clamp(qs1 - filter_value); *oq1 = u ^ 0x80; u = vp8_signed_char_clamp(ps1 + filter_value); *op1 = u ^ 0x80; } static void loop_filter_horizontal_edge_c(unsigned char *s, int p, /* pitch */ const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh, int count) { int hev = 0; /* high edge variance */ signed char mask = 0; int i = 0; /* loop filter designed to work using chars so that we can make maximum use * of 8 bit simd instructions. */ do { mask = vp8_filter_mask(limit[0], blimit[0], s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p], s[2 * p], s[3 * p]); hev = vp8_hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]); vp8_filter(mask, hev, s - 2 * p, s - 1 * p, s, s + 1 * p); ++s; } while (++i < count * 8); } static void loop_filter_vertical_edge_c(unsigned char *s, int p, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh, int count) { int hev = 0; /* high edge variance */ signed char mask = 0; int i = 0; /* loop filter designed to work using chars so that we can make maximum use * of 8 bit simd instructions. 
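   * (vp8_filter above does its arithmetic in the signed domain: each pixel
   * is mapped from unsigned to signed by XORing with 0x80, filtered, and
   * then mapped back the same way.)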
*/ do { mask = vp8_filter_mask(limit[0], blimit[0], s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]); hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]); vp8_filter(mask, hev, s - 2, s - 1, s, s + 1); s += p; } while (++i < count * 8); } static void vp8_mbfilter(signed char mask, uc hev, uc *op2, uc *op1, uc *op0, uc *oq0, uc *oq1, uc *oq2) { signed char s, u; signed char filter_value, Filter1, Filter2; signed char ps2 = (signed char)*op2 ^ 0x80; signed char ps1 = (signed char)*op1 ^ 0x80; signed char ps0 = (signed char)*op0 ^ 0x80; signed char qs0 = (signed char)*oq0 ^ 0x80; signed char qs1 = (signed char)*oq1 ^ 0x80; signed char qs2 = (signed char)*oq2 ^ 0x80; /* add outer taps if we have high edge variance */ filter_value = vp8_signed_char_clamp(ps1 - qs1); filter_value = vp8_signed_char_clamp(filter_value + 3 * (qs0 - ps0)); filter_value &= mask; Filter2 = filter_value; Filter2 &= hev; /* save bottom 3 bits so that we round one side +4 and the other +3 */ Filter1 = vp8_signed_char_clamp(Filter2 + 4); Filter2 = vp8_signed_char_clamp(Filter2 + 3); Filter1 >>= 3; Filter2 >>= 3; qs0 = vp8_signed_char_clamp(qs0 - Filter1); ps0 = vp8_signed_char_clamp(ps0 + Filter2); /* only apply wider filter if not high edge variance */ filter_value &= ~hev; Filter2 = filter_value; /* roughly 3/7th difference across boundary */ u = vp8_signed_char_clamp((63 + Filter2 * 27) >> 7); s = vp8_signed_char_clamp(qs0 - u); *oq0 = s ^ 0x80; s = vp8_signed_char_clamp(ps0 + u); *op0 = s ^ 0x80; /* roughly 2/7th difference across boundary */ u = vp8_signed_char_clamp((63 + Filter2 * 18) >> 7); s = vp8_signed_char_clamp(qs1 - u); *oq1 = s ^ 0x80; s = vp8_signed_char_clamp(ps1 + u); *op1 = s ^ 0x80; /* roughly 1/7th difference across boundary */ u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7); s = vp8_signed_char_clamp(qs2 - u); *oq2 = s ^ 0x80; s = vp8_signed_char_clamp(ps2 + u); *op2 = s ^ 0x80; } static void mbloop_filter_horizontal_edge_c(unsigned char *s, int p, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh, int count) { signed char hev = 0; /* high edge variance */ signed char mask = 0; int i = 0; /* loop filter designed to work using chars so that we can make maximum use * of 8 bit simd instructions. */ do { mask = vp8_filter_mask(limit[0], blimit[0], s[-4 * p], s[-3 * p], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p], s[2 * p], s[3 * p]); hev = vp8_hevmask(thresh[0], s[-2 * p], s[-1 * p], s[0 * p], s[1 * p]); vp8_mbfilter(mask, hev, s - 3 * p, s - 2 * p, s - 1 * p, s, s + 1 * p, s + 2 * p); ++s; } while (++i < count * 8); } static void mbloop_filter_vertical_edge_c(unsigned char *s, int p, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh, int count) { signed char hev = 0; /* high edge variance */ signed char mask = 0; int i = 0; do { mask = vp8_filter_mask(limit[0], blimit[0], s[-4], s[-3], s[-2], s[-1], s[0], s[1], s[2], s[3]); hev = vp8_hevmask(thresh[0], s[-2], s[-1], s[0], s[1]); vp8_mbfilter(mask, hev, s - 3, s - 2, s - 1, s, s + 1, s + 2); s += p; } while (++i < count * 8); } /* should we apply any filter at all ( 11111111 yes, 00000000 no) */ static signed char vp8_simple_filter_mask(uc blimit, uc p1, uc p0, uc q0, uc q1) { /* Why does this cause problems for win32? 
* error C2143: syntax error : missing ';' before 'type' * (void) limit; */ signed char mask = (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit) * -1; return mask; } static void vp8_simple_filter(signed char mask, uc *op1, uc *op0, uc *oq0, uc *oq1) { signed char filter_value, Filter1, Filter2; signed char p1 = (signed char)*op1 ^ 0x80; signed char p0 = (signed char)*op0 ^ 0x80; signed char q0 = (signed char)*oq0 ^ 0x80; signed char q1 = (signed char)*oq1 ^ 0x80; signed char u; filter_value = vp8_signed_char_clamp(p1 - q1); filter_value = vp8_signed_char_clamp(filter_value + 3 * (q0 - p0)); filter_value &= mask; /* save bottom 3 bits so that we round one side +4 and the other +3 */ Filter1 = vp8_signed_char_clamp(filter_value + 4); Filter1 >>= 3; u = vp8_signed_char_clamp(q0 - Filter1); *oq0 = u ^ 0x80; Filter2 = vp8_signed_char_clamp(filter_value + 3); Filter2 >>= 3; u = vp8_signed_char_clamp(p0 + Filter2); *op0 = u ^ 0x80; } void vp8_loop_filter_simple_horizontal_edge_c(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { signed char mask = 0; int i = 0; do { mask = vp8_simple_filter_mask(blimit[0], y_ptr[-2 * y_stride], y_ptr[-1 * y_stride], y_ptr[0 * y_stride], y_ptr[1 * y_stride]); vp8_simple_filter(mask, y_ptr - 2 * y_stride, y_ptr - 1 * y_stride, y_ptr, y_ptr + 1 * y_stride); ++y_ptr; } while (++i < 16); } void vp8_loop_filter_simple_vertical_edge_c(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { signed char mask = 0; int i = 0; do { mask = vp8_simple_filter_mask(blimit[0], y_ptr[-2], y_ptr[-1], y_ptr[0], y_ptr[1]); vp8_simple_filter(mask, y_ptr - 2, y_ptr - 1, y_ptr, y_ptr + 1); y_ptr += y_stride; } while (++i < 16); } /* Horizontal MB filtering */ void vp8_loop_filter_mbh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { mbloop_filter_horizontal_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) { mbloop_filter_horizontal_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } if (v_ptr) { mbloop_filter_horizontal_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } } /* Vertical MB Filtering */ void vp8_loop_filter_mbv_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { mbloop_filter_vertical_edge_c(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) { mbloop_filter_vertical_edge_c(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } if (v_ptr) { mbloop_filter_vertical_edge_c(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } } /* Horizontal B Filtering */ void vp8_loop_filter_bh_c(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { loop_filter_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); loop_filter_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); loop_filter_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) { loop_filter_horizontal_edge_c(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); } if (v_ptr) { loop_filter_horizontal_edge_c(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); } } void vp8_loop_filter_bhs_c(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 4 * y_stride, y_stride, blimit); 
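  /* likewise the interior 4x4 block edges at rows 8 and 12 */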
  vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 8 * y_stride, y_stride,
                                           blimit);
  vp8_loop_filter_simple_horizontal_edge_c(y_ptr + 12 * y_stride, y_stride,
                                           blimit);
}

/* Vertical B Filtering */
void vp8_loop_filter_bv_c(unsigned char *y_ptr, unsigned char *u_ptr,
                          unsigned char *v_ptr, int y_stride, int uv_stride,
                          loop_filter_info *lfi) {
  loop_filter_vertical_edge_c(y_ptr + 4, y_stride, lfi->blim, lfi->lim,
                              lfi->hev_thr, 2);
  loop_filter_vertical_edge_c(y_ptr + 8, y_stride, lfi->blim, lfi->lim,
                              lfi->hev_thr, 2);
  loop_filter_vertical_edge_c(y_ptr + 12, y_stride, lfi->blim, lfi->lim,
                              lfi->hev_thr, 2);

  if (u_ptr) {
    loop_filter_vertical_edge_c(u_ptr + 4, uv_stride, lfi->blim, lfi->lim,
                                lfi->hev_thr, 1);
  }

  if (v_ptr) {
    loop_filter_vertical_edge_c(v_ptr + 4, uv_stride, lfi->blim, lfi->lim,
                                lfi->hev_thr, 1);
  }
}

void vp8_loop_filter_bvs_c(unsigned char *y_ptr, int y_stride,
                           const unsigned char *blimit) {
  vp8_loop_filter_simple_vertical_edge_c(y_ptr + 4, y_stride, blimit);
  vp8_loop_filter_simple_vertical_edge_c(y_ptr + 8, y_stride, blimit);
  vp8_loop_filter_simple_vertical_edge_c(y_ptr + 12, y_stride, blimit);
}
libvpx-1.8.2/vp8/common/mbpitch.c000066400000000000000000000027771357355204000166440ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "blockd.h"

void vp8_setup_block_dptrs(MACROBLOCKD *x) {
  int r, c;

  for (r = 0; r < 4; ++r) {
    for (c = 0; c < 4; ++c) {
      x->block[r * 4 + c].predictor = x->predictor + r * 4 * 16 + c * 4;
    }
  }

  for (r = 0; r < 2; ++r) {
    for (c = 0; c < 2; ++c) {
      x->block[16 + r * 2 + c].predictor =
          x->predictor + 256 + r * 4 * 8 + c * 4;
    }
  }

  for (r = 0; r < 2; ++r) {
    for (c = 0; c < 2; ++c) {
      x->block[20 + r * 2 + c].predictor =
          x->predictor + 320 + r * 4 * 8 + c * 4;
    }
  }

  for (r = 0; r < 25; ++r) {
    x->block[r].qcoeff = x->qcoeff + r * 16;
    x->block[r].dqcoeff = x->dqcoeff + r * 16;
    x->block[r].eob = x->eobs + r;
  }
}

void vp8_build_block_doffsets(MACROBLOCKD *x) {
  int block;

  for (block = 0; block < 16; ++block) /* y blocks */
  {
    x->block[block].offset =
        (block >> 2) * 4 * x->dst.y_stride + (block & 3) * 4;
  }

  for (block = 16; block < 20; ++block) /* U and V blocks */
  {
    x->block[block + 4].offset = x->block[block].offset =
        ((block - 16) >> 1) * 4 * x->dst.uv_stride + (block & 1) * 4;
  }
}
libvpx-1.8.2/vp8/common/mfqe.c000066400000000000000000000272521357355204000161430ustar00rootroot00000000000000/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/* MFQE: Multiframe Quality Enhancement
 * In rate limited situations keyframes may cause significant visual artifacts
 * commonly referred to as "popping." This file implements a postprocessing
 * algorithm which blends data from the preceding frame when there is no
 * motion and the q from the previous frame is lower which indicates that it is
 * higher quality.
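 *
 * Sketch of the blend (implemented by filter_by_weight below): each output
 * pixel is computed as
 *   dst = (src * w + dst * ((1 << MFQE_PRECISION) - w) + rounding)
 *             >> MFQE_PRECISION
 * where the source weight w is derived per block from the SAD/variance
 * checks in multiframe_quality_enhance_block.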
 */

#include "./vp8_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8/common/common.h"
#include "vp8/common/postproc.h"
#include "vpx_dsp/variance.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_scale/yv12config.h"

#include <limits.h>
#include <stdlib.h>

static void filter_by_weight(unsigned char *src, int src_stride,
                             unsigned char *dst, int dst_stride,
                             int block_size, int src_weight) {
  int dst_weight = (1 << MFQE_PRECISION) - src_weight;
  int rounding_bit = 1 << (MFQE_PRECISION - 1);
  int r, c;

  for (r = 0; r < block_size; ++r) {
    for (c = 0; c < block_size; ++c) {
      dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit) >>
               MFQE_PRECISION;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

void vp8_filter_by_weight16x16_c(unsigned char *src, int src_stride,
                                 unsigned char *dst, int dst_stride,
                                 int src_weight) {
  filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight);
}

void vp8_filter_by_weight8x8_c(unsigned char *src, int src_stride,
                               unsigned char *dst, int dst_stride,
                               int src_weight) {
  filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight);
}

void vp8_filter_by_weight4x4_c(unsigned char *src, int src_stride,
                               unsigned char *dst, int dst_stride,
                               int src_weight) {
  filter_by_weight(src, src_stride, dst, dst_stride, 4, src_weight);
}

static void apply_ifactor(unsigned char *y_src, int y_src_stride,
                          unsigned char *y_dst, int y_dst_stride,
                          unsigned char *u_src, unsigned char *v_src,
                          int uv_src_stride, unsigned char *u_dst,
                          unsigned char *v_dst, int uv_dst_stride,
                          int block_size, int src_weight) {
  if (block_size == 16) {
    vp8_filter_by_weight16x16(y_src, y_src_stride, y_dst, y_dst_stride,
                              src_weight);
    vp8_filter_by_weight8x8(u_src, uv_src_stride, u_dst, uv_dst_stride,
                            src_weight);
    vp8_filter_by_weight8x8(v_src, uv_src_stride, v_dst, uv_dst_stride,
                            src_weight);
  } else {
    vp8_filter_by_weight8x8(y_src, y_src_stride, y_dst, y_dst_stride,
                            src_weight);
    vp8_filter_by_weight4x4(u_src, uv_src_stride, u_dst, uv_dst_stride,
                            src_weight);
    vp8_filter_by_weight4x4(v_src, uv_src_stride, v_dst, uv_dst_stride,
                            src_weight);
  }
}

static unsigned int int_sqrt(unsigned int x) {
  unsigned int y = x;
  unsigned int guess;
  int p = 1;
  while (y >>= 1) p++;
  p >>= 1;

  guess = 0;
  while (p >= 0) {
    guess |= (1 << p);
    if (x < guess * guess) guess -= (1 << p);
    p--;
  }
  /* choose between guess or guess+1 */
  return guess + (guess * guess + guess + 1 <= x);
}

#define USE_SSD
static void multiframe_quality_enhance_block(
    int blksize, /* Currently only values supported are 16, 8 */
    int qcurr, int qprev, unsigned char *y, unsigned char *u, unsigned char *v,
    int y_stride, int uv_stride, unsigned char *yd, unsigned char *ud,
    unsigned char *vd, int yd_stride, int uvd_stride) {
  static const unsigned char VP8_ZEROS[16] = { 0, 0, 0, 0, 0, 0, 0, 0,
                                               0, 0, 0, 0, 0, 0, 0, 0 };
  int uvblksize = blksize >> 1;
  int qdiff = qcurr - qprev;

  int i;
  unsigned char *up;
  unsigned char *udp;
  unsigned char *vp;
  unsigned char *vdp;

  unsigned int act, actd, sad, usad, vsad, sse, thr, thrsq, actrisk;

  if (blksize == 16) {
    actd = (vpx_variance16x16(yd, yd_stride, VP8_ZEROS, 0, &sse) + 128) >> 8;
    act = (vpx_variance16x16(y, y_stride, VP8_ZEROS, 0, &sse) + 128) >> 8;
#ifdef USE_SSD
    vpx_variance16x16(y, y_stride, yd, yd_stride, &sse);
    sad = (sse + 128) >> 8;
    vpx_variance8x8(u, uv_stride, ud, uvd_stride, &sse);
    usad = (sse + 32) >> 6;
    vpx_variance8x8(v, uv_stride, vd, uvd_stride, &sse);
    vsad = (sse + 32) >> 6;
#else
    sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8;
    usad = (vpx_sad8x8(u, uv_stride, ud, uvd_stride) + 32) >> 6;
    vsad = (vpx_sad8x8(v, uv_stride, vd,
uvd_stride) + 32) >> 6; #endif } else { actd = (vpx_variance8x8(yd, yd_stride, VP8_ZEROS, 0, &sse) + 32) >> 6; act = (vpx_variance8x8(y, y_stride, VP8_ZEROS, 0, &sse) + 32) >> 6; #ifdef USE_SSD vpx_variance8x8(y, y_stride, yd, yd_stride, &sse); sad = (sse + 32) >> 6; vpx_variance4x4(u, uv_stride, ud, uvd_stride, &sse); usad = (sse + 8) >> 4; vpx_variance4x4(v, uv_stride, vd, uvd_stride, &sse); vsad = (sse + 8) >> 4; #else sad = (vpx_sad8x8(y, y_stride, yd, yd_stride) + 32) >> 6; usad = (vpx_sad4x4(u, uv_stride, ud, uvd_stride) + 8) >> 4; vsad = (vpx_sad4x4(v, uv_stride, vd, uvd_stride) + 8) >> 4; #endif } actrisk = (actd > act * 5); /* thr = qdiff/16 + log2(act) + log4(qprev) */ thr = (qdiff >> 4); while (actd >>= 1) thr++; while (qprev >>= 2) thr++; #ifdef USE_SSD thrsq = thr * thr; if (sad < thrsq && /* additional checks for color mismatch and excessive addition of * high-frequencies */ 4 * usad < thrsq && 4 * vsad < thrsq && !actrisk) #else if (sad < thr && /* additional checks for color mismatch and excessive addition of * high-frequencies */ 2 * usad < thr && 2 * vsad < thr && !actrisk) #endif { int ifactor; #ifdef USE_SSD /* TODO: optimize this later to not need sqr root */ sad = int_sqrt(sad); #endif ifactor = (sad << MFQE_PRECISION) / thr; ifactor >>= (qdiff >> 5); if (ifactor) { apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd, uvd_stride, blksize, ifactor); } } else { /* else implicitly copy from previous frame */ if (blksize == 16) { vp8_copy_mem16x16(y, y_stride, yd, yd_stride); vp8_copy_mem8x8(u, uv_stride, ud, uvd_stride); vp8_copy_mem8x8(v, uv_stride, vd, uvd_stride); } else { vp8_copy_mem8x8(y, y_stride, yd, yd_stride); for (up = u, udp = ud, i = 0; i < uvblksize; ++i, up += uv_stride, udp += uvd_stride) { memcpy(udp, up, uvblksize); } for (vp = v, vdp = vd, i = 0; i < uvblksize; ++i, vp += uv_stride, vdp += uvd_stride) { memcpy(vdp, vp, uvblksize); } } } } static int qualify_inter_mb(const MODE_INFO *mode_info_context, int *map) { if (mode_info_context->mbmi.mb_skip_coeff) { map[0] = map[1] = map[2] = map[3] = 1; } else if (mode_info_context->mbmi.mode == SPLITMV) { static int ndx[4][4] = { { 0, 1, 4, 5 }, { 2, 3, 6, 7 }, { 8, 9, 12, 13 }, { 10, 11, 14, 15 } }; int i, j; vp8_zero(*map); for (i = 0; i < 4; ++i) { map[i] = 1; for (j = 0; j < 4 && map[j]; ++j) { map[i] &= (mode_info_context->bmi[ndx[i][j]].mv.as_mv.row <= 2 && mode_info_context->bmi[ndx[i][j]].mv.as_mv.col <= 2); } } } else { map[0] = map[1] = map[2] = map[3] = (mode_info_context->mbmi.mode > B_PRED && abs(mode_info_context->mbmi.mv.as_mv.row) <= 2 && abs(mode_info_context->mbmi.mv.as_mv.col) <= 2); } return (map[0] + map[1] + map[2] + map[3]); } void vp8_multiframe_quality_enhance(VP8_COMMON *cm) { YV12_BUFFER_CONFIG *show = cm->frame_to_show; YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer; FRAME_TYPE frame_type = cm->frame_type; /* Point at base of Mb MODE_INFO list has motion vectors etc */ const MODE_INFO *mode_info_context = cm->mi; int mb_row; int mb_col; int totmap, map[4]; int qcurr = cm->base_qindex; int qprev = cm->postproc_state.last_base_qindex; unsigned char *y_ptr, *u_ptr, *v_ptr; unsigned char *yd_ptr, *ud_ptr, *vd_ptr; /* Set up the buffer pointers */ y_ptr = show->y_buffer; u_ptr = show->u_buffer; v_ptr = show->v_buffer; yd_ptr = dest->y_buffer; ud_ptr = dest->u_buffer; vd_ptr = dest->v_buffer; /* postprocess each macro block */ for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { /* if motion is high there will likely be no 
benefit */ if (frame_type == INTER_FRAME) { totmap = qualify_inter_mb(mode_info_context, map); } else { totmap = (frame_type == KEY_FRAME ? 4 : 0); } if (totmap) { if (totmap < 4) { int i, j; for (i = 0; i < 2; ++i) { for (j = 0; j < 2; ++j) { if (map[i * 2 + j]) { multiframe_quality_enhance_block( 8, qcurr, qprev, y_ptr + 8 * (i * show->y_stride + j), u_ptr + 4 * (i * show->uv_stride + j), v_ptr + 4 * (i * show->uv_stride + j), show->y_stride, show->uv_stride, yd_ptr + 8 * (i * dest->y_stride + j), ud_ptr + 4 * (i * dest->uv_stride + j), vd_ptr + 4 * (i * dest->uv_stride + j), dest->y_stride, dest->uv_stride); } else { /* copy a 8x8 block */ int k; unsigned char *up = u_ptr + 4 * (i * show->uv_stride + j); unsigned char *udp = ud_ptr + 4 * (i * dest->uv_stride + j); unsigned char *vp = v_ptr + 4 * (i * show->uv_stride + j); unsigned char *vdp = vd_ptr + 4 * (i * dest->uv_stride + j); vp8_copy_mem8x8( y_ptr + 8 * (i * show->y_stride + j), show->y_stride, yd_ptr + 8 * (i * dest->y_stride + j), dest->y_stride); for (k = 0; k < 4; ++k, up += show->uv_stride, udp += dest->uv_stride, vp += show->uv_stride, vdp += dest->uv_stride) { memcpy(udp, up, 4); memcpy(vdp, vp, 4); } } } } } else { /* totmap = 4 */ multiframe_quality_enhance_block( 16, qcurr, qprev, y_ptr, u_ptr, v_ptr, show->y_stride, show->uv_stride, yd_ptr, ud_ptr, vd_ptr, dest->y_stride, dest->uv_stride); } } else { vp8_copy_mem16x16(y_ptr, show->y_stride, yd_ptr, dest->y_stride); vp8_copy_mem8x8(u_ptr, show->uv_stride, ud_ptr, dest->uv_stride); vp8_copy_mem8x8(v_ptr, show->uv_stride, vd_ptr, dest->uv_stride); } y_ptr += 16; u_ptr += 8; v_ptr += 8; yd_ptr += 16; ud_ptr += 8; vd_ptr += 8; mode_info_context++; /* step to next MB */ } y_ptr += show->y_stride * 16 - 16 * cm->mb_cols; u_ptr += show->uv_stride * 8 - 8 * cm->mb_cols; v_ptr += show->uv_stride * 8 - 8 * cm->mb_cols; yd_ptr += dest->y_stride * 16 - 16 * cm->mb_cols; ud_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols; vd_ptr += dest->uv_stride * 8 - 8 * cm->mb_cols; mode_info_context++; /* Skip border mb */ } } libvpx-1.8.2/vp8/common/mips/000077500000000000000000000000001357355204000160075ustar00rootroot00000000000000libvpx-1.8.2/vp8/common/mips/dspr2/000077500000000000000000000000001357355204000170415ustar00rootroot00000000000000libvpx-1.8.2/vp8/common/mips/dspr2/dequantize_dspr2.c000066400000000000000000000014441357355204000224730ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" #include "vp8_rtcd.h" #include "vpx_mem/vpx_mem.h" #if HAVE_DSPR2 void vp8_dequant_idct_add_dspr2(short *input, short *dq, unsigned char *dest, int stride) { int i; for (i = 0; i < 16; ++i) { input[i] = dq[i] * input[i]; } vp8_short_idct4x4llm_dspr2(input, dest, stride, dest, stride); memset(input, 0, 32); } #endif libvpx-1.8.2/vp8/common/mips/dspr2/filter_dspr2.c000066400000000000000000004243101357355204000216100ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "vp8_rtcd.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 #define CROP_WIDTH 256 unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH]; static const unsigned short sub_pel_filterss[8][3] = { { 0, 0, 0 }, { 0, 0x0601, 0x7b0c }, { 0x0201, 0x0b08, 0x6c24 }, { 0, 0x0906, 0x5d32 }, { 0x0303, 0x1010, 0x4d4d }, { 0, 0x0609, 0x325d }, { 0x0102, 0x080b, 0x246c }, { 0, 0x0106, 0x0c7b }, }; static const int sub_pel_filters_int[8][3] = { { 0, 0, 0 }, { 0x0000fffa, 0x007b000c, 0xffff0000 }, { 0x0002fff5, 0x006c0024, 0xfff80001 }, { 0x0000fff7, 0x005d0032, 0xfffa0000 }, { 0x0003fff0, 0x004d004d, 0xfff00003 }, { 0x0000fffa, 0x0032005d, 0xfff70000 }, { 0x0001fff8, 0x0024006c, 0xfff50002 }, { 0x0000ffff, 0x000c007b, 0xfffa0000 }, }; static const int sub_pel_filters_inv[8][3] = { { 0, 0, 0 }, { 0xfffa0000, 0x000c007b, 0x0000ffff }, { 0xfff50002, 0x0024006c, 0x0001fff8 }, { 0xfff70000, 0x0032005d, 0x0000fffa }, { 0xfff00003, 0x004d004d, 0x0003fff0 }, { 0xfffa0000, 0x005d0032, 0x0000fff7 }, { 0xfff80001, 0x006c0024, 0x0002fff5 }, { 0xffff0000, 0x007b000c, 0x0000fffa }, }; /* clang-format off */ static const int sub_pel_filters_int_tap_4[8][2] = { { 0, 0}, { 0xfffa007b, 0x000cffff}, { 0, 0}, { 0xfff7005d, 0x0032fffa}, { 0, 0}, { 0xfffa0032, 0x005dfff7}, { 0, 0}, { 0xffff000c, 0x007bfffa}, }; static const int sub_pel_filters_inv_tap_4[8][2] = { { 0, 0}, { 0x007bfffa, 0xffff000c}, { 0, 0}, { 0x005dfff7, 0xfffa0032}, { 0, 0}, { 0x0032fffa, 0xfff7005d}, { 0, 0}, { 0x000cffff, 0xfffa007b}, }; /* clang-format on */ inline void prefetch_load(unsigned char *src) { __asm__ __volatile__("pref 0, 0(%[src]) \n\t" : : [src] "r"(src)); } inline void prefetch_store(unsigned char *dst) { __asm__ __volatile__("pref 1, 0(%[dst]) \n\t" : : [dst] "r"(dst)); } void dsputil_static_init(void) { int i; for (i = 0; i < 256; ++i) ff_cropTbl[i + CROP_WIDTH] = i; for (i = 0; i < CROP_WIDTH; ++i) { ff_cropTbl[i] = 0; ff_cropTbl[i + CROP_WIDTH + 256] = 255; } } void vp8_filter_block2d_first_pass_4(unsigned char *RESTRICT src_ptr, unsigned char *RESTRICT dst_ptr, unsigned int src_pixels_per_line, unsigned int output_height, int xoffset, int pitch) { unsigned int i; int Temp1, Temp2, Temp3, Temp4; unsigned int vector4a = 64; int vector1b, vector2b, vector3b; unsigned int tp1, tp2, tn1, tn2; unsigned int p1, p2, p3; unsigned int n1, n2, n3; unsigned char *cm = ff_cropTbl + CROP_WIDTH; vector3b = sub_pel_filters_inv[xoffset][2]; /* if (xoffset == 0) we don't need any filtering */ if (vector3b == 0) { for (i = 0; i < output_height; ++i) { /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr + src_pixels_per_line); dst_ptr[0] = src_ptr[0]; dst_ptr[1] = src_ptr[1]; dst_ptr[2] = src_ptr[2]; dst_ptr[3] = src_ptr[3]; /* next row... */ src_ptr += src_pixels_per_line; dst_ptr += 4; } } else { if (vector3b > 65536) { /* 6 tap filter */ vector1b = sub_pel_filters_inv[xoffset][0]; vector2b = sub_pel_filters_inv[xoffset][1]; /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr + src_pixels_per_line); for (i = output_height; i--;) { /* apply filter with vectors pairs */ __asm__ __volatile__( "ulw %[tp1], -2(%[src_ptr]) \n\t" "ulw %[tp2], 2(%[src_ptr]) \n\t" /* even 1. 
pixel */ "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[p1], %[tp2] \n\t" "balign %[tp2], %[tp1], 3 \n\t" "extp %[Temp1], $ac3, 9 \n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" /* odd 1. pixel */ "ulw %[tn2], 3(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[n1], %[tp2] \n\t" "preceu.ph.qbl %[n2], %[tp2] \n\t" "preceu.ph.qbr %[n3], %[tn2] \n\t" "extp %[Temp3], $ac2, 9 \n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[n1], %[tn2] \n\t" "extp %[Temp2], $ac3, 9 \n\t" "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" "extp %[Temp4], $ac2, 9 \n\t" /* clamp */ "lbux %[tp1], %[Temp1](%[cm]) \n\t" "lbux %[tn1], %[Temp2](%[cm]) \n\t" "lbux %[tp2], %[Temp3](%[cm]) \n\t" "lbux %[n2], %[Temp4](%[cm]) \n\t" /* store bytes */ "sb %[tp1], 0(%[dst_ptr]) \n\t" "sb %[tn1], 1(%[dst_ptr]) \n\t" "sb %[tp2], 2(%[dst_ptr]) \n\t" "sb %[n2], 3(%[dst_ptr]) \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [n1] "=&r"(n1), [n2] "=&r"(n2), [n3] "=&r"(n3), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector4a] "r"(vector4a), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr), [vector3b] "r"(vector3b), [src_ptr] "r"(src_ptr)); /* Next row... */ src_ptr += src_pixels_per_line; dst_ptr += pitch; } } else { /* 4 tap filter */ vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; for (i = output_height; i--;) { /* apply filter with vectors pairs */ __asm__ __volatile__( "ulw %[tp1], -1(%[src_ptr]) \n\t" "ulw %[tp2], 3(%[src_ptr]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 \n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" "extp %[Temp1], $ac3, 9 \n\t" /* odd 1. pixel */ "srl %[tn1], %[tp2], 8 \n\t" "balign %[tp2], %[tp1], 3 \n\t" "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[n1], %[tp2] \n\t" "preceu.ph.qbl %[n2], %[tp2] \n\t" "preceu.ph.qbr %[n3], %[tn1] \n\t" "extp %[Temp3], $ac2, 9 \n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" /* odd 2. 
pixel */ "mtlo %[vector4a], $ac2 \n\t" "extp %[Temp2], $ac3, 9 \n\t" "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" "extp %[Temp4], $ac2, 9 \n\t" /* clamp and store results */ "lbux %[tp1], %[Temp1](%[cm]) \n\t" "lbux %[tn1], %[Temp2](%[cm]) \n\t" "lbux %[tp2], %[Temp3](%[cm]) \n\t" "sb %[tp1], 0(%[dst_ptr]) \n\t" "sb %[tn1], 1(%[dst_ptr]) \n\t" "lbux %[n2], %[Temp4](%[cm]) \n\t" "sb %[tp2], 2(%[dst_ptr]) \n\t" "sb %[n2], 3(%[dst_ptr]) \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [n1] "=&r"(n1), [n2] "=&r"(n2), [n3] "=&r"(n3), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector4a] "r"(vector4a), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr), [src_ptr] "r"(src_ptr)); /* Next row... */ src_ptr += src_pixels_per_line; dst_ptr += pitch; } } } } void vp8_filter_block2d_first_pass_8_all(unsigned char *RESTRICT src_ptr, unsigned char *RESTRICT dst_ptr, unsigned int src_pixels_per_line, unsigned int output_height, int xoffset, int pitch) { unsigned int i; int Temp1, Temp2, Temp3, Temp4; unsigned int vector4a = 64; unsigned int vector1b, vector2b, vector3b; unsigned int tp1, tp2, tn1, tn2; unsigned int p1, p2, p3, p4; unsigned int n1, n2, n3, n4; unsigned char *cm = ff_cropTbl + CROP_WIDTH; /* if (xoffset == 0) we don't need any filtering */ if (xoffset == 0) { for (i = 0; i < output_height; ++i) { /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr + src_pixels_per_line); dst_ptr[0] = src_ptr[0]; dst_ptr[1] = src_ptr[1]; dst_ptr[2] = src_ptr[2]; dst_ptr[3] = src_ptr[3]; dst_ptr[4] = src_ptr[4]; dst_ptr[5] = src_ptr[5]; dst_ptr[6] = src_ptr[6]; dst_ptr[7] = src_ptr[7]; /* next row... */ src_ptr += src_pixels_per_line; dst_ptr += 8; } } else { vector3b = sub_pel_filters_inv[xoffset][2]; if (vector3b > 65536) { /* 6 tap filter */ vector1b = sub_pel_filters_inv[xoffset][0]; vector2b = sub_pel_filters_inv[xoffset][1]; for (i = output_height; i--;) { /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr + src_pixels_per_line); /* apply filter with vectors pairs */ __asm__ __volatile__( "ulw %[tp1], -2(%[src_ptr]) \n\t" "ulw %[tp2], 2(%[src_ptr]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[p1], %[tp2] \n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" "balign %[tp2], %[tp1], 3 \n\t" "extp %[Temp1], $ac3, 9 \n\t" "ulw %[tn2], 3(%[src_ptr]) \n\t" /* odd 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[n1], %[tp2] \n\t" "preceu.ph.qbl %[n2], %[tp2] \n\t" "preceu.ph.qbr %[n3], %[tn2] \n\t" "extp %[Temp3], $ac2, 9 \n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" /* odd 2. 
pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[n1], %[tn2] \n\t" "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" "ulw %[tp1], 6(%[src_ptr]) \n\t" "extp %[Temp2], $ac3, 9 \n\t" "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[p2], %[tp1] \n\t" "extp %[Temp4], $ac2, 9 \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [n1] "=&r"(n1), [n2] "=&r"(n2), [n3] "=&r"(n3), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=r"(Temp4) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector4a] "r"(vector4a), [vector3b] "r"(vector3b), [src_ptr] "r"(src_ptr)); /* clamp and store results */ dst_ptr[0] = cm[Temp1]; dst_ptr[1] = cm[Temp2]; dst_ptr[2] = cm[Temp3]; dst_ptr[3] = cm[Temp4]; /* next 4 pixels */ __asm__ __volatile__( /* even 3. pixel */ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t" /* even 4. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[p4], %[tp1] \n\t" "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" "ulw %[tn1], 7(%[src_ptr]) \n\t" "extp %[Temp1], $ac3, 9 \n\t" /* odd 3. pixel */ "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[n2], %[tn1] \n\t" "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t" "extp %[Temp3], $ac2, 9 \n\t" /* odd 4. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[n4], %[tn1] \n\t" "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" "extp %[Temp2], $ac3, 9 \n\t" "extp %[Temp4], $ac2, 9 \n\t" : [tn1] "=&r"(tn1), [n2] "=&r"(n2), [p4] "=&r"(p4), [n4] "=&r"(n4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=r"(Temp4) : [tp1] "r"(tp1), [vector1b] "r"(vector1b), [p2] "r"(p2), [vector2b] "r"(vector2b), [n1] "r"(n1), [p1] "r"(p1), [vector4a] "r"(vector4a), [vector3b] "r"(vector3b), [p3] "r"(p3), [n3] "r"(n3), [src_ptr] "r"(src_ptr)); /* clamp and store results */ dst_ptr[4] = cm[Temp1]; dst_ptr[5] = cm[Temp2]; dst_ptr[6] = cm[Temp3]; dst_ptr[7] = cm[Temp4]; src_ptr += src_pixels_per_line; dst_ptr += pitch; } } else { /* 4 tap filter */ vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; for (i = output_height; i--;) { /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr + src_pixels_per_line); /* apply filter with vectors pairs */ __asm__ __volatile__( "ulw %[tp1], -1(%[src_ptr]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "ulw %[tp2], 3(%[src_ptr]) \n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "preceu.ph.qbl %[p4], %[tp2] \n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" "extp %[Temp1], $ac3, 9 \n\t" "balign %[tp2], %[tp1], 3 \n\t" /* odd 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[n1], %[tp2] \n\t" "preceu.ph.qbl %[n2], %[tp2] \n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" "extp %[Temp3], $ac2, 9 \n\t" "ulw %[tn2], 4(%[src_ptr]) \n\t" /* odd 2. 
pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbr %[n3], %[tn2] \n\t" "preceu.ph.qbl %[n4], %[tn2] \n\t" "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" "ulw %[tp1], 7(%[src_ptr]) \n\t" "extp %[Temp2], $ac3, 9 \n\t" "mtlo %[vector4a], $ac3 \n\t" "extp %[Temp4], $ac2, 9 \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [n1] "=&r"(n1), [n2] "=&r"(n2), [n3] "=&r"(n3), [n4] "=&r"(n4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=r"(Temp4) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr)); /* clamp and store results */ dst_ptr[0] = cm[Temp1]; dst_ptr[1] = cm[Temp2]; dst_ptr[2] = cm[Temp3]; dst_ptr[3] = cm[Temp4]; /* next 4 pixels */ __asm__ __volatile__( /* even 3. pixel */ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" /* even 4. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbr %[p2], %[tp1] \n\t" "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" "extp %[Temp1], $ac3, 9 \n\t" /* odd 3. pixel */ "mtlo %[vector4a], $ac3 \n\t" "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t" "ulw %[tn1], 8(%[src_ptr]) \n\t" "extp %[Temp3], $ac2, 9 \n\t" /* odd 4. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbr %[n2], %[tn1] \n\t" "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t" "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" "extp %[Temp2], $ac3, 9 \n\t" "extp %[Temp4], $ac2, 9 \n\t" : [tn1] "=&r"(tn1), [p2] "=&r"(p2), [n2] "=&r"(n2), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=r"(Temp4) : [tp1] "r"(tp1), [p3] "r"(p3), [p4] "r"(p4), [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr), [n3] "r"(n3), [n4] "r"(n4)); /* clamp and store results */ dst_ptr[4] = cm[Temp1]; dst_ptr[5] = cm[Temp2]; dst_ptr[6] = cm[Temp3]; dst_ptr[7] = cm[Temp4]; /* next row... */ src_ptr += src_pixels_per_line; dst_ptr += pitch; } } } } void vp8_filter_block2d_first_pass16_6tap(unsigned char *RESTRICT src_ptr, unsigned char *RESTRICT dst_ptr, unsigned int src_pixels_per_line, unsigned int output_height, int xoffset, int pitch) { unsigned int i; int Temp1, Temp2, Temp3, Temp4; unsigned int vector4a; unsigned int vector1b, vector2b, vector3b; unsigned int tp1, tp2, tn1, tn2; unsigned int p1, p2, p3, p4; unsigned int n1, n2, n3, n4; unsigned char *cm = ff_cropTbl + CROP_WIDTH; vector1b = sub_pel_filters_inv[xoffset][0]; vector2b = sub_pel_filters_inv[xoffset][1]; vector3b = sub_pel_filters_inv[xoffset][2]; vector4a = 64; for (i = output_height; i--;) { /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr + src_pixels_per_line); /* apply filter with vectors pairs */ __asm__ __volatile__( "ulw %[tp1], -2(%[src_ptr]) \n\t" "ulw %[tp2], 2(%[src_ptr]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[p1], %[tp2] \n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" "balign %[tp2], %[tp1], 3 \n\t" "ulw %[tn2], 3(%[src_ptr]) \n\t" "extp %[Temp1], $ac3, 9 \n\t" /* odd 1. 
pixel */ "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[n1], %[tp2] \n\t" "preceu.ph.qbl %[n2], %[tp2] \n\t" "preceu.ph.qbr %[n3], %[tn2] \n\t" "extp %[Temp3], $ac2, 9 \n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" /* odd 2. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[n1], %[tn2] \n\t" "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" "ulw %[tp1], 6(%[src_ptr]) \n\t" "extp %[Temp2], $ac3, 9 \n\t" "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[p2], %[tp1] \n\t" "extp %[Temp4], $ac2, 9 \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [n1] "=&r"(n1), [n2] "=&r"(n2), [n3] "=&r"(n3), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=r"(Temp4) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector4a] "r"(vector4a), [vector3b] "r"(vector3b), [src_ptr] "r"(src_ptr)); /* clamp and store results */ dst_ptr[0] = cm[Temp1]; dst_ptr[1] = cm[Temp2]; dst_ptr[2] = cm[Temp3]; dst_ptr[3] = cm[Temp4]; /* next 4 pixels */ __asm__ __volatile__( /* even 3. pixel */ "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p1], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector3b] \n\t" /* even 4. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[p4], %[tp1] \n\t" "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" "ulw %[tn1], 7(%[src_ptr]) \n\t" "extp %[Temp1], $ac3, 9 \n\t" /* odd 3. pixel */ "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[n2], %[tn1] \n\t" "dpa.w.ph $ac3, %[n3], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n1], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector3b] \n\t" "extp %[Temp3], $ac2, 9 \n\t" /* odd 4. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[n4], %[tn1] \n\t" "dpa.w.ph $ac2, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac2, %[n2], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" "ulw %[tp2], 10(%[src_ptr]) \n\t" "extp %[Temp2], $ac3, 9 \n\t" "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp2] \n\t" "extp %[Temp4], $ac2, 9 \n\t" : [tn1] "=&r"(tn1), [tp2] "=&r"(tp2), [n2] "=&r"(n2), [p4] "=&r"(p4), [n4] "=&r"(n4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=r"(Temp4), [p1] "+r"(p1) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [tp1] "r"(tp1), [n1] "r"(n1), [vector4a] "r"(vector4a), [p2] "r"(p2), [vector3b] "r"(vector3b), [p3] "r"(p3), [n3] "r"(n3), [src_ptr] "r"(src_ptr)); /* clamp and store results */ dst_ptr[4] = cm[Temp1]; dst_ptr[5] = cm[Temp2]; dst_ptr[6] = cm[Temp3]; dst_ptr[7] = cm[Temp4]; /* next 4 pixels */ __asm__ __volatile__( /* even 5. pixel */ "dpa.w.ph $ac3, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t" /* even 6. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[p3], %[tp2] \n\t" "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector3b] \n\t" "ulw %[tn1], 11(%[src_ptr]) \n\t" "extp %[Temp1], $ac3, 9 \n\t" /* odd 5. pixel */ "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[n1], %[tn1] \n\t" "dpa.w.ph $ac3, %[n2], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n4], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" "extp %[Temp3], $ac2, 9 \n\t" /* odd 6. 
pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[n3], %[tn1] \n\t" "dpa.w.ph $ac2, %[n4], %[vector1b] \n\t" "dpa.w.ph $ac2, %[n1], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n3], %[vector3b] \n\t" "ulw %[tp1], 14(%[src_ptr]) \n\t" "extp %[Temp2], $ac3, 9 \n\t" "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[p4], %[tp1] \n\t" "extp %[Temp4], $ac2, 9 \n\t" : [tn1] "=&r"(tn1), [tp1] "=&r"(tp1), [n1] "=&r"(n1), [p3] "=&r"(p3), [n3] "=&r"(n3), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=r"(Temp4), [p4] "+r"(p4) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [tp2] "r"(tp2), [p2] "r"(p2), [n2] "r"(n2), [n4] "r"(n4), [p1] "r"(p1), [src_ptr] "r"(src_ptr), [vector4a] "r"(vector4a), [vector3b] "r"(vector3b)); /* clamp and store results */ dst_ptr[8] = cm[Temp1]; dst_ptr[9] = cm[Temp2]; dst_ptr[10] = cm[Temp3]; dst_ptr[11] = cm[Temp4]; /* next 4 pixels */ __asm__ __volatile__( /* even 7. pixel */ "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector3b] \n\t" /* even 8. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "dpa.w.ph $ac2, %[p3], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p4], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector3b] \n\t" "ulw %[tn1], 15(%[src_ptr]) \n\t" "extp %[Temp1], $ac3, 9 \n\t" /* odd 7. pixel */ "mtlo %[vector4a], $ac3 \n\t" "preceu.ph.qbr %[n4], %[tn1] \n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n3], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n4], %[vector3b] \n\t" "extp %[Temp3], $ac2, 9 \n\t" /* odd 8. pixel */ "mtlo %[vector4a], $ac2 \n\t" "preceu.ph.qbl %[n2], %[tn1] \n\t" "dpa.w.ph $ac2, %[n3], %[vector1b] \n\t" "dpa.w.ph $ac2, %[n4], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n2], %[vector3b] \n\t" "extp %[Temp2], $ac3, 9 \n\t" "extp %[Temp4], $ac2, 9 \n\t" /* clamp and store results */ "lbux %[tp1], %[Temp1](%[cm]) \n\t" "lbux %[tn1], %[Temp2](%[cm]) \n\t" "lbux %[p2], %[Temp3](%[cm]) \n\t" "sb %[tp1], 12(%[dst_ptr]) \n\t" "sb %[tn1], 13(%[dst_ptr]) \n\t" "lbux %[n2], %[Temp4](%[cm]) \n\t" "sb %[p2], 14(%[dst_ptr]) \n\t" "sb %[n2], 15(%[dst_ptr]) \n\t" : [tn1] "=&r"(tn1), [p2] "=&r"(p2), [n2] "=&r"(n2), [n4] "=&r"(n4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=r"(Temp4), [tp1] "+r"(tp1) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [p4] "r"(p4), [n1] "r"(n1), [p1] "r"(p1), [vector4a] "r"(vector4a), [vector3b] "r"(vector3b), [p3] "r"(p3), [n3] "r"(n3), [src_ptr] "r"(src_ptr), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr)); src_ptr += src_pixels_per_line; dst_ptr += pitch; } } void vp8_filter_block2d_first_pass16_0(unsigned char *RESTRICT src_ptr, unsigned char *RESTRICT output_ptr, unsigned int src_pixels_per_line) { int Temp1, Temp2, Temp3, Temp4; int i; /* prefetch src_ptr data to cache memory */ prefetch_store(output_ptr + 32); /* copy memory from src buffer to dst buffer */ for (i = 0; i < 7; ++i) { __asm__ __volatile__( "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "ulw %[Temp3], 8(%[src_ptr]) \n\t" "ulw %[Temp4], 12(%[src_ptr]) \n\t" "sw %[Temp1], 0(%[output_ptr]) \n\t" "sw %[Temp2], 4(%[output_ptr]) \n\t" "sw %[Temp3], 8(%[output_ptr]) \n\t" "sw %[Temp4], 12(%[output_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4), [src_ptr] "+r"(src_ptr) : [src_pixels_per_line] "r"(src_pixels_per_line), [output_ptr] "r"(output_ptr)); __asm__ __volatile__( "ulw %[Temp1], 0(%[src_ptr]) 
\n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "ulw %[Temp3], 8(%[src_ptr]) \n\t" "ulw %[Temp4], 12(%[src_ptr]) \n\t" "sw %[Temp1], 16(%[output_ptr]) \n\t" "sw %[Temp2], 20(%[output_ptr]) \n\t" "sw %[Temp3], 24(%[output_ptr]) \n\t" "sw %[Temp4], 28(%[output_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4), [src_ptr] "+r"(src_ptr) : [src_pixels_per_line] "r"(src_pixels_per_line), [output_ptr] "r"(output_ptr)); __asm__ __volatile__( "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "ulw %[Temp3], 8(%[src_ptr]) \n\t" "ulw %[Temp4], 12(%[src_ptr]) \n\t" "sw %[Temp1], 32(%[output_ptr]) \n\t" "sw %[Temp2], 36(%[output_ptr]) \n\t" "sw %[Temp3], 40(%[output_ptr]) \n\t" "sw %[Temp4], 44(%[output_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4), [src_ptr] "+r"(src_ptr) : [src_pixels_per_line] "r"(src_pixels_per_line), [output_ptr] "r"(output_ptr)); output_ptr += 48; } } void vp8_filter_block2d_first_pass16_4tap( unsigned char *RESTRICT src_ptr, unsigned char *RESTRICT output_ptr, unsigned int src_pixels_per_line, unsigned int output_width, unsigned int output_height, int xoffset, int yoffset, unsigned char *RESTRICT dst_ptr, int pitch) { unsigned int i, j; int Temp1, Temp2, Temp3, Temp4; unsigned int vector4a; int vector1b, vector2b; unsigned int tp1, tp2, tp3, tn1; unsigned int p1, p2, p3; unsigned int n1, n2, n3; unsigned char *cm = ff_cropTbl + CROP_WIDTH; vector4a = 64; vector1b = sub_pel_filters_inv_tap_4[xoffset][0]; vector2b = sub_pel_filters_inv_tap_4[xoffset][1]; /* if (yoffset == 0) don't need temp buffer, data will be stored in dst_ptr */ if (yoffset == 0) { output_height -= 5; src_ptr += (src_pixels_per_line + src_pixels_per_line); for (i = output_height; i--;) { __asm__ __volatile__("ulw %[tp3], -1(%[src_ptr]) \n\t" : [tp3] "=&r"(tp3) : [src_ptr] "r"(src_ptr)); /* processing 4 adjacent pixels */ for (j = 0; j < 16; j += 4) { /* apply filter with vectors pairs */ __asm__ __volatile__( "ulw %[tp2], 3(%[src_ptr]) " "\n\t" "move %[tp1], %[tp3] " "\n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 " "\n\t" "mthi $0, $ac3 " "\n\t" "move %[tp3], %[tp2] " "\n\t" "preceu.ph.qbr %[p1], %[tp1] " "\n\t" "preceu.ph.qbl %[p2], %[tp1] " "\n\t" "preceu.ph.qbr %[p3], %[tp2] " "\n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] " "\n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] " "\n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 " "\n\t" "mthi $0, $ac2 " "\n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] " "\n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] " "\n\t" "extr.w %[Temp1], $ac3, 7 " "\n\t" /* odd 1. pixel */ "ulw %[tn1], 4(%[src_ptr]) " "\n\t" "balign %[tp2], %[tp1], 3 " "\n\t" "mtlo %[vector4a], $ac3 " "\n\t" "mthi $0, $ac3 " "\n\t" "preceu.ph.qbr %[n1], %[tp2] " "\n\t" "preceu.ph.qbl %[n2], %[tp2] " "\n\t" "preceu.ph.qbr %[n3], %[tn1] " "\n\t" "extr.w %[Temp3], $ac2, 7 " "\n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] " "\n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] " "\n\t" /* odd 2. 
pixel */ "mtlo %[vector4a], $ac2 " "\n\t" "mthi $0, $ac2 " "\n\t" "extr.w %[Temp2], $ac3, 7 " "\n\t" "dpa.w.ph $ac2, %[n2], %[vector1b] " "\n\t" "dpa.w.ph $ac2, %[n3], %[vector2b] " "\n\t" "extr.w %[Temp4], $ac2, 7 " "\n\t" /* clamp and store results */ "lbux %[tp1], %[Temp1](%[cm]) " "\n\t" "lbux %[tn1], %[Temp2](%[cm]) " "\n\t" "lbux %[tp2], %[Temp3](%[cm]) " "\n\t" "sb %[tp1], 0(%[dst_ptr]) " "\n\t" "sb %[tn1], 1(%[dst_ptr]) " "\n\t" "lbux %[n2], %[Temp4](%[cm]) " "\n\t" "sb %[tp2], 2(%[dst_ptr]) " "\n\t" "sb %[n2], 3(%[dst_ptr]) " "\n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [tn1] "=&r"(tn1), [p1] "=&r"(p1), [p2] "=&r"(p2), [n1] "=&r"(n1), [n2] "=&r"(n2), [n3] "=&r"(n3), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [p3] "=&r"(p3), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector4a] "r"(vector4a), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr), [src_ptr] "r"(src_ptr)); src_ptr += 4; } /* Next row... */ src_ptr += src_pixels_per_line - 16; dst_ptr += pitch; } } else { for (i = output_height; i--;) { /* processing 4 adjacent pixels */ for (j = 0; j < 16; j += 4) { /* apply filter with vectors pairs */ __asm__ __volatile__( "ulw %[tp1], -1(%[src_ptr]) " "\n\t" "ulw %[tp2], 3(%[src_ptr]) " "\n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 " "\n\t" "mthi $0, $ac3 " "\n\t" "preceu.ph.qbr %[p1], %[tp1] " "\n\t" "preceu.ph.qbl %[p2], %[tp1] " "\n\t" "preceu.ph.qbr %[p3], %[tp2] " "\n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] " "\n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] " "\n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 " "\n\t" "mthi $0, $ac2 " "\n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] " "\n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] " "\n\t" "extr.w %[Temp1], $ac3, 7 " "\n\t" /* odd 1. pixel */ "ulw %[tn1], 4(%[src_ptr]) " "\n\t" "balign %[tp2], %[tp1], 3 " "\n\t" "mtlo %[vector4a], $ac3 " "\n\t" "mthi $0, $ac3 " "\n\t" "preceu.ph.qbr %[n1], %[tp2] " "\n\t" "preceu.ph.qbl %[n2], %[tp2] " "\n\t" "preceu.ph.qbr %[n3], %[tn1] " "\n\t" "extr.w %[Temp3], $ac2, 7 " "\n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] " "\n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] " "\n\t" /* odd 2. pixel */ "mtlo %[vector4a], $ac2 " "\n\t" "mthi $0, $ac2 " "\n\t" "extr.w %[Temp2], $ac3, 7 " "\n\t" "dpa.w.ph $ac2, %[n2], %[vector1b] " "\n\t" "dpa.w.ph $ac2, %[n3], %[vector2b] " "\n\t" "extr.w %[Temp4], $ac2, 7 " "\n\t" /* clamp and store results */ "lbux %[tp1], %[Temp1](%[cm]) " "\n\t" "lbux %[tn1], %[Temp2](%[cm]) " "\n\t" "lbux %[tp2], %[Temp3](%[cm]) " "\n\t" "sb %[tp1], 0(%[output_ptr]) " "\n\t" "sb %[tn1], 1(%[output_ptr]) " "\n\t" "lbux %[n2], %[Temp4](%[cm]) " "\n\t" "sb %[tp2], 2(%[output_ptr]) " "\n\t" "sb %[n2], 3(%[output_ptr]) " "\n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [n1] "=&r"(n1), [n2] "=&r"(n2), [n3] "=&r"(n3), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector4a] "r"(vector4a), [cm] "r"(cm), [output_ptr] "r"(output_ptr), [src_ptr] "r"(src_ptr)); src_ptr += 4; } /* next row... 
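   (the first pass lays its rows out output_width bytes apart in the
   intermediate buffer, which the second pass then filters vertically)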
*/ src_ptr += src_pixels_per_line; output_ptr += output_width; } } } void vp8_filter_block2d_second_pass4(unsigned char *RESTRICT src_ptr, unsigned char *RESTRICT output_ptr, int output_pitch, int yoffset) { unsigned int i; int Temp1, Temp2, Temp3, Temp4; unsigned int vector1b, vector2b, vector3b, vector4a; unsigned char src_ptr_l2; unsigned char src_ptr_l1; unsigned char src_ptr_0; unsigned char src_ptr_r1; unsigned char src_ptr_r2; unsigned char src_ptr_r3; unsigned char *cm = ff_cropTbl + CROP_WIDTH; vector4a = 64; /* load filter coefficients */ vector1b = sub_pel_filterss[yoffset][0]; vector2b = sub_pel_filterss[yoffset][2]; vector3b = sub_pel_filterss[yoffset][1]; if (vector1b) { /* 6 tap filter */ for (i = 2; i--;) { /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr); /* do not allow compiler to reorder instructions */ __asm__ __volatile__( ".set noreorder \n\t" : :); /* apply filter with vectors pairs */ __asm__ __volatile__( "lbu %[src_ptr_l2], -8(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 12(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -7(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 13(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "extp %[Temp1], $ac2, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -6(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 14(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "extp %[Temp2], $ac3, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -5(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 15(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "extp %[Temp3], $ac0, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp4], $ac1, 9 \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=r"(Temp4), [src_ptr_l1] "=&r"(src_ptr_l1), [src_ptr_0] "=&r"(src_ptr_0), [src_ptr_r1] "=&r"(src_ptr_r1), [src_ptr_r2] "=&r"(src_ptr_r2), 
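          /* "=&r" marks these outputs as early-clobber: they are written
             before all inputs have been consumed, so the compiler must keep
             them in registers distinct from the input operands */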
[src_ptr_l2] "=&r"(src_ptr_l2), [src_ptr_r3] "=&r"(src_ptr_r3) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr)); /* clamp and store results */ output_ptr[0] = cm[Temp1]; output_ptr[1] = cm[Temp2]; output_ptr[2] = cm[Temp3]; output_ptr[3] = cm[Temp4]; output_ptr += output_pitch; /* apply filter with vectors pairs */ __asm__ __volatile__( "lbu %[src_ptr_l2], -4(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 16(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -3(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 17(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "extp %[Temp1], $ac2, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -2(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 18(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "extp %[Temp2], $ac3, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -1(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 19(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "extp %[Temp3], $ac0, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp4], $ac1, 9 \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=r"(Temp4), [src_ptr_l1] "=&r"(src_ptr_l1), [src_ptr_0] "=&r"(src_ptr_0), [src_ptr_r1] "=&r"(src_ptr_r1), [src_ptr_r2] "=&r"(src_ptr_r2), [src_ptr_l2] "=&r"(src_ptr_l2), [src_ptr_r3] "=&r"(src_ptr_r3) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr)); /* clamp and store results */ output_ptr[0] = cm[Temp1]; output_ptr[1] = cm[Temp2]; output_ptr[2] = cm[Temp3]; output_ptr[3] = cm[Temp4]; src_ptr += 8; output_ptr += output_pitch; } } else { /* 4 tap filter */ /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr); for (i = 2; i--;) { /* do not allow compiler to reorder instructions */ __asm__ 
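      /* the volatile qualifier keeps this asm block, whose visible effect is
         carried in the DSP accumulators, from being deleted or reordered */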
__volatile__( ".set noreorder \n\t" : :); /* apply filter with vectors pairs */ __asm__ __volatile__( "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 4(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 8(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 5(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 9(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "extp %[Temp1], $ac2, 9 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 6(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 10(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "extp %[Temp2], $ac3, 9 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 7(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 11(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "extp %[Temp3], $ac0, 9 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp4], $ac1, 9 \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=r"(Temp4), [src_ptr_l1] "=&r"(src_ptr_l1), [src_ptr_0] "=&r"(src_ptr_0), [src_ptr_r1] "=&r"(src_ptr_r1), [src_ptr_r2] "=&r"(src_ptr_r2) : [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr)); /* clamp and store results */ output_ptr[0] = cm[Temp1]; output_ptr[1] = cm[Temp2]; output_ptr[2] = cm[Temp3]; output_ptr[3] = cm[Temp4]; output_ptr += output_pitch; /* apply filter with vectors pairs */ __asm__ __volatile__( "lbu %[src_ptr_l1], 0(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 12(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l1], 1(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 13(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "extp %[Temp1], $ac2, 9 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l1], 2(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 14(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "extp %[Temp2], $ac3, 9 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l1], 3(%[src_ptr]) \n\t" "lbu 
%[src_ptr_0], 7(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 15(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "extp %[Temp3], $ac0, 9 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp4], $ac1, 9 \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=r"(Temp4), [src_ptr_l1] "=&r"(src_ptr_l1), [src_ptr_0] "=&r"(src_ptr_0), [src_ptr_r1] "=&r"(src_ptr_r1), [src_ptr_r2] "=&r"(src_ptr_r2) : [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr)); /* clamp and store results */ output_ptr[0] = cm[Temp1]; output_ptr[1] = cm[Temp2]; output_ptr[2] = cm[Temp3]; output_ptr[3] = cm[Temp4]; src_ptr += 8; output_ptr += output_pitch; } } } void vp8_filter_block2d_second_pass_8(unsigned char *RESTRICT src_ptr, unsigned char *RESTRICT output_ptr, int output_pitch, unsigned int output_height, unsigned int output_width, unsigned int yoffset) { unsigned int i; int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8; unsigned int vector1b, vector2b, vector3b, vector4a; unsigned char src_ptr_l2; unsigned char src_ptr_l1; unsigned char src_ptr_0; unsigned char src_ptr_r1; unsigned char src_ptr_r2; unsigned char src_ptr_r3; unsigned char *cm = ff_cropTbl + CROP_WIDTH; (void)output_width; vector4a = 64; vector1b = sub_pel_filterss[yoffset][0]; vector2b = sub_pel_filterss[yoffset][2]; vector3b = sub_pel_filterss[yoffset][1]; if (vector1b) { /* 6 tap filter */ /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr); for (i = output_height; i--;) { /* apply filter with vectors pairs */ __asm__ __volatile__( "lbu %[src_ptr_l2], -16(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 24(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -15(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 25(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "extp %[Temp1], $ac2, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -14(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 10(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 18(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 26(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "extp %[Temp2], $ac3, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" "lbu 
%[src_ptr_l2], -13(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 11(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 19(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 27(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "extp %[Temp3], $ac0, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [src_ptr_l1] "=&r"(src_ptr_l1), [src_ptr_0] "=&r"(src_ptr_0), [src_ptr_r1] "=&r"(src_ptr_r1), [src_ptr_r2] "=&r"(src_ptr_r2), [src_ptr_l2] "=&r"(src_ptr_l2), [src_ptr_r3] "=&r"(src_ptr_r3) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr)); /* apply filter with vectors pairs */ __asm__ __volatile__( "lbu %[src_ptr_l2], -12(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 12(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 20(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 28(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp4], $ac1, 9 \n\t" "lbu %[src_ptr_l2], -11(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 13(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 21(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 29(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "extp %[Temp5], $ac2, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -10(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 14(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 22(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 30(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "extp %[Temp6], $ac3, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -9(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 15(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 23(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 31(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "extp %[Temp7], $ac0, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp8], $ac1, 9 \n\t" : [Temp4] "=&r"(Temp4), [Temp5] "=&r"(Temp5), [Temp6] "=&r"(Temp6), [Temp7] "=&r"(Temp7), [Temp8] "=r"(Temp8), 
[src_ptr_l1] "=&r"(src_ptr_l1), [src_ptr_0] "=&r"(src_ptr_0), [src_ptr_r1] "=&r"(src_ptr_r1), [src_ptr_r2] "=&r"(src_ptr_r2), [src_ptr_l2] "=&r"(src_ptr_l2), [src_ptr_r3] "=&r"(src_ptr_r3) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr)); /* clamp and store results */ output_ptr[0] = cm[Temp1]; output_ptr[1] = cm[Temp2]; output_ptr[2] = cm[Temp3]; output_ptr[3] = cm[Temp4]; output_ptr[4] = cm[Temp5]; output_ptr[5] = cm[Temp6]; output_ptr[6] = cm[Temp7]; output_ptr[7] = cm[Temp8]; src_ptr += 8; output_ptr += output_pitch; } } else { /* 4 tap filter */ /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr); for (i = output_height; i--;) { __asm__ __volatile__( "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 8(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 16(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" : [src_ptr_l1] "=&r"(src_ptr_l1), [src_ptr_0] "=&r"(src_ptr_0), [src_ptr_r1] "=&r"(src_ptr_r1), [src_ptr_r2] "=&r"(src_ptr_r2) : [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr)); __asm__ __volatile__( "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 9(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 17(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp1], $ac2, 9 \n\t" : [Temp1] "=r"(Temp1), [src_ptr_l1] "=&r"(src_ptr_l1), [src_ptr_0] "=&r"(src_ptr_0), [src_ptr_r1] "=&r"(src_ptr_r1), [src_ptr_r2] "=&r"(src_ptr_r2) : [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr)); src_ptr_l1 = src_ptr[-6]; src_ptr_0 = src_ptr[2]; src_ptr_r1 = src_ptr[10]; src_ptr_r2 = src_ptr[18]; __asm__ __volatile__( "mtlo %[vector4a], $ac0 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp2], $ac3, 9 \n\t" : [Temp2] "=r"(Temp2) : [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [src_ptr_l1] "r"(src_ptr_l1), [src_ptr_0] "r"(src_ptr_0), [src_ptr_r1] "r"(src_ptr_r1), [src_ptr_r2] "r"(src_ptr_r2), [vector4a] "r"(vector4a)); src_ptr_l1 = src_ptr[-5]; src_ptr_0 = src_ptr[3]; src_ptr_r1 = src_ptr[11]; src_ptr_r2 = src_ptr[19]; __asm__ __volatile__( "mtlo %[vector4a], $ac1 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp3], $ac0, 9 \n\t" : [Temp3] "=r"(Temp3) : [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [src_ptr_l1] "r"(src_ptr_l1), [src_ptr_0] "r"(src_ptr_0), [src_ptr_r1] "r"(src_ptr_r1), [src_ptr_r2] "r"(src_ptr_r2), [vector4a] "r"(vector4a)); src_ptr_l1 = src_ptr[-4]; src_ptr_0 = src_ptr[4]; src_ptr_r1 = src_ptr[12]; src_ptr_r2 = src_ptr[20]; __asm__ __volatile__( "mtlo %[vector4a], $ac2 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" 
"dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp4], $ac1, 9 \n\t" : [Temp4] "=r"(Temp4) : [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [src_ptr_l1] "r"(src_ptr_l1), [src_ptr_0] "r"(src_ptr_0), [src_ptr_r1] "r"(src_ptr_r1), [src_ptr_r2] "r"(src_ptr_r2), [vector4a] "r"(vector4a)); src_ptr_l1 = src_ptr[-3]; src_ptr_0 = src_ptr[5]; src_ptr_r1 = src_ptr[13]; src_ptr_r2 = src_ptr[21]; __asm__ __volatile__( "mtlo %[vector4a], $ac3 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp5], $ac2, 9 \n\t" : [Temp5] "=&r"(Temp5) : [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [src_ptr_l1] "r"(src_ptr_l1), [src_ptr_0] "r"(src_ptr_0), [src_ptr_r1] "r"(src_ptr_r1), [src_ptr_r2] "r"(src_ptr_r2), [vector4a] "r"(vector4a)); src_ptr_l1 = src_ptr[-2]; src_ptr_0 = src_ptr[6]; src_ptr_r1 = src_ptr[14]; src_ptr_r2 = src_ptr[22]; __asm__ __volatile__( "mtlo %[vector4a], $ac0 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp6], $ac3, 9 \n\t" : [Temp6] "=r"(Temp6) : [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [src_ptr_l1] "r"(src_ptr_l1), [src_ptr_0] "r"(src_ptr_0), [src_ptr_r1] "r"(src_ptr_r1), [src_ptr_r2] "r"(src_ptr_r2), [vector4a] "r"(vector4a)); src_ptr_l1 = src_ptr[-1]; src_ptr_0 = src_ptr[7]; src_ptr_r1 = src_ptr[15]; src_ptr_r2 = src_ptr[23]; __asm__ __volatile__( "mtlo %[vector4a], $ac1 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp7], $ac0, 9 \n\t" "extp %[Temp8], $ac1, 9 \n\t" : [Temp7] "=&r"(Temp7), [Temp8] "=r"(Temp8) : [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [src_ptr_l1] "r"(src_ptr_l1), [src_ptr_0] "r"(src_ptr_0), [src_ptr_r1] "r"(src_ptr_r1), [src_ptr_r2] "r"(src_ptr_r2), [vector4a] "r"(vector4a)); /* clamp and store results */ output_ptr[0] = cm[Temp1]; output_ptr[1] = cm[Temp2]; output_ptr[2] = cm[Temp3]; output_ptr[3] = cm[Temp4]; output_ptr[4] = cm[Temp5]; output_ptr[5] = cm[Temp6]; output_ptr[6] = cm[Temp7]; output_ptr[7] = cm[Temp8]; src_ptr += 8; output_ptr += output_pitch; } } } void vp8_filter_block2d_second_pass161(unsigned char *RESTRICT src_ptr, unsigned char *RESTRICT output_ptr, int output_pitch, const unsigned short *vp8_filter) { unsigned int i, j; int Temp1, Temp2, Temp3, Temp4, Temp5, Temp6, Temp7, Temp8; unsigned int vector4a; unsigned int vector1b, vector2b, vector3b; unsigned char src_ptr_l2; unsigned char src_ptr_l1; unsigned char src_ptr_0; unsigned char src_ptr_r1; unsigned char src_ptr_r2; unsigned char src_ptr_r3; unsigned char *cm = ff_cropTbl + CROP_WIDTH; vector4a = 64; vector1b = vp8_filter[0]; vector2b = vp8_filter[2]; vector3b = vp8_filter[1]; if (vector1b == 0) { /* 4 tap filter */ /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr + 16); for (i = 16; i--;) { /* unrolling for loop */ for (j = 0; j < 16; j += 8) { /* apply filter with vectors pairs */ __asm__ __volatile__( "lbu %[src_ptr_l1], -16(%[src_ptr]) " "\n\t" "lbu %[src_ptr_0], 0(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r1], 16(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r2], 32(%[src_ptr]) " "\n\t" "mtlo %[vector4a], $ac2 " "\n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 " "\n\t" "append 
%[src_ptr_l1], %[src_ptr_r2], 8 " "\n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] " "\n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] " "\n\t" "lbu %[src_ptr_l1], -15(%[src_ptr]) " "\n\t" "lbu %[src_ptr_0], 1(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r1], 17(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r2], 33(%[src_ptr]) " "\n\t" "mtlo %[vector4a], $ac3 " "\n\t" "extp %[Temp1], $ac2, 9 " "\n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 " "\n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 " "\n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] " "\n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] " "\n\t" "lbu %[src_ptr_l1], -14(%[src_ptr]) " "\n\t" "lbu %[src_ptr_0], 2(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r1], 18(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r2], 34(%[src_ptr]) " "\n\t" "mtlo %[vector4a], $ac1 " "\n\t" "extp %[Temp2], $ac3, 9 " "\n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 " "\n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 " "\n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] " "\n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] " "\n\t" "lbu %[src_ptr_l1], -13(%[src_ptr]) " "\n\t" "lbu %[src_ptr_0], 3(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r1], 19(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r2], 35(%[src_ptr]) " "\n\t" "mtlo %[vector4a], $ac3 " "\n\t" "extp %[Temp3], $ac1, 9 " "\n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 " "\n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 " "\n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] " "\n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] " "\n\t" "lbu %[src_ptr_l1], -12(%[src_ptr]) " "\n\t" "lbu %[src_ptr_0], 4(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r1], 20(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r2], 36(%[src_ptr]) " "\n\t" "mtlo %[vector4a], $ac2 " "\n\t" "extp %[Temp4], $ac3, 9 " "\n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 " "\n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 " "\n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] " "\n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] " "\n\t" "lbu %[src_ptr_l1], -11(%[src_ptr]) " "\n\t" "lbu %[src_ptr_0], 5(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r1], 21(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r2], 37(%[src_ptr]) " "\n\t" "mtlo %[vector4a], $ac3 " "\n\t" "extp %[Temp5], $ac2, 9 " "\n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 " "\n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 " "\n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] " "\n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] " "\n\t" "lbu %[src_ptr_l1], -10(%[src_ptr]) " "\n\t" "lbu %[src_ptr_0], 6(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r1], 22(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r2], 38(%[src_ptr]) " "\n\t" "mtlo %[vector4a], $ac1 " "\n\t" "extp %[Temp6], $ac3, 9 " "\n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 " "\n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 " "\n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] " "\n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] " "\n\t" "lbu %[src_ptr_l1], -9(%[src_ptr]) " "\n\t" "lbu %[src_ptr_0], 7(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r1], 23(%[src_ptr]) " "\n\t" "lbu %[src_ptr_r2], 39(%[src_ptr]) " "\n\t" "mtlo %[vector4a], $ac3 " "\n\t" "extp %[Temp7], $ac1, 9 " "\n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 " "\n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 " "\n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] " "\n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] " "\n\t" "extp %[Temp8], $ac3, 9 " "\n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4), [Temp5] "=&r"(Temp5), [Temp6] "=&r"(Temp6), [Temp7] "=&r"(Temp7), [Temp8] "=r"(Temp8), [src_ptr_l1] "=&r"(src_ptr_l1), [src_ptr_0] "=&r"(src_ptr_0), [src_ptr_r1] 
"=&r"(src_ptr_r1), [src_ptr_r2] "=&r"(src_ptr_r2) : [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr)); /* clamp and store results */ output_ptr[j] = cm[Temp1]; output_ptr[j + 1] = cm[Temp2]; output_ptr[j + 2] = cm[Temp3]; output_ptr[j + 3] = cm[Temp4]; output_ptr[j + 4] = cm[Temp5]; output_ptr[j + 5] = cm[Temp6]; output_ptr[j + 6] = cm[Temp7]; output_ptr[j + 7] = cm[Temp8]; src_ptr += 8; } output_ptr += output_pitch; } } else { /* 4 tap filter */ /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr + 16); /* unroll for loop */ for (i = 16; i--;) { /* apply filter with vectors pairs */ __asm__ __volatile__( "lbu %[src_ptr_l2], -32(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -16(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 0(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 16(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 32(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 48(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -31(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -15(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 1(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 17(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 33(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 49(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "extp %[Temp1], $ac2, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -30(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -14(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 2(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 18(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 34(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 50(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "extp %[Temp2], $ac0, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -29(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -13(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 3(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 19(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 35(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 51(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "extp %[Temp3], $ac1, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -28(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -12(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 4(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 20(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 36(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 52(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "extp %[Temp4], $ac3, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac2, %[src_ptr_0], 
%[vector2b] \n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -27(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -11(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 5(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 21(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 37(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 53(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "extp %[Temp5], $ac2, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -26(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -10(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 6(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 22(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 38(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 54(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "extp %[Temp6], $ac0, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -25(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -9(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 7(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 23(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 39(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 55(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "extp %[Temp7], $ac1, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp8], $ac3, 9 \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4), [Temp5] "=&r"(Temp5), [Temp6] "=&r"(Temp6), [Temp7] "=&r"(Temp7), [Temp8] "=r"(Temp8), [src_ptr_l1] "=&r"(src_ptr_l1), [src_ptr_0] "=&r"(src_ptr_0), [src_ptr_r1] "=&r"(src_ptr_r1), [src_ptr_r2] "=&r"(src_ptr_r2), [src_ptr_l2] "=&r"(src_ptr_l2), [src_ptr_r3] "=&r"(src_ptr_r3) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr)); /* clamp and store results */ output_ptr[0] = cm[Temp1]; output_ptr[1] = cm[Temp2]; output_ptr[2] = cm[Temp3]; output_ptr[3] = cm[Temp4]; output_ptr[4] = cm[Temp5]; output_ptr[5] = cm[Temp6]; output_ptr[6] = cm[Temp7]; output_ptr[7] = cm[Temp8]; /* apply filter with vectors pairs */ __asm__ __volatile__( "lbu %[src_ptr_l2], -24(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -8(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 8(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 24(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 40(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 56(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -23(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -7(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 9(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 25(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 41(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 57(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "extp %[Temp1], $ac2, 9 \n\t" 
"append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -22(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -6(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 10(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 26(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 42(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 58(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "extp %[Temp2], $ac0, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -21(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -5(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 11(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 27(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 43(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 59(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "extp %[Temp3], $ac1, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -20(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -4(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 12(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 28(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 44(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 60(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "extp %[Temp4], $ac3, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac2, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac2, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac2, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -19(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -3(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 13(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 29(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 45(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 61(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "extp %[Temp5], $ac2, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac0, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac0, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac0, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -18(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -2(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 14(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 30(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 46(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 62(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "extp %[Temp6], $ac0, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac1, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac1, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac1, %[src_ptr_l1], %[vector3b] \n\t" "lbu %[src_ptr_l2], -17(%[src_ptr]) \n\t" "lbu %[src_ptr_l1], -1(%[src_ptr]) \n\t" "lbu %[src_ptr_0], 15(%[src_ptr]) \n\t" "lbu %[src_ptr_r1], 31(%[src_ptr]) \n\t" "lbu %[src_ptr_r2], 47(%[src_ptr]) \n\t" "lbu %[src_ptr_r3], 63(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "extp 
%[Temp7], $ac1, 9 \n\t" "append %[src_ptr_l2], %[src_ptr_r3], 8 \n\t" "append %[src_ptr_0], %[src_ptr_r1], 8 \n\t" "append %[src_ptr_l1], %[src_ptr_r2], 8 \n\t" "dpau.h.qbr $ac3, %[src_ptr_l2], %[vector1b] \n\t" "dpau.h.qbr $ac3, %[src_ptr_0], %[vector2b] \n\t" "dpsu.h.qbr $ac3, %[src_ptr_l1], %[vector3b] \n\t" "extp %[Temp8], $ac3, 9 \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4), [Temp5] "=&r"(Temp5), [Temp6] "=&r"(Temp6), [Temp7] "=&r"(Temp7), [Temp8] "=r"(Temp8), [src_ptr_l1] "=&r"(src_ptr_l1), [src_ptr_0] "=&r"(src_ptr_0), [src_ptr_r1] "=&r"(src_ptr_r1), [src_ptr_r2] "=&r"(src_ptr_r2), [src_ptr_l2] "=&r"(src_ptr_l2), [src_ptr_r3] "=&r"(src_ptr_r3) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4a] "r"(vector4a), [src_ptr] "r"(src_ptr)); src_ptr += 16; output_ptr[8] = cm[Temp1]; output_ptr[9] = cm[Temp2]; output_ptr[10] = cm[Temp3]; output_ptr[11] = cm[Temp4]; output_ptr[12] = cm[Temp5]; output_ptr[13] = cm[Temp6]; output_ptr[14] = cm[Temp7]; output_ptr[15] = cm[Temp8]; output_ptr += output_pitch; } } } void vp8_sixtap_predict4x4_dspr2(unsigned char *RESTRICT src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *RESTRICT dst_ptr, int dst_pitch) { unsigned char FData[9 * 4]; /* Temp data buffer used in filtering */ unsigned int pos = 16; /* bit position for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); if (yoffset) { /* First filter 1-D horizontally... */ vp8_filter_block2d_first_pass_4(src_ptr - (2 * src_pixels_per_line), FData, src_pixels_per_line, 9, xoffset, 4); /* then filter vertically... */ vp8_filter_block2d_second_pass4(FData + 8, dst_ptr, dst_pitch, yoffset); } else /* if (yoffset == 0) vp8_filter_block2d_first_pass saves data to dst_ptr */ vp8_filter_block2d_first_pass_4(src_ptr, dst_ptr, src_pixels_per_line, 4, xoffset, dst_pitch); } void vp8_sixtap_predict8x8_dspr2(unsigned char *RESTRICT src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *RESTRICT dst_ptr, int dst_pitch) { unsigned char FData[13 * 8]; /* Temp data buffer used in filtering */ unsigned int pos, Temp1, Temp2; pos = 16; /* bit position for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); if (yoffset) { src_ptr = src_ptr - (2 * src_pixels_per_line); if (xoffset) /* filter 1-D horizontally...
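 * into the FData scratch buffer. A note on the row count in the call below:
 * the vertical 6-tap pass needs 2 rows above and 3 rows below each output
 * row, so for 8 output rows the horizontal pass produces 8 + 5 = 13
 * intermediate rows, starting 2 rows above the block (src_ptr was rewound
 * by 2 * src_pixels_per_line above).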
*/ vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line, 13, xoffset, 8); else { /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr + 2 * src_pixels_per_line); __asm__ __volatile__( "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 0(%[FData]) \n\t" "sw %[Temp2], 4(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 8(%[FData]) \n\t" "sw %[Temp2], 12(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 16(%[FData]) \n\t" "sw %[Temp2], 20(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 24(%[FData]) \n\t" "sw %[Temp2], 28(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 32(%[FData]) \n\t" "sw %[Temp2], 36(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 40(%[FData]) \n\t" "sw %[Temp2], 44(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 48(%[FData]) \n\t" "sw %[Temp2], 52(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 56(%[FData]) \n\t" "sw %[Temp2], 60(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 64(%[FData]) \n\t" "sw %[Temp2], 68(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 72(%[FData]) \n\t" "sw %[Temp2], 76(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 80(%[FData]) \n\t" "sw %[Temp2], 84(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 88(%[FData]) \n\t" "sw %[Temp2], 92(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 96(%[FData]) \n\t" "sw %[Temp2], 100(%[FData]) \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2) : [FData] "r"(FData), [src_ptr] "r"(src_ptr), [src_pixels_per_line] "r"(src_pixels_per_line)); } /* filter vertically...
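 * The vertical pass starts at FData + 16, i.e. 2 rows into the 8-byte-wide
 * scratch buffer, so the 6-tap kernel can reach the 2 border rows stored
 * above the block as well as the 3 rows below it.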
*/ vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 8, 8, yoffset); } /* if (yoffset == 0) vp8_filter_block2d_first_pass saves data to dst_ptr */ else { if (xoffset) vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line, 8, xoffset, dst_pitch); else { /* copy from src buffer to dst buffer */ __asm__ __volatile__( "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 0(%[dst_ptr]) \n\t" "sw %[Temp2], 4(%[dst_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 8(%[dst_ptr]) \n\t" "sw %[Temp2], 12(%[dst_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 16(%[dst_ptr]) \n\t" "sw %[Temp2], 20(%[dst_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 24(%[dst_ptr]) \n\t" "sw %[Temp2], 28(%[dst_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 32(%[dst_ptr]) \n\t" "sw %[Temp2], 36(%[dst_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 40(%[dst_ptr]) \n\t" "sw %[Temp2], 44(%[dst_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 48(%[dst_ptr]) \n\t" "sw %[Temp2], 52(%[dst_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 56(%[dst_ptr]) \n\t" "sw %[Temp2], 60(%[dst_ptr]) \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2) : [dst_ptr] "r"(dst_ptr), [src_ptr] "r"(src_ptr), [src_pixels_per_line] "r"(src_pixels_per_line)); } } } void vp8_sixtap_predict8x4_dspr2(unsigned char *RESTRICT src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *RESTRICT dst_ptr, int dst_pitch) { unsigned char FData[9 * 8]; /* Temp data buffer used in filtering */ unsigned int pos, Temp1, Temp2; pos = 16; /* bit position for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); if (yoffset) { src_ptr = src_ptr - (2 * src_pixels_per_line); if (xoffset) /* filter 1-D horizontally...
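 * into FData; for an 8x4 block the vertical 6-tap pass only needs
 * 4 + 5 = 9 intermediate rows.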
*/ vp8_filter_block2d_first_pass_8_all(src_ptr, FData, src_pixels_per_line, 9, xoffset, 8); else { /* prefetch src_ptr data to cache memory */ prefetch_load(src_ptr + 2 * src_pixels_per_line); __asm__ __volatile__( "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 0(%[FData]) \n\t" "sw %[Temp2], 4(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 8(%[FData]) \n\t" "sw %[Temp2], 12(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 16(%[FData]) \n\t" "sw %[Temp2], 20(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 24(%[FData]) \n\t" "sw %[Temp2], 28(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 32(%[FData]) \n\t" "sw %[Temp2], 36(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 40(%[FData]) \n\t" "sw %[Temp2], 44(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 48(%[FData]) \n\t" "sw %[Temp2], 52(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 56(%[FData]) \n\t" "sw %[Temp2], 60(%[FData]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 64(%[FData]) \n\t" "sw %[Temp2], 68(%[FData]) \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2) : [FData] "r"(FData), [src_ptr] "r"(src_ptr), [src_pixels_per_line] "r"(src_pixels_per_line)); } /* filter vertically...
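 * As in the 8x8 case, the vertical pass skips the first 2 scratch rows
 * (FData + 16) and writes 4 output rows of width 8.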
*/ vp8_filter_block2d_second_pass_8(FData + 16, dst_ptr, dst_pitch, 4, 8, yoffset); } /* if (yoffset == 0) vp8_filter_block2d_first_pass saves data to dst_ptr */ else { if (xoffset) vp8_filter_block2d_first_pass_8_all(src_ptr, dst_ptr, src_pixels_per_line, 4, xoffset, dst_pitch); else { /* copy from src buffer to dst buffer */ __asm__ __volatile__( "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 0(%[dst_ptr]) \n\t" "sw %[Temp2], 4(%[dst_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 8(%[dst_ptr]) \n\t" "sw %[Temp2], 12(%[dst_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 16(%[dst_ptr]) \n\t" "sw %[Temp2], 20(%[dst_ptr]) \n\t" "addu %[src_ptr], %[src_ptr], %[src_pixels_per_line] \n\t" "ulw %[Temp1], 0(%[src_ptr]) \n\t" "ulw %[Temp2], 4(%[src_ptr]) \n\t" "sw %[Temp1], 24(%[dst_ptr]) \n\t" "sw %[Temp2], 28(%[dst_ptr]) \n\t" : [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2) : [dst_ptr] "r"(dst_ptr), [src_ptr] "r"(src_ptr), [src_pixels_per_line] "r"(src_pixels_per_line)); } } } void vp8_sixtap_predict16x16_dspr2(unsigned char *RESTRICT src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *RESTRICT dst_ptr, int dst_pitch) { const unsigned short *VFilter; unsigned char FData[21 * 16]; /* Temp data buffer used in filtering */ unsigned int pos; VFilter = sub_pel_filterss[yoffset]; pos = 16; /* bit position for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); if (yoffset) { src_ptr = src_ptr - (2 * src_pixels_per_line); switch (xoffset) { /* filter 1-D horizontally... */ case 2: case 4: case 6: /* 6 tap filter */ vp8_filter_block2d_first_pass16_6tap( src_ptr, FData, src_pixels_per_line, 21, xoffset, 16); break; case 0: /* only copy buffer */ vp8_filter_block2d_first_pass16_0(src_ptr, FData, src_pixels_per_line); break; case 1: case 3: case 5: case 7: /* 4 tap filter */ vp8_filter_block2d_first_pass16_4tap( src_ptr, FData, src_pixels_per_line, 16, 21, xoffset, yoffset, dst_ptr, dst_pitch); break; } /* filter vertically... */ vp8_filter_block2d_second_pass161(FData + 32, dst_ptr, dst_pitch, VFilter); } else { /* if (yoffset == 0) vp8_filter_block2d_first_pass saves data to dst_ptr */ switch (xoffset) { case 2: case 4: case 6: /* 6 tap filter */ vp8_filter_block2d_first_pass16_6tap( src_ptr, dst_ptr, src_pixels_per_line, 16, xoffset, dst_pitch); break; case 1: case 3: case 5: case 7: /* 4 tap filter */ vp8_filter_block2d_first_pass16_4tap( src_ptr, dst_ptr, src_pixels_per_line, 16, 21, xoffset, yoffset, dst_ptr, dst_pitch); break; } } } #endif libvpx-1.8.2/vp8/common/mips/dspr2/idct_blk_dspr2.c000066400000000000000000000036131357355204000220750ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include "vpx_config.h" #include "vp8_rtcd.h" #if HAVE_DSPR2 void vp8_dequant_idct_add_y_block_dspr2(short *q, short *dq, unsigned char *dst, int stride, char *eobs) { int i, j; for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) { if (*eobs++ > 1) vp8_dequant_idct_add_dspr2(q, dq, dst, stride); else { vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dst, stride, dst, stride); ((int *)q)[0] = 0; } q += 16; dst += 4; } dst += 4 * stride - 16; } } void vp8_dequant_idct_add_uv_block_dspr2(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs) { int i, j; for (i = 0; i < 2; ++i) { for (j = 0; j < 2; ++j) { if (*eobs++ > 1) vp8_dequant_idct_add_dspr2(q, dq, dst_u, stride); else { vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dst_u, stride, dst_u, stride); ((int *)q)[0] = 0; } q += 16; dst_u += 4; } dst_u += 4 * stride - 8; } for (i = 0; i < 2; ++i) { for (j = 0; j < 2; ++j) { if (*eobs++ > 1) vp8_dequant_idct_add_dspr2(q, dq, dst_v, stride); else { vp8_dc_only_idct_add_dspr2(q[0] * dq[0], dst_v, stride, dst_v, stride); ((int *)q)[0] = 0; } q += 16; dst_v += 4; } dst_v += 4 * stride - 8; } } #endif libvpx-1.8.2/vp8/common/mips/dspr2/idctllm_dspr2.c000066400000000000000000000233221357355204000217510ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp8_rtcd.h" #if HAVE_DSPR2 #define CROP_WIDTH 256 /****************************************************************************** * Notes: * * This implementation makes use of 16 bit fixed point version of two multiply * constants: * 1. sqrt(2) * cos (pi/8) * 2. sqrt(2) * sin (pi/8) * Since the first constant is bigger than 1, to maintain the same 16 bit * fixed point precision as the second one, we use a trick of * x * a = x + x*(a-1) * so * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). 
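 *
 * With 16 bit precision the two constants below work out to
 * cospi8sqrt2minus1 = round((sqrt(2) * cos(pi/8) - 1) * 65536) = 20091
 * sinpi8sqrt2 = round(sqrt(2) * sin(pi/8) * 65536) = 35468
 * so, for example, ip[4] * sqrt(2) * cos(pi/8) is evaluated as
 * ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16), the pattern used
 * throughout the transform below.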
****************************************************************************/ extern unsigned char ff_cropTbl[256 + 2 * CROP_WIDTH]; static const int cospi8sqrt2minus1 = 20091; static const int sinpi8sqrt2 = 35468; inline void prefetch_load_short(short *src) { __asm__ __volatile__("pref 0, 0(%[src]) \n\t" : : [src] "r"(src)); } void vp8_short_idct4x4llm_dspr2(short *input, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride) { int r, c; int a1, b1, c1, d1; short output[16]; short *ip = input; short *op = output; int temp1, temp2; int shortpitch = 4; int c2, d2; int temp3, temp4; unsigned char *cm = ff_cropTbl + CROP_WIDTH; /* prepare data for load */ prefetch_load_short(ip + 8); /* first loop is unrolled */ a1 = ip[0] + ip[8]; b1 = ip[0] - ip[8]; temp1 = (ip[4] * sinpi8sqrt2) >> 16; temp2 = ip[12] + ((ip[12] * cospi8sqrt2minus1) >> 16); c1 = temp1 - temp2; temp1 = ip[4] + ((ip[4] * cospi8sqrt2minus1) >> 16); temp2 = (ip[12] * sinpi8sqrt2) >> 16; d1 = temp1 + temp2; temp3 = (ip[5] * sinpi8sqrt2) >> 16; temp4 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16); c2 = temp3 - temp4; temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16); temp4 = (ip[13] * sinpi8sqrt2) >> 16; d2 = temp3 + temp4; op[0] = a1 + d1; op[12] = a1 - d1; op[4] = b1 + c1; op[8] = b1 - c1; a1 = ip[1] + ip[9]; b1 = ip[1] - ip[9]; op[1] = a1 + d2; op[13] = a1 - d2; op[5] = b1 + c2; op[9] = b1 - c2; a1 = ip[2] + ip[10]; b1 = ip[2] - ip[10]; temp1 = (ip[6] * sinpi8sqrt2) >> 16; temp2 = ip[14] + ((ip[14] * cospi8sqrt2minus1) >> 16); c1 = temp1 - temp2; temp1 = ip[6] + ((ip[6] * cospi8sqrt2minus1) >> 16); temp2 = (ip[14] * sinpi8sqrt2) >> 16; d1 = temp1 + temp2; temp3 = (ip[7] * sinpi8sqrt2) >> 16; temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16); c2 = temp3 - temp4; temp3 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16); temp4 = (ip[15] * sinpi8sqrt2) >> 16; d2 = temp3 + temp4; op[2] = a1 + d1; op[14] = a1 - d1; op[6] = b1 + c1; op[10] = b1 - c1; a1 = ip[3] + ip[11]; b1 = ip[3] - ip[11]; op[3] = a1 + d2; op[15] = a1 - d2; op[7] = b1 + c2; op[11] = b1 - c2; ip = output; /* prepare data for load */ prefetch_load_short(ip + shortpitch); /* second loop is unrolled */ a1 = ip[0] + ip[2]; b1 = ip[0] - ip[2]; temp1 = (ip[1] * sinpi8sqrt2) >> 16; temp2 = ip[3] + ((ip[3] * cospi8sqrt2minus1) >> 16); c1 = temp1 - temp2; temp1 = ip[1] + ((ip[1] * cospi8sqrt2minus1) >> 16); temp2 = (ip[3] * sinpi8sqrt2) >> 16; d1 = temp1 + temp2; temp3 = (ip[5] * sinpi8sqrt2) >> 16; temp4 = ip[7] + ((ip[7] * cospi8sqrt2minus1) >> 16); c2 = temp3 - temp4; temp3 = ip[5] + ((ip[5] * cospi8sqrt2minus1) >> 16); temp4 = (ip[7] * sinpi8sqrt2) >> 16; d2 = temp3 + temp4; op[0] = (a1 + d1 + 4) >> 3; op[3] = (a1 - d1 + 4) >> 3; op[1] = (b1 + c1 + 4) >> 3; op[2] = (b1 - c1 + 4) >> 3; a1 = ip[4] + ip[6]; b1 = ip[4] - ip[6]; op[4] = (a1 + d2 + 4) >> 3; op[7] = (a1 - d2 + 4) >> 3; op[5] = (b1 + c2 + 4) >> 3; op[6] = (b1 - c2 + 4) >> 3; a1 = ip[8] + ip[10]; b1 = ip[8] - ip[10]; temp1 = (ip[9] * sinpi8sqrt2) >> 16; temp2 = ip[11] + ((ip[11] * cospi8sqrt2minus1) >> 16); c1 = temp1 - temp2; temp1 = ip[9] + ((ip[9] * cospi8sqrt2minus1) >> 16); temp2 = (ip[11] * sinpi8sqrt2) >> 16; d1 = temp1 + temp2; temp3 = (ip[13] * sinpi8sqrt2) >> 16; temp4 = ip[15] + ((ip[15] * cospi8sqrt2minus1) >> 16); c2 = temp3 - temp4; temp3 = ip[13] + ((ip[13] * cospi8sqrt2minus1) >> 16); temp4 = (ip[15] * sinpi8sqrt2) >> 16; d2 = temp3 + temp4; op[8] = (a1 + d1 + 4) >> 3; op[11] = (a1 - d1 + 4) >> 3; op[9] = (b1 + c1 + 4) >> 3; op[10] = (b1 - c1 + 4) >> 3; a1 = ip[12] + 
ip[14]; b1 = ip[12] - ip[14]; op[12] = (a1 + d2 + 4) >> 3; op[15] = (a1 - d2 + 4) >> 3; op[13] = (b1 + c2 + 4) >> 3; op[14] = (b1 - c2 + 4) >> 3; ip = output; for (r = 0; r < 4; ++r) { for (c = 0; c < 4; ++c) { short a = ip[c] + pred_ptr[c]; dst_ptr[c] = cm[a]; } ip += 4; dst_ptr += dst_stride; pred_ptr += pred_stride; } } void vp8_dc_only_idct_add_dspr2(short input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride) { int a1; int i, absa1; int t2, vector_a1, vector_a; /* a1 = ((input_dc + 4) >> 3); */ __asm__ __volatile__( "addi %[a1], %[input_dc], 4 \n\t" "sra %[a1], %[a1], 3 \n\t" : [a1] "=r"(a1) : [input_dc] "r"(input_dc)); if (a1 < 0) { /* use quad-byte * input and output memory are four byte aligned */ __asm__ __volatile__( "abs %[absa1], %[a1] \n\t" "replv.qb %[vector_a1], %[absa1] \n\t" : [absa1] "=r"(absa1), [vector_a1] "=r"(vector_a1) : [a1] "r"(a1)); /* use (a1 - predptr[c]) instead of a1 + predptr[c] */ for (i = 4; i--;) { __asm__ __volatile__( "lw %[t2], 0(%[pred_ptr]) \n\t" "add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t" "subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t" "sw %[vector_a], 0(%[dst_ptr]) \n\t" "add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" : [t2] "=&r"(t2), [vector_a] "=&r"(vector_a), [dst_ptr] "+&r"(dst_ptr), [pred_ptr] "+&r"(pred_ptr) : [dst_stride] "r"(dst_stride), [pred_stride] "r"(pred_stride), [vector_a1] "r"(vector_a1)); } } else { /* use quad-byte * input and output memory are four byte aligned */ __asm__ __volatile__("replv.qb %[vector_a1], %[a1] \n\t" : [vector_a1] "=r"(vector_a1) : [a1] "r"(a1)); for (i = 4; i--;) { __asm__ __volatile__( "lw %[t2], 0(%[pred_ptr]) \n\t" "add %[pred_ptr], %[pred_ptr], %[pred_stride] \n\t" "addu_s.qb %[vector_a], %[vector_a1], %[t2] \n\t" "sw %[vector_a], 0(%[dst_ptr]) \n\t" "add %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" : [t2] "=&r"(t2), [vector_a] "=&r"(vector_a), [dst_ptr] "+&r"(dst_ptr), [pred_ptr] "+&r"(pred_ptr) : [dst_stride] "r"(dst_stride), [pred_stride] "r"(pred_stride), [vector_a1] "r"(vector_a1)); } } } void vp8_short_inv_walsh4x4_dspr2(short *input, short *mb_dqcoeff) { short output[16]; int i; int a1, b1, c1, d1; int a2, b2, c2, d2; short *ip = input; short *op = output; prefetch_load_short(ip); for (i = 4; i--;) { a1 = ip[0] + ip[12]; b1 = ip[4] + ip[8]; c1 = ip[4] - ip[8]; d1 = ip[0] - ip[12]; op[0] = a1 + b1; op[4] = c1 + d1; op[8] = a1 - b1; op[12] = d1 - c1; ip++; op++; } ip = output; op = output; prefetch_load_short(ip); for (i = 4; i--;) { a1 = ip[0] + ip[3] + 3; b1 = ip[1] + ip[2]; c1 = ip[1] - ip[2]; d1 = ip[0] - ip[3] + 3; a2 = a1 + b1; b2 = d1 + c1; c2 = a1 - b1; d2 = d1 - c1; op[0] = a2 >> 3; op[1] = b2 >> 3; op[2] = c2 >> 3; op[3] = d2 >> 3; ip += 4; op += 4; } for (i = 0; i < 16; ++i) { mb_dqcoeff[i * 16] = output[i]; } } void vp8_short_inv_walsh4x4_1_dspr2(short *input, short *mb_dqcoeff) { int a1; a1 = ((input[0] + 3) >> 3); __asm__ __volatile__( "sh %[a1], 0(%[mb_dqcoeff]) \n\t" "sh %[a1], 32(%[mb_dqcoeff]) \n\t" "sh %[a1], 64(%[mb_dqcoeff]) \n\t" "sh %[a1], 96(%[mb_dqcoeff]) \n\t" "sh %[a1], 128(%[mb_dqcoeff]) \n\t" "sh %[a1], 160(%[mb_dqcoeff]) \n\t" "sh %[a1], 192(%[mb_dqcoeff]) \n\t" "sh %[a1], 224(%[mb_dqcoeff]) \n\t" "sh %[a1], 256(%[mb_dqcoeff]) \n\t" "sh %[a1], 288(%[mb_dqcoeff]) \n\t" "sh %[a1], 320(%[mb_dqcoeff]) \n\t" "sh %[a1], 352(%[mb_dqcoeff]) \n\t" "sh %[a1], 384(%[mb_dqcoeff]) \n\t" "sh %[a1], 416(%[mb_dqcoeff]) \n\t" "sh %[a1], 448(%[mb_dqcoeff]) \n\t" "sh %[a1], 480(%[mb_dqcoeff]) \n\t" : : [a1] "r"(a1), [mb_dqcoeff] "r"(mb_dqcoeff));
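/* note on the stores above: the DC-only inverse Walsh-Hadamard transform
   replicates (input[0] + 3) >> 3 into the DC position of all 16 blocks;
   mb_dqcoeff holds 16 short coefficients per block, so consecutive DC
   entries are 32 bytes apart, hence the offsets 0, 32, ..., 480. */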
} #endif libvpx-1.8.2/vp8/common/mips/dspr2/reconinter_dspr2.c000066400000000000000000000061251357355204000224730ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" #include "vp8_rtcd.h" #include "vpx/vpx_integer.h" #if HAVE_DSPR2 inline void prefetch_load_int(unsigned char *src) { __asm__ __volatile__("pref 0, 0(%[src]) \n\t" : : [src] "r"(src)); } __inline void vp8_copy_mem16x16_dspr2(unsigned char *RESTRICT src, int src_stride, unsigned char *RESTRICT dst, int dst_stride) { int r; unsigned int a0, a1, a2, a3; for (r = 16; r--;) { /* load src data in cache memory */ prefetch_load_int(src + src_stride); /* use unaligned memory load and store */ __asm__ __volatile__( "ulw %[a0], 0(%[src]) \n\t" "ulw %[a1], 4(%[src]) \n\t" "ulw %[a2], 8(%[src]) \n\t" "ulw %[a3], 12(%[src]) \n\t" "sw %[a0], 0(%[dst]) \n\t" "sw %[a1], 4(%[dst]) \n\t" "sw %[a2], 8(%[dst]) \n\t" "sw %[a3], 12(%[dst]) \n\t" : [a0] "=&r"(a0), [a1] "=&r"(a1), [a2] "=&r"(a2), [a3] "=&r"(a3) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } } __inline void vp8_copy_mem8x8_dspr2(unsigned char *RESTRICT src, int src_stride, unsigned char *RESTRICT dst, int dst_stride) { int r; unsigned int a0, a1; /* load src data in cache memory */ prefetch_load_int(src + src_stride); for (r = 8; r--;) { /* use unaligned memory load and store */ __asm__ __volatile__( "ulw %[a0], 0(%[src]) \n\t" "ulw %[a1], 4(%[src]) \n\t" "sw %[a0], 0(%[dst]) \n\t" "sw %[a1], 4(%[dst]) \n\t" : [a0] "=&r"(a0), [a1] "=&r"(a1) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } } __inline void vp8_copy_mem8x4_dspr2(unsigned char *RESTRICT src, int src_stride, unsigned char *RESTRICT dst, int dst_stride) { int r; unsigned int a0, a1; /* load src data in cache memory */ prefetch_load_int(src + src_stride); for (r = 4; r--;) { /* use unaligned memory load and store */ __asm__ __volatile__( "ulw %[a0], 0(%[src]) \n\t" "ulw %[a1], 4(%[src]) \n\t" "sw %[a0], 0(%[dst]) \n\t" "sw %[a1], 4(%[dst]) \n\t" : [a0] "=&r"(a0), [a1] "=&r"(a1) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } } #endif libvpx-1.8.2/vp8/common/mips/dspr2/vp8_loopfilter_filters_dspr2.c000066400000000000000000002540571357355204000250400ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <stdlib.h> #include "vp8_rtcd.h" #include "vp8/common/onyxc_int.h" #if HAVE_DSPR2 typedef unsigned char uc; /* prefetch data for load */ inline void prefetch_load_lf(unsigned char *src) { __asm__ __volatile__("pref 0, 0(%[src]) \n\t" : : [src] "r"(src)); } /* prefetch data for store */ inline void prefetch_store_lf(unsigned char *dst) { __asm__ __volatile__("pref 1, 0(%[dst]) \n\t" : : [dst] "r"(dst)); } /* processing 4 pixels at the same time * compute hev and mask in the same function */ static __inline void vp8_filter_mask_vec_mips( uint32_t limit, uint32_t flimit, uint32_t p1, uint32_t p0, uint32_t p3, uint32_t p2, uint32_t q0, uint32_t q1, uint32_t q2, uint32_t q3, uint32_t thresh, uint32_t *hev, uint32_t *mask) { uint32_t c, r, r3, r_k; uint32_t s1, s2, s3; uint32_t ones = 0xFFFFFFFF; uint32_t hev1; __asm__ __volatile__( /* mask |= (abs(p3 - p2) > limit) */ "subu_s.qb %[c], %[p3], %[p2] \n\t" "subu_s.qb %[r_k], %[p2], %[p3] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], $0, %[c] \n\t" /* mask |= (abs(p2 - p1) > limit) */ "subu_s.qb %[c], %[p2], %[p1] \n\t" "subu_s.qb %[r_k], %[p1], %[p2] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" /* mask |= (abs(p1 - p0) > limit) * hev |= (abs(p1 - p0) > thresh) */ "subu_s.qb %[c], %[p1], %[p0] \n\t" "subu_s.qb %[r_k], %[p0], %[p1] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" "or %[r3], $0, %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" /* mask |= (abs(q1 - q0) > limit) * hev |= (abs(q1 - q0) > thresh) */ "subu_s.qb %[c], %[q1], %[q0] \n\t" "subu_s.qb %[r_k], %[q0], %[q1] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" "or %[r3], %[r3], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" /* mask |= (abs(q2 - q1) > limit) */ "subu_s.qb %[c], %[q2], %[q1] \n\t" "subu_s.qb %[r_k], %[q1], %[q2] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" "sll %[r3], %[r3], 24 \n\t" /* mask |= (abs(q3 - q2) > limit) */ "subu_s.qb %[c], %[q3], %[q2] \n\t" "subu_s.qb %[r_k], %[q2], %[q3] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r3] "=&r"(r3) : [limit] "r"(limit), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q1] "r"(q1), [q0] "r"(q0), [q2] "r"(q2), [q3] "r"(q3), [thresh] "r"(thresh)); __asm__ __volatile__( /* abs(p0 - q0) */ "subu_s.qb %[c], %[p0], %[q0] \n\t" "subu_s.qb %[r_k], %[q0], %[p0] \n\t" "wrdsp %[r3] \n\t" "or %[s1], %[r_k], %[c] \n\t" /* abs(p1 - q1) */ "subu_s.qb %[c], %[p1], %[q1] \n\t" "addu_s.qb %[s3], %[s1], %[s1] \n\t" "pick.qb %[hev1], %[ones], $0 \n\t" "subu_s.qb %[r_k], %[q1], %[p1] \n\t" "or %[s2], %[r_k], %[c] \n\t" /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ "shrl.qb %[s2], %[s2], 1 \n\t" "addu_s.qb %[s1], %[s2], %[s3] \n\t" "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" "or %[r], %[r], %[c] \n\t" "sll %[r], %[r], 24 \n\t" "wrdsp %[r] \n\t" "pick.qb %[s2], $0, %[ones] \n\t" : [c] "=&r"(c), [r_k] "=&r"(r_k), [s1] "=&r"(s1), [hev1] "=&r"(hev1), [s2] "=&r"(s2), [r] "+r"(r), [s3] "=&r"(s3) : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [r3] "r"(r3), [q1] "r"(q1), [ones] "r"(ones), [flimit] "r"(flimit)); *hev = hev1; *mask = s2; } /* inputs & outputs are quad-byte vectors */ static __inline void vp8_filter_mips(uint32_t mask,
uint32_t hev, uint32_t *ps1, uint32_t *ps0, uint32_t *qs0, uint32_t *qs1) { int32_t vp8_filter_l, vp8_filter_r; int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r; int32_t subr_r, subr_l; uint32_t t1, t2, HWM, t3; uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r; int32_t vps1, vps0, vqs0, vqs1; int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r; uint32_t N128; N128 = 0x80808080; t1 = 0x03000300; t2 = 0x04000400; t3 = 0x01000100; HWM = 0xFF00FF00; vps0 = (*ps0) ^ N128; vps1 = (*ps1) ^ N128; vqs0 = (*qs0) ^ N128; vqs1 = (*qs1) ^ N128; /* use halfword pairs instead of quad-bytes because of accuracy */ vps0_l = vps0 & HWM; vps0_r = vps0 << 8; vps0_r = vps0_r & HWM; vps1_l = vps1 & HWM; vps1_r = vps1 << 8; vps1_r = vps1_r & HWM; vqs0_l = vqs0 & HWM; vqs0_r = vqs0 << 8; vqs0_r = vqs0_r & HWM; vqs1_l = vqs1 & HWM; vqs1_r = vqs1 << 8; vqs1_r = vqs1_r & HWM; mask_l = mask & HWM; mask_r = mask << 8; mask_r = mask_r & HWM; hev_l = hev & HWM; hev_r = hev << 8; hev_r = hev_r & HWM; __asm__ __volatile__( /* vp8_filter = vp8_signed_char_clamp(ps1 - qs1); */ "subq_s.ph %[vp8_filter_l], %[vps1_l], %[vqs1_l] \n\t" "subq_s.ph %[vp8_filter_r], %[vps1_r], %[vqs1_r] \n\t" /* qs0 - ps0 */ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" /* vp8_filter &= hev; */ "and %[vp8_filter_l], %[vp8_filter_l], %[hev_l] \n\t" "and %[vp8_filter_r], %[vp8_filter_r], %[hev_r] \n\t" /* vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); */ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" "xor %[invhev_l], %[hev_l], %[HWM] \n\t" "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" "xor %[invhev_r], %[hev_r], %[HWM] \n\t" "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" /* vp8_filter &= mask; */ "and %[vp8_filter_l], %[vp8_filter_l], %[mask_l] \n\t" "and %[vp8_filter_r], %[vp8_filter_r], %[mask_r] \n\t" : [vp8_filter_l] "=&r"(vp8_filter_l), [vp8_filter_r] "=&r"(vp8_filter_r), [subr_l] "=&r"(subr_l), [subr_r] "=&r"(subr_r), [invhev_l] "=&r"(invhev_l), [invhev_r] "=&r"(invhev_r) : [vps0_l] "r"(vps0_l), [vps0_r] "r"(vps0_r), [vps1_l] "r"(vps1_l), [vps1_r] "r"(vps1_r), [vqs0_l] "r"(vqs0_l), [vqs0_r] "r"(vqs0_r), [vqs1_l] "r"(vqs1_l), [vqs1_r] "r"(vqs1_r), [mask_l] "r"(mask_l), [mask_r] "r"(mask_r), [hev_l] "r"(hev_l), [hev_r] "r"(hev_r), [HWM] "r"(HWM)); /* save bottom 3 bits so that we round one side +4 and the other +3 */ __asm__ __volatile__( /* Filter1 = vp8_signed_char_clamp(vp8_filter + 4) >>= 3; */ "addq_s.ph %[Filter1_l], %[vp8_filter_l], %[t2] \n\t" "addq_s.ph %[Filter1_r], %[vp8_filter_r], %[t2] \n\t" /* Filter2 = vp8_signed_char_clamp(vp8_filter + 3) >>= 3; */ "addq_s.ph %[Filter2_l], %[vp8_filter_l], %[t1] \n\t" "addq_s.ph %[Filter2_r], %[vp8_filter_r], %[t1] \n\t" "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t" "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t" "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t" "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t" /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */ "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t" "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t" /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */ "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t" "subq_s.ph %[vqs0_r], %[vqs0_r],
%[Filter1_r] \n\t" : [Filter1_l] "=&r"(Filter1_l), [Filter1_r] "=&r"(Filter1_r), [Filter2_l] "=&r"(Filter2_l), [Filter2_r] "=&r"(Filter2_r), [vps0_l] "+r"(vps0_l), [vps0_r] "+r"(vps0_r), [vqs0_l] "+r"(vqs0_l), [vqs0_r] "+r"(vqs0_r) : [t1] "r"(t1), [t2] "r"(t2), [vp8_filter_l] "r"(vp8_filter_l), [vp8_filter_r] "r"(vp8_filter_r), [HWM] "r"(HWM)); __asm__ __volatile__( /* (vp8_filter += 1) >>= 1 */ "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t" "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t" /* vp8_filter &= ~hev; */ "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t" "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t" /* vps1 = vp8_signed_char_clamp(ps1 + vp8_filter); */ "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t" "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t" /* vqs1 = vp8_signed_char_clamp(qs1 - vp8_filter); */ "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t" "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t" : [Filter1_l] "+r"(Filter1_l), [Filter1_r] "+r"(Filter1_r), [vps1_l] "+r"(vps1_l), [vps1_r] "+r"(vps1_r), [vqs1_l] "+r"(vqs1_l), [vqs1_r] "+r"(vqs1_r) : [t3] "r"(t3), [invhev_l] "r"(invhev_l), [invhev_r] "r"(invhev_r)); /* Create quad-bytes from halfword pairs */ vqs0_l = vqs0_l & HWM; vqs1_l = vqs1_l & HWM; vps0_l = vps0_l & HWM; vps1_l = vps1_l & HWM; __asm__ __volatile__( "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t" "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t" "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t" "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t" : [vps1_r] "+r"(vps1_r), [vqs1_r] "+r"(vqs1_r), [vps0_r] "+r"(vps0_r), [vqs0_r] "+r"(vqs0_r) :); vqs0 = vqs0_l | vqs0_r; vqs1 = vqs1_l | vqs1_r; vps0 = vps0_l | vps0_r; vps1 = vps1_l | vps1_r; *ps0 = vps0 ^ N128; *ps1 = vps1 ^ N128; *qs0 = vqs0 ^ N128; *qs1 = vqs1 ^ N128; } void vp8_loop_filter_horizontal_edge_mips(unsigned char *s, int p, unsigned int flimit, unsigned int limit, unsigned int thresh, int count) { uint32_t mask; uint32_t hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; (void)count; mask = 0; hev = 0; p1 = 0; p2 = 0; p3 = 0; p4 = 0; /* prefetch data for store */ prefetch_store_lf(s); /* loop filter designed to work using chars so that we can make maximum use * of 8 bit simd instructions. 
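 * Pointer layout used below (p is the stride): sm1, s0, s1, s2 are the four
 * rows above the edge (p3 .. p0 in the usual loop-filter naming) and s3, s4,
 * s5, s6 are the four rows below it (q0 .. q3); each uint32_t load then
 * gathers one row of 4 adjacent columns so the filter works on 4 pixels at
 * once.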
*/ sm1 = s - (p << 2); s0 = s - p - p - p; s1 = s - p - p; s2 = s - p; s3 = s; s4 = s + p; s5 = s + p + p; s6 = s + p + p + p; /* load quad-byte vectors * memory is 4 byte aligned */ p1 = *((uint32_t *)(s1)); p2 = *((uint32_t *)(s2)); p3 = *((uint32_t *)(s3)); p4 = *((uint32_t *)(s4)); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { pm1 = *((uint32_t *)(sm1)); p0 = *((uint32_t *)(s0)); p5 = *((uint32_t *)(s5)); p6 = *((uint32_t *)(s6)); vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0, filtering is not needed */ if (mask) { /* filtering */ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); /* unpack processed 4x4 neighborhood */ *((uint32_t *)s1) = p1; *((uint32_t *)s2) = p2; *((uint32_t *)s3) = p3; *((uint32_t *)s4) = p4; } } sm1 += 4; s0 += 4; s1 += 4; s2 += 4; s3 += 4; s4 += 4; s5 += 4; s6 += 4; /* load quad-byte vectors * memory is 4 byte aligned */ p1 = *((uint32_t *)(s1)); p2 = *((uint32_t *)(s2)); p3 = *((uint32_t *)(s3)); p4 = *((uint32_t *)(s4)); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { pm1 = *((uint32_t *)(sm1)); p0 = *((uint32_t *)(s0)); p5 = *((uint32_t *)(s5)); p6 = *((uint32_t *)(s6)); vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0, filtering is not needed */ if (mask) { /* filtering */ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); /* unpack processed 4x4 neighborhood */ *((uint32_t *)s1) = p1; *((uint32_t *)s2) = p2; *((uint32_t *)s3) = p3; *((uint32_t *)s4) = p4; } } sm1 += 4; s0 += 4; s1 += 4; s2 += 4; s3 += 4; s4 += 4; s5 += 4; s6 += 4; /* load quad-byte vectors * memory is 4 byte aligned */ p1 = *((uint32_t *)(s1)); p2 = *((uint32_t *)(s2)); p3 = *((uint32_t *)(s3)); p4 = *((uint32_t *)(s4)); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { pm1 = *((uint32_t *)(sm1)); p0 = *((uint32_t *)(s0)); p5 = *((uint32_t *)(s5)); p6 = *((uint32_t *)(s6)); vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0, filtering is not needed */ if (mask) { /* filtering */ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); /* unpack processed 4x4 neighborhood */ *((uint32_t *)s1) = p1; *((uint32_t *)s2) = p2; *((uint32_t *)s3) = p3; *((uint32_t *)s4) = p4; } } sm1 += 4; s0 += 4; s1 += 4; s2 += 4; s3 += 4; s4 += 4; s5 += 4; s6 += 4; /* load quad-byte vectors * memory is 4 byte aligned */ p1 = *((uint32_t *)(s1)); p2 = *((uint32_t *)(s2)); p3 = *((uint32_t *)(s3)); p4 = *((uint32_t *)(s4)); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { pm1 = *((uint32_t *)(sm1)); p0 = *((uint32_t *)(s0)); p5 = *((uint32_t *)(s5)); p6 = *((uint32_t *)(s6)); vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0, filtering is not needed */ if (mask) { /* filtering */ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); /* unpack processed 4x4 neighborhood */ *((uint32_t *)s1) = p1; *((uint32_t *)s2) = p2; *((uint32_t *)s3) = p3; *((uint32_t *)s4) = p4; } } } void vp8_loop_filter_uvhorizontal_edge_mips(unsigned char *s, int p, unsigned int flimit, unsigned int limit, unsigned int thresh, int count) { uint32_t mask;
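/* chroma planes are only 8 pixels wide, so this variant applies the same
   horizontal-edge filter to two 4-pixel groups instead of four */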
uint32_t hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; (void)count; mask = 0; hev = 0; p1 = 0; p2 = 0; p3 = 0; p4 = 0; /* loop filter designed to work using chars so that we can make maximum use * of 8 bit simd instructions. */ sm1 = s - (p << 2); s0 = s - p - p - p; s1 = s - p - p; s2 = s - p; s3 = s; s4 = s + p; s5 = s + p + p; s6 = s + p + p + p; /* load quad-byte vectors * memory is 4 byte aligned */ p1 = *((uint32_t *)(s1)); p2 = *((uint32_t *)(s2)); p3 = *((uint32_t *)(s3)); p4 = *((uint32_t *)(s4)); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { pm1 = *((uint32_t *)(sm1)); p0 = *((uint32_t *)(s0)); p5 = *((uint32_t *)(s5)); p6 = *((uint32_t *)(s6)); vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); /* unpack processed 4x4 neighborhood */ *((uint32_t *)s1) = p1; *((uint32_t *)s2) = p2; *((uint32_t *)s3) = p3; *((uint32_t *)s4) = p4; } } sm1 += 4; s0 += 4; s1 += 4; s2 += 4; s3 += 4; s4 += 4; s5 += 4; s6 += 4; /* load quad-byte vectors * memory is 4 byte aligned */ p1 = *((uint32_t *)(s1)); p2 = *((uint32_t *)(s2)); p3 = *((uint32_t *)(s3)); p4 = *((uint32_t *)(s4)); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { pm1 = *((uint32_t *)(sm1)); p0 = *((uint32_t *)(s0)); p5 = *((uint32_t *)(s5)); p6 = *((uint32_t *)(s6)); vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); /* unpack processed 4x4 neighborhood */ *((uint32_t *)s1) = p1; *((uint32_t *)s2) = p2; *((uint32_t *)s3) = p3; *((uint32_t *)s4) = p4; } } } void vp8_loop_filter_vertical_edge_mips(unsigned char *s, int p, const unsigned int flimit, const unsigned int limit, const unsigned int thresh, int count) { int i; uint32_t mask, hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; unsigned char *s1, *s2, *s3, *s4; uint32_t prim1, prim2, sec3, sec4, prim3, prim4; hev = 0; mask = 0; i = 0; pm1 = 0; p0 = 0; p1 = 0; p2 = 0; p3 = 0; p4 = 0; p5 = 0; p6 = 0; /* loop filter designed to work using chars so that we can make maximum use * of 8 bit simd instructions. 
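 *
 * Vertical edges cross the rows, so each 4x4 block of bytes is first
 * transposed into registers with the DSPr2 precrq/precr pack
 * instructions, filtered exactly like a horizontal edge, and finally
 * scattered back one byte at a time (sb) because the write positions
 * straddling the edge are not word aligned.  Conceptually:
 *
 *   a0 a1 a2 a3        a0 b0 c0 d0
 *   b0 b1 b2 b3  --->  a1 b1 c1 d1    (4x4 byte transpose)
 *   c0 c1 c2 c3        a2 b2 c2 d2
 *   d0 d1 d2 d3        a3 b3 c3 d3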
*/ /* apply filter on 4 pixesl at the same time */ do { /* prefetch data for store */ prefetch_store_lf(s + p); s1 = s; s2 = s + p; s3 = s2 + p; s4 = s3 + p; s = s4 + p; /* load quad-byte vectors * memory is 4 byte aligned */ p2 = *((uint32_t *)(s1 - 4)); p6 = *((uint32_t *)(s1)); p1 = *((uint32_t *)(s2 - 4)); p5 = *((uint32_t *)(s2)); p0 = *((uint32_t *)(s3 - 4)); p4 = *((uint32_t *)(s3)); pm1 = *((uint32_t *)(s4 - 4)); p3 = *((uint32_t *)(s4)); /* transpose pm1, p0, p1, p2 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" "append %[p1], %[sec3], 16 \n\t" "append %[pm1], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* transpose p3, p4, p5, p6 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" "append %[p5], %[sec3], 16 \n\t" "append %[p3], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); /* unpack processed 4x4 neighborhood * don't use transpose on output data * because memory isn't aligned */ __asm__ __volatile__( "sb %[p4], 1(%[s4]) \n\t" "sb %[p3], 0(%[s4]) \n\t" "sb %[p2], -1(%[s4]) \n\t" "sb %[p1], -2(%[s4]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [s4] "r"(s4), [p2] "r"(p2), [p1] "r"(p1)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s3]) \n\t" "sb %[p3], 0(%[s3]) \n\t" "sb %[p2], -1(%[s3]) \n\t" "sb %[p1], -2(%[s3]) \n\t" : [p1] "+r"(p1) : [p4] "r"(p4), [p3] "r"(p3), [s3] "r"(s3), [p2] "r"(p2)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s2]) \n\t" "sb %[p3], 0(%[s2]) \n\t" "sb %[p2], -1(%[s2]) \n\t" "sb %[p1], -2(%[s2]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [s2] "r"(s2), [p2] "r"(p2), [p1] "r"(p1)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : 
[p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s1]) \n\t" "sb %[p3], 0(%[s1]) \n\t" "sb %[p2], -1(%[s1]) \n\t" "sb %[p1], -2(%[s1]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [s1] "r"(s1), [p2] "r"(p2), [p1] "r"(p1)); } } s1 = s; s2 = s + p; s3 = s2 + p; s4 = s3 + p; s = s4 + p; /* load quad-byte vectors * memory is 4 byte aligned */ p2 = *((uint32_t *)(s1 - 4)); p6 = *((uint32_t *)(s1)); p1 = *((uint32_t *)(s2 - 4)); p5 = *((uint32_t *)(s2)); p0 = *((uint32_t *)(s3 - 4)); p4 = *((uint32_t *)(s3)); pm1 = *((uint32_t *)(s4 - 4)); p3 = *((uint32_t *)(s4)); /* transpose pm1, p0, p1, p2 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" "append %[p1], %[sec3], 16 \n\t" "append %[pm1], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* transpose p3, p4, p5, p6 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" "append %[p5], %[sec3], 16 \n\t" "append %[p3], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); /* unpack processed 4x4 neighborhood * don't use transpose on output data * because memory isn't aligned */ __asm__ __volatile__( "sb %[p4], 1(%[s4]) \n\t" "sb %[p3], 0(%[s4]) \n\t" "sb %[p2], -1(%[s4]) \n\t" "sb %[p1], -2(%[s4]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [s4] "r"(s4), [p2] "r"(p2), [p1] "r"(p1)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s3]) \n\t" "sb %[p3], 0(%[s3]) \n\t" "sb %[p2], -1(%[s3]) \n\t" "sb %[p1], -2(%[s3]) \n\t" : [p1] "+r"(p1) : [p4] "r"(p4), [p3] "r"(p3), [s3] "r"(s3), [p2] "r"(p2)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s2]) \n\t" "sb %[p3], 0(%[s2]) \n\t" "sb %[p2], -1(%[s2]) \n\t" "sb %[p1], -2(%[s2]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [s2] "r"(s2), [p2] 
"r"(p2), [p1] "r"(p1)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s1]) \n\t" "sb %[p3], 0(%[s1]) \n\t" "sb %[p2], -1(%[s1]) \n\t" "sb %[p1], -2(%[s1]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [s1] "r"(s1), [p2] "r"(p2), [p1] "r"(p1)); } } i += 8; } while (i < count); } void vp8_loop_filter_uvvertical_edge_mips(unsigned char *s, int p, unsigned int flimit, unsigned int limit, unsigned int thresh, int count) { uint32_t mask, hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; unsigned char *s1, *s2, *s3, *s4; uint32_t prim1, prim2, sec3, sec4, prim3, prim4; (void)count; /* loop filter designed to work using chars so that we can make maximum use * of 8 bit simd instructions. */ /* apply filter on 4 pixesl at the same time */ s1 = s; s2 = s + p; s3 = s2 + p; s4 = s3 + p; /* load quad-byte vectors * memory is 4 byte aligned */ p2 = *((uint32_t *)(s1 - 4)); p6 = *((uint32_t *)(s1)); p1 = *((uint32_t *)(s2 - 4)); p5 = *((uint32_t *)(s2)); p0 = *((uint32_t *)(s3 - 4)); p4 = *((uint32_t *)(s3)); pm1 = *((uint32_t *)(s4 - 4)); p3 = *((uint32_t *)(s4)); /* transpose pm1, p0, p1, p2 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" "append %[p1], %[sec3], 16 \n\t" "append %[pm1], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* transpose p3, p4, p5, p6 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" "append %[p5], %[sec3], 16 \n\t" "append %[p3], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); /* unpack processed 4x4 neighborhood * don't use transpose on output data * because memory isn't aligned */ __asm__ __volatile__( "sb %[p4], 1(%[s4]) \n\t" "sb %[p3], 0(%[s4]) \n\t" "sb %[p2], -1(%[s4]) \n\t" "sb %[p1], -2(%[s4]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [s4] "r"(s4), [p2] "r"(p2), [p1] "r"(p1)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] 
"+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s3]) \n\t" "sb %[p3], 0(%[s3]) \n\t" "sb %[p2], -1(%[s3]) \n\t" "sb %[p1], -2(%[s3]) \n\t" : [p1] "+r"(p1) : [p4] "r"(p4), [p3] "r"(p3), [s3] "r"(s3), [p2] "r"(p2)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s2]) \n\t" "sb %[p3], 0(%[s2]) \n\t" "sb %[p2], -1(%[s2]) \n\t" "sb %[p1], -2(%[s2]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [s2] "r"(s2), [p2] "r"(p2), [p1] "r"(p1)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s1]) \n\t" "sb %[p3], 0(%[s1]) \n\t" "sb %[p2], -1(%[s1]) \n\t" "sb %[p1], -2(%[s1]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [s1] "r"(s1), [p2] "r"(p2), [p1] "r"(p1)); } } s1 = s4 + p; s2 = s1 + p; s3 = s2 + p; s4 = s3 + p; /* load quad-byte vectors * memory is 4 byte aligned */ p2 = *((uint32_t *)(s1 - 4)); p6 = *((uint32_t *)(s1)); p1 = *((uint32_t *)(s2 - 4)); p5 = *((uint32_t *)(s2)); p0 = *((uint32_t *)(s3 - 4)); p4 = *((uint32_t *)(s3)); pm1 = *((uint32_t *)(s4 - 4)); p3 = *((uint32_t *)(s4)); /* transpose pm1, p0, p1, p2 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" "append %[p1], %[sec3], 16 \n\t" "append %[pm1], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* transpose p3, p4, p5, p6 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" "append %[p5], %[sec3], 16 \n\t" "append %[p3], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ vp8_filter_mips(mask, hev, &p1, &p2, &p3, &p4); /* unpack processed 4x4 neighborhood * don't use transpose on output data * because memory isn't aligned */ __asm__ __volatile__( "sb %[p4], 1(%[s4]) \n\t" "sb %[p3], 0(%[s4]) \n\t" "sb %[p2], -1(%[s4]) \n\t" "sb %[p1], -2(%[s4]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [s4] "r"(s4), [p2] "r"(p2), 
[p1] "r"(p1)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s3]) \n\t" "sb %[p3], 0(%[s3]) \n\t" "sb %[p2], -1(%[s3]) \n\t" "sb %[p1], -2(%[s3]) \n\t" : [p1] "+r"(p1) : [p4] "r"(p4), [p3] "r"(p3), [s3] "r"(s3), [p2] "r"(p2)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s2]) \n\t" "sb %[p3], 0(%[s2]) \n\t" "sb %[p2], -1(%[s2]) \n\t" "sb %[p1], -2(%[s2]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [s2] "r"(s2), [p2] "r"(p2), [p1] "r"(p1)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s1]) \n\t" "sb %[p3], 0(%[s1]) \n\t" "sb %[p2], -1(%[s1]) \n\t" "sb %[p1], -2(%[s1]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [s1] "r"(s1), [p2] "r"(p2), [p1] "r"(p1)); } } } /* inputs & outputs are quad-byte vectors */ static __inline void vp8_mbfilter_mips(uint32_t mask, uint32_t hev, uint32_t *ps2, uint32_t *ps1, uint32_t *ps0, uint32_t *qs0, uint32_t *qs1, uint32_t *qs2) { int32_t vps2, vps1, vps0, vqs0, vqs1, vqs2; int32_t vps2_l, vps1_l, vps0_l, vqs0_l, vqs1_l, vqs2_l; int32_t vps2_r, vps1_r, vps0_r, vqs0_r, vqs1_r, vqs2_r; uint32_t HWM, vp8_filter_l, vp8_filter_r, mask_l, mask_r, hev_l, hev_r, subr_r, subr_l; uint32_t Filter2_l, Filter2_r, t1, t2, Filter1_l, Filter1_r, invhev_l, invhev_r; uint32_t N128, R63; uint32_t u1_l, u1_r, u2_l, u2_r, u3_l, u3_r; R63 = 0x003F003F; HWM = 0xFF00FF00; N128 = 0x80808080; t1 = 0x03000300; t2 = 0x04000400; vps0 = (*ps0) ^ N128; vps1 = (*ps1) ^ N128; vps2 = (*ps2) ^ N128; vqs0 = (*qs0) ^ N128; vqs1 = (*qs1) ^ N128; vqs2 = (*qs2) ^ N128; /* use halfword pairs instead quad-bytes because of accuracy */ vps0_l = vps0 & HWM; vps0_r = vps0 << 8; vps0_r = vps0_r & HWM; vqs0_l = vqs0 & HWM; vqs0_r = vqs0 << 8; vqs0_r = vqs0_r & HWM; vps1_l = vps1 & HWM; vps1_r = vps1 << 8; vps1_r = vps1_r & HWM; vqs1_l = vqs1 & HWM; vqs1_r = vqs1 << 8; vqs1_r = vqs1_r & HWM; vqs2_l = vqs2 & HWM; vqs2_r = vqs2 << 8; vqs2_r = vqs2_r & HWM; __asm__ __volatile__( /* qs0 - ps0 */ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" /* vp8_filter = vp8_signed_char_clamp(ps1 - qs1); */ "subq_s.ph %[vp8_filter_l], %[vps1_l], %[vqs1_l] \n\t" "subq_s.ph %[vp8_filter_r], %[vps1_r], %[vqs1_r] \n\t" : [vp8_filter_l] "=&r"(vp8_filter_l), [vp8_filter_r] "=r"(vp8_filter_r), [subr_l] "=&r"(subr_l), [subr_r] "=&r"(subr_r) : [vps0_l] "r"(vps0_l), [vps0_r] "r"(vps0_r), [vps1_l] "r"(vps1_l), [vps1_r] "r"(vps1_r), [vqs0_l] "r"(vqs0_l), [vqs0_r] "r"(vqs0_r), [vqs1_l] "r"(vqs1_l), [vqs1_r] "r"(vqs1_r)); vps2_l = vps2 & HWM; vps2_r = vps2 << 8; vps2_r = vps2_r & HWM; /* add outer taps if we have high edge variance */ __asm__ __volatile__( /* vp8_filter = vp8_signed_char_clamp(vp8_filter + 3 * (qs0 - ps0)); */ "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" "and %[mask_l], %[HWM], %[mask] \n\t" "sll %[mask_r], %[mask], 8 \n\t" "and %[mask_r], %[HWM], %[mask_r] \n\t" "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" "addq_s.ph %[vp8_filter_r], 
%[vp8_filter_r], %[subr_r] \n\t" "and %[hev_l], %[HWM], %[hev] \n\t" "sll %[hev_r], %[hev], 8 \n\t" "and %[hev_r], %[HWM], %[hev_r] \n\t" "addq_s.ph %[vp8_filter_l], %[vp8_filter_l], %[subr_l] \n\t" "addq_s.ph %[vp8_filter_r], %[vp8_filter_r], %[subr_r] \n\t" /* vp8_filter &= mask; */ "and %[vp8_filter_l], %[vp8_filter_l], %[mask_l] \n\t" "and %[vp8_filter_r], %[vp8_filter_r], %[mask_r] \n\t" /* Filter2 = vp8_filter & hev; */ "and %[Filter2_l], %[vp8_filter_l], %[hev_l] \n\t" "and %[Filter2_r], %[vp8_filter_r], %[hev_r] \n\t" : [vp8_filter_l] "+r"(vp8_filter_l), [vp8_filter_r] "+r"(vp8_filter_r), [hev_l] "=&r"(hev_l), [hev_r] "=&r"(hev_r), [mask_l] "=&r"(mask_l), [mask_r] "=&r"(mask_r), [Filter2_l] "=&r"(Filter2_l), [Filter2_r] "=&r"(Filter2_r) : [subr_l] "r"(subr_l), [subr_r] "r"(subr_r), [HWM] "r"(HWM), [hev] "r"(hev), [mask] "r"(mask)); /* save bottom 3 bits so that we round one side +4 and the other +3 */ __asm__ __volatile__( /* Filter1 = vp8_signed_char_clamp(Filter2 + 4) >>= 3; */ "addq_s.ph %[Filter1_l], %[Filter2_l], %[t2] \n\t" "xor %[invhev_l], %[hev_l], %[HWM] \n\t" "addq_s.ph %[Filter1_r], %[Filter2_r], %[t2] \n\t" /* Filter2 = vp8_signed_char_clamp(Filter2 + 3) >>= 3; */ "addq_s.ph %[Filter2_l], %[Filter2_l], %[t1] \n\t" "addq_s.ph %[Filter2_r], %[Filter2_r], %[t1] \n\t" "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t" "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t" "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t" "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t" "xor %[invhev_r], %[hev_r], %[HWM] \n\t" /* qs0 = vp8_signed_char_clamp(qs0 - Filter1); */ "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t" "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t" /* ps0 = vp8_signed_char_clamp(ps0 + Filter2); */ "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t" "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t" : [invhev_l] "=&r"(invhev_l), [invhev_r] "=&r"(invhev_r), [Filter1_l] "=&r"(Filter1_l), [Filter1_r] "=&r"(Filter1_r), [Filter2_l] "+r"(Filter2_l), [Filter2_r] "+r"(Filter2_r), [vps0_l] "+r"(vps0_l), [vps0_r] "+r"(vps0_r), [vqs0_l] "+r"(vqs0_l), [vqs0_r] "+r"(vqs0_r) : [t1] "r"(t1), [t2] "r"(t2), [HWM] "r"(HWM), [hev_l] "r"(hev_l), [hev_r] "r"(hev_r)); /* only apply wider filter if not high edge variance */ __asm__ __volatile__( /* vp8_filter &= ~hev; */ "and %[Filter2_l], %[vp8_filter_l], %[invhev_l] \n\t" "and %[Filter2_r], %[vp8_filter_r], %[invhev_r] \n\t" "shra.ph %[Filter2_l], %[Filter2_l], 8 \n\t" "shra.ph %[Filter2_r], %[Filter2_r], 8 \n\t" : [Filter2_l] "=&r"(Filter2_l), [Filter2_r] "=&r"(Filter2_r) : [vp8_filter_l] "r"(vp8_filter_l), [vp8_filter_r] "r"(vp8_filter_r), [invhev_l] "r"(invhev_l), [invhev_r] "r"(invhev_r)); /* roughly 3/7th difference across boundary */ __asm__ __volatile__( "shll.ph %[u3_l], %[Filter2_l], 3 \n\t" "shll.ph %[u3_r], %[Filter2_r], 3 \n\t" "addq.ph %[u3_l], %[u3_l], %[Filter2_l] \n\t" "addq.ph %[u3_r], %[u3_r], %[Filter2_r] \n\t" "shll.ph %[u2_l], %[u3_l], 1 \n\t" "shll.ph %[u2_r], %[u3_r], 1 \n\t" "addq.ph %[u1_l], %[u3_l], %[u2_l] \n\t" "addq.ph %[u1_r], %[u3_r], %[u2_r] \n\t" "addq.ph %[u2_l], %[u2_l], %[R63] \n\t" "addq.ph %[u2_r], %[u2_r], %[R63] \n\t" "addq.ph %[u3_l], %[u3_l], %[R63] \n\t" "addq.ph %[u3_r], %[u3_r], %[R63] \n\t" /* vp8_signed_char_clamp((63 + Filter2 * 27) >> 7) * vp8_signed_char_clamp((63 + Filter2 * 18) >> 7) */ "addq.ph %[u1_l], %[u1_l], %[R63] \n\t" "addq.ph %[u1_r], %[u1_r], %[R63] \n\t" "shra.ph %[u1_l], %[u1_l], 7 \n\t" "shra.ph 
%[u1_r], %[u1_r], 7 \n\t" "shra.ph %[u2_l], %[u2_l], 7 \n\t" "shra.ph %[u2_r], %[u2_r], 7 \n\t" "shll.ph %[u1_l], %[u1_l], 8 \n\t" "shll.ph %[u1_r], %[u1_r], 8 \n\t" "shll.ph %[u2_l], %[u2_l], 8 \n\t" "shll.ph %[u2_r], %[u2_r], 8 \n\t" /* vqs0 = vp8_signed_char_clamp(qs0 - u); */ "subq_s.ph %[vqs0_l], %[vqs0_l], %[u1_l] \n\t" "subq_s.ph %[vqs0_r], %[vqs0_r], %[u1_r] \n\t" /* vps0 = vp8_signed_char_clamp(ps0 + u); */ "addq_s.ph %[vps0_l], %[vps0_l], %[u1_l] \n\t" "addq_s.ph %[vps0_r], %[vps0_r], %[u1_r] \n\t" : [u1_l] "=&r"(u1_l), [u1_r] "=&r"(u1_r), [u2_l] "=&r"(u2_l), [u2_r] "=&r"(u2_r), [u3_l] "=&r"(u3_l), [u3_r] "=&r"(u3_r), [vps0_l] "+r"(vps0_l), [vps0_r] "+r"(vps0_r), [vqs0_l] "+r"(vqs0_l), [vqs0_r] "+r"(vqs0_r) : [R63] "r"(R63), [Filter2_l] "r"(Filter2_l), [Filter2_r] "r"(Filter2_r)); __asm__ __volatile__( /* vqs1 = vp8_signed_char_clamp(qs1 - u); */ "subq_s.ph %[vqs1_l], %[vqs1_l], %[u2_l] \n\t" "addq_s.ph %[vps1_l], %[vps1_l], %[u2_l] \n\t" /* vps1 = vp8_signed_char_clamp(ps1 + u); */ "addq_s.ph %[vps1_r], %[vps1_r], %[u2_r] \n\t" "subq_s.ph %[vqs1_r], %[vqs1_r], %[u2_r] \n\t" : [vps1_l] "+r"(vps1_l), [vps1_r] "+r"(vps1_r), [vqs1_l] "+r"(vqs1_l), [vqs1_r] "+r"(vqs1_r) : [u2_l] "r"(u2_l), [u2_r] "r"(u2_r)); /* roughly 1/7th difference across boundary */ __asm__ __volatile__( /* u = vp8_signed_char_clamp((63 + Filter2 * 9) >> 7); */ "shra.ph %[u3_l], %[u3_l], 7 \n\t" "shra.ph %[u3_r], %[u3_r], 7 \n\t" "shll.ph %[u3_l], %[u3_l], 8 \n\t" "shll.ph %[u3_r], %[u3_r], 8 \n\t" /* vqs2 = vp8_signed_char_clamp(qs2 - u); */ "subq_s.ph %[vqs2_l], %[vqs2_l], %[u3_l] \n\t" "subq_s.ph %[vqs2_r], %[vqs2_r], %[u3_r] \n\t" /* vps2 = vp8_signed_char_clamp(ps2 + u); */ "addq_s.ph %[vps2_l], %[vps2_l], %[u3_l] \n\t" "addq_s.ph %[vps2_r], %[vps2_r], %[u3_r] \n\t" : [u3_l] "+r"(u3_l), [u3_r] "+r"(u3_r), [vps2_l] "+r"(vps2_l), [vps2_r] "+r"(vps2_r), [vqs2_l] "+r"(vqs2_l), [vqs2_r] "+r"(vqs2_r) :); /* Create quad-bytes from halfword pairs */ __asm__ __volatile__( "and %[vqs0_l], %[vqs0_l], %[HWM] \n\t" "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t" "and %[vps0_l], %[vps0_l], %[HWM] \n\t" "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t" "and %[vqs1_l], %[vqs1_l], %[HWM] \n\t" "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t" "and %[vps1_l], %[vps1_l], %[HWM] \n\t" "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t" "and %[vqs2_l], %[vqs2_l], %[HWM] \n\t" "shrl.ph %[vqs2_r], %[vqs2_r], 8 \n\t" "and %[vps2_l], %[vps2_l], %[HWM] \n\t" "shrl.ph %[vps2_r], %[vps2_r], 8 \n\t" "or %[vqs0_r], %[vqs0_l], %[vqs0_r] \n\t" "or %[vps0_r], %[vps0_l], %[vps0_r] \n\t" "or %[vqs1_r], %[vqs1_l], %[vqs1_r] \n\t" "or %[vps1_r], %[vps1_l], %[vps1_r] \n\t" "or %[vqs2_r], %[vqs2_l], %[vqs2_r] \n\t" "or %[vps2_r], %[vps2_l], %[vps2_r] \n\t" : [vps1_l] "+r"(vps1_l), [vps1_r] "+r"(vps1_r), [vqs1_l] "+r"(vqs1_l), [vqs1_r] "+r"(vqs1_r), [vps0_l] "+r"(vps0_l), [vps0_r] "+r"(vps0_r), [vqs0_l] "+r"(vqs0_l), [vqs0_r] "+r"(vqs0_r), [vqs2_l] "+r"(vqs2_l), [vqs2_r] "+r"(vqs2_r), [vps2_r] "+r"(vps2_r), [vps2_l] "+r"(vps2_l) : [HWM] "r"(HWM)); *ps0 = vps0_r ^ N128; *ps1 = vps1_r ^ N128; *ps2 = vps2_r ^ N128; *qs0 = vqs0_r ^ N128; *qs1 = vqs1_r ^ N128; *qs2 = vqs2_r ^ N128; } void vp8_mbloop_filter_horizontal_edge_mips(unsigned char *s, int p, unsigned int flimit, unsigned int limit, unsigned int thresh, int count) { int i; uint32_t mask, hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; mask = 0; hev = 0; i = 0; p1 = 0; p2 = 0; p3 = 0; p4 = 0; /* loop filter designed to work using chars so that we can make maximum use * of 8 bit simd 
instructions. */ sm1 = s - (p << 2); s0 = s - p - p - p; s1 = s - p - p; s2 = s - p; s3 = s; s4 = s + p; s5 = s + p + p; s6 = s + p + p + p; /* prefetch data for load */ prefetch_load_lf(s + p); /* apply filter on 4 pixesl at the same time */ do { /* load quad-byte vectors * memory is 4 byte aligned */ p1 = *((uint32_t *)(s1)); p2 = *((uint32_t *)(s2)); p3 = *((uint32_t *)(s3)); p4 = *((uint32_t *)(s4)); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { pm1 = *((uint32_t *)(sm1)); p0 = *((uint32_t *)(s0)); p5 = *((uint32_t *)(s5)); p6 = *((uint32_t *)(s6)); vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); /* unpack processed 4x4 neighborhood * memory is 4 byte aligned */ *((uint32_t *)s0) = p0; *((uint32_t *)s1) = p1; *((uint32_t *)s2) = p2; *((uint32_t *)s3) = p3; *((uint32_t *)s4) = p4; *((uint32_t *)s5) = p5; } } sm1 += 4; s0 += 4; s1 += 4; s2 += 4; s3 += 4; s4 += 4; s5 += 4; s6 += 4; /* load quad-byte vectors * memory is 4 byte aligned */ p1 = *((uint32_t *)(s1)); p2 = *((uint32_t *)(s2)); p3 = *((uint32_t *)(s3)); p4 = *((uint32_t *)(s4)); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { pm1 = *((uint32_t *)(sm1)); p0 = *((uint32_t *)(s0)); p5 = *((uint32_t *)(s5)); p6 = *((uint32_t *)(s6)); vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); /* unpack processed 4x4 neighborhood * memory is 4 byte aligned */ *((uint32_t *)s0) = p0; *((uint32_t *)s1) = p1; *((uint32_t *)s2) = p2; *((uint32_t *)s3) = p3; *((uint32_t *)s4) = p4; *((uint32_t *)s5) = p5; } } sm1 += 4; s0 += 4; s1 += 4; s2 += 4; s3 += 4; s4 += 4; s5 += 4; s6 += 4; i += 8; } while (i < count); } void vp8_mbloop_filter_uvhorizontal_edge_mips(unsigned char *s, int p, unsigned int flimit, unsigned int limit, unsigned int thresh, int count) { uint32_t mask, hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; unsigned char *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; (void)count; mask = 0; hev = 0; p1 = 0; p2 = 0; p3 = 0; p4 = 0; /* loop filter designed to work using chars so that we can make maximum use * of 8 bit simd instructions. 
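 *
 * The macroblock-edge filter used here (vp8_mbfilter_mips) widens the
 * correction to three pixels on each side of the edge.  Scalar sketch of
 * the three taps it computes, matching the 27/18/9 multiples built up
 * above (clamp8 stands for signed-char saturation and is illustrative):
 *
 *   u1 = clamp8((27 * Filter2 + 63) >> 7);  // ~3/7, applied to p0/q0
 *   u2 = clamp8((18 * Filter2 + 63) >> 7);  // ~2/7, applied to p1/q1
 *   u3 = clamp8(( 9 * Filter2 + 63) >> 7);  // ~1/7, applied to p2/q2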
*/ sm1 = s - (p << 2); s0 = s - p - p - p; s1 = s - p - p; s2 = s - p; s3 = s; s4 = s + p; s5 = s + p + p; s6 = s + p + p + p; /* load quad-byte vectors * memory is 4 byte aligned */ p1 = *((uint32_t *)(s1)); p2 = *((uint32_t *)(s2)); p3 = *((uint32_t *)(s3)); p4 = *((uint32_t *)(s4)); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { pm1 = *((uint32_t *)(sm1)); p0 = *((uint32_t *)(s0)); p5 = *((uint32_t *)(s5)); p6 = *((uint32_t *)(s6)); /* if mask == 0 do filtering is not needed */ vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); if (mask) { /* filtering */ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); /* unpack processed 4x4 neighborhood * memory is 4 byte aligned */ *((uint32_t *)s0) = p0; *((uint32_t *)s1) = p1; *((uint32_t *)s2) = p2; *((uint32_t *)s3) = p3; *((uint32_t *)s4) = p4; *((uint32_t *)s5) = p5; } } sm1 += 4; s0 += 4; s1 += 4; s2 += 4; s3 += 4; s4 += 4; s5 += 4; s6 += 4; /* load quad-byte vectors * memory is 4 byte aligned */ p1 = *((uint32_t *)(s1)); p2 = *((uint32_t *)(s2)); p3 = *((uint32_t *)(s3)); p4 = *((uint32_t *)(s4)); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { pm1 = *((uint32_t *)(sm1)); p0 = *((uint32_t *)(s0)); p5 = *((uint32_t *)(s5)); p6 = *((uint32_t *)(s6)); vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); /* unpack processed 4x4 neighborhood * memory is 4 byte aligned */ *((uint32_t *)s0) = p0; *((uint32_t *)s1) = p1; *((uint32_t *)s2) = p2; *((uint32_t *)s3) = p3; *((uint32_t *)s4) = p4; *((uint32_t *)s5) = p5; } } } void vp8_mbloop_filter_vertical_edge_mips(unsigned char *s, int p, unsigned int flimit, unsigned int limit, unsigned int thresh, int count) { int i; uint32_t mask, hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; unsigned char *s1, *s2, *s3, *s4; uint32_t prim1, prim2, sec3, sec4, prim3, prim4; mask = 0; hev = 0; i = 0; pm1 = 0; p0 = 0; p1 = 0; p2 = 0; p3 = 0; p4 = 0; p5 = 0; p6 = 0; /* loop filter designed to work using chars so that we can make maximum use * of 8 bit simd instructions. 
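 *
 * Each filtered row of the transposed block writes six bytes straddling
 * the edge.  Sketch of the store pattern used below for one row r
 * (offsets are relative to the edge position):
 *
 *   r[-3] = p0;  r[-2] = p1;  r[-1] = p2;   // left of the edge
 *   r[ 0] = p3;  r[ 1] = p4;  r[ 2] = p5;   // right of the edge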
*/ /* apply filter on 4 pixesl at the same time */ do { s1 = s; s2 = s + p; s3 = s2 + p; s4 = s3 + p; s = s4 + p; /* load quad-byte vectors * memory is 4 byte aligned */ p2 = *((uint32_t *)(s1 - 4)); p6 = *((uint32_t *)(s1)); p1 = *((uint32_t *)(s2 - 4)); p5 = *((uint32_t *)(s2)); p0 = *((uint32_t *)(s3 - 4)); p4 = *((uint32_t *)(s3)); pm1 = *((uint32_t *)(s4 - 4)); p3 = *((uint32_t *)(s4)); /* transpose pm1, p0, p1, p2 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" "append %[p1], %[sec3], 16 \n\t" "append %[pm1], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* transpose p3, p4, p5, p6 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" "append %[p5], %[sec3], 16 \n\t" "append %[p3], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); /* don't use transpose on output data * because memory isn't aligned */ __asm__ __volatile__( "sb %[p5], 2(%[s4]) \n\t" "sb %[p4], 1(%[s4]) \n\t" "sb %[p3], 0(%[s4]) \n\t" "sb %[p2], -1(%[s4]) \n\t" "sb %[p1], -2(%[s4]) \n\t" "sb %[p0], -3(%[s4]) \n\t" : : [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [s4] "r"(s4), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0)); __asm__ __volatile__( "srl %[p5], %[p5], 8 \n\t" "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" "srl %[p0], %[p0], 8 \n\t" : [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0) :); __asm__ __volatile__( "sb %[p5], 2(%[s3]) \n\t" "sb %[p4], 1(%[s3]) \n\t" "sb %[p3], 0(%[s3]) \n\t" "sb %[p2], -1(%[s3]) \n\t" "sb %[p1], -2(%[s3]) \n\t" "sb %[p0], -3(%[s3]) \n\t" : : [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [s3] "r"(s3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0)); __asm__ __volatile__( "srl %[p5], %[p5], 8 \n\t" "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" "srl %[p0], %[p0], 8 \n\t" : [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0) :); __asm__ __volatile__( "sb %[p5], 2(%[s2]) \n\t" "sb %[p4], 1(%[s2]) \n\t" "sb %[p3], 
0(%[s2]) \n\t" "sb %[p2], -1(%[s2]) \n\t" "sb %[p1], -2(%[s2]) \n\t" "sb %[p0], -3(%[s2]) \n\t" : : [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [s2] "r"(s2), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0)); __asm__ __volatile__( "srl %[p5], %[p5], 8 \n\t" "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" "srl %[p0], %[p0], 8 \n\t" : [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0) :); __asm__ __volatile__( "sb %[p5], 2(%[s1]) \n\t" "sb %[p4], 1(%[s1]) \n\t" "sb %[p3], 0(%[s1]) \n\t" "sb %[p2], -1(%[s1]) \n\t" "sb %[p1], -2(%[s1]) \n\t" "sb %[p0], -3(%[s1]) \n\t" : : [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [s1] "r"(s1), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0)); } } i += 4; } while (i < count); } void vp8_mbloop_filter_uvvertical_edge_mips(unsigned char *s, int p, unsigned int flimit, unsigned int limit, unsigned int thresh, int count) { uint32_t mask, hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; unsigned char *s1, *s2, *s3, *s4; uint32_t prim1, prim2, sec3, sec4, prim3, prim4; (void)count; mask = 0; hev = 0; pm1 = 0; p0 = 0; p1 = 0; p2 = 0; p3 = 0; p4 = 0; p5 = 0; p6 = 0; /* loop filter designed to work using chars so that we can make maximum use * of 8 bit simd instructions. */ /* apply filter on 4 pixesl at the same time */ s1 = s; s2 = s + p; s3 = s2 + p; s4 = s3 + p; /* prefetch data for load */ prefetch_load_lf(s + 2 * p); /* load quad-byte vectors * memory is 4 byte aligned */ p2 = *((uint32_t *)(s1 - 4)); p6 = *((uint32_t *)(s1)); p1 = *((uint32_t *)(s2 - 4)); p5 = *((uint32_t *)(s2)); p0 = *((uint32_t *)(s3 - 4)); p4 = *((uint32_t *)(s3)); pm1 = *((uint32_t *)(s4 - 4)); p3 = *((uint32_t *)(s4)); /* transpose pm1, p0, p1, p2 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" "append %[p1], %[sec3], 16 \n\t" "append %[pm1], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* transpose p3, p4, p5, p6 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" "append %[p5], %[sec3], 16 \n\t" "append %[p3], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ 
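      /* After the transpose above, pm1..p6 hold the eight pixel columns
       * as rows; the wide filter updates the inner six (p0..p5), three on
       * each side of the edge, and the results are scattered back with sb
       * because the destination bytes are not word aligned. */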
vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); /* don't use transpose on output data * because memory isn't aligned */ __asm__ __volatile__( "sb %[p5], 2(%[s4]) \n\t" "sb %[p4], 1(%[s4]) \n\t" "sb %[p3], 0(%[s4]) \n\t" "sb %[p2], -1(%[s4]) \n\t" "sb %[p1], -2(%[s4]) \n\t" "sb %[p0], -3(%[s4]) \n\t" : : [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [s4] "r"(s4), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0)); __asm__ __volatile__( "srl %[p5], %[p5], 8 \n\t" "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" "srl %[p0], %[p0], 8 \n\t" : [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0) :); __asm__ __volatile__( "sb %[p5], 2(%[s3]) \n\t" "sb %[p4], 1(%[s3]) \n\t" "sb %[p3], 0(%[s3]) \n\t" "sb %[p2], -1(%[s3]) \n\t" "sb %[p1], -2(%[s3]) \n\t" "sb %[p0], -3(%[s3]) \n\t" : : [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [s3] "r"(s3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0)); __asm__ __volatile__( "srl %[p5], %[p5], 8 \n\t" "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" "srl %[p0], %[p0], 8 \n\t" : [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0) :); __asm__ __volatile__( "sb %[p5], 2(%[s2]) \n\t" "sb %[p4], 1(%[s2]) \n\t" "sb %[p3], 0(%[s2]) \n\t" "sb %[p2], -1(%[s2]) \n\t" "sb %[p1], -2(%[s2]) \n\t" "sb %[p0], -3(%[s2]) \n\t" : : [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [s2] "r"(s2), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0)); __asm__ __volatile__( "srl %[p5], %[p5], 8 \n\t" "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" "srl %[p0], %[p0], 8 \n\t" : [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0) :); __asm__ __volatile__( "sb %[p5], 2(%[s1]) \n\t" "sb %[p4], 1(%[s1]) \n\t" "sb %[p3], 0(%[s1]) \n\t" "sb %[p2], -1(%[s1]) \n\t" "sb %[p1], -2(%[s1]) \n\t" "sb %[p0], -3(%[s1]) \n\t" : : [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [s1] "r"(s1), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0)); } } s1 = s4 + p; s2 = s1 + p; s3 = s2 + p; s4 = s3 + p; /* load quad-byte vectors * memory is 4 byte aligned */ p2 = *((uint32_t *)(s1 - 4)); p6 = *((uint32_t *)(s1)); p1 = *((uint32_t *)(s2 - 4)); p5 = *((uint32_t *)(s2)); p0 = *((uint32_t *)(s3 - 4)); p4 = *((uint32_t *)(s3)); pm1 = *((uint32_t *)(s4 - 4)); p3 = *((uint32_t *)(s4)); /* transpose pm1, p0, p1, p2 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" "append %[p1], %[sec3], 16 \n\t" "append %[pm1], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* transpose p3, p4, p5, p6 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], 
%[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" "append %[p5], %[sec3], 16 \n\t" "append %[p3], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { vp8_filter_mask_vec_mips(limit, flimit, p1, p2, pm1, p0, p3, p4, p5, p6, thresh, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ vp8_mbfilter_mips(mask, hev, &p0, &p1, &p2, &p3, &p4, &p5); /* don't use transpose on output data * because memory isn't aligned */ __asm__ __volatile__( "sb %[p5], 2(%[s4]) \n\t" "sb %[p4], 1(%[s4]) \n\t" "sb %[p3], 0(%[s4]) \n\t" "sb %[p2], -1(%[s4]) \n\t" "sb %[p1], -2(%[s4]) \n\t" "sb %[p0], -3(%[s4]) \n\t" : : [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [s4] "r"(s4), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0)); __asm__ __volatile__( "srl %[p5], %[p5], 8 \n\t" "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" "srl %[p0], %[p0], 8 \n\t" : [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0) :); __asm__ __volatile__( "sb %[p5], 2(%[s3]) \n\t" "sb %[p4], 1(%[s3]) \n\t" "sb %[p3], 0(%[s3]) \n\t" "sb %[p2], -1(%[s3]) \n\t" "sb %[p1], -2(%[s3]) \n\t" "sb %[p0], -3(%[s3]) \n\t" : : [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [s3] "r"(s3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0)); __asm__ __volatile__( "srl %[p5], %[p5], 8 \n\t" "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" "srl %[p0], %[p0], 8 \n\t" : [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0) :); __asm__ __volatile__( "sb %[p5], 2(%[s2]) \n\t" "sb %[p4], 1(%[s2]) \n\t" "sb %[p3], 0(%[s2]) \n\t" "sb %[p2], -1(%[s2]) \n\t" "sb %[p1], -2(%[s2]) \n\t" "sb %[p0], -3(%[s2]) \n\t" : : [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [s2] "r"(s2), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0)); __asm__ __volatile__( "srl %[p5], %[p5], 8 \n\t" "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" "srl %[p0], %[p0], 8 \n\t" : [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0) :); __asm__ __volatile__( "sb %[p5], 2(%[s1]) \n\t" "sb %[p4], 1(%[s1]) \n\t" "sb %[p3], 0(%[s1]) \n\t" "sb %[p2], -1(%[s1]) \n\t" "sb %[p1], -2(%[s1]) \n\t" "sb %[p0], -3(%[s1]) \n\t" : : [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [s1] "r"(s1), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0)); } } } /* Horizontal MB filtering */ void vp8_loop_filter_mbh_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { unsigned int thresh_vec, flimit_vec, limit_vec; unsigned char thresh, flimit, limit, flimit_temp; /* use direct value instead pointers */ limit = *(lfi->lim); flimit_temp = *(lfi->mblim); thresh = *(lfi->hev_thr); flimit = flimit_temp; /* create quad-byte */ __asm__ __volatile__( "replv.qb %[thresh_vec], %[thresh] \n\t" "replv.qb %[flimit_vec], %[flimit] \n\t" "replv.qb %[limit_vec], %[limit] \n\t" : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec), [limit_vec] "=r"(limit_vec) : [thresh] "r"(thresh), [flimit] "r"(flimit), [limit] 
"r"(limit)); vp8_mbloop_filter_horizontal_edge_mips(y_ptr, y_stride, flimit_vec, limit_vec, thresh_vec, 16); if (u_ptr) { vp8_mbloop_filter_uvhorizontal_edge_mips(u_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); } if (v_ptr) { vp8_mbloop_filter_uvhorizontal_edge_mips(v_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); } } /* Vertical MB Filtering */ void vp8_loop_filter_mbv_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { unsigned int thresh_vec, flimit_vec, limit_vec; unsigned char thresh, flimit, limit, flimit_temp; /* use direct value instead pointers */ limit = *(lfi->lim); flimit_temp = *(lfi->mblim); thresh = *(lfi->hev_thr); flimit = flimit_temp; /* create quad-byte */ __asm__ __volatile__( "replv.qb %[thresh_vec], %[thresh] \n\t" "replv.qb %[flimit_vec], %[flimit] \n\t" "replv.qb %[limit_vec], %[limit] \n\t" : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec), [limit_vec] "=r"(limit_vec) : [thresh] "r"(thresh), [flimit] "r"(flimit), [limit] "r"(limit)); vp8_mbloop_filter_vertical_edge_mips(y_ptr, y_stride, flimit_vec, limit_vec, thresh_vec, 16); if (u_ptr) vp8_mbloop_filter_uvvertical_edge_mips(u_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); if (v_ptr) vp8_mbloop_filter_uvvertical_edge_mips(v_ptr, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); } /* Horizontal B Filtering */ void vp8_loop_filter_bh_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { unsigned int thresh_vec, flimit_vec, limit_vec; unsigned char thresh, flimit, limit, flimit_temp; /* use direct value instead pointers */ limit = *(lfi->lim); flimit_temp = *(lfi->blim); thresh = *(lfi->hev_thr); flimit = flimit_temp; /* create quad-byte */ __asm__ __volatile__( "replv.qb %[thresh_vec], %[thresh] \n\t" "replv.qb %[flimit_vec], %[flimit] \n\t" "replv.qb %[limit_vec], %[limit] \n\t" : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec), [limit_vec] "=r"(limit_vec) : [thresh] "r"(thresh), [flimit] "r"(flimit), [limit] "r"(limit)); vp8_loop_filter_horizontal_edge_mips(y_ptr + 4 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); vp8_loop_filter_horizontal_edge_mips(y_ptr + 8 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); vp8_loop_filter_horizontal_edge_mips(y_ptr + 12 * y_stride, y_stride, flimit_vec, limit_vec, thresh_vec, 16); if (u_ptr) vp8_loop_filter_uvhorizontal_edge_mips( u_ptr + 4 * uv_stride, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); if (v_ptr) vp8_loop_filter_uvhorizontal_edge_mips( v_ptr + 4 * uv_stride, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); } /* Vertical B Filtering */ void vp8_loop_filter_bv_dspr2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { unsigned int thresh_vec, flimit_vec, limit_vec; unsigned char thresh, flimit, limit, flimit_temp; /* use direct value instead pointers */ limit = *(lfi->lim); flimit_temp = *(lfi->blim); thresh = *(lfi->hev_thr); flimit = flimit_temp; /* create quad-byte */ __asm__ __volatile__( "replv.qb %[thresh_vec], %[thresh] \n\t" "replv.qb %[flimit_vec], %[flimit] \n\t" "replv.qb %[limit_vec], %[limit] \n\t" : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec), [limit_vec] "=r"(limit_vec) : [thresh] "r"(thresh), [flimit] "r"(flimit), [limit] "r"(limit)); vp8_loop_filter_vertical_edge_mips(y_ptr + 4, y_stride, flimit_vec, limit_vec, thresh_vec, 16); 
vp8_loop_filter_vertical_edge_mips(y_ptr + 8, y_stride, flimit_vec, limit_vec, thresh_vec, 16); vp8_loop_filter_vertical_edge_mips(y_ptr + 12, y_stride, flimit_vec, limit_vec, thresh_vec, 16); if (u_ptr) vp8_loop_filter_uvvertical_edge_mips(u_ptr + 4, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); if (v_ptr) vp8_loop_filter_uvvertical_edge_mips(v_ptr + 4, uv_stride, flimit_vec, limit_vec, thresh_vec, 0); } #endif libvpx-1.8.2/vp8/common/mips/mmi/000077500000000000000000000000001357355204000165715ustar00rootroot00000000000000libvpx-1.8.2/vp8/common/mips/mmi/copymem_mmi.c000066400000000000000000000112401357355204000212460ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp8_rtcd.h" #include "vpx_ports/asmdefs_mmi.h" #define COPY_MEM_16X2 \ "gsldlc1 %[ftmp0], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp0], 0x00(%[src]) \n\t" \ "ldl %[tmp0], 0x0f(%[src]) \n\t" \ "ldr %[tmp0], 0x08(%[src]) \n\t" \ MMI_ADDU(%[src], %[src], %[src_stride]) \ "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t" \ "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t" \ "sdl %[tmp0], 0x0f(%[dst]) \n\t" \ "sdr %[tmp0], 0x08(%[dst]) \n\t" \ MMI_ADDU(%[dst], %[dst], %[dst_stride]) \ "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \ "ldl %[tmp1], 0x0f(%[src]) \n\t" \ "ldr %[tmp1], 0x08(%[src]) \n\t" \ MMI_ADDU(%[src], %[src], %[src_stride]) \ "gssdlc1 %[ftmp1], 0x07(%[dst]) \n\t" \ "gssdrc1 %[ftmp1], 0x00(%[dst]) \n\t" \ "sdl %[tmp1], 0x0f(%[dst]) \n\t" \ "sdr %[tmp1], 0x08(%[dst]) \n\t" \ MMI_ADDU(%[dst], %[dst], %[dst_stride]) #define COPY_MEM_8X2 \ "gsldlc1 %[ftmp0], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp0], 0x00(%[src]) \n\t" \ MMI_ADDU(%[src], %[src], %[src_stride]) \ "ldl %[tmp0], 0x07(%[src]) \n\t" \ "ldr %[tmp0], 0x00(%[src]) \n\t" \ MMI_ADDU(%[src], %[src], %[src_stride]) \ \ "gssdlc1 %[ftmp0], 0x07(%[dst]) \n\t" \ "gssdrc1 %[ftmp0], 0x00(%[dst]) \n\t" \ MMI_ADDU(%[dst], %[dst], %[dst_stride]) \ "sdl %[tmp0], 0x07(%[dst]) \n\t" \ "sdr %[tmp0], 0x00(%[dst]) \n\t" \ MMI_ADDU(%[dst], %[dst], %[dst_stride]) void vp8_copy_mem16x16_mmi(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) { double ftmp[2]; uint64_t tmp[2]; uint8_t loop_count = 4; /* clang-format off */ __asm__ volatile ( "1: \n\t" COPY_MEM_16X2 COPY_MEM_16X2 MMI_ADDIU(%[loop_count], %[loop_count], -0x01) "bnez %[loop_count], 1b \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), [loop_count]"+&r"(loop_count), [dst]"+&r"(dst), [src]"+&r"(src) : [src_stride]"r"((mips_reg)src_stride), [dst_stride]"r"((mips_reg)dst_stride) : "memory" ); /* clang-format on */ } void vp8_copy_mem8x8_mmi(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) { double ftmp[2]; uint64_t tmp[1]; uint8_t loop_count = 4; /* clang-format off */ __asm__ volatile ( "1: \n\t" COPY_MEM_8X2 MMI_ADDIU(%[loop_count], %[loop_count], -0x01) "bnez %[loop_count], 1b \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [tmp0]"=&r"(tmp[0]), [loop_count]"+&r"(loop_count), [dst]"+&r"(dst), [src]"+&r"(src) : [src_stride]"r"((mips_reg)src_stride), [dst_stride]"r"((mips_reg)dst_stride) : "memory" ); /* clang-format on */ } void 
vp8_copy_mem8x4_mmi(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) { double ftmp[2]; uint64_t tmp[1]; /* clang-format off */ __asm__ volatile ( COPY_MEM_8X2 COPY_MEM_8X2 : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [tmp0]"=&r"(tmp[0]), [dst]"+&r"(dst), [src]"+&r"(src) : [src_stride]"r"((mips_reg)src_stride), [dst_stride]"r"((mips_reg)dst_stride) : "memory" ); /* clang-format on */ } libvpx-1.8.2/vp8/common/mips/mmi/dequantize_mmi.c000066400000000000000000000135141357355204000217540ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp8_rtcd.h" #include "vp8/common/blockd.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/asmdefs_mmi.h" void vp8_dequantize_b_mmi(BLOCKD *d, int16_t *DQC) { double ftmp[8]; __asm__ volatile( "gsldlc1 %[ftmp0], 0x07(%[qcoeff]) \n\t" "gsldrc1 %[ftmp0], 0x00(%[qcoeff]) \n\t" "gsldlc1 %[ftmp1], 0x0f(%[qcoeff]) \n\t" "gsldrc1 %[ftmp1], 0x08(%[qcoeff]) \n\t" "gsldlc1 %[ftmp2], 0x17(%[qcoeff]) \n\t" "gsldrc1 %[ftmp2], 0x10(%[qcoeff]) \n\t" "gsldlc1 %[ftmp3], 0x1f(%[qcoeff]) \n\t" "gsldrc1 %[ftmp3], 0x18(%[qcoeff]) \n\t" "gsldlc1 %[ftmp4], 0x07(%[DQC]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[DQC]) \n\t" "gsldlc1 %[ftmp5], 0x0f(%[DQC]) \n\t" "gsldrc1 %[ftmp5], 0x08(%[DQC]) \n\t" "gsldlc1 %[ftmp6], 0x17(%[DQC]) \n\t" "gsldrc1 %[ftmp6], 0x10(%[DQC]) \n\t" "gsldlc1 %[ftmp7], 0x1f(%[DQC]) \n\t" "gsldrc1 %[ftmp7], 0x18(%[DQC]) \n\t" "pmullh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" "pmullh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" "pmullh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" "gssdlc1 %[ftmp0], 0x07(%[dqcoeff]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[dqcoeff]) \n\t" "gssdlc1 %[ftmp1], 0x0f(%[dqcoeff]) \n\t" "gssdrc1 %[ftmp1], 0x08(%[dqcoeff]) \n\t" "gssdlc1 %[ftmp2], 0x17(%[dqcoeff]) \n\t" "gssdrc1 %[ftmp2], 0x10(%[dqcoeff]) \n\t" "gssdlc1 %[ftmp3], 0x1f(%[dqcoeff]) \n\t" "gssdrc1 %[ftmp3], 0x18(%[dqcoeff]) \n\t" : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]) : [dqcoeff] "r"(d->dqcoeff), [qcoeff] "r"(d->qcoeff), [DQC] "r"(DQC) : "memory"); } void vp8_dequant_idct_add_mmi(int16_t *input, int16_t *dq, unsigned char *dest, int stride) { double ftmp[8]; __asm__ volatile( "gsldlc1 %[ftmp0], 0x07(%[dq]) \n\t" "gsldrc1 %[ftmp0], 0x00(%[dq]) \n\t" "gsldlc1 %[ftmp1], 0x0f(%[dq]) \n\t" "gsldrc1 %[ftmp1], 0x08(%[dq]) \n\t" "gsldlc1 %[ftmp2], 0x17(%[dq]) \n\t" "gsldrc1 %[ftmp2], 0x10(%[dq]) \n\t" "gsldlc1 %[ftmp3], 0x1f(%[dq]) \n\t" "gsldrc1 %[ftmp3], 0x18(%[dq]) \n\t" "gsldlc1 %[ftmp4], 0x07(%[input]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[input]) \n\t" "gsldlc1 %[ftmp5], 0x0f(%[input]) \n\t" "gsldrc1 %[ftmp5], 0x08(%[input]) \n\t" "gsldlc1 %[ftmp6], 0x17(%[input]) \n\t" "gsldrc1 %[ftmp6], 0x10(%[input]) \n\t" "gsldlc1 %[ftmp7], 0x1f(%[input]) \n\t" "gsldrc1 %[ftmp7], 0x18(%[input]) \n\t" "pmullh %[ftmp0], %[ftmp0], %[ftmp4] \n\t" "pmullh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" "pmullh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" "pmullh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" "gssdlc1 %[ftmp0], 0x07(%[input]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[input]) 
\n\t" "gssdlc1 %[ftmp1], 0x0f(%[input]) \n\t" "gssdrc1 %[ftmp1], 0x08(%[input]) \n\t" "gssdlc1 %[ftmp2], 0x17(%[input]) \n\t" "gssdrc1 %[ftmp2], 0x10(%[input]) \n\t" "gssdlc1 %[ftmp3], 0x1f(%[input]) \n\t" "gssdrc1 %[ftmp3], 0x18(%[input]) \n\t" : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]) : [dq] "r"(dq), [input] "r"(input) : "memory"); vp8_short_idct4x4llm_mmi(input, dest, stride, dest, stride); __asm__ volatile( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "gssdlc1 %[ftmp0], 0x07(%[input]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[input]) \n\t" "sdl $0, 0x0f(%[input]) \n\t" "sdr $0, 0x08(%[input]) \n\t" "gssdlc1 %[ftmp0], 0x17(%[input]) \n\t" "gssdrc1 %[ftmp0], 0x10(%[input]) \n\t" "sdl $0, 0x1f(%[input]) \n\t" "sdr $0, 0x18(%[input]) \n\t" : [ftmp0] "=&f"(ftmp[0]) : [input] "r"(input) : "memory"); } libvpx-1.8.2/vp8/common/mips/mmi/idct_blk_mmi.c000066400000000000000000000034751357355204000213630ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp8_rtcd.h" #include "vpx_mem/vpx_mem.h" void vp8_dequant_idct_add_y_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst, int stride, char *eobs) { int i, j; for (i = 0; i < 4; i++) { for (j = 0; j < 4; j++) { if (*eobs++ > 1) { vp8_dequant_idct_add_mmi(q, dq, dst, stride); } else { vp8_dc_only_idct_add_mmi(q[0] * dq[0], dst, stride, dst, stride); memset(q, 0, 2 * sizeof(q[0])); } q += 16; dst += 4; } dst += 4 * stride - 16; } } void vp8_dequant_idct_add_uv_block_mmi(int16_t *q, int16_t *dq, uint8_t *dst_u, uint8_t *dst_v, int stride, char *eobs) { int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { if (*eobs++ > 1) { vp8_dequant_idct_add_mmi(q, dq, dst_u, stride); } else { vp8_dc_only_idct_add_mmi(q[0] * dq[0], dst_u, stride, dst_u, stride); memset(q, 0, 2 * sizeof(q[0])); } q += 16; dst_u += 4; } dst_u += 4 * stride - 8; } for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { if (*eobs++ > 1) { vp8_dequant_idct_add_mmi(q, dq, dst_v, stride); } else { vp8_dc_only_idct_add_mmi(q[0] * dq[0], dst_v, stride, dst_v, stride); memset(q, 0, 2 * sizeof(q[0])); } q += 16; dst_v += 4; } dst_v += 4 * stride - 8; } } libvpx-1.8.2/vp8/common/mips/mmi/idctllm_mmi.c000066400000000000000000000412141357355204000212310ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vp8_rtcd.h" #include "vpx_ports/mem.h" #include "vpx_ports/asmdefs_mmi.h" #define TRANSPOSE_4H \ "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" \ MMI_LI(%[tmp0], 0x93) \ "mtc1 %[tmp0], %[ftmp10] \n\t" \ "punpcklhw %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ "punpcklhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "or %[ftmp5], %[ftmp5], %[ftmp9] \n\t" \ "punpckhhw %[ftmp6], %[ftmp1], %[ftmp0] \n\t" \ "punpckhhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "or %[ftmp6], %[ftmp6], %[ftmp9] \n\t" \ "punpcklhw %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ "punpcklhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "or %[ftmp7], %[ftmp7], %[ftmp9] \n\t" \ "punpckhhw %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \ "punpckhhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "or %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ "punpcklwd %[ftmp1], %[ftmp5], %[ftmp7] \n\t" \ "punpckhwd %[ftmp2], %[ftmp5], %[ftmp7] \n\t" \ "punpcklwd %[ftmp3], %[ftmp6], %[ftmp8] \n\t" \ "punpckhwd %[ftmp4], %[ftmp6], %[ftmp8] \n\t" void vp8_short_idct4x4llm_mmi(int16_t *input, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride) { double ftmp[12]; uint32_t tmp[0]; DECLARE_ALIGNED(8, const uint64_t, ff_ph_04) = { 0x0004000400040004ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_ph_4e7b) = { 0x4e7b4e7b4e7b4e7bULL }; DECLARE_ALIGNED(8, const uint64_t, ff_ph_22a3) = { 0x22a322a322a322a3ULL }; __asm__ volatile ( MMI_LI(%[tmp0], 0x02) "mtc1 %[tmp0], %[ftmp11] \n\t" "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t" "gsldlc1 %[ftmp2], 0x0f(%[ip]) \n\t" "gsldrc1 %[ftmp2], 0x08(%[ip]) \n\t" "gsldlc1 %[ftmp3], 0x17(%[ip]) \n\t" "gsldrc1 %[ftmp3], 0x10(%[ip]) \n\t" "gsldlc1 %[ftmp4], 0x1f(%[ip]) \n\t" "gsldrc1 %[ftmp4], 0x18(%[ip]) \n\t" // ip[0...3] + ip[8...11] "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t" // ip[0...3] - ip[8...11] "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t" // (ip[12...15] * sinpi8sqrt2) >> 16 "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t" "pmulhh %[ftmp7], %[ftmp9], %[ff_ph_22a3] \n\t" // (ip[ 4... 7] * sinpi8sqrt2) >> 16 "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t" "pmulhh %[ftmp8], %[ftmp9], %[ff_ph_22a3] \n\t" // ip[ 4... 7] + ((ip[ 4... 
7] * cospi8sqrt2minus1) >> 16) "pmulhh %[ftmp9], %[ftmp2], %[ff_ph_4e7b] \n\t" "paddh %[ftmp9], %[ftmp9], %[ftmp2] \n\t" // ip[12...15] + ((ip[12...15] * cospi8sqrt2minus1) >> 16) "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t" "paddh %[ftmp10], %[ftmp10], %[ftmp4] \n\t" "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t" "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t" "psubh %[ftmp2], %[ftmp2], %[ftmp10] \n\t" "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t" "paddh %[ftmp3], %[ftmp3], %[ftmp10] \n\t" "psubh %[ftmp4], %[ftmp5], %[ftmp7] \n\t" "psubh %[ftmp4], %[ftmp4], %[ftmp9] \n\t" TRANSPOSE_4H // a "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t" // b "psubh %[ftmp6], %[ftmp1], %[ftmp3] \n\t" // c "psllh %[ftmp9], %[ftmp2], %[ftmp11] \n\t" "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t" "psubh %[ftmp7], %[ftmp9], %[ftmp4] \n\t" "pmulhh %[ftmp10], %[ftmp4], %[ff_ph_4e7b] \n\t" "psubh %[ftmp7], %[ftmp7], %[ftmp10] \n\t" // d "psllh %[ftmp9], %[ftmp4], %[ftmp11] \n\t" "pmulhh %[ftmp9], %[ftmp9], %[ff_ph_22a3] \n\t" "paddh %[ftmp8], %[ftmp9], %[ftmp2] \n\t" "pmulhh %[ftmp10], %[ftmp2], %[ff_ph_4e7b] \n\t" "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t" MMI_LI(%[tmp0], 0x03) "mtc1 %[tmp0], %[ftmp11] \n\t" // a + d "paddh %[ftmp1], %[ftmp5], %[ftmp8] \n\t" "paddh %[ftmp1], %[ftmp1], %[ff_ph_04] \n\t" "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t" // b + c "paddh %[ftmp2], %[ftmp6], %[ftmp7] \n\t" "paddh %[ftmp2], %[ftmp2], %[ff_ph_04] \n\t" "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t" // b - c "psubh %[ftmp3], %[ftmp6], %[ftmp7] \n\t" "paddh %[ftmp3], %[ftmp3], %[ff_ph_04] \n\t" "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t" // a - d "psubh %[ftmp4], %[ftmp5], %[ftmp8] \n\t" "paddh %[ftmp4], %[ftmp4], %[ff_ph_04] \n\t" "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t" TRANSPOSE_4H #if _MIPS_SIM == _ABIO32 "ulw %[tmp0], 0x00(%[pred_prt]) \n\t" "mtc1 %[tmp0], %[ftmp5] \n\t" #else "gslwlc1 %[ftmp5], 0x03(%[pred_ptr]) \n\t" "gslwrc1 %[ftmp5], 0x00(%[pred_ptr]) \n\t" #endif "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" "paddh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" "packushb %[ftmp1], %[ftmp1], %[ftmp0] \n\t" "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t" "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t" MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride]) MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride]) #if _MIPS_SIM == _ABIO32 "ulw %[tmp0], 0x00(%[pred_prt]) \n\t" "mtc1 %[tmp0], %[ftmp6] \n\t" #else "gslwlc1 %[ftmp6], 0x03(%[pred_ptr]) \n\t" "gslwrc1 %[ftmp6], 0x00(%[pred_ptr]) \n\t" #endif "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" "paddh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" "gsswlc1 %[ftmp2], 0x03(%[dst_ptr]) \n\t" "gsswrc1 %[ftmp2], 0x00(%[dst_ptr]) \n\t" MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride]) MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride]) #if _MIPS_SIM == _ABIO32 "ulw %[tmp0], 0x00(%[pred_prt]) \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t" #else "gslwlc1 %[ftmp7], 0x03(%[pred_ptr]) \n\t" "gslwrc1 %[ftmp7], 0x00(%[pred_ptr]) \n\t" #endif "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" "paddh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" "packushb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" "gsswlc1 %[ftmp3], 0x03(%[dst_ptr]) \n\t" "gsswrc1 %[ftmp3], 0x00(%[dst_ptr]) \n\t" MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride]) MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride]) #if _MIPS_SIM == _ABIO32 "ulw %[tmp0], 0x00(%[pred_prt]) \n\t" "mtc1 %[tmp0], %[ftmp8] \n\t" #else "gslwlc1 %[ftmp8], 0x03(%[pred_ptr]) \n\t" "gslwrc1 %[ftmp8], 0x00(%[pred_ptr]) \n\t" #endif "punpcklbh %[ftmp8], %[ftmp8], 
%[ftmp0] \n\t" "paddh %[ftmp4], %[ftmp4], %[ftmp8] \n\t" "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" "gsswlc1 %[ftmp4], 0x03(%[dst_ptr]) \n\t" "gsswrc1 %[ftmp4], 0x00(%[dst_ptr]) \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [pred_ptr]"+&r"(pred_ptr), [dst_ptr]"+&r"(dst_ptr) : [ip]"r"(input), [ff_ph_22a3]"f"(ff_ph_22a3), [ff_ph_4e7b]"f"(ff_ph_4e7b), [ff_ph_04]"f"(ff_ph_04), [pred_stride]"r"((mips_reg)pred_stride), [dst_stride]"r"((mips_reg)dst_stride) : "memory" ); } void vp8_dc_only_idct_add_mmi(int16_t input_dc, unsigned char *pred_ptr, int pred_stride, unsigned char *dst_ptr, int dst_stride) { int a1 = ((input_dc + 4) >> 3); double ftmp[5]; int low32; __asm__ volatile ( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "pshufh %[a1], %[a1], %[ftmp0] \n\t" "ulw %[low32], 0x00(%[pred_ptr]) \n\t" "mtc1 %[low32], %[ftmp1] \n\t" "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" "paddsh %[ftmp2], %[ftmp2], %[a1] \n\t" "packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t" "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t" "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t" MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride]) MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride]) "ulw %[low32], 0x00(%[pred_ptr]) \n\t" "mtc1 %[low32], %[ftmp1] \n\t" "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" "paddsh %[ftmp2], %[ftmp2], %[a1] \n\t" "packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t" "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t" "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t" MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride]) MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride]) "ulw %[low32], 0x00(%[pred_ptr]) \n\t" "mtc1 %[low32], %[ftmp1] \n\t" "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" "paddsh %[ftmp2], %[ftmp2], %[a1] \n\t" "packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t" "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t" "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t" MMI_ADDU(%[pred_ptr], %[pred_ptr], %[pred_stride]) MMI_ADDU(%[dst_ptr], %[dst_ptr], %[dst_stride]) "ulw %[low32], 0x00(%[pred_ptr]) \n\t" "mtc1 %[low32], %[ftmp1] \n\t" "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" "paddsh %[ftmp2], %[ftmp2], %[a1] \n\t" "packushb %[ftmp1], %[ftmp2], %[ftmp0] \n\t" "gsswlc1 %[ftmp1], 0x03(%[dst_ptr]) \n\t" "gsswrc1 %[ftmp1], 0x00(%[dst_ptr]) \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [low32]"=&r"(low32), [dst_ptr]"+&r"(dst_ptr), [pred_ptr]"+&r"(pred_ptr) : [dst_stride]"r"((mips_reg)dst_stride), [pred_stride]"r"((mips_reg)pred_stride), [a1]"f"(a1) : "memory" ); } void vp8_short_inv_walsh4x4_mmi(int16_t *input, int16_t *mb_dqcoeff) { int i; int16_t output[16]; double ftmp[12]; uint32_t tmp[1]; DECLARE_ALIGNED(8, const uint64_t, ff_ph_03) = { 0x0003000300030003ULL }; __asm__ volatile ( MMI_LI(%[tmp0], 0x03) "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t" "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t" "gsldlc1 %[ftmp2], 0x0f(%[ip]) \n\t" "gsldrc1 %[ftmp2], 0x08(%[ip]) \n\t" "gsldlc1 %[ftmp3], 0x17(%[ip]) \n\t" "gsldrc1 %[ftmp3], 0x10(%[ip]) \n\t" "gsldlc1 %[ftmp4], 0x1f(%[ip]) \n\t" "gsldrc1 %[ftmp4], 0x18(%[ip]) \n\t" "paddh %[ftmp5], %[ftmp1], %[ftmp2] \n\t" "psubh %[ftmp6], %[ftmp1], %[ftmp2] \n\t" "paddh %[ftmp7], %[ftmp3], %[ftmp4] \n\t" "psubh %[ftmp8], %[ftmp3], %[ftmp4] \n\t" "paddh 
%[ftmp1], %[ftmp5], %[ftmp7] \n\t" "psubh %[ftmp2], %[ftmp5], %[ftmp7] \n\t" "psubh %[ftmp3], %[ftmp6], %[ftmp8] \n\t" "paddh %[ftmp4], %[ftmp6], %[ftmp8] \n\t" TRANSPOSE_4H // a "paddh %[ftmp5], %[ftmp1], %[ftmp4] \n\t" // d "psubh %[ftmp6], %[ftmp1], %[ftmp4] \n\t" // b "paddh %[ftmp7], %[ftmp2], %[ftmp3] \n\t" // c "psubh %[ftmp8], %[ftmp2], %[ftmp3] \n\t" "paddh %[ftmp1], %[ftmp5], %[ftmp7] \n\t" "paddh %[ftmp2], %[ftmp6], %[ftmp8] \n\t" "psubh %[ftmp3], %[ftmp5], %[ftmp7] \n\t" "psubh %[ftmp4], %[ftmp6], %[ftmp8] \n\t" "paddh %[ftmp1], %[ftmp1], %[ff_ph_03] \n\t" "psrah %[ftmp1], %[ftmp1], %[ftmp11] \n\t" "paddh %[ftmp2], %[ftmp2], %[ff_ph_03] \n\t" "psrah %[ftmp2], %[ftmp2], %[ftmp11] \n\t" "paddh %[ftmp3], %[ftmp3], %[ff_ph_03] \n\t" "psrah %[ftmp3], %[ftmp3], %[ftmp11] \n\t" "paddh %[ftmp4], %[ftmp4], %[ff_ph_03] \n\t" "psrah %[ftmp4], %[ftmp4], %[ftmp11] \n\t" TRANSPOSE_4H "gssdlc1 %[ftmp1], 0x07(%[op]) \n\t" "gssdrc1 %[ftmp1], 0x00(%[op]) \n\t" "gssdlc1 %[ftmp2], 0x0f(%[op]) \n\t" "gssdrc1 %[ftmp2], 0x08(%[op]) \n\t" "gssdlc1 %[ftmp3], 0x17(%[op]) \n\t" "gssdrc1 %[ftmp3], 0x10(%[op]) \n\t" "gssdlc1 %[ftmp4], 0x1f(%[op]) \n\t" "gssdrc1 %[ftmp4], 0x18(%[op]) \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]) : [ip]"r"(input), [op]"r"(output), [ff_ph_03]"f"(ff_ph_03) : "memory" ); for (i = 0; i < 16; i++) { mb_dqcoeff[i * 16] = output[i]; } } libvpx-1.8.2/vp8/common/mips/mmi/loopfilter_filters_mmi.c000066400000000000000000002251461357355204000235200ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
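*
* For reference, the scalar filter these kernels vectorize (pixels are
* biased to signed bytes by XOR with 0x80; clamp() is signed 8-bit
* saturation, as are the adds and subtracts). Illustrative sketch, not
* upstream text:
*
*   mask: all of |p3-p2|, |p2-p1|, |p1-p0|, |q1-q0|, |q2-q1|, |q3-q2|
*         <= limit  and  2 * |p0-q0| + |p1-q1| / 2 <= blimit;
*   hev:  |p1-p0| > thresh  or  |q1-q0| > thresh;
*   f = clamp(ps1 - qs1) & hev;
*   f = clamp(f + 3 * (qs0 - ps0)) & mask;
*   Filter1 = clamp(f + 4) >> 3;   Filter2 = clamp(f + 3) >> 3;
*   qs0 -= Filter1;   ps0 += Filter2;
*   u = ((Filter1 + 1) >> 1) & ~hev;   qs1 -= u;   ps1 += u;
*
* The packed constants below encode these steps: ff_pb_04/ff_pb_03 are
* the +4/+3 rounders, ff_ph_01 the +1 of the final halving, and
* ff_ph_0900/ff_ph_1200/ff_ph_1b00 hold 9, 18 and 27 shifted left 8,
* the macroblock-edge taps applied as (w * f + 63) >> 7 with
* ff_ph_003f (= 63) as the rounder.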
*/ #include "./vpx_dsp_rtcd.h" #include "vp8/common/loopfilter.h" #include "vp8/common/onyxc_int.h" #include "vpx_ports/asmdefs_mmi.h" DECLARE_ALIGNED(8, static const uint64_t, ff_ph_01) = { 0x0001000100010001ULL }; DECLARE_ALIGNED(8, static const uint64_t, ff_ph_003f) = { 0x003f003f003f003fULL }; DECLARE_ALIGNED(8, static const uint64_t, ff_ph_0900) = { 0x0900090009000900ULL }; DECLARE_ALIGNED(8, static const uint64_t, ff_ph_1200) = { 0x1200120012001200ULL }; DECLARE_ALIGNED(8, static const uint64_t, ff_ph_1b00) = { 0x1b001b001b001b00ULL }; DECLARE_ALIGNED(8, static const uint64_t, ff_pb_fe) = { 0xfefefefefefefefeULL }; DECLARE_ALIGNED(8, static const uint64_t, ff_pb_80) = { 0x8080808080808080ULL }; DECLARE_ALIGNED(8, static const uint64_t, ff_pb_04) = { 0x0404040404040404ULL }; DECLARE_ALIGNED(8, static const uint64_t, ff_pb_03) = { 0x0303030303030303ULL }; DECLARE_ALIGNED(8, static const uint64_t, ff_pb_01) = { 0x0101010101010101ULL }; void vp8_loop_filter_horizontal_edge_mmi( unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh, int count) { uint32_t tmp[1]; mips_reg addr[2]; double ftmp[12]; __asm__ volatile ( "1: \n\t" "gsldlc1 %[ftmp10], 0x07(%[limit]) \n\t" "gsldrc1 %[ftmp10], 0x00(%[limit]) \n\t" MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step]) MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step_x4]) "gsldlc1 %[ftmp1], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[addr1]) \n\t" MMI_SUBU(%[addr1], %[addr0], %[src_pixel_step_x4]) "gsldlc1 %[ftmp3], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp3], 0x00(%[addr1]) \n\t" "pasubub %[ftmp0], %[ftmp1], %[ftmp3] \n\t" "psubusb %[ftmp0], %[ftmp0], %[ftmp10] \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step_x2]) "gsldlc1 %[ftmp4], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[addr1]) \n\t" "pasubub %[ftmp1], %[ftmp3], %[ftmp4] \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp10] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp5], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp5], 0x00(%[addr1]) \n\t" "pasubub %[ftmp9], %[ftmp4], %[ftmp5] \n\t" "psubusb %[ftmp1], %[ftmp9], %[ftmp10] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t" "gsldlc1 %[ftmp7], 0x07(%[addr0]) \n\t" "gsldrc1 %[ftmp7], 0x00(%[addr0]) \n\t" "pasubub %[ftmp11], %[ftmp7], %[ftmp6] \n\t" "psubusb %[ftmp1], %[ftmp11], %[ftmp10] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" MMI_ADDU(%[addr1], %[src_ptr], %[src_pixel_step_x2]) "gsldlc1 %[ftmp8], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp8], 0x00(%[addr1]) \n\t" "pasubub %[ftmp1], %[ftmp8], %[ftmp7] \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp10] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step_x2]) "gsldlc1 %[ftmp2], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[addr1]) \n\t" "pasubub %[ftmp1], %[ftmp2], %[ftmp8] \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp10] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "pasubub %[ftmp1], %[ftmp5], %[ftmp6] \n\t" "paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" "pasubub %[ftmp2], %[ftmp4], %[ftmp7] \n\t" "and %[ftmp2], %[ftmp2], %[ff_pb_fe] \n\t" "li %[tmp0], 0x01 \n\t" "mtc1 %[tmp0], %[ftmp10] \n\t" "psrlh %[ftmp2], %[ftmp2], %[ftmp10] \n\t" "paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "gsldlc1 %[ftmp10], 0x07(%[blimit]) \n\t" "gsldrc1 %[ftmp10], 0x00(%[blimit]) \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp10] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "xor %[ftmp10], %[ftmp10], 
%[ftmp10] \n\t" "pcmpeqb %[ftmp0], %[ftmp0], %[ftmp10] \n\t" "gsldlc1 %[ftmp10], 0x07(%[thresh]) \n\t" "gsldrc1 %[ftmp10], 0x00(%[thresh]) \n\t" "psubusb %[ftmp1], %[ftmp9], %[ftmp10] \n\t" "psubusb %[ftmp2], %[ftmp11], %[ftmp10] \n\t" "paddb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t" "pcmpeqb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "pcmpeqb %[ftmp2], %[ftmp2], %[ftmp2] \n\t" "xor %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t" "xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t" "xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t" "xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t" "psubsb %[ftmp2], %[ftmp4], %[ftmp7] \n\t" "and %[ftmp2], %[ftmp2], %[ftmp1] \n\t" "psubsb %[ftmp3], %[ftmp6], %[ftmp5] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" "and %[ftmp2], %[ftmp2], %[ftmp0] \n\t" "paddsb %[ftmp8], %[ftmp2], %[ff_pb_03] \n\t" "paddsb %[ftmp9], %[ftmp2], %[ff_pb_04] \n\t" "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "xor %[ftmp11], %[ftmp11], %[ftmp11] \n\t" "punpcklbh %[ftmp0], %[ftmp0], %[ftmp8] \n\t" "punpckhbh %[ftmp11], %[ftmp11], %[ftmp8] \n\t" "li %[tmp0], 0x0b \n\t" "mtc1 %[tmp0], %[ftmp10] \n\t" "psrah %[ftmp0], %[ftmp0], %[ftmp10] \n\t" "psrah %[ftmp11], %[ftmp11], %[ftmp10] \n\t" "packsshb %[ftmp8], %[ftmp0], %[ftmp11] \n\t" "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "punpcklbh %[ftmp0], %[ftmp0], %[ftmp9] \n\t" "psrah %[ftmp0], %[ftmp0], %[ftmp10] \n\t" "xor %[ftmp11], %[ftmp11], %[ftmp11] \n\t" "punpckhbh %[ftmp9], %[ftmp11], %[ftmp9] \n\t" "psrah %[ftmp9], %[ftmp9], %[ftmp10] \n\t" "paddsh %[ftmp11], %[ftmp0], %[ff_ph_01] \n\t" "packsshb %[ftmp0], %[ftmp0], %[ftmp9] \n\t" "paddsh %[ftmp9], %[ftmp9], %[ff_ph_01] \n\t" "li %[tmp0], 0x01 \n\t" "mtc1 %[tmp0], %[ftmp10] \n\t" "psrah %[ftmp11], %[ftmp11], %[ftmp10] \n\t" "psrah %[ftmp9], %[ftmp9], %[ftmp10] \n\t" "packsshb %[ftmp11], %[ftmp11], %[ftmp9] \n\t" "pandn %[ftmp1], %[ftmp1], %[ftmp11] \n\t" "paddsb %[ftmp5], %[ftmp5], %[ftmp8] \n\t" "xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp5], 0x07(%[addr1]) \n\t" "gssdrc1 %[ftmp5], 0x00(%[addr1]) \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step_x2]) "paddsb %[ftmp4], %[ftmp4], %[ftmp1] \n\t" "xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t" "gssdlc1 %[ftmp4], 0x07(%[addr1]) \n\t" "gssdrc1 %[ftmp4], 0x00(%[addr1]) \n\t" "psubsb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" "xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t" "gssdlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t" "psubsb %[ftmp7], %[ftmp7], %[ftmp1] \n\t" "xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t" "gssdlc1 %[ftmp7], 0x07(%[addr0]) \n\t" "gssdrc1 %[ftmp7], 0x00(%[addr0]) \n\t" "addiu %[count], %[count], -0x01 \n\t" MMI_ADDIU(%[src_ptr], %[src_ptr], 0x08) "bnez %[count], 1b \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]), [src_ptr]"+&r"(src_ptr), [count]"+&r"(count) : [limit]"r"(limit), [blimit]"r"(blimit), [thresh]"r"(thresh), [src_pixel_step]"r"((mips_reg)src_pixel_step), [src_pixel_step_x2]"r"((mips_reg)(src_pixel_step<<1)), [src_pixel_step_x4]"r"((mips_reg)(src_pixel_step<<2)), [ff_ph_01]"f"(ff_ph_01), [ff_pb_fe]"f"(ff_pb_fe), 
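/* naming key for the packed constants in this operand list: ff_pb_XX
   is eight bytes of 0xXX, ff_ph_XXXX is four halfwords of 0xXXXX */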
[ff_pb_80]"f"(ff_pb_80), [ff_pb_04]"f"(ff_pb_04), [ff_pb_03]"f"(ff_pb_03) : "memory" ); } void vp8_loop_filter_vertical_edge_mmi(unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh, int count) { uint32_t tmp[1]; mips_reg addr[2]; double ftmp[13]; __asm__ volatile ( MMI_SLL(%[tmp0], %[src_pixel_step], 0x02) MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0]) MMI_SUBU(%[src_ptr], %[src_ptr], 0x04) "1: \n\t" MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step]) MMI_SLL (%[tmp0], %[src_pixel_step], 0x01) MMI_ADDU(%[addr1], %[src_ptr], %[tmp0]) "gsldlc1 %[ftmp11], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp11], 0x00(%[addr1]) \n\t" MMI_ADDU(%[addr1], %[addr0], %[tmp0]) "gsldlc1 %[ftmp12], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp12], 0x00(%[addr1]) \n\t" "punpcklbh %[ftmp1], %[ftmp11], %[ftmp12] \n\t" "punpckhbh %[ftmp2], %[ftmp11], %[ftmp12] \n\t" "gsldlc1 %[ftmp11], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp11], 0x00(%[src_ptr]) \n\t" "gsldlc1 %[ftmp12], 0x07(%[addr0]) \n\t" "gsldrc1 %[ftmp12], 0x00(%[addr0]) \n\t" "punpcklbh %[ftmp3], %[ftmp11], %[ftmp12] \n\t" "punpckhbh %[ftmp4], %[ftmp11], %[ftmp12] \n\t" "punpcklhw %[ftmp5], %[ftmp4], %[ftmp2] \n\t" "punpckhhw %[ftmp6], %[ftmp4], %[ftmp2] \n\t" "punpcklhw %[ftmp7], %[ftmp3], %[ftmp1] \n\t" "punpckhhw %[ftmp8], %[ftmp3], %[ftmp1] \n\t" MMI_SLL(%[tmp0], %[src_pixel_step], 0x01) MMI_SUBU(%[addr1], %[src_ptr], %[tmp0]) "gsldlc1 %[ftmp11], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp11], 0x00(%[addr1]) \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp12], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp12], 0x00(%[addr1]) \n\t" "punpcklbh %[ftmp9], %[ftmp11], %[ftmp12] \n\t" "punpckhbh %[ftmp10], %[ftmp11], %[ftmp12] \n\t" MMI_SLL(%[tmp0], %[src_pixel_step], 0x02) MMI_SUBU(%[addr1], %[src_ptr], %[tmp0]) "gsldlc1 %[ftmp11], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp11], 0x00(%[addr1]) \n\t" MMI_SLL(%[tmp0], %[src_pixel_step], 0x02) MMI_SUBU(%[addr1], %[addr0], %[tmp0]) "gsldlc1 %[ftmp12], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp12], 0x00(%[addr1]) \n\t" "punpcklbh %[ftmp0], %[ftmp11], %[ftmp12] \n\t" "punpckhbh %[ftmp11], %[ftmp11], %[ftmp12] \n\t" "punpcklhw %[ftmp1], %[ftmp11], %[ftmp10] \n\t" "punpckhhw %[ftmp2], %[ftmp11], %[ftmp10] \n\t" "punpcklhw %[ftmp3], %[ftmp0], %[ftmp9] \n\t" "punpckhhw %[ftmp4], %[ftmp0], %[ftmp9] \n\t" /* ftmp9:q0 ftmp10:q1 */ "punpcklwd %[ftmp9], %[ftmp1], %[ftmp5] \n\t" "punpckhwd %[ftmp10], %[ftmp1], %[ftmp5] \n\t" /* ftmp11:q2 ftmp12:q3 */ "punpcklwd %[ftmp11], %[ftmp2], %[ftmp6] \n\t" "punpckhwd %[ftmp12], %[ftmp2], %[ftmp6] \n\t" /* ftmp1:p3 ftmp2:p2 */ "punpcklwd %[ftmp1], %[ftmp3], %[ftmp7] \n\t" "punpckhwd %[ftmp2], %[ftmp3], %[ftmp7] \n\t" /* ftmp5:p1 ftmp6:p0 */ "punpcklwd %[ftmp5], %[ftmp4], %[ftmp8] \n\t" "punpckhwd %[ftmp6], %[ftmp4], %[ftmp8] \n\t" "gsldlc1 %[ftmp8], 0x07(%[limit]) \n\t" "gsldrc1 %[ftmp8], 0x00(%[limit]) \n\t" /* abs (q3-q2) */ "pasubub %[ftmp7], %[ftmp12], %[ftmp11] \n\t" "psubusb %[ftmp0], %[ftmp7], %[ftmp8] \n\t" /* abs (q2-q1) */ "pasubub %[ftmp7], %[ftmp11], %[ftmp10] \n\t" "psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" /* ftmp3: abs(q1-q0) */ "pasubub %[ftmp3], %[ftmp10], %[ftmp9] \n\t" "psubusb %[ftmp7], %[ftmp3], %[ftmp8] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" /* ftmp4: abs(p1-p0) */ "pasubub %[ftmp4], %[ftmp5], %[ftmp6] \n\t" "psubusb %[ftmp7], %[ftmp4], %[ftmp8] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" /* abs (p2-p1) */ "pasubub %[ftmp7], %[ftmp2], %[ftmp5] \n\t" "psubusb %[ftmp7], 
%[ftmp7], %[ftmp8] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" /* abs (p3-p2) */ "pasubub %[ftmp7], %[ftmp1], %[ftmp2] \n\t" "psubusb %[ftmp7], %[ftmp7], %[ftmp8] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "gsldlc1 %[ftmp8], 0x07(%[blimit]) \n\t" "gsldrc1 %[ftmp8], 0x00(%[blimit]) \n\t" /* abs (p0-q0) */ "pasubub %[ftmp11], %[ftmp9], %[ftmp6] \n\t" "paddusb %[ftmp11], %[ftmp11], %[ftmp11] \n\t" /* abs (p1-q1) */ "pasubub %[ftmp12], %[ftmp10], %[ftmp5] \n\t" "and %[ftmp12], %[ftmp12], %[ff_pb_fe] \n\t" "li %[tmp0], 0x01 \n\t" "mtc1 %[tmp0], %[ftmp1] \n\t" "psrlh %[ftmp12], %[ftmp12], %[ftmp1] \n\t" "paddusb %[ftmp1], %[ftmp11], %[ftmp12] \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp8] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "xor %[ftmp1], %[ftmp1], %[ftmp1] \n\t" /* ftmp0:mask */ "pcmpeqb %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "gsldlc1 %[ftmp8], 0x07(%[thresh]) \n\t" "gsldrc1 %[ftmp8], 0x00(%[thresh]) \n\t" /* ftmp3: abs(q1-q0) ftmp4: abs(p1-p0) */ "psubusb %[ftmp4], %[ftmp4], %[ftmp8] \n\t" "psubusb %[ftmp3], %[ftmp3], %[ftmp8] \n\t" "or %[ftmp2], %[ftmp4], %[ftmp3] \n\t" "pcmpeqb %[ftmp2], %[ftmp2], %[ftmp1] \n\t" "pcmpeqb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" /* ftmp1:hev */ "xor %[ftmp1], %[ftmp2], %[ftmp1] \n\t" "xor %[ftmp10], %[ftmp10], %[ff_pb_80] \n\t" "xor %[ftmp9], %[ftmp9], %[ff_pb_80] \n\t" "xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t" "xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t" "psubsb %[ftmp2], %[ftmp5], %[ftmp10] \n\t" "and %[ftmp2], %[ftmp2], %[ftmp1] \n\t" "psubsb %[ftmp3], %[ftmp9], %[ftmp6] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" /* ftmp2:filter_value */ "and %[ftmp2], %[ftmp2], %[ftmp0] \n\t" "paddsb %[ftmp11], %[ftmp2], %[ff_pb_04] \n\t" "paddsb %[ftmp12], %[ftmp2], %[ff_pb_03] \n\t" "li %[tmp0], 0x0b \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t" "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "punpcklbh %[ftmp0], %[ftmp0], %[ftmp12] \n\t" "punpckhbh %[ftmp8], %[ftmp8], %[ftmp12] \n\t" "psrah %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t" "packsshb %[ftmp12], %[ftmp0], %[ftmp8] \n\t" "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "punpcklbh %[ftmp0], %[ftmp0], %[ftmp11] \n\t" "punpckhbh %[ftmp8], %[ftmp8], %[ftmp11] \n\t" "psrah %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t" "packsshb %[ftmp11], %[ftmp0], %[ftmp8] \n\t" "psubsb %[ftmp9], %[ftmp9], %[ftmp11] \n\t" "xor %[ftmp9], %[ftmp9], %[ff_pb_80] \n\t" "paddsb %[ftmp6], %[ftmp6], %[ftmp12] \n\t" "xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t" "paddsh %[ftmp0], %[ftmp0], %[ff_ph_01] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ff_ph_01] \n\t" "li %[tmp0], 0x01 \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t" "psrah %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t" "packsshb %[ftmp2], %[ftmp0], %[ftmp8] \n\t" "pandn %[ftmp2], %[ftmp1], %[ftmp2] \n\t" "psubsb %[ftmp10], %[ftmp10], %[ftmp2] \n\t" "xor %[ftmp10], %[ftmp10], %[ff_pb_80] \n\t" "paddsb %[ftmp5], %[ftmp5], %[ftmp2] \n\t" "xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t" /* ftmp5: *op1 ; ftmp6: *op0 */ "punpcklbh %[ftmp2], %[ftmp5], %[ftmp6] \n\t" "punpckhbh %[ftmp1], %[ftmp5], %[ftmp6] \n\t" /* ftmp9: *oq0 ; ftmp10: *oq1 */ "punpcklbh %[ftmp4], %[ftmp9], %[ftmp10] \n\t" "punpckhbh %[ftmp3], %[ftmp9], %[ftmp10] \n\t" "punpckhhw %[ftmp6], %[ftmp2], %[ftmp4] \n\t" "punpcklhw %[ftmp2], %[ftmp2], %[ftmp4] \n\t" "punpckhhw %[ftmp5], %[ftmp1], %[ftmp3] \n\t" "punpcklhw 
%[ftmp1], %[ftmp1], %[ftmp3] \n\t" MMI_SLL(%[tmp0], %[src_pixel_step], 0x02) MMI_SUBU(%[addr1], %[src_ptr], %[tmp0]) "gsswlc1 %[ftmp2], 0x05(%[addr1]) \n\t" "gsswrc1 %[ftmp2], 0x02(%[addr1]) \n\t" "li %[tmp0], 0x20 \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "dsrl %[ftmp2], %[ftmp2], %[ftmp9] \n\t" MMI_SLL(%[tmp0], %[src_pixel_step], 0x02) MMI_SUBU(%[addr1], %[addr0], %[tmp0]) "gsswlc1 %[ftmp2], 0x05(%[addr1]) \n\t" "gsswrc1 %[ftmp2], 0x02(%[addr1]) \n\t" MMI_SLL(%[tmp0], %[src_pixel_step], 0x01) MMI_SUBU(%[addr1], %[src_ptr], %[tmp0]) "gsswlc1 %[ftmp6], 0x05(%[addr1]) \n\t" "gsswrc1 %[ftmp6], 0x02(%[addr1]) \n\t" "dsrl %[ftmp6], %[ftmp6], %[ftmp9] \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step]) "gsswlc1 %[ftmp6], 0x05(%[addr1]) \n\t" "gsswrc1 %[ftmp6], 0x02(%[addr1]) \n\t" "gsswlc1 %[ftmp1], 0x05(%[src_ptr]) \n\t" "gsswrc1 %[ftmp1], 0x02(%[src_ptr]) \n\t" "dsrl %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "gsswlc1 %[ftmp1], 0x05(%[addr0]) \n\t" "gsswrc1 %[ftmp1], 0x02(%[addr0]) \n\t" MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step]) "gsswlc1 %[ftmp5], 0x05(%[addr1]) \n\t" "gsswrc1 %[ftmp5], 0x02(%[addr1]) \n\t" "dsrl %[ftmp5], %[ftmp5], %[ftmp9] \n\t" MMI_ADDU(%[addr1], %[addr0], %[tmp0]) "gsswlc1 %[ftmp5], 0x05(%[addr1]) \n\t" "gsswrc1 %[ftmp5], 0x02(%[addr1]) \n\t" MMI_ADDIU(%[count], %[count], -0x01) MMI_SLL(%[tmp0], %[src_pixel_step], 0x03) MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0]) "bnez %[count], 1b \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]), [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]), [src_ptr]"+&r"(src_ptr), [count]"+&r"(count) : [limit]"r"(limit), [blimit]"r"(blimit), [thresh]"r"(thresh), [src_pixel_step]"r"((mips_reg)src_pixel_step), [ff_ph_01]"f"(ff_ph_01), [ff_pb_03]"f"(ff_pb_03), [ff_pb_04]"f"(ff_pb_04), [ff_pb_80]"f"(ff_pb_80), [ff_pb_fe]"f"(ff_pb_fe) : "memory" ); } /* clang-format off */ #define VP8_MBLOOP_HPSRAB \ "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" \ "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" \ "psrah %[ftmp10], %[ftmp10], %[ftmp9] \n\t" \ "psrah %[ftmp11], %[ftmp11], %[ftmp9] \n\t" \ "packsshb %[ftmp0], %[ftmp10], %[ftmp11] \n\t" #define VP8_MBLOOP_HPSRAB_ADD(reg) \ "punpcklbh %[ftmp1], %[ftmp0], %[ftmp12] \n\t" \ "punpckhbh %[ftmp2], %[ftmp0], %[ftmp12] \n\t" \ "pmulhh %[ftmp1], %[ftmp1], " #reg " \n\t" \ "pmulhh %[ftmp2], %[ftmp2], " #reg " \n\t" \ "paddh %[ftmp1], %[ftmp1], %[ff_ph_003f] \n\t" \ "paddh %[ftmp2], %[ftmp2], %[ff_ph_003f] \n\t" \ "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t" \ "psrah %[ftmp2], %[ftmp2], %[ftmp9] \n\t" \ "packsshb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" /* clang-format on */ void vp8_mbloop_filter_horizontal_edge_mmi( unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh, int count) { uint32_t tmp[1]; double ftmp[13]; __asm__ volatile ( MMI_SLL(%[tmp0], %[src_pixel_step], 0x02) MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0]) "1: \n\t" "gsldlc1 %[ftmp9], 0x07(%[limit]) \n\t" "gsldrc1 %[ftmp9], 0x00(%[limit]) \n\t" /* ftmp1: p3 */ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" /* ftmp3: p2 */ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp3], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp3], 0x00(%[src_ptr]) \n\t" /* ftmp4: p1 */ 
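/* (the loop body reads p3..q3 as eight consecutive rows, bumping
   src_ptr by one pitch per load, then rewinds four pitches before the
   writebacks further down) */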
MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp4], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[src_ptr]) \n\t" /* ftmp5: p0 */ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t" /* ftmp6: q0 */ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t" /* ftmp7: q1 */ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t" /* ftmp8: q2 */ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t" /* ftmp2: q3 */ MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp2], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[src_ptr]) \n\t" "gsldlc1 %[ftmp12], 0x07(%[blimit]) \n\t" "gsldrc1 %[ftmp12], 0x00(%[blimit]) \n\t" "pasubub %[ftmp0], %[ftmp1], %[ftmp3] \n\t" "psubusb %[ftmp0], %[ftmp0], %[ftmp9] \n\t" "pasubub %[ftmp1], %[ftmp3], %[ftmp4] \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "pasubub %[ftmp10], %[ftmp4], %[ftmp5] \n\t" "psubusb %[ftmp1], %[ftmp10], %[ftmp9] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "pasubub %[ftmp11], %[ftmp7], %[ftmp6] \n\t" "psubusb %[ftmp1], %[ftmp11], %[ftmp9] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "pasubub %[ftmp1], %[ftmp8], %[ftmp7] \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "pasubub %[ftmp1], %[ftmp2], %[ftmp8] \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "pasubub %[ftmp1], %[ftmp5], %[ftmp6] \n\t" "paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" "pasubub %[ftmp2], %[ftmp4], %[ftmp7] \n\t" "and %[ftmp2], %[ftmp2], %[ff_pb_fe] \n\t" "li %[tmp0], 0x01 \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "psrlh %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "paddusb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "psubusb %[ftmp1], %[ftmp1], %[ftmp12] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t" /* ftmp0: mask */ "pcmpeqb %[ftmp0], %[ftmp0], %[ftmp9] \n\t" "gsldlc1 %[ftmp9], 0x07(%[thresh]) \n\t" "gsldrc1 %[ftmp9], 0x00(%[thresh]) \n\t" "psubusb %[ftmp1], %[ftmp10], %[ftmp9] \n\t" "psubusb %[ftmp2], %[ftmp11], %[ftmp9] \n\t" "paddb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "xor %[ftmp2], %[ftmp2], %[ftmp2] \n\t" "pcmpeqb %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "pcmpeqb %[ftmp2], %[ftmp2], %[ftmp2] \n\t" /* ftmp1: hev */ "xor %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t" "xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t" "xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t" "xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t" "psubsb %[ftmp2], %[ftmp4], %[ftmp7] \n\t" "psubsb %[ftmp9], %[ftmp6], %[ftmp5] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "and %[ftmp2], %[ftmp2], %[ftmp0] \n\t" "pandn %[ftmp12], %[ftmp1], %[ftmp2] \n\t" "and %[ftmp2], %[ftmp2], %[ftmp1] \n\t" "li %[tmp0], 0x0b \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "paddsb %[ftmp0], %[ftmp2], %[ff_pb_03] \n\t" VP8_MBLOOP_HPSRAB "paddsb %[ftmp5], %[ftmp5], %[ftmp0] \n\t" "paddsb %[ftmp0], %[ftmp2], %[ff_pb_04] \n\t" VP8_MBLOOP_HPSRAB "psubsb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" "li %[tmp0], 0x07 \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1b00]) "psubsb %[ftmp6], %[ftmp6], %[ftmp1] 
\n\t" "paddsb %[ftmp5], %[ftmp5], %[ftmp1] \n\t" "xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t" "xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t" MMI_SLL(%[tmp0], %[src_pixel_step], 0x02) MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0]) "gssdlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t" VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_1200]) "paddsb %[ftmp4], %[ftmp4], %[ftmp1] \n\t" "psubsb %[ftmp7], %[ftmp7], %[ftmp1] \n\t" "xor %[ftmp4], %[ftmp4], %[ff_pb_80] \n\t" "xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t" MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0]) MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp4], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp4], 0x00(%[src_ptr]) \n\t" VP8_MBLOOP_HPSRAB_ADD(%[ff_ph_0900]) "xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t" "xor %[ftmp8], %[ftmp8], %[ff_pb_80] \n\t" "paddsb %[ftmp3], %[ftmp3], %[ftmp1] \n\t" "psubsb %[ftmp8], %[ftmp8], %[ftmp1] \n\t" "xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t" "xor %[ftmp8], %[ftmp8], %[ff_pb_80] \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0]) "gssdlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t" MMI_SUBU(%[src_ptr], %[src_ptr], %[tmp0]) MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp3], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp3], 0x00(%[src_ptr]) \n\t" MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) MMI_ADDIU(%[src_ptr], %[src_ptr], 0x08) "addiu %[count], %[count], -0x01 \n\t" "bnez %[count], 1b \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr), [count]"+&r"(count) : [limit]"r"(limit), [blimit]"r"(blimit), [thresh]"r"(thresh), [src_pixel_step]"r"((mips_reg)src_pixel_step), [ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80), [ff_pb_04]"f"(ff_pb_04), [ff_pb_03]"f"(ff_pb_03), [ff_ph_0900]"f"(ff_ph_0900), [ff_ph_1b00]"f"(ff_ph_1b00), [ff_ph_1200]"f"(ff_ph_1200), [ff_ph_003f]"f"(ff_ph_003f) : "memory" ); } #define VP8_MBLOOP_VPSRAB_ADDH \ "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" \ "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" \ "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" \ "punpckhbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" #define VP8_MBLOOP_VPSRAB_ADDT \ "paddh %[ftmp7], %[ftmp7], %[ff_ph_003f] \n\t" \ "paddh %[ftmp8], %[ftmp8], %[ff_ph_003f] \n\t" \ "psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t" \ "psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t" \ "packsshb %[ftmp3], %[ftmp7], %[ftmp8] \n\t" void vp8_mbloop_filter_vertical_edge_mmi( unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh, int count) { mips_reg tmp[1]; DECLARE_ALIGNED(8, const uint64_t, srct[1]); double ftmp[14]; __asm__ volatile ( MMI_SUBU(%[src_ptr], %[src_ptr], 0x04) "1: \n\t" "gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t" 
"gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t" "punpcklbh %[ftmp11], %[ftmp5], %[ftmp6] \n\t" "punpckhbh %[ftmp12], %[ftmp5], %[ftmp6] \n\t" "punpcklbh %[ftmp9], %[ftmp7], %[ftmp8] \n\t" "punpckhbh %[ftmp10], %[ftmp7], %[ftmp8] \n\t" "punpcklhw %[ftmp1], %[ftmp12], %[ftmp10] \n\t" "punpckhhw %[ftmp2], %[ftmp12], %[ftmp10] \n\t" "punpcklhw %[ftmp3], %[ftmp11], %[ftmp9] \n\t" "punpckhhw %[ftmp4], %[ftmp11], %[ftmp9] \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp5], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp5], 0x00(%[src_ptr]) \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp7], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp7], 0x00(%[src_ptr]) \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp8], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp8], 0x00(%[src_ptr]) \n\t" "punpcklbh %[ftmp11], %[ftmp5], %[ftmp6] \n\t" "punpckhbh %[ftmp12], %[ftmp5], %[ftmp6] \n\t" "punpcklbh %[ftmp9], %[ftmp7], %[ftmp8] \n\t" "punpckhbh %[ftmp10], %[ftmp7], %[ftmp8] \n\t" "punpcklhw %[ftmp5], %[ftmp12], %[ftmp10] \n\t" "punpckhhw %[ftmp6], %[ftmp12], %[ftmp10] \n\t" "punpcklhw %[ftmp7], %[ftmp11], %[ftmp9] \n\t" "punpckhhw %[ftmp8], %[ftmp11], %[ftmp9] \n\t" "gsldlc1 %[ftmp13], 0x07(%[limit]) \n\t" "gsldrc1 %[ftmp13], 0x00(%[limit]) \n\t" /* ftmp9:q0 ftmp10:q1 */ "punpcklwd %[ftmp9], %[ftmp1], %[ftmp5] \n\t" "punpckhwd %[ftmp10], %[ftmp1], %[ftmp5] \n\t" /* ftmp11:q2 ftmp12:q3 */ "punpcklwd %[ftmp11], %[ftmp2], %[ftmp6] \n\t" "punpckhwd %[ftmp12], %[ftmp2], %[ftmp6] \n\t" /* srct[0x00]: q3 */ "sdc1 %[ftmp12], 0x00(%[srct]) \n\t" /* ftmp1:p3 ftmp2:p2 */ "punpcklwd %[ftmp1], %[ftmp3], %[ftmp7] \n\t" "punpckhwd %[ftmp2], %[ftmp3], %[ftmp7] \n\t" /* srct[0x08]: p3 */ "sdc1 %[ftmp1], 0x08(%[srct]) \n\t" /* ftmp5:p1 ftmp6:p0 */ "punpcklwd %[ftmp5], %[ftmp4], %[ftmp8] \n\t" "punpckhwd %[ftmp6], %[ftmp4], %[ftmp8] \n\t" /* abs (q3-q2) */ "pasubub %[ftmp7], %[ftmp12], %[ftmp11] \n\t" "psubusb %[ftmp0], %[ftmp7], %[ftmp13] \n\t" /* abs (q2-q1) */ "pasubub %[ftmp7], %[ftmp11], %[ftmp10] \n\t" "psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" /* ftmp3: abs(q1-q0) */ "pasubub %[ftmp3], %[ftmp10], %[ftmp9] \n\t" "psubusb %[ftmp7], %[ftmp3], %[ftmp13] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" /* ftmp4: abs(p1-p0) */ "pasubub %[ftmp4], %[ftmp5], %[ftmp6] \n\t" "psubusb %[ftmp7], %[ftmp4], %[ftmp13] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" /* abs (p2-p1) */ "pasubub %[ftmp7], %[ftmp2], %[ftmp5] \n\t" "psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" /* abs (p3-p2) */ "pasubub %[ftmp7], %[ftmp1], %[ftmp2] \n\t" "psubusb %[ftmp7], %[ftmp7], %[ftmp13] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "gsldlc1 %[ftmp13], 0x07(%[blimit]) \n\t" "gsldrc1 %[ftmp13], 0x00(%[blimit]) \n\t" "gsldlc1 %[ftmp7], 0x07(%[thresh]) \n\t" "gsldrc1 %[ftmp7], 0x00(%[thresh]) \n\t" /* abs (p0-q0) * 2 */ "pasubub %[ftmp1], %[ftmp9], %[ftmp6] \n\t" "paddusb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" /* abs (p1-q1) / 2 */ "pasubub %[ftmp12], %[ftmp10], %[ftmp5] \n\t" "and %[ftmp12], %[ftmp12], %[ff_pb_fe] \n\t" "li %[tmp0], 0x01 \n\t" "mtc1 %[tmp0], %[ftmp8] \n\t" "psrlh %[ftmp12], %[ftmp12], %[ftmp8] \n\t" "paddusb %[ftmp12], %[ftmp1], %[ftmp12] \n\t" "psubusb %[ftmp12], 
%[ftmp12], %[ftmp13] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp12] \n\t" "xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t" /* ftmp0: mask */ "pcmpeqb %[ftmp0], %[ftmp0], %[ftmp12] \n\t" /* abs(p1-p0) - thresh */ "psubusb %[ftmp4], %[ftmp4], %[ftmp7] \n\t" /* abs(q1-q0) - thresh */ "psubusb %[ftmp3], %[ftmp3], %[ftmp7] \n\t" "or %[ftmp3], %[ftmp4], %[ftmp3] \n\t" "pcmpeqb %[ftmp3], %[ftmp3], %[ftmp12] \n\t" "pcmpeqb %[ftmp1], %[ftmp1], %[ftmp1] \n\t" /* ftmp1: hev */ "xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t" /* ftmp2:ps2, ftmp5:ps1, ftmp6:ps0, ftmp9:qs0, ftmp10:qs1, ftmp11:qs2 */ "xor %[ftmp11], %[ftmp11], %[ff_pb_80] \n\t" "xor %[ftmp10], %[ftmp10], %[ff_pb_80] \n\t" "xor %[ftmp9], %[ftmp9], %[ff_pb_80] \n\t" "xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t" "xor %[ftmp5], %[ftmp5], %[ff_pb_80] \n\t" "xor %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t" "psubsb %[ftmp3], %[ftmp5], %[ftmp10] \n\t" "psubsb %[ftmp4], %[ftmp9], %[ftmp6] \n\t" "paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "paddsb %[ftmp3], %[ftmp3], %[ftmp4] \n\t" /* filter_value &= mask */ "and %[ftmp0], %[ftmp0], %[ftmp3] \n\t" /* Filter2 = filter_value & hev */ "and %[ftmp3], %[ftmp1], %[ftmp0] \n\t" /* filter_value &= ~hev */ "pandn %[ftmp0], %[ftmp1], %[ftmp0] \n\t" "paddsb %[ftmp4], %[ftmp3], %[ff_pb_04] \n\t" "li %[tmp0], 0x0b \n\t" "mtc1 %[tmp0], %[ftmp12] \n\t" "punpcklbh %[ftmp7], %[ftmp7], %[ftmp4] \n\t" "punpckhbh %[ftmp8], %[ftmp8], %[ftmp4] \n\t" "psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t" "psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t" "packsshb %[ftmp4], %[ftmp7], %[ftmp8] \n\t" /* ftmp9: qs0 */ "psubsb %[ftmp9], %[ftmp9], %[ftmp4] \n\t" "paddsb %[ftmp3], %[ftmp3], %[ff_pb_03] \n\t" "punpcklbh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" "punpckhbh %[ftmp8], %[ftmp8], %[ftmp3] \n\t" "psrah %[ftmp7], %[ftmp7], %[ftmp12] \n\t" "psrah %[ftmp8], %[ftmp8], %[ftmp12] \n\t" "packsshb %[ftmp3], %[ftmp7], %[ftmp8] \n\t" /* ftmp6: ps0 */ "paddsb %[ftmp6], %[ftmp6], %[ftmp3] \n\t" "li %[tmp0], 0x07 \n\t" "mtc1 %[tmp0], %[ftmp12] \n\t" VP8_MBLOOP_VPSRAB_ADDH "paddh %[ftmp1], %[ff_ph_0900], %[ff_ph_0900] \n\t" "paddh %[ftmp1], %[ftmp1], %[ff_ph_0900] \n\t" "pmulhh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" "pmulhh %[ftmp8], %[ftmp8], %[ftmp1] \n\t" VP8_MBLOOP_VPSRAB_ADDT "psubsb %[ftmp4], %[ftmp9], %[ftmp3] \n\t" /* ftmp9: oq0 */ "xor %[ftmp9], %[ftmp4], %[ff_pb_80] \n\t" "paddsb %[ftmp4], %[ftmp6], %[ftmp3] \n\t" /* ftmp6: op0 */ "xor %[ftmp6], %[ftmp4], %[ff_pb_80] \n\t" VP8_MBLOOP_VPSRAB_ADDH "paddh %[ftmp1], %[ff_ph_0900], %[ff_ph_0900] \n\t" "pmulhh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" "pmulhh %[ftmp8], %[ftmp8], %[ftmp1] \n\t" VP8_MBLOOP_VPSRAB_ADDT "psubsb %[ftmp4], %[ftmp10], %[ftmp3] \n\t" /* ftmp10: oq1 */ "xor %[ftmp10], %[ftmp4], %[ff_pb_80] \n\t" "paddsb %[ftmp4], %[ftmp5], %[ftmp3] \n\t" /* ftmp5: op1 */ "xor %[ftmp5], %[ftmp4], %[ff_pb_80] \n\t" VP8_MBLOOP_VPSRAB_ADDH "pmulhh %[ftmp7], %[ftmp7], %[ff_ph_0900] \n\t" "pmulhh %[ftmp8], %[ftmp8], %[ff_ph_0900] \n\t" VP8_MBLOOP_VPSRAB_ADDT "psubsb %[ftmp4], %[ftmp11], %[ftmp3] \n\t" /* ftmp11: oq2 */ "xor %[ftmp11], %[ftmp4], %[ff_pb_80] \n\t" "paddsb %[ftmp4], %[ftmp2], %[ftmp3] \n\t" /* ftmp2: op2 */ "xor %[ftmp2], %[ftmp4], %[ff_pb_80] \n\t" "ldc1 %[ftmp12], 0x00(%[srct]) \n\t" "ldc1 %[ftmp8], 0x08(%[srct]) \n\t" "punpcklbh %[ftmp0], %[ftmp8], %[ftmp2] \n\t" "punpckhbh %[ftmp1], %[ftmp8], %[ftmp2] \n\t" "punpcklbh %[ftmp2], %[ftmp5], %[ftmp6] \n\t" "punpckhbh %[ftmp3], %[ftmp5], %[ftmp6] \n\t" "punpcklhw %[ftmp4], %[ftmp0], %[ftmp2] \n\t" "punpckhhw %[ftmp5], %[ftmp0], %[ftmp2] 
\n\t" "punpcklhw %[ftmp6], %[ftmp1], %[ftmp3] \n\t" "punpckhhw %[ftmp7], %[ftmp1], %[ftmp3] \n\t" "punpcklbh %[ftmp0], %[ftmp9], %[ftmp10] \n\t" "punpckhbh %[ftmp1], %[ftmp9], %[ftmp10] \n\t" "punpcklbh %[ftmp2], %[ftmp11], %[ftmp12] \n\t" "punpckhbh %[ftmp3], %[ftmp11], %[ftmp12] \n\t" "punpcklhw %[ftmp8], %[ftmp0], %[ftmp2] \n\t" "punpckhhw %[ftmp9], %[ftmp0], %[ftmp2] \n\t" "punpcklhw %[ftmp10], %[ftmp1], %[ftmp3] \n\t" "punpckhhw %[ftmp11], %[ftmp1], %[ftmp3] \n\t" "punpcklwd %[ftmp0], %[ftmp7], %[ftmp11] \n\t" "punpckhwd %[ftmp1], %[ftmp7], %[ftmp11] \n\t" "gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t" "punpcklwd %[ftmp0], %[ftmp6], %[ftmp10] \n\t" "punpckhwd %[ftmp1], %[ftmp6], %[ftmp10] \n\t" MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t" "punpcklwd %[ftmp1], %[ftmp5], %[ftmp9] \n\t" "punpckhwd %[ftmp0], %[ftmp5], %[ftmp9] \n\t" MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t" MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "punpcklwd %[ftmp1], %[ftmp4], %[ftmp8] \n\t" "punpckhwd %[ftmp0], %[ftmp4], %[ftmp8] \n\t" MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t" MMI_SUBU(%[src_ptr], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "addiu %[count], %[count], -0x01 \n\t" MMI_SLL(%[tmp0], %[src_pixel_step], 0x03) MMI_ADDU(%[src_ptr], %[src_ptr], %[tmp0]) "bnez %[count], 1b \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp12]"=&f"(ftmp[12]), [ftmp13]"=&f"(ftmp[13]), [tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr), [count]"+&r"(count) : [limit]"r"(limit), [blimit]"r"(blimit), [srct]"r"(srct), [thresh]"r"(thresh), [src_pixel_step]"r"((mips_reg)src_pixel_step), [ff_ph_003f]"f"(ff_ph_003f), [ff_ph_0900]"f"(ff_ph_0900), [ff_pb_03]"f"(ff_pb_03), [ff_pb_04]"f"(ff_pb_04), [ff_pb_80]"f"(ff_pb_80), [ff_pb_fe]"f"(ff_pb_fe) : "memory" ); } #define VP8_SIMPLE_HPSRAB \ "psllh %[ftmp0], %[ftmp5], %[ftmp8] \n\t" \ "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t" \ "psrlh %[ftmp0], %[ftmp0], %[ftmp8] \n\t" \ "psrah %[ftmp1], %[ftmp5], %[ftmp10] \n\t" \ "psllh %[ftmp1], %[ftmp1], %[ftmp8] \n\t" \ "or %[ftmp0], %[ftmp0], %[ftmp1] \n\t" void vp8_loop_filter_simple_horizontal_edge_mmi(unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit) { uint32_t tmp[1], count = 2; mips_reg addr[2]; double ftmp[12]; __asm__ volatile ( "li %[tmp0], 0x08 \n\t" "mtc1 %[tmp0], %[ftmp8] \n\t" "li %[tmp0], 0x03 \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "li %[tmp0], 0x0b \n\t" "mtc1 %[tmp0], %[ftmp10] \n\t" "li %[tmp0], 0x01 \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t" "1: \n\t" "gsldlc1 %[ftmp3], 0x07(%[blimit]) \n\t" "gsldrc1 %[ftmp3], 0x00(%[blimit]) \n\t" MMI_ADDU(%[addr0], 
%[src_ptr], %[src_pixel_step]) MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step_x2]) "gsldlc1 %[ftmp2], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[addr1]) \n\t" "gsldlc1 %[ftmp7], 0x07(%[addr0]) \n\t" "gsldrc1 %[ftmp7], 0x00(%[addr0]) \n\t" "pasubub %[ftmp1], %[ftmp7], %[ftmp2] \n\t" "and %[ftmp1], %[ftmp1], %[ff_pb_fe] \n\t" "psrlh %[ftmp1], %[ftmp1], %[ftmp11] \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step]) "gsldlc1 %[ftmp6], 0x07(%[addr1]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[addr1]) \n\t" "gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t" "pasubub %[ftmp5], %[ftmp6], %[ftmp0] \n\t" "paddusb %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "paddusb %[ftmp5], %[ftmp5], %[ftmp1] \n\t" "psubusb %[ftmp5], %[ftmp5], %[ftmp3] \n\t" "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp3] \n\t" "xor %[ftmp2], %[ftmp2], %[ff_pb_80] \n\t" "xor %[ftmp7], %[ftmp7], %[ff_pb_80] \n\t" "psubsb %[ftmp2], %[ftmp2], %[ftmp7] \n\t" "xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t" "xor %[ftmp3], %[ftmp0], %[ff_pb_80] \n\t" "psubsb %[ftmp0], %[ftmp3], %[ftmp6] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" "paddsb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" "and %[ftmp5], %[ftmp5], %[ftmp2] \n\t" "paddsb %[ftmp5], %[ftmp5], %[ff_pb_04] \n\t" VP8_SIMPLE_HPSRAB "psubsb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" "xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t" "gssdlc1 %[ftmp3], 0x07(%[src_ptr]) \n\t" "gssdrc1 %[ftmp3], 0x00(%[src_ptr]) \n\t" "psubsb %[ftmp5], %[ftmp5], %[ff_pb_01] \n\t" VP8_SIMPLE_HPSRAB "paddsb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" "xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step]) "gssdlc1 %[ftmp6], 0x07(%[addr1]) \n\t" "gssdrc1 %[ftmp6], 0x00(%[addr1]) \n\t" "addiu %[count], %[count], -0x01 \n\t" MMI_ADDIU(%[src_ptr], %[src_ptr], 0x08) "bnez %[count], 1b \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]), [src_ptr]"+&r"(src_ptr), [count]"+&r"(count) : [blimit]"r"(blimit), [src_pixel_step]"r"((mips_reg)src_pixel_step), [src_pixel_step_x2]"r"((mips_reg)(src_pixel_step<<1)), [ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80), [ff_pb_04]"f"(ff_pb_04), [ff_pb_01]"f"(ff_pb_01) : "memory" ); } void vp8_loop_filter_simple_vertical_edge_mmi(unsigned char *src_ptr, int src_pixel_step, const unsigned char *blimit) { uint32_t tmp[1], count = 2; mips_reg addr[2]; DECLARE_ALIGNED(8, const uint64_t, srct[1]); double ftmp[12]; __asm__ volatile ( "li %[tmp0], 0x08 \n\t" "mtc1 %[tmp0], %[ftmp8] \n\t" "li %[tmp0], 0x20 \n\t" "mtc1 %[tmp0], %[ftmp10] \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step_x4]) MMI_SUBU(%[src_ptr], %[src_ptr], 0x02) "1: \n\t" MMI_ADDU(%[addr0], %[src_ptr], %[src_pixel_step]) MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step_x2]) "gslwlc1 %[ftmp0], 0x03(%[addr1]) \n\t" "gslwrc1 %[ftmp0], 0x00(%[addr1]) \n\t" MMI_ADDU(%[addr1], %[src_ptr], %[src_pixel_step_x2]) "gslwlc1 %[ftmp6], 0x03(%[addr1]) \n\t" "gslwrc1 %[ftmp6], 0x00(%[addr1]) \n\t" "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" MMI_ADDU(%[addr1], %[src_ptr], %[src_pixel_step]) "gslwlc1 %[ftmp0], 0x03(%[addr1]) \n\t" "gslwrc1 %[ftmp0], 0x00(%[addr1]) \n\t" "gslwlc1 %[ftmp4], 0x03(%[src_ptr]) \n\t" "gslwrc1 %[ftmp4], 
0x00(%[src_ptr]) \n\t" "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" "punpckhhw %[ftmp5], %[ftmp4], %[ftmp6] \n\t" "punpcklhw %[ftmp4], %[ftmp4], %[ftmp6] \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step]) "gslwlc1 %[ftmp7], 0x03(%[addr1]) \n\t" "gslwrc1 %[ftmp7], 0x00(%[addr1]) \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step_x2]) "gslwlc1 %[ftmp6], 0x03(%[addr1]) \n\t" "gslwrc1 %[ftmp6], 0x00(%[addr1]) \n\t" "punpcklbh %[ftmp6], %[ftmp6], %[ftmp7] \n\t" MMI_SUBU(%[addr1], %[addr0], %[src_pixel_step_x4]) "gslwlc1 %[ftmp1], 0x03(%[addr1]) \n\t" "gslwrc1 %[ftmp1], 0x00(%[addr1]) \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step_x4]) "gslwlc1 %[ftmp0], 0x03(%[addr1]) \n\t" "gslwrc1 %[ftmp0], 0x00(%[addr1]) \n\t" "punpcklbh %[ftmp0], %[ftmp0], %[ftmp1] \n\t" "punpckhhw %[ftmp2], %[ftmp0], %[ftmp6] \n\t" "punpcklhw %[ftmp0], %[ftmp0], %[ftmp6] \n\t" "punpckhwd %[ftmp1], %[ftmp0], %[ftmp4] \n\t" "punpcklwd %[ftmp0], %[ftmp0], %[ftmp4] \n\t" "punpckhwd %[ftmp3], %[ftmp2], %[ftmp5] \n\t" "punpcklwd %[ftmp2], %[ftmp2], %[ftmp5] \n\t" "li %[tmp0], 0x01 \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "pasubub %[ftmp6], %[ftmp3], %[ftmp0] \n\t" "and %[ftmp6], %[ftmp6], %[ff_pb_fe] \n\t" "psrlh %[ftmp6], %[ftmp6], %[ftmp9] \n\t" "pasubub %[ftmp5], %[ftmp1], %[ftmp2] \n\t" "paddusb %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "paddusb %[ftmp5], %[ftmp5], %[ftmp6] \n\t" "gsldlc1 %[ftmp7], 0x07(%[blimit]) \n\t" "gsldrc1 %[ftmp7], 0x00(%[blimit]) \n\t" "psubusb %[ftmp5], %[ftmp5], %[ftmp7] \n\t" "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" "pcmpeqb %[ftmp5], %[ftmp5], %[ftmp7] \n\t" "sdc1 %[ftmp0], 0x00(%[srct]) \n\t" "sdc1 %[ftmp3], 0x08(%[srct]) \n\t" "xor %[ftmp0], %[ftmp0], %[ff_pb_80] \n\t" "xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t" "psubsb %[ftmp0], %[ftmp0], %[ftmp3] \n\t" "xor %[ftmp6], %[ftmp1], %[ff_pb_80] \n\t" "xor %[ftmp3], %[ftmp2], %[ff_pb_80] \n\t" "psubsb %[ftmp7], %[ftmp3], %[ftmp6] \n\t" "paddsb %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "paddsb %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "paddsb %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "and %[ftmp5], %[ftmp5], %[ftmp0] \n\t" "paddsb %[ftmp5], %[ftmp5], %[ff_pb_04] \n\t" "li %[tmp0], 0x03 \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "psllh %[ftmp0], %[ftmp5], %[ftmp8] \n\t" "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t" "psrlh %[ftmp0], %[ftmp0], %[ftmp8] \n\t" "li %[tmp0], 0x0b \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "psrah %[ftmp7], %[ftmp5], %[ftmp9] \n\t" "psllh %[ftmp7], %[ftmp7], %[ftmp8] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp7] \n\t" "psubsb %[ftmp3], %[ftmp3], %[ftmp0] \n\t" "xor %[ftmp3], %[ftmp3], %[ff_pb_80] \n\t" "psubsb %[ftmp5], %[ftmp5], %[ff_pb_01] \n\t" "li %[tmp0], 0x03 \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "psllh %[ftmp0], %[ftmp5], %[ftmp8] \n\t" "psrah %[ftmp0], %[ftmp0], %[ftmp9] \n\t" "psrlh %[ftmp0], %[ftmp0], %[ftmp8] \n\t" "li %[tmp0], 0x0b \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "psrah %[ftmp5], %[ftmp5], %[ftmp9] \n\t" "psllh %[ftmp5], %[ftmp5], %[ftmp8] \n\t" "or %[ftmp0], %[ftmp0], %[ftmp5] \n\t" "paddsb %[ftmp6], %[ftmp6], %[ftmp0] \n\t" "xor %[ftmp6], %[ftmp6], %[ff_pb_80] \n\t" "ldc1 %[ftmp0], 0x00(%[srct]) \n\t" "ldc1 %[ftmp4], 0x08(%[srct]) \n\t" "punpckhbh %[ftmp1], %[ftmp0], %[ftmp6] \n\t" "punpcklbh %[ftmp0], %[ftmp0], %[ftmp6] \n\t" "punpcklbh %[ftmp2], %[ftmp3], %[ftmp4] \n\t" "punpckhbh %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "punpckhhw %[ftmp6], %[ftmp0], %[ftmp2] \n\t" "punpcklhw %[ftmp0], %[ftmp0], %[ftmp2] \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step_x4]) "gsswlc1 %[ftmp0], 0x03(%[addr1]) \n\t" "gsswrc1 %[ftmp0], 0x00(%[addr1]) \n\t" "punpckhhw 
%[ftmp5], %[ftmp1], %[ftmp3] \n\t" "punpcklhw %[ftmp1], %[ftmp1], %[ftmp3] \n\t" "dsrl %[ftmp0], %[ftmp0], %[ftmp10] \n\t" MMI_SUBU(%[addr1], %[addr0], %[src_pixel_step_x4]) "gsswlc1 %[ftmp0], 0x03(%[addr1]) \n\t" "gsswrc1 %[ftmp0], 0x00(%[addr1]) \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step_x2]) "gsswlc1 %[ftmp6], 0x03(%[addr1]) \n\t" "gsswrc1 %[ftmp6], 0x00(%[addr1]) \n\t" "dsrl %[ftmp6], %[ftmp6], %[ftmp10] \n\t" "gsswlc1 %[ftmp1], 0x03(%[src_ptr]) \n\t" "gsswrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" MMI_SUBU(%[addr1], %[src_ptr], %[src_pixel_step]) "gsswlc1 %[ftmp6], 0x03(%[addr1]) \n\t" "gsswrc1 %[ftmp6], 0x00(%[addr1]) \n\t" MMI_ADDU(%[addr1], %[src_ptr], %[src_pixel_step_x2]) "gsswlc1 %[ftmp5], 0x03(%[addr1]) \n\t" "gsswrc1 %[ftmp5], 0x00(%[addr1]) \n\t" "dsrl %[ftmp1], %[ftmp1], %[ftmp10] \n\t" "gsswlc1 %[ftmp1], 0x03(%[addr0]) \n\t" "gsswrc1 %[ftmp1], 0x00(%[addr0]) \n\t" "dsrl %[ftmp5], %[ftmp5], %[ftmp10] \n\t" MMI_ADDU(%[addr1], %[addr0], %[src_pixel_step_x2]) "gsswlc1 %[ftmp5], 0x03(%[addr1]) \n\t" "gsswrc1 %[ftmp5], 0x00(%[addr1]) \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixel_step_x8]) "addiu %[count], %[count], -0x01 \n\t" "bnez %[count], 1b \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [addr0]"=&r"(addr[0]), [addr1]"=&r"(addr[1]), [src_ptr]"+&r"(src_ptr), [count]"+&r"(count) : [blimit]"r"(blimit), [srct]"r"(srct), [src_pixel_step]"r"((mips_reg)src_pixel_step), [src_pixel_step_x2]"r"((mips_reg)(src_pixel_step<<1)), [src_pixel_step_x4]"r"((mips_reg)(src_pixel_step<<2)), [src_pixel_step_x8]"r"((mips_reg)(src_pixel_step<<3)), [ff_pb_fe]"f"(ff_pb_fe), [ff_pb_80]"f"(ff_pb_80), [ff_pb_04]"f"(ff_pb_04), [ff_pb_01]"f"(ff_pb_01) : "memory" ); } /* Horizontal MB filtering */ void vp8_loop_filter_mbh_mmi(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { vp8_mbloop_filter_horizontal_edge_mmi(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) vp8_mbloop_filter_horizontal_edge_mmi(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); if (v_ptr) vp8_mbloop_filter_horizontal_edge_mmi(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } /* Vertical MB Filtering */ void vp8_loop_filter_mbv_mmi(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { vp8_mbloop_filter_vertical_edge_mmi(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) vp8_mbloop_filter_vertical_edge_mmi(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); if (v_ptr) vp8_mbloop_filter_vertical_edge_mmi(v_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, 1); } /* Horizontal B Filtering */ void vp8_loop_filter_bh_mmi(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { vp8_loop_filter_horizontal_edge_mmi(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_horizontal_edge_mmi(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_horizontal_edge_mmi(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) vp8_loop_filter_horizontal_edge_mmi(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); if (v_ptr) 
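/* The chroma planes are only 8 pixels wide, so a single call with count 1
   covers the one interior block edge of each 8x8 plane (luma uses count 2). */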
vp8_loop_filter_horizontal_edge_mmi(v_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); } /* Vertical B Filtering */ void vp8_loop_filter_bv_mmi(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { vp8_loop_filter_vertical_edge_mmi(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_vertical_edge_mmi(y_ptr + 8, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); vp8_loop_filter_vertical_edge_mmi(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); if (u_ptr) vp8_loop_filter_vertical_edge_mmi(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); if (v_ptr) vp8_loop_filter_vertical_edge_mmi(v_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, 1); } void vp8_loop_filter_bhs_mmi(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { vp8_loop_filter_simple_horizontal_edge_mmi(y_ptr + 4 * y_stride, y_stride, blimit); vp8_loop_filter_simple_horizontal_edge_mmi(y_ptr + 8 * y_stride, y_stride, blimit); vp8_loop_filter_simple_horizontal_edge_mmi(y_ptr + 12 * y_stride, y_stride, blimit); } void vp8_loop_filter_bvs_mmi(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { vp8_loop_filter_simple_vertical_edge_mmi(y_ptr + 4, y_stride, blimit); vp8_loop_filter_simple_vertical_edge_mmi(y_ptr + 8, y_stride, blimit); vp8_loop_filter_simple_vertical_edge_mmi(y_ptr + 12, y_stride, blimit); } libvpx-1.8.2/vp8/common/mips/mmi/sixtap_filter_mmi.c000066400000000000000000000531571357355204000224670ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "vp8/common/filter.h" #include "vpx_ports/asmdefs_mmi.h" DECLARE_ALIGNED(8, static const int16_t, vp8_six_tap_mmi[8][6 * 8]) = { { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0080, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001 }, { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0x004d, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0xfff0, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003, 0x0003 }, { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x0032, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0x005d, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0xfff7, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 }, { 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0xfff8, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x0024, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0x006c, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0xfff5, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002, 0x0002 }, { 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0xffff, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x000c, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0x007b, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0xfffa, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000 } }; /* Horizontal filter: pixel_step is 1, output_height and output_width are the size of horizontal filtering output, output_height is always H + 5 */ static INLINE void vp8_filter_block1d_h6_mmi(unsigned char *src_ptr, uint16_t *output_ptr, unsigned int src_pixels_per_line, unsigned int output_height, 
unsigned int output_width, const int16_t *vp8_filter) { uint32_t tmp[1]; DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; #if _MIPS_SIM == _ABIO32 register double fzero asm("$f0"); register double ftmp0 asm("$f2"); register double ftmp1 asm("$f4"); register double ftmp2 asm("$f6"); register double ftmp3 asm("$f8"); register double ftmp4 asm("$f10"); register double ftmp5 asm("$f12"); register double ftmp6 asm("$f14"); register double ftmp7 asm("$f16"); register double ftmp8 asm("$f18"); register double ftmp9 asm("$f20"); register double ftmp10 asm("$f22"); register double ftmp11 asm("$f24"); #else register double fzero asm("$f0"); register double ftmp0 asm("$f1"); register double ftmp1 asm("$f2"); register double ftmp2 asm("$f3"); register double ftmp3 asm("$f4"); register double ftmp4 asm("$f5"); register double ftmp5 asm("$f6"); register double ftmp6 asm("$f7"); register double ftmp7 asm("$f8"); register double ftmp8 asm("$f9"); register double ftmp9 asm("$f10"); register double ftmp10 asm("$f11"); register double ftmp11 asm("$f12"); #endif // _MIPS_SIM == _ABIO32 __asm__ volatile ( "ldc1 %[ftmp0], 0x00(%[vp8_filter]) \n\t" "ldc1 %[ftmp1], 0x10(%[vp8_filter]) \n\t" "ldc1 %[ftmp2], 0x20(%[vp8_filter]) \n\t" "ldc1 %[ftmp3], 0x30(%[vp8_filter]) \n\t" "ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t" "ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t" "xor %[fzero], %[fzero], %[fzero] \n\t" "li %[tmp0], 0x07 \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t" "li %[tmp0], 0x08 \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t" "1: \n\t" "gsldlc1 %[ftmp9], 0x05(%[src_ptr]) \n\t" "gsldrc1 %[ftmp9], -0x02(%[src_ptr]) \n\t" "gsldlc1 %[ftmp10], 0x06(%[src_ptr]) \n\t" "gsldrc1 %[ftmp10], -0x01(%[src_ptr]) \n\t" "punpcklbh %[ftmp6], %[ftmp9], %[fzero] \n\t" "pmullh %[ftmp8], %[ftmp6], %[ftmp0] \n\t" "punpckhbh %[ftmp6], %[ftmp9], %[fzero] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp4] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp1] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "punpckhbh %[ftmp6], %[ftmp10], %[fzero] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp5] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "dsrl %[ftmp10], %[ftmp10], %[ftmp11] \n\t" "punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "dsrl %[ftmp10], %[ftmp10], %[ftmp11] \n\t" "punpcklbh %[ftmp6], %[ftmp10], %[fzero] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp3] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ftmp6] \n\t" "paddsh %[ftmp8], %[ftmp8], %[ff_ph_40] \n\t" "psrah %[ftmp8], %[ftmp8], %[ftmp7] \n\t" "packushb %[ftmp8], %[ftmp8], %[fzero] \n\t" "punpcklbh %[ftmp8], %[ftmp8], %[fzero] \n\t" "gssdlc1 %[ftmp8], 0x07(%[output_ptr]) \n\t" "gssdrc1 %[ftmp8], 0x00(%[output_ptr]) \n\t" "addiu %[output_height], %[output_height], -0x01 \n\t" MMI_ADDU(%[output_ptr], %[output_ptr], %[output_width]) MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixels_per_line]) "bnez %[output_height], 1b \n\t" : [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0), [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6), [ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8), [ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10), [ftmp11]"=&f"(ftmp11), [tmp0]"=&r"(tmp[0]), [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height), [src_ptr]"+&r"(src_ptr) : [src_pixels_per_line]"r"((mips_reg)src_pixels_per_line), [vp8_filter]"r"(vp8_filter), [output_width]"r"(output_width), 
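/* ff_ph_40 holds four halfword copies of 64, the rounding bias added
   before the arithmetic shift by VP8_FILTER_SHIFT (7) */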
[ff_ph_40]"f"(ff_ph_40) : "memory" ); } /* Horizontal filter: pixel_step is always W */ static INLINE void vp8_filter_block1dc_v6_mmi( uint16_t *src_ptr, unsigned char *output_ptr, unsigned int output_height, int output_pitch, unsigned int pixels_per_line, const int16_t *vp8_filter) { DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; uint32_t tmp[1]; mips_reg addr[1]; #if _MIPS_SIM == _ABIO32 register double fzero asm("$f0"); register double ftmp0 asm("$f2"); register double ftmp1 asm("$f4"); register double ftmp2 asm("$f6"); register double ftmp3 asm("$f8"); register double ftmp4 asm("$f10"); register double ftmp5 asm("$f12"); register double ftmp6 asm("$f14"); register double ftmp7 asm("$f16"); register double ftmp8 asm("$f18"); register double ftmp9 asm("$f20"); register double ftmp10 asm("$f22"); register double ftmp11 asm("$f24"); register double ftmp12 asm("$f26"); register double ftmp13 asm("$f28"); #else register double fzero asm("$f0"); register double ftmp0 asm("$f1"); register double ftmp1 asm("$f2"); register double ftmp2 asm("$f3"); register double ftmp3 asm("$f4"); register double ftmp4 asm("$f5"); register double ftmp5 asm("$f6"); register double ftmp6 asm("$f7"); register double ftmp7 asm("$f8"); register double ftmp8 asm("$f9"); register double ftmp9 asm("$f10"); register double ftmp10 asm("$f11"); register double ftmp11 asm("$f12"); register double ftmp12 asm("$f13"); register double ftmp13 asm("$f14"); #endif // _MIPS_SIM == _ABIO32 __asm__ volatile ( "ldc1 %[ftmp0], 0x00(%[vp8_filter]) \n\t" "ldc1 %[ftmp1], 0x10(%[vp8_filter]) \n\t" "ldc1 %[ftmp2], 0x20(%[vp8_filter]) \n\t" "ldc1 %[ftmp3], 0x30(%[vp8_filter]) \n\t" "ldc1 %[ftmp4], 0x40(%[vp8_filter]) \n\t" "ldc1 %[ftmp5], 0x50(%[vp8_filter]) \n\t" "xor %[fzero], %[fzero], %[fzero] \n\t" "li %[tmp0], 0x07 \n\t" "mtc1 %[tmp0], %[ftmp13] \n\t" /* In order to make full use of memory load delay slot, * Operation of memory loading and calculating has been rearranged. 
*/ "1: \n\t" "gsldlc1 %[ftmp6], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[src_ptr]) \n\t" MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line]) "gsldlc1 %[ftmp7], 0x07(%[addr0]) \n\t" "gsldrc1 %[ftmp7], 0x00(%[addr0]) \n\t" MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2]) "gsldlc1 %[ftmp8], 0x07(%[addr0]) \n\t" "gsldrc1 %[ftmp8], 0x00(%[addr0]) \n\t" MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4]) "gsldlc1 %[ftmp9], 0x07(%[addr0]) \n\t" "gsldrc1 %[ftmp9], 0x00(%[addr0]) \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line]) MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x2]) "gsldlc1 %[ftmp10], 0x07(%[addr0]) \n\t" "gsldrc1 %[ftmp10], 0x00(%[addr0]) \n\t" MMI_ADDU(%[addr0], %[src_ptr], %[pixels_per_line_x4]) "gsldlc1 %[ftmp11], 0x07(%[addr0]) \n\t" "gsldrc1 %[ftmp11], 0x00(%[addr0]) \n\t" "pmullh %[ftmp12], %[ftmp6], %[ftmp0] \n\t" "pmullh %[ftmp7], %[ftmp7], %[ftmp1] \n\t" "paddsh %[ftmp12], %[ftmp12], %[ftmp7] \n\t" "pmullh %[ftmp8], %[ftmp8], %[ftmp2] \n\t" "paddsh %[ftmp12], %[ftmp12], %[ftmp8] \n\t" "pmullh %[ftmp9], %[ftmp9], %[ftmp4] \n\t" "paddsh %[ftmp12], %[ftmp12], %[ftmp9] \n\t" "pmullh %[ftmp10], %[ftmp10], %[ftmp3] \n\t" "paddsh %[ftmp12], %[ftmp12], %[ftmp10] \n\t" "pmullh %[ftmp11], %[ftmp11], %[ftmp5] \n\t" "paddsh %[ftmp12], %[ftmp12], %[ftmp11] \n\t" "paddsh %[ftmp12], %[ftmp12], %[ff_ph_40] \n\t" "psrah %[ftmp12], %[ftmp12], %[ftmp13] \n\t" "packushb %[ftmp12], %[ftmp12], %[fzero] \n\t" "gsswlc1 %[ftmp12], 0x03(%[output_ptr]) \n\t" "gsswrc1 %[ftmp12], 0x00(%[output_ptr]) \n\t" MMI_ADDIU(%[output_height], %[output_height], -0x01) MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch]) "bnez %[output_height], 1b \n\t" : [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0), [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [ftmp6]"=&f"(ftmp6), [ftmp7]"=&f"(ftmp7), [ftmp8]"=&f"(ftmp8), [ftmp9]"=&f"(ftmp9), [ftmp10]"=&f"(ftmp10), [ftmp11]"=&f"(ftmp11), [ftmp12]"=&f"(ftmp12), [ftmp13]"=&f"(ftmp13), [tmp0]"=&r"(tmp[0]), [addr0]"=&r"(addr[0]), [src_ptr]"+&r"(src_ptr), [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height) : [pixels_per_line]"r"((mips_reg)pixels_per_line), [pixels_per_line_x2]"r"((mips_reg)(pixels_per_line<<1)), [pixels_per_line_x4]"r"((mips_reg)(pixels_per_line<<2)), [vp8_filter]"r"(vp8_filter), [output_pitch]"r"((mips_reg)output_pitch), [ff_ph_40]"f"(ff_ph_40) : "memory" ); } /* When xoffset == 0, vp8_filter= {0,0,128,0,0,0}, function vp8_filter_block1d_h6_mmi and vp8_filter_block1d_v6_mmi can be simplified */ static INLINE void vp8_filter_block1d_h6_filter0_mmi( unsigned char *src_ptr, uint16_t *output_ptr, unsigned int src_pixels_per_line, unsigned int output_height, unsigned int output_width) { #if _MIPS_SIM == _ABIO32 register double fzero asm("$f0"); register double ftmp0 asm("$f2"); register double ftmp1 asm("$f4"); #else register double fzero asm("$f0"); register double ftmp0 asm("$f1"); register double ftmp1 asm("$f2"); #endif // _MIPS_SIM == _ABIO32 __asm__ volatile ( "xor %[fzero], %[fzero], %[fzero] \n\t" "1: \n\t" "gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_pixels_per_line]) "punpcklbh %[ftmp1], %[ftmp0], %[fzero] \n\t" "gssdlc1 %[ftmp1], 0x07(%[output_ptr]) \n\t" "gssdrc1 %[ftmp1], 0x00(%[output_ptr]) \n\t" "addiu %[output_height], %[output_height], -0x01 \n\t" MMI_ADDU(%[output_ptr], %[output_ptr], %[output_width]) "bnez %[output_height], 1b \n\t" : [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0), 
[ftmp1]"=&f"(ftmp1), [src_ptr]"+&r"(src_ptr), [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height) : [src_pixels_per_line]"r"((mips_reg)src_pixels_per_line), [output_width]"r"(output_width) : "memory" ); } static INLINE void vp8_filter_block1dc_v6_filter0_mmi( uint16_t *src_ptr, unsigned char *output_ptr, unsigned int output_height, int output_pitch, unsigned int pixels_per_line) { #if _MIPS_SIM == _ABIO32 register double fzero asm("$f0"); register double ftmp0 asm("$f2"); register double ftmp1 asm("$f4"); #else register double fzero asm("$f0"); register double ftmp0 asm("$f1"); register double ftmp1 asm("$f2"); #endif // _MIPS_SIM == _ABIO32 __asm__ volatile ( "xor %[fzero], %[fzero], %[fzero] \n\t" "1: \n\t" "gsldlc1 %[ftmp0], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp0], 0x00(%[src_ptr]) \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[pixels_per_line]) MMI_ADDIU(%[output_height], %[output_height], -0x01) "packushb %[ftmp1], %[ftmp0], %[fzero] \n\t" "gsswlc1 %[ftmp1], 0x03(%[output_ptr]) \n\t" "gsswrc1 %[ftmp1], 0x00(%[output_ptr]) \n\t" MMI_ADDU(%[output_ptr], %[output_ptr], %[output_pitch]) "bnez %[output_height], 1b \n\t" : [fzero]"=&f"(fzero), [ftmp0]"=&f"(ftmp0), [ftmp1]"=&f"(ftmp1), [src_ptr]"+&r"(src_ptr), [output_ptr]"+&r"(output_ptr), [output_height]"+&r"(output_height) : [pixels_per_line]"r"((mips_reg)pixels_per_line), [output_pitch]"r"((mips_reg)output_pitch) : "memory" ); } #define sixtapNxM(n, m) \ void vp8_sixtap_predict##n##x##m##_mmi( \ unsigned char *src_ptr, int src_pixels_per_line, int xoffset, \ int yoffset, unsigned char *dst_ptr, int dst_pitch) { \ DECLARE_ALIGNED(16, uint16_t, \ FData2[(n + 5) * (n == 16 ? 24 : (n == 8 ? 16 : n))]); \ const int16_t *HFilter, *VFilter; \ int i, loop = n / 4; \ HFilter = vp8_six_tap_mmi[xoffset]; \ VFilter = vp8_six_tap_mmi[yoffset]; \ \ if (xoffset == 0) { \ for (i = 0; i < loop; ++i) { \ vp8_filter_block1d_h6_filter0_mmi( \ src_ptr - (2 * src_pixels_per_line) + i * 4, FData2 + i * 4, \ src_pixels_per_line, m + 5, n * 2); \ } \ } else { \ for (i = 0; i < loop; ++i) { \ vp8_filter_block1d_h6_mmi(src_ptr - (2 * src_pixels_per_line) + i * 4, \ FData2 + i * 4, src_pixels_per_line, m + 5, \ n * 2, HFilter); \ } \ } \ if (yoffset == 0) { \ for (i = 0; i < loop; ++i) { \ vp8_filter_block1dc_v6_filter0_mmi( \ FData2 + n * 2 + i * 4, dst_ptr + i * 4, m, dst_pitch, n * 2); \ } \ } else { \ for (i = 0; i < loop; ++i) { \ vp8_filter_block1dc_v6_mmi(FData2 + i * 4, dst_ptr + i * 4, m, \ dst_pitch, n * 2, VFilter); \ } \ } \ } sixtapNxM(4, 4); sixtapNxM(8, 8); sixtapNxM(8, 4); sixtapNxM(16, 16); libvpx-1.8.2/vp8/common/mips/msa/000077500000000000000000000000001357355204000165675ustar00rootroot00000000000000libvpx-1.8.2/vp8/common/mips/msa/bilinear_filter_msa.c000066400000000000000000000755071357355204000227430ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vp8_rtcd.h" #include "vpx_ports/mem.h" #include "vp8/common/filter.h" #include "vp8/common/mips/msa/vp8_macros_msa.h" DECLARE_ALIGNED(16, static const int8_t, vp8_bilinear_filters_msa[7][2]) = { { 112, 16 }, { 96, 32 }, { 80, 48 }, { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 } }; static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = { /* 8 width cases */ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, /* 4 width cases */ 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20, /* 4 width cases */ 8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28 }; static void common_hz_2t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter) { v16i8 src0, src1, src2, src3, mask; v16u8 filt0, vec0, vec1, res0, res1; v8u16 vec2, vec3, filt; mask = LD_SB(&vp8_mc_filt_mask_arr[16]); filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB4(src, src_stride, src0, src1, src2, src3); VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3); SRARI_H2_UH(vec2, vec3, VP8_FILTER_SHIFT); PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1); ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); } static void common_hz_2t_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter) { v16u8 vec0, vec1, vec2, vec3, filt0; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; v16i8 res0, res1, res2, res3; v8u16 vec4, vec5, vec6, vec7, filt; mask = LD_SB(&vp8_mc_filt_mask_arr[16]); filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec4, vec5, vec6, vec7); SRARI_H4_UH(vec4, vec5, vec6, vec7, VP8_FILTER_SHIFT); PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, res2, res3); ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); dst += (4 * dst_stride); ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); } static void common_hz_2t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { if (4 == height) { common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, filter); } else if (8 == height) { common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, filter); } } static void common_hz_2t_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter) { v16u8 filt0; v16i8 src0, src1, src2, src3, mask; v8u16 vec0, vec1, vec2, vec3, filt; mask = LD_SB(&vp8_mc_filt_mask_arr[0]); filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB4(src, src_stride, src0, src1, src2, src3); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT); PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1); ST8x4_UB(src0, src1, dst, dst_stride); } static void common_hz_2t_8x8mult_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { v16u8 filt0; v16i8 src0, src1, src2, src3, mask, out0, out1; v8u16 vec0, vec1, vec2, vec3, filt; mask = 
LD_SB(&vp8_mc_filt_mask_arr[0]); filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT); LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT); PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); if (16 == height) { LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT); LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, VP8_FILTER_SHIFT); PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride); } } static void common_hz_2t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { if (4 == height) { common_hz_2t_8x4_msa(src, src_stride, dst, dst_stride, filter); } else { common_hz_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height); } } static void common_hz_2t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt; mask = LD_SB(&vp8_mc_filt_mask_arr[0]); loop_cnt = (height >> 2) - 1; filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, out2, out3); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, out6, out7); SRARI_H4_UH(out0, out1, out2, out3, VP8_FILTER_SHIFT); SRARI_H4_UH(out4, out5, out6, out7, VP8_FILTER_SHIFT); PCKEV_ST_SB(out0, out1, dst); dst += dst_stride; PCKEV_ST_SB(out2, out3, dst); dst += dst_stride; PCKEV_ST_SB(out4, out5, dst); dst += dst_stride; PCKEV_ST_SB(out6, out7, 
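/* pack the filtered halfwords to bytes and store one 16-pixel row */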
dst); dst += dst_stride; for (; loop_cnt--;) { LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, out2, out3); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, out6, out7); SRARI_H4_UH(out0, out1, out2, out3, VP8_FILTER_SHIFT); SRARI_H4_UH(out4, out5, out6, out7, VP8_FILTER_SHIFT); PCKEV_ST_SB(out0, out1, dst); dst += dst_stride; PCKEV_ST_SB(out2, out3, dst); dst += dst_stride; PCKEV_ST_SB(out4, out5, dst); dst += dst_stride; PCKEV_ST_SB(out6, out7, dst); dst += dst_stride; } } static void common_vt_2t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter) { v16i8 src0, src1, src2, src3, src4; v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332; v16u8 filt0; v8i16 filt; v8u16 tmp0, tmp1; filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); LD_SB5(src, src_stride, src0, src1, src2, src3, src4); src += (5 * src_stride); ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, src32_r, src43_r); ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332); DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT); src2110 = __msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride); } static void common_vt_2t_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter) { v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8; v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r; v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776; v8u16 tmp0, tmp1, tmp2, tmp3; v16u8 filt0; v8i16 filt; filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); src8 = LD_SB(src); src += src_stride; ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, src32_r, src43_r); ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r, src76_r, src87_r); ILVR_D4_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src87_r, src76_r, src2110, src4332, src6554, src8776); DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT); PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, src2110, src4332); ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride); ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst + 4 * dst_stride, dst_stride); } static void common_vt_2t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { if (4 == height) { common_vt_2t_4x4_msa(src, src_stride, dst, dst_stride, filter); } else if (8 == height) { common_vt_2t_4x8_msa(src, src_stride, dst, dst_stride, filter); } } static void common_vt_2t_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter) { v16u8 src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0; v16i8 out0, out1; v8u16 tmp0, tmp1, tmp2, tmp3; v8i16 filt; filt = 
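/* load the bilinear tap pair, then splat it across every halfword lane */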
LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); LD_UB5(src, src_stride, src0, src1, src2, src3, src4); ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1); ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT); PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); } static void common_vt_2t_8x8mult_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; v16i8 out0, out1; v8u16 tmp0, tmp1, tmp2, tmp3; v8i16 filt; filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 3); loop_cnt--;) { LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8); src += (8 * src_stride); ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, vec2, vec3); ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, vec4, vec5, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT); PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT); PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); src0 = src8; } } static void common_vt_2t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { if (4 == height) { common_vt_2t_8x4_msa(src, src_stride, dst, dst_stride, filter); } else { common_vt_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height); } } static void common_vt_2t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; v8u16 tmp0, tmp1, tmp2, tmp3; v8i16 filt; filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT); PCKEV_ST_SB(tmp0, tmp1, dst); dst += dst_stride; ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, VP8_FILTER_SHIFT); PCKEV_ST_SB(tmp2, tmp3, dst); dst += dst_stride; DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT); PCKEV_ST_SB(tmp0, tmp1, dst); dst += dst_stride; DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, VP8_FILTER_SHIFT); PCKEV_ST_SB(tmp2, tmp3, dst); dst += dst_stride; src0 = src4; } } static void common_hv_2ht_2vt_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const 
int8_t *filter_vert) { v16i8 src0, src1, src2, src3, src4, mask; v16u8 filt_vt, filt_hz, vec0, vec1, res0, res1; v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, filt, tmp0, tmp1; mask = LD_SB(&vp8_mc_filt_mask_arr[16]); filt = LD_UH(filter_horiz); filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0); filt = LD_UH(filter_vert); filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB5(src, src_stride, src0, src1, src2, src3, src4); hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, VP8_FILTER_SHIFT); hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, VP8_FILTER_SHIFT); hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, VP8_FILTER_SHIFT); hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8); hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, VP8_FILTER_SHIFT); PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1); ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); } static void common_hv_2ht_2vt_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert) { v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask; v16i8 res0, res1, res2, res3; v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3; v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; v8u16 hz_out7, hz_out8, vec4, vec5, vec6, vec7, filt; mask = LD_SB(&vp8_mc_filt_mask_arr[16]); filt = LD_UH(filter_horiz); filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0); filt = LD_UH(filter_vert); filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); src8 = LD_SB(src); hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, VP8_FILTER_SHIFT); hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, VP8_FILTER_SHIFT); hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, VP8_FILTER_SHIFT); hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, VP8_FILTER_SHIFT); hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, VP8_FILTER_SHIFT); SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1, hz_out3, hz_out5, 8); hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt, vec4, vec5, vec6, vec7); SRARI_H4_UH(vec4, vec5, vec6, vec7, VP8_FILTER_SHIFT); PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, res2, res3); ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); dst += (4 * dst_stride); ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); } static void common_hv_2ht_2vt_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { if (4 == height) { common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert); } else if (8 == height) { common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert); } } static void common_hv_2ht_2vt_8x4_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert) { v16i8 src0, src1, src2, src3, src4, mask, out0, out1; v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3; v8u16 hz_out0, 
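/* hz_out0/hz_out1: 16-bit rows produced by the horizontal pass,
   kept as context for the vertical pass */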
hz_out1, tmp0, tmp1, tmp2, tmp3; v8i16 filt; mask = LD_SB(&vp8_mc_filt_mask_arr[0]); filt = LD_SH(filter_horiz); filt_hz = (v16u8)__msa_splati_h(filt, 0); filt = LD_SH(filter_vert); filt_vt = (v16u8)__msa_splati_h(filt, 0); LD_SB5(src, src_stride, src0, src1, src2, src3, src4); hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT); hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp0 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, VP8_FILTER_SHIFT); vec1 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp1 = __msa_dotp_u_h(vec1, filt_vt); hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, VP8_FILTER_SHIFT); vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp2 = __msa_dotp_u_h(vec2, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, VP8_FILTER_SHIFT); vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp3 = __msa_dotp_u_h(vec3, filt_vt); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, VP8_FILTER_SHIFT); PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); } static void common_hv_2ht_2vt_8x8mult_msa( uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, mask, out0, out1; v16u8 filt_hz, filt_vt, vec0; v8u16 hz_out0, hz_out1, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; v8i16 filt; mask = LD_SB(&vp8_mc_filt_mask_arr[0]); filt = LD_SH(filter_horiz); filt_hz = (v16u8)__msa_splati_h(filt, 0); filt = LD_SH(filter_vert); filt_vt = (v16u8)__msa_splati_h(filt, 0); src0 = LD_SB(src); src += src_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT); for (loop_cnt = (height >> 3); loop_cnt--;) { LD_SB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp1 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, VP8_FILTER_SHIFT); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp2 = __msa_dotp_u_h(vec0, filt_vt); SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT); hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, VP8_FILTER_SHIFT); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp3 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, VP8_FILTER_SHIFT); LD_SB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp4 = __msa_dotp_u_h(vec0, filt_vt); SRARI_H2_UH(tmp3, tmp4, VP8_FILTER_SHIFT); PCKEV_B2_SB(tmp2, tmp1, tmp4, tmp3, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp5 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, VP8_FILTER_SHIFT); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp6 = __msa_dotp_u_h(vec0, filt_vt); hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, VP8_FILTER_SHIFT); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp7 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, 
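/* VP8_FILTER_SHIFT == 7: rounding shift back to pixel range */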
VP8_FILTER_SHIFT); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp8 = __msa_dotp_u_h(vec0, filt_vt); SRARI_H4_UH(tmp5, tmp6, tmp7, tmp8, VP8_FILTER_SHIFT); PCKEV_B2_SB(tmp6, tmp5, tmp8, tmp7, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); } } static void common_hv_2ht_2vt_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { if (4 == height) { common_hv_2ht_2vt_8x4_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert); } else { common_hv_2ht_2vt_8x8mult_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); } } static void common_hv_2ht_2vt_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; v16u8 filt_hz, filt_vt, vec0, vec1; v8u16 tmp1, tmp2, hz_out0, hz_out1, hz_out2, hz_out3; v8i16 filt; mask = LD_SB(&vp8_mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_SH(filter_horiz); filt_hz = (v16u8)__msa_splati_h(filt, 0); filt = LD_SH(filter_vert); filt_vt = (v16u8)__msa_splati_h(filt, 0); LD_SB2(src, 8, src0, src1); src += src_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT); hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, VP8_FILTER_SHIFT); hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, VP8_FILTER_SHIFT); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT); PCKEV_ST_SB(tmp1, tmp2, dst); dst += dst_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, VP8_FILTER_SHIFT); hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, VP8_FILTER_SHIFT); ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT); PCKEV_ST_SB(tmp1, tmp2, dst); dst += dst_stride; hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, VP8_FILTER_SHIFT); hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, VP8_FILTER_SHIFT); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT); PCKEV_ST_SB(tmp1, tmp2, dst); dst += dst_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, VP8_FILTER_SHIFT); hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, VP8_FILTER_SHIFT); ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); SRARI_H2_UH(tmp1, tmp2, VP8_FILTER_SHIFT); PCKEV_ST_SB(tmp1, tmp2, dst); dst += dst_stride; } } void vp8_bilinear_predict4x4_msa(uint8_t *RESTRICT src, int32_t src_stride, int32_t xoffset, int32_t yoffset, uint8_t *RESTRICT dst, int32_t dst_stride) { const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1]; const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1]; if (yoffset) { if (xoffset) { common_hv_2ht_2vt_4w_msa(src, src_stride, dst, dst_stride, h_filter, v_filter, 4); } else { common_vt_2t_4w_msa(src, src_stride, dst, dst_stride, 
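/* xoffset == 0: the horizontal pass would be an identity,
   so only the vertical two-tap filter runs */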
v_filter, 4); } } else { if (xoffset) { common_hz_2t_4w_msa(src, src_stride, dst, dst_stride, h_filter, 4); } else { uint32_t tp0, tp1, tp2, tp3; LW4(src, src_stride, tp0, tp1, tp2, tp3); SW4(tp0, tp1, tp2, tp3, dst, dst_stride); } } } void vp8_bilinear_predict8x4_msa(uint8_t *RESTRICT src, int32_t src_stride, int32_t xoffset, int32_t yoffset, uint8_t *RESTRICT dst, int32_t dst_stride) { const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1]; const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1]; if (yoffset) { if (xoffset) { common_hv_2ht_2vt_8w_msa(src, src_stride, dst, dst_stride, h_filter, v_filter, 4); } else { common_vt_2t_8w_msa(src, src_stride, dst, dst_stride, v_filter, 4); } } else { if (xoffset) { common_hz_2t_8w_msa(src, src_stride, dst, dst_stride, h_filter, 4); } else { vp8_copy_mem8x4(src, src_stride, dst, dst_stride); } } } void vp8_bilinear_predict8x8_msa(uint8_t *RESTRICT src, int32_t src_stride, int32_t xoffset, int32_t yoffset, uint8_t *RESTRICT dst, int32_t dst_stride) { const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1]; const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1]; if (yoffset) { if (xoffset) { common_hv_2ht_2vt_8w_msa(src, src_stride, dst, dst_stride, h_filter, v_filter, 8); } else { common_vt_2t_8w_msa(src, src_stride, dst, dst_stride, v_filter, 8); } } else { if (xoffset) { common_hz_2t_8w_msa(src, src_stride, dst, dst_stride, h_filter, 8); } else { vp8_copy_mem8x8(src, src_stride, dst, dst_stride); } } } void vp8_bilinear_predict16x16_msa(uint8_t *RESTRICT src, int32_t src_stride, int32_t xoffset, int32_t yoffset, uint8_t *RESTRICT dst, int32_t dst_stride) { const int8_t *h_filter = vp8_bilinear_filters_msa[xoffset - 1]; const int8_t *v_filter = vp8_bilinear_filters_msa[yoffset - 1]; if (yoffset) { if (xoffset) { common_hv_2ht_2vt_16w_msa(src, src_stride, dst, dst_stride, h_filter, v_filter, 16); } else { common_vt_2t_16w_msa(src, src_stride, dst, dst_stride, v_filter, 16); } } else { if (xoffset) { common_hz_2t_16w_msa(src, src_stride, dst, dst_stride, h_filter, 16); } else { vp8_copy_mem16x16(src, src_stride, dst, dst_stride); } } } libvpx-1.8.2/vp8/common/mips/msa/copymem_msa.c000066400000000000000000000043531357355204000212510ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vp8_rtcd.h" #include "vp8/common/mips/msa/vp8_macros_msa.h" static void copy_8x4_msa(uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride) { uint64_t src0, src1, src2, src3; LD4(src, src_stride, src0, src1, src2, src3); SD4(src0, src1, src2, src3, dst, dst_stride); } static void copy_8x8_msa(uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride) { uint64_t src0, src1, src2, src3; LD4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); SD4(src0, src1, src2, src3, dst, dst_stride); dst += (4 * dst_stride); LD4(src, src_stride, src0, src1, src2, src3); SD4(src0, src1, src2, src3, dst, dst_stride); } static void copy_16x16_msa(uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride) { v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 src8, src9, src10, src11, src12, src13, src14, src15; LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); LD_UB8(src, src_stride, src8, src9, src10, src11, src12, src13, src14, src15); ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride); dst += (8 * dst_stride); ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15, dst, dst_stride); } void vp8_copy_mem16x16_msa(uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride) { copy_16x16_msa(src, src_stride, dst, dst_stride); } void vp8_copy_mem8x8_msa(uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride) { copy_8x8_msa(src, src_stride, dst, dst_stride); } void vp8_copy_mem8x4_msa(uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride) { copy_8x4_msa(src, src_stride, dst, dst_stride); } libvpx-1.8.2/vp8/common/mips/msa/idct_msa.c000066400000000000000000000405201357355204000205170ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vp8_rtcd.h" #include "vp8/common/blockd.h" #include "vp8/common/mips/msa/vp8_macros_msa.h" static const int32_t cospi8sqrt2minus1 = 20091; static const int32_t sinpi8sqrt2 = 35468; #define TRANSPOSE_TWO_4x4_H(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v8i16 s4_m, s5_m, s6_m, s7_m; \ \ TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, s4_m, s5_m, s6_m, s7_m); \ ILVR_D2_SH(s6_m, s4_m, s7_m, s5_m, out0, out2); \ out1 = (v8i16)__msa_ilvl_d((v2i64)s6_m, (v2i64)s4_m); \ out3 = (v8i16)__msa_ilvl_d((v2i64)s7_m, (v2i64)s5_m); \ } #define EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in) \ ({ \ v8i16 out_m; \ v8i16 zero_m = { 0 }; \ v4i32 tmp1_m, tmp2_m; \ v4i32 sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \ \ ILVRL_H2_SW(in, zero_m, tmp1_m, tmp2_m); \ tmp1_m >>= 16; \ tmp2_m >>= 16; \ tmp1_m = (tmp1_m * sinpi8_sqrt2_m) >> 16; \ tmp2_m = (tmp2_m * sinpi8_sqrt2_m) >> 16; \ out_m = __msa_pckev_h((v8i16)tmp2_m, (v8i16)tmp1_m); \ \ out_m; \ }) #define VP8_IDCT_1D_H(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v8i16 a1_m, b1_m, c1_m, d1_m; \ v8i16 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \ v8i16 const_cospi8sqrt2minus1_m; \ \ const_cospi8sqrt2minus1_m = __msa_fill_h(cospi8sqrt2minus1); \ a1_m = in0 + in2; \ b1_m = in0 - in2; \ c_tmp1_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in1); \ c_tmp2_m = __msa_mul_q_h(in3, const_cospi8sqrt2minus1_m); \ c_tmp2_m = c_tmp2_m >> 1; \ c_tmp2_m = in3 + c_tmp2_m; \ c1_m = c_tmp1_m - c_tmp2_m; \ d_tmp1_m = __msa_mul_q_h(in1, const_cospi8sqrt2minus1_m); \ d_tmp1_m = d_tmp1_m >> 1; \ d_tmp1_m = in1 + d_tmp1_m; \ d_tmp2_m = EXPAND_TO_H_MULTIPLY_SINPI8SQRT2_PCK_TO_W(in3); \ d1_m = d_tmp1_m + d_tmp2_m; \ BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \ } #define VP8_IDCT_1D_W(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v4i32 a1_m, b1_m, c1_m, d1_m; \ v4i32 c_tmp1_m, c_tmp2_m, d_tmp1_m, d_tmp2_m; \ v4i32 const_cospi8sqrt2minus1_m, sinpi8_sqrt2_m; \ \ const_cospi8sqrt2minus1_m = __msa_fill_w(cospi8sqrt2minus1); \ sinpi8_sqrt2_m = __msa_fill_w(sinpi8sqrt2); \ a1_m = in0 + in2; \ b1_m = in0 - in2; \ c_tmp1_m = (in1 * sinpi8_sqrt2_m) >> 16; \ c_tmp2_m = in3 + ((in3 * const_cospi8sqrt2minus1_m) >> 16); \ c1_m = c_tmp1_m - c_tmp2_m; \ d_tmp1_m = in1 + ((in1 * const_cospi8sqrt2minus1_m) >> 16); \ d_tmp2_m = (in3 * sinpi8_sqrt2_m) >> 16; \ d1_m = d_tmp1_m + d_tmp2_m; \ BUTTERFLY_4(a1_m, b1_m, c1_m, d1_m, out0, out1, out2, out3); \ } static void idct4x4_addblk_msa(int16_t *input, uint8_t *pred, int32_t pred_stride, uint8_t *dest, int32_t dest_stride) { v8i16 input0, input1; v4i32 in0, in1, in2, in3, hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3; v4i32 res0, res1, res2, res3; v16i8 zero = { 0 }; v16i8 pred0, pred1, pred2, pred3; LD_SH2(input, 8, input0, input1); UNPCK_SH_SW(input0, in0, in1); UNPCK_SH_SW(input1, in2, in3); VP8_IDCT_1D_W(in0, in1, in2, in3, hz0, hz1, hz2, hz3); TRANSPOSE4x4_SW_SW(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3); VP8_IDCT_1D_W(hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3); SRARI_W4_SW(vt0, vt1, vt2, vt3, 3); TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3); LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3); ILVR_B4_SW(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1, res2, res3); ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, res3, res0, res1, res2, res3); ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3); res0 = CLIP_SW_0_255(res0); res1 = CLIP_SW_0_255(res1); res2 = CLIP_SW_0_255(res2); res3 = CLIP_SW_0_255(res3); PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1); res0 = 
(v4i32)__msa_pckev_b((v16i8)vt0, (v16i8)vt1); ST4x4_UB(res0, res0, 3, 2, 1, 0, dest, dest_stride); } static void idct4x4_addconst_msa(int16_t in_dc, uint8_t *pred, int32_t pred_stride, uint8_t *dest, int32_t dest_stride) { v8i16 vec, res0, res1, res2, res3, dst0, dst1; v16i8 zero = { 0 }; v16i8 pred0, pred1, pred2, pred3; vec = __msa_fill_h(in_dc); vec = __msa_srari_h(vec, 3); LD_SB4(pred, pred_stride, pred0, pred1, pred2, pred3); ILVR_B4_SH(zero, pred0, zero, pred1, zero, pred2, zero, pred3, res0, res1, res2, res3); ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3); CLIP_SH4_0_255(res0, res1, res2, res3); PCKEV_B2_SH(res1, res0, res3, res2, dst0, dst1); dst0 = (v8i16)__msa_pckev_w((v4i32)dst1, (v4i32)dst0); ST4x4_UB(dst0, dst0, 0, 1, 2, 3, dest, dest_stride); } void vp8_short_inv_walsh4x4_msa(int16_t *input, int16_t *mb_dqcoeff) { v8i16 input0, input1, tmp0, tmp1, tmp2, tmp3, out0, out1; const v8i16 mask0 = { 0, 1, 2, 3, 8, 9, 10, 11 }; const v8i16 mask1 = { 4, 5, 6, 7, 12, 13, 14, 15 }; const v8i16 mask2 = { 0, 4, 8, 12, 1, 5, 9, 13 }; const v8i16 mask3 = { 3, 7, 11, 15, 2, 6, 10, 14 }; LD_SH2(input, 8, input0, input1); input1 = (v8i16)__msa_sldi_b((v16i8)input1, (v16i8)input1, 8); tmp0 = input0 + input1; tmp1 = input0 - input1; VSHF_H2_SH(tmp0, tmp1, tmp0, tmp1, mask0, mask1, tmp2, tmp3); out0 = tmp2 + tmp3; out1 = tmp2 - tmp3; VSHF_H2_SH(out0, out1, out0, out1, mask2, mask3, input0, input1); tmp0 = input0 + input1; tmp1 = input0 - input1; VSHF_H2_SH(tmp0, tmp1, tmp0, tmp1, mask0, mask1, tmp2, tmp3); tmp0 = tmp2 + tmp3; tmp1 = tmp2 - tmp3; ADD2(tmp0, 3, tmp1, 3, out0, out1); out0 >>= 3; out1 >>= 3; mb_dqcoeff[0] = __msa_copy_s_h(out0, 0); mb_dqcoeff[16] = __msa_copy_s_h(out0, 4); mb_dqcoeff[32] = __msa_copy_s_h(out1, 0); mb_dqcoeff[48] = __msa_copy_s_h(out1, 4); mb_dqcoeff[64] = __msa_copy_s_h(out0, 1); mb_dqcoeff[80] = __msa_copy_s_h(out0, 5); mb_dqcoeff[96] = __msa_copy_s_h(out1, 1); mb_dqcoeff[112] = __msa_copy_s_h(out1, 5); mb_dqcoeff[128] = __msa_copy_s_h(out0, 2); mb_dqcoeff[144] = __msa_copy_s_h(out0, 6); mb_dqcoeff[160] = __msa_copy_s_h(out1, 2); mb_dqcoeff[176] = __msa_copy_s_h(out1, 6); mb_dqcoeff[192] = __msa_copy_s_h(out0, 3); mb_dqcoeff[208] = __msa_copy_s_h(out0, 7); mb_dqcoeff[224] = __msa_copy_s_h(out1, 3); mb_dqcoeff[240] = __msa_copy_s_h(out1, 7); } static void dequant_idct4x4_addblk_msa(int16_t *input, int16_t *dequant_input, uint8_t *dest, int32_t dest_stride) { v8i16 input0, input1, dequant_in0, dequant_in1, mul0, mul1; v8i16 in0, in1, in2, in3, hz0_h, hz1_h, hz2_h, hz3_h; v16u8 dest0, dest1, dest2, dest3; v4i32 hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3, res0, res1, res2, res3; v2i64 zero = { 0 }; LD_SH2(input, 8, input0, input1); LD_SH2(dequant_input, 8, dequant_in0, dequant_in1); MUL2(input0, dequant_in0, input1, dequant_in1, mul0, mul1); PCKEV_D2_SH(zero, mul0, zero, mul1, in0, in2); PCKOD_D2_SH(zero, mul0, zero, mul1, in1, in3); VP8_IDCT_1D_H(in0, in1, in2, in3, hz0_h, hz1_h, hz2_h, hz3_h); PCKEV_D2_SH(hz1_h, hz0_h, hz3_h, hz2_h, mul0, mul1); UNPCK_SH_SW(mul0, hz0_w, hz1_w); UNPCK_SH_SW(mul1, hz2_w, hz3_w); TRANSPOSE4x4_SW_SW(hz0_w, hz1_w, hz2_w, hz3_w, hz0_w, hz1_w, hz2_w, hz3_w); VP8_IDCT_1D_W(hz0_w, hz1_w, hz2_w, hz3_w, vt0, vt1, vt2, vt3); SRARI_W4_SW(vt0, vt1, vt2, vt3, 3); TRANSPOSE4x4_SW_SW(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3); LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3); ILVR_B4_SW(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1, res2, res3); ILVR_H4_SW(zero, res0, zero, res1, zero, res2, zero, 
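/* zero-extend the predictor pixels to 32 bits before adding
   the reconstructed residual */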
res3, res0, res1, res2, res3); ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3); res0 = CLIP_SW_0_255(res0); res1 = CLIP_SW_0_255(res1); res2 = CLIP_SW_0_255(res2); res3 = CLIP_SW_0_255(res3); PCKEV_B2_SW(res0, res1, res2, res3, vt0, vt1); res0 = (v4i32)__msa_pckev_b((v16i8)vt0, (v16i8)vt1); ST4x4_UB(res0, res0, 3, 2, 1, 0, dest, dest_stride); } static void dequant_idct4x4_addblk_2x_msa(int16_t *input, int16_t *dequant_input, uint8_t *dest, int32_t dest_stride) { v16u8 dest0, dest1, dest2, dest3; v8i16 in0, in1, in2, in3, mul0, mul1, mul2, mul3, dequant_in0, dequant_in1; v8i16 hz0, hz1, hz2, hz3, vt0, vt1, vt2, vt3, res0, res1, res2, res3; v4i32 hz0l, hz1l, hz2l, hz3l, hz0r, hz1r, hz2r, hz3r; v4i32 vt0l, vt1l, vt2l, vt3l, vt0r, vt1r, vt2r, vt3r; v16i8 zero = { 0 }; LD_SH4(input, 8, in0, in1, in2, in3); LD_SH2(dequant_input, 8, dequant_in0, dequant_in1); MUL4(in0, dequant_in0, in1, dequant_in1, in2, dequant_in0, in3, dequant_in1, mul0, mul1, mul2, mul3); PCKEV_D2_SH(mul2, mul0, mul3, mul1, in0, in2); PCKOD_D2_SH(mul2, mul0, mul3, mul1, in1, in3); VP8_IDCT_1D_H(in0, in1, in2, in3, hz0, hz1, hz2, hz3); TRANSPOSE_TWO_4x4_H(hz0, hz1, hz2, hz3, hz0, hz1, hz2, hz3); UNPCK_SH_SW(hz0, hz0r, hz0l); UNPCK_SH_SW(hz1, hz1r, hz1l); UNPCK_SH_SW(hz2, hz2r, hz2l); UNPCK_SH_SW(hz3, hz3r, hz3l); VP8_IDCT_1D_W(hz0l, hz1l, hz2l, hz3l, vt0l, vt1l, vt2l, vt3l); SRARI_W4_SW(vt0l, vt1l, vt2l, vt3l, 3); VP8_IDCT_1D_W(hz0r, hz1r, hz2r, hz3r, vt0r, vt1r, vt2r, vt3r); SRARI_W4_SW(vt0r, vt1r, vt2r, vt3r, 3); PCKEV_H4_SH(vt0l, vt0r, vt1l, vt1r, vt2l, vt2r, vt3l, vt3r, vt0, vt1, vt2, vt3); TRANSPOSE_TWO_4x4_H(vt0, vt1, vt2, vt3, vt0, vt1, vt2, vt3); LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3); ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1, res2, res3); ADD4(res0, vt0, res1, vt1, res2, vt2, res3, vt3, res0, res1, res2, res3); CLIP_SH4_0_255(res0, res1, res2, res3); PCKEV_B2_SW(res1, res0, res3, res2, vt0l, vt1l); ST8x4_UB(vt0l, vt1l, dest, dest_stride); __asm__ __volatile__( "sw $zero, 0(%[input]) \n\t" "sw $zero, 4(%[input]) \n\t" "sw $zero, 8(%[input]) \n\t" "sw $zero, 12(%[input]) \n\t" "sw $zero, 16(%[input]) \n\t" "sw $zero, 20(%[input]) \n\t" "sw $zero, 24(%[input]) \n\t" "sw $zero, 28(%[input]) \n\t" "sw $zero, 32(%[input]) \n\t" "sw $zero, 36(%[input]) \n\t" "sw $zero, 40(%[input]) \n\t" "sw $zero, 44(%[input]) \n\t" "sw $zero, 48(%[input]) \n\t" "sw $zero, 52(%[input]) \n\t" "sw $zero, 56(%[input]) \n\t" "sw $zero, 60(%[input]) \n\t" :: [input] "r"(input)); } static void dequant_idct_addconst_2x_msa(int16_t *input, int16_t *dequant_input, uint8_t *dest, int32_t dest_stride) { v8i16 input_dc0, input_dc1, vec, res0, res1, res2, res3; v16u8 dest0, dest1, dest2, dest3; v16i8 zero = { 0 }; input_dc0 = __msa_fill_h(input[0] * dequant_input[0]); input_dc1 = __msa_fill_h(input[16] * dequant_input[0]); SRARI_H2_SH(input_dc0, input_dc1, 3); vec = (v8i16)__msa_pckev_d((v2i64)input_dc1, (v2i64)input_dc0); input[0] = 0; input[16] = 0; LD_UB4(dest, dest_stride, dest0, dest1, dest2, dest3); ILVR_B4_SH(zero, dest0, zero, dest1, zero, dest2, zero, dest3, res0, res1, res2, res3); ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3); CLIP_SH4_0_255(res0, res1, res2, res3); PCKEV_B2_SH(res1, res0, res3, res2, res0, res1); ST8x4_UB(res0, res1, dest, dest_stride); } void vp8_short_idct4x4llm_msa(int16_t *input, uint8_t *pred_ptr, int32_t pred_stride, uint8_t *dst_ptr, int32_t dst_stride) { idct4x4_addblk_msa(input, pred_ptr, pred_stride, dst_ptr, 
dst_stride); } void vp8_dc_only_idct_add_msa(int16_t input_dc, uint8_t *pred_ptr, int32_t pred_stride, uint8_t *dst_ptr, int32_t dst_stride) { idct4x4_addconst_msa(input_dc, pred_ptr, pred_stride, dst_ptr, dst_stride); } void vp8_dequantize_b_msa(BLOCKD *d, int16_t *DQC) { v8i16 dqc0, dqc1, q0, q1, dq0, dq1; LD_SH2(DQC, 8, dqc0, dqc1); LD_SH2(d->qcoeff, 8, q0, q1); MUL2(dqc0, q0, dqc1, q1, dq0, dq1); ST_SH2(dq0, dq1, d->dqcoeff, 8); } void vp8_dequant_idct_add_msa(int16_t *input, int16_t *dq, uint8_t *dest, int32_t stride) { dequant_idct4x4_addblk_msa(input, dq, dest, stride); __asm__ __volatile__( "sw $zero, 0(%[input]) \n\t" "sw $zero, 4(%[input]) \n\t" "sw $zero, 8(%[input]) \n\t" "sw $zero, 12(%[input]) \n\t" "sw $zero, 16(%[input]) \n\t" "sw $zero, 20(%[input]) \n\t" "sw $zero, 24(%[input]) \n\t" "sw $zero, 28(%[input]) \n\t" : : [input] "r"(input)); } void vp8_dequant_idct_add_y_block_msa(int16_t *q, int16_t *dq, uint8_t *dst, int32_t stride, char *eobs) { int16_t *eobs_h = (int16_t *)eobs; uint8_t i; for (i = 4; i--;) { if (eobs_h[0]) { if (eobs_h[0] & 0xfefe) { dequant_idct4x4_addblk_2x_msa(q, dq, dst, stride); } else { dequant_idct_addconst_2x_msa(q, dq, dst, stride); } } q += 32; if (eobs_h[1]) { if (eobs_h[1] & 0xfefe) { dequant_idct4x4_addblk_2x_msa(q, dq, dst + 8, stride); } else { dequant_idct_addconst_2x_msa(q, dq, dst + 8, stride); } } q += 32; dst += (4 * stride); eobs_h += 2; } } void vp8_dequant_idct_add_uv_block_msa(int16_t *q, int16_t *dq, uint8_t *dst_u, uint8_t *dst_v, int32_t stride, char *eobs) { int16_t *eobs_h = (int16_t *)eobs; if (eobs_h[0]) { if (eobs_h[0] & 0xfefe) { dequant_idct4x4_addblk_2x_msa(q, dq, dst_u, stride); } else { dequant_idct_addconst_2x_msa(q, dq, dst_u, stride); } } q += 32; dst_u += (stride * 4); if (eobs_h[1]) { if (eobs_h[1] & 0xfefe) { dequant_idct4x4_addblk_2x_msa(q, dq, dst_u, stride); } else { dequant_idct_addconst_2x_msa(q, dq, dst_u, stride); } } q += 32; if (eobs_h[2]) { if (eobs_h[2] & 0xfefe) { dequant_idct4x4_addblk_2x_msa(q, dq, dst_v, stride); } else { dequant_idct_addconst_2x_msa(q, dq, dst_v, stride); } } q += 32; dst_v += (stride * 4); if (eobs_h[3]) { if (eobs_h[3] & 0xfefe) { dequant_idct4x4_addblk_2x_msa(q, dq, dst_v, stride); } else { dequant_idct_addconst_2x_msa(q, dq, dst_v, stride); } } } libvpx-1.8.2/vp8/common/mips/msa/loopfilter_filters_msa.c000066400000000000000000001010571357355204000235060ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vp8_rtcd.h" #include "vp8/common/loopfilter.h" #include "vp8/common/mips/msa/vp8_macros_msa.h" #define VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask) \ { \ v16u8 p1_a_sub_q1, p0_a_sub_q0; \ \ p0_a_sub_q0 = __msa_asub_u_b(p0, q0); \ p1_a_sub_q1 = __msa_asub_u_b(p1, q1); \ p1_a_sub_q1 = (v16u8)__msa_srli_b((v16i8)p1_a_sub_q1, 1); \ p0_a_sub_q0 = __msa_adds_u_b(p0_a_sub_q0, p0_a_sub_q0); \ mask = __msa_adds_u_b(p0_a_sub_q0, p1_a_sub_q1); \ mask = ((v16u8)mask <= b_limit); \ } #define VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev) \ { \ v16i8 p1_m, p0_m, q0_m, q1_m, filt, q0_sub_p0, t1, t2; \ const v16i8 cnst4b = __msa_ldi_b(4); \ const v16i8 cnst3b = __msa_ldi_b(3); \ \ p1_m = (v16i8)__msa_xori_b(p1, 0x80); \ p0_m = (v16i8)__msa_xori_b(p0, 0x80); \ q0_m = (v16i8)__msa_xori_b(q0, 0x80); \ q1_m = (v16i8)__msa_xori_b(q1, 0x80); \ \ filt = __msa_subs_s_b(p1_m, q1_m); \ filt &= hev; \ q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m); \ filt = __msa_adds_s_b(filt, q0_sub_p0); \ filt = __msa_adds_s_b(filt, q0_sub_p0); \ filt = __msa_adds_s_b(filt, q0_sub_p0); \ filt &= mask; \ t1 = __msa_adds_s_b(filt, cnst4b); \ t1 >>= cnst3b; \ t2 = __msa_adds_s_b(filt, cnst3b); \ t2 >>= cnst3b; \ q0_m = __msa_subs_s_b(q0_m, t1); \ q0 = __msa_xori_b((v16u8)q0_m, 0x80); \ p0_m = __msa_adds_s_b(p0_m, t2); \ p0 = __msa_xori_b((v16u8)p0_m, 0x80); \ filt = __msa_srari_b(t1, 1); \ hev = __msa_xori_b(hev, 0xff); \ filt &= hev; \ q1_m = __msa_subs_s_b(q1_m, filt); \ q1 = __msa_xori_b((v16u8)q1_m, 0x80); \ p1_m = __msa_adds_s_b(p1_m, filt); \ p1 = __msa_xori_b((v16u8)p1_m, 0x80); \ } #define VP8_SIMPLE_FILT(p1_in, p0_in, q0_in, q1_in, mask) \ { \ v16i8 p1_m, p0_m, q0_m, q1_m, filt, filt1, filt2; \ v16i8 q0_sub_p0; \ const v16i8 cnst4b = __msa_ldi_b(4); \ const v16i8 cnst3b = __msa_ldi_b(3); \ \ p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \ p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \ q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \ q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \ \ filt = __msa_subs_s_b(p1_m, q1_m); \ q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m); \ filt = __msa_adds_s_b(filt, q0_sub_p0); \ filt = __msa_adds_s_b(filt, q0_sub_p0); \ filt = __msa_adds_s_b(filt, q0_sub_p0); \ filt &= mask; \ filt1 = __msa_adds_s_b(filt, cnst4b); \ filt1 >>= cnst3b; \ filt2 = __msa_adds_s_b(filt, cnst3b); \ filt2 >>= cnst3b; \ q0_m = __msa_subs_s_b(q0_m, filt1); \ p0_m = __msa_adds_s_b(p0_m, filt2); \ q0_in = __msa_xori_b((v16u8)q0_m, 0x80); \ p0_in = __msa_xori_b((v16u8)p0_m, 0x80); \ } #define VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev) \ { \ v16i8 p2_m, p1_m, p0_m, q2_m, q1_m, q0_m; \ v16i8 u, filt, t1, t2, filt_sign, q0_sub_p0; \ v8i16 filt_r, filt_l, u_r, u_l; \ v8i16 temp0, temp1, temp2, temp3; \ const v16i8 cnst4b = __msa_ldi_b(4); \ const v16i8 cnst3b = __msa_ldi_b(3); \ const v8i16 cnst9h = __msa_ldi_h(9); \ const v8i16 cnst63h = __msa_ldi_h(63); \ \ p2_m = (v16i8)__msa_xori_b(p2, 0x80); \ p1_m = (v16i8)__msa_xori_b(p1, 0x80); \ p0_m = (v16i8)__msa_xori_b(p0, 0x80); \ q0_m = (v16i8)__msa_xori_b(q0, 0x80); \ q1_m = (v16i8)__msa_xori_b(q1, 0x80); \ q2_m = (v16i8)__msa_xori_b(q2, 0x80); \ \ filt = __msa_subs_s_b(p1_m, q1_m); \ q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m); \ filt = __msa_adds_s_b(filt, q0_sub_p0); \ filt = __msa_adds_s_b(filt, q0_sub_p0); \ filt = __msa_adds_s_b(filt, q0_sub_p0); \ filt &= mask; \ \ t2 = filt & hev; \ hev = __msa_xori_b(hev, 0xff); \ filt &= hev; \ t1 = __msa_adds_s_b(t2, cnst4b); \ t1 >>= cnst3b; \ t2 = __msa_adds_s_b(t2, cnst3b); \ t2 >>= cnst3b; \ q0_m = __msa_subs_s_b(q0_m, t1); \ p0_m = 
__msa_adds_s_b(p0_m, t2); \ filt_sign = __msa_clti_s_b(filt, 0); \ ILVRL_B2_SH(filt_sign, filt, filt_r, filt_l); \ temp0 = filt_r * cnst9h; \ temp1 = temp0 + cnst63h; \ temp2 = filt_l * cnst9h; \ temp3 = temp2 + cnst63h; \ \ u_r = temp1 >> 7; \ u_r = __msa_sat_s_h(u_r, 7); \ u_l = temp3 >> 7; \ u_l = __msa_sat_s_h(u_l, 7); \ u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \ q2_m = __msa_subs_s_b(q2_m, u); \ p2_m = __msa_adds_s_b(p2_m, u); \ q2 = __msa_xori_b((v16u8)q2_m, 0x80); \ p2 = __msa_xori_b((v16u8)p2_m, 0x80); \ \ temp1 += temp0; \ temp3 += temp2; \ \ u_r = temp1 >> 7; \ u_r = __msa_sat_s_h(u_r, 7); \ u_l = temp3 >> 7; \ u_l = __msa_sat_s_h(u_l, 7); \ u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \ q1_m = __msa_subs_s_b(q1_m, u); \ p1_m = __msa_adds_s_b(p1_m, u); \ q1 = __msa_xori_b((v16u8)q1_m, 0x80); \ p1 = __msa_xori_b((v16u8)p1_m, 0x80); \ \ temp1 += temp0; \ temp3 += temp2; \ \ u_r = temp1 >> 7; \ u_r = __msa_sat_s_h(u_r, 7); \ u_l = temp3 >> 7; \ u_l = __msa_sat_s_h(u_l, 7); \ u = __msa_pckev_b((v16i8)u_l, (v16i8)u_r); \ q0_m = __msa_subs_s_b(q0_m, u); \ p0_m = __msa_adds_s_b(p0_m, u); \ q0 = __msa_xori_b((v16u8)q0_m, 0x80); \ p0 = __msa_xori_b((v16u8)p0_m, 0x80); \ } #define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \ limit_in, b_limit_in, thresh_in, hev_out, mask_out, \ flat_out) \ { \ v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \ v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \ \ p3_asub_p2_m = __msa_asub_u_b((p3_in), (p2_in)); \ p2_asub_p1_m = __msa_asub_u_b((p2_in), (p1_in)); \ p1_asub_p0_m = __msa_asub_u_b((p1_in), (p0_in)); \ q1_asub_q0_m = __msa_asub_u_b((q1_in), (q0_in)); \ q2_asub_q1_m = __msa_asub_u_b((q2_in), (q1_in)); \ q3_asub_q2_m = __msa_asub_u_b((q3_in), (q2_in)); \ p0_asub_q0_m = __msa_asub_u_b((p0_in), (q0_in)); \ p1_asub_q1_m = __msa_asub_u_b((p1_in), (q1_in)); \ flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m); \ hev_out = (thresh_in) < (v16u8)flat_out; \ p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m); \ p1_asub_q1_m >>= 1; \ p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m); \ mask_out = (b_limit_in) < p0_asub_q0_m; \ mask_out = __msa_max_u_b(flat_out, mask_out); \ p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m); \ mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out); \ q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m); \ mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out); \ mask_out = (limit_in) < (v16u8)mask_out; \ mask_out = __msa_xori_b(mask_out, 0xff); \ } #define VP8_ST6x1_UB(in0, in0_idx, in1, in1_idx, pdst, stride) \ { \ uint16_t tmp0_h; \ uint32_t tmp0_w; \ \ tmp0_w = __msa_copy_u_w((v4i32)in0, in0_idx); \ tmp0_h = __msa_copy_u_h((v8i16)in1, in1_idx); \ SW(tmp0_w, pdst); \ SH(tmp0_h, pdst + stride); \ } static void loop_filter_horizontal_4_dual_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit0_ptr, const uint8_t *limit0_ptr, const uint8_t *thresh0_ptr, const uint8_t *b_limit1_ptr, const uint8_t *limit1_ptr, const uint8_t *thresh1_ptr) { v16u8 mask, hev, flat; v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr); thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr); thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0); b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr); b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr); b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0); limit0 = 
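/* as with thresh0 and b_limit0 above, the two per-edge limits are splatted and interleaved doubleword-wise: the low 8 bytes steer the first edge and the high 8 bytes the second, so a single 16-wide pass filters both edges */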
(v16u8)__msa_fill_b(*limit0_ptr); limit1 = (v16u8)__msa_fill_b(*limit1_ptr); limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev, mask, flat); VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev); ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch); } static void loop_filter_vertical_4_dual_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit0_ptr, const uint8_t *limit0_ptr, const uint8_t *thresh0_ptr, const uint8_t *b_limit1_ptr, const uint8_t *limit1_ptr, const uint8_t *thresh1_ptr) { v16u8 mask, hev, flat; v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 row0, row1, row2, row3, row4, row5, row6, row7; v16u8 row8, row9, row10, row11, row12, row13, row14, row15; v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; LD_UB8(src - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7); LD_UB8(src - 4 + (8 * pitch), pitch, row8, row9, row10, row11, row12, row13, row14, row15); TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8, row9, row10, row11, row12, row13, row14, row15, p3, p2, p1, p0, q0, q1, q2, q3); thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr); thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr); thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0); b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr); b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr); b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0); limit0 = (v16u8)__msa_fill_b(*limit0_ptr); limit1 = (v16u8)__msa_fill_b(*limit1_ptr); limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev, mask, flat); VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev); ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1); ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3); ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1); ILVRL_H2_SH(tmp1, tmp0, tmp4, tmp5); src -= 2; ST4x8_UB(tmp2, tmp3, src, pitch); src += (8 * pitch); ST4x8_UB(tmp4, tmp5, src, pitch); } static void mbloop_filter_horizontal_edge_y_msa(uint8_t *src, int32_t pitch, const uint8_t b_limit_in, const uint8_t limit_in, const uint8_t thresh_in) { uint8_t *temp_src; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 mask, hev, flat, thresh, limit, b_limit; b_limit = (v16u8)__msa_fill_b(b_limit_in); limit = (v16u8)__msa_fill_b(limit_in); thresh = (v16u8)__msa_fill_b(thresh_in); temp_src = src - (pitch << 2); LD_UB8(temp_src, pitch, p3, p2, p1, p0, q0, q1, q2, q3); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev); temp_src = src - 3 * pitch; ST_UB4(p2, p1, p0, q0, temp_src, pitch); temp_src += (4 * pitch); ST_UB2(q1, q2, temp_src, pitch); } static void mbloop_filter_horizontal_edge_uv_msa(uint8_t *src_u, uint8_t *src_v, int32_t pitch, const uint8_t b_limit_in, const uint8_t limit_in, const uint8_t thresh_in) { uint8_t *temp_src; uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 mask, hev, flat, thresh, limit, b_limit; v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u; v16u8 p3_v, p2_v, p1_v, p0_v, q3_v, q2_v, q1_v, q0_v; b_limit = (v16u8)__msa_fill_b(b_limit_in); limit = (v16u8)__msa_fill_b(limit_in); thresh = (v16u8)__msa_fill_b(thresh_in); temp_src = src_u - (pitch << 2); LD_UB8(temp_src, pitch, p3_u, p2_u, p1_u, p0_u, q0_u, q1_u, q2_u, q3_u); temp_src = src_v - (pitch << 2); LD_UB8(temp_src, pitch, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v); ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, 
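/* u rows land in the low 8 bytes and v rows in the high 8 bytes of each combined vector, so one 16-wide filter pass covers both chroma planes */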
p1_u, p0_v, p0_u, p3, p2, p1, p0); ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev); p2_d = __msa_copy_u_d((v2i64)p2, 0); p1_d = __msa_copy_u_d((v2i64)p1, 0); p0_d = __msa_copy_u_d((v2i64)p0, 0); q0_d = __msa_copy_u_d((v2i64)q0, 0); q1_d = __msa_copy_u_d((v2i64)q1, 0); q2_d = __msa_copy_u_d((v2i64)q2, 0); src_u -= (pitch * 3); SD4(p2_d, p1_d, p0_d, q0_d, src_u, pitch); src_u += 4 * pitch; SD(q1_d, src_u); src_u += pitch; SD(q2_d, src_u); p2_d = __msa_copy_u_d((v2i64)p2, 1); p1_d = __msa_copy_u_d((v2i64)p1, 1); p0_d = __msa_copy_u_d((v2i64)p0, 1); q0_d = __msa_copy_u_d((v2i64)q0, 1); q1_d = __msa_copy_u_d((v2i64)q1, 1); q2_d = __msa_copy_u_d((v2i64)q2, 1); src_v -= (pitch * 3); SD4(p2_d, p1_d, p0_d, q0_d, src_v, pitch); src_v += 4 * pitch; SD(q1_d, src_v); src_v += pitch; SD(q2_d, src_v); } static void mbloop_filter_vertical_edge_y_msa(uint8_t *src, int32_t pitch, const uint8_t b_limit_in, const uint8_t limit_in, const uint8_t thresh_in) { uint8_t *temp_src; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 mask, hev, flat, thresh, limit, b_limit; v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; v16u8 row9, row10, row11, row12, row13, row14, row15; v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; b_limit = (v16u8)__msa_fill_b(b_limit_in); limit = (v16u8)__msa_fill_b(limit_in); thresh = (v16u8)__msa_fill_b(thresh_in); temp_src = src - 4; LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7); temp_src += (8 * pitch); LD_UB8(temp_src, pitch, row8, row9, row10, row11, row12, row13, row14, row15); TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8, row9, row10, row11, row12, row13, row14, row15, p3, p2, p1, p0, q0, q1, q2, q3); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev); ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1); ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4); ILVL_B2_SH(p1, p2, q0, p0, tmp0, tmp1); ILVRL_H2_SH(tmp1, tmp0, tmp6, tmp7); ILVRL_B2_SH(q2, q1, tmp2, tmp5); temp_src = src - 3; VP8_ST6x1_UB(tmp3, 0, tmp2, 0, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp3, 1, tmp2, 1, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp3, 2, tmp2, 2, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp3, 3, tmp2, 3, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp4, 0, tmp2, 4, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp4, 1, tmp2, 5, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp4, 2, tmp2, 6, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp4, 3, tmp2, 7, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp6, 0, tmp5, 0, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp6, 1, tmp5, 1, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp6, 2, tmp5, 2, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp6, 3, tmp5, 3, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp7, 0, tmp5, 4, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp7, 1, tmp5, 5, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp7, 2, tmp5, 6, temp_src, 4); temp_src += pitch; VP8_ST6x1_UB(tmp7, 3, tmp5, 7, temp_src, 4); } static void mbloop_filter_vertical_edge_uv_msa(uint8_t *src_u, uint8_t *src_v, int32_t pitch, const uint8_t b_limit_in, const uint8_t limit_in, const uint8_t thresh_in) { v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 mask, hev, flat, thresh, limit, b_limit; v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; v16u8 row9, row10, row11, 
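/* rows 0-7 are loaded from the u plane and rows 8-15 from the v plane; the 16x8 transpose below then yields p3..q3 vectors holding u pixels in their low bytes and v pixels in their high bytes */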
row12, row13, row14, row15; v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; b_limit = (v16u8)__msa_fill_b(b_limit_in); limit = (v16u8)__msa_fill_b(limit_in); thresh = (v16u8)__msa_fill_b(thresh_in); LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7); LD_UB8(src_v - 4, pitch, row8, row9, row10, row11, row12, row13, row14, row15); TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8, row9, row10, row11, row12, row13, row14, row15, p3, p2, p1, p0, q0, q1, q2, q3); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); VP8_MBFILTER(p2, p1, p0, q0, q1, q2, mask, hev); ILVR_B2_SH(p1, p2, q0, p0, tmp0, tmp1); ILVRL_H2_SH(tmp1, tmp0, tmp3, tmp4); ILVL_B2_SH(p1, p2, q0, p0, tmp0, tmp1); ILVRL_H2_SH(tmp1, tmp0, tmp6, tmp7); ILVRL_B2_SH(q2, q1, tmp2, tmp5); src_u -= 3; VP8_ST6x1_UB(tmp3, 0, tmp2, 0, src_u, 4); src_u += pitch; VP8_ST6x1_UB(tmp3, 1, tmp2, 1, src_u, 4); src_u += pitch; VP8_ST6x1_UB(tmp3, 2, tmp2, 2, src_u, 4); src_u += pitch; VP8_ST6x1_UB(tmp3, 3, tmp2, 3, src_u, 4); src_u += pitch; VP8_ST6x1_UB(tmp4, 0, tmp2, 4, src_u, 4); src_u += pitch; VP8_ST6x1_UB(tmp4, 1, tmp2, 5, src_u, 4); src_u += pitch; VP8_ST6x1_UB(tmp4, 2, tmp2, 6, src_u, 4); src_u += pitch; VP8_ST6x1_UB(tmp4, 3, tmp2, 7, src_u, 4); src_v -= 3; VP8_ST6x1_UB(tmp6, 0, tmp5, 0, src_v, 4); src_v += pitch; VP8_ST6x1_UB(tmp6, 1, tmp5, 1, src_v, 4); src_v += pitch; VP8_ST6x1_UB(tmp6, 2, tmp5, 2, src_v, 4); src_v += pitch; VP8_ST6x1_UB(tmp6, 3, tmp5, 3, src_v, 4); src_v += pitch; VP8_ST6x1_UB(tmp7, 0, tmp5, 4, src_v, 4); src_v += pitch; VP8_ST6x1_UB(tmp7, 1, tmp5, 5, src_v, 4); src_v += pitch; VP8_ST6x1_UB(tmp7, 2, tmp5, 6, src_v, 4); src_v += pitch; VP8_ST6x1_UB(tmp7, 3, tmp5, 7, src_v, 4); } void vp8_loop_filter_simple_horizontal_edge_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr) { v16u8 p1, p0, q1, q0; v16u8 mask, b_limit; b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); LD_UB4(src - (pitch << 1), pitch, p1, p0, q0, q1); VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask); VP8_SIMPLE_FILT(p1, p0, q0, q1, mask); ST_UB2(p0, q0, (src - pitch), pitch); } void vp8_loop_filter_simple_vertical_edge_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr) { uint8_t *temp_src; v16u8 p1, p0, q1, q0; v16u8 mask, b_limit; v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; v16u8 row9, row10, row11, row12, row13, row14, row15; v8i16 tmp0, tmp1; b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); temp_src = src - 2; LD_UB8(temp_src, pitch, row0, row1, row2, row3, row4, row5, row6, row7); temp_src += (8 * pitch); LD_UB8(temp_src, pitch, row8, row9, row10, row11, row12, row13, row14, row15); TRANSPOSE16x4_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8, row9, row10, row11, row12, row13, row14, row15, p1, p0, q0, q1); VP8_SIMPLE_MASK(p1, p0, q0, q1, b_limit, mask); VP8_SIMPLE_FILT(p1, p0, q0, q1, mask); ILVRL_B2_SH(q0, p0, tmp1, tmp0); src -= 1; ST2x4_UB(tmp1, 0, src, pitch); src += 4 * pitch; ST2x4_UB(tmp1, 4, src, pitch); src += 4 * pitch; ST2x4_UB(tmp0, 0, src, pitch); src += 4 * pitch; ST2x4_UB(tmp0, 4, src, pitch); src += 4 * pitch; } static void loop_filter_horizontal_edge_uv_msa(uint8_t *src_u, uint8_t *src_v, int32_t pitch, const uint8_t b_limit_in, const uint8_t limit_in, const uint8_t thresh_in) { uint64_t p1_d, p0_d, q0_d, q1_d; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 mask, hev, flat, thresh, limit, b_limit; v16u8 p3_u, p2_u, p1_u, p0_u, q3_u, q2_u, q1_u, q0_u; v16u8 p3_v, p2_v, p1_v, p0_v, q3_v, q2_v, q1_v, q0_v; thresh = 
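/* broadcast the scalar thresh/limit/b_limit values to all 16 lanes */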
(v16u8)__msa_fill_b(thresh_in); limit = (v16u8)__msa_fill_b(limit_in); b_limit = (v16u8)__msa_fill_b(b_limit_in); src_u = src_u - (pitch << 2); LD_UB8(src_u, pitch, p3_u, p2_u, p1_u, p0_u, q0_u, q1_u, q2_u, q3_u); src_u += (5 * pitch); src_v = src_v - (pitch << 2); LD_UB8(src_v, pitch, p3_v, p2_v, p1_v, p0_v, q0_v, q1_v, q2_v, q3_v); src_v += (5 * pitch); /* right 8 elements of p3 are u pixels and left 8 elements of p3 are v pixels */ ILVR_D4_UB(p3_v, p3_u, p2_v, p2_u, p1_v, p1_u, p0_v, p0_u, p3, p2, p1, p0); ILVR_D4_UB(q0_v, q0_u, q1_v, q1_u, q2_v, q2_u, q3_v, q3_u, q0, q1, q2, q3); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev); p1_d = __msa_copy_u_d((v2i64)p1, 0); p0_d = __msa_copy_u_d((v2i64)p0, 0); q0_d = __msa_copy_u_d((v2i64)q0, 0); q1_d = __msa_copy_u_d((v2i64)q1, 0); SD4(q1_d, q0_d, p0_d, p1_d, src_u, (-pitch)); p1_d = __msa_copy_u_d((v2i64)p1, 1); p0_d = __msa_copy_u_d((v2i64)p0, 1); q0_d = __msa_copy_u_d((v2i64)q0, 1); q1_d = __msa_copy_u_d((v2i64)q1, 1); SD4(q1_d, q0_d, p0_d, p1_d, src_v, (-pitch)); } static void loop_filter_vertical_edge_uv_msa(uint8_t *src_u, uint8_t *src_v, int32_t pitch, const uint8_t b_limit_in, const uint8_t limit_in, const uint8_t thresh_in) { uint8_t *temp_src_u, *temp_src_v; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 mask, hev, flat, thresh, limit, b_limit; v16u8 row0, row1, row2, row3, row4, row5, row6, row7, row8; v16u8 row9, row10, row11, row12, row13, row14, row15; v4i32 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; thresh = (v16u8)__msa_fill_b(thresh_in); limit = (v16u8)__msa_fill_b(limit_in); b_limit = (v16u8)__msa_fill_b(b_limit_in); LD_UB8(src_u - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7); LD_UB8(src_v - 4, pitch, row8, row9, row10, row11, row12, row13, row14, row15); TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8, row9, row10, row11, row12, row13, row14, row15, p3, p2, p1, p0, q0, q1, q2, q3); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); VP8_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev); ILVR_B2_SW(p0, p1, q1, q0, tmp0, tmp1); ILVRL_H2_SW(tmp1, tmp0, tmp2, tmp3); tmp0 = (v4i32)__msa_ilvl_b((v16i8)p0, (v16i8)p1); tmp1 = (v4i32)__msa_ilvl_b((v16i8)q1, (v16i8)q0); ILVRL_H2_SW(tmp1, tmp0, tmp4, tmp5); temp_src_u = src_u - 2; ST4x4_UB(tmp2, tmp2, 0, 1, 2, 3, temp_src_u, pitch); temp_src_u += 4 * pitch; ST4x4_UB(tmp3, tmp3, 0, 1, 2, 3, temp_src_u, pitch); temp_src_v = src_v - 2; ST4x4_UB(tmp4, tmp4, 0, 1, 2, 3, temp_src_v, pitch); temp_src_v += 4 * pitch; ST4x4_UB(tmp5, tmp5, 0, 1, 2, 3, temp_src_v, pitch); } void vp8_loop_filter_mbh_msa(uint8_t *src_y, uint8_t *src_u, uint8_t *src_v, int32_t pitch_y, int32_t pitch_u_v, loop_filter_info *lpf_info_ptr) { mbloop_filter_horizontal_edge_y_msa(src_y, pitch_y, *lpf_info_ptr->mblim, *lpf_info_ptr->lim, *lpf_info_ptr->hev_thr); if (src_u) { mbloop_filter_horizontal_edge_uv_msa( src_u, src_v, pitch_u_v, *lpf_info_ptr->mblim, *lpf_info_ptr->lim, *lpf_info_ptr->hev_thr); } } void vp8_loop_filter_mbv_msa(uint8_t *src_y, uint8_t *src_u, uint8_t *src_v, int32_t pitch_y, int32_t pitch_u_v, loop_filter_info *lpf_info_ptr) { mbloop_filter_vertical_edge_y_msa(src_y, pitch_y, *lpf_info_ptr->mblim, *lpf_info_ptr->lim, *lpf_info_ptr->hev_thr); if (src_u) { mbloop_filter_vertical_edge_uv_msa(src_u, src_v, pitch_u_v, *lpf_info_ptr->mblim, *lpf_info_ptr->lim, *lpf_info_ptr->hev_thr); } } void vp8_loop_filter_bh_msa(uint8_t *src_y, uint8_t *src_u, uint8_t *src_v, int32_t
pitch_y, int32_t pitch_u_v, loop_filter_info *lpf_info_ptr) { loop_filter_horizontal_4_dual_msa(src_y + 4 * pitch_y, pitch_y, lpf_info_ptr->blim, lpf_info_ptr->lim, lpf_info_ptr->hev_thr, lpf_info_ptr->blim, lpf_info_ptr->lim, lpf_info_ptr->hev_thr); loop_filter_horizontal_4_dual_msa(src_y + 8 * pitch_y, pitch_y, lpf_info_ptr->blim, lpf_info_ptr->lim, lpf_info_ptr->hev_thr, lpf_info_ptr->blim, lpf_info_ptr->lim, lpf_info_ptr->hev_thr); loop_filter_horizontal_4_dual_msa(src_y + 12 * pitch_y, pitch_y, lpf_info_ptr->blim, lpf_info_ptr->lim, lpf_info_ptr->hev_thr, lpf_info_ptr->blim, lpf_info_ptr->lim, lpf_info_ptr->hev_thr); if (src_u) { loop_filter_horizontal_edge_uv_msa( src_u + (4 * pitch_u_v), src_v + (4 * pitch_u_v), pitch_u_v, *lpf_info_ptr->blim, *lpf_info_ptr->lim, *lpf_info_ptr->hev_thr); } } void vp8_loop_filter_bv_msa(uint8_t *src_y, uint8_t *src_u, uint8_t *src_v, int32_t pitch_y, int32_t pitch_u_v, loop_filter_info *lpf_info_ptr) { loop_filter_vertical_4_dual_msa(src_y + 4, pitch_y, lpf_info_ptr->blim, lpf_info_ptr->lim, lpf_info_ptr->hev_thr, lpf_info_ptr->blim, lpf_info_ptr->lim, lpf_info_ptr->hev_thr); loop_filter_vertical_4_dual_msa(src_y + 8, pitch_y, lpf_info_ptr->blim, lpf_info_ptr->lim, lpf_info_ptr->hev_thr, lpf_info_ptr->blim, lpf_info_ptr->lim, lpf_info_ptr->hev_thr); loop_filter_vertical_4_dual_msa(src_y + 12, pitch_y, lpf_info_ptr->blim, lpf_info_ptr->lim, lpf_info_ptr->hev_thr, lpf_info_ptr->blim, lpf_info_ptr->lim, lpf_info_ptr->hev_thr); if (src_u) { loop_filter_vertical_edge_uv_msa(src_u + 4, src_v + 4, pitch_u_v, *lpf_info_ptr->blim, *lpf_info_ptr->lim, *lpf_info_ptr->hev_thr); } } void vp8_loop_filter_bhs_msa(uint8_t *src_y, int32_t pitch_y, const uint8_t *b_limit_ptr) { vp8_loop_filter_simple_horizontal_edge_msa(src_y + (4 * pitch_y), pitch_y, b_limit_ptr); vp8_loop_filter_simple_horizontal_edge_msa(src_y + (8 * pitch_y), pitch_y, b_limit_ptr); vp8_loop_filter_simple_horizontal_edge_msa(src_y + (12 * pitch_y), pitch_y, b_limit_ptr); } void vp8_loop_filter_bvs_msa(uint8_t *src_y, int32_t pitch_y, const uint8_t *b_limit_ptr) { vp8_loop_filter_simple_vertical_edge_msa(src_y + 4, pitch_y, b_limit_ptr); vp8_loop_filter_simple_vertical_edge_msa(src_y + 8, pitch_y, b_limit_ptr); vp8_loop_filter_simple_vertical_edge_msa(src_y + 12, pitch_y, b_limit_ptr); } libvpx-1.8.2/vp8/common/mips/msa/mfqe_msa.c000066400000000000000000000114751357355204000205330ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vp8_rtcd.h" #include "vp8/common/postproc.h" #include "vp8/common/mips/msa/vp8_macros_msa.h" static void filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, int32_t src_weight) { int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight; int32_t row; uint64_t src0_d, src1_d, dst0_d, dst1_d; v16i8 src0 = { 0 }; v16i8 src1 = { 0 }; v16i8 dst0 = { 0 }; v16i8 dst1 = { 0 }; v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l; src_wt = __msa_fill_h(src_weight); dst_wt = __msa_fill_h(dst_weight); for (row = 2; row--;) { LD2(src_ptr, src_stride, src0_d, src1_d); src_ptr += (2 * src_stride); LD2(dst_ptr, dst_stride, dst0_d, dst1_d); INSERT_D2_SB(src0_d, src1_d, src0); INSERT_D2_SB(dst0_d, dst1_d, dst0); LD2(src_ptr, src_stride, src0_d, src1_d); src_ptr += (2 * src_stride); LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d); INSERT_D2_SB(src0_d, src1_d, src1); INSERT_D2_SB(dst0_d, dst1_d, dst1); UNPCK_UB_SH(src0, src_r, src_l); UNPCK_UB_SH(dst0, dst_r, dst_l); res_h_r = (src_r * src_wt); res_h_r += (dst_r * dst_wt); res_h_l = (src_l * src_wt); res_h_l += (dst_l * dst_wt); SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r); ST8x2_UB(dst0, dst_ptr, dst_stride); dst_ptr += (2 * dst_stride); UNPCK_UB_SH(src1, src_r, src_l); UNPCK_UB_SH(dst1, dst_r, dst_l); res_h_r = (src_r * src_wt); res_h_r += (dst_r * dst_wt); res_h_l = (src_l * src_wt); res_h_l += (dst_l * dst_wt); SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r); ST8x2_UB(dst1, dst_ptr, dst_stride); dst_ptr += (2 * dst_stride); } } static void filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, int32_t src_weight) { int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight; int32_t row; v16i8 src0, src1, src2, src3; v16i8 dst0, dst1, dst2, dst3; v8i16 src_wt, dst_wt; v8i16 res_h_r, res_h_l; v8i16 src_r, src_l, dst_r, dst_l; src_wt = __msa_fill_h(src_weight); dst_wt = __msa_fill_h(dst_weight); for (row = 4; row--;) { LD_SB4(src_ptr, src_stride, src0, src1, src2, src3); src_ptr += (4 * src_stride); LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3); UNPCK_UB_SH(src0, src_r, src_l); UNPCK_UB_SH(dst0, dst_r, dst_l); res_h_r = (src_r * src_wt); res_h_r += (dst_r * dst_wt); res_h_l = (src_l * src_wt); res_h_l += (dst_l * dst_wt); SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); dst_ptr += dst_stride; UNPCK_UB_SH(src1, src_r, src_l); UNPCK_UB_SH(dst1, dst_r, dst_l); res_h_r = (src_r * src_wt); res_h_r += (dst_r * dst_wt); res_h_l = (src_l * src_wt); res_h_l += (dst_l * dst_wt); SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); dst_ptr += dst_stride; UNPCK_UB_SH(src2, src_r, src_l); UNPCK_UB_SH(dst2, dst_r, dst_l); res_h_r = (src_r * src_wt); res_h_r += (dst_r * dst_wt); res_h_l = (src_l * src_wt); res_h_l += (dst_l * dst_wt); SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); dst_ptr += dst_stride; UNPCK_UB_SH(src3, src_r, src_l); UNPCK_UB_SH(dst3, dst_r, dst_l); res_h_r = (src_r * src_wt); res_h_r += (dst_r * dst_wt); res_h_l = (src_l * src_wt); res_h_l += (dst_l * dst_wt); SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); dst_ptr += dst_stride; } } void vp8_filter_by_weight16x16_msa(uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, int32_t 
src_weight) { filter_by_weight16x16_msa(src_ptr, src_stride, dst_ptr, dst_stride, src_weight); } void vp8_filter_by_weight8x8_msa(uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, int32_t src_weight) { filter_by_weight8x8_msa(src_ptr, src_stride, dst_ptr, dst_stride, src_weight); } libvpx-1.8.2/vp8/common/mips/msa/sixtap_filter_msa.c000066400000000000000000001756711357355204000224710ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp8_rtcd.h" #include "vpx_ports/mem.h" #include "vp8/common/filter.h" #include "vp8/common/mips/msa/vp8_macros_msa.h" DECLARE_ALIGNED(16, static const int8_t, vp8_subpel_filters_msa[7][8]) = { { 0, -6, 123, 12, -1, 0, 0, 0 }, { 2, -11, 108, 36, -8, 1, 0, 0 }, /* New 1/4 pel 6 tap filter */ { 0, -9, 93, 50, -6, 0, 0, 0 }, { 3, -16, 77, 77, -16, 3, 0, 0 }, /* New 1/2 pel 6 tap filter */ { 0, -6, 50, 93, -9, 0, 0, 0 }, { 1, -8, 36, 108, -11, 2, 0, 0 }, /* New 1/4 pel 6 tap filter */ { 0, -1, 12, 123, -6, 0, 0, 0 }, }; static const uint8_t vp8_mc_filt_mask_arr[16 * 3] = { /* 8 width cases */ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, /* 4 width cases */ 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20, /* 4 width cases */ 8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28 }; #define HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_h0, filt_h1, \ filt_h2) \ ({ \ v16i8 vec0_m, vec1_m, vec2_m; \ v8i16 hz_out_m; \ \ VSHF_B3_SB(src0, src1, src0, src1, src0, src1, mask0, mask1, mask2, \ vec0_m, vec1_m, vec2_m); \ hz_out_m = \ DPADD_SH3_SH(vec0_m, vec1_m, vec2_m, filt_h0, filt_h1, filt_h2); \ \ hz_out_m = __msa_srari_h(hz_out_m, VP8_FILTER_SHIFT); \ hz_out_m = __msa_sat_s_h(hz_out_m, 7); \ \ hz_out_m; \ }) #define HORIZ_6TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ mask2, filt0, filt1, filt2, out0, out1) \ { \ v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m; \ \ VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \ DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1); \ VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \ DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1); \ VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m); \ DPADD_SB2_SH(vec4_m, vec5_m, filt2, filt2, out0, out1); \ } #define HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ mask2, filt0, filt1, filt2, out0, out1, \ out2, out3) \ { \ v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ \ VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \ VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \ DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \ out0, out1, out2, out3); \ VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m); \ VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m); \ VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec4_m, vec5_m); \ VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec6_m, vec7_m); \ DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1, \ out0, out1, out2, out3); \ DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt2, filt2, filt2, filt2, \ out0, out1, 
out2, out3); \ } #define FILT_4TAP_DPADD_S_H(vec0, vec1, filt0, filt1) \ ({ \ v8i16 tmp0; \ \ tmp0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \ tmp0 = __msa_dpadd_s_h(tmp0, (v16i8)vec1, (v16i8)filt1); \ \ tmp0; \ }) #define HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_h0, filt_h1) \ ({ \ v16i8 vec0_m, vec1_m; \ v8i16 hz_out_m; \ \ VSHF_B2_SB(src0, src1, src0, src1, mask0, mask1, vec0_m, vec1_m); \ hz_out_m = FILT_4TAP_DPADD_S_H(vec0_m, vec1_m, filt_h0, filt_h1); \ \ hz_out_m = __msa_srari_h(hz_out_m, VP8_FILTER_SHIFT); \ hz_out_m = __msa_sat_s_h(hz_out_m, 7); \ \ hz_out_m; \ }) #define HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ filt0, filt1, out0, out1) \ { \ v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \ \ VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \ DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, out0, out1); \ VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \ DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, out0, out1); \ } #define HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ filt0, filt1, out0, out1, out2, out3) \ { \ v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \ \ VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \ VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \ DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \ out0, out1, out2, out3); \ VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec0_m, vec1_m); \ VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec2_m, vec3_m); \ DPADD_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt1, filt1, filt1, filt1, \ out0, out1, out2, out3); \ } static void common_hz_6t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter) { v16i8 src0, src1, src2, src3, filt0, filt1, filt2; v16u8 mask0, mask1, mask2, out; v8i16 filt, out0, out1; mask0 = LD_UB(&vp8_mc_filt_mask_arr[16]); src -= 2; filt = LD_SH(filter); SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2); mask1 = mask0 + 2; mask2 = mask0 + 4; LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_6TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, filt0, filt1, filt2, out0, out1); SRARI_H2_SH(out0, out1, VP8_FILTER_SHIFT); SAT_SH2_SH(out0, out1, 7); out = PCKEV_XORI128_UB(out0, out1); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); } static void common_hz_6t_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter) { v16i8 src0, src1, src2, src3, filt0, filt1, filt2; v16u8 mask0, mask1, mask2, out; v8i16 filt, out0, out1, out2, out3; mask0 = LD_UB(&vp8_mc_filt_mask_arr[16]); src -= 2; filt = LD_SH(filter); SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2); mask1 = mask0 + 2; mask2 = mask0 + 4; LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); src += (4 * src_stride); HORIZ_6TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, filt0, filt1, filt2, out0, out1); LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_6TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, filt0, filt1, filt2, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, VP8_FILTER_SHIFT); SAT_SH4_SH(out0, out1, out2, out3, 7); out = PCKEV_XORI128_UB(out0, out1); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); out = PCKEV_XORI128_UB(out2, out3); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); } static void 
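/* width-4 horizontal 6-tap dispatcher; only heights of 4 and 8 are handled, evidently the only cases the callers request */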
common_hz_6t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { if (4 == height) { common_hz_6t_4x4_msa(src, src_stride, dst, dst_stride, filter); } else if (8 == height) { common_hz_6t_4x8_msa(src, src_stride, dst, dst_stride, filter); } } static void common_hz_6t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, filt0, filt1, filt2; v16u8 mask0, mask1, mask2, tmp0, tmp1; v8i16 filt, out0, out1, out2, out3; mask0 = LD_UB(&vp8_mc_filt_mask_arr[0]); src -= 2; filt = LD_SH(filter); SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2); mask1 = mask0 + 2; mask2 = mask0 + 4; LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); src += (4 * src_stride); HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, filt0, filt1, filt2, out0, out1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, VP8_FILTER_SHIFT); SAT_SH4_SH(out0, out1, out2, out3, 7); tmp0 = PCKEV_XORI128_UB(out0, out1); tmp1 = PCKEV_XORI128_UB(out2, out3); ST8x4_UB(tmp0, tmp1, dst, dst_stride); dst += (4 * dst_stride); for (loop_cnt = (height >> 2) - 1; loop_cnt--;) { LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); src += (4 * src_stride); HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, filt0, filt1, filt2, out0, out1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, VP8_FILTER_SHIFT); SAT_SH4_SH(out0, out1, out2, out3, 7); tmp0 = PCKEV_XORI128_UB(out0, out1); tmp1 = PCKEV_XORI128_UB(out2, out3); ST8x4_UB(tmp0, tmp1, dst, dst_stride); dst += (4 * dst_stride); } } static void common_hz_6t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, filt0, filt1, filt2; v16u8 mask0, mask1, mask2, out; v8i16 filt, out0, out1, out2, out3, out4, out5, out6, out7; mask0 = LD_UB(&vp8_mc_filt_mask_arr[0]); src -= 2; filt = LD_SH(filter); SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2); mask1 = mask0 + 2; mask2 = mask0 + 4; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7); src += (4 * src_stride); HORIZ_6TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, filt0, filt1, filt2, out0, out1, out2, out3); HORIZ_6TAP_8WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1, mask2, filt0, filt1, filt2, out4, out5, out6, out7); SRARI_H4_SH(out0, out1, out2, out3, VP8_FILTER_SHIFT); SRARI_H4_SH(out4, out5, out6, out7, VP8_FILTER_SHIFT); SAT_SH4_SH(out0, out1, out2, out3, 7); SAT_SH4_SH(out4, out5, out6, out7, 7); out = PCKEV_XORI128_UB(out0, out1); ST_UB(out, dst); dst += dst_stride; out = PCKEV_XORI128_UB(out2, out3); ST_UB(out, dst); dst += dst_stride; out = PCKEV_XORI128_UB(out4, out5); ST_UB(out, dst); dst += dst_stride; out = PCKEV_XORI128_UB(out6, out7); ST_UB(out, dst); dst += dst_stride; } } static void common_vt_6t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8; v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r; v16i8 src87_r, 
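/* for 4-wide columns two interleaved row-pairs are packed per 128-bit vector (src2110 holds rows 1|0 in the low half and rows 2|1 in the high half, and so on), so each vertical dot product yields two output rows */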
src2110, src4332, src6554, src8776, filt0, filt1, filt2; v16u8 out; v8i16 filt, out10, out32; src -= (2 * src_stride); filt = LD_SH(filter); SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2); LD_SB5(src, src_stride, src0, src1, src2, src3, src4); src += (5 * src_stride); ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, src32_r, src43_r); ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, src4332); XORI_B2_128_SB(src2110, src4332); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src5, src6, src7, src8); src += (4 * src_stride); ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r, src76_r, src87_r); ILVR_D2_SB(src65_r, src54_r, src87_r, src76_r, src6554, src8776); XORI_B2_128_SB(src6554, src8776); out10 = DPADD_SH3_SH(src2110, src4332, src6554, filt0, filt1, filt2); out32 = DPADD_SH3_SH(src4332, src6554, src8776, filt0, filt1, filt2); SRARI_H2_SH(out10, out32, VP8_FILTER_SHIFT); SAT_SH2_SH(out10, out32, 7); out = PCKEV_XORI128_UB(out10, out32); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); src2110 = src6554; src4332 = src8776; src4 = src8; } } static void common_vt_6t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src7, src8, src9, src10; v16i8 src10_r, src32_r, src76_r, src98_r, src21_r, src43_r, src87_r; v16i8 src109_r, filt0, filt1, filt2; v16u8 tmp0, tmp1; v8i16 filt, out0_r, out1_r, out2_r, out3_r; src -= (2 * src_stride); filt = LD_SH(filter); SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2); LD_SB5(src, src_stride, src0, src1, src2, src3, src4); src += (5 * src_stride); XORI_B5_128_SB(src0, src1, src2, src3, src4); ILVR_B4_SB(src1, src0, src3, src2, src2, src1, src4, src3, src10_r, src32_r, src21_r, src43_r); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src7, src8, src9, src10); XORI_B4_128_SB(src7, src8, src9, src10); src += (4 * src_stride); ILVR_B4_SB(src7, src4, src8, src7, src9, src8, src10, src9, src76_r, src87_r, src98_r, src109_r); out0_r = DPADD_SH3_SH(src10_r, src32_r, src76_r, filt0, filt1, filt2); out1_r = DPADD_SH3_SH(src21_r, src43_r, src87_r, filt0, filt1, filt2); out2_r = DPADD_SH3_SH(src32_r, src76_r, src98_r, filt0, filt1, filt2); out3_r = DPADD_SH3_SH(src43_r, src87_r, src109_r, filt0, filt1, filt2); SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, VP8_FILTER_SHIFT); SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7); tmp0 = PCKEV_XORI128_UB(out0_r, out1_r); tmp1 = PCKEV_XORI128_UB(out2_r, out3_r); ST8x4_UB(tmp0, tmp1, dst, dst_stride); dst += (4 * dst_stride); src10_r = src76_r; src32_r = src98_r; src21_r = src87_r; src43_r = src109_r; src4 = src10; } } static void common_vt_6t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8; v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r; v16i8 src87_r, src10_l, src32_l, src54_l, src76_l, src21_l, src43_l; v16i8 src65_l, src87_l, filt0, filt1, filt2; v16u8 tmp0, tmp1, tmp2, tmp3; v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l, filt; src -= (2 * src_stride); filt = LD_SH(filter); SPLATI_H3_SB(filt, 0, 1, 2, filt0, filt1, filt2); LD_SB5(src, src_stride, src0, src1, src2, src3, src4); src += (5 * src_stride); XORI_B5_128_SB(src0, src1, src2, src3, src4); ILVR_B4_SB(src1, 
src0, src3, src2, src4, src3, src2, src1, src10_r, src32_r, src43_r, src21_r); ILVL_B4_SB(src1, src0, src3, src2, src4, src3, src2, src1, src10_l, src32_l, src43_l, src21_l); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src5, src6, src7, src8); src += (4 * src_stride); XORI_B4_128_SB(src5, src6, src7, src8); ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r, src76_r, src87_r); ILVL_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_l, src65_l, src76_l, src87_l); out0_r = DPADD_SH3_SH(src10_r, src32_r, src54_r, filt0, filt1, filt2); out1_r = DPADD_SH3_SH(src21_r, src43_r, src65_r, filt0, filt1, filt2); out2_r = DPADD_SH3_SH(src32_r, src54_r, src76_r, filt0, filt1, filt2); out3_r = DPADD_SH3_SH(src43_r, src65_r, src87_r, filt0, filt1, filt2); out0_l = DPADD_SH3_SH(src10_l, src32_l, src54_l, filt0, filt1, filt2); out1_l = DPADD_SH3_SH(src21_l, src43_l, src65_l, filt0, filt1, filt2); out2_l = DPADD_SH3_SH(src32_l, src54_l, src76_l, filt0, filt1, filt2); out3_l = DPADD_SH3_SH(src43_l, src65_l, src87_l, filt0, filt1, filt2); SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, VP8_FILTER_SHIFT); SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, VP8_FILTER_SHIFT); SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7); SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7); PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l, out3_r, tmp0, tmp1, tmp2, tmp3); XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3); ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); dst += (4 * dst_stride); src10_r = src54_r; src32_r = src76_r; src21_r = src65_r; src43_r = src87_r; src10_l = src54_l; src32_l = src76_l; src21_l = src65_l; src43_l = src87_l; src4 = src8; } } static void common_hv_6ht_6vt_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8; v16i8 filt_hz0, filt_hz1, filt_hz2; v16u8 mask0, mask1, mask2, out; v8i16 tmp0, tmp1; v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; v8i16 hz_out7, filt, filt_vt0, filt_vt1, filt_vt2, out0, out1, out2, out3; mask0 = LD_UB(&vp8_mc_filt_mask_arr[16]); src -= (2 + 2 * src_stride); filt = LD_SH(filter_horiz); SPLATI_H3_SB(filt, 0, 1, 2, filt_hz0, filt_hz1, filt_hz2); filt = LD_SH(filter_vert); SPLATI_H3_SH(filt, 0, 1, 2, filt_vt0, filt_vt1, filt_vt2); mask1 = mask0 + 2; mask2 = mask0 + 4; LD_SB5(src, src_stride, src0, src1, src2, src3, src4); src += (5 * src_stride); XORI_B5_128_SB(src0, src1, src2, src3, src4); hz_out0 = HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out2 = HORIZ_6TAP_FILT(src2, src3, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out1 = (v8i16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8); hz_out3 = HORIZ_6TAP_FILT(src3, src4, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB2(src, src_stride, src5, src6); src += (2 * src_stride); XORI_B2_128_SB(src5, src6); hz_out5 = HORIZ_6TAP_FILT(src5, src6, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out4 = (v8i16)__msa_sldi_b((v16i8)hz_out5, (v16i8)hz_out3, 8); LD_SB2(src, src_stride, src7, src8); src += (2 * src_stride); XORI_B2_128_SB(src7, src8); hz_out7 = HORIZ_6TAP_FILT(src7, src8, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out6 = (v8i16)__msa_sldi_b((v16i8)hz_out7, (v16i8)hz_out5, 8); out2 = 
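/* pairing consecutive horizontal results even-byte-wise lets the vertical 6-tap collapse into three dot-product/accumulate steps per two output rows */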
(v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4); tmp0 = DPADD_SH3_SH(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2); out3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6); tmp1 = DPADD_SH3_SH(out1, out2, out3, filt_vt0, filt_vt1, filt_vt2); SRARI_H2_SH(tmp0, tmp1, 7); SAT_SH2_SH(tmp0, tmp1, 7); out = PCKEV_XORI128_UB(tmp0, tmp1); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); hz_out3 = hz_out7; out0 = out2; out1 = out3; } } static void common_hv_6ht_6vt_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8; v16i8 filt_hz0, filt_hz1, filt_hz2; v16u8 mask0, mask1, mask2, vec0, vec1; v8i16 filt, filt_vt0, filt_vt1, filt_vt2; v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; v8i16 hz_out7, hz_out8, out0, out1, out2, out3, out4, out5, out6, out7; v8i16 tmp0, tmp1, tmp2, tmp3; mask0 = LD_UB(&vp8_mc_filt_mask_arr[0]); src -= (2 + 2 * src_stride); filt = LD_SH(filter_horiz); SPLATI_H3_SB(filt, 0, 1, 2, filt_hz0, filt_hz1, filt_hz2); mask1 = mask0 + 2; mask2 = mask0 + 4; LD_SB5(src, src_stride, src0, src1, src2, src3, src4); src += (5 * src_stride); XORI_B5_128_SB(src0, src1, src2, src3, src4); hz_out0 = HORIZ_6TAP_FILT(src0, src0, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out1 = HORIZ_6TAP_FILT(src1, src1, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out2 = HORIZ_6TAP_FILT(src2, src2, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out3 = HORIZ_6TAP_FILT(src3, src3, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out4 = HORIZ_6TAP_FILT(src4, src4, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); filt = LD_SH(filter_vert); SPLATI_H3_SH(filt, 0, 1, 2, filt_vt0, filt_vt1, filt_vt2); ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1); ILVEV_B2_SH(hz_out1, hz_out2, hz_out3, hz_out4, out3, out4); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src5, src6, src7, src8); src += (4 * src_stride); XORI_B4_128_SB(src5, src6, src7, src8); hz_out5 = HORIZ_6TAP_FILT(src5, src5, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); out2 = (v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4); tmp0 = DPADD_SH3_SH(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2); hz_out6 = HORIZ_6TAP_FILT(src6, src6, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); out5 = (v8i16)__msa_ilvev_b((v16i8)hz_out6, (v16i8)hz_out5); tmp1 = DPADD_SH3_SH(out3, out4, out5, filt_vt0, filt_vt1, filt_vt2); hz_out7 = HORIZ_6TAP_FILT(src7, src7, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); out7 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6); tmp2 = DPADD_SH3_SH(out1, out2, out7, filt_vt0, filt_vt1, filt_vt2); hz_out8 = HORIZ_6TAP_FILT(src8, src8, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); out6 = (v8i16)__msa_ilvev_b((v16i8)hz_out8, (v16i8)hz_out7); tmp3 = DPADD_SH3_SH(out4, out5, out6, filt_vt0, filt_vt1, filt_vt2); SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7); SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7); vec0 = PCKEV_XORI128_UB(tmp0, tmp1); vec1 = PCKEV_XORI128_UB(tmp2, tmp3); ST8x4_UB(vec0, vec1, dst, dst_stride); dst += (4 * dst_stride); hz_out4 = hz_out8; out0 = out2; out1 = out7; out3 = out5; out4 = out6; } } static void common_hv_6ht_6vt_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { int32_t 
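/* the 16-wide two-dimensional filter simply runs the 8-wide version twice, offset by 8 pixels */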
multiple8_cnt; for (multiple8_cnt = 2; multiple8_cnt--;) { common_hv_6ht_6vt_8w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 8; dst += 8; } } static void common_hz_4t_4x4_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter) { v16i8 src0, src1, src2, src3, filt0, filt1, mask0, mask1; v8i16 filt, out0, out1; v16u8 out; mask0 = LD_SB(&vp8_mc_filt_mask_arr[16]); src -= 1; filt = LD_SH(filter); SPLATI_H2_SB(filt, 0, 1, filt0, filt1); mask1 = mask0 + 2; LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0, filt1, out0, out1); SRARI_H2_SH(out0, out1, VP8_FILTER_SHIFT); SAT_SH2_SH(out0, out1, 7); out = PCKEV_XORI128_UB(out0, out1); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); } static void common_hz_4t_4x8_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter) { v16i8 src0, src1, src2, src3, filt0, filt1, mask0, mask1; v16u8 out; v8i16 filt, out0, out1, out2, out3; mask0 = LD_SB(&vp8_mc_filt_mask_arr[16]); src -= 1; filt = LD_SH(filter); SPLATI_H2_SB(filt, 0, 1, filt0, filt1); mask1 = mask0 + 2; LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0, filt1, out0, out1); LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_4TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0, filt1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, VP8_FILTER_SHIFT); SAT_SH4_SH(out0, out1, out2, out3, 7); out = PCKEV_XORI128_UB(out0, out1); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); out = PCKEV_XORI128_UB(out2, out3); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); } static void common_hz_4t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { if (4 == height) { common_hz_4t_4x4_msa(src, src_stride, dst, dst_stride, filter); } else if (8 == height) { common_hz_4t_4x8_msa(src, src_stride, dst, dst_stride, filter); } } static void common_hz_4t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, filt0, filt1, mask0, mask1; v16u8 tmp0, tmp1; v8i16 filt, out0, out1, out2, out3; mask0 = LD_SB(&vp8_mc_filt_mask_arr[0]); src -= 1; filt = LD_SH(filter); SPLATI_H2_SB(filt, 0, 1, filt0, filt1); mask1 = mask0 + 2; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0, filt1, out0, out1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, VP8_FILTER_SHIFT); SAT_SH4_SH(out0, out1, out2, out3, 7); tmp0 = PCKEV_XORI128_UB(out0, out1); tmp1 = PCKEV_XORI128_UB(out2, out3); ST8x4_UB(tmp0, tmp1, dst, dst_stride); dst += (4 * dst_stride); } } static void common_hz_4t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7; v16i8 filt0, filt1, mask0, mask1; v8i16 filt, out0, out1, out2, out3, out4, out5, out6, out7; v16u8 out; mask0 = 
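/* mask0/mask1 are VSHF byte-shuffle patterns selecting the sliding 4-tap input windows; src backs up one pixel so the taps are centred on each output position */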
LD_SB(&vp8_mc_filt_mask_arr[0]); src -= 1; filt = LD_SH(filter); SPLATI_H2_SB(filt, 0, 1, filt0, filt1); mask1 = mask0 + 2; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); XORI_B8_128_SB(src0, src1, src2, src3, src4, src5, src6, src7); HORIZ_4TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, filt0, filt1, out0, out1, out2, out3); HORIZ_4TAP_8WID_4VECS_FILT(src4, src5, src6, src7, mask0, mask1, filt0, filt1, out4, out5, out6, out7); SRARI_H4_SH(out0, out1, out2, out3, VP8_FILTER_SHIFT); SRARI_H4_SH(out4, out5, out6, out7, VP8_FILTER_SHIFT); SAT_SH4_SH(out0, out1, out2, out3, 7); SAT_SH4_SH(out4, out5, out6, out7, 7); out = PCKEV_XORI128_UB(out0, out1); ST_UB(out, dst); dst += dst_stride; out = PCKEV_XORI128_UB(out2, out3); ST_UB(out, dst); dst += dst_stride; out = PCKEV_XORI128_UB(out4, out5); ST_UB(out, dst); dst += dst_stride; out = PCKEV_XORI128_UB(out6, out7); ST_UB(out, dst); dst += dst_stride; } } static void common_vt_4t_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5; v16i8 src10_r, src32_r, src54_r, src21_r, src43_r, src65_r; v16i8 src2110, src4332, filt0, filt1; v8i16 filt, out10, out32; v16u8 out; src -= src_stride; filt = LD_SH(filter); SPLATI_H2_SB(filt, 0, 1, filt0, filt1); LD_SB3(src, src_stride, src0, src1, src2); src += (3 * src_stride); ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r); src2110 = (v16i8)__msa_ilvr_d((v2i64)src21_r, (v2i64)src10_r); src2110 = (v16i8)__msa_xori_b((v16u8)src2110, 128); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB3(src, src_stride, src3, src4, src5); src += (3 * src_stride); ILVR_B2_SB(src3, src2, src4, src3, src32_r, src43_r); src4332 = (v16i8)__msa_ilvr_d((v2i64)src43_r, (v2i64)src32_r); src4332 = (v16i8)__msa_xori_b((v16u8)src4332, 128); out10 = FILT_4TAP_DPADD_S_H(src2110, src4332, filt0, filt1); src2 = LD_SB(src); src += (src_stride); ILVR_B2_SB(src5, src4, src2, src5, src54_r, src65_r); src2110 = (v16i8)__msa_ilvr_d((v2i64)src65_r, (v2i64)src54_r); src2110 = (v16i8)__msa_xori_b((v16u8)src2110, 128); out32 = FILT_4TAP_DPADD_S_H(src4332, src2110, filt0, filt1); SRARI_H2_SH(out10, out32, VP8_FILTER_SHIFT); SAT_SH2_SH(out10, out32, 7); out = PCKEV_XORI128_UB(out10, out32); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); } } static void common_vt_4t_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src7, src8, src9, src10; v16i8 src10_r, src72_r, src98_r, src21_r, src87_r, src109_r, filt0, filt1; v16u8 tmp0, tmp1; v8i16 filt, out0_r, out1_r, out2_r, out3_r; src -= src_stride; filt = LD_SH(filter); SPLATI_H2_SB(filt, 0, 1, filt0, filt1); LD_SB3(src, src_stride, src0, src1, src2); src += (3 * src_stride); XORI_B3_128_SB(src0, src1, src2); ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src7, src8, src9, src10); src += (4 * src_stride); XORI_B4_128_SB(src7, src8, src9, src10); ILVR_B4_SB(src7, src2, src8, src7, src9, src8, src10, src9, src72_r, src87_r, src98_r, src109_r); out0_r = FILT_4TAP_DPADD_S_H(src10_r, src72_r, filt0, filt1); out1_r = FILT_4TAP_DPADD_S_H(src21_r, src87_r, filt0, filt1); out2_r = FILT_4TAP_DPADD_S_H(src72_r, 
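/*
 * Editor's note: scalar sketch (helper name invented) of the
 * row-recycling pattern in the common_vt_4t_* functions above: a
 * three-row history is kept live, four output rows are produced per
 * iteration, and only the newest rows are carried over
 * (src10_r = src98_r; src21_r = src109_r;), so every source row is
 * loaded exactly once.
 */
#include <stdint.h>

static void vt_4tap_scalar(const uint8_t *src, int stride, uint8_t *dst,
                           int dst_stride, const int8_t *taps, /* 4 taps */
                           int width, int height) {
  int x, y;
  for (x = 0; x < width; ++x) {
    /* sliding window: the three most recent source rows of this column */
    int32_t h0 = src[-stride + x], h1 = src[x], h2 = src[stride + x];
    for (y = 0; y < height; ++y) {
      int32_t h3 = src[(y + 2) * stride + x];
      int32_t s = taps[0] * h0 + taps[1] * h1 + taps[2] * h2 + taps[3] * h3;
      s = (s + 64) >> 7;
      dst[y * dst_stride + x] = (uint8_t)(s < 0 ? 0 : s > 255 ? 255 : s);
      h0 = h1;
      h1 = h2;
      h2 = h3; /* slide the window: one new load per output row */
    }
  }
}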
src98_r, filt0, filt1); out3_r = FILT_4TAP_DPADD_S_H(src87_r, src109_r, filt0, filt1); SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, VP8_FILTER_SHIFT); SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7); tmp0 = PCKEV_XORI128_UB(out0_r, out1_r); tmp1 = PCKEV_XORI128_UB(out2_r, out3_r); ST8x4_UB(tmp0, tmp1, dst, dst_stride); dst += (4 * dst_stride); src10_r = src98_r; src21_r = src109_r; src2 = src10; } } static void common_vt_4t_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6; v16i8 src10_r, src32_r, src54_r, src21_r, src43_r, src65_r, src10_l; v16i8 src32_l, src54_l, src21_l, src43_l, src65_l, filt0, filt1; v16u8 tmp0, tmp1, tmp2, tmp3; v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l; src -= src_stride; filt = LD_SH(filter); SPLATI_H2_SB(filt, 0, 1, filt0, filt1); LD_SB3(src, src_stride, src0, src1, src2); src += (3 * src_stride); XORI_B3_128_SB(src0, src1, src2); ILVR_B2_SB(src1, src0, src2, src1, src10_r, src21_r); ILVL_B2_SB(src1, src0, src2, src1, src10_l, src21_l); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src3, src4, src5, src6); src += (4 * src_stride); XORI_B4_128_SB(src3, src4, src5, src6); ILVR_B4_SB(src3, src2, src4, src3, src5, src4, src6, src5, src32_r, src43_r, src54_r, src65_r); ILVL_B4_SB(src3, src2, src4, src3, src5, src4, src6, src5, src32_l, src43_l, src54_l, src65_l); out0_r = FILT_4TAP_DPADD_S_H(src10_r, src32_r, filt0, filt1); out1_r = FILT_4TAP_DPADD_S_H(src21_r, src43_r, filt0, filt1); out2_r = FILT_4TAP_DPADD_S_H(src32_r, src54_r, filt0, filt1); out3_r = FILT_4TAP_DPADD_S_H(src43_r, src65_r, filt0, filt1); out0_l = FILT_4TAP_DPADD_S_H(src10_l, src32_l, filt0, filt1); out1_l = FILT_4TAP_DPADD_S_H(src21_l, src43_l, filt0, filt1); out2_l = FILT_4TAP_DPADD_S_H(src32_l, src54_l, filt0, filt1); out3_l = FILT_4TAP_DPADD_S_H(src43_l, src65_l, filt0, filt1); SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, VP8_FILTER_SHIFT); SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, VP8_FILTER_SHIFT); SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7); SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7); PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l, out3_r, tmp0, tmp1, tmp2, tmp3); XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3); ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); dst += (4 * dst_stride); src10_r = src54_r; src21_r = src65_r; src10_l = src54_l; src21_l = src65_l; src2 = src6; } } static void common_hv_4ht_4vt_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, filt_hz0, filt_hz1; v16u8 mask0, mask1, out; v8i16 filt, filt_vt0, filt_vt1, tmp0, tmp1, vec0, vec1, vec2; v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5; mask0 = LD_UB(&vp8_mc_filt_mask_arr[16]); src -= (1 + 1 * src_stride); filt = LD_SH(filter_horiz); SPLATI_H2_SB(filt, 0, 1, filt_hz0, filt_hz1); mask1 = mask0 + 2; LD_SB3(src, src_stride, src0, src1, src2); src += (3 * src_stride); XORI_B3_128_SB(src0, src1, src2); hz_out0 = HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_hz0, filt_hz1); hz_out1 = HORIZ_4TAP_FILT(src1, src2, mask0, mask1, filt_hz0, filt_hz1); vec0 = (v8i16)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); filt = LD_SH(filter_vert); SPLATI_H2_SH(filt, 0, 1, filt_vt0, filt_vt1); for (loop_cnt = (height >> 2); loop_cnt--;) { 
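/*
 * Editor's note: scalar model (helper name invented) of how
 * common_vt_4t_16w_msa above rejoins its two 8-wide pipelines.  Each
 * 16-byte row is split by ILVR_B4_SB (low, "right" halves) and
 * ILVL_B4_SB (high, "left" halves), filtered independently, and the
 * saturated halfword results are narrowed back to one 16-byte row by
 * PCKEV_B4_UB, which keeps the even (low) byte of every halfword.
 */
#include <stdint.h>

static void pckev_b_model(const int16_t left[8], const int16_t right[8],
                          uint8_t out[16]) {
  int i;
  for (i = 0; i < 8; ++i) {
    out[i] = (uint8_t)(right[i] & 0xff);    /* low bytes of 'right' lanes */
    out[8 + i] = (uint8_t)(left[i] & 0xff); /* low bytes of 'left' lanes  */
  }
}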
LD_SB4(src, src_stride, src3, src4, src5, src6); src += (4 * src_stride); XORI_B2_128_SB(src3, src4); hz_out3 = HORIZ_4TAP_FILT(src3, src4, mask0, mask1, filt_hz0, filt_hz1); hz_out2 = (v8i16)__msa_sldi_b((v16i8)hz_out3, (v16i8)hz_out1, 8); vec1 = (v8i16)__msa_ilvev_b((v16i8)hz_out3, (v16i8)hz_out2); tmp0 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt_vt0, filt_vt1); XORI_B2_128_SB(src5, src6); hz_out5 = HORIZ_4TAP_FILT(src5, src6, mask0, mask1, filt_hz0, filt_hz1); hz_out4 = (v8i16)__msa_sldi_b((v16i8)hz_out5, (v16i8)hz_out3, 8); vec2 = (v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4); tmp1 = FILT_4TAP_DPADD_S_H(vec1, vec2, filt_vt0, filt_vt1); SRARI_H2_SH(tmp0, tmp1, 7); SAT_SH2_SH(tmp0, tmp1, 7); out = PCKEV_XORI128_UB(tmp0, tmp1); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); hz_out1 = hz_out5; vec0 = vec2; } } static void common_hv_4ht_4vt_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, filt_hz0, filt_hz1; v16u8 mask0, mask1, out0, out1; v8i16 filt, filt_vt0, filt_vt1, tmp0, tmp1, tmp2, tmp3; v8i16 hz_out0, hz_out1, hz_out2, hz_out3; v8i16 vec0, vec1, vec2, vec3, vec4; mask0 = LD_UB(&vp8_mc_filt_mask_arr[0]); src -= (1 + 1 * src_stride); filt = LD_SH(filter_horiz); SPLATI_H2_SB(filt, 0, 1, filt_hz0, filt_hz1); mask1 = mask0 + 2; LD_SB3(src, src_stride, src0, src1, src2); src += (3 * src_stride); XORI_B3_128_SB(src0, src1, src2); hz_out0 = HORIZ_4TAP_FILT(src0, src0, mask0, mask1, filt_hz0, filt_hz1); hz_out1 = HORIZ_4TAP_FILT(src1, src1, mask0, mask1, filt_hz0, filt_hz1); hz_out2 = HORIZ_4TAP_FILT(src2, src2, mask0, mask1, filt_hz0, filt_hz1); ILVEV_B2_SH(hz_out0, hz_out1, hz_out1, hz_out2, vec0, vec2); filt = LD_SH(filter_vert); SPLATI_H2_SH(filt, 0, 1, filt_vt0, filt_vt1); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src3, src4, src5, src6); src += (4 * src_stride); XORI_B4_128_SB(src3, src4, src5, src6); hz_out3 = HORIZ_4TAP_FILT(src3, src3, mask0, mask1, filt_hz0, filt_hz1); vec1 = (v8i16)__msa_ilvev_b((v16i8)hz_out3, (v16i8)hz_out2); tmp0 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt_vt0, filt_vt1); hz_out0 = HORIZ_4TAP_FILT(src4, src4, mask0, mask1, filt_hz0, filt_hz1); vec3 = (v8i16)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out3); tmp1 = FILT_4TAP_DPADD_S_H(vec2, vec3, filt_vt0, filt_vt1); hz_out1 = HORIZ_4TAP_FILT(src5, src5, mask0, mask1, filt_hz0, filt_hz1); vec4 = (v8i16)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp2 = FILT_4TAP_DPADD_S_H(vec1, vec4, filt_vt0, filt_vt1); hz_out2 = HORIZ_4TAP_FILT(src6, src6, mask0, mask1, filt_hz0, filt_hz1); ILVEV_B2_SH(hz_out3, hz_out0, hz_out1, hz_out2, vec0, vec1); tmp3 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt_vt0, filt_vt1); SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7); SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7); out0 = PCKEV_XORI128_UB(tmp0, tmp1); out1 = PCKEV_XORI128_UB(tmp2, tmp3); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); vec0 = vec4; vec2 = vec1; } } static void common_hv_4ht_4vt_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 2; multiple8_cnt--;) { common_hv_4ht_4vt_8w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 8; dst += 8; } } static void common_hv_6ht_4vt_4w_msa(uint8_t *RESTRICT src, int32_t 
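/*
 * Editor's note: scalar model (helper name invented) of __msa_sldi_b
 * as used by the 4-pixel-wide paths above.  There, each filtered
 * vector holds TWO rows (4 halfwords each), and
 *   hz_out2 = __msa_sldi_b(hz_out3, hz_out1, 8)
 * splices the upper row of hz_out1 with the lower row of hz_out3,
 * synthesizing the intermediate row pair without refiltering.
 */
#include <stdint.h>

/* out = bytes [n .. n+15] of the 32-byte concatenation (low: b, high: a) */
static void sldi_b_model(const uint8_t a[16], const uint8_t b[16], int n,
                         uint8_t out[16]) {
  int i;
  for (i = 0; i < 16; ++i) out[i] = (i + n < 16) ? b[i + n] : a[i + n - 16];
}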
src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6; v16i8 filt_hz0, filt_hz1, filt_hz2; v16u8 res0, res1, mask0, mask1, mask2; v8i16 filt, filt_vt0, filt_vt1, tmp0, tmp1, vec0, vec1, vec2; v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5; mask0 = LD_UB(&vp8_mc_filt_mask_arr[16]); src -= (2 + 1 * src_stride); filt = LD_SH(filter_horiz); SPLATI_H3_SB(filt, 0, 1, 2, filt_hz0, filt_hz1, filt_hz2); mask1 = mask0 + 2; mask2 = mask0 + 4; LD_SB3(src, src_stride, src0, src1, src2); src += (3 * src_stride); XORI_B3_128_SB(src0, src1, src2); hz_out0 = HORIZ_6TAP_FILT(src0, src1, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out1 = HORIZ_6TAP_FILT(src1, src2, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); vec0 = (v8i16)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); filt = LD_SH(filter_vert); SPLATI_H2_SH(filt, 0, 1, filt_vt0, filt_vt1); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src3, src4, src5, src6); src += (4 * src_stride); XORI_B4_128_SB(src3, src4, src5, src6); hz_out3 = HORIZ_6TAP_FILT(src3, src4, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out2 = (v8i16)__msa_sldi_b((v16i8)hz_out3, (v16i8)hz_out1, 8); vec1 = (v8i16)__msa_ilvev_b((v16i8)hz_out3, (v16i8)hz_out2); tmp0 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt_vt0, filt_vt1); hz_out5 = HORIZ_6TAP_FILT(src5, src6, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out4 = (v8i16)__msa_sldi_b((v16i8)hz_out5, (v16i8)hz_out3, 8); vec2 = (v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4); tmp1 = FILT_4TAP_DPADD_S_H(vec1, vec2, filt_vt0, filt_vt1); SRARI_H2_SH(tmp0, tmp1, 7); SAT_SH2_SH(tmp0, tmp1, 7); PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1); XORI_B2_128_UB(res0, res1); ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); dst += (4 * dst_stride); hz_out1 = hz_out5; vec0 = vec2; } } static void common_hv_6ht_4vt_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6; v16i8 filt_hz0, filt_hz1, filt_hz2, mask0, mask1, mask2; v8i16 filt, filt_vt0, filt_vt1, hz_out0, hz_out1, hz_out2, hz_out3; v8i16 tmp0, tmp1, tmp2, tmp3, vec0, vec1, vec2, vec3; v16u8 out0, out1; mask0 = LD_SB(&vp8_mc_filt_mask_arr[0]); src -= (2 + src_stride); filt = LD_SH(filter_horiz); SPLATI_H3_SB(filt, 0, 1, 2, filt_hz0, filt_hz1, filt_hz2); mask1 = mask0 + 2; mask2 = mask0 + 4; LD_SB3(src, src_stride, src0, src1, src2); src += (3 * src_stride); XORI_B3_128_SB(src0, src1, src2); hz_out0 = HORIZ_6TAP_FILT(src0, src0, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out1 = HORIZ_6TAP_FILT(src1, src1, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); hz_out2 = HORIZ_6TAP_FILT(src2, src2, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); ILVEV_B2_SH(hz_out0, hz_out1, hz_out1, hz_out2, vec0, vec2); filt = LD_SH(filter_vert); SPLATI_H2_SH(filt, 0, 1, filt_vt0, filt_vt1); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src3, src4, src5, src6); src += (4 * src_stride); XORI_B4_128_SB(src3, src4, src5, src6); hz_out3 = HORIZ_6TAP_FILT(src3, src3, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); vec1 = (v8i16)__msa_ilvev_b((v16i8)hz_out3, (v16i8)hz_out2); tmp0 = FILT_4TAP_DPADD_S_H(vec0, vec1, filt_vt0, filt_vt1); hz_out0 = HORIZ_6TAP_FILT(src4, src4, mask0, mask1, mask2, 
filt_hz0, filt_hz1, filt_hz2); vec3 = (v8i16)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out3); tmp1 = FILT_4TAP_DPADD_S_H(vec2, vec3, filt_vt0, filt_vt1); hz_out1 = HORIZ_6TAP_FILT(src5, src5, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); vec0 = (v8i16)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp2 = FILT_4TAP_DPADD_S_H(vec1, vec0, filt_vt0, filt_vt1); hz_out2 = HORIZ_6TAP_FILT(src6, src6, mask0, mask1, mask2, filt_hz0, filt_hz1, filt_hz2); ILVEV_B2_SH(hz_out3, hz_out0, hz_out1, hz_out2, vec1, vec2); tmp3 = FILT_4TAP_DPADD_S_H(vec1, vec2, filt_vt0, filt_vt1); SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7); SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7); out0 = PCKEV_XORI128_UB(tmp0, tmp1); out1 = PCKEV_XORI128_UB(tmp2, tmp3); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); } } static void common_hv_6ht_4vt_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 2; multiple8_cnt--;) { common_hv_6ht_4vt_8w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 8; dst += 8; } } static void common_hv_4ht_6vt_4w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8; v16i8 filt_hz0, filt_hz1, mask0, mask1; v16u8 out; v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; v8i16 hz_out7, tmp0, tmp1, out0, out1, out2, out3; v8i16 filt, filt_vt0, filt_vt1, filt_vt2; mask0 = LD_SB(&vp8_mc_filt_mask_arr[16]); src -= (1 + 2 * src_stride); filt = LD_SH(filter_horiz); SPLATI_H2_SB(filt, 0, 1, filt_hz0, filt_hz1); mask1 = mask0 + 2; LD_SB5(src, src_stride, src0, src1, src2, src3, src4); src += (5 * src_stride); XORI_B5_128_SB(src0, src1, src2, src3, src4); hz_out0 = HORIZ_4TAP_FILT(src0, src1, mask0, mask1, filt_hz0, filt_hz1); hz_out2 = HORIZ_4TAP_FILT(src2, src3, mask0, mask1, filt_hz0, filt_hz1); hz_out3 = HORIZ_4TAP_FILT(src3, src4, mask0, mask1, filt_hz0, filt_hz1); hz_out1 = (v8i16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8); ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1); filt = LD_SH(filter_vert); SPLATI_H3_SH(filt, 0, 1, 2, filt_vt0, filt_vt1, filt_vt2); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src5, src6, src7, src8); XORI_B4_128_SB(src5, src6, src7, src8); src += (4 * src_stride); hz_out5 = HORIZ_4TAP_FILT(src5, src6, mask0, mask1, filt_hz0, filt_hz1); hz_out4 = (v8i16)__msa_sldi_b((v16i8)hz_out5, (v16i8)hz_out3, 8); out2 = (v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4); tmp0 = DPADD_SH3_SH(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2); hz_out7 = HORIZ_4TAP_FILT(src7, src8, mask0, mask1, filt_hz0, filt_hz1); hz_out6 = (v8i16)__msa_sldi_b((v16i8)hz_out7, (v16i8)hz_out5, 8); out3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6); tmp1 = DPADD_SH3_SH(out1, out2, out3, filt_vt0, filt_vt1, filt_vt2); SRARI_H2_SH(tmp0, tmp1, 7); SAT_SH2_SH(tmp0, tmp1, 7); out = PCKEV_XORI128_UB(tmp0, tmp1); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); hz_out3 = hz_out7; out0 = out2; out1 = out3; } } static void common_hv_4ht_6vt_8w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, 
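/*
 * Editor's note: illustrative scalar model (helper name invented) of
 * the DPADD_SH3_SH accumulation used for the 6-tap vertical passes
 * above: three interleaved row pairs, each dotted with one replicated
 * tap pair, sum to the full six-tap result before SRARI_H/SAT_SH.
 */
#include <stdint.h>

static int32_t six_taps_as_three_pairs(const int16_t rows[6],
                                       const int8_t taps[6]) {
  int32_t acc = 0;
  int p;
  for (p = 0; p < 3; ++p) /* one dot-product-accumulate per pair vector */
    acc += rows[2 * p] * taps[2 * p] + rows[2 * p + 1] * taps[2 * p + 1];
  return acc;
}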
src5, src6, src7, src8; v16i8 filt_hz0, filt_hz1, mask0, mask1; v8i16 filt, filt_vt0, filt_vt1, filt_vt2, tmp0, tmp1, tmp2, tmp3; v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; v8i16 hz_out7, hz_out8, out0, out1, out2, out3, out4, out5, out6, out7; v16u8 vec0, vec1; mask0 = LD_SB(&vp8_mc_filt_mask_arr[0]); src -= (1 + 2 * src_stride); filt = LD_SH(filter_horiz); SPLATI_H2_SB(filt, 0, 1, filt_hz0, filt_hz1); mask1 = mask0 + 2; LD_SB5(src, src_stride, src0, src1, src2, src3, src4); src += (5 * src_stride); XORI_B5_128_SB(src0, src1, src2, src3, src4); hz_out0 = HORIZ_4TAP_FILT(src0, src0, mask0, mask1, filt_hz0, filt_hz1); hz_out1 = HORIZ_4TAP_FILT(src1, src1, mask0, mask1, filt_hz0, filt_hz1); hz_out2 = HORIZ_4TAP_FILT(src2, src2, mask0, mask1, filt_hz0, filt_hz1); hz_out3 = HORIZ_4TAP_FILT(src3, src3, mask0, mask1, filt_hz0, filt_hz1); hz_out4 = HORIZ_4TAP_FILT(src4, src4, mask0, mask1, filt_hz0, filt_hz1); ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1); ILVEV_B2_SH(hz_out1, hz_out2, hz_out3, hz_out4, out3, out4); filt = LD_SH(filter_vert); SPLATI_H3_SH(filt, 0, 1, 2, filt_vt0, filt_vt1, filt_vt2); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src5, src6, src7, src8); src += (4 * src_stride); XORI_B4_128_SB(src5, src6, src7, src8); hz_out5 = HORIZ_4TAP_FILT(src5, src5, mask0, mask1, filt_hz0, filt_hz1); out2 = (v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4); tmp0 = DPADD_SH3_SH(out0, out1, out2, filt_vt0, filt_vt1, filt_vt2); hz_out6 = HORIZ_4TAP_FILT(src6, src6, mask0, mask1, filt_hz0, filt_hz1); out5 = (v8i16)__msa_ilvev_b((v16i8)hz_out6, (v16i8)hz_out5); tmp1 = DPADD_SH3_SH(out3, out4, out5, filt_vt0, filt_vt1, filt_vt2); hz_out7 = HORIZ_4TAP_FILT(src7, src7, mask0, mask1, filt_hz0, filt_hz1); out6 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6); tmp2 = DPADD_SH3_SH(out1, out2, out6, filt_vt0, filt_vt1, filt_vt2); hz_out8 = HORIZ_4TAP_FILT(src8, src8, mask0, mask1, filt_hz0, filt_hz1); out7 = (v8i16)__msa_ilvev_b((v16i8)hz_out8, (v16i8)hz_out7); tmp3 = DPADD_SH3_SH(out4, out5, out7, filt_vt0, filt_vt1, filt_vt2); SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, 7); SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7); vec0 = PCKEV_XORI128_UB(tmp0, tmp1); vec1 = PCKEV_XORI128_UB(tmp2, tmp3); ST8x4_UB(vec0, vec1, dst, dst_stride); dst += (4 * dst_stride); hz_out4 = hz_out8; out0 = out2; out1 = out6; out3 = out5; out4 = out7; } } static void common_hv_4ht_6vt_16w_msa(uint8_t *RESTRICT src, int32_t src_stride, uint8_t *RESTRICT dst, int32_t dst_stride, const int8_t *filter_horiz, const int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 2; multiple8_cnt--;) { common_hv_4ht_6vt_8w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 8; dst += 8; } } void vp8_sixtap_predict4x4_msa(uint8_t *RESTRICT src, int32_t src_stride, int32_t xoffset, int32_t yoffset, uint8_t *RESTRICT dst, int32_t dst_stride) { const int8_t *h_filter = vp8_subpel_filters_msa[xoffset - 1]; const int8_t *v_filter = vp8_subpel_filters_msa[yoffset - 1]; if (yoffset) { if (xoffset) { switch (xoffset) { case 2: case 4: case 6: switch (yoffset) { case 2: case 4: case 6: common_hv_6ht_6vt_4w_msa(src, src_stride, dst, dst_stride, h_filter, v_filter, 4); break; case 1: case 3: case 5: case 7: common_hv_6ht_4vt_4w_msa(src, src_stride, dst, dst_stride, h_filter, v_filter + 1, 4); break; } break; case 1: case 3: case 5: case 7: switch (yoffset) { case 2: case 4: case 6: common_hv_4ht_6vt_4w_msa(src, src_stride, dst, dst_stride, 
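/*
 * Editor's note: background for the xoffset/yoffset switches in the
 * vp8_sixtap_predict* dispatchers in this file; check_odd_offset_taps
 * is a hypothetical helper, not libvpx code.  Offsets are in 1/8 pel:
 * even offsets (2, 4, 6) need all six taps, while the VP8 tap-table
 * rows for odd offsets (1, 3, 5, 7) have zero outermost taps, so the
 * 4-tap kernels are entered with 'filter + 1'.  xoffset == yoffset == 0
 * degenerates to a plain copy (vp8_copy_mem* or LW4/SW4).
 */
#include <assert.h>
#include <stdint.h>

static void check_odd_offset_taps(const int8_t f[6]) {
  assert(f[0] == 0 && f[5] == 0); /* why f + 1 can feed a 4-tap kernel */
}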
h_filter + 1, v_filter, 4); break; case 1: case 3: case 5: case 7: common_hv_4ht_4vt_4w_msa(src, src_stride, dst, dst_stride, h_filter + 1, v_filter + 1, 4); break; } break; } } else { switch (yoffset) { case 2: case 4: case 6: common_vt_6t_4w_msa(src, src_stride, dst, dst_stride, v_filter, 4); break; case 1: case 3: case 5: case 7: common_vt_4t_4w_msa(src, src_stride, dst, dst_stride, v_filter + 1, 4); break; } } } else { switch (xoffset) { case 0: { uint32_t tp0, tp1, tp2, tp3; LW4(src, src_stride, tp0, tp1, tp2, tp3); SW4(tp0, tp1, tp2, tp3, dst, dst_stride); break; } case 2: case 4: case 6: common_hz_6t_4w_msa(src, src_stride, dst, dst_stride, h_filter, 4); break; case 1: case 3: case 5: case 7: common_hz_4t_4w_msa(src, src_stride, dst, dst_stride, h_filter + 1, 4); break; } } } void vp8_sixtap_predict8x4_msa(uint8_t *RESTRICT src, int32_t src_stride, int32_t xoffset, int32_t yoffset, uint8_t *RESTRICT dst, int32_t dst_stride) { const int8_t *h_filter = vp8_subpel_filters_msa[xoffset - 1]; const int8_t *v_filter = vp8_subpel_filters_msa[yoffset - 1]; if (yoffset) { if (xoffset) { switch (xoffset) { case 2: case 4: case 6: switch (yoffset) { case 2: case 4: case 6: common_hv_6ht_6vt_8w_msa(src, src_stride, dst, dst_stride, h_filter, v_filter, 4); break; case 1: case 3: case 5: case 7: common_hv_6ht_4vt_8w_msa(src, src_stride, dst, dst_stride, h_filter, v_filter + 1, 4); break; } break; case 1: case 3: case 5: case 7: switch (yoffset) { case 2: case 4: case 6: common_hv_4ht_6vt_8w_msa(src, src_stride, dst, dst_stride, h_filter + 1, v_filter, 4); break; case 1: case 3: case 5: case 7: common_hv_4ht_4vt_8w_msa(src, src_stride, dst, dst_stride, h_filter + 1, v_filter + 1, 4); break; } break; } } else { switch (yoffset) { case 2: case 4: case 6: common_vt_6t_8w_msa(src, src_stride, dst, dst_stride, v_filter, 4); break; case 1: case 3: case 5: case 7: common_vt_4t_8w_msa(src, src_stride, dst, dst_stride, v_filter + 1, 4); break; } } } else { switch (xoffset) { case 0: vp8_copy_mem8x4(src, src_stride, dst, dst_stride); break; case 2: case 4: case 6: common_hz_6t_8w_msa(src, src_stride, dst, dst_stride, h_filter, 4); break; case 1: case 3: case 5: case 7: common_hz_4t_8w_msa(src, src_stride, dst, dst_stride, h_filter + 1, 4); break; } } } void vp8_sixtap_predict8x8_msa(uint8_t *RESTRICT src, int32_t src_stride, int32_t xoffset, int32_t yoffset, uint8_t *RESTRICT dst, int32_t dst_stride) { const int8_t *h_filter = vp8_subpel_filters_msa[xoffset - 1]; const int8_t *v_filter = vp8_subpel_filters_msa[yoffset - 1]; if (yoffset) { if (xoffset) { switch (xoffset) { case 2: case 4: case 6: switch (yoffset) { case 2: case 4: case 6: common_hv_6ht_6vt_8w_msa(src, src_stride, dst, dst_stride, h_filter, v_filter, 8); break; case 1: case 3: case 5: case 7: common_hv_6ht_4vt_8w_msa(src, src_stride, dst, dst_stride, h_filter, v_filter + 1, 8); break; } break; case 1: case 3: case 5: case 7: switch (yoffset) { case 2: case 4: case 6: common_hv_4ht_6vt_8w_msa(src, src_stride, dst, dst_stride, h_filter + 1, v_filter, 8); break; case 1: case 3: case 5: case 7: common_hv_4ht_4vt_8w_msa(src, src_stride, dst, dst_stride, h_filter + 1, v_filter + 1, 8); break; } break; } } else { switch (yoffset) { case 2: case 4: case 6: common_vt_6t_8w_msa(src, src_stride, dst, dst_stride, v_filter, 8); break; case 1: case 3: case 5: case 7: common_vt_4t_8w_msa(src, src_stride, dst, dst_stride, v_filter + 1, 8); break; } } } else { switch (xoffset) { case 0: vp8_copy_mem8x8(src, src_stride, dst, dst_stride); break; case 2: case 4: 
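/*
 * Editor's note: hypothetical usage sketch (buffer names and strides
 * invented).  xoffset/yoffset are 1/8-pel motion-vector fractions in
 * [0, 7]; src must be readable at least 2 rows and columns beyond the
 * block on every side to cover the 6-tap support.
 */
#include <stdint.h>

static void example_8x8_prediction(uint8_t *ref_block, int32_t ref_stride,
                                   uint8_t *pred /* 8x8, stride 8 */) {
  /* quarter-pel horizontally (2/8), half-pel vertically (4/8): */
  vp8_sixtap_predict8x8_msa(ref_block, ref_stride, 2, 4, pred, 8);
}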
          case 6:
            common_hz_6t_8w_msa(src, src_stride, dst, dst_stride, h_filter, 8);
            break;
          case 1:
          case 3:
          case 5:
          case 7:
            common_hz_4t_8w_msa(src, src_stride, dst, dst_stride, h_filter + 1,
                                8);
            break;
        }
      }
    }

void vp8_sixtap_predict16x16_msa(uint8_t *RESTRICT src, int32_t src_stride,
                                 int32_t xoffset, int32_t yoffset,
                                 uint8_t *RESTRICT dst, int32_t dst_stride) {
  const int8_t *h_filter = vp8_subpel_filters_msa[xoffset - 1];
  const int8_t *v_filter = vp8_subpel_filters_msa[yoffset - 1];

  if (yoffset) {
    if (xoffset) {
      switch (xoffset) {
        case 2:
        case 4:
        case 6:
          switch (yoffset) {
            case 2:
            case 4:
            case 6:
              common_hv_6ht_6vt_16w_msa(src, src_stride, dst, dst_stride,
                                        h_filter, v_filter, 16);
              break;
            case 1:
            case 3:
            case 5:
            case 7:
              common_hv_6ht_4vt_16w_msa(src, src_stride, dst, dst_stride,
                                        h_filter, v_filter + 1, 16);
              break;
          }
          break;
        case 1:
        case 3:
        case 5:
        case 7:
          switch (yoffset) {
            case 2:
            case 4:
            case 6:
              common_hv_4ht_6vt_16w_msa(src, src_stride, dst, dst_stride,
                                        h_filter + 1, v_filter, 16);
              break;
            case 1:
            case 3:
            case 5:
            case 7:
              common_hv_4ht_4vt_16w_msa(src, src_stride, dst, dst_stride,
                                        h_filter + 1, v_filter + 1, 16);
              break;
          }
          break;
      }
    } else {
      switch (yoffset) {
        case 2:
        case 4:
        case 6:
          common_vt_6t_16w_msa(src, src_stride, dst, dst_stride, v_filter, 16);
          break;
        case 1:
        case 3:
        case 5:
        case 7:
          common_vt_4t_16w_msa(src, src_stride, dst, dst_stride, v_filter + 1,
                               16);
          break;
      }
    }
  } else {
    switch (xoffset) {
      case 0: vp8_copy_mem16x16(src, src_stride, dst, dst_stride); break;
      case 2:
      case 4:
      case 6:
        common_hz_6t_16w_msa(src, src_stride, dst, dst_stride, h_filter, 16);
        break;
      case 1:
      case 3:
      case 5:
      case 7:
        common_hz_4t_16w_msa(src, src_stride, dst, dst_stride, h_filter + 1,
                             16);
        break;
    }
  }
}
libvpx-1.8.2/vp8/common/mips/msa/vp8_macros_msa.h
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP8_COMMON_MIPS_MSA_VP8_MACROS_MSA_H_
#define VPX_VP8_COMMON_MIPS_MSA_VP8_MACROS_MSA_H_

#include <msa.h>

#include "./vpx_config.h"
#include "vpx/vpx_integer.h"

#define LD_B(RTYPE, psrc) *((const RTYPE *)(psrc))
#define LD_UB(...) LD_B(v16u8, __VA_ARGS__)
#define LD_SB(...) LD_B(v16i8, __VA_ARGS__)

#define LD_H(RTYPE, psrc) *((const RTYPE *)(psrc))
#define LD_UH(...) LD_H(v8u16, __VA_ARGS__)
#define LD_SH(...) LD_H(v8i16, __VA_ARGS__)

#define LD_W(RTYPE, psrc) *((const RTYPE *)(psrc))
#define LD_UW(...) LD_W(v4u32, __VA_ARGS__)
#define LD_SW(...) LD_W(v4i32, __VA_ARGS__)

#define ST_B(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
#define ST_UB(...) ST_B(v16u8, __VA_ARGS__)
#define ST_SB(...) ST_B(v16i8, __VA_ARGS__)

#define ST_H(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
#define ST_UH(...) ST_H(v8u16, __VA_ARGS__)
#define ST_SH(...) ST_H(v8i16, __VA_ARGS__)

#define ST_W(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
#define ST_SW(...) \
ST_W(v4i32, __VA_ARGS__) #if (__mips_isa_rev >= 6) #define LW(psrc) \ ({ \ const uint8_t *psrc_m = (const uint8_t *)(psrc); \ uint32_t val_m; \ \ asm volatile("lw %[val_m], %[psrc_m] \n\t" \ \ : [val_m] "=r"(val_m) \ : [psrc_m] "m"(*psrc_m)); \ \ val_m; \ }) #if (__mips == 64) #define LD(psrc) \ ({ \ const uint8_t *psrc_m = (const uint8_t *)(psrc); \ uint64_t val_m = 0; \ \ asm volatile("ld %[val_m], %[psrc_m] \n\t" \ \ : [val_m] "=r"(val_m) \ : [psrc_m] "m"(*psrc_m)); \ \ val_m; \ }) #else // !(__mips == 64) #define LD(psrc) \ ({ \ const uint8_t *psrc_m = (const uint8_t *)(psrc); \ uint32_t val0_m, val1_m; \ uint64_t val_m = 0; \ \ val0_m = LW(psrc_m); \ val1_m = LW(psrc_m + 4); \ \ val_m = (uint64_t)(val1_m); \ val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \ val_m = (uint64_t)(val_m | (uint64_t)val0_m); \ \ val_m; \ }) #endif // (__mips == 64) #define SH(val, pdst) \ { \ uint8_t *pdst_m = (uint8_t *)(pdst); \ const uint16_t val_m = (val); \ \ asm volatile("sh %[val_m], %[pdst_m] \n\t" \ \ : [pdst_m] "=m"(*pdst_m) \ : [val_m] "r"(val_m)); \ } #define SW(val, pdst) \ { \ uint8_t *pdst_m = (uint8_t *)(pdst); \ const uint32_t val_m = (val); \ \ asm volatile("sw %[val_m], %[pdst_m] \n\t" \ \ : [pdst_m] "=m"(*pdst_m) \ : [val_m] "r"(val_m)); \ } #define SD(val, pdst) \ { \ uint8_t *pdst_m = (uint8_t *)(pdst); \ const uint64_t val_m = (val); \ \ asm volatile("sd %[val_m], %[pdst_m] \n\t" \ \ : [pdst_m] "=m"(*pdst_m) \ : [val_m] "r"(val_m)); \ } #else // !(__mips_isa_rev >= 6) #define LW(psrc) \ ({ \ const uint8_t *psrc_m = (const uint8_t *)(psrc); \ uint32_t val_m; \ \ asm volatile("ulw %[val_m], %[psrc_m] \n\t" \ \ : [val_m] "=r"(val_m) \ : [psrc_m] "m"(*psrc_m)); \ \ val_m; \ }) #if (__mips == 64) #define LD(psrc) \ ({ \ const uint8_t *psrc_m = (const uint8_t *)(psrc); \ uint64_t val_m = 0; \ \ asm volatile("uld %[val_m], %[psrc_m] \n\t" \ \ : [val_m] "=r"(val_m) \ : [psrc_m] "m"(*psrc_m)); \ \ val_m; \ }) #else // !(__mips == 64) #define LD(psrc) \ ({ \ const uint8_t *psrc_m1 = (const uint8_t *)(psrc); \ uint32_t val0_m, val1_m; \ uint64_t val_m = 0; \ \ val0_m = LW(psrc_m1); \ val1_m = LW(psrc_m1 + 4); \ \ val_m = (uint64_t)(val1_m); \ val_m = (uint64_t)((val_m << 32) & 0xFFFFFFFF00000000); \ val_m = (uint64_t)(val_m | (uint64_t)val0_m); \ \ val_m; \ }) #endif // (__mips == 64) #define SH(val, pdst) \ { \ uint8_t *pdst_m = (uint8_t *)(pdst); \ const uint16_t val_m = (val); \ \ asm volatile("ush %[val_m], %[pdst_m] \n\t" \ \ : [pdst_m] "=m"(*pdst_m) \ : [val_m] "r"(val_m)); \ } #define SW(val, pdst) \ { \ uint8_t *pdst_m = (uint8_t *)(pdst); \ const uint32_t val_m = (val); \ \ asm volatile("usw %[val_m], %[pdst_m] \n\t" \ \ : [pdst_m] "=m"(*pdst_m) \ : [val_m] "r"(val_m)); \ } #define SD(val, pdst) \ { \ uint8_t *pdst_m1 = (uint8_t *)(pdst); \ uint32_t val0_m, val1_m; \ \ val0_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ val1_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ \ SW(val0_m, pdst_m1); \ SW(val1_m, pdst_m1 + 4); \ } #endif // (__mips_isa_rev >= 6) /* Description : Load 4 words with stride Arguments : Inputs - psrc, stride Outputs - out0, out1, out2, out3 Details : Load word in 'out0' from (psrc) Load word in 'out1' from (psrc + stride) Load word in 'out2' from (psrc + 2 * stride) Load word in 'out3' from (psrc + 3 * stride) */ #define LW4(psrc, stride, out0, out1, out2, out3) \ { \ out0 = LW((psrc)); \ out1 = LW((psrc) + stride); \ out2 = LW((psrc) + 2 * stride); \ out3 = LW((psrc) + 3 * stride); \ } /* Description : Load double words with stride Arguments : Inputs - 
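/*
 * Editor's note: scalar restatement (helper name invented) of the
 * 32-bit fallback LD() above: two unaligned word loads are recombined
 * little-endian, the word at psrc + 4 becoming the high half.
 */
#include <stdint.h>

static uint64_t ld_fallback_model(uint32_t lo_at_psrc, uint32_t hi_at_psrc4) {
  return ((uint64_t)hi_at_psrc4 << 32) | lo_at_psrc;
}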
psrc, stride Outputs - out0, out1 Details : Load double word in 'out0' from (psrc) Load double word in 'out1' from (psrc + stride) */ #define LD2(psrc, stride, out0, out1) \ { \ out0 = LD((psrc)); \ out1 = LD((psrc) + stride); \ } #define LD4(psrc, stride, out0, out1, out2, out3) \ { \ LD2((psrc), stride, out0, out1); \ LD2((psrc) + 2 * stride, stride, out2, out3); \ } /* Description : Store 4 words with stride Arguments : Inputs - in0, in1, in2, in3, pdst, stride Details : Store word from 'in0' to (pdst) Store word from 'in1' to (pdst + stride) Store word from 'in2' to (pdst + 2 * stride) Store word from 'in3' to (pdst + 3 * stride) */ #define SW4(in0, in1, in2, in3, pdst, stride) \ { \ SW(in0, (pdst)); \ SW(in1, (pdst) + stride); \ SW(in2, (pdst) + 2 * stride); \ SW(in3, (pdst) + 3 * stride); \ } /* Description : Store 4 double words with stride Arguments : Inputs - in0, in1, in2, in3, pdst, stride Details : Store double word from 'in0' to (pdst) Store double word from 'in1' to (pdst + stride) Store double word from 'in2' to (pdst + 2 * stride) Store double word from 'in3' to (pdst + 3 * stride) */ #define SD4(in0, in1, in2, in3, pdst, stride) \ { \ SD(in0, (pdst)); \ SD(in1, (pdst) + stride); \ SD(in2, (pdst) + 2 * stride); \ SD(in3, (pdst) + 3 * stride); \ } /* Description : Load vectors with 16 byte elements with stride Arguments : Inputs - psrc, stride Outputs - out0, out1 Return Type - as per RTYPE Details : Load 16 byte elements in 'out0' from (psrc) Load 16 byte elements in 'out1' from (psrc + stride) */ #define LD_B2(RTYPE, psrc, stride, out0, out1) \ { \ out0 = LD_B(RTYPE, (psrc)); \ out1 = LD_B(RTYPE, (psrc) + stride); \ } #define LD_UB2(...) LD_B2(v16u8, __VA_ARGS__) #define LD_SB2(...) LD_B2(v16i8, __VA_ARGS__) #define LD_B3(RTYPE, psrc, stride, out0, out1, out2) \ { \ LD_B2(RTYPE, (psrc), stride, out0, out1); \ out2 = LD_B(RTYPE, (psrc) + 2 * stride); \ } #define LD_UB3(...) LD_B3(v16u8, __VA_ARGS__) #define LD_SB3(...) LD_B3(v16i8, __VA_ARGS__) #define LD_B4(RTYPE, psrc, stride, out0, out1, out2, out3) \ { \ LD_B2(RTYPE, (psrc), stride, out0, out1); \ LD_B2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \ } #define LD_UB4(...) LD_B4(v16u8, __VA_ARGS__) #define LD_SB4(...) LD_B4(v16i8, __VA_ARGS__) #define LD_B5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \ { \ LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \ out4 = LD_B(RTYPE, (psrc) + 4 * stride); \ } #define LD_UB5(...) LD_B5(v16u8, __VA_ARGS__) #define LD_SB5(...) LD_B5(v16i8, __VA_ARGS__) #define LD_B8(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, \ out7) \ { \ LD_B4(RTYPE, (psrc), stride, out0, out1, out2, out3); \ LD_B4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \ } #define LD_UB8(...) LD_B8(v16u8, __VA_ARGS__) #define LD_SB8(...) LD_B8(v16i8, __VA_ARGS__) /* Description : Load vectors with 8 halfword elements with stride Arguments : Inputs - psrc, stride Outputs - out0, out1 Details : Load 8 halfword elements in 'out0' from (psrc) Load 8 halfword elements in 'out1' from (psrc + stride) */ #define LD_H2(RTYPE, psrc, stride, out0, out1) \ { \ out0 = LD_H(RTYPE, (psrc)); \ out1 = LD_H(RTYPE, (psrc) + (stride)); \ } #define LD_SH2(...) LD_H2(v8i16, __VA_ARGS__) #define LD_H4(RTYPE, psrc, stride, out0, out1, out2, out3) \ { \ LD_H2(RTYPE, (psrc), stride, out0, out1); \ LD_H2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \ } #define LD_SH4(...) 
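/*
 * Editor's note: minimal usage sketch for the typed vector load/store
 * macros defined earlier in this header (illustrative only; building
 * it requires an MSA-enabled toolchain, e.g. gcc -mmsa).
 */
static void copy_16_bytes(const uint8_t *src, uint8_t *dst) {
  v16u8 v = LD_UB(src); /* expands to *(const v16u8 *)(src) */
  ST_UB(v, dst);        /* expands to *(v16u8 *)(dst) = v   */
}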
LD_H4(v8i16, __VA_ARGS__) /* Description : Load 2 vectors of signed word elements with stride Arguments : Inputs - psrc, stride Outputs - out0, out1 Return Type - signed word */ #define LD_SW2(psrc, stride, out0, out1) \ { \ out0 = LD_SW((psrc)); \ out1 = LD_SW((psrc) + stride); \ } /* Description : Store vectors of 16 byte elements with stride Arguments : Inputs - in0, in1, pdst, stride Details : Store 16 byte elements from 'in0' to (pdst) Store 16 byte elements from 'in1' to (pdst + stride) */ #define ST_B2(RTYPE, in0, in1, pdst, stride) \ { \ ST_B(RTYPE, in0, (pdst)); \ ST_B(RTYPE, in1, (pdst) + stride); \ } #define ST_UB2(...) ST_B2(v16u8, __VA_ARGS__) #define ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride) \ { \ ST_B2(RTYPE, in0, in1, (pdst), stride); \ ST_B2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ } #define ST_UB4(...) ST_B4(v16u8, __VA_ARGS__) #define ST_SB4(...) ST_B4(v16i8, __VA_ARGS__) #define ST_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \ { \ ST_B4(RTYPE, in0, in1, in2, in3, pdst, stride); \ ST_B4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \ } #define ST_UB8(...) ST_B8(v16u8, __VA_ARGS__) /* Description : Store vectors of 8 halfword elements with stride Arguments : Inputs - in0, in1, pdst, stride Details : Store 8 halfword elements from 'in0' to (pdst) Store 8 halfword elements from 'in1' to (pdst + stride) */ #define ST_H2(RTYPE, in0, in1, pdst, stride) \ { \ ST_H(RTYPE, in0, (pdst)); \ ST_H(RTYPE, in1, (pdst) + stride); \ } #define ST_SH2(...) ST_H2(v8i16, __VA_ARGS__) /* Description : Store vectors of word elements with stride Arguments : Inputs - in0, in1, pdst, stride Details : Store 4 word elements from 'in0' to (pdst) Store 4 word elements from 'in1' to (pdst + stride) */ #define ST_SW2(in0, in1, pdst, stride) \ { \ ST_SW(in0, (pdst)); \ ST_SW(in1, (pdst) + stride); \ } /* Description : Store 2x4 byte block to destination memory from input vector Arguments : Inputs - in, stidx, pdst, stride Details : Index 'stidx' halfword element from 'in' vector is copied to the GP register and stored to (pdst) Index 'stidx+1' halfword element from 'in' vector is copied to the GP register and stored to (pdst + stride) Index 'stidx+2' halfword element from 'in' vector is copied to the GP register and stored to (pdst + 2 * stride) Index 'stidx+3' halfword element from 'in' vector is copied to the GP register and stored to (pdst + 3 * stride) */ #define ST2x4_UB(in, stidx, pdst, stride) \ { \ uint16_t out0_m, out1_m, out2_m, out3_m; \ uint8_t *pblk_2x4_m = (uint8_t *)(pdst); \ \ out0_m = __msa_copy_u_h((v8i16)in, (stidx)); \ out1_m = __msa_copy_u_h((v8i16)in, (stidx + 1)); \ out2_m = __msa_copy_u_h((v8i16)in, (stidx + 2)); \ out3_m = __msa_copy_u_h((v8i16)in, (stidx + 3)); \ \ SH(out0_m, pblk_2x4_m); \ SH(out1_m, pblk_2x4_m + stride); \ SH(out2_m, pblk_2x4_m + 2 * stride); \ SH(out3_m, pblk_2x4_m + 3 * stride); \ } /* Description : Store 4x4 byte block to destination memory from input vector Arguments : Inputs - in0, in1, pdst, stride Details : 'Idx0' word element from input vector 'in0' is copied to the GP register and stored to (pdst) 'Idx1' word element from input vector 'in0' is copied to the GP register and stored to (pdst + stride) 'Idx2' word element from input vector 'in0' is copied to the GP register and stored to (pdst + 2 * stride) 'Idx3' word element from input vector 'in0' is copied to the GP register and stored to (pdst + 3 * stride) */ #define ST4x4_UB(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \ { \ uint32_t out0_m, out1_m, 
out2_m, out3_m; \ uint8_t *pblk_4x4_m = (uint8_t *)(pdst); \ \ out0_m = __msa_copy_u_w((v4i32)in0, idx0); \ out1_m = __msa_copy_u_w((v4i32)in0, idx1); \ out2_m = __msa_copy_u_w((v4i32)in1, idx2); \ out3_m = __msa_copy_u_w((v4i32)in1, idx3); \ \ SW4(out0_m, out1_m, out2_m, out3_m, pblk_4x4_m, stride); \ } #define ST4x8_UB(in0, in1, pdst, stride) \ { \ uint8_t *pblk_4x8 = (uint8_t *)(pdst); \ \ ST4x4_UB(in0, in0, 0, 1, 2, 3, pblk_4x8, stride); \ ST4x4_UB(in1, in1, 0, 1, 2, 3, pblk_4x8 + 4 * stride, stride); \ } /* Description : Store 8x1 byte block to destination memory from input vector Arguments : Inputs - in, pdst Details : Index 0 double word element from 'in' vector is copied to the GP register and stored to (pdst) */ #define ST8x1_UB(in, pdst) \ { \ uint64_t out0_m; \ \ out0_m = __msa_copy_u_d((v2i64)in, 0); \ SD(out0_m, pdst); \ } /* Description : Store 8x2 byte block to destination memory from input vector Arguments : Inputs - in, pdst, stride Details : Index 0 double word element from 'in' vector is copied to the GP register and stored to (pdst) Index 1 double word element from 'in' vector is copied to the GP register and stored to (pdst + stride) */ #define ST8x2_UB(in, pdst, stride) \ { \ uint64_t out0_m, out1_m; \ uint8_t *pblk_8x2_m = (uint8_t *)(pdst); \ \ out0_m = __msa_copy_u_d((v2i64)in, 0); \ out1_m = __msa_copy_u_d((v2i64)in, 1); \ \ SD(out0_m, pblk_8x2_m); \ SD(out1_m, pblk_8x2_m + stride); \ } /* Description : Store 8x4 byte block to destination memory from input vectors Arguments : Inputs - in0, in1, pdst, stride Details : Index 0 double word element from 'in0' vector is copied to the GP register and stored to (pdst) Index 1 double word element from 'in0' vector is copied to the GP register and stored to (pdst + stride) Index 0 double word element from 'in1' vector is copied to the GP register and stored to (pdst + 2 * stride) Index 1 double word element from 'in1' vector is copied to the GP register and stored to (pdst + 3 * stride) */ #define ST8x4_UB(in0, in1, pdst, stride) \ { \ uint64_t out0_m, out1_m, out2_m, out3_m; \ uint8_t *pblk_8x4_m = (uint8_t *)(pdst); \ \ out0_m = __msa_copy_u_d((v2i64)in0, 0); \ out1_m = __msa_copy_u_d((v2i64)in0, 1); \ out2_m = __msa_copy_u_d((v2i64)in1, 0); \ out3_m = __msa_copy_u_d((v2i64)in1, 1); \ \ SD4(out0_m, out1_m, out2_m, out3_m, pblk_8x4_m, stride); \ } /* Description : Immediate number of elements to slide with zero Arguments : Inputs - in0, in1, slide_val Outputs - out0, out1 Return Type - as per RTYPE Details : Byte elements from 'zero_m' vector are slid into 'in0' by value specified in the 'slide_val' */ #define SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val) \ { \ v16i8 zero_m = { 0 }; \ \ out0 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in0, slide_val); \ out1 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in1, slide_val); \ } #define SLDI_B2_0_UB(...) 
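/*
 * Editor's note: scalar model (helper name invented) of ST8x4_UB
 * above, assuming a little-endian target as these MSA paths do: the
 * two doubleword lanes of each vector move through GP registers
 * (__msa_copy_u_d) and are stored with SD4, so 8-wide blocks never
 * need full 16-byte stores.
 */
#include <stdint.h>
#include <string.h>

static void st8x4_model(const uint64_t in0[2], const uint64_t in1[2],
                        uint8_t *dst, int stride) {
  memcpy(dst + 0 * stride, &in0[0], 8); /* copy_u_d(in0, 0) + SD */
  memcpy(dst + 1 * stride, &in0[1], 8); /* copy_u_d(in0, 1) + SD */
  memcpy(dst + 2 * stride, &in1[0], 8);
  memcpy(dst + 3 * stride, &in1[1], 8);
}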
SLDI_B2_0(v16u8, __VA_ARGS__) /* Description : Immediate number of elements to slide Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val Outputs - out0, out1 Return Type - as per RTYPE Details : Byte elements from 'in0_0' vector are slid into 'in1_0' by value specified in the 'slide_val' */ #define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \ { \ out0 = (RTYPE)__msa_sldi_b((v16i8)in0_0, (v16i8)in1_0, slide_val); \ out1 = (RTYPE)__msa_sldi_b((v16i8)in0_1, (v16i8)in1_1, slide_val); \ } #define SLDI_B3(RTYPE, in0_0, in0_1, in0_2, in1_0, in1_1, in1_2, out0, out1, \ out2, slide_val) \ { \ SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val); \ out2 = (RTYPE)__msa_sldi_b((v16i8)in0_2, (v16i8)in1_2, slide_val); \ } #define SLDI_B3_UH(...) SLDI_B3(v8u16, __VA_ARGS__) /* Description : Shuffle byte vector elements as per mask vector Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 Outputs - out0, out1 Return Type - as per RTYPE Details : Byte elements from 'in0' & 'in1' are copied selectively to 'out0' as per control vector 'mask0' */ #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ { \ out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \ out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \ } #define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) #define VSHF_B2_SB(...) VSHF_B2(v16i8, __VA_ARGS__) #define VSHF_B2_UH(...) VSHF_B2(v8u16, __VA_ARGS__) #define VSHF_B3(RTYPE, in0, in1, in2, in3, in4, in5, mask0, mask1, mask2, \ out0, out1, out2) \ { \ VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1); \ out2 = (RTYPE)__msa_vshf_b((v16i8)mask2, (v16i8)in5, (v16i8)in4); \ } #define VSHF_B3_SB(...) VSHF_B3(v16i8, __VA_ARGS__) /* Description : Shuffle halfword vector elements as per mask vector Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 Outputs - out0, out1 Return Type - as per RTYPE Details : halfword elements from 'in0' & 'in1' are copied selectively to 'out0' as per control vector 'mask0' */ #define VSHF_H2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ { \ out0 = (RTYPE)__msa_vshf_h((v8i16)mask0, (v8i16)in1, (v8i16)in0); \ out1 = (RTYPE)__msa_vshf_h((v8i16)mask1, (v8i16)in3, (v8i16)in2); \ } #define VSHF_H2_SH(...) VSHF_H2(v8i16, __VA_ARGS__) /* Description : Dot product of byte vector elements Arguments : Inputs - mult0, mult1, cnst0, cnst1 Outputs - out0, out1 Return Type - as per RTYPE Details : Unsigned byte elements from 'mult0' are multiplied with unsigned byte elements from 'cnst0' producing a result twice the size of input i.e. unsigned halfword. The multiplication result of adjacent odd-even elements are added together and written to the 'out0' vector */ #define DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ { \ out0 = (RTYPE)__msa_dotp_u_h((v16u8)mult0, (v16u8)cnst0); \ out1 = (RTYPE)__msa_dotp_u_h((v16u8)mult1, (v16u8)cnst1); \ } #define DOTP_UB2_UH(...) DOTP_UB2(v8u16, __VA_ARGS__) #define DOTP_UB4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \ cnst3, out0, out1, out2, out3) \ { \ DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ DOTP_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ } #define DOTP_UB4_UH(...) DOTP_UB4(v8u16, __VA_ARGS__) /* Description : Dot product of byte vector elements Arguments : Inputs - mult0, mult1, cnst0, cnst1 Outputs - out0, out1 Return Type - as per RTYPE Details : Signed byte elements from 'mult0' are multiplied with signed byte elements from 'cnst0' producing a result twice the size of input i.e. 
signed halfword. The multiplication result of adjacent odd-even elements are added together and written to the 'out0' vector */ #define DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ { \ out0 = (RTYPE)__msa_dotp_s_h((v16i8)mult0, (v16i8)cnst0); \ out1 = (RTYPE)__msa_dotp_s_h((v16i8)mult1, (v16i8)cnst1); \ } #define DOTP_SB2_SH(...) DOTP_SB2(v8i16, __VA_ARGS__) #define DOTP_SB4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \ cnst3, out0, out1, out2, out3) \ { \ DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ DOTP_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ } #define DOTP_SB4_SH(...) DOTP_SB4(v8i16, __VA_ARGS__) /* Description : Dot product of halfword vector elements Arguments : Inputs - mult0, mult1, cnst0, cnst1 Outputs - out0, out1 Return Type - as per RTYPE Details : Signed halfword elements from 'mult0' are multiplied with signed halfword elements from 'cnst0' producing a result twice the size of input i.e. signed word. The multiplication result of adjacent odd-even elements are added together and written to the 'out0' vector */ #define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ { \ out0 = (RTYPE)__msa_dotp_s_w((v8i16)mult0, (v8i16)cnst0); \ out1 = (RTYPE)__msa_dotp_s_w((v8i16)mult1, (v8i16)cnst1); \ } #define DOTP_SH4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \ cnst3, out0, out1, out2, out3) \ { \ DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ DOTP_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ } #define DOTP_SH4_SW(...) DOTP_SH4(v4i32, __VA_ARGS__) /* Description : Dot product of word vector elements Arguments : Inputs - mult0, mult1, cnst0, cnst1 Outputs - out0, out1 Return Type - as per RTYPE Details : Signed word elements from 'mult0' are multiplied with signed word elements from 'cnst0' producing a result twice the size of input i.e. signed double word. The multiplication result of adjacent odd-even elements are added together and written to the 'out0' vector */ #define DOTP_SW2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ { \ out0 = (RTYPE)__msa_dotp_s_d((v4i32)mult0, (v4i32)cnst0); \ out1 = (RTYPE)__msa_dotp_s_d((v4i32)mult1, (v4i32)cnst1); \ } #define DOTP_SW2_SD(...) DOTP_SW2(v2i64, __VA_ARGS__) /* Description : Dot product & addition of byte vector elements Arguments : Inputs - mult0, mult1, cnst0, cnst1 Outputs - out0, out1 Return Type - as per RTYPE Details : Signed byte elements from 'mult0' are multiplied with signed byte elements from 'cnst0' producing a result twice the size of input i.e. signed halfword. The multiplication result of adjacent odd-even elements are added to the 'out0' vector */ #define DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ { \ out0 = (RTYPE)__msa_dpadd_s_h((v8i16)out0, (v16i8)mult0, (v16i8)cnst0); \ out1 = (RTYPE)__msa_dpadd_s_h((v8i16)out1, (v16i8)mult1, (v16i8)cnst1); \ } #define DPADD_SB2_SH(...) DPADD_SB2(v8i16, __VA_ARGS__) #define DPADD_SB4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \ cnst3, out0, out1, out2, out3) \ { \ DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ DPADD_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ } #define DPADD_SB4_SH(...) DPADD_SB4(v8i16, __VA_ARGS__) /* Description : Dot product & addition of halfword vector elements Arguments : Inputs - mult0, mult1, cnst0, cnst1 Outputs - out0, out1 Return Type - as per RTYPE Details : Signed halfword elements from 'mult0' are multiplied with signed halfword elements from 'cnst0' producing a result twice the size of input i.e. signed word. 
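/*
 * Editor's note: illustrative scalar models (helper names invented) of
 * the pairwise dot products wrapped by the DOTP_xx and DPADD_xx macro
 * families here: result lane i is m[2i]*c[2i] + m[2i+1]*c[2i+1],
 * widened one element size; the DPADD_xx forms accumulate onto the
 * output instead of overwriting it.
 */
#include <stdint.h>

static void dotp_s_h_model(const int8_t m[16], const int8_t c[16],
                           int16_t out[8]) {
  int i;
  for (i = 0; i < 8; ++i)
    out[i] = (int16_t)(m[2 * i] * c[2 * i] + m[2 * i + 1] * c[2 * i + 1]);
}

static void dpadd_s_h_model(int16_t acc[8], const int8_t m[16],
                            const int8_t c[16]) {
  int i;
  for (i = 0; i < 8; ++i)
    acc[i] = (int16_t)(acc[i] + m[2 * i] * c[2 * i] +
                       m[2 * i + 1] * c[2 * i + 1]);
}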
The multiplication result of adjacent odd-even elements are added to the 'out0' vector */ #define DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ { \ out0 = (RTYPE)__msa_dpadd_s_w((v4i32)out0, (v8i16)mult0, (v8i16)cnst0); \ out1 = (RTYPE)__msa_dpadd_s_w((v4i32)out1, (v8i16)mult1, (v8i16)cnst1); \ } #define DPADD_SH2_SW(...) DPADD_SH2(v4i32, __VA_ARGS__) #define DPADD_SH4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \ cnst3, out0, out1, out2, out3) \ { \ DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ DPADD_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ } #define DPADD_SH4_SW(...) DPADD_SH4(v4i32, __VA_ARGS__) /* Description : Dot product & addition of double word vector elements Arguments : Inputs - mult0, mult1 Outputs - out0, out1 Return Type - as per RTYPE Details : Each signed word element from 'mult0' is multiplied with itself producing an intermediate result twice the size of it i.e. signed double word The multiplication result of adjacent odd-even elements are added to the 'out0' vector */ #define DPADD_SD2(RTYPE, mult0, mult1, out0, out1) \ { \ out0 = (RTYPE)__msa_dpadd_s_d((v2i64)out0, (v4i32)mult0, (v4i32)mult0); \ out1 = (RTYPE)__msa_dpadd_s_d((v2i64)out1, (v4i32)mult1, (v4i32)mult1); \ } #define DPADD_SD2_SD(...) DPADD_SD2(v2i64, __VA_ARGS__) /* Description : Clips all signed halfword elements of input vector between 0 & 255 Arguments : Input - in Output - out_m Return Type - signed halfword */ #define CLIP_SH_0_255(in) \ ({ \ v8i16 max_m = __msa_ldi_h(255); \ v8i16 out_m; \ \ out_m = __msa_maxi_s_h((v8i16)in, 0); \ out_m = __msa_min_s_h((v8i16)max_m, (v8i16)out_m); \ out_m; \ }) #define CLIP_SH2_0_255(in0, in1) \ { \ in0 = CLIP_SH_0_255(in0); \ in1 = CLIP_SH_0_255(in1); \ } #define CLIP_SH4_0_255(in0, in1, in2, in3) \ { \ CLIP_SH2_0_255(in0, in1); \ CLIP_SH2_0_255(in2, in3); \ } /* Description : Clips all signed word elements of input vector between 0 & 255 Arguments : Input - in Output - out_m Return Type - signed word */ #define CLIP_SW_0_255(in) \ ({ \ v4i32 max_m = __msa_ldi_w(255); \ v4i32 out_m; \ \ out_m = __msa_maxi_s_w((v4i32)in, 0); \ out_m = __msa_min_s_w((v4i32)max_m, (v4i32)out_m); \ out_m; \ }) /* Description : Horizontal addition of 4 signed word elements of input vector Arguments : Input - in (signed word vector) Output - sum_m (i32 sum) Return Type - signed word (GP) Details : 4 signed word elements of 'in' vector are added together and the resulting integer sum is returned */ #define HADD_SW_S32(in) \ ({ \ v2i64 res0_m, res1_m; \ int32_t sum_m; \ \ res0_m = __msa_hadd_s_d((v4i32)in, (v4i32)in); \ res1_m = __msa_splati_d(res0_m, 1); \ res0_m = res0_m + res1_m; \ sum_m = __msa_copy_s_w((v4i32)res0_m, 0); \ sum_m; \ }) /* Description : Horizontal addition of 8 unsigned halfword elements Arguments : Inputs - in (unsigned halfword vector) Outputs - sum_m (u32 sum) Return Type - unsigned word Details : 8 unsigned halfword elements of input vector are added together and the resulting integer sum is returned */ #define HADD_UH_U32(in) \ ({ \ v4u32 res_m; \ v2u64 res0_m, res1_m; \ uint32_t sum_m; \ \ res_m = __msa_hadd_u_w((v8u16)in, (v8u16)in); \ res0_m = __msa_hadd_u_d(res_m, res_m); \ res1_m = (v2u64)__msa_splati_d((v2i64)res0_m, 1); \ res0_m = res0_m + res1_m; \ sum_m = __msa_copy_u_w((v4i32)res0_m, 0); \ sum_m; \ }) /* Description : Horizontal addition of unsigned byte vector elements Arguments : Inputs - in0, in1 Outputs - out0, out1 Return Type - as per RTYPE Details : Each unsigned odd byte element from 'in0' is added 
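/*
 * Editor's note: scalar equivalent (helper name invented) of
 * HADD_SW_S32 above: pairwise widen-and-add to doublewords, fold the
 * two halves, then take the low word (__msa_copy_s_w, lane 0).
 */
#include <stdint.h>

static int32_t hadd_sw_s32_model(const int32_t w[4]) {
  int64_t d0 = (int64_t)w[0] + w[1]; /* __msa_hadd_s_d, lane 0 */
  int64_t d1 = (int64_t)w[2] + w[3]; /* __msa_hadd_s_d, lane 1 */
  return (int32_t)(d0 + d1);
}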
to even unsigned byte element from 'in0' (pairwise) and the halfword result is written to 'out0' */ #define HADD_UB2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_hadd_u_h((v16u8)in0, (v16u8)in0); \ out1 = (RTYPE)__msa_hadd_u_h((v16u8)in1, (v16u8)in1); \ } #define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__) /* Description : Horizontal subtraction of unsigned byte vector elements Arguments : Inputs - in0, in1 Outputs - out0, out1 Return Type - as per RTYPE Details : Each unsigned odd byte element from 'in0' is subtracted from even unsigned byte element from 'in0' (pairwise) and the halfword result is written to 'out0' */ #define HSUB_UB2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_hsub_u_h((v16u8)in0, (v16u8)in0); \ out1 = (RTYPE)__msa_hsub_u_h((v16u8)in1, (v16u8)in1); \ } #define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__) /* Description : Horizontal subtraction of signed halfword vector elements Arguments : Inputs - in0, in1 Outputs - out0, out1 Return Type - as per RTYPE Details : Each signed odd halfword element from 'in0' is subtracted from even signed halfword element from 'in0' (pairwise) and the word result is written to 'out0' */ #define HSUB_UH2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_hsub_s_w((v8i16)in0, (v8i16)in0); \ out1 = (RTYPE)__msa_hsub_s_w((v8i16)in1, (v8i16)in1); \ } #define HSUB_UH2_SW(...) HSUB_UH2(v4i32, __VA_ARGS__) /* Description : Set element n input vector to GPR value Arguments : Inputs - in0, in1, in2, in3 Output - out Return Type - as per RTYPE Details : Set element 0 in vector 'out' to value specified in 'in0' */ #define INSERT_D2(RTYPE, in0, in1, out) \ { \ out = (RTYPE)__msa_insert_d((v2i64)out, 0, in0); \ out = (RTYPE)__msa_insert_d((v2i64)out, 1, in1); \ } #define INSERT_D2_SB(...) INSERT_D2(v16i8, __VA_ARGS__) /* Description : Interleave even byte elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even byte elements of 'in0' and 'in1' are interleaved and written to 'out0' */ #define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \ out1 = (RTYPE)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \ } #define ILVEV_B2_UB(...) ILVEV_B2(v16u8, __VA_ARGS__) #define ILVEV_B2_SH(...) ILVEV_B2(v8i16, __VA_ARGS__) #define ILVEV_B2_SD(...) ILVEV_B2(v2i64, __VA_ARGS__) /* Description : Interleave even halfword elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even halfword elements of 'in0' and 'in1' are interleaved and written to 'out0' */ #define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \ out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \ } #define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__) #define ILVEV_H2_SH(...) ILVEV_H2(v8i16, __VA_ARGS__) /* Description : Interleave even word elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even word elements of 'in0' and 'in1' are interleaved and written to 'out0' */ #define ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvev_w((v4i32)in1, (v4i32)in0); \ out1 = (RTYPE)__msa_ilvev_w((v4i32)in3, (v4i32)in2); \ } #define ILVEV_W2_SD(...) 
ILVEV_W2(v2i64, __VA_ARGS__) /* Description : Interleave even double word elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even double word elements of 'in0' and 'in1' are interleaved and written to 'out0' */ #define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvev_d((v2i64)in1, (v2i64)in0); \ out1 = (RTYPE)__msa_ilvev_d((v2i64)in3, (v2i64)in2); \ } #define ILVEV_D2_UB(...) ILVEV_D2(v16u8, __VA_ARGS__) /* Description : Interleave left half of byte elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Left half of byte elements of 'in0' and 'in1' are interleaved and written to 'out0'. */ #define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ out1 = (RTYPE)__msa_ilvl_b((v16i8)in2, (v16i8)in3); \ } #define ILVL_B2_UB(...) ILVL_B2(v16u8, __VA_ARGS__) #define ILVL_B2_SB(...) ILVL_B2(v16i8, __VA_ARGS__) #define ILVL_B2_SH(...) ILVL_B2(v8i16, __VA_ARGS__) #define ILVL_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ ILVL_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define ILVL_B4_SB(...) ILVL_B4(v16i8, __VA_ARGS__) #define ILVL_B4_SH(...) ILVL_B4(v8i16, __VA_ARGS__) /* Description : Interleave left half of halfword elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Left half of halfword elements of 'in0' and 'in1' are interleaved and written to 'out0'. */ #define ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \ out1 = (RTYPE)__msa_ilvl_h((v8i16)in2, (v8i16)in3); \ } #define ILVL_H2_SH(...) ILVL_H2(v8i16, __VA_ARGS__) #define ILVL_H2_SW(...) ILVL_H2(v4i32, __VA_ARGS__) /* Description : Interleave left half of word elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Left half of word elements of 'in0' and 'in1' are interleaved and written to 'out0'. */ #define ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \ out1 = (RTYPE)__msa_ilvl_w((v4i32)in2, (v4i32)in3); \ } #define ILVL_W2_SH(...) ILVL_W2(v8i16, __VA_ARGS__) /* Description : Interleave right half of byte elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Right half of byte elements of 'in0' and 'in1' are interleaved and written to out0. */ #define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ out1 = (RTYPE)__msa_ilvr_b((v16i8)in2, (v16i8)in3); \ } #define ILVR_B2_UB(...) ILVR_B2(v16u8, __VA_ARGS__) #define ILVR_B2_SB(...) ILVR_B2(v16i8, __VA_ARGS__) #define ILVR_B2_SH(...) ILVR_B2(v8i16, __VA_ARGS__) #define ILVR_B2_SW(...) ILVR_B2(v4i32, __VA_ARGS__) #define ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define ILVR_B4_UB(...) ILVR_B4(v16u8, __VA_ARGS__) #define ILVR_B4_SB(...) ILVR_B4(v16i8, __VA_ARGS__) #define ILVR_B4_UH(...) ILVR_B4(v8u16, __VA_ARGS__) #define ILVR_B4_SH(...) ILVR_B4(v8i16, __VA_ARGS__) #define ILVR_B4_SW(...) 
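/*
 * Editor's note: scalar models (helper names invented) of the byte
 * interleaves wrapped above.  Note the operand order used by this
 * header: ILVR_B2(RTYPE, in0, in1, ...) expands to
 * __msa_ilvr_b(in0, in1), so 'in1' supplies the even (first) lane of
 * each output pair.
 */
#include <stdint.h>

static void ilvr_b_model(const int8_t a[16], const int8_t b[16],
                         int8_t out[16]) { /* right (low) halves */
  int i;
  for (i = 0; i < 8; ++i) {
    out[2 * i] = b[i];
    out[2 * i + 1] = a[i];
  }
}

static void ilvl_b_model(const int8_t a[16], const int8_t b[16],
                         int8_t out[16]) { /* left (high) halves */
  int i;
  for (i = 0; i < 8; ++i) {
    out[2 * i] = b[8 + i];
    out[2 * i + 1] = a[8 + i];
  }
}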
ILVR_B4(v4i32, __VA_ARGS__) /* Description : Interleave right half of halfword elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Right half of halfword elements of 'in0' and 'in1' are interleaved and written to 'out0'. */ #define ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \ out1 = (RTYPE)__msa_ilvr_h((v8i16)in2, (v8i16)in3); \ } #define ILVR_H2_SH(...) ILVR_H2(v8i16, __VA_ARGS__) #define ILVR_H2_SW(...) ILVR_H2(v4i32, __VA_ARGS__) #define ILVR_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ ILVR_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define ILVR_H4_SH(...) ILVR_H4(v8i16, __VA_ARGS__) #define ILVR_H4_SW(...) ILVR_H4(v4i32, __VA_ARGS__) #define ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \ out1 = (RTYPE)__msa_ilvr_w((v4i32)in2, (v4i32)in3); \ } #define ILVR_W2_SH(...) ILVR_W2(v8i16, __VA_ARGS__) /* Description : Interleave right half of double word elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Right half of double word elements of 'in0' and 'in1' are interleaved and written to 'out0'. */ #define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_d((v2i64)(in0), (v2i64)(in1)); \ out1 = (RTYPE)__msa_ilvr_d((v2i64)(in2), (v2i64)(in3)); \ } #define ILVR_D2_UB(...) ILVR_D2(v16u8, __VA_ARGS__) #define ILVR_D2_SB(...) ILVR_D2(v16i8, __VA_ARGS__) #define ILVR_D2_SH(...) ILVR_D2(v8i16, __VA_ARGS__) #define ILVR_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define ILVR_D4_SB(...) ILVR_D4(v16i8, __VA_ARGS__) #define ILVR_D4_UB(...) ILVR_D4(v16u8, __VA_ARGS__) /* Description : Interleave both left and right half of input vectors Arguments : Inputs - in0, in1 Outputs - out0, out1 Return Type - as per RTYPE Details : Right half of byte elements from 'in0' and 'in1' are interleaved and written to 'out0' */ #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ } #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) #define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__) #define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__) #define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__) #define ILVRL_H2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \ out1 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \ } #define ILVRL_H2_SH(...) ILVRL_H2(v8i16, __VA_ARGS__) #define ILVRL_H2_SW(...) ILVRL_H2(v4i32, __VA_ARGS__) #define ILVRL_W2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \ out1 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \ } #define ILVRL_W2_UB(...) ILVRL_W2(v16u8, __VA_ARGS__) #define ILVRL_W2_SH(...) ILVRL_W2(v8i16, __VA_ARGS__) #define ILVRL_W2_SW(...) 
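/* Illustrative note (not part of libvpx): ILVRL_B2 and friends simply
   combine one "right" and one "left" interleave of the same operands, so
   a single call yields all 32 interleaved bytes across two output
   vectors. A common widening idiom built on this appears later in
   UNPCK_UB_SH, which interleaves with a zero vector so that each byte
   lands in its own halfword lane. */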
ILVRL_W2(v4i32, __VA_ARGS__) /* Description : Maximum values between signed elements of vector and 5-bit signed immediate value are copied to the output vector Arguments : Inputs - in0, in1, in2, in3, max_val Outputs - in place operation Return Type - unsigned halfword Details : Maximum of signed halfword element values from 'in0' and 'max_val' are written in place */ #define MAXI_SH2(RTYPE, in0, in1, max_val) \ { \ in0 = (RTYPE)__msa_maxi_s_h((v8i16)in0, (max_val)); \ in1 = (RTYPE)__msa_maxi_s_h((v8i16)in1, (max_val)); \ } #define MAXI_SH2_SH(...) MAXI_SH2(v8i16, __VA_ARGS__) /* Description : Saturate the halfword element values to the max unsigned value of (sat_val + 1) bits The element data width remains unchanged Arguments : Inputs - in0, in1, sat_val Outputs - in place operation Return Type - as per RTYPE Details : Each unsigned halfword element from 'in0' is saturated to the value generated with (sat_val + 1) bit range. The results are written in place */ #define SAT_UH2(RTYPE, in0, in1, sat_val) \ { \ in0 = (RTYPE)__msa_sat_u_h((v8u16)in0, sat_val); \ in1 = (RTYPE)__msa_sat_u_h((v8u16)in1, sat_val); \ } #define SAT_UH2_SH(...) SAT_UH2(v8i16, __VA_ARGS__) /* Description : Saturate the halfword element values to the max unsigned value of (sat_val + 1) bits The element data width remains unchanged Arguments : Inputs - in0, in1, sat_val Outputs - in place operation Return Type - as per RTYPE Details : Each unsigned halfword element from 'in0' is saturated to the value generated with (sat_val + 1) bit range The results are written in place */ #define SAT_SH2(RTYPE, in0, in1, sat_val) \ { \ in0 = (RTYPE)__msa_sat_s_h((v8i16)in0, sat_val); \ in1 = (RTYPE)__msa_sat_s_h((v8i16)in1, sat_val); \ } #define SAT_SH2_SH(...) SAT_SH2(v8i16, __VA_ARGS__) #define SAT_SH4(RTYPE, in0, in1, in2, in3, sat_val) \ { \ SAT_SH2(RTYPE, in0, in1, sat_val); \ SAT_SH2(RTYPE, in2, in3, sat_val); \ } #define SAT_SH4_SH(...) SAT_SH4(v8i16, __VA_ARGS__) /* Description : Indexed halfword element values are replicated to all elements in output vector Arguments : Inputs - in, idx0, idx1 Outputs - out0, out1 Return Type - as per RTYPE Details : 'idx0' element value from 'in' vector is replicated to all elements in 'out0' vector Valid index range for halfword operation is 0-7 */ #define SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) \ { \ out0 = (RTYPE)__msa_splati_h((v8i16)in, idx0); \ out1 = (RTYPE)__msa_splati_h((v8i16)in, idx1); \ } #define SPLATI_H2_SB(...) SPLATI_H2(v16i8, __VA_ARGS__) #define SPLATI_H2_SH(...) SPLATI_H2(v8i16, __VA_ARGS__) #define SPLATI_H3(RTYPE, in, idx0, idx1, idx2, out0, out1, out2) \ { \ SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1); \ out2 = (RTYPE)__msa_splati_h((v8i16)in, idx2); \ } #define SPLATI_H3_SB(...) SPLATI_H3(v16i8, __VA_ARGS__) #define SPLATI_H3_SH(...) SPLATI_H3(v8i16, __VA_ARGS__) /* Description : Indexed word element values are replicated to all elements in output vector Arguments : Inputs - in, stidx Outputs - out0, out1 Return Type - as per RTYPE Details : 'stidx' element value from 'in' vector is replicated to all elements in 'out0' vector 'stidx + 1' element value from 'in' vector is replicated to all elements in 'out1' vector Valid index range for word operation is 0-3 */ #define SPLATI_W2(RTYPE, in, stidx, out0, out1) \ { \ out0 = (RTYPE)__msa_splati_w((v4i32)in, stidx); \ out1 = (RTYPE)__msa_splati_w((v4i32)in, (stidx + 1)); \ } #define SPLATI_W2_SW(...) 
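/* Worked example (illustrative, not part of libvpx): SAT_UH2 with
   sat_val 7 clamps each unsigned halfword to the 8-bit maximum, so
   { 300, 96, 255, ... } becomes { 255, 96, 255, ... }; the signed form
   SAT_SH2 with sat_val 7 clamps to [ -128, 127 ]. A scalar equivalent
   for the unsigned case, assuming a (sat_val + 1)-bit result range:

     max = (1u << (sat_val + 1)) - 1;
     out = in > max ? max : in;
*/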
SPLATI_W2(v4i32, __VA_ARGS__) /* Description : Pack even byte elements of vector pairs Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even byte elements of 'in0' are copied to the left half of 'out0' & even byte elements of 'in1' are copied to the right half of 'out0'. */ #define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_pckev_b((v16i8)in0, (v16i8)in1); \ out1 = (RTYPE)__msa_pckev_b((v16i8)in2, (v16i8)in3); \ } #define PCKEV_B2_SB(...) PCKEV_B2(v16i8, __VA_ARGS__) #define PCKEV_B2_UB(...) PCKEV_B2(v16u8, __VA_ARGS__) #define PCKEV_B2_SH(...) PCKEV_B2(v8i16, __VA_ARGS__) #define PCKEV_B2_SW(...) PCKEV_B2(v4i32, __VA_ARGS__) #define PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ PCKEV_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define PCKEV_B4_SB(...) PCKEV_B4(v16i8, __VA_ARGS__) #define PCKEV_B4_UB(...) PCKEV_B4(v16u8, __VA_ARGS__) #define PCKEV_B4_SH(...) PCKEV_B4(v8i16, __VA_ARGS__) /* Description : Pack even halfword elements of vector pairs Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even halfword elements of 'in0' are copied to the left half of 'out0' & even halfword elements of 'in1' are copied to the right half of 'out0'. */ #define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_pckev_h((v8i16)in0, (v8i16)in1); \ out1 = (RTYPE)__msa_pckev_h((v8i16)in2, (v8i16)in3); \ } #define PCKEV_H2_SH(...) PCKEV_H2(v8i16, __VA_ARGS__) #define PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define PCKEV_H4_SH(...) PCKEV_H4(v8i16, __VA_ARGS__) /* Description : Pack even double word elements of vector pairs Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even double elements of 'in0' are copied to the left half of 'out0' & even double elements of 'in1' are copied to the right half of 'out0'. */ #define PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_pckev_d((v2i64)in0, (v2i64)in1); \ out1 = (RTYPE)__msa_pckev_d((v2i64)in2, (v2i64)in3); \ } #define PCKEV_D2_UB(...) PCKEV_D2(v16u8, __VA_ARGS__) #define PCKEV_D2_SH(...) PCKEV_D2(v8i16, __VA_ARGS__) /* Description : Pack odd double word elements of vector pairs Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Odd double word elements of 'in0' are copied to the left half of 'out0' & odd double word elements of 'in1' are copied to the right half of 'out0'. */ #define PCKOD_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_pckod_d((v2i64)in0, (v2i64)in1); \ out1 = (RTYPE)__msa_pckod_d((v2i64)in2, (v2i64)in3); \ } #define PCKOD_D2_UB(...) PCKOD_D2(v16u8, __VA_ARGS__) #define PCKOD_D2_SH(...) PCKOD_D2(v8i16, __VA_ARGS__) /* Description : Each byte element is logically xor'ed with immediate 128 Arguments : Inputs - in0, in1 Outputs - in place operation Return Type - as per RTYPE Details : Each unsigned byte element from input vector 'in0' is logically xor'ed with 128 and the result is stored in-place. */ #define XORI_B2_128(RTYPE, in0, in1) \ { \ in0 = (RTYPE)__msa_xori_b((v16u8)in0, 128); \ in1 = (RTYPE)__msa_xori_b((v16u8)in1, 128); \ } #define XORI_B2_128_UB(...) 
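/* Illustrative note (not part of libvpx): per the Details text above,
   pckev places the even lanes of the second operand in the low half of
   the result and the even lanes of the first operand in the high half,
   i.e. for PCKEV_B2 roughly

     out0 = { in1[0], in1[2], ..., in1[14], in0[0], in0[2], ..., in0[14] };

   The XORI_*_128 helpers that follow flip the sign bit of every byte:
   an unsigned sample p becomes the signed value p - 128, and applying
   the xor twice restores the original, which is how unsigned pixels can
   pass through signed dot-product instructions and be converted back. */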
XORI_B2_128(v16u8, __VA_ARGS__) #define XORI_B2_128_SB(...) XORI_B2_128(v16i8, __VA_ARGS__) #define XORI_B3_128(RTYPE, in0, in1, in2) \ { \ XORI_B2_128(RTYPE, in0, in1); \ in2 = (RTYPE)__msa_xori_b((v16u8)in2, 128); \ } #define XORI_B3_128_SB(...) XORI_B3_128(v16i8, __VA_ARGS__) #define XORI_B4_128(RTYPE, in0, in1, in2, in3) \ { \ XORI_B2_128(RTYPE, in0, in1); \ XORI_B2_128(RTYPE, in2, in3); \ } #define XORI_B4_128_UB(...) XORI_B4_128(v16u8, __VA_ARGS__) #define XORI_B4_128_SB(...) XORI_B4_128(v16i8, __VA_ARGS__) #define XORI_B5_128(RTYPE, in0, in1, in2, in3, in4) \ { \ XORI_B3_128(RTYPE, in0, in1, in2); \ XORI_B2_128(RTYPE, in3, in4); \ } #define XORI_B5_128_SB(...) XORI_B5_128(v16i8, __VA_ARGS__) #define XORI_B8_128(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7) \ { \ XORI_B4_128(RTYPE, in0, in1, in2, in3); \ XORI_B4_128(RTYPE, in4, in5, in6, in7); \ } #define XORI_B8_128_SB(...) XORI_B8_128(v16i8, __VA_ARGS__) /* Description : Shift left all elements of vector (generic for all data types) Arguments : Inputs - in0, in1, in2, in3, shift Outputs - in place operation Return Type - as per input vector RTYPE Details : Each element of vector 'in0' is left shifted by 'shift' and the result is written in-place. */ #define SLLI_4V(in0, in1, in2, in3, shift) \ { \ in0 = in0 << shift; \ in1 = in1 << shift; \ in2 = in2 << shift; \ in3 = in3 << shift; \ } /* Description : Arithmetic shift right all elements of vector (generic for all data types) Arguments : Inputs - in0, in1, in2, in3, shift Outputs - in place operation Return Type - as per input vector RTYPE Details : Each element of vector 'in0' is right shifted by 'shift' and the result is written in-place. 'shift' is a GP variable. */ #define SRA_4V(in0, in1, in2, in3, shift) \ { \ in0 = in0 >> shift; \ in1 = in1 >> shift; \ in2 = in2 >> shift; \ in3 = in3 >> shift; \ } /* Description : Shift right arithmetic rounded words Arguments : Inputs - in0, in1, shift Outputs - in place operation Return Type - as per RTYPE Details : Each element of vector 'in0' is shifted right arithmetically by the number of bits in the corresponding element in the vector 'shift'. The last discarded bit is added to shifted value for rounding and the result is written in-place. 'shift' is a vector. */ #define SRAR_W2(RTYPE, in0, in1, shift) \ { \ in0 = (RTYPE)__msa_srar_w((v4i32)in0, (v4i32)shift); \ in1 = (RTYPE)__msa_srar_w((v4i32)in1, (v4i32)shift); \ } #define SRAR_W4(RTYPE, in0, in1, in2, in3, shift) \ { \ SRAR_W2(RTYPE, in0, in1, shift); \ SRAR_W2(RTYPE, in2, in3, shift); \ } #define SRAR_W4_SW(...) SRAR_W4(v4i32, __VA_ARGS__) /* Description : Shift right arithmetic rounded (immediate) Arguments : Inputs - in0, in1, shift Outputs - in place operation Return Type - as per RTYPE Details : Each element of vector 'in0' is shifted right arithmetically by the value in 'shift'. The last discarded bit is added to the shifted value for rounding and the result is written in-place. 'shift' is an immediate value. */ #define SRARI_H2(RTYPE, in0, in1, shift) \ { \ in0 = (RTYPE)__msa_srari_h((v8i16)in0, shift); \ in1 = (RTYPE)__msa_srari_h((v8i16)in1, shift); \ } #define SRARI_H2_UH(...) SRARI_H2(v8u16, __VA_ARGS__) #define SRARI_H2_SH(...) SRARI_H2(v8i16, __VA_ARGS__) #define SRARI_H4(RTYPE, in0, in1, in2, in3, shift) \ { \ SRARI_H2(RTYPE, in0, in1, shift); \ SRARI_H2(RTYPE, in2, in3, shift); \ } #define SRARI_H4_UH(...) SRARI_H4(v8u16, __VA_ARGS__) #define SRARI_H4_SH(...) 
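/* Worked example (illustrative, not part of libvpx): for shift > 0 the
   rounding shifts above behave like

     srari(x, shift) == (x + (1 << (shift - 1))) >> shift

   with an arithmetic right shift, i.e. the last bit shifted out is added
   back in: srari(13, 2) = (13 + 2) >> 2 = 3, srari(14, 2) = 4, and
   srari(-13, 2) = (-13 + 2) >> 2 = -3. */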
SRARI_H4(v8i16, __VA_ARGS__) #define SRARI_W2(RTYPE, in0, in1, shift) \ { \ in0 = (RTYPE)__msa_srari_w((v4i32)in0, shift); \ in1 = (RTYPE)__msa_srari_w((v4i32)in1, shift); \ } #define SRARI_W4(RTYPE, in0, in1, in2, in3, shift) \ { \ SRARI_W2(RTYPE, in0, in1, shift); \ SRARI_W2(RTYPE, in2, in3, shift); \ } #define SRARI_W4_SW(...) SRARI_W4(v4i32, __VA_ARGS__) /* Description : Multiplication of pairs of vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Details : Each element from 'in0' is multiplied with elements from 'in1' and the result is written to 'out0' */ #define MUL2(in0, in1, in2, in3, out0, out1) \ { \ out0 = in0 * in1; \ out1 = in2 * in3; \ } #define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ { \ MUL2(in0, in1, in2, in3, out0, out1); \ MUL2(in4, in5, in6, in7, out2, out3); \ } /* Description : Addition of 2 pairs of vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Details : Each element in 'in0' is added to 'in1' and result is written to 'out0'. */ #define ADD2(in0, in1, in2, in3, out0, out1) \ { \ out0 = in0 + in1; \ out1 = in2 + in3; \ } #define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ { \ ADD2(in0, in1, in2, in3, out0, out1); \ ADD2(in4, in5, in6, in7, out2, out3); \ } /* Description : Subtraction of 2 pairs of vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Details : Each element in 'in1' is subtracted from 'in0' and result is written to 'out0'. */ #define SUB2(in0, in1, in2, in3, out0, out1) \ { \ out0 = in0 - in1; \ out1 = in2 - in3; \ } #define SUB4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ { \ out0 = in0 - in1; \ out1 = in2 - in3; \ out2 = in4 - in5; \ out3 = in6 - in7; \ } /* Description : Sign extend halfword elements from right half of the vector Arguments : Input - in (halfword vector) Output - out (sign extended word vector) Return Type - signed word Details : Sign bit of halfword elements from input vector 'in' is extracted and interleaved with same vector 'in0' to generate 4 word elements keeping sign intact */ #define UNPCK_R_SH_SW(in, out) \ { \ v8i16 sign_m; \ \ sign_m = __msa_clti_s_h((v8i16)in, 0); \ out = (v4i32)__msa_ilvr_h(sign_m, (v8i16)in); \ } /* Description : Zero extend unsigned byte elements to halfword elements Arguments : Input - in (unsigned byte vector) Outputs - out0, out1 (unsigned halfword vectors) Return Type - signed halfword Details : Zero extended right half of vector is returned in 'out0' Zero extended left half of vector is returned in 'out1' */ #define UNPCK_UB_SH(in, out0, out1) \ { \ v16i8 zero_m = { 0 }; \ \ ILVRL_B2_SH(zero_m, in, out0, out1); \ } /* Description : Sign extend halfword elements from input vector and return the result in pair of vectors Arguments : Input - in (halfword vector) Outputs - out0, out1 (sign extended word vectors) Return Type - signed word Details : Sign bit of halfword elements from input vector 'in' is extracted and interleaved right with same vector 'in0' to generate 4 signed word elements in 'out0' Then interleaved left with same vector 'in0' to generate 4 signed word elements in 'out1' */ #define UNPCK_SH_SW(in, out0, out1) \ { \ v8i16 tmp_m; \ \ tmp_m = __msa_clti_s_h((v8i16)in, 0); \ ILVRL_H2_SW(tmp_m, in, out0, out1); \ } /* Description : Butterfly of 4 input vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1, out2, out3 Details : Butterfly operation */ #define BUTTERFLY_4(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ out0 = in0 + in3; \ 
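/* Worked example (illustrative): for inputs (a, b, c, d) this butterfly
   produces (a + d, b + c, b - c, a - d); applying the same butterfly to
   that result gives (2a, 2b, 2c, 2d), i.e. it inverts itself up to a
   factor of 2. */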
out1 = in1 + in2; \ \ out2 = in1 - in2; \ out3 = in0 - in3; \ } /* Description : Transpose input 8x8 byte block Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 Outputs - out0, out1, out2, out3, out4, out5, out6, out7 Return Type - as per RTYPE */ #define TRANSPOSE8x8_UB(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, \ out1, out2, out3, out4, out5, out6, out7) \ { \ v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ v16i8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \ \ ILVR_B4_SB(in2, in0, in3, in1, in6, in4, in7, in5, tmp0_m, tmp1_m, tmp2_m, \ tmp3_m); \ ILVRL_B2_SB(tmp1_m, tmp0_m, tmp4_m, tmp5_m); \ ILVRL_B2_SB(tmp3_m, tmp2_m, tmp6_m, tmp7_m); \ ILVRL_W2(RTYPE, tmp6_m, tmp4_m, out0, out2); \ ILVRL_W2(RTYPE, tmp7_m, tmp5_m, out4, out6); \ SLDI_B2_0(RTYPE, out0, out2, out1, out3, 8); \ SLDI_B2_0(RTYPE, out4, out6, out5, out7, 8); \ } #define TRANSPOSE8x8_UB_UB(...) TRANSPOSE8x8_UB(v16u8, __VA_ARGS__) /* Description : Transpose 16x4 block into 4x16 with byte elements in vectors Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, in13, in14, in15 Outputs - out0, out1, out2, out3 Return Type - unsigned byte */ #define TRANSPOSE16x4_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, \ in10, in11, in12, in13, in14, in15, out0, out1, \ out2, out3) \ { \ v2i64 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ \ ILVEV_W2_SD(in0, in4, in8, in12, tmp0_m, tmp1_m); \ out1 = (v16u8)__msa_ilvev_d(tmp1_m, tmp0_m); \ \ ILVEV_W2_SD(in1, in5, in9, in13, tmp0_m, tmp1_m); \ out3 = (v16u8)__msa_ilvev_d(tmp1_m, tmp0_m); \ \ ILVEV_W2_SD(in2, in6, in10, in14, tmp0_m, tmp1_m); \ \ tmp2_m = __msa_ilvev_d(tmp1_m, tmp0_m); \ ILVEV_W2_SD(in3, in7, in11, in15, tmp0_m, tmp1_m); \ \ tmp3_m = __msa_ilvev_d(tmp1_m, tmp0_m); \ ILVEV_B2_SD(out1, out3, tmp2_m, tmp3_m, tmp0_m, tmp1_m); \ out0 = (v16u8)__msa_ilvev_h((v8i16)tmp1_m, (v8i16)tmp0_m); \ out2 = (v16u8)__msa_ilvod_h((v8i16)tmp1_m, (v8i16)tmp0_m); \ \ tmp0_m = (v2i64)__msa_ilvod_b((v16i8)out3, (v16i8)out1); \ tmp1_m = (v2i64)__msa_ilvod_b((v16i8)tmp3_m, (v16i8)tmp2_m); \ out1 = (v16u8)__msa_ilvev_h((v8i16)tmp1_m, (v8i16)tmp0_m); \ out3 = (v16u8)__msa_ilvod_h((v8i16)tmp1_m, (v8i16)tmp0_m); \ } /* Description : Transpose 16x8 block into 8x16 with byte elements in vectors Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, in13, in14, in15 Outputs - out0, out1, out2, out3, out4, out5, out6, out7 Return Type - unsigned byte */ #define TRANSPOSE16x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, \ in10, in11, in12, in13, in14, in15, out0, out1, \ out2, out3, out4, out5, out6, out7) \ { \ v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ v16u8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \ \ ILVEV_D2_UB(in0, in8, in1, in9, out7, out6); \ ILVEV_D2_UB(in2, in10, in3, in11, out5, out4); \ ILVEV_D2_UB(in4, in12, in5, in13, out3, out2); \ ILVEV_D2_UB(in6, in14, in7, in15, out1, out0); \ \ tmp0_m = (v16u8)__msa_ilvev_b((v16i8)out6, (v16i8)out7); \ tmp4_m = (v16u8)__msa_ilvod_b((v16i8)out6, (v16i8)out7); \ tmp1_m = (v16u8)__msa_ilvev_b((v16i8)out4, (v16i8)out5); \ tmp5_m = (v16u8)__msa_ilvod_b((v16i8)out4, (v16i8)out5); \ out5 = (v16u8)__msa_ilvev_b((v16i8)out2, (v16i8)out3); \ tmp6_m = (v16u8)__msa_ilvod_b((v16i8)out2, (v16i8)out3); \ out7 = (v16u8)__msa_ilvev_b((v16i8)out0, (v16i8)out1); \ tmp7_m = (v16u8)__msa_ilvod_b((v16i8)out0, (v16i8)out1); \ \ ILVEV_H2_UB(tmp0_m, tmp1_m, out5, out7, tmp2_m, tmp3_m); \ out0 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ out4 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ \ tmp2_m = 
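/* Note (illustrative): this transpose, like the 16x4 variant above,
   pairs even-lane (ilvev) and odd-lane (ilvod) interleaves at several
   element widths to route each input byte to its transposed position.
   A few of the tmp2_m/tmp3_m assignments below are duplicated verbatim;
   the repeats are redundant and have no effect beyond the first. */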
(v16u8)__msa_ilvod_h((v8i16)tmp1_m, (v8i16)tmp0_m); \ tmp3_m = (v16u8)__msa_ilvod_h((v8i16)out7, (v8i16)out5); \ out2 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ out6 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ \ ILVEV_H2_UB(tmp4_m, tmp5_m, tmp6_m, tmp7_m, tmp2_m, tmp3_m); \ out1 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ out5 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ \ tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp5_m, (v8i16)tmp4_m); \ tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp5_m, (v8i16)tmp4_m); \ tmp3_m = (v16u8)__msa_ilvod_h((v8i16)tmp7_m, (v8i16)tmp6_m); \ tmp3_m = (v16u8)__msa_ilvod_h((v8i16)tmp7_m, (v8i16)tmp6_m); \ out3 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ out7 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ } /* Description : Transpose 4x4 block with half word elements in vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1, out2, out3 Return Type - signed halfword */ #define TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v8i16 s0_m, s1_m; \ \ ILVR_H2_SH(in1, in0, in3, in2, s0_m, s1_m); \ ILVRL_W2_SH(s1_m, s0_m, out0, out2); \ out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \ out3 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out2); \ } /* Description : Transpose 8x4 block with half word elements in vectors Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 Outputs - out0, out1, out2, out3, out4, out5, out6, out7 Return Type - signed halfword */ #define TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ \ ILVR_H2_SH(in1, in0, in3, in2, tmp0_m, tmp1_m); \ ILVL_H2_SH(in1, in0, in3, in2, tmp2_m, tmp3_m); \ ILVR_W2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out2); \ ILVL_W2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out1, out3); \ } /* Description : Transpose 4x4 block with word elements in vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1, out2, out3 Return Type - signed word */ #define TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v4i32 s0_m, s1_m, s2_m, s3_m; \ \ ILVRL_W2_SW(in1, in0, s0_m, s1_m); \ ILVRL_W2_SW(in3, in2, s2_m, s3_m); \ \ out0 = (v4i32)__msa_ilvr_d((v2i64)s2_m, (v2i64)s0_m); \ out1 = (v4i32)__msa_ilvl_d((v2i64)s2_m, (v2i64)s0_m); \ out2 = (v4i32)__msa_ilvr_d((v2i64)s3_m, (v2i64)s1_m); \ out3 = (v4i32)__msa_ilvl_d((v2i64)s3_m, (v2i64)s1_m); \ } /* Description : Dot product and addition of 3 signed halfword input vectors Arguments : Inputs - in0, in1, in2, coeff0, coeff1, coeff2 Output - out0_m Return Type - signed halfword Details : Dot product of 'in0' with 'coeff0' Dot product of 'in1' with 'coeff1' Dot product of 'in2' with 'coeff2' Addition of all the 3 vector results out0_m = (in0 * coeff0) + (in1 * coeff1) + (in2 * coeff2) */ #define DPADD_SH3_SH(in0, in1, in2, coeff0, coeff1, coeff2) \ ({ \ v8i16 tmp1_m; \ v8i16 out0_m; \ \ out0_m = __msa_dotp_s_h((v16i8)in0, (v16i8)coeff0); \ out0_m = __msa_dpadd_s_h(out0_m, (v16i8)in1, (v16i8)coeff1); \ tmp1_m = __msa_dotp_s_h((v16i8)in2, (v16i8)coeff2); \ out0_m = __msa_adds_s_h(out0_m, tmp1_m); \ \ out0_m; \ }) /* Description : Pack even elements of input vectors & xor with 128 Arguments : Inputs - in0, in1 Output - out_m Return Type - unsigned byte Details : Signed byte even elements from 'in0' and 'in1' are packed together in one vector and the resulting vector is xor'ed with 128 to shift the range from signed to unsigned byte */ #define PCKEV_XORI128_UB(in0, in1) \ ({ \ v16u8 out_m; \ out_m = 
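/* Note (illustrative): this is the output-side counterpart of the
   XORI_*_128 helpers above; packing the even bytes and xoring with 128
   maps each signed filter result s back to the unsigned sample s + 128. */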
(v16u8)__msa_pckev_b((v16i8)in1, (v16i8)in0); \ out_m = (v16u8)__msa_xori_b((v16u8)out_m, 128); \ out_m; \ }) /* Description : Pack even byte elements and store byte vector in destination memory Arguments : Inputs - in0, in1, pdst */ #define PCKEV_ST_SB(in0, in1, pdst) \ { \ v16i8 tmp_m; \ tmp_m = __msa_pckev_b((v16i8)in1, (v16i8)in0); \ ST_SB(tmp_m, (pdst)); \ } /* Description : Horizontal 2 tap filter kernel code Arguments : Inputs - in0, in1, mask, coeff, shift */ #define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift) \ ({ \ v16i8 tmp0_m; \ v8u16 tmp1_m; \ \ tmp0_m = __msa_vshf_b((v16i8)mask, (v16i8)in1, (v16i8)in0); \ tmp1_m = __msa_dotp_u_h((v16u8)tmp0_m, (v16u8)coeff); \ tmp1_m = (v8u16)__msa_srari_h((v8i16)tmp1_m, shift); \ \ tmp1_m; \ }) #endif // VPX_VP8_COMMON_MIPS_MSA_VP8_MACROS_MSA_H_ libvpx-1.8.2/vp8/common/modecont.c000066400000000000000000000012561357355204000170170ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "entropy.h" const int vp8_mode_contexts[6][4] = { { /* 0 */ 7, 1, 1, 143 }, { /* 1 */ 14, 18, 14, 107 }, { /* 2 */ 135, 64, 57, 68 }, { /* 3 */ 60, 56, 128, 65 }, { /* 4 */ 159, 134, 128, 34 }, { /* 5 */ 234, 188, 128, 28 }, }; libvpx-1.8.2/vp8/common/modecont.h000066400000000000000000000012101357355204000170120ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_MODECONT_H_ #define VPX_VP8_COMMON_MODECONT_H_ #ifdef __cplusplus extern "C" { #endif extern const int vp8_mode_contexts[6][4]; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_MODECONT_H_ libvpx-1.8.2/vp8/common/mv.h000066400000000000000000000014161357355204000156340ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_MV_H_ #define VPX_VP8_COMMON_MV_H_ #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif typedef struct { short row; short col; } MV; typedef union int_mv { uint32_t as_int; MV as_mv; } int_mv; /* facilitates faster equality tests and copies */ #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_MV_H_ libvpx-1.8.2/vp8/common/onyx.h000066400000000000000000000211211357355204000162020ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_ONYX_H_ #define VPX_VP8_COMMON_ONYX_H_ #ifdef __cplusplus extern "C" { #endif #include "vpx_config.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx/vp8cx.h" #include "vpx/vpx_encoder.h" #include "vpx_scale/yv12config.h" #include "ppflags.h" struct VP8_COMP; /* Create/destroy static data structures. */ typedef enum { NORMAL = 0, FOURFIVE = 1, THREEFIVE = 2, ONETWO = 3 } VPX_SCALING; typedef enum { USAGE_LOCAL_FILE_PLAYBACK = 0x0, USAGE_STREAM_FROM_SERVER = 0x1, USAGE_CONSTRAINED_QUALITY = 0x2, USAGE_CONSTANT_QUALITY = 0x3 } END_USAGE; typedef enum { MODE_REALTIME = 0x0, MODE_GOODQUALITY = 0x1, MODE_BESTQUALITY = 0x2, MODE_FIRSTPASS = 0x3, MODE_SECONDPASS = 0x4, MODE_SECONDPASS_BEST = 0x5 } MODE; typedef enum { FRAMEFLAGS_KEY = 1, FRAMEFLAGS_GOLDEN = 2, FRAMEFLAGS_ALTREF = 4 } FRAMETYPE_FLAGS; #include static INLINE void Scale2Ratio(int mode, int *hr, int *hs) { switch (mode) { case NORMAL: *hr = 1; *hs = 1; break; case FOURFIVE: *hr = 4; *hs = 5; break; case THREEFIVE: *hr = 3; *hs = 5; break; case ONETWO: *hr = 1; *hs = 2; break; default: *hr = 1; *hs = 1; assert(0); break; } } typedef struct { /* 4 versions of bitstream defined: * 0 best quality/slowest decode, 3 lowest quality/fastest decode */ int Version; int Width; int Height; struct vpx_rational timebase; unsigned int target_bandwidth; /* kilobits per second */ /* Parameter used for applying denoiser. * For temporal denoiser: noise_sensitivity = 0 means off, * noise_sensitivity = 1 means temporal denoiser on for Y channel only, * noise_sensitivity = 2 means temporal denoiser on for all channels. * noise_sensitivity = 3 means aggressive denoising mode. * noise_sensitivity >= 4 means adaptive denoising mode. * Temporal denoiser is enabled via the configuration option: * CONFIG_TEMPORAL_DENOISING. * For spatial denoiser: noise_sensitivity controls the amount of * pre-processing blur: noise_sensitivity = 0 means off. * Spatial denoiser invoked under !CONFIG_TEMPORAL_DENOISING. */ int noise_sensitivity; /* parameter used for sharpening output: recommendation 0: */ int Sharpness; int cpu_used; unsigned int rc_max_intra_bitrate_pct; /* percent of rate boost for golden frame in CBR mode. */ unsigned int gf_cbr_boost_pct; unsigned int screen_content_mode; /* mode -> *(0)=Realtime/Live Encoding. This mode is optimized for realtim * encoding (for example, capturing a television signal or feed * from a live camera). ( speed setting controls how fast ) *(1)=Good Quality Fast Encoding. The encoder balances quality with * the amount of time it takes to encode the output. ( speed * setting controls how fast ) *(2)=One Pass - Best Quality. The encoder places priority on the * quality of the output over encoding speed. The output is * compressed at the highest possible quality. This option takes * the longest amount of time to encode. ( speed setting ignored * ) *(3)=Two Pass - First Pass. The encoder generates a file of * statistics for use in the second encoding pass. ( speed * setting controls how fast ) *(4)=Two Pass - Second Pass. The encoder uses the statistics that * were generated in the first encoding pass to create the * compressed output. ( speed setting controls how fast ) *(5)=Two Pass - Second Pass Best. 
The encoder uses the statistics * that were generated in the first encoding pass to create the * compressed output using the highest possible quality, and * taking a longer amount of time to encode.. ( speed setting * ignored ) */ int Mode; /* Key Framing Operations */ int auto_key; /* automatically detect cut scenes */ int key_freq; /* maximum distance to key frame. */ /* lagged compression (if allow_lag == 0 lag_in_frames is ignored) */ int allow_lag; int lag_in_frames; /* how many frames lag before we start encoding */ /* * DATARATE CONTROL OPTIONS */ int end_usage; /* vbr or cbr */ /* buffer targeting aggressiveness */ int under_shoot_pct; int over_shoot_pct; /* buffering parameters */ int64_t starting_buffer_level; int64_t optimal_buffer_level; int64_t maximum_buffer_size; int64_t starting_buffer_level_in_ms; int64_t optimal_buffer_level_in_ms; int64_t maximum_buffer_size_in_ms; /* controlling quality */ int fixed_q; int worst_allowed_q; int best_allowed_q; int cq_level; /* allow internal resizing */ int allow_spatial_resampling; int resample_down_water_mark; int resample_up_water_mark; /* allow internal frame rate alterations */ int allow_df; int drop_frames_water_mark; /* two pass datarate control */ int two_pass_vbrbias; int two_pass_vbrmin_section; int two_pass_vbrmax_section; /* * END DATARATE CONTROL OPTIONS */ /* these parameters aren't to be used in final build don't use!!! */ int play_alternate; int alt_freq; int alt_q; int key_q; int gold_q; int multi_threaded; /* how many threads to run the encoder on */ int token_partitions; /* how many token partitions to create */ /* early breakout threshold: for video conf recommend 800 */ int encode_breakout; /* Bitfield defining the error resiliency features to enable. * Can provide decodable frames after losses in previous * frames and decodable partitions after losses in the same frame. 
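Illustrative note (an assumption, not from the original comment): the bit values are taken to correspond to the VPX_ERROR_RESILIENT_DEFAULT and VPX_ERROR_RESILIENT_PARTITIONS flags defined in vpx/vpx_encoder.h.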
*/ unsigned int error_resilient_mode; int arnr_max_frames; int arnr_strength; int arnr_type; vpx_fixed_buf_t two_pass_stats_in; struct vpx_codec_pkt_list *output_pkt_list; vp8e_tuning tuning; /* Temporal scaling parameters */ unsigned int number_of_layers; unsigned int target_bitrate[VPX_TS_MAX_PERIODICITY]; unsigned int rate_decimator[VPX_TS_MAX_PERIODICITY]; unsigned int periodicity; unsigned int layer_id[VPX_TS_MAX_PERIODICITY]; #if CONFIG_MULTI_RES_ENCODING /* Number of total resolutions encoded */ unsigned int mr_total_resolutions; /* Current encoder ID */ unsigned int mr_encoder_id; /* Down-sampling factor */ vpx_rational_t mr_down_sampling_factor; /* Memory location to store low-resolution encoder's mode info */ void *mr_low_res_mode_info; #endif } VP8_CONFIG; void vp8_initialize(); struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf); void vp8_remove_compressor(struct VP8_COMP **comp); void vp8_init_config(struct VP8_COMP *onyx, VP8_CONFIG *oxcf); void vp8_change_config(struct VP8_COMP *cpi, VP8_CONFIG *oxcf); int vp8_receive_raw_frame(struct VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time); int vp8_get_compressed_data(struct VP8_COMP *cpi, unsigned int *frame_flags, size_t *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush); int vp8_get_preview_raw_frame(struct VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags); int vp8_use_as_reference(struct VP8_COMP *cpi, int ref_frame_flags); int vp8_update_reference(struct VP8_COMP *cpi, int ref_frame_flags); int vp8_get_reference(struct VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); int vp8_set_reference(struct VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); int vp8_update_entropy(struct VP8_COMP *cpi, int update); int vp8_set_roimap(struct VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]); int vp8_set_active_map(struct VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols); int vp8_set_internal_size(struct VP8_COMP *cpi, VPX_SCALING horiz_mode, VPX_SCALING vert_mode); int vp8_get_quantizer(struct VP8_COMP *cpi); #ifdef __cplusplus } #endif #endif // VPX_VP8_COMMON_ONYX_H_ libvpx-1.8.2/vp8/common/onyxc_int.h000066400000000000000000000105711357355204000172260ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP8_COMMON_ONYXC_INT_H_ #define VPX_VP8_COMMON_ONYXC_INT_H_ #include "vpx_config.h" #include "vp8_rtcd.h" #include "vpx/internal/vpx_codec_internal.h" #include "loopfilter.h" #include "entropymv.h" #include "entropy.h" #if CONFIG_POSTPROC #include "postproc.h" #endif /*#ifdef PACKET_TESTING*/ #include "header.h" /*#endif*/ #ifdef __cplusplus extern "C" { #endif #define MINQ 0 #define MAXQ 127 #define QINDEX_RANGE (MAXQ + 1) #define NUM_YV12_BUFFERS 4 #define MAX_PARTITIONS 9 typedef struct frame_contexts { vp8_prob bmode_prob[VP8_BINTRAMODES - 1]; vp8_prob ymode_prob[VP8_YMODES - 1]; /* interframe intra mode probs */ vp8_prob uv_mode_prob[VP8_UV_MODES - 1]; vp8_prob sub_mv_ref_prob[VP8_SUBMVREFS - 1]; vp8_prob coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] [ENTROPY_NODES]; MV_CONTEXT mvc[2]; } FRAME_CONTEXT; typedef enum { ONE_PARTITION = 0, TWO_PARTITION = 1, FOUR_PARTITION = 2, EIGHT_PARTITION = 3 } TOKEN_PARTITION; typedef enum { RECON_CLAMP_REQUIRED = 0, RECON_CLAMP_NOTREQUIRED = 1 } CLAMP_TYPE; typedef struct VP8Common { struct vpx_internal_error_info error; DECLARE_ALIGNED(16, short, Y1dequant[QINDEX_RANGE][2]); DECLARE_ALIGNED(16, short, Y2dequant[QINDEX_RANGE][2]); DECLARE_ALIGNED(16, short, UVdequant[QINDEX_RANGE][2]); int Width; int Height; int horiz_scale; int vert_scale; CLAMP_TYPE clamp_type; YV12_BUFFER_CONFIG *frame_to_show; YV12_BUFFER_CONFIG yv12_fb[NUM_YV12_BUFFERS]; int fb_idx_ref_cnt[NUM_YV12_BUFFERS]; int new_fb_idx, lst_fb_idx, gld_fb_idx, alt_fb_idx; YV12_BUFFER_CONFIG temp_scale_frame; #if CONFIG_POSTPROC YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG post_proc_buffer_int; int post_proc_buffer_int_used; unsigned char *pp_limits_buffer; /* post-processing filter coefficients */ #endif FRAME_TYPE last_frame_type; /* Save last frame's frame type for motion search. */ FRAME_TYPE frame_type; int show_frame; int frame_flags; int MBs; int mb_rows; int mb_cols; int mode_info_stride; /* profile settings */ int mb_no_coeff_skip; int no_lpf; int use_bilinear_mc_filter; int full_pixel; int base_qindex; int y1dc_delta_q; int y2dc_delta_q; int y2ac_delta_q; int uvdc_delta_q; int uvac_delta_q; /* We allocate a MODE_INFO struct for each macroblock, together with an extra row on top and column on the left to simplify prediction. 
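Illustrative note (assumed from the allocation code elsewhere in vp8): with this layout mode_info_stride is mb_cols + 1 and mi points one row and one column past mip (mi = mip + mode_info_stride + 1), so the neighbours mi[-1] (left) and mi[-mode_info_stride] (above) are always valid.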
*/ MODE_INFO *mip; /* Base of allocated array */ MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ #if CONFIG_ERROR_CONCEALMENT MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ #endif /* MODE_INFO for the last decoded frame to show */ MODE_INFO *show_frame_mi; LOOPFILTERTYPE filter_type; loop_filter_info_n lf_info; int filter_level; int last_sharpness_level; int sharpness_level; int refresh_last_frame; /* Two state 0 = NO, 1 = YES */ int refresh_golden_frame; /* Two state 0 = NO, 1 = YES */ int refresh_alt_ref_frame; /* Two state 0 = NO, 1 = YES */ int copy_buffer_to_gf; /* 0 none, 1 Last to GF, 2 ARF to GF */ int copy_buffer_to_arf; /* 0 none, 1 Last to ARF, 2 GF to ARF */ int refresh_entropy_probs; /* Two state 0 = NO, 1 = YES */ int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ /* Y,U,V,Y2 */ ENTROPY_CONTEXT_PLANES *above_context; /* row of context for each plane */ ENTROPY_CONTEXT_PLANES left_context; /* (up to) 4 contexts "" */ FRAME_CONTEXT lfc; /* last frame entropy */ FRAME_CONTEXT fc; /* this frame entropy */ unsigned int current_video_frame; int version; TOKEN_PARTITION multi_token_partition; #ifdef PACKET_TESTING VP8_HEADER oh; #endif #if CONFIG_MULTITHREAD int processor_core_count; #endif #if CONFIG_POSTPROC struct postproc_state postproc_state; #endif int cpu_caps; } VP8_COMMON; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_ONYXC_INT_H_ libvpx-1.8.2/vp8/common/onyxd.h000066400000000000000000000036101357355204000163510ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_ONYXD_H_ #define VPX_VP8_COMMON_ONYXD_H_ /* Create/destroy static data structures. */ #ifdef __cplusplus extern "C" { #endif #include "vpx_scale/yv12config.h" #include "ppflags.h" #include "vpx_ports/mem.h" #include "vpx/vpx_codec.h" #include "vpx/vp8.h" struct VP8D_COMP; struct VP8Common; typedef struct { int Width; int Height; int Version; int postprocess; int max_threads; int error_concealment; } VP8D_CONFIG; typedef enum { VP8D_OK = 0 } VP8D_SETTING; void vp8dx_initialize(void); void vp8dx_set_setting(struct VP8D_COMP *comp, VP8D_SETTING oxst, int x); int vp8dx_get_setting(struct VP8D_COMP *comp, VP8D_SETTING oxst); int vp8dx_receive_compressed_data(struct VP8D_COMP *pbi, int64_t time_stamp); int vp8dx_get_raw_frame(struct VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags); int vp8dx_references_buffer(struct VP8Common *oci, int ref_frame); vpx_codec_err_t vp8dx_get_reference(struct VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); vpx_codec_err_t vp8dx_set_reference(struct VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd); int vp8dx_get_quantizer(const struct VP8D_COMP *pbi); #ifdef __cplusplus } #endif #endif // VPX_VP8_COMMON_ONYXD_H_ libvpx-1.8.2/vp8/common/postproc.c000066400000000000000000000230601357355204000170550ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" #include "vpx_dsp_rtcd.h" #include "vp8_rtcd.h" #include "vpx_dsp/postproc.h" #include "vpx_ports/system_state.h" #include "vpx_scale_rtcd.h" #include "vpx_scale/yv12config.h" #include "postproc.h" #include "common.h" #include "vpx_scale/vpx_scale.h" #include "systemdependent.h" #include #include #include #include /* clang-format off */ #define RGB_TO_YUV(t) \ (unsigned char)((0.257 * (float)(t >> 16)) + \ (0.504 * (float)(t >> 8 & 0xff)) + \ (0.098 * (float)(t & 0xff)) + 16), \ (unsigned char)(-(0.148 * (float)(t >> 16)) - \ (0.291 * (float)(t >> 8 & 0xff)) + \ (0.439 * (float)(t & 0xff)) + 128), \ (unsigned char)((0.439 * (float)(t >> 16)) - \ (0.368 * (float)(t >> 8 & 0xff)) - \ (0.071 * (float)(t & 0xff)) + 128) /* clang-format on */ extern void vp8_blit_text(const char *msg, unsigned char *address, const int pitch); extern void vp8_blit_line(int x0, int x1, int y0, int y1, unsigned char *image, const int pitch); /*********************************************************************************************************** */ #if CONFIG_POSTPROC static int q2mbl(int x) { if (x < 20) x = 20; x = 50 + (x - 50) * 10 / 8; return x * x / 3; } static void vp8_de_mblock(YV12_BUFFER_CONFIG *post, int q) { vpx_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); vpx_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); } void vp8_deblock(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post, int q) { double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; int ppl = (int)(level + .5); const MODE_INFO *mode_info_context = cm->mi; int mbr, mbc; /* The pixel thresholds are adjusted according to if or not the macroblock * is a skipped block. 
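Worked example (illustrative): the cubic above gives level = 7.695 at q = 63, so ppl = 8, and a macroblock with mb_skip_coeff set uses the halved limit mb_ppl = ppl >> 1 below.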
*/ unsigned char *ylimits = cm->pp_limits_buffer; unsigned char *uvlimits = cm->pp_limits_buffer + 16 * cm->mb_cols; if (ppl > 0) { for (mbr = 0; mbr < cm->mb_rows; ++mbr) { unsigned char *ylptr = ylimits; unsigned char *uvlptr = uvlimits; for (mbc = 0; mbc < cm->mb_cols; ++mbc) { unsigned char mb_ppl; if (mode_info_context->mbmi.mb_skip_coeff) { mb_ppl = (unsigned char)ppl >> 1; } else { mb_ppl = (unsigned char)ppl; } memset(ylptr, mb_ppl, 16); memset(uvlptr, mb_ppl, 8); ylptr += 16; uvlptr += 8; mode_info_context++; } mode_info_context++; vpx_post_proc_down_and_across_mb_row( source->y_buffer + 16 * mbr * source->y_stride, post->y_buffer + 16 * mbr * post->y_stride, source->y_stride, post->y_stride, source->y_width, ylimits, 16); vpx_post_proc_down_and_across_mb_row( source->u_buffer + 8 * mbr * source->uv_stride, post->u_buffer + 8 * mbr * post->uv_stride, source->uv_stride, post->uv_stride, source->uv_width, uvlimits, 8); vpx_post_proc_down_and_across_mb_row( source->v_buffer + 8 * mbr * source->uv_stride, post->v_buffer + 8 * mbr * post->uv_stride, source->uv_stride, post->uv_stride, source->uv_width, uvlimits, 8); } } else { vp8_yv12_copy_frame(source, post); } } void vp8_de_noise(VP8_COMMON *cm, YV12_BUFFER_CONFIG *source, int q, int uvfilter) { int mbr; double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; int ppl = (int)(level + .5); int mb_rows = cm->mb_rows; int mb_cols = cm->mb_cols; unsigned char *limits = cm->pp_limits_buffer; memset(limits, (unsigned char)ppl, 16 * mb_cols); /* TODO: The original code don't filter the 2 outer rows and columns. */ for (mbr = 0; mbr < mb_rows; ++mbr) { vpx_post_proc_down_and_across_mb_row( source->y_buffer + 16 * mbr * source->y_stride, source->y_buffer + 16 * mbr * source->y_stride, source->y_stride, source->y_stride, source->y_width, limits, 16); if (uvfilter == 1) { vpx_post_proc_down_and_across_mb_row( source->u_buffer + 8 * mbr * source->uv_stride, source->u_buffer + 8 * mbr * source->uv_stride, source->uv_stride, source->uv_stride, source->uv_width, limits, 8); vpx_post_proc_down_and_across_mb_row( source->v_buffer + 8 * mbr * source->uv_stride, source->v_buffer + 8 * mbr * source->uv_stride, source->uv_stride, source->uv_stride, source->uv_width, limits, 8); } } } #endif // CONFIG_POSTPROC #if CONFIG_POSTPROC int vp8_post_proc_frame(VP8_COMMON *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags) { int q = oci->filter_level * 10 / 6; int flags = ppflags->post_proc_flag; int deblock_level = ppflags->deblocking_level; int noise_level = ppflags->noise_level; if (!oci->frame_to_show) return -1; if (q > 63) q = 63; if (!flags) { *dest = *oci->frame_to_show; /* handle problem with extending borders */ dest->y_width = oci->Width; dest->y_height = oci->Height; dest->uv_height = dest->y_height / 2; oci->postproc_state.last_base_qindex = oci->base_qindex; oci->postproc_state.last_frame_valid = 1; return 0; } if (flags & VP8D_ADDNOISE) { if (!oci->postproc_state.generated_noise) { oci->postproc_state.generated_noise = vpx_calloc( oci->Width + 256, sizeof(*oci->postproc_state.generated_noise)); if (!oci->postproc_state.generated_noise) return 1; } } /* Allocate post_proc_buffer_int if needed */ if ((flags & VP8D_MFQE) && !oci->post_proc_buffer_int_used) { if ((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK)) { int width = (oci->Width + 15) & ~15; int height = (oci->Height + 15) & ~15; if (vp8_yv12_alloc_frame_buffer(&oci->post_proc_buffer_int, width, height, VP8BORDERINPIXELS)) { vpx_internal_error(&oci->error, 
VPX_CODEC_MEM_ERROR, "Failed to allocate MFQE framebuffer"); } oci->post_proc_buffer_int_used = 1; /* insure that postproc is set to all 0's so that post proc * doesn't pull random data in from edge */ memset((&oci->post_proc_buffer_int)->buffer_alloc, 128, (&oci->post_proc_buffer)->frame_size); } } vpx_clear_system_state(); if ((flags & VP8D_MFQE) && oci->postproc_state.last_frame_valid && oci->current_video_frame > 10 && oci->postproc_state.last_base_qindex < 60 && oci->base_qindex - oci->postproc_state.last_base_qindex >= 20) { vp8_multiframe_quality_enhance(oci); if (((flags & VP8D_DEBLOCK) || (flags & VP8D_DEMACROBLOCK)) && oci->post_proc_buffer_int_used) { vp8_yv12_copy_frame(&oci->post_proc_buffer, &oci->post_proc_buffer_int); if (flags & VP8D_DEMACROBLOCK) { vp8_deblock(oci, &oci->post_proc_buffer_int, &oci->post_proc_buffer, q + (deblock_level - 5) * 10); vp8_de_mblock(&oci->post_proc_buffer, q + (deblock_level - 5) * 10); } else if (flags & VP8D_DEBLOCK) { vp8_deblock(oci, &oci->post_proc_buffer_int, &oci->post_proc_buffer, q); } } /* Move partially towards the base q of the previous frame */ oci->postproc_state.last_base_qindex = (3 * oci->postproc_state.last_base_qindex + oci->base_qindex) >> 2; } else if (flags & VP8D_DEMACROBLOCK) { vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, q + (deblock_level - 5) * 10); vp8_de_mblock(&oci->post_proc_buffer, q + (deblock_level - 5) * 10); oci->postproc_state.last_base_qindex = oci->base_qindex; } else if (flags & VP8D_DEBLOCK) { vp8_deblock(oci, oci->frame_to_show, &oci->post_proc_buffer, q); oci->postproc_state.last_base_qindex = oci->base_qindex; } else { vp8_yv12_copy_frame(oci->frame_to_show, &oci->post_proc_buffer); oci->postproc_state.last_base_qindex = oci->base_qindex; } oci->postproc_state.last_frame_valid = 1; if (flags & VP8D_ADDNOISE) { if (oci->postproc_state.last_q != q || oci->postproc_state.last_noise != noise_level) { double sigma; struct postproc_state *ppstate = &oci->postproc_state; vpx_clear_system_state(); sigma = noise_level + .5 + .6 * q / 63.0; ppstate->clamp = vpx_setup_noise(sigma, ppstate->generated_noise, oci->Width + 256); ppstate->last_q = q; ppstate->last_noise = noise_level; } vpx_plane_add_noise( oci->post_proc_buffer.y_buffer, oci->postproc_state.generated_noise, oci->postproc_state.clamp, oci->postproc_state.clamp, oci->post_proc_buffer.y_width, oci->post_proc_buffer.y_height, oci->post_proc_buffer.y_stride); } *dest = oci->post_proc_buffer; /* handle problem with extending borders */ dest->y_width = oci->Width; dest->y_height = oci->Height; dest->uv_height = dest->y_height / 2; return 0; } #endif libvpx-1.8.2/vp8/common/postproc.h000066400000000000000000000023531357355204000170640ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP8_COMMON_POSTPROC_H_ #define VPX_VP8_COMMON_POSTPROC_H_ #include "vpx_ports/mem.h" struct postproc_state { int last_q; int last_noise; int last_base_qindex; int last_frame_valid; int clamp; int8_t *generated_noise; }; #include "onyxc_int.h" #include "ppflags.h" #ifdef __cplusplus extern "C" { #endif int vp8_post_proc_frame(struct VP8Common *oci, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *ppflags); void vp8_de_noise(struct VP8Common *cm, YV12_BUFFER_CONFIG *source, int q, int uvfilter); void vp8_deblock(struct VP8Common *cm, YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post, int q); #define MFQE_PRECISION 4 void vp8_multiframe_quality_enhance(struct VP8Common *cm); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_POSTPROC_H_ libvpx-1.8.2/vp8/common/ppflags.h000066400000000000000000000016641357355204000166530ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_PPFLAGS_H_ #define VPX_VP8_COMMON_PPFLAGS_H_ #ifdef __cplusplus extern "C" { #endif enum { VP8D_NOFILTERING = 0, VP8D_DEBLOCK = 1 << 0, VP8D_DEMACROBLOCK = 1 << 1, VP8D_ADDNOISE = 1 << 2, VP8D_MFQE = 1 << 3 }; typedef struct { int post_proc_flag; int deblocking_level; int noise_level; int display_ref_frame_flag; int display_mb_modes_flag; int display_b_modes_flag; int display_mv_flag; } vp8_ppflags_t; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_PPFLAGS_H_ libvpx-1.8.2/vp8/common/quant_common.c000066400000000000000000000065231357355204000177110ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
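Illustrative file note (not part of the original): the dc/ac lookup tables below map the 7-bit q index to quantizer step sizes; each helper adds its per-plane delta to the index, clamps the sum to [0, 127], and looks up the table, e.g. vp8_dc_quant(100, 40) clamps 140 to 127 and returns dc_qlookup[127] = 157. vp8_ac2quant additionally scales by 155/100 using the bit-exact identity x * 155 / 100 == (x * 101581) >> 16 for x in [0, 284], as noted in the code.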
*/ #include "quant_common.h" static const int dc_qlookup[QINDEX_RANGE] = { 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17, 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157, }; static const int ac_qlookup[QINDEX_RANGE] = { 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152, 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209, 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284, }; int vp8_dc_quant(int QIndex, int Delta) { int retval; QIndex = QIndex + Delta; if (QIndex > 127) { QIndex = 127; } else if (QIndex < 0) { QIndex = 0; } retval = dc_qlookup[QIndex]; return retval; } int vp8_dc2quant(int QIndex, int Delta) { int retval; QIndex = QIndex + Delta; if (QIndex > 127) { QIndex = 127; } else if (QIndex < 0) { QIndex = 0; } retval = dc_qlookup[QIndex] * 2; return retval; } int vp8_dc_uv_quant(int QIndex, int Delta) { int retval; QIndex = QIndex + Delta; if (QIndex > 127) { QIndex = 127; } else if (QIndex < 0) { QIndex = 0; } retval = dc_qlookup[QIndex]; if (retval > 132) retval = 132; return retval; } int vp8_ac_yquant(int QIndex) { int retval; if (QIndex > 127) { QIndex = 127; } else if (QIndex < 0) { QIndex = 0; } retval = ac_qlookup[QIndex]; return retval; } int vp8_ac2quant(int QIndex, int Delta) { int retval; QIndex = QIndex + Delta; if (QIndex > 127) { QIndex = 127; } else if (QIndex < 0) { QIndex = 0; } /* For all x in [0..284], x*155/100 is bitwise equal to (x*101581) >> 16. * The smallest precision for that is '(x*6349) >> 12' but 16 is a good * word size. */ retval = (ac_qlookup[QIndex] * 101581) >> 16; if (retval < 8) retval = 8; return retval; } int vp8_ac_uv_quant(int QIndex, int Delta) { int retval; QIndex = QIndex + Delta; if (QIndex > 127) { QIndex = 127; } else if (QIndex < 0) { QIndex = 0; } retval = ac_qlookup[QIndex]; return retval; } libvpx-1.8.2/vp8/common/quant_common.h000066400000000000000000000017061357355204000177140ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
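Illustrative usage sketch (an assumption, not from the original): callers derive per-plane quantizers from the frame base index and the delta fields declared in onyxc_int.h, for example y_dc = vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q) and uv_ac = vp8_ac_uv_quant(cm->base_qindex, cm->uvac_delta_q).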
*/ #ifndef VPX_VP8_COMMON_QUANT_COMMON_H_ #define VPX_VP8_COMMON_QUANT_COMMON_H_ #include "string.h" #include "blockd.h" #include "onyxc_int.h" #ifdef __cplusplus extern "C" { #endif extern int vp8_ac_yquant(int QIndex); extern int vp8_dc_quant(int QIndex, int Delta); extern int vp8_dc2quant(int QIndex, int Delta); extern int vp8_ac2quant(int QIndex, int Delta); extern int vp8_dc_uv_quant(int QIndex, int Delta); extern int vp8_ac_uv_quant(int QIndex, int Delta); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_QUANT_COMMON_H_ libvpx-1.8.2/vp8/common/reconinter.c000066400000000000000000000421011357355204000173510ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "vpx_config.h" #include "vp8_rtcd.h" #include "vpx/vpx_integer.h" #include "blockd.h" #include "reconinter.h" #if CONFIG_RUNTIME_CPU_DETECT #include "onyxc_int.h" #endif void vp8_copy_mem16x16_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) { int r; for (r = 0; r < 16; ++r) { memcpy(dst, src, 16); src += src_stride; dst += dst_stride; } } void vp8_copy_mem8x8_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) { int r; for (r = 0; r < 8; ++r) { memcpy(dst, src, 8); src += src_stride; dst += dst_stride; } } void vp8_copy_mem8x4_c(unsigned char *src, int src_stride, unsigned char *dst, int dst_stride) { int r; for (r = 0; r < 4; ++r) { memcpy(dst, src, 8); src += src_stride; dst += dst_stride; } } void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf) { int r; unsigned char *pred_ptr = d->predictor; unsigned char *ptr; ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) { sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, pred_ptr, pitch); } else { for (r = 0; r < 4; ++r) { pred_ptr[0] = ptr[0]; pred_ptr[1] = ptr[1]; pred_ptr[2] = ptr[2]; pred_ptr[3] = ptr[3]; pred_ptr += pitch; ptr += pre_stride; } } } static void build_inter_predictors4b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride) { unsigned char *ptr; ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) { x->subpixel_predict8x8(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride); } else { vp8_copy_mem8x8(ptr, pre_stride, dst, dst_stride); } } static void build_inter_predictors2b(MACROBLOCKD *x, BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride) { unsigned char *ptr; ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) { x->subpixel_predict8x4(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride); } else { vp8_copy_mem8x4(ptr, pre_stride, dst, dst_stride); } } static void build_inter_predictors_b(BLOCKD *d, unsigned char *dst, int dst_stride, unsigned char *base_pre, int pre_stride, 
vp8_subpix_fn_t sppf) { int r; unsigned char *ptr; ptr = base_pre + d->offset + (d->bmi.mv.as_mv.row >> 3) * pre_stride + (d->bmi.mv.as_mv.col >> 3); if (d->bmi.mv.as_mv.row & 7 || d->bmi.mv.as_mv.col & 7) { sppf(ptr, pre_stride, d->bmi.mv.as_mv.col & 7, d->bmi.mv.as_mv.row & 7, dst, dst_stride); } else { for (r = 0; r < 4; ++r) { dst[0] = ptr[0]; dst[1] = ptr[1]; dst[2] = ptr[2]; dst[3] = ptr[3]; dst += dst_stride; ptr += pre_stride; } } } /*encoder only*/ void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x) { unsigned char *uptr, *vptr; unsigned char *upred_ptr = &x->predictor[256]; unsigned char *vpred_ptr = &x->predictor[320]; int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; int offset; int pre_stride = x->pre.uv_stride; /* calc uv motion vectors */ mv_row += 1 | (mv_row >> (sizeof(int) * CHAR_BIT - 1)); mv_col += 1 | (mv_col >> (sizeof(int) * CHAR_BIT - 1)); mv_row /= 2; mv_col /= 2; mv_row &= x->fullpixel_mask; mv_col &= x->fullpixel_mask; offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); uptr = x->pre.u_buffer + offset; vptr = x->pre.v_buffer + offset; if ((mv_row | mv_col) & 7) { x->subpixel_predict8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, 8); x->subpixel_predict8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, 8); } else { vp8_copy_mem8x8(uptr, pre_stride, upred_ptr, 8); vp8_copy_mem8x8(vptr, pre_stride, vpred_ptr, 8); } } /*encoder only*/ void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x) { int i, j; int pre_stride = x->pre.uv_stride; unsigned char *base_pre; /* build uv mvs */ for (i = 0; i < 2; ++i) { for (j = 0; j < 2; ++j) { int yoffset = i * 8 + j * 2; int uoffset = 16 + i * 2 + j; int voffset = 20 + i * 2 + j; int temp; temp = x->block[yoffset].bmi.mv.as_mv.row + x->block[yoffset + 1].bmi.mv.as_mv.row + x->block[yoffset + 4].bmi.mv.as_mv.row + x->block[yoffset + 5].bmi.mv.as_mv.row; temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask; temp = x->block[yoffset].bmi.mv.as_mv.col + x->block[yoffset + 1].bmi.mv.as_mv.col + x->block[yoffset + 4].bmi.mv.as_mv.col + x->block[yoffset + 5].bmi.mv.as_mv.col; temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask; x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int; } } base_pre = x->pre.u_buffer; for (i = 16; i < 20; i += 2) { BLOCKD *d0 = &x->block[i]; BLOCKD *d1 = &x->block[i + 1]; if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) { build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride); } else { vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict); vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict); } } base_pre = x->pre.v_buffer; for (i = 20; i < 24; i += 2) { BLOCKD *d0 = &x->block[i]; BLOCKD *d1 = &x->block[i + 1]; if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) { build_inter_predictors2b(x, d0, d0->predictor, 8, base_pre, pre_stride); } else { vp8_build_inter_predictors_b(d0, 8, base_pre, pre_stride, x->subpixel_predict); vp8_build_inter_predictors_b(d1, 8, base_pre, pre_stride, x->subpixel_predict); } } } /*encoder only*/ void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, unsigned char *dst_y, int dst_ystride) { unsigned char *ptr_base; unsigned char *ptr; int mv_row = x->mode_info_context->mbmi.mv.as_mv.row; int mv_col = x->mode_info_context->mbmi.mv.as_mv.col; int pre_stride = x->pre.y_stride; ptr_base = 
x->pre.y_buffer; ptr = ptr_base + (mv_row >> 3) * pre_stride + (mv_col >> 3); if ((mv_row | mv_col) & 7) { x->subpixel_predict16x16(ptr, pre_stride, mv_col & 7, mv_row & 7, dst_y, dst_ystride); } else { vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride); } } static void clamp_mv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { /* If the MV points so far into the UMV border that no visible pixels * are used for reconstruction, the subpel part of the MV can be * discarded and the MV limited to 16 pixels with equivalent results. * * This limit kicks in at 19 pixels for the top and left edges, for * the 16 pixels plus 3 taps right of the central pixel when subpel * filtering. The bottom and right edges use 16 pixels plus 2 pixels * left of the central pixel when filtering. */ if (mv->col < (xd->mb_to_left_edge - (19 << 3))) { mv->col = xd->mb_to_left_edge - (16 << 3); } else if (mv->col > xd->mb_to_right_edge + (18 << 3)) { mv->col = xd->mb_to_right_edge + (16 << 3); } if (mv->row < (xd->mb_to_top_edge - (19 << 3))) { mv->row = xd->mb_to_top_edge - (16 << 3); } else if (mv->row > xd->mb_to_bottom_edge + (18 << 3)) { mv->row = xd->mb_to_bottom_edge + (16 << 3); } } /* A version of the above function for chroma block MVs.*/ static void clamp_uvmv_to_umv_border(MV *mv, const MACROBLOCKD *xd) { mv->col = (2 * mv->col < (xd->mb_to_left_edge - (19 << 3))) ? (xd->mb_to_left_edge - (16 << 3)) >> 1 : mv->col; mv->col = (2 * mv->col > xd->mb_to_right_edge + (18 << 3)) ? (xd->mb_to_right_edge + (16 << 3)) >> 1 : mv->col; mv->row = (2 * mv->row < (xd->mb_to_top_edge - (19 << 3))) ? (xd->mb_to_top_edge - (16 << 3)) >> 1 : mv->row; mv->row = (2 * mv->row > xd->mb_to_bottom_edge + (18 << 3)) ? (xd->mb_to_bottom_edge + (16 << 3)) >> 1 : mv->row; } void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, unsigned char *dst_y, unsigned char *dst_u, unsigned char *dst_v, int dst_ystride, int dst_uvstride) { int offset; unsigned char *ptr; unsigned char *uptr, *vptr; int_mv _16x16mv; unsigned char *ptr_base = x->pre.y_buffer; int pre_stride = x->pre.y_stride; _16x16mv.as_int = x->mode_info_context->mbmi.mv.as_int; if (x->mode_info_context->mbmi.need_to_clamp_mvs) { clamp_mv_to_umv_border(&_16x16mv.as_mv, x); } ptr = ptr_base + (_16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3); if (_16x16mv.as_int & 0x00070007) { x->subpixel_predict16x16(ptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_y, dst_ystride); } else { vp8_copy_mem16x16(ptr, pre_stride, dst_y, dst_ystride); } /* calc uv motion vectors */ _16x16mv.as_mv.row += 1 | (_16x16mv.as_mv.row >> (sizeof(int) * CHAR_BIT - 1)); _16x16mv.as_mv.col += 1 | (_16x16mv.as_mv.col >> (sizeof(int) * CHAR_BIT - 1)); _16x16mv.as_mv.row /= 2; _16x16mv.as_mv.col /= 2; _16x16mv.as_mv.row &= x->fullpixel_mask; _16x16mv.as_mv.col &= x->fullpixel_mask; if (2 * _16x16mv.as_mv.col < (x->mb_to_left_edge - (19 << 3)) || 2 * _16x16mv.as_mv.col > x->mb_to_right_edge + (18 << 3) || 2 * _16x16mv.as_mv.row < (x->mb_to_top_edge - (19 << 3)) || 2 * _16x16mv.as_mv.row > x->mb_to_bottom_edge + (18 << 3)) { return; } pre_stride >>= 1; offset = (_16x16mv.as_mv.row >> 3) * pre_stride + (_16x16mv.as_mv.col >> 3); uptr = x->pre.u_buffer + offset; vptr = x->pre.v_buffer + offset; if (_16x16mv.as_int & 0x00070007) { x->subpixel_predict8x8(uptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_u, dst_uvstride); x->subpixel_predict8x8(vptr, pre_stride, _16x16mv.as_mv.col & 7, _16x16mv.as_mv.row & 7, dst_v, dst_uvstride); } else { vp8_copy_mem8x8(uptr, 
pre_stride, dst_u, dst_uvstride); vp8_copy_mem8x8(vptr, pre_stride, dst_v, dst_uvstride); } } static void build_inter4x4_predictors_mb(MACROBLOCKD *x) { int i; unsigned char *base_dst = x->dst.y_buffer; unsigned char *base_pre = x->pre.y_buffer; if (x->mode_info_context->mbmi.partitioning < 3) { BLOCKD *b; int dst_stride = x->dst.y_stride; x->block[0].bmi = x->mode_info_context->bmi[0]; x->block[2].bmi = x->mode_info_context->bmi[2]; x->block[8].bmi = x->mode_info_context->bmi[8]; x->block[10].bmi = x->mode_info_context->bmi[10]; if (x->mode_info_context->mbmi.need_to_clamp_mvs) { clamp_mv_to_umv_border(&x->block[0].bmi.mv.as_mv, x); clamp_mv_to_umv_border(&x->block[2].bmi.mv.as_mv, x); clamp_mv_to_umv_border(&x->block[8].bmi.mv.as_mv, x); clamp_mv_to_umv_border(&x->block[10].bmi.mv.as_mv, x); } b = &x->block[0]; build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); b = &x->block[2]; build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); b = &x->block[8]; build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); b = &x->block[10]; build_inter_predictors4b(x, b, base_dst + b->offset, dst_stride, base_pre, dst_stride); } else { for (i = 0; i < 16; i += 2) { BLOCKD *d0 = &x->block[i]; BLOCKD *d1 = &x->block[i + 1]; int dst_stride = x->dst.y_stride; x->block[i + 0].bmi = x->mode_info_context->bmi[i + 0]; x->block[i + 1].bmi = x->mode_info_context->bmi[i + 1]; if (x->mode_info_context->mbmi.need_to_clamp_mvs) { clamp_mv_to_umv_border(&x->block[i + 0].bmi.mv.as_mv, x); clamp_mv_to_umv_border(&x->block[i + 1].bmi.mv.as_mv, x); } if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) { build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride); } else { build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); } } } base_dst = x->dst.u_buffer; base_pre = x->pre.u_buffer; for (i = 16; i < 20; i += 2) { BLOCKD *d0 = &x->block[i]; BLOCKD *d1 = &x->block[i + 1]; int dst_stride = x->dst.uv_stride; /* Note: uv mvs already clamped in build_4x4uvmvs() */ if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) { build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride); } else { build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); } } base_dst = x->dst.v_buffer; base_pre = x->pre.v_buffer; for (i = 20; i < 24; i += 2) { BLOCKD *d0 = &x->block[i]; BLOCKD *d1 = &x->block[i + 1]; int dst_stride = x->dst.uv_stride; /* Note: uv mvs already clamped in build_4x4uvmvs() */ if (d0->bmi.mv.as_int == d1->bmi.mv.as_int) { build_inter_predictors2b(x, d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride); } else { build_inter_predictors_b(d0, base_dst + d0->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); build_inter_predictors_b(d1, base_dst + d1->offset, dst_stride, base_pre, dst_stride, x->subpixel_predict); } } } static void build_4x4uvmvs(MACROBLOCKD *x) { int i, j; for (i = 0; i < 2; ++i) { for (j = 0; j < 2; ++j) { int yoffset = i * 8 + j * 2; int uoffset = 16 + i * 2 + j; int voffset = 20 + i * 2 + j; int temp; temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.row + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.row + 
x->mode_info_context->bmi[yoffset + 4].mv.as_mv.row + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.row; temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); x->block[uoffset].bmi.mv.as_mv.row = (temp / 8) & x->fullpixel_mask; temp = x->mode_info_context->bmi[yoffset + 0].mv.as_mv.col + x->mode_info_context->bmi[yoffset + 1].mv.as_mv.col + x->mode_info_context->bmi[yoffset + 4].mv.as_mv.col + x->mode_info_context->bmi[yoffset + 5].mv.as_mv.col; temp += 4 + ((temp >> (sizeof(temp) * CHAR_BIT - 1)) * 8); x->block[uoffset].bmi.mv.as_mv.col = (temp / 8) & x->fullpixel_mask; if (x->mode_info_context->mbmi.need_to_clamp_mvs) { clamp_uvmv_to_umv_border(&x->block[uoffset].bmi.mv.as_mv, x); } x->block[voffset].bmi.mv.as_int = x->block[uoffset].bmi.mv.as_int; } } } void vp8_build_inter_predictors_mb(MACROBLOCKD *xd) { if (xd->mode_info_context->mbmi.mode != SPLITMV) { vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); } else { build_4x4uvmvs(xd); build_inter4x4_predictors_mb(xd); } } libvpx-1.8.2/vp8/common/reconinter.h000066400000000000000000000025111357355204000173570ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_RECONINTER_H_ #define VPX_VP8_COMMON_RECONINTER_H_ #ifdef __cplusplus extern "C" { #endif void vp8_build_inter_predictors_mb(MACROBLOCKD *xd); void vp8_build_inter16x16_predictors_mb(MACROBLOCKD *x, unsigned char *dst_y, unsigned char *dst_u, unsigned char *dst_v, int dst_ystride, int dst_uvstride); void vp8_build_inter16x16_predictors_mby(MACROBLOCKD *x, unsigned char *dst_y, int dst_ystride); void vp8_build_inter_predictors_b(BLOCKD *d, int pitch, unsigned char *base_pre, int pre_stride, vp8_subpix_fn_t sppf); void vp8_build_inter16x16_predictors_mbuv(MACROBLOCKD *x); void vp8_build_inter4x4_predictors_mbuv(MACROBLOCKD *x); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_RECONINTER_H_ libvpx-1.8.2/vp8/common/reconintra.c000066400000000000000000000064661357355204000173630ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
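*/

/* Illustrative sketch (not part of libvpx): build_4x4uvmvs() and
   vp8_build_inter4x4_predictors_mbuv() above derive each chroma MV by
   summing four luma MVs and dividing by 8 with rounding away from zero:
   `temp >> (sizeof(temp) * CHAR_BIT - 1)` is 0 for non-negative sums and
   -1 for negative ones (arithmetic shift assumed, as in the library code),
   so `4 + (sign * 8)` adds +4 or -4 before the divide. The same trick in
   isolation; the function name is hypothetical. */
#if 0
#include <limits.h>

static int round_div8_away_from_zero(int sum) {
  sum += 4 + ((sum >> (sizeof(sum) * CHAR_BIT - 1)) * 8);
  return sum / 8; /* 12 -> 2, -12 -> -2, 11 -> 1, -11 -> -1 */
}
#endif

/*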
*/ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "./vp8_rtcd.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/vpx_once.h" #include "blockd.h" #include "vp8/common/reconintra.h" #include "vp8/common/reconintra4x4.h" enum { SIZE_16, SIZE_8, NUM_SIZES, }; typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left); static intra_pred_fn pred[4][NUM_SIZES]; static intra_pred_fn dc_pred[2][2][NUM_SIZES]; static void vp8_init_intra_predictors_internal(void) { #define INIT_SIZE(sz) \ pred[V_PRED][SIZE_##sz] = vpx_v_predictor_##sz##x##sz; \ pred[H_PRED][SIZE_##sz] = vpx_h_predictor_##sz##x##sz; \ pred[TM_PRED][SIZE_##sz] = vpx_tm_predictor_##sz##x##sz; \ \ dc_pred[0][0][SIZE_##sz] = vpx_dc_128_predictor_##sz##x##sz; \ dc_pred[0][1][SIZE_##sz] = vpx_dc_top_predictor_##sz##x##sz; \ dc_pred[1][0][SIZE_##sz] = vpx_dc_left_predictor_##sz##x##sz; \ dc_pred[1][1][SIZE_##sz] = vpx_dc_predictor_##sz##x##sz INIT_SIZE(16); INIT_SIZE(8); vp8_init_intra4x4_predictors_internal(); } void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x, unsigned char *yabove_row, unsigned char *yleft, int left_stride, unsigned char *ypred_ptr, int y_stride) { MB_PREDICTION_MODE mode = x->mode_info_context->mbmi.mode; DECLARE_ALIGNED(16, uint8_t, yleft_col[16]); int i; intra_pred_fn fn; for (i = 0; i < 16; ++i) { yleft_col[i] = yleft[i * left_stride]; } if (mode == DC_PRED) { fn = dc_pred[x->left_available][x->up_available][SIZE_16]; } else { fn = pred[mode][SIZE_16]; } fn(ypred_ptr, y_stride, yabove_row, yleft_col); } void vp8_build_intra_predictors_mbuv_s( MACROBLOCKD *x, unsigned char *uabove_row, unsigned char *vabove_row, unsigned char *uleft, unsigned char *vleft, int left_stride, unsigned char *upred_ptr, unsigned char *vpred_ptr, int pred_stride) { MB_PREDICTION_MODE uvmode = x->mode_info_context->mbmi.uv_mode; #if HAVE_VSX /* Power PC implementation uses "vec_vsx_ld" to read 16 bytes from uleft_col and vleft_col. Play it safe by reserving enough stack space here. */ unsigned char uleft_col[16]; unsigned char vleft_col[16]; #else unsigned char uleft_col[8]; unsigned char vleft_col[8]; #endif int i; intra_pred_fn fn; for (i = 0; i < 8; ++i) { uleft_col[i] = uleft[i * left_stride]; vleft_col[i] = vleft[i * left_stride]; } if (uvmode == DC_PRED) { fn = dc_pred[x->left_available][x->up_available][SIZE_8]; } else { fn = pred[uvmode][SIZE_8]; } fn(upred_ptr, pred_stride, uabove_row, uleft_col); fn(vpred_ptr, pred_stride, vabove_row, vleft_col); } void vp8_init_intra_predictors(void) { once(vp8_init_intra_predictors_internal); } libvpx-1.8.2/vp8/common/reconintra.h000066400000000000000000000022261357355204000173560ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
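*/

/* Illustrative sketch (not part of libvpx): reconintra.c above selects the
   DC predictor through dc_pred[left_available][up_available][size], so edge
   availability picks the 128/top/left/full variant with one table lookup
   instead of branches at every call site. A reduced model of that dispatch;
   all toy_* names are hypothetical. */
#if 0
typedef int (*toy_dc_fn)(void);

static int toy_dc_128(void) { return 128; } /* no neighbors available */
static int toy_dc_top(void) { return 1; }   /* only the row above */
static int toy_dc_left(void) { return 2; }  /* only the column to the left */
static int toy_dc_both(void) { return 3; }  /* full DC average */

static const toy_dc_fn toy_dc_table[2][2] = {
  /* left unavailable */ { toy_dc_128, toy_dc_top },
  /* left available */ { toy_dc_left, toy_dc_both },
};

static int toy_predict_dc(int left_available, int up_available) {
  return toy_dc_table[left_available][up_available]();
}
#endif

/*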
*/

#ifndef VPX_VP8_COMMON_RECONINTRA_H_
#define VPX_VP8_COMMON_RECONINTRA_H_

#include "vp8/common/blockd.h"

#ifdef __cplusplus
extern "C" {
#endif

void vp8_build_intra_predictors_mby_s(MACROBLOCKD *x,
                                      unsigned char *yabove_row,
                                      unsigned char *yleft, int left_stride,
                                      unsigned char *ypred_ptr, int y_stride);

void vp8_build_intra_predictors_mbuv_s(
    MACROBLOCKD *x, unsigned char *uabove_row, unsigned char *vabove_row,
    unsigned char *uleft, unsigned char *vleft, int left_stride,
    unsigned char *upred_ptr, unsigned char *vpred_ptr, int pred_stride);

void vp8_init_intra_predictors(void);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP8_COMMON_RECONINTRA_H_
libvpx-1.8.2/vp8/common/reconintra4x4.c000066400000000000000000000046741357355204000177200ustar00rootroot00000000000000/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <string.h>

#include "vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vp8_rtcd.h"
#include "blockd.h"
#include "reconintra4x4.h"
#include "vp8/common/common.h"
#include "vpx_ports/mem.h"

typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride,
                              const uint8_t *above, const uint8_t *left);

static intra_pred_fn pred[10];

void vp8_init_intra4x4_predictors_internal(void) {
  pred[B_DC_PRED] = vpx_dc_predictor_4x4;
  pred[B_TM_PRED] = vpx_tm_predictor_4x4;
  pred[B_VE_PRED] = vpx_ve_predictor_4x4;
  pred[B_HE_PRED] = vpx_he_predictor_4x4;
  pred[B_LD_PRED] = vpx_d45e_predictor_4x4;
  pred[B_RD_PRED] = vpx_d135_predictor_4x4;
  pred[B_VR_PRED] = vpx_d117_predictor_4x4;
  pred[B_VL_PRED] = vpx_d63e_predictor_4x4;
  pred[B_HD_PRED] = vpx_d153_predictor_4x4;
  pred[B_HU_PRED] = vpx_d207_predictor_4x4;
}

void vp8_intra4x4_predict(unsigned char *above, unsigned char *yleft,
                          int left_stride, B_PREDICTION_MODE b_mode,
                          unsigned char *dst, int dst_stride,
                          unsigned char top_left) {
  /* Power PC implementation uses "vec_vsx_ld" to read 16 bytes from
     Above (aka, Aboveb + 4). Play it safe by reserving enough stack
     space here. Similarly for "Left". */
#if HAVE_VSX
  unsigned char Aboveb[20];
#else
  unsigned char Aboveb[12];
#endif
  unsigned char *Above = Aboveb + 4;
#if HAVE_NEON
  // Neon intrinsics are unable to load 32 bits, or 4 8 bit values. Instead, it
  // over reads but does not use the extra 4 values.
  unsigned char Left[8];
#if VPX_WITH_ASAN
  // Silence an 'uninitialized read' warning. Although uninitialized values are
  // indeed read, they are not used.
  vp8_zero_array(Left, 8);
#endif  // VPX_WITH_ASAN
#elif HAVE_VSX
  unsigned char Left[16];
#else
  unsigned char Left[4];
#endif  // HAVE_NEON

  Left[0] = yleft[0];
  Left[1] = yleft[left_stride];
  Left[2] = yleft[2 * left_stride];
  Left[3] = yleft[3 * left_stride];
  memcpy(Above, above, 8);
  Above[-1] = top_left;

  pred[b_mode](dst, dst_stride, Above, Left);
}
libvpx-1.8.2/vp8/common/reconintra4x4.h000066400000000000000000000030541357355204000177160ustar00rootroot00000000000000/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS.
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_RECONINTRA4X4_H_ #define VPX_VP8_COMMON_RECONINTRA4X4_H_ #include "vp8/common/blockd.h" #ifdef __cplusplus extern "C" { #endif static INLINE void intra_prediction_down_copy(MACROBLOCKD *xd, unsigned char *above_right_src) { int dst_stride = xd->dst.y_stride; unsigned char *above_right_dst = xd->dst.y_buffer - dst_stride + 16; unsigned int *src_ptr = (unsigned int *)above_right_src; unsigned int *dst_ptr0 = (unsigned int *)(above_right_dst + 4 * dst_stride); unsigned int *dst_ptr1 = (unsigned int *)(above_right_dst + 8 * dst_stride); unsigned int *dst_ptr2 = (unsigned int *)(above_right_dst + 12 * dst_stride); *dst_ptr0 = *src_ptr; *dst_ptr1 = *src_ptr; *dst_ptr2 = *src_ptr; } void vp8_intra4x4_predict(unsigned char *above, unsigned char *yleft, int left_stride, B_PREDICTION_MODE b_mode, unsigned char *dst, int dst_stride, unsigned char top_left); void vp8_init_intra4x4_predictors_internal(void); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_RECONINTRA4X4_H_ libvpx-1.8.2/vp8/common/rtcd.c000066400000000000000000000010541357355204000161370ustar00rootroot00000000000000/* * Copyright (c) 2011 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #define RTCD_C #include "./vp8_rtcd.h" #include "vpx_ports/vpx_once.h" void vp8_rtcd() { once(setup_rtcd_internal); } libvpx-1.8.2/vp8/common/rtcd_defs.pl000066400000000000000000000273451357355204000173440ustar00rootroot00000000000000## ## Copyright (c) 2017 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## sub vp8_common_forward_decls() { print <y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); for (i = 0; i < ybf->y_height; ++i) { ybf->y_buffer[ybf->y_stride * i - 1] = (unsigned char)129; } memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); for (i = 0; i < ybf->uv_height; ++i) { ybf->u_buffer[ybf->uv_stride * i - 1] = (unsigned char)129; } memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); for (i = 0; i < ybf->uv_height; ++i) { ybf->v_buffer[ybf->uv_stride * i - 1] = (unsigned char)129; } } void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf) { memset(ybf->y_buffer - 1 - ybf->y_stride, 127, ybf->y_width + 5); memset(ybf->u_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); memset(ybf->v_buffer - 1 - ybf->uv_stride, 127, ybf->uv_width + 5); } libvpx-1.8.2/vp8/common/setupintrarecon.h000066400000000000000000000024511357355204000204370ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_SETUPINTRARECON_H_ #define VPX_VP8_COMMON_SETUPINTRARECON_H_ #include "./vpx_config.h" #include "vpx_scale/yv12config.h" #ifdef __cplusplus extern "C" { #endif extern void vp8_setup_intra_recon(YV12_BUFFER_CONFIG *ybf); extern void vp8_setup_intra_recon_top_line(YV12_BUFFER_CONFIG *ybf); static INLINE void setup_intra_recon_left(unsigned char *y_buffer, unsigned char *u_buffer, unsigned char *v_buffer, int y_stride, int uv_stride) { int i; for (i = 0; i < 16; ++i) y_buffer[y_stride * i] = (unsigned char)129; for (i = 0; i < 8; ++i) u_buffer[uv_stride * i] = (unsigned char)129; for (i = 0; i < 8; ++i) v_buffer[uv_stride * i] = (unsigned char)129; } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_SETUPINTRARECON_H_ libvpx-1.8.2/vp8/common/swapyv12buffer.c000066400000000000000000000020071357355204000200700ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "swapyv12buffer.h" void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame) { unsigned char *temp; temp = last_frame->buffer_alloc; last_frame->buffer_alloc = new_frame->buffer_alloc; new_frame->buffer_alloc = temp; temp = last_frame->y_buffer; last_frame->y_buffer = new_frame->y_buffer; new_frame->y_buffer = temp; temp = last_frame->u_buffer; last_frame->u_buffer = new_frame->u_buffer; new_frame->u_buffer = temp; temp = last_frame->v_buffer; last_frame->v_buffer = new_frame->v_buffer; new_frame->v_buffer = temp; } libvpx-1.8.2/vp8/common/swapyv12buffer.h000066400000000000000000000014071357355204000201000ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_SWAPYV12BUFFER_H_ #define VPX_VP8_COMMON_SWAPYV12BUFFER_H_ #include "vpx_scale/yv12config.h" #ifdef __cplusplus extern "C" { #endif void vp8_swap_yv12_buffer(YV12_BUFFER_CONFIG *new_frame, YV12_BUFFER_CONFIG *last_frame); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_SWAPYV12BUFFER_H_ libvpx-1.8.2/vp8/common/systemdependent.h000066400000000000000000000013241357355204000204230ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
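*/

/* Illustrative sketch (not part of libvpx): vp8_swap_yv12_buffer() above
   promotes the new frame to "last frame" by exchanging buffer pointers only;
   no pixel data moves. The same idea in miniature with a hypothetical type. */
#if 0
typedef struct {
  unsigned char *y, *u, *v;
} toy_frame;

static void toy_swap(toy_frame *a, toy_frame *b) {
  toy_frame tmp = *a; /* three pointer copies, O(1) regardless of frame size */
  *a = *b;
  *b = tmp;
}
#endif

/*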
*/ #ifndef VPX_VP8_COMMON_SYSTEMDEPENDENT_H_ #define VPX_VP8_COMMON_SYSTEMDEPENDENT_H_ #include "vpx_config.h" #ifdef __cplusplus extern "C" { #endif struct VP8Common; void vp8_machine_specific_config(struct VP8Common *); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_SYSTEMDEPENDENT_H_ libvpx-1.8.2/vp8/common/threading.h000066400000000000000000000127101357355204000171560ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_THREADING_H_ #define VPX_VP8_COMMON_THREADING_H_ #include "./vpx_config.h" #ifdef __cplusplus extern "C" { #endif #if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD /* Thread management macros */ #if defined(_WIN32) && !HAVE_PTHREAD_H /* Win32 */ #include #include #if defined(__GNUC__) && \ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)) #define THREAD_FUNCTION \ __attribute__((force_align_arg_pointer)) unsigned int __stdcall #else #define THREAD_FUNCTION unsigned int __stdcall #endif #define THREAD_FUNCTION_RETURN DWORD #define THREAD_SPECIFIC_INDEX DWORD #define pthread_t HANDLE #define pthread_attr_t DWORD #define pthread_detach(thread) \ if (thread != NULL) CloseHandle(thread) #define thread_sleep(nms) Sleep(nms) #define pthread_cancel(thread) terminate_thread(thread, 0) #define ts_key_create(ts_key, destructor) \ { ts_key = TlsAlloc(); }; #define pthread_getspecific(ts_key) TlsGetValue(ts_key) #define pthread_setspecific(ts_key, value) TlsSetValue(ts_key, (void *)value) #define pthread_self() GetCurrentThreadId() #elif defined(__OS2__) /* OS/2 */ #define INCL_DOS #include #include #define THREAD_FUNCTION void * #define THREAD_FUNCTION_RETURN void * #define THREAD_SPECIFIC_INDEX PULONG #define pthread_t TID #define pthread_attr_t ULONG #define pthread_detach(thread) 0 #define thread_sleep(nms) DosSleep(nms) #define pthread_cancel(thread) DosKillThread(thread) #define ts_key_create(ts_key, destructor) \ DosAllocThreadLocalMemory(1, &(ts_key)); #define pthread_getspecific(ts_key) ((void *)(*(ts_key))) #define pthread_setspecific(ts_key, value) (*(ts_key) = (ULONG)(value)) #define pthread_self() _gettid() #else #ifdef __APPLE__ #include #include #include #include #include #else #include #endif #include /* pthreads */ /* Nearly everything is already defined */ #define THREAD_FUNCTION void * #define THREAD_FUNCTION_RETURN void * #define THREAD_SPECIFIC_INDEX pthread_key_t #define ts_key_create(ts_key, destructor) \ pthread_key_create(&(ts_key), destructor); #endif /* Synchronization macros: Win32 and Pthreads */ #if defined(_WIN32) && !HAVE_PTHREAD_H #define sem_t HANDLE #define pause(voidpara) __asm PAUSE #define sem_init(sem, sem_attr1, sem_init_value) \ (int)((*sem = CreateSemaphore(NULL, 0, 32768, NULL)) == NULL) #define sem_wait(sem) \ (int)(WAIT_OBJECT_0 != WaitForSingleObject(*sem, INFINITE)) #define sem_post(sem) ReleaseSemaphore(*sem, 1, NULL) #define sem_destroy(sem) \ if (*sem) ((int)(CloseHandle(*sem)) == TRUE) #define thread_sleep(nms) Sleep(nms) #elif defined(__OS2__) typedef struct { HEV event; HMTX wait_mutex; HMTX count_mutex; int count; } sem_t; static inline int sem_init(sem_t *sem, int pshared, unsigned int value) { DosCreateEventSem(NULL, 
&sem->event, pshared ? DC_SEM_SHARED : 0, value > 0 ? TRUE : FALSE); DosCreateMutexSem(NULL, &sem->wait_mutex, 0, FALSE); DosCreateMutexSem(NULL, &sem->count_mutex, 0, FALSE); sem->count = value; return 0; } static inline int sem_wait(sem_t *sem) { DosRequestMutexSem(sem->wait_mutex, -1); DosWaitEventSem(sem->event, -1); DosRequestMutexSem(sem->count_mutex, -1); sem->count--; if (sem->count == 0) { ULONG post_count; DosResetEventSem(sem->event, &post_count); } DosReleaseMutexSem(sem->count_mutex); DosReleaseMutexSem(sem->wait_mutex); return 0; } static inline int sem_post(sem_t *sem) { DosRequestMutexSem(sem->count_mutex, -1); if (sem->count < 32768) { sem->count++; DosPostEventSem(sem->event); } DosReleaseMutexSem(sem->count_mutex); return 0; } static inline int sem_destroy(sem_t *sem) { DosCloseEventSem(sem->event); DosCloseMutexSem(sem->wait_mutex); DosCloseMutexSem(sem->count_mutex); return 0; } #define thread_sleep(nms) DosSleep(nms) #else #ifdef __APPLE__ #define sem_t semaphore_t #define sem_init(X, Y, Z) \ semaphore_create(mach_task_self(), X, SYNC_POLICY_FIFO, Z) #define sem_wait(sem) (semaphore_wait(*sem)) #define sem_post(sem) semaphore_signal(*sem) #define sem_destroy(sem) semaphore_destroy(mach_task_self(), *sem) #define thread_sleep(nms) /* { struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ #else #include #include #define thread_sleep(nms) sched_yield(); /* {struct timespec ts;ts.tv_sec=0; ts.tv_nsec = 1000*nms;nanosleep(&ts, NULL);} */ #endif /* Not Windows. Assume pthreads */ #endif #if VPX_ARCH_X86 || VPX_ARCH_X86_64 #include "vpx_ports/x86.h" #else #define x86_pause_hint() #endif #include "vpx_util/vpx_thread.h" #include "vpx_util/vpx_atomics.h" static INLINE void vp8_atomic_spin_wait( int mb_col, const vpx_atomic_int *last_row_current_mb_col, const int nsync) { while (mb_col > (vpx_atomic_load_acquire(last_row_current_mb_col) - nsync)) { x86_pause_hint(); thread_sleep(0); } } #endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */ #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_THREADING_H_ libvpx-1.8.2/vp8/common/treecoder.c000066400000000000000000000053541357355204000171660ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
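*/

/* Illustrative sketch (not part of libvpx): the sem_t shims above give every
   platform the same wait/post surface. The row-threading handshake they
   support looks roughly like this on the pthreads path; the names and
   structure here are hypothetical. */
#if 0
#include <semaphore.h>

static sem_t row_done;

static void producer_row_finished(void) {
  sem_post(&row_done); /* one post per completed macroblock row */
}

static void consumer_wait_for_row(void) {
  sem_wait(&row_done); /* blocks until the matching post */
}
#endif

/*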
*/ #include #include #include "vp8/common/treecoder.h" #include "vpx/vpx_integer.h" static void tree2tok(struct vp8_token_struct *const p, vp8_tree t, int i, int v, int L) { v += v; ++L; do { const vp8_tree_index j = t[i++]; if (j <= 0) { p[-j].value = v; p[-j].Len = L; } else { tree2tok(p, t, j, v, L); } } while (++v & 1); } void vp8_tokens_from_tree(struct vp8_token_struct *p, vp8_tree t) { tree2tok(p, t, 0, 0, 0); } void vp8_tokens_from_tree_offset(struct vp8_token_struct *p, vp8_tree t, int offset) { tree2tok(p - offset, t, 0, 0, 0); } static void branch_counts(int n, /* n = size of alphabet */ vp8_token tok[/* n */], vp8_tree tree, unsigned int branch_ct[/* n-1 */][2], const unsigned int num_events[/* n */]) { const int tree_len = n - 1; int t = 0; assert(tree_len); do { branch_ct[t][0] = branch_ct[t][1] = 0; } while (++t < tree_len); t = 0; do { int L = tok[t].Len; const int enc = tok[t].value; const unsigned int ct = num_events[t]; vp8_tree_index i = 0; do { const int b = (enc >> --L) & 1; const int j = i >> 1; assert(j < tree_len && 0 <= L); branch_ct[j][b] += ct; i = tree[i + b]; } while (i > 0); assert(!L); } while (++t < n); } void vp8_tree_probs_from_distribution(int n, /* n = size of alphabet */ vp8_token tok[/* n */], vp8_tree tree, vp8_prob probs[/* n-1 */], unsigned int branch_ct[/* n-1 */][2], const unsigned int num_events[/* n */], unsigned int Pfactor, int Round) { const int tree_len = n - 1; int t = 0; branch_counts(n, tok, tree, branch_ct, num_events); do { const unsigned int *const c = branch_ct[t]; const unsigned int tot = c[0] + c[1]; if (tot) { const unsigned int p = (unsigned int)(((uint64_t)c[0] * Pfactor) + (Round ? tot >> 1 : 0)) / tot; probs[t] = p < 256 ? (p ? p : 1) : 255; /* agree w/old version for now */ } else { probs[t] = vp8_prob_half; } } while (++t < tree_len); } libvpx-1.8.2/vp8/common/treecoder.h000066400000000000000000000057311357355204000171720ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_TREECODER_H_ #define VPX_VP8_COMMON_TREECODER_H_ #ifdef __cplusplus extern "C" { #endif typedef unsigned char vp8bc_index_t; /* probability index */ typedef unsigned char vp8_prob; #define vp8_prob_half ((vp8_prob)128) typedef signed char vp8_tree_index; struct bool_coder_spec; typedef struct bool_coder_spec bool_coder_spec; typedef struct bool_writer bool_writer; typedef struct bool_reader bool_reader; typedef const bool_coder_spec c_bool_coder_spec; typedef const bool_writer c_bool_writer; typedef const bool_reader c_bool_reader; #define vp8_complement(x) (255 - (x)) /* We build coding trees compactly in arrays. Each node of the tree is a pair of vp8_tree_indices. Array index often references a corresponding probability table. Index <= 0 means done encoding/decoding and value = -Index, Index > 0 means need another bit, specification at index. Nonnegative indices are always even; processing begins at node 0. */ typedef const vp8_tree_index vp8_tree[], *vp8_tree_p; typedef const struct vp8_token_struct { int value; int Len; } vp8_token; /* Construct encoding array from tree. 
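*/

/* Illustrative sketch (not part of libvpx): the layout comment above
   describes the compact tree arrays that tree2tok() walks: a non-positive
   entry is a leaf holding the negated symbol value, a positive entry is the
   index of the next node pair. A hypothetical decode walk over such a tree,
   consuming one bit per step; toy_* names are not library API. */
#if 0
typedef signed char toy_tree_index;

/* Three symbols: node 0 splits {symbol 0} vs node 2; node 2 splits
   {symbol 1} vs {symbol 2}. */
static const toy_tree_index toy_tree[4] = { 0, 2, -1, -2 };

static int toy_tree_decode(const toy_tree_index *t, const int *bits) {
  toy_tree_index i = 0;
  do {
    i = t[i + *bits++]; /* branch left/right on the next bit */
  } while (i > 0);
  return -i; /* leaf reached: recover the symbol */
}
#endif

/*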
*/ void vp8_tokens_from_tree(struct vp8_token_struct *, vp8_tree); void vp8_tokens_from_tree_offset(struct vp8_token_struct *, vp8_tree, int offset); /* Convert array of token occurrence counts into a table of probabilities for the associated binary encoding tree. Also writes count of branches taken for each node on the tree; this facilitiates decisions as to probability updates. */ void vp8_tree_probs_from_distribution(int n, /* n = size of alphabet */ vp8_token tok[/* n */], vp8_tree tree, vp8_prob probs[/* n-1 */], unsigned int branch_ct[/* n-1 */][2], const unsigned int num_events[/* n */], unsigned int Pfactor, int Round); /* Variant of above using coder spec rather than hardwired 8-bit probs. */ void vp8bc_tree_probs_from_distribution(int n, /* n = size of alphabet */ vp8_token tok[/* n */], vp8_tree tree, vp8_prob probs[/* n-1 */], unsigned int branch_ct[/* n-1 */][2], const unsigned int num_events[/* n */], c_bool_coder_spec *s); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_TREECODER_H_ libvpx-1.8.2/vp8/common/vp8_entropymodedata.h000066400000000000000000000163541357355204000212150ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_VP8_ENTROPYMODEDATA_H_ #define VPX_VP8_COMMON_VP8_ENTROPYMODEDATA_H_ #ifdef __cplusplus extern "C" { #endif /*Generated file, included by entropymode.c*/ const struct vp8_token_struct vp8_bmode_encodings[VP8_BINTRAMODES] = { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 28, 5 }, { 30, 5 }, { 58, 6 }, { 59, 6 }, { 62, 6 }, { 126, 7 }, { 127, 7 } }; const struct vp8_token_struct vp8_ymode_encodings[VP8_YMODES] = { { 0, 1 }, { 4, 3 }, { 5, 3 }, { 6, 3 }, { 7, 3 } }; const struct vp8_token_struct vp8_kf_ymode_encodings[VP8_YMODES] = { { 4, 3 }, { 5, 3 }, { 6, 3 }, { 7, 3 }, { 0, 1 } }; const struct vp8_token_struct vp8_uv_mode_encodings[VP8_UV_MODES] = { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 } }; const struct vp8_token_struct vp8_mbsplit_encodings[VP8_NUMMBSPLITS] = { { 6, 3 }, { 7, 3 }, { 2, 2 }, { 0, 1 } }; const struct vp8_token_struct vp8_mv_ref_encoding_array[VP8_MVREFS] = { { 2, 2 }, { 6, 3 }, { 0, 1 }, { 14, 4 }, { 15, 4 } }; const struct vp8_token_struct vp8_sub_mv_ref_encoding_array[VP8_SUBMVREFS] = { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 } }; const struct vp8_token_struct vp8_small_mvencodings[8] = { { 0, 3 }, { 1, 3 }, { 2, 3 }, { 3, 3 }, { 4, 3 }, { 5, 3 }, { 6, 3 }, { 7, 3 } }; const vp8_prob vp8_ymode_prob[VP8_YMODES - 1] = { 112, 86, 140, 37 }; const vp8_prob vp8_kf_ymode_prob[VP8_YMODES - 1] = { 145, 156, 163, 128 }; const vp8_prob vp8_uv_mode_prob[VP8_UV_MODES - 1] = { 162, 101, 204 }; const vp8_prob vp8_kf_uv_mode_prob[VP8_UV_MODES - 1] = { 142, 114, 183 }; const vp8_prob vp8_bmode_prob[VP8_BINTRAMODES - 1] = { 120, 90, 79, 133, 87, 85, 80, 111, 151 }; const vp8_prob vp8_kf_bmode_prob[VP8_BINTRAMODES][VP8_BINTRAMODES][VP8_BINTRAMODES - 1] = { { { 231, 120, 48, 89, 115, 113, 120, 152, 112 }, { 152, 179, 64, 126, 170, 118, 46, 70, 95 }, { 175, 69, 143, 80, 85, 82, 72, 155, 103 }, { 56, 58, 10, 171, 218, 189, 17, 13, 152 }, { 144, 71, 10, 38, 171, 213, 144, 34, 26 }, { 114, 26, 17, 163, 44, 195, 21, 10, 173 }, { 121, 24, 80, 195, 26, 62, 44, 64, 85 
}, { 170, 46, 55, 19, 136, 160, 33, 206, 71 }, { 63, 20, 8, 114, 114, 208, 12, 9, 226 }, { 81, 40, 11, 96, 182, 84, 29, 16, 36 } }, { { 134, 183, 89, 137, 98, 101, 106, 165, 148 }, { 72, 187, 100, 130, 157, 111, 32, 75, 80 }, { 66, 102, 167, 99, 74, 62, 40, 234, 128 }, { 41, 53, 9, 178, 241, 141, 26, 8, 107 }, { 104, 79, 12, 27, 217, 255, 87, 17, 7 }, { 74, 43, 26, 146, 73, 166, 49, 23, 157 }, { 65, 38, 105, 160, 51, 52, 31, 115, 128 }, { 87, 68, 71, 44, 114, 51, 15, 186, 23 }, { 47, 41, 14, 110, 182, 183, 21, 17, 194 }, { 66, 45, 25, 102, 197, 189, 23, 18, 22 } }, { { 88, 88, 147, 150, 42, 46, 45, 196, 205 }, { 43, 97, 183, 117, 85, 38, 35, 179, 61 }, { 39, 53, 200, 87, 26, 21, 43, 232, 171 }, { 56, 34, 51, 104, 114, 102, 29, 93, 77 }, { 107, 54, 32, 26, 51, 1, 81, 43, 31 }, { 39, 28, 85, 171, 58, 165, 90, 98, 64 }, { 34, 22, 116, 206, 23, 34, 43, 166, 73 }, { 68, 25, 106, 22, 64, 171, 36, 225, 114 }, { 34, 19, 21, 102, 132, 188, 16, 76, 124 }, { 62, 18, 78, 95, 85, 57, 50, 48, 51 } }, { { 193, 101, 35, 159, 215, 111, 89, 46, 111 }, { 60, 148, 31, 172, 219, 228, 21, 18, 111 }, { 112, 113, 77, 85, 179, 255, 38, 120, 114 }, { 40, 42, 1, 196, 245, 209, 10, 25, 109 }, { 100, 80, 8, 43, 154, 1, 51, 26, 71 }, { 88, 43, 29, 140, 166, 213, 37, 43, 154 }, { 61, 63, 30, 155, 67, 45, 68, 1, 209 }, { 142, 78, 78, 16, 255, 128, 34, 197, 171 }, { 41, 40, 5, 102, 211, 183, 4, 1, 221 }, { 51, 50, 17, 168, 209, 192, 23, 25, 82 } }, { { 125, 98, 42, 88, 104, 85, 117, 175, 82 }, { 95, 84, 53, 89, 128, 100, 113, 101, 45 }, { 75, 79, 123, 47, 51, 128, 81, 171, 1 }, { 57, 17, 5, 71, 102, 57, 53, 41, 49 }, { 115, 21, 2, 10, 102, 255, 166, 23, 6 }, { 38, 33, 13, 121, 57, 73, 26, 1, 85 }, { 41, 10, 67, 138, 77, 110, 90, 47, 114 }, { 101, 29, 16, 10, 85, 128, 101, 196, 26 }, { 57, 18, 10, 102, 102, 213, 34, 20, 43 }, { 117, 20, 15, 36, 163, 128, 68, 1, 26 } }, { { 138, 31, 36, 171, 27, 166, 38, 44, 229 }, { 67, 87, 58, 169, 82, 115, 26, 59, 179 }, { 63, 59, 90, 180, 59, 166, 93, 73, 154 }, { 40, 40, 21, 116, 143, 209, 34, 39, 175 }, { 57, 46, 22, 24, 128, 1, 54, 17, 37 }, { 47, 15, 16, 183, 34, 223, 49, 45, 183 }, { 46, 17, 33, 183, 6, 98, 15, 32, 183 }, { 65, 32, 73, 115, 28, 128, 23, 128, 205 }, { 40, 3, 9, 115, 51, 192, 18, 6, 223 }, { 87, 37, 9, 115, 59, 77, 64, 21, 47 } }, { { 104, 55, 44, 218, 9, 54, 53, 130, 226 }, { 64, 90, 70, 205, 40, 41, 23, 26, 57 }, { 54, 57, 112, 184, 5, 41, 38, 166, 213 }, { 30, 34, 26, 133, 152, 116, 10, 32, 134 }, { 75, 32, 12, 51, 192, 255, 160, 43, 51 }, { 39, 19, 53, 221, 26, 114, 32, 73, 255 }, { 31, 9, 65, 234, 2, 15, 1, 118, 73 }, { 88, 31, 35, 67, 102, 85, 55, 186, 85 }, { 56, 21, 23, 111, 59, 205, 45, 37, 192 }, { 55, 38, 70, 124, 73, 102, 1, 34, 98 } }, { { 102, 61, 71, 37, 34, 53, 31, 243, 192 }, { 69, 60, 71, 38, 73, 119, 28, 222, 37 }, { 68, 45, 128, 34, 1, 47, 11, 245, 171 }, { 62, 17, 19, 70, 146, 85, 55, 62, 70 }, { 75, 15, 9, 9, 64, 255, 184, 119, 16 }, { 37, 43, 37, 154, 100, 163, 85, 160, 1 }, { 63, 9, 92, 136, 28, 64, 32, 201, 85 }, { 86, 6, 28, 5, 64, 255, 25, 248, 1 }, { 56, 8, 17, 132, 137, 255, 55, 116, 128 }, { 58, 15, 20, 82, 135, 57, 26, 121, 40 } }, { { 164, 50, 31, 137, 154, 133, 25, 35, 218 }, { 51, 103, 44, 131, 131, 123, 31, 6, 158 }, { 86, 40, 64, 135, 148, 224, 45, 183, 128 }, { 22, 26, 17, 131, 240, 154, 14, 1, 209 }, { 83, 12, 13, 54, 192, 255, 68, 47, 28 }, { 45, 16, 21, 91, 64, 222, 7, 1, 197 }, { 56, 21, 39, 155, 60, 138, 23, 102, 213 }, { 85, 26, 85, 85, 128, 128, 32, 146, 171 }, { 18, 11, 7, 63, 144, 171, 4, 4, 246 }, { 35, 27, 10, 146, 174, 
171, 12, 26, 128 } },
  { { 190, 80, 35, 99, 180, 80, 126, 54, 45 },
    { 85, 126, 47, 87, 176, 51, 41, 20, 32 },
    { 101, 75, 128, 139, 118, 146, 116, 128, 85 },
    { 56, 41, 15, 176, 236, 85, 37, 9, 62 },
    { 146, 36, 19, 30, 171, 255, 97, 27, 20 },
    { 71, 30, 17, 119, 118, 255, 17, 18, 138 },
    { 101, 38, 60, 138, 55, 70, 43, 26, 142 },
    { 138, 45, 61, 62, 219, 1, 81, 188, 64 },
    { 32, 41, 20, 117, 151, 142, 20, 21, 163 },
    { 112, 19, 12, 61, 195, 128, 48, 4, 24 } }
};

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP8_COMMON_VP8_ENTROPYMODEDATA_H_
libvpx-1.8.2/vp8/common/vp8_loopfilter.c000066400000000000000000000447751357355204000201740ustar00rootroot00000000000000/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_config.h"
#include "vp8_rtcd.h"
#include "loopfilter.h"
#include "onyxc_int.h"
#include "vpx_mem/vpx_mem.h"

static void lf_init_lut(loop_filter_info_n *lfi) {
  int filt_lvl;

  for (filt_lvl = 0; filt_lvl <= MAX_LOOP_FILTER; ++filt_lvl) {
    if (filt_lvl >= 40) {
      lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 2;
      lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 3;
    } else if (filt_lvl >= 20) {
      lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
      lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 2;
    } else if (filt_lvl >= 15) {
      lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 1;
      lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 1;
    } else {
      lfi->hev_thr_lut[KEY_FRAME][filt_lvl] = 0;
      lfi->hev_thr_lut[INTER_FRAME][filt_lvl] = 0;
    }
  }

  lfi->mode_lf_lut[DC_PRED] = 1;
  lfi->mode_lf_lut[V_PRED] = 1;
  lfi->mode_lf_lut[H_PRED] = 1;
  lfi->mode_lf_lut[TM_PRED] = 1;
  lfi->mode_lf_lut[B_PRED] = 0;

  lfi->mode_lf_lut[ZEROMV] = 1;
  lfi->mode_lf_lut[NEARESTMV] = 2;
  lfi->mode_lf_lut[NEARMV] = 2;
  lfi->mode_lf_lut[NEWMV] = 2;
  lfi->mode_lf_lut[SPLITMV] = 3;
}

void vp8_loop_filter_update_sharpness(loop_filter_info_n *lfi,
                                      int sharpness_lvl) {
  int i;

  /* For each possible value for the loop filter fill out limits */
  for (i = 0; i <= MAX_LOOP_FILTER; ++i) {
    int filt_lvl = i;
    int block_inside_limit = 0;

    /* Set loop filter parameters that control sharpness.
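*/

/* Illustrative sketch (not part of libvpx): the limit derivation that follows
   collapses to a short pure function of (filter level, sharpness); e.g.
   filt_lvl = 32 with sharpness_lvl = 5 gives min(32 >> 1 >> 1, 9 - 5) = 4.
   The helper name is hypothetical. */
#if 0
static int toy_block_inside_limit(int filt_lvl, int sharpness_lvl) {
  int limit = filt_lvl >> (sharpness_lvl > 0);
  limit >>= (sharpness_lvl > 4);
  if (sharpness_lvl > 0 && limit > 9 - sharpness_lvl) limit = 9 - sharpness_lvl;
  return limit < 1 ? 1 : limit;
}
#endif

/*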
*/ block_inside_limit = filt_lvl >> (sharpness_lvl > 0); block_inside_limit = block_inside_limit >> (sharpness_lvl > 4); if (sharpness_lvl > 0) { if (block_inside_limit > (9 - sharpness_lvl)) { block_inside_limit = (9 - sharpness_lvl); } } if (block_inside_limit < 1) block_inside_limit = 1; memset(lfi->lim[i], block_inside_limit, SIMD_WIDTH); memset(lfi->blim[i], (2 * filt_lvl + block_inside_limit), SIMD_WIDTH); memset(lfi->mblim[i], (2 * (filt_lvl + 2) + block_inside_limit), SIMD_WIDTH); } } void vp8_loop_filter_init(VP8_COMMON *cm) { loop_filter_info_n *lfi = &cm->lf_info; int i; /* init limits for given sharpness*/ vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level); cm->last_sharpness_level = cm->sharpness_level; /* init LUT for lvl and hev thr picking */ lf_init_lut(lfi); /* init hev threshold const vectors */ for (i = 0; i < 4; ++i) { memset(lfi->hev_thr[i], i, SIMD_WIDTH); } } void vp8_loop_filter_frame_init(VP8_COMMON *cm, MACROBLOCKD *mbd, int default_filt_lvl) { int seg, /* segment number */ ref, /* index in ref_lf_deltas */ mode; /* index in mode_lf_deltas */ loop_filter_info_n *lfi = &cm->lf_info; /* update limits if sharpness has changed */ if (cm->last_sharpness_level != cm->sharpness_level) { vp8_loop_filter_update_sharpness(lfi, cm->sharpness_level); cm->last_sharpness_level = cm->sharpness_level; } for (seg = 0; seg < MAX_MB_SEGMENTS; ++seg) { int lvl_seg = default_filt_lvl; int lvl_ref, lvl_mode; /* Note the baseline filter values for each segment */ if (mbd->segmentation_enabled) { if (mbd->mb_segement_abs_delta == SEGMENT_ABSDATA) { lvl_seg = mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; } else { /* Delta Value */ lvl_seg += mbd->segment_feature_data[MB_LVL_ALT_LF][seg]; } lvl_seg = (lvl_seg > 0) ? ((lvl_seg > 63) ? 63 : lvl_seg) : 0; } if (!mbd->mode_ref_lf_delta_enabled) { /* we could get rid of this if we assume that deltas are set to * zero when not in use; encoder always uses deltas */ memset(lfi->lvl[seg][0], lvl_seg, 4 * 4); continue; } /* INTRA_FRAME */ ref = INTRA_FRAME; /* Apply delta for reference frame */ lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref]; /* Apply delta for Intra modes */ mode = 0; /* B_PRED */ /* Only the split mode BPRED has a further special case */ lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode]; /* clamp */ lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 63 : lvl_mode) : 0; lfi->lvl[seg][ref][mode] = lvl_mode; mode = 1; /* all the rest of Intra modes */ /* clamp */ lvl_mode = (lvl_ref > 0) ? (lvl_ref > 63 ? 63 : lvl_ref) : 0; lfi->lvl[seg][ref][mode] = lvl_mode; /* LAST, GOLDEN, ALT */ for (ref = 1; ref < MAX_REF_FRAMES; ++ref) { /* Apply delta for reference frame */ lvl_ref = lvl_seg + mbd->ref_lf_deltas[ref]; /* Apply delta for Inter modes */ for (mode = 1; mode < 4; ++mode) { lvl_mode = lvl_ref + mbd->mode_lf_deltas[mode]; /* clamp */ lvl_mode = (lvl_mode > 0) ? (lvl_mode > 63 ? 
63 : lvl_mode) : 0; lfi->lvl[seg][ref][mode] = lvl_mode; } } } } void vp8_loop_filter_row_normal(VP8_COMMON *cm, MODE_INFO *mode_info_context, int mb_row, int post_ystride, int post_uvstride, unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr) { int mb_col; int filter_level; loop_filter_info_n *lfi_n = &cm->lf_info; loop_filter_info lfi; FRAME_TYPE frame_type = cm->frame_type; for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { int skip_lf = (mode_info_context->mbmi.mode != B_PRED && mode_info_context->mbmi.mode != SPLITMV && mode_info_context->mbmi.mb_skip_coeff); const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; const int seg = mode_info_context->mbmi.segment_id; const int ref_frame = mode_info_context->mbmi.ref_frame; filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; if (filter_level) { const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; lfi.mblim = lfi_n->mblim[filter_level]; lfi.blim = lfi_n->blim[filter_level]; lfi.lim = lfi_n->lim[filter_level]; lfi.hev_thr = lfi_n->hev_thr[hev_index]; if (mb_col > 0) vp8_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); if (!skip_lf) vp8_loop_filter_bv(y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); /* don't apply across umv border */ if (mb_row > 0) vp8_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); if (!skip_lf) vp8_loop_filter_bh(y_ptr, u_ptr, v_ptr, post_ystride, post_uvstride, &lfi); } y_ptr += 16; u_ptr += 8; v_ptr += 8; mode_info_context++; /* step to next MB */ } } void vp8_loop_filter_row_simple(VP8_COMMON *cm, MODE_INFO *mode_info_context, int mb_row, int post_ystride, unsigned char *y_ptr) { int mb_col; int filter_level; loop_filter_info_n *lfi_n = &cm->lf_info; for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { int skip_lf = (mode_info_context->mbmi.mode != B_PRED && mode_info_context->mbmi.mode != SPLITMV && mode_info_context->mbmi.mb_skip_coeff); const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; const int seg = mode_info_context->mbmi.segment_id; const int ref_frame = mode_info_context->mbmi.ref_frame; filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; if (filter_level) { if (mb_col > 0) vp8_loop_filter_simple_mbv(y_ptr, post_ystride, lfi_n->mblim[filter_level]); if (!skip_lf) vp8_loop_filter_simple_bv(y_ptr, post_ystride, lfi_n->blim[filter_level]); /* don't apply across umv border */ if (mb_row > 0) vp8_loop_filter_simple_mbh(y_ptr, post_ystride, lfi_n->mblim[filter_level]); if (!skip_lf) vp8_loop_filter_simple_bh(y_ptr, post_ystride, lfi_n->blim[filter_level]); } y_ptr += 16; mode_info_context++; /* step to next MB */ } } void vp8_loop_filter_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int frame_type) { YV12_BUFFER_CONFIG *post = cm->frame_to_show; loop_filter_info_n *lfi_n = &cm->lf_info; loop_filter_info lfi; int mb_row; int mb_col; int mb_rows = cm->mb_rows; int mb_cols = cm->mb_cols; int filter_level; unsigned char *y_ptr, *u_ptr, *v_ptr; /* Point at base of Mb MODE_INFO list */ const MODE_INFO *mode_info_context = cm->mi; int post_y_stride = post->y_stride; int post_uv_stride = post->uv_stride; /* Initialize the loop filter for this frame. 
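*/

/* Illustrative sketch (not part of libvpx): each loop above derives the same
   skip_lf gate. Rewritten as a predicate (by De Morgan on skip_lf): the inner
   block edges are filtered only when the macroblock has residual coefficients
   or uses a partitioned prediction mode. The helper name is hypothetical. */
#if 0
static int toy_filter_inner_edges(MB_PREDICTION_MODE mode, int mb_skip_coeff) {
  return mode == B_PRED || mode == SPLITMV || !mb_skip_coeff;
}
#endif

/*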
*/ vp8_loop_filter_frame_init(cm, mbd, cm->filter_level); /* Set up the buffer pointers */ y_ptr = post->y_buffer; u_ptr = post->u_buffer; v_ptr = post->v_buffer; /* vp8_filter each macro block */ if (cm->filter_type == NORMAL_LOOPFILTER) { for (mb_row = 0; mb_row < mb_rows; ++mb_row) { for (mb_col = 0; mb_col < mb_cols; ++mb_col) { int skip_lf = (mode_info_context->mbmi.mode != B_PRED && mode_info_context->mbmi.mode != SPLITMV && mode_info_context->mbmi.mb_skip_coeff); const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; const int seg = mode_info_context->mbmi.segment_id; const int ref_frame = mode_info_context->mbmi.ref_frame; filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; if (filter_level) { const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; lfi.mblim = lfi_n->mblim[filter_level]; lfi.blim = lfi_n->blim[filter_level]; lfi.lim = lfi_n->lim[filter_level]; lfi.hev_thr = lfi_n->hev_thr[hev_index]; if (mb_col > 0) vp8_loop_filter_mbv(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); if (!skip_lf) vp8_loop_filter_bv(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); /* don't apply across umv border */ if (mb_row > 0) vp8_loop_filter_mbh(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); if (!skip_lf) vp8_loop_filter_bh(y_ptr, u_ptr, v_ptr, post_y_stride, post_uv_stride, &lfi); } y_ptr += 16; u_ptr += 8; v_ptr += 8; mode_info_context++; /* step to next MB */ } y_ptr += post_y_stride * 16 - post->y_width; u_ptr += post_uv_stride * 8 - post->uv_width; v_ptr += post_uv_stride * 8 - post->uv_width; mode_info_context++; /* Skip border mb */ } } else { /* SIMPLE_LOOPFILTER */ for (mb_row = 0; mb_row < mb_rows; ++mb_row) { for (mb_col = 0; mb_col < mb_cols; ++mb_col) { int skip_lf = (mode_info_context->mbmi.mode != B_PRED && mode_info_context->mbmi.mode != SPLITMV && mode_info_context->mbmi.mb_skip_coeff); const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; const int seg = mode_info_context->mbmi.segment_id; const int ref_frame = mode_info_context->mbmi.ref_frame; filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; if (filter_level) { const unsigned char *mblim = lfi_n->mblim[filter_level]; const unsigned char *blim = lfi_n->blim[filter_level]; if (mb_col > 0) vp8_loop_filter_simple_mbv(y_ptr, post_y_stride, mblim); if (!skip_lf) vp8_loop_filter_simple_bv(y_ptr, post_y_stride, blim); /* don't apply across umv border */ if (mb_row > 0) vp8_loop_filter_simple_mbh(y_ptr, post_y_stride, mblim); if (!skip_lf) vp8_loop_filter_simple_bh(y_ptr, post_y_stride, blim); } y_ptr += 16; u_ptr += 8; v_ptr += 8; mode_info_context++; /* step to next MB */ } y_ptr += post_y_stride * 16 - post->y_width; u_ptr += post_uv_stride * 8 - post->uv_width; v_ptr += post_uv_stride * 8 - post->uv_width; mode_info_context++; /* Skip border mb */ } } } void vp8_loop_filter_frame_yonly(VP8_COMMON *cm, MACROBLOCKD *mbd, int default_filt_lvl) { YV12_BUFFER_CONFIG *post = cm->frame_to_show; unsigned char *y_ptr; int mb_row; int mb_col; loop_filter_info_n *lfi_n = &cm->lf_info; loop_filter_info lfi; int filter_level; FRAME_TYPE frame_type = cm->frame_type; /* Point at base of Mb MODE_INFO list */ const MODE_INFO *mode_info_context = cm->mi; #if 0 if(default_filt_lvl == 0) /* no filter applied */ return; #endif /* Initialize the loop filter for this frame. 
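*/

/* Illustrative sketch (not part of libvpx): vp8_loop_filter_frame() above
   steps mode_info_context one extra time per row because the MODE_INFO array
   carries a border entry per row; vp8_loop_filter_partial_frame() below uses
   the matching (mb_cols + 1) row stride when it seeks to the middle of the
   frame. A hypothetical restatement of that indexing. */
#if 0
static const MODE_INFO *toy_mi_row_start(const MODE_INFO *mi_base, int mb_row,
                                         int mb_cols) {
  return mi_base + mb_row * (mb_cols + 1); /* +1 skips the border entry */
}
#endif

/*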
*/ vp8_loop_filter_frame_init(cm, mbd, default_filt_lvl); /* Set up the buffer pointers */ y_ptr = post->y_buffer; /* vp8_filter each macro block */ for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { int skip_lf = (mode_info_context->mbmi.mode != B_PRED && mode_info_context->mbmi.mode != SPLITMV && mode_info_context->mbmi.mb_skip_coeff); const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; const int seg = mode_info_context->mbmi.segment_id; const int ref_frame = mode_info_context->mbmi.ref_frame; filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; if (filter_level) { if (cm->filter_type == NORMAL_LOOPFILTER) { const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; lfi.mblim = lfi_n->mblim[filter_level]; lfi.blim = lfi_n->blim[filter_level]; lfi.lim = lfi_n->lim[filter_level]; lfi.hev_thr = lfi_n->hev_thr[hev_index]; if (mb_col > 0) vp8_loop_filter_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi); if (!skip_lf) vp8_loop_filter_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi); /* don't apply across umv border */ if (mb_row > 0) vp8_loop_filter_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi); if (!skip_lf) vp8_loop_filter_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi); } else { if (mb_col > 0) vp8_loop_filter_simple_mbv(y_ptr, post->y_stride, lfi_n->mblim[filter_level]); if (!skip_lf) vp8_loop_filter_simple_bv(y_ptr, post->y_stride, lfi_n->blim[filter_level]); /* don't apply across umv border */ if (mb_row > 0) vp8_loop_filter_simple_mbh(y_ptr, post->y_stride, lfi_n->mblim[filter_level]); if (!skip_lf) vp8_loop_filter_simple_bh(y_ptr, post->y_stride, lfi_n->blim[filter_level]); } } y_ptr += 16; mode_info_context++; /* step to next MB */ } y_ptr += post->y_stride * 16 - post->y_width; mode_info_context++; /* Skip border mb */ } } void vp8_loop_filter_partial_frame(VP8_COMMON *cm, MACROBLOCKD *mbd, int default_filt_lvl) { YV12_BUFFER_CONFIG *post = cm->frame_to_show; unsigned char *y_ptr; int mb_row; int mb_col; int mb_cols = post->y_width >> 4; int mb_rows = post->y_height >> 4; int linestocopy; loop_filter_info_n *lfi_n = &cm->lf_info; loop_filter_info lfi; int filter_level; FRAME_TYPE frame_type = cm->frame_type; const MODE_INFO *mode_info_context; #if 0 if(default_filt_lvl == 0) /* no filter applied */ return; #endif /* Initialize the loop filter for this frame. */ vp8_loop_filter_frame_init(cm, mbd, default_filt_lvl); /* number of MB rows to use in partial filtering */ linestocopy = mb_rows / PARTIAL_FRAME_FRACTION; linestocopy = linestocopy ? 
linestocopy << 4 : 16; /* 16 lines per MB */ /* Set up the buffer pointers; partial image starts at ~middle of frame */ y_ptr = post->y_buffer + ((post->y_height >> 5) * 16) * post->y_stride; mode_info_context = cm->mi + (post->y_height >> 5) * (mb_cols + 1); /* vp8_filter each macro block */ for (mb_row = 0; mb_row < (linestocopy >> 4); ++mb_row) { for (mb_col = 0; mb_col < mb_cols; ++mb_col) { int skip_lf = (mode_info_context->mbmi.mode != B_PRED && mode_info_context->mbmi.mode != SPLITMV && mode_info_context->mbmi.mb_skip_coeff); const int mode_index = lfi_n->mode_lf_lut[mode_info_context->mbmi.mode]; const int seg = mode_info_context->mbmi.segment_id; const int ref_frame = mode_info_context->mbmi.ref_frame; filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; if (filter_level) { if (cm->filter_type == NORMAL_LOOPFILTER) { const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; lfi.mblim = lfi_n->mblim[filter_level]; lfi.blim = lfi_n->blim[filter_level]; lfi.lim = lfi_n->lim[filter_level]; lfi.hev_thr = lfi_n->hev_thr[hev_index]; if (mb_col > 0) vp8_loop_filter_mbv(y_ptr, 0, 0, post->y_stride, 0, &lfi); if (!skip_lf) vp8_loop_filter_bv(y_ptr, 0, 0, post->y_stride, 0, &lfi); vp8_loop_filter_mbh(y_ptr, 0, 0, post->y_stride, 0, &lfi); if (!skip_lf) vp8_loop_filter_bh(y_ptr, 0, 0, post->y_stride, 0, &lfi); } else { if (mb_col > 0) vp8_loop_filter_simple_mbv(y_ptr, post->y_stride, lfi_n->mblim[filter_level]); if (!skip_lf) vp8_loop_filter_simple_bv(y_ptr, post->y_stride, lfi_n->blim[filter_level]); vp8_loop_filter_simple_mbh(y_ptr, post->y_stride, lfi_n->mblim[filter_level]); if (!skip_lf) vp8_loop_filter_simple_bh(y_ptr, post->y_stride, lfi_n->blim[filter_level]); } } y_ptr += 16; mode_info_context += 1; /* step to next MB */ } y_ptr += post->y_stride * 16 - post->y_width; mode_info_context += 1; /* Skip border mb */ } } libvpx-1.8.2/vp8/common/vp8_skin_detection.c000066400000000000000000000072721357355204000210120ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp8/common/alloccommon.h" #include "vp8/common/vp8_skin_detection.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_util/vpx_write_yuv_frame.h" static int avg_2x2(const uint8_t *s, int p) { int i, j; int sum = 0; for (i = 0; i < 2; ++i, s += p) { for (j = 0; j < 2; ++j) { sum += s[j]; } } return (sum + 2) >> 2; } int vp8_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, int stride, int strideuv, SKIN_DETECTION_BLOCK_SIZE bsize, int consec_zeromv, int curr_motion_magn) { // No skin if block has been zero/small motion for long consecutive time. if (consec_zeromv > 60 && curr_motion_magn == 0) { return 0; } else { int motion = 1; if (consec_zeromv > 25 && curr_motion_magn == 0) motion = 0; if (bsize == SKIN_16X16) { // Take the average of center 2x2 pixels. 
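      /* For a 16x16 luma block the center 2x2 starts at offset (7,7); under
         4:2:0 subsampling the co-located chroma block is 8x8, so its center
         2x2 starts at (3,3). */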
const int ysource = avg_2x2(y + 7 * stride + 7, stride); const int usource = avg_2x2(u + 3 * strideuv + 3, strideuv); const int vsource = avg_2x2(v + 3 * strideuv + 3, strideuv); return vpx_skin_pixel(ysource, usource, vsource, motion); } else { int num_skin = 0; int i, j; for (i = 0; i < 2; i++) { for (j = 0; j < 2; j++) { // Take the average of center 2x2 pixels. const int ysource = avg_2x2(y + 3 * stride + 3, stride); const int usource = avg_2x2(u + strideuv + 1, strideuv); const int vsource = avg_2x2(v + strideuv + 1, strideuv); num_skin += vpx_skin_pixel(ysource, usource, vsource, motion); if (num_skin >= 2) return 1; y += 8; u += 4; v += 4; } y += (stride << 3) - 16; u += (strideuv << 2) - 8; v += (strideuv << 2) - 8; } return 0; } } } #ifdef OUTPUT_YUV_SKINMAP // For viewing skin map on input source. void vp8_compute_skin_map(VP8_COMP *const cpi, FILE *yuv_skinmap_file) { int i, j, mb_row, mb_col, num_bl; VP8_COMMON *const cm = &cpi->common; uint8_t *y; const uint8_t *src_y = cpi->Source->y_buffer; const int src_ystride = cpi->Source->y_stride; int offset = 0; YV12_BUFFER_CONFIG skinmap; memset(&skinmap, 0, sizeof(skinmap)); if (vp8_yv12_alloc_frame_buffer(&skinmap, cm->Width, cm->Height, VP8BORDERINPIXELS) < 0) { vpx_free_frame_buffer(&skinmap); return; } memset(skinmap.buffer_alloc, 128, skinmap.frame_size); y = skinmap.y_buffer; // Loop through blocks and set skin map based on center pixel of block. // Set y to white for skin block, otherwise set to source with gray scale. for (mb_row = 0; mb_row < cm->mb_rows; mb_row += 1) { num_bl = 0; for (mb_col = 0; mb_col < cm->mb_cols; mb_col += 1) { const int is_skin = cpi->skin_map[offset++]; for (i = 0; i < 16; i++) { for (j = 0; j < 16; j++) { y[i * src_ystride + j] = is_skin ? 255 : src_y[i * src_ystride + j]; } } num_bl++; y += 16; src_y += 16; } y += (src_ystride << 4) - (num_bl << 4); src_y += (src_ystride << 4) - (num_bl << 4); } vpx_write_yuv_frame(yuv_skinmap_file, &skinmap); vpx_free_frame_buffer(&skinmap); } #endif // OUTPUT_YUV_SKINMAP libvpx-1.8.2/vp8/common/vp8_skin_detection.h000066400000000000000000000026331357355204000210130ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_COMMON_VP8_SKIN_DETECTION_H_ #define VPX_VP8_COMMON_VP8_SKIN_DETECTION_H_ #include "vp8/encoder/onyx_int.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/skin_detection.h" #include "vpx_scale/yv12config.h" #ifdef __cplusplus extern "C" { #endif struct VP8_COMP; typedef enum { // Skin detection based on 8x8 block. If two of them are identified as skin, // the macroblock is marked as skin. SKIN_8X8, // Skin detection based on 16x16 block. SKIN_16X16 } SKIN_DETECTION_BLOCK_SIZE; int vp8_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, int stride, int strideuv, SKIN_DETECTION_BLOCK_SIZE bsize, int consec_zeromv, int curr_motion_magn); #ifdef OUTPUT_YUV_SKINMAP // For viewing skin map on input source. 
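// The map is emitted as a YV12 frame: skin macroblocks get their luma forced
// to 255, all other macroblocks keep the source luma, and both chroma planes
// are flat gray (128).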
void vp8_compute_skin_map(struct VP8_COMP *const cpi, FILE *yuv_skinmap_file); #endif #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_COMMON_VP8_SKIN_DETECTION_H_ libvpx-1.8.2/vp8/common/x86/000077500000000000000000000000001357355204000154645ustar00rootroot00000000000000libvpx-1.8.2/vp8/common/x86/bilinear_filter_sse2.c000066400000000000000000000302371357355204000217230ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <emmintrin.h> #include "./vp8_rtcd.h" #include "./vpx_config.h" #include "vp8/common/filter.h" #include "vpx_dsp/x86/mem_sse2.h" #include "vpx_ports/mem.h" static INLINE void horizontal_16x16(uint8_t *src, const int stride, uint16_t *dst, const int xoffset) { int h; const __m128i zero = _mm_setzero_si128(); if (xoffset == 0) { for (h = 0; h < 17; ++h) { const __m128i a = _mm_loadu_si128((__m128i *)src); const __m128i a_lo = _mm_unpacklo_epi8(a, zero); const __m128i a_hi = _mm_unpackhi_epi8(a, zero); _mm_store_si128((__m128i *)dst, a_lo); _mm_store_si128((__m128i *)(dst + 8), a_hi); src += stride; dst += 16; } return; } { const __m128i round_factor = _mm_set1_epi16(1 << (VP8_FILTER_SHIFT - 1)); const __m128i hfilter_0 = _mm_set1_epi16(vp8_bilinear_filters[xoffset][0]); const __m128i hfilter_1 = _mm_set1_epi16(vp8_bilinear_filters[xoffset][1]); for (h = 0; h < 17; ++h) { const __m128i a = _mm_loadu_si128((__m128i *)src); const __m128i a_lo = _mm_unpacklo_epi8(a, zero); const __m128i a_hi = _mm_unpackhi_epi8(a, zero); const __m128i a_lo_filtered = _mm_mullo_epi16(a_lo, hfilter_0); const __m128i a_hi_filtered = _mm_mullo_epi16(a_hi, hfilter_0); const __m128i b = _mm_loadu_si128((__m128i *)(src + 1)); const __m128i b_lo = _mm_unpacklo_epi8(b, zero); const __m128i b_hi = _mm_unpackhi_epi8(b, zero); const __m128i b_lo_filtered = _mm_mullo_epi16(b_lo, hfilter_1); const __m128i b_hi_filtered = _mm_mullo_epi16(b_hi, hfilter_1); const __m128i sum_lo = _mm_add_epi16(a_lo_filtered, b_lo_filtered); const __m128i sum_hi = _mm_add_epi16(a_hi_filtered, b_hi_filtered); const __m128i compensated_lo = _mm_add_epi16(sum_lo, round_factor); const __m128i compensated_hi = _mm_add_epi16(sum_hi, round_factor); const __m128i shifted_lo = _mm_srai_epi16(compensated_lo, VP8_FILTER_SHIFT); const __m128i shifted_hi = _mm_srai_epi16(compensated_hi, VP8_FILTER_SHIFT); _mm_store_si128((__m128i *)dst, shifted_lo); _mm_store_si128((__m128i *)(dst + 8), shifted_hi); src += stride; dst += 16; } } } static INLINE void vertical_16x16(uint16_t *src, uint8_t *dst, const int stride, const int yoffset) { int h; if (yoffset == 0) { for (h = 0; h < 16; ++h) { const __m128i row_lo = _mm_load_si128((__m128i *)src); const __m128i row_hi = _mm_load_si128((__m128i *)(src + 8)); const __m128i packed = _mm_packus_epi16(row_lo, row_hi); _mm_store_si128((__m128i *)dst, packed); src += 16; dst += stride; } return; } { const __m128i round_factor = _mm_set1_epi16(1 << (VP8_FILTER_SHIFT - 1)); const __m128i vfilter_0 = _mm_set1_epi16(vp8_bilinear_filters[yoffset][0]); const __m128i vfilter_1 = _mm_set1_epi16(vp8_bilinear_filters[yoffset][1]); __m128i row_0_lo = _mm_load_si128((__m128i *)src); __m128i row_0_hi = _mm_load_si128((__m128i *)(src + 8));
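    /* The pair of rows is kept in registers and rolled forward at the end of
       each iteration, so every row of the intermediate buffer is loaded
       exactly once. */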
src += 16; for (h = 0; h < 16; ++h) { const __m128i row_0_lo_filtered = _mm_mullo_epi16(row_0_lo, vfilter_0); const __m128i row_0_hi_filtered = _mm_mullo_epi16(row_0_hi, vfilter_0); const __m128i row_1_lo = _mm_load_si128((__m128i *)src); const __m128i row_1_hi = _mm_load_si128((__m128i *)(src + 8)); const __m128i row_1_lo_filtered = _mm_mullo_epi16(row_1_lo, vfilter_1); const __m128i row_1_hi_filtered = _mm_mullo_epi16(row_1_hi, vfilter_1); const __m128i sum_lo = _mm_add_epi16(row_0_lo_filtered, row_1_lo_filtered); const __m128i sum_hi = _mm_add_epi16(row_0_hi_filtered, row_1_hi_filtered); const __m128i compensated_lo = _mm_add_epi16(sum_lo, round_factor); const __m128i compensated_hi = _mm_add_epi16(sum_hi, round_factor); const __m128i shifted_lo = _mm_srai_epi16(compensated_lo, VP8_FILTER_SHIFT); const __m128i shifted_hi = _mm_srai_epi16(compensated_hi, VP8_FILTER_SHIFT); const __m128i packed = _mm_packus_epi16(shifted_lo, shifted_hi); _mm_store_si128((__m128i *)dst, packed); row_0_lo = row_1_lo; row_0_hi = row_1_hi; src += 16; dst += stride; } } } void vp8_bilinear_predict16x16_sse2(uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch) { DECLARE_ALIGNED(16, uint16_t, FData[16 * 17]); assert((xoffset | yoffset) != 0); horizontal_16x16(src_ptr, src_pixels_per_line, FData, xoffset); vertical_16x16(FData, dst_ptr, dst_pitch, yoffset); } static INLINE void horizontal_8xN(uint8_t *src, const int stride, uint16_t *dst, const int xoffset, const int height) { int h; const __m128i zero = _mm_setzero_si128(); if (xoffset == 0) { for (h = 0; h < height; ++h) { const __m128i a = _mm_loadl_epi64((__m128i *)src); const __m128i a_u16 = _mm_unpacklo_epi8(a, zero); _mm_store_si128((__m128i *)dst, a_u16); src += stride; dst += 8; } return; } { const __m128i round_factor = _mm_set1_epi16(1 << (VP8_FILTER_SHIFT - 1)); const __m128i hfilter_0 = _mm_set1_epi16(vp8_bilinear_filters[xoffset][0]); const __m128i hfilter_1 = _mm_set1_epi16(vp8_bilinear_filters[xoffset][1]); // Filter horizontally. Rather than load the whole array and transpose, load // 16 values (overreading) and shift to set up the second value. Do an // "extra" 9th line so the vertical pass has the necessary context. 
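    /* Scalar reference for one horizontal output, as a sketch for
       orientation (not part of the build):
         dst[x] = (src[x] * vp8_bilinear_filters[xoffset][0] +
                   src[x + 1] * vp8_bilinear_filters[xoffset][1] +
                   (1 << (VP8_FILTER_SHIFT - 1))) >> VP8_FILTER_SHIFT;  */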
for (h = 0; h < height; ++h) { const __m128i a = _mm_loadu_si128((__m128i *)src); const __m128i b = _mm_srli_si128(a, 1); const __m128i a_u16 = _mm_unpacklo_epi8(a, zero); const __m128i b_u16 = _mm_unpacklo_epi8(b, zero); const __m128i a_filtered = _mm_mullo_epi16(a_u16, hfilter_0); const __m128i b_filtered = _mm_mullo_epi16(b_u16, hfilter_1); const __m128i sum = _mm_add_epi16(a_filtered, b_filtered); const __m128i compensated = _mm_add_epi16(sum, round_factor); const __m128i shifted = _mm_srai_epi16(compensated, VP8_FILTER_SHIFT); _mm_store_si128((__m128i *)dst, shifted); src += stride; dst += 8; } } } static INLINE void vertical_8xN(uint16_t *src, uint8_t *dst, const int stride, const int yoffset, const int height) { int h; if (yoffset == 0) { for (h = 0; h < height; ++h) { const __m128i row = _mm_load_si128((__m128i *)src); const __m128i packed = _mm_packus_epi16(row, row); _mm_storel_epi64((__m128i *)dst, packed); src += 8; dst += stride; } return; } { const __m128i round_factor = _mm_set1_epi16(1 << (VP8_FILTER_SHIFT - 1)); const __m128i vfilter_0 = _mm_set1_epi16(vp8_bilinear_filters[yoffset][0]); const __m128i vfilter_1 = _mm_set1_epi16(vp8_bilinear_filters[yoffset][1]); __m128i row_0 = _mm_load_si128((__m128i *)src); src += 8; for (h = 0; h < height; ++h) { const __m128i row_1 = _mm_load_si128((__m128i *)src); const __m128i row_0_filtered = _mm_mullo_epi16(row_0, vfilter_0); const __m128i row_1_filtered = _mm_mullo_epi16(row_1, vfilter_1); const __m128i sum = _mm_add_epi16(row_0_filtered, row_1_filtered); const __m128i compensated = _mm_add_epi16(sum, round_factor); const __m128i shifted = _mm_srai_epi16(compensated, VP8_FILTER_SHIFT); const __m128i packed = _mm_packus_epi16(shifted, shifted); _mm_storel_epi64((__m128i *)dst, packed); row_0 = row_1; src += 8; dst += stride; } } } void vp8_bilinear_predict8x8_sse2(uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch) { DECLARE_ALIGNED(16, uint16_t, FData[8 * 9]); assert((xoffset | yoffset) != 0); horizontal_8xN(src_ptr, src_pixels_per_line, FData, xoffset, 9); vertical_8xN(FData, dst_ptr, dst_pitch, yoffset, 8); } void vp8_bilinear_predict8x4_sse2(uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch) { DECLARE_ALIGNED(16, uint16_t, FData[8 * 5]); assert((xoffset | yoffset) != 0); horizontal_8xN(src_ptr, src_pixels_per_line, FData, xoffset, 5); vertical_8xN(FData, dst_ptr, dst_pitch, yoffset, 4); } static INLINE void horizontal_4x4(uint8_t *src, const int stride, uint16_t *dst, const int xoffset) { int h; const __m128i zero = _mm_setzero_si128(); if (xoffset == 0) { for (h = 0; h < 5; ++h) { const __m128i a = load_unaligned_u32(src); const __m128i a_u16 = _mm_unpacklo_epi8(a, zero); _mm_storel_epi64((__m128i *)dst, a_u16); src += stride; dst += 4; } return; } { const __m128i round_factor = _mm_set1_epi16(1 << (VP8_FILTER_SHIFT - 1)); const __m128i hfilter_0 = _mm_set1_epi16(vp8_bilinear_filters[xoffset][0]); const __m128i hfilter_1 = _mm_set1_epi16(vp8_bilinear_filters[xoffset][1]); for (h = 0; h < 5; ++h) { const __m128i a = load_unaligned_u32(src); const __m128i b = load_unaligned_u32(src + 1); const __m128i a_u16 = _mm_unpacklo_epi8(a, zero); const __m128i b_u16 = _mm_unpacklo_epi8(b, zero); const __m128i a_filtered = _mm_mullo_epi16(a_u16, hfilter_0); const __m128i b_filtered = _mm_mullo_epi16(b_u16, hfilter_1); const __m128i sum = _mm_add_epi16(a_filtered, b_filtered); const __m128i compensated = _mm_add_epi16(sum, round_factor); 
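      /* The 1 << (VP8_FILTER_SHIFT - 1) term added above makes the
         arithmetic shift that follows round to nearest. */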
const __m128i shifted = _mm_srai_epi16(compensated, VP8_FILTER_SHIFT); _mm_storel_epi64((__m128i *)dst, shifted); src += stride; dst += 4; } } } static INLINE void vertical_4x4(uint16_t *src, uint8_t *dst, const int stride, const int yoffset) { int h; if (yoffset == 0) { for (h = 0; h < 4; h += 2) { const __m128i row = _mm_load_si128((__m128i *)src); __m128i packed = _mm_packus_epi16(row, row); store_unaligned_u32(dst, packed); dst += stride; packed = _mm_srli_si128(packed, 4); store_unaligned_u32(dst, packed); dst += stride; src += 8; } return; } { const __m128i round_factor = _mm_set1_epi16(1 << (VP8_FILTER_SHIFT - 1)); const __m128i vfilter_0 = _mm_set1_epi16(vp8_bilinear_filters[yoffset][0]); const __m128i vfilter_1 = _mm_set1_epi16(vp8_bilinear_filters[yoffset][1]); for (h = 0; h < 4; h += 2) { const __m128i row_0 = _mm_load_si128((__m128i *)src); const __m128i row_1 = _mm_loadu_si128((__m128i *)(src + 4)); const __m128i row_0_filtered = _mm_mullo_epi16(row_0, vfilter_0); const __m128i row_1_filtered = _mm_mullo_epi16(row_1, vfilter_1); const __m128i sum = _mm_add_epi16(row_0_filtered, row_1_filtered); const __m128i compensated = _mm_add_epi16(sum, round_factor); const __m128i shifted = _mm_srai_epi16(compensated, VP8_FILTER_SHIFT); __m128i packed = _mm_packus_epi16(shifted, shifted); storeu_uint32(dst, _mm_cvtsi128_si32(packed)); packed = _mm_srli_si128(packed, 4); dst += stride; storeu_uint32(dst, _mm_cvtsi128_si32(packed)); dst += stride; src += 8; } } } void vp8_bilinear_predict4x4_sse2(uint8_t *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, uint8_t *dst_ptr, int dst_pitch) { DECLARE_ALIGNED(16, uint16_t, FData[4 * 5]); assert((xoffset | yoffset) != 0); horizontal_4x4(src_ptr, src_pixels_per_line, FData, xoffset); vertical_4x4(FData, dst_ptr, dst_pitch, yoffset); } libvpx-1.8.2/vp8/common/x86/dequantize_mmx.asm000066400000000000000000000176401357355204000212300ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" SECTION .text ;void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q) global sym(vp8_dequantize_b_impl_mmx) PRIVATE sym(vp8_dequantize_b_impl_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 3 push rsi push rdi ; end prolog mov rsi, arg(0) ;sq mov rdi, arg(1) ;dq mov rax, arg(2) ;q movq mm1, [rsi] pmullw mm1, [rax+0] ; mm4 *= kernel 0 modifiers. movq [rdi], mm1 movq mm1, [rsi+8] pmullw mm1, [rax+8] ; mm4 *= kernel 0 modifiers. movq [rdi+8], mm1 movq mm1, [rsi+16] pmullw mm1, [rax+16] ; mm4 *= kernel 0 modifiers. movq [rdi+16], mm1 movq mm1, [rsi+24] pmullw mm1, [rax+24] ; mm4 *= kernel 0 modifiers. 
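    ; dequantization is a plain per-coefficient multiply, four 16-bit
    ; coefficients per mmx register: dq[i] = q[i] * dequant_factor[i]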
movq [rdi+24], mm1 ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret ;void dequant_idct_add_mmx( ;short *input, 0 ;short *dq, 1 ;unsigned char *dest, 2 ;int stride) 3 global sym(vp8_dequant_idct_add_mmx) PRIVATE sym(vp8_dequant_idct_add_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 GET_GOT rbx push rdi ; end prolog mov rax, arg(0) ;input mov rdx, arg(1) ;dq movq mm0, [rax ] pmullw mm0, [rdx] movq mm1, [rax +8] pmullw mm1, [rdx +8] movq mm2, [rax+16] pmullw mm2, [rdx+16] movq mm3, [rax+24] pmullw mm3, [rdx+24] mov rdx, arg(2) ;dest pxor mm7, mm7 movq [rax], mm7 movq [rax+8], mm7 movq [rax+16],mm7 movq [rax+24],mm7 movsxd rdi, dword ptr arg(3) ;stride psubw mm0, mm2 ; b1= 0-2 paddw mm2, mm2 ; movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 pmulhw mm5, [GLOBAL(x_s1sqr2)]; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 movq mm5, mm1 movq mm4, mm3 pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 movq mm6, mm2 ; a1 movq mm4, mm0 ; b1 paddw mm2, mm3 ;0 paddw mm4, mm7 ;1 psubw mm0, mm7 ;2 psubw mm6, mm3 ;3 movq mm1, mm2 ; 03 02 01 00 movq mm3, mm4 ; 23 22 21 20 punpcklwd mm1, mm0 ; 11 01 10 00 punpckhwd mm2, mm0 ; 13 03 12 02 punpcklwd mm3, mm6 ; 31 21 30 20 punpckhwd mm4, mm6 ; 33 23 32 22 movq mm0, mm1 ; 11 01 10 00 movq mm5, mm2 ; 13 03 12 02 punpckldq mm0, mm3 ; 30 20 10 00 punpckhdq mm1, mm3 ; 31 21 11 01 punpckldq mm2, mm4 ; 32 22 12 02 punpckhdq mm5, mm4 ; 33 23 13 03 movq mm3, mm5 ; 33 23 13 03 psubw mm0, mm2 ; b1= 0-2 paddw mm2, mm2 ; movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 pmulhw mm5, [GLOBAL(x_s1sqr2)]; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 movq mm5, mm1 movq mm4, mm3 pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 paddw mm0, [GLOBAL(fours)] paddw mm2, [GLOBAL(fours)] movq mm6, mm2 ; a1 movq mm4, mm0 ; b1 paddw mm2, mm3 ;0 paddw mm4, mm7 ;1 psubw mm0, mm7 ;2 psubw mm6, mm3 ;3 psraw mm2, 3 psraw mm0, 3 psraw mm4, 3 psraw mm6, 3 movq mm1, mm2 ; 03 02 01 00 movq mm3, mm4 ; 23 22 21 20 punpcklwd mm1, mm0 ; 11 01 10 00 punpckhwd mm2, mm0 ; 13 03 12 02 punpcklwd mm3, mm6 ; 31 21 30 20 punpckhwd mm4, mm6 ; 33 23 32 22 movq mm0, mm1 ; 11 01 10 00 movq mm5, mm2 ; 13 03 12 02 punpckldq mm0, mm3 ; 30 20 10 00 punpckhdq mm1, mm3 ; 31 21 11 01 punpckldq mm2, mm4 ; 32 22 12 02 punpckhdq mm5, mm4 ; 33 23 13 03 pxor mm7, mm7 movd mm4, [rdx] punpcklbw mm4, mm7 paddsw mm0, mm4 packuswb mm0, mm7 movd [rdx], mm0 movd mm4, [rdx+rdi] punpcklbw mm4, mm7 paddsw mm1, mm4 packuswb mm1, mm7 movd [rdx+rdi], mm1 movd mm4, [rdx+2*rdi] punpcklbw mm4, mm7 paddsw mm2, mm4 packuswb mm2, mm7 movd [rdx+rdi*2], mm2 add rdx, rdi movd mm4, [rdx+2*rdi] punpcklbw mm4, mm7 paddsw mm5, mm4 packuswb mm5, mm7 movd [rdx+rdi*2], mm5 ; begin epilog pop rdi RESTORE_GOT UNSHADOW_ARGS pop rbp ret SECTION_RODATA align 16 x_s1sqr2: times 4 dw 0x8A8C align 16 x_c1sqr2less1: times 4 dw 0x4E7B align 16 fours: times 4 dw 0x0004 libvpx-1.8.2/vp8/common/x86/idct_blk_mmx.c000066400000000000000000000013641357355204000202700ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" #include "vp8_rtcd.h" #include "vp8/common/blockd.h" #include "vpx_mem/vpx_mem.h" extern void vp8_dequantize_b_impl_mmx(short *sq, short *dq, short *q); void vp8_dequantize_b_mmx(BLOCKD *d, short *DQC) { short *sq = (short *)d->qcoeff; short *dq = (short *)d->dqcoeff; vp8_dequantize_b_impl_mmx(sq, dq, DQC); } libvpx-1.8.2/vp8/common/x86/idct_blk_sse2.c000066400000000000000000000050221357355204000203360ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" #include "vp8_rtcd.h" void vp8_idct_dequant_0_2x_sse2(short *q, short *dq, unsigned char *dst, int dst_stride); void vp8_idct_dequant_full_2x_sse2(short *q, short *dq, unsigned char *dst, int dst_stride); void vp8_dequant_idct_add_y_block_sse2(short *q, short *dq, unsigned char *dst, int stride, char *eobs) { int i; for (i = 0; i < 4; ++i) { if (((short *)(eobs))[0]) { if (((short *)(eobs))[0] & 0xfefe) { vp8_idct_dequant_full_2x_sse2(q, dq, dst, stride); } else { vp8_idct_dequant_0_2x_sse2(q, dq, dst, stride); } } if (((short *)(eobs))[1]) { if (((short *)(eobs))[1] & 0xfefe) { vp8_idct_dequant_full_2x_sse2(q + 32, dq, dst + 8, stride); } else { vp8_idct_dequant_0_2x_sse2(q + 32, dq, dst + 8, stride); } } q += 64; dst += stride * 4; eobs += 4; } } void vp8_dequant_idct_add_uv_block_sse2(short *q, short *dq, unsigned char *dst_u, unsigned char *dst_v, int stride, char *eobs) { if (((short *)(eobs))[0]) { if (((short *)(eobs))[0] & 0xfefe) { vp8_idct_dequant_full_2x_sse2(q, dq, dst_u, stride); } else { vp8_idct_dequant_0_2x_sse2(q, dq, dst_u, stride); } } q += 32; dst_u += stride * 4; if (((short *)(eobs))[1]) { if (((short *)(eobs))[1] & 0xfefe) { vp8_idct_dequant_full_2x_sse2(q, dq, dst_u, stride); } else { vp8_idct_dequant_0_2x_sse2(q, dq, dst_u, stride); } } q += 32; if (((short *)(eobs))[2]) { if (((short *)(eobs))[2] & 0xfefe) { vp8_idct_dequant_full_2x_sse2(q, dq, dst_v, stride); } else { vp8_idct_dequant_0_2x_sse2(q, dq, dst_v, stride); } } q += 32; dst_v += stride * 4; if (((short *)(eobs))[3]) { if (((short *)(eobs))[3] & 0xfefe) { vp8_idct_dequant_full_2x_sse2(q, dq, dst_v, stride); } else { vp8_idct_dequant_0_2x_sse2(q, dq, dst_v, stride); } } } libvpx-1.8.2/vp8/common/x86/idctllm_mmx.asm000066400000000000000000000224001357355204000204750ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" ; /**************************************************************************** ; * Notes: ; * ; * This implementation makes use of 16 bit fixed point version of two multiply ; * constants: ; * 1. sqrt(2) * cos (pi/8) ; * 2. 
sqrt(2) * sin (pi/8) ; * Because the first constant is bigger than 1, to maintain the same 16 bit ; * fixed point precision as the second one, we use a trick of ; * x * a = x + x*(a-1) ; * so ; * x * sqrt(2) * cos (pi/8) = x + x * (sqrt(2) *cos(pi/8)-1). ; * ; * For the second constant, because of the 16bit version is 35468, which ; * is bigger than 32768, in signed 16 bit multiply, it becomes a negative ; * number. ; * (x * (unsigned)35468 >> 16) = x * (signed)35468 >> 16 + x ; * ; **************************************************************************/ SECTION .text ;void vp8_short_idct4x4llm_mmx(short *input, unsigned char *pred, ;int pitch, unsigned char *dest,int stride) global sym(vp8_short_idct4x4llm_mmx) PRIVATE sym(vp8_short_idct4x4llm_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 GET_GOT rbx push rsi push rdi ; end prolog mov rax, arg(0) ;input mov rsi, arg(1) ;pred movq mm0, [rax ] movq mm1, [rax+ 8] movq mm2, [rax+16] movq mm3, [rax+24] %if 0 pxor mm7, mm7 movq [rax], mm7 movq [rax+8], mm7 movq [rax+16],mm7 movq [rax+24],mm7 %endif movsxd rax, dword ptr arg(2) ;pitch mov rdx, arg(3) ;dest movsxd rdi, dword ptr arg(4) ;stride psubw mm0, mm2 ; b1= 0-2 paddw mm2, mm2 ; movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 pmulhw mm5, [GLOBAL(x_s1sqr2)]; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 movq mm5, mm1 movq mm4, mm3 pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 movq mm6, mm2 ; a1 movq mm4, mm0 ; b1 paddw mm2, mm3 ;0 paddw mm4, mm7 ;1 psubw mm0, mm7 ;2 psubw mm6, mm3 ;3 movq mm1, mm2 ; 03 02 01 00 movq mm3, mm4 ; 23 22 21 20 punpcklwd mm1, mm0 ; 11 01 10 00 punpckhwd mm2, mm0 ; 13 03 12 02 punpcklwd mm3, mm6 ; 31 21 30 20 punpckhwd mm4, mm6 ; 33 23 32 22 movq mm0, mm1 ; 11 01 10 00 movq mm5, mm2 ; 13 03 12 02 punpckldq mm0, mm3 ; 30 20 10 00 punpckhdq mm1, mm3 ; 31 21 11 01 punpckldq mm2, mm4 ; 32 22 12 02 punpckhdq mm5, mm4 ; 33 23 13 03 movq mm3, mm5 ; 33 23 13 03 psubw mm0, mm2 ; b1= 0-2 paddw mm2, mm2 ; movq mm5, mm1 paddw mm2, mm0 ; a1 =0+2 pmulhw mm5, [GLOBAL(x_s1sqr2)]; paddw mm5, mm1 ; ip1 * sin(pi/8) * sqrt(2) movq mm7, mm3 ; pmulhw mm7, [GLOBAL(x_c1sqr2less1)]; paddw mm7, mm3 ; ip3 * cos(pi/8) * sqrt(2) psubw mm7, mm5 ; c1 movq mm5, mm1 movq mm4, mm3 pmulhw mm5, [GLOBAL(x_c1sqr2less1)] paddw mm5, mm1 pmulhw mm3, [GLOBAL(x_s1sqr2)] paddw mm3, mm4 paddw mm3, mm5 ; d1 paddw mm0, [GLOBAL(fours)] paddw mm2, [GLOBAL(fours)] movq mm6, mm2 ; a1 movq mm4, mm0 ; b1 paddw mm2, mm3 ;0 paddw mm4, mm7 ;1 psubw mm0, mm7 ;2 psubw mm6, mm3 ;3 psraw mm2, 3 psraw mm0, 3 psraw mm4, 3 psraw mm6, 3 movq mm1, mm2 ; 03 02 01 00 movq mm3, mm4 ; 23 22 21 20 punpcklwd mm1, mm0 ; 11 01 10 00 punpckhwd mm2, mm0 ; 13 03 12 02 punpcklwd mm3, mm6 ; 31 21 30 20 punpckhwd mm4, mm6 ; 33 23 32 22 movq mm0, mm1 ; 11 01 10 00 movq mm5, mm2 ; 13 03 12 02 punpckldq mm0, mm3 ; 30 20 10 00 punpckhdq mm1, mm3 ; 31 21 11 01 punpckldq mm2, mm4 ; 32 22 12 02 punpckhdq mm5, mm4 ; 33 23 13 03 pxor mm7, mm7 movd mm4, [rsi] punpcklbw mm4, mm7 paddsw mm0, mm4 packuswb mm0, mm7 movd [rdx], mm0 movd mm4, [rsi+rax] punpcklbw mm4, mm7 paddsw mm1, mm4 packuswb mm1, mm7 movd [rdx+rdi], mm1 movd mm4, [rsi+2*rax] punpcklbw mm4, mm7 paddsw mm2, mm4 packuswb mm2, mm7 movd [rdx+rdi*2], mm2 add rdx, rdi add rsi, rax movd mm4, [rsi+2*rax] punpcklbw mm4, mm7 paddsw mm5, mm4 packuswb mm5, mm7 movd [rdx+rdi*2], mm5 ; begin epilog pop rdi pop rsi 
RESTORE_GOT UNSHADOW_ARGS pop rbp ret ;void vp8_dc_only_idct_add_mmx( ;short input_dc, ;unsigned char *pred_ptr, ;int pred_stride, ;unsigned char *dst_ptr, ;int stride) global sym(vp8_dc_only_idct_add_mmx) PRIVATE sym(vp8_dc_only_idct_add_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 GET_GOT rbx ; end prolog movd mm5, arg(0) ;input_dc mov rax, arg(1) ;pred_ptr movsxd rdx, dword ptr arg(2) ;pred_stride pxor mm0, mm0 paddw mm5, [GLOBAL(fours)] lea rcx, [rdx + rdx*2] psraw mm5, 3 punpcklwd mm5, mm5 punpckldq mm5, mm5 movd mm1, [rax] movd mm2, [rax+rdx] movd mm3, [rax+2*rdx] movd mm4, [rax+rcx] mov rax, arg(3) ;d -- destination movsxd rdx, dword ptr arg(4) ;dst_stride punpcklbw mm1, mm0 paddsw mm1, mm5 packuswb mm1, mm0 ; pack and unpack to saturate lea rcx, [rdx + rdx*2] punpcklbw mm2, mm0 paddsw mm2, mm5 packuswb mm2, mm0 ; pack and unpack to saturate punpcklbw mm3, mm0 paddsw mm3, mm5 packuswb mm3, mm0 ; pack and unpack to saturate punpcklbw mm4, mm0 paddsw mm4, mm5 packuswb mm4, mm0 ; pack and unpack to saturate movd [rax], mm1 movd [rax+rdx], mm2 movd [rax+2*rdx], mm3 movd [rax+rcx], mm4 ; begin epilog RESTORE_GOT UNSHADOW_ARGS pop rbp ret SECTION_RODATA align 16 x_s1sqr2: times 4 dw 0x8A8C align 16 x_c1sqr2less1: times 4 dw 0x4E7B align 16 fours: times 4 dw 0x0004 libvpx-1.8.2/vp8/common/x86/idctllm_sse2.asm000066400000000000000000000600251357355204000205550ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
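;
; This file uses the same 16-bit fixed-point constants as idctllm_mmx.asm:
; x_c1sqr2less1 = sqrt(2) * cos(pi/8) - 1 and x_s1sqr2 = sqrt(2) * sin(pi/8),
; applied as x + x * (c - 1) and (x * s >> 16) + x respectively so that both
; multiplies stay within signed 16-bit range.
;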
; %include "vpx_ports/x86_abi_support.asm" ;void vp8_idct_dequant_0_2x_sse2 ; ( ; short *qcoeff - 0 ; short *dequant - 1 ; unsigned char *dst - 2 ; int dst_stride - 3 ; ) SECTION .text global sym(vp8_idct_dequant_0_2x_sse2) PRIVATE sym(vp8_idct_dequant_0_2x_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 GET_GOT rbx ; end prolog mov rdx, arg(1) ; dequant mov rax, arg(0) ; qcoeff movd xmm4, [rax] movd xmm5, [rdx] pinsrw xmm4, [rax+32], 4 pinsrw xmm5, [rdx], 4 pmullw xmm4, xmm5 ; Zero out xmm5, for use unpacking pxor xmm5, xmm5 ; clear coeffs movd [rax], xmm5 movd [rax+32], xmm5 ;pshufb mov rax, arg(2) ; dst movsxd rdx, dword ptr arg(3) ; dst_stride pshuflw xmm4, xmm4, 00000000b pshufhw xmm4, xmm4, 00000000b lea rcx, [rdx + rdx*2] paddw xmm4, [GLOBAL(fours)] psraw xmm4, 3 movq xmm0, [rax] movq xmm1, [rax+rdx] movq xmm2, [rax+2*rdx] movq xmm3, [rax+rcx] punpcklbw xmm0, xmm5 punpcklbw xmm1, xmm5 punpcklbw xmm2, xmm5 punpcklbw xmm3, xmm5 ; Add to predict buffer paddw xmm0, xmm4 paddw xmm1, xmm4 paddw xmm2, xmm4 paddw xmm3, xmm4 ; pack up before storing packuswb xmm0, xmm5 packuswb xmm1, xmm5 packuswb xmm2, xmm5 packuswb xmm3, xmm5 ; store blocks back out movq [rax], xmm0 movq [rax + rdx], xmm1 lea rax, [rax + 2*rdx] movq [rax], xmm2 movq [rax + rdx], xmm3 ; begin epilog RESTORE_GOT UNSHADOW_ARGS pop rbp ret ;void vp8_idct_dequant_full_2x_sse2 ; ( ; short *qcoeff - 0 ; short *dequant - 1 ; unsigned char *dst - 2 ; int dst_stride - 3 ; ) global sym(vp8_idct_dequant_full_2x_sse2) PRIVATE sym(vp8_idct_dequant_full_2x_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog ; special case when 2 blocks have 0 or 1 coeffs ; dc is set as first coeff, so no need to load qcoeff mov rax, arg(0) ; qcoeff mov rdx, arg(1) ; dequant mov rdi, arg(2) ; dst ; Zero out xmm7, for use unpacking pxor xmm7, xmm7 ; note the transpose of xmm1 and xmm2, necessary for shuffle ; to spit out sensicle data movdqa xmm0, [rax] movdqa xmm2, [rax+16] movdqa xmm1, [rax+32] movdqa xmm3, [rax+48] ; Clear out coeffs movdqa [rax], xmm7 movdqa [rax+16], xmm7 movdqa [rax+32], xmm7 movdqa [rax+48], xmm7 ; dequantize qcoeff buffer pmullw xmm0, [rdx] pmullw xmm2, [rdx+16] pmullw xmm1, [rdx] pmullw xmm3, [rdx+16] movsxd rdx, dword ptr arg(3) ; dst_stride ; repack so block 0 row x and block 1 row x are together movdqa xmm4, xmm0 punpckldq xmm0, xmm1 punpckhdq xmm4, xmm1 pshufd xmm0, xmm0, 11011000b pshufd xmm1, xmm4, 11011000b movdqa xmm4, xmm2 punpckldq xmm2, xmm3 punpckhdq xmm4, xmm3 pshufd xmm2, xmm2, 11011000b pshufd xmm3, xmm4, 11011000b ; first pass psubw xmm0, xmm2 ; b1 = 0-2 paddw xmm2, xmm2 ; movdqa xmm5, xmm1 paddw xmm2, xmm0 ; a1 = 0+2 pmulhw xmm5, [GLOBAL(x_s1sqr2)] lea rcx, [rdx + rdx*2] ;dst_stride * 3 paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) movdqa xmm7, xmm3 pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) psubw xmm7, xmm5 ; c1 movdqa xmm5, xmm1 movdqa xmm4, xmm3 pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] paddw xmm5, xmm1 pmulhw xmm3, [GLOBAL(x_s1sqr2)] paddw xmm3, xmm4 paddw xmm3, xmm5 ; d1 movdqa xmm6, xmm2 ; a1 movdqa xmm4, xmm0 ; b1 paddw xmm2, xmm3 ;0 paddw xmm4, xmm7 ;1 psubw xmm0, xmm7 ;2 psubw xmm6, xmm3 ;3 ; transpose for the second pass movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 punpckhwd xmm5, xmm6 ; 115 111 
114 110 113 109 112 108 movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 pshufd xmm0, xmm2, 11011000b pshufd xmm2, xmm1, 11011000b pshufd xmm1, xmm5, 11011000b pshufd xmm3, xmm7, 11011000b ; second pass psubw xmm0, xmm2 ; b1 = 0-2 paddw xmm2, xmm2 movdqa xmm5, xmm1 paddw xmm2, xmm0 ; a1 = 0+2 pmulhw xmm5, [GLOBAL(x_s1sqr2)] paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) movdqa xmm7, xmm3 pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) psubw xmm7, xmm5 ; c1 movdqa xmm5, xmm1 movdqa xmm4, xmm3 pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] paddw xmm5, xmm1 pmulhw xmm3, [GLOBAL(x_s1sqr2)] paddw xmm3, xmm4 paddw xmm3, xmm5 ; d1 paddw xmm0, [GLOBAL(fours)] paddw xmm2, [GLOBAL(fours)] movdqa xmm6, xmm2 ; a1 movdqa xmm4, xmm0 ; b1 paddw xmm2, xmm3 ;0 paddw xmm4, xmm7 ;1 psubw xmm0, xmm7 ;2 psubw xmm6, xmm3 ;3 psraw xmm2, 3 psraw xmm0, 3 psraw xmm4, 3 psraw xmm6, 3 ; transpose to save movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 pshufd xmm0, xmm2, 11011000b pshufd xmm2, xmm1, 11011000b pshufd xmm1, xmm5, 11011000b pshufd xmm3, xmm7, 11011000b pxor xmm7, xmm7 ; Load up predict blocks movq xmm4, [rdi] movq xmm5, [rdi+rdx] punpcklbw xmm4, xmm7 punpcklbw xmm5, xmm7 paddw xmm0, xmm4 paddw xmm1, xmm5 movq xmm4, [rdi+2*rdx] movq xmm5, [rdi+rcx] punpcklbw xmm4, xmm7 punpcklbw xmm5, xmm7 paddw xmm2, xmm4 paddw xmm3, xmm5 .finish: ; pack up before storing packuswb xmm0, xmm7 packuswb xmm1, xmm7 packuswb xmm2, xmm7 packuswb xmm3, xmm7 ; store blocks back out movq [rdi], xmm0 movq [rdi + rdx], xmm1 movq [rdi + rdx*2], xmm2 movq [rdi + rcx], xmm3 ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_idct_dequant_dc_0_2x_sse2 ; ( ; short *qcoeff - 0 ; short *dequant - 1 ; unsigned char *dst - 2 ; int dst_stride - 3 ; short *dc - 4 ; ) global sym(vp8_idct_dequant_dc_0_2x_sse2) PRIVATE sym(vp8_idct_dequant_dc_0_2x_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 GET_GOT rbx push rdi ; end prolog ; special case when 2 blocks have 0 or 1 coeffs ; dc is set 
as first coeff, so no need to load qcoeff mov rax, arg(0) ; qcoeff mov rdi, arg(2) ; dst mov rdx, arg(4) ; dc ; Zero out xmm5, for use unpacking pxor xmm5, xmm5 ; load up 2 dc words here == 2*16 = doubleword movd xmm4, [rdx] movsxd rdx, dword ptr arg(3) ; dst_stride lea rcx, [rdx + rdx*2] ; Load up predict blocks movq xmm0, [rdi] movq xmm1, [rdi+rdx*1] movq xmm2, [rdi+rdx*2] movq xmm3, [rdi+rcx] ; Duplicate and expand dc across punpcklwd xmm4, xmm4 punpckldq xmm4, xmm4 ; Rounding to dequant and downshift paddw xmm4, [GLOBAL(fours)] psraw xmm4, 3 ; Predict buffer needs to be expanded from bytes to words punpcklbw xmm0, xmm5 punpcklbw xmm1, xmm5 punpcklbw xmm2, xmm5 punpcklbw xmm3, xmm5 ; Add to predict buffer paddw xmm0, xmm4 paddw xmm1, xmm4 paddw xmm2, xmm4 paddw xmm3, xmm4 ; pack up before storing packuswb xmm0, xmm5 packuswb xmm1, xmm5 packuswb xmm2, xmm5 packuswb xmm3, xmm5 ; store blocks back out movq [rdi], xmm0 movq [rdi + rdx], xmm1 movq [rdi + rdx*2], xmm2 movq [rdi + rcx], xmm3 ; begin epilog pop rdi RESTORE_GOT UNSHADOW_ARGS pop rbp ret ;void vp8_idct_dequant_dc_full_2x_sse2 ; ( ; short *qcoeff - 0 ; short *dequant - 1 ; unsigned char *dst - 2 ; int dst_stride - 3 ; short *dc - 4 ; ) global sym(vp8_idct_dequant_dc_full_2x_sse2) PRIVATE sym(vp8_idct_dequant_dc_full_2x_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 SAVE_XMM 7 GET_GOT rbx push rdi ; end prolog ; special case when 2 blocks have 0 or 1 coeffs ; dc is set as first coeff, so no need to load qcoeff mov rax, arg(0) ; qcoeff mov rdx, arg(1) ; dequant mov rdi, arg(2) ; dst ; Zero out xmm7, for use unpacking pxor xmm7, xmm7 ; note the transpose of xmm1 and xmm2, necessary for shuffle ; to spit out sensicle data movdqa xmm0, [rax] movdqa xmm2, [rax+16] movdqa xmm1, [rax+32] movdqa xmm3, [rax+48] ; Clear out coeffs movdqa [rax], xmm7 movdqa [rax+16], xmm7 movdqa [rax+32], xmm7 movdqa [rax+48], xmm7 ; dequantize qcoeff buffer pmullw xmm0, [rdx] pmullw xmm2, [rdx+16] pmullw xmm1, [rdx] pmullw xmm3, [rdx+16] ; DC component mov rdx, arg(4) ; repack so block 0 row x and block 1 row x are together movdqa xmm4, xmm0 punpckldq xmm0, xmm1 punpckhdq xmm4, xmm1 pshufd xmm0, xmm0, 11011000b pshufd xmm1, xmm4, 11011000b movdqa xmm4, xmm2 punpckldq xmm2, xmm3 punpckhdq xmm4, xmm3 pshufd xmm2, xmm2, 11011000b pshufd xmm3, xmm4, 11011000b ; insert DC component pinsrw xmm0, [rdx], 0 pinsrw xmm0, [rdx+2], 4 ; first pass psubw xmm0, xmm2 ; b1 = 0-2 paddw xmm2, xmm2 ; movdqa xmm5, xmm1 paddw xmm2, xmm0 ; a1 = 0+2 pmulhw xmm5, [GLOBAL(x_s1sqr2)] paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) movdqa xmm7, xmm3 pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) psubw xmm7, xmm5 ; c1 movdqa xmm5, xmm1 movdqa xmm4, xmm3 pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] paddw xmm5, xmm1 pmulhw xmm3, [GLOBAL(x_s1sqr2)] paddw xmm3, xmm4 paddw xmm3, xmm5 ; d1 movdqa xmm6, xmm2 ; a1 movdqa xmm4, xmm0 ; b1 paddw xmm2, xmm3 ;0 paddw xmm4, xmm7 ;1 psubw xmm0, xmm7 ;2 psubw xmm6, xmm3 ;3 ; transpose for the second pass movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 movdqa xmm6, xmm7 ; 107 103 106 102 105 101 
104 100 punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 pshufd xmm0, xmm2, 11011000b pshufd xmm2, xmm1, 11011000b pshufd xmm1, xmm5, 11011000b pshufd xmm3, xmm7, 11011000b ; second pass psubw xmm0, xmm2 ; b1 = 0-2 paddw xmm2, xmm2 movdqa xmm5, xmm1 paddw xmm2, xmm0 ; a1 = 0+2 pmulhw xmm5, [GLOBAL(x_s1sqr2)] paddw xmm5, xmm1 ; ip1 * sin(pi/8) * sqrt(2) movdqa xmm7, xmm3 pmulhw xmm7, [GLOBAL(x_c1sqr2less1)] paddw xmm7, xmm3 ; ip3 * cos(pi/8) * sqrt(2) psubw xmm7, xmm5 ; c1 movdqa xmm5, xmm1 movdqa xmm4, xmm3 pmulhw xmm5, [GLOBAL(x_c1sqr2less1)] paddw xmm5, xmm1 pmulhw xmm3, [GLOBAL(x_s1sqr2)] paddw xmm3, xmm4 paddw xmm3, xmm5 ; d1 paddw xmm0, [GLOBAL(fours)] paddw xmm2, [GLOBAL(fours)] movdqa xmm6, xmm2 ; a1 movdqa xmm4, xmm0 ; b1 paddw xmm2, xmm3 ;0 paddw xmm4, xmm7 ;1 psubw xmm0, xmm7 ;2 psubw xmm6, xmm3 ;3 psraw xmm2, 3 psraw xmm0, 3 psraw xmm4, 3 psraw xmm6, 3 ; transpose to save movdqa xmm7, xmm2 ; 103 102 101 100 003 002 001 000 punpcklwd xmm2, xmm0 ; 007 003 006 002 005 001 004 000 punpckhwd xmm7, xmm0 ; 107 103 106 102 105 101 104 100 movdqa xmm5, xmm4 ; 111 110 109 108 011 010 009 008 punpcklwd xmm4, xmm6 ; 015 011 014 010 013 009 012 008 punpckhwd xmm5, xmm6 ; 115 111 114 110 113 109 112 108 movdqa xmm1, xmm2 ; 007 003 006 002 005 001 004 000 punpckldq xmm2, xmm4 ; 013 009 005 001 012 008 004 000 punpckhdq xmm1, xmm4 ; 015 011 007 003 014 010 006 002 movdqa xmm6, xmm7 ; 107 103 106 102 105 101 104 100 punpckldq xmm7, xmm5 ; 113 109 105 101 112 108 104 100 punpckhdq xmm6, xmm5 ; 115 111 107 103 114 110 106 102 movdqa xmm5, xmm2 ; 013 009 005 001 012 008 004 000 punpckldq xmm2, xmm7 ; 112 108 012 008 104 100 004 000 punpckhdq xmm5, xmm7 ; 113 109 013 009 105 101 005 001 movdqa xmm7, xmm1 ; 015 011 007 003 014 010 006 002 punpckldq xmm1, xmm6 ; 114 110 014 010 106 102 006 002 punpckhdq xmm7, xmm6 ; 115 111 015 011 107 103 007 003 pshufd xmm0, xmm2, 11011000b pshufd xmm2, xmm1, 11011000b pshufd xmm1, xmm5, 11011000b pshufd xmm3, xmm7, 11011000b pxor xmm7, xmm7 ; Load up predict blocks movsxd rdx, dword ptr arg(3) ; dst_stride movq xmm4, [rdi] movq xmm5, [rdi+rdx] lea rcx, [rdx + rdx*2] punpcklbw xmm4, xmm7 punpcklbw xmm5, xmm7 paddw xmm0, xmm4 paddw xmm1, xmm5 movq xmm4, [rdi+rdx*2] movq xmm5, [rdi+rcx] punpcklbw xmm4, xmm7 punpcklbw xmm5, xmm7 paddw xmm2, xmm4 paddw xmm3, xmm5 .finish: ; pack up before storing packuswb xmm0, xmm7 packuswb xmm1, xmm7 packuswb xmm2, xmm7 packuswb xmm3, xmm7 ; Load destination stride before writing out, ; doesn't need to persist movsxd rdx, dword ptr arg(3) ; dst_stride ; store blocks back out movq [rdi], xmm0 movq [rdi + rdx], xmm1 lea rdi, [rdi + 2*rdx] movq [rdi], xmm2 movq [rdi + rdx], xmm3 ; begin epilog pop rdi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret SECTION_RODATA align 16 fours: times 8 dw 0x0004 align 16 x_s1sqr2: times 8 dw 0x8A8C align 16 x_c1sqr2less1: times 8 dw 0x4E7B libvpx-1.8.2/vp8/common/x86/iwalsh_sse2.asm000066400000000000000000000075011357355204000204140ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" SECTION .text ;void vp8_short_inv_walsh4x4_sse2(short *input, short *mb_dqcoeff) global sym(vp8_short_inv_walsh4x4_sse2) PRIVATE sym(vp8_short_inv_walsh4x4_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 2 ; end prolog mov rcx, arg(0) mov rdx, arg(1) mov rax, 30003h movdqa xmm0, [rcx + 0] ;ip[4] ip[0] movdqa xmm1, [rcx + 16] ;ip[12] ip[8] pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] movdqa xmm3, xmm0 ;ip[4] ip[0] paddw xmm0, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 movdqa xmm4, xmm0 punpcklqdq xmm0, xmm3 ;d1 a1 punpckhqdq xmm4, xmm3 ;c1 b1 movdqa xmm1, xmm4 ;c1 b1 paddw xmm4, xmm0 ;dl+cl a1+b1 aka op[4] op[0] psubw xmm0, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ; 13 12 11 10 03 02 01 00 ; ; 33 32 31 30 23 22 21 20 ; movdqa xmm3, xmm4 ; 13 12 11 10 03 02 01 00 punpcklwd xmm4, xmm0 ; 23 03 22 02 21 01 20 00 punpckhwd xmm3, xmm0 ; 33 13 32 12 31 11 30 10 movdqa xmm1, xmm4 ; 23 03 22 02 21 01 20 00 punpcklwd xmm4, xmm3 ; 31 21 11 01 30 20 10 00 punpckhwd xmm1, xmm3 ; 33 23 13 03 32 22 12 02 ;~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ movd xmm0, eax pshufd xmm2, xmm1, 4eh ;ip[8] ip[12] movdqa xmm3, xmm4 ;ip[4] ip[0] pshufd xmm0, xmm0, 0 ;03 03 03 03 03 03 03 03 paddw xmm4, xmm2 ;ip[4]+ip[8] ip[0]+ip[12] aka b1 a1 psubw xmm3, xmm2 ;ip[4]-ip[8] ip[0]-ip[12] aka c1 d1 movdqa xmm5, xmm4 punpcklqdq xmm4, xmm3 ;d1 a1 punpckhqdq xmm5, xmm3 ;c1 b1 movdqa xmm1, xmm5 ;c1 b1 paddw xmm5, xmm4 ;dl+cl a1+b1 aka op[4] op[0] psubw xmm4, xmm1 ;d1-c1 a1-b1 aka op[12] op[8] paddw xmm5, xmm0 paddw xmm4, xmm0 psraw xmm5, 3 psraw xmm4, 3 movd eax, xmm5 movd ecx, xmm4 psrldq xmm5, 4 psrldq xmm4, 4 mov word ptr[rdx+32*0], ax mov word ptr[rdx+32*2], cx shr eax, 16 shr ecx, 16 mov word ptr[rdx+32*4], ax mov word ptr[rdx+32*6], cx movd eax, xmm5 movd ecx, xmm4 psrldq xmm5, 4 psrldq xmm4, 4 mov word ptr[rdx+32*8], ax mov word ptr[rdx+32*10], cx shr eax, 16 shr ecx, 16 mov word ptr[rdx+32*12], ax mov word ptr[rdx+32*14], cx movd eax, xmm5 movd ecx, xmm4 psrldq xmm5, 4 psrldq xmm4, 4 mov word ptr[rdx+32*1], ax mov word ptr[rdx+32*3], cx shr eax, 16 shr ecx, 16 mov word ptr[rdx+32*5], ax mov word ptr[rdx+32*7], cx movd eax, xmm5 movd ecx, xmm4 mov word ptr[rdx+32*9], ax mov word ptr[rdx+32*11], cx shr eax, 16 shr ecx, 16 mov word ptr[rdx+32*13], ax mov word ptr[rdx+32*15], cx ; begin epilog UNSHADOW_ARGS pop rbp ret libvpx-1.8.2/vp8/common/x86/loopfilter_block_sse2_x86_64.asm000066400000000000000000000570201357355204000234750ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
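;
; The LF_ABS macro below computes a per-byte absolute difference without a
; compare, using unsigned saturating subtracts:
;   |v1 - v2| = (v1 -sat v2) | (v2 -sat v1)
; LF_FILTER_HEV_MASK accumulates these with pmaxub to build the filter and
; high-edge-variance masks branch-free.
;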
; %include "vpx_ports/x86_abi_support.asm" %macro LF_ABS 2 ; %1 value not preserved ; %2 value preserved ; output in %1 movdqa scratch1, %2 ; v2 psubusb scratch1, %1 ; v2 - v1 psubusb %1, %2 ; v1 - v2 por %1, scratch1 ; abs(v2 - v1) %endmacro %macro LF_FILTER_HEV_MASK 8-9 LF_ABS %1, %2 ; abs(p3 - p2) LF_ABS %2, %3 ; abs(p2 - p1) pmaxub %1, %2 ; accumulate mask %if %0 == 8 movdqa scratch2, %3 ; save p1 LF_ABS scratch2, %4 ; abs(p1 - p0) %endif LF_ABS %4, %5 ; abs(p0 - q0) LF_ABS %5, %6 ; abs(q0 - q1) %if %0 == 8 pmaxub %5, scratch2 ; accumulate hev %else pmaxub %5, %9 %endif pmaxub %1, %5 ; accumulate mask LF_ABS %3, %6 ; abs(p1 - q1) LF_ABS %6, %7 ; abs(q1 - q2) pmaxub %1, %6 ; accumulate mask LF_ABS %7, %8 ; abs(q2 - q3) pmaxub %1, %7 ; accumulate mask paddusb %4, %4 ; 2 * abs(p0 - q0) pand %3, [GLOBAL(tfe)] psrlw %3, 1 ; abs(p1 - q1) / 2 paddusb %4, %3 ; abs(p0 - q0) * 2 + abs(p1 - q1) / 2 psubusb %1, [limit] psubusb %4, [blimit] por %1, %4 pcmpeqb %1, zero ; mask psubusb %5, [thresh] pcmpeqb %5, zero ; ~hev %endmacro %macro LF_FILTER 6 ; %1-%4: p1-q1 ; %5: mask ; %6: hev movdqa scratch2, %6 ; save hev pxor %1, [GLOBAL(t80)] ; ps1 pxor %4, [GLOBAL(t80)] ; qs1 movdqa scratch1, %1 psubsb scratch1, %4 ; signed_char_clamp(ps1 - qs1) pandn scratch2, scratch1 ; vp8_filter &= hev pxor %2, [GLOBAL(t80)] ; ps0 pxor %3, [GLOBAL(t80)] ; qs0 movdqa scratch1, %3 psubsb scratch1, %2 ; qs0 - ps0 paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) paddsb scratch2, scratch1 ; vp8_filter += (qs0 - ps0) pand %5, scratch2 ; &= mask movdqa scratch2, %5 paddsb %5, [GLOBAL(t4)] ; Filter1 paddsb scratch2, [GLOBAL(t3)] ; Filter2 ; Filter1 >> 3 movdqa scratch1, zero pcmpgtb scratch1, %5 psrlw %5, 3 pand scratch1, [GLOBAL(te0)] pand %5, [GLOBAL(t1f)] por %5, scratch1 psubsb %3, %5 ; qs0 - Filter1 pxor %3, [GLOBAL(t80)] ; Filter2 >> 3 movdqa scratch1, zero pcmpgtb scratch1, scratch2 psrlw scratch2, 3 pand scratch1, [GLOBAL(te0)] pand scratch2, [GLOBAL(t1f)] por scratch2, scratch1 paddsb %2, scratch2 ; ps0 + Filter2 pxor %2, [GLOBAL(t80)] ; outer tap adjustments paddsb %5, [GLOBAL(t1)] movdqa scratch1, zero pcmpgtb scratch1, %5 psrlw %5, 1 pand scratch1, [GLOBAL(t80)] pand %5, [GLOBAL(t7f)] por %5, scratch1 pand %5, %6 ; vp8_filter &= ~hev psubsb %4, %5 ; qs1 - vp8_filter pxor %4, [GLOBAL(t80)] paddsb %1, %5 ; ps1 + vp8_filter pxor %1, [GLOBAL(t80)] %endmacro SECTION .text ;void vp8_loop_filter_bh_y_sse2 ;( ; unsigned char *src_ptr, ; int src_pixel_step, ; const char *blimit, ; const char *limit, ; const char *thresh ;) global sym(vp8_loop_filter_bh_y_sse2) PRIVATE sym(vp8_loop_filter_bh_y_sse2): %if LIBVPX_YASM_WIN64 %define src rcx ; src_ptr %define stride rdx ; src_pixel_step %define blimit r8 %define limit r9 %define thresh r10 %define spp rax %define stride3 r11 %define stride5 r12 %define stride7 r13 push rbp mov rbp, rsp SAVE_XMM 11 push r12 push r13 mov thresh, arg(4) %else %define src rdi ; src_ptr %define stride rsi ; src_pixel_step %define blimit rdx %define limit rcx %define thresh r8 %define spp rax %define stride3 r9 %define stride5 r10 %define stride7 r11 %endif %define scratch1 xmm5 %define scratch2 xmm6 %define zero xmm7 %define i0 [src] %define i1 [spp] %define i2 [src + 2 * stride] %define i3 [spp + 2 * stride] %define i4 [src + 4 * stride] %define i5 [spp + 4 * stride] %define i6 [src + 2 * stride3] %define i7 [spp + 2 * stride3] %define i8 [src + 8 * stride] %define i9 [spp + 8 * stride] %define i10 [src + 2 * stride5] %define i11 [spp + 2 
* stride5] %define i12 [src + 4 * stride3] %define i13 [spp + 4 * stride3] %define i14 [src + 2 * stride7] %define i15 [spp + 2 * stride7] ; prep work lea spp, [src + stride] lea stride3, [stride + 2 * stride] lea stride5, [stride3 + 2 * stride] lea stride7, [stride3 + 4 * stride] pxor zero, zero ; load the first set into registers movdqa xmm0, i0 movdqa xmm1, i1 movdqa xmm2, i2 movdqa xmm3, i3 movdqa xmm4, i4 movdqa xmm8, i5 movdqa xmm9, i6 ; q2, will contain abs(p1-p0) movdqa xmm10, i7 LF_FILTER_HEV_MASK xmm0, xmm1, xmm2, xmm3, xmm4, xmm8, xmm9, xmm10 movdqa xmm1, i2 movdqa xmm2, i3 movdqa xmm3, i4 movdqa xmm8, i5 LF_FILTER xmm1, xmm2, xmm3, xmm8, xmm0, xmm4 movdqa i2, xmm1 movdqa i3, xmm2 ; second set movdqa i4, xmm3 movdqa i5, xmm8 movdqa xmm0, i6 movdqa xmm1, i7 movdqa xmm2, i8 movdqa xmm4, i9 movdqa xmm10, i10 ; q2, will contain abs(p1-p0) movdqa xmm11, i11 LF_FILTER_HEV_MASK xmm3, xmm8, xmm0, xmm1, xmm2, xmm4, xmm10, xmm11, xmm9 movdqa xmm0, i6 movdqa xmm1, i7 movdqa xmm4, i8 movdqa xmm8, i9 LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2 movdqa i6, xmm0 movdqa i7, xmm1 ; last set movdqa i8, xmm4 movdqa i9, xmm8 movdqa xmm0, i10 movdqa xmm1, i11 movdqa xmm2, i12 movdqa xmm3, i13 movdqa xmm9, i14 ; q2, will contain abs(p1-p0) movdqa xmm11, i15 LF_FILTER_HEV_MASK xmm4, xmm8, xmm0, xmm1, xmm2, xmm3, xmm9, xmm11, xmm10 movdqa xmm0, i10 movdqa xmm1, i11 movdqa xmm3, i12 movdqa xmm8, i13 LF_FILTER xmm0, xmm1, xmm3, xmm8, xmm4, xmm2 movdqa i10, xmm0 movdqa i11, xmm1 movdqa i12, xmm3 movdqa i13, xmm8 %if LIBVPX_YASM_WIN64 pop r13 pop r12 RESTORE_XMM pop rbp %endif ret ;void vp8_loop_filter_bv_y_sse2 ;( ; unsigned char *src_ptr, ; int src_pixel_step, ; const char *blimit, ; const char *limit, ; const char *thresh ;) global sym(vp8_loop_filter_bv_y_sse2) PRIVATE sym(vp8_loop_filter_bv_y_sse2): %if LIBVPX_YASM_WIN64 %define src rcx ; src_ptr %define stride rdx ; src_pixel_step %define blimit r8 %define limit r9 %define thresh r10 %define spp rax %define stride3 r11 %define stride5 r12 %define stride7 r13 push rbp mov rbp, rsp SAVE_XMM 15 push r12 push r13 mov thresh, arg(4) %else %define src rdi %define stride rsi %define blimit rdx %define limit rcx %define thresh r8 %define spp rax %define stride3 r9 %define stride5 r10 %define stride7 r11 %endif %define scratch1 xmm5 %define scratch2 xmm6 %define zero xmm7 %define s0 [src] %define s1 [spp] %define s2 [src + 2 * stride] %define s3 [spp + 2 * stride] %define s4 [src + 4 * stride] %define s5 [spp + 4 * stride] %define s6 [src + 2 * stride3] %define s7 [spp + 2 * stride3] %define s8 [src + 8 * stride] %define s9 [spp + 8 * stride] %define s10 [src + 2 * stride5] %define s11 [spp + 2 * stride5] %define s12 [src + 4 * stride3] %define s13 [spp + 4 * stride3] %define s14 [src + 2 * stride7] %define s15 [spp + 2 * stride7] %define i0 [rsp] %define i1 [rsp + 16] %define i2 [rsp + 32] %define i3 [rsp + 48] %define i4 [rsp + 64] %define i5 [rsp + 80] %define i6 [rsp + 96] %define i7 [rsp + 112] %define i8 [rsp + 128] %define i9 [rsp + 144] %define i10 [rsp + 160] %define i11 [rsp + 176] %define i12 [rsp + 192] %define i13 [rsp + 208] %define i14 [rsp + 224] %define i15 [rsp + 240] ALIGN_STACK 16, rax ; reserve stack space %define temp_storage 0 ; size is 256 (16*16) %define stack_size 256 sub rsp, stack_size ; prep work lea spp, [src + stride] lea stride3, [stride + 2 * stride] lea stride5, [stride3 + 2 * stride] lea stride7, [stride3 + 4 * stride] ; 8-f movdqa xmm0, s8 movdqa xmm1, xmm0 punpcklbw xmm0, s9 ; 80 90 punpckhbw xmm1, s9 ; 88 98 movdqa xmm2, 
s10 movdqa xmm3, xmm2 punpcklbw xmm2, s11 ; a0 b0 punpckhbw xmm3, s11 ; a8 b8 movdqa xmm4, xmm0 punpcklwd xmm0, xmm2 ; 80 90 a0 b0 punpckhwd xmm4, xmm2 ; 84 94 a4 b4 movdqa xmm2, xmm1 punpcklwd xmm1, xmm3 ; 88 98 a8 b8 punpckhwd xmm2, xmm3 ; 8c 9c ac bc ; using xmm[0124] ; work on next 4 rows movdqa xmm3, s12 movdqa xmm5, xmm3 punpcklbw xmm3, s13 ; c0 d0 punpckhbw xmm5, s13 ; c8 d8 movdqa xmm6, s14 movdqa xmm7, xmm6 punpcklbw xmm6, s15 ; e0 f0 punpckhbw xmm7, s15 ; e8 f8 movdqa xmm8, xmm3 punpcklwd xmm3, xmm6 ; c0 d0 e0 f0 punpckhwd xmm8, xmm6 ; c4 d4 e4 f4 movdqa xmm6, xmm5 punpcklwd xmm5, xmm7 ; c8 d8 e8 f8 punpckhwd xmm6, xmm7 ; cc dc ec fc ; pull the third and fourth sets together movdqa xmm7, xmm0 punpckldq xmm0, xmm3 ; 80 90 a0 b0 c0 d0 e0 f0 punpckhdq xmm7, xmm3 ; 82 92 a2 b2 c2 d2 e2 f2 movdqa xmm3, xmm4 punpckldq xmm4, xmm8 ; 84 94 a4 b4 c4 d4 e4 f4 punpckhdq xmm3, xmm8 ; 86 96 a6 b6 c6 d6 e6 f6 movdqa xmm8, xmm1 punpckldq xmm1, xmm5 ; 88 88 a8 b8 c8 d8 e8 f8 punpckhdq xmm8, xmm5 ; 8a 9a aa ba ca da ea fa movdqa xmm5, xmm2 punpckldq xmm2, xmm6 ; 8c 9c ac bc cc dc ec fc punpckhdq xmm5, xmm6 ; 8e 9e ae be ce de ee fe ; save the calculations. we only have 15 registers ... movdqa i0, xmm0 movdqa i1, xmm7 movdqa i2, xmm4 movdqa i3, xmm3 movdqa i4, xmm1 movdqa i5, xmm8 movdqa i6, xmm2 movdqa i7, xmm5 ; 0-7 movdqa xmm0, s0 movdqa xmm1, xmm0 punpcklbw xmm0, s1 ; 00 10 punpckhbw xmm1, s1 ; 08 18 movdqa xmm2, s2 movdqa xmm3, xmm2 punpcklbw xmm2, s3 ; 20 30 punpckhbw xmm3, s3 ; 28 38 movdqa xmm4, xmm0 punpcklwd xmm0, xmm2 ; 00 10 20 30 punpckhwd xmm4, xmm2 ; 04 14 24 34 movdqa xmm2, xmm1 punpcklwd xmm1, xmm3 ; 08 18 28 38 punpckhwd xmm2, xmm3 ; 0c 1c 2c 3c ; using xmm[0124] ; work on next 4 rows movdqa xmm3, s4 movdqa xmm5, xmm3 punpcklbw xmm3, s5 ; 40 50 punpckhbw xmm5, s5 ; 48 58 movdqa xmm6, s6 movdqa xmm7, xmm6 punpcklbw xmm6, s7 ; 60 70 punpckhbw xmm7, s7 ; 68 78 movdqa xmm8, xmm3 punpcklwd xmm3, xmm6 ; 40 50 60 70 punpckhwd xmm8, xmm6 ; 44 54 64 74 movdqa xmm6, xmm5 punpcklwd xmm5, xmm7 ; 48 58 68 78 punpckhwd xmm6, xmm7 ; 4c 5c 6c 7c ; pull the first two sets together movdqa xmm7, xmm0 punpckldq xmm0, xmm3 ; 00 10 20 30 40 50 60 70 punpckhdq xmm7, xmm3 ; 02 12 22 32 42 52 62 72 movdqa xmm3, xmm4 punpckldq xmm4, xmm8 ; 04 14 24 34 44 54 64 74 punpckhdq xmm3, xmm8 ; 06 16 26 36 46 56 66 76 movdqa xmm8, xmm1 punpckldq xmm1, xmm5 ; 08 18 28 38 48 58 68 78 punpckhdq xmm8, xmm5 ; 0a 1a 2a 3a 4a 5a 6a 7a movdqa xmm5, xmm2 punpckldq xmm2, xmm6 ; 0c 1c 2c 3c 4c 5c 6c 7c punpckhdq xmm5, xmm6 ; 0e 1e 2e 3e 4e 5e 6e 7e ; final combination movdqa xmm6, xmm0 punpcklqdq xmm0, i0 punpckhqdq xmm6, i0 movdqa xmm9, xmm7 punpcklqdq xmm7, i1 punpckhqdq xmm9, i1 movdqa xmm10, xmm4 punpcklqdq xmm4, i2 punpckhqdq xmm10, i2 movdqa xmm11, xmm3 punpcklqdq xmm3, i3 punpckhqdq xmm11, i3 movdqa xmm12, xmm1 punpcklqdq xmm1, i4 punpckhqdq xmm12, i4 movdqa xmm13, xmm8 punpcklqdq xmm8, i5 punpckhqdq xmm13, i5 movdqa xmm14, xmm2 punpcklqdq xmm2, i6 punpckhqdq xmm14, i6 movdqa xmm15, xmm5 punpcklqdq xmm5, i7 punpckhqdq xmm15, i7 movdqa i0, xmm0 movdqa i1, xmm6 movdqa i2, xmm7 movdqa i3, xmm9 movdqa i4, xmm4 movdqa i5, xmm10 movdqa i6, xmm3 movdqa i7, xmm11 movdqa i8, xmm1 movdqa i9, xmm12 movdqa i10, xmm8 movdqa i11, xmm13 movdqa i12, xmm2 movdqa i13, xmm14 movdqa i14, xmm5 movdqa i15, xmm15 ; TRANSPOSED DATA AVAILABLE ON THE STACK movdqa xmm12, xmm6 movdqa xmm13, xmm7 pxor zero, zero LF_FILTER_HEV_MASK xmm0, xmm12, xmm13, xmm9, xmm4, xmm10, xmm3, xmm11 movdqa xmm1, i2 movdqa xmm2, i3 movdqa xmm8, i4 movdqa xmm9, i5 
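    ; the mask is now in xmm0 and the inverted hev mask in xmm4; filter
    ; p1,p0,q0,q1 (just reloaded from scratch rows i2-i5) in place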
LF_FILTER xmm1, xmm2, xmm8, xmm9, xmm0, xmm4 movdqa i2, xmm1 movdqa i3, xmm2 ; second set movdqa i4, xmm8 movdqa i5, xmm9 movdqa xmm0, i6 movdqa xmm1, i7 movdqa xmm2, i8 movdqa xmm4, i9 movdqa xmm10, i10 ; q2, will contain abs(p1-p0) movdqa xmm11, i11 LF_FILTER_HEV_MASK xmm8, xmm9, xmm0, xmm1, xmm2, xmm4, xmm10, xmm11, xmm3 movdqa xmm0, i6 movdqa xmm1, i7 movdqa xmm3, i8 movdqa xmm4, i9 LF_FILTER xmm0, xmm1, xmm3, xmm4, xmm8, xmm2 movdqa i6, xmm0 movdqa i7, xmm1 ; last set movdqa i8, xmm3 movdqa i9, xmm4 movdqa xmm0, i10 movdqa xmm1, i11 movdqa xmm2, i12 movdqa xmm8, i13 movdqa xmm9, i14 ; q2, will contain abs(p1-p0) movdqa xmm11, i15 LF_FILTER_HEV_MASK xmm3, xmm4, xmm0, xmm1, xmm2, xmm8, xmm9, xmm11, xmm10 movdqa xmm0, i10 movdqa xmm1, i11 movdqa xmm4, i12 movdqa xmm8, i13 LF_FILTER xmm0, xmm1, xmm4, xmm8, xmm3, xmm2 movdqa i10, xmm0 movdqa i11, xmm1 movdqa i12, xmm4 movdqa i13, xmm8 ; RESHUFFLE AND WRITE OUT ; 8-f movdqa xmm0, i8 movdqa xmm1, xmm0 punpcklbw xmm0, i9 ; 80 90 punpckhbw xmm1, i9 ; 88 98 movdqa xmm2, i10 movdqa xmm3, xmm2 punpcklbw xmm2, i11 ; a0 b0 punpckhbw xmm3, i11 ; a8 b8 movdqa xmm4, xmm0 punpcklwd xmm0, xmm2 ; 80 90 a0 b0 punpckhwd xmm4, xmm2 ; 84 94 a4 b4 movdqa xmm2, xmm1 punpcklwd xmm1, xmm3 ; 88 98 a8 b8 punpckhwd xmm2, xmm3 ; 8c 9c ac bc ; using xmm[0124] ; work on next 4 rows movdqa xmm3, i12 movdqa xmm5, xmm3 punpcklbw xmm3, i13 ; c0 d0 punpckhbw xmm5, i13 ; c8 d8 movdqa xmm6, i14 movdqa xmm7, xmm6 punpcklbw xmm6, i15 ; e0 f0 punpckhbw xmm7, i15 ; e8 f8 movdqa xmm8, xmm3 punpcklwd xmm3, xmm6 ; c0 d0 e0 f0 punpckhwd xmm8, xmm6 ; c4 d4 e4 f4 movdqa xmm6, xmm5 punpcklwd xmm5, xmm7 ; c8 d8 e8 f8 punpckhwd xmm6, xmm7 ; cc dc ec fc ; pull the third and fourth sets together movdqa xmm7, xmm0 punpckldq xmm0, xmm3 ; 80 90 a0 b0 c0 d0 e0 f0 punpckhdq xmm7, xmm3 ; 82 92 a2 b2 c2 d2 e2 f2 movdqa xmm3, xmm4 punpckldq xmm4, xmm8 ; 84 94 a4 b4 c4 d4 e4 f4 punpckhdq xmm3, xmm8 ; 86 96 a6 b6 c6 d6 e6 f6 movdqa xmm8, xmm1 punpckldq xmm1, xmm5 ; 88 88 a8 b8 c8 d8 e8 f8 punpckhdq xmm8, xmm5 ; 8a 9a aa ba ca da ea fa movdqa xmm5, xmm2 punpckldq xmm2, xmm6 ; 8c 9c ac bc cc dc ec fc punpckhdq xmm5, xmm6 ; 8e 9e ae be ce de ee fe ; save the calculations. we only have 15 registers ... 
movdqa i8, xmm0 movdqa i9, xmm7 movdqa i10, xmm4 movdqa i11, xmm3 movdqa i12, xmm1 movdqa i13, xmm8 movdqa i14, xmm2 movdqa i15, xmm5 ; 0-7 movdqa xmm0, i0 movdqa xmm1, xmm0 punpcklbw xmm0, i1 ; 00 10 punpckhbw xmm1, i1 ; 08 18 movdqa xmm2, i2 movdqa xmm3, xmm2 punpcklbw xmm2, i3 ; 20 30 punpckhbw xmm3, i3 ; 28 38 movdqa xmm4, xmm0 punpcklwd xmm0, xmm2 ; 00 10 20 30 punpckhwd xmm4, xmm2 ; 04 14 24 34 movdqa xmm2, xmm1 punpcklwd xmm1, xmm3 ; 08 18 28 38 punpckhwd xmm2, xmm3 ; 0c 1c 2c 3c ; using xmm[0124] ; work on next 4 rows movdqa xmm3, i4 movdqa xmm5, xmm3 punpcklbw xmm3, i5 ; 40 50 punpckhbw xmm5, i5 ; 48 58 movdqa xmm6, i6 movdqa xmm7, xmm6 punpcklbw xmm6, i7 ; 60 70 punpckhbw xmm7, i7 ; 68 78 movdqa xmm8, xmm3 punpcklwd xmm3, xmm6 ; 40 50 60 70 punpckhwd xmm8, xmm6 ; 44 54 64 74 movdqa xmm6, xmm5 punpcklwd xmm5, xmm7 ; 48 58 68 78 punpckhwd xmm6, xmm7 ; 4c 5c 6c 7c ; pull the first two sets together movdqa xmm7, xmm0 punpckldq xmm0, xmm3 ; 00 10 20 30 40 50 60 70 punpckhdq xmm7, xmm3 ; 02 12 22 32 42 52 62 72 movdqa xmm3, xmm4 punpckldq xmm4, xmm8 ; 04 14 24 34 44 54 64 74 punpckhdq xmm3, xmm8 ; 06 16 26 36 46 56 66 76 movdqa xmm8, xmm1 punpckldq xmm1, xmm5 ; 08 18 28 38 48 58 68 78 punpckhdq xmm8, xmm5 ; 0a 1a 2a 3a 4a 5a 6a 7a movdqa xmm5, xmm2 punpckldq xmm2, xmm6 ; 0c 1c 2c 3c 4c 5c 6c 7c punpckhdq xmm5, xmm6 ; 0e 1e 2e 3e 4e 5e 6e 7e ; final combination movdqa xmm6, xmm0 punpcklqdq xmm0, i8 punpckhqdq xmm6, i8 movdqa xmm9, xmm7 punpcklqdq xmm7, i9 punpckhqdq xmm9, i9 movdqa xmm10, xmm4 punpcklqdq xmm4, i10 punpckhqdq xmm10, i10 movdqa xmm11, xmm3 punpcklqdq xmm3, i11 punpckhqdq xmm11, i11 movdqa xmm12, xmm1 punpcklqdq xmm1, i12 punpckhqdq xmm12, i12 movdqa xmm13, xmm8 punpcklqdq xmm8, i13 punpckhqdq xmm13, i13 movdqa xmm14, xmm2 punpcklqdq xmm2, i14 punpckhqdq xmm14, i14 movdqa xmm15, xmm5 punpcklqdq xmm5, i15 punpckhqdq xmm15, i15 movdqa s0, xmm0 movdqa s1, xmm6 movdqa s2, xmm7 movdqa s3, xmm9 movdqa s4, xmm4 movdqa s5, xmm10 movdqa s6, xmm3 movdqa s7, xmm11 movdqa s8, xmm1 movdqa s9, xmm12 movdqa s10, xmm8 movdqa s11, xmm13 movdqa s12, xmm2 movdqa s13, xmm14 movdqa s14, xmm5 movdqa s15, xmm15 ; free stack space add rsp, stack_size ; un-ALIGN_STACK pop rsp %if LIBVPX_YASM_WIN64 pop r13 pop r12 RESTORE_XMM pop rbp %endif ret SECTION_RODATA align 16 te0: times 16 db 0xe0 align 16 t7f: times 16 db 0x7f align 16 tfe: times 16 db 0xfe align 16 t1f: times 16 db 0x1f align 16 t80: times 16 db 0x80 align 16 t1: times 16 db 0x01 align 16 t3: times 16 db 0x03 align 16 t4: times 16 db 0x04 libvpx-1.8.2/vp8/common/x86/loopfilter_sse2.asm000066400000000000000000002067261357355204000213160ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
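Before the filter kernels proper, it is worth noting what the large punpcklbw/punpcklwd/punpckldq/punpcklqdq cascades in the vertical-edge routine above (and in the TRANSPOSE_16X8 macro below) actually compute: a 16x16 byte transpose, which lets a vertical edge be filtered with the same row-oriented arithmetic as a horizontal edge and transposed back before the stores. A minimal scalar restatement, for orientation only (the function name and array layout here are illustrative, not libvpx API):

/* Scalar equivalent of the SIMD transpose cascades: dst[c][r] = src[r][c].
 * The assembly does this 16 bytes at a time in four interleave stages
 * (bytes -> words -> dwords -> qwords), parking half of the intermediate
 * rows on the stack; hence the "we only have 15 registers" comments. */
static void transpose_16x16(unsigned char dst[16][16],
                            const unsigned char *src, int stride) {
  int r, c;
  for (r = 0; r < 16; ++r)
    for (c = 0; c < 16; ++c)
      dst[c][r] = src[r * stride + c];
}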
; %include "vpx_ports/x86_abi_support.asm" %define _t0 0 %define _t1 _t0 + 16 %define _p3 _t1 + 16 %define _p2 _p3 + 16 %define _p1 _p2 + 16 %define _p0 _p1 + 16 %define _q0 _p0 + 16 %define _q1 _q0 + 16 %define _q2 _q1 + 16 %define _q3 _q2 + 16 %define lf_var_size 160 ; Use of pmaxub instead of psubusb to compute filter mask was seen ; in ffvp8 %macro LFH_FILTER_AND_HEV_MASK 1 %if %1 movdqa xmm2, [rdi+2*rax] ; q3 movdqa xmm1, [rsi+2*rax] ; q2 movdqa xmm4, [rsi+rax] ; q1 movdqa xmm5, [rsi] ; q0 neg rax ; negate pitch to deal with above border %else movlps xmm2, [rsi + rcx*2] ; q3 movlps xmm1, [rsi + rcx] ; q2 movlps xmm4, [rsi] ; q1 movlps xmm5, [rsi + rax] ; q0 movhps xmm2, [rdi + rcx*2] movhps xmm1, [rdi + rcx] movhps xmm4, [rdi] movhps xmm5, [rdi + rax] lea rsi, [rsi + rax*4] lea rdi, [rdi + rax*4] movdqa [rsp+_q2], xmm1 ; store q2 movdqa [rsp+_q1], xmm4 ; store q1 %endif movdqa xmm7, [rdx] ;limit movdqa xmm6, xmm1 ; q2 movdqa xmm3, xmm4 ; q1 psubusb xmm1, xmm2 ; q2-=q3 psubusb xmm2, xmm6 ; q3-=q2 psubusb xmm4, xmm6 ; q1-=q2 psubusb xmm6, xmm3 ; q2-=q1 por xmm4, xmm6 ; abs(q2-q1) por xmm1, xmm2 ; abs(q3-q2) movdqa xmm0, xmm5 ; q0 pmaxub xmm1, xmm4 psubusb xmm5, xmm3 ; q0-=q1 psubusb xmm3, xmm0 ; q1-=q0 por xmm5, xmm3 ; abs(q0-q1) movdqa [rsp+_t0], xmm5 ; save to t0 pmaxub xmm1, xmm5 %if %1 movdqa xmm2, [rsi+4*rax] ; p3 movdqa xmm4, [rdi+4*rax] ; p2 movdqa xmm6, [rsi+2*rax] ; p1 %else movlps xmm2, [rsi + rax] ; p3 movlps xmm4, [rsi] ; p2 movlps xmm6, [rsi + rcx] ; p1 movhps xmm2, [rdi + rax] movhps xmm4, [rdi] movhps xmm6, [rdi + rcx] movdqa [rsp+_p2], xmm4 ; store p2 movdqa [rsp+_p1], xmm6 ; store p1 %endif movdqa xmm5, xmm4 ; p2 movdqa xmm3, xmm6 ; p1 psubusb xmm4, xmm2 ; p2-=p3 psubusb xmm2, xmm5 ; p3-=p2 psubusb xmm3, xmm5 ; p1-=p2 pmaxub xmm1, xmm4 ; abs(p3 - p2) psubusb xmm5, xmm6 ; p2-=p1 pmaxub xmm1, xmm2 ; abs(p3 - p2) pmaxub xmm1, xmm5 ; abs(p2 - p1) movdqa xmm2, xmm6 ; p1 pmaxub xmm1, xmm3 ; abs(p2 - p1) %if %1 movdqa xmm4, [rsi+rax] ; p0 movdqa xmm3, [rdi] ; q1 %else movlps xmm4, [rsi + rcx*2] ; p0 movhps xmm4, [rdi + rcx*2] movdqa xmm3, [rsp+_q1] ; q1 %endif movdqa xmm5, xmm4 ; p0 psubusb xmm4, xmm6 ; p0-=p1 psubusb xmm6, xmm5 ; p1-=p0 por xmm6, xmm4 ; abs(p1 - p0) mov rdx, arg(2) ; get blimit movdqa [rsp+_t1], xmm6 ; save to t1 movdqa xmm4, xmm3 ; q1 pmaxub xmm1, xmm6 psubusb xmm3, xmm2 ; q1-=p1 psubusb xmm2, xmm4 ; p1-=q1 psubusb xmm1, xmm7 por xmm2, xmm3 ; abs(p1-q1) movdqa xmm7, [rdx] ; blimit mov rdx, arg(4) ; hev get thresh movdqa xmm3, xmm0 ; q0 pand xmm2, [GLOBAL(tfe)] ; set lsb of each byte to zero movdqa xmm6, xmm5 ; p0 psrlw xmm2, 1 ; abs(p1-q1)/2 psubusb xmm5, xmm3 ; p0-=q0 psubusb xmm3, xmm6 ; q0-=p0 por xmm5, xmm3 ; abs(p0 - q0) paddusb xmm5, xmm5 ; abs(p0-q0)*2 movdqa xmm4, [rsp+_t0] ; hev get abs (q1 - q0) movdqa xmm3, [rsp+_t1] ; get abs (p1 - p0) paddusb xmm5, xmm2 ; abs (p0 - q0) *2 + abs(p1-q1)/2 movdqa xmm2, [rdx] ; hev psubusb xmm5, xmm7 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit psubusb xmm4, xmm2 ; hev psubusb xmm3, xmm2 ; hev por xmm1, xmm5 pxor xmm7, xmm7 paddb xmm4, xmm3 ; hev abs(q1 - q0) > thresh || abs(p1 - p0) > thresh pcmpeqb xmm4, xmm5 ; hev pcmpeqb xmm3, xmm3 ; hev pcmpeqb xmm1, xmm7 ; mask xmm1 pxor xmm4, xmm3 ; hev %endmacro %macro B_FILTER 1 movdqa xmm3, [GLOBAL(t80)] %if %1 == 0 movdqa xmm2, [rsp+_p1] ; p1 movdqa xmm7, [rsp+_q1] ; q1 %elif %1 == 1 movdqa xmm2, [rsi+2*rax] ; p1 movdqa xmm7, [rdi] ; q1 %elif %1 == 2 movdqa xmm2, [rsp+_p1] ; p1 movdqa xmm6, [rsp+_p0] ; p0 movdqa xmm0, [rsp+_q0] ; q0 movdqa xmm7, [rsp+_q1] ; q1 %endif pxor 
xmm2, xmm3 ; p1 offset to convert to signed values pxor xmm7, xmm3 ; q1 offset to convert to signed values psubsb xmm2, xmm7 ; p1 - q1 pxor xmm6, xmm3 ; offset to convert to signed values pand xmm2, xmm4 ; high var mask (hvm)(p1 - q1) pxor xmm0, xmm3 ; offset to convert to signed values movdqa xmm3, xmm0 ; q0 psubsb xmm0, xmm6 ; q0 - p0 paddsb xmm2, xmm0 ; 1 * (q0 - p0) + hvm(p1 - q1) paddsb xmm2, xmm0 ; 2 * (q0 - p0) + hvm(p1 - q1) paddsb xmm2, xmm0 ; 3 * (q0 - p0) + hvm(p1 - q1) pand xmm1, xmm2 ; mask filter values we don't care about movdqa xmm2, xmm1 paddsb xmm1, [GLOBAL(t4)] ; 3* (q0 - p0) + hvm(p1 - q1) + 4 paddsb xmm2, [GLOBAL(t3)] ; 3* (q0 - p0) + hvm(p1 - q1) + 3 punpckhbw xmm5, xmm2 ; axbxcxdx punpcklbw xmm2, xmm2 ; exfxgxhx punpcklbw xmm0, xmm1 ; exfxgxhx psraw xmm5, 11 ; sign extended shift right by 3 punpckhbw xmm1, xmm1 ; axbxcxdx psraw xmm2, 11 ; sign extended shift right by 3 packsswb xmm2, xmm5 ; (3* (q0 - p0) + hvm(p1 - q1) + 3) >> 3; psraw xmm0, 11 ; sign extended shift right by 3 psraw xmm1, 11 ; sign extended shift right by 3 movdqa xmm5, xmm0 ; save results packsswb xmm0, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>3 paddsb xmm6, xmm2 ; p0+= p0 add movdqa xmm2, [GLOBAL(ones)] paddsw xmm5, xmm2 paddsw xmm1, xmm2 psraw xmm5, 1 ; partial shifted one more time for 2nd tap psraw xmm1, 1 ; partial shifted one more time for 2nd tap packsswb xmm5, xmm1 ; (3* (q0 - p0) + hvm(p1 - q1) + 4) >>4 movdqa xmm2, [GLOBAL(t80)] %if %1 == 0 movdqa xmm1, [rsp+_p1] ; p1 lea rsi, [rsi + rcx*2] lea rdi, [rdi + rcx*2] %elif %1 == 1 movdqa xmm1, [rsi+2*rax] ; p1 %elif %1 == 2 movdqa xmm1, [rsp+_p1] ; p1 %endif pandn xmm4, xmm5 ; high edge variance additive pxor xmm6, xmm2 ; unoffset pxor xmm1, xmm2 ; reoffset psubsb xmm3, xmm0 ; q0-= q0 add paddsb xmm1, xmm4 ; p1+= p1 add pxor xmm3, xmm2 ; unoffset pxor xmm1, xmm2 ; unoffset psubsb xmm7, xmm4 ; q1-= q1 add pxor xmm7, xmm2 ; unoffset %if %1 == 0 movq [rsi], xmm6 ; p0 movhps [rdi], xmm6 movq [rsi + rax], xmm1 ; p1 movhps [rdi + rax], xmm1 movq [rsi + rcx], xmm3 ; q0 movhps [rdi + rcx], xmm3 movq [rsi + rcx*2], xmm7 ; q1 movhps [rdi + rcx*2], xmm7 %elif %1 == 1 movdqa [rsi+rax], xmm6 ; write back movdqa [rsi+2*rax], xmm1 ; write back movdqa [rsi], xmm3 ; write back movdqa [rdi], xmm7 ; write back %endif %endmacro SECTION .text %if ABI_IS_32BIT ;void vp8_loop_filter_horizontal_edge_sse2 ;( ; unsigned char *src_ptr, ; int src_pixel_step, ; const char *blimit, ; const char *limit, ; const char *thresh, ;) global sym(vp8_loop_filter_horizontal_edge_sse2) PRIVATE sym(vp8_loop_filter_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, lf_var_size mov rsi, arg(0) ;src_ptr movsxd rax, dword ptr arg(1) ;src_pixel_step mov rdx, arg(3) ;limit lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing ; calculate breakout conditions and high edge variance LFH_FILTER_AND_HEV_MASK 1 ; filter and write back the result B_FILTER 1 add rsp, lf_var_size pop rsp ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret %endif ;void vp8_loop_filter_horizontal_edge_uv_sse2 ;( ; unsigned char *src_ptr, ; int src_pixel_step, ; const char *blimit, ; const char *limit, ; const char *thresh, ; int count ;) global sym(vp8_loop_filter_horizontal_edge_uv_sse2) PRIVATE sym(vp8_loop_filter_horizontal_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 
lf_var_size mov rsi, arg(0) ; u mov rdi, arg(5) ; v movsxd rax, dword ptr arg(1) ; src_pixel_step mov rcx, rax neg rax ; negate pitch to deal with above border mov rdx, arg(3) ;limit lea rsi, [rsi + rcx] lea rdi, [rdi + rcx] ; calculate breakout conditions and high edge variance LFH_FILTER_AND_HEV_MASK 0 ; filter and write back the result B_FILTER 0 add rsp, lf_var_size pop rsp ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret %macro MB_FILTER_AND_WRITEBACK 1 movdqa xmm3, [GLOBAL(t80)] %if %1 == 0 movdqa xmm2, [rsp+_p1] ; p1 movdqa xmm7, [rsp+_q1] ; q1 %elif %1 == 1 movdqa xmm2, [rsi+2*rax] ; p1 movdqa xmm7, [rdi] ; q1 mov rcx, rax neg rcx %elif %1 == 2 movdqa xmm2, [rsp+_p1] ; p1 movdqa xmm6, [rsp+_p0] ; p0 movdqa xmm0, [rsp+_q0] ; q0 movdqa xmm7, [rsp+_q1] ; q1 %endif pxor xmm2, xmm3 ; p1 offset to convert to signed values pxor xmm7, xmm3 ; q1 offset to convert to signed values pxor xmm6, xmm3 ; offset to convert to signed values pxor xmm0, xmm3 ; offset to convert to signed values psubsb xmm2, xmm7 ; p1 - q1 movdqa xmm3, xmm0 ; q0 psubsb xmm0, xmm6 ; q0 - p0 paddsb xmm2, xmm0 ; 1 * (q0 - p0) + (p1 - q1) paddsb xmm2, xmm0 ; 2 * (q0 - p0) paddsb xmm2, xmm0 ; 3 * (q0 - p0) + (p1 - q1) pand xmm1, xmm2 ; mask filter values we don't care about movdqa xmm2, xmm1 ; vp8_filter pand xmm2, xmm4 ; Filter2 = vp8_filter & hev pxor xmm0, xmm0 pandn xmm4, xmm1 ; vp8_filter&=~hev pxor xmm1, xmm1 punpcklbw xmm0, xmm4 ; Filter 2 (hi) punpckhbw xmm1, xmm4 ; Filter 2 (lo) movdqa xmm5, xmm2 movdqa xmm4, [GLOBAL(s9)] paddsb xmm5, [GLOBAL(t3)] ; vp8_signed_char_clamp(Filter2 + 3) paddsb xmm2, [GLOBAL(t4)] ; vp8_signed_char_clamp(Filter2 + 4) pmulhw xmm1, xmm4 ; Filter 2 (lo) * 9 pmulhw xmm0, xmm4 ; Filter 2 (hi) * 9 punpckhbw xmm7, xmm5 ; axbxcxdx punpcklbw xmm5, xmm5 ; exfxgxhx psraw xmm7, 11 ; sign extended shift right by 3 psraw xmm5, 11 ; sign extended shift right by 3 punpckhbw xmm4, xmm2 ; axbxcxdx punpcklbw xmm2, xmm2 ; exfxgxhx psraw xmm4, 11 ; sign extended shift right by 3 packsswb xmm5, xmm7 ; Filter2 >>=3; psraw xmm2, 11 ; sign extended shift right by 3 packsswb xmm2, xmm4 ; Filter1 >>=3; paddsb xmm6, xmm5 ; ps0 =ps0 + Fitler2 psubsb xmm3, xmm2 ; qs0 =qs0 - Filter1 movdqa xmm7, xmm1 movdqa xmm4, [GLOBAL(s63)] movdqa xmm5, xmm0 movdqa xmm2, xmm5 paddw xmm0, xmm4 ; Filter 2 (hi) * 9 + 63 paddw xmm1, xmm4 ; Filter 2 (lo) * 9 + 63 movdqa xmm4, xmm7 paddw xmm5, xmm5 ; Filter 2 (hi) * 18 paddw xmm7, xmm7 ; Filter 2 (lo) * 18 paddw xmm5, xmm0 ; Filter 2 (hi) * 27 + 63 paddw xmm7, xmm1 ; Filter 2 (lo) * 27 + 63 paddw xmm2, xmm0 ; Filter 2 (hi) * 18 + 63 psraw xmm0, 7 ; (Filter 2 (hi) * 9 + 63) >> 7 paddw xmm4, xmm1 ; Filter 2 (lo) * 18 + 63 psraw xmm1, 7 ; (Filter 2 (lo) * 9 + 63) >> 7 psraw xmm2, 7 ; (Filter 2 (hi) * 18 + 63) >> 7 packsswb xmm0, xmm1 ; u1 = vp8_signed_char_clamp((63 + Filter2 * 9)>>7) psraw xmm4, 7 ; (Filter 2 (lo) * 18 + 63) >> 7 psraw xmm5, 7 ; (Filter 2 (hi) * 27 + 63) >> 7 psraw xmm7, 7 ; (Filter 2 (lo) * 27 + 63) >> 7 packsswb xmm5, xmm7 ; u3 = vp8_signed_char_clamp((63 + Filter2 * 27)>>7) packsswb xmm2, xmm4 ; u2 = vp8_signed_char_clamp((63 + Filter2 * 18)>>7) movdqa xmm7, [GLOBAL(t80)] %if %1 == 0 movdqa xmm1, [rsp+_q1] ; q1 movdqa xmm4, [rsp+_p1] ; p1 lea rsi, [rsi+rcx*2] lea rdi, [rdi+rcx*2] %elif %1 == 1 movdqa xmm1, [rdi] ; q1 movdqa xmm4, [rsi+rax*2] ; p1 %elif %1 == 2 movdqa xmm4, [rsp+_p1] ; p1 movdqa xmm1, [rsp+_q1] ; q1 %endif pxor xmm1, xmm7 pxor xmm4, xmm7 psubsb xmm3, xmm5 ; sq = vp8_signed_char_clamp(qs0 - u3) paddsb xmm6, xmm5 ; sp = 
vp8_signed_char_clamp(ps0 - u3) psubsb xmm1, xmm2 ; sq = vp8_signed_char_clamp(qs1 - u2) paddsb xmm4, xmm2 ; sp = vp8_signed_char_clamp(ps1 - u2) %if %1 == 1 movdqa xmm2, [rdi+rax*4] ; p2 movdqa xmm5, [rdi+rcx] ; q2 %else movdqa xmm2, [rsp+_p2] ; p2 movdqa xmm5, [rsp+_q2] ; q2 %endif pxor xmm1, xmm7 ; *oq1 = sq^0x80; pxor xmm4, xmm7 ; *op1 = sp^0x80; pxor xmm2, xmm7 pxor xmm5, xmm7 paddsb xmm2, xmm0 ; sp = vp8_signed_char_clamp(ps2 - u) psubsb xmm5, xmm0 ; sq = vp8_signed_char_clamp(qs2 - u) pxor xmm2, xmm7 ; *op2 = sp^0x80; pxor xmm5, xmm7 ; *oq2 = sq^0x80; pxor xmm3, xmm7 ; *oq0 = sq^0x80 pxor xmm6, xmm7 ; *oq0 = sp^0x80 %if %1 == 0 movq [rsi], xmm6 ; p0 movhps [rdi], xmm6 movq [rsi + rcx], xmm3 ; q0 movhps [rdi + rcx], xmm3 lea rdx, [rcx + rcx*2] movq [rsi+rcx*2], xmm1 ; q1 movhps [rdi+rcx*2], xmm1 movq [rsi + rax], xmm4 ; p1 movhps [rdi + rax], xmm4 movq [rsi+rax*2], xmm2 ; p2 movhps [rdi+rax*2], xmm2 movq [rsi+rdx], xmm5 ; q2 movhps [rdi+rdx], xmm5 %elif %1 == 1 movdqa [rdi+rcx], xmm5 ; q2 movdqa [rdi], xmm1 ; q1 movdqa [rsi], xmm3 ; q0 movdqa [rsi+rax ], xmm6 ; p0 movdqa [rsi+rax*2], xmm4 ; p1 movdqa [rdi+rax*4], xmm2 ; p2 %elif %1 == 2 movdqa [rsp+_p1], xmm4 ; p1 movdqa [rsp+_p0], xmm6 ; p0 movdqa [rsp+_q0], xmm3 ; q0 movdqa [rsp+_q1], xmm1 ; q1 %endif %endmacro ;void vp8_mbloop_filter_horizontal_edge_sse2 ;( ; unsigned char *src_ptr, ; int src_pixel_step, ; const char *blimit, ; const char *limit, ; const char *thresh, ;) global sym(vp8_mbloop_filter_horizontal_edge_sse2) PRIVATE sym(vp8_mbloop_filter_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, lf_var_size mov rsi, arg(0) ;src_ptr movsxd rax, dword ptr arg(1) ;src_pixel_step mov rdx, arg(3) ;limit lea rdi, [rsi+rax] ; rdi points to row +1 for indirect addressing ; calculate breakout conditions and high edge variance LFH_FILTER_AND_HEV_MASK 1 ; filter and write back the results MB_FILTER_AND_WRITEBACK 1 add rsp, lf_var_size pop rsp ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_mbloop_filter_horizontal_edge_uv_sse2 ;( ; unsigned char *u, ; int src_pixel_step, ; const char *blimit, ; const char *limit, ; const char *thresh, ; unsigned char *v ;) global sym(vp8_mbloop_filter_horizontal_edge_uv_sse2) PRIVATE sym(vp8_mbloop_filter_horizontal_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, lf_var_size mov rsi, arg(0) ; u mov rdi, arg(5) ; v movsxd rax, dword ptr arg(1) ; src_pixel_step mov rcx, rax neg rax ; negate pitch to deal with above border mov rdx, arg(3) ;limit lea rsi, [rsi + rcx] lea rdi, [rdi + rcx] ; calculate breakout conditions and high edge variance LFH_FILTER_AND_HEV_MASK 0 ; filter and write back the results MB_FILTER_AND_WRITEBACK 0 add rsp, lf_var_size pop rsp ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret %macro TRANSPOSE_16X8 2 movq xmm4, [rsi] ; xx xx xx xx xx xx xx xx 07 06 05 04 03 02 01 00 movq xmm1, [rdi] ; xx xx xx xx xx xx xx xx 17 16 15 14 13 12 11 10 movq xmm0, [rsi+2*rax] ; xx xx xx xx xx xx xx xx 27 26 25 24 23 22 21 20 movq xmm7, [rdi+2*rax] ; xx xx xx xx xx xx xx xx 37 36 35 34 33 32 31 30 movq xmm5, [rsi+4*rax] ; xx xx xx xx xx xx xx xx 47 46 45 44 43 42 41 40 movq xmm2, [rdi+4*rax] ; xx xx xx xx xx xx xx xx 57 56 55 54 53 52 51 50 punpcklbw xmm4, xmm1 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00 movq xmm1, [rdi+2*rcx] ; xx 
xx xx xx xx xx xx xx 77 76 75 74 73 72 71 70 movdqa xmm3, xmm4 ; 17 07 16 06 15 05 14 04 13 03 12 02 11 01 10 00 punpcklbw xmm0, xmm7 ; 37 27 36 36 35 25 34 24 33 23 32 22 31 21 30 20 movq xmm7, [rsi+2*rcx] ; xx xx xx xx xx xx xx xx 67 66 65 64 63 62 61 60 punpcklbw xmm5, xmm2 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 %if %1 lea rsi, [rsi+rax*8] lea rdi, [rdi+rax*8] %else mov rsi, arg(5) ; v_ptr %endif movdqa xmm6, xmm5 ; 57 47 56 46 55 45 54 44 53 43 52 42 51 41 50 40 punpcklbw xmm7, xmm1 ; 77 67 76 66 75 65 74 64 73 63 72 62 71 61 70 60 punpcklwd xmm5, xmm7 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 punpckhwd xmm6, xmm7 ; 77 67 57 47 76 66 56 46 75 65 55 45 74 64 54 44 punpcklwd xmm3, xmm0 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 %if %1 == 0 lea rdi, [rsi + rax - 4] ; rdi points to row +1 for indirect addressing lea rsi, [rsi - 4] %endif movdqa xmm2, xmm3 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 punpckhwd xmm4, xmm0 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 movdqa xmm7, xmm4 ; 37 27 17 07 36 26 16 06 35 25 15 05 34 24 14 04 punpckhdq xmm3, xmm5 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 punpckhdq xmm7, xmm6 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 punpckldq xmm4, xmm6 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 punpckldq xmm2, xmm5 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 movdqa [rsp+_t0], xmm2 ; save to free XMM2 movq xmm2, [rsi] ; xx xx xx xx xx xx xx xx 87 86 85 84 83 82 81 80 movq xmm6, [rdi] ; xx xx xx xx xx xx xx xx 97 96 95 94 93 92 91 90 movq xmm0, [rsi+2*rax] ; xx xx xx xx xx xx xx xx a7 a6 a5 a4 a3 a2 a1 a0 movq xmm5, [rdi+2*rax] ; xx xx xx xx xx xx xx xx b7 b6 b5 b4 b3 b2 b1 b0 movq xmm1, [rsi+4*rax] ; xx xx xx xx xx xx xx xx c7 c6 c5 c4 c3 c2 c1 c0 punpcklbw xmm2, xmm6 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 movq xmm6, [rdi+4*rax] ; xx xx xx xx xx xx xx xx d7 d6 d5 d4 d3 d2 d1 d0 punpcklbw xmm0, xmm5 ; b7 a7 b6 a6 b5 a5 b4 a4 b3 a3 b2 a2 b1 a1 b0 a0 movq xmm5, [rsi+2*rcx] ; xx xx xx xx xx xx xx xx e7 e6 e5 e4 e3 e2 e1 e0 punpcklbw xmm1, xmm6 ; d7 c7 d6 c6 d5 c5 d4 c4 d3 c3 d2 c2 d1 e1 d0 c0 movq xmm6, [rdi+2*rcx] ; xx xx xx xx xx xx xx xx f7 f6 f5 f4 f3 f2 f1 f0 punpcklbw xmm5, xmm6 ; f7 e7 f6 e6 f5 e5 f4 e4 f3 e3 f2 e2 f1 e1 f0 e0 movdqa xmm6, xmm1 ; punpckhwd xmm6, xmm5 ; f7 e7 d7 c7 f6 e6 d6 c6 f5 e5 d5 c5 f4 e4 d4 c4 punpcklwd xmm1, xmm5 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 movdqa xmm5, xmm2 ; 97 87 96 86 95 85 94 84 93 83 92 82 91 81 90 80 punpcklwd xmm5, xmm0 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 punpckhwd xmm2, xmm0 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 movdqa xmm0, xmm5 punpckldq xmm0, xmm1 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 punpckhdq xmm5, xmm1 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 movdqa xmm1, xmm2 ; b7 a7 97 87 b6 a6 96 86 b5 a5 95 85 b4 a4 94 84 punpckldq xmm1, xmm6 ; f5 e5 d5 c5 b5 a5 95 85 f4 e4 d4 c4 b4 a4 94 84 punpckhdq xmm2, xmm6 ; f7 e7 d7 c7 b7 a7 97 87 f6 e6 d6 c6 b6 a6 96 86 movdqa xmm6, xmm7 ; 77 67 57 47 37 27 17 07 76 66 56 46 36 26 16 06 punpcklqdq xmm6, xmm2 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 punpckhqdq xmm7, xmm2 ; f7 e7 d7 c7 b7 a7 97 87 77 67 57 47 37 27 17 07 %if %2 == 0 movdqa [rsp+_q3], xmm7 ; save 7 movdqa [rsp+_q2], xmm6 ; save 6 %endif movdqa xmm2, xmm3 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 punpckhqdq xmm3, xmm5 ; f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 punpcklqdq xmm2, xmm5 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 movdqa [rsp+_p1], xmm2 ; save 2 movdqa 
xmm5, xmm4 ; 75 65 55 45 35 25 15 05 74 64 54 44 34 24 14 04 punpcklqdq xmm4, xmm1 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 movdqa [rsp+_p0], xmm3 ; save 3 punpckhqdq xmm5, xmm1 ; f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 movdqa [rsp+_q0], xmm4 ; save 4 movdqa [rsp+_q1], xmm5 ; save 5 movdqa xmm1, [rsp+_t0] movdqa xmm2, xmm1 ; punpckhqdq xmm1, xmm0 ; f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 punpcklqdq xmm2, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 %if %2 == 0 movdqa [rsp+_p2], xmm1 movdqa [rsp+_p3], xmm2 %endif %endmacro %macro LFV_FILTER_MASK_HEV_MASK 0 movdqa xmm0, xmm6 ; q2 psubusb xmm0, xmm7 ; q2-q3 psubusb xmm7, xmm6 ; q3-q2 movdqa xmm4, xmm5 ; q1 por xmm7, xmm0 ; abs (q3-q2) psubusb xmm4, xmm6 ; q1-q2 movdqa xmm0, xmm1 psubusb xmm6, xmm5 ; q2-q1 por xmm6, xmm4 ; abs (q2-q1) psubusb xmm0, xmm2 ; p2 - p3; psubusb xmm2, xmm1 ; p3 - p2; por xmm0, xmm2 ; abs(p2-p3) movdqa xmm5, [rsp+_p1] ; p1 pmaxub xmm0, xmm7 movdqa xmm2, xmm5 ; p1 psubusb xmm5, xmm1 ; p1-p2 psubusb xmm1, xmm2 ; p2-p1 movdqa xmm7, xmm3 ; p0 psubusb xmm7, xmm2 ; p0-p1 por xmm1, xmm5 ; abs(p2-p1) pmaxub xmm0, xmm6 pmaxub xmm0, xmm1 movdqa xmm1, xmm2 ; p1 psubusb xmm2, xmm3 ; p1-p0 por xmm2, xmm7 ; abs(p1-p0) pmaxub xmm0, xmm2 movdqa xmm5, [rsp+_q0] ; q0 movdqa xmm7, [rsp+_q1] ; q1 mov rdx, arg(3) ; limit movdqa xmm6, xmm5 ; q0 movdqa xmm4, xmm7 ; q1 psubusb xmm5, xmm7 ; q0-q1 psubusb xmm7, xmm6 ; q1-q0 por xmm7, xmm5 ; abs(q1-q0) pmaxub xmm0, xmm7 psubusb xmm0, [rdx] ; limit mov rdx, arg(2) ; blimit movdqa xmm5, xmm4 ; q1 psubusb xmm5, xmm1 ; q1-=p1 psubusb xmm1, xmm4 ; p1-=q1 por xmm5, xmm1 ; abs(p1-q1) movdqa xmm1, xmm3 ; p0 pand xmm5, [GLOBAL(tfe)] ; set lsb of each byte to zero psubusb xmm1, xmm6 ; p0-q0 movdqa xmm4, [rdx] ; blimit mov rdx, arg(4) ; get thresh psrlw xmm5, 1 ; abs(p1-q1)/2 psubusb xmm6, xmm3 ; q0-p0 por xmm1, xmm6 ; abs(q0-p0) paddusb xmm1, xmm1 ; abs(q0-p0)*2 movdqa xmm3, [rdx] paddusb xmm1, xmm5 ; abs (p0 - q0) *2 + abs(p1-q1)/2 psubusb xmm2, xmm3 ; abs(q1 - q0) > thresh psubusb xmm7, xmm3 ; abs(p1 - p0)> thresh psubusb xmm1, xmm4 ; abs (p0 - q0) *2 + abs(p1-q1)/2 > blimit por xmm2, xmm7 ; abs(q1 - q0) > thresh || abs(p1 - p0) > thresh por xmm1, xmm0 ; mask pcmpeqb xmm2, xmm0 pxor xmm0, xmm0 pcmpeqb xmm4, xmm4 pcmpeqb xmm1, xmm0 pxor xmm4, xmm2 %endmacro %macro BV_TRANSPOSE 0 ; xmm1 = f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 ; xmm6 = f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 ; xmm3 = f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 ; xmm7 = f5 e5 d5 c5 b5 a5 95 85 75 65 55 45 35 25 15 05 movdqa xmm2, xmm1 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 punpcklbw xmm2, xmm6 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 movdqa xmm4, xmm3 ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 punpckhbw xmm1, xmm6 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 punpcklbw xmm4, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 punpckhbw xmm3, xmm7 ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 movdqa xmm6, xmm2 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 punpcklwd xmm2, xmm4 ; 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02 punpckhwd xmm6, xmm4 ; 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42 movdqa xmm5, xmm1 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 punpcklwd xmm1, xmm3 ; b5 b4 b3 b2 a5 a4 a3 a2 95 94 93 92 85 84 83 82 punpckhwd xmm5, xmm3 ; f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2 ; xmm2 = 35 34 33 32 25 24 23 22 15 14 13 12 05 04 03 02 ; xmm6 = 75 74 73 72 65 64 63 62 55 54 53 52 45 44 43 42 ; xmm1 = b5 b4 b3 b2 a5 a4 a3 
a2 95 94 93 92 85 84 83 82 ; xmm5 = f5 f4 f3 f2 e5 e4 e3 e2 d5 d4 d3 d2 c5 c4 c3 c2 %endmacro %macro BV_WRITEBACK 2 movd [rsi+2], %1 movd [rsi+4*rax+2], %2 psrldq %1, 4 psrldq %2, 4 movd [rdi+2], %1 movd [rdi+4*rax+2], %2 psrldq %1, 4 psrldq %2, 4 movd [rsi+2*rax+2], %1 movd [rsi+2*rcx+2], %2 psrldq %1, 4 psrldq %2, 4 movd [rdi+2*rax+2], %1 movd [rdi+2*rcx+2], %2 %endmacro %if ABI_IS_32BIT ;void vp8_loop_filter_vertical_edge_sse2 ;( ; unsigned char *src_ptr, ; int src_pixel_step, ; const char *blimit, ; const char *limit, ; const char *thresh, ;) global sym(vp8_loop_filter_vertical_edge_sse2) PRIVATE sym(vp8_loop_filter_vertical_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, lf_var_size mov rsi, arg(0) ; src_ptr movsxd rax, dword ptr arg(1) ; src_pixel_step lea rsi, [rsi - 4] lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing lea rcx, [rax*2+rax] ;transpose 16x8 to 8x16, and store the 8-line result on stack. TRANSPOSE_16X8 1, 1 ; calculate filter mask and high edge variance LFV_FILTER_MASK_HEV_MASK ; start work on filters B_FILTER 2 ; transpose and write back - only work on q1, q0, p0, p1 BV_TRANSPOSE ; store 16-line result lea rdx, [rax] neg rdx BV_WRITEBACK xmm1, xmm5 lea rsi, [rsi+rdx*8] lea rdi, [rdi+rdx*8] BV_WRITEBACK xmm2, xmm6 add rsp, lf_var_size pop rsp ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret %endif ;void vp8_loop_filter_vertical_edge_uv_sse2 ;( ; unsigned char *u, ; int src_pixel_step, ; const char *blimit, ; const char *limit, ; const char *thresh, ; unsigned char *v ;) global sym(vp8_loop_filter_vertical_edge_uv_sse2) PRIVATE sym(vp8_loop_filter_vertical_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, lf_var_size mov rsi, arg(0) ; u_ptr movsxd rax, dword ptr arg(1) ; src_pixel_step lea rsi, [rsi - 4] lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing lea rcx, [rax+2*rax] ;transpose 16x8 to 8x16, and store the 8-line result on stack. 
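        ; For reference: with the first macro argument set to 0, TRANSPOSE_16X8
        ; loads its first 8 rows from the u plane, then reloads rsi from arg(5)
        ; and takes the remaining 8 rows from the v plane, so a single 16x8
        ; transpose covers both chroma planes at once.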
TRANSPOSE_16X8 0, 1 ; calculate filter mask and high edge variance LFV_FILTER_MASK_HEV_MASK ; start work on filters B_FILTER 2 ; transpose and write back - only work on q1, q0, p0, p1 BV_TRANSPOSE lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing ; store 16-line result BV_WRITEBACK xmm1, xmm5 mov rsi, arg(0) ; u_ptr lea rsi, [rsi - 4] lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing BV_WRITEBACK xmm2, xmm6 add rsp, lf_var_size pop rsp ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret %macro MBV_TRANSPOSE 0 movdqa xmm0, [rsp+_p3] ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 movdqa xmm1, xmm0 ; f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 punpcklbw xmm0, xmm2 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 punpckhbw xmm1, xmm2 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 movdqa xmm7, [rsp+_p1] ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 movdqa xmm6, xmm7 ; f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 punpcklbw xmm7, [rsp+_p0] ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 punpckhbw xmm6, [rsp+_p0] ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 movdqa xmm3, xmm0 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 punpcklwd xmm0, xmm7 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 punpckhwd xmm3, xmm7 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 movdqa xmm4, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 punpcklwd xmm1, xmm6 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 punpckhwd xmm4, xmm6 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 movdqa xmm7, [rsp+_q0] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 punpcklbw xmm7, [rsp+_q1] ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 movdqa xmm6, xmm5 ; f6 e6 d6 c6 b6 a6 96 86 76 66 56 46 36 26 16 06 punpcklbw xmm6, [rsp+_q3] ; 77 76 67 66 57 56 47 46 37 36 27 26 17 16 07 06 movdqa xmm2, xmm7 ; 75 74 65 64 55 54 45 44 35 34 25 24 15 14 05 04 punpcklwd xmm7, xmm6 ; 37 36 35 34 27 26 25 24 17 16 15 14 07 06 05 04 punpckhwd xmm2, xmm6 ; 77 76 75 74 67 66 65 64 57 56 55 54 47 46 45 44 movdqa xmm6, xmm0 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 punpckldq xmm0, xmm7 ; 17 16 15 14 13 12 11 10 07 06 05 04 03 02 01 00 punpckhdq xmm6, xmm7 ; 37 36 35 34 33 32 31 30 27 26 25 24 23 22 21 20 %endmacro %macro MBV_WRITEBACK_1 0 movq [rsi], xmm0 movhps [rdi], xmm0 movq [rsi+2*rax], xmm6 movhps [rdi+2*rax], xmm6 movdqa xmm0, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 punpckldq xmm0, xmm2 ; 57 56 55 54 53 52 51 50 47 46 45 44 43 42 41 40 punpckhdq xmm3, xmm2 ; 77 76 75 74 73 72 71 70 67 66 65 64 63 62 61 60 movq [rsi+4*rax], xmm0 movhps [rdi+4*rax], xmm0 movq [rsi+2*rcx], xmm3 movhps [rdi+2*rcx], xmm3 movdqa xmm7, [rsp+_q0] ; f4 e4 d4 c4 b4 a4 94 84 74 64 54 44 34 24 14 04 punpckhbw xmm7, [rsp+_q1] ; f5 f4 e5 e4 d5 d4 c5 c4 b5 b4 a5 a4 95 94 85 84 punpckhbw xmm5, [rsp+_q3] ; f7 f6 e7 e6 d7 d6 c7 c6 b7 b6 a7 a6 97 96 87 86 movdqa xmm0, xmm7 punpcklwd xmm0, xmm5 ; b7 b6 b4 b4 a7 a6 a5 a4 97 96 95 94 87 86 85 84 punpckhwd xmm7, xmm5 ; f7 f6 f5 f4 e7 e6 e5 e4 d7 d6 d5 d4 c7 c6 c5 c4 movdqa xmm5, xmm1 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 punpckldq xmm1, xmm0 ; 97 96 95 94 93 92 91 90 87 86 85 83 84 82 81 80 punpckhdq xmm5, xmm0 ; b7 b6 b5 b4 b3 b2 b1 b0 a7 a6 a5 a4 a3 a2 a1 a0 %endmacro %macro MBV_WRITEBACK_2 0 movq [rsi], xmm1 movhps [rdi], xmm1 movq [rsi+2*rax], xmm5 movhps [rdi+2*rax], xmm5 movdqa xmm1, xmm4 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 punpckldq xmm1, xmm7 ; d7 d6 d5 d4 d3 d2 d1 d0 c7 c6 c5 c4 
c3 c2 c1 c0 punpckhdq xmm4, xmm7 ; f7 f6 f4 f4 f3 f2 f1 f0 e7 e6 e5 e4 e3 e2 e1 e0 movq [rsi+4*rax], xmm1 movhps [rdi+4*rax], xmm1 movq [rsi+2*rcx], xmm4 movhps [rdi+2*rcx], xmm4 %endmacro ;void vp8_mbloop_filter_vertical_edge_sse2 ;( ; unsigned char *src_ptr, ; int src_pixel_step, ; const char *blimit, ; const char *limit, ; const char *thresh, ;) global sym(vp8_mbloop_filter_vertical_edge_sse2) PRIVATE sym(vp8_mbloop_filter_vertical_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, lf_var_size mov rsi, arg(0) ; src_ptr movsxd rax, dword ptr arg(1) ; src_pixel_step lea rsi, [rsi - 4] lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing lea rcx, [rax*2+rax] ; Transpose TRANSPOSE_16X8 1, 0 ; calculate filter mask and high edge variance LFV_FILTER_MASK_HEV_MASK neg rax ; start work on filters MB_FILTER_AND_WRITEBACK 2 lea rsi, [rsi+rax*8] lea rdi, [rdi+rax*8] ; transpose and write back MBV_TRANSPOSE neg rax MBV_WRITEBACK_1 lea rsi, [rsi+rax*8] lea rdi, [rdi+rax*8] MBV_WRITEBACK_2 add rsp, lf_var_size pop rsp ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_mbloop_filter_vertical_edge_uv_sse2 ;( ; unsigned char *u, ; int src_pixel_step, ; const char *blimit, ; const char *limit, ; const char *thresh, ; unsigned char *v ;) global sym(vp8_mbloop_filter_vertical_edge_uv_sse2) PRIVATE sym(vp8_mbloop_filter_vertical_edge_uv_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, lf_var_size mov rsi, arg(0) ; u_ptr movsxd rax, dword ptr arg(1) ; src_pixel_step lea rsi, [rsi - 4] lea rdi, [rsi + rax] ; rdi points to row +1 for indirect addressing lea rcx, [rax+2*rax] ; Transpose TRANSPOSE_16X8 0, 0 ; calculate filter mask and high edge variance LFV_FILTER_MASK_HEV_MASK ; start work on filters MB_FILTER_AND_WRITEBACK 2 ; transpose and write back MBV_TRANSPOSE mov rsi, arg(0) ;u_ptr lea rsi, [rsi - 4] lea rdi, [rsi + rax] MBV_WRITEBACK_1 mov rsi, arg(5) ;v_ptr lea rsi, [rsi - 4] lea rdi, [rsi + rax] MBV_WRITEBACK_2 add rsp, lf_var_size pop rsp ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_loop_filter_simple_horizontal_edge_sse2 ;( ; unsigned char *src_ptr, ; int src_pixel_step, ; const char *blimit, ;) global sym(vp8_loop_filter_simple_horizontal_edge_sse2) PRIVATE sym(vp8_loop_filter_simple_horizontal_edge_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 3 SAVE_XMM 7 GET_GOT rbx ; end prolog mov rcx, arg(0) ;src_ptr movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
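        ; The mask computed below realizes the simple-filter breakout test
        ;     2 * |p0 - q0| + |p1 - q1| / 2 > blimit
        ; The tfe constant clears the low bit of each byte so that the psrlw
        ; by 1 cannot shift a neighbouring byte's bit into the |p1-q1|/2 lanes.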
movdqa xmm6, [GLOBAL(tfe)] lea rdx, [rcx + rax] neg rax ; calculate mask movdqa xmm0, [rdx] ; q1 mov rdx, arg(2) ;blimit movdqa xmm1, [rcx+2*rax] ; p1 movdqa xmm2, xmm1 movdqa xmm3, xmm0 psubusb xmm0, xmm1 ; q1-=p1 psubusb xmm1, xmm3 ; p1-=q1 por xmm1, xmm0 ; abs(p1-q1) pand xmm1, xmm6 ; set lsb of each byte to zero psrlw xmm1, 1 ; abs(p1-q1)/2 movdqa xmm7, XMMWORD PTR [rdx] movdqa xmm5, [rcx+rax] ; p0 movdqa xmm4, [rcx] ; q0 movdqa xmm0, xmm4 ; q0 movdqa xmm6, xmm5 ; p0 psubusb xmm5, xmm4 ; p0-=q0 psubusb xmm4, xmm6 ; q0-=p0 por xmm5, xmm4 ; abs(p0 - q0) movdqa xmm4, [GLOBAL(t80)] paddusb xmm5, xmm5 ; abs(p0-q0)*2 paddusb xmm5, xmm1 ; abs (p0 - q0) *2 + abs(p1-q1)/2 psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit pxor xmm7, xmm7 pcmpeqb xmm5, xmm7 ; start work on filters pxor xmm2, xmm4 ; p1 offset to convert to signed values pxor xmm3, xmm4 ; q1 offset to convert to signed values psubsb xmm2, xmm3 ; p1 - q1 pxor xmm6, xmm4 ; offset to convert to signed values pxor xmm0, xmm4 ; offset to convert to signed values movdqa xmm3, xmm0 ; q0 psubsb xmm0, xmm6 ; q0 - p0 paddsb xmm2, xmm0 ; p1 - q1 + 1 * (q0 - p0) paddsb xmm2, xmm0 ; p1 - q1 + 2 * (q0 - p0) paddsb xmm2, xmm0 ; p1 - q1 + 3 * (q0 - p0) pand xmm5, xmm2 ; mask filter values we don't care about movdqa xmm0, xmm5 paddsb xmm5, [GLOBAL(t3)] ; 3* (q0 - p0) + (p1 - q1) + 4 paddsb xmm0, [GLOBAL(t4)] ; +3 instead of +4 movdqa xmm1, [GLOBAL(te0)] movdqa xmm2, [GLOBAL(t1f)] ; pxor xmm7, xmm7 pcmpgtb xmm7, xmm0 ;save sign pand xmm7, xmm1 ;preserve the upper 3 bits psrlw xmm0, 3 pand xmm0, xmm2 ;clear out upper 3 bits por xmm0, xmm7 ;add sign psubsb xmm3, xmm0 ; q0-= q0sz add pxor xmm7, xmm7 pcmpgtb xmm7, xmm5 ;save sign pand xmm7, xmm1 ;preserve the upper 3 bits psrlw xmm5, 3 pand xmm5, xmm2 ;clear out upper 3 bits por xmm5, xmm7 ;add sign paddsb xmm6, xmm5 ; p0+= p0 add pxor xmm3, xmm4 ; unoffset movdqa [rcx], xmm3 ; write back pxor xmm6, xmm4 ; unoffset movdqa [rcx+rax], xmm6 ; write back ; begin epilog RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_loop_filter_simple_vertical_edge_sse2 ;( ; unsigned char *src_ptr, ; int src_pixel_step, ; const char *blimit, ;) global sym(vp8_loop_filter_simple_vertical_edge_sse2) PRIVATE sym(vp8_loop_filter_simple_vertical_edge_sse2): push rbp ; save old base pointer value. mov rbp, rsp ; set new base pointer value. SHADOW_ARGS_TO_STACK 3 SAVE_XMM 7 GET_GOT rbx ; save callee-saved reg push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 32 ; reserve 32 bytes %define t0 [rsp + 0] ;__declspec(align(16)) char t0[16]; %define t1 [rsp + 16] ;__declspec(align(16)) char t1[16]; mov rsi, arg(0) ;src_ptr movsxd rax, dword ptr arg(1) ;src_pixel_step ; destination pitch? 
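        ; Each movd below fetches 4 bytes starting 2 pixels to the left of the
        ; edge, i.e. p1 p0 q0 q1 for one row; 16 such rows are gathered and
        ; transposed with punpck so the row-oriented filter arithmetic can be
        ; applied, then transposed back before the stores.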
lea rsi, [rsi - 2 ] lea rdi, [rsi + rax] lea rdx, [rsi + rax*4] lea rcx, [rdx + rax] movd xmm0, [rsi] ; (high 96 bits unused) 03 02 01 00 movd xmm1, [rdx] ; (high 96 bits unused) 43 42 41 40 movd xmm2, [rdi] ; 13 12 11 10 movd xmm3, [rcx] ; 53 52 51 50 punpckldq xmm0, xmm1 ; (high 64 bits unused) 43 42 41 40 03 02 01 00 punpckldq xmm2, xmm3 ; 53 52 51 50 13 12 11 10 movd xmm4, [rsi + rax*2] ; 23 22 21 20 movd xmm5, [rdx + rax*2] ; 63 62 61 60 movd xmm6, [rdi + rax*2] ; 33 32 31 30 movd xmm7, [rcx + rax*2] ; 73 72 71 70 punpckldq xmm4, xmm5 ; 63 62 61 60 23 22 21 20 punpckldq xmm6, xmm7 ; 73 72 71 70 33 32 31 30 punpcklbw xmm0, xmm2 ; 53 43 52 42 51 41 50 40 13 03 12 02 11 01 10 00 punpcklbw xmm4, xmm6 ; 73 63 72 62 71 61 70 60 33 23 32 22 31 21 30 20 movdqa xmm1, xmm0 punpcklwd xmm0, xmm4 ; 33 23 13 03 32 22 12 02 31 21 11 01 30 20 10 00 punpckhwd xmm1, xmm4 ; 73 63 53 43 72 62 52 42 71 61 51 41 70 60 50 40 movdqa xmm2, xmm0 punpckldq xmm0, xmm1 ; 71 61 51 41 31 21 11 01 70 60 50 40 30 20 10 00 punpckhdq xmm2, xmm1 ; 73 63 53 43 33 23 13 03 72 62 52 42 32 22 12 02 lea rsi, [rsi + rax*8] lea rdi, [rsi + rax] lea rdx, [rsi + rax*4] lea rcx, [rdx + rax] movd xmm4, [rsi] ; 83 82 81 80 movd xmm1, [rdx] ; c3 c2 c1 c0 movd xmm6, [rdi] ; 93 92 91 90 movd xmm3, [rcx] ; d3 d2 d1 d0 punpckldq xmm4, xmm1 ; c3 c2 c1 c0 83 82 81 80 punpckldq xmm6, xmm3 ; d3 d2 d1 d0 93 92 91 90 movd xmm1, [rsi + rax*2] ; a3 a2 a1 a0 movd xmm5, [rdx + rax*2] ; e3 e2 e1 e0 movd xmm3, [rdi + rax*2] ; b3 b2 b1 b0 movd xmm7, [rcx + rax*2] ; f3 f2 f1 f0 punpckldq xmm1, xmm5 ; e3 e2 e1 e0 a3 a2 a1 a0 punpckldq xmm3, xmm7 ; f3 f2 f1 f0 b3 b2 b1 b0 punpcklbw xmm4, xmm6 ; d3 c3 d2 c2 d1 c1 d0 c0 93 83 92 82 91 81 90 80 punpcklbw xmm1, xmm3 ; f3 e3 f2 e2 f1 e1 f0 e0 b3 a3 b2 a2 b1 a1 b0 a0 movdqa xmm7, xmm4 punpcklwd xmm4, xmm1 ; b3 a3 93 83 b2 a2 92 82 b1 a1 91 81 b0 a0 90 80 punpckhwd xmm7, xmm1 ; f3 e3 d3 c3 f2 e2 d2 c2 f1 e1 d1 c1 f0 e0 d0 c0 movdqa xmm6, xmm4 punpckldq xmm4, xmm7 ; f1 e1 d1 c1 b1 a1 91 81 f0 e0 d0 c0 b0 a0 90 80 punpckhdq xmm6, xmm7 ; f3 e3 d3 c3 b3 a3 93 83 f2 e2 d2 c2 b2 a2 92 82 movdqa xmm1, xmm0 movdqa xmm3, xmm2 punpcklqdq xmm0, xmm4 ; p1 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 punpckhqdq xmm1, xmm4 ; p0 f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 punpcklqdq xmm2, xmm6 ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 punpckhqdq xmm3, xmm6 ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 mov rdx, arg(2) ;blimit ; calculate mask movdqa xmm6, xmm0 ; p1 movdqa xmm7, xmm3 ; q1 psubusb xmm7, xmm0 ; q1-=p1 psubusb xmm6, xmm3 ; p1-=q1 por xmm6, xmm7 ; abs(p1-q1) pand xmm6, [GLOBAL(tfe)] ; set lsb of each byte to zero psrlw xmm6, 1 ; abs(p1-q1)/2 movdqa xmm7, [rdx] movdqa xmm5, xmm1 ; p0 movdqa xmm4, xmm2 ; q0 psubusb xmm5, xmm2 ; p0-=q0 psubusb xmm4, xmm1 ; q0-=p0 por xmm5, xmm4 ; abs(p0 - q0) paddusb xmm5, xmm5 ; abs(p0-q0)*2 paddusb xmm5, xmm6 ; abs (p0 - q0) *2 + abs(p1-q1)/2 movdqa xmm4, [GLOBAL(t80)] psubusb xmm5, xmm7 ; abs(p0 - q0) *2 + abs(p1-q1)/2 > blimit pxor xmm7, xmm7 pcmpeqb xmm5, xmm7 ; mm5 = mask ; start work on filters movdqa t0, xmm0 movdqa t1, xmm3 pxor xmm0, xmm4 ; p1 offset to convert to signed values pxor xmm3, xmm4 ; q1 offset to convert to signed values psubsb xmm0, xmm3 ; p1 - q1 pxor xmm1, xmm4 ; offset to convert to signed values pxor xmm2, xmm4 ; offset to convert to signed values movdqa xmm3, xmm2 ; offseted ; q0 psubsb xmm2, xmm1 ; q0 - p0 paddsb xmm0, xmm2 ; p1 - q1 + 1 * (q0 - p0) paddsb xmm0, xmm2 ; p1 - q1 + 2 * (q0 - p0) paddsb xmm0, xmm2 ; p1 - q1 + 3 * (q0 
- p0) pand xmm5, xmm0 ; mask filter values we don't care about movdqa xmm0, xmm5 paddsb xmm5, [GLOBAL(t3)] ; 3* (q0 - p0) + (p1 - q1) + 4 paddsb xmm0, [GLOBAL(t4)] ; +3 instead of +4 movdqa xmm6, [GLOBAL(te0)] movdqa xmm2, [GLOBAL(t1f)] ; pxor xmm7, xmm7 pcmpgtb xmm7, xmm0 ;save sign pand xmm7, xmm6 ;preserve the upper 3 bits psrlw xmm0, 3 pand xmm0, xmm2 ;clear out upper 3 bits por xmm0, xmm7 ;add sign psubsb xmm3, xmm0 ; q0-= q0sz add pxor xmm7, xmm7 pcmpgtb xmm7, xmm5 ;save sign pand xmm7, xmm6 ;preserve the upper 3 bits psrlw xmm5, 3 pand xmm5, xmm2 ;clear out upper 3 bits por xmm5, xmm7 ;add sign paddsb xmm1, xmm5 ; p0+= p0 add pxor xmm3, xmm4 ; unoffset q0 pxor xmm1, xmm4 ; unoffset p0 movdqa xmm0, t0 ; p1 movdqa xmm4, t1 ; q1 ; write out order: xmm0 xmm2 xmm1 xmm3 lea rdx, [rsi + rax*4] ; transpose back to write out ; p1 f0 e0 d0 c0 b0 a0 90 80 70 60 50 40 30 20 10 00 ; p0 f1 e1 d1 c1 b1 a1 91 81 71 61 51 41 31 21 11 01 ; q0 f2 e2 d2 c2 b2 a2 92 82 72 62 52 42 32 22 12 02 ; q1 f3 e3 d3 c3 b3 a3 93 83 73 63 53 43 33 23 13 03 movdqa xmm6, xmm0 punpcklbw xmm0, xmm1 ; 71 70 61 60 51 50 41 40 31 30 21 20 11 10 01 00 punpckhbw xmm6, xmm1 ; f1 f0 e1 e0 d1 d0 c1 c0 b1 b0 a1 a0 91 90 81 80 movdqa xmm5, xmm3 punpcklbw xmm3, xmm4 ; 73 72 63 62 53 52 43 42 33 32 23 22 13 12 03 02 punpckhbw xmm5, xmm4 ; f3 f2 e3 e2 d3 d2 c3 c2 b3 b2 a3 a2 93 92 83 82 movdqa xmm2, xmm0 punpcklwd xmm0, xmm3 ; 33 32 31 30 23 22 21 20 13 12 11 10 03 02 01 00 punpckhwd xmm2, xmm3 ; 73 72 71 70 63 62 61 60 53 52 51 50 43 42 41 40 movdqa xmm3, xmm6 punpcklwd xmm6, xmm5 ; b3 b2 b1 b0 a3 a2 a1 a0 93 92 91 90 83 82 81 80 punpckhwd xmm3, xmm5 ; f3 f2 f1 f0 e3 e2 e1 e0 d3 d2 d1 d0 c3 c2 c1 c0 movd [rsi], xmm6 ; write the second 8-line result movd [rdx], xmm3 psrldq xmm6, 4 psrldq xmm3, 4 movd [rdi], xmm6 movd [rcx], xmm3 psrldq xmm6, 4 psrldq xmm3, 4 movd [rsi + rax*2], xmm6 movd [rdx + rax*2], xmm3 psrldq xmm6, 4 psrldq xmm3, 4 movd [rdi + rax*2], xmm6 movd [rcx + rax*2], xmm3 neg rax lea rsi, [rsi + rax*8] neg rax lea rdi, [rsi + rax] lea rdx, [rsi + rax*4] lea rcx, [rdx + rax] movd [rsi], xmm0 ; write the first 8-line result movd [rdx], xmm2 psrldq xmm0, 4 psrldq xmm2, 4 movd [rdi], xmm0 movd [rcx], xmm2 psrldq xmm0, 4 psrldq xmm2, 4 movd [rsi + rax*2], xmm0 movd [rdx + rax*2], xmm2 psrldq xmm0, 4 psrldq xmm2, 4 movd [rdi + rax*2], xmm0 movd [rcx + rax*2], xmm2 add rsp, 32 pop rsp ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret SECTION_RODATA align 16 tfe: times 16 db 0xfe align 16 t80: times 16 db 0x80 align 16 t1s: times 16 db 0x01 align 16 t3: times 16 db 0x03 align 16 t4: times 16 db 0x04 align 16 ones: times 8 dw 0x0001 align 16 s9: times 8 dw 0x0900 align 16 s63: times 8 dw 0x003f align 16 te0: times 16 db 0xe0 align 16 t1f: times 16 db 0x1f libvpx-1.8.2/vp8/common/x86/loopfilter_x86.c000066400000000000000000000131371357355204000205210ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "vpx_config.h" #include "vp8/common/loopfilter.h" #define prototype_loopfilter(sym) \ void sym(unsigned char *src, int pitch, const unsigned char *blimit, \ const unsigned char *limit, const unsigned char *thresh, int count) #define prototype_loopfilter_nc(sym) \ void sym(unsigned char *src, int pitch, const unsigned char *blimit, \ const unsigned char *limit, const unsigned char *thresh) #define prototype_simple_loopfilter(sym) \ void sym(unsigned char *y, int ystride, const unsigned char *blimit) #if HAVE_SSE2 && VPX_ARCH_X86_64 prototype_loopfilter(vp8_loop_filter_bv_y_sse2); prototype_loopfilter(vp8_loop_filter_bh_y_sse2); #else prototype_loopfilter_nc(vp8_loop_filter_vertical_edge_sse2); prototype_loopfilter_nc(vp8_loop_filter_horizontal_edge_sse2); #endif prototype_loopfilter_nc(vp8_mbloop_filter_vertical_edge_sse2); prototype_loopfilter_nc(vp8_mbloop_filter_horizontal_edge_sse2); extern loop_filter_uvfunction vp8_loop_filter_horizontal_edge_uv_sse2; extern loop_filter_uvfunction vp8_loop_filter_vertical_edge_uv_sse2; extern loop_filter_uvfunction vp8_mbloop_filter_horizontal_edge_uv_sse2; extern loop_filter_uvfunction vp8_mbloop_filter_vertical_edge_uv_sse2; /* Horizontal MB filtering */ #if HAVE_SSE2 void vp8_loop_filter_mbh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { vp8_mbloop_filter_horizontal_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr); if (u_ptr) { vp8_mbloop_filter_horizontal_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); } } /* Vertical MB Filtering */ void vp8_loop_filter_mbv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { vp8_mbloop_filter_vertical_edge_sse2(y_ptr, y_stride, lfi->mblim, lfi->lim, lfi->hev_thr); if (u_ptr) { vp8_mbloop_filter_vertical_edge_uv_sse2(u_ptr, uv_stride, lfi->mblim, lfi->lim, lfi->hev_thr, v_ptr); } } /* Horizontal B Filtering */ void vp8_loop_filter_bh_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { #if VPX_ARCH_X86_64 vp8_loop_filter_bh_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); #else vp8_loop_filter_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); vp8_loop_filter_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); vp8_loop_filter_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); #endif if (u_ptr) { vp8_loop_filter_horizontal_edge_uv_sse2(u_ptr + 4 * uv_stride, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4 * uv_stride); } } void vp8_loop_filter_bhs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 4 * y_stride, y_stride, blimit); vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 8 * y_stride, y_stride, blimit); vp8_loop_filter_simple_horizontal_edge_sse2(y_ptr + 12 * y_stride, y_stride, blimit); } /* Vertical B Filtering */ void vp8_loop_filter_bv_sse2(unsigned char *y_ptr, unsigned char *u_ptr, unsigned char *v_ptr, int y_stride, int uv_stride, loop_filter_info *lfi) { #if VPX_ARCH_X86_64 vp8_loop_filter_bv_y_sse2(y_ptr, y_stride, lfi->blim, lfi->lim, lfi->hev_thr, 2); #else vp8_loop_filter_vertical_edge_sse2(y_ptr + 4, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); vp8_loop_filter_vertical_edge_sse2(y_ptr + 8, y_stride, lfi->blim, 
lfi->lim, lfi->hev_thr); vp8_loop_filter_vertical_edge_sse2(y_ptr + 12, y_stride, lfi->blim, lfi->lim, lfi->hev_thr); #endif if (u_ptr) { vp8_loop_filter_vertical_edge_uv_sse2(u_ptr + 4, uv_stride, lfi->blim, lfi->lim, lfi->hev_thr, v_ptr + 4); } } void vp8_loop_filter_bvs_sse2(unsigned char *y_ptr, int y_stride, const unsigned char *blimit) { vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 4, y_stride, blimit); vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 8, y_stride, blimit); vp8_loop_filter_simple_vertical_edge_sse2(y_ptr + 12, y_stride, blimit); } #endif libvpx-1.8.2/vp8/common/x86/mfqe_sse2.asm000066400000000000000000000164171357355204000200630ustar00rootroot00000000000000; ; Copyright (c) 2012 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" SECTION .text ;void vp8_filter_by_weight16x16_sse2 ;( ; unsigned char *src, ; int src_stride, ; unsigned char *dst, ; int dst_stride, ; int src_weight ;) global sym(vp8_filter_by_weight16x16_sse2) PRIVATE sym(vp8_filter_by_weight16x16_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 SAVE_XMM 6 GET_GOT rbx push rsi push rdi ; end prolog movd xmm0, arg(4) ; src_weight pshuflw xmm0, xmm0, 0x0 ; replicate to all low words punpcklqdq xmm0, xmm0 ; replicate to all hi words movdqa xmm1, [GLOBAL(tMFQE)] psubw xmm1, xmm0 ; dst_weight mov rax, arg(0) ; src mov rsi, arg(1) ; src_stride mov rdx, arg(2) ; dst mov rdi, arg(3) ; dst_stride mov rcx, 16 ; loop count pxor xmm6, xmm6 .combine: movdqa xmm2, [rax] movdqa xmm4, [rdx] add rax, rsi ; src * src_weight movdqa xmm3, xmm2 punpcklbw xmm2, xmm6 punpckhbw xmm3, xmm6 pmullw xmm2, xmm0 pmullw xmm3, xmm0 ; dst * dst_weight movdqa xmm5, xmm4 punpcklbw xmm4, xmm6 punpckhbw xmm5, xmm6 pmullw xmm4, xmm1 pmullw xmm5, xmm1 ; sum, round and shift paddw xmm2, xmm4 paddw xmm3, xmm5 paddw xmm2, [GLOBAL(tMFQE_round)] paddw xmm3, [GLOBAL(tMFQE_round)] psrlw xmm2, 4 psrlw xmm3, 4 packuswb xmm2, xmm3 movdqa [rdx], xmm2 add rdx, rdi dec rcx jnz .combine ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_filter_by_weight8x8_sse2 ;( ; unsigned char *src, ; int src_stride, ; unsigned char *dst, ; int dst_stride, ; int src_weight ;) global sym(vp8_filter_by_weight8x8_sse2) PRIVATE sym(vp8_filter_by_weight8x8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 GET_GOT rbx push rsi push rdi ; end prolog movd xmm0, arg(4) ; src_weight pshuflw xmm0, xmm0, 0x0 ; replicate to all low words punpcklqdq xmm0, xmm0 ; replicate to all hi words movdqa xmm1, [GLOBAL(tMFQE)] psubw xmm1, xmm0 ; dst_weight mov rax, arg(0) ; src mov rsi, arg(1) ; src_stride mov rdx, arg(2) ; dst mov rdi, arg(3) ; dst_stride mov rcx, 8 ; loop count pxor xmm4, xmm4 .combine: movq xmm2, [rax] movq xmm3, [rdx] add rax, rsi ; src * src_weight punpcklbw xmm2, xmm4 pmullw xmm2, xmm0 ; dst * dst_weight punpcklbw xmm3, xmm4 pmullw xmm3, xmm1 ; sum, round and shift paddw xmm2, xmm3 paddw xmm2, [GLOBAL(tMFQE_round)] psrlw xmm2, 4 packuswb xmm2, xmm4 movq [rdx], xmm2 add rdx, rdi dec rcx jnz .combine ; begin epilog pop rdi pop rsi RESTORE_GOT UNSHADOW_ARGS pop rbp ret ;void vp8_variance_and_sad_16x16_sse2 | arg ;( ; unsigned char *src1, 0 ; int stride1, 1 ; unsigned char 
*src2, 2 ; int stride2, 3 ; unsigned int *variance, 4 ; unsigned int *sad, 5 ;) global sym(vp8_variance_and_sad_16x16_sse2) PRIVATE sym(vp8_variance_and_sad_16x16_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 GET_GOT rbx push rsi push rdi ; end prolog mov rax, arg(0) ; src1 mov rcx, arg(1) ; stride1 mov rdx, arg(2) ; src2 mov rdi, arg(3) ; stride2 mov rsi, 16 ; block height ; Prep accumulator registers pxor xmm3, xmm3 ; SAD pxor xmm4, xmm4 ; sum of src2 pxor xmm5, xmm5 ; sum of src2^2 ; Because we're working with the actual output frames ; we can't depend on any kind of data alignment. .accumulate: movdqa xmm0, [rax] ; src1 movdqa xmm1, [rdx] ; src2 add rax, rcx ; src1 + stride1 add rdx, rdi ; src2 + stride2 ; SAD(src1, src2) psadbw xmm0, xmm1 paddusw xmm3, xmm0 ; SUM(src2) pxor xmm2, xmm2 psadbw xmm2, xmm1 ; sum src2 by misusing SAD against 0 paddusw xmm4, xmm2 ; pmaddubsw would be ideal if it took two unsigned values. instead, ; it expects a signed and an unsigned value. so instead we zero extend ; and operate on words. pxor xmm2, xmm2 movdqa xmm0, xmm1 punpcklbw xmm0, xmm2 punpckhbw xmm1, xmm2 pmaddwd xmm0, xmm0 pmaddwd xmm1, xmm1 paddd xmm5, xmm0 paddd xmm5, xmm1 sub rsi, 1 jnz .accumulate ; phaddd only operates on adjacent double words. ; Finalize SAD and store movdqa xmm0, xmm3 psrldq xmm0, 8 paddusw xmm0, xmm3 paddd xmm0, [GLOBAL(t128)] psrld xmm0, 8 mov rax, arg(5) movd [rax], xmm0 ; Accumulate sum of src2 movdqa xmm0, xmm4 psrldq xmm0, 8 paddusw xmm0, xmm4 ; Square src2. Ignore high value pmuludq xmm0, xmm0 psrld xmm0, 8 ; phaddw could be used to sum adjacent values but we want ; all the values summed. promote to doubles, accumulate, ; shift and sum pxor xmm2, xmm2 movdqa xmm1, xmm5 punpckldq xmm1, xmm2 punpckhdq xmm5, xmm2 paddd xmm1, xmm5 movdqa xmm2, xmm1 psrldq xmm1, 8 paddd xmm1, xmm2 psubd xmm1, xmm0 ; (variance + 128) >> 8 paddd xmm1, [GLOBAL(t128)] psrld xmm1, 8 mov rax, arg(4) movd [rax], xmm1 ; begin epilog pop rdi pop rsi RESTORE_GOT UNSHADOW_ARGS pop rbp ret SECTION_RODATA align 16 t128: %ifndef __NASM_VER__ ddq 128 %elif CONFIG_BIG_ENDIAN dq 0, 128 %else dq 128, 0 %endif align 16 tMFQE: ; 1 << MFQE_PRECISION times 8 dw 0x10 align 16 tMFQE_round: ; 1 << (MFQE_PRECISION - 1) times 8 dw 0x08 libvpx-1.8.2/vp8/common/x86/recon_mmx.asm000066400000000000000000000057051357355204000201640ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
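The filter_by_weight kernels above blend the current block with its co-located reference block using a 4-bit fixed-point weight: the tMFQE table encodes 1 << MFQE_PRECISION and tMFQE_round the half-step rounding constant. Per pixel the computation reduces to the following sketch (blend_px is an illustrative name, not a libvpx function; it assumes MFQE_PRECISION == 4, as the 0x10/0x08 tables imply):

/* Per-pixel blend performed by vp8_filter_by_weight{16x16,8x8}_sse2. */
static unsigned char blend_px(unsigned char src, unsigned char dst,
                              int src_weight) {
  const int dst_weight = 16 - src_weight; /* tMFQE - src_weight */
  /* weighted sum, rounded (tMFQE_round) and scaled back by 4 bits */
  return (unsigned char)((src * src_weight + dst * dst_weight + 8) >> 4);
}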
;
%include "vpx_ports/x86_abi_support.asm"

SECTION .text

;void copy_mem8x8_mmx(
;    unsigned char *src,
;    int src_stride,
;    unsigned char *dst,
;    int dst_stride
;    )
global sym(vp8_copy_mem8x8_mmx) PRIVATE
sym(vp8_copy_mem8x8_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0) ;src;
    movq        mm0,        [rsi]

    movsxd      rax,        dword ptr arg(1) ;src_stride;
    mov         rdi,        arg(2) ;dst;

    movq        mm1,        [rsi+rax]
    movq        mm2,        [rsi+rax*2]

    movsxd      rcx,        dword ptr arg(3) ;dst_stride
    lea         rsi,        [rsi+rax*2]

    movq        [rdi],      mm0
    add         rsi,        rax

    movq        [rdi+rcx],  mm1
    movq        [rdi+rcx*2], mm2

    lea         rdi,        [rdi+rcx*2]
    movq        mm3,        [rsi]

    add         rdi,        rcx
    movq        mm4,        [rsi+rax]

    movq        mm5,        [rsi+rax*2]
    movq        [rdi],      mm3

    lea         rsi,        [rsi+rax*2]
    movq        [rdi+rcx],  mm4

    movq        [rdi+rcx*2], mm5
    lea         rdi,        [rdi+rcx*2]

    movq        mm0,        [rsi+rax]
    movq        mm1,        [rsi+rax*2]

    movq        [rdi+rcx],  mm0
    movq        [rdi+rcx*2], mm1

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop rbp
    ret

;void copy_mem8x4_mmx(
;    unsigned char *src,
;    int src_stride,
;    unsigned char *dst,
;    int dst_stride
;    )
global sym(vp8_copy_mem8x4_mmx) PRIVATE
sym(vp8_copy_mem8x4_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 4
    push        rsi
    push        rdi
    ; end prolog

    mov         rsi,        arg(0) ;src;
    movq        mm0,        [rsi]

    movsxd      rax,        dword ptr arg(1) ;src_stride;
    mov         rdi,        arg(2) ;dst;

    movq        mm1,        [rsi+rax]
    movq        mm2,        [rsi+rax*2]

    movsxd      rcx,        dword ptr arg(3) ;dst_stride
    lea         rsi,        [rsi+rax*2]

    movq        [rdi],      mm0
    movq        [rdi+rcx],  mm1

    movq        [rdi+rcx*2], mm2
    lea         rdi,        [rdi+rcx*2]

    movq        mm3,        [rsi+rax]
    movq        [rdi+rcx],  mm3

    ; begin epilog
    pop rdi
    pop rsi
    UNSHADOW_ARGS
    pop rbp
    ret

libvpx-1.8.2/vp8/common/x86/recon_sse2.asm
;
; Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
; Use of this source code is governed by a BSD-style license
; that can be found in the LICENSE file in the root of the source
; tree. An additional intellectual property rights grant can be found
; in the file PATENTS. All contributing project authors may
; be found in the AUTHORS file in the root of the source tree.
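Both MMX copy routines above and the SSE2 16x16 copy in the file that follows implement the same operation, a strided block copy. In plain C (a sketch; copy_mem is an illustrative name covering all three block sizes):

#include <string.h>

/* Strided block copy equivalent to the copy_mem kernels. Note the SSE2
 * 16x16 version uses movdqu loads but movdqa stores, i.e. it assumes the
 * destination rows are 16-byte aligned while the source may not be. */
static void copy_mem(const unsigned char *src, int src_stride,
                     unsigned char *dst, int dst_stride, int w, int h) {
  int r;
  for (r = 0; r < h; ++r)
    memcpy(dst + r * dst_stride, src + r * src_stride, w);
}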
; %include "vpx_ports/x86_abi_support.asm" SECTION .text ;void copy_mem16x16_sse2( ; unsigned char *src, ; int src_stride, ; unsigned char *dst, ; int dst_stride ; ) global sym(vp8_copy_mem16x16_sse2) PRIVATE sym(vp8_copy_mem16x16_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 4 push rsi push rdi ; end prolog mov rsi, arg(0) ;src; movdqu xmm0, [rsi] movsxd rax, dword ptr arg(1) ;src_stride; mov rdi, arg(2) ;dst; movdqu xmm1, [rsi+rax] movdqu xmm2, [rsi+rax*2] movsxd rcx, dword ptr arg(3) ;dst_stride lea rsi, [rsi+rax*2] movdqa [rdi], xmm0 add rsi, rax movdqa [rdi+rcx], xmm1 movdqa [rdi+rcx*2],xmm2 lea rdi, [rdi+rcx*2] movdqu xmm3, [rsi] add rdi, rcx movdqu xmm4, [rsi+rax] movdqu xmm5, [rsi+rax*2] lea rsi, [rsi+rax*2] movdqa [rdi], xmm3 add rsi, rax movdqa [rdi+rcx], xmm4 movdqa [rdi+rcx*2],xmm5 lea rdi, [rdi+rcx*2] movdqu xmm0, [rsi] add rdi, rcx movdqu xmm1, [rsi+rax] movdqu xmm2, [rsi+rax*2] lea rsi, [rsi+rax*2] movdqa [rdi], xmm0 add rsi, rax movdqa [rdi+rcx], xmm1 movdqa [rdi+rcx*2], xmm2 movdqu xmm3, [rsi] movdqu xmm4, [rsi+rax] lea rdi, [rdi+rcx*2] add rdi, rcx movdqu xmm5, [rsi+rax*2] lea rsi, [rsi+rax*2] movdqa [rdi], xmm3 add rsi, rax movdqa [rdi+rcx], xmm4 movdqa [rdi+rcx*2],xmm5 movdqu xmm0, [rsi] lea rdi, [rdi+rcx*2] movdqu xmm1, [rsi+rax] add rdi, rcx movdqu xmm2, [rsi+rax*2] lea rsi, [rsi+rax*2] movdqa [rdi], xmm0 movdqa [rdi+rcx], xmm1 movdqa [rdi+rcx*2],xmm2 movdqu xmm3, [rsi+rax] lea rdi, [rdi+rcx*2] movdqa [rdi+rcx], xmm3 ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret libvpx-1.8.2/vp8/common/x86/subpixel_mmx.asm000066400000000000000000000202121357355204000206770ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" %define BLOCK_HEIGHT_WIDTH 4 %define vp8_filter_weight 128 %define VP8_FILTER_SHIFT 7 SECTION .text ;void vp8_filter_block1d_h6_mmx ;( ; unsigned char *src_ptr, ; unsigned short *output_ptr, ; unsigned int src_pixels_per_line, ; unsigned int pixel_step, ; unsigned int output_height, ; unsigned int output_width, ; short * vp8_filter ;) global sym(vp8_filter_block1d_h6_mmx) PRIVATE sym(vp8_filter_block1d_h6_mmx): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 GET_GOT rbx push rsi push rdi ; end prolog mov rdx, arg(6) ;vp8_filter movq mm1, [rdx + 16] ; do both the negative taps first!!! movq mm2, [rdx + 32] ; movq mm6, [rdx + 48] ; movq mm7, [rdx + 64] ; mov rdi, arg(1) ;output_ptr mov rsi, arg(0) ;src_ptr movsxd rcx, dword ptr arg(4) ;output_height movsxd rax, dword ptr arg(5) ;output_width ; destination pitch? pxor mm0, mm0 ; mm0 = 00000000 .nextrow: movq mm3, [rsi-2] ; mm3 = p-2..p5 movq mm4, mm3 ; mm4 = p-2..p5 psrlq mm3, 8 ; mm3 = p-1..p5 punpcklbw mm3, mm0 ; mm3 = p-1..p2 pmullw mm3, mm1 ; mm3 *= kernel 1 modifiers. 
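        ; In this loop the four inner taps are applied first (taps 1-4, loaded
        ; into mm1/mm2/mm6/mm7 above, which include all the negative
        ; coefficients); the two outer positive taps (tap 5 from [rdx+80] and
        ; tap 0 from [rdx]) are added below before rounding and shifting, with
        ; the running sum kept in mm3.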
        movq        mm5,    mm4                 ; mm5 = p-2..p5
        punpckhbw   mm4,    mm0                 ; mm4 = p2..p5
        pmullw      mm4,    mm7                 ; mm4 *= kernel 4 modifiers
        paddsw      mm3,    mm4                 ; mm3 += mm4

        movq        mm4,    mm5                 ; mm4 = p-2..p5;
        psrlq       mm5,    16                  ; mm5 = p0..p5;
        punpcklbw   mm5,    mm0                 ; mm5 = p0..p3
        pmullw      mm5,    mm2                 ; mm5 *= kernel 2 modifiers
        paddsw      mm3,    mm5                 ; mm3 += mm5

        movq        mm5,    mm4                 ; mm5 = p-2..p5
        psrlq       mm4,    24                  ; mm4 = p1..p5
        punpcklbw   mm4,    mm0                 ; mm4 = p1..p4
        pmullw      mm4,    mm6                 ; mm4 *= kernel 3 modifiers
        paddsw      mm3,    mm4                 ; mm3 += mm4

        ; do outer positive taps
        movd        mm4,    [rsi+3]
        punpcklbw   mm4,    mm0                 ; mm4 = p3..p6
        pmullw      mm4,    [rdx+80]            ; mm4 *= kernel 5 modifiers
        paddsw      mm3,    mm4                 ; mm3 += mm4

        punpcklbw   mm5,    mm0                 ; mm5 = p-2..p1
        pmullw      mm5,    [rdx]               ; mm5 *= kernel 0 modifiers
        paddsw      mm3,    mm5                 ; mm3 += mm5

        paddsw      mm3,    [GLOBAL(rd)]        ; mm3 += round value
        psraw       mm3,    VP8_FILTER_SHIFT    ; mm3 /= 128
        packuswb    mm3,    mm0                 ; pack and unpack to saturate
        punpcklbw   mm3,    mm0

        movq        [rdi],  mm3                 ; store the results in the destination

%if ABI_IS_32BIT
        add         rsi,    dword ptr arg(2)    ;src_pixels_per_line ; next line
        add         rdi,    rax;
%else
        movsxd      r8,     dword ptr arg(2)    ;src_pixels_per_line
        add         rdi,    rax;
        add         rsi,    r8                  ; next line
%endif

        dec         rcx                         ; decrement count
        jnz         .nextrow                    ; next row

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

;void vp8_filter_block1dc_v6_mmx
;(
;   short *src_ptr,
;   unsigned char *output_ptr,
;   int output_pitch,
;   unsigned int pixels_per_line,
;   unsigned int pixel_step,
;   unsigned int output_height,
;   unsigned int output_width,
;   short * vp8_filter
;)
global sym(vp8_filter_block1dc_v6_mmx) PRIVATE
sym(vp8_filter_block1dc_v6_mmx):
    push        rbp
    mov         rbp, rsp
    SHADOW_ARGS_TO_STACK 8
    GET_GOT     rbx
    push        rsi
    push        rdi
    ; end prolog

        movq        mm5, [GLOBAL(rd)]
        push        rbx
        mov         rbx, arg(7)             ;vp8_filter
        movq        mm1, [rbx + 16]         ; do both the negative taps first!!!
        movq        mm2, [rbx + 32]         ;
        movq        mm6, [rbx + 48]         ;
        movq        mm7, [rbx + 64]         ;

        movsxd      rdx, dword ptr arg(3)   ;pixels_per_line
        mov         rdi, arg(1)             ;output_ptr
        mov         rsi, arg(0)             ;src_ptr
        sub         rsi, rdx
        sub         rsi, rdx
        movsxd      rcx, DWORD PTR arg(5)   ;output_height
        movsxd      rax, DWORD PTR arg(2)   ;output_pitch      ; destination pitch?
        pxor        mm0, mm0                ; mm0 = 00000000

.nextrow_cv:
        movq        mm3, [rsi+rdx]          ; mm3 = p0..p3 = row -1
        pmullw      mm3, mm1                ; mm3 *= kernel 1 modifiers.

        movq        mm4, [rsi + 4*rdx]      ; mm4 = p0..p3 = row 2
        pmullw      mm4, mm7                ; mm4 *= kernel 4 modifiers.
        paddsw      mm3, mm4                ; mm3 += mm4

        movq        mm4, [rsi + 2*rdx]      ; mm4 = p0..p3 = row 0
        pmullw      mm4, mm2                ; mm4 *= kernel 2 modifiers.
        paddsw      mm3, mm4                ; mm3 += mm4

        movq        mm4, [rsi]              ; mm4 = p0..p3 = row -2
        pmullw      mm4, [rbx]              ; mm4 *= kernel 0 modifiers.
        paddsw      mm3, mm4                ; mm3 += mm4

        add         rsi, rdx                ; move source forward 1 line to avoid 3 * pitch
        movq        mm4, [rsi + 2*rdx]      ; mm4 = p0..p3 = row 1
        pmullw      mm4, mm6                ; mm4 *= kernel 3 modifiers.
        paddsw      mm3, mm4                ; mm3 += mm4

        movq        mm4, [rsi + 4*rdx]      ; mm4 = p0..p3 = row 3
        pmullw      mm4, [rbx +80]          ; mm4 *= kernel 5 modifiers.
        paddsw      mm3, mm4                ; mm3 += mm4

        paddsw      mm3, mm5                ; mm3 += round value
        psraw       mm3, VP8_FILTER_SHIFT   ; mm3 /= 128
        packuswb    mm3, mm0                ; pack and saturate
        movd        [rdi], mm3              ; store the results in the destination

        ; The subsequent iterations repeat 3 out of 4 of these reads. Since
        ; the recon block should be in cache this shouldn't cost much. It's
        ; obviously avoidable.
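        ; In C terms, each trip through .nextrow_cv just produced 4 output
        ; pixels:
        ;   out[c] = clamp((k0*r0[c] + k1*r1[c] + k2*r2[c] + k3*r3[c]
        ;                   + k4*r4[c] + k5*r5[c] + 64) >> 7, 0, 255)
        ; where r0..r5 are six consecutive 16-bit rows of the first-pass
        ; intermediate buffer.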
        lea         rdi, [rdi+rax]          ;
        dec         rcx                     ; decrement count
        jnz         .nextrow_cv             ; next row

        pop rbx

    ; begin epilog
    pop rdi
    pop rsi
    RESTORE_GOT
    UNSHADOW_ARGS
    pop         rbp
    ret

SECTION_RODATA
align 16
rd:
    times 4 dw 0x40

align 16
global HIDDEN_DATA(sym(vp8_six_tap_x86))
sym(vp8_six_tap_x86):
    times 8 dw 0
    times 8 dw 0
    times 8 dw 128
    times 8 dw 0
    times 8 dw 0
    times 8 dw 0

    times 8 dw 0
    times 8 dw -6
    times 8 dw 123
    times 8 dw 12
    times 8 dw -1
    times 8 dw 0

    times 8 dw 2
    times 8 dw -11
    times 8 dw 108
    times 8 dw 36
    times 8 dw -8
    times 8 dw 1

    times 8 dw 0
    times 8 dw -9
    times 8 dw 93
    times 8 dw 50
    times 8 dw -6
    times 8 dw 0

    times 8 dw 3
    times 8 dw -16
    times 8 dw 77
    times 8 dw 77
    times 8 dw -16
    times 8 dw 3

    times 8 dw 0
    times 8 dw -6
    times 8 dw 50
    times 8 dw 93
    times 8 dw -9
    times 8 dw 0

    times 8 dw 1
    times 8 dw -8
    times 8 dw 36
    times 8 dw 108
    times 8 dw -11
    times 8 dw 2

    times 8 dw 0
    times 8 dw -1
    times 8 dw 12
    times 8 dw 123
    times 8 dw -6
    times 8 dw 0
libvpx-1.8.2/vp8/common/x86/subpixel_sse2.asm000066400000000000000000001015111357355204000207540ustar00rootroot00000000000000;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS. All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;

%include "vpx_ports/x86_abi_support.asm"

%define BLOCK_HEIGHT_WIDTH 4
%define VP8_FILTER_WEIGHT 128
%define VP8_FILTER_SHIFT  7

SECTION .text

;/************************************************************************************
; Notes: filter_block1d_h6 applies a 6-tap filter horizontally to the input pixels. The
; input pixel array has output_height rows. This routine assumes that output_height is an
; even number. This function handles 8 pixels in the horizontal direction, calculating one
; row each iteration to take advantage of the 128-bit operations.
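;
; For context: these horizontal routines write a 16-bit intermediate that a
; matching vertical pass consumes (the pairing lives in vp8_asm_stubs.c). A
; plain-C sketch of the two-pass scheme, with hypothetical helper names:
;
;   /* first pass: horizontal 6-tap over h+5 rows (2 above, 3 below the
;      block), starting at src - 2 * stride */
;   for (r = 0; r < h + 5; ++r)
;     for (c = 0; c < w; ++c)
;       temp[r][c] = hfilter6(src + (r - 2) * stride + c);
;   /* second pass: vertical 6-tap down the temp columns */
;   for (r = 0; r < h; ++r)
;     for (c = 0; c < w; ++c)
;       dst[r * pitch + c] = clamp8(vfilter6(&temp[r][c]));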
;*************************************************************************************/ ;void vp8_filter_block1d8_h6_sse2 ;( ; unsigned char *src_ptr, ; unsigned short *output_ptr, ; unsigned int src_pixels_per_line, ; unsigned int pixel_step, ; unsigned int output_height, ; unsigned int output_width, ; short *vp8_filter ;) global sym(vp8_filter_block1d8_h6_sse2) PRIVATE sym(vp8_filter_block1d8_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog mov rdx, arg(6) ;vp8_filter mov rsi, arg(0) ;src_ptr mov rdi, arg(1) ;output_ptr movsxd rcx, dword ptr arg(4) ;output_height movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source %if ABI_IS_32BIT=0 movsxd r8, dword ptr arg(5) ;output_width %endif pxor xmm0, xmm0 ; clear xmm0 for unpack .filter_block1d8_h6_rowloop: movq xmm3, MMWORD PTR [rsi - 2] movq xmm1, MMWORD PTR [rsi + 6] prefetcht2 [rsi+rax-2] pslldq xmm1, 8 por xmm1, xmm3 movdqa xmm4, xmm1 movdqa xmm5, xmm1 movdqa xmm6, xmm1 movdqa xmm7, xmm1 punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 paddsw xmm4, xmm7 paddsw xmm4, xmm5 paddsw xmm4, xmm3 paddsw xmm4, xmm6 paddsw xmm4, xmm1 paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 packuswb xmm4, xmm0 punpcklbw xmm4, xmm0 movdqa XMMWORD Ptr [rdi], xmm4 lea rsi, [rsi + rax] %if ABI_IS_32BIT add rdi, DWORD Ptr arg(5) ;[output_width] %else add rdi, r8 %endif dec rcx jnz .filter_block1d8_h6_rowloop ; next row ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_filter_block1d16_h6_sse2 ;( ; unsigned char *src_ptr, ; unsigned short *output_ptr, ; unsigned int src_pixels_per_line, ; unsigned int pixel_step, ; unsigned int output_height, ; unsigned int output_width, ; short *vp8_filter ;) ;/************************************************************************************ ; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The ; input pixel array has output_height rows. This routine assumes that output_height is an ; even number. This function handles 8 pixels in horizontal direction, calculating ONE ; rows each iteration to take advantage of the 128 bits operations. 
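; For this 16-wide variant the same 8-pixel computation is simply performed
; twice per row: the low half assembled from the [rsi-2]/[rsi+6] loads and
; the high half from the overlapping [rsi+11] load, with the two results
; stored to [rdi] and [rdi+16].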
;*************************************************************************************/ global sym(vp8_filter_block1d16_h6_sse2) PRIVATE sym(vp8_filter_block1d16_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog mov rdx, arg(6) ;vp8_filter mov rsi, arg(0) ;src_ptr mov rdi, arg(1) ;output_ptr movsxd rcx, dword ptr arg(4) ;output_height movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source %if ABI_IS_32BIT=0 movsxd r8, dword ptr arg(5) ;output_width %endif pxor xmm0, xmm0 ; clear xmm0 for unpack .filter_block1d16_h6_sse2_rowloop: movq xmm3, MMWORD PTR [rsi - 2] movq xmm1, MMWORD PTR [rsi + 6] ; Load from 11 to avoid reading out of bounds. movq xmm2, MMWORD PTR [rsi +11] ; The lower bits are not cleared before 'or'ing with xmm1, ; but that is OK because the values in the overlapping positions ; are already equal to the ones in xmm1. pslldq xmm2, 5 por xmm2, xmm1 prefetcht2 [rsi+rax-2] pslldq xmm1, 8 por xmm1, xmm3 movdqa xmm4, xmm1 movdqa xmm5, xmm1 movdqa xmm6, xmm1 movdqa xmm7, xmm1 punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 paddsw xmm4, xmm7 paddsw xmm4, xmm5 paddsw xmm4, xmm3 paddsw xmm4, xmm6 paddsw xmm4, xmm1 paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 packuswb xmm4, xmm0 punpcklbw xmm4, xmm0 movdqa XMMWORD Ptr [rdi], xmm4 movdqa xmm3, xmm2 movdqa xmm4, xmm2 movdqa xmm5, xmm2 movdqa xmm6, xmm2 movdqa xmm7, xmm2 punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 paddsw xmm4, xmm7 paddsw xmm4, xmm5 paddsw xmm4, xmm3 paddsw xmm4, xmm6 paddsw xmm4, xmm2 paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 packuswb xmm4, xmm0 punpcklbw xmm4, xmm0 movdqa XMMWORD Ptr 
[rdi+16], xmm4 lea rsi, [rsi + rax] %if ABI_IS_32BIT add rdi, DWORD Ptr arg(5) ;[output_width] %else add rdi, r8 %endif dec rcx jnz .filter_block1d16_h6_sse2_rowloop ; next row ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_filter_block1d8_v6_sse2 ;( ; short *src_ptr, ; unsigned char *output_ptr, ; int dst_ptich, ; unsigned int pixels_per_line, ; unsigned int pixel_step, ; unsigned int output_height, ; unsigned int output_width, ; short * vp8_filter ;) ;/************************************************************************************ ; Notes: filter_block1d8_v6 applies a 6 tap filter vertically to the input pixels. The ; input pixel array has output_height rows. ;*************************************************************************************/ global sym(vp8_filter_block1d8_v6_sse2) PRIVATE sym(vp8_filter_block1d8_v6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 8 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog mov rax, arg(7) ;vp8_filter movsxd rdx, dword ptr arg(3) ;pixels_per_line mov rdi, arg(1) ;output_ptr mov rsi, arg(0) ;src_ptr sub rsi, rdx sub rsi, rdx movsxd rcx, DWORD PTR arg(5) ;[output_height] pxor xmm0, xmm0 ; clear xmm0 movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] %if ABI_IS_32BIT=0 movsxd r8, dword ptr arg(2) ; dst_ptich %endif .vp8_filter_block1d8_v6_sse2_loop: movdqa xmm1, XMMWORD PTR [rsi] pmullw xmm1, [rax] movdqa xmm2, XMMWORD PTR [rsi + rdx] pmullw xmm2, [rax + 16] movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] pmullw xmm3, [rax + 32] movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] pmullw xmm5, [rax + 64] add rsi, rdx movdqa xmm4, XMMWORD PTR [rsi + rdx * 2] pmullw xmm4, [rax + 48] movdqa xmm6, XMMWORD PTR [rsi + rdx * 4] pmullw xmm6, [rax + 80] paddsw xmm2, xmm5 paddsw xmm2, xmm3 paddsw xmm2, xmm1 paddsw xmm2, xmm4 paddsw xmm2, xmm6 paddsw xmm2, xmm7 psraw xmm2, 7 packuswb xmm2, xmm0 ; pack and saturate movq QWORD PTR [rdi], xmm2 ; store the results in the destination %if ABI_IS_32BIT add rdi, DWORD PTR arg(2) ;[dst_ptich] %else add rdi, r8 %endif dec rcx ; decrement count jnz .vp8_filter_block1d8_v6_sse2_loop ; next row ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_filter_block1d16_v6_sse2 ;( ; unsigned short *src_ptr, ; unsigned char *output_ptr, ; int dst_ptich, ; unsigned int pixels_per_line, ; unsigned int pixel_step, ; unsigned int output_height, ; unsigned int output_width, ; const short *vp8_filter ;) ;/************************************************************************************ ; Notes: filter_block1d16_v6 applies a 6 tap filter vertically to the input pixels. The ; input pixel array has output_height rows. ;*************************************************************************************/ global sym(vp8_filter_block1d16_v6_sse2) PRIVATE sym(vp8_filter_block1d16_v6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 8 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog mov rax, arg(7) ;vp8_filter movsxd rdx, dword ptr arg(3) ;pixels_per_line mov rdi, arg(1) ;output_ptr mov rsi, arg(0) ;src_ptr sub rsi, rdx sub rsi, rdx movsxd rcx, DWORD PTR arg(5) ;[output_height] %if ABI_IS_32BIT=0 movsxd r8, dword ptr arg(2) ; dst_ptich %endif .vp8_filter_block1d16_v6_sse2_loop: ; The order for adding 6-tap is 2 5 3 1 4 6. Read in data in that order. 
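        ; (The unusual order is presumably there to keep the partial sums
        ; inside paddsw's signed 16-bit range: the two large center taps,
        ; e.g. 77 and 77 in the half-pel filter, are never added directly
        ; to each other before some negative outer taps have been folded in.)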
movdqa xmm1, XMMWORD PTR [rsi + rdx] ; line 2 movdqa xmm2, XMMWORD PTR [rsi + rdx + 16] pmullw xmm1, [rax + 16] pmullw xmm2, [rax + 16] movdqa xmm3, XMMWORD PTR [rsi + rdx * 4] ; line 5 movdqa xmm4, XMMWORD PTR [rsi + rdx * 4 + 16] pmullw xmm3, [rax + 64] pmullw xmm4, [rax + 64] movdqa xmm5, XMMWORD PTR [rsi + rdx * 2] ; line 3 movdqa xmm6, XMMWORD PTR [rsi + rdx * 2 + 16] pmullw xmm5, [rax + 32] pmullw xmm6, [rax + 32] movdqa xmm7, XMMWORD PTR [rsi] ; line 1 movdqa xmm0, XMMWORD PTR [rsi + 16] pmullw xmm7, [rax] pmullw xmm0, [rax] paddsw xmm1, xmm3 paddsw xmm2, xmm4 paddsw xmm1, xmm5 paddsw xmm2, xmm6 paddsw xmm1, xmm7 paddsw xmm2, xmm0 add rsi, rdx movdqa xmm3, XMMWORD PTR [rsi + rdx * 2] ; line 4 movdqa xmm4, XMMWORD PTR [rsi + rdx * 2 + 16] pmullw xmm3, [rax + 48] pmullw xmm4, [rax + 48] movdqa xmm5, XMMWORD PTR [rsi + rdx * 4] ; line 6 movdqa xmm6, XMMWORD PTR [rsi + rdx * 4 + 16] pmullw xmm5, [rax + 80] pmullw xmm6, [rax + 80] movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] pxor xmm0, xmm0 ; clear xmm0 paddsw xmm1, xmm3 paddsw xmm2, xmm4 paddsw xmm1, xmm5 paddsw xmm2, xmm6 paddsw xmm1, xmm7 paddsw xmm2, xmm7 psraw xmm1, 7 psraw xmm2, 7 packuswb xmm1, xmm2 ; pack and saturate movdqa XMMWORD PTR [rdi], xmm1 ; store the results in the destination %if ABI_IS_32BIT add rdi, DWORD PTR arg(2) ;[dst_ptich] %else add rdi, r8 %endif dec rcx ; decrement count jnz .vp8_filter_block1d16_v6_sse2_loop ; next row ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_filter_block1d8_h6_only_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, ; unsigned char *output_ptr, ; int dst_ptich, ; unsigned int output_height, ; const short *vp8_filter ;) ; First-pass filter only when yoffset==0 global sym(vp8_filter_block1d8_h6_only_sse2) PRIVATE sym(vp8_filter_block1d8_h6_only_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog mov rdx, arg(5) ;vp8_filter mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rcx, dword ptr arg(4) ;output_height movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source %if ABI_IS_32BIT=0 movsxd r8, dword ptr arg(3) ;dst_ptich %endif pxor xmm0, xmm0 ; clear xmm0 for unpack .filter_block1d8_h6_only_rowloop: movq xmm3, MMWORD PTR [rsi - 2] movq xmm1, MMWORD PTR [rsi + 6] prefetcht2 [rsi+rax-2] pslldq xmm1, 8 por xmm1, xmm3 movdqa xmm4, xmm1 movdqa xmm5, xmm1 movdqa xmm6, xmm1 movdqa xmm7, xmm1 punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 paddsw xmm4, xmm7 paddsw xmm4, xmm5 paddsw xmm4, xmm3 paddsw xmm4, xmm6 paddsw xmm4, xmm1 paddsw 
xmm4, [GLOBAL(rd)] psraw xmm4, 7 packuswb xmm4, xmm0 movq QWORD PTR [rdi], xmm4 ; store the results in the destination lea rsi, [rsi + rax] %if ABI_IS_32BIT add rdi, DWORD Ptr arg(3) ;dst_ptich %else add rdi, r8 %endif dec rcx jnz .filter_block1d8_h6_only_rowloop ; next row ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_filter_block1d16_h6_only_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, ; unsigned char *output_ptr, ; int dst_ptich, ; unsigned int output_height, ; const short *vp8_filter ;) ; First-pass filter only when yoffset==0 global sym(vp8_filter_block1d16_h6_only_sse2) PRIVATE sym(vp8_filter_block1d16_h6_only_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog mov rdx, arg(5) ;vp8_filter mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rcx, dword ptr arg(4) ;output_height movsxd rax, dword ptr arg(1) ;src_pixels_per_line ; Pitch for Source %if ABI_IS_32BIT=0 movsxd r8, dword ptr arg(3) ;dst_ptich %endif pxor xmm0, xmm0 ; clear xmm0 for unpack .filter_block1d16_h6_only_sse2_rowloop: movq xmm3, MMWORD PTR [rsi - 2] movq xmm1, MMWORD PTR [rsi + 6] movq xmm2, MMWORD PTR [rsi +14] pslldq xmm2, 8 por xmm2, xmm1 prefetcht2 [rsi+rax-2] pslldq xmm1, 8 por xmm1, xmm3 movdqa xmm4, xmm1 movdqa xmm5, xmm1 movdqa xmm6, xmm1 movdqa xmm7, xmm1 punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 psrldq xmm1, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 punpcklbw xmm1, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 pmullw xmm1, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 paddsw xmm4, xmm7 paddsw xmm4, xmm5 paddsw xmm4, xmm3 paddsw xmm4, xmm6 paddsw xmm4, xmm1 paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 packuswb xmm4, xmm0 ; lower 8 bytes movq QWORD Ptr [rdi], xmm4 ; store the results in the destination movdqa xmm3, xmm2 movdqa xmm4, xmm2 movdqa xmm5, xmm2 movdqa xmm6, xmm2 movdqa xmm7, xmm2 punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 psrldq xmm4, 1 ; xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 pmullw xmm3, XMMWORD PTR [rdx] ; x[-2] * H[-2]; Tap 1 punpcklbw xmm4, xmm0 ; xx06 xx05 xx04 xx03 xx02 xx01 xx00 xx-1 psrldq xmm5, 2 ; xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 pmullw xmm4, XMMWORD PTR [rdx+16] ; x[-1] * H[-1]; Tap 2 punpcklbw xmm5, xmm0 ; xx07 xx06 xx05 xx04 xx03 xx02 xx01 xx00 psrldq xmm6, 3 ; xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 pmullw xmm5, [rdx+32] ; x[ 0] * H[ 0]; Tap 3 punpcklbw xmm6, xmm0 ; xx08 xx07 xx06 xx05 xx04 xx03 xx02 xx01 psrldq xmm7, 4 ; xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 02 pmullw xmm6, [rdx+48] ; x[ 1] * h[ 1] ; Tap 4 punpcklbw xmm7, xmm0 ; xx09 xx08 xx07 xx06 xx05 xx04 xx03 xx02 psrldq xmm2, 5 ; xx xx xx xx xx 0d 0c 0b 0a 09 08 07 06 05 04 03 
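        ; (As in the first half, each psrldq above slides the packed row one
        ; byte to the right, so successive unpacks expose the x[-2+t] window
        ; that tap t multiplies.)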
pmullw xmm7, [rdx+64] ; x[ 2] * h[ 2] ; Tap 5 punpcklbw xmm2, xmm0 ; xx0a xx09 xx08 xx07 xx06 xx05 xx04 xx03 pmullw xmm2, [rdx+80] ; x[ 3] * h[ 3] ; Tap 6 paddsw xmm4, xmm7 paddsw xmm4, xmm5 paddsw xmm4, xmm3 paddsw xmm4, xmm6 paddsw xmm4, xmm2 paddsw xmm4, [GLOBAL(rd)] psraw xmm4, 7 packuswb xmm4, xmm0 ; higher 8 bytes movq QWORD Ptr [rdi+8], xmm4 ; store the results in the destination lea rsi, [rsi + rax] %if ABI_IS_32BIT add rdi, DWORD Ptr arg(3) ;dst_ptich %else add rdi, r8 %endif dec rcx jnz .filter_block1d16_h6_only_sse2_rowloop ; next row ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_filter_block1d8_v6_only_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, ; unsigned char *output_ptr, ; int dst_ptich, ; unsigned int output_height, ; const short *vp8_filter ;) ; Second-pass filter only when xoffset==0 global sym(vp8_filter_block1d8_v6_only_sse2) PRIVATE sym(vp8_filter_block1d8_v6_only_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rcx, dword ptr arg(4) ;output_height movsxd rdx, dword ptr arg(1) ;src_pixels_per_line mov rax, arg(5) ;vp8_filter pxor xmm0, xmm0 ; clear xmm0 movdqa xmm7, XMMWORD PTR [GLOBAL(rd)] %if ABI_IS_32BIT=0 movsxd r8, dword ptr arg(3) ; dst_ptich %endif .vp8_filter_block1d8_v6_only_sse2_loop: movq xmm1, MMWORD PTR [rsi] movq xmm2, MMWORD PTR [rsi + rdx] movq xmm3, MMWORD PTR [rsi + rdx * 2] movq xmm5, MMWORD PTR [rsi + rdx * 4] add rsi, rdx movq xmm4, MMWORD PTR [rsi + rdx * 2] movq xmm6, MMWORD PTR [rsi + rdx * 4] punpcklbw xmm1, xmm0 pmullw xmm1, [rax] punpcklbw xmm2, xmm0 pmullw xmm2, [rax + 16] punpcklbw xmm3, xmm0 pmullw xmm3, [rax + 32] punpcklbw xmm5, xmm0 pmullw xmm5, [rax + 64] punpcklbw xmm4, xmm0 pmullw xmm4, [rax + 48] punpcklbw xmm6, xmm0 pmullw xmm6, [rax + 80] paddsw xmm2, xmm5 paddsw xmm2, xmm3 paddsw xmm2, xmm1 paddsw xmm2, xmm4 paddsw xmm2, xmm6 paddsw xmm2, xmm7 psraw xmm2, 7 packuswb xmm2, xmm0 ; pack and saturate movq QWORD PTR [rdi], xmm2 ; store the results in the destination %if ABI_IS_32BIT add rdi, DWORD PTR arg(3) ;[dst_ptich] %else add rdi, r8 %endif dec rcx ; decrement count jnz .vp8_filter_block1d8_v6_only_sse2_loop ; next row ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_unpack_block1d16_h6_sse2 ;( ; unsigned char *src_ptr, ; unsigned short *output_ptr, ; unsigned int src_pixels_per_line, ; unsigned int output_height, ; unsigned int output_width ;) global sym(vp8_unpack_block1d16_h6_sse2) PRIVATE sym(vp8_unpack_block1d16_h6_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 GET_GOT rbx push rsi push rdi ; end prolog mov rsi, arg(0) ;src_ptr mov rdi, arg(1) ;output_ptr movsxd rcx, dword ptr arg(3) ;output_height movsxd rax, dword ptr arg(2) ;src_pixels_per_line ; Pitch for Source pxor xmm0, xmm0 ; clear xmm0 for unpack %if ABI_IS_32BIT=0 movsxd r8, dword ptr arg(4) ;output_width ; Pitch for Source %endif .unpack_block1d16_h6_sse2_rowloop: movq xmm1, MMWORD PTR [rsi] ; 0d 0c 0b 0a 09 08 07 06 05 04 03 02 01 00 -1 -2 movq xmm3, MMWORD PTR [rsi+8] ; make copy of xmm1 punpcklbw xmm3, xmm0 ; xx05 xx04 xx03 xx02 xx01 xx01 xx-1 xx-2 punpcklbw xmm1, xmm0 movdqa XMMWORD Ptr [rdi], xmm1 movdqa XMMWORD Ptr [rdi + 16], xmm3 lea rsi, [rsi + rax] %if ABI_IS_32BIT add rdi, DWORD Ptr arg(4) ;[output_width] %else add rdi, r8 %endif dec rcx jnz .unpack_block1d16_h6_sse2_rowloop ; next row ; begin epilog pop rdi pop rsi 
RESTORE_GOT UNSHADOW_ARGS pop rbp ret SECTION_RODATA align 16 rd: times 8 dw 0x40 libvpx-1.8.2/vp8/common/x86/subpixel_ssse3.asm000066400000000000000000001241561357355204000211520ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" %define BLOCK_HEIGHT_WIDTH 4 %define VP8_FILTER_WEIGHT 128 %define VP8_FILTER_SHIFT 7 SECTION .text ;/************************************************************************************ ; Notes: filter_block1d_h6 applies a 6 tap filter horizontally to the input pixels. The ; input pixel array has output_height rows. This routine assumes that output_height is an ; even number. This function handles 8 pixels in horizontal direction, calculating ONE ; rows each iteration to take advantage of the 128 bits operations. ; ; This is an implementation of some of the SSE optimizations first seen in ffvp8 ; ;*************************************************************************************/ ;void vp8_filter_block1d8_h6_ssse3 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, ; unsigned char *output_ptr, ; unsigned int output_pitch, ; unsigned int output_height, ; unsigned int vp8_filter_index ;) global sym(vp8_filter_block1d8_h6_ssse3) PRIVATE sym(vp8_filter_block1d8_h6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog movsxd rdx, DWORD PTR arg(5) ;table index xor rsi, rsi shl rdx, 4 movdqa xmm7, [GLOBAL(rd)] lea rax, [GLOBAL(k0_k5)] add rax, rdx mov rdi, arg(2) ;output_ptr cmp esi, DWORD PTR [rax] je vp8_filter_block1d8_h4_ssse3 movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 mov rsi, arg(0) ;src_ptr movsxd rax, dword ptr arg(1) ;src_pixels_per_line movsxd rcx, dword ptr arg(4) ;output_height movsxd rdx, dword ptr arg(3) ;output_pitch sub rdi, rdx ;xmm3 free .filter_block1d8_h6_rowloop_ssse3: movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 movq xmm2, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 punpcklbw xmm0, xmm2 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 movdqa xmm1, xmm0 pmaddubsw xmm0, xmm4 movdqa xmm2, xmm1 pshufb xmm1, [GLOBAL(shuf2bfrom1)] pshufb xmm2, [GLOBAL(shuf3bfrom1)] pmaddubsw xmm1, xmm5 lea rdi, [rdi + rdx] pmaddubsw xmm2, xmm6 lea rsi, [rsi + rax] dec rcx paddsw xmm0, xmm1 paddsw xmm2, xmm7 paddsw xmm0, xmm2 psraw xmm0, 7 packuswb xmm0, xmm0 movq MMWORD Ptr [rdi], xmm0 jnz .filter_block1d8_h6_rowloop_ssse3 ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret vp8_filter_block1d8_h4_ssse3: movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 movdqa xmm3, XMMWORD PTR [GLOBAL(shuf2bfrom1)] movdqa xmm4, XMMWORD PTR [GLOBAL(shuf3bfrom1)] mov rsi, arg(0) ;src_ptr movsxd rax, dword ptr arg(1) ;src_pixels_per_line movsxd rcx, dword ptr arg(4) ;output_height movsxd rdx, dword ptr arg(3) ;output_pitch sub rdi, rdx .filter_block1d8_h4_rowloop_ssse3: movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 movq xmm1, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 punpcklbw xmm0, xmm1 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 movdqa xmm2, xmm0 pshufb xmm0, xmm3 pshufb xmm2, xmm4 
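    ; pmaddubsw below multiplies unsigned source bytes (first operand) by
    ; signed filter bytes (second operand) and pairwise-adds into signed
    ; words; because this filter index has k0 == k5 == 0 (which is what the
    ; 'cmp esi, DWORD PTR [rax]' dispatch checked), only the four inner
    ; taps k1..k4 contribute here.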
pmaddubsw xmm0, xmm5 lea rdi, [rdi + rdx] pmaddubsw xmm2, xmm6 lea rsi, [rsi + rax] dec rcx paddsw xmm0, xmm7 paddsw xmm0, xmm2 psraw xmm0, 7 packuswb xmm0, xmm0 movq MMWORD Ptr [rdi], xmm0 jnz .filter_block1d8_h4_rowloop_ssse3 ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_filter_block1d16_h6_ssse3 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, ; unsigned char *output_ptr, ; unsigned int output_pitch, ; unsigned int output_height, ; unsigned int vp8_filter_index ;) global sym(vp8_filter_block1d16_h6_ssse3) PRIVATE sym(vp8_filter_block1d16_h6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog movsxd rdx, DWORD PTR arg(5) ;table index xor rsi, rsi shl rdx, 4 ; lea rax, [GLOBAL(k0_k5)] add rax, rdx mov rdi, arg(2) ;output_ptr mov rsi, arg(0) ;src_ptr movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 movsxd rax, dword ptr arg(1) ;src_pixels_per_line movsxd rcx, dword ptr arg(4) ;output_height movsxd rdx, dword ptr arg(3) ;output_pitch .filter_block1d16_h6_rowloop_ssse3: movq xmm0, MMWORD PTR [rsi - 2] ; -2 -1 0 1 2 3 4 5 movq xmm3, MMWORD PTR [rsi + 3] ; 3 4 5 6 7 8 9 10 punpcklbw xmm0, xmm3 ; -2 3 -1 4 0 5 1 6 2 7 3 8 4 9 5 10 movdqa xmm1, xmm0 pmaddubsw xmm0, xmm4 movdqa xmm2, xmm1 pshufb xmm1, [GLOBAL(shuf2bfrom1)] pshufb xmm2, [GLOBAL(shuf3bfrom1)] movq xmm3, MMWORD PTR [rsi + 6] pmaddubsw xmm1, xmm5 movq xmm7, MMWORD PTR [rsi + 11] pmaddubsw xmm2, xmm6 punpcklbw xmm3, xmm7 paddsw xmm0, xmm1 movdqa xmm1, xmm3 pmaddubsw xmm3, xmm4 paddsw xmm0, xmm2 movdqa xmm2, xmm1 paddsw xmm0, [GLOBAL(rd)] pshufb xmm1, [GLOBAL(shuf2bfrom1)] pshufb xmm2, [GLOBAL(shuf3bfrom1)] psraw xmm0, 7 pmaddubsw xmm1, xmm5 pmaddubsw xmm2, xmm6 packuswb xmm0, xmm0 lea rsi, [rsi + rax] paddsw xmm3, xmm1 paddsw xmm3, xmm2 paddsw xmm3, [GLOBAL(rd)] psraw xmm3, 7 packuswb xmm3, xmm3 punpcklqdq xmm0, xmm3 movdqa XMMWORD Ptr [rdi], xmm0 lea rdi, [rdi + rdx] dec rcx jnz .filter_block1d16_h6_rowloop_ssse3 ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_filter_block1d4_h6_ssse3 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, ; unsigned char *output_ptr, ; unsigned int output_pitch, ; unsigned int output_height, ; unsigned int vp8_filter_index ;) global sym(vp8_filter_block1d4_h6_ssse3) PRIVATE sym(vp8_filter_block1d4_h6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog movsxd rdx, DWORD PTR arg(5) ;table index xor rsi, rsi shl rdx, 4 ; lea rax, [GLOBAL(k0_k5)] add rax, rdx movdqa xmm7, [GLOBAL(rd)] cmp esi, DWORD PTR [rax] je .vp8_filter_block1d4_h4_ssse3 movdqa xmm4, XMMWORD PTR [rax] ;k0_k5 movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rax, dword ptr arg(1) ;src_pixels_per_line movsxd rcx, dword ptr arg(4) ;output_height movsxd rdx, dword ptr arg(3) ;output_pitch ;xmm3 free .filter_block1d4_h6_rowloop_ssse3: movdqu xmm0, XMMWORD PTR [rsi - 2] movdqa xmm1, xmm0 pshufb xmm0, [GLOBAL(shuf1b)] movdqa xmm2, xmm1 pshufb xmm1, [GLOBAL(shuf2b)] pmaddubsw xmm0, xmm4 pshufb xmm2, [GLOBAL(shuf3b)] pmaddubsw xmm1, xmm5 ;-- pmaddubsw xmm2, xmm6 lea rsi, [rsi + rax] ;-- paddsw xmm0, xmm1 paddsw xmm0, xmm7 pxor xmm1, xmm1 paddsw xmm0, xmm2 psraw xmm0, 7 packuswb xmm0, xmm0 movd DWORD PTR [rdi], xmm0 add rdi, rdx dec rcx jnz 
.filter_block1d4_h6_rowloop_ssse3 ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret .vp8_filter_block1d4_h4_ssse3: movdqa xmm5, XMMWORD PTR [rax+256] ;k2_k4 movdqa xmm6, XMMWORD PTR [rax+128] ;k1_k3 movdqa xmm0, XMMWORD PTR [GLOBAL(shuf2b)] movdqa xmm3, XMMWORD PTR [GLOBAL(shuf3b)] mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rax, dword ptr arg(1) ;src_pixels_per_line movsxd rcx, dword ptr arg(4) ;output_height movsxd rdx, dword ptr arg(3) ;output_pitch .filter_block1d4_h4_rowloop_ssse3: movdqu xmm1, XMMWORD PTR [rsi - 2] movdqa xmm2, xmm1 pshufb xmm1, xmm0 ;;[GLOBAL(shuf2b)] pshufb xmm2, xmm3 ;;[GLOBAL(shuf3b)] pmaddubsw xmm1, xmm5 ;-- pmaddubsw xmm2, xmm6 lea rsi, [rsi + rax] ;-- paddsw xmm1, xmm7 paddsw xmm1, xmm2 psraw xmm1, 7 packuswb xmm1, xmm1 movd DWORD PTR [rdi], xmm1 add rdi, rdx dec rcx jnz .filter_block1d4_h4_rowloop_ssse3 ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_filter_block1d16_v6_ssse3 ;( ; unsigned char *src_ptr, ; unsigned int src_pitch, ; unsigned char *output_ptr, ; unsigned int out_pitch, ; unsigned int output_height, ; unsigned int vp8_filter_index ;) global sym(vp8_filter_block1d16_v6_ssse3) PRIVATE sym(vp8_filter_block1d16_v6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog movsxd rdx, DWORD PTR arg(5) ;table index xor rsi, rsi shl rdx, 4 ; lea rax, [GLOBAL(k0_k5)] add rax, rdx cmp esi, DWORD PTR [rax] je .vp8_filter_block1d16_v4_ssse3 movdqa xmm5, XMMWORD PTR [rax] ;k0_k5 movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 mov rsi, arg(0) ;src_ptr movsxd rdx, DWORD PTR arg(1) ;pixels_per_line mov rdi, arg(2) ;output_ptr %if ABI_IS_32BIT=0 movsxd r8, DWORD PTR arg(3) ;out_pitch %endif mov rax, rsi movsxd rcx, DWORD PTR arg(4) ;output_height add rax, rdx .vp8_filter_block1d16_v6_ssse3_loop: movq xmm1, MMWORD PTR [rsi] ;A movq xmm2, MMWORD PTR [rsi + rdx] ;B movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C movq xmm4, MMWORD PTR [rax + rdx * 2] ;D movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E punpcklbw xmm2, xmm4 ;B D punpcklbw xmm3, xmm0 ;C E movq xmm0, MMWORD PTR [rax + rdx * 4] ;F pmaddubsw xmm3, xmm6 punpcklbw xmm1, xmm0 ;A F pmaddubsw xmm2, xmm7 pmaddubsw xmm1, xmm5 paddsw xmm2, xmm3 paddsw xmm2, xmm1 paddsw xmm2, [GLOBAL(rd)] psraw xmm2, 7 packuswb xmm2, xmm2 movq MMWORD PTR [rdi], xmm2 ;store the results movq xmm1, MMWORD PTR [rsi + 8] ;A movq xmm2, MMWORD PTR [rsi + rdx + 8] ;B movq xmm3, MMWORD PTR [rsi + rdx * 2 + 8] ;C movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E punpcklbw xmm2, xmm4 ;B D punpcklbw xmm3, xmm0 ;C E movq xmm0, MMWORD PTR [rax + rdx * 4 + 8] ;F pmaddubsw xmm3, xmm6 punpcklbw xmm1, xmm0 ;A F pmaddubsw xmm2, xmm7 pmaddubsw xmm1, xmm5 add rsi, rdx add rax, rdx ;-- ;-- paddsw xmm2, xmm3 paddsw xmm2, xmm1 paddsw xmm2, [GLOBAL(rd)] psraw xmm2, 7 packuswb xmm2, xmm2 movq MMWORD PTR [rdi+8], xmm2 %if ABI_IS_32BIT add rdi, DWORD PTR arg(3) ;out_pitch %else add rdi, r8 %endif dec rcx jnz .vp8_filter_block1d16_v6_ssse3_loop ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret .vp8_filter_block1d16_v4_ssse3: movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 mov rsi, arg(0) ;src_ptr movsxd rdx, DWORD PTR arg(1) ;pixels_per_line mov rdi, arg(2) ;output_ptr %if ABI_IS_32BIT=0 movsxd r8, DWORD PTR arg(3) ;out_pitch %endif mov rax, rsi movsxd rcx, DWORD PTR arg(4) ;output_height add rax, 
rdx .vp8_filter_block1d16_v4_ssse3_loop: movq xmm2, MMWORD PTR [rsi + rdx] ;B movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C movq xmm4, MMWORD PTR [rax + rdx * 2] ;D movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E punpcklbw xmm2, xmm4 ;B D punpcklbw xmm3, xmm0 ;C E pmaddubsw xmm3, xmm6 pmaddubsw xmm2, xmm7 movq xmm5, MMWORD PTR [rsi + rdx + 8] ;B movq xmm1, MMWORD PTR [rsi + rdx * 2 + 8] ;C movq xmm4, MMWORD PTR [rax + rdx * 2 + 8] ;D movq xmm0, MMWORD PTR [rsi + rdx * 4 + 8] ;E paddsw xmm2, [GLOBAL(rd)] paddsw xmm2, xmm3 psraw xmm2, 7 packuswb xmm2, xmm2 punpcklbw xmm5, xmm4 ;B D punpcklbw xmm1, xmm0 ;C E pmaddubsw xmm1, xmm6 pmaddubsw xmm5, xmm7 movdqa xmm4, [GLOBAL(rd)] add rsi, rdx add rax, rdx ;-- ;-- paddsw xmm5, xmm1 paddsw xmm5, xmm4 psraw xmm5, 7 packuswb xmm5, xmm5 punpcklqdq xmm2, xmm5 movdqa XMMWORD PTR [rdi], xmm2 %if ABI_IS_32BIT add rdi, DWORD PTR arg(3) ;out_pitch %else add rdi, r8 %endif dec rcx jnz .vp8_filter_block1d16_v4_ssse3_loop ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_filter_block1d8_v6_ssse3 ;( ; unsigned char *src_ptr, ; unsigned int src_pitch, ; unsigned char *output_ptr, ; unsigned int out_pitch, ; unsigned int output_height, ; unsigned int vp8_filter_index ;) global sym(vp8_filter_block1d8_v6_ssse3) PRIVATE sym(vp8_filter_block1d8_v6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog movsxd rdx, DWORD PTR arg(5) ;table index xor rsi, rsi shl rdx, 4 ; lea rax, [GLOBAL(k0_k5)] add rax, rdx movsxd rdx, DWORD PTR arg(1) ;pixels_per_line mov rdi, arg(2) ;output_ptr %if ABI_IS_32BIT=0 movsxd r8, DWORD PTR arg(3) ; out_pitch %endif movsxd rcx, DWORD PTR arg(4) ;[output_height] cmp esi, DWORD PTR [rax] je .vp8_filter_block1d8_v4_ssse3 movdqa xmm5, XMMWORD PTR [rax] ;k0_k5 movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 mov rsi, arg(0) ;src_ptr mov rax, rsi add rax, rdx .vp8_filter_block1d8_v6_ssse3_loop: movq xmm1, MMWORD PTR [rsi] ;A movq xmm2, MMWORD PTR [rsi + rdx] ;B movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C movq xmm4, MMWORD PTR [rax + rdx * 2] ;D movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E punpcklbw xmm2, xmm4 ;B D punpcklbw xmm3, xmm0 ;C E movq xmm0, MMWORD PTR [rax + rdx * 4] ;F movdqa xmm4, [GLOBAL(rd)] pmaddubsw xmm3, xmm6 punpcklbw xmm1, xmm0 ;A F pmaddubsw xmm2, xmm7 pmaddubsw xmm1, xmm5 add rsi, rdx add rax, rdx ;-- ;-- paddsw xmm2, xmm3 paddsw xmm2, xmm1 paddsw xmm2, xmm4 psraw xmm2, 7 packuswb xmm2, xmm2 movq MMWORD PTR [rdi], xmm2 %if ABI_IS_32BIT add rdi, DWORD PTR arg(3) ;[out_pitch] %else add rdi, r8 %endif dec rcx jnz .vp8_filter_block1d8_v6_ssse3_loop ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret .vp8_filter_block1d8_v4_ssse3: movdqa xmm6, XMMWORD PTR [rax+256] ;k2_k4 movdqa xmm7, XMMWORD PTR [rax+128] ;k1_k3 movdqa xmm5, [GLOBAL(rd)] mov rsi, arg(0) ;src_ptr mov rax, rsi add rax, rdx .vp8_filter_block1d8_v4_ssse3_loop: movq xmm2, MMWORD PTR [rsi + rdx] ;B movq xmm3, MMWORD PTR [rsi + rdx * 2] ;C movq xmm4, MMWORD PTR [rax + rdx * 2] ;D movq xmm0, MMWORD PTR [rsi + rdx * 4] ;E punpcklbw xmm2, xmm4 ;B D punpcklbw xmm3, xmm0 ;C E pmaddubsw xmm3, xmm6 pmaddubsw xmm2, xmm7 add rsi, rdx add rax, rdx ;-- ;-- paddsw xmm2, xmm3 paddsw xmm2, xmm5 psraw xmm2, 7 packuswb xmm2, xmm2 movq MMWORD PTR [rdi], xmm2 %if ABI_IS_32BIT add rdi, DWORD PTR arg(3) ;[out_pitch] %else add rdi, r8 %endif dec rcx jnz .vp8_filter_block1d8_v4_ssse3_loop ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS 
pop rbp ret ;void vp8_filter_block1d4_v6_ssse3 ;( ; unsigned char *src_ptr, ; unsigned int src_pitch, ; unsigned char *output_ptr, ; unsigned int out_pitch, ; unsigned int output_height, ; unsigned int vp8_filter_index ;) global sym(vp8_filter_block1d4_v6_ssse3) PRIVATE sym(vp8_filter_block1d4_v6_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 GET_GOT rbx push rsi push rdi ; end prolog movsxd rdx, DWORD PTR arg(5) ;table index xor rsi, rsi shl rdx, 4 ; lea rax, [GLOBAL(k0_k5)] add rax, rdx movsxd rdx, DWORD PTR arg(1) ;pixels_per_line mov rdi, arg(2) ;output_ptr %if ABI_IS_32BIT=0 movsxd r8, DWORD PTR arg(3) ; out_pitch %endif movsxd rcx, DWORD PTR arg(4) ;[output_height] cmp esi, DWORD PTR [rax] je .vp8_filter_block1d4_v4_ssse3 movq mm5, MMWORD PTR [rax] ;k0_k5 movq mm6, MMWORD PTR [rax+256] ;k2_k4 movq mm7, MMWORD PTR [rax+128] ;k1_k3 mov rsi, arg(0) ;src_ptr mov rax, rsi add rax, rdx .vp8_filter_block1d4_v6_ssse3_loop: movd mm1, DWORD PTR [rsi] ;A movd mm2, DWORD PTR [rsi + rdx] ;B movd mm3, DWORD PTR [rsi + rdx * 2] ;C movd mm4, DWORD PTR [rax + rdx * 2] ;D movd mm0, DWORD PTR [rsi + rdx * 4] ;E punpcklbw mm2, mm4 ;B D punpcklbw mm3, mm0 ;C E movd mm0, DWORD PTR [rax + rdx * 4] ;F movq mm4, [GLOBAL(rd)] pmaddubsw mm3, mm6 punpcklbw mm1, mm0 ;A F pmaddubsw mm2, mm7 pmaddubsw mm1, mm5 add rsi, rdx add rax, rdx ;-- ;-- paddsw mm2, mm3 paddsw mm2, mm1 paddsw mm2, mm4 psraw mm2, 7 packuswb mm2, mm2 movd DWORD PTR [rdi], mm2 %if ABI_IS_32BIT add rdi, DWORD PTR arg(3) ;[out_pitch] %else add rdi, r8 %endif dec rcx jnz .vp8_filter_block1d4_v6_ssse3_loop ; begin epilog pop rdi pop rsi RESTORE_GOT UNSHADOW_ARGS pop rbp ret .vp8_filter_block1d4_v4_ssse3: movq mm6, MMWORD PTR [rax+256] ;k2_k4 movq mm7, MMWORD PTR [rax+128] ;k1_k3 movq mm5, MMWORD PTR [GLOBAL(rd)] mov rsi, arg(0) ;src_ptr mov rax, rsi add rax, rdx .vp8_filter_block1d4_v4_ssse3_loop: movd mm2, DWORD PTR [rsi + rdx] ;B movd mm3, DWORD PTR [rsi + rdx * 2] ;C movd mm4, DWORD PTR [rax + rdx * 2] ;D movd mm0, DWORD PTR [rsi + rdx * 4] ;E punpcklbw mm2, mm4 ;B D punpcklbw mm3, mm0 ;C E pmaddubsw mm3, mm6 pmaddubsw mm2, mm7 add rsi, rdx add rax, rdx ;-- ;-- paddsw mm2, mm3 paddsw mm2, mm5 psraw mm2, 7 packuswb mm2, mm2 movd DWORD PTR [rdi], mm2 %if ABI_IS_32BIT add rdi, DWORD PTR arg(3) ;[out_pitch] %else add rdi, r8 %endif dec rcx jnz .vp8_filter_block1d4_v4_ssse3_loop ; begin epilog pop rdi pop rsi RESTORE_GOT UNSHADOW_ARGS pop rbp ret ;void vp8_bilinear_predict16x16_ssse3 ;( ; unsigned char *src_ptr, ; int src_pixels_per_line, ; int xoffset, ; int yoffset, ; unsigned char *dst_ptr, ; int dst_pitch ;) global sym(vp8_bilinear_predict16x16_ssse3) PRIVATE sym(vp8_bilinear_predict16x16_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] movsxd rax, dword ptr arg(2) ; xoffset cmp rax, 0 ; skip first_pass filter if xoffset=0 je .b16x16_sp_only shl rax, 4 lea rax, [rax + rcx] ; HFilter mov rdi, arg(4) ; dst_ptr mov rsi, arg(0) ; src_ptr movsxd rdx, dword ptr arg(5) ; dst_pitch movdqa xmm1, [rax] movsxd rax, dword ptr arg(3) ; yoffset cmp rax, 0 ; skip second_pass filter if yoffset=0 je .b16x16_fp_only shl rax, 4 lea rax, [rax + rcx] ; VFilter lea rcx, [rdi+rdx*8] lea rcx, [rcx+rdx*8] movsxd rdx, dword ptr arg(1) ; src_pixels_per_line movdqa xmm2, [rax] %if ABI_IS_32BIT=0 movsxd r8, dword ptr arg(5) ; dst_pitch %endif movq xmm3, [rsi] ; 00 01 02 03 04 05 06 07 movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08 punpcklbw xmm3, xmm5 ; 00 01 01 02 02 
03 03 04 04 05 05 06 06 07 07 08 movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15 movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16 lea rsi, [rsi + rdx] ; next line pmaddubsw xmm3, xmm1 ; 00 02 04 06 08 10 12 14 punpcklbw xmm4, xmm5 ; 08 09 09 10 10 11 11 12 12 13 13 14 14 15 15 16 pmaddubsw xmm4, xmm1 ; 01 03 05 07 09 11 13 15 paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 movdqa xmm7, xmm3 packuswb xmm7, xmm4 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 .next_row: movq xmm6, [rsi] ; 00 01 02 03 04 05 06 07 movq xmm5, [rsi+1] ; 01 02 03 04 05 06 07 08 punpcklbw xmm6, xmm5 movq xmm4, [rsi+8] ; 08 09 10 11 12 13 14 15 movq xmm5, [rsi+9] ; 09 10 11 12 13 14 15 16 lea rsi, [rsi + rdx] ; next line pmaddubsw xmm6, xmm1 punpcklbw xmm4, xmm5 pmaddubsw xmm4, xmm1 paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 paddw xmm4, [GLOBAL(rd)] ; xmm4 += round value psraw xmm4, VP8_FILTER_SHIFT ; xmm4 /= 128 packuswb xmm6, xmm4 movdqa xmm5, xmm7 punpcklbw xmm5, xmm6 pmaddubsw xmm5, xmm2 punpckhbw xmm7, xmm6 pmaddubsw xmm7, xmm2 paddw xmm5, [GLOBAL(rd)] ; xmm5 += round value psraw xmm5, VP8_FILTER_SHIFT ; xmm5 /= 128 paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 packuswb xmm5, xmm7 movdqa xmm7, xmm6 movdqa [rdi], xmm5 ; store the results in the destination %if ABI_IS_32BIT add rdi, DWORD PTR arg(5) ; dst_pitch %else add rdi, r8 %endif cmp rdi, rcx jne .next_row jmp .done .b16x16_sp_only: movsxd rax, dword ptr arg(3) ; yoffset shl rax, 4 lea rax, [rax + rcx] ; VFilter mov rdi, arg(4) ; dst_ptr mov rsi, arg(0) ; src_ptr movsxd rdx, dword ptr arg(5) ; dst_pitch movdqa xmm1, [rax] ; VFilter lea rcx, [rdi+rdx*8] lea rcx, [rcx+rdx*8] movsxd rax, dword ptr arg(1) ; src_pixels_per_line ; get the first horizontal line done movq xmm4, [rsi] ; load row 0 movq xmm2, [rsi + 8] ; load row 0 lea rsi, [rsi + rax] ; next line .next_row_sp: movq xmm3, [rsi] ; load row + 1 movq xmm5, [rsi + 8] ; load row + 1 punpcklbw xmm4, xmm3 punpcklbw xmm2, xmm5 pmaddubsw xmm4, xmm1 movq xmm7, [rsi + rax] ; load row + 2 pmaddubsw xmm2, xmm1 movq xmm6, [rsi + rax + 8] ; load row + 2 punpcklbw xmm3, xmm7 punpcklbw xmm5, xmm6 pmaddubsw xmm3, xmm1 paddw xmm4, [GLOBAL(rd)] pmaddubsw xmm5, xmm1 paddw xmm2, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT psraw xmm2, VP8_FILTER_SHIFT packuswb xmm4, xmm2 paddw xmm3, [GLOBAL(rd)] movdqa [rdi], xmm4 ; store row 0 paddw xmm5, [GLOBAL(rd)] psraw xmm3, VP8_FILTER_SHIFT psraw xmm5, VP8_FILTER_SHIFT packuswb xmm3, xmm5 movdqa xmm4, xmm7 movdqa [rdi + rdx],xmm3 ; store row 1 lea rsi, [rsi + 2*rax] movdqa xmm2, xmm6 lea rdi, [rdi + 2*rdx] cmp rdi, rcx jne .next_row_sp jmp .done .b16x16_fp_only: lea rcx, [rdi+rdx*8] lea rcx, [rcx+rdx*8] movsxd rax, dword ptr arg(1) ; src_pixels_per_line .next_row_fp: movq xmm2, [rsi] ; 00 01 02 03 04 05 06 07 movq xmm4, [rsi+1] ; 01 02 03 04 05 06 07 08 punpcklbw xmm2, xmm4 movq xmm3, [rsi+8] ; 08 09 10 11 12 13 14 15 pmaddubsw xmm2, xmm1 movq xmm4, [rsi+9] ; 09 10 11 12 13 14 15 16 lea rsi, [rsi + rax] ; next line punpcklbw xmm3, xmm4 pmaddubsw xmm3, xmm1 movq xmm5, [rsi] paddw xmm2, [GLOBAL(rd)] movq xmm7, [rsi+1] movq xmm6, [rsi+8] psraw xmm2, VP8_FILTER_SHIFT punpcklbw xmm5, xmm7 movq xmm7, [rsi+9] paddw xmm3, [GLOBAL(rd)] pmaddubsw xmm5, xmm1 psraw xmm3, VP8_FILTER_SHIFT punpcklbw xmm6, xmm7 packuswb xmm2, xmm3 pmaddubsw xmm6, xmm1 movdqa [rdi], xmm2 ; store the 
results in the destination paddw xmm5, [GLOBAL(rd)] lea rdi, [rdi + rdx] ; dst_pitch psraw xmm5, VP8_FILTER_SHIFT paddw xmm6, [GLOBAL(rd)] psraw xmm6, VP8_FILTER_SHIFT packuswb xmm5, xmm6 lea rsi, [rsi + rax] ; next line movdqa [rdi], xmm5 ; store the results in the destination lea rdi, [rdi + rdx] ; dst_pitch cmp rdi, rcx jne .next_row_fp .done: ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp8_bilinear_predict8x8_ssse3 ;( ; unsigned char *src_ptr, ; int src_pixels_per_line, ; int xoffset, ; int yoffset, ; unsigned char *dst_ptr, ; int dst_pitch ;) global sym(vp8_bilinear_predict8x8_ssse3) PRIVATE sym(vp8_bilinear_predict8x8_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 144 ; reserve 144 bytes lea rcx, [GLOBAL(vp8_bilinear_filters_ssse3)] mov rsi, arg(0) ;src_ptr movsxd rdx, dword ptr arg(1) ;src_pixels_per_line ;Read 9-line unaligned data in and put them on stack. This gives a big ;performance boost. movdqu xmm0, [rsi] lea rax, [rdx + rdx*2] movdqu xmm1, [rsi+rdx] movdqu xmm2, [rsi+rdx*2] add rsi, rax movdqu xmm3, [rsi] movdqu xmm4, [rsi+rdx] movdqu xmm5, [rsi+rdx*2] add rsi, rax movdqu xmm6, [rsi] movdqu xmm7, [rsi+rdx] movdqa XMMWORD PTR [rsp], xmm0 movdqu xmm0, [rsi+rdx*2] movdqa XMMWORD PTR [rsp+16], xmm1 movdqa XMMWORD PTR [rsp+32], xmm2 movdqa XMMWORD PTR [rsp+48], xmm3 movdqa XMMWORD PTR [rsp+64], xmm4 movdqa XMMWORD PTR [rsp+80], xmm5 movdqa XMMWORD PTR [rsp+96], xmm6 movdqa XMMWORD PTR [rsp+112], xmm7 movdqa XMMWORD PTR [rsp+128], xmm0 movsxd rax, dword ptr arg(2) ; xoffset cmp rax, 0 ; skip first_pass filter if xoffset=0 je .b8x8_sp_only shl rax, 4 add rax, rcx ; HFilter mov rdi, arg(4) ; dst_ptr movsxd rdx, dword ptr arg(5) ; dst_pitch movdqa xmm0, [rax] movsxd rax, dword ptr arg(3) ; yoffset cmp rax, 0 ; skip second_pass filter if yoffset=0 je .b8x8_fp_only shl rax, 4 lea rax, [rax + rcx] ; VFilter lea rcx, [rdi+rdx*8] movdqa xmm1, [rax] ; get the first horizontal line done movdqa xmm3, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 movdqa xmm5, xmm3 ; 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 xx psrldq xmm5, 1 lea rsp, [rsp + 16] ; next line punpcklbw xmm3, xmm5 ; 00 01 01 02 02 03 03 04 04 05 05 06 06 07 07 08 pmaddubsw xmm3, xmm0 ; 00 02 04 06 08 10 12 14 paddw xmm3, [GLOBAL(rd)] ; xmm3 += round value psraw xmm3, VP8_FILTER_SHIFT ; xmm3 /= 128 movdqa xmm7, xmm3 packuswb xmm7, xmm7 ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 .next_row: movdqa xmm6, [rsp] ; 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15 lea rsp, [rsp + 16] ; next line movdqa xmm5, xmm6 psrldq xmm5, 1 punpcklbw xmm6, xmm5 pmaddubsw xmm6, xmm0 paddw xmm6, [GLOBAL(rd)] ; xmm6 += round value psraw xmm6, VP8_FILTER_SHIFT ; xmm6 /= 128 packuswb xmm6, xmm6 punpcklbw xmm7, xmm6 pmaddubsw xmm7, xmm1 paddw xmm7, [GLOBAL(rd)] ; xmm7 += round value psraw xmm7, VP8_FILTER_SHIFT ; xmm7 /= 128 packuswb xmm7, xmm7 movq [rdi], xmm7 ; store the results in the destination lea rdi, [rdi + rdx] movdqa xmm7, xmm6 cmp rdi, rcx jne .next_row jmp .done8x8 .b8x8_sp_only: movsxd rax, dword ptr arg(3) ; yoffset shl rax, 4 lea rax, [rax + rcx] ; VFilter mov rdi, arg(4) ;dst_ptr movsxd rdx, dword ptr arg(5) ; dst_pitch movdqa xmm0, [rax] ; VFilter movq xmm1, XMMWORD PTR [rsp] movq xmm2, XMMWORD PTR [rsp+16] movq xmm3, XMMWORD PTR [rsp+32] punpcklbw xmm1, xmm2 movq xmm4, XMMWORD PTR [rsp+48] punpcklbw xmm2, xmm3 movq xmm5, XMMWORD PTR [rsp+64] punpcklbw xmm3, xmm4 movq xmm6, XMMWORD PTR 
[rsp+80] punpcklbw xmm4, xmm5 movq xmm7, XMMWORD PTR [rsp+96] punpcklbw xmm5, xmm6 ; Because the source register (xmm0) is always treated as signed by ; pmaddubsw, the constant '128' is treated as '-128'. pmaddubsw xmm1, xmm0 pmaddubsw xmm2, xmm0 pmaddubsw xmm3, xmm0 pmaddubsw xmm4, xmm0 pmaddubsw xmm5, xmm0 punpcklbw xmm6, xmm7 pmaddubsw xmm6, xmm0 paddw xmm1, [GLOBAL(rd)] paddw xmm2, [GLOBAL(rd)] psraw xmm1, VP8_FILTER_SHIFT paddw xmm3, [GLOBAL(rd)] psraw xmm2, VP8_FILTER_SHIFT paddw xmm4, [GLOBAL(rd)] psraw xmm3, VP8_FILTER_SHIFT paddw xmm5, [GLOBAL(rd)] psraw xmm4, VP8_FILTER_SHIFT paddw xmm6, [GLOBAL(rd)] psraw xmm5, VP8_FILTER_SHIFT psraw xmm6, VP8_FILTER_SHIFT ; Having multiplied everything by '-128' and obtained negative ; numbers, the unsigned saturation truncates those values to 0, ; resulting in incorrect handling of xoffset == 0 && yoffset == 0 packuswb xmm1, xmm1 packuswb xmm2, xmm2 movq [rdi], xmm1 packuswb xmm3, xmm3 movq [rdi+rdx], xmm2 packuswb xmm4, xmm4 movq xmm1, XMMWORD PTR [rsp+112] lea rdi, [rdi + 2*rdx] movq xmm2, XMMWORD PTR [rsp+128] packuswb xmm5, xmm5 movq [rdi], xmm3 packuswb xmm6, xmm6 movq [rdi+rdx], xmm4 lea rdi, [rdi + 2*rdx] punpcklbw xmm7, xmm1 movq [rdi], xmm5 pmaddubsw xmm7, xmm0 movq [rdi+rdx], xmm6 punpcklbw xmm1, xmm2 pmaddubsw xmm1, xmm0 paddw xmm7, [GLOBAL(rd)] psraw xmm7, VP8_FILTER_SHIFT paddw xmm1, [GLOBAL(rd)] psraw xmm1, VP8_FILTER_SHIFT packuswb xmm7, xmm7 packuswb xmm1, xmm1 lea rdi, [rdi + 2*rdx] movq [rdi], xmm7 movq [rdi+rdx], xmm1 lea rsp, [rsp + 144] jmp .done8x8 .b8x8_fp_only: lea rcx, [rdi+rdx*8] .next_row_fp: movdqa xmm1, XMMWORD PTR [rsp] movdqa xmm3, XMMWORD PTR [rsp+16] movdqa xmm2, xmm1 movdqa xmm5, XMMWORD PTR [rsp+32] psrldq xmm2, 1 movdqa xmm7, XMMWORD PTR [rsp+48] movdqa xmm4, xmm3 psrldq xmm4, 1 movdqa xmm6, xmm5 psrldq xmm6, 1 punpcklbw xmm1, xmm2 pmaddubsw xmm1, xmm0 punpcklbw xmm3, xmm4 pmaddubsw xmm3, xmm0 punpcklbw xmm5, xmm6 pmaddubsw xmm5, xmm0 movdqa xmm2, xmm7 psrldq xmm2, 1 punpcklbw xmm7, xmm2 pmaddubsw xmm7, xmm0 paddw xmm1, [GLOBAL(rd)] psraw xmm1, VP8_FILTER_SHIFT paddw xmm3, [GLOBAL(rd)] psraw xmm3, VP8_FILTER_SHIFT paddw xmm5, [GLOBAL(rd)] psraw xmm5, VP8_FILTER_SHIFT paddw xmm7, [GLOBAL(rd)] psraw xmm7, VP8_FILTER_SHIFT packuswb xmm1, xmm1 packuswb xmm3, xmm3 packuswb xmm5, xmm5 movq [rdi], xmm1 packuswb xmm7, xmm7 movq [rdi+rdx], xmm3 lea rdi, [rdi + 2*rdx] movq [rdi], xmm5 lea rsp, [rsp + 4*16] movq [rdi+rdx], xmm7 lea rdi, [rdi + 2*rdx] cmp rdi, rcx jne .next_row_fp lea rsp, [rsp + 16] .done8x8: ;add rsp, 144 pop rsp ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret SECTION_RODATA align 16 shuf1b: db 0, 5, 1, 6, 2, 7, 3, 8, 4, 9, 5, 10, 6, 11, 7, 12 shuf2b: db 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10, 9, 11 shuf3b: db 1, 3, 2, 4, 3, 5, 4, 6, 5, 7, 6, 8, 7, 9, 8, 10 align 16 shuf2bfrom1: db 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11, 9,13 align 16 shuf3bfrom1: db 2, 6, 4, 8, 6, 1, 8, 3, 1, 5, 3, 7, 5, 9, 7,11 align 16 rd: times 8 dw 0x40 align 16 k0_k5: times 8 db 0, 0 ;placeholder times 8 db 0, 0 times 8 db 2, 1 times 8 db 0, 0 times 8 db 3, 3 times 8 db 0, 0 times 8 db 1, 2 times 8 db 0, 0 k1_k3: times 8 db 0, 0 ;placeholder times 8 db -6, 12 times 8 db -11, 36 times 8 db -9, 50 times 8 db -16, 77 times 8 db -6, 93 times 8 db -8, 108 times 8 db -1, 123 k2_k4: times 8 db 128, 0 ;placeholder times 8 db 123, -1 times 8 db 108, -8 times 8 db 93, -6 times 8 db 77, -16 times 8 db 50, -9 times 8 db 36, -11 times 8 db 12, -6 align 16 vp8_bilinear_filters_ssse3: times 8 db 128, 0 
times 8 db 112, 16 times 8 db 96, 32 times 8 db 80, 48 times 8 db 64, 64 times 8 db 48, 80 times 8 db 32, 96 times 8 db 16, 112 libvpx-1.8.2/vp8/common/x86/vp8_asm_stubs.c000066400000000000000000000411341357355204000204300ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" #include "vp8_rtcd.h" #include "vpx_ports/mem.h" extern const short vp8_six_tap_x86[8][6 * 8]; extern void vp8_filter_block1d_h6_mmx(unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, const short *vp8_filter); extern void vp8_filter_block1dc_v6_mmx( unsigned short *src_ptr, unsigned char *output_ptr, int output_pitch, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, const short *vp8_filter); extern void vp8_filter_block1d8_h6_sse2(unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, const short *vp8_filter); extern void vp8_filter_block1d16_h6_sse2(unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, const short *vp8_filter); extern void vp8_filter_block1d8_v6_sse2( unsigned short *src_ptr, unsigned char *output_ptr, int dst_ptich, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, const short *vp8_filter); extern void vp8_filter_block1d16_v6_sse2( unsigned short *src_ptr, unsigned char *output_ptr, int dst_ptich, unsigned int pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, const short *vp8_filter); extern void vp8_unpack_block1d16_h6_sse2(unsigned char *src_ptr, unsigned short *output_ptr, unsigned int src_pixels_per_line, unsigned int output_height, unsigned int output_width); extern void vp8_filter_block1d8_h6_only_sse2(unsigned char *src_ptr, unsigned int src_pixels_per_line, unsigned char *output_ptr, int dst_ptich, unsigned int output_height, const short *vp8_filter); extern void vp8_filter_block1d16_h6_only_sse2(unsigned char *src_ptr, unsigned int src_pixels_per_line, unsigned char *output_ptr, int dst_ptich, unsigned int output_height, const short *vp8_filter); extern void vp8_filter_block1d8_v6_only_sse2(unsigned char *src_ptr, unsigned int src_pixels_per_line, unsigned char *output_ptr, int dst_ptich, unsigned int output_height, const short *vp8_filter); #if HAVE_MMX void vp8_sixtap_predict4x4_mmx(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { DECLARE_ALIGNED(16, unsigned short, FData2[16 * 16]); /* Temp data bufffer used in filtering */ const short *HFilter, *VFilter; HFilter = vp8_six_tap_x86[xoffset]; vp8_filter_block1d_h6_mmx(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 8, HFilter); VFilter = vp8_six_tap_x86[yoffset]; vp8_filter_block1dc_v6_mmx(FData2 + 8, dst_ptr, dst_pitch, 8, 4, 4, 4, VFilter); } #endif #if HAVE_SSE2 
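/* The sixtap predictors below all follow one dispatch pattern, sketched
 * here in outline (hfilter/vfilter names are descriptive placeholders,
 * not real functions):
 *
 *   if (xoffset) {
 *     if (yoffset) {   // both passes: filter into FData2, then vertically
 *       hfilter(src - 2 * stride, FData2, height + 5, ...);
 *       vfilter(FData2 + first_row_offset, dst, height, ...);
 *     } else {         // first pass only, straight to dst
 *       hfilter_only(src, dst, height, ...);
 *     }
 *   } else {           // second pass only (or unpack + vfilter for 16x16)
 *     vfilter_only(src - 2 * stride, dst, height, ...);
 *   }
 *
 * FData2 holds height + 5 intermediate rows because the vertical 6-tap
 * needs 2 rows of context above the block and 3 below it.
 */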
void vp8_sixtap_predict16x16_sse2(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { DECLARE_ALIGNED(16, unsigned short, FData2[24 * 24]); /* Temp data buffer used in filtering */ const short *HFilter, *VFilter; if (xoffset) { if (yoffset) { HFilter = vp8_six_tap_x86[xoffset]; vp8_filter_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 21, 32, HFilter); VFilter = vp8_six_tap_x86[yoffset]; vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16, 16, dst_pitch, VFilter); } else { /* First-pass only */ HFilter = vp8_six_tap_x86[xoffset]; vp8_filter_block1d16_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, HFilter); } } else { /* Second-pass only */ VFilter = vp8_six_tap_x86[yoffset]; vp8_unpack_block1d16_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 21, 32); vp8_filter_block1d16_v6_sse2(FData2 + 32, dst_ptr, dst_pitch, 32, 16, 16, dst_pitch, VFilter); } } void vp8_sixtap_predict8x8_sse2(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */ const short *HFilter, *VFilter; if (xoffset) { if (yoffset) { HFilter = vp8_six_tap_x86[xoffset]; vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 13, 16, HFilter); VFilter = vp8_six_tap_x86[yoffset]; vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8, 8, dst_pitch, VFilter); } else { /* First-pass only */ HFilter = vp8_six_tap_x86[xoffset]; vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, HFilter); } } else { /* Second-pass only */ VFilter = vp8_six_tap_x86[yoffset]; vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, VFilter); } } void vp8_sixtap_predict8x4_sse2(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { DECLARE_ALIGNED(16, unsigned short, FData2[256]); /* Temp data buffer used in filtering */ const short *HFilter, *VFilter; if (xoffset) { if (yoffset) { HFilter = vp8_six_tap_x86[xoffset]; vp8_filter_block1d8_h6_sse2(src_ptr - (2 * src_pixels_per_line), FData2, src_pixels_per_line, 1, 9, 16, HFilter); VFilter = vp8_six_tap_x86[yoffset]; vp8_filter_block1d8_v6_sse2(FData2 + 16, dst_ptr, dst_pitch, 16, 8, 4, dst_pitch, VFilter); } else { /* First-pass only */ HFilter = vp8_six_tap_x86[xoffset]; vp8_filter_block1d8_h6_only_sse2(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, HFilter); } } else { /* Second-pass only */ VFilter = vp8_six_tap_x86[yoffset]; vp8_filter_block1d8_v6_only_sse2(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, VFilter); } } #endif #if HAVE_SSSE3 extern void vp8_filter_block1d8_h6_ssse3(unsigned char *src_ptr, unsigned int src_pixels_per_line, unsigned char *output_ptr, unsigned int output_pitch, unsigned int output_height, unsigned int vp8_filter_index); extern void vp8_filter_block1d16_h6_ssse3(unsigned char *src_ptr, unsigned int src_pixels_per_line, unsigned char *output_ptr, unsigned int output_pitch, unsigned int output_height, unsigned int vp8_filter_index); extern void vp8_filter_block1d16_v6_ssse3(unsigned char *src_ptr, unsigned int src_pitch, unsigned char *output_ptr, unsigned int out_pitch, unsigned int output_height, unsigned int
vp8_filter_index); extern void vp8_filter_block1d8_v6_ssse3(unsigned char *src_ptr, unsigned int src_pitch, unsigned char *output_ptr, unsigned int out_pitch, unsigned int output_height, unsigned int vp8_filter_index); extern void vp8_filter_block1d4_h6_ssse3(unsigned char *src_ptr, unsigned int src_pixels_per_line, unsigned char *output_ptr, unsigned int output_pitch, unsigned int output_height, unsigned int vp8_filter_index); extern void vp8_filter_block1d4_v6_ssse3(unsigned char *src_ptr, unsigned int src_pitch, unsigned char *output_ptr, unsigned int out_pitch, unsigned int output_height, unsigned int vp8_filter_index); void vp8_sixtap_predict16x16_ssse3(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { DECLARE_ALIGNED(16, unsigned char, FData2[24 * 24]); if (xoffset) { if (yoffset) { vp8_filter_block1d16_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 16, 21, xoffset); vp8_filter_block1d16_v6_ssse3(FData2, 16, dst_ptr, dst_pitch, 16, yoffset); } else { /* First-pass only */ vp8_filter_block1d16_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 16, xoffset); } } else { if (yoffset) { /* Second-pass only */ vp8_filter_block1d16_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 16, yoffset); } else { /* ssse3 second-pass only function couldn't handle (xoffset==0 && * yoffset==0) case correctly. Add copy function here to guarantee * six-tap function handles all possible offsets. */ vp8_copy_mem16x16(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); } } } void vp8_sixtap_predict8x8_ssse3(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { DECLARE_ALIGNED(16, unsigned char, FData2[256]); if (xoffset) { if (yoffset) { vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 13, xoffset); vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 8, yoffset); } else { vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 8, xoffset); } } else { if (yoffset) { /* Second-pass only */ vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 8, yoffset); } else { /* ssse3 second-pass only function couldn't handle (xoffset==0 && * yoffset==0) case correctly. Add copy function here to guarantee * six-tap function handles all possible offsets. */ vp8_copy_mem8x8(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); } } } void vp8_sixtap_predict8x4_ssse3(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { DECLARE_ALIGNED(16, unsigned char, FData2[256]); if (xoffset) { if (yoffset) { vp8_filter_block1d8_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 8, 9, xoffset); vp8_filter_block1d8_v6_ssse3(FData2, 8, dst_ptr, dst_pitch, 4, yoffset); } else { /* First-pass only */ vp8_filter_block1d8_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); } } else { if (yoffset) { /* Second-pass only */ vp8_filter_block1d8_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); } else { /* ssse3 second-pass only function couldn't handle (xoffset==0 && * yoffset==0) case correctly. Add copy function here to guarantee * six-tap function handles all possible offsets. 
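 * (Editorial note, not part of the original libvpx source: with
 * xoffset == 0 && yoffset == 0 the filter degenerates to the identity, and
 * the offset-0 rows of the ssse3 tap tables are placeholders, so a plain
 * pixel copy is the correct and cheapest substitute.)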
*/ vp8_copy_mem8x4(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch); } } } void vp8_sixtap_predict4x4_ssse3(unsigned char *src_ptr, int src_pixels_per_line, int xoffset, int yoffset, unsigned char *dst_ptr, int dst_pitch) { DECLARE_ALIGNED(16, unsigned char, FData2[4 * 9]); if (xoffset) { if (yoffset) { vp8_filter_block1d4_h6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, FData2, 4, 9, xoffset); vp8_filter_block1d4_v6_ssse3(FData2, 4, dst_ptr, dst_pitch, 4, yoffset); } else { vp8_filter_block1d4_h6_ssse3(src_ptr, src_pixels_per_line, dst_ptr, dst_pitch, 4, xoffset); } } else { if (yoffset) { vp8_filter_block1d4_v6_ssse3(src_ptr - (2 * src_pixels_per_line), src_pixels_per_line, dst_ptr, dst_pitch, 4, yoffset); } else { /* ssse3 second-pass only function couldn't handle (xoffset==0 && * yoffset==0) case correctly. Add copy function here to guarantee * six-tap function handles all possible offsets. */ int r; for (r = 0; r < 4; ++r) { dst_ptr[0] = src_ptr[0]; dst_ptr[1] = src_ptr[1]; dst_ptr[2] = src_ptr[2]; dst_ptr[3] = src_ptr[3]; dst_ptr += dst_pitch; src_ptr += src_pixels_per_line; } } } } #endif libvpx-1.8.2/vp8/decoder/000077500000000000000000000000001357355204000151545ustar00rootroot00000000000000libvpx-1.8.2/vp8/decoder/dboolhuff.c000066400000000000000000000036171357355204000172770ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "dboolhuff.h" #include "vp8/common/common.h" #include "vpx_dsp/vpx_dsp_common.h" int vp8dx_start_decode(BOOL_DECODER *br, const unsigned char *source, unsigned int source_sz, vpx_decrypt_cb decrypt_cb, void *decrypt_state) { br->user_buffer_end = source + source_sz; br->user_buffer = source; br->value = 0; br->count = -8; br->range = 255; br->decrypt_cb = decrypt_cb; br->decrypt_state = decrypt_state; if (source_sz && !source) return 1; /* Populate the buffer */ vp8dx_bool_decoder_fill(br); return 0; } void vp8dx_bool_decoder_fill(BOOL_DECODER *br) { const unsigned char *bufptr = br->user_buffer; VP8_BD_VALUE value = br->value; int count = br->count; int shift = VP8_BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); size_t bytes_left = br->user_buffer_end - bufptr; size_t bits_left = bytes_left * CHAR_BIT; int x = shift + CHAR_BIT - (int)bits_left; int loop_end = 0; unsigned char decrypted[sizeof(VP8_BD_VALUE) + 1]; if (br->decrypt_cb) { size_t n = VPXMIN(sizeof(decrypted), bytes_left); br->decrypt_cb(br->decrypt_state, bufptr, decrypted, (int)n); bufptr = decrypted; } if (x >= 0) { count += VP8_LOTS_OF_BITS; loop_end = x; } if (x < 0 || bits_left) { while (shift >= loop_end) { count += CHAR_BIT; value |= (VP8_BD_VALUE)*bufptr << shift; ++bufptr; ++br->user_buffer; shift -= CHAR_BIT; } } br->value = value; br->count = count; } libvpx-1.8.2/vp8/decoder/dboolhuff.h000066400000000000000000000066401357355204000173030ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_DECODER_DBOOLHUFF_H_ #define VPX_VP8_DECODER_DBOOLHUFF_H_ #include <stddef.h> #include <limits.h> #include "./vpx_config.h" #include "vpx_ports/mem.h" #include "vpx/vp8dx.h" #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif typedef size_t VP8_BD_VALUE; #define VP8_BD_VALUE_SIZE ((int)sizeof(VP8_BD_VALUE) * CHAR_BIT) /*This is meant to be a large, positive constant that can still be efficiently loaded as an immediate (on platforms like ARM, for example). Even relatively modest values like 100 would work fine.*/ #define VP8_LOTS_OF_BITS (0x40000000) typedef struct { const unsigned char *user_buffer_end; const unsigned char *user_buffer; VP8_BD_VALUE value; int count; unsigned int range; vpx_decrypt_cb decrypt_cb; void *decrypt_state; } BOOL_DECODER; DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]); int vp8dx_start_decode(BOOL_DECODER *br, const unsigned char *source, unsigned int source_sz, vpx_decrypt_cb decrypt_cb, void *decrypt_state); void vp8dx_bool_decoder_fill(BOOL_DECODER *br); static int vp8dx_decode_bool(BOOL_DECODER *br, int probability) { unsigned int bit = 0; VP8_BD_VALUE value; unsigned int split; VP8_BD_VALUE bigsplit; int count; unsigned int range; split = 1 + (((br->range - 1) * probability) >> 8); if (br->count < 0) vp8dx_bool_decoder_fill(br); value = br->value; count = br->count; bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); range = split; if (value >= bigsplit) { range = br->range - split; value = value - bigsplit; bit = 1; } { const unsigned char shift = vp8_norm[(unsigned char)range]; range <<= shift; value <<= shift; count -= shift; } br->value = value; br->count = count; br->range = range; return bit; } static INLINE int vp8_decode_value(BOOL_DECODER *br, int bits) { int z = 0; int bit; for (bit = bits - 1; bit >= 0; bit--) { z |= (vp8dx_decode_bool(br, 0x80) << bit); } return z; } static INLINE int vp8dx_bool_error(BOOL_DECODER *br) { /* Check if we have reached the end of the buffer. * * Variable 'count' stores the number of bits in the 'value' buffer, minus * 8. The top byte is part of the algorithm, and the remainder is buffered * to be shifted into it. So if count == 8, the top 16 bits of 'value' are * occupied, 8 for the algorithm and 8 in the buffer. * * When reading a byte from the user's buffer, count is filled with 8 and * one byte is filled into the value buffer. When we reach the end of the * data, count is additionally filled with VP8_LOTS_OF_BITS. So when * count == VP8_LOTS_OF_BITS - 1, the user's data has been exhausted. */ if ((br->count > VP8_BD_VALUE_SIZE) && (br->count < VP8_LOTS_OF_BITS)) { /* We have tried to decode bits after the end of * stream was encountered. */ return 1; } /* No error. */ return 0; } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_DECODER_DBOOLHUFF_H_ libvpx-1.8.2/vp8/decoder/decodeframe.c000066400000000000000000001213421357355204000175610ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include "vpx_config.h" #include "vp8_rtcd.h" #include "./vpx_scale_rtcd.h" #include "onyxd_int.h" #include "vp8/common/header.h" #include "vp8/common/reconintra4x4.h" #include "vp8/common/reconinter.h" #include "detokenize.h" #include "vp8/common/common.h" #include "vp8/common/invtrans.h" #include "vp8/common/alloccommon.h" #include "vp8/common/entropymode.h" #include "vp8/common/quant_common.h" #include "vpx_scale/vpx_scale.h" #include "vp8/common/reconintra.h" #include "vp8/common/setupintrarecon.h" #include "decodemv.h" #include "vp8/common/extend.h" #if CONFIG_ERROR_CONCEALMENT #include "error_concealment.h" #endif #include "vpx_mem/vpx_mem.h" #include "vp8/common/threading.h" #include "decoderthreading.h" #include "dboolhuff.h" #include "vpx_dsp/vpx_dsp_common.h" #include <assert.h> #include <string.h> void vp8cx_init_de_quantizer(VP8D_COMP *pbi) { int Q; VP8_COMMON *const pc = &pbi->common; for (Q = 0; Q < QINDEX_RANGE; ++Q) { pc->Y1dequant[Q][0] = (short)vp8_dc_quant(Q, pc->y1dc_delta_q); pc->Y2dequant[Q][0] = (short)vp8_dc2quant(Q, pc->y2dc_delta_q); pc->UVdequant[Q][0] = (short)vp8_dc_uv_quant(Q, pc->uvdc_delta_q); pc->Y1dequant[Q][1] = (short)vp8_ac_yquant(Q); pc->Y2dequant[Q][1] = (short)vp8_ac2quant(Q, pc->y2ac_delta_q); pc->UVdequant[Q][1] = (short)vp8_ac_uv_quant(Q, pc->uvac_delta_q); } } void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd) { int i; int QIndex; MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; VP8_COMMON *const pc = &pbi->common; /* Decide whether to use the default or alternate baseline Q value. */ if (xd->segmentation_enabled) { /* Abs Value */ if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) { QIndex = xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; /* Delta Value */ } else { QIndex = pc->base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q][mbmi->segment_id]; } QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; /* Clamp to valid range */ } else { QIndex = pc->base_qindex; } /* Set up the macroblock dequant constants */ xd->dequant_y1_dc[0] = 1; xd->dequant_y1[0] = pc->Y1dequant[QIndex][0]; xd->dequant_y2[0] = pc->Y2dequant[QIndex][0]; xd->dequant_uv[0] = pc->UVdequant[QIndex][0]; for (i = 1; i < 16; ++i) { xd->dequant_y1_dc[i] = xd->dequant_y1[i] = pc->Y1dequant[QIndex][1]; xd->dequant_y2[i] = pc->Y2dequant[QIndex][1]; xd->dequant_uv[i] = pc->UVdequant[QIndex][1]; } } static void decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_idx) { MB_PREDICTION_MODE mode; int i; #if CONFIG_ERROR_CONCEALMENT int corruption_detected = 0; #else (void)mb_idx; #endif if (xd->mode_info_context->mbmi.mb_skip_coeff) { vp8_reset_mb_tokens_context(xd); } else if (!vp8dx_bool_error(xd->current_bc)) { int eobtotal; eobtotal = vp8_decode_mb_tokens(pbi, xd); /* Special case: Force the loopfilter to skip when eobtotal is zero */ xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal == 0); } mode = xd->mode_info_context->mbmi.mode; if (xd->segmentation_enabled) vp8_mb_init_dequantizer(pbi, xd); #if CONFIG_ERROR_CONCEALMENT if (pbi->ec_active) { int throw_residual; /* When we have independent partitions we can apply residual even * though other partitions within the frame are corrupt. */ throw_residual = (!pbi->independent_partitions && pbi->frame_corrupt_residual); throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc)); if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) { /* MB with corrupt residuals or corrupt mode/motion vectors. * Better to use the predictor as reconstruction.
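 * (Editorial note, not part of the original libvpx source: "use the
 * predictor" is implemented by zeroing xd->qcoeff and xd->eobs below, so
 * the inverse transforms add no residual and the reconstruction is exactly
 * the intra/inter prediction.)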
*/ pbi->frame_corrupt_residual = 1; memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); corruption_detected = 1; /* force idct to be skipped for B_PRED and use the * prediction only for reconstruction * */ memset(xd->eobs, 0, 25); } } #endif /* do prediction */ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { vp8_build_intra_predictors_mbuv_s( xd, xd->recon_above[1], xd->recon_above[2], xd->recon_left[1], xd->recon_left[2], xd->recon_left_stride[1], xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride); if (mode != B_PRED) { vp8_build_intra_predictors_mby_s( xd, xd->recon_above[0], xd->recon_left[0], xd->recon_left_stride[0], xd->dst.y_buffer, xd->dst.y_stride); } else { short *DQC = xd->dequant_y1; int dst_stride = xd->dst.y_stride; /* clear out residual eob info */ if (xd->mode_info_context->mbmi.mb_skip_coeff) memset(xd->eobs, 0, 25); intra_prediction_down_copy(xd, xd->recon_above[0] + 16); for (i = 0; i < 16; ++i) { BLOCKD *b = &xd->block[i]; unsigned char *dst = xd->dst.y_buffer + b->offset; B_PREDICTION_MODE b_mode = xd->mode_info_context->bmi[i].as_mode; unsigned char *Above = dst - dst_stride; unsigned char *yleft = dst - 1; int left_stride = dst_stride; unsigned char top_left = Above[-1]; vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, dst, dst_stride, top_left); if (xd->eobs[i]) { if (xd->eobs[i] > 1) { vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); } else { vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], dst, dst_stride, dst, dst_stride); memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } } } } } else { vp8_build_inter_predictors_mb(xd); } #if CONFIG_ERROR_CONCEALMENT if (corruption_detected) { return; } #endif if (!xd->mode_info_context->mbmi.mb_skip_coeff) { /* dequantization and idct */ if (mode != B_PRED) { short *DQC = xd->dequant_y1; if (mode != SPLITMV) { BLOCKD *b = &xd->block[24]; /* do 2nd order transform on the dc block */ if (xd->eobs[24] > 1) { vp8_dequantize_b(b, xd->dequant_y2); vp8_short_inv_walsh4x4(&b->dqcoeff[0], xd->qcoeff); memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); } else { b->dqcoeff[0] = (short)(b->qcoeff[0] * xd->dequant_y2[0]); vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], xd->qcoeff); memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } /* override the dc dequant constant in order to preserve the * dc components */ DQC = xd->dequant_y1_dc; } vp8_dequant_idct_add_y_block(xd->qcoeff, DQC, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); } vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs + 16); } } static int get_delta_q(vp8_reader *bc, int prev, int *q_update) { int ret_val = 0; if (vp8_read_bit(bc)) { ret_val = vp8_read_literal(bc, 4); if (vp8_read_bit(bc)) ret_val = -ret_val; } /* Trigger a quantizer update if the delta-q value has changed */ if (ret_val != prev) *q_update = 1; return ret_val; } #ifdef PACKET_TESTING #include <stdio.h> FILE *vpxlog = 0; #endif static void yv12_extend_frame_top_c(YV12_BUFFER_CONFIG *ybf) { int i; unsigned char *src_ptr1; unsigned char *dest_ptr1; unsigned int Border; int plane_stride; /***********/ /* Y Plane */ /***********/ Border = ybf->border; plane_stride = ybf->y_stride; src_ptr1 = ybf->y_buffer - Border; dest_ptr1 = src_ptr1 - (Border * plane_stride); for (i = 0; i < (int)Border; ++i) { memcpy(dest_ptr1, src_ptr1, plane_stride); dest_ptr1 += plane_stride; } /***********/ /* U Plane */ /***********/ plane_stride = ybf->uv_stride; Border /= 2; src_ptr1 = ybf->u_buffer - Border; dest_ptr1 = src_ptr1 - (Border * plane_stride); for (i =
0; i < (int)(Border); ++i) { memcpy(dest_ptr1, src_ptr1, plane_stride); dest_ptr1 += plane_stride; } /***********/ /* V Plane */ /***********/ src_ptr1 = ybf->v_buffer - Border; dest_ptr1 = src_ptr1 - (Border * plane_stride); for (i = 0; i < (int)(Border); ++i) { memcpy(dest_ptr1, src_ptr1, plane_stride); dest_ptr1 += plane_stride; } } static void yv12_extend_frame_bottom_c(YV12_BUFFER_CONFIG *ybf) { int i; unsigned char *src_ptr1, *src_ptr2; unsigned char *dest_ptr2; unsigned int Border; int plane_stride; int plane_height; /***********/ /* Y Plane */ /***********/ Border = ybf->border; plane_stride = ybf->y_stride; plane_height = ybf->y_height; src_ptr1 = ybf->y_buffer - Border; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; dest_ptr2 = src_ptr2 + plane_stride; for (i = 0; i < (int)Border; ++i) { memcpy(dest_ptr2, src_ptr2, plane_stride); dest_ptr2 += plane_stride; } /***********/ /* U Plane */ /***********/ plane_stride = ybf->uv_stride; plane_height = ybf->uv_height; Border /= 2; src_ptr1 = ybf->u_buffer - Border; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; dest_ptr2 = src_ptr2 + plane_stride; for (i = 0; i < (int)(Border); ++i) { memcpy(dest_ptr2, src_ptr2, plane_stride); dest_ptr2 += plane_stride; } /***********/ /* V Plane */ /***********/ src_ptr1 = ybf->v_buffer - Border; src_ptr2 = src_ptr1 + (plane_height * plane_stride) - plane_stride; dest_ptr2 = src_ptr2 + plane_stride; for (i = 0; i < (int)(Border); ++i) { memcpy(dest_ptr2, src_ptr2, plane_stride); dest_ptr2 += plane_stride; } } static void yv12_extend_frame_left_right_c(YV12_BUFFER_CONFIG *ybf, unsigned char *y_src, unsigned char *u_src, unsigned char *v_src) { int i; unsigned char *src_ptr1, *src_ptr2; unsigned char *dest_ptr1, *dest_ptr2; unsigned int Border; int plane_stride; int plane_height; int plane_width; /***********/ /* Y Plane */ /***********/ Border = ybf->border; plane_stride = ybf->y_stride; plane_height = 16; plane_width = ybf->y_width; /* copy the left and right most columns out */ src_ptr1 = y_src; src_ptr2 = src_ptr1 + plane_width - 1; dest_ptr1 = src_ptr1 - Border; dest_ptr2 = src_ptr2 + 1; for (i = 0; i < plane_height; ++i) { memset(dest_ptr1, src_ptr1[0], Border); memset(dest_ptr2, src_ptr2[0], Border); src_ptr1 += plane_stride; src_ptr2 += plane_stride; dest_ptr1 += plane_stride; dest_ptr2 += plane_stride; } /***********/ /* U Plane */ /***********/ plane_stride = ybf->uv_stride; plane_height = 8; plane_width = ybf->uv_width; Border /= 2; /* copy the left and right most columns out */ src_ptr1 = u_src; src_ptr2 = src_ptr1 + plane_width - 1; dest_ptr1 = src_ptr1 - Border; dest_ptr2 = src_ptr2 + 1; for (i = 0; i < plane_height; ++i) { memset(dest_ptr1, src_ptr1[0], Border); memset(dest_ptr2, src_ptr2[0], Border); src_ptr1 += plane_stride; src_ptr2 += plane_stride; dest_ptr1 += plane_stride; dest_ptr2 += plane_stride; } /***********/ /* V Plane */ /***********/ /* copy the left and right most columns out */ src_ptr1 = v_src; src_ptr2 = src_ptr1 + plane_width - 1; dest_ptr1 = src_ptr1 - Border; dest_ptr2 = src_ptr2 + 1; for (i = 0; i < plane_height; ++i) { memset(dest_ptr1, src_ptr1[0], Border); memset(dest_ptr2, src_ptr2[0], Border); src_ptr1 += plane_stride; src_ptr2 += plane_stride; dest_ptr1 += plane_stride; dest_ptr2 += plane_stride; } } static void decode_mb_rows(VP8D_COMP *pbi) { VP8_COMMON *const pc = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; MODE_INFO *lf_mic = xd->mode_info_context; int ibc = 0; int num_part = 1 << pc->multi_token_partition; int 
recon_yoffset, recon_uvoffset; int mb_row, mb_col; int mb_idx = 0; YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; int recon_y_stride = yv12_fb_new->y_stride; int recon_uv_stride = yv12_fb_new->uv_stride; unsigned char *ref_buffer[MAX_REF_FRAMES][3]; unsigned char *dst_buffer[3]; unsigned char *lf_dst[3]; unsigned char *eb_dst[3]; int i; int ref_fb_corrupted[MAX_REF_FRAMES]; ref_fb_corrupted[INTRA_FRAME] = 0; for (i = 1; i < MAX_REF_FRAMES; ++i) { YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; ref_buffer[i][0] = this_fb->y_buffer; ref_buffer[i][1] = this_fb->u_buffer; ref_buffer[i][2] = this_fb->v_buffer; ref_fb_corrupted[i] = this_fb->corrupted; } /* Set up the buffer pointers */ eb_dst[0] = lf_dst[0] = dst_buffer[0] = yv12_fb_new->y_buffer; eb_dst[1] = lf_dst[1] = dst_buffer[1] = yv12_fb_new->u_buffer; eb_dst[2] = lf_dst[2] = dst_buffer[2] = yv12_fb_new->v_buffer; xd->up_available = 0; /* Initialize the loop filter for this frame. */ if (pc->filter_level) vp8_loop_filter_frame_init(pc, xd, pc->filter_level); vp8_setup_intra_recon_top_line(yv12_fb_new); /* Decode the individual macro block */ for (mb_row = 0; mb_row < pc->mb_rows; ++mb_row) { if (num_part > 1) { xd->current_bc = &pbi->mbc[ibc]; ibc++; if (ibc == num_part) ibc = 0; } recon_yoffset = mb_row * recon_y_stride * 16; recon_uvoffset = mb_row * recon_uv_stride * 8; /* reset contexts */ xd->above_context = pc->above_context; memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); xd->left_available = 0; xd->mb_to_top_edge = -((mb_row * 16) << 3); xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; xd->recon_above[0] = dst_buffer[0] + recon_yoffset; xd->recon_above[1] = dst_buffer[1] + recon_uvoffset; xd->recon_above[2] = dst_buffer[2] + recon_uvoffset; xd->recon_left[0] = xd->recon_above[0] - 1; xd->recon_left[1] = xd->recon_above[1] - 1; xd->recon_left[2] = xd->recon_above[2] - 1; xd->recon_above[0] -= xd->dst.y_stride; xd->recon_above[1] -= xd->dst.uv_stride; xd->recon_above[2] -= xd->dst.uv_stride; /* TODO: move to outside row loop */ xd->recon_left_stride[0] = xd->dst.y_stride; xd->recon_left_stride[1] = xd->dst.uv_stride; setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], xd->recon_left[2], xd->dst.y_stride, xd->dst.uv_stride); for (mb_col = 0; mb_col < pc->mb_cols; ++mb_col) { /* Distance of Mb to the various image edges. * These are specified to 8th pel as they are always compared to values * that are in 1/8th pel units */ xd->mb_to_left_edge = -((mb_col * 16) << 3); xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; #if CONFIG_ERROR_CONCEALMENT { int corrupt_residual = (!pbi->independent_partitions && pbi->frame_corrupt_residual) || vp8dx_bool_error(xd->current_bc); if (pbi->ec_active && xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME && corrupt_residual) { /* We have an intra block with corrupt coefficients, better to * conceal with an inter block. Interpolate MVs from neighboring * MBs. * * Note that for the first mb with corrupt residual in a frame, * we might not discover that before decoding the residual. That * happens after this check, and therefore no inter concealment * will be done. 
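 * (Editorial note, not part of the original libvpx source: concealing an
 * intra block as inter lets the previous frame supply plausible pixels; an
 * intra predictor built from corrupt neighbours would only spread the
 * damage, hence the re-typing and the MV interpolation from neighbouring
 * MBs.)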
*/ vp8_interpolate_motion(xd, mb_row, mb_col, pc->mb_rows, pc->mb_cols); } } #endif xd->dst.y_buffer = dst_buffer[0] + recon_yoffset; xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset; xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset; if (xd->mode_info_context->mbmi.ref_frame >= LAST_FRAME) { const MV_REFERENCE_FRAME ref = xd->mode_info_context->mbmi.ref_frame; xd->pre.y_buffer = ref_buffer[ref][0] + recon_yoffset; xd->pre.u_buffer = ref_buffer[ref][1] + recon_uvoffset; xd->pre.v_buffer = ref_buffer[ref][2] + recon_uvoffset; } else { // ref_frame is INTRA_FRAME, pre buffer should not be used. xd->pre.y_buffer = 0; xd->pre.u_buffer = 0; xd->pre.v_buffer = 0; } /* propagate errors from reference frames */ xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; decode_macroblock(pbi, xd, mb_idx); mb_idx++; xd->left_available = 1; /* check if the boolean decoder has suffered an error */ xd->corrupted |= vp8dx_bool_error(xd->current_bc); xd->recon_above[0] += 16; xd->recon_above[1] += 8; xd->recon_above[2] += 8; xd->recon_left[0] += 16; xd->recon_left[1] += 8; xd->recon_left[2] += 8; recon_yoffset += 16; recon_uvoffset += 8; ++xd->mode_info_context; /* next mb */ xd->above_context++; } /* adjust to the next row of mbs */ vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); ++xd->mode_info_context; /* skip prediction column */ xd->up_available = 1; if (pc->filter_level) { if (mb_row > 0) { if (pc->filter_type == NORMAL_LOOPFILTER) { vp8_loop_filter_row_normal(pc, lf_mic, mb_row - 1, recon_y_stride, recon_uv_stride, lf_dst[0], lf_dst[1], lf_dst[2]); } else { vp8_loop_filter_row_simple(pc, lf_mic, mb_row - 1, recon_y_stride, lf_dst[0]); } if (mb_row > 1) { yv12_extend_frame_left_right_c(yv12_fb_new, eb_dst[0], eb_dst[1], eb_dst[2]); eb_dst[0] += recon_y_stride * 16; eb_dst[1] += recon_uv_stride * 8; eb_dst[2] += recon_uv_stride * 8; } lf_dst[0] += recon_y_stride * 16; lf_dst[1] += recon_uv_stride * 8; lf_dst[2] += recon_uv_stride * 8; lf_mic += pc->mb_cols; lf_mic++; /* Skip border mb */ } } else { if (mb_row > 0) { /**/ yv12_extend_frame_left_right_c(yv12_fb_new, eb_dst[0], eb_dst[1], eb_dst[2]); eb_dst[0] += recon_y_stride * 16; eb_dst[1] += recon_uv_stride * 8; eb_dst[2] += recon_uv_stride * 8; } } } if (pc->filter_level) { if (pc->filter_type == NORMAL_LOOPFILTER) { vp8_loop_filter_row_normal(pc, lf_mic, mb_row - 1, recon_y_stride, recon_uv_stride, lf_dst[0], lf_dst[1], lf_dst[2]); } else { vp8_loop_filter_row_simple(pc, lf_mic, mb_row - 1, recon_y_stride, lf_dst[0]); } yv12_extend_frame_left_right_c(yv12_fb_new, eb_dst[0], eb_dst[1], eb_dst[2]); eb_dst[0] += recon_y_stride * 16; eb_dst[1] += recon_uv_stride * 8; eb_dst[2] += recon_uv_stride * 8; } yv12_extend_frame_left_right_c(yv12_fb_new, eb_dst[0], eb_dst[1], eb_dst[2]); yv12_extend_frame_top_c(yv12_fb_new); yv12_extend_frame_bottom_c(yv12_fb_new); } static unsigned int read_partition_size(VP8D_COMP *pbi, const unsigned char *cx_size) { unsigned char temp[3]; if (pbi->decrypt_cb) { pbi->decrypt_cb(pbi->decrypt_state, cx_size, temp, 3); cx_size = temp; } return cx_size[0] + (cx_size[1] << 8) + (cx_size[2] << 16); } static int read_is_valid(const unsigned char *start, size_t len, const unsigned char *end) { return len != 0 && end > start && len <= (size_t)(end - start); } static unsigned int read_available_partition_size( VP8D_COMP *pbi, const unsigned char *token_part_sizes, const unsigned char *fragment_start, const unsigned char *first_fragment_end, const unsigned char 
*fragment_end, int i, int num_part) { VP8_COMMON *pc = &pbi->common; const unsigned char *partition_size_ptr = token_part_sizes + i * 3; unsigned int partition_size = 0; ptrdiff_t bytes_left = fragment_end - fragment_start; if (bytes_left < 0) { vpx_internal_error( &pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt partition. No bytes left %d.", (int)bytes_left); } /* Calculate the length of this partition. The last partition * size is implicit. If the partition size can't be read, then * either use the remaining data in the buffer (for EC mode) * or throw an error. */ if (i < num_part - 1) { if (read_is_valid(partition_size_ptr, 3, first_fragment_end)) { partition_size = read_partition_size(pbi, partition_size_ptr); } else if (pbi->ec_active) { partition_size = (unsigned int)bytes_left; } else { vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated partition size data"); } } else { partition_size = (unsigned int)bytes_left; } /* Validate the calculated partition length. If the buffer * described by the partition can't be fully read, then restrict * it to the portion that can be (for EC mode) or throw an error. */ if (!read_is_valid(fragment_start, partition_size, fragment_end)) { if (pbi->ec_active) { partition_size = (unsigned int)bytes_left; } else { vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt partition " "%d length", i + 1); } } return partition_size; } static void setup_token_decoder(VP8D_COMP *pbi, const unsigned char *token_part_sizes) { vp8_reader *bool_decoder = &pbi->mbc[0]; unsigned int partition_idx; unsigned int fragment_idx; unsigned int num_token_partitions; const unsigned char *first_fragment_end = pbi->fragments.ptrs[0] + pbi->fragments.sizes[0]; TOKEN_PARTITION multi_token_partition = (TOKEN_PARTITION)vp8_read_literal(&pbi->mbc[8], 2); if (!vp8dx_bool_error(&pbi->mbc[8])) { pbi->common.multi_token_partition = multi_token_partition; } num_token_partitions = 1 << pbi->common.multi_token_partition; /* Check for partitions within the fragments and unpack the fragments * so that each fragment pointer points to its corresponding partition. */ for (fragment_idx = 0; fragment_idx < pbi->fragments.count; ++fragment_idx) { unsigned int fragment_size = pbi->fragments.sizes[fragment_idx]; const unsigned char *fragment_end = pbi->fragments.ptrs[fragment_idx] + fragment_size; /* Special case for handling the first partition since we have already * read its size. */ if (fragment_idx == 0) { /* Size of first partition + token partition sizes element */ ptrdiff_t ext_first_part_size = token_part_sizes - pbi->fragments.ptrs[0] + 3 * (num_token_partitions - 1); if (fragment_size < (unsigned int)ext_first_part_size) vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, "Corrupted fragment size %d", fragment_size); fragment_size -= (unsigned int)ext_first_part_size; if (fragment_size > 0) { pbi->fragments.sizes[0] = (unsigned int)ext_first_part_size; /* The fragment contains an additional partition. Move to * next. 
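 * (Editorial note, not part of the original libvpx source: the partition
 * size table is a run of 3-byte little-endian values, one per token
 * partition except the last, stored right after the first partition;
 * read_partition_size() above decodes each as
 * cx_size[0] + (cx_size[1] << 8) + (cx_size[2] << 16), and the final
 * partition's size is implied by the bytes that remain.)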
*/ fragment_idx++; pbi->fragments.ptrs[fragment_idx] = pbi->fragments.ptrs[0] + pbi->fragments.sizes[0]; } } /* Split the chunk into partitions read from the bitstream */ while (fragment_size > 0) { ptrdiff_t partition_size = read_available_partition_size( pbi, token_part_sizes, pbi->fragments.ptrs[fragment_idx], first_fragment_end, fragment_end, fragment_idx - 1, num_token_partitions); pbi->fragments.sizes[fragment_idx] = (unsigned int)partition_size; if (fragment_size < (unsigned int)partition_size) vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, "Corrupted fragment size %d", fragment_size); fragment_size -= (unsigned int)partition_size; assert(fragment_idx <= num_token_partitions); if (fragment_size > 0) { /* The fragment contains an additional partition. * Move to next. */ fragment_idx++; pbi->fragments.ptrs[fragment_idx] = pbi->fragments.ptrs[fragment_idx - 1] + partition_size; } } } pbi->fragments.count = num_token_partitions + 1; for (partition_idx = 1; partition_idx < pbi->fragments.count; ++partition_idx) { if (vp8dx_start_decode(bool_decoder, pbi->fragments.ptrs[partition_idx], pbi->fragments.sizes[partition_idx], pbi->decrypt_cb, pbi->decrypt_state)) { vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder %d", partition_idx); } bool_decoder++; } #if CONFIG_MULTITHREAD /* Clamp number of decoder threads */ if (pbi->decoding_thread_count > num_token_partitions - 1) { pbi->decoding_thread_count = num_token_partitions - 1; } if ((int)pbi->decoding_thread_count > pbi->common.mb_rows - 1) { assert(pbi->common.mb_rows > 0); pbi->decoding_thread_count = pbi->common.mb_rows - 1; } #endif } static void init_frame(VP8D_COMP *pbi) { VP8_COMMON *const pc = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; if (pc->frame_type == KEY_FRAME) { /* Various keyframe initializations */ memcpy(pc->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); vp8_init_mbmode_probs(pc); vp8_default_coef_probs(pc); /* reset the segment feature data to 0 with delta coding (Default state). */ memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); xd->mb_segement_abs_delta = SEGMENT_DELTADATA; /* reset the mode ref deltas for loop filter */ memset(xd->ref_lf_deltas, 0, sizeof(xd->ref_lf_deltas)); memset(xd->mode_lf_deltas, 0, sizeof(xd->mode_lf_deltas)); /* All buffers are implicitly updated on key frames.
*/ pc->refresh_golden_frame = 1; pc->refresh_alt_ref_frame = 1; pc->copy_buffer_to_gf = 0; pc->copy_buffer_to_arf = 0; /* Note that Golden and Altref modes cannot be used on a key frame so * ref_frame_sign_bias[] is undefined and meaningless */ pc->ref_frame_sign_bias[GOLDEN_FRAME] = 0; pc->ref_frame_sign_bias[ALTREF_FRAME] = 0; } else { /* To enable choice of different interpolation filters */ if (!pc->use_bilinear_mc_filter) { xd->subpixel_predict = vp8_sixtap_predict4x4; xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; xd->subpixel_predict8x8 = vp8_sixtap_predict8x8; xd->subpixel_predict16x16 = vp8_sixtap_predict16x16; } else { xd->subpixel_predict = vp8_bilinear_predict4x4; xd->subpixel_predict8x4 = vp8_bilinear_predict8x4; xd->subpixel_predict8x8 = vp8_bilinear_predict8x8; xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; } if (pbi->decoded_key_frame && pbi->ec_enabled && !pbi->ec_active) { pbi->ec_active = 1; } } xd->left_context = &pc->left_context; xd->mode_info_context = pc->mi; xd->frame_type = pc->frame_type; xd->mode_info_context->mbmi.mode = DC_PRED; xd->mode_info_stride = pc->mode_info_stride; xd->corrupted = 0; /* init without corruption */ xd->fullpixel_mask = 0xffffffff; if (pc->full_pixel) xd->fullpixel_mask = 0xfffffff8; } int vp8_decode_frame(VP8D_COMP *pbi) { vp8_reader *const bc = &pbi->mbc[8]; VP8_COMMON *const pc = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; const unsigned char *data = pbi->fragments.ptrs[0]; const unsigned int data_sz = pbi->fragments.sizes[0]; const unsigned char *data_end = data + data_sz; ptrdiff_t first_partition_length_in_bytes; int i, j, k, l; const int *const mb_feature_data_bits = vp8_mb_feature_data_bits; int corrupt_tokens = 0; int prev_independent_partitions = pbi->independent_partitions; YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; /* start with no corruption of current frame */ xd->corrupted = 0; yv12_fb_new->corrupted = 0; if (data_end - data < 3) { if (!pbi->ec_active) { vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } /* Declare the missing frame as an inter frame since it will be handled as an inter frame when we have estimated its motion vectors. */ pc->frame_type = INTER_FRAME; pc->version = 0; pc->show_frame = 1; first_partition_length_in_bytes = 0; } else { unsigned char clear_buffer[10]; const unsigned char *clear = data; if (pbi->decrypt_cb) { int n = (int)VPXMIN(sizeof(clear_buffer), data_sz); pbi->decrypt_cb(pbi->decrypt_state, data, clear_buffer, n); clear = clear_buffer; } pc->frame_type = (FRAME_TYPE)(clear[0] & 1); pc->version = (clear[0] >> 1) & 7; pc->show_frame = (clear[0] >> 4) & 1; first_partition_length_in_bytes = (clear[0] | (clear[1] << 8) | (clear[2] << 16)) >> 5; if (!pbi->ec_active && (data + first_partition_length_in_bytes > data_end || data + first_partition_length_in_bytes < data)) { vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt partition 0 length"); } data += 3; clear += 3; vp8_setup_version(pc); if (pc->frame_type == KEY_FRAME) { /* vet via sync code */ /* When error concealment is enabled we should only check the sync * code if we have enough bits available */ if (data + 3 < data_end) { if (clear[0] != 0x9d || clear[1] != 0x01 || clear[2] != 0x2a) { vpx_internal_error(&pc->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame sync code"); } } /* If error concealment is enabled we should only parse the new size * if we have enough data. Otherwise we will end up with the wrong * size.
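 * (Editorial note, not part of the original libvpx source: the seven key
 * frame header bytes parsed here are the 0x9d 0x01 0x2a sync code followed
 * by two little-endian 16-bit fields whose low 14 bits carry the width and
 * height and whose top two bits carry the horizontal/vertical scaling
 * mode, matching the 0x3fff masks and >> 6 shifts below.)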
*/ if (data + 6 < data_end) { pc->Width = (clear[3] | (clear[4] << 8)) & 0x3fff; pc->horiz_scale = clear[4] >> 6; pc->Height = (clear[5] | (clear[6] << 8)) & 0x3fff; pc->vert_scale = clear[6] >> 6; data += 7; } else if (!pbi->ec_active) { vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Truncated key frame header"); } else { /* Error concealment is active, clear the frame. */ data = data_end; } } else { memcpy(&xd->pre, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); memcpy(&xd->dst, yv12_fb_new, sizeof(YV12_BUFFER_CONFIG)); } } if ((!pbi->decoded_key_frame && pc->frame_type != KEY_FRAME)) { return -1; } init_frame(pbi); if (vp8dx_start_decode(bc, data, (unsigned int)(data_end - data), pbi->decrypt_cb, pbi->decrypt_state)) { vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder 0"); } if (pc->frame_type == KEY_FRAME) { (void)vp8_read_bit(bc); // colorspace pc->clamp_type = (CLAMP_TYPE)vp8_read_bit(bc); } /* Is segmentation enabled */ xd->segmentation_enabled = (unsigned char)vp8_read_bit(bc); if (xd->segmentation_enabled) { /* Signal whether or not the segmentation map is being explicitly updated * this frame. */ xd->update_mb_segmentation_map = (unsigned char)vp8_read_bit(bc); xd->update_mb_segmentation_data = (unsigned char)vp8_read_bit(bc); if (xd->update_mb_segmentation_data) { xd->mb_segement_abs_delta = (unsigned char)vp8_read_bit(bc); memset(xd->segment_feature_data, 0, sizeof(xd->segment_feature_data)); /* For each segmentation feature (Quant and loop filter level) */ for (i = 0; i < MB_LVL_MAX; ++i) { for (j = 0; j < MAX_MB_SEGMENTS; ++j) { /* Frame level data */ if (vp8_read_bit(bc)) { xd->segment_feature_data[i][j] = (signed char)vp8_read_literal(bc, mb_feature_data_bits[i]); if (vp8_read_bit(bc)) { xd->segment_feature_data[i][j] = -xd->segment_feature_data[i][j]; } } else { xd->segment_feature_data[i][j] = 0; } } } } if (xd->update_mb_segmentation_map) { /* Which macro block level features are enabled */ memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); /* Read the probs used to decode the segment id for each macro block. */ for (i = 0; i < MB_FEATURE_TREE_PROBS; ++i) { /* If not explicitly set value is defaulted to 255 by memset above */ if (vp8_read_bit(bc)) { xd->mb_segment_tree_probs[i] = (vp8_prob)vp8_read_literal(bc, 8); } } } } else { /* No segmentation updates on this frame */ xd->update_mb_segmentation_map = 0; xd->update_mb_segmentation_data = 0; } /* Read the loop filter level and type */ pc->filter_type = (LOOPFILTERTYPE)vp8_read_bit(bc); pc->filter_level = vp8_read_literal(bc, 6); pc->sharpness_level = vp8_read_literal(bc, 3); /* Read in loop filter deltas applied at the MB level based on mode or ref * frame. 
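 * (Editorial note, not part of the original libvpx source: each delta is
 * transmitted as a presence bit, a 6-bit magnitude and a sign bit, which
 * is why the update loops below call vp8_read_literal(bc, 6) and then
 * conditionally negate the value.)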
*/ xd->mode_ref_lf_delta_update = 0; xd->mode_ref_lf_delta_enabled = (unsigned char)vp8_read_bit(bc); if (xd->mode_ref_lf_delta_enabled) { /* Do the deltas need to be updated */ xd->mode_ref_lf_delta_update = (unsigned char)vp8_read_bit(bc); if (xd->mode_ref_lf_delta_update) { /* Send update */ for (i = 0; i < MAX_REF_LF_DELTAS; ++i) { if (vp8_read_bit(bc)) { /*sign = vp8_read_bit( bc );*/ xd->ref_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6); if (vp8_read_bit(bc)) { /* Apply sign */ xd->ref_lf_deltas[i] = xd->ref_lf_deltas[i] * -1; } } } /* Send update */ for (i = 0; i < MAX_MODE_LF_DELTAS; ++i) { if (vp8_read_bit(bc)) { /*sign = vp8_read_bit( bc );*/ xd->mode_lf_deltas[i] = (signed char)vp8_read_literal(bc, 6); if (vp8_read_bit(bc)) { /* Apply sign */ xd->mode_lf_deltas[i] = xd->mode_lf_deltas[i] * -1; } } } } } setup_token_decoder(pbi, data + first_partition_length_in_bytes); xd->current_bc = &pbi->mbc[0]; /* Read the default quantizers. */ { int Q, q_update; Q = vp8_read_literal(bc, 7); /* AC 1st order Q = default */ pc->base_qindex = Q; q_update = 0; pc->y1dc_delta_q = get_delta_q(bc, pc->y1dc_delta_q, &q_update); pc->y2dc_delta_q = get_delta_q(bc, pc->y2dc_delta_q, &q_update); pc->y2ac_delta_q = get_delta_q(bc, pc->y2ac_delta_q, &q_update); pc->uvdc_delta_q = get_delta_q(bc, pc->uvdc_delta_q, &q_update); pc->uvac_delta_q = get_delta_q(bc, pc->uvac_delta_q, &q_update); if (q_update) vp8cx_init_de_quantizer(pbi); /* MB level dequantizer setup */ vp8_mb_init_dequantizer(pbi, &pbi->mb); } /* Determine if the golden frame or ARF buffer should be updated and how. * For all non key frames the GF and ARF refresh flags and sign bias * flags must be set explicitly. */ if (pc->frame_type != KEY_FRAME) { /* Should the GF or ARF be updated from the current frame */ pc->refresh_golden_frame = vp8_read_bit(bc); #if CONFIG_ERROR_CONCEALMENT /* Assume we shouldn't refresh golden if the bit is missing */ xd->corrupted |= vp8dx_bool_error(bc); if (pbi->ec_active && xd->corrupted) pc->refresh_golden_frame = 0; #endif pc->refresh_alt_ref_frame = vp8_read_bit(bc); #if CONFIG_ERROR_CONCEALMENT /* Assume we shouldn't refresh altref if the bit is missing */ xd->corrupted |= vp8dx_bool_error(bc); if (pbi->ec_active && xd->corrupted) pc->refresh_alt_ref_frame = 0; #endif /* Buffer to buffer copy flags. 
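 * (Editorial note, not part of the original libvpx source: these 2-bit
 * codes are only read when the corresponding buffer is not refreshed
 * outright; 0 means no copy, 1 copies the last frame into the buffer and
 * 2 copies the other one of the golden/alt-ref pair.)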
*/ pc->copy_buffer_to_gf = 0; if (!pc->refresh_golden_frame) { pc->copy_buffer_to_gf = vp8_read_literal(bc, 2); } #if CONFIG_ERROR_CONCEALMENT /* Assume we shouldn't copy to the golden if the bit is missing */ xd->corrupted |= vp8dx_bool_error(bc); if (pbi->ec_active && xd->corrupted) pc->copy_buffer_to_gf = 0; #endif pc->copy_buffer_to_arf = 0; if (!pc->refresh_alt_ref_frame) { pc->copy_buffer_to_arf = vp8_read_literal(bc, 2); } #if CONFIG_ERROR_CONCEALMENT /* Assume we shouldn't copy to the alt-ref if the bit is missing */ xd->corrupted |= vp8dx_bool_error(bc); if (pbi->ec_active && xd->corrupted) pc->copy_buffer_to_arf = 0; #endif pc->ref_frame_sign_bias[GOLDEN_FRAME] = vp8_read_bit(bc); pc->ref_frame_sign_bias[ALTREF_FRAME] = vp8_read_bit(bc); } pc->refresh_entropy_probs = vp8_read_bit(bc); #if CONFIG_ERROR_CONCEALMENT /* Assume we shouldn't refresh the probabilities if the bit is * missing */ xd->corrupted |= vp8dx_bool_error(bc); if (pbi->ec_active && xd->corrupted) pc->refresh_entropy_probs = 0; #endif if (pc->refresh_entropy_probs == 0) { memcpy(&pc->lfc, &pc->fc, sizeof(pc->fc)); } pc->refresh_last_frame = pc->frame_type == KEY_FRAME || vp8_read_bit(bc); #if CONFIG_ERROR_CONCEALMENT /* Assume we should refresh the last frame if the bit is missing */ xd->corrupted |= vp8dx_bool_error(bc); if (pbi->ec_active && xd->corrupted) pc->refresh_last_frame = 1; #endif if (0) { FILE *z = fopen("decodestats.stt", "a"); fprintf(z, "%6d F:%d,G:%d,A:%d,L:%d,Q:%d\n", pc->current_video_frame, pc->frame_type, pc->refresh_golden_frame, pc->refresh_alt_ref_frame, pc->refresh_last_frame, pc->base_qindex); fclose(z); } { pbi->independent_partitions = 1; /* read coef probability tree */ for (i = 0; i < BLOCK_TYPES; ++i) { for (j = 0; j < COEF_BANDS; ++j) { for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { for (l = 0; l < ENTROPY_NODES; ++l) { vp8_prob *const p = pc->fc.coef_probs[i][j][k] + l; if (vp8_read(bc, vp8_coef_update_probs[i][j][k][l])) { *p = (vp8_prob)vp8_read_literal(bc, 8); } if (k > 0 && *p != pc->fc.coef_probs[i][j][k - 1][l]) { pbi->independent_partitions = 0; } } } } } } /* clear out the coeff buffer */ memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); vp8_decode_mode_mvs(pbi); #if CONFIG_ERROR_CONCEALMENT if (pbi->ec_active && pbi->mvs_corrupt_from_mb < (unsigned int)pc->mb_cols * pc->mb_rows) { /* Motion vectors are missing in this frame. We will try to estimate * them and then continue decoding the frame as usual */ vp8_estimate_missing_mvs(pbi); } #endif memset(pc->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * pc->mb_cols); pbi->frame_corrupt_residual = 0; #if CONFIG_MULTITHREAD if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) && pc->multi_token_partition != ONE_PARTITION) { unsigned int thread; if (vp8mt_decode_mb_rows(pbi, xd)) { vp8_decoder_remove_threads(pbi); pbi->restart_threads = 1; vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, NULL); } vp8_yv12_extend_frame_borders(yv12_fb_new); for (thread = 0; thread < pbi->decoding_thread_count; ++thread) { corrupt_tokens |= pbi->mb_row_di[thread].mbd.corrupted; } } else #endif { decode_mb_rows(pbi); corrupt_tokens |= xd->corrupted; } /* Collect information about decoder corruption. */ /* 1. Check first boolean decoder for errors. */ yv12_fb_new->corrupted = vp8dx_bool_error(bc); /* 2. 
Check the macroblock information */ yv12_fb_new->corrupted |= corrupt_tokens; if (!pbi->decoded_key_frame) { if (pc->frame_type == KEY_FRAME && !yv12_fb_new->corrupted) { pbi->decoded_key_frame = 1; } else { vpx_internal_error(&pbi->common.error, VPX_CODEC_CORRUPT_FRAME, "A stream must start with a complete key frame"); } } /* vpx_log("Decoder: Frame Decoded, Size Roughly:%d bytes * \n",bc->pos+pbi->bc2.pos); */ if (pc->refresh_entropy_probs == 0) { memcpy(&pc->fc, &pc->lfc, sizeof(pc->fc)); pbi->independent_partitions = prev_independent_partitions; } #ifdef PACKET_TESTING { FILE *f = fopen("decompressor.VP8", "ab"); unsigned int size = pbi->bc2.pos + pbi->bc.pos + 8; fwrite((void *)&size, 4, 1, f); fwrite((void *)pbi->Source, size, 1, f); fclose(f); } #endif return 0; } libvpx-1.8.2/vp8/decoder/decodemv.c000066400000000000000000000422501357355204000171110ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "decodemv.h" #include "treereader.h" #include "vp8/common/entropymv.h" #include "vp8/common/entropymode.h" #include "onyxd_int.h" #include "vp8/common/findnearmv.h" static B_PREDICTION_MODE read_bmode(vp8_reader *bc, const vp8_prob *p) { const int i = vp8_treed_read(bc, vp8_bmode_tree, p); return (B_PREDICTION_MODE)i; } static MB_PREDICTION_MODE read_ymode(vp8_reader *bc, const vp8_prob *p) { const int i = vp8_treed_read(bc, vp8_ymode_tree, p); return (MB_PREDICTION_MODE)i; } static MB_PREDICTION_MODE read_kf_ymode(vp8_reader *bc, const vp8_prob *p) { const int i = vp8_treed_read(bc, vp8_kf_ymode_tree, p); return (MB_PREDICTION_MODE)i; } static MB_PREDICTION_MODE read_uv_mode(vp8_reader *bc, const vp8_prob *p) { const int i = vp8_treed_read(bc, vp8_uv_mode_tree, p); return (MB_PREDICTION_MODE)i; } static void read_kf_modes(VP8D_COMP *pbi, MODE_INFO *mi) { vp8_reader *const bc = &pbi->mbc[8]; const int mis = pbi->common.mode_info_stride; mi->mbmi.ref_frame = INTRA_FRAME; mi->mbmi.mode = read_kf_ymode(bc, vp8_kf_ymode_prob); if (mi->mbmi.mode == B_PRED) { int i = 0; mi->mbmi.is_4x4 = 1; do { const B_PREDICTION_MODE A = above_block_mode(mi, i, mis); const B_PREDICTION_MODE L = left_block_mode(mi, i); mi->bmi[i].as_mode = read_bmode(bc, vp8_kf_bmode_prob[A][L]); } while (++i < 16); } mi->mbmi.uv_mode = read_uv_mode(bc, vp8_kf_uv_mode_prob); } static int read_mvcomponent(vp8_reader *r, const MV_CONTEXT *mvc) { const vp8_prob *const p = (const vp8_prob *)mvc; int x = 0; if (vp8_read(r, p[mvpis_short])) { /* Large */ int i = 0; do { x += vp8_read(r, p[MVPbits + i]) << i; } while (++i < 3); i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */ do { x += vp8_read(r, p[MVPbits + i]) << i; } while (--i > 3); if (!(x & 0xFFF0) || vp8_read(r, p[MVPbits + 3])) x += 8; } else { /* small */ x = vp8_treed_read(r, vp8_small_mvtree, p + MVPshort); } if (x && vp8_read(r, p[MVPsign])) x = -x; return x; } static void read_mv(vp8_reader *r, MV *mv, const MV_CONTEXT *mvc) { mv->row = (short)(read_mvcomponent(r, mvc) * 2); mv->col = (short)(read_mvcomponent(r, ++mvc) * 2); } static void read_mvcontexts(vp8_reader *bc, MV_CONTEXT *mvc) { int i = 0; do { const vp8_prob *up = vp8_mv_update_probs[i].prob; vp8_prob *p = 
(vp8_prob *)(mvc + i); vp8_prob *const pstop = p + MVPcount; do { if (vp8_read(bc, *up++)) { const vp8_prob x = (vp8_prob)vp8_read_literal(bc, 7); *p = x ? x << 1 : 1; } } while (++p < pstop); } while (++i < 2); } static const unsigned char mbsplit_fill_count[4] = { 8, 8, 4, 1 }; static const unsigned char mbsplit_fill_offset[4][16] = { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }, { 0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15 }, { 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15 }, { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 } }; static void mb_mode_mv_init(VP8D_COMP *pbi) { vp8_reader *const bc = &pbi->mbc[8]; MV_CONTEXT *const mvc = pbi->common.fc.mvc; #if CONFIG_ERROR_CONCEALMENT /* Default is that no macroblock is corrupt, therefore we initialize * mvs_corrupt_from_mb to something very big, which we can be sure is * outside the frame. */ pbi->mvs_corrupt_from_mb = UINT_MAX; #endif /* Read the mb_no_coeff_skip flag */ pbi->common.mb_no_coeff_skip = (int)vp8_read_bit(bc); pbi->prob_skip_false = 0; if (pbi->common.mb_no_coeff_skip) { pbi->prob_skip_false = (vp8_prob)vp8_read_literal(bc, 8); } if (pbi->common.frame_type != KEY_FRAME) { pbi->prob_intra = (vp8_prob)vp8_read_literal(bc, 8); pbi->prob_last = (vp8_prob)vp8_read_literal(bc, 8); pbi->prob_gf = (vp8_prob)vp8_read_literal(bc, 8); if (vp8_read_bit(bc)) { int i = 0; do { pbi->common.fc.ymode_prob[i] = (vp8_prob)vp8_read_literal(bc, 8); } while (++i < 4); } if (vp8_read_bit(bc)) { int i = 0; do { pbi->common.fc.uv_mode_prob[i] = (vp8_prob)vp8_read_literal(bc, 8); } while (++i < 3); } read_mvcontexts(bc, mvc); } } const vp8_prob vp8_sub_mv_ref_prob3[8][VP8_SUBMVREFS - 1] = { { 147, 136, 18 }, /* SUBMVREF_NORMAL */ { 223, 1, 34 }, /* SUBMVREF_LEFT_ABOVE_SAME */ { 106, 145, 1 }, /* SUBMVREF_LEFT_ZED */ { 208, 1, 1 }, /* SUBMVREF_LEFT_ABOVE_ZED */ { 179, 121, 1 }, /* SUBMVREF_ABOVE_ZED */ { 223, 1, 34 }, /* SUBMVREF_LEFT_ABOVE_SAME */ { 179, 121, 1 }, /* SUBMVREF_ABOVE_ZED */ { 208, 1, 1 } /* SUBMVREF_LEFT_ABOVE_ZED */ }; static const vp8_prob *get_sub_mv_ref_prob(const int left, const int above) { int lez = (left == 0); int aez = (above == 0); int lea = (left == above); const vp8_prob *prob; prob = vp8_sub_mv_ref_prob3[(aez << 2) | (lez << 1) | (lea)]; return prob; } static void decode_split_mv(vp8_reader *const bc, MODE_INFO *mi, const MODE_INFO *left_mb, const MODE_INFO *above_mb, MB_MODE_INFO *mbmi, int_mv best_mv, MV_CONTEXT *const mvc, int mb_to_left_edge, int mb_to_right_edge, int mb_to_top_edge, int mb_to_bottom_edge) { int s; /* split configuration (16x8, 8x16, 8x8, 4x4) */ /* number of partitions in the split configuration (see vp8_mbsplit_count) */ int num_p; int j = 0; s = 3; num_p = 16; if (vp8_read(bc, 110)) { s = 2; num_p = 4; if (vp8_read(bc, 111)) { s = vp8_read(bc, 150); num_p = 2; } } do /* for each subset j */ { int_mv leftmv, abovemv; int_mv blockmv; int k; /* first block in subset j */ const vp8_prob *prob; k = vp8_mbsplit_offset[s][j]; if (!(k & 3)) { /* On L edge, get from MB to left of us */ if (left_mb->mbmi.mode != SPLITMV) { leftmv.as_int = left_mb->mbmi.mv.as_int; } else { leftmv.as_int = (left_mb->bmi + k + 4 - 1)->mv.as_int; } } else { leftmv.as_int = (mi->bmi + k - 1)->mv.as_int; } if (!(k >> 2)) { /* On top edge, get from MB above us */ if (above_mb->mbmi.mode != SPLITMV) { abovemv.as_int = above_mb->mbmi.mv.as_int; } else { abovemv.as_int = (above_mb->bmi + k + 16 - 4)->mv.as_int; } } else { abovemv.as_int = (mi->bmi + k - 4)->mv.as_int; } prob = 
get_sub_mv_ref_prob(leftmv.as_int, abovemv.as_int); if (vp8_read(bc, prob[0])) { if (vp8_read(bc, prob[1])) { blockmv.as_int = 0; if (vp8_read(bc, prob[2])) { blockmv.as_mv.row = read_mvcomponent(bc, &mvc[0]) * 2; blockmv.as_mv.row += best_mv.as_mv.row; blockmv.as_mv.col = read_mvcomponent(bc, &mvc[1]) * 2; blockmv.as_mv.col += best_mv.as_mv.col; } } else { blockmv.as_int = abovemv.as_int; } } else { blockmv.as_int = leftmv.as_int; } mbmi->need_to_clamp_mvs |= vp8_check_mv_bounds(&blockmv, mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, mb_to_bottom_edge); { /* Fill (uniform) modes, mvs of jth subset. Must do it here because ensuing subsets can refer back to us via "left" or "above". */ const unsigned char *fill_offset; unsigned int fill_count = mbsplit_fill_count[s]; fill_offset = &mbsplit_fill_offset[s][(unsigned char)j * mbsplit_fill_count[s]]; do { mi->bmi[*fill_offset].mv.as_int = blockmv.as_int; fill_offset++; } while (--fill_count); } } while (++j < num_p); mbmi->partitioning = s; } static void read_mb_modes_mv(VP8D_COMP *pbi, MODE_INFO *mi, MB_MODE_INFO *mbmi) { vp8_reader *const bc = &pbi->mbc[8]; mbmi->ref_frame = (MV_REFERENCE_FRAME)vp8_read(bc, pbi->prob_intra); if (mbmi->ref_frame) { /* inter MB */ enum { CNT_INTRA, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; int cnt[4]; int *cntx = cnt; int_mv near_mvs[4]; int_mv *nmv = near_mvs; const int mis = pbi->mb.mode_info_stride; const MODE_INFO *above = mi - mis; const MODE_INFO *left = mi - 1; const MODE_INFO *aboveleft = above - 1; int *ref_frame_sign_bias = pbi->common.ref_frame_sign_bias; mbmi->need_to_clamp_mvs = 0; if (vp8_read(bc, pbi->prob_last)) { mbmi->ref_frame = (MV_REFERENCE_FRAME)((int)(2 + vp8_read(bc, pbi->prob_gf))); } /* Zero accumulators */ nmv[0].as_int = nmv[1].as_int = nmv[2].as_int = 0; cnt[0] = cnt[1] = cnt[2] = cnt[3] = 0; /* Process above */ if (above->mbmi.ref_frame != INTRA_FRAME) { if (above->mbmi.mv.as_int) { (++nmv)->as_int = above->mbmi.mv.as_int; mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], mbmi->ref_frame, nmv, ref_frame_sign_bias); ++cntx; } *cntx += 2; } /* Process left */ if (left->mbmi.ref_frame != INTRA_FRAME) { if (left->mbmi.mv.as_int) { int_mv this_mv; this_mv.as_int = left->mbmi.mv.as_int; mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], mbmi->ref_frame, &this_mv, ref_frame_sign_bias); if (this_mv.as_int != nmv->as_int) { (++nmv)->as_int = this_mv.as_int; ++cntx; } *cntx += 2; } else { cnt[CNT_INTRA] += 2; } } /* Process above left */ if (aboveleft->mbmi.ref_frame != INTRA_FRAME) { if (aboveleft->mbmi.mv.as_int) { int_mv this_mv; this_mv.as_int = aboveleft->mbmi.mv.as_int; mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], mbmi->ref_frame, &this_mv, ref_frame_sign_bias); if (this_mv.as_int != nmv->as_int) { (++nmv)->as_int = this_mv.as_int; ++cntx; } *cntx += 1; } else { cnt[CNT_INTRA] += 1; } } if (vp8_read(bc, vp8_mode_contexts[cnt[CNT_INTRA]][0])) { /* If we have three distinct MV's ... 
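 * (Editorial note, not part of the original libvpx source: cnt[] weights
 * each candidate MV by how many neighbours contributed it -- the above and
 * left MBs count 2 each, above-left counts 1 -- and those weights index
 * vp8_mode_contexts[] to select the probabilities for the
 * ZEROMV/NEARESTMV/NEARMV/NEWMV decisions that follow.)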
*/ /* See if above-left MV can be merged with NEAREST */ cnt[CNT_NEAREST] += ((cnt[CNT_SPLITMV] > 0) & (nmv->as_int == near_mvs[CNT_NEAREST].as_int)); /* Swap near and nearest if necessary */ if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { int tmp; tmp = cnt[CNT_NEAREST]; cnt[CNT_NEAREST] = cnt[CNT_NEAR]; cnt[CNT_NEAR] = tmp; tmp = near_mvs[CNT_NEAREST].as_int; near_mvs[CNT_NEAREST].as_int = near_mvs[CNT_NEAR].as_int; near_mvs[CNT_NEAR].as_int = tmp; } if (vp8_read(bc, vp8_mode_contexts[cnt[CNT_NEAREST]][1])) { if (vp8_read(bc, vp8_mode_contexts[cnt[CNT_NEAR]][2])) { int mb_to_top_edge; int mb_to_bottom_edge; int mb_to_left_edge; int mb_to_right_edge; MV_CONTEXT *const mvc = pbi->common.fc.mvc; int near_index; mb_to_top_edge = pbi->mb.mb_to_top_edge; mb_to_bottom_edge = pbi->mb.mb_to_bottom_edge; mb_to_top_edge -= LEFT_TOP_MARGIN; mb_to_bottom_edge += RIGHT_BOTTOM_MARGIN; mb_to_right_edge = pbi->mb.mb_to_right_edge; mb_to_right_edge += RIGHT_BOTTOM_MARGIN; mb_to_left_edge = pbi->mb.mb_to_left_edge; mb_to_left_edge -= LEFT_TOP_MARGIN; /* Use near_mvs[0] to store the "best" MV */ near_index = CNT_INTRA + (cnt[CNT_NEAREST] >= cnt[CNT_INTRA]); vp8_clamp_mv2(&near_mvs[near_index], &pbi->mb); cnt[CNT_SPLITMV] = ((above->mbmi.mode == SPLITMV) + (left->mbmi.mode == SPLITMV)) * 2 + (aboveleft->mbmi.mode == SPLITMV); if (vp8_read(bc, vp8_mode_contexts[cnt[CNT_SPLITMV]][3])) { decode_split_mv(bc, mi, left, above, mbmi, near_mvs[near_index], mvc, mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, mb_to_bottom_edge); mbmi->mv.as_int = mi->bmi[15].mv.as_int; mbmi->mode = SPLITMV; mbmi->is_4x4 = 1; } else { int_mv *const mbmi_mv = &mbmi->mv; read_mv(bc, &mbmi_mv->as_mv, (const MV_CONTEXT *)mvc); mbmi_mv->as_mv.row += near_mvs[near_index].as_mv.row; mbmi_mv->as_mv.col += near_mvs[near_index].as_mv.col; /* Don't need to check this on NEARMV and NEARESTMV * modes since those modes clamp the MV. The NEWMV mode * does not, so signal to the prediction stage whether * special handling may be required. */ mbmi->need_to_clamp_mvs = vp8_check_mv_bounds(mbmi_mv, mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, mb_to_bottom_edge); mbmi->mode = NEWMV; } } else { mbmi->mode = NEARMV; mbmi->mv.as_int = near_mvs[CNT_NEAR].as_int; vp8_clamp_mv2(&mbmi->mv, &pbi->mb); } } else { mbmi->mode = NEARESTMV; mbmi->mv.as_int = near_mvs[CNT_NEAREST].as_int; vp8_clamp_mv2(&mbmi->mv, &pbi->mb); } } else { mbmi->mode = ZEROMV; mbmi->mv.as_int = 0; } #if CONFIG_ERROR_CONCEALMENT if (pbi->ec_enabled && (mbmi->mode != SPLITMV)) { mi->bmi[0].mv.as_int = mi->bmi[1].mv.as_int = mi->bmi[2].mv.as_int = mi->bmi[3].mv.as_int = mi->bmi[4].mv.as_int = mi->bmi[5].mv.as_int = mi->bmi[6].mv.as_int = mi->bmi[7].mv.as_int = mi->bmi[8].mv.as_int = mi->bmi[9].mv.as_int = mi->bmi[10].mv.as_int = mi->bmi[11].mv.as_int = mi->bmi[12].mv.as_int = mi->bmi[13].mv.as_int = mi->bmi[14].mv.as_int = mi->bmi[15].mv.as_int = mbmi->mv.as_int; } #endif } else { /* required for left and above block mv */ mbmi->mv.as_int = 0; /* MB is intra coded */ if ((mbmi->mode = read_ymode(bc, pbi->common.fc.ymode_prob)) == B_PRED) { int j = 0; mbmi->is_4x4 = 1; do { mi->bmi[j].as_mode = read_bmode(bc, pbi->common.fc.bmode_prob); } while (++j < 16); } mbmi->uv_mode = read_uv_mode(bc, pbi->common.fc.uv_mode_prob); } } static void read_mb_features(vp8_reader *r, MB_MODE_INFO *mi, MACROBLOCKD *x) { /* Is segmentation enabled */ if (x->segmentation_enabled && x->update_mb_segmentation_map) { /* If so then read the segment id. 
*/ if (vp8_read(r, x->mb_segment_tree_probs[0])) { mi->segment_id = (unsigned char)(2 + vp8_read(r, x->mb_segment_tree_probs[2])); } else { mi->segment_id = (unsigned char)(vp8_read(r, x->mb_segment_tree_probs[1])); } } } static void decode_mb_mode_mvs(VP8D_COMP *pbi, MODE_INFO *mi) { /* Read the Macroblock segmentation map if it is being updated explicitly * this frame (reset to 0 above by default) * By default on a key frame reset all MBs to segment 0 */ if (pbi->mb.update_mb_segmentation_map) { read_mb_features(&pbi->mbc[8], &mi->mbmi, &pbi->mb); } else if (pbi->common.frame_type == KEY_FRAME) { mi->mbmi.segment_id = 0; } /* Read the macroblock coeff skip flag if this feature is in use, * else default to 0 */ if (pbi->common.mb_no_coeff_skip) { mi->mbmi.mb_skip_coeff = vp8_read(&pbi->mbc[8], pbi->prob_skip_false); } else { mi->mbmi.mb_skip_coeff = 0; } mi->mbmi.is_4x4 = 0; if (pbi->common.frame_type == KEY_FRAME) { read_kf_modes(pbi, mi); } else { read_mb_modes_mv(pbi, mi, &mi->mbmi); } } void vp8_decode_mode_mvs(VP8D_COMP *pbi) { MODE_INFO *mi = pbi->common.mi; int mb_row = -1; int mb_to_right_edge_start; mb_mode_mv_init(pbi); pbi->mb.mb_to_top_edge = 0; pbi->mb.mb_to_bottom_edge = ((pbi->common.mb_rows - 1) * 16) << 3; mb_to_right_edge_start = ((pbi->common.mb_cols - 1) * 16) << 3; while (++mb_row < pbi->common.mb_rows) { int mb_col = -1; pbi->mb.mb_to_left_edge = 0; pbi->mb.mb_to_right_edge = mb_to_right_edge_start; while (++mb_col < pbi->common.mb_cols) { #if CONFIG_ERROR_CONCEALMENT int mb_num = mb_row * pbi->common.mb_cols + mb_col; #endif decode_mb_mode_mvs(pbi, mi); #if CONFIG_ERROR_CONCEALMENT /* look for corruption. set mvs_corrupt_from_mb to the current * mb_num if the frame is corrupt from this macroblock. */ if (vp8dx_bool_error(&pbi->mbc[8]) && mb_num < (int)pbi->mvs_corrupt_from_mb) { pbi->mvs_corrupt_from_mb = mb_num; /* no need to continue since the partition is corrupt from * here on. */ return; } #endif pbi->mb.mb_to_left_edge -= (16 << 3); pbi->mb.mb_to_right_edge -= (16 << 3); mi++; /* next macroblock */ } pbi->mb.mb_to_top_edge -= (16 << 3); pbi->mb.mb_to_bottom_edge -= (16 << 3); mi++; /* skip left predictor each row */ } } libvpx-1.8.2/vp8/decoder/decodemv.h000066400000000000000000000012401357355204000171100ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_DECODER_DECODEMV_H_ #define VPX_VP8_DECODER_DECODEMV_H_ #include "onyxd_int.h" #ifdef __cplusplus extern "C" { #endif void vp8_decode_mode_mvs(VP8D_COMP *); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_DECODER_DECODEMV_H_ libvpx-1.8.2/vp8/decoder/decoderthreading.h000066400000000000000000000016771357355204000206330ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP8_DECODER_DECODERTHREADING_H_ #define VPX_VP8_DECODER_DECODERTHREADING_H_ #ifdef __cplusplus extern "C" { #endif #if CONFIG_MULTITHREAD int vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd); void vp8_decoder_remove_threads(VP8D_COMP *pbi); void vp8_decoder_create_threads(VP8D_COMP *pbi); void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows); void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows); #endif #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_DECODER_DECODERTHREADING_H_ libvpx-1.8.2/vp8/decoder/detokenize.c000066400000000000000000000127221357355204000174650ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp8/common/blockd.h" #include "onyxd_int.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "detokenize.h" void vp8_reset_mb_tokens_context(MACROBLOCKD *x) { ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context); ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context); memset(a_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES) - 1); memset(l_ctx, 0, sizeof(ENTROPY_CONTEXT_PLANES) - 1); /* Clear entropy contexts for Y2 blocks */ if (!x->mode_info_context->mbmi.is_4x4) { a_ctx[8] = l_ctx[8] = 0; } } /* ------------------------------------------------------------------------------ Residual decoding (Paragraph 13.2 / 13.3) */ static const uint8_t kBands[16 + 1] = { 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7, 0 /* extra entry as sentinel */ }; static const uint8_t kCat3[] = { 173, 148, 140, 0 }; static const uint8_t kCat4[] = { 176, 155, 140, 135, 0 }; static const uint8_t kCat5[] = { 180, 157, 141, 134, 130, 0 }; static const uint8_t kCat6[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; static const uint8_t *const kCat3456[] = { kCat3, kCat4, kCat5, kCat6 }; static const uint8_t kZigzag[16] = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; #define VP8GetBit vp8dx_decode_bool #define NUM_PROBAS 11 #define NUM_CTX 3 /* for const-casting */ typedef const uint8_t (*ProbaArray)[NUM_CTX][NUM_PROBAS]; static int GetSigned(BOOL_DECODER *br, int value_to_sign) { int split = (br->range + 1) >> 1; VP8_BD_VALUE bigsplit = (VP8_BD_VALUE)split << (VP8_BD_VALUE_SIZE - 8); int v; if (br->count < 0) vp8dx_bool_decoder_fill(br); if (br->value < bigsplit) { br->range = split; v = value_to_sign; } else { br->range = br->range - split; br->value = br->value - bigsplit; v = -value_to_sign; } br->range += br->range; br->value += br->value; br->count--; return v; } /* Returns the position of the last non-zero coeff plus one (and 0 if there's no coeff at all) */ static int GetCoeffs(BOOL_DECODER *br, ProbaArray prob, int ctx, int n, int16_t *out) { const uint8_t *p = prob[n][ctx]; if (!VP8GetBit(br, p[0])) { /* first EOB is more a 'CBP' bit. 
*/ return 0; } while (1) { ++n; if (!VP8GetBit(br, p[1])) { p = prob[kBands[n]][0]; } else { /* non zero coeff */ int v, j; if (!VP8GetBit(br, p[2])) { p = prob[kBands[n]][1]; v = 1; } else { if (!VP8GetBit(br, p[3])) { if (!VP8GetBit(br, p[4])) { v = 2; } else { v = 3 + VP8GetBit(br, p[5]); } } else { if (!VP8GetBit(br, p[6])) { if (!VP8GetBit(br, p[7])) { v = 5 + VP8GetBit(br, 159); } else { v = 7 + 2 * VP8GetBit(br, 165); v += VP8GetBit(br, 145); } } else { const uint8_t *tab; const int bit1 = VP8GetBit(br, p[8]); const int bit0 = VP8GetBit(br, p[9 + bit1]); const int cat = 2 * bit1 + bit0; v = 0; for (tab = kCat3456[cat]; *tab; ++tab) { v += v + VP8GetBit(br, *tab); } v += 3 + (8 << cat); } } p = prob[kBands[n]][2]; } j = kZigzag[n - 1]; out[j] = GetSigned(br, v); if (n == 16 || !VP8GetBit(br, p[0])) { /* EOB */ return n; } } if (n == 16) { return 16; } } } int vp8_decode_mb_tokens(VP8D_COMP *dx, MACROBLOCKD *x) { BOOL_DECODER *bc = x->current_bc; const FRAME_CONTEXT *const fc = &dx->common.fc; char *eobs = x->eobs; int i; int nonzeros; int eobtotal = 0; short *qcoeff_ptr; ProbaArray coef_probs; ENTROPY_CONTEXT *a_ctx = ((ENTROPY_CONTEXT *)x->above_context); ENTROPY_CONTEXT *l_ctx = ((ENTROPY_CONTEXT *)x->left_context); ENTROPY_CONTEXT *a; ENTROPY_CONTEXT *l; int skip_dc = 0; qcoeff_ptr = &x->qcoeff[0]; if (!x->mode_info_context->mbmi.is_4x4) { a = a_ctx + 8; l = l_ctx + 8; coef_probs = fc->coef_probs[1]; nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr + 24 * 16); *a = *l = (nonzeros > 0); eobs[24] = nonzeros; eobtotal += nonzeros - 16; coef_probs = fc->coef_probs[0]; skip_dc = 1; } else { coef_probs = fc->coef_probs[3]; skip_dc = 0; } for (i = 0; i < 16; ++i) { a = a_ctx + (i & 3); l = l_ctx + ((i & 0xc) >> 2); nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), skip_dc, qcoeff_ptr); *a = *l = (nonzeros > 0); nonzeros += skip_dc; eobs[i] = nonzeros; eobtotal += nonzeros; qcoeff_ptr += 16; } coef_probs = fc->coef_probs[2]; a_ctx += 4; l_ctx += 4; for (i = 16; i < 24; ++i) { a = a_ctx + ((i > 19) << 1) + (i & 1); l = l_ctx + ((i > 19) << 1) + ((i & 3) > 1); nonzeros = GetCoeffs(bc, coef_probs, (*a + *l), 0, qcoeff_ptr); *a = *l = (nonzeros > 0); eobs[i] = nonzeros; eobtotal += nonzeros; qcoeff_ptr += 16; } return eobtotal; } libvpx-1.8.2/vp8/decoder/detokenize.h000066400000000000000000000013471357355204000174730ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_DECODER_DETOKENIZE_H_ #define VPX_VP8_DECODER_DETOKENIZE_H_ #include "onyxd_int.h" #ifdef __cplusplus extern "C" { #endif void vp8_reset_mb_tokens_context(MACROBLOCKD *x); int vp8_decode_mb_tokens(VP8D_COMP *, MACROBLOCKD *); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_DECODER_DETOKENIZE_H_ libvpx-1.8.2/vp8/decoder/ec_types.h000066400000000000000000000026061357355204000171440ustar00rootroot00000000000000/* * Copyright (c) 2011 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_DECODER_EC_TYPES_H_ #define VPX_VP8_DECODER_EC_TYPES_H_ #ifdef __cplusplus extern "C" { #endif #define MAX_OVERLAPS 16 /* The area (in Q6 pixel units) by which the block pointed to by bmi * overlaps another block. */ typedef struct { int overlap; union b_mode_info *bmi; } OVERLAP_NODE; /* Structure to keep track of overlapping blocks on a block level. */ typedef struct { /* TODO(holmer): This array should be exchanged for a linked list */ OVERLAP_NODE overlaps[MAX_OVERLAPS]; } B_OVERLAP; /* Structure used to hold all the overlaps of a macroblock. The overlaps of a * macroblock are further divided into block overlaps. */ typedef struct { B_OVERLAP overlaps[16]; } MB_OVERLAP; /* Structure for keeping track of motion vectors and which reference frame they * refer to. Used for motion vector interpolation. */ typedef struct { MV mv; MV_REFERENCE_FRAME ref_frame; } EC_BLOCK; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_DECODER_EC_TYPES_H_ libvpx-1.8.2/vp8/decoder/error_concealment.c000066400000000000000000000427751357355204000210400ustar00rootroot00000000000000/* * Copyright (c) 2011 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include "error_concealment.h" #include "onyxd_int.h" #include "decodemv.h" #include "vpx_mem/vpx_mem.h" #include "vp8/common/findnearmv.h" #include "vp8/common/common.h" #include "vpx_dsp/vpx_dsp_common.h" #define FLOOR(x, q) ((x) & -(1 << (q))) #define NUM_NEIGHBORS 20 typedef struct ec_position { int row; int col; } EC_POS; /* * Regenerate the table in Matlab with: * x = meshgrid((0:4), (0:4)); * y = meshgrid((0:4), (0:4))'; * W = round((1./(sqrt(x.^2 + y.^2))*2^7)); * W(1,1) = 0; */ static const int weights_q7[5][5] = { { 0, 128, 64, 43, 32 }, { 128, 91, 57, 40, 31 }, { 64, 57, 45, 36, 29 }, { 43, 40, 36, 30, 26 }, { 32, 31, 29, 26, 23 } }; int vp8_alloc_overlap_lists(VP8D_COMP *pbi) { if (pbi->overlaps != NULL) { vpx_free(pbi->overlaps); pbi->overlaps = NULL; } pbi->overlaps = vpx_calloc(pbi->common.mb_rows * pbi->common.mb_cols, sizeof(MB_OVERLAP)); if (pbi->overlaps == NULL) return -1; return 0; } void vp8_de_alloc_overlap_lists(VP8D_COMP *pbi) { vpx_free(pbi->overlaps); pbi->overlaps = NULL; } /* Inserts a new overlap area value into the list of overlaps of a block */ static void assign_overlap(OVERLAP_NODE *overlaps, union b_mode_info *bmi, int overlap) { int i; if (overlap <= 0) return; /* Find and assign to the next empty overlap node in the list of overlaps. * Empty is defined as bmi == NULL */ for (i = 0; i < MAX_OVERLAPS; ++i) { if (overlaps[i].bmi == NULL) { overlaps[i].bmi = bmi; overlaps[i].overlap = overlap; break; } } } /* Calculates the overlap area between two 4x4 squares, where the first * square has its upper-left corner at (b1_row, b1_col) and the second * square has its upper-left corner at (b2_row, b2_col). Doesn't * properly handle squares which do not overlap.
*/ static int block_overlap(int b1_row, int b1_col, int b2_row, int b2_col) { const int int_top = VPXMAX(b1_row, b2_row); // top const int int_left = VPXMAX(b1_col, b2_col); // left /* Since each block is 4x4 pixels, adding 4 (Q3) to the left/top edge * gives us the right/bottom edge. */ const int int_right = VPXMIN(b1_col + (4 << 3), b2_col + (4 << 3)); // right const int int_bottom = VPXMIN(b1_row + (4 << 3), b2_row + (4 << 3)); // bottom return (int_bottom - int_top) * (int_right - int_left); } /* Calculates the overlap area for all blocks in a macroblock at position * (mb_row, mb_col) in macroblocks, which are being overlapped by a given * overlapping block at position (new_row, new_col) (in pixels, Q3). The * first block being overlapped in the macroblock has position (first_blk_row, * first_blk_col) in blocks relative the upper-left corner of the image. */ static void calculate_overlaps_mb(B_OVERLAP *b_overlaps, union b_mode_info *bmi, int new_row, int new_col, int mb_row, int mb_col, int first_blk_row, int first_blk_col) { /* Find the blocks within this MB (defined by mb_row, mb_col) which are * overlapped by bmi and calculate and assign overlap for each of those * blocks. */ /* Block coordinates relative the upper-left block */ const int rel_ol_blk_row = first_blk_row - mb_row * 4; const int rel_ol_blk_col = first_blk_col - mb_col * 4; /* If the block partly overlaps any previous MB, these coordinates * can be < 0. We don't want to access blocks in previous MBs. */ const int blk_idx = VPXMAX(rel_ol_blk_row, 0) * 4 + VPXMAX(rel_ol_blk_col, 0); /* Upper left overlapping block */ B_OVERLAP *b_ol_ul = &(b_overlaps[blk_idx]); /* Calculate and assign overlaps for all blocks in this MB * which the motion compensated block overlaps */ /* Avoid calculating overlaps for blocks in later MBs */ int end_row = VPXMIN(4 + mb_row * 4 - first_blk_row, 2); int end_col = VPXMIN(4 + mb_col * 4 - first_blk_col, 2); int row, col; /* Check if new_row and new_col are evenly divisible by 4 (Q3), * and if so we shouldn't check neighboring blocks */ if (new_row >= 0 && (new_row & 0x1F) == 0) end_row = 1; if (new_col >= 0 && (new_col & 0x1F) == 0) end_col = 1; /* Check if the overlapping block partly overlaps a previous MB * and if so, we're overlapping fewer blocks in this MB. 
*/ if (new_row < (mb_row * 16) << 3) end_row = 1; if (new_col < (mb_col * 16) << 3) end_col = 1; for (row = 0; row < end_row; ++row) { for (col = 0; col < end_col; ++col) { /* input in Q3, result in Q6 */ const int overlap = block_overlap(new_row, new_col, (((first_blk_row + row) * 4) << 3), (((first_blk_col + col) * 4) << 3)); assign_overlap(b_ol_ul[row * 4 + col].overlaps, bmi, overlap); } } } static void calculate_overlaps(MB_OVERLAP *overlap_ul, int mb_rows, int mb_cols, union b_mode_info *bmi, int b_row, int b_col) { MB_OVERLAP *mb_overlap; int row, col, rel_row, rel_col; int new_row, new_col; int end_row, end_col; int overlap_b_row, overlap_b_col; int overlap_mb_row, overlap_mb_col; /* mb subpixel position */ row = (4 * b_row) << 3; /* Q3 */ col = (4 * b_col) << 3; /* Q3 */ /* reverse compensate for motion */ new_row = row - bmi->mv.as_mv.row; new_col = col - bmi->mv.as_mv.col; if (new_row >= ((16 * mb_rows) << 3) || new_col >= ((16 * mb_cols) << 3)) { /* the new block ended up outside the frame */ return; } if (new_row <= -32 || new_col <= -32) { /* outside the frame */ return; } /* overlapping block's position in blocks */ overlap_b_row = FLOOR(new_row / 4, 3) >> 3; overlap_b_col = FLOOR(new_col / 4, 3) >> 3; /* overlapping block's MB position in MBs * operations are done in Q3 */ overlap_mb_row = FLOOR((overlap_b_row << 3) / 4, 3) >> 3; overlap_mb_col = FLOOR((overlap_b_col << 3) / 4, 3) >> 3; end_row = VPXMIN(mb_rows - overlap_mb_row, 2); end_col = VPXMIN(mb_cols - overlap_mb_col, 2); /* Don't calculate overlap for MBs we don't overlap */ /* Check if the new block row starts at the last block row of the MB */ if (abs(new_row - ((16 * overlap_mb_row) << 3)) < ((3 * 4) << 3)) end_row = 1; /* Check if the new block col starts at the last block col of the MB */ if (abs(new_col - ((16 * overlap_mb_col) << 3)) < ((3 * 4) << 3)) end_col = 1; /* find the MB(s) this block is overlapping */ for (rel_row = 0; rel_row < end_row; ++rel_row) { for (rel_col = 0; rel_col < end_col; ++rel_col) { if (overlap_mb_row + rel_row < 0 || overlap_mb_col + rel_col < 0) continue; mb_overlap = overlap_ul + (overlap_mb_row + rel_row) * mb_cols + overlap_mb_col + rel_col; calculate_overlaps_mb(mb_overlap->overlaps, bmi, new_row, new_col, overlap_mb_row + rel_row, overlap_mb_col + rel_col, overlap_b_row + rel_row, overlap_b_col + rel_col); } } } /* Estimates a motion vector given the overlapping blocks' motion vectors. * Filters out all overlapping blocks which do not refer to the correct * reference frame type. */ static void estimate_mv(const OVERLAP_NODE *overlaps, union b_mode_info *bmi) { int i; int overlap_sum = 0; int row_acc = 0; int col_acc = 0; bmi->mv.as_int = 0; for (i = 0; i < MAX_OVERLAPS; ++i) { if (overlaps[i].bmi == NULL) break; col_acc += overlaps[i].overlap * overlaps[i].bmi->mv.as_mv.col; row_acc += overlaps[i].overlap * overlaps[i].bmi->mv.as_mv.row; overlap_sum += overlaps[i].overlap; } if (overlap_sum > 0) { /* Q9 / Q6 = Q3 */ bmi->mv.as_mv.col = col_acc / overlap_sum; bmi->mv.as_mv.row = row_acc / overlap_sum; } else { bmi->mv.as_mv.col = 0; bmi->mv.as_mv.row = 0; } } /* Estimates all motion vectors for a macroblock given the lists of * overlaps for each block. Decides whether or not the MVs must be clamped. 
*/ static void estimate_mb_mvs(const B_OVERLAP *block_overlaps, MODE_INFO *mi, int mb_to_left_edge, int mb_to_right_edge, int mb_to_top_edge, int mb_to_bottom_edge) { int row, col; int non_zero_count = 0; MV *const filtered_mv = &(mi->mbmi.mv.as_mv); union b_mode_info *const bmi = mi->bmi; filtered_mv->col = 0; filtered_mv->row = 0; mi->mbmi.need_to_clamp_mvs = 0; for (row = 0; row < 4; ++row) { int this_b_to_top_edge = mb_to_top_edge + ((row * 4) << 3); int this_b_to_bottom_edge = mb_to_bottom_edge - ((row * 4) << 3); for (col = 0; col < 4; ++col) { int i = row * 4 + col; int this_b_to_left_edge = mb_to_left_edge + ((col * 4) << 3); int this_b_to_right_edge = mb_to_right_edge - ((col * 4) << 3); /* Estimate vectors for all blocks which are overlapped by this */ /* type. Interpolate/extrapolate the rest of the block's MVs */ estimate_mv(block_overlaps[i].overlaps, &(bmi[i])); mi->mbmi.need_to_clamp_mvs |= vp8_check_mv_bounds( &bmi[i].mv, this_b_to_left_edge, this_b_to_right_edge, this_b_to_top_edge, this_b_to_bottom_edge); if (bmi[i].mv.as_int != 0) { ++non_zero_count; filtered_mv->col += bmi[i].mv.as_mv.col; filtered_mv->row += bmi[i].mv.as_mv.row; } } } if (non_zero_count > 0) { filtered_mv->col /= non_zero_count; filtered_mv->row /= non_zero_count; } } static void calc_prev_mb_overlaps(MB_OVERLAP *overlaps, MODE_INFO *prev_mi, int mb_row, int mb_col, int mb_rows, int mb_cols) { int sub_row; int sub_col; for (sub_row = 0; sub_row < 4; ++sub_row) { for (sub_col = 0; sub_col < 4; ++sub_col) { calculate_overlaps(overlaps, mb_rows, mb_cols, &(prev_mi->bmi[sub_row * 4 + sub_col]), 4 * mb_row + sub_row, 4 * mb_col + sub_col); } } } /* Estimate all missing motion vectors. This function does the same as the one * above, but has different input arguments. */ static void estimate_missing_mvs(MB_OVERLAP *overlaps, MODE_INFO *mi, MODE_INFO *prev_mi, int mb_rows, int mb_cols, unsigned int first_corrupt) { int mb_row, mb_col; memset(overlaps, 0, sizeof(MB_OVERLAP) * mb_rows * mb_cols); /* First calculate the overlaps for all blocks */ for (mb_row = 0; mb_row < mb_rows; ++mb_row) { for (mb_col = 0; mb_col < mb_cols; ++mb_col) { /* We're only able to use blocks referring to the last frame * when extrapolating new vectors. */ if (prev_mi->mbmi.ref_frame == LAST_FRAME) { calc_prev_mb_overlaps(overlaps, prev_mi, mb_row, mb_col, mb_rows, mb_cols); } ++prev_mi; } ++prev_mi; } mb_row = first_corrupt / mb_cols; mb_col = first_corrupt - mb_row * mb_cols; mi += mb_row * (mb_cols + 1) + mb_col; /* Go through all macroblocks in the current image with missing MVs * and calculate new MVs using the overlaps. 
*/ for (; mb_row < mb_rows; ++mb_row) { int mb_to_top_edge = -((mb_row * 16)) << 3; int mb_to_bottom_edge = ((mb_rows - 1 - mb_row) * 16) << 3; for (; mb_col < mb_cols; ++mb_col) { int mb_to_left_edge = -((mb_col * 16) << 3); int mb_to_right_edge = ((mb_cols - 1 - mb_col) * 16) << 3; const B_OVERLAP *block_overlaps = overlaps[mb_row * mb_cols + mb_col].overlaps; mi->mbmi.ref_frame = LAST_FRAME; mi->mbmi.mode = SPLITMV; mi->mbmi.uv_mode = DC_PRED; mi->mbmi.partitioning = 3; mi->mbmi.segment_id = 0; estimate_mb_mvs(block_overlaps, mi, mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, mb_to_bottom_edge); ++mi; } mb_col = 0; ++mi; } } void vp8_estimate_missing_mvs(VP8D_COMP *pbi) { VP8_COMMON *const pc = &pbi->common; estimate_missing_mvs(pbi->overlaps, pc->mi, pc->prev_mi, pc->mb_rows, pc->mb_cols, pbi->mvs_corrupt_from_mb); } static void assign_neighbor(EC_BLOCK *neighbor, MODE_INFO *mi, int block_idx) { assert(mi->mbmi.ref_frame < MAX_REF_FRAMES); neighbor->ref_frame = mi->mbmi.ref_frame; neighbor->mv = mi->bmi[block_idx].mv.as_mv; } /* Finds the neighboring blocks of a macroblock. In the general case * 20 blocks are found. If fewer blocks are found due to * image boundaries, those positions in the EC_BLOCK array are left "empty". * The neighbors are enumerated with the upper-left neighbor as the first * element, the second element refers to the neighbor to the right of the previous * neighbor, and so on. The last element refers to the neighbor below the first * neighbor. */ static void find_neighboring_blocks(MODE_INFO *mi, EC_BLOCK *neighbors, int mb_row, int mb_col, int mb_rows, int mb_cols, int mi_stride) { int i = 0; int j; if (mb_row > 0) { /* upper left */ if (mb_col > 0) assign_neighbor(&neighbors[i], mi - mi_stride - 1, 15); ++i; /* above */ for (j = 12; j < 16; ++j, ++i) assign_neighbor(&neighbors[i], mi - mi_stride, j); } else i += 5; if (mb_col < mb_cols - 1) { /* upper right */ if (mb_row > 0) assign_neighbor(&neighbors[i], mi - mi_stride + 1, 12); ++i; /* right */ for (j = 0; j <= 12; j += 4, ++i) assign_neighbor(&neighbors[i], mi + 1, j); } else i += 5; if (mb_row < mb_rows - 1) { /* lower right */ if (mb_col < mb_cols - 1) assign_neighbor(&neighbors[i], mi + mi_stride + 1, 0); ++i; /* below */ for (j = 0; j < 4; ++j, ++i) assign_neighbor(&neighbors[i], mi + mi_stride, j); } else i += 5; if (mb_col > 0) { /* lower left */ if (mb_row < mb_rows - 1) assign_neighbor(&neighbors[i], mi + mi_stride - 1, 4); ++i; /* left */ for (j = 3; j < 16; j += 4, ++i) { assign_neighbor(&neighbors[i], mi - 1, j); } } else i += 5; assert(i == 20); } /* Interpolates all motion vectors for a macroblock from the neighboring blocks' * motion vectors. */ static void interpolate_mvs(MACROBLOCKD *mb, EC_BLOCK *neighbors, MV_REFERENCE_FRAME dom_ref_frame) { int row, col, i; MODE_INFO *const mi = mb->mode_info_context; /* Table with the position of the neighboring blocks relative to the position * of the upper left block of the current MB. Starting with the upper left * neighbor and going to the right.
*/ const EC_POS neigh_pos[NUM_NEIGHBORS] = { { -1, -1 }, { -1, 0 }, { -1, 1 }, { -1, 2 }, { -1, 3 }, { -1, 4 }, { 0, 4 }, { 1, 4 }, { 2, 4 }, { 3, 4 }, { 4, 4 }, { 4, 3 }, { 4, 2 }, { 4, 1 }, { 4, 0 }, { 4, -1 }, { 3, -1 }, { 2, -1 }, { 1, -1 }, { 0, -1 } }; mi->mbmi.need_to_clamp_mvs = 0; for (row = 0; row < 4; ++row) { int mb_to_top_edge = mb->mb_to_top_edge + ((row * 4) << 3); int mb_to_bottom_edge = mb->mb_to_bottom_edge - ((row * 4) << 3); for (col = 0; col < 4; ++col) { int mb_to_left_edge = mb->mb_to_left_edge + ((col * 4) << 3); int mb_to_right_edge = mb->mb_to_right_edge - ((col * 4) << 3); int w_sum = 0; int mv_row_sum = 0; int mv_col_sum = 0; int_mv *const mv = &(mi->bmi[row * 4 + col].mv); mv->as_int = 0; for (i = 0; i < NUM_NEIGHBORS; ++i) { /* Calculate the weighted sum of neighboring MVs referring * to the dominant frame type. */ const int w = weights_q7[abs(row - neigh_pos[i].row)] [abs(col - neigh_pos[i].col)]; if (neighbors[i].ref_frame != dom_ref_frame) continue; w_sum += w; /* Q7 * Q3 = Q10 */ mv_row_sum += w * neighbors[i].mv.row; mv_col_sum += w * neighbors[i].mv.col; } if (w_sum > 0) { /* Avoid division by zero. * Normalize with the sum of the coefficients * Q3 = Q10 / Q7 */ mv->as_mv.row = mv_row_sum / w_sum; mv->as_mv.col = mv_col_sum / w_sum; mi->mbmi.need_to_clamp_mvs |= vp8_check_mv_bounds(mv, mb_to_left_edge, mb_to_right_edge, mb_to_top_edge, mb_to_bottom_edge); } } } } void vp8_interpolate_motion(MACROBLOCKD *mb, int mb_row, int mb_col, int mb_rows, int mb_cols) { /* Find relevant neighboring blocks */ EC_BLOCK neighbors[NUM_NEIGHBORS]; int i; /* Initialize the array. MAX_REF_FRAMES is interpreted as "doesn't exist" */ for (i = 0; i < NUM_NEIGHBORS; ++i) { neighbors[i].ref_frame = MAX_REF_FRAMES; neighbors[i].mv.row = neighbors[i].mv.col = 0; } find_neighboring_blocks(mb->mode_info_context, neighbors, mb_row, mb_col, mb_rows, mb_cols, mb->mode_info_stride); /* Interpolate MVs for the missing blocks from the surrounding * blocks which refer to the last frame. */ interpolate_mvs(mb, neighbors, LAST_FRAME); mb->mode_info_context->mbmi.ref_frame = LAST_FRAME; mb->mode_info_context->mbmi.mode = SPLITMV; mb->mode_info_context->mbmi.uv_mode = DC_PRED; mb->mode_info_context->mbmi.partitioning = 3; mb->mode_info_context->mbmi.segment_id = 0; } libvpx-1.8.2/vp8/decoder/error_concealment.h000066400000000000000000000022711357355204000210300ustar00rootroot00000000000000/* * Copyright (c) 2011 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_DECODER_ERROR_CONCEALMENT_H_ #define VPX_VP8_DECODER_ERROR_CONCEALMENT_H_ #include "onyxd_int.h" #include "ec_types.h" #ifdef __cplusplus extern "C" { #endif /* Allocate memory for the overlap lists */ int vp8_alloc_overlap_lists(VP8D_COMP *pbi); /* Deallocate the overlap lists */ void vp8_de_alloc_overlap_lists(VP8D_COMP *pbi); /* Estimate all missing motion vectors. */ void vp8_estimate_missing_mvs(VP8D_COMP *pbi); /* Functions for spatial MV interpolation */ /* Interpolates all motion vectors for a macroblock mb at position * (mb_row, mb_col). 
*/ void vp8_interpolate_motion(MACROBLOCKD *mb, int mb_row, int mb_col, int mb_rows, int mb_cols); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_DECODER_ERROR_CONCEALMENT_H_ libvpx-1.8.2/vp8/decoder/onyxd_if.c000066400000000000000000000314711357355204000171450ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp8/common/onyxc_int.h" #if CONFIG_POSTPROC #include "vp8/common/postproc.h" #endif #include "vp8/common/onyxd.h" #include "onyxd_int.h" #include "vpx_mem/vpx_mem.h" #include "vp8/common/alloccommon.h" #include "vp8/common/common.h" #include "vp8/common/loopfilter.h" #include "vp8/common/swapyv12buffer.h" #include "vp8/common/threading.h" #include "decoderthreading.h" #include <stdio.h> #include <assert.h> #include "vp8/common/quant_common.h" #include "vp8/common/reconintra.h" #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx_scale/vpx_scale.h" #include "vp8/common/systemdependent.h" #include "vpx_ports/system_state.h" #include "vpx_ports/vpx_once.h" #include "vpx_ports/vpx_timer.h" #include "detokenize.h" #if CONFIG_ERROR_CONCEALMENT #include "error_concealment.h" #endif #if VPX_ARCH_ARM #include "vpx_ports/arm.h" #endif extern void vp8_init_loop_filter(VP8_COMMON *cm); static int get_free_fb(VP8_COMMON *cm); static void ref_cnt_fb(int *buf, int *idx, int new_idx); static void initialize_dec(void) { static volatile int init_done = 0; if (!init_done) { vpx_dsp_rtcd(); vp8_init_intra_predictors(); init_done = 1; } } static void remove_decompressor(VP8D_COMP *pbi) { #if CONFIG_ERROR_CONCEALMENT vp8_de_alloc_overlap_lists(pbi); #endif vp8_remove_common(&pbi->common); vpx_free(pbi); } static struct VP8D_COMP *create_decompressor(VP8D_CONFIG *oxcf) { VP8D_COMP *pbi = vpx_memalign(32, sizeof(VP8D_COMP)); if (!pbi) return NULL; memset(pbi, 0, sizeof(VP8D_COMP)); if (setjmp(pbi->common.error.jmp)) { pbi->common.error.setjmp = 0; remove_decompressor(pbi); return 0; } pbi->common.error.setjmp = 1; vp8_create_common(&pbi->common); pbi->common.current_video_frame = 0; pbi->ready_for_new_data = 1; /* vp8cx_init_de_quantizer() is first called here. Add check in * frame_init_dequantizer() to avoid * unnecessary calling of vp8cx_init_de_quantizer() for every frame. */ vp8cx_init_de_quantizer(pbi); vp8_loop_filter_init(&pbi->common); pbi->common.error.setjmp = 0; #if CONFIG_ERROR_CONCEALMENT pbi->ec_enabled = oxcf->error_concealment; pbi->overlaps = NULL; #else (void)oxcf; pbi->ec_enabled = 0; #endif /* Error concealment is activated after a key frame has been * decoded without errors when error concealment is enabled. */ pbi->ec_active = 0; pbi->decoded_key_frame = 0; /* Independent-partitions decoding is activated when a frame updates the * token probability table to have equal probabilities over the * PREV_COEF context.
*/ pbi->independent_partitions = 0; vp8_setup_block_dptrs(&pbi->mb); once(initialize_dec); return pbi; } vpx_codec_err_t vp8dx_get_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP8_COMMON *cm = &pbi->common; int ref_fb_idx; if (ref_frame_flag == VP8_LAST_FRAME) { ref_fb_idx = cm->lst_fb_idx; } else if (ref_frame_flag == VP8_GOLD_FRAME) { ref_fb_idx = cm->gld_fb_idx; } else if (ref_frame_flag == VP8_ALTR_FRAME) { ref_fb_idx = cm->alt_fb_idx; } else { vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, "Invalid reference frame"); return pbi->common.error.error_code; } if (cm->yv12_fb[ref_fb_idx].y_height != sd->y_height || cm->yv12_fb[ref_fb_idx].y_width != sd->y_width || cm->yv12_fb[ref_fb_idx].uv_height != sd->uv_height || cm->yv12_fb[ref_fb_idx].uv_width != sd->uv_width) { vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, "Incorrect buffer dimensions"); } else vp8_yv12_copy_frame(&cm->yv12_fb[ref_fb_idx], sd); return pbi->common.error.error_code; } vpx_codec_err_t vp8dx_set_reference(VP8D_COMP *pbi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP8_COMMON *cm = &pbi->common; int *ref_fb_ptr = NULL; int free_fb; if (ref_frame_flag == VP8_LAST_FRAME) { ref_fb_ptr = &cm->lst_fb_idx; } else if (ref_frame_flag == VP8_GOLD_FRAME) { ref_fb_ptr = &cm->gld_fb_idx; } else if (ref_frame_flag == VP8_ALTR_FRAME) { ref_fb_ptr = &cm->alt_fb_idx; } else { vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, "Invalid reference frame"); return pbi->common.error.error_code; } if (cm->yv12_fb[*ref_fb_ptr].y_height != sd->y_height || cm->yv12_fb[*ref_fb_ptr].y_width != sd->y_width || cm->yv12_fb[*ref_fb_ptr].uv_height != sd->uv_height || cm->yv12_fb[*ref_fb_ptr].uv_width != sd->uv_width) { vpx_internal_error(&pbi->common.error, VPX_CODEC_ERROR, "Incorrect buffer dimensions"); } else { /* Find an empty frame buffer. */ free_fb = get_free_fb(cm); /* Decrease fb_idx_ref_cnt since it will be increased again in * ref_cnt_fb() below. */ cm->fb_idx_ref_cnt[free_fb]--; /* Manage the reference counters and copy image. */ ref_cnt_fb(cm->fb_idx_ref_cnt, ref_fb_ptr, free_fb); vp8_yv12_copy_frame(sd, &cm->yv12_fb[*ref_fb_ptr]); } return pbi->common.error.error_code; } static int get_free_fb(VP8_COMMON *cm) { int i; for (i = 0; i < NUM_YV12_BUFFERS; ++i) { if (cm->fb_idx_ref_cnt[i] == 0) break; } assert(i < NUM_YV12_BUFFERS); cm->fb_idx_ref_cnt[i] = 1; return i; } static void ref_cnt_fb(int *buf, int *idx, int new_idx) { if (buf[*idx] > 0) buf[*idx]--; *idx = new_idx; buf[new_idx]++; } /* If any buffer copy / swapping is signalled it should be done here. */ static int swap_frame_buffers(VP8_COMMON *cm) { int err = 0; /* The alternate reference frame or golden frame can be updated * using the new, last, or golden/alt ref frame. If it * is updated using the newly decoded frame it is a refresh. * An update using the last or golden/alt ref frame is a copy. 
if (cm->copy_buffer_to_arf) { int new_fb = 0; if (cm->copy_buffer_to_arf == 1) { new_fb = cm->lst_fb_idx; } else if (cm->copy_buffer_to_arf == 2) { new_fb = cm->gld_fb_idx; } else { err = -1; } ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->alt_fb_idx, new_fb); } if (cm->copy_buffer_to_gf) { int new_fb = 0; if (cm->copy_buffer_to_gf == 1) { new_fb = cm->lst_fb_idx; } else if (cm->copy_buffer_to_gf == 2) { new_fb = cm->alt_fb_idx; } else { err = -1; } ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->gld_fb_idx, new_fb); } if (cm->refresh_golden_frame) { ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->gld_fb_idx, cm->new_fb_idx); } if (cm->refresh_alt_ref_frame) { ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->alt_fb_idx, cm->new_fb_idx); } if (cm->refresh_last_frame) { ref_cnt_fb(cm->fb_idx_ref_cnt, &cm->lst_fb_idx, cm->new_fb_idx); cm->frame_to_show = &cm->yv12_fb[cm->lst_fb_idx]; } else { cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; } cm->fb_idx_ref_cnt[cm->new_fb_idx]--; return err; } static int check_fragments_for_errors(VP8D_COMP *pbi) { if (!pbi->ec_active && pbi->fragments.count <= 1 && pbi->fragments.sizes[0] == 0) { VP8_COMMON *cm = &pbi->common; /* If error concealment is disabled we won't signal missing frames * to the decoder. */ if (cm->fb_idx_ref_cnt[cm->lst_fb_idx] > 1) { /* The last reference shares its buffer with another reference * buffer. Move it to its own buffer before setting it as * corrupt, otherwise we will make multiple buffers corrupt. */ const int prev_idx = cm->lst_fb_idx; cm->fb_idx_ref_cnt[prev_idx]--; cm->lst_fb_idx = get_free_fb(cm); vp8_yv12_copy_frame(&cm->yv12_fb[prev_idx], &cm->yv12_fb[cm->lst_fb_idx]); } /* This is used to signal that we are missing frames. * We do not know if the missing frame(s) were supposed to update * any of the reference buffers, but we act conservatively and * mark only the last buffer as corrupted. */ cm->yv12_fb[cm->lst_fb_idx].corrupted = 1; /* Signal that we have no frame to show. */ cm->show_frame = 0; /* Nothing more to do. */ return 0; } return 1; } int vp8dx_receive_compressed_data(VP8D_COMP *pbi, int64_t time_stamp) { VP8_COMMON *cm = &pbi->common; int retcode = -1; pbi->common.error.error_code = VPX_CODEC_OK; retcode = check_fragments_for_errors(pbi); if (retcode <= 0) return retcode; cm->new_fb_idx = get_free_fb(cm); /* setup reference frames for vp8_decode_frame */ pbi->dec_fb_ref[INTRA_FRAME] = &cm->yv12_fb[cm->new_fb_idx]; pbi->dec_fb_ref[LAST_FRAME] = &cm->yv12_fb[cm->lst_fb_idx]; pbi->dec_fb_ref[GOLDEN_FRAME] = &cm->yv12_fb[cm->gld_fb_idx]; pbi->dec_fb_ref[ALTREF_FRAME] = &cm->yv12_fb[cm->alt_fb_idx]; retcode = vp8_decode_frame(pbi); if (retcode < 0) { if (cm->fb_idx_ref_cnt[cm->new_fb_idx] > 0) { cm->fb_idx_ref_cnt[cm->new_fb_idx]--; } pbi->common.error.error_code = VPX_CODEC_ERROR; // Propagate the error info.
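// vp8_decode_frame() reports detailed failures through the macroblock-level // error_info (e.g. the VPX_CODEC_CORRUPT_FRAME raised for a corrupted // reference frame in threading.c); if a specific code was recorded there, // it replaces the generic VPX_CODEC_ERROR set above.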
if (pbi->mb.error_info.error_code != 0) { pbi->common.error.error_code = pbi->mb.error_info.error_code; memcpy(pbi->common.error.detail, pbi->mb.error_info.detail, sizeof(pbi->mb.error_info.detail)); } goto decode_exit; } if (swap_frame_buffers(cm)) { pbi->common.error.error_code = VPX_CODEC_ERROR; goto decode_exit; } vpx_clear_system_state(); if (cm->show_frame) { cm->current_video_frame++; cm->show_frame_mi = cm->mi; } #if CONFIG_ERROR_CONCEALMENT /* swap the mode infos to storage for future error concealment */ if (pbi->ec_enabled && pbi->common.prev_mi) { MODE_INFO *tmp = pbi->common.prev_mi; int row, col; pbi->common.prev_mi = pbi->common.mi; pbi->common.mi = tmp; /* Propagate the segment_ids to the next frame */ for (row = 0; row < pbi->common.mb_rows; ++row) { for (col = 0; col < pbi->common.mb_cols; ++col) { const int i = row * pbi->common.mode_info_stride + col; pbi->common.mi[i].mbmi.segment_id = pbi->common.prev_mi[i].mbmi.segment_id; } } } #endif pbi->ready_for_new_data = 0; pbi->last_time_stamp = time_stamp; decode_exit: vpx_clear_system_state(); return retcode; } int vp8dx_get_raw_frame(VP8D_COMP *pbi, YV12_BUFFER_CONFIG *sd, int64_t *time_stamp, int64_t *time_end_stamp, vp8_ppflags_t *flags) { int ret = -1; if (pbi->ready_for_new_data == 1) return ret; /* ie no raw frame to show!!! */ if (pbi->common.show_frame == 0) return ret; pbi->ready_for_new_data = 1; *time_stamp = pbi->last_time_stamp; *time_end_stamp = 0; #if CONFIG_POSTPROC ret = vp8_post_proc_frame(&pbi->common, sd, flags); #else (void)flags; if (pbi->common.frame_to_show) { *sd = *pbi->common.frame_to_show; sd->y_width = pbi->common.Width; sd->y_height = pbi->common.Height; sd->uv_height = pbi->common.Height / 2; ret = 0; } else { ret = -1; } #endif /*!CONFIG_POSTPROC*/ vpx_clear_system_state(); return ret; } /* This function as written isn't decoder specific, but the encoder has * much faster ways of computing this, so it's ok for it to live in a * decode specific file. */ int vp8dx_references_buffer(VP8_COMMON *oci, int ref_frame) { const MODE_INFO *mi = oci->mi; int mb_row, mb_col; for (mb_row = 0; mb_row < oci->mb_rows; ++mb_row) { for (mb_col = 0; mb_col < oci->mb_cols; mb_col++, mi++) { if (mi->mbmi.ref_frame == ref_frame) return 1; } mi++; } return 0; } int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf) { /* decoder instance for single thread mode */ fb->pbi[0] = create_decompressor(oxcf); if (!fb->pbi[0]) return VPX_CODEC_ERROR; #if CONFIG_MULTITHREAD if (setjmp(fb->pbi[0]->common.error.jmp)) { vp8_remove_decoder_instances(fb); vp8_zero(fb->pbi); vpx_clear_system_state(); return VPX_CODEC_ERROR; } fb->pbi[0]->common.error.setjmp = 1; fb->pbi[0]->max_threads = oxcf->max_threads; vp8_decoder_create_threads(fb->pbi[0]); fb->pbi[0]->common.error.setjmp = 0; #endif return VPX_CODEC_OK; } int vp8_remove_decoder_instances(struct frame_buffers *fb) { VP8D_COMP *pbi = fb->pbi[0]; if (!pbi) return VPX_CODEC_ERROR; #if CONFIG_MULTITHREAD vp8_decoder_remove_threads(pbi); #endif /* decoder instance for single thread mode */ remove_decompressor(pbi); return VPX_CODEC_OK; } int vp8dx_get_quantizer(const VP8D_COMP *pbi) { return pbi->common.base_qindex; } libvpx-1.8.2/vp8/decoder/onyxd_int.h000066400000000000000000000106011357355204000173360ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_DECODER_ONYXD_INT_H_ #define VPX_VP8_DECODER_ONYXD_INT_H_ #include "vpx_config.h" #include "vp8/common/onyxd.h" #include "treereader.h" #include "vp8/common/onyxc_int.h" #include "vp8/common/threading.h" #if CONFIG_ERROR_CONCEALMENT #include "ec_types.h" #endif #ifdef __cplusplus extern "C" { #endif typedef struct { int ithread; void *ptr1; void *ptr2; } DECODETHREAD_DATA; typedef struct { MACROBLOCKD mbd; } MB_ROW_DEC; typedef struct { int enabled; unsigned int count; const unsigned char *ptrs[MAX_PARTITIONS]; unsigned int sizes[MAX_PARTITIONS]; } FRAGMENT_DATA; #define MAX_FB_MT_DEC 32 struct frame_buffers { /* * this struct will be populated with frame buffer management * info in future commits. */ /* decoder instances */ struct VP8D_COMP *pbi[MAX_FB_MT_DEC]; }; typedef struct VP8D_COMP { DECLARE_ALIGNED(16, MACROBLOCKD, mb); YV12_BUFFER_CONFIG *dec_fb_ref[NUM_YV12_BUFFERS]; DECLARE_ALIGNED(16, VP8_COMMON, common); /* the last partition will be used for the modes/mvs */ vp8_reader mbc[MAX_PARTITIONS]; VP8D_CONFIG oxcf; FRAGMENT_DATA fragments; #if CONFIG_MULTITHREAD /* variable for threading */ vpx_atomic_int b_multithreaded_rd; int max_threads; int current_mb_col_main; unsigned int decoding_thread_count; int allocated_decoding_thread_count; int mt_baseline_filter_level[MAX_MB_SEGMENTS]; int sync_range; /* Each row remembers its already decoded column. */ vpx_atomic_int *mt_current_mb_col; unsigned char **mt_yabove_row; /* mb_rows x width */ unsigned char **mt_uabove_row; unsigned char **mt_vabove_row; unsigned char **mt_yleft_col; /* mb_rows x 16 */ unsigned char **mt_uleft_col; /* mb_rows x 8 */ unsigned char **mt_vleft_col; /* mb_rows x 8 */ MB_ROW_DEC *mb_row_di; DECODETHREAD_DATA *de_thread_data; pthread_t *h_decoding_thread; sem_t *h_event_start_decoding; sem_t h_event_end_decoding; /* end of threading data */ #endif int64_t last_time_stamp; int ready_for_new_data; vp8_prob prob_intra; vp8_prob prob_last; vp8_prob prob_gf; vp8_prob prob_skip_false; #if CONFIG_ERROR_CONCEALMENT MB_OVERLAP *overlaps; /* the mb num from which modes and mvs (first partition) are corrupt */ unsigned int mvs_corrupt_from_mb; #endif int ec_enabled; int ec_active; int decoded_key_frame; int independent_partitions; int frame_corrupt_residual; vpx_decrypt_cb decrypt_cb; void *decrypt_state; #if CONFIG_MULTITHREAD // Restart threads on next frame if set to 1. // This is set when error happens in multithreaded decoding and all threads // are shut down. 
int restart_threads; #endif } VP8D_COMP; void vp8cx_init_de_quantizer(VP8D_COMP *pbi); void vp8_mb_init_dequantizer(VP8D_COMP *pbi, MACROBLOCKD *xd); int vp8_decode_frame(VP8D_COMP *pbi); int vp8_create_decoder_instances(struct frame_buffers *fb, VP8D_CONFIG *oxcf); int vp8_remove_decoder_instances(struct frame_buffers *fb); #if CONFIG_DEBUG #define CHECK_MEM_ERROR(lval, expr) \ do { \ (lval) = (expr); \ if (!(lval)) \ vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, \ "Failed to allocate " #lval " at %s:%d", __FILE__, \ __LINE__); \ } while (0) #else #define CHECK_MEM_ERROR(lval, expr) \ do { \ (lval) = (expr); \ if (!(lval)) \ vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, \ "Failed to allocate " #lval); \ } while (0) #endif #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_DECODER_ONYXD_INT_H_ libvpx-1.8.2/vp8/decoder/threading.c000066400000000000000000000752031357355204000172740ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" #include "vp8_rtcd.h" #if !defined(WIN32) && CONFIG_OS_SUPPORT == 1 #include <unistd.h> #endif #include "onyxd_int.h" #include "vpx_mem/vpx_mem.h" #include "vp8/common/common.h" #include "vp8/common/threading.h" #include "vp8/common/loopfilter.h" #include "vp8/common/extend.h" #include "vpx_ports/vpx_timer.h" #include "decoderthreading.h" #include "detokenize.h" #include "vp8/common/reconintra4x4.h" #include "vp8/common/reconinter.h" #include "vp8/common/reconintra.h" #include "vp8/common/setupintrarecon.h" #if CONFIG_ERROR_CONCEALMENT #include "error_concealment.h" #endif #define CALLOC_ARRAY(p, n) CHECK_MEM_ERROR((p), vpx_calloc(sizeof(*(p)), (n))) #define CALLOC_ARRAY_ALIGNED(p, n, algn) \ do { \ CHECK_MEM_ERROR((p), vpx_memalign((algn), sizeof(*(p)) * (n))); \ memset((p), 0, (n) * sizeof(*(p))); \ } while (0) static void setup_decoding_thread_data(VP8D_COMP *pbi, MACROBLOCKD *xd, MB_ROW_DEC *mbrd, int count) { VP8_COMMON *const pc = &pbi->common; int i; for (i = 0; i < count; ++i) { MACROBLOCKD *mbd = &mbrd[i].mbd; mbd->subpixel_predict = xd->subpixel_predict; mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; mbd->frame_type = pc->frame_type; mbd->pre = xd->pre; mbd->dst = xd->dst; mbd->segmentation_enabled = xd->segmentation_enabled; mbd->mb_segement_abs_delta = xd->mb_segement_abs_delta; memcpy(mbd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); /*signed char ref_lf_deltas[MAX_REF_LF_DELTAS];*/ memcpy(mbd->ref_lf_deltas, xd->ref_lf_deltas, sizeof(xd->ref_lf_deltas)); /*signed char mode_lf_deltas[MAX_MODE_LF_DELTAS];*/ memcpy(mbd->mode_lf_deltas, xd->mode_lf_deltas, sizeof(xd->mode_lf_deltas)); /*unsigned char mode_ref_lf_delta_enabled; unsigned char mode_ref_lf_delta_update;*/ mbd->mode_ref_lf_delta_enabled = xd->mode_ref_lf_delta_enabled; mbd->mode_ref_lf_delta_update = xd->mode_ref_lf_delta_update; mbd->current_bc = &pbi->mbc[0]; memcpy(mbd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); memcpy(mbd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); memcpy(mbd->dequant_y2,
xd->dequant_y2, sizeof(xd->dequant_y2)); memcpy(mbd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); mbd->fullpixel_mask = 0xffffffff; if (pc->full_pixel) mbd->fullpixel_mask = 0xfffffff8; } for (i = 0; i < pc->mb_rows; ++i) vpx_atomic_store_release(&pbi->mt_current_mb_col[i], -1); } static void mt_decode_macroblock(VP8D_COMP *pbi, MACROBLOCKD *xd, unsigned int mb_idx) { MB_PREDICTION_MODE mode; int i; #if CONFIG_ERROR_CONCEALMENT int corruption_detected = 0; #else (void)mb_idx; #endif if (xd->mode_info_context->mbmi.mb_skip_coeff) { vp8_reset_mb_tokens_context(xd); } else if (!vp8dx_bool_error(xd->current_bc)) { int eobtotal; eobtotal = vp8_decode_mb_tokens(pbi, xd); /* Special case: Force the loopfilter to skip when eobtotal is zero */ xd->mode_info_context->mbmi.mb_skip_coeff = (eobtotal == 0); } mode = xd->mode_info_context->mbmi.mode; if (xd->segmentation_enabled) vp8_mb_init_dequantizer(pbi, xd); #if CONFIG_ERROR_CONCEALMENT if (pbi->ec_active) { int throw_residual; /* When we have independent partitions we can apply residual even * though other partitions within the frame are corrupt. */ throw_residual = (!pbi->independent_partitions && pbi->frame_corrupt_residual); throw_residual = (throw_residual || vp8dx_bool_error(xd->current_bc)); if ((mb_idx >= pbi->mvs_corrupt_from_mb || throw_residual)) { /* MB with corrupt residuals or corrupt mode/motion vectors. * Better to use the predictor as reconstruction. */ pbi->frame_corrupt_residual = 1; memset(xd->qcoeff, 0, sizeof(xd->qcoeff)); corruption_detected = 1; /* force idct to be skipped for B_PRED and use the * prediction only for reconstruction * */ memset(xd->eobs, 0, 25); } } #endif /* do prediction */ if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { vp8_build_intra_predictors_mbuv_s( xd, xd->recon_above[1], xd->recon_above[2], xd->recon_left[1], xd->recon_left[2], xd->recon_left_stride[1], xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride); if (mode != B_PRED) { vp8_build_intra_predictors_mby_s( xd, xd->recon_above[0], xd->recon_left[0], xd->recon_left_stride[0], xd->dst.y_buffer, xd->dst.y_stride); } else { short *DQC = xd->dequant_y1; int dst_stride = xd->dst.y_stride; /* clear out residual eob info */ if (xd->mode_info_context->mbmi.mb_skip_coeff) memset(xd->eobs, 0, 25); intra_prediction_down_copy(xd, xd->recon_above[0] + 16); for (i = 0; i < 16; ++i) { BLOCKD *b = &xd->block[i]; unsigned char *dst = xd->dst.y_buffer + b->offset; B_PREDICTION_MODE b_mode = xd->mode_info_context->bmi[i].as_mode; unsigned char *Above; unsigned char *yleft; int left_stride; unsigned char top_left; /*Caution: For some b_mode, it needs 8 pixels (4 above + 4 * above-right).*/ if (i < 4 && pbi->common.filter_level) { Above = xd->recon_above[0] + b->offset; } else { Above = dst - dst_stride; } if (i % 4 == 0 && pbi->common.filter_level) { yleft = xd->recon_left[0] + i; left_stride = 1; } else { yleft = dst - 1; left_stride = dst_stride; } if ((i == 4 || i == 8 || i == 12) && pbi->common.filter_level) { top_left = *(xd->recon_left[0] + i - 1); } else { top_left = Above[-1]; } vp8_intra4x4_predict(Above, yleft, left_stride, b_mode, dst, dst_stride, top_left); if (xd->eobs[i]) { if (xd->eobs[i] > 1) { vp8_dequant_idct_add(b->qcoeff, DQC, dst, dst_stride); } else { vp8_dc_only_idct_add(b->qcoeff[0] * DQC[0], dst, dst_stride, dst, dst_stride); memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } } } } } else { vp8_build_inter_predictors_mb(xd); } #if CONFIG_ERROR_CONCEALMENT if (corruption_detected) { return; } #endif if 
(!xd->mode_info_context->mbmi.mb_skip_coeff) { /* dequantization and idct */ if (mode != B_PRED) { short *DQC = xd->dequant_y1; if (mode != SPLITMV) { BLOCKD *b = &xd->block[24]; /* do 2nd order transform on the dc block */ if (xd->eobs[24] > 1) { vp8_dequantize_b(b, xd->dequant_y2); vp8_short_inv_walsh4x4(&b->dqcoeff[0], xd->qcoeff); memset(b->qcoeff, 0, 16 * sizeof(b->qcoeff[0])); } else { b->dqcoeff[0] = b->qcoeff[0] * xd->dequant_y2[0]; vp8_short_inv_walsh4x4_1(&b->dqcoeff[0], xd->qcoeff); memset(b->qcoeff, 0, 2 * sizeof(b->qcoeff[0])); } /* override the dc dequant constant in order to preserve the * dc components */ DQC = xd->dequant_y1_dc; } vp8_dequant_idct_add_y_block(xd->qcoeff, DQC, xd->dst.y_buffer, xd->dst.y_stride, xd->eobs); } vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs + 16); } } static void mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd, int start_mb_row) { const vpx_atomic_int *last_row_current_mb_col; vpx_atomic_int *current_mb_col; int mb_row; VP8_COMMON *pc = &pbi->common; const int nsync = pbi->sync_range; const vpx_atomic_int first_row_no_sync_above = VPX_ATOMIC_INIT(pc->mb_cols + nsync); int num_part = 1 << pbi->common.multi_token_partition; int last_mb_row = start_mb_row; YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; YV12_BUFFER_CONFIG *yv12_fb_lst = pbi->dec_fb_ref[LAST_FRAME]; int recon_y_stride = yv12_fb_new->y_stride; int recon_uv_stride = yv12_fb_new->uv_stride; unsigned char *ref_buffer[MAX_REF_FRAMES][3]; unsigned char *dst_buffer[3]; int i; int ref_fb_corrupted[MAX_REF_FRAMES]; ref_fb_corrupted[INTRA_FRAME] = 0; for (i = 1; i < MAX_REF_FRAMES; ++i) { YV12_BUFFER_CONFIG *this_fb = pbi->dec_fb_ref[i]; ref_buffer[i][0] = this_fb->y_buffer; ref_buffer[i][1] = this_fb->u_buffer; ref_buffer[i][2] = this_fb->v_buffer; ref_fb_corrupted[i] = this_fb->corrupted; } dst_buffer[0] = yv12_fb_new->y_buffer; dst_buffer[1] = yv12_fb_new->u_buffer; dst_buffer[2] = yv12_fb_new->v_buffer; xd->up_available = (start_mb_row != 0); xd->mode_info_context = pc->mi + pc->mode_info_stride * start_mb_row; xd->mode_info_stride = pc->mode_info_stride; for (mb_row = start_mb_row; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) { int recon_yoffset, recon_uvoffset; int mb_col; int filter_level; loop_filter_info_n *lfi_n = &pc->lf_info; /* save last row processed by this thread */ last_mb_row = mb_row; /* select bool coder for current partition */ xd->current_bc = &pbi->mbc[mb_row % num_part]; if (mb_row > 0) { last_row_current_mb_col = &pbi->mt_current_mb_col[mb_row - 1]; } else { last_row_current_mb_col = &first_row_no_sync_above; } current_mb_col = &pbi->mt_current_mb_col[mb_row]; recon_yoffset = mb_row * recon_y_stride * 16; recon_uvoffset = mb_row * recon_uv_stride * 8; /* reset contexts */ xd->above_context = pc->above_context; memset(xd->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); xd->left_available = 0; xd->mb_to_top_edge = -((mb_row * 16) << 3); xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3; if (pbi->common.filter_level) { xd->recon_above[0] = pbi->mt_yabove_row[mb_row] + 0 * 16 + 32; xd->recon_above[1] = pbi->mt_uabove_row[mb_row] + 0 * 8 + 16; xd->recon_above[2] = pbi->mt_vabove_row[mb_row] + 0 * 8 + 16; xd->recon_left[0] = pbi->mt_yleft_col[mb_row]; xd->recon_left[1] = pbi->mt_uleft_col[mb_row]; xd->recon_left[2] = pbi->mt_vleft_col[mb_row]; /* TODO: move to outside row loop */ xd->recon_left_stride[0] = 1; xd->recon_left_stride[1] = 
1; } else { xd->recon_above[0] = dst_buffer[0] + recon_yoffset; xd->recon_above[1] = dst_buffer[1] + recon_uvoffset; xd->recon_above[2] = dst_buffer[2] + recon_uvoffset; xd->recon_left[0] = xd->recon_above[0] - 1; xd->recon_left[1] = xd->recon_above[1] - 1; xd->recon_left[2] = xd->recon_above[2] - 1; xd->recon_above[0] -= xd->dst.y_stride; xd->recon_above[1] -= xd->dst.uv_stride; xd->recon_above[2] -= xd->dst.uv_stride; /* TODO: move to outside row loop */ xd->recon_left_stride[0] = xd->dst.y_stride; xd->recon_left_stride[1] = xd->dst.uv_stride; setup_intra_recon_left(xd->recon_left[0], xd->recon_left[1], xd->recon_left[2], xd->dst.y_stride, xd->dst.uv_stride); } for (mb_col = 0; mb_col < pc->mb_cols; ++mb_col) { if (((mb_col - 1) % nsync) == 0) { vpx_atomic_store_release(current_mb_col, mb_col - 1); } if (mb_row && !(mb_col & (nsync - 1))) { vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync); } /* Distance of MB to the various image edges. * These are specified to 8th pel as they are always * compared to values that are in 1/8th pel units. */ xd->mb_to_left_edge = -((mb_col * 16) << 3); xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3; #if CONFIG_ERROR_CONCEALMENT { int corrupt_residual = (!pbi->independent_partitions && pbi->frame_corrupt_residual) || vp8dx_bool_error(xd->current_bc); if (pbi->ec_active && (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) && corrupt_residual) { /* We have an intra block with corrupt * coefficients, better to conceal with an inter * block. * Interpolate MVs from neighboring MBs * * Note that for the first mb with corrupt * residual in a frame, we might not discover * that before decoding the residual. That * happens after this check, and therefore no * inter concealment will be done. */ vp8_interpolate_motion(xd, mb_row, mb_col, pc->mb_rows, pc->mb_cols); } } #endif xd->dst.y_buffer = dst_buffer[0] + recon_yoffset; xd->dst.u_buffer = dst_buffer[1] + recon_uvoffset; xd->dst.v_buffer = dst_buffer[2] + recon_uvoffset; /* propagate errors from reference frames */ xd->corrupted |= ref_fb_corrupted[xd->mode_info_context->mbmi.ref_frame]; if (xd->corrupted) { // Move the current decoding macroblock to the end of the row for all rows // assigned to this thread, so that other threads won't be waiting. for (; mb_row < pc->mb_rows; mb_row += (pbi->decoding_thread_count + 1)) { current_mb_col = &pbi->mt_current_mb_col[mb_row]; vpx_atomic_store_release(current_mb_col, pc->mb_cols + nsync); } vpx_internal_error(&xd->error_info, VPX_CODEC_CORRUPT_FRAME, "Corrupted reference frame"); } if (xd->mode_info_context->mbmi.ref_frame >= LAST_FRAME) { const MV_REFERENCE_FRAME ref = xd->mode_info_context->mbmi.ref_frame; xd->pre.y_buffer = ref_buffer[ref][0] + recon_yoffset; xd->pre.u_buffer = ref_buffer[ref][1] + recon_uvoffset; xd->pre.v_buffer = ref_buffer[ref][2] + recon_uvoffset; } else { // ref_frame is INTRA_FRAME, pre buffer should not be used.
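/* Rationale (presumed, not stated in the original source): nulling the pre buffers makes any stray inter-prediction access on an intra macroblock fault fast instead of silently reading stale reference data. */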
xd->pre.y_buffer = 0; xd->pre.u_buffer = 0; xd->pre.v_buffer = 0; } mt_decode_macroblock(pbi, xd, 0); xd->left_available = 1; /* check if the boolean decoder has suffered an error */ xd->corrupted |= vp8dx_bool_error(xd->current_bc); xd->recon_above[0] += 16; xd->recon_above[1] += 8; xd->recon_above[2] += 8; if (!pbi->common.filter_level) { xd->recon_left[0] += 16; xd->recon_left[1] += 8; xd->recon_left[2] += 8; } if (pbi->common.filter_level) { int skip_lf = (xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV && xd->mode_info_context->mbmi.mb_skip_coeff); const int mode_index = lfi_n->mode_lf_lut[xd->mode_info_context->mbmi.mode]; const int seg = xd->mode_info_context->mbmi.segment_id; const int ref_frame = xd->mode_info_context->mbmi.ref_frame; filter_level = lfi_n->lvl[seg][ref_frame][mode_index]; if (mb_row != pc->mb_rows - 1) { /* Save decoded MB last row data for next-row decoding */ memcpy((pbi->mt_yabove_row[mb_row + 1] + 32 + mb_col * 16), (xd->dst.y_buffer + 15 * recon_y_stride), 16); memcpy((pbi->mt_uabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.u_buffer + 7 * recon_uv_stride), 8); memcpy((pbi->mt_vabove_row[mb_row + 1] + 16 + mb_col * 8), (xd->dst.v_buffer + 7 * recon_uv_stride), 8); } /* save left_col for next MB decoding */ if (mb_col != pc->mb_cols - 1) { MODE_INFO *next = xd->mode_info_context + 1; if (next->mbmi.ref_frame == INTRA_FRAME) { for (i = 0; i < 16; ++i) { pbi->mt_yleft_col[mb_row][i] = xd->dst.y_buffer[i * recon_y_stride + 15]; } for (i = 0; i < 8; ++i) { pbi->mt_uleft_col[mb_row][i] = xd->dst.u_buffer[i * recon_uv_stride + 7]; pbi->mt_vleft_col[mb_row][i] = xd->dst.v_buffer[i * recon_uv_stride + 7]; } } } /* loopfilter on this macroblock. */ if (filter_level) { if (pc->filter_type == NORMAL_LOOPFILTER) { loop_filter_info lfi; FRAME_TYPE frame_type = pc->frame_type; const int hev_index = lfi_n->hev_thr_lut[frame_type][filter_level]; lfi.mblim = lfi_n->mblim[filter_level]; lfi.blim = lfi_n->blim[filter_level]; lfi.lim = lfi_n->lim[filter_level]; lfi.hev_thr = lfi_n->hev_thr[hev_index]; if (mb_col > 0) vp8_loop_filter_mbv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); if (!skip_lf) vp8_loop_filter_bv(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); /* don't apply across umv border */ if (mb_row > 0) vp8_loop_filter_mbh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); if (!skip_lf) vp8_loop_filter_bh(xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, recon_y_stride, recon_uv_stride, &lfi); } else { if (mb_col > 0) vp8_loop_filter_simple_mbv(xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]); if (!skip_lf) vp8_loop_filter_simple_bv(xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]); /* don't apply across umv border */ if (mb_row > 0) vp8_loop_filter_simple_mbh(xd->dst.y_buffer, recon_y_stride, lfi_n->mblim[filter_level]); if (!skip_lf) vp8_loop_filter_simple_bh(xd->dst.y_buffer, recon_y_stride, lfi_n->blim[filter_level]); } } } recon_yoffset += 16; recon_uvoffset += 8; ++xd->mode_info_context; /* next mb */ xd->above_context++; } /* adjust to the next row of mbs */ if (pbi->common.filter_level) { if (mb_row != pc->mb_rows - 1) { int lasty = yv12_fb_lst->y_width + VP8BORDERINPIXELS; int lastuv = (yv12_fb_lst->y_width >> 1) + (VP8BORDERINPIXELS >> 1); for (i = 0; i < 4; ++i) { pbi->mt_yabove_row[mb_row + 1][lasty + i] = pbi->mt_yabove_row[mb_row + 1][lasty - 1]; 
pbi->mt_uabove_row[mb_row + 1][lastuv + i] = pbi->mt_uabove_row[mb_row + 1][lastuv - 1]; pbi->mt_vabove_row[mb_row + 1][lastuv + i] = pbi->mt_vabove_row[mb_row + 1][lastuv - 1]; } } } else { vp8_extend_mb_row(yv12_fb_new, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); } /* last MB of row is ready just after extension is done */ vpx_atomic_store_release(current_mb_col, mb_col + nsync); ++xd->mode_info_context; /* skip prediction column */ xd->up_available = 1; /* since we have multiple threads */ xd->mode_info_context += xd->mode_info_stride * pbi->decoding_thread_count; } /* signal end of decoding of current thread for current frame */ if (last_mb_row + (int)pbi->decoding_thread_count + 1 >= pc->mb_rows) sem_post(&pbi->h_event_end_decoding); } static THREAD_FUNCTION thread_decoding_proc(void *p_data) { int ithread = ((DECODETHREAD_DATA *)p_data)->ithread; VP8D_COMP *pbi = (VP8D_COMP *)(((DECODETHREAD_DATA *)p_data)->ptr1); MB_ROW_DEC *mbrd = (MB_ROW_DEC *)(((DECODETHREAD_DATA *)p_data)->ptr2); ENTROPY_CONTEXT_PLANES mb_row_left_context; while (1) { if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) break; if (sem_wait(&pbi->h_event_start_decoding[ithread]) == 0) { if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd) == 0) { break; } else { MACROBLOCKD *xd = &mbrd->mbd; xd->left_context = &mb_row_left_context; if (setjmp(xd->error_info.jmp)) { xd->error_info.setjmp = 0; // Signal the end of decoding for the current thread. sem_post(&pbi->h_event_end_decoding); continue; } xd->error_info.setjmp = 1; mt_decode_mb_rows(pbi, xd, ithread + 1); } } } return 0; } void vp8_decoder_create_threads(VP8D_COMP *pbi) { int core_count = 0; unsigned int ithread; vpx_atomic_init(&pbi->b_multithreaded_rd, 0); pbi->allocated_decoding_thread_count = 0; /* limit decoding threads to the max number of token partitions */ core_count = (pbi->max_threads > 8) ? 8 : pbi->max_threads; /* limit decoding threads to the available cores */ if (core_count > pbi->common.processor_core_count) { core_count = pbi->common.processor_core_count; } if (core_count > 1) { vpx_atomic_init(&pbi->b_multithreaded_rd, 1); pbi->decoding_thread_count = core_count - 1; CALLOC_ARRAY(pbi->h_decoding_thread, pbi->decoding_thread_count); CALLOC_ARRAY(pbi->h_event_start_decoding, pbi->decoding_thread_count); CALLOC_ARRAY_ALIGNED(pbi->mb_row_di, pbi->decoding_thread_count, 32); CALLOC_ARRAY(pbi->de_thread_data, pbi->decoding_thread_count); if (sem_init(&pbi->h_event_end_decoding, 0, 0)) { vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, "Failed to initialize semaphore"); } for (ithread = 0; ithread < pbi->decoding_thread_count; ++ithread) { if (sem_init(&pbi->h_event_start_decoding[ithread], 0, 0)) break; vp8_setup_block_dptrs(&pbi->mb_row_di[ithread].mbd); pbi->de_thread_data[ithread].ithread = ithread; pbi->de_thread_data[ithread].ptr1 = (void *)pbi; pbi->de_thread_data[ithread].ptr2 = (void *)&pbi->mb_row_di[ithread]; if (pthread_create(&pbi->h_decoding_thread[ithread], 0, thread_decoding_proc, &pbi->de_thread_data[ithread])) { sem_destroy(&pbi->h_event_start_decoding[ithread]); break; } } pbi->allocated_decoding_thread_count = ithread; if (pbi->allocated_decoding_thread_count != (int)pbi->decoding_thread_count) { /* the remainder of cleanup cases will be handled in * vp8_decoder_remove_threads(). 
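* Partially created threads and their start semaphores are torn down
* there as well, based on allocated_decoding_thread_count.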
*/ if (pbi->allocated_decoding_thread_count == 0) { sem_destroy(&pbi->h_event_end_decoding); } vpx_internal_error(&pbi->common.error, VPX_CODEC_MEM_ERROR, "Failed to create threads"); } } } void vp8mt_de_alloc_temp_buffers(VP8D_COMP *pbi, int mb_rows) { int i; vpx_free(pbi->mt_current_mb_col); pbi->mt_current_mb_col = NULL; /* Free above_row buffers. */ if (pbi->mt_yabove_row) { for (i = 0; i < mb_rows; ++i) { vpx_free(pbi->mt_yabove_row[i]); pbi->mt_yabove_row[i] = NULL; } vpx_free(pbi->mt_yabove_row); pbi->mt_yabove_row = NULL; } if (pbi->mt_uabove_row) { for (i = 0; i < mb_rows; ++i) { vpx_free(pbi->mt_uabove_row[i]); pbi->mt_uabove_row[i] = NULL; } vpx_free(pbi->mt_uabove_row); pbi->mt_uabove_row = NULL; } if (pbi->mt_vabove_row) { for (i = 0; i < mb_rows; ++i) { vpx_free(pbi->mt_vabove_row[i]); pbi->mt_vabove_row[i] = NULL; } vpx_free(pbi->mt_vabove_row); pbi->mt_vabove_row = NULL; } /* Free left_col buffers. */ if (pbi->mt_yleft_col) { for (i = 0; i < mb_rows; ++i) { vpx_free(pbi->mt_yleft_col[i]); pbi->mt_yleft_col[i] = NULL; } vpx_free(pbi->mt_yleft_col); pbi->mt_yleft_col = NULL; } if (pbi->mt_uleft_col) { for (i = 0; i < mb_rows; ++i) { vpx_free(pbi->mt_uleft_col[i]); pbi->mt_uleft_col[i] = NULL; } vpx_free(pbi->mt_uleft_col); pbi->mt_uleft_col = NULL; } if (pbi->mt_vleft_col) { for (i = 0; i < mb_rows; ++i) { vpx_free(pbi->mt_vleft_col[i]); pbi->mt_vleft_col[i] = NULL; } vpx_free(pbi->mt_vleft_col); pbi->mt_vleft_col = NULL; } } void vp8mt_alloc_temp_buffers(VP8D_COMP *pbi, int width, int prev_mb_rows) { VP8_COMMON *const pc = &pbi->common; int i; int uv_width; if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) { vp8mt_de_alloc_temp_buffers(pbi, prev_mb_rows); /* our internal buffers are always multiples of 16 */ if ((width & 0xf) != 0) width += 16 - (width & 0xf); if (width < 640) { pbi->sync_range = 1; } else if (width <= 1280) { pbi->sync_range = 8; } else if (width <= 2560) { pbi->sync_range = 16; } else { pbi->sync_range = 32; } uv_width = width >> 1; /* Allocate a vpx_atomic_int for each mb row. */ CHECK_MEM_ERROR(pbi->mt_current_mb_col, vpx_malloc(sizeof(*pbi->mt_current_mb_col) * pc->mb_rows)); for (i = 0; i < pc->mb_rows; ++i) vpx_atomic_init(&pbi->mt_current_mb_col[i], 0); /* Allocate memory for above_row buffers. */ CALLOC_ARRAY(pbi->mt_yabove_row, pc->mb_rows); for (i = 0; i < pc->mb_rows; ++i) { CHECK_MEM_ERROR(pbi->mt_yabove_row[i], vpx_memalign(16, sizeof(unsigned char) * (width + (VP8BORDERINPIXELS << 1)))); vp8_zero_array(pbi->mt_yabove_row[i], width + (VP8BORDERINPIXELS << 1)); } CALLOC_ARRAY(pbi->mt_uabove_row, pc->mb_rows); for (i = 0; i < pc->mb_rows; ++i) { CHECK_MEM_ERROR(pbi->mt_uabove_row[i], vpx_memalign(16, sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); vp8_zero_array(pbi->mt_uabove_row[i], uv_width + VP8BORDERINPIXELS); } CALLOC_ARRAY(pbi->mt_vabove_row, pc->mb_rows); for (i = 0; i < pc->mb_rows; ++i) { CHECK_MEM_ERROR(pbi->mt_vabove_row[i], vpx_memalign(16, sizeof(unsigned char) * (uv_width + VP8BORDERINPIXELS))); vp8_zero_array(pbi->mt_vabove_row[i], uv_width + VP8BORDERINPIXELS); } /* Allocate memory for left_col buffers. 
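* When the loop filter is active these, together with the above_row
* buffers, hold the unfiltered boundary pixels of each macroblock row:
* the frame buffer is filtered in place, so intra prediction for
* neighbouring macroblocks must read its left/above samples from these
* saved copies instead.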
*/ CALLOC_ARRAY(pbi->mt_yleft_col, pc->mb_rows); for (i = 0; i < pc->mb_rows; ++i) CHECK_MEM_ERROR(pbi->mt_yleft_col[i], vpx_calloc(sizeof(unsigned char) * 16, 1)); CALLOC_ARRAY(pbi->mt_uleft_col, pc->mb_rows); for (i = 0; i < pc->mb_rows; ++i) CHECK_MEM_ERROR(pbi->mt_uleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); CALLOC_ARRAY(pbi->mt_vleft_col, pc->mb_rows); for (i = 0; i < pc->mb_rows; ++i) CHECK_MEM_ERROR(pbi->mt_vleft_col[i], vpx_calloc(sizeof(unsigned char) * 8, 1)); } } void vp8_decoder_remove_threads(VP8D_COMP *pbi) { /* shutdown MB Decoding thread; */ if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) { int i; vpx_atomic_store_release(&pbi->b_multithreaded_rd, 0); /* allow all threads to exit */ for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) { sem_post(&pbi->h_event_start_decoding[i]); pthread_join(pbi->h_decoding_thread[i], NULL); } for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) { sem_destroy(&pbi->h_event_start_decoding[i]); } if (pbi->allocated_decoding_thread_count) { sem_destroy(&pbi->h_event_end_decoding); } vpx_free(pbi->h_decoding_thread); pbi->h_decoding_thread = NULL; vpx_free(pbi->h_event_start_decoding); pbi->h_event_start_decoding = NULL; vpx_free(pbi->mb_row_di); pbi->mb_row_di = NULL; vpx_free(pbi->de_thread_data); pbi->de_thread_data = NULL; vp8mt_de_alloc_temp_buffers(pbi, pbi->common.mb_rows); } } int vp8mt_decode_mb_rows(VP8D_COMP *pbi, MACROBLOCKD *xd) { VP8_COMMON *pc = &pbi->common; unsigned int i; int j; int filter_level = pc->filter_level; YV12_BUFFER_CONFIG *yv12_fb_new = pbi->dec_fb_ref[INTRA_FRAME]; if (filter_level) { /* Set above_row buffer to 127 for decoding first MB row */ memset(pbi->mt_yabove_row[0] + VP8BORDERINPIXELS - 1, 127, yv12_fb_new->y_width + 5); memset(pbi->mt_uabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127, (yv12_fb_new->y_width >> 1) + 5); memset(pbi->mt_vabove_row[0] + (VP8BORDERINPIXELS >> 1) - 1, 127, (yv12_fb_new->y_width >> 1) + 5); for (j = 1; j < pc->mb_rows; ++j) { memset(pbi->mt_yabove_row[j] + VP8BORDERINPIXELS - 1, (unsigned char)129, 1); memset(pbi->mt_uabove_row[j] + (VP8BORDERINPIXELS >> 1) - 1, (unsigned char)129, 1); memset(pbi->mt_vabove_row[j] + (VP8BORDERINPIXELS >> 1) - 1, (unsigned char)129, 1); } /* Set left_col to 129 initially */ for (j = 0; j < pc->mb_rows; ++j) { memset(pbi->mt_yleft_col[j], (unsigned char)129, 16); memset(pbi->mt_uleft_col[j], (unsigned char)129, 8); memset(pbi->mt_vleft_col[j], (unsigned char)129, 8); } /* Initialize the loop filter for this frame. */ vp8_loop_filter_frame_init(pc, &pbi->mb, filter_level); } else { vp8_setup_intra_recon_top_line(yv12_fb_new); } setup_decoding_thread_data(pbi, xd, pbi->mb_row_di, pbi->decoding_thread_count); for (i = 0; i < pbi->decoding_thread_count; ++i) { sem_post(&pbi->h_event_start_decoding[i]); } if (setjmp(xd->error_info.jmp)) { xd->error_info.setjmp = 0; xd->corrupted = 1; // Wait for other threads to finish. This prevents other threads decoding // the current frame while the main thread starts decoding the next frame, // which causes a data race. for (i = 0; i < pbi->decoding_thread_count; ++i) sem_wait(&pbi->h_event_end_decoding); return -1; } xd->error_info.setjmp = 1; mt_decode_mb_rows(pbi, xd, 0); for (i = 0; i < pbi->decoding_thread_count + 1; ++i) sem_wait(&pbi->h_event_end_decoding); /* add back for each frame */ return 0; } libvpx-1.8.2/vp8/decoder/treereader.h000066400000000000000000000022121357355204000174440ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. 
All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_DECODER_TREEREADER_H_ #define VPX_VP8_DECODER_TREEREADER_H_ #include "./vpx_config.h" #include "vp8/common/treecoder.h" #include "dboolhuff.h" #ifdef __cplusplus extern "C" { #endif typedef BOOL_DECODER vp8_reader; #define vp8_read vp8dx_decode_bool #define vp8_read_literal vp8_decode_value #define vp8_read_bit(R) vp8_read(R, vp8_prob_half) /* Intent of tree data structure is to make decoding trivial. */ static INLINE int vp8_treed_read( vp8_reader *const r, /* !!! must return a 0 or 1 !!! */ vp8_tree t, const vp8_prob *const p) { vp8_tree_index i = 0; while ((i = t[i + vp8_read(r, p[i >> 1])]) > 0) { } return -i; } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_DECODER_TREEREADER_H_ libvpx-1.8.2/vp8/encoder/000077500000000000000000000000001357355204000151665ustar00rootroot00000000000000libvpx-1.8.2/vp8/encoder/arm/000077500000000000000000000000001357355204000157455ustar00rootroot00000000000000libvpx-1.8.2/vp8/encoder/arm/neon/000077500000000000000000000000001357355204000167045ustar00rootroot00000000000000libvpx-1.8.2/vp8/encoder/arm/neon/denoising_neon.c000066400000000000000000000462201357355204000220520ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "vp8/encoder/denoising.h" #include "vpx_mem/vpx_mem.h" #include "./vp8_rtcd.h" /* * The filter function was modified to reduce the computational complexity. * * Step 1: * Instead of applying tap coefficients for each pixel, we calculated the * pixel adjustments vs. pixel diff value ahead of time. * adjustment = filtered_value - current_raw * = (filter_coefficient * diff + 128) >> 8 * where * filter_coefficient = (255 << 8) / (256 + ((abs_diff * 330) >> 3)); * filter_coefficient += filter_coefficient / * (3 + motion_magnitude_adjustment); * filter_coefficient is clamped to 0 ~ 255. * * Step 2: * The adjustment vs. diff curve becomes flat very quick when diff increases. * This allowed us to use only several levels to approximate the curve without * changing the filtering algorithm too much. * The adjustments were further corrected by checking the motion magnitude. * The levels used are: * diff level adjustment w/o adjustment w/ * motion correction motion correction * [-255, -16] 3 -6 -7 * [-15, -8] 2 -4 -5 * [-7, -4] 1 -3 -4 * [-3, 3] 0 diff diff * [4, 7] 1 3 4 * [8, 15] 2 4 5 * [16, 255] 3 6 7 */ int vp8_denoiser_filter_neon(unsigned char *mc_running_avg_y, int mc_running_avg_y_stride, unsigned char *running_avg_y, int running_avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising) { /* If motion_magnitude is small, making the denoiser more aggressive by * increasing the adjustment for each level, level1 adjustment is * increased, the deltas stay the same. 
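* For example, with increase_denoising set and low motion_magnitude the
* level1 threshold and adjustment both become 5 instead of 4, while the
* level deltas stay 1 and 2.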
*/ int shift_inc = (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0; const uint8x16_t v_level1_adjustment = vmovq_n_u8( (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 + shift_inc : 3); const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc); const uint8x16_t v_level2_threshold = vdupq_n_u8(8); const uint8x16_t v_level3_threshold = vdupq_n_u8(16); int64x2_t v_sum_diff_total = vdupq_n_s64(0); /* Go over lines. */ int r; for (r = 0; r < 16; ++r) { /* Load inputs. */ const uint8x16_t v_sig = vld1q_u8(sig); const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); /* Calculate absolute difference and sign masks. */ const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y); const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y); /* Figure out which level the difference puts us in. */ const uint8x16_t v_level1_mask = vcleq_u8(v_level1_threshold, v_abs_diff); const uint8x16_t v_level2_mask = vcleq_u8(v_level2_threshold, v_abs_diff); const uint8x16_t v_level3_mask = vcleq_u8(v_level3_threshold, v_abs_diff); /* Calculate absolute adjustments for level 1, 2 and 3. */ const uint8x16_t v_level2_adjustment = vandq_u8(v_level2_mask, v_delta_level_1_and_2); const uint8x16_t v_level3_adjustment = vandq_u8(v_level3_mask, v_delta_level_2_and_3); const uint8x16_t v_level1and2_adjustment = vaddq_u8(v_level1_adjustment, v_level2_adjustment); const uint8x16_t v_level1and2and3_adjustment = vaddq_u8(v_level1and2_adjustment, v_level3_adjustment); /* Get the adjustment's absolute value: the absolute difference itself * in level 0, or the precomputed value for levels 1, 2 and 3. */ const uint8x16_t v_abs_adjustment = vbslq_u8(v_level1_mask, v_level1and2and3_adjustment, v_abs_diff); /* Calculate positive and negative adjustments. Apply them to the signal * and accumulate them. Adjustments are less than eight and the maximum * sum of them (7 * 16) can fit in a signed char. */ const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask, v_abs_adjustment); const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask, v_abs_adjustment); uint8x16_t v_running_avg_y = vqaddq_u8(v_sig, v_pos_adjustment); v_running_avg_y = vqsubq_u8(v_running_avg_y, v_neg_adjustment); /* Store results. */ vst1q_u8(running_avg_y, v_running_avg_y); /* Sum all the accumulators to have the sum of all pixel differences * for this macroblock. */ { const int8x16_t v_sum_diff = vqsubq_s8(vreinterpretq_s8_u8(v_pos_adjustment), vreinterpretq_s8_u8(v_neg_adjustment)); const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff); const int32x4_t fedc_ba98_7654_3210 = vpaddlq_s16(fe_dc_ba_98_76_54_32_10); const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210); v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210); } /* Update pointers for next iteration. */ sig += sig_stride; mc_running_avg_y += mc_running_avg_y_stride; running_avg_y += running_avg_y_stride; } /* Too many adjustments => copy block. 
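* A summed per-pixel adjustment above sum_diff_thresh means the filter
* changed the block too much; unless the weaker delta filter below
* brings the sum back within range, the source block is copied through
* unmodified.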
*/ { int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), vget_low_s64(v_sum_diff_total)); int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); int sum_diff_thresh = SUM_DIFF_THRESHOLD; if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH; if (sum_diff > sum_diff_thresh) { // Before returning to copy the block (i.e., apply no denoising), // check if we can still apply some (weaker) temporal filtering to // this block, which would otherwise not be denoised at all. The // simplest is to apply an additional adjustment to running_avg_y to // bring it closer to sig. The adjustment is capped by a maximum delta, // and chosen such that in most cases the resulting sum_diff will be // within the acceptable range given by sum_diff_thresh. // The delta is set by the excess of absolute pixel diff over the // threshold. int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1; // Only apply the adjustment for max delta up to 3. if (delta < 4) { const uint8x16_t k_delta = vmovq_n_u8(delta); sig -= sig_stride * 16; mc_running_avg_y -= mc_running_avg_y_stride * 16; running_avg_y -= running_avg_y_stride * 16; for (r = 0; r < 16; ++r) { uint8x16_t v_running_avg_y = vld1q_u8(running_avg_y); const uint8x16_t v_sig = vld1q_u8(sig); const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); /* Calculate absolute difference and sign masks. */ const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y); const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y); // Clamp absolute difference to delta to get the adjustment. const uint8x16_t v_abs_adjustment = vminq_u8(v_abs_diff, (k_delta)); const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask, v_abs_adjustment); const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask, v_abs_adjustment); v_running_avg_y = vqsubq_u8(v_running_avg_y, v_pos_adjustment); v_running_avg_y = vqaddq_u8(v_running_avg_y, v_neg_adjustment); /* Store results. */ vst1q_u8(running_avg_y, v_running_avg_y); { const int8x16_t v_sum_diff = vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment), vreinterpretq_s8_u8(v_pos_adjustment)); const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff); const int32x4_t fedc_ba98_7654_3210 = vpaddlq_s16(fe_dc_ba_98_76_54_32_10); const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210); v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210); } /* Update pointers for next iteration. */ sig += sig_stride; mc_running_avg_y += mc_running_avg_y_stride; running_avg_y += running_avg_y_stride; } { // Update the sum of all pixel differences of this MB. x = vqadd_s64(vget_high_s64(v_sum_diff_total), vget_low_s64(v_sum_diff_total)); sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); if (sum_diff > sum_diff_thresh) { return COPY_BLOCK; } } } else { return COPY_BLOCK; } } } /* Tell above level that block was filtered. */ running_avg_y -= running_avg_y_stride * 16; sig -= sig_stride * 16; vp8_copy_mem16x16(running_avg_y, running_avg_y_stride, sig, sig_stride); return FILTER_BLOCK; } int vp8_denoiser_filter_uv_neon(unsigned char *mc_running_avg, int mc_running_avg_stride, unsigned char *running_avg, int running_avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising) { /* If motion_magnitude is small, make the denoiser more aggressive by * increasing the adjustment for each level: the level1 adjustment is * increased while the deltas stay the same. 
*/ int shift_inc = (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 1 : 0; const uint8x16_t v_level1_adjustment = vmovq_n_u8( (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 4 + shift_inc : 3); const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc); const uint8x16_t v_level2_threshold = vdupq_n_u8(8); const uint8x16_t v_level3_threshold = vdupq_n_u8(16); int64x2_t v_sum_diff_total = vdupq_n_s64(0); int r; { uint16x4_t v_sum_block = vdup_n_u16(0); // Avoid denoising the color signal if it's close to the average level. for (r = 0; r < 8; ++r) { const uint8x8_t v_sig = vld1_u8(sig); const uint16x4_t _76_54_32_10 = vpaddl_u8(v_sig); v_sum_block = vqadd_u16(v_sum_block, _76_54_32_10); sig += sig_stride; } sig -= sig_stride * 8; { const uint32x2_t _7654_3210 = vpaddl_u16(v_sum_block); const uint64x1_t _76543210 = vpaddl_u32(_7654_3210); const int sum_block = vget_lane_s32(vreinterpret_s32_u64(_76543210), 0); if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) { return COPY_BLOCK; } } } /* Go over lines. */ for (r = 0; r < 4; ++r) { /* Load inputs. */ const uint8x8_t v_sig_lo = vld1_u8(sig); const uint8x8_t v_sig_hi = vld1_u8(&sig[sig_stride]); const uint8x16_t v_sig = vcombine_u8(v_sig_lo, v_sig_hi); const uint8x8_t v_mc_running_avg_lo = vld1_u8(mc_running_avg); const uint8x8_t v_mc_running_avg_hi = vld1_u8(&mc_running_avg[mc_running_avg_stride]); const uint8x16_t v_mc_running_avg = vcombine_u8(v_mc_running_avg_lo, v_mc_running_avg_hi); /* Calculate absolute difference and sign masks. */ const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg); const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg); const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg); /* Figure out which level the difference puts us in. */ const uint8x16_t v_level1_mask = vcleq_u8(v_level1_threshold, v_abs_diff); const uint8x16_t v_level2_mask = vcleq_u8(v_level2_threshold, v_abs_diff); const uint8x16_t v_level3_mask = vcleq_u8(v_level3_threshold, v_abs_diff); /* Calculate absolute adjustments for level 1, 2 and 3. */ const uint8x16_t v_level2_adjustment = vandq_u8(v_level2_mask, v_delta_level_1_and_2); const uint8x16_t v_level3_adjustment = vandq_u8(v_level3_mask, v_delta_level_2_and_3); const uint8x16_t v_level1and2_adjustment = vaddq_u8(v_level1_adjustment, v_level2_adjustment); const uint8x16_t v_level1and2and3_adjustment = vaddq_u8(v_level1and2_adjustment, v_level3_adjustment); /* Get the adjustment's absolute value: the absolute difference itself * in level 0, or the precomputed value for levels 1, 2 and 3. */ const uint8x16_t v_abs_adjustment = vbslq_u8(v_level1_mask, v_level1and2and3_adjustment, v_abs_diff); /* Calculate positive and negative adjustments. Apply them to the signal * and accumulate them. Adjustments are less than eight and the maximum * sum of them (7 * 16) can fit in a signed char. */ const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask, v_abs_adjustment); const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask, v_abs_adjustment); uint8x16_t v_running_avg = vqaddq_u8(v_sig, v_pos_adjustment); v_running_avg = vqsubq_u8(v_running_avg, v_neg_adjustment); /* Store results. */ vst1_u8(running_avg, vget_low_u8(v_running_avg)); vst1_u8(&running_avg[running_avg_stride], vget_high_u8(v_running_avg)); /* Sum all the accumulators to have the sum of all pixel differences * for this macroblock. 
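* The reduction widens stepwise to avoid overflow: vpaddlq_s8 pairs the
* 16 s8 lanes into 8 s16 sums, vpaddlq_s16 pairs those into 4 s32,
* vpaddlq_s32 into 2 s64, which are finally saturating-added into
* v_sum_diff_total.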
*/ { const int8x16_t v_sum_diff = vqsubq_s8(vreinterpretq_s8_u8(v_pos_adjustment), vreinterpretq_s8_u8(v_neg_adjustment)); const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff); const int32x4_t fedc_ba98_7654_3210 = vpaddlq_s16(fe_dc_ba_98_76_54_32_10); const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210); v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210); } /* Update pointers for next iteration. */ sig += sig_stride * 2; mc_running_avg += mc_running_avg_stride * 2; running_avg += running_avg_stride * 2; } /* Too many adjustments => copy block. */ { int64x1_t x = vqadd_s64(vget_high_s64(v_sum_diff_total), vget_low_s64(v_sum_diff_total)); int sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); int sum_diff_thresh = SUM_DIFF_THRESHOLD_UV; if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV; if (sum_diff > sum_diff_thresh) { // Before returning to copy the block (i.e., apply no denoising), // check if we can still apply some (weaker) temporal filtering to // this block, which would otherwise not be denoised at all. The // simplest is to apply an additional adjustment to running_avg to // bring it closer to sig. The adjustment is capped by a maximum delta, // and chosen such that in most cases the resulting sum_diff will be // within the acceptable range given by sum_diff_thresh. // The delta is set by the excess of absolute pixel diff over the // threshold. int delta = ((sum_diff - sum_diff_thresh) >> 8) + 1; // Only apply the adjustment for max delta up to 3. if (delta < 4) { const uint8x16_t k_delta = vmovq_n_u8(delta); sig -= sig_stride * 8; mc_running_avg -= mc_running_avg_stride * 8; running_avg -= running_avg_stride * 8; for (r = 0; r < 4; ++r) { const uint8x8_t v_sig_lo = vld1_u8(sig); const uint8x8_t v_sig_hi = vld1_u8(&sig[sig_stride]); const uint8x16_t v_sig = vcombine_u8(v_sig_lo, v_sig_hi); const uint8x8_t v_mc_running_avg_lo = vld1_u8(mc_running_avg); const uint8x8_t v_mc_running_avg_hi = vld1_u8(&mc_running_avg[mc_running_avg_stride]); const uint8x16_t v_mc_running_avg = vcombine_u8(v_mc_running_avg_lo, v_mc_running_avg_hi); /* Calculate absolute difference and sign masks. */ const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg); const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg); const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg); // Clamp absolute difference to delta to get the adjustment. const uint8x16_t v_abs_adjustment = vminq_u8(v_abs_diff, (k_delta)); const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask, v_abs_adjustment); const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask, v_abs_adjustment); const uint8x8_t v_running_avg_lo = vld1_u8(running_avg); const uint8x8_t v_running_avg_hi = vld1_u8(&running_avg[running_avg_stride]); uint8x16_t v_running_avg = vcombine_u8(v_running_avg_lo, v_running_avg_hi); v_running_avg = vqsubq_u8(v_running_avg, v_pos_adjustment); v_running_avg = vqaddq_u8(v_running_avg, v_neg_adjustment); /* Store results. 
*/ vst1_u8(running_avg, vget_low_u8(v_running_avg)); vst1_u8(&running_avg[running_avg_stride], vget_high_u8(v_running_avg)); { const int8x16_t v_sum_diff = vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment), vreinterpretq_s8_u8(v_pos_adjustment)); const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff); const int32x4_t fedc_ba98_7654_3210 = vpaddlq_s16(fe_dc_ba_98_76_54_32_10); const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210); v_sum_diff_total = vqaddq_s64(v_sum_diff_total, fedcba98_76543210); } /* Update pointers for next iteration. */ sig += sig_stride * 2; mc_running_avg += mc_running_avg_stride * 2; running_avg += running_avg_stride * 2; } { // Update the sum of all pixel differences of this MB. x = vqadd_s64(vget_high_s64(v_sum_diff_total), vget_low_s64(v_sum_diff_total)); sum_diff = vget_lane_s32(vabs_s32(vreinterpret_s32_s64(x)), 0); if (sum_diff > sum_diff_thresh) { return COPY_BLOCK; } } } else { return COPY_BLOCK; } } } /* Tell above level that block was filtered. */ running_avg -= running_avg_stride * 8; sig -= sig_stride * 8; vp8_copy_mem8x8(running_avg, running_avg_stride, sig, sig_stride); return FILTER_BLOCK; } libvpx-1.8.2/vp8/encoder/arm/neon/fastquantizeb_neon.c000066400000000000000000000053411357355204000227520ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include "./vp8_rtcd.h" #include "vp8/encoder/block.h" static const uint16_t inv_zig_zag[16] = { 1, 2, 6, 7, 3, 5, 8, 13, 4, 9, 12, 14, 10, 11, 15, 16 }; void vp8_fast_quantize_b_neon(BLOCK *b, BLOCKD *d) { const int16x8_t one_q = vdupq_n_s16(-1), z0 = vld1q_s16(b->coeff), z1 = vld1q_s16(b->coeff + 8), round0 = vld1q_s16(b->round), round1 = vld1q_s16(b->round + 8), quant0 = vld1q_s16(b->quant_fast), quant1 = vld1q_s16(b->quant_fast + 8), dequant0 = vld1q_s16(d->dequant), dequant1 = vld1q_s16(d->dequant + 8); const uint16x8_t zig_zag0 = vld1q_u16(inv_zig_zag), zig_zag1 = vld1q_u16(inv_zig_zag + 8); int16x8_t x0, x1, sz0, sz1, y0, y1; uint16x8_t eob0, eob1; #ifndef __aarch64__ uint16x4_t eob_d16; uint32x2_t eob_d32; uint32x4_t eob_q32; #endif // __aarch64__ /* sign of z: z >> 15 */ sz0 = vshrq_n_s16(z0, 15); sz1 = vshrq_n_s16(z1, 15); /* x = abs(z) */ x0 = vabsq_s16(z0); x1 = vabsq_s16(z1); /* x += round */ x0 = vaddq_s16(x0, round0); x1 = vaddq_s16(x1, round1); /* y = 2 * (x * quant) >> 16 */ y0 = vqdmulhq_s16(x0, quant0); y1 = vqdmulhq_s16(x1, quant1); /* Compensate for doubling in vqdmulhq */ y0 = vshrq_n_s16(y0, 1); y1 = vshrq_n_s16(y1, 1); /* Restore sign bit */ y0 = veorq_s16(y0, sz0); y1 = veorq_s16(y1, sz1); x0 = vsubq_s16(y0, sz0); x1 = vsubq_s16(y1, sz1); /* find non-zero elements */ eob0 = vtstq_s16(x0, one_q); eob1 = vtstq_s16(x1, one_q); /* mask zig zag */ eob0 = vandq_u16(eob0, zig_zag0); eob1 = vandq_u16(eob1, zig_zag1); /* select the largest value */ eob0 = vmaxq_u16(eob0, eob1); #ifdef __aarch64__ *d->eob = (int8_t)vmaxvq_u16(eob0); #else eob_d16 = vmax_u16(vget_low_u16(eob0), vget_high_u16(eob0)); eob_q32 = vmovl_u16(eob_d16); eob_d32 = vmax_u32(vget_low_u32(eob_q32), vget_high_u32(eob_q32)); eob_d32 = vpmax_u32(eob_d32, eob_d32); vst1_lane_s8((int8_t *)d->eob, vreinterpret_s8_u32(eob_d32), 0); 
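/* Scalar sketch of the EOB computation above (illustrative only;
 * quantized[] is hypothetical shorthand for the x0/x1 lanes):
 *   int c, eob = 0;
 *   for (c = 0; c < 16; ++c)
 *     if (quantized[c]) eob = VPXMAX(eob, inv_zig_zag[c]);
 * Each nonzero coefficient contributes its 1-based zig-zag position, so
 * the lane maximum is the position of the last nonzero coefficient. */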
#endif // __aarch64__ /* qcoeff = x */ vst1q_s16(d->qcoeff, x0); vst1q_s16(d->qcoeff + 8, x1); /* dqcoeff = x * dequant */ vst1q_s16(d->dqcoeff, vmulq_s16(dequant0, x0)); vst1q_s16(d->dqcoeff + 8, vmulq_s16(dequant1, x1)); } libvpx-1.8.2/vp8/encoder/arm/neon/shortfdct_neon.c000066400000000000000000000203201357355204000220640ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vp8_rtcd.h" void vp8_short_fdct4x4_neon(int16_t *input, int16_t *output, int pitch) { int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; int16x4_t d16s16, d17s16, d26s16, dEmptys16; uint16x4_t d4u16; int16x8_t q0s16, q1s16; int32x4_t q9s32, q10s32, q11s32, q12s32; int16x4x2_t v2tmp0, v2tmp1; int32x2x2_t v2tmp2, v2tmp3; d16s16 = vdup_n_s16(5352); d17s16 = vdup_n_s16(2217); q9s32 = vdupq_n_s32(14500); q10s32 = vdupq_n_s32(7500); q11s32 = vdupq_n_s32(12000); q12s32 = vdupq_n_s32(51000); // Part one pitch >>= 1; d0s16 = vld1_s16(input); input += pitch; d1s16 = vld1_s16(input); input += pitch; d2s16 = vld1_s16(input); input += pitch; d3s16 = vld1_s16(input); v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d0s16), vreinterpret_s32_s16(d2s16)); v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d1s16), vreinterpret_s32_s16(d3s16)); v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), // d0 vreinterpret_s16_s32(v2tmp3.val[0])); // d1 v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), // d2 vreinterpret_s16_s32(v2tmp3.val[1])); // d3 d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]); d5s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]); d6s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]); d7s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]); d4s16 = vshl_n_s16(d4s16, 3); d5s16 = vshl_n_s16(d5s16, 3); d6s16 = vshl_n_s16(d6s16, 3); d7s16 = vshl_n_s16(d7s16, 3); d0s16 = vadd_s16(d4s16, d5s16); d2s16 = vsub_s16(d4s16, d5s16); q9s32 = vmlal_s16(q9s32, d7s16, d16s16); q10s32 = vmlal_s16(q10s32, d7s16, d17s16); q9s32 = vmlal_s16(q9s32, d6s16, d17s16); q10s32 = vmlsl_s16(q10s32, d6s16, d16s16); d1s16 = vshrn_n_s32(q9s32, 12); d3s16 = vshrn_n_s32(q10s32, 12); // Part two v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d0s16), vreinterpret_s32_s16(d2s16)); v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d1s16), vreinterpret_s32_s16(d3s16)); v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), // d0 vreinterpret_s16_s32(v2tmp3.val[0])); // d1 v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), // d2 vreinterpret_s16_s32(v2tmp3.val[1])); // d3 d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[1]); d5s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[0]); d6s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[0]); d7s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[1]); d26s16 = vdup_n_s16(7); d4s16 = vadd_s16(d4s16, d26s16); d0s16 = vadd_s16(d4s16, d5s16); d2s16 = vsub_s16(d4s16, d5s16); q11s32 = vmlal_s16(q11s32, d7s16, d16s16); q12s32 = vmlal_s16(q12s32, d7s16, d17s16); dEmptys16 = vdup_n_s16(0); d4u16 = vceq_s16(d7s16, dEmptys16); d0s16 = vshr_n_s16(d0s16, 4); d2s16 = vshr_n_s16(d2s16, 4); q11s32 = vmlal_s16(q11s32, d6s16, d17s16); q12s32 = vmlsl_s16(q12s32, d6s16, d16s16); d4u16 = vmvn_u16(d4u16); d1s16 = vshrn_n_s32(q11s32, 16); d1s16 = vsub_s16(d1s16, vreinterpret_s16_u16(d4u16)); d3s16 = vshrn_n_s32(q12s32, 16); 
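/* The vceq/vmvn mask above is all-ones (-1) in lanes where d7 is nonzero;
 * subtracting it from d1 adds 1 to exactly those lanes, the vector
 * counterpart of the "+ (d1 != 0)" term in the scalar fdct's second
 * output row. The constants 2217 and 5352 approximate
 * 4096 * sqrt(2) * sin(pi / 8) and 4096 * sqrt(2) * cos(pi / 8). */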
q0s16 = vcombine_s16(d0s16, d1s16); q1s16 = vcombine_s16(d2s16, d3s16); vst1q_s16(output, q0s16); vst1q_s16(output + 8, q1s16); return; } void vp8_short_fdct8x4_neon(int16_t *input, int16_t *output, int pitch) { int16x4_t d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; int16x4_t d16s16, d17s16, d26s16, d27s16, d28s16, d29s16; uint16x4_t d28u16, d29u16; uint16x8_t q14u16; int16x8_t q0s16, q1s16, q2s16, q3s16; int16x8_t q11s16, q12s16, q13s16, q14s16, q15s16, qEmptys16; int32x4_t q9s32, q10s32, q11s32, q12s32; int16x8x2_t v2tmp0, v2tmp1; int32x4x2_t v2tmp2, v2tmp3; d16s16 = vdup_n_s16(5352); d17s16 = vdup_n_s16(2217); q9s32 = vdupq_n_s32(14500); q10s32 = vdupq_n_s32(7500); // Part one pitch >>= 1; q0s16 = vld1q_s16(input); input += pitch; q1s16 = vld1q_s16(input); input += pitch; q2s16 = vld1q_s16(input); input += pitch; q3s16 = vld1q_s16(input); v2tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q0s16), vreinterpretq_s32_s16(q2s16)); v2tmp3 = vtrnq_s32(vreinterpretq_s32_s16(q1s16), vreinterpretq_s32_s16(q3s16)); v2tmp0 = vtrnq_s16(vreinterpretq_s16_s32(v2tmp2.val[0]), // q0 vreinterpretq_s16_s32(v2tmp3.val[0])); // q1 v2tmp1 = vtrnq_s16(vreinterpretq_s16_s32(v2tmp2.val[1]), // q2 vreinterpretq_s16_s32(v2tmp3.val[1])); // q3 q11s16 = vaddq_s16(v2tmp0.val[0], v2tmp1.val[1]); q12s16 = vaddq_s16(v2tmp0.val[1], v2tmp1.val[0]); q13s16 = vsubq_s16(v2tmp0.val[1], v2tmp1.val[0]); q14s16 = vsubq_s16(v2tmp0.val[0], v2tmp1.val[1]); q11s16 = vshlq_n_s16(q11s16, 3); q12s16 = vshlq_n_s16(q12s16, 3); q13s16 = vshlq_n_s16(q13s16, 3); q14s16 = vshlq_n_s16(q14s16, 3); q0s16 = vaddq_s16(q11s16, q12s16); q2s16 = vsubq_s16(q11s16, q12s16); q11s32 = q9s32; q12s32 = q10s32; d26s16 = vget_low_s16(q13s16); d27s16 = vget_high_s16(q13s16); d28s16 = vget_low_s16(q14s16); d29s16 = vget_high_s16(q14s16); q9s32 = vmlal_s16(q9s32, d28s16, d16s16); q10s32 = vmlal_s16(q10s32, d28s16, d17s16); q11s32 = vmlal_s16(q11s32, d29s16, d16s16); q12s32 = vmlal_s16(q12s32, d29s16, d17s16); q9s32 = vmlal_s16(q9s32, d26s16, d17s16); q10s32 = vmlsl_s16(q10s32, d26s16, d16s16); q11s32 = vmlal_s16(q11s32, d27s16, d17s16); q12s32 = vmlsl_s16(q12s32, d27s16, d16s16); d2s16 = vshrn_n_s32(q9s32, 12); d6s16 = vshrn_n_s32(q10s32, 12); d3s16 = vshrn_n_s32(q11s32, 12); d7s16 = vshrn_n_s32(q12s32, 12); q1s16 = vcombine_s16(d2s16, d3s16); q3s16 = vcombine_s16(d6s16, d7s16); // Part two q9s32 = vdupq_n_s32(12000); q10s32 = vdupq_n_s32(51000); v2tmp2 = vtrnq_s32(vreinterpretq_s32_s16(q0s16), vreinterpretq_s32_s16(q2s16)); v2tmp3 = vtrnq_s32(vreinterpretq_s32_s16(q1s16), vreinterpretq_s32_s16(q3s16)); v2tmp0 = vtrnq_s16(vreinterpretq_s16_s32(v2tmp2.val[0]), // q0 vreinterpretq_s16_s32(v2tmp3.val[0])); // q1 v2tmp1 = vtrnq_s16(vreinterpretq_s16_s32(v2tmp2.val[1]), // q2 vreinterpretq_s16_s32(v2tmp3.val[1])); // q3 q11s16 = vaddq_s16(v2tmp0.val[0], v2tmp1.val[1]); q12s16 = vaddq_s16(v2tmp0.val[1], v2tmp1.val[0]); q13s16 = vsubq_s16(v2tmp0.val[1], v2tmp1.val[0]); q14s16 = vsubq_s16(v2tmp0.val[0], v2tmp1.val[1]); q15s16 = vdupq_n_s16(7); q11s16 = vaddq_s16(q11s16, q15s16); q0s16 = vaddq_s16(q11s16, q12s16); q1s16 = vsubq_s16(q11s16, q12s16); q11s32 = q9s32; q12s32 = q10s32; d0s16 = vget_low_s16(q0s16); d1s16 = vget_high_s16(q0s16); d2s16 = vget_low_s16(q1s16); d3s16 = vget_high_s16(q1s16); d0s16 = vshr_n_s16(d0s16, 4); d4s16 = vshr_n_s16(d1s16, 4); d2s16 = vshr_n_s16(d2s16, 4); d6s16 = vshr_n_s16(d3s16, 4); d26s16 = vget_low_s16(q13s16); d27s16 = vget_high_s16(q13s16); d28s16 = vget_low_s16(q14s16); d29s16 = vget_high_s16(q14s16); q9s32 = vmlal_s16(q9s32, 
d28s16, d16s16); q10s32 = vmlal_s16(q10s32, d28s16, d17s16); q11s32 = vmlal_s16(q11s32, d29s16, d16s16); q12s32 = vmlal_s16(q12s32, d29s16, d17s16); q9s32 = vmlal_s16(q9s32, d26s16, d17s16); q10s32 = vmlsl_s16(q10s32, d26s16, d16s16); q11s32 = vmlal_s16(q11s32, d27s16, d17s16); q12s32 = vmlsl_s16(q12s32, d27s16, d16s16); d1s16 = vshrn_n_s32(q9s32, 16); d3s16 = vshrn_n_s32(q10s32, 16); d5s16 = vshrn_n_s32(q11s32, 16); d7s16 = vshrn_n_s32(q12s32, 16); qEmptys16 = vdupq_n_s16(0); q14u16 = vceqq_s16(q14s16, qEmptys16); q14u16 = vmvnq_u16(q14u16); d28u16 = vget_low_u16(q14u16); d29u16 = vget_high_u16(q14u16); d1s16 = vsub_s16(d1s16, vreinterpret_s16_u16(d28u16)); d5s16 = vsub_s16(d5s16, vreinterpret_s16_u16(d29u16)); q0s16 = vcombine_s16(d0s16, d1s16); q1s16 = vcombine_s16(d2s16, d3s16); q2s16 = vcombine_s16(d4s16, d5s16); q3s16 = vcombine_s16(d6s16, d7s16); vst1q_s16(output, q0s16); vst1q_s16(output + 8, q1s16); vst1q_s16(output + 16, q2s16); vst1q_s16(output + 24, q3s16); return; } libvpx-1.8.2/vp8/encoder/arm/neon/vp8_shortwalsh4x4_neon.c000066400000000000000000000077471357355204000234410ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vp8_rtcd.h" #include "vpx_ports/arm.h" #ifdef VPX_INCOMPATIBLE_GCC #include "./vp8_rtcd.h" void vp8_short_walsh4x4_neon(int16_t *input, int16_t *output, int pitch) { vp8_short_walsh4x4_c(input, output, pitch); } #else void vp8_short_walsh4x4_neon(int16_t *input, int16_t *output, int pitch) { uint16x4_t d16u16; int16x8_t q0s16, q1s16; int16x4_t dEmptys16, d0s16, d1s16, d2s16, d3s16, d4s16, d5s16, d6s16, d7s16; int32x4_t qEmptys32, q0s32, q1s32, q2s32, q3s32, q8s32; int32x4_t q9s32, q10s32, q11s32, q15s32; uint32x4_t q8u32, q9u32, q10u32, q11u32; int16x4x2_t v2tmp0, v2tmp1; int32x2x2_t v2tmp2, v2tmp3; dEmptys16 = vdup_n_s16(0); qEmptys32 = vdupq_n_s32(0); q15s32 = vdupq_n_s32(3); d0s16 = vld1_s16(input); input += pitch / 2; d1s16 = vld1_s16(input); input += pitch / 2; d2s16 = vld1_s16(input); input += pitch / 2; d3s16 = vld1_s16(input); v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d0s16), vreinterpret_s32_s16(d2s16)); v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d1s16), vreinterpret_s32_s16(d3s16)); v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[0]), // d0 vreinterpret_s16_s32(v2tmp3.val[0])); // d1 v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp2.val[1]), // d2 vreinterpret_s16_s32(v2tmp3.val[1])); // d3 d4s16 = vadd_s16(v2tmp0.val[0], v2tmp1.val[0]); d5s16 = vadd_s16(v2tmp0.val[1], v2tmp1.val[1]); d6s16 = vsub_s16(v2tmp0.val[1], v2tmp1.val[1]); d7s16 = vsub_s16(v2tmp0.val[0], v2tmp1.val[0]); d4s16 = vshl_n_s16(d4s16, 2); d5s16 = vshl_n_s16(d5s16, 2); d6s16 = vshl_n_s16(d6s16, 2); d7s16 = vshl_n_s16(d7s16, 2); d16u16 = vceq_s16(d4s16, dEmptys16); d16u16 = vmvn_u16(d16u16); d0s16 = vadd_s16(d4s16, d5s16); d3s16 = vsub_s16(d4s16, d5s16); d1s16 = vadd_s16(d7s16, d6s16); d2s16 = vsub_s16(d7s16, d6s16); d0s16 = vsub_s16(d0s16, vreinterpret_s16_u16(d16u16)); // Second for-loop v2tmp2 = vtrn_s32(vreinterpret_s32_s16(d1s16), vreinterpret_s32_s16(d3s16)); v2tmp3 = vtrn_s32(vreinterpret_s32_s16(d0s16), vreinterpret_s32_s16(d2s16)); v2tmp0 = vtrn_s16(vreinterpret_s16_s32(v2tmp3.val[1]), // d2 
vreinterpret_s16_s32(v2tmp2.val[1])); // d3 v2tmp1 = vtrn_s16(vreinterpret_s16_s32(v2tmp3.val[0]), // d0 vreinterpret_s16_s32(v2tmp2.val[0])); // d1 q8s32 = vaddl_s16(v2tmp1.val[0], v2tmp0.val[0]); q9s32 = vaddl_s16(v2tmp1.val[1], v2tmp0.val[1]); q10s32 = vsubl_s16(v2tmp1.val[1], v2tmp0.val[1]); q11s32 = vsubl_s16(v2tmp1.val[0], v2tmp0.val[0]); q0s32 = vaddq_s32(q8s32, q9s32); q1s32 = vaddq_s32(q11s32, q10s32); q2s32 = vsubq_s32(q11s32, q10s32); q3s32 = vsubq_s32(q8s32, q9s32); q8u32 = vcltq_s32(q0s32, qEmptys32); q9u32 = vcltq_s32(q1s32, qEmptys32); q10u32 = vcltq_s32(q2s32, qEmptys32); q11u32 = vcltq_s32(q3s32, qEmptys32); q8s32 = vreinterpretq_s32_u32(q8u32); q9s32 = vreinterpretq_s32_u32(q9u32); q10s32 = vreinterpretq_s32_u32(q10u32); q11s32 = vreinterpretq_s32_u32(q11u32); q0s32 = vsubq_s32(q0s32, q8s32); q1s32 = vsubq_s32(q1s32, q9s32); q2s32 = vsubq_s32(q2s32, q10s32); q3s32 = vsubq_s32(q3s32, q11s32); q8s32 = vaddq_s32(q0s32, q15s32); q9s32 = vaddq_s32(q1s32, q15s32); q10s32 = vaddq_s32(q2s32, q15s32); q11s32 = vaddq_s32(q3s32, q15s32); d0s16 = vshrn_n_s32(q8s32, 3); d1s16 = vshrn_n_s32(q9s32, 3); d2s16 = vshrn_n_s32(q10s32, 3); d3s16 = vshrn_n_s32(q11s32, 3); q0s16 = vcombine_s16(d0s16, d1s16); q1s16 = vcombine_s16(d2s16, d3s16); vst1q_s16(output, q0s16); vst1q_s16(output + 8, q1s16); return; } #endif // VPX_INCOMPATIBLE_GCC libvpx-1.8.2/vp8/encoder/bitstream.c000066400000000000000000001171451357355204000173350ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "vp8/common/header.h" #include "encodemv.h" #include "vp8/common/entropymode.h" #include "vp8/common/findnearmv.h" #include "mcomp.h" #include "vp8/common/systemdependent.h" #include #include #include #include "vpx/vpx_encoder.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/system_state.h" #include "bitstream.h" #include "defaultcoefcounts.h" #include "vp8/common/common.h" const int vp8cx_base_skip_false_prob[128] = { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 251, 248, 244, 240, 236, 232, 229, 225, 221, 217, 213, 208, 204, 199, 194, 190, 187, 183, 179, 175, 172, 168, 164, 160, 157, 153, 149, 145, 142, 138, 134, 130, 127, 124, 120, 117, 114, 110, 107, 104, 101, 98, 95, 92, 89, 86, 83, 80, 77, 74, 71, 68, 65, 62, 59, 56, 53, 50, 47, 44, 41, 38, 35, 32, 30, 28, 26, 24, 22, 20, 18, 16, }; #if defined(SECTIONBITS_OUTPUT) unsigned __int64 Sectionbits[500]; #endif #ifdef MODE_STATS int count_mb_seg[4] = { 0, 0, 0, 0 }; #endif static void update_mode(vp8_writer *const w, int n, vp8_token tok[/* n */], vp8_tree tree, vp8_prob Pnew[/* n-1 */], vp8_prob Pcur[/* n-1 */], unsigned int bct[/* n-1 */][2], const unsigned int num_events[/* n */]) { unsigned int new_b = 0, old_b = 0; int i = 0; vp8_tree_probs_from_distribution(n--, tok, tree, Pnew, bct, num_events, 256, 1); do { new_b += vp8_cost_branch(bct[i], Pnew[i]); old_b += vp8_cost_branch(bct[i], Pcur[i]); } while (++i < n); if (new_b + (n << 8) < old_b) { int j = 0; vp8_write_bit(w, 1); do { const vp8_prob p = Pnew[j]; vp8_write_literal(w, Pcur[j] = p ? p : 1, 8); } while (++j < n); } else vp8_write_bit(w, 0); } static void update_mbintra_mode_probs(VP8_COMP *cpi) { VP8_COMMON *const x = &cpi->common; vp8_writer *const w = cpi->bc; { vp8_prob Pnew[VP8_YMODES - 1]; unsigned int bct[VP8_YMODES - 1][2]; update_mode(w, VP8_YMODES, vp8_ymode_encodings, vp8_ymode_tree, Pnew, x->fc.ymode_prob, bct, (unsigned int *)cpi->mb.ymode_count); } { vp8_prob Pnew[VP8_UV_MODES - 1]; unsigned int bct[VP8_UV_MODES - 1][2]; update_mode(w, VP8_UV_MODES, vp8_uv_mode_encodings, vp8_uv_mode_tree, Pnew, x->fc.uv_mode_prob, bct, (unsigned int *)cpi->mb.uv_mode_count); } } static void write_ymode(vp8_writer *bc, int m, const vp8_prob *p) { vp8_write_token(bc, vp8_ymode_tree, p, vp8_ymode_encodings + m); } static void kfwrite_ymode(vp8_writer *bc, int m, const vp8_prob *p) { vp8_write_token(bc, vp8_kf_ymode_tree, p, vp8_kf_ymode_encodings + m); } static void write_uv_mode(vp8_writer *bc, int m, const vp8_prob *p) { vp8_write_token(bc, vp8_uv_mode_tree, p, vp8_uv_mode_encodings + m); } static void write_bmode(vp8_writer *bc, int m, const vp8_prob *p) { vp8_write_token(bc, vp8_bmode_tree, p, vp8_bmode_encodings + m); } static void write_split(vp8_writer *bc, int x) { vp8_write_token(bc, vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + x); } void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount) { const TOKENEXTRA *stop = p + xcount; unsigned int split; int shift; int count = w->count; unsigned int range = w->range; unsigned int lowvalue = w->lowvalue; while (p < stop) { const int t = p->Token; vp8_token *a = vp8_coef_encodings + t; const vp8_extra_bit_struct *b = vp8_extra_bits + t; int i = 0; const unsigned char *pp = p->context_tree; int v = a->value; int n = a->Len; if (p->skip_eob_node) { n--; i = 2; 
} do { const int bb = (v >> --n) & 1; split = 1 + (((range - 1) * pp[i >> 1]) >> 8); i = vp8_coef_tree[i + bb]; if (bb) { lowvalue += split; range = range - split; } else { range = split; } shift = vp8_norm[range]; range <<= shift; count += shift; if (count >= 0) { int offset = shift - count; if ((lowvalue << (offset - 1)) & 0x80000000) { int x = w->pos - 1; while (x >= 0 && w->buffer[x] == 0xff) { w->buffer[x] = (unsigned char)0; x--; } w->buffer[x] += 1; } validate_buffer(w->buffer + w->pos, 1, w->buffer_end, w->error); w->buffer[w->pos++] = (lowvalue >> (24 - offset)) & 0xff; lowvalue <<= offset; shift = count; lowvalue &= 0xffffff; count -= 8; } lowvalue <<= shift; } while (n); if (b->base_val) { const int e = p->Extra, L = b->Len; if (L) { const unsigned char *proba = b->prob; const int v2 = e >> 1; int n2 = L; /* number of bits in v2, assumed nonzero */ i = 0; do { const int bb = (v2 >> --n2) & 1; split = 1 + (((range - 1) * proba[i >> 1]) >> 8); i = b->tree[i + bb]; if (bb) { lowvalue += split; range = range - split; } else { range = split; } shift = vp8_norm[range]; range <<= shift; count += shift; if (count >= 0) { int offset = shift - count; if ((lowvalue << (offset - 1)) & 0x80000000) { int x = w->pos - 1; while (x >= 0 && w->buffer[x] == 0xff) { w->buffer[x] = (unsigned char)0; x--; } w->buffer[x] += 1; } validate_buffer(w->buffer + w->pos, 1, w->buffer_end, w->error); w->buffer[w->pos++] = (lowvalue >> (24 - offset)); lowvalue <<= offset; shift = count; lowvalue &= 0xffffff; count -= 8; } lowvalue <<= shift; } while (n2); } { split = (range + 1) >> 1; if (e & 1) { lowvalue += split; range = range - split; } else { range = split; } range <<= 1; if ((lowvalue & 0x80000000)) { int x = w->pos - 1; while (x >= 0 && w->buffer[x] == 0xff) { w->buffer[x] = (unsigned char)0; x--; } w->buffer[x] += 1; } lowvalue <<= 1; if (!++count) { count = -8; validate_buffer(w->buffer + w->pos, 1, w->buffer_end, w->error); w->buffer[w->pos++] = (lowvalue >> 24); lowvalue &= 0xffffff; } } } ++p; } w->count = count; w->lowvalue = lowvalue; w->range = range; } static void write_partition_size(unsigned char *cx_data, int size) { signed char csize; csize = size & 0xff; *cx_data = csize; csize = (size >> 8) & 0xff; *(cx_data + 1) = csize; csize = (size >> 16) & 0xff; *(cx_data + 2) = csize; } static void pack_tokens_into_partitions(VP8_COMP *cpi, unsigned char *cx_data, unsigned char *cx_data_end, int num_part) { int i; unsigned char *ptr = cx_data; unsigned char *ptr_end = cx_data_end; vp8_writer *w; for (i = 0; i < num_part; ++i) { int mb_row; w = cpi->bc + i + 1; vp8_start_encode(w, ptr, ptr_end); for (mb_row = i; mb_row < cpi->common.mb_rows; mb_row += num_part) { const TOKENEXTRA *p = cpi->tplist[mb_row].start; const TOKENEXTRA *stop = cpi->tplist[mb_row].stop; int tokens = (int)(stop - p); vp8_pack_tokens(w, p, tokens); } vp8_stop_encode(w); ptr += w->pos; } } #if CONFIG_MULTITHREAD static void pack_mb_row_tokens(VP8_COMP *cpi, vp8_writer *w) { int mb_row; for (mb_row = 0; mb_row < cpi->common.mb_rows; ++mb_row) { const TOKENEXTRA *p = cpi->tplist[mb_row].start; const TOKENEXTRA *stop = cpi->tplist[mb_row].stop; int tokens = (int)(stop - p); vp8_pack_tokens(w, p, tokens); } } #endif // CONFIG_MULTITHREAD static void write_mv_ref(vp8_writer *w, MB_PREDICTION_MODE m, const vp8_prob *p) { assert(NEARESTMV <= m && m <= SPLITMV); vp8_write_token(w, vp8_mv_ref_tree, p, vp8_mv_ref_encoding_array + (m - NEARESTMV)); } static void write_sub_mv_ref(vp8_writer *w, B_PREDICTION_MODE m, const vp8_prob *p) { 
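/* As in write_mv_ref above, the mode is written as a token rebased to
 * the first legal value (LEFT4X4). */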
assert(LEFT4X4 <= m && m <= NEW4X4); vp8_write_token(w, vp8_sub_mv_ref_tree, p, vp8_sub_mv_ref_encoding_array + (m - LEFT4X4)); } static void write_mv(vp8_writer *w, const MV *mv, const int_mv *ref, const MV_CONTEXT *mvc) { MV e; e.row = mv->row - ref->as_mv.row; e.col = mv->col - ref->as_mv.col; vp8_encode_motion_vector(w, &e, mvc); } static void write_mb_features(vp8_writer *w, const MB_MODE_INFO *mi, const MACROBLOCKD *x) { /* Encode the MB segment id. */ if (x->segmentation_enabled && x->update_mb_segmentation_map) { switch (mi->segment_id) { case 0: vp8_write(w, 0, x->mb_segment_tree_probs[0]); vp8_write(w, 0, x->mb_segment_tree_probs[1]); break; case 1: vp8_write(w, 0, x->mb_segment_tree_probs[0]); vp8_write(w, 1, x->mb_segment_tree_probs[1]); break; case 2: vp8_write(w, 1, x->mb_segment_tree_probs[0]); vp8_write(w, 0, x->mb_segment_tree_probs[2]); break; case 3: vp8_write(w, 1, x->mb_segment_tree_probs[0]); vp8_write(w, 1, x->mb_segment_tree_probs[2]); break; /* TRAP.. This should not happen */ default: vp8_write(w, 0, x->mb_segment_tree_probs[0]); vp8_write(w, 0, x->mb_segment_tree_probs[1]); break; } } } void vp8_convert_rfct_to_prob(VP8_COMP *const cpi) { const int *const rfct = cpi->mb.count_mb_ref_frame_usage; const int rf_intra = rfct[INTRA_FRAME]; const int rf_inter = rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]; /* Calculate the probabilities used to code the ref frame based on usage */ if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter))) { cpi->prob_intra_coded = 1; } cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128; if (!cpi->prob_last_coded) cpi->prob_last_coded = 1; cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) ? (rfct[GOLDEN_FRAME] * 255) / (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME]) : 128; if (!cpi->prob_gf_coded) cpi->prob_gf_coded = 1; } static void pack_inter_mode_mvs(VP8_COMP *const cpi) { VP8_COMMON *const pc = &cpi->common; vp8_writer *const w = cpi->bc; const MV_CONTEXT *mvc = pc->fc.mvc; MODE_INFO *m = pc->mi; const int mis = pc->mode_info_stride; int mb_row = -1; int prob_skip_false = 0; cpi->mb.partition_info = cpi->mb.pi; vp8_convert_rfct_to_prob(cpi); if (pc->mb_no_coeff_skip) { int total_mbs = pc->mb_rows * pc->mb_cols; prob_skip_false = (total_mbs - cpi->mb.skip_true_count) * 256 / total_mbs; if (prob_skip_false <= 1) prob_skip_false = 1; if (prob_skip_false > 255) prob_skip_false = 255; cpi->prob_skip_false = prob_skip_false; vp8_write_literal(w, prob_skip_false, 8); } vp8_write_literal(w, cpi->prob_intra_coded, 8); vp8_write_literal(w, cpi->prob_last_coded, 8); vp8_write_literal(w, cpi->prob_gf_coded, 8); update_mbintra_mode_probs(cpi); vp8_write_mvprobs(cpi); while (++mb_row < pc->mb_rows) { int mb_col = -1; while (++mb_col < pc->mb_cols) { const MB_MODE_INFO *const mi = &m->mbmi; const MV_REFERENCE_FRAME rf = mi->ref_frame; const MB_PREDICTION_MODE mode = mi->mode; MACROBLOCKD *xd = &cpi->mb.e_mbd; /* Distance of Mb to the various image edges. 
void vp8_convert_rfct_to_prob(VP8_COMP *const cpi) {
  const int *const rfct = cpi->mb.count_mb_ref_frame_usage;
  const int rf_intra = rfct[INTRA_FRAME];
  const int rf_inter =
      rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];

  /* Calculate the probabilities used to code the ref frame based on usage */
  if (!(cpi->prob_intra_coded = rf_intra * 255 / (rf_intra + rf_inter))) {
    cpi->prob_intra_coded = 1;
  }

  cpi->prob_last_coded = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;

  if (!cpi->prob_last_coded) cpi->prob_last_coded = 1;

  cpi->prob_gf_coded = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
                           ? (rfct[GOLDEN_FRAME] * 255) /
                                 (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
                           : 128;

  if (!cpi->prob_gf_coded) cpi->prob_gf_coded = 1;
}

static void pack_inter_mode_mvs(VP8_COMP *const cpi) {
  VP8_COMMON *const pc = &cpi->common;
  vp8_writer *const w = cpi->bc;
  const MV_CONTEXT *mvc = pc->fc.mvc;

  MODE_INFO *m = pc->mi;
  const int mis = pc->mode_info_stride;
  int mb_row = -1;

  int prob_skip_false = 0;

  cpi->mb.partition_info = cpi->mb.pi;

  vp8_convert_rfct_to_prob(cpi);

  if (pc->mb_no_coeff_skip) {
    int total_mbs = pc->mb_rows * pc->mb_cols;

    prob_skip_false = (total_mbs - cpi->mb.skip_true_count) * 256 / total_mbs;

    if (prob_skip_false <= 1) prob_skip_false = 1;

    if (prob_skip_false > 255) prob_skip_false = 255;

    cpi->prob_skip_false = prob_skip_false;
    vp8_write_literal(w, prob_skip_false, 8);
  }

  vp8_write_literal(w, cpi->prob_intra_coded, 8);
  vp8_write_literal(w, cpi->prob_last_coded, 8);
  vp8_write_literal(w, cpi->prob_gf_coded, 8);

  update_mbintra_mode_probs(cpi);

  vp8_write_mvprobs(cpi);

  while (++mb_row < pc->mb_rows) {
    int mb_col = -1;

    while (++mb_col < pc->mb_cols) {
      const MB_MODE_INFO *const mi = &m->mbmi;
      const MV_REFERENCE_FRAME rf = mi->ref_frame;
      const MB_PREDICTION_MODE mode = mi->mode;

      MACROBLOCKD *xd = &cpi->mb.e_mbd;

      /* Distance of MB to the various image edges. These are specified to
       * 1/8th pel as they are always compared to MV values that are in
       * 1/8th pel units.
       */
      xd->mb_to_left_edge = -((mb_col * 16) << 3);
      xd->mb_to_right_edge = ((pc->mb_cols - 1 - mb_col) * 16) << 3;
      xd->mb_to_top_edge = -((mb_row * 16) << 3);
      xd->mb_to_bottom_edge = ((pc->mb_rows - 1 - mb_row) * 16) << 3;

      if (cpi->mb.e_mbd.update_mb_segmentation_map) {
        write_mb_features(w, mi, &cpi->mb.e_mbd);
      }

      if (pc->mb_no_coeff_skip) {
        vp8_encode_bool(w, m->mbmi.mb_skip_coeff, prob_skip_false);
      }

      if (rf == INTRA_FRAME) {
        vp8_write(w, 0, cpi->prob_intra_coded);
        write_ymode(w, mode, pc->fc.ymode_prob);

        if (mode == B_PRED) {
          int j = 0;

          do {
            write_bmode(w, m->bmi[j].as_mode, pc->fc.bmode_prob);
          } while (++j < 16);
        }

        write_uv_mode(w, mi->uv_mode, pc->fc.uv_mode_prob);
      } else { /* inter coded */
        int_mv best_mv;
        vp8_prob mv_ref_p[VP8_MVREFS - 1];

        vp8_write(w, 1, cpi->prob_intra_coded);

        if (rf == LAST_FRAME)
          vp8_write(w, 0, cpi->prob_last_coded);
        else {
          vp8_write(w, 1, cpi->prob_last_coded);
          vp8_write(w, (rf == GOLDEN_FRAME) ? 0 : 1, cpi->prob_gf_coded);
        }

        {
          int_mv n1, n2;
          int ct[4];

          vp8_find_near_mvs(xd, m, &n1, &n2, &best_mv, ct, rf,
                            cpi->common.ref_frame_sign_bias);
          vp8_clamp_mv2(&best_mv, xd);

          vp8_mv_ref_probs(mv_ref_p, ct);
        }

        write_mv_ref(w, mode, mv_ref_p);

        switch (mode) /* new, split require MVs */
        {
          case NEWMV: write_mv(w, &mi->mv.as_mv, &best_mv, mvc); break;

          case SPLITMV: {
            int j = 0;

#ifdef MODE_STATS
            ++count_mb_seg[mi->partitioning];
#endif

            write_split(w, mi->partitioning);

            do {
              B_PREDICTION_MODE blockmode;
              int_mv blockmv;
              const int *const L = vp8_mbsplits[mi->partitioning];
              int k = -1; /* first block in subset j */
              int mv_contz;
              int_mv leftmv, abovemv;

              blockmode = cpi->mb.partition_info->bmi[j].mode;
              blockmv = cpi->mb.partition_info->bmi[j].mv;

              while (j != L[++k]) {
                assert(k < 16);
              }
              leftmv.as_int = left_block_mv(m, k);
              abovemv.as_int = above_block_mv(m, k, mis);
              mv_contz = vp8_mv_cont(&leftmv, &abovemv);

              write_sub_mv_ref(w, blockmode, vp8_sub_mv_ref_prob2[mv_contz]);

              if (blockmode == NEW4X4) {
                write_mv(w, &blockmv.as_mv, &best_mv, (const MV_CONTEXT *)mvc);
              }
            } while (++j < cpi->mb.partition_info->count);
            break;
          }
          default: break;
        }
      }

      ++m;
      cpi->mb.partition_info++;
    }

    ++m; /* skip L prediction border */
    cpi->mb.partition_info++;
  }
}
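/* Illustration (not part of libvpx): vp8_convert_rfct_to_prob() above maps
 * usage counts to 8-bit probabilities, clamped away from 0 so every branch
 * stays codable. A small worked case under assumed counts (all values are
 * illustrative only):
 */
#if 0
static void ref_prob_example(void) {
  /* Suppose 10 intra, 60 last, 20 golden and 10 altref macroblocks. */
  const int rf_intra = 10, rf_last = 60, rf_gold = 20, rf_alt = 10;
  const int rf_inter = rf_last + rf_gold + rf_alt; /* 90 */

  int prob_intra = rf_intra * 255 / (rf_intra + rf_inter); /* 25  */
  int prob_last = rf_last * 255 / rf_inter;                /* 170 */
  int prob_gf = rf_gold * 255 / (rf_gold + rf_alt);        /* 170 */

  /* Probabilities of 0 are bumped to 1, exactly as the encoder does. */
  if (!prob_intra) prob_intra = 1;
  if (!prob_last) prob_last = 1;
  if (!prob_gf) prob_gf = 1;
}
#endif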
static void write_kfmodes(VP8_COMP *cpi) {
  vp8_writer *const bc = cpi->bc;
  const VP8_COMMON *const c = &cpi->common;
  /* const */ MODE_INFO *m = c->mi;

  int mb_row = -1;
  int prob_skip_false = 0;

  if (c->mb_no_coeff_skip) {
    int total_mbs = c->mb_rows * c->mb_cols;

    prob_skip_false = (total_mbs - cpi->mb.skip_true_count) * 256 / total_mbs;

    if (prob_skip_false <= 1) prob_skip_false = 1;

    if (prob_skip_false >= 255) prob_skip_false = 255;

    cpi->prob_skip_false = prob_skip_false;
    vp8_write_literal(bc, prob_skip_false, 8);
  }

  while (++mb_row < c->mb_rows) {
    int mb_col = -1;

    while (++mb_col < c->mb_cols) {
      const int ym = m->mbmi.mode;

      if (cpi->mb.e_mbd.update_mb_segmentation_map) {
        write_mb_features(bc, &m->mbmi, &cpi->mb.e_mbd);
      }

      if (c->mb_no_coeff_skip) {
        vp8_encode_bool(bc, m->mbmi.mb_skip_coeff, prob_skip_false);
      }

      kfwrite_ymode(bc, ym, vp8_kf_ymode_prob);

      if (ym == B_PRED) {
        const int mis = c->mode_info_stride;
        int i = 0;

        do {
          const B_PREDICTION_MODE A = above_block_mode(m, i, mis);
          const B_PREDICTION_MODE L = left_block_mode(m, i);
          const int bm = m->bmi[i].as_mode;

          write_bmode(bc, bm, vp8_kf_bmode_prob[A][L]);
        } while (++i < 16);
      }

      write_uv_mode(bc, (m++)->mbmi.uv_mode, vp8_kf_uv_mode_prob);
    }

    m++; /* skip L prediction border */
  }
}

#if 0
/* This function is used for debugging probability trees. */
static void print_prob_tree(
    vp8_prob coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
                       [ENTROPY_NODES]) {
  /* print coef probability tree */
  int i, j, k, l;
  FILE *f = fopen("enc_tree_probs.txt", "a");

  fprintf(f, "{\n");

  for (i = 0; i < BLOCK_TYPES; ++i) {
    fprintf(f, "  {\n");

    for (j = 0; j < COEF_BANDS; ++j) {
      fprintf(f, "    {\n");

      for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
        fprintf(f, "      {");

        for (l = 0; l < ENTROPY_NODES; ++l) {
          fprintf(f, "%3u, ", (unsigned int)(coef_probs[i][j][k][l]));
        }

        fprintf(f, " }\n");
      }

      fprintf(f, "    }\n");
    }

    fprintf(f, "  }\n");
  }

  fprintf(f, "}\n");
  fclose(f);
}
#endif

static void sum_probs_over_prev_coef_context(
    const unsigned int probs[PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS],
    unsigned int *out) {
  int i, j;
  for (i = 0; i < MAX_ENTROPY_TOKENS; ++i) {
    for (j = 0; j < PREV_COEF_CONTEXTS; ++j) {
      const unsigned int tmp = out[i];
      out[i] += probs[j][i];

      /* check for wrap */
      if (out[i] < tmp) out[i] = UINT_MAX;
    }
  }
}

static int prob_update_savings(const unsigned int *ct, const vp8_prob oldp,
                               const vp8_prob newp, const vp8_prob upd) {
  const int old_b = vp8_cost_branch(ct, oldp);
  const int new_b = vp8_cost_branch(ct, newp);
  const int update_b = 8 + ((vp8_cost_one(upd) - vp8_cost_zero(upd)) >> 8);

  return old_b - new_b - update_b;
}
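/* Illustration (not part of libvpx): prob_update_savings() above is a pure
 * cost comparison in the coder's 1/256-bit units -- old_b and new_b are the
 * branch costs under the old and new probabilities, and update_b charges the
 * 8-bit literal for the new value plus the change in the update-flag cost. A
 * sketch of how it can gate an update decision; counts and probabilities are
 * illustrative:
 */
#if 0
static void savings_example(void) {
  const unsigned int ct[2] = { 900, 100 }; /* observed zero/one counts */
  const vp8_prob oldp = 128;               /* previous probability     */
  const vp8_prob newp = 230;               /* fitted to the counts     */
  const vp8_prob upd = 252;                /* update-flag probability  */

  /* Positive savings -> cheaper to signal and use the new probability. */
  if (prob_update_savings(ct, oldp, newp, upd) > 0) {
    /* the encoder would write the update flag and the 8-bit newp here */
  }
}
#endif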
static int independent_coef_context_savings(VP8_COMP *cpi) {
  MACROBLOCK *const x = &cpi->mb;
  int savings = 0;
  int i = 0;
  do {
    int j = 0;
    do {
      int k = 0;
      unsigned int prev_coef_count_sum[MAX_ENTROPY_TOKENS] = { 0 };
      int prev_coef_savings[MAX_ENTROPY_TOKENS] = { 0 };
      const unsigned int(*probs)[MAX_ENTROPY_TOKENS];
      /* Calculate new probabilities given the constraint that
       * they must be equal over the prev coef contexts
       */

      probs = (const unsigned int(*)[MAX_ENTROPY_TOKENS])x->coef_counts[i][j];

      /* Reset to default probabilities at key frames */
      if (cpi->common.frame_type == KEY_FRAME) {
        probs = default_coef_counts[i][j];
      }

      sum_probs_over_prev_coef_context(probs, prev_coef_count_sum);

      do {
        /* at every context */

        /* calc probs and branch cts for this frame only */
        int t = 0; /* token/prob index */

        vp8_tree_probs_from_distribution(
            MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
            cpi->frame_coef_probs[i][j][k], cpi->frame_branch_ct[i][j][k],
            prev_coef_count_sum, 256, 1);

        do {
          const unsigned int *ct = cpi->frame_branch_ct[i][j][k][t];
          const vp8_prob newp = cpi->frame_coef_probs[i][j][k][t];
          const vp8_prob oldp = cpi->common.fc.coef_probs[i][j][k][t];
          const vp8_prob upd = vp8_coef_update_probs[i][j][k][t];
          const int s = prob_update_savings(ct, oldp, newp, upd);

          if (cpi->common.frame_type != KEY_FRAME ||
              (cpi->common.frame_type == KEY_FRAME && newp != oldp)) {
            prev_coef_savings[t] += s;
          }
        } while (++t < ENTROPY_NODES);
      } while (++k < PREV_COEF_CONTEXTS);
      k = 0;
      do {
        /* We only update probabilities if we can save bits, except
         * for key frames where we have to update all probabilities
         * to get the equal probabilities across the prev coef
         * contexts.
         */
        if (prev_coef_savings[k] > 0 || cpi->common.frame_type == KEY_FRAME) {
          savings += prev_coef_savings[k];
        }
      } while (++k < ENTROPY_NODES);
    } while (++j < COEF_BANDS);
  } while (++i < BLOCK_TYPES);
  return savings;
}

static int default_coef_context_savings(VP8_COMP *cpi) {
  MACROBLOCK *const x = &cpi->mb;
  int savings = 0;
  int i = 0;
  do {
    int j = 0;
    do {
      int k = 0;
      do {
        /* at every context */

        /* calc probs and branch cts for this frame only */
        int t = 0; /* token/prob index */

        vp8_tree_probs_from_distribution(
            MAX_ENTROPY_TOKENS, vp8_coef_encodings, vp8_coef_tree,
            cpi->frame_coef_probs[i][j][k], cpi->frame_branch_ct[i][j][k],
            x->coef_counts[i][j][k], 256, 1);

        do {
          const unsigned int *ct = cpi->frame_branch_ct[i][j][k][t];
          const vp8_prob newp = cpi->frame_coef_probs[i][j][k][t];
          const vp8_prob oldp = cpi->common.fc.coef_probs[i][j][k][t];
          const vp8_prob upd = vp8_coef_update_probs[i][j][k][t];
          const int s = prob_update_savings(ct, oldp, newp, upd);

          if (s > 0) {
            savings += s;
          }
        } while (++t < ENTROPY_NODES);
      } while (++k < PREV_COEF_CONTEXTS);
    } while (++j < COEF_BANDS);
  } while (++i < BLOCK_TYPES);

  return savings;
}

void vp8_calc_ref_frame_costs(int *ref_frame_cost, int prob_intra,
                              int prob_last, int prob_garf) {
  assert(prob_intra >= 0);
  assert(prob_intra <= 255);
  assert(prob_last >= 0);
  assert(prob_last <= 255);
  assert(prob_garf >= 0);
  assert(prob_garf <= 255);
  ref_frame_cost[INTRA_FRAME] = vp8_cost_zero(prob_intra);
  ref_frame_cost[LAST_FRAME] =
      vp8_cost_one(prob_intra) + vp8_cost_zero(prob_last);
  ref_frame_cost[GOLDEN_FRAME] = vp8_cost_one(prob_intra) +
                                 vp8_cost_one(prob_last) +
                                 vp8_cost_zero(prob_garf);
  ref_frame_cost[ALTREF_FRAME] = vp8_cost_one(prob_intra) +
                                 vp8_cost_one(prob_last) +
                                 vp8_cost_one(prob_garf);
}
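/* Illustration (not part of libvpx): vp8_calc_ref_frame_costs() above prices
 * each reference frame as the sum of the bool costs along its signaling path
 * (intra? last? golden vs. altref?), in 1/256-bit units where an even split
 * costs roughly 256 per bool. A usage sketch; the probability values are
 * illustrative:
 */
#if 0
static void ref_cost_example(void) {
  int ref_frame_cost[MAX_REF_FRAMES];

  vp8_calc_ref_frame_costs(ref_frame_cost, 25, 170, 170);

  /* With prob_intra = 25, taking the inter ("1") branch is cheap, so LAST
   * costs vp8_cost_one(25) + vp8_cost_zero(170), while ALTREF pays for
   * three bools and is the most expensive reference to name. */
}
#endif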
int vp8_estimate_entropy_savings(VP8_COMP *cpi) {
  int savings = 0;

  const int *const rfct = cpi->mb.count_mb_ref_frame_usage;
  const int rf_intra = rfct[INTRA_FRAME];
  const int rf_inter =
      rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];
  int new_intra, new_last, new_garf, oldtotal, newtotal;
  int ref_frame_cost[MAX_REF_FRAMES];

  vpx_clear_system_state();

  if (cpi->common.frame_type != KEY_FRAME) {
    if (!(new_intra = rf_intra * 255 / (rf_intra + rf_inter))) new_intra = 1;

    new_last = rf_inter ? (rfct[LAST_FRAME] * 255) / rf_inter : 128;

    new_garf = (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
                   ? (rfct[GOLDEN_FRAME] * 255) /
                         (rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME])
                   : 128;

    vp8_calc_ref_frame_costs(ref_frame_cost, new_intra, new_last, new_garf);

    newtotal = rfct[INTRA_FRAME] * ref_frame_cost[INTRA_FRAME] +
               rfct[LAST_FRAME] * ref_frame_cost[LAST_FRAME] +
               rfct[GOLDEN_FRAME] * ref_frame_cost[GOLDEN_FRAME] +
               rfct[ALTREF_FRAME] * ref_frame_cost[ALTREF_FRAME];

    /* old costs */
    vp8_calc_ref_frame_costs(ref_frame_cost, cpi->prob_intra_coded,
                             cpi->prob_last_coded, cpi->prob_gf_coded);

    oldtotal = rfct[INTRA_FRAME] * ref_frame_cost[INTRA_FRAME] +
               rfct[LAST_FRAME] * ref_frame_cost[LAST_FRAME] +
               rfct[GOLDEN_FRAME] * ref_frame_cost[GOLDEN_FRAME] +
               rfct[ALTREF_FRAME] * ref_frame_cost[ALTREF_FRAME];

    savings += (oldtotal - newtotal) / 256;
  }

  if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS) {
    savings += independent_coef_context_savings(cpi);
  } else {
    savings += default_coef_context_savings(cpi);
  }

  return savings;
}

#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
int vp8_update_coef_context(VP8_COMP *cpi) {
  int savings = 0;

  if (cpi->common.frame_type == KEY_FRAME) {
    /* Reset to default counts/probabilities at key frames */
    vp8_copy(cpi->mb.coef_counts, default_coef_counts);
  }

  if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS)
    savings += independent_coef_context_savings(cpi);
  else
    savings += default_coef_context_savings(cpi);

  return savings;
}
#endif
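/* Illustration (not part of libvpx): the totals accumulated in
 * vp8_estimate_entropy_savings() above are in 1/256-bit units, so the final
 * division by 256 converts the reference-frame term to whole bits. For
 * example (values illustrative), if oldtotal - newtotal == 5120 units, the
 * estimated saving from retransmitting the three ref-frame probabilities is
 * 5120 / 256 == 20 bits.
 */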
void vp8_update_coef_probs(VP8_COMP *cpi) {
  int i = 0;
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
  vp8_writer *const w = cpi->bc;
#endif
  int savings = 0;

  vpx_clear_system_state();

  do {
    int j = 0;

    do {
      int k = 0;
      int prev_coef_savings[ENTROPY_NODES] = { 0 };
      if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS) {
        for (k = 0; k < PREV_COEF_CONTEXTS; ++k) {
          int t; /* token/prob index */
          for (t = 0; t < ENTROPY_NODES; ++t) {
            const unsigned int *ct = cpi->frame_branch_ct[i][j][k][t];
            const vp8_prob newp = cpi->frame_coef_probs[i][j][k][t];
            const vp8_prob oldp = cpi->common.fc.coef_probs[i][j][k][t];
            const vp8_prob upd = vp8_coef_update_probs[i][j][k][t];

            prev_coef_savings[t] += prob_update_savings(ct, oldp, newp, upd);
          }
        }
        k = 0;
      }
      do {
        /* note: use result from vp8_estimate_entropy_savings, so no
         * need to call vp8_tree_probs_from_distribution here.
         */

        /* at every context */

        /* calc probs and branch cts for this frame only */
        int t = 0; /* token/prob index */

        do {
          const vp8_prob newp = cpi->frame_coef_probs[i][j][k][t];
          vp8_prob *Pold = cpi->common.fc.coef_probs[i][j][k] + t;
          const vp8_prob upd = vp8_coef_update_probs[i][j][k][t];

          int s = prev_coef_savings[t];
          int u = 0;

          if (!(cpi->oxcf.error_resilient_mode &
                VPX_ERROR_RESILIENT_PARTITIONS)) {
            s = prob_update_savings(cpi->frame_branch_ct[i][j][k][t], *Pold,
                                    newp, upd);
          }

          if (s > 0) u = 1;

          /* Force updates on key frames if the new is different,
           * so that we can be sure we end up with equal probabilities
           * over the prev coef contexts.
           */
          if ((cpi->oxcf.error_resilient_mode &
               VPX_ERROR_RESILIENT_PARTITIONS) &&
              cpi->common.frame_type == KEY_FRAME && newp != *Pold) {
            u = 1;
          }

#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
          cpi->update_probs[i][j][k][t] = u;
#else
          vp8_write(w, u, upd);
#endif

          if (u) {
            /* send/use new probability */
            *Pold = newp;
#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
            vp8_write_literal(w, newp, 8);
#endif
            savings += s;
          }
        } while (++t < ENTROPY_NODES);
      } while (++k < PREV_COEF_CONTEXTS);
    } while (++j < COEF_BANDS);
  } while (++i < BLOCK_TYPES);
}

#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
static void pack_coef_probs(VP8_COMP *cpi) {
  int i = 0;
  vp8_writer *const w = cpi->bc;

  do {
    int j = 0;

    do {
      int k = 0;

      do {
        int t = 0; /* token/prob index */

        do {
          const vp8_prob newp = cpi->common.fc.coef_probs[i][j][k][t];
          const vp8_prob upd = vp8_coef_update_probs[i][j][k][t];
          const char u = cpi->update_probs[i][j][k][t];

          vp8_write(w, u, upd);

          if (u) {
            /* send/use new probability */
            vp8_write_literal(w, newp, 8);
          }
        } while (++t < ENTROPY_NODES);
      } while (++k < PREV_COEF_CONTEXTS);
    } while (++j < COEF_BANDS);
  } while (++i < BLOCK_TYPES);
}
#endif

#ifdef PACKET_TESTING
FILE *vpxlogc = 0;
#endif

static void put_delta_q(vp8_writer *bc, int delta_q) {
  if (delta_q != 0) {
    vp8_write_bit(bc, 1);
    vp8_write_literal(bc, abs(delta_q), 4);

    if (delta_q < 0)
      vp8_write_bit(bc, 1);
    else
      vp8_write_bit(bc, 0);
  } else
    vp8_write_bit(bc, 0);
}
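/* Illustration (not part of libvpx): put_delta_q() above emits a presence
 * bit, then a 4-bit magnitude, then a sign bit. A sketch of the matching
 * read, assuming a bool decoder with vp8_read_bit()/vp8_read_literal()-style
 * helpers; get_delta_q_example is a hypothetical name:
 */
#if 0
static int get_delta_q_example(BOOL_DECODER *bd) {
  int delta_q = 0;

  if (vp8_read_bit(bd)) {                     /* delta present?  */
    delta_q = vp8_read_literal(bd, 4);        /* 4-bit magnitude */
    if (vp8_read_bit(bd)) delta_q = -delta_q; /* sign            */
  }
  return delta_q;
}
#endif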
void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest,
                        unsigned char *dest_end, size_t *size) {
  int i, j;
  VP8_HEADER oh;
  VP8_COMMON *const pc = &cpi->common;
  vp8_writer *const bc = cpi->bc;
  MACROBLOCKD *const xd = &cpi->mb.e_mbd;
  int extra_bytes_packed = 0;

  unsigned char *cx_data = dest;
  unsigned char *cx_data_end = dest_end;
  const int *mb_feature_data_bits;

  oh.show_frame = (int)pc->show_frame;
  oh.type = (int)pc->frame_type;
  oh.version = pc->version;
  oh.first_partition_length_in_bytes = 0;

  mb_feature_data_bits = vp8_mb_feature_data_bits;

  bc[0].error = &pc->error;

  validate_buffer(cx_data, 3, cx_data_end, &cpi->common.error);
  cx_data += 3;

#if defined(SECTIONBITS_OUTPUT)
  Sectionbits[active_section = 1] += sizeof(VP8_HEADER) * 8 * 256;
#endif

  /* Every key frame sends the start code, width, height, scale factor,
   * clamp and color type.
   */
  if (oh.type == KEY_FRAME) {
    int v;

    validate_buffer(cx_data, 7, cx_data_end, &cpi->common.error);

    /* Start / synch code */
    cx_data[0] = 0x9D;
    cx_data[1] = 0x01;
    cx_data[2] = 0x2a;

    /* Pack scale and frame size into 16 bits. Store it 8 bits at a time.
     * https://tools.ietf.org/html/rfc6386
     * 9.1. Uncompressed Data Chunk
     * 16 bits : (2 bits Horizontal Scale << 14) | Width (14 bits)
     * 16 bits : (2 bits Vertical Scale << 14) | Height (14 bits)
     */
    v = (pc->horiz_scale << 14) | pc->Width;
    cx_data[3] = v & 0xff;
    cx_data[4] = v >> 8;

    v = (pc->vert_scale << 14) | pc->Height;
    cx_data[5] = v & 0xff;
    cx_data[6] = v >> 8;

    extra_bytes_packed = 7;
    cx_data += extra_bytes_packed;

    vp8_start_encode(bc, cx_data, cx_data_end);

    /* signal clr type */
    vp8_write_bit(bc, 0);
    vp8_write_bit(bc, pc->clamp_type);
  } else {
    vp8_start_encode(bc, cx_data, cx_data_end);
  }
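  /* Illustration (not part of libvpx): a worked example of the 16-bit
   * dimension packing above, with illustrative values. Width = 1280 and
   * horiz_scale = 0 give v = (0 << 14) | 1280 = 0x0500, stored little-endian
   * as bytes 0x00 0x05; Height = 720 and vert_scale = 0 give v = 0x02D0,
   * stored as 0xD0 0x02. A decoder reassembles each field as
   * (b0 | (b1 << 8)) and splits off the top two scale bits.
   */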
  /* Signal whether or not Segmentation is enabled */
  vp8_write_bit(bc, xd->segmentation_enabled);

  /* Indicate which features are enabled */
  if (xd->segmentation_enabled) {
    /* Signal whether or not the segmentation map is being updated. */
    vp8_write_bit(bc, xd->update_mb_segmentation_map);
    vp8_write_bit(bc, xd->update_mb_segmentation_data);

    if (xd->update_mb_segmentation_data) {
      signed char Data;

      vp8_write_bit(bc, xd->mb_segement_abs_delta);

      /* For each segmentation feature (Quant and loop filter level) */
      for (i = 0; i < MB_LVL_MAX; ++i) {
        /* For each of the segments */
        for (j = 0; j < MAX_MB_SEGMENTS; ++j) {
          Data = xd->segment_feature_data[i][j];

          /* Frame level data */
          if (Data) {
            vp8_write_bit(bc, 1);

            if (Data < 0) {
              Data = -Data;
              vp8_write_literal(bc, Data, mb_feature_data_bits[i]);
              vp8_write_bit(bc, 1);
            } else {
              vp8_write_literal(bc, Data, mb_feature_data_bits[i]);
              vp8_write_bit(bc, 0);
            }
          } else
            vp8_write_bit(bc, 0);
        }
      }
    }

    if (xd->update_mb_segmentation_map) {
      /* Write the probs used to decode the segment id for each mb */
      for (i = 0; i < MB_FEATURE_TREE_PROBS; ++i) {
        int Data = xd->mb_segment_tree_probs[i];

        if (Data != 255) {
          vp8_write_bit(bc, 1);
          vp8_write_literal(bc, Data, 8);
        } else
          vp8_write_bit(bc, 0);
      }
    }
  }

  vp8_write_bit(bc, pc->filter_type);
  vp8_write_literal(bc, pc->filter_level, 6);
  vp8_write_literal(bc, pc->sharpness_level, 3);

  /* Write out loop filter deltas applied at the MB level based on mode
   * or ref frame (if they are enabled).
   */
  vp8_write_bit(bc, xd->mode_ref_lf_delta_enabled);

  if (xd->mode_ref_lf_delta_enabled) {
    /* Do the deltas need to be updated */
    int send_update =
        xd->mode_ref_lf_delta_update || cpi->oxcf.error_resilient_mode;

    vp8_write_bit(bc, send_update);

    if (send_update) {
      int Data;

      /* Send update */
      for (i = 0; i < MAX_REF_LF_DELTAS; ++i) {
        Data = xd->ref_lf_deltas[i];

        /* Frame level data */
        if (xd->ref_lf_deltas[i] != xd->last_ref_lf_deltas[i] ||
            cpi->oxcf.error_resilient_mode) {
          xd->last_ref_lf_deltas[i] = xd->ref_lf_deltas[i];
          vp8_write_bit(bc, 1);

          if (Data > 0) {
            vp8_write_literal(bc, (Data & 0x3F), 6);
            vp8_write_bit(bc, 0); /* sign */
          } else {
            Data = -Data;
            vp8_write_literal(bc, (Data & 0x3F), 6);
            vp8_write_bit(bc, 1); /* sign */
          }
        } else
          vp8_write_bit(bc, 0);
      }

      /* Send update */
      for (i = 0; i < MAX_MODE_LF_DELTAS; ++i) {
        Data = xd->mode_lf_deltas[i];

        if (xd->mode_lf_deltas[i] != xd->last_mode_lf_deltas[i] ||
            cpi->oxcf.error_resilient_mode) {
          xd->last_mode_lf_deltas[i] = xd->mode_lf_deltas[i];
          vp8_write_bit(bc, 1);

          if (Data > 0) {
            vp8_write_literal(bc, (Data & 0x3F), 6);
            vp8_write_bit(bc, 0); /* sign */
          } else {
            Data = -Data;
            vp8_write_literal(bc, (Data & 0x3F), 6);
            vp8_write_bit(bc, 1); /* sign */
          }
        } else
          vp8_write_bit(bc, 0);
      }
    }
  }

  /* Signal whether the multi-token partition is enabled */
  vp8_write_literal(bc, pc->multi_token_partition, 2);

  /* Frame Q baseline quantizer index */
  vp8_write_literal(bc, pc->base_qindex, 7);

  /* Transmit Dc, Second order and Uv quantizer delta information */
  put_delta_q(bc, pc->y1dc_delta_q);
  put_delta_q(bc, pc->y2dc_delta_q);
  put_delta_q(bc, pc->y2ac_delta_q);
  put_delta_q(bc, pc->uvdc_delta_q);
  put_delta_q(bc, pc->uvac_delta_q);

  /* When there is a key frame all reference buffers are updated using
   * the new key frame
   */
  if (pc->frame_type != KEY_FRAME) {
    /* Should the GF or ARF be updated using the transmitted frame
     * or buffer
     */
    vp8_write_bit(bc, pc->refresh_golden_frame);
    vp8_write_bit(bc, pc->refresh_alt_ref_frame);

    /* If not being updated from current frame should either GF or ARF
     * be updated from another buffer
     */
    if (!pc->refresh_golden_frame)
      vp8_write_literal(bc, pc->copy_buffer_to_gf, 2);

    if (!pc->refresh_alt_ref_frame)
      vp8_write_literal(bc, pc->copy_buffer_to_arf, 2);

    /* Indicate reference frame sign bias for Golden and ARF frames
     * (always 0 for last frame buffer)
     */
    vp8_write_bit(bc, pc->ref_frame_sign_bias[GOLDEN_FRAME]);
    vp8_write_bit(bc, pc->ref_frame_sign_bias[ALTREF_FRAME]);
  }

#if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
  if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS) {
    if (pc->frame_type == KEY_FRAME) {
      pc->refresh_entropy_probs = 1;
    } else {
      pc->refresh_entropy_probs = 0;
    }
  }
#endif

  vp8_write_bit(bc, pc->refresh_entropy_probs);

  if (pc->frame_type != KEY_FRAME) vp8_write_bit(bc, pc->refresh_last_frame);

  vpx_clear_system_state();

#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
  pack_coef_probs(cpi);
#else
  if (pc->refresh_entropy_probs == 0) {
    /* save a copy for later refresh */
    memcpy(&cpi->common.lfc, &cpi->common.fc, sizeof(cpi->common.fc));
  }

  vp8_update_coef_probs(cpi);
#endif

  /* Write out the mb_no_coeff_skip flag */
  vp8_write_bit(bc, pc->mb_no_coeff_skip);

  if (pc->frame_type == KEY_FRAME) {
    write_kfmodes(cpi);
  } else {
    pack_inter_mode_mvs(cpi);
  }

  vp8_stop_encode(bc);

  cx_data += bc->pos;

  oh.first_partition_length_in_bytes = cpi->bc->pos;

  /* update frame tag */
  {
    /* Pack partition size, show frame, version and frame type into 24 bits.
     * Store it 8 bits at a time.
     * https://tools.ietf.org/html/rfc6386
     * 9.1. Uncompressed Data Chunk
     *   The uncompressed data chunk comprises a common (for key frames and
     *   interframes) 3-byte frame tag that contains four fields, as follows:
     *
     *   1. A 1-bit frame type (0 for key frames, 1 for interframes).
     *
     *   2. A 3-bit version number (0 - 3 are defined as four different
     *      profiles with different decoding complexity; other values may be
     *      defined for future variants of the VP8 data format).
     *
     *   3. A 1-bit show_frame flag (0 when current frame is not for display,
     *      1 when current frame is for display).
     *
     *   4. A 19-bit field containing the size of the first data partition in
     *      bytes.
     */
    int v = (oh.first_partition_length_in_bytes << 5) | (oh.show_frame << 4) |
            (oh.version << 1) | oh.type;

    dest[0] = v & 0xff;
    dest[1] = (v >> 8) & 0xff;
    dest[2] = v >> 16;
  }
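  /* Illustration (not part of libvpx): a worked example of the 3-byte frame
   * tag above, with illustrative values. A shown key frame (type 0,
   * version 0, show_frame 1) whose first partition is 100 bytes gives
   * v = (100 << 5) | (1 << 4) = 0x0C90, stored as bytes 0x90 0x0C 0x00. A
   * decoder recovers the fields as type = v & 1, version = (v >> 1) & 7,
   * show_frame = (v >> 4) & 1 and first_partition_size = v >> 5.
   */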
  *size = VP8_HEADER_SIZE + extra_bytes_packed + cpi->bc->pos;

  cpi->partition_sz[0] = (unsigned int)*size;

#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
  {
    const int num_part = (1 << pc->multi_token_partition);
    unsigned char *dp = cpi->partition_d[0] + cpi->partition_sz[0];

    if (num_part > 1) {
      /* write token part sizes (all but last) if more than 1 */
      validate_buffer(dp, 3 * (num_part - 1), cpi->partition_d_end[0],
                      &pc->error);

      cpi->partition_sz[0] += 3 * (num_part - 1);

      for (i = 1; i < num_part; ++i) {
        write_partition_size(dp, cpi->partition_sz[i]);
        dp += 3;
      }
    }

    if (!cpi->output_partition) {
      /* concatenate partition buffers */
      for (i = 0; i < num_part; ++i) {
        memmove(dp, cpi->partition_d[i + 1], cpi->partition_sz[i + 1]);
        cpi->partition_d[i + 1] = dp;
        dp += cpi->partition_sz[i + 1];
      }
    }

    /* update total size */
    *size = 0;
    for (i = 0; i < num_part + 1; ++i) {
      *size += cpi->partition_sz[i];
    }
  }
#else
  if (pc->multi_token_partition != ONE_PARTITION) {
    int num_part = 1 << pc->multi_token_partition;

    /* partition size table at the end of first partition */
    cpi->partition_sz[0] += 3 * (num_part - 1);
    *size += 3 * (num_part - 1);

    validate_buffer(cx_data, 3 * (num_part - 1), cx_data_end, &pc->error);

    for (i = 1; i < num_part + 1; ++i) {
      cpi->bc[i].error = &pc->error;
    }

    pack_tokens_into_partitions(cpi, cx_data + 3 * (num_part - 1), cx_data_end,
                                num_part);

    for (i = 1; i < num_part; ++i) {
      cpi->partition_sz[i] = cpi->bc[i].pos;
      write_partition_size(cx_data, cpi->partition_sz[i]);
      cx_data += 3;
      *size += cpi->partition_sz[i]; /* add to total */
    }

    /* add last partition to total size */
    cpi->partition_sz[i] = cpi->bc[i].pos;
    *size += cpi->partition_sz[i];
  } else {
    bc[1].error = &pc->error;

    vp8_start_encode(&cpi->bc[1], cx_data, cx_data_end);

#if CONFIG_MULTITHREAD
    if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) {
      pack_mb_row_tokens(cpi, &cpi->bc[1]);
    } else {
      vp8_pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
    }
#else
    vp8_pack_tokens(&cpi->bc[1], cpi->tok, cpi->tok_count);
#endif  // CONFIG_MULTITHREAD

    vp8_stop_encode(&cpi->bc[1]);

    *size += cpi->bc[1].pos;
    cpi->partition_sz[1] = cpi->bc[1].pos;
  }
#endif
}

libvpx-1.8.2/vp8/encoder/bitstream.h

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP8_ENCODER_BITSTREAM_H_
#define VPX_VP8_ENCODER_BITSTREAM_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "vp8/encoder/treewriter.h"
#include "vp8/encoder/tokenize.h"

void vp8_pack_tokens(vp8_writer *w, const TOKENEXTRA *p, int xcount);
void vp8_convert_rfct_to_prob(struct VP8_COMP *const cpi);
void vp8_calc_ref_frame_costs(int *ref_frame_cost, int prob_intra,
                              int prob_last, int prob_garf);
int vp8_estimate_entropy_savings(struct VP8_COMP *cpi);
void vp8_update_coef_probs(struct VP8_COMP *cpi);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP8_ENCODER_BITSTREAM_H_
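/* Illustration (not part of libvpx): a minimal usage sketch for the
 * bitstream interface, assuming an initialized VP8_COMP and an output buffer
 * of adequate size; pack_one_frame and its parameters are hypothetical, only
 * vp8_pack_bitstream() is declared in this codebase:
 */
#if 0
static size_t pack_one_frame(struct VP8_COMP *cpi, unsigned char *buf,
                             size_t cap) {
  size_t size = 0;

  /* Writes the frame header, mode/MV data and token partitions into buf. */
  vp8_pack_bitstream(cpi, buf, buf + cap, &size);
  return size;
}
#endif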
libvpx-1.8.2/vp8/encoder/block.h

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP8_ENCODER_BLOCK_H_
#define VPX_VP8_ENCODER_BLOCK_H_

#include "vp8/common/onyx.h"
#include "vp8/common/blockd.h"
#include "vp8/common/entropymv.h"
#include "vp8/common/entropy.h"
#include "vpx_ports/mem.h"

#ifdef __cplusplus
extern "C" {
#endif

#define MAX_MODES 20
#define MAX_ERROR_BINS 1024

/* motion search site */
typedef struct {
  MV mv;
  int offset;
} search_site;

typedef struct block {
  /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
  short *src_diff;
  short *coeff;

  /* 16 Y blocks, 4 U blocks, 4 V blocks each with 16 entries */
  short *quant;
  short *quant_fast;
  short *quant_shift;
  short *zbin;
  short *zrun_zbin_boost;
  short *round;

  /* Zbin Over Quant value */
  short zbin_extra;

  unsigned char **base_src;
  int src;
  int src_stride;
} BLOCK;

typedef struct {
  int count;
  struct {
    B_PREDICTION_MODE mode;
    int_mv mv;
  } bmi[16];
} PARTITION_INFO;

typedef struct macroblock {
  DECLARE_ALIGNED(16, short, src_diff[400]); /* 25 blocks Y,U,V,Y2 */
  DECLARE_ALIGNED(16, short, coeff[400]);    /* 25 blocks Y,U,V,Y2 */
  DECLARE_ALIGNED(16, unsigned char, thismb[256]);

  unsigned char *thismb_ptr;
  /* 16 Y, 4 U, 4 V, 1 DC 2nd order block */
  BLOCK block[25];

  YV12_BUFFER_CONFIG src;

  MACROBLOCKD e_mbd;
  PARTITION_INFO *partition_info; /* work pointer */
  PARTITION_INFO *pi;  /* Corresponds to upper left visible macroblock */
  PARTITION_INFO *pip; /* Base of allocated array */

  int ref_frame_cost[MAX_REF_FRAMES];

  search_site *ss;
  int ss_count;
  int searches_per_step;

  int errorperbit;
  int sadperbit16;
  int sadperbit4;
  int rddiv;
  int rdmult;
  unsigned int *mb_activity_ptr;
  int *mb_norm_activity_ptr;
  signed int act_zbin_adj;
  signed int last_act_zbin_adj;

  int *mvcost[2];
  int *mvsadcost[2];
  int (*mbmode_cost)[MB_MODE_COUNT];
  int (*intra_uv_mode_cost)[MB_MODE_COUNT];
  int (*bmode_costs)[10][10];
  int *inter_bmode_costs;
  int (*token_costs)[COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS];

  /* These define limits to motion vector components to prevent
   * them from extending outside the UMV borders.
   */
  int mv_col_min;
  int mv_col_max;
  int mv_row_min;
  int mv_row_max;

  int skip;

  unsigned int encode_breakout;

  signed char *gf_active_ptr;

  unsigned char *active_ptr;
  MV_CONTEXT *mvc;

  int optimize;
  int q_index;
  int is_skin;
  int denoise_zeromv;

#if CONFIG_TEMPORAL_DENOISING
  int increase_denoising;
  MB_PREDICTION_MODE best_sse_inter_mode;
  int_mv best_sse_mv;
  MV_REFERENCE_FRAME best_reference_frame;
  MV_REFERENCE_FRAME best_zeromv_reference_frame;
  unsigned char need_to_clamp_best_mvs;
#endif

  int skip_true_count;
  unsigned int coef_counts[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS]
                          [MAX_ENTROPY_TOKENS];
  unsigned int MVcount[2][MVvals]; /* (row,col) MV cts this frame */
  int ymode_count[VP8_YMODES];     /* intra MB type cts this frame */
  int uv_mode_count[VP8_UV_MODES]; /* intra MB type cts this frame */
  int64_t prediction_error;
  int64_t intra_error;
  int count_mb_ref_frame_usage[MAX_REF_FRAMES];

  int rd_thresh_mult[MAX_MODES];
  int rd_threshes[MAX_MODES];
  unsigned int mbs_tested_so_far;
  unsigned int mode_test_hit_counts[MAX_MODES];
  int zbin_mode_boost_enabled;
  int zbin_mode_boost;
  int last_zbin_mode_boost;

  int last_zbin_over_quant;
  int zbin_over_quant;
  int error_bins[MAX_ERROR_BINS];

  void (*short_fdct4x4)(short *input, short *output, int pitch);
  void (*short_fdct8x4)(short *input, short *output, int pitch);
  void (*short_walsh4x4)(short *input, short *output, int pitch);
  void (*quantize_b)(BLOCK *b, BLOCKD *d);

  unsigned int mbs_zero_last_dot_suppress;
  int zero_last_dot_suppress;
} MACROBLOCK;

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP8_ENCODER_BLOCK_H_
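/* Illustration (not part of libvpx): mv_col_min/mv_col_max and
 * mv_row_min/mv_row_max above bound motion search so predictors stay inside
 * the extended (UMV) borders. A sketch of how a search loop would clamp a
 * candidate vector against those limits; clamp_search_mv is a hypothetical
 * helper:
 */
#if 0
static void clamp_search_mv(MV *mv, const MACROBLOCK *x) {
  if (mv->col < x->mv_col_min) mv->col = (short)x->mv_col_min;
  if (mv->col > x->mv_col_max) mv->col = (short)x->mv_col_max;
  if (mv->row < x->mv_row_min) mv->row = (short)x->mv_row_min;
  if (mv->row > x->mv_row_max) mv->row = (short)x->mv_row_max;
}
#endif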
libvpx-1.8.2/vp8/encoder/boolhuff.c

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "boolhuff.h"

#if defined(SECTIONBITS_OUTPUT)
unsigned __int64 Sectionbits[500];
#endif

const unsigned int vp8_prob_cost[256] = {
  2047, 2047, 1791, 1641, 1535, 1452, 1385, 1328, 1279, 1235, 1196, 1161,
  1129, 1099, 1072, 1046, 1023, 1000, 979,  959,  940,  922,  905,  889,
  873,  858,  843,  829,  816,  803,  790,  778,  767,  755,  744,  733,
  723,  713,  703,  693,  684,  675,  666,  657,  649,  641,  633,  625,
  617,  609,  602,  594,  587,  580,  573,  567,  560,  553,  547,  541,
  534,  528,  522,  516,  511,  505,  499,  494,  488,  483,  477,  472,
  467,  462,  457,  452,  447,  442,  437,  433,  428,  424,  419,  415,
  410,  406,  401,  397,  393,  389,  385,  381,  377,  373,  369,  365,
  361,  357,  353,  349,  346,  342,  338,  335,  331,  328,  324,  321,
  317,  314,  311,  307,  304,  301,  297,  294,  291,  288,  285,  281,
  278,  275,  272,  269,  266,  263,  260,  257,  255,  252,  249,  246,
  243,  240,  238,  235,  232,  229,  227,  224,  221,  219,  216,  214,
  211,  208,  206,  203,  201,  198,  196,  194,  191,  189,  186,  184,
  181,  179,  177,  174,  172,  170,  168,  165,  163,  161,  159,  156,
  154,  152,  150,  148,  145,  143,  141,  139,  137,  135,  133,  131,
  129,  127,  125,  123,  121,  119,  117,  115,  113,  111,  109,  107,
  105,  103,  101,  99,   97,   95,   93,   92,   90,   88,   86,   84,
  82,   81,   79,   77,   75,   73,   72,   70,   68,   66,   65,   63,
  61,   60,   58,   56,   55,   53,   51,   50,   48,   46,   45,   43,
  41,   40,   38,   37,   35,   33,   32,   30,   29,   27,   25,   24,
  22,   21,   19,   18,   16,   15,   13,   12,   10,   9,    7,    6,
  4,    3,    1,    1
};

void vp8_start_encode(BOOL_CODER *bc, unsigned char *source,
                      unsigned char *source_end) {
  bc->lowvalue = 0;
  bc->range = 255;
  bc->count = -24;
  bc->buffer = source;
  bc->buffer_end = source_end;
  bc->pos = 0;
}

void vp8_stop_encode(BOOL_CODER *bc) {
  int i;

  for (i = 0; i < 32; ++i) vp8_encode_bool(bc, 0, 128);
}

void vp8_encode_value(BOOL_CODER *bc, int data, int bits) {
  int bit;

  for (bit = bits - 1; bit >= 0; bit--) {
    vp8_encode_bool(bc, (1 & (data >> bit)), 0x80);
  }
}
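/* Illustration (not part of libvpx): the typical lifecycle of the writer
 * defined above -- bind it to a buffer, emit bools, then flush.
 * vp8_stop_encode() pads with 32 zero bools so the final bytes are fully
 * determined. The buffer size here is illustrative; bool_writer_example is a
 * hypothetical helper:
 */
#if 0
static int bool_writer_example(void) {
  unsigned char buf[64];
  BOOL_CODER bc;
  struct vpx_internal_error_info error;

  bc.error = &error; /* callers attach the error context themselves */
  vp8_start_encode(&bc, buf, buf + sizeof(buf));
  vp8_encode_bool(&bc, 1, 128); /* one bool at even odds        */
  vp8_encode_value(&bc, 5, 3);  /* a 3-bit literal, MSB first   */
  vp8_stop_encode(&bc);
  return (int)bc.pos; /* number of bytes produced */
}
#endif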
libvpx-1.8.2/vp8/encoder/boolhuff.h

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/****************************************************************************
 *
 *   Module Title :     boolhuff.h
 *
 *   Description  :     Bool Coder header file.
 *
 ****************************************************************************/
#ifndef VPX_VP8_ENCODER_BOOLHUFF_H_
#define VPX_VP8_ENCODER_BOOLHUFF_H_

#include "vpx_ports/mem.h"
#include "vpx/internal/vpx_codec_internal.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef struct {
  unsigned int lowvalue;
  unsigned int range;
  int count;
  unsigned int pos;
  unsigned char *buffer;
  unsigned char *buffer_end;
  struct vpx_internal_error_info *error;
} BOOL_CODER;

void vp8_start_encode(BOOL_CODER *bc, unsigned char *source,
                      unsigned char *source_end);

void vp8_encode_value(BOOL_CODER *bc, int data, int bits);
void vp8_stop_encode(BOOL_CODER *bc);
extern const unsigned int vp8_prob_cost[256];

DECLARE_ALIGNED(16, extern const unsigned char, vp8_norm[256]);

static int validate_buffer(const unsigned char *start, size_t len,
                           const unsigned char *end,
                           struct vpx_internal_error_info *error) {
  if (start + len > start && start + len < end) {
    return 1;
  } else {
    vpx_internal_error(error, VPX_CODEC_CORRUPT_FRAME,
                       "Truncated packet or corrupt partition ");
  }

  return 0;
}

static void vp8_encode_bool(BOOL_CODER *bc, int bit, int probability) {
  unsigned int split;
  int count = bc->count;
  unsigned int range = bc->range;
  unsigned int lowvalue = bc->lowvalue;
  int shift;

  split = 1 + (((range - 1) * probability) >> 8);

  range = split;

  if (bit) {
    lowvalue += split;
    range = bc->range - split;
  }

  shift = vp8_norm[range];

  range <<= shift;
  count += shift;

  if (count >= 0) {
    int offset = shift - count;

    if ((lowvalue << (offset - 1)) & 0x80000000) {
      int x = bc->pos - 1;

      while (x >= 0 && bc->buffer[x] == 0xff) {
        bc->buffer[x] = (unsigned char)0;
        x--;
      }

      bc->buffer[x] += 1;
    }

    validate_buffer(bc->buffer + bc->pos, 1, bc->buffer_end, bc->error);
    bc->buffer[bc->pos++] = (lowvalue >> (24 - offset) & 0xff);

    lowvalue <<= offset;
    shift = count;
    lowvalue &= 0xffffff;
    count -= 8;
  }

  lowvalue <<= shift;
  bc->count = count;
  bc->lowvalue = lowvalue;
  bc->range = range;
}

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP8_ENCODER_BOOLHUFF_H_
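/* Illustration (not part of libvpx): vp8_encode_bool() above is one step of
 * a binary arithmetic coder. The current range is split at
 * 1 + (((range - 1) * p) >> 8); the zero branch keeps the lower part, the
 * one branch keeps the upper part, and vp8_norm renormalizes range upward,
 * carrying completed bytes into the buffer (the 0xff loop propagates
 * carries). vp8_prob_cost in boolhuff.c tabulates the matching bit cost,
 * roughly -log2(p/256) in 1/256-bit units, so vp8_prob_cost[128] is about
 * 256, i.e. one bit for an even split.
 */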
libvpx-1.8.2/vp8/encoder/copy_c.c

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <string.h>

#include "./vp8_rtcd.h"
#include "vpx/vpx_integer.h"

/* Copy 2 macroblocks to a buffer */
void vp8_copy32xn_c(const unsigned char *src_ptr, int src_stride,
                    unsigned char *dst_ptr, int dst_stride, int height) {
  int r;

  for (r = 0; r < height; ++r) {
    memcpy(dst_ptr, src_ptr, 32);

    src_ptr += src_stride;
    dst_ptr += dst_stride;
  }
}

libvpx-1.8.2/vp8/encoder/dct.c

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vp8_rtcd.h"

void vp8_short_fdct4x4_c(short *input, short *output, int pitch) {
  int i;
  int a1, b1, c1, d1;
  short *ip = input;
  short *op = output;

  for (i = 0; i < 4; ++i) {
    a1 = ((ip[0] + ip[3]) * 8);
    b1 = ((ip[1] + ip[2]) * 8);
    c1 = ((ip[1] - ip[2]) * 8);
    d1 = ((ip[0] - ip[3]) * 8);

    op[0] = a1 + b1;
    op[2] = a1 - b1;

    op[1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12;
    op[3] = (d1 * 2217 - c1 * 5352 + 7500) >> 12;

    ip += pitch / 2;
    op += 4;
  }
  ip = output;
  op = output;
  for (i = 0; i < 4; ++i) {
    a1 = ip[0] + ip[12];
    b1 = ip[4] + ip[8];
    c1 = ip[4] - ip[8];
    d1 = ip[0] - ip[12];

    op[0] = (a1 + b1 + 7) >> 4;
    op[8] = (a1 - b1 + 7) >> 4;

    op[4] = ((c1 * 2217 + d1 * 5352 + 12000) >> 16) + (d1 != 0);
    op[12] = (d1 * 2217 - c1 * 5352 + 51000) >> 16;

    ip++;
    op++;
  }
}

void vp8_short_fdct8x4_c(short *input, short *output, int pitch) {
  vp8_short_fdct4x4_c(input, output, pitch);
  vp8_short_fdct4x4_c(input + 4, output + 16, pitch);
}

void vp8_short_walsh4x4_c(short *input, short *output, int pitch) {
  int i;
  int a1, b1, c1, d1;
  int a2, b2, c2, d2;
  short *ip = input;
  short *op = output;

  for (i = 0; i < 4; ++i) {
    a1 = ((ip[0] + ip[2]) * 4);
    d1 = ((ip[1] + ip[3]) * 4);
    c1 = ((ip[1] - ip[3]) * 4);
    b1 = ((ip[0] - ip[2]) * 4);

    op[0] = a1 + d1 + (a1 != 0);
    op[1] = b1 + c1;
    op[2] = b1 - c1;
    op[3] = a1 - d1;
    ip += pitch / 2;
    op += 4;
  }

  ip = output;
  op = output;

  for (i = 0; i < 4; ++i) {
    a1 = ip[0] + ip[8];
    d1 = ip[4] + ip[12];
    c1 = ip[4] - ip[12];
    b1 = ip[0] - ip[8];

    a2 = a1 + d1;
    b2 = b1 + c1;
    c2 = b1 - c1;
    d2 = a1 - d1;

    a2 += a2 < 0;
    b2 += b2 < 0;
    c2 += c2 < 0;
    d2 += d2 < 0;

    op[0] = (a2 + 3) >> 3;
    op[4] = (b2 + 3) >> 3;
    op[8] = (c2 + 3) >> 3;
    op[12] = (d2 + 3) >> 3;

    ip++;
    op++;
  }
}
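/* Illustration (not part of libvpx): a quick sanity property of
 * vp8_short_fdct4x4_c above. For a constant 4x4 input block of value x both
 * passes reduce to sums, giving a DC coefficient of 8 * x; the rounding
 * offsets in the butterflies can leave a small residue of a unit or so in a
 * few AC positions. fdct_dc_example is a hypothetical check:
 */
#if 0
static int fdct_dc_example(void) {
  short in[16], out[16];
  int i;

  for (i = 0; i < 16; ++i) in[i] = 10; /* constant block */
  vp8_short_fdct4x4_c(in, out, 8);     /* pitch of 8 bytes = 4 shorts/row */
  return out[0] == 80;                 /* DC = 8 * 10 */
}
#endif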
libvpx-1.8.2/vp8/encoder/dct_value_cost.h

/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
*/ #ifndef VPX_VP8_ENCODER_DCT_VALUE_COST_H_ #define VPX_VP8_ENCODER_DCT_VALUE_COST_H_ #ifdef __cplusplus extern "C" { #endif /* Generated file, included by tokenize.c */ /* Values generated by fill_value_tokens() */ static const short dct_value_cost[2048 * 2] = { 8285, 8277, 8267, 8259, 8253, 8245, 8226, 8218, 8212, 8204, 8194, 8186, 8180, 8172, 8150, 8142, 8136, 8128, 8118, 8110, 8104, 8096, 8077, 8069, 8063, 8055, 8045, 8037, 8031, 8023, 7997, 7989, 7983, 7975, 7965, 7957, 7951, 7943, 7924, 7916, 7910, 7902, 7892, 7884, 7878, 7870, 7848, 7840, 7834, 7826, 7816, 7808, 7802, 7794, 7775, 7767, 7761, 7753, 7743, 7735, 7729, 7721, 7923, 7915, 7909, 7901, 7891, 7883, 7877, 7869, 7850, 7842, 7836, 7828, 7818, 7810, 7804, 7796, 7774, 7766, 7760, 7752, 7742, 7734, 7728, 7720, 7701, 7693, 7687, 7679, 7669, 7661, 7655, 7647, 7621, 7613, 7607, 7599, 7589, 7581, 7575, 7567, 7548, 7540, 7534, 7526, 7516, 7508, 7502, 7494, 7472, 7464, 7458, 7450, 7440, 7432, 7426, 7418, 7399, 7391, 7385, 7377, 7367, 7359, 7353, 7345, 7479, 7471, 7465, 7457, 7447, 7439, 7433, 7425, 7406, 7398, 7392, 7384, 7374, 7366, 7360, 7352, 7330, 7322, 7316, 7308, 7298, 7290, 7284, 7276, 7257, 7249, 7243, 7235, 7225, 7217, 7211, 7203, 7177, 7169, 7163, 7155, 7145, 7137, 7131, 7123, 7104, 7096, 7090, 7082, 7072, 7064, 7058, 7050, 7028, 7020, 7014, 7006, 6996, 6988, 6982, 6974, 6955, 6947, 6941, 6933, 6923, 6915, 6909, 6901, 7632, 7624, 7618, 7610, 7600, 7592, 7586, 7578, 7559, 7551, 7545, 7537, 7527, 7519, 7513, 7505, 7483, 7475, 7469, 7461, 7451, 7443, 7437, 7429, 7410, 7402, 7396, 7388, 7378, 7370, 7364, 7356, 7330, 7322, 7316, 7308, 7298, 7290, 7284, 7276, 7257, 7249, 7243, 7235, 7225, 7217, 7211, 7203, 7181, 7173, 7167, 7159, 7149, 7141, 7135, 7127, 7108, 7100, 7094, 7086, 7076, 7068, 7062, 7054, 7188, 7180, 7174, 7166, 7156, 7148, 7142, 7134, 7115, 7107, 7101, 7093, 7083, 7075, 7069, 7061, 7039, 7031, 7025, 7017, 7007, 6999, 6993, 6985, 6966, 6958, 6952, 6944, 6934, 6926, 6920, 6912, 6886, 6878, 6872, 6864, 6854, 6846, 6840, 6832, 6813, 6805, 6799, 6791, 6781, 6773, 6767, 6759, 6737, 6729, 6723, 6715, 6705, 6697, 6691, 6683, 6664, 6656, 6650, 6642, 6632, 6624, 6618, 6610, 6812, 6804, 6798, 6790, 6780, 6772, 6766, 6758, 6739, 6731, 6725, 6717, 6707, 6699, 6693, 6685, 6663, 6655, 6649, 6641, 6631, 6623, 6617, 6609, 6590, 6582, 6576, 6568, 6558, 6550, 6544, 6536, 6510, 6502, 6496, 6488, 6478, 6470, 6464, 6456, 6437, 6429, 6423, 6415, 6405, 6397, 6391, 6383, 6361, 6353, 6347, 6339, 6329, 6321, 6315, 6307, 6288, 6280, 6274, 6266, 6256, 6248, 6242, 6234, 6368, 6360, 6354, 6346, 6336, 6328, 6322, 6314, 6295, 6287, 6281, 6273, 6263, 6255, 6249, 6241, 6219, 6211, 6205, 6197, 6187, 6179, 6173, 6165, 6146, 6138, 6132, 6124, 6114, 6106, 6100, 6092, 6066, 6058, 6052, 6044, 6034, 6026, 6020, 6012, 5993, 5985, 5979, 5971, 5961, 5953, 5947, 5939, 5917, 5909, 5903, 5895, 5885, 5877, 5871, 5863, 5844, 5836, 5830, 5822, 5812, 5804, 5798, 5790, 6697, 6689, 6683, 6675, 6665, 6657, 6651, 6643, 6624, 6616, 6610, 6602, 6592, 6584, 6578, 6570, 6548, 6540, 6534, 6526, 6516, 6508, 6502, 6494, 6475, 6467, 6461, 6453, 6443, 6435, 6429, 6421, 6395, 6387, 6381, 6373, 6363, 6355, 6349, 6341, 6322, 6314, 6308, 6300, 6290, 6282, 6276, 6268, 6246, 6238, 6232, 6224, 6214, 6206, 6200, 6192, 6173, 6165, 6159, 6151, 6141, 6133, 6127, 6119, 6253, 6245, 6239, 6231, 6221, 6213, 6207, 6199, 6180, 6172, 6166, 6158, 6148, 6140, 6134, 6126, 6104, 6096, 6090, 6082, 6072, 6064, 6058, 6050, 6031, 6023, 6017, 6009, 5999, 5991, 5985, 5977, 5951, 5943, 5937, 5929, 5919, 5911, 
5905, 5897, 5878, 5870, 5864, 5856, 5846, 5838, 5832, 5824, 5802, 5794, 5788, 5780, 5770, 5762, 5756, 5748, 5729, 5721, 5715, 5707, 5697, 5689, 5683, 5675, 5877, 5869, 5863, 5855, 5845, 5837, 5831, 5823, 5804, 5796, 5790, 5782, 5772, 5764, 5758, 5750, 5728, 5720, 5714, 5706, 5696, 5688, 5682, 5674, 5655, 5647, 5641, 5633, 5623, 5615, 5609, 5601, 5575, 5567, 5561, 5553, 5543, 5535, 5529, 5521, 5502, 5494, 5488, 5480, 5470, 5462, 5456, 5448, 5426, 5418, 5412, 5404, 5394, 5386, 5380, 5372, 5353, 5345, 5339, 5331, 5321, 5313, 5307, 5299, 5433, 5425, 5419, 5411, 5401, 5393, 5387, 5379, 5360, 5352, 5346, 5338, 5328, 5320, 5314, 5306, 5284, 5276, 5270, 5262, 5252, 5244, 5238, 5230, 5211, 5203, 5197, 5189, 5179, 5171, 5165, 5157, 5131, 5123, 5117, 5109, 5099, 5091, 5085, 5077, 5058, 5050, 5044, 5036, 5026, 5018, 5012, 5004, 4982, 4974, 4968, 4960, 4950, 4942, 4936, 4928, 4909, 4901, 4895, 4887, 4877, 4869, 4863, 4855, 5586, 5578, 5572, 5564, 5554, 5546, 5540, 5532, 5513, 5505, 5499, 5491, 5481, 5473, 5467, 5459, 5437, 5429, 5423, 5415, 5405, 5397, 5391, 5383, 5364, 5356, 5350, 5342, 5332, 5324, 5318, 5310, 5284, 5276, 5270, 5262, 5252, 5244, 5238, 5230, 5211, 5203, 5197, 5189, 5179, 5171, 5165, 5157, 5135, 5127, 5121, 5113, 5103, 5095, 5089, 5081, 5062, 5054, 5048, 5040, 5030, 5022, 5016, 5008, 5142, 5134, 5128, 5120, 5110, 5102, 5096, 5088, 5069, 5061, 5055, 5047, 5037, 5029, 5023, 5015, 4993, 4985, 4979, 4971, 4961, 4953, 4947, 4939, 4920, 4912, 4906, 4898, 4888, 4880, 4874, 4866, 4840, 4832, 4826, 4818, 4808, 4800, 4794, 4786, 4767, 4759, 4753, 4745, 4735, 4727, 4721, 4713, 4691, 4683, 4677, 4669, 4659, 4651, 4645, 4637, 4618, 4610, 4604, 4596, 4586, 4578, 4572, 4564, 4766, 4758, 4752, 4744, 4734, 4726, 4720, 4712, 4693, 4685, 4679, 4671, 4661, 4653, 4647, 4639, 4617, 4609, 4603, 4595, 4585, 4577, 4571, 4563, 4544, 4536, 4530, 4522, 4512, 4504, 4498, 4490, 4464, 4456, 4450, 4442, 4432, 4424, 4418, 4410, 4391, 4383, 4377, 4369, 4359, 4351, 4345, 4337, 4315, 4307, 4301, 4293, 4283, 4275, 4269, 4261, 4242, 4234, 4228, 4220, 4210, 4202, 4196, 4188, 4322, 4314, 4308, 4300, 4290, 4282, 4276, 4268, 4249, 4241, 4235, 4227, 4217, 4209, 4203, 4195, 4173, 4165, 4159, 4151, 4141, 4133, 4127, 4119, 4100, 4092, 4086, 4078, 4068, 4060, 4054, 4046, 4020, 4012, 4006, 3998, 3988, 3980, 3974, 3966, 3947, 3939, 3933, 3925, 3915, 3907, 3901, 3893, 3871, 3863, 3857, 3849, 3839, 3831, 3825, 3817, 3798, 3790, 3784, 3776, 3766, 3758, 3752, 3744, 6697, 6689, 6683, 6675, 6665, 6657, 6651, 6643, 6624, 6616, 6610, 6602, 6592, 6584, 6578, 6570, 6548, 6540, 6534, 6526, 6516, 6508, 6502, 6494, 6475, 6467, 6461, 6453, 6443, 6435, 6429, 6421, 6395, 6387, 6381, 6373, 6363, 6355, 6349, 6341, 6322, 6314, 6308, 6300, 6290, 6282, 6276, 6268, 6246, 6238, 6232, 6224, 6214, 6206, 6200, 6192, 6173, 6165, 6159, 6151, 6141, 6133, 6127, 6119, 6253, 6245, 6239, 6231, 6221, 6213, 6207, 6199, 6180, 6172, 6166, 6158, 6148, 6140, 6134, 6126, 6104, 6096, 6090, 6082, 6072, 6064, 6058, 6050, 6031, 6023, 6017, 6009, 5999, 5991, 5985, 5977, 5951, 5943, 5937, 5929, 5919, 5911, 5905, 5897, 5878, 5870, 5864, 5856, 5846, 5838, 5832, 5824, 5802, 5794, 5788, 5780, 5770, 5762, 5756, 5748, 5729, 5721, 5715, 5707, 5697, 5689, 5683, 5675, 5877, 5869, 5863, 5855, 5845, 5837, 5831, 5823, 5804, 5796, 5790, 5782, 5772, 5764, 5758, 5750, 5728, 5720, 5714, 5706, 5696, 5688, 5682, 5674, 5655, 5647, 5641, 5633, 5623, 5615, 5609, 5601, 5575, 5567, 5561, 5553, 5543, 5535, 5529, 5521, 5502, 5494, 5488, 5480, 5470, 5462, 5456, 5448, 5426, 5418, 5412, 5404, 5394, 5386, 
5380, 5372, 5353, 5345, 5339, 5331, 5321, 5313, 5307, 5299, 5433, 5425, 5419, 5411, 5401, 5393, 5387, 5379, 5360, 5352, 5346, 5338, 5328, 5320, 5314, 5306, 5284, 5276, 5270, 5262, 5252, 5244, 5238, 5230, 5211, 5203, 5197, 5189, 5179, 5171, 5165, 5157, 5131, 5123, 5117, 5109, 5099, 5091, 5085, 5077, 5058, 5050, 5044, 5036, 5026, 5018, 5012, 5004, 4982, 4974, 4968, 4960, 4950, 4942, 4936, 4928, 4909, 4901, 4895, 4887, 4877, 4869, 4863, 4855, 5586, 5578, 5572, 5564, 5554, 5546, 5540, 5532, 5513, 5505, 5499, 5491, 5481, 5473, 5467, 5459, 5437, 5429, 5423, 5415, 5405, 5397, 5391, 5383, 5364, 5356, 5350, 5342, 5332, 5324, 5318, 5310, 5284, 5276, 5270, 5262, 5252, 5244, 5238, 5230, 5211, 5203, 5197, 5189, 5179, 5171, 5165, 5157, 5135, 5127, 5121, 5113, 5103, 5095, 5089, 5081, 5062, 5054, 5048, 5040, 5030, 5022, 5016, 5008, 5142, 5134, 5128, 5120, 5110, 5102, 5096, 5088, 5069, 5061, 5055, 5047, 5037, 5029, 5023, 5015, 4993, 4985, 4979, 4971, 4961, 4953, 4947, 4939, 4920, 4912, 4906, 4898, 4888, 4880, 4874, 4866, 4840, 4832, 4826, 4818, 4808, 4800, 4794, 4786, 4767, 4759, 4753, 4745, 4735, 4727, 4721, 4713, 4691, 4683, 4677, 4669, 4659, 4651, 4645, 4637, 4618, 4610, 4604, 4596, 4586, 4578, 4572, 4564, 4766, 4758, 4752, 4744, 4734, 4726, 4720, 4712, 4693, 4685, 4679, 4671, 4661, 4653, 4647, 4639, 4617, 4609, 4603, 4595, 4585, 4577, 4571, 4563, 4544, 4536, 4530, 4522, 4512, 4504, 4498, 4490, 4464, 4456, 4450, 4442, 4432, 4424, 4418, 4410, 4391, 4383, 4377, 4369, 4359, 4351, 4345, 4337, 4315, 4307, 4301, 4293, 4283, 4275, 4269, 4261, 4242, 4234, 4228, 4220, 4210, 4202, 4196, 4188, 4322, 4314, 4308, 4300, 4290, 4282, 4276, 4268, 4249, 4241, 4235, 4227, 4217, 4209, 4203, 4195, 4173, 4165, 4159, 4151, 4141, 4133, 4127, 4119, 4100, 4092, 4086, 4078, 4068, 4060, 4054, 4046, 4020, 4012, 4006, 3998, 3988, 3980, 3974, 3966, 3947, 3939, 3933, 3925, 3915, 3907, 3901, 3893, 3871, 3863, 3857, 3849, 3839, 3831, 3825, 3817, 3798, 3790, 3784, 3776, 3766, 3758, 3752, 3744, 4651, 4643, 4637, 4629, 4619, 4611, 4605, 4597, 4578, 4570, 4564, 4556, 4546, 4538, 4532, 4524, 4502, 4494, 4488, 4480, 4470, 4462, 4456, 4448, 4429, 4421, 4415, 4407, 4397, 4389, 4383, 4375, 4349, 4341, 4335, 4327, 4317, 4309, 4303, 4295, 4276, 4268, 4262, 4254, 4244, 4236, 4230, 4222, 4200, 4192, 4186, 4178, 4168, 4160, 4154, 4146, 4127, 4119, 4113, 4105, 4095, 4087, 4081, 4073, 4207, 4199, 4193, 4185, 4175, 4167, 4161, 4153, 4134, 4126, 4120, 4112, 4102, 4094, 4088, 4080, 4058, 4050, 4044, 4036, 4026, 4018, 4012, 4004, 3985, 3977, 3971, 3963, 3953, 3945, 3939, 3931, 3905, 3897, 3891, 3883, 3873, 3865, 3859, 3851, 3832, 3824, 3818, 3810, 3800, 3792, 3786, 3778, 3756, 3748, 3742, 3734, 3724, 3716, 3710, 3702, 3683, 3675, 3669, 3661, 3651, 3643, 3637, 3629, 3831, 3823, 3817, 3809, 3799, 3791, 3785, 3777, 3758, 3750, 3744, 3736, 3726, 3718, 3712, 3704, 3682, 3674, 3668, 3660, 3650, 3642, 3636, 3628, 3609, 3601, 3595, 3587, 3577, 3569, 3563, 3555, 3529, 3521, 3515, 3507, 3497, 3489, 3483, 3475, 3456, 3448, 3442, 3434, 3424, 3416, 3410, 3402, 3380, 3372, 3366, 3358, 3348, 3340, 3334, 3326, 3307, 3299, 3293, 3285, 3275, 3267, 3261, 3253, 3387, 3379, 3373, 3365, 3355, 3347, 3341, 3333, 3314, 3306, 3300, 3292, 3282, 3274, 3268, 3260, 3238, 3230, 3224, 3216, 3206, 3198, 3192, 3184, 3165, 3157, 3151, 3143, 3133, 3125, 3119, 3111, 3085, 3077, 3071, 3063, 3053, 3045, 3039, 3031, 3012, 3004, 2998, 2990, 2980, 2972, 2966, 2958, 2936, 2928, 2922, 2914, 2904, 2896, 2890, 2882, 2863, 2855, 2849, 2841, 2831, 2823, 2817, 2809, 3540, 3532, 3526, 3518, 3508, 3500, 
3494, 3486, 3467, 3459, 3453, 3445, 3435, 3427, 3421, 3413, 3391, 3383, 3377, 3369, 3359, 3351, 3345, 3337, 3318, 3310, 3304, 3296, 3286, 3278, 3272, 3264, 3238, 3230, 3224, 3216, 3206, 3198, 3192, 3184, 3165, 3157, 3151, 3143, 3133, 3125, 3119, 3111, 3089, 3081, 3075, 3067, 3057, 3049, 3043, 3035, 3016, 3008, 3002, 2994, 2984, 2976, 2970, 2962, 3096, 3088, 3082, 3074, 3064, 3056, 3050, 3042, 3023, 3015, 3009, 3001, 2991, 2983, 2977, 2969, 2947, 2939, 2933, 2925, 2915, 2907, 2901, 2893, 2874, 2866, 2860, 2852, 2842, 2834, 2828, 2820, 2794, 2786, 2780, 2772, 2762, 2754, 2748, 2740, 2721, 2713, 2707, 2699, 2689, 2681, 2675, 2667, 2645, 2637, 2631, 2623, 2613, 2605, 2599, 2591, 2572, 2564, 2558, 2550, 2540, 2532, 2526, 2518, 2720, 2712, 2706, 2698, 2688, 2680, 2674, 2666, 2647, 2639, 2633, 2625, 2615, 2607, 2601, 2593, 2571, 2563, 2557, 2549, 2539, 2531, 2525, 2517, 2498, 2490, 2484, 2476, 2466, 2458, 2452, 2444, 2418, 2410, 2404, 2396, 2386, 2378, 2372, 2364, 2345, 2337, 2331, 2323, 2313, 2305, 2299, 2291, 2269, 2261, 2255, 2247, 2237, 2229, 2223, 2215, 2196, 2188, 2182, 2174, 2164, 2156, 2150, 2142, 2276, 2268, 2262, 2254, 2244, 2236, 2230, 2222, 2203, 2195, 2189, 2181, 2171, 2163, 2157, 2149, 2127, 2119, 2113, 2105, 2095, 2087, 2081, 2073, 2054, 2046, 2040, 2032, 2022, 2014, 2008, 2000, 1974, 1966, 1960, 1952, 1942, 1934, 1928, 1920, 1901, 1893, 1887, 1879, 1869, 1861, 1855, 1847, 1825, 1817, 1811, 1803, 1793, 1785, 1779, 1771, 1752, 1744, 1738, 1730, 1720, 1712, 1706, 1698, 1897, 1883, 1860, 1846, 1819, 1805, 1782, 1768, 1723, 1709, 1686, 1672, 1645, 1631, 1608, 1594, 1574, 1560, 1537, 1523, 1496, 1482, 1459, 1445, 1400, 1386, 1363, 1349, 1322, 1308, 1285, 1271, 1608, 1565, 1535, 1492, 1446, 1403, 1373, 1330, 1312, 1269, 1239, 1196, 1150, 1107, 1077, 1034, 1291, 1218, 1171, 1098, 1015, 942, 895, 822, 953, 850, 729, 626, 618, 431, 257, 257, 257, 257, 0, 255, 255, 255, 255, 429, 616, 624, 727, 848, 951, 820, 893, 940, 1013, 1096, 1169, 1216, 1289, 1032, 1075, 1105, 1148, 1194, 1237, 1267, 1310, 1328, 1371, 1401, 1444, 1490, 1533, 1563, 1606, 1269, 1283, 1306, 1320, 1347, 1361, 1384, 1398, 1443, 1457, 1480, 1494, 1521, 1535, 1558, 1572, 1592, 1606, 1629, 1643, 1670, 1684, 1707, 1721, 1766, 1780, 1803, 1817, 1844, 1858, 1881, 1895, 1696, 1704, 1710, 1718, 1728, 1736, 1742, 1750, 1769, 1777, 1783, 1791, 1801, 1809, 1815, 1823, 1845, 1853, 1859, 1867, 1877, 1885, 1891, 1899, 1918, 1926, 1932, 1940, 1950, 1958, 1964, 1972, 1998, 2006, 2012, 2020, 2030, 2038, 2044, 2052, 2071, 2079, 2085, 2093, 2103, 2111, 2117, 2125, 2147, 2155, 2161, 2169, 2179, 2187, 2193, 2201, 2220, 2228, 2234, 2242, 2252, 2260, 2266, 2274, 2140, 2148, 2154, 2162, 2172, 2180, 2186, 2194, 2213, 2221, 2227, 2235, 2245, 2253, 2259, 2267, 2289, 2297, 2303, 2311, 2321, 2329, 2335, 2343, 2362, 2370, 2376, 2384, 2394, 2402, 2408, 2416, 2442, 2450, 2456, 2464, 2474, 2482, 2488, 2496, 2515, 2523, 2529, 2537, 2547, 2555, 2561, 2569, 2591, 2599, 2605, 2613, 2623, 2631, 2637, 2645, 2664, 2672, 2678, 2686, 2696, 2704, 2710, 2718, 2516, 2524, 2530, 2538, 2548, 2556, 2562, 2570, 2589, 2597, 2603, 2611, 2621, 2629, 2635, 2643, 2665, 2673, 2679, 2687, 2697, 2705, 2711, 2719, 2738, 2746, 2752, 2760, 2770, 2778, 2784, 2792, 2818, 2826, 2832, 2840, 2850, 2858, 2864, 2872, 2891, 2899, 2905, 2913, 2923, 2931, 2937, 2945, 2967, 2975, 2981, 2989, 2999, 3007, 3013, 3021, 3040, 3048, 3054, 3062, 3072, 3080, 3086, 3094, 2960, 2968, 2974, 2982, 2992, 3000, 3006, 3014, 3033, 3041, 3047, 3055, 3065, 3073, 3079, 3087, 3109, 3117, 3123, 3131, 3141, 3149, 
3155, 3163, 3182, 3190, 3196, 3204, 3214, 3222, 3228, 3236, 3262, 3270, 3276, 3284, 3294, 3302, 3308, 3316, 3335, 3343, 3349, 3357, 3367, 3375, 3381, 3389, 3411, 3419, 3425, 3433, 3443, 3451, 3457, 3465, 3484, 3492, 3498, 3506, 3516, 3524, 3530, 3538, 2807, 2815, 2821, 2829, 2839, 2847, 2853, 2861, 2880, 2888, 2894, 2902, 2912, 2920, 2926, 2934, 2956, 2964, 2970, 2978, 2988, 2996, 3002, 3010, 3029, 3037, 3043, 3051, 3061, 3069, 3075, 3083, 3109, 3117, 3123, 3131, 3141, 3149, 3155, 3163, 3182, 3190, 3196, 3204, 3214, 3222, 3228, 3236, 3258, 3266, 3272, 3280, 3290, 3298, 3304, 3312, 3331, 3339, 3345, 3353, 3363, 3371, 3377, 3385, 3251, 3259, 3265, 3273, 3283, 3291, 3297, 3305, 3324, 3332, 3338, 3346, 3356, 3364, 3370, 3378, 3400, 3408, 3414, 3422, 3432, 3440, 3446, 3454, 3473, 3481, 3487, 3495, 3505, 3513, 3519, 3527, 3553, 3561, 3567, 3575, 3585, 3593, 3599, 3607, 3626, 3634, 3640, 3648, 3658, 3666, 3672, 3680, 3702, 3710, 3716, 3724, 3734, 3742, 3748, 3756, 3775, 3783, 3789, 3797, 3807, 3815, 3821, 3829, 3627, 3635, 3641, 3649, 3659, 3667, 3673, 3681, 3700, 3708, 3714, 3722, 3732, 3740, 3746, 3754, 3776, 3784, 3790, 3798, 3808, 3816, 3822, 3830, 3849, 3857, 3863, 3871, 3881, 3889, 3895, 3903, 3929, 3937, 3943, 3951, 3961, 3969, 3975, 3983, 4002, 4010, 4016, 4024, 4034, 4042, 4048, 4056, 4078, 4086, 4092, 4100, 4110, 4118, 4124, 4132, 4151, 4159, 4165, 4173, 4183, 4191, 4197, 4205, 4071, 4079, 4085, 4093, 4103, 4111, 4117, 4125, 4144, 4152, 4158, 4166, 4176, 4184, 4190, 4198, 4220, 4228, 4234, 4242, 4252, 4260, 4266, 4274, 4293, 4301, 4307, 4315, 4325, 4333, 4339, 4347, 4373, 4381, 4387, 4395, 4405, 4413, 4419, 4427, 4446, 4454, 4460, 4468, 4478, 4486, 4492, 4500, 4522, 4530, 4536, 4544, 4554, 4562, 4568, 4576, 4595, 4603, 4609, 4617, 4627, 4635, 4641, 4649, 3742, 3750, 3756, 3764, 3774, 3782, 3788, 3796, 3815, 3823, 3829, 3837, 3847, 3855, 3861, 3869, 3891, 3899, 3905, 3913, 3923, 3931, 3937, 3945, 3964, 3972, 3978, 3986, 3996, 4004, 4010, 4018, 4044, 4052, 4058, 4066, 4076, 4084, 4090, 4098, 4117, 4125, 4131, 4139, 4149, 4157, 4163, 4171, 4193, 4201, 4207, 4215, 4225, 4233, 4239, 4247, 4266, 4274, 4280, 4288, 4298, 4306, 4312, 4320, 4186, 4194, 4200, 4208, 4218, 4226, 4232, 4240, 4259, 4267, 4273, 4281, 4291, 4299, 4305, 4313, 4335, 4343, 4349, 4357, 4367, 4375, 4381, 4389, 4408, 4416, 4422, 4430, 4440, 4448, 4454, 4462, 4488, 4496, 4502, 4510, 4520, 4528, 4534, 4542, 4561, 4569, 4575, 4583, 4593, 4601, 4607, 4615, 4637, 4645, 4651, 4659, 4669, 4677, 4683, 4691, 4710, 4718, 4724, 4732, 4742, 4750, 4756, 4764, 4562, 4570, 4576, 4584, 4594, 4602, 4608, 4616, 4635, 4643, 4649, 4657, 4667, 4675, 4681, 4689, 4711, 4719, 4725, 4733, 4743, 4751, 4757, 4765, 4784, 4792, 4798, 4806, 4816, 4824, 4830, 4838, 4864, 4872, 4878, 4886, 4896, 4904, 4910, 4918, 4937, 4945, 4951, 4959, 4969, 4977, 4983, 4991, 5013, 5021, 5027, 5035, 5045, 5053, 5059, 5067, 5086, 5094, 5100, 5108, 5118, 5126, 5132, 5140, 5006, 5014, 5020, 5028, 5038, 5046, 5052, 5060, 5079, 5087, 5093, 5101, 5111, 5119, 5125, 5133, 5155, 5163, 5169, 5177, 5187, 5195, 5201, 5209, 5228, 5236, 5242, 5250, 5260, 5268, 5274, 5282, 5308, 5316, 5322, 5330, 5340, 5348, 5354, 5362, 5381, 5389, 5395, 5403, 5413, 5421, 5427, 5435, 5457, 5465, 5471, 5479, 5489, 5497, 5503, 5511, 5530, 5538, 5544, 5552, 5562, 5570, 5576, 5584, 4853, 4861, 4867, 4875, 4885, 4893, 4899, 4907, 4926, 4934, 4940, 4948, 4958, 4966, 4972, 4980, 5002, 5010, 5016, 5024, 5034, 5042, 5048, 5056, 5075, 5083, 5089, 5097, 5107, 5115, 5121, 5129, 5155, 5163, 5169, 5177, 5187, 5195, 
5201, 5209, 5228, 5236, 5242, 5250, 5260, 5268, 5274, 5282, 5304, 5312, 5318, 5326, 5336, 5344, 5350, 5358, 5377, 5385, 5391, 5399, 5409, 5417, 5423, 5431, 5297, 5305, 5311, 5319, 5329, 5337, 5343, 5351, 5370, 5378, 5384, 5392, 5402, 5410, 5416, 5424, 5446, 5454, 5460, 5468, 5478, 5486, 5492, 5500, 5519, 5527, 5533, 5541, 5551, 5559, 5565, 5573, 5599, 5607, 5613, 5621, 5631, 5639, 5645, 5653, 5672, 5680, 5686, 5694, 5704, 5712, 5718, 5726, 5748, 5756, 5762, 5770, 5780, 5788, 5794, 5802, 5821, 5829, 5835, 5843, 5853, 5861, 5867, 5875, 5673, 5681, 5687, 5695, 5705, 5713, 5719, 5727, 5746, 5754, 5760, 5768, 5778, 5786, 5792, 5800, 5822, 5830, 5836, 5844, 5854, 5862, 5868, 5876, 5895, 5903, 5909, 5917, 5927, 5935, 5941, 5949, 5975, 5983, 5989, 5997, 6007, 6015, 6021, 6029, 6048, 6056, 6062, 6070, 6080, 6088, 6094, 6102, 6124, 6132, 6138, 6146, 6156, 6164, 6170, 6178, 6197, 6205, 6211, 6219, 6229, 6237, 6243, 6251, 6117, 6125, 6131, 6139, 6149, 6157, 6163, 6171, 6190, 6198, 6204, 6212, 6222, 6230, 6236, 6244, 6266, 6274, 6280, 6288, 6298, 6306, 6312, 6320, 6339, 6347, 6353, 6361, 6371, 6379, 6385, 6393, 6419, 6427, 6433, 6441, 6451, 6459, 6465, 6473, 6492, 6500, 6506, 6514, 6524, 6532, 6538, 6546, 6568, 6576, 6582, 6590, 6600, 6608, 6614, 6622, 6641, 6649, 6655, 6663, 6673, 6681, 6687, 6695, 3742, 3750, 3756, 3764, 3774, 3782, 3788, 3796, 3815, 3823, 3829, 3837, 3847, 3855, 3861, 3869, 3891, 3899, 3905, 3913, 3923, 3931, 3937, 3945, 3964, 3972, 3978, 3986, 3996, 4004, 4010, 4018, 4044, 4052, 4058, 4066, 4076, 4084, 4090, 4098, 4117, 4125, 4131, 4139, 4149, 4157, 4163, 4171, 4193, 4201, 4207, 4215, 4225, 4233, 4239, 4247, 4266, 4274, 4280, 4288, 4298, 4306, 4312, 4320, 4186, 4194, 4200, 4208, 4218, 4226, 4232, 4240, 4259, 4267, 4273, 4281, 4291, 4299, 4305, 4313, 4335, 4343, 4349, 4357, 4367, 4375, 4381, 4389, 4408, 4416, 4422, 4430, 4440, 4448, 4454, 4462, 4488, 4496, 4502, 4510, 4520, 4528, 4534, 4542, 4561, 4569, 4575, 4583, 4593, 4601, 4607, 4615, 4637, 4645, 4651, 4659, 4669, 4677, 4683, 4691, 4710, 4718, 4724, 4732, 4742, 4750, 4756, 4764, 4562, 4570, 4576, 4584, 4594, 4602, 4608, 4616, 4635, 4643, 4649, 4657, 4667, 4675, 4681, 4689, 4711, 4719, 4725, 4733, 4743, 4751, 4757, 4765, 4784, 4792, 4798, 4806, 4816, 4824, 4830, 4838, 4864, 4872, 4878, 4886, 4896, 4904, 4910, 4918, 4937, 4945, 4951, 4959, 4969, 4977, 4983, 4991, 5013, 5021, 5027, 5035, 5045, 5053, 5059, 5067, 5086, 5094, 5100, 5108, 5118, 5126, 5132, 5140, 5006, 5014, 5020, 5028, 5038, 5046, 5052, 5060, 5079, 5087, 5093, 5101, 5111, 5119, 5125, 5133, 5155, 5163, 5169, 5177, 5187, 5195, 5201, 5209, 5228, 5236, 5242, 5250, 5260, 5268, 5274, 5282, 5308, 5316, 5322, 5330, 5340, 5348, 5354, 5362, 5381, 5389, 5395, 5403, 5413, 5421, 5427, 5435, 5457, 5465, 5471, 5479, 5489, 5497, 5503, 5511, 5530, 5538, 5544, 5552, 5562, 5570, 5576, 5584, 4853, 4861, 4867, 4875, 4885, 4893, 4899, 4907, 4926, 4934, 4940, 4948, 4958, 4966, 4972, 4980, 5002, 5010, 5016, 5024, 5034, 5042, 5048, 5056, 5075, 5083, 5089, 5097, 5107, 5115, 5121, 5129, 5155, 5163, 5169, 5177, 5187, 5195, 5201, 5209, 5228, 5236, 5242, 5250, 5260, 5268, 5274, 5282, 5304, 5312, 5318, 5326, 5336, 5344, 5350, 5358, 5377, 5385, 5391, 5399, 5409, 5417, 5423, 5431, 5297, 5305, 5311, 5319, 5329, 5337, 5343, 5351, 5370, 5378, 5384, 5392, 5402, 5410, 5416, 5424, 5446, 5454, 5460, 5468, 5478, 5486, 5492, 5500, 5519, 5527, 5533, 5541, 5551, 5559, 5565, 5573, 5599, 5607, 5613, 5621, 5631, 5639, 5645, 5653, 5672, 5680, 5686, 5694, 5704, 5712, 5718, 5726, 5748, 5756, 5762, 5770, 5780, 5788, 
5794, 5802, 5821, 5829, 5835, 5843, 5853, 5861, 5867, 5875, 5673, 5681, 5687, 5695, 5705, 5713, 5719, 5727, 5746, 5754, 5760, 5768, 5778, 5786, 5792, 5800, 5822, 5830, 5836, 5844, 5854, 5862, 5868, 5876, 5895, 5903, 5909, 5917, 5927, 5935, 5941, 5949, 5975, 5983, 5989, 5997, 6007, 6015, 6021, 6029, 6048, 6056, 6062, 6070, 6080, 6088, 6094, 6102, 6124, 6132, 6138, 6146, 6156, 6164, 6170, 6178, 6197, 6205, 6211, 6219, 6229, 6237, 6243, 6251, 6117, 6125, 6131, 6139, 6149, 6157, 6163, 6171, 6190, 6198, 6204, 6212, 6222, 6230, 6236, 6244, 6266, 6274, 6280, 6288, 6298, 6306, 6312, 6320, 6339, 6347, 6353, 6361, 6371, 6379, 6385, 6393, 6419, 6427, 6433, 6441, 6451, 6459, 6465, 6473, 6492, 6500, 6506, 6514, 6524, 6532, 6538, 6546, 6568, 6576, 6582, 6590, 6600, 6608, 6614, 6622, 6641, 6649, 6655, 6663, 6673, 6681, 6687, 6695, 5788, 5796, 5802, 5810, 5820, 5828, 5834, 5842, 5861, 5869, 5875, 5883, 5893, 5901, 5907, 5915, 5937, 5945, 5951, 5959, 5969, 5977, 5983, 5991, 6010, 6018, 6024, 6032, 6042, 6050, 6056, 6064, 6090, 6098, 6104, 6112, 6122, 6130, 6136, 6144, 6163, 6171, 6177, 6185, 6195, 6203, 6209, 6217, 6239, 6247, 6253, 6261, 6271, 6279, 6285, 6293, 6312, 6320, 6326, 6334, 6344, 6352, 6358, 6366, 6232, 6240, 6246, 6254, 6264, 6272, 6278, 6286, 6305, 6313, 6319, 6327, 6337, 6345, 6351, 6359, 6381, 6389, 6395, 6403, 6413, 6421, 6427, 6435, 6454, 6462, 6468, 6476, 6486, 6494, 6500, 6508, 6534, 6542, 6548, 6556, 6566, 6574, 6580, 6588, 6607, 6615, 6621, 6629, 6639, 6647, 6653, 6661, 6683, 6691, 6697, 6705, 6715, 6723, 6729, 6737, 6756, 6764, 6770, 6778, 6788, 6796, 6802, 6810, 6608, 6616, 6622, 6630, 6640, 6648, 6654, 6662, 6681, 6689, 6695, 6703, 6713, 6721, 6727, 6735, 6757, 6765, 6771, 6779, 6789, 6797, 6803, 6811, 6830, 6838, 6844, 6852, 6862, 6870, 6876, 6884, 6910, 6918, 6924, 6932, 6942, 6950, 6956, 6964, 6983, 6991, 6997, 7005, 7015, 7023, 7029, 7037, 7059, 7067, 7073, 7081, 7091, 7099, 7105, 7113, 7132, 7140, 7146, 7154, 7164, 7172, 7178, 7186, 7052, 7060, 7066, 7074, 7084, 7092, 7098, 7106, 7125, 7133, 7139, 7147, 7157, 7165, 7171, 7179, 7201, 7209, 7215, 7223, 7233, 7241, 7247, 7255, 7274, 7282, 7288, 7296, 7306, 7314, 7320, 7328, 7354, 7362, 7368, 7376, 7386, 7394, 7400, 7408, 7427, 7435, 7441, 7449, 7459, 7467, 7473, 7481, 7503, 7511, 7517, 7525, 7535, 7543, 7549, 7557, 7576, 7584, 7590, 7598, 7608, 7616, 7622, 7630, 6899, 6907, 6913, 6921, 6931, 6939, 6945, 6953, 6972, 6980, 6986, 6994, 7004, 7012, 7018, 7026, 7048, 7056, 7062, 7070, 7080, 7088, 7094, 7102, 7121, 7129, 7135, 7143, 7153, 7161, 7167, 7175, 7201, 7209, 7215, 7223, 7233, 7241, 7247, 7255, 7274, 7282, 7288, 7296, 7306, 7314, 7320, 7328, 7350, 7358, 7364, 7372, 7382, 7390, 7396, 7404, 7423, 7431, 7437, 7445, 7455, 7463, 7469, 7477, 7343, 7351, 7357, 7365, 7375, 7383, 7389, 7397, 7416, 7424, 7430, 7438, 7448, 7456, 7462, 7470, 7492, 7500, 7506, 7514, 7524, 7532, 7538, 7546, 7565, 7573, 7579, 7587, 7597, 7605, 7611, 7619, 7645, 7653, 7659, 7667, 7677, 7685, 7691, 7699, 7718, 7726, 7732, 7740, 7750, 7758, 7764, 7772, 7794, 7802, 7808, 7816, 7826, 7834, 7840, 7848, 7867, 7875, 7881, 7889, 7899, 7907, 7913, 7921, 7719, 7727, 7733, 7741, 7751, 7759, 7765, 7773, 7792, 7800, 7806, 7814, 7824, 7832, 7838, 7846, 7868, 7876, 7882, 7890, 7900, 7908, 7914, 7922, 7941, 7949, 7955, 7963, 7973, 7981, 7987, 7995, 8021, 8029, 8035, 8043, 8053, 8061, 8067, 8075, 8094, 8102, 8108, 8116, 8126, 8134, 8140, 8148, 8170, 8178, 8184, 8192, 8202, 8210, 8216, 8224, 8243, 8251, 8257, 8265, 8275 }; #ifdef __cplusplus } // extern "C" #endif #endif // 
VPX_VP8_ENCODER_DCT_VALUE_COST_H_ libvpx-1.8.2/vp8/encoder/dct_value_tokens.h000066400000000000000000001640761357355204000207060ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_DCT_VALUE_TOKENS_H_ #define VPX_VP8_ENCODER_DCT_VALUE_TOKENS_H_ #ifdef __cplusplus extern "C" { #endif /* Generated file, included by tokenize.c */ /* Values generated by fill_value_tokens() */ static const TOKENVALUE dct_value_tokens[2048 * 2] = { { 10, 3963 }, { 10, 3961 }, { 10, 3959 }, { 10, 3957 }, { 10, 3955 }, { 10, 3953 }, { 10, 3951 }, { 10, 3949 }, { 10, 3947 }, { 10, 3945 }, { 10, 3943 }, { 10, 3941 }, { 10, 3939 }, { 10, 3937 }, { 10, 3935 }, { 10, 3933 }, { 10, 3931 }, { 10, 3929 }, { 10, 3927 }, { 10, 3925 }, { 10, 3923 }, { 10, 3921 }, { 10, 3919 }, { 10, 3917 }, { 10, 3915 }, { 10, 3913 }, { 10, 3911 }, { 10, 3909 }, { 10, 3907 }, { 10, 3905 }, { 10, 3903 }, { 10, 3901 }, { 10, 3899 }, { 10, 3897 }, { 10, 3895 }, { 10, 3893 }, { 10, 3891 }, { 10, 3889 }, { 10, 3887 }, { 10, 3885 }, { 10, 3883 }, { 10, 3881 }, { 10, 3879 }, { 10, 3877 }, { 10, 3875 }, { 10, 3873 }, { 10, 3871 }, { 10, 3869 }, { 10, 3867 }, { 10, 3865 }, { 10, 3863 }, { 10, 3861 }, { 10, 3859 }, { 10, 3857 }, { 10, 3855 }, { 10, 3853 }, { 10, 3851 }, { 10, 3849 }, { 10, 3847 }, { 10, 3845 }, { 10, 3843 }, { 10, 3841 }, { 10, 3839 }, { 10, 3837 }, { 10, 3835 }, { 10, 3833 }, { 10, 3831 }, { 10, 3829 }, { 10, 3827 }, { 10, 3825 }, { 10, 3823 }, { 10, 3821 }, { 10, 3819 }, { 10, 3817 }, { 10, 3815 }, { 10, 3813 }, { 10, 3811 }, { 10, 3809 }, { 10, 3807 }, { 10, 3805 }, { 10, 3803 }, { 10, 3801 }, { 10, 3799 }, { 10, 3797 }, { 10, 3795 }, { 10, 3793 }, { 10, 3791 }, { 10, 3789 }, { 10, 3787 }, { 10, 3785 }, { 10, 3783 }, { 10, 3781 }, { 10, 3779 }, { 10, 3777 }, { 10, 3775 }, { 10, 3773 }, { 10, 3771 }, { 10, 3769 }, { 10, 3767 }, { 10, 3765 }, { 10, 3763 }, { 10, 3761 }, { 10, 3759 }, { 10, 3757 }, { 10, 3755 }, { 10, 3753 }, { 10, 3751 }, { 10, 3749 }, { 10, 3747 }, { 10, 3745 }, { 10, 3743 }, { 10, 3741 }, { 10, 3739 }, { 10, 3737 }, { 10, 3735 }, { 10, 3733 }, { 10, 3731 }, { 10, 3729 }, { 10, 3727 }, { 10, 3725 }, { 10, 3723 }, { 10, 3721 }, { 10, 3719 }, { 10, 3717 }, { 10, 3715 }, { 10, 3713 }, { 10, 3711 }, { 10, 3709 }, { 10, 3707 }, { 10, 3705 }, { 10, 3703 }, { 10, 3701 }, { 10, 3699 }, { 10, 3697 }, { 10, 3695 }, { 10, 3693 }, { 10, 3691 }, { 10, 3689 }, { 10, 3687 }, { 10, 3685 }, { 10, 3683 }, { 10, 3681 }, { 10, 3679 }, { 10, 3677 }, { 10, 3675 }, { 10, 3673 }, { 10, 3671 }, { 10, 3669 }, { 10, 3667 }, { 10, 3665 }, { 10, 3663 }, { 10, 3661 }, { 10, 3659 }, { 10, 3657 }, { 10, 3655 }, { 10, 3653 }, { 10, 3651 }, { 10, 3649 }, { 10, 3647 }, { 10, 3645 }, { 10, 3643 }, { 10, 3641 }, { 10, 3639 }, { 10, 3637 }, { 10, 3635 }, { 10, 3633 }, { 10, 3631 }, { 10, 3629 }, { 10, 3627 }, { 10, 3625 }, { 10, 3623 }, { 10, 3621 }, { 10, 3619 }, { 10, 3617 }, { 10, 3615 }, { 10, 3613 }, { 10, 3611 }, { 10, 3609 }, { 10, 3607 }, { 10, 3605 }, { 10, 3603 }, { 10, 3601 }, { 10, 3599 }, { 10, 3597 }, { 10, 3595 }, { 10, 3593 }, { 10, 3591 }, { 10, 3589 }, { 10, 3587 }, { 10, 3585 }, { 10, 3583 }, { 10, 3581 }, { 10, 3579 }, { 10, 3577 }, { 10, 3575 
}, { 10, 3573 }, { 10, 3571 }, { 10, 3569 }, { 10, 3567 }, { 10, 3565 }, { 10, 3563 }, { 10, 3561 }, { 10, 3559 }, { 10, 3557 }, { 10, 3555 }, { 10, 3553 }, { 10, 3551 }, { 10, 3549 }, { 10, 3547 }, { 10, 3545 }, { 10, 3543 }, { 10, 3541 }, { 10, 3539 }, { 10, 3537 }, { 10, 3535 }, { 10, 3533 }, { 10, 3531 }, { 10, 3529 }, { 10, 3527 }, { 10, 3525 }, { 10, 3523 }, { 10, 3521 }, { 10, 3519 }, { 10, 3517 }, { 10, 3515 }, { 10, 3513 }, { 10, 3511 }, { 10, 3509 }, { 10, 3507 }, { 10, 3505 }, { 10, 3503 }, { 10, 3501 }, { 10, 3499 }, { 10, 3497 }, { 10, 3495 }, { 10, 3493 }, { 10, 3491 }, { 10, 3489 }, { 10, 3487 }, { 10, 3485 }, { 10, 3483 }, { 10, 3481 }, { 10, 3479 }, { 10, 3477 }, { 10, 3475 }, { 10, 3473 }, { 10, 3471 }, { 10, 3469 }, { 10, 3467 }, { 10, 3465 }, { 10, 3463 }, { 10, 3461 }, { 10, 3459 }, { 10, 3457 }, { 10, 3455 }, { 10, 3453 }, { 10, 3451 }, { 10, 3449 }, { 10, 3447 }, { 10, 3445 }, { 10, 3443 }, { 10, 3441 }, { 10, 3439 }, { 10, 3437 }, { 10, 3435 }, { 10, 3433 }, { 10, 3431 }, { 10, 3429 }, { 10, 3427 }, { 10, 3425 }, { 10, 3423 }, { 10, 3421 }, { 10, 3419 }, { 10, 3417 }, { 10, 3415 }, { 10, 3413 }, { 10, 3411 }, { 10, 3409 }, { 10, 3407 }, { 10, 3405 }, { 10, 3403 }, { 10, 3401 }, { 10, 3399 }, { 10, 3397 }, { 10, 3395 }, { 10, 3393 }, { 10, 3391 }, { 10, 3389 }, { 10, 3387 }, { 10, 3385 }, { 10, 3383 }, { 10, 3381 }, { 10, 3379 }, { 10, 3377 }, { 10, 3375 }, { 10, 3373 }, { 10, 3371 }, { 10, 3369 }, { 10, 3367 }, { 10, 3365 }, { 10, 3363 }, { 10, 3361 }, { 10, 3359 }, { 10, 3357 }, { 10, 3355 }, { 10, 3353 }, { 10, 3351 }, { 10, 3349 }, { 10, 3347 }, { 10, 3345 }, { 10, 3343 }, { 10, 3341 }, { 10, 3339 }, { 10, 3337 }, { 10, 3335 }, { 10, 3333 }, { 10, 3331 }, { 10, 3329 }, { 10, 3327 }, { 10, 3325 }, { 10, 3323 }, { 10, 3321 }, { 10, 3319 }, { 10, 3317 }, { 10, 3315 }, { 10, 3313 }, { 10, 3311 }, { 10, 3309 }, { 10, 3307 }, { 10, 3305 }, { 10, 3303 }, { 10, 3301 }, { 10, 3299 }, { 10, 3297 }, { 10, 3295 }, { 10, 3293 }, { 10, 3291 }, { 10, 3289 }, { 10, 3287 }, { 10, 3285 }, { 10, 3283 }, { 10, 3281 }, { 10, 3279 }, { 10, 3277 }, { 10, 3275 }, { 10, 3273 }, { 10, 3271 }, { 10, 3269 }, { 10, 3267 }, { 10, 3265 }, { 10, 3263 }, { 10, 3261 }, { 10, 3259 }, { 10, 3257 }, { 10, 3255 }, { 10, 3253 }, { 10, 3251 }, { 10, 3249 }, { 10, 3247 }, { 10, 3245 }, { 10, 3243 }, { 10, 3241 }, { 10, 3239 }, { 10, 3237 }, { 10, 3235 }, { 10, 3233 }, { 10, 3231 }, { 10, 3229 }, { 10, 3227 }, { 10, 3225 }, { 10, 3223 }, { 10, 3221 }, { 10, 3219 }, { 10, 3217 }, { 10, 3215 }, { 10, 3213 }, { 10, 3211 }, { 10, 3209 }, { 10, 3207 }, { 10, 3205 }, { 10, 3203 }, { 10, 3201 }, { 10, 3199 }, { 10, 3197 }, { 10, 3195 }, { 10, 3193 }, { 10, 3191 }, { 10, 3189 }, { 10, 3187 }, { 10, 3185 }, { 10, 3183 }, { 10, 3181 }, { 10, 3179 }, { 10, 3177 }, { 10, 3175 }, { 10, 3173 }, { 10, 3171 }, { 10, 3169 }, { 10, 3167 }, { 10, 3165 }, { 10, 3163 }, { 10, 3161 }, { 10, 3159 }, { 10, 3157 }, { 10, 3155 }, { 10, 3153 }, { 10, 3151 }, { 10, 3149 }, { 10, 3147 }, { 10, 3145 }, { 10, 3143 }, { 10, 3141 }, { 10, 3139 }, { 10, 3137 }, { 10, 3135 }, { 10, 3133 }, { 10, 3131 }, { 10, 3129 }, { 10, 3127 }, { 10, 3125 }, { 10, 3123 }, { 10, 3121 }, { 10, 3119 }, { 10, 3117 }, { 10, 3115 }, { 10, 3113 }, { 10, 3111 }, { 10, 3109 }, { 10, 3107 }, { 10, 3105 }, { 10, 3103 }, { 10, 3101 }, { 10, 3099 }, { 10, 3097 }, { 10, 3095 }, { 10, 3093 }, { 10, 3091 }, { 10, 3089 }, { 10, 3087 }, { 10, 3085 }, { 10, 3083 }, { 10, 3081 }, { 10, 3079 }, { 10, 3077 }, { 10, 3075 }, { 10, 3073 }, { 10, 3071 }, { 10, 3069 }, { 10, 
3067 }, { 10, 3065 }, { 10, 3063 }, { 10, 3061 }, { 10, 3059 }, { 10, 3057 }, { 10, 3055 }, { 10, 3053 }, { 10, 3051 }, { 10, 3049 }, { 10, 3047 }, { 10, 3045 }, { 10, 3043 }, { 10, 3041 }, { 10, 3039 }, { 10, 3037 }, { 10, 3035 }, { 10, 3033 }, { 10, 3031 }, { 10, 3029 }, { 10, 3027 }, { 10, 3025 }, { 10, 3023 }, { 10, 3021 }, { 10, 3019 }, { 10, 3017 }, { 10, 3015 }, { 10, 3013 }, { 10, 3011 }, { 10, 3009 }, { 10, 3007 }, { 10, 3005 }, { 10, 3003 }, { 10, 3001 }, { 10, 2999 }, { 10, 2997 }, { 10, 2995 }, { 10, 2993 }, { 10, 2991 }, { 10, 2989 }, { 10, 2987 }, { 10, 2985 }, { 10, 2983 }, { 10, 2981 }, { 10, 2979 }, { 10, 2977 }, { 10, 2975 }, { 10, 2973 }, { 10, 2971 }, { 10, 2969 }, { 10, 2967 }, { 10, 2965 }, { 10, 2963 }, { 10, 2961 }, { 10, 2959 }, { 10, 2957 }, { 10, 2955 }, { 10, 2953 }, { 10, 2951 }, { 10, 2949 }, { 10, 2947 }, { 10, 2945 }, { 10, 2943 }, { 10, 2941 }, { 10, 2939 }, { 10, 2937 }, { 10, 2935 }, { 10, 2933 }, { 10, 2931 }, { 10, 2929 }, { 10, 2927 }, { 10, 2925 }, { 10, 2923 }, { 10, 2921 }, { 10, 2919 }, { 10, 2917 }, { 10, 2915 }, { 10, 2913 }, { 10, 2911 }, { 10, 2909 }, { 10, 2907 }, { 10, 2905 }, { 10, 2903 }, { 10, 2901 }, { 10, 2899 }, { 10, 2897 }, { 10, 2895 }, { 10, 2893 }, { 10, 2891 }, { 10, 2889 }, { 10, 2887 }, { 10, 2885 }, { 10, 2883 }, { 10, 2881 }, { 10, 2879 }, { 10, 2877 }, { 10, 2875 }, { 10, 2873 }, { 10, 2871 }, { 10, 2869 }, { 10, 2867 }, { 10, 2865 }, { 10, 2863 }, { 10, 2861 }, { 10, 2859 }, { 10, 2857 }, { 10, 2855 }, { 10, 2853 }, { 10, 2851 }, { 10, 2849 }, { 10, 2847 }, { 10, 2845 }, { 10, 2843 }, { 10, 2841 }, { 10, 2839 }, { 10, 2837 }, { 10, 2835 }, { 10, 2833 }, { 10, 2831 }, { 10, 2829 }, { 10, 2827 }, { 10, 2825 }, { 10, 2823 }, { 10, 2821 }, { 10, 2819 }, { 10, 2817 }, { 10, 2815 }, { 10, 2813 }, { 10, 2811 }, { 10, 2809 }, { 10, 2807 }, { 10, 2805 }, { 10, 2803 }, { 10, 2801 }, { 10, 2799 }, { 10, 2797 }, { 10, 2795 }, { 10, 2793 }, { 10, 2791 }, { 10, 2789 }, { 10, 2787 }, { 10, 2785 }, { 10, 2783 }, { 10, 2781 }, { 10, 2779 }, { 10, 2777 }, { 10, 2775 }, { 10, 2773 }, { 10, 2771 }, { 10, 2769 }, { 10, 2767 }, { 10, 2765 }, { 10, 2763 }, { 10, 2761 }, { 10, 2759 }, { 10, 2757 }, { 10, 2755 }, { 10, 2753 }, { 10, 2751 }, { 10, 2749 }, { 10, 2747 }, { 10, 2745 }, { 10, 2743 }, { 10, 2741 }, { 10, 2739 }, { 10, 2737 }, { 10, 2735 }, { 10, 2733 }, { 10, 2731 }, { 10, 2729 }, { 10, 2727 }, { 10, 2725 }, { 10, 2723 }, { 10, 2721 }, { 10, 2719 }, { 10, 2717 }, { 10, 2715 }, { 10, 2713 }, { 10, 2711 }, { 10, 2709 }, { 10, 2707 }, { 10, 2705 }, { 10, 2703 }, { 10, 2701 }, { 10, 2699 }, { 10, 2697 }, { 10, 2695 }, { 10, 2693 }, { 10, 2691 }, { 10, 2689 }, { 10, 2687 }, { 10, 2685 }, { 10, 2683 }, { 10, 2681 }, { 10, 2679 }, { 10, 2677 }, { 10, 2675 }, { 10, 2673 }, { 10, 2671 }, { 10, 2669 }, { 10, 2667 }, { 10, 2665 }, { 10, 2663 }, { 10, 2661 }, { 10, 2659 }, { 10, 2657 }, { 10, 2655 }, { 10, 2653 }, { 10, 2651 }, { 10, 2649 }, { 10, 2647 }, { 10, 2645 }, { 10, 2643 }, { 10, 2641 }, { 10, 2639 }, { 10, 2637 }, { 10, 2635 }, { 10, 2633 }, { 10, 2631 }, { 10, 2629 }, { 10, 2627 }, { 10, 2625 }, { 10, 2623 }, { 10, 2621 }, { 10, 2619 }, { 10, 2617 }, { 10, 2615 }, { 10, 2613 }, { 10, 2611 }, { 10, 2609 }, { 10, 2607 }, { 10, 2605 }, { 10, 2603 }, { 10, 2601 }, { 10, 2599 }, { 10, 2597 }, { 10, 2595 }, { 10, 2593 }, { 10, 2591 }, { 10, 2589 }, { 10, 2587 }, { 10, 2585 }, { 10, 2583 }, { 10, 2581 }, { 10, 2579 }, { 10, 2577 }, { 10, 2575 }, { 10, 2573 }, { 10, 2571 }, { 10, 2569 }, { 10, 2567 }, { 10, 2565 }, { 10, 2563 }, { 10, 2561 }, { 
10, 2559 }, { 10, 2557 }, { 10, 2555 }, { 10, 2553 }, { 10, 2551 }, { 10, 2549 }, { 10, 2547 }, { 10, 2545 }, { 10, 2543 }, { 10, 2541 }, { 10, 2539 }, { 10, 2537 }, { 10, 2535 }, { 10, 2533 }, { 10, 2531 }, { 10, 2529 }, { 10, 2527 }, { 10, 2525 }, { 10, 2523 }, { 10, 2521 }, { 10, 2519 }, { 10, 2517 }, { 10, 2515 }, { 10, 2513 }, { 10, 2511 }, { 10, 2509 }, { 10, 2507 }, { 10, 2505 }, { 10, 2503 }, { 10, 2501 }, { 10, 2499 }, { 10, 2497 }, { 10, 2495 }, { 10, 2493 }, { 10, 2491 }, { 10, 2489 }, { 10, 2487 }, { 10, 2485 }, { 10, 2483 }, { 10, 2481 }, { 10, 2479 }, { 10, 2477 }, { 10, 2475 }, { 10, 2473 }, { 10, 2471 }, { 10, 2469 }, { 10, 2467 }, { 10, 2465 }, { 10, 2463 }, { 10, 2461 }, { 10, 2459 }, { 10, 2457 }, { 10, 2455 }, { 10, 2453 }, { 10, 2451 }, { 10, 2449 }, { 10, 2447 }, { 10, 2445 }, { 10, 2443 }, { 10, 2441 }, { 10, 2439 }, { 10, 2437 }, { 10, 2435 }, { 10, 2433 }, { 10, 2431 }, { 10, 2429 }, { 10, 2427 }, { 10, 2425 }, { 10, 2423 }, { 10, 2421 }, { 10, 2419 }, { 10, 2417 }, { 10, 2415 }, { 10, 2413 }, { 10, 2411 }, { 10, 2409 }, { 10, 2407 }, { 10, 2405 }, { 10, 2403 }, { 10, 2401 }, { 10, 2399 }, { 10, 2397 }, { 10, 2395 }, { 10, 2393 }, { 10, 2391 }, { 10, 2389 }, { 10, 2387 }, { 10, 2385 }, { 10, 2383 }, { 10, 2381 }, { 10, 2379 }, { 10, 2377 }, { 10, 2375 }, { 10, 2373 }, { 10, 2371 }, { 10, 2369 }, { 10, 2367 }, { 10, 2365 }, { 10, 2363 }, { 10, 2361 }, { 10, 2359 }, { 10, 2357 }, { 10, 2355 }, { 10, 2353 }, { 10, 2351 }, { 10, 2349 }, { 10, 2347 }, { 10, 2345 }, { 10, 2343 }, { 10, 2341 }, { 10, 2339 }, { 10, 2337 }, { 10, 2335 }, { 10, 2333 }, { 10, 2331 }, { 10, 2329 }, { 10, 2327 }, { 10, 2325 }, { 10, 2323 }, { 10, 2321 }, { 10, 2319 }, { 10, 2317 }, { 10, 2315 }, { 10, 2313 }, { 10, 2311 }, { 10, 2309 }, { 10, 2307 }, { 10, 2305 }, { 10, 2303 }, { 10, 2301 }, { 10, 2299 }, { 10, 2297 }, { 10, 2295 }, { 10, 2293 }, { 10, 2291 }, { 10, 2289 }, { 10, 2287 }, { 10, 2285 }, { 10, 2283 }, { 10, 2281 }, { 10, 2279 }, { 10, 2277 }, { 10, 2275 }, { 10, 2273 }, { 10, 2271 }, { 10, 2269 }, { 10, 2267 }, { 10, 2265 }, { 10, 2263 }, { 10, 2261 }, { 10, 2259 }, { 10, 2257 }, { 10, 2255 }, { 10, 2253 }, { 10, 2251 }, { 10, 2249 }, { 10, 2247 }, { 10, 2245 }, { 10, 2243 }, { 10, 2241 }, { 10, 2239 }, { 10, 2237 }, { 10, 2235 }, { 10, 2233 }, { 10, 2231 }, { 10, 2229 }, { 10, 2227 }, { 10, 2225 }, { 10, 2223 }, { 10, 2221 }, { 10, 2219 }, { 10, 2217 }, { 10, 2215 }, { 10, 2213 }, { 10, 2211 }, { 10, 2209 }, { 10, 2207 }, { 10, 2205 }, { 10, 2203 }, { 10, 2201 }, { 10, 2199 }, { 10, 2197 }, { 10, 2195 }, { 10, 2193 }, { 10, 2191 }, { 10, 2189 }, { 10, 2187 }, { 10, 2185 }, { 10, 2183 }, { 10, 2181 }, { 10, 2179 }, { 10, 2177 }, { 10, 2175 }, { 10, 2173 }, { 10, 2171 }, { 10, 2169 }, { 10, 2167 }, { 10, 2165 }, { 10, 2163 }, { 10, 2161 }, { 10, 2159 }, { 10, 2157 }, { 10, 2155 }, { 10, 2153 }, { 10, 2151 }, { 10, 2149 }, { 10, 2147 }, { 10, 2145 }, { 10, 2143 }, { 10, 2141 }, { 10, 2139 }, { 10, 2137 }, { 10, 2135 }, { 10, 2133 }, { 10, 2131 }, { 10, 2129 }, { 10, 2127 }, { 10, 2125 }, { 10, 2123 }, { 10, 2121 }, { 10, 2119 }, { 10, 2117 }, { 10, 2115 }, { 10, 2113 }, { 10, 2111 }, { 10, 2109 }, { 10, 2107 }, { 10, 2105 }, { 10, 2103 }, { 10, 2101 }, { 10, 2099 }, { 10, 2097 }, { 10, 2095 }, { 10, 2093 }, { 10, 2091 }, { 10, 2089 }, { 10, 2087 }, { 10, 2085 }, { 10, 2083 }, { 10, 2081 }, { 10, 2079 }, { 10, 2077 }, { 10, 2075 }, { 10, 2073 }, { 10, 2071 }, { 10, 2069 }, { 10, 2067 }, { 10, 2065 }, { 10, 2063 }, { 10, 2061 }, { 10, 2059 }, { 10, 2057 }, { 10, 2055 }, { 10, 2053 }, 
{ 10, 2051 }, { 10, 2049 }, { 10, 2047 }, { 10, 2045 }, { 10, 2043 }, { 10, 2041 }, { 10, 2039 }, { 10, 2037 }, { 10, 2035 }, { 10, 2033 }, { 10, 2031 }, { 10, 2029 }, { 10, 2027 }, { 10, 2025 }, { 10, 2023 }, { 10, 2021 }, { 10, 2019 }, { 10, 2017 }, { 10, 2015 }, { 10, 2013 }, { 10, 2011 }, { 10, 2009 }, { 10, 2007 }, { 10, 2005 }, { 10, 2003 }, { 10, 2001 }, { 10, 1999 }, { 10, 1997 }, { 10, 1995 }, { 10, 1993 }, { 10, 1991 }, { 10, 1989 }, { 10, 1987 }, { 10, 1985 }, { 10, 1983 }, { 10, 1981 }, { 10, 1979 }, { 10, 1977 }, { 10, 1975 }, { 10, 1973 }, { 10, 1971 }, { 10, 1969 }, { 10, 1967 }, { 10, 1965 }, { 10, 1963 }, { 10, 1961 }, { 10, 1959 }, { 10, 1957 }, { 10, 1955 }, { 10, 1953 }, { 10, 1951 }, { 10, 1949 }, { 10, 1947 }, { 10, 1945 }, { 10, 1943 }, { 10, 1941 }, { 10, 1939 }, { 10, 1937 }, { 10, 1935 }, { 10, 1933 }, { 10, 1931 }, { 10, 1929 }, { 10, 1927 }, { 10, 1925 }, { 10, 1923 }, { 10, 1921 }, { 10, 1919 }, { 10, 1917 }, { 10, 1915 }, { 10, 1913 }, { 10, 1911 }, { 10, 1909 }, { 10, 1907 }, { 10, 1905 }, { 10, 1903 }, { 10, 1901 }, { 10, 1899 }, { 10, 1897 }, { 10, 1895 }, { 10, 1893 }, { 10, 1891 }, { 10, 1889 }, { 10, 1887 }, { 10, 1885 }, { 10, 1883 }, { 10, 1881 }, { 10, 1879 }, { 10, 1877 }, { 10, 1875 }, { 10, 1873 }, { 10, 1871 }, { 10, 1869 }, { 10, 1867 }, { 10, 1865 }, { 10, 1863 }, { 10, 1861 }, { 10, 1859 }, { 10, 1857 }, { 10, 1855 }, { 10, 1853 }, { 10, 1851 }, { 10, 1849 }, { 10, 1847 }, { 10, 1845 }, { 10, 1843 }, { 10, 1841 }, { 10, 1839 }, { 10, 1837 }, { 10, 1835 }, { 10, 1833 }, { 10, 1831 }, { 10, 1829 }, { 10, 1827 }, { 10, 1825 }, { 10, 1823 }, { 10, 1821 }, { 10, 1819 }, { 10, 1817 }, { 10, 1815 }, { 10, 1813 }, { 10, 1811 }, { 10, 1809 }, { 10, 1807 }, { 10, 1805 }, { 10, 1803 }, { 10, 1801 }, { 10, 1799 }, { 10, 1797 }, { 10, 1795 }, { 10, 1793 }, { 10, 1791 }, { 10, 1789 }, { 10, 1787 }, { 10, 1785 }, { 10, 1783 }, { 10, 1781 }, { 10, 1779 }, { 10, 1777 }, { 10, 1775 }, { 10, 1773 }, { 10, 1771 }, { 10, 1769 }, { 10, 1767 }, { 10, 1765 }, { 10, 1763 }, { 10, 1761 }, { 10, 1759 }, { 10, 1757 }, { 10, 1755 }, { 10, 1753 }, { 10, 1751 }, { 10, 1749 }, { 10, 1747 }, { 10, 1745 }, { 10, 1743 }, { 10, 1741 }, { 10, 1739 }, { 10, 1737 }, { 10, 1735 }, { 10, 1733 }, { 10, 1731 }, { 10, 1729 }, { 10, 1727 }, { 10, 1725 }, { 10, 1723 }, { 10, 1721 }, { 10, 1719 }, { 10, 1717 }, { 10, 1715 }, { 10, 1713 }, { 10, 1711 }, { 10, 1709 }, { 10, 1707 }, { 10, 1705 }, { 10, 1703 }, { 10, 1701 }, { 10, 1699 }, { 10, 1697 }, { 10, 1695 }, { 10, 1693 }, { 10, 1691 }, { 10, 1689 }, { 10, 1687 }, { 10, 1685 }, { 10, 1683 }, { 10, 1681 }, { 10, 1679 }, { 10, 1677 }, { 10, 1675 }, { 10, 1673 }, { 10, 1671 }, { 10, 1669 }, { 10, 1667 }, { 10, 1665 }, { 10, 1663 }, { 10, 1661 }, { 10, 1659 }, { 10, 1657 }, { 10, 1655 }, { 10, 1653 }, { 10, 1651 }, { 10, 1649 }, { 10, 1647 }, { 10, 1645 }, { 10, 1643 }, { 10, 1641 }, { 10, 1639 }, { 10, 1637 }, { 10, 1635 }, { 10, 1633 }, { 10, 1631 }, { 10, 1629 }, { 10, 1627 }, { 10, 1625 }, { 10, 1623 }, { 10, 1621 }, { 10, 1619 }, { 10, 1617 }, { 10, 1615 }, { 10, 1613 }, { 10, 1611 }, { 10, 1609 }, { 10, 1607 }, { 10, 1605 }, { 10, 1603 }, { 10, 1601 }, { 10, 1599 }, { 10, 1597 }, { 10, 1595 }, { 10, 1593 }, { 10, 1591 }, { 10, 1589 }, { 10, 1587 }, { 10, 1585 }, { 10, 1583 }, { 10, 1581 }, { 10, 1579 }, { 10, 1577 }, { 10, 1575 }, { 10, 1573 }, { 10, 1571 }, { 10, 1569 }, { 10, 1567 }, { 10, 1565 }, { 10, 1563 }, { 10, 1561 }, { 10, 1559 }, { 10, 1557 }, { 10, 1555 }, { 10, 1553 }, { 10, 1551 }, { 10, 1549 }, { 10, 1547 }, { 10, 1545 
}, { 10, 1543 }, { 10, 1541 }, { 10, 1539 }, { 10, 1537 }, { 10, 1535 }, { 10, 1533 }, { 10, 1531 }, { 10, 1529 }, { 10, 1527 }, { 10, 1525 }, { 10, 1523 }, { 10, 1521 }, { 10, 1519 }, { 10, 1517 }, { 10, 1515 }, { 10, 1513 }, { 10, 1511 }, { 10, 1509 }, { 10, 1507 }, { 10, 1505 }, { 10, 1503 }, { 10, 1501 }, { 10, 1499 }, { 10, 1497 }, { 10, 1495 }, { 10, 1493 }, { 10, 1491 }, { 10, 1489 }, { 10, 1487 }, { 10, 1485 }, { 10, 1483 }, { 10, 1481 }, { 10, 1479 }, { 10, 1477 }, { 10, 1475 }, { 10, 1473 }, { 10, 1471 }, { 10, 1469 }, { 10, 1467 }, { 10, 1465 }, { 10, 1463 }, { 10, 1461 }, { 10, 1459 }, { 10, 1457 }, { 10, 1455 }, { 10, 1453 }, { 10, 1451 }, { 10, 1449 }, { 10, 1447 }, { 10, 1445 }, { 10, 1443 }, { 10, 1441 }, { 10, 1439 }, { 10, 1437 }, { 10, 1435 }, { 10, 1433 }, { 10, 1431 }, { 10, 1429 }, { 10, 1427 }, { 10, 1425 }, { 10, 1423 }, { 10, 1421 }, { 10, 1419 }, { 10, 1417 }, { 10, 1415 }, { 10, 1413 }, { 10, 1411 }, { 10, 1409 }, { 10, 1407 }, { 10, 1405 }, { 10, 1403 }, { 10, 1401 }, { 10, 1399 }, { 10, 1397 }, { 10, 1395 }, { 10, 1393 }, { 10, 1391 }, { 10, 1389 }, { 10, 1387 }, { 10, 1385 }, { 10, 1383 }, { 10, 1381 }, { 10, 1379 }, { 10, 1377 }, { 10, 1375 }, { 10, 1373 }, { 10, 1371 }, { 10, 1369 }, { 10, 1367 }, { 10, 1365 }, { 10, 1363 }, { 10, 1361 }, { 10, 1359 }, { 10, 1357 }, { 10, 1355 }, { 10, 1353 }, { 10, 1351 }, { 10, 1349 }, { 10, 1347 }, { 10, 1345 }, { 10, 1343 }, { 10, 1341 }, { 10, 1339 }, { 10, 1337 }, { 10, 1335 }, { 10, 1333 }, { 10, 1331 }, { 10, 1329 }, { 10, 1327 }, { 10, 1325 }, { 10, 1323 }, { 10, 1321 }, { 10, 1319 }, { 10, 1317 }, { 10, 1315 }, { 10, 1313 }, { 10, 1311 }, { 10, 1309 }, { 10, 1307 }, { 10, 1305 }, { 10, 1303 }, { 10, 1301 }, { 10, 1299 }, { 10, 1297 }, { 10, 1295 }, { 10, 1293 }, { 10, 1291 }, { 10, 1289 }, { 10, 1287 }, { 10, 1285 }, { 10, 1283 }, { 10, 1281 }, { 10, 1279 }, { 10, 1277 }, { 10, 1275 }, { 10, 1273 }, { 10, 1271 }, { 10, 1269 }, { 10, 1267 }, { 10, 1265 }, { 10, 1263 }, { 10, 1261 }, { 10, 1259 }, { 10, 1257 }, { 10, 1255 }, { 10, 1253 }, { 10, 1251 }, { 10, 1249 }, { 10, 1247 }, { 10, 1245 }, { 10, 1243 }, { 10, 1241 }, { 10, 1239 }, { 10, 1237 }, { 10, 1235 }, { 10, 1233 }, { 10, 1231 }, { 10, 1229 }, { 10, 1227 }, { 10, 1225 }, { 10, 1223 }, { 10, 1221 }, { 10, 1219 }, { 10, 1217 }, { 10, 1215 }, { 10, 1213 }, { 10, 1211 }, { 10, 1209 }, { 10, 1207 }, { 10, 1205 }, { 10, 1203 }, { 10, 1201 }, { 10, 1199 }, { 10, 1197 }, { 10, 1195 }, { 10, 1193 }, { 10, 1191 }, { 10, 1189 }, { 10, 1187 }, { 10, 1185 }, { 10, 1183 }, { 10, 1181 }, { 10, 1179 }, { 10, 1177 }, { 10, 1175 }, { 10, 1173 }, { 10, 1171 }, { 10, 1169 }, { 10, 1167 }, { 10, 1165 }, { 10, 1163 }, { 10, 1161 }, { 10, 1159 }, { 10, 1157 }, { 10, 1155 }, { 10, 1153 }, { 10, 1151 }, { 10, 1149 }, { 10, 1147 }, { 10, 1145 }, { 10, 1143 }, { 10, 1141 }, { 10, 1139 }, { 10, 1137 }, { 10, 1135 }, { 10, 1133 }, { 10, 1131 }, { 10, 1129 }, { 10, 1127 }, { 10, 1125 }, { 10, 1123 }, { 10, 1121 }, { 10, 1119 }, { 10, 1117 }, { 10, 1115 }, { 10, 1113 }, { 10, 1111 }, { 10, 1109 }, { 10, 1107 }, { 10, 1105 }, { 10, 1103 }, { 10, 1101 }, { 10, 1099 }, { 10, 1097 }, { 10, 1095 }, { 10, 1093 }, { 10, 1091 }, { 10, 1089 }, { 10, 1087 }, { 10, 1085 }, { 10, 1083 }, { 10, 1081 }, { 10, 1079 }, { 10, 1077 }, { 10, 1075 }, { 10, 1073 }, { 10, 1071 }, { 10, 1069 }, { 10, 1067 }, { 10, 1065 }, { 10, 1063 }, { 10, 1061 }, { 10, 1059 }, { 10, 1057 }, { 10, 1055 }, { 10, 1053 }, { 10, 1051 }, { 10, 1049 }, { 10, 1047 }, { 10, 1045 }, { 10, 1043 }, { 10, 1041 }, { 10, 1039 }, { 10, 
1037 }, { 10, 1035 }, { 10, 1033 }, { 10, 1031 }, { 10, 1029 }, { 10, 1027 }, { 10, 1025 }, { 10, 1023 }, { 10, 1021 }, { 10, 1019 }, { 10, 1017 }, { 10, 1015 }, { 10, 1013 }, { 10, 1011 }, { 10, 1009 }, { 10, 1007 }, { 10, 1005 }, { 10, 1003 }, { 10, 1001 }, { 10, 999 }, { 10, 997 }, { 10, 995 }, { 10, 993 }, { 10, 991 }, { 10, 989 }, { 10, 987 }, { 10, 985 }, { 10, 983 }, { 10, 981 }, { 10, 979 }, { 10, 977 }, { 10, 975 }, { 10, 973 }, { 10, 971 }, { 10, 969 }, { 10, 967 }, { 10, 965 }, { 10, 963 }, { 10, 961 }, { 10, 959 }, { 10, 957 }, { 10, 955 }, { 10, 953 }, { 10, 951 }, { 10, 949 }, { 10, 947 }, { 10, 945 }, { 10, 943 }, { 10, 941 }, { 10, 939 }, { 10, 937 }, { 10, 935 }, { 10, 933 }, { 10, 931 }, { 10, 929 }, { 10, 927 }, { 10, 925 }, { 10, 923 }, { 10, 921 }, { 10, 919 }, { 10, 917 }, { 10, 915 }, { 10, 913 }, { 10, 911 }, { 10, 909 }, { 10, 907 }, { 10, 905 }, { 10, 903 }, { 10, 901 }, { 10, 899 }, { 10, 897 }, { 10, 895 }, { 10, 893 }, { 10, 891 }, { 10, 889 }, { 10, 887 }, { 10, 885 }, { 10, 883 }, { 10, 881 }, { 10, 879 }, { 10, 877 }, { 10, 875 }, { 10, 873 }, { 10, 871 }, { 10, 869 }, { 10, 867 }, { 10, 865 }, { 10, 863 }, { 10, 861 }, { 10, 859 }, { 10, 857 }, { 10, 855 }, { 10, 853 }, { 10, 851 }, { 10, 849 }, { 10, 847 }, { 10, 845 }, { 10, 843 }, { 10, 841 }, { 10, 839 }, { 10, 837 }, { 10, 835 }, { 10, 833 }, { 10, 831 }, { 10, 829 }, { 10, 827 }, { 10, 825 }, { 10, 823 }, { 10, 821 }, { 10, 819 }, { 10, 817 }, { 10, 815 }, { 10, 813 }, { 10, 811 }, { 10, 809 }, { 10, 807 }, { 10, 805 }, { 10, 803 }, { 10, 801 }, { 10, 799 }, { 10, 797 }, { 10, 795 }, { 10, 793 }, { 10, 791 }, { 10, 789 }, { 10, 787 }, { 10, 785 }, { 10, 783 }, { 10, 781 }, { 10, 779 }, { 10, 777 }, { 10, 775 }, { 10, 773 }, { 10, 771 }, { 10, 769 }, { 10, 767 }, { 10, 765 }, { 10, 763 }, { 10, 761 }, { 10, 759 }, { 10, 757 }, { 10, 755 }, { 10, 753 }, { 10, 751 }, { 10, 749 }, { 10, 747 }, { 10, 745 }, { 10, 743 }, { 10, 741 }, { 10, 739 }, { 10, 737 }, { 10, 735 }, { 10, 733 }, { 10, 731 }, { 10, 729 }, { 10, 727 }, { 10, 725 }, { 10, 723 }, { 10, 721 }, { 10, 719 }, { 10, 717 }, { 10, 715 }, { 10, 713 }, { 10, 711 }, { 10, 709 }, { 10, 707 }, { 10, 705 }, { 10, 703 }, { 10, 701 }, { 10, 699 }, { 10, 697 }, { 10, 695 }, { 10, 693 }, { 10, 691 }, { 10, 689 }, { 10, 687 }, { 10, 685 }, { 10, 683 }, { 10, 681 }, { 10, 679 }, { 10, 677 }, { 10, 675 }, { 10, 673 }, { 10, 671 }, { 10, 669 }, { 10, 667 }, { 10, 665 }, { 10, 663 }, { 10, 661 }, { 10, 659 }, { 10, 657 }, { 10, 655 }, { 10, 653 }, { 10, 651 }, { 10, 649 }, { 10, 647 }, { 10, 645 }, { 10, 643 }, { 10, 641 }, { 10, 639 }, { 10, 637 }, { 10, 635 }, { 10, 633 }, { 10, 631 }, { 10, 629 }, { 10, 627 }, { 10, 625 }, { 10, 623 }, { 10, 621 }, { 10, 619 }, { 10, 617 }, { 10, 615 }, { 10, 613 }, { 10, 611 }, { 10, 609 }, { 10, 607 }, { 10, 605 }, { 10, 603 }, { 10, 601 }, { 10, 599 }, { 10, 597 }, { 10, 595 }, { 10, 593 }, { 10, 591 }, { 10, 589 }, { 10, 587 }, { 10, 585 }, { 10, 583 }, { 10, 581 }, { 10, 579 }, { 10, 577 }, { 10, 575 }, { 10, 573 }, { 10, 571 }, { 10, 569 }, { 10, 567 }, { 10, 565 }, { 10, 563 }, { 10, 561 }, { 10, 559 }, { 10, 557 }, { 10, 555 }, { 10, 553 }, { 10, 551 }, { 10, 549 }, { 10, 547 }, { 10, 545 }, { 10, 543 }, { 10, 541 }, { 10, 539 }, { 10, 537 }, { 10, 535 }, { 10, 533 }, { 10, 531 }, { 10, 529 }, { 10, 527 }, { 10, 525 }, { 10, 523 }, { 10, 521 }, { 10, 519 }, { 10, 517 }, { 10, 515 }, { 10, 513 }, { 10, 511 }, { 10, 509 }, { 10, 507 }, { 10, 505 }, { 10, 503 }, { 10, 501 }, { 10, 499 }, { 10, 497 }, { 10, 495 }, { 10, 
493 }, { 10, 491 }, { 10, 489 }, { 10, 487 }, { 10, 485 }, { 10, 483 }, { 10, 481 }, { 10, 479 }, { 10, 477 }, { 10, 475 }, { 10, 473 }, { 10, 471 }, { 10, 469 }, { 10, 467 }, { 10, 465 }, { 10, 463 }, { 10, 461 }, { 10, 459 }, { 10, 457 }, { 10, 455 }, { 10, 453 }, { 10, 451 }, { 10, 449 }, { 10, 447 }, { 10, 445 }, { 10, 443 }, { 10, 441 }, { 10, 439 }, { 10, 437 }, { 10, 435 }, { 10, 433 }, { 10, 431 }, { 10, 429 }, { 10, 427 }, { 10, 425 }, { 10, 423 }, { 10, 421 }, { 10, 419 }, { 10, 417 }, { 10, 415 }, { 10, 413 }, { 10, 411 }, { 10, 409 }, { 10, 407 }, { 10, 405 }, { 10, 403 }, { 10, 401 }, { 10, 399 }, { 10, 397 }, { 10, 395 }, { 10, 393 }, { 10, 391 }, { 10, 389 }, { 10, 387 }, { 10, 385 }, { 10, 383 }, { 10, 381 }, { 10, 379 }, { 10, 377 }, { 10, 375 }, { 10, 373 }, { 10, 371 }, { 10, 369 }, { 10, 367 }, { 10, 365 }, { 10, 363 }, { 10, 361 }, { 10, 359 }, { 10, 357 }, { 10, 355 }, { 10, 353 }, { 10, 351 }, { 10, 349 }, { 10, 347 }, { 10, 345 }, { 10, 343 }, { 10, 341 }, { 10, 339 }, { 10, 337 }, { 10, 335 }, { 10, 333 }, { 10, 331 }, { 10, 329 }, { 10, 327 }, { 10, 325 }, { 10, 323 }, { 10, 321 }, { 10, 319 }, { 10, 317 }, { 10, 315 }, { 10, 313 }, { 10, 311 }, { 10, 309 }, { 10, 307 }, { 10, 305 }, { 10, 303 }, { 10, 301 }, { 10, 299 }, { 10, 297 }, { 10, 295 }, { 10, 293 }, { 10, 291 }, { 10, 289 }, { 10, 287 }, { 10, 285 }, { 10, 283 }, { 10, 281 }, { 10, 279 }, { 10, 277 }, { 10, 275 }, { 10, 273 }, { 10, 271 }, { 10, 269 }, { 10, 267 }, { 10, 265 }, { 10, 263 }, { 10, 261 }, { 10, 259 }, { 10, 257 }, { 10, 255 }, { 10, 253 }, { 10, 251 }, { 10, 249 }, { 10, 247 }, { 10, 245 }, { 10, 243 }, { 10, 241 }, { 10, 239 }, { 10, 237 }, { 10, 235 }, { 10, 233 }, { 10, 231 }, { 10, 229 }, { 10, 227 }, { 10, 225 }, { 10, 223 }, { 10, 221 }, { 10, 219 }, { 10, 217 }, { 10, 215 }, { 10, 213 }, { 10, 211 }, { 10, 209 }, { 10, 207 }, { 10, 205 }, { 10, 203 }, { 10, 201 }, { 10, 199 }, { 10, 197 }, { 10, 195 }, { 10, 193 }, { 10, 191 }, { 10, 189 }, { 10, 187 }, { 10, 185 }, { 10, 183 }, { 10, 181 }, { 10, 179 }, { 10, 177 }, { 10, 175 }, { 10, 173 }, { 10, 171 }, { 10, 169 }, { 10, 167 }, { 10, 165 }, { 10, 163 }, { 10, 161 }, { 10, 159 }, { 10, 157 }, { 10, 155 }, { 10, 153 }, { 10, 151 }, { 10, 149 }, { 10, 147 }, { 10, 145 }, { 10, 143 }, { 10, 141 }, { 10, 139 }, { 10, 137 }, { 10, 135 }, { 10, 133 }, { 10, 131 }, { 10, 129 }, { 10, 127 }, { 10, 125 }, { 10, 123 }, { 10, 121 }, { 10, 119 }, { 10, 117 }, { 10, 115 }, { 10, 113 }, { 10, 111 }, { 10, 109 }, { 10, 107 }, { 10, 105 }, { 10, 103 }, { 10, 101 }, { 10, 99 }, { 10, 97 }, { 10, 95 }, { 10, 93 }, { 10, 91 }, { 10, 89 }, { 10, 87 }, { 10, 85 }, { 10, 83 }, { 10, 81 }, { 10, 79 }, { 10, 77 }, { 10, 75 }, { 10, 73 }, { 10, 71 }, { 10, 69 }, { 10, 67 }, { 10, 65 }, { 10, 63 }, { 10, 61 }, { 10, 59 }, { 10, 57 }, { 10, 55 }, { 10, 53 }, { 10, 51 }, { 10, 49 }, { 10, 47 }, { 10, 45 }, { 10, 43 }, { 10, 41 }, { 10, 39 }, { 10, 37 }, { 10, 35 }, { 10, 33 }, { 10, 31 }, { 10, 29 }, { 10, 27 }, { 10, 25 }, { 10, 23 }, { 10, 21 }, { 10, 19 }, { 10, 17 }, { 10, 15 }, { 10, 13 }, { 10, 11 }, { 10, 9 }, { 10, 7 }, { 10, 5 }, { 10, 3 }, { 10, 1 }, { 9, 63 }, { 9, 61 }, { 9, 59 }, { 9, 57 }, { 9, 55 }, { 9, 53 }, { 9, 51 }, { 9, 49 }, { 9, 47 }, { 9, 45 }, { 9, 43 }, { 9, 41 }, { 9, 39 }, { 9, 37 }, { 9, 35 }, { 9, 33 }, { 9, 31 }, { 9, 29 }, { 9, 27 }, { 9, 25 }, { 9, 23 }, { 9, 21 }, { 9, 19 }, { 9, 17 }, { 9, 15 }, { 9, 13 }, { 9, 11 }, { 9, 9 }, { 9, 7 }, { 9, 5 }, { 9, 3 }, { 9, 1 }, { 8, 31 }, { 8, 29 }, { 8, 27 }, { 8, 25 }, { 8, 23 }, { 
8, 21 }, { 8, 19 }, { 8, 17 }, { 8, 15 }, { 8, 13 }, { 8, 11 }, { 8, 9 }, { 8, 7 }, { 8, 5 }, { 8, 3 }, { 8, 1 }, { 7, 15 }, { 7, 13 }, { 7, 11 }, { 7, 9 }, { 7, 7 }, { 7, 5 }, { 7, 3 }, { 7, 1 }, { 6, 7 }, { 6, 5 }, { 6, 3 }, { 6, 1 }, { 5, 3 }, { 5, 1 }, { 4, 1 }, { 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, { 4, 0 }, { 5, 0 }, { 5, 2 }, { 6, 0 }, { 6, 2 }, { 6, 4 }, { 6, 6 }, { 7, 0 }, { 7, 2 }, { 7, 4 }, { 7, 6 }, { 7, 8 }, { 7, 10 }, { 7, 12 }, { 7, 14 }, { 8, 0 }, { 8, 2 }, { 8, 4 }, { 8, 6 }, { 8, 8 }, { 8, 10 }, { 8, 12 }, { 8, 14 }, { 8, 16 }, { 8, 18 }, { 8, 20 }, { 8, 22 }, { 8, 24 }, { 8, 26 }, { 8, 28 }, { 8, 30 }, { 9, 0 }, { 9, 2 }, { 9, 4 }, { 9, 6 }, { 9, 8 }, { 9, 10 }, { 9, 12 }, { 9, 14 }, { 9, 16 }, { 9, 18 }, { 9, 20 }, { 9, 22 }, { 9, 24 }, { 9, 26 }, { 9, 28 }, { 9, 30 }, { 9, 32 }, { 9, 34 }, { 9, 36 }, { 9, 38 }, { 9, 40 }, { 9, 42 }, { 9, 44 }, { 9, 46 }, { 9, 48 }, { 9, 50 }, { 9, 52 }, { 9, 54 }, { 9, 56 }, { 9, 58 }, { 9, 60 }, { 9, 62 }, { 10, 0 }, { 10, 2 }, { 10, 4 }, { 10, 6 }, { 10, 8 }, { 10, 10 }, { 10, 12 }, { 10, 14 }, { 10, 16 }, { 10, 18 }, { 10, 20 }, { 10, 22 }, { 10, 24 }, { 10, 26 }, { 10, 28 }, { 10, 30 }, { 10, 32 }, { 10, 34 }, { 10, 36 }, { 10, 38 }, { 10, 40 }, { 10, 42 }, { 10, 44 }, { 10, 46 }, { 10, 48 }, { 10, 50 }, { 10, 52 }, { 10, 54 }, { 10, 56 }, { 10, 58 }, { 10, 60 }, { 10, 62 }, { 10, 64 }, { 10, 66 }, { 10, 68 }, { 10, 70 }, { 10, 72 }, { 10, 74 }, { 10, 76 }, { 10, 78 }, { 10, 80 }, { 10, 82 }, { 10, 84 }, { 10, 86 }, { 10, 88 }, { 10, 90 }, { 10, 92 }, { 10, 94 }, { 10, 96 }, { 10, 98 }, { 10, 100 }, { 10, 102 }, { 10, 104 }, { 10, 106 }, { 10, 108 }, { 10, 110 }, { 10, 112 }, { 10, 114 }, { 10, 116 }, { 10, 118 }, { 10, 120 }, { 10, 122 }, { 10, 124 }, { 10, 126 }, { 10, 128 }, { 10, 130 }, { 10, 132 }, { 10, 134 }, { 10, 136 }, { 10, 138 }, { 10, 140 }, { 10, 142 }, { 10, 144 }, { 10, 146 }, { 10, 148 }, { 10, 150 }, { 10, 152 }, { 10, 154 }, { 10, 156 }, { 10, 158 }, { 10, 160 }, { 10, 162 }, { 10, 164 }, { 10, 166 }, { 10, 168 }, { 10, 170 }, { 10, 172 }, { 10, 174 }, { 10, 176 }, { 10, 178 }, { 10, 180 }, { 10, 182 }, { 10, 184 }, { 10, 186 }, { 10, 188 }, { 10, 190 }, { 10, 192 }, { 10, 194 }, { 10, 196 }, { 10, 198 }, { 10, 200 }, { 10, 202 }, { 10, 204 }, { 10, 206 }, { 10, 208 }, { 10, 210 }, { 10, 212 }, { 10, 214 }, { 10, 216 }, { 10, 218 }, { 10, 220 }, { 10, 222 }, { 10, 224 }, { 10, 226 }, { 10, 228 }, { 10, 230 }, { 10, 232 }, { 10, 234 }, { 10, 236 }, { 10, 238 }, { 10, 240 }, { 10, 242 }, { 10, 244 }, { 10, 246 }, { 10, 248 }, { 10, 250 }, { 10, 252 }, { 10, 254 }, { 10, 256 }, { 10, 258 }, { 10, 260 }, { 10, 262 }, { 10, 264 }, { 10, 266 }, { 10, 268 }, { 10, 270 }, { 10, 272 }, { 10, 274 }, { 10, 276 }, { 10, 278 }, { 10, 280 }, { 10, 282 }, { 10, 284 }, { 10, 286 }, { 10, 288 }, { 10, 290 }, { 10, 292 }, { 10, 294 }, { 10, 296 }, { 10, 298 }, { 10, 300 }, { 10, 302 }, { 10, 304 }, { 10, 306 }, { 10, 308 }, { 10, 310 }, { 10, 312 }, { 10, 314 }, { 10, 316 }, { 10, 318 }, { 10, 320 }, { 10, 322 }, { 10, 324 }, { 10, 326 }, { 10, 328 }, { 10, 330 }, { 10, 332 }, { 10, 334 }, { 10, 336 }, { 10, 338 }, { 10, 340 }, { 10, 342 }, { 10, 344 }, { 10, 346 }, { 10, 348 }, { 10, 350 }, { 10, 352 }, { 10, 354 }, { 10, 356 }, { 10, 358 }, { 10, 360 }, { 10, 362 }, { 10, 364 }, { 10, 366 }, { 10, 368 }, { 10, 370 }, { 10, 372 }, { 10, 374 }, { 10, 376 }, { 10, 378 }, { 10, 380 }, { 10, 382 }, { 10, 384 }, { 10, 386 }, { 10, 388 }, { 10, 390 }, { 10, 392 }, { 10, 394 }, { 10, 396 }, { 10, 398 }, { 
10, 400 }, { 10, 402 }, { 10, 404 }, { 10, 406 }, { 10, 408 }, { 10, 410 }, { 10, 412 }, { 10, 414 }, { 10, 416 }, { 10, 418 }, { 10, 420 }, { 10, 422 }, { 10, 424 }, { 10, 426 }, { 10, 428 }, { 10, 430 }, { 10, 432 }, { 10, 434 }, { 10, 436 }, { 10, 438 }, { 10, 440 }, { 10, 442 }, { 10, 444 }, { 10, 446 }, { 10, 448 }, { 10, 450 }, { 10, 452 }, { 10, 454 }, { 10, 456 }, { 10, 458 }, { 10, 460 }, { 10, 462 }, { 10, 464 }, { 10, 466 }, { 10, 468 }, { 10, 470 }, { 10, 472 }, { 10, 474 }, { 10, 476 }, { 10, 478 }, { 10, 480 }, { 10, 482 }, { 10, 484 }, { 10, 486 }, { 10, 488 }, { 10, 490 }, { 10, 492 }, { 10, 494 }, { 10, 496 }, { 10, 498 }, { 10, 500 }, { 10, 502 }, { 10, 504 }, { 10, 506 }, { 10, 508 }, { 10, 510 }, { 10, 512 }, { 10, 514 }, { 10, 516 }, { 10, 518 }, { 10, 520 }, { 10, 522 }, { 10, 524 }, { 10, 526 }, { 10, 528 }, { 10, 530 }, { 10, 532 }, { 10, 534 }, { 10, 536 }, { 10, 538 }, { 10, 540 }, { 10, 542 }, { 10, 544 }, { 10, 546 }, { 10, 548 }, { 10, 550 }, { 10, 552 }, { 10, 554 }, { 10, 556 }, { 10, 558 }, { 10, 560 }, { 10, 562 }, { 10, 564 }, { 10, 566 }, { 10, 568 }, { 10, 570 }, { 10, 572 }, { 10, 574 }, { 10, 576 }, { 10, 578 }, { 10, 580 }, { 10, 582 }, { 10, 584 }, { 10, 586 }, { 10, 588 }, { 10, 590 }, { 10, 592 }, { 10, 594 }, { 10, 596 }, { 10, 598 }, { 10, 600 }, { 10, 602 }, { 10, 604 }, { 10, 606 }, { 10, 608 }, { 10, 610 }, { 10, 612 }, { 10, 614 }, { 10, 616 }, { 10, 618 }, { 10, 620 }, { 10, 622 }, { 10, 624 }, { 10, 626 }, { 10, 628 }, { 10, 630 }, { 10, 632 }, { 10, 634 }, { 10, 636 }, { 10, 638 }, { 10, 640 }, { 10, 642 }, { 10, 644 }, { 10, 646 }, { 10, 648 }, { 10, 650 }, { 10, 652 }, { 10, 654 }, { 10, 656 }, { 10, 658 }, { 10, 660 }, { 10, 662 }, { 10, 664 }, { 10, 666 }, { 10, 668 }, { 10, 670 }, { 10, 672 }, { 10, 674 }, { 10, 676 }, { 10, 678 }, { 10, 680 }, { 10, 682 }, { 10, 684 }, { 10, 686 }, { 10, 688 }, { 10, 690 }, { 10, 692 }, { 10, 694 }, { 10, 696 }, { 10, 698 }, { 10, 700 }, { 10, 702 }, { 10, 704 }, { 10, 706 }, { 10, 708 }, { 10, 710 }, { 10, 712 }, { 10, 714 }, { 10, 716 }, { 10, 718 }, { 10, 720 }, { 10, 722 }, { 10, 724 }, { 10, 726 }, { 10, 728 }, { 10, 730 }, { 10, 732 }, { 10, 734 }, { 10, 736 }, { 10, 738 }, { 10, 740 }, { 10, 742 }, { 10, 744 }, { 10, 746 }, { 10, 748 }, { 10, 750 }, { 10, 752 }, { 10, 754 }, { 10, 756 }, { 10, 758 }, { 10, 760 }, { 10, 762 }, { 10, 764 }, { 10, 766 }, { 10, 768 }, { 10, 770 }, { 10, 772 }, { 10, 774 }, { 10, 776 }, { 10, 778 }, { 10, 780 }, { 10, 782 }, { 10, 784 }, { 10, 786 }, { 10, 788 }, { 10, 790 }, { 10, 792 }, { 10, 794 }, { 10, 796 }, { 10, 798 }, { 10, 800 }, { 10, 802 }, { 10, 804 }, { 10, 806 }, { 10, 808 }, { 10, 810 }, { 10, 812 }, { 10, 814 }, { 10, 816 }, { 10, 818 }, { 10, 820 }, { 10, 822 }, { 10, 824 }, { 10, 826 }, { 10, 828 }, { 10, 830 }, { 10, 832 }, { 10, 834 }, { 10, 836 }, { 10, 838 }, { 10, 840 }, { 10, 842 }, { 10, 844 }, { 10, 846 }, { 10, 848 }, { 10, 850 }, { 10, 852 }, { 10, 854 }, { 10, 856 }, { 10, 858 }, { 10, 860 }, { 10, 862 }, { 10, 864 }, { 10, 866 }, { 10, 868 }, { 10, 870 }, { 10, 872 }, { 10, 874 }, { 10, 876 }, { 10, 878 }, { 10, 880 }, { 10, 882 }, { 10, 884 }, { 10, 886 }, { 10, 888 }, { 10, 890 }, { 10, 892 }, { 10, 894 }, { 10, 896 }, { 10, 898 }, { 10, 900 }, { 10, 902 }, { 10, 904 }, { 10, 906 }, { 10, 908 }, { 10, 910 }, { 10, 912 }, { 10, 914 }, { 10, 916 }, { 10, 918 }, { 10, 920 }, { 10, 922 }, { 10, 924 }, { 10, 926 }, { 10, 928 }, { 10, 930 }, { 10, 932 }, { 10, 934 }, { 10, 936 }, { 10, 938 }, { 10, 940 }, { 10, 942 }, { 10, 944 }, { 10, 
946 }, { 10, 948 }, { 10, 950 }, { 10, 952 }, { 10, 954 }, { 10, 956 }, { 10, 958 }, { 10, 960 }, { 10, 962 }, { 10, 964 }, { 10, 966 }, { 10, 968 }, { 10, 970 }, { 10, 972 }, { 10, 974 }, { 10, 976 }, { 10, 978 }, { 10, 980 }, { 10, 982 }, { 10, 984 }, { 10, 986 }, { 10, 988 }, { 10, 990 }, { 10, 992 }, { 10, 994 }, { 10, 996 }, { 10, 998 }, { 10, 1000 }, { 10, 1002 }, { 10, 1004 }, { 10, 1006 }, { 10, 1008 }, { 10, 1010 }, { 10, 1012 }, { 10, 1014 }, { 10, 1016 }, { 10, 1018 }, { 10, 1020 }, { 10, 1022 }, { 10, 1024 }, { 10, 1026 }, { 10, 1028 }, { 10, 1030 }, { 10, 1032 }, { 10, 1034 }, { 10, 1036 }, { 10, 1038 }, { 10, 1040 }, { 10, 1042 }, { 10, 1044 }, { 10, 1046 }, { 10, 1048 }, { 10, 1050 }, { 10, 1052 }, { 10, 1054 }, { 10, 1056 }, { 10, 1058 }, { 10, 1060 }, { 10, 1062 }, { 10, 1064 }, { 10, 1066 }, { 10, 1068 }, { 10, 1070 }, { 10, 1072 }, { 10, 1074 }, { 10, 1076 }, { 10, 1078 }, { 10, 1080 }, { 10, 1082 }, { 10, 1084 }, { 10, 1086 }, { 10, 1088 }, { 10, 1090 }, { 10, 1092 }, { 10, 1094 }, { 10, 1096 }, { 10, 1098 }, { 10, 1100 }, { 10, 1102 }, { 10, 1104 }, { 10, 1106 }, { 10, 1108 }, { 10, 1110 }, { 10, 1112 }, { 10, 1114 }, { 10, 1116 }, { 10, 1118 }, { 10, 1120 }, { 10, 1122 }, { 10, 1124 }, { 10, 1126 }, { 10, 1128 }, { 10, 1130 }, { 10, 1132 }, { 10, 1134 }, { 10, 1136 }, { 10, 1138 }, { 10, 1140 }, { 10, 1142 }, { 10, 1144 }, { 10, 1146 }, { 10, 1148 }, { 10, 1150 }, { 10, 1152 }, { 10, 1154 }, { 10, 1156 }, { 10, 1158 }, { 10, 1160 }, { 10, 1162 }, { 10, 1164 }, { 10, 1166 }, { 10, 1168 }, { 10, 1170 }, { 10, 1172 }, { 10, 1174 }, { 10, 1176 }, { 10, 1178 }, { 10, 1180 }, { 10, 1182 }, { 10, 1184 }, { 10, 1186 }, { 10, 1188 }, { 10, 1190 }, { 10, 1192 }, { 10, 1194 }, { 10, 1196 }, { 10, 1198 }, { 10, 1200 }, { 10, 1202 }, { 10, 1204 }, { 10, 1206 }, { 10, 1208 }, { 10, 1210 }, { 10, 1212 }, { 10, 1214 }, { 10, 1216 }, { 10, 1218 }, { 10, 1220 }, { 10, 1222 }, { 10, 1224 }, { 10, 1226 }, { 10, 1228 }, { 10, 1230 }, { 10, 1232 }, { 10, 1234 }, { 10, 1236 }, { 10, 1238 }, { 10, 1240 }, { 10, 1242 }, { 10, 1244 }, { 10, 1246 }, { 10, 1248 }, { 10, 1250 }, { 10, 1252 }, { 10, 1254 }, { 10, 1256 }, { 10, 1258 }, { 10, 1260 }, { 10, 1262 }, { 10, 1264 }, { 10, 1266 }, { 10, 1268 }, { 10, 1270 }, { 10, 1272 }, { 10, 1274 }, { 10, 1276 }, { 10, 1278 }, { 10, 1280 }, { 10, 1282 }, { 10, 1284 }, { 10, 1286 }, { 10, 1288 }, { 10, 1290 }, { 10, 1292 }, { 10, 1294 }, { 10, 1296 }, { 10, 1298 }, { 10, 1300 }, { 10, 1302 }, { 10, 1304 }, { 10, 1306 }, { 10, 1308 }, { 10, 1310 }, { 10, 1312 }, { 10, 1314 }, { 10, 1316 }, { 10, 1318 }, { 10, 1320 }, { 10, 1322 }, { 10, 1324 }, { 10, 1326 }, { 10, 1328 }, { 10, 1330 }, { 10, 1332 }, { 10, 1334 }, { 10, 1336 }, { 10, 1338 }, { 10, 1340 }, { 10, 1342 }, { 10, 1344 }, { 10, 1346 }, { 10, 1348 }, { 10, 1350 }, { 10, 1352 }, { 10, 1354 }, { 10, 1356 }, { 10, 1358 }, { 10, 1360 }, { 10, 1362 }, { 10, 1364 }, { 10, 1366 }, { 10, 1368 }, { 10, 1370 }, { 10, 1372 }, { 10, 1374 }, { 10, 1376 }, { 10, 1378 }, { 10, 1380 }, { 10, 1382 }, { 10, 1384 }, { 10, 1386 }, { 10, 1388 }, { 10, 1390 }, { 10, 1392 }, { 10, 1394 }, { 10, 1396 }, { 10, 1398 }, { 10, 1400 }, { 10, 1402 }, { 10, 1404 }, { 10, 1406 }, { 10, 1408 }, { 10, 1410 }, { 10, 1412 }, { 10, 1414 }, { 10, 1416 }, { 10, 1418 }, { 10, 1420 }, { 10, 1422 }, { 10, 1424 }, { 10, 1426 }, { 10, 1428 }, { 10, 1430 }, { 10, 1432 }, { 10, 1434 }, { 10, 1436 }, { 10, 1438 }, { 10, 1440 }, { 10, 1442 }, { 10, 1444 }, { 10, 1446 }, { 10, 1448 }, { 10, 1450 }, { 10, 1452 }, { 10, 1454 }, { 10, 1456 }, { 
10, 1458 }, { 10, 1460 }, { 10, 1462 }, { 10, 1464 }, { 10, 1466 }, { 10, 1468 }, { 10, 1470 }, { 10, 1472 }, { 10, 1474 }, { 10, 1476 }, { 10, 1478 }, { 10, 1480 }, { 10, 1482 }, { 10, 1484 }, { 10, 1486 }, { 10, 1488 }, { 10, 1490 }, { 10, 1492 }, { 10, 1494 }, { 10, 1496 }, { 10, 1498 }, { 10, 1500 }, { 10, 1502 }, { 10, 1504 }, { 10, 1506 }, { 10, 1508 }, { 10, 1510 }, { 10, 1512 }, { 10, 1514 }, { 10, 1516 }, { 10, 1518 }, { 10, 1520 }, { 10, 1522 }, { 10, 1524 }, { 10, 1526 }, { 10, 1528 }, { 10, 1530 }, { 10, 1532 }, { 10, 1534 }, { 10, 1536 }, { 10, 1538 }, { 10, 1540 }, { 10, 1542 }, { 10, 1544 }, { 10, 1546 }, { 10, 1548 }, { 10, 1550 }, { 10, 1552 }, { 10, 1554 }, { 10, 1556 }, { 10, 1558 }, { 10, 1560 }, { 10, 1562 }, { 10, 1564 }, { 10, 1566 }, { 10, 1568 }, { 10, 1570 }, { 10, 1572 }, { 10, 1574 }, { 10, 1576 }, { 10, 1578 }, { 10, 1580 }, { 10, 1582 }, { 10, 1584 }, { 10, 1586 }, { 10, 1588 }, { 10, 1590 }, { 10, 1592 }, { 10, 1594 }, { 10, 1596 }, { 10, 1598 }, { 10, 1600 }, { 10, 1602 }, { 10, 1604 }, { 10, 1606 }, { 10, 1608 }, { 10, 1610 }, { 10, 1612 }, { 10, 1614 }, { 10, 1616 }, { 10, 1618 }, { 10, 1620 }, { 10, 1622 }, { 10, 1624 }, { 10, 1626 }, { 10, 1628 }, { 10, 1630 }, { 10, 1632 }, { 10, 1634 }, { 10, 1636 }, { 10, 1638 }, { 10, 1640 }, { 10, 1642 }, { 10, 1644 }, { 10, 1646 }, { 10, 1648 }, { 10, 1650 }, { 10, 1652 }, { 10, 1654 }, { 10, 1656 }, { 10, 1658 }, { 10, 1660 }, { 10, 1662 }, { 10, 1664 }, { 10, 1666 }, { 10, 1668 }, { 10, 1670 }, { 10, 1672 }, { 10, 1674 }, { 10, 1676 }, { 10, 1678 }, { 10, 1680 }, { 10, 1682 }, { 10, 1684 }, { 10, 1686 }, { 10, 1688 }, { 10, 1690 }, { 10, 1692 }, { 10, 1694 }, { 10, 1696 }, { 10, 1698 }, { 10, 1700 }, { 10, 1702 }, { 10, 1704 }, { 10, 1706 }, { 10, 1708 }, { 10, 1710 }, { 10, 1712 }, { 10, 1714 }, { 10, 1716 }, { 10, 1718 }, { 10, 1720 }, { 10, 1722 }, { 10, 1724 }, { 10, 1726 }, { 10, 1728 }, { 10, 1730 }, { 10, 1732 }, { 10, 1734 }, { 10, 1736 }, { 10, 1738 }, { 10, 1740 }, { 10, 1742 }, { 10, 1744 }, { 10, 1746 }, { 10, 1748 }, { 10, 1750 }, { 10, 1752 }, { 10, 1754 }, { 10, 1756 }, { 10, 1758 }, { 10, 1760 }, { 10, 1762 }, { 10, 1764 }, { 10, 1766 }, { 10, 1768 }, { 10, 1770 }, { 10, 1772 }, { 10, 1774 }, { 10, 1776 }, { 10, 1778 }, { 10, 1780 }, { 10, 1782 }, { 10, 1784 }, { 10, 1786 }, { 10, 1788 }, { 10, 1790 }, { 10, 1792 }, { 10, 1794 }, { 10, 1796 }, { 10, 1798 }, { 10, 1800 }, { 10, 1802 }, { 10, 1804 }, { 10, 1806 }, { 10, 1808 }, { 10, 1810 }, { 10, 1812 }, { 10, 1814 }, { 10, 1816 }, { 10, 1818 }, { 10, 1820 }, { 10, 1822 }, { 10, 1824 }, { 10, 1826 }, { 10, 1828 }, { 10, 1830 }, { 10, 1832 }, { 10, 1834 }, { 10, 1836 }, { 10, 1838 }, { 10, 1840 }, { 10, 1842 }, { 10, 1844 }, { 10, 1846 }, { 10, 1848 }, { 10, 1850 }, { 10, 1852 }, { 10, 1854 }, { 10, 1856 }, { 10, 1858 }, { 10, 1860 }, { 10, 1862 }, { 10, 1864 }, { 10, 1866 }, { 10, 1868 }, { 10, 1870 }, { 10, 1872 }, { 10, 1874 }, { 10, 1876 }, { 10, 1878 }, { 10, 1880 }, { 10, 1882 }, { 10, 1884 }, { 10, 1886 }, { 10, 1888 }, { 10, 1890 }, { 10, 1892 }, { 10, 1894 }, { 10, 1896 }, { 10, 1898 }, { 10, 1900 }, { 10, 1902 }, { 10, 1904 }, { 10, 1906 }, { 10, 1908 }, { 10, 1910 }, { 10, 1912 }, { 10, 1914 }, { 10, 1916 }, { 10, 1918 }, { 10, 1920 }, { 10, 1922 }, { 10, 1924 }, { 10, 1926 }, { 10, 1928 }, { 10, 1930 }, { 10, 1932 }, { 10, 1934 }, { 10, 1936 }, { 10, 1938 }, { 10, 1940 }, { 10, 1942 }, { 10, 1944 }, { 10, 1946 }, { 10, 1948 }, { 10, 1950 }, { 10, 1952 }, { 10, 1954 }, { 10, 1956 }, { 10, 1958 }, { 10, 1960 }, { 10, 1962 }, { 10, 1964 }, 
{ 10, 1966 }, { 10, 1968 }, { 10, 1970 }, { 10, 1972 }, { 10, 1974 }, { 10, 1976 }, { 10, 1978 }, { 10, 1980 }, { 10, 1982 }, { 10, 1984 }, { 10, 1986 }, { 10, 1988 }, { 10, 1990 }, { 10, 1992 }, { 10, 1994 }, { 10, 1996 }, { 10, 1998 }, { 10, 2000 }, { 10, 2002 }, { 10, 2004 }, { 10, 2006 }, { 10, 2008 }, { 10, 2010 }, { 10, 2012 }, { 10, 2014 }, { 10, 2016 }, { 10, 2018 }, { 10, 2020 }, { 10, 2022 }, { 10, 2024 }, { 10, 2026 }, { 10, 2028 }, { 10, 2030 }, { 10, 2032 }, { 10, 2034 }, { 10, 2036 }, { 10, 2038 }, { 10, 2040 }, { 10, 2042 }, { 10, 2044 }, { 10, 2046 }, { 10, 2048 }, { 10, 2050 }, { 10, 2052 }, { 10, 2054 }, { 10, 2056 }, { 10, 2058 }, { 10, 2060 }, { 10, 2062 }, { 10, 2064 }, { 10, 2066 }, { 10, 2068 }, { 10, 2070 }, { 10, 2072 }, { 10, 2074 }, { 10, 2076 }, { 10, 2078 }, { 10, 2080 }, { 10, 2082 }, { 10, 2084 }, { 10, 2086 }, { 10, 2088 }, { 10, 2090 }, { 10, 2092 }, { 10, 2094 }, { 10, 2096 }, { 10, 2098 }, { 10, 2100 }, { 10, 2102 }, { 10, 2104 }, { 10, 2106 }, { 10, 2108 }, { 10, 2110 }, { 10, 2112 }, { 10, 2114 }, { 10, 2116 }, { 10, 2118 }, { 10, 2120 }, { 10, 2122 }, { 10, 2124 }, { 10, 2126 }, { 10, 2128 }, { 10, 2130 }, { 10, 2132 }, { 10, 2134 }, { 10, 2136 }, { 10, 2138 }, { 10, 2140 }, { 10, 2142 }, { 10, 2144 }, { 10, 2146 }, { 10, 2148 }, { 10, 2150 }, { 10, 2152 }, { 10, 2154 }, { 10, 2156 }, { 10, 2158 }, { 10, 2160 }, { 10, 2162 }, { 10, 2164 }, { 10, 2166 }, { 10, 2168 }, { 10, 2170 }, { 10, 2172 }, { 10, 2174 }, { 10, 2176 }, { 10, 2178 }, { 10, 2180 }, { 10, 2182 }, { 10, 2184 }, { 10, 2186 }, { 10, 2188 }, { 10, 2190 }, { 10, 2192 }, { 10, 2194 }, { 10, 2196 }, { 10, 2198 }, { 10, 2200 }, { 10, 2202 }, { 10, 2204 }, { 10, 2206 }, { 10, 2208 }, { 10, 2210 }, { 10, 2212 }, { 10, 2214 }, { 10, 2216 }, { 10, 2218 }, { 10, 2220 }, { 10, 2222 }, { 10, 2224 }, { 10, 2226 }, { 10, 2228 }, { 10, 2230 }, { 10, 2232 }, { 10, 2234 }, { 10, 2236 }, { 10, 2238 }, { 10, 2240 }, { 10, 2242 }, { 10, 2244 }, { 10, 2246 }, { 10, 2248 }, { 10, 2250 }, { 10, 2252 }, { 10, 2254 }, { 10, 2256 }, { 10, 2258 }, { 10, 2260 }, { 10, 2262 }, { 10, 2264 }, { 10, 2266 }, { 10, 2268 }, { 10, 2270 }, { 10, 2272 }, { 10, 2274 }, { 10, 2276 }, { 10, 2278 }, { 10, 2280 }, { 10, 2282 }, { 10, 2284 }, { 10, 2286 }, { 10, 2288 }, { 10, 2290 }, { 10, 2292 }, { 10, 2294 }, { 10, 2296 }, { 10, 2298 }, { 10, 2300 }, { 10, 2302 }, { 10, 2304 }, { 10, 2306 }, { 10, 2308 }, { 10, 2310 }, { 10, 2312 }, { 10, 2314 }, { 10, 2316 }, { 10, 2318 }, { 10, 2320 }, { 10, 2322 }, { 10, 2324 }, { 10, 2326 }, { 10, 2328 }, { 10, 2330 }, { 10, 2332 }, { 10, 2334 }, { 10, 2336 }, { 10, 2338 }, { 10, 2340 }, { 10, 2342 }, { 10, 2344 }, { 10, 2346 }, { 10, 2348 }, { 10, 2350 }, { 10, 2352 }, { 10, 2354 }, { 10, 2356 }, { 10, 2358 }, { 10, 2360 }, { 10, 2362 }, { 10, 2364 }, { 10, 2366 }, { 10, 2368 }, { 10, 2370 }, { 10, 2372 }, { 10, 2374 }, { 10, 2376 }, { 10, 2378 }, { 10, 2380 }, { 10, 2382 }, { 10, 2384 }, { 10, 2386 }, { 10, 2388 }, { 10, 2390 }, { 10, 2392 }, { 10, 2394 }, { 10, 2396 }, { 10, 2398 }, { 10, 2400 }, { 10, 2402 }, { 10, 2404 }, { 10, 2406 }, { 10, 2408 }, { 10, 2410 }, { 10, 2412 }, { 10, 2414 }, { 10, 2416 }, { 10, 2418 }, { 10, 2420 }, { 10, 2422 }, { 10, 2424 }, { 10, 2426 }, { 10, 2428 }, { 10, 2430 }, { 10, 2432 }, { 10, 2434 }, { 10, 2436 }, { 10, 2438 }, { 10, 2440 }, { 10, 2442 }, { 10, 2444 }, { 10, 2446 }, { 10, 2448 }, { 10, 2450 }, { 10, 2452 }, { 10, 2454 }, { 10, 2456 }, { 10, 2458 }, { 10, 2460 }, { 10, 2462 }, { 10, 2464 }, { 10, 2466 }, { 10, 2468 }, { 10, 2470 }, { 10, 2472 
}, { 10, 2474 }, { 10, 2476 }, { 10, 2478 }, { 10, 2480 }, { 10, 2482 }, { 10, 2484 }, { 10, 2486 }, { 10, 2488 }, { 10, 2490 }, { 10, 2492 }, { 10, 2494 }, { 10, 2496 }, { 10, 2498 }, { 10, 2500 }, { 10, 2502 }, { 10, 2504 }, { 10, 2506 }, { 10, 2508 }, { 10, 2510 }, { 10, 2512 }, { 10, 2514 }, { 10, 2516 }, { 10, 2518 }, { 10, 2520 }, { 10, 2522 }, { 10, 2524 }, { 10, 2526 }, { 10, 2528 }, { 10, 2530 }, { 10, 2532 }, { 10, 2534 }, { 10, 2536 }, { 10, 2538 }, { 10, 2540 }, { 10, 2542 }, { 10, 2544 }, { 10, 2546 }, { 10, 2548 }, { 10, 2550 }, { 10, 2552 }, { 10, 2554 }, { 10, 2556 }, { 10, 2558 }, { 10, 2560 }, { 10, 2562 }, { 10, 2564 }, { 10, 2566 }, { 10, 2568 }, { 10, 2570 }, { 10, 2572 }, { 10, 2574 }, { 10, 2576 }, { 10, 2578 }, { 10, 2580 }, { 10, 2582 }, { 10, 2584 }, { 10, 2586 }, { 10, 2588 }, { 10, 2590 }, { 10, 2592 }, { 10, 2594 }, { 10, 2596 }, { 10, 2598 }, { 10, 2600 }, { 10, 2602 }, { 10, 2604 }, { 10, 2606 }, { 10, 2608 }, { 10, 2610 }, { 10, 2612 }, { 10, 2614 }, { 10, 2616 }, { 10, 2618 }, { 10, 2620 }, { 10, 2622 }, { 10, 2624 }, { 10, 2626 }, { 10, 2628 }, { 10, 2630 }, { 10, 2632 }, { 10, 2634 }, { 10, 2636 }, { 10, 2638 }, { 10, 2640 }, { 10, 2642 }, { 10, 2644 }, { 10, 2646 }, { 10, 2648 }, { 10, 2650 }, { 10, 2652 }, { 10, 2654 }, { 10, 2656 }, { 10, 2658 }, { 10, 2660 }, { 10, 2662 }, { 10, 2664 }, { 10, 2666 }, { 10, 2668 }, { 10, 2670 }, { 10, 2672 }, { 10, 2674 }, { 10, 2676 }, { 10, 2678 }, { 10, 2680 }, { 10, 2682 }, { 10, 2684 }, { 10, 2686 }, { 10, 2688 }, { 10, 2690 }, { 10, 2692 }, { 10, 2694 }, { 10, 2696 }, { 10, 2698 }, { 10, 2700 }, { 10, 2702 }, { 10, 2704 }, { 10, 2706 }, { 10, 2708 }, { 10, 2710 }, { 10, 2712 }, { 10, 2714 }, { 10, 2716 }, { 10, 2718 }, { 10, 2720 }, { 10, 2722 }, { 10, 2724 }, { 10, 2726 }, { 10, 2728 }, { 10, 2730 }, { 10, 2732 }, { 10, 2734 }, { 10, 2736 }, { 10, 2738 }, { 10, 2740 }, { 10, 2742 }, { 10, 2744 }, { 10, 2746 }, { 10, 2748 }, { 10, 2750 }, { 10, 2752 }, { 10, 2754 }, { 10, 2756 }, { 10, 2758 }, { 10, 2760 }, { 10, 2762 }, { 10, 2764 }, { 10, 2766 }, { 10, 2768 }, { 10, 2770 }, { 10, 2772 }, { 10, 2774 }, { 10, 2776 }, { 10, 2778 }, { 10, 2780 }, { 10, 2782 }, { 10, 2784 }, { 10, 2786 }, { 10, 2788 }, { 10, 2790 }, { 10, 2792 }, { 10, 2794 }, { 10, 2796 }, { 10, 2798 }, { 10, 2800 }, { 10, 2802 }, { 10, 2804 }, { 10, 2806 }, { 10, 2808 }, { 10, 2810 }, { 10, 2812 }, { 10, 2814 }, { 10, 2816 }, { 10, 2818 }, { 10, 2820 }, { 10, 2822 }, { 10, 2824 }, { 10, 2826 }, { 10, 2828 }, { 10, 2830 }, { 10, 2832 }, { 10, 2834 }, { 10, 2836 }, { 10, 2838 }, { 10, 2840 }, { 10, 2842 }, { 10, 2844 }, { 10, 2846 }, { 10, 2848 }, { 10, 2850 }, { 10, 2852 }, { 10, 2854 }, { 10, 2856 }, { 10, 2858 }, { 10, 2860 }, { 10, 2862 }, { 10, 2864 }, { 10, 2866 }, { 10, 2868 }, { 10, 2870 }, { 10, 2872 }, { 10, 2874 }, { 10, 2876 }, { 10, 2878 }, { 10, 2880 }, { 10, 2882 }, { 10, 2884 }, { 10, 2886 }, { 10, 2888 }, { 10, 2890 }, { 10, 2892 }, { 10, 2894 }, { 10, 2896 }, { 10, 2898 }, { 10, 2900 }, { 10, 2902 }, { 10, 2904 }, { 10, 2906 }, { 10, 2908 }, { 10, 2910 }, { 10, 2912 }, { 10, 2914 }, { 10, 2916 }, { 10, 2918 }, { 10, 2920 }, { 10, 2922 }, { 10, 2924 }, { 10, 2926 }, { 10, 2928 }, { 10, 2930 }, { 10, 2932 }, { 10, 2934 }, { 10, 2936 }, { 10, 2938 }, { 10, 2940 }, { 10, 2942 }, { 10, 2944 }, { 10, 2946 }, { 10, 2948 }, { 10, 2950 }, { 10, 2952 }, { 10, 2954 }, { 10, 2956 }, { 10, 2958 }, { 10, 2960 }, { 10, 2962 }, { 10, 2964 }, { 10, 2966 }, { 10, 2968 }, { 10, 2970 }, { 10, 2972 }, { 10, 2974 }, { 10, 2976 }, { 10, 2978 }, { 10, 
2980 }, { 10, 2982 }, { 10, 2984 }, { 10, 2986 }, { 10, 2988 }, { 10, 2990 }, { 10, 2992 }, { 10, 2994 }, { 10, 2996 }, { 10, 2998 }, { 10, 3000 }, { 10, 3002 }, { 10, 3004 }, { 10, 3006 }, { 10, 3008 }, { 10, 3010 }, { 10, 3012 }, { 10, 3014 }, { 10, 3016 }, { 10, 3018 }, { 10, 3020 }, { 10, 3022 }, { 10, 3024 }, { 10, 3026 }, { 10, 3028 }, { 10, 3030 }, { 10, 3032 }, { 10, 3034 }, { 10, 3036 }, { 10, 3038 }, { 10, 3040 }, { 10, 3042 }, { 10, 3044 }, { 10, 3046 }, { 10, 3048 }, { 10, 3050 }, { 10, 3052 }, { 10, 3054 }, { 10, 3056 }, { 10, 3058 }, { 10, 3060 }, { 10, 3062 }, { 10, 3064 }, { 10, 3066 }, { 10, 3068 }, { 10, 3070 }, { 10, 3072 }, { 10, 3074 }, { 10, 3076 }, { 10, 3078 }, { 10, 3080 }, { 10, 3082 }, { 10, 3084 }, { 10, 3086 }, { 10, 3088 }, { 10, 3090 }, { 10, 3092 }, { 10, 3094 }, { 10, 3096 }, { 10, 3098 }, { 10, 3100 }, { 10, 3102 }, { 10, 3104 }, { 10, 3106 }, { 10, 3108 }, { 10, 3110 }, { 10, 3112 }, { 10, 3114 }, { 10, 3116 }, { 10, 3118 }, { 10, 3120 }, { 10, 3122 }, { 10, 3124 }, { 10, 3126 }, { 10, 3128 }, { 10, 3130 }, { 10, 3132 }, { 10, 3134 }, { 10, 3136 }, { 10, 3138 }, { 10, 3140 }, { 10, 3142 }, { 10, 3144 }, { 10, 3146 }, { 10, 3148 }, { 10, 3150 }, { 10, 3152 }, { 10, 3154 }, { 10, 3156 }, { 10, 3158 }, { 10, 3160 }, { 10, 3162 }, { 10, 3164 }, { 10, 3166 }, { 10, 3168 }, { 10, 3170 }, { 10, 3172 }, { 10, 3174 }, { 10, 3176 }, { 10, 3178 }, { 10, 3180 }, { 10, 3182 }, { 10, 3184 }, { 10, 3186 }, { 10, 3188 }, { 10, 3190 }, { 10, 3192 }, { 10, 3194 }, { 10, 3196 }, { 10, 3198 }, { 10, 3200 }, { 10, 3202 }, { 10, 3204 }, { 10, 3206 }, { 10, 3208 }, { 10, 3210 }, { 10, 3212 }, { 10, 3214 }, { 10, 3216 }, { 10, 3218 }, { 10, 3220 }, { 10, 3222 }, { 10, 3224 }, { 10, 3226 }, { 10, 3228 }, { 10, 3230 }, { 10, 3232 }, { 10, 3234 }, { 10, 3236 }, { 10, 3238 }, { 10, 3240 }, { 10, 3242 }, { 10, 3244 }, { 10, 3246 }, { 10, 3248 }, { 10, 3250 }, { 10, 3252 }, { 10, 3254 }, { 10, 3256 }, { 10, 3258 }, { 10, 3260 }, { 10, 3262 }, { 10, 3264 }, { 10, 3266 }, { 10, 3268 }, { 10, 3270 }, { 10, 3272 }, { 10, 3274 }, { 10, 3276 }, { 10, 3278 }, { 10, 3280 }, { 10, 3282 }, { 10, 3284 }, { 10, 3286 }, { 10, 3288 }, { 10, 3290 }, { 10, 3292 }, { 10, 3294 }, { 10, 3296 }, { 10, 3298 }, { 10, 3300 }, { 10, 3302 }, { 10, 3304 }, { 10, 3306 }, { 10, 3308 }, { 10, 3310 }, { 10, 3312 }, { 10, 3314 }, { 10, 3316 }, { 10, 3318 }, { 10, 3320 }, { 10, 3322 }, { 10, 3324 }, { 10, 3326 }, { 10, 3328 }, { 10, 3330 }, { 10, 3332 }, { 10, 3334 }, { 10, 3336 }, { 10, 3338 }, { 10, 3340 }, { 10, 3342 }, { 10, 3344 }, { 10, 3346 }, { 10, 3348 }, { 10, 3350 }, { 10, 3352 }, { 10, 3354 }, { 10, 3356 }, { 10, 3358 }, { 10, 3360 }, { 10, 3362 }, { 10, 3364 }, { 10, 3366 }, { 10, 3368 }, { 10, 3370 }, { 10, 3372 }, { 10, 3374 }, { 10, 3376 }, { 10, 3378 }, { 10, 3380 }, { 10, 3382 }, { 10, 3384 }, { 10, 3386 }, { 10, 3388 }, { 10, 3390 }, { 10, 3392 }, { 10, 3394 }, { 10, 3396 }, { 10, 3398 }, { 10, 3400 }, { 10, 3402 }, { 10, 3404 }, { 10, 3406 }, { 10, 3408 }, { 10, 3410 }, { 10, 3412 }, { 10, 3414 }, { 10, 3416 }, { 10, 3418 }, { 10, 3420 }, { 10, 3422 }, { 10, 3424 }, { 10, 3426 }, { 10, 3428 }, { 10, 3430 }, { 10, 3432 }, { 10, 3434 }, { 10, 3436 }, { 10, 3438 }, { 10, 3440 }, { 10, 3442 }, { 10, 3444 }, { 10, 3446 }, { 10, 3448 }, { 10, 3450 }, { 10, 3452 }, { 10, 3454 }, { 10, 3456 }, { 10, 3458 }, { 10, 3460 }, { 10, 3462 }, { 10, 3464 }, { 10, 3466 }, { 10, 3468 }, { 10, 3470 }, { 10, 3472 }, { 10, 3474 }, { 10, 3476 }, { 10, 3478 }, { 10, 3480 }, { 10, 3482 }, { 10, 3484 }, { 10, 3486 }, { 
10, 3488 }, { 10, 3490 }, { 10, 3492 }, { 10, 3494 }, { 10, 3496 }, { 10, 3498 }, { 10, 3500 }, { 10, 3502 }, { 10, 3504 }, { 10, 3506 }, { 10, 3508 }, { 10, 3510 }, { 10, 3512 }, { 10, 3514 }, { 10, 3516 }, { 10, 3518 }, { 10, 3520 }, { 10, 3522 }, { 10, 3524 }, { 10, 3526 }, { 10, 3528 }, { 10, 3530 }, { 10, 3532 }, { 10, 3534 }, { 10, 3536 }, { 10, 3538 }, { 10, 3540 }, { 10, 3542 }, { 10, 3544 }, { 10, 3546 }, { 10, 3548 }, { 10, 3550 }, { 10, 3552 }, { 10, 3554 }, { 10, 3556 }, { 10, 3558 }, { 10, 3560 }, { 10, 3562 }, { 10, 3564 }, { 10, 3566 }, { 10, 3568 }, { 10, 3570 }, { 10, 3572 }, { 10, 3574 }, { 10, 3576 }, { 10, 3578 }, { 10, 3580 }, { 10, 3582 }, { 10, 3584 }, { 10, 3586 }, { 10, 3588 }, { 10, 3590 }, { 10, 3592 }, { 10, 3594 }, { 10, 3596 }, { 10, 3598 }, { 10, 3600 }, { 10, 3602 }, { 10, 3604 }, { 10, 3606 }, { 10, 3608 }, { 10, 3610 }, { 10, 3612 }, { 10, 3614 }, { 10, 3616 }, { 10, 3618 }, { 10, 3620 }, { 10, 3622 }, { 10, 3624 }, { 10, 3626 }, { 10, 3628 }, { 10, 3630 }, { 10, 3632 }, { 10, 3634 }, { 10, 3636 }, { 10, 3638 }, { 10, 3640 }, { 10, 3642 }, { 10, 3644 }, { 10, 3646 }, { 10, 3648 }, { 10, 3650 }, { 10, 3652 }, { 10, 3654 }, { 10, 3656 }, { 10, 3658 }, { 10, 3660 }, { 10, 3662 }, { 10, 3664 }, { 10, 3666 }, { 10, 3668 }, { 10, 3670 }, { 10, 3672 }, { 10, 3674 }, { 10, 3676 }, { 10, 3678 }, { 10, 3680 }, { 10, 3682 }, { 10, 3684 }, { 10, 3686 }, { 10, 3688 }, { 10, 3690 }, { 10, 3692 }, { 10, 3694 }, { 10, 3696 }, { 10, 3698 }, { 10, 3700 }, { 10, 3702 }, { 10, 3704 }, { 10, 3706 }, { 10, 3708 }, { 10, 3710 }, { 10, 3712 }, { 10, 3714 }, { 10, 3716 }, { 10, 3718 }, { 10, 3720 }, { 10, 3722 }, { 10, 3724 }, { 10, 3726 }, { 10, 3728 }, { 10, 3730 }, { 10, 3732 }, { 10, 3734 }, { 10, 3736 }, { 10, 3738 }, { 10, 3740 }, { 10, 3742 }, { 10, 3744 }, { 10, 3746 }, { 10, 3748 }, { 10, 3750 }, { 10, 3752 }, { 10, 3754 }, { 10, 3756 }, { 10, 3758 }, { 10, 3760 }, { 10, 3762 }, { 10, 3764 }, { 10, 3766 }, { 10, 3768 }, { 10, 3770 }, { 10, 3772 }, { 10, 3774 }, { 10, 3776 }, { 10, 3778 }, { 10, 3780 }, { 10, 3782 }, { 10, 3784 }, { 10, 3786 }, { 10, 3788 }, { 10, 3790 }, { 10, 3792 }, { 10, 3794 }, { 10, 3796 }, { 10, 3798 }, { 10, 3800 }, { 10, 3802 }, { 10, 3804 }, { 10, 3806 }, { 10, 3808 }, { 10, 3810 }, { 10, 3812 }, { 10, 3814 }, { 10, 3816 }, { 10, 3818 }, { 10, 3820 }, { 10, 3822 }, { 10, 3824 }, { 10, 3826 }, { 10, 3828 }, { 10, 3830 }, { 10, 3832 }, { 10, 3834 }, { 10, 3836 }, { 10, 3838 }, { 10, 3840 }, { 10, 3842 }, { 10, 3844 }, { 10, 3846 }, { 10, 3848 }, { 10, 3850 }, { 10, 3852 }, { 10, 3854 }, { 10, 3856 }, { 10, 3858 }, { 10, 3860 }, { 10, 3862 }, { 10, 3864 }, { 10, 3866 }, { 10, 3868 }, { 10, 3870 }, { 10, 3872 }, { 10, 3874 }, { 10, 3876 }, { 10, 3878 }, { 10, 3880 }, { 10, 3882 }, { 10, 3884 }, { 10, 3886 }, { 10, 3888 }, { 10, 3890 }, { 10, 3892 }, { 10, 3894 }, { 10, 3896 }, { 10, 3898 }, { 10, 3900 }, { 10, 3902 }, { 10, 3904 }, { 10, 3906 }, { 10, 3908 }, { 10, 3910 }, { 10, 3912 }, { 10, 3914 }, { 10, 3916 }, { 10, 3918 }, { 10, 3920 }, { 10, 3922 }, { 10, 3924 }, { 10, 3926 }, { 10, 3928 }, { 10, 3930 }, { 10, 3932 }, { 10, 3934 }, { 10, 3936 }, { 10, 3938 }, { 10, 3940 }, { 10, 3942 }, { 10, 3944 }, { 10, 3946 }, { 10, 3948 }, { 10, 3950 }, { 10, 3952 }, { 10, 3954 }, { 10, 3956 }, { 10, 3958 }, { 10, 3960 } }; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_DCT_VALUE_TOKENS_H_ libvpx-1.8.2/vp8/encoder/defaultcoefcounts.h000066400000000000000000000220721357355204000210570ustar00rootroot00000000000000/* * Copyright (c) 2010 
The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_DEFAULTCOEFCOUNTS_H_ #define VPX_VP8_ENCODER_DEFAULTCOEFCOUNTS_H_ #ifdef __cplusplus extern "C" { #endif /* Generated file, included by entropy.c */ static const unsigned int default_coef_counts [BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS] = { { /* Block Type ( 0 ) */ { /* Coeff Band ( 0 ) */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, }, { /* Coeff Band ( 1 ) */ { 30190, 26544, 225, 24, 4, 0, 0, 0, 0, 0, 0, 4171593 }, { 26846, 25157, 1241, 130, 26, 6, 1, 0, 0, 0, 0, 149987 }, { 10484, 9538, 1006, 160, 36, 18, 0, 0, 0, 0, 0, 15104 }, }, { /* Coeff Band ( 2 ) */ { 25842, 40456, 1126, 83, 11, 2, 0, 0, 0, 0, 0, 0 }, { 9338, 8010, 512, 73, 7, 3, 2, 0, 0, 0, 0, 43294 }, { 1047, 751, 149, 31, 13, 6, 1, 0, 0, 0, 0, 879 }, }, { /* Coeff Band ( 3 ) */ { 26136, 9826, 252, 13, 0, 0, 0, 0, 0, 0, 0, 0 }, { 8134, 5574, 191, 14, 2, 0, 0, 0, 0, 0, 0, 35302 }, { 605, 677, 116, 9, 1, 0, 0, 0, 0, 0, 0, 611 }, }, { /* Coeff Band ( 4 ) */ { 10263, 15463, 283, 17, 0, 0, 0, 0, 0, 0, 0, 0 }, { 2773, 2191, 128, 9, 2, 2, 0, 0, 0, 0, 0, 10073 }, { 134, 125, 32, 4, 0, 2, 0, 0, 0, 0, 0, 50 }, }, { /* Coeff Band ( 5 ) */ { 10483, 2663, 23, 1, 0, 0, 0, 0, 0, 0, 0, 0 }, { 2137, 1251, 27, 1, 1, 0, 0, 0, 0, 0, 0, 14362 }, { 116, 156, 14, 2, 1, 0, 0, 0, 0, 0, 0, 190 }, }, { /* Coeff Band ( 6 ) */ { 40977, 27614, 412, 28, 0, 0, 0, 0, 0, 0, 0, 0 }, { 6113, 5213, 261, 22, 3, 0, 0, 0, 0, 0, 0, 26164 }, { 382, 312, 50, 14, 2, 0, 0, 0, 0, 0, 0, 345 }, }, { /* Coeff Band ( 7 ) */ { 0, 26, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 319 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }, }, }, { /* Block Type ( 1 ) */ { /* Coeff Band ( 0 ) */ { 3268, 19382, 1043, 250, 93, 82, 49, 26, 17, 8, 25, 82289 }, { 8758, 32110, 5436, 1832, 827, 668, 420, 153, 24, 0, 3, 52914 }, { 9337, 23725, 8487, 3954, 2107, 1836, 1069, 399, 59, 0, 0, 18620 }, }, { /* Coeff Band ( 1 ) */ { 12419, 8420, 452, 62, 9, 1, 0, 0, 0, 0, 0, 0 }, { 11715, 8705, 693, 92, 15, 7, 2, 0, 0, 0, 0, 53988 }, { 7603, 8585, 2306, 778, 270, 145, 39, 5, 0, 0, 0, 9136 }, }, { /* Coeff Band ( 2 ) */ { 15938, 14335, 1207, 184, 55, 13, 4, 1, 0, 0, 0, 0 }, { 7415, 6829, 1138, 244, 71, 26, 7, 0, 0, 0, 0, 9980 }, { 1580, 1824, 655, 241, 89, 46, 10, 2, 0, 0, 0, 429 }, }, { /* Coeff Band ( 3 ) */ { 19453, 5260, 201, 19, 0, 0, 0, 0, 0, 0, 0, 0 }, { 9173, 3758, 213, 22, 1, 1, 0, 0, 0, 0, 0, 9820 }, { 1689, 1277, 276, 51, 17, 4, 0, 0, 0, 0, 0, 679 }, }, { /* Coeff Band ( 4 ) */ { 12076, 10667, 620, 85, 19, 9, 5, 0, 0, 0, 0, 0 }, { 4665, 3625, 423, 55, 19, 9, 0, 0, 0, 0, 0, 5127 }, { 415, 440, 143, 34, 20, 7, 2, 0, 0, 0, 0, 101 }, }, { /* Coeff Band ( 5 ) */ { 12183, 4846, 115, 11, 1, 0, 0, 0, 0, 0, 0, 0 }, { 4226, 3149, 177, 21, 2, 0, 0, 0, 0, 0, 0, 7157 }, { 375, 621, 189, 51, 11, 4, 1, 0, 0, 0, 0, 198 }, }, { /* Coeff Band ( 6 ) */ { 61658, 37743, 1203, 94, 10, 3, 0, 0, 0, 0, 0, 0 }, { 15514, 11563, 903, 111, 14, 5, 0, 0, 0, 0, 0, 25195 }, { 929, 1077, 291, 78, 14, 7, 1, 0, 0, 0, 0, 507 }, }, { /* Coeff Band ( 7 ) */ { 0, 990, 15, 3, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 412, 13, 0, 0, 0, 0, 0, 
0, 0, 0, 1641 }, { 0, 18, 7, 1, 0, 0, 0, 0, 0, 0, 0, 30 }, }, }, { /* Block Type ( 2 ) */ { /* Coeff Band ( 0 ) */ { 953, 24519, 628, 120, 28, 12, 4, 0, 0, 0, 0, 2248798 }, { 1525, 25654, 2647, 617, 239, 143, 42, 5, 0, 0, 0, 66837 }, { 1180, 11011, 3001, 1237, 532, 448, 239, 54, 5, 0, 0, 7122 }, }, { /* Coeff Band ( 1 ) */ { 1356, 2220, 67, 10, 4, 1, 0, 0, 0, 0, 0, 0 }, { 1450, 2544, 102, 18, 4, 3, 0, 0, 0, 0, 0, 57063 }, { 1182, 2110, 470, 130, 41, 21, 0, 0, 0, 0, 0, 6047 }, }, { /* Coeff Band ( 2 ) */ { 370, 3378, 200, 30, 5, 4, 1, 0, 0, 0, 0, 0 }, { 293, 1006, 131, 29, 11, 0, 0, 0, 0, 0, 0, 5404 }, { 114, 387, 98, 23, 4, 8, 1, 0, 0, 0, 0, 236 }, }, { /* Coeff Band ( 3 ) */ { 579, 194, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 395, 213, 5, 1, 0, 0, 0, 0, 0, 0, 0, 4157 }, { 119, 122, 4, 0, 0, 0, 0, 0, 0, 0, 0, 300 }, }, { /* Coeff Band ( 4 ) */ { 38, 557, 19, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 21, 114, 12, 1, 0, 0, 0, 0, 0, 0, 0, 427 }, { 0, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7 }, }, { /* Coeff Band ( 5 ) */ { 52, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 18, 6, 0, 0, 0, 0, 0, 0, 0, 0, 0, 652 }, { 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 30 }, }, { /* Coeff Band ( 6 ) */ { 640, 569, 10, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 25, 77, 2, 0, 0, 0, 0, 0, 0, 0, 0, 517 }, { 4, 7, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3 }, }, { /* Coeff Band ( 7 ) */ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, }, }, { /* Block Type ( 3 ) */ { /* Coeff Band ( 0 ) */ { 2506, 20161, 2707, 767, 261, 178, 107, 30, 14, 3, 0, 100694 }, { 8806, 36478, 8817, 3268, 1280, 850, 401, 114, 42, 0, 0, 58572 }, { 11003, 27214, 11798, 5716, 2482, 2072, 1048, 175, 32, 0, 0, 19284 }, }, { /* Coeff Band ( 1 ) */ { 9738, 11313, 959, 205, 70, 18, 11, 1, 0, 0, 0, 0 }, { 12628, 15085, 1507, 273, 52, 19, 9, 0, 0, 0, 0, 54280 }, { 10701, 15846, 5561, 1926, 813, 570, 249, 36, 0, 0, 0, 6460 }, }, { /* Coeff Band ( 2 ) */ { 6781, 22539, 2784, 634, 182, 123, 20, 4, 0, 0, 0, 0 }, { 6263, 11544, 2649, 790, 259, 168, 27, 5, 0, 0, 0, 20539 }, { 3109, 4075, 2031, 896, 457, 386, 158, 29, 0, 0, 0, 1138 }, }, { /* Coeff Band ( 3 ) */ { 11515, 4079, 465, 73, 5, 14, 2, 0, 0, 0, 0, 0 }, { 9361, 5834, 650, 96, 24, 8, 4, 0, 0, 0, 0, 22181 }, { 4343, 3974, 1360, 415, 132, 96, 14, 1, 0, 0, 0, 1267 }, }, { /* Coeff Band ( 4 ) */ { 4787, 9297, 823, 168, 44, 12, 4, 0, 0, 0, 0, 0 }, { 3619, 4472, 719, 198, 60, 31, 3, 0, 0, 0, 0, 8401 }, { 1157, 1175, 483, 182, 88, 31, 8, 0, 0, 0, 0, 268 }, }, { /* Coeff Band ( 5 ) */ { 8299, 1226, 32, 5, 1, 0, 0, 0, 0, 0, 0, 0 }, { 3502, 1568, 57, 4, 1, 1, 0, 0, 0, 0, 0, 9811 }, { 1055, 1070, 166, 29, 6, 1, 0, 0, 0, 0, 0, 527 }, }, { /* Coeff Band ( 6 ) */ { 27414, 27927, 1989, 347, 69, 26, 0, 0, 0, 0, 0, 0 }, { 5876, 10074, 1574, 341, 91, 24, 4, 0, 0, 0, 0, 21954 }, { 1571, 2171, 778, 324, 124, 65, 16, 0, 0, 0, 0, 979 }, }, { /* Coeff Band ( 7 ) */ { 0, 29, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }, { 0, 23, 0, 0, 0, 0, 0, 0, 0, 0, 0, 459 }, { 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 13 }, }, }, }; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_DEFAULTCOEFCOUNTS_H_ libvpx-1.8.2/vp8/encoder/denoising.c000066400000000000000000000656661357355204000173340ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <limits.h> #include "denoising.h" #include "vp8/common/reconinter.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" #include "vp8_rtcd.h" static const unsigned int NOISE_MOTION_THRESHOLD = 25 * 25; /* SSE_DIFF_THRESHOLD is selected as ~95% confidence assuming * var(noise) ~= 100. */ static const unsigned int SSE_DIFF_THRESHOLD = 16 * 16 * 20; static const unsigned int SSE_THRESHOLD = 16 * 16 * 40; static const unsigned int SSE_THRESHOLD_HIGH = 16 * 16 * 80; /* * The filter function was modified to reduce the computational complexity. * Step 1: * Instead of applying tap coefficients for each pixel, we calculated the * pixel adjustments vs. pixel diff value ahead of time. * adjustment = filtered_value - current_raw * = (filter_coefficient * diff + 128) >> 8 * where * filter_coefficient = (255 << 8) / (256 + ((absdiff * 330) >> 3)); * filter_coefficient += filter_coefficient / * (3 + motion_magnitude_adjustment); * filter_coefficient is clamped to 0 ~ 255. * * Step 2: * The adjustment vs. diff curve becomes flat very quickly when diff * increases. This allowed us to use only a few levels to approximate the * curve without changing the filtering algorithm too much. * The adjustments were further corrected by checking the motion magnitude. * The levels used are: * diff adjustment w/o motion correction adjustment w/ motion correction * [-255, -16] -6 -7 * [-15, -8] -4 -5 * [-7, -4] -3 -4 * [-3, 3] diff diff * [4, 7] 3 4 * [8, 15] 4 5 * [16, 255] 6 7 */ int vp8_denoiser_filter_c(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising) { unsigned char *running_avg_y_start = running_avg_y; unsigned char *sig_start = sig; int sum_diff_thresh; int r, c; int sum_diff = 0; int adj_val[3] = { 3, 4, 6 }; int shift_inc1 = 0; int shift_inc2 = 1; int col_sum[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; /* If motion_magnitude is small, make the denoiser more aggressive by * increasing the adjustment for each level. Add another increment for * blocks that are labeled for increased denoising. */ if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) { if (increase_denoising) { shift_inc1 = 1; shift_inc2 = 2; } adj_val[0] += shift_inc2; adj_val[1] += shift_inc2; adj_val[2] += shift_inc2; } for (r = 0; r < 16; ++r) { for (c = 0; c < 16; ++c) { int diff = 0; int adjustment = 0; int absdiff = 0; diff = mc_running_avg_y[c] - sig[c]; absdiff = abs(diff); // When |diff| <= |3 + shift_inc1|, use pixel value from // last denoised raw. if (absdiff <= 3 + shift_inc1) { running_avg_y[c] = mc_running_avg_y[c]; col_sum[c] += diff; } else { if (absdiff >= 4 + shift_inc1 && absdiff <= 7) { adjustment = adj_val[0]; } else if (absdiff >= 8 && absdiff <= 15) { adjustment = adj_val[1]; } else { adjustment = adj_val[2]; } if (diff > 0) { if ((sig[c] + adjustment) > 255) { running_avg_y[c] = 255; } else { running_avg_y[c] = sig[c] + adjustment; } col_sum[c] += adjustment; } else { if ((sig[c] - adjustment) < 0) { running_avg_y[c] = 0; } else { running_avg_y[c] = sig[c] - adjustment; } col_sum[c] -= adjustment; } } } /* Update pointers for next iteration. */ sig += sig_stride; mc_running_avg_y += mc_avg_y_stride; running_avg_y += avg_y_stride; } for (c = 0; c < 16; ++c) { // Below we clip the value in the same way the SSE code does.
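/* Illustrative trace of the level mapping above (a sketch with made-up pixel values, not from the original source): with motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD and increase_denoising == 0, shift_inc1 == 0 and adj_val becomes { 4, 5, 7 }. For mc_running_avg_y[c] == 130 and sig[c] == 121, diff == +9 and absdiff == 9 falls in [8, 15], so adjustment == adj_val[1] == 5 and running_avg_y[c] == 121 + 5 == 126, matching the "w/ motion correction" column of the table. For absdiff <= 3 the motion-compensated average is copied through unchanged. In the fallback pass further below, e.g. abs(sum_diff) == 700 against SUM_DIFF_THRESHOLD == 512 gives delta == ((700 - 512) >> 8) + 1 == 1, so each pixel may move at most one step toward the average before the block is re-checked. */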
// When adopting the aggressive denoiser, the adj_val for each pixel // could be at most 8 (this is the current max adjustment of the map). // In SSE code, we calculate the sum of adj_val for // the columns, so the sum could be up to 128 (16 rows). However, // the range of the value is -128 ~ 127 in SSE code, which is why // we do this change in C code. // We don't do this for the UV denoiser, since there are only 8 rows, // and max adjustments <= 8, so the sum of the columns will not // exceed 64. if (col_sum[c] >= 128) { col_sum[c] = 127; } sum_diff += col_sum[c]; } sum_diff_thresh = SUM_DIFF_THRESHOLD; if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH; if (abs(sum_diff) > sum_diff_thresh) { // Before returning to copy the block (i.e., apply no denoising), check // if we can still apply some (weaker) temporal filtering to this block, // that would otherwise not be denoised at all. Simplest is to apply // an additional adjustment to running_avg_y to bring it closer to sig. // The adjustment is capped by a maximum delta, and chosen such that // in most cases the resulting sum_diff will be within the // acceptable range given by sum_diff_thresh. // The delta is set by the excess of absolute pixel diff over threshold. int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1; // Only apply the adjustment for max delta up to 3. if (delta < 4) { sig -= sig_stride * 16; mc_running_avg_y -= mc_avg_y_stride * 16; running_avg_y -= avg_y_stride * 16; for (r = 0; r < 16; ++r) { for (c = 0; c < 16; ++c) { int diff = mc_running_avg_y[c] - sig[c]; int adjustment = abs(diff); if (adjustment > delta) adjustment = delta; if (diff > 0) { // Bring denoised signal down. if (running_avg_y[c] - adjustment < 0) { running_avg_y[c] = 0; } else { running_avg_y[c] = running_avg_y[c] - adjustment; } col_sum[c] -= adjustment; } else if (diff < 0) { // Bring denoised signal up. if (running_avg_y[c] + adjustment > 255) { running_avg_y[c] = 255; } else { running_avg_y[c] = running_avg_y[c] + adjustment; } col_sum[c] += adjustment; } } // TODO(marpan): Check here if abs(sum_diff) has gone below the // threshold sum_diff_thresh, and if so, we can exit the row loop. sig += sig_stride; mc_running_avg_y += mc_avg_y_stride; running_avg_y += avg_y_stride; } sum_diff = 0; for (c = 0; c < 16; ++c) { if (col_sum[c] >= 128) { col_sum[c] = 127; } sum_diff += col_sum[c]; } if (abs(sum_diff) > sum_diff_thresh) return COPY_BLOCK; } else { return COPY_BLOCK; } } vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride); return FILTER_BLOCK; } int vp8_denoiser_filter_uv_c(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising) { unsigned char *running_avg_start = running_avg; unsigned char *sig_start = sig; int sum_diff_thresh; int r, c; int sum_diff = 0; int sum_block = 0; int adj_val[3] = { 3, 4, 6 }; int shift_inc1 = 0; int shift_inc2 = 1; /* If motion_magnitude is small, make the denoiser more aggressive by * increasing the adjustment for each level. Add another increment for * blocks that are labeled for increased denoising. */ if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) { if (increase_denoising) { shift_inc1 = 1; shift_inc2 = 2; } adj_val[0] += shift_inc2; adj_val[1] += shift_inc2; adj_val[2] += shift_inc2; } // Avoid denoising the color signal if it's close to the average level.
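/* Concretely (derived from the constants in denoising.h, as a reading aid): SUM_DIFF_FROM_AVG_THRESH_UV is 8 * 8 * 8 == 512 and the comparison point is 128 * 8 * 8 == 8192, so the gate below returns COPY_BLOCK whenever the mean of the 8x8 chroma block is within 8 of the neutral value 128. */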
for (r = 0; r < 8; ++r) { for (c = 0; c < 8; ++c) { sum_block += sig[c]; } sig += sig_stride; } if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) { return COPY_BLOCK; } sig -= sig_stride * 8; for (r = 0; r < 8; ++r) { for (c = 0; c < 8; ++c) { int diff = 0; int adjustment = 0; int absdiff = 0; diff = mc_running_avg[c] - sig[c]; absdiff = abs(diff); // When |diff| <= |3 + shift_inc1|, use pixel value from // last denoised raw. if (absdiff <= 3 + shift_inc1) { running_avg[c] = mc_running_avg[c]; sum_diff += diff; } else { if (absdiff >= 4 && absdiff <= 7) { adjustment = adj_val[0]; } else if (absdiff >= 8 && absdiff <= 15) { adjustment = adj_val[1]; } else { adjustment = adj_val[2]; } if (diff > 0) { if ((sig[c] + adjustment) > 255) { running_avg[c] = 255; } else { running_avg[c] = sig[c] + adjustment; } sum_diff += adjustment; } else { if ((sig[c] - adjustment) < 0) { running_avg[c] = 0; } else { running_avg[c] = sig[c] - adjustment; } sum_diff -= adjustment; } } } /* Update pointers for next iteration. */ sig += sig_stride; mc_running_avg += mc_avg_stride; running_avg += avg_stride; } sum_diff_thresh = SUM_DIFF_THRESHOLD_UV; if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV; if (abs(sum_diff) > sum_diff_thresh) { // Before returning to copy the block (i.e., apply no denoising), check // if we can still apply some (weaker) temporal filtering to this block, // that would otherwise not be denoised at all. Simplest is to apply // an additional adjustment to running_avg to bring it closer to sig. // The adjustment is capped by a maximum delta, and chosen such that // in most cases the resulting sum_diff will be within the // acceptable range given by sum_diff_thresh. // The delta is set by the excess of absolute pixel diff over threshold. int delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1; // Only apply the adjustment for max delta up to 3. if (delta < 4) { sig -= sig_stride * 8; mc_running_avg -= mc_avg_stride * 8; running_avg -= avg_stride * 8; for (r = 0; r < 8; ++r) { for (c = 0; c < 8; ++c) { int diff = mc_running_avg[c] - sig[c]; int adjustment = abs(diff); if (adjustment > delta) adjustment = delta; if (diff > 0) { // Bring denoised signal down. if (running_avg[c] - adjustment < 0) { running_avg[c] = 0; } else { running_avg[c] = running_avg[c] - adjustment; } sum_diff -= adjustment; } else if (diff < 0) { // Bring denoised signal up. if (running_avg[c] + adjustment > 255) { running_avg[c] = 255; } else { running_avg[c] = running_avg[c] + adjustment; } sum_diff += adjustment; } } // TODO(marpan): Check here if abs(sum_diff) has gone below the // threshold sum_diff_thresh, and if so, we can exit the row loop. sig += sig_stride; mc_running_avg += mc_avg_stride; running_avg += avg_stride; } if (abs(sum_diff) > sum_diff_thresh) return COPY_BLOCK; } else { return COPY_BLOCK; } } vp8_copy_mem8x8(running_avg_start, avg_stride, sig_start, sig_stride); return FILTER_BLOCK; } void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser, int mode) { assert(mode > 0); // Denoiser is allocated only if mode > 0.
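/* Summary of the mapping below, as written: mode 1 -> kDenoiserOnYOnly, mode 2 -> kDenoiserOnYUV, mode 3 -> kDenoiserOnYUVAggressive, and any other mode > 0 falls back to kDenoiserOnYUV. Only the aggressive mode changes the scaling parameters that follow (e.g. qp_thresh 80 and consec_zerolast 15 instead of 0 and UINT_MAX). */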
if (mode == 1) { denoiser->denoiser_mode = kDenoiserOnYOnly; } else if (mode == 2) { denoiser->denoiser_mode = kDenoiserOnYUV; } else if (mode == 3) { denoiser->denoiser_mode = kDenoiserOnYUVAggressive; } else { denoiser->denoiser_mode = kDenoiserOnYUV; } if (denoiser->denoiser_mode != kDenoiserOnYUVAggressive) { denoiser->denoise_pars.scale_sse_thresh = 1; denoiser->denoise_pars.scale_motion_thresh = 8; denoiser->denoise_pars.scale_increase_filter = 0; denoiser->denoise_pars.denoise_mv_bias = 95; denoiser->denoise_pars.pickmode_mv_bias = 100; denoiser->denoise_pars.qp_thresh = 0; denoiser->denoise_pars.consec_zerolast = UINT_MAX; denoiser->denoise_pars.spatial_blur = 0; } else { denoiser->denoise_pars.scale_sse_thresh = 2; denoiser->denoise_pars.scale_motion_thresh = 16; denoiser->denoise_pars.scale_increase_filter = 1; denoiser->denoise_pars.denoise_mv_bias = 60; denoiser->denoise_pars.pickmode_mv_bias = 75; denoiser->denoise_pars.qp_thresh = 80; denoiser->denoise_pars.consec_zerolast = 15; denoiser->denoise_pars.spatial_blur = 0; } } int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height, int num_mb_rows, int num_mb_cols, int mode) { int i; assert(denoiser); denoiser->num_mb_cols = num_mb_cols; for (i = 0; i < MAX_REF_FRAMES; ++i) { denoiser->yv12_running_avg[i].flags = 0; if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_running_avg[i]), width, height, VP8BORDERINPIXELS) < 0) { vp8_denoiser_free(denoiser); return 1; } memset(denoiser->yv12_running_avg[i].buffer_alloc, 0, denoiser->yv12_running_avg[i].frame_size); } denoiser->yv12_mc_running_avg.flags = 0; if (vp8_yv12_alloc_frame_buffer(&(denoiser->yv12_mc_running_avg), width, height, VP8BORDERINPIXELS) < 0) { vp8_denoiser_free(denoiser); return 1; } memset(denoiser->yv12_mc_running_avg.buffer_alloc, 0, denoiser->yv12_mc_running_avg.frame_size); if (vp8_yv12_alloc_frame_buffer(&denoiser->yv12_last_source, width, height, VP8BORDERINPIXELS) < 0) { vp8_denoiser_free(denoiser); return 1; } memset(denoiser->yv12_last_source.buffer_alloc, 0, denoiser->yv12_last_source.frame_size); denoiser->denoise_state = vpx_calloc((num_mb_rows * num_mb_cols), 1); if (!denoiser->denoise_state) { vp8_denoiser_free(denoiser); return 1; } memset(denoiser->denoise_state, 0, (num_mb_rows * num_mb_cols)); vp8_denoiser_set_parameters(denoiser, mode); denoiser->nmse_source_diff = 0; denoiser->nmse_source_diff_count = 0; denoiser->qp_avg = 0; // QP threshold below which we can go up to aggressive mode. denoiser->qp_threshold_up = 80; // QP threshold above which we can go back down to normal mode. // For now keep this second threshold high, so not used currently. denoiser->qp_threshold_down = 128; // Bitrate thresholds and noise metric (nmse) thresholds for switching to // aggressive mode. // TODO(marpan): Adjust thresholds, including effect on resolution. denoiser->bitrate_threshold = 400000; // (bits/sec). 
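/* The resolution checks just below raise these defaults (a recap, not new logic): above 1280x720 -> 3000000 bps and nmse threshold 200; above 960x540 -> 1200000 and 120; above 640x480 -> 600000 and 100; otherwise the defaults of 400000 and 80 set here apply. */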
denoiser->threshold_aggressive_mode = 80; if (width * height > 1280 * 720) { denoiser->bitrate_threshold = 3000000; denoiser->threshold_aggressive_mode = 200; } else if (width * height > 960 * 540) { denoiser->bitrate_threshold = 1200000; denoiser->threshold_aggressive_mode = 120; } else if (width * height > 640 * 480) { denoiser->bitrate_threshold = 600000; denoiser->threshold_aggressive_mode = 100; } return 0; } void vp8_denoiser_free(VP8_DENOISER *denoiser) { int i; assert(denoiser); for (i = 0; i < MAX_REF_FRAMES; ++i) { vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_running_avg[i]); } vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_mc_running_avg); vp8_yv12_de_alloc_frame_buffer(&denoiser->yv12_last_source); vpx_free(denoiser->denoise_state); } void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, MACROBLOCK *x, unsigned int best_sse, unsigned int zero_mv_sse, int recon_yoffset, int recon_uvoffset, loop_filter_info_n *lfi_n, int mb_row, int mb_col, int block_index, int consec_zero_last) { int mv_row; int mv_col; unsigned int motion_threshold; unsigned int motion_magnitude2; unsigned int sse_thresh; int sse_diff_thresh = 0; // Spatial loop filter: only applied selectively based on // temporal filter state of block relative to top/left neighbors. int apply_spatial_loop_filter = 1; MV_REFERENCE_FRAME frame = x->best_reference_frame; MV_REFERENCE_FRAME zero_frame = x->best_zeromv_reference_frame; enum vp8_denoiser_decision decision = FILTER_BLOCK; enum vp8_denoiser_decision decision_u = COPY_BLOCK; enum vp8_denoiser_decision decision_v = COPY_BLOCK; if (zero_frame) { YV12_BUFFER_CONFIG *src = &denoiser->yv12_running_avg[frame]; YV12_BUFFER_CONFIG *dst = &denoiser->yv12_mc_running_avg; YV12_BUFFER_CONFIG saved_pre, saved_dst; MB_MODE_INFO saved_mbmi; MACROBLOCKD *filter_xd = &x->e_mbd; MB_MODE_INFO *mbmi = &filter_xd->mode_info_context->mbmi; int sse_diff = 0; // Bias on zero motion vector sse. const int zero_bias = denoiser->denoise_pars.denoise_mv_bias; zero_mv_sse = (unsigned int)((int64_t)zero_mv_sse * zero_bias / 100); sse_diff = (int)zero_mv_sse - (int)best_sse; saved_mbmi = *mbmi; /* Use the best MV for the compensation. */ mbmi->ref_frame = x->best_reference_frame; mbmi->mode = x->best_sse_inter_mode; mbmi->mv = x->best_sse_mv; mbmi->need_to_clamp_mvs = x->need_to_clamp_best_mvs; mv_col = x->best_sse_mv.as_mv.col; mv_row = x->best_sse_mv.as_mv.row; // Bias to zero_mv if small amount of motion. // Note sse_diff_thresh is initialized to zero, so this ensures // we will always choose zero_mv for denoising if // zero_mv_sse <= best_sse (i.e., sse_diff <= 0). if ((unsigned int)(mv_row * mv_row + mv_col * mv_col) <= NOISE_MOTION_THRESHOLD) { sse_diff_thresh = (int)SSE_DIFF_THRESHOLD; } if (frame == INTRA_FRAME || sse_diff <= sse_diff_thresh) { /* * Handle intra blocks as referring to last frame with zero motion * and let the absolute pixel difference affect the filter factor. * Also consider small amount of motion as being random walk due * to noise, as long as it doesn't mean that we get a much bigger error. * Note that any changes to the mode info only affect the * denoising.
*/ x->denoise_zeromv = 1; mbmi->ref_frame = x->best_zeromv_reference_frame; src = &denoiser->yv12_running_avg[zero_frame]; mbmi->mode = ZEROMV; mbmi->mv.as_int = 0; x->best_sse_inter_mode = ZEROMV; x->best_sse_mv.as_int = 0; best_sse = zero_mv_sse; } mv_row = x->best_sse_mv.as_mv.row; mv_col = x->best_sse_mv.as_mv.col; motion_magnitude2 = mv_row * mv_row + mv_col * mv_col; motion_threshold = denoiser->denoise_pars.scale_motion_thresh * NOISE_MOTION_THRESHOLD; if (motion_magnitude2 < denoiser->denoise_pars.scale_increase_filter * NOISE_MOTION_THRESHOLD) { x->increase_denoising = 1; } sse_thresh = denoiser->denoise_pars.scale_sse_thresh * SSE_THRESHOLD; if (x->increase_denoising) { sse_thresh = denoiser->denoise_pars.scale_sse_thresh * SSE_THRESHOLD_HIGH; } if (best_sse > sse_thresh || motion_magnitude2 > motion_threshold) { decision = COPY_BLOCK; } // If the block is considered skin, don't denoise if the block // (1) is selected as non-zero motion for the current frame, or // (2) has not been selected as ZERO_LAST mode for at least x past frames // in a row. // TODO(marpan): Parameter "x" should be varied with framerate. // In particular, it should be reduced for layers (base layer/LAST). if (x->is_skin && (consec_zero_last < 2 || motion_magnitude2 > 0)) { decision = COPY_BLOCK; } if (decision == FILTER_BLOCK) { saved_pre = filter_xd->pre; saved_dst = filter_xd->dst; /* Compensate the running average. */ filter_xd->pre.y_buffer = src->y_buffer + recon_yoffset; filter_xd->pre.u_buffer = src->u_buffer + recon_uvoffset; filter_xd->pre.v_buffer = src->v_buffer + recon_uvoffset; /* Write the compensated running average to the destination buffer. */ filter_xd->dst.y_buffer = dst->y_buffer + recon_yoffset; filter_xd->dst.u_buffer = dst->u_buffer + recon_uvoffset; filter_xd->dst.v_buffer = dst->v_buffer + recon_uvoffset; if (!x->skip) { vp8_build_inter_predictors_mb(filter_xd); } else { vp8_build_inter16x16_predictors_mb( filter_xd, filter_xd->dst.y_buffer, filter_xd->dst.u_buffer, filter_xd->dst.v_buffer, filter_xd->dst.y_stride, filter_xd->dst.uv_stride); } filter_xd->pre = saved_pre; filter_xd->dst = saved_dst; *mbmi = saved_mbmi; } } else { // zero_frame should always be 1 for real-time mode, as the // ZEROMV mode is always checked, so we should never go into this branch. // In case ZEROMV is not checked, we will force no denoising (COPY). decision = COPY_BLOCK; } if (decision == FILTER_BLOCK) { unsigned char *mc_running_avg_y = denoiser->yv12_mc_running_avg.y_buffer + recon_yoffset; int mc_avg_y_stride = denoiser->yv12_mc_running_avg.y_stride; unsigned char *running_avg_y = denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset; int avg_y_stride = denoiser->yv12_running_avg[INTRA_FRAME].y_stride; /* Filter. */ decision = vp8_denoiser_filter(mc_running_avg_y, mc_avg_y_stride, running_avg_y, avg_y_stride, x->thismb, 16, motion_magnitude2, x->increase_denoising); denoiser->denoise_state[block_index] = motion_magnitude2 > 0 ? kFilterNonZeroMV : kFilterZeroMV; // Only denoise UV for zero motion, and only if the y channel was denoised.
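/* In other words (restating the condition below): chroma is only filtered when the luma decision above came back FILTER_BLOCK, the selected motion vector is exactly zero, and the denoiser mode enables UV (kDenoiserOnYUV or kDenoiserOnYUVAggressive); the chroma source pixels are read through x->block[16] and x->block[20] for U and V respectively. */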
if (denoiser->denoiser_mode != kDenoiserOnYOnly && motion_magnitude2 == 0 && decision == FILTER_BLOCK) { unsigned char *mc_running_avg_u = denoiser->yv12_mc_running_avg.u_buffer + recon_uvoffset; unsigned char *running_avg_u = denoiser->yv12_running_avg[INTRA_FRAME].u_buffer + recon_uvoffset; unsigned char *mc_running_avg_v = denoiser->yv12_mc_running_avg.v_buffer + recon_uvoffset; unsigned char *running_avg_v = denoiser->yv12_running_avg[INTRA_FRAME].v_buffer + recon_uvoffset; int mc_avg_uv_stride = denoiser->yv12_mc_running_avg.uv_stride; int avg_uv_stride = denoiser->yv12_running_avg[INTRA_FRAME].uv_stride; int signal_stride = x->block[16].src_stride; decision_u = vp8_denoiser_filter_uv( mc_running_avg_u, mc_avg_uv_stride, running_avg_u, avg_uv_stride, x->block[16].src + *x->block[16].base_src, signal_stride, motion_magnitude2, 0); decision_v = vp8_denoiser_filter_uv( mc_running_avg_v, mc_avg_uv_stride, running_avg_v, avg_uv_stride, x->block[20].src + *x->block[20].base_src, signal_stride, motion_magnitude2, 0); } } if (decision == COPY_BLOCK) { /* No filtering of this block; it differs too much from the predictor, * or the motion vector magnitude is considered too big. */ x->denoise_zeromv = 0; vp8_copy_mem16x16( x->thismb, 16, denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset, denoiser->yv12_running_avg[INTRA_FRAME].y_stride); denoiser->denoise_state[block_index] = kNoFilter; } if (denoiser->denoiser_mode != kDenoiserOnYOnly) { if (decision_u == COPY_BLOCK) { vp8_copy_mem8x8( x->block[16].src + *x->block[16].base_src, x->block[16].src_stride, denoiser->yv12_running_avg[INTRA_FRAME].u_buffer + recon_uvoffset, denoiser->yv12_running_avg[INTRA_FRAME].uv_stride); } if (decision_v == COPY_BLOCK) { vp8_copy_mem8x8( x->block[20].src + *x->block[20].base_src, x->block[16].src_stride, denoiser->yv12_running_avg[INTRA_FRAME].v_buffer + recon_uvoffset, denoiser->yv12_running_avg[INTRA_FRAME].uv_stride); } } // Option to selectively deblock the denoised signal, for y channel only. if (apply_spatial_loop_filter) { loop_filter_info lfi; int apply_filter_col = 0; int apply_filter_row = 0; int apply_filter = 0; int y_stride = denoiser->yv12_running_avg[INTRA_FRAME].y_stride; int uv_stride = denoiser->yv12_running_avg[INTRA_FRAME].uv_stride; // Fix filter level to some nominal value for now. int filter_level = 48; int hev_index = lfi_n->hev_thr_lut[INTER_FRAME][filter_level]; lfi.mblim = lfi_n->mblim[filter_level]; lfi.blim = lfi_n->blim[filter_level]; lfi.lim = lfi_n->lim[filter_level]; lfi.hev_thr = lfi_n->hev_thr[hev_index]; // Apply filter if there is a difference in the denoiser filter state // between the current and left/top block, or if non-zero motion vector // is used for the motion-compensated filtering. if (mb_col > 0) { apply_filter_col = !((denoiser->denoise_state[block_index] == denoiser->denoise_state[block_index - 1]) && denoiser->denoise_state[block_index] != kFilterNonZeroMV); if (apply_filter_col) { // Filter left vertical edge. apply_filter = 1; vp8_loop_filter_mbv( denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset, NULL, NULL, y_stride, uv_stride, &lfi); } } if (mb_row > 0) { apply_filter_row = !((denoiser->denoise_state[block_index] == denoiser->denoise_state[block_index - denoiser->num_mb_cols]) && denoiser->denoise_state[block_index] != kFilterNonZeroMV); if (apply_filter_row) { // Filter top horizontal edge. 
apply_filter = 1; vp8_loop_filter_mbh( denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset, NULL, NULL, y_stride, uv_stride, &lfi); } } if (apply_filter) { // Update the signal block |x|. Pixel changes are only to top and/or // left boundary pixels: can we avoid full block copy here. vp8_copy_mem16x16( denoiser->yv12_running_avg[INTRA_FRAME].y_buffer + recon_yoffset, y_stride, x->thismb, 16); } } } libvpx-1.8.2/vp8/encoder/denoising.h000066400000000000000000000070411357355204000173200ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_DENOISING_H_ #define VPX_VP8_ENCODER_DENOISING_H_ #include "block.h" #include "vp8/common/loopfilter.h" #ifdef __cplusplus extern "C" { #endif #define SUM_DIFF_THRESHOLD 512 #define SUM_DIFF_THRESHOLD_HIGH 600 #define MOTION_MAGNITUDE_THRESHOLD (8 * 3) #define SUM_DIFF_THRESHOLD_UV (96) // (8 * 8 * 1.5) #define SUM_DIFF_THRESHOLD_HIGH_UV (8 * 8 * 2) #define SUM_DIFF_FROM_AVG_THRESH_UV (8 * 8 * 8) #define MOTION_MAGNITUDE_THRESHOLD_UV (8 * 3) #define MAX_GF_ARF_DENOISE_RANGE (8) enum vp8_denoiser_decision { COPY_BLOCK, FILTER_BLOCK }; enum vp8_denoiser_filter_state { kNoFilter, kFilterZeroMV, kFilterNonZeroMV }; enum vp8_denoiser_mode { kDenoiserOff, kDenoiserOnYOnly, kDenoiserOnYUV, kDenoiserOnYUVAggressive, kDenoiserOnAdaptive }; typedef struct { // Scale factor on sse threshold above which no denoising is done. unsigned int scale_sse_thresh; // Scale factor on motion magnitude threshold above which no // denoising is done. unsigned int scale_motion_thresh; // Scale factor on motion magnitude below which we increase the strength of // the temporal filter (in function vp8_denoiser_filter). unsigned int scale_increase_filter; // Scale factor to bias to ZEROMV for denoising. unsigned int denoise_mv_bias; // Scale factor to bias to ZEROMV for coding mode selection. unsigned int pickmode_mv_bias; // Quantizer threshold below which we use the segmentation map to switch off // loop filter for blocks that have been coded as ZEROMV-LAST a certain number // (consec_zerolast) of consecutive frames. Note that the delta-QP is set to // 0 when segmentation map is used for shutting off loop filter. unsigned int qp_thresh; // Threshold for number of consecutive frames for blocks coded as ZEROMV-LAST. unsigned int consec_zerolast; // Threshold for amount of spatial blur on Y channel. 0 means no spatial blur. unsigned int spatial_blur; } denoise_params; typedef struct vp8_denoiser { YV12_BUFFER_CONFIG yv12_running_avg[MAX_REF_FRAMES]; YV12_BUFFER_CONFIG yv12_mc_running_avg; // TODO(marpan): Should remove yv12_last_source and use vp8_lookahead_peak. 
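/* Typical lifecycle, as a hedged sketch inferred from the functions declared below (the numbers are only an example): for a 640x480 stream, vp8_denoiser_allocate(&denoiser, 640, 480, 30, 40, 2) returns 0 on success (30 = 480/16 macroblock rows, 40 = 640/16 columns, mode 2 = kDenoiserOnYUV); vp8_denoiser_denoise_mb() is then called per macroblock during encoding, and vp8_denoiser_free(&denoiser) releases the buffers. */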
YV12_BUFFER_CONFIG yv12_last_source; unsigned char *denoise_state; int num_mb_cols; int denoiser_mode; int threshold_aggressive_mode; int nmse_source_diff; int nmse_source_diff_count; int qp_avg; int qp_threshold_up; int qp_threshold_down; int bitrate_threshold; denoise_params denoise_pars; } VP8_DENOISER; int vp8_denoiser_allocate(VP8_DENOISER *denoiser, int width, int height, int num_mb_rows, int num_mb_cols, int mode); void vp8_denoiser_free(VP8_DENOISER *denoiser); void vp8_denoiser_set_parameters(VP8_DENOISER *denoiser, int mode); void vp8_denoiser_denoise_mb(VP8_DENOISER *denoiser, MACROBLOCK *x, unsigned int best_sse, unsigned int zero_mv_sse, int recon_yoffset, int recon_uvoffset, loop_filter_info_n *lfi_n, int mb_row, int mb_col, int block_index, int consec_zero_last); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_DENOISING_H_ libvpx-1.8.2/vp8/encoder/encodeframe.c000066400000000000000000001154011357355204000176040ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" #include "vp8_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "bitstream.h" #include "encodemb.h" #include "encodemv.h" #if CONFIG_MULTITHREAD #include "ethreading.h" #endif #include "vp8/common/common.h" #include "onyx_int.h" #include "vp8/common/extend.h" #include "vp8/common/entropymode.h" #include "vp8/common/quant_common.h" #include "segmentation.h" #include "vp8/common/setupintrarecon.h" #include "encodeintra.h" #include "vp8/common/reconinter.h" #include "rdopt.h" #include "pickinter.h" #include "vp8/common/findnearmv.h" #include <stdio.h> #include <limits.h> #include "vp8/common/invtrans.h" #include "vpx_ports/vpx_timer.h" #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING #include "bitstream.h" #endif #include "encodeframe.h" extern void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t); static void adjust_act_zbin(VP8_COMP *cpi, MACROBLOCK *x); #ifdef MODE_STATS unsigned int inter_y_modes[10] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; unsigned int inter_uv_modes[4] = { 0, 0, 0, 0 }; unsigned int inter_b_modes[15] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; unsigned int y_modes[5] = { 0, 0, 0, 0, 0 }; unsigned int uv_modes[4] = { 0, 0, 0, 0 }; unsigned int b_modes[14] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; #endif /* activity_avg must be positive, or flat regions could get a zero weight * (infinite lambda), which confounds analysis. * This also avoids the need for divide by zero checks in * vp8_activity_masking(). */ #define VP8_ACTIVITY_AVG_MIN (64) /* This is used as a reference when computing the source variance for the * purposes of activity masking. * Eventually this should be replaced by custom no-reference routines, * which will be faster. */ static const unsigned char VP8_VAR_OFFS[16] = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; /* Original activity measure from Tim T's code.
*/ static unsigned int tt_activity_measure(VP8_COMP *cpi, MACROBLOCK *x) { unsigned int act; unsigned int sse; (void)cpi; /* TODO: This could also be done over smaller areas (8x8), but that would * require extensive changes elsewhere, as lambda is assumed to be fixed * over an entire MB in most of the code. * Another option is to compute four 8x8 variances, and pick a single * lambda using a non-linear combination (e.g., the smallest, or second * smallest, etc.). */ act = vpx_variance16x16(x->src.y_buffer, x->src.y_stride, VP8_VAR_OFFS, 0, &sse); act = act << 4; /* If the region is flat, lower the activity some more. */ if (act < 8 << 12) act = act < 5 << 12 ? act : 5 << 12; return act; } /* Stub for alternative experimental activity measures. */ static unsigned int alt_activity_measure(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred) { return vp8_encode_intra(cpi, x, use_dc_pred); } /* Measure the activity of the current macroblock * What we measure here is TBD so abstracted to this function */ #define ALT_ACT_MEASURE 1 static unsigned int mb_activity_measure(VP8_COMP *cpi, MACROBLOCK *x, int mb_row, int mb_col) { unsigned int mb_activity; if (ALT_ACT_MEASURE) { int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); /* Or use an alternative. */ mb_activity = alt_activity_measure(cpi, x, use_dc_pred); } else { /* Original activity measure from Tim T's code. */ mb_activity = tt_activity_measure(cpi, x); } if (mb_activity < VP8_ACTIVITY_AVG_MIN) mb_activity = VP8_ACTIVITY_AVG_MIN; return mb_activity; } /* Calculate an "average" mb activity value for the frame */ #define ACT_MEDIAN 0 static void calc_av_activity(VP8_COMP *cpi, int64_t activity_sum) { #if ACT_MEDIAN /* Find median: Simple n^2 algorithm for experimentation */ { unsigned int median; unsigned int i, j; unsigned int *sortlist; unsigned int tmp; /* Create a list to sort to */ CHECK_MEM_ERROR(sortlist, vpx_calloc(sizeof(unsigned int), cpi->common.MBs)); /* Copy map to sort list */ memcpy(sortlist, cpi->mb_activity_map, sizeof(unsigned int) * cpi->common.MBs); /* Ripple each value down to its correct position */ for (i = 1; i < cpi->common.MBs; ++i) { for (j = i; j > 0; j--) { if (sortlist[j] < sortlist[j - 1]) { /* Swap values */ tmp = sortlist[j - 1]; sortlist[j - 1] = sortlist[j]; sortlist[j] = tmp; } else break; } } /* Even number of MBs, so estimate the median as the mean of the two either side. */ median = (1 + sortlist[cpi->common.MBs >> 1] + sortlist[(cpi->common.MBs >> 1) + 1]) >> 1; cpi->activity_avg = median; vpx_free(sortlist); } #else /* Simple mean for now */ cpi->activity_avg = (unsigned int)(activity_sum / cpi->common.MBs); #endif if (cpi->activity_avg < VP8_ACTIVITY_AVG_MIN) { cpi->activity_avg = VP8_ACTIVITY_AVG_MIN; } /* Experimental code: return fixed value normalized for several clips */ if (ALT_ACT_MEASURE) cpi->activity_avg = 100000; } #define USE_ACT_INDEX 0 #define OUTPUT_NORM_ACT_STATS 0 #if USE_ACT_INDEX /* Calculate an activity index for each mb */ static void calc_activity_index(VP8_COMP *cpi, MACROBLOCK *x) { VP8_COMMON *const cm = &cpi->common; int mb_row, mb_col; int64_t act; int64_t a; int64_t b; #if OUTPUT_NORM_ACT_STATS FILE *f = fopen("norm_act.stt", "a"); fprintf(f, "\n%12d\n", cpi->activity_avg); #endif /* Reset pointers to start of activity map */ x->mb_activity_ptr = cpi->mb_activity_map; /* Calculate normalized mb activity number.
*/ for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { /* for each macroblock col in image */ for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { /* Read activity from the map */ act = *(x->mb_activity_ptr); /* Calculate a normalized activity number */ a = act + 4 * cpi->activity_avg; b = 4 * act + cpi->activity_avg; if (b >= a) *(x->activity_ptr) = (int)((b + (a >> 1)) / a) - 1; else *(x->activity_ptr) = 1 - (int)((a + (b >> 1)) / b); #if OUTPUT_NORM_ACT_STATS fprintf(f, " %6d", *(x->mb_activity_ptr)); #endif /* Increment activity map pointers */ x->mb_activity_ptr++; } #if OUTPUT_NORM_ACT_STATS fprintf(f, "\n"); #endif } #if OUTPUT_NORM_ACT_STATS fclose(f); #endif } #endif /* Loop through all MBs. Note activity of each, average activity and * calculate a normalized activity for each */ static void build_activity_map(VP8_COMP *cpi) { MACROBLOCK *const x = &cpi->mb; MACROBLOCKD *xd = &x->e_mbd; VP8_COMMON *const cm = &cpi->common; #if ALT_ACT_MEASURE YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; int recon_yoffset; int recon_y_stride = new_yv12->y_stride; #endif int mb_row, mb_col; unsigned int mb_activity; int64_t activity_sum = 0; /* for each macroblock row in image */ for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { #if ALT_ACT_MEASURE /* reset above block coeffs */ xd->up_available = (mb_row != 0); recon_yoffset = (mb_row * recon_y_stride * 16); #endif /* for each macroblock col in image */ for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { #if ALT_ACT_MEASURE xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset; xd->left_available = (mb_col != 0); recon_yoffset += 16; #endif /* Copy current mb to a buffer */ vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); /* measure activity */ mb_activity = mb_activity_measure(cpi, x, mb_row, mb_col); /* Keep frame sum */ activity_sum += mb_activity; /* Store MB level activity details. */ *x->mb_activity_ptr = mb_activity; /* Increment activity map pointer */ x->mb_activity_ptr++; /* adjust to the next column of source macroblocks */ x->src.y_buffer += 16; } /* adjust to the next row of mbs */ x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; #if ALT_ACT_MEASURE /* extend the recon for intra prediction */ vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); #endif } /* Calculate an "average" MB activity */ calc_av_activity(cpi, activity_sum); #if USE_ACT_INDEX /* Calculate an activity index number of each mb */ calc_activity_index(cpi, x); #endif } /* Macroblock activity masking */ void vp8_activity_masking(VP8_COMP *cpi, MACROBLOCK *x) { #if USE_ACT_INDEX x->rdmult += *(x->mb_activity_ptr) * (x->rdmult >> 2); x->errorperbit = x->rdmult * 100 / (110 * x->rddiv); x->errorperbit += (x->errorperbit == 0); #else int64_t a; int64_t b; int64_t act = *(x->mb_activity_ptr); /* Apply the masking to the RD multiplier. 
*/ a = act + (2 * cpi->activity_avg); b = (2 * act) + cpi->activity_avg; x->rdmult = (unsigned int)(((int64_t)x->rdmult * b + (a >> 1)) / a); x->errorperbit = x->rdmult * 100 / (110 * x->rddiv); x->errorperbit += (x->errorperbit == 0); #endif /* Activity based Zbin adjustment */ adjust_act_zbin(cpi, x); } static void encode_mb_row(VP8_COMP *cpi, VP8_COMMON *cm, int mb_row, MACROBLOCK *x, MACROBLOCKD *xd, TOKENEXTRA **tp, int *segment_counts, int *totalrate) { int recon_yoffset, recon_uvoffset; int mb_col; int ref_fb_idx = cm->lst_fb_idx; int dst_fb_idx = cm->new_fb_idx; int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; int map_index = (mb_row * cpi->common.mb_cols); #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) const int num_part = (1 << cm->multi_token_partition); TOKENEXTRA *tp_start = cpi->tok; vp8_writer *w; #endif #if CONFIG_MULTITHREAD const int nsync = cpi->mt_sync_range; vpx_atomic_int rightmost_col = VPX_ATOMIC_INIT(cm->mb_cols + nsync); const vpx_atomic_int *last_row_current_mb_col; vpx_atomic_int *current_mb_col = &cpi->mt_current_mb_col[mb_row]; if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0 && mb_row != 0) { last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1]; } else { last_row_current_mb_col = &rightmost_col; } #endif #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) if (num_part > 1) w = &cpi->bc[1 + (mb_row % num_part)]; else w = &cpi->bc[1]; #endif /* reset above block coeffs */ xd->above_context = cm->above_context; xd->up_available = (mb_row != 0); recon_yoffset = (mb_row * recon_y_stride * 16); recon_uvoffset = (mb_row * recon_uv_stride * 8); cpi->tplist[mb_row].start = *tp; /* printf("Main mb_row = %d\n", mb_row); */ /* Distance of Mb to the top & bottom edges, specified in 1/8th pel * units as they are always compared to values that are in 1/8th pel */ xd->mb_to_top_edge = -((mb_row * 16) << 3); xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3; /* Set up limit values for vertical motion vector components * to prevent them extending beyond the UMV borders */ x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); /* Set the mb activity pointer to the start of the row. 
*/ x->mb_activity_ptr = &cpi->mb_activity_map[map_index]; /* for each macroblock col in image */ for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) *tp = cpi->tok; #endif /* Distance of Mb to the left & right edges, specified in * 1/8th pel units as they are always compared to values * that are in 1/8th pel units */ xd->mb_to_left_edge = -((mb_col * 16) << 3); xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3; /* Set up limit values for horizontal motion vector components * to prevent them extending beyond the UMV borders */ x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset; xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset; xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); x->rddiv = cpi->RDDIV; x->rdmult = cpi->RDMULT; /* Copy current mb to a buffer */ vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); #if CONFIG_MULTITHREAD if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) { if (((mb_col - 1) % nsync) == 0) { vpx_atomic_store_release(current_mb_col, mb_col - 1); } if (mb_row && !(mb_col & (nsync - 1))) { vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync); } } #endif if (cpi->oxcf.tuning == VP8_TUNE_SSIM) vp8_activity_masking(cpi, x); /* Is segmentation enabled */ /* MB level adjustment to quantizer */ if (xd->segmentation_enabled) { /* Code to set segment id in xd->mbmi.segment_id for current MB * (with range checking) */ if (cpi->segmentation_map[map_index + mb_col] <= 3) { xd->mode_info_context->mbmi.segment_id = cpi->segmentation_map[map_index + mb_col]; } else { xd->mode_info_context->mbmi.segment_id = 0; } vp8cx_mb_init_quantizer(cpi, x, 1); } else { /* Set to Segment 0 by default */ xd->mode_info_context->mbmi.segment_id = 0; } x->active_ptr = cpi->active_map + map_index + mb_col; if (cm->frame_type == KEY_FRAME) { *totalrate += vp8cx_encode_intra_macroblock(cpi, x, tp); #ifdef MODE_STATS y_modes[xd->mbmi.mode]++; #endif } else { *totalrate += vp8cx_encode_inter_macroblock( cpi, x, tp, recon_yoffset, recon_uvoffset, mb_row, mb_col); #ifdef MODE_STATS inter_y_modes[xd->mbmi.mode]++; if (xd->mbmi.mode == SPLITMV) { int b; for (b = 0; b < xd->mbmi.partition_count; ++b) { inter_b_modes[x->partition->bmi[b].mode]++; } } #endif // Keep track of how many (consecutive) times a block is coded // as ZEROMV_LASTREF, for base layer frames. // Reset to 0 if its coded as anything else. if (cpi->current_layer == 0) { if (xd->mode_info_context->mbmi.mode == ZEROMV && xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) { // Increment, check for wrap-around. 
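/* A note on the bookkeeping below (summarizing, not changing, the logic): both counters saturate at 255 and are reset by any macroblock not coded as ZEROMV on LAST_FRAME; the temporal denoiser consumes them, e.g. its skin test requires consec_zero_last >= 2 and its aggressive mode compares against denoise_pars.consec_zerolast == 15. */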
if (cpi->consec_zero_last[map_index + mb_col] < 255) { cpi->consec_zero_last[map_index + mb_col] += 1; } if (cpi->consec_zero_last_mvbias[map_index + mb_col] < 255) { cpi->consec_zero_last_mvbias[map_index + mb_col] += 1; } } else { cpi->consec_zero_last[map_index + mb_col] = 0; cpi->consec_zero_last_mvbias[map_index + mb_col] = 0; } if (x->zero_last_dot_suppress) { cpi->consec_zero_last_mvbias[map_index + mb_col] = 0; } } /* Special case code for cyclic refresh * If cyclic update enabled then copy xd->mbmi.segment_id; (which * may have been updated based on mode during * vp8cx_encode_inter_macroblock()) back into the global * segmentation map */ if ((cpi->current_layer == 0) && (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)) { cpi->segmentation_map[map_index + mb_col] = xd->mode_info_context->mbmi.segment_id; /* If the block has been refreshed mark it as clean (the * magnitude of the -ve influences how long it will be before * we consider another refresh): * Else if it was coded (last frame 0,0) and has not already * been refreshed then mark it as a candidate for cleanup * next time (marked 0) else mark it as dirty (1). */ if (xd->mode_info_context->mbmi.segment_id) { cpi->cyclic_refresh_map[map_index + mb_col] = -1; } else if ((xd->mode_info_context->mbmi.mode == ZEROMV) && (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME)) { if (cpi->cyclic_refresh_map[map_index + mb_col] == 1) { cpi->cyclic_refresh_map[map_index + mb_col] = 0; } } else { cpi->cyclic_refresh_map[map_index + mb_col] = 1; } } } cpi->tplist[mb_row].stop = *tp; #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING /* pack tokens for this MB */ { int tok_count = *tp - tp_start; vp8_pack_tokens(w, tp_start, tok_count); } #endif /* Increment pointer into gf usage flags structure. */ x->gf_active_ptr++; /* Increment the activity mask pointers. */ x->mb_activity_ptr++; /* adjust to the next column of macroblocks */ x->src.y_buffer += 16; x->src.u_buffer += 8; x->src.v_buffer += 8; recon_yoffset += 16; recon_uvoffset += 8; /* Keep track of segment usage */ segment_counts[xd->mode_info_context->mbmi.segment_id]++; /* skip to next mb */ xd->mode_info_context++; x->partition_info++; xd->above_context++; } /* extend the recon for intra prediction */ vp8_extend_mb_row(&cm->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); #if CONFIG_MULTITHREAD if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) != 0) { vpx_atomic_store_release(current_mb_col, vpx_atomic_load_acquire(&rightmost_col)); } #endif /* this is to account for the border */ xd->mode_info_context++; x->partition_info++; } static void init_encode_frame_mb_context(VP8_COMP *cpi) { MACROBLOCK *const x = &cpi->mb; VP8_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; /* GF active flags data structure */ x->gf_active_ptr = (signed char *)cpi->gf_active_flags; /* Activity map pointer */ x->mb_activity_ptr = cpi->mb_activity_map; x->act_zbin_adj = 0; x->partition_info = x->pi; xd->mode_info_context = cm->mi; xd->mode_info_stride = cm->mode_info_stride; xd->frame_type = cm->frame_type; /* reset intra mode contexts */ if (cm->frame_type == KEY_FRAME) vp8_init_mbmode_probs(cm); /* Copy data over into macro block data structures. 
*/ x->src = *cpi->Source; xd->pre = cm->yv12_fb[cm->lst_fb_idx]; xd->dst = cm->yv12_fb[cm->new_fb_idx]; /* set up frame for intra coded blocks */ vp8_setup_intra_recon(&cm->yv12_fb[cm->new_fb_idx]); vp8_build_block_offsets(x); xd->mode_info_context->mbmi.mode = DC_PRED; xd->mode_info_context->mbmi.uv_mode = DC_PRED; xd->left_context = &cm->left_context; x->mvc = cm->fc.mvc; memset(cm->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) * cm->mb_cols); /* Special case treatment when GF and ARF are not sensible options * for reference */ if (cpi->ref_frame_flags == VP8_LAST_FRAME) { vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded, 255, 128); } else if ((cpi->oxcf.number_of_layers > 1) && (cpi->ref_frame_flags == VP8_GOLD_FRAME)) { vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded, 1, 255); } else if ((cpi->oxcf.number_of_layers > 1) && (cpi->ref_frame_flags == VP8_ALTR_FRAME)) { vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded, 1, 1); } else { vp8_calc_ref_frame_costs(x->ref_frame_cost, cpi->prob_intra_coded, cpi->prob_last_coded, cpi->prob_gf_coded); } xd->fullpixel_mask = 0xffffffff; if (cm->full_pixel) xd->fullpixel_mask = 0xfffffff8; vp8_zero(x->coef_counts); vp8_zero(x->ymode_count); vp8_zero(x->uv_mode_count) x->prediction_error = 0; x->intra_error = 0; vp8_zero(x->count_mb_ref_frame_usage); } #if CONFIG_MULTITHREAD static void sum_coef_counts(MACROBLOCK *x, MACROBLOCK *x_thread) { int i = 0; do { int j = 0; do { int k = 0; do { /* at every context */ /* calc probs and branch cts for this frame only */ int t = 0; /* token/prob index */ do { x->coef_counts[i][j][k][t] += x_thread->coef_counts[i][j][k][t]; } while (++t < ENTROPY_NODES); } while (++k < PREV_COEF_CONTEXTS); } while (++j < COEF_BANDS); } while (++i < BLOCK_TYPES); } #endif // CONFIG_MULTITHREAD void vp8_encode_frame(VP8_COMP *cpi) { int mb_row; MACROBLOCK *const x = &cpi->mb; VP8_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; TOKENEXTRA *tp = cpi->tok; int segment_counts[MAX_MB_SEGMENTS]; int totalrate; #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING BOOL_CODER *bc = &cpi->bc[1]; /* bc[0] is for control partition */ const int num_part = (1 << cm->multi_token_partition); #endif memset(segment_counts, 0, sizeof(segment_counts)); totalrate = 0; if (cpi->compressor_speed == 2) { if (cpi->oxcf.cpu_used < 0) { cpi->Speed = -(cpi->oxcf.cpu_used); } else { vp8_auto_select_speed(cpi); } } /* Functions setup for all frame types so we can use MC in AltRef */ if (!cm->use_bilinear_mc_filter) { xd->subpixel_predict = vp8_sixtap_predict4x4; xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; xd->subpixel_predict8x8 = vp8_sixtap_predict8x8; xd->subpixel_predict16x16 = vp8_sixtap_predict16x16; } else { xd->subpixel_predict = vp8_bilinear_predict4x4; xd->subpixel_predict8x4 = vp8_bilinear_predict8x4; xd->subpixel_predict8x8 = vp8_bilinear_predict8x8; xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; } cpi->mb.skip_true_count = 0; cpi->tok_count = 0; #if 0 /* Experimental code */ cpi->frame_distortion = 0; cpi->last_mb_distortion = 0; #endif xd->mode_info_context = cm->mi; vp8_zero(cpi->mb.MVcount); vp8cx_frame_init_quantizer(cpi); vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); vp8cx_initialize_me_consts(cpi, cm->base_qindex); if (cpi->oxcf.tuning == VP8_TUNE_SSIM) { /* Initialize encode frame context. 
*/ init_encode_frame_mb_context(cpi); /* Build a frame level activity map */ build_activity_map(cpi); } /* re-init encode frame context. */ init_encode_frame_mb_context(cpi); #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING { int i; for (i = 0; i < num_part; ++i) { vp8_start_encode(&bc[i], cpi->partition_d[i + 1], cpi->partition_d_end[i + 1]); bc[i].error = &cm->error; } } #endif { struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); #if CONFIG_MULTITHREAD if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) { int i; vp8cx_init_mbrthread_data(cpi, x, cpi->mb_row_ei, cpi->encoding_thread_count); for (i = 0; i < cm->mb_rows; ++i) vpx_atomic_store_release(&cpi->mt_current_mb_col[i], -1); for (i = 0; i < cpi->encoding_thread_count; ++i) { sem_post(&cpi->h_event_start_encoding[i]); } for (mb_row = 0; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) { vp8_zero(cm->left_context) #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING tp = cpi->tok; #else tp = cpi->tok + mb_row * (cm->mb_cols * 16 * 24); #endif encode_mb_row(cpi, cm, mb_row, x, xd, &tp, segment_counts, &totalrate); /* adjust to the next row of mbs */ x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count; x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count; } /* Wait for all the threads to finish. */ for (i = 0; i < cpi->encoding_thread_count; ++i) { sem_wait(&cpi->h_event_end_encoding[i]); } for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { cpi->tok_count += (unsigned int)(cpi->tplist[mb_row].stop - cpi->tplist[mb_row].start); } if (xd->segmentation_enabled) { int j; if (xd->segmentation_enabled) { for (i = 0; i < cpi->encoding_thread_count; ++i) { for (j = 0; j < 4; ++j) { segment_counts[j] += cpi->mb_row_ei[i].segment_counts[j]; } } } } for (i = 0; i < cpi->encoding_thread_count; ++i) { int mode_count; int c_idx; totalrate += cpi->mb_row_ei[i].totalrate; cpi->mb.skip_true_count += cpi->mb_row_ei[i].mb.skip_true_count; for (mode_count = 0; mode_count < VP8_YMODES; ++mode_count) { cpi->mb.ymode_count[mode_count] += cpi->mb_row_ei[i].mb.ymode_count[mode_count]; } for (mode_count = 0; mode_count < VP8_UV_MODES; ++mode_count) { cpi->mb.uv_mode_count[mode_count] += cpi->mb_row_ei[i].mb.uv_mode_count[mode_count]; } for (c_idx = 0; c_idx < MVvals; ++c_idx) { cpi->mb.MVcount[0][c_idx] += cpi->mb_row_ei[i].mb.MVcount[0][c_idx]; cpi->mb.MVcount[1][c_idx] += cpi->mb_row_ei[i].mb.MVcount[1][c_idx]; } cpi->mb.prediction_error += cpi->mb_row_ei[i].mb.prediction_error; cpi->mb.intra_error += cpi->mb_row_ei[i].mb.intra_error; for (c_idx = 0; c_idx < MAX_REF_FRAMES; ++c_idx) { cpi->mb.count_mb_ref_frame_usage[c_idx] += cpi->mb_row_ei[i].mb.count_mb_ref_frame_usage[c_idx]; } for (c_idx = 0; c_idx < MAX_ERROR_BINS; ++c_idx) { cpi->mb.error_bins[c_idx] += cpi->mb_row_ei[i].mb.error_bins[c_idx]; } /* add up counts for each thread */ sum_coef_counts(x, &cpi->mb_row_ei[i].mb); } } else #endif // CONFIG_MULTITHREAD { /* for each macroblock row in image */ for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { vp8_zero(cm->left_context) #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING tp = cpi->tok; #endif encode_mb_row(cpi, cm, mb_row, x, 
xd, &tp, segment_counts, &totalrate); /* adjust to the next row of mbs */ x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; } cpi->tok_count = (unsigned int)(tp - cpi->tok); } #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING { int i; for (i = 0; i < num_part; ++i) { vp8_stop_encode(&bc[i]); cpi->partition_sz[i + 1] = bc[i].pos; } } #endif vpx_usec_timer_mark(&emr_timer); cpi->time_encode_mb_row += vpx_usec_timer_elapsed(&emr_timer); } // Work out the segment probabilities if segmentation is enabled // and needs to be updated if (xd->segmentation_enabled && xd->update_mb_segmentation_map) { int tot_count; int i; /* Set to defaults */ memset(xd->mb_segment_tree_probs, 255, sizeof(xd->mb_segment_tree_probs)); tot_count = segment_counts[0] + segment_counts[1] + segment_counts[2] + segment_counts[3]; if (tot_count) { xd->mb_segment_tree_probs[0] = ((segment_counts[0] + segment_counts[1]) * 255) / tot_count; tot_count = segment_counts[0] + segment_counts[1]; if (tot_count > 0) { xd->mb_segment_tree_probs[1] = (segment_counts[0] * 255) / tot_count; } tot_count = segment_counts[2] + segment_counts[3]; if (tot_count > 0) { xd->mb_segment_tree_probs[2] = (segment_counts[2] * 255) / tot_count; } /* Zero probabilities not allowed */ for (i = 0; i < MB_FEATURE_TREE_PROBS; ++i) { if (xd->mb_segment_tree_probs[i] == 0) xd->mb_segment_tree_probs[i] = 1; } } } /* projected_frame_size in units of BYTES */ cpi->projected_frame_size = totalrate >> 8; /* Make a note of the percentage MBs coded Intra. */ if (cm->frame_type == KEY_FRAME) { cpi->this_frame_percent_intra = 100; } else { int tot_modes; tot_modes = cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] + cpi->mb.count_mb_ref_frame_usage[LAST_FRAME] + cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME] + cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME]; if (tot_modes) { cpi->this_frame_percent_intra = cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME] * 100 / tot_modes; } } #if !CONFIG_REALTIME_ONLY /* Adjust the projected reference frame usage probability numbers to * reflect what we have just seen. This may be useful when we make * multiple iterations of the recode loop rather than continuing to use * values from the previous frame. 
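   * (vp8_convert_rfct_to_prob() below presumably rederives the coded
   * prob_intra_coded / prob_last_coded / prob_gf_coded values from the
   * count_mb_ref_frame_usage[] totals accumulated during this encode.)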
*/ if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || (!cm->refresh_alt_ref_frame && !cm->refresh_golden_frame))) { vp8_convert_rfct_to_prob(cpi); } #endif } void vp8_setup_block_ptrs(MACROBLOCK *x) { int r, c; int i; for (r = 0; r < 4; ++r) { for (c = 0; c < 4; ++c) { x->block[r * 4 + c].src_diff = x->src_diff + r * 4 * 16 + c * 4; } } for (r = 0; r < 2; ++r) { for (c = 0; c < 2; ++c) { x->block[16 + r * 2 + c].src_diff = x->src_diff + 256 + r * 4 * 8 + c * 4; } } for (r = 0; r < 2; ++r) { for (c = 0; c < 2; ++c) { x->block[20 + r * 2 + c].src_diff = x->src_diff + 320 + r * 4 * 8 + c * 4; } } x->block[24].src_diff = x->src_diff + 384; for (i = 0; i < 25; ++i) { x->block[i].coeff = x->coeff + i * 16; } } void vp8_build_block_offsets(MACROBLOCK *x) { int block = 0; int br, bc; vp8_build_block_doffsets(&x->e_mbd); /* y blocks */ x->thismb_ptr = &x->thismb[0]; for (br = 0; br < 4; ++br) { for (bc = 0; bc < 4; ++bc) { BLOCK *this_block = &x->block[block]; this_block->base_src = &x->thismb_ptr; this_block->src_stride = 16; this_block->src = 4 * br * 16 + 4 * bc; ++block; } } /* u blocks */ for (br = 0; br < 2; ++br) { for (bc = 0; bc < 2; ++bc) { BLOCK *this_block = &x->block[block]; this_block->base_src = &x->src.u_buffer; this_block->src_stride = x->src.uv_stride; this_block->src = 4 * br * this_block->src_stride + 4 * bc; ++block; } } /* v blocks */ for (br = 0; br < 2; ++br) { for (bc = 0; bc < 2; ++bc) { BLOCK *this_block = &x->block[block]; this_block->base_src = &x->src.v_buffer; this_block->src_stride = x->src.uv_stride; this_block->src = 4 * br * this_block->src_stride + 4 * bc; ++block; } } } static void sum_intra_stats(VP8_COMP *cpi, MACROBLOCK *x) { const MACROBLOCKD *xd = &x->e_mbd; const MB_PREDICTION_MODE m = xd->mode_info_context->mbmi.mode; const MB_PREDICTION_MODE uvm = xd->mode_info_context->mbmi.uv_mode; #ifdef MODE_STATS const int is_key = cpi->common.frame_type == KEY_FRAME; ++(is_key ? uv_modes : inter_uv_modes)[uvm]; if (m == B_PRED) { unsigned int *const bct = is_key ? b_modes : inter_b_modes; int b = 0; do { ++bct[xd->block[b].bmi.mode]; } while (++b < 16); } #else (void)cpi; #endif ++x->ymode_count[m]; ++x->uv_mode_count[uvm]; } /* Experimental stub function to create a per MB zbin adjustment based on * some previously calculated measure of MB activity. */ static void adjust_act_zbin(VP8_COMP *cpi, MACROBLOCK *x) { #if USE_ACT_INDEX x->act_zbin_adj = *(x->mb_activity_ptr); #else int64_t a; int64_t b; int64_t act = *(x->mb_activity_ptr); /* Apply the masking to the RD multiplier. 
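   * act_zbin_adj ends up as a rounded ratio of the two blends
   *   a = act + 4 * avg  and  b = 4 * act + avg.
   * Hypothetical worked example: act = 160 and activity_avg = 40 give
   * a = 320 and b = 680; since act > avg,
   * act_zbin_adj = (680 + 160) / 320 - 1 = 2 - 1 = 1.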
*/
  a = act + 4 * cpi->activity_avg;
  b = 4 * act + cpi->activity_avg;

  if (act > cpi->activity_avg) {
    x->act_zbin_adj = (int)(((int64_t)b + (a >> 1)) / a) - 1;
  } else {
    x->act_zbin_adj = 1 - (int)(((int64_t)a + (b >> 1)) / b);
  }
#endif
}

int vp8cx_encode_intra_macroblock(VP8_COMP *cpi, MACROBLOCK *x,
                                  TOKENEXTRA **t) {
  MACROBLOCKD *xd = &x->e_mbd;
  int rate;

  if (cpi->sf.RD && cpi->compressor_speed != 2) {
    vp8_rd_pick_intra_mode(x, &rate);
  } else {
    vp8_pick_intra_mode(x, &rate);
  }

  if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
    adjust_act_zbin(cpi, x);
    vp8_update_zbin_extra(cpi, x);
  }

  if (x->e_mbd.mode_info_context->mbmi.mode == B_PRED) {
    vp8_encode_intra4x4mby(x);
  } else {
    vp8_encode_intra16x16mby(x);
  }

  vp8_encode_intra16x16mbuv(x);

  sum_intra_stats(cpi, x);

  vp8_tokenize_mb(cpi, x, t);

  if (xd->mode_info_context->mbmi.mode != B_PRED)
    vp8_inverse_transform_mby(xd);

  vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv,
                                xd->dst.u_buffer, xd->dst.v_buffer,
                                xd->dst.uv_stride, xd->eobs + 16);
  return rate;
}

#ifdef SPEEDSTATS
extern int cnt_pm;
#endif

extern void vp8_fix_contexts(MACROBLOCKD *x);

int vp8cx_encode_inter_macroblock(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t,
                                  int recon_yoffset, int recon_uvoffset,
                                  int mb_row, int mb_col) {
  MACROBLOCKD *const xd = &x->e_mbd;
  int intra_error = 0;
  int rate;
  int distortion;

  x->skip = 0;

  if (xd->segmentation_enabled) {
    x->encode_breakout =
        cpi->segment_encode_breakout[xd->mode_info_context->mbmi.segment_id];
  } else {
    x->encode_breakout = cpi->oxcf.encode_breakout;
  }

#if CONFIG_TEMPORAL_DENOISING
  /* Reset the best sse mode/mv for each macroblock. */
  x->best_reference_frame = INTRA_FRAME;
  x->best_zeromv_reference_frame = INTRA_FRAME;
  x->best_sse_inter_mode = 0;
  x->best_sse_mv.as_int = 0;
  x->need_to_clamp_best_mvs = 0;
#endif

  if (cpi->sf.RD) {
    int zbin_mode_boost_enabled = x->zbin_mode_boost_enabled;

    /* Are we using the fast quantizer for the mode selection? */
    if (cpi->sf.use_fastquant_for_pick) {
      x->quantize_b = vp8_fast_quantize_b;

      /* the fast quantizer does not use zbin_extra, so
       * do not recalculate */
      x->zbin_mode_boost_enabled = 0;
    }

    vp8_rd_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
                           &distortion, &intra_error, mb_row, mb_col);

    /* switch back to the regular quantizer for the encode */
    if (cpi->sf.improved_quant) {
      x->quantize_b = vp8_regular_quantize_b;
    }

    /* restore cpi->zbin_mode_boost_enabled */
    x->zbin_mode_boost_enabled = zbin_mode_boost_enabled;
  } else {
    vp8_pick_inter_mode(cpi, x, recon_yoffset, recon_uvoffset, &rate,
                        &distortion, &intra_error, mb_row, mb_col);
  }

  x->prediction_error += distortion;
  x->intra_error += intra_error;

  if (cpi->oxcf.tuning == VP8_TUNE_SSIM) {
    /* Adjust the zbin based on this MB rate. */
    adjust_act_zbin(cpi, x);
  }

#if 0
  /* Experimental RD code */
  cpi->frame_distortion += distortion;
  cpi->last_mb_distortion = distortion;
#endif

  /* MB level adjustment to quantizer setup */
  if (xd->segmentation_enabled) {
    /* If cyclic update enabled */
    if (cpi->current_layer == 0 && cpi->cyclic_refresh_mode_enabled) {
      /* Clear segment_id back to 0 if not coded (last frame 0,0) */
      if ((xd->mode_info_context->mbmi.segment_id == 1) &&
          ((xd->mode_info_context->mbmi.ref_frame != LAST_FRAME) ||
           (xd->mode_info_context->mbmi.mode != ZEROMV))) {
        xd->mode_info_context->mbmi.segment_id = 0;

        /* segment_id changed, so update */
        vp8cx_mb_init_quantizer(cpi, x, 1);
      }
    }
  }

  {
    /* Experimental code.
     * Special case for gf and arf zeromv modes, for 1 temporal layer.
     * Increase zbin size to suppress noise.
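     * The tiers applied below: with a single temporal layer, ZEROMV against
     * GOLDEN/ALTREF gets GF_ZEROMV_ZBIN_BOOST; other ZEROMV blocks get
     * LF_ZEROMV_ZBIN_BOOST; SPLITMV gets no boost; every remaining inter
     * mode gets MV_ZBIN_BOOST; intra blocks stay at zero.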
*/ x->zbin_mode_boost = 0; if (x->zbin_mode_boost_enabled) { if (xd->mode_info_context->mbmi.ref_frame != INTRA_FRAME) { if (xd->mode_info_context->mbmi.mode == ZEROMV) { if (xd->mode_info_context->mbmi.ref_frame != LAST_FRAME && cpi->oxcf.number_of_layers == 1) { x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; } else { x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; } } else if (xd->mode_info_context->mbmi.mode == SPLITMV) { x->zbin_mode_boost = 0; } else { x->zbin_mode_boost = MV_ZBIN_BOOST; } } } /* The fast quantizer doesn't use zbin_extra, only do so with * the regular quantizer. */ if (cpi->sf.improved_quant) vp8_update_zbin_extra(cpi, x); } x->count_mb_ref_frame_usage[xd->mode_info_context->mbmi.ref_frame]++; if (xd->mode_info_context->mbmi.ref_frame == INTRA_FRAME) { vp8_encode_intra16x16mbuv(x); if (xd->mode_info_context->mbmi.mode == B_PRED) { vp8_encode_intra4x4mby(x); } else { vp8_encode_intra16x16mby(x); } sum_intra_stats(cpi, x); } else { int ref_fb_idx; if (xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) { ref_fb_idx = cpi->common.lst_fb_idx; } else if (xd->mode_info_context->mbmi.ref_frame == GOLDEN_FRAME) { ref_fb_idx = cpi->common.gld_fb_idx; } else { ref_fb_idx = cpi->common.alt_fb_idx; } xd->pre.y_buffer = cpi->common.yv12_fb[ref_fb_idx].y_buffer + recon_yoffset; xd->pre.u_buffer = cpi->common.yv12_fb[ref_fb_idx].u_buffer + recon_uvoffset; xd->pre.v_buffer = cpi->common.yv12_fb[ref_fb_idx].v_buffer + recon_uvoffset; if (!x->skip) { vp8_encode_inter16x16(x); } else { vp8_build_inter16x16_predictors_mb(xd, xd->dst.y_buffer, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.y_stride, xd->dst.uv_stride); } } if (!x->skip) { vp8_tokenize_mb(cpi, x, t); if (xd->mode_info_context->mbmi.mode != B_PRED) { vp8_inverse_transform_mby(xd); } vp8_dequant_idct_add_uv_block(xd->qcoeff + 16 * 16, xd->dequant_uv, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride, xd->eobs + 16); } else { /* always set mb_skip_coeff as it is needed by the loopfilter */ xd->mode_info_context->mbmi.mb_skip_coeff = 1; if (cpi->common.mb_no_coeff_skip) { x->skip_true_count++; vp8_fix_contexts(xd); } else { vp8_stuff_mb(cpi, x, t); } } return rate; } libvpx-1.8.2/vp8/encoder/encodeframe.h000066400000000000000000000023521357355204000176110ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP8_ENCODER_ENCODEFRAME_H_ #define VPX_VP8_ENCODER_ENCODEFRAME_H_ #include "vp8/encoder/tokenize.h" #ifdef __cplusplus extern "C" { #endif struct VP8_COMP; struct macroblock; void vp8_activity_masking(struct VP8_COMP *cpi, MACROBLOCK *x); void vp8_build_block_offsets(struct macroblock *x); void vp8_setup_block_ptrs(struct macroblock *x); void vp8_encode_frame(struct VP8_COMP *cpi); int vp8cx_encode_inter_macroblock(struct VP8_COMP *cpi, struct macroblock *x, TOKENEXTRA **t, int recon_yoffset, int recon_uvoffset, int mb_row, int mb_col); int vp8cx_encode_intra_macroblock(struct VP8_COMP *cpi, struct macroblock *x, TOKENEXTRA **t); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_ENCODEFRAME_H_ libvpx-1.8.2/vp8/encoder/encodeintra.c000066400000000000000000000067321357355204000176350ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" #include "vp8_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "vp8/encoder/quantize.h" #include "vp8/common/reconintra.h" #include "vp8/common/reconintra4x4.h" #include "encodemb.h" #include "vp8/common/invtrans.h" #include "encodeintra.h" int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred) { int i; int intra_pred_var = 0; (void)cpi; if (use_dc_pred) { x->e_mbd.mode_info_context->mbmi.mode = DC_PRED; x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; vp8_encode_intra16x16mby(x); vp8_inverse_transform_mby(&x->e_mbd); } else { for (i = 0; i < 16; ++i) { x->e_mbd.block[i].bmi.as_mode = B_DC_PRED; vp8_encode_intra4x4block(x, i); } } intra_pred_var = vpx_get_mb_ss(x->src_diff); return intra_pred_var; } void vp8_encode_intra4x4block(MACROBLOCK *x, int ib) { BLOCKD *b = &x->e_mbd.block[ib]; BLOCK *be = &x->block[ib]; int dst_stride = x->e_mbd.dst.y_stride; unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; unsigned char *Above = dst - dst_stride; unsigned char *yleft = dst - 1; unsigned char top_left = Above[-1]; vp8_intra4x4_predict(Above, yleft, dst_stride, b->bmi.as_mode, b->predictor, 16, top_left); vp8_subtract_b(be, b, 16); x->short_fdct4x4(be->src_diff, be->coeff, 32); x->quantize_b(be, b); if (*b->eob > 1) { vp8_short_idct4x4llm(b->dqcoeff, b->predictor, 16, dst, dst_stride); } else { vp8_dc_only_idct_add(b->dqcoeff[0], b->predictor, 16, dst, dst_stride); } } void vp8_encode_intra4x4mby(MACROBLOCK *mb) { int i; MACROBLOCKD *xd = &mb->e_mbd; intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16); for (i = 0; i < 16; ++i) vp8_encode_intra4x4block(mb, i); return; } void vp8_encode_intra16x16mby(MACROBLOCK *x) { BLOCK *b = &x->block[0]; MACROBLOCKD *xd = &x->e_mbd; vp8_build_intra_predictors_mby_s(xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1, xd->dst.y_stride, xd->dst.y_buffer, xd->dst.y_stride); vp8_subtract_mby(x->src_diff, *(b->base_src), b->src_stride, xd->dst.y_buffer, xd->dst.y_stride); vp8_transform_intra_mby(x); vp8_quantize_mby(x); if (x->optimize) vp8_optimize_mby(x); } void vp8_encode_intra16x16mbuv(MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; vp8_build_intra_predictors_mbuv_s(xd, xd->dst.u_buffer - xd->dst.uv_stride, 
xd->dst.v_buffer - xd->dst.uv_stride, xd->dst.u_buffer - 1, xd->dst.v_buffer - 1, xd->dst.uv_stride, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride); vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->src.uv_stride, xd->dst.u_buffer, xd->dst.v_buffer, xd->dst.uv_stride); vp8_transform_mbuv(x); vp8_quantize_mbuv(x); if (x->optimize) vp8_optimize_mbuv(x); } libvpx-1.8.2/vp8/encoder/encodeintra.h000066400000000000000000000016041357355204000176330ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_ENCODEINTRA_H_ #define VPX_VP8_ENCODER_ENCODEINTRA_H_ #include "onyx_int.h" #ifdef __cplusplus extern "C" { #endif int vp8_encode_intra(VP8_COMP *cpi, MACROBLOCK *x, int use_dc_pred); void vp8_encode_intra16x16mby(MACROBLOCK *x); void vp8_encode_intra16x16mbuv(MACROBLOCK *x); void vp8_encode_intra4x4mby(MACROBLOCK *mb); void vp8_encode_intra4x4block(MACROBLOCK *x, int ib); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_ENCODEINTRA_H_ libvpx-1.8.2/vp8/encoder/encodemb.c000066400000000000000000000361441357355204000171160ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_config.h" #include "vp8_rtcd.h" #include "encodemb.h" #include "vp8/common/reconinter.h" #include "vp8/encoder/quantize.h" #include "tokenize.h" #include "vp8/common/invtrans.h" #include "vpx_mem/vpx_mem.h" #include "rdopt.h" void vp8_subtract_b(BLOCK *be, BLOCKD *bd, int pitch) { unsigned char *src_ptr = (*(be->base_src) + be->src); short *diff_ptr = be->src_diff; unsigned char *pred_ptr = bd->predictor; int src_stride = be->src_stride; vpx_subtract_block(4, 4, diff_ptr, pitch, src_ptr, src_stride, pred_ptr, pitch); } void vp8_subtract_mbuv(short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride) { short *udiff = diff + 256; short *vdiff = diff + 320; vpx_subtract_block(8, 8, udiff, 8, usrc, src_stride, upred, pred_stride); vpx_subtract_block(8, 8, vdiff, 8, vsrc, src_stride, vpred, pred_stride); } void vp8_subtract_mby(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride) { vpx_subtract_block(16, 16, diff, 16, src, src_stride, pred, pred_stride); } static void vp8_subtract_mb(MACROBLOCK *x) { BLOCK *b = &x->block[0]; vp8_subtract_mby(x->src_diff, *(b->base_src), b->src_stride, x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride); vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->src.uv_stride, x->e_mbd.dst.u_buffer, x->e_mbd.dst.v_buffer, x->e_mbd.dst.uv_stride); } static void build_dcblock(MACROBLOCK *x) { short *src_diff_ptr = &x->src_diff[384]; int i; for (i = 0; i < 16; ++i) { src_diff_ptr[i] = x->coeff[i * 16]; } } void vp8_transform_mbuv(MACROBLOCK *x) { int i; for (i = 16; i < 24; i += 2) { x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16); } } void vp8_transform_intra_mby(MACROBLOCK *x) { int i; for (i = 0; i < 16; i += 2) { x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); } /* build dc block from 16 y dc values */ build_dcblock(x); /* do 2nd order transform on the dc block */ x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); } static void transform_mb(MACROBLOCK *x) { int i; for (i = 0; i < 16; i += 2) { x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); } /* build dc block from 16 y dc values */ if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) build_dcblock(x); for (i = 16; i < 24; i += 2) { x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 16); } /* do 2nd order transform on the dc block */ if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) { x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); } } static void transform_mby(MACROBLOCK *x) { int i; for (i = 0; i < 16; i += 2) { x->short_fdct8x4(&x->block[i].src_diff[0], &x->block[i].coeff[0], 32); } /* build dc block from 16 y dc values */ if (x->e_mbd.mode_info_context->mbmi.mode != SPLITMV) { build_dcblock(x); x->short_walsh4x4(&x->block[24].src_diff[0], &x->block[24].coeff[0], 8); } } #define RDTRUNC(RM, DM, R, D) ((128 + (R) * (RM)) & 0xFF) typedef struct vp8_token_state vp8_token_state; struct vp8_token_state { int rate; int error; signed char next; signed char token; short qc; }; /* TODO: experiments to find optimal multiple numbers */ #define Y1_RD_MULT 4 #define UV_RD_MULT 2 #define Y2_RD_MULT 16 static const int plane_rd_mult[4] = { Y1_RD_MULT, Y2_RD_MULT, UV_RD_MULT, Y1_RD_MULT }; static void optimize_b(MACROBLOCK *mb, int ib, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { BLOCK *b; BLOCKD *d; vp8_token_state tokens[17][2]; 
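  /* Trellis layout: tokens[i][s] caches the best (rate, error) pair seen for
   * zig-zag position i, where state s == 0 keeps the quantized value as-is
   * and s == 1 explores shrinking its magnitude by one step.  tokens[eob]
   * acts as the start/sentinel node, so 17 rows cover eob == 16. */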
unsigned best_mask[2]; const short *dequant_ptr; const short *coeff_ptr; short *qcoeff_ptr; short *dqcoeff_ptr; int eob; int i0; int rc; int x; int sz = 0; int next; int rdmult; int rddiv; int final_eob; int rd_cost0; int rd_cost1; int rate0; int rate1; int error0; int error1; int t0; int t1; int best; int band; int pt; int i; int err_mult = plane_rd_mult[type]; b = &mb->block[ib]; d = &mb->e_mbd.block[ib]; dequant_ptr = d->dequant; coeff_ptr = b->coeff; qcoeff_ptr = d->qcoeff; dqcoeff_ptr = d->dqcoeff; i0 = !type; eob = *d->eob; /* Now set up a Viterbi trellis to evaluate alternative roundings. */ rdmult = mb->rdmult * err_mult; if (mb->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) { rdmult = (rdmult * 9) >> 4; } rddiv = mb->rddiv; best_mask[0] = best_mask[1] = 0; /* Initialize the sentinel node of the trellis. */ tokens[eob][0].rate = 0; tokens[eob][0].error = 0; tokens[eob][0].next = 16; tokens[eob][0].token = DCT_EOB_TOKEN; tokens[eob][0].qc = 0; *(tokens[eob] + 1) = *(tokens[eob] + 0); next = eob; for (i = eob; i-- > i0;) { int base_bits; int d2; int dx; rc = vp8_default_zig_zag1d[i]; x = qcoeff_ptr[rc]; /* Only add a trellis state for non-zero coefficients. */ if (x) { int shortcut = 0; error0 = tokens[next][0].error; error1 = tokens[next][1].error; /* Evaluate the first possibility for this state. */ rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; t0 = (vp8_dct_value_tokens_ptr + x)->Token; /* Consider both possible successor states. */ if (next < 16) { band = vp8_coef_bands[i + 1]; pt = vp8_prev_token_class[t0]; rate0 += mb->token_costs[type][band][pt][tokens[next][0].token]; rate1 += mb->token_costs[type][band][pt][tokens[next][1].token]; } rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); if (rd_cost0 == rd_cost1) { rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); } /* And pick the best. */ best = rd_cost1 < rd_cost0; base_bits = *(vp8_dct_value_cost_ptr + x); dx = dqcoeff_ptr[rc] - coeff_ptr[rc]; d2 = dx * dx; tokens[i][0].rate = base_bits + (best ? rate1 : rate0); tokens[i][0].error = d2 + (best ? error1 : error0); tokens[i][0].next = next; tokens[i][0].token = t0; tokens[i][0].qc = x; best_mask[0] |= best << i; /* Evaluate the second possibility for this state. */ rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; if ((abs(x) * dequant_ptr[rc] > abs(coeff_ptr[rc])) && (abs(x) * dequant_ptr[rc] < abs(coeff_ptr[rc]) + dequant_ptr[rc])) { shortcut = 1; } else { shortcut = 0; } if (shortcut) { sz = -(x < 0); x -= 2 * sz + 1; } /* Consider both possible successor states. */ if (!x) { /* If we reduced this coefficient to zero, check to see if * we need to move the EOB back here. */ t0 = tokens[next][0].token == DCT_EOB_TOKEN ? DCT_EOB_TOKEN : ZERO_TOKEN; t1 = tokens[next][1].token == DCT_EOB_TOKEN ? DCT_EOB_TOKEN : ZERO_TOKEN; } else { t0 = t1 = (vp8_dct_value_tokens_ptr + x)->Token; } if (next < 16) { band = vp8_coef_bands[i + 1]; if (t0 != DCT_EOB_TOKEN) { pt = vp8_prev_token_class[t0]; rate0 += mb->token_costs[type][band][pt][tokens[next][0].token]; } if (t1 != DCT_EOB_TOKEN) { pt = vp8_prev_token_class[t1]; rate1 += mb->token_costs[type][band][pt][tokens[next][1].token]; } } rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); if (rd_cost0 == rd_cost1) { rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); } /* And pick the best. 
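   * RDTRUNC() recovers the low-order rate bits that RDCOST() rounds away,
   * so it only serves as a deterministic tie-break when the two integer
   * costs come out equal.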
*/ best = rd_cost1 < rd_cost0; base_bits = *(vp8_dct_value_cost_ptr + x); if (shortcut) { dx -= (dequant_ptr[rc] + sz) ^ sz; d2 = dx * dx; } tokens[i][1].rate = base_bits + (best ? rate1 : rate0); tokens[i][1].error = d2 + (best ? error1 : error0); tokens[i][1].next = next; tokens[i][1].token = best ? t1 : t0; tokens[i][1].qc = x; best_mask[1] |= best << i; /* Finally, make this the new head of the trellis. */ next = i; } /* There's no choice to make for a zero coefficient, so we don't * add a new trellis node, but we do need to update the costs. */ else { band = vp8_coef_bands[i + 1]; t0 = tokens[next][0].token; t1 = tokens[next][1].token; /* Update the cost of each path if we're past the EOB token. */ if (t0 != DCT_EOB_TOKEN) { tokens[next][0].rate += mb->token_costs[type][band][0][t0]; tokens[next][0].token = ZERO_TOKEN; } if (t1 != DCT_EOB_TOKEN) { tokens[next][1].rate += mb->token_costs[type][band][0][t1]; tokens[next][1].token = ZERO_TOKEN; } /* Don't update next, because we didn't add a new node. */ } } /* Now pick the best path through the whole trellis. */ band = vp8_coef_bands[i + 1]; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); rate0 = tokens[next][0].rate; rate1 = tokens[next][1].rate; error0 = tokens[next][0].error; error1 = tokens[next][1].error; t0 = tokens[next][0].token; t1 = tokens[next][1].token; rate0 += mb->token_costs[type][band][pt][t0]; rate1 += mb->token_costs[type][band][pt][t1]; rd_cost0 = RDCOST(rdmult, rddiv, rate0, error0); rd_cost1 = RDCOST(rdmult, rddiv, rate1, error1); if (rd_cost0 == rd_cost1) { rd_cost0 = RDTRUNC(rdmult, rddiv, rate0, error0); rd_cost1 = RDTRUNC(rdmult, rddiv, rate1, error1); } best = rd_cost1 < rd_cost0; final_eob = i0 - 1; for (i = next; i < eob; i = next) { x = tokens[i][best].qc; if (x) final_eob = i; rc = vp8_default_zig_zag1d[i]; qcoeff_ptr[rc] = x; dqcoeff_ptr[rc] = x * dequant_ptr[rc]; next = tokens[i][best].next; best = (best_mask[best] >> i) & 1; } final_eob++; *a = *l = (final_eob != !type); *d->eob = (char)final_eob; } static void check_reset_2nd_coeffs(MACROBLOCKD *x, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { int sum = 0; int i; BLOCKD *bd = &x->block[24]; if (bd->dequant[0] >= 35 && bd->dequant[1] >= 35) return; for (i = 0; i < (*bd->eob); ++i) { int coef = bd->dqcoeff[vp8_default_zig_zag1d[i]]; sum += (coef >= 0) ? coef : -coef; if (sum >= 35) return; } /************************************************************************** our inverse hadamard transform effectively is weighted sum of all 16 inputs with weight either 1 or -1. It has a last stage scaling of (sum+3)>>3. And dc only idct is (dc+4)>>3. So if all the sums are between -35 and 29, the output after inverse wht and idct will be all zero. A sum of absolute value smaller than 35 guarantees all 16 different (+1/-1) weighted sums in wht fall between -35 and +35. 
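Worked check with hypothetical sums: sum = -35 gives ((-35 + 3) >> 3) = -4
and then ((-4 + 4) >> 3) = 0; sum = 28 gives ((28 + 3) >> 3) = 3 and then
((3 + 4) >> 3) = 0.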
**************************************************************************/
  if (sum < 35) {
    for (i = 0; i < (*bd->eob); ++i) {
      int rc = vp8_default_zig_zag1d[i];
      bd->qcoeff[rc] = 0;
      bd->dqcoeff[rc] = 0;
    }
    *bd->eob = 0;
    *a = *l = (*bd->eob != !type);
  }
}

static void optimize_mb(MACROBLOCK *x) {
  int b;
  int type;
  int has_2nd_order;

  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;

  has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&
                   x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
  type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;

  for (b = 0; b < 16; ++b) {
    optimize_b(x, b, type, ta + vp8_block2above[b], tl + vp8_block2left[b]);
  }

  for (b = 16; b < 24; ++b) {
    optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above[b],
               tl + vp8_block2left[b]);
  }

  if (has_2nd_order) {
    b = 24;
    optimize_b(x, b, PLANE_TYPE_Y2, ta + vp8_block2above[b],
               tl + vp8_block2left[b]);
    check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, ta + vp8_block2above[b],
                           tl + vp8_block2left[b]);
  }
}

void vp8_optimize_mby(MACROBLOCK *x) {
  int b;
  int type;
  int has_2nd_order;

  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  if (!x->e_mbd.above_context) return;
  if (!x->e_mbd.left_context) return;

  memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;

  has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED &&
                   x->e_mbd.mode_info_context->mbmi.mode != SPLITMV);
  type = has_2nd_order ? PLANE_TYPE_Y_NO_DC : PLANE_TYPE_Y_WITH_DC;

  for (b = 0; b < 16; ++b) {
    optimize_b(x, b, type, ta + vp8_block2above[b], tl + vp8_block2left[b]);
  }

  if (has_2nd_order) {
    b = 24;
    optimize_b(x, b, PLANE_TYPE_Y2, ta + vp8_block2above[b],
               tl + vp8_block2left[b]);
    check_reset_2nd_coeffs(&x->e_mbd, PLANE_TYPE_Y2, ta + vp8_block2above[b],
                           tl + vp8_block2left[b]);
  }
}

void vp8_optimize_mbuv(MACROBLOCK *x) {
  int b;
  ENTROPY_CONTEXT_PLANES t_above, t_left;
  ENTROPY_CONTEXT *ta;
  ENTROPY_CONTEXT *tl;

  if (!x->e_mbd.above_context) return;
  if (!x->e_mbd.left_context) return;

  memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES));
  memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES));

  ta = (ENTROPY_CONTEXT *)&t_above;
  tl = (ENTROPY_CONTEXT *)&t_left;

  for (b = 16; b < 24; ++b) {
    optimize_b(x, b, PLANE_TYPE_UV, ta + vp8_block2above[b],
               tl + vp8_block2left[b]);
  }
}

void vp8_encode_inter16x16(MACROBLOCK *x) {
  vp8_build_inter_predictors_mb(&x->e_mbd);

  vp8_subtract_mb(x);

  transform_mb(x);

  vp8_quantize_mb(x);

  if (x->optimize) optimize_mb(x);
}

/* this function is used by first pass only */
void vp8_encode_inter16x16y(MACROBLOCK *x) {
  BLOCK *b = &x->block[0];

  vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.dst.y_buffer,
                                      x->e_mbd.dst.y_stride);

  vp8_subtract_mby(x->src_diff, *(b->base_src), b->src_stride,
                   x->e_mbd.dst.y_buffer, x->e_mbd.dst.y_stride);

  transform_mby(x);

  vp8_quantize_mby(x);

  vp8_inverse_transform_mby(&x->e_mbd);
}
libvpx-1.8.2/vp8/encoder/encodemb.h000066400000000000000000000025051357355204000171150ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_ENCODEMB_H_ #define VPX_VP8_ENCODER_ENCODEMB_H_ #include "onyx_int.h" #ifdef __cplusplus extern "C" { #endif void vp8_encode_inter16x16(MACROBLOCK *x); void vp8_subtract_b(BLOCK *be, BLOCKD *bd, int pitch); void vp8_subtract_mbuv(short *diff, unsigned char *usrc, unsigned char *vsrc, int src_stride, unsigned char *upred, unsigned char *vpred, int pred_stride); void vp8_subtract_mby(short *diff, unsigned char *src, int src_stride, unsigned char *pred, int pred_stride); void vp8_build_dcblock(MACROBLOCK *b); void vp8_transform_mb(MACROBLOCK *mb); void vp8_transform_mbuv(MACROBLOCK *x); void vp8_transform_intra_mby(MACROBLOCK *x); void vp8_optimize_mby(MACROBLOCK *x); void vp8_optimize_mbuv(MACROBLOCK *x); void vp8_encode_inter16x16y(MACROBLOCK *x); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_ENCODEMB_H_ libvpx-1.8.2/vp8/encoder/encodemv.c000066400000000000000000000205131357355204000171330ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp8/common/common.h" #include "encodemv.h" #include "vp8/common/entropymode.h" #include "vp8/common/systemdependent.h" #include "vpx_ports/system_state.h" #include static void encode_mvcomponent(vp8_writer *const w, const int v, const struct mv_context *mvc) { const vp8_prob *p = mvc->prob; const int x = v < 0 ? 
-v : v; if (x < mvnum_short) { /* Small */ vp8_write(w, 0, p[mvpis_short]); vp8_treed_write(w, vp8_small_mvtree, p + MVPshort, x, 3); if (!x) return; /* no sign bit */ } else { /* Large */ int i = 0; vp8_write(w, 1, p[mvpis_short]); do vp8_write(w, (x >> i) & 1, p[MVPbits + i]); while (++i < 3); i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */ do vp8_write(w, (x >> i) & 1, p[MVPbits + i]); while (--i > 3); if (x & 0xFFF0) vp8_write(w, (x >> 3) & 1, p[MVPbits + 3]); } vp8_write(w, v < 0, p[MVPsign]); } #if 0 static int max_mv_r = 0; static int max_mv_c = 0; #endif void vp8_encode_motion_vector(vp8_writer *w, const MV *mv, const MV_CONTEXT *mvc) { #if 0 { if (abs(mv->row >> 1) > max_mv_r) { FILE *f = fopen("maxmv.stt", "a"); max_mv_r = abs(mv->row >> 1); fprintf(f, "New Mv Row Max %6d\n", (mv->row >> 1)); if ((abs(mv->row) / 2) != max_mv_r) fprintf(f, "MV Row conversion error %6d\n", abs(mv->row) / 2); fclose(f); } if (abs(mv->col >> 1) > max_mv_c) { FILE *f = fopen("maxmv.stt", "a"); fprintf(f, "New Mv Col Max %6d\n", (mv->col >> 1)); max_mv_c = abs(mv->col >> 1); fclose(f); } } #endif encode_mvcomponent(w, mv->row >> 1, &mvc[0]); encode_mvcomponent(w, mv->col >> 1, &mvc[1]); } static unsigned int cost_mvcomponent(const int v, const struct mv_context *mvc) { const vp8_prob *p = mvc->prob; const int x = v; unsigned int cost; if (x < mvnum_short) { cost = vp8_cost_zero(p[mvpis_short]) + vp8_treed_cost(vp8_small_mvtree, p + MVPshort, x, 3); if (!x) return cost; } else { int i = 0; cost = vp8_cost_one(p[mvpis_short]); do { cost += vp8_cost_bit(p[MVPbits + i], (x >> i) & 1); } while (++i < 3); i = mvlong_width - 1; /* Skip bit 3, which is sometimes implicit */ do { cost += vp8_cost_bit(p[MVPbits + i], (x >> i) & 1); } while (--i > 3); if (x & 0xFFF0) cost += vp8_cost_bit(p[MVPbits + 3], (x >> 3) & 1); } return cost; /* + vp8_cost_bit( p [MVPsign], v < 0); */ } void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]) { int i = 1; unsigned int cost0 = 0; unsigned int cost1 = 0; vpx_clear_system_state(); i = 1; if (mvc_flag[0]) { mvcost[0][0] = cost_mvcomponent(0, &mvc[0]); do { cost0 = cost_mvcomponent(i, &mvc[0]); mvcost[0][i] = cost0 + vp8_cost_zero(mvc[0].prob[MVPsign]); mvcost[0][-i] = cost0 + vp8_cost_one(mvc[0].prob[MVPsign]); } while (++i <= mv_max); } i = 1; if (mvc_flag[1]) { mvcost[1][0] = cost_mvcomponent(0, &mvc[1]); do { cost1 = cost_mvcomponent(i, &mvc[1]); mvcost[1][i] = cost1 + vp8_cost_zero(mvc[1].prob[MVPsign]); mvcost[1][-i] = cost1 + vp8_cost_one(mvc[1].prob[MVPsign]); } while (++i <= mv_max); } } /* Motion vector probability table update depends on benefit. * Small correction allows for the fact that an update to an MV probability * may have benefit in subsequent frames as well as the current one. */ #define MV_PROB_UPDATE_CORRECTION -1 static void calc_prob(vp8_prob *p, const unsigned int ct[2]) { const unsigned int tot = ct[0] + ct[1]; if (tot) { const vp8_prob x = ((ct[0] * 255) / tot) & -2; *p = x ? 
x : 1; } } static void update(vp8_writer *const w, const unsigned int ct[2], vp8_prob *const cur_p, const vp8_prob new_p, const vp8_prob update_p, int *updated) { const int cur_b = vp8_cost_branch(ct, *cur_p); const int new_b = vp8_cost_branch(ct, new_p); const int cost = 7 + MV_PROB_UPDATE_CORRECTION + ((vp8_cost_one(update_p) - vp8_cost_zero(update_p) + 128) >> 8); if (cur_b - new_b > cost) { *cur_p = new_p; vp8_write(w, 1, update_p); vp8_write_literal(w, new_p >> 1, 7); *updated = 1; } else vp8_write(w, 0, update_p); } static void write_component_probs(vp8_writer *const w, struct mv_context *cur_mvc, const struct mv_context *default_mvc_, const struct mv_context *update_mvc, const unsigned int events[MVvals], unsigned int rc, int *updated) { vp8_prob *Pcur = cur_mvc->prob; const vp8_prob *default_mvc = default_mvc_->prob; const vp8_prob *Pupdate = update_mvc->prob; unsigned int is_short_ct[2], sign_ct[2]; unsigned int bit_ct[mvlong_width][2]; unsigned int short_ct[mvnum_short]; unsigned int short_bct[mvnum_short - 1][2]; vp8_prob Pnew[MVPcount]; (void)rc; vp8_copy_array(Pnew, default_mvc, MVPcount); vp8_zero(is_short_ct) vp8_zero(sign_ct) vp8_zero(bit_ct) vp8_zero(short_ct) vp8_zero(short_bct) /* j=0 */ { const int c = events[mv_max]; is_short_ct[0] += c; /* Short vector */ short_ct[0] += c; /* Magnitude distribution */ } /* j: 1 ~ mv_max (1023) */ { int j = 1; do { const int c1 = events[mv_max + j]; /* positive */ const int c2 = events[mv_max - j]; /* negative */ const int c = c1 + c2; int a = j; sign_ct[0] += c1; sign_ct[1] += c2; if (a < mvnum_short) { is_short_ct[0] += c; /* Short vector */ short_ct[a] += c; /* Magnitude distribution */ } else { int k = mvlong_width - 1; is_short_ct[1] += c; /* Long vector */ /* bit 3 not always encoded. */ do { bit_ct[k][(a >> k) & 1] += c; } while (--k >= 0); } } while (++j <= mv_max); } calc_prob(Pnew + mvpis_short, is_short_ct); calc_prob(Pnew + MVPsign, sign_ct); { vp8_prob p[mvnum_short - 1]; /* actually only need branch ct */ int j = 0; vp8_tree_probs_from_distribution(8, vp8_small_mvencodings, vp8_small_mvtree, p, short_bct, short_ct, 256, 1); do { calc_prob(Pnew + MVPshort + j, short_bct[j]); } while (++j < mvnum_short - 1); } { int j = 0; do { calc_prob(Pnew + MVPbits + j, bit_ct[j]); } while (++j < mvlong_width); } update(w, is_short_ct, Pcur + mvpis_short, Pnew[mvpis_short], *Pupdate++, updated); update(w, sign_ct, Pcur + MVPsign, Pnew[MVPsign], *Pupdate++, updated); { const vp8_prob *const new_p = Pnew + MVPshort; vp8_prob *const cur_p = Pcur + MVPshort; int j = 0; do { update(w, short_bct[j], cur_p + j, new_p[j], *Pupdate++, updated); } while (++j < mvnum_short - 1); } { const vp8_prob *const new_p = Pnew + MVPbits; vp8_prob *const cur_p = Pcur + MVPbits; int j = 0; do { update(w, bit_ct[j], cur_p + j, new_p[j], *Pupdate++, updated); } while (++j < mvlong_width); } } void vp8_write_mvprobs(VP8_COMP *cpi) { vp8_writer *const w = cpi->bc; MV_CONTEXT *mvc = cpi->common.fc.mvc; int flags[2] = { 0, 0 }; write_component_probs(w, &mvc[0], &vp8_default_mv_context[0], &vp8_mv_update_probs[0], cpi->mb.MVcount[0], 0, &flags[0]); write_component_probs(w, &mvc[1], &vp8_default_mv_context[1], &vp8_mv_update_probs[1], cpi->mb.MVcount[1], 1, &flags[1]); if (flags[0] || flags[1]) { vp8_build_component_cost_table( cpi->mb.mvcost, (const MV_CONTEXT *)cpi->common.fc.mvc, flags); } } libvpx-1.8.2/vp8/encoder/encodemv.h000066400000000000000000000015521357355204000171420ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. 
All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_ENCODEMV_H_ #define VPX_VP8_ENCODER_ENCODEMV_H_ #include "onyx_int.h" #ifdef __cplusplus extern "C" { #endif void vp8_write_mvprobs(VP8_COMP *); void vp8_encode_motion_vector(vp8_writer *, const MV *, const MV_CONTEXT *); void vp8_build_component_cost_table(int *mvcost[2], const MV_CONTEXT *mvc, int mvc_flag[2]); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_ENCODEMV_H_ libvpx-1.8.2/vp8/encoder/ethreading.c000066400000000000000000000541121357355204000174470ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "onyx_int.h" #include "vp8/common/threading.h" #include "vp8/common/common.h" #include "vp8/common/extend.h" #include "bitstream.h" #include "encodeframe.h" #include "ethreading.h" #if CONFIG_MULTITHREAD extern void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip); static THREAD_FUNCTION thread_loopfilter(void *p_data) { VP8_COMP *cpi = (VP8_COMP *)(((LPFTHREAD_DATA *)p_data)->ptr1); VP8_COMMON *cm = &cpi->common; while (1) { if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break; if (sem_wait(&cpi->h_event_start_lpf) == 0) { /* we're shutting down */ if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break; vp8_loopfilter_frame(cpi, cm); sem_post(&cpi->h_event_end_lpf); } } return 0; } static THREAD_FUNCTION thread_encoding_proc(void *p_data) { int ithread = ((ENCODETHREAD_DATA *)p_data)->ithread; VP8_COMP *cpi = (VP8_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr1); MB_ROW_COMP *mbri = (MB_ROW_COMP *)(((ENCODETHREAD_DATA *)p_data)->ptr2); ENTROPY_CONTEXT_PLANES mb_row_left_context; while (1) { if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break; if (sem_wait(&cpi->h_event_start_encoding[ithread]) == 0) { const int nsync = cpi->mt_sync_range; VP8_COMMON *cm = &cpi->common; int mb_row; MACROBLOCK *x = &mbri->mb; MACROBLOCKD *xd = &x->e_mbd; TOKENEXTRA *tp; #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING TOKENEXTRA *tp_start = cpi->tok + (1 + ithread) * (16 * 24); const int num_part = (1 << cm->multi_token_partition); #endif int *segment_counts = mbri->segment_counts; int *totalrate = &mbri->totalrate; /* we're shutting down */ if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) == 0) break; xd->mode_info_context = cm->mi + cm->mode_info_stride * (ithread + 1); xd->mode_info_stride = cm->mode_info_stride; for (mb_row = ithread + 1; mb_row < cm->mb_rows; mb_row += (cpi->encoding_thread_count + 1)) { int recon_yoffset, recon_uvoffset; int mb_col; int ref_fb_idx = cm->lst_fb_idx; int dst_fb_idx = cm->new_fb_idx; int recon_y_stride = cm->yv12_fb[ref_fb_idx].y_stride; int recon_uv_stride = cm->yv12_fb[ref_fb_idx].uv_stride; int map_index = (mb_row * cm->mb_cols); const vpx_atomic_int *last_row_current_mb_col; vpx_atomic_int *current_mb_col = &cpi->mt_current_mb_col[mb_row]; #if 
(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING)
      vp8_writer *w = &cpi->bc[1 + (mb_row % num_part)];
#else
      tp = cpi->tok + (mb_row * (cm->mb_cols * 16 * 24));
      cpi->tplist[mb_row].start = tp;
#endif

      last_row_current_mb_col = &cpi->mt_current_mb_col[mb_row - 1];

      /* reset above block coeffs */
      xd->above_context = cm->above_context;
      xd->left_context = &mb_row_left_context;
      vp8_zero(mb_row_left_context);

      xd->up_available = (mb_row != 0);
      recon_yoffset = (mb_row * recon_y_stride * 16);
      recon_uvoffset = (mb_row * recon_uv_stride * 8);

      /* Set the mb activity pointer to the start of the row. */
      x->mb_activity_ptr = &cpi->mb_activity_map[map_index];

      /* for each macroblock col in image */
      for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) {
        if (((mb_col - 1) % nsync) == 0) {
          vpx_atomic_store_release(current_mb_col, mb_col - 1);
        }

        if (mb_row && !(mb_col & (nsync - 1))) {
          vp8_atomic_spin_wait(mb_col, last_row_current_mb_col, nsync);
        }

#if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING
        tp = tp_start;
#endif

        /* Distance of Mb to the various image edges.
         * These specified to 8th pel as they are always compared
         * to values that are in 1/8th pel units
         */
        xd->mb_to_left_edge = -((mb_col * 16) << 3);
        xd->mb_to_right_edge = ((cm->mb_cols - 1 - mb_col) * 16) << 3;
        xd->mb_to_top_edge = -((mb_row * 16) << 3);
        xd->mb_to_bottom_edge = ((cm->mb_rows - 1 - mb_row) * 16) << 3;

        /* Set up limit values for motion vectors used to prevent
         * them extending outside the UMV borders
         */
        x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16));
        x->mv_col_max =
            ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16);
        x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16));
        x->mv_row_max =
            ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16);

        xd->dst.y_buffer = cm->yv12_fb[dst_fb_idx].y_buffer + recon_yoffset;
        xd->dst.u_buffer = cm->yv12_fb[dst_fb_idx].u_buffer + recon_uvoffset;
        xd->dst.v_buffer = cm->yv12_fb[dst_fb_idx].v_buffer + recon_uvoffset;
        xd->left_available = (mb_col != 0);

        x->rddiv = cpi->RDDIV;
        x->rdmult = cpi->RDMULT;

        /* Copy current mb to a buffer */
        vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16);

        if (cpi->oxcf.tuning == VP8_TUNE_SSIM) vp8_activity_masking(cpi, x);

        /* Is segmentation enabled? */
        /* MB level adjustment to quantizer */
        if (xd->segmentation_enabled) {
          /* Code to set segment id in xd->mbmi.segment_id for
           * current MB (with range checking)
           */
          if (cpi->segmentation_map[map_index + mb_col] <= 3) {
            xd->mode_info_context->mbmi.segment_id =
                cpi->segmentation_map[map_index + mb_col];
          } else {
            xd->mode_info_context->mbmi.segment_id = 0;
          }

          vp8cx_mb_init_quantizer(cpi, x, 1);
        } else {
          /* Set to Segment 0 by default */
          xd->mode_info_context->mbmi.segment_id = 0;
        }

        x->active_ptr = cpi->active_map + map_index + mb_col;

        if (cm->frame_type == KEY_FRAME) {
          *totalrate += vp8cx_encode_intra_macroblock(cpi, x, &tp);
#ifdef MODE_STATS
          y_modes[xd->mbmi.mode]++;
#endif
        } else {
          *totalrate += vp8cx_encode_inter_macroblock(
              cpi, x, &tp, recon_yoffset, recon_uvoffset, mb_row, mb_col);

#ifdef MODE_STATS
          inter_y_modes[xd->mbmi.mode]++;

          if (xd->mbmi.mode == SPLITMV) {
            int b;

            for (b = 0; b < xd->mbmi.partition_count; ++b) {
              inter_b_modes[x->partition->bmi[b].mode]++;
            }
          }
#endif

          // Keep track of how many (consecutive) times a block
          // is coded as ZEROMV_LASTREF, for base layer frames.
          // Reset to 0 if it's coded as anything else.
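          // The two counters differ only in that consec_zero_last_mvbias is
          // additionally cleared when zero_last_dot_suppress is set (below).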
if (cpi->current_layer == 0) { if (xd->mode_info_context->mbmi.mode == ZEROMV && xd->mode_info_context->mbmi.ref_frame == LAST_FRAME) { // Increment, check for wrap-around. if (cpi->consec_zero_last[map_index + mb_col] < 255) { cpi->consec_zero_last[map_index + mb_col] += 1; } if (cpi->consec_zero_last_mvbias[map_index + mb_col] < 255) { cpi->consec_zero_last_mvbias[map_index + mb_col] += 1; } } else { cpi->consec_zero_last[map_index + mb_col] = 0; cpi->consec_zero_last_mvbias[map_index + mb_col] = 0; } if (x->zero_last_dot_suppress) { cpi->consec_zero_last_mvbias[map_index + mb_col] = 0; } } /* Special case code for cyclic refresh * If cyclic update enabled then copy * xd->mbmi.segment_id; (which may have been updated * based on mode during * vp8cx_encode_inter_macroblock()) back into the * global segmentation map */ if ((cpi->current_layer == 0) && (cpi->cyclic_refresh_mode_enabled && xd->segmentation_enabled)) { const MB_MODE_INFO *mbmi = &xd->mode_info_context->mbmi; cpi->segmentation_map[map_index + mb_col] = mbmi->segment_id; /* If the block has been refreshed mark it as clean * (the magnitude of the -ve influences how long it * will be before we consider another refresh): * Else if it was coded (last frame 0,0) and has * not already been refreshed then mark it as a * candidate for cleanup next time (marked 0) else * mark it as dirty (1). */ if (mbmi->segment_id) { cpi->cyclic_refresh_map[map_index + mb_col] = -1; } else if ((mbmi->mode == ZEROMV) && (mbmi->ref_frame == LAST_FRAME)) { if (cpi->cyclic_refresh_map[map_index + mb_col] == 1) { cpi->cyclic_refresh_map[map_index + mb_col] = 0; } } else { cpi->cyclic_refresh_map[map_index + mb_col] = 1; } } } #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING /* pack tokens for this MB */ { int tok_count = tp - tp_start; vp8_pack_tokens(w, tp_start, tok_count); } #else cpi->tplist[mb_row].stop = tp; #endif /* Increment pointer into gf usage flags structure. */ x->gf_active_ptr++; /* Increment the activity mask pointers. */ x->mb_activity_ptr++; /* adjust to the next column of macroblocks */ x->src.y_buffer += 16; x->src.u_buffer += 8; x->src.v_buffer += 8; recon_yoffset += 16; recon_uvoffset += 8; /* Keep track of segment usage */ segment_counts[xd->mode_info_context->mbmi.segment_id]++; /* skip to next mb */ xd->mode_info_context++; x->partition_info++; xd->above_context++; } vp8_extend_mb_row(&cm->yv12_fb[dst_fb_idx], xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); vpx_atomic_store_release(current_mb_col, mb_col + nsync); /* this is to account for the border */ xd->mode_info_context++; x->partition_info++; x->src.y_buffer += 16 * x->src.y_stride * (cpi->encoding_thread_count + 1) - 16 * cm->mb_cols; x->src.u_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; x->src.v_buffer += 8 * x->src.uv_stride * (cpi->encoding_thread_count + 1) - 8 * cm->mb_cols; xd->mode_info_context += xd->mode_info_stride * cpi->encoding_thread_count; x->partition_info += xd->mode_info_stride * cpi->encoding_thread_count; x->gf_active_ptr += cm->mb_cols * cpi->encoding_thread_count; } /* Signal that this thread has completed processing its rows. 
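       * The main thread's vp8_encode_frame() does a matching sem_wait() on
       * h_event_end_encoding[ithread] before merging this thread's counts.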
*/ sem_post(&cpi->h_event_end_encoding[ithread]); } } /* printf("exit thread %d\n", ithread); */ return 0; } static void setup_mbby_copy(MACROBLOCK *mbdst, MACROBLOCK *mbsrc) { MACROBLOCK *x = mbsrc; MACROBLOCK *z = mbdst; int i; z->ss = x->ss; z->ss_count = x->ss_count; z->searches_per_step = x->searches_per_step; z->errorperbit = x->errorperbit; z->sadperbit16 = x->sadperbit16; z->sadperbit4 = x->sadperbit4; /* z->mv_col_min = x->mv_col_min; z->mv_col_max = x->mv_col_max; z->mv_row_min = x->mv_row_min; z->mv_row_max = x->mv_row_max; */ z->short_fdct4x4 = x->short_fdct4x4; z->short_fdct8x4 = x->short_fdct8x4; z->short_walsh4x4 = x->short_walsh4x4; z->quantize_b = x->quantize_b; z->optimize = x->optimize; /* z->mvc = x->mvc; z->src.y_buffer = x->src.y_buffer; z->src.u_buffer = x->src.u_buffer; z->src.v_buffer = x->src.v_buffer; */ z->mvcost[0] = x->mvcost[0]; z->mvcost[1] = x->mvcost[1]; z->mvsadcost[0] = x->mvsadcost[0]; z->mvsadcost[1] = x->mvsadcost[1]; z->token_costs = x->token_costs; z->inter_bmode_costs = x->inter_bmode_costs; z->mbmode_cost = x->mbmode_cost; z->intra_uv_mode_cost = x->intra_uv_mode_cost; z->bmode_costs = x->bmode_costs; for (i = 0; i < 25; ++i) { z->block[i].quant = x->block[i].quant; z->block[i].quant_fast = x->block[i].quant_fast; z->block[i].quant_shift = x->block[i].quant_shift; z->block[i].zbin = x->block[i].zbin; z->block[i].zrun_zbin_boost = x->block[i].zrun_zbin_boost; z->block[i].round = x->block[i].round; z->block[i].src_stride = x->block[i].src_stride; } z->q_index = x->q_index; z->act_zbin_adj = x->act_zbin_adj; z->last_act_zbin_adj = x->last_act_zbin_adj; { MACROBLOCKD *xd = &x->e_mbd; MACROBLOCKD *zd = &z->e_mbd; /* zd->mode_info_context = xd->mode_info_context; zd->mode_info = xd->mode_info; zd->mode_info_stride = xd->mode_info_stride; zd->frame_type = xd->frame_type; zd->up_available = xd->up_available ; zd->left_available = xd->left_available; zd->left_context = xd->left_context; zd->last_frame_dc = xd->last_frame_dc; zd->last_frame_dccons = xd->last_frame_dccons; zd->gold_frame_dc = xd->gold_frame_dc; zd->gold_frame_dccons = xd->gold_frame_dccons; zd->mb_to_left_edge = xd->mb_to_left_edge; zd->mb_to_right_edge = xd->mb_to_right_edge; zd->mb_to_top_edge = xd->mb_to_top_edge ; zd->mb_to_bottom_edge = xd->mb_to_bottom_edge; zd->gf_active_ptr = xd->gf_active_ptr; zd->frames_since_golden = xd->frames_since_golden; zd->frames_till_alt_ref_frame = xd->frames_till_alt_ref_frame; */ zd->subpixel_predict = xd->subpixel_predict; zd->subpixel_predict8x4 = xd->subpixel_predict8x4; zd->subpixel_predict8x8 = xd->subpixel_predict8x8; zd->subpixel_predict16x16 = xd->subpixel_predict16x16; zd->segmentation_enabled = xd->segmentation_enabled; zd->mb_segement_abs_delta = xd->mb_segement_abs_delta; memcpy(zd->segment_feature_data, xd->segment_feature_data, sizeof(xd->segment_feature_data)); memcpy(zd->dequant_y1_dc, xd->dequant_y1_dc, sizeof(xd->dequant_y1_dc)); memcpy(zd->dequant_y1, xd->dequant_y1, sizeof(xd->dequant_y1)); memcpy(zd->dequant_y2, xd->dequant_y2, sizeof(xd->dequant_y2)); memcpy(zd->dequant_uv, xd->dequant_uv, sizeof(xd->dequant_uv)); #if 1 /*TODO: Remove dequant from BLOCKD. This is a temporary solution until * the quantizer code uses a passed in pointer to the dequant constants. * This will also require modifications to the x86 and neon assembly. 
* */ for (i = 0; i < 16; ++i) zd->block[i].dequant = zd->dequant_y1; for (i = 16; i < 24; ++i) zd->block[i].dequant = zd->dequant_uv; zd->block[24].dequant = zd->dequant_y2; #endif memcpy(z->rd_threshes, x->rd_threshes, sizeof(x->rd_threshes)); memcpy(z->rd_thresh_mult, x->rd_thresh_mult, sizeof(x->rd_thresh_mult)); z->zbin_over_quant = x->zbin_over_quant; z->zbin_mode_boost_enabled = x->zbin_mode_boost_enabled; z->zbin_mode_boost = x->zbin_mode_boost; memset(z->error_bins, 0, sizeof(z->error_bins)); } } void vp8cx_init_mbrthread_data(VP8_COMP *cpi, MACROBLOCK *x, MB_ROW_COMP *mbr_ei, int count) { VP8_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; int i; for (i = 0; i < count; ++i) { MACROBLOCK *mb = &mbr_ei[i].mb; MACROBLOCKD *mbd = &mb->e_mbd; mbd->subpixel_predict = xd->subpixel_predict; mbd->subpixel_predict8x4 = xd->subpixel_predict8x4; mbd->subpixel_predict8x8 = xd->subpixel_predict8x8; mbd->subpixel_predict16x16 = xd->subpixel_predict16x16; mb->gf_active_ptr = x->gf_active_ptr; memset(mbr_ei[i].segment_counts, 0, sizeof(mbr_ei[i].segment_counts)); mbr_ei[i].totalrate = 0; mb->partition_info = x->pi + x->e_mbd.mode_info_stride * (i + 1); mbd->frame_type = cm->frame_type; mb->src = *cpi->Source; mbd->pre = cm->yv12_fb[cm->lst_fb_idx]; mbd->dst = cm->yv12_fb[cm->new_fb_idx]; mb->src.y_buffer += 16 * x->src.y_stride * (i + 1); mb->src.u_buffer += 8 * x->src.uv_stride * (i + 1); mb->src.v_buffer += 8 * x->src.uv_stride * (i + 1); vp8_build_block_offsets(mb); mbd->left_context = &cm->left_context; mb->mvc = cm->fc.mvc; setup_mbby_copy(&mbr_ei[i].mb, x); mbd->fullpixel_mask = 0xffffffff; if (cm->full_pixel) mbd->fullpixel_mask = 0xfffffff8; vp8_zero(mb->coef_counts); vp8_zero(x->ymode_count); mb->skip_true_count = 0; vp8_zero(mb->MVcount); mb->prediction_error = 0; mb->intra_error = 0; vp8_zero(mb->count_mb_ref_frame_usage); mb->mbs_tested_so_far = 0; mb->mbs_zero_last_dot_suppress = 0; } } int vp8cx_create_encoder_threads(VP8_COMP *cpi) { const VP8_COMMON *cm = &cpi->common; vpx_atomic_init(&cpi->b_multi_threaded, 0); cpi->encoding_thread_count = 0; cpi->b_lpf_running = 0; if (cm->processor_core_count > 1 && cpi->oxcf.multi_threaded > 1) { int ithread; int th_count = cpi->oxcf.multi_threaded - 1; int rc = 0; /* don't allocate more threads than cores available */ if (cpi->oxcf.multi_threaded > cm->processor_core_count) { th_count = cm->processor_core_count - 1; } /* we have th_count + 1 (main) threads processing one row each */ /* no point to have more threads than the sync range allows */ if (th_count > ((cm->mb_cols / cpi->mt_sync_range) - 1)) { th_count = (cm->mb_cols / cpi->mt_sync_range) - 1; } if (th_count == 0) return 0; CHECK_MEM_ERROR(cpi->h_encoding_thread, vpx_malloc(sizeof(pthread_t) * th_count)); CHECK_MEM_ERROR(cpi->h_event_start_encoding, vpx_malloc(sizeof(sem_t) * th_count)); CHECK_MEM_ERROR(cpi->h_event_end_encoding, vpx_malloc(sizeof(sem_t) * th_count)); CHECK_MEM_ERROR(cpi->mb_row_ei, vpx_memalign(32, sizeof(MB_ROW_COMP) * th_count)); memset(cpi->mb_row_ei, 0, sizeof(MB_ROW_COMP) * th_count); CHECK_MEM_ERROR(cpi->en_thread_data, vpx_malloc(sizeof(ENCODETHREAD_DATA) * th_count)); vpx_atomic_store_release(&cpi->b_multi_threaded, 1); cpi->encoding_thread_count = th_count; /* printf("[VP8:] multi_threaded encoding is enabled with %d threads\n\n", (cpi->encoding_thread_count +1)); */ for (ithread = 0; ithread < th_count; ++ithread) { ENCODETHREAD_DATA *ethd = &cpi->en_thread_data[ithread]; /* Setup block ptrs and offsets */ 
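      /* Each worker thread owns a private MACROBLOCK copy in mb_row_ei[], so
       * its src_diff/coeff block pointers must be wired up individually
       * rather than shared with the main thread's context. */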
vp8_setup_block_ptrs(&cpi->mb_row_ei[ithread].mb); vp8_setup_block_dptrs(&cpi->mb_row_ei[ithread].mb.e_mbd); sem_init(&cpi->h_event_start_encoding[ithread], 0, 0); sem_init(&cpi->h_event_end_encoding[ithread], 0, 0); ethd->ithread = ithread; ethd->ptr1 = (void *)cpi; ethd->ptr2 = (void *)&cpi->mb_row_ei[ithread]; rc = pthread_create(&cpi->h_encoding_thread[ithread], 0, thread_encoding_proc, ethd); if (rc) break; } if (rc) { /* shutdown other threads */ vpx_atomic_store_release(&cpi->b_multi_threaded, 0); for (--ithread; ithread >= 0; ithread--) { pthread_join(cpi->h_encoding_thread[ithread], 0); sem_destroy(&cpi->h_event_start_encoding[ithread]); sem_destroy(&cpi->h_event_end_encoding[ithread]); } /* free thread related resources */ vpx_free(cpi->h_event_start_encoding); vpx_free(cpi->h_event_end_encoding); vpx_free(cpi->h_encoding_thread); vpx_free(cpi->mb_row_ei); vpx_free(cpi->en_thread_data); return -1; } { LPFTHREAD_DATA *lpfthd = &cpi->lpf_thread_data; sem_init(&cpi->h_event_start_lpf, 0, 0); sem_init(&cpi->h_event_end_lpf, 0, 0); lpfthd->ptr1 = (void *)cpi; rc = pthread_create(&cpi->h_filter_thread, 0, thread_loopfilter, lpfthd); if (rc) { /* shutdown other threads */ vpx_atomic_store_release(&cpi->b_multi_threaded, 0); for (--ithread; ithread >= 0; ithread--) { sem_post(&cpi->h_event_start_encoding[ithread]); sem_post(&cpi->h_event_end_encoding[ithread]); pthread_join(cpi->h_encoding_thread[ithread], 0); sem_destroy(&cpi->h_event_start_encoding[ithread]); sem_destroy(&cpi->h_event_end_encoding[ithread]); } sem_destroy(&cpi->h_event_end_lpf); sem_destroy(&cpi->h_event_start_lpf); /* free thread related resources */ vpx_free(cpi->h_event_start_encoding); vpx_free(cpi->h_event_end_encoding); vpx_free(cpi->h_encoding_thread); vpx_free(cpi->mb_row_ei); vpx_free(cpi->en_thread_data); return -2; } } } return 0; } void vp8cx_remove_encoder_threads(VP8_COMP *cpi) { if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) { /* shutdown other threads */ vpx_atomic_store_release(&cpi->b_multi_threaded, 0); { int i; for (i = 0; i < cpi->encoding_thread_count; ++i) { sem_post(&cpi->h_event_start_encoding[i]); sem_post(&cpi->h_event_end_encoding[i]); pthread_join(cpi->h_encoding_thread[i], 0); sem_destroy(&cpi->h_event_start_encoding[i]); sem_destroy(&cpi->h_event_end_encoding[i]); } sem_post(&cpi->h_event_start_lpf); pthread_join(cpi->h_filter_thread, 0); } sem_destroy(&cpi->h_event_end_lpf); sem_destroy(&cpi->h_event_start_lpf); /* free thread related resources */ vpx_free(cpi->h_event_start_encoding); vpx_free(cpi->h_event_end_encoding); vpx_free(cpi->h_encoding_thread); vpx_free(cpi->mb_row_ei); vpx_free(cpi->en_thread_data); } } #endif libvpx-1.8.2/vp8/encoder/ethreading.h000066400000000000000000000016341357355204000174550ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP8_ENCODER_ETHREADING_H_ #define VPX_VP8_ENCODER_ETHREADING_H_ #include "vp8/encoder/onyx_int.h" #ifdef __cplusplus extern "C" { #endif struct VP8_COMP; struct macroblock; void vp8cx_init_mbrthread_data(struct VP8_COMP *cpi, struct macroblock *x, MB_ROW_COMP *mbr_ei, int count); int vp8cx_create_encoder_threads(struct VP8_COMP *cpi); void vp8cx_remove_encoder_threads(struct VP8_COMP *cpi); #ifdef __cplusplus } #endif #endif // VPX_VP8_ENCODER_ETHREADING_H_ libvpx-1.8.2/vp8/encoder/firstpass.c000066400000000000000000003270731357355204000173630ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <limits.h> #include <math.h> #include <stdio.h> #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "block.h" #include "onyx_int.h" #include "vpx_dsp/variance.h" #include "encodeintra.h" #include "vp8/common/common.h" #include "vp8/common/setupintrarecon.h" #include "vp8/common/systemdependent.h" #include "mcomp.h" #include "firstpass.h" #include "vpx_scale/vpx_scale.h" #include "encodemb.h" #include "vp8/common/extend.h" #include "vpx_ports/system_state.h" #include "vpx_mem/vpx_mem.h" #include "vp8/common/swapyv12buffer.h" #include "rdopt.h" #include "vp8/common/quant_common.h" #include "encodemv.h" #include "encodeframe.h" #define OUTPUT_FPF 0 extern void vp8cx_frame_init_quantizer(VP8_COMP *cpi); #define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q] extern int vp8_kf_boost_qadjustment[QINDEX_RANGE]; extern const int vp8_gf_boost_qadjustment[QINDEX_RANGE]; #define IIFACTOR 1.5 #define IIKFACTOR1 1.40 #define IIKFACTOR2 1.5 #define RMAX 14.0 #define GF_RMAX 48.0 #define KF_MB_INTRA_MIN 300 #define GF_MB_INTRA_MIN 200 #define DOUBLE_DIVIDE_CHECK(X) ((X) < 0 ?
(X)-.000001 : (X) + .000001) #define POW1 (double)cpi->oxcf.two_pass_vbrbias / 100.0 #define POW2 (double)cpi->oxcf.two_pass_vbrbias / 100.0 #define NEW_BOOST 1 static int vscale_lookup[7] = { 0, 1, 1, 2, 2, 3, 3 }; static int hscale_lookup[7] = { 0, 0, 1, 1, 2, 2, 3 }; static const int cq_level[QINDEX_RANGE] = { 0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 9, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 17, 17, 18, 19, 20, 20, 21, 22, 22, 23, 24, 24, 25, 26, 27, 27, 28, 29, 30, 30, 31, 32, 33, 33, 34, 35, 36, 36, 37, 38, 39, 39, 40, 41, 42, 42, 43, 44, 45, 46, 46, 47, 48, 49, 50, 50, 51, 52, 53, 54, 55, 55, 56, 57, 58, 59, 60, 60, 61, 62, 63, 64, 65, 66, 67, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100 }; static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame); /* Resets the first pass file to the given position using a relative seek * from the current position */ static void reset_fpf_position(VP8_COMP *cpi, FIRSTPASS_STATS *Position) { cpi->twopass.stats_in = Position; } static int lookup_next_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *next_frame) { if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) return EOF; *next_frame = *cpi->twopass.stats_in; return 1; } /* Read frame stats at an offset from the current position */ static int read_frame_stats(VP8_COMP *cpi, FIRSTPASS_STATS *frame_stats, int offset) { FIRSTPASS_STATS *fps_ptr = cpi->twopass.stats_in; /* Check legality of offset */ if (offset >= 0) { if (&fps_ptr[offset] >= cpi->twopass.stats_in_end) return EOF; } else if (offset < 0) { if (&fps_ptr[offset] < cpi->twopass.stats_in_start) return EOF; } *frame_stats = fps_ptr[offset]; return 1; } static int input_stats(VP8_COMP *cpi, FIRSTPASS_STATS *fps) { if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) return EOF; *fps = *cpi->twopass.stats_in; cpi->twopass.stats_in = (void *)((char *)cpi->twopass.stats_in + sizeof(FIRSTPASS_STATS)); return 1; } static void output_stats(struct vpx_codec_pkt_list *pktlist, FIRSTPASS_STATS *stats) { struct vpx_codec_cx_pkt pkt; pkt.kind = VPX_CODEC_STATS_PKT; pkt.data.twopass_stats.buf = stats; pkt.data.twopass_stats.sz = sizeof(FIRSTPASS_STATS); vpx_codec_pkt_list_add(pktlist, &pkt); /* TEMP debug code */ #if OUTPUT_FPF { FILE *fpfile; fpfile = fopen("firstpass.stt", "a"); fprintf(fpfile, "%12.0f %12.0f %12.0f %12.4f %12.4f %12.4f %12.4f" " %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f %12.4f" " %12.0f %12.0f %12.4f\n", stats->frame, stats->intra_error, stats->coded_error, stats->ssim_weighted_pred_err, stats->pcnt_inter, stats->pcnt_motion, stats->pcnt_second_ref, stats->pcnt_neutral, stats->MVr, stats->mvr_abs, stats->MVc, stats->mvc_abs, stats->MVrv, stats->MVcv, stats->mv_in_out_count, stats->new_mv_count, stats->count, stats->duration); fclose(fpfile); } #endif } static void zero_stats(FIRSTPASS_STATS *section) { section->frame = 0.0; section->intra_error = 0.0; section->coded_error = 0.0; section->ssim_weighted_pred_err = 0.0; section->pcnt_inter = 0.0; section->pcnt_motion = 0.0; section->pcnt_second_ref = 0.0; section->pcnt_neutral = 0.0; section->MVr = 0.0; section->mvr_abs = 0.0; section->MVc = 0.0; section->mvc_abs = 0.0; section->MVrv = 0.0; section->MVcv = 0.0; section->mv_in_out_count = 0.0; section->new_mv_count = 0.0; section->count = 0.0; section->duration = 1.0; } static void accumulate_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame) { section->frame += frame->frame; section->intra_error += 
frame->intra_error; section->coded_error += frame->coded_error; section->ssim_weighted_pred_err += frame->ssim_weighted_pred_err; section->pcnt_inter += frame->pcnt_inter; section->pcnt_motion += frame->pcnt_motion; section->pcnt_second_ref += frame->pcnt_second_ref; section->pcnt_neutral += frame->pcnt_neutral; section->MVr += frame->MVr; section->mvr_abs += frame->mvr_abs; section->MVc += frame->MVc; section->mvc_abs += frame->mvc_abs; section->MVrv += frame->MVrv; section->MVcv += frame->MVcv; section->mv_in_out_count += frame->mv_in_out_count; section->new_mv_count += frame->new_mv_count; section->count += frame->count; section->duration += frame->duration; } static void subtract_stats(FIRSTPASS_STATS *section, FIRSTPASS_STATS *frame) { section->frame -= frame->frame; section->intra_error -= frame->intra_error; section->coded_error -= frame->coded_error; section->ssim_weighted_pred_err -= frame->ssim_weighted_pred_err; section->pcnt_inter -= frame->pcnt_inter; section->pcnt_motion -= frame->pcnt_motion; section->pcnt_second_ref -= frame->pcnt_second_ref; section->pcnt_neutral -= frame->pcnt_neutral; section->MVr -= frame->MVr; section->mvr_abs -= frame->mvr_abs; section->MVc -= frame->MVc; section->mvc_abs -= frame->mvc_abs; section->MVrv -= frame->MVrv; section->MVcv -= frame->MVcv; section->mv_in_out_count -= frame->mv_in_out_count; section->new_mv_count -= frame->new_mv_count; section->count -= frame->count; section->duration -= frame->duration; } static void avg_stats(FIRSTPASS_STATS *section) { if (section->count < 1.0) return; section->intra_error /= section->count; section->coded_error /= section->count; section->ssim_weighted_pred_err /= section->count; section->pcnt_inter /= section->count; section->pcnt_second_ref /= section->count; section->pcnt_neutral /= section->count; section->pcnt_motion /= section->count; section->MVr /= section->count; section->mvr_abs /= section->count; section->MVc /= section->count; section->mvc_abs /= section->count; section->MVrv /= section->count; section->MVcv /= section->count; section->mv_in_out_count /= section->count; section->duration /= section->count; } /* Calculate a modified Error used in distributing bits between easier * and harder frames */ static double calculate_modified_err(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { double av_err = (cpi->twopass.total_stats.ssim_weighted_pred_err / cpi->twopass.total_stats.count); double this_err = this_frame->ssim_weighted_pred_err; double modified_err; if (this_err > av_err) { modified_err = av_err * pow((this_err / DOUBLE_DIVIDE_CHECK(av_err)), POW1); } else { modified_err = av_err * pow((this_err / DOUBLE_DIVIDE_CHECK(av_err)), POW2); } return modified_err; } static const double weight_table[256] = { 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.020000, 0.031250, 0.062500, 0.093750, 0.125000, 0.156250, 0.187500, 0.218750, 0.250000, 0.281250, 0.312500, 0.343750, 0.375000, 0.406250, 0.437500, 0.468750, 0.500000, 0.531250, 0.562500, 0.593750, 0.625000, 0.656250, 0.687500, 0.718750, 0.750000, 0.781250, 0.812500, 0.843750, 0.875000, 0.906250, 0.937500, 0.968750, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 
1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 1.000000 }; static double simple_weight(YV12_BUFFER_CONFIG *source) { int i, j; unsigned char *src = source->y_buffer; double sum_weights = 0.0; /* Loop throught the Y plane raw examining levels and creating a weight * for the image */ i = source->y_height; do { j = source->y_width; do { sum_weights += weight_table[*src]; src++; } while (--j); src -= source->y_width; src += source->y_stride; } while (--i); sum_weights /= (source->y_height * source->y_width); return sum_weights; } /* This function returns the current per frame maximum bitrate target */ static int frame_max_bits(VP8_COMP *cpi) { /* Max allocation for a single frame based on the max section guidelines * passed in and how many bits are left */ int max_bits; /* For CBR we need to also consider buffer fullness. * If we are running below the optimal level then we need to gradually * tighten up on max_bits. */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { double buffer_fullness_ratio = (double)cpi->buffer_level / DOUBLE_DIVIDE_CHECK((double)cpi->oxcf.optimal_buffer_level); /* For CBR base this on the target average bits per frame plus the * maximum sedction rate passed in by the user */ max_bits = (int)(cpi->av_per_frame_bandwidth * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); /* If our buffer is below the optimum level */ if (buffer_fullness_ratio < 1.0) { /* The lower of max_bits / 4 or cpi->av_per_frame_bandwidth / 4. */ int min_max_bits = ((cpi->av_per_frame_bandwidth >> 2) < (max_bits >> 2)) ? cpi->av_per_frame_bandwidth >> 2 : max_bits >> 2; max_bits = (int)(max_bits * buffer_fullness_ratio); /* Lowest value we will set ... which should allow the buffer to * refill. 
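* (Worked example with illustrative numbers, not from the source: with
* av_per_frame_bandwidth = 40000 bits, two_pass_vbrmax_section = 400 and
* a buffer at half the optimal level, max_bits starts at 40000 * 4 =
* 160000, is scaled by 0.5 to 80000, and the floor min_max_bits =
* min(40000 >> 2, 160000 >> 2) = 10000 does not bind.)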
*/ if (max_bits < min_max_bits) max_bits = min_max_bits; } } /* VBR */ else { /* For VBR base this on the bits and frames left plus the * two_pass_vbrmax_section rate passed in by the user */ max_bits = (int)(((double)cpi->twopass.bits_left / (cpi->twopass.total_stats.count - (double)cpi->common.current_video_frame)) * ((double)cpi->oxcf.two_pass_vbrmax_section / 100.0)); } /* Trap case where we are out of bits */ if (max_bits < 0) max_bits = 0; return max_bits; } void vp8_init_first_pass(VP8_COMP *cpi) { zero_stats(&cpi->twopass.total_stats); } void vp8_end_first_pass(VP8_COMP *cpi) { output_stats(cpi->output_pkt_list, &cpi->twopass.total_stats); } static void zz_motion_search(MACROBLOCK *x, YV12_BUFFER_CONFIG *raw_buffer, int *raw_motion_err, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset) { MACROBLOCKD *const xd = &x->e_mbd; BLOCK *b = &x->block[0]; BLOCKD *d = &x->e_mbd.block[0]; unsigned char *src_ptr = (*(b->base_src) + b->src); int src_stride = b->src_stride; unsigned char *raw_ptr; int raw_stride = raw_buffer->y_stride; unsigned char *ref_ptr; int ref_stride = x->e_mbd.pre.y_stride; /* Set up pointers for this macro block raw buffer */ raw_ptr = (unsigned char *)(raw_buffer->y_buffer + recon_yoffset + d->offset); vpx_mse16x16(src_ptr, src_stride, raw_ptr, raw_stride, (unsigned int *)(raw_motion_err)); /* Set up pointers for this macro block recon buffer */ xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; ref_ptr = (unsigned char *)(xd->pre.y_buffer + d->offset); vpx_mse16x16(src_ptr, src_stride, ref_ptr, ref_stride, (unsigned int *)(best_motion_err)); } static void first_pass_motion_search(VP8_COMP *cpi, MACROBLOCK *x, int_mv *ref_mv, MV *best_mv, YV12_BUFFER_CONFIG *recon_buffer, int *best_motion_err, int recon_yoffset) { MACROBLOCKD *const xd = &x->e_mbd; BLOCK *b = &x->block[0]; BLOCKD *d = &x->e_mbd.block[0]; int num00; int_mv tmp_mv; int_mv ref_mv_full; int tmp_err; int step_param = 3; /* Dont search over full range for first pass */ int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; int n; vp8_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; int new_mv_mode_penalty = 256; /* override the default variance function to use MSE */ v_fn_ptr.vf = vpx_mse16x16; /* Set up pointers for this macro block recon buffer */ xd->pre.y_buffer = recon_buffer->y_buffer + recon_yoffset; /* Initial step/diamond search centred on best mv */ tmp_mv.as_int = 0; ref_mv_full.as_mv.col = ref_mv->as_mv.col >> 3; ref_mv_full.as_mv.row = ref_mv->as_mv.row >> 3; tmp_err = cpi->diamond_search_sad(x, b, d, &ref_mv_full, &tmp_mv, step_param, x->sadperbit16, &num00, &v_fn_ptr, x->mvcost, ref_mv); if (tmp_err < INT_MAX - new_mv_mode_penalty) tmp_err += new_mv_mode_penalty; if (tmp_err < *best_motion_err) { *best_motion_err = tmp_err; best_mv->row = tmp_mv.as_mv.row; best_mv->col = tmp_mv.as_mv.col; } /* Further step/diamond searches as necessary */ n = num00; num00 = 0; while (n < further_steps) { n++; if (num00) { num00--; } else { tmp_err = cpi->diamond_search_sad(x, b, d, &ref_mv_full, &tmp_mv, step_param + n, x->sadperbit16, &num00, &v_fn_ptr, x->mvcost, ref_mv); if (tmp_err < INT_MAX - new_mv_mode_penalty) { tmp_err += new_mv_mode_penalty; } if (tmp_err < *best_motion_err) { *best_motion_err = tmp_err; best_mv->row = tmp_mv.as_mv.row; best_mv->col = tmp_mv.as_mv.col; } } } } void vp8_first_pass(VP8_COMP *cpi) { int mb_row, mb_col; MACROBLOCK *const x = &cpi->mb; VP8_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; int recon_yoffset, 
recon_uvoffset; YV12_BUFFER_CONFIG *lst_yv12 = &cm->yv12_fb[cm->lst_fb_idx]; YV12_BUFFER_CONFIG *new_yv12 = &cm->yv12_fb[cm->new_fb_idx]; YV12_BUFFER_CONFIG *gld_yv12 = &cm->yv12_fb[cm->gld_fb_idx]; int recon_y_stride = lst_yv12->y_stride; int recon_uv_stride = lst_yv12->uv_stride; int64_t intra_error = 0; int64_t coded_error = 0; int sum_mvr = 0, sum_mvc = 0; int sum_mvr_abs = 0, sum_mvc_abs = 0; int sum_mvrs = 0, sum_mvcs = 0; int mvcount = 0; int intercount = 0; int second_ref_count = 0; int intrapenalty = 256; int neutral_count = 0; int new_mv_count = 0; int sum_in_vectors = 0; uint32_t lastmv_as_int = 0; int_mv zero_ref_mv; zero_ref_mv.as_int = 0; vpx_clear_system_state(); x->src = *cpi->Source; xd->pre = *lst_yv12; xd->dst = *new_yv12; x->partition_info = x->pi; xd->mode_info_context = cm->mi; if (!cm->use_bilinear_mc_filter) { xd->subpixel_predict = vp8_sixtap_predict4x4; xd->subpixel_predict8x4 = vp8_sixtap_predict8x4; xd->subpixel_predict8x8 = vp8_sixtap_predict8x8; xd->subpixel_predict16x16 = vp8_sixtap_predict16x16; } else { xd->subpixel_predict = vp8_bilinear_predict4x4; xd->subpixel_predict8x4 = vp8_bilinear_predict8x4; xd->subpixel_predict8x8 = vp8_bilinear_predict8x8; xd->subpixel_predict16x16 = vp8_bilinear_predict16x16; } vp8_build_block_offsets(x); /* set up frame new frame for intra coded blocks */ vp8_setup_intra_recon(new_yv12); vp8cx_frame_init_quantizer(cpi); /* Initialise the MV cost table to the defaults */ { int flag[2] = { 1, 1 }; vp8_initialize_rd_consts(cpi, x, vp8_dc_quant(cm->base_qindex, cm->y1dc_delta_q)); memcpy(cm->fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); vp8_build_component_cost_table(cpi->mb.mvcost, (const MV_CONTEXT *)cm->fc.mvc, flag); } /* for each macroblock row in image */ for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { int_mv best_ref_mv; best_ref_mv.as_int = 0; /* reset above block coeffs */ xd->up_available = (mb_row != 0); recon_yoffset = (mb_row * recon_y_stride * 16); recon_uvoffset = (mb_row * recon_uv_stride * 8); /* Set up limit values for motion vectors to prevent them extending * outside the UMV borders */ x->mv_row_min = -((mb_row * 16) + (VP8BORDERINPIXELS - 16)); x->mv_row_max = ((cm->mb_rows - 1 - mb_row) * 16) + (VP8BORDERINPIXELS - 16); /* for each macroblock col in image */ for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { int this_error; int gf_motion_error = INT_MAX; int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); xd->dst.y_buffer = new_yv12->y_buffer + recon_yoffset; xd->dst.u_buffer = new_yv12->u_buffer + recon_uvoffset; xd->dst.v_buffer = new_yv12->v_buffer + recon_uvoffset; xd->left_available = (mb_col != 0); /* Copy current mb to a buffer */ vp8_copy_mem16x16(x->src.y_buffer, x->src.y_stride, x->thismb, 16); /* do intra 16x16 prediction */ this_error = vp8_encode_intra(cpi, x, use_dc_pred); /* "intrapenalty" below deals with situations where the intra * and inter error scores are very low (eg a plain black frame) * We do not have special cases in first pass for 0,0 and * nearest etc so all inter modes carry an overhead cost * estimate fot the mv. When the error score is very low this * causes us to pick all or lots of INTRA modes and throw lots * of key frames. This penalty adds a cost matching that of a * 0,0 mv to the intra case. 
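* (Illustrative numbers: on a near-black frame an intra score of 20 and an
* inter score of 25 differ only by noise, so without the penalty intra
* would win almost everywhere and the frame would read as a scene cut.
* Adding the 256 penalty makes the comparison behave as if intra also had
* to pay a 0,0 mv overhead.)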
*/ this_error += intrapenalty; /* Cumulative intra error total */ intra_error += (int64_t)this_error; /* Set up limit values for motion vectors to prevent them * extending outside the UMV borders */ x->mv_col_min = -((mb_col * 16) + (VP8BORDERINPIXELS - 16)); x->mv_col_max = ((cm->mb_cols - 1 - mb_col) * 16) + (VP8BORDERINPIXELS - 16); /* Other than for the first frame do a motion search */ if (cm->current_video_frame > 0) { BLOCKD *d = &x->e_mbd.block[0]; MV tmp_mv = { 0, 0 }; int tmp_err; int motion_error = INT_MAX; int raw_motion_error = INT_MAX; /* Simple 0,0 motion with no mv overhead */ zz_motion_search(x, cpi->last_frame_unscaled_source, &raw_motion_error, lst_yv12, &motion_error, recon_yoffset); d->bmi.mv.as_mv.row = 0; d->bmi.mv.as_mv.col = 0; if (raw_motion_error < cpi->oxcf.encode_breakout) { goto skip_motion_search; } /* Test last reference frame using the previous best mv as the * starting point (best reference) for the search */ first_pass_motion_search(cpi, x, &best_ref_mv, &d->bmi.mv.as_mv, lst_yv12, &motion_error, recon_yoffset); /* If the current best reference mv is not centred on 0,0 * then do a 0,0 based search as well */ if (best_ref_mv.as_int) { tmp_err = INT_MAX; first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, lst_yv12, &tmp_err, recon_yoffset); if (tmp_err < motion_error) { motion_error = tmp_err; d->bmi.mv.as_mv.row = tmp_mv.row; d->bmi.mv.as_mv.col = tmp_mv.col; } } /* Experimental search in a second reference frame ((0,0) * based only) */ if (cm->current_video_frame > 1) { first_pass_motion_search(cpi, x, &zero_ref_mv, &tmp_mv, gld_yv12, &gf_motion_error, recon_yoffset); if ((gf_motion_error < motion_error) && (gf_motion_error < this_error)) { second_ref_count++; } /* Reset to last frame as reference buffer */ xd->pre.y_buffer = lst_yv12->y_buffer + recon_yoffset; xd->pre.u_buffer = lst_yv12->u_buffer + recon_uvoffset; xd->pre.v_buffer = lst_yv12->v_buffer + recon_uvoffset; } skip_motion_search: /* Intra assumed best */ best_ref_mv.as_int = 0; if (motion_error <= this_error) { /* Keep a count of cases where the inter and intra were * very close and very low. This helps with scene cut * detection for example in cropped clips with black bars * at the sides or top and bottom. 
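* (The test below written out: a macroblock counts as neutral when
*   9 * (this_error - intrapenalty) <= 10 * motion_error
* and this_error < 2 * intrapenalty. Since this_error already includes
* intrapenalty here, the subtraction recovers the raw intra score; the
* intent is both scores tiny and intra at most ~11% above inter.)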
*/ if ((((this_error - intrapenalty) * 9) <= (motion_error * 10)) && (this_error < (2 * intrapenalty))) { neutral_count++; } d->bmi.mv.as_mv.row *= 8; d->bmi.mv.as_mv.col *= 8; this_error = motion_error; vp8_set_mbmode_and_mvs(x, NEWMV, &d->bmi.mv); vp8_encode_inter16x16y(x); sum_mvr += d->bmi.mv.as_mv.row; sum_mvr_abs += abs(d->bmi.mv.as_mv.row); sum_mvc += d->bmi.mv.as_mv.col; sum_mvc_abs += abs(d->bmi.mv.as_mv.col); sum_mvrs += d->bmi.mv.as_mv.row * d->bmi.mv.as_mv.row; sum_mvcs += d->bmi.mv.as_mv.col * d->bmi.mv.as_mv.col; intercount++; best_ref_mv.as_int = d->bmi.mv.as_int; /* Was the vector non-zero */ if (d->bmi.mv.as_int) { mvcount++; /* Was it different from the last non zero vector */ if (d->bmi.mv.as_int != lastmv_as_int) new_mv_count++; lastmv_as_int = d->bmi.mv.as_int; /* Does the Row vector point inwards or outwards */ if (mb_row < cm->mb_rows / 2) { if (d->bmi.mv.as_mv.row > 0) { sum_in_vectors--; } else if (d->bmi.mv.as_mv.row < 0) { sum_in_vectors++; } } else if (mb_row > cm->mb_rows / 2) { if (d->bmi.mv.as_mv.row > 0) { sum_in_vectors++; } else if (d->bmi.mv.as_mv.row < 0) { sum_in_vectors--; } } /* Does the Row vector point inwards or outwards */ if (mb_col < cm->mb_cols / 2) { if (d->bmi.mv.as_mv.col > 0) { sum_in_vectors--; } else if (d->bmi.mv.as_mv.col < 0) { sum_in_vectors++; } } else if (mb_col > cm->mb_cols / 2) { if (d->bmi.mv.as_mv.col > 0) { sum_in_vectors++; } else if (d->bmi.mv.as_mv.col < 0) { sum_in_vectors--; } } } } } coded_error += (int64_t)this_error; /* adjust to the next column of macroblocks */ x->src.y_buffer += 16; x->src.u_buffer += 8; x->src.v_buffer += 8; recon_yoffset += 16; recon_uvoffset += 8; } /* adjust to the next row of mbs */ x->src.y_buffer += 16 * x->src.y_stride - 16 * cm->mb_cols; x->src.u_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; x->src.v_buffer += 8 * x->src.uv_stride - 8 * cm->mb_cols; /* extend the recon for intra prediction */ vp8_extend_mb_row(new_yv12, xd->dst.y_buffer + 16, xd->dst.u_buffer + 8, xd->dst.v_buffer + 8); vpx_clear_system_state(); } vpx_clear_system_state(); { double weight = 0.0; FIRSTPASS_STATS fps; fps.frame = cm->current_video_frame; fps.intra_error = (double)(intra_error >> 8); fps.coded_error = (double)(coded_error >> 8); weight = simple_weight(cpi->Source); if (weight < 0.1) weight = 0.1; fps.ssim_weighted_pred_err = fps.coded_error * weight; fps.pcnt_inter = 0.0; fps.pcnt_motion = 0.0; fps.MVr = 0.0; fps.mvr_abs = 0.0; fps.MVc = 0.0; fps.mvc_abs = 0.0; fps.MVrv = 0.0; fps.MVcv = 0.0; fps.mv_in_out_count = 0.0; fps.new_mv_count = 0.0; fps.count = 1.0; fps.pcnt_inter = 1.0 * (double)intercount / cm->MBs; fps.pcnt_second_ref = 1.0 * (double)second_ref_count / cm->MBs; fps.pcnt_neutral = 1.0 * (double)neutral_count / cm->MBs; if (mvcount > 0) { fps.MVr = (double)sum_mvr / (double)mvcount; fps.mvr_abs = (double)sum_mvr_abs / (double)mvcount; fps.MVc = (double)sum_mvc / (double)mvcount; fps.mvc_abs = (double)sum_mvc_abs / (double)mvcount; fps.MVrv = ((double)sum_mvrs - (fps.MVr * fps.MVr / (double)mvcount)) / (double)mvcount; fps.MVcv = ((double)sum_mvcs - (fps.MVc * fps.MVc / (double)mvcount)) / (double)mvcount; fps.mv_in_out_count = (double)sum_in_vectors / (double)(mvcount * 2); fps.new_mv_count = new_mv_count; fps.pcnt_motion = 1.0 * (double)mvcount / cpi->common.MBs; } /* TODO: handle the case when duration is set to 0, or something less * than the full time between subsequent cpi->source_time_stamps */ fps.duration = (double)(cpi->source->ts_end - cpi->source->ts_start); /* don't want to do output 
stats with a stack variable! */ memcpy(&cpi->twopass.this_frame_stats, &fps, sizeof(FIRSTPASS_STATS)); output_stats(cpi->output_pkt_list, &cpi->twopass.this_frame_stats); accumulate_stats(&cpi->twopass.total_stats, &fps); } /* Copy the previous Last Frame into the GF buffer if specific * conditions for doing so are met */ if ((cm->current_video_frame > 0) && (cpi->twopass.this_frame_stats.pcnt_inter > 0.20) && ((cpi->twopass.this_frame_stats.intra_error / DOUBLE_DIVIDE_CHECK(cpi->twopass.this_frame_stats.coded_error)) > 2.0)) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); } /* swap frame pointers so last frame refers to the frame we just * compressed */ vp8_swap_yv12_buffer(lst_yv12, new_yv12); vp8_yv12_extend_frame_borders(lst_yv12); /* Special case for the first frame. Copy into the GF buffer as a * second reference. */ if (cm->current_video_frame == 0) { vp8_yv12_copy_frame(lst_yv12, gld_yv12); } /* use this to see what the first pass reconstruction looks like */ if (0) { char filename[512]; FILE *recon_file; sprintf(filename, "enc%04d.yuv", (int)cm->current_video_frame); if (cm->current_video_frame == 0) { recon_file = fopen(filename, "wb"); } else { recon_file = fopen(filename, "ab"); } (void)fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file); fclose(recon_file); } cm->current_video_frame++; } extern const int vp8_bits_per_mb[2][QINDEX_RANGE]; /* Estimate a cost per mb attributable to overheads such as the coding of * modes and motion vectors. * Currently simplistic in its assumptions for testing. */ static double bitcost(double prob) { if (prob > 0.000122) { return -log(prob) / log(2.0); } else { return 13.0; } } static int64_t estimate_modemvcost(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats) { int mv_cost; int64_t mode_cost; double av_pct_inter = fpstats->pcnt_inter / fpstats->count; double av_pct_motion = fpstats->pcnt_motion / fpstats->count; double av_intra = (1.0 - av_pct_inter); double zz_cost; double motion_cost; double intra_cost; zz_cost = bitcost(av_pct_inter - av_pct_motion); motion_cost = bitcost(av_pct_motion); intra_cost = bitcost(av_intra); /* Estimate of extra bits per mv overhead for mbs * << 9 is the normalization to the (bits * 512) used in vp8_bits_per_mb */ mv_cost = ((int)(fpstats->new_mv_count / fpstats->count) * 8) << 9; /* Crude estimate of overhead cost from modes * << 9 is the normalization to (bits * 512) used in vp8_bits_per_mb */ mode_cost = (int64_t)((((av_pct_inter - av_pct_motion) * zz_cost) + (av_pct_motion * motion_cost) + (av_intra * intra_cost)) * cpi->common.MBs) * 512; return mv_cost + mode_cost; } static double calc_correction_factor(double err_per_mb, double err_devisor, double pt_low, double pt_high, int Q) { double power_term; double error_term = err_per_mb / err_devisor; double correction_factor; /* Adjustment based on Q to power term. */ power_term = pt_low + (Q * 0.01); power_term = (power_term > pt_high) ? pt_high : power_term; /* Adjustments to error term */ /* TBD */ /* Calculate correction factor */ correction_factor = pow(error_term, power_term); /* Clip range */ correction_factor = (correction_factor < 0.05) ? 0.05 : (correction_factor > 5.0) ? 
5.0 : correction_factor; return correction_factor; } static int estimate_max_q(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats, int section_target_bandwitdh, int overhead_bits) { int Q; int num_mbs = cpi->common.MBs; int target_norm_bits_per_mb; double section_err = (fpstats->coded_error / fpstats->count); double err_per_mb = section_err / num_mbs; double err_correction_factor; double speed_correction = 1.0; int overhead_bits_per_mb; if (section_target_bandwitdh <= 0) { return cpi->twopass.maxq_max_limit; /* Highest value allowed */ } target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? (512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs); /* Calculate a corrective factor based on a rolling ratio of bits spent * vs target bits */ if ((cpi->rolling_target_bits > 0) && (cpi->active_worst_quality < cpi->worst_quality)) { double rolling_ratio; rolling_ratio = (double)cpi->rolling_actual_bits / (double)cpi->rolling_target_bits; if (rolling_ratio < 0.95) { cpi->twopass.est_max_qcorrection_factor -= 0.005; } else if (rolling_ratio > 1.05) { cpi->twopass.est_max_qcorrection_factor += 0.005; } cpi->twopass.est_max_qcorrection_factor = (cpi->twopass.est_max_qcorrection_factor < 0.1) ? 0.1 : (cpi->twopass.est_max_qcorrection_factor > 10.0) ? 10.0 : cpi->twopass.est_max_qcorrection_factor; } /* Corrections for higher compression speed settings * (reduced compression expected) */ if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) { if (cpi->oxcf.cpu_used <= 5) { speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04); } else { speed_correction = 1.25; } } /* Estimate of overhead bits per mb */ /* Correction to overhead bits for min allowed Q. */ overhead_bits_per_mb = overhead_bits / num_mbs; overhead_bits_per_mb = (int)(overhead_bits_per_mb * pow(0.98, (double)cpi->twopass.maxq_min_limit)); /* Try and pick a max Q that will be high enough to encode the * content at the given rate. */ for (Q = cpi->twopass.maxq_min_limit; Q < cpi->twopass.maxq_max_limit; ++Q) { int bits_per_mb_at_this_q; /* Error per MB based correction factor */ err_correction_factor = calc_correction_factor(err_per_mb, 150.0, 0.40, 0.90, Q); bits_per_mb_at_this_q = vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb; bits_per_mb_at_this_q = (int)(.5 + err_correction_factor * speed_correction * cpi->twopass.est_max_qcorrection_factor * cpi->twopass.section_max_qfactor * (double)bits_per_mb_at_this_q); /* Mode and motion overhead */ /* As Q rises in real encode loop rd code will force overhead down * We make a crude adjustment for this here as *.98 per Q step. */ overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } /* Restriction on active max q for constrained quality mode. */ if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && (Q < cpi->cq_target_quality)) { Q = cpi->cq_target_quality; } /* Adjust maxq_min_limit and maxq_max_limit limits based on * average q observed in clip for non kf/gf.arf frames * Give average a chance to settle though. */ if ((cpi->ni_frames > ((int)cpi->twopass.total_stats.count >> 8)) && (cpi->ni_frames > 150)) { cpi->twopass.maxq_max_limit = ((cpi->ni_av_qi + 32) < cpi->worst_quality) ? (cpi->ni_av_qi + 32) : cpi->worst_quality; cpi->twopass.maxq_min_limit = ((cpi->ni_av_qi - 32) > cpi->best_quality) ? (cpi->ni_av_qi - 32) : cpi->best_quality; } return Q; } /* For cq mode estimate a cq level that matches the observed * complexity and data rate. 
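* (Note on the return value, paraphrasing the code: the raw loop index is
* mapped through the cq_level[] table defined near the top of this file
* and then clamped to the range [best_quality, worst_quality - 1], so the
* caller always receives a valid user-visible CQ level.)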
*/ static int estimate_cq(VP8_COMP *cpi, FIRSTPASS_STATS *fpstats, int section_target_bandwitdh, int overhead_bits) { int Q; int num_mbs = cpi->common.MBs; int target_norm_bits_per_mb; double section_err = (fpstats->coded_error / fpstats->count); double err_per_mb = section_err / num_mbs; double err_correction_factor; double speed_correction = 1.0; double clip_iiratio; double clip_iifactor; int overhead_bits_per_mb; if (0) { FILE *f = fopen("epmp.stt", "a"); fprintf(f, "%10.2f\n", err_per_mb); fclose(f); } target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? (512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs); /* Estimate of overhead bits per mb */ overhead_bits_per_mb = overhead_bits / num_mbs; /* Corrections for higher compression speed settings * (reduced compression expected) */ if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) { if (cpi->oxcf.cpu_used <= 5) { speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04); } else { speed_correction = 1.25; } } /* II ratio correction factor for clip as a whole */ clip_iiratio = cpi->twopass.total_stats.intra_error / DOUBLE_DIVIDE_CHECK(cpi->twopass.total_stats.coded_error); clip_iifactor = 1.0 - ((clip_iiratio - 10.0) * 0.025); if (clip_iifactor < 0.80) clip_iifactor = 0.80; /* Try and pick a Q that can encode the content at the given rate. */ for (Q = 0; Q < MAXQ; ++Q) { int bits_per_mb_at_this_q; /* Error per MB based correction factor */ err_correction_factor = calc_correction_factor(err_per_mb, 100.0, 0.40, 0.90, Q); bits_per_mb_at_this_q = vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb; bits_per_mb_at_this_q = (int)(.5 + err_correction_factor * speed_correction * clip_iifactor * (double)bits_per_mb_at_this_q); /* Mode and motion overhead */ /* As Q rises in real encode loop rd code will force overhead down * We make a crude adjustment for this here as *.98 per Q step. */ overhead_bits_per_mb = (int)((double)overhead_bits_per_mb * 0.98); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } /* Clip value to range "best allowed to (worst allowed - 1)" */ Q = cq_level[Q]; if (Q >= cpi->worst_quality) Q = cpi->worst_quality - 1; if (Q < cpi->best_quality) Q = cpi->best_quality; return Q; } static int estimate_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh) { int Q; int num_mbs = cpi->common.MBs; int target_norm_bits_per_mb; double err_per_mb = section_err / num_mbs; double err_correction_factor; double speed_correction = 1.0; target_norm_bits_per_mb = (section_target_bandwitdh < (1 << 20)) ? (512 * section_target_bandwitdh) / num_mbs : 512 * (section_target_bandwitdh / num_mbs); /* Corrections for higher compression speed settings * (reduced compression expected) */ if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) { if (cpi->oxcf.cpu_used <= 5) { speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04); } else { speed_correction = 1.25; } } /* Try and pick a Q that can encode the content at the given rate. 
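* In outline (paraphrase, not normative): for each candidate Q the
* projected rate is
*   bits_per_mb(Q) ~= err_correction_factor * speed_correction *
*                     clip_iifactor *
*                     (vp8_bits_per_mb[INTER_FRAME][Q] + overhead_bits_per_mb)
* with the per-mb overhead decayed by 0.98 per Q step, and the first Q
* whose projection fits within target_norm_bits_per_mb is accepted.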
*/ for (Q = 0; Q < MAXQ; ++Q) { int bits_per_mb_at_this_q; /* Error per MB based correction factor */ err_correction_factor = calc_correction_factor(err_per_mb, 150.0, 0.40, 0.90, Q); bits_per_mb_at_this_q = (int)(.5 + (err_correction_factor * speed_correction * cpi->twopass.est_max_qcorrection_factor * (double)vp8_bits_per_mb[INTER_FRAME][Q] / 1.0)); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } return Q; } /* Estimate a worst case Q for a KF group */ static int estimate_kf_group_q(VP8_COMP *cpi, double section_err, int section_target_bandwitdh, double group_iiratio) { int Q; int num_mbs = cpi->common.MBs; int target_norm_bits_per_mb = (512 * section_target_bandwitdh) / num_mbs; int bits_per_mb_at_this_q; double err_per_mb = section_err / num_mbs; double err_correction_factor; double speed_correction = 1.0; double current_spend_ratio = 1.0; double pow_highq = (POW1 < 0.6) ? POW1 + 0.3 : 0.90; double pow_lowq = (POW1 < 0.7) ? POW1 + 0.1 : 0.80; double iiratio_correction_factor = 1.0; double combined_correction_factor; /* Trap special case where the target is <= 0 */ if (target_norm_bits_per_mb <= 0) return MAXQ * 2; /* Calculate a corrective factor based on a rolling ratio of bits spent * vs target bits * This is clamped to the range 0.1 to 10.0 */ if (cpi->long_rolling_target_bits <= 0) { current_spend_ratio = 10.0; } else { current_spend_ratio = (double)cpi->long_rolling_actual_bits / (double)cpi->long_rolling_target_bits; current_spend_ratio = (current_spend_ratio > 10.0) ? 10.0 : (current_spend_ratio < 0.1) ? 0.1 : current_spend_ratio; } /* Calculate a correction factor based on the quality of prediction in * the sequence as indicated by intra_inter error score ratio (IIRatio) * The idea here is to favour subsampling in the hardest sections vs * the easyest. */ iiratio_correction_factor = 1.0 - ((group_iiratio - 6.0) * 0.1); if (iiratio_correction_factor < 0.5) iiratio_correction_factor = 0.5; /* Corrections for higher compression speed settings * (reduced compression expected) */ if ((cpi->compressor_speed == 3) || (cpi->compressor_speed == 1)) { if (cpi->oxcf.cpu_used <= 5) { speed_correction = 1.04 + (cpi->oxcf.cpu_used * 0.04); } else { speed_correction = 1.25; } } /* Combine the various factors calculated above */ combined_correction_factor = speed_correction * iiratio_correction_factor * current_spend_ratio; /* Try and pick a Q that should be high enough to encode the content at * the given rate. 
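* If even the top of the normal range overshoots the target, the while
* loop that follows extrapolates rather than giving up: each further Q
* step is assumed to save about 4% of the bits (the 0.96 factor), which
* is why this function can legitimately return values up to MAXQ * 2.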
*/ for (Q = 0; Q < MAXQ; ++Q) { /* Error per MB based correction factor */ err_correction_factor = calc_correction_factor(err_per_mb, 150.0, pow_lowq, pow_highq, Q); bits_per_mb_at_this_q = (int)(.5 + (err_correction_factor * combined_correction_factor * (double)vp8_bits_per_mb[INTER_FRAME][Q])); if (bits_per_mb_at_this_q <= target_norm_bits_per_mb) break; } /* If we could not hit the target even at Max Q then estimate what Q * would have been required */ while ((bits_per_mb_at_this_q > target_norm_bits_per_mb) && (Q < (MAXQ * 2))) { bits_per_mb_at_this_q = (int)(0.96 * bits_per_mb_at_this_q); Q++; } if (0) { FILE *f = fopen("estkf_q.stt", "a"); fprintf(f, "%8d %8d %8d %8.2f %8.3f %8.2f %8.3f %8.3f %8.3f %8d\n", cpi->common.current_video_frame, bits_per_mb_at_this_q, target_norm_bits_per_mb, err_per_mb, err_correction_factor, current_spend_ratio, group_iiratio, iiratio_correction_factor, (double)cpi->buffer_level / (double)cpi->oxcf.optimal_buffer_level, Q); fclose(f); } return Q; } void vp8_init_second_pass(VP8_COMP *cpi) { FIRSTPASS_STATS this_frame; FIRSTPASS_STATS *start_pos; double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); zero_stats(&cpi->twopass.total_stats); zero_stats(&cpi->twopass.total_left_stats); if (!cpi->twopass.stats_in_end) return; cpi->twopass.total_stats = *cpi->twopass.stats_in_end; cpi->twopass.total_left_stats = cpi->twopass.total_stats; /* each frame can have a different duration, as the frame rate in the * source isn't guaranteed to be constant. The frame rate prior to * the first frame encoded in the second pass is a guess. However the * sum duration is not. Its calculated based on the actual durations of * all frames from the first pass. */ vp8_new_framerate(cpi, 10000000.0 * cpi->twopass.total_stats.count / cpi->twopass.total_stats.duration); cpi->output_framerate = cpi->framerate; cpi->twopass.bits_left = (int64_t)(cpi->twopass.total_stats.duration * cpi->oxcf.target_bandwidth / 10000000.0); cpi->twopass.bits_left -= (int64_t)(cpi->twopass.total_stats.duration * two_pass_min_rate / 10000000.0); /* Calculate a minimum intra value to be used in determining the IIratio * scores used in the second pass. We have this minimum to make sure * that clips that are static but "low complexity" in the intra domain * are still boosted appropriately for KF/GF/ARF */ cpi->twopass.kf_intra_err_min = KF_MB_INTRA_MIN * cpi->common.MBs; cpi->twopass.gf_intra_err_min = GF_MB_INTRA_MIN * cpi->common.MBs; /* Scan the first pass file and calculate an average Intra / Inter error * score ratio for the sequence */ { double sum_iiratio = 0.0; double IIRatio; start_pos = cpi->twopass.stats_in; /* Note starting "file" position */ while (input_stats(cpi, &this_frame) != EOF) { IIRatio = this_frame.intra_error / DOUBLE_DIVIDE_CHECK(this_frame.coded_error); IIRatio = (IIRatio < 1.0) ? 1.0 : (IIRatio > 20.0) ? 
20.0 : IIRatio; sum_iiratio += IIRatio; } cpi->twopass.avg_iiratio = sum_iiratio / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.total_stats.count); /* Reset file position */ reset_fpf_position(cpi, start_pos); } /* Scan the first pass file and calculate a modified total error based * upon the bias/power function used to allocate bits */ { start_pos = cpi->twopass.stats_in; /* Note starting "file" position */ cpi->twopass.modified_error_total = 0.0; cpi->twopass.modified_error_used = 0.0; while (input_stats(cpi, &this_frame) != EOF) { cpi->twopass.modified_error_total += calculate_modified_err(cpi, &this_frame); } cpi->twopass.modified_error_left = cpi->twopass.modified_error_total; reset_fpf_position(cpi, start_pos); /* Reset file position */ } } void vp8_end_second_pass(VP8_COMP *cpi) { (void)cpi; } /* This function gives an estimate of how badly we believe the prediction * quality is decaying from frame to frame. */ static double get_prediction_decay_rate(FIRSTPASS_STATS *next_frame) { double prediction_decay_rate; double motion_decay; double motion_pct = next_frame->pcnt_motion; /* Initial basis is the % mbs inter coded */ prediction_decay_rate = next_frame->pcnt_inter; /* High % motion -> somewhat higher decay rate */ motion_decay = (1.0 - (motion_pct / 20.0)); if (motion_decay < prediction_decay_rate) { prediction_decay_rate = motion_decay; } /* Adjustment to decay rate based on speed of motion */ { double this_mv_rabs; double this_mv_cabs; double distance_factor; this_mv_rabs = fabs(next_frame->mvr_abs * motion_pct); this_mv_cabs = fabs(next_frame->mvc_abs * motion_pct); distance_factor = sqrt((this_mv_rabs * this_mv_rabs) + (this_mv_cabs * this_mv_cabs)) / 250.0; distance_factor = ((distance_factor > 1.0) ? 0.0 : (1.0 - distance_factor)); if (distance_factor < prediction_decay_rate) { prediction_decay_rate = distance_factor; } } return prediction_decay_rate; } /* Function to test for a condition where a complex transition is followed * by a static section. For example in slide shows where there is a fade * between slides. This is to help with more optimal kf and gf positioning. */ static int detect_transition_to_still(VP8_COMP *cpi, int frame_interval, int still_interval, double loop_decay_rate, double decay_accumulator) { int trans_to_still = 0; /* Break clause to detect very still sections after motion * For example a static image after a fade or other transition * instead of a clean scene cut. */ if ((frame_interval > MIN_GF_INTERVAL) && (loop_decay_rate >= 0.999) && (decay_accumulator < 0.9)) { int j; FIRSTPASS_STATS *position = cpi->twopass.stats_in; FIRSTPASS_STATS tmp_next_frame; double decay_rate; /* Look ahead a few frames to see if static condition persists... */ for (j = 0; j < still_interval; ++j) { if (EOF == input_stats(cpi, &tmp_next_frame)) break; decay_rate = get_prediction_decay_rate(&tmp_next_frame); if (decay_rate < 0.999) break; } /* Reset file position */ reset_fpf_position(cpi, position); /* Only if it does do we signal a transition to still */ if (j == still_interval) trans_to_still = 1; } return trans_to_still; } /* This function detects a flash through the high relative pcnt_second_ref * score in the frame following a flash frame. The offset passed in should * reflect this */ static int detect_flash(VP8_COMP *cpi, int offset) { FIRSTPASS_STATS next_frame; int flash_detected = 0; /* Read the frame data.
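* (Usage note, hedged: callers typically test two adjacent offsets, e.g.
* detect_flash(cpi, i) || detect_flash(cpi, i + 1) in the boost loops, so
* that both the flash frame itself and the recovery frame after it are
* discounted.)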
*/ /* The return is 0 (no flash detected) if not a valid frame */ if (read_frame_stats(cpi, &next_frame, offset) != EOF) { /* What we are looking for here is a situation where there is a * brief break in prediction (such as a flash) but subsequent frames * are reasonably well predicted by an earlier (pre flash) frame. * The recovery after a flash is indicated by a high pcnt_second_ref * comapred to pcnt_inter. */ if ((next_frame.pcnt_second_ref > next_frame.pcnt_inter) && (next_frame.pcnt_second_ref >= 0.5)) { flash_detected = 1; /*if (1) { FILE *f = fopen("flash.stt", "a"); fprintf(f, "%8.0f %6.2f %6.2f\n", next_frame.frame, next_frame.pcnt_inter, next_frame.pcnt_second_ref); fclose(f); }*/ } } return flash_detected; } /* Update the motion related elements to the GF arf boost calculation */ static void accumulate_frame_motion_stats(FIRSTPASS_STATS *this_frame, double *this_frame_mv_in_out, double *mv_in_out_accumulator, double *abs_mv_in_out_accumulator, double *mv_ratio_accumulator) { double this_frame_mvr_ratio; double this_frame_mvc_ratio; double motion_pct; /* Accumulate motion stats. */ motion_pct = this_frame->pcnt_motion; /* Accumulate Motion In/Out of frame stats */ *this_frame_mv_in_out = this_frame->mv_in_out_count * motion_pct; *mv_in_out_accumulator += this_frame->mv_in_out_count * motion_pct; *abs_mv_in_out_accumulator += fabs(this_frame->mv_in_out_count * motion_pct); /* Accumulate a measure of how uniform (or conversely how random) * the motion field is. (A ratio of absmv / mv) */ if (motion_pct > 0.05) { this_frame_mvr_ratio = fabs(this_frame->mvr_abs) / DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVr)); this_frame_mvc_ratio = fabs(this_frame->mvc_abs) / DOUBLE_DIVIDE_CHECK(fabs(this_frame->MVc)); *mv_ratio_accumulator += (this_frame_mvr_ratio < this_frame->mvr_abs) ? (this_frame_mvr_ratio * motion_pct) : this_frame->mvr_abs * motion_pct; *mv_ratio_accumulator += (this_frame_mvc_ratio < this_frame->mvc_abs) ? (this_frame_mvc_ratio * motion_pct) : this_frame->mvc_abs * motion_pct; } } /* Calculate a baseline boost number for the current frame. */ static double calc_frame_boost(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame, double this_frame_mv_in_out) { double frame_boost; /* Underlying boost factor is based on inter intra error ratio */ if (this_frame->intra_error > cpi->twopass.gf_intra_err_min) { frame_boost = (IIFACTOR * this_frame->intra_error / DOUBLE_DIVIDE_CHECK(this_frame->coded_error)); } else { frame_boost = (IIFACTOR * cpi->twopass.gf_intra_err_min / DOUBLE_DIVIDE_CHECK(this_frame->coded_error)); } /* Increase boost for frames where new data coming into frame * (eg zoom out). Slightly reduce boost if there is a net balance * of motion out of the frame (zoom in). 
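* (Illustrative arithmetic for the update below: a base frame_boost of 10
* with this_frame_mv_in_out = +0.5 becomes 10 * (1 + 1.0) = 20, while
* -0.5 gives 10 * (1 - 0.25) = 7.5 and -1.0 halves the boost.)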
* The range for this_frame_mv_in_out is -1.0 to +1.0 */ if (this_frame_mv_in_out > 0.0) { frame_boost += frame_boost * (this_frame_mv_in_out * 2.0); /* In extreme case boost is halved */ } else { frame_boost += frame_boost * (this_frame_mv_in_out / 2.0); } /* Clip to maximum */ if (frame_boost > GF_RMAX) frame_boost = GF_RMAX; return frame_boost; } #if NEW_BOOST static int calc_arf_boost(VP8_COMP *cpi, int offset, int f_frames, int b_frames, int *f_boost, int *b_boost) { FIRSTPASS_STATS this_frame; int i; double boost_score = 0.0; double mv_ratio_accumulator = 0.0; double decay_accumulator = 1.0; double this_frame_mv_in_out = 0.0; double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; double r; int flash_detected = 0; /* Search forward from the proposed arf/next gf position */ for (i = 0; i < f_frames; ++i) { if (read_frame_stats(cpi, &this_frame, (i + offset)) == EOF) break; /* Update the motion related elements to the boost calculation */ accumulate_frame_motion_stats( &this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); /* Calculate the baseline boost number for this frame */ r = calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out); /* We want to discount the the flash frame itself and the recovery * frame that follows as both will have poor scores. */ flash_detected = detect_flash(cpi, (i + offset)) || detect_flash(cpi, (i + offset + 1)); /* Cumulative effect of prediction quality decay */ if (!flash_detected) { decay_accumulator = decay_accumulator * get_prediction_decay_rate(&this_frame); decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator; } boost_score += (decay_accumulator * r); /* Break out conditions. */ if ((!flash_detected) && ((mv_ratio_accumulator > 100.0) || (abs_mv_in_out_accumulator > 3.0) || (mv_in_out_accumulator < -2.0))) { break; } } *f_boost = (int)(boost_score * 100.0) >> 4; /* Reset for backward looking loop */ boost_score = 0.0; mv_ratio_accumulator = 0.0; decay_accumulator = 1.0; this_frame_mv_in_out = 0.0; mv_in_out_accumulator = 0.0; abs_mv_in_out_accumulator = 0.0; /* Search forward from the proposed arf/next gf position */ for (i = -1; i >= -b_frames; i--) { if (read_frame_stats(cpi, &this_frame, (i + offset)) == EOF) break; /* Update the motion related elements to the boost calculation */ accumulate_frame_motion_stats( &this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); /* Calculate the baseline boost number for this frame */ r = calc_frame_boost(cpi, &this_frame, this_frame_mv_in_out); /* We want to discount the the flash frame itself and the recovery * frame that follows as both will have poor scores. */ flash_detected = detect_flash(cpi, (i + offset)) || detect_flash(cpi, (i + offset + 1)); /* Cumulative effect of prediction quality decay */ if (!flash_detected) { decay_accumulator = decay_accumulator * get_prediction_decay_rate(&this_frame); decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator; } boost_score += (decay_accumulator * r); /* Break out conditions. */ if ((!flash_detected) && ((mv_ratio_accumulator > 100.0) || (abs_mv_in_out_accumulator > 3.0) || (mv_in_out_accumulator < -2.0))) { break; } } *b_boost = (int)(boost_score * 100.0) >> 4; return (*f_boost + *b_boost); } #endif /* Analyse and define a gf/arf group . 
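* In outline: scan forward accumulating modified error and a decaying
* boost score until a break condition fires; optionally test-estimate an
* ARF Q to decide whether an alt ref frame is worthwhile; then carve this
* group's bit allocation out of the remaining key frame group bits in
* proportion to its share of the remaining error.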
*/ static void define_gf_group(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { FIRSTPASS_STATS next_frame; FIRSTPASS_STATS *start_pos; int i; double r; double boost_score = 0.0; double old_boost_score = 0.0; double gf_group_err = 0.0; double gf_first_frame_err = 0.0; double mod_frame_err = 0.0; double mv_ratio_accumulator = 0.0; double decay_accumulator = 1.0; double loop_decay_rate = 1.00; /* Starting decay rate */ double this_frame_mv_in_out = 0.0; double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; double mod_err_per_mb_accumulator = 0.0; int max_bits = frame_max_bits(cpi); /* Max for a single frame */ unsigned int allow_alt_ref = cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames; int alt_boost = 0; int f_boost = 0; int b_boost = 0; int flash_detected; cpi->twopass.gf_group_bits = 0; cpi->twopass.gf_decay_rate = 0; vpx_clear_system_state(); start_pos = cpi->twopass.stats_in; memset(&next_frame, 0, sizeof(next_frame)); /* assure clean */ /* Load stats for the current frame. */ mod_frame_err = calculate_modified_err(cpi, this_frame); /* Note the error of the frame at the start of the group (this will be * the GF frame error if we code a normal gf */ gf_first_frame_err = mod_frame_err; /* Special treatment if the current frame is a key frame (which is also * a gf). If it is then its error score (and hence bit allocation) need * to be subtracted out from the calculation for the GF group */ if (cpi->common.frame_type == KEY_FRAME) gf_group_err -= gf_first_frame_err; /* Scan forward to try and work out how many frames the next gf group * should contain and what level of boost is appropriate for the GF * or ARF that will be coded with the group */ i = 0; while (((i < cpi->twopass.static_scene_max_gf_interval) || ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL)) && (i < cpi->twopass.frames_to_key)) { i++; /* Accumulate error score of frames in this gf group */ mod_frame_err = calculate_modified_err(cpi, this_frame); gf_group_err += mod_frame_err; mod_err_per_mb_accumulator += mod_frame_err / DOUBLE_DIVIDE_CHECK((double)cpi->common.MBs); if (EOF == input_stats(cpi, &next_frame)) break; /* Test for the case where there is a brief flash but the prediction * quality back to an earlier frame is then restored. */ flash_detected = detect_flash(cpi, 0); /* Update the motion related elements to the boost calculation */ accumulate_frame_motion_stats( &next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); /* Calculate a baseline boost number for this frame */ r = calc_frame_boost(cpi, &next_frame, this_frame_mv_in_out); /* Cumulative effect of prediction quality decay */ if (!flash_detected) { loop_decay_rate = get_prediction_decay_rate(&next_frame); decay_accumulator = decay_accumulator * loop_decay_rate; decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator; } boost_score += (decay_accumulator * r); /* Break clause to detect very still sections after motion * For example a staic image after a fade or other transition. */ if (detect_transition_to_still(cpi, i, 5, loop_decay_rate, decay_accumulator)) { allow_alt_ref = 0; boost_score = old_boost_score; break; } /* Break out conditions. 
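* Paraphrased: stop at max_gf_interval unless the scene is nearly static;
* otherwise, once past MIN_GF_INTERVAL and not within MIN_GF_INTERVAL of
* the next key frame, stop when the boost is high or inter coverage
* drops, provided no flash was detected and either the motion statistics
* look unstable or the boost curve has flattened (gain below 2.0 for the
* frame).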
*/ if ( /* Break at cpi->max_gf_interval unless almost totally static */ (i >= cpi->max_gf_interval && (decay_accumulator < 0.995)) || ( /* Dont break out with a very short interval */ (i > MIN_GF_INTERVAL) && /* Dont break out very close to a key frame */ ((cpi->twopass.frames_to_key - i) >= MIN_GF_INTERVAL) && ((boost_score > 20.0) || (next_frame.pcnt_inter < 0.75)) && (!flash_detected) && ((mv_ratio_accumulator > 100.0) || (abs_mv_in_out_accumulator > 3.0) || (mv_in_out_accumulator < -2.0) || ((boost_score - old_boost_score) < 2.0)))) { boost_score = old_boost_score; break; } memcpy(this_frame, &next_frame, sizeof(*this_frame)); old_boost_score = boost_score; } cpi->twopass.gf_decay_rate = (i > 0) ? (int)(100.0 * (1.0 - decay_accumulator)) / i : 0; /* When using CBR apply additional buffer related upper limits */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { double max_boost; /* For cbr apply buffer related limits */ if (cpi->drop_frames_allowed) { int64_t df_buffer_level = cpi->oxcf.drop_frames_water_mark * (cpi->oxcf.optimal_buffer_level / 100); if (cpi->buffer_level > df_buffer_level) { max_boost = ((double)((cpi->buffer_level - df_buffer_level) * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth); } else { max_boost = 0.0; } } else if (cpi->buffer_level > 0) { max_boost = ((double)(cpi->buffer_level * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth); } else { max_boost = 0.0; } if (boost_score > max_boost) boost_score = max_boost; } /* Dont allow conventional gf too near the next kf */ if ((cpi->twopass.frames_to_key - i) < MIN_GF_INTERVAL) { while (i < cpi->twopass.frames_to_key) { i++; if (EOF == input_stats(cpi, this_frame)) break; if (i < cpi->twopass.frames_to_key) { mod_frame_err = calculate_modified_err(cpi, this_frame); gf_group_err += mod_frame_err; } } } cpi->gfu_boost = (int)(boost_score * 100.0) >> 4; #if NEW_BOOST /* Alterrnative boost calculation for alt ref */ alt_boost = calc_arf_boost(cpi, 0, (i - 1), (i - 1), &f_boost, &b_boost); #endif /* Should we use the alternate refernce frame */ if (allow_alt_ref && (i >= MIN_GF_INTERVAL) && /* dont use ARF very near next kf */ (i <= (cpi->twopass.frames_to_key - MIN_GF_INTERVAL)) && #if NEW_BOOST ((next_frame.pcnt_inter > 0.75) || (next_frame.pcnt_second_ref > 0.5)) && ((mv_in_out_accumulator / (double)i > -0.2) || (mv_in_out_accumulator > -2.0)) && (b_boost > 100) && (f_boost > 100)) #else (next_frame.pcnt_inter > 0.75) && ((mv_in_out_accumulator / (double)i > -0.2) || (mv_in_out_accumulator > -2.0)) && (cpi->gfu_boost > 100) && (cpi->twopass.gf_decay_rate <= (ARF_DECAY_THRESH + (cpi->gfu_boost / 200)))) #endif { int Boost; int allocation_chunks; int Q = (cpi->oxcf.fixed_q < 0) ? 
cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; int tmp_q; int arf_frame_bits = 0; int group_bits; #if NEW_BOOST cpi->gfu_boost = alt_boost; #endif /* Estimate the bits to be allocated to the group as a whole */ if ((cpi->twopass.kf_group_bits > 0) && (cpi->twopass.kf_group_error_left > 0)) { group_bits = (int)((double)cpi->twopass.kf_group_bits * (gf_group_err / (double)cpi->twopass.kf_group_error_left)); } else { group_bits = 0; } /* Boost for arf frame */ #if NEW_BOOST Boost = (alt_boost * GFQ_ADJUSTMENT) / 100; #else Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100); #endif Boost += (i * 50); /* Set max and minimum boost and hence minimum allocation */ if (Boost > ((cpi->baseline_gf_interval + 1) * 200)) { Boost = ((cpi->baseline_gf_interval + 1) * 200); } else if (Boost < 125) { Boost = 125; } allocation_chunks = (i * 100) + Boost; /* Normalize Altboost and allocations chunck down to prevent overflow */ while (Boost > 1000) { Boost /= 2; allocation_chunks /= 2; } /* Calculate the number of bits to be spent on the arf based on the * boost number */ arf_frame_bits = (int)((double)Boost * (group_bits / (double)allocation_chunks)); /* Estimate if there are enough bits available to make worthwhile use * of an arf. */ tmp_q = estimate_q(cpi, mod_frame_err, (int)arf_frame_bits); /* Only use an arf if it is likely we will be able to code * it at a lower Q than the surrounding frames. */ if (tmp_q < cpi->worst_quality) { int half_gf_int; int frames_after_arf; int frames_bwd = cpi->oxcf.arnr_max_frames - 1; int frames_fwd = cpi->oxcf.arnr_max_frames - 1; cpi->source_alt_ref_pending = 1; /* * For alt ref frames the error score for the end frame of the * group (the alt ref frame) should not contribute to the group * total and hence the number of bit allocated to the group. * Rather it forms part of the next group (it is the GF at the * start of the next group) * gf_group_err -= mod_frame_err; * * For alt ref frames alt ref frame is technically part of the * GF frame for the next group but we always base the error * calculation and bit allocation on the current group of frames. * * Set the interval till the next gf or arf. * For ARFs this is the number of frames to be coded before the * future frame that is coded as an ARF. * The future frame itself is part of the next group */ cpi->baseline_gf_interval = i; /* * Define the arnr filter width for this group of frames: * We only filter frames that lie within a distance of half * the GF interval from the ARF frame. We also have to trap * cases where the filter extends beyond the end of clip. * Note: this_frame->frame has been updated in the loop * so it now points at the ARF frame. */ half_gf_int = cpi->baseline_gf_interval >> 1; frames_after_arf = (int)(cpi->twopass.total_stats.count - this_frame->frame - 1); switch (cpi->oxcf.arnr_type) { case 1: /* Backward filter */ frames_fwd = 0; if (frames_bwd > half_gf_int) frames_bwd = half_gf_int; break; case 2: /* Forward filter */ if (frames_fwd > half_gf_int) frames_fwd = half_gf_int; if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf; frames_bwd = 0; break; case 3: /* Centered filter */ default: frames_fwd >>= 1; if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf; if (frames_fwd > half_gf_int) frames_fwd = half_gf_int; frames_bwd = frames_fwd; /* For even length filter there is one more frame backward * than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. 
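* (Illustrative check of the adjustment below: arnr_max_frames = 6 gives
* frames_fwd = 5 >> 1 = 2 and (6 + 1) & 0x1 = 1 extra backward frame,
* i.e. bbbAff; arnr_max_frames = 7 gives frames_fwd = 3 and no extra
* frame, i.e. bbbAfff.)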
*/ if (frames_bwd < half_gf_int) { frames_bwd += (cpi->oxcf.arnr_max_frames + 1) & 0x1; } break; } cpi->active_arnr_frames = frames_bwd + 1 + frames_fwd; } else { cpi->source_alt_ref_pending = 0; cpi->baseline_gf_interval = i; } } else { cpi->source_alt_ref_pending = 0; cpi->baseline_gf_interval = i; } /* * Now decide how many bits should be allocated to the GF group as a * proportion of those remaining in the kf group. * The final key frame group in the clip is treated as a special case * where cpi->twopass.kf_group_bits is tied to cpi->twopass.bits_left. * This is also important for short clips where there may only be one * key frame. */ if (cpi->twopass.frames_to_key >= (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame)) { cpi->twopass.kf_group_bits = (cpi->twopass.bits_left > 0) ? cpi->twopass.bits_left : 0; } /* Calculate the bits to be allocated to the group as a whole */ if ((cpi->twopass.kf_group_bits > 0) && (cpi->twopass.kf_group_error_left > 0)) { cpi->twopass.gf_group_bits = (int64_t)(cpi->twopass.kf_group_bits * (gf_group_err / cpi->twopass.kf_group_error_left)); } else { cpi->twopass.gf_group_bits = 0; } cpi->twopass.gf_group_bits = (cpi->twopass.gf_group_bits < 0) ? 0 : (cpi->twopass.gf_group_bits > cpi->twopass.kf_group_bits) ? cpi->twopass.kf_group_bits : cpi->twopass.gf_group_bits; /* Clip cpi->twopass.gf_group_bits based on user supplied data rate * variability limit (cpi->oxcf.two_pass_vbrmax_section) */ if (cpi->twopass.gf_group_bits > (int64_t)max_bits * cpi->baseline_gf_interval) { cpi->twopass.gf_group_bits = (int64_t)max_bits * cpi->baseline_gf_interval; } /* Reset the file position */ reset_fpf_position(cpi, start_pos); /* Update the record of error used so far (only done once per gf group) */ cpi->twopass.modified_error_used += gf_group_err; /* Assign bits to the arf or gf. */ for (i = 0; i <= (cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME); i++) { int Boost; int allocation_chunks; int Q = (cpi->oxcf.fixed_q < 0) ? 
cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; int gf_bits; /* For ARF frames */ if (cpi->source_alt_ref_pending && i == 0) { #if NEW_BOOST Boost = (alt_boost * GFQ_ADJUSTMENT) / 100; #else Boost = (cpi->gfu_boost * 3 * GFQ_ADJUSTMENT) / (2 * 100); #endif Boost += (cpi->baseline_gf_interval * 50); /* Set max and minimum boost and hence minimum allocation */ if (Boost > ((cpi->baseline_gf_interval + 1) * 200)) { Boost = ((cpi->baseline_gf_interval + 1) * 200); } else if (Boost < 125) { Boost = 125; } allocation_chunks = ((cpi->baseline_gf_interval + 1) * 100) + Boost; } /* Else for standard golden frames */ else { /* boost based on inter / intra ratio of subsequent frames */ Boost = (cpi->gfu_boost * GFQ_ADJUSTMENT) / 100; /* Set max and minimum boost and hence minimum allocation */ if (Boost > (cpi->baseline_gf_interval * 150)) { Boost = (cpi->baseline_gf_interval * 150); } else if (Boost < 125) { Boost = 125; } allocation_chunks = (cpi->baseline_gf_interval * 100) + (Boost - 100); } /* Normalize Altboost and allocations chunck down to prevent overflow */ while (Boost > 1000) { Boost /= 2; allocation_chunks /= 2; } /* Calculate the number of bits to be spent on the gf or arf based on * the boost number */ gf_bits = (int)((double)Boost * (cpi->twopass.gf_group_bits / (double)allocation_chunks)); /* If the frame that is to be boosted is simpler than the average for * the gf/arf group then use an alternative calculation * based on the error score of the frame itself */ if (mod_frame_err < gf_group_err / (double)cpi->baseline_gf_interval) { double alt_gf_grp_bits; int alt_gf_bits; alt_gf_grp_bits = (double)cpi->twopass.kf_group_bits * (mod_frame_err * (double)cpi->baseline_gf_interval) / DOUBLE_DIVIDE_CHECK((double)cpi->twopass.kf_group_error_left); alt_gf_bits = (int)((double)Boost * (alt_gf_grp_bits / (double)allocation_chunks)); if (gf_bits > alt_gf_bits) { gf_bits = alt_gf_bits; } } /* Else if it is harder than other frames in the group make sure it at * least receives an allocation in keeping with its relative error * score, otherwise it may be worse off than an "un-boosted" frame */ else { // Avoid division by 0 by clamping cpi->twopass.kf_group_error_left to 1 int alt_gf_bits = (int)((double)cpi->twopass.kf_group_bits * mod_frame_err / (double)VPXMAX(cpi->twopass.kf_group_error_left, 1)); if (alt_gf_bits > gf_bits) { gf_bits = alt_gf_bits; } } /* Apply an additional limit for CBR */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { if (cpi->twopass.gf_bits > (int)(cpi->buffer_level >> 1)) { cpi->twopass.gf_bits = (int)(cpi->buffer_level >> 1); } } /* Dont allow a negative value for gf_bits */ if (gf_bits < 0) gf_bits = 0; /* Add in minimum for a frame */ gf_bits += cpi->min_frame_bandwidth; if (i == 0) { cpi->twopass.gf_bits = gf_bits; } if (i == 1 || (!cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME))) { /* Per frame bit target for this frame */ cpi->per_frame_bandwidth = gf_bits; } } { /* Adjust KF group bits and error remainin */ cpi->twopass.kf_group_error_left -= (int64_t)gf_group_err; cpi->twopass.kf_group_bits -= cpi->twopass.gf_group_bits; if (cpi->twopass.kf_group_bits < 0) cpi->twopass.kf_group_bits = 0; /* Note the error score left in the remaining frames of the group. 
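* (This pool, gf_group_error_left, is then drained frame by frame in * assign_std_frame_bits() as per-frame targets are computed.)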
* For normal GFs we want to remove the error score for the first * frame of the group (except in Key frame case where this has * already happened) */ if (!cpi->source_alt_ref_pending && cpi->common.frame_type != KEY_FRAME) { cpi->twopass.gf_group_error_left = (int)(gf_group_err - gf_first_frame_err); } else { cpi->twopass.gf_group_error_left = (int)gf_group_err; } cpi->twopass.gf_group_bits -= cpi->twopass.gf_bits - cpi->min_frame_bandwidth; if (cpi->twopass.gf_group_bits < 0) cpi->twopass.gf_group_bits = 0; /* This condition could fail if there are two kfs very close together * despite (MIN_GF_INTERVAL) and would cause a devide by 0 in the * calculation of cpi->twopass.alt_extra_bits. */ if (cpi->baseline_gf_interval >= 3) { #if NEW_BOOST int boost = (cpi->source_alt_ref_pending) ? b_boost : cpi->gfu_boost; #else int boost = cpi->gfu_boost; #endif if (boost >= 150) { int pct_extra; pct_extra = (boost - 100) / 50; pct_extra = (pct_extra > 20) ? 20 : pct_extra; cpi->twopass.alt_extra_bits = (int)(cpi->twopass.gf_group_bits * pct_extra) / 100; cpi->twopass.gf_group_bits -= cpi->twopass.alt_extra_bits; cpi->twopass.alt_extra_bits /= ((cpi->baseline_gf_interval - 1) >> 1); } else { cpi->twopass.alt_extra_bits = 0; } } else { cpi->twopass.alt_extra_bits = 0; } } /* Adjustments based on a measure of complexity of the section */ if (cpi->common.frame_type != KEY_FRAME) { FIRSTPASS_STATS sectionstats; double Ratio; zero_stats(§ionstats); reset_fpf_position(cpi, start_pos); for (i = 0; i < cpi->baseline_gf_interval; ++i) { input_stats(cpi, &next_frame); accumulate_stats(§ionstats, &next_frame); } avg_stats(§ionstats); cpi->twopass.section_intra_rating = (unsigned int)(sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025); if (cpi->twopass.section_max_qfactor < 0.80) { cpi->twopass.section_max_qfactor = 0.80; } reset_fpf_position(cpi, start_pos); } } /* Allocate bits to a normal frame that is neither a gf an arf or a key frame. */ static void assign_std_frame_bits(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { int target_frame_size; double modified_err; double err_fraction; int max_bits = frame_max_bits(cpi); /* Max for a single frame */ /* Calculate modified prediction error used in bit allocation */ modified_err = calculate_modified_err(cpi, this_frame); /* What portion of the remaining GF group error is used by this frame */ if (cpi->twopass.gf_group_error_left > 0) { err_fraction = modified_err / cpi->twopass.gf_group_error_left; } else { err_fraction = 0.0; } /* How many of those bits available for allocation should we give it? */ target_frame_size = (int)((double)cpi->twopass.gf_group_bits * err_fraction); /* Clip to target size to 0 - max_bits (or cpi->twopass.gf_group_bits) * at the top end. */ if (target_frame_size < 0) { target_frame_size = 0; } else { if (target_frame_size > max_bits) target_frame_size = max_bits; if (target_frame_size > cpi->twopass.gf_group_bits) { target_frame_size = (int)cpi->twopass.gf_group_bits; } } /* Adjust error and bits remaining */ cpi->twopass.gf_group_error_left -= (int)modified_err; cpi->twopass.gf_group_bits -= target_frame_size; if (cpi->twopass.gf_group_bits < 0) cpi->twopass.gf_group_bits = 0; /* Add in the minimum number of bits that is set aside for every frame. 
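* (cpi->min_frame_bandwidth acts as a floor here, so even a frame with a * zero error share still receives a nonzero target.)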
*/ target_frame_size += cpi->min_frame_bandwidth; /* Every other frame gets a few extra bits */ if ((cpi->frames_since_golden & 0x01) && (cpi->frames_till_gf_update_due > 0)) { target_frame_size += cpi->twopass.alt_extra_bits; } /* Per frame bit target for this frame */ cpi->per_frame_bandwidth = target_frame_size; } void vp8_second_pass(VP8_COMP *cpi) { int tmp_q; int frames_left = (int)(cpi->twopass.total_stats.count - cpi->common.current_video_frame); FIRSTPASS_STATS this_frame; FIRSTPASS_STATS this_frame_copy; double this_frame_intra_error; double this_frame_coded_error; int overhead_bits; vp8_zero(this_frame); if (!cpi->twopass.stats_in) { return; } vpx_clear_system_state(); if (EOF == input_stats(cpi, &this_frame)) return; this_frame_intra_error = this_frame.intra_error; this_frame_coded_error = this_frame.coded_error; /* Keyframe and section processing */ if (cpi->twopass.frames_to_key == 0) { /* Define next KF group and assign bits to it */ memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); find_next_key_frame(cpi, &this_frame_copy); /* Special case: error_resilient_mode does not make much * sense for two pass but with its current meaning this code is * designed to stop outlandish behaviour if someone does set it when * using two pass. It effectively disables GF groups. This is * temporary code until we decide what should really happen in this * case. */ if (cpi->oxcf.error_resilient_mode) { cpi->twopass.gf_group_bits = cpi->twopass.kf_group_bits; cpi->twopass.gf_group_error_left = (int)cpi->twopass.kf_group_error_left; cpi->baseline_gf_interval = cpi->twopass.frames_to_key; cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; cpi->source_alt_ref_pending = 0; } } /* Is this a GF / ARF (Note that a KF is always also a GF) */ if (cpi->frames_till_gf_update_due == 0) { /* Define next gf group and assign bits to it */ memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); define_gf_group(cpi, &this_frame_copy); /* If we are going to code an altref frame at the end of the group * and the current frame is not a key frame.... If the previous * group used an arf this frame has already benefited from that arf * boost and it should not be given extra bits. If the previous * group was NOT coded using arf we may want to apply some boost to * this GF as well */ if (cpi->source_alt_ref_pending && (cpi->common.frame_type != KEY_FRAME)) { /* Assign a standard frame's worth of bits from those allocated * to the GF group */ int bak = cpi->per_frame_bandwidth; memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); assign_std_frame_bits(cpi, &this_frame_copy); cpi->per_frame_bandwidth = bak; } } /* Otherwise this is an ordinary frame */ else { /* Special case: error_resilient_mode does not make much * sense for two pass but with its current meaning this code is * designed to stop outlandish behaviour if someone does set it * when using two pass. It effectively disables GF groups. This is * temporary code until we decide what should really happen in this * case.
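* Here that just means pushing the next GF update out to the next key * frame, so no fresh GF group is ever defined in between.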
*/ if (cpi->oxcf.error_resilient_mode) { cpi->frames_till_gf_update_due = cpi->twopass.frames_to_key; if (cpi->common.frame_type != KEY_FRAME) { /* Assign bits from those allocated to the GF group */ memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); assign_std_frame_bits(cpi, &this_frame_copy); } } else { /* Assign bits from those allocated to the GF group */ memcpy(&this_frame_copy, &this_frame, sizeof(this_frame)); assign_std_frame_bits(cpi, &this_frame_copy); } } /* Keep a globally available copy of this and the next frame's iiratio. */ cpi->twopass.this_iiratio = (unsigned int)(this_frame_intra_error / DOUBLE_DIVIDE_CHECK(this_frame_coded_error)); { FIRSTPASS_STATS next_frame; if (lookup_next_frame_stats(cpi, &next_frame) != EOF) { cpi->twopass.next_iiratio = (unsigned int)(next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); } } /* Set nominal per second bandwidth for this frame */ cpi->target_bandwidth = (int)(cpi->per_frame_bandwidth * cpi->output_framerate); if (cpi->target_bandwidth < 0) cpi->target_bandwidth = 0; /* Account for mv, mode and other overheads. */ overhead_bits = (int)estimate_modemvcost(cpi, &cpi->twopass.total_left_stats); /* Special case code for first frame. */ if (cpi->common.current_video_frame == 0) { cpi->twopass.est_max_qcorrection_factor = 1.0; /* Set a cq_level in constrained quality mode. */ if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { int est_cq; est_cq = estimate_cq(cpi, &cpi->twopass.total_left_stats, (int)(cpi->twopass.bits_left / frames_left), overhead_bits); cpi->cq_target_quality = cpi->oxcf.cq_level; if (est_cq > cpi->cq_target_quality) cpi->cq_target_quality = est_cq; } /* guess at maxq needed in 2nd pass */ cpi->twopass.maxq_max_limit = cpi->worst_quality; cpi->twopass.maxq_min_limit = cpi->best_quality; tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats, (int)(cpi->twopass.bits_left / frames_left), overhead_bits); /* Limit the maxq value returned subsequently. * This increases the risk of overspend or underspend if the initial * estimate for the clip is bad, but helps prevent excessive * variation in Q, especially near the end of a clip * where for example a small overspend may cause Q to crash */ cpi->twopass.maxq_max_limit = ((tmp_q + 32) < cpi->worst_quality) ? (tmp_q + 32) : cpi->worst_quality; cpi->twopass.maxq_min_limit = ((tmp_q - 32) > cpi->best_quality) ? (tmp_q - 32) : cpi->best_quality; cpi->active_worst_quality = tmp_q; cpi->ni_av_qi = tmp_q; } /* The last few frames of a clip almost always have to few or too many * bits and for the sake of over exact rate control we dont want to make * radical adjustments to the allowed quantizer range just to use up a * few surplus bits or get beneath the target rate. 
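* (The (count * 255) >> 8 test below means this branch is skipped for * roughly the last 1/256th of the clip, and likewise once the end is * within one GF interval.)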
*/ else if ((cpi->common.current_video_frame < (((unsigned int)cpi->twopass.total_stats.count * 255) >> 8)) && ((cpi->common.current_video_frame + cpi->baseline_gf_interval) < (unsigned int)cpi->twopass.total_stats.count)) { if (frames_left < 1) frames_left = 1; tmp_q = estimate_max_q(cpi, &cpi->twopass.total_left_stats, (int)(cpi->twopass.bits_left / frames_left), overhead_bits); /* Move active_worst_quality but in a damped way */ if (tmp_q > cpi->active_worst_quality) { cpi->active_worst_quality++; } else if (tmp_q < cpi->active_worst_quality) { cpi->active_worst_quality--; } cpi->active_worst_quality = ((cpi->active_worst_quality * 3) + tmp_q + 2) / 4; } cpi->twopass.frames_to_key--; /* Update the total stats remaining sturcture */ subtract_stats(&cpi->twopass.total_left_stats, &this_frame); } static int test_candidate_kf(VP8_COMP *cpi, FIRSTPASS_STATS *last_frame, FIRSTPASS_STATS *this_frame, FIRSTPASS_STATS *next_frame) { int is_viable_kf = 0; /* Does the frame satisfy the primary criteria of a key frame * If so, then examine how well it predicts subsequent frames */ if ((this_frame->pcnt_second_ref < 0.10) && (next_frame->pcnt_second_ref < 0.10) && ((this_frame->pcnt_inter < 0.05) || (((this_frame->pcnt_inter - this_frame->pcnt_neutral) < .25) && ((this_frame->intra_error / DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) < 2.5) && ((fabs(last_frame->coded_error - this_frame->coded_error) / DOUBLE_DIVIDE_CHECK(this_frame->coded_error) > .40) || (fabs(last_frame->intra_error - this_frame->intra_error) / DOUBLE_DIVIDE_CHECK(this_frame->intra_error) > .40) || ((next_frame->intra_error / DOUBLE_DIVIDE_CHECK(next_frame->coded_error)) > 3.5))))) { int i; FIRSTPASS_STATS *start_pos; FIRSTPASS_STATS local_next_frame; double boost_score = 0.0; double old_boost_score = 0.0; double decay_accumulator = 1.0; double next_iiratio; memcpy(&local_next_frame, next_frame, sizeof(*next_frame)); /* Note the starting file position so we can reset to it */ start_pos = cpi->twopass.stats_in; /* Examine how well the key frame predicts subsequent frames */ for (i = 0; i < 16; ++i) { next_iiratio = (IIKFACTOR1 * local_next_frame.intra_error / DOUBLE_DIVIDE_CHECK(local_next_frame.coded_error)); if (next_iiratio > RMAX) next_iiratio = RMAX; /* Cumulative effect of decay in prediction quality */ if (local_next_frame.pcnt_inter > 0.85) { decay_accumulator = decay_accumulator * local_next_frame.pcnt_inter; } else { decay_accumulator = decay_accumulator * ((0.85 + local_next_frame.pcnt_inter) / 2.0); } /* Keep a running total */ boost_score += (decay_accumulator * next_iiratio); /* Test various breakout clauses */ if ((local_next_frame.pcnt_inter < 0.05) || (next_iiratio < 1.5) || (((local_next_frame.pcnt_inter - local_next_frame.pcnt_neutral) < 0.20) && (next_iiratio < 3.0)) || ((boost_score - old_boost_score) < 0.5) || (local_next_frame.intra_error < 200)) { break; } old_boost_score = boost_score; /* Get the next frame details */ if (EOF == input_stats(cpi, &local_next_frame)) break; } /* If there is tolerable prediction for at least the next 3 frames * then break out else discard this pottential key frame and move on */ if (boost_score > 5.0 && (i > 3)) { is_viable_kf = 1; } else { /* Reset the file position */ reset_fpf_position(cpi, start_pos); is_viable_kf = 0; } } return is_viable_kf; } static void find_next_key_frame(VP8_COMP *cpi, FIRSTPASS_STATS *this_frame) { int i, j; FIRSTPASS_STATS last_frame; FIRSTPASS_STATS first_frame; FIRSTPASS_STATS next_frame; FIRSTPASS_STATS *start_position; double 
decay_accumulator = 1.0; double boost_score = 0; double old_boost_score = 0.0; double loop_decay_rate; double kf_mod_err = 0.0; double kf_group_err = 0.0; double kf_group_intra_err = 0.0; double kf_group_coded_err = 0.0; double recent_loop_decay[8] = { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 }; memset(&next_frame, 0, sizeof(next_frame)); vpx_clear_system_state(); start_position = cpi->twopass.stats_in; cpi->common.frame_type = KEY_FRAME; /* is this a forced key frame by interval */ cpi->this_key_frame_forced = cpi->next_key_frame_forced; /* Clear the alt ref active flag as this can never be active on a key * frame */ cpi->source_alt_ref_active = 0; /* Kf is always a gf so clear frames till next gf counter */ cpi->frames_till_gf_update_due = 0; cpi->twopass.frames_to_key = 1; /* Take a copy of the initial frame details */ memcpy(&first_frame, this_frame, sizeof(*this_frame)); cpi->twopass.kf_group_bits = 0; cpi->twopass.kf_group_error_left = 0; kf_mod_err = calculate_modified_err(cpi, this_frame); /* find the next keyframe */ i = 0; while (cpi->twopass.stats_in < cpi->twopass.stats_in_end) { /* Accumulate kf group error */ kf_group_err += calculate_modified_err(cpi, this_frame); /* These figures keep intra and coded error counts for all frames * including key frames in the group. The effect of the key frame * itself can be subtracted out using the first_frame data * collected above */ kf_group_intra_err += this_frame->intra_error; kf_group_coded_err += this_frame->coded_error; /* Load the next frame's stats. */ memcpy(&last_frame, this_frame, sizeof(*this_frame)); input_stats(cpi, this_frame); /* Provided that we are not at the end of the file... */ if (cpi->oxcf.auto_key && lookup_next_frame_stats(cpi, &next_frame) != EOF) { /* Normal scene cut check */ if ((i >= MIN_GF_INTERVAL) && test_candidate_kf(cpi, &last_frame, this_frame, &next_frame)) { break; } /* How fast is prediction quality decaying */ loop_decay_rate = get_prediction_decay_rate(&next_frame); /* We want to know something about the recent past... rather than * as used elsewhere where we are concened with decay in prediction * quality since the last GF or KF. */ recent_loop_decay[i % 8] = loop_decay_rate; decay_accumulator = 1.0; for (j = 0; j < 8; ++j) { decay_accumulator = decay_accumulator * recent_loop_decay[j]; } /* Special check for transition or high motion followed by a * static scene. */ if (detect_transition_to_still(cpi, i, ((int)(cpi->key_frame_frequency) - (int)i), loop_decay_rate, decay_accumulator)) { break; } /* Step on to the next frame */ cpi->twopass.frames_to_key++; /* If we don't have a real key frame within the next two * forcekeyframeevery intervals then break out of the loop. */ if (cpi->twopass.frames_to_key >= 2 * (int)cpi->key_frame_frequency) { break; } } else { cpi->twopass.frames_to_key++; } i++; } /* If there is a max kf interval set by the user we must obey it. * We already breakout of the loop above at 2x max. 
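* (The scan loop above gives up at 2 * key_frame_frequency, so halving * frames_to_key is always enough to get back under the user limit.)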
* This code centers the extra kf if the actual natural * interval is between 1x and 2x */ if (cpi->oxcf.auto_key && cpi->twopass.frames_to_key > (int)cpi->key_frame_frequency) { FIRSTPASS_STATS *current_pos = cpi->twopass.stats_in; FIRSTPASS_STATS tmp_frame; cpi->twopass.frames_to_key /= 2; /* Copy first frame details */ memcpy(&tmp_frame, &first_frame, sizeof(first_frame)); /* Reset to the start of the group */ reset_fpf_position(cpi, start_position); kf_group_err = 0; kf_group_intra_err = 0; kf_group_coded_err = 0; /* Rescan to get the correct error data for the forced kf group */ for (i = 0; i < cpi->twopass.frames_to_key; ++i) { /* Accumulate kf group errors */ kf_group_err += calculate_modified_err(cpi, &tmp_frame); kf_group_intra_err += tmp_frame.intra_error; kf_group_coded_err += tmp_frame.coded_error; /* Load the next frame's stats */ input_stats(cpi, &tmp_frame); } /* Reset to the start of the group */ reset_fpf_position(cpi, current_pos); cpi->next_key_frame_forced = 1; } else { cpi->next_key_frame_forced = 0; } /* Special case for the last frame of the file */ if (cpi->twopass.stats_in >= cpi->twopass.stats_in_end) { /* Accumulate kf group error */ kf_group_err += calculate_modified_err(cpi, this_frame); /* These figures keep intra and coded error counts for all frames * including key frames in the group. The effect of the key frame * itself can be subtracted out using the first_frame data * collected above */ kf_group_intra_err += this_frame->intra_error; kf_group_coded_err += this_frame->coded_error; } /* Calculate the number of bits that should be assigned to the kf group. */ if ((cpi->twopass.bits_left > 0) && (cpi->twopass.modified_error_left > 0.0)) { /* Max for a single normal frame (not key frame) */ int max_bits = frame_max_bits(cpi); /* Maximum bits for the kf group */ int64_t max_grp_bits; /* Default allocation based on bits left and relative * complexity of the section */ cpi->twopass.kf_group_bits = (int64_t)(cpi->twopass.bits_left * (kf_group_err / cpi->twopass.modified_error_left)); /* Clip based on maximum per frame rate defined by the user. */ max_grp_bits = (int64_t)max_bits * (int64_t)cpi->twopass.frames_to_key; if (cpi->twopass.kf_group_bits > max_grp_bits) { cpi->twopass.kf_group_bits = max_grp_bits; } /* Additional special case for CBR if buffer is getting full. */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { int64_t opt_buffer_lvl = cpi->oxcf.optimal_buffer_level; int64_t buffer_lvl = cpi->buffer_level; /* If the buffer is near or above the optimal and this kf group is * not being allocated much then increase the allocation a bit. */ if (buffer_lvl >= opt_buffer_lvl) { int64_t high_water_mark = (opt_buffer_lvl + cpi->oxcf.maximum_buffer_size) >> 1; int64_t av_group_bits; /* Av bits per frame * number of frames */ av_group_bits = (int64_t)cpi->av_per_frame_bandwidth * (int64_t)cpi->twopass.frames_to_key; /* We are at or above the maximum.
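* In that case guarantee at least the average group bits plus the excess * above the mark (high_water_mark is the midpoint of the optimal and * maximum buffer levels).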
*/ if (cpi->buffer_level >= high_water_mark) { int64_t min_group_bits; min_group_bits = av_group_bits + (int64_t)(buffer_lvl - high_water_mark); if (cpi->twopass.kf_group_bits < min_group_bits) { cpi->twopass.kf_group_bits = min_group_bits; } } /* We are above optimal but below the maximum */ else if (cpi->twopass.kf_group_bits < av_group_bits) { int64_t bits_below_av = av_group_bits - cpi->twopass.kf_group_bits; cpi->twopass.kf_group_bits += (int64_t)( (double)bits_below_av * (double)(buffer_lvl - opt_buffer_lvl) / (double)(high_water_mark - opt_buffer_lvl)); } } } } else { cpi->twopass.kf_group_bits = 0; } /* Reset the first pass file position */ reset_fpf_position(cpi, start_position); /* determine how big to make this keyframe based on how well the * subsequent frames use inter blocks */ decay_accumulator = 1.0; boost_score = 0.0; for (i = 0; i < cpi->twopass.frames_to_key; ++i) { double r; if (EOF == input_stats(cpi, &next_frame)) break; if (next_frame.intra_error > cpi->twopass.kf_intra_err_min) { r = (IIKFACTOR2 * next_frame.intra_error / DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); } else { r = (IIKFACTOR2 * cpi->twopass.kf_intra_err_min / DOUBLE_DIVIDE_CHECK(next_frame.coded_error)); } if (r > RMAX) r = RMAX; /* How fast is prediction quality decaying */ loop_decay_rate = get_prediction_decay_rate(&next_frame); decay_accumulator = decay_accumulator * loop_decay_rate; decay_accumulator = decay_accumulator < 0.1 ? 0.1 : decay_accumulator; boost_score += (decay_accumulator * r); if ((i > MIN_GF_INTERVAL) && ((boost_score - old_boost_score) < 1.0)) { break; } old_boost_score = boost_score; } if (1) { FIRSTPASS_STATS sectionstats; double Ratio; zero_stats(§ionstats); reset_fpf_position(cpi, start_position); for (i = 0; i < cpi->twopass.frames_to_key; ++i) { input_stats(cpi, &next_frame); accumulate_stats(§ionstats, &next_frame); } avg_stats(§ionstats); cpi->twopass.section_intra_rating = (unsigned int)(sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error)); Ratio = sectionstats.intra_error / DOUBLE_DIVIDE_CHECK(sectionstats.coded_error); cpi->twopass.section_max_qfactor = 1.0 - ((Ratio - 10.0) * 0.025); if (cpi->twopass.section_max_qfactor < 0.80) { cpi->twopass.section_max_qfactor = 0.80; } } /* When using CBR apply additional buffer fullness related upper limits */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { double max_boost; if (cpi->drop_frames_allowed) { int df_buffer_level = (int)(cpi->oxcf.drop_frames_water_mark * (cpi->oxcf.optimal_buffer_level / 100)); if (cpi->buffer_level > df_buffer_level) { max_boost = ((double)((cpi->buffer_level - df_buffer_level) * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth); } else { max_boost = 0.0; } } else if (cpi->buffer_level > 0) { max_boost = ((double)(cpi->buffer_level * 2 / 3) * 16.0) / DOUBLE_DIVIDE_CHECK((double)cpi->av_per_frame_bandwidth); } else { max_boost = 0.0; } if (boost_score > max_boost) boost_score = max_boost; } /* Reset the first pass file position */ reset_fpf_position(cpi, start_position); /* Work out how many bits to allocate for the key frame itself */ if (1) { int kf_boost = (int)boost_score; int allocation_chunks; int Counter = cpi->twopass.frames_to_key; int alt_kf_bits; YV12_BUFFER_CONFIG *lst_yv12 = &cpi->common.yv12_fb[cpi->common.lst_fb_idx]; /* Min boost based on kf interval */ #if 0 while ((kf_boost < 48) && (Counter > 0)) { Counter -= 2; kf_boost ++; } #endif if (kf_boost < 48) { kf_boost += ((Counter + 1) >> 1); if (kf_boost > 48) kf_boost = 48; } /* 
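* Scale the boost with image area relative to a 320x240 baseline: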
bigger frame sizes need larger kf boosts, smaller frames smaller * boosts... */ if ((lst_yv12->y_width * lst_yv12->y_height) > (320 * 240)) { kf_boost += 2 * (lst_yv12->y_width * lst_yv12->y_height) / (320 * 240); } else if ((lst_yv12->y_width * lst_yv12->y_height) < (320 * 240)) { kf_boost -= 4 * (320 * 240) / (lst_yv12->y_width * lst_yv12->y_height); } /* Min KF boost */ kf_boost = (int)((double)kf_boost * 100.0) >> 4; /* Scale 16 to 100 */ if (kf_boost < 250) kf_boost = 250; /* * We do three calculations for kf size. * The first is based on the error score for the whole kf group. * The second (optionally) on the key frame's own error if this is * smaller than the average for the group. * The final one ensures that the frame receives at least the * allocation it would have received based on its own error score vs * the error score remaining. * Special case if the sequence appears almost totally static * as measured by the decay accumulator. In this case we want to * spend almost all of the bits on the key frame. * cpi->twopass.frames_to_key-1 because key frame itself is taken * care of by kf_boost. */ if (decay_accumulator >= 0.99) { allocation_chunks = ((cpi->twopass.frames_to_key - 1) * 10) + kf_boost; } else { allocation_chunks = ((cpi->twopass.frames_to_key - 1) * 100) + kf_boost; } /* Normalize the boost and allocation chunks down to prevent overflow */ while (kf_boost > 1000) { kf_boost /= 2; allocation_chunks /= 2; } cpi->twopass.kf_group_bits = (cpi->twopass.kf_group_bits < 0) ? 0 : cpi->twopass.kf_group_bits; /* Calculate the number of bits to be spent on the key frame */ cpi->twopass.kf_bits = (int)((double)kf_boost * ((double)cpi->twopass.kf_group_bits / (double)allocation_chunks)); /* Apply an additional limit for CBR */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { if (cpi->twopass.kf_bits > (int)((3 * cpi->buffer_level) >> 2)) { cpi->twopass.kf_bits = (int)((3 * cpi->buffer_level) >> 2); } } /* If the key frame is actually easier than the average for the * kf group (which does sometimes happen... e.g. a blank intro frame) * then use an alternate calculation based on the kf error score * which should give a smaller key frame. */ if (kf_mod_err < kf_group_err / cpi->twopass.frames_to_key) { double alt_kf_grp_bits = ((double)cpi->twopass.bits_left * (kf_mod_err * (double)cpi->twopass.frames_to_key) / DOUBLE_DIVIDE_CHECK(cpi->twopass.modified_error_left)); alt_kf_bits = (int)((double)kf_boost * (alt_kf_grp_bits / (double)allocation_chunks)); if (cpi->twopass.kf_bits > alt_kf_bits) { cpi->twopass.kf_bits = alt_kf_bits; } } /* Else if it is much harder than other frames in the group make sure * it at least receives an allocation in keeping with its relative * error score */ else { alt_kf_bits = (int)((double)cpi->twopass.bits_left * (kf_mod_err / DOUBLE_DIVIDE_CHECK( cpi->twopass.modified_error_left))); if (alt_kf_bits > cpi->twopass.kf_bits) { cpi->twopass.kf_bits = alt_kf_bits; } } cpi->twopass.kf_group_bits -= cpi->twopass.kf_bits; /* Add in the minimum frame allowance */ cpi->twopass.kf_bits += cpi->min_frame_bandwidth; /* Per frame bit target for this frame */ cpi->per_frame_bandwidth = cpi->twopass.kf_bits; /* Convert to a per second bitrate */ cpi->target_bandwidth = (int)(cpi->twopass.kf_bits * cpi->output_framerate); } /* Note the total error score of the kf group minus the key frame itself */ cpi->twopass.kf_group_error_left = (int)(kf_group_err - kf_mod_err); /* Adjust the count of total modified error left.
The count of bits left * is adjusted elsewhere based on real coded frame sizes */ cpi->twopass.modified_error_left -= kf_group_err; if (cpi->oxcf.allow_spatial_resampling) { int resample_trigger = 0; int last_kf_resampled = 0; int kf_q; int scale_val = 0; int hr, hs, vr, vs; int new_width = cpi->oxcf.Width; int new_height = cpi->oxcf.Height; int projected_buffer_level; int tmp_q; double projected_bits_perframe; double group_iiratio = (kf_group_intra_err - first_frame.intra_error) / (kf_group_coded_err - first_frame.coded_error); double err_per_frame = kf_group_err / cpi->twopass.frames_to_key; double bits_per_frame; double av_bits_per_frame; double effective_size_ratio; if ((cpi->common.Width != cpi->oxcf.Width) || (cpi->common.Height != cpi->oxcf.Height)) { last_kf_resampled = 1; } /* Set back to unscaled by defaults */ cpi->common.horiz_scale = NORMAL; cpi->common.vert_scale = NORMAL; /* Calculate Average bits per frame. */ av_bits_per_frame = cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate); /* CBR... Use the clip average as the target for deciding resample */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { bits_per_frame = av_bits_per_frame; } /* In VBR we want to avoid downsampling in easy section unless we * are under extreme pressure So use the larger of target bitrate * for this section or average bitrate for sequence */ else { /* This accounts for how hard the section is... */ bits_per_frame = (double)(cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key); /* Dont turn to resampling in easy sections just because they * have been assigned a small number of bits */ if (bits_per_frame < av_bits_per_frame) { bits_per_frame = av_bits_per_frame; } } /* bits_per_frame should comply with our minimum */ if (bits_per_frame < (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100)) { bits_per_frame = (cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); } /* Work out if spatial resampling is necessary */ kf_q = estimate_kf_group_q(cpi, err_per_frame, (int)bits_per_frame, group_iiratio); /* If we project a required Q higher than the maximum allowed Q then * make a guess at the actual size of frames in this section */ projected_bits_perframe = bits_per_frame; tmp_q = kf_q; while (tmp_q > cpi->worst_quality) { projected_bits_perframe *= 1.04; tmp_q--; } /* Guess at buffer level at the end of the section */ projected_buffer_level = (int)(cpi->buffer_level - (int)((projected_bits_perframe - av_bits_per_frame) * cpi->twopass.frames_to_key)); if (0) { FILE *f = fopen("Subsamle.stt", "a"); fprintf(f, " %8d %8d %8d %8d %12.0f %8d %8d %8d\n", cpi->common.current_video_frame, kf_q, cpi->common.horiz_scale, cpi->common.vert_scale, kf_group_err / cpi->twopass.frames_to_key, (int)(cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key), new_height, new_width); fclose(f); } /* The trigger for spatial resampling depends on the various * parameters such as whether we are streaming (CBR) or VBR. 
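* For CBR the trigger compares the projected buffer level against the * down/up-sample water marks; for VBR it requires a projected Q above * worst_quality plus, unless we resampled last time, an overspend of * more than 1/20th of the clip budget.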
*/ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { /* Trigger resample if we are projected to fall below down * sample level or resampled last time and are projected to * remain below the up sample level */ if ((projected_buffer_level < (cpi->oxcf.resample_down_water_mark * cpi->oxcf.optimal_buffer_level / 100)) || (last_kf_resampled && (projected_buffer_level < (cpi->oxcf.resample_up_water_mark * cpi->oxcf.optimal_buffer_level / 100)))) { resample_trigger = 1; } else { resample_trigger = 0; } } else { int64_t clip_bits = (int64_t)( cpi->twopass.total_stats.count * cpi->oxcf.target_bandwidth / DOUBLE_DIVIDE_CHECK((double)cpi->framerate)); int64_t over_spend = cpi->oxcf.starting_buffer_level - cpi->buffer_level; /* If triggered last time the threshold for triggering again is * reduced: * * Projected Q higher than allowed and Overspend > 5% of total * bits */ if ((last_kf_resampled && (kf_q > cpi->worst_quality)) || ((kf_q > cpi->worst_quality) && (over_spend > clip_bits / 20))) { resample_trigger = 1; } else { resample_trigger = 0; } } if (resample_trigger) { while ((kf_q >= cpi->worst_quality) && (scale_val < 6)) { scale_val++; cpi->common.vert_scale = vscale_lookup[scale_val]; cpi->common.horiz_scale = hscale_lookup[scale_val]; Scale2Ratio(cpi->common.horiz_scale, &hr, &hs); Scale2Ratio(cpi->common.vert_scale, &vr, &vs); new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs; new_height = ((vs - 1) + (cpi->oxcf.Height * vr)) / vs; /* Reducing the area to 1/4 does not reduce the complexity * (err_per_frame) to 1/4... effective_sizeratio attempts * to provide a crude correction for this */ effective_size_ratio = (double)(new_width * new_height) / (double)(cpi->oxcf.Width * cpi->oxcf.Height); effective_size_ratio = (1.0 + (3.0 * effective_size_ratio)) / 4.0; /* Now try again and see what Q we get with the smaller * image size */ kf_q = estimate_kf_group_q(cpi, err_per_frame * effective_size_ratio, (int)bits_per_frame, group_iiratio); if (0) { FILE *f = fopen("Subsamle.stt", "a"); fprintf( f, "******** %8d %8d %8d %12.0f %8d %8d %8d\n", kf_q, cpi->common.horiz_scale, cpi->common.vert_scale, kf_group_err / cpi->twopass.frames_to_key, (int)(cpi->twopass.kf_group_bits / cpi->twopass.frames_to_key), new_height, new_width); fclose(f); } } } if ((cpi->common.Width != new_width) || (cpi->common.Height != new_height)) { cpi->common.Width = new_width; cpi->common.Height = new_height; vp8_alloc_compressor_data(cpi); } } } libvpx-1.8.2/vp8/encoder/firstpass.h000066400000000000000000000016711357355204000173620ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP8_ENCODER_FIRSTPASS_H_ #define VPX_VP8_ENCODER_FIRSTPASS_H_ #ifdef __cplusplus extern "C" { #endif extern void vp8_init_first_pass(VP8_COMP *cpi); extern void vp8_first_pass(VP8_COMP *cpi); extern void vp8_end_first_pass(VP8_COMP *cpi); extern void vp8_init_second_pass(VP8_COMP *cpi); extern void vp8_second_pass(VP8_COMP *cpi); extern void vp8_end_second_pass(VP8_COMP *cpi); extern size_t vp8_firstpass_stats_sz(unsigned int mb_count); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_FIRSTPASS_H_ libvpx-1.8.2/vp8/encoder/lookahead.c000066400000000000000000000123161357355204000172640ustar00rootroot00000000000000/* * Copyright (c) 2011 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "vpx_config.h" #include "lookahead.h" #include "vp8/common/extend.h" #define MAX_LAG_BUFFERS (CONFIG_REALTIME_ONLY ? 1 : 25) struct lookahead_ctx { unsigned int max_sz; /* Absolute size of the queue */ unsigned int sz; /* Number of buffers currently in the queue */ unsigned int read_idx; /* Read index */ unsigned int write_idx; /* Write index */ struct lookahead_entry *buf; /* Buffer list */ }; /* Return the buffer at the given absolute index and increment the index */ static struct lookahead_entry *pop(struct lookahead_ctx *ctx, unsigned int *idx) { unsigned int index = *idx; struct lookahead_entry *buf = ctx->buf + index; assert(index < ctx->max_sz); if (++index >= ctx->max_sz) index -= ctx->max_sz; *idx = index; return buf; } void vp8_lookahead_destroy(struct lookahead_ctx *ctx) { if (ctx) { if (ctx->buf) { unsigned int i; for (i = 0; i < ctx->max_sz; ++i) { vp8_yv12_de_alloc_frame_buffer(&ctx->buf[i].img); } free(ctx->buf); } free(ctx); } } struct lookahead_ctx *vp8_lookahead_init(unsigned int width, unsigned int height, unsigned int depth) { struct lookahead_ctx *ctx = NULL; unsigned int i; /* Clamp the lookahead queue depth */ if (depth < 1) { depth = 1; } else if (depth > MAX_LAG_BUFFERS) { depth = MAX_LAG_BUFFERS; } /* Keep last frame in lookahead buffer by increasing depth by 1.*/ depth += 1; /* Align the buffer dimensions */ width = (width + 15) & ~15; height = (height + 15) & ~15; /* Allocate the lookahead structures */ ctx = calloc(1, sizeof(*ctx)); if (ctx) { ctx->max_sz = depth; ctx->buf = calloc(depth, sizeof(*ctx->buf)); if (!ctx->buf) goto bail; for (i = 0; i < depth; ++i) { if (vp8_yv12_alloc_frame_buffer(&ctx->buf[i].img, width, height, VP8BORDERINPIXELS)) { goto bail; } } } return ctx; bail: vp8_lookahead_destroy(ctx); return NULL; } int vp8_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int64_t ts_start, int64_t ts_end, unsigned int flags, unsigned char *active_map) { struct lookahead_entry *buf; int row, col, active_end; int mb_rows = (src->y_height + 15) >> 4; int mb_cols = (src->y_width + 15) >> 4; if (ctx->sz + 2 > ctx->max_sz) return 1; ctx->sz++; buf = pop(ctx, &ctx->write_idx); /* Only do this partial copy if the following conditions are all met: * 1. Lookahead queue has has size of 1. * 2. Active map is provided. * 3. This is not a key frame, golden nor altref frame. 
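* (These map onto the test below: ctx->max_sz == 1, active_map != NULL * and flags == 0.)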
*/ if (ctx->max_sz == 1 && active_map && !flags) { for (row = 0; row < mb_rows; ++row) { col = 0; while (1) { /* Find the first active macroblock in this row. */ for (; col < mb_cols; ++col) { if (active_map[col]) break; } /* No more active macroblock in this row. */ if (col == mb_cols) break; /* Find the end of active region in this row. */ active_end = col; for (; active_end < mb_cols; ++active_end) { if (!active_map[active_end]) break; } /* Only copy this active region. */ vp8_copy_and_extend_frame_with_rect(src, &buf->img, row << 4, col << 4, 16, (active_end - col) << 4); /* Start again from the end of this active region. */ col = active_end; } active_map += mb_cols; } } else { vp8_copy_and_extend_frame(src, &buf->img); } buf->ts_start = ts_start; buf->ts_end = ts_end; buf->flags = flags; return 0; } struct lookahead_entry *vp8_lookahead_pop(struct lookahead_ctx *ctx, int drain) { struct lookahead_entry *buf = NULL; assert(ctx != NULL); if (ctx->sz && (drain || ctx->sz == ctx->max_sz - 1)) { buf = pop(ctx, &ctx->read_idx); ctx->sz--; } return buf; } struct lookahead_entry *vp8_lookahead_peek(struct lookahead_ctx *ctx, unsigned int index, int direction) { struct lookahead_entry *buf = NULL; if (direction == PEEK_FORWARD) { assert(index < ctx->max_sz - 1); if (index < ctx->sz) { index += ctx->read_idx; if (index >= ctx->max_sz) index -= ctx->max_sz; buf = ctx->buf + index; } } else if (direction == PEEK_BACKWARD) { assert(index == 1); if (ctx->read_idx == 0) { index = ctx->max_sz - 1; } else { index = ctx->read_idx - index; } buf = ctx->buf + index; } return buf; } unsigned int vp8_lookahead_depth(struct lookahead_ctx *ctx) { return ctx->sz; } libvpx-1.8.2/vp8/encoder/lookahead.h000066400000000000000000000062201357355204000172660ustar00rootroot00000000000000/* * Copyright (c) 2011 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_LOOKAHEAD_H_ #define VPX_VP8_ENCODER_LOOKAHEAD_H_ #include "vpx_scale/yv12config.h" #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif struct lookahead_entry { YV12_BUFFER_CONFIG img; int64_t ts_start; int64_t ts_end; unsigned int flags; }; struct lookahead_ctx; /**\brief Initializes the lookahead stage * * The lookahead stage is a queue of frame buffers on which some analysis * may be done when buffers are enqueued. * * */ struct lookahead_ctx *vp8_lookahead_init(unsigned int width, unsigned int height, unsigned int depth); /**\brief Destroys the lookahead stage * */ void vp8_lookahead_destroy(struct lookahead_ctx *ctx); /**\brief Enqueue a source buffer * * This function will copy the source image into a new framebuffer with * the expected stride/border. * * If active_map is non-NULL and there is only one frame in the queue, then copy * only active macroblocks. 
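* Returns 0 on success, or 1 when the queue cannot accept another frame * (the ctx->sz + 2 > ctx->max_sz check in the implementation).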
* * \param[in] ctx Pointer to the lookahead context * \param[in] src Pointer to the image to enqueue * \param[in] ts_start Timestamp for the start of this frame * \param[in] ts_end Timestamp for the end of this frame * \param[in] flags Flags set on this frame * \param[in] active_map Map that specifies which macroblock is active */ int vp8_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int64_t ts_start, int64_t ts_end, unsigned int flags, unsigned char *active_map); /**\brief Get the next source buffer to encode * * * \param[in] ctx Pointer to the lookahead context * \param[in] drain Flag indicating the buffer should be drained * (return a buffer regardless of the current queue depth) * * \retval NULL, if drain set and queue is empty * \retval NULL, if drain not set and queue not of the configured depth * */ struct lookahead_entry *vp8_lookahead_pop(struct lookahead_ctx *ctx, int drain); #define PEEK_FORWARD 1 #define PEEK_BACKWARD (-1) /**\brief Get a future source buffer to encode * * \param[in] ctx Pointer to the lookahead context * \param[in] index Index of the frame to be returned, 0 == next frame * * \retval NULL, if no buffer exists at the specified index * */ struct lookahead_entry *vp8_lookahead_peek(struct lookahead_ctx *ctx, unsigned int index, int direction); /**\brief Get the number of frames currently in the lookahead queue * * \param[in] ctx Pointer to the lookahead context */ unsigned int vp8_lookahead_depth(struct lookahead_ctx *ctx); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_LOOKAHEAD_H_ libvpx-1.8.2/vp8/encoder/mcomp.c000066400000000000000000001612451357355204000164560ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp8_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "onyx_int.h" #include "mcomp.h" #include "vpx_mem/vpx_mem.h" #include "vpx_config.h" #include #include #include #include "vp8/common/findnearmv.h" #include "vp8/common/common.h" #include "vpx_dsp/vpx_dsp_common.h" int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight) { /* MV costing is based on the distribution of vectors in the previous * frame and as such will tend to over state the cost of vectors. In * addition coding a new vector can have a knock on effect on the cost * of subsequent vectors and the quality of prediction from NEAR and * NEAREST for subsequent blocks. The "Weight" parameter allows, to a * limited extent, for some account to be taken of these factors. 
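* Roughly: cost = ((mvcost[0][row_diff >> 1] + mvcost[1][col_diff >> 1]) * * Weight) >> 7, with the component differences from the reference * clamped to the valid table range.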
*/ const int mv_idx_row = clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals); const int mv_idx_col = clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals); return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * Weight) >> 7; } static int mv_err_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int error_per_bit) { /* Ignore mv costing if mvcost is NULL */ if (mvcost) { const int mv_idx_row = clamp((mv->as_mv.row - ref->as_mv.row) >> 1, 0, MVvals); const int mv_idx_col = clamp((mv->as_mv.col - ref->as_mv.col) >> 1, 0, MVvals); return ((mvcost[0][mv_idx_row] + mvcost[1][mv_idx_col]) * error_per_bit + 128) >> 8; } return 0; } static int mvsad_err_cost(int_mv *mv, int_mv *ref, int *mvsadcost[2], int error_per_bit) { /* Calculate sad error cost on full pixel basis. */ /* Ignore mv costing if mvsadcost is NULL */ if (mvsadcost) { return ((mvsadcost[0][(mv->as_mv.row - ref->as_mv.row)] + mvsadcost[1][(mv->as_mv.col - ref->as_mv.col)]) * error_per_bit + 128) >> 8; } return 0; } void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride) { int Len; int search_site_count = 0; /* Generate offsets for 4 search sites per step. */ Len = MAX_FIRST_STEP; x->ss[search_site_count].mv.col = 0; x->ss[search_site_count].mv.row = 0; x->ss[search_site_count].offset = 0; search_site_count++; while (Len > 0) { /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = 0; x->ss[search_site_count].mv.row = -Len; x->ss[search_site_count].offset = -Len * stride; search_site_count++; /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = 0; x->ss[search_site_count].mv.row = Len; x->ss[search_site_count].offset = Len * stride; search_site_count++; /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = -Len; x->ss[search_site_count].mv.row = 0; x->ss[search_site_count].offset = -Len; search_site_count++; /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = Len; x->ss[search_site_count].mv.row = 0; x->ss[search_site_count].offset = Len; search_site_count++; /* Contract. */ Len /= 2; } x->ss_count = search_site_count; x->searches_per_step = 4; } void vp8_init3smotion_compensation(MACROBLOCK *x, int stride) { int Len; int search_site_count = 0; /* Generate offsets for 8 search sites per step. */ Len = MAX_FIRST_STEP; x->ss[search_site_count].mv.col = 0; x->ss[search_site_count].mv.row = 0; x->ss[search_site_count].offset = 0; search_site_count++; while (Len > 0) { /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = 0; x->ss[search_site_count].mv.row = -Len; x->ss[search_site_count].offset = -Len * stride; search_site_count++; /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = 0; x->ss[search_site_count].mv.row = Len; x->ss[search_site_count].offset = Len * stride; search_site_count++; /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = -Len; x->ss[search_site_count].mv.row = 0; x->ss[search_site_count].offset = -Len; search_site_count++; /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = Len; x->ss[search_site_count].mv.row = 0; x->ss[search_site_count].offset = Len; search_site_count++; /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = -Len; x->ss[search_site_count].mv.row = -Len; x->ss[search_site_count].offset = -Len * stride - Len; search_site_count++; /* Compute offsets for search sites. 
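* (This entry and the three after it add the diagonal points, completing * the 8-point square pattern that distinguishes this routine from the * 4-point vp8_init_dsmotion_compensation above.)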
*/ x->ss[search_site_count].mv.col = Len; x->ss[search_site_count].mv.row = -Len; x->ss[search_site_count].offset = -Len * stride + Len; search_site_count++; /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = -Len; x->ss[search_site_count].mv.row = Len; x->ss[search_site_count].offset = Len * stride - Len; search_site_count++; /* Compute offsets for search sites. */ x->ss[search_site_count].mv.col = Len; x->ss[search_site_count].mv.row = Len; x->ss[search_site_count].offset = Len * stride + Len; search_site_count++; /* Contract. */ Len /= 2; } x->ss_count = search_site_count; x->searches_per_step = 8; } /* * To avoid the penalty for crossing cache-line read, preload the reference * area in a small buffer, which is aligned to make sure there won't be crossing * cache-line read while reading from this buffer. This reduced the cpu * cycles spent on reading ref data in sub-pixel filter functions. * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we * could reduce the area. */ /* estimated cost of a motion vector (r,c) */ #define MVC(r, c) \ (mvcost \ ? ((mvcost[0][(r)-rr] + mvcost[1][(c)-rc]) * error_per_bit + 128) >> 8 \ : 0) /* pointer to predictor base of a motionvector */ #define PRE(r, c) (y + (((r) >> 2) * y_stride + ((c) >> 2) - (offset))) /* convert motion vector component to offset for svf calc */ #define SP(x) (((x)&3) << 1) /* returns subpixel variance error function. */ #define DIST(r, c) \ vfp->svf(PRE(r, c), y_stride, SP(c), SP(r), z, b->src_stride, &sse) #define IFMVCV(r, c, s, e) \ if (c >= minc && c <= maxc && r >= minr && r <= maxr) s else e; /* returns distortion + motion vector cost */ #define ERR(r, c) (MVC(r, c) + DIST(r, c)) /* checks if (r,c) has better score than previous best */ #define CHECK_BETTER(v, r, c) \ IFMVCV(r, c, \ { \ thismse = DIST(r, c); \ if ((v = (MVC(r, c) + thismse)) < besterr) { \ besterr = v; \ br = r; \ bc = c; \ *distortion = thismse; \ *sse1 = sse; \ } \ }, \ v = UINT_MAX;) int vp8_find_best_sub_pixel_step_iteratively(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1) { unsigned char *z = (*(b->base_src) + b->src); int rr = ref_mv->as_mv.row >> 1, rc = ref_mv->as_mv.col >> 1; int br = bestmv->as_mv.row * 4, bc = bestmv->as_mv.col * 4; int tr = br, tc = bc; unsigned int besterr; unsigned int left, right, up, down, diag; unsigned int sse; unsigned int whichdir; unsigned int halfiters = 4; unsigned int quarteriters = 4; int thismse; int minc = VPXMAX(x->mv_col_min * 4, (ref_mv->as_mv.col >> 1) - ((1 << mvlong_width) - 1)); int maxc = VPXMIN(x->mv_col_max * 4, (ref_mv->as_mv.col >> 1) + ((1 << mvlong_width) - 1)); int minr = VPXMAX(x->mv_row_min * 4, (ref_mv->as_mv.row >> 1) - ((1 << mvlong_width) - 1)); int maxr = VPXMIN(x->mv_row_max * 4, (ref_mv->as_mv.row >> 1) + ((1 << mvlong_width) - 1)); int y_stride; int offset; int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; #if VPX_ARCH_X86 || VPX_ARCH_X86_64 MACROBLOCKD *xd = &x->e_mbd; unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; unsigned char *y; int buf_r1, buf_r2, buf_c1; /* Clamping to avoid out-of-range data access */ buf_r1 = ((bestmv->as_mv.row - 3) < x->mv_row_min) ? (bestmv->as_mv.row - x->mv_row_min) : 3; buf_r2 = ((bestmv->as_mv.row + 3) > x->mv_row_max) ? 
(x->mv_row_max - bestmv->as_mv.row) : 3; buf_c1 = ((bestmv->as_mv.col - 3) < x->mv_col_min) ? (bestmv->as_mv.col - x->mv_col_min) : 3; y_stride = 32; /* Copy to intermediate buffer before searching. */ vfp->copymem(y_0 - buf_c1 - pre_stride * buf_r1, pre_stride, xd->y_buf, y_stride, 16 + buf_r1 + buf_r2); y = xd->y_buf + y_stride * buf_r1 + buf_c1; #else unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; y_stride = pre_stride; #endif offset = (bestmv->as_mv.row) * y_stride + bestmv->as_mv.col; /* central mv */ bestmv->as_mv.row *= 8; bestmv->as_mv.col *= 8; /* calculate central point error */ besterr = vfp->vf(y, y_stride, z, b->src_stride, sse1); *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); /* TODO: Each subsequent iteration checks at least one point in common * with the last iteration could be 2 ( if diag selected) */ while (--halfiters) { /* 1/2 pel */ CHECK_BETTER(left, tr, tc - 2); CHECK_BETTER(right, tr, tc + 2); CHECK_BETTER(up, tr - 2, tc); CHECK_BETTER(down, tr + 2, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); switch (whichdir) { case 0: CHECK_BETTER(diag, tr - 2, tc - 2); break; case 1: CHECK_BETTER(diag, tr - 2, tc + 2); break; case 2: CHECK_BETTER(diag, tr + 2, tc - 2); break; case 3: CHECK_BETTER(diag, tr + 2, tc + 2); break; } /* no reason to check the same one again. */ if (tr == br && tc == bc) break; tr = br; tc = bc; } /* TODO: Each subsequent iteration checks at least one point in common * with the last iteration could be 2 ( if diag selected) */ /* 1/4 pel */ while (--quarteriters) { CHECK_BETTER(left, tr, tc - 1); CHECK_BETTER(right, tr, tc + 1); CHECK_BETTER(up, tr - 1, tc); CHECK_BETTER(down, tr + 1, tc); whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); switch (whichdir) { case 0: CHECK_BETTER(diag, tr - 1, tc - 1); break; case 1: CHECK_BETTER(diag, tr - 1, tc + 1); break; case 2: CHECK_BETTER(diag, tr + 1, tc - 1); break; case 3: CHECK_BETTER(diag, tr + 1, tc + 1); break; } /* no reason to check the same one again. */ if (tr == br && tc == bc) break; tr = br; tc = bc; } bestmv->as_mv.row = br * 2; bestmv->as_mv.col = bc * 2; if ((abs(bestmv->as_mv.col - ref_mv->as_mv.col) > (MAX_FULL_PEL_VAL << 3)) || (abs(bestmv->as_mv.row - ref_mv->as_mv.row) > (MAX_FULL_PEL_VAL << 3))) { return INT_MAX; } return besterr; } #undef MVC #undef PRE #undef SP #undef DIST #undef IFMVCV #undef ERR #undef CHECK_BETTER int vp8_find_best_sub_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1) { int bestmse = INT_MAX; int_mv startmv; int_mv this_mv; unsigned char *z = (*(b->base_src) + b->src); int left, right, up, down, diag; unsigned int sse; int whichdir; int thismse; int y_stride; int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; #if VPX_ARCH_X86 || VPX_ARCH_X86_64 MACROBLOCKD *xd = &x->e_mbd; unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; unsigned char *y; y_stride = 32; /* Copy 18 rows x 32 cols area to intermediate buffer before searching. 
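* (18 rows = 16 for the macroblock plus a one pixel border above and * below; the aligned 32-column stride avoids the cache-line-crossing * penalty described in the note above.)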
*/ vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); y = xd->y_buf + y_stride + 1; #else unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; y_stride = pre_stride; #endif /* central mv */ bestmv->as_mv.row *= 8; bestmv->as_mv.col *= 8; startmv = *bestmv; /* calculate central point error */ bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); *distortion = bestmse; bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); /* go left then right and check error */ this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); /* "halfpix" horizontal variance */ thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse); left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (left < bestmse) { *bestmv = this_mv; bestmse = left; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.col += 8; /* "halfpix" horizontal variance */ thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (right < bestmse) { *bestmv = this_mv; bestmse = right; *distortion = thismse; *sse1 = sse; } /* go up then down and check error */ this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); /* "halfpix" vertical variance */ thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse); up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; bestmse = up; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.row += 8; /* "halfpix" vertical variance */ thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse); down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (down < bestmse) { *bestmv = this_mv; bestmse = down; *distortion = thismse; *sse1 = sse; } /* now check 1 more diagonal */ whichdir = (left < right ? 0 : 1) + (up < down ? 0 : 2); this_mv = startmv; switch (whichdir) { case 0: this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; /* "halfpix" horizontal/vertical variance */ thismse = vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse); break; case 1: this_mv.as_mv.col += 4; this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; /* "halfpix" horizontal/vertical variance */ thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse); break; case 2: this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; this_mv.as_mv.row += 4; /* "halfpix" horizontal/vertical variance */ thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse); break; case 3: default: this_mv.as_mv.col += 4; this_mv.as_mv.row += 4; /* "halfpix" horizontal/vertical variance */ thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse); break; } diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; *distortion = thismse; *sse1 = sse; } /* time to check quarter pels. 
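* First re-anchor y on the winning half-pel position (the two pointer * adjustments just below), then probe the quarter-pel neighbours and one * diagonal from there.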
*/ if (bestmv->as_mv.row < startmv.as_mv.row) y -= y_stride; if (bestmv->as_mv.col < startmv.as_mv.col) y--; startmv = *bestmv; /* go left then right and check error */ this_mv.as_mv.row = startmv.as_mv.row; if (startmv.as_mv.col & 7) { this_mv.as_mv.col = startmv.as_mv.col - 2; thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse); } left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (left < bestmse) { *bestmv = this_mv; bestmse = left; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.col += 4; thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (right < bestmse) { *bestmv = this_mv; bestmse = right; *distortion = thismse; *sse1 = sse; } /* go up then down and check error */ this_mv.as_mv.col = startmv.as_mv.col; if (startmv.as_mv.row & 7) { this_mv.as_mv.row = startmv.as_mv.row - 2; thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse); } up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; bestmse = up; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.row += 4; thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (down < bestmse) { *bestmv = this_mv; bestmse = down; *distortion = thismse; *sse1 = sse; } /* now check 1 more diagonal */ whichdir = (left < right ? 0 : 1) + (up < down ? 
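/* whichdir picks the diagonal to probe: bit 0 is set when the right
 * candidate scored no worse than the left one, bit 1 when down scored no
 * worse than up; the switch below tries the matching corner. */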
0 : 2); this_mv = startmv; switch (whichdir) { case 0: if (startmv.as_mv.row & 7) { this_mv.as_mv.row -= 2; if (startmv.as_mv.col & 7) { this_mv.as_mv.col -= 2; thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse); } } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; if (startmv.as_mv.col & 7) { this_mv.as_mv.col -= 2; thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; thismse = vfp->svf(y - y_stride - 1, y_stride, 6, 6, z, b->src_stride, &sse); } } break; case 1: this_mv.as_mv.col += 2; if (startmv.as_mv.row & 7) { this_mv.as_mv.row -= 2; thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.as_mv.row = (startmv.as_mv.row - 8) | 6; thismse = vfp->svf(y - y_stride, y_stride, this_mv.as_mv.col & 7, 6, z, b->src_stride, &sse); } break; case 2: this_mv.as_mv.row += 2; if (startmv.as_mv.col & 7) { this_mv.as_mv.col -= 2; thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); } else { this_mv.as_mv.col = (startmv.as_mv.col - 8) | 6; thismse = vfp->svf(y - 1, y_stride, 6, this_mv.as_mv.row & 7, z, b->src_stride, &sse); } break; case 3: this_mv.as_mv.col += 2; this_mv.as_mv.row += 2; thismse = vfp->svf(y, y_stride, this_mv.as_mv.col & 7, this_mv.as_mv.row & 7, z, b->src_stride, &sse); break; } diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; *distortion = thismse; *sse1 = sse; } return bestmse; } int vp8_find_best_half_pixel_step(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse1) { int bestmse = INT_MAX; int_mv startmv; int_mv this_mv; unsigned char *z = (*(b->base_src) + b->src); int left, right, up, down, diag; unsigned int sse; int whichdir; int thismse; int y_stride; int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; #if VPX_ARCH_X86 || VPX_ARCH_X86_64 MACROBLOCKD *xd = &x->e_mbd; unsigned char *y_0 = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; unsigned char *y; y_stride = 32; /* Copy 18 rows x 32 cols area to intermediate buffer before searching. 
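 * Same staging as in vp8_find_best_sub_pixel_step: block plus a one-pixel
 * border, which is all the half-pel-only search ever reaches.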
*/ vfp->copymem(y_0 - 1 - pre_stride, pre_stride, xd->y_buf, y_stride, 18); y = xd->y_buf + y_stride + 1; #else unsigned char *y = base_pre + d->offset + (bestmv->as_mv.row) * pre_stride + bestmv->as_mv.col; y_stride = pre_stride; #endif /* central mv */ bestmv->as_mv.row *= 8; bestmv->as_mv.col *= 8; startmv = *bestmv; /* calculate central point error */ bestmse = vfp->vf(y, y_stride, z, b->src_stride, sse1); *distortion = bestmse; bestmse += mv_err_cost(bestmv, ref_mv, mvcost, error_per_bit); /* go left then right and check error */ this_mv.as_mv.row = startmv.as_mv.row; this_mv.as_mv.col = ((startmv.as_mv.col - 8) | 4); /* "halfpix" horizontal variance */ thismse = vfp->svf(y - 1, y_stride, 4, 0, z, b->src_stride, &sse); left = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (left < bestmse) { *bestmv = this_mv; bestmse = left; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.col += 8; /* "halfpix" horizontal variance */ thismse = vfp->svf(y, y_stride, 4, 0, z, b->src_stride, &sse); right = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (right < bestmse) { *bestmv = this_mv; bestmse = right; *distortion = thismse; *sse1 = sse; } /* go up then down and check error */ this_mv.as_mv.col = startmv.as_mv.col; this_mv.as_mv.row = ((startmv.as_mv.row - 8) | 4); /* "halfpix" vertical variance */ thismse = vfp->svf(y - y_stride, y_stride, 0, 4, z, b->src_stride, &sse); up = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (up < bestmse) { *bestmv = this_mv; bestmse = up; *distortion = thismse; *sse1 = sse; } this_mv.as_mv.row += 8; /* "halfpix" vertical variance */ thismse = vfp->svf(y, y_stride, 0, 4, z, b->src_stride, &sse); down = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (down < bestmse) { *bestmv = this_mv; bestmse = down; *distortion = thismse; *sse1 = sse; } /* now check 1 more diagonal - */ whichdir = (left < right ? 0 : 1) + (up < down ? 
0 : 2); this_mv = startmv; switch (whichdir) { case 0: this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; /* "halfpix" horizontal/vertical variance */ thismse = vfp->svf(y - 1 - y_stride, y_stride, 4, 4, z, b->src_stride, &sse); break; case 1: this_mv.as_mv.col += 4; this_mv.as_mv.row = (this_mv.as_mv.row - 8) | 4; /* "halfpix" horizontal/vertical variance */ thismse = vfp->svf(y - y_stride, y_stride, 4, 4, z, b->src_stride, &sse); break; case 2: this_mv.as_mv.col = (this_mv.as_mv.col - 8) | 4; this_mv.as_mv.row += 4; /* "halfpix" horizontal/vertical variance */ thismse = vfp->svf(y - 1, y_stride, 4, 4, z, b->src_stride, &sse); break; case 3: default: this_mv.as_mv.col += 4; this_mv.as_mv.row += 4; /* "halfpix" horizontal/vertical variance */ thismse = vfp->svf(y, y_stride, 4, 4, z, b->src_stride, &sse); break; } diag = thismse + mv_err_cost(&this_mv, ref_mv, mvcost, error_per_bit); if (diag < bestmse) { *bestmv = this_mv; bestmse = diag; *distortion = thismse; *sse1 = sse; } return bestmse; } #define CHECK_BOUNDS(range) \ { \ all_in = 1; \ all_in &= ((br - range) >= x->mv_row_min); \ all_in &= ((br + range) <= x->mv_row_max); \ all_in &= ((bc - range) >= x->mv_col_min); \ all_in &= ((bc + range) <= x->mv_col_max); \ } #define CHECK_POINT \ { \ if (this_mv.as_mv.col < x->mv_col_min) continue; \ if (this_mv.as_mv.col > x->mv_col_max) continue; \ if (this_mv.as_mv.row < x->mv_row_min) continue; \ if (this_mv.as_mv.row > x->mv_row_max) continue; \ } #define CHECK_BETTER \ { \ if (thissad < bestsad) { \ thissad += \ mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); \ if (thissad < bestsad) { \ bestsad = thissad; \ best_site = i; \ } \ } \ } static const MV next_chkpts[6][3] = { { { -2, 0 }, { -1, -2 }, { 1, -2 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 } }, { { 1, -2 }, { 2, 0 }, { 1, 2 } }, { { 2, 0 }, { 1, 2 }, { -1, 2 } }, { { 1, 2 }, { -1, 2 }, { -2, 0 } }, { { -1, 2 }, { -2, 0 }, { -1, -2 } } }; int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int_mv *best_mv, int search_param, int sad_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2], int_mv *center_mv) { MV hex[6] = { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } }; MV neighbors[4] = { { 0, -1 }, { -1, 0 }, { 1, 0 }, { 0, 1 } }; int i, j; unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; int in_what_stride = pre_stride; int br, bc; int_mv this_mv; unsigned int bestsad; unsigned int thissad; unsigned char *base_offset; unsigned char *this_offset; int k = -1; int all_in; int best_site = -1; int hex_range = 127; int dia_range = 8; int_mv fcenter_mv; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; /* adjust ref_mv to make sure it is within MV range */ vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); br = ref_mv->as_mv.row; bc = ref_mv->as_mv.col; /* Work out the start point for the search */ base_offset = (unsigned char *)(base_pre + d->offset); this_offset = base_offset + (br * (pre_stride)) + bc; this_mv.as_mv.row = br; this_mv.as_mv.col = bc; bestsad = vfp->sdf(what, what_stride, this_offset, in_what_stride) + mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); #if CONFIG_MULTI_RES_ENCODING /* Lower search range based on prediction info */ if (search_param >= 6) goto cal_neighbors; else if (search_param >= 5) hex_range = 
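/* The higher search_param is (i.e. the more reliable the prediction,
 * presumably from the lower-resolution encode), the fewer hex iterations
 * are allowed; at search_param >= 6 the hex stage is skipped entirely and
 * only the 1-away neighbour refinement runs. */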
4; else if (search_param >= 4) hex_range = 6; else if (search_param >= 3) hex_range = 15; else if (search_param >= 2) hex_range = 31; else if (search_param >= 1) hex_range = 63; dia_range = 8; #else (void)search_param; #endif /* hex search */ CHECK_BOUNDS(2) if (all_in) { for (i = 0; i < 6; ++i) { this_mv.as_mv.row = br + hex[i].row; this_mv.as_mv.col = bc + hex[i].col; this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); CHECK_BETTER } } else { for (i = 0; i < 6; ++i) { this_mv.as_mv.row = br + hex[i].row; this_mv.as_mv.col = bc + hex[i].col; CHECK_POINT this_offset = base_offset + (this_mv.as_mv.row * in_what_stride) + this_mv.as_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); CHECK_BETTER } } if (best_site == -1) { goto cal_neighbors; } else { br += hex[best_site].row; bc += hex[best_site].col; k = best_site; } for (j = 1; j < hex_range; ++j) { best_site = -1; CHECK_BOUNDS(2) if (all_in) { for (i = 0; i < 3; ++i) { this_mv.as_mv.row = br + next_chkpts[k][i].row; this_mv.as_mv.col = bc + next_chkpts[k][i].col; this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); CHECK_BETTER } } else { for (i = 0; i < 3; ++i) { this_mv.as_mv.row = br + next_chkpts[k][i].row; this_mv.as_mv.col = bc + next_chkpts[k][i].col; CHECK_POINT this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); CHECK_BETTER } } if (best_site == -1) { break; } else { br += next_chkpts[k][best_site].row; bc += next_chkpts[k][best_site].col; k += 5 + best_site; if (k >= 12) { k -= 12; } else if (k >= 6) { k -= 6; } } } /* check 4 1-away neighbors */ cal_neighbors: for (j = 0; j < dia_range; ++j) { best_site = -1; CHECK_BOUNDS(1) if (all_in) { for (i = 0; i < 4; ++i) { this_mv.as_mv.row = br + neighbors[i].row; this_mv.as_mv.col = bc + neighbors[i].col; this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); CHECK_BETTER } } else { for (i = 0; i < 4; ++i) { this_mv.as_mv.row = br + neighbors[i].row; this_mv.as_mv.col = bc + neighbors[i].col; CHECK_POINT this_offset = base_offset + (this_mv.as_mv.row * (in_what_stride)) + this_mv.as_mv.col; thissad = vfp->sdf(what, what_stride, this_offset, in_what_stride); CHECK_BETTER } } if (best_site == -1) { break; } else { br += neighbors[best_site].row; bc += neighbors[best_site].col; } } best_mv->as_mv.row = br; best_mv->as_mv.col = bc; return bestsad; } #undef CHECK_BOUNDS #undef CHECK_POINT #undef CHECK_BETTER int vp8_diamond_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int_mv *best_mv, int search_param, int sad_per_bit, int *num00, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int_mv *center_mv) { int i, j, step; unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; unsigned char *in_what; int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; int in_what_stride = pre_stride; unsigned char *best_address; int tot_steps; int_mv this_mv; unsigned int bestsad; unsigned int thissad; int best_site = 0; int last_site = 0; int ref_row; int ref_col; int this_row_offset; int this_col_offset; search_site *ss; unsigned char *check_here; int *mvsadcost[2]; int_mv fcenter_mv; mvsadcost[0] = 
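/* SAD-domain mv cost tables; fcenter_mv below is the centre mv converted
 * from 1/8-pel to full-pel units (>> 3) to match the full-pel candidates
 * scored by mvsad_err_cost(). */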
x->mvsadcost[0]; mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); ref_row = ref_mv->as_mv.row; ref_col = ref_mv->as_mv.col; *num00 = 0; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; /* Work out the start point for the search */ in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); best_address = in_what; /* Check the starting position */ bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); /* search_param determines the length of the initial step and hence * the number of iterations 0 = initial step (MAX_FIRST_STEP) pel : * 1 = (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. */ ss = &x->ss[search_param * x->searches_per_step]; tot_steps = (x->ss_count / x->searches_per_step) - search_param; i = 1; for (step = 0; step < tot_steps; ++step) { for (j = 0; j < x->searches_per_step; ++j) { /* Trap illegal vectors */ this_row_offset = best_mv->as_mv.row + ss[i].mv.row; this_col_offset = best_mv->as_mv.col + ss[i].mv.col; if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = ss[i].offset + best_address; thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_site = i; } } } i++; } if (best_site != last_site) { best_mv->as_mv.row += ss[best_site].mv.row; best_mv->as_mv.col += ss[best_site].mv.col; best_address += ss[best_site].offset; last_site = best_site; } else if (best_address == in_what) { (*num00)++; } } this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } #if HAVE_SSE2 || HAVE_MSA int vp8_diamond_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int_mv *best_mv, int search_param, int sad_per_bit, int *num00, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int_mv *center_mv) { int i, j, step; unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; unsigned char *in_what; int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; int in_what_stride = pre_stride; unsigned char *best_address; int tot_steps; int_mv this_mv; unsigned int bestsad; unsigned int thissad; int best_site = 0; int last_site = 0; int ref_row; int ref_col; int this_row_offset; int this_col_offset; search_site *ss; unsigned char *check_here; int *mvsadcost[2]; int_mv fcenter_mv; mvsadcost[0] = x->mvsadcost[0]; mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; vp8_clamp_mv(ref_mv, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); ref_row = ref_mv->as_mv.row; ref_col = ref_mv->as_mv.col; *num00 = 0; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; /* Work out the start point for the search */ in_what = (unsigned char *)(base_pre + d->offset + (ref_row * pre_stride) + ref_col); best_address = in_what; /* Check the starting position */ bestsad = 
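/* starting score: raw SAD at the search centre plus the rate cost of
 * coding that vector */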
fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); /* search_param determines the length of the initial step and hence the * number of iterations 0 = initial step (MAX_FIRST_STEP) pel : 1 = * (MAX_FIRST_STEP/2) pel, 2 = (MAX_FIRST_STEP/4) pel... etc. */ ss = &x->ss[search_param * x->searches_per_step]; tot_steps = (x->ss_count / x->searches_per_step) - search_param; i = 1; for (step = 0; step < tot_steps; ++step) { int all_in = 1, t; /* To know if all neighbor points are within the bounds, 4 bounds * checking are enough instead of checking 4 bounds for each * points. */ all_in &= ((best_mv->as_mv.row + ss[i].mv.row) > x->mv_row_min); all_in &= ((best_mv->as_mv.row + ss[i + 1].mv.row) < x->mv_row_max); all_in &= ((best_mv->as_mv.col + ss[i + 2].mv.col) > x->mv_col_min); all_in &= ((best_mv->as_mv.col + ss[i + 3].mv.col) < x->mv_col_max); if (all_in) { unsigned int sad_array[4]; for (j = 0; j < x->searches_per_step; j += 4) { const unsigned char *block_offset[4]; for (t = 0; t < 4; ++t) { block_offset[t] = ss[i + t].offset + best_address; } fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); for (t = 0; t < 4; t++, i++) { if (sad_array[t] < bestsad) { this_mv.as_mv.row = best_mv->as_mv.row + ss[i].mv.row; this_mv.as_mv.col = best_mv->as_mv.col + ss[i].mv.col; sad_array[t] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); if (sad_array[t] < bestsad) { bestsad = sad_array[t]; best_site = i; } } } } } else { for (j = 0; j < x->searches_per_step; ++j) { /* Trap illegal vectors */ this_row_offset = best_mv->as_mv.row + ss[i].mv.row; this_col_offset = best_mv->as_mv.col + ss[i].mv.col; if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = ss[i].offset + best_address; thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_site = i; } } } i++; } } if (best_site != last_site) { best_mv->as_mv.row += ss[best_site].mv.row; best_mv->as_mv.col += ss[best_site].mv.col; best_address += ss[best_site].offset; last_site = best_site; } else if (best_address == in_what) { (*num00)++; } } this_mv.as_mv.row = best_mv->as_mv.row * 8; this_mv.as_mv.col = best_mv->as_mv.col * 8; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } #endif // HAVE_SSE2 || HAVE_MSA int vp8_full_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int_mv *center_mv) { unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; unsigned char *in_what; int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; int in_what_stride = pre_stride; int mv_stride = pre_stride; unsigned char *bestaddress; int_mv *best_mv = &d->bmi.mv; int_mv this_mv; unsigned int bestsad; unsigned int thissad; int r, c; unsigned char *check_here; int ref_row = ref_mv->as_mv.row; int ref_col = ref_mv->as_mv.col; int row_min = ref_row - distance; int row_max = ref_row + distance; int col_min = ref_col - distance; int col_max = ref_col + distance; int *mvsadcost[2]; int_mv fcenter_mv; 
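/* Exhaustive search: every full-pel candidate in the [row_min, row_max) x
 * [col_min, col_max) window around ref_mv is scored with SAD plus mv rate
 * cost, after the window is clamped to the UMV borders below. */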
mvsadcost[0] = x->mvsadcost[0]; mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; /* Work out the mid point for the search */ in_what = base_pre + d->offset; bestaddress = in_what + (ref_row * pre_stride) + ref_col; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; /* Baseline value at the centre */ bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); /* Apply further limits to prevent us looking using vectors that * stretch beyiond the UMV border */ if (col_min < x->mv_col_min) col_min = x->mv_col_min; if (col_max > x->mv_col_max) col_max = x->mv_col_max; if (row_min < x->mv_row_min) row_min = x->mv_row_min; if (row_max > x->mv_row_max) row_max = x->mv_row_max; for (r = row_min; r < row_max; ++r) { this_mv.as_mv.row = r; check_here = r * mv_stride + in_what + col_min; for (c = col_min; c < col_max; ++c) { thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row = r; best_mv->as_mv.col = c; bestaddress = check_here; } check_here++; } } this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } #if HAVE_SSSE3 int vp8_full_search_sadx3(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int_mv *center_mv) { unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; unsigned char *in_what; int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; int in_what_stride = pre_stride; int mv_stride = pre_stride; unsigned char *bestaddress; int_mv *best_mv = &d->bmi.mv; int_mv this_mv; unsigned int bestsad; unsigned int thissad; int r, c; unsigned char *check_here; int ref_row = ref_mv->as_mv.row; int ref_col = ref_mv->as_mv.col; int row_min = ref_row - distance; int row_max = ref_row + distance; int col_min = ref_col - distance; int col_max = ref_col + distance; unsigned int sad_array[3]; int *mvsadcost[2]; int_mv fcenter_mv; mvsadcost[0] = x->mvsadcost[0]; mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; /* Work out the mid point for the search */ in_what = base_pre + d->offset; bestaddress = in_what + (ref_row * pre_stride) + ref_col; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; /* Baseline value at the centre */ bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); /* Apply further limits to prevent us looking using vectors that stretch * beyond the UMV border */ if (col_min < x->mv_col_min) col_min = x->mv_col_min; if (col_max > x->mv_col_max) col_max = x->mv_col_max; if (row_min < x->mv_row_min) row_min = x->mv_row_min; if (row_max > x->mv_row_max) row_max = x->mv_row_max; for (r = row_min; r < row_max; ++r) { this_mv.as_mv.row = r; check_here = r * mv_stride + in_what + col_min; c = col_min; while ((c + 2) < col_max) { int i; fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); for (i = 0; i < 3; ++i) { thissad = sad_array[i]; if (thissad < bestsad) { 
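/* the raw SAD passed the cheap test; add the mv rate cost and re-check
 * before accepting the candidate */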
this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row = r; best_mv->as_mv.col = c; bestaddress = check_here; } } check_here++; c++; } } while (c < col_max) { thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row = r; best_mv->as_mv.col = c; bestaddress = check_here; } } check_here++; c++; } } this_mv.as_mv.row = best_mv->as_mv.row << 3; this_mv.as_mv.col = best_mv->as_mv.col << 3; return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } #endif // HAVE_SSSE3 #if HAVE_SSE4_1 int vp8_full_search_sadx8(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int_mv *center_mv) { unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; unsigned char *in_what; int in_what_stride = pre_stride; int mv_stride = pre_stride; unsigned char *bestaddress; int_mv *best_mv = &d->bmi.mv; int_mv this_mv; unsigned int bestsad; unsigned int thissad; int r, c; unsigned char *check_here; int ref_row = ref_mv->as_mv.row; int ref_col = ref_mv->as_mv.col; int row_min = ref_row - distance; int row_max = ref_row + distance; int col_min = ref_col - distance; int col_max = ref_col + distance; DECLARE_ALIGNED(16, unsigned int, sad_array8[8]); unsigned int sad_array[3]; int *mvsadcost[2]; int_mv fcenter_mv; mvsadcost[0] = x->mvsadcost[0]; mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; /* Work out the mid point for the search */ in_what = base_pre + d->offset; bestaddress = in_what + (ref_row * pre_stride) + ref_col; best_mv->as_mv.row = ref_row; best_mv->as_mv.col = ref_col; /* Baseline value at the centre */ bestsad = fn_ptr->sdf(what, what_stride, bestaddress, in_what_stride) + mvsad_err_cost(best_mv, &fcenter_mv, mvsadcost, sad_per_bit); /* Apply further limits to prevent us looking using vectors that stretch * beyond the UMV border */ if (col_min < x->mv_col_min) col_min = x->mv_col_min; if (col_max > x->mv_col_max) col_max = x->mv_col_max; if (row_min < x->mv_row_min) row_min = x->mv_row_min; if (row_max > x->mv_row_max) row_max = x->mv_row_max; for (r = row_min; r < row_max; ++r) { this_mv.as_mv.row = r; check_here = r * mv_stride + in_what + col_min; c = col_min; while ((c + 7) < col_max) { int i; fn_ptr->sdx8f(what, what_stride, check_here, in_what_stride, sad_array8); for (i = 0; i < 8; ++i) { thissad = sad_array8[i]; if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row = r; best_mv->as_mv.col = c; bestaddress = check_here; } } check_here++; c++; } } while ((c + 2) < col_max) { int i; fn_ptr->sdx3f(what, what_stride, check_here, in_what_stride, sad_array); for (i = 0; i < 3; ++i) { thissad = sad_array[i]; if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row = r; best_mv->as_mv.col = c; bestaddress = 
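/* remember the winning address; the final variance is computed from it
 * once, after the scan finishes */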
check_here; } } check_here++; c++; } } while (c < col_max) { thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); if (thissad < bestsad) { this_mv.as_mv.col = c; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->as_mv.row = r; best_mv->as_mv.col = c; bestaddress = check_here; } } check_here++; c++; } } this_mv.as_mv.row = best_mv->as_mv.row * 8; this_mv.as_mv.col = best_mv->as_mv.col * 8; return fn_ptr->vf(what, what_stride, bestaddress, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } #endif // HAVE_SSE4_1 int vp8_refining_search_sad_c(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int error_per_bit, int search_range, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int_mv *center_mv) { MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; int i, j; short this_row_offset, this_col_offset; int what_stride = b->src_stride; int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; int in_what_stride = pre_stride; unsigned char *what = (*(b->base_src) + b->src); unsigned char *best_address = (unsigned char *)(base_pre + d->offset + (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); unsigned char *check_here; int_mv this_mv; unsigned int bestsad; unsigned int thissad; int *mvsadcost[2]; int_mv fcenter_mv; mvsadcost[0] = x->mvsadcost[0]; mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); for (i = 0; i < search_range; ++i) { int best_site = -1; for (j = 0; j < 4; ++j) { this_row_offset = ref_mv->as_mv.row + neighbors[j].row; this_col_offset = ref_mv->as_mv.col + neighbors[j].col; if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address; thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { bestsad = thissad; best_site = j; } } } } if (best_site == -1) { break; } else { ref_mv->as_mv.row += neighbors[best_site].row; ref_mv->as_mv.col += neighbors[best_site].col; best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col; } } this_mv.as_mv.row = ref_mv->as_mv.row << 3; this_mv.as_mv.col = ref_mv->as_mv.col << 3; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } #if HAVE_SSE2 || HAVE_MSA int vp8_refining_search_sadx4(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int error_per_bit, int search_range, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int_mv *center_mv) { MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; int i, j; short this_row_offset, this_col_offset; int what_stride = b->src_stride; int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; int in_what_stride = pre_stride; unsigned char *what = (*(b->base_src) + b->src); unsigned char *best_address = (unsigned char *)(base_pre + d->offset + (ref_mv->as_mv.row * pre_stride) + ref_mv->as_mv.col); 
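/* Refining search: up to search_range passes over the four 1-pel
 * neighbours of the current best. When all four lie inside the mv limits,
 * one sdx4df call scores them together; otherwise each neighbour is
 * bounds-checked and scored individually. */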
unsigned char *check_here; int_mv this_mv; unsigned int bestsad; unsigned int thissad; int *mvsadcost[2]; int_mv fcenter_mv; mvsadcost[0] = x->mvsadcost[0]; mvsadcost[1] = x->mvsadcost[1]; fcenter_mv.as_mv.row = center_mv->as_mv.row >> 3; fcenter_mv.as_mv.col = center_mv->as_mv.col >> 3; bestsad = fn_ptr->sdf(what, what_stride, best_address, in_what_stride) + mvsad_err_cost(ref_mv, &fcenter_mv, mvsadcost, error_per_bit); for (i = 0; i < search_range; ++i) { int best_site = -1; int all_in = 1; all_in &= ((ref_mv->as_mv.row - 1) > x->mv_row_min); all_in &= ((ref_mv->as_mv.row + 1) < x->mv_row_max); all_in &= ((ref_mv->as_mv.col - 1) > x->mv_col_min); all_in &= ((ref_mv->as_mv.col + 1) < x->mv_col_max); if (all_in) { unsigned int sad_array[4]; const unsigned char *block_offset[4]; block_offset[0] = best_address - in_what_stride; block_offset[1] = best_address - 1; block_offset[2] = best_address + 1; block_offset[3] = best_address + in_what_stride; fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); for (j = 0; j < 4; ++j) { if (sad_array[j] < bestsad) { this_mv.as_mv.row = ref_mv->as_mv.row + neighbors[j].row; this_mv.as_mv.col = ref_mv->as_mv.col + neighbors[j].col; sad_array[j] += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); if (sad_array[j] < bestsad) { bestsad = sad_array[j]; best_site = j; } } } } else { for (j = 0; j < 4; ++j) { this_row_offset = ref_mv->as_mv.row + neighbors[j].row; this_col_offset = ref_mv->as_mv.col + neighbors[j].col; if ((this_col_offset > x->mv_col_min) && (this_col_offset < x->mv_col_max) && (this_row_offset > x->mv_row_min) && (this_row_offset < x->mv_row_max)) { check_here = (neighbors[j].row) * in_what_stride + neighbors[j].col + best_address; thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); if (thissad < bestsad) { this_mv.as_mv.row = this_row_offset; this_mv.as_mv.col = this_col_offset; thissad += mvsad_err_cost(&this_mv, &fcenter_mv, mvsadcost, error_per_bit); if (thissad < bestsad) { bestsad = thissad; best_site = j; } } } } } if (best_site == -1) { break; } else { ref_mv->as_mv.row += neighbors[best_site].row; ref_mv->as_mv.col += neighbors[best_site].col; best_address += (neighbors[best_site].row) * in_what_stride + neighbors[best_site].col; } } this_mv.as_mv.row = ref_mv->as_mv.row * 8; this_mv.as_mv.col = ref_mv->as_mv.col * 8; return fn_ptr->vf(what, what_stride, best_address, in_what_stride, &thissad) + mv_err_cost(&this_mv, center_mv, mvcost, x->errorperbit); } #endif // HAVE_SSE2 || HAVE_MSA libvpx-1.8.2/vp8/encoder/mcomp.h000066400000000000000000000060551357355204000164600ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP8_ENCODER_MCOMP_H_ #define VPX_VP8_ENCODER_MCOMP_H_ #include "block.h" #include "vpx_dsp/variance.h" #ifdef __cplusplus extern "C" { #endif /* The maximum number of steps in a step search given the largest allowed * initial step */ #define MAX_MVSEARCH_STEPS 8 /* Max full pel mv specified in 1 pel units */ #define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS)) - 1) /* Maximum size of the first step in full pel units */ #define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS - 1)) int vp8_mv_bit_cost(int_mv *mv, int_mv *ref, int *mvcost[2], int Weight); void vp8_init_dsmotion_compensation(MACROBLOCK *x, int stride); void vp8_init3smotion_compensation(MACROBLOCK *x, int stride); int vp8_hex_search(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int_mv *best_mv, int search_param, int sad_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvsadcost[2], int_mv *center_mv); typedef int(fractional_mv_step_fp)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse); fractional_mv_step_fp vp8_find_best_sub_pixel_step_iteratively; fractional_mv_step_fp vp8_find_best_sub_pixel_step; fractional_mv_step_fp vp8_find_best_half_pixel_step; fractional_mv_step_fp vp8_skip_fractional_mv_step; typedef int (*vp8_full_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int_mv *center_mv); typedef int (*vp8_refining_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int sad_per_bit, int distance, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int_mv *center_mv); typedef int (*vp8_diamond_search_fn_t)(MACROBLOCK *x, BLOCK *b, BLOCKD *d, int_mv *ref_mv, int_mv *best_mv, int search_param, int sad_per_bit, int *num00, vp8_variance_fn_ptr_t *fn_ptr, int *mvcost[2], int_mv *center_mv); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_MCOMP_H_ libvpx-1.8.2/vp8/encoder/mips/000077500000000000000000000000001357355204000161365ustar00rootroot00000000000000libvpx-1.8.2/vp8/encoder/mips/mmi/000077500000000000000000000000001357355204000167205ustar00rootroot00000000000000libvpx-1.8.2/vp8/encoder/mips/mmi/dct_mmi.c000066400000000000000000000503721357355204000205070ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp8_rtcd.h" #include "vpx_ports/mem.h" #include "vpx_ports/asmdefs_mmi.h" /* clang-format off */ /* TRANSPOSE_4H: transpose 4x4 matrix. Input: ftmp1,ftmp2,ftmp3,ftmp4 Output: ftmp1,ftmp2,ftmp3,ftmp4 Note: ftmp0 always be 0, ftmp5~9 used for temporary value. 
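 * Each input row is zero-extended against ftmp0; the pshufh (control 0x93)
 * plus "or" pairs merge two rows into halfword-interleaved form, and the
 * final punpck{l,h}wd steps complete the 4x4 transpose.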
*/ #define TRANSPOSE_4H \ MMI_LI(%[tmp0], 0x93) \ "mtc1 %[tmp0], %[ftmp10] \n\t" \ "punpcklhw %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ "punpcklhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "or %[ftmp5], %[ftmp5], %[ftmp9] \n\t" \ "punpckhhw %[ftmp6], %[ftmp1], %[ftmp0] \n\t" \ "punpckhhw %[ftmp9], %[ftmp2], %[ftmp0] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "or %[ftmp6], %[ftmp6], %[ftmp9] \n\t" \ "punpcklhw %[ftmp7], %[ftmp3], %[ftmp0] \n\t" \ "punpcklhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "or %[ftmp7], %[ftmp7], %[ftmp9] \n\t" \ "punpckhhw %[ftmp8], %[ftmp3], %[ftmp0] \n\t" \ "punpckhhw %[ftmp9], %[ftmp4], %[ftmp0] \n\t" \ "pshufh %[ftmp9], %[ftmp9], %[ftmp10] \n\t" \ "or %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ "punpcklwd %[ftmp1], %[ftmp5], %[ftmp7] \n\t" \ "punpckhwd %[ftmp2], %[ftmp5], %[ftmp7] \n\t" \ "punpcklwd %[ftmp3], %[ftmp6], %[ftmp8] \n\t" \ "punpckhwd %[ftmp4], %[ftmp6], %[ftmp8] \n\t" /* clang-format on */ void vp8_short_fdct4x4_mmi(int16_t *input, int16_t *output, int pitch) { uint64_t tmp[1]; int16_t *ip = input; #if _MIPS_SIM == _ABIO32 register double ftmp0 asm("$f0"); register double ftmp1 asm("$f2"); register double ftmp2 asm("$f4"); register double ftmp3 asm("$f6"); register double ftmp4 asm("$f8"); register double ftmp5 asm("$f10"); register double ftmp6 asm("$f12"); register double ftmp7 asm("$f14"); register double ftmp8 asm("$f16"); register double ftmp9 asm("$f18"); register double ftmp10 asm("$f20"); register double ftmp11 asm("$f22"); register double ftmp12 asm("$f24"); #else register double ftmp0 asm("$f0"); register double ftmp1 asm("$f1"); register double ftmp2 asm("$f2"); register double ftmp3 asm("$f3"); register double ftmp4 asm("$f4"); register double ftmp5 asm("$f5"); register double ftmp6 asm("$f6"); register double ftmp7 asm("$f7"); register double ftmp8 asm("$f8"); register double ftmp9 asm("$f9"); register double ftmp10 asm("$f10"); register double ftmp11 asm("$f11"); register double ftmp12 asm("$f12"); #endif // _MIPS_SIM == _ABIO32 DECLARE_ALIGNED(8, const uint64_t, ff_ph_01) = { 0x0001000100010001ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_ph_07) = { 0x0007000700070007ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_12000) = { 0x00002ee000002ee0ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_51000) = { 0x0000c7380000c738ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_14500) = { 0x000038a4000038a4ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_7500) = { 0x00001d4c00001d4cULL }; DECLARE_ALIGNED(8, const uint64_t, ff_ph_op1) = { 0x14e808a914e808a9ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_ph_op3) = { 0xeb1808a9eb1808a9ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_5352) = { 0x000014e8000014e8ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_2217) = { 0x000008a9000008a9ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_ph_8) = { 0x0008000800080008ULL }; __asm__ volatile ( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t" MMI_ADDU(%[ip], %[ip], %[pitch]) "gsldlc1 %[ftmp2], 0x07(%[ip]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[ip]) \n\t" MMI_ADDU(%[ip], %[ip], %[pitch]) "gsldlc1 %[ftmp3], 0x07(%[ip]) \n\t" "gsldrc1 %[ftmp3], 0x00(%[ip]) \n\t" MMI_ADDU(%[ip], %[ip], %[pitch]) "gsldlc1 %[ftmp4], 0x07(%[ip]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[ip]) \n\t" MMI_ADDU(%[ip], %[ip], %[pitch]) TRANSPOSE_4H "ldc1 %[ftmp11], %[ff_ph_8] \n\t" // f1 + f4 "paddh %[ftmp5], %[ftmp1], %[ftmp4] \n\t" // a1 "pmullh %[ftmp5], %[ftmp5], 
%[ftmp11] \n\t" // f2 + f3 "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" // b1 "pmullh %[ftmp6], %[ftmp6], %[ftmp11] \n\t" // f2 - f3 "psubh %[ftmp7], %[ftmp2], %[ftmp3] \n\t" // c1 "pmullh %[ftmp7], %[ftmp7], %[ftmp11] \n\t" // f1 - f4 "psubh %[ftmp8], %[ftmp1], %[ftmp4] \n\t" // d1 "pmullh %[ftmp8], %[ftmp8], %[ftmp11] \n\t" // op[0] = a1 + b1 "paddh %[ftmp1], %[ftmp5], %[ftmp6] \n\t" // op[2] = a1 - b1 "psubh %[ftmp3], %[ftmp5], %[ftmp6] \n\t" // op[1] = (c1 * 2217 + d1 * 5352 + 14500) >> 12 MMI_LI(%[tmp0], 0x0c) "mtc1 %[tmp0], %[ftmp11] \n\t" "ldc1 %[ftmp12], %[ff_pw_14500] \n\t" "punpcklhw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" "pmaddhw %[ftmp5], %[ftmp9], %[ff_ph_op1] \n\t" "punpckhhw %[ftmp9], %[ftmp7], %[ftmp8] \n\t" "pmaddhw %[ftmp6], %[ftmp9], %[ff_ph_op1] \n\t" "paddw %[ftmp5], %[ftmp5], %[ftmp12] \n\t" "paddw %[ftmp6], %[ftmp6], %[ftmp12] \n\t" "psraw %[ftmp5], %[ftmp5], %[ftmp11] \n\t" "psraw %[ftmp6], %[ftmp6], %[ftmp11] \n\t" "packsswh %[ftmp2], %[ftmp5], %[ftmp6] \n\t" // op[3] = (d1 * 2217 - c1 * 5352 + 7500) >> 12 "ldc1 %[ftmp12], %[ff_pw_7500] \n\t" "punpcklhw %[ftmp9], %[ftmp8], %[ftmp7] \n\t" "pmaddhw %[ftmp5], %[ftmp9], %[ff_ph_op3] \n\t" "punpckhhw %[ftmp9], %[ftmp8], %[ftmp7] \n\t" "pmaddhw %[ftmp6], %[ftmp9], %[ff_ph_op3] \n\t" "paddw %[ftmp5], %[ftmp5], %[ftmp12] \n\t" "paddw %[ftmp6], %[ftmp6], %[ftmp12] \n\t" "psraw %[ftmp5], %[ftmp5], %[ftmp11] \n\t" "psraw %[ftmp6], %[ftmp6], %[ftmp11] \n\t" "packsswh %[ftmp4], %[ftmp5], %[ftmp6] \n\t" TRANSPOSE_4H "paddh %[ftmp5], %[ftmp1], %[ftmp4] \n\t" "paddh %[ftmp6], %[ftmp2], %[ftmp3] \n\t" "psubh %[ftmp7], %[ftmp2], %[ftmp3] \n\t" "psubh %[ftmp8], %[ftmp1], %[ftmp4] \n\t" "pcmpeqh %[ftmp0], %[ftmp8], %[ftmp0] \n\t" "ldc1 %[ftmp9], %[ff_ph_01] \n\t" "paddh %[ftmp0], %[ftmp0], %[ftmp9] \n\t" "paddh %[ftmp1], %[ftmp5], %[ftmp6] \n\t" "psubh %[ftmp2], %[ftmp5], %[ftmp6] \n\t" "ldc1 %[ftmp9], %[ff_ph_07] \n\t" "paddh %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "paddh %[ftmp2], %[ftmp2], %[ftmp9] \n\t" MMI_LI(%[tmp0], 0x04) "mtc1 %[tmp0], %[ftmp9] \n\t" "psrah %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "psrah %[ftmp2], %[ftmp2], %[ftmp9] \n\t" MMI_LI(%[tmp0], 0x10) "mtc1 %[tmp0], %[ftmp9] \n\t" "ldc1 %[ftmp12], %[ff_pw_12000] \n\t" "punpcklhw %[ftmp5], %[ftmp7], %[ftmp8] \n\t" "pmaddhw %[ftmp10], %[ftmp5], %[ff_ph_op1] \n\t" "punpckhhw %[ftmp5], %[ftmp7], %[ftmp8] \n\t" "pmaddhw %[ftmp11], %[ftmp5], %[ff_ph_op1] \n\t" "paddw %[ftmp10], %[ftmp10], %[ftmp12] \n\t" "paddw %[ftmp11], %[ftmp11], %[ftmp12] \n\t" "psraw %[ftmp10], %[ftmp10], %[ftmp9] \n\t" "psraw %[ftmp11], %[ftmp11], %[ftmp9] \n\t" "packsswh %[ftmp3], %[ftmp10], %[ftmp11] \n\t" "paddh %[ftmp3], %[ftmp3], %[ftmp0] \n\t" "ldc1 %[ftmp12], %[ff_pw_51000] \n\t" "punpcklhw %[ftmp5], %[ftmp8], %[ftmp7] \n\t" "pmaddhw %[ftmp10], %[ftmp5], %[ff_ph_op3] \n\t" "punpckhhw %[ftmp5], %[ftmp8], %[ftmp7] \n\t" "pmaddhw %[ftmp11], %[ftmp5], %[ff_ph_op3] \n\t" "paddw %[ftmp10], %[ftmp10], %[ftmp12] \n\t" "paddw %[ftmp11], %[ftmp11], %[ftmp12] \n\t" "psraw %[ftmp10], %[ftmp10], %[ftmp9] \n\t" "psraw %[ftmp11], %[ftmp11], %[ftmp9] \n\t" "packsswh %[ftmp4], %[ftmp10], %[ftmp11] \n\t" "gssdlc1 %[ftmp1], 0x07(%[output]) \n\t" "gssdrc1 %[ftmp1], 0x00(%[output]) \n\t" "gssdlc1 %[ftmp3], 0x0f(%[output]) \n\t" "gssdrc1 %[ftmp3], 0x08(%[output]) \n\t" "gssdlc1 %[ftmp2], 0x17(%[output]) \n\t" "gssdrc1 %[ftmp2], 0x10(%[output]) \n\t" "gssdlc1 %[ftmp4], 0x1f(%[output]) \n\t" "gssdrc1 %[ftmp4], 0x18(%[output]) \n\t" : [ftmp0] "=&f"(ftmp0), [ftmp1] "=&f"(ftmp1), [ftmp2] "=&f"(ftmp2), [ftmp3] "=&f"(ftmp3), [ftmp4] 
"=&f"(ftmp4), [ftmp5] "=&f"(ftmp5), [ftmp6] "=&f"(ftmp6), [ftmp7] "=&f"(ftmp7), [ftmp8] "=&f"(ftmp8), [ftmp9] "=&f"(ftmp9), [ftmp10] "=&f"(ftmp10), [ftmp11] "=&f"(ftmp11), [ftmp12] "=&f"(ftmp12), [tmp0] "=&r"(tmp[0]), [ip]"+&r"(ip) : [ff_ph_01] "m"(ff_ph_01), [ff_ph_07] "m"(ff_ph_07), [ff_ph_op1] "f"(ff_ph_op1), [ff_ph_op3] "f"(ff_ph_op3), [ff_pw_14500] "m"(ff_pw_14500), [ff_pw_7500] "m"(ff_pw_7500), [ff_pw_12000] "m"(ff_pw_12000), [ff_pw_51000] "m"(ff_pw_51000), [ff_pw_5352]"m"(ff_pw_5352), [ff_pw_2217]"m"(ff_pw_2217), [ff_ph_8]"m"(ff_ph_8), [pitch]"r"(pitch), [output] "r"(output) : "memory" ); } void vp8_short_fdct8x4_mmi(int16_t *input, int16_t *output, int pitch) { vp8_short_fdct4x4_mmi(input, output, pitch); vp8_short_fdct4x4_mmi(input + 4, output + 16, pitch); } void vp8_short_walsh4x4_mmi(int16_t *input, int16_t *output, int pitch) { double ftmp[13]; uint32_t tmp[1]; DECLARE_ALIGNED(8, const uint64_t, ff_ph_01) = { 0x0001000100010001ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_01) = { 0x0000000100000001ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_03) = { 0x0000000300000003ULL }; DECLARE_ALIGNED(8, const uint64_t, ff_pw_mask) = { 0x0001000000010000ULL }; __asm__ volatile ( MMI_LI(%[tmp0], 0x02) "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t" "gsldlc1 %[ftmp1], 0x07(%[ip]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[ip]) \n\t" MMI_ADDU(%[ip], %[ip], %[pitch]) "gsldlc1 %[ftmp2], 0x07(%[ip]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[ip]) \n\t" MMI_ADDU(%[ip], %[ip], %[pitch]) "gsldlc1 %[ftmp3], 0x07(%[ip]) \n\t" "gsldrc1 %[ftmp3], 0x00(%[ip]) \n\t" MMI_ADDU(%[ip], %[ip], %[pitch]) "gsldlc1 %[ftmp4], 0x07(%[ip]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[ip]) \n\t" TRANSPOSE_4H "psllh %[ftmp1], %[ftmp1], %[ftmp11] \n\t" "psllh %[ftmp2], %[ftmp2], %[ftmp11] \n\t" "psllh %[ftmp3], %[ftmp3], %[ftmp11] \n\t" "psllh %[ftmp4], %[ftmp4], %[ftmp11] \n\t" // a "paddh %[ftmp5], %[ftmp1], %[ftmp3] \n\t" // d "paddh %[ftmp6], %[ftmp2], %[ftmp4] \n\t" // c "psubh %[ftmp7], %[ftmp2], %[ftmp4] \n\t" // b "psubh %[ftmp8], %[ftmp1], %[ftmp3] \n\t" // a + d "paddh %[ftmp1], %[ftmp5], %[ftmp6] \n\t" // b + c "paddh %[ftmp2], %[ftmp8], %[ftmp7] \n\t" // b - c "psubh %[ftmp3], %[ftmp8], %[ftmp7] \n\t" // a - d "psubh %[ftmp4], %[ftmp5], %[ftmp6] \n\t" "pcmpeqh %[ftmp6], %[ftmp5], %[ftmp0] \n\t" "paddh %[ftmp6], %[ftmp6], %[ff_ph_01] \n\t" "paddh %[ftmp1], %[ftmp1], %[ftmp6] \n\t" TRANSPOSE_4H // op[2], op[0] "pmaddhw %[ftmp5], %[ftmp1], %[ff_pw_01] \n\t" // op[3], op[1] "pmaddhw %[ftmp1], %[ftmp1], %[ff_pw_mask] \n\t" // op[6], op[4] "pmaddhw %[ftmp6], %[ftmp2], %[ff_pw_01] \n\t" // op[7], op[5] "pmaddhw %[ftmp2], %[ftmp2], %[ff_pw_mask] \n\t" // op[10], op[8] "pmaddhw %[ftmp7], %[ftmp3], %[ff_pw_01] \n\t" // op[11], op[9] "pmaddhw %[ftmp3], %[ftmp3], %[ff_pw_mask] \n\t" // op[14], op[12] "pmaddhw %[ftmp8], %[ftmp4], %[ff_pw_01] \n\t" // op[15], op[13] "pmaddhw %[ftmp4], %[ftmp4], %[ff_pw_mask] \n\t" // a1, a3 "paddw %[ftmp9], %[ftmp5], %[ftmp7] \n\t" // d1, d3 "paddw %[ftmp10], %[ftmp6], %[ftmp8] \n\t" // c1, c3 "psubw %[ftmp11], %[ftmp6], %[ftmp8] \n\t" // b1, b3 "psubw %[ftmp12], %[ftmp5], %[ftmp7] \n\t" // a1 + d1, a3 + d3 "paddw %[ftmp5], %[ftmp9], %[ftmp10] \n\t" // b1 + c1, b3 + c3 "paddw %[ftmp6], %[ftmp12], %[ftmp11] \n\t" // b1 - c1, b3 - c3 "psubw %[ftmp7], %[ftmp12], %[ftmp11] \n\t" // a1 - d1, a3 - d3 "psubw %[ftmp8], %[ftmp9], %[ftmp10] \n\t" // a2, a4 "paddw %[ftmp9], %[ftmp1], %[ftmp3] \n\t" // d2, d4 "paddw %[ftmp10], %[ftmp2], %[ftmp4] \n\t" // c2, c4 "psubw %[ftmp11], %[ftmp2], %[ftmp4] \n\t" 
// b2, b4 "psubw %[ftmp12], %[ftmp1], %[ftmp3] \n\t" // a2 + d2, a4 + d4 "paddw %[ftmp1], %[ftmp9], %[ftmp10] \n\t" // b2 + c2, b4 + c4 "paddw %[ftmp2], %[ftmp12], %[ftmp11] \n\t" // b2 - c2, b4 - c4 "psubw %[ftmp3], %[ftmp12], %[ftmp11] \n\t" // a2 - d2, a4 - d4 "psubw %[ftmp4], %[ftmp9], %[ftmp10] \n\t" MMI_LI(%[tmp0], 0x03) "mtc1 %[tmp0], %[ftmp11] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp1] \n\t" "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "paddw %[ftmp1], %[ftmp1], %[ftmp9] \n\t" "paddw %[ftmp1], %[ftmp1], %[ff_pw_03] \n\t" "psraw %[ftmp1], %[ftmp1], %[ftmp11] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp2] \n\t" "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "paddw %[ftmp2], %[ftmp2], %[ftmp9] \n\t" "paddw %[ftmp2], %[ftmp2], %[ff_pw_03] \n\t" "psraw %[ftmp2], %[ftmp2], %[ftmp11] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp3] \n\t" "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "paddw %[ftmp3], %[ftmp3], %[ftmp9] \n\t" "paddw %[ftmp3], %[ftmp3], %[ff_pw_03] \n\t" "psraw %[ftmp3], %[ftmp3], %[ftmp11] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp4] \n\t" "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "paddw %[ftmp4], %[ftmp4], %[ftmp9] \n\t" "paddw %[ftmp4], %[ftmp4], %[ff_pw_03] \n\t" "psraw %[ftmp4], %[ftmp4], %[ftmp11] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp5] \n\t" "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "paddw %[ftmp5], %[ftmp5], %[ftmp9] \n\t" "paddw %[ftmp5], %[ftmp5], %[ff_pw_03] \n\t" "psraw %[ftmp5], %[ftmp5], %[ftmp11] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp6] \n\t" "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "paddw %[ftmp6], %[ftmp6], %[ftmp9] \n\t" "paddw %[ftmp6], %[ftmp6], %[ff_pw_03] \n\t" "psraw %[ftmp6], %[ftmp6], %[ftmp11] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp7] \n\t" "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "paddw %[ftmp7], %[ftmp7], %[ftmp9] \n\t" "paddw %[ftmp7], %[ftmp7], %[ff_pw_03] \n\t" "psraw %[ftmp7], %[ftmp7], %[ftmp11] \n\t" "pcmpgtw %[ftmp9], %[ftmp0], %[ftmp8] \n\t" "and %[ftmp9], %[ftmp9], %[ff_pw_01] \n\t" "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" "paddw %[ftmp8], %[ftmp8], %[ff_pw_03] \n\t" "psraw %[ftmp8], %[ftmp8], %[ftmp11] \n\t" "packsswh %[ftmp1], %[ftmp1], %[ftmp5] \n\t" "packsswh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" "packsswh %[ftmp3], %[ftmp3], %[ftmp7] \n\t" "packsswh %[ftmp4], %[ftmp4], %[ftmp8] \n\t" MMI_LI(%[tmp0], 0x72) "mtc1 %[tmp0], %[ftmp11] \n\t" "pshufh %[ftmp1], %[ftmp1], %[ftmp11] \n\t" "pshufh %[ftmp2], %[ftmp2], %[ftmp11] \n\t" "pshufh %[ftmp3], %[ftmp3], %[ftmp11] \n\t" "pshufh %[ftmp4], %[ftmp4], %[ftmp11] \n\t" "gssdlc1 %[ftmp1], 0x07(%[op]) \n\t" "gssdrc1 %[ftmp1], 0x00(%[op]) \n\t" "gssdlc1 %[ftmp2], 0x0f(%[op]) \n\t" "gssdrc1 %[ftmp2], 0x08(%[op]) \n\t" "gssdlc1 %[ftmp3], 0x17(%[op]) \n\t" "gssdrc1 %[ftmp3], 0x10(%[op]) \n\t" "gssdlc1 %[ftmp4], 0x1f(%[op]) \n\t" "gssdrc1 %[ftmp4], 0x18(%[op]) \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]), [ip]"+&r"(input) : [op]"r"(output), [ff_pw_01]"f"(ff_pw_01), [pitch]"r"((mips_reg)pitch), [ff_pw_03]"f"(ff_pw_03), [ff_pw_mask]"f"(ff_pw_mask), [ff_ph_01]"f"(ff_ph_01) : "memory" ); } libvpx-1.8.2/vp8/encoder/mips/mmi/vp8_quantize_mmi.c000066400000000000000000000324011357355204000223630ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_mem/vpx_mem.h" #include "vpx_ports/asmdefs_mmi.h" #include "vp8/encoder/onyx_int.h" #include "vp8/encoder/quantize.h" #include "vp8/common/quant_common.h" #define REGULAR_SELECT_EOB(i, rc) \ z = coeff_ptr[rc]; \ sz = (z >> 31); \ x = (z ^ sz) - sz; \ zbin = zbin_ptr[rc] + *(zbin_boost_ptr++) + zbin_oq_value; \ if (x >= zbin) { \ x += round_ptr[rc]; \ y = ((((x * quant_ptr[rc]) >> 16) + x) * quant_shift_ptr[rc]) >> 16; \ if (y) { \ x = (y ^ sz) - sz; \ qcoeff_ptr[rc] = x; \ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; \ eob = i; \ zbin_boost_ptr = b->zrun_zbin_boost; \ } \ } void vp8_fast_quantize_b_mmi(BLOCK *b, BLOCKD *d) { const int16_t *coeff_ptr = b->coeff; const int16_t *round_ptr = b->round; const int16_t *quant_ptr = b->quant_fast; int16_t *qcoeff_ptr = d->qcoeff; int16_t *dqcoeff_ptr = d->dqcoeff; const int16_t *dequant_ptr = d->dequant; const int16_t *inv_zig_zag = vp8_default_inv_zig_zag; double ftmp[13]; uint64_t tmp[1]; DECLARE_ALIGNED(8, const uint64_t, ones) = { 0xffffffffffffffffULL }; int eob = 0; __asm__ volatile( // loop 0 ~ 7 "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "gsldlc1 %[ftmp1], 0x07(%[coeff_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[coeff_ptr]) \n\t" "li %[tmp0], 0x0f \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "gsldlc1 %[ftmp2], 0x0f(%[coeff_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x08(%[coeff_ptr]) \n\t" "psrah %[ftmp3], %[ftmp1], %[ftmp9] \n\t" "xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t" "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" "psrah %[ftmp4], %[ftmp2], %[ftmp9] \n\t" "xor %[ftmp2], %[ftmp4], %[ftmp2] \n\t" "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" "gsldlc1 %[ftmp5], 0x07(%[round_ptr]) \n\t" "gsldrc1 %[ftmp5], 0x00(%[round_ptr]) \n\t" "gsldlc1 %[ftmp6], 0x0f(%[round_ptr]) \n\t" "gsldrc1 %[ftmp6], 0x08(%[round_ptr]) \n\t" "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t" "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" "gsldlc1 %[ftmp7], 0x07(%[quant_ptr]) \n\t" "gsldrc1 %[ftmp7], 0x00(%[quant_ptr]) \n\t" "gsldlc1 %[ftmp8], 0x0f(%[quant_ptr]) \n\t" "gsldrc1 %[ftmp8], 0x08(%[quant_ptr]) \n\t" "pmulhuh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" "pmulhuh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" "xor %[ftmp7], %[ftmp5], %[ftmp3] \n\t" "xor %[ftmp8], %[ftmp6], %[ftmp4] \n\t" "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" "psubh %[ftmp8], %[ftmp8], %[ftmp4] \n\t" "gssdlc1 %[ftmp7], 0x07(%[qcoeff_ptr]) \n\t" "gssdrc1 %[ftmp7], 0x00(%[qcoeff_ptr]) \n\t" "gssdlc1 %[ftmp8], 0x0f(%[qcoeff_ptr]) \n\t" "gssdrc1 %[ftmp8], 0x08(%[qcoeff_ptr]) \n\t" "gsldlc1 %[ftmp1], 0x07(%[inv_zig_zag]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[inv_zig_zag]) \n\t" "gsldlc1 %[ftmp2], 0x0f(%[inv_zig_zag]) \n\t" "gsldrc1 %[ftmp2], 0x08(%[inv_zig_zag]) \n\t" "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" "xor %[ftmp5], %[ftmp5], %[ones] \n\t" "xor %[ftmp6], %[ftmp6], %[ones] \n\t" "and %[ftmp5], %[ftmp5], %[ftmp1] \n\t" "and %[ftmp6], %[ftmp6], %[ftmp2] \n\t" "pmaxsh %[ftmp10], %[ftmp5], %[ftmp6] \n\t" "gsldlc1 %[ftmp5], 0x07(%[dequant_ptr]) \n\t" "gsldrc1 %[ftmp5], 0x00(%[dequant_ptr]) \n\t" "gsldlc1 %[ftmp6], 0x0f(%[dequant_ptr]) \n\t" "gsldrc1 %[ftmp6], 0x08(%[dequant_ptr]) \n\t" "pmullh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" "gssdlc1 %[ftmp5], 
0x07(%[dqcoeff_ptr]) \n\t" "gssdrc1 %[ftmp5], 0x00(%[dqcoeff_ptr]) \n\t" "gssdlc1 %[ftmp6], 0x0f(%[dqcoeff_ptr]) \n\t" "gssdrc1 %[ftmp6], 0x08(%[dqcoeff_ptr]) \n\t" // loop 8 ~ 15 "gsldlc1 %[ftmp1], 0x17(%[coeff_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x10(%[coeff_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x1f(%[coeff_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x18(%[coeff_ptr]) \n\t" "psrah %[ftmp3], %[ftmp1], %[ftmp9] \n\t" "xor %[ftmp1], %[ftmp3], %[ftmp1] \n\t" "psubh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" "psrah %[ftmp4], %[ftmp2], %[ftmp9] \n\t" "xor %[ftmp2], %[ftmp4], %[ftmp2] \n\t" "psubh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" "gsldlc1 %[ftmp5], 0x17(%[round_ptr]) \n\t" "gsldrc1 %[ftmp5], 0x10(%[round_ptr]) \n\t" "gsldlc1 %[ftmp6], 0x1f(%[round_ptr]) \n\t" "gsldrc1 %[ftmp6], 0x18(%[round_ptr]) \n\t" "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t" "paddh %[ftmp6], %[ftmp6], %[ftmp2] \n\t" "gsldlc1 %[ftmp7], 0x17(%[quant_ptr]) \n\t" "gsldrc1 %[ftmp7], 0x10(%[quant_ptr]) \n\t" "gsldlc1 %[ftmp8], 0x1f(%[quant_ptr]) \n\t" "gsldrc1 %[ftmp8], 0x18(%[quant_ptr]) \n\t" "pmulhuh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" "pmulhuh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" "xor %[ftmp7], %[ftmp5], %[ftmp3] \n\t" "xor %[ftmp8], %[ftmp6], %[ftmp4] \n\t" "psubh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" "psubh %[ftmp8], %[ftmp8], %[ftmp4] \n\t" "gssdlc1 %[ftmp7], 0x17(%[qcoeff_ptr]) \n\t" "gssdrc1 %[ftmp7], 0x10(%[qcoeff_ptr]) \n\t" "gssdlc1 %[ftmp8], 0x1f(%[qcoeff_ptr]) \n\t" "gssdrc1 %[ftmp8], 0x18(%[qcoeff_ptr]) \n\t" "gsldlc1 %[ftmp1], 0x17(%[inv_zig_zag]) \n\t" "gsldrc1 %[ftmp1], 0x10(%[inv_zig_zag]) \n\t" "gsldlc1 %[ftmp2], 0x1f(%[inv_zig_zag]) \n\t" "gsldrc1 %[ftmp2], 0x18(%[inv_zig_zag]) \n\t" "pcmpeqh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" "pcmpeqh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" "xor %[ftmp5], %[ftmp5], %[ones] \n\t" "xor %[ftmp6], %[ftmp6], %[ones] \n\t" "and %[ftmp5], %[ftmp5], %[ftmp1] \n\t" "and %[ftmp6], %[ftmp6], %[ftmp2] \n\t" "pmaxsh %[ftmp11], %[ftmp5], %[ftmp6] \n\t" "gsldlc1 %[ftmp5], 0x17(%[dequant_ptr]) \n\t" "gsldrc1 %[ftmp5], 0x10(%[dequant_ptr]) \n\t" "gsldlc1 %[ftmp6], 0x1f(%[dequant_ptr]) \n\t" "gsldrc1 %[ftmp6], 0x18(%[dequant_ptr]) \n\t" "pmullh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" "pmullh %[ftmp6], %[ftmp6], %[ftmp8] \n\t" "gssdlc1 %[ftmp5], 0x17(%[dqcoeff_ptr]) \n\t" "gssdrc1 %[ftmp5], 0x10(%[dqcoeff_ptr]) \n\t" "gssdlc1 %[ftmp6], 0x1f(%[dqcoeff_ptr]) \n\t" "gssdrc1 %[ftmp6], 0x18(%[dqcoeff_ptr]) \n\t" "li %[tmp0], 0x10 \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t" "psrlw %[ftmp11], %[ftmp10], %[ftmp9] \n\t" "pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t" "li %[tmp0], 0xaa \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "pshufh %[ftmp11], %[ftmp10], %[ftmp9] \n\t" "pmaxsh %[ftmp10], %[ftmp10], %[ftmp11] \n\t" "li %[tmp0], 0xffff \n\t" "mtc1 %[tmp0], %[ftmp9] \n\t" "and %[ftmp10], %[ftmp10], %[ftmp9] \n\t" "gssdlc1 %[ftmp10], 0x07(%[eob]) \n\t" "gssdrc1 %[ftmp10], 0x00(%[eob]) \n\t" : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]) : [coeff_ptr] "r"((mips_reg)coeff_ptr), [qcoeff_ptr] "r"((mips_reg)qcoeff_ptr), [dequant_ptr] "r"((mips_reg)dequant_ptr), [round_ptr] "r"((mips_reg)round_ptr), [quant_ptr] "r"((mips_reg)quant_ptr), [dqcoeff_ptr] "r"((mips_reg)dqcoeff_ptr), [inv_zig_zag] "r"((mips_reg)inv_zig_zag), [eob] 
"r"((mips_reg)&eob), [ones] "f"(ones) : "memory"); *d->eob = eob; } void vp8_regular_quantize_b_mmi(BLOCK *b, BLOCKD *d) { int eob = 0; int x, y, z, sz, zbin; const int16_t *zbin_boost_ptr = b->zrun_zbin_boost; const int16_t *coeff_ptr = b->coeff; const int16_t *zbin_ptr = b->zbin; const int16_t *round_ptr = b->round; const int16_t *quant_ptr = b->quant; const int16_t *quant_shift_ptr = b->quant_shift; int16_t *qcoeff_ptr = d->qcoeff; int16_t *dqcoeff_ptr = d->dqcoeff; const int16_t *dequant_ptr = d->dequant; const int16_t zbin_oq_value = b->zbin_extra; register double ftmp0 asm("$f0"); // memset(qcoeff_ptr, 0, 32); // memset(dqcoeff_ptr, 0, 32); /* clang-format off */ __asm__ volatile ( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "gssdlc1 %[ftmp0], 0x07(%[qcoeff_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[qcoeff_ptr]) \n\t" "gssdlc1 %[ftmp0], 0x0f(%[qcoeff_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x08(%[qcoeff_ptr]) \n\t" "gssdlc1 %[ftmp0], 0x17(%[qcoeff_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x10(%[qcoeff_ptr]) \n\t" "gssdlc1 %[ftmp0], 0x1f(%[qcoeff_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x18(%[qcoeff_ptr]) \n\t" "gssdlc1 %[ftmp0], 0x07(%[dqcoeff_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x00(%[dqcoeff_ptr]) \n\t" "gssdlc1 %[ftmp0], 0x0f(%[dqcoeff_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x08(%[dqcoeff_ptr]) \n\t" "gssdlc1 %[ftmp0], 0x17(%[dqcoeff_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x10(%[dqcoeff_ptr]) \n\t" "gssdlc1 %[ftmp0], 0x1f(%[dqcoeff_ptr]) \n\t" "gssdrc1 %[ftmp0], 0x18(%[dqcoeff_ptr]) \n\t" : [ftmp0]"=&f"(ftmp0) : [qcoeff_ptr]"r"(qcoeff_ptr), [dqcoeff_ptr]"r"(dqcoeff_ptr) : "memory" ); /* clang-format on */ REGULAR_SELECT_EOB(1, 0); REGULAR_SELECT_EOB(2, 1); REGULAR_SELECT_EOB(3, 4); REGULAR_SELECT_EOB(4, 8); REGULAR_SELECT_EOB(5, 5); REGULAR_SELECT_EOB(6, 2); REGULAR_SELECT_EOB(7, 3); REGULAR_SELECT_EOB(8, 6); REGULAR_SELECT_EOB(9, 9); REGULAR_SELECT_EOB(10, 12); REGULAR_SELECT_EOB(11, 13); REGULAR_SELECT_EOB(12, 10); REGULAR_SELECT_EOB(13, 7); REGULAR_SELECT_EOB(14, 11); REGULAR_SELECT_EOB(15, 14); REGULAR_SELECT_EOB(16, 15); *d->eob = (char)eob; } libvpx-1.8.2/vp8/encoder/mips/msa/000077500000000000000000000000001357355204000167165ustar00rootroot00000000000000libvpx-1.8.2/vp8/encoder/mips/msa/dct_msa.c000066400000000000000000000163171357355204000205040ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vp8_rtcd.h" #include "vp8/common/mips/msa/vp8_macros_msa.h" #define TRANSPOSE4x4_H(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v8i16 s0_m, s1_m, tp0_m, tp1_m, tp2_m, tp3_m; \ \ ILVR_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \ ILVRL_H2_SH(s1_m, s0_m, tp0_m, tp1_m); \ ILVL_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \ ILVRL_H2_SH(s1_m, s0_m, tp2_m, tp3_m); \ PCKEV_D2_SH(tp2_m, tp0_m, tp3_m, tp1_m, out0, out2); \ PCKOD_D2_SH(tp2_m, tp0_m, tp3_m, tp1_m, out1, out3); \ } #define SET_DOTP_VALUES(coeff, val0, val1, val2, const1, const2) \ { \ v8i16 tmp0_m; \ \ SPLATI_H3_SH(coeff, val0, val1, val2, tmp0_m, const1, const2); \ ILVEV_H2_SH(tmp0_m, const1, const2, tmp0_m, const1, const2); \ } #define RET_1_IF_NZERO_H(in0) \ ({ \ v8i16 tmp0_m; \ v8i16 one_m = __msa_ldi_h(1); \ \ tmp0_m = __msa_ceqi_h(in0, 0); \ tmp0_m = tmp0_m ^ 255; \ tmp0_m = one_m & tmp0_m; \ \ tmp0_m; \ }) #define RET_1_IF_NZERO_W(in0) \ ({ \ v4i32 tmp0_m; \ v4i32 one_m = __msa_ldi_w(1); \ \ tmp0_m = __msa_ceqi_w(in0, 0); \ tmp0_m = tmp0_m ^ 255; \ tmp0_m = one_m & tmp0_m; \ \ tmp0_m; \ }) #define RET_1_IF_NEG_W(in0) \ ({ \ v4i32 tmp0_m; \ \ v4i32 one_m = __msa_ldi_w(1); \ tmp0_m = __msa_clti_s_w(in0, 0); \ tmp0_m = one_m & tmp0_m; \ \ tmp0_m; \ }) void vp8_short_fdct4x4_msa(int16_t *input, int16_t *output, int32_t pitch) { v8i16 in0, in1, in2, in3; v8i16 temp0, temp1; v8i16 const0, const1; v8i16 coeff = { 2217, 5352, -5352, 14500, 7500, 12000, 25000, 26000 }; v4i32 out0, out1, out2, out3; v8i16 zero = { 0 }; LD_SH4(input, pitch / 2, in0, in1, in2, in3); TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); BUTTERFLY_4(in0, in1, in2, in3, temp0, temp1, in1, in3); SLLI_4V(temp0, temp1, in1, in3, 3); in0 = temp0 + temp1; in2 = temp0 - temp1; SET_DOTP_VALUES(coeff, 0, 1, 2, const0, const1); temp0 = __msa_ilvr_h(in3, in1); in1 = __msa_splati_h(coeff, 3); out0 = (v4i32)__msa_ilvev_h(zero, in1); coeff = __msa_ilvl_h(zero, coeff); out1 = __msa_splati_w((v4i32)coeff, 0); DPADD_SH2_SW(temp0, temp0, const0, const1, out0, out1); out0 >>= 12; out1 >>= 12; PCKEV_H2_SH(out0, out0, out1, out1, in1, in3); TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); BUTTERFLY_4(in0, in1, in2, in3, temp0, temp1, in1, in3); in0 = temp0 + temp1 + 7; in2 = temp0 - temp1 + 7; in0 >>= 4; in2 >>= 4; ILVR_H2_SW(zero, in0, zero, in2, out0, out2); temp1 = RET_1_IF_NZERO_H(in3); ILVR_H2_SH(zero, temp1, in3, in1, temp1, temp0); SPLATI_W2_SW(coeff, 2, out3, out1); out3 += out1; out1 = __msa_splati_w((v4i32)coeff, 1); DPADD_SH2_SW(temp0, temp0, const0, const1, out1, out3); out1 >>= 16; out3 >>= 16; out1 += (v4i32)temp1; PCKEV_H2_SH(out1, out0, out3, out2, in0, in2); ST_SH2(in0, in2, output, 8); } void vp8_short_fdct8x4_msa(int16_t *input, int16_t *output, int32_t pitch) { v8i16 in0, in1, in2, in3; v8i16 temp0, temp1, tmp0, tmp1; v8i16 const0, const1, const2; v8i16 coeff = { 2217, 5352, -5352, 14500, 7500, 12000, 25000, 26000 }; v8i16 zero = { 0 }; v4i32 vec0_w, vec1_w, vec2_w, vec3_w; LD_SH4(input, pitch / 2, in0, in1, in2, in3); TRANSPOSE4x4_H(in0, in1, in2, in3, in0, in1, in2, in3); BUTTERFLY_4(in0, in1, in2, in3, temp0, temp1, in1, in3); SLLI_4V(temp0, temp1, in1, in3, 3); in0 = temp0 + temp1; in2 = temp0 - temp1; SET_DOTP_VALUES(coeff, 0, 1, 2, const1, const2); temp0 = __msa_splati_h(coeff, 3); vec1_w = (v4i32)__msa_ilvev_h(zero, temp0); coeff = __msa_ilvl_h(zero, coeff); vec3_w = __msa_splati_w((v4i32)coeff, 0); ILVRL_H2_SH(in3, in1, tmp1, tmp0); vec0_w = vec1_w; vec2_w = vec3_w; DPADD_SH4_SW(tmp1, tmp0, tmp1, tmp0, const1, const1, const2, 
const2, vec0_w, vec1_w, vec2_w, vec3_w); SRA_4V(vec1_w, vec0_w, vec3_w, vec2_w, 12); PCKEV_H2_SH(vec1_w, vec0_w, vec3_w, vec2_w, in1, in3); TRANSPOSE4x4_H(in0, in1, in2, in3, in0, in1, in2, in3); BUTTERFLY_4(in0, in1, in2, in3, temp0, temp1, in1, in3); in0 = temp0 + temp1 + 7; in2 = temp0 - temp1 + 7; in0 >>= 4; in2 >>= 4; SPLATI_W2_SW(coeff, 2, vec3_w, vec1_w); vec3_w += vec1_w; vec1_w = __msa_splati_w((v4i32)coeff, 1); const0 = RET_1_IF_NZERO_H(in3); ILVRL_H2_SH(in3, in1, tmp1, tmp0); vec0_w = vec1_w; vec2_w = vec3_w; DPADD_SH4_SW(tmp1, tmp0, tmp1, tmp0, const1, const1, const2, const2, vec0_w, vec1_w, vec2_w, vec3_w); SRA_4V(vec1_w, vec0_w, vec3_w, vec2_w, 16); PCKEV_H2_SH(vec1_w, vec0_w, vec3_w, vec2_w, in1, in3); in1 += const0; PCKEV_D2_SH(in1, in0, in3, in2, temp0, temp1); ST_SH2(temp0, temp1, output, 8); PCKOD_D2_SH(in1, in0, in3, in2, in0, in2); ST_SH2(in0, in2, output + 16, 8); } void vp8_short_walsh4x4_msa(int16_t *input, int16_t *output, int32_t pitch) { v8i16 in0_h, in1_h, in2_h, in3_h; v4i32 in0_w, in1_w, in2_w, in3_w, temp0, temp1, temp2, temp3; LD_SH4(input, pitch / 2, in0_h, in1_h, in2_h, in3_h); TRANSPOSE4x4_SH_SH(in0_h, in1_h, in2_h, in3_h, in0_h, in1_h, in2_h, in3_h); UNPCK_R_SH_SW(in0_h, in0_w); UNPCK_R_SH_SW(in1_h, in1_w); UNPCK_R_SH_SW(in2_h, in2_w); UNPCK_R_SH_SW(in3_h, in3_w); BUTTERFLY_4(in0_w, in1_w, in3_w, in2_w, temp0, temp3, temp2, temp1); SLLI_4V(temp0, temp1, temp2, temp3, 2); BUTTERFLY_4(temp0, temp1, temp2, temp3, in0_w, in1_w, in2_w, in3_w); temp0 = RET_1_IF_NZERO_W(temp0); in0_w += temp0; TRANSPOSE4x4_SW_SW(in0_w, in1_w, in2_w, in3_w, in0_w, in1_w, in2_w, in3_w); BUTTERFLY_4(in0_w, in1_w, in3_w, in2_w, temp0, temp3, temp2, temp1); BUTTERFLY_4(temp0, temp1, temp2, temp3, in0_w, in1_w, in2_w, in3_w); in0_w += RET_1_IF_NEG_W(in0_w); in1_w += RET_1_IF_NEG_W(in1_w); in2_w += RET_1_IF_NEG_W(in2_w); in3_w += RET_1_IF_NEG_W(in3_w); ADD4(in0_w, 3, in1_w, 3, in2_w, 3, in3_w, 3, in0_w, in1_w, in2_w, in3_w); SRA_4V(in0_w, in1_w, in2_w, in3_w, 3); PCKEV_H2_SH(in1_w, in0_w, in3_w, in2_w, in0_h, in1_h); ST_SH2(in0_h, in1_h, output, 8); } libvpx-1.8.2/vp8/encoder/mips/msa/denoising_msa.c000066400000000000000000000531121357355204000217030ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <stdlib.h>

#include "./vp8_rtcd.h"
#include "vp8/common/mips/msa/vp8_macros_msa.h"
#include "vp8/encoder/denoising.h"

int32_t vp8_denoiser_filter_msa(uint8_t *mc_running_avg_y_ptr,
                                int32_t mc_avg_y_stride,
                                uint8_t *running_avg_y_ptr,
                                int32_t avg_y_stride, uint8_t *sig_ptr,
                                int32_t sig_stride, uint32_t motion_magnitude,
                                int32_t increase_denoising) {
  uint8_t *running_avg_y_start = running_avg_y_ptr;
  uint8_t *sig_start = sig_ptr;
  int32_t cnt = 0;
  int32_t sum_diff = 0;
  int32_t shift_inc1 = 3;
  int32_t delta = 0;
  int32_t sum_diff_thresh;
  v16u8 src0, src1, src2, src3, src4, src5, src6, src7;
  v16u8 src8, src9, src10, src11, src12, src13, src14, src15;
  v16u8 mc_running_avg_y0, running_avg_y, sig0;
  v16u8 mc_running_avg_y1, running_avg_y1, sig1;
  v16u8 coeff0, coeff1;
  v8i16 diff0, diff1, abs_diff0, abs_diff1, abs_diff_neg0, abs_diff_neg1;
  v8i16 adjust0, adjust1, adjust2, adjust3;
  v8i16 shift_inc1_vec = { 0 };
  v8i16 col_sum0 = { 0 };
  v8i16 col_sum1 = { 0 };
  v8i16 col_sum2 = { 0 };
  v8i16 col_sum3 = { 0 };
  v8i16 temp0_h, temp1_h, temp2_h, temp3_h, cmp, delta_vec;
  v4i32 temp0_w;
  v2i64 temp0_d, temp1_d;
  v8i16 zero = { 0 };
  v8i16 one = __msa_ldi_h(1);
  v8i16 four = __msa_ldi_h(4);
  v8i16 val_127 = __msa_ldi_h(127);
  v8i16 adj_val = { 6, 4, 3, 0, -6, -4, -3, 0 };

  if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) {
    adj_val = __msa_add_a_h(adj_val, one);
    if (increase_denoising) {
      adj_val = __msa_add_a_h(adj_val, one);
      shift_inc1 = 4;
    }
    temp0_h = zero - adj_val;
    adj_val = (v8i16)__msa_ilvev_d((v2i64)temp0_h, (v2i64)adj_val);
  }
  adj_val = __msa_insert_h(adj_val, 3, cnt);
  adj_val = __msa_insert_h(adj_val, 7, cnt);
  shift_inc1_vec = __msa_fill_h(shift_inc1);

  for (cnt = 8; cnt--;) {
    v8i16 mask0 = { 0 };
    v8i16 mask1 = { 0 };

    mc_running_avg_y0 = LD_UB(mc_running_avg_y_ptr);
    sig0 = LD_UB(sig_ptr);
    sig_ptr += sig_stride;
    mc_running_avg_y_ptr += mc_avg_y_stride;
    mc_running_avg_y1 = LD_UB(mc_running_avg_y_ptr);
    sig1 = LD_UB(sig_ptr);

    ILVRL_B2_UB(mc_running_avg_y0, sig0, coeff0, coeff1);
    HSUB_UB2_SH(coeff0, coeff1, diff0, diff1);
    abs_diff0 = __msa_add_a_h(diff0, zero);
    abs_diff1 = __msa_add_a_h(diff1, zero);
    cmp = __msa_clei_s_h(abs_diff0, 15);
    cmp = cmp & one;
    mask0 += cmp;
    cmp = __msa_clei_s_h(abs_diff0, 7);
    cmp = cmp & one;
    mask0 += cmp;
    cmp = abs_diff0 < shift_inc1_vec;
    cmp = cmp & one;
    mask0 += cmp;
    cmp = __msa_clei_s_h(abs_diff1, 15);
    cmp = cmp & one;
    mask1 += cmp;
    cmp = __msa_clei_s_h(abs_diff1, 7);
    cmp = cmp & one;
    mask1 += cmp;
    cmp = abs_diff1 < shift_inc1_vec;
    cmp = cmp & one;
    mask1 += cmp;
    temp0_h = __msa_clei_s_h(diff0, 0);
    temp0_h = temp0_h & four;
    mask0 += temp0_h;
    temp1_h = __msa_clei_s_h(diff1, 0);
    temp1_h = temp1_h & four;
    mask1 += temp1_h;
    VSHF_H2_SH(adj_val, adj_val, adj_val, adj_val, mask0, mask1, adjust0,
               adjust1);
    temp2_h = __msa_ceqi_h(adjust0, 0);
    temp3_h = __msa_ceqi_h(adjust1, 0);
    adjust0 =
        (v8i16)__msa_bmnz_v((v16u8)adjust0, (v16u8)diff0, (v16u8)temp2_h);
    adjust1 =
        (v8i16)__msa_bmnz_v((v16u8)adjust1, (v16u8)diff1, (v16u8)temp3_h);
    ADD2(col_sum0, adjust0, col_sum1, adjust1, col_sum0, col_sum1);
    UNPCK_UB_SH(sig0, temp0_h, temp1_h);
    ADD2(temp0_h, adjust0, temp1_h, adjust1, temp0_h, temp1_h);
    MAXI_SH2_SH(temp0_h, temp1_h, 0);
    SAT_UH2_SH(temp0_h, temp1_h, 7);
    temp2_h = (v8i16)__msa_pckev_b((v16i8)temp3_h, (v16i8)temp2_h);
    running_avg_y = (v16u8)__msa_pckev_b((v16i8)temp1_h, (v16i8)temp0_h);
    running_avg_y =
        __msa_bmnz_v(running_avg_y, mc_running_avg_y0, (v16u8)temp2_h);
    ST_UB(running_avg_y, running_avg_y_ptr);
    running_avg_y_ptr += avg_y_stride;

    mask0 = zero;
    mask1 = zero;
    ILVRL_B2_UB(mc_running_avg_y1,
sig1, coeff0, coeff1);
    HSUB_UB2_SH(coeff0, coeff1, diff0, diff1);
    abs_diff0 = __msa_add_a_h(diff0, zero);
    abs_diff1 = __msa_add_a_h(diff1, zero);
    cmp = __msa_clei_s_h(abs_diff0, 15);
    cmp = cmp & one;
    mask0 += cmp;
    cmp = __msa_clei_s_h(abs_diff0, 7);
    cmp = cmp & one;
    mask0 += cmp;
    cmp = abs_diff0 < shift_inc1_vec;
    cmp = cmp & one;
    mask0 += cmp;
    cmp = __msa_clei_s_h(abs_diff1, 15);
    cmp = cmp & one;
    mask1 += cmp;
    cmp = __msa_clei_s_h(abs_diff1, 7);
    cmp = cmp & one;
    mask1 += cmp;
    cmp = abs_diff1 < shift_inc1_vec;
    cmp = cmp & one;
    mask1 += cmp;
    temp0_h = __msa_clei_s_h(diff0, 0);
    temp0_h = temp0_h & four;
    mask0 += temp0_h;
    temp1_h = __msa_clei_s_h(diff1, 0);
    temp1_h = temp1_h & four;
    mask1 += temp1_h;
    VSHF_H2_SH(adj_val, adj_val, adj_val, adj_val, mask0, mask1, adjust0,
               adjust1);
    temp2_h = __msa_ceqi_h(adjust0, 0);
    temp3_h = __msa_ceqi_h(adjust1, 0);
    adjust0 =
        (v8i16)__msa_bmnz_v((v16u8)adjust0, (v16u8)diff0, (v16u8)temp2_h);
    adjust1 =
        (v8i16)__msa_bmnz_v((v16u8)adjust1, (v16u8)diff1, (v16u8)temp3_h);
    ADD2(col_sum0, adjust0, col_sum1, adjust1, col_sum0, col_sum1);
    UNPCK_UB_SH(sig1, temp0_h, temp1_h);
    ADD2(temp0_h, adjust0, temp1_h, adjust1, temp0_h, temp1_h);
    MAXI_SH2_SH(temp0_h, temp1_h, 0);
    SAT_UH2_SH(temp0_h, temp1_h, 7);
    temp2_h = (v8i16)__msa_pckev_b((v16i8)temp3_h, (v16i8)temp2_h);
    running_avg_y = (v16u8)__msa_pckev_b((v16i8)temp1_h, (v16i8)temp0_h);
    running_avg_y =
        __msa_bmnz_v(running_avg_y, mc_running_avg_y1, (v16u8)temp2_h);
    ST_UB(running_avg_y, running_avg_y_ptr);

    sig_ptr += sig_stride;
    mc_running_avg_y_ptr += mc_avg_y_stride;
    running_avg_y_ptr += avg_y_stride;
  }

  col_sum0 = __msa_min_s_h(col_sum0, val_127);
  col_sum1 = __msa_min_s_h(col_sum1, val_127);
  temp0_h = col_sum0 + col_sum1;
  temp0_w = __msa_hadd_s_w(temp0_h, temp0_h);
  temp0_d = __msa_hadd_s_d(temp0_w, temp0_w);
  temp1_d = __msa_splati_d(temp0_d, 1);
  temp0_d += temp1_d;
  sum_diff = __msa_copy_s_w((v4i32)temp0_d, 0);

  sig_ptr -= sig_stride * 16;
  mc_running_avg_y_ptr -= mc_avg_y_stride * 16;
  running_avg_y_ptr -= avg_y_stride * 16;

  /* Default threshold restored: this copy was missing the assignment, so
     sum_diff_thresh was read uninitialized whenever increase_denoising was
     0. The UV variant below uses the same SUM_DIFF_THRESHOLD_UV /
     SUM_DIFF_THRESHOLD_HIGH_UV pattern. */
  sum_diff_thresh = SUM_DIFF_THRESHOLD;
  if (increase_denoising) {
    sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH;
  }
  if (abs(sum_diff) > sum_diff_thresh) {
    delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1;
    delta_vec = __msa_fill_h(delta);
    if (delta < 4) {
      for (cnt = 8; cnt--;) {
        running_avg_y = LD_UB(running_avg_y_ptr);
        mc_running_avg_y0 = LD_UB(mc_running_avg_y_ptr);
        sig0 = LD_UB(sig_ptr);
        sig_ptr += sig_stride;
        mc_running_avg_y_ptr += mc_avg_y_stride;
        running_avg_y_ptr += avg_y_stride;
        mc_running_avg_y1 = LD_UB(mc_running_avg_y_ptr);
        sig1 = LD_UB(sig_ptr);
        running_avg_y1 = LD_UB(running_avg_y_ptr);

        ILVRL_B2_UB(mc_running_avg_y0, sig0, coeff0, coeff1);
        HSUB_UB2_SH(coeff0, coeff1, diff0, diff1);
        abs_diff0 = __msa_add_a_h(diff0, zero);
        abs_diff1 = __msa_add_a_h(diff1, zero);
        temp0_h = abs_diff0 < delta_vec;
        temp1_h = abs_diff1 < delta_vec;
        abs_diff0 = (v8i16)__msa_bmz_v((v16u8)abs_diff0, (v16u8)delta_vec,
                                       (v16u8)temp0_h);
        abs_diff1 = (v8i16)__msa_bmz_v((v16u8)abs_diff1, (v16u8)delta_vec,
                                       (v16u8)temp1_h);
        SUB2(zero, abs_diff0, zero, abs_diff1, abs_diff_neg0, abs_diff_neg1);
        abs_diff_neg0 = zero - abs_diff0;
        abs_diff_neg1 = zero - abs_diff1;
        temp0_h = __msa_clei_s_h(diff0, 0);
        temp1_h = __msa_clei_s_h(diff1, 0);
        adjust0 = (v8i16)__msa_bmnz_v((v16u8)abs_diff0, (v16u8)abs_diff_neg0,
                                      (v16u8)temp0_h);
        adjust1 = (v8i16)__msa_bmnz_v((v16u8)abs_diff1, (v16u8)abs_diff_neg1,
                                      (v16u8)temp1_h);
        ILVRL_B2_SH(zero, running_avg_y, temp2_h, temp3_h);
        ADD2(temp2_h, adjust0, temp3_h, adjust1, adjust2, adjust3);
        MAXI_SH2_SH(adjust2, adjust3, 0);
        SAT_UH2_SH(adjust2,
adjust3, 7); temp0_h = __msa_ceqi_h(diff0, 0); temp1_h = __msa_ceqi_h(diff1, 0); adjust2 = (v8i16)__msa_bmz_v((v16u8)adjust2, (v16u8)temp2_h, (v16u8)temp0_h); adjust3 = (v8i16)__msa_bmz_v((v16u8)adjust3, (v16u8)temp3_h, (v16u8)temp1_h); adjust0 = (v8i16)__msa_bmnz_v((v16u8)adjust0, (v16u8)zero, (v16u8)temp0_h); adjust1 = (v8i16)__msa_bmnz_v((v16u8)adjust1, (v16u8)zero, (v16u8)temp1_h); ADD2(col_sum2, adjust0, col_sum3, adjust1, col_sum2, col_sum3); running_avg_y = (v16u8)__msa_pckev_b((v16i8)adjust3, (v16i8)adjust2); ST_UB(running_avg_y, running_avg_y_ptr - avg_y_stride); ILVRL_B2_UB(mc_running_avg_y1, sig1, coeff0, coeff1); HSUB_UB2_SH(coeff0, coeff1, diff0, diff1); abs_diff0 = __msa_add_a_h(diff0, zero); abs_diff1 = __msa_add_a_h(diff1, zero); temp0_h = abs_diff0 < delta_vec; temp1_h = abs_diff1 < delta_vec; abs_diff0 = (v8i16)__msa_bmz_v((v16u8)abs_diff0, (v16u8)delta_vec, (v16u8)temp0_h); abs_diff1 = (v8i16)__msa_bmz_v((v16u8)abs_diff1, (v16u8)delta_vec, (v16u8)temp1_h); SUB2(zero, abs_diff0, zero, abs_diff1, abs_diff_neg0, abs_diff_neg1); temp0_h = __msa_clei_s_h(diff0, 0); temp1_h = __msa_clei_s_h(diff1, 0); adjust0 = (v8i16)__msa_bmnz_v((v16u8)abs_diff0, (v16u8)abs_diff_neg0, (v16u8)temp0_h); adjust1 = (v8i16)__msa_bmnz_v((v16u8)abs_diff1, (v16u8)abs_diff_neg1, (v16u8)temp1_h); ILVRL_H2_SH(zero, running_avg_y1, temp2_h, temp3_h); ADD2(temp2_h, adjust0, temp3_h, adjust1, adjust2, adjust3); MAXI_SH2_SH(adjust2, adjust3, 0); SAT_UH2_SH(adjust2, adjust3, 7); temp0_h = __msa_ceqi_h(diff0, 0); temp1_h = __msa_ceqi_h(diff1, 0); adjust2 = (v8i16)__msa_bmz_v((v16u8)adjust2, (v16u8)temp2_h, (v16u8)temp0_h); adjust3 = (v8i16)__msa_bmz_v((v16u8)adjust3, (v16u8)temp3_h, (v16u8)temp1_h); adjust0 = (v8i16)__msa_bmz_v((v16u8)adjust0, (v16u8)zero, (v16u8)temp0_h); adjust1 = (v8i16)__msa_bmz_v((v16u8)adjust1, (v16u8)zero, (v16u8)temp1_h); ADD2(col_sum2, adjust0, col_sum3, adjust1, col_sum2, col_sum3); running_avg_y = (v16u8)__msa_pckev_b((v16i8)adjust3, (v16i8)adjust2); ST_UB(running_avg_y, running_avg_y_ptr); running_avg_y_ptr += avg_y_stride; } col_sum2 = __msa_min_s_h(col_sum2, val_127); col_sum3 = __msa_min_s_h(col_sum3, val_127); temp0_h = col_sum2 + col_sum3; temp0_w = __msa_hadd_s_w(temp0_h, temp0_h); temp0_d = __msa_hadd_s_d(temp0_w, temp0_w); temp1_d = __msa_splati_d(temp0_d, 1); temp0_d += (v2i64)temp1_d; sum_diff = __msa_copy_s_w((v4i32)temp0_d, 0); if (abs(sum_diff) > SUM_DIFF_THRESHOLD) { return COPY_BLOCK; } } else { return COPY_BLOCK; } } LD_UB8(sig_start, sig_stride, src0, src1, src2, src3, src4, src5, src6, src7); sig_start += (8 * sig_stride); LD_UB8(sig_start, sig_stride, src8, src9, src10, src11, src12, src13, src14, src15); ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, running_avg_y_start, avg_y_stride); running_avg_y_start += (8 * avg_y_stride); ST_UB8(src8, src9, src10, src11, src12, src13, src14, src15, running_avg_y_start, avg_y_stride); return FILTER_BLOCK; } int32_t vp8_denoiser_filter_uv_msa( uint8_t *mc_running_avg_y_ptr, int32_t mc_avg_y_stride, uint8_t *running_avg_y_ptr, int32_t avg_y_stride, uint8_t *sig_ptr, int32_t sig_stride, uint32_t motion_magnitude, int32_t increase_denoising) { uint8_t *running_avg_y_start = running_avg_y_ptr; uint8_t *sig_start = sig_ptr; int32_t cnt = 0; int32_t sum_diff = 0; int32_t shift_inc1 = 3; int32_t delta = 0; int32_t sum_block = 0; int32_t sum_diff_thresh; int64_t dst0, dst1, src0, src1, src2, src3; v16u8 mc_running_avg_y0, running_avg_y, sig0; v16u8 mc_running_avg_y1, running_avg_y1, sig1; v16u8 sig2, sig3, sig4, sig5, 
sig6, sig7; v16u8 coeff0; v8i16 diff0, abs_diff0, abs_diff_neg0; v8i16 adjust0, adjust2; v8i16 shift_inc1_vec = { 0 }; v8i16 col_sum0 = { 0 }; v8i16 temp0_h, temp2_h, cmp, delta_vec; v4i32 temp0_w; v2i64 temp0_d, temp1_d; v16i8 zero = { 0 }; v8i16 one = __msa_ldi_h(1); v8i16 four = __msa_ldi_h(4); v8i16 adj_val = { 6, 4, 3, 0, -6, -4, -3, 0 }; sig0 = LD_UB(sig_ptr); sig_ptr += sig_stride; temp0_h = (v8i16)__msa_ilvr_b(zero, (v16i8)sig0); sig1 = LD_UB(sig_ptr); sig_ptr += sig_stride; temp0_h += (v8i16)__msa_ilvr_b(zero, (v16i8)sig1); sig2 = LD_UB(sig_ptr); sig_ptr += sig_stride; temp0_h += (v8i16)__msa_ilvr_b(zero, (v16i8)sig2); sig3 = LD_UB(sig_ptr); sig_ptr += sig_stride; temp0_h += (v8i16)__msa_ilvr_b(zero, (v16i8)sig3); sig4 = LD_UB(sig_ptr); sig_ptr += sig_stride; temp0_h += (v8i16)__msa_ilvr_b(zero, (v16i8)sig4); sig5 = LD_UB(sig_ptr); sig_ptr += sig_stride; temp0_h += (v8i16)__msa_ilvr_b(zero, (v16i8)sig5); sig6 = LD_UB(sig_ptr); sig_ptr += sig_stride; temp0_h += (v8i16)__msa_ilvr_b(zero, (v16i8)sig6); sig7 = LD_UB(sig_ptr); sig_ptr += sig_stride; temp0_h += (v8i16)__msa_ilvr_b(zero, (v16i8)sig7); temp0_w = __msa_hadd_s_w(temp0_h, temp0_h); temp0_d = __msa_hadd_s_d(temp0_w, temp0_w); temp1_d = __msa_splati_d(temp0_d, 1); temp0_d += temp1_d; sum_block = __msa_copy_s_w((v4i32)temp0_d, 0); sig_ptr -= sig_stride * 8; if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) { return COPY_BLOCK; } if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) { adj_val = __msa_add_a_h(adj_val, one); if (increase_denoising) { adj_val = __msa_add_a_h(adj_val, one); shift_inc1 = 4; } temp0_h = (v8i16)zero - adj_val; adj_val = (v8i16)__msa_ilvev_d((v2i64)temp0_h, (v2i64)adj_val); } adj_val = __msa_insert_h(adj_val, 3, cnt); adj_val = __msa_insert_h(adj_val, 7, cnt); shift_inc1_vec = __msa_fill_h(shift_inc1); for (cnt = 4; cnt--;) { v8i16 mask0 = { 0 }; mc_running_avg_y0 = LD_UB(mc_running_avg_y_ptr); sig0 = LD_UB(sig_ptr); sig_ptr += sig_stride; mc_running_avg_y_ptr += mc_avg_y_stride; mc_running_avg_y1 = LD_UB(mc_running_avg_y_ptr); sig1 = LD_UB(sig_ptr); coeff0 = (v16u8)__msa_ilvr_b((v16i8)mc_running_avg_y0, (v16i8)sig0); diff0 = __msa_hsub_u_h(coeff0, coeff0); abs_diff0 = __msa_add_a_h(diff0, (v8i16)zero); cmp = __msa_clei_s_h(abs_diff0, 15); cmp = cmp & one; mask0 += cmp; cmp = __msa_clei_s_h(abs_diff0, 7); cmp = cmp & one; mask0 += cmp; cmp = abs_diff0 < shift_inc1_vec; cmp = cmp & one; mask0 += cmp; temp0_h = __msa_clei_s_h(diff0, 0); temp0_h = temp0_h & four; mask0 += temp0_h; adjust0 = __msa_vshf_h(mask0, adj_val, adj_val); temp2_h = __msa_ceqi_h(adjust0, 0); adjust0 = (v8i16)__msa_bmnz_v((v16u8)adjust0, (v16u8)diff0, (v16u8)temp2_h); col_sum0 += adjust0; temp0_h = (v8i16)__msa_ilvr_b(zero, (v16i8)sig0); temp0_h += adjust0; temp0_h = __msa_maxi_s_h(temp0_h, 0); temp0_h = (v8i16)__msa_sat_u_h((v8u16)temp0_h, 7); temp2_h = (v8i16)__msa_pckev_b((v16i8)temp2_h, (v16i8)temp2_h); running_avg_y = (v16u8)__msa_pckev_b((v16i8)temp0_h, (v16i8)temp0_h); running_avg_y = __msa_bmnz_v(running_avg_y, mc_running_avg_y0, (v16u8)temp2_h); dst0 = __msa_copy_s_d((v2i64)running_avg_y, 0); SD(dst0, running_avg_y_ptr); running_avg_y_ptr += avg_y_stride; mask0 = __msa_ldi_h(0); coeff0 = (v16u8)__msa_ilvr_b((v16i8)mc_running_avg_y1, (v16i8)sig1); diff0 = __msa_hsub_u_h(coeff0, coeff0); abs_diff0 = __msa_add_a_h(diff0, (v8i16)zero); cmp = __msa_clei_s_h(abs_diff0, 15); cmp = cmp & one; mask0 += cmp; cmp = __msa_clei_s_h(abs_diff0, 7); cmp = cmp & one; mask0 += cmp; cmp = abs_diff0 < shift_inc1_vec; cmp = cmp & 
one; mask0 += cmp; temp0_h = __msa_clei_s_h(diff0, 0); temp0_h = temp0_h & four; mask0 += temp0_h; adjust0 = __msa_vshf_h(mask0, adj_val, adj_val); temp2_h = __msa_ceqi_h(adjust0, 0); adjust0 = (v8i16)__msa_bmnz_v((v16u8)adjust0, (v16u8)diff0, (v16u8)temp2_h); col_sum0 += adjust0; temp0_h = (v8i16)__msa_ilvr_b(zero, (v16i8)sig1); temp0_h += adjust0; temp0_h = __msa_maxi_s_h(temp0_h, 0); temp0_h = (v8i16)__msa_sat_u_h((v8u16)temp0_h, 7); temp2_h = (v8i16)__msa_pckev_b((v16i8)temp2_h, (v16i8)temp2_h); running_avg_y = (v16u8)__msa_pckev_b((v16i8)temp0_h, (v16i8)temp0_h); running_avg_y = __msa_bmnz_v(running_avg_y, mc_running_avg_y1, (v16u8)temp2_h); dst1 = __msa_copy_s_d((v2i64)running_avg_y, 0); SD(dst1, running_avg_y_ptr); sig_ptr += sig_stride; mc_running_avg_y_ptr += mc_avg_y_stride; running_avg_y_ptr += avg_y_stride; } temp0_h = col_sum0; temp0_w = __msa_hadd_s_w(temp0_h, temp0_h); temp0_d = __msa_hadd_s_d(temp0_w, temp0_w); temp1_d = __msa_splati_d(temp0_d, 1); temp0_d += temp1_d; sum_diff = __msa_copy_s_w((v4i32)temp0_d, 0); sig_ptr -= sig_stride * 8; mc_running_avg_y_ptr -= mc_avg_y_stride * 8; running_avg_y_ptr -= avg_y_stride * 8; sum_diff_thresh = SUM_DIFF_THRESHOLD_UV; if (increase_denoising) { sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV; } if (abs(sum_diff) > sum_diff_thresh) { delta = ((abs(sum_diff) - sum_diff_thresh) >> 8) + 1; delta_vec = __msa_fill_h(delta); if (delta < 4) { for (cnt = 4; cnt--;) { running_avg_y = LD_UB(running_avg_y_ptr); mc_running_avg_y0 = LD_UB(mc_running_avg_y_ptr); sig0 = LD_UB(sig_ptr); /* Update pointers for next iteration. */ sig_ptr += sig_stride; mc_running_avg_y_ptr += mc_avg_y_stride; running_avg_y_ptr += avg_y_stride; mc_running_avg_y1 = LD_UB(mc_running_avg_y_ptr); sig1 = LD_UB(sig_ptr); running_avg_y1 = LD_UB(running_avg_y_ptr); coeff0 = (v16u8)__msa_ilvr_b((v16i8)mc_running_avg_y0, (v16i8)sig0); diff0 = __msa_hsub_u_h(coeff0, coeff0); abs_diff0 = __msa_add_a_h(diff0, (v8i16)zero); temp0_h = delta_vec < abs_diff0; abs_diff0 = (v8i16)__msa_bmnz_v((v16u8)abs_diff0, (v16u8)delta_vec, (v16u8)temp0_h); abs_diff_neg0 = (v8i16)zero - abs_diff0; temp0_h = __msa_clei_s_h(diff0, 0); adjust0 = (v8i16)__msa_bmz_v((v16u8)abs_diff0, (v16u8)abs_diff_neg0, (v16u8)temp0_h); temp2_h = (v8i16)__msa_ilvr_b(zero, (v16i8)running_avg_y); adjust2 = temp2_h + adjust0; adjust2 = __msa_maxi_s_h(adjust2, 0); adjust2 = (v8i16)__msa_sat_u_h((v8u16)adjust2, 7); temp0_h = __msa_ceqi_h(diff0, 0); adjust2 = (v8i16)__msa_bmnz_v((v16u8)adjust2, (v16u8)temp2_h, (v16u8)temp0_h); adjust0 = (v8i16)__msa_bmnz_v((v16u8)adjust0, (v16u8)zero, (v16u8)temp0_h); col_sum0 += adjust0; running_avg_y = (v16u8)__msa_pckev_b((v16i8)adjust2, (v16i8)adjust2); dst0 = __msa_copy_s_d((v2i64)running_avg_y, 0); SD(dst0, running_avg_y_ptr - avg_y_stride); coeff0 = (v16u8)__msa_ilvr_b((v16i8)mc_running_avg_y1, (v16i8)sig1); diff0 = __msa_hsub_u_h(coeff0, coeff0); abs_diff0 = __msa_add_a_h(diff0, (v8i16)zero); temp0_h = delta_vec < abs_diff0; abs_diff0 = (v8i16)__msa_bmnz_v((v16u8)abs_diff0, (v16u8)delta_vec, (v16u8)temp0_h); abs_diff_neg0 = (v8i16)zero - abs_diff0; temp0_h = __msa_clei_s_h(diff0, 0); adjust0 = (v8i16)__msa_bmz_v((v16u8)abs_diff0, (v16u8)abs_diff_neg0, (v16u8)temp0_h); temp2_h = (v8i16)__msa_ilvr_b(zero, (v16i8)running_avg_y1); adjust2 = temp2_h + adjust0; adjust2 = __msa_maxi_s_h(adjust2, 0); adjust2 = (v8i16)__msa_sat_u_h((v8u16)adjust2, 7); temp0_h = __msa_ceqi_h(diff0, 0); adjust2 = (v8i16)__msa_bmnz_v((v16u8)adjust2, (v16u8)temp2_h, (v16u8)temp0_h); adjust0 = 
(v8i16)__msa_bmnz_v((v16u8)adjust0, (v16u8)zero, (v16u8)temp0_h); col_sum0 += adjust0; running_avg_y = (v16u8)__msa_pckev_b((v16i8)adjust2, (v16i8)adjust2); dst1 = __msa_copy_s_d((v2i64)running_avg_y, 0); SD(dst1, running_avg_y_ptr); running_avg_y_ptr += avg_y_stride; } temp0_h = col_sum0; temp0_w = __msa_hadd_s_w(temp0_h, temp0_h); temp0_d = __msa_hadd_s_d(temp0_w, temp0_w); temp1_d = __msa_splati_d(temp0_d, 1); temp0_d += temp1_d; sum_diff = __msa_copy_s_w((v4i32)temp0_d, 0); if (abs(sum_diff) > sum_diff_thresh) { return COPY_BLOCK; } } else { return COPY_BLOCK; } } LD4(sig_start, sig_stride, src0, src1, src2, src3); sig_start += (4 * sig_stride); SD4(src0, src1, src2, src3, running_avg_y_start, avg_y_stride); running_avg_y_start += (4 * avg_y_stride); LD4(sig_start, sig_stride, src0, src1, src2, src3); SD4(src0, src1, src2, src3, running_avg_y_start, avg_y_stride); return FILTER_BLOCK; } libvpx-1.8.2/vp8/encoder/mips/msa/encodeopt_msa.c000066400000000000000000000124041357355204000217030ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp8_rtcd.h" #include "vp8/common/mips/msa/vp8_macros_msa.h" #include "vp8/encoder/block.h" int32_t vp8_block_error_msa(int16_t *coeff_ptr, int16_t *dq_coeff_ptr) { int32_t err = 0; uint32_t loop_cnt; v8i16 coeff, dq_coeff, coeff0, coeff1; v4i32 diff0, diff1; v2i64 err0 = { 0 }; v2i64 err1 = { 0 }; for (loop_cnt = 2; loop_cnt--;) { coeff = LD_SH(coeff_ptr); dq_coeff = LD_SH(dq_coeff_ptr); ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1); HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); DPADD_SD2_SD(diff0, diff1, err0, err1); coeff_ptr += 8; dq_coeff_ptr += 8; } err0 += __msa_splati_d(err0, 1); err1 += __msa_splati_d(err1, 1); err = __msa_copy_s_d(err0, 0); err += __msa_copy_s_d(err1, 0); return err; } int32_t vp8_mbblock_error_msa(MACROBLOCK *mb, int32_t dc) { BLOCK *be; BLOCKD *bd; int16_t *coeff_ptr, *dq_coeff_ptr; int32_t err = 0; uint32_t loop_cnt; v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4; v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4; v4i32 diff0, diff1; v2i64 err0, err1; v16u8 zero = { 0 }; v16u8 mask0 = (v16u8)__msa_ldi_b(255); if (1 == dc) { mask0 = (v16u8)__msa_insve_w((v4i32)mask0, 0, (v4i32)zero); } for (loop_cnt = 0; loop_cnt < 8; ++loop_cnt) { be = &mb->block[2 * loop_cnt]; bd = &mb->e_mbd.block[2 * loop_cnt]; coeff_ptr = be->coeff; dq_coeff_ptr = bd->dqcoeff; coeff = LD_SH(coeff_ptr); dq_coeff = LD_SH(dq_coeff_ptr); coeff_ptr += 8; dq_coeff_ptr += 8; coeff2 = LD_SH(coeff_ptr); dq_coeff2 = LD_SH(dq_coeff_ptr); be = &mb->block[2 * loop_cnt + 1]; bd = &mb->e_mbd.block[2 * loop_cnt + 1]; coeff_ptr = be->coeff; dq_coeff_ptr = bd->dqcoeff; coeff3 = LD_SH(coeff_ptr); dq_coeff3 = LD_SH(dq_coeff_ptr); coeff_ptr += 8; dq_coeff_ptr += 8; coeff4 = LD_SH(coeff_ptr); dq_coeff4 = LD_SH(dq_coeff_ptr); ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1); HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0); DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1); ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1); HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); DPADD_SD2_SD(diff0, diff1, err0, err1); err0 += __msa_splati_d(err0, 1); err1 += 
__msa_splati_d(err1, 1); err += __msa_copy_s_d(err0, 0); err += __msa_copy_s_d(err1, 0); ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1); HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); diff0 = (v4i32)__msa_bmnz_v(zero, (v16u8)diff0, mask0); DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1); ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1); HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); DPADD_SD2_SD(diff0, diff1, err0, err1); err0 += __msa_splati_d(err0, 1); err1 += __msa_splati_d(err1, 1); err += __msa_copy_s_d(err0, 0); err += __msa_copy_s_d(err1, 0); } return err; } int32_t vp8_mbuverror_msa(MACROBLOCK *mb) { BLOCK *be; BLOCKD *bd; int16_t *coeff_ptr, *dq_coeff_ptr; int32_t err = 0; uint32_t loop_cnt; v8i16 coeff, coeff0, coeff1, coeff2, coeff3, coeff4; v8i16 dq_coeff, dq_coeff2, dq_coeff3, dq_coeff4; v4i32 diff0, diff1; v2i64 err0, err1, err_dup0, err_dup1; for (loop_cnt = 16; loop_cnt < 24; loop_cnt += 2) { be = &mb->block[loop_cnt]; bd = &mb->e_mbd.block[loop_cnt]; coeff_ptr = be->coeff; dq_coeff_ptr = bd->dqcoeff; coeff = LD_SH(coeff_ptr); dq_coeff = LD_SH(dq_coeff_ptr); coeff_ptr += 8; dq_coeff_ptr += 8; coeff2 = LD_SH(coeff_ptr); dq_coeff2 = LD_SH(dq_coeff_ptr); be = &mb->block[loop_cnt + 1]; bd = &mb->e_mbd.block[loop_cnt + 1]; coeff_ptr = be->coeff; dq_coeff_ptr = bd->dqcoeff; coeff3 = LD_SH(coeff_ptr); dq_coeff3 = LD_SH(dq_coeff_ptr); coeff_ptr += 8; dq_coeff_ptr += 8; coeff4 = LD_SH(coeff_ptr); dq_coeff4 = LD_SH(dq_coeff_ptr); ILVRL_H2_SH(coeff, dq_coeff, coeff0, coeff1); HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1); ILVRL_H2_SH(coeff2, dq_coeff2, coeff0, coeff1); HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); DPADD_SD2_SD(diff0, diff1, err0, err1); err_dup0 = __msa_splati_d(err0, 1); err_dup1 = __msa_splati_d(err1, 1); ADD2(err0, err_dup0, err1, err_dup1, err0, err1); err += __msa_copy_s_d(err0, 0); err += __msa_copy_s_d(err1, 0); ILVRL_H2_SH(coeff3, dq_coeff3, coeff0, coeff1); HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); DOTP_SW2_SD(diff0, diff1, diff0, diff1, err0, err1); ILVRL_H2_SH(coeff4, dq_coeff4, coeff0, coeff1); HSUB_UH2_SW(coeff0, coeff1, diff0, diff1); DPADD_SD2_SD(diff0, diff1, err0, err1); err_dup0 = __msa_splati_d(err0, 1); err_dup1 = __msa_splati_d(err1, 1); ADD2(err0, err_dup0, err1, err_dup1, err0, err1); err += __msa_copy_s_d(err0, 0); err += __msa_copy_s_d(err1, 0); } return err; } libvpx-1.8.2/vp8/encoder/mips/msa/quantize_msa.c000066400000000000000000000173471357355204000215760ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vp8_rtcd.h" #include "vp8/common/mips/msa/vp8_macros_msa.h" #include "vp8/encoder/block.h" static int8_t fast_quantize_b_msa(int16_t *coeff_ptr, int16_t *round, int16_t *quant, int16_t *de_quant, int16_t *q_coeff, int16_t *dq_coeff) { int32_t cnt, eob; v16i8 inv_zig_zag = { 0, 1, 5, 6, 2, 4, 7, 12, 3, 8, 11, 13, 9, 10, 14, 15 }; v8i16 round0, round1; v8i16 sign_z0, sign_z1; v8i16 q_coeff0, q_coeff1; v8i16 x0, x1, de_quant0, de_quant1; v8i16 coeff0, coeff1, z0, z1; v8i16 quant0, quant1, quant2, quant3; v8i16 zero = { 0 }; v8i16 inv_zig_zag0, inv_zig_zag1; v8i16 zigzag_mask0 = { 0, 1, 4, 8, 5, 2, 3, 6 }; v8i16 zigzag_mask1 = { 9, 12, 13, 10, 7, 11, 14, 15 }; v8i16 temp0_h, temp1_h, temp2_h, temp3_h; v4i32 temp0_w, temp1_w, temp2_w, temp3_w; ILVRL_B2_SH(zero, inv_zig_zag, inv_zig_zag0, inv_zig_zag1); eob = -1; LD_SH2(coeff_ptr, 8, coeff0, coeff1); VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, z0, z1); LD_SH2(round, 8, coeff0, coeff1); VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, round0, round1); LD_SH2(quant, 8, coeff0, coeff1); VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, quant0, quant2); sign_z0 = z0 >> 15; sign_z1 = z1 >> 15; x0 = __msa_add_a_h(z0, zero); x1 = __msa_add_a_h(z1, zero); ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3); ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2); ILVL_H2_SH(round0, x0, round1, x1, temp1_h, temp3_h); ILVR_H2_SH(round0, x0, round1, x1, temp0_h, temp2_h); DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2, quant3, temp0_w, temp1_w, temp2_w, temp3_w); SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16); PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, x0, x1); x0 = x0 ^ sign_z0; x1 = x1 ^ sign_z1; SUB2(x0, sign_z0, x1, sign_z1, x0, x1); VSHF_H2_SH(x0, x1, x0, x1, inv_zig_zag0, inv_zig_zag1, q_coeff0, q_coeff1); ST_SH2(q_coeff0, q_coeff1, q_coeff, 8); LD_SH2(de_quant, 8, de_quant0, de_quant1); q_coeff0 *= de_quant0; q_coeff1 *= de_quant1; ST_SH2(q_coeff0, q_coeff1, dq_coeff, 8); for (cnt = 0; cnt < 16; ++cnt) { if ((cnt <= 7) && (x1[7 - cnt] != 0)) { eob = (15 - cnt); break; } if ((cnt > 7) && (x0[7 - (cnt - 8)] != 0)) { eob = (7 - (cnt - 8)); break; } } return (int8_t)(eob + 1); } static int8_t exact_regular_quantize_b_msa( int16_t *zbin_boost, int16_t *coeff_ptr, int16_t *zbin, int16_t *round, int16_t *quant, int16_t *quant_shift, int16_t *de_quant, int16_t zbin_oq_in, int16_t *q_coeff, int16_t *dq_coeff) { int32_t cnt, eob; int16_t *boost_temp = zbin_boost; v16i8 inv_zig_zag = { 0, 1, 5, 6, 2, 4, 7, 12, 3, 8, 11, 13, 9, 10, 14, 15 }; v8i16 round0, round1; v8i16 sign_z0, sign_z1; v8i16 q_coeff0, q_coeff1; v8i16 z_bin0, z_bin1, zbin_o_q; v8i16 x0, x1, sign_x0, sign_x1, de_quant0, de_quant1; v8i16 coeff0, coeff1, z0, z1; v8i16 quant0, quant1, quant2, quant3; v8i16 zero = { 0 }; v8i16 inv_zig_zag0, inv_zig_zag1; v8i16 zigzag_mask0 = { 0, 1, 4, 8, 5, 2, 3, 6 }; v8i16 zigzag_mask1 = { 9, 12, 13, 10, 7, 11, 14, 15 }; v8i16 temp0_h, temp1_h, temp2_h, temp3_h; v4i32 temp0_w, temp1_w, temp2_w, temp3_w; ILVRL_B2_SH(zero, inv_zig_zag, inv_zig_zag0, inv_zig_zag1); zbin_o_q = __msa_fill_h(zbin_oq_in); eob = -1; LD_SH2(coeff_ptr, 8, coeff0, coeff1); VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, z0, z1); LD_SH2(round, 8, coeff0, coeff1); VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, round0, round1); LD_SH2(quant, 8, coeff0, coeff1); VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, 
zigzag_mask1, quant0, quant2); LD_SH2(zbin, 8, coeff0, coeff1); VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, z_bin0, z_bin1); sign_z0 = z0 >> 15; sign_z1 = z1 >> 15; x0 = __msa_add_a_h(z0, zero); x1 = __msa_add_a_h(z1, zero); SUB2(x0, z_bin0, x1, z_bin1, z_bin0, z_bin1); SUB2(z_bin0, zbin_o_q, z_bin1, zbin_o_q, z_bin0, z_bin1); ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3); ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2); ILVL_H2_SH(round0, x0, round1, x1, temp1_h, temp3_h); ILVR_H2_SH(round0, x0, round1, x1, temp0_h, temp2_h); DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2, quant3, temp0_w, temp1_w, temp2_w, temp3_w); SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16); PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, temp0_h, temp2_h); LD_SH2(quant_shift, 8, coeff0, coeff1); VSHF_H2_SH(coeff0, coeff1, coeff0, coeff1, zigzag_mask0, zigzag_mask1, quant0, quant2); ILVL_H2_SH(quant0, quant0, quant2, quant2, quant1, quant3); ILVR_H2_SH(quant0, quant0, quant2, quant2, quant0, quant2); ADD2(x0, round0, x1, round1, x0, x1); ILVL_H2_SH(temp0_h, x0, temp2_h, x1, temp1_h, temp3_h); ILVR_H2_SH(temp0_h, x0, temp2_h, x1, temp0_h, temp2_h); DOTP_SH4_SW(temp0_h, temp1_h, temp2_h, temp3_h, quant0, quant1, quant2, quant3, temp0_w, temp1_w, temp2_w, temp3_w); SRA_4V(temp0_w, temp1_w, temp2_w, temp3_w, 16); PCKEV_H2_SH(temp1_w, temp0_w, temp3_w, temp2_w, x0, x1); sign_x0 = x0 ^ sign_z0; sign_x1 = x1 ^ sign_z1; SUB2(sign_x0, sign_z0, sign_x1, sign_z1, sign_x0, sign_x1); for (cnt = 0; cnt < 16; ++cnt) { if (cnt <= 7) { if (boost_temp[0] <= z_bin0[cnt]) { if (x0[cnt]) { eob = cnt; boost_temp = zbin_boost; } else { boost_temp++; } } else { sign_x0[cnt] = 0; boost_temp++; } } else { if (boost_temp[0] <= z_bin1[cnt - 8]) { if (x1[cnt - 8]) { eob = cnt; boost_temp = zbin_boost; } else { boost_temp++; } } else { sign_x1[cnt - 8] = 0; boost_temp++; } } } VSHF_H2_SH(sign_x0, sign_x1, sign_x0, sign_x1, inv_zig_zag0, inv_zig_zag1, q_coeff0, q_coeff1); ST_SH2(q_coeff0, q_coeff1, q_coeff, 8); LD_SH2(de_quant, 8, de_quant0, de_quant1); MUL2(de_quant0, q_coeff0, de_quant1, q_coeff1, de_quant0, de_quant1); ST_SH2(de_quant0, de_quant1, dq_coeff, 8); return (int8_t)(eob + 1); } void vp8_fast_quantize_b_msa(BLOCK *b, BLOCKD *d) { int16_t *coeff_ptr = b->coeff; int16_t *round_ptr = b->round; int16_t *quant_ptr = b->quant_fast; int16_t *qcoeff_ptr = d->qcoeff; int16_t *dqcoeff_ptr = d->dqcoeff; int16_t *dequant_ptr = d->dequant; *d->eob = fast_quantize_b_msa(coeff_ptr, round_ptr, quant_ptr, dequant_ptr, qcoeff_ptr, dqcoeff_ptr); } void vp8_regular_quantize_b_msa(BLOCK *b, BLOCKD *d) { int16_t *zbin_boost_ptr = b->zrun_zbin_boost; int16_t *coeff_ptr = b->coeff; int16_t *zbin_ptr = b->zbin; int16_t *round_ptr = b->round; int16_t *quant_ptr = b->quant; int16_t *quant_shift_ptr = b->quant_shift; int16_t *qcoeff_ptr = d->qcoeff; int16_t *dqcoeff_ptr = d->dqcoeff; int16_t *dequant_ptr = d->dequant; int16_t zbin_oq_value = b->zbin_extra; *d->eob = exact_regular_quantize_b_msa( zbin_boost_ptr, coeff_ptr, zbin_ptr, round_ptr, quant_ptr, quant_shift_ptr, dequant_ptr, zbin_oq_value, qcoeff_ptr, dqcoeff_ptr); } libvpx-1.8.2/vp8/encoder/mips/msa/temporal_filter_msa.c000066400000000000000000000254241357355204000231210ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp8_rtcd.h" #include "vp8/common/mips/msa/vp8_macros_msa.h" static void temporal_filter_apply_16size_msa( uint8_t *frame1_ptr, uint32_t stride, uint8_t *frame2_ptr, int32_t strength_in, int32_t filter_wt_in, uint32_t *acc, uint16_t *cnt) { uint32_t row; v16i8 frame1_0_b, frame1_1_b, frame2_0_b, frame2_1_b; v16u8 frame_l, frame_h; v16i8 zero = { 0 }; v8i16 frame2_0_h, frame2_1_h, mod0_h, mod1_h; v8i16 diff0, diff1, cnt0, cnt1; v4i32 const3, const16, filter_wt, strength; v4i32 mod0_w, mod1_w, mod2_w, mod3_w; v4i32 diff0_r, diff0_l, diff1_r, diff1_l; v4i32 frame2_0, frame2_1, frame2_2, frame2_3; v4i32 acc0, acc1, acc2, acc3; filter_wt = __msa_fill_w(filter_wt_in); strength = __msa_fill_w(strength_in); const3 = __msa_ldi_w(3); const16 = __msa_ldi_w(16); for (row = 8; row--;) { frame1_0_b = LD_SB(frame1_ptr); frame2_0_b = LD_SB(frame2_ptr); frame1_ptr += stride; frame2_ptr += 16; frame1_1_b = LD_SB(frame1_ptr); frame2_1_b = LD_SB(frame2_ptr); LD_SW2(acc, 4, acc0, acc1); LD_SW2(acc + 8, 4, acc2, acc3); LD_SH2(cnt, 8, cnt0, cnt1); ILVRL_B2_UB(frame1_0_b, frame2_0_b, frame_l, frame_h); HSUB_UB2_SH(frame_l, frame_h, diff0, diff1); UNPCK_SH_SW(diff0, diff0_r, diff0_l); UNPCK_SH_SW(diff1, diff1_r, diff1_l); MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, mod0_w, mod1_w, mod2_w, mod3_w); MUL4(mod0_w, const3, mod1_w, const3, mod2_w, const3, mod3_w, const3, mod0_w, mod1_w, mod2_w, mod3_w); SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); diff0_r = (mod0_w < const16); diff0_l = (mod1_w < const16); diff1_r = (mod2_w < const16); diff1_l = (mod3_w < const16); SUB4(const16, mod0_w, const16, mod1_w, const16, mod2_w, const16, mod3_w, mod0_w, mod1_w, mod2_w, mod3_w); mod0_w = diff0_r & mod0_w; mod1_w = diff0_l & mod1_w; mod2_w = diff1_r & mod2_w; mod3_w = diff1_l & mod3_w; MUL4(mod0_w, filter_wt, mod1_w, filter_wt, mod2_w, filter_wt, mod3_w, filter_wt, mod0_w, mod1_w, mod2_w, mod3_w); PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h) ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h); ST_SH2(mod0_h, mod1_h, cnt, 8); cnt += 16; ILVRL_B2_SH(zero, frame2_0_b, frame2_0_h, frame2_1_h); UNPCK_SH_SW(frame2_0_h, frame2_0, frame2_1); UNPCK_SH_SW(frame2_1_h, frame2_2, frame2_3); MUL4(mod0_w, frame2_0, mod1_w, frame2_1, mod2_w, frame2_2, mod3_w, frame2_3, mod0_w, mod1_w, mod2_w, mod3_w); ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w, mod2_w, mod3_w); ST_SW2(mod0_w, mod1_w, acc, 4); ST_SW2(mod2_w, mod3_w, acc + 8, 4); acc += 16; LD_SW2(acc, 4, acc0, acc1); LD_SW2(acc + 8, 4, acc2, acc3); LD_SH2(cnt, 8, cnt0, cnt1); ILVRL_B2_UB(frame1_1_b, frame2_1_b, frame_l, frame_h); HSUB_UB2_SH(frame_l, frame_h, diff0, diff1); UNPCK_SH_SW(diff0, diff0_r, diff0_l); UNPCK_SH_SW(diff1, diff1_r, diff1_l); MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, mod0_w, mod1_w, mod2_w, mod3_w); MUL4(mod0_w, const3, mod1_w, const3, mod2_w, const3, mod3_w, const3, mod0_w, mod1_w, mod2_w, mod3_w); SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); diff0_r = (mod0_w < const16); diff0_l = (mod1_w < const16); diff1_r = (mod2_w < const16); diff1_l = (mod3_w < const16); SUB4(const16, mod0_w, const16, mod1_w, const16, mod2_w, const16, mod3_w, mod0_w, mod1_w, mod2_w, mod3_w); mod0_w = diff0_r & mod0_w; mod1_w = diff0_l & mod1_w; mod2_w = diff1_r & mod2_w; mod3_w 
= diff1_l & mod3_w; MUL4(mod0_w, filter_wt, mod1_w, filter_wt, mod2_w, filter_wt, mod3_w, filter_wt, mod0_w, mod1_w, mod2_w, mod3_w); PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h); ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h); ST_SH2(mod0_h, mod1_h, cnt, 8); cnt += 16; UNPCK_UB_SH(frame2_1_b, frame2_0_h, frame2_1_h); UNPCK_SH_SW(frame2_0_h, frame2_0, frame2_1); UNPCK_SH_SW(frame2_1_h, frame2_2, frame2_3); MUL4(mod0_w, frame2_0, mod1_w, frame2_1, mod2_w, frame2_2, mod3_w, frame2_3, mod0_w, mod1_w, mod2_w, mod3_w); ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w, mod2_w, mod3_w); ST_SW2(mod0_w, mod1_w, acc, 4); ST_SW2(mod2_w, mod3_w, acc + 8, 4); acc += 16; frame1_ptr += stride; frame2_ptr += 16; } } static void temporal_filter_apply_8size_msa( uint8_t *frame1_ptr, uint32_t stride, uint8_t *frame2_ptr, int32_t strength_in, int32_t filter_wt_in, uint32_t *acc, uint16_t *cnt) { uint32_t row; uint64_t f0, f1, f2, f3, f4, f5, f6, f7; v16i8 frame1 = { 0 }; v16i8 frame2 = { 0 }; v16i8 frame3 = { 0 }; v16i8 frame4 = { 0 }; v16u8 frame_l, frame_h; v8i16 frame2_0_h, frame2_1_h, mod0_h, mod1_h; v8i16 diff0, diff1, cnt0, cnt1; v4i32 const3, const16; v4i32 filter_wt, strength; v4i32 mod0_w, mod1_w, mod2_w, mod3_w; v4i32 diff0_r, diff0_l, diff1_r, diff1_l; v4i32 frame2_0, frame2_1, frame2_2, frame2_3; v4i32 acc0, acc1, acc2, acc3; filter_wt = __msa_fill_w(filter_wt_in); strength = __msa_fill_w(strength_in); const3 = __msa_ldi_w(3); const16 = __msa_ldi_w(16); for (row = 2; row--;) { LD2(frame1_ptr, stride, f0, f1); frame1_ptr += (2 * stride); LD2(frame2_ptr, 8, f2, f3); frame2_ptr += 16; LD2(frame1_ptr, stride, f4, f5); frame1_ptr += (2 * stride); LD2(frame2_ptr, 8, f6, f7); frame2_ptr += 16; LD_SW2(acc, 4, acc0, acc1); LD_SW2(acc + 8, 4, acc2, acc3); LD_SH2(cnt, 8, cnt0, cnt1); INSERT_D2_SB(f0, f1, frame1); INSERT_D2_SB(f2, f3, frame2); INSERT_D2_SB(f4, f5, frame3); INSERT_D2_SB(f6, f7, frame4); ILVRL_B2_UB(frame1, frame2, frame_l, frame_h); HSUB_UB2_SH(frame_l, frame_h, diff0, diff1); UNPCK_SH_SW(diff0, diff0_r, diff0_l); UNPCK_SH_SW(diff1, diff1_r, diff1_l); MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l, diff1_l, mod0_w, mod1_w, mod2_w, mod3_w); MUL4(mod0_w, const3, mod1_w, const3, mod2_w, const3, mod3_w, const3, mod0_w, mod1_w, mod2_w, mod3_w); SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength); diff0_r = (mod0_w < const16); diff0_l = (mod1_w < const16); diff1_r = (mod2_w < const16); diff1_l = (mod3_w < const16); SUB4(const16, mod0_w, const16, mod1_w, const16, mod2_w, const16, mod3_w, mod0_w, mod1_w, mod2_w, mod3_w); mod0_w = diff0_r & mod0_w; mod1_w = diff0_l & mod1_w; mod2_w = diff1_r & mod2_w; mod3_w = diff1_l & mod3_w; MUL4(mod0_w, filter_wt, mod1_w, filter_wt, mod2_w, filter_wt, mod3_w, filter_wt, mod0_w, mod1_w, mod2_w, mod3_w); PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h); ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h); ST_SH2(mod0_h, mod1_h, cnt, 8); cnt += 16; UNPCK_UB_SH(frame2, frame2_0_h, frame2_1_h); UNPCK_SH_SW(frame2_0_h, frame2_0, frame2_1); UNPCK_SH_SW(frame2_1_h, frame2_2, frame2_3); MUL4(mod0_w, frame2_0, mod1_w, frame2_1, mod2_w, frame2_2, mod3_w, frame2_3, mod0_w, mod1_w, mod2_w, mod3_w); ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w, mod1_w, mod2_w, mod3_w); ST_SW2(mod0_w, mod1_w, acc, 4); ST_SW2(mod2_w, mod3_w, acc + 8, 4); acc += 16; LD_SW2(acc, 4, acc0, acc1); LD_SW2(acc + 8, 4, acc2, acc3); LD_SH2(cnt, 8, cnt0, cnt1); ILVRL_B2_UB(frame3, frame4, frame_l, frame_h); 
    HSUB_UB2_SH(frame_l, frame_h, diff0, diff1);
    UNPCK_SH_SW(diff0, diff0_r, diff0_l);
    UNPCK_SH_SW(diff1, diff1_r, diff1_l);
    MUL4(diff0_r, diff0_r, diff0_l, diff0_l, diff1_r, diff1_r, diff1_l,
         diff1_l, mod0_w, mod1_w, mod2_w, mod3_w);
    MUL4(mod0_w, const3, mod1_w, const3, mod2_w, const3, mod3_w, const3,
         mod0_w, mod1_w, mod2_w, mod3_w);
    SRAR_W4_SW(mod0_w, mod1_w, mod2_w, mod3_w, strength);
    diff0_r = (mod0_w < const16);
    diff0_l = (mod1_w < const16);
    diff1_r = (mod2_w < const16);
    diff1_l = (mod3_w < const16);
    SUB4(const16, mod0_w, const16, mod1_w, const16, mod2_w, const16, mod3_w,
         mod0_w, mod1_w, mod2_w, mod3_w);
    mod0_w = diff0_r & mod0_w;
    mod1_w = diff0_l & mod1_w;
    mod2_w = diff1_r & mod2_w;
    mod3_w = diff1_l & mod3_w;
    MUL4(mod0_w, filter_wt, mod1_w, filter_wt, mod2_w, filter_wt, mod3_w,
         filter_wt, mod0_w, mod1_w, mod2_w, mod3_w);
    PCKEV_H2_SH(mod1_w, mod0_w, mod3_w, mod2_w, mod0_h, mod1_h);
    ADD2(mod0_h, cnt0, mod1_h, cnt1, mod0_h, mod1_h);
    ST_SH2(mod0_h, mod1_h, cnt, 8);
    cnt += 16;

    UNPCK_UB_SH(frame4, frame2_0_h, frame2_1_h);
    UNPCK_SH_SW(frame2_0_h, frame2_0, frame2_1);
    UNPCK_SH_SW(frame2_1_h, frame2_2, frame2_3);
    MUL4(mod0_w, frame2_0, mod1_w, frame2_1, mod2_w, frame2_2, mod3_w,
         frame2_3, mod0_w, mod1_w, mod2_w, mod3_w);
    ADD4(mod0_w, acc0, mod1_w, acc1, mod2_w, acc2, mod3_w, acc3, mod0_w,
         mod1_w, mod2_w, mod3_w);
    ST_SW2(mod0_w, mod1_w, acc, 4);
    ST_SW2(mod2_w, mod3_w, acc + 8, 4);
    acc += 16;
  }
}

void vp8_temporal_filter_apply_msa(uint8_t *frame1, uint32_t stride,
                                   uint8_t *frame2, uint32_t block_size,
                                   int32_t strength, int32_t filter_weight,
                                   uint32_t *accumulator, uint16_t *count) {
  if (8 == block_size) {
    temporal_filter_apply_8size_msa(frame1, stride, frame2, strength,
                                    filter_weight, accumulator, count);
  } else if (16 == block_size) {
    temporal_filter_apply_16size_msa(frame1, stride, frame2, strength,
                                     filter_weight, accumulator, count);
  } else {
    uint32_t i, j, k;
    int32_t modifier;
    int32_t byte = 0;
    const int32_t rounding = strength > 0 ? 1 << (strength - 1) : 0;

    for (i = 0, k = 0; i < block_size; ++i) {
      for (j = 0; j < block_size; ++j, ++k) {
        int src_byte = frame1[byte];
        int pixel_value = *frame2++;

        modifier = src_byte - pixel_value;
        modifier *= modifier;
        modifier *= 3;
        modifier += rounding;
        modifier >>= strength;

        if (modifier > 16) modifier = 16;

        modifier = 16 - modifier;
        modifier *= filter_weight;

        count[k] += modifier;
        accumulator[k] += modifier * pixel_value;

        byte++;
      }

      byte += stride - block_size;
    }
  }
}
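/* Illustrative sketch, not part of libvpx: a worked pass through the scalar
 * modifier computation in the fallback branch above, to make the filter
 * arithmetic concrete. With strength = 6 (so rounding = 32),
 * filter_weight = 2 and a pixel difference of 10: 10 * 10 * 3 = 300;
 * 300 + 32 = 332; 332 >> 6 = 5; 16 - 5 = 11; 11 * 2 = 22, so the pixel adds
 * 22 to count[k] and 22 * pixel_value to accumulator[k]. The helper below
 * merely restates that per-pixel step as a standalone function. */
static int temporal_filter_modifier_example(int src_byte, int pixel_value,
                                            int strength, int filter_weight) {
  const int rounding = strength > 0 ? 1 << (strength - 1) : 0;
  int modifier = src_byte - pixel_value;
  modifier *= modifier; /* squared pixel difference */
  modifier *= 3;
  modifier += rounding;
  modifier >>= strength;
  if (modifier > 16) modifier = 16; /* clamp, then invert: small diff wins */
  return (16 - modifier) * filter_weight;
}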
*/ #include "vp8/common/blockd.h" #include "modecosts.h" #include "onyx_int.h" #include "treewriter.h" #include "vp8/common/entropymode.h" void vp8_init_mode_costs(VP8_COMP *c) { VP8_COMMON *x = &c->common; struct rd_costs_struct *rd_costs = &c->rd_costs; { const vp8_tree_p T = vp8_bmode_tree; int i = 0; do { int j = 0; do { vp8_cost_tokens(rd_costs->bmode_costs[i][j], vp8_kf_bmode_prob[i][j], T); } while (++j < VP8_BINTRAMODES); } while (++i < VP8_BINTRAMODES); vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.bmode_prob, T); } vp8_cost_tokens(rd_costs->inter_bmode_costs, x->fc.sub_mv_ref_prob, vp8_sub_mv_ref_tree); vp8_cost_tokens(rd_costs->mbmode_cost[1], x->fc.ymode_prob, vp8_ymode_tree); vp8_cost_tokens(rd_costs->mbmode_cost[0], vp8_kf_ymode_prob, vp8_kf_ymode_tree); vp8_cost_tokens(rd_costs->intra_uv_mode_cost[1], x->fc.uv_mode_prob, vp8_uv_mode_tree); vp8_cost_tokens(rd_costs->intra_uv_mode_cost[0], vp8_kf_uv_mode_prob, vp8_uv_mode_tree); } libvpx-1.8.2/vp8/encoder/modecosts.h000066400000000000000000000012441357355204000173400ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_MODECOSTS_H_ #define VPX_VP8_ENCODER_MODECOSTS_H_ #ifdef __cplusplus extern "C" { #endif struct VP8_COMP; void vp8_init_mode_costs(struct VP8_COMP *c); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_MODECOSTS_H_ libvpx-1.8.2/vp8/encoder/mr_dissim.c000066400000000000000000000166461357355204000173350ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "vpx_config.h" #include "onyx_int.h" #include "mr_dissim.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "rdopt.h" #include "vp8/common/common.h" void vp8_cal_low_res_mb_cols(VP8_COMP *cpi) { int low_res_w; /* Support arbitrary down-sampling factor */ unsigned int iw = cpi->oxcf.Width * cpi->oxcf.mr_down_sampling_factor.den + cpi->oxcf.mr_down_sampling_factor.num - 1; low_res_w = iw / cpi->oxcf.mr_down_sampling_factor.num; cpi->mr_low_res_mb_cols = ((low_res_w + 15) >> 4); } #define GET_MV(x) \ if (x->mbmi.ref_frame != INTRA_FRAME) { \ mvx[cnt] = x->mbmi.mv.as_mv.row; \ mvy[cnt] = x->mbmi.mv.as_mv.col; \ cnt++; \ } #define GET_MV_SIGN(x) \ if (x->mbmi.ref_frame != INTRA_FRAME) { \ mvx[cnt] = x->mbmi.mv.as_mv.row; \ mvy[cnt] = x->mbmi.mv.as_mv.col; \ if (cm->ref_frame_sign_bias[x->mbmi.ref_frame] != \ cm->ref_frame_sign_bias[tmp->mbmi.ref_frame]) { \ mvx[cnt] *= -1; \ mvy[cnt] *= -1; \ } \ cnt++; \ } void vp8_cal_dissimilarity(VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; int i; /* Note: The first row & first column in mip are outside the frame, which * were initialized to all 0.(ref_frame, mode, mv...) * Their ref_frame = 0 means they won't be counted in the following * calculation. 
*/ if (cpi->oxcf.mr_total_resolutions > 1 && cpi->oxcf.mr_encoder_id < (cpi->oxcf.mr_total_resolutions - 1)) { /* Store info for show/no-show frames for supporting alt_ref. * If parent frame is alt_ref, child has one too. */ LOWER_RES_FRAME_INFO *store_info = (LOWER_RES_FRAME_INFO *)cpi->oxcf.mr_low_res_mode_info; store_info->frame_type = cm->frame_type; if (cm->frame_type != KEY_FRAME) { store_info->is_frame_dropped = 0; for (i = 1; i < MAX_REF_FRAMES; ++i) store_info->low_res_ref_frames[i] = cpi->current_ref_frames[i]; } if (cm->frame_type != KEY_FRAME) { int mb_row; int mb_col; /* Point to beginning of allocated MODE_INFO arrays. */ MODE_INFO *tmp = cm->mip + cm->mode_info_stride; LOWER_RES_MB_INFO *store_mode_info = store_info->mb_info; for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { tmp++; for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { int dissim = INT_MAX; if (tmp->mbmi.ref_frame != INTRA_FRAME) { int mvx[8]; int mvy[8]; int mmvx; int mmvy; int cnt = 0; const MODE_INFO *here = tmp; const MODE_INFO *above = here - cm->mode_info_stride; const MODE_INFO *left = here - 1; const MODE_INFO *aboveleft = above - 1; const MODE_INFO *aboveright = NULL; const MODE_INFO *right = NULL; const MODE_INFO *belowleft = NULL; const MODE_INFO *below = NULL; const MODE_INFO *belowright = NULL; /* If alternate reference frame is used, we have to * check sign of MV. */ if (cpi->oxcf.play_alternate) { /* Gather mv of neighboring MBs */ GET_MV_SIGN(above) GET_MV_SIGN(left) GET_MV_SIGN(aboveleft) if (mb_col < (cm->mb_cols - 1)) { right = here + 1; aboveright = above + 1; GET_MV_SIGN(right) GET_MV_SIGN(aboveright) } if (mb_row < (cm->mb_rows - 1)) { below = here + cm->mode_info_stride; belowleft = below - 1; GET_MV_SIGN(below) GET_MV_SIGN(belowleft) } if (mb_col < (cm->mb_cols - 1) && mb_row < (cm->mb_rows - 1)) { belowright = below + 1; GET_MV_SIGN(belowright) } } else { /* No alt_ref and gather mv of neighboring MBs */ GET_MV(above) GET_MV(left) GET_MV(aboveleft) if (mb_col < (cm->mb_cols - 1)) { right = here + 1; aboveright = above + 1; GET_MV(right) GET_MV(aboveright) } if (mb_row < (cm->mb_rows - 1)) { below = here + cm->mode_info_stride; belowleft = below - 1; GET_MV(below) GET_MV(belowleft) } if (mb_col < (cm->mb_cols - 1) && mb_row < (cm->mb_rows - 1)) { belowright = below + 1; GET_MV(belowright) } } if (cnt > 0) { int max_mvx = mvx[0]; int min_mvx = mvx[0]; int max_mvy = mvy[0]; int min_mvy = mvy[0]; int i; if (cnt > 1) { for (i = 1; i < cnt; ++i) { if (mvx[i] > max_mvx) max_mvx = mvx[i]; else if (mvx[i] < min_mvx) min_mvx = mvx[i]; if (mvy[i] > max_mvy) max_mvy = mvy[i]; else if (mvy[i] < min_mvy) min_mvy = mvy[i]; } } mmvx = VPXMAX(abs(min_mvx - here->mbmi.mv.as_mv.row), abs(max_mvx - here->mbmi.mv.as_mv.row)); mmvy = VPXMAX(abs(min_mvy - here->mbmi.mv.as_mv.col), abs(max_mvy - here->mbmi.mv.as_mv.col)); dissim = VPXMAX(mmvx, mmvy); } } /* Store mode info for next resolution encoding */ store_mode_info->mode = tmp->mbmi.mode; store_mode_info->ref_frame = tmp->mbmi.ref_frame; store_mode_info->mv.as_int = tmp->mbmi.mv.as_int; store_mode_info->dissim = dissim; tmp++; store_mode_info++; } } } } } /* This function is called only when this frame is dropped at current resolution level. */ void vp8_store_drop_frame_info(VP8_COMP *cpi) { /* If the frame is dropped in lower-resolution encoding, this information is passed to higher resolution level so that the encoder knows there is no mode & motion info available. 
*/ if (cpi->oxcf.mr_total_resolutions > 1 && cpi->oxcf.mr_encoder_id < (cpi->oxcf.mr_total_resolutions - 1)) { /* Store info for show/no-show frames for supporting alt_ref. * If parent frame is alt_ref, child has one too. */ LOWER_RES_FRAME_INFO *store_info = (LOWER_RES_FRAME_INFO *)cpi->oxcf.mr_low_res_mode_info; /* Set frame_type to be INTER_FRAME since we won't drop key frame. */ store_info->frame_type = INTER_FRAME; store_info->is_frame_dropped = 1; } } libvpx-1.8.2/vp8/encoder/mr_dissim.h000066400000000000000000000014301357355204000173230ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_MR_DISSIM_H_ #define VPX_VP8_ENCODER_MR_DISSIM_H_ #include "vpx_config.h" #ifdef __cplusplus extern "C" { #endif extern void vp8_cal_low_res_mb_cols(VP8_COMP *cpi); extern void vp8_cal_dissimilarity(VP8_COMP *cpi); extern void vp8_store_drop_frame_info(VP8_COMP *cpi); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_MR_DISSIM_H_ libvpx-1.8.2/vp8/encoder/onyx_if.c000066400000000000000000005464351357355204000170260ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "vpx_config.h" #include "./vpx_scale_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "./vp8_rtcd.h" #include "bitstream.h" #include "vp8/common/onyxc_int.h" #include "vp8/common/blockd.h" #include "onyx_int.h" #include "vp8/common/systemdependent.h" #include "vp8/common/vp8_skin_detection.h" #include "vp8/encoder/quantize.h" #include "vp8/common/alloccommon.h" #include "mcomp.h" #include "firstpass.h" #include "vpx_dsp/psnr.h" #include "vpx_scale/vpx_scale.h" #include "vp8/common/extend.h" #include "ratectrl.h" #include "vp8/common/quant_common.h" #include "segmentation.h" #if CONFIG_POSTPROC #include "vp8/common/postproc.h" #endif #include "vpx_mem/vpx_mem.h" #include "vp8/common/reconintra.h" #include "vp8/common/swapyv12buffer.h" #include "vp8/common/threading.h" #include "vpx_ports/system_state.h" #include "vpx_ports/vpx_timer.h" #include "vpx_util/vpx_write_yuv_frame.h" #if VPX_ARCH_ARM #include "vpx_ports/arm.h" #endif #if CONFIG_MULTI_RES_ENCODING #include "mr_dissim.h" #endif #include "encodeframe.h" #if CONFIG_MULTITHREAD #include "ethreading.h" #endif #include "picklpf.h" #if !CONFIG_REALTIME_ONLY #include "temporal_filter.h" #endif #include <assert.h> #include <math.h> #include <stdio.h> #include <limits.h> #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING extern int vp8_update_coef_context(VP8_COMP *cpi); #endif extern unsigned int vp8_get_processor_freq(); int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest); static void set_default_lf_deltas(VP8_COMP *cpi); extern const int vp8_gf_interval_table[101]; #if CONFIG_INTERNAL_STATS #include "math.h" #include "vpx_dsp/ssim.h" #endif #ifdef OUTPUT_YUV_SRC FILE *yuv_file; #endif #ifdef OUTPUT_YUV_DENOISED FILE *yuv_denoised_file; #endif #ifdef OUTPUT_YUV_SKINMAP static FILE *yuv_skinmap_file = NULL; #endif #if 0 FILE *framepsnr; FILE *kf_list; FILE *keyfile; #endif #if 0 extern int skip_true_count; extern int skip_false_count; #endif #ifdef SPEEDSTATS unsigned int frames_at_speed[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; unsigned int tot_pm = 0; unsigned int cnt_pm = 0; unsigned int tot_ef = 0; unsigned int cnt_ef = 0; #endif #ifdef MODE_STATS extern unsigned __int64 Sectionbits[50]; extern int y_modes[5]; extern int uv_modes[4]; extern int b_modes[10]; extern int inter_y_modes[10]; extern int inter_uv_modes[4]; extern unsigned int inter_b_modes[15]; #endif extern const int vp8_bits_per_mb[2][QINDEX_RANGE]; extern const int qrounding_factors[129]; extern const int qzbin_factors[129]; extern void vp8cx_init_quantizer(VP8_COMP *cpi); extern const int vp8cx_base_skip_false_prob[128]; /* Tables relating active max Q to active min Q */ static const unsigned char kf_low_motion_minq[QINDEX_RANGE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 8, 8, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 18, 18, 18, 18, 19, 20, 20, 21, 21, 22, 23, 23 }; static const unsigned char kf_high_motion_minq[QINDEX_RANGE] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 8, 8, 8, 8, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 18, 18, 18, 18, 19, 19, 20, 20,
20, 20, 21, 21, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 26, 27, 28, 28, 29, 30 }; static const unsigned char gf_low_motion_minq[QINDEX_RANGE] = { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40, 40, 41, 41, 42, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58 }; static const unsigned char gf_mid_motion_minq[QINDEX_RANGE] = { 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 12, 12, 12, 12, 13, 13, 13, 14, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 39, 39, 40, 40, 41, 41, 42, 42, 43, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64 }; static const unsigned char gf_high_motion_minq[QINDEX_RANGE] = { 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 12, 12, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 24, 25, 25, 26, 26, 27, 27, 28, 28, 29, 29, 30, 30, 31, 31, 32, 32, 33, 33, 34, 34, 35, 35, 36, 36, 37, 37, 38, 38, 39, 39, 40, 40, 41, 41, 42, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 62, 64, 66, 68, 70, 72, 74, 76, 78, 80 }; static const unsigned char inter_minq[QINDEX_RANGE] = { 0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 6, 6, 7, 8, 8, 9, 9, 10, 11, 11, 12, 13, 13, 14, 15, 15, 16, 17, 17, 18, 19, 20, 20, 21, 22, 22, 23, 24, 24, 25, 26, 27, 27, 28, 29, 30, 30, 31, 32, 33, 33, 34, 35, 36, 36, 37, 38, 39, 39, 40, 41, 42, 42, 43, 44, 45, 46, 46, 47, 48, 49, 50, 50, 51, 52, 53, 54, 55, 55, 56, 57, 58, 59, 60, 60, 61, 62, 63, 64, 65, 66, 67, 67, 68, 69, 70, 71, 72, 73, 74, 75, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100 }; #ifdef PACKET_TESTING extern FILE *vpxlogc; #endif static void save_layer_context(VP8_COMP *cpi) { LAYER_CONTEXT *lc = &cpi->layer_context[cpi->current_layer]; /* Save layer dependent coding state */ lc->target_bandwidth = cpi->target_bandwidth; lc->starting_buffer_level = cpi->oxcf.starting_buffer_level; lc->optimal_buffer_level = cpi->oxcf.optimal_buffer_level; lc->maximum_buffer_size = cpi->oxcf.maximum_buffer_size; lc->starting_buffer_level_in_ms = cpi->oxcf.starting_buffer_level_in_ms; lc->optimal_buffer_level_in_ms = cpi->oxcf.optimal_buffer_level_in_ms; lc->maximum_buffer_size_in_ms = cpi->oxcf.maximum_buffer_size_in_ms; lc->buffer_level = cpi->buffer_level; lc->bits_off_target = cpi->bits_off_target; lc->total_actual_bits = cpi->total_actual_bits; lc->worst_quality = cpi->worst_quality; lc->active_worst_quality = cpi->active_worst_quality; lc->best_quality = cpi->best_quality; lc->active_best_quality = cpi->active_best_quality; lc->ni_av_qi = cpi->ni_av_qi; lc->ni_tot_qi = cpi->ni_tot_qi; lc->ni_frames = cpi->ni_frames; lc->avg_frame_qindex = cpi->avg_frame_qindex; lc->rate_correction_factor = cpi->rate_correction_factor; lc->key_frame_rate_correction_factor = cpi->key_frame_rate_correction_factor; lc->gf_rate_correction_factor = cpi->gf_rate_correction_factor; 
lc->zbin_over_quant = cpi->mb.zbin_over_quant; lc->inter_frame_target = cpi->inter_frame_target; lc->total_byte_count = cpi->total_byte_count; lc->filter_level = cpi->common.filter_level; lc->frames_since_last_drop_overshoot = cpi->frames_since_last_drop_overshoot; lc->force_maxqp = cpi->force_maxqp; lc->last_frame_percent_intra = cpi->last_frame_percent_intra; lc->last_q[0] = cpi->last_q[0]; lc->last_q[1] = cpi->last_q[1]; memcpy(lc->count_mb_ref_frame_usage, cpi->mb.count_mb_ref_frame_usage, sizeof(cpi->mb.count_mb_ref_frame_usage)); } static void restore_layer_context(VP8_COMP *cpi, const int layer) { LAYER_CONTEXT *lc = &cpi->layer_context[layer]; /* Restore layer dependent coding state */ cpi->current_layer = layer; cpi->target_bandwidth = lc->target_bandwidth; cpi->oxcf.target_bandwidth = lc->target_bandwidth; cpi->oxcf.starting_buffer_level = lc->starting_buffer_level; cpi->oxcf.optimal_buffer_level = lc->optimal_buffer_level; cpi->oxcf.maximum_buffer_size = lc->maximum_buffer_size; cpi->oxcf.starting_buffer_level_in_ms = lc->starting_buffer_level_in_ms; cpi->oxcf.optimal_buffer_level_in_ms = lc->optimal_buffer_level_in_ms; cpi->oxcf.maximum_buffer_size_in_ms = lc->maximum_buffer_size_in_ms; cpi->buffer_level = lc->buffer_level; cpi->bits_off_target = lc->bits_off_target; cpi->total_actual_bits = lc->total_actual_bits; cpi->active_worst_quality = lc->active_worst_quality; cpi->active_best_quality = lc->active_best_quality; cpi->ni_av_qi = lc->ni_av_qi; cpi->ni_tot_qi = lc->ni_tot_qi; cpi->ni_frames = lc->ni_frames; cpi->avg_frame_qindex = lc->avg_frame_qindex; cpi->rate_correction_factor = lc->rate_correction_factor; cpi->key_frame_rate_correction_factor = lc->key_frame_rate_correction_factor; cpi->gf_rate_correction_factor = lc->gf_rate_correction_factor; cpi->mb.zbin_over_quant = lc->zbin_over_quant; cpi->inter_frame_target = lc->inter_frame_target; cpi->total_byte_count = lc->total_byte_count; cpi->common.filter_level = lc->filter_level; cpi->frames_since_last_drop_overshoot = lc->frames_since_last_drop_overshoot; cpi->force_maxqp = lc->force_maxqp; cpi->last_frame_percent_intra = lc->last_frame_percent_intra; cpi->last_q[0] = lc->last_q[0]; cpi->last_q[1] = lc->last_q[1]; memcpy(cpi->mb.count_mb_ref_frame_usage, lc->count_mb_ref_frame_usage, sizeof(cpi->mb.count_mb_ref_frame_usage)); } static int rescale(int val, int num, int denom) { int64_t llnum = num; int64_t llden = denom; int64_t llval = val; return (int)(llval * llnum / llden); } static void init_temporal_layer_context(VP8_COMP *cpi, VP8_CONFIG *oxcf, const int layer, double prev_layer_framerate) { LAYER_CONTEXT *lc = &cpi->layer_context[layer]; lc->framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[layer]; lc->target_bandwidth = cpi->oxcf.target_bitrate[layer] * 1000; lc->starting_buffer_level_in_ms = oxcf->starting_buffer_level; lc->optimal_buffer_level_in_ms = oxcf->optimal_buffer_level; lc->maximum_buffer_size_in_ms = oxcf->maximum_buffer_size; lc->starting_buffer_level = rescale((int)(oxcf->starting_buffer_level), lc->target_bandwidth, 1000); if (oxcf->optimal_buffer_level == 0) { lc->optimal_buffer_level = lc->target_bandwidth / 8; } else { lc->optimal_buffer_level = rescale((int)(oxcf->optimal_buffer_level), lc->target_bandwidth, 1000); } if (oxcf->maximum_buffer_size == 0) { lc->maximum_buffer_size = lc->target_bandwidth / 8; } else { lc->maximum_buffer_size = rescale((int)(oxcf->maximum_buffer_size), lc->target_bandwidth, 1000); } /* Work out the average size of a frame within this layer */ if (layer > 
0) { lc->avg_frame_size_for_layer = (int)((cpi->oxcf.target_bitrate[layer] - cpi->oxcf.target_bitrate[layer - 1]) * 1000 / (lc->framerate - prev_layer_framerate)); } lc->active_worst_quality = cpi->oxcf.worst_allowed_q; lc->active_best_quality = cpi->oxcf.best_allowed_q; lc->avg_frame_qindex = cpi->oxcf.worst_allowed_q; lc->buffer_level = lc->starting_buffer_level; lc->bits_off_target = lc->starting_buffer_level; lc->total_actual_bits = 0; lc->ni_av_qi = 0; lc->ni_tot_qi = 0; lc->ni_frames = 0; lc->rate_correction_factor = 1.0; lc->key_frame_rate_correction_factor = 1.0; lc->gf_rate_correction_factor = 1.0; lc->inter_frame_target = 0; } // Upon a run-time change in temporal layers, reset the layer context parameters // for any "new" layers. For "existing" layers, let them inherit the parameters // from the previous layer state (at the same layer #). In the future we may want // to better map the previous layer state(s) to the "new" ones. static void reset_temporal_layer_change(VP8_COMP *cpi, VP8_CONFIG *oxcf, const int prev_num_layers) { int i; double prev_layer_framerate = 0; const int curr_num_layers = cpi->oxcf.number_of_layers; // If the previous state was 1 layer, get current layer context from cpi. // We need this to set the layer context for the new layers below. if (prev_num_layers == 1) { cpi->current_layer = 0; save_layer_context(cpi); } for (i = 0; i < curr_num_layers; ++i) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; if (i >= prev_num_layers) { init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate); } // The initial buffer levels are set based on their starting levels. // We could set the buffer levels based on the previous state (normalized // properly by the layer bandwidths) but we would need to keep track of // the previous set of layer bandwidths (i.e., target_bitrate[i]) // before the layer change. For now, reset to the starting levels. lc->buffer_level = cpi->oxcf.starting_buffer_level_in_ms * cpi->oxcf.target_bitrate[i]; lc->bits_off_target = lc->buffer_level; // TODO(marpan): Should we set the rate_correction_factor and // active_worst/best_quality to values derived from the previous layer // state (to smooth-out quality dips/rate fluctuation at transition)? // We need to treat the 1 layer case separately: oxcf.target_bitrate[i] // is not set for 1 layer, and the restore_layer_context/save_context() // are not called in the encoding loop, so we need to call it here to // pass the layer context state to |cpi|.
if (curr_num_layers == 1) { lc->target_bandwidth = cpi->oxcf.target_bandwidth; lc->buffer_level = cpi->oxcf.starting_buffer_level_in_ms * lc->target_bandwidth / 1000; lc->bits_off_target = lc->buffer_level; restore_layer_context(cpi, 0); } prev_layer_framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[i]; } } static void setup_features(VP8_COMP *cpi) { // If segmentation is enabled, set the update flags if (cpi->mb.e_mbd.segmentation_enabled) { cpi->mb.e_mbd.update_mb_segmentation_map = 1; cpi->mb.e_mbd.update_mb_segmentation_data = 1; } else { cpi->mb.e_mbd.update_mb_segmentation_map = 0; cpi->mb.e_mbd.update_mb_segmentation_data = 0; } cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 0; cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); memset(cpi->mb.e_mbd.last_ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); memset(cpi->mb.e_mbd.last_mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); set_default_lf_deltas(cpi); } static void dealloc_raw_frame_buffers(VP8_COMP *cpi); void vp8_initialize_enc(void) { static volatile int init_done = 0; if (!init_done) { vpx_dsp_rtcd(); vp8_init_intra_predictors(); init_done = 1; } } static void dealloc_compressor_data(VP8_COMP *cpi) { vpx_free(cpi->tplist); cpi->tplist = NULL; /* Delete last frame MV storage buffers */ vpx_free(cpi->lfmv); cpi->lfmv = 0; vpx_free(cpi->lf_ref_frame_sign_bias); cpi->lf_ref_frame_sign_bias = 0; vpx_free(cpi->lf_ref_frame); cpi->lf_ref_frame = 0; /* Delete segmentation map */ vpx_free(cpi->segmentation_map); cpi->segmentation_map = 0; vpx_free(cpi->active_map); cpi->active_map = 0; vp8_de_alloc_frame_buffers(&cpi->common); vp8_yv12_de_alloc_frame_buffer(&cpi->pick_lf_lvl_frame); vp8_yv12_de_alloc_frame_buffer(&cpi->scaled_source); dealloc_raw_frame_buffers(cpi); vpx_free(cpi->tok); cpi->tok = 0; /* Structure used to monitor GF usage */ vpx_free(cpi->gf_active_flags); cpi->gf_active_flags = 0; /* Activity mask based per mb zbin adjustments */ vpx_free(cpi->mb_activity_map); cpi->mb_activity_map = 0; vpx_free(cpi->mb.pip); cpi->mb.pip = 0; #if CONFIG_MULTITHREAD vpx_free(cpi->mt_current_mb_col); cpi->mt_current_mb_col = NULL; #endif } static void enable_segmentation(VP8_COMP *cpi) { /* Set the appropriate feature bit */ cpi->mb.e_mbd.segmentation_enabled = 1; cpi->mb.e_mbd.update_mb_segmentation_map = 1; cpi->mb.e_mbd.update_mb_segmentation_data = 1; } static void disable_segmentation(VP8_COMP *cpi) { /* Clear the appropriate feature bit */ cpi->mb.e_mbd.segmentation_enabled = 0; } /* Valid values for a segment are 0 to 3 * Segmentation map is arranged as [Rows][Columns] */ static void set_segmentation_map(VP8_COMP *cpi, unsigned char *segmentation_map) { /* Copy in the new segmentation map */ memcpy(cpi->segmentation_map, segmentation_map, (cpi->common.mb_rows * cpi->common.mb_cols)); /* Signal that the map should be updated. */ cpi->mb.e_mbd.update_mb_segmentation_map = 1; cpi->mb.e_mbd.update_mb_segmentation_data = 1; } /* The values given for each segment can be either deltas (from the default * value chosen for the frame) or absolute values. * * Valid range for abs values is: * (0-127 for MB_LVL_ALT_Q), (0-63 for SEGMENT_ALT_LF) * Valid range for delta values is: * (+/-127 for MB_LVL_ALT_Q), (+/-63 for SEGMENT_ALT_LF) * * abs_delta = SEGMENT_DELTADATA (deltas) * abs_delta = SEGMENT_ABSDATA (use the absolute values given).
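 * For example (illustrative numbers): with a frame base Q index of 40, * SEGMENT_DELTADATA and an MB_LVL_ALT_Q value of -10 for segment 1 give * that segment an effective Q index of 30, whereas with SEGMENT_ABSDATA * a value of 30 would be used directly.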
* */ static void set_segment_data(VP8_COMP *cpi, signed char *feature_data, unsigned char abs_delta) { cpi->mb.e_mbd.mb_segement_abs_delta = abs_delta; memcpy(cpi->segment_feature_data, feature_data, sizeof(cpi->segment_feature_data)); } /* A simple function to cyclically refresh the background at a lower Q */ static void cyclic_background_refresh(VP8_COMP *cpi, int Q, int lf_adjustment) { unsigned char *seg_map = cpi->segmentation_map; signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; int i; int block_count = cpi->cyclic_refresh_mode_max_mbs_perframe; int mbs_in_frame = cpi->common.mb_rows * cpi->common.mb_cols; cpi->cyclic_refresh_q = Q / 2; if (cpi->oxcf.screen_content_mode) { // Modify the quality ramp-up based on Q: above some Q level, increase the // number of blocks to be refreshed, and reduce it below that threshold. // Turn it off under certain conditions, i.e., away from a key frame, and // when we are at good quality (low Q) and most of the blocks were // skip-encoded in the previous frame. int qp_thresh = (cpi->oxcf.screen_content_mode == 2) ? 80 : 100; if (Q >= qp_thresh) { cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 10; } else if (cpi->frames_since_key > 250 && Q < 20 && cpi->mb.skip_true_count > (int)(0.95 * mbs_in_frame)) { cpi->cyclic_refresh_mode_max_mbs_perframe = 0; } else { cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 20; } block_count = cpi->cyclic_refresh_mode_max_mbs_perframe; } // Set every macroblock to be eligible for update. // For key frame this will reset seg map to 0. memset(cpi->segmentation_map, 0, mbs_in_frame); if (cpi->common.frame_type != KEY_FRAME && block_count > 0) { /* Cycle through the macro_block rows */ /* MB loop to set local segmentation map */ i = cpi->cyclic_refresh_mode_index; assert(i < mbs_in_frame); do { /* If the MB is a candidate for clean-up then mark it for * possible boost/refresh (segment 1). The segment id may get * reset to 0 later if the MB gets coded as anything other than * last frame 0,0, as only (last frame 0,0) MBs are eligible for * refresh: that is to say, MBs likely to be background blocks. */ if (cpi->cyclic_refresh_map[i] == 0) { seg_map[i] = 1; block_count--; } else if (cpi->cyclic_refresh_map[i] < 0) { cpi->cyclic_refresh_map[i]++; } i++; if (i == mbs_in_frame) i = 0; } while (block_count && i != cpi->cyclic_refresh_mode_index); cpi->cyclic_refresh_mode_index = i; #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) { if (cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive && Q < (int)cpi->denoiser.denoise_pars.qp_thresh && (cpi->frames_since_key > 2 * cpi->denoiser.denoise_pars.consec_zerolast)) { // Under aggressive denoising, use segmentation to turn off loop // filter below some qp thresh. The filter is reduced for all // blocks that have been encoded as ZEROMV LAST x frames in a row, // where x is set by cpi->denoiser.denoise_pars.consec_zerolast. // This is to avoid "dot" artifacts that can occur from repeated // loop filtering on noisy input source. cpi->cyclic_refresh_q = Q; // lf_adjustment = -MAX_LOOP_FILTER; lf_adjustment = -40; for (i = 0; i < mbs_in_frame; ++i) { seg_map[i] = (cpi->consec_zero_last[i] > cpi->denoiser.denoise_pars.consec_zerolast) ? 1 : 0; } } } #endif } /* Activate segmentation.
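 * Segment 1 (the MBs marked for refresh above) is given an alt-Q delta of * (cyclic_refresh_q - Q), i.e. a boost to roughly half the frame Q, plus * the loop filter delta passed in as lf_adjustment; segments 0, 2 and 3 * keep the frame defaults.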
*/ cpi->mb.e_mbd.update_mb_segmentation_map = 1; cpi->mb.e_mbd.update_mb_segmentation_data = 1; enable_segmentation(cpi); /* Set up the quant segment data */ feature_data[MB_LVL_ALT_Q][0] = 0; feature_data[MB_LVL_ALT_Q][1] = (cpi->cyclic_refresh_q - Q); feature_data[MB_LVL_ALT_Q][2] = 0; feature_data[MB_LVL_ALT_Q][3] = 0; /* Set up the loop segment data */ feature_data[MB_LVL_ALT_LF][0] = 0; feature_data[MB_LVL_ALT_LF][1] = lf_adjustment; feature_data[MB_LVL_ALT_LF][2] = 0; feature_data[MB_LVL_ALT_LF][3] = 0; /* Initialise the feature data structure */ set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); } static void compute_skin_map(VP8_COMP *cpi) { int mb_row, mb_col, num_bl; VP8_COMMON *cm = &cpi->common; const uint8_t *src_y = cpi->Source->y_buffer; const uint8_t *src_u = cpi->Source->u_buffer; const uint8_t *src_v = cpi->Source->v_buffer; const int src_ystride = cpi->Source->y_stride; const int src_uvstride = cpi->Source->uv_stride; const SKIN_DETECTION_BLOCK_SIZE bsize = (cm->Width * cm->Height <= 352 * 288) ? SKIN_8X8 : SKIN_16X16; for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { num_bl = 0; for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { const int bl_index = mb_row * cm->mb_cols + mb_col; cpi->skin_map[bl_index] = vp8_compute_skin_block(src_y, src_u, src_v, src_ystride, src_uvstride, bsize, cpi->consec_zero_last[bl_index], 0); num_bl++; src_y += 16; src_u += 8; src_v += 8; } src_y += (src_ystride << 4) - (num_bl << 4); src_u += (src_uvstride << 3) - (num_bl << 3); src_v += (src_uvstride << 3) - (num_bl << 3); } // Remove isolated skin blocks (none of its neighbors are skin) and isolated // non-skin blocks (all of its neighbors are skin). Skip the boundary. for (mb_row = 1; mb_row < cm->mb_rows - 1; mb_row++) { for (mb_col = 1; mb_col < cm->mb_cols - 1; mb_col++) { const int bl_index = mb_row * cm->mb_cols + mb_col; int num_neighbor = 0; int mi, mj; int non_skin_threshold = 8; for (mi = -1; mi <= 1; mi += 1) { for (mj = -1; mj <= 1; mj += 1) { int bl_neighbor_index = (mb_row + mi) * cm->mb_cols + mb_col + mj; if (cpi->skin_map[bl_neighbor_index]) num_neighbor++; } } if (cpi->skin_map[bl_index] && num_neighbor < 2) cpi->skin_map[bl_index] = 0; if (!cpi->skin_map[bl_index] && num_neighbor == non_skin_threshold) cpi->skin_map[bl_index] = 1; } } } static void set_default_lf_deltas(VP8_COMP *cpi) { cpi->mb.e_mbd.mode_ref_lf_delta_enabled = 1; cpi->mb.e_mbd.mode_ref_lf_delta_update = 1; memset(cpi->mb.e_mbd.ref_lf_deltas, 0, sizeof(cpi->mb.e_mbd.ref_lf_deltas)); memset(cpi->mb.e_mbd.mode_lf_deltas, 0, sizeof(cpi->mb.e_mbd.mode_lf_deltas)); /* Test of ref frame deltas */ cpi->mb.e_mbd.ref_lf_deltas[INTRA_FRAME] = 2; cpi->mb.e_mbd.ref_lf_deltas[LAST_FRAME] = 0; cpi->mb.e_mbd.ref_lf_deltas[GOLDEN_FRAME] = -2; cpi->mb.e_mbd.ref_lf_deltas[ALTREF_FRAME] = -2; cpi->mb.e_mbd.mode_lf_deltas[0] = 4; /* BPRED */ if (cpi->oxcf.Mode == MODE_REALTIME) { cpi->mb.e_mbd.mode_lf_deltas[1] = -12; /* Zero */ } else { cpi->mb.e_mbd.mode_lf_deltas[1] = -2; /* Zero */ } cpi->mb.e_mbd.mode_lf_deltas[2] = 2; /* New mv */ cpi->mb.e_mbd.mode_lf_deltas[3] = 4; /* Split mv */ } /* Convenience macros for mapping speed and mode into a continuous * range */ #define GOOD(x) ((x) + 1) #define RT(x) ((x) + 7) static int speed_map(int speed, const int *map) { int res; do { res = *map++; } while (speed >= *map++); return res; } static const int thresh_mult_map_znn[] = { /* map common to zero, nearest, and near */ 0, GOOD(2), 1500, GOOD(3), 2000, RT(0), 1000, RT(2), 2000, INT_MAX }; static const int 
thresh_mult_map_vhpred[] = { 1000, GOOD(2), 1500, GOOD(3), 2000, RT(0), 1000, RT(1), 2000, RT(7), INT_MAX, INT_MAX }; static const int thresh_mult_map_bpred[] = { 2000, GOOD(0), 2500, GOOD(2), 5000, GOOD(3), 7500, RT(0), 2500, RT(1), 5000, RT(6), INT_MAX, INT_MAX }; static const int thresh_mult_map_tm[] = { 1000, GOOD(2), 1500, GOOD(3), 2000, RT(0), 0, RT(1), 1000, RT(2), 2000, RT(7), INT_MAX, INT_MAX }; static const int thresh_mult_map_new1[] = { 1000, GOOD(2), 2000, RT(0), 2000, INT_MAX }; static const int thresh_mult_map_new2[] = { 1000, GOOD(2), 2000, GOOD(3), 2500, GOOD(5), 4000, RT(0), 2000, RT(2), 2500, RT(5), 4000, INT_MAX }; static const int thresh_mult_map_split1[] = { 2500, GOOD(0), 1700, GOOD(2), 10000, GOOD(3), 25000, GOOD(4), INT_MAX, RT(0), 5000, RT(1), 10000, RT(2), 25000, RT(3), INT_MAX, INT_MAX }; static const int thresh_mult_map_split2[] = { 5000, GOOD(0), 4500, GOOD(2), 20000, GOOD(3), 50000, GOOD(4), INT_MAX, RT(0), 10000, RT(1), 20000, RT(2), 50000, RT(3), INT_MAX, INT_MAX }; static const int mode_check_freq_map_zn2[] = { /* {zero,nearest}{2,3} */ 0, RT(10), 1 << 1, RT(11), 1 << 2, RT(12), 1 << 3, INT_MAX }; static const int mode_check_freq_map_vhbpred[] = { 0, GOOD(5), 2, RT(0), 0, RT(3), 2, RT(5), 4, INT_MAX }; static const int mode_check_freq_map_near2[] = { 0, GOOD(5), 2, RT(0), 0, RT(3), 2, RT(10), 1 << 2, RT(11), 1 << 3, RT(12), 1 << 4, INT_MAX }; static const int mode_check_freq_map_new1[] = { 0, RT(10), 1 << 1, RT(11), 1 << 2, RT(12), 1 << 3, INT_MAX }; static const int mode_check_freq_map_new2[] = { 0, GOOD(5), 4, RT(0), 0, RT(3), 4, RT(10), 1 << 3, RT(11), 1 << 4, RT(12), 1 << 5, INT_MAX }; static const int mode_check_freq_map_split1[] = { 0, GOOD(2), 2, GOOD(3), 7, RT(1), 2, RT(2), 7, INT_MAX }; static const int mode_check_freq_map_split2[] = { 0, GOOD(1), 2, GOOD(2), 4, GOOD(3), 15, RT(1), 4, RT(2), 15, INT_MAX }; void vp8_set_speed_features(VP8_COMP *cpi) { SPEED_FEATURES *sf = &cpi->sf; int Mode = cpi->compressor_speed; int Speed = cpi->Speed; int Speed2; int i; VP8_COMMON *cm = &cpi->common; int last_improved_quant = sf->improved_quant; int ref_frames; /* Initialise default mode frequency sampling variables */ for (i = 0; i < MAX_MODES; ++i) { cpi->mode_check_freq[i] = 0; } cpi->mb.mbs_tested_so_far = 0; cpi->mb.mbs_zero_last_dot_suppress = 0; /* best quality defaults */ sf->RD = 1; sf->search_method = NSTEP; sf->improved_quant = 1; sf->improved_dct = 1; sf->auto_filter = 1; sf->recode_loop = 1; sf->quarter_pixel_search = 1; sf->half_pixel_search = 1; sf->iterative_sub_pixel = 1; sf->optimize_coefficients = 1; sf->use_fastquant_for_pick = 0; sf->no_skip_block4x4_search = 1; sf->first_step = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; sf->improved_mv_pred = 1; /* default thresholds to 0 */ for (i = 0; i < MAX_MODES; ++i) sf->thresh_mult[i] = 0; /* Count enabled references */ ref_frames = 1; if (cpi->ref_frame_flags & VP8_LAST_FRAME) ref_frames++; if (cpi->ref_frame_flags & VP8_GOLD_FRAME) ref_frames++; if (cpi->ref_frame_flags & VP8_ALTR_FRAME) ref_frames++; /* Convert speed to continuous range, with clamping */ if (Mode == 0) { Speed = 0; } else if (Mode == 2) { Speed = RT(Speed); } else { if (Speed > 5) Speed = 5; Speed = GOOD(Speed); } sf->thresh_mult[THR_ZERO1] = sf->thresh_mult[THR_NEAREST1] = sf->thresh_mult[THR_NEAR1] = sf->thresh_mult[THR_DC] = 0; /* always */ sf->thresh_mult[THR_ZERO2] = sf->thresh_mult[THR_ZERO3] = sf->thresh_mult[THR_NEAREST2] = sf->thresh_mult[THR_NEAREST3] = sf->thresh_mult[THR_NEAR2] = sf->thresh_mult[THR_NEAR3] = 
speed_map(Speed, thresh_mult_map_znn); sf->thresh_mult[THR_V_PRED] = sf->thresh_mult[THR_H_PRED] = speed_map(Speed, thresh_mult_map_vhpred); sf->thresh_mult[THR_B_PRED] = speed_map(Speed, thresh_mult_map_bpred); sf->thresh_mult[THR_TM] = speed_map(Speed, thresh_mult_map_tm); sf->thresh_mult[THR_NEW1] = speed_map(Speed, thresh_mult_map_new1); sf->thresh_mult[THR_NEW2] = sf->thresh_mult[THR_NEW3] = speed_map(Speed, thresh_mult_map_new2); sf->thresh_mult[THR_SPLIT1] = speed_map(Speed, thresh_mult_map_split1); sf->thresh_mult[THR_SPLIT2] = sf->thresh_mult[THR_SPLIT3] = speed_map(Speed, thresh_mult_map_split2); // Special case for temporal layers. // Reduce the thresholds for zero/nearest/near for GOLDEN, if GOLDEN is // used as second reference. We don't modify thresholds for ALTREF case // since ALTREF is usually used as long-term reference in temporal layers. if ((cpi->Speed <= 6) && (cpi->oxcf.number_of_layers > 1) && (cpi->ref_frame_flags & VP8_LAST_FRAME) && (cpi->ref_frame_flags & VP8_GOLD_FRAME)) { if (cpi->closest_reference_frame == GOLDEN_FRAME) { sf->thresh_mult[THR_ZERO2] = sf->thresh_mult[THR_ZERO2] >> 3; sf->thresh_mult[THR_NEAREST2] = sf->thresh_mult[THR_NEAREST2] >> 3; sf->thresh_mult[THR_NEAR2] = sf->thresh_mult[THR_NEAR2] >> 3; } else { sf->thresh_mult[THR_ZERO2] = sf->thresh_mult[THR_ZERO2] >> 1; sf->thresh_mult[THR_NEAREST2] = sf->thresh_mult[THR_NEAREST2] >> 1; sf->thresh_mult[THR_NEAR2] = sf->thresh_mult[THR_NEAR2] >> 1; } } cpi->mode_check_freq[THR_ZERO1] = cpi->mode_check_freq[THR_NEAREST1] = cpi->mode_check_freq[THR_NEAR1] = cpi->mode_check_freq[THR_TM] = cpi->mode_check_freq[THR_DC] = 0; /* always */ cpi->mode_check_freq[THR_ZERO2] = cpi->mode_check_freq[THR_ZERO3] = cpi->mode_check_freq[THR_NEAREST2] = cpi->mode_check_freq[THR_NEAREST3] = speed_map(Speed, mode_check_freq_map_zn2); cpi->mode_check_freq[THR_NEAR2] = cpi->mode_check_freq[THR_NEAR3] = speed_map(Speed, mode_check_freq_map_near2); cpi->mode_check_freq[THR_V_PRED] = cpi->mode_check_freq[THR_H_PRED] = cpi->mode_check_freq[THR_B_PRED] = speed_map(Speed, mode_check_freq_map_vhbpred); // For real-time mode at speed 10 keep the mode_check_freq threshold // for NEW1 similar to that of speed 9. 
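/* A note on the speed_map() tables used above (format derived from the * code): each table is a flat list of (value, upper-bound) pairs on the * continuous GOOD()/RT() speed axis, terminated by an INT_MAX bound. * speed_map() returns the first value whose bound exceeds the given speed; * e.g. with thresh_mult_map_new1 = { 1000, GOOD(2), 2000, RT(0), 2000, * INT_MAX }, speeds below GOOD(2) map to 1000 and all higher speeds map * to 2000. */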
Speed2 = Speed; if (cpi->Speed == 10 && Mode == 2) Speed2 = RT(9); cpi->mode_check_freq[THR_NEW1] = speed_map(Speed2, mode_check_freq_map_new1); cpi->mode_check_freq[THR_NEW2] = cpi->mode_check_freq[THR_NEW3] = speed_map(Speed, mode_check_freq_map_new2); cpi->mode_check_freq[THR_SPLIT1] = speed_map(Speed, mode_check_freq_map_split1); cpi->mode_check_freq[THR_SPLIT2] = cpi->mode_check_freq[THR_SPLIT3] = speed_map(Speed, mode_check_freq_map_split2); Speed = cpi->Speed; switch (Mode) { #if !CONFIG_REALTIME_ONLY case 0: /* best quality mode */ sf->first_step = 0; sf->max_step_search_steps = MAX_MVSEARCH_STEPS; break; case 1: case 3: if (Speed > 0) { /* Disable coefficient optimization above speed 0 */ sf->optimize_coefficients = 0; sf->use_fastquant_for_pick = 1; sf->no_skip_block4x4_search = 0; sf->first_step = 1; } if (Speed > 2) { sf->improved_quant = 0; sf->improved_dct = 0; /* Only do recode loop on key frames, golden frames and * alt ref frames */ sf->recode_loop = 2; } if (Speed > 3) { sf->auto_filter = 1; sf->recode_loop = 0; /* recode loop off */ sf->RD = 0; /* Turn rd off */ } if (Speed > 4) { sf->auto_filter = 0; /* Faster selection of loop filter */ } break; #endif case 2: sf->optimize_coefficients = 0; sf->recode_loop = 0; sf->auto_filter = 1; sf->iterative_sub_pixel = 1; sf->search_method = NSTEP; if (Speed > 0) { sf->improved_quant = 0; sf->improved_dct = 0; sf->use_fastquant_for_pick = 1; sf->no_skip_block4x4_search = 0; sf->first_step = 1; } if (Speed > 2) sf->auto_filter = 0; /* Faster selection of loop filter */ if (Speed > 3) { sf->RD = 0; sf->auto_filter = 1; } if (Speed > 4) { sf->auto_filter = 0; /* Faster selection of loop filter */ sf->search_method = HEX; sf->iterative_sub_pixel = 0; } if (Speed > 6) { unsigned int sum = 0; unsigned int total_mbs = cm->MBs; int thresh; unsigned int total_skip; int min = 2000; if (cpi->oxcf.encode_breakout > 2000) min = cpi->oxcf.encode_breakout; min >>= 7; for (i = 0; i < min; ++i) { sum += cpi->mb.error_bins[i]; } total_skip = sum; sum = 0; /* i starts from 2 to make sure thresh started from 2048 */ for (; i < 1024; ++i) { sum += cpi->mb.error_bins[i]; if (10 * sum >= (unsigned int)(cpi->Speed - 6) * (total_mbs - total_skip)) { break; } } i--; thresh = (i << 7); if (thresh < 2000) thresh = 2000; if (ref_frames > 1) { sf->thresh_mult[THR_NEW1] = thresh; sf->thresh_mult[THR_NEAREST1] = thresh >> 1; sf->thresh_mult[THR_NEAR1] = thresh >> 1; } if (ref_frames > 2) { sf->thresh_mult[THR_NEW2] = thresh << 1; sf->thresh_mult[THR_NEAREST2] = thresh; sf->thresh_mult[THR_NEAR2] = thresh; } if (ref_frames > 3) { sf->thresh_mult[THR_NEW3] = thresh << 1; sf->thresh_mult[THR_NEAREST3] = thresh; sf->thresh_mult[THR_NEAR3] = thresh; } sf->improved_mv_pred = 0; } if (Speed > 8) sf->quarter_pixel_search = 0; if (cm->version == 0) { cm->filter_type = NORMAL_LOOPFILTER; if (Speed >= 14) cm->filter_type = SIMPLE_LOOPFILTER; } else { cm->filter_type = SIMPLE_LOOPFILTER; } /* This has a big hit on quality. Last resort */ if (Speed >= 15) sf->half_pixel_search = 0; memset(cpi->mb.error_bins, 0, sizeof(cpi->mb.error_bins)); }; /* switch */ /* Slow quant, dct and trellis not worthwhile for first pass * so make sure they are always turned off. 
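 * (The first pass only gathers rate/error statistics and does not emit a * final bitstream, so the extra precision would cost encode time without * improving the data the second pass consumes.)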
*/ if (cpi->pass == 1) { sf->improved_quant = 0; sf->optimize_coefficients = 0; sf->improved_dct = 0; } if (cpi->sf.search_method == NSTEP) { vp8_init3smotion_compensation(&cpi->mb, cm->yv12_fb[cm->lst_fb_idx].y_stride); } else if (cpi->sf.search_method == DIAMOND) { vp8_init_dsmotion_compensation(&cpi->mb, cm->yv12_fb[cm->lst_fb_idx].y_stride); } if (cpi->sf.improved_dct) { cpi->mb.short_fdct8x4 = vp8_short_fdct8x4; cpi->mb.short_fdct4x4 = vp8_short_fdct4x4; } else { /* No fast FDCT defined for any platform at this time. */ cpi->mb.short_fdct8x4 = vp8_short_fdct8x4; cpi->mb.short_fdct4x4 = vp8_short_fdct4x4; } cpi->mb.short_walsh4x4 = vp8_short_walsh4x4; if (cpi->sf.improved_quant) { cpi->mb.quantize_b = vp8_regular_quantize_b; } else { cpi->mb.quantize_b = vp8_fast_quantize_b; } if (cpi->sf.improved_quant != last_improved_quant) vp8cx_init_quantizer(cpi); if (cpi->sf.iterative_sub_pixel == 1) { cpi->find_fractional_mv_step = vp8_find_best_sub_pixel_step_iteratively; } else if (cpi->sf.quarter_pixel_search) { cpi->find_fractional_mv_step = vp8_find_best_sub_pixel_step; } else if (cpi->sf.half_pixel_search) { cpi->find_fractional_mv_step = vp8_find_best_half_pixel_step; } else { cpi->find_fractional_mv_step = vp8_skip_fractional_mv_step; } if (cpi->sf.optimize_coefficients == 1 && cpi->pass != 1) { cpi->mb.optimize = 1; } else { cpi->mb.optimize = 0; } if (cpi->common.full_pixel) { cpi->find_fractional_mv_step = vp8_skip_fractional_mv_step; } #ifdef SPEEDSTATS frames_at_speed[cpi->Speed]++; #endif } #undef GOOD #undef RT static void alloc_raw_frame_buffers(VP8_COMP *cpi) { #if VP8_TEMPORAL_ALT_REF int width = (cpi->oxcf.Width + 15) & ~15; int height = (cpi->oxcf.Height + 15) & ~15; #endif cpi->lookahead = vp8_lookahead_init(cpi->oxcf.Width, cpi->oxcf.Height, cpi->oxcf.lag_in_frames); if (!cpi->lookahead) { vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate lag buffers"); } #if VP8_TEMPORAL_ALT_REF if (vp8_yv12_alloc_frame_buffer(&cpi->alt_ref_buffer, width, height, VP8BORDERINPIXELS)) { vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate altref buffer"); } #endif } static void dealloc_raw_frame_buffers(VP8_COMP *cpi) { #if VP8_TEMPORAL_ALT_REF vp8_yv12_de_alloc_frame_buffer(&cpi->alt_ref_buffer); #endif vp8_lookahead_destroy(cpi->lookahead); } static int vp8_alloc_partition_data(VP8_COMP *cpi) { vpx_free(cpi->mb.pip); cpi->mb.pip = vpx_calloc((cpi->common.mb_cols + 1) * (cpi->common.mb_rows + 1), sizeof(PARTITION_INFO)); if (!cpi->mb.pip) return 1; cpi->mb.pi = cpi->mb.pip + cpi->common.mode_info_stride + 1; return 0; } void vp8_alloc_compressor_data(VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; int width = cm->Width; int height = cm->Height; if (vp8_alloc_frame_buffers(cm, width, height)) { vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffers"); } if (vp8_alloc_partition_data(cpi)) { vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate partition data"); } if ((width & 0xf) != 0) width += 16 - (width & 0xf); if ((height & 0xf) != 0) height += 16 - (height & 0xf); if (vp8_yv12_alloc_frame_buffer(&cpi->pick_lf_lvl_frame, width, height, VP8BORDERINPIXELS)) { vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate last frame buffer"); } if (vp8_yv12_alloc_frame_buffer(&cpi->scaled_source, width, height, VP8BORDERINPIXELS)) { vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled source buffer"); } 
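/* Token buffer sizing below: the worst case is roughly one token per * coefficient, i.e. 16 coefficients in each of the 24 4x4 blocks of a * macroblock (16 Y + 4 U + 4 V), hence mb_rows * mb_cols * 24 * 16. */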
vpx_free(cpi->tok); { #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING unsigned int tokens = 8 * 24 * 16; /* one MB for each thread */ #else unsigned int tokens = cm->mb_rows * cm->mb_cols * 24 * 16; #endif CHECK_MEM_ERROR(cpi->tok, vpx_calloc(tokens, sizeof(*cpi->tok))); } /* Data used for real time vc mode to see if gf needs refreshing */ cpi->zeromv_count = 0; /* Structures used to monitor GF usage */ vpx_free(cpi->gf_active_flags); CHECK_MEM_ERROR( cpi->gf_active_flags, vpx_calloc(sizeof(*cpi->gf_active_flags), cm->mb_rows * cm->mb_cols)); cpi->gf_active_count = cm->mb_rows * cm->mb_cols; vpx_free(cpi->mb_activity_map); CHECK_MEM_ERROR( cpi->mb_activity_map, vpx_calloc(sizeof(*cpi->mb_activity_map), cm->mb_rows * cm->mb_cols)); /* allocate memory for storing last frame's MVs for MV prediction. */ vpx_free(cpi->lfmv); CHECK_MEM_ERROR(cpi->lfmv, vpx_calloc((cm->mb_rows + 2) * (cm->mb_cols + 2), sizeof(*cpi->lfmv))); vpx_free(cpi->lf_ref_frame_sign_bias); CHECK_MEM_ERROR(cpi->lf_ref_frame_sign_bias, vpx_calloc((cm->mb_rows + 2) * (cm->mb_cols + 2), sizeof(*cpi->lf_ref_frame_sign_bias))); vpx_free(cpi->lf_ref_frame); CHECK_MEM_ERROR(cpi->lf_ref_frame, vpx_calloc((cm->mb_rows + 2) * (cm->mb_cols + 2), sizeof(*cpi->lf_ref_frame))); /* Create the encoder segmentation map and set all entries to 0 */ vpx_free(cpi->segmentation_map); CHECK_MEM_ERROR( cpi->segmentation_map, vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->segmentation_map))); cpi->cyclic_refresh_mode_index = 0; vpx_free(cpi->active_map); CHECK_MEM_ERROR(cpi->active_map, vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->active_map))); memset(cpi->active_map, 1, (cm->mb_rows * cm->mb_cols)); #if CONFIG_MULTITHREAD if (width < 640) { cpi->mt_sync_range = 1; } else if (width <= 1280) { cpi->mt_sync_range = 4; } else if (width <= 2560) { cpi->mt_sync_range = 8; } else { cpi->mt_sync_range = 16; } if (cpi->oxcf.multi_threaded > 1) { int i; vpx_free(cpi->mt_current_mb_col); CHECK_MEM_ERROR(cpi->mt_current_mb_col, vpx_malloc(sizeof(*cpi->mt_current_mb_col) * cm->mb_rows)); for (i = 0; i < cm->mb_rows; ++i) vpx_atomic_init(&cpi->mt_current_mb_col[i], 0); } #endif vpx_free(cpi->tplist); CHECK_MEM_ERROR(cpi->tplist, vpx_malloc(sizeof(TOKENLIST) * cm->mb_rows)); #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) { vp8_denoiser_free(&cpi->denoiser); if (vp8_denoiser_allocate(&cpi->denoiser, width, height, cm->mb_rows, cm->mb_cols, cpi->oxcf.noise_sensitivity)) { vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate denoiser"); } } #endif } /* Quant MOD */ static const int q_trans[] = { 0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 12, 13, 15, 17, 18, 19, 20, 21, 23, 24, 25, 26, 27, 28, 29, 30, 31, 33, 35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59, 61, 64, 67, 70, 73, 76, 79, 82, 85, 88, 91, 94, 97, 100, 103, 106, 109, 112, 115, 118, 121, 124, 127, }; int vp8_reverse_trans(int x) { int i; for (i = 0; i < 64; ++i) { if (q_trans[i] >= x) return i; } return 63; } void vp8_new_framerate(VP8_COMP *cpi, double framerate) { if (framerate < .1) framerate = 30; cpi->framerate = framerate; cpi->output_framerate = framerate; cpi->per_frame_bandwidth = (int)(cpi->oxcf.target_bandwidth / cpi->output_framerate); cpi->av_per_frame_bandwidth = cpi->per_frame_bandwidth; cpi->min_frame_bandwidth = (int)(cpi->av_per_frame_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); /* Set Maximum gf/arf interval */ cpi->max_gf_interval = ((int)(cpi->output_framerate / 2.0) + 2); if (cpi->max_gf_interval < 12) cpi->max_gf_interval 
= 12; /* Extended interval for genuinely static scenes */ cpi->twopass.static_scene_max_gf_interval = cpi->key_frame_frequency >> 1; /* Special conditions when alt ref frame is enabled in lagged compress mode */ if (cpi->oxcf.play_alternate && cpi->oxcf.lag_in_frames) { if (cpi->max_gf_interval > cpi->oxcf.lag_in_frames - 1) { cpi->max_gf_interval = cpi->oxcf.lag_in_frames - 1; } if (cpi->twopass.static_scene_max_gf_interval > cpi->oxcf.lag_in_frames - 1) { cpi->twopass.static_scene_max_gf_interval = cpi->oxcf.lag_in_frames - 1; } } if (cpi->max_gf_interval > cpi->twopass.static_scene_max_gf_interval) { cpi->max_gf_interval = cpi->twopass.static_scene_max_gf_interval; } } static void init_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) { VP8_COMMON *cm = &cpi->common; cpi->oxcf = *oxcf; cpi->auto_gold = 1; cpi->auto_adjust_gold_quantizer = 1; cm->version = oxcf->Version; vp8_setup_version(cm); /* Frame rate is not available on the first frame, as it's derived from * the observed timestamps. The actual value used here doesn't matter * too much, as it will adapt quickly. */ if (oxcf->timebase.num > 0) { cpi->framerate = (double)(oxcf->timebase.den) / (double)(oxcf->timebase.num); } else { cpi->framerate = 30; } /* If the reciprocal of the timebase seems like a reasonable framerate, * then use that as a guess, otherwise use 30. */ if (cpi->framerate > 180) cpi->framerate = 30; cpi->ref_framerate = cpi->framerate; cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME; cm->refresh_golden_frame = 0; cm->refresh_last_frame = 1; cm->refresh_entropy_probs = 1; /* change includes all joint functionality */ vp8_change_config(cpi, oxcf); /* Initialize active best and worst q and average q values. */ cpi->active_worst_quality = cpi->oxcf.worst_allowed_q; cpi->active_best_quality = cpi->oxcf.best_allowed_q; cpi->avg_frame_qindex = cpi->oxcf.worst_allowed_q; /* Initialise the starting buffer levels */ cpi->buffer_level = cpi->oxcf.starting_buffer_level; cpi->bits_off_target = cpi->oxcf.starting_buffer_level; cpi->rolling_target_bits = cpi->av_per_frame_bandwidth; cpi->rolling_actual_bits = cpi->av_per_frame_bandwidth; cpi->long_rolling_target_bits = cpi->av_per_frame_bandwidth; cpi->long_rolling_actual_bits = cpi->av_per_frame_bandwidth; cpi->total_actual_bits = 0; cpi->total_target_vs_actual = 0; /* Temporal scalability */ if (cpi->oxcf.number_of_layers > 1) { unsigned int i; double prev_layer_framerate = 0; for (i = 0; i < cpi->oxcf.number_of_layers; ++i) { init_temporal_layer_context(cpi, oxcf, i, prev_layer_framerate); prev_layer_framerate = cpi->output_framerate / cpi->oxcf.rate_decimator[i]; } } #if VP8_TEMPORAL_ALT_REF { int i; cpi->fixed_divide[0] = 0; for (i = 1; i < 512; ++i) cpi->fixed_divide[i] = 0x80000 / i; } #endif } static void update_layer_contexts(VP8_COMP *cpi) { VP8_CONFIG *oxcf = &cpi->oxcf; /* Update snapshots of the layer contexts to reflect new parameters */ if (oxcf->number_of_layers > 1) { unsigned int i; double prev_layer_framerate = 0; assert(oxcf->number_of_layers <= VPX_TS_MAX_LAYERS); for (i = 0; i < oxcf->number_of_layers && i < VPX_TS_MAX_LAYERS; ++i) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; lc->framerate = cpi->ref_framerate / oxcf->rate_decimator[i]; lc->target_bandwidth = oxcf->target_bitrate[i] * 1000; lc->starting_buffer_level = rescale( (int)oxcf->starting_buffer_level_in_ms, lc->target_bandwidth, 1000); if (oxcf->optimal_buffer_level == 0) { lc->optimal_buffer_level = lc->target_bandwidth / 8; } else { lc->optimal_buffer_level = rescale(
(int)oxcf->optimal_buffer_level_in_ms, lc->target_bandwidth, 1000); } if (oxcf->maximum_buffer_size == 0) { lc->maximum_buffer_size = lc->target_bandwidth / 8; } else { lc->maximum_buffer_size = rescale((int)oxcf->maximum_buffer_size_in_ms, lc->target_bandwidth, 1000); } /* Work out the average size of a frame within this layer */ if (i > 0) { lc->avg_frame_size_for_layer = (int)((oxcf->target_bitrate[i] - oxcf->target_bitrate[i - 1]) * 1000 / (lc->framerate - prev_layer_framerate)); } prev_layer_framerate = lc->framerate; } } } void vp8_change_config(VP8_COMP *cpi, VP8_CONFIG *oxcf) { VP8_COMMON *cm = &cpi->common; int last_w, last_h; unsigned int prev_number_of_layers; if (!cpi) return; if (!oxcf) return; if (cm->version != oxcf->Version) { cm->version = oxcf->Version; vp8_setup_version(cm); } last_w = cpi->oxcf.Width; last_h = cpi->oxcf.Height; prev_number_of_layers = cpi->oxcf.number_of_layers; cpi->oxcf = *oxcf; switch (cpi->oxcf.Mode) { case MODE_REALTIME: cpi->pass = 0; cpi->compressor_speed = 2; if (cpi->oxcf.cpu_used < -16) { cpi->oxcf.cpu_used = -16; } if (cpi->oxcf.cpu_used > 16) cpi->oxcf.cpu_used = 16; break; case MODE_GOODQUALITY: cpi->pass = 0; cpi->compressor_speed = 1; if (cpi->oxcf.cpu_used < -5) { cpi->oxcf.cpu_used = -5; } if (cpi->oxcf.cpu_used > 5) cpi->oxcf.cpu_used = 5; break; case MODE_BESTQUALITY: cpi->pass = 0; cpi->compressor_speed = 0; break; case MODE_FIRSTPASS: cpi->pass = 1; cpi->compressor_speed = 1; break; case MODE_SECONDPASS: cpi->pass = 2; cpi->compressor_speed = 1; if (cpi->oxcf.cpu_used < -5) { cpi->oxcf.cpu_used = -5; } if (cpi->oxcf.cpu_used > 5) cpi->oxcf.cpu_used = 5; break; case MODE_SECONDPASS_BEST: cpi->pass = 2; cpi->compressor_speed = 0; break; } if (cpi->pass == 0) cpi->auto_worst_q = 1; cpi->oxcf.worst_allowed_q = q_trans[oxcf->worst_allowed_q]; cpi->oxcf.best_allowed_q = q_trans[oxcf->best_allowed_q]; cpi->oxcf.cq_level = q_trans[cpi->oxcf.cq_level]; if (oxcf->fixed_q >= 0) { if (oxcf->worst_allowed_q < 0) { cpi->oxcf.fixed_q = q_trans[0]; } else { cpi->oxcf.fixed_q = q_trans[oxcf->worst_allowed_q]; } if (oxcf->alt_q < 0) { cpi->oxcf.alt_q = q_trans[0]; } else { cpi->oxcf.alt_q = q_trans[oxcf->alt_q]; } if (oxcf->key_q < 0) { cpi->oxcf.key_q = q_trans[0]; } else { cpi->oxcf.key_q = q_trans[oxcf->key_q]; } if (oxcf->gold_q < 0) { cpi->oxcf.gold_q = q_trans[0]; } else { cpi->oxcf.gold_q = q_trans[oxcf->gold_q]; } } cpi->ext_refresh_frame_flags_pending = 0; cpi->baseline_gf_interval = cpi->oxcf.alt_freq ? cpi->oxcf.alt_freq : DEFAULT_GF_INTERVAL; // GF behavior for 1 pass CBR, used when error_resilience is off. 
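// (gf_interval_onepass_cbr itself is derived from the cyclic refresh rate // when the compressor is created; see vp8_create_compressor() below.)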
if (!cpi->oxcf.error_resilient_mode && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER && cpi->oxcf.Mode == MODE_REALTIME) cpi->baseline_gf_interval = cpi->gf_interval_onepass_cbr; #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) cpi->oxcf.token_partitions = 3; #endif if (cpi->oxcf.token_partitions >= 0 && cpi->oxcf.token_partitions <= 3) { cm->multi_token_partition = (TOKEN_PARTITION)cpi->oxcf.token_partitions; } setup_features(cpi); if (!cpi->use_roi_static_threshold) { int i; for (i = 0; i < MAX_MB_SEGMENTS; ++i) { cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout; } } /* At the moment the first order values may not be > MAXQ */ if (cpi->oxcf.fixed_q > MAXQ) cpi->oxcf.fixed_q = MAXQ; /* local file playback mode == really big buffer */ if (cpi->oxcf.end_usage == USAGE_LOCAL_FILE_PLAYBACK) { cpi->oxcf.starting_buffer_level = 60000; cpi->oxcf.optimal_buffer_level = 60000; cpi->oxcf.maximum_buffer_size = 240000; cpi->oxcf.starting_buffer_level_in_ms = 60000; cpi->oxcf.optimal_buffer_level_in_ms = 60000; cpi->oxcf.maximum_buffer_size_in_ms = 240000; } /* Convert target bandwidth from Kbit/s to Bit/s */ cpi->oxcf.target_bandwidth *= 1000; cpi->oxcf.starting_buffer_level = rescale( (int)cpi->oxcf.starting_buffer_level, cpi->oxcf.target_bandwidth, 1000); /* Set or reset optimal and maximum buffer levels. */ if (cpi->oxcf.optimal_buffer_level == 0) { cpi->oxcf.optimal_buffer_level = cpi->oxcf.target_bandwidth / 8; } else { cpi->oxcf.optimal_buffer_level = rescale( (int)cpi->oxcf.optimal_buffer_level, cpi->oxcf.target_bandwidth, 1000); } if (cpi->oxcf.maximum_buffer_size == 0) { cpi->oxcf.maximum_buffer_size = cpi->oxcf.target_bandwidth / 8; } else { cpi->oxcf.maximum_buffer_size = rescale((int)cpi->oxcf.maximum_buffer_size, cpi->oxcf.target_bandwidth, 1000); } // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) { cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; cpi->buffer_level = cpi->bits_off_target; } /* Set up frame rate and related parameters rate control values. */ vp8_new_framerate(cpi, cpi->framerate); /* Set absolute upper and lower quality limits */ cpi->worst_quality = cpi->oxcf.worst_allowed_q; cpi->best_quality = cpi->oxcf.best_allowed_q; /* active values should only be modified if out of new range */ if (cpi->active_worst_quality > cpi->oxcf.worst_allowed_q) { cpi->active_worst_quality = cpi->oxcf.worst_allowed_q; } /* less likely */ else if (cpi->active_worst_quality < cpi->oxcf.best_allowed_q) { cpi->active_worst_quality = cpi->oxcf.best_allowed_q; } if (cpi->active_best_quality < cpi->oxcf.best_allowed_q) { cpi->active_best_quality = cpi->oxcf.best_allowed_q; } /* less likely */ else if (cpi->active_best_quality > cpi->oxcf.worst_allowed_q) { cpi->active_best_quality = cpi->oxcf.worst_allowed_q; } cpi->buffered_mode = cpi->oxcf.optimal_buffer_level > 0; cpi->cq_target_quality = cpi->oxcf.cq_level; /* Only allow dropped frames in buffered mode */ cpi->drop_frames_allowed = cpi->oxcf.allow_df && cpi->buffered_mode; cpi->target_bandwidth = cpi->oxcf.target_bandwidth; // Check if the number of temporal layers has changed, and if so reset the // pattern counter and set/initialize the temporal layer context for the // new layer configuration. 
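// (reset_temporal_layer_change() keeps the state of layers that existed // before the change and only initializes contexts for newly-added ones; // see its definition above.)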
if (cpi->oxcf.number_of_layers != prev_number_of_layers) { // If the number of temporal layers are changed we must start at the // base of the pattern cycle, so set the layer id to 0 and reset // the temporal pattern counter. if (cpi->temporal_layer_id > 0) { cpi->temporal_layer_id = 0; } cpi->temporal_pattern_counter = 0; reset_temporal_layer_change(cpi, oxcf, prev_number_of_layers); } if (!cpi->initial_width) { cpi->initial_width = cpi->oxcf.Width; cpi->initial_height = cpi->oxcf.Height; } cm->Width = cpi->oxcf.Width; cm->Height = cpi->oxcf.Height; assert(cm->Width <= cpi->initial_width); assert(cm->Height <= cpi->initial_height); /* TODO(jkoleszar): if an internal spatial resampling is active, * and we downsize the input image, maybe we should clear the * internal scale immediately rather than waiting for it to * correct. */ /* VP8 sharpness level mapping 0-7 (vs 0-10 in general VPx dialogs) */ if (cpi->oxcf.Sharpness > 7) cpi->oxcf.Sharpness = 7; cm->sharpness_level = cpi->oxcf.Sharpness; if (cm->horiz_scale != NORMAL || cm->vert_scale != NORMAL) { int hr, hs, vr, vs; Scale2Ratio(cm->horiz_scale, &hr, &hs); Scale2Ratio(cm->vert_scale, &vr, &vs); /* always go to the next whole number */ cm->Width = (hs - 1 + cpi->oxcf.Width * hr) / hs; cm->Height = (vs - 1 + cpi->oxcf.Height * vr) / vs; } if (last_w != cpi->oxcf.Width || last_h != cpi->oxcf.Height) { cpi->force_next_frame_intra = 1; } if (((cm->Width + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_width || ((cm->Height + 15) & ~15) != cm->yv12_fb[cm->lst_fb_idx].y_height || cm->yv12_fb[cm->lst_fb_idx].y_width == 0) { dealloc_raw_frame_buffers(cpi); alloc_raw_frame_buffers(cpi); vp8_alloc_compressor_data(cpi); } if (cpi->oxcf.fixed_q >= 0) { cpi->last_q[0] = cpi->oxcf.fixed_q; cpi->last_q[1] = cpi->oxcf.fixed_q; } cpi->Speed = cpi->oxcf.cpu_used; /* force to allowlag to 0 if lag_in_frames is 0; */ if (cpi->oxcf.lag_in_frames == 0) { cpi->oxcf.allow_lag = 0; } /* Limit on lag buffers as these are not currently dynamically allocated */ else if (cpi->oxcf.lag_in_frames > MAX_LAG_BUFFERS) { cpi->oxcf.lag_in_frames = MAX_LAG_BUFFERS; } /* YX Temp */ cpi->alt_ref_source = NULL; cpi->is_src_frame_alt_ref = 0; #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { if (!cpi->denoiser.yv12_mc_running_avg.buffer_alloc) { int width = (cpi->oxcf.Width + 15) & ~15; int height = (cpi->oxcf.Height + 15) & ~15; if (vp8_denoiser_allocate(&cpi->denoiser, width, height, cm->mb_rows, cm->mb_cols, cpi->oxcf.noise_sensitivity)) { vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate denoiser"); } } } #endif #if 0 /* Experimental RD Code */ cpi->frame_distortion = 0; cpi->last_frame_distortion = 0; #endif } #ifndef M_LOG2_E #define M_LOG2_E 0.693147180559945309417 #endif #define log2f(x) (log(x) / (float)M_LOG2_E) static void cal_mvsadcosts(int *mvsadcost[2]) { int i = 1; mvsadcost[0][0] = 300; mvsadcost[1][0] = 300; do { double z = 256 * (2 * (log2f(8 * i) + .6)); mvsadcost[0][i] = (int)z; mvsadcost[1][i] = (int)z; mvsadcost[0][-i] = (int)z; mvsadcost[1][-i] = (int)z; } while (++i <= mvfp_max); } struct VP8_COMP *vp8_create_compressor(VP8_CONFIG *oxcf) { int i; VP8_COMP *cpi; VP8_COMMON *cm; cpi = vpx_memalign(32, sizeof(VP8_COMP)); /* Check that the CPI instance is valid */ if (!cpi) return 0; cm = &cpi->common; memset(cpi, 0, sizeof(VP8_COMP)); if (setjmp(cm->error.jmp)) { cpi->common.error.setjmp = 0; vp8_remove_compressor(&cpi); return 0; } cpi->common.error.setjmp = 1; CHECK_MEM_ERROR(cpi->mb.ss, 
vpx_calloc(sizeof(search_site), (MAX_MVSEARCH_STEPS * 8) + 1)); vp8_create_common(&cpi->common); init_config(cpi, oxcf); memcpy(cpi->base_skip_false_prob, vp8cx_base_skip_false_prob, sizeof(vp8cx_base_skip_false_prob)); cpi->common.current_video_frame = 0; cpi->temporal_pattern_counter = 0; cpi->temporal_layer_id = -1; cpi->kf_overspend_bits = 0; cpi->kf_bitrate_adjustment = 0; cpi->frames_till_gf_update_due = 0; cpi->gf_overspend_bits = 0; cpi->non_gf_bitrate_adjustment = 0; cpi->prob_last_coded = 128; cpi->prob_gf_coded = 128; cpi->prob_intra_coded = 63; /* Prime the recent reference frame usage counters. * Hereafter they will be maintained as a sort of moving average */ cpi->recent_ref_frame_usage[INTRA_FRAME] = 1; cpi->recent_ref_frame_usage[LAST_FRAME] = 1; cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1; cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1; /* Set reference frame sign bias for ALTREF frame to 1 (for now) */ cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; cpi->twopass.gf_decay_rate = 0; cpi->baseline_gf_interval = DEFAULT_GF_INTERVAL; cpi->gold_is_last = 0; cpi->alt_is_last = 0; cpi->gold_is_alt = 0; cpi->active_map_enabled = 0; cpi->use_roi_static_threshold = 0; #if 0 /* Experimental code for lagged and one pass */ /* Initialise one_pass GF frames stats */ /* Update stats used for GF selection */ if (cpi->pass == 0) { cpi->one_pass_frame_index = 0; for (i = 0; i < MAX_LAG_BUFFERS; ++i) { cpi->one_pass_frame_stats[i].frames_so_far = 0; cpi->one_pass_frame_stats[i].frame_intra_error = 0.0; cpi->one_pass_frame_stats[i].frame_coded_error = 0.0; cpi->one_pass_frame_stats[i].frame_pcnt_inter = 0.0; cpi->one_pass_frame_stats[i].frame_pcnt_motion = 0.0; cpi->one_pass_frame_stats[i].frame_mvr = 0.0; cpi->one_pass_frame_stats[i].frame_mvr_abs = 0.0; cpi->one_pass_frame_stats[i].frame_mvc = 0.0; cpi->one_pass_frame_stats[i].frame_mvc_abs = 0.0; } } #endif cpi->mse_source_denoised = 0; /* Should we use the cyclic refresh method. * Currently there is no external control for this. * Enable it for error_resilient_mode, or for 1 pass CBR mode. */ cpi->cyclic_refresh_mode_enabled = (cpi->oxcf.error_resilient_mode || (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER && cpi->oxcf.Mode <= 2)); cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 7; if (cpi->oxcf.number_of_layers == 1) { cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 20; } else if (cpi->oxcf.number_of_layers == 2) { cpi->cyclic_refresh_mode_max_mbs_perframe = (cpi->common.mb_rows * cpi->common.mb_cols) / 10; } cpi->cyclic_refresh_mode_index = 0; cpi->cyclic_refresh_q = 32; // GF behavior for 1 pass CBR, used when error_resilience is off. cpi->gf_update_onepass_cbr = 0; cpi->gf_noboost_onepass_cbr = 0; if (!cpi->oxcf.error_resilient_mode && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER && cpi->oxcf.Mode <= 2) { cpi->gf_update_onepass_cbr = 1; cpi->gf_noboost_onepass_cbr = 1; cpi->gf_interval_onepass_cbr = cpi->cyclic_refresh_mode_max_mbs_perframe > 0 ? 
(2 * (cpi->common.mb_rows * cpi->common.mb_cols) / cpi->cyclic_refresh_mode_max_mbs_perframe) : 10; cpi->gf_interval_onepass_cbr = VPXMIN(40, VPXMAX(6, cpi->gf_interval_onepass_cbr)); cpi->baseline_gf_interval = cpi->gf_interval_onepass_cbr; } if (cpi->cyclic_refresh_mode_enabled) { CHECK_MEM_ERROR(cpi->cyclic_refresh_map, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); } else { cpi->cyclic_refresh_map = (signed char *)NULL; } CHECK_MEM_ERROR(cpi->skin_map, vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(cpi->skin_map[0]))); CHECK_MEM_ERROR(cpi->consec_zero_last, vpx_calloc(cm->mb_rows * cm->mb_cols, 1)); CHECK_MEM_ERROR(cpi->consec_zero_last_mvbias, vpx_calloc((cpi->common.mb_rows * cpi->common.mb_cols), 1)); /*Initialize the feed-forward activity masking.*/ cpi->activity_avg = 90 << 12; /* Give a sensible default for the first frame. */ cpi->frames_since_key = 8; cpi->key_frame_frequency = cpi->oxcf.key_freq; cpi->this_key_frame_forced = 0; cpi->next_key_frame_forced = 0; cpi->source_alt_ref_pending = 0; cpi->source_alt_ref_active = 0; cpi->common.refresh_alt_ref_frame = 0; cpi->force_maxqp = 0; cpi->frames_since_last_drop_overshoot = 0; cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; #if CONFIG_INTERNAL_STATS cpi->b_calculate_ssimg = 0; cpi->count = 0; cpi->bytes = 0; if (cpi->b_calculate_psnr) { cpi->total_sq_error = 0.0; cpi->total_sq_error2 = 0.0; cpi->total_y = 0.0; cpi->total_u = 0.0; cpi->total_v = 0.0; cpi->total = 0.0; cpi->totalp_y = 0.0; cpi->totalp_u = 0.0; cpi->totalp_v = 0.0; cpi->totalp = 0.0; cpi->tot_recode_hits = 0; cpi->summed_quality = 0; cpi->summed_weights = 0; } #endif cpi->first_time_stamp_ever = 0x7FFFFFFF; cpi->frames_till_gf_update_due = 0; cpi->key_frame_count = 1; cpi->ni_av_qi = cpi->oxcf.worst_allowed_q; cpi->ni_tot_qi = 0; cpi->ni_frames = 0; cpi->total_byte_count = 0; cpi->drop_frame = 0; cpi->rate_correction_factor = 1.0; cpi->key_frame_rate_correction_factor = 1.0; cpi->gf_rate_correction_factor = 1.0; cpi->twopass.est_max_qcorrection_factor = 1.0; for (i = 0; i < KEY_FRAME_CONTEXT; ++i) { cpi->prior_key_frame_distance[i] = (int)cpi->output_framerate; } #ifdef OUTPUT_YUV_SRC yuv_file = fopen("bd.yuv", "ab"); #endif #ifdef OUTPUT_YUV_DENOISED yuv_denoised_file = fopen("denoised.yuv", "ab"); #endif #ifdef OUTPUT_YUV_SKINMAP yuv_skinmap_file = fopen("skinmap.yuv", "wb"); #endif #if 0 framepsnr = fopen("framepsnr.stt", "a"); kf_list = fopen("kf_list.stt", "w"); #endif cpi->output_pkt_list = oxcf->output_pkt_list; #if !CONFIG_REALTIME_ONLY if (cpi->pass == 1) { vp8_init_first_pass(cpi); } else if (cpi->pass == 2) { size_t packet_sz = sizeof(FIRSTPASS_STATS); int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; cpi->twopass.stats_in = cpi->twopass.stats_in_start; cpi->twopass.stats_in_end = (void *)((char *)cpi->twopass.stats_in + (packets - 1) * packet_sz); vp8_init_second_pass(cpi); } #endif if (cpi->compressor_speed == 2) { cpi->avg_encode_time = 0; cpi->avg_pick_mode_time = 0; } vp8_set_speed_features(cpi); /* Set starting values of RD threshold multipliers (128 = *1) */ for (i = 0; i < MAX_MODES; ++i) { cpi->mb.rd_thresh_mult[i] = 128; } #if CONFIG_MULTITHREAD if (vp8cx_create_encoder_threads(cpi)) { vp8_remove_compressor(&cpi); return 0; } #endif cpi->fn_ptr[BLOCK_16X16].sdf = vpx_sad16x16; cpi->fn_ptr[BLOCK_16X16].vf = vpx_variance16x16; cpi->fn_ptr[BLOCK_16X16].svf = vpx_sub_pixel_variance16x16; cpi->fn_ptr[BLOCK_16X16].sdx3f = vpx_sad16x16x3; cpi->fn_ptr[BLOCK_16X16].sdx8f = 
vpx_sad16x16x8; cpi->fn_ptr[BLOCK_16X16].sdx4df = vpx_sad16x16x4d; cpi->fn_ptr[BLOCK_16X8].sdf = vpx_sad16x8; cpi->fn_ptr[BLOCK_16X8].vf = vpx_variance16x8; cpi->fn_ptr[BLOCK_16X8].svf = vpx_sub_pixel_variance16x8; cpi->fn_ptr[BLOCK_16X8].sdx3f = vpx_sad16x8x3; cpi->fn_ptr[BLOCK_16X8].sdx8f = vpx_sad16x8x8; cpi->fn_ptr[BLOCK_16X8].sdx4df = vpx_sad16x8x4d; cpi->fn_ptr[BLOCK_8X16].sdf = vpx_sad8x16; cpi->fn_ptr[BLOCK_8X16].vf = vpx_variance8x16; cpi->fn_ptr[BLOCK_8X16].svf = vpx_sub_pixel_variance8x16; cpi->fn_ptr[BLOCK_8X16].sdx3f = vpx_sad8x16x3; cpi->fn_ptr[BLOCK_8X16].sdx8f = vpx_sad8x16x8; cpi->fn_ptr[BLOCK_8X16].sdx4df = vpx_sad8x16x4d; cpi->fn_ptr[BLOCK_8X8].sdf = vpx_sad8x8; cpi->fn_ptr[BLOCK_8X8].vf = vpx_variance8x8; cpi->fn_ptr[BLOCK_8X8].svf = vpx_sub_pixel_variance8x8; cpi->fn_ptr[BLOCK_8X8].sdx3f = vpx_sad8x8x3; cpi->fn_ptr[BLOCK_8X8].sdx8f = vpx_sad8x8x8; cpi->fn_ptr[BLOCK_8X8].sdx4df = vpx_sad8x8x4d; cpi->fn_ptr[BLOCK_4X4].sdf = vpx_sad4x4; cpi->fn_ptr[BLOCK_4X4].vf = vpx_variance4x4; cpi->fn_ptr[BLOCK_4X4].svf = vpx_sub_pixel_variance4x4; cpi->fn_ptr[BLOCK_4X4].sdx3f = vpx_sad4x4x3; cpi->fn_ptr[BLOCK_4X4].sdx8f = vpx_sad4x4x8; cpi->fn_ptr[BLOCK_4X4].sdx4df = vpx_sad4x4x4d; #if VPX_ARCH_X86 || VPX_ARCH_X86_64 cpi->fn_ptr[BLOCK_16X16].copymem = vp8_copy32xn; cpi->fn_ptr[BLOCK_16X8].copymem = vp8_copy32xn; cpi->fn_ptr[BLOCK_8X16].copymem = vp8_copy32xn; cpi->fn_ptr[BLOCK_8X8].copymem = vp8_copy32xn; cpi->fn_ptr[BLOCK_4X4].copymem = vp8_copy32xn; #endif cpi->full_search_sad = vp8_full_search_sad; cpi->diamond_search_sad = vp8_diamond_search_sad; cpi->refining_search_sad = vp8_refining_search_sad; /* make sure frame 1 is okay */ cpi->mb.error_bins[0] = cpi->common.MBs; /* vp8cx_init_quantizer() is first called here. Add check in * vp8cx_frame_init_quantizer() so that vp8cx_init_quantizer is only * called later when needed. This will avoid unnecessary calls of * vp8cx_init_quantizer() for every frame. */ vp8cx_init_quantizer(cpi); vp8_loop_filter_init(cm); cpi->common.error.setjmp = 0; #if CONFIG_MULTI_RES_ENCODING /* Calculate # of MBs in a row in lower-resolution level image. 
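   * The multi-resolution encoder needs this so that a macroblock in this
   * stream can be mapped to the co-located macroblock of the stream one
   * resolution below when reusing its mode and motion vector information.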
*/ if (cpi->oxcf.mr_encoder_id > 0) vp8_cal_low_res_mb_cols(cpi); #endif /* setup RD costs to MACROBLOCK struct */ cpi->mb.mvcost[0] = &cpi->rd_costs.mvcosts[0][mv_max + 1]; cpi->mb.mvcost[1] = &cpi->rd_costs.mvcosts[1][mv_max + 1]; cpi->mb.mvsadcost[0] = &cpi->rd_costs.mvsadcosts[0][mvfp_max + 1]; cpi->mb.mvsadcost[1] = &cpi->rd_costs.mvsadcosts[1][mvfp_max + 1]; cal_mvsadcosts(cpi->mb.mvsadcost); cpi->mb.mbmode_cost = cpi->rd_costs.mbmode_cost; cpi->mb.intra_uv_mode_cost = cpi->rd_costs.intra_uv_mode_cost; cpi->mb.bmode_costs = cpi->rd_costs.bmode_costs; cpi->mb.inter_bmode_costs = cpi->rd_costs.inter_bmode_costs; cpi->mb.token_costs = cpi->rd_costs.token_costs; /* setup block ptrs & offsets */ vp8_setup_block_ptrs(&cpi->mb); vp8_setup_block_dptrs(&cpi->mb.e_mbd); return cpi; } void vp8_remove_compressor(VP8_COMP **comp) { VP8_COMP *cpi = *comp; if (!cpi) return; if (cpi && (cpi->common.current_video_frame > 0)) { #if !CONFIG_REALTIME_ONLY if (cpi->pass == 2) { vp8_end_second_pass(cpi); } #endif #if CONFIG_INTERNAL_STATS if (cpi->pass != 1) { FILE *f = fopen("opsnr.stt", "a"); double time_encoded = (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) / 10000000.000; double dr = (double)cpi->bytes * 8.0 / 1000.0 / time_encoded; if (cpi->b_calculate_psnr) { if (cpi->oxcf.number_of_layers > 1) { int i; fprintf(f, "Layer\tBitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t" "GLPsnrP\tVPXSSIM\n"); for (i = 0; i < (int)cpi->oxcf.number_of_layers; ++i) { double dr = (double)cpi->bytes_in_layer[i] * 8.0 / 1000.0 / time_encoded; double samples = 3.0 / 2 * cpi->frames_in_layer[i] * cpi->common.Width * cpi->common.Height; double total_psnr = vpx_sse_to_psnr(samples, 255.0, cpi->total_error2[i]); double total_psnr2 = vpx_sse_to_psnr(samples, 255.0, cpi->total_error2_p[i]); double total_ssim = 100 * pow(cpi->sum_ssim[i] / cpi->sum_weights[i], 8.0); fprintf(f, "%5d\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t" "%7.3f\t%7.3f\n", i, dr, cpi->sum_psnr[i] / cpi->frames_in_layer[i], total_psnr, cpi->sum_psnr_p[i] / cpi->frames_in_layer[i], total_psnr2, total_ssim); } } else { double samples = 3.0 / 2 * cpi->count * cpi->common.Width * cpi->common.Height; double total_psnr = vpx_sse_to_psnr(samples, 255.0, cpi->total_sq_error); double total_psnr2 = vpx_sse_to_psnr(samples, 255.0, cpi->total_sq_error2); double total_ssim = 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0); fprintf(f, "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\t" "GLPsnrP\tVPXSSIM\n"); fprintf(f, "%7.3f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t" "%7.3f\n", dr, cpi->total / cpi->count, total_psnr, cpi->totalp / cpi->count, total_psnr2, total_ssim); } } fclose(f); #if 0 f = fopen("qskip.stt", "a"); fprintf(f, "minq:%d -maxq:%d skiptrue:skipfalse = %d:%d\n", cpi->oxcf.best_allowed_q, cpi->oxcf.worst_allowed_q, skiptruecount, skipfalsecount); fclose(f); #endif } #endif #ifdef SPEEDSTATS if (cpi->compressor_speed == 2) { int i; FILE *f = fopen("cxspeed.stt", "a"); cnt_pm /= cpi->common.MBs; for (i = 0; i < 16; ++i) fprintf(f, "%5d", frames_at_speed[i]); fprintf(f, "\n"); fclose(f); } #endif #ifdef MODE_STATS { extern int count_mb_seg[4]; FILE *f = fopen("modes.stt", "a"); double dr = (double)cpi->framerate * (double)bytes * (double)8 / (double)count / (double)1000; fprintf(f, "intra_mode in Intra Frames:\n"); fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d\n", y_modes[0], y_modes[1], y_modes[2], y_modes[3], y_modes[4]); fprintf(f, "UV:%8d, %8d, %8d, %8d\n", uv_modes[0], uv_modes[1], uv_modes[2], uv_modes[3]); fprintf(f, "B: "); { int i; for (i = 0; i < 10; ++i) fprintf(f, "%8d, ", 
b_modes[i]); fprintf(f, "\n"); } fprintf(f, "Modes in Inter Frames:\n"); fprintf(f, "Y: %8d, %8d, %8d, %8d, %8d, %8d, %8d, %8d, %8d, %8d\n", inter_y_modes[0], inter_y_modes[1], inter_y_modes[2], inter_y_modes[3], inter_y_modes[4], inter_y_modes[5], inter_y_modes[6], inter_y_modes[7], inter_y_modes[8], inter_y_modes[9]); fprintf(f, "UV:%8d, %8d, %8d, %8d\n", inter_uv_modes[0], inter_uv_modes[1], inter_uv_modes[2], inter_uv_modes[3]); fprintf(f, "B: "); { int i; for (i = 0; i < 15; ++i) fprintf(f, "%8d, ", inter_b_modes[i]); fprintf(f, "\n"); } fprintf(f, "P:%8d, %8d, %8d, %8d\n", count_mb_seg[0], count_mb_seg[1], count_mb_seg[2], count_mb_seg[3]); fprintf(f, "PB:%8d, %8d, %8d, %8d\n", inter_b_modes[LEFT4X4], inter_b_modes[ABOVE4X4], inter_b_modes[ZERO4X4], inter_b_modes[NEW4X4]); fclose(f); } #endif #if defined(SECTIONBITS_OUTPUT) if (0) { int i; FILE *f = fopen("tokenbits.stt", "a"); for (i = 0; i < 28; ++i) fprintf(f, "%8d", (int)(Sectionbits[i] / 256)); fprintf(f, "\n"); fclose(f); } #endif #if 0 { printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000); printf("\n_frames recive_data encod_mb_row compress_frame Total\n"); printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame, cpi->time_receive_data / 1000, cpi->time_encode_mb_row / 1000, cpi->time_compress_data / 1000, (cpi->time_receive_data + cpi->time_compress_data) / 1000); } #endif } #if CONFIG_MULTITHREAD vp8cx_remove_encoder_threads(cpi); #endif #if CONFIG_TEMPORAL_DENOISING vp8_denoiser_free(&cpi->denoiser); #endif dealloc_compressor_data(cpi); vpx_free(cpi->mb.ss); vpx_free(cpi->tok); vpx_free(cpi->skin_map); vpx_free(cpi->cyclic_refresh_map); vpx_free(cpi->consec_zero_last); vpx_free(cpi->consec_zero_last_mvbias); vp8_remove_common(&cpi->common); vpx_free(cpi); *comp = 0; #ifdef OUTPUT_YUV_SRC fclose(yuv_file); #endif #ifdef OUTPUT_YUV_DENOISED fclose(yuv_denoised_file); #endif #ifdef OUTPUT_YUV_SKINMAP fclose(yuv_skinmap_file); #endif #if 0 if (keyfile) fclose(keyfile); if (framepsnr) fclose(framepsnr); if (kf_list) fclose(kf_list); #endif } static uint64_t calc_plane_error(unsigned char *orig, int orig_stride, unsigned char *recon, int recon_stride, unsigned int cols, unsigned int rows) { unsigned int row, col; uint64_t total_sse = 0; int diff; for (row = 0; row + 16 <= rows; row += 16) { for (col = 0; col + 16 <= cols; col += 16) { unsigned int sse; vpx_mse16x16(orig + col, orig_stride, recon + col, recon_stride, &sse); total_sse += sse; } /* Handle odd-sized width */ if (col < cols) { unsigned int border_row, border_col; unsigned char *border_orig = orig; unsigned char *border_recon = recon; for (border_row = 0; border_row < 16; ++border_row) { for (border_col = col; border_col < cols; ++border_col) { diff = border_orig[border_col] - border_recon[border_col]; total_sse += diff * diff; } border_orig += orig_stride; border_recon += recon_stride; } } orig += orig_stride * 16; recon += recon_stride * 16; } /* Handle odd-sized height */ for (; row < rows; ++row) { for (col = 0; col < cols; ++col) { diff = orig[col] - recon[col]; total_sse += diff * diff; } orig += orig_stride; recon += recon_stride; } vpx_clear_system_state(); return total_sse; } static void generate_psnr_packet(VP8_COMP *cpi) { YV12_BUFFER_CONFIG *orig = cpi->Source; YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; struct vpx_codec_cx_pkt pkt; uint64_t sse; int i; unsigned int width = cpi->common.Width; unsigned int height = cpi->common.Height; pkt.kind = VPX_CODEC_PSNR_PKT; sse = calc_plane_error(orig->y_buffer, 
orig->y_stride, recon->y_buffer, recon->y_stride, width, height); pkt.data.psnr.sse[0] = sse; pkt.data.psnr.sse[1] = sse; pkt.data.psnr.samples[0] = width * height; pkt.data.psnr.samples[1] = width * height; width = (width + 1) / 2; height = (height + 1) / 2; sse = calc_plane_error(orig->u_buffer, orig->uv_stride, recon->u_buffer, recon->uv_stride, width, height); pkt.data.psnr.sse[0] += sse; pkt.data.psnr.sse[2] = sse; pkt.data.psnr.samples[0] += width * height; pkt.data.psnr.samples[2] = width * height; sse = calc_plane_error(orig->v_buffer, orig->uv_stride, recon->v_buffer, recon->uv_stride, width, height); pkt.data.psnr.sse[0] += sse; pkt.data.psnr.sse[3] = sse; pkt.data.psnr.samples[0] += width * height; pkt.data.psnr.samples[3] = width * height; for (i = 0; i < 4; ++i) { pkt.data.psnr.psnr[i] = vpx_sse_to_psnr(pkt.data.psnr.samples[i], 255.0, (double)(pkt.data.psnr.sse[i])); } vpx_codec_pkt_list_add(cpi->output_pkt_list, &pkt); } int vp8_use_as_reference(VP8_COMP *cpi, int ref_frame_flags) { if (ref_frame_flags > 7) return -1; cpi->ref_frame_flags = ref_frame_flags; return 0; } int vp8_update_reference(VP8_COMP *cpi, int ref_frame_flags) { if (ref_frame_flags > 7) return -1; cpi->common.refresh_golden_frame = 0; cpi->common.refresh_alt_ref_frame = 0; cpi->common.refresh_last_frame = 0; if (ref_frame_flags & VP8_LAST_FRAME) cpi->common.refresh_last_frame = 1; if (ref_frame_flags & VP8_GOLD_FRAME) cpi->common.refresh_golden_frame = 1; if (ref_frame_flags & VP8_ALTR_FRAME) cpi->common.refresh_alt_ref_frame = 1; cpi->ext_refresh_frame_flags_pending = 1; return 0; } int vp8_get_reference(VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP8_COMMON *cm = &cpi->common; int ref_fb_idx; if (ref_frame_flag == VP8_LAST_FRAME) { ref_fb_idx = cm->lst_fb_idx; } else if (ref_frame_flag == VP8_GOLD_FRAME) { ref_fb_idx = cm->gld_fb_idx; } else if (ref_frame_flag == VP8_ALTR_FRAME) { ref_fb_idx = cm->alt_fb_idx; } else { return -1; } vp8_yv12_copy_frame(&cm->yv12_fb[ref_fb_idx], sd); return 0; } int vp8_set_reference(VP8_COMP *cpi, enum vpx_ref_frame_type ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP8_COMMON *cm = &cpi->common; int ref_fb_idx; if (ref_frame_flag == VP8_LAST_FRAME) { ref_fb_idx = cm->lst_fb_idx; } else if (ref_frame_flag == VP8_GOLD_FRAME) { ref_fb_idx = cm->gld_fb_idx; } else if (ref_frame_flag == VP8_ALTR_FRAME) { ref_fb_idx = cm->alt_fb_idx; } else { return -1; } vp8_yv12_copy_frame(sd, &cm->yv12_fb[ref_fb_idx]); return 0; } int vp8_update_entropy(VP8_COMP *cpi, int update) { VP8_COMMON *cm = &cpi->common; cm->refresh_entropy_probs = update; return 0; } static void scale_and_extend_source(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; /* are we resizing the image */ if (cm->horiz_scale != 0 || cm->vert_scale != 0) { #if CONFIG_SPATIAL_RESAMPLING int hr, hs, vr, vs; int tmp_height; if (cm->vert_scale == 3) { tmp_height = 9; } else { tmp_height = 11; } Scale2Ratio(cm->horiz_scale, &hr, &hs); Scale2Ratio(cm->vert_scale, &vr, &vs); vpx_scale_frame(sd, &cpi->scaled_source, cm->temp_scale_frame.y_buffer, tmp_height, hs, hr, vs, vr, 0); vp8_yv12_extend_frame_borders(&cpi->scaled_source); cpi->Source = &cpi->scaled_source; #endif } else { cpi->Source = sd; } } static int resize_key_frame(VP8_COMP *cpi) { #if CONFIG_SPATIAL_RESAMPLING VP8_COMMON *cm = &cpi->common; /* Do we need to apply resampling for one pass cbr. * In one pass this is more limited than in two pass cbr. 
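   * Scaling down a notch happens when the buffer level falls below the
   * resample DOWN watermark, and scaling back up when it rises above the
   * UP watermark; both watermarks are percentages of the optimal buffer
   * level.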
   * The test and any change is only made once per key frame sequence.
   */
  if (cpi->oxcf.allow_spatial_resampling &&
      (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) {
    int hr, hs, vr, vs;
    int new_width, new_height;

    /* If we are below the resample DOWN watermark then scale down a
     * notch.
     */
    if (cpi->buffer_level < (cpi->oxcf.resample_down_water_mark *
                             cpi->oxcf.optimal_buffer_level / 100)) {
      cm->horiz_scale =
          (cm->horiz_scale < ONETWO) ? cm->horiz_scale + 1 : ONETWO;
      cm->vert_scale = (cm->vert_scale < ONETWO) ? cm->vert_scale + 1 : ONETWO;
    }
    /* Should we now start scaling back up */
    else if (cpi->buffer_level > (cpi->oxcf.resample_up_water_mark *
                                  cpi->oxcf.optimal_buffer_level / 100)) {
      cm->horiz_scale =
          (cm->horiz_scale > NORMAL) ? cm->horiz_scale - 1 : NORMAL;
      cm->vert_scale = (cm->vert_scale > NORMAL) ? cm->vert_scale - 1 : NORMAL;
    }

    /* Get the new height and width */
    Scale2Ratio(cm->horiz_scale, &hr, &hs);
    Scale2Ratio(cm->vert_scale, &vr, &vs);

    new_width = ((hs - 1) + (cpi->oxcf.Width * hr)) / hs;
    new_height = ((vs - 1) + (cpi->oxcf.Height * vr)) / vs;

    /* If the image size has changed we need to reallocate the buffers
     * and resample the source image
     */
    if ((cm->Width != new_width) || (cm->Height != new_height)) {
      cm->Width = new_width;
      cm->Height = new_height;
      vp8_alloc_compressor_data(cpi);
      scale_and_extend_source(cpi->un_scaled_source, cpi);
      return 1;
    }
  }
#endif
  return 0;
}

static void update_alt_ref_frame_stats(VP8_COMP *cpi) {
  VP8_COMMON *cm = &cpi->common;

  /* Select an interval before next GF or altref */
  if (!cpi->auto_gold) cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL;

  if ((cpi->pass != 2) && cpi->frames_till_gf_update_due) {
    cpi->current_gf_interval = cpi->frames_till_gf_update_due;

    /* Set the bits per frame that we should try and recover in
     * subsequent inter frames to account for the extra GF spend...
     * note that this does not apply for GF updates that occur
     * coincident with a key frame as the extra cost of key frames is
     * dealt with elsewhere.
     */
    cpi->gf_overspend_bits += cpi->projected_frame_size;
    cpi->non_gf_bitrate_adjustment =
        cpi->gf_overspend_bits / cpi->frames_till_gf_update_due;
  }

  /* Update data structure that monitors level of reference to last GF */
  memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
  cpi->gf_active_count = cm->mb_rows * cm->mb_cols;

  /* this frame refreshes means next frames don't unless specified by user */
  cpi->frames_since_golden = 0;

  /* Clear the alternate reference update pending flag. */
  cpi->source_alt_ref_pending = 0;

  /* Set the alternate reference frame active flag */
  cpi->source_alt_ref_active = 1;
}

static void update_golden_frame_stats(VP8_COMP *cpi) {
  VP8_COMMON *cm = &cpi->common;

  /* Update the Golden frame usage counts. */
  if (cm->refresh_golden_frame) {
    /* Select an interval before next GF */
    if (!cpi->auto_gold) cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL;

    if ((cpi->pass != 2) && (cpi->frames_till_gf_update_due > 0)) {
      cpi->current_gf_interval = cpi->frames_till_gf_update_due;

      /* Set the bits per frame that we should try and recover in
       * subsequent inter frames to account for the extra GF spend...
       * note that this does not apply for GF updates that occur
       * coincident with a key frame as the extra cost of key frames
       * is dealt with elsewhere.
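       * For example, an overspend of N bits with M frames until the next
       * GF update lowers each of those frames' target by roughly N / M
       * bits (the non_gf_bitrate_adjustment computed below).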
       */
      if ((cm->frame_type != KEY_FRAME) && !cpi->source_alt_ref_active) {
        /* Calculate GF bits to be recovered:
         * projected size minus the average per-frame bits available for
         * inter frames for the clip as a whole.
         */
        cpi->gf_overspend_bits +=
            (cpi->projected_frame_size - cpi->inter_frame_target);
      }

      cpi->non_gf_bitrate_adjustment =
          cpi->gf_overspend_bits / cpi->frames_till_gf_update_due;
    }

    /* Update data structure that monitors level of reference to last GF */
    memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols));
    cpi->gf_active_count = cm->mb_rows * cm->mb_cols;

    /* this frame refreshes means next frames don't unless specified by
     * user
     */
    cm->refresh_golden_frame = 0;
    cpi->frames_since_golden = 0;

    cpi->recent_ref_frame_usage[INTRA_FRAME] = 1;
    cpi->recent_ref_frame_usage[LAST_FRAME] = 1;
    cpi->recent_ref_frame_usage[GOLDEN_FRAME] = 1;
    cpi->recent_ref_frame_usage[ALTREF_FRAME] = 1;

    /* ******** Fixed Q test code only ************ */
    /* If we are going to use the ALT reference for the next group of
     * frames set a flag to say so.
     */
    if (cpi->oxcf.fixed_q >= 0 && cpi->oxcf.play_alternate &&
        !cpi->common.refresh_alt_ref_frame) {
      cpi->source_alt_ref_pending = 1;
      cpi->frames_till_gf_update_due = cpi->baseline_gf_interval;
    }

    if (!cpi->source_alt_ref_pending) cpi->source_alt_ref_active = 0;

    /* Decrement count down till next gf */
    if (cpi->frames_till_gf_update_due > 0) cpi->frames_till_gf_update_due--;

  } else if (!cpi->common.refresh_alt_ref_frame) {
    /* Decrement count down till next gf */
    if (cpi->frames_till_gf_update_due > 0) cpi->frames_till_gf_update_due--;

    if (cpi->frames_till_alt_ref_frame) cpi->frames_till_alt_ref_frame--;

    cpi->frames_since_golden++;

    if (cpi->frames_since_golden > 1) {
      cpi->recent_ref_frame_usage[INTRA_FRAME] +=
          cpi->mb.count_mb_ref_frame_usage[INTRA_FRAME];
      cpi->recent_ref_frame_usage[LAST_FRAME] +=
          cpi->mb.count_mb_ref_frame_usage[LAST_FRAME];
      cpi->recent_ref_frame_usage[GOLDEN_FRAME] +=
          cpi->mb.count_mb_ref_frame_usage[GOLDEN_FRAME];
      cpi->recent_ref_frame_usage[ALTREF_FRAME] +=
          cpi->mb.count_mb_ref_frame_usage[ALTREF_FRAME];
    }
  }
}

/* This function updates the reference frame probability estimates that
 * will be used during mode selection
 */
static void update_rd_ref_frame_probs(VP8_COMP *cpi) {
  VP8_COMMON *cm = &cpi->common;
  const int *const rfct = cpi->mb.count_mb_ref_frame_usage;
  const int rf_intra = rfct[INTRA_FRAME];
  const int rf_inter =
      rfct[LAST_FRAME] + rfct[GOLDEN_FRAME] + rfct[ALTREF_FRAME];

  if (cm->frame_type == KEY_FRAME) {
    cpi->prob_intra_coded = 255;
    cpi->prob_last_coded = 128;
    cpi->prob_gf_coded = 128;
  } else if (!(rf_intra + rf_inter)) {
    cpi->prob_intra_coded = 63;
    cpi->prob_last_coded = 128;
    cpi->prob_gf_coded = 128;
  }

  /* update reference frame costs since we can do better than what we got
   * last frame.
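   * These are heuristic priors rather than measured counts: for example,
   * immediately after an alt-ref update the last frame is by far the most
   * likely reference, so prob_last_coded is pushed up and prob_gf_coded
   * down (see below).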
   */
  if (cpi->oxcf.number_of_layers == 1) {
    if (cpi->common.refresh_alt_ref_frame) {
      cpi->prob_intra_coded += 40;
      if (cpi->prob_intra_coded > 255) cpi->prob_intra_coded = 255;
      cpi->prob_last_coded = 200;
      cpi->prob_gf_coded = 1;
    } else if (cpi->frames_since_golden == 0) {
      cpi->prob_last_coded = 214;
    } else if (cpi->frames_since_golden == 1) {
      cpi->prob_last_coded = 192;
      cpi->prob_gf_coded = 220;
    } else if (cpi->source_alt_ref_active) {
      cpi->prob_gf_coded -= 20;
      if (cpi->prob_gf_coded < 10) cpi->prob_gf_coded = 10;
    }
    if (!cpi->source_alt_ref_active) cpi->prob_gf_coded = 255;
  }
}

#if !CONFIG_REALTIME_ONLY
/* 1 = key, 0 = inter */
static int decide_key_frame(VP8_COMP *cpi) {
  VP8_COMMON *cm = &cpi->common;

  int code_key_frame = 0;

  cpi->kf_boost = 0;

  if (cpi->Speed > 11) return 0;

  /* Clear down mmx registers */
  vpx_clear_system_state();

  if ((cpi->compressor_speed == 2) && (cpi->Speed >= 5) && (cpi->sf.RD == 0)) {
    double change = 1.0 *
                    abs((int)(cpi->mb.intra_error - cpi->last_intra_error)) /
                    (1 + cpi->last_intra_error);
    double change2 =
        1.0 *
        abs((int)(cpi->mb.prediction_error - cpi->last_prediction_error)) /
        (1 + cpi->last_prediction_error);
    double minerror = cm->MBs * 256;

    cpi->last_intra_error = cpi->mb.intra_error;
    cpi->last_prediction_error = cpi->mb.prediction_error;

    if (10 * cpi->mb.intra_error / (1 + cpi->mb.prediction_error) < 15 &&
        cpi->mb.prediction_error > minerror &&
        (change > .25 || change2 > .25)) {
      /*(change > 1.4 || change < .75)&& cpi->this_frame_percent_intra >
       * cpi->last_frame_percent_intra + 3*/
      return 1;
    }

    return 0;
  }

  /* If the following are true we might as well code a key frame */
  if (((cpi->this_frame_percent_intra == 100) &&
       (cpi->this_frame_percent_intra >
        (cpi->last_frame_percent_intra + 2))) ||
      ((cpi->this_frame_percent_intra > 95) &&
       (cpi->this_frame_percent_intra >=
        (cpi->last_frame_percent_intra + 5)))) {
    code_key_frame = 1;
  }
  /* in addition if the following are true and this is not a golden frame
   * then code a key frame. Note that on golden frames there often seems
   * to be a pop in intra usage anyway hence this restriction is
   * designed to prevent spurious key frames. The Intra pop needs to be
   * investigated.
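   * (An intra "pop" here means a transient spike in the percentage of
   * intra-coded macroblocks that does not correspond to a real scene
   * change.)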
   */
  else if (((cpi->this_frame_percent_intra > 60) &&
            (cpi->this_frame_percent_intra >
             (cpi->last_frame_percent_intra * 2))) ||
           ((cpi->this_frame_percent_intra > 75) &&
            (cpi->this_frame_percent_intra >
             (cpi->last_frame_percent_intra * 3 / 2))) ||
           ((cpi->this_frame_percent_intra > 90) &&
            (cpi->this_frame_percent_intra >
             (cpi->last_frame_percent_intra + 10)))) {
    if (!cm->refresh_golden_frame) code_key_frame = 1;
  }

  return code_key_frame;
}

static void Pass1Encode(VP8_COMP *cpi) {
  vp8_set_quantizer(cpi, 26);
  vp8_first_pass(cpi);
}
#endif

#if 0
void write_cx_frame_to_file(YV12_BUFFER_CONFIG *frame, int this_frame) {
  /* write the frame */
  FILE *yframe;
  int i;
  char filename[255];

  sprintf(filename, "cx\\y%04d.raw", this_frame);
  yframe = fopen(filename, "wb");

  for (i = 0; i < frame->y_height; ++i)
    fwrite(frame->y_buffer + i * frame->y_stride, frame->y_width, 1, yframe);

  fclose(yframe);
  sprintf(filename, "cx\\u%04d.raw", this_frame);
  yframe = fopen(filename, "wb");

  for (i = 0; i < frame->uv_height; ++i)
    fwrite(frame->u_buffer + i * frame->uv_stride, frame->uv_width, 1, yframe);

  fclose(yframe);
  sprintf(filename, "cx\\v%04d.raw", this_frame);
  yframe = fopen(filename, "wb");

  for (i = 0; i < frame->uv_height; ++i)
    fwrite(frame->v_buffer + i * frame->uv_stride, frame->uv_width, 1, yframe);

  fclose(yframe);
}
#endif

#if !CONFIG_REALTIME_ONLY
/* Function to test for conditions that indicate we should loop
 * back and recode a frame.
 */
static int recode_loop_test(VP8_COMP *cpi, int high_limit, int low_limit,
                            int q, int maxq, int minq) {
  int force_recode = 0;
  VP8_COMMON *cm = &cpi->common;

  /* Is frame recode allowed at all?
   * Yes if either recode mode one is selected or mode two is selected
   * and the frame is a key frame, golden frame or alt_ref_frame
   */
  if ((cpi->sf.recode_loop == 1) ||
      ((cpi->sf.recode_loop == 2) &&
       ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame ||
        cm->refresh_alt_ref_frame))) {
    /* General over and under shoot tests */
    if (((cpi->projected_frame_size > high_limit) && (q < maxq)) ||
        ((cpi->projected_frame_size < low_limit) && (q > minq))) {
      force_recode = 1;
    }
    /* Special Constrained quality tests */
    else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
      /* Undershoot and below auto cq level */
      if ((q > cpi->cq_target_quality) &&
          (cpi->projected_frame_size < ((cpi->this_frame_target * 7) >> 3))) {
        force_recode = 1;
      }
      /* Severe undershoot and between auto and user cq level */
      else if ((q > cpi->oxcf.cq_level) &&
               (cpi->projected_frame_size < cpi->min_frame_bandwidth) &&
               (cpi->active_best_quality > cpi->oxcf.cq_level)) {
        force_recode = 1;
        cpi->active_best_quality = cpi->oxcf.cq_level;
      }
    }
  }

  return force_recode;
}
#endif  // !CONFIG_REALTIME_ONLY

static void update_reference_frames(VP8_COMP *cpi) {
  VP8_COMMON *cm = &cpi->common;
  YV12_BUFFER_CONFIG *yv12_fb = cm->yv12_fb;

  /* At this point the new frame has been encoded.
   * If any buffer copy / swapping is signaled it should be done here.
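   * On a key frame all three references (last, golden and altref) are
   * refreshed from the new frame; otherwise the refresh_* and
   * copy_buffer_to_* flags decide which buffer indices are remapped.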
*/ if (cm->frame_type == KEY_FRAME) { yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FRAME | VP8_ALTR_FRAME; yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; cm->alt_fb_idx = cm->gld_fb_idx = cm->new_fb_idx; cpi->current_ref_frames[GOLDEN_FRAME] = cm->current_video_frame; cpi->current_ref_frames[ALTREF_FRAME] = cm->current_video_frame; } else { if (cm->refresh_alt_ref_frame) { assert(!cm->copy_buffer_to_arf); cm->yv12_fb[cm->new_fb_idx].flags |= VP8_ALTR_FRAME; cm->yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; cm->alt_fb_idx = cm->new_fb_idx; cpi->current_ref_frames[ALTREF_FRAME] = cm->current_video_frame; } else if (cm->copy_buffer_to_arf) { assert(!(cm->copy_buffer_to_arf & ~0x3)); if (cm->copy_buffer_to_arf == 1) { if (cm->alt_fb_idx != cm->lst_fb_idx) { yv12_fb[cm->lst_fb_idx].flags |= VP8_ALTR_FRAME; yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; cm->alt_fb_idx = cm->lst_fb_idx; cpi->current_ref_frames[ALTREF_FRAME] = cpi->current_ref_frames[LAST_FRAME]; } } else { if (cm->alt_fb_idx != cm->gld_fb_idx) { yv12_fb[cm->gld_fb_idx].flags |= VP8_ALTR_FRAME; yv12_fb[cm->alt_fb_idx].flags &= ~VP8_ALTR_FRAME; cm->alt_fb_idx = cm->gld_fb_idx; cpi->current_ref_frames[ALTREF_FRAME] = cpi->current_ref_frames[GOLDEN_FRAME]; } } } if (cm->refresh_golden_frame) { assert(!cm->copy_buffer_to_gf); cm->yv12_fb[cm->new_fb_idx].flags |= VP8_GOLD_FRAME; cm->yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; cm->gld_fb_idx = cm->new_fb_idx; cpi->current_ref_frames[GOLDEN_FRAME] = cm->current_video_frame; } else if (cm->copy_buffer_to_gf) { assert(!(cm->copy_buffer_to_arf & ~0x3)); if (cm->copy_buffer_to_gf == 1) { if (cm->gld_fb_idx != cm->lst_fb_idx) { yv12_fb[cm->lst_fb_idx].flags |= VP8_GOLD_FRAME; yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; cm->gld_fb_idx = cm->lst_fb_idx; cpi->current_ref_frames[GOLDEN_FRAME] = cpi->current_ref_frames[LAST_FRAME]; } } else { if (cm->alt_fb_idx != cm->gld_fb_idx) { yv12_fb[cm->alt_fb_idx].flags |= VP8_GOLD_FRAME; yv12_fb[cm->gld_fb_idx].flags &= ~VP8_GOLD_FRAME; cm->gld_fb_idx = cm->alt_fb_idx; cpi->current_ref_frames[GOLDEN_FRAME] = cpi->current_ref_frames[ALTREF_FRAME]; } } } } if (cm->refresh_last_frame) { cm->yv12_fb[cm->new_fb_idx].flags |= VP8_LAST_FRAME; cm->yv12_fb[cm->lst_fb_idx].flags &= ~VP8_LAST_FRAME; cm->lst_fb_idx = cm->new_fb_idx; cpi->current_ref_frames[LAST_FRAME] = cm->current_video_frame; } #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { /* we shouldn't have to keep multiple copies as we know in advance which * buffer we should start - for now to get something up and running * I've chosen to copy the buffers */ if (cm->frame_type == KEY_FRAME) { int i; for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i) vp8_yv12_copy_frame(cpi->Source, &cpi->denoiser.yv12_running_avg[i]); } else { vp8_yv12_extend_frame_borders( &cpi->denoiser.yv12_running_avg[INTRA_FRAME]); if (cm->refresh_alt_ref_frame || cm->copy_buffer_to_arf) { vp8_yv12_copy_frame(&cpi->denoiser.yv12_running_avg[INTRA_FRAME], &cpi->denoiser.yv12_running_avg[ALTREF_FRAME]); } if (cm->refresh_golden_frame || cm->copy_buffer_to_gf) { vp8_yv12_copy_frame(&cpi->denoiser.yv12_running_avg[INTRA_FRAME], &cpi->denoiser.yv12_running_avg[GOLDEN_FRAME]); } if (cm->refresh_last_frame) { vp8_yv12_copy_frame(&cpi->denoiser.yv12_running_avg[INTRA_FRAME], &cpi->denoiser.yv12_running_avg[LAST_FRAME]); } } if (cpi->oxcf.noise_sensitivity == 4) vp8_yv12_copy_frame(cpi->Source, &cpi->denoiser.yv12_last_source); } #endif } static int 
measure_square_diff_partial(YV12_BUFFER_CONFIG *source,
                            YV12_BUFFER_CONFIG *dest, VP8_COMP *cpi) {
  int i, j;
  int Total = 0;
  int num_blocks = 0;
  int skip = 2;
  int min_consec_zero_last = 10;
  int tot_num_blocks = (source->y_height * source->y_width) >> 8;
  unsigned char *src = source->y_buffer;
  unsigned char *dst = dest->y_buffer;

  /* Loop through the Y plane, every |skip| blocks along rows and columns,
   * summing the square differences, and only for blocks that have been
   * in zero_last mode at least |min_consec_zero_last| frames in a row.
   */
  for (i = 0; i < source->y_height; i += 16 * skip) {
    int block_index_row = (i >> 4) * cpi->common.mb_cols;
    for (j = 0; j < source->y_width; j += 16 * skip) {
      int index = block_index_row + (j >> 4);
      if (cpi->consec_zero_last[index] >= min_consec_zero_last) {
        unsigned int sse;
        Total += vpx_mse16x16(src + j, source->y_stride, dst + j,
                              dest->y_stride, &sse);
        num_blocks++;
      }
    }
    src += 16 * skip * source->y_stride;
    dst += 16 * skip * dest->y_stride;
  }
  // Only return non-zero if we have at least ~1/16 samples for estimate.
  if (num_blocks > (tot_num_blocks >> 4)) {
    assert(num_blocks != 0);
    return (Total / num_blocks);
  } else {
    return 0;
  }
}

#if CONFIG_TEMPORAL_DENOISING
static void process_denoiser_mode_change(VP8_COMP *cpi) {
  const VP8_COMMON *const cm = &cpi->common;
  int i, j;
  int total = 0;
  int num_blocks = 0;
  // Number of blocks skipped along row/column in computing the
  // nmse (normalized mean square error) of source.
  int skip = 2;
  // Only select blocks for computing nmse that have been encoded
  // as ZERO LAST min_consec_zero_last frames in a row.
  // Scale with number of temporal layers.
  int min_consec_zero_last = 12 / cpi->oxcf.number_of_layers;
  // Decision is tested for changing the denoising mode every
  // num_mode_change times this function is called. Note that this
  // function is called every 8 frames, so (8 * num_mode_change) is the
  // number of frames where denoising mode change is tested for switch.
  int num_mode_change = 20;
  // Framerate factor, to compensate for larger mse at lower framerates.
  // Use ref_framerate, which is full source framerate for temporal layers.
  // TODO(marpan): Adjust this factor.
  int fac_framerate = cpi->ref_framerate < 25.0f ? 80 : 100;
  int tot_num_blocks = cm->mb_rows * cm->mb_cols;
  int ystride = cpi->Source->y_stride;
  unsigned char *src = cpi->Source->y_buffer;
  unsigned char *dst = cpi->denoiser.yv12_last_source.y_buffer;
  static const unsigned char const_source[16] = { 128, 128, 128, 128, 128, 128,
                                                  128, 128, 128, 128, 128, 128,
                                                  128, 128, 128, 128 };
  int bandwidth = (int)(cpi->target_bandwidth);
  // For temporal layers, use full bandwidth (top layer).
  if (cpi->oxcf.number_of_layers > 1) {
    LAYER_CONTEXT *lc = &cpi->layer_context[cpi->oxcf.number_of_layers - 1];
    bandwidth = (int)(lc->target_bandwidth);
  }
  // Loop through the Y plane, every skip blocks along rows and columns,
  // summing the normalized mean square error, only for blocks that have
  // been encoded as ZEROMV LAST at least min_consec_zero_last frames in
  // a row and have small sum difference between current and previous frame.
  // Normalization here is by the contrast of the current frame block.
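  // With skip == 2 this samples every other 16x16 block in each direction,
  // i.e. roughly one quarter of the macroblocks in the frame.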
for (i = 0; i < cm->Height; i += 16 * skip) { int block_index_row = (i >> 4) * cm->mb_cols; for (j = 0; j < cm->Width; j += 16 * skip) { int index = block_index_row + (j >> 4); if (cpi->consec_zero_last[index] >= min_consec_zero_last) { unsigned int sse; const unsigned int var = vpx_variance16x16(src + j, ystride, dst + j, ystride, &sse); // Only consider this block as valid for noise measurement // if the sum_diff average of the current and previous frame // is small (to avoid effects from lighting change). if ((sse - var) < 128) { unsigned int sse2; const unsigned int act = vpx_variance16x16(src + j, ystride, const_source, 0, &sse2); if (act > 0) total += sse / act; num_blocks++; } } } src += 16 * skip * ystride; dst += 16 * skip * ystride; } total = total * fac_framerate / 100; // Only consider this frame as valid sample if we have computed nmse over // at least ~1/16 blocks, and Total > 0 (Total == 0 can happen if the // application inputs duplicate frames, or contrast is all zero). if (total > 0 && (num_blocks > (tot_num_blocks >> 4))) { // Update the recursive mean square source_diff. total = (total << 8) / num_blocks; if (cpi->denoiser.nmse_source_diff_count == 0) { // First sample in new interval. cpi->denoiser.nmse_source_diff = total; cpi->denoiser.qp_avg = cm->base_qindex; } else { // For subsequent samples, use average with weight ~1/4 for new sample. cpi->denoiser.nmse_source_diff = (int)((total + 3 * cpi->denoiser.nmse_source_diff) >> 2); cpi->denoiser.qp_avg = (int)((cm->base_qindex + 3 * cpi->denoiser.qp_avg) >> 2); } cpi->denoiser.nmse_source_diff_count++; } // Check for changing the denoiser mode, when we have obtained #samples = // num_mode_change. Condition the change also on the bitrate and QP. if (cpi->denoiser.nmse_source_diff_count == num_mode_change) { // Check for going up: from normal to aggressive mode. if ((cpi->denoiser.denoiser_mode == kDenoiserOnYUV) && (cpi->denoiser.nmse_source_diff > cpi->denoiser.threshold_aggressive_mode) && (cpi->denoiser.qp_avg < cpi->denoiser.qp_threshold_up && bandwidth > cpi->denoiser.bitrate_threshold)) { vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUVAggressive); } else { // Check for going down: from aggressive to normal mode. if (((cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive) && (cpi->denoiser.nmse_source_diff < cpi->denoiser.threshold_aggressive_mode)) || ((cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive) && (cpi->denoiser.qp_avg > cpi->denoiser.qp_threshold_down || bandwidth < cpi->denoiser.bitrate_threshold))) { vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUV); } } // Reset metric and counter for next interval. cpi->denoiser.nmse_source_diff = 0; cpi->denoiser.qp_avg = 0; cpi->denoiser.nmse_source_diff_count = 0; } } #endif void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm) { const FRAME_TYPE frame_type = cm->frame_type; int update_any_ref_buffers = 1; if (cpi->common.refresh_last_frame == 0 && cpi->common.refresh_golden_frame == 0 && cpi->common.refresh_alt_ref_frame == 0) { update_any_ref_buffers = 0; } if (cm->no_lpf) { cm->filter_level = 0; } else { struct vpx_usec_timer timer; vpx_clear_system_state(); vpx_usec_timer_start(&timer); if (cpi->sf.auto_filter == 0) { #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity && cm->frame_type != KEY_FRAME) { // Use the denoised buffer for selecting base loop filter level. // Denoised signal for current frame is stored in INTRA_FRAME. // No denoising on key frames. 
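        // Picking the level on the denoised buffer presumably avoids
        // letting source noise inflate the distortion measure used to
        // choose the filter strength.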
vp8cx_pick_filter_level_fast( &cpi->denoiser.yv12_running_avg[INTRA_FRAME], cpi); } else { vp8cx_pick_filter_level_fast(cpi->Source, cpi); } #else vp8cx_pick_filter_level_fast(cpi->Source, cpi); #endif } else { #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity && cm->frame_type != KEY_FRAME) { // Use the denoised buffer for selecting base loop filter level. // Denoised signal for current frame is stored in INTRA_FRAME. // No denoising on key frames. vp8cx_pick_filter_level(&cpi->denoiser.yv12_running_avg[INTRA_FRAME], cpi); } else { vp8cx_pick_filter_level(cpi->Source, cpi); } #else vp8cx_pick_filter_level(cpi->Source, cpi); #endif } if (cm->filter_level > 0) { vp8cx_set_alt_lf_level(cpi, cm->filter_level); } vpx_usec_timer_mark(&timer); cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); } #if CONFIG_MULTITHREAD if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) { sem_post(&cpi->h_event_end_lpf); /* signal that we have set filter_level */ } #endif // No need to apply loop-filter if the encoded frame does not update // any reference buffers. if (cm->filter_level > 0 && update_any_ref_buffers) { vp8_loop_filter_frame(cm, &cpi->mb.e_mbd, frame_type); } vp8_yv12_extend_frame_borders(cm->frame_to_show); } static void encode_frame_to_data_rate(VP8_COMP *cpi, size_t *size, unsigned char *dest, unsigned char *dest_end, unsigned int *frame_flags) { int Q; int frame_over_shoot_limit; int frame_under_shoot_limit; int Loop = 0; int loop_count; VP8_COMMON *cm = &cpi->common; int active_worst_qchanged = 0; #if !CONFIG_REALTIME_ONLY int q_low; int q_high; int zbin_oq_high; int zbin_oq_low = 0; int top_index; int bottom_index; int overshoot_seen = 0; int undershoot_seen = 0; #endif int drop_mark = (int)(cpi->oxcf.drop_frames_water_mark * cpi->oxcf.optimal_buffer_level / 100); int drop_mark75 = drop_mark * 2 / 3; int drop_mark50 = drop_mark / 4; int drop_mark25 = drop_mark / 8; /* Clear down mmx registers to allow floating point in what follows */ vpx_clear_system_state(); if (cpi->force_next_frame_intra) { cm->frame_type = KEY_FRAME; /* delayed intra frame */ cpi->force_next_frame_intra = 0; } /* For an alt ref frame in 2 pass we skip the call to the second pass * function that sets the target bandwidth */ switch (cpi->pass) { #if !CONFIG_REALTIME_ONLY case 2: if (cpi->common.refresh_alt_ref_frame) { /* Per frame bit target for the alt ref frame */ cpi->per_frame_bandwidth = cpi->twopass.gf_bits; /* per second target bitrate */ cpi->target_bandwidth = (int)(cpi->twopass.gf_bits * cpi->output_framerate); } break; #endif // !CONFIG_REALTIME_ONLY default: cpi->per_frame_bandwidth = (int)(cpi->target_bandwidth / cpi->output_framerate); break; } /* Default turn off buffer to buffer copying */ cm->copy_buffer_to_gf = 0; cm->copy_buffer_to_arf = 0; /* Clear zbin over-quant value and mode boost values. */ cpi->mb.zbin_over_quant = 0; cpi->mb.zbin_mode_boost = 0; /* Enable or disable mode based tweaking of the zbin * For 2 Pass Only used where GF/ARF prediction quality * is above a threshold */ cpi->mb.zbin_mode_boost_enabled = 1; if (cpi->pass == 2) { if (cpi->gfu_boost <= 400) { cpi->mb.zbin_mode_boost_enabled = 0; } } /* Current default encoder behaviour for the altref sign bias */ if (cpi->source_alt_ref_active) { cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 1; } else { cpi->common.ref_frame_sign_bias[ALTREF_FRAME] = 0; } /* Check to see if a key frame is signaled * For two pass with auto key frame enabled cm->frame_type may already * be set, but not for one pass. 
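   * In one pass, auto_key forces a key frame whenever frames_since_key
   * reaches a multiple of key_frame_frequency (set from oxcf.key_freq).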
*/ if ((cm->current_video_frame == 0) || (cm->frame_flags & FRAMEFLAGS_KEY) || (cpi->oxcf.auto_key && (cpi->frames_since_key % cpi->key_frame_frequency == 0))) { /* Key frame from VFW/auto-keyframe/first frame */ cm->frame_type = KEY_FRAME; #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity == 4) { // For adaptive mode, reset denoiser to normal mode on key frame. vp8_denoiser_set_parameters(&cpi->denoiser, kDenoiserOnYUV); } #endif } #if CONFIG_MULTI_RES_ENCODING if (cpi->oxcf.mr_total_resolutions > 1) { LOWER_RES_FRAME_INFO *low_res_frame_info = (LOWER_RES_FRAME_INFO *)cpi->oxcf.mr_low_res_mode_info; if (cpi->oxcf.mr_encoder_id) { // Check if lower resolution is available for motion vector reuse. if (cm->frame_type != KEY_FRAME) { cpi->mr_low_res_mv_avail = 1; cpi->mr_low_res_mv_avail &= !(low_res_frame_info->is_frame_dropped); if (cpi->ref_frame_flags & VP8_LAST_FRAME) cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[LAST_FRAME] == low_res_frame_info->low_res_ref_frames[LAST_FRAME]); if (cpi->ref_frame_flags & VP8_GOLD_FRAME) cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[GOLDEN_FRAME] == low_res_frame_info->low_res_ref_frames[GOLDEN_FRAME]); // Don't use altref to determine whether low res is available. // TODO (marpan): Should we make this type of condition on a // per-reference frame basis? /* if (cpi->ref_frame_flags & VP8_ALTR_FRAME) cpi->mr_low_res_mv_avail &= (cpi->current_ref_frames[ALTREF_FRAME] == low_res_frame_info->low_res_ref_frames[ALTREF_FRAME]); */ } // Disable motion vector reuse (i.e., disable any usage of the low_res) // if the previous lower stream is skipped/disabled. if (low_res_frame_info->skip_encoding_prev_stream) { cpi->mr_low_res_mv_avail = 0; } } // This stream is not skipped (i.e., it's being encoded), so set this skip // flag to 0. This is needed for the next stream (i.e., which is the next // frame to be encoded). low_res_frame_info->skip_encoding_prev_stream = 0; // On a key frame: For the lowest resolution, keep track of the key frame // counter value. For the higher resolutions, reset the current video // frame counter to that of the lowest resolution. // This is done to the handle the case where we may stop/start encoding // higher layer(s). The restart-encoding of higher layer is only signaled // by a key frame for now. // TODO (marpan): Add flag to indicate restart-encoding of higher layer. if (cm->frame_type == KEY_FRAME) { if (cpi->oxcf.mr_encoder_id) { // If the initial starting value of the buffer level is zero (this can // happen because we may have not started encoding this higher stream), // then reset it to non-zero value based on |starting_buffer_level|. if (cpi->common.current_video_frame == 0 && cpi->buffer_level == 0) { unsigned int i; cpi->bits_off_target = cpi->oxcf.starting_buffer_level; cpi->buffer_level = cpi->oxcf.starting_buffer_level; for (i = 0; i < cpi->oxcf.number_of_layers; ++i) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; lc->bits_off_target = lc->starting_buffer_level; lc->buffer_level = lc->starting_buffer_level; } } cpi->common.current_video_frame = low_res_frame_info->key_frame_counter_value; } else { low_res_frame_info->key_frame_counter_value = cpi->common.current_video_frame; } } } #endif // Find the reference frame closest to the current frame. 
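  // "Closest" is measured in frame age: the reference whose
  // current_ref_frames[] entry is nearest to current_video_frame.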
cpi->closest_reference_frame = LAST_FRAME; if (cm->frame_type != KEY_FRAME) { int i; MV_REFERENCE_FRAME closest_ref = INTRA_FRAME; if (cpi->ref_frame_flags & VP8_LAST_FRAME) { closest_ref = LAST_FRAME; } else if (cpi->ref_frame_flags & VP8_GOLD_FRAME) { closest_ref = GOLDEN_FRAME; } else if (cpi->ref_frame_flags & VP8_ALTR_FRAME) { closest_ref = ALTREF_FRAME; } for (i = 1; i <= 3; ++i) { vpx_ref_frame_type_t ref_frame_type = (vpx_ref_frame_type_t)((i == 3) ? 4 : i); if (cpi->ref_frame_flags & ref_frame_type) { if ((cm->current_video_frame - cpi->current_ref_frames[i]) < (cm->current_video_frame - cpi->current_ref_frames[closest_ref])) { closest_ref = i; } } } cpi->closest_reference_frame = closest_ref; } /* Set various flags etc to special state if it is a key frame */ if (cm->frame_type == KEY_FRAME) { int i; // Set the loop filter deltas and segmentation map update setup_features(cpi); /* The alternate reference frame cannot be active for a key frame */ cpi->source_alt_ref_active = 0; /* Reset the RD threshold multipliers to default of * 1 (128) */ for (i = 0; i < MAX_MODES; ++i) { cpi->mb.rd_thresh_mult[i] = 128; } // Reset the zero_last counter to 0 on key frame. memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols); memset(cpi->consec_zero_last_mvbias, 0, (cpi->common.mb_rows * cpi->common.mb_cols)); } #if 0 /* Experimental code for lagged compress and one pass * Initialise one_pass GF frames stats * Update stats used for GF selection */ { cpi->one_pass_frame_index = cm->current_video_frame % MAX_LAG_BUFFERS; cpi->one_pass_frame_stats[cpi->one_pass_frame_index ].frames_so_far = 0; cpi->one_pass_frame_stats[cpi->one_pass_frame_index ].frame_intra_error = 0.0; cpi->one_pass_frame_stats[cpi->one_pass_frame_index ].frame_coded_error = 0.0; cpi->one_pass_frame_stats[cpi->one_pass_frame_index ].frame_pcnt_inter = 0.0; cpi->one_pass_frame_stats[cpi->one_pass_frame_index ].frame_pcnt_motion = 0.0; cpi->one_pass_frame_stats[cpi->one_pass_frame_index ].frame_mvr = 0.0; cpi->one_pass_frame_stats[cpi->one_pass_frame_index ].frame_mvr_abs = 0.0; cpi->one_pass_frame_stats[cpi->one_pass_frame_index ].frame_mvc = 0.0; cpi->one_pass_frame_stats[cpi->one_pass_frame_index ].frame_mvc_abs = 0.0; } #endif update_rd_ref_frame_probs(cpi); if (cpi->drop_frames_allowed) { /* The reset to decimation 0 is only done here for one pass. * Once it is set two pass leaves decimation on till the next kf. */ if ((cpi->buffer_level > drop_mark) && (cpi->decimation_factor > 0)) { cpi->decimation_factor--; } if (cpi->buffer_level > drop_mark75 && cpi->decimation_factor > 0) { cpi->decimation_factor = 1; } else if (cpi->buffer_level < drop_mark25 && (cpi->decimation_factor == 2 || cpi->decimation_factor == 3)) { cpi->decimation_factor = 3; } else if (cpi->buffer_level < drop_mark50 && (cpi->decimation_factor == 1 || cpi->decimation_factor == 2)) { cpi->decimation_factor = 2; } else if (cpi->buffer_level < drop_mark75 && (cpi->decimation_factor == 0 || cpi->decimation_factor == 1)) { cpi->decimation_factor = 1; } } /* The following decimates the frame rate according to a regular * pattern (i.e. to 1/2 or 2/3 frame rate) This can be used to help * prevent buffer under-run in CBR mode. Alternatively it might be * desirable in some situations to drop frame rate but throw more bits * at each frame. 
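   * When frames are dropped this way the per-frame bit allowance is scaled
   * up below (by 3/2 or 5/4), since fewer coded frames share the same
   * target bitrate.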
* * Note that dropping a key frame can be problematic if spatial * resampling is also active */ if (cpi->decimation_factor > 0) { switch (cpi->decimation_factor) { case 1: cpi->per_frame_bandwidth = cpi->per_frame_bandwidth * 3 / 2; break; case 2: cpi->per_frame_bandwidth = cpi->per_frame_bandwidth * 5 / 4; break; case 3: cpi->per_frame_bandwidth = cpi->per_frame_bandwidth * 5 / 4; break; } /* Note that we should not throw out a key frame (especially when * spatial resampling is enabled). */ if (cm->frame_type == KEY_FRAME) { cpi->decimation_count = cpi->decimation_factor; } else if (cpi->decimation_count > 0) { cpi->decimation_count--; cpi->bits_off_target += cpi->av_per_frame_bandwidth; if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) { cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; } #if CONFIG_MULTI_RES_ENCODING vp8_store_drop_frame_info(cpi); #endif cm->current_video_frame++; cpi->frames_since_key++; cpi->ext_refresh_frame_flags_pending = 0; // We advance the temporal pattern for dropped frames. cpi->temporal_pattern_counter++; #if CONFIG_INTERNAL_STATS cpi->count++; #endif cpi->buffer_level = cpi->bits_off_target; if (cpi->oxcf.number_of_layers > 1) { unsigned int i; /* Propagate bits saved by dropping the frame to higher * layers */ for (i = cpi->current_layer + 1; i < cpi->oxcf.number_of_layers; ++i) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; lc->bits_off_target += (int)(lc->target_bandwidth / lc->framerate); if (lc->bits_off_target > lc->maximum_buffer_size) { lc->bits_off_target = lc->maximum_buffer_size; } lc->buffer_level = lc->bits_off_target; } } return; } else { cpi->decimation_count = cpi->decimation_factor; } } else { cpi->decimation_count = 0; } /* Decide how big to make the frame */ if (!vp8_pick_frame_size(cpi)) { /*TODO: 2 drop_frame and return code could be put together. */ #if CONFIG_MULTI_RES_ENCODING vp8_store_drop_frame_info(cpi); #endif cm->current_video_frame++; cpi->frames_since_key++; cpi->ext_refresh_frame_flags_pending = 0; // We advance the temporal pattern for dropped frames. cpi->temporal_pattern_counter++; return; } /* Reduce active_worst_allowed_q for CBR if our buffer is getting too full. * This has a knock on effect on active best quality as well. * For CBR if the buffer reaches its maximum level then we can no longer * save up bits for later frames so we might as well use them up * on the current frame. */ if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && (cpi->buffer_level >= cpi->oxcf.optimal_buffer_level) && cpi->buffered_mode) { /* Max adjustment is 1/4 */ int Adjustment = cpi->active_worst_quality / 4; if (Adjustment) { int buff_lvl_step; if (cpi->buffer_level < cpi->oxcf.maximum_buffer_size) { buff_lvl_step = (int)((cpi->oxcf.maximum_buffer_size - cpi->oxcf.optimal_buffer_level) / Adjustment); if (buff_lvl_step) { Adjustment = (int)((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) / buff_lvl_step); } else { Adjustment = 0; } } cpi->active_worst_quality -= Adjustment; if (cpi->active_worst_quality < cpi->active_best_quality) { cpi->active_worst_quality = cpi->active_best_quality; } } } /* Set an active best quality and if necessary active worst quality * There is some odd behavior for one pass here that needs attention. 
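   * (One pass only takes this path once ni_frames > 150, i.e. once enough
   * frames have been coded for running averages such as avg_frame_qindex
   * to be meaningful.)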
   */
  if ((cpi->pass == 2) || (cpi->ni_frames > 150)) {
    vpx_clear_system_state();

    Q = cpi->active_worst_quality;

    if (cm->frame_type == KEY_FRAME) {
      if (cpi->pass == 2) {
        if (cpi->gfu_boost > 600) {
          cpi->active_best_quality = kf_low_motion_minq[Q];
        } else {
          cpi->active_best_quality = kf_high_motion_minq[Q];
        }

        /* Special case for key frames forced because we have reached
         * the maximum key frame interval. Here force the Q to a range
         * based on the ambient Q to reduce the risk of popping
         */
        if (cpi->this_key_frame_forced) {
          if (cpi->active_best_quality > cpi->avg_frame_qindex * 7 / 8) {
            cpi->active_best_quality = cpi->avg_frame_qindex * 7 / 8;
          } else if (cpi->active_best_quality < cpi->avg_frame_qindex >> 2) {
            cpi->active_best_quality = cpi->avg_frame_qindex >> 2;
          }
        }
      }
      /* One pass more conservative */
      else {
        cpi->active_best_quality = kf_high_motion_minq[Q];
      }
    } else if (cpi->oxcf.number_of_layers == 1 &&
               (cm->refresh_golden_frame ||
                cpi->common.refresh_alt_ref_frame)) {
      /* Use the lower of cpi->active_worst_quality and recent
       * average Q as basis for GF/ARF Q limit unless last frame was
       * a key frame.
       */
      if ((cpi->frames_since_key > 1) &&
          (cpi->avg_frame_qindex < cpi->active_worst_quality)) {
        Q = cpi->avg_frame_qindex;
      }

      /* For constrained quality don't allow Q less than the cq level */
      if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
          (Q < cpi->cq_target_quality)) {
        Q = cpi->cq_target_quality;
      }

      if (cpi->pass == 2) {
        if (cpi->gfu_boost > 1000) {
          cpi->active_best_quality = gf_low_motion_minq[Q];
        } else if (cpi->gfu_boost < 400) {
          cpi->active_best_quality = gf_high_motion_minq[Q];
        } else {
          cpi->active_best_quality = gf_mid_motion_minq[Q];
        }

        /* Constrained quality uses a slightly lower active best. */
        if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
          cpi->active_best_quality = cpi->active_best_quality * 15 / 16;
        }
      }
      /* One pass more conservative */
      else {
        cpi->active_best_quality = gf_high_motion_minq[Q];
      }
    } else {
      cpi->active_best_quality = inter_minq[Q];

      /* For the constant/constrained quality mode we don't want
       * q to fall below the cq level.
       */
      if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) &&
          (cpi->active_best_quality < cpi->cq_target_quality)) {
        /* If we are strongly undershooting the target rate in the last
         * frames then use the user passed in cq value not the auto
         * cq value.
         */
        if (cpi->rolling_actual_bits < cpi->min_frame_bandwidth) {
          cpi->active_best_quality = cpi->oxcf.cq_level;
        } else {
          cpi->active_best_quality = cpi->cq_target_quality;
        }
      }
    }

    /* If CBR and the buffer is nearly full then it is reasonable to allow
     * higher quality on the frames to prevent bits just going to waste.
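     * The boost below is proportional to how far the buffer has risen
     * above its optimal level: e.g. a buffer level halfway between
     * optimal_buffer_level and maximum_buffer_size gives Fraction = 64,
     * moving active_best_quality half of the way down to best_quality.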
     */
    if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) {
      /* Note that the use of >= here eliminates the risk of a divide
       * by 0 error in the else if clause
       */
      if (cpi->buffer_level >= cpi->oxcf.maximum_buffer_size) {
        cpi->active_best_quality = cpi->best_quality;
      } else if (cpi->buffer_level > cpi->oxcf.optimal_buffer_level) {
        int Fraction =
            (int)(((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) *
                   128) /
                  (cpi->oxcf.maximum_buffer_size -
                   cpi->oxcf.optimal_buffer_level));
        int min_qadjustment =
            ((cpi->active_best_quality - cpi->best_quality) * Fraction) / 128;

        cpi->active_best_quality -= min_qadjustment;
      }
    }
  }
  /* Make sure constrained quality mode limits are adhered to for the first
   * few frames of one pass encodes
   */
  else if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) {
    if ((cm->frame_type == KEY_FRAME) || cm->refresh_golden_frame ||
        cpi->common.refresh_alt_ref_frame) {
      cpi->active_best_quality = cpi->best_quality;
    } else if (cpi->active_best_quality < cpi->cq_target_quality) {
      cpi->active_best_quality = cpi->cq_target_quality;
    }
  }

  /* Clip the active best and worst quality values to limits */
  if (cpi->active_worst_quality > cpi->worst_quality) {
    cpi->active_worst_quality = cpi->worst_quality;
  }

  if (cpi->active_best_quality < cpi->best_quality) {
    cpi->active_best_quality = cpi->best_quality;
  }

  if (cpi->active_worst_quality < cpi->active_best_quality) {
    cpi->active_worst_quality = cpi->active_best_quality;
  }

  /* Determine initial Q to try */
  Q = vp8_regulate_q(cpi, cpi->this_frame_target);

#if !CONFIG_REALTIME_ONLY
  /* Set highest allowed value for Zbin over quant */
  if (cm->frame_type == KEY_FRAME) {
    zbin_oq_high = 0;
  } else if ((cpi->oxcf.number_of_layers == 1) &&
             ((cm->refresh_alt_ref_frame ||
               (cm->refresh_golden_frame && !cpi->source_alt_ref_active)))) {
    zbin_oq_high = 16;
  } else {
    zbin_oq_high = ZBIN_OQ_MAX;
  }
#endif

  compute_skin_map(cpi);

  /* Setup background Q adjustment for error resilient mode.
   * For multi-layer encodes only enable this for the base layer.
   */
  if (cpi->cyclic_refresh_mode_enabled) {
    // Special case for screen_content_mode with golden frame updates.
    int disable_cr_gf =
        (cpi->oxcf.screen_content_mode == 2 && cm->refresh_golden_frame);

    if (cpi->current_layer == 0 && cpi->force_maxqp == 0 && !disable_cr_gf) {
      cyclic_background_refresh(cpi, Q, 0);
    } else {
      disable_segmentation(cpi);
    }
  }

  vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit,
                                &frame_over_shoot_limit);

#if !CONFIG_REALTIME_ONLY
  /* Limit Q range for the adaptive loop. */
  bottom_index = cpi->active_best_quality;
  top_index = cpi->active_worst_quality;
  q_low = cpi->active_best_quality;
  q_high = cpi->active_worst_quality;
#endif

  vp8_save_coding_context(cpi);

  loop_count = 0;

  scale_and_extend_source(cpi->un_scaled_source, cpi);

#if CONFIG_TEMPORAL_DENOISING && CONFIG_POSTPROC
  // Option to apply spatial blur under the aggressive or adaptive
  // (temporal denoising) mode.
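  // noise_sensitivity 3 selects the aggressive denoiser mode and 4 the
  // adaptive one; the blur strength is taken from the denoise_pars set
  // configured for that mode.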
if (cpi->oxcf.noise_sensitivity >= 3) { if (cpi->denoiser.denoise_pars.spatial_blur != 0) { vp8_de_noise(cm, cpi->Source, cpi->denoiser.denoise_pars.spatial_blur, 1); } } #endif #if !(CONFIG_REALTIME_ONLY) && CONFIG_POSTPROC && !(CONFIG_TEMPORAL_DENOISING) if (cpi->oxcf.noise_sensitivity > 0) { unsigned char *src; int l = 0; switch (cpi->oxcf.noise_sensitivity) { case 1: l = 20; break; case 2: l = 40; break; case 3: l = 60; break; case 4: l = 80; break; case 5: l = 100; break; case 6: l = 150; break; } if (cm->frame_type == KEY_FRAME) { vp8_de_noise(cm, cpi->Source, l, 1); } else { vp8_de_noise(cm, cpi->Source, l, 1); src = cpi->Source->y_buffer; if (cpi->Source->y_stride < 0) { src += cpi->Source->y_stride * (cpi->Source->y_height - 1); } } } #endif #ifdef OUTPUT_YUV_SRC vpx_write_yuv_frame(yuv_file, cpi->Source); #endif do { vpx_clear_system_state(); vp8_set_quantizer(cpi, Q); /* setup skip prob for costing in mode/mv decision */ if (cpi->common.mb_no_coeff_skip) { cpi->prob_skip_false = cpi->base_skip_false_prob[Q]; if (cm->frame_type != KEY_FRAME) { if (cpi->common.refresh_alt_ref_frame) { if (cpi->last_skip_false_probs[2] != 0) { cpi->prob_skip_false = cpi->last_skip_false_probs[2]; } /* if(cpi->last_skip_false_probs[2]!=0 && abs(Q- cpi->last_skip_probs_q[2])<=16 ) cpi->prob_skip_false = cpi->last_skip_false_probs[2]; else if (cpi->last_skip_false_probs[2]!=0) cpi->prob_skip_false = (cpi->last_skip_false_probs[2] + cpi->prob_skip_false ) / 2; */ } else if (cpi->common.refresh_golden_frame) { if (cpi->last_skip_false_probs[1] != 0) { cpi->prob_skip_false = cpi->last_skip_false_probs[1]; } /* if(cpi->last_skip_false_probs[1]!=0 && abs(Q- cpi->last_skip_probs_q[1])<=16 ) cpi->prob_skip_false = cpi->last_skip_false_probs[1]; else if (cpi->last_skip_false_probs[1]!=0) cpi->prob_skip_false = (cpi->last_skip_false_probs[1] + cpi->prob_skip_false ) / 2; */ } else { if (cpi->last_skip_false_probs[0] != 0) { cpi->prob_skip_false = cpi->last_skip_false_probs[0]; } /* if(cpi->last_skip_false_probs[0]!=0 && abs(Q- cpi->last_skip_probs_q[0])<=16 ) cpi->prob_skip_false = cpi->last_skip_false_probs[0]; else if(cpi->last_skip_false_probs[0]!=0) cpi->prob_skip_false = (cpi->last_skip_false_probs[0] + cpi->prob_skip_false ) / 2; */ } /* as this is for cost estimate, let's make sure it does not * go extreme eitehr way */ if (cpi->prob_skip_false < 5) cpi->prob_skip_false = 5; if (cpi->prob_skip_false > 250) cpi->prob_skip_false = 250; if (cpi->oxcf.number_of_layers == 1 && cpi->is_src_frame_alt_ref) { cpi->prob_skip_false = 1; } } #if 0 if (cpi->pass != 1) { FILE *f = fopen("skip.stt", "a"); fprintf(f, "%d, %d, %4d ", cpi->common.refresh_golden_frame, cpi->common.refresh_alt_ref_frame, cpi->prob_skip_false); fclose(f); } #endif } if (cm->frame_type == KEY_FRAME) { if (resize_key_frame(cpi)) { /* If the frame size has changed, need to reset Q, quantizer, * and background refresh. */ Q = vp8_regulate_q(cpi, cpi->this_frame_target); if (cpi->cyclic_refresh_mode_enabled) { if (cpi->current_layer == 0) { cyclic_background_refresh(cpi, Q, 0); } else { disable_segmentation(cpi); } } // Reset the zero_last counter to 0 on key frame. 
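        // consec_zero_last tracks, per macroblock, how many consecutive
        // frames were coded as ZEROMV on the last-frame reference; a key
        // frame (or a key frame resize) invalidates that history.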
memset(cpi->consec_zero_last, 0, cm->mb_rows * cm->mb_cols); memset(cpi->consec_zero_last_mvbias, 0, (cpi->common.mb_rows * cpi->common.mb_cols)); vp8_set_quantizer(cpi, Q); } vp8_setup_key_frame(cpi); } #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING { if (cpi->oxcf.error_resilient_mode) cm->refresh_entropy_probs = 0; if (cpi->oxcf.error_resilient_mode & VPX_ERROR_RESILIENT_PARTITIONS) { if (cm->frame_type == KEY_FRAME) cm->refresh_entropy_probs = 1; } if (cm->refresh_entropy_probs == 0) { /* save a copy for later refresh */ memcpy(&cm->lfc, &cm->fc, sizeof(cm->fc)); } vp8_update_coef_context(cpi); vp8_update_coef_probs(cpi); /* transform / motion compensation build reconstruction frame * +pack coef partitions */ vp8_encode_frame(cpi); /* cpi->projected_frame_size is not needed for RT mode */ } #else /* transform / motion compensation build reconstruction frame */ vp8_encode_frame(cpi); if (cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { if (vp8_drop_encodedframe_overshoot(cpi, Q)) { vpx_clear_system_state(); return; } if (cm->frame_type != KEY_FRAME) cpi->last_pred_err_mb = (int)(cpi->mb.prediction_error / cpi->common.MBs); } cpi->projected_frame_size -= vp8_estimate_entropy_savings(cpi); cpi->projected_frame_size = (cpi->projected_frame_size > 0) ? cpi->projected_frame_size : 0; #endif vpx_clear_system_state(); /* Test to see if the stats generated for this frame indicate that * we should have coded a key frame (assuming that we didn't)! */ if (cpi->pass != 2 && cpi->oxcf.auto_key && cm->frame_type != KEY_FRAME && cpi->compressor_speed != 2) { #if !CONFIG_REALTIME_ONLY if (decide_key_frame(cpi)) { /* Reset all our sizing numbers and recode */ cm->frame_type = KEY_FRAME; vp8_pick_frame_size(cpi); /* Clear the Alt reference frame active flag when we have * a key frame */ cpi->source_alt_ref_active = 0; // Set the loop filter deltas and segmentation map update setup_features(cpi); vp8_restore_coding_context(cpi); Q = vp8_regulate_q(cpi, cpi->this_frame_target); vp8_compute_frame_size_bounds(cpi, &frame_under_shoot_limit, &frame_over_shoot_limit); /* Limit Q range for the adaptive loop. */ bottom_index = cpi->active_best_quality; top_index = cpi->active_worst_quality; q_low = cpi->active_best_quality; q_high = cpi->active_worst_quality; loop_count++; Loop = 1; continue; } #endif } vpx_clear_system_state(); if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; /* Are we overshooting and up against the limit of active max Q. */ if (((cpi->pass != 2) || (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER)) && (Q == cpi->active_worst_quality) && (cpi->active_worst_quality < cpi->worst_quality) && (cpi->projected_frame_size > frame_over_shoot_limit)) { int over_size_percent = ((cpi->projected_frame_size - frame_over_shoot_limit) * 100) / frame_over_shoot_limit; /* If so is there any scope for relaxing it */ while ((cpi->active_worst_quality < cpi->worst_quality) && (over_size_percent > 0)) { cpi->active_worst_quality++; /* Assume 1 qstep = about 4% on frame size. */ over_size_percent = (int)(over_size_percent * 0.96); } #if !CONFIG_REALTIME_ONLY top_index = cpi->active_worst_quality; #endif // !CONFIG_REALTIME_ONLY /* If we have updated the active max Q do not call * vp8_update_rate_correction_factors() this loop.
*/ active_worst_qchanged = 1; } else { active_worst_qchanged = 0; } #if CONFIG_REALTIME_ONLY Loop = 0; #else /* Special case handling for forced key frames */ if ((cm->frame_type == KEY_FRAME) && cpi->this_key_frame_forced) { int last_q = Q; int kf_err = vp8_calc_ss_err(cpi->Source, &cm->yv12_fb[cm->new_fb_idx]); /* The key frame is not good enough */ if (kf_err > ((cpi->ambient_err * 7) >> 3)) { /* Lower q_high */ q_high = (Q > q_low) ? (Q - 1) : q_low; /* Adjust Q */ Q = (q_high + q_low) >> 1; } /* The key frame is much better than the previous frame */ else if (kf_err < (cpi->ambient_err >> 1)) { /* Raise q_low */ q_low = (Q < q_high) ? (Q + 1) : q_high; /* Adjust Q */ Q = (q_high + q_low + 1) >> 1; } /* Clamp Q to upper and lower limits: */ if (Q > q_high) { Q = q_high; } else if (Q < q_low) { Q = q_low; } Loop = Q != last_q; } /* Is the projected frame size out of range and are we allowed * to attempt to recode. */ else if (recode_loop_test(cpi, frame_over_shoot_limit, frame_under_shoot_limit, Q, top_index, bottom_index)) { int last_q = Q; int Retries = 0; /* Frame size out of permitted range. Update correction factor * & compute new Q to try... */ /* Frame is too large */ if (cpi->projected_frame_size > cpi->this_frame_target) { /* Raise q_low to at least the current value */ q_low = (Q < q_high) ? (Q + 1) : q_high; /* If we are using over quant do the same for zbin_oq_low */ if (cpi->mb.zbin_over_quant > 0) { zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ? (cpi->mb.zbin_over_quant + 1) : zbin_oq_high; } if (undershoot_seen) { /* Update rate_correction_factor unless * cpi->active_worst_quality has changed. */ if (!active_worst_qchanged) { vp8_update_rate_correction_factors(cpi, 1); } Q = (q_high + q_low + 1) / 2; /* Adjust cpi->zbin_over_quant (only allowed when Q * is max) */ if (Q < MAXQ) { cpi->mb.zbin_over_quant = 0; } else { zbin_oq_low = (cpi->mb.zbin_over_quant < zbin_oq_high) ? (cpi->mb.zbin_over_quant + 1) : zbin_oq_high; cpi->mb.zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2; } } else { /* Update rate_correction_factor unless * cpi->active_worst_quality has changed. */ if (!active_worst_qchanged) { vp8_update_rate_correction_factors(cpi, 0); } Q = vp8_regulate_q(cpi, cpi->this_frame_target); while (((Q < q_low) || (cpi->mb.zbin_over_quant < zbin_oq_low)) && (Retries < 10)) { vp8_update_rate_correction_factors(cpi, 0); Q = vp8_regulate_q(cpi, cpi->this_frame_target); Retries++; } } overshoot_seen = 1; } /* Frame is too small */ else { if (cpi->mb.zbin_over_quant == 0) { /* Lower q_high if not using over quant */ q_high = (Q > q_low) ? (Q - 1) : q_low; } else { /* else lower zbin_oq_high */ zbin_oq_high = (cpi->mb.zbin_over_quant > zbin_oq_low) ? (cpi->mb.zbin_over_quant - 1) : zbin_oq_low; } if (overshoot_seen) { /* Update rate_correction_factor unless * cpi->active_worst_quality has changed. */ if (!active_worst_qchanged) { vp8_update_rate_correction_factors(cpi, 1); } Q = (q_high + q_low) / 2; /* Adjust cpi->zbin_over_quant (only allowed when Q * is max) */ if (Q < MAXQ) { cpi->mb.zbin_over_quant = 0; } else { cpi->mb.zbin_over_quant = (zbin_oq_high + zbin_oq_low) / 2; } } else { /* Update rate_correction_factor unless * cpi->active_worst_quality has changed. */ if (!active_worst_qchanged) { vp8_update_rate_correction_factors(cpi, 0); } Q = vp8_regulate_q(cpi, cpi->this_frame_target); /* Special case reset for qlow for constrained quality.
* This should only trigger where there is very substantial * undershoot on a frame and the auto cq level is above * the user passed in value. */ if ((cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) && (Q < q_low)) { q_low = Q; } while (((Q > q_high) || (cpi->mb.zbin_over_quant > zbin_oq_high)) && (Retries < 10)) { vp8_update_rate_correction_factors(cpi, 0); Q = vp8_regulate_q(cpi, cpi->this_frame_target); Retries++; } } undershoot_seen = 1; } /* Clamp Q to upper and lower limits: */ if (Q > q_high) { Q = q_high; } else if (Q < q_low) { Q = q_low; } /* Clamp cpi->zbin_over_quant */ cpi->mb.zbin_over_quant = (cpi->mb.zbin_over_quant < zbin_oq_low) ? zbin_oq_low : (cpi->mb.zbin_over_quant > zbin_oq_high) ? zbin_oq_high : cpi->mb.zbin_over_quant; Loop = Q != last_q; } else { Loop = 0; } #endif // CONFIG_REALTIME_ONLY if (cpi->is_src_frame_alt_ref) Loop = 0; if (Loop == 1) { vp8_restore_coding_context(cpi); loop_count++; #if CONFIG_INTERNAL_STATS cpi->tot_recode_hits++; #endif } } while (Loop == 1); #if defined(DROP_UNCODED_FRAMES) /* if there are no coded macroblocks at all drop this frame */ if (cpi->common.MBs == cpi->mb.skip_true_count && (cpi->drop_frame_count & 7) != 7 && cm->frame_type != KEY_FRAME) { cpi->common.current_video_frame++; cpi->frames_since_key++; cpi->drop_frame_count++; cpi->ext_refresh_frame_flags_pending = 0; // We advance the temporal pattern for dropped frames. cpi->temporal_pattern_counter++; return; } cpi->drop_frame_count = 0; #endif #if 0 /* Experimental code for lagged and one pass * Update stats used for one pass GF selection */ { cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_coded_error = (double)cpi->prediction_error; cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_intra_error = (double)cpi->intra_error; cpi->one_pass_frame_stats[cpi->one_pass_frame_index].frame_pcnt_inter = (double)(100 - cpi->this_frame_percent_intra) / 100.0; } #endif /* Special case code to reduce pulsing when key frames are forced at a * fixed interval. Note the reconstruction error if it is the frame before * the forced key frame */ if (cpi->next_key_frame_forced && (cpi->twopass.frames_to_key == 0)) { cpi->ambient_err = vp8_calc_ss_err(cpi->Source, &cm->yv12_fb[cm->new_fb_idx]); } /* This frame's MVs are saved and will be used in next frame's MV predictor. * Last frame has one more line (add to bottom) and one more column (add to * right) than cm->mip. The edge elements are initialized to 0. */ #if CONFIG_MULTI_RES_ENCODING if (!cpi->oxcf.mr_encoder_id && cm->show_frame) #else if (cm->show_frame) /* do not save for altref frame */ #endif { int mb_row; int mb_col; /* Point to beginning of allocated MODE_INFO arrays. */ MODE_INFO *tmp = cm->mip; if (cm->frame_type != KEY_FRAME) { for (mb_row = 0; mb_row < cm->mb_rows + 1; ++mb_row) { for (mb_col = 0; mb_col < cm->mb_cols + 1; ++mb_col) { if (tmp->mbmi.ref_frame != INTRA_FRAME) { cpi->lfmv[mb_col + mb_row * (cm->mode_info_stride + 1)].as_int = tmp->mbmi.mv.as_int; } cpi->lf_ref_frame_sign_bias[mb_col + mb_row * (cm->mode_info_stride + 1)] = cm->ref_frame_sign_bias[tmp->mbmi.ref_frame]; cpi->lf_ref_frame[mb_col + mb_row * (cm->mode_info_stride + 1)] = tmp->mbmi.ref_frame; tmp++; } } } } /* Count last ref frame 0,0 usage on current encoded frame. */ { int mb_row; int mb_col; /* Point to beginning of MODE_INFO arrays.
*/ MODE_INFO *tmp = cm->mi; cpi->zeromv_count = 0; if (cm->frame_type != KEY_FRAME) { for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { if (tmp->mbmi.mode == ZEROMV && tmp->mbmi.ref_frame == LAST_FRAME) { cpi->zeromv_count++; } tmp++; } tmp++; } } #if CONFIG_MULTI_RES_ENCODING vp8_cal_dissimilarity(cpi); #endif /* Update the GF usage maps. * This is done after completing the compression of a frame when all * modes etc. are finalized but before loop filter */ if (cpi->oxcf.number_of_layers == 1) { vp8_update_gf_useage_maps(cpi, cm, &cpi->mb); } if (cm->frame_type == KEY_FRAME) cm->refresh_last_frame = 1; #if 0 { FILE *f = fopen("gfactive.stt", "a"); fprintf(f, "%8d %8d %8d %8d %8d\n", cm->current_video_frame, (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols), cpi->this_iiratio, cpi->next_iiratio, cm->refresh_golden_frame); fclose(f); } #endif /* For inter frames the current default behavior is that when * cm->refresh_golden_frame is set we copy the old GF over to the ARF buffer * This is purely an encoder decision at present. * Avoid this behavior when refresh flags are set by the user. */ if (!cpi->oxcf.error_resilient_mode && cm->refresh_golden_frame && !cpi->ext_refresh_frame_flags_pending) { cm->copy_buffer_to_arf = 2; } else { cm->copy_buffer_to_arf = 0; } cm->frame_to_show = &cm->yv12_fb[cm->new_fb_idx]; #if CONFIG_TEMPORAL_DENOISING // Get some measure of the amount of noise, by measuring the (partial) mse // between source and denoised buffer, for y channel. Partial refers to // computing the sse for a sub-sample of the frame (i.e., skip x blocks along // row/column), // and only for blocks in that set that are consecutive ZEROMV_LAST mode. // Do this every ~8 frames, to further reduce complexity. // TODO(marpan): Keep this for now for the case cpi->oxcf.noise_sensitivity < // 4, // should be removed in favor of the process_denoiser_mode_change() function // below. if (cpi->oxcf.noise_sensitivity > 0 && cpi->oxcf.noise_sensitivity < 4 && !cpi->oxcf.screen_content_mode && cpi->frames_since_key % 8 == 0 && cm->frame_type != KEY_FRAME) { cpi->mse_source_denoised = measure_square_diff_partial( &cpi->denoiser.yv12_running_avg[INTRA_FRAME], cpi->Source, cpi); } // For the adaptive denoising mode (noise_sensitivity == 4), sample the mse // of source diff (between current and previous frame), and determine if we // should switch the denoiser mode. Sampling refers to computing the mse for // a sub-sample of the frame (i.e., skip x blocks along row/column), and // only for blocks in that set that have used ZEROMV LAST, along with some // constraint on the sum diff between blocks. This process is called every // ~8 frames, to further reduce complexity.
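// (The ~8-frame cadence corresponds to the frames_since_key % 8 == 0 test
// below; key frames are excluded since their statistics are not comparable
// to inter frames.)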
if (cpi->oxcf.noise_sensitivity == 4 && !cpi->oxcf.screen_content_mode && cpi->frames_since_key % 8 == 0 && cm->frame_type != KEY_FRAME) { process_denoiser_mode_change(cpi); } #endif #ifdef OUTPUT_YUV_SKINMAP if (cpi->common.current_video_frame > 1) { vp8_compute_skin_map(cpi, yuv_skinmap_file); } #endif #if CONFIG_MULTITHREAD if (vpx_atomic_load_acquire(&cpi->b_multi_threaded)) { /* start loopfilter in separate thread */ sem_post(&cpi->h_event_start_lpf); cpi->b_lpf_running = 1; /* wait for the filter_level to be picked so that we can continue with * stream packing */ sem_wait(&cpi->h_event_end_lpf); } else #endif { vp8_loopfilter_frame(cpi, cm); } update_reference_frames(cpi); #ifdef OUTPUT_YUV_DENOISED vpx_write_yuv_frame(yuv_denoised_file, &cpi->denoiser.yv12_running_avg[INTRA_FRAME]); #endif #if !(CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) if (cpi->oxcf.error_resilient_mode) { cm->refresh_entropy_probs = 0; } #endif /* build the bitstream */ vp8_pack_bitstream(cpi, dest, dest_end, size); /* Move storing frame_type out of the above loop since it is also * needed in motion search besides loopfilter */ cm->last_frame_type = cm->frame_type; /* Update rate control heuristics */ cpi->total_byte_count += (*size); cpi->projected_frame_size = (int)(*size) << 3; if (cpi->oxcf.number_of_layers > 1) { unsigned int i; for (i = cpi->current_layer + 1; i < cpi->oxcf.number_of_layers; ++i) { cpi->layer_context[i].total_byte_count += (*size); } } if (!active_worst_qchanged) vp8_update_rate_correction_factors(cpi, 2); cpi->last_q[cm->frame_type] = cm->base_qindex; if (cm->frame_type == KEY_FRAME) { vp8_adjust_key_frame_context(cpi); } /* Keep a record of ambient average Q. */ if (cm->frame_type != KEY_FRAME) { cpi->avg_frame_qindex = (2 + 3 * cpi->avg_frame_qindex + cm->base_qindex) >> 2; } /* Keep a record from which we can calculate the average Q excluding * GF updates and key frames */ if ((cm->frame_type != KEY_FRAME) && ((cpi->oxcf.number_of_layers > 1) || (!cm->refresh_golden_frame && !cm->refresh_alt_ref_frame))) { cpi->ni_frames++; /* Calculate the average Q for normal inter frames (not key or GFU * frames). */ if (cpi->pass == 2) { cpi->ni_tot_qi += Q; cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames); } else { /* Damp value for first few frames */ if (cpi->ni_frames > 150) { cpi->ni_tot_qi += Q; cpi->ni_av_qi = (cpi->ni_tot_qi / cpi->ni_frames); } /* For one pass, early in the clip ... average the current frame Q * value with the worstq entered by the user as a dampening measure */ else { cpi->ni_tot_qi += Q; cpi->ni_av_qi = ((cpi->ni_tot_qi / cpi->ni_frames) + cpi->worst_quality + 1) / 2; } /* If the average Q is higher than what was used in the last * frame (after going through the recode loop to keep the frame * size within range) then use the last frame value - 1. The -1 * is designed to stop Q and hence the data rate, from * progressively falling away during difficult sections, but at * the same time reduce the number of iterations around the * recode loop. */ if (Q > cpi->ni_av_qi) cpi->ni_av_qi = Q - 1; } } /* Update the buffer level variable. */ /* Non-viewable frames are a special case and are treated as pure overhead.
*/ if (!cm->show_frame) { cpi->bits_off_target -= cpi->projected_frame_size; } else { cpi->bits_off_target += cpi->av_per_frame_bandwidth - cpi->projected_frame_size; } /* Clip the buffer level to the maximum specified buffer size */ if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) { cpi->bits_off_target = cpi->oxcf.maximum_buffer_size; } // If the frame dropper is not enabled, don't let the buffer level go below // some threshold, given here by -|maximum_buffer_size|. For now we only do // this for screen content input. if (cpi->drop_frames_allowed == 0 && cpi->oxcf.screen_content_mode && cpi->bits_off_target < -cpi->oxcf.maximum_buffer_size) { cpi->bits_off_target = -cpi->oxcf.maximum_buffer_size; } /* Rolling monitors of whether we are over or underspending used to * help regulate min and Max Q in two pass. */ cpi->rolling_target_bits = ((cpi->rolling_target_bits * 3) + cpi->this_frame_target + 2) / 4; cpi->rolling_actual_bits = ((cpi->rolling_actual_bits * 3) + cpi->projected_frame_size + 2) / 4; cpi->long_rolling_target_bits = ((cpi->long_rolling_target_bits * 31) + cpi->this_frame_target + 16) / 32; cpi->long_rolling_actual_bits = ((cpi->long_rolling_actual_bits * 31) + cpi->projected_frame_size + 16) / 32; /* Actual bits spent */ cpi->total_actual_bits += cpi->projected_frame_size; /* Debug stats */ cpi->total_target_vs_actual += (cpi->this_frame_target - cpi->projected_frame_size); cpi->buffer_level = cpi->bits_off_target; /* Propagate values to higher temporal layers */ if (cpi->oxcf.number_of_layers > 1) { unsigned int i; for (i = cpi->current_layer + 1; i < cpi->oxcf.number_of_layers; ++i) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; int bits_off_for_this_layer = (int)(lc->target_bandwidth / lc->framerate - cpi->projected_frame_size); lc->bits_off_target += bits_off_for_this_layer; /* Clip buffer level to maximum buffer size for the layer */ if (lc->bits_off_target > lc->maximum_buffer_size) { lc->bits_off_target = lc->maximum_buffer_size; } lc->total_actual_bits += cpi->projected_frame_size; lc->total_target_vs_actual += bits_off_for_this_layer; lc->buffer_level = lc->bits_off_target; } } /* Update bits left to the kf and gf groups to account for overshoot * or undershoot on these frames */ if (cm->frame_type == KEY_FRAME) { cpi->twopass.kf_group_bits += cpi->this_frame_target - cpi->projected_frame_size; if (cpi->twopass.kf_group_bits < 0) cpi->twopass.kf_group_bits = 0; } else if (cm->refresh_golden_frame || cm->refresh_alt_ref_frame) { cpi->twopass.gf_group_bits += cpi->this_frame_target - cpi->projected_frame_size; if (cpi->twopass.gf_group_bits < 0) cpi->twopass.gf_group_bits = 0; } if (cm->frame_type != KEY_FRAME) { if (cpi->common.refresh_alt_ref_frame) { cpi->last_skip_false_probs[2] = cpi->prob_skip_false; cpi->last_skip_probs_q[2] = cm->base_qindex; } else if (cpi->common.refresh_golden_frame) { cpi->last_skip_false_probs[1] = cpi->prob_skip_false; cpi->last_skip_probs_q[1] = cm->base_qindex; } else { cpi->last_skip_false_probs[0] = cpi->prob_skip_false; cpi->last_skip_probs_q[0] = cm->base_qindex; /* update the baseline */ cpi->base_skip_false_prob[cm->base_qindex] = cpi->prob_skip_false; } } #if 0 && CONFIG_INTERNAL_STATS { FILE *f = fopen("tmp.stt", "a"); vpx_clear_system_state(); if (cpi->twopass.total_left_stats.coded_error != 0.0) fprintf(f, "%10d %10d %10d %10d %10d %10"PRId64" %10"PRId64 "%10"PRId64" %10d %6d %6d %6d %6d %5d %5d %5d %8d " "%8.2lf %"PRId64" %10.3lf %10"PRId64" %8d\n", cpi->common.current_video_frame, cpi->this_frame_target, 
cpi->projected_frame_size, (cpi->projected_frame_size - cpi->this_frame_target), cpi->total_target_vs_actual, cpi->buffer_level, (cpi->oxcf.starting_buffer_level-cpi->bits_off_target), cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, cpi->ni_av_qi, cpi->cq_target_quality, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->twopass.est_max_qcorrection_factor, cpi->twopass.bits_left, cpi->twopass.total_left_stats.coded_error, (double)cpi->twopass.bits_left / cpi->twopass.total_left_stats.coded_error, cpi->tot_recode_hits); else fprintf(f, "%10d %10d %10d %10d %10d %10"PRId64" %10"PRId64 "%10"PRId64" %10d %6d %6d %6d %6d %5d %5d %5d %8d " "%8.2lf %"PRId64" %10.3lf %8d\n", cpi->common.current_video_frame, cpi->this_frame_target, cpi->projected_frame_size, (cpi->projected_frame_size - cpi->this_frame_target), cpi->total_target_vs_actual, cpi->buffer_level, (cpi->oxcf.starting_buffer_level-cpi->bits_off_target), cpi->total_actual_bits, cm->base_qindex, cpi->active_best_quality, cpi->active_worst_quality, cpi->ni_av_qi, cpi->cq_target_quality, cm->refresh_golden_frame, cm->refresh_alt_ref_frame, cm->frame_type, cpi->gfu_boost, cpi->twopass.est_max_qcorrection_factor, cpi->twopass.bits_left, cpi->twopass.total_left_stats.coded_error, cpi->tot_recode_hits); fclose(f); { FILE *fmodes = fopen("Modes.stt", "a"); fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame, cm->frame_type, cm->refresh_golden_frame, cm->refresh_alt_ref_frame); fprintf(fmodes, "\n"); fclose(fmodes); } } #endif cpi->ext_refresh_frame_flags_pending = 0; if (cm->refresh_golden_frame == 1) { cm->frame_flags = cm->frame_flags | FRAMEFLAGS_GOLDEN; } else { cm->frame_flags = cm->frame_flags & ~FRAMEFLAGS_GOLDEN; } if (cm->refresh_alt_ref_frame == 1) { cm->frame_flags = cm->frame_flags | FRAMEFLAGS_ALTREF; } else { cm->frame_flags = cm->frame_flags & ~FRAMEFLAGS_ALTREF; } if (cm->refresh_last_frame & cm->refresh_golden_frame) { /* both refreshed */ cpi->gold_is_last = 1; } else if (cm->refresh_last_frame ^ cm->refresh_golden_frame) { /* 1 refreshed but not the other */ cpi->gold_is_last = 0; } if (cm->refresh_last_frame & cm->refresh_alt_ref_frame) { /* both refreshed */ cpi->alt_is_last = 1; } else if (cm->refresh_last_frame ^ cm->refresh_alt_ref_frame) { /* 1 refreshed but not the other */ cpi->alt_is_last = 0; } if (cm->refresh_alt_ref_frame & cm->refresh_golden_frame) { /* both refreshed */ cpi->gold_is_alt = 1; } else if (cm->refresh_alt_ref_frame ^ cm->refresh_golden_frame) { /* 1 refreshed but not the other */ cpi->gold_is_alt = 0; } cpi->ref_frame_flags = VP8_ALTR_FRAME | VP8_GOLD_FRAME | VP8_LAST_FRAME; if (cpi->gold_is_last) cpi->ref_frame_flags &= ~VP8_GOLD_FRAME; if (cpi->alt_is_last) cpi->ref_frame_flags &= ~VP8_ALTR_FRAME; if (cpi->gold_is_alt) cpi->ref_frame_flags &= ~VP8_ALTR_FRAME; if (!cpi->oxcf.error_resilient_mode) { if (cpi->oxcf.play_alternate && cm->refresh_alt_ref_frame && (cm->frame_type != KEY_FRAME)) { /* Update the alternate reference frame stats as appropriate. */ update_alt_ref_frame_stats(cpi); } else { /* Update the Golden frame stats as appropriate. */ update_golden_frame_stats(cpi); } } if (cm->frame_type == KEY_FRAME) { /* Tell the caller that the frame was coded as a key frame */ *frame_flags = cm->frame_flags | FRAMEFLAGS_KEY; /* As this frame is a key frame the next defaults to an inter frame. 
*/ cm->frame_type = INTER_FRAME; cpi->last_frame_percent_intra = 100; } else { *frame_flags = cm->frame_flags & ~FRAMEFLAGS_KEY; cpi->last_frame_percent_intra = cpi->this_frame_percent_intra; } /* Clear the one shot update flags for segmentation map and mode/ref * loop filter deltas. */ cpi->mb.e_mbd.update_mb_segmentation_map = 0; cpi->mb.e_mbd.update_mb_segmentation_data = 0; cpi->mb.e_mbd.mode_ref_lf_delta_update = 0; /* Don't increment frame counters if this was an altref buffer update, * not a real frame */ if (cm->show_frame) { cm->current_video_frame++; cpi->frames_since_key++; cpi->temporal_pattern_counter++; } #if 0 { char filename[512]; FILE *recon_file; sprintf(filename, "enc%04d.yuv", (int) cm->current_video_frame); recon_file = fopen(filename, "wb"); fwrite(cm->yv12_fb[cm->lst_fb_idx].buffer_alloc, cm->yv12_fb[cm->lst_fb_idx].frame_size, 1, recon_file); fclose(recon_file); } #endif /* DEBUG */ /* vpx_write_yuv_frame("encoder_recon.yuv", cm->frame_to_show); */ } #if !CONFIG_REALTIME_ONLY static void Pass2Encode(VP8_COMP *cpi, size_t *size, unsigned char *dest, unsigned char *dest_end, unsigned int *frame_flags) { if (!cpi->common.refresh_alt_ref_frame) vp8_second_pass(cpi); encode_frame_to_data_rate(cpi, size, dest, dest_end, frame_flags); cpi->twopass.bits_left -= 8 * (int)(*size); if (!cpi->common.refresh_alt_ref_frame) { double two_pass_min_rate = (double)(cpi->oxcf.target_bandwidth * cpi->oxcf.two_pass_vbrmin_section / 100); cpi->twopass.bits_left += (int64_t)(two_pass_min_rate / cpi->framerate); } } #endif int vp8_receive_raw_frame(VP8_COMP *cpi, unsigned int frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time) { struct vpx_usec_timer timer; int res = 0; vpx_usec_timer_start(&timer); /* Reinit the lookahead buffer if the frame size changes */ if (sd->y_width != cpi->oxcf.Width || sd->y_height != cpi->oxcf.Height) { assert(cpi->oxcf.lag_in_frames < 2); dealloc_raw_frame_buffers(cpi); alloc_raw_frame_buffers(cpi); } if (vp8_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, frame_flags, cpi->active_map_enabled ?
cpi->active_map : NULL)) { res = -1; } vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); return res; } static int frame_is_reference(const VP8_COMP *cpi) { const VP8_COMMON *cm = &cpi->common; const MACROBLOCKD *xd = &cpi->mb.e_mbd; return cm->frame_type == KEY_FRAME || cm->refresh_last_frame || cm->refresh_golden_frame || cm->refresh_alt_ref_frame || cm->copy_buffer_to_gf || cm->copy_buffer_to_arf || cm->refresh_entropy_probs || xd->mode_ref_lf_delta_update || xd->update_mb_segmentation_map || xd->update_mb_segmentation_data; } int vp8_get_compressed_data(VP8_COMP *cpi, unsigned int *frame_flags, size_t *size, unsigned char *dest, unsigned char *dest_end, int64_t *time_stamp, int64_t *time_end, int flush) { VP8_COMMON *cm; struct vpx_usec_timer tsctimer; struct vpx_usec_timer ticktimer; struct vpx_usec_timer cmptimer; YV12_BUFFER_CONFIG *force_src_buffer = NULL; if (!cpi) return -1; cm = &cpi->common; vpx_usec_timer_start(&cmptimer); cpi->source = NULL; #if !CONFIG_REALTIME_ONLY /* Should we code an alternate reference frame */ if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.play_alternate && cpi->source_alt_ref_pending) { if ((cpi->source = vp8_lookahead_peek( cpi->lookahead, cpi->frames_till_gf_update_due, PEEK_FORWARD))) { cpi->alt_ref_source = cpi->source; if (cpi->oxcf.arnr_max_frames > 0) { vp8_temporal_filter_prepare_c(cpi, cpi->frames_till_gf_update_due); force_src_buffer = &cpi->alt_ref_buffer; } cpi->frames_till_alt_ref_frame = cpi->frames_till_gf_update_due; cm->refresh_alt_ref_frame = 1; cm->refresh_golden_frame = 0; cm->refresh_last_frame = 0; cm->show_frame = 0; /* Clear Pending alt Ref flag. */ cpi->source_alt_ref_pending = 0; cpi->is_src_frame_alt_ref = 0; } } #endif if (!cpi->source) { /* Read last frame source if we are encoding first pass. */ if (cpi->pass == 1 && cm->current_video_frame > 0) { if ((cpi->last_source = vp8_lookahead_peek(cpi->lookahead, 1, PEEK_BACKWARD)) == NULL) { return -1; } } if ((cpi->source = vp8_lookahead_pop(cpi->lookahead, flush))) { cm->show_frame = 1; cpi->is_src_frame_alt_ref = cpi->alt_ref_source && (cpi->source == cpi->alt_ref_source); if (cpi->is_src_frame_alt_ref) cpi->alt_ref_source = NULL; } } if (cpi->source) { cpi->Source = force_src_buffer ? 
force_src_buffer : &cpi->source->img; cpi->un_scaled_source = cpi->Source; *time_stamp = cpi->source->ts_start; *time_end = cpi->source->ts_end; *frame_flags = cpi->source->flags; if (cpi->pass == 1 && cm->current_video_frame > 0) { cpi->last_frame_unscaled_source = &cpi->last_source->img; } } else { *size = 0; #if !CONFIG_REALTIME_ONLY if (flush && cpi->pass == 1 && !cpi->twopass.first_pass_done) { vp8_end_first_pass(cpi); /* get last stats packet */ cpi->twopass.first_pass_done = 1; } #endif return -1; } if (cpi->source->ts_start < cpi->first_time_stamp_ever) { cpi->first_time_stamp_ever = cpi->source->ts_start; cpi->last_end_time_stamp_seen = cpi->source->ts_start; } /* adjust frame rates based on timestamps given */ if (cm->show_frame) { int64_t this_duration; int step = 0; if (cpi->source->ts_start == cpi->first_time_stamp_ever) { this_duration = cpi->source->ts_end - cpi->source->ts_start; step = 1; } else { int64_t last_duration; this_duration = cpi->source->ts_end - cpi->last_end_time_stamp_seen; last_duration = cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen; /* do a step update if the duration changes by 10% */ if (last_duration) { step = (int)(((this_duration - last_duration) * 10 / last_duration)); } } if (this_duration) { if (step) { cpi->ref_framerate = 10000000.0 / this_duration; } else { double avg_duration, interval; /* Average this frame's rate into the last second's average * frame rate. If we haven't seen 1 second yet, then average * over the whole interval seen. */ interval = (double)(cpi->source->ts_end - cpi->first_time_stamp_ever); if (interval > 10000000.0) interval = 10000000; avg_duration = 10000000.0 / cpi->ref_framerate; avg_duration *= (interval - avg_duration + this_duration); avg_duration /= interval; cpi->ref_framerate = 10000000.0 / avg_duration; } #if CONFIG_MULTI_RES_ENCODING if (cpi->oxcf.mr_total_resolutions > 1) { LOWER_RES_FRAME_INFO *low_res_frame_info = (LOWER_RES_FRAME_INFO *)cpi->oxcf.mr_low_res_mode_info; // Frame rate should be the same for all spatial layers in // multi-res-encoding (simulcast), so we constrain the frame rate for // higher layers to be that of lowest resolution. This is needed // as the application may decide to skip encoding a high layer and // then start again, in which case a big jump in time-stamps will // be received for that high layer, which will yield an incorrect // frame rate (from time-stamp adjustment in above calculation). if (cpi->oxcf.mr_encoder_id) { if (!low_res_frame_info->skip_encoding_base_stream) cpi->ref_framerate = low_res_frame_info->low_res_framerate; } else { // Keep track of frame rate for lowest resolution. low_res_frame_info->low_res_framerate = cpi->ref_framerate; // The base stream is being encoded so set skip flag to 0.
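// (Higher-resolution encoders check this flag above before reusing the
// base-layer frame rate, so clearing it here marks that estimate as fresh.)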
low_res_frame_info->skip_encoding_base_stream = 0; } } #endif if (cpi->oxcf.number_of_layers > 1) { unsigned int i; /* Update frame rates for each layer */ assert(cpi->oxcf.number_of_layers <= VPX_TS_MAX_LAYERS); for (i = 0; i < cpi->oxcf.number_of_layers && i < VPX_TS_MAX_LAYERS; ++i) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; lc->framerate = cpi->ref_framerate / cpi->oxcf.rate_decimator[i]; } } else { vp8_new_framerate(cpi, cpi->ref_framerate); } } cpi->last_time_stamp_seen = cpi->source->ts_start; cpi->last_end_time_stamp_seen = cpi->source->ts_end; } if (cpi->oxcf.number_of_layers > 1) { int layer; update_layer_contexts(cpi); /* Restore layer specific context & set frame rate */ if (cpi->temporal_layer_id >= 0) { layer = cpi->temporal_layer_id; } else { layer = cpi->oxcf .layer_id[cpi->temporal_pattern_counter % cpi->oxcf.periodicity]; } restore_layer_context(cpi, layer); vp8_new_framerate(cpi, cpi->layer_context[layer].framerate); } if (cpi->compressor_speed == 2) { vpx_usec_timer_start(&tsctimer); vpx_usec_timer_start(&ticktimer); } cpi->lf_zeromv_pct = (cpi->zeromv_count * 100) / cm->MBs; #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING { int i; const int num_part = (1 << cm->multi_token_partition); /* the available bytes in dest */ const unsigned long dest_size = dest_end - dest; const int tok_part_buff_size = (dest_size * 9) / (10 * num_part); unsigned char *dp = dest; cpi->partition_d[0] = dp; dp += dest_size / 10; /* reserve 1/10 for control partition */ cpi->partition_d_end[0] = dp; for (i = 0; i < num_part; ++i) { cpi->partition_d[i + 1] = dp; dp += tok_part_buff_size; cpi->partition_d_end[i + 1] = dp; } } #endif /* start with a 0 size frame */ *size = 0; /* Clear down mmx registers */ vpx_clear_system_state(); cm->frame_type = INTER_FRAME; cm->frame_flags = *frame_flags; #if 0 if (cm->refresh_alt_ref_frame) { cm->refresh_golden_frame = 0; cm->refresh_last_frame = 0; } else { cm->refresh_golden_frame = 0; cm->refresh_last_frame = 1; } #endif /* find a free buffer for the new frame */ { int i = 0; for (; i < NUM_YV12_BUFFERS; ++i) { if (!cm->yv12_fb[i].flags) { cm->new_fb_idx = i; break; } } assert(i < NUM_YV12_BUFFERS); } switch (cpi->pass) { #if !CONFIG_REALTIME_ONLY case 1: Pass1Encode(cpi); break; case 2: Pass2Encode(cpi, size, dest, dest_end, frame_flags); break; #endif // !CONFIG_REALTIME_ONLY default: encode_frame_to_data_rate(cpi, size, dest, dest_end, frame_flags); break; } if (cpi->compressor_speed == 2) { unsigned int duration, duration2; vpx_usec_timer_mark(&tsctimer); vpx_usec_timer_mark(&ticktimer); duration = (int)(vpx_usec_timer_elapsed(&ticktimer)); duration2 = (unsigned int)((double)duration / 2); if (cm->frame_type != KEY_FRAME) { if (cpi->avg_encode_time == 0) { cpi->avg_encode_time = duration; } else { cpi->avg_encode_time = (7 * cpi->avg_encode_time + duration) >> 3; } } if (duration2) { { if (cpi->avg_pick_mode_time == 0) { cpi->avg_pick_mode_time = duration2; } else { cpi->avg_pick_mode_time = (7 * cpi->avg_pick_mode_time + duration2) >> 3; } } } } if (cm->refresh_entropy_probs == 0) { memcpy(&cm->fc, &cm->lfc, sizeof(cm->fc)); } /* Save the contexts separately for alt ref, gold and last. */ /* (TODO jbb -> Optimize this with pointers to avoid extra copies. 
) */ if (cm->refresh_alt_ref_frame) memcpy(&cpi->lfc_a, &cm->fc, sizeof(cm->fc)); if (cm->refresh_golden_frame) memcpy(&cpi->lfc_g, &cm->fc, sizeof(cm->fc)); if (cm->refresh_last_frame) memcpy(&cpi->lfc_n, &cm->fc, sizeof(cm->fc)); /* if it's a dropped frame honor the requests on subsequent frames */ if (*size > 0) { cpi->droppable = !frame_is_reference(cpi); /* return to normal state */ cm->refresh_entropy_probs = 1; cm->refresh_alt_ref_frame = 0; cm->refresh_golden_frame = 0; cm->refresh_last_frame = 1; cm->frame_type = INTER_FRAME; } /* Save layer specific state */ if (cpi->oxcf.number_of_layers > 1) save_layer_context(cpi); vpx_usec_timer_mark(&cmptimer); cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer); if (cpi->b_calculate_psnr && cpi->pass != 1 && cm->show_frame) { generate_psnr_packet(cpi); } #if CONFIG_INTERNAL_STATS if (cpi->pass != 1) { cpi->bytes += *size; if (cm->show_frame) { cpi->common.show_frame_mi = cpi->common.mi; cpi->count++; if (cpi->b_calculate_psnr) { uint64_t ye, ue, ve; double frame_psnr; YV12_BUFFER_CONFIG *orig = cpi->Source; YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; unsigned int y_width = cpi->common.Width; unsigned int y_height = cpi->common.Height; unsigned int uv_width = (y_width + 1) / 2; unsigned int uv_height = (y_height + 1) / 2; int y_samples = y_height * y_width; int uv_samples = uv_height * uv_width; int t_samples = y_samples + 2 * uv_samples; double sq_error; ye = calc_plane_error(orig->y_buffer, orig->y_stride, recon->y_buffer, recon->y_stride, y_width, y_height); ue = calc_plane_error(orig->u_buffer, orig->uv_stride, recon->u_buffer, recon->uv_stride, uv_width, uv_height); ve = calc_plane_error(orig->v_buffer, orig->uv_stride, recon->v_buffer, recon->uv_stride, uv_width, uv_height); sq_error = (double)(ye + ue + ve); frame_psnr = vpx_sse_to_psnr(t_samples, 255.0, sq_error); cpi->total_y += vpx_sse_to_psnr(y_samples, 255.0, (double)ye); cpi->total_u += vpx_sse_to_psnr(uv_samples, 255.0, (double)ue); cpi->total_v += vpx_sse_to_psnr(uv_samples, 255.0, (double)ve); cpi->total_sq_error += sq_error; cpi->total += frame_psnr; #if CONFIG_POSTPROC { YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer; double sq_error2; double frame_psnr2, frame_ssim2 = 0; double weight = 0; vp8_deblock(cm, cm->frame_to_show, &cm->post_proc_buffer, cm->filter_level * 10 / 6); vpx_clear_system_state(); ye = calc_plane_error(orig->y_buffer, orig->y_stride, pp->y_buffer, pp->y_stride, y_width, y_height); ue = calc_plane_error(orig->u_buffer, orig->uv_stride, pp->u_buffer, pp->uv_stride, uv_width, uv_height); ve = calc_plane_error(orig->v_buffer, orig->uv_stride, pp->v_buffer, pp->uv_stride, uv_width, uv_height); sq_error2 = (double)(ye + ue + ve); frame_psnr2 = vpx_sse_to_psnr(t_samples, 255.0, sq_error2); cpi->totalp_y += vpx_sse_to_psnr(y_samples, 255.0, (double)ye); cpi->totalp_u += vpx_sse_to_psnr(uv_samples, 255.0, (double)ue); cpi->totalp_v += vpx_sse_to_psnr(uv_samples, 255.0, (double)ve); cpi->total_sq_error2 += sq_error2; cpi->totalp += frame_psnr2; frame_ssim2 = vpx_calc_ssim(cpi->Source, &cm->post_proc_buffer, &weight); cpi->summed_quality += frame_ssim2 * weight; cpi->summed_weights += weight; if (cpi->oxcf.number_of_layers > 1) { unsigned int i; for (i = cpi->current_layer; i < cpi->oxcf.number_of_layers; ++i) { cpi->frames_in_layer[i]++; cpi->bytes_in_layer[i] += *size; cpi->sum_psnr[i] += frame_psnr; cpi->sum_psnr_p[i] += frame_psnr2; cpi->total_error2[i] += sq_error; cpi->total_error2_p[i] += sq_error2; cpi->sum_ssim[i] += frame_ssim2 *
weight; cpi->sum_weights[i] += weight; } } } #endif } } } #if 0 if (cpi->common.frame_type != 0 && cpi->common.base_qindex == cpi->oxcf.worst_allowed_q) { skiptruecount += cpi->skip_true_count; skipfalsecount += cpi->skip_false_count; } #endif #if 0 if (cpi->pass != 1) { FILE *f = fopen("skip.stt", "a"); fprintf(f, "frame:%4d flags:%4x Q:%4d P:%4d Size:%5d\n", cpi->common.current_video_frame, *frame_flags, cpi->common.base_qindex, cpi->prob_skip_false, *size); if (cpi->is_src_frame_alt_ref == 1) fprintf(f, "skipcount: %4d framesize: %d\n", cpi->skip_true_count , *size); fclose(f); } #endif #endif cpi->common.error.setjmp = 0; #if CONFIG_MULTITHREAD /* wait for the lpf thread to finish */ if (vpx_atomic_load_acquire(&cpi->b_multi_threaded) && cpi->b_lpf_running) { sem_wait(&cpi->h_event_end_lpf); cpi->b_lpf_running = 0; } #endif return 0; } int vp8_get_preview_raw_frame(VP8_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp8_ppflags_t *flags) { if (cpi->common.refresh_alt_ref_frame) { return -1; } else { int ret; #if CONFIG_POSTPROC cpi->common.show_frame_mi = cpi->common.mi; ret = vp8_post_proc_frame(&cpi->common, dest, flags); #else (void)flags; if (cpi->common.frame_to_show) { *dest = *cpi->common.frame_to_show; dest->y_width = cpi->common.Width; dest->y_height = cpi->common.Height; dest->uv_height = cpi->common.Height / 2; ret = 0; } else { ret = -1; } #endif vpx_clear_system_state(); return ret; } } int vp8_set_roimap(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[4], int delta_lf[4], unsigned int threshold[4]) { signed char feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; int internal_delta_q[MAX_MB_SEGMENTS]; const int range = 63; int i; // Check that the number of rows and columns match if (cpi->common.mb_rows != (int)rows || cpi->common.mb_cols != (int)cols) { return -1; } // Range check the delta Q values and convert the external Q range values // to internal ones. if ((abs(delta_q[0]) > range) || (abs(delta_q[1]) > range) || (abs(delta_q[2]) > range) || (abs(delta_q[3]) > range)) { return -1; } // Range check the delta lf values if ((abs(delta_lf[0]) > range) || (abs(delta_lf[1]) > range) || (abs(delta_lf[2]) > range) || (abs(delta_lf[3]) > range)) { return -1; } // Also disable segmentation if no deltas are specified. if (!map || (delta_q[0] == 0 && delta_q[1] == 0 && delta_q[2] == 0 && delta_q[3] == 0 && delta_lf[0] == 0 && delta_lf[1] == 0 && delta_lf[2] == 0 && delta_lf[3] == 0 && threshold[0] == 0 && threshold[1] == 0 && threshold[2] == 0 && threshold[3] == 0)) { disable_segmentation(cpi); return 0; } // Translate the external delta q values to internal values. for (i = 0; i < MAX_MB_SEGMENTS; ++i) { internal_delta_q[i] = (delta_q[i] >= 0) ? q_trans[delta_q[i]] : -q_trans[-delta_q[i]]; } /* Set the segmentation Map */ set_segmentation_map(cpi, map); /* Activate segmentation.
*/ enable_segmentation(cpi); /* Set up the quant segment data */ feature_data[MB_LVL_ALT_Q][0] = internal_delta_q[0]; feature_data[MB_LVL_ALT_Q][1] = internal_delta_q[1]; feature_data[MB_LVL_ALT_Q][2] = internal_delta_q[2]; feature_data[MB_LVL_ALT_Q][3] = internal_delta_q[3]; /* Set up the loop segment data */ feature_data[MB_LVL_ALT_LF][0] = delta_lf[0]; feature_data[MB_LVL_ALT_LF][1] = delta_lf[1]; feature_data[MB_LVL_ALT_LF][2] = delta_lf[2]; feature_data[MB_LVL_ALT_LF][3] = delta_lf[3]; cpi->segment_encode_breakout[0] = threshold[0]; cpi->segment_encode_breakout[1] = threshold[1]; cpi->segment_encode_breakout[2] = threshold[2]; cpi->segment_encode_breakout[3] = threshold[3]; /* Initialise the feature data structure */ set_segment_data(cpi, &feature_data[0][0], SEGMENT_DELTADATA); if (threshold[0] != 0 || threshold[1] != 0 || threshold[2] != 0 || threshold[3] != 0) cpi->use_roi_static_threshold = 1; cpi->cyclic_refresh_mode_enabled = 0; return 0; } int vp8_set_active_map(VP8_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols) { if ((int)rows == cpi->common.mb_rows && (int)cols == cpi->common.mb_cols) { if (map) { memcpy(cpi->active_map, map, rows * cols); cpi->active_map_enabled = 1; } else { cpi->active_map_enabled = 0; } return 0; } else { return -1; } } int vp8_set_internal_size(VP8_COMP *cpi, VPX_SCALING horiz_mode, VPX_SCALING vert_mode) { if (horiz_mode <= ONETWO) { cpi->common.horiz_scale = horiz_mode; } else { return -1; } if (vert_mode <= ONETWO) { cpi->common.vert_scale = vert_mode; } else { return -1; } return 0; } int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) { int i, j; int Total = 0; unsigned char *src = source->y_buffer; unsigned char *dst = dest->y_buffer; /* Loop through the Y plane raw and reconstruction data summing * (square differences) */ for (i = 0; i < source->y_height; i += 16) { for (j = 0; j < source->y_width; j += 16) { unsigned int sse; Total += vpx_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse); } src += 16 * source->y_stride; dst += 16 * dest->y_stride; } return Total; } int vp8_get_quantizer(VP8_COMP *cpi) { return cpi->common.base_qindex; } libvpx-1.8.2/vp8/encoder/onyx_int.h000066400000000000000000000461731357355204000172150ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_ONYX_INT_H_ #define VPX_VP8_ENCODER_ONYX_INT_H_ #include #include "vpx_config.h" #include "vp8/common/onyx.h" #include "treewriter.h" #include "tokenize.h" #include "vp8/common/onyxc_int.h" #include "vpx_dsp/variance.h" #include "encodemb.h" #include "vp8/encoder/quantize.h" #include "vp8/common/entropy.h" #include "vp8/common/threading.h" #include "vpx_ports/mem.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx/vp8.h" #include "mcomp.h" #include "vp8/common/findnearmv.h" #include "lookahead.h" #if CONFIG_TEMPORAL_DENOISING #include "vp8/encoder/denoising.h" #endif #ifdef __cplusplus extern "C" { #endif #define MIN_GF_INTERVAL 4 #define DEFAULT_GF_INTERVAL 7 #define KEY_FRAME_CONTEXT 5 #define MAX_LAG_BUFFERS (CONFIG_REALTIME_ONLY ?
1 : 25) #define AF_THRESH 25 #define AF_THRESH2 100 #define ARF_DECAY_THRESH 12 #define MIN_THRESHMULT 32 #define MAX_THRESHMULT 512 #define GF_ZEROMV_ZBIN_BOOST 12 #define LF_ZEROMV_ZBIN_BOOST 6 #define MV_ZBIN_BOOST 4 #define ZBIN_OQ_MAX 192 #define VP8_TEMPORAL_ALT_REF !CONFIG_REALTIME_ONLY /* vp8 uses 10,000,000 ticks/second as time stamp */ #define TICKS_PER_SEC 10000000 typedef struct { int kf_indicated; unsigned int frames_since_key; unsigned int frames_since_golden; int filter_level; int frames_till_gf_update_due; int recent_ref_frame_usage[MAX_REF_FRAMES]; MV_CONTEXT mvc[2]; int mvcosts[2][MVvals + 1]; #ifdef MODE_STATS int y_modes[5]; int uv_modes[4]; int b_modes[10]; int inter_y_modes[10]; int inter_uv_modes[4]; int inter_b_modes[10]; #endif vp8_prob ymode_prob[4], uv_mode_prob[3]; /* interframe intra mode probs */ vp8_prob kf_ymode_prob[4], kf_uv_mode_prob[3]; /* keyframe "" */ int ymode_count[5], uv_mode_count[4]; /* intra MB type cts this frame */ int count_mb_ref_frame_usage[MAX_REF_FRAMES]; int this_frame_percent_intra; int last_frame_percent_intra; } CODING_CONTEXT; typedef struct { double frame; double intra_error; double coded_error; double ssim_weighted_pred_err; double pcnt_inter; double pcnt_motion; double pcnt_second_ref; double pcnt_neutral; double MVr; double mvr_abs; double MVc; double mvc_abs; double MVrv; double MVcv; double mv_in_out_count; double new_mv_count; double duration; double count; } FIRSTPASS_STATS; typedef struct { int frames_so_far; double frame_intra_error; double frame_coded_error; double frame_pcnt_inter; double frame_pcnt_motion; double frame_mvr; double frame_mvr_abs; double frame_mvc; double frame_mvc_abs; } ONEPASS_FRAMESTATS; typedef enum { THR_ZERO1 = 0, THR_DC = 1, THR_NEAREST1 = 2, THR_NEAR1 = 3, THR_ZERO2 = 4, THR_NEAREST2 = 5, THR_ZERO3 = 6, THR_NEAREST3 = 7, THR_NEAR2 = 8, THR_NEAR3 = 9, THR_V_PRED = 10, THR_H_PRED = 11, THR_TM = 12, THR_NEW1 = 13, THR_NEW2 = 14, THR_NEW3 = 15, THR_SPLIT1 = 16, THR_SPLIT2 = 17, THR_SPLIT3 = 18, THR_B_PRED = 19 } THR_MODES; typedef enum { DIAMOND = 0, NSTEP = 1, HEX = 2 } SEARCH_METHODS; typedef struct { int RD; SEARCH_METHODS search_method; int improved_quant; int improved_dct; int auto_filter; int recode_loop; int iterative_sub_pixel; int half_pixel_search; int quarter_pixel_search; int thresh_mult[MAX_MODES]; int max_step_search_steps; int first_step; int optimize_coefficients; int use_fastquant_for_pick; int no_skip_block4x4_search; int improved_mv_pred; } SPEED_FEATURES; typedef struct { MACROBLOCK mb; int segment_counts[MAX_MB_SEGMENTS]; int totalrate; } MB_ROW_COMP; typedef struct { TOKENEXTRA *start; TOKENEXTRA *stop; } TOKENLIST; typedef struct { int ithread; void *ptr1; void *ptr2; } ENCODETHREAD_DATA; typedef struct { int ithread; void *ptr1; } LPFTHREAD_DATA; enum { BLOCK_16X8, BLOCK_8X16, BLOCK_8X8, BLOCK_4X4, BLOCK_16X16, BLOCK_MAX_SEGMENTS }; typedef struct { /* Layer configuration */ double framerate; int target_bandwidth; /* Layer specific coding parameters */ int64_t starting_buffer_level; int64_t optimal_buffer_level; int64_t maximum_buffer_size; int64_t starting_buffer_level_in_ms; int64_t optimal_buffer_level_in_ms; int64_t maximum_buffer_size_in_ms; int avg_frame_size_for_layer; int64_t buffer_level; int64_t bits_off_target; int64_t total_actual_bits; int total_target_vs_actual; int worst_quality; int active_worst_quality; int best_quality; int active_best_quality; int ni_av_qi; int ni_tot_qi; int ni_frames; int avg_frame_qindex; double rate_correction_factor; double 
key_frame_rate_correction_factor; double gf_rate_correction_factor; int zbin_over_quant; int inter_frame_target; int64_t total_byte_count; int filter_level; int frames_since_last_drop_overshoot; int force_maxqp; int last_frame_percent_intra; int count_mb_ref_frame_usage[MAX_REF_FRAMES]; int last_q[2]; } LAYER_CONTEXT; typedef struct VP8_COMP { DECLARE_ALIGNED(16, short, Y1quant[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y1quant_shift[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y1zbin[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y1round[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y2quant[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y2quant_shift[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y2zbin[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y2round[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, UVquant[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, UVquant_shift[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, UVzbin[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, UVround[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_y1[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_y2[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, zrun_zbin_boost_uv[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y1quant_fast[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, Y2quant_fast[QINDEX_RANGE][16]); DECLARE_ALIGNED(16, short, UVquant_fast[QINDEX_RANGE][16]); MACROBLOCK mb; VP8_COMMON common; vp8_writer bc[9]; /* one boolcoder for each partition */ VP8_CONFIG oxcf; struct lookahead_ctx *lookahead; struct lookahead_entry *source; struct lookahead_entry *alt_ref_source; struct lookahead_entry *last_source; YV12_BUFFER_CONFIG *Source; YV12_BUFFER_CONFIG *un_scaled_source; YV12_BUFFER_CONFIG scaled_source; YV12_BUFFER_CONFIG *last_frame_unscaled_source; unsigned int frames_till_alt_ref_frame; /* frame in src_buffers has been identified to be encoded as an alt ref */ int source_alt_ref_pending; /* an alt ref frame has been encoded and is usable */ int source_alt_ref_active; /* source of frame to encode is an exact copy of an alt ref frame */ int is_src_frame_alt_ref; /* golden frame same as last frame ( short circuit gold searches) */ int gold_is_last; /* Alt reference frame same as last ( short circuit altref search) */ int alt_is_last; /* don't do both alt and gold search ( just do gold). 
*/ int gold_is_alt; YV12_BUFFER_CONFIG pick_lf_lvl_frame; TOKENEXTRA *tok; unsigned int tok_count; unsigned int frames_since_key; unsigned int key_frame_frequency; unsigned int this_key_frame_forced; unsigned int next_key_frame_forced; /* Ambient reconstruction err target for forced key frames */ int ambient_err; unsigned int mode_check_freq[MAX_MODES]; int rd_baseline_thresh[MAX_MODES]; int RDMULT; int RDDIV; CODING_CONTEXT coding_context; /* Rate targeting variables */ int64_t last_prediction_error; int64_t last_intra_error; int this_frame_target; int projected_frame_size; int last_q[2]; /* Separate values for Intra/Inter */ double rate_correction_factor; double key_frame_rate_correction_factor; double gf_rate_correction_factor; int frames_since_golden; /* Count down till next GF */ int frames_till_gf_update_due; /* GF interval chosen when we coded the last GF */ int current_gf_interval; /* Total bits overspent because of GF boost (cumulative) */ int gf_overspend_bits; /* Used in the few frames following a GF to recover the extra bits * spent in that GF */ int non_gf_bitrate_adjustment; /* Extra bits spent on key frames that need to be recovered */ int kf_overspend_bits; /* Current number of bits to try and recover on each inter frame. */ int kf_bitrate_adjustment; int max_gf_interval; int baseline_gf_interval; int active_arnr_frames; int64_t key_frame_count; int prior_key_frame_distance[KEY_FRAME_CONTEXT]; /* Current section per frame bandwidth target */ int per_frame_bandwidth; /* Average frame size target for clip */ int av_per_frame_bandwidth; /* Minimum allocation that should be used for any frame */ int min_frame_bandwidth; int inter_frame_target; double output_framerate; int64_t last_time_stamp_seen; int64_t last_end_time_stamp_seen; int64_t first_time_stamp_ever; int ni_av_qi; int ni_tot_qi; int ni_frames; int avg_frame_qindex; int64_t total_byte_count; int buffered_mode; double framerate; double ref_framerate; int64_t buffer_level; int64_t bits_off_target; int rolling_target_bits; int rolling_actual_bits; int long_rolling_target_bits; int long_rolling_actual_bits; int64_t total_actual_bits; int total_target_vs_actual; /* debug stats */ int worst_quality; int active_worst_quality; int best_quality; int active_best_quality; int cq_target_quality; int drop_frames_allowed; /* Are we permitted to drop frames? */ int drop_frame; /* Drop this frame?
#if defined(DROP_UNCODED_FRAMES) int drop_frame_count; #endif vp8_prob frame_coef_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] [ENTROPY_NODES]; char update_probs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][ENTROPY_NODES]; unsigned int frame_branch_ct[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] [ENTROPY_NODES][2]; int gfu_boost; int kf_boost; int last_boost; int target_bandwidth; struct vpx_codec_pkt_list *output_pkt_list; #if 0 /* Experimental code for lagged and one pass */ ONEPASS_FRAMESTATS one_pass_frame_stats[MAX_LAG_BUFFERS]; int one_pass_frame_index; #endif int decimation_factor; int decimation_count; /* for real time encoding */ int avg_encode_time; /* microsecond */ int avg_pick_mode_time; /* microsecond */ int Speed; int compressor_speed; int auto_gold; int auto_adjust_gold_quantizer; int auto_worst_q; int cpu_used; int pass; int prob_intra_coded; int prob_last_coded; int prob_gf_coded; int prob_skip_false; int last_skip_false_probs[3]; int last_skip_probs_q[3]; int recent_ref_frame_usage[MAX_REF_FRAMES]; int this_frame_percent_intra; int last_frame_percent_intra; int ref_frame_flags; SPEED_FEATURES sf; /* Count ZEROMV on all reference frames. */ int zeromv_count; int lf_zeromv_pct; unsigned char *skin_map; unsigned char *segmentation_map; signed char segment_feature_data[MB_LVL_MAX][MAX_MB_SEGMENTS]; int segment_encode_breakout[MAX_MB_SEGMENTS]; unsigned char *active_map; unsigned int active_map_enabled; /* Video conferencing cyclic refresh mode flags. This is a mode * designed to clean up the background over time in live encoding * scenarios. It uses segmentation. */ int cyclic_refresh_mode_enabled; int cyclic_refresh_mode_max_mbs_perframe; int cyclic_refresh_mode_index; int cyclic_refresh_q; signed char *cyclic_refresh_map; // Count of how many (consecutive) times a macroblock uses ZEROMV_LAST. unsigned char *consec_zero_last; // Counter that is reset when a block is checked for a mode-bias against // ZEROMV_LASTREF. unsigned char *consec_zero_last_mvbias; // Frame counter for the temporal pattern. Counter is reset when the temporal // layers are changed dynamically (run-time change). unsigned int temporal_pattern_counter; // Temporal layer id. int temporal_layer_id; // Measure of average squared difference between source and denoised signal. int mse_source_denoised; int force_maxqp; int frames_since_last_drop_overshoot; int last_pred_err_mb; // GF update for 1 pass cbr.
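// (Reading of the three fields below, judging by their names and their use
// in the 1-pass CBR rate control: an enable flag for the periodic
// golden-frame update, the nominal update interval in frames, and a flag
// that suppresses the quality boost for those updates.)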
int gf_update_onepass_cbr; int gf_interval_onepass_cbr; int gf_noboost_onepass_cbr; #if CONFIG_MULTITHREAD /* multithread data */ vpx_atomic_int *mt_current_mb_col; int mt_sync_range; vpx_atomic_int b_multi_threaded; int encoding_thread_count; int b_lpf_running; pthread_t *h_encoding_thread; pthread_t h_filter_thread; MB_ROW_COMP *mb_row_ei; ENCODETHREAD_DATA *en_thread_data; LPFTHREAD_DATA lpf_thread_data; /* events */ sem_t *h_event_start_encoding; sem_t *h_event_end_encoding; sem_t h_event_start_lpf; sem_t h_event_end_lpf; #endif TOKENLIST *tplist; unsigned int partition_sz[MAX_PARTITIONS]; unsigned char *partition_d[MAX_PARTITIONS]; unsigned char *partition_d_end[MAX_PARTITIONS]; fractional_mv_step_fp *find_fractional_mv_step; vp8_full_search_fn_t full_search_sad; vp8_refining_search_fn_t refining_search_sad; vp8_diamond_search_fn_t diamond_search_sad; vp8_variance_fn_ptr_t fn_ptr[BLOCK_MAX_SEGMENTS]; uint64_t time_receive_data; uint64_t time_compress_data; uint64_t time_pick_lpf; uint64_t time_encode_mb_row; int base_skip_false_prob[128]; FRAME_CONTEXT lfc_n; /* last frame entropy */ FRAME_CONTEXT lfc_a; /* last alt ref entropy */ FRAME_CONTEXT lfc_g; /* last gold ref entropy */ struct twopass_rc { unsigned int section_intra_rating; double section_max_qfactor; unsigned int next_iiratio; unsigned int this_iiratio; FIRSTPASS_STATS total_stats; FIRSTPASS_STATS this_frame_stats; FIRSTPASS_STATS *stats_in, *stats_in_end, *stats_in_start; FIRSTPASS_STATS total_left_stats; int first_pass_done; int64_t bits_left; int64_t clip_bits_total; double avg_iiratio; double modified_error_total; double modified_error_used; double modified_error_left; double kf_intra_err_min; double gf_intra_err_min; int frames_to_key; int maxq_max_limit; int maxq_min_limit; int gf_decay_rate; int static_scene_max_gf_interval; int kf_bits; /* Remaining error from uncoded frames in a gf group. 
*/ int gf_group_error_left; /* Projected total bits available for a key frame group of frames */ int64_t kf_group_bits; /* Error score of frames still to be coded in kf group */ int64_t kf_group_error_left; /* Projected Bits available for a group including 1 GF or ARF */ int64_t gf_group_bits; /* Bits for the golden frame or ARF */ int gf_bits; int alt_extra_bits; double est_max_qcorrection_factor; } twopass; #if VP8_TEMPORAL_ALT_REF YV12_BUFFER_CONFIG alt_ref_buffer; YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS]; int fixed_divide[512]; #endif #if CONFIG_INTERNAL_STATS int count; double total_y; double total_u; double total_v; double total; double total_sq_error; double totalp_y; double totalp_u; double totalp_v; double totalp; double total_sq_error2; int bytes; double summed_quality; double summed_weights; unsigned int tot_recode_hits; int b_calculate_ssimg; #endif int b_calculate_psnr; /* Per MB activity measurement */ unsigned int activity_avg; unsigned int *mb_activity_map; /* Record of which MBs still refer to last golden frame either * directly or through 0,0 */ unsigned char *gf_active_flags; int gf_active_count; int output_partition; /* Store last frame's MV info for next frame MV prediction */ int_mv *lfmv; int *lf_ref_frame_sign_bias; int *lf_ref_frame; /* force next frame to intra when kf_auto says so */ int force_next_frame_intra; int droppable; int initial_width; int initial_height; #if CONFIG_TEMPORAL_DENOISING VP8_DENOISER denoiser; #endif /* Coding layer state variables */ unsigned int current_layer; LAYER_CONTEXT layer_context[VPX_TS_MAX_LAYERS]; int64_t frames_in_layer[VPX_TS_MAX_LAYERS]; int64_t bytes_in_layer[VPX_TS_MAX_LAYERS]; double sum_psnr[VPX_TS_MAX_LAYERS]; double sum_psnr_p[VPX_TS_MAX_LAYERS]; double total_error2[VPX_TS_MAX_LAYERS]; double total_error2_p[VPX_TS_MAX_LAYERS]; double sum_ssim[VPX_TS_MAX_LAYERS]; double sum_weights[VPX_TS_MAX_LAYERS]; double total_ssimg_y_in_layer[VPX_TS_MAX_LAYERS]; double total_ssimg_u_in_layer[VPX_TS_MAX_LAYERS]; double total_ssimg_v_in_layer[VPX_TS_MAX_LAYERS]; double total_ssimg_all_in_layer[VPX_TS_MAX_LAYERS]; #if CONFIG_MULTI_RES_ENCODING /* Number of MBs per row at lower-resolution level */ int mr_low_res_mb_cols; /* Indicate if lower-res mv info is available */ unsigned char mr_low_res_mv_avail; #endif /* The frame number of each reference frames */ unsigned int current_ref_frames[MAX_REF_FRAMES]; // Closest reference frame to current frame. MV_REFERENCE_FRAME closest_reference_frame; struct rd_costs_struct { int mvcosts[2][MVvals + 1]; int mvsadcosts[2][MVfpvals + 1]; int mbmode_cost[2][MB_MODE_COUNT]; int intra_uv_mode_cost[2][MB_MODE_COUNT]; int bmode_costs[10][10][10]; int inter_bmode_costs[B_MODE_COUNT]; int token_costs[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] [MAX_ENTROPY_TOKENS]; } rd_costs; // Use the static threshold from ROI settings. 
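// Set by vp8_set_roimap() in onyx_if.c when any of the per-segment
// encode-breakout thresholds passed in is nonzero.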
int use_roi_static_threshold; int ext_refresh_frame_flags_pending; } VP8_COMP; void vp8_initialize_enc(void); void vp8_alloc_compressor_data(VP8_COMP *cpi); int vp8_reverse_trans(int x); void vp8_new_framerate(VP8_COMP *cpi, double framerate); void vp8_loopfilter_frame(VP8_COMP *cpi, VP8_COMMON *cm); void vp8_pack_bitstream(VP8_COMP *cpi, unsigned char *dest, unsigned char *dest_end, size_t *size); void vp8_tokenize_mb(VP8_COMP *, MACROBLOCK *, TOKENEXTRA **); void vp8_set_speed_features(VP8_COMP *cpi); #if CONFIG_DEBUG #define CHECK_MEM_ERROR(lval, expr) \ do { \ (lval) = (expr); \ if (!(lval)) \ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, \ "Failed to allocate " #lval " at %s:%d", __FILE__, \ __LINE__); \ } while (0) #else #define CHECK_MEM_ERROR(lval, expr) \ do { \ (lval) = (expr); \ if (!(lval)) \ vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, \ "Failed to allocate " #lval); \ } while (0) #endif #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_ONYX_INT_H_ libvpx-1.8.2/vp8/encoder/pickinter.c000066400000000000000000001337201357355204000173300ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <limits.h> #include "vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "onyx_int.h" #include "modecosts.h" #include "encodeintra.h" #include "vp8/common/common.h" #include "vp8/common/entropymode.h" #include "pickinter.h" #include "vp8/common/findnearmv.h" #include "encodemb.h" #include "vp8/common/reconinter.h" #include "vp8/common/reconintra.h" #include "vp8/common/reconintra4x4.h" #include "vpx_dsp/variance.h" #include "mcomp.h" #include "vp8/common/vp8_skin_detection.h" #include "rdopt.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #if CONFIG_TEMPORAL_DENOISING #include "denoising.h" #endif #ifdef SPEEDSTATS extern unsigned int cnt_pm; #endif extern const int vp8_ref_frame_order[MAX_MODES]; extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES]; static int macroblock_corner_grad(unsigned char *signal, int stride, int offsetx, int offsety, int sgnx, int sgny) { int y1 = signal[offsetx * stride + offsety]; int y2 = signal[offsetx * stride + offsety + sgny]; int y3 = signal[(offsetx + sgnx) * stride + offsety]; int y4 = signal[(offsetx + sgnx) * stride + offsety + sgny]; return VPXMAX(VPXMAX(abs(y1 - y2), abs(y1 - y3)), abs(y1 - y4)); } static int check_dot_artifact_candidate(VP8_COMP *cpi, MACROBLOCK *x, unsigned char *target_last, int stride, unsigned char *last_ref, int mb_row, int mb_col, int channel) { int threshold1 = 6; int threshold2 = 3; unsigned int max_num = (cpi->common.MBs) / 10; int grad_last = 0; int grad_source = 0; int index = mb_row * cpi->common.mb_cols + mb_col; // Threshold for #consecutive (base layer) frames using zero_last mode. int num_frames = 30; int shift = 15; if (channel > 0) { shift = 7; } if (cpi->oxcf.number_of_layers > 1) { num_frames = 20; } x->zero_last_dot_suppress = 0; // Blocks on base layer frames that have been using ZEROMV_LAST repeatedly // (i.e., at least |x| consecutive frames) are candidates for increasing the // rd adjustment for zero_last mode. // Only allow this for at most |max_num| blocks per frame.
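// (Here |max_num| = MBs / 10, i.e. at most one tenth of the frame's
// macroblocks may be labeled per frame, and |num_frames| is 30 for a
// single-layer encode, 20 when temporal layers are in use.)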
// Don't allow this for screen content input. if (cpi->current_layer == 0 && cpi->consec_zero_last_mvbias[index] > num_frames && x->mbs_zero_last_dot_suppress < max_num && !cpi->oxcf.screen_content_mode) { // If this block is checked here, label it so we don't check it again until // ~|x| framaes later. x->zero_last_dot_suppress = 1; // Dot artifact is noticeable as strong gradient at corners of macroblock, // for flat areas. As a simple detector for now, we look for a high // corner gradient on last ref, and a smaller gradient on source. // Check 4 corners, return if any satisfy condition. // Top-left: grad_last = macroblock_corner_grad(last_ref, stride, 0, 0, 1, 1); grad_source = macroblock_corner_grad(target_last, stride, 0, 0, 1, 1); if (grad_last >= threshold1 && grad_source <= threshold2) { x->mbs_zero_last_dot_suppress++; return 1; } // Top-right: grad_last = macroblock_corner_grad(last_ref, stride, 0, shift, 1, -1); grad_source = macroblock_corner_grad(target_last, stride, 0, shift, 1, -1); if (grad_last >= threshold1 && grad_source <= threshold2) { x->mbs_zero_last_dot_suppress++; return 1; } // Bottom-left: grad_last = macroblock_corner_grad(last_ref, stride, shift, 0, -1, 1); grad_source = macroblock_corner_grad(target_last, stride, shift, 0, -1, 1); if (grad_last >= threshold1 && grad_source <= threshold2) { x->mbs_zero_last_dot_suppress++; return 1; } // Bottom-right: grad_last = macroblock_corner_grad(last_ref, stride, shift, shift, -1, -1); grad_source = macroblock_corner_grad(target_last, stride, shift, shift, -1, -1); if (grad_last >= threshold1 && grad_source <= threshold2) { x->mbs_zero_last_dot_suppress++; return 1; } return 0; } return 0; } int vp8_skip_fractional_mv_step(MACROBLOCK *mb, BLOCK *b, BLOCKD *d, int_mv *bestmv, int_mv *ref_mv, int error_per_bit, const vp8_variance_fn_ptr_t *vfp, int *mvcost[2], int *distortion, unsigned int *sse) { (void)b; (void)d; (void)ref_mv; (void)error_per_bit; (void)vfp; (void)mb; (void)mvcost; (void)distortion; (void)sse; bestmv->as_mv.row *= 8; bestmv->as_mv.col *= 8; return 0; } int vp8_get_inter_mbpred_error(MACROBLOCK *mb, const vp8_variance_fn_ptr_t *vfp, unsigned int *sse, int_mv this_mv) { BLOCK *b = &mb->block[0]; BLOCKD *d = &mb->e_mbd.block[0]; unsigned char *what = (*(b->base_src) + b->src); int what_stride = b->src_stride; int pre_stride = mb->e_mbd.pre.y_stride; unsigned char *in_what = mb->e_mbd.pre.y_buffer + d->offset; int in_what_stride = pre_stride; int xoffset = this_mv.as_mv.col & 7; int yoffset = this_mv.as_mv.row & 7; in_what += (this_mv.as_mv.row >> 3) * pre_stride + (this_mv.as_mv.col >> 3); if (xoffset | yoffset) { return vfp->svf(in_what, in_what_stride, xoffset, yoffset, what, what_stride, sse); } else { return vfp->vf(what, what_stride, in_what, in_what_stride, sse); } } static int get_prediction_error(BLOCK *be, BLOCKD *b) { unsigned char *sptr; unsigned char *dptr; sptr = (*(be->base_src) + be->src); dptr = b->predictor; return vpx_get4x4sse_cs(sptr, be->src_stride, dptr, 16); } static int pick_intra4x4block(MACROBLOCK *x, int ib, B_PREDICTION_MODE *best_mode, const int *mode_costs, int *bestrate, int *bestdistortion) { BLOCKD *b = &x->e_mbd.block[ib]; BLOCK *be = &x->block[ib]; int dst_stride = x->e_mbd.dst.y_stride; unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; B_PREDICTION_MODE mode; int best_rd = INT_MAX; int rate; int distortion; unsigned char *Above = dst - dst_stride; unsigned char *yleft = dst - 1; unsigned char top_left = Above[-1]; for (mode = B_DC_PRED; mode <= B_HE_PRED; ++mode) { 
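/* Each candidate 4x4 mode below is scored with a Lagrangian cost of the
 * form J = lambda * rate + distortion, folded into the RDCOST macro. As
 * a minimal sketch of the combination RDCOST performs (assumption: the
 * macro itself lives in rdopt.h, included above):
 *
 *   cost = ((128 + rate * rdmult) >> 8) + rddiv * distortion;
 *
 * with rdmult/rddiv derived from the active quantizer, so a cheap-to-code
 * mode can beat a slightly more accurate but expensive one. */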
int this_rd; rate = mode_costs[mode]; vp8_intra4x4_predict(Above, yleft, dst_stride, mode, b->predictor, 16, top_left); distortion = get_prediction_error(be, b); this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_rd) { *bestrate = rate; *bestdistortion = distortion; best_rd = this_rd; *best_mode = mode; } } b->bmi.as_mode = *best_mode; vp8_encode_intra4x4block(x, ib); return best_rd; } static int pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, int *best_dist) { MACROBLOCKD *const xd = &mb->e_mbd; int i; int cost = mb->mbmode_cost[xd->frame_type][B_PRED]; int error; int distortion = 0; const int *bmode_costs; intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16); bmode_costs = mb->inter_bmode_costs; for (i = 0; i < 16; ++i) { MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; B_PREDICTION_MODE best_mode = B_MODE_COUNT; int r = 0, d = 0; if (mb->e_mbd.frame_type == KEY_FRAME) { const B_PREDICTION_MODE A = above_block_mode(mic, i, mis); const B_PREDICTION_MODE L = left_block_mode(mic, i); bmode_costs = mb->bmode_costs[A][L]; } pick_intra4x4block(mb, i, &best_mode, bmode_costs, &r, &d); cost += r; distortion += d; assert(best_mode != B_MODE_COUNT); mic->bmi[i].as_mode = best_mode; /* Break out case where we have already exceeded best so far value * that was passed in */ if (distortion > *best_dist) break; } *Rate = cost; if (i == 16) { *best_dist = distortion; error = RDCOST(mb->rdmult, mb->rddiv, cost, distortion); } else { *best_dist = INT_MAX; error = INT_MAX; } return error; } static void pick_intra_mbuv_mode(MACROBLOCK *mb) { MACROBLOCKD *x = &mb->e_mbd; unsigned char *uabove_row = x->dst.u_buffer - x->dst.uv_stride; unsigned char *vabove_row = x->dst.v_buffer - x->dst.uv_stride; unsigned char *usrc_ptr = (mb->block[16].src + *mb->block[16].base_src); unsigned char *vsrc_ptr = (mb->block[20].src + *mb->block[20].base_src); int uvsrc_stride = mb->block[16].src_stride; unsigned char uleft_col[8]; unsigned char vleft_col[8]; unsigned char utop_left = uabove_row[-1]; unsigned char vtop_left = vabove_row[-1]; int i, j; int expected_udc; int expected_vdc; int shift; int Uaverage = 0; int Vaverage = 0; int diff; int pred_error[4] = { 0, 0, 0, 0 }, best_error = INT_MAX; MB_PREDICTION_MODE best_mode = MB_MODE_COUNT; for (i = 0; i < 8; ++i) { uleft_col[i] = x->dst.u_buffer[i * x->dst.uv_stride - 1]; vleft_col[i] = x->dst.v_buffer[i * x->dst.uv_stride - 1]; } if (!x->up_available && !x->left_available) { expected_udc = 128; expected_vdc = 128; } else { shift = 2; if (x->up_available) { for (i = 0; i < 8; ++i) { Uaverage += uabove_row[i]; Vaverage += vabove_row[i]; } shift++; } if (x->left_available) { for (i = 0; i < 8; ++i) { Uaverage += uleft_col[i]; Vaverage += vleft_col[i]; } shift++; } expected_udc = (Uaverage + (1 << (shift - 1))) >> shift; expected_vdc = (Vaverage + (1 << (shift - 1))) >> shift; } for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) { int predu = uleft_col[i] + uabove_row[j] - utop_left; int predv = vleft_col[i] + vabove_row[j] - vtop_left; int u_p, v_p; u_p = usrc_ptr[j]; v_p = vsrc_ptr[j]; if (predu < 0) predu = 0; if (predu > 255) predu = 255; if (predv < 0) predv = 0; if (predv > 255) predv = 255; diff = u_p - expected_udc; pred_error[DC_PRED] += diff * diff; diff = v_p - expected_vdc; pred_error[DC_PRED] += diff * diff; diff = u_p - uabove_row[j]; pred_error[V_PRED] += diff * diff; diff = v_p - vabove_row[j]; pred_error[V_PRED] += diff * diff; diff = u_p - uleft_col[i]; pred_error[H_PRED] += diff 
* diff; diff = v_p - vleft_col[i]; pred_error[H_PRED] += diff * diff; diff = u_p - predu; pred_error[TM_PRED] += diff * diff; diff = v_p - predv; pred_error[TM_PRED] += diff * diff; } usrc_ptr += uvsrc_stride; vsrc_ptr += uvsrc_stride; if (i == 3) { usrc_ptr = (mb->block[18].src + *mb->block[18].base_src); vsrc_ptr = (mb->block[22].src + *mb->block[22].base_src); } } for (i = DC_PRED; i <= TM_PRED; ++i) { if (best_error > pred_error[i]) { best_error = pred_error[i]; best_mode = (MB_PREDICTION_MODE)i; } } assert(best_mode != MB_MODE_COUNT); mb->e_mbd.mode_info_context->mbmi.uv_mode = best_mode; } static void update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) { MACROBLOCKD *xd = &x->e_mbd; /* Split MV modes currently not supported when RD is nopt enabled, * therefore, only need to modify MVcount in NEWMV mode. */ if (xd->mode_info_context->mbmi.mode == NEWMV) { x->MVcount[0][mv_max + ((xd->mode_info_context->mbmi.mv.as_mv.row - best_ref_mv->as_mv.row) >> 1)]++; x->MVcount[1][mv_max + ((xd->mode_info_context->mbmi.mv.as_mv.col - best_ref_mv->as_mv.col) >> 1)]++; } } #if CONFIG_MULTI_RES_ENCODING static void get_lower_res_motion_info(VP8_COMP *cpi, MACROBLOCKD *xd, int *dissim, int *parent_ref_frame, MB_PREDICTION_MODE *parent_mode, int_mv *parent_ref_mv, int mb_row, int mb_col) { LOWER_RES_MB_INFO *store_mode_info = ((LOWER_RES_FRAME_INFO *)cpi->oxcf.mr_low_res_mode_info)->mb_info; unsigned int parent_mb_index; /* Consider different down_sampling_factor. */ { /* TODO: Removed the loop that supports special down_sampling_factor * such as 2, 4, 8. Will revisit it if needed. * Should also try using a look-up table to see if it helps * performance. */ int parent_mb_row, parent_mb_col; parent_mb_row = mb_row * cpi->oxcf.mr_down_sampling_factor.den / cpi->oxcf.mr_down_sampling_factor.num; parent_mb_col = mb_col * cpi->oxcf.mr_down_sampling_factor.den / cpi->oxcf.mr_down_sampling_factor.num; parent_mb_index = parent_mb_row * cpi->mr_low_res_mb_cols + parent_mb_col; } /* Read lower-resolution mode & motion result from memory.*/ *parent_ref_frame = store_mode_info[parent_mb_index].ref_frame; *parent_mode = store_mode_info[parent_mb_index].mode; *dissim = store_mode_info[parent_mb_index].dissim; /* For highest-resolution encoder, adjust dissim value. Lower its quality * for good performance. */ if (cpi->oxcf.mr_encoder_id == (cpi->oxcf.mr_total_resolutions - 1)) *dissim >>= 1; if (*parent_ref_frame != INTRA_FRAME) { /* Consider different down_sampling_factor. * The result can be rounded to be more precise, but it takes more time. 
*/ (*parent_ref_mv).as_mv.row = store_mode_info[parent_mb_index].mv.as_mv.row * cpi->oxcf.mr_down_sampling_factor.num / cpi->oxcf.mr_down_sampling_factor.den; (*parent_ref_mv).as_mv.col = store_mode_info[parent_mb_index].mv.as_mv.col * cpi->oxcf.mr_down_sampling_factor.num / cpi->oxcf.mr_down_sampling_factor.den; vp8_clamp_mv2(parent_ref_mv, xd); } } #endif static void check_for_encode_breakout(unsigned int sse, MACROBLOCK *x) { MACROBLOCKD *xd = &x->e_mbd; unsigned int threshold = (xd->block[0].dequant[1] * xd->block[0].dequant[1] >> 4); if (threshold < x->encode_breakout) threshold = x->encode_breakout; if (sse < threshold) { /* Check u and v to make sure skip is ok */ unsigned int sse2 = 0; sse2 = VP8_UVSSE(x); if (sse2 * 2 < x->encode_breakout) { x->skip = 1; } else { x->skip = 0; } } } static int evaluate_inter_mode(unsigned int *sse, int rate2, int *distortion2, VP8_COMP *cpi, MACROBLOCK *x, int rd_adj) { MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; int_mv mv = x->e_mbd.mode_info_context->mbmi.mv; int this_rd; int denoise_aggressive = 0; /* Exit early and don't compute the distortion if this macroblock * is marked inactive. */ if (cpi->active_map_enabled && x->active_ptr[0] == 0) { *sse = 0; *distortion2 = 0; x->skip = 1; return INT_MAX; } if ((this_mode != NEWMV) || !(cpi->sf.half_pixel_search) || cpi->common.full_pixel == 1) { *distortion2 = vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], sse, mv); } this_rd = RDCOST(x->rdmult, x->rddiv, rate2, *distortion2); #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0) { denoise_aggressive = (cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive) ? 1 : 0; } #endif // Adjust rd for ZEROMV and LAST, if LAST is the closest reference frame. // TODO: We should also add condition on distance of closest to current. if (!cpi->oxcf.screen_content_mode && this_mode == ZEROMV && x->e_mbd.mode_info_context->mbmi.ref_frame == LAST_FRAME && (denoise_aggressive || (cpi->closest_reference_frame == LAST_FRAME))) { // No adjustment if block is considered to be skin area. 
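// rd_adj is a percentage scale: 100 leaves the RD cost unchanged, while
// the 80/90 values produced by calculate_zeromv_rd_adjustment() shrink
// this_rd below and so bias the decision toward ZEROMV/LAST. Resetting
// it to 100 here simply disables that bias for skin blocks.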
if (x->is_skin) rd_adj = 100; this_rd = (int)(((int64_t)this_rd) * rd_adj / 100); } check_for_encode_breakout(*sse, x); return this_rd; } static void calculate_zeromv_rd_adjustment(VP8_COMP *cpi, MACROBLOCK *x, int *rd_adjustment) { MODE_INFO *mic = x->e_mbd.mode_info_context; int_mv mv_l, mv_a, mv_al; int local_motion_check = 0; if (cpi->lf_zeromv_pct > 40) { /* left mb */ mic -= 1; mv_l = mic->mbmi.mv; if (mic->mbmi.ref_frame != INTRA_FRAME) { if (abs(mv_l.as_mv.row) < 8 && abs(mv_l.as_mv.col) < 8) { local_motion_check++; } } /* above-left mb */ mic -= x->e_mbd.mode_info_stride; mv_al = mic->mbmi.mv; if (mic->mbmi.ref_frame != INTRA_FRAME) { if (abs(mv_al.as_mv.row) < 8 && abs(mv_al.as_mv.col) < 8) { local_motion_check++; } } /* above mb */ mic += 1; mv_a = mic->mbmi.mv; if (mic->mbmi.ref_frame != INTRA_FRAME) { if (abs(mv_a.as_mv.row) < 8 && abs(mv_a.as_mv.col) < 8) { local_motion_check++; } } if (((!x->e_mbd.mb_to_top_edge || !x->e_mbd.mb_to_left_edge) && local_motion_check > 0) || local_motion_check > 2) { *rd_adjustment = 80; } else if (local_motion_check > 0) { *rd_adjustment = 90; } } } void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra, int mb_row, int mb_col) { BLOCK *b = &x->block[0]; BLOCKD *d = &x->e_mbd.block[0]; MACROBLOCKD *xd = &x->e_mbd; MB_MODE_INFO best_mbmode; int_mv best_ref_mv_sb[2] = { { 0 }, { 0 } }; int_mv mode_mv_sb[2][MB_MODE_COUNT]; int_mv best_ref_mv; int_mv *mode_mv; MB_PREDICTION_MODE this_mode; int num00; int mdcounts[4]; int best_rd = INT_MAX; int rd_adjustment = 100; int best_intra_rd = INT_MAX; int mode_index; int rate; int rate2; int distortion2; int bestsme = INT_MAX; int best_mode_index = 0; unsigned int sse = UINT_MAX, best_rd_sse = UINT_MAX; #if CONFIG_TEMPORAL_DENOISING unsigned int zero_mv_sse = UINT_MAX, best_sse = UINT_MAX; #endif int sf_improved_mv_pred = cpi->sf.improved_mv_pred; #if CONFIG_MULTI_RES_ENCODING int dissim = INT_MAX; int parent_ref_frame = 0; int_mv parent_ref_mv; MB_PREDICTION_MODE parent_mode = 0; int parent_ref_valid = 0; #endif int_mv mvp; int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; int saddone = 0; /* search range got from mv_pred(). It uses step_param levels. (0-7) */ int sr = 0; unsigned char *plane[4][3] = { { 0, 0 } }; int ref_frame_map[4]; int sign_bias = 0; int dot_artifact_candidate = 0; get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset); // If the current frame is using LAST as a reference, check for // biasing the mode selection for dot artifacts. if (cpi->ref_frame_flags & VP8_LAST_FRAME) { unsigned char *target_y = x->src.y_buffer; unsigned char *target_u = x->block[16].src + *x->block[16].base_src; unsigned char *target_v = x->block[20].src + *x->block[20].base_src; int stride = x->src.y_stride; int stride_uv = x->block[16].src_stride; #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { const int uv_denoise = (cpi->oxcf.noise_sensitivity >= 2) ? 
1 : 0; target_y = cpi->denoiser.yv12_running_avg[LAST_FRAME].y_buffer + recon_yoffset; stride = cpi->denoiser.yv12_running_avg[LAST_FRAME].y_stride; if (uv_denoise) { target_u = cpi->denoiser.yv12_running_avg[LAST_FRAME].u_buffer + recon_uvoffset; target_v = cpi->denoiser.yv12_running_avg[LAST_FRAME].v_buffer + recon_uvoffset; stride_uv = cpi->denoiser.yv12_running_avg[LAST_FRAME].uv_stride; } } #endif assert(plane[LAST_FRAME][0] != NULL); dot_artifact_candidate = check_dot_artifact_candidate( cpi, x, target_y, stride, plane[LAST_FRAME][0], mb_row, mb_col, 0); // If not found in Y channel, check UV channel. if (!dot_artifact_candidate) { assert(plane[LAST_FRAME][1] != NULL); dot_artifact_candidate = check_dot_artifact_candidate( cpi, x, target_u, stride_uv, plane[LAST_FRAME][1], mb_row, mb_col, 1); if (!dot_artifact_candidate) { assert(plane[LAST_FRAME][2] != NULL); dot_artifact_candidate = check_dot_artifact_candidate( cpi, x, target_v, stride_uv, plane[LAST_FRAME][2], mb_row, mb_col, 2); } } } #if CONFIG_MULTI_RES_ENCODING // |parent_ref_valid| will be set here if potentially we can do mv resue for // this higher resol (|cpi->oxcf.mr_encoder_id| > 0) frame. // |parent_ref_valid| may be reset depending on |parent_ref_frame| for // the current macroblock below. parent_ref_valid = cpi->oxcf.mr_encoder_id && cpi->mr_low_res_mv_avail; if (parent_ref_valid) { int parent_ref_flag; get_lower_res_motion_info(cpi, xd, &dissim, &parent_ref_frame, &parent_mode, &parent_ref_mv, mb_row, mb_col); /* TODO(jkoleszar): The references available (ref_frame_flags) to the * lower res encoder should match those available to this encoder, but * there seems to be a situation where this mismatch can happen in the * case of frame dropping and temporal layers. For example, * GOLD being disallowed in ref_frame_flags, but being returned as * parent_ref_frame. * * In this event, take the conservative approach of disabling the * lower res info for this MB. */ parent_ref_flag = 0; // Note availability for mv reuse is only based on last and golden. if (parent_ref_frame == LAST_FRAME) parent_ref_flag = (cpi->ref_frame_flags & VP8_LAST_FRAME); else if (parent_ref_frame == GOLDEN_FRAME) parent_ref_flag = (cpi->ref_frame_flags & VP8_GOLD_FRAME); // assert(!parent_ref_frame || parent_ref_flag); // If |parent_ref_frame| did not match either last or golden then // shut off mv reuse. if (parent_ref_frame && !parent_ref_flag) parent_ref_valid = 0; // Don't do mv reuse since we want to allow for another mode besides // ZEROMV_LAST to remove dot artifact. if (dot_artifact_candidate) parent_ref_valid = 0; } #endif // Check if current macroblock is in skin area. x->is_skin = 0; if (!cpi->oxcf.screen_content_mode) { int block_index = mb_row * cpi->common.mb_cols + mb_col; x->is_skin = cpi->skin_map[block_index]; } #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { // Under aggressive denoising mode, should we use skin map to reduce // denoiser // and ZEROMV bias? Will need to revisit the accuracy of this detection for // very noisy input. For now keep this as is (i.e., don't turn it off). 
// if (cpi->denoiser.denoiser_mode == kDenoiserOnYUVAggressive) // x->is_skin = 0; } #endif mode_mv = mode_mv_sb[sign_bias]; best_ref_mv.as_int = 0; memset(mode_mv_sb, 0, sizeof(mode_mv_sb)); memset(&best_mbmode, 0, sizeof(best_mbmode)); /* Setup search priorities */ #if CONFIG_MULTI_RES_ENCODING if (parent_ref_valid && parent_ref_frame && dissim < 8) { ref_frame_map[0] = -1; ref_frame_map[1] = parent_ref_frame; ref_frame_map[2] = -1; ref_frame_map[3] = -1; } else #endif get_reference_search_order(cpi, ref_frame_map); /* Check to see if there is at least 1 valid reference frame that we need * to calculate near_mvs. */ if (ref_frame_map[1] > 0) { sign_bias = vp8_find_near_mvs_bias( &x->e_mbd, x->e_mbd.mode_info_context, mode_mv_sb, best_ref_mv_sb, mdcounts, ref_frame_map[1], cpi->common.ref_frame_sign_bias); mode_mv = mode_mv_sb[sign_bias]; best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int; } /* Count of the number of MBs tested so far this frame */ x->mbs_tested_so_far++; *returnintra = INT_MAX; x->skip = 0; x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; /* If the frame has big static background and current MB is in low * motion area, its mode decision is biased to ZEROMV mode. * No adjustment if cpu_used is <= -12 (i.e., cpi->Speed >= 12). * At such speed settings, ZEROMV is already heavily favored. */ if (cpi->Speed < 12) { calculate_zeromv_rd_adjustment(cpi, x, &rd_adjustment); } #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { rd_adjustment = (int)(rd_adjustment * cpi->denoiser.denoise_pars.pickmode_mv_bias / 100); } #endif if (dot_artifact_candidate) { // Bias against ZEROMV_LAST mode. rd_adjustment = 150; } /* if we encode a new mv this is important * find the best new motion vector */ for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { int frame_cost; int this_rd = INT_MAX; int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]]; if (best_rd <= x->rd_threshes[mode_index]) continue; if (this_ref_frame < 0) continue; x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; /* everything but intra */ if (x->e_mbd.mode_info_context->mbmi.ref_frame) { x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame]) { sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame]; mode_mv = mode_mv_sb[sign_bias]; best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int; } #if CONFIG_MULTI_RES_ENCODING if (parent_ref_valid) { if (vp8_mode_order[mode_index] == NEARESTMV && mode_mv[NEARESTMV].as_int == 0) continue; if (vp8_mode_order[mode_index] == NEARMV && mode_mv[NEARMV].as_int == 0) continue; if (vp8_mode_order[mode_index] == NEWMV && parent_mode == ZEROMV && best_ref_mv.as_int == 0) continue; else if (vp8_mode_order[mode_index] == NEWMV && dissim == 0 && best_ref_mv.as_int == parent_ref_mv.as_int) continue; } #endif } /* Check to see if the testing frequency for this mode is at its max * If so then prevent it from being tested and increase the threshold * for its testing */ if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1)) { if (x->mbs_tested_so_far <= (cpi->mode_check_freq[mode_index] * x->mode_test_hit_counts[mode_index])) { /* Increase the threshold for coding this mode to make it less * likely to be chosen */ x->rd_thresh_mult[mode_index] += 4; if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) { x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; } 
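/* Mode skipping works on a multiplicative threshold:
 *   rd_threshes[i] = (rd_baseline_thresh[i] >> 7) * rd_thresh_mult[i]
 * The multiplier is raised by 4 (capped at MAX_THRESHMULT) each time a
 * mode is skipped or fails to win, and lowered again when the mode is
 * chosen, so modes that rarely pay off are tested less and less often. */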
x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * x->rd_thresh_mult[mode_index]; continue; } } /* We have now reached the point where we are going to test the current * mode so increment the counter for the number of times it has been * tested */ x->mode_test_hit_counts[mode_index]++; rate2 = 0; distortion2 = 0; this_mode = vp8_mode_order[mode_index]; x->e_mbd.mode_info_context->mbmi.mode = this_mode; x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; /* Work out the cost assosciated with selecting the reference frame */ frame_cost = x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; rate2 += frame_cost; /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame, * unless ARNR filtering is enabled in which case we want * an unfiltered alternative */ if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) { continue; } } switch (this_mode) { case B_PRED: /* Pass best so far to pick_intra4x4mby_modes to use as breakout */ distortion2 = best_rd_sse; pick_intra4x4mby_modes(x, &rate, &distortion2); if (distortion2 == INT_MAX) { this_rd = INT_MAX; } else { rate2 += rate; distortion2 = vpx_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse); this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); if (this_rd < best_intra_rd) { best_intra_rd = this_rd; *returnintra = distortion2; } } break; case SPLITMV: /* Split MV modes currently not supported when RD is not enabled. */ break; case DC_PRED: case V_PRED: case H_PRED: case TM_PRED: vp8_build_intra_predictors_mby_s( xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1, xd->dst.y_stride, xd->predictor, 16); distortion2 = vpx_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse); rate2 += x->mbmode_cost[x->e_mbd.frame_type] [x->e_mbd.mode_info_context->mbmi.mode]; this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); if (this_rd < best_intra_rd) { best_intra_rd = this_rd; *returnintra = distortion2; } break; case NEWMV: { int thissme; int step_param; int further_steps; int n = 0; int sadpb = x->sadperbit16; int_mv mvp_full; int col_min = ((best_ref_mv.as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL; int row_min = ((best_ref_mv.as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL; int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL; int row_max = (best_ref_mv.as_mv.row >> 3) + MAX_FULL_PEL_VAL; int tmp_col_min = x->mv_col_min; int tmp_col_max = x->mv_col_max; int tmp_row_min = x->mv_row_min; int tmp_row_max = x->mv_row_max; int speed_adjust = (cpi->Speed > 5) ? ((cpi->Speed >= 8) ? 3 : 2) : 1; /* Further step/diamond searches as necessary */ step_param = cpi->sf.first_step + speed_adjust; #if CONFIG_MULTI_RES_ENCODING /* If lower-res frame is not available for mv reuse (because of frame dropping or different temporal layer pattern), then higher resol encoder does motion search without any previous knowledge. Also, since last frame motion info is not stored, then we can not use improved_mv_pred. */ if (cpi->oxcf.mr_encoder_id) sf_improved_mv_pred = 0; // Only use parent MV as predictor if this candidate reference frame // (|this_ref_frame|) is equal to |parent_ref_frame|. if (parent_ref_valid && (parent_ref_frame == this_ref_frame)) { /* Use parent MV as predictor. Adjust search range * accordingly. 
*/ mvp.as_int = parent_ref_mv.as_int; mvp_full.as_mv.col = parent_ref_mv.as_mv.col >> 3; mvp_full.as_mv.row = parent_ref_mv.as_mv.row >> 3; if (dissim <= 32) step_param += 3; else if (dissim <= 128) step_param += 2; else step_param += 1; } else #endif { if (sf_improved_mv_pred) { if (!saddone) { vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]); saddone = 1; } vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp, x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]); sr += speed_adjust; /* adjust search range according to sr from mv prediction */ if (sr > step_param) step_param = sr; mvp_full.as_mv.col = mvp.as_mv.col >> 3; mvp_full.as_mv.row = mvp.as_mv.row >> 3; } else { mvp.as_int = best_ref_mv.as_int; mvp_full.as_mv.col = best_ref_mv.as_mv.col >> 3; mvp_full.as_mv.row = best_ref_mv.as_mv.row >> 3; } } #if CONFIG_MULTI_RES_ENCODING if (parent_ref_valid && (parent_ref_frame == this_ref_frame) && dissim <= 2 && VPXMAX(abs(best_ref_mv.as_mv.row - parent_ref_mv.as_mv.row), abs(best_ref_mv.as_mv.col - parent_ref_mv.as_mv.col)) <= 4) { d->bmi.mv.as_int = mvp_full.as_int; mode_mv[NEWMV].as_int = mvp_full.as_int; cpi->find_fractional_mv_step( x, b, d, &d->bmi.mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost, &distortion2, &sse); } else #endif { /* Get intersection of UMV window and valid MV window to * reduce # of checks in diamond search. */ if (x->mv_col_min < col_min) x->mv_col_min = col_min; if (x->mv_col_max > col_max) x->mv_col_max = col_max; if (x->mv_row_min < row_min) x->mv_row_min = row_min; if (x->mv_row_max > row_max) x->mv_row_max = row_max; further_steps = (cpi->Speed >= 8) ? 0 : (cpi->sf.max_step_search_steps - 1 - step_param); if (cpi->sf.search_method == HEX) { #if CONFIG_MULTI_RES_ENCODING /* TODO: In higher-res pick_inter_mode, step_param is used to * modify hex search range. Here, set step_param to 0 not to * change the behavior in lowest-resolution encoder. * Will improve it later. */ /* Set step_param to 0 to ensure large-range motion search * when mv reuse if not valid (i.e. |parent_ref_valid| = 0), * or if this candidate reference frame (|this_ref_frame|) is * not equal to |parent_ref_frame|. 
*/ if (!parent_ref_valid || (parent_ref_frame != this_ref_frame)) step_param = 0; #endif bestsme = vp8_hex_search(x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &cpi->fn_ptr[BLOCK_16X16], x->mvsadcost, &best_ref_mv); mode_mv[NEWMV].as_int = d->bmi.mv.as_int; } else { bestsme = cpi->diamond_search_sad( x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); mode_mv[NEWMV].as_int = d->bmi.mv.as_int; /* Further step/diamond searches as necessary */ n = num00; num00 = 0; while (n < further_steps) { n++; if (num00) { num00--; } else { thissme = cpi->diamond_search_sad( x, b, d, &mvp_full, &d->bmi.mv, step_param + n, sadpb, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); if (thissme < bestsme) { bestsme = thissme; mode_mv[NEWMV].as_int = d->bmi.mv.as_int; } else { d->bmi.mv.as_int = mode_mv[NEWMV].as_int; } } } } x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; x->mv_row_min = tmp_row_min; x->mv_row_max = tmp_row_max; if (bestsme < INT_MAX) { cpi->find_fractional_mv_step( x, b, d, &d->bmi.mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], cpi->mb.mvcost, &distortion2, &sse); } } mode_mv[NEWMV].as_int = d->bmi.mv.as_int; // The clamp below is not necessary from the perspective // of VP8 bitstream, but is added to improve ChromeCast // mirroring's robustness. Please do not remove. vp8_clamp_mv2(&mode_mv[this_mode], xd); /* mv cost; */ rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, cpi->mb.mvcost, 128); } // fall through case NEARESTMV: case NEARMV: if (mode_mv[this_mode].as_int == 0) continue; // fall through case ZEROMV: /* Trap vectors that reach beyond the UMV borders * Note that ALL New MV, Nearest MV Near MV and Zero MV code drops * through to this point because of the lack of break statements * in the previous two cases. */ if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { continue; } rate2 += vp8_cost_mv_ref(this_mode, mdcounts); x->e_mbd.mode_info_context->mbmi.mv.as_int = mode_mv[this_mode].as_int; this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x, rd_adjustment); break; default: break; } #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { /* Store for later use by denoiser. */ // Dont' denoise with GOLDEN OR ALTREF is they are old reference // frames (greater than MAX_GF_ARF_DENOISE_RANGE frames in past). int skip_old_reference = ((this_ref_frame != LAST_FRAME) && (cpi->common.current_video_frame - cpi->current_ref_frames[this_ref_frame] > MAX_GF_ARF_DENOISE_RANGE)) ? 1 : 0; if (this_mode == ZEROMV && sse < zero_mv_sse && !skip_old_reference) { zero_mv_sse = sse; x->best_zeromv_reference_frame = x->e_mbd.mode_info_context->mbmi.ref_frame; } // Store the best NEWMV in x for later use in the denoiser. 
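// (Roughly: the denoiser later weighs best_sse against zero_mv_sse to
// decide whether to filter along this motion trajectory or to fall back
// to the zero-motion running average.)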
if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && sse < best_sse && !skip_old_reference) { best_sse = sse; x->best_sse_inter_mode = NEWMV; x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; x->need_to_clamp_best_mvs = x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; x->best_reference_frame = x->e_mbd.mode_info_context->mbmi.ref_frame; } } #endif if (this_rd < best_rd || x->skip) { /* Note index of best mode */ best_mode_index = mode_index; *returnrate = rate2; *returndistortion = distortion2; best_rd_sse = sse; best_rd = this_rd; memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO)); /* Testing this mode gave rise to an improvement in best error * score. Lower threshold a bit for next time */ x->rd_thresh_mult[mode_index] = (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * x->rd_thresh_mult[mode_index]; } /* If the mode did not help improve the best error case then raise the * threshold for testing that mode next time around. */ else { x->rd_thresh_mult[mode_index] += 4; if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) { x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; } x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * x->rd_thresh_mult[mode_index]; } if (x->skip) break; } /* Reduce the activation RD thresholds for the best choice mode */ if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) { int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 3); x->rd_thresh_mult[best_mode_index] = (x->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? x->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT; x->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * x->rd_thresh_mult[best_mode_index]; } { int this_rdbin = (*returndistortion >> 7); if (this_rdbin >= 1024) { this_rdbin = 1023; } x->error_bins[this_rdbin]++; } #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { int block_index = mb_row * cpi->common.mb_cols + mb_col; int reevaluate = 0; int is_noisy = 0; if (x->best_sse_inter_mode == DC_PRED) { /* No best MV found. */ x->best_sse_inter_mode = best_mbmode.mode; x->best_sse_mv = best_mbmode.mv; x->need_to_clamp_best_mvs = best_mbmode.need_to_clamp_mvs; x->best_reference_frame = best_mbmode.ref_frame; best_sse = best_rd_sse; } // For non-skin blocks that have selected ZEROMV for this current frame, // and have been selecting ZEROMV_LAST (on the base layer frame) at // least |x~20| consecutive past frames in a row, label the block for // possible increase in denoising strength. 
We also condition this // labeling on there being significant denoising in the scene if (cpi->oxcf.noise_sensitivity == 4) { if (cpi->denoiser.nmse_source_diff > 70 * cpi->denoiser.threshold_aggressive_mode / 100) { is_noisy = 1; } } else { if (cpi->mse_source_denoised > 1000) is_noisy = 1; } x->increase_denoising = 0; if (!x->is_skin && x->best_sse_inter_mode == ZEROMV && (x->best_reference_frame == LAST_FRAME || x->best_reference_frame == cpi->closest_reference_frame) && cpi->consec_zero_last[block_index] >= 20 && is_noisy) { x->increase_denoising = 1; } x->denoise_zeromv = 0; vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, recon_yoffset, recon_uvoffset, &cpi->common.lf_info, mb_row, mb_col, block_index, cpi->consec_zero_last_mvbias[block_index]); // Reevaluate ZEROMV after denoising: for large noise content // (i.e., cpi->mse_source_denoised is above threshold), do this for all // blocks that did not pick ZEROMV as best mode but are using ZEROMV // for denoising. Otherwise, always re-evaluate for blocks that picked // INTRA mode as best mode. // Avoid blocks that have been biased against ZERO_LAST // (i.e., dot artifact candidate blocks). reevaluate = (best_mbmode.ref_frame == INTRA_FRAME) || (best_mbmode.mode != ZEROMV && x->denoise_zeromv && cpi->mse_source_denoised > 2000); if (!dot_artifact_candidate && reevaluate && x->best_zeromv_reference_frame != INTRA_FRAME) { int this_rd = 0; int this_ref_frame = x->best_zeromv_reference_frame; rd_adjustment = 100; rate2 = x->ref_frame_cost[this_ref_frame] + vp8_cost_mv_ref(ZEROMV, mdcounts); distortion2 = 0; /* set up the proper prediction buffers for the frame */ x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; this_rd = evaluate_inter_mode(&sse, rate2, &distortion2, cpi, x, rd_adjustment); if (this_rd < best_rd) { memcpy(&best_mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO)); } } } #endif if (cpi->is_src_frame_alt_ref && (best_mbmode.mode != ZEROMV || best_mbmode.ref_frame != ALTREF_FRAME)) { x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME; x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip); x->e_mbd.mode_info_context->mbmi.partitioning = 0; return; } /* set to the best mb mode, this copy can be skip if x->skip since it * already has the right content */ if (!x->skip) { memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mbmode, sizeof(MB_MODE_INFO)); } if (best_mbmode.mode <= B_PRED) { /* set mode_info_context->mbmi.uv_mode */ pick_intra_mbuv_mode(x); } if (sign_bias != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) { best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int; } update_mvcount(x, &best_ref_mv); } void vp8_pick_intra_mode(MACROBLOCK *x, int *rate) { int error4x4, error16x16 = INT_MAX; int rate_, best_rate = 0, distortion, best_sse; MB_PREDICTION_MODE mode, best_mode = DC_PRED; int this_rd; unsigned int sse; BLOCK *b = &x->block[0]; MACROBLOCKD *xd = &x->e_mbd; xd->mode_info_context->mbmi.ref_frame = INTRA_FRAME; pick_intra_mbuv_mode(x); for (mode = DC_PRED; mode <= TM_PRED; ++mode) { 
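/* Fast whole-MB intra pick: predict each 16x16 mode (DC/V/H/TM) into
 * xd->predictor, score it with 16x16 variance as the distortion proxy
 * plus the mode signaling cost, and afterwards compare the winner
 * against the best 4x4 (B_PRED) decomposition. */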
xd->mode_info_context->mbmi.mode = mode; vp8_build_intra_predictors_mby_s(xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1, xd->dst.y_stride, xd->predictor, 16); distortion = vpx_variance16x16(*(b->base_src), b->src_stride, xd->predictor, 16, &sse); rate_ = x->mbmode_cost[xd->frame_type][mode]; this_rd = RDCOST(x->rdmult, x->rddiv, rate_, distortion); if (error16x16 > this_rd) { error16x16 = this_rd; best_mode = mode; best_sse = sse; best_rate = rate_; } } xd->mode_info_context->mbmi.mode = best_mode; error4x4 = pick_intra4x4mby_modes(x, &rate_, &best_sse); if (error4x4 < error16x16) { xd->mode_info_context->mbmi.mode = B_PRED; best_rate = rate_; } *rate = best_rate; } libvpx-1.8.2/vp8/encoder/pickinter.h000066400000000000000000000022711357355204000173310ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_PICKINTER_H_ #define VPX_VP8_ENCODER_PICKINTER_H_ #include "vpx_config.h" #include "vp8/common/onyxc_int.h" #ifdef __cplusplus extern "C" { #endif extern void vp8_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra, int mb_row, int mb_col); extern void vp8_pick_intra_mode(MACROBLOCK *x, int *rate); extern int vp8_get_inter_mbpred_error(MACROBLOCK *mb, const vp8_variance_fn_ptr_t *vfp, unsigned int *sse, int_mv this_mv); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_PICKINTER_H_ libvpx-1.8.2/vp8/encoder/picklpf.c000066400000000000000000000270621357355204000167710ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vp8/common/onyxc_int.h" #include "onyx_int.h" #include "vp8/encoder/picklpf.h" #include "vp8/encoder/quantize.h" #include "vpx_mem/vpx_mem.h" #include "vpx_scale/vpx_scale.h" #include "vp8/common/alloccommon.h" #include "vp8/common/loopfilter.h" #if VPX_ARCH_ARM #include "vpx_ports/arm.h" #endif extern int vp8_calc_ss_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest); static void yv12_copy_partial_frame(YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) { unsigned char *src_y, *dst_y; int yheight; int ystride; int yoffset; int linestocopy; yheight = src_ybc->y_height; ystride = src_ybc->y_stride; /* number of MB rows to use in partial filtering */ linestocopy = (yheight >> 4) / PARTIAL_FRAME_FRACTION; linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */ /* Copy extra 4 so that full filter context is available if filtering done * on the copied partial frame and not original. Partial filter does mb * filtering for top row also, which can modify3 pixels above. 
*/ linestocopy += 4; /* partial image starts at ~middle of frame (macroblock border)*/ yoffset = ystride * (((yheight >> 5) * 16) - 4); src_y = src_ybc->y_buffer + yoffset; dst_y = dst_ybc->y_buffer + yoffset; memcpy(dst_y, src_y, ystride * linestocopy); } static int calc_partial_ssl_err(YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *dest) { int i, j; int Total = 0; int srcoffset, dstoffset; unsigned char *src = source->y_buffer; unsigned char *dst = dest->y_buffer; int linestocopy; /* number of MB rows to use in partial filtering */ linestocopy = (source->y_height >> 4) / PARTIAL_FRAME_FRACTION; linestocopy = linestocopy ? linestocopy << 4 : 16; /* 16 lines per MB */ /* partial image starts at ~middle of frame (macroblock border)*/ srcoffset = source->y_stride * ((dest->y_height >> 5) * 16); dstoffset = dest->y_stride * ((dest->y_height >> 5) * 16); src += srcoffset; dst += dstoffset; /* Loop through the Y plane raw and reconstruction data summing * (square differences) */ for (i = 0; i < linestocopy; i += 16) { for (j = 0; j < source->y_width; j += 16) { unsigned int sse; Total += vpx_mse16x16(src + j, source->y_stride, dst + j, dest->y_stride, &sse); } src += 16 * source->y_stride; dst += 16 * dest->y_stride; } return Total; } /* Enforce a minimum filter level based upon baseline Q */ static int get_min_filter_level(VP8_COMP *cpi, int base_qindex) { int min_filter_level; if (cpi->source_alt_ref_active && cpi->common.refresh_golden_frame && !cpi->common.refresh_alt_ref_frame) { min_filter_level = 0; } else { if (base_qindex <= 6) { min_filter_level = 0; } else if (base_qindex <= 16) { min_filter_level = 1; } else { min_filter_level = (base_qindex / 8); } } return min_filter_level; } /* Enforce a maximum filter level based upon baseline Q */ static int get_max_filter_level(VP8_COMP *cpi, int base_qindex) { /* PGW August 2006: Highest filter values almost always a bad idea */ /* jbb chg: 20100118 - not so any more with this overquant stuff allow * high values with lots of intra coming in. */ int max_filter_level = MAX_LOOP_FILTER; (void)base_qindex; if (cpi->twopass.section_intra_rating > 8) { max_filter_level = MAX_LOOP_FILTER * 3 / 4; } return max_filter_level; } void vp8cx_pick_filter_level_fast(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; int best_err = 0; int filt_err = 0; int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); int filt_val; int best_filt_val; YV12_BUFFER_CONFIG *saved_frame = cm->frame_to_show; /* Replace unfiltered frame buffer with a new one */ cm->frame_to_show = &cpi->pick_lf_lvl_frame; if (cm->frame_type == KEY_FRAME) { cm->sharpness_level = 0; } else { cm->sharpness_level = cpi->oxcf.Sharpness; } if (cm->sharpness_level != cm->last_sharpness_level) { vp8_loop_filter_update_sharpness(&cm->lf_info, cm->sharpness_level); cm->last_sharpness_level = cm->sharpness_level; } /* Start the search at the previous frame filter level unless it is * now out of range. */ if (cm->filter_level < min_filter_level) { cm->filter_level = min_filter_level; } else if (cm->filter_level > max_filter_level) { cm->filter_level = max_filter_level; } filt_val = cm->filter_level; best_filt_val = filt_val; /* Get the err using the previous frame's filter value. 
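 * The fast search filters only a band of rows near the middle of the
 * frame (copied by yv12_copy_partial_frame above), then steps the filter
 * level down, and possibly back up, by 1 (or by 2 once the level exceeds
 * 10), keeping whichever level gives the smallest partial-frame SSE.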
*/ /* Copy the unfiltered / processed recon buffer to the new buffer */ yv12_copy_partial_frame(saved_frame, cm->frame_to_show); vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); best_err = calc_partial_ssl_err(sd, cm->frame_to_show); filt_val -= 1 + (filt_val > 10); /* Search lower filter levels */ while (filt_val >= min_filter_level) { /* Apply the loop filter */ yv12_copy_partial_frame(saved_frame, cm->frame_to_show); vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); /* Get the err for filtered frame */ filt_err = calc_partial_ssl_err(sd, cm->frame_to_show); /* Update the best case record or exit loop. */ if (filt_err < best_err) { best_err = filt_err; best_filt_val = filt_val; } else { break; } /* Adjust filter level */ filt_val -= 1 + (filt_val > 10); } /* Search up (note that we have already done filt_val = cm->filter_level) */ filt_val = cm->filter_level + 1 + (filt_val > 10); if (best_filt_val == cm->filter_level) { /* Resist raising filter level for very small gains */ best_err -= (best_err >> 10); while (filt_val < max_filter_level) { /* Apply the loop filter */ yv12_copy_partial_frame(saved_frame, cm->frame_to_show); vp8_loop_filter_partial_frame(cm, &cpi->mb.e_mbd, filt_val); /* Get the err for filtered frame */ filt_err = calc_partial_ssl_err(sd, cm->frame_to_show); /* Update the best case record or exit loop. */ if (filt_err < best_err) { /* Do not raise filter level if improvement is < 1 part * in 4096 */ best_err = filt_err - (filt_err >> 10); best_filt_val = filt_val; } else { break; } /* Adjust filter level */ filt_val += 1 + (filt_val > 10); } } cm->filter_level = best_filt_val; if (cm->filter_level < min_filter_level) cm->filter_level = min_filter_level; if (cm->filter_level > max_filter_level) cm->filter_level = max_filter_level; /* restore unfiltered frame pointer */ cm->frame_to_show = saved_frame; } /* Stub function for now Alt LF not used */ void vp8cx_set_alt_lf_level(VP8_COMP *cpi, int filt_val) { MACROBLOCKD *mbd = &cpi->mb.e_mbd; (void)filt_val; mbd->segment_feature_data[MB_LVL_ALT_LF][0] = cpi->segment_feature_data[MB_LVL_ALT_LF][0]; mbd->segment_feature_data[MB_LVL_ALT_LF][1] = cpi->segment_feature_data[MB_LVL_ALT_LF][1]; mbd->segment_feature_data[MB_LVL_ALT_LF][2] = cpi->segment_feature_data[MB_LVL_ALT_LF][2]; mbd->segment_feature_data[MB_LVL_ALT_LF][3] = cpi->segment_feature_data[MB_LVL_ALT_LF][3]; } void vp8cx_pick_filter_level(YV12_BUFFER_CONFIG *sd, VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; int best_err = 0; int filt_err = 0; int min_filter_level = get_min_filter_level(cpi, cm->base_qindex); int max_filter_level = get_max_filter_level(cpi, cm->base_qindex); int filter_step; int filt_high = 0; int filt_mid; int filt_low = 0; int filt_best; int filt_direction = 0; /* Bias against raising loop filter and in favor of lowering it */ int Bias = 0; int ss_err[MAX_LOOP_FILTER + 1]; YV12_BUFFER_CONFIG *saved_frame = cm->frame_to_show; memset(ss_err, 0, sizeof(ss_err)); /* Replace unfiltered frame buffer with a new one */ cm->frame_to_show = &cpi->pick_lf_lvl_frame; if (cm->frame_type == KEY_FRAME) { cm->sharpness_level = 0; } else { cm->sharpness_level = cpi->oxcf.Sharpness; } /* Start the search at the previous frame filter level unless it is * now out of range. */ filt_mid = cm->filter_level; if (filt_mid < min_filter_level) { filt_mid = min_filter_level; } else if (filt_mid > max_filter_level) { filt_mid = max_filter_level; } /* Define the initial step size */ filter_step = (filt_mid < 16) ? 
4 : filt_mid / 4; /* Get baseline error score */ /* Copy the unfiltered / processed recon buffer to the new buffer */ vpx_yv12_copy_y(saved_frame, cm->frame_to_show); vp8cx_set_alt_lf_level(cpi, filt_mid); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_mid); best_err = vp8_calc_ss_err(sd, cm->frame_to_show); ss_err[filt_mid] = best_err; filt_best = filt_mid; while (filter_step > 0) { Bias = (best_err >> (15 - (filt_mid / 8))) * filter_step; if (cpi->twopass.section_intra_rating < 20) { Bias = Bias * cpi->twopass.section_intra_rating / 20; } filt_high = ((filt_mid + filter_step) > max_filter_level) ? max_filter_level : (filt_mid + filter_step); filt_low = ((filt_mid - filter_step) < min_filter_level) ? min_filter_level : (filt_mid - filter_step); if ((filt_direction <= 0) && (filt_low != filt_mid)) { if (ss_err[filt_low] == 0) { /* Get Low filter error score */ vpx_yv12_copy_y(saved_frame, cm->frame_to_show); vp8cx_set_alt_lf_level(cpi, filt_low); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_low); filt_err = vp8_calc_ss_err(sd, cm->frame_to_show); ss_err[filt_low] = filt_err; } else { filt_err = ss_err[filt_low]; } /* If value is close to the best so far then bias towards a * lower loop filter value. */ if ((filt_err - Bias) < best_err) { /* Was it actually better than the previous best? */ if (filt_err < best_err) best_err = filt_err; filt_best = filt_low; } } /* Now look at filt_high */ if ((filt_direction >= 0) && (filt_high != filt_mid)) { if (ss_err[filt_high] == 0) { vpx_yv12_copy_y(saved_frame, cm->frame_to_show); vp8cx_set_alt_lf_level(cpi, filt_high); vp8_loop_filter_frame_yonly(cm, &cpi->mb.e_mbd, filt_high); filt_err = vp8_calc_ss_err(sd, cm->frame_to_show); ss_err[filt_high] = filt_err; } else { filt_err = ss_err[filt_high]; } /* Was it better than the previous best? */ if (filt_err < (best_err - Bias)) { best_err = filt_err; filt_best = filt_high; } } /* Half the step distance if the best filter value was the same * as last time */ if (filt_best == filt_mid) { filter_step = filter_step / 2; filt_direction = 0; } else { filt_direction = (filt_best < filt_mid) ? -1 : 1; filt_mid = filt_best; } } cm->filter_level = filt_best; /* restore unfiltered frame pointer */ cm->frame_to_show = saved_frame; } libvpx-1.8.2/vp8/encoder/picklpf.h000066400000000000000000000016031357355204000167670ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_PICKLPF_H_ #define VPX_VP8_ENCODER_PICKLPF_H_ #ifdef __cplusplus extern "C" { #endif struct VP8_COMP; struct yv12_buffer_config; void vp8cx_pick_filter_level_fast(struct yv12_buffer_config *sd, struct VP8_COMP *cpi); void vp8cx_set_alt_lf_level(struct VP8_COMP *cpi, int filt_val); void vp8cx_pick_filter_level(struct yv12_buffer_config *sd, VP8_COMP *cpi); #ifdef __cplusplus } #endif #endif // VPX_VP8_ENCODER_PICKLPF_H_ libvpx-1.8.2/vp8/encoder/quantize.h000066400000000000000000000022471357355204000172040ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_QUANTIZE_H_ #define VPX_VP8_ENCODER_QUANTIZE_H_ #ifdef __cplusplus extern "C" { #endif struct VP8_COMP; struct macroblock; extern void vp8_quantize_mb(struct macroblock *x); extern void vp8_quantize_mby(struct macroblock *x); extern void vp8_quantize_mbuv(struct macroblock *x); extern void vp8_set_quantizer(struct VP8_COMP *cpi, int Q); extern void vp8cx_frame_init_quantizer(struct VP8_COMP *cpi); extern void vp8_update_zbin_extra(struct VP8_COMP *cpi, struct macroblock *x); extern void vp8cx_mb_init_quantizer(struct VP8_COMP *cpi, struct macroblock *x, int ok_to_skip); extern void vp8cx_init_quantizer(struct VP8_COMP *cpi); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_QUANTIZE_H_ libvpx-1.8.2/vp8/encoder/ratectrl.c000066400000000000000000001613511357355204000171600ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <stdlib.h> #include <stdio.h> #include <string.h> #include <limits.h> #include <assert.h> #include "math.h" #include "vp8/common/common.h" #include "ratectrl.h" #include "vp8/common/entropymode.h" #include "vpx_mem/vpx_mem.h" #include "vp8/common/systemdependent.h" #include "encodemv.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/system_state.h" #define MIN_BPB_FACTOR 0.01 #define MAX_BPB_FACTOR 50 extern const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES]; #ifdef MODE_STATS extern int y_modes[5]; extern int uv_modes[4]; extern int b_modes[10]; extern int inter_y_modes[10]; extern int inter_uv_modes[4]; extern int inter_b_modes[10]; #endif /* Bits Per MB at different Q (Multiplied by 512) */ #define BPER_MB_NORMBITS 9 /* Work in progress recalibration of baseline rate tables based on * the assumption that bits per mb is inversely proportional to the * quantizer value.
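 * Concretely, the tables below follow K / Q with K = 450000 for the
 * intra case and K = 285000 for the inter case (see the per-table
 * comments), so doubling the effective quantizer roughly halves the
 * predicted bits per macroblock.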
*/ const int vp8_bits_per_mb[2][QINDEX_RANGE] = { /* Intra case 450000/Qintra */ { 1125000, 900000, 750000, 642857, 562500, 500000, 450000, 450000, 409090, 375000, 346153, 321428, 300000, 281250, 264705, 264705, 250000, 236842, 225000, 225000, 214285, 214285, 204545, 204545, 195652, 195652, 187500, 180000, 180000, 173076, 166666, 160714, 155172, 150000, 145161, 140625, 136363, 132352, 128571, 125000, 121621, 121621, 118421, 115384, 112500, 109756, 107142, 104651, 102272, 100000, 97826, 97826, 95744, 93750, 91836, 90000, 88235, 86538, 84905, 83333, 81818, 80357, 78947, 77586, 76271, 75000, 73770, 72580, 71428, 70312, 69230, 68181, 67164, 66176, 65217, 64285, 63380, 62500, 61643, 60810, 60000, 59210, 59210, 58441, 57692, 56962, 56250, 55555, 54878, 54216, 53571, 52941, 52325, 51724, 51136, 50561, 49450, 48387, 47368, 46875, 45918, 45000, 44554, 44117, 43269, 42452, 41666, 40909, 40178, 39473, 38793, 38135, 36885, 36290, 35714, 35156, 34615, 34090, 33582, 33088, 32608, 32142, 31468, 31034, 30405, 29801, 29220, 28662, }, /* Inter case 285000/Qinter */ { 712500, 570000, 475000, 407142, 356250, 316666, 285000, 259090, 237500, 219230, 203571, 190000, 178125, 167647, 158333, 150000, 142500, 135714, 129545, 123913, 118750, 114000, 109615, 105555, 101785, 98275, 95000, 91935, 89062, 86363, 83823, 81428, 79166, 77027, 75000, 73076, 71250, 69512, 67857, 66279, 64772, 63333, 61956, 60638, 59375, 58163, 57000, 55882, 54807, 53773, 52777, 51818, 50892, 50000, 49137, 47500, 45967, 44531, 43181, 41911, 40714, 39583, 38513, 37500, 36538, 35625, 34756, 33928, 33139, 32386, 31666, 30978, 30319, 29687, 29081, 28500, 27941, 27403, 26886, 26388, 25909, 25446, 25000, 24568, 23949, 23360, 22800, 22265, 21755, 21268, 20802, 20357, 19930, 19520, 19127, 18750, 18387, 18037, 17701, 17378, 17065, 16764, 16473, 16101, 15745, 15405, 15079, 14766, 14467, 14179, 13902, 13636, 13380, 13133, 12895, 12666, 12445, 12179, 11924, 11632, 11445, 11220, 11003, 10795, 10594, 10401, 10215, 10035, } }; static const int kf_boost_qadjustment[QINDEX_RANGE] = { 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 200, 201, 201, 202, 203, 203, 203, 204, 204, 205, 205, 206, 206, 207, 207, 208, 208, 209, 209, 210, 210, 211, 211, 212, 212, 213, 213, 214, 214, 215, 215, 216, 216, 217, 217, 218, 218, 219, 219, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, 220, }; /* #define GFQ_ADJUSTMENT (Q+100) */ #define GFQ_ADJUSTMENT vp8_gf_boost_qadjustment[Q] const int vp8_gf_boost_qadjustment[QINDEX_RANGE] = { 80, 82, 84, 86, 88, 90, 92, 94, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 184, 185, 185, 186, 186, 187, 187, 188, 188, 189, 189, 190, 190, 191, 191, 192, 192, 193, 193, 194, 194, 194, 194, 195, 195, 196, 196, 197, 197, 198, 198 }; /* const int vp8_gf_boost_qadjustment[QINDEX_RANGE] = { 100,101,102,103,104,105,105,106, 
106,107,107,108,109,109,110,111, 112,113,114,115,116,117,118,119, 120,121,122,123,124,125,126,127, 128,129,130,131,132,133,134,135, 136,137,138,139,140,141,142,143, 144,145,146,147,148,149,150,151, 152,153,154,155,156,157,158,159, 160,161,162,163,164,165,166,167, 168,169,170,170,171,171,172,172, 173,173,173,174,174,174,175,175, 175,176,176,176,177,177,177,177, 178,178,179,179,180,180,181,181, 182,182,183,183,184,184,185,185, 186,186,187,187,188,188,189,189, 190,190,191,191,192,192,193,193, }; */ static const int kf_gf_boost_qlimits[QINDEX_RANGE] = { 150, 155, 160, 165, 170, 175, 180, 185, 190, 195, 200, 205, 210, 215, 220, 225, 230, 235, 240, 245, 250, 255, 260, 265, 270, 275, 280, 285, 290, 295, 300, 305, 310, 320, 330, 340, 350, 360, 370, 380, 390, 400, 410, 420, 430, 440, 450, 460, 470, 480, 490, 500, 510, 520, 530, 540, 550, 560, 570, 580, 590, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, 600, }; static const int gf_adjust_table[101] = { 100, 115, 130, 145, 160, 175, 190, 200, 210, 220, 230, 240, 260, 270, 280, 290, 300, 310, 320, 330, 340, 350, 360, 370, 380, 390, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, 400, }; static const int gf_intra_usage_adjustment[20] = { 125, 120, 115, 110, 105, 100, 95, 85, 80, 75, 70, 65, 60, 55, 50, 50, 50, 50, 50, 50, }; static const int gf_interval_table[101] = { 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, }; static const unsigned int prior_key_frame_weight[KEY_FRAME_CONTEXT] = { 1, 2, 3, 4, 5 }; void vp8_save_coding_context(VP8_COMP *cpi) { CODING_CONTEXT *const cc = &cpi->coding_context; /* Stores a snapshot of key state variables which can subsequently be * restored with a call to vp8_restore_coding_context. These functions are * intended for use in a re-code loop in vp8_compress_frame where the * quantizer value is adjusted between loop iterations. 
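 *
 * A minimal sketch of the intended calling pattern (illustrative only;
 * the real re-code loop has more exit conditions than shown):
 *
 *   vp8_save_coding_context(cpi);
 *   encode the frame at a trial Q;
 *   while (the frame size misses the over/undershoot limits) {
 *     vp8_restore_coding_context(cpi);   (rewind entropy/mode state)
 *     adjust Q and encode again;
 *   }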
*/ cc->frames_since_key = cpi->frames_since_key; cc->filter_level = cpi->common.filter_level; cc->frames_till_gf_update_due = cpi->frames_till_gf_update_due; cc->frames_since_golden = cpi->frames_since_golden; vp8_copy(cc->mvc, cpi->common.fc.mvc); vp8_copy(cc->mvcosts, cpi->rd_costs.mvcosts); vp8_copy(cc->ymode_prob, cpi->common.fc.ymode_prob); vp8_copy(cc->uv_mode_prob, cpi->common.fc.uv_mode_prob); vp8_copy(cc->ymode_count, cpi->mb.ymode_count); vp8_copy(cc->uv_mode_count, cpi->mb.uv_mode_count); /* Stats */ #ifdef MODE_STATS vp8_copy(cc->y_modes, y_modes); vp8_copy(cc->uv_modes, uv_modes); vp8_copy(cc->b_modes, b_modes); vp8_copy(cc->inter_y_modes, inter_y_modes); vp8_copy(cc->inter_uv_modes, inter_uv_modes); vp8_copy(cc->inter_b_modes, inter_b_modes); #endif cc->this_frame_percent_intra = cpi->this_frame_percent_intra; } void vp8_restore_coding_context(VP8_COMP *cpi) { CODING_CONTEXT *const cc = &cpi->coding_context; /* Restore key state variables to the snapshot state stored in the * previous call to vp8_save_coding_context. */ cpi->frames_since_key = cc->frames_since_key; cpi->common.filter_level = cc->filter_level; cpi->frames_till_gf_update_due = cc->frames_till_gf_update_due; cpi->frames_since_golden = cc->frames_since_golden; vp8_copy(cpi->common.fc.mvc, cc->mvc); vp8_copy(cpi->rd_costs.mvcosts, cc->mvcosts); vp8_copy(cpi->common.fc.ymode_prob, cc->ymode_prob); vp8_copy(cpi->common.fc.uv_mode_prob, cc->uv_mode_prob); vp8_copy(cpi->mb.ymode_count, cc->ymode_count); vp8_copy(cpi->mb.uv_mode_count, cc->uv_mode_count); /* Stats */ #ifdef MODE_STATS vp8_copy(y_modes, cc->y_modes); vp8_copy(uv_modes, cc->uv_modes); vp8_copy(b_modes, cc->b_modes); vp8_copy(inter_y_modes, cc->inter_y_modes); vp8_copy(inter_uv_modes, cc->inter_uv_modes); vp8_copy(inter_b_modes, cc->inter_b_modes); #endif cpi->this_frame_percent_intra = cc->this_frame_percent_intra; } void vp8_setup_key_frame(VP8_COMP *cpi) { /* Setup for Key frame: */ vp8_default_coef_probs(&cpi->common); memcpy(cpi->common.fc.mvc, vp8_default_mv_context, sizeof(vp8_default_mv_context)); { int flag[2] = { 1, 1 }; vp8_build_component_cost_table( cpi->mb.mvcost, (const MV_CONTEXT *)cpi->common.fc.mvc, flag); } /* Make sure we initialize separate contexts for altref, gold, and normal. * TODO: shouldn't need 3 different copies of the structure to do this! */ memcpy(&cpi->lfc_a, &cpi->common.fc, sizeof(cpi->common.fc)); memcpy(&cpi->lfc_g, &cpi->common.fc, sizeof(cpi->common.fc)); memcpy(&cpi->lfc_n, &cpi->common.fc, sizeof(cpi->common.fc)); cpi->common.filter_level = cpi->common.base_qindex * 3 / 8; /* Provisional interval before next GF */ if (cpi->auto_gold) { cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; } else { cpi->frames_till_gf_update_due = DEFAULT_GF_INTERVAL; } cpi->common.refresh_golden_frame = 1; cpi->common.refresh_alt_ref_frame = 1; } static int estimate_bits_at_q(int frame_kind, int Q, int MBs, double correction_factor) { int Bpm = (int)(.5 + correction_factor * vp8_bits_per_mb[frame_kind][Q]); /* Attempt to retain reasonable accuracy without overflow. The cutoff is * chosen such that the maximum product of Bpm and MBs fits 31 bits. The * largest Bpm takes 20 bits.
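 *
 * Illustrative numbers (computed from the tables above, not taken from
 * upstream notes): a 1080p frame, coded as 1920x1088, has
 * (1920 / 16) * (1088 / 16) = 8160 macroblocks, which exceeds the
 * 1 << 11 = 2048 cutoff, so the branch below shifts Bpm down by
 * BPER_MB_NORMBITS before multiplying to stay within 31 bits.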
*/ if (MBs > (1 << 11)) { return (Bpm >> BPER_MB_NORMBITS) * MBs; } else { return (Bpm * MBs) >> BPER_MB_NORMBITS; } } static void calc_iframe_target_size(VP8_COMP *cpi) { /* boost defaults to half second */ int kf_boost; uint64_t target; /* Clear down mmx registers to allow floating point in what follows */ vpx_clear_system_state(); if (cpi->oxcf.fixed_q >= 0) { int Q = cpi->oxcf.key_q; target = estimate_bits_at_q(INTRA_FRAME, Q, cpi->common.MBs, cpi->key_frame_rate_correction_factor); } else if (cpi->pass == 2) { /* New Two pass RC */ target = cpi->per_frame_bandwidth; } /* First Frame is a special case */ else if (cpi->common.current_video_frame == 0) { /* In 1 pass there is no information on which to base size, so use * bandwidth per second * fraction of the initial buffer * level */ target = cpi->oxcf.starting_buffer_level / 2; if (target > cpi->oxcf.target_bandwidth * 3 / 2) { target = cpi->oxcf.target_bandwidth * 3 / 2; } } else { /* if this keyframe was forced, use a more recent Q estimate */ int Q = (cpi->common.frame_flags & FRAMEFLAGS_KEY) ? cpi->avg_frame_qindex : cpi->ni_av_qi; int initial_boost = 32; /* |3.0 * per_frame_bandwidth| */ /* Boost depends somewhat on frame rate: only used for 1 layer case. */ if (cpi->oxcf.number_of_layers == 1) { kf_boost = VPXMAX(initial_boost, (int)(2 * cpi->output_framerate - 16)); } else { /* Initial factor: set target size to: |3.0 * per_frame_bandwidth|. */ kf_boost = initial_boost; } /* adjustment up based on q: this factor ranges from ~1.2 to 2.2. */ kf_boost = kf_boost * kf_boost_qadjustment[Q] / 100; /* frame separation adjustment (down) */ if (cpi->frames_since_key < cpi->output_framerate / 2) { kf_boost = (int)(kf_boost * cpi->frames_since_key / (cpi->output_framerate / 2)); } /* Minimal target size is |2* per_frame_bandwidth|. */ if (kf_boost < 16) kf_boost = 16; target = ((16 + kf_boost) * cpi->per_frame_bandwidth) >> 4; } if (cpi->oxcf.rc_max_intra_bitrate_pct) { unsigned int max_rate = cpi->per_frame_bandwidth * cpi->oxcf.rc_max_intra_bitrate_pct / 100; if (target > max_rate) target = max_rate; } cpi->this_frame_target = (int)target; /* TODO: if we separate rate targeting from Q targeting, move this. * Reset the active worst quality to the baseline value for key frames. */ if (cpi->pass != 2) cpi->active_worst_quality = cpi->worst_quality; #if 0 { FILE *f; f = fopen("kf_boost.stt", "a"); fprintf(f, " %8u %10d %10d %10d\n", cpi->common.current_video_frame, cpi->gfu_boost, cpi->baseline_gf_interval, cpi->source_alt_ref_pending); fclose(f); } #endif } /* Do the best we can to define the parameters for the next GF based on what * information we have available. */ static void calc_gf_params(VP8_COMP *cpi) { int Q = (cpi->oxcf.fixed_q < 0) ?
cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; int Boost = 0; int gf_frame_useage = 0; /* Golden frame usage since last GF */ int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] + cpi->recent_ref_frame_usage[LAST_FRAME] + cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]; int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); if (tot_mbs) { gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]) * 100 / tot_mbs; } if (pct_gf_active > gf_frame_useage) gf_frame_useage = pct_gf_active; /* Not two pass */ if (cpi->pass != 2) { /* Single Pass lagged mode: TBD */ if (0) { } /* Single Pass compression: Has to use current and historical data */ else { #if 0 /* Experimental code */ int index = cpi->one_pass_frame_index; int frames_to_scan = (cpi->max_gf_interval <= MAX_LAG_BUFFERS) ? cpi->max_gf_interval : MAX_LAG_BUFFERS; /* ************** Experimental code - incomplete */ /* double decay_val = 1.0; double IIAccumulator = 0.0; double last_iiaccumulator = 0.0; double IIRatio; cpi->one_pass_frame_index = cpi->common.current_video_frame%MAX_LAG_BUFFERS; for ( i = 0; i < (frames_to_scan - 1); i++ ) { if ( index < 0 ) index = MAX_LAG_BUFFERS; index --; if ( cpi->one_pass_frame_stats[index].frame_coded_error > 0.0 ) { IIRatio = cpi->one_pass_frame_stats[index].frame_intra_error / cpi->one_pass_frame_stats[index].frame_coded_error; if ( IIRatio > 30.0 ) IIRatio = 30.0; } else IIRatio = 30.0; IIAccumulator += IIRatio * decay_val; decay_val = decay_val * cpi->one_pass_frame_stats[index].frame_pcnt_inter; if ( (i > MIN_GF_INTERVAL) && ((IIAccumulator - last_iiaccumulator) < 2.0) ) { break; } last_iiaccumulator = IIAccumulator; } Boost = IIAccumulator*100.0/16.0; cpi->baseline_gf_interval = i; */ #else /*************************************************************/ /* OLD code */ /* Adjust boost based upon ambient Q */ Boost = GFQ_ADJUSTMENT; /* Adjust based upon most recently measured intra usage */ Boost = Boost * gf_intra_usage_adjustment[(cpi->this_frame_percent_intra < 15) ? cpi->this_frame_percent_intra : 14] / 100; /* Adjust gf boost based upon GF usage since last GF */ Boost = Boost * gf_adjust_table[gf_frame_useage] / 100; #endif } /* golden frame boost without recode loop often goes awry. Be * safe by keeping numbers down. */ if (!cpi->sf.recode_loop) { if (cpi->compressor_speed == 2) Boost = Boost / 2; } /* Apply an upper limit based on Q for 1 pass encodes */ if (Boost > kf_gf_boost_qlimits[Q] && (cpi->pass == 0)) { Boost = kf_gf_boost_qlimits[Q]; /* Apply lower limits to boost. */ } else if (Boost < 110) { Boost = 110; } /* Note the boost used */ cpi->last_boost = Boost; } /* Estimate next interval * This is updated once the real frame size/boost is known.
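 *
 * Worked example (illustrative): in 1-pass mode with last_boost = 1100,
 * the checks below add two frames to baseline_gf_interval (1100 > 750
 * and 1100 > 1000), before the gf_interval_table and max_gf_interval
 * clamps are applied.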
*/ if (cpi->oxcf.fixed_q == -1) { if (cpi->pass == 2) { /* 2 Pass */ cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; } else { /* 1 Pass */ cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; if (cpi->last_boost > 750) cpi->frames_till_gf_update_due++; if (cpi->last_boost > 1000) cpi->frames_till_gf_update_due++; if (cpi->last_boost > 1250) cpi->frames_till_gf_update_due++; if (cpi->last_boost >= 1500) cpi->frames_till_gf_update_due++; if (gf_interval_table[gf_frame_useage] > cpi->frames_till_gf_update_due) { cpi->frames_till_gf_update_due = gf_interval_table[gf_frame_useage]; } if (cpi->frames_till_gf_update_due > cpi->max_gf_interval) { cpi->frames_till_gf_update_due = cpi->max_gf_interval; } } } else { cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; } /* ARF on or off */ if (cpi->pass != 2) { /* For now Alt ref is not allowed except in 2 pass modes. */ cpi->source_alt_ref_pending = 0; /*if ( cpi->oxcf.fixed_q == -1) { if ( cpi->oxcf.play_alternate && (cpi->last_boost > (100 + (AF_THRESH*cpi->frames_till_gf_update_due)) ) ) cpi->source_alt_ref_pending = 1; else cpi->source_alt_ref_pending = 0; }*/ } } static void calc_pframe_target_size(VP8_COMP *cpi) { int min_frame_target; int old_per_frame_bandwidth = cpi->per_frame_bandwidth; if (cpi->current_layer > 0) { cpi->per_frame_bandwidth = cpi->layer_context[cpi->current_layer].avg_frame_size_for_layer; } min_frame_target = 0; if (cpi->pass == 2) { min_frame_target = cpi->min_frame_bandwidth; if (min_frame_target < (cpi->av_per_frame_bandwidth >> 5)) { min_frame_target = cpi->av_per_frame_bandwidth >> 5; } } else if (min_frame_target < cpi->per_frame_bandwidth / 4) { min_frame_target = cpi->per_frame_bandwidth / 4; } /* Special alt reference frame case */ if ((cpi->common.refresh_alt_ref_frame) && (cpi->oxcf.number_of_layers == 1)) { if (cpi->pass == 2) { /* Per frame bit target for the alt ref frame */ cpi->per_frame_bandwidth = cpi->twopass.gf_bits; cpi->this_frame_target = cpi->per_frame_bandwidth; } /* One Pass ??? TBD */ } /* Normal frames (gf and inter) */ else { /* 2 pass */ if (cpi->pass == 2) { cpi->this_frame_target = cpi->per_frame_bandwidth; } /* 1 pass */ else { int Adjustment; /* Make a rate adjustment to recover bits spent in the key frame. * Test to see if the key frame inter data rate correction * should still be in force */ if (cpi->kf_overspend_bits > 0) { Adjustment = (cpi->kf_bitrate_adjustment <= cpi->kf_overspend_bits) ? cpi->kf_bitrate_adjustment : cpi->kf_overspend_bits; if (Adjustment > (cpi->per_frame_bandwidth - min_frame_target)) { Adjustment = (cpi->per_frame_bandwidth - min_frame_target); } cpi->kf_overspend_bits -= Adjustment; /* Calculate an inter frame bandwidth target for the next * few frames designed to recover any extra bits spent on * the key frame. */ cpi->this_frame_target = cpi->per_frame_bandwidth - Adjustment; if (cpi->this_frame_target < min_frame_target) { cpi->this_frame_target = min_frame_target; } } else { cpi->this_frame_target = cpi->per_frame_bandwidth; } /* If appropriate make an adjustment to recover bits spent on a * recent GF */ if ((cpi->gf_overspend_bits > 0) && (cpi->this_frame_target > min_frame_target)) { Adjustment = (cpi->non_gf_bitrate_adjustment <= cpi->gf_overspend_bits) ?
cpi->non_gf_bitrate_adjustment : cpi->gf_overspend_bits; if (Adjustment > (cpi->this_frame_target - min_frame_target)) { Adjustment = (cpi->this_frame_target - min_frame_target); } cpi->gf_overspend_bits -= Adjustment; cpi->this_frame_target -= Adjustment; } /* Apply small + and - boosts for non gf frames */ if ((cpi->last_boost > 150) && (cpi->frames_till_gf_update_due > 0) && (cpi->current_gf_interval >= (MIN_GF_INTERVAL << 1))) { /* % Adjustment limited to the range 1% to 10% */ Adjustment = (cpi->last_boost - 100) >> 5; if (Adjustment < 1) { Adjustment = 1; } else if (Adjustment > 10) { Adjustment = 10; } /* Convert to bits */ Adjustment = (cpi->this_frame_target * Adjustment) / 100; if (Adjustment > (cpi->this_frame_target - min_frame_target)) { Adjustment = (cpi->this_frame_target - min_frame_target); } if (cpi->frames_since_golden == (cpi->current_gf_interval >> 1)) { Adjustment = (cpi->current_gf_interval - 1) * Adjustment; // Limit adjustment to 10% of current target. if (Adjustment > (10 * cpi->this_frame_target) / 100) { Adjustment = (10 * cpi->this_frame_target) / 100; } cpi->this_frame_target += Adjustment; } else { cpi->this_frame_target -= Adjustment; } } } } /* Sanity check that the total sum of adjustments is not above the * maximum allowed. That is, having allowed for KF and GF penalties, * we have not pushed the current interframe target too low. If the * adjustment we apply here is not capable of recovering all the extra * bits we have spent in the KF or GF then the remainder will have to * be recovered over a longer time span via other buffer / rate control * mechanisms. */ if (cpi->this_frame_target < min_frame_target) { cpi->this_frame_target = min_frame_target; } if (!cpi->common.refresh_alt_ref_frame) { /* Note the baseline target data rate for this inter frame. */ cpi->inter_frame_target = cpi->this_frame_target; } /* One Pass specific code */ if (cpi->pass == 0) { /* Adapt target frame size with respect to any buffering constraints: */ if (cpi->buffered_mode) { int one_percent_bits = (int)(1 + cpi->oxcf.optimal_buffer_level / 100); if ((cpi->buffer_level < cpi->oxcf.optimal_buffer_level) || (cpi->bits_off_target < cpi->oxcf.optimal_buffer_level)) { int percent_low = 0; /* Decide whether or not we need to adjust the frame data * rate target. * * If we are below the optimal buffer fullness level * and adherence to buffering constraints is important to * the end usage then adjust the per frame target. */ if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && (cpi->buffer_level < cpi->oxcf.optimal_buffer_level)) { percent_low = (int)((cpi->oxcf.optimal_buffer_level - cpi->buffer_level) / one_percent_bits); } /* Are we overshooting the long term clip data rate... */ else if (cpi->bits_off_target < 0) { /* Adjust per frame data target downwards to compensate. */ percent_low = (int)(100 * -cpi->bits_off_target / (cpi->total_byte_count * 8)); } if (percent_low > cpi->oxcf.under_shoot_pct) { percent_low = cpi->oxcf.under_shoot_pct; } else if (percent_low < 0) { percent_low = 0; } /* lower the target bandwidth for this frame. */ cpi->this_frame_target -= (cpi->this_frame_target * percent_low) / 200; /* Are we allowing control of active_worst_allowed_q * according to buffer level. */ if (cpi->auto_worst_q && cpi->ni_frames > 150) { int64_t critical_buffer_level; /* For streaming applications the most important factor is * cpi->buffer_level as this takes into account the * specified short term buffering constraints.
However, * hitting the long term clip data rate target is also * important. */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { /* Take the smaller of cpi->buffer_level and * cpi->bits_off_target */ critical_buffer_level = (cpi->buffer_level < cpi->bits_off_target) ? cpi->buffer_level : cpi->bits_off_target; } /* For local file playback short term buffering constraints * are less of an issue */ else { /* Consider only how we are doing for the clip as a * whole */ critical_buffer_level = cpi->bits_off_target; } /* Set the active worst quality based upon the selected * buffer fullness number. */ if (critical_buffer_level < cpi->oxcf.optimal_buffer_level) { if (critical_buffer_level > (cpi->oxcf.optimal_buffer_level >> 2)) { int64_t qadjustment_range = cpi->worst_quality - cpi->ni_av_qi; int64_t above_base = (critical_buffer_level - (cpi->oxcf.optimal_buffer_level >> 2)); /* Step active worst quality down from * cpi->ni_av_qi when (critical_buffer_level == * cpi->optimal_buffer_level) to * cpi->worst_quality when * (critical_buffer_level == * cpi->optimal_buffer_level >> 2) */ cpi->active_worst_quality = cpi->worst_quality - (int)((qadjustment_range * above_base) / (cpi->oxcf.optimal_buffer_level * 3 >> 2)); } else { cpi->active_worst_quality = cpi->worst_quality; } } else { cpi->active_worst_quality = cpi->ni_av_qi; } } else { cpi->active_worst_quality = cpi->worst_quality; } } else { int percent_high = 0; if ((cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && (cpi->buffer_level > cpi->oxcf.optimal_buffer_level)) { percent_high = (int)((cpi->buffer_level - cpi->oxcf.optimal_buffer_level) / one_percent_bits); } else if (cpi->bits_off_target > cpi->oxcf.optimal_buffer_level) { percent_high = (int)((100 * cpi->bits_off_target) / (cpi->total_byte_count * 8)); } if (percent_high > cpi->oxcf.over_shoot_pct) { percent_high = cpi->oxcf.over_shoot_pct; } else if (percent_high < 0) { percent_high = 0; } cpi->this_frame_target += (cpi->this_frame_target * percent_high) / 200; /* Are we allowing control of active_worst_allowed_q according * to buffer level. */ if (cpi->auto_worst_q && cpi->ni_frames > 150) { /* When using the relaxed buffer model stick to the * user specified value */ cpi->active_worst_quality = cpi->ni_av_qi; } else { cpi->active_worst_quality = cpi->worst_quality; } } /* Set active_best_quality to prevent quality rising too high */ cpi->active_best_quality = cpi->best_quality; /* Worst quality obviously must not be better than best quality */ if (cpi->active_worst_quality <= cpi->active_best_quality) { cpi->active_worst_quality = cpi->active_best_quality + 1; } if (cpi->active_worst_quality > 127) cpi->active_worst_quality = 127; } /* Unbuffered mode (eg. video conferencing) */ else { /* Set the active worst quality */ cpi->active_worst_quality = cpi->worst_quality; } /* Special trap for constrained quality mode * "active_worst_quality" may never drop below cq level * for any frame type. */ if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY && cpi->active_worst_quality < cpi->cq_target_quality) { cpi->active_worst_quality = cpi->cq_target_quality; } } /* Test to see if we have to drop a frame * The auto-drop frame code is only used in buffered mode. * In unbuffered mode (eg. video conferencing) the decision to * code or drop a frame is made outside the codec in response to real * world comms or buffer considerations.
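 *
 * Worked example (illustrative): with av_per_frame_bandwidth = 20000
 * bits and buffer_level = -5000, the frame is dropped below and the
 * 20000 unspent bits are credited back to bits_off_target, subject to
 * the maximum_buffer_size clamp.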
*/ if (cpi->drop_frames_allowed && (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) && ((cpi->common.frame_type != KEY_FRAME))) { /* Check for a buffer underrun crisis in which case we have to drop * a frame */ if ((cpi->buffer_level < 0)) { #if 0 FILE *f = fopen("dec.stt", "a"); fprintf(f, "%10d %10d %10d %10d ***** BUFFER EMPTY\n", (int) cpi->common.current_video_frame, cpi->decimation_factor, cpi->common.horiz_scale, (cpi->buffer_level * 100) / cpi->oxcf.optimal_buffer_level); fclose(f); #endif cpi->drop_frame = 1; /* Update the buffer level variable. */ cpi->bits_off_target += cpi->av_per_frame_bandwidth; if (cpi->bits_off_target > cpi->oxcf.maximum_buffer_size) { cpi->bits_off_target = (int)cpi->oxcf.maximum_buffer_size; } cpi->buffer_level = cpi->bits_off_target; if (cpi->oxcf.number_of_layers > 1) { unsigned int i; // Propagate bits saved by dropping the frame to higher layers. for (i = cpi->current_layer + 1; i < cpi->oxcf.number_of_layers; ++i) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; lc->bits_off_target += (int)(lc->target_bandwidth / lc->framerate); if (lc->bits_off_target > lc->maximum_buffer_size) { lc->bits_off_target = lc->maximum_buffer_size; } lc->buffer_level = lc->bits_off_target; } } } } /* Adjust target frame size for Golden Frames: */ if (cpi->oxcf.error_resilient_mode == 0 && (cpi->frames_till_gf_update_due == 0) && !cpi->drop_frame) { if (!cpi->gf_update_onepass_cbr) { int Q = (cpi->oxcf.fixed_q < 0) ? cpi->last_q[INTER_FRAME] : cpi->oxcf.fixed_q; int gf_frame_useage = 0; /* Golden frame usage since last GF */ int tot_mbs = cpi->recent_ref_frame_usage[INTRA_FRAME] + cpi->recent_ref_frame_usage[LAST_FRAME] + cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]; int pct_gf_active = (100 * cpi->gf_active_count) / (cpi->common.mb_rows * cpi->common.mb_cols); if (tot_mbs) { gf_frame_useage = (cpi->recent_ref_frame_usage[GOLDEN_FRAME] + cpi->recent_ref_frame_usage[ALTREF_FRAME]) * 100 / tot_mbs; } if (pct_gf_active > gf_frame_useage) gf_frame_useage = pct_gf_active; /* Is a fixed manual GF frequency being used? */ if (cpi->auto_gold) { /* For one pass throw a GF if recent frame intra usage is * low or the GF usage is high */ if ((cpi->pass == 0) && (cpi->this_frame_percent_intra < 15 || gf_frame_useage >= 5)) { cpi->common.refresh_golden_frame = 1; /* Two pass GF decision */ } else if (cpi->pass == 2) { cpi->common.refresh_golden_frame = 1; } } #if 0 /* Debug stats */ if (0) { FILE *f; f = fopen("gf_useaget.stt", "a"); fprintf(f, " %8ld %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame, cpi->gfu_boost, GFQ_ADJUSTMENT, cpi->gfu_boost, gf_frame_useage); fclose(f); } #endif if (cpi->common.refresh_golden_frame == 1) { #if 0 if (0) { FILE *f; f = fopen("GFexit.stt", "a"); fprintf(f, "%8ld GF coded\n", cpi->common.current_video_frame); fclose(f); } #endif if (cpi->auto_adjust_gold_quantizer) { calc_gf_params(cpi); } /* If we are using alternate ref instead of gf then do not apply the * boost. It will instead be applied to the altref update. Jim's * modified boost */ if (!cpi->source_alt_ref_active) { if (cpi->oxcf.fixed_q < 0) { if (cpi->pass == 2) { /* The spend on the GF is defined in the two pass * code for two pass encodes */ cpi->this_frame_target = cpi->per_frame_bandwidth; } else { int Boost = cpi->last_boost; int frames_in_section = cpi->frames_till_gf_update_due + 1; int allocation_chunks = (frames_in_section * 100) + (Boost - 100); int bits_in_section = cpi->inter_frame_target * frames_in_section; /* Normalize Altboost
and allocation chunks down to * prevent overflow */ while (Boost > 1000) { Boost /= 2; allocation_chunks /= 2; } /* Avoid loss of precision while avoiding overflow */ if ((bits_in_section >> 7) > allocation_chunks) { cpi->this_frame_target = Boost * (bits_in_section / allocation_chunks); } else { cpi->this_frame_target = (Boost * bits_in_section) / allocation_chunks; } } } else { cpi->this_frame_target = (estimate_bits_at_q(1, Q, cpi->common.MBs, 1.0) * cpi->last_boost) / 100; } } else { /* If there is an active ARF at this location use the minimum * bits on this frame even if it is a constructed arf. * The active maximum quantizer ensures that an appropriate * number of bits will be spent if needed for constructed ARFs. */ cpi->this_frame_target = 0; } cpi->current_gf_interval = cpi->frames_till_gf_update_due; } } else { // Special case for 1 pass CBR: fixed gf period. // TODO(marpan): Adjust this boost/interval logic. // If gf_cbr_boost_pct is small (below threshold) set the flag // gf_noboost_onepass_cbr = 1, which forces the gf to use the same // rate correction factor as last. cpi->gf_noboost_onepass_cbr = (cpi->oxcf.gf_cbr_boost_pct <= 100); cpi->baseline_gf_interval = cpi->gf_interval_onepass_cbr; // Skip this update if the zeromv_count is low. if (cpi->zeromv_count > (cpi->common.MBs >> 1)) { cpi->common.refresh_golden_frame = 1; cpi->this_frame_target = (cpi->this_frame_target * (100 + cpi->oxcf.gf_cbr_boost_pct)) / 100; } cpi->frames_till_gf_update_due = cpi->baseline_gf_interval; cpi->current_gf_interval = cpi->frames_till_gf_update_due; } } cpi->per_frame_bandwidth = old_per_frame_bandwidth; } void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var) { int Q = cpi->common.base_qindex; int correction_factor = 100; double rate_correction_factor; double adjustment_limit; int projected_size_based_on_q = 0; /* Clear down mmx registers to allow floating point in what follows */ vpx_clear_system_state(); if (cpi->common.frame_type == KEY_FRAME) { rate_correction_factor = cpi->key_frame_rate_correction_factor; } else { if (cpi->oxcf.number_of_layers == 1 && !cpi->gf_noboost_onepass_cbr && (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)) { rate_correction_factor = cpi->gf_rate_correction_factor; } else { rate_correction_factor = cpi->rate_correction_factor; } } /* Work out how big we would have expected the frame to be at this Q * given the current correction factor. Stay in double to avoid int * overflow when values are large */ projected_size_based_on_q = (int)(((.5 + rate_correction_factor * vp8_bits_per_mb[cpi->common.frame_type][Q]) * cpi->common.MBs) / (1 << BPER_MB_NORMBITS)); /* Make some allowance for cpi->zbin_over_quant */ if (cpi->mb.zbin_over_quant > 0) { int Z = cpi->mb.zbin_over_quant; double Factor = 0.99; double factor_adjustment = 0.01 / 256.0; while (Z > 0) { Z--; projected_size_based_on_q = (int)(Factor * projected_size_based_on_q); Factor += factor_adjustment; if (Factor >= 0.999) Factor = 0.999; } } /* Work out a size correction factor.
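 *
 * Worked example (illustrative): if the frame actually used 15000 bits
 * but 12000 were predicted at this Q, correction_factor = 100 * 15000 /
 * 12000 = 125; with adjustment_limit = 0.75 the damped value below
 * becomes (int)(100.5 + 25 * 0.75) = 119, i.e. the rate correction
 * factor is scaled up by 19%.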
*/ if (projected_size_based_on_q > 0) { correction_factor = (100 * cpi->projected_frame_size) / projected_size_based_on_q; } /* More heavily damped adjustment used if we have been oscillating * either side of target */ switch (damp_var) { case 0: adjustment_limit = 0.75; break; case 1: adjustment_limit = 0.375; break; case 2: default: adjustment_limit = 0.25; break; } if (correction_factor > 102) { /* We are not already at the worst allowable quality */ correction_factor = (int)(100.5 + ((correction_factor - 100) * adjustment_limit)); rate_correction_factor = ((rate_correction_factor * correction_factor) / 100); /* Keep rate_correction_factor within limits */ if (rate_correction_factor > MAX_BPB_FACTOR) { rate_correction_factor = MAX_BPB_FACTOR; } } else if (correction_factor < 99) { /* We are not already at the best allowable quality */ correction_factor = (int)(100.5 - ((100 - correction_factor) * adjustment_limit)); rate_correction_factor = ((rate_correction_factor * correction_factor) / 100); /* Keep rate_correction_factor within limits */ if (rate_correction_factor < MIN_BPB_FACTOR) { rate_correction_factor = MIN_BPB_FACTOR; } } if (cpi->common.frame_type == KEY_FRAME) { cpi->key_frame_rate_correction_factor = rate_correction_factor; } else { if (cpi->oxcf.number_of_layers == 1 && !cpi->gf_noboost_onepass_cbr && (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)) { cpi->gf_rate_correction_factor = rate_correction_factor; } else { cpi->rate_correction_factor = rate_correction_factor; } } } static int limit_q_cbr_inter(int last_q, int current_q) { int limit_down = 12; if (last_q - current_q > limit_down) return (last_q - limit_down); else return current_q; } int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame) { int Q = cpi->active_worst_quality; if (cpi->force_maxqp == 1) { cpi->active_worst_quality = cpi->worst_quality; return cpi->worst_quality; } /* Reset Zbin OQ value */ cpi->mb.zbin_over_quant = 0; if (cpi->oxcf.fixed_q >= 0) { Q = cpi->oxcf.fixed_q; if (cpi->common.frame_type == KEY_FRAME) { Q = cpi->oxcf.key_q; } else if (cpi->oxcf.number_of_layers == 1 && cpi->common.refresh_alt_ref_frame && !cpi->gf_noboost_onepass_cbr) { Q = cpi->oxcf.alt_q; } else if (cpi->oxcf.number_of_layers == 1 && cpi->common.refresh_golden_frame && !cpi->gf_noboost_onepass_cbr) { Q = cpi->oxcf.gold_q; } } else { int i; int last_error = INT_MAX; int target_bits_per_mb; int bits_per_mb_at_this_q; double correction_factor; /* Select the appropriate correction factor based upon type of frame. 
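 *
 * Illustrative numbers for the search loop further below: with
 * correction_factor = 1.0 and target_bits_per_mb = 60000, the scan
 * upwards from active_best_quality stops at the first inter-table
 * entry <= 60000 (the entry valued 59375 in the table above), and
 * that index becomes Q.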
*/ if (cpi->common.frame_type == KEY_FRAME) { correction_factor = cpi->key_frame_rate_correction_factor; } else { if (cpi->oxcf.number_of_layers == 1 && !cpi->gf_noboost_onepass_cbr && (cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame)) { correction_factor = cpi->gf_rate_correction_factor; } else { correction_factor = cpi->rate_correction_factor; } } /* Calculate required scaling factor based on target frame size and * size of frame produced using previous Q */ if (target_bits_per_frame >= (INT_MAX >> BPER_MB_NORMBITS)) { /* Case where we would overflow int */ target_bits_per_mb = (target_bits_per_frame / cpi->common.MBs) << BPER_MB_NORMBITS; } else { target_bits_per_mb = (target_bits_per_frame << BPER_MB_NORMBITS) / cpi->common.MBs; } i = cpi->active_best_quality; do { bits_per_mb_at_this_q = (int)(.5 + correction_factor * vp8_bits_per_mb[cpi->common.frame_type][i]); if (bits_per_mb_at_this_q <= target_bits_per_mb) { if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error) { Q = i; } else { Q = i - 1; } break; } else { last_error = bits_per_mb_at_this_q - target_bits_per_mb; } } while (++i <= cpi->active_worst_quality); /* If we are at MAXQ then enable Q over-run which seeks to claw * back additional bits through things like the RD multiplier * and zero bin size. */ if (Q >= MAXQ) { int zbin_oqmax; double Factor = 0.99; double factor_adjustment = 0.01 / 256.0; if (cpi->common.frame_type == KEY_FRAME) { zbin_oqmax = 0; } else if (cpi->oxcf.number_of_layers == 1 && !cpi->gf_noboost_onepass_cbr && (cpi->common.refresh_alt_ref_frame || (cpi->common.refresh_golden_frame && !cpi->source_alt_ref_active))) { zbin_oqmax = 16; } else { zbin_oqmax = ZBIN_OQ_MAX; } /*{ double Factor = (double)target_bits_per_mb/(double)bits_per_mb_at_this_q; double Oq; Factor = Factor/1.2683; Oq = pow( Factor, (1.0/-0.165) ); if ( Oq > zbin_oqmax ) Oq = zbin_oqmax; cpi->zbin_over_quant = (int)Oq; }*/ /* Each increment in the zbin is assumed to have a fixed effect * on bitrate. This is of course not true. The effect will be * highly clip dependent and may well have sudden steps. The * idea here is to achieve higher effective quantizers than the * normal maximum by expanding the zero bin and hence * decreasing the number of low magnitude non zero coefficients. */ while (cpi->mb.zbin_over_quant < zbin_oqmax) { cpi->mb.zbin_over_quant++; if (cpi->mb.zbin_over_quant > zbin_oqmax) { cpi->mb.zbin_over_quant = zbin_oqmax; } /* Adjust bits_per_mb_at_this_q estimate */ bits_per_mb_at_this_q = (int)(Factor * bits_per_mb_at_this_q); Factor += factor_adjustment; if (Factor >= 0.999) Factor = 0.999; /* Break out if we get down to the target rate */ if (bits_per_mb_at_this_q <= target_bits_per_mb) break; } } } // Limit decrease in Q for 1 pass CBR screen content mode. if (cpi->common.frame_type != KEY_FRAME && cpi->pass == 0 && cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER && cpi->oxcf.screen_content_mode) Q = limit_q_cbr_inter(cpi->last_q[1], Q); return Q; } static int estimate_keyframe_frequency(VP8_COMP *cpi) { int i; /* Average key frame frequency */ int av_key_frame_frequency = 0; /* First key frame at start of sequence is a special case. We have no * frequency data. */ if (cpi->key_frame_count == 1) { /* Assume a default of 1 kf every 2 seconds, or the max kf interval, * whichever is smaller. */ int key_freq = cpi->oxcf.key_freq > 0 ?
cpi->oxcf.key_freq : 1; av_key_frame_frequency = 1 + (int)cpi->output_framerate * 2; if (cpi->oxcf.auto_key && av_key_frame_frequency > key_freq) { av_key_frame_frequency = key_freq; } cpi->prior_key_frame_distance[KEY_FRAME_CONTEXT - 1] = av_key_frame_frequency; } else { unsigned int total_weight = 0; int last_kf_interval = (cpi->frames_since_key > 0) ? cpi->frames_since_key : 1; /* reset keyframe context and calculate weighted average of last * KEY_FRAME_CONTEXT keyframes */ for (i = 0; i < KEY_FRAME_CONTEXT; ++i) { if (i < KEY_FRAME_CONTEXT - 1) { cpi->prior_key_frame_distance[i] = cpi->prior_key_frame_distance[i + 1]; } else { cpi->prior_key_frame_distance[i] = last_kf_interval; } av_key_frame_frequency += prior_key_frame_weight[i] * cpi->prior_key_frame_distance[i]; total_weight += prior_key_frame_weight[i]; } av_key_frame_frequency /= total_weight; } // TODO (marpan): Given the checks above, |av_key_frame_frequency| // should always be above 0. But for now we keep the sanity check in. if (av_key_frame_frequency == 0) av_key_frame_frequency = 1; return av_key_frame_frequency; } void vp8_adjust_key_frame_context(VP8_COMP *cpi) { /* Clear down mmx registers to allow floating point in what follows */ vpx_clear_system_state(); /* Do we have any key frame overspend to recover? */ /* Two-pass overspend handled elsewhere. */ if ((cpi->pass != 2) && (cpi->projected_frame_size > cpi->per_frame_bandwidth)) { int overspend; /* Update the count of key frame overspend to be recovered in * subsequent frames. A portion of the KF overspend is treated as gf * overspend (and hence recovered more quickly) as the kf is also a * gf. Otherwise the few frames following each kf tend to get more * bits allocated than those following other gfs. */ overspend = (cpi->projected_frame_size - cpi->per_frame_bandwidth); if (cpi->oxcf.number_of_layers > 1) { cpi->kf_overspend_bits += overspend; } else { cpi->kf_overspend_bits += overspend * 7 / 8; cpi->gf_overspend_bits += overspend * 1 / 8; } /* Work out how much to try and recover per frame. */ cpi->kf_bitrate_adjustment = cpi->kf_overspend_bits / estimate_keyframe_frequency(cpi); } cpi->frames_since_key = 0; cpi->key_frame_count++; } void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit) { /* Set-up bounds on acceptable frame size: */ if (cpi->oxcf.fixed_q >= 0) { /* Fixed Q scenario: frame size never outranges target * (there is no target!) 
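 *
 * For the non-fixed-Q paths below, an illustrative key frame case:
 * this_frame_target = 10000 gives limits of 10000 * 9 / 8 = 11250 and
 * 10000 * 7 / 8 = 8750, widened by the +/-200 floor at the end to
 * 11450 and 8550.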
*/ *frame_under_shoot_limit = 0; *frame_over_shoot_limit = INT_MAX; } else { if (cpi->common.frame_type == KEY_FRAME) { *frame_over_shoot_limit = cpi->this_frame_target * 9 / 8; *frame_under_shoot_limit = cpi->this_frame_target * 7 / 8; } else { if (cpi->oxcf.number_of_layers > 1 || cpi->common.refresh_alt_ref_frame || cpi->common.refresh_golden_frame) { *frame_over_shoot_limit = cpi->this_frame_target * 9 / 8; *frame_under_shoot_limit = cpi->this_frame_target * 7 / 8; } else { /* For CBR take buffer fullness into account */ if (cpi->oxcf.end_usage == USAGE_STREAM_FROM_SERVER) { if (cpi->buffer_level >= ((cpi->oxcf.optimal_buffer_level + cpi->oxcf.maximum_buffer_size) >> 1)) { /* Buffer is too full so relax overshoot and tighten * undershoot */ *frame_over_shoot_limit = cpi->this_frame_target * 12 / 8; *frame_under_shoot_limit = cpi->this_frame_target * 6 / 8; } else if (cpi->buffer_level <= (cpi->oxcf.optimal_buffer_level >> 1)) { /* Buffer is too low so relax undershoot and tighten * overshoot */ *frame_over_shoot_limit = cpi->this_frame_target * 10 / 8; *frame_under_shoot_limit = cpi->this_frame_target * 4 / 8; } else { *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8; } } /* VBR and CQ mode */ /* Note that tighter restrictions here can help quality * but hurt encode speed */ else { /* Strong overshoot limit for constrained quality */ if (cpi->oxcf.end_usage == USAGE_CONSTRAINED_QUALITY) { *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; *frame_under_shoot_limit = cpi->this_frame_target * 2 / 8; } else { *frame_over_shoot_limit = cpi->this_frame_target * 11 / 8; *frame_under_shoot_limit = cpi->this_frame_target * 5 / 8; } } } } /* For very small rate targets where the fractional adjustment * (eg * 7/8) may be tiny, make sure there is at least a minimum * range. */ *frame_over_shoot_limit += 200; *frame_under_shoot_limit -= 200; if (*frame_under_shoot_limit < 0) *frame_under_shoot_limit = 0; } } /* return of 0 means drop frame */ int vp8_pick_frame_size(VP8_COMP *cpi) { VP8_COMMON *cm = &cpi->common; if (cm->frame_type == KEY_FRAME) { calc_iframe_target_size(cpi); } else { calc_pframe_target_size(cpi); /* Check if we're dropping the frame: */ if (cpi->drop_frame) { cpi->drop_frame = 0; return 0; } } return 1; } // If this just-encoded frame (mcomp/transform/quant, but before loopfilter and // pack_bitstream) has large overshoot, and was not being encoded close to the // max QP, then drop this frame and force next frame to be encoded at max QP. // Allow this for screen_content_mode = 2, or if drop frames is allowed. // TODO(marpan): Should do this exit condition during the encode_frame // (i.e., halfway during the encoding of the frame) to save cycles. int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q) { int force_drop_overshoot = 0; #if CONFIG_MULTI_RES_ENCODING // Only check for dropping due to overshoot on the lowest stream. // If the lowest stream of the multi-res encoding was dropped due to // overshoot, then force dropping on all upper layer streams // (mr_encoder_id > 0).
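// Illustrative scenario: in a 3-stream multi-res encode, if the lowest
// resolution stream drops frame N here, is_frame_dropped_overshoot_maxqp
// makes both upper streams drop frame N as well, and all streams then
// encode the next frame at max QP via force_maxqp.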
LOWER_RES_FRAME_INFO *low_res_frame_info = (LOWER_RES_FRAME_INFO *)cpi->oxcf.mr_low_res_mode_info; if (cpi->oxcf.mr_total_resolutions > 1 && cpi->oxcf.mr_encoder_id > 0) { force_drop_overshoot = low_res_frame_info->is_frame_dropped_overshoot_maxqp; if (!force_drop_overshoot) { cpi->force_maxqp = 0; cpi->frames_since_last_drop_overshoot++; return 0; } } #endif if (cpi->common.frame_type != KEY_FRAME && (cpi->oxcf.screen_content_mode == 2 || (cpi->drop_frames_allowed && (force_drop_overshoot || (cpi->rate_correction_factor < (8.0f * MIN_BPB_FACTOR) && cpi->frames_since_last_drop_overshoot > (int)cpi->framerate))))) { // Note: the "projected_frame_size" from encode_frame() only gives estimate // of mode/motion vector rate (in non-rd mode): so below we only require // that projected_frame_size is somewhat greater than per-frame-bandwidth, // but add additional condition with high threshold on prediction residual. // QP threshold: only allow dropping if we are not close to qp_max. int thresh_qp = 3 * cpi->worst_quality >> 2; // Rate threshold, in bytes. int thresh_rate = 2 * (cpi->av_per_frame_bandwidth >> 3); // Threshold for the average (over all macroblocks) of the pixel-sum // residual error over 16x16 block. int thresh_pred_err_mb = (200 << 4); int pred_err_mb = (int)(cpi->mb.prediction_error / cpi->common.MBs); // Reduce/ignore thresh_rate if pred_err_mb much larger than its threshold, // give more weight to pred_err metric for overshoot detection. if (cpi->drop_frames_allowed && pred_err_mb > (thresh_pred_err_mb << 4)) thresh_rate = thresh_rate >> 3; if ((Q < thresh_qp && cpi->projected_frame_size > thresh_rate && pred_err_mb > thresh_pred_err_mb && pred_err_mb > 2 * cpi->last_pred_err_mb) || force_drop_overshoot) { unsigned int i; double new_correction_factor; int target_bits_per_mb; const int target_size = cpi->av_per_frame_bandwidth; // Flag to indicate we will force next frame to be encoded at max QP. cpi->force_maxqp = 1; // Reset the buffer levels. cpi->buffer_level = cpi->oxcf.optimal_buffer_level; cpi->bits_off_target = cpi->oxcf.optimal_buffer_level; // Compute a new rate correction factor, corresponding to the current // target frame size and max_QP, and adjust the rate correction factor // upwards, if needed. // This is to prevent a bad state where the re-encoded frame at max_QP // undershoots significantly, and then we end up dropping every other // frame because the QP/rate_correction_factor may have been too low // before the drop and then takes too long to come up. if (target_size >= (INT_MAX >> BPER_MB_NORMBITS)) { target_bits_per_mb = (target_size / cpi->common.MBs) << BPER_MB_NORMBITS; } else { target_bits_per_mb = (target_size << BPER_MB_NORMBITS) / cpi->common.MBs; } // Rate correction factor based on target_size_per_mb and max_QP. new_correction_factor = (double)target_bits_per_mb / (double)vp8_bits_per_mb[INTER_FRAME][cpi->worst_quality]; if (new_correction_factor > cpi->rate_correction_factor) { cpi->rate_correction_factor = VPXMIN(2.0 * cpi->rate_correction_factor, new_correction_factor); } if (cpi->rate_correction_factor > MAX_BPB_FACTOR) { cpi->rate_correction_factor = MAX_BPB_FACTOR; } // Drop this frame: update frame counters. cpi->common.current_video_frame++; cpi->frames_since_key++; cpi->temporal_pattern_counter++; cpi->frames_since_last_drop_overshoot = 0; if (cpi->oxcf.number_of_layers > 1) { // Set max_qp and rate correction for all temporal layers if overshoot // is detected. 
for (i = 0; i < cpi->oxcf.number_of_layers; ++i) { LAYER_CONTEXT *lc = &cpi->layer_context[i]; lc->force_maxqp = 1; lc->frames_since_last_drop_overshoot = 0; lc->rate_correction_factor = cpi->rate_correction_factor; } } #if CONFIG_MULTI_RES_ENCODING if (cpi->oxcf.mr_total_resolutions > 1) low_res_frame_info->is_frame_dropped_overshoot_maxqp = 1; #endif return 1; } cpi->force_maxqp = 0; cpi->frames_since_last_drop_overshoot++; #if CONFIG_MULTI_RES_ENCODING if (cpi->oxcf.mr_total_resolutions > 1) low_res_frame_info->is_frame_dropped_overshoot_maxqp = 0; #endif return 0; } cpi->force_maxqp = 0; cpi->frames_since_last_drop_overshoot++; #if CONFIG_MULTI_RES_ENCODING if (cpi->oxcf.mr_total_resolutions > 1) low_res_frame_info->is_frame_dropped_overshoot_maxqp = 0; #endif return 0; } libvpx-1.8.2/vp8/encoder/ratectrl.h000066400000000000000000000024761357355204000171700ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_RATECTRL_H_ #define VPX_VP8_ENCODER_RATECTRL_H_ #include "onyx_int.h" #ifdef __cplusplus extern "C" { #endif extern void vp8_save_coding_context(VP8_COMP *cpi); extern void vp8_restore_coding_context(VP8_COMP *cpi); extern void vp8_setup_key_frame(VP8_COMP *cpi); extern void vp8_update_rate_correction_factors(VP8_COMP *cpi, int damp_var); extern int vp8_regulate_q(VP8_COMP *cpi, int target_bits_per_frame); extern void vp8_adjust_key_frame_context(VP8_COMP *cpi); extern void vp8_compute_frame_size_bounds(VP8_COMP *cpi, int *frame_under_shoot_limit, int *frame_over_shoot_limit); /* return of 0 means drop frame */ extern int vp8_pick_frame_size(VP8_COMP *cpi); extern int vp8_drop_encodedframe_overshoot(VP8_COMP *cpi, int Q); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_RATECTRL_H_ libvpx-1.8.2/vp8/encoder/rdopt.c000066400000000000000000002245751357355204000165010ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <assert.h> #include <limits.h> #include <math.h> #include <stdio.h> #include <string.h> #include "vpx_config.h" #include "vp8_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "encodeframe.h" #include "tokenize.h" #include "treewriter.h" #include "onyx_int.h" #include "modecosts.h" #include "encodeintra.h" #include "pickinter.h" #include "vp8/common/common.h" #include "vp8/common/entropymode.h" #include "vp8/common/reconinter.h" #include "vp8/common/reconintra.h" #include "vp8/common/reconintra4x4.h" #include "vp8/common/findnearmv.h" #include "vp8/common/quant_common.h" #include "encodemb.h" #include "vp8/encoder/quantize.h" #include "vpx_dsp/variance.h" #include "vpx_ports/system_state.h" #include "mcomp.h" #include "rdopt.h" #include "vpx_mem/vpx_mem.h" #include "vp8/common/systemdependent.h" #if CONFIG_TEMPORAL_DENOISING #include "denoising.h" #endif extern void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x); #define MAXF(a, b) (((a) > (b)) ? (a) : (b)) typedef struct rate_distortion_struct { int rate2; int rate_y; int rate_uv; int distortion2; int distortion_uv; } RATE_DISTORTION; typedef struct best_mode_struct { int yrd; int rd; int intra_rd; MB_MODE_INFO mbmode; union b_mode_info bmodes[16]; PARTITION_INFO partition; } BEST_MODE; static const int auto_speed_thresh[17] = { 1000, 200, 150, 130, 150, 125, 120, 115, 115, 115, 115, 115, 115, 115, 115, 115, 105 }; const MB_PREDICTION_MODE vp8_mode_order[MAX_MODES] = { ZEROMV, DC_PRED, NEARESTMV, NEARMV, ZEROMV, NEARESTMV, ZEROMV, NEARESTMV, NEARMV, NEARMV, V_PRED, H_PRED, TM_PRED, NEWMV, NEWMV, NEWMV, SPLITMV, SPLITMV, SPLITMV, B_PRED, }; /* This table determines the search order in reference frame priority order, * which may not necessarily match INTRA,LAST,GOLDEN,ARF */ const int vp8_ref_frame_order[MAX_MODES] = { 1, 0, 1, 1, 2, 2, 3, 3, 2, 3, 0, 0, 0, 1, 2, 3, 1, 2, 3, 0, }; static void fill_token_costs( int c[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS][MAX_ENTROPY_TOKENS], const vp8_prob p[BLOCK_TYPES][COEF_BANDS][PREV_COEF_CONTEXTS] [ENTROPY_NODES]) { int i, j, k; for (i = 0; i < BLOCK_TYPES; ++i) { for (j = 0; j < COEF_BANDS; ++j) { for (k = 0; k < PREV_COEF_CONTEXTS; ++k) { /* check for pt=0 and band > 1 if block type 0 * and 0 if blocktype 1 */ if (k == 0 && j > (i == 0)) { vp8_cost_tokens2(c[i][j][k], p[i][j][k], vp8_coef_tree, 2); } else { vp8_cost_tokens(c[i][j][k], p[i][j][k], vp8_coef_tree); } } } } } static const int rd_iifactor[32] = { 4, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; /* values are now correlated to quantizer */ static const int sad_per_bit16lut[QINDEX_RANGE] = { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 14, 14 }; static const int sad_per_bit4lut[QINDEX_RANGE] = { 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, 12, 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, 15, 15, 16, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20, }; void vp8cx_initialize_me_consts(VP8_COMP *cpi,
int QIndex) { cpi->mb.sadperbit16 = sad_per_bit16lut[QIndex]; cpi->mb.sadperbit4 = sad_per_bit4lut[QIndex]; } void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue) { int q; int i; double capped_q = (Qvalue < 160) ? (double)Qvalue : 160.0; double rdconst = 2.80; vpx_clear_system_state(); /* Further tests required to see if optimum is different * for key frames, golden frames and arf frames. */ cpi->RDMULT = (int)(rdconst * (capped_q * capped_q)); /* Extend rate multiplier alongside quantizer zbin increases */ if (cpi->mb.zbin_over_quant > 0) { double oq_factor; double modq; /* Experimental code using the same basic equation as used for Q above * The units of cpi->mb.zbin_over_quant are 1/128 of Q bin size */ oq_factor = 1.0 + ((double)0.0015625 * cpi->mb.zbin_over_quant); modq = (int)((double)capped_q * oq_factor); cpi->RDMULT = (int)(rdconst * (modq * modq)); } if (cpi->pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { if (cpi->twopass.next_iiratio > 31) { cpi->RDMULT += (cpi->RDMULT * rd_iifactor[31]) >> 4; } else { cpi->RDMULT += (cpi->RDMULT * rd_iifactor[cpi->twopass.next_iiratio]) >> 4; } } cpi->mb.errorperbit = (cpi->RDMULT / 110); cpi->mb.errorperbit += (cpi->mb.errorperbit == 0); vp8_set_speed_features(cpi); for (i = 0; i < MAX_MODES; ++i) { x->mode_test_hit_counts[i] = 0; } q = (int)pow(Qvalue, 1.25); if (q < 8) q = 8; if (cpi->RDMULT > 1000) { cpi->RDDIV = 1; cpi->RDMULT /= 100; for (i = 0; i < MAX_MODES; ++i) { if (cpi->sf.thresh_mult[i] < INT_MAX) { x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q / 100; } else { x->rd_threshes[i] = INT_MAX; } cpi->rd_baseline_thresh[i] = x->rd_threshes[i]; } } else { cpi->RDDIV = 100; for (i = 0; i < MAX_MODES; ++i) { if (cpi->sf.thresh_mult[i] < (INT_MAX / q)) { x->rd_threshes[i] = cpi->sf.thresh_mult[i] * q; } else { x->rd_threshes[i] = INT_MAX; } cpi->rd_baseline_thresh[i] = x->rd_threshes[i]; } } { /* build token cost array for the type of frame we have now */ FRAME_CONTEXT *l = &cpi->lfc_n; if (cpi->common.refresh_alt_ref_frame) { l = &cpi->lfc_a; } else if (cpi->common.refresh_golden_frame) { l = &cpi->lfc_g; } fill_token_costs(cpi->mb.token_costs, (const vp8_prob(*)[8][3][11])l->coef_probs); /* fill_token_costs( cpi->mb.token_costs, (const vp8_prob( *)[8][3][11]) cpi->common.fc.coef_probs); */ /* TODO make these mode costs depend on last, alt or gold too. (jbb) */ vp8_init_mode_costs(cpi); } } void vp8_auto_select_speed(VP8_COMP *cpi) { int milliseconds_for_compress = (int)(1000000 / cpi->framerate); milliseconds_for_compress = milliseconds_for_compress * (16 - cpi->oxcf.cpu_used) / 16; #if 0 if (0) { FILE *f; f = fopen("speed.stt", "a"); fprintf(f, " %8ld %10ld %10ld %10ld\n", cpi->common.current_video_frame, cpi->Speed, milliseconds_for_compress, cpi->avg_pick_mode_time); fclose(f); } #endif if (cpi->avg_pick_mode_time < milliseconds_for_compress && (cpi->avg_encode_time - cpi->avg_pick_mode_time) < milliseconds_for_compress) { if (cpi->avg_pick_mode_time == 0) { cpi->Speed = 4; } else { if (milliseconds_for_compress * 100 < cpi->avg_encode_time * 95) { cpi->Speed += 2; cpi->avg_pick_mode_time = 0; cpi->avg_encode_time = 0; if (cpi->Speed > 16) { cpi->Speed = 16; } } if (milliseconds_for_compress * 100 > cpi->avg_encode_time * auto_speed_thresh[cpi->Speed]) { cpi->Speed -= 1; cpi->avg_pick_mode_time = 0; cpi->avg_encode_time = 0; /* In real-time mode, cpi->speed is in [4, 16].
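 *
 * Illustrative budget arithmetic for this function: at 30 fps with
 * cpu_used = 0 the per-frame budget computed above is 1000000 / 30 =
 * 33333, and Speed is stepped up when the measured encode time
 * approaches that budget and stepped back down when it falls below
 * the per-Speed threshold in auto_speed_thresh.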
*/ if (cpi->Speed < 4) { cpi->Speed = 4; } } } } else { cpi->Speed += 4; if (cpi->Speed > 16) cpi->Speed = 16; cpi->avg_pick_mode_time = 0; cpi->avg_encode_time = 0; } } int vp8_block_error_c(short *coeff, short *dqcoeff) { int i; int error = 0; for (i = 0; i < 16; ++i) { int this_diff = coeff[i] - dqcoeff[i]; error += this_diff * this_diff; } return error; } int vp8_mbblock_error_c(MACROBLOCK *mb, int dc) { BLOCK *be; BLOCKD *bd; int i, j; int berror, error = 0; for (i = 0; i < 16; ++i) { be = &mb->block[i]; bd = &mb->e_mbd.block[i]; berror = 0; for (j = dc; j < 16; ++j) { int this_diff = be->coeff[j] - bd->dqcoeff[j]; berror += this_diff * this_diff; } error += berror; } return error; } int vp8_mbuverror_c(MACROBLOCK *mb) { BLOCK *be; BLOCKD *bd; int i; int error = 0; for (i = 16; i < 24; ++i) { be = &mb->block[i]; bd = &mb->e_mbd.block[i]; error += vp8_block_error_c(be->coeff, bd->dqcoeff); } return error; } int VP8_UVSSE(MACROBLOCK *x) { unsigned char *uptr, *vptr; unsigned char *upred_ptr = (*(x->block[16].base_src) + x->block[16].src); unsigned char *vpred_ptr = (*(x->block[20].base_src) + x->block[20].src); int uv_stride = x->block[16].src_stride; unsigned int sse1 = 0; unsigned int sse2 = 0; int mv_row = x->e_mbd.mode_info_context->mbmi.mv.as_mv.row; int mv_col = x->e_mbd.mode_info_context->mbmi.mv.as_mv.col; int offset; int pre_stride = x->e_mbd.pre.uv_stride; if (mv_row < 0) { mv_row -= 1; } else { mv_row += 1; } if (mv_col < 0) { mv_col -= 1; } else { mv_col += 1; } mv_row /= 2; mv_col /= 2; offset = (mv_row >> 3) * pre_stride + (mv_col >> 3); uptr = x->e_mbd.pre.u_buffer + offset; vptr = x->e_mbd.pre.v_buffer + offset; if ((mv_row | mv_col) & 7) { vpx_sub_pixel_variance8x8(uptr, pre_stride, mv_col & 7, mv_row & 7, upred_ptr, uv_stride, &sse2); vpx_sub_pixel_variance8x8(vptr, pre_stride, mv_col & 7, mv_row & 7, vpred_ptr, uv_stride, &sse1); sse2 += sse1; } else { vpx_variance8x8(uptr, pre_stride, upred_ptr, uv_stride, &sse2); vpx_variance8x8(vptr, pre_stride, vpred_ptr, uv_stride, &sse1); sse2 += sse1; } return sse2; } static int cost_coeffs(MACROBLOCK *mb, BLOCKD *b, int type, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l) { int c = !type; /* start at coef 0, unless Y with Y2 */ int eob = (int)(*b->eob); int pt; /* surrounding block/prev coef predictor */ int cost = 0; short *qcoeff_ptr = b->qcoeff; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); assert(eob <= 16); for (; c < eob; ++c) { const int v = qcoeff_ptr[vp8_default_zig_zag1d[c]]; const int t = vp8_dct_value_tokens_ptr[v].Token; cost += mb->token_costs[type][vp8_coef_bands[c]][pt][t]; cost += vp8_dct_value_cost_ptr[v]; pt = vp8_prev_token_class[t]; } if (c < 16) { cost += mb->token_costs[type][vp8_coef_bands[c]][pt][DCT_EOB_TOKEN]; } pt = (c != !type); /* is eob first coefficient; */ *a = *l = pt; return cost; } static int vp8_rdcost_mby(MACROBLOCK *mb) { int cost = 0; int b; MACROBLOCKD *x = &mb->e_mbd; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; for (b = 0; b < 16; ++b) { cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_NO_DC, ta + vp8_block2above[b], tl + vp8_block2left[b]); } cost += cost_coeffs(mb, x->block + 24, PLANE_TYPE_Y2, ta + vp8_block2above[24], tl + vp8_block2left[24]); return cost; } static void macro_block_yrd(MACROBLOCK *mb, int *Rate, int *Distortion) { int b; 
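/* Rate/distortion for the 16x16 luma block: subtract the prediction,
 * forward-transform the sixteen 4x4 blocks (collecting their DC terms
 * into the 2nd order Walsh block), quantize, then measure distortion
 * against the dequantized coefficients and cost the resulting tokens. */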
MACROBLOCKD *const x = &mb->e_mbd; BLOCK *const mb_y2 = mb->block + 24; BLOCKD *const x_y2 = x->block + 24; short *Y2DCPtr = mb_y2->src_diff; BLOCK *beptr; int d; vp8_subtract_mby(mb->src_diff, *(mb->block[0].base_src), mb->block[0].src_stride, mb->e_mbd.predictor, 16); /* Fdct and building the 2nd order block */ for (beptr = mb->block; beptr < mb->block + 16; beptr += 2) { mb->short_fdct8x4(beptr->src_diff, beptr->coeff, 32); *Y2DCPtr++ = beptr->coeff[0]; *Y2DCPtr++ = beptr->coeff[16]; } /* 2nd order fdct */ mb->short_walsh4x4(mb_y2->src_diff, mb_y2->coeff, 8); /* Quantization */ for (b = 0; b < 16; ++b) { mb->quantize_b(&mb->block[b], &mb->e_mbd.block[b]); } /* DC prediction and quantization of 2nd order block */ mb->quantize_b(mb_y2, x_y2); /* Distortion */ d = vp8_mbblock_error(mb, 1) << 2; d += vp8_block_error(mb_y2->coeff, x_y2->dqcoeff); *Distortion = (d >> 4); /* rate */ *Rate = vp8_rdcost_mby(mb); } static void copy_predictor(unsigned char *dst, const unsigned char *predictor) { const unsigned int *p = (const unsigned int *)predictor; unsigned int *d = (unsigned int *)dst; d[0] = p[0]; d[4] = p[4]; d[8] = p[8]; d[12] = p[12]; } static int rd_pick_intra4x4block(MACROBLOCK *x, BLOCK *be, BLOCKD *b, B_PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int *bestdistortion) { B_PREDICTION_MODE mode; int best_rd = INT_MAX; int rate = 0; int distortion; ENTROPY_CONTEXT ta = *a, tempa = *a; ENTROPY_CONTEXT tl = *l, templ = *l; /* * The predictor buffer is a 2d buffer with a stride of 16. Create * a temp buffer that meets the stride requirements, but we are only * interested in the left 4x4 block * */ DECLARE_ALIGNED(16, unsigned char, best_predictor[16 * 4]); DECLARE_ALIGNED(16, short, best_dqcoeff[16]); int dst_stride = x->e_mbd.dst.y_stride; unsigned char *dst = x->e_mbd.dst.y_buffer + b->offset; unsigned char *Above = dst - dst_stride; unsigned char *yleft = dst - 1; unsigned char top_left = Above[-1]; for (mode = B_DC_PRED; mode <= B_HU_PRED; ++mode) { int this_rd; int ratey; rate = bmode_costs[mode]; vp8_intra4x4_predict(Above, yleft, dst_stride, mode, b->predictor, 16, top_left); vp8_subtract_b(be, b, 16); x->short_fdct4x4(be->src_diff, be->coeff, 32); x->quantize_b(be, b); tempa = ta; templ = tl; ratey = cost_coeffs(x, b, PLANE_TYPE_Y_WITH_DC, &tempa, &templ); rate += ratey; distortion = vp8_block_error(be->coeff, b->dqcoeff) >> 2; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_rd) { *bestrate = rate; *bestratey = ratey; *bestdistortion = distortion; best_rd = this_rd; *best_mode = mode; *a = tempa; *l = templ; copy_predictor(best_predictor, b->predictor); memcpy(best_dqcoeff, b->dqcoeff, 32); } } b->bmi.as_mode = *best_mode; vp8_short_idct4x4llm(best_dqcoeff, best_predictor, 16, dst, dst_stride); return best_rd; } static int rd_pick_intra4x4mby_modes(MACROBLOCK *mb, int *Rate, int *rate_y, int *Distortion, int best_rd) { MACROBLOCKD *const xd = &mb->e_mbd; int i; int cost = mb->mbmode_cost[xd->frame_type][B_PRED]; int distortion = 0; int tot_rate_y = 0; int64_t total_rd = 0; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; const int *bmode_costs; memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; intra_prediction_down_copy(xd, xd->dst.y_buffer - xd->dst.y_stride + 16); bmode_costs =
mb->inter_bmode_costs; for (i = 0; i < 16; ++i) { MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; B_PREDICTION_MODE best_mode = B_MODE_COUNT; int r = 0, ry = 0, d = 0; if (mb->e_mbd.frame_type == KEY_FRAME) { const B_PREDICTION_MODE A = above_block_mode(mic, i, mis); const B_PREDICTION_MODE L = left_block_mode(mic, i); bmode_costs = mb->bmode_costs[A][L]; } total_rd += rd_pick_intra4x4block( mb, mb->block + i, xd->block + i, &best_mode, bmode_costs, ta + vp8_block2above[i], tl + vp8_block2left[i], &r, &ry, &d); cost += r; distortion += d; tot_rate_y += ry; assert(best_mode != B_MODE_COUNT); mic->bmi[i].as_mode = best_mode; if (total_rd >= (int64_t)best_rd) break; } if (total_rd >= (int64_t)best_rd) return INT_MAX; *Rate = cost; *rate_y = tot_rate_y; *Distortion = distortion; return RDCOST(mb->rdmult, mb->rddiv, cost, distortion); } static int rd_pick_intra16x16mby_mode(MACROBLOCK *x, int *Rate, int *rate_y, int *Distortion) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT; int rate, ratey; int distortion; int best_rd = INT_MAX; int this_rd; MACROBLOCKD *xd = &x->e_mbd; /* Y Search for 16x16 intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; ++mode) { xd->mode_info_context->mbmi.mode = mode; vp8_build_intra_predictors_mby_s(xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1, xd->dst.y_stride, xd->predictor, 16); macro_block_yrd(x, &ratey, &distortion); rate = ratey + x->mbmode_cost[xd->frame_type][xd->mode_info_context->mbmi.mode]; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_rd) { mode_selected = mode; best_rd = this_rd; *Rate = rate; *rate_y = ratey; *Distortion = distortion; } } assert(mode_selected != MB_MODE_COUNT); xd->mode_info_context->mbmi.mode = mode_selected; return best_rd; } static int rd_cost_mbuv(MACROBLOCK *mb) { int b; int cost = 0; MACROBLOCKD *x = &mb->e_mbd; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; memcpy(&t_above, mb->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(&t_left, mb->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); ta = (ENTROPY_CONTEXT *)&t_above; tl = (ENTROPY_CONTEXT *)&t_left; for (b = 16; b < 24; ++b) { cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_UV, ta + vp8_block2above[b], tl + vp8_block2left[b]); } return cost; } static int rd_inter16x16_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel) { (void)cpi; (void)fullpixel; vp8_build_inter16x16_predictors_mbuv(&x->e_mbd); vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->src.uv_stride, &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8); vp8_transform_mbuv(x); vp8_quantize_mbuv(x); *rate = rd_cost_mbuv(x); *distortion = vp8_mbuverror(x) / 4; return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } static int rd_inter4x4_uv(VP8_COMP *cpi, MACROBLOCK *x, int *rate, int *distortion, int fullpixel) { (void)cpi; (void)fullpixel; vp8_build_inter4x4_predictors_mbuv(&x->e_mbd); vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->src.uv_stride, &x->e_mbd.predictor[256], &x->e_mbd.predictor[320], 8); vp8_transform_mbuv(x); vp8_quantize_mbuv(x); *rate = rd_cost_mbuv(x); *distortion = vp8_mbuverror(x) / 4; return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } static void rd_pick_intra_mbuv_mode(MACROBLOCK *x, int *rate, int *rate_tokenonly, int *distortion) { MB_PREDICTION_MODE mode; MB_PREDICTION_MODE mode_selected = MB_MODE_COUNT; int best_rd = INT_MAX; int d = 0, r = 0; int 
rate_to; MACROBLOCKD *xd = &x->e_mbd; for (mode = DC_PRED; mode <= TM_PRED; ++mode) { int this_rate; int this_distortion; int this_rd; xd->mode_info_context->mbmi.uv_mode = mode; vp8_build_intra_predictors_mbuv_s( xd, xd->dst.u_buffer - xd->dst.uv_stride, xd->dst.v_buffer - xd->dst.uv_stride, xd->dst.u_buffer - 1, xd->dst.v_buffer - 1, xd->dst.uv_stride, &xd->predictor[256], &xd->predictor[320], 8); vp8_subtract_mbuv(x->src_diff, x->src.u_buffer, x->src.v_buffer, x->src.uv_stride, &xd->predictor[256], &xd->predictor[320], 8); vp8_transform_mbuv(x); vp8_quantize_mbuv(x); rate_to = rd_cost_mbuv(x); this_rate = rate_to + x->intra_uv_mode_cost[xd->frame_type] [xd->mode_info_context->mbmi.uv_mode]; this_distortion = vp8_mbuverror(x) / 4; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { best_rd = this_rd; d = this_distortion; r = this_rate; *rate_tokenonly = rate_to; mode_selected = mode; } } *rate = r; *distortion = d; assert(mode_selected != MB_MODE_COUNT); xd->mode_info_context->mbmi.uv_mode = mode_selected; } int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]) { vp8_prob p[VP8_MVREFS - 1]; assert(NEARESTMV <= m && m <= SPLITMV); vp8_mv_ref_probs(p, near_mv_ref_ct); return vp8_cost_token(vp8_mv_ref_tree, p, vp8_mv_ref_encoding_array + (m - NEARESTMV)); } void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv) { x->e_mbd.mode_info_context->mbmi.mode = mb; x->e_mbd.mode_info_context->mbmi.mv.as_int = mv->as_int; } static int labels2mode(MACROBLOCK *x, int const *labelings, int which_label, B_PREDICTION_MODE this_mode, int_mv *this_mv, int_mv *best_ref_mv, int *mvcost[2]) { MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mic = xd->mode_info_context; const int mis = xd->mode_info_stride; int cost = 0; int thismvcost = 0; /* We have to be careful retrieving previously-encoded motion vectors. Ones from this macroblock have to be pulled from the BLOCKD array as they have not yet made it to the bmi array in our MB_MODE_INFO. */ int i = 0; do { BLOCKD *const d = xd->block + i; const int row = i >> 2, col = i & 3; B_PREDICTION_MODE m; if (labelings[i] != which_label) continue; if (col && labelings[i] == labelings[i - 1]) { m = LEFT4X4; } else if (row && labelings[i] == labelings[i - 4]) { m = ABOVE4X4; } else { /* the only time we should do costing for new motion vector * or mode is when we are on a new label (jbb May 08, 2007) */ switch (m = this_mode) { case NEW4X4: thismvcost = vp8_mv_bit_cost(this_mv, best_ref_mv, mvcost, 102); break; case LEFT4X4: this_mv->as_int = col ? d[-1].bmi.mv.as_int : left_block_mv(mic, i); break; case ABOVE4X4: this_mv->as_int = row ? d[-4].bmi.mv.as_int : above_block_mv(mic, i, mis); break; case ZERO4X4: this_mv->as_int = 0; break; default: break; } if (m == ABOVE4X4) { /* replace above with left if same */ int_mv left_mv; left_mv.as_int = col ? 
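/* Worked example for labels2mode (illustrative; assumes the usual 8x8 split table of { 0, 0, 1, 1, 0, 0, 1, 1, 2, 2, 3, 3, 2, 2, 3, 3 } in vp8_mbsplits): block i = 6 has row = 6 >> 2 = 1 and col = 6 & 3 = 2; its left neighbour i = 5 carries label 0 but its above neighbour i = 2 carries the same label 1, so it is coded ABOVE4X4 and inherits that MV; only the first block of each label pays the full mode/MV cost. */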
d[-1].bmi.mv.as_int : left_block_mv(mic, i); if (left_mv.as_int == this_mv->as_int) m = LEFT4X4; } cost = x->inter_bmode_costs[m]; } d->bmi.mv.as_int = this_mv->as_int; x->partition_info->bmi[i].mode = m; x->partition_info->bmi[i].mv.as_int = this_mv->as_int; } while (++i < 16); cost += thismvcost; return cost; } static int rdcost_mbsegment_y(MACROBLOCK *mb, const int *labels, int which_label, ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl) { int cost = 0; int b; MACROBLOCKD *x = &mb->e_mbd; for (b = 0; b < 16; ++b) { if (labels[b] == which_label) { cost += cost_coeffs(mb, x->block + b, PLANE_TYPE_Y_WITH_DC, ta + vp8_block2above[b], tl + vp8_block2left[b]); } } return cost; } static unsigned int vp8_encode_inter_mb_segment(MACROBLOCK *x, int const *labels, int which_label) { int i; unsigned int distortion = 0; int pre_stride = x->e_mbd.pre.y_stride; unsigned char *base_pre = x->e_mbd.pre.y_buffer; for (i = 0; i < 16; ++i) { if (labels[i] == which_label) { BLOCKD *bd = &x->e_mbd.block[i]; BLOCK *be = &x->block[i]; vp8_build_inter_predictors_b(bd, 16, base_pre, pre_stride, x->e_mbd.subpixel_predict); vp8_subtract_b(be, bd, 16); x->short_fdct4x4(be->src_diff, be->coeff, 32); x->quantize_b(be, bd); distortion += vp8_block_error(be->coeff, bd->dqcoeff); } } return distortion; } static const unsigned int segmentation_to_sseshift[4] = { 3, 3, 2, 0 }; typedef struct { int_mv *ref_mv; int_mv mvp; int segment_rd; int segment_num; int r; int d; int segment_yrate; B_PREDICTION_MODE modes[16]; int_mv mvs[16]; unsigned char eobs[16]; int mvthresh; int *mdcounts; int_mv sv_mvp[4]; /* save 4 mvp from 8x8 */ int sv_istep[2]; /* save 2 initial step_param for 16x8/8x16 */ } BEST_SEG_INFO; static void rd_check_segment(VP8_COMP *cpi, MACROBLOCK *x, BEST_SEG_INFO *bsi, unsigned int segmentation) { int i; int const *labels; int br = 0; int bd = 0; B_PREDICTION_MODE this_mode; int label_count; int this_segment_rd = 0; int label_mv_thresh; int rate = 0; int sbr = 0; int sbd = 0; int segmentyrate = 0; vp8_variance_fn_ptr_t *v_fn_ptr; ENTROPY_CONTEXT_PLANES t_above, t_left; ENTROPY_CONTEXT_PLANES t_above_b, t_left_b; memcpy(&t_above, x->e_mbd.above_context, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(&t_left, x->e_mbd.left_context, sizeof(ENTROPY_CONTEXT_PLANES)); vp8_zero(t_above_b); vp8_zero(t_left_b); br = 0; bd = 0; v_fn_ptr = &cpi->fn_ptr[segmentation]; labels = vp8_mbsplits[segmentation]; label_count = vp8_mbsplit_count[segmentation]; /* 64 makes this threshold really big effectively making it so that we * very rarely check mvs on segments. 
Setting this to 1 would make mv * thresh roughly equal to what it is for macroblocks */ label_mv_thresh = 1 * bsi->mvthresh / label_count; /* Segmentation method overheads */ rate = vp8_cost_token(vp8_mbsplit_tree, vp8_mbsplit_probs, vp8_mbsplit_encodings + segmentation); rate += vp8_cost_mv_ref(SPLITMV, bsi->mdcounts); this_segment_rd += RDCOST(x->rdmult, x->rddiv, rate, 0); br += rate; for (i = 0; i < label_count; ++i) { int_mv mode_mv[B_MODE_COUNT] = { { 0 }, { 0 } }; int best_label_rd = INT_MAX; B_PREDICTION_MODE mode_selected = ZERO4X4; int bestlabelyrate = 0; /* search for the best motion vector on this segment */ for (this_mode = LEFT4X4; this_mode <= NEW4X4; ++this_mode) { int this_rd; int distortion; int labelyrate; ENTROPY_CONTEXT_PLANES t_above_s, t_left_s; ENTROPY_CONTEXT *ta_s; ENTROPY_CONTEXT *tl_s; memcpy(&t_above_s, &t_above, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(&t_left_s, &t_left, sizeof(ENTROPY_CONTEXT_PLANES)); ta_s = (ENTROPY_CONTEXT *)&t_above_s; tl_s = (ENTROPY_CONTEXT *)&t_left_s; if (this_mode == NEW4X4) { int sseshift; int num00; int step_param = 0; int further_steps; int n; int thissme; int bestsme = INT_MAX; int_mv temp_mv; BLOCK *c; BLOCKD *e; /* Is the best so far sufficiently good that we can't justify * doing a new motion search. */ if (best_label_rd < label_mv_thresh) break; if (cpi->compressor_speed) { if (segmentation == BLOCK_8X16 || segmentation == BLOCK_16X8) { bsi->mvp.as_int = bsi->sv_mvp[i].as_int; if (i == 1 && segmentation == BLOCK_16X8) { bsi->mvp.as_int = bsi->sv_mvp[2].as_int; } step_param = bsi->sv_istep[i]; } /* use previous block's result as next block's MV * predictor. */ if (segmentation == BLOCK_4X4 && i > 0) { bsi->mvp.as_int = x->e_mbd.block[i - 1].bmi.mv.as_int; if (i == 4 || i == 8 || i == 12) { bsi->mvp.as_int = x->e_mbd.block[i - 4].bmi.mv.as_int; } step_param = 2; } } further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; { int sadpb = x->sadperbit4; int_mv mvp_full; mvp_full.as_mv.row = bsi->mvp.as_mv.row >> 3; mvp_full.as_mv.col = bsi->mvp.as_mv.col >> 3; /* find first label */ n = vp8_mbsplit_offset[segmentation][i]; c = &x->block[n]; e = &x->e_mbd.block[n]; { bestsme = cpi->diamond_search_sad( x, c, e, &mvp_full, &mode_mv[NEW4X4], step_param, sadpb, &num00, v_fn_ptr, x->mvcost, bsi->ref_mv); n = num00; num00 = 0; while (n < further_steps) { n++; if (num00) { num00--; } else { thissme = cpi->diamond_search_sad( x, c, e, &mvp_full, &temp_mv, step_param + n, sadpb, &num00, v_fn_ptr, x->mvcost, bsi->ref_mv); if (thissme < bestsme) { bestsme = thissme; mode_mv[NEW4X4].as_int = temp_mv.as_int; } } } } sseshift = segmentation_to_sseshift[segmentation]; /* Should we do a full search (best quality only) */ if ((cpi->compressor_speed == 0) && (bestsme >> sseshift) > 4000) { /* Check if mvp_full is within the range.
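* This exhaustive fallback runs only in best-quality mode (compressor_speed == 0) and only when the diamond result looks poor: e.g. for an 8x8 split segmentation_to_sseshift[] gives 2, so a diamond SAD above roughly 4000 << 2 = 16000 triggers the full search (an illustrative reading of the condition above, not a tuning recommendation).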
*/ vp8_clamp_mv(&mvp_full, x->mv_col_min, x->mv_col_max, x->mv_row_min, x->mv_row_max); thissme = cpi->full_search_sad(x, c, e, &mvp_full, sadpb, 16, v_fn_ptr, x->mvcost, bsi->ref_mv); if (thissme < bestsme) { bestsme = thissme; mode_mv[NEW4X4].as_int = e->bmi.mv.as_int; } else { /* The full search result is actually worse so * re-instate the previous best vector */ e->bmi.mv.as_int = mode_mv[NEW4X4].as_int; } } } if (bestsme < INT_MAX) { int disto; unsigned int sse; cpi->find_fractional_mv_step(x, c, e, &mode_mv[NEW4X4], bsi->ref_mv, x->errorperbit, v_fn_ptr, x->mvcost, &disto, &sse); } } /* NEW4X4 */ rate = labels2mode(x, labels, i, this_mode, &mode_mv[this_mode], bsi->ref_mv, x->mvcost); /* Trap vectors that reach beyond the UMV borders */ if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { continue; } distortion = vp8_encode_inter_mb_segment(x, labels, i) / 4; labelyrate = rdcost_mbsegment_y(x, labels, i, ta_s, tl_s); rate += labelyrate; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_label_rd) { sbr = rate; sbd = distortion; bestlabelyrate = labelyrate; mode_selected = this_mode; best_label_rd = this_rd; memcpy(&t_above_b, &t_above_s, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(&t_left_b, &t_left_s, sizeof(ENTROPY_CONTEXT_PLANES)); } } /*for each 4x4 mode*/ memcpy(&t_above, &t_above_b, sizeof(ENTROPY_CONTEXT_PLANES)); memcpy(&t_left, &t_left_b, sizeof(ENTROPY_CONTEXT_PLANES)); labels2mode(x, labels, i, mode_selected, &mode_mv[mode_selected], bsi->ref_mv, x->mvcost); br += sbr; bd += sbd; segmentyrate += bestlabelyrate; this_segment_rd += best_label_rd; if (this_segment_rd >= bsi->segment_rd) break; } /* for each label */ if (this_segment_rd < bsi->segment_rd) { bsi->r = br; bsi->d = bd; bsi->segment_yrate = segmentyrate; bsi->segment_rd = this_segment_rd; bsi->segment_num = segmentation; /* store everything needed to come back to this!! 
*/ for (i = 0; i < 16; ++i) { bsi->mvs[i].as_mv = x->partition_info->bmi[i].mv.as_mv; bsi->modes[i] = x->partition_info->bmi[i].mode; bsi->eobs[i] = x->e_mbd.eobs[i]; } } } static void vp8_cal_step_param(int sr, int *sp) { int step = 0; if (sr > MAX_FIRST_STEP) { sr = MAX_FIRST_STEP; } else if (sr < 1) { sr = 1; } while (sr >>= 1) step++; *sp = MAX_MVSEARCH_STEPS - 1 - step; } static int vp8_rd_pick_best_mbsegmentation(VP8_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv, int best_rd, int *mdcounts, int *returntotrate, int *returnyrate, int *returndistortion, int mvthresh) { int i; BEST_SEG_INFO bsi; memset(&bsi, 0, sizeof(bsi)); bsi.segment_rd = best_rd; bsi.ref_mv = best_ref_mv; bsi.mvp.as_int = best_ref_mv->as_int; bsi.mvthresh = mvthresh; bsi.mdcounts = mdcounts; for (i = 0; i < 16; ++i) { bsi.modes[i] = ZERO4X4; } if (cpi->compressor_speed == 0) { /* for now, we will keep the original segmentation order when in best quality mode */ rd_check_segment(cpi, x, &bsi, BLOCK_16X8); rd_check_segment(cpi, x, &bsi, BLOCK_8X16); rd_check_segment(cpi, x, &bsi, BLOCK_8X8); rd_check_segment(cpi, x, &bsi, BLOCK_4X4); } else { int sr; rd_check_segment(cpi, x, &bsi, BLOCK_8X8); if (bsi.segment_rd < best_rd) { int col_min = ((best_ref_mv->as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL; int row_min = ((best_ref_mv->as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL; int col_max = (best_ref_mv->as_mv.col >> 3) + MAX_FULL_PEL_VAL; int row_max = (best_ref_mv->as_mv.row >> 3) + MAX_FULL_PEL_VAL; int tmp_col_min = x->mv_col_min; int tmp_col_max = x->mv_col_max; int tmp_row_min = x->mv_row_min; int tmp_row_max = x->mv_row_max; /* Get intersection of UMV window and valid MV window to reduce # of * checks in diamond search. */ if (x->mv_col_min < col_min) x->mv_col_min = col_min; if (x->mv_col_max > col_max) x->mv_col_max = col_max; if (x->mv_row_min < row_min) x->mv_row_min = row_min; if (x->mv_row_max > row_max) x->mv_row_max = row_max; /* Get 8x8 result */ bsi.sv_mvp[0].as_int = bsi.mvs[0].as_int; bsi.sv_mvp[1].as_int = bsi.mvs[2].as_int; bsi.sv_mvp[2].as_int = bsi.mvs[8].as_int; bsi.sv_mvp[3].as_int = bsi.mvs[10].as_int; /* Use 8x8 result as 16x8/8x16's predictor MV. Adjust search range * according to the closeness of 2 MV. 
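* Illustrative numbers: if two of the saved 8x8 MVs differ by 40 eighth-pel units in row and 16 in col, sr = MAXF(40 >> 3, 16 >> 3) = 5; vp8_cal_step_param above then counts two halvings, so *sp = MAX_MVSEARCH_STEPS - 1 - 2 (assuming MAX_MVSEARCH_STEPS from mcomp.h), i.e. a shallower search seeded by the 8x8 result.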
*/ /* block 8X16 */ { sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[2].as_mv.row)) >> 3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[2].as_mv.col)) >> 3); vp8_cal_step_param(sr, &bsi.sv_istep[0]); sr = MAXF((abs(bsi.sv_mvp[1].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, (abs(bsi.sv_mvp[1].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3); vp8_cal_step_param(sr, &bsi.sv_istep[1]); rd_check_segment(cpi, x, &bsi, BLOCK_8X16); } /* block 16X8 */ { sr = MAXF((abs(bsi.sv_mvp[0].as_mv.row - bsi.sv_mvp[1].as_mv.row)) >> 3, (abs(bsi.sv_mvp[0].as_mv.col - bsi.sv_mvp[1].as_mv.col)) >> 3); vp8_cal_step_param(sr, &bsi.sv_istep[0]); sr = MAXF((abs(bsi.sv_mvp[2].as_mv.row - bsi.sv_mvp[3].as_mv.row)) >> 3, (abs(bsi.sv_mvp[2].as_mv.col - bsi.sv_mvp[3].as_mv.col)) >> 3); vp8_cal_step_param(sr, &bsi.sv_istep[1]); rd_check_segment(cpi, x, &bsi, BLOCK_16X8); } /* If 8x8 is better than 16x8/8x16, then do 4x4 search */ /* Do not skip 4x4 if speed=0 (good quality) */ if (cpi->sf.no_skip_block4x4_search || bsi.segment_num == BLOCK_8X8) /* || (sv_segment_rd8x8-bsi.segment_rd) < sv_segment_rd8x8>>5) */ { bsi.mvp.as_int = bsi.sv_mvp[0].as_int; rd_check_segment(cpi, x, &bsi, BLOCK_4X4); } /* restore UMV window */ x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; x->mv_row_min = tmp_row_min; x->mv_row_max = tmp_row_max; } } /* set it to the best */ for (i = 0; i < 16; ++i) { BLOCKD *bd = &x->e_mbd.block[i]; bd->bmi.mv.as_int = bsi.mvs[i].as_int; *bd->eob = bsi.eobs[i]; } *returntotrate = bsi.r; *returndistortion = bsi.d; *returnyrate = bsi.segment_yrate; /* save partitions */ x->e_mbd.mode_info_context->mbmi.partitioning = bsi.segment_num; x->partition_info->count = vp8_mbsplit_count[bsi.segment_num]; for (i = 0; i < x->partition_info->count; ++i) { int j; j = vp8_mbsplit_offset[bsi.segment_num][i]; x->partition_info->bmi[i].mode = bsi.modes[j]; x->partition_info->bmi[i].mv.as_mv = bsi.mvs[j].as_mv; } /* * used to set x->e_mbd.mode_info_context->mbmi.mv.as_int */ x->partition_info->bmi[15].mv.as_int = bsi.mvs[15].as_int; return bsi.segment_rd; } /* The improved MV prediction */ void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here, int_mv *mvp, int refframe, int *ref_frame_sign_bias, int *sr, int near_sadidx[]) { const MODE_INFO *above = here - xd->mode_info_stride; const MODE_INFO *left = here - 1; const MODE_INFO *aboveleft = above - 1; int_mv near_mvs[8]; int near_ref[8]; int_mv mv; int vcnt = 0; int find = 0; int mb_offset; int mvx[8]; int mvy[8]; int i; mv.as_int = 0; if (here->mbmi.ref_frame != INTRA_FRAME) { near_mvs[0].as_int = near_mvs[1].as_int = near_mvs[2].as_int = near_mvs[3].as_int = near_mvs[4].as_int = near_mvs[5].as_int = near_mvs[6].as_int = near_mvs[7].as_int = 0; near_ref[0] = near_ref[1] = near_ref[2] = near_ref[3] = near_ref[4] = near_ref[5] = near_ref[6] = near_ref[7] = 0; /* read in 3 nearby block's MVs from current frame as prediction * candidates.
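* Candidate slot layout used below (cf = current frame, lf = last frame): 0 cf-above, 1 cf-left, 2 cf-aboveleft, 3 lf-colocated, 4 lf-above, 5 lf-left, 6 lf-right, 7 lf-below; note vcnt is incremented even when a neighbour is intra or invalid, so every candidate keeps a fixed slot and near_sadidx[] can rank them.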
*/ if (above->mbmi.ref_frame != INTRA_FRAME) { near_mvs[vcnt].as_int = above->mbmi.mv.as_int; mv_bias(ref_frame_sign_bias[above->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias); near_ref[vcnt] = above->mbmi.ref_frame; } vcnt++; if (left->mbmi.ref_frame != INTRA_FRAME) { near_mvs[vcnt].as_int = left->mbmi.mv.as_int; mv_bias(ref_frame_sign_bias[left->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias); near_ref[vcnt] = left->mbmi.ref_frame; } vcnt++; if (aboveleft->mbmi.ref_frame != INTRA_FRAME) { near_mvs[vcnt].as_int = aboveleft->mbmi.mv.as_int; mv_bias(ref_frame_sign_bias[aboveleft->mbmi.ref_frame], refframe, &near_mvs[vcnt], ref_frame_sign_bias); near_ref[vcnt] = aboveleft->mbmi.ref_frame; } vcnt++; /* read in 5 nearby block's MVs from last frame. */ if (cpi->common.last_frame_type != KEY_FRAME) { mb_offset = (-xd->mb_to_top_edge / 128 + 1) * (xd->mode_info_stride + 1) + (-xd->mb_to_left_edge / 128 + 1); /* current in last frame */ if (cpi->lf_ref_frame[mb_offset] != INTRA_FRAME) { near_mvs[vcnt].as_int = cpi->lfmv[mb_offset].as_int; mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset], refframe, &near_mvs[vcnt], ref_frame_sign_bias); near_ref[vcnt] = cpi->lf_ref_frame[mb_offset]; } vcnt++; /* above in last frame */ if (cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1] != INTRA_FRAME) { near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - xd->mode_info_stride - 1].as_int; mv_bias( cpi->lf_ref_frame_sign_bias[mb_offset - xd->mode_info_stride - 1], refframe, &near_mvs[vcnt], ref_frame_sign_bias); near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - xd->mode_info_stride - 1]; } vcnt++; /* left in last frame */ if (cpi->lf_ref_frame[mb_offset - 1] != INTRA_FRAME) { near_mvs[vcnt].as_int = cpi->lfmv[mb_offset - 1].as_int; mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset - 1], refframe, &near_mvs[vcnt], ref_frame_sign_bias); near_ref[vcnt] = cpi->lf_ref_frame[mb_offset - 1]; } vcnt++; /* right in last frame */ if (cpi->lf_ref_frame[mb_offset + 1] != INTRA_FRAME) { near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + 1].as_int; mv_bias(cpi->lf_ref_frame_sign_bias[mb_offset + 1], refframe, &near_mvs[vcnt], ref_frame_sign_bias); near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + 1]; } vcnt++; /* below in last frame */ if (cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1] != INTRA_FRAME) { near_mvs[vcnt].as_int = cpi->lfmv[mb_offset + xd->mode_info_stride + 1].as_int; mv_bias( cpi->lf_ref_frame_sign_bias[mb_offset + xd->mode_info_stride + 1], refframe, &near_mvs[vcnt], ref_frame_sign_bias); near_ref[vcnt] = cpi->lf_ref_frame[mb_offset + xd->mode_info_stride + 1]; } vcnt++; } for (i = 0; i < vcnt; ++i) { if (near_ref[near_sadidx[i]] != INTRA_FRAME) { if (here->mbmi.ref_frame == near_ref[near_sadidx[i]]) { mv.as_int = near_mvs[near_sadidx[i]].as_int; find = 1; if (i < 3) { *sr = 3; } else { *sr = 2; } break; } } } if (!find) { for (i = 0; i < vcnt; ++i) { mvx[i] = near_mvs[i].as_mv.row; mvy[i] = near_mvs[i].as_mv.col; } insertsortmv(mvx, vcnt); insertsortmv(mvy, vcnt); mv.as_mv.row = mvx[vcnt / 2]; mv.as_mv.col = mvy[vcnt / 2]; /* sr is set to 0 to allow calling function to decide the search * range. 
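* For the median fallback just above (illustrative numbers): with vcnt = 5 row candidates { 0, -4, 8, 0, 12 }, insertsortmv yields { -4, 0, 0, 8, 12 } and mvx[5 / 2] = 0 is picked; rows and columns are sorted independently, so the two medians may come from different candidates.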
*/ *sr = 0; } } /* Set up return values */ mvp->as_int = mv.as_int; vp8_clamp_mv2(mvp, xd); } void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]) { /* near_sad indexes: * 0-cf above, 1-cf left, 2-cf aboveleft, * 3-lf current, 4-lf above, 5-lf left, 6-lf right, 7-lf below */ int near_sad[8] = { 0 }; BLOCK *b = &x->block[0]; unsigned char *src_y_ptr = *(b->base_src); /* calculate sad for current frame 3 nearby MBs. */ if (xd->mb_to_top_edge == 0 && xd->mb_to_left_edge == 0) { near_sad[0] = near_sad[1] = near_sad[2] = INT_MAX; } else if (xd->mb_to_top_edge == 0) { /* only has left MB for sad calculation. */ near_sad[0] = near_sad[2] = INT_MAX; near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf( src_y_ptr, b->src_stride, xd->dst.y_buffer - 16, xd->dst.y_stride); } else if (xd->mb_to_left_edge == 0) { /* only has above MB for sad calculation. */ near_sad[1] = near_sad[2] = INT_MAX; near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf( src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16, xd->dst.y_stride); } else { near_sad[0] = cpi->fn_ptr[BLOCK_16X16].sdf( src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16, xd->dst.y_stride); near_sad[1] = cpi->fn_ptr[BLOCK_16X16].sdf( src_y_ptr, b->src_stride, xd->dst.y_buffer - 16, xd->dst.y_stride); near_sad[2] = cpi->fn_ptr[BLOCK_16X16].sdf( src_y_ptr, b->src_stride, xd->dst.y_buffer - xd->dst.y_stride * 16 - 16, xd->dst.y_stride); } if (cpi->common.last_frame_type != KEY_FRAME) { /* calculate sad for last frame 5 nearby MBs. */ unsigned char *pre_y_buffer = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_buffer + recon_yoffset; int pre_y_stride = cpi->common.yv12_fb[cpi->common.lst_fb_idx].y_stride; if (xd->mb_to_top_edge == 0) near_sad[4] = INT_MAX; if (xd->mb_to_left_edge == 0) near_sad[5] = INT_MAX; if (xd->mb_to_right_edge == 0) near_sad[6] = INT_MAX; if (xd->mb_to_bottom_edge == 0) near_sad[7] = INT_MAX; if (near_sad[4] != INT_MAX) { near_sad[4] = cpi->fn_ptr[BLOCK_16X16].sdf( src_y_ptr, b->src_stride, pre_y_buffer - pre_y_stride * 16, pre_y_stride); } if (near_sad[5] != INT_MAX) { near_sad[5] = cpi->fn_ptr[BLOCK_16X16].sdf( src_y_ptr, b->src_stride, pre_y_buffer - 16, pre_y_stride); } near_sad[3] = cpi->fn_ptr[BLOCK_16X16].sdf(src_y_ptr, b->src_stride, pre_y_buffer, pre_y_stride); if (near_sad[6] != INT_MAX) { near_sad[6] = cpi->fn_ptr[BLOCK_16X16].sdf( src_y_ptr, b->src_stride, pre_y_buffer + 16, pre_y_stride); } if (near_sad[7] != INT_MAX) { near_sad[7] = cpi->fn_ptr[BLOCK_16X16].sdf( src_y_ptr, b->src_stride, pre_y_buffer + pre_y_stride * 16, pre_y_stride); } } if (cpi->common.last_frame_type != KEY_FRAME) { insertsortsad(near_sad, near_sadidx, 8); } else { insertsortsad(near_sad, near_sadidx, 3); } } static void rd_update_mvcount(MACROBLOCK *x, int_mv *best_ref_mv) { if (x->e_mbd.mode_info_context->mbmi.mode == SPLITMV) { int i; for (i = 0; i < x->partition_info->count; ++i) { if (x->partition_info->bmi[i].mode == NEW4X4) { x->MVcount[0][mv_max + ((x->partition_info->bmi[i].mv.as_mv.row - best_ref_mv->as_mv.row) >> 1)]++; x->MVcount[1][mv_max + ((x->partition_info->bmi[i].mv.as_mv.col - best_ref_mv->as_mv.col) >> 1)]++; } } } else if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV) { x->MVcount[0][mv_max + ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.row - best_ref_mv->as_mv.row) >> 1)]++; x->MVcount[1][mv_max + ((x->e_mbd.mode_info_context->mbmi.mv.as_mv.col - best_ref_mv->as_mv.col) >> 1)]++; } } static int evaluate_inter_mode_rd(int mdcounts[4], RATE_DISTORTION *rd, int *disable_skip,
VP8_COMP *cpi, MACROBLOCK *x) { MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; BLOCK *b = &x->block[0]; MACROBLOCKD *xd = &x->e_mbd; int distortion; vp8_build_inter16x16_predictors_mby(&x->e_mbd, x->e_mbd.predictor, 16); if (cpi->active_map_enabled && x->active_ptr[0] == 0) { x->skip = 1; } else if (x->encode_breakout) { unsigned int sse; unsigned int var; unsigned int threshold = (xd->block[0].dequant[1] * xd->block[0].dequant[1] >> 4); if (threshold < x->encode_breakout) threshold = x->encode_breakout; var = vpx_variance16x16(*(b->base_src), b->src_stride, x->e_mbd.predictor, 16, &sse); if (sse < threshold) { unsigned int q2dc = xd->block[24].dequant[0]; /* If there is no codeable 2nd order dc or a very small uniform pixel change */ if ((sse - var < q2dc * q2dc >> 4) || (sse / 2 > var && sse - var < 64)) { /* Check u and v to make sure skip is ok */ unsigned int sse2 = VP8_UVSSE(x); if (sse2 * 2 < threshold) { x->skip = 1; rd->distortion2 = sse + sse2; rd->rate2 = 500; /* for best_yrd calculation */ rd->rate_uv = 0; rd->distortion_uv = sse2; *disable_skip = 1; return RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2); } } } } /* Add in the Mv/mode cost */ rd->rate2 += vp8_cost_mv_ref(this_mode, mdcounts); /* Y cost and distortion */ macro_block_yrd(x, &rd->rate_y, &distortion); rd->rate2 += rd->rate_y; rd->distortion2 += distortion; /* UV cost and distortion */ rd_inter16x16_uv(cpi, x, &rd->rate_uv, &rd->distortion_uv, cpi->common.full_pixel); rd->rate2 += rd->rate_uv; rd->distortion2 += rd->distortion_uv; return INT_MAX; } static int calculate_final_rd_costs(int this_rd, RATE_DISTORTION *rd, int *other_cost, int disable_skip, int uv_intra_tteob, int intra_rd_penalty, VP8_COMP *cpi, MACROBLOCK *x) { MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; /* Where skip is allowable add in the default per mb cost for the no * skip case. Where we then decide to skip we have to delete this and * replace it with the cost of signalling a skip */ if (cpi->common.mb_no_coeff_skip) { *other_cost += vp8_cost_bit(cpi->prob_skip_false, 0); rd->rate2 += *other_cost; } /* Estimate the reference frame signaling cost and add it * to the rolling cost variable.
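* In VP8 the reference is signalled with up to three binary decisions (intra vs. inter, then LAST vs. GOLDEN/ALTREF, then GOLDEN vs. ALTREF), coded with frame-level probabilities such as prob_intra_coded; x->ref_frame_cost[] caches the resulting bit costs once per frame, so a rarely used reference carries a larger rate penalty here (a summary of the mechanism, not of values).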
*/ rd->rate2 += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; if (!disable_skip) { /* Test for the condition where skip block will be activated * because there are no non-zero coefficients and make any * necessary adjustment for rate */ if (cpi->common.mb_no_coeff_skip) { int i; int tteob; int has_y2_block = (this_mode != SPLITMV && this_mode != B_PRED); tteob = 0; if (has_y2_block) tteob += x->e_mbd.eobs[24]; for (i = 0; i < 16; ++i) tteob += (x->e_mbd.eobs[i] > has_y2_block); if (x->e_mbd.mode_info_context->mbmi.ref_frame) { for (i = 16; i < 24; ++i) tteob += x->e_mbd.eobs[i]; } else { tteob += uv_intra_tteob; } if (tteob == 0) { rd->rate2 -= (rd->rate_y + rd->rate_uv); /* for best_yrd calculation */ rd->rate_uv = 0; /* Back out no skip flag costing and add in skip flag costing */ if (cpi->prob_skip_false) { int prob_skip_cost; prob_skip_cost = vp8_cost_bit(cpi->prob_skip_false, 1); prob_skip_cost -= (int)vp8_cost_bit(cpi->prob_skip_false, 0); rd->rate2 += prob_skip_cost; *other_cost += prob_skip_cost; } } } /* Calculate the final RD estimate for this mode */ this_rd = RDCOST(x->rdmult, x->rddiv, rd->rate2, rd->distortion2); if (this_rd < INT_MAX && x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) { this_rd += intra_rd_penalty; } } return this_rd; } static void update_best_mode(BEST_MODE *best_mode, int this_rd, RATE_DISTORTION *rd, int other_cost, MACROBLOCK *x) { MB_PREDICTION_MODE this_mode = x->e_mbd.mode_info_context->mbmi.mode; other_cost += x->ref_frame_cost[x->e_mbd.mode_info_context->mbmi.ref_frame]; /* Calculate the final y RD estimate for this mode */ best_mode->yrd = RDCOST(x->rdmult, x->rddiv, (rd->rate2 - rd->rate_uv - other_cost), (rd->distortion2 - rd->distortion_uv)); best_mode->rd = this_rd; memcpy(&best_mode->mbmode, &x->e_mbd.mode_info_context->mbmi, sizeof(MB_MODE_INFO)); memcpy(&best_mode->partition, x->partition_info, sizeof(PARTITION_INFO)); if ((this_mode == B_PRED) || (this_mode == SPLITMV)) { int i; for (i = 0; i < 16; ++i) { best_mode->bmodes[i] = x->e_mbd.block[i].bmi; } } } void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra, int mb_row, int mb_col) { BLOCK *b = &x->block[0]; BLOCKD *d = &x->e_mbd.block[0]; MACROBLOCKD *xd = &x->e_mbd; int_mv best_ref_mv_sb[2]; int_mv mode_mv_sb[2][MB_MODE_COUNT]; int_mv best_ref_mv; int_mv *mode_mv; MB_PREDICTION_MODE this_mode; int num00; int best_mode_index = 0; BEST_MODE best_mode; int i; int mode_index; int mdcounts[4]; int rate; RATE_DISTORTION rd; int uv_intra_rate, uv_intra_distortion, uv_intra_rate_tokenonly; int uv_intra_tteob = 0; int uv_intra_done = 0; MB_PREDICTION_MODE uv_intra_mode = 0; int_mv mvp; int near_sadidx[8] = { 0, 1, 2, 3, 4, 5, 6, 7 }; int saddone = 0; /* search range obtained from mv_pred(). It uses step_param levels (0-7). */ int sr = 0; unsigned char *plane[4][3] = { { 0, 0 } }; int ref_frame_map[4]; int sign_bias = 0; int intra_rd_penalty = 10 * vp8_dc_quant(cpi->common.base_qindex, cpi->common.y1dc_delta_q); #if CONFIG_TEMPORAL_DENOISING unsigned int zero_mv_sse = UINT_MAX, best_sse = UINT_MAX, best_rd_sse = UINT_MAX; #endif // _uv variables are not set consistently before calling update_best_mode.
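/* How the per-mode thresholds below adapt (illustrative numbers): x->rd_threshes[m] = (cpi->rd_baseline_thresh[m] >> 7) * x->rd_thresh_mult[m], so a baseline of 12800 with multiplier 128 gives (12800 >> 7) * 128 = 12800; each time a mode fails to become the best its multiplier grows by 4 (capped at MAX_THRESHMULT), e.g. two misses give 136 and a threshold of 13600, while the winning mode's multiplier is reduced, making it cheaper to retest. */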
rd.rate_uv = 0; rd.distortion_uv = 0; mode_mv = mode_mv_sb[sign_bias]; best_ref_mv.as_int = 0; best_mode.rd = INT_MAX; best_mode.yrd = INT_MAX; best_mode.intra_rd = INT_MAX; memset(mode_mv_sb, 0, sizeof(mode_mv_sb)); memset(&best_mode.mbmode, 0, sizeof(best_mode.mbmode)); memset(&best_mode.bmodes, 0, sizeof(best_mode.bmodes)); /* Setup search priorities */ get_reference_search_order(cpi, ref_frame_map); /* Check to see if there is at least 1 valid reference frame for which we * need to calculate near_mvs. */ if (ref_frame_map[1] > 0) { sign_bias = vp8_find_near_mvs_bias( &x->e_mbd, x->e_mbd.mode_info_context, mode_mv_sb, best_ref_mv_sb, mdcounts, ref_frame_map[1], cpi->common.ref_frame_sign_bias); mode_mv = mode_mv_sb[sign_bias]; best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int; } get_predictor_pointers(cpi, plane, recon_yoffset, recon_uvoffset); *returnintra = INT_MAX; /* Count of the number of MBs tested so far this frame */ x->mbs_tested_so_far++; x->skip = 0; for (mode_index = 0; mode_index < MAX_MODES; ++mode_index) { int this_rd = INT_MAX; int disable_skip = 0; int other_cost = 0; int this_ref_frame = ref_frame_map[vp8_ref_frame_order[mode_index]]; /* Test best rd so far against threshold for trying this mode. */ if (best_mode.rd <= x->rd_threshes[mode_index]) continue; if (this_ref_frame < 0) continue; /* These variables hold our rolling total cost and distortion for * this mode */ rd.rate2 = 0; rd.distortion2 = 0; this_mode = vp8_mode_order[mode_index]; x->e_mbd.mode_info_context->mbmi.mode = this_mode; x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; /* Only consider ZEROMV/ALTREF_FRAME for alt ref frame, * unless ARNR filtering is enabled in which case we want * an unfiltered alternative */ if (cpi->is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { if (this_mode != ZEROMV || x->e_mbd.mode_info_context->mbmi.ref_frame != ALTREF_FRAME) { continue; } } /* everything but intra */ if (x->e_mbd.mode_info_context->mbmi.ref_frame) { assert(plane[this_ref_frame][0] != NULL && plane[this_ref_frame][1] != NULL && plane[this_ref_frame][2] != NULL); x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; if (sign_bias != cpi->common.ref_frame_sign_bias[this_ref_frame]) { sign_bias = cpi->common.ref_frame_sign_bias[this_ref_frame]; mode_mv = mode_mv_sb[sign_bias]; best_ref_mv.as_int = best_ref_mv_sb[sign_bias].as_int; } } /* Check to see if the testing frequency for this mode is at its * max. If so then prevent it from being tested and increase the * threshold for its testing */ if (x->mode_test_hit_counts[mode_index] && (cpi->mode_check_freq[mode_index] > 1)) { if (x->mbs_tested_so_far <= cpi->mode_check_freq[mode_index] * x->mode_test_hit_counts[mode_index]) { /* Increase the threshold for coding this mode to make it * less likely to be chosen */ x->rd_thresh_mult[mode_index] += 4; if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) { x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; } x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * x->rd_thresh_mult[mode_index]; continue; } } /* We have now reached the point where we are going to test the * current mode so increment the counter for the number of times * it has been tested */ x->mode_test_hit_counts[mode_index]++; /* Experimental code. Special case for gf and arf zeromv modes.
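* In summary the dispatch below chooses: intra and SPLITMV get no boost, ZEROMV on GOLDEN/ALTREF gets GF_ZEROMV_ZBIN_BOOST, ZEROMV on LAST gets LF_ZEROMV_ZBIN_BOOST, and any other inter mode gets MV_ZBIN_BOOST (constants defined elsewhere in the encoder).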
* Increase zbin size to suppress noise */ if (x->zbin_mode_boost_enabled) { if (this_ref_frame == INTRA_FRAME) { x->zbin_mode_boost = 0; } else { if (vp8_mode_order[mode_index] == ZEROMV) { if (this_ref_frame != LAST_FRAME) { x->zbin_mode_boost = GF_ZEROMV_ZBIN_BOOST; } else { x->zbin_mode_boost = LF_ZEROMV_ZBIN_BOOST; } } else if (vp8_mode_order[mode_index] == SPLITMV) { x->zbin_mode_boost = 0; } else { x->zbin_mode_boost = MV_ZBIN_BOOST; } } vp8_update_zbin_extra(cpi, x); } if (!uv_intra_done && this_ref_frame == INTRA_FRAME) { rd_pick_intra_mbuv_mode(x, &uv_intra_rate, &uv_intra_rate_tokenonly, &uv_intra_distortion); uv_intra_mode = x->e_mbd.mode_info_context->mbmi.uv_mode; /* * Total of the eobs is used later to further adjust rate2. Since uv * block's intra eobs will be overwritten when we check inter modes, * we need to save uv_intra_tteob here. */ for (i = 16; i < 24; ++i) uv_intra_tteob += x->e_mbd.eobs[i]; uv_intra_done = 1; } switch (this_mode) { case B_PRED: { int tmp_rd; /* Note the rate value returned here includes the cost of * coding the BPRED mode: x->mbmode_cost[x->e_mbd.frame_type][BPRED] */ int distortion; tmp_rd = rd_pick_intra4x4mby_modes(x, &rate, &rd.rate_y, &distortion, best_mode.yrd); rd.rate2 += rate; rd.distortion2 += distortion; if (tmp_rd < best_mode.yrd) { assert(uv_intra_done); rd.rate2 += uv_intra_rate; rd.rate_uv = uv_intra_rate_tokenonly; rd.distortion2 += uv_intra_distortion; rd.distortion_uv = uv_intra_distortion; } else { this_rd = INT_MAX; disable_skip = 1; } break; } case SPLITMV: { int tmp_rd; int this_rd_thresh; int distortion; this_rd_thresh = (vp8_ref_frame_order[mode_index] == 1) ? x->rd_threshes[THR_NEW1] : x->rd_threshes[THR_NEW3]; this_rd_thresh = (vp8_ref_frame_order[mode_index] == 2) ? x->rd_threshes[THR_NEW2] : this_rd_thresh; tmp_rd = vp8_rd_pick_best_mbsegmentation( cpi, x, &best_ref_mv, best_mode.yrd, mdcounts, &rate, &rd.rate_y, &distortion, this_rd_thresh); rd.rate2 += rate; rd.distortion2 += distortion; /* If even the 'Y' rd value of split is higher than best so far * then don't bother looking at UV */ if (tmp_rd < best_mode.yrd) { /* Now work out UV cost and add it in */ rd_inter4x4_uv(cpi, x, &rd.rate_uv, &rd.distortion_uv, cpi->common.full_pixel); rd.rate2 += rd.rate_uv; rd.distortion2 += rd.distortion_uv; } else { this_rd = INT_MAX; disable_skip = 1; } break; } case DC_PRED: case V_PRED: case H_PRED: case TM_PRED: { int distortion; x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; vp8_build_intra_predictors_mby_s( xd, xd->dst.y_buffer - xd->dst.y_stride, xd->dst.y_buffer - 1, xd->dst.y_stride, xd->predictor, 16); macro_block_yrd(x, &rd.rate_y, &distortion); rd.rate2 += rd.rate_y; rd.distortion2 += distortion; rd.rate2 += x->mbmode_cost[x->e_mbd.frame_type] [x->e_mbd.mode_info_context->mbmi.mode]; assert(uv_intra_done); rd.rate2 += uv_intra_rate; rd.rate_uv = uv_intra_rate_tokenonly; rd.distortion2 += uv_intra_distortion; rd.distortion_uv = uv_intra_distortion; break; } case NEWMV: { int thissme; int bestsme = INT_MAX; int step_param = cpi->sf.first_step; int further_steps; int n; /* If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ int do_refine = 1; int sadpb = x->sadperbit16; int_mv mvp_full; int col_min = ((best_ref_mv.as_mv.col + 7) >> 3) - MAX_FULL_PEL_VAL; int row_min = ((best_ref_mv.as_mv.row + 7) >> 3) - MAX_FULL_PEL_VAL; int col_max = (best_ref_mv.as_mv.col >> 3) + MAX_FULL_PEL_VAL; int row_max = (best_ref_mv.as_mv.row >> 3) +
MAX_FULL_PEL_VAL; int tmp_col_min = x->mv_col_min; int tmp_col_max = x->mv_col_max; int tmp_row_min = x->mv_row_min; int tmp_row_max = x->mv_row_max; if (!saddone) { vp8_cal_sad(cpi, xd, x, recon_yoffset, &near_sadidx[0]); saddone = 1; } vp8_mv_pred(cpi, &x->e_mbd, x->e_mbd.mode_info_context, &mvp, x->e_mbd.mode_info_context->mbmi.ref_frame, cpi->common.ref_frame_sign_bias, &sr, &near_sadidx[0]); mvp_full.as_mv.col = mvp.as_mv.col >> 3; mvp_full.as_mv.row = mvp.as_mv.row >> 3; /* Get intersection of UMV window and valid MV window to * reduce # of checks in diamond search. */ if (x->mv_col_min < col_min) x->mv_col_min = col_min; if (x->mv_col_max > col_max) x->mv_col_max = col_max; if (x->mv_row_min < row_min) x->mv_row_min = row_min; if (x->mv_row_max > row_max) x->mv_row_max = row_max; /* adjust search range according to sr from mv prediction */ if (sr > step_param) step_param = sr; /* Initial step/diamond search */ { bestsme = cpi->diamond_search_sad( x, b, d, &mvp_full, &d->bmi.mv, step_param, sadpb, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); mode_mv[NEWMV].as_int = d->bmi.mv.as_int; /* Further step/diamond searches as necessary */ further_steps = (cpi->sf.max_step_search_steps - 1) - step_param; n = num00; num00 = 0; /* If there won't be more n-step search, check to see if refining * search is needed. */ if (n > further_steps) do_refine = 0; while (n < further_steps) { n++; if (num00) { num00--; } else { thissme = cpi->diamond_search_sad( x, b, d, &mvp_full, &d->bmi.mv, step_param + n, sadpb, &num00, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); /* check to see if refining search is needed. */ if (num00 > (further_steps - n)) do_refine = 0; if (thissme < bestsme) { bestsme = thissme; mode_mv[NEWMV].as_int = d->bmi.mv.as_int; } else { d->bmi.mv.as_int = mode_mv[NEWMV].as_int; } } } } /* final 1-away diamond refining search */ if (do_refine == 1) { int search_range; search_range = 8; thissme = cpi->refining_search_sad( x, b, d, &d->bmi.mv, sadpb, search_range, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &best_ref_mv); if (thissme < bestsme) { bestsme = thissme; mode_mv[NEWMV].as_int = d->bmi.mv.as_int; } else { d->bmi.mv.as_int = mode_mv[NEWMV].as_int; } } x->mv_col_min = tmp_col_min; x->mv_col_max = tmp_col_max; x->mv_row_min = tmp_row_min; x->mv_row_max = tmp_row_max; if (bestsme < INT_MAX) { int dis; /* TODO: use dis in distortion calculation later. */ unsigned int sse; cpi->find_fractional_mv_step( x, b, d, &d->bmi.mv, &best_ref_mv, x->errorperbit, &cpi->fn_ptr[BLOCK_16X16], x->mvcost, &dis, &sse); } mode_mv[NEWMV].as_int = d->bmi.mv.as_int; /* Add the new motion vector cost to our rolling cost variable */ rd.rate2 += vp8_mv_bit_cost(&mode_mv[NEWMV], &best_ref_mv, x->mvcost, 96); } // fall through case NEARESTMV: case NEARMV: /* Clip "next_nearest" so that it does not extend too far out * of image */ vp8_clamp_mv2(&mode_mv[this_mode], xd); /* Do not bother proceeding if the vector (from newmv, nearest * or near) is 0,0 as this should then be coded using the zeromv * mode. */ if (((this_mode == NEARMV) || (this_mode == NEARESTMV)) && (mode_mv[this_mode].as_int == 0)) { continue; } // fall through case ZEROMV: /* Trap vectors that reach beyond the UMV borders * Note that ALL New MV, Nearest MV, Near MV and Zero MV code * drops through to this point because of the lack of break * statements in the previous two cases.
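* An illustrative check: with x->mv_col_max = 16 (full-pel units) a candidate with col = 140 eighth-pel units gives 140 >> 3 = 17 > 16, so the mode is rejected outright here rather than clamped.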
*/ if (((mode_mv[this_mode].as_mv.row >> 3) < x->mv_row_min) || ((mode_mv[this_mode].as_mv.row >> 3) > x->mv_row_max) || ((mode_mv[this_mode].as_mv.col >> 3) < x->mv_col_min) || ((mode_mv[this_mode].as_mv.col >> 3) > x->mv_col_max)) { continue; } vp8_set_mbmode_and_mvs(x, this_mode, &mode_mv[this_mode]); this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x); break; default: break; } this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip, uv_intra_tteob, intra_rd_penalty, cpi, x); /* Keep record of best intra distortion */ if ((x->e_mbd.mode_info_context->mbmi.ref_frame == INTRA_FRAME) && (this_rd < best_mode.intra_rd)) { best_mode.intra_rd = this_rd; *returnintra = rd.distortion2; } #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { unsigned int sse; vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &sse, mode_mv[this_mode]); if (sse < best_rd_sse) best_rd_sse = sse; /* Store for later use by denoiser. */ if (this_mode == ZEROMV && sse < zero_mv_sse) { zero_mv_sse = sse; x->best_zeromv_reference_frame = x->e_mbd.mode_info_context->mbmi.ref_frame; } /* Store the best NEWMV in x for later use in the denoiser. */ if (x->e_mbd.mode_info_context->mbmi.mode == NEWMV && sse < best_sse) { best_sse = sse; vp8_get_inter_mbpred_error(x, &cpi->fn_ptr[BLOCK_16X16], &best_sse, mode_mv[this_mode]); x->best_sse_inter_mode = NEWMV; x->best_sse_mv = x->e_mbd.mode_info_context->mbmi.mv; x->need_to_clamp_best_mvs = x->e_mbd.mode_info_context->mbmi.need_to_clamp_mvs; x->best_reference_frame = x->e_mbd.mode_info_context->mbmi.ref_frame; } } #endif /* Did this mode help.. i.e. is it the new best mode */ if (this_rd < best_mode.rd || x->skip) { /* Note index of best mode so far */ best_mode_index = mode_index; *returnrate = rd.rate2; *returndistortion = rd.distortion2; if (this_mode <= B_PRED) { x->e_mbd.mode_info_context->mbmi.uv_mode = uv_intra_mode; /* required for left and above block mv */ x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; } update_best_mode(&best_mode, this_rd, &rd, other_cost, x); /* Testing this mode gave rise to an improvement in best error * score. Lower threshold a bit for next time */ x->rd_thresh_mult[mode_index] = (x->rd_thresh_mult[mode_index] >= (MIN_THRESHMULT + 2)) ? x->rd_thresh_mult[mode_index] - 2 : MIN_THRESHMULT; } /* If the mode did not help improve the best error case then raise * the threshold for testing that mode next time around. */ else { x->rd_thresh_mult[mode_index] += 4; if (x->rd_thresh_mult[mode_index] > MAX_THRESHMULT) { x->rd_thresh_mult[mode_index] = MAX_THRESHMULT; } } x->rd_threshes[mode_index] = (cpi->rd_baseline_thresh[mode_index] >> 7) * x->rd_thresh_mult[mode_index]; if (x->skip) break; } /* Reduce the activation RD thresholds for the best choice mode */ if ((cpi->rd_baseline_thresh[best_mode_index] > 0) && (cpi->rd_baseline_thresh[best_mode_index] < (INT_MAX >> 2))) { int best_adjustment = (x->rd_thresh_mult[best_mode_index] >> 2); x->rd_thresh_mult[best_mode_index] = (x->rd_thresh_mult[best_mode_index] >= (MIN_THRESHMULT + best_adjustment)) ? x->rd_thresh_mult[best_mode_index] - best_adjustment : MIN_THRESHMULT; x->rd_threshes[best_mode_index] = (cpi->rd_baseline_thresh[best_mode_index] >> 7) * x->rd_thresh_mult[best_mode_index]; } #if CONFIG_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity) { int block_index = mb_row * cpi->common.mb_cols + mb_col; if (x->best_sse_inter_mode == DC_PRED) { /* No best MV found.
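* best_sse_inter_mode still holds DC_PRED, which acts as the 'not set' value here, meaning no inter mode recorded an SSE for the denoiser, so fall back to the overall best mode and the best rd SSE below.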
*/ x->best_sse_inter_mode = best_mode.mbmode.mode; x->best_sse_mv = best_mode.mbmode.mv; x->need_to_clamp_best_mvs = best_mode.mbmode.need_to_clamp_mvs; x->best_reference_frame = best_mode.mbmode.ref_frame; best_sse = best_rd_sse; } vp8_denoiser_denoise_mb(&cpi->denoiser, x, best_sse, zero_mv_sse, recon_yoffset, recon_uvoffset, &cpi->common.lf_info, mb_row, mb_col, block_index, 0); /* Reevaluate ZEROMV after denoising. */ if (best_mode.mbmode.ref_frame == INTRA_FRAME && x->best_zeromv_reference_frame != INTRA_FRAME) { int this_rd = INT_MAX; int disable_skip = 0; int other_cost = 0; int this_ref_frame = x->best_zeromv_reference_frame; rd.rate2 = x->ref_frame_cost[this_ref_frame] + vp8_cost_mv_ref(ZEROMV, mdcounts); rd.distortion2 = 0; /* set up the proper prediction buffers for the frame */ x->e_mbd.mode_info_context->mbmi.ref_frame = this_ref_frame; x->e_mbd.pre.y_buffer = plane[this_ref_frame][0]; x->e_mbd.pre.u_buffer = plane[this_ref_frame][1]; x->e_mbd.pre.v_buffer = plane[this_ref_frame][2]; x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; this_rd = evaluate_inter_mode_rd(mdcounts, &rd, &disable_skip, cpi, x); this_rd = calculate_final_rd_costs(this_rd, &rd, &other_cost, disable_skip, uv_intra_tteob, intra_rd_penalty, cpi, x); if (this_rd < best_mode.rd || x->skip) { *returnrate = rd.rate2; *returndistortion = rd.distortion2; update_best_mode(&best_mode, this_rd, &rd, other_cost, x); } } } #endif if (cpi->is_src_frame_alt_ref && (best_mode.mbmode.mode != ZEROMV || best_mode.mbmode.ref_frame != ALTREF_FRAME)) { x->e_mbd.mode_info_context->mbmi.mode = ZEROMV; x->e_mbd.mode_info_context->mbmi.ref_frame = ALTREF_FRAME; x->e_mbd.mode_info_context->mbmi.mv.as_int = 0; x->e_mbd.mode_info_context->mbmi.uv_mode = DC_PRED; x->e_mbd.mode_info_context->mbmi.mb_skip_coeff = (cpi->common.mb_no_coeff_skip); x->e_mbd.mode_info_context->mbmi.partitioning = 0; return; } /* macroblock modes */ memcpy(&x->e_mbd.mode_info_context->mbmi, &best_mode.mbmode, sizeof(MB_MODE_INFO)); if (best_mode.mbmode.mode == B_PRED) { for (i = 0; i < 16; ++i) { xd->mode_info_context->bmi[i].as_mode = best_mode.bmodes[i].as_mode; } } if (best_mode.mbmode.mode == SPLITMV) { for (i = 0; i < 16; ++i) { xd->mode_info_context->bmi[i].mv.as_int = best_mode.bmodes[i].mv.as_int; } memcpy(x->partition_info, &best_mode.partition, sizeof(PARTITION_INFO)); x->e_mbd.mode_info_context->mbmi.mv.as_int = x->partition_info->bmi[15].mv.as_int; } if (sign_bias != cpi->common.ref_frame_sign_bias[xd->mode_info_context->mbmi.ref_frame]) { best_ref_mv.as_int = best_ref_mv_sb[!sign_bias].as_int; } rd_update_mvcount(x, &best_ref_mv); } void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate) { int error4x4, error16x16; int rate4x4, rate16x16 = 0, rateuv; int dist4x4, dist16x16, distuv; int rate_; int rate4x4_tokenonly = 0; int rate16x16_tokenonly = 0; int rateuv_tokenonly = 0; x->e_mbd.mode_info_context->mbmi.ref_frame = INTRA_FRAME; rd_pick_intra_mbuv_mode(x, &rateuv, &rateuv_tokenonly, &distuv); rate_ = rateuv; error16x16 = rd_pick_intra16x16mby_mode(x, &rate16x16, &rate16x16_tokenonly, &dist16x16); error4x4 = rd_pick_intra4x4mby_modes(x, &rate4x4, &rate4x4_tokenonly, &dist4x4, error16x16); if (error4x4 < error16x16) { x->e_mbd.mode_info_context->mbmi.mode = B_PRED; rate_ += rate4x4; } else { rate_ += rate16x16; } *rate = rate_; } libvpx-1.8.2/vp8/encoder/rdopt.h000066400000000000000000000100711357355204000164660ustar00rootroot00000000000000/* * 
Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_RDOPT_H_ #define VPX_VP8_ENCODER_RDOPT_H_ #include "./vpx_config.h" #ifdef __cplusplus extern "C" { #endif #define RDCOST(RM, DM, R, D) (((128 + (R) * (RM)) >> 8) + (DM) * (D)) void vp8cx_initialize_me_consts(VP8_COMP *cpi, int QIndex); void vp8_auto_select_speed(VP8_COMP *cpi); static INLINE void insertsortmv(int arr[], int len) { int i, j, k; for (i = 1; i <= len - 1; ++i) { for (j = 0; j < i; ++j) { if (arr[j] > arr[i]) { int temp; temp = arr[i]; for (k = i; k > j; k--) arr[k] = arr[k - 1]; arr[j] = temp; } } } } static INLINE void insertsortsad(int arr[], int idx[], int len) { int i, j, k; for (i = 1; i <= len - 1; ++i) { for (j = 0; j < i; ++j) { if (arr[j] > arr[i]) { int temp, tempi; temp = arr[i]; tempi = idx[i]; for (k = i; k > j; k--) { arr[k] = arr[k - 1]; idx[k] = idx[k - 1]; } arr[j] = temp; idx[j] = tempi; } } } } void vp8_initialize_rd_consts(VP8_COMP *cpi, MACROBLOCK *x, int Qvalue); void vp8_rd_pick_inter_mode(VP8_COMP *cpi, MACROBLOCK *x, int recon_yoffset, int recon_uvoffset, int *returnrate, int *returndistortion, int *returnintra, int mb_row, int mb_col); void vp8_rd_pick_intra_mode(MACROBLOCK *x, int *rate); static INLINE void get_plane_pointers(const YV12_BUFFER_CONFIG *fb, unsigned char *plane[3], unsigned int recon_yoffset, unsigned int recon_uvoffset) { plane[0] = fb->y_buffer + recon_yoffset; plane[1] = fb->u_buffer + recon_uvoffset; plane[2] = fb->v_buffer + recon_uvoffset; } static INLINE void get_predictor_pointers(const VP8_COMP *cpi, unsigned char *plane[4][3], unsigned int recon_yoffset, unsigned int recon_uvoffset) { if (cpi->ref_frame_flags & VP8_LAST_FRAME) { get_plane_pointers(&cpi->common.yv12_fb[cpi->common.lst_fb_idx], plane[LAST_FRAME], recon_yoffset, recon_uvoffset); } if (cpi->ref_frame_flags & VP8_GOLD_FRAME) { get_plane_pointers(&cpi->common.yv12_fb[cpi->common.gld_fb_idx], plane[GOLDEN_FRAME], recon_yoffset, recon_uvoffset); } if (cpi->ref_frame_flags & VP8_ALTR_FRAME) { get_plane_pointers(&cpi->common.yv12_fb[cpi->common.alt_fb_idx], plane[ALTREF_FRAME], recon_yoffset, recon_uvoffset); } } static INLINE void get_reference_search_order(const VP8_COMP *cpi, int ref_frame_map[4]) { int i = 0; ref_frame_map[i++] = INTRA_FRAME; if (cpi->ref_frame_flags & VP8_LAST_FRAME) ref_frame_map[i++] = LAST_FRAME; if (cpi->ref_frame_flags & VP8_GOLD_FRAME) ref_frame_map[i++] = GOLDEN_FRAME; if (cpi->ref_frame_flags & VP8_ALTR_FRAME) ref_frame_map[i++] = ALTREF_FRAME; for (; i < 4; ++i) ref_frame_map[i] = -1; } void vp8_mv_pred(VP8_COMP *cpi, MACROBLOCKD *xd, const MODE_INFO *here, int_mv *mvp, int refframe, int *ref_frame_sign_bias, int *sr, int near_sadidx[]); void vp8_cal_sad(VP8_COMP *cpi, MACROBLOCKD *xd, MACROBLOCK *x, int recon_yoffset, int near_sadidx[]); int VP8_UVSSE(MACROBLOCK *x); int vp8_cost_mv_ref(MB_PREDICTION_MODE m, const int near_mv_ref_ct[4]); void vp8_set_mbmode_and_mvs(MACROBLOCK *x, MB_PREDICTION_MODE mb, int_mv *mv); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_RDOPT_H_ libvpx-1.8.2/vp8/encoder/segmentation.c000066400000000000000000000037261357355204000200370ustar00rootroot00000000000000/* * Copyright (c) 
2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "segmentation.h" #include "vpx_mem/vpx_mem.h" void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x) { int mb_row, mb_col; MODE_INFO *this_mb_mode_info = cm->mi; x->gf_active_ptr = (signed char *)cpi->gf_active_flags; if ((cm->frame_type == KEY_FRAME) || (cm->refresh_golden_frame)) { /* Reset Gf useage monitors */ memset(cpi->gf_active_flags, 1, (cm->mb_rows * cm->mb_cols)); cpi->gf_active_count = cm->mb_rows * cm->mb_cols; } else { /* for each macroblock row in image */ for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { /* for each macroblock col in image */ for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { /* If using golden then set GF active flag if not already set. * If using last frame 0,0 mode then leave flag as it is * else if using non 0,0 motion or intra modes then clear * flag if it is currently set */ if ((this_mb_mode_info->mbmi.ref_frame == GOLDEN_FRAME) || (this_mb_mode_info->mbmi.ref_frame == ALTREF_FRAME)) { if (*(x->gf_active_ptr) == 0) { *(x->gf_active_ptr) = 1; cpi->gf_active_count++; } } else if ((this_mb_mode_info->mbmi.mode != ZEROMV) && *(x->gf_active_ptr)) { *(x->gf_active_ptr) = 0; cpi->gf_active_count--; } x->gf_active_ptr++; /* Step onto next entry */ this_mb_mode_info++; /* skip to next mb */ } /* this is to account for the border */ this_mb_mode_info++; } } } libvpx-1.8.2/vp8/encoder/segmentation.h000066400000000000000000000014621357355204000200370ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_SEGMENTATION_H_ #define VPX_VP8_ENCODER_SEGMENTATION_H_ #include "string.h" #include "vp8/common/blockd.h" #include "onyx_int.h" #ifdef __cplusplus extern "C" { #endif extern void vp8_update_gf_useage_maps(VP8_COMP *cpi, VP8_COMMON *cm, MACROBLOCK *x); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_SEGMENTATION_H_ libvpx-1.8.2/vp8/encoder/temporal_filter.c000066400000000000000000000332121357355204000205230ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "vp8/common/onyxc_int.h" #include "onyx_int.h" #include "vp8/common/systemdependent.h" #include "vp8/encoder/quantize.h" #include "vp8/common/alloccommon.h" #include "mcomp.h" #include "firstpass.h" #include "vpx_scale/vpx_scale.h" #include "vp8/common/extend.h" #include "ratectrl.h" #include "vp8/common/quant_common.h" #include "segmentation.h" #include "temporal_filter.h" #include "vpx_mem/vpx_mem.h" #include "vp8/common/swapyv12buffer.h" #include "vp8/common/threading.h" #include "vpx_ports/vpx_timer.h" #include #include #define ALT_REF_MC_ENABLED 1 /* toggle MC in AltRef filtering */ #define ALT_REF_SUBPEL_ENABLED 1 /* toggle subpel in MC AltRef filtering */ #if VP8_TEMPORAL_ALT_REF static void vp8_temporal_filter_predictors_mb_c( MACROBLOCKD *x, unsigned char *y_mb_ptr, unsigned char *u_mb_ptr, unsigned char *v_mb_ptr, int stride, int mv_row, int mv_col, unsigned char *pred) { int offset; unsigned char *yptr, *uptr, *vptr; /* Y */ yptr = y_mb_ptr + (mv_row >> 3) * stride + (mv_col >> 3); if ((mv_row | mv_col) & 7) { x->subpixel_predict16x16(yptr, stride, mv_col & 7, mv_row & 7, &pred[0], 16); } else { vp8_copy_mem16x16(yptr, stride, &pred[0], 16); } /* U & V */ mv_row >>= 1; mv_col >>= 1; stride = (stride + 1) >> 1; offset = (mv_row >> 3) * stride + (mv_col >> 3); uptr = u_mb_ptr + offset; vptr = v_mb_ptr + offset; if ((mv_row | mv_col) & 7) { x->subpixel_predict8x8(uptr, stride, mv_col & 7, mv_row & 7, &pred[256], 8); x->subpixel_predict8x8(vptr, stride, mv_col & 7, mv_row & 7, &pred[320], 8); } else { vp8_copy_mem8x8(uptr, stride, &pred[256], 8); vp8_copy_mem8x8(vptr, stride, &pred[320], 8); } } void vp8_temporal_filter_apply_c(unsigned char *frame1, unsigned int stride, unsigned char *frame2, unsigned int block_size, int strength, int filter_weight, unsigned int *accumulator, unsigned short *count) { unsigned int i, j, k; int modifier; int byte = 0; const int rounding = strength > 0 ? 1 << (strength - 1) : 0; for (i = 0, k = 0; i < block_size; ++i) { for (j = 0; j < block_size; j++, k++) { int src_byte = frame1[byte]; int pixel_value = *frame2++; modifier = src_byte - pixel_value; /* This is an integer approximation of: * float coeff = (3.0 * modifer * modifier) / pow(2, strength); * modifier = (int)roundf(coeff > 16 ? 

#if ALT_REF_MC_ENABLED

static int vp8_temporal_filter_find_matching_mb_c(VP8_COMP *cpi,
                                                  YV12_BUFFER_CONFIG *arf_frame,
                                                  YV12_BUFFER_CONFIG *frame_ptr,
                                                  int mb_offset,
                                                  int error_thresh) {
  MACROBLOCK *x = &cpi->mb;
  int step_param;
  int sadpb = x->sadperbit16;
  int bestsme = INT_MAX;

  BLOCK *b = &x->block[0];
  BLOCKD *d = &x->e_mbd.block[0];
  int_mv best_ref_mv1;
  int_mv best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */

  /* Save input state */
  unsigned char **base_src = b->base_src;
  int src = b->src;
  int src_stride = b->src_stride;
  unsigned char *base_pre = x->e_mbd.pre.y_buffer;
  int pre = d->offset;
  int pre_stride = x->e_mbd.pre.y_stride;

  (void)error_thresh;

  best_ref_mv1.as_int = 0;
  best_ref_mv1_full.as_mv.col = best_ref_mv1.as_mv.col >> 3;
  best_ref_mv1_full.as_mv.row = best_ref_mv1.as_mv.row >> 3;

  /* Set up frame pointers */
  b->base_src = &arf_frame->y_buffer;
  b->src_stride = arf_frame->y_stride;
  b->src = mb_offset;

  x->e_mbd.pre.y_buffer = frame_ptr->y_buffer;
  x->e_mbd.pre.y_stride = frame_ptr->y_stride;
  d->offset = mb_offset;

  /* Further step/diamond searches as necessary */
  if (cpi->Speed < 8) {
    step_param = cpi->sf.first_step + (cpi->Speed > 5);
  } else {
    step_param = cpi->sf.first_step + 2;
  }

  /* TODO: check that the 16x16 vf & sdf are selected here */
  /* Ignore mv costing by sending NULL cost arrays */
  bestsme = vp8_hex_search(x, b, d, &best_ref_mv1_full, &d->bmi.mv, step_param,
                           sadpb, &cpi->fn_ptr[BLOCK_16X16], NULL,
                           &best_ref_mv1);
  (void)bestsme;  // Ignore unused return value.

#if ALT_REF_SUBPEL_ENABLED
  /* Try sub-pixel MC? */
  {
    int distortion;
    unsigned int sse;
    /* Ignore mv costing by sending NULL cost array */
    bestsme = cpi->find_fractional_mv_step(
        x, b, d, &d->bmi.mv, &best_ref_mv1, x->errorperbit,
        &cpi->fn_ptr[BLOCK_16X16], NULL, &distortion, &sse);
  }
#endif

  /* Restore input state */
  b->base_src = base_src;
  b->src = src;
  b->src_stride = src_stride;
  x->e_mbd.pre.y_buffer = base_pre;
  d->offset = pre;
  x->e_mbd.pre.y_stride = pre_stride;

  return bestsme;
}
#endif

static void vp8_temporal_filter_iterate_c(VP8_COMP *cpi, int frame_count,
                                          int alt_ref_index, int strength) {
  int byte;
  int frame;
  int mb_col, mb_row;
  unsigned int filter_weight;
  int mb_cols = cpi->common.mb_cols;
  int mb_rows = cpi->common.mb_rows;
  int mb_y_offset = 0;
  int mb_uv_offset = 0;
  DECLARE_ALIGNED(16, unsigned int, accumulator[16 * 16 + 8 * 8 + 8 * 8]);
  DECLARE_ALIGNED(16, unsigned short, count[16 * 16 + 8 * 8 + 8 * 8]);
  MACROBLOCKD *mbd = &cpi->mb.e_mbd;
  YV12_BUFFER_CONFIG *f = cpi->frames[alt_ref_index];
  unsigned char *dst1, *dst2;
  DECLARE_ALIGNED(16, unsigned char, predictor[16 * 16 + 8 * 8 + 8 * 8]);

  /* Save input state */
  unsigned char *y_buffer = mbd->pre.y_buffer;
  unsigned char *u_buffer = mbd->pre.u_buffer;
  unsigned char *v_buffer = mbd->pre.v_buffer;

  for (mb_row = 0; mb_row < mb_rows; ++mb_row) {
#if ALT_REF_MC_ENABLED
    /* Source frames are extended to 16 pixels. This is different from
     * L/A/G reference frames that have a border of 32 (VP8BORDERINPIXELS).
     * A 6-tap filter is used for motion search, which requires 2 pixels
     * before and 3 pixels after. So the largest Y mv on a border would
     * then be 16 - 3. The UV blocks are half the size of the Y and
     * therefore only extended by 8. The largest mv that a UV block
     * can support is 8 - 3. A UV mv is half of a Y mv.
     * (16 - 3) >> 1 == 6 which is greater than 8 - 3.
     * To keep the mv in play for both Y and UV planes the max that it
     * can be on a border is therefore 16 - 5.
     */
    cpi->mb.mv_row_min = -((mb_row * 16) + (16 - 5));
    cpi->mb.mv_row_max = ((cpi->common.mb_rows - 1 - mb_row) * 16) + (16 - 5);
#endif

    for (mb_col = 0; mb_col < mb_cols; ++mb_col) {
      int i, j, k;
      int stride;

      memset(accumulator, 0, 384 * sizeof(unsigned int));
      memset(count, 0, 384 * sizeof(unsigned short));

#if ALT_REF_MC_ENABLED
      cpi->mb.mv_col_min = -((mb_col * 16) + (16 - 5));
      cpi->mb.mv_col_max = ((cpi->common.mb_cols - 1 - mb_col) * 16) + (16 - 5);
#endif

      for (frame = 0; frame < frame_count; ++frame) {
        if (cpi->frames[frame] == NULL) continue;

        mbd->block[0].bmi.mv.as_mv.row = 0;
        mbd->block[0].bmi.mv.as_mv.col = 0;

        if (frame == alt_ref_index) {
          filter_weight = 2;
        } else {
          int err = 0;
#if ALT_REF_MC_ENABLED
#define THRESH_LOW 10000
#define THRESH_HIGH 20000
          /* Find best match in this frame by MC */
          err = vp8_temporal_filter_find_matching_mb_c(
              cpi, cpi->frames[alt_ref_index], cpi->frames[frame], mb_y_offset,
              THRESH_LOW);
#endif
          /* Assign a higher weight to a matching MB if its error score is
           * lower. If MC is not applied, the default behavior is to weight
           * all MBs equally. */
          filter_weight = err < THRESH_LOW ? 2 : err < THRESH_HIGH ? 1 : 0;
        }

        if (filter_weight != 0) {
          /* Construct the predictors */
          vp8_temporal_filter_predictors_mb_c(
              mbd, cpi->frames[frame]->y_buffer + mb_y_offset,
              cpi->frames[frame]->u_buffer + mb_uv_offset,
              cpi->frames[frame]->v_buffer + mb_uv_offset,
              cpi->frames[frame]->y_stride, mbd->block[0].bmi.mv.as_mv.row,
              mbd->block[0].bmi.mv.as_mv.col, predictor);

          /* Apply the filter (YUV) */
          vp8_temporal_filter_apply(f->y_buffer + mb_y_offset, f->y_stride,
                                    predictor, 16, strength, filter_weight,
                                    accumulator, count);

          vp8_temporal_filter_apply(f->u_buffer + mb_uv_offset, f->uv_stride,
                                    predictor + 256, 8, strength,
                                    filter_weight, accumulator + 256,
                                    count + 256);

          vp8_temporal_filter_apply(f->v_buffer + mb_uv_offset, f->uv_stride,
                                    predictor + 320, 8, strength,
                                    filter_weight, accumulator + 320,
                                    count + 320);
        }
      }

      /* Normalize filter output to produce AltRef frame */
      dst1 = cpi->alt_ref_buffer.y_buffer;
      stride = cpi->alt_ref_buffer.y_stride;
      byte = mb_y_offset;
      for (i = 0, k = 0; i < 16; ++i) {
        for (j = 0; j < 16; j++, k++) {
          unsigned int pval = accumulator[k] + (count[k] >> 1);
          pval *= cpi->fixed_divide[count[k]];
          pval >>= 19;

          dst1[byte] = (unsigned char)pval;

          /* move to next pixel */
          byte++;
        }

        byte += stride - 16;
      }

      dst1 = cpi->alt_ref_buffer.u_buffer;
      dst2 = cpi->alt_ref_buffer.v_buffer;
      stride = cpi->alt_ref_buffer.uv_stride;
      byte = mb_uv_offset;
      for (i = 0, k = 256; i < 8; ++i) {
        for (j = 0; j < 8; j++, k++) {
          int m = k + 64;

          /* U */
          unsigned int pval = accumulator[k] + (count[k] >> 1);
          pval *= cpi->fixed_divide[count[k]];
          pval >>= 19;
          dst1[byte] = (unsigned char)pval;

          /* V */
          pval = accumulator[m] + (count[m] >> 1);
          pval *= cpi->fixed_divide[count[m]];
          pval >>= 19;
          dst2[byte] = (unsigned char)pval;

          /* move to next pixel */
          byte++;
        }

        byte += stride - 8;
      }

      mb_y_offset += 16;
      mb_uv_offset += 8;
    }

    mb_y_offset += 16 * (f->y_stride - mb_cols);
    mb_uv_offset += 8 * (f->uv_stride - mb_cols);
  }

  /* Restore input state */
  mbd->pre.y_buffer = y_buffer;
  mbd->pre.u_buffer = u_buffer;
  mbd->pre.v_buffer = v_buffer;
}
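
/* Illustrative note on the normalization above (assuming, as the >> 19
 * suggests, that cpi->fixed_divide[n] holds (1 << 19) / n): for a Y pixel
 * with accumulator = 2400 and count = 19,
 *   pval = (2400 + 9) * (524288 / 19) >> 19 = 126,
 * i.e. a rounded integer average of 2400 / 19 without a per-pixel division.
 */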

void vp8_temporal_filter_prepare_c(VP8_COMP *cpi, int distance) {
  int frame = 0;
  int num_frames_backward = 0;
  int num_frames_forward = 0;
  int frames_to_blur_backward = 0;
  int frames_to_blur_forward = 0;
  int frames_to_blur = 0;
  int start_frame = 0;
  int strength = cpi->oxcf.arnr_strength;
  int blur_type = cpi->oxcf.arnr_type;
  int max_frames = cpi->active_arnr_frames;

  num_frames_backward = distance;
  num_frames_forward =
      vp8_lookahead_depth(cpi->lookahead) - (num_frames_backward + 1);

  switch (blur_type) {
    case 1: /* Backward Blur */
      frames_to_blur_backward = num_frames_backward;
      if (frames_to_blur_backward >= max_frames) {
        frames_to_blur_backward = max_frames - 1;
      }
      frames_to_blur = frames_to_blur_backward + 1;
      break;

    case 2: /* Forward Blur */
      frames_to_blur_forward = num_frames_forward;
      if (frames_to_blur_forward >= max_frames) {
        frames_to_blur_forward = max_frames - 1;
      }
      frames_to_blur = frames_to_blur_forward + 1;
      break;

    case 3:
    default: /* Center Blur */
      frames_to_blur_forward = num_frames_forward;
      frames_to_blur_backward = num_frames_backward;

      if (frames_to_blur_forward > frames_to_blur_backward) {
        frames_to_blur_forward = frames_to_blur_backward;
      }
      if (frames_to_blur_backward > frames_to_blur_forward) {
        frames_to_blur_backward = frames_to_blur_forward;
      }

      /* When max_frames is even we have 1 more frame backward than forward */
      if (frames_to_blur_forward > (max_frames - 1) / 2) {
        frames_to_blur_forward = ((max_frames - 1) / 2);
      }
      if (frames_to_blur_backward > (max_frames / 2)) {
        frames_to_blur_backward = (max_frames / 2);
      }

      frames_to_blur = frames_to_blur_backward + frames_to_blur_forward + 1;
      break;
  }

  start_frame = distance + frames_to_blur_forward;

  /* Setup frame pointers, NULL indicates frame not included in filter */
  memset(cpi->frames, 0, max_frames * sizeof(YV12_BUFFER_CONFIG *));
  for (frame = 0; frame < frames_to_blur; ++frame) {
    int which_buffer = start_frame - frame;
    struct lookahead_entry *buf =
        vp8_lookahead_peek(cpi->lookahead, which_buffer, PEEK_FORWARD);
    cpi->frames[frames_to_blur - 1 - frame] = &buf->img;
  }

  vp8_temporal_filter_iterate_c(cpi, frames_to_blur, frames_to_blur_backward,
                                strength);
}
#endif
libvpx-1.8.2/vp8/encoder/temporal_filter.h000066400000000000000000000013011357355204000205240ustar00rootroot00000000000000/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP8_ENCODER_TEMPORAL_FILTER_H_
#define VPX_VP8_ENCODER_TEMPORAL_FILTER_H_

#ifdef __cplusplus
extern "C" {
#endif

struct VP8_COMP;

void vp8_temporal_filter_prepare_c(struct VP8_COMP *cpi, int distance);

#ifdef __cplusplus
}
#endif

#endif  // VPX_VP8_ENCODER_TEMPORAL_FILTER_H_
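
/* Usage sketch (illustrative; the actual call site sits in the encoder's
 * frame loop outside this listing, so the exact wrapper is an assumption):
 * when an alt-ref frame is being constructed the encoder does, in effect,
 *
 *   #if VP8_TEMPORAL_ALT_REF
 *   vp8_temporal_filter_prepare_c(cpi, distance);
 *   #endif
 *
 * where distance is the lookahead offset of the frame the ARF is anchored
 * on; the filtered result is left in cpi->alt_ref_buffer for coding. */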
libvpx-1.8.2/vp8/encoder/tokenize.c000066400000000000000000000300651357355204000171660ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "onyx_int.h"
#include "tokenize.h"
#include "vpx_mem/vpx_mem.h"

/* Global event counters used for accumulating statistics across several
   compressions, then generating context.c = initial stats. */

void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t);
void vp8_fix_contexts(MACROBLOCKD *x);

#include "dct_value_tokens.h"
#include "dct_value_cost.h"

const TOKENVALUE *const vp8_dct_value_tokens_ptr =
    dct_value_tokens + DCT_MAX_VALUE;
const short *const vp8_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE;

#if 0
int skip_true_count = 0;
int skip_false_count = 0;
#endif

/* function used to generate dct_value_tokens and dct_value_cost tables */
/*
static void fill_value_tokens() {
  TOKENVALUE *t = dct_value_tokens + DCT_MAX_VALUE;
  const vp8_extra_bit_struct *e = vp8_extra_bits;

  int i = -DCT_MAX_VALUE;
  int sign = 1;

  do {
    if (!i) sign = 0;

    {
      const int a = sign ? -i : i;
      int eb = sign;

      if (a > 4) {
        int j = 4;

        while (++j < 11 && e[j].base_val <= a) {}

        t[i].Token = --j;
        eb |= (a - e[j].base_val) << 1;
      } else
        t[i].Token = a;

      t[i].Extra = eb;
    }

    // initialize the cost for extra bits for all possible coefficient value.
    {
      int cost = 0;
      const vp8_extra_bit_struct *p = vp8_extra_bits + t[i].Token;

      if (p->base_val) {
        const int extra = t[i].Extra;
        const int Length = p->Len;

        if (Length)
          cost += vp8_treed_cost(p->tree, p->prob, extra >> 1, Length);

        cost += vp8_cost_bit(vp8_prob_half, extra & 1); // sign
        dct_value_cost[i + DCT_MAX_VALUE] = cost;
      }
    }
  } while (++i < DCT_MAX_VALUE);

  vp8_dct_value_tokens_ptr = dct_value_tokens + DCT_MAX_VALUE;
  vp8_dct_value_cost_ptr = dct_value_cost + DCT_MAX_VALUE;
}
*/

static void tokenize2nd_order_b(MACROBLOCK *x, TOKENEXTRA **tp, VP8_COMP *cpi) {
  MACROBLOCKD *xd = &x->e_mbd;
  int pt;              /* near block/prev token context index */
  int c;               /* start at DC */
  TOKENEXTRA *t = *tp; /* store tokens starting here */
  const BLOCKD *b;
  const short *qcoeff_ptr;
  ENTROPY_CONTEXT *a;
  ENTROPY_CONTEXT *l;
  int band, rc, v, token;
  int eob;

  b = xd->block + 24;
  qcoeff_ptr = b->qcoeff;
  a = (ENTROPY_CONTEXT *)xd->above_context + 8;
  l = (ENTROPY_CONTEXT *)xd->left_context + 8;
  eob = xd->eobs[24];

  VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l);

  if (!eob) {
    /* c = band for this case */
    t->Token = DCT_EOB_TOKEN;
    t->context_tree = cpi->common.fc.coef_probs[1][0][pt];
    t->skip_eob_node = 0;

    ++x->coef_counts[1][0][pt][DCT_EOB_TOKEN];
    t++;
    *tp = t;
    *a = *l = 0;
    return;
  }

  v = qcoeff_ptr[0];
  t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
  token = vp8_dct_value_tokens_ptr[v].Token;
  t->Token = token;

  t->context_tree = cpi->common.fc.coef_probs[1][0][pt];
  t->skip_eob_node = 0;
  ++x->coef_counts[1][0][pt][token];
  pt = vp8_prev_token_class[token];
  t++;
  c = 1;

  for (; c < eob; ++c) {
    rc = vp8_default_zig_zag1d[c];
    band = vp8_coef_bands[c];
    v = qcoeff_ptr[rc];

    t->Extra = vp8_dct_value_tokens_ptr[v].Extra;
    token = vp8_dct_value_tokens_ptr[v].Token;

    t->Token = token;
    t->context_tree = cpi->common.fc.coef_probs[1][band][pt];

    t->skip_eob_node = (pt == 0);

    ++x->coef_counts[1][band][pt][token];

    pt = vp8_prev_token_class[token];
    t++;
  }

  if (c < 16) {
    band = vp8_coef_bands[c];
    t->Token = DCT_EOB_TOKEN;
    t->context_tree = cpi->common.fc.coef_probs[1][band][pt];

    t->skip_eob_node = 0;

    ++x->coef_counts[1][band][pt][DCT_EOB_TOKEN];

    t++;
  }

  *tp = t;
  *a = *l = 1;
}

static void tokenize1st_order_b(
    MACROBLOCK *x, TOKENEXTRA **tp,
    int type, /* which plane: 0=Y no DC, 1=Y2, 2=UV, 3=Y with DC */
    VP8_COMP *cpi) {
  MACROBLOCKD *xd = &x->e_mbd;
  unsigned int block;
  const BLOCKD *b;
  int pt; /* near
block/prev token context index */ int c; int token; TOKENEXTRA *t = *tp; /* store tokens starting here */ const short *qcoeff_ptr; ENTROPY_CONTEXT *a; ENTROPY_CONTEXT *l; int band, rc, v; int tmp1, tmp2; b = xd->block; /* Luma */ for (block = 0; block < 16; block++, b++) { const int eob = *b->eob; tmp1 = vp8_block2above[block]; tmp2 = vp8_block2left[block]; qcoeff_ptr = b->qcoeff; a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); c = type ? 0 : 1; if (c >= eob) { /* c = band for this case */ t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs[type][c][pt]; t->skip_eob_node = 0; ++x->coef_counts[type][c][pt][DCT_EOB_TOKEN]; t++; *tp = t; *a = *l = 0; continue; } v = qcoeff_ptr[c]; t->Extra = vp8_dct_value_tokens_ptr[v].Extra; token = vp8_dct_value_tokens_ptr[v].Token; t->Token = token; t->context_tree = cpi->common.fc.coef_probs[type][c][pt]; t->skip_eob_node = 0; ++x->coef_counts[type][c][pt][token]; pt = vp8_prev_token_class[token]; t++; c++; assert(eob <= 16); for (; c < eob; ++c) { rc = vp8_default_zig_zag1d[c]; band = vp8_coef_bands[c]; v = qcoeff_ptr[rc]; t->Extra = vp8_dct_value_tokens_ptr[v].Extra; token = vp8_dct_value_tokens_ptr[v].Token; t->Token = token; t->context_tree = cpi->common.fc.coef_probs[type][band][pt]; t->skip_eob_node = (pt == 0); ++x->coef_counts[type][band][pt][token]; pt = vp8_prev_token_class[token]; t++; } if (c < 16) { band = vp8_coef_bands[c]; t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs[type][band][pt]; t->skip_eob_node = 0; ++x->coef_counts[type][band][pt][DCT_EOB_TOKEN]; t++; } *tp = t; *a = *l = 1; } /* Chroma */ for (block = 16; block < 24; block++, b++) { const int eob = *b->eob; tmp1 = vp8_block2above[block]; tmp2 = vp8_block2left[block]; qcoeff_ptr = b->qcoeff; a = (ENTROPY_CONTEXT *)xd->above_context + tmp1; l = (ENTROPY_CONTEXT *)xd->left_context + tmp2; VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); if (!eob) { /* c = band for this case */ t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs[2][0][pt]; t->skip_eob_node = 0; ++x->coef_counts[2][0][pt][DCT_EOB_TOKEN]; t++; *tp = t; *a = *l = 0; continue; } v = qcoeff_ptr[0]; t->Extra = vp8_dct_value_tokens_ptr[v].Extra; token = vp8_dct_value_tokens_ptr[v].Token; t->Token = token; t->context_tree = cpi->common.fc.coef_probs[2][0][pt]; t->skip_eob_node = 0; ++x->coef_counts[2][0][pt][token]; pt = vp8_prev_token_class[token]; t++; c = 1; assert(eob <= 16); for (; c < eob; ++c) { rc = vp8_default_zig_zag1d[c]; band = vp8_coef_bands[c]; v = qcoeff_ptr[rc]; t->Extra = vp8_dct_value_tokens_ptr[v].Extra; token = vp8_dct_value_tokens_ptr[v].Token; t->Token = token; t->context_tree = cpi->common.fc.coef_probs[2][band][pt]; t->skip_eob_node = (pt == 0); ++x->coef_counts[2][band][pt][token]; pt = vp8_prev_token_class[token]; t++; } if (c < 16) { band = vp8_coef_bands[c]; t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs[2][band][pt]; t->skip_eob_node = 0; ++x->coef_counts[2][band][pt][DCT_EOB_TOKEN]; t++; } *tp = t; *a = *l = 1; } } static int mb_is_skippable(MACROBLOCKD *x, int has_y2_block) { int skip = 1; int i = 0; if (has_y2_block) { for (i = 0; i < 16; ++i) skip &= (x->eobs[i] < 2); } for (; i < 24 + has_y2_block; ++i) skip &= (!x->eobs[i]); return skip; } void vp8_tokenize_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) { MACROBLOCKD *xd = &x->e_mbd; int plane_type; int has_y2_block; has_y2_block = (xd->mode_info_context->mbmi.mode != B_PRED && 
xd->mode_info_context->mbmi.mode != SPLITMV); xd->mode_info_context->mbmi.mb_skip_coeff = mb_is_skippable(xd, has_y2_block); if (xd->mode_info_context->mbmi.mb_skip_coeff) { if (!cpi->common.mb_no_coeff_skip) { vp8_stuff_mb(cpi, x, t); } else { vp8_fix_contexts(xd); x->skip_true_count++; } return; } plane_type = 3; if (has_y2_block) { tokenize2nd_order_b(x, t, cpi); plane_type = 0; } tokenize1st_order_b(x, t, plane_type, cpi); } static void stuff2nd_order_b(TOKENEXTRA **tp, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, VP8_COMP *cpi, MACROBLOCK *x) { int pt; /* near block/prev token context index */ TOKENEXTRA *t = *tp; /* store tokens starting here */ VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs[1][0][pt]; t->skip_eob_node = 0; ++x->coef_counts[1][0][pt][DCT_EOB_TOKEN]; ++t; *tp = t; pt = 0; *a = *l = pt; } static void stuff1st_order_b(TOKENEXTRA **tp, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int type, VP8_COMP *cpi, MACROBLOCK *x) { int pt; /* near block/prev token context index */ int band; TOKENEXTRA *t = *tp; /* store tokens starting here */ VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); band = type ? 0 : 1; t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs[type][band][pt]; t->skip_eob_node = 0; ++x->coef_counts[type][band][pt][DCT_EOB_TOKEN]; ++t; *tp = t; pt = 0; /* 0 <-> all coeff data is zero */ *a = *l = pt; } static void stuff1st_order_buv(TOKENEXTRA **tp, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, VP8_COMP *cpi, MACROBLOCK *x) { int pt; /* near block/prev token context index */ TOKENEXTRA *t = *tp; /* store tokens starting here */ VP8_COMBINEENTROPYCONTEXTS(pt, *a, *l); t->Token = DCT_EOB_TOKEN; t->context_tree = cpi->common.fc.coef_probs[2][0][pt]; t->skip_eob_node = 0; ++x->coef_counts[2][0][pt][DCT_EOB_TOKEN]; ++t; *tp = t; pt = 0; /* 0 <-> all coeff data is zero */ *a = *l = pt; } void vp8_stuff_mb(VP8_COMP *cpi, MACROBLOCK *x, TOKENEXTRA **t) { MACROBLOCKD *xd = &x->e_mbd; ENTROPY_CONTEXT *A = (ENTROPY_CONTEXT *)xd->above_context; ENTROPY_CONTEXT *L = (ENTROPY_CONTEXT *)xd->left_context; int plane_type; int b; plane_type = 3; if ((xd->mode_info_context->mbmi.mode != B_PRED && xd->mode_info_context->mbmi.mode != SPLITMV)) { stuff2nd_order_b(t, A + vp8_block2above[24], L + vp8_block2left[24], cpi, x); plane_type = 0; } for (b = 0; b < 16; ++b) { stuff1st_order_b(t, A + vp8_block2above[b], L + vp8_block2left[b], plane_type, cpi, x); } for (b = 16; b < 24; ++b) { stuff1st_order_buv(t, A + vp8_block2above[b], L + vp8_block2left[b], cpi, x); } } void vp8_fix_contexts(MACROBLOCKD *x) { /* Clear entropy contexts for Y2 blocks */ if (x->mode_info_context->mbmi.mode != B_PRED && x->mode_info_context->mbmi.mode != SPLITMV) { memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES)); } else { memset(x->above_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) - 1); memset(x->left_context, 0, sizeof(ENTROPY_CONTEXT_PLANES) - 1); } } libvpx-1.8.2/vp8/encoder/tokenize.h000066400000000000000000000022701357355204000171700ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP8_ENCODER_TOKENIZE_H_ #define VPX_VP8_ENCODER_TOKENIZE_H_ #include "vp8/common/entropy.h" #include "block.h" #ifdef __cplusplus extern "C" { #endif void vp8_tokenize_initialize(); typedef struct { short Token; short Extra; } TOKENVALUE; typedef struct { const vp8_prob *context_tree; short Extra; unsigned char Token; unsigned char skip_eob_node; } TOKENEXTRA; int rd_cost_mby(MACROBLOCKD *); extern const short *const vp8_dct_value_cost_ptr; /* TODO: The Token field should be broken out into a separate char array to * improve cache locality, since it's needed for costing when the rest of the * fields are not. */ extern const TOKENVALUE *const vp8_dct_value_tokens_ptr; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_TOKENIZE_H_ libvpx-1.8.2/vp8/encoder/treewriter.c000066400000000000000000000016721357355204000175340ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "treewriter.h" static void cost(int *const C, vp8_tree T, const vp8_prob *const P, int i, int c) { const vp8_prob p = P[i >> 1]; do { const vp8_tree_index j = T[i]; const int d = c + vp8_cost_bit(p, i & 1); if (j <= 0) { C[-j] = d; } else { cost(C, T, P, j, d); } } while (++i & 1); } void vp8_cost_tokens(int *c, const vp8_prob *p, vp8_tree t) { cost(c, t, p, 0, 0); } void vp8_cost_tokens2(int *c, const vp8_prob *p, vp8_tree t, int start) { cost(c, t, p, start, 0); } libvpx-1.8.2/vp8/encoder/treewriter.h000066400000000000000000000060341357355204000175360ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP8_ENCODER_TREEWRITER_H_ #define VPX_VP8_ENCODER_TREEWRITER_H_ /* Trees map alphabets into huffman-like codes suitable for an arithmetic bit coder. Timothy S Murphy 11 October 2004 */ #include "./vpx_config.h" #include "vp8/common/treecoder.h" #include "boolhuff.h" /* for now */ #ifdef __cplusplus extern "C" { #endif typedef BOOL_CODER vp8_writer; #define vp8_write vp8_encode_bool #define vp8_write_literal vp8_encode_value #define vp8_write_bit(W, V) vp8_write(W, V, vp8_prob_half) #define vp8bc_write vp8bc_write_bool #define vp8bc_write_literal vp8bc_write_bits #define vp8bc_write_bit(W, V) vp8bc_write_bits(W, V, 1) /* Approximate length of an encoded bool in 256ths of a bit at given prob */ #define vp8_cost_zero(x) (vp8_prob_cost[x]) #define vp8_cost_one(x) vp8_cost_zero(vp8_complement(x)) #define vp8_cost_bit(x, b) vp8_cost_zero((b) ? vp8_complement(x) : (x)) /* VP8BC version is scaled by 2^20 rather than 2^8; see bool_coder.h */ /* Both of these return bits, not scaled bits. 
*/ static INLINE unsigned int vp8_cost_branch(const unsigned int ct[2], vp8_prob p) { /* Imitate existing calculation */ return ((ct[0] * vp8_cost_zero(p)) + (ct[1] * vp8_cost_one(p))) >> 8; } /* Small functions to write explicit values and tokens, as well as estimate their lengths. */ static void vp8_treed_write(vp8_writer *const w, vp8_tree t, const vp8_prob *const p, int v, int n) { /* number of bits in v, assumed nonzero */ vp8_tree_index i = 0; do { const int b = (v >> --n) & 1; vp8_write(w, b, p[i >> 1]); i = t[i + b]; } while (n); } static INLINE void vp8_write_token(vp8_writer *const w, vp8_tree t, const vp8_prob *const p, vp8_token *const x) { vp8_treed_write(w, t, p, x->value, x->Len); } static int vp8_treed_cost(vp8_tree t, const vp8_prob *const p, int v, int n) { /* number of bits in v, assumed nonzero */ int c = 0; vp8_tree_index i = 0; do { const int b = (v >> --n) & 1; c += vp8_cost_bit(p[i >> 1], b); i = t[i + b]; } while (n); return c; } static INLINE int vp8_cost_token(vp8_tree t, const vp8_prob *const p, vp8_token *const x) { return vp8_treed_cost(t, p, x->value, x->Len); } /* Fill array of costs for all possible token values. */ void vp8_cost_tokens(int *c, const vp8_prob *, vp8_tree); void vp8_cost_tokens2(int *c, const vp8_prob *, vp8_tree, int); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP8_ENCODER_TREEWRITER_H_ libvpx-1.8.2/vp8/encoder/vp8_quantize.c000066400000000000000000000431001357355204000177650ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "vpx_mem/vpx_mem.h" #include "onyx_int.h" #include "vp8/encoder/quantize.h" #include "vp8/common/quant_common.h" void vp8_fast_quantize_b_c(BLOCK *b, BLOCKD *d) { int i, rc, eob; int x, y, z, sz; short *coeff_ptr = b->coeff; short *round_ptr = b->round; short *quant_ptr = b->quant_fast; short *qcoeff_ptr = d->qcoeff; short *dqcoeff_ptr = d->dqcoeff; short *dequant_ptr = d->dequant; eob = -1; for (i = 0; i < 16; ++i) { rc = vp8_default_zig_zag1d[i]; z = coeff_ptr[rc]; sz = (z >> 31); /* sign of z */ x = (z ^ sz) - sz; /* x = abs(z) */ y = ((x + round_ptr[rc]) * quant_ptr[rc]) >> 16; /* quantize (x) */ x = (y ^ sz) - sz; /* get the sign back */ qcoeff_ptr[rc] = x; /* write to destination */ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ if (y) { eob = i; /* last nonzero coeffs */ } } *d->eob = (char)(eob + 1); } void vp8_regular_quantize_b_c(BLOCK *b, BLOCKD *d) { int i, rc, eob; int zbin; int x, y, z, sz; short *zbin_boost_ptr = b->zrun_zbin_boost; short *coeff_ptr = b->coeff; short *zbin_ptr = b->zbin; short *round_ptr = b->round; short *quant_ptr = b->quant; short *quant_shift_ptr = b->quant_shift; short *qcoeff_ptr = d->qcoeff; short *dqcoeff_ptr = d->dqcoeff; short *dequant_ptr = d->dequant; short zbin_oq_value = b->zbin_extra; memset(qcoeff_ptr, 0, 32); memset(dqcoeff_ptr, 0, 32); eob = -1; for (i = 0; i < 16; ++i) { rc = vp8_default_zig_zag1d[i]; z = coeff_ptr[rc]; zbin = zbin_ptr[rc] + *zbin_boost_ptr + zbin_oq_value; zbin_boost_ptr++; sz = (z >> 31); /* sign of z */ x = (z ^ sz) - sz; /* x = abs(z) */ if (x >= zbin) { x += round_ptr[rc]; y = ((((x * quant_ptr[rc]) >> 16) + x) * quant_shift_ptr[rc]) >> 16; /* quantize (x) */ x = (y ^ sz) - sz; /* get the sign back */ qcoeff_ptr[rc] = x; /* write to destination */ dqcoeff_ptr[rc] = x * dequant_ptr[rc]; /* dequantized value */ if (y) { eob = i; /* last nonzero coeffs */ zbin_boost_ptr = b->zrun_zbin_boost; /* reset zero runlength */ } } } *d->eob = (char)(eob + 1); } void vp8_quantize_mby(MACROBLOCK *x) { int i; int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); for (i = 0; i < 16; ++i) x->quantize_b(&x->block[i], &x->e_mbd.block[i]); if (has_2nd_order) x->quantize_b(&x->block[24], &x->e_mbd.block[24]); } void vp8_quantize_mb(MACROBLOCK *x) { int i; int has_2nd_order = (x->e_mbd.mode_info_context->mbmi.mode != B_PRED && x->e_mbd.mode_info_context->mbmi.mode != SPLITMV); for (i = 0; i < 24 + has_2nd_order; ++i) { x->quantize_b(&x->block[i], &x->e_mbd.block[i]); } } void vp8_quantize_mbuv(MACROBLOCK *x) { int i; for (i = 16; i < 24; ++i) x->quantize_b(&x->block[i], &x->e_mbd.block[i]); } static const int qrounding_factors[129] = { 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48 }; static const int qzbin_factors[129] = { 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 
80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 }; static const int qrounding_factors_y2[129] = { 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48, 48 }; static const int qzbin_factors_y2[129] = { 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80, 80 }; static void invert_quant(int improved_quant, short *quant, short *shift, short d) { if (improved_quant) { unsigned t; int l, m; t = d; for (l = 0; t > 1; ++l) t >>= 1; m = 1 + (1 << (16 + l)) / d; *quant = (short)(m - (1 << 16)); *shift = l; /* use multiplication and constant shift by 16 */ *shift = 1 << (16 - *shift); } else { *quant = (1 << 16) / d; *shift = 0; } } void vp8cx_init_quantizer(VP8_COMP *cpi) { int i; int quant_val; int Q; int zbin_boost[16] = { 0, 0, 8, 10, 12, 14, 16, 20, 24, 28, 32, 36, 40, 44, 44, 44 }; for (Q = 0; Q < QINDEX_RANGE; ++Q) { /* dc values */ quant_val = vp8_dc_quant(Q, cpi->common.y1dc_delta_q); cpi->Y1quant_fast[Q][0] = (1 << 16) / quant_val; invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 0, cpi->Y1quant_shift[Q] + 0, quant_val); cpi->Y1zbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; cpi->Y1round[Q][0] = (qrounding_factors[Q] * quant_val) >> 7; cpi->common.Y1dequant[Q][0] = quant_val; cpi->zrun_zbin_boost_y1[Q][0] = (quant_val * zbin_boost[0]) >> 7; quant_val = vp8_dc2quant(Q, cpi->common.y2dc_delta_q); cpi->Y2quant_fast[Q][0] = (1 << 16) / quant_val; invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 0, cpi->Y2quant_shift[Q] + 0, quant_val); cpi->Y2zbin[Q][0] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7; cpi->Y2round[Q][0] = (qrounding_factors_y2[Q] * quant_val) >> 7; cpi->common.Y2dequant[Q][0] = quant_val; cpi->zrun_zbin_boost_y2[Q][0] = (quant_val * zbin_boost[0]) >> 7; quant_val = vp8_dc_uv_quant(Q, cpi->common.uvdc_delta_q); cpi->UVquant_fast[Q][0] = (1 << 16) / quant_val; invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 0, cpi->UVquant_shift[Q] + 0, quant_val); cpi->UVzbin[Q][0] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; cpi->UVround[Q][0] = (qrounding_factors[Q] * quant_val) >> 7; cpi->common.UVdequant[Q][0] = quant_val; cpi->zrun_zbin_boost_uv[Q][0] = (quant_val * zbin_boost[0]) >> 7; /* all the ac values = ; */ quant_val = vp8_ac_yquant(Q); cpi->Y1quant_fast[Q][1] = (1 << 16) / quant_val; invert_quant(cpi->sf.improved_quant, cpi->Y1quant[Q] + 1, cpi->Y1quant_shift[Q] + 1, quant_val); cpi->Y1zbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; 
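      /* Illustrative arithmetic for the zbin/round scaling used throughout
       * this function (an explanatory note, not encoder logic): the factors
       * are in 1/128th units, so qzbin_factors[Q] = 84 gives a zero bin of
       * (84 * quant_val + 64) >> 7, roughly 0.656 * quant_val with rounding,
       * while qrounding_factors[Q] = 48 gives an offset of about
       * 0.375 * quant_val. */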
cpi->Y1round[Q][1] = (qrounding_factors[Q] * quant_val) >> 7; cpi->common.Y1dequant[Q][1] = quant_val; cpi->zrun_zbin_boost_y1[Q][1] = (quant_val * zbin_boost[1]) >> 7; quant_val = vp8_ac2quant(Q, cpi->common.y2ac_delta_q); cpi->Y2quant_fast[Q][1] = (1 << 16) / quant_val; invert_quant(cpi->sf.improved_quant, cpi->Y2quant[Q] + 1, cpi->Y2quant_shift[Q] + 1, quant_val); cpi->Y2zbin[Q][1] = ((qzbin_factors_y2[Q] * quant_val) + 64) >> 7; cpi->Y2round[Q][1] = (qrounding_factors_y2[Q] * quant_val) >> 7; cpi->common.Y2dequant[Q][1] = quant_val; cpi->zrun_zbin_boost_y2[Q][1] = (quant_val * zbin_boost[1]) >> 7; quant_val = vp8_ac_uv_quant(Q, cpi->common.uvac_delta_q); cpi->UVquant_fast[Q][1] = (1 << 16) / quant_val; invert_quant(cpi->sf.improved_quant, cpi->UVquant[Q] + 1, cpi->UVquant_shift[Q] + 1, quant_val); cpi->UVzbin[Q][1] = ((qzbin_factors[Q] * quant_val) + 64) >> 7; cpi->UVround[Q][1] = (qrounding_factors[Q] * quant_val) >> 7; cpi->common.UVdequant[Q][1] = quant_val; cpi->zrun_zbin_boost_uv[Q][1] = (quant_val * zbin_boost[1]) >> 7; for (i = 2; i < 16; ++i) { cpi->Y1quant_fast[Q][i] = cpi->Y1quant_fast[Q][1]; cpi->Y1quant[Q][i] = cpi->Y1quant[Q][1]; cpi->Y1quant_shift[Q][i] = cpi->Y1quant_shift[Q][1]; cpi->Y1zbin[Q][i] = cpi->Y1zbin[Q][1]; cpi->Y1round[Q][i] = cpi->Y1round[Q][1]; cpi->zrun_zbin_boost_y1[Q][i] = (cpi->common.Y1dequant[Q][1] * zbin_boost[i]) >> 7; cpi->Y2quant_fast[Q][i] = cpi->Y2quant_fast[Q][1]; cpi->Y2quant[Q][i] = cpi->Y2quant[Q][1]; cpi->Y2quant_shift[Q][i] = cpi->Y2quant_shift[Q][1]; cpi->Y2zbin[Q][i] = cpi->Y2zbin[Q][1]; cpi->Y2round[Q][i] = cpi->Y2round[Q][1]; cpi->zrun_zbin_boost_y2[Q][i] = (cpi->common.Y2dequant[Q][1] * zbin_boost[i]) >> 7; cpi->UVquant_fast[Q][i] = cpi->UVquant_fast[Q][1]; cpi->UVquant[Q][i] = cpi->UVquant[Q][1]; cpi->UVquant_shift[Q][i] = cpi->UVquant_shift[Q][1]; cpi->UVzbin[Q][i] = cpi->UVzbin[Q][1]; cpi->UVround[Q][i] = cpi->UVround[Q][1]; cpi->zrun_zbin_boost_uv[Q][i] = (cpi->common.UVdequant[Q][1] * zbin_boost[i]) >> 7; } } } #define ZBIN_EXTRA_Y \ ((cpi->common.Y1dequant[QIndex][1] * \ (x->zbin_over_quant + x->zbin_mode_boost + x->act_zbin_adj)) >> \ 7) #define ZBIN_EXTRA_UV \ ((cpi->common.UVdequant[QIndex][1] * \ (x->zbin_over_quant + x->zbin_mode_boost + x->act_zbin_adj)) >> \ 7) #define ZBIN_EXTRA_Y2 \ ((cpi->common.Y2dequant[QIndex][1] * \ ((x->zbin_over_quant / 2) + x->zbin_mode_boost + x->act_zbin_adj)) >> \ 7) void vp8cx_mb_init_quantizer(VP8_COMP *cpi, MACROBLOCK *x, int ok_to_skip) { int i; int QIndex; MACROBLOCKD *xd = &x->e_mbd; int zbin_extra; /* Select the baseline MB Q index. */ if (xd->segmentation_enabled) { /* Abs Value */ if (xd->mb_segement_abs_delta == SEGMENT_ABSDATA) { QIndex = xd->segment_feature_data[MB_LVL_ALT_Q] [xd->mode_info_context->mbmi.segment_id]; /* Delta Value */ } else { QIndex = cpi->common.base_qindex + xd->segment_feature_data[MB_LVL_ALT_Q] [xd->mode_info_context->mbmi.segment_id]; /* Clamp to valid range */ QIndex = (QIndex >= 0) ? ((QIndex <= MAXQ) ? QIndex : MAXQ) : 0; } } else { QIndex = cpi->common.base_qindex; } /* This initialization should be called at least once. Use ok_to_skip to * decide if it is ok to skip. * Before encoding a frame, this function is always called with ok_to_skip * =0, which means no skiping of calculations. The "last" values are * initialized at that time. 
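 * Later per-macroblock calls pass ok_to_skip = 1: the full table setup
 * below is then redone only when QIndex changes, while the cheaper else-if
 * branch refreshes just the zbin_extra values when zbin_over_quant,
 * zbin_mode_boost or act_zbin_adj has moved since the last call.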
*/ if (!ok_to_skip || QIndex != x->q_index) { xd->dequant_y1_dc[0] = 1; xd->dequant_y1[0] = cpi->common.Y1dequant[QIndex][0]; xd->dequant_y2[0] = cpi->common.Y2dequant[QIndex][0]; xd->dequant_uv[0] = cpi->common.UVdequant[QIndex][0]; for (i = 1; i < 16; ++i) { xd->dequant_y1_dc[i] = xd->dequant_y1[i] = cpi->common.Y1dequant[QIndex][1]; xd->dequant_y2[i] = cpi->common.Y2dequant[QIndex][1]; xd->dequant_uv[i] = cpi->common.UVdequant[QIndex][1]; } #if 1 /*TODO: Remove dequant from BLOCKD. This is a temporary solution until * the quantizer code uses a passed in pointer to the dequant constants. * This will also require modifications to the x86 and neon assembly. * */ for (i = 0; i < 16; ++i) x->e_mbd.block[i].dequant = xd->dequant_y1; for (i = 16; i < 24; ++i) x->e_mbd.block[i].dequant = xd->dequant_uv; x->e_mbd.block[24].dequant = xd->dequant_y2; #endif /* Y */ zbin_extra = ZBIN_EXTRA_Y; for (i = 0; i < 16; ++i) { x->block[i].quant = cpi->Y1quant[QIndex]; x->block[i].quant_fast = cpi->Y1quant_fast[QIndex]; x->block[i].quant_shift = cpi->Y1quant_shift[QIndex]; x->block[i].zbin = cpi->Y1zbin[QIndex]; x->block[i].round = cpi->Y1round[QIndex]; x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_y1[QIndex]; x->block[i].zbin_extra = (short)zbin_extra; } /* UV */ zbin_extra = ZBIN_EXTRA_UV; for (i = 16; i < 24; ++i) { x->block[i].quant = cpi->UVquant[QIndex]; x->block[i].quant_fast = cpi->UVquant_fast[QIndex]; x->block[i].quant_shift = cpi->UVquant_shift[QIndex]; x->block[i].zbin = cpi->UVzbin[QIndex]; x->block[i].round = cpi->UVround[QIndex]; x->block[i].zrun_zbin_boost = cpi->zrun_zbin_boost_uv[QIndex]; x->block[i].zbin_extra = (short)zbin_extra; } /* Y2 */ zbin_extra = ZBIN_EXTRA_Y2; x->block[24].quant_fast = cpi->Y2quant_fast[QIndex]; x->block[24].quant = cpi->Y2quant[QIndex]; x->block[24].quant_shift = cpi->Y2quant_shift[QIndex]; x->block[24].zbin = cpi->Y2zbin[QIndex]; x->block[24].round = cpi->Y2round[QIndex]; x->block[24].zrun_zbin_boost = cpi->zrun_zbin_boost_y2[QIndex]; x->block[24].zbin_extra = (short)zbin_extra; /* save this macroblock QIndex for vp8_update_zbin_extra() */ x->q_index = QIndex; x->last_zbin_over_quant = x->zbin_over_quant; x->last_zbin_mode_boost = x->zbin_mode_boost; x->last_act_zbin_adj = x->act_zbin_adj; } else if (x->last_zbin_over_quant != x->zbin_over_quant || x->last_zbin_mode_boost != x->zbin_mode_boost || x->last_act_zbin_adj != x->act_zbin_adj) { /* Y */ zbin_extra = ZBIN_EXTRA_Y; for (i = 0; i < 16; ++i) x->block[i].zbin_extra = (short)zbin_extra; /* UV */ zbin_extra = ZBIN_EXTRA_UV; for (i = 16; i < 24; ++i) x->block[i].zbin_extra = (short)zbin_extra; /* Y2 */ zbin_extra = ZBIN_EXTRA_Y2; x->block[24].zbin_extra = (short)zbin_extra; x->last_zbin_over_quant = x->zbin_over_quant; x->last_zbin_mode_boost = x->zbin_mode_boost; x->last_act_zbin_adj = x->act_zbin_adj; } } void vp8_update_zbin_extra(VP8_COMP *cpi, MACROBLOCK *x) { int i; int QIndex = x->q_index; int zbin_extra; /* Y */ zbin_extra = ZBIN_EXTRA_Y; for (i = 0; i < 16; ++i) x->block[i].zbin_extra = (short)zbin_extra; /* UV */ zbin_extra = ZBIN_EXTRA_UV; for (i = 16; i < 24; ++i) x->block[i].zbin_extra = (short)zbin_extra; /* Y2 */ zbin_extra = ZBIN_EXTRA_Y2; x->block[24].zbin_extra = (short)zbin_extra; } #undef ZBIN_EXTRA_Y #undef ZBIN_EXTRA_UV #undef ZBIN_EXTRA_Y2 void vp8cx_frame_init_quantizer(VP8_COMP *cpi) { /* Clear Zbin mode boost for default case */ cpi->mb.zbin_mode_boost = 0; /* MB level quantizer setup */ vp8cx_mb_init_quantizer(cpi, &cpi->mb, 0); } void vp8_set_quantizer(struct VP8_COMP *cpi, 
int Q) {
  VP8_COMMON *cm = &cpi->common;
  MACROBLOCKD *mbd = &cpi->mb.e_mbd;
  int update = 0;
  int new_delta_q;
  int new_uv_delta_q;
  cm->base_qindex = Q;

  /* If any of the delta_q values are changing, the update flag has to be
   * set; currently only y2dc_delta_q may change. */
  cm->y1dc_delta_q = 0;
  cm->y2ac_delta_q = 0;

  if (Q < 4) {
    new_delta_q = 4 - Q;
  } else {
    new_delta_q = 0;
  }

  update |= cm->y2dc_delta_q != new_delta_q;
  cm->y2dc_delta_q = new_delta_q;

  new_uv_delta_q = 0;
  // For screen content, lower the q value for UV channel. For now, select
  // conservative delta; same delta for dc and ac, and decrease it with lower
  // Q, and set to 0 below some threshold. May want to condition this in
  // future on the variance/energy in UV channel.
  if (cpi->oxcf.screen_content_mode && Q > 40) {
    new_uv_delta_q = -(int)(0.15 * Q);
    // Check range: magnitude of delta is 4 bits.
    if (new_uv_delta_q < -15) {
      new_uv_delta_q = -15;
    }
  }
  update |= cm->uvdc_delta_q != new_uv_delta_q;
  cm->uvdc_delta_q = new_uv_delta_q;
  cm->uvac_delta_q = new_uv_delta_q;

  /* Set segment-specific quantizers */
  mbd->segment_feature_data[MB_LVL_ALT_Q][0] =
      cpi->segment_feature_data[MB_LVL_ALT_Q][0];
  mbd->segment_feature_data[MB_LVL_ALT_Q][1] =
      cpi->segment_feature_data[MB_LVL_ALT_Q][1];
  mbd->segment_feature_data[MB_LVL_ALT_Q][2] =
      cpi->segment_feature_data[MB_LVL_ALT_Q][2];
  mbd->segment_feature_data[MB_LVL_ALT_Q][3] =
      cpi->segment_feature_data[MB_LVL_ALT_Q][3];

  /* The quantizer has to be reinitialized for any delta_q changes */
  if (update) vp8cx_init_quantizer(cpi);
}
libvpx-1.8.2/vp8/encoder/x86/000077500000000000000000000000001357355204000156135ustar00rootroot00000000000000libvpx-1.8.2/vp8/encoder/x86/block_error_sse2.asm000066400000000000000000000106371357355204000215630ustar00rootroot00000000000000;
;  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
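;
; C sketch of what the first routine below computes (illustrative reference
; only; the function name here is generic, not a real library symbol):
;
;   int block_error(const short *coeff, const short *dqcoeff) {
;     int i, error = 0;
;     for (i = 0; i < 16; i++) {  /* one 4x4 block = 16 coefficients */
;       int diff = coeff[i] - dqcoeff[i];
;       error += diff * diff;
;     }
;     return error;
;   }
;
; The SSE2 version loads the 16 shorts as two 128-bit vectors per operand,
; subtracts, squares with pmaddwd and horizontally sums the result.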
; %include "vpx_ports/x86_abi_support.asm" SECTION .text ;int vp8_block_error_sse2(short *coeff_ptr, short *dcoef_ptr) global sym(vp8_block_error_sse2) PRIVATE sym(vp8_block_error_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 2 push rsi push rdi ; end prologue mov rsi, arg(0) ;coeff_ptr mov rdi, arg(1) ;dcoef_ptr movdqa xmm0, [rsi] movdqa xmm1, [rdi] movdqa xmm2, [rsi+16] movdqa xmm3, [rdi+16] psubw xmm0, xmm1 psubw xmm2, xmm3 pmaddwd xmm0, xmm0 pmaddwd xmm2, xmm2 paddd xmm0, xmm2 pxor xmm5, xmm5 movdqa xmm1, xmm0 punpckldq xmm0, xmm5 punpckhdq xmm1, xmm5 paddd xmm0, xmm1 movdqa xmm1, xmm0 psrldq xmm0, 8 paddd xmm0, xmm1 movq rax, xmm0 pop rdi pop rsi ; begin epilog UNSHADOW_ARGS pop rbp ret ;int vp8_mbblock_error_sse2_impl(short *coeff_ptr, short *dcoef_ptr, int dc); global sym(vp8_mbblock_error_sse2_impl) PRIVATE sym(vp8_mbblock_error_sse2_impl): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 3 SAVE_XMM 6 push rsi push rdi ; end prolog mov rsi, arg(0) ;coeff_ptr pxor xmm6, xmm6 mov rdi, arg(1) ;dcoef_ptr pxor xmm4, xmm4 movd xmm5, dword ptr arg(2) ;dc por xmm5, xmm4 pcmpeqw xmm5, xmm6 mov rcx, 16 .mberror_loop: movdqa xmm0, [rsi] movdqa xmm1, [rdi] movdqa xmm2, [rsi+16] movdqa xmm3, [rdi+16] psubw xmm2, xmm3 pmaddwd xmm2, xmm2 psubw xmm0, xmm1 pand xmm0, xmm5 pmaddwd xmm0, xmm0 add rsi, 32 add rdi, 32 sub rcx, 1 paddd xmm4, xmm2 paddd xmm4, xmm0 jnz .mberror_loop movdqa xmm0, xmm4 punpckldq xmm0, xmm6 punpckhdq xmm4, xmm6 paddd xmm0, xmm4 movdqa xmm1, xmm0 psrldq xmm0, 8 paddd xmm0, xmm1 movq rax, xmm0 pop rdi pop rsi ; begin epilog RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;int vp8_mbuverror_sse2_impl(short *s_ptr, short *d_ptr); global sym(vp8_mbuverror_sse2_impl) PRIVATE sym(vp8_mbuverror_sse2_impl): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 2 push rsi push rdi ; end prolog mov rsi, arg(0) ;s_ptr mov rdi, arg(1) ;d_ptr mov rcx, 16 pxor xmm3, xmm3 .mbuverror_loop: movdqa xmm1, [rsi] movdqa xmm2, [rdi] psubw xmm1, xmm2 pmaddwd xmm1, xmm1 paddd xmm3, xmm1 add rsi, 16 add rdi, 16 dec rcx jnz .mbuverror_loop pxor xmm0, xmm0 movdqa xmm1, xmm3 movdqa xmm2, xmm1 punpckldq xmm1, xmm0 punpckhdq xmm2, xmm0 paddd xmm1, xmm2 movdqa xmm2, xmm1 psrldq xmm1, 8 paddd xmm1, xmm2 movq rax, xmm1 pop rdi pop rsi ; begin epilog UNSHADOW_ARGS pop rbp ret libvpx-1.8.2/vp8/encoder/x86/copy_sse2.asm000066400000000000000000000055741357355204000202360ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
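;
; C sketch of the copy routine in this file (illustrative; a per-row memcpy
; of 32 bytes is the behavior the SSE2 loop below unrolls four rows at a
; time):
;
;   void copy32xn(const unsigned char *src, int src_stride,
;                 unsigned char *dst, int dst_stride, int height) {
;     int r;
;     for (r = 0; r < height; ++r)
;       memcpy(dst + r * dst_stride, src + r * src_stride, 32);
;   }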
; %include "vpx_ports/x86_abi_support.asm" SECTION .text ;void vp8_copy32xn_sse2( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *dst_ptr, ; int dst_stride, ; int height); global sym(vp8_copy32xn_sse2) PRIVATE sym(vp8_copy32xn_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 SAVE_XMM 7 push rsi push rdi ; end prolog mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;dst_ptr movsxd rax, dword ptr arg(1) ;src_stride movsxd rdx, dword ptr arg(3) ;dst_stride movsxd rcx, dword ptr arg(4) ;height .block_copy_sse2_loopx4: movdqu xmm0, XMMWORD PTR [rsi] movdqu xmm1, XMMWORD PTR [rsi + 16] movdqu xmm2, XMMWORD PTR [rsi + rax] movdqu xmm3, XMMWORD PTR [rsi + rax + 16] lea rsi, [rsi+rax*2] movdqu xmm4, XMMWORD PTR [rsi] movdqu xmm5, XMMWORD PTR [rsi + 16] movdqu xmm6, XMMWORD PTR [rsi + rax] movdqu xmm7, XMMWORD PTR [rsi + rax + 16] lea rsi, [rsi+rax*2] movdqa XMMWORD PTR [rdi], xmm0 movdqa XMMWORD PTR [rdi + 16], xmm1 movdqa XMMWORD PTR [rdi + rdx], xmm2 movdqa XMMWORD PTR [rdi + rdx + 16], xmm3 lea rdi, [rdi+rdx*2] movdqa XMMWORD PTR [rdi], xmm4 movdqa XMMWORD PTR [rdi + 16], xmm5 movdqa XMMWORD PTR [rdi + rdx], xmm6 movdqa XMMWORD PTR [rdi + rdx + 16], xmm7 lea rdi, [rdi+rdx*2] sub rcx, 4 cmp rcx, 4 jge .block_copy_sse2_loopx4 cmp rcx, 0 je .copy_is_done .block_copy_sse2_loop: movdqu xmm0, XMMWORD PTR [rsi] movdqu xmm1, XMMWORD PTR [rsi + 16] lea rsi, [rsi+rax] movdqa XMMWORD PTR [rdi], xmm0 movdqa XMMWORD PTR [rdi + 16], xmm1 lea rdi, [rdi+rdx] sub rcx, 1 jne .block_copy_sse2_loop .copy_is_done: ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret libvpx-1.8.2/vp8/encoder/x86/copy_sse3.asm000066400000000000000000000107721357355204000202330ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
; %include "vpx_ports/x86_abi_support.asm" %macro STACK_FRAME_CREATE_X3 0 %if ABI_IS_32BIT %define src_ptr rsi %define src_stride rax %define ref_ptr rdi %define ref_stride rdx %define end_ptr rcx %define ret_var rbx %define result_ptr arg(4) %define max_sad arg(4) %define height dword ptr arg(4) push rbp mov rbp, rsp push rsi push rdi push rbx mov rsi, arg(0) ; src_ptr mov rdi, arg(2) ; ref_ptr movsxd rax, dword ptr arg(1) ; src_stride movsxd rdx, dword ptr arg(3) ; ref_stride %else %if LIBVPX_YASM_WIN64 SAVE_XMM 7, u %define src_ptr rcx %define src_stride rdx %define ref_ptr r8 %define ref_stride r9 %define end_ptr r10 %define ret_var r11 %define result_ptr [rsp+xmm_stack_space+8+4*8] %define max_sad [rsp+xmm_stack_space+8+4*8] %define height dword ptr [rsp+xmm_stack_space+8+4*8] %else %define src_ptr rdi %define src_stride rsi %define ref_ptr rdx %define ref_stride rcx %define end_ptr r9 %define ret_var r10 %define result_ptr r8 %define max_sad r8 %define height r8 %endif %endif %endmacro %macro STACK_FRAME_DESTROY_X3 0 %define src_ptr %define src_stride %define ref_ptr %define ref_stride %define end_ptr %define ret_var %define result_ptr %define max_sad %define height %if ABI_IS_32BIT pop rbx pop rdi pop rsi pop rbp %else %if LIBVPX_YASM_WIN64 RESTORE_XMM %endif %endif ret %endmacro SECTION .text ;void vp8_copy32xn_sse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *dst_ptr, ; int dst_stride, ; int height); global sym(vp8_copy32xn_sse3) PRIVATE sym(vp8_copy32xn_sse3): STACK_FRAME_CREATE_X3 .block_copy_sse3_loopx4: lea end_ptr, [src_ptr+src_stride*2] movdqu xmm0, XMMWORD PTR [src_ptr] movdqu xmm1, XMMWORD PTR [src_ptr + 16] movdqu xmm2, XMMWORD PTR [src_ptr + src_stride] movdqu xmm3, XMMWORD PTR [src_ptr + src_stride + 16] movdqu xmm4, XMMWORD PTR [end_ptr] movdqu xmm5, XMMWORD PTR [end_ptr + 16] movdqu xmm6, XMMWORD PTR [end_ptr + src_stride] movdqu xmm7, XMMWORD PTR [end_ptr + src_stride + 16] lea src_ptr, [src_ptr+src_stride*4] lea end_ptr, [ref_ptr+ref_stride*2] movdqa XMMWORD PTR [ref_ptr], xmm0 movdqa XMMWORD PTR [ref_ptr + 16], xmm1 movdqa XMMWORD PTR [ref_ptr + ref_stride], xmm2 movdqa XMMWORD PTR [ref_ptr + ref_stride + 16], xmm3 movdqa XMMWORD PTR [end_ptr], xmm4 movdqa XMMWORD PTR [end_ptr + 16], xmm5 movdqa XMMWORD PTR [end_ptr + ref_stride], xmm6 movdqa XMMWORD PTR [end_ptr + ref_stride + 16], xmm7 lea ref_ptr, [ref_ptr+ref_stride*4] sub height, 4 cmp height, 4 jge .block_copy_sse3_loopx4 ;Check to see if there is more rows need to be copied. cmp height, 0 je .copy_is_done .block_copy_sse3_loop: movdqu xmm0, XMMWORD PTR [src_ptr] movdqu xmm1, XMMWORD PTR [src_ptr + 16] lea src_ptr, [src_ptr+src_stride] movdqa XMMWORD PTR [ref_ptr], xmm0 movdqa XMMWORD PTR [ref_ptr + 16], xmm1 lea ref_ptr, [ref_ptr+ref_stride] sub height, 1 jne .block_copy_sse3_loop .copy_is_done: STACK_FRAME_DESTROY_X3 libvpx-1.8.2/vp8/encoder/x86/dct_sse2.asm000066400000000000000000000354451357355204000200360ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
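;
; Reference math for the forward transforms below (an illustrative summary
; derived from the constants in this file's data section). First pass, per
; row of the 4x4 input:
;   a1 = (in0 + in3) * 8,  b1 = (in1 + in2) * 8,
;   c1 = (in1 - in2) * 8,  d1 = (in0 - in3) * 8,
;   out0 = a1 + b1,  out2 = a1 - b1,
;   out1 = (c1 * 2217 + d1 * 5352 + 14500) >> 12,
;   out3 = (d1 * 2217 - c1 * 5352 +  7500) >> 12.
; Second pass, per column, using the same butterflies:
;   out0  = (a1 + b1 + 7) >> 4,  out8 = (a1 - b1 + 7) >> 4,
;   out4  = (c1 * 2217 + d1 * 5352 + 12000) >> 16, plus 1 when d1 != 0,
;   out12 = (d1 * 2217 - c1 * 5352 + 51000) >> 16.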
; %include "vpx_ports/x86_abi_support.asm" %macro STACK_FRAME_CREATE 0 %if ABI_IS_32BIT %define input rsi %define output rdi %define pitch rax push rbp mov rbp, rsp GET_GOT rbx push rsi push rdi ; end prolog mov rsi, arg(0) mov rdi, arg(1) movsxd rax, dword ptr arg(2) lea rcx, [rsi + rax*2] %else %if LIBVPX_YASM_WIN64 %define input rcx %define output rdx %define pitch r8 SAVE_XMM 7, u %else %define input rdi %define output rsi %define pitch rdx %endif %endif %endmacro %macro STACK_FRAME_DESTROY 0 %define input %define output %define pitch %if ABI_IS_32BIT pop rdi pop rsi RESTORE_GOT pop rbp %else %if LIBVPX_YASM_WIN64 RESTORE_XMM %endif %endif ret %endmacro SECTION .text ;void vp8_short_fdct4x4_sse2(short *input, short *output, int pitch) global sym(vp8_short_fdct4x4_sse2) PRIVATE sym(vp8_short_fdct4x4_sse2): STACK_FRAME_CREATE movq xmm0, MMWORD PTR[input ] ;03 02 01 00 movq xmm2, MMWORD PTR[input+ pitch] ;13 12 11 10 lea input, [input+2*pitch] movq xmm1, MMWORD PTR[input ] ;23 22 21 20 movq xmm3, MMWORD PTR[input+ pitch] ;33 32 31 30 punpcklqdq xmm0, xmm2 ;13 12 11 10 03 02 01 00 punpcklqdq xmm1, xmm3 ;33 32 31 30 23 22 21 20 movdqa xmm2, xmm0 punpckldq xmm0, xmm1 ;23 22 03 02 21 20 01 00 punpckhdq xmm2, xmm1 ;33 32 13 12 31 30 11 10 movdqa xmm1, xmm0 punpckldq xmm0, xmm2 ;31 21 30 20 11 10 01 00 pshufhw xmm1, xmm1, 0b1h ;22 23 02 03 xx xx xx xx pshufhw xmm2, xmm2, 0b1h ;32 33 12 13 xx xx xx xx punpckhdq xmm1, xmm2 ;32 33 22 23 12 13 02 03 movdqa xmm3, xmm0 paddw xmm0, xmm1 ;b1 a1 b1 a1 b1 a1 b1 a1 psubw xmm3, xmm1 ;c1 d1 c1 d1 c1 d1 c1 d1 psllw xmm0, 3 ;b1 <<= 3 a1 <<= 3 psllw xmm3, 3 ;c1 <<= 3 d1 <<= 3 movdqa xmm1, xmm0 pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 movdqa xmm4, xmm3 pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352 pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)];d1*2217 - c1*5352 paddd xmm3, XMMWORD PTR[GLOBAL(_14500)] paddd xmm4, XMMWORD PTR[GLOBAL(_7500)] psrad xmm3, 12 ;(c1 * 2217 + d1 * 5352 + 14500)>>12 psrad xmm4, 12 ;(d1 * 2217 - c1 * 5352 + 7500)>>12 packssdw xmm0, xmm1 ;op[2] op[0] packssdw xmm3, xmm4 ;op[3] op[1] ; 23 22 21 20 03 02 01 00 ; ; 33 32 31 30 13 12 11 10 ; movdqa xmm2, xmm0 punpcklqdq xmm0, xmm3 ;13 12 11 10 03 02 01 00 punpckhqdq xmm2, xmm3 ;23 22 21 20 33 32 31 30 movdqa xmm3, xmm0 punpcklwd xmm0, xmm2 ;32 30 22 20 12 10 02 00 punpckhwd xmm3, xmm2 ;33 31 23 21 13 11 03 01 movdqa xmm2, xmm0 punpcklwd xmm0, xmm3 ;13 12 11 10 03 02 01 00 punpckhwd xmm2, xmm3 ;33 32 31 30 23 22 21 20 movdqa xmm5, XMMWORD PTR[GLOBAL(_7)] pshufd xmm2, xmm2, 04eh movdqa xmm3, xmm0 paddw xmm0, xmm2 ;b1 b1 b1 b1 a1 a1 a1 a1 psubw xmm3, xmm2 ;c1 c1 c1 c1 d1 d1 d1 d1 pshufd xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 b1 a1 a1 movdqa xmm2, xmm3 ;save d1 for compare pshufd xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 c1 d1 d1 pshuflw xmm0, xmm0, 0d8h ;b1 b1 a1 a1 b1 a1 b1 a1 pshuflw xmm3, xmm3, 0d8h ;c1 c1 d1 d1 c1 d1 c1 d1 pshufhw xmm0, xmm0, 0d8h ;b1 a1 b1 a1 b1 a1 b1 a1 pshufhw xmm3, xmm3, 0d8h ;c1 d1 c1 d1 c1 d1 c1 d1 movdqa xmm1, xmm0 pmaddwd xmm0, XMMWORD PTR[GLOBAL(_mult_add)] ;a1 + b1 pmaddwd xmm1, XMMWORD PTR[GLOBAL(_mult_sub)] ;a1 - b1 pxor xmm4, xmm4 ;zero out for compare paddd xmm0, xmm5 paddd xmm1, xmm5 pcmpeqw xmm2, xmm4 psrad xmm0, 4 ;(a1 + b1 + 7)>>4 psrad xmm1, 4 ;(a1 - b1 + 7)>>4 pandn xmm2, XMMWORD PTR[GLOBAL(_cmp_mask)] ;clear upper, ;and keep bit 0 of lower movdqa xmm4, xmm3 pmaddwd xmm3, XMMWORD PTR[GLOBAL(_5352_2217)] ;c1*2217 + d1*5352 pmaddwd xmm4, XMMWORD PTR[GLOBAL(_2217_neg5352)] ;d1*2217 - 
c1*5352 paddd xmm3, XMMWORD PTR[GLOBAL(_12000)] paddd xmm4, XMMWORD PTR[GLOBAL(_51000)] packssdw xmm0, xmm1 ;op[8] op[0] psrad xmm3, 16 ;(c1 * 2217 + d1 * 5352 + 12000)>>16 psrad xmm4, 16 ;(d1 * 2217 - c1 * 5352 + 51000)>>16 packssdw xmm3, xmm4 ;op[12] op[4] movdqa xmm1, xmm0 paddw xmm3, xmm2 ;op[4] += (d1!=0) punpcklqdq xmm0, xmm3 ;op[4] op[0] punpckhqdq xmm1, xmm3 ;op[12] op[8] movdqa XMMWORD PTR[output + 0], xmm0 movdqa XMMWORD PTR[output + 16], xmm1 STACK_FRAME_DESTROY ;void vp8_short_fdct8x4_sse2(short *input, short *output, int pitch) global sym(vp8_short_fdct8x4_sse2) PRIVATE sym(vp8_short_fdct8x4_sse2): STACK_FRAME_CREATE ; read the input data movdqa xmm0, [input ] movdqa xmm2, [input+ pitch] lea input, [input+2*pitch] movdqa xmm4, [input ] movdqa xmm3, [input+ pitch] ; transpose for the first stage movdqa xmm1, xmm0 ; 00 01 02 03 04 05 06 07 movdqa xmm5, xmm4 ; 20 21 22 23 24 25 26 27 punpcklwd xmm0, xmm2 ; 00 10 01 11 02 12 03 13 punpckhwd xmm1, xmm2 ; 04 14 05 15 06 16 07 17 punpcklwd xmm4, xmm3 ; 20 30 21 31 22 32 23 33 punpckhwd xmm5, xmm3 ; 24 34 25 35 26 36 27 37 movdqa xmm2, xmm0 ; 00 10 01 11 02 12 03 13 punpckldq xmm0, xmm4 ; 00 10 20 30 01 11 21 31 punpckhdq xmm2, xmm4 ; 02 12 22 32 03 13 23 33 movdqa xmm4, xmm1 ; 04 14 05 15 06 16 07 17 punpckldq xmm4, xmm5 ; 04 14 24 34 05 15 25 35 punpckhdq xmm1, xmm5 ; 06 16 26 36 07 17 27 37 movdqa xmm3, xmm2 ; 02 12 22 32 03 13 23 33 punpckhqdq xmm3, xmm1 ; 03 13 23 33 07 17 27 37 punpcklqdq xmm2, xmm1 ; 02 12 22 32 06 16 26 36 movdqa xmm1, xmm0 ; 00 10 20 30 01 11 21 31 punpcklqdq xmm0, xmm4 ; 00 10 20 30 04 14 24 34 punpckhqdq xmm1, xmm4 ; 01 11 21 32 05 15 25 35 ; xmm0 0 ; xmm1 1 ; xmm2 2 ; xmm3 3 ; first stage movdqa xmm5, xmm0 movdqa xmm4, xmm1 paddw xmm0, xmm3 ; a1 = 0 + 3 paddw xmm1, xmm2 ; b1 = 1 + 2 psubw xmm4, xmm2 ; c1 = 1 - 2 psubw xmm5, xmm3 ; d1 = 0 - 3 psllw xmm5, 3 psllw xmm4, 3 psllw xmm0, 3 psllw xmm1, 3 ; output 0 and 2 movdqa xmm2, xmm0 ; a1 paddw xmm0, xmm1 ; op[0] = a1 + b1 psubw xmm2, xmm1 ; op[2] = a1 - b1 ; output 1 and 3 ; interleave c1, d1 movdqa xmm1, xmm5 ; d1 punpcklwd xmm1, xmm4 ; c1 d1 punpckhwd xmm5, xmm4 ; c1 d1 movdqa xmm3, xmm1 movdqa xmm4, xmm5 pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 paddd xmm1, XMMWORD PTR[GLOBAL(_14500)] paddd xmm4, XMMWORD PTR[GLOBAL(_14500)] paddd xmm3, XMMWORD PTR[GLOBAL(_7500)] paddd xmm5, XMMWORD PTR[GLOBAL(_7500)] psrad xmm1, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 psrad xmm4, 12 ; (c1 * 2217 + d1 * 5352 + 14500)>>12 psrad xmm3, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 psrad xmm5, 12 ; (d1 * 2217 - c1 * 5352 + 7500)>>12 packssdw xmm1, xmm4 ; op[1] packssdw xmm3, xmm5 ; op[3] ; done with vertical ; transpose for the second stage movdqa xmm4, xmm0 ; 00 10 20 30 04 14 24 34 movdqa xmm5, xmm2 ; 02 12 22 32 06 16 26 36 punpcklwd xmm0, xmm1 ; 00 01 10 11 20 21 30 31 punpckhwd xmm4, xmm1 ; 04 05 14 15 24 25 34 35 punpcklwd xmm2, xmm3 ; 02 03 12 13 22 23 32 33 punpckhwd xmm5, xmm3 ; 06 07 16 17 26 27 36 37 movdqa xmm1, xmm0 ; 00 01 10 11 20 21 30 31 punpckldq xmm0, xmm2 ; 00 01 02 03 10 11 12 13 punpckhdq xmm1, xmm2 ; 20 21 22 23 30 31 32 33 movdqa xmm2, xmm4 ; 04 05 14 15 24 25 34 35 punpckldq xmm2, xmm5 ; 04 05 06 07 14 15 16 17 punpckhdq xmm4, xmm5 ; 24 25 26 27 34 35 36 37 movdqa xmm3, xmm1 ; 20 21 22 23 30 31 32 33 punpckhqdq xmm3, xmm4 ; 30 31 
32 33 34 35 36 37 punpcklqdq xmm1, xmm4 ; 20 21 22 23 24 25 26 27 movdqa xmm4, xmm0 ; 00 01 02 03 10 11 12 13 punpcklqdq xmm0, xmm2 ; 00 01 02 03 04 05 06 07 punpckhqdq xmm4, xmm2 ; 10 11 12 13 14 15 16 17 ; xmm0 0 ; xmm1 4 ; xmm2 1 ; xmm3 3 movdqa xmm5, xmm0 movdqa xmm2, xmm1 paddw xmm0, xmm3 ; a1 = 0 + 3 paddw xmm1, xmm4 ; b1 = 1 + 2 psubw xmm4, xmm2 ; c1 = 1 - 2 psubw xmm5, xmm3 ; d1 = 0 - 3 pxor xmm6, xmm6 ; zero out for compare pcmpeqw xmm6, xmm5 ; d1 != 0 pandn xmm6, XMMWORD PTR[GLOBAL(_cmp_mask8x4)] ; clear upper, ; and keep bit 0 of lower ; output 0 and 2 movdqa xmm2, xmm0 ; a1 paddw xmm0, xmm1 ; a1 + b1 psubw xmm2, xmm1 ; a1 - b1 paddw xmm0, XMMWORD PTR[GLOBAL(_7w)] paddw xmm2, XMMWORD PTR[GLOBAL(_7w)] psraw xmm0, 4 ; op[0] = (a1 + b1 + 7)>>4 psraw xmm2, 4 ; op[8] = (a1 - b1 + 7)>>4 ; output 1 and 3 ; interleave c1, d1 movdqa xmm1, xmm5 ; d1 punpcklwd xmm1, xmm4 ; c1 d1 punpckhwd xmm5, xmm4 ; c1 d1 movdqa xmm3, xmm1 movdqa xmm4, xmm5 pmaddwd xmm1, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 pmaddwd xmm4, XMMWORD PTR[GLOBAL (_5352_2217)] ; c1*2217 + d1*5352 pmaddwd xmm3, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 pmaddwd xmm5, XMMWORD PTR[GLOBAL(_2217_neg5352)] ; d1*2217 - c1*5352 paddd xmm1, XMMWORD PTR[GLOBAL(_12000)] paddd xmm4, XMMWORD PTR[GLOBAL(_12000)] paddd xmm3, XMMWORD PTR[GLOBAL(_51000)] paddd xmm5, XMMWORD PTR[GLOBAL(_51000)] psrad xmm1, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 psrad xmm4, 16 ; (c1 * 2217 + d1 * 5352 + 14500)>>16 psrad xmm3, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 psrad xmm5, 16 ; (d1 * 2217 - c1 * 5352 + 7500)>>16 packssdw xmm1, xmm4 ; op[4] packssdw xmm3, xmm5 ; op[12] paddw xmm1, xmm6 ; op[4] += (d1!=0) movdqa xmm4, xmm0 movdqa xmm5, xmm2 punpcklqdq xmm0, xmm1 punpckhqdq xmm4, xmm1 punpcklqdq xmm2, xmm3 punpckhqdq xmm5, xmm3 movdqa XMMWORD PTR[output + 0 ], xmm0 movdqa XMMWORD PTR[output + 16], xmm2 movdqa XMMWORD PTR[output + 32], xmm4 movdqa XMMWORD PTR[output + 48], xmm5 STACK_FRAME_DESTROY SECTION_RODATA align 16 _5352_2217: dw 5352 dw 2217 dw 5352 dw 2217 dw 5352 dw 2217 dw 5352 dw 2217 align 16 _2217_neg5352: dw 2217 dw -5352 dw 2217 dw -5352 dw 2217 dw -5352 dw 2217 dw -5352 align 16 _mult_add: times 8 dw 1 align 16 _cmp_mask: times 4 dw 1 times 4 dw 0 align 16 _cmp_mask8x4: times 8 dw 1 align 16 _mult_sub: dw 1 dw -1 dw 1 dw -1 dw 1 dw -1 dw 1 dw -1 align 16 _7: times 4 dd 7 align 16 _7w: times 8 dw 7 align 16 _14500: times 4 dd 14500 align 16 _7500: times 4 dd 7500 align 16 _12000: times 4 dd 12000 align 16 _51000: times 4 dd 51000 libvpx-1.8.2/vp8/encoder/x86/denoising_sse2.c000066400000000000000000000373761357355204000207120ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp8/encoder/denoising.h" #include "vp8/common/reconinter.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" #include "vp8_rtcd.h" #include #include "vpx_ports/emmintrin_compat.h" /* Compute the sum of all pixel differences of this MB. 
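 *
 * A rough scalar sketch of the reduction done below with SSE2
 * intrinsics (declared in <emmintrin.h>); abs_sum_diff_16x1_c is a
 * hypothetical helper name, shown purely for illustration:
 *
 *   static int abs_sum_diff_16x1_c(const signed char acc_diff[16]) {
 *     int sum = 0, i;
 *     for (i = 0; i < 16; ++i) sum += acc_diff[i];
 *     return abs(sum);
 *   }
 *
 * The SIMD version below widens the signed bytes to 16 bits, pair-sums
 * them into 32-bit lanes by madd-ing against a vector of ones, then
 * folds the lanes together with two shift/add steps before taking the
 * absolute value.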
*/ static INLINE unsigned int abs_sum_diff_16x1(__m128i acc_diff) { const __m128i k_1 = _mm_set1_epi16(1); const __m128i acc_diff_lo = _mm_srai_epi16(_mm_unpacklo_epi8(acc_diff, acc_diff), 8); const __m128i acc_diff_hi = _mm_srai_epi16(_mm_unpackhi_epi8(acc_diff, acc_diff), 8); const __m128i acc_diff_16 = _mm_add_epi16(acc_diff_lo, acc_diff_hi); const __m128i hg_fe_dc_ba = _mm_madd_epi16(acc_diff_16, k_1); const __m128i hgfe_dcba = _mm_add_epi32(hg_fe_dc_ba, _mm_srli_si128(hg_fe_dc_ba, 8)); const __m128i hgfedcba = _mm_add_epi32(hgfe_dcba, _mm_srli_si128(hgfe_dcba, 4)); unsigned int sum_diff = abs(_mm_cvtsi128_si32(hgfedcba)); return sum_diff; } int vp8_denoiser_filter_sse2(unsigned char *mc_running_avg_y, int mc_avg_y_stride, unsigned char *running_avg_y, int avg_y_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising) { unsigned char *running_avg_y_start = running_avg_y; unsigned char *sig_start = sig; unsigned int sum_diff_thresh; int r; int shift_inc = (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0; __m128i acc_diff = _mm_setzero_si128(); const __m128i k_0 = _mm_setzero_si128(); const __m128i k_4 = _mm_set1_epi8(4 + shift_inc); const __m128i k_8 = _mm_set1_epi8(8); const __m128i k_16 = _mm_set1_epi8(16); /* Modify each level's adjustment according to motion_magnitude. */ const __m128i l3 = _mm_set1_epi8( (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6); /* Difference between level 3 and level 2 is 2. */ const __m128i l32 = _mm_set1_epi8(2); /* Difference between level 2 and level 1 is 1. */ const __m128i l21 = _mm_set1_epi8(1); for (r = 0; r < 16; ++r) { /* Calculate differences */ const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0])); const __m128i v_mc_running_avg_y = _mm_loadu_si128((__m128i *)(&mc_running_avg_y[0])); __m128i v_running_avg_y; const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); /* Obtain the sign. FF if diff is negative. */ const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); /* Clamp absolute difference to 16 to be used to get mask. Doing this * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */ const __m128i clamped_absdiff = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_16); /* Get masks for l2 l1 and l0 adjustments */ const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff); const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff); const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff); /* Get adjustments for l2, l1, and l0 */ __m128i adj2 = _mm_and_si128(mask2, l32); const __m128i adj1 = _mm_and_si128(mask1, l21); const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff); __m128i adj, padj, nadj; /* Combine the adjustments and get absolute adjustments. */ adj2 = _mm_add_epi8(adj2, adj1); adj = _mm_sub_epi8(l3, adj2); adj = _mm_andnot_si128(mask0, adj); adj = _mm_or_si128(adj, adj0); /* Restore the sign and get positive and negative adjustments. */ padj = _mm_andnot_si128(diff_sign, adj); nadj = _mm_and_si128(diff_sign, adj); /* Calculate filtered value. */ v_running_avg_y = _mm_adds_epu8(v_sig, padj); v_running_avg_y = _mm_subs_epu8(v_running_avg_y, nadj); _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y); /* Adjustments <=7, and each element in acc_diff can fit in signed * char. */ acc_diff = _mm_adds_epi8(acc_diff, padj); acc_diff = _mm_subs_epi8(acc_diff, nadj); /* Update pointers for next iteration. 
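 *
 * (After this loop the accumulated adjustments are reduced and checked
 * against sum_diff_thresh below. A worked example of the capped
 * second-pass delta, using an illustrative threshold of 512:
 * abs_sum_diff = 700 gives delta = ((700 - 512) >> 8) + 1 = 1, which is
 * < 4, so the weaker adjustment pass is attempted; abs_sum_diff = 2000
 * gives delta = 6, so the block is simply copied instead.)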
*/ sig += sig_stride; mc_running_avg_y += mc_avg_y_stride; running_avg_y += avg_y_stride; } { /* Compute the sum of all pixel differences of this MB. */ unsigned int abs_sum_diff = abs_sum_diff_16x1(acc_diff); sum_diff_thresh = SUM_DIFF_THRESHOLD; if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH; if (abs_sum_diff > sum_diff_thresh) { // Before returning to copy the block (i.e., apply no denoising), // check if we can still apply some (weaker) temporal filtering to // this block, that would otherwise not be denoised at all. Simplest // is to apply an additional adjustment to running_avg_y to bring it // closer to sig. The adjustment is capped by a maximum delta, and // chosen such that in most cases the resulting sum_diff will be // within the acceptable range given by sum_diff_thresh. // The delta is set by the excess of absolute pixel diff over the // threshold. int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1; // Only apply the adjustment for max delta up to 3. if (delta < 4) { const __m128i k_delta = _mm_set1_epi8(delta); sig -= sig_stride * 16; mc_running_avg_y -= mc_avg_y_stride * 16; running_avg_y -= avg_y_stride * 16; for (r = 0; r < 16; ++r) { __m128i v_running_avg_y = _mm_loadu_si128((__m128i *)(&running_avg_y[0])); // Calculate differences. const __m128i v_sig = _mm_loadu_si128((__m128i *)(&sig[0])); const __m128i v_mc_running_avg_y = _mm_loadu_si128((__m128i *)(&mc_running_avg_y[0])); const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); // Obtain the sign. FF if diff is negative. const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); // Clamp absolute difference to delta to get the adjustment. const __m128i adj = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta); // Restore the sign and get positive and negative adjustments. __m128i padj, nadj; padj = _mm_andnot_si128(diff_sign, adj); nadj = _mm_and_si128(diff_sign, adj); // Calculate filtered value. v_running_avg_y = _mm_subs_epu8(v_running_avg_y, padj); v_running_avg_y = _mm_adds_epu8(v_running_avg_y, nadj); _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y); // Accumulate the adjustments. acc_diff = _mm_subs_epi8(acc_diff, padj); acc_diff = _mm_adds_epi8(acc_diff, nadj); // Update pointers for next iteration. sig += sig_stride; mc_running_avg_y += mc_avg_y_stride; running_avg_y += avg_y_stride; } abs_sum_diff = abs_sum_diff_16x1(acc_diff); if (abs_sum_diff > sum_diff_thresh) { return COPY_BLOCK; } } else { return COPY_BLOCK; } } } vp8_copy_mem16x16(running_avg_y_start, avg_y_stride, sig_start, sig_stride); return FILTER_BLOCK; } int vp8_denoiser_filter_uv_sse2(unsigned char *mc_running_avg, int mc_avg_stride, unsigned char *running_avg, int avg_stride, unsigned char *sig, int sig_stride, unsigned int motion_magnitude, int increase_denoising) { unsigned char *running_avg_start = running_avg; unsigned char *sig_start = sig; unsigned int sum_diff_thresh; int r; int shift_inc = (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 1 : 0; __m128i acc_diff = _mm_setzero_si128(); const __m128i k_0 = _mm_setzero_si128(); const __m128i k_4 = _mm_set1_epi8(4 + shift_inc); const __m128i k_8 = _mm_set1_epi8(8); const __m128i k_16 = _mm_set1_epi8(16); /* Modify each level's adjustment according to motion_magnitude. */ const __m128i l3 = _mm_set1_epi8( (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD_UV) ? 7 + shift_inc : 6); /* Difference between level 3 and level 2 is 2. 
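 *
 * (Together the masks below form a small ladder keyed on the clamped
 * absolute difference d. For example, with l3 = 7: d >= 16 adjusts by
 * 7; 8 <= d < 16 adjusts by 7 - 2 = 5; 4 <= d < 8 adjusts by
 * 7 - 2 - 1 = 4; and d < 4 adjusts by d itself.)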
*/ const __m128i l32 = _mm_set1_epi8(2); /* Difference between level 2 and level 1 is 1. */ const __m128i l21 = _mm_set1_epi8(1); { const __m128i k_1 = _mm_set1_epi16(1); __m128i vec_sum_block = _mm_setzero_si128(); // Avoid denoising color signal if its close to average level. for (r = 0; r < 8; ++r) { const __m128i v_sig = _mm_loadl_epi64((__m128i *)(&sig[0])); const __m128i v_sig_unpack = _mm_unpacklo_epi8(v_sig, k_0); vec_sum_block = _mm_add_epi16(vec_sum_block, v_sig_unpack); sig += sig_stride; } sig -= sig_stride * 8; { const __m128i hg_fe_dc_ba = _mm_madd_epi16(vec_sum_block, k_1); const __m128i hgfe_dcba = _mm_add_epi32(hg_fe_dc_ba, _mm_srli_si128(hg_fe_dc_ba, 8)); const __m128i hgfedcba = _mm_add_epi32(hgfe_dcba, _mm_srli_si128(hgfe_dcba, 4)); const int sum_block = _mm_cvtsi128_si32(hgfedcba); if (abs(sum_block - (128 * 8 * 8)) < SUM_DIFF_FROM_AVG_THRESH_UV) { return COPY_BLOCK; } } } for (r = 0; r < 4; ++r) { /* Calculate differences */ const __m128i v_sig_low = _mm_castpd_si128(_mm_load_sd((double *)(&sig[0]))); const __m128i v_sig = _mm_castpd_si128(_mm_loadh_pd( _mm_castsi128_pd(v_sig_low), (double *)(&sig[sig_stride]))); const __m128i v_mc_running_avg_low = _mm_castpd_si128(_mm_load_sd((double *)(&mc_running_avg[0]))); const __m128i v_mc_running_avg = _mm_castpd_si128( _mm_loadh_pd(_mm_castsi128_pd(v_mc_running_avg_low), (double *)(&mc_running_avg[mc_avg_stride]))); const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg, v_sig); const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg); /* Obtain the sign. FF if diff is negative. */ const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); /* Clamp absolute difference to 16 to be used to get mask. Doing this * allows us to use _mm_cmpgt_epi8, which operates on signed byte. */ const __m128i clamped_absdiff = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_16); /* Get masks for l2 l1 and l0 adjustments */ const __m128i mask2 = _mm_cmpgt_epi8(k_16, clamped_absdiff); const __m128i mask1 = _mm_cmpgt_epi8(k_8, clamped_absdiff); const __m128i mask0 = _mm_cmpgt_epi8(k_4, clamped_absdiff); /* Get adjustments for l2, l1, and l0 */ __m128i adj2 = _mm_and_si128(mask2, l32); const __m128i adj1 = _mm_and_si128(mask1, l21); const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff); __m128i adj, padj, nadj; __m128i v_running_avg; /* Combine the adjustments and get absolute adjustments. */ adj2 = _mm_add_epi8(adj2, adj1); adj = _mm_sub_epi8(l3, adj2); adj = _mm_andnot_si128(mask0, adj); adj = _mm_or_si128(adj, adj0); /* Restore the sign and get positive and negative adjustments. */ padj = _mm_andnot_si128(diff_sign, adj); nadj = _mm_and_si128(diff_sign, adj); /* Calculate filtered value. */ v_running_avg = _mm_adds_epu8(v_sig, padj); v_running_avg = _mm_subs_epu8(v_running_avg, nadj); _mm_storel_pd((double *)&running_avg[0], _mm_castsi128_pd(v_running_avg)); _mm_storeh_pd((double *)&running_avg[avg_stride], _mm_castsi128_pd(v_running_avg)); /* Adjustments <=7, and each element in acc_diff can fit in signed * char. */ acc_diff = _mm_adds_epi8(acc_diff, padj); acc_diff = _mm_subs_epi8(acc_diff, nadj); /* Update pointers for next iteration. 
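 *
 * (The UV block is 8x8, so each iteration of this loop packs two
 * 8-pixel rows into a single 128-bit register via
 * _mm_load_sd/_mm_loadh_pd; the pointers therefore advance by two
 * strides at a time and the loop runs only 4 times.)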
*/ sig += sig_stride * 2; mc_running_avg += mc_avg_stride * 2; running_avg += avg_stride * 2; } { unsigned int abs_sum_diff = abs_sum_diff_16x1(acc_diff); sum_diff_thresh = SUM_DIFF_THRESHOLD_UV; if (increase_denoising) sum_diff_thresh = SUM_DIFF_THRESHOLD_HIGH_UV; if (abs_sum_diff > sum_diff_thresh) { // Before returning to copy the block (i.e., apply no denoising), // check if we can still apply some (weaker) temporal filtering to // this block, that would otherwise not be denoised at all. Simplest // is to apply an additional adjustment to running_avg_y to bring it // closer to sig. The adjustment is capped by a maximum delta, and // chosen such that in most cases the resulting sum_diff will be // within the acceptable range given by sum_diff_thresh. // The delta is set by the excess of absolute pixel diff over the // threshold. int delta = ((abs_sum_diff - sum_diff_thresh) >> 8) + 1; // Only apply the adjustment for max delta up to 3. if (delta < 4) { const __m128i k_delta = _mm_set1_epi8(delta); sig -= sig_stride * 8; mc_running_avg -= mc_avg_stride * 8; running_avg -= avg_stride * 8; for (r = 0; r < 4; ++r) { // Calculate differences. const __m128i v_sig_low = _mm_castpd_si128(_mm_load_sd((double *)(&sig[0]))); const __m128i v_sig = _mm_castpd_si128(_mm_loadh_pd( _mm_castsi128_pd(v_sig_low), (double *)(&sig[sig_stride]))); const __m128i v_mc_running_avg_low = _mm_castpd_si128(_mm_load_sd((double *)(&mc_running_avg[0]))); const __m128i v_mc_running_avg = _mm_castpd_si128( _mm_loadh_pd(_mm_castsi128_pd(v_mc_running_avg_low), (double *)(&mc_running_avg[mc_avg_stride]))); const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg, v_sig); const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg); // Obtain the sign. FF if diff is negative. const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); // Clamp absolute difference to delta to get the adjustment. const __m128i adj = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta); // Restore the sign and get positive and negative adjustments. __m128i padj, nadj; const __m128i v_running_avg_low = _mm_castpd_si128(_mm_load_sd((double *)(&running_avg[0]))); __m128i v_running_avg = _mm_castpd_si128( _mm_loadh_pd(_mm_castsi128_pd(v_running_avg_low), (double *)(&running_avg[avg_stride]))); padj = _mm_andnot_si128(diff_sign, adj); nadj = _mm_and_si128(diff_sign, adj); // Calculate filtered value. v_running_avg = _mm_subs_epu8(v_running_avg, padj); v_running_avg = _mm_adds_epu8(v_running_avg, nadj); _mm_storel_pd((double *)&running_avg[0], _mm_castsi128_pd(v_running_avg)); _mm_storeh_pd((double *)&running_avg[avg_stride], _mm_castsi128_pd(v_running_avg)); // Accumulate the adjustments. acc_diff = _mm_subs_epi8(acc_diff, padj); acc_diff = _mm_adds_epi8(acc_diff, nadj); // Update pointers for next iteration. sig += sig_stride * 2; mc_running_avg += mc_avg_stride * 2; running_avg += avg_stride * 2; } abs_sum_diff = abs_sum_diff_16x1(acc_diff); if (abs_sum_diff > sum_diff_thresh) { return COPY_BLOCK; } } else { return COPY_BLOCK; } } } vp8_copy_mem8x8(running_avg_start, avg_stride, sig_start, sig_stride); return FILTER_BLOCK; } libvpx-1.8.2/vp8/encoder/x86/fwalsh_sse2.asm000066400000000000000000000113631357355204000205410ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. 
All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" SECTION .text ;void vp8_short_walsh4x4_sse2(short *input, short *output, int pitch) global sym(vp8_short_walsh4x4_sse2) PRIVATE sym(vp8_short_walsh4x4_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 3 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog mov rsi, arg(0) ; input mov rdi, arg(1) ; output movsxd rdx, dword ptr arg(2) ; pitch ; first for loop movq xmm0, MMWORD PTR [rsi] ; load input movq xmm1, MMWORD PTR [rsi + rdx] lea rsi, [rsi + rdx*2] movq xmm2, MMWORD PTR [rsi] movq xmm3, MMWORD PTR [rsi + rdx] punpcklwd xmm0, xmm1 punpcklwd xmm2, xmm3 movdqa xmm1, xmm0 punpckldq xmm0, xmm2 ; ip[1] ip[0] punpckhdq xmm1, xmm2 ; ip[3] ip[2] movdqa xmm2, xmm0 paddw xmm0, xmm1 psubw xmm2, xmm1 psllw xmm0, 2 ; d1 a1 psllw xmm2, 2 ; c1 b1 movdqa xmm1, xmm0 punpcklqdq xmm0, xmm2 ; b1 a1 punpckhqdq xmm1, xmm2 ; c1 d1 pxor xmm6, xmm6 movq xmm6, xmm0 pxor xmm7, xmm7 pcmpeqw xmm7, xmm6 paddw xmm7, [GLOBAL(c1)] movdqa xmm2, xmm0 paddw xmm0, xmm1 ; b1+c1 a1+d1 psubw xmm2, xmm1 ; b1-c1 a1-d1 paddw xmm0, xmm7 ; b1+c1 a1+d1+(a1!=0) ; second for loop ; input: 13 9 5 1 12 8 4 0 (xmm0) ; 14 10 6 2 15 11 7 3 (xmm2) ; after shuffle: ; 13 5 9 1 12 4 8 0 (xmm0) ; 14 6 10 2 15 7 11 3 (xmm1) pshuflw xmm3, xmm0, 0xd8 pshufhw xmm0, xmm3, 0xd8 pshuflw xmm3, xmm2, 0xd8 pshufhw xmm1, xmm3, 0xd8 movdqa xmm2, xmm0 pmaddwd xmm0, [GLOBAL(c1)] ; d11 a11 d10 a10 pmaddwd xmm2, [GLOBAL(cn1)] ; c11 b11 c10 b10 movdqa xmm3, xmm1 pmaddwd xmm1, [GLOBAL(c1)] ; d12 a12 d13 a13 pmaddwd xmm3, [GLOBAL(cn1)] ; c12 b12 c13 b13 pshufd xmm4, xmm0, 0xd8 ; d11 d10 a11 a10 pshufd xmm5, xmm2, 0xd8 ; c11 c10 b11 b10 pshufd xmm6, xmm1, 0x72 ; d13 d12 a13 a12 pshufd xmm7, xmm3, 0x72 ; c13 c12 b13 b12 movdqa xmm0, xmm4 punpcklqdq xmm0, xmm5 ; b11 b10 a11 a10 punpckhqdq xmm4, xmm5 ; c11 c10 d11 d10 movdqa xmm1, xmm6 punpcklqdq xmm1, xmm7 ; b13 b12 a13 a12 punpckhqdq xmm6, xmm7 ; c13 c12 d13 d12 movdqa xmm2, xmm0 paddd xmm0, xmm4 ; b21 b20 a21 a20 psubd xmm2, xmm4 ; c21 c20 d21 d20 movdqa xmm3, xmm1 paddd xmm1, xmm6 ; b23 b22 a23 a22 psubd xmm3, xmm6 ; c23 c22 d23 d22 pxor xmm4, xmm4 movdqa xmm5, xmm4 pcmpgtd xmm4, xmm0 pcmpgtd xmm5, xmm2 pand xmm4, [GLOBAL(cd1)] pand xmm5, [GLOBAL(cd1)] pxor xmm6, xmm6 movdqa xmm7, xmm6 pcmpgtd xmm6, xmm1 pcmpgtd xmm7, xmm3 pand xmm6, [GLOBAL(cd1)] pand xmm7, [GLOBAL(cd1)] paddd xmm0, xmm4 paddd xmm2, xmm5 paddd xmm0, [GLOBAL(cd3)] paddd xmm2, [GLOBAL(cd3)] paddd xmm1, xmm6 paddd xmm3, xmm7 paddd xmm1, [GLOBAL(cd3)] paddd xmm3, [GLOBAL(cd3)] psrad xmm0, 3 psrad xmm1, 3 psrad xmm2, 3 psrad xmm3, 3 movdqa xmm4, xmm0 punpcklqdq xmm0, xmm1 ; a23 a22 a21 a20 punpckhqdq xmm4, xmm1 ; b23 b22 b21 b20 movdqa xmm5, xmm2 punpckhqdq xmm2, xmm3 ; c23 c22 c21 c20 punpcklqdq xmm5, xmm3 ; d23 d22 d21 d20 packssdw xmm0, xmm4 ; b23 b22 b21 b20 a23 a22 a21 a20 packssdw xmm2, xmm5 ; d23 d22 d21 d20 c23 c22 c21 c20 movdqa XMMWORD PTR [rdi], xmm0 movdqa XMMWORD PTR [rdi + 16], xmm2 ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret SECTION_RODATA align 16 c1: dw 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001, 0x0001 align 16 cn1: dw 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff, 0x0001, 0xffff align 16 cd1: dd 0x00000001, 0x00000001, 0x00000001, 0x00000001 align 16 cd3: dd 0x00000003, 0x00000003, 0x00000003, 0x00000003 libvpx-1.8.2/vp8/encoder/x86/quantize_sse4.c000066400000000000000000000114431357355204000205600ustar00rootroot00000000000000/* * Copyright 
(c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include /* SSE4.1 */ #include "./vp8_rtcd.h" #include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */ #include "vp8/encoder/block.h" #define SELECT_EOB(i, z, x, y, q) \ do { \ short boost = *zbin_boost_ptr; \ /* Technically _mm_extract_epi16() returns an int: */ \ /* https://bugs.llvm.org/show_bug.cgi?id=41657 */ \ short x_z = (short)_mm_extract_epi16(x, z); \ short y_z = (short)_mm_extract_epi16(y, z); \ int cmp = (x_z < boost) | (y_z == 0); \ zbin_boost_ptr++; \ if (cmp) break; \ q = _mm_insert_epi16(q, y_z, z); \ eob = i; \ zbin_boost_ptr = b->zrun_zbin_boost; \ } while (0) void vp8_regular_quantize_b_sse4_1(BLOCK *b, BLOCKD *d) { char eob = 0; short *zbin_boost_ptr = b->zrun_zbin_boost; __m128i x0, x1, y0, y1, x_minus_zbin0, x_minus_zbin1, dqcoeff0, dqcoeff1; __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8)); __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8)); __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra); __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin)); __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8)); __m128i round0 = _mm_load_si128((__m128i *)(b->round)); __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); __m128i quant0 = _mm_load_si128((__m128i *)(b->quant)); __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8)); __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); __m128i qcoeff0 = _mm_setzero_si128(); __m128i qcoeff1 = _mm_setzero_si128(); /* Duplicate to all lanes. */ zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0); zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra); /* x = abs(z) */ x0 = _mm_abs_epi16(z0); x1 = _mm_abs_epi16(z1); /* zbin[] + zbin_extra */ zbin0 = _mm_add_epi16(zbin0, zbin_extra); zbin1 = _mm_add_epi16(zbin1, zbin_extra); /* In C x is compared to zbin where zbin = zbin[] + boost + extra. Rebalance * the equation because boost is the only value which can change: * x - (zbin[] + extra) >= boost */ x_minus_zbin0 = _mm_sub_epi16(x0, zbin0); x_minus_zbin1 = _mm_sub_epi16(x1, zbin1); /* All the remaining calculations are valid whether they are done now with * simd or later inside the loop one at a time. */ x0 = _mm_add_epi16(x0, round0); x1 = _mm_add_epi16(x1, round1); y0 = _mm_mulhi_epi16(x0, quant0); y1 = _mm_mulhi_epi16(x1, quant1); y0 = _mm_add_epi16(y0, x0); y1 = _mm_add_epi16(y1, x1); /* Instead of shifting each value independently we convert the scaling * factor with 1 << (16 - shift) so we can use multiply/return high half. */ y0 = _mm_mulhi_epi16(y0, quant_shift0); y1 = _mm_mulhi_epi16(y1, quant_shift1); /* Restore the sign. */ y0 = _mm_sign_epi16(y0, z0); y1 = _mm_sign_epi16(y1, z1); /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. 
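 *
 * (The unrolled SELECT_EOB calls below visit the coefficients in the
 * fixed VP8 4x4 zig-zag order 0 1 4 8 5 2 3 6 9 12 13 10 7 11 14 15;
 * the second macro argument is the lane within the low or high __m128i
 * half, so e.g. raster position 8 is lane 0 of x_minus_zbin1/y1.)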
*/ SELECT_EOB(1, 0, x_minus_zbin0, y0, qcoeff0); SELECT_EOB(2, 1, x_minus_zbin0, y0, qcoeff0); SELECT_EOB(3, 4, x_minus_zbin0, y0, qcoeff0); SELECT_EOB(4, 0, x_minus_zbin1, y1, qcoeff1); SELECT_EOB(5, 5, x_minus_zbin0, y0, qcoeff0); SELECT_EOB(6, 2, x_minus_zbin0, y0, qcoeff0); SELECT_EOB(7, 3, x_minus_zbin0, y0, qcoeff0); SELECT_EOB(8, 6, x_minus_zbin0, y0, qcoeff0); SELECT_EOB(9, 1, x_minus_zbin1, y1, qcoeff1); SELECT_EOB(10, 4, x_minus_zbin1, y1, qcoeff1); SELECT_EOB(11, 5, x_minus_zbin1, y1, qcoeff1); SELECT_EOB(12, 2, x_minus_zbin1, y1, qcoeff1); SELECT_EOB(13, 7, x_minus_zbin0, y0, qcoeff0); SELECT_EOB(14, 3, x_minus_zbin1, y1, qcoeff1); SELECT_EOB(15, 6, x_minus_zbin1, y1, qcoeff1); SELECT_EOB(16, 7, x_minus_zbin1, y1, qcoeff1); _mm_store_si128((__m128i *)(d->qcoeff), qcoeff0); _mm_store_si128((__m128i *)(d->qcoeff + 8), qcoeff1); dqcoeff0 = _mm_mullo_epi16(qcoeff0, dequant0); dqcoeff1 = _mm_mullo_epi16(qcoeff1, dequant1); _mm_store_si128((__m128i *)(d->dqcoeff), dqcoeff0); _mm_store_si128((__m128i *)(d->dqcoeff + 8), dqcoeff1); *d->eob = eob; } libvpx-1.8.2/vp8/encoder/x86/temporal_filter_apply_sse2.asm000066400000000000000000000166431357355204000236600ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" SECTION .text ; void vp8_temporal_filter_apply_sse2 | arg ; (unsigned char *frame1, | 0 ; unsigned int stride, | 1 ; unsigned char *frame2, | 2 ; unsigned int block_size, | 3 ; int strength, | 4 ; int filter_weight, | 5 ; unsigned int *accumulator, | 6 ; unsigned short *count) | 7 global sym(vp8_temporal_filter_apply_sse2) PRIVATE sym(vp8_temporal_filter_apply_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 8 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ALIGN_STACK 16, rax %define block_size 0 %define strength 16 %define filter_weight 32 %define rounding_bit 48 %define rbp_backup 64 %define stack_size 80 sub rsp, stack_size mov [rsp + rbp_backup], rbp ; end prolog mov rdx, arg(3) mov [rsp + block_size], rdx movd xmm6, arg(4) movdqa [rsp + strength], xmm6 ; where strength is used, all 16 bytes are read ; calculate the rounding bit outside the loop ; 0x8000 >> (16 - strength) mov rdx, 16 sub rdx, arg(4) ; 16 - strength movq xmm4, rdx ; can't use rdx w/ shift movdqa xmm5, [GLOBAL(_const_top_bit)] psrlw xmm5, xmm4 movdqa [rsp + rounding_bit], xmm5 mov rsi, arg(0) ; src/frame1 mov rdx, arg(2) ; predictor frame mov rdi, arg(6) ; accumulator mov rax, arg(7) ; count ; dup the filter weight and store for later movd xmm0, arg(5) ; filter_weight pshuflw xmm0, xmm0, 0 punpcklwd xmm0, xmm0 movdqa [rsp + filter_weight], xmm0 mov rbp, arg(1) ; stride pxor xmm7, xmm7 ; zero for extraction lea rcx, [rdx + 16*16*1] cmp dword ptr [rsp + block_size], 8 jne .temporal_filter_apply_load_16 lea rcx, [rdx + 8*8*1] .temporal_filter_apply_load_8: movq xmm0, [rsi] ; first row lea rsi, [rsi + rbp] ; += stride punpcklbw xmm0, xmm7 ; src[ 0- 7] movq xmm1, [rsi] ; second row lea rsi, [rsi + rbp] ; += stride punpcklbw xmm1, xmm7 ; src[ 8-15] jmp .temporal_filter_apply_load_finished .temporal_filter_apply_load_16: movdqa xmm0, [rsi] ; src (frame1) lea rsi, [rsi + rbp] ; += stride movdqa xmm1, xmm0 punpcklbw xmm0, 
xmm7 ; src[ 0- 7] punpckhbw xmm1, xmm7 ; src[ 8-15] .temporal_filter_apply_load_finished: movdqa xmm2, [rdx] ; predictor (frame2) movdqa xmm3, xmm2 punpcklbw xmm2, xmm7 ; pred[ 0- 7] punpckhbw xmm3, xmm7 ; pred[ 8-15] ; modifier = src_byte - pixel_value psubw xmm0, xmm2 ; src - pred[ 0- 7] psubw xmm1, xmm3 ; src - pred[ 8-15] ; modifier *= modifier pmullw xmm0, xmm0 ; modifer[ 0- 7]^2 pmullw xmm1, xmm1 ; modifer[ 8-15]^2 ; modifier *= 3 pmullw xmm0, [GLOBAL(_const_3w)] pmullw xmm1, [GLOBAL(_const_3w)] ; modifer += 0x8000 >> (16 - strength) paddw xmm0, [rsp + rounding_bit] paddw xmm1, [rsp + rounding_bit] ; modifier >>= strength psrlw xmm0, [rsp + strength] psrlw xmm1, [rsp + strength] ; modifier = 16 - modifier ; saturation takes care of modifier > 16 movdqa xmm3, [GLOBAL(_const_16w)] movdqa xmm2, [GLOBAL(_const_16w)] psubusw xmm3, xmm1 psubusw xmm2, xmm0 ; modifier *= filter_weight pmullw xmm2, [rsp + filter_weight] pmullw xmm3, [rsp + filter_weight] ; count movdqa xmm4, [rax] movdqa xmm5, [rax+16] ; += modifier paddw xmm4, xmm2 paddw xmm5, xmm3 ; write back movdqa [rax], xmm4 movdqa [rax+16], xmm5 lea rax, [rax + 16*2] ; count += 16*(sizeof(short)) ; load and extract the predictor up to shorts pxor xmm7, xmm7 movdqa xmm0, [rdx] lea rdx, [rdx + 16*1] ; pred += 16*(sizeof(char)) movdqa xmm1, xmm0 punpcklbw xmm0, xmm7 ; pred[ 0- 7] punpckhbw xmm1, xmm7 ; pred[ 8-15] ; modifier *= pixel_value pmullw xmm0, xmm2 pmullw xmm1, xmm3 ; expand to double words movdqa xmm2, xmm0 punpcklwd xmm0, xmm7 ; [ 0- 3] punpckhwd xmm2, xmm7 ; [ 4- 7] movdqa xmm3, xmm1 punpcklwd xmm1, xmm7 ; [ 8-11] punpckhwd xmm3, xmm7 ; [12-15] ; accumulator movdqa xmm4, [rdi] movdqa xmm5, [rdi+16] movdqa xmm6, [rdi+32] movdqa xmm7, [rdi+48] ; += modifier paddd xmm4, xmm0 paddd xmm5, xmm2 paddd xmm6, xmm1 paddd xmm7, xmm3 ; write back movdqa [rdi], xmm4 movdqa [rdi+16], xmm5 movdqa [rdi+32], xmm6 movdqa [rdi+48], xmm7 lea rdi, [rdi + 16*4] ; accumulator += 16*(sizeof(int)) cmp rdx, rcx je .temporal_filter_apply_epilog pxor xmm7, xmm7 ; zero for extraction cmp dword ptr [rsp + block_size], 16 je .temporal_filter_apply_load_16 jmp .temporal_filter_apply_load_8 .temporal_filter_apply_epilog: ; begin epilog mov rbp, [rsp + rbp_backup] add rsp, stack_size pop rsp pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret SECTION_RODATA align 16 _const_3w: times 8 dw 3 align 16 _const_top_bit: times 8 dw 1<<15 align 16 _const_16w: times 8 dw 16 libvpx-1.8.2/vp8/encoder/x86/vp8_enc_stubs_sse2.c000066400000000000000000000020041357355204000214710ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "vpx_config.h" #include "vp8_rtcd.h" #include "vpx_ports/x86.h" #include "vp8/encoder/block.h" int vp8_mbblock_error_sse2_impl(short *coeff_ptr, short *dcoef_ptr, int dc); int vp8_mbblock_error_sse2(MACROBLOCK *mb, int dc) { short *coeff_ptr = mb->block[0].coeff; short *dcoef_ptr = mb->e_mbd.block[0].dqcoeff; return vp8_mbblock_error_sse2_impl(coeff_ptr, dcoef_ptr, dc); } int vp8_mbuverror_sse2_impl(short *s_ptr, short *d_ptr); int vp8_mbuverror_sse2(MACROBLOCK *mb) { short *s_ptr = &mb->coeff[256]; short *d_ptr = &mb->e_mbd.dqcoeff[256]; return vp8_mbuverror_sse2_impl(s_ptr, d_ptr); } libvpx-1.8.2/vp8/encoder/x86/vp8_quantize_sse2.c000066400000000000000000000160551357355204000213570ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_config.h" #include "vp8_rtcd.h" #include "vpx_ports/x86.h" #include "vpx_mem/vpx_mem.h" #include "vp8/encoder/block.h" #include "vp8/common/entropy.h" /* vp8_default_inv_zig_zag */ #include /* MMX */ #include /* SSE */ #include /* SSE2 */ #define SELECT_EOB(i, z) \ do { \ short boost = *zbin_boost_ptr; \ int cmp = (x[z] < boost) | (y[z] == 0); \ zbin_boost_ptr++; \ if (cmp) break; \ qcoeff_ptr[z] = y[z]; \ eob = i; \ zbin_boost_ptr = b->zrun_zbin_boost; \ } while (0) void vp8_regular_quantize_b_sse2(BLOCK *b, BLOCKD *d) { char eob = 0; short *zbin_boost_ptr; short *qcoeff_ptr = d->qcoeff; DECLARE_ALIGNED(16, short, x[16]); DECLARE_ALIGNED(16, short, y[16]); __m128i sz0, x0, sz1, x1, y0, y1, x_minus_zbin0, x_minus_zbin1; __m128i quant_shift0 = _mm_load_si128((__m128i *)(b->quant_shift)); __m128i quant_shift1 = _mm_load_si128((__m128i *)(b->quant_shift + 8)); __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8)); __m128i zbin_extra = _mm_cvtsi32_si128(b->zbin_extra); __m128i zbin0 = _mm_load_si128((__m128i *)(b->zbin)); __m128i zbin1 = _mm_load_si128((__m128i *)(b->zbin + 8)); __m128i round0 = _mm_load_si128((__m128i *)(b->round)); __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); __m128i quant0 = _mm_load_si128((__m128i *)(b->quant)); __m128i quant1 = _mm_load_si128((__m128i *)(b->quant + 8)); __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); memset(qcoeff_ptr, 0, 32); /* Duplicate to all lanes. */ zbin_extra = _mm_shufflelo_epi16(zbin_extra, 0); zbin_extra = _mm_unpacklo_epi16(zbin_extra, zbin_extra); /* Sign of z: z >> 15 */ sz0 = _mm_srai_epi16(z0, 15); sz1 = _mm_srai_epi16(z1, 15); /* x = abs(z): (z ^ sz) - sz */ x0 = _mm_xor_si128(z0, sz0); x1 = _mm_xor_si128(z1, sz1); x0 = _mm_sub_epi16(x0, sz0); x1 = _mm_sub_epi16(x1, sz1); /* zbin[] + zbin_extra */ zbin0 = _mm_add_epi16(zbin0, zbin_extra); zbin1 = _mm_add_epi16(zbin1, zbin_extra); /* In C x is compared to zbin where zbin = zbin[] + boost + extra. 
Rebalance * the equation because boost is the only value which can change: * x - (zbin[] + extra) >= boost */ x_minus_zbin0 = _mm_sub_epi16(x0, zbin0); x_minus_zbin1 = _mm_sub_epi16(x1, zbin1); _mm_store_si128((__m128i *)(x), x_minus_zbin0); _mm_store_si128((__m128i *)(x + 8), x_minus_zbin1); /* All the remaining calculations are valid whether they are done now with * simd or later inside the loop one at a time. */ x0 = _mm_add_epi16(x0, round0); x1 = _mm_add_epi16(x1, round1); y0 = _mm_mulhi_epi16(x0, quant0); y1 = _mm_mulhi_epi16(x1, quant1); y0 = _mm_add_epi16(y0, x0); y1 = _mm_add_epi16(y1, x1); /* Instead of shifting each value independently we convert the scaling * factor with 1 << (16 - shift) so we can use multiply/return high half. */ y0 = _mm_mulhi_epi16(y0, quant_shift0); y1 = _mm_mulhi_epi16(y1, quant_shift1); /* Return the sign: (y ^ sz) - sz */ y0 = _mm_xor_si128(y0, sz0); y1 = _mm_xor_si128(y1, sz1); y0 = _mm_sub_epi16(y0, sz0); y1 = _mm_sub_epi16(y1, sz1); _mm_store_si128((__m128i *)(y), y0); _mm_store_si128((__m128i *)(y + 8), y1); zbin_boost_ptr = b->zrun_zbin_boost; /* The loop gets unrolled anyway. Avoid the vp8_default_zig_zag1d lookup. */ SELECT_EOB(1, 0); SELECT_EOB(2, 1); SELECT_EOB(3, 4); SELECT_EOB(4, 8); SELECT_EOB(5, 5); SELECT_EOB(6, 2); SELECT_EOB(7, 3); SELECT_EOB(8, 6); SELECT_EOB(9, 9); SELECT_EOB(10, 12); SELECT_EOB(11, 13); SELECT_EOB(12, 10); SELECT_EOB(13, 7); SELECT_EOB(14, 11); SELECT_EOB(15, 14); SELECT_EOB(16, 15); y0 = _mm_load_si128((__m128i *)(d->qcoeff)); y1 = _mm_load_si128((__m128i *)(d->qcoeff + 8)); /* dqcoeff = qcoeff * dequant */ y0 = _mm_mullo_epi16(y0, dequant0); y1 = _mm_mullo_epi16(y1, dequant1); _mm_store_si128((__m128i *)(d->dqcoeff), y0); _mm_store_si128((__m128i *)(d->dqcoeff + 8), y1); *d->eob = eob; } void vp8_fast_quantize_b_sse2(BLOCK *b, BLOCKD *d) { __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8)); __m128i round0 = _mm_load_si128((__m128i *)(b->round)); __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast)); __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8)); __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); __m128i inv_zig_zag0 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag)); __m128i inv_zig_zag1 = _mm_load_si128((const __m128i *)(vp8_default_inv_zig_zag + 8)); __m128i sz0, sz1, x0, x1, y0, y1, xdq0, xdq1, zeros, ones; /* sign of z: z >> 15 */ sz0 = _mm_srai_epi16(z0, 15); sz1 = _mm_srai_epi16(z1, 15); /* x = abs(z): (z ^ sz) - sz */ x0 = _mm_xor_si128(z0, sz0); x1 = _mm_xor_si128(z1, sz1); x0 = _mm_sub_epi16(x0, sz0); x1 = _mm_sub_epi16(x1, sz1); /* x += round */ x0 = _mm_add_epi16(x0, round0); x1 = _mm_add_epi16(x1, round1); /* y = (x * quant) >> 16 */ y0 = _mm_mulhi_epi16(x0, quant_fast0); y1 = _mm_mulhi_epi16(x1, quant_fast1); /* x = abs(y) = (y ^ sz) - sz */ y0 = _mm_xor_si128(y0, sz0); y1 = _mm_xor_si128(y1, sz1); x0 = _mm_sub_epi16(y0, sz0); x1 = _mm_sub_epi16(y1, sz1); /* qcoeff = x */ _mm_store_si128((__m128i *)(d->qcoeff), x0); _mm_store_si128((__m128i *)(d->qcoeff + 8), x1); /* x * dequant */ xdq0 = _mm_mullo_epi16(x0, dequant0); xdq1 = _mm_mullo_epi16(x1, dequant1); /* dqcoeff = x * dequant */ _mm_store_si128((__m128i *)(d->dqcoeff), xdq0); _mm_store_si128((__m128i *)(d->dqcoeff + 8), xdq1); /* build a mask for the zig zag */ zeros = _mm_setzero_si128(); x0 = _mm_cmpeq_epi16(x0, 
zeros); x1 = _mm_cmpeq_epi16(x1, zeros); ones = _mm_cmpeq_epi16(zeros, zeros); x0 = _mm_xor_si128(x0, ones); x1 = _mm_xor_si128(x1, ones); x0 = _mm_and_si128(x0, inv_zig_zag0); x1 = _mm_and_si128(x1, inv_zig_zag1); x0 = _mm_max_epi16(x0, x1); /* now down to 8 */ x1 = _mm_shuffle_epi32(x0, 0xE); // 0b00001110 x0 = _mm_max_epi16(x0, x1); /* only 4 left */ x1 = _mm_shufflelo_epi16(x0, 0xE); // 0b00001110 x0 = _mm_max_epi16(x0, x1); /* okay, just 2! */ x1 = _mm_shufflelo_epi16(x0, 0x1); // 0b00000001 x0 = _mm_max_epi16(x0, x1); *d->eob = 0xFF & _mm_cvtsi128_si32(x0); } libvpx-1.8.2/vp8/encoder/x86/vp8_quantize_ssse3.c000066400000000000000000000063121357355204000215360ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include /* SSSE3 */ #include "./vp8_rtcd.h" #include "vp8/encoder/block.h" /* bitscan reverse (bsr) */ #if defined(_MSC_VER) #include #pragma intrinsic(_BitScanReverse) static int bsr(int mask) { unsigned long eob; _BitScanReverse(&eob, mask); eob++; if (mask == 0) eob = 0; return eob; } #else static int bsr(int mask) { int eob; #if defined(__GNUC__) && __GNUC__ __asm__ __volatile__("bsr %1, %0" : "=r"(eob) : "r"(mask) : "flags"); #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) asm volatile("bsr %1, %0" : "=r"(eob) : "r"(mask) : "flags"); #endif eob++; if (mask == 0) eob = 0; return eob; } #endif void vp8_fast_quantize_b_ssse3(BLOCK *b, BLOCKD *d) { int eob, mask; __m128i z0 = _mm_load_si128((__m128i *)(b->coeff)); __m128i z1 = _mm_load_si128((__m128i *)(b->coeff + 8)); __m128i round0 = _mm_load_si128((__m128i *)(b->round)); __m128i round1 = _mm_load_si128((__m128i *)(b->round + 8)); __m128i quant_fast0 = _mm_load_si128((__m128i *)(b->quant_fast)); __m128i quant_fast1 = _mm_load_si128((__m128i *)(b->quant_fast + 8)); __m128i dequant0 = _mm_load_si128((__m128i *)(d->dequant)); __m128i dequant1 = _mm_load_si128((__m128i *)(d->dequant + 8)); __m128i sz0, sz1, x, x0, x1, y0, y1, zeros, abs0, abs1; DECLARE_ALIGNED(16, const uint8_t, pshufb_zig_zag_mask[16]) = { 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 }; __m128i zig_zag = _mm_load_si128((const __m128i *)pshufb_zig_zag_mask); /* sign of z: z >> 15 */ sz0 = _mm_srai_epi16(z0, 15); sz1 = _mm_srai_epi16(z1, 15); /* x = abs(z) */ x0 = _mm_abs_epi16(z0); x1 = _mm_abs_epi16(z1); /* x += round */ x0 = _mm_add_epi16(x0, round0); x1 = _mm_add_epi16(x1, round1); /* y = (x * quant) >> 16 */ y0 = _mm_mulhi_epi16(x0, quant_fast0); y1 = _mm_mulhi_epi16(x1, quant_fast1); /* ASM saves Y for EOB */ /* I think we can ignore that because adding the sign doesn't change anything * and multiplying 0 by dequant is OK as well */ abs0 = y0; abs1 = y1; /* Restore the sign bit. 
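 *
 * ((y ^ sz) - sz is the usual branchless sign restore: sz = z >> 15 is
 * 0 for non-negative z and all-ones for negative z, so the expression
 * returns y unchanged or ~y + 1 = -y. For example, y = 5 with
 * sz = 0xffff: 5 ^ 0xffff = ~5, and ~5 - (-1) = ~5 + 1 = -5.)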
*/ y0 = _mm_xor_si128(y0, sz0); y1 = _mm_xor_si128(y1, sz1); x0 = _mm_sub_epi16(y0, sz0); x1 = _mm_sub_epi16(y1, sz1); /* qcoeff = x */ _mm_store_si128((__m128i *)(d->qcoeff), x0); _mm_store_si128((__m128i *)(d->qcoeff + 8), x1); /* x * dequant */ x0 = _mm_mullo_epi16(x0, dequant0); x1 = _mm_mullo_epi16(x1, dequant1); /* dqcoeff = x * dequant */ _mm_store_si128((__m128i *)(d->dqcoeff), x0); _mm_store_si128((__m128i *)(d->dqcoeff + 8), x1); zeros = _mm_setzero_si128(); x0 = _mm_cmpgt_epi16(abs0, zeros); x1 = _mm_cmpgt_epi16(abs1, zeros); x = _mm_packs_epi16(x0, x1); x = _mm_shuffle_epi8(x, zig_zag); mask = _mm_movemask_epi8(x); eob = bsr(mask); *d->eob = 0xFF & eob; } libvpx-1.8.2/vp8/exports_dec000066400000000000000000000000611357355204000160060ustar00rootroot00000000000000data vpx_codec_vp8_dx_algo text vpx_codec_vp8_dx libvpx-1.8.2/vp8/exports_enc000066400000000000000000000000611357355204000160200ustar00rootroot00000000000000data vpx_codec_vp8_cx_algo text vpx_codec_vp8_cx libvpx-1.8.2/vp8/vp8_common.mk000066400000000000000000000150041357355204000161650ustar00rootroot00000000000000## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## VP8_COMMON_SRCS-yes += vp8_common.mk VP8_COMMON_SRCS-yes += common/ppflags.h VP8_COMMON_SRCS-yes += common/onyx.h VP8_COMMON_SRCS-yes += common/onyxd.h VP8_COMMON_SRCS-yes += common/alloccommon.c VP8_COMMON_SRCS-yes += common/blockd.c VP8_COMMON_SRCS-yes += common/coefupdateprobs.h # VP8_COMMON_SRCS-yes += common/debugmodes.c VP8_COMMON_SRCS-yes += common/default_coef_probs.h VP8_COMMON_SRCS-yes += common/dequantize.c VP8_COMMON_SRCS-yes += common/entropy.c VP8_COMMON_SRCS-yes += common/entropymode.c VP8_COMMON_SRCS-yes += common/entropymv.c VP8_COMMON_SRCS-yes += common/extend.c VP8_COMMON_SRCS-yes += common/filter.c VP8_COMMON_SRCS-yes += common/filter.h VP8_COMMON_SRCS-yes += common/findnearmv.c VP8_COMMON_SRCS-yes += common/generic/systemdependent.c VP8_COMMON_SRCS-yes += common/idct_blk.c VP8_COMMON_SRCS-yes += common/idctllm.c VP8_COMMON_SRCS-yes += common/alloccommon.h VP8_COMMON_SRCS-yes += common/blockd.h VP8_COMMON_SRCS-yes += common/common.h VP8_COMMON_SRCS-yes += common/entropy.h VP8_COMMON_SRCS-yes += common/entropymode.h VP8_COMMON_SRCS-yes += common/entropymv.h VP8_COMMON_SRCS-yes += common/extend.h VP8_COMMON_SRCS-yes += common/findnearmv.h VP8_COMMON_SRCS-yes += common/header.h VP8_COMMON_SRCS-yes += common/invtrans.h VP8_COMMON_SRCS-yes += common/loopfilter.h VP8_COMMON_SRCS-yes += common/modecont.h VP8_COMMON_SRCS-yes += common/mv.h VP8_COMMON_SRCS-yes += common/onyxc_int.h VP8_COMMON_SRCS-yes += common/quant_common.h VP8_COMMON_SRCS-yes += common/reconinter.h VP8_COMMON_SRCS-yes += common/reconintra.h VP8_COMMON_SRCS-yes += common/reconintra4x4.h VP8_COMMON_SRCS-yes += common/rtcd.c VP8_COMMON_SRCS-yes += common/rtcd_defs.pl VP8_COMMON_SRCS-yes += common/setupintrarecon.h VP8_COMMON_SRCS-yes += common/swapyv12buffer.h VP8_COMMON_SRCS-yes += common/systemdependent.h VP8_COMMON_SRCS-yes += common/threading.h VP8_COMMON_SRCS-yes += common/treecoder.h VP8_COMMON_SRCS-yes += common/vp8_loopfilter.c VP8_COMMON_SRCS-yes += common/loopfilter_filters.c VP8_COMMON_SRCS-yes += common/mbpitch.c 
VP8_COMMON_SRCS-yes += common/modecont.c VP8_COMMON_SRCS-yes += common/quant_common.c VP8_COMMON_SRCS-yes += common/reconinter.c VP8_COMMON_SRCS-yes += common/reconintra.c VP8_COMMON_SRCS-yes += common/reconintra4x4.c VP8_COMMON_SRCS-yes += common/setupintrarecon.c VP8_COMMON_SRCS-yes += common/swapyv12buffer.c VP8_COMMON_SRCS-yes += common/vp8_entropymodedata.h VP8_COMMON_SRCS-yes += common/treecoder.c VP8_COMMON_SRCS-$(VPX_ARCH_X86)$(VPX_ARCH_X86_64) += common/x86/vp8_asm_stubs.c VP8_COMMON_SRCS-$(VPX_ARCH_X86)$(VPX_ARCH_X86_64) += common/x86/loopfilter_x86.c VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/mfqe.c VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.h VP8_COMMON_SRCS-$(CONFIG_POSTPROC) += common/postproc.c VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/dequantize_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idct_blk_mmx.c VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/idctllm_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/recon_mmx.asm VP8_COMMON_SRCS-$(HAVE_MMX) += common/x86/subpixel_mmx.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idct_blk_sse2.c VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/idctllm_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/recon_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/bilinear_filter_sse2.c VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/subpixel_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/iwalsh_sse2.asm VP8_COMMON_SRCS-$(HAVE_SSSE3) += common/x86/subpixel_ssse3.asm ifeq ($(CONFIG_POSTPROC),yes) VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/mfqe_sse2.asm endif ifeq ($(VPX_ARCH_X86_64),yes) VP8_COMMON_SRCS-$(HAVE_SSE2) += common/x86/loopfilter_block_sse2_x86_64.asm endif # common (c) VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idctllm_dspr2.c VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/filter_dspr2.c VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp8_loopfilter_filters_dspr2.c VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/reconinter_dspr2.c VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/idct_blk_dspr2.c VP8_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/dequantize_dspr2.c # common (c) VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/bilinear_filter_msa.c VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/copymem_msa.c VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/idct_msa.c VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/loopfilter_filters_msa.c VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/sixtap_filter_msa.c VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp8_macros_msa.h # common (c) VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/sixtap_filter_mmi.c VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/loopfilter_filters_mmi.c VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/idctllm_mmi.c VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/dequantize_mmi.c VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/copymem_mmi.c VP8_COMMON_SRCS-$(HAVE_MMI) += common/mips/mmi/idct_blk_mmi.c ifeq ($(CONFIG_POSTPROC),yes) VP8_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/mfqe_msa.c endif # common (neon intrinsics) VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/loopfilter_arm.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/loopfilter_arm.h VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/bilinearpredict_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/copymem_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dc_only_idct_add_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequant_idct_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/dequantizeb_neon.c 
VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/idct_blk_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/iwalsh_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp8_loopfilter_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimplehorizontaledge_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/loopfiltersimpleverticaledge_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/mbloopfilter_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/shortidct4x4llm_neon.c VP8_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/sixtappredict_neon.c $(eval $(call rtcd_h_template,vp8_rtcd,vp8/common/rtcd_defs.pl)) libvpx-1.8.2/vp8/vp8_cx_iface.c000066400000000000000000001316741357355204000162650ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "./vp8_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx/vpx_codec.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/system_state.h" #include "vpx_ports/vpx_once.h" #include "vpx_util/vpx_timestamp.h" #include "vp8/encoder/onyx_int.h" #include "vpx/vp8cx.h" #include "vp8/encoder/firstpass.h" #include "vp8/common/onyx.h" #include "vp8/common/common.h" #include #include struct vp8_extracfg { struct vpx_codec_pkt_list *pkt_list; int cpu_used; /** available cpu percentage in 1/16*/ /** if encoder decides to uses alternate reference frame */ unsigned int enable_auto_alt_ref; unsigned int noise_sensitivity; unsigned int Sharpness; unsigned int static_thresh; unsigned int token_partitions; unsigned int arnr_max_frames; /* alt_ref Noise Reduction Max Frame Count */ unsigned int arnr_strength; /* alt_ref Noise Reduction Strength */ unsigned int arnr_type; /* alt_ref filter type */ vp8e_tuning tuning; unsigned int cq_level; /* constrained quality level */ unsigned int rc_max_intra_bitrate_pct; unsigned int gf_cbr_boost_pct; unsigned int screen_content_mode; }; static struct vp8_extracfg default_extracfg = { NULL, #if !(CONFIG_REALTIME_ONLY) 0, /* cpu_used */ #else 4, /* cpu_used */ #endif 0, /* enable_auto_alt_ref */ 0, /* noise_sensitivity */ 0, /* Sharpness */ 0, /* static_thresh */ #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) VP8_EIGHT_TOKENPARTITION, #else VP8_ONE_TOKENPARTITION, /* token_partitions */ #endif 0, /* arnr_max_frames */ 3, /* arnr_strength */ 3, /* arnr_type*/ 0, /* tuning*/ 10, /* cq_level */ 0, /* rc_max_intra_bitrate_pct */ 0, /* gf_cbr_boost_pct */ 0, /* screen_content_mode */ }; struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_enc_cfg_t cfg; struct vp8_extracfg vp8_cfg; vpx_rational64_t timestamp_ratio; vpx_codec_pts_t pts_offset; unsigned char pts_offset_initialized; VP8_CONFIG oxcf; struct VP8_COMP *cpi; unsigned char *cx_data; unsigned int cx_data_sz; vpx_image_t preview_img; unsigned int next_frame_flag; vp8_postproc_cfg_t preview_ppcfg; /* pkt_list size depends on the maximum number of lagged frames allowed. 
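 *
 * (g_lag_in_frames is capped at 25 by validate_config() below, so the
 * 64-entry list declared next presumably leaves ample headroom for the
 * lagged frames' packets plus stats and PSNR packets.)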
*/ vpx_codec_pkt_list_decl(64) pkt_list; unsigned int fixed_kf_cntr; vpx_enc_frame_flags_t control_frame_flags; }; static vpx_codec_err_t update_error_state( vpx_codec_alg_priv_t *ctx, const struct vpx_internal_error_info *error) { vpx_codec_err_t res; if ((res = error->error_code)) { ctx->base.err_detail = error->has_detail ? error->detail : NULL; } return res; } #undef ERROR #define ERROR(str) \ do { \ ctx->base.err_detail = str; \ return VPX_CODEC_INVALID_PARAM; \ } while (0) #define RANGE_CHECK(p, memb, lo, hi) \ do { \ if (!(((p)->memb == (lo) || (p)->memb > (lo)) && (p)->memb <= (hi))) \ ERROR(#memb " out of range [" #lo ".." #hi "]"); \ } while (0) #define RANGE_CHECK_HI(p, memb, hi) \ do { \ if (!((p)->memb <= (hi))) ERROR(#memb " out of range [.." #hi "]"); \ } while (0) #define RANGE_CHECK_LO(p, memb, lo) \ do { \ if (!((p)->memb >= (lo))) ERROR(#memb " out of range [" #lo "..]"); \ } while (0) #define RANGE_CHECK_BOOL(p, memb) \ do { \ if (!!((p)->memb) != (p)->memb) ERROR(#memb " expected boolean"); \ } while (0) #if defined(_MSC_VER) #define COMPILE_TIME_ASSERT(boolexp) \ do { \ char compile_time_assert[(boolexp) ? 1 : -1]; \ (void)compile_time_assert; \ } while (0) #else /* !_MSC_VER */ #define COMPILE_TIME_ASSERT(boolexp) \ do { \ struct { \ unsigned int compile_time_assert : (boolexp) ? 1 : -1; \ } compile_time_assert; \ (void)compile_time_assert; \ } while (0) #endif /* _MSC_VER */ static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg, const struct vp8_extracfg *vp8_cfg, int finalize) { RANGE_CHECK(cfg, g_w, 1, 16383); /* 14 bits available */ RANGE_CHECK(cfg, g_h, 1, 16383); /* 14 bits available */ RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); RANGE_CHECK(cfg, g_timebase.num, 1, 1000000000); RANGE_CHECK_HI(cfg, g_profile, 3); RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer); RANGE_CHECK_HI(cfg, g_threads, 64); #if CONFIG_REALTIME_ONLY RANGE_CHECK_HI(cfg, g_lag_in_frames, 0); #elif CONFIG_MULTI_RES_ENCODING if (ctx->base.enc.total_encoders > 1) RANGE_CHECK_HI(cfg, g_lag_in_frames, 0); #else RANGE_CHECK_HI(cfg, g_lag_in_frames, 25); #endif RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q); RANGE_CHECK_HI(cfg, rc_undershoot_pct, 1000); RANGE_CHECK_HI(cfg, rc_overshoot_pct, 1000); RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); /* TODO: add spatial re-sampling support and frame dropping in * multi-res-encoder.*/ #if CONFIG_MULTI_RES_ENCODING if (ctx->base.enc.total_encoders > 1) RANGE_CHECK_HI(cfg, rc_resize_allowed, 0); #else RANGE_CHECK_BOOL(cfg, rc_resize_allowed); #endif RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100); RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100); RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100); #if CONFIG_REALTIME_ONLY RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS); #elif CONFIG_MULTI_RES_ENCODING if (ctx->base.enc.total_encoders > 1) RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS); #else RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); #endif /* VP8 does not support a lower bound on the keyframe interval in * automatic keyframe placement mode. 
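 *
 * (Example: kf_mode = VPX_KF_AUTO with kf_min_dist = 10 and
 * kf_max_dist = 100 is rejected by the check just below; either set
 * kf_min_dist to 0 or pin kf_min_dist == kf_max_dist for a fixed
 * keyframe interval.)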
*/ if (cfg->kf_mode != VPX_KF_DISABLED && cfg->kf_min_dist != cfg->kf_max_dist && cfg->kf_min_dist > 0) ERROR( "kf_min_dist not supported in auto mode, use 0 " "or kf_max_dist instead."); RANGE_CHECK_BOOL(vp8_cfg, enable_auto_alt_ref); RANGE_CHECK(vp8_cfg, cpu_used, -16, 16); #if CONFIG_REALTIME_ONLY && !CONFIG_TEMPORAL_DENOISING RANGE_CHECK(vp8_cfg, noise_sensitivity, 0, 0); #else RANGE_CHECK_HI(vp8_cfg, noise_sensitivity, 6); #endif RANGE_CHECK(vp8_cfg, token_partitions, VP8_ONE_TOKENPARTITION, VP8_EIGHT_TOKENPARTITION); RANGE_CHECK_HI(vp8_cfg, Sharpness, 7); RANGE_CHECK(vp8_cfg, arnr_max_frames, 0, 15); RANGE_CHECK_HI(vp8_cfg, arnr_strength, 6); RANGE_CHECK(vp8_cfg, arnr_type, 1, 3); RANGE_CHECK(vp8_cfg, cq_level, 0, 63); RANGE_CHECK_HI(vp8_cfg, screen_content_mode, 2); if (finalize && (cfg->rc_end_usage == VPX_CQ || cfg->rc_end_usage == VPX_Q)) RANGE_CHECK(vp8_cfg, cq_level, cfg->rc_min_quantizer, cfg->rc_max_quantizer); #if !(CONFIG_REALTIME_ONLY) if (cfg->g_pass == VPX_RC_LAST_PASS) { size_t packet_sz = sizeof(FIRSTPASS_STATS); int n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz); FIRSTPASS_STATS *stats; if (!cfg->rc_twopass_stats_in.buf) ERROR("rc_twopass_stats_in.buf not set."); if (cfg->rc_twopass_stats_in.sz % packet_sz) ERROR("rc_twopass_stats_in.sz indicates truncated packet."); if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz) ERROR("rc_twopass_stats_in requires at least two packets."); stats = (void *)((char *)cfg->rc_twopass_stats_in.buf + (n_packets - 1) * packet_sz); if ((int)(stats->count + 0.5) != n_packets - 1) ERROR("rc_twopass_stats_in missing EOS stats packet"); } #endif RANGE_CHECK(cfg, ts_number_layers, 1, 5); if (cfg->ts_number_layers > 1) { unsigned int i; RANGE_CHECK_HI(cfg, ts_periodicity, 16); for (i = 1; i < cfg->ts_number_layers; ++i) { if (cfg->ts_target_bitrate[i] <= cfg->ts_target_bitrate[i - 1] && cfg->rc_target_bitrate > 0) ERROR("ts_target_bitrate entries are not strictly increasing"); } RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers - 1], 1, 1); for (i = cfg->ts_number_layers - 2; i > 0; i--) { if (cfg->ts_rate_decimator[i - 1] != 2 * cfg->ts_rate_decimator[i]) ERROR("ts_rate_decimator factors are not powers of 2"); } RANGE_CHECK_HI(cfg, ts_layer_id[i], cfg->ts_number_layers - 1); } #if (CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING) if (cfg->g_threads > (1 << vp8_cfg->token_partitions)) ERROR("g_threads cannot be bigger than number of token partitions"); #endif return VPX_CODEC_OK; } static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, const vpx_image_t *img) { switch (img->fmt) { case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_I420: break; default: ERROR("Invalid image format. 
Only YV12 and I420 images are supported"); } if ((img->d_w != ctx->cfg.g_w) || (img->d_h != ctx->cfg.g_h)) ERROR("Image size must match encoder init configuration size"); return VPX_CODEC_OK; } static vpx_codec_err_t set_vp8e_config(VP8_CONFIG *oxcf, vpx_codec_enc_cfg_t cfg, struct vp8_extracfg vp8_cfg, vpx_codec_priv_enc_mr_cfg_t *mr_cfg) { oxcf->multi_threaded = cfg.g_threads; oxcf->Version = cfg.g_profile; oxcf->Width = cfg.g_w; oxcf->Height = cfg.g_h; oxcf->timebase = cfg.g_timebase; oxcf->error_resilient_mode = cfg.g_error_resilient; switch (cfg.g_pass) { case VPX_RC_ONE_PASS: oxcf->Mode = MODE_BESTQUALITY; break; case VPX_RC_FIRST_PASS: oxcf->Mode = MODE_FIRSTPASS; break; case VPX_RC_LAST_PASS: oxcf->Mode = MODE_SECONDPASS_BEST; break; } if (cfg.g_pass == VPX_RC_FIRST_PASS || cfg.g_pass == VPX_RC_ONE_PASS) { oxcf->allow_lag = 0; oxcf->lag_in_frames = 0; } else { oxcf->allow_lag = (cfg.g_lag_in_frames) > 0; oxcf->lag_in_frames = cfg.g_lag_in_frames; } oxcf->allow_df = (cfg.rc_dropframe_thresh > 0); oxcf->drop_frames_water_mark = cfg.rc_dropframe_thresh; oxcf->allow_spatial_resampling = cfg.rc_resize_allowed; oxcf->resample_up_water_mark = cfg.rc_resize_up_thresh; oxcf->resample_down_water_mark = cfg.rc_resize_down_thresh; if (cfg.rc_end_usage == VPX_VBR) { oxcf->end_usage = USAGE_LOCAL_FILE_PLAYBACK; } else if (cfg.rc_end_usage == VPX_CBR) { oxcf->end_usage = USAGE_STREAM_FROM_SERVER; } else if (cfg.rc_end_usage == VPX_CQ) { oxcf->end_usage = USAGE_CONSTRAINED_QUALITY; } else if (cfg.rc_end_usage == VPX_Q) { oxcf->end_usage = USAGE_CONSTANT_QUALITY; } oxcf->target_bandwidth = cfg.rc_target_bitrate; oxcf->rc_max_intra_bitrate_pct = vp8_cfg.rc_max_intra_bitrate_pct; oxcf->gf_cbr_boost_pct = vp8_cfg.gf_cbr_boost_pct; oxcf->best_allowed_q = cfg.rc_min_quantizer; oxcf->worst_allowed_q = cfg.rc_max_quantizer; oxcf->cq_level = vp8_cfg.cq_level; oxcf->fixed_q = -1; oxcf->under_shoot_pct = cfg.rc_undershoot_pct; oxcf->over_shoot_pct = cfg.rc_overshoot_pct; oxcf->maximum_buffer_size_in_ms = cfg.rc_buf_sz; oxcf->starting_buffer_level_in_ms = cfg.rc_buf_initial_sz; oxcf->optimal_buffer_level_in_ms = cfg.rc_buf_optimal_sz; oxcf->maximum_buffer_size = cfg.rc_buf_sz; oxcf->starting_buffer_level = cfg.rc_buf_initial_sz; oxcf->optimal_buffer_level = cfg.rc_buf_optimal_sz; oxcf->two_pass_vbrbias = cfg.rc_2pass_vbr_bias_pct; oxcf->two_pass_vbrmin_section = cfg.rc_2pass_vbr_minsection_pct; oxcf->two_pass_vbrmax_section = cfg.rc_2pass_vbr_maxsection_pct; oxcf->auto_key = cfg.kf_mode == VPX_KF_AUTO && cfg.kf_min_dist != cfg.kf_max_dist; oxcf->key_freq = cfg.kf_max_dist; oxcf->number_of_layers = cfg.ts_number_layers; oxcf->periodicity = cfg.ts_periodicity; if (oxcf->number_of_layers > 1) { memcpy(oxcf->target_bitrate, cfg.ts_target_bitrate, sizeof(cfg.ts_target_bitrate)); memcpy(oxcf->rate_decimator, cfg.ts_rate_decimator, sizeof(cfg.ts_rate_decimator)); memcpy(oxcf->layer_id, cfg.ts_layer_id, sizeof(cfg.ts_layer_id)); } #if CONFIG_MULTI_RES_ENCODING /* When mr_cfg is NULL, oxcf->mr_total_resolutions and oxcf->mr_encoder_id * are both memset to 0, which ensures the correct logic under this * situation. 
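 *
 * (For illustration only, with made-up values: a hypothetical
 * two-stream multi-res setup might pass mr_total_resolutions = 2,
 * mr_encoder_id 0 and 1, and a mr_down_sampling_factor of 2/1 for the
 * half-width, half-height stream.)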
*/ if (mr_cfg) { oxcf->mr_total_resolutions = mr_cfg->mr_total_resolutions; oxcf->mr_encoder_id = mr_cfg->mr_encoder_id; oxcf->mr_down_sampling_factor.num = mr_cfg->mr_down_sampling_factor.num; oxcf->mr_down_sampling_factor.den = mr_cfg->mr_down_sampling_factor.den; oxcf->mr_low_res_mode_info = mr_cfg->mr_low_res_mode_info; } #else (void)mr_cfg; #endif oxcf->cpu_used = vp8_cfg.cpu_used; oxcf->encode_breakout = vp8_cfg.static_thresh; oxcf->play_alternate = vp8_cfg.enable_auto_alt_ref; oxcf->noise_sensitivity = vp8_cfg.noise_sensitivity; oxcf->Sharpness = vp8_cfg.Sharpness; oxcf->token_partitions = vp8_cfg.token_partitions; oxcf->two_pass_stats_in = cfg.rc_twopass_stats_in; oxcf->output_pkt_list = vp8_cfg.pkt_list; oxcf->arnr_max_frames = vp8_cfg.arnr_max_frames; oxcf->arnr_strength = vp8_cfg.arnr_strength; oxcf->arnr_type = vp8_cfg.arnr_type; oxcf->tuning = vp8_cfg.tuning; oxcf->screen_content_mode = vp8_cfg.screen_content_mode; /* printf("Current VP8 Settings: \n"); printf("target_bandwidth: %d\n", oxcf->target_bandwidth); printf("noise_sensitivity: %d\n", oxcf->noise_sensitivity); printf("Sharpness: %d\n", oxcf->Sharpness); printf("cpu_used: %d\n", oxcf->cpu_used); printf("Mode: %d\n", oxcf->Mode); printf("auto_key: %d\n", oxcf->auto_key); printf("key_freq: %d\n", oxcf->key_freq); printf("end_usage: %d\n", oxcf->end_usage); printf("under_shoot_pct: %d\n", oxcf->under_shoot_pct); printf("over_shoot_pct: %d\n", oxcf->over_shoot_pct); printf("starting_buffer_level: %d\n", oxcf->starting_buffer_level); printf("optimal_buffer_level: %d\n", oxcf->optimal_buffer_level); printf("maximum_buffer_size: %d\n", oxcf->maximum_buffer_size); printf("fixed_q: %d\n", oxcf->fixed_q); printf("worst_allowed_q: %d\n", oxcf->worst_allowed_q); printf("best_allowed_q: %d\n", oxcf->best_allowed_q); printf("allow_spatial_resampling: %d\n", oxcf->allow_spatial_resampling); printf("resample_down_water_mark: %d\n", oxcf->resample_down_water_mark); printf("resample_up_water_mark: %d\n", oxcf->resample_up_water_mark); printf("allow_df: %d\n", oxcf->allow_df); printf("drop_frames_water_mark: %d\n", oxcf->drop_frames_water_mark); printf("two_pass_vbrbias: %d\n", oxcf->two_pass_vbrbias); printf("two_pass_vbrmin_section: %d\n", oxcf->two_pass_vbrmin_section); printf("two_pass_vbrmax_section: %d\n", oxcf->two_pass_vbrmax_section); printf("allow_lag: %d\n", oxcf->allow_lag); printf("lag_in_frames: %d\n", oxcf->lag_in_frames); printf("play_alternate: %d\n", oxcf->play_alternate); printf("Version: %d\n", oxcf->Version); printf("multi_threaded: %d\n", oxcf->multi_threaded); printf("encode_breakout: %d\n", oxcf->encode_breakout); */ return VPX_CODEC_OK; } static vpx_codec_err_t vp8e_set_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg) { vpx_codec_err_t res; if (cfg->g_w != ctx->cfg.g_w || cfg->g_h != ctx->cfg.g_h) { if (cfg->g_lag_in_frames > 1 || cfg->g_pass != VPX_RC_ONE_PASS) ERROR("Cannot change width or height after initialization"); if ((ctx->cpi->initial_width && (int)cfg->g_w > ctx->cpi->initial_width) || (ctx->cpi->initial_height && (int)cfg->g_h > ctx->cpi->initial_height)) ERROR("Cannot increase width or height larger than their initial values"); } /* Prevent increasing lag_in_frames. This check is stricter than it needs * to be -- the limit is not increasing past the first lag_in_frames * value, but we don't track the initial config, only the last successful * config. 
*/ if ((cfg->g_lag_in_frames > ctx->cfg.g_lag_in_frames)) ERROR("Cannot increase lag_in_frames"); res = validate_config(ctx, cfg, &ctx->vp8_cfg, 0); if (!res) { ctx->cfg = *cfg; set_vp8e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg, NULL); vp8_change_config(ctx->cpi, &ctx->oxcf); } return res; } static vpx_codec_err_t get_quantizer(vpx_codec_alg_priv_t *ctx, va_list args) { int *const arg = va_arg(args, int *); if (arg == NULL) return VPX_CODEC_INVALID_PARAM; *arg = vp8_get_quantizer(ctx->cpi); return VPX_CODEC_OK; } static vpx_codec_err_t get_quantizer64(vpx_codec_alg_priv_t *ctx, va_list args) { int *const arg = va_arg(args, int *); if (arg == NULL) return VPX_CODEC_INVALID_PARAM; *arg = vp8_reverse_trans(vp8_get_quantizer(ctx->cpi)); return VPX_CODEC_OK; } static vpx_codec_err_t update_extracfg(vpx_codec_alg_priv_t *ctx, const struct vp8_extracfg *extra_cfg) { const vpx_codec_err_t res = validate_config(ctx, &ctx->cfg, extra_cfg, 0); if (res == VPX_CODEC_OK) { ctx->vp8_cfg = *extra_cfg; set_vp8e_config(&ctx->oxcf, ctx->cfg, ctx->vp8_cfg, NULL); vp8_change_config(ctx->cpi, &ctx->oxcf); } return res; } static vpx_codec_err_t set_cpu_used(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.cpu_used = CAST(VP8E_SET_CPUUSED, args); // Use fastest speed setting (speed 16 or -16) if it's set beyond the range. extra_cfg.cpu_used = VPXMIN(16, extra_cfg.cpu_used); extra_cfg.cpu_used = VPXMAX(-16, extra_cfg.cpu_used); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t set_enable_auto_alt_ref(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.enable_auto_alt_ref = CAST(VP8E_SET_ENABLEAUTOALTREF, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t set_noise_sensitivity(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.noise_sensitivity = CAST(VP8E_SET_NOISE_SENSITIVITY, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t set_sharpness(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.Sharpness = CAST(VP8E_SET_SHARPNESS, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t set_static_thresh(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.static_thresh = CAST(VP8E_SET_STATIC_THRESHOLD, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t set_token_partitions(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.token_partitions = CAST(VP8E_SET_TOKEN_PARTITIONS, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t set_arnr_max_frames(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.arnr_max_frames = CAST(VP8E_SET_ARNR_MAXFRAMES, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t set_arnr_strength(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.arnr_strength = CAST(VP8E_SET_ARNR_STRENGTH, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t set_arnr_type(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.arnr_type = CAST(VP8E_SET_ARNR_TYPE, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t set_tuning(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; 
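/* Like the other small control handlers in this file, set_tuning() works on a local copy of the extra config: it overwrites the one field carried in the va_list, then hands the copy to update_extracfg(), which re-validates the combined configuration and applies it to the running encoder via vp8_change_config(). */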
extra_cfg.tuning = CAST(VP8E_SET_TUNING, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t set_cq_level(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.cq_level = CAST(VP8E_SET_CQ_LEVEL, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t set_rc_max_intra_bitrate_pct(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.rc_max_intra_bitrate_pct = CAST(VP8E_SET_MAX_INTRA_BITRATE_PCT, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_rc_gf_cbr_boost_pct(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.gf_cbr_boost_pct = CAST(VP8E_SET_GF_CBR_BOOST_PCT, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t set_screen_content_mode(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp8_extracfg extra_cfg = ctx->vp8_cfg; extra_cfg.screen_content_mode = CAST(VP8E_SET_SCREEN_CONTENT_MODE, args); return update_extracfg(ctx, &extra_cfg); } static vpx_codec_err_t vp8e_mr_alloc_mem(const vpx_codec_enc_cfg_t *cfg, void **mem_loc) { vpx_codec_err_t res = VPX_CODEC_OK; #if CONFIG_MULTI_RES_ENCODING LOWER_RES_FRAME_INFO *shared_mem_loc; int mb_rows = ((cfg->g_w + 15) >> 4); int mb_cols = ((cfg->g_h + 15) >> 4); shared_mem_loc = calloc(1, sizeof(LOWER_RES_FRAME_INFO)); if (!shared_mem_loc) { return VPX_CODEC_MEM_ERROR; } shared_mem_loc->mb_info = calloc(mb_rows * mb_cols, sizeof(LOWER_RES_MB_INFO)); if (!(shared_mem_loc->mb_info)) { free(shared_mem_loc); res = VPX_CODEC_MEM_ERROR; } else { *mem_loc = (void *)shared_mem_loc; res = VPX_CODEC_OK; } #else (void)cfg; (void)mem_loc; #endif return res; } static vpx_codec_err_t vp8e_init(vpx_codec_ctx_t *ctx, vpx_codec_priv_enc_mr_cfg_t *mr_cfg) { vpx_codec_err_t res = VPX_CODEC_OK; vp8_rtcd(); vpx_dsp_rtcd(); vpx_scale_rtcd(); if (!ctx->priv) { struct vpx_codec_alg_priv *priv = (struct vpx_codec_alg_priv *)vpx_calloc(1, sizeof(*priv)); if (!priv) { return VPX_CODEC_MEM_ERROR; } ctx->priv = (vpx_codec_priv_t *)priv; ctx->priv->init_flags = ctx->init_flags; if (ctx->config.enc) { /* Update the reference to the config structure to an * internal copy. 
*/ priv->cfg = *ctx->config.enc; ctx->config.enc = &priv->cfg; } priv->vp8_cfg = default_extracfg; priv->vp8_cfg.pkt_list = &priv->pkt_list.head; priv->cx_data_sz = priv->cfg.g_w * priv->cfg.g_h * 3 / 2 * 2; if (priv->cx_data_sz < 32768) priv->cx_data_sz = 32768; priv->cx_data = malloc(priv->cx_data_sz); if (!priv->cx_data) { return VPX_CODEC_MEM_ERROR; } if (mr_cfg) { ctx->priv->enc.total_encoders = mr_cfg->mr_total_resolutions; } else { ctx->priv->enc.total_encoders = 1; } once(vp8_initialize_enc); res = validate_config(priv, &priv->cfg, &priv->vp8_cfg, 0); if (!res) { priv->pts_offset_initialized = 0; priv->timestamp_ratio.den = priv->cfg.g_timebase.den; priv->timestamp_ratio.num = (int64_t)priv->cfg.g_timebase.num; priv->timestamp_ratio.num *= TICKS_PER_SEC; reduce_ratio(&priv->timestamp_ratio); set_vp8e_config(&priv->oxcf, priv->cfg, priv->vp8_cfg, mr_cfg); priv->cpi = vp8_create_compressor(&priv->oxcf); if (!priv->cpi) res = VPX_CODEC_MEM_ERROR; } } return res; } static vpx_codec_err_t vp8e_destroy(vpx_codec_alg_priv_t *ctx) { #if CONFIG_MULTI_RES_ENCODING /* Free multi-encoder shared memory */ if (ctx->oxcf.mr_total_resolutions > 0 && (ctx->oxcf.mr_encoder_id == ctx->oxcf.mr_total_resolutions - 1)) { LOWER_RES_FRAME_INFO *shared_mem_loc = (LOWER_RES_FRAME_INFO *)ctx->oxcf.mr_low_res_mode_info; free(shared_mem_loc->mb_info); free(ctx->oxcf.mr_low_res_mode_info); } #endif free(ctx->cx_data); vp8_remove_compressor(&ctx->cpi); vpx_free(ctx); return VPX_CODEC_OK; } static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, YV12_BUFFER_CONFIG *yv12) { const int y_w = img->d_w; const int y_h = img->d_h; const int uv_w = (img->d_w + 1) / 2; const int uv_h = (img->d_h + 1) / 2; vpx_codec_err_t res = VPX_CODEC_OK; yv12->y_buffer = img->planes[VPX_PLANE_Y]; yv12->u_buffer = img->planes[VPX_PLANE_U]; yv12->v_buffer = img->planes[VPX_PLANE_V]; yv12->y_crop_width = y_w; yv12->y_crop_height = y_h; yv12->y_width = y_w; yv12->y_height = y_h; yv12->uv_crop_width = uv_w; yv12->uv_crop_height = uv_h; yv12->uv_width = uv_w; yv12->uv_height = uv_h; yv12->y_stride = img->stride[VPX_PLANE_Y]; yv12->uv_stride = img->stride[VPX_PLANE_U]; yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; return res; } static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx, unsigned long duration, unsigned long deadline) { int new_qc; #if !(CONFIG_REALTIME_ONLY) /* Use best quality mode if no deadline is given. */ new_qc = MODE_BESTQUALITY; if (deadline) { /* Convert duration parameter from stream timebase to microseconds */ uint64_t duration_us; COMPILE_TIME_ASSERT(TICKS_PER_SEC > 1000000 && (TICKS_PER_SEC % 1000000) == 0); duration_us = duration * (uint64_t)ctx->timestamp_ratio.num / (ctx->timestamp_ratio.den * (TICKS_PER_SEC / 1000000)); /* If the deadline is more that the duration this frame is to be shown, * use good quality mode. Otherwise use realtime mode. */ new_qc = (deadline > duration_us) ? MODE_GOODQUALITY : MODE_REALTIME; } #else (void)duration; new_qc = MODE_REALTIME; #endif if (deadline == VPX_DL_REALTIME) { new_qc = MODE_REALTIME; } else if (ctx->cfg.g_pass == VPX_RC_FIRST_PASS) { new_qc = MODE_FIRSTPASS; } else if (ctx->cfg.g_pass == VPX_RC_LAST_PASS) { new_qc = (new_qc == MODE_BESTQUALITY) ? 
MODE_SECONDPASS_BEST : MODE_SECONDPASS; } if (ctx->oxcf.Mode != new_qc) { ctx->oxcf.Mode = new_qc; vp8_change_config(ctx->cpi, &ctx->oxcf); } } static vpx_codec_err_t set_reference_and_update(vpx_codec_alg_priv_t *ctx, vpx_enc_frame_flags_t flags) { /* Handle Flags */ if (((flags & VP8_EFLAG_NO_UPD_GF) && (flags & VP8_EFLAG_FORCE_GF)) || ((flags & VP8_EFLAG_NO_UPD_ARF) && (flags & VP8_EFLAG_FORCE_ARF))) { ctx->base.err_detail = "Conflicting flags."; return VPX_CODEC_INVALID_PARAM; } if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF)) { int ref = 7; if (flags & VP8_EFLAG_NO_REF_LAST) ref ^= VP8_LAST_FRAME; if (flags & VP8_EFLAG_NO_REF_GF) ref ^= VP8_GOLD_FRAME; if (flags & VP8_EFLAG_NO_REF_ARF) ref ^= VP8_ALTR_FRAME; vp8_use_as_reference(ctx->cpi, ref); } if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF | VP8_EFLAG_FORCE_ARF)) { int upd = 7; if (flags & VP8_EFLAG_NO_UPD_LAST) upd ^= VP8_LAST_FRAME; if (flags & VP8_EFLAG_NO_UPD_GF) upd ^= VP8_GOLD_FRAME; if (flags & VP8_EFLAG_NO_UPD_ARF) upd ^= VP8_ALTR_FRAME; vp8_update_reference(ctx->cpi, upd); } if (flags & VP8_EFLAG_NO_UPD_ENTROPY) { vp8_update_entropy(ctx->cpi, 0); } return VPX_CODEC_OK; } static vpx_codec_err_t vp8e_encode(vpx_codec_alg_priv_t *ctx, const vpx_image_t *img, vpx_codec_pts_t pts, unsigned long duration, vpx_enc_frame_flags_t enc_flags, unsigned long deadline) { volatile vpx_codec_err_t res = VPX_CODEC_OK; // Make a copy as volatile to avoid -Wclobbered with longjmp. volatile vpx_enc_frame_flags_t flags = enc_flags; volatile vpx_codec_pts_t pts_val = pts; if (!ctx->cfg.rc_target_bitrate) { #if CONFIG_MULTI_RES_ENCODING if (!ctx->cpi) return VPX_CODEC_ERROR; if (ctx->cpi->oxcf.mr_total_resolutions > 1) { LOWER_RES_FRAME_INFO *low_res_frame_info = (LOWER_RES_FRAME_INFO *)ctx->cpi->oxcf.mr_low_res_mode_info; if (!low_res_frame_info) return VPX_CODEC_ERROR; low_res_frame_info->skip_encoding_prev_stream = 1; if (ctx->cpi->oxcf.mr_encoder_id == 0) low_res_frame_info->skip_encoding_base_stream = 1; } #endif return res; } if (img) res = validate_img(ctx, img); if (!res) res = validate_config(ctx, &ctx->cfg, &ctx->vp8_cfg, 1); if (!ctx->pts_offset_initialized) { ctx->pts_offset = pts_val; ctx->pts_offset_initialized = 1; } pts_val -= ctx->pts_offset; pick_quickcompress_mode(ctx, duration, deadline); vpx_codec_pkt_list_init(&ctx->pkt_list); // If no flags are set in the encode call, then use the frame flags as // defined via the control function: vp8e_set_frame_flags. 
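/* Minimal usage sketch (illustrative only, not part of the library source; the variable names are hypothetical): an application can pass flags with each call, e.g. vpx_codec_encode(&codec, img, pts, duration, VP8_EFLAG_NO_UPD_GF, deadline); or set them once for the following frame with vpx_codec_control(&codec, VP8E_SET_FRAME_FLAGS, VP8_EFLAG_NO_UPD_GF); A non-zero per-call value takes precedence; the sticky control value is consumed, and then cleared, only when the encode call passes 0. */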
if (!flags) { flags = ctx->control_frame_flags; } ctx->control_frame_flags = 0; if (!res) res = set_reference_and_update(ctx, flags); /* Handle fixed keyframe intervals */ if (ctx->cfg.kf_mode == VPX_KF_AUTO && ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) { if (++ctx->fixed_kf_cntr > ctx->cfg.kf_min_dist) { flags |= VPX_EFLAG_FORCE_KF; ctx->fixed_kf_cntr = 1; } } if (setjmp(ctx->cpi->common.error.jmp)) { ctx->cpi->common.error.setjmp = 0; vpx_clear_system_state(); return VPX_CODEC_CORRUPT_FRAME; } /* Initialize the encoder instance on the first frame*/ if (!res && ctx->cpi) { unsigned int lib_flags; YV12_BUFFER_CONFIG sd; int64_t dst_time_stamp, dst_end_time_stamp; size_t size, cx_data_sz; unsigned char *cx_data; unsigned char *cx_data_end; int comp_data_state = 0; /* Set up internal flags */ if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) { ((VP8_COMP *)ctx->cpi)->b_calculate_psnr = 1; } if (ctx->base.init_flags & VPX_CODEC_USE_OUTPUT_PARTITION) { ((VP8_COMP *)ctx->cpi)->output_partition = 1; } /* Convert API flags to internal codec lib flags */ lib_flags = (flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0; dst_time_stamp = pts_val * ctx->timestamp_ratio.num / ctx->timestamp_ratio.den; dst_end_time_stamp = (pts_val + (int64_t)duration) * ctx->timestamp_ratio.num / ctx->timestamp_ratio.den; if (img != NULL) { res = image2yuvconfig(img, &sd); if (vp8_receive_raw_frame(ctx->cpi, ctx->next_frame_flag | lib_flags, &sd, dst_time_stamp, dst_end_time_stamp)) { VP8_COMP *cpi = (VP8_COMP *)ctx->cpi; res = update_error_state(ctx, &cpi->common.error); } /* reset for next frame */ ctx->next_frame_flag = 0; } cx_data = ctx->cx_data; cx_data_sz = ctx->cx_data_sz; cx_data_end = ctx->cx_data + cx_data_sz; lib_flags = 0; ctx->cpi->common.error.setjmp = 1; while (cx_data_sz >= ctx->cx_data_sz / 2) { comp_data_state = vp8_get_compressed_data( ctx->cpi, &lib_flags, &size, cx_data, cx_data_end, &dst_time_stamp, &dst_end_time_stamp, !img); if (comp_data_state == VPX_CODEC_CORRUPT_FRAME) { return VPX_CODEC_CORRUPT_FRAME; } else if (comp_data_state == -1) { break; } if (size) { vpx_codec_pts_t round, delta; vpx_codec_cx_pkt_t pkt; VP8_COMP *cpi = (VP8_COMP *)ctx->cpi; /* Add the frame packet to the list of returned packets. */ round = (vpx_codec_pts_t)ctx->timestamp_ratio.num / 2; if (round > 0) --round; delta = (dst_end_time_stamp - dst_time_stamp); pkt.kind = VPX_CODEC_CX_FRAME_PKT; pkt.data.frame.pts = (dst_time_stamp * ctx->timestamp_ratio.den + round) / ctx->timestamp_ratio.num + ctx->pts_offset; pkt.data.frame.duration = (unsigned long)((delta * ctx->timestamp_ratio.den + round) / ctx->timestamp_ratio.num); pkt.data.frame.flags = lib_flags << 16; pkt.data.frame.width[0] = cpi->common.Width; pkt.data.frame.height[0] = cpi->common.Height; pkt.data.frame.spatial_layer_encoded[0] = 1; if (lib_flags & FRAMEFLAGS_KEY) { pkt.data.frame.flags |= VPX_FRAME_IS_KEY; } if (!cpi->common.show_frame) { pkt.data.frame.flags |= VPX_FRAME_IS_INVISIBLE; /* This timestamp should be as close as possible to the * prior PTS so that if a decoder uses pts to schedule when * to do this, we start right after last frame was decoded. * Invisible frames have no duration. 
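* (Hence the "+ 1" applied below: the synthetic pts lands one tick after the prior visible frame's timestamp instead of colliding with it.)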
*/ pkt.data.frame.pts = ((cpi->last_time_stamp_seen * ctx->timestamp_ratio.den + round) / ctx->timestamp_ratio.num) + ctx->pts_offset + 1; pkt.data.frame.duration = 0; } if (cpi->droppable) pkt.data.frame.flags |= VPX_FRAME_IS_DROPPABLE; if (cpi->output_partition) { int i; const int num_partitions = (1 << cpi->common.multi_token_partition) + 1; pkt.data.frame.flags |= VPX_FRAME_IS_FRAGMENT; for (i = 0; i < num_partitions; ++i) { #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING pkt.data.frame.buf = cpi->partition_d[i]; #else pkt.data.frame.buf = cx_data; cx_data += cpi->partition_sz[i]; cx_data_sz -= cpi->partition_sz[i]; #endif pkt.data.frame.sz = cpi->partition_sz[i]; pkt.data.frame.partition_id = i; /* don't set the fragment bit for the last partition */ if (i == (num_partitions - 1)) { pkt.data.frame.flags &= ~VPX_FRAME_IS_FRAGMENT; } vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); } #if CONFIG_REALTIME_ONLY & CONFIG_ONTHEFLY_BITPACKING /* In lagged mode the encoder can buffer multiple frames. * We don't want this in partitioned output because * partitions are spread all over the output buffer. * So, force an exit! */ cx_data_sz -= ctx->cx_data_sz / 2; #endif } else { pkt.data.frame.buf = cx_data; pkt.data.frame.sz = size; pkt.data.frame.partition_id = -1; vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); cx_data += size; cx_data_sz -= size; } } } } return res; } static const vpx_codec_cx_pkt_t *vp8e_get_cxdata(vpx_codec_alg_priv_t *ctx, vpx_codec_iter_t *iter) { return vpx_codec_pkt_list_get(&ctx->pkt_list.head, iter); } static vpx_codec_err_t vp8e_set_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); if (data) { vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); vp8_set_reference(ctx->cpi, frame->frame_type, &sd); return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_err_t vp8e_get_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); if (data) { vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); vp8_get_reference(ctx->cpi, frame->frame_type, &sd); return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_err_t vp8e_set_previewpp(vpx_codec_alg_priv_t *ctx, va_list args) { #if CONFIG_POSTPROC vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); if (data) { ctx->preview_ppcfg = *((vp8_postproc_cfg_t *)data); return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; } #else (void)ctx; (void)args; return VPX_CODEC_INCAPABLE; #endif } static vpx_image_t *vp8e_get_preview(vpx_codec_alg_priv_t *ctx) { YV12_BUFFER_CONFIG sd; vp8_ppflags_t flags; vp8_zero(flags); if (ctx->preview_ppcfg.post_proc_flag) { flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag; flags.deblocking_level = ctx->preview_ppcfg.deblocking_level; flags.noise_level = ctx->preview_ppcfg.noise_level; } if (0 == vp8_get_preview_raw_frame(ctx->cpi, &sd, &flags)) { /* vpx_img_wrap(&ctx->preview_img, VPX_IMG_FMT_YV12, sd.y_width + 2*VP8BORDERINPIXELS, sd.y_height + 2*VP8BORDERINPIXELS, 1, sd.buffer_alloc); vpx_img_set_rect(&ctx->preview_img, VP8BORDERINPIXELS, VP8BORDERINPIXELS, sd.y_width, sd.y_height); */ ctx->preview_img.bps = 12; ctx->preview_img.planes[VPX_PLANE_Y] = sd.y_buffer; ctx->preview_img.planes[VPX_PLANE_U] = sd.u_buffer; ctx->preview_img.planes[VPX_PLANE_V] = sd.v_buffer; ctx->preview_img.fmt = 
VPX_IMG_FMT_I420; ctx->preview_img.x_chroma_shift = 1; ctx->preview_img.y_chroma_shift = 1; ctx->preview_img.d_w = sd.y_width; ctx->preview_img.d_h = sd.y_height; ctx->preview_img.stride[VPX_PLANE_Y] = sd.y_stride; ctx->preview_img.stride[VPX_PLANE_U] = sd.uv_stride; ctx->preview_img.stride[VPX_PLANE_V] = sd.uv_stride; ctx->preview_img.w = sd.y_width; ctx->preview_img.h = sd.y_height; return &ctx->preview_img; } else { return NULL; } } static vpx_codec_err_t vp8e_set_frame_flags(vpx_codec_alg_priv_t *ctx, va_list args) { int frame_flags = va_arg(args, int); ctx->control_frame_flags = frame_flags; return set_reference_and_update(ctx, frame_flags); } static vpx_codec_err_t vp8e_set_temporal_layer_id(vpx_codec_alg_priv_t *ctx, va_list args) { int layer_id = va_arg(args, int); if (layer_id < 0 || layer_id >= (int)ctx->cfg.ts_number_layers) { return VPX_CODEC_INVALID_PARAM; } ctx->cpi->temporal_layer_id = layer_id; return VPX_CODEC_OK; } static vpx_codec_err_t vp8e_set_roi_map(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_roi_map_t *data = va_arg(args, vpx_roi_map_t *); if (data) { vpx_roi_map_t *roi = (vpx_roi_map_t *)data; if (!vp8_set_roimap(ctx->cpi, roi->roi_map, roi->rows, roi->cols, roi->delta_q, roi->delta_lf, roi->static_threshold)) { return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; } } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_err_t vp8e_set_activemap(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_active_map_t *data = va_arg(args, vpx_active_map_t *); if (data) { vpx_active_map_t *map = (vpx_active_map_t *)data; if (!vp8_set_active_map(ctx->cpi, map->active_map, map->rows, map->cols)) { return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; } } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_err_t vp8e_set_scalemode(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_scaling_mode_t *data = va_arg(args, vpx_scaling_mode_t *); if (data) { int res; vpx_scaling_mode_t scalemode = *(vpx_scaling_mode_t *)data; res = vp8_set_internal_size(ctx->cpi, (VPX_SCALING)scalemode.h_scaling_mode, (VPX_SCALING)scalemode.v_scaling_mode); if (!res) { /*force next frame a key frame to effect scaling mode */ ctx->next_frame_flag |= FRAMEFLAGS_KEY; return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; } } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_ctrl_fn_map_t vp8e_ctf_maps[] = { { VP8_SET_REFERENCE, vp8e_set_reference }, { VP8_COPY_REFERENCE, vp8e_get_reference }, { VP8_SET_POSTPROC, vp8e_set_previewpp }, { VP8E_SET_FRAME_FLAGS, vp8e_set_frame_flags }, { VP8E_SET_TEMPORAL_LAYER_ID, vp8e_set_temporal_layer_id }, { VP8E_SET_ROI_MAP, vp8e_set_roi_map }, { VP8E_SET_ACTIVEMAP, vp8e_set_activemap }, { VP8E_SET_SCALEMODE, vp8e_set_scalemode }, { VP8E_SET_CPUUSED, set_cpu_used }, { VP8E_SET_NOISE_SENSITIVITY, set_noise_sensitivity }, { VP8E_SET_ENABLEAUTOALTREF, set_enable_auto_alt_ref }, { VP8E_SET_SHARPNESS, set_sharpness }, { VP8E_SET_STATIC_THRESHOLD, set_static_thresh }, { VP8E_SET_TOKEN_PARTITIONS, set_token_partitions }, { VP8E_GET_LAST_QUANTIZER, get_quantizer }, { VP8E_GET_LAST_QUANTIZER_64, get_quantizer64 }, { VP8E_SET_ARNR_MAXFRAMES, set_arnr_max_frames }, { VP8E_SET_ARNR_STRENGTH, set_arnr_strength }, { VP8E_SET_ARNR_TYPE, set_arnr_type }, { VP8E_SET_TUNING, set_tuning }, { VP8E_SET_CQ_LEVEL, set_cq_level }, { VP8E_SET_MAX_INTRA_BITRATE_PCT, set_rc_max_intra_bitrate_pct }, { VP8E_SET_SCREEN_CONTENT_MODE, set_screen_content_mode }, { VP8E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct }, { -1, NULL }, }; static 
vpx_codec_enc_cfg_map_t vp8e_usage_cfg_map[] = { { 0, { 0, /* g_usage (unused) */ 0, /* g_threads */ 0, /* g_profile */ 320, /* g_width */ 240, /* g_height */ VPX_BITS_8, /* g_bit_depth */ 8, /* g_input_bit_depth */ { 1, 30 }, /* g_timebase */ 0, /* g_error_resilient */ VPX_RC_ONE_PASS, /* g_pass */ 0, /* g_lag_in_frames */ 0, /* rc_dropframe_thresh */ 0, /* rc_resize_allowed */ 1, /* rc_scaled_width */ 1, /* rc_scaled_height */ 60, /* rc_resize_down_thresh */ 30, /* rc_resize_up_thresh */ VPX_VBR, /* rc_end_usage */ { NULL, 0 }, /* rc_twopass_stats_in */ { NULL, 0 }, /* rc_firstpass_mb_stats_in */ 256, /* rc_target_bandwidth */ 4, /* rc_min_quantizer */ 63, /* rc_max_quantizer */ 100, /* rc_undershoot_pct */ 100, /* rc_overshoot_pct */ 6000, /* rc_max_buffer_size */ 4000, /* rc_buffer_initial_size */ 5000, /* rc_buffer_optimal_size */ 50, /* rc_two_pass_vbrbias */ 0, /* rc_two_pass_vbrmin_section */ 400, /* rc_two_pass_vbrmax_section */ 0, // rc_2pass_vbr_corpus_complexity (only meaningful for VP9) /* keyframing settings (kf) */ VPX_KF_AUTO, /* g_kfmode */ 0, /* kf_min_dist */ 128, /* kf_max_dist */ VPX_SS_DEFAULT_LAYERS, /* ss_number_layers */ { 0 }, { 0 }, /* ss_target_bitrate */ 1, /* ts_number_layers */ { 0 }, /* ts_target_bitrate */ { 0 }, /* ts_rate_decimator */ 0, /* ts_periodicity */ { 0 }, /* ts_layer_id */ { 0 }, /* layer_target_bitrate */ 0 /* temporal_layering_mode */ } }, }; #ifndef VERSION_STRING #define VERSION_STRING #endif CODEC_INTERFACE(vpx_codec_vp8_cx) = { "WebM Project VP8 Encoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR | VPX_CODEC_CAP_OUTPUT_PARTITION, /* vpx_codec_caps_t caps; */ vp8e_init, /* vpx_codec_init_fn_t init; */ vp8e_destroy, /* vpx_codec_destroy_fn_t destroy; */ vp8e_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ { NULL, /* vpx_codec_peek_si_fn_t peek_si; */ NULL, /* vpx_codec_get_si_fn_t get_si; */ NULL, /* vpx_codec_decode_fn_t decode; */ NULL, /* vpx_codec_frame_get_fn_t frame_get; */ NULL, /* vpx_codec_set_fb_fn_t set_fb_fn; */ }, { 1, /* 1 cfg map */ vp8e_usage_cfg_map, /* vpx_codec_enc_cfg_map_t cfg_maps; */ vp8e_encode, /* vpx_codec_encode_fn_t encode; */ vp8e_get_cxdata, /* vpx_codec_get_cx_data_fn_t get_cx_data; */ vp8e_set_config, NULL, vp8e_get_preview, vp8e_mr_alloc_mem, } /* encoder functions */ }; libvpx-1.8.2/vp8/vp8_dx_iface.c000066400000000000000000000542031357355204000162560ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <stdlib.h> #include <string.h> #include <assert.h> #include "./vp8_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx/vpx_decoder.h" #include "vpx/vp8dx.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" #include "common/alloccommon.h" #include "common/common.h" #include "common/onyxd.h" #include "decoder/onyxd_int.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/system_state.h" #if CONFIG_ERROR_CONCEALMENT #include "decoder/error_concealment.h" #endif #include "decoder/decoderthreading.h" #define VP8_CAP_POSTPROC (CONFIG_POSTPROC ?
VPX_CODEC_CAP_POSTPROC : 0) #define VP8_CAP_ERROR_CONCEALMENT \ (CONFIG_ERROR_CONCEALMENT ? VPX_CODEC_CAP_ERROR_CONCEALMENT : 0) typedef vpx_codec_stream_info_t vp8_stream_info_t; /* Structures for handling memory allocations */ typedef enum { VP8_SEG_ALG_PRIV = 256, VP8_SEG_MAX } mem_seg_id_t; #define NELEMENTS(x) ((int)(sizeof(x) / sizeof((x)[0]))) struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_dec_cfg_t cfg; vp8_stream_info_t si; int decoder_init; #if CONFIG_MULTITHREAD // Restart threads on next frame if set to 1. // This is set when error happens in multithreaded decoding and all threads // are shut down. int restart_threads; #endif int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; vpx_decrypt_cb decrypt_cb; void *decrypt_state; vpx_image_t img; int img_setup; struct frame_buffers yv12_frame_buffers; void *user_priv; FRAGMENT_DATA fragments; }; static int vp8_init_ctx(vpx_codec_ctx_t *ctx) { vpx_codec_alg_priv_t *priv = (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv)); if (!priv) return 1; ctx->priv = (vpx_codec_priv_t *)priv; ctx->priv->init_flags = ctx->init_flags; priv->si.sz = sizeof(priv->si); priv->decrypt_cb = NULL; priv->decrypt_state = NULL; if (ctx->config.dec) { /* Update the reference to the config structure to an internal copy. */ priv->cfg = *ctx->config.dec; ctx->config.dec = &priv->cfg; } return 0; } static vpx_codec_err_t vp8_init(vpx_codec_ctx_t *ctx, vpx_codec_priv_enc_mr_cfg_t *data) { vpx_codec_err_t res = VPX_CODEC_OK; (void)data; vp8_rtcd(); vpx_dsp_rtcd(); vpx_scale_rtcd(); /* This function only allocates space for the vpx_codec_alg_priv_t * structure. More memory may be required at the time the stream * information becomes known. */ if (!ctx->priv) { vpx_codec_alg_priv_t *priv; if (vp8_init_ctx(ctx)) return VPX_CODEC_MEM_ERROR; priv = (vpx_codec_alg_priv_t *)ctx->priv; /* initialize number of fragments to zero */ priv->fragments.count = 0; /* is input fragments enabled? */ priv->fragments.enabled = (priv->base.init_flags & VPX_CODEC_USE_INPUT_FRAGMENTS); /*post processing level initialized to do nothing */ } return res; } static vpx_codec_err_t vp8_destroy(vpx_codec_alg_priv_t *ctx) { vp8_remove_decoder_instances(&ctx->yv12_frame_buffers); vpx_free(ctx); return VPX_CODEC_OK; } static vpx_codec_err_t vp8_peek_si_internal(const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si, vpx_decrypt_cb decrypt_cb, void *decrypt_state) { vpx_codec_err_t res = VPX_CODEC_OK; assert(data != NULL); if (data + data_sz <= data) { res = VPX_CODEC_INVALID_PARAM; } else { /* Parse uncompresssed part of key frame header. * 3 bytes:- including version, frame type and an offset * 3 bytes:- sync code (0x9d, 0x01, 0x2a) * 4 bytes:- including image width and height in the lowest 14 bits * of each 2-byte value. 
*/ uint8_t clear_buffer[10]; const uint8_t *clear = data; if (decrypt_cb) { int n = VPXMIN(sizeof(clear_buffer), data_sz); decrypt_cb(decrypt_state, data, clear_buffer, n); clear = clear_buffer; } si->is_kf = 0; if (data_sz >= 10 && !(clear[0] & 0x01)) { /* I-Frame */ si->is_kf = 1; /* vet via sync code */ if (clear[3] != 0x9d || clear[4] != 0x01 || clear[5] != 0x2a) { return VPX_CODEC_UNSUP_BITSTREAM; } si->w = (clear[6] | (clear[7] << 8)) & 0x3fff; si->h = (clear[8] | (clear[9] << 8)) & 0x3fff; /*printf("w=%d, h=%d\n", si->w, si->h);*/ if (!(si->h && si->w)) res = VPX_CODEC_CORRUPT_FRAME; } else { res = VPX_CODEC_UNSUP_BITSTREAM; } } return res; } static vpx_codec_err_t vp8_peek_si(const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si) { return vp8_peek_si_internal(data, data_sz, si, NULL, NULL); } static vpx_codec_err_t vp8_get_si(vpx_codec_alg_priv_t *ctx, vpx_codec_stream_info_t *si) { unsigned int sz; if (si->sz >= sizeof(vp8_stream_info_t)) { sz = sizeof(vp8_stream_info_t); } else { sz = sizeof(vpx_codec_stream_info_t); } memcpy(si, &ctx->si, sz); si->sz = sz; return VPX_CODEC_OK; } static vpx_codec_err_t update_error_state( vpx_codec_alg_priv_t *ctx, const struct vpx_internal_error_info *error) { vpx_codec_err_t res; if ((res = error->error_code)) { ctx->base.err_detail = error->has_detail ? error->detail : NULL; } return res; } static void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, void *user_priv) { /** vpx_img_wrap() doesn't allow specifying independent strides for * the Y, U, and V planes, nor other alignment adjustments that * might be representable by a YV12_BUFFER_CONFIG, so we just * initialize all the fields.*/ img->fmt = VPX_IMG_FMT_I420; img->w = yv12->y_stride; img->h = (yv12->y_height + 2 * VP8BORDERINPIXELS + 15) & ~15; img->d_w = img->r_w = yv12->y_width; img->d_h = img->r_h = yv12->y_height; img->x_chroma_shift = 1; img->y_chroma_shift = 1; img->planes[VPX_PLANE_Y] = yv12->y_buffer; img->planes[VPX_PLANE_U] = yv12->u_buffer; img->planes[VPX_PLANE_V] = yv12->v_buffer; img->planes[VPX_PLANE_ALPHA] = NULL; img->stride[VPX_PLANE_Y] = yv12->y_stride; img->stride[VPX_PLANE_U] = yv12->uv_stride; img->stride[VPX_PLANE_V] = yv12->uv_stride; img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; img->bit_depth = 8; img->bps = 12; img->user_priv = user_priv; img->img_data = yv12->buffer_alloc; img->img_data_owner = 0; img->self_allocd = 0; } static int update_fragments(vpx_codec_alg_priv_t *ctx, const uint8_t *data, unsigned int data_sz, volatile vpx_codec_err_t *res) { *res = VPX_CODEC_OK; if (ctx->fragments.count == 0) { /* New frame, reset fragment pointers and sizes */ memset((void *)ctx->fragments.ptrs, 0, sizeof(ctx->fragments.ptrs)); memset(ctx->fragments.sizes, 0, sizeof(ctx->fragments.sizes)); } if (ctx->fragments.enabled && !(data == NULL && data_sz == 0)) { /* Store a pointer to this fragment and return. We haven't * received the complete frame yet, so we will wait with decoding. 
*/ ctx->fragments.ptrs[ctx->fragments.count] = data; ctx->fragments.sizes[ctx->fragments.count] = data_sz; ctx->fragments.count++; if (ctx->fragments.count > (1 << EIGHT_PARTITION) + 1) { ctx->fragments.count = 0; *res = VPX_CODEC_INVALID_PARAM; return -1; } return 0; } if (!ctx->fragments.enabled && (data == NULL && data_sz == 0)) { return 0; } if (!ctx->fragments.enabled) { ctx->fragments.ptrs[0] = data; ctx->fragments.sizes[0] = data_sz; ctx->fragments.count = 1; } return 1; } static vpx_codec_err_t vp8_decode(vpx_codec_alg_priv_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline) { volatile vpx_codec_err_t res; volatile unsigned int resolution_change = 0; unsigned int w, h; if (!ctx->fragments.enabled && (data == NULL && data_sz == 0)) { return 0; } /* Update the input fragment data */ if (update_fragments(ctx, data, data_sz, &res) <= 0) return res; /* Determine the stream parameters. Note that we rely on peek_si to * validate that we have a buffer that does not wrap around the top * of the heap. */ w = ctx->si.w; h = ctx->si.h; res = vp8_peek_si_internal(ctx->fragments.ptrs[0], ctx->fragments.sizes[0], &ctx->si, ctx->decrypt_cb, ctx->decrypt_state); if ((res == VPX_CODEC_UNSUP_BITSTREAM) && !ctx->si.is_kf) { /* the peek function returns an error for non keyframes, however for * this case, it is not an error */ res = VPX_CODEC_OK; } if (!ctx->decoder_init && !ctx->si.is_kf) res = VPX_CODEC_UNSUP_BITSTREAM; if ((ctx->si.h != h) || (ctx->si.w != w)) resolution_change = 1; #if CONFIG_MULTITHREAD if (!res && ctx->restart_threads) { struct frame_buffers *fb = &ctx->yv12_frame_buffers; VP8D_COMP *pbi = ctx->yv12_frame_buffers.pbi[0]; VP8_COMMON *const pc = &pbi->common; if (setjmp(pbi->common.error.jmp)) { vp8_remove_decoder_instances(fb); vp8_zero(fb->pbi); vpx_clear_system_state(); return VPX_CODEC_ERROR; } pbi->common.error.setjmp = 1; pbi->max_threads = ctx->cfg.threads; vp8_decoder_create_threads(pbi); if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) { vp8mt_alloc_temp_buffers(pbi, pc->Width, pc->mb_rows); } ctx->restart_threads = 0; pbi->common.error.setjmp = 0; } #endif /* Initialize the decoder instance on the first frame*/ if (!res && !ctx->decoder_init) { VP8D_CONFIG oxcf; oxcf.Width = ctx->si.w; oxcf.Height = ctx->si.h; oxcf.Version = 9; oxcf.postprocess = 0; oxcf.max_threads = ctx->cfg.threads; oxcf.error_concealment = (ctx->base.init_flags & VPX_CODEC_USE_ERROR_CONCEALMENT); /* If postprocessing was enabled by the application and a * configuration has not been provided, default it. */ if (!ctx->postproc_cfg_set && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) { ctx->postproc_cfg.post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK | VP8_MFQE; ctx->postproc_cfg.deblocking_level = 4; ctx->postproc_cfg.noise_level = 0; } res = vp8_create_decoder_instances(&ctx->yv12_frame_buffers, &oxcf); if (res == VPX_CODEC_OK) ctx->decoder_init = 1; } /* Set these even if already initialized. The caller may have changed the * decrypt config between frames. 
*/ if (ctx->decoder_init) { ctx->yv12_frame_buffers.pbi[0]->decrypt_cb = ctx->decrypt_cb; ctx->yv12_frame_buffers.pbi[0]->decrypt_state = ctx->decrypt_state; } if (!res) { VP8D_COMP *pbi = ctx->yv12_frame_buffers.pbi[0]; VP8_COMMON *const pc = &pbi->common; if (resolution_change) { MACROBLOCKD *const xd = &pbi->mb; #if CONFIG_MULTITHREAD int i; #endif pc->Width = ctx->si.w; pc->Height = ctx->si.h; { int prev_mb_rows = pc->mb_rows; if (setjmp(pbi->common.error.jmp)) { pbi->common.error.setjmp = 0; /* on failure clear the cached resolution to ensure a full * reallocation is attempted on resync. */ ctx->si.w = 0; ctx->si.h = 0; vpx_clear_system_state(); /* same return value as used in vp8dx_receive_compressed_data */ return -1; } pbi->common.error.setjmp = 1; if (pc->Width <= 0) { pc->Width = w; vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Invalid frame width"); } if (pc->Height <= 0) { pc->Height = h; vpx_internal_error(&pc->error, VPX_CODEC_CORRUPT_FRAME, "Invalid frame height"); } if (vp8_alloc_frame_buffers(pc, pc->Width, pc->Height)) { vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffers"); } xd->pre = pc->yv12_fb[pc->lst_fb_idx]; xd->dst = pc->yv12_fb[pc->new_fb_idx]; #if CONFIG_MULTITHREAD for (i = 0; i < pbi->allocated_decoding_thread_count; ++i) { pbi->mb_row_di[i].mbd.dst = pc->yv12_fb[pc->new_fb_idx]; vp8_build_block_doffsets(&pbi->mb_row_di[i].mbd); } #endif vp8_build_block_doffsets(&pbi->mb); /* allocate memory for last frame MODE_INFO array */ #if CONFIG_ERROR_CONCEALMENT if (pbi->ec_enabled) { /* old prev_mip was released by vp8_de_alloc_frame_buffers() * called in vp8_alloc_frame_buffers() */ pc->prev_mip = vpx_calloc((pc->mb_cols + 1) * (pc->mb_rows + 1), sizeof(MODE_INFO)); if (!pc->prev_mip) { vp8_de_alloc_frame_buffers(pc); vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, "Failed to allocate" "last frame MODE_INFO array"); } pc->prev_mi = pc->prev_mip + pc->mode_info_stride + 1; if (vp8_alloc_overlap_lists(pbi)) vpx_internal_error(&pc->error, VPX_CODEC_MEM_ERROR, "Failed to allocate overlap lists " "for error concealment"); } #endif #if CONFIG_MULTITHREAD if (vpx_atomic_load_acquire(&pbi->b_multithreaded_rd)) { vp8mt_alloc_temp_buffers(pbi, pc->Width, prev_mb_rows); } #else (void)prev_mb_rows; #endif } pbi->common.error.setjmp = 0; /* required to get past the first get_free_fb() call */ pbi->common.fb_idx_ref_cnt[0] = 0; } if (setjmp(pbi->common.error.jmp)) { /* We do not know if the missing frame(s) was supposed to update * any of the reference buffers, but we act conservative and * mark only the last buffer as corrupted. */ pc->yv12_fb[pc->lst_fb_idx].corrupted = 1; if (pc->fb_idx_ref_cnt[pc->new_fb_idx] > 0) { pc->fb_idx_ref_cnt[pc->new_fb_idx]--; } pc->error.setjmp = 0; #if CONFIG_MULTITHREAD if (pbi->restart_threads) { ctx->si.w = 0; ctx->si.h = 0; ctx->restart_threads = 1; } #endif res = update_error_state(ctx, &pbi->common.error); return res; } pbi->common.error.setjmp = 1; /* update the pbi fragment data */ pbi->fragments = ctx->fragments; #if CONFIG_MULTITHREAD pbi->restart_threads = 0; #endif ctx->user_priv = user_priv; if (vp8dx_receive_compressed_data(pbi, deadline)) { res = update_error_state(ctx, &pbi->common.error); } /* get ready for the next series of fragments */ ctx->fragments.count = 0; } return res; } static vpx_image_t *vp8_get_frame(vpx_codec_alg_priv_t *ctx, vpx_codec_iter_t *iter) { vpx_image_t *img = NULL; /* iter acts as a flip flop, so an image is only returned on the first * call to get_frame. 
*/ if (!(*iter) && ctx->yv12_frame_buffers.pbi[0]) { YV12_BUFFER_CONFIG sd; int64_t time_stamp = 0, time_end_stamp = 0; vp8_ppflags_t flags; vp8_zero(flags); if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) { flags.post_proc_flag = ctx->postproc_cfg.post_proc_flag; flags.deblocking_level = ctx->postproc_cfg.deblocking_level; flags.noise_level = ctx->postproc_cfg.noise_level; } if (0 == vp8dx_get_raw_frame(ctx->yv12_frame_buffers.pbi[0], &sd, &time_stamp, &time_end_stamp, &flags)) { yuvconfig2image(&ctx->img, &sd, ctx->user_priv); img = &ctx->img; *iter = img; } } return img; } static vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, YV12_BUFFER_CONFIG *yv12) { const int y_w = img->d_w; const int y_h = img->d_h; const int uv_w = (img->d_w + 1) / 2; const int uv_h = (img->d_h + 1) / 2; vpx_codec_err_t res = VPX_CODEC_OK; yv12->y_buffer = img->planes[VPX_PLANE_Y]; yv12->u_buffer = img->planes[VPX_PLANE_U]; yv12->v_buffer = img->planes[VPX_PLANE_V]; yv12->y_crop_width = y_w; yv12->y_crop_height = y_h; yv12->y_width = y_w; yv12->y_height = y_h; yv12->uv_crop_width = uv_w; yv12->uv_crop_height = uv_h; yv12->uv_width = uv_w; yv12->uv_height = uv_h; yv12->y_stride = img->stride[VPX_PLANE_Y]; yv12->uv_stride = img->stride[VPX_PLANE_U]; yv12->border = (img->stride[VPX_PLANE_Y] - img->d_w) / 2; return res; } static vpx_codec_err_t vp8_set_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); if (data) { vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); return vp8dx_set_reference(ctx->yv12_frame_buffers.pbi[0], frame->frame_type, &sd); } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_err_t vp8_get_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); if (data) { vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); return vp8dx_get_reference(ctx->yv12_frame_buffers.pbi[0], frame->frame_type, &sd); } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_err_t vp8_get_quantizer(vpx_codec_alg_priv_t *ctx, va_list args) { int *const arg = va_arg(args, int *); if (arg == NULL) return VPX_CODEC_INVALID_PARAM; *arg = vp8dx_get_quantizer(ctx->yv12_frame_buffers.pbi[0]); return VPX_CODEC_OK; } static vpx_codec_err_t vp8_set_postproc(vpx_codec_alg_priv_t *ctx, va_list args) { #if CONFIG_POSTPROC vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); if (data) { ctx->postproc_cfg_set = 1; ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data); return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; } #else (void)ctx; (void)args; return VPX_CODEC_INCAPABLE; #endif } static vpx_codec_err_t vp8_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, va_list args) { int *update_info = va_arg(args, int *); if (update_info) { VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; *update_info = pbi->common.refresh_alt_ref_frame * (int)VP8_ALTR_FRAME + pbi->common.refresh_golden_frame * (int)VP8_GOLD_FRAME + pbi->common.refresh_last_frame * (int)VP8_LAST_FRAME; return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_err_t vp8_get_last_ref_frame(vpx_codec_alg_priv_t *ctx, va_list args) { int *ref_info = va_arg(args, int *); if (ref_info) { VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; VP8_COMMON *oci = &pbi->common; *ref_info = (vp8dx_references_buffer(oci, ALTREF_FRAME) ? 
VP8_ALTR_FRAME : 0) | (vp8dx_references_buffer(oci, GOLDEN_FRAME) ? VP8_GOLD_FRAME : 0) | (vp8dx_references_buffer(oci, LAST_FRAME) ? VP8_LAST_FRAME : 0); return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_err_t vp8_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, va_list args) { int *corrupted = va_arg(args, int *); VP8D_COMP *pbi = (VP8D_COMP *)ctx->yv12_frame_buffers.pbi[0]; if (corrupted && pbi) { const YV12_BUFFER_CONFIG *const frame = pbi->common.frame_to_show; if (frame == NULL) return VPX_CODEC_ERROR; *corrupted = frame->corrupted; return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_err_t vp8_set_decryptor(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_decrypt_init *init = va_arg(args, vpx_decrypt_init *); if (init) { ctx->decrypt_cb = init->decrypt_cb; ctx->decrypt_state = init->decrypt_state; } else { ctx->decrypt_cb = NULL; ctx->decrypt_state = NULL; } return VPX_CODEC_OK; } vpx_codec_ctrl_fn_map_t vp8_ctf_maps[] = { { VP8_SET_REFERENCE, vp8_set_reference }, { VP8_COPY_REFERENCE, vp8_get_reference }, { VP8_SET_POSTPROC, vp8_set_postproc }, { VP8D_GET_LAST_REF_UPDATES, vp8_get_last_ref_updates }, { VP8D_GET_FRAME_CORRUPTED, vp8_get_frame_corrupted }, { VP8D_GET_LAST_REF_USED, vp8_get_last_ref_frame }, { VPXD_GET_LAST_QUANTIZER, vp8_get_quantizer }, { VPXD_SET_DECRYPTOR, vp8_set_decryptor }, { -1, NULL }, }; #ifndef VERSION_STRING #define VERSION_STRING #endif CODEC_INTERFACE(vpx_codec_vp8_dx) = { "WebM Project VP8 Decoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, VPX_CODEC_CAP_DECODER | VP8_CAP_POSTPROC | VP8_CAP_ERROR_CONCEALMENT | VPX_CODEC_CAP_INPUT_FRAGMENTS, /* vpx_codec_caps_t caps; */ vp8_init, /* vpx_codec_init_fn_t init; */ vp8_destroy, /* vpx_codec_destroy_fn_t destroy; */ vp8_ctf_maps, /* vpx_codec_ctrl_fn_map_t *ctrl_maps; */ { vp8_peek_si, /* vpx_codec_peek_si_fn_t peek_si; */ vp8_get_si, /* vpx_codec_get_si_fn_t get_si; */ vp8_decode, /* vpx_codec_decode_fn_t decode; */ vp8_get_frame, /* vpx_codec_frame_get_fn_t frame_get; */ NULL, }, { /* encoder functions */ 0, NULL, /* vpx_codec_enc_cfg_map_t */ NULL, /* vpx_codec_encode_fn_t */ NULL, /* vpx_codec_get_cx_data_fn_t */ NULL, /* vpx_codec_enc_config_set_fn_t */ NULL, /* vpx_codec_get_global_headers_fn_t */ NULL, /* vpx_codec_get_preview_frame_fn_t */ NULL /* vpx_codec_enc_mr_get_mem_loc_fn_t */ } }; libvpx-1.8.2/vp8/vp8cx.mk000066400000000000000000000116221357355204000151520ustar00rootroot00000000000000## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. 
## VP8_CX_EXPORTS += exports_enc VP8_CX_SRCS-yes += $(VP8_COMMON_SRCS-yes) VP8_CX_SRCS-no += $(VP8_COMMON_SRCS-no) VP8_CX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes) VP8_CX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no) VP8_CX_SRCS-yes += vp8cx.mk VP8_CX_SRCS-yes += vp8_cx_iface.c VP8_CX_SRCS-yes += encoder/defaultcoefcounts.h VP8_CX_SRCS-yes += encoder/bitstream.c VP8_CX_SRCS-yes += encoder/boolhuff.c VP8_CX_SRCS-yes += encoder/copy_c.c VP8_CX_SRCS-yes += encoder/dct.c VP8_CX_SRCS-yes += encoder/encodeframe.c VP8_CX_SRCS-yes += encoder/encodeframe.h VP8_CX_SRCS-yes += encoder/encodeintra.c VP8_CX_SRCS-yes += encoder/encodemb.c VP8_CX_SRCS-yes += encoder/encodemv.c VP8_CX_SRCS-$(CONFIG_MULTITHREAD) += encoder/ethreading.c VP8_CX_SRCS-$(CONFIG_MULTITHREAD) += encoder/ethreading.h VP8_CX_SRCS-yes += encoder/firstpass.c VP8_CX_SRCS-yes += encoder/block.h VP8_CX_SRCS-yes += encoder/boolhuff.h VP8_CX_SRCS-yes += encoder/bitstream.h VP8_CX_SRCS-$(CONFIG_TEMPORAL_DENOISING) += encoder/denoising.h VP8_CX_SRCS-$(CONFIG_TEMPORAL_DENOISING) += encoder/denoising.c VP8_CX_SRCS-yes += encoder/encodeintra.h VP8_CX_SRCS-yes += encoder/encodemb.h VP8_CX_SRCS-yes += encoder/encodemv.h VP8_CX_SRCS-yes += encoder/firstpass.h VP8_CX_SRCS-yes += encoder/lookahead.c VP8_CX_SRCS-yes += encoder/lookahead.h VP8_CX_SRCS-yes += encoder/mcomp.h VP8_CX_SRCS-yes += encoder/modecosts.h VP8_CX_SRCS-yes += encoder/onyx_int.h VP8_CX_SRCS-yes += encoder/pickinter.h VP8_CX_SRCS-yes += encoder/quantize.h VP8_CX_SRCS-yes += encoder/ratectrl.h VP8_CX_SRCS-yes += encoder/rdopt.h VP8_CX_SRCS-yes += encoder/tokenize.h VP8_CX_SRCS-yes += encoder/treewriter.h VP8_CX_SRCS-yes += encoder/mcomp.c VP8_CX_SRCS-yes += encoder/modecosts.c VP8_CX_SRCS-yes += encoder/onyx_if.c VP8_CX_SRCS-yes += encoder/pickinter.c VP8_CX_SRCS-yes += encoder/picklpf.c VP8_CX_SRCS-yes += encoder/picklpf.h VP8_CX_SRCS-yes += encoder/vp8_quantize.c VP8_CX_SRCS-yes += encoder/ratectrl.c VP8_CX_SRCS-yes += encoder/rdopt.c VP8_CX_SRCS-yes += encoder/segmentation.c VP8_CX_SRCS-yes += encoder/segmentation.h VP8_CX_SRCS-yes += common/vp8_skin_detection.c VP8_CX_SRCS-yes += common/vp8_skin_detection.h VP8_CX_SRCS-yes += encoder/tokenize.c VP8_CX_SRCS-yes += encoder/dct_value_cost.h VP8_CX_SRCS-yes += encoder/dct_value_tokens.h VP8_CX_SRCS-yes += encoder/treewriter.c VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.h VP8_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/postproc.c VP8_CX_SRCS-yes += encoder/temporal_filter.c VP8_CX_SRCS-yes += encoder/temporal_filter.h VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.c VP8_CX_SRCS-$(CONFIG_MULTI_RES_ENCODING) += encoder/mr_dissim.h ifeq ($(CONFIG_REALTIME_ONLY),yes) VP8_CX_SRCS_REMOVE-yes += encoder/firstpass.c VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.c VP8_CX_SRCS_REMOVE-yes += encoder/temporal_filter.h endif VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/copy_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/copy_sse3.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/dct_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/fwalsh_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_quantize_sse2.c VP8_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp8_quantize_ssse3.c VP8_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/quantize_sse4.c ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/denoising_sse2.c endif VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/block_error_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm VP8_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp8_enc_stubs_sse2.c 
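## As with the two-pass-only C sources removed above for realtime builds, the matching SIMD kernels are stripped again below when CONFIG_REALTIME_ONLY is enabled.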
ifeq ($(CONFIG_REALTIME_ONLY),yes) VP8_CX_SRCS_REMOVE-$(HAVE_SSE2) += encoder/x86/temporal_filter_apply_sse2.asm endif VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/denoising_neon.c VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/fastquantizeb_neon.c VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/shortfdct_neon.c VP8_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp8_shortwalsh4x4_neon.c VP8_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/dct_msa.c VP8_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/encodeopt_msa.c VP8_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/quantize_msa.c VP8_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/temporal_filter_msa.c VP8_CX_SRCS-$(HAVE_MMI) += encoder/mips/mmi/vp8_quantize_mmi.c VP8_CX_SRCS-$(HAVE_MMI) += encoder/mips/mmi/dct_mmi.c ifeq ($(CONFIG_TEMPORAL_DENOISING),yes) VP8_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/denoising_msa.c endif ifeq ($(CONFIG_REALTIME_ONLY),yes) VP8_CX_SRCS_REMOVE-$(HAVE_MSA) += encoder/mips/msa/temporal_filter_msa.c endif VP8_CX_SRCS-yes := $(filter-out $(VP8_CX_SRCS_REMOVE-yes),$(VP8_CX_SRCS-yes)) libvpx-1.8.2/vp8/vp8dx.mk000066400000000000000000000027101357355204000151510ustar00rootroot00000000000000## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## VP8_DX_EXPORTS += exports_dec VP8_DX_SRCS-yes += $(VP8_COMMON_SRCS-yes) VP8_DX_SRCS-no += $(VP8_COMMON_SRCS-no) VP8_DX_SRCS_REMOVE-yes += $(VP8_COMMON_SRCS_REMOVE-yes) VP8_DX_SRCS_REMOVE-no += $(VP8_COMMON_SRCS_REMOVE-no) VP8_DX_SRCS-yes += vp8dx.mk VP8_DX_SRCS-yes += vp8_dx_iface.c VP8_DX_SRCS-yes += decoder/dboolhuff.c VP8_DX_SRCS-yes += decoder/decodemv.c VP8_DX_SRCS-yes += decoder/decodeframe.c VP8_DX_SRCS-yes += decoder/detokenize.c VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/ec_types.h VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/error_concealment.h VP8_DX_SRCS-$(CONFIG_ERROR_CONCEALMENT) += decoder/error_concealment.c VP8_DX_SRCS-yes += decoder/dboolhuff.h VP8_DX_SRCS-yes += decoder/decodemv.h VP8_DX_SRCS-yes += decoder/decoderthreading.h VP8_DX_SRCS-yes += decoder/detokenize.h VP8_DX_SRCS-yes += decoder/onyxd_int.h VP8_DX_SRCS-yes += decoder/treereader.h VP8_DX_SRCS-yes += decoder/onyxd_if.c VP8_DX_SRCS-$(CONFIG_MULTITHREAD) += decoder/threading.c VP8_DX_SRCS-yes := $(filter-out $(VP8_DX_SRCS_REMOVE-yes),$(VP8_DX_SRCS-yes)) libvpx-1.8.2/vp9/000077500000000000000000000000001357355204000135505ustar00rootroot00000000000000libvpx-1.8.2/vp9/common/000077500000000000000000000000001357355204000150405ustar00rootroot00000000000000libvpx-1.8.2/vp9/common/arm/000077500000000000000000000000001357355204000156175ustar00rootroot00000000000000libvpx-1.8.2/vp9/common/arm/neon/000077500000000000000000000000001357355204000165565ustar00rootroot00000000000000libvpx-1.8.2/vp9/common/arm/neon/vp9_highbd_iht16x16_add_neon.c000066400000000000000000000423531357355204000241550ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" #include "vp9/common/vp9_enums.h" #include "vp9/common/arm/neon/vp9_iht_neon.h" #include "vpx_dsp/arm/highbd_idct_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/inv_txfm.h" // Use macros to make sure argument lane is passed in as a constant integer. #define vmull_lane_s32_dual(in, c, lane, out) \ do { \ out[0].val[0] = vmull_lane_s32(vget_low_s32(in.val[0]), c, lane); \ out[0].val[1] = vmull_lane_s32(vget_low_s32(in.val[1]), c, lane); \ out[1].val[0] = vmull_lane_s32(vget_high_s32(in.val[0]), c, lane); \ out[1].val[1] = vmull_lane_s32(vget_high_s32(in.val[1]), c, lane); \ } while (0) #define vmlal_lane_s32_dual(in, c, lane, out) \ do { \ out[0].val[0] = \ vmlal_lane_s32(out[0].val[0], vget_low_s32(in.val[0]), c, lane); \ out[0].val[1] = \ vmlal_lane_s32(out[0].val[1], vget_low_s32(in.val[1]), c, lane); \ out[1].val[0] = \ vmlal_lane_s32(out[1].val[0], vget_high_s32(in.val[0]), c, lane); \ out[1].val[1] = \ vmlal_lane_s32(out[1].val[1], vget_high_s32(in.val[1]), c, lane); \ } while (0) #define vmlsl_lane_s32_dual(in, c, lane, out) \ do { \ out[0].val[0] = \ vmlsl_lane_s32(out[0].val[0], vget_low_s32(in.val[0]), c, lane); \ out[0].val[1] = \ vmlsl_lane_s32(out[0].val[1], vget_low_s32(in.val[1]), c, lane); \ out[1].val[0] = \ vmlsl_lane_s32(out[1].val[0], vget_high_s32(in.val[0]), c, lane); \ out[1].val[1] = \ vmlsl_lane_s32(out[1].val[1], vget_high_s32(in.val[1]), c, lane); \ } while (0) static INLINE int32x4x2_t highbd_dct_const_round_shift_low_8(const int64x2x2_t *const in) { int32x4x2_t out; out.val[0] = vcombine_s32(vrshrn_n_s64(in[0].val[0], DCT_CONST_BITS), vrshrn_n_s64(in[1].val[0], DCT_CONST_BITS)); out.val[1] = vcombine_s32(vrshrn_n_s64(in[0].val[1], DCT_CONST_BITS), vrshrn_n_s64(in[1].val[1], DCT_CONST_BITS)); return out; } #define highbd_iadst_half_butterfly(in, c, lane, out) \ do { \ int64x2x2_t t[2]; \ vmull_lane_s32_dual(in, c, lane, t); \ out = highbd_dct_const_round_shift_low_8(t); \ } while (0) #define highbd_iadst_butterfly(in0, in1, c, lane0, lane1, s0, s1) \ do { \ vmull_lane_s32_dual(in0, c, lane0, s0); \ vmull_lane_s32_dual(in0, c, lane1, s1); \ vmlal_lane_s32_dual(in1, c, lane1, s0); \ vmlsl_lane_s32_dual(in1, c, lane0, s1); \ } while (0) static INLINE int32x4x2_t vaddq_s32_dual(const int32x4x2_t in0, const int32x4x2_t in1) { int32x4x2_t out; out.val[0] = vaddq_s32(in0.val[0], in1.val[0]); out.val[1] = vaddq_s32(in0.val[1], in1.val[1]); return out; } static INLINE int64x2x2_t vaddq_s64_dual(const int64x2x2_t in0, const int64x2x2_t in1) { int64x2x2_t out; out.val[0] = vaddq_s64(in0.val[0], in1.val[0]); out.val[1] = vaddq_s64(in0.val[1], in1.val[1]); return out; } static INLINE int32x4x2_t vsubq_s32_dual(const int32x4x2_t in0, const int32x4x2_t in1) { int32x4x2_t out; out.val[0] = vsubq_s32(in0.val[0], in1.val[0]); out.val[1] = vsubq_s32(in0.val[1], in1.val[1]); return out; } static INLINE int64x2x2_t vsubq_s64_dual(const int64x2x2_t in0, const int64x2x2_t in1) { int64x2x2_t out; out.val[0] = vsubq_s64(in0.val[0], in1.val[0]); out.val[1] = vsubq_s64(in0.val[1], in1.val[1]); return out; } static INLINE int32x4x2_t vcombine_s32_dual(const int32x2x2_t in0, const int32x2x2_t in1) { int32x4x2_t out; out.val[0] = vcombine_s32(in0.val[0], in1.val[0]); out.val[1] = vcombine_s32(in0.val[1], in1.val[1]); return out; } static INLINE int32x4x2_t highbd_add_dct_const_round_shift_low_8( const
int64x2x2_t *const in0, const int64x2x2_t *const in1) { const int64x2x2_t sum_lo = vaddq_s64_dual(in0[0], in1[0]); const int64x2x2_t sum_hi = vaddq_s64_dual(in0[1], in1[1]); int32x2x2_t out_lo, out_hi; out_lo.val[0] = vrshrn_n_s64(sum_lo.val[0], DCT_CONST_BITS); out_lo.val[1] = vrshrn_n_s64(sum_lo.val[1], DCT_CONST_BITS); out_hi.val[0] = vrshrn_n_s64(sum_hi.val[0], DCT_CONST_BITS); out_hi.val[1] = vrshrn_n_s64(sum_hi.val[1], DCT_CONST_BITS); return vcombine_s32_dual(out_lo, out_hi); } static INLINE int32x4x2_t highbd_sub_dct_const_round_shift_low_8( const int64x2x2_t *const in0, const int64x2x2_t *const in1) { const int64x2x2_t sub_lo = vsubq_s64_dual(in0[0], in1[0]); const int64x2x2_t sub_hi = vsubq_s64_dual(in0[1], in1[1]); int32x2x2_t out_lo, out_hi; out_lo.val[0] = vrshrn_n_s64(sub_lo.val[0], DCT_CONST_BITS); out_lo.val[1] = vrshrn_n_s64(sub_lo.val[1], DCT_CONST_BITS); out_hi.val[0] = vrshrn_n_s64(sub_hi.val[0], DCT_CONST_BITS); out_hi.val[1] = vrshrn_n_s64(sub_hi.val[1], DCT_CONST_BITS); return vcombine_s32_dual(out_lo, out_hi); } static INLINE int32x4x2_t vnegq_s32_dual(const int32x4x2_t in) { int32x4x2_t out; out.val[0] = vnegq_s32(in.val[0]); out.val[1] = vnegq_s32(in.val[1]); return out; } static void highbd_iadst16_neon(const int32_t *input, int32_t *output, uint16_t *dest, const int stride, const int bd) { const int32x4_t c_1_31_5_27 = create_s32x4_neon(cospi_1_64, cospi_31_64, cospi_5_64, cospi_27_64); const int32x4_t c_9_23_13_19 = create_s32x4_neon(cospi_9_64, cospi_23_64, cospi_13_64, cospi_19_64); const int32x4_t c_17_15_21_11 = create_s32x4_neon(cospi_17_64, cospi_15_64, cospi_21_64, cospi_11_64); const int32x4_t c_25_7_29_3 = create_s32x4_neon(cospi_25_64, cospi_7_64, cospi_29_64, cospi_3_64); const int32x4_t c_4_28_20_12 = create_s32x4_neon(cospi_4_64, cospi_28_64, cospi_20_64, cospi_12_64); const int32x4_t c_16_n16_8_24 = create_s32x4_neon(cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64); int32x4x2_t in[16], out[16]; int32x4x2_t x[16], t[12]; int64x2x2_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2]; int64x2x2_t s8[2], s9[2], s10[2], s11[2], s12[2], s13[2], s14[2], s15[2]; // Load input (16x8) in[0].val[0] = vld1q_s32(input); in[0].val[1] = vld1q_s32(input + 4); input += 8; in[8].val[0] = vld1q_s32(input); in[8].val[1] = vld1q_s32(input + 4); input += 8; in[1].val[0] = vld1q_s32(input); in[1].val[1] = vld1q_s32(input + 4); input += 8; in[9].val[0] = vld1q_s32(input); in[9].val[1] = vld1q_s32(input + 4); input += 8; in[2].val[0] = vld1q_s32(input); in[2].val[1] = vld1q_s32(input + 4); input += 8; in[10].val[0] = vld1q_s32(input); in[10].val[1] = vld1q_s32(input + 4); input += 8; in[3].val[0] = vld1q_s32(input); in[3].val[1] = vld1q_s32(input + 4); input += 8; in[11].val[0] = vld1q_s32(input); in[11].val[1] = vld1q_s32(input + 4); input += 8; in[4].val[0] = vld1q_s32(input); in[4].val[1] = vld1q_s32(input + 4); input += 8; in[12].val[0] = vld1q_s32(input); in[12].val[1] = vld1q_s32(input + 4); input += 8; in[5].val[0] = vld1q_s32(input); in[5].val[1] = vld1q_s32(input + 4); input += 8; in[13].val[0] = vld1q_s32(input); in[13].val[1] = vld1q_s32(input + 4); input += 8; in[6].val[0] = vld1q_s32(input); in[6].val[1] = vld1q_s32(input + 4); input += 8; in[14].val[0] = vld1q_s32(input); in[14].val[1] = vld1q_s32(input + 4); input += 8; in[7].val[0] = vld1q_s32(input); in[7].val[1] = vld1q_s32(input + 4); input += 8; in[15].val[0] = vld1q_s32(input); in[15].val[1] = vld1q_s32(input + 4); // Transpose transpose_s32_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], 
&in[6], &in[7]); transpose_s32_8x8(&in[8], &in[9], &in[10], &in[11], &in[12], &in[13], &in[14], &in[15]); x[0] = in[15]; x[1] = in[0]; x[2] = in[13]; x[3] = in[2]; x[4] = in[11]; x[5] = in[4]; x[6] = in[9]; x[7] = in[6]; x[8] = in[7]; x[9] = in[8]; x[10] = in[5]; x[11] = in[10]; x[12] = in[3]; x[13] = in[12]; x[14] = in[1]; x[15] = in[14]; // stage 1 highbd_iadst_butterfly(x[0], x[1], vget_low_s32(c_1_31_5_27), 0, 1, s0, s1); highbd_iadst_butterfly(x[2], x[3], vget_high_s32(c_1_31_5_27), 0, 1, s2, s3); highbd_iadst_butterfly(x[4], x[5], vget_low_s32(c_9_23_13_19), 0, 1, s4, s5); highbd_iadst_butterfly(x[6], x[7], vget_high_s32(c_9_23_13_19), 0, 1, s6, s7); highbd_iadst_butterfly(x[8], x[9], vget_low_s32(c_17_15_21_11), 0, 1, s8, s9); highbd_iadst_butterfly(x[10], x[11], vget_high_s32(c_17_15_21_11), 0, 1, s10, s11); highbd_iadst_butterfly(x[12], x[13], vget_low_s32(c_25_7_29_3), 0, 1, s12, s13); highbd_iadst_butterfly(x[14], x[15], vget_high_s32(c_25_7_29_3), 0, 1, s14, s15); x[0] = highbd_add_dct_const_round_shift_low_8(s0, s8); x[1] = highbd_add_dct_const_round_shift_low_8(s1, s9); x[2] = highbd_add_dct_const_round_shift_low_8(s2, s10); x[3] = highbd_add_dct_const_round_shift_low_8(s3, s11); x[4] = highbd_add_dct_const_round_shift_low_8(s4, s12); x[5] = highbd_add_dct_const_round_shift_low_8(s5, s13); x[6] = highbd_add_dct_const_round_shift_low_8(s6, s14); x[7] = highbd_add_dct_const_round_shift_low_8(s7, s15); x[8] = highbd_sub_dct_const_round_shift_low_8(s0, s8); x[9] = highbd_sub_dct_const_round_shift_low_8(s1, s9); x[10] = highbd_sub_dct_const_round_shift_low_8(s2, s10); x[11] = highbd_sub_dct_const_round_shift_low_8(s3, s11); x[12] = highbd_sub_dct_const_round_shift_low_8(s4, s12); x[13] = highbd_sub_dct_const_round_shift_low_8(s5, s13); x[14] = highbd_sub_dct_const_round_shift_low_8(s6, s14); x[15] = highbd_sub_dct_const_round_shift_low_8(s7, s15); // stage 2 t[0] = x[0]; t[1] = x[1]; t[2] = x[2]; t[3] = x[3]; t[4] = x[4]; t[5] = x[5]; t[6] = x[6]; t[7] = x[7]; highbd_iadst_butterfly(x[8], x[9], vget_low_s32(c_4_28_20_12), 0, 1, s8, s9); highbd_iadst_butterfly(x[10], x[11], vget_high_s32(c_4_28_20_12), 0, 1, s10, s11); highbd_iadst_butterfly(x[13], x[12], vget_low_s32(c_4_28_20_12), 1, 0, s13, s12); highbd_iadst_butterfly(x[15], x[14], vget_high_s32(c_4_28_20_12), 1, 0, s15, s14); x[0] = vaddq_s32_dual(t[0], t[4]); x[1] = vaddq_s32_dual(t[1], t[5]); x[2] = vaddq_s32_dual(t[2], t[6]); x[3] = vaddq_s32_dual(t[3], t[7]); x[4] = vsubq_s32_dual(t[0], t[4]); x[5] = vsubq_s32_dual(t[1], t[5]); x[6] = vsubq_s32_dual(t[2], t[6]); x[7] = vsubq_s32_dual(t[3], t[7]); x[8] = highbd_add_dct_const_round_shift_low_8(s8, s12); x[9] = highbd_add_dct_const_round_shift_low_8(s9, s13); x[10] = highbd_add_dct_const_round_shift_low_8(s10, s14); x[11] = highbd_add_dct_const_round_shift_low_8(s11, s15); x[12] = highbd_sub_dct_const_round_shift_low_8(s8, s12); x[13] = highbd_sub_dct_const_round_shift_low_8(s9, s13); x[14] = highbd_sub_dct_const_round_shift_low_8(s10, s14); x[15] = highbd_sub_dct_const_round_shift_low_8(s11, s15); // stage 3 t[0] = x[0]; t[1] = x[1]; t[2] = x[2]; t[3] = x[3]; highbd_iadst_butterfly(x[4], x[5], vget_high_s32(c_16_n16_8_24), 0, 1, s4, s5); highbd_iadst_butterfly(x[7], x[6], vget_high_s32(c_16_n16_8_24), 1, 0, s7, s6); t[8] = x[8]; t[9] = x[9]; t[10] = x[10]; t[11] = x[11]; highbd_iadst_butterfly(x[12], x[13], vget_high_s32(c_16_n16_8_24), 0, 1, s12, s13); highbd_iadst_butterfly(x[15], x[14], vget_high_s32(c_16_n16_8_24), 1, 0, s15, s14); x[0] = vaddq_s32_dual(t[0], t[2]); x[1] 
= vaddq_s32_dual(t[1], t[3]); x[2] = vsubq_s32_dual(t[0], t[2]); x[3] = vsubq_s32_dual(t[1], t[3]); x[4] = highbd_add_dct_const_round_shift_low_8(s4, s6); x[5] = highbd_add_dct_const_round_shift_low_8(s5, s7); x[6] = highbd_sub_dct_const_round_shift_low_8(s4, s6); x[7] = highbd_sub_dct_const_round_shift_low_8(s5, s7); x[8] = vaddq_s32_dual(t[8], t[10]); x[9] = vaddq_s32_dual(t[9], t[11]); x[10] = vsubq_s32_dual(t[8], t[10]); x[11] = vsubq_s32_dual(t[9], t[11]); x[12] = highbd_add_dct_const_round_shift_low_8(s12, s14); x[13] = highbd_add_dct_const_round_shift_low_8(s13, s15); x[14] = highbd_sub_dct_const_round_shift_low_8(s12, s14); x[15] = highbd_sub_dct_const_round_shift_low_8(s13, s15); // stage 4 { const int32x4x2_t sum = vaddq_s32_dual(x[2], x[3]); const int32x4x2_t sub = vsubq_s32_dual(x[2], x[3]); highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 1, x[2]); highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[3]); } { const int32x4x2_t sum = vaddq_s32_dual(x[7], x[6]); const int32x4x2_t sub = vsubq_s32_dual(x[7], x[6]); highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 0, x[6]); highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[7]); } { const int32x4x2_t sum = vaddq_s32_dual(x[11], x[10]); const int32x4x2_t sub = vsubq_s32_dual(x[11], x[10]); highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 0, x[10]); highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[11]); } { const int32x4x2_t sum = vaddq_s32_dual(x[14], x[15]); const int32x4x2_t sub = vsubq_s32_dual(x[14], x[15]); highbd_iadst_half_butterfly(sum, vget_low_s32(c_16_n16_8_24), 1, x[14]); highbd_iadst_half_butterfly(sub, vget_low_s32(c_16_n16_8_24), 0, x[15]); } out[0] = x[0]; out[1] = vnegq_s32_dual(x[8]); out[2] = x[12]; out[3] = vnegq_s32_dual(x[4]); out[4] = x[6]; out[5] = x[14]; out[6] = x[10]; out[7] = x[2]; out[8] = x[3]; out[9] = x[11]; out[10] = x[15]; out[11] = x[7]; out[12] = x[5]; out[13] = vnegq_s32_dual(x[13]); out[14] = x[9]; out[15] = vnegq_s32_dual(x[1]); if (output) { highbd_idct16x16_store_pass1(out, output); } else { highbd_idct16x16_add_store(out, dest, stride, bd); } } typedef void (*highbd_iht_1d)(const int32_t *input, int32_t *output, uint16_t *dest, const int stride, const int bd); typedef struct { highbd_iht_1d cols, rows; // vertical and horizontal } highbd_iht_2d; void vp9_highbd_iht16x16_256_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd) { if (bd == 8) { static const iht_2d IHT_16[] = { { vpx_idct16x16_256_add_half1d, vpx_idct16x16_256_add_half1d }, // DCT_DCT = 0 { vpx_iadst16x16_256_add_half1d, vpx_idct16x16_256_add_half1d }, // ADST_DCT = 1 { vpx_idct16x16_256_add_half1d, vpx_iadst16x16_256_add_half1d }, // DCT_ADST = 2 { vpx_iadst16x16_256_add_half1d, vpx_iadst16x16_256_add_half1d } // ADST_ADST = 3 }; const iht_2d ht = IHT_16[tx_type]; int16_t row_output[16 * 16]; // pass 1 ht.rows(input, row_output, dest, stride, 1); // upper 8 rows ht.rows(input + 8 * 16, row_output + 8, dest, stride, 1); // lower 8 rows // pass 2 ht.cols(row_output, NULL, dest, stride, 1); // left 8 columns ht.cols(row_output + 16 * 8, NULL, dest + 8, stride, 1); // right 8 columns } else { static const highbd_iht_2d IHT_16[] = { { vpx_highbd_idct16x16_256_add_half1d, vpx_highbd_idct16x16_256_add_half1d }, // DCT_DCT = 0 { highbd_iadst16_neon, vpx_highbd_idct16x16_256_add_half1d }, // ADST_DCT = 1 { vpx_highbd_idct16x16_256_add_half1d, highbd_iadst16_neon }, // DCT_ADST = 2 { highbd_iadst16_neon, 
highbd_iadst16_neon } // ADST_ADST = 3 }; const highbd_iht_2d ht = IHT_16[tx_type]; int32_t row_output[16 * 16]; // pass 1 ht.rows(input, row_output, dest, stride, bd); // upper 8 rows ht.rows(input + 8 * 16, row_output + 8, dest, stride, bd); // lower 8 rows // pass 2 ht.cols(row_output, NULL, dest, stride, bd); // left 8 columns ht.cols(row_output + 8 * 16, NULL, dest + 8, stride, bd); // right 8 columns } } libvpx-1.8.2/vp9/common/arm/neon/vp9_highbd_iht4x4_add_neon.c000066400000000000000000000150101357355204000237750ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include <assert.h> #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vp9/common/vp9_common.h" #include "vp9/common/arm/neon/vp9_iht_neon.h" #include "vpx_dsp/arm/highbd_idct_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/txfm_common.h" static INLINE void highbd_iadst4(int32x4_t *const io) { const int32_t sinpis[4] = { sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9 }; const int32x4_t sinpi = vld1q_s32(sinpis); int64x2x2_t s[7], t[4]; int32x4_t s7; s[0].val[0] = vmull_lane_s32(vget_low_s32(io[0]), vget_low_s32(sinpi), 0); s[0].val[1] = vmull_lane_s32(vget_high_s32(io[0]), vget_low_s32(sinpi), 0); s[1].val[0] = vmull_lane_s32(vget_low_s32(io[0]), vget_low_s32(sinpi), 1); s[1].val[1] = vmull_lane_s32(vget_high_s32(io[0]), vget_low_s32(sinpi), 1); s[2].val[0] = vmull_lane_s32(vget_low_s32(io[1]), vget_high_s32(sinpi), 0); s[2].val[1] = vmull_lane_s32(vget_high_s32(io[1]), vget_high_s32(sinpi), 0); s[3].val[0] = vmull_lane_s32(vget_low_s32(io[2]), vget_high_s32(sinpi), 1); s[3].val[1] = vmull_lane_s32(vget_high_s32(io[2]), vget_high_s32(sinpi), 1); s[4].val[0] = vmull_lane_s32(vget_low_s32(io[2]), vget_low_s32(sinpi), 0); s[4].val[1] = vmull_lane_s32(vget_high_s32(io[2]), vget_low_s32(sinpi), 0); s[5].val[0] = vmull_lane_s32(vget_low_s32(io[3]), vget_low_s32(sinpi), 1); s[5].val[1] = vmull_lane_s32(vget_high_s32(io[3]), vget_low_s32(sinpi), 1); s[6].val[0] = vmull_lane_s32(vget_low_s32(io[3]), vget_high_s32(sinpi), 1); s[6].val[1] = vmull_lane_s32(vget_high_s32(io[3]), vget_high_s32(sinpi), 1); s7 = vsubq_s32(io[0], io[2]); s7 = vaddq_s32(s7, io[3]); s[0].val[0] = vaddq_s64(s[0].val[0], s[3].val[0]); s[0].val[1] = vaddq_s64(s[0].val[1], s[3].val[1]); s[0].val[0] = vaddq_s64(s[0].val[0], s[5].val[0]); s[0].val[1] = vaddq_s64(s[0].val[1], s[5].val[1]); s[1].val[0] = vsubq_s64(s[1].val[0], s[4].val[0]); s[1].val[1] = vsubq_s64(s[1].val[1], s[4].val[1]); s[1].val[0] = vsubq_s64(s[1].val[0], s[6].val[0]); s[1].val[1] = vsubq_s64(s[1].val[1], s[6].val[1]); s[3] = s[2]; s[2].val[0] = vmull_lane_s32(vget_low_s32(s7), vget_high_s32(sinpi), 0); s[2].val[1] = vmull_lane_s32(vget_high_s32(s7), vget_high_s32(sinpi), 0); t[0].val[0] = vaddq_s64(s[0].val[0], s[3].val[0]); t[0].val[1] = vaddq_s64(s[0].val[1], s[3].val[1]); t[1].val[0] = vaddq_s64(s[1].val[0], s[3].val[0]); t[1].val[1] = vaddq_s64(s[1].val[1], s[3].val[1]); t[2] = s[2]; t[3].val[0] = vaddq_s64(s[0].val[0], s[1].val[0]); t[3].val[1] = vaddq_s64(s[0].val[1], s[1].val[1]); t[3].val[0] = vsubq_s64(t[3].val[0], s[3].val[0]); t[3].val[1] = vsubq_s64(t[3].val[1],
s[3].val[1]); io[0] = vcombine_s32(vrshrn_n_s64(t[0].val[0], DCT_CONST_BITS), vrshrn_n_s64(t[0].val[1], DCT_CONST_BITS)); io[1] = vcombine_s32(vrshrn_n_s64(t[1].val[0], DCT_CONST_BITS), vrshrn_n_s64(t[1].val[1], DCT_CONST_BITS)); io[2] = vcombine_s32(vrshrn_n_s64(t[2].val[0], DCT_CONST_BITS), vrshrn_n_s64(t[2].val[1], DCT_CONST_BITS)); io[3] = vcombine_s32(vrshrn_n_s64(t[3].val[0], DCT_CONST_BITS), vrshrn_n_s64(t[3].val[1], DCT_CONST_BITS)); } void vp9_highbd_iht4x4_16_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd) { const int16x8_t max = vdupq_n_s16((1 << bd) - 1); int16x8_t a[2]; int32x4_t c[4]; c[0] = vld1q_s32(input); c[1] = vld1q_s32(input + 4); c[2] = vld1q_s32(input + 8); c[3] = vld1q_s32(input + 12); if (bd == 8) { a[0] = vcombine_s16(vmovn_s32(c[0]), vmovn_s32(c[1])); a[1] = vcombine_s16(vmovn_s32(c[2]), vmovn_s32(c[3])); transpose_s16_4x4q(&a[0], &a[1]); switch (tx_type) { case DCT_DCT: idct4x4_16_kernel_bd8(a); a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); transpose_s16_4x4q(&a[0], &a[1]); idct4x4_16_kernel_bd8(a); a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); break; case ADST_DCT: idct4x4_16_kernel_bd8(a); a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); transpose_s16_4x4q(&a[0], &a[1]); iadst4(a); break; case DCT_ADST: iadst4(a); transpose_s16_4x4q(&a[0], &a[1]); idct4x4_16_kernel_bd8(a); a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); break; default: assert(tx_type == ADST_ADST); iadst4(a); transpose_s16_4x4q(&a[0], &a[1]); iadst4(a); break; } a[0] = vrshrq_n_s16(a[0], 4); a[1] = vrshrq_n_s16(a[1], 4); } else { switch (tx_type) { case DCT_DCT: { const int32x4_t cospis = vld1q_s32(kCospi32); if (bd == 10) { idct4x4_16_kernel_bd10(cospis, c); idct4x4_16_kernel_bd10(cospis, c); } else { idct4x4_16_kernel_bd12(cospis, c); idct4x4_16_kernel_bd12(cospis, c); } break; } case ADST_DCT: { const int32x4_t cospis = vld1q_s32(kCospi32); if (bd == 10) { idct4x4_16_kernel_bd10(cospis, c); } else { idct4x4_16_kernel_bd12(cospis, c); } transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]); highbd_iadst4(c); break; } case DCT_ADST: { const int32x4_t cospis = vld1q_s32(kCospi32); transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]); highbd_iadst4(c); if (bd == 10) { idct4x4_16_kernel_bd10(cospis, c); } else { idct4x4_16_kernel_bd12(cospis, c); } break; } default: { assert(tx_type == ADST_ADST); transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]); highbd_iadst4(c); transpose_s32_4x4(&c[0], &c[1], &c[2], &c[3]); highbd_iadst4(c); break; } } a[0] = vcombine_s16(vqrshrn_n_s32(c[0], 4), vqrshrn_n_s32(c[1], 4)); a[1] = vcombine_s16(vqrshrn_n_s32(c[2], 4), vqrshrn_n_s32(c[3], 4)); } highbd_idct4x4_1_add_kernel1(&dest, stride, a[0], max); highbd_idct4x4_1_add_kernel1(&dest, stride, a[1], max); } libvpx-1.8.2/vp9/common/arm/neon/vp9_highbd_iht8x8_add_neon.c000066400000000000000000000345711357355204000240220ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" #include "vp9/common/vp9_enums.h" #include "vp9/common/arm/neon/vp9_iht_neon.h" #include "vpx_dsp/arm/highbd_idct_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/inv_txfm.h" static INLINE void highbd_iadst_half_butterfly_neon(int32x4_t *const x, const int32x2_t c) { const int32x4_t sum = vaddq_s32(x[0], x[1]); const int32x4_t sub = vsubq_s32(x[0], x[1]); const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(sum), c, 0); const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(sub), c, 0); const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(sum), c, 0); const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(sub), c, 0); const int32x2_t out0_lo = vrshrn_n_s64(t0_lo, DCT_CONST_BITS); const int32x2_t out1_lo = vrshrn_n_s64(t1_lo, DCT_CONST_BITS); const int32x2_t out0_hi = vrshrn_n_s64(t0_hi, DCT_CONST_BITS); const int32x2_t out1_hi = vrshrn_n_s64(t1_hi, DCT_CONST_BITS); x[0] = vcombine_s32(out0_lo, out0_hi); x[1] = vcombine_s32(out1_lo, out1_hi); } static INLINE void highbd_iadst_butterfly_lane_0_1_neon(const int32x4_t in0, const int32x4_t in1, const int32x2_t c, int64x2_t *const s0, int64x2_t *const s1) { const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(in0), c, 0); const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(in0), c, 1); const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(in0), c, 0); const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(in0), c, 1); s0[0] = vmlal_lane_s32(t0_lo, vget_low_s32(in1), c, 1); s1[0] = vmlsl_lane_s32(t1_lo, vget_low_s32(in1), c, 0); s0[1] = vmlal_lane_s32(t0_hi, vget_high_s32(in1), c, 1); s1[1] = vmlsl_lane_s32(t1_hi, vget_high_s32(in1), c, 0); } static INLINE void highbd_iadst_butterfly_lane_1_0_neon(const int32x4_t in0, const int32x4_t in1, const int32x2_t c, int64x2_t *const s0, int64x2_t *const s1) { const int64x2_t t0_lo = vmull_lane_s32(vget_low_s32(in0), c, 1); const int64x2_t t1_lo = vmull_lane_s32(vget_low_s32(in0), c, 0); const int64x2_t t0_hi = vmull_lane_s32(vget_high_s32(in0), c, 1); const int64x2_t t1_hi = vmull_lane_s32(vget_high_s32(in0), c, 0); s0[0] = vmlal_lane_s32(t0_lo, vget_low_s32(in1), c, 0); s1[0] = vmlsl_lane_s32(t1_lo, vget_low_s32(in1), c, 1); s0[1] = vmlal_lane_s32(t0_hi, vget_high_s32(in1), c, 0); s1[1] = vmlsl_lane_s32(t1_hi, vget_high_s32(in1), c, 1); } static INLINE int32x4_t highbd_add_dct_const_round_shift_low_8( const int64x2_t *const in0, const int64x2_t *const in1) { const int64x2_t sum_lo = vaddq_s64(in0[0], in1[0]); const int64x2_t sum_hi = vaddq_s64(in0[1], in1[1]); const int32x2_t out_lo = vrshrn_n_s64(sum_lo, DCT_CONST_BITS); const int32x2_t out_hi = vrshrn_n_s64(sum_hi, DCT_CONST_BITS); return vcombine_s32(out_lo, out_hi); } static INLINE int32x4_t highbd_sub_dct_const_round_shift_low_8( const int64x2_t *const in0, const int64x2_t *const in1) { const int64x2_t sub_lo = vsubq_s64(in0[0], in1[0]); const int64x2_t sub_hi = vsubq_s64(in0[1], in1[1]); const int32x2_t out_lo = vrshrn_n_s64(sub_lo, DCT_CONST_BITS); const int32x2_t out_hi = vrshrn_n_s64(sub_hi, DCT_CONST_BITS); return vcombine_s32(out_lo, out_hi); } static INLINE void highbd_iadst8(int32x4_t *const io0, int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3, int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6, int32x4_t *const io7) { const int32x4_t c0 = create_s32x4_neon(cospi_2_64, cospi_30_64, cospi_10_64, cospi_22_64); const int32x4_t c1 = create_s32x4_neon(cospi_18_64, cospi_14_64, cospi_26_64, cospi_6_64); const int32x4_t c2 =
create_s32x4_neon(cospi_16_64, 0, cospi_8_64, cospi_24_64); int32x4_t x[8], t[4]; int64x2_t s[8][2]; x[0] = *io7; x[1] = *io0; x[2] = *io5; x[3] = *io2; x[4] = *io3; x[5] = *io4; x[6] = *io1; x[7] = *io6; // stage 1 highbd_iadst_butterfly_lane_0_1_neon(x[0], x[1], vget_low_s32(c0), s[0], s[1]); highbd_iadst_butterfly_lane_0_1_neon(x[2], x[3], vget_high_s32(c0), s[2], s[3]); highbd_iadst_butterfly_lane_0_1_neon(x[4], x[5], vget_low_s32(c1), s[4], s[5]); highbd_iadst_butterfly_lane_0_1_neon(x[6], x[7], vget_high_s32(c1), s[6], s[7]); x[0] = highbd_add_dct_const_round_shift_low_8(s[0], s[4]); x[1] = highbd_add_dct_const_round_shift_low_8(s[1], s[5]); x[2] = highbd_add_dct_const_round_shift_low_8(s[2], s[6]); x[3] = highbd_add_dct_const_round_shift_low_8(s[3], s[7]); x[4] = highbd_sub_dct_const_round_shift_low_8(s[0], s[4]); x[5] = highbd_sub_dct_const_round_shift_low_8(s[1], s[5]); x[6] = highbd_sub_dct_const_round_shift_low_8(s[2], s[6]); x[7] = highbd_sub_dct_const_round_shift_low_8(s[3], s[7]); // stage 2 t[0] = x[0]; t[1] = x[1]; t[2] = x[2]; t[3] = x[3]; highbd_iadst_butterfly_lane_0_1_neon(x[4], x[5], vget_high_s32(c2), s[4], s[5]); highbd_iadst_butterfly_lane_1_0_neon(x[7], x[6], vget_high_s32(c2), s[7], s[6]); x[0] = vaddq_s32(t[0], t[2]); x[1] = vaddq_s32(t[1], t[3]); x[2] = vsubq_s32(t[0], t[2]); x[3] = vsubq_s32(t[1], t[3]); x[4] = highbd_add_dct_const_round_shift_low_8(s[4], s[6]); x[5] = highbd_add_dct_const_round_shift_low_8(s[5], s[7]); x[6] = highbd_sub_dct_const_round_shift_low_8(s[4], s[6]); x[7] = highbd_sub_dct_const_round_shift_low_8(s[5], s[7]); // stage 3 highbd_iadst_half_butterfly_neon(x + 2, vget_low_s32(c2)); highbd_iadst_half_butterfly_neon(x + 6, vget_low_s32(c2)); *io0 = x[0]; *io1 = vnegq_s32(x[4]); *io2 = x[6]; *io3 = vnegq_s32(x[2]); *io4 = x[3]; *io5 = vnegq_s32(x[7]); *io6 = x[5]; *io7 = vnegq_s32(x[1]); } void vp9_highbd_iht8x8_64_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd) { int32x4_t a[16]; int16x8_t c[8]; a[0] = vld1q_s32(input); a[1] = vld1q_s32(input + 4); a[2] = vld1q_s32(input + 8); a[3] = vld1q_s32(input + 12); a[4] = vld1q_s32(input + 16); a[5] = vld1q_s32(input + 20); a[6] = vld1q_s32(input + 24); a[7] = vld1q_s32(input + 28); a[8] = vld1q_s32(input + 32); a[9] = vld1q_s32(input + 36); a[10] = vld1q_s32(input + 40); a[11] = vld1q_s32(input + 44); a[12] = vld1q_s32(input + 48); a[13] = vld1q_s32(input + 52); a[14] = vld1q_s32(input + 56); a[15] = vld1q_s32(input + 60); if (bd == 8) { c[0] = vcombine_s16(vmovn_s32(a[0]), vmovn_s32(a[1])); c[1] = vcombine_s16(vmovn_s32(a[2]), vmovn_s32(a[3])); c[2] = vcombine_s16(vmovn_s32(a[4]), vmovn_s32(a[5])); c[3] = vcombine_s16(vmovn_s32(a[6]), vmovn_s32(a[7])); c[4] = vcombine_s16(vmovn_s32(a[8]), vmovn_s32(a[9])); c[5] = vcombine_s16(vmovn_s32(a[10]), vmovn_s32(a[11])); c[6] = vcombine_s16(vmovn_s32(a[12]), vmovn_s32(a[13])); c[7] = vcombine_s16(vmovn_s32(a[14]), vmovn_s32(a[15])); switch (tx_type) { case DCT_DCT: { const int16x8_t cospis = vld1q_s16(kCospi); const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 idct8x8_64_1d_bd8(cospis0, cospis1, c); idct8x8_64_1d_bd8(cospis0, cospis1, c); break; } case ADST_DCT: { const int16x8_t cospis = vld1q_s16(kCospi); const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 idct8x8_64_1d_bd8(cospis0, cospis1, c); transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], 
&c[4], &c[5], &c[6], &c[7]); iadst8(c); break; } case DCT_ADST: { const int16x8_t cospis = vld1q_s16(kCospi); const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6], &c[7]); iadst8(c); idct8x8_64_1d_bd8(cospis0, cospis1, c); break; } default: { transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6], &c[7]); iadst8(c); transpose_s16_8x8(&c[0], &c[1], &c[2], &c[3], &c[4], &c[5], &c[6], &c[7]); iadst8(c); break; } } c[0] = vrshrq_n_s16(c[0], 5); c[1] = vrshrq_n_s16(c[1], 5); c[2] = vrshrq_n_s16(c[2], 5); c[3] = vrshrq_n_s16(c[3], 5); c[4] = vrshrq_n_s16(c[4], 5); c[5] = vrshrq_n_s16(c[5], 5); c[6] = vrshrq_n_s16(c[6], 5); c[7] = vrshrq_n_s16(c[7], 5); } else { switch (tx_type) { case DCT_DCT: { const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24 const int32x4_t cospis1 = vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28 if (bd == 10) { idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); idct8x8_64_half1d_bd10(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], &a[15]); idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]); idct8x8_64_half1d_bd10(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], &a[15]); } else { idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); idct8x8_64_half1d_bd12(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], &a[15]); idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]); idct8x8_64_half1d_bd12(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], &a[15]); } break; } case ADST_DCT: { const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24 const int32x4_t cospis1 = vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28 idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); idct8x8_64_half1d_bd12(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], &a[15]); transpose_s32_8x4(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]); highbd_iadst8(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]); transpose_s32_8x4(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], &a[15]); highbd_iadst8(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], &a[15]); break; } case DCT_ADST: { const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24 const int32x4_t cospis1 = vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28 transpose_s32_8x4(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); highbd_iadst8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); transpose_s32_8x4(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], &a[15]); highbd_iadst8(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], &a[15]); idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]); idct8x8_64_half1d_bd12(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], &a[15]); break; } default: { assert(tx_type == ADST_ADST); transpose_s32_8x4(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); highbd_iadst8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); transpose_s32_8x4(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], &a[15]); highbd_iadst8(&a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], &a[15]); 
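/* Orientation note: the transpose_s32_8x4/highbd_iadst8 pairs above apply the first 1-D ADST pass to the two halves of the 8x8 block; the pairs below re-transpose across the halves and apply the second 1-D pass of the 2-D transform. */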
transpose_s32_8x4(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]); highbd_iadst8(&a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]); transpose_s32_8x4(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], &a[15]); highbd_iadst8(&a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], &a[15]); break; } } c[0] = vcombine_s16(vrshrn_n_s32(a[0], 5), vrshrn_n_s32(a[4], 5)); c[1] = vcombine_s16(vrshrn_n_s32(a[8], 5), vrshrn_n_s32(a[12], 5)); c[2] = vcombine_s16(vrshrn_n_s32(a[1], 5), vrshrn_n_s32(a[5], 5)); c[3] = vcombine_s16(vrshrn_n_s32(a[9], 5), vrshrn_n_s32(a[13], 5)); c[4] = vcombine_s16(vrshrn_n_s32(a[2], 5), vrshrn_n_s32(a[6], 5)); c[5] = vcombine_s16(vrshrn_n_s32(a[10], 5), vrshrn_n_s32(a[14], 5)); c[6] = vcombine_s16(vrshrn_n_s32(a[3], 5), vrshrn_n_s32(a[7], 5)); c[7] = vcombine_s16(vrshrn_n_s32(a[11], 5), vrshrn_n_s32(a[15], 5)); } highbd_add8x8(c, dest, stride, bd); } libvpx-1.8.2/vp9/common/arm/neon/vp9_iht16x16_add_neon.c000066400000000000000000000223401357355204000226420ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include <assert.h> #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vp9/common/vp9_common.h" #include "vp9/common/arm/neon/vp9_iht_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/transpose_neon.h" void vpx_iadst16x16_256_add_half1d(const void *const input, int16_t *output, void *const dest, const int stride, const int highbd_flag) { int16x8_t in[16], out[16]; const int16x4_t c_1_31_5_27 = create_s16x4_neon(cospi_1_64, cospi_31_64, cospi_5_64, cospi_27_64); const int16x4_t c_9_23_13_19 = create_s16x4_neon(cospi_9_64, cospi_23_64, cospi_13_64, cospi_19_64); const int16x4_t c_17_15_21_11 = create_s16x4_neon(cospi_17_64, cospi_15_64, cospi_21_64, cospi_11_64); const int16x4_t c_25_7_29_3 = create_s16x4_neon(cospi_25_64, cospi_7_64, cospi_29_64, cospi_3_64); const int16x4_t c_4_28_20_12 = create_s16x4_neon(cospi_4_64, cospi_28_64, cospi_20_64, cospi_12_64); const int16x4_t c_16_n16_8_24 = create_s16x4_neon(cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64); int16x8_t x[16], t[12]; int32x4_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2]; int32x4_t s8[2], s9[2], s10[2], s11[2], s12[2], s13[2], s14[2], s15[2]; // Load input (16x8) if (output) { const tran_low_t *inputT = (const tran_low_t *)input; in[0] = load_tran_low_to_s16q(inputT); inputT += 8; in[8] = load_tran_low_to_s16q(inputT); inputT += 8; in[1] = load_tran_low_to_s16q(inputT); inputT += 8; in[9] = load_tran_low_to_s16q(inputT); inputT += 8; in[2] = load_tran_low_to_s16q(inputT); inputT += 8; in[10] = load_tran_low_to_s16q(inputT); inputT += 8; in[3] = load_tran_low_to_s16q(inputT); inputT += 8; in[11] = load_tran_low_to_s16q(inputT); inputT += 8; in[4] = load_tran_low_to_s16q(inputT); inputT += 8; in[12] = load_tran_low_to_s16q(inputT); inputT += 8; in[5] = load_tran_low_to_s16q(inputT); inputT += 8; in[13] = load_tran_low_to_s16q(inputT); inputT += 8; in[6] = load_tran_low_to_s16q(inputT); inputT += 8; in[14] = load_tran_low_to_s16q(inputT); inputT += 8; in[7] = load_tran_low_to_s16q(inputT); inputT += 8; in[15] = load_tran_low_to_s16q(inputT); } else { const
int16_t *inputT = (const int16_t *)input; in[0] = vld1q_s16(inputT); inputT += 8; in[8] = vld1q_s16(inputT); inputT += 8; in[1] = vld1q_s16(inputT); inputT += 8; in[9] = vld1q_s16(inputT); inputT += 8; in[2] = vld1q_s16(inputT); inputT += 8; in[10] = vld1q_s16(inputT); inputT += 8; in[3] = vld1q_s16(inputT); inputT += 8; in[11] = vld1q_s16(inputT); inputT += 8; in[4] = vld1q_s16(inputT); inputT += 8; in[12] = vld1q_s16(inputT); inputT += 8; in[5] = vld1q_s16(inputT); inputT += 8; in[13] = vld1q_s16(inputT); inputT += 8; in[6] = vld1q_s16(inputT); inputT += 8; in[14] = vld1q_s16(inputT); inputT += 8; in[7] = vld1q_s16(inputT); inputT += 8; in[15] = vld1q_s16(inputT); } // Transpose transpose_s16_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); transpose_s16_8x8(&in[8], &in[9], &in[10], &in[11], &in[12], &in[13], &in[14], &in[15]); x[0] = in[15]; x[1] = in[0]; x[2] = in[13]; x[3] = in[2]; x[4] = in[11]; x[5] = in[4]; x[6] = in[9]; x[7] = in[6]; x[8] = in[7]; x[9] = in[8]; x[10] = in[5]; x[11] = in[10]; x[12] = in[3]; x[13] = in[12]; x[14] = in[1]; x[15] = in[14]; // stage 1 iadst_butterfly_lane_0_1_neon(x[0], x[1], c_1_31_5_27, s0, s1); iadst_butterfly_lane_2_3_neon(x[2], x[3], c_1_31_5_27, s2, s3); iadst_butterfly_lane_0_1_neon(x[4], x[5], c_9_23_13_19, s4, s5); iadst_butterfly_lane_2_3_neon(x[6], x[7], c_9_23_13_19, s6, s7); iadst_butterfly_lane_0_1_neon(x[8], x[9], c_17_15_21_11, s8, s9); iadst_butterfly_lane_2_3_neon(x[10], x[11], c_17_15_21_11, s10, s11); iadst_butterfly_lane_0_1_neon(x[12], x[13], c_25_7_29_3, s12, s13); iadst_butterfly_lane_2_3_neon(x[14], x[15], c_25_7_29_3, s14, s15); x[0] = add_dct_const_round_shift_low_8(s0, s8); x[1] = add_dct_const_round_shift_low_8(s1, s9); x[2] = add_dct_const_round_shift_low_8(s2, s10); x[3] = add_dct_const_round_shift_low_8(s3, s11); x[4] = add_dct_const_round_shift_low_8(s4, s12); x[5] = add_dct_const_round_shift_low_8(s5, s13); x[6] = add_dct_const_round_shift_low_8(s6, s14); x[7] = add_dct_const_round_shift_low_8(s7, s15); x[8] = sub_dct_const_round_shift_low_8(s0, s8); x[9] = sub_dct_const_round_shift_low_8(s1, s9); x[10] = sub_dct_const_round_shift_low_8(s2, s10); x[11] = sub_dct_const_round_shift_low_8(s3, s11); x[12] = sub_dct_const_round_shift_low_8(s4, s12); x[13] = sub_dct_const_round_shift_low_8(s5, s13); x[14] = sub_dct_const_round_shift_low_8(s6, s14); x[15] = sub_dct_const_round_shift_low_8(s7, s15); // stage 2 t[0] = x[0]; t[1] = x[1]; t[2] = x[2]; t[3] = x[3]; t[4] = x[4]; t[5] = x[5]; t[6] = x[6]; t[7] = x[7]; iadst_butterfly_lane_0_1_neon(x[8], x[9], c_4_28_20_12, s8, s9); iadst_butterfly_lane_2_3_neon(x[10], x[11], c_4_28_20_12, s10, s11); iadst_butterfly_lane_1_0_neon(x[13], x[12], c_4_28_20_12, s13, s12); iadst_butterfly_lane_3_2_neon(x[15], x[14], c_4_28_20_12, s15, s14); x[0] = vaddq_s16(t[0], t[4]); x[1] = vaddq_s16(t[1], t[5]); x[2] = vaddq_s16(t[2], t[6]); x[3] = vaddq_s16(t[3], t[7]); x[4] = vsubq_s16(t[0], t[4]); x[5] = vsubq_s16(t[1], t[5]); x[6] = vsubq_s16(t[2], t[6]); x[7] = vsubq_s16(t[3], t[7]); x[8] = add_dct_const_round_shift_low_8(s8, s12); x[9] = add_dct_const_round_shift_low_8(s9, s13); x[10] = add_dct_const_round_shift_low_8(s10, s14); x[11] = add_dct_const_round_shift_low_8(s11, s15); x[12] = sub_dct_const_round_shift_low_8(s8, s12); x[13] = sub_dct_const_round_shift_low_8(s9, s13); x[14] = sub_dct_const_round_shift_low_8(s10, s14); x[15] = sub_dct_const_round_shift_low_8(s11, s15); // stage 3 t[0] = x[0]; t[1] = x[1]; t[2] = x[2]; t[3] = x[3]; iadst_butterfly_lane_2_3_neon(x[4], 
x[5], c_16_n16_8_24, s4, s5); iadst_butterfly_lane_3_2_neon(x[7], x[6], c_16_n16_8_24, s7, s6); t[8] = x[8]; t[9] = x[9]; t[10] = x[10]; t[11] = x[11]; iadst_butterfly_lane_2_3_neon(x[12], x[13], c_16_n16_8_24, s12, s13); iadst_butterfly_lane_3_2_neon(x[15], x[14], c_16_n16_8_24, s15, s14); x[0] = vaddq_s16(t[0], t[2]); x[1] = vaddq_s16(t[1], t[3]); x[2] = vsubq_s16(t[0], t[2]); x[3] = vsubq_s16(t[1], t[3]); x[4] = add_dct_const_round_shift_low_8(s4, s6); x[5] = add_dct_const_round_shift_low_8(s5, s7); x[6] = sub_dct_const_round_shift_low_8(s4, s6); x[7] = sub_dct_const_round_shift_low_8(s5, s7); x[8] = vaddq_s16(t[8], t[10]); x[9] = vaddq_s16(t[9], t[11]); x[10] = vsubq_s16(t[8], t[10]); x[11] = vsubq_s16(t[9], t[11]); x[12] = add_dct_const_round_shift_low_8(s12, s14); x[13] = add_dct_const_round_shift_low_8(s13, s15); x[14] = sub_dct_const_round_shift_low_8(s12, s14); x[15] = sub_dct_const_round_shift_low_8(s13, s15); // stage 4 iadst_half_butterfly_neg_neon(&x[3], &x[2], c_16_n16_8_24); iadst_half_butterfly_pos_neon(&x[7], &x[6], c_16_n16_8_24); iadst_half_butterfly_pos_neon(&x[11], &x[10], c_16_n16_8_24); iadst_half_butterfly_neg_neon(&x[15], &x[14], c_16_n16_8_24); out[0] = x[0]; out[1] = vnegq_s16(x[8]); out[2] = x[12]; out[3] = vnegq_s16(x[4]); out[4] = x[6]; out[5] = x[14]; out[6] = x[10]; out[7] = x[2]; out[8] = x[3]; out[9] = x[11]; out[10] = x[15]; out[11] = x[7]; out[12] = x[5]; out[13] = vnegq_s16(x[13]); out[14] = x[9]; out[15] = vnegq_s16(x[1]); if (output) { idct16x16_store_pass1(out, output); } else { if (highbd_flag) { idct16x16_add_store_bd8(out, dest, stride); } else { idct16x16_add_store(out, dest, stride); } } } void vp9_iht16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { static const iht_2d IHT_16[] = { { vpx_idct16x16_256_add_half1d, vpx_idct16x16_256_add_half1d }, // DCT_DCT = 0 { vpx_iadst16x16_256_add_half1d, vpx_idct16x16_256_add_half1d }, // ADST_DCT = 1 { vpx_idct16x16_256_add_half1d, vpx_iadst16x16_256_add_half1d }, // DCT_ADST = 2 { vpx_iadst16x16_256_add_half1d, vpx_iadst16x16_256_add_half1d } // ADST_ADST = 3 }; const iht_2d ht = IHT_16[tx_type]; int16_t row_output[16 * 16]; // pass 1 ht.rows(input, row_output, dest, stride, 0); // upper 8 rows ht.rows(input + 8 * 16, row_output + 8, dest, stride, 0); // lower 8 rows // pass 2 ht.cols(row_output, NULL, dest, stride, 0); // left 8 columns ht.cols(row_output + 16 * 8, NULL, dest + 8, stride, 0); // right 8 columns } libvpx-1.8.2/vp9/common/arm/neon/vp9_iht4x4_add_neon.c000066400000000000000000000044401357355204000224750ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <arm_neon.h> #include <assert.h> #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vp9/common/vp9_common.h" #include "vp9/common/arm/neon/vp9_iht_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/txfm_common.h" void vp9_iht4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { int16x8_t a[2]; uint8x8_t s[2], d[2]; uint16x8_t sum[2]; assert(!((intptr_t)dest % sizeof(uint32_t))); assert(!(stride % sizeof(uint32_t))); a[0] = load_tran_low_to_s16q(input); a[1] = load_tran_low_to_s16q(input + 8); transpose_s16_4x4q(&a[0], &a[1]); switch (tx_type) { case DCT_DCT: idct4x4_16_kernel_bd8(a); a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); transpose_s16_4x4q(&a[0], &a[1]); idct4x4_16_kernel_bd8(a); a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); break; case ADST_DCT: idct4x4_16_kernel_bd8(a); a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); transpose_s16_4x4q(&a[0], &a[1]); iadst4(a); break; case DCT_ADST: iadst4(a); transpose_s16_4x4q(&a[0], &a[1]); idct4x4_16_kernel_bd8(a); a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); break; default: assert(tx_type == ADST_ADST); iadst4(a); transpose_s16_4x4q(&a[0], &a[1]); iadst4(a); break; } a[0] = vrshrq_n_s16(a[0], 4); a[1] = vrshrq_n_s16(a[1], 4); s[0] = load_u8(dest, stride); s[1] = load_u8(dest + 2 * stride, stride); sum[0] = vaddw_u8(vreinterpretq_u16_s16(a[0]), s[0]); sum[1] = vaddw_u8(vreinterpretq_u16_s16(a[1]), s[1]); d[0] = vqmovun_s16(vreinterpretq_s16_u16(sum[0])); d[1] = vqmovun_s16(vreinterpretq_s16_u16(sum[1])); store_u8(dest, stride, d[0]); store_u8(dest + 2 * stride, stride, d[1]); } libvpx-1.8.2/vp9/common/arm/neon/vp9_iht8x8_add_neon.c000066400000000000000000000044201357355204000225030ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <arm_neon.h> #include <assert.h> #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vp9/common/vp9_common.h" #include "vp9/common/arm/neon/vp9_iht_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/transpose_neon.h" void vp9_iht8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { const int16x8_t cospis = vld1q_s16(kCospi); const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 int16x8_t a[8]; a[0] = load_tran_low_to_s16q(input + 0 * 8); a[1] = load_tran_low_to_s16q(input + 1 * 8); a[2] = load_tran_low_to_s16q(input + 2 * 8); a[3] = load_tran_low_to_s16q(input + 3 * 8); a[4] = load_tran_low_to_s16q(input + 4 * 8); a[5] = load_tran_low_to_s16q(input + 5 * 8); a[6] = load_tran_low_to_s16q(input + 6 * 8); a[7] = load_tran_low_to_s16q(input + 7 * 8); transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); switch (tx_type) { case DCT_DCT: idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a); transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a); break; case ADST_DCT: idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a); transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); iadst8(a); break; case DCT_ADST: iadst8(a); transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); idct8x8_64_1d_bd8_kernel(cospis0, cospis1, a); break; default: assert(tx_type == ADST_ADST); iadst8(a); transpose_s16_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); iadst8(a); break; } idct8x8_add8x8_neon(a, dest, stride); } libvpx-1.8.2/vp9/common/arm/neon/vp9_iht_neon.h000066400000000000000000000241331357355204000213330ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #ifndef VPX_VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_ #define VPX_VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_ #include <arm_neon.h> #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vp9/common/vp9_common.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/txfm_common.h" static INLINE void iadst4(int16x8_t *const io) { const int32x4_t c3 = vdupq_n_s32(sinpi_3_9); int16x4_t x[4]; int32x4_t s[8], output[4]; const int16x4_t c = create_s16x4_neon(sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9); x[0] = vget_low_s16(io[0]); x[1] = vget_low_s16(io[1]); x[2] = vget_high_s16(io[0]); x[3] = vget_high_s16(io[1]); s[0] = vmull_lane_s16(x[0], c, 0); s[1] = vmull_lane_s16(x[0], c, 1); s[2] = vmull_lane_s16(x[1], c, 2); s[3] = vmull_lane_s16(x[2], c, 3); s[4] = vmull_lane_s16(x[2], c, 0); s[5] = vmull_lane_s16(x[3], c, 1); s[6] = vmull_lane_s16(x[3], c, 3); s[7] = vaddl_s16(x[0], x[3]); s[7] = vsubw_s16(s[7], x[2]); s[0] = vaddq_s32(s[0], s[3]); s[0] = vaddq_s32(s[0], s[5]); s[1] = vsubq_s32(s[1], s[4]); s[1] = vsubq_s32(s[1], s[6]); s[3] = s[2]; s[2] = vmulq_s32(c3, s[7]); output[0] = vaddq_s32(s[0], s[3]); output[1] = vaddq_s32(s[1], s[3]); output[2] = s[2]; output[3] = vaddq_s32(s[0], s[1]); output[3] = vsubq_s32(output[3], s[3]); dct_const_round_shift_low_8_dual(output, &io[0], &io[1]); } static INLINE void iadst_half_butterfly_neon(int16x8_t *const x, const int16x4_t c) { // Don't add/sub before multiply, which will overflow in iadst8. const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(x[0]), c, 0); const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(x[0]), c, 0); const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(x[1]), c, 0); const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(x[1]), c, 0); int32x4_t t0[2], t1[2]; t0[0] = vaddq_s32(x0_lo, x1_lo); t0[1] = vaddq_s32(x0_hi, x1_hi); t1[0] = vsubq_s32(x0_lo, x1_lo); t1[1] = vsubq_s32(x0_hi, x1_hi); x[0] = dct_const_round_shift_low_8(t0); x[1] = dct_const_round_shift_low_8(t1); } static INLINE void iadst_half_butterfly_neg_neon(int16x8_t *const x0, int16x8_t *const x1, const int16x4_t c) { // Don't add/sub before multiply, which will overflow in iadst8. const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 1); const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(*x0), c, 1); const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 1); const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(*x1), c, 1); int32x4_t t0[2], t1[2]; t0[0] = vaddq_s32(x0_lo, x1_lo); t0[1] = vaddq_s32(x0_hi, x1_hi); t1[0] = vsubq_s32(x0_lo, x1_lo); t1[1] = vsubq_s32(x0_hi, x1_hi); *x1 = dct_const_round_shift_low_8(t0); *x0 = dct_const_round_shift_low_8(t1); } static INLINE void iadst_half_butterfly_pos_neon(int16x8_t *const x0, int16x8_t *const x1, const int16x4_t c) { // Don't add/sub before multiply, which will overflow in iadst8.
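/* Illustrative scalar sketch of this half butterfly (a hedged example, not
 * part of the upstream file): widening each product to 32 bits before the
 * add/sub keeps the intermediates in range, whereas forming x0 + x1 in
 * int16_t can wrap, e.g. 20000 + 20000 = 40000 > INT16_MAX (32767).
 *
 *   const int32_t t0 = (int32_t)x0 * c + (int32_t)x1 * c;
 *   const int32_t t1 = (int32_t)x0 * c - (int32_t)x1 * c;
 *   out1 = ROUND_POWER_OF_TWO(t0, DCT_CONST_BITS);  // rounding narrow, as in
 *   out0 = ROUND_POWER_OF_TWO(t1, DCT_CONST_BITS);  // dct_const_round_shift_low_8
 */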
const int32x4_t x0_lo = vmull_lane_s16(vget_low_s16(*x0), c, 0); const int32x4_t x0_hi = vmull_lane_s16(vget_high_s16(*x0), c, 0); const int32x4_t x1_lo = vmull_lane_s16(vget_low_s16(*x1), c, 0); const int32x4_t x1_hi = vmull_lane_s16(vget_high_s16(*x1), c, 0); int32x4_t t0[2], t1[2]; t0[0] = vaddq_s32(x0_lo, x1_lo); t0[1] = vaddq_s32(x0_hi, x1_hi); t1[0] = vsubq_s32(x0_lo, x1_lo); t1[1] = vsubq_s32(x0_hi, x1_hi); *x1 = dct_const_round_shift_low_8(t0); *x0 = dct_const_round_shift_low_8(t1); } static INLINE void iadst_butterfly_lane_0_1_neon(const int16x8_t in0, const int16x8_t in1, const int16x4_t c, int32x4_t *const s0, int32x4_t *const s1) { s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 0); s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 0); s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 1); s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 1); s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 1); s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 1); s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 0); s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 0); } static INLINE void iadst_butterfly_lane_2_3_neon(const int16x8_t in0, const int16x8_t in1, const int16x4_t c, int32x4_t *const s0, int32x4_t *const s1) { s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 2); s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 2); s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 3); s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 3); s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 3); s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 3); s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 2); s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 2); } static INLINE void iadst_butterfly_lane_1_0_neon(const int16x8_t in0, const int16x8_t in1, const int16x4_t c, int32x4_t *const s0, int32x4_t *const s1) { s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 1); s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 1); s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 0); s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 0); s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 0); s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 0); s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 1); s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 1); } static INLINE void iadst_butterfly_lane_3_2_neon(const int16x8_t in0, const int16x8_t in1, const int16x4_t c, int32x4_t *const s0, int32x4_t *const s1) { s0[0] = vmull_lane_s16(vget_low_s16(in0), c, 3); s0[1] = vmull_lane_s16(vget_high_s16(in0), c, 3); s1[0] = vmull_lane_s16(vget_low_s16(in0), c, 2); s1[1] = vmull_lane_s16(vget_high_s16(in0), c, 2); s0[0] = vmlal_lane_s16(s0[0], vget_low_s16(in1), c, 2); s0[1] = vmlal_lane_s16(s0[1], vget_high_s16(in1), c, 2); s1[0] = vmlsl_lane_s16(s1[0], vget_low_s16(in1), c, 3); s1[1] = vmlsl_lane_s16(s1[1], vget_high_s16(in1), c, 3); } static INLINE int16x8_t add_dct_const_round_shift_low_8( const int32x4_t *const in0, const int32x4_t *const in1) { int32x4_t sum[2]; sum[0] = vaddq_s32(in0[0], in1[0]); sum[1] = vaddq_s32(in0[1], in1[1]); return dct_const_round_shift_low_8(sum); } static INLINE int16x8_t sub_dct_const_round_shift_low_8( const int32x4_t *const in0, const int32x4_t *const in1) { int32x4_t sum[2]; sum[0] = vsubq_s32(in0[0], in1[0]); sum[1] = vsubq_s32(in0[1], in1[1]); return dct_const_round_shift_low_8(sum); } static INLINE void iadst8(int16x8_t *const io) { const int16x4_t c0 = create_s16x4_neon(cospi_2_64, cospi_30_64, cospi_10_64, cospi_22_64); const int16x4_t c1 = create_s16x4_neon(cospi_18_64, cospi_14_64, cospi_26_64, cospi_6_64); const 
int16x4_t c2 = create_s16x4_neon(cospi_16_64, 0, cospi_8_64, cospi_24_64); int16x8_t x[8], t[4]; int32x4_t s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2]; x[0] = io[7]; x[1] = io[0]; x[2] = io[5]; x[3] = io[2]; x[4] = io[3]; x[5] = io[4]; x[6] = io[1]; x[7] = io[6]; // stage 1 iadst_butterfly_lane_0_1_neon(x[0], x[1], c0, s0, s1); iadst_butterfly_lane_2_3_neon(x[2], x[3], c0, s2, s3); iadst_butterfly_lane_0_1_neon(x[4], x[5], c1, s4, s5); iadst_butterfly_lane_2_3_neon(x[6], x[7], c1, s6, s7); x[0] = add_dct_const_round_shift_low_8(s0, s4); x[1] = add_dct_const_round_shift_low_8(s1, s5); x[2] = add_dct_const_round_shift_low_8(s2, s6); x[3] = add_dct_const_round_shift_low_8(s3, s7); x[4] = sub_dct_const_round_shift_low_8(s0, s4); x[5] = sub_dct_const_round_shift_low_8(s1, s5); x[6] = sub_dct_const_round_shift_low_8(s2, s6); x[7] = sub_dct_const_round_shift_low_8(s3, s7); // stage 2 t[0] = x[0]; t[1] = x[1]; t[2] = x[2]; t[3] = x[3]; iadst_butterfly_lane_2_3_neon(x[4], x[5], c2, s4, s5); iadst_butterfly_lane_3_2_neon(x[7], x[6], c2, s7, s6); x[0] = vaddq_s16(t[0], t[2]); x[1] = vaddq_s16(t[1], t[3]); x[2] = vsubq_s16(t[0], t[2]); x[3] = vsubq_s16(t[1], t[3]); x[4] = add_dct_const_round_shift_low_8(s4, s6); x[5] = add_dct_const_round_shift_low_8(s5, s7); x[6] = sub_dct_const_round_shift_low_8(s4, s6); x[7] = sub_dct_const_round_shift_low_8(s5, s7); // stage 3 iadst_half_butterfly_neon(x + 2, c2); iadst_half_butterfly_neon(x + 6, c2); io[0] = x[0]; io[1] = vnegq_s16(x[4]); io[2] = x[6]; io[3] = vnegq_s16(x[2]); io[4] = x[3]; io[5] = vnegq_s16(x[7]); io[6] = x[5]; io[7] = vnegq_s16(x[1]); } void vpx_iadst16x16_256_add_half1d(const void *const input, int16_t *output, void *const dest, const int stride, const int highbd_flag); typedef void (*iht_1d)(const void *const input, int16_t *output, void *const dest, const int stride, const int highbd_flag); typedef struct { iht_1d cols, rows; // vertical and horizontal } iht_2d; #endif // VPX_VP9_COMMON_ARM_NEON_VP9_IHT_NEON_H_ libvpx-1.8.2/vp9/common/mips/000077500000000000000000000000001357355204000160105ustar00rootroot00000000000000libvpx-1.8.2/vp9/common/mips/dspr2/000077500000000000000000000000001357355204000170425ustar00rootroot00000000000000libvpx-1.8.2/vp9/common/mips/dspr2/vp9_itrans16_dspr2.c000066400000000000000000000055571357355204000226010ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <assert.h> #include <stdio.h> #include "./vpx_config.h" #include "./vp9_rtcd.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" #include "vpx_dsp/mips/inv_txfm_dspr2.h" #include "vpx_dsp/txfm_common.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 void vp9_iht16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, int pitch, int tx_type) { int i, j; DECLARE_ALIGNED(32, int16_t, out[16 * 16]); int16_t *outptr = out; int16_t temp_out[16]; uint32_t pos = 45; /* bit position for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); switch (tx_type) { case DCT_DCT: // DCT in both horizontal and vertical idct16_rows_dspr2(input, outptr, 16); idct16_cols_add_blk_dspr2(out, dest, pitch); break; case ADST_DCT: // ADST in vertical, DCT in horizontal idct16_rows_dspr2(input, outptr, 16); outptr = out; for (i = 0; i < 16; ++i) { iadst16_dspr2(outptr, temp_out); for (j = 0; j < 16; ++j) dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + dest[j * pitch + i]); outptr += 16; } break; case DCT_ADST: // DCT in vertical, ADST in horizontal { int16_t temp_in[16 * 16]; for (i = 0; i < 16; ++i) { /* prefetch row */ prefetch_load((const uint8_t *)(input + 16)); iadst16_dspr2(input, outptr); input += 16; outptr += 16; } for (i = 0; i < 16; ++i) for (j = 0; j < 16; ++j) temp_in[j * 16 + i] = out[i * 16 + j]; idct16_cols_add_blk_dspr2(temp_in, dest, pitch); break; } case ADST_ADST: // ADST in both directions { int16_t temp_in[16]; for (i = 0; i < 16; ++i) { /* prefetch row */ prefetch_load((const uint8_t *)(input + 16)); iadst16_dspr2(input, outptr); input += 16; outptr += 16; } for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; iadst16_dspr2(temp_in, temp_out); for (j = 0; j < 16; ++j) dest[j * pitch + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 6) + dest[j * pitch + i]); } break; } default: printf("vp9_short_iht16x16_add_dspr2 : Invalid tx_type\n"); break; } } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vp9/common/mips/dspr2/vp9_itrans4_dspr2.c000066400000000000000000000053321357355204000225050ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <assert.h> #include <stdio.h> #include "./vpx_config.h" #include "./vp9_rtcd.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" #include "vpx_dsp/mips/inv_txfm_dspr2.h" #include "vpx_dsp/txfm_common.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 void vp9_iht4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, int stride, int tx_type) { int i, j; DECLARE_ALIGNED(32, int16_t, out[4 * 4]); int16_t *outptr = out; int16_t temp_in[4 * 4], temp_out[4]; uint32_t pos = 45; /* bit position for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); switch (tx_type) { case DCT_DCT: // DCT in both horizontal and vertical vpx_idct4_rows_dspr2(input, outptr); vpx_idct4_columns_add_blk_dspr2(&out[0], dest, stride); break; case ADST_DCT: // ADST in vertical, DCT in horizontal vpx_idct4_rows_dspr2(input, outptr); outptr = out; for (i = 0; i < 4; ++i) { iadst4_dspr2(outptr, temp_out); for (j = 0; j < 4; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * stride + i]); outptr += 4; } break; case DCT_ADST: // DCT in vertical, ADST in horizontal for (i = 0; i < 4; ++i) { iadst4_dspr2(input, outptr); input += 4; outptr += 4; } for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) { temp_in[i * 4 + j] = out[j * 4 + i]; } } vpx_idct4_columns_add_blk_dspr2(&temp_in[0], dest, stride); break; case ADST_ADST: // ADST in both directions for (i = 0; i < 4; ++i) { iadst4_dspr2(input, outptr); input += 4; outptr += 4; } for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; iadst4_dspr2(temp_in, temp_out); for (j = 0; j < 4; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * stride + i]); } break; default: printf("vp9_short_iht4x4_add_dspr2 : Invalid tx_type\n"); break; } } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vp9/common/mips/dspr2/vp9_itrans8_dspr2.c000066400000000000000000000051221357355204000225060ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <assert.h> #include <stdio.h> #include "./vpx_config.h" #include "./vp9_rtcd.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_blockd.h" #include "vpx_dsp/mips/inv_txfm_dspr2.h" #include "vpx_dsp/txfm_common.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 void vp9_iht8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, int stride, int tx_type) { int i, j; DECLARE_ALIGNED(32, int16_t, out[8 * 8]); int16_t *outptr = out; int16_t temp_in[8 * 8], temp_out[8]; uint32_t pos = 45; /* bit position for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); switch (tx_type) { case DCT_DCT: // DCT in both horizontal and vertical idct8_rows_dspr2(input, outptr, 8); idct8_columns_add_blk_dspr2(&out[0], dest, stride); break; case ADST_DCT: // ADST in vertical, DCT in horizontal idct8_rows_dspr2(input, outptr, 8); for (i = 0; i < 8; ++i) { iadst8_dspr2(&out[i * 8], temp_out); for (j = 0; j < 8; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * stride + i]); } break; case DCT_ADST: // DCT in vertical, ADST in horizontal for (i = 0; i < 8; ++i) { iadst8_dspr2(input, outptr); input += 8; outptr += 8; } for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) { temp_in[i * 8 + j] = out[j * 8 + i]; } } idct8_columns_add_blk_dspr2(&temp_in[0], dest, stride); break; case ADST_ADST: // ADST in both directions for (i = 0; i < 8; ++i) { iadst8_dspr2(input, outptr); input += 8; outptr += 8; } for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; iadst8_dspr2(temp_in, temp_out); for (j = 0; j < 8; ++j) dest[j * stride + i] = clip_pixel(ROUND_POWER_OF_TWO(temp_out[j], 5) + dest[j * stride + i]); } break; default: printf("vp9_short_iht8x8_add_dspr2 : Invalid tx_type\n"); break; } } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vp9/common/mips/msa/000077500000000000000000000000001357355204000165705ustar00rootroot00000000000000libvpx-1.8.2/vp9/common/mips/msa/vp9_idct16x16_msa.c000066400000000000000000000047621357355204000220330ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./vp9_rtcd.h"
#include "vp9/common/vp9_enums.h"
#include "vpx_dsp/mips/inv_txfm_msa.h"

void vp9_iht16x16_256_add_msa(const int16_t *input, uint8_t *dst,
                              int32_t dst_stride, int32_t tx_type) {
  int32_t i;
  DECLARE_ALIGNED(32, int16_t, out[16 * 16]);
  int16_t *out_ptr = &out[0];

  switch (tx_type) {
    case DCT_DCT:
      /* transform rows */
      for (i = 0; i < 2; ++i) {
        /* process 16 * 8 block */
        vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
      }

      /* transform columns */
      for (i = 0; i < 2; ++i) {
        /* process 8 * 16 block */
        vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
                                         dst_stride);
      }
      break;
    case ADST_DCT:
      /* transform rows */
      for (i = 0; i < 2; ++i) {
        /* process 16 * 8 block */
        vpx_idct16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
      }

      /* transform columns */
      for (i = 0; i < 2; ++i) {
        vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
                                          (dst + (i << 3)), dst_stride);
      }
      break;
    case DCT_ADST:
      /* transform rows */
      for (i = 0; i < 2; ++i) {
        /* process 16 * 8 block */
        vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
      }

      /* transform columns */
      for (i = 0; i < 2; ++i) {
        /* process 8 * 16 block */
        vpx_idct16_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)),
                                         dst_stride);
      }
      break;
    case ADST_ADST:
      /* transform rows */
      for (i = 0; i < 2; ++i) {
        /* process 16 * 8 block */
        vpx_iadst16_1d_rows_msa((input + (i << 7)), (out_ptr + (i << 7)));
      }

      /* transform columns */
      for (i = 0; i < 2; ++i) {
        vpx_iadst16_1d_columns_addblk_msa((out_ptr + (i << 3)),
                                          (dst + (i << 3)), dst_stride);
      }
      break;
    default: assert(0); break;
  }
}
libvpx-1.8.2/vp9/common/mips/msa/vp9_idct4x4_msa.c000066400000000000000000000042041357355204000216550ustar00rootroot00000000000000/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./vp9_rtcd.h"
#include "vp9/common/vp9_enums.h"
#include "vpx_dsp/mips/inv_txfm_msa.h"

void vp9_iht4x4_16_add_msa(const int16_t *input, uint8_t *dst,
                           int32_t dst_stride, int32_t tx_type) {
  v8i16 in0, in1, in2, in3;

  /* load vector elements of 4x4 block */
  LD4x4_SH(input, in0, in1, in2, in3);
  TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);

  switch (tx_type) {
    case DCT_DCT:
      /* DCT in horizontal */
      VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      /* DCT in vertical */
      TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
      VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      break;
    case ADST_DCT:
      /* DCT in horizontal */
      VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      /* ADST in vertical */
      TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
      VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      break;
    case DCT_ADST:
      /* ADST in horizontal */
      VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      /* DCT in vertical */
      TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
      VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      break;
    case ADST_ADST:
      /* ADST in horizontal */
      VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      /* ADST in vertical */
      TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3);
      VP9_IADST4x4(in0, in1, in2, in3, in0, in1, in2, in3);
      break;
    default: assert(0); break;
  }

  /* final rounding (add 2^3, divide by 2^4) and shift */
  SRARI_H4_SH(in0, in1, in2, in3, 4);
  /* add block and store 4x4 */
  ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride);
}
libvpx-1.8.2/vp9/common/mips/msa/vp9_idct8x8_msa.c000066400000000000000000000061051357355204000216670ustar00rootroot00000000000000/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./vp9_rtcd.h"
#include "vp9/common/vp9_enums.h"
#include "vpx_dsp/mips/inv_txfm_msa.h"

void vp9_iht8x8_64_add_msa(const int16_t *input, uint8_t *dst,
                           int32_t dst_stride, int32_t tx_type) {
  v8i16 in0, in1, in2, in3, in4, in5, in6, in7;

  /* load vector elements of 8x8 block */
  LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7);

  TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                     in3, in4, in5, in6, in7);

  switch (tx_type) {
    case DCT_DCT:
      /* DCT in horizontal */
      VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                     in3, in4, in5, in6, in7);
      /* DCT in vertical */
      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                         in3, in4, in5, in6, in7);
      VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                     in3, in4, in5, in6, in7);
      break;
    case ADST_DCT:
      /* DCT in horizontal */
      VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                     in3, in4, in5, in6, in7);
      /* ADST in vertical */
      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                         in3, in4, in5, in6, in7);
      VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
                in4, in5, in6, in7);
      break;
    case DCT_ADST:
      /* ADST in horizontal */
      VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
                in4, in5, in6, in7);
      /* DCT in vertical */
      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                         in3, in4, in5, in6, in7);
      VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                     in3, in4, in5, in6, in7);
      break;
    case ADST_ADST:
      /* ADST in horizontal */
      VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
                in4, in5, in6, in7);
      /* ADST in vertical */
      TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2,
                         in3, in4, in5, in6, in7);
      VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3,
                in4, in5, in6, in7);
      break;
    default: assert(0); break;
  }

  /* final rounding (add 2^4, divide by 2^5) and shift */
  SRARI_H4_SH(in0, in1, in2, in3, 5);
  SRARI_H4_SH(in4, in5, in6, in7, 5);

  /* add block and store 8x8 */
  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3);
  dst += (4 * dst_stride);
  VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7);
}
libvpx-1.8.2/vp9/common/mips/msa/vp9_mfqe_msa.c000066400000000000000000000113021357355204000213210ustar00rootroot00000000000000/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
*/ #include "./vp9_rtcd.h" #include "vp9/common/vp9_onyxc_int.h" #include "vpx_dsp/mips/macros_msa.h" static void filter_by_weight8x8_msa(const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, int32_t src_weight) { int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight; int32_t row; uint64_t src0_d, src1_d, dst0_d, dst1_d; v16i8 src0 = { 0 }; v16i8 src1 = { 0 }; v16i8 dst0 = { 0 }; v16i8 dst1 = { 0 }; v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l; src_wt = __msa_fill_h(src_weight); dst_wt = __msa_fill_h(dst_weight); for (row = 2; row--;) { LD2(src_ptr, src_stride, src0_d, src1_d); src_ptr += (2 * src_stride); LD2(dst_ptr, dst_stride, dst0_d, dst1_d); INSERT_D2_SB(src0_d, src1_d, src0); INSERT_D2_SB(dst0_d, dst1_d, dst0); LD2(src_ptr, src_stride, src0_d, src1_d); src_ptr += (2 * src_stride); LD2((dst_ptr + 2 * dst_stride), dst_stride, dst0_d, dst1_d); INSERT_D2_SB(src0_d, src1_d, src1); INSERT_D2_SB(dst0_d, dst1_d, dst1); UNPCK_UB_SH(src0, src_r, src_l); UNPCK_UB_SH(dst0, dst_r, dst_l); res_h_r = (src_r * src_wt); res_h_r += (dst_r * dst_wt); res_h_l = (src_l * src_wt); res_h_l += (dst_l * dst_wt); SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); dst0 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r); ST8x2_UB(dst0, dst_ptr, dst_stride); dst_ptr += (2 * dst_stride); UNPCK_UB_SH(src1, src_r, src_l); UNPCK_UB_SH(dst1, dst_r, dst_l); res_h_r = (src_r * src_wt); res_h_r += (dst_r * dst_wt); res_h_l = (src_l * src_wt); res_h_l += (dst_l * dst_wt); SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); dst1 = (v16i8)__msa_pckev_b((v16i8)res_h_l, (v16i8)res_h_r); ST8x2_UB(dst1, dst_ptr, dst_stride); dst_ptr += (2 * dst_stride); } } static void filter_by_weight16x16_msa(const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, int32_t src_weight) { int32_t dst_weight = (1 << MFQE_PRECISION) - src_weight; int32_t row; v16i8 src0, src1, src2, src3, dst0, dst1, dst2, dst3; v8i16 src_wt, dst_wt, res_h_r, res_h_l, src_r, src_l, dst_r, dst_l; src_wt = __msa_fill_h(src_weight); dst_wt = __msa_fill_h(dst_weight); for (row = 4; row--;) { LD_SB4(src_ptr, src_stride, src0, src1, src2, src3); src_ptr += (4 * src_stride); LD_SB4(dst_ptr, dst_stride, dst0, dst1, dst2, dst3); UNPCK_UB_SH(src0, src_r, src_l); UNPCK_UB_SH(dst0, dst_r, dst_l); res_h_r = (src_r * src_wt); res_h_r += (dst_r * dst_wt); res_h_l = (src_l * src_wt); res_h_l += (dst_l * dst_wt); SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); dst_ptr += dst_stride; UNPCK_UB_SH(src1, src_r, src_l); UNPCK_UB_SH(dst1, dst_r, dst_l); res_h_r = (src_r * src_wt); res_h_r += (dst_r * dst_wt); res_h_l = (src_l * src_wt); res_h_l += (dst_l * dst_wt); SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); dst_ptr += dst_stride; UNPCK_UB_SH(src2, src_r, src_l); UNPCK_UB_SH(dst2, dst_r, dst_l); res_h_r = (src_r * src_wt); res_h_r += (dst_r * dst_wt); res_h_l = (src_l * src_wt); res_h_l += (dst_l * dst_wt); SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); dst_ptr += dst_stride; UNPCK_UB_SH(src3, src_r, src_l); UNPCK_UB_SH(dst3, dst_r, dst_l); res_h_r = (src_r * src_wt); res_h_r += (dst_r * dst_wt); res_h_l = (src_l * src_wt); res_h_l += (dst_l * dst_wt); SRARI_H2_SH(res_h_r, res_h_l, MFQE_PRECISION); PCKEV_ST_SB(res_h_r, res_h_l, dst_ptr); dst_ptr += dst_stride; } } void vp9_filter_by_weight8x8_msa(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight) { 
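  /* A small worked example of what the call below computes, assuming
   * MFQE_PRECISION == 4 (its value in vp9/common/vp9_mfqe.h): src_weight and
   * dst_weight always sum to 1 << MFQE_PRECISION, so each output pixel is the
   * rounded fixed-point blend
   *
   *   dst = (src * src_weight + dst * dst_weight + 8) >> 4;
   *
   * e.g. src_weight == 10 gives dst_weight == 6, a 10/16 : 6/16 mix. The
   * SRARI_* macros in the helpers above supply the rounding right-shift. */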
  filter_by_weight8x8_msa(src, src_stride, dst, dst_stride, src_weight);
}

void vp9_filter_by_weight16x16_msa(const uint8_t *src, int src_stride,
                                   uint8_t *dst, int dst_stride,
                                   int src_weight) {
  filter_by_weight16x16_msa(src, src_stride, dst, dst_stride, src_weight);
}
libvpx-1.8.2/vp9/common/ppc/000077500000000000000000000000001357355204000156225ustar00rootroot00000000000000libvpx-1.8.2/vp9/common/ppc/vp9_idct_vsx.c000066400000000000000000000062341357355204000204140ustar00rootroot00000000000000/*
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./vp9_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/ppc/inv_txfm_vsx.h"
#include "vpx_dsp/ppc/bitdepth_conversion_vsx.h"
#include "vp9/common/vp9_enums.h"

void vp9_iht4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  int16x8_t in[2], out[2];

  in[0] = load_tran_low(0, input);
  in[1] = load_tran_low(8 * sizeof(*input), input);

  switch (tx_type) {
    case DCT_DCT:
      vpx_idct4_vsx(in, out);
      vpx_idct4_vsx(out, in);
      break;
    case ADST_DCT:
      vpx_idct4_vsx(in, out);
      vp9_iadst4_vsx(out, in);
      break;
    case DCT_ADST:
      vp9_iadst4_vsx(in, out);
      vpx_idct4_vsx(out, in);
      break;
    default:
      assert(tx_type == ADST_ADST);
      vp9_iadst4_vsx(in, out);
      vp9_iadst4_vsx(out, in);
      break;
  }

  vpx_round_store4x4_vsx(in, out, dest, stride);
}

void vp9_iht8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, int stride,
                           int tx_type) {
  int16x8_t in[8], out[8];

  // load input data
  in[0] = load_tran_low(0, input);
  in[1] = load_tran_low(8 * sizeof(*input), input);
  in[2] = load_tran_low(2 * 8 * sizeof(*input), input);
  in[3] = load_tran_low(3 * 8 * sizeof(*input), input);
  in[4] = load_tran_low(4 * 8 * sizeof(*input), input);
  in[5] = load_tran_low(5 * 8 * sizeof(*input), input);
  in[6] = load_tran_low(6 * 8 * sizeof(*input), input);
  in[7] = load_tran_low(7 * 8 * sizeof(*input), input);

  switch (tx_type) {
    case DCT_DCT:
      vpx_idct8_vsx(in, out);
      vpx_idct8_vsx(out, in);
      break;
    case ADST_DCT:
      vpx_idct8_vsx(in, out);
      vp9_iadst8_vsx(out, in);
      break;
    case DCT_ADST:
      vp9_iadst8_vsx(in, out);
      vpx_idct8_vsx(out, in);
      break;
    default:
      assert(tx_type == ADST_ADST);
      vp9_iadst8_vsx(in, out);
      vp9_iadst8_vsx(out, in);
      break;
  }

  vpx_round_store8x8_vsx(in, dest, stride);
}

void vp9_iht16x16_256_add_vsx(const tran_low_t *input, uint8_t *dest,
                              int stride, int tx_type) {
  int16x8_t in0[16], in1[16];

  LOAD_INPUT16(load_tran_low, input, 0, 8 * sizeof(*input), in0);
  LOAD_INPUT16(load_tran_low, input, 8 * 8 * 2 * sizeof(*input),
               8 * sizeof(*input), in1);

  switch (tx_type) {
    case DCT_DCT:
      vpx_idct16_vsx(in0, in1);
      vpx_idct16_vsx(in0, in1);
      break;
    case ADST_DCT:
      vpx_idct16_vsx(in0, in1);
      vpx_iadst16_vsx(in0, in1);
      break;
    case DCT_ADST:
      vpx_iadst16_vsx(in0, in1);
      vpx_idct16_vsx(in0, in1);
      break;
    default:
      assert(tx_type == ADST_ADST);
      vpx_iadst16_vsx(in0, in1);
      vpx_iadst16_vsx(in0, in1);
      break;
  }

  vpx_round_store16x16_vsx(in0, in1, dest, stride);
}
libvpx-1.8.2/vp9/common/vp9_alloccommon.c000066400000000000000000000133171357355204000203120ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "vpx_mem/vpx_mem.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_onyxc_int.h" void vp9_set_mi_size(int *mi_rows, int *mi_cols, int *mi_stride, int width, int height) { const int aligned_width = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2); const int aligned_height = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2); *mi_cols = aligned_width >> MI_SIZE_LOG2; *mi_rows = aligned_height >> MI_SIZE_LOG2; *mi_stride = calc_mi_size(*mi_cols); } void vp9_set_mb_size(int *mb_rows, int *mb_cols, int *mb_num, int mi_rows, int mi_cols) { *mb_cols = (mi_cols + 1) >> 1; *mb_rows = (mi_rows + 1) >> 1; *mb_num = (*mb_rows) * (*mb_cols); } void vp9_set_mb_mi(VP9_COMMON *cm, int width, int height) { vp9_set_mi_size(&cm->mi_rows, &cm->mi_cols, &cm->mi_stride, width, height); vp9_set_mb_size(&cm->mb_rows, &cm->mb_cols, &cm->MBs, cm->mi_rows, cm->mi_cols); } static int alloc_seg_map(VP9_COMMON *cm, int seg_map_size) { int i; for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { cm->seg_map_array[i] = (uint8_t *)vpx_calloc(seg_map_size, 1); if (cm->seg_map_array[i] == NULL) return 1; } cm->seg_map_alloc_size = seg_map_size; // Init the index. cm->seg_map_idx = 0; cm->prev_seg_map_idx = 1; cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; return 0; } static void free_seg_map(VP9_COMMON *cm) { int i; for (i = 0; i < NUM_PING_PONG_BUFFERS; ++i) { vpx_free(cm->seg_map_array[i]); cm->seg_map_array[i] = NULL; } cm->current_frame_seg_map = NULL; cm->last_frame_seg_map = NULL; } void vp9_free_ref_frame_buffers(BufferPool *pool) { int i; for (i = 0; i < FRAME_BUFFERS; ++i) { if (!pool->frame_bufs[i].released && pool->frame_bufs[i].raw_frame_buffer.data != NULL) { pool->release_fb_cb(pool->cb_priv, &pool->frame_bufs[i].raw_frame_buffer); pool->frame_bufs[i].ref_count = 0; pool->frame_bufs[i].released = 1; } vpx_free(pool->frame_bufs[i].mvs); pool->frame_bufs[i].mvs = NULL; vpx_free_frame_buffer(&pool->frame_bufs[i].buf); } } void vp9_free_postproc_buffers(VP9_COMMON *cm) { #if CONFIG_VP9_POSTPROC vpx_free_frame_buffer(&cm->post_proc_buffer); vpx_free_frame_buffer(&cm->post_proc_buffer_int); vpx_free(cm->postproc_state.limits); cm->postproc_state.limits = NULL; vpx_free(cm->postproc_state.generated_noise); cm->postproc_state.generated_noise = NULL; #else (void)cm; #endif } void vp9_free_context_buffers(VP9_COMMON *cm) { cm->free_mi(cm); free_seg_map(cm); vpx_free(cm->above_context); cm->above_context = NULL; vpx_free(cm->above_seg_context); cm->above_seg_context = NULL; vpx_free(cm->lf.lfm); cm->lf.lfm = NULL; } int vp9_alloc_loop_filter(VP9_COMMON *cm) { vpx_free(cm->lf.lfm); // Each lfm holds bit masks for all the 8x8 blocks in a 64x64 region. The // stride and rows are rounded up / truncated to a multiple of 8. 
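  // Worked example: for a 1920x1080 frame, mi_cols == 240 and mi_rows == 135
  // (in 8x8 units), so lfm_stride == (240 + 7) >> 3 == 30 and the calloc
  // below requests 30 * ((135 + 7) >> 3) == 30 * 17 == 510 masks -- one
  // LOOP_FILTER_MASK per 64x64 superblock.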
cm->lf.lfm_stride = (cm->mi_cols + (MI_BLOCK_SIZE - 1)) >> 3; cm->lf.lfm = (LOOP_FILTER_MASK *)vpx_calloc( ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride, sizeof(*cm->lf.lfm)); if (!cm->lf.lfm) return 1; return 0; } int vp9_alloc_context_buffers(VP9_COMMON *cm, int width, int height) { int new_mi_size; vp9_set_mb_mi(cm, width, height); new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows); if (cm->mi_alloc_size < new_mi_size) { cm->free_mi(cm); if (cm->alloc_mi(cm, new_mi_size)) goto fail; } if (cm->seg_map_alloc_size < cm->mi_rows * cm->mi_cols) { // Create the segmentation map structure and set to 0. free_seg_map(cm); if (alloc_seg_map(cm, cm->mi_rows * cm->mi_cols)) goto fail; } if (cm->above_context_alloc_cols < cm->mi_cols) { vpx_free(cm->above_context); cm->above_context = (ENTROPY_CONTEXT *)vpx_calloc( 2 * mi_cols_aligned_to_sb(cm->mi_cols) * MAX_MB_PLANE, sizeof(*cm->above_context)); if (!cm->above_context) goto fail; vpx_free(cm->above_seg_context); cm->above_seg_context = (PARTITION_CONTEXT *)vpx_calloc( mi_cols_aligned_to_sb(cm->mi_cols), sizeof(*cm->above_seg_context)); if (!cm->above_seg_context) goto fail; cm->above_context_alloc_cols = cm->mi_cols; } if (vp9_alloc_loop_filter(cm)) goto fail; return 0; fail: // clear the mi_* values to force a realloc on resync vp9_set_mb_mi(cm, 0, 0); vp9_free_context_buffers(cm); return 1; } void vp9_remove_common(VP9_COMMON *cm) { #if CONFIG_VP9_POSTPROC vp9_free_postproc_buffers(cm); #endif vp9_free_context_buffers(cm); vpx_free(cm->fc); cm->fc = NULL; vpx_free(cm->frame_contexts); cm->frame_contexts = NULL; } void vp9_init_context_buffers(VP9_COMMON *cm) { cm->setup_mi(cm); if (cm->last_frame_seg_map) memset(cm->last_frame_seg_map, 0, cm->mi_rows * cm->mi_cols); } void vp9_swap_current_and_last_seg_map(VP9_COMMON *cm) { // Swap indices. const int tmp = cm->seg_map_idx; cm->seg_map_idx = cm->prev_seg_map_idx; cm->prev_seg_map_idx = tmp; cm->current_frame_seg_map = cm->seg_map_array[cm->seg_map_idx]; cm->last_frame_seg_map = cm->seg_map_array[cm->prev_seg_map_idx]; } libvpx-1.8.2/vp9/common/vp9_alloccommon.h000066400000000000000000000030601357355204000203110ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_ALLOCCOMMON_H_ #define VPX_VP9_COMMON_VP9_ALLOCCOMMON_H_ #define INVALID_IDX (-1) // Invalid buffer index. 
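/* A minimal usage sketch (hypothetical caller): allocation is separate from
 * initialization, and vp9_alloc_context_buffers() returns nonzero on failure,
 * cleaning up any partial allocations itself:
 *
 *   if (vp9_alloc_context_buffers(cm, width, height))
 *     return VPX_CODEC_MEM_ERROR;
 *   vp9_init_context_buffers(cm);
 *   ...
 *   vp9_free_context_buffers(cm);
 */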
#ifdef __cplusplus extern "C" { #endif struct VP9Common; struct BufferPool; void vp9_remove_common(struct VP9Common *cm); int vp9_alloc_loop_filter(struct VP9Common *cm); int vp9_alloc_context_buffers(struct VP9Common *cm, int width, int height); void vp9_init_context_buffers(struct VP9Common *cm); void vp9_free_context_buffers(struct VP9Common *cm); void vp9_free_ref_frame_buffers(struct BufferPool *pool); void vp9_free_postproc_buffers(struct VP9Common *cm); int vp9_alloc_state_buffers(struct VP9Common *cm, int width, int height); void vp9_free_state_buffers(struct VP9Common *cm); void vp9_set_mi_size(int *mi_rows, int *mi_cols, int *mi_stride, int width, int height); void vp9_set_mb_size(int *mb_rows, int *mb_cols, int *mb_num, int mi_rows, int mi_cols); void vp9_set_mb_mi(struct VP9Common *cm, int width, int height); void vp9_swap_current_and_last_seg_map(struct VP9Common *cm); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_ALLOCCOMMON_H_ libvpx-1.8.2/vp9/common/vp9_blockd.c000066400000000000000000000116211357355204000172410ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp9/common/vp9_blockd.h" PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, const MODE_INFO *left_mi, int b) { if (b == 0 || b == 2) { if (!left_mi || is_inter_block(left_mi)) return DC_PRED; return get_y_mode(left_mi, b + 1); } else { assert(b == 1 || b == 3); return cur_mi->bmi[b - 1].as_mode; } } PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, const MODE_INFO *above_mi, int b) { if (b == 0 || b == 1) { if (!above_mi || is_inter_block(above_mi)) return DC_PRED; return get_y_mode(above_mi, b + 2); } else { assert(b == 2 || b == 3); return cur_mi->bmi[b - 2].as_mode; } } void vp9_foreach_transformed_block_in_plane( const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, foreach_transformed_block_visitor visit, void *arg) { const struct macroblockd_plane *const pd = &xd->plane[plane]; const MODE_INFO *mi = xd->mi[0]; // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 // transform size varies per plane, look it up in a common way. const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size; const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; const int step = 1 << (tx_size << 1); int i = 0, r, c; // If mb_to_right_edge is < 0 we are in a situation in which // the current block size extends into the UMV and we won't // visit the sub blocks that are wholly within the UMV. const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); const int extra_step = ((num_4x4_w - max_blocks_wide) >> tx_size) * step; // Keep track of the row and column of the blocks we use so that we know // if we are in the unrestricted motion border. 
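  // Worked example (luma, no subsampling): for a fully-visible BLOCK_64X64
  // with tx_size == TX_16X16, num_4x4_w == 16 and step == 1 << (2 << 1) == 16,
  // so the loops below visit a 4x4 grid of transform blocks and advance the
  // raster index i by 16 per visit. If the block overhangs the right frame
  // edge by 16 pixels, mb_to_right_edge == -128 (1/8-pel units), so
  // max_blocks_wide becomes 16 + (-128 >> 5) == 12 and extra_step re-adds the
  // skipped blocks' contribution to i at the end of each row.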
for (r = 0; r < max_blocks_high; r += (1 << tx_size)) { // Skip visiting the sub blocks that are wholly within the UMV. for (c = 0; c < max_blocks_wide; c += (1 << tx_size)) { visit(plane, i, r, c, plane_bsize, tx_size, arg); i += step; } i += extra_step; } } void vp9_foreach_transformed_block(const MACROBLOCKD *const xd, BLOCK_SIZE bsize, foreach_transformed_block_visitor visit, void *arg) { int plane; for (plane = 0; plane < MAX_MB_PLANE; ++plane) vp9_foreach_transformed_block_in_plane(xd, bsize, plane, visit, arg); } void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, int has_eob, int aoff, int loff) { ENTROPY_CONTEXT *const a = pd->above_context + aoff; ENTROPY_CONTEXT *const l = pd->left_context + loff; const int tx_size_in_blocks = 1 << tx_size; // above if (has_eob && xd->mb_to_right_edge < 0) { int i; const int blocks_wide = num_4x4_blocks_wide_lookup[plane_bsize] + (xd->mb_to_right_edge >> (5 + pd->subsampling_x)); int above_contexts = tx_size_in_blocks; if (above_contexts + aoff > blocks_wide) above_contexts = blocks_wide - aoff; for (i = 0; i < above_contexts; ++i) a[i] = has_eob; for (i = above_contexts; i < tx_size_in_blocks; ++i) a[i] = 0; } else { memset(a, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); } // left if (has_eob && xd->mb_to_bottom_edge < 0) { int i; const int blocks_high = num_4x4_blocks_high_lookup[plane_bsize] + (xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); int left_contexts = tx_size_in_blocks; if (left_contexts + loff > blocks_high) left_contexts = blocks_high - loff; for (i = 0; i < left_contexts; ++i) l[i] = has_eob; for (i = left_contexts; i < tx_size_in_blocks; ++i) l[i] = 0; } else { memset(l, has_eob, sizeof(ENTROPY_CONTEXT) * tx_size_in_blocks); } } void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y) { int i; for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].subsampling_x = i ? ss_x : 0; xd->plane[i].subsampling_y = i ? ss_y : 0; } } libvpx-1.8.2/vp9/common/vp9_blockd.h000066400000000000000000000223161357355204000172510ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_BLOCKD_H_ #define VPX_VP9_COMMON_VP9_BLOCKD_H_ #include "./vpx_config.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_common_data.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_mv.h" #include "vp9/common/vp9_scale.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_tile_common.h" #ifdef __cplusplus extern "C" { #endif #define MAX_MB_PLANE 3 typedef enum { KEY_FRAME = 0, INTER_FRAME = 1, FRAME_TYPES, } FRAME_TYPE; static INLINE int is_inter_mode(PREDICTION_MODE mode) { return mode >= NEARESTMV && mode <= NEWMV; } /* For keyframes, intra block modes are predicted by the (already decoded) modes for the Y blocks to the left and above us; for interframes, there is a single probability table. 
*/ typedef struct { PREDICTION_MODE as_mode; int_mv as_mv[2]; // first, second inter predictor motion vectors } b_mode_info; // Note that the rate-distortion optimization loop, bit-stream writer, and // decoder implementation modules critically rely on the defined entry values // specified herein. They should be refactored concurrently. #define NONE (-1) #define INTRA_FRAME 0 #define LAST_FRAME 1 #define GOLDEN_FRAME 2 #define ALTREF_FRAME 3 #define MAX_REF_FRAMES 4 #define MAX_INTER_REF_FRAMES 3 typedef int8_t MV_REFERENCE_FRAME; // This structure now relates to 8x8 block regions. typedef struct MODE_INFO { // Common for both INTER and INTRA blocks BLOCK_SIZE sb_type; PREDICTION_MODE mode; TX_SIZE tx_size; int8_t skip; int8_t segment_id; int8_t seg_id_predicted; // valid only when temporal_update is enabled // Only for INTRA blocks PREDICTION_MODE uv_mode; // Only for INTER blocks INTERP_FILTER interp_filter; // if ref_frame[idx] is equal to ALTREF_FRAME then // MACROBLOCKD::block_ref[idx] is an altref MV_REFERENCE_FRAME ref_frame[2]; // TODO(slavarnway): Delete and use bmi[3].as_mv[] instead. int_mv mv[2]; b_mode_info bmi[4]; } MODE_INFO; static INLINE PREDICTION_MODE get_y_mode(const MODE_INFO *mi, int block) { return mi->sb_type < BLOCK_8X8 ? mi->bmi[block].as_mode : mi->mode; } static INLINE int is_inter_block(const MODE_INFO *mi) { return mi->ref_frame[0] > INTRA_FRAME; } static INLINE int has_second_ref(const MODE_INFO *mi) { return mi->ref_frame[1] > INTRA_FRAME; } PREDICTION_MODE vp9_left_block_mode(const MODE_INFO *cur_mi, const MODE_INFO *left_mi, int b); PREDICTION_MODE vp9_above_block_mode(const MODE_INFO *cur_mi, const MODE_INFO *above_mi, int b); enum mv_precision { MV_PRECISION_Q3, MV_PRECISION_Q4 }; struct buf_2d { uint8_t *buf; int stride; }; struct macroblockd_plane { tran_low_t *dqcoeff; int subsampling_x; int subsampling_y; struct buf_2d dst; struct buf_2d pre[2]; ENTROPY_CONTEXT *above_context; ENTROPY_CONTEXT *left_context; int16_t seg_dequant[MAX_SEGMENTS][2]; // number of 4x4s in current block uint16_t n4_w, n4_h; // log2 of n4_w, n4_h uint8_t n4_wl, n4_hl; // encoder const int16_t *dequant; int *eob; }; #define BLOCK_OFFSET(x, i) ((x) + (i)*16) typedef struct RefBuffer { // TODO(dkovalev): idx is not really required and should be removed, now it // is used in vp9_onyxd_if.c int idx; YV12_BUFFER_CONFIG *buf; struct scale_factors sf; } RefBuffer; typedef struct macroblockd { struct macroblockd_plane plane[MAX_MB_PLANE]; uint8_t bmode_blocks_wl; uint8_t bmode_blocks_hl; FRAME_COUNTS *counts; TileInfo tile; int mi_stride; // Grid of 8x8 cells is placed over the block. 
// If some of them belong to the same mbtree-block // they will just have same mi[i][j] value MODE_INFO **mi; MODE_INFO *left_mi; MODE_INFO *above_mi; unsigned int max_blocks_wide; unsigned int max_blocks_high; const vpx_prob (*partition_probs)[PARTITION_TYPES - 1]; /* Distance of MB away from frame edges */ int mb_to_left_edge; int mb_to_right_edge; int mb_to_top_edge; int mb_to_bottom_edge; FRAME_CONTEXT *fc; /* pointers to reference frames */ const RefBuffer *block_refs[2]; /* pointer to current frame */ const YV12_BUFFER_CONFIG *cur_buf; ENTROPY_CONTEXT *above_context[MAX_MB_PLANE]; ENTROPY_CONTEXT left_context[MAX_MB_PLANE][16]; PARTITION_CONTEXT *above_seg_context; PARTITION_CONTEXT left_seg_context[8]; #if CONFIG_VP9_HIGHBITDEPTH /* Bit depth: 8, 10, 12 */ int bd; #endif int lossless; int corrupted; struct vpx_internal_error_info *error_info; PARTITION_TYPE *partition; } MACROBLOCKD; static INLINE PLANE_TYPE get_plane_type(int plane) { return (PLANE_TYPE)(plane > 0); } static INLINE BLOCK_SIZE get_subsize(BLOCK_SIZE bsize, PARTITION_TYPE partition) { return subsize_lookup[partition][bsize]; } extern const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES]; static INLINE TX_TYPE get_tx_type(PLANE_TYPE plane_type, const MACROBLOCKD *xd) { const MODE_INFO *const mi = xd->mi[0]; if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) return DCT_DCT; return intra_mode_to_tx_type_lookup[mi->mode]; } static INLINE TX_TYPE get_tx_type_4x4(PLANE_TYPE plane_type, const MACROBLOCKD *xd, int ib) { const MODE_INFO *const mi = xd->mi[0]; if (plane_type != PLANE_TYPE_Y || xd->lossless || is_inter_block(mi)) return DCT_DCT; return intra_mode_to_tx_type_lookup[get_y_mode(mi, ib)]; } void vp9_setup_block_planes(MACROBLOCKD *xd, int ss_x, int ss_y); static INLINE TX_SIZE get_uv_tx_size(const MODE_INFO *mi, const struct macroblockd_plane *pd) { assert(mi->sb_type < BLOCK_8X8 || ss_size_lookup[mi->sb_type][pd->subsampling_x][pd->subsampling_y] != BLOCK_INVALID); return uv_txsize_lookup[mi->sb_type][mi->tx_size][pd->subsampling_x] [pd->subsampling_y]; } static INLINE BLOCK_SIZE get_plane_block_size(BLOCK_SIZE bsize, const struct macroblockd_plane *pd) { return ss_size_lookup[bsize][pd->subsampling_x][pd->subsampling_y]; } static INLINE void reset_skip_context(MACROBLOCKD *xd, BLOCK_SIZE bsize) { int i; for (i = 0; i < MAX_MB_PLANE; i++) { struct macroblockd_plane *const pd = &xd->plane[i]; const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide_lookup[plane_bsize]); memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high_lookup[plane_bsize]); } } static INLINE const vpx_prob *get_y_mode_probs(const MODE_INFO *mi, const MODE_INFO *above_mi, const MODE_INFO *left_mi, int block) { const PREDICTION_MODE above = vp9_above_block_mode(mi, above_mi, block); const PREDICTION_MODE left = vp9_left_block_mode(mi, left_mi, block); return vp9_kf_y_mode_prob[above][left]; } typedef void (*foreach_transformed_block_visitor)(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg); void vp9_foreach_transformed_block_in_plane( const MACROBLOCKD *const xd, BLOCK_SIZE bsize, int plane, foreach_transformed_block_visitor visit, void *arg); void vp9_foreach_transformed_block(const MACROBLOCKD *const xd, BLOCK_SIZE bsize, foreach_transformed_block_visitor visit, void *arg); void vp9_set_contexts(const MACROBLOCKD *xd, struct macroblockd_plane *pd, BLOCK_SIZE plane_bsize, TX_SIZE 
                      tx_size, int has_eob, int aoff, int loff);

#if CONFIG_MISMATCH_DEBUG
#define TX_UNIT_SIZE_LOG2 2
static INLINE void mi_to_pixel_loc(int *pixel_c, int *pixel_r, int mi_col,
                                   int mi_row, int tx_blk_col, int tx_blk_row,
                                   int subsampling_x, int subsampling_y) {
  *pixel_c = ((mi_col << MI_SIZE_LOG2) >> subsampling_x) +
             (tx_blk_col << TX_UNIT_SIZE_LOG2);
  *pixel_r = ((mi_row << MI_SIZE_LOG2) >> subsampling_y) +
             (tx_blk_row << TX_UNIT_SIZE_LOG2);
}

static INLINE int get_block_width(BLOCK_SIZE bsize) {
  const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize];
  return 4 * num_4x4_w;
}

static INLINE int get_block_height(BLOCK_SIZE bsize) {
  const int num_4x4_h = num_4x4_blocks_high_lookup[bsize];
  return 4 * num_4x4_h;
}
#endif

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_COMMON_VP9_BLOCKD_H_
libvpx-1.8.2/vp9/common/vp9_common.h000066400000000000000000000052001357355204000172760ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_COMMON_VP9_COMMON_H_
#define VPX_VP9_COMMON_VP9_COMMON_H_

/* Interface header for common constant data structures and lookup tables */

#include <assert.h>

#include "./vpx_config.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx/vpx_integer.h"
#include "vpx_ports/bitops.h"

#ifdef __cplusplus
extern "C" {
#endif

// Only need this for fixed-size arrays, for structs just assign.
#define vp9_copy(dest, src)              \
  {                                      \
    assert(sizeof(dest) == sizeof(src)); \
    memcpy(dest, src, sizeof(src));      \
  }

// Use this for variably-sized arrays.
#define vp9_copy_array(dest, src, n)           \
  {                                            \
    assert(sizeof(*(dest)) == sizeof(*(src))); \
    memcpy(dest, src, (n) * sizeof(*(src)));   \
  }

#define vp9_zero(dest) memset(&(dest), 0, sizeof(dest))
#define vp9_zero_array(dest, n) memset(dest, 0, (n) * sizeof(*(dest)))

static INLINE int get_unsigned_bits(unsigned int num_values) {
  return num_values > 0 ? get_msb(num_values) + 1 : 0;
}

#if CONFIG_DEBUG
#define CHECK_MEM_ERROR(cm, lval, expr)                                     \
  do {                                                                      \
    (lval) = (expr);                                                        \
    if (!(lval))                                                            \
      vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR,                 \
                         "Failed to allocate " #lval " at %s:%d", __FILE__, \
                         __LINE__);                                         \
  } while (0)
#else
#define CHECK_MEM_ERROR(cm, lval, expr)                     \
  do {                                                      \
    (lval) = (expr);                                        \
    if (!(lval))                                            \
      vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR, \
                         "Failed to allocate " #lval);      \
  } while (0)
#endif

#define VP9_SYNC_CODE_0 0x49
#define VP9_SYNC_CODE_1 0x83
#define VP9_SYNC_CODE_2 0x42

#define VP9_FRAME_MARKER 0x2

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_COMMON_VP9_COMMON_H_
libvpx-1.8.2/vp9/common/vp9_common_data.c000066400000000000000000000250141357355204000202650ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
*/ #include "vp9/common/vp9_common_data.h" #include "vpx_dsp/vpx_dsp_common.h" // Log 2 conversion lookup tables for block width and height const uint8_t b_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4 }; const uint8_t b_height_log2_lookup[BLOCK_SIZES] = { 0, 1, 0, 1, 2, 1, 2, 3, 2, 3, 4, 3, 4 }; const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 2, 2, 2, 4, 4, 4, 8, 8, 8, 16, 16 }; const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES] = { 1, 2, 1, 2, 4, 2, 4, 8, 4, 8, 16, 8, 16 }; // Log 2 conversion lookup tables for modeinfo width and height const uint8_t mi_width_log2_lookup[BLOCK_SIZES] = { 0, 0, 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3 }; const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 2, 2, 2, 4, 4, 4, 8, 8 }; const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 2, 1, 2, 4, 2, 4, 8, 4, 8 }; // VPXMIN(3, VPXMIN(b_width_log2_lookup(bsize), b_height_log2_lookup(bsize))) const uint8_t size_group_lookup[BLOCK_SIZES] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3 }; const uint8_t num_pels_log2_lookup[BLOCK_SIZES] = { 4, 5, 5, 6, 7, 7, 8, 9, 9, 10, 11, 11, 12 }; const PARTITION_TYPE partition_lookup[][BLOCK_SIZES] = { { // 4X4 // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID }, { // 8X8 // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID }, { // 16X16 // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID }, { // 32X32 // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE, PARTITION_INVALID, PARTITION_INVALID, PARTITION_INVALID }, { // 64X64 // 4X4, 4X8,8X4,8X8,8X16,16X8,16X16,16X32,32X16,32X32,32X64,64X32,64X64 PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_SPLIT, PARTITION_VERT, PARTITION_HORZ, PARTITION_NONE } }; const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES] = { { // PARTITION_NONE BLOCK_4X4, BLOCK_4X8, BLOCK_8X4, BLOCK_8X8, BLOCK_8X16, BLOCK_16X8, BLOCK_16X16, BLOCK_16X32, BLOCK_32X16, BLOCK_32X32, BLOCK_32X64, BLOCK_64X32, BLOCK_64X64 }, { // PARTITION_HORZ BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X4, BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X8, BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X16, BLOCK_INVALID, BLOCK_INVALID, BLOCK_64X32 }, { // PARTITION_VERT BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X8, BLOCK_INVALID, BLOCK_INVALID, BLOCK_8X16, BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X32, BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X64 }, { // PARTITION_SPLIT BLOCK_INVALID, BLOCK_INVALID, BLOCK_INVALID, BLOCK_4X4, BLOCK_INVALID, BLOCK_INVALID, 
BLOCK_8X8, BLOCK_INVALID, BLOCK_INVALID, BLOCK_16X16, BLOCK_INVALID, BLOCK_INVALID, BLOCK_32X32 } }; const TX_SIZE max_txsize_lookup[BLOCK_SIZES] = { TX_4X4, TX_4X4, TX_4X4, TX_8X8, TX_8X8, TX_8X8, TX_16X16, TX_16X16, TX_16X16, TX_32X32, TX_32X32, TX_32X32, TX_32X32 }; const BLOCK_SIZE txsize_to_bsize[TX_SIZES] = { BLOCK_4X4, // TX_4X4 BLOCK_8X8, // TX_8X8 BLOCK_16X16, // TX_16X16 BLOCK_32X32, // TX_32X32 }; const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES] = { TX_4X4, // ONLY_4X4 TX_8X8, // ALLOW_8X8 TX_16X16, // ALLOW_16X16 TX_32X32, // ALLOW_32X32 TX_32X32, // TX_MODE_SELECT }; const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2] = { // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 { { BLOCK_4X4, BLOCK_INVALID }, { BLOCK_INVALID, BLOCK_INVALID } }, { { BLOCK_4X8, BLOCK_4X4 }, { BLOCK_INVALID, BLOCK_INVALID } }, { { BLOCK_8X4, BLOCK_INVALID }, { BLOCK_4X4, BLOCK_INVALID } }, { { BLOCK_8X8, BLOCK_8X4 }, { BLOCK_4X8, BLOCK_4X4 } }, { { BLOCK_8X16, BLOCK_8X8 }, { BLOCK_INVALID, BLOCK_4X8 } }, { { BLOCK_16X8, BLOCK_INVALID }, { BLOCK_8X8, BLOCK_8X4 } }, { { BLOCK_16X16, BLOCK_16X8 }, { BLOCK_8X16, BLOCK_8X8 } }, { { BLOCK_16X32, BLOCK_16X16 }, { BLOCK_INVALID, BLOCK_8X16 } }, { { BLOCK_32X16, BLOCK_INVALID }, { BLOCK_16X16, BLOCK_16X8 } }, { { BLOCK_32X32, BLOCK_32X16 }, { BLOCK_16X32, BLOCK_16X16 } }, { { BLOCK_32X64, BLOCK_32X32 }, { BLOCK_INVALID, BLOCK_16X32 } }, { { BLOCK_64X32, BLOCK_INVALID }, { BLOCK_32X32, BLOCK_32X16 } }, { { BLOCK_64X64, BLOCK_64X32 }, { BLOCK_32X64, BLOCK_32X32 } }, }; const TX_SIZE uv_txsize_lookup[BLOCK_SIZES][TX_SIZES][2][2] = { // ss_x == 0 ss_x == 0 ss_x == 1 ss_x == 1 // ss_y == 0 ss_y == 1 ss_y == 0 ss_y == 1 { // BLOCK_4X4 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, }, { // BLOCK_4X8 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, }, { // BLOCK_8X4 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, }, { // BLOCK_8X8 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_4X4 }, { TX_4X4, TX_4X4 } }, }, { // BLOCK_8X16 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_8X8 }, { TX_4X4, TX_4X4 } }, }, { // BLOCK_16X8 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_4X4 }, { TX_8X8, TX_4X4 } }, { { TX_8X8, TX_4X4 }, { TX_8X8, TX_8X8 } }, { { TX_8X8, TX_4X4 }, { TX_8X8, TX_8X8 } }, }, { // BLOCK_16X16 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } }, { { TX_16X16, TX_8X8 }, { TX_8X8, TX_8X8 } }, }, { // BLOCK_16X32 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } }, { { TX_16X16, TX_16X16 }, { TX_8X8, TX_8X8 } }, }, { // BLOCK_32X16 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } }, { { TX_16X16, TX_8X8 }, { TX_16X16, TX_8X8 } }, }, { // BLOCK_32X32 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, 
TX_8X8 }, { TX_8X8, TX_8X8 } }, { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } }, { { TX_32X32, TX_16X16 }, { TX_16X16, TX_16X16 } }, }, { // BLOCK_32X64 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } }, { { TX_32X32, TX_32X32 }, { TX_16X16, TX_16X16 } }, }, { // BLOCK_64X32 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } }, { { TX_32X32, TX_16X16 }, { TX_32X32, TX_16X16 } }, }, { // BLOCK_64X64 { { TX_4X4, TX_4X4 }, { TX_4X4, TX_4X4 } }, { { TX_8X8, TX_8X8 }, { TX_8X8, TX_8X8 } }, { { TX_16X16, TX_16X16 }, { TX_16X16, TX_16X16 } }, { { TX_32X32, TX_32X32 }, { TX_32X32, TX_32X32 } }, }, }; // Generates 4 bit field in which each bit set to 1 represents // a blocksize partition 1111 means we split 64x64, 32x32, 16x16 // and 8x8. 1000 means we just split the 64x64 to 32x32 const struct { PARTITION_CONTEXT above; PARTITION_CONTEXT left; } partition_context_lookup[BLOCK_SIZES] = { { 15, 15 }, // 4X4 - {0b1111, 0b1111} { 15, 14 }, // 4X8 - {0b1111, 0b1110} { 14, 15 }, // 8X4 - {0b1110, 0b1111} { 14, 14 }, // 8X8 - {0b1110, 0b1110} { 14, 12 }, // 8X16 - {0b1110, 0b1100} { 12, 14 }, // 16X8 - {0b1100, 0b1110} { 12, 12 }, // 16X16 - {0b1100, 0b1100} { 12, 8 }, // 16X32 - {0b1100, 0b1000} { 8, 12 }, // 32X16 - {0b1000, 0b1100} { 8, 8 }, // 32X32 - {0b1000, 0b1000} { 8, 0 }, // 32X64 - {0b1000, 0b0000} { 0, 8 }, // 64X32 - {0b0000, 0b1000} { 0, 0 }, // 64X64 - {0b0000, 0b0000} }; #if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH const uint8_t need_top_left[INTRA_MODES] = { 0, // DC_PRED 0, // V_PRED 0, // H_PRED 0, // D45_PRED 1, // D135_PRED 1, // D117_PRED 1, // D153_PRED 0, // D207_PRED 0, // D63_PRED 1, // TM_PRED }; #endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH libvpx-1.8.2/vp9/common/vp9_common_data.h000066400000000000000000000034301357355204000202700ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#ifndef VPX_VP9_COMMON_VP9_COMMON_DATA_H_
#define VPX_VP9_COMMON_VP9_COMMON_DATA_H_

#include "vp9/common/vp9_enums.h"
#include "vpx/vpx_integer.h"

#ifdef __cplusplus
extern "C" {
#endif

extern const uint8_t b_width_log2_lookup[BLOCK_SIZES];
extern const uint8_t b_height_log2_lookup[BLOCK_SIZES];
extern const uint8_t mi_width_log2_lookup[BLOCK_SIZES];
extern const uint8_t num_8x8_blocks_wide_lookup[BLOCK_SIZES];
extern const uint8_t num_8x8_blocks_high_lookup[BLOCK_SIZES];
extern const uint8_t num_4x4_blocks_high_lookup[BLOCK_SIZES];
extern const uint8_t num_4x4_blocks_wide_lookup[BLOCK_SIZES];
extern const uint8_t size_group_lookup[BLOCK_SIZES];
extern const uint8_t num_pels_log2_lookup[BLOCK_SIZES];
extern const PARTITION_TYPE partition_lookup[][BLOCK_SIZES];
extern const BLOCK_SIZE subsize_lookup[PARTITION_TYPES][BLOCK_SIZES];
extern const TX_SIZE max_txsize_lookup[BLOCK_SIZES];
extern const BLOCK_SIZE txsize_to_bsize[TX_SIZES];
extern const TX_SIZE tx_mode_to_biggest_tx_size[TX_MODES];
extern const BLOCK_SIZE ss_size_lookup[BLOCK_SIZES][2][2];
extern const TX_SIZE uv_txsize_lookup[BLOCK_SIZES][TX_SIZES][2][2];
#if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH
extern const uint8_t need_top_left[INTRA_MODES];
#endif  // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_COMMON_VP9_COMMON_DATA_H_
libvpx-1.8.2/vp9/common/vp9_debugmodes.c000066400000000000000000000053511357355204000201240ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdio.h>

#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_onyxc_int.h"

static void log_frame_info(VP9_COMMON *cm, const char *str, FILE *f) {
  fprintf(f, "%s", str);
  fprintf(f, "(Frame %d, Show:%d, Q:%d): \n", cm->current_video_frame,
          cm->show_frame, cm->base_qindex);
}

/* This function dereferences a pointer to the mbmi structure
 * and uses the passed in member offset to print out the value of an integer
 * for each mbmi member value in the mi structure.
 */
static void print_mi_data(VP9_COMMON *cm, FILE *file, const char *descriptor,
                          size_t member_offset) {
  int mi_row, mi_col;
  MODE_INFO **mi = cm->mi_grid_visible;
  int rows = cm->mi_rows;
  int cols = cm->mi_cols;
  char prefix = descriptor[0];

  log_frame_info(cm, descriptor, file);
  for (mi_row = 0; mi_row < rows; mi_row++) {
    fprintf(file, "%c ", prefix);
    for (mi_col = 0; mi_col < cols; mi_col++) {
      fprintf(file, "%2d ", *((char *)((char *)(mi[0]) + member_offset)));
      mi++;
    }
    fprintf(file, "\n");
    mi += 8;
  }
  fprintf(file, "\n");
}

void vp9_print_modes_and_motion_vectors(VP9_COMMON *cm, const char *file) {
  int mi_row;
  int mi_col;
  FILE *mvs = fopen(file, "a");
  MODE_INFO **mi = cm->mi_grid_visible;
  int rows = cm->mi_rows;
  int cols = cm->mi_cols;

  print_mi_data(cm, mvs, "Partitions:", offsetof(MODE_INFO, sb_type));
  print_mi_data(cm, mvs, "Modes:", offsetof(MODE_INFO, mode));
  print_mi_data(cm, mvs, "Ref frame:", offsetof(MODE_INFO, ref_frame[0]));
  print_mi_data(cm, mvs, "Transform:", offsetof(MODE_INFO, tx_size));
  print_mi_data(cm, mvs, "UV Modes:", offsetof(MODE_INFO, uv_mode));

  // output skip information.
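  // The dumps below follow the same layout as print_mi_data(): one prefix
  // letter per row ('S' for skip flags, 'V' for motion vectors) and one entry
  // per mi column. The trailing `mi += 8` steps over the padding that
  // calc_mi_size() adds to each mi row (assuming mi_stride == mi_cols + 8).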
log_frame_info(cm, "Skips:", mvs); for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(mvs, "S "); for (mi_col = 0; mi_col < cols; mi_col++) { fprintf(mvs, "%2d ", mi[0]->skip); mi++; } fprintf(mvs, "\n"); mi += 8; } fprintf(mvs, "\n"); // output motion vectors. log_frame_info(cm, "Vectors ", mvs); mi = cm->mi_grid_visible; for (mi_row = 0; mi_row < rows; mi_row++) { fprintf(mvs, "V "); for (mi_col = 0; mi_col < cols; mi_col++) { fprintf(mvs, "%4d:%4d ", mi[0]->mv[0].as_mv.row, mi[0]->mv[0].as_mv.col); mi++; } fprintf(mvs, "\n"); mi += 8; } fprintf(mvs, "\n"); fclose(mvs); } libvpx-1.8.2/vp9/common/vp9_entropy.c000066400000000000000000001066341357355204000175140ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_entropymode.h" #include "vpx_mem/vpx_mem.h" #include "vpx/vpx_integer.h" // Unconstrained Node Tree /* clang-format off */ const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)] = { 2, 6, // 0 = LOW_VAL -TWO_TOKEN, 4, // 1 = TWO -THREE_TOKEN, -FOUR_TOKEN, // 2 = THREE 8, 10, // 3 = HIGH_LOW -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 4 = CAT_ONE 12, 14, // 5 = CAT_THREEFOUR -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 6 = CAT_THREE -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 7 = CAT_FIVE }; /* clang-format on */ const vpx_prob vp9_cat1_prob[] = { 159 }; const vpx_prob vp9_cat2_prob[] = { 165, 145 }; const vpx_prob vp9_cat3_prob[] = { 173, 148, 140 }; const vpx_prob vp9_cat4_prob[] = { 176, 155, 140, 135 }; const vpx_prob vp9_cat5_prob[] = { 180, 157, 141, 134, 130 }; const vpx_prob vp9_cat6_prob[] = { 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 }; #if CONFIG_VP9_HIGHBITDEPTH const vpx_prob vp9_cat6_prob_high12[] = { 255, 255, 255, 255, 254, 254, 254, 252, 249, 243, 230, 196, 177, 153, 140, 133, 130, 129 }; #endif /* clang-format off */ const uint8_t vp9_coefband_trans_8x8plus[1024] = { 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, // beyond MAXBAND_INDEX+1 all values are filled as 5 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, }; /* clang-format on */ const uint8_t vp9_coefband_trans_4x4[16] = { 0, 1, 1, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, }; const uint8_t vp9_pt_energy_class[ENTROPY_TOKENS] = { 0, 1, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5 }; // Model obtained from a 2-sided zero-centerd distribuition derived // from a Pareto distribution. The cdf of the distribution is: // cdf(x) = 0.5 + 0.5 * sgn(x) * [1 - {alpha/(alpha + |x|)} ^ beta] // // For a given beta and a given probablity of the 1-node, the alpha // is first solved, and then the {alpha, beta} pair is used to generate // the probabilities for the rest of the nodes. 
// beta = 8 // Every odd line in this table can be generated from the even lines // by averaging : // vp9_pareto8_full[l][node] = (vp9_pareto8_full[l-1][node] + // vp9_pareto8_full[l+1][node] ) >> 1; const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES] = { { 3, 86, 128, 6, 86, 23, 88, 29 }, { 6, 86, 128, 11, 87, 42, 91, 52 }, { 9, 86, 129, 17, 88, 61, 94, 76 }, { 12, 86, 129, 22, 88, 77, 97, 93 }, { 15, 87, 129, 28, 89, 93, 100, 110 }, { 17, 87, 129, 33, 90, 105, 103, 123 }, { 20, 88, 130, 38, 91, 118, 106, 136 }, { 23, 88, 130, 43, 91, 128, 108, 146 }, { 26, 89, 131, 48, 92, 139, 111, 156 }, { 28, 89, 131, 53, 93, 147, 114, 163 }, { 31, 90, 131, 58, 94, 156, 117, 171 }, { 34, 90, 131, 62, 94, 163, 119, 177 }, { 37, 90, 132, 66, 95, 171, 122, 184 }, { 39, 90, 132, 70, 96, 177, 124, 189 }, { 42, 91, 132, 75, 97, 183, 127, 194 }, { 44, 91, 132, 79, 97, 188, 129, 198 }, { 47, 92, 133, 83, 98, 193, 132, 202 }, { 49, 92, 133, 86, 99, 197, 134, 205 }, { 52, 93, 133, 90, 100, 201, 137, 208 }, { 54, 93, 133, 94, 100, 204, 139, 211 }, { 57, 94, 134, 98, 101, 208, 142, 214 }, { 59, 94, 134, 101, 102, 211, 144, 216 }, { 62, 94, 135, 105, 103, 214, 146, 218 }, { 64, 94, 135, 108, 103, 216, 148, 220 }, { 66, 95, 135, 111, 104, 219, 151, 222 }, { 68, 95, 135, 114, 105, 221, 153, 223 }, { 71, 96, 136, 117, 106, 224, 155, 225 }, { 73, 96, 136, 120, 106, 225, 157, 226 }, { 76, 97, 136, 123, 107, 227, 159, 228 }, { 78, 97, 136, 126, 108, 229, 160, 229 }, { 80, 98, 137, 129, 109, 231, 162, 231 }, { 82, 98, 137, 131, 109, 232, 164, 232 }, { 84, 98, 138, 134, 110, 234, 166, 233 }, { 86, 98, 138, 137, 111, 235, 168, 234 }, { 89, 99, 138, 140, 112, 236, 170, 235 }, { 91, 99, 138, 142, 112, 237, 171, 235 }, { 93, 100, 139, 145, 113, 238, 173, 236 }, { 95, 100, 139, 147, 114, 239, 174, 237 }, { 97, 101, 140, 149, 115, 240, 176, 238 }, { 99, 101, 140, 151, 115, 241, 177, 238 }, { 101, 102, 140, 154, 116, 242, 179, 239 }, { 103, 102, 140, 156, 117, 242, 180, 239 }, { 105, 103, 141, 158, 118, 243, 182, 240 }, { 107, 103, 141, 160, 118, 243, 183, 240 }, { 109, 104, 141, 162, 119, 244, 185, 241 }, { 111, 104, 141, 164, 119, 244, 186, 241 }, { 113, 104, 142, 166, 120, 245, 187, 242 }, { 114, 104, 142, 168, 121, 245, 188, 242 }, { 116, 105, 143, 170, 122, 246, 190, 243 }, { 118, 105, 143, 171, 122, 246, 191, 243 }, { 120, 106, 143, 173, 123, 247, 192, 244 }, { 121, 106, 143, 175, 124, 247, 193, 244 }, { 123, 107, 144, 177, 125, 248, 195, 244 }, { 125, 107, 144, 178, 125, 248, 196, 244 }, { 127, 108, 145, 180, 126, 249, 197, 245 }, { 128, 108, 145, 181, 127, 249, 198, 245 }, { 130, 109, 145, 183, 128, 249, 199, 245 }, { 132, 109, 145, 184, 128, 249, 200, 245 }, { 134, 110, 146, 186, 129, 250, 201, 246 }, { 135, 110, 146, 187, 130, 250, 202, 246 }, { 137, 111, 147, 189, 131, 251, 203, 246 }, { 138, 111, 147, 190, 131, 251, 204, 246 }, { 140, 112, 147, 192, 132, 251, 205, 247 }, { 141, 112, 147, 193, 132, 251, 206, 247 }, { 143, 113, 148, 194, 133, 251, 207, 247 }, { 144, 113, 148, 195, 134, 251, 207, 247 }, { 146, 114, 149, 197, 135, 252, 208, 248 }, { 147, 114, 149, 198, 135, 252, 209, 248 }, { 149, 115, 149, 199, 136, 252, 210, 248 }, { 150, 115, 149, 200, 137, 252, 210, 248 }, { 152, 115, 150, 201, 138, 252, 211, 248 }, { 153, 115, 150, 202, 138, 252, 212, 248 }, { 155, 116, 151, 204, 139, 253, 213, 249 }, { 156, 116, 151, 205, 139, 253, 213, 249 }, { 158, 117, 151, 206, 140, 253, 214, 249 }, { 159, 117, 151, 207, 141, 253, 215, 249 }, { 161, 118, 152, 208, 142, 253, 216, 249 }, { 162, 118, 152, 209, 142, 
253, 216, 249 }, { 163, 119, 153, 210, 143, 253, 217, 249 }, { 164, 119, 153, 211, 143, 253, 217, 249 }, { 166, 120, 153, 212, 144, 254, 218, 250 }, { 167, 120, 153, 212, 145, 254, 219, 250 }, { 168, 121, 154, 213, 146, 254, 220, 250 }, { 169, 121, 154, 214, 146, 254, 220, 250 }, { 171, 122, 155, 215, 147, 254, 221, 250 }, { 172, 122, 155, 216, 147, 254, 221, 250 }, { 173, 123, 155, 217, 148, 254, 222, 250 }, { 174, 123, 155, 217, 149, 254, 222, 250 }, { 176, 124, 156, 218, 150, 254, 223, 250 }, { 177, 124, 156, 219, 150, 254, 223, 250 }, { 178, 125, 157, 220, 151, 254, 224, 251 }, { 179, 125, 157, 220, 151, 254, 224, 251 }, { 180, 126, 157, 221, 152, 254, 225, 251 }, { 181, 126, 157, 221, 152, 254, 225, 251 }, { 183, 127, 158, 222, 153, 254, 226, 251 }, { 184, 127, 158, 223, 154, 254, 226, 251 }, { 185, 128, 159, 224, 155, 255, 227, 251 }, { 186, 128, 159, 224, 155, 255, 227, 251 }, { 187, 129, 160, 225, 156, 255, 228, 251 }, { 188, 130, 160, 225, 156, 255, 228, 251 }, { 189, 131, 160, 226, 157, 255, 228, 251 }, { 190, 131, 160, 226, 158, 255, 228, 251 }, { 191, 132, 161, 227, 159, 255, 229, 251 }, { 192, 132, 161, 227, 159, 255, 229, 251 }, { 193, 133, 162, 228, 160, 255, 230, 252 }, { 194, 133, 162, 229, 160, 255, 230, 252 }, { 195, 134, 163, 230, 161, 255, 231, 252 }, { 196, 134, 163, 230, 161, 255, 231, 252 }, { 197, 135, 163, 231, 162, 255, 231, 252 }, { 198, 135, 163, 231, 162, 255, 231, 252 }, { 199, 136, 164, 232, 163, 255, 232, 252 }, { 200, 136, 164, 232, 164, 255, 232, 252 }, { 201, 137, 165, 233, 165, 255, 233, 252 }, { 201, 137, 165, 233, 165, 255, 233, 252 }, { 202, 138, 166, 233, 166, 255, 233, 252 }, { 203, 138, 166, 233, 166, 255, 233, 252 }, { 204, 139, 166, 234, 167, 255, 234, 252 }, { 205, 139, 166, 234, 167, 255, 234, 252 }, { 206, 140, 167, 235, 168, 255, 235, 252 }, { 206, 140, 167, 235, 168, 255, 235, 252 }, { 207, 141, 168, 236, 169, 255, 235, 252 }, { 208, 141, 168, 236, 170, 255, 235, 252 }, { 209, 142, 169, 237, 171, 255, 236, 252 }, { 209, 143, 169, 237, 171, 255, 236, 252 }, { 210, 144, 169, 237, 172, 255, 236, 252 }, { 211, 144, 169, 237, 172, 255, 236, 252 }, { 212, 145, 170, 238, 173, 255, 237, 252 }, { 213, 145, 170, 238, 173, 255, 237, 252 }, { 214, 146, 171, 239, 174, 255, 237, 253 }, { 214, 146, 171, 239, 174, 255, 237, 253 }, { 215, 147, 172, 240, 175, 255, 238, 253 }, { 215, 147, 172, 240, 175, 255, 238, 253 }, { 216, 148, 173, 240, 176, 255, 238, 253 }, { 217, 148, 173, 240, 176, 255, 238, 253 }, { 218, 149, 173, 241, 177, 255, 239, 253 }, { 218, 149, 173, 241, 178, 255, 239, 253 }, { 219, 150, 174, 241, 179, 255, 239, 253 }, { 219, 151, 174, 241, 179, 255, 239, 253 }, { 220, 152, 175, 242, 180, 255, 240, 253 }, { 221, 152, 175, 242, 180, 255, 240, 253 }, { 222, 153, 176, 242, 181, 255, 240, 253 }, { 222, 153, 176, 242, 181, 255, 240, 253 }, { 223, 154, 177, 243, 182, 255, 240, 253 }, { 223, 154, 177, 243, 182, 255, 240, 253 }, { 224, 155, 178, 244, 183, 255, 241, 253 }, { 224, 155, 178, 244, 183, 255, 241, 253 }, { 225, 156, 178, 244, 184, 255, 241, 253 }, { 225, 157, 178, 244, 184, 255, 241, 253 }, { 226, 158, 179, 244, 185, 255, 242, 253 }, { 227, 158, 179, 244, 185, 255, 242, 253 }, { 228, 159, 180, 245, 186, 255, 242, 253 }, { 228, 159, 180, 245, 186, 255, 242, 253 }, { 229, 160, 181, 245, 187, 255, 242, 253 }, { 229, 160, 181, 245, 187, 255, 242, 253 }, { 230, 161, 182, 246, 188, 255, 243, 253 }, { 230, 162, 182, 246, 188, 255, 243, 253 }, { 231, 163, 183, 246, 189, 255, 243, 253 }, { 231, 163, 183, 246, 189, 255, 243, 253 }, { 232, 164, 184, 
247, 190, 255, 243, 253 }, { 232, 164, 184, 247, 190, 255, 243, 253 }, { 233, 165, 185, 247, 191, 255, 244, 253 }, { 233, 165, 185, 247, 191, 255, 244, 253 }, { 234, 166, 185, 247, 192, 255, 244, 253 }, { 234, 167, 185, 247, 192, 255, 244, 253 }, { 235, 168, 186, 248, 193, 255, 244, 253 }, { 235, 168, 186, 248, 193, 255, 244, 253 }, { 236, 169, 187, 248, 194, 255, 244, 253 }, { 236, 169, 187, 248, 194, 255, 244, 253 }, { 236, 170, 188, 248, 195, 255, 245, 253 }, { 236, 170, 188, 248, 195, 255, 245, 253 }, { 237, 171, 189, 249, 196, 255, 245, 254 }, { 237, 172, 189, 249, 196, 255, 245, 254 }, { 238, 173, 190, 249, 197, 255, 245, 254 }, { 238, 173, 190, 249, 197, 255, 245, 254 }, { 239, 174, 191, 249, 198, 255, 245, 254 }, { 239, 174, 191, 249, 198, 255, 245, 254 }, { 240, 175, 192, 249, 199, 255, 246, 254 }, { 240, 176, 192, 249, 199, 255, 246, 254 }, { 240, 177, 193, 250, 200, 255, 246, 254 }, { 240, 177, 193, 250, 200, 255, 246, 254 }, { 241, 178, 194, 250, 201, 255, 246, 254 }, { 241, 178, 194, 250, 201, 255, 246, 254 }, { 242, 179, 195, 250, 202, 255, 246, 254 }, { 242, 180, 195, 250, 202, 255, 246, 254 }, { 242, 181, 196, 250, 203, 255, 247, 254 }, { 242, 181, 196, 250, 203, 255, 247, 254 }, { 243, 182, 197, 251, 204, 255, 247, 254 }, { 243, 183, 197, 251, 204, 255, 247, 254 }, { 244, 184, 198, 251, 205, 255, 247, 254 }, { 244, 184, 198, 251, 205, 255, 247, 254 }, { 244, 185, 199, 251, 206, 255, 247, 254 }, { 244, 185, 199, 251, 206, 255, 247, 254 }, { 245, 186, 200, 251, 207, 255, 247, 254 }, { 245, 187, 200, 251, 207, 255, 247, 254 }, { 246, 188, 201, 252, 207, 255, 248, 254 }, { 246, 188, 201, 252, 207, 255, 248, 254 }, { 246, 189, 202, 252, 208, 255, 248, 254 }, { 246, 190, 202, 252, 208, 255, 248, 254 }, { 247, 191, 203, 252, 209, 255, 248, 254 }, { 247, 191, 203, 252, 209, 255, 248, 254 }, { 247, 192, 204, 252, 210, 255, 248, 254 }, { 247, 193, 204, 252, 210, 255, 248, 254 }, { 248, 194, 205, 252, 211, 255, 248, 254 }, { 248, 194, 205, 252, 211, 255, 248, 254 }, { 248, 195, 206, 252, 212, 255, 249, 254 }, { 248, 196, 206, 252, 212, 255, 249, 254 }, { 249, 197, 207, 253, 213, 255, 249, 254 }, { 249, 197, 207, 253, 213, 255, 249, 254 }, { 249, 198, 208, 253, 214, 255, 249, 254 }, { 249, 199, 209, 253, 214, 255, 249, 254 }, { 250, 200, 210, 253, 215, 255, 249, 254 }, { 250, 200, 210, 253, 215, 255, 249, 254 }, { 250, 201, 211, 253, 215, 255, 249, 254 }, { 250, 202, 211, 253, 215, 255, 249, 254 }, { 250, 203, 212, 253, 216, 255, 249, 254 }, { 250, 203, 212, 253, 216, 255, 249, 254 }, { 251, 204, 213, 253, 217, 255, 250, 254 }, { 251, 205, 213, 253, 217, 255, 250, 254 }, { 251, 206, 214, 254, 218, 255, 250, 254 }, { 251, 206, 215, 254, 218, 255, 250, 254 }, { 252, 207, 216, 254, 219, 255, 250, 254 }, { 252, 208, 216, 254, 219, 255, 250, 254 }, { 252, 209, 217, 254, 220, 255, 250, 254 }, { 252, 210, 217, 254, 220, 255, 250, 254 }, { 252, 211, 218, 254, 221, 255, 250, 254 }, { 252, 212, 218, 254, 221, 255, 250, 254 }, { 253, 213, 219, 254, 222, 255, 250, 254 }, { 253, 213, 220, 254, 222, 255, 250, 254 }, { 253, 214, 221, 254, 223, 255, 250, 254 }, { 253, 215, 221, 254, 223, 255, 250, 254 }, { 253, 216, 222, 254, 224, 255, 251, 254 }, { 253, 217, 223, 254, 224, 255, 251, 254 }, { 253, 218, 224, 254, 225, 255, 251, 254 }, { 253, 219, 224, 254, 225, 255, 251, 254 }, { 254, 220, 225, 254, 225, 255, 251, 254 }, { 254, 221, 226, 254, 225, 255, 251, 254 }, { 254, 222, 227, 255, 226, 255, 251, 254 }, { 254, 223, 227, 255, 226, 255, 251, 254 }, { 254, 224, 228, 255, 227, 255, 251, 254 }, { 254, 
225, 229, 255, 227, 255, 251, 254 }, { 254, 226, 230, 255, 228, 255, 251, 254 }, { 254, 227, 230, 255, 229, 255, 251, 254 }, { 255, 228, 231, 255, 230, 255, 251, 254 }, { 255, 229, 232, 255, 230, 255, 251, 254 }, { 255, 230, 233, 255, 231, 255, 252, 254 }, { 255, 231, 234, 255, 231, 255, 252, 254 }, { 255, 232, 235, 255, 232, 255, 252, 254 }, { 255, 233, 236, 255, 232, 255, 252, 254 }, { 255, 235, 237, 255, 233, 255, 252, 254 }, { 255, 236, 238, 255, 234, 255, 252, 254 }, { 255, 238, 240, 255, 235, 255, 252, 255 }, { 255, 239, 241, 255, 235, 255, 252, 254 }, { 255, 241, 243, 255, 236, 255, 252, 254 }, { 255, 243, 245, 255, 237, 255, 252, 254 }, { 255, 246, 247, 255, 239, 255, 253, 255 }, }; static const vp9_coeff_probs_model default_coef_probs_4x4[PLANE_TYPES] = { { // Y plane { // Intra { // Band 0 { 195, 29, 183 }, { 84, 49, 136 }, { 8, 42, 71 } }, { // Band 1 { 31, 107, 169 }, { 35, 99, 159 }, { 17, 82, 140 }, { 8, 66, 114 }, { 2, 44, 76 }, { 1, 19, 32 } }, { // Band 2 { 40, 132, 201 }, { 29, 114, 187 }, { 13, 91, 157 }, { 7, 75, 127 }, { 3, 58, 95 }, { 1, 28, 47 } }, { // Band 3 { 69, 142, 221 }, { 42, 122, 201 }, { 15, 91, 159 }, { 6, 67, 121 }, { 1, 42, 77 }, { 1, 17, 31 } }, { // Band 4 { 102, 148, 228 }, { 67, 117, 204 }, { 17, 82, 154 }, { 6, 59, 114 }, { 2, 39, 75 }, { 1, 15, 29 } }, { // Band 5 { 156, 57, 233 }, { 119, 57, 212 }, { 58, 48, 163 }, { 29, 40, 124 }, { 12, 30, 81 }, { 3, 12, 31 } } }, { // Inter { // Band 0 { 191, 107, 226 }, { 124, 117, 204 }, { 25, 99, 155 } }, { // Band 1 { 29, 148, 210 }, { 37, 126, 194 }, { 8, 93, 157 }, { 2, 68, 118 }, { 1, 39, 69 }, { 1, 17, 33 } }, { // Band 2 { 41, 151, 213 }, { 27, 123, 193 }, { 3, 82, 144 }, { 1, 58, 105 }, { 1, 32, 60 }, { 1, 13, 26 } }, { // Band 3 { 59, 159, 220 }, { 23, 126, 198 }, { 4, 88, 151 }, { 1, 66, 114 }, { 1, 38, 71 }, { 1, 18, 34 } }, { // Band 4 { 114, 136, 232 }, { 51, 114, 207 }, { 11, 83, 155 }, { 3, 56, 105 }, { 1, 33, 65 }, { 1, 17, 34 } }, { // Band 5 { 149, 65, 234 }, { 121, 57, 215 }, { 61, 49, 166 }, { 28, 36, 114 }, { 12, 25, 76 }, { 3, 16, 42 } } } }, { // UV plane { // Intra { // Band 0 { 214, 49, 220 }, { 132, 63, 188 }, { 42, 65, 137 } }, { // Band 1 { 85, 137, 221 }, { 104, 131, 216 }, { 49, 111, 192 }, { 21, 87, 155 }, { 2, 49, 87 }, { 1, 16, 28 } }, { // Band 2 { 89, 163, 230 }, { 90, 137, 220 }, { 29, 100, 183 }, { 10, 70, 135 }, { 2, 42, 81 }, { 1, 17, 33 } }, { // Band 3 { 108, 167, 237 }, { 55, 133, 222 }, { 15, 97, 179 }, { 4, 72, 135 }, { 1, 45, 85 }, { 1, 19, 38 } }, { // Band 4 { 124, 146, 240 }, { 66, 124, 224 }, { 17, 88, 175 }, { 4, 58, 122 }, { 1, 36, 75 }, { 1, 18, 37 } }, { // Band 5 { 141, 79, 241 }, { 126, 70, 227 }, { 66, 58, 182 }, { 30, 44, 136 }, { 12, 34, 96 }, { 2, 20, 47 } } }, { // Inter { // Band 0 { 229, 99, 249 }, { 143, 111, 235 }, { 46, 109, 192 } }, { // Band 1 { 82, 158, 236 }, { 94, 146, 224 }, { 25, 117, 191 }, { 9, 87, 149 }, { 3, 56, 99 }, { 1, 33, 57 } }, { // Band 2 { 83, 167, 237 }, { 68, 145, 222 }, { 10, 103, 177 }, { 2, 72, 131 }, { 1, 41, 79 }, { 1, 20, 39 } }, { // Band 3 { 99, 167, 239 }, { 47, 141, 224 }, { 10, 104, 178 }, { 2, 73, 133 }, { 1, 44, 85 }, { 1, 22, 47 } }, { // Band 4 { 127, 145, 243 }, { 71, 129, 228 }, { 17, 93, 177 }, { 3, 61, 124 }, { 1, 41, 84 }, { 1, 21, 52 } }, { // Band 5 { 157, 78, 244 }, { 140, 72, 231 }, { 69, 58, 184 }, { 31, 44, 137 }, { 14, 38, 105 }, { 8, 23, 61 } } } } }; static const vp9_coeff_probs_model default_coef_probs_8x8[PLANE_TYPES] = { { // Y plane { // Intra { // Band 0 { 125, 34, 187 }, { 52, 41, 133 }, { 
6, 31, 56 } }, { // Band 1 { 37, 109, 153 }, { 51, 102, 147 }, { 23, 87, 128 }, { 8, 67, 101 }, { 1, 41, 63 }, { 1, 19, 29 } }, { // Band 2 { 31, 154, 185 }, { 17, 127, 175 }, { 6, 96, 145 }, { 2, 73, 114 }, { 1, 51, 82 }, { 1, 28, 45 } }, { // Band 3 { 23, 163, 200 }, { 10, 131, 185 }, { 2, 93, 148 }, { 1, 67, 111 }, { 1, 41, 69 }, { 1, 14, 24 } }, { // Band 4 { 29, 176, 217 }, { 12, 145, 201 }, { 3, 101, 156 }, { 1, 69, 111 }, { 1, 39, 63 }, { 1, 14, 23 } }, { // Band 5 { 57, 192, 233 }, { 25, 154, 215 }, { 6, 109, 167 }, { 3, 78, 118 }, { 1, 48, 69 }, { 1, 21, 29 } } }, { // Inter { // Band 0 { 202, 105, 245 }, { 108, 106, 216 }, { 18, 90, 144 } }, { // Band 1 { 33, 172, 219 }, { 64, 149, 206 }, { 14, 117, 177 }, { 5, 90, 141 }, { 2, 61, 95 }, { 1, 37, 57 } }, { // Band 2 { 33, 179, 220 }, { 11, 140, 198 }, { 1, 89, 148 }, { 1, 60, 104 }, { 1, 33, 57 }, { 1, 12, 21 } }, { // Band 3 { 30, 181, 221 }, { 8, 141, 198 }, { 1, 87, 145 }, { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 20 } }, { // Band 4 { 32, 186, 224 }, { 7, 142, 198 }, { 1, 86, 143 }, { 1, 58, 100 }, { 1, 31, 55 }, { 1, 12, 22 } }, { // Band 5 { 57, 192, 227 }, { 20, 143, 204 }, { 3, 96, 154 }, { 1, 68, 112 }, { 1, 42, 69 }, { 1, 19, 32 } } } }, { // UV plane { // Intra { // Band 0 { 212, 35, 215 }, { 113, 47, 169 }, { 29, 48, 105 } }, { // Band 1 { 74, 129, 203 }, { 106, 120, 203 }, { 49, 107, 178 }, { 19, 84, 144 }, { 4, 50, 84 }, { 1, 15, 25 } }, { // Band 2 { 71, 172, 217 }, { 44, 141, 209 }, { 15, 102, 173 }, { 6, 76, 133 }, { 2, 51, 89 }, { 1, 24, 42 } }, { // Band 3 { 64, 185, 231 }, { 31, 148, 216 }, { 8, 103, 175 }, { 3, 74, 131 }, { 1, 46, 81 }, { 1, 18, 30 } }, { // Band 4 { 65, 196, 235 }, { 25, 157, 221 }, { 5, 105, 174 }, { 1, 67, 120 }, { 1, 38, 69 }, { 1, 15, 30 } }, { // Band 5 { 65, 204, 238 }, { 30, 156, 224 }, { 7, 107, 177 }, { 2, 70, 124 }, { 1, 42, 73 }, { 1, 18, 34 } } }, { // Inter { // Band 0 { 225, 86, 251 }, { 144, 104, 235 }, { 42, 99, 181 } }, { // Band 1 { 85, 175, 239 }, { 112, 165, 229 }, { 29, 136, 200 }, { 12, 103, 162 }, { 6, 77, 123 }, { 2, 53, 84 } }, { // Band 2 { 75, 183, 239 }, { 30, 155, 221 }, { 3, 106, 171 }, { 1, 74, 128 }, { 1, 44, 76 }, { 1, 17, 28 } }, { // Band 3 { 73, 185, 240 }, { 27, 159, 222 }, { 2, 107, 172 }, { 1, 75, 127 }, { 1, 42, 73 }, { 1, 17, 29 } }, { // Band 4 { 62, 190, 238 }, { 21, 159, 222 }, { 2, 107, 172 }, { 1, 72, 122 }, { 1, 40, 71 }, { 1, 18, 32 } }, { // Band 5 { 61, 199, 240 }, { 27, 161, 226 }, { 4, 113, 180 }, { 1, 76, 129 }, { 1, 46, 80 }, { 1, 23, 41 } } } } }; static const vp9_coeff_probs_model default_coef_probs_16x16[PLANE_TYPES] = { { // Y plane { // Intra { // Band 0 { 7, 27, 153 }, { 5, 30, 95 }, { 1, 16, 30 } }, { // Band 1 { 50, 75, 127 }, { 57, 75, 124 }, { 27, 67, 108 }, { 10, 54, 86 }, { 1, 33, 52 }, { 1, 12, 18 } }, { // Band 2 { 43, 125, 151 }, { 26, 108, 148 }, { 7, 83, 122 }, { 2, 59, 89 }, { 1, 38, 60 }, { 1, 17, 27 } }, { // Band 3 { 23, 144, 163 }, { 13, 112, 154 }, { 2, 75, 117 }, { 1, 50, 81 }, { 1, 31, 51 }, { 1, 14, 23 } }, { // Band 4 { 18, 162, 185 }, { 6, 123, 171 }, { 1, 78, 125 }, { 1, 51, 86 }, { 1, 31, 54 }, { 1, 14, 23 } }, { // Band 5 { 15, 199, 227 }, { 3, 150, 204 }, { 1, 91, 146 }, { 1, 55, 95 }, { 1, 30, 53 }, { 1, 11, 20 } } }, { // Inter { // Band 0 { 19, 55, 240 }, { 19, 59, 196 }, { 3, 52, 105 } }, { // Band 1 { 41, 166, 207 }, { 104, 153, 199 }, { 31, 123, 181 }, { 14, 101, 152 }, { 5, 72, 106 }, { 1, 36, 52 } }, { // Band 2 { 35, 176, 211 }, { 12, 131, 190 }, { 2, 88, 144 }, { 1, 60, 101 }, { 1, 36, 60 }, { 1, 16, 
28 } }, { // Band 3 { 28, 183, 213 }, { 8, 134, 191 }, { 1, 86, 142 }, { 1, 56, 96 }, { 1, 30, 53 }, { 1, 12, 20 } }, { // Band 4 { 20, 190, 215 }, { 4, 135, 192 }, { 1, 84, 139 }, { 1, 53, 91 }, { 1, 28, 49 }, { 1, 11, 20 } }, { // Band 5 { 13, 196, 216 }, { 2, 137, 192 }, { 1, 86, 143 }, { 1, 57, 99 }, { 1, 32, 56 }, { 1, 13, 24 } } } }, { // UV plane { // Intra { // Band 0 { 211, 29, 217 }, { 96, 47, 156 }, { 22, 43, 87 } }, { // Band 1 { 78, 120, 193 }, { 111, 116, 186 }, { 46, 102, 164 }, { 15, 80, 128 }, { 2, 49, 76 }, { 1, 18, 28 } }, { // Band 2 { 71, 161, 203 }, { 42, 132, 192 }, { 10, 98, 150 }, { 3, 69, 109 }, { 1, 44, 70 }, { 1, 18, 29 } }, { // Band 3 { 57, 186, 211 }, { 30, 140, 196 }, { 4, 93, 146 }, { 1, 62, 102 }, { 1, 38, 65 }, { 1, 16, 27 } }, { // Band 4 { 47, 199, 217 }, { 14, 145, 196 }, { 1, 88, 142 }, { 1, 57, 98 }, { 1, 36, 62 }, { 1, 15, 26 } }, { // Band 5 { 26, 219, 229 }, { 5, 155, 207 }, { 1, 94, 151 }, { 1, 60, 104 }, { 1, 36, 62 }, { 1, 16, 28 } } }, { // Inter { // Band 0 { 233, 29, 248 }, { 146, 47, 220 }, { 43, 52, 140 } }, { // Band 1 { 100, 163, 232 }, { 179, 161, 222 }, { 63, 142, 204 }, { 37, 113, 174 }, { 26, 89, 137 }, { 18, 68, 97 } }, { // Band 2 { 85, 181, 230 }, { 32, 146, 209 }, { 7, 100, 164 }, { 3, 71, 121 }, { 1, 45, 77 }, { 1, 18, 30 } }, { // Band 3 { 65, 187, 230 }, { 20, 148, 207 }, { 2, 97, 159 }, { 1, 68, 116 }, { 1, 40, 70 }, { 1, 14, 29 } }, { // Band 4 { 40, 194, 227 }, { 8, 147, 204 }, { 1, 94, 155 }, { 1, 65, 112 }, { 1, 39, 66 }, { 1, 14, 26 } }, { // Band 5 { 16, 208, 228 }, { 3, 151, 207 }, { 1, 98, 160 }, { 1, 67, 117 }, { 1, 41, 74 }, { 1, 17, 31 } } } } }; static const vp9_coeff_probs_model default_coef_probs_32x32[PLANE_TYPES] = { { // Y plane { // Intra { // Band 0 { 17, 38, 140 }, { 7, 34, 80 }, { 1, 17, 29 } }, { // Band 1 { 37, 75, 128 }, { 41, 76, 128 }, { 26, 66, 116 }, { 12, 52, 94 }, { 2, 32, 55 }, { 1, 10, 16 } }, { // Band 2 { 50, 127, 154 }, { 37, 109, 152 }, { 16, 82, 121 }, { 5, 59, 85 }, { 1, 35, 54 }, { 1, 13, 20 } }, { // Band 3 { 40, 142, 167 }, { 17, 110, 157 }, { 2, 71, 112 }, { 1, 44, 72 }, { 1, 27, 45 }, { 1, 11, 17 } }, { // Band 4 { 30, 175, 188 }, { 9, 124, 169 }, { 1, 74, 116 }, { 1, 48, 78 }, { 1, 30, 49 }, { 1, 11, 18 } }, { // Band 5 { 10, 222, 223 }, { 2, 150, 194 }, { 1, 83, 128 }, { 1, 48, 79 }, { 1, 27, 45 }, { 1, 11, 17 } } }, { // Inter { // Band 0 { 36, 41, 235 }, { 29, 36, 193 }, { 10, 27, 111 } }, { // Band 1 { 85, 165, 222 }, { 177, 162, 215 }, { 110, 135, 195 }, { 57, 113, 168 }, { 23, 83, 120 }, { 10, 49, 61 } }, { // Band 2 { 85, 190, 223 }, { 36, 139, 200 }, { 5, 90, 146 }, { 1, 60, 103 }, { 1, 38, 65 }, { 1, 18, 30 } }, { // Band 3 { 72, 202, 223 }, { 23, 141, 199 }, { 2, 86, 140 }, { 1, 56, 97 }, { 1, 36, 61 }, { 1, 16, 27 } }, { // Band 4 { 55, 218, 225 }, { 13, 145, 200 }, { 1, 86, 141 }, { 1, 57, 99 }, { 1, 35, 61 }, { 1, 13, 22 } }, { // Band 5 { 15, 235, 212 }, { 1, 132, 184 }, { 1, 84, 139 }, { 1, 57, 97 }, { 1, 34, 56 }, { 1, 14, 23 } } } }, { // UV plane { // Intra { // Band 0 { 181, 21, 201 }, { 61, 37, 123 }, { 10, 38, 71 } }, { // Band 1 { 47, 106, 172 }, { 95, 104, 173 }, { 42, 93, 159 }, { 18, 77, 131 }, { 4, 50, 81 }, { 1, 17, 23 } }, { // Band 2 { 62, 147, 199 }, { 44, 130, 189 }, { 28, 102, 154 }, { 18, 75, 115 }, { 2, 44, 65 }, { 1, 12, 19 } }, { // Band 3 { 55, 153, 210 }, { 24, 130, 194 }, { 3, 93, 146 }, { 1, 61, 97 }, { 1, 31, 50 }, { 1, 10, 16 } }, { // Band 4 { 49, 186, 223 }, { 17, 148, 204 }, { 1, 96, 142 }, { 1, 53, 83 }, { 1, 26, 44 }, { 1, 11, 17 } }, { 
// Band 5 { 13, 217, 212 }, { 2, 136, 180 }, { 1, 78, 124 }, { 1, 50, 83 }, { 1, 29, 49 }, { 1, 14, 23 } } }, { // Inter { // Band 0 { 197, 13, 247 }, { 82, 17, 222 }, { 25, 17, 162 } }, { // Band 1 { 126, 186, 247 }, { 234, 191, 243 }, { 176, 177, 234 }, { 104, 158, 220 }, { 66, 128, 186 }, { 55, 90, 137 } }, { // Band 2 { 111, 197, 242 }, { 46, 158, 219 }, { 9, 104, 171 }, { 2, 65, 125 }, { 1, 44, 80 }, { 1, 17, 91 } }, { // Band 3 { 104, 208, 245 }, { 39, 168, 224 }, { 3, 109, 162 }, { 1, 79, 124 }, { 1, 50, 102 }, { 1, 43, 102 } }, { // Band 4 { 84, 220, 246 }, { 31, 177, 231 }, { 2, 115, 180 }, { 1, 79, 134 }, { 1, 55, 77 }, { 1, 60, 79 } }, { // Band 5 { 43, 243, 240 }, { 8, 180, 217 }, { 1, 115, 166 }, { 1, 84, 121 }, { 1, 51, 67 }, { 1, 16, 6 } } } } }; static void extend_to_full_distribution(vpx_prob *probs, vpx_prob p) { assert(p != 0); memcpy(probs, vp9_pareto8_full[p - 1], MODEL_NODES * sizeof(vpx_prob)); } void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full) { if (full != model) memcpy(full, model, sizeof(vpx_prob) * UNCONSTRAINED_NODES); extend_to_full_distribution(&full[UNCONSTRAINED_NODES], model[PIVOT_NODE]); } void vp9_default_coef_probs(VP9_COMMON *cm) { vp9_copy(cm->fc->coef_probs[TX_4X4], default_coef_probs_4x4); vp9_copy(cm->fc->coef_probs[TX_8X8], default_coef_probs_8x8); vp9_copy(cm->fc->coef_probs[TX_16X16], default_coef_probs_16x16); vp9_copy(cm->fc->coef_probs[TX_32X32], default_coef_probs_32x32); } #define COEF_COUNT_SAT 24 #define COEF_MAX_UPDATE_FACTOR 112 #define COEF_COUNT_SAT_KEY 24 #define COEF_MAX_UPDATE_FACTOR_KEY 112 #define COEF_COUNT_SAT_AFTER_KEY 24 #define COEF_MAX_UPDATE_FACTOR_AFTER_KEY 128 static void adapt_coef_probs(VP9_COMMON *cm, TX_SIZE tx_size, unsigned int count_sat, unsigned int update_factor) { const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; vp9_coeff_probs_model *const probs = cm->fc->coef_probs[tx_size]; const vp9_coeff_probs_model *const pre_probs = pre_fc->coef_probs[tx_size]; vp9_coeff_count_model *counts = cm->counts.coef[tx_size]; unsigned int(*eob_counts)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = cm->counts.eob_branch[tx_size]; int i, j, k, l, m; for (i = 0; i < PLANE_TYPES; ++i) for (j = 0; j < REF_TYPES; ++j) for (k = 0; k < COEF_BANDS; ++k) for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { const int n0 = counts[i][j][k][l][ZERO_TOKEN]; const int n1 = counts[i][j][k][l][ONE_TOKEN]; const int n2 = counts[i][j][k][l][TWO_TOKEN]; const int neob = counts[i][j][k][l][EOB_MODEL_TOKEN]; const unsigned int branch_ct[UNCONSTRAINED_NODES][2] = { { neob, eob_counts[i][j][k][l] - neob }, { n0, n1 + n2 }, { n1, n2 } }; for (m = 0; m < UNCONSTRAINED_NODES; ++m) probs[i][j][k][l][m] = merge_probs(pre_probs[i][j][k][l][m], branch_ct[m], count_sat, update_factor); } } void vp9_adapt_coef_probs(VP9_COMMON *cm) { TX_SIZE t; unsigned int count_sat, update_factor; if (frame_is_intra_only(cm)) { update_factor = COEF_MAX_UPDATE_FACTOR_KEY; count_sat = COEF_COUNT_SAT_KEY; } else if (cm->last_frame_type == KEY_FRAME) { update_factor = COEF_MAX_UPDATE_FACTOR_AFTER_KEY; /* adapt quickly */ count_sat = COEF_COUNT_SAT_AFTER_KEY; } else { update_factor = COEF_MAX_UPDATE_FACTOR; count_sat = COEF_COUNT_SAT; } for (t = TX_4X4; t <= TX_32X32; t++) adapt_coef_probs(cm, t, count_sat, update_factor); } libvpx-1.8.2/vp9/common/vp9_entropy.h000066400000000000000000000163411357355204000175140ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_ENTROPY_H_ #define VPX_VP9_COMMON_VP9_ENTROPY_H_ #include "vpx/vpx_integer.h" #include "vpx_dsp/prob.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_enums.h" #ifdef __cplusplus extern "C" { #endif #define DIFF_UPDATE_PROB 252 // Coefficient token alphabet #define ZERO_TOKEN 0 // 0 Extra Bits 0+0 #define ONE_TOKEN 1 // 1 Extra Bits 0+1 #define TWO_TOKEN 2 // 2 Extra Bits 0+1 #define THREE_TOKEN 3 // 3 Extra Bits 0+1 #define FOUR_TOKEN 4 // 4 Extra Bits 0+1 #define CATEGORY1_TOKEN 5 // 5-6 Extra Bits 1+1 #define CATEGORY2_TOKEN 6 // 7-10 Extra Bits 2+1 #define CATEGORY3_TOKEN 7 // 11-18 Extra Bits 3+1 #define CATEGORY4_TOKEN 8 // 19-34 Extra Bits 4+1 #define CATEGORY5_TOKEN 9 // 35-66 Extra Bits 5+1 #define CATEGORY6_TOKEN 10 // 67+ Extra Bits 14+1 #define EOB_TOKEN 11 // EOB Extra Bits 0+0 #define ENTROPY_TOKENS 12 #define ENTROPY_NODES 11 DECLARE_ALIGNED(16, extern const uint8_t, vp9_pt_energy_class[ENTROPY_TOKENS]); #define CAT1_MIN_VAL 5 #define CAT2_MIN_VAL 7 #define CAT3_MIN_VAL 11 #define CAT4_MIN_VAL 19 #define CAT5_MIN_VAL 35 #define CAT6_MIN_VAL 67 // Extra bit probabilities. DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob[1]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob[2]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob[3]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob[4]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob[5]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob[14]); #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob_high10[1]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob_high10[2]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob_high10[3]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob_high10[4]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob_high10[5]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high10[16]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat1_prob_high12[1]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat2_prob_high12[2]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat3_prob_high12[3]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat4_prob_high12[4]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat5_prob_high12[5]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_cat6_prob_high12[18]); #endif // CONFIG_VP9_HIGHBITDEPTH #define EOB_MODEL_TOKEN 3 #define DCT_MAX_VALUE 16384 #if CONFIG_VP9_HIGHBITDEPTH #define DCT_MAX_VALUE_HIGH10 65536 #define DCT_MAX_VALUE_HIGH12 262144 #endif // CONFIG_VP9_HIGHBITDEPTH /* Coefficients are predicted via a 3-dimensional probability table. */ #define REF_TYPES 2 // intra=0, inter=1 /* Middle dimension reflects the coefficient position within the transform. */ #define COEF_BANDS 6 /* Inside dimension is a measure of nearby complexity, reflecting whether nearby coefficients are nonzero. For the first coefficient (DC, unless block type is 0), we look at the (already encoded) blocks above and to the left of the current block. The context index is then the number (0, 1, or 2) of these blocks having nonzero coefficients.
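For example, if only the above block has nonzero coefficients, the context index for the first coefficient is 1; if both the above and left blocks do, it is 2.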
After decoding a coefficient, the measure is determined by the size of the most recently decoded coefficient. Note that the intuitive meaning of this measure changes as coefficients are decoded, e.g., prior to the first token, a zero means that my neighbors are empty while, after the first token, because of the use of end-of-block, a zero means we just decoded a zero and hence guarantees that a non-zero coefficient will appear later in this block. However, this shift in meaning is perfectly OK because our context depends also on the coefficient band (and since zigzag positions 0, 1, and 2 are in distinct bands). */ #define COEFF_CONTEXTS 6 #define BAND_COEFF_CONTEXTS(band) ((band) == 0 ? 3 : COEFF_CONTEXTS) // #define ENTROPY_STATS typedef unsigned int vp9_coeff_count[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] [ENTROPY_TOKENS]; typedef unsigned int vp9_coeff_stats[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] [ENTROPY_NODES][2]; #define SUBEXP_PARAM 4 /* Subexponential code parameter */ #define MODULUS_PARAM 13 /* Modulus parameter */ struct VP9Common; void vp9_default_coef_probs(struct VP9Common *cm); void vp9_adapt_coef_probs(struct VP9Common *cm); // This is the index in the scan order beyond which all coefficients for // 8x8 transform and above are in the top band. // This macro is currently unused but may be used by certain implementations #define MAXBAND_INDEX 21 DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_8x8plus[1024]); DECLARE_ALIGNED(16, extern const uint8_t, vp9_coefband_trans_4x4[16]); static INLINE const uint8_t *get_band_translate(TX_SIZE tx_size) { return tx_size == TX_4X4 ? vp9_coefband_trans_4x4 : vp9_coefband_trans_8x8plus; } // 128 lists of probabilities are stored for the following ONE node probs: // 1, 3, 5, 7, ..., 253, 255 // In between probabilities are interpolated linearly #define COEFF_PROB_MODELS 255 #define UNCONSTRAINED_NODES 3 #define PIVOT_NODE 2 // which node is pivot #define MODEL_NODES (ENTROPY_NODES - UNCONSTRAINED_NODES) extern const vpx_tree_index vp9_coef_con_tree[TREE_SIZE(ENTROPY_TOKENS)]; extern const vpx_prob vp9_pareto8_full[COEFF_PROB_MODELS][MODEL_NODES]; typedef vpx_prob vp9_coeff_probs_model[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] [UNCONSTRAINED_NODES]; typedef unsigned int vp9_coeff_count_model[REF_TYPES][COEF_BANDS] [COEFF_CONTEXTS] [UNCONSTRAINED_NODES + 1]; void vp9_model_to_full_probs(const vpx_prob *model, vpx_prob *full); typedef char ENTROPY_CONTEXT; static INLINE int combine_entropy_contexts(ENTROPY_CONTEXT a, ENTROPY_CONTEXT b) { return (a != 0) + (b != 0); } static INLINE int get_entropy_context(TX_SIZE tx_size, const ENTROPY_CONTEXT *a, const ENTROPY_CONTEXT *l) { ENTROPY_CONTEXT above_ec = 0, left_ec = 0; switch (tx_size) { case TX_4X4: above_ec = a[0] != 0; left_ec = l[0] != 0; break; case TX_8X8: above_ec = !!*(const uint16_t *)a; left_ec = !!*(const uint16_t *)l; break; case TX_16X16: above_ec = !!*(const uint32_t *)a; left_ec = !!*(const uint32_t *)l; break; case TX_32X32: above_ec = !!*(const uint64_t *)a; left_ec = !!*(const uint64_t *)l; break; default: assert(0 && "Invalid transform size."); break; } return combine_entropy_contexts(above_ec, left_ec); } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_ENTROPY_H_ libvpx-1.8.2/vp9/common/vp9_entropymode.c000066400000000000000000000515561357355204000203630ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_mem/vpx_mem.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_seg_common.h" const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES][INTRA_MODES - 1] = { { // above = dc { 137, 30, 42, 148, 151, 207, 70, 52, 91 }, // left = dc { 92, 45, 102, 136, 116, 180, 74, 90, 100 }, // left = v { 73, 32, 19, 187, 222, 215, 46, 34, 100 }, // left = h { 91, 30, 32, 116, 121, 186, 93, 86, 94 }, // left = d45 { 72, 35, 36, 149, 68, 206, 68, 63, 105 }, // left = d135 { 73, 31, 28, 138, 57, 124, 55, 122, 151 }, // left = d117 { 67, 23, 21, 140, 126, 197, 40, 37, 171 }, // left = d153 { 86, 27, 28, 128, 154, 212, 45, 43, 53 }, // left = d207 { 74, 32, 27, 107, 86, 160, 63, 134, 102 }, // left = d63 { 59, 67, 44, 140, 161, 202, 78, 67, 119 } // left = tm }, { // above = v { 63, 36, 126, 146, 123, 158, 60, 90, 96 }, // left = dc { 43, 46, 168, 134, 107, 128, 69, 142, 92 }, // left = v { 44, 29, 68, 159, 201, 177, 50, 57, 77 }, // left = h { 58, 38, 76, 114, 97, 172, 78, 133, 92 }, // left = d45 { 46, 41, 76, 140, 63, 184, 69, 112, 57 }, // left = d135 { 38, 32, 85, 140, 46, 112, 54, 151, 133 }, // left = d117 { 39, 27, 61, 131, 110, 175, 44, 75, 136 }, // left = d153 { 52, 30, 74, 113, 130, 175, 51, 64, 58 }, // left = d207 { 47, 35, 80, 100, 74, 143, 64, 163, 74 }, // left = d63 { 36, 61, 116, 114, 128, 162, 80, 125, 82 } // left = tm }, { // above = h { 82, 26, 26, 171, 208, 204, 44, 32, 105 }, // left = dc { 55, 44, 68, 166, 179, 192, 57, 57, 108 }, // left = v { 42, 26, 11, 199, 241, 228, 23, 15, 85 }, // left = h { 68, 42, 19, 131, 160, 199, 55, 52, 83 }, // left = d45 { 58, 50, 25, 139, 115, 232, 39, 52, 118 }, // left = d135 { 50, 35, 33, 153, 104, 162, 64, 59, 131 }, // left = d117 { 44, 24, 16, 150, 177, 202, 33, 19, 156 }, // left = d153 { 55, 27, 12, 153, 203, 218, 26, 27, 49 }, // left = d207 { 53, 49, 21, 110, 116, 168, 59, 80, 76 }, // left = d63 { 38, 72, 19, 168, 203, 212, 50, 50, 107 } // left = tm }, { // above = d45 { 103, 26, 36, 129, 132, 201, 83, 80, 93 }, // left = dc { 59, 38, 83, 112, 103, 162, 98, 136, 90 }, // left = v { 62, 30, 23, 158, 200, 207, 59, 57, 50 }, // left = h { 67, 30, 29, 84, 86, 191, 102, 91, 59 }, // left = d45 { 60, 32, 33, 112, 71, 220, 64, 89, 104 }, // left = d135 { 53, 26, 34, 130, 56, 149, 84, 120, 103 }, // left = d117 { 53, 21, 23, 133, 109, 210, 56, 77, 172 }, // left = d153 { 77, 19, 29, 112, 142, 228, 55, 66, 36 }, // left = d207 { 61, 29, 29, 93, 97, 165, 83, 175, 162 }, // left = d63 { 47, 47, 43, 114, 137, 181, 100, 99, 95 } // left = tm }, { // above = d135 { 69, 23, 29, 128, 83, 199, 46, 44, 101 }, // left = dc { 53, 40, 55, 139, 69, 183, 61, 80, 110 }, // left = v { 40, 29, 19, 161, 180, 207, 43, 24, 91 }, // left = h { 60, 34, 19, 105, 61, 198, 53, 64, 89 }, // left = d45 { 52, 31, 22, 158, 40, 209, 58, 62, 89 }, // left = d135 { 44, 31, 29, 147, 46, 158, 56, 102, 198 }, // left = d117 { 35, 19, 12, 135, 87, 209, 41, 45, 167 }, // left = d153 { 55, 25, 21, 118, 95, 215, 38, 39, 66 }, // left = d207 { 51, 38, 25, 113, 58, 164, 70, 93, 97 }, // left = d63 { 47, 54, 34, 146, 108, 203, 72, 103, 151 } // left = tm }, { // above = d117 { 64, 19, 37, 156, 66, 138, 49, 95, 133 }, // left = dc { 46, 27, 80, 
150, 55, 124, 55, 121, 135 }, // left = v { 36, 23, 27, 165, 149, 166, 54, 64, 118 }, // left = h { 53, 21, 36, 131, 63, 163, 60, 109, 81 }, // left = d45 { 40, 26, 35, 154, 40, 185, 51, 97, 123 }, // left = d135 { 35, 19, 34, 179, 19, 97, 48, 129, 124 }, // left = d117 { 36, 20, 26, 136, 62, 164, 33, 77, 154 }, // left = d153 { 45, 18, 32, 130, 90, 157, 40, 79, 91 }, // left = d207 { 45, 26, 28, 129, 45, 129, 49, 147, 123 }, // left = d63 { 38, 44, 51, 136, 74, 162, 57, 97, 121 } // left = tm }, { // above = d153 { 75, 17, 22, 136, 138, 185, 32, 34, 166 }, // left = dc { 56, 39, 58, 133, 117, 173, 48, 53, 187 }, // left = v { 35, 21, 12, 161, 212, 207, 20, 23, 145 }, // left = h { 56, 29, 19, 117, 109, 181, 55, 68, 112 }, // left = d45 { 47, 29, 17, 153, 64, 220, 59, 51, 114 }, // left = d135 { 46, 16, 24, 136, 76, 147, 41, 64, 172 }, // left = d117 { 34, 17, 11, 108, 152, 187, 13, 15, 209 }, // left = d153 { 51, 24, 14, 115, 133, 209, 32, 26, 104 }, // left = d207 { 55, 30, 18, 122, 79, 179, 44, 88, 116 }, // left = d63 { 37, 49, 25, 129, 168, 164, 41, 54, 148 } // left = tm }, { // above = d207 { 82, 22, 32, 127, 143, 213, 39, 41, 70 }, // left = dc { 62, 44, 61, 123, 105, 189, 48, 57, 64 }, // left = v { 47, 25, 17, 175, 222, 220, 24, 30, 86 }, // left = h { 68, 36, 17, 106, 102, 206, 59, 74, 74 }, // left = d45 { 57, 39, 23, 151, 68, 216, 55, 63, 58 }, // left = d135 { 49, 30, 35, 141, 70, 168, 82, 40, 115 }, // left = d117 { 51, 25, 15, 136, 129, 202, 38, 35, 139 }, // left = d153 { 68, 26, 16, 111, 141, 215, 29, 28, 28 }, // left = d207 { 59, 39, 19, 114, 75, 180, 77, 104, 42 }, // left = d63 { 40, 61, 26, 126, 152, 206, 61, 59, 93 } // left = tm }, { // above = d63 { 78, 23, 39, 111, 117, 170, 74, 124, 94 }, // left = dc { 48, 34, 86, 101, 92, 146, 78, 179, 134 }, // left = v { 47, 22, 24, 138, 187, 178, 68, 69, 59 }, // left = h { 56, 25, 33, 105, 112, 187, 95, 177, 129 }, // left = d45 { 48, 31, 27, 114, 63, 183, 82, 116, 56 }, // left = d135 { 43, 28, 37, 121, 63, 123, 61, 192, 169 }, // left = d117 { 42, 17, 24, 109, 97, 177, 56, 76, 122 }, // left = d153 { 58, 18, 28, 105, 139, 182, 70, 92, 63 }, // left = d207 { 46, 23, 32, 74, 86, 150, 67, 183, 88 }, // left = d63 { 36, 38, 48, 92, 122, 165, 88, 137, 91 } // left = tm }, { // above = tm { 65, 70, 60, 155, 159, 199, 61, 60, 81 }, // left = dc { 44, 78, 115, 132, 119, 173, 71, 112, 93 }, // left = v { 39, 38, 21, 184, 227, 206, 42, 32, 64 }, // left = h { 58, 47, 36, 124, 137, 193, 80, 82, 78 }, // left = d45 { 49, 50, 35, 144, 95, 205, 63, 78, 59 }, // left = d135 { 41, 53, 52, 148, 71, 142, 65, 128, 51 }, // left = d117 { 40, 36, 28, 143, 143, 202, 40, 55, 137 }, // left = d153 { 52, 34, 29, 129, 183, 227, 42, 35, 43 }, // left = d207 { 42, 44, 44, 104, 105, 164, 64, 130, 80 }, // left = d63 { 43, 81, 53, 140, 169, 204, 68, 84, 72 } // left = tm } }; const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1] = { { 144, 11, 54, 157, 195, 130, 46, 58, 108 }, // y = dc { 118, 15, 123, 148, 131, 101, 44, 93, 131 }, // y = v { 113, 12, 23, 188, 226, 142, 26, 32, 125 }, // y = h { 120, 11, 50, 123, 163, 135, 64, 77, 103 }, // y = d45 { 113, 9, 36, 155, 111, 157, 32, 44, 161 }, // y = d135 { 116, 9, 55, 176, 76, 96, 37, 61, 149 }, // y = d117 { 115, 9, 28, 141, 161, 167, 21, 25, 193 }, // y = d153 { 120, 12, 32, 145, 195, 142, 32, 38, 86 }, // y = d207 { 116, 12, 64, 120, 140, 125, 49, 115, 121 }, // y = d63 { 102, 19, 66, 162, 182, 122, 35, 59, 128 } // y = tm }; static const vpx_prob 
default_if_y_probs[BLOCK_SIZE_GROUPS][INTRA_MODES - 1] = { { 65, 32, 18, 144, 162, 194, 41, 51, 98 }, // block_size < 8x8 { 132, 68, 18, 165, 217, 196, 45, 40, 78 }, // block_size < 16x16 { 173, 80, 19, 176, 240, 193, 64, 35, 46 }, // block_size < 32x32 { 221, 135, 38, 194, 248, 121, 96, 85, 29 } // block_size >= 32x32 }; static const vpx_prob default_if_uv_probs[INTRA_MODES][INTRA_MODES - 1] = { { 120, 7, 76, 176, 208, 126, 28, 54, 103 }, // y = dc { 48, 12, 154, 155, 139, 90, 34, 117, 119 }, // y = v { 67, 6, 25, 204, 243, 158, 13, 21, 96 }, // y = h { 97, 5, 44, 131, 176, 139, 48, 68, 97 }, // y = d45 { 83, 5, 42, 156, 111, 152, 26, 49, 152 }, // y = d135 { 80, 5, 58, 178, 74, 83, 33, 62, 145 }, // y = d117 { 86, 5, 32, 154, 192, 168, 14, 22, 163 }, // y = d153 { 85, 5, 32, 156, 216, 148, 19, 29, 73 }, // y = d207 { 77, 7, 64, 116, 132, 122, 37, 126, 120 }, // y = d63 { 101, 21, 107, 181, 192, 103, 19, 67, 125 } // y = tm }; const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] [PARTITION_TYPES - 1] = { // 8x8 -> 4x4 { 158, 97, 94 }, // a/l both not split { 93, 24, 99 }, // a split, l not split { 85, 119, 44 }, // l split, a not split { 62, 59, 67 }, // a/l both split // 16x16 -> 8x8 { 149, 53, 53 }, // a/l both not split { 94, 20, 48 }, // a split, l not split { 83, 53, 24 }, // l split, a not split { 52, 18, 18 }, // a/l both split // 32x32 -> 16x16 { 150, 40, 39 }, // a/l both not split { 78, 12, 26 }, // a split, l not split { 67, 33, 11 }, // l split, a not split { 24, 7, 5 }, // a/l both split // 64x64 -> 32x32 { 174, 35, 49 }, // a/l both not split { 68, 11, 27 }, // a split, l not split { 57, 15, 9 }, // l split, a not split { 12, 3, 3 }, // a/l both split }; static const vpx_prob default_partition_probs[PARTITION_CONTEXTS][PARTITION_TYPES - 1] = { // 8x8 -> 4x4 { 199, 122, 141 }, // a/l both not split { 147, 63, 159 }, // a split, l not split { 148, 133, 118 }, // l split, a not split { 121, 104, 114 }, // a/l both split // 16x16 -> 8x8 { 174, 73, 87 }, // a/l both not split { 92, 41, 83 }, // a split, l not split { 82, 99, 50 }, // l split, a not split { 53, 39, 39 }, // a/l both split // 32x32 -> 16x16 { 177, 58, 59 }, // a/l both not split { 68, 26, 63 }, // a split, l not split { 52, 79, 25 }, // l split, a not split { 17, 14, 12 }, // a/l both split // 64x64 -> 32x32 { 222, 34, 30 }, // a/l both not split { 72, 16, 44 }, // a split, l not split { 58, 32, 12 }, // l split, a not split { 10, 7, 6 }, // a/l both split }; static const vpx_prob default_inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1] = { { 2, 173, 34 }, // 0 = both zero mv { 7, 145, 85 }, // 1 = one zero mv + one a predicted mv { 7, 166, 63 }, // 2 = two predicted mvs { 7, 94, 66 }, // 3 = one predicted/zero and one new mv { 8, 64, 46 }, // 4 = two new mvs { 17, 81, 31 }, // 5 = one intra neighbour + x { 25, 29, 30 }, // 6 = two intra neighbours }; /* Array indices are identical to previously-existing INTRAMODECONTEXTNODES. 
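In these vpx_tree_index arrays, a nonnegative entry gives the position of a node's two children within the same array, while a negated value is a leaf and encodes the coded symbol itself (e.g. -DC_PRED below).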
*/ const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)] = { -DC_PRED, 2, /* 0 = DC_NODE */ -TM_PRED, 4, /* 1 = TM_NODE */ -V_PRED, 6, /* 2 = V_NODE */ 8, 12, /* 3 = COM_NODE */ -H_PRED, 10, /* 4 = H_NODE */ -D135_PRED, -D117_PRED, /* 5 = D135_NODE */ -D45_PRED, 14, /* 6 = D45_NODE */ -D63_PRED, 16, /* 7 = D63_NODE */ -D153_PRED, -D207_PRED /* 8 = D153_NODE */ }; const vpx_tree_index vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)] = { -INTER_OFFSET(ZEROMV), 2, -INTER_OFFSET(NEARESTMV), 4, -INTER_OFFSET(NEARMV), -INTER_OFFSET(NEWMV) }; const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)] = { -PARTITION_NONE, 2, -PARTITION_HORZ, 4, -PARTITION_VERT, -PARTITION_SPLIT }; static const vpx_prob default_intra_inter_p[INTRA_INTER_CONTEXTS] = { 9, 102, 187, 225 }; static const vpx_prob default_comp_inter_p[COMP_INTER_CONTEXTS] = { 239, 183, 119, 96, 41 }; static const vpx_prob default_comp_ref_p[REF_CONTEXTS] = { 50, 126, 123, 221, 226 }; static const vpx_prob default_single_ref_p[REF_CONTEXTS][2] = { { 33, 16 }, { 77, 74 }, { 142, 142 }, { 172, 170 }, { 238, 247 } }; static const struct tx_probs default_tx_probs = { { { 3, 136, 37 }, { 5, 52, 13 } }, { { 20, 152 }, { 15, 101 } }, { { 100 }, { 66 } } }; void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, unsigned int (*ct_32x32p)[2]) { ct_32x32p[0][0] = tx_count_32x32p[TX_4X4]; ct_32x32p[0][1] = tx_count_32x32p[TX_8X8] + tx_count_32x32p[TX_16X16] + tx_count_32x32p[TX_32X32]; ct_32x32p[1][0] = tx_count_32x32p[TX_8X8]; ct_32x32p[1][1] = tx_count_32x32p[TX_16X16] + tx_count_32x32p[TX_32X32]; ct_32x32p[2][0] = tx_count_32x32p[TX_16X16]; ct_32x32p[2][1] = tx_count_32x32p[TX_32X32]; } void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, unsigned int (*ct_16x16p)[2]) { ct_16x16p[0][0] = tx_count_16x16p[TX_4X4]; ct_16x16p[0][1] = tx_count_16x16p[TX_8X8] + tx_count_16x16p[TX_16X16]; ct_16x16p[1][0] = tx_count_16x16p[TX_8X8]; ct_16x16p[1][1] = tx_count_16x16p[TX_16X16]; } void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, unsigned int (*ct_8x8p)[2]) { ct_8x8p[0][0] = tx_count_8x8p[TX_4X4]; ct_8x8p[0][1] = tx_count_8x8p[TX_8X8]; } static const vpx_prob default_skip_probs[SKIP_CONTEXTS] = { 192, 128, 64 }; static const vpx_prob default_switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] [SWITCHABLE_FILTERS - 1] = { { 235, 162 }, { 36, 255 }, { 34, 3 }, { 149, 144 }, }; static void init_mode_probs(FRAME_CONTEXT *fc) { vp9_copy(fc->uv_mode_prob, default_if_uv_probs); vp9_copy(fc->y_mode_prob, default_if_y_probs); vp9_copy(fc->switchable_interp_prob, default_switchable_interp_prob); vp9_copy(fc->partition_prob, default_partition_probs); vp9_copy(fc->intra_inter_prob, default_intra_inter_p); vp9_copy(fc->comp_inter_prob, default_comp_inter_p); vp9_copy(fc->comp_ref_prob, default_comp_ref_p); vp9_copy(fc->single_ref_prob, default_single_ref_p); fc->tx_probs = default_tx_probs; vp9_copy(fc->skip_probs, default_skip_probs); vp9_copy(fc->inter_mode_probs, default_inter_mode_probs); } const vpx_tree_index vp9_switchable_interp_tree[TREE_SIZE( SWITCHABLE_FILTERS)] = { -EIGHTTAP, 2, -EIGHTTAP_SMOOTH, -EIGHTTAP_SHARP }; void vp9_adapt_mode_probs(VP9_COMMON *cm) { int i, j; FRAME_CONTEXT *fc = cm->fc; const FRAME_CONTEXT *pre_fc = &cm->frame_contexts[cm->frame_context_idx]; const FRAME_COUNTS *counts = &cm->counts; for (i = 0; i < INTRA_INTER_CONTEXTS; i++) fc->intra_inter_prob[i] = mode_mv_merge_probs(pre_fc->intra_inter_prob[i], counts->intra_inter[i]); for (i = 0; i < 
COMP_INTER_CONTEXTS; i++) fc->comp_inter_prob[i] = mode_mv_merge_probs(pre_fc->comp_inter_prob[i], counts->comp_inter[i]); for (i = 0; i < REF_CONTEXTS; i++) fc->comp_ref_prob[i] = mode_mv_merge_probs(pre_fc->comp_ref_prob[i], counts->comp_ref[i]); for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < 2; j++) fc->single_ref_prob[i][j] = mode_mv_merge_probs( pre_fc->single_ref_prob[i][j], counts->single_ref[i][j]); for (i = 0; i < INTER_MODE_CONTEXTS; i++) vpx_tree_merge_probs(vp9_inter_mode_tree, pre_fc->inter_mode_probs[i], counts->inter_mode[i], fc->inter_mode_probs[i]); for (i = 0; i < BLOCK_SIZE_GROUPS; i++) vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->y_mode_prob[i], counts->y_mode[i], fc->y_mode_prob[i]); for (i = 0; i < INTRA_MODES; ++i) vpx_tree_merge_probs(vp9_intra_mode_tree, pre_fc->uv_mode_prob[i], counts->uv_mode[i], fc->uv_mode_prob[i]); for (i = 0; i < PARTITION_CONTEXTS; i++) vpx_tree_merge_probs(vp9_partition_tree, pre_fc->partition_prob[i], counts->partition[i], fc->partition_prob[i]); if (cm->interp_filter == SWITCHABLE) { for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) vpx_tree_merge_probs( vp9_switchable_interp_tree, pre_fc->switchable_interp_prob[i], counts->switchable_interp[i], fc->switchable_interp_prob[i]); } if (cm->tx_mode == TX_MODE_SELECT) { int j; unsigned int branch_ct_8x8p[TX_SIZES - 3][2]; unsigned int branch_ct_16x16p[TX_SIZES - 2][2]; unsigned int branch_ct_32x32p[TX_SIZES - 1][2]; for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], branch_ct_8x8p); for (j = 0; j < TX_SIZES - 3; ++j) fc->tx_probs.p8x8[i][j] = mode_mv_merge_probs(pre_fc->tx_probs.p8x8[i][j], branch_ct_8x8p[j]); tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], branch_ct_16x16p); for (j = 0; j < TX_SIZES - 2; ++j) fc->tx_probs.p16x16[i][j] = mode_mv_merge_probs( pre_fc->tx_probs.p16x16[i][j], branch_ct_16x16p[j]); tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], branch_ct_32x32p); for (j = 0; j < TX_SIZES - 1; ++j) fc->tx_probs.p32x32[i][j] = mode_mv_merge_probs( pre_fc->tx_probs.p32x32[i][j], branch_ct_32x32p[j]); } } for (i = 0; i < SKIP_CONTEXTS; ++i) fc->skip_probs[i] = mode_mv_merge_probs(pre_fc->skip_probs[i], counts->skip[i]); } static void set_default_lf_deltas(struct loopfilter *lf) { lf->mode_ref_delta_enabled = 1; lf->mode_ref_delta_update = 1; lf->ref_deltas[INTRA_FRAME] = 1; lf->ref_deltas[LAST_FRAME] = 0; lf->ref_deltas[GOLDEN_FRAME] = -1; lf->ref_deltas[ALTREF_FRAME] = -1; lf->mode_deltas[0] = 0; lf->mode_deltas[1] = 0; } void vp9_setup_past_independence(VP9_COMMON *cm) { // Reset the segment feature data to the default stats: // Features disabled, 0, with delta coding (Default state). struct loopfilter *const lf = &cm->lf; int i; vp9_clearall_segfeatures(&cm->seg); cm->seg.abs_delta = SEGMENT_DELTADATA; if (cm->last_frame_seg_map) memset(cm->last_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); if (cm->current_frame_seg_map) memset(cm->current_frame_seg_map, 0, (cm->mi_rows * cm->mi_cols)); // Reset the mode ref deltas for loop filter vp9_zero(lf->last_ref_deltas); vp9_zero(lf->last_mode_deltas); set_default_lf_deltas(lf); // To force update of the sharpness lf->last_sharpness_level = -1; vp9_default_coef_probs(cm); init_mode_probs(cm->fc); vp9_init_mv_probs(cm); cm->fc->initialized = 1; if (cm->frame_type == KEY_FRAME || cm->error_resilient_mode || cm->reset_frame_context == 3) { // Reset all frame contexts. 
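// (FRAME_CONTEXTS is 4 in VP9; each saved context snapshot is overwritten
// with the freshly initialized defaults set up just above.)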
for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = *cm->fc; } else if (cm->reset_frame_context == 2) { // Reset only the frame context specified in the frame header. cm->frame_contexts[cm->frame_context_idx] = *cm->fc; } // prev_mip will only be allocated in encoder. if (frame_is_intra_only(cm) && cm->prev_mip) memset(cm->prev_mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->prev_mip)); vp9_zero(cm->ref_frame_sign_bias); cm->frame_context_idx = 0; } libvpx-1.8.2/vp9/common/vp9_entropymode.h000066400000000000000000000077441357355204000203700ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_ENTROPYMODE_H_ #define VPX_VP9_COMMON_VP9_ENTROPYMODE_H_ #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_filter.h" #include "vpx_dsp/vpx_filter.h" #ifdef __cplusplus extern "C" { #endif #define BLOCK_SIZE_GROUPS 4 #define TX_SIZE_CONTEXTS 2 #define INTER_OFFSET(mode) ((mode)-NEARESTMV) struct VP9Common; struct tx_probs { vpx_prob p32x32[TX_SIZE_CONTEXTS][TX_SIZES - 1]; vpx_prob p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 2]; vpx_prob p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 3]; }; struct tx_counts { unsigned int p32x32[TX_SIZE_CONTEXTS][TX_SIZES]; unsigned int p16x16[TX_SIZE_CONTEXTS][TX_SIZES - 1]; unsigned int p8x8[TX_SIZE_CONTEXTS][TX_SIZES - 2]; unsigned int tx_totals[TX_SIZES]; }; typedef struct frame_contexts { vpx_prob y_mode_prob[BLOCK_SIZE_GROUPS][INTRA_MODES - 1]; vpx_prob uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; vpx_prob partition_prob[PARTITION_CONTEXTS][PARTITION_TYPES - 1]; vp9_coeff_probs_model coef_probs[TX_SIZES][PLANE_TYPES]; vpx_prob switchable_interp_prob[SWITCHABLE_FILTER_CONTEXTS] [SWITCHABLE_FILTERS - 1]; vpx_prob inter_mode_probs[INTER_MODE_CONTEXTS][INTER_MODES - 1]; vpx_prob intra_inter_prob[INTRA_INTER_CONTEXTS]; vpx_prob comp_inter_prob[COMP_INTER_CONTEXTS]; vpx_prob single_ref_prob[REF_CONTEXTS][2]; vpx_prob comp_ref_prob[REF_CONTEXTS]; struct tx_probs tx_probs; vpx_prob skip_probs[SKIP_CONTEXTS]; nmv_context nmvc; int initialized; } FRAME_CONTEXT; typedef struct FRAME_COUNTS { unsigned int y_mode[BLOCK_SIZE_GROUPS][INTRA_MODES]; unsigned int uv_mode[INTRA_MODES][INTRA_MODES]; unsigned int partition[PARTITION_CONTEXTS][PARTITION_TYPES]; vp9_coeff_count_model coef[TX_SIZES][PLANE_TYPES]; unsigned int eob_branch[TX_SIZES][PLANE_TYPES][REF_TYPES][COEF_BANDS] [COEFF_CONTEXTS]; unsigned int switchable_interp[SWITCHABLE_FILTER_CONTEXTS] [SWITCHABLE_FILTERS]; unsigned int inter_mode[INTER_MODE_CONTEXTS][INTER_MODES]; unsigned int intra_inter[INTRA_INTER_CONTEXTS][2]; unsigned int comp_inter[COMP_INTER_CONTEXTS][2]; unsigned int single_ref[REF_CONTEXTS][2][2]; unsigned int comp_ref[REF_CONTEXTS][2]; struct tx_counts tx; unsigned int skip[SKIP_CONTEXTS][2]; nmv_context_counts mv; } FRAME_COUNTS; extern const vpx_prob vp9_kf_uv_mode_prob[INTRA_MODES][INTRA_MODES - 1]; extern const vpx_prob vp9_kf_y_mode_prob[INTRA_MODES][INTRA_MODES] [INTRA_MODES - 1]; extern const vpx_prob vp9_kf_partition_probs[PARTITION_CONTEXTS] [PARTITION_TYPES - 1]; extern const vpx_tree_index vp9_intra_mode_tree[TREE_SIZE(INTRA_MODES)]; extern const vpx_tree_index 
vp9_inter_mode_tree[TREE_SIZE(INTER_MODES)]; extern const vpx_tree_index vp9_partition_tree[TREE_SIZE(PARTITION_TYPES)]; extern const vpx_tree_index vp9_switchable_interp_tree[TREE_SIZE(SWITCHABLE_FILTERS)]; void vp9_setup_past_independence(struct VP9Common *cm); void vp9_adapt_mode_probs(struct VP9Common *cm); void tx_counts_to_branch_counts_32x32(const unsigned int *tx_count_32x32p, unsigned int (*ct_32x32p)[2]); void tx_counts_to_branch_counts_16x16(const unsigned int *tx_count_16x16p, unsigned int (*ct_16x16p)[2]); void tx_counts_to_branch_counts_8x8(const unsigned int *tx_count_8x8p, unsigned int (*ct_8x8p)[2]); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_ENTROPYMODE_H_ libvpx-1.8.2/vp9/common/vp9_entropymv.c000066400000000000000000000210361357355204000200470ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_entropymv.h" const vpx_tree_index vp9_mv_joint_tree[TREE_SIZE(MV_JOINTS)] = { -MV_JOINT_ZERO, 2, -MV_JOINT_HNZVZ, 4, -MV_JOINT_HZVNZ, -MV_JOINT_HNZVNZ }; const vpx_tree_index vp9_mv_class_tree[TREE_SIZE(MV_CLASSES)] = { -MV_CLASS_0, 2, -MV_CLASS_1, 4, 6, 8, -MV_CLASS_2, -MV_CLASS_3, 10, 12, -MV_CLASS_4, -MV_CLASS_5, -MV_CLASS_6, 14, 16, 18, -MV_CLASS_7, -MV_CLASS_8, -MV_CLASS_9, -MV_CLASS_10, }; const vpx_tree_index vp9_mv_class0_tree[TREE_SIZE(CLASS0_SIZE)] = { -0, -1 }; const vpx_tree_index vp9_mv_fp_tree[TREE_SIZE(MV_FP_SIZE)] = { -0, 2, -1, 4, -2, -3 }; static const nmv_context default_nmv_context = { { 32, 64, 96 }, { { // Vertical component 128, // sign { 224, 144, 192, 168, 192, 176, 192, 198, 198, 245 }, // class { 216 }, // class0 { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, // bits { { 128, 128, 64 }, { 96, 112, 64 } }, // class0_fp { 64, 96, 64 }, // fp 160, // class0_hp bit 128, // hp }, { // Horizontal component 128, // sign { 216, 128, 176, 160, 176, 176, 192, 198, 198, 208 }, // class { 208 }, // class0 { 136, 140, 148, 160, 176, 192, 224, 234, 234, 240 }, // bits { { 128, 128, 64 }, { 96, 112, 64 } }, // class0_fp { 64, 96, 64 }, // fp 160, // class0_hp bit 128, // hp } }, }; static const uint8_t log_in_base_2[] = { 0, 0, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 
8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10 }; static INLINE int mv_class_base(MV_CLASS_TYPE c) { return c ? CLASS0_SIZE << (c + 2) : 0; } MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset) { const MV_CLASS_TYPE c = (z >= CLASS0_SIZE * 4096) ? MV_CLASS_10 : (MV_CLASS_TYPE)log_in_base_2[z >> 3]; if (offset) *offset = z - mv_class_base(c); return c; } static void inc_mv_component(int v, nmv_component_counts *comp_counts, int incr, int usehp) { int s, z, c, o, d, e, f; assert(v != 0); /* should not be zero */ s = v < 0; comp_counts->sign[s] += incr; z = (s ? 
-v : v) - 1; /* magnitude - 1 */ c = vp9_get_mv_class(z, &o); comp_counts->classes[c] += incr; d = (o >> 3); /* int mv data */ f = (o >> 1) & 3; /* fractional pel mv data */ e = (o & 1); /* high precision mv data */ if (c == MV_CLASS_0) { comp_counts->class0[d] += incr; comp_counts->class0_fp[d][f] += incr; comp_counts->class0_hp[e] += usehp * incr; } else { int i; int b = c + CLASS0_BITS - 1; // number of bits for (i = 0; i < b; ++i) comp_counts->bits[i][((d >> i) & 1)] += incr; comp_counts->fp[f] += incr; comp_counts->hp[e] += usehp * incr; } } void vp9_inc_mv(const MV *mv, nmv_context_counts *counts) { if (counts != NULL) { const MV_JOINT_TYPE j = vp9_get_mv_joint(mv); ++counts->joints[j]; if (mv_joint_vertical(j)) { inc_mv_component(mv->row, &counts->comps[0], 1, 1); } if (mv_joint_horizontal(j)) { inc_mv_component(mv->col, &counts->comps[1], 1, 1); } } } void vp9_adapt_mv_probs(VP9_COMMON *cm, int allow_hp) { int i, j; nmv_context *fc = &cm->fc->nmvc; const nmv_context *pre_fc = &cm->frame_contexts[cm->frame_context_idx].nmvc; const nmv_context_counts *counts = &cm->counts.mv; vpx_tree_merge_probs(vp9_mv_joint_tree, pre_fc->joints, counts->joints, fc->joints); for (i = 0; i < 2; ++i) { nmv_component *comp = &fc->comps[i]; const nmv_component *pre_comp = &pre_fc->comps[i]; const nmv_component_counts *c = &counts->comps[i]; comp->sign = mode_mv_merge_probs(pre_comp->sign, c->sign); vpx_tree_merge_probs(vp9_mv_class_tree, pre_comp->classes, c->classes, comp->classes); vpx_tree_merge_probs(vp9_mv_class0_tree, pre_comp->class0, c->class0, comp->class0); for (j = 0; j < MV_OFFSET_BITS; ++j) comp->bits[j] = mode_mv_merge_probs(pre_comp->bits[j], c->bits[j]); for (j = 0; j < CLASS0_SIZE; ++j) vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->class0_fp[j], c->class0_fp[j], comp->class0_fp[j]); vpx_tree_merge_probs(vp9_mv_fp_tree, pre_comp->fp, c->fp, comp->fp); if (allow_hp) { comp->class0_hp = mode_mv_merge_probs(pre_comp->class0_hp, c->class0_hp); comp->hp = mode_mv_merge_probs(pre_comp->hp, c->hp); } } } void vp9_init_mv_probs(VP9_COMMON *cm) { cm->fc->nmvc = default_nmv_context; } libvpx-1.8.2/vp9/common/vp9_entropymv.h000066400000000000000000000076451357355204000200660ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_COMMON_VP9_ENTROPYMV_H_ #define VPX_VP9_COMMON_VP9_ENTROPYMV_H_ #include "./vpx_config.h" #include "vpx_dsp/prob.h" #include "vp9/common/vp9_mv.h" #ifdef __cplusplus extern "C" { #endif struct VP9Common; void vp9_init_mv_probs(struct VP9Common *cm); void vp9_adapt_mv_probs(struct VP9Common *cm, int allow_hp); static INLINE int use_mv_hp(const MV *ref) { const int kMvRefThresh = 64; // threshold for use of high-precision 1/8 mv return abs(ref->row) < kMvRefThresh && abs(ref->col) < kMvRefThresh; } #define MV_UPDATE_PROB 252 /* Symbols for coding which components are zero jointly */ #define MV_JOINTS 4 typedef enum { MV_JOINT_ZERO = 0, /* Zero vector */ MV_JOINT_HNZVZ = 1, /* Vert zero, hor nonzero */ MV_JOINT_HZVNZ = 2, /* Hor zero, vert nonzero */ MV_JOINT_HNZVNZ = 3, /* Both components nonzero */ } MV_JOINT_TYPE; static INLINE int mv_joint_vertical(MV_JOINT_TYPE type) { return type == MV_JOINT_HZVNZ || type == MV_JOINT_HNZVNZ; } static INLINE int mv_joint_horizontal(MV_JOINT_TYPE type) { return type == MV_JOINT_HNZVZ || type == MV_JOINT_HNZVNZ; } /* Symbols for coding magnitude class of nonzero components */ #define MV_CLASSES 11 typedef enum { MV_CLASS_0 = 0, /* (0, 2] integer pel */ MV_CLASS_1 = 1, /* (2, 4] integer pel */ MV_CLASS_2 = 2, /* (4, 8] integer pel */ MV_CLASS_3 = 3, /* (8, 16] integer pel */ MV_CLASS_4 = 4, /* (16, 32] integer pel */ MV_CLASS_5 = 5, /* (32, 64] integer pel */ MV_CLASS_6 = 6, /* (64, 128] integer pel */ MV_CLASS_7 = 7, /* (128, 256] integer pel */ MV_CLASS_8 = 8, /* (256, 512] integer pel */ MV_CLASS_9 = 9, /* (512, 1024] integer pel */ MV_CLASS_10 = 10, /* (1024,2048] integer pel */ } MV_CLASS_TYPE; #define CLASS0_BITS 1 /* bits at integer precision for class 0 */ #define CLASS0_SIZE (1 << CLASS0_BITS) #define MV_OFFSET_BITS (MV_CLASSES + CLASS0_BITS - 2) #define MV_FP_SIZE 4 #define MV_MAX_BITS (MV_CLASSES + CLASS0_BITS + 2) #define MV_MAX ((1 << MV_MAX_BITS) - 1) #define MV_VALS ((MV_MAX << 1) + 1) #define MV_IN_USE_BITS 14 #define MV_UPP ((1 << MV_IN_USE_BITS) - 1) #define MV_LOW (-(1 << MV_IN_USE_BITS)) extern const vpx_tree_index vp9_mv_joint_tree[]; extern const vpx_tree_index vp9_mv_class_tree[]; extern const vpx_tree_index vp9_mv_class0_tree[]; extern const vpx_tree_index vp9_mv_fp_tree[]; typedef struct { vpx_prob sign; vpx_prob classes[MV_CLASSES - 1]; vpx_prob class0[CLASS0_SIZE - 1]; vpx_prob bits[MV_OFFSET_BITS]; vpx_prob class0_fp[CLASS0_SIZE][MV_FP_SIZE - 1]; vpx_prob fp[MV_FP_SIZE - 1]; vpx_prob class0_hp; vpx_prob hp; } nmv_component; typedef struct { vpx_prob joints[MV_JOINTS - 1]; nmv_component comps[2]; } nmv_context; static INLINE MV_JOINT_TYPE vp9_get_mv_joint(const MV *mv) { if (mv->row == 0) { return mv->col == 0 ? MV_JOINT_ZERO : MV_JOINT_HNZVZ; } else { return mv->col == 0 ? 
MV_JOINT_HZVNZ : MV_JOINT_HNZVNZ; } } MV_CLASS_TYPE vp9_get_mv_class(int z, int *offset); typedef struct { unsigned int sign[2]; unsigned int classes[MV_CLASSES]; unsigned int class0[CLASS0_SIZE]; unsigned int bits[MV_OFFSET_BITS][2]; unsigned int class0_fp[CLASS0_SIZE][MV_FP_SIZE]; unsigned int fp[MV_FP_SIZE]; unsigned int class0_hp[2]; unsigned int hp[2]; } nmv_component_counts; typedef struct { unsigned int joints[MV_JOINTS]; nmv_component_counts comps[2]; } nmv_context_counts; void vp9_inc_mv(const MV *mv, nmv_context_counts *counts); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_ENTROPYMV_H_ libvpx-1.8.2/vp9/common/vp9_enums.h000066400000000000000000000102041357355204000171330ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_ENUMS_H_ #define VPX_VP9_COMMON_VP9_ENUMS_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif #define MI_SIZE_LOG2 3 #define MI_BLOCK_SIZE_LOG2 (6 - MI_SIZE_LOG2) // 64 = 2^6 #define MI_SIZE (1 << MI_SIZE_LOG2) // pixels per mi-unit #define MI_BLOCK_SIZE (1 << MI_BLOCK_SIZE_LOG2) // mi-units per max block #define MI_MASK (MI_BLOCK_SIZE - 1) // Bitstream profiles indicated by 2-3 bits in the uncompressed header. // 00: Profile 0. 8-bit 4:2:0 only. // 10: Profile 1. 8-bit 4:4:4, 4:2:2, and 4:4:0. // 01: Profile 2. 10-bit and 12-bit color only, with 4:2:0 sampling. // 110: Profile 3. 10-bit and 12-bit color only, with 4:2:2/4:4:4/4:4:0 // sampling. // 111: Undefined profile. 
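// The two bits appear in the order they are read from the uncompressed
// header, low bit first: "10" above means profile_low_bit = 1 and
// profile_high_bit = 0, i.e. Profile 1. A third bit is read only when the
// first two are both 1, and it must be 0 (Profile 3); a 1 there marks an
// invalid stream.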
typedef enum BITSTREAM_PROFILE { PROFILE_0, PROFILE_1, PROFILE_2, PROFILE_3, MAX_PROFILES } BITSTREAM_PROFILE; typedef enum PARSE_RECON_FLAG { PARSE = 1, RECON = 2 } PARSE_RECON_FLAG; #define BLOCK_4X4 0 #define BLOCK_4X8 1 #define BLOCK_8X4 2 #define BLOCK_8X8 3 #define BLOCK_8X16 4 #define BLOCK_16X8 5 #define BLOCK_16X16 6 #define BLOCK_16X32 7 #define BLOCK_32X16 8 #define BLOCK_32X32 9 #define BLOCK_32X64 10 #define BLOCK_64X32 11 #define BLOCK_64X64 12 #define BLOCK_SIZES 13 #define BLOCK_INVALID BLOCK_SIZES typedef uint8_t BLOCK_SIZE; typedef enum PARTITION_TYPE { PARTITION_NONE, PARTITION_HORZ, PARTITION_VERT, PARTITION_SPLIT, PARTITION_TYPES, PARTITION_INVALID = PARTITION_TYPES } PARTITION_TYPE; typedef char PARTITION_CONTEXT; #define PARTITION_PLOFFSET 4 // number of probability models per block size #define PARTITION_CONTEXTS (4 * PARTITION_PLOFFSET) // block transform size typedef uint8_t TX_SIZE; #define TX_4X4 ((TX_SIZE)0) // 4x4 transform #define TX_8X8 ((TX_SIZE)1) // 8x8 transform #define TX_16X16 ((TX_SIZE)2) // 16x16 transform #define TX_32X32 ((TX_SIZE)3) // 32x32 transform #define TX_SIZES ((TX_SIZE)4) // frame transform mode typedef enum { ONLY_4X4 = 0, // only 4x4 transform used ALLOW_8X8 = 1, // allow block transform size up to 8x8 ALLOW_16X16 = 2, // allow block transform size up to 16x16 ALLOW_32X32 = 3, // allow block transform size up to 32x32 TX_MODE_SELECT = 4, // transform specified for each block TX_MODES = 5, } TX_MODE; typedef enum { DCT_DCT = 0, // DCT in both horizontal and vertical ADST_DCT = 1, // ADST in vertical, DCT in horizontal DCT_ADST = 2, // DCT in vertical, ADST in horizontal ADST_ADST = 3, // ADST in both directions TX_TYPES = 4 } TX_TYPE; typedef enum { VP9_LAST_FLAG = 1 << 0, VP9_GOLD_FLAG = 1 << 1, VP9_ALT_FLAG = 1 << 2, } VP9_REFFRAME; typedef enum { PLANE_TYPE_Y = 0, PLANE_TYPE_UV = 1, PLANE_TYPES } PLANE_TYPE; #define DC_PRED 0 // Average of above and left pixels #define V_PRED 1 // Vertical #define H_PRED 2 // Horizontal #define D45_PRED 3 // Directional 45 deg = round(arctan(1/1) * 180/pi) #define D135_PRED 4 // Directional 135 deg = 180 - 45 #define D117_PRED 5 // Directional 117 deg = 180 - 63 #define D153_PRED 6 // Directional 153 deg = 180 - 27 #define D207_PRED 7 // Directional 207 deg = 180 + 27 #define D63_PRED 8 // Directional 63 deg = round(arctan(2/1) * 180/pi) #define TM_PRED 9 // True-motion #define NEARESTMV 10 #define NEARMV 11 #define ZEROMV 12 #define NEWMV 13 #define MB_MODE_COUNT 14 typedef uint8_t PREDICTION_MODE; #define INTRA_MODES (TM_PRED + 1) #define INTER_MODES (1 + NEWMV - NEARESTMV) #define SKIP_CONTEXTS 3 #define INTER_MODE_CONTEXTS 7 /* Segment Feature Masks */ #define MAX_MV_REF_CANDIDATES 2 #define INTRA_INTER_CONTEXTS 4 #define COMP_INTER_CONTEXTS 5 #define REF_CONTEXTS 5 #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_ENUMS_H_ libvpx-1.8.2/vp9/common/vp9_filter.c000066400000000000000000000077661357355204000173070ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "vp9/common/vp9_filter.h" DECLARE_ALIGNED(256, static const InterpKernel, bilinear_filters[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, 0, 120, 8, 0, 0, 0 }, { 0, 0, 0, 112, 16, 0, 0, 0 }, { 0, 0, 0, 104, 24, 0, 0, 0 }, { 0, 0, 0, 96, 32, 0, 0, 0 }, { 0, 0, 0, 88, 40, 0, 0, 0 }, { 0, 0, 0, 80, 48, 0, 0, 0 }, { 0, 0, 0, 72, 56, 0, 0, 0 }, { 0, 0, 0, 64, 64, 0, 0, 0 }, { 0, 0, 0, 56, 72, 0, 0, 0 }, { 0, 0, 0, 48, 80, 0, 0, 0 }, { 0, 0, 0, 40, 88, 0, 0, 0 }, { 0, 0, 0, 32, 96, 0, 0, 0 }, { 0, 0, 0, 24, 104, 0, 0, 0 }, { 0, 0, 0, 16, 112, 0, 0, 0 }, { 0, 0, 0, 8, 120, 0, 0, 0 } }; // Lagrangian interpolation filter DECLARE_ALIGNED(256, static const InterpKernel, sub_pel_filters_8[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -5, 126, 8, -3, 1, 0 }, { -1, 3, -10, 122, 18, -6, 2, 0 }, { -1, 4, -13, 118, 27, -9, 3, -1 }, { -1, 4, -16, 112, 37, -11, 4, -1 }, { -1, 5, -18, 105, 48, -14, 4, -1 }, { -1, 5, -19, 97, 58, -16, 5, -1 }, { -1, 6, -19, 88, 68, -18, 5, -1 }, { -1, 6, -19, 78, 78, -19, 6, -1 }, { -1, 5, -18, 68, 88, -19, 6, -1 }, { -1, 5, -16, 58, 97, -19, 5, -1 }, { -1, 4, -14, 48, 105, -18, 5, -1 }, { -1, 4, -11, 37, 112, -16, 4, -1 }, { -1, 3, -9, 27, 118, -13, 4, -1 }, { 0, 2, -6, 18, 122, -10, 3, -1 }, { 0, 1, -3, 8, 126, -5, 1, 0 } }; // DCT based filter DECLARE_ALIGNED(256, static const InterpKernel, sub_pel_filters_8s[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0 }, { -1, 3, -7, 127, 8, -3, 1, 0 }, { -2, 5, -13, 125, 17, -6, 3, -1 }, { -3, 7, -17, 121, 27, -10, 5, -2 }, { -4, 9, -20, 115, 37, -13, 6, -2 }, { -4, 10, -23, 108, 48, -16, 8, -3 }, { -4, 10, -24, 100, 59, -19, 9, -3 }, { -4, 11, -24, 90, 70, -21, 10, -4 }, { -4, 11, -23, 80, 80, -23, 11, -4 }, { -4, 10, -21, 70, 90, -24, 11, -4 }, { -3, 9, -19, 59, 100, -24, 10, -4 }, { -3, 8, -16, 48, 108, -23, 10, -4 }, { -2, 6, -13, 37, 115, -20, 9, -4 }, { -2, 5, -10, 27, 121, -17, 7, -3 }, { -1, 3, -6, 17, 125, -13, 5, -2 }, { 0, 1, -3, 8, 127, -7, 3, -1 } }; // freqmultiplier = 0.5 DECLARE_ALIGNED(256, static const InterpKernel, sub_pel_filters_8lp[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0 }, { -3, -1, 32, 64, 38, 1, -3, 0 }, { -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 26, 63, 43, 4, -4, 0 }, { -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 21, 60, 49, 7, -4, 0 }, { -1, -4, 18, 59, 51, 9, -4, 0 }, { -1, -4, 16, 57, 53, 12, -4, -1 }, { -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 12, 53, 57, 16, -4, -1 }, { 0, -4, 9, 51, 59, 18, -4, -1 }, { 0, -4, 7, 49, 60, 21, -3, -2 }, { 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 4, 43, 63, 26, -2, -2 }, { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 } }; // 4-tap filter DECLARE_ALIGNED(256, static const InterpKernel, sub_pel_filters_4[SUBPEL_SHIFTS]) = { { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 0, -4, 126, 8, -2, 0, 0 }, { 0, 0, -6, 120, 18, -4, 0, 0 }, { 0, 0, -8, 114, 28, -6, 0, 0 }, { 0, 0, -10, 108, 36, -6, 0, 0 }, { 0, 0, -12, 102, 46, -8, 0, 0 }, { 0, 0, -12, 94, 56, -10, 0, 0 }, { 0, 0, -12, 84, 66, -10, 0, 0 }, { 0, 0, -12, 76, 76, -12, 0, 0 }, { 0, 0, -10, 66, 84, -12, 0, 0 }, { 0, 0, -10, 56, 94, -12, 0, 0 }, { 0, 0, -8, 46, 102, -12, 0, 0 }, { 0, 0, -6, 36, 108, -10, 0, 0 }, { 0, 0, -6, 28, 114, -8, 0, 0 }, { 0, 0, -4, 18, 120, -6, 0, 0 }, { 0, 0, -2, 8, 126, -4, 0, 0 } }; const InterpKernel *vp9_filter_kernels[5] = { sub_pel_filters_8, sub_pel_filters_8lp, sub_pel_filters_8s, bilinear_filters, sub_pel_filters_4 }; 
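/*
 * Illustrative sketch (these helper names are not libvpx API): every row of
 * the kernels above sums to 128 (1 << FILTER_BITS, with FILTER_BITS == 7), so
 * applying a kernel is an integer convolution followed by a rounding shift.
 * A minimal, self-contained version of the per-pixel step follows; the real
 * convolve code in vpx_dsp (vpx_convolve.c and its SIMD variants)
 * additionally handles strides, two-dimensional passes and averaging.
 */
#include <stdint.h>

static uint8_t clip_u8(int v) {
  return v < 0 ? 0 : (v > 255 ? 255 : (uint8_t)v);
}

/* src points at the integer-pel sample; the 8-tap window spans src[-3..4]. */
static uint8_t apply_interp_kernel(const uint8_t *src,
                                   const int16_t kernel[8]) {
  int k, sum = 0;
  for (k = 0; k < 8; ++k) sum += kernel[k] * src[k - 3];
  return clip_u8((sum + 64) >> 7); /* ROUND_POWER_OF_TWO(sum, FILTER_BITS) */
}

/* Usage: apply_interp_kernel(p, sub_pel_filters_8[x_frac_q4]) interpolates
 * at horizontal sub-pel phase x_frac_q4 / 16 around pixel p. */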
libvpx-1.8.2/vp9/common/vp9_filter.h000066400000000000000000000023211357355204000172720ustar00rootroot00000000000000/* * Copyright (c) 2011 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_FILTER_H_ #define VPX_VP9_COMMON_VP9_FILTER_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" #ifdef __cplusplus extern "C" { #endif #define EIGHTTAP 0 #define EIGHTTAP_SMOOTH 1 #define EIGHTTAP_SHARP 2 #define SWITCHABLE_FILTERS 3 /* Number of switchable filters */ #define BILINEAR 3 #define FOURTAP 4 // The codec can operate in four possible inter prediction filter mode: // 8-tap, 8-tap-smooth, 8-tap-sharp, and switching between the three. #define SWITCHABLE_FILTER_CONTEXTS (SWITCHABLE_FILTERS + 1) #define SWITCHABLE 4 /* should be the last one */ typedef uint8_t INTERP_FILTER; extern const InterpKernel *vp9_filter_kernels[5]; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_FILTER_H_ libvpx-1.8.2/vp9/common/vp9_frame_buffers.c000066400000000000000000000050031357355204000206060ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "vp9/common/vp9_frame_buffers.h" #include "vpx_mem/vpx_mem.h" int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list) { assert(list != NULL); vp9_free_internal_frame_buffers(list); list->num_internal_frame_buffers = VP9_MAXIMUM_REF_BUFFERS + VPX_MAXIMUM_WORK_BUFFERS; list->int_fb = (InternalFrameBuffer *)vpx_calloc( list->num_internal_frame_buffers, sizeof(*list->int_fb)); return (list->int_fb == NULL); } void vp9_free_internal_frame_buffers(InternalFrameBufferList *list) { int i; assert(list != NULL); for (i = 0; i < list->num_internal_frame_buffers; ++i) { vpx_free(list->int_fb[i].data); list->int_fb[i].data = NULL; } vpx_free(list->int_fb); list->int_fb = NULL; } int vp9_get_frame_buffer(void *cb_priv, size_t min_size, vpx_codec_frame_buffer_t *fb) { int i; InternalFrameBufferList *const int_fb_list = (InternalFrameBufferList *)cb_priv; if (int_fb_list == NULL) return -1; // Find a free frame buffer. for (i = 0; i < int_fb_list->num_internal_frame_buffers; ++i) { if (!int_fb_list->int_fb[i].in_use) break; } if (i == int_fb_list->num_internal_frame_buffers) return -1; if (int_fb_list->int_fb[i].size < min_size) { vpx_free(int_fb_list->int_fb[i].data); // The data must be zeroed to fix a valgrind error from the C loop filter // due to access uninitialized memory in frame border. It could be // skipped if border were totally removed. 
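// (Hence vpx_calloc rather than vpx_malloc below: the zero fill is what
// provides that guarantee.)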
int_fb_list->int_fb[i].data = (uint8_t *)vpx_calloc(1, min_size); if (!int_fb_list->int_fb[i].data) return -1; int_fb_list->int_fb[i].size = min_size; } fb->data = int_fb_list->int_fb[i].data; fb->size = int_fb_list->int_fb[i].size; int_fb_list->int_fb[i].in_use = 1; // Set the frame buffer's private data to point at the internal frame buffer. fb->priv = &int_fb_list->int_fb[i]; return 0; } int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) { InternalFrameBuffer *const int_fb = (InternalFrameBuffer *)fb->priv; (void)cb_priv; if (int_fb) int_fb->in_use = 0; return 0; } libvpx-1.8.2/vp9/common/vp9_frame_buffers.h000066400000000000000000000033331357355204000206170ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_FRAME_BUFFERS_H_ #define VPX_VP9_COMMON_VP9_FRAME_BUFFERS_H_ #include "vpx/vpx_frame_buffer.h" #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif typedef struct InternalFrameBuffer { uint8_t *data; size_t size; int in_use; } InternalFrameBuffer; typedef struct InternalFrameBufferList { int num_internal_frame_buffers; InternalFrameBuffer *int_fb; } InternalFrameBufferList; // Initializes |list|. Returns 0 on success. int vp9_alloc_internal_frame_buffers(InternalFrameBufferList *list); // Free any data allocated to the frame buffers. void vp9_free_internal_frame_buffers(InternalFrameBufferList *list); // Callback used by libvpx to request an external frame buffer. |cb_priv| // Callback private data, which points to an InternalFrameBufferList. // |min_size| is the minimum size in bytes needed to decode the next frame. // |fb| pointer to the frame buffer. int vp9_get_frame_buffer(void *cb_priv, size_t min_size, vpx_codec_frame_buffer_t *fb); // Callback used by libvpx when there are no references to the frame buffer. // |cb_priv| is not used. |fb| pointer to the frame buffer. int vp9_release_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_FRAME_BUFFERS_H_ libvpx-1.8.2/vp9/common/vp9_idct.c000066400000000000000000000306661357355204000167400ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" #include "vpx_dsp/inv_txfm.h" #include "vpx_ports/mem.h" void vp9_iht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { const transform_2d IHT_4[] = { { idct4_c, idct4_c }, // DCT_DCT = 0 { iadst4_c, idct4_c }, // ADST_DCT = 1 { idct4_c, iadst4_c }, // DCT_ADST = 2 { iadst4_c, iadst4_c } // ADST_ADST = 3 }; int i, j; tran_low_t out[4 * 4]; tran_low_t *outptr = out; tran_low_t temp_in[4], temp_out[4]; // inverse transform row vectors for (i = 0; i < 4; ++i) { IHT_4[tx_type].rows(input, outptr); input += 4; outptr += 4; } // inverse transform column vectors for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; IHT_4[tx_type].cols(temp_in, temp_out); for (j = 0; j < 4; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4)); } } } static const transform_2d IHT_8[] = { { idct8_c, idct8_c }, // DCT_DCT = 0 { iadst8_c, idct8_c }, // ADST_DCT = 1 { idct8_c, iadst8_c }, // DCT_ADST = 2 { iadst8_c, iadst8_c } // ADST_ADST = 3 }; void vp9_iht8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { int i, j; tran_low_t out[8 * 8]; tran_low_t *outptr = out; tran_low_t temp_in[8], temp_out[8]; const transform_2d ht = IHT_8[tx_type]; // inverse transform row vectors for (i = 0; i < 8; ++i) { ht.rows(input, outptr); input += 8; outptr += 8; } // inverse transform column vectors for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 8; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5)); } } } static const transform_2d IHT_16[] = { { idct16_c, idct16_c }, // DCT_DCT = 0 { iadst16_c, idct16_c }, // ADST_DCT = 1 { idct16_c, iadst16_c }, // DCT_ADST = 2 { iadst16_c, iadst16_c } // ADST_ADST = 3 }; void vp9_iht16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { int i, j; tran_low_t out[16 * 16]; tran_low_t *outptr = out; tran_low_t temp_in[16], temp_out[16]; const transform_2d ht = IHT_16[tx_type]; // Rows for (i = 0; i < 16; ++i) { ht.rows(input, outptr); input += 16; outptr += 16; } // Columns for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; ht.cols(temp_in, temp_out); for (j = 0; j < 16; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6)); } } } // idct void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob) { if (eob > 1) vpx_idct4x4_16_add(input, dest, stride); else vpx_idct4x4_1_add(input, dest, stride); } void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob) { if (eob > 1) vpx_iwht4x4_16_add(input, dest, stride); else vpx_iwht4x4_1_add(input, dest, stride); } void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, int eob) { // If dc is 1, then input[0] is the reconstructed value, do not need // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. // The calculation can be simplified if there are not many non-zero dct // coefficients. Use eobs to decide what to do. 
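// (eob counts coefficients in scan order, so eob == 1 means only the DC term
// survived quantization and the inverse transform collapses to adding one
// constant to the whole block; eob <= 12 keeps every nonzero coefficient in
// the low-frequency corner that the 12-coefficient fast path assumes.)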
if (eob == 1) // DC only DCT coefficient vpx_idct8x8_1_add(input, dest, stride); else if (eob <= 12) vpx_idct8x8_12_add(input, dest, stride); else vpx_idct8x8_64_add(input, dest, stride); } void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int eob) { /* The calculation can be simplified if there are not many non-zero dct * coefficients. Use eobs to separate different cases. */ if (eob == 1) /* DC only DCT coefficient. */ vpx_idct16x16_1_add(input, dest, stride); else if (eob <= 10) vpx_idct16x16_10_add(input, dest, stride); else if (eob <= 38) vpx_idct16x16_38_add(input, dest, stride); else vpx_idct16x16_256_add(input, dest, stride); } void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, int eob) { if (eob == 1) vpx_idct32x32_1_add(input, dest, stride); else if (eob <= 34) // non-zero coeff only in upper-left 8x8 vpx_idct32x32_34_add(input, dest, stride); else if (eob <= 135) // non-zero coeff only in upper-left 16x16 vpx_idct32x32_135_add(input, dest, stride); else vpx_idct32x32_1024_add(input, dest, stride); } // iht void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, int stride, int eob) { if (tx_type == DCT_DCT) vp9_idct4x4_add(input, dest, stride, eob); else vp9_iht4x4_16_add(input, dest, stride, tx_type); } void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, int stride, int eob) { if (tx_type == DCT_DCT) { vp9_idct8x8_add(input, dest, stride, eob); } else { vp9_iht8x8_64_add(input, dest, stride, tx_type); } } void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, int stride, int eob) { if (tx_type == DCT_DCT) { vp9_idct16x16_add(input, dest, stride, eob); } else { vp9_iht16x16_256_add(input, dest, stride, tx_type); } } #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_iht4x4_16_add_c(const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd) { const highbd_transform_2d IHT_4[] = { { vpx_highbd_idct4_c, vpx_highbd_idct4_c }, // DCT_DCT = 0 { vpx_highbd_iadst4_c, vpx_highbd_idct4_c }, // ADST_DCT = 1 { vpx_highbd_idct4_c, vpx_highbd_iadst4_c }, // DCT_ADST = 2 { vpx_highbd_iadst4_c, vpx_highbd_iadst4_c } // ADST_ADST = 3 }; int i, j; tran_low_t out[4 * 4]; tran_low_t *outptr = out; tran_low_t temp_in[4], temp_out[4]; // Inverse transform row vectors. for (i = 0; i < 4; ++i) { IHT_4[tx_type].rows(input, outptr, bd); input += 4; outptr += 4; } // Inverse transform column vectors. for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; IHT_4[tx_type].cols(temp_in, temp_out, bd); for (j = 0; j < 4; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); } } } static const highbd_transform_2d HIGH_IHT_8[] = { { vpx_highbd_idct8_c, vpx_highbd_idct8_c }, // DCT_DCT = 0 { vpx_highbd_iadst8_c, vpx_highbd_idct8_c }, // ADST_DCT = 1 { vpx_highbd_idct8_c, vpx_highbd_iadst8_c }, // DCT_ADST = 2 { vpx_highbd_iadst8_c, vpx_highbd_iadst8_c } // ADST_ADST = 3 }; void vp9_highbd_iht8x8_64_add_c(const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd) { int i, j; tran_low_t out[8 * 8]; tran_low_t *outptr = out; tran_low_t temp_in[8], temp_out[8]; const highbd_transform_2d ht = HIGH_IHT_8[tx_type]; // Inverse transform row vectors. for (i = 0; i < 8; ++i) { ht.rows(input, outptr, bd); input += 8; outptr += 8; } // Inverse transform column vectors. 
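// (The row pass above stored its results row-major, so gathering
// out[j * 8 + i] below transposes on the fly: each 2-D inverse transform is
// two separable 1-D passes.)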
for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; ht.cols(temp_in, temp_out, bd); for (j = 0; j < 8; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); } } } static const highbd_transform_2d HIGH_IHT_16[] = { { vpx_highbd_idct16_c, vpx_highbd_idct16_c }, // DCT_DCT = 0 { vpx_highbd_iadst16_c, vpx_highbd_idct16_c }, // ADST_DCT = 1 { vpx_highbd_idct16_c, vpx_highbd_iadst16_c }, // DCT_ADST = 2 { vpx_highbd_iadst16_c, vpx_highbd_iadst16_c } // ADST_ADST = 3 }; void vp9_highbd_iht16x16_256_add_c(const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd) { int i, j; tran_low_t out[16 * 16]; tran_low_t *outptr = out; tran_low_t temp_in[16], temp_out[16]; const highbd_transform_2d ht = HIGH_IHT_16[tx_type]; // Rows for (i = 0; i < 16; ++i) { ht.rows(input, outptr, bd); input += 16; outptr += 16; } // Columns for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; ht.cols(temp_in, temp_out, bd); for (j = 0; j < 16; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); } } } // idct void vp9_highbd_idct4x4_add(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd) { if (eob > 1) vpx_highbd_idct4x4_16_add(input, dest, stride, bd); else vpx_highbd_idct4x4_1_add(input, dest, stride, bd); } void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd) { if (eob > 1) vpx_highbd_iwht4x4_16_add(input, dest, stride, bd); else vpx_highbd_iwht4x4_1_add(input, dest, stride, bd); } void vp9_highbd_idct8x8_add(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd) { // If dc is 1, then input[0] is the reconstructed value, do not need // dequantization. Also, when dc is 1, dc is counted in eobs, namely eobs >=1. // The calculation can be simplified if there are not many non-zero dct // coefficients. Use eobs to decide what to do. // DC only DCT coefficient if (eob == 1) { vpx_highbd_idct8x8_1_add(input, dest, stride, bd); } else if (eob <= 12) { vpx_highbd_idct8x8_12_add(input, dest, stride, bd); } else { vpx_highbd_idct8x8_64_add(input, dest, stride, bd); } } void vp9_highbd_idct16x16_add(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd) { // The calculation can be simplified if there are not many non-zero dct // coefficients. Use eobs to separate different cases. // DC only DCT coefficient. 
if (eob == 1) { vpx_highbd_idct16x16_1_add(input, dest, stride, bd); } else if (eob <= 10) { vpx_highbd_idct16x16_10_add(input, dest, stride, bd); } else if (eob <= 38) { vpx_highbd_idct16x16_38_add(input, dest, stride, bd); } else { vpx_highbd_idct16x16_256_add(input, dest, stride, bd); } } void vp9_highbd_idct32x32_add(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd) { // Non-zero coeff only in upper-left 8x8 if (eob == 1) { vpx_highbd_idct32x32_1_add(input, dest, stride, bd); } else if (eob <= 34) { vpx_highbd_idct32x32_34_add(input, dest, stride, bd); } else if (eob <= 135) { vpx_highbd_idct32x32_135_add(input, dest, stride, bd); } else { vpx_highbd_idct32x32_1024_add(input, dest, stride, bd); } } // iht void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd) { if (tx_type == DCT_DCT) vp9_highbd_idct4x4_add(input, dest, stride, eob, bd); else vp9_highbd_iht4x4_16_add(input, dest, stride, tx_type, bd); } void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd) { if (tx_type == DCT_DCT) { vp9_highbd_idct8x8_add(input, dest, stride, eob, bd); } else { vp9_highbd_iht8x8_64_add(input, dest, stride, tx_type, bd); } } void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd) { if (tx_type == DCT_DCT) { vp9_highbd_idct16x16_add(input, dest, stride, eob, bd); } else { vp9_highbd_iht16x16_256_add(input, dest, stride, tx_type, bd); } } #endif // CONFIG_VP9_HIGHBITDEPTH libvpx-1.8.2/vp9/common/vp9_idct.h000066400000000000000000000062471357355204000167430ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_COMMON_VP9_IDCT_H_ #define VPX_VP9_COMMON_VP9_IDCT_H_ #include #include "./vpx_config.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_enums.h" #include "vpx_dsp/inv_txfm.h" #include "vpx_dsp/txfm_common.h" #include "vpx_ports/mem.h" #ifdef __cplusplus extern "C" { #endif typedef void (*transform_1d)(const tran_low_t *, tran_low_t *); typedef struct { transform_1d cols, rows; // vertical and horizontal } transform_2d; #if CONFIG_VP9_HIGHBITDEPTH typedef void (*highbd_transform_1d)(const tran_low_t *, tran_low_t *, int bd); typedef struct { highbd_transform_1d cols, rows; // vertical and horizontal } highbd_transform_2d; #endif // CONFIG_VP9_HIGHBITDEPTH void vp9_iwht4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); void vp9_idct4x4_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); void vp9_idct8x8_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); void vp9_idct16x16_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); void vp9_idct32x32_add(const tran_low_t *input, uint8_t *dest, int stride, int eob); void vp9_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, int stride, int eob); void vp9_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, int stride, int eob); void vp9_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint8_t *dest, int stride, int eob); #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_iwht4x4_add(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd); void vp9_highbd_idct4x4_add(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd); void vp9_highbd_idct8x8_add(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd); void vp9_highbd_idct16x16_add(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd); void vp9_highbd_idct32x32_add(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd); void vp9_highbd_iht4x4_add(TX_TYPE tx_type, const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd); void vp9_highbd_iht8x8_add(TX_TYPE tx_type, const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd); void vp9_highbd_iht16x16_add(TX_TYPE tx_type, const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd); #endif // CONFIG_VP9_HIGHBITDEPTH #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_IDCT_H_ libvpx-1.8.2/vp9/common/vp9_loopfilter.c000066400000000000000000001664751357355204000202040ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_reconinter.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vp9/common/vp9_seg_common.h" // 64 bit masks for left transform size. Each 1 represents a position where // we should apply a loop filter across the left border of an 8x8 block // boundary. 
// // In the case of TX_16X16-> ( in low order byte first we end up with // a mask that looks like this // // 10101010 // 10101010 // 10101010 // 10101010 // 10101010 // 10101010 // 10101010 // 10101010 // // A loopfilter should be applied to every other 8x8 horizontally. static const uint64_t left_64x64_txform_mask[TX_SIZES] = { 0xffffffffffffffffULL, // TX_4X4 0xffffffffffffffffULL, // TX_8x8 0x5555555555555555ULL, // TX_16x16 0x1111111111111111ULL, // TX_32x32 }; // 64 bit masks for above transform size. Each 1 represents a position where // we should apply a loop filter across the top border of an 8x8 block // boundary. // // In the case of TX_32x32 -> ( in low order byte first we end up with // a mask that looks like this // // 11111111 // 00000000 // 00000000 // 00000000 // 11111111 // 00000000 // 00000000 // 00000000 // // A loopfilter should be applied to every other 4 the row vertically. static const uint64_t above_64x64_txform_mask[TX_SIZES] = { 0xffffffffffffffffULL, // TX_4X4 0xffffffffffffffffULL, // TX_8x8 0x00ff00ff00ff00ffULL, // TX_16x16 0x000000ff000000ffULL, // TX_32x32 }; // 64 bit masks for prediction sizes (left). Each 1 represents a position // where left border of an 8x8 block. These are aligned to the right most // appropriate bit, and then shifted into place. // // In the case of TX_16x32 -> ( low order byte first ) we end up with // a mask that looks like this : // // 10000000 // 10000000 // 10000000 // 10000000 // 00000000 // 00000000 // 00000000 // 00000000 static const uint64_t left_prediction_mask[BLOCK_SIZES] = { 0x0000000000000001ULL, // BLOCK_4X4, 0x0000000000000001ULL, // BLOCK_4X8, 0x0000000000000001ULL, // BLOCK_8X4, 0x0000000000000001ULL, // BLOCK_8X8, 0x0000000000000101ULL, // BLOCK_8X16, 0x0000000000000001ULL, // BLOCK_16X8, 0x0000000000000101ULL, // BLOCK_16X16, 0x0000000001010101ULL, // BLOCK_16X32, 0x0000000000000101ULL, // BLOCK_32X16, 0x0000000001010101ULL, // BLOCK_32X32, 0x0101010101010101ULL, // BLOCK_32X64, 0x0000000001010101ULL, // BLOCK_64X32, 0x0101010101010101ULL, // BLOCK_64X64 }; // 64 bit mask to shift and set for each prediction size. static const uint64_t above_prediction_mask[BLOCK_SIZES] = { 0x0000000000000001ULL, // BLOCK_4X4 0x0000000000000001ULL, // BLOCK_4X8 0x0000000000000001ULL, // BLOCK_8X4 0x0000000000000001ULL, // BLOCK_8X8 0x0000000000000001ULL, // BLOCK_8X16, 0x0000000000000003ULL, // BLOCK_16X8 0x0000000000000003ULL, // BLOCK_16X16 0x0000000000000003ULL, // BLOCK_16X32, 0x000000000000000fULL, // BLOCK_32X16, 0x000000000000000fULL, // BLOCK_32X32, 0x000000000000000fULL, // BLOCK_32X64, 0x00000000000000ffULL, // BLOCK_64X32, 0x00000000000000ffULL, // BLOCK_64X64 }; // 64 bit mask to shift and set for each prediction size. A bit is set for // each 8x8 block that would be in the left most block of the given block // size in the 64x64 block. static const uint64_t size_mask[BLOCK_SIZES] = { 0x0000000000000001ULL, // BLOCK_4X4 0x0000000000000001ULL, // BLOCK_4X8 0x0000000000000001ULL, // BLOCK_8X4 0x0000000000000001ULL, // BLOCK_8X8 0x0000000000000101ULL, // BLOCK_8X16, 0x0000000000000003ULL, // BLOCK_16X8 0x0000000000000303ULL, // BLOCK_16X16 0x0000000003030303ULL, // BLOCK_16X32, 0x0000000000000f0fULL, // BLOCK_32X16, 0x000000000f0f0f0fULL, // BLOCK_32X32, 0x0f0f0f0f0f0f0f0fULL, // BLOCK_32X64, 0x00000000ffffffffULL, // BLOCK_64X32, 0xffffffffffffffffULL, // BLOCK_64X64 }; // These are used for masking the left and above borders. 
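// Bit i of a 64-bit mask is the 8x8 block at (row i / 8, col i % 8) of the
// 64x64 superblock. left_border (0x11 in every byte) keeps columns 0 and 4,
// the left edge of each 32x32; above_border keeps rows 0 and 4, the top edge
// of each 32x32.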
static const uint64_t left_border = 0x1111111111111111ULL; static const uint64_t above_border = 0x000000ff000000ffULL; // 16 bit masks for uv transform sizes. static const uint16_t left_64x64_txform_mask_uv[TX_SIZES] = { 0xffff, // TX_4X4 0xffff, // TX_8x8 0x5555, // TX_16x16 0x1111, // TX_32x32 }; static const uint16_t above_64x64_txform_mask_uv[TX_SIZES] = { 0xffff, // TX_4X4 0xffff, // TX_8x8 0x0f0f, // TX_16x16 0x000f, // TX_32x32 }; // 16 bit left mask to shift and set for each uv prediction size. static const uint16_t left_prediction_mask_uv[BLOCK_SIZES] = { 0x0001, // BLOCK_4X4, 0x0001, // BLOCK_4X8, 0x0001, // BLOCK_8X4, 0x0001, // BLOCK_8X8, 0x0001, // BLOCK_8X16, 0x0001, // BLOCK_16X8, 0x0001, // BLOCK_16X16, 0x0011, // BLOCK_16X32, 0x0001, // BLOCK_32X16, 0x0011, // BLOCK_32X32, 0x1111, // BLOCK_32X64 0x0011, // BLOCK_64X32, 0x1111, // BLOCK_64X64 }; // 16 bit above mask to shift and set for uv each prediction size. static const uint16_t above_prediction_mask_uv[BLOCK_SIZES] = { 0x0001, // BLOCK_4X4 0x0001, // BLOCK_4X8 0x0001, // BLOCK_8X4 0x0001, // BLOCK_8X8 0x0001, // BLOCK_8X16, 0x0001, // BLOCK_16X8 0x0001, // BLOCK_16X16 0x0001, // BLOCK_16X32, 0x0003, // BLOCK_32X16, 0x0003, // BLOCK_32X32, 0x0003, // BLOCK_32X64, 0x000f, // BLOCK_64X32, 0x000f, // BLOCK_64X64 }; // 64 bit mask to shift and set for each uv prediction size static const uint16_t size_mask_uv[BLOCK_SIZES] = { 0x0001, // BLOCK_4X4 0x0001, // BLOCK_4X8 0x0001, // BLOCK_8X4 0x0001, // BLOCK_8X8 0x0001, // BLOCK_8X16, 0x0001, // BLOCK_16X8 0x0001, // BLOCK_16X16 0x0011, // BLOCK_16X32, 0x0003, // BLOCK_32X16, 0x0033, // BLOCK_32X32, 0x3333, // BLOCK_32X64, 0x00ff, // BLOCK_64X32, 0xffff, // BLOCK_64X64 }; static const uint16_t left_border_uv = 0x1111; static const uint16_t above_border_uv = 0x000f; static const int mode_lf_lut[MB_MODE_COUNT] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // INTRA_MODES 1, 1, 0, 1 // INTER_MODES (ZEROMV == 0) }; static void update_sharpness(loop_filter_info_n *lfi, int sharpness_lvl) { int lvl; // For each possible value for the loop filter fill out limits for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) { // Set loop filter parameters that control sharpness. 
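// (The inside limit starts at the filter level, is halved once for any
// nonzero sharpness and halved again above sharpness 4, then clamped to
// [1, 9 - sharpness]: raising sharpness narrows what the filter will
// smooth.)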
int block_inside_limit = lvl >> ((sharpness_lvl > 0) + (sharpness_lvl > 4)); if (sharpness_lvl > 0) { if (block_inside_limit > (9 - sharpness_lvl)) block_inside_limit = (9 - sharpness_lvl); } if (block_inside_limit < 1) block_inside_limit = 1; memset(lfi->lfthr[lvl].lim, block_inside_limit, SIMD_WIDTH); memset(lfi->lfthr[lvl].mblim, (2 * (lvl + 2) + block_inside_limit), SIMD_WIDTH); } } static uint8_t get_filter_level(const loop_filter_info_n *lfi_n, const MODE_INFO *mi) { return lfi_n->lvl[mi->segment_id][mi->ref_frame[0]][mode_lf_lut[mi->mode]]; } void vp9_loop_filter_init(VP9_COMMON *cm) { loop_filter_info_n *lfi = &cm->lf_info; struct loopfilter *lf = &cm->lf; int lvl; // init limits for given sharpness update_sharpness(lfi, lf->sharpness_level); lf->last_sharpness_level = lf->sharpness_level; // init hev threshold const vectors for (lvl = 0; lvl <= MAX_LOOP_FILTER; lvl++) memset(lfi->lfthr[lvl].hev_thr, (lvl >> 4), SIMD_WIDTH); } void vp9_loop_filter_frame_init(VP9_COMMON *cm, int default_filt_lvl) { int seg_id; // n_shift is the multiplier for lf_deltas // the multiplier is 1 for when filter_lvl is between 0 and 31; // 2 when filter_lvl is between 32 and 63 const int scale = 1 << (default_filt_lvl >> 5); loop_filter_info_n *const lfi = &cm->lf_info; struct loopfilter *const lf = &cm->lf; const struct segmentation *const seg = &cm->seg; // update limits if sharpness has changed if (lf->last_sharpness_level != lf->sharpness_level) { update_sharpness(lfi, lf->sharpness_level); lf->last_sharpness_level = lf->sharpness_level; } for (seg_id = 0; seg_id < MAX_SEGMENTS; seg_id++) { int lvl_seg = default_filt_lvl; if (segfeature_active(seg, seg_id, SEG_LVL_ALT_LF)) { const int data = get_segdata(seg, seg_id, SEG_LVL_ALT_LF); lvl_seg = clamp( seg->abs_delta == SEGMENT_ABSDATA ? data : default_filt_lvl + data, 0, MAX_LOOP_FILTER); } if (!lf->mode_ref_delta_enabled) { // we could get rid of this if we assume that deltas are set to // zero when not in use; encoder always uses deltas memset(lfi->lvl[seg_id], lvl_seg, sizeof(lfi->lvl[seg_id])); } else { int ref, mode; const int intra_lvl = lvl_seg + lf->ref_deltas[INTRA_FRAME] * scale; lfi->lvl[seg_id][INTRA_FRAME][0] = clamp(intra_lvl, 0, MAX_LOOP_FILTER); for (ref = LAST_FRAME; ref < MAX_REF_FRAMES; ++ref) { for (mode = 0; mode < MAX_MODE_LF_DELTAS; ++mode) { const int inter_lvl = lvl_seg + lf->ref_deltas[ref] * scale + lf->mode_deltas[mode] * scale; lfi->lvl[seg_id][ref][mode] = clamp(inter_lvl, 0, MAX_LOOP_FILTER); } } } } } static void filter_selectively_vert_row2( int subsampling_factor, uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, const loop_filter_thresh *lfthr, const uint8_t *lfl) { const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; const int lfl_forward = subsampling_factor ? 
4 : 8; const unsigned int dual_one = 1 | (1 << lfl_forward); unsigned int mask; uint8_t *ss[2]; ss[0] = s; for (mask = (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; mask; mask = (mask & ~dual_one) >> 1) { if (mask & dual_one) { const loop_filter_thresh *lfis[2]; lfis[0] = lfthr + *lfl; lfis[1] = lfthr + *(lfl + lfl_forward); ss[1] = ss[0] + 8 * pitch; if (mask_16x16 & dual_one) { if ((mask_16x16 & dual_one) == dual_one) { vpx_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr); } else { const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; vpx_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } if (mask_8x8 & dual_one) { if ((mask_8x8 & dual_one) == dual_one) { vpx_lpf_vertical_8_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr, lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr); } else { const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; vpx_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } if (mask_4x4 & dual_one) { if ((mask_4x4 & dual_one) == dual_one) { vpx_lpf_vertical_4_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr, lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr); } else { const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; vpx_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } if (mask_4x4_int & dual_one) { if ((mask_4x4_int & dual_one) == dual_one) { vpx_lpf_vertical_4_dual( ss[0] + 4, pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr, lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr); } else { const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; vpx_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } } ss[0] += 8; lfl += 1; mask_16x16 >>= 1; mask_8x8 >>= 1; mask_4x4 >>= 1; mask_4x4_int >>= 1; } } #if CONFIG_VP9_HIGHBITDEPTH static void highbd_filter_selectively_vert_row2( int subsampling_factor, uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { const int dual_mask_cutoff = subsampling_factor ? 0xff : 0xffff; const int lfl_forward = subsampling_factor ? 
4 : 8; const unsigned int dual_one = 1 | (1 << lfl_forward); unsigned int mask; uint16_t *ss[2]; ss[0] = s; for (mask = (mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int) & dual_mask_cutoff; mask; mask = (mask & ~dual_one) >> 1) { if (mask & dual_one) { const loop_filter_thresh *lfis[2]; lfis[0] = lfthr + *lfl; lfis[1] = lfthr + *(lfl + lfl_forward); ss[1] = ss[0] + 8 * pitch; if (mask_16x16 & dual_one) { if ((mask_16x16 & dual_one) == dual_one) { vpx_highbd_lpf_vertical_16_dual(ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr, bd); } else { const loop_filter_thresh *lfi = lfis[!(mask_16x16 & 1)]; vpx_highbd_lpf_vertical_16(ss[!(mask_16x16 & 1)], pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } } if (mask_8x8 & dual_one) { if ((mask_8x8 & dual_one) == dual_one) { vpx_highbd_lpf_vertical_8_dual( ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr, lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr, bd); } else { const loop_filter_thresh *lfi = lfis[!(mask_8x8 & 1)]; vpx_highbd_lpf_vertical_8(ss[!(mask_8x8 & 1)], pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } } if (mask_4x4 & dual_one) { if ((mask_4x4 & dual_one) == dual_one) { vpx_highbd_lpf_vertical_4_dual( ss[0], pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr, lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr, bd); } else { const loop_filter_thresh *lfi = lfis[!(mask_4x4 & 1)]; vpx_highbd_lpf_vertical_4(ss[!(mask_4x4 & 1)], pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } } if (mask_4x4_int & dual_one) { if ((mask_4x4_int & dual_one) == dual_one) { vpx_highbd_lpf_vertical_4_dual( ss[0] + 4, pitch, lfis[0]->mblim, lfis[0]->lim, lfis[0]->hev_thr, lfis[1]->mblim, lfis[1]->lim, lfis[1]->hev_thr, bd); } else { const loop_filter_thresh *lfi = lfis[!(mask_4x4_int & 1)]; vpx_highbd_lpf_vertical_4(ss[!(mask_4x4_int & 1)] + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } } } ss[0] += 8; lfl += 1; mask_16x16 >>= 1; mask_8x8 >>= 1; mask_4x4 >>= 1; mask_4x4_int >>= 1; } } #endif // CONFIG_VP9_HIGHBITDEPTH static void filter_selectively_horiz( uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, const loop_filter_thresh *lfthr, const uint8_t *lfl) { unsigned int mask; int count; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= count) { count = 1; if (mask & 1) { const loop_filter_thresh *lfi = lfthr + *lfl; if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { vpx_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); count = 2; } else { vpx_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { // Next block's thresholds. const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr); if ((mask_4x4_int & 3) == 3) { vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr); } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, lfin->lim, lfin->hev_thr); } count = 2; } else { vpx_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { // Next block's thresholds. 
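// (Two adjacent 8-pixel edges that both want this filter type are handled by
// one _dual call, and count == 2 then steps past both positions at once.)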
const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr); if ((mask_4x4_int & 3) == 3) { vpx_lpf_horizontal_4_dual(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr); } else { if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); else if (mask_4x4_int & 2) vpx_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, lfin->lim, lfin->hev_thr); } count = 2; } else { vpx_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); if (mask_4x4_int & 1) vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } else { vpx_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } s += 8 * count; lfl += count; mask_16x16 >>= count; mask_8x8 >>= count; mask_4x4 >>= count; mask_4x4_int >>= count; } } #if CONFIG_VP9_HIGHBITDEPTH static void highbd_filter_selectively_horiz( uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { unsigned int mask; int count; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= count) { count = 1; if (mask & 1) { const loop_filter_thresh *lfi = lfthr + *lfl; if (mask_16x16 & 1) { if ((mask_16x16 & 3) == 3) { vpx_highbd_lpf_horizontal_16_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); count = 2; } else { vpx_highbd_lpf_horizontal_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } } else if (mask_8x8 & 1) { if ((mask_8x8 & 3) == 3) { // Next block's thresholds. const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_highbd_lpf_horizontal_8_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr, bd); if ((mask_4x4_int & 3) == 3) { vpx_highbd_lpf_horizontal_4_dual( s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr, bd); } else { if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } else if (mask_4x4_int & 2) { vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, lfin->lim, lfin->hev_thr, bd); } } count = 2; } else { vpx_highbd_lpf_horizontal_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } } } else if (mask_4x4 & 1) { if ((mask_4x4 & 3) == 3) { // Next block's thresholds. 
const loop_filter_thresh *lfin = lfthr + *(lfl + 1); vpx_highbd_lpf_horizontal_4_dual(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr, bd); if ((mask_4x4_int & 3) == 3) { vpx_highbd_lpf_horizontal_4_dual( s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, lfin->mblim, lfin->lim, lfin->hev_thr, bd); } else { if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } else if (mask_4x4_int & 2) { vpx_highbd_lpf_horizontal_4(s + 8 + 4 * pitch, pitch, lfin->mblim, lfin->lim, lfin->hev_thr, bd); } } count = 2; } else { vpx_highbd_lpf_horizontal_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); if (mask_4x4_int & 1) { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } } } else { vpx_highbd_lpf_horizontal_4(s + 4 * pitch, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } } s += 8 * count; lfl += count; mask_16x16 >>= count; mask_8x8 >>= count; mask_4x4 >>= count; mask_4x4_int >>= count; } } #endif // CONFIG_VP9_HIGHBITDEPTH // This function ors into the current lfm structure, where to do loop // filters for the specific mi we are looking at. It uses information // including the block_size_type (32x16, 32x32, etc.), the transform size, // whether there were any coefficients encoded, and the loop filter strength // block we are currently looking at. Shift is used to position the // 1's we produce. static void build_masks(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, const int shift_uv, LOOP_FILTER_MASK *lfm) { const BLOCK_SIZE block_size = mi->sb_type; const TX_SIZE tx_size_y = mi->tx_size; const TX_SIZE tx_size_uv = uv_txsize_lookup[block_size][tx_size_y][1][1]; const int filter_level = get_filter_level(lfi_n, mi); uint64_t *const left_y = &lfm->left_y[tx_size_y]; uint64_t *const above_y = &lfm->above_y[tx_size_y]; uint64_t *const int_4x4_y = &lfm->int_4x4_y; uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; int i; // If filter level is 0 we don't loop filter. if (!filter_level) { return; } else { const int w = num_8x8_blocks_wide_lookup[block_size]; const int h = num_8x8_blocks_high_lookup[block_size]; int index = shift_y; for (i = 0; i < h; i++) { memset(&lfm->lfl_y[index], filter_level, w); index += 8; } } // These set 1 in the current block size for the block size edges. // For instance if the block size is 32x16, we'll set: // above = 1111 // 0000 // and // left = 1000 // = 1000 // NOTE : In this example the low bit is left most ( 1000 ) is stored as // 1, not 8... // // U and V set things on a 16 bit scale. // *above_y |= above_prediction_mask[block_size] << shift_y; *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; *left_y |= left_prediction_mask[block_size] << shift_y; *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; // If the block has no coefficients and is not intra we skip applying // the loop filter on block edges. if (mi->skip && is_inter_block(mi)) return; // Here we are adding a mask for the transform size. The transform // size mask is set to be correct for a 64x64 prediction block size. We // mask to match the size of the block we are working on and then shift it // into place.. 
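// (Worked example: a BLOCK_32X16 in the top-right quadrant has shift_y == 4;
// size_mask[BLOCK_32X16] == 0x0f0f covers its 4x2 group of 8x8 blocks at the
// origin, the AND with above_64x64_txform_mask[TX_16X16] keeps row 0 of that
// group, and << 4 slides the result under columns 4..7.)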
*above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y]) << shift_y; *above_uv |= (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y]) << shift_y; *left_uv |= (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; // Here we are trying to determine what to do with the internal 4x4 block // boundaries. These differ from the 4x4 boundaries on the outside edge of // an 8x8 in that the internal ones can be skipped and don't depend on // the prediction block size. if (tx_size_y == TX_4X4) *int_4x4_y |= size_mask[block_size] << shift_y; if (tx_size_uv == TX_4X4) *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; } // This function does the same thing as the one above with the exception that // it only affects the y masks. It exists because for blocks < 16x16 in size, // we only update u and v masks on the first block. static void build_y_mask(const loop_filter_info_n *const lfi_n, const MODE_INFO *mi, const int shift_y, LOOP_FILTER_MASK *lfm) { const BLOCK_SIZE block_size = mi->sb_type; const TX_SIZE tx_size_y = mi->tx_size; const int filter_level = get_filter_level(lfi_n, mi); uint64_t *const left_y = &lfm->left_y[tx_size_y]; uint64_t *const above_y = &lfm->above_y[tx_size_y]; uint64_t *const int_4x4_y = &lfm->int_4x4_y; int i; if (!filter_level) { return; } else { const int w = num_8x8_blocks_wide_lookup[block_size]; const int h = num_8x8_blocks_high_lookup[block_size]; int index = shift_y; for (i = 0; i < h; i++) { memset(&lfm->lfl_y[index], filter_level, w); index += 8; } } *above_y |= above_prediction_mask[block_size] << shift_y; *left_y |= left_prediction_mask[block_size] << shift_y; if (mi->skip && is_inter_block(mi)) return; *above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y]) << shift_y; *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y]) << shift_y; if (tx_size_y == TX_4X4) *int_4x4_y |= size_mask[block_size] << shift_y; } void vp9_adjust_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, LOOP_FILTER_MASK *lfm) { int i; // The largest loopfilter we have is 16x16 so we use the 16x16 mask // for 32x32 transforms also. lfm->left_y[TX_16X16] |= lfm->left_y[TX_32X32]; lfm->above_y[TX_16X16] |= lfm->above_y[TX_32X32]; lfm->left_uv[TX_16X16] |= lfm->left_uv[TX_32X32]; lfm->above_uv[TX_16X16] |= lfm->above_uv[TX_32X32]; // We do at least 8 tap filter on every 32x32 even if the transform size // is 4x4. So if the 4x4 is set on a border pixel add it to the 8x8 and // remove it from the 4x4. lfm->left_y[TX_8X8] |= lfm->left_y[TX_4X4] & left_border; lfm->left_y[TX_4X4] &= ~left_border; lfm->above_y[TX_8X8] |= lfm->above_y[TX_4X4] & above_border; lfm->above_y[TX_4X4] &= ~above_border; lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_4X4] & left_border_uv; lfm->left_uv[TX_4X4] &= ~left_border_uv; lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_4X4] & above_border_uv; lfm->above_uv[TX_4X4] &= ~above_border_uv; // We do some special edge handling. if (mi_row + MI_BLOCK_SIZE > cm->mi_rows) { const uint64_t rows = cm->mi_rows - mi_row; // Each pixel inside the border gets a 1, const uint64_t mask_y = (((uint64_t)1 << (rows << 3)) - 1); const uint16_t mask_uv = (((uint16_t)1 << (((rows + 1) >> 1) << 2)) - 1); // Remove values completely outside our border. 
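// (e.g. rows == 3 gives mask_y = (1 << 24) - 1, keeping the top three rows
// of 8x8 blocks, and mask_uv = 0x00ff, the top two rows of the 16-bit uv
// masks.)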
for (i = 0; i < TX_32X32; i++) { lfm->left_y[i] &= mask_y; lfm->above_y[i] &= mask_y; lfm->left_uv[i] &= mask_uv; lfm->above_uv[i] &= mask_uv; } lfm->int_4x4_y &= mask_y; lfm->int_4x4_uv &= mask_uv; // We don't apply a wide loop filter on the last uv block row. If set // apply the shorter one instead. if (rows == 1) { lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16]; lfm->above_uv[TX_16X16] = 0; } if (rows == 5) { lfm->above_uv[TX_8X8] |= lfm->above_uv[TX_16X16] & 0xff00; lfm->above_uv[TX_16X16] &= ~(lfm->above_uv[TX_16X16] & 0xff00); } } if (mi_col + MI_BLOCK_SIZE > cm->mi_cols) { const uint64_t columns = cm->mi_cols - mi_col; // Each pixel inside the border gets a 1, the multiply copies the border // to where we need it. const uint64_t mask_y = (((1 << columns) - 1)) * 0x0101010101010101ULL; const uint16_t mask_uv = ((1 << ((columns + 1) >> 1)) - 1) * 0x1111; // Internal edges are not applied on the last column of the image so // we mask 1 more for the internal edges const uint16_t mask_uv_int = ((1 << (columns >> 1)) - 1) * 0x1111; // Remove the bits outside the image edge. for (i = 0; i < TX_32X32; i++) { lfm->left_y[i] &= mask_y; lfm->above_y[i] &= mask_y; lfm->left_uv[i] &= mask_uv; lfm->above_uv[i] &= mask_uv; } lfm->int_4x4_y &= mask_y; lfm->int_4x4_uv &= mask_uv_int; // We don't apply a wide loop filter on the last uv column. If set // apply the shorter one instead. if (columns == 1) { lfm->left_uv[TX_8X8] |= lfm->left_uv[TX_16X16]; lfm->left_uv[TX_16X16] = 0; } if (columns == 5) { lfm->left_uv[TX_8X8] |= (lfm->left_uv[TX_16X16] & 0xcccc); lfm->left_uv[TX_16X16] &= ~(lfm->left_uv[TX_16X16] & 0xcccc); } } // We don't apply a loop filter on the first column in the image, mask that // out. if (mi_col == 0) { for (i = 0; i < TX_32X32; i++) { lfm->left_y[i] &= 0xfefefefefefefefeULL; lfm->left_uv[i] &= 0xeeee; } } // Assert if we try to apply 2 different loop filters at the same position. assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_8X8])); assert(!(lfm->left_y[TX_16X16] & lfm->left_y[TX_4X4])); assert(!(lfm->left_y[TX_8X8] & lfm->left_y[TX_4X4])); assert(!(lfm->int_4x4_y & lfm->left_y[TX_16X16])); assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_8X8])); assert(!(lfm->left_uv[TX_16X16] & lfm->left_uv[TX_4X4])); assert(!(lfm->left_uv[TX_8X8] & lfm->left_uv[TX_4X4])); assert(!(lfm->int_4x4_uv & lfm->left_uv[TX_16X16])); assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_8X8])); assert(!(lfm->above_y[TX_16X16] & lfm->above_y[TX_4X4])); assert(!(lfm->above_y[TX_8X8] & lfm->above_y[TX_4X4])); assert(!(lfm->int_4x4_y & lfm->above_y[TX_16X16])); assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_8X8])); assert(!(lfm->above_uv[TX_16X16] & lfm->above_uv[TX_4X4])); assert(!(lfm->above_uv[TX_8X8] & lfm->above_uv[TX_4X4])); assert(!(lfm->int_4x4_uv & lfm->above_uv[TX_16X16])); } // This function sets up the bit masks for the entire 64x64 region represented // by mi_row, mi_col. void vp9_setup_mask(VP9_COMMON *const cm, const int mi_row, const int mi_col, MODE_INFO **mi8x8, const int mode_info_stride, LOOP_FILTER_MASK *lfm) { int idx_32, idx_16, idx_8; const loop_filter_info_n *const lfi_n = &cm->lf_info; MODE_INFO **mip = mi8x8; MODE_INFO **mip2 = mi8x8; // These are offsets to the next mi in the 64x64 block. It is what gets // added to the mi ptr as we go through each loop. It helps us to avoid // setting up special row and column counters for each index. The last step // brings us out back to the starting position. 
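// (At 32x32 granularity the walk goes: right 4 mi units, down 4 rows and
// back left 4, right 4 again, then up and left to the start -- the four
// quadrants in raster order. offset_16 and offset repeat the pattern at
// strides 2 and 1.)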
const int offset_32[] = { 4, (mode_info_stride << 2) - 4, 4, -(mode_info_stride << 2) - 4 }; const int offset_16[] = { 2, (mode_info_stride << 1) - 2, 2, -(mode_info_stride << 1) - 2 }; const int offset[] = { 1, mode_info_stride - 1, 1, -mode_info_stride - 1 }; // The following variables represent shifts to position the current block // mask over the appropriate block. A shift of 36 to the left will move // the bits for the final 32 by 32 block in the 64x64 down 4 rows and right // 4 columns to the appropriate spot. const int shift_32_y[] = { 0, 4, 32, 36 }; const int shift_16_y[] = { 0, 2, 16, 18 }; const int shift_8_y[] = { 0, 1, 8, 9 }; const int shift_32_uv[] = { 0, 2, 8, 10 }; const int shift_16_uv[] = { 0, 1, 4, 5 }; const int max_rows = (mi_row + MI_BLOCK_SIZE > cm->mi_rows ? cm->mi_rows - mi_row : MI_BLOCK_SIZE); const int max_cols = (mi_col + MI_BLOCK_SIZE > cm->mi_cols ? cm->mi_cols - mi_col : MI_BLOCK_SIZE); vp9_zero(*lfm); assert(mip[0] != NULL); switch (mip[0]->sb_type) { case BLOCK_64X64: build_masks(lfi_n, mip[0], 0, 0, lfm); break; case BLOCK_64X32: build_masks(lfi_n, mip[0], 0, 0, lfm); mip2 = mip + mode_info_stride * 4; if (4 >= max_rows) break; build_masks(lfi_n, mip2[0], 32, 8, lfm); break; case BLOCK_32X64: build_masks(lfi_n, mip[0], 0, 0, lfm); mip2 = mip + 4; if (4 >= max_cols) break; build_masks(lfi_n, mip2[0], 4, 2, lfm); break; default: for (idx_32 = 0; idx_32 < 4; mip += offset_32[idx_32], ++idx_32) { const int shift_y = shift_32_y[idx_32]; const int shift_uv = shift_32_uv[idx_32]; const int mi_32_col_offset = ((idx_32 & 1) << 2); const int mi_32_row_offset = ((idx_32 >> 1) << 2); if (mi_32_col_offset >= max_cols || mi_32_row_offset >= max_rows) continue; switch (mip[0]->sb_type) { case BLOCK_32X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; case BLOCK_32X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_32_row_offset + 2 >= max_rows) continue; mip2 = mip + mode_info_stride * 2; build_masks(lfi_n, mip2[0], shift_y + 16, shift_uv + 4, lfm); break; case BLOCK_16X32: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_32_col_offset + 2 >= max_cols) continue; mip2 = mip + 2; build_masks(lfi_n, mip2[0], shift_y + 2, shift_uv + 1, lfm); break; default: for (idx_16 = 0; idx_16 < 4; mip += offset_16[idx_16], ++idx_16) { const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16]; const int shift_uv = shift_32_uv[idx_32] + shift_16_uv[idx_16]; const int mi_16_col_offset = mi_32_col_offset + ((idx_16 & 1) << 1); const int mi_16_row_offset = mi_32_row_offset + ((idx_16 >> 1) << 1); if (mi_16_col_offset >= max_cols || mi_16_row_offset >= max_rows) continue; switch (mip[0]->sb_type) { case BLOCK_16X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); break; case BLOCK_16X8: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_16_row_offset + 1 >= max_rows) continue; mip2 = mip + mode_info_stride; build_y_mask(lfi_n, mip2[0], shift_y + 8, lfm); break; case BLOCK_8X16: build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); if (mi_16_col_offset + 1 >= max_cols) continue; mip2 = mip + 1; build_y_mask(lfi_n, mip2[0], shift_y + 1, lfm); break; default: { const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[0]; build_masks(lfi_n, mip[0], shift_y, shift_uv, lfm); mip += offset[0]; for (idx_8 = 1; idx_8 < 4; mip += offset[idx_8], ++idx_8) { const int shift_y = shift_32_y[idx_32] + shift_16_y[idx_16] + shift_8_y[idx_8]; const int mi_8_col_offset = mi_16_col_offset + ((idx_8 & 1)); const int mi_8_row_offset = mi_16_row_offset + ((idx_8
>> 1)); if (mi_8_col_offset >= max_cols || mi_8_row_offset >= max_rows) continue; build_y_mask(lfi_n, mip[0], shift_y, lfm); } break; } } } break; } } break; } } static void filter_selectively_vert( uint8_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, const loop_filter_thresh *lfthr, const uint8_t *lfl) { unsigned int mask; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= 1) { const loop_filter_thresh *lfi = lfthr + *lfl; if (mask & 1) { if (mask_16x16 & 1) { vpx_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_8x8 & 1) { vpx_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } else if (mask_4x4 & 1) { vpx_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); } } if (mask_4x4_int & 1) vpx_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr); s += 8; lfl += 1; mask_16x16 >>= 1; mask_8x8 >>= 1; mask_4x4 >>= 1; mask_4x4_int >>= 1; } } #if CONFIG_VP9_HIGHBITDEPTH static void highbd_filter_selectively_vert( uint16_t *s, int pitch, unsigned int mask_16x16, unsigned int mask_8x8, unsigned int mask_4x4, unsigned int mask_4x4_int, const loop_filter_thresh *lfthr, const uint8_t *lfl, int bd) { unsigned int mask; for (mask = mask_16x16 | mask_8x8 | mask_4x4 | mask_4x4_int; mask; mask >>= 1) { const loop_filter_thresh *lfi = lfthr + *lfl; if (mask & 1) { if (mask_16x16 & 1) { vpx_highbd_lpf_vertical_16(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } else if (mask_8x8 & 1) { vpx_highbd_lpf_vertical_8(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } else if (mask_4x4 & 1) { vpx_highbd_lpf_vertical_4(s, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); } } if (mask_4x4_int & 1) vpx_highbd_lpf_vertical_4(s + 4, pitch, lfi->mblim, lfi->lim, lfi->hev_thr, bd); s += 8; lfl += 1; mask_16x16 >>= 1; mask_8x8 >>= 1; mask_4x4 >>= 1; mask_4x4_int >>= 1; } } #endif // CONFIG_VP9_HIGHBITDEPTH void vp9_filter_block_plane_non420(VP9_COMMON *cm, struct macroblockd_plane *plane, MODE_INFO **mi_8x8, int mi_row, int mi_col) { const int ss_x = plane->subsampling_x; const int ss_y = plane->subsampling_y; const int row_step = 1 << ss_y; const int col_step = 1 << ss_x; const int row_step_stride = cm->mi_stride * row_step; struct buf_2d *const dst = &plane->dst; uint8_t *const dst0 = dst->buf; unsigned int mask_16x16[MI_BLOCK_SIZE]; unsigned int mask_8x8[MI_BLOCK_SIZE]; unsigned int mask_4x4[MI_BLOCK_SIZE]; unsigned int mask_4x4_int[MI_BLOCK_SIZE]; uint8_t lfl[MI_BLOCK_SIZE * MI_BLOCK_SIZE]; int r, c; vp9_zero(mask_16x16); vp9_zero(mask_8x8); vp9_zero(mask_4x4); vp9_zero(mask_4x4_int); vp9_zero(lfl); for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { unsigned int mask_16x16_c = 0; unsigned int mask_8x8_c = 0; unsigned int mask_4x4_c = 0; unsigned int border_mask; // Determine the vertical edges that need filtering for (c = 0; c < MI_BLOCK_SIZE && mi_col + c < cm->mi_cols; c += col_step) { const MODE_INFO *mi = mi_8x8[c]; const BLOCK_SIZE sb_type = mi[0].sb_type; const int skip_this = mi[0].skip && is_inter_block(mi); // left edge of current unit is block/partition edge -> no skip const int block_edge_left = (num_4x4_blocks_wide_lookup[sb_type] > 1) ? !(c & (num_8x8_blocks_wide_lookup[sb_type] - 1)) : 1; const int skip_this_c = skip_this && !block_edge_left; // top edge of current unit is block/partition edge -> no skip const int block_edge_above = (num_4x4_blocks_high_lookup[sb_type] > 1) ? 
!(r & (num_8x8_blocks_high_lookup[sb_type] - 1)) : 1; const int skip_this_r = skip_this && !block_edge_above; const TX_SIZE tx_size = get_uv_tx_size(mi, plane); const int skip_border_4x4_c = ss_x && mi_col + c == cm->mi_cols - 1; const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; // Filter level can vary per MI if (!(lfl[(r << 3) + (c >> ss_x)] = get_filter_level(&cm->lf_info, mi))) continue; // Build masks based on the transform size of each block if (tx_size == TX_32X32) { if (!skip_this_c && ((c >> ss_x) & 3) == 0) { if (!skip_border_4x4_c) mask_16x16_c |= 1 << (c >> ss_x); else mask_8x8_c |= 1 << (c >> ss_x); } if (!skip_this_r && ((r >> ss_y) & 3) == 0) { if (!skip_border_4x4_r) mask_16x16[r] |= 1 << (c >> ss_x); else mask_8x8[r] |= 1 << (c >> ss_x); } } else if (tx_size == TX_16X16) { if (!skip_this_c && ((c >> ss_x) & 1) == 0) { if (!skip_border_4x4_c) mask_16x16_c |= 1 << (c >> ss_x); else mask_8x8_c |= 1 << (c >> ss_x); } if (!skip_this_r && ((r >> ss_y) & 1) == 0) { if (!skip_border_4x4_r) mask_16x16[r] |= 1 << (c >> ss_x); else mask_8x8[r] |= 1 << (c >> ss_x); } } else { // force 8x8 filtering on 32x32 boundaries if (!skip_this_c) { if (tx_size == TX_8X8 || ((c >> ss_x) & 3) == 0) mask_8x8_c |= 1 << (c >> ss_x); else mask_4x4_c |= 1 << (c >> ss_x); } if (!skip_this_r) { if (tx_size == TX_8X8 || ((r >> ss_y) & 3) == 0) mask_8x8[r] |= 1 << (c >> ss_x); else mask_4x4[r] |= 1 << (c >> ss_x); } if (!skip_this && tx_size < TX_8X8 && !skip_border_4x4_c) mask_4x4_int[r] |= 1 << (c >> ss_x); } } // Disable filtering on the leftmost column border_mask = ~(mi_col == 0 ? 1 : 0); #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { highbd_filter_selectively_vert( CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_c & border_mask, mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], cm->lf_info.lfthr, &lfl[r << 3], (int)cm->bit_depth); } else { #endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_vert(dst->buf, dst->stride, mask_16x16_c & border_mask, mask_8x8_c & border_mask, mask_4x4_c & border_mask, mask_4x4_int[r], cm->lf_info.lfthr, &lfl[r << 3]); #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; mi_8x8 += row_step_stride; } // Now do horizontal pass dst->buf = dst0; for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += row_step) { const int skip_border_4x4_r = ss_y && mi_row + r == cm->mi_rows - 1; const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 
0 : mask_4x4_int[r]; unsigned int mask_16x16_r; unsigned int mask_8x8_r; unsigned int mask_4x4_r; if (mi_row + r == 0) { mask_16x16_r = 0; mask_8x8_r = 0; mask_4x4_r = 0; } else { mask_16x16_r = mask_16x16[r]; mask_8x8_r = mask_8x8[r]; mask_4x4_r = mask_4x4[r]; } #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { highbd_filter_selectively_horiz( CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl[r << 3], (int)cm->bit_depth); } else { #endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl[r << 3]); #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; } } void vp9_filter_block_plane_ss00(VP9_COMMON *const cm, struct macroblockd_plane *const plane, int mi_row, LOOP_FILTER_MASK *lfm) { struct buf_2d *const dst = &plane->dst; uint8_t *const dst0 = dst->buf; int r; uint64_t mask_16x16 = lfm->left_y[TX_16X16]; uint64_t mask_8x8 = lfm->left_y[TX_8X8]; uint64_t mask_4x4 = lfm->left_y[TX_4X4]; uint64_t mask_4x4_int = lfm->int_4x4_y; assert(plane->subsampling_x == 0 && plane->subsampling_y == 0); // Vertical pass: do 2 rows at one time for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { // Disable filtering on the leftmost column. highbd_filter_selectively_vert_row2( plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, (unsigned int)mask_16x16, (unsigned int)mask_8x8, (unsigned int)mask_4x4, (unsigned int)mask_4x4_int, cm->lf_info.lfthr, &lfm->lfl_y[r << 3], (int)cm->bit_depth); } else { #endif // CONFIG_VP9_HIGHBITDEPTH // Disable filtering on the leftmost column. 
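// Two rows of 8x8 blocks are filtered per iteration, hence the 16-bit mask // shifts and the 16-line buffer advance that follow.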
filter_selectively_vert_row2( plane->subsampling_x, dst->buf, dst->stride, (unsigned int)mask_16x16, (unsigned int)mask_8x8, (unsigned int)mask_4x4, (unsigned int)mask_4x4_int, cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 16 * dst->stride; mask_16x16 >>= 16; mask_8x8 >>= 16; mask_4x4 >>= 16; mask_4x4_int >>= 16; } // Horizontal pass dst->buf = dst0; mask_16x16 = lfm->above_y[TX_16X16]; mask_8x8 = lfm->above_y[TX_8X8]; mask_4x4 = lfm->above_y[TX_4X4]; mask_4x4_int = lfm->int_4x4_y; for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r++) { unsigned int mask_16x16_r; unsigned int mask_8x8_r; unsigned int mask_4x4_r; if (mi_row + r == 0) { mask_16x16_r = 0; mask_8x8_r = 0; mask_4x4_r = 0; } else { mask_16x16_r = mask_16x16 & 0xff; mask_8x8_r = mask_8x8 & 0xff; mask_4x4_r = mask_4x4 & 0xff; } #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { highbd_filter_selectively_horiz( CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int & 0xff, cm->lf_info.lfthr, &lfm->lfl_y[r << 3], (int)cm->bit_depth); } else { #endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int & 0xff, cm->lf_info.lfthr, &lfm->lfl_y[r << 3]); #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; mask_16x16 >>= 8; mask_8x8 >>= 8; mask_4x4 >>= 8; mask_4x4_int >>= 8; } } void vp9_filter_block_plane_ss11(VP9_COMMON *const cm, struct macroblockd_plane *const plane, int mi_row, LOOP_FILTER_MASK *lfm) { struct buf_2d *const dst = &plane->dst; uint8_t *const dst0 = dst->buf; int r, c; uint8_t lfl_uv[16]; uint16_t mask_16x16 = lfm->left_uv[TX_16X16]; uint16_t mask_8x8 = lfm->left_uv[TX_8X8]; uint16_t mask_4x4 = lfm->left_uv[TX_4X4]; uint16_t mask_4x4_int = lfm->int_4x4_uv; vp9_zero(lfl_uv); assert(plane->subsampling_x == 1 && plane->subsampling_y == 1); // Vertical pass: do 2 rows at one time for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 4) { for (c = 0; c < (MI_BLOCK_SIZE >> 1); c++) { lfl_uv[(r << 1) + c] = lfm->lfl_y[(r << 3) + (c << 1)]; lfl_uv[((r + 2) << 1) + c] = lfm->lfl_y[((r + 2) << 3) + (c << 1)]; } #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { // Disable filtering on the leftmost column. highbd_filter_selectively_vert_row2( plane->subsampling_x, CONVERT_TO_SHORTPTR(dst->buf), dst->stride, (unsigned int)mask_16x16, (unsigned int)mask_8x8, (unsigned int)mask_4x4, (unsigned int)mask_4x4_int, cm->lf_info.lfthr, &lfl_uv[r << 1], (int)cm->bit_depth); } else { #endif // CONFIG_VP9_HIGHBITDEPTH // Disable filtering on the leftmost column. filter_selectively_vert_row2( plane->subsampling_x, dst->buf, dst->stride, (unsigned int)mask_16x16, (unsigned int)mask_8x8, (unsigned int)mask_4x4, (unsigned int)mask_4x4_int, cm->lf_info.lfthr, &lfl_uv[r << 1]); #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 16 * dst->stride; mask_16x16 >>= 8; mask_8x8 >>= 8; mask_4x4 >>= 8; mask_4x4_int >>= 8; } // Horizontal pass dst->buf = dst0; mask_16x16 = lfm->above_uv[TX_16X16]; mask_8x8 = lfm->above_uv[TX_8X8]; mask_4x4 = lfm->above_uv[TX_4X4]; mask_4x4_int = lfm->int_4x4_uv; for (r = 0; r < MI_BLOCK_SIZE && mi_row + r < cm->mi_rows; r += 2) { const int skip_border_4x4_r = mi_row + r == cm->mi_rows - 1; const unsigned int mask_4x4_int_r = skip_border_4x4_r ? 
0 : (mask_4x4_int & 0xf); unsigned int mask_16x16_r; unsigned int mask_8x8_r; unsigned int mask_4x4_r; if (mi_row + r == 0) { mask_16x16_r = 0; mask_8x8_r = 0; mask_4x4_r = 0; } else { mask_16x16_r = mask_16x16 & 0xf; mask_8x8_r = mask_8x8 & 0xf; mask_4x4_r = mask_4x4 & 0xf; } #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { highbd_filter_selectively_horiz( CONVERT_TO_SHORTPTR(dst->buf), dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl_uv[r << 1], (int)cm->bit_depth); } else { #endif // CONFIG_VP9_HIGHBITDEPTH filter_selectively_horiz(dst->buf, dst->stride, mask_16x16_r, mask_8x8_r, mask_4x4_r, mask_4x4_int_r, cm->lf_info.lfthr, &lfl_uv[r << 1]); #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH dst->buf += 8 * dst->stride; mask_16x16 >>= 4; mask_8x8 >>= 4; mask_4x4 >>= 4; mask_4x4_int >>= 4; } } static void loop_filter_rows(YV12_BUFFER_CONFIG *frame_buffer, VP9_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only) { const int num_planes = y_only ? 1 : MAX_MB_PLANE; enum lf_path path; int mi_row, mi_col; if (y_only) path = LF_PATH_444; else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) path = LF_PATH_420; else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) path = LF_PATH_444; else path = LF_PATH_SLOW; for (mi_row = start; mi_row < stop; mi_row += MI_BLOCK_SIZE) { MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { int plane; vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); // TODO(jimbankoski): For 444 only need to do y mask. vp9_adjust_mask(cm, mi_row, mi_col, lfm); vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); for (plane = 1; plane < num_planes; ++plane) { switch (path) { case LF_PATH_420: vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); break; case LF_PATH_444: vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); break; case LF_PATH_SLOW: vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, mi_row, mi_col); break; } } } } } void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm, MACROBLOCKD *xd, int frame_filter_level, int y_only, int partial_frame) { int start_mi_row, end_mi_row, mi_rows_to_filter; if (!frame_filter_level) return; start_mi_row = 0; mi_rows_to_filter = cm->mi_rows; if (partial_frame && cm->mi_rows > 8) { start_mi_row = cm->mi_rows >> 1; start_mi_row &= 0xfffffff8; mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); } end_mi_row = start_mi_row + mi_rows_to_filter; loop_filter_rows(frame, cm, xd->plane, start_mi_row, end_mi_row, y_only); } // Used by the encoder to build the loopfilter masks. // TODO(slavarnway): Do the encoder the same way the decoder does it and // build the masks in line as part of the encode process. 
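// As in vp9_loop_filter_frame(), partial_frame restricts the work to roughly // the middle eighth of the frame: e.g. with cm->mi_rows == 64, start_mi_row // becomes 32 and 8 rows of MODE_INFO are processed.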
void vp9_build_mask_frame(VP9_COMMON *cm, int frame_filter_level, int partial_frame) { int start_mi_row, end_mi_row, mi_rows_to_filter; int mi_col, mi_row; if (!frame_filter_level) return; start_mi_row = 0; mi_rows_to_filter = cm->mi_rows; if (partial_frame && cm->mi_rows > 8) { start_mi_row = cm->mi_rows >> 1; start_mi_row &= 0xfffffff8; mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); } end_mi_row = start_mi_row + mi_rows_to_filter; vp9_loop_filter_frame_init(cm, frame_filter_level); for (mi_row = start_mi_row; mi_row < end_mi_row; mi_row += MI_BLOCK_SIZE) { MODE_INFO **mi = cm->mi_grid_visible + mi_row * cm->mi_stride; for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { // vp9_setup_mask() zeros lfm. vp9_setup_mask(cm, mi_row, mi_col, mi + mi_col, cm->mi_stride, get_lfm(&cm->lf, mi_row, mi_col)); } } } // 8x8 blocks in a superblock. A "1" represents the first block in a 16x16 // or greater area. static const uint8_t first_block_in_16x16[8][8] = { { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 }, { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 }, { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 }, { 1, 0, 1, 0, 1, 0, 1, 0 }, { 0, 0, 0, 0, 0, 0, 0, 0 } }; // This function sets up the bit masks for a block represented // by mi_row, mi_col in a 64x64 region. // TODO(SJL): This function only works for yv12. void vp9_build_mask(VP9_COMMON *cm, const MODE_INFO *mi, int mi_row, int mi_col, int bw, int bh) { const BLOCK_SIZE block_size = mi->sb_type; const TX_SIZE tx_size_y = mi->tx_size; const loop_filter_info_n *const lfi_n = &cm->lf_info; const int filter_level = get_filter_level(lfi_n, mi); const TX_SIZE tx_size_uv = uv_txsize_lookup[block_size][tx_size_y][1][1]; LOOP_FILTER_MASK *const lfm = get_lfm(&cm->lf, mi_row, mi_col); uint64_t *const left_y = &lfm->left_y[tx_size_y]; uint64_t *const above_y = &lfm->above_y[tx_size_y]; uint64_t *const int_4x4_y = &lfm->int_4x4_y; uint16_t *const left_uv = &lfm->left_uv[tx_size_uv]; uint16_t *const above_uv = &lfm->above_uv[tx_size_uv]; uint16_t *const int_4x4_uv = &lfm->int_4x4_uv; const int row_in_sb = (mi_row & 7); const int col_in_sb = (mi_col & 7); const int shift_y = col_in_sb + (row_in_sb << 3); const int shift_uv = (col_in_sb >> 1) + ((row_in_sb >> 1) << 2); const int build_uv = first_block_in_16x16[row_in_sb][col_in_sb]; if (!filter_level) { return; } else { int index = shift_y; int i; for (i = 0; i < bh; i++) { memset(&lfm->lfl_y[index], filter_level, bw); index += 8; } } // These set a 1 on the block size edges for the current block size. // For instance if the block size is 32x16, we'll set: // above = 1111 // 0000 // and // left = 1000 // = 1000 // NOTE: In this example the low bit is leftmost, so ( 1000 ) is stored as // 1, not 8... // // U and V set things on a 16-bit scale. // *above_y |= above_prediction_mask[block_size] << shift_y; *left_y |= left_prediction_mask[block_size] << shift_y; if (build_uv) { *above_uv |= above_prediction_mask_uv[block_size] << shift_uv; *left_uv |= left_prediction_mask_uv[block_size] << shift_uv; } // If the block has no coefficients and is not intra we skip applying // the loop filter on block edges. if (mi->skip && is_inter_block(mi)) return; // Add a mask for the transform size. The transform size mask is set to // be correct for a 64x64 prediction block size. Mask to match the size of // the block we are working on and then shift it into place.
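// For example, a BLOCK_32X32 with TX_16X16 at the superblock origin // (shift_y == 0) keeps a transform edge every second 8x8 column/row inside // the 32x32 area selected by size_mask[BLOCK_32X32].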
*above_y |= (size_mask[block_size] & above_64x64_txform_mask[tx_size_y]) << shift_y; *left_y |= (size_mask[block_size] & left_64x64_txform_mask[tx_size_y]) << shift_y; if (build_uv) { *above_uv |= (size_mask_uv[block_size] & above_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; *left_uv |= (size_mask_uv[block_size] & left_64x64_txform_mask_uv[tx_size_uv]) << shift_uv; } // Try to determine what to do with the internal 4x4 block boundaries. These // differ from the 4x4 boundaries on the outside edge of an 8x8 in that the // internal ones can be skipped and don't depend on the prediction block size. if (tx_size_y == TX_4X4) *int_4x4_y |= size_mask[block_size] << shift_y; if (build_uv && tx_size_uv == TX_4X4) *int_4x4_uv |= (size_mask_uv[block_size] & 0xffff) << shift_uv; } void vp9_loop_filter_data_reset( LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]) { lf_data->frame_buffer = frame_buffer; lf_data->cm = cm; lf_data->start = 0; lf_data->stop = 0; lf_data->y_only = 0; memcpy(lf_data->planes, planes, sizeof(lf_data->planes)); } void vp9_reset_lfm(VP9_COMMON *const cm) { if (cm->lf.filter_level) { memset(cm->lf.lfm, 0, ((cm->mi_rows + (MI_BLOCK_SIZE - 1)) >> 3) * cm->lf.lfm_stride * sizeof(*cm->lf.lfm)); } } int vp9_loop_filter_worker(void *arg1, void *unused) { LFWorkerData *const lf_data = (LFWorkerData *)arg1; (void)unused; loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, lf_data->start, lf_data->stop, lf_data->y_only); return 1; } libvpx-1.8.2/vp9/common/vp9_loopfilter.h000066400000000000000000000125361357355204000201750ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_LOOPFILTER_H_ #define VPX_VP9_COMMON_VP9_LOOPFILTER_H_ #include "vpx_ports/mem.h" #include "./vpx_config.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_seg_common.h" #ifdef __cplusplus extern "C" { #endif #define MAX_LOOP_FILTER 63 #define MAX_SHARPNESS 7 #define SIMD_WIDTH 16 #define MAX_REF_LF_DELTAS 4 #define MAX_MODE_LF_DELTAS 2 enum lf_path { LF_PATH_420, LF_PATH_444, LF_PATH_SLOW, }; // Need to align this structure so when it is declared and // passed it can be loaded into vector registers. typedef struct { DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, mblim[SIMD_WIDTH]); DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, lim[SIMD_WIDTH]); DECLARE_ALIGNED(SIMD_WIDTH, uint8_t, hev_thr[SIMD_WIDTH]); } loop_filter_thresh; typedef struct { loop_filter_thresh lfthr[MAX_LOOP_FILTER + 1]; uint8_t lvl[MAX_SEGMENTS][MAX_REF_FRAMES][MAX_MODE_LF_DELTAS]; } loop_filter_info_n; // This structure holds bit masks for all 8x8 blocks in a 64x64 region. // Each 1 bit represents a position in which we want to apply the loop filter. // Left_ entries refer to whether we apply a filter on the border to the // left of the block. Above_ entries refer to whether or not to apply a // filter on the above border. Int_ entries refer to whether or not to // apply borders on the 4x4 edges within the 8x8 block that each bit // represents. 
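// The 64 bits of the y masks cover the 8x8 blocks of the 64x64 area in // raster order (bit 0 is the top-left block, bit 63 the bottom-right); the // uv masks use 16 bits for the corresponding 4x4 grid of chroma blocks.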
// Since each transform is accompanied by a potentially different type of // loop filter, there is a different entry in the array for each transform size. typedef struct { uint64_t left_y[TX_SIZES]; uint64_t above_y[TX_SIZES]; uint64_t int_4x4_y; uint16_t left_uv[TX_SIZES]; uint16_t above_uv[TX_SIZES]; uint16_t int_4x4_uv; uint8_t lfl_y[64]; } LOOP_FILTER_MASK; struct loopfilter { int filter_level; int last_filt_level; int sharpness_level; int last_sharpness_level; uint8_t mode_ref_delta_enabled; uint8_t mode_ref_delta_update; // 0 = Intra, Last, GF, ARF signed char ref_deltas[MAX_REF_LF_DELTAS]; signed char last_ref_deltas[MAX_REF_LF_DELTAS]; // 0 = ZERO_MV, MV signed char mode_deltas[MAX_MODE_LF_DELTAS]; signed char last_mode_deltas[MAX_MODE_LF_DELTAS]; LOOP_FILTER_MASK *lfm; int lfm_stride; }; /* assorted loopfilter functions which get used elsewhere */ struct VP9Common; struct macroblockd; struct VP9LfSyncData; // This function sets up the bit masks for the entire 64x64 region represented // by mi_row, mi_col. void vp9_setup_mask(struct VP9Common *const cm, const int mi_row, const int mi_col, MODE_INFO **mi8x8, const int mode_info_stride, LOOP_FILTER_MASK *lfm); void vp9_filter_block_plane_ss00(struct VP9Common *const cm, struct macroblockd_plane *const plane, int mi_row, LOOP_FILTER_MASK *lfm); void vp9_filter_block_plane_ss11(struct VP9Common *const cm, struct macroblockd_plane *const plane, int mi_row, LOOP_FILTER_MASK *lfm); void vp9_filter_block_plane_non420(struct VP9Common *cm, struct macroblockd_plane *plane, MODE_INFO **mi_8x8, int mi_row, int mi_col); void vp9_loop_filter_init(struct VP9Common *cm); // Update the loop filter for the current frame. // This should be called before vp9_loop_filter_frame(); vp9_build_mask_frame() // calls this function directly. void vp9_loop_filter_frame_init(struct VP9Common *cm, int default_filt_lvl); void vp9_loop_filter_frame(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm, struct macroblockd *xd, int frame_filter_level, int y_only, int partial_frame); // Get the superblock lfm for a given mi_row, mi_col. static INLINE LOOP_FILTER_MASK *get_lfm(const struct loopfilter *lf, const int mi_row, const int mi_col) { return &lf->lfm[(mi_col >> 3) + ((mi_row >> 3) * lf->lfm_stride)]; } void vp9_build_mask(struct VP9Common *cm, const MODE_INFO *mi, int mi_row, int mi_col, int bw, int bh); void vp9_adjust_mask(struct VP9Common *const cm, const int mi_row, const int mi_col, LOOP_FILTER_MASK *lfm); void vp9_build_mask_frame(struct VP9Common *cm, int frame_filter_level, int partial_frame); void vp9_reset_lfm(struct VP9Common *const cm); typedef struct LoopFilterWorkerData { YV12_BUFFER_CONFIG *frame_buffer; struct VP9Common *cm; struct macroblockd_plane planes[MAX_MB_PLANE]; int start; int stop; int y_only; } LFWorkerData; void vp9_loop_filter_data_reset( LFWorkerData *lf_data, YV12_BUFFER_CONFIG *frame_buffer, struct VP9Common *cm, const struct macroblockd_plane planes[MAX_MB_PLANE]); // Operates on the rows described by 'arg1' (cast to LFWorkerData *). int vp9_loop_filter_worker(void *arg1, void *unused); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_LOOPFILTER_H_ libvpx-1.8.2/vp9/common/vp9_mfqe.c000066400000000000000000000402301357355204000167310ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree.
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_postproc.h" // TODO(jackychen): Replace this function with SSE2 code. There is // one SSE2 implementation in vp8, so we will consider how to share it // between vp8 and vp9. static void filter_by_weight(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int block_size, int src_weight) { const int dst_weight = (1 << MFQE_PRECISION) - src_weight; const int rounding_bit = 1 << (MFQE_PRECISION - 1); int r, c; for (r = 0; r < block_size; r++) { for (c = 0; c < block_size; c++) { dst[c] = (src[c] * src_weight + dst[c] * dst_weight + rounding_bit) >> MFQE_PRECISION; } src += src_stride; dst += dst_stride; } } void vp9_filter_by_weight8x8_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight) { filter_by_weight(src, src_stride, dst, dst_stride, 8, src_weight); } void vp9_filter_by_weight16x16_c(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int src_weight) { filter_by_weight(src, src_stride, dst, dst_stride, 16, src_weight); } static void filter_by_weight32x32(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int weight) { vp9_filter_by_weight16x16(src, src_stride, dst, dst_stride, weight); vp9_filter_by_weight16x16(src + 16, src_stride, dst + 16, dst_stride, weight); vp9_filter_by_weight16x16(src + src_stride * 16, src_stride, dst + dst_stride * 16, dst_stride, weight); vp9_filter_by_weight16x16(src + src_stride * 16 + 16, src_stride, dst + dst_stride * 16 + 16, dst_stride, weight); } static void filter_by_weight64x64(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int weight) { filter_by_weight32x32(src, src_stride, dst, dst_stride, weight); filter_by_weight32x32(src + 32, src_stride, dst + 32, dst_stride, weight); filter_by_weight32x32(src + src_stride * 32, src_stride, dst + dst_stride * 32, dst_stride, weight); filter_by_weight32x32(src + src_stride * 32 + 32, src_stride, dst + dst_stride * 32 + 32, dst_stride, weight); } static void apply_ifactor(const uint8_t *y, int y_stride, uint8_t *yd, int yd_stride, const uint8_t *u, const uint8_t *v, int uv_stride, uint8_t *ud, uint8_t *vd, int uvd_stride, BLOCK_SIZE block_size, int weight) { if (block_size == BLOCK_16X16) { vp9_filter_by_weight16x16(y, y_stride, yd, yd_stride, weight); vp9_filter_by_weight8x8(u, uv_stride, ud, uvd_stride, weight); vp9_filter_by_weight8x8(v, uv_stride, vd, uvd_stride, weight); } else if (block_size == BLOCK_32X32) { filter_by_weight32x32(y, y_stride, yd, yd_stride, weight); vp9_filter_by_weight16x16(u, uv_stride, ud, uvd_stride, weight); vp9_filter_by_weight16x16(v, uv_stride, vd, uvd_stride, weight); } else if (block_size == BLOCK_64X64) { filter_by_weight64x64(y, y_stride, yd, yd_stride, weight); filter_by_weight32x32(u, uv_stride, ud, uvd_stride, weight); filter_by_weight32x32(v, uv_stride, vd, uvd_stride, weight); } } // TODO(jackychen): Determine whether to replace this with assembly code.
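// The copy_mem* helpers below are plain block copies, used via copy_block() // when MFQE leaves a block unmodified.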
static void copy_mem8x8(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride) { int r; for (r = 0; r < 8; r++) { memcpy(dst, src, 8); src += src_stride; dst += dst_stride; } } static void copy_mem16x16(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride) { int r; for (r = 0; r < 16; r++) { memcpy(dst, src, 16); src += src_stride; dst += dst_stride; } } static void copy_mem32x32(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride) { copy_mem16x16(src, src_stride, dst, dst_stride); copy_mem16x16(src + 16, src_stride, dst + 16, dst_stride); copy_mem16x16(src + src_stride * 16, src_stride, dst + dst_stride * 16, dst_stride); copy_mem16x16(src + src_stride * 16 + 16, src_stride, dst + dst_stride * 16 + 16, dst_stride); } static void copy_mem64x64(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride) { copy_mem32x32(src, src_stride, dst, dst_stride); copy_mem32x32(src + 32, src_stride, dst + 32, dst_stride); copy_mem32x32(src + src_stride * 32, src_stride, dst + dst_stride * 32, dst_stride); copy_mem32x32(src + src_stride * 32 + 32, src_stride, dst + dst_stride * 32 + 32, dst_stride); } static void copy_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride, int uvd_stride, BLOCK_SIZE bs) { if (bs == BLOCK_16X16) { copy_mem16x16(y, y_stride, yd, yd_stride); copy_mem8x8(u, uv_stride, ud, uvd_stride); copy_mem8x8(v, uv_stride, vd, uvd_stride); } else if (bs == BLOCK_32X32) { copy_mem32x32(y, y_stride, yd, yd_stride); copy_mem16x16(u, uv_stride, ud, uvd_stride); copy_mem16x16(v, uv_stride, vd, uvd_stride); } else { copy_mem64x64(y, y_stride, yd, yd_stride); copy_mem32x32(u, uv_stride, ud, uvd_stride); copy_mem32x32(v, uv_stride, vd, uvd_stride); } } static void get_thr(BLOCK_SIZE bs, int qdiff, int *sad_thr, int *vdiff_thr) { const int adj = qdiff >> MFQE_PRECISION; if (bs == BLOCK_16X16) { *sad_thr = 7 + adj; } else if (bs == BLOCK_32X32) { *sad_thr = 6 + adj; } else { // BLOCK_64X64 *sad_thr = 5 + adj; } *vdiff_thr = 125 + qdiff; } static void mfqe_block(BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u, const uint8_t *v, int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride, int uvd_stride, int qdiff) { int sad, sad_thr, vdiff, vdiff_thr; uint32_t sse; get_thr(bs, qdiff, &sad_thr, &vdiff_thr); if (bs == BLOCK_16X16) { vdiff = (vpx_variance16x16(y, y_stride, yd, yd_stride, &sse) + 128) >> 8; sad = (vpx_sad16x16(y, y_stride, yd, yd_stride) + 128) >> 8; } else if (bs == BLOCK_32X32) { vdiff = (vpx_variance32x32(y, y_stride, yd, yd_stride, &sse) + 512) >> 10; sad = (vpx_sad32x32(y, y_stride, yd, yd_stride) + 512) >> 10; } else /* if (bs == BLOCK_64X64) */ { vdiff = (vpx_variance64x64(y, y_stride, yd, yd_stride, &sse) + 2048) >> 12; sad = (vpx_sad64x64(y, y_stride, yd, yd_stride) + 2048) >> 12; } // vdiff > sad * 3 means vdiff should not be too small; otherwise, // it might be a lighting change in a smooth area. When there is a // lighting change in a smooth area, it is dangerous to do MFQE. if (sad > 1 && vdiff > sad * 3) { const int weight = 1 << MFQE_PRECISION; int ifactor = weight * sad * vdiff / (sad_thr * vdiff_thr); // When ifactor equals weight, no MFQE is done. if (ifactor > weight) { ifactor = weight; } apply_ifactor(y, y_stride, yd, yd_stride, u, v, uv_stride, ud, vd, uvd_stride, bs, ifactor); } else { // Copy the block from the current frame (i.e., no MFQE is done).
copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, uvd_stride, bs); } } static int mfqe_decision(MODE_INFO *mi, BLOCK_SIZE cur_bs) { // Check the motion in the current block (for an inter frame), // or the motion in the correlated block in the last frame (for a keyframe). const int mv_len_square = mi->mv[0].as_mv.row * mi->mv[0].as_mv.row + mi->mv[0].as_mv.col * mi->mv[0].as_mv.col; const int mv_threshold = 100; return mi->mode >= NEARESTMV && // Not an intra block cur_bs >= BLOCK_16X16 && mv_len_square <= mv_threshold; } // Process each partition in a super block, recursively. static void mfqe_partition(VP9_COMMON *cm, MODE_INFO *mi, BLOCK_SIZE bs, const uint8_t *y, const uint8_t *u, const uint8_t *v, int y_stride, int uv_stride, uint8_t *yd, uint8_t *ud, uint8_t *vd, int yd_stride, int uvd_stride) { int mi_offset, y_offset, uv_offset; const BLOCK_SIZE cur_bs = mi->sb_type; const int qdiff = cm->base_qindex - cm->postproc_state.last_base_qindex; const int bsl = b_width_log2_lookup[bs]; PARTITION_TYPE partition = partition_lookup[bsl][cur_bs]; const BLOCK_SIZE subsize = get_subsize(bs, partition); if (cur_bs < BLOCK_8X8) { // If there are blocks smaller than 8x8, they must be on the boundary. return; } // No MFQE on blocks smaller than 16x16 if (bs == BLOCK_16X16) { partition = PARTITION_NONE; } if (bs == BLOCK_64X64) { mi_offset = 4; y_offset = 32; uv_offset = 16; } else { mi_offset = 2; y_offset = 16; uv_offset = 8; } switch (partition) { BLOCK_SIZE mfqe_bs, bs_tmp; case PARTITION_HORZ: if (bs == BLOCK_64X64) { mfqe_bs = BLOCK_64X32; bs_tmp = BLOCK_32X32; } else { mfqe_bs = BLOCK_32X16; bs_tmp = BLOCK_16X16; } if (mfqe_decision(mi, mfqe_bs)) { // Do mfqe on the first square partition. mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, uvd_stride, qdiff); // Do mfqe on the second square partition. mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, y_stride, uv_stride, yd + y_offset, ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride, qdiff); } if (mfqe_decision(mi + mi_offset * cm->mi_stride, mfqe_bs)) { // Do mfqe on the first square partition. mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride, v + uv_offset * uv_stride, y_stride, uv_stride, yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff); // Do mfqe on the second square partition. mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset, u + uv_offset * uv_stride + uv_offset, v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride, yd + y_offset * yd_stride + y_offset, ud + uv_offset * uvd_stride + uv_offset, vd + uv_offset * uvd_stride + uv_offset, yd_stride, uvd_stride, qdiff); } break; case PARTITION_VERT: if (bs == BLOCK_64X64) { mfqe_bs = BLOCK_32X64; bs_tmp = BLOCK_32X32; } else { mfqe_bs = BLOCK_16X32; bs_tmp = BLOCK_16X16; } if (mfqe_decision(mi, mfqe_bs)) { // Do mfqe on the first square partition. mfqe_block(bs_tmp, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, uvd_stride, qdiff); // Do mfqe on the second square partition. mfqe_block(bs_tmp, y + y_offset * y_stride, u + uv_offset * uv_stride, v + uv_offset * uv_stride, y_stride, uv_stride, yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, vd + uv_offset * uvd_stride, yd_stride, uvd_stride, qdiff); } if (mfqe_decision(mi + mi_offset, mfqe_bs)) { // Do mfqe on the first square partition.
mfqe_block(bs_tmp, y + y_offset, u + uv_offset, v + uv_offset, y_stride, uv_stride, yd + y_offset, ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride, qdiff); // Do mfqe on the second square partition. mfqe_block(bs_tmp, y + y_offset * y_stride + y_offset, u + uv_offset * uv_stride + uv_offset, v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride, yd + y_offset * yd_stride + y_offset, ud + uv_offset * uvd_stride + uv_offset, vd + uv_offset * uvd_stride + uv_offset, yd_stride, uvd_stride, qdiff); } break; case PARTITION_NONE: if (mfqe_decision(mi, cur_bs)) { // Do mfqe on this partition. mfqe_block(cur_bs, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, uvd_stride, qdiff); } else { // Copy the block from the current frame (i.e., no MFQE is done). copy_block(y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, uvd_stride, bs); } break; case PARTITION_SPLIT: // Recursion on four square partitions, e.g. if bs is 64X64, // then look into four 32X32 blocks in it. mfqe_partition(cm, mi, subsize, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, uvd_stride); mfqe_partition(cm, mi + mi_offset, subsize, y + y_offset, u + uv_offset, v + uv_offset, y_stride, uv_stride, yd + y_offset, ud + uv_offset, vd + uv_offset, yd_stride, uvd_stride); mfqe_partition(cm, mi + mi_offset * cm->mi_stride, subsize, y + y_offset * y_stride, u + uv_offset * uv_stride, v + uv_offset * uv_stride, y_stride, uv_stride, yd + y_offset * yd_stride, ud + uv_offset * uvd_stride, vd + uv_offset * uvd_stride, yd_stride, uvd_stride); mfqe_partition(cm, mi + mi_offset * cm->mi_stride + mi_offset, subsize, y + y_offset * y_stride + y_offset, u + uv_offset * uv_stride + uv_offset, v + uv_offset * uv_stride + uv_offset, y_stride, uv_stride, yd + y_offset * yd_stride + y_offset, ud + uv_offset * uvd_stride + uv_offset, vd + uv_offset * uvd_stride + uv_offset, yd_stride, uvd_stride); break; default: assert(0); } } void vp9_mfqe(VP9_COMMON *cm) { int mi_row, mi_col; // Current decoded frame. const YV12_BUFFER_CONFIG *show = cm->frame_to_show; // Last decoded frame; it will store the MFQE result. YV12_BUFFER_CONFIG *dest = &cm->post_proc_buffer; // Loop through each super block. for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { MODE_INFO *mi; MODE_INFO *mi_local = cm->mi + (mi_row * cm->mi_stride + mi_col); // Motion Info in last frame.
MODE_INFO *mi_prev = cm->postproc_state.prev_mi + (mi_row * cm->mi_stride + mi_col); const uint32_t y_stride = show->y_stride; const uint32_t uv_stride = show->uv_stride; const uint32_t yd_stride = dest->y_stride; const uint32_t uvd_stride = dest->uv_stride; const uint32_t row_offset_y = mi_row << 3; const uint32_t row_offset_uv = mi_row << 2; const uint32_t col_offset_y = mi_col << 3; const uint32_t col_offset_uv = mi_col << 2; const uint8_t *y = show->y_buffer + row_offset_y * y_stride + col_offset_y; const uint8_t *u = show->u_buffer + row_offset_uv * uv_stride + col_offset_uv; const uint8_t *v = show->v_buffer + row_offset_uv * uv_stride + col_offset_uv; uint8_t *yd = dest->y_buffer + row_offset_y * yd_stride + col_offset_y; uint8_t *ud = dest->u_buffer + row_offset_uv * uvd_stride + col_offset_uv; uint8_t *vd = dest->v_buffer + row_offset_uv * uvd_stride + col_offset_uv; if (frame_is_intra_only(cm)) { mi = mi_prev; } else { mi = mi_local; } mfqe_partition(cm, mi, BLOCK_64X64, y, u, v, y_stride, uv_stride, yd, ud, vd, yd_stride, uvd_stride); } } } libvpx-1.8.2/vp9/common/vp9_mfqe.h000066400000000000000000000020321357355204000167340ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_MFQE_H_ #define VPX_VP9_COMMON_VP9_MFQE_H_ #ifdef __cplusplus extern "C" { #endif // Multiframe Quality Enhancement. // The aim for MFQE is to replace pixel blocks in the current frame with // the correlated pixel blocks (with higher quality) in the last frame. // The replacement can only be taken in stationary blocks by checking // the motion of the blocks and other conditions such as the SAD of // the current block and correlated block, the variance of the block // difference, etc. void vp9_mfqe(struct VP9Common *cm); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_MFQE_H_ libvpx-1.8.2/vp9/common/vp9_mv.h000066400000000000000000000025061357355204000164340ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_COMMON_VP9_MV_H_ #define VPX_VP9_COMMON_VP9_MV_H_ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_common.h" #ifdef __cplusplus extern "C" { #endif #define INVALID_MV 0x80008000 typedef struct mv { int16_t row; int16_t col; } MV; typedef union int_mv { uint32_t as_int; MV as_mv; } int_mv; /* facilitates faster equality tests and copies */ typedef struct mv32 { int32_t row; int32_t col; } MV32; static INLINE int is_zero_mv(const MV *mv) { return *((const uint32_t *)mv) == 0; } static INLINE int is_equal_mv(const MV *a, const MV *b) { return *((const uint32_t *)a) == *((const uint32_t *)b); } static INLINE void clamp_mv(MV *mv, int min_col, int max_col, int min_row, int max_row) { mv->col = clamp(mv->col, min_col, max_col); mv->row = clamp(mv->row, min_row, max_row); } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_MV_H_ libvpx-1.8.2/vp9/common/vp9_mvref_common.c000066400000000000000000000164131357355204000204760ustar00rootroot00000000000000 /* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp9/common/vp9_mvref_common.h" // This function searches the neighborhood of a given MB/SB // to try to find candidate reference vectors. static void find_mv_refs_idx(const VP9_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int block, int mi_row, int mi_col, uint8_t *mode_context) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type]; int different_ref_found = 0; int context_counter = 0; const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; const TileInfo *const tile = &xd->tile; // Blank the reference vector list. memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); // The nearest 2 blocks are treated differently: // if the size < 8x8 we get the mv from the bmi substructure, // and we also need to keep a mode count. for (i = 0; i < 2; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; // Keep counts for entropy encoding. context_counter += mode_2_counter[candidate_mi->mode]; different_ref_found = 1; if (candidate_mi->ref_frame[0] == ref_frame) ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), refmv_count, mv_ref_list, Done); else if (candidate_mi->ref_frame[1] == ref_frame) ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), refmv_count, mv_ref_list, Done); } } // Check the rest of the neighbors in much the same way // as before except we don't need to keep track of sub blocks or // mode counts.
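// (These are the remaining entries, up to MVREF_NEIGHBOURS in total, of the // mv_ref_blocks[] row for this block size.)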
for (; i < MVREF_NEIGHBOURS; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; different_ref_found = 1; if (candidate_mi->ref_frame[0] == ref_frame) ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done); else if (candidate_mi->ref_frame[1] == ref_frame) ADD_MV_REF_LIST(candidate_mi->mv[1], refmv_count, mv_ref_list, Done); } } // Check the last frame's mode and mv info. if (cm->use_prev_frame_mvs) { if (prev_frame_mvs->ref_frame[0] == ref_frame) { ADD_MV_REF_LIST(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { ADD_MV_REF_LIST(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); } } // Since we couldn't find 2 mvs from the same reference frame // go back through the neighbors and find motion vectors from // different reference frames. if (different_ref_found) { for (i = 0; i < MVREF_NEIGHBOURS; ++i) { const POSITION *mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; // If the candidate is INTRA we don't want to consider its mv. IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias, refmv_count, mv_ref_list, Done); } } } // Since we still don't have a candidate we'll try the last frame. if (cm->use_prev_frame_mvs) { if (prev_frame_mvs->ref_frame[0] != ref_frame && prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { int_mv mv = prev_frame_mvs->mv[0]; if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != ref_sign_bias[ref_frame]) { mv.as_mv.row *= -1; mv.as_mv.col *= -1; } ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); } if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && prev_frame_mvs->ref_frame[1] != ref_frame && prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { int_mv mv = prev_frame_mvs->mv[1]; if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != ref_sign_bias[ref_frame]) { mv.as_mv.row *= -1; mv.as_mv.col *= -1; } ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done); } } Done: mode_context[ref_frame] = counter_to_context[context_counter]; // Clamp vectors for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) clamp_mv_ref(&mv_ref_list[i].as_mv, xd); } void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int mi_row, int mi_col, uint8_t *mode_context) { find_mv_refs_idx(cm, xd, mi, ref_frame, mv_ref_list, -1, mi_row, mi_col, mode_context); } void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, int_mv *mvlist, int_mv *nearest_mv, int_mv *near_mv) { int i; // Make sure all the candidates are properly clamped etc for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) { lower_mv_precision(&mvlist[i].as_mv, allow_hp); clamp_mv2(&mvlist[i].as_mv, xd); } *nearest_mv = mvlist[0]; *near_mv = mvlist[1]; } void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int block, int ref, int mi_row, int mi_col, int_mv *nearest_mv, int_mv *near_mv, uint8_t *mode_context) { int_mv mv_list[MAX_MV_REF_CANDIDATES]; MODE_INFO *const mi = xd->mi[0]; b_mode_info *bmi = mi->bmi; int n; assert(MAX_MV_REF_CANDIDATES == 2); find_mv_refs_idx(cm, xd, mi, mi->ref_frame[ref], mv_list, block, mi_row, mi_col, mode_context); near_mv->as_int = 0; switch (block) { case 0: nearest_mv->as_int = mv_list[0].as_int; near_mv->as_int = mv_list[1].as_int; break; case 1: case 2: 
nearest_mv->as_int = bmi[0].as_mv[ref].as_int; for (n = 0; n < MAX_MV_REF_CANDIDATES; ++n) if (nearest_mv->as_int != mv_list[n].as_int) { near_mv->as_int = mv_list[n].as_int; break; } break; case 3: { int_mv candidates[2 + MAX_MV_REF_CANDIDATES]; candidates[0] = bmi[1].as_mv[ref]; candidates[1] = bmi[0].as_mv[ref]; candidates[2] = mv_list[0]; candidates[3] = mv_list[1]; nearest_mv->as_int = bmi[2].as_mv[ref].as_int; for (n = 0; n < 2 + MAX_MV_REF_CANDIDATES; ++n) if (nearest_mv->as_int != candidates[n].as_int) { near_mv->as_int = candidates[n].as_int; break; } break; } default: assert(0 && "Invalid block index."); } } libvpx-1.8.2/vp9/common/vp9_mvref_common.h000066400000000000000000000230571357355204000205050ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_MVREF_COMMON_H_ #define VPX_VP9_COMMON_VP9_MVREF_COMMON_H_ #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_blockd.h" #ifdef __cplusplus extern "C" { #endif #define LEFT_TOP_MARGIN ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3) #define RIGHT_BOTTOM_MARGIN \ ((VP9_ENC_BORDER_IN_PIXELS - VP9_INTERP_EXTEND) << 3) #define MVREF_NEIGHBOURS 8 typedef struct position { int row; int col; } POSITION; typedef enum { BOTH_ZERO = 0, ZERO_PLUS_PREDICTED = 1, BOTH_PREDICTED = 2, NEW_PLUS_NON_INTRA = 3, BOTH_NEW = 4, INTRA_PLUS_NON_INTRA = 5, BOTH_INTRA = 6, INVALID_CASE = 9 } motion_vector_context; // This is used to figure out a context for the ref blocks. The code flattens // an array that would have 3 possible counts (0, 1 & 2) for 3 choices by // adding 9 for each intra block, 3 for each zero mv and 1 for each new // motion vector. This single number is then converted into a context // with a single lookup ( counter_to_context ). static const int mode_2_counter[MB_MODE_COUNT] = { 9, // DC_PRED 9, // V_PRED 9, // H_PRED 9, // D45_PRED 9, // D135_PRED 9, // D117_PRED 9, // D153_PRED 9, // D207_PRED 9, // D63_PRED 9, // TM_PRED 0, // NEARESTMV 0, // NEARMV 3, // ZEROMV 1, // NEWMV }; // There are 3^3 different combinations of 3 counts that can be either 0,1 or // 2. However the actual count can never be greater than 2 so the highest // counter we need is 18. 9 is an invalid counter that's never used. 
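// Example: a NEARESTMV neighbour contributes 0 and a ZEROMV neighbour // contributes 3, so the counter is 3 and counter_to_context[3] yields // ZERO_PLUS_PREDICTED.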
static const int counter_to_context[19] = { BOTH_PREDICTED, // 0 NEW_PLUS_NON_INTRA, // 1 BOTH_NEW, // 2 ZERO_PLUS_PREDICTED, // 3 NEW_PLUS_NON_INTRA, // 4 INVALID_CASE, // 5 BOTH_ZERO, // 6 INVALID_CASE, // 7 INVALID_CASE, // 8 INTRA_PLUS_NON_INTRA, // 9 INTRA_PLUS_NON_INTRA, // 10 INVALID_CASE, // 11 INTRA_PLUS_NON_INTRA, // 12 INVALID_CASE, // 13 INVALID_CASE, // 14 INVALID_CASE, // 15 INVALID_CASE, // 16 INVALID_CASE, // 17 BOTH_INTRA // 18 }; static const POSITION mv_ref_blocks[BLOCK_SIZES][MVREF_NEIGHBOURS] = { // 4X4 { { -1, 0 }, { 0, -1 }, { -1, -1 }, { -2, 0 }, { 0, -2 }, { -2, -1 }, { -1, -2 }, { -2, -2 } }, // 4X8 { { -1, 0 }, { 0, -1 }, { -1, -1 }, { -2, 0 }, { 0, -2 }, { -2, -1 }, { -1, -2 }, { -2, -2 } }, // 8X4 { { -1, 0 }, { 0, -1 }, { -1, -1 }, { -2, 0 }, { 0, -2 }, { -2, -1 }, { -1, -2 }, { -2, -2 } }, // 8X8 { { -1, 0 }, { 0, -1 }, { -1, -1 }, { -2, 0 }, { 0, -2 }, { -2, -1 }, { -1, -2 }, { -2, -2 } }, // 8X16 { { 0, -1 }, { -1, 0 }, { 1, -1 }, { -1, -1 }, { 0, -2 }, { -2, 0 }, { -2, -1 }, { -1, -2 } }, // 16X8 { { -1, 0 }, { 0, -1 }, { -1, 1 }, { -1, -1 }, { -2, 0 }, { 0, -2 }, { -1, -2 }, { -2, -1 } }, // 16X16 { { -1, 0 }, { 0, -1 }, { -1, 1 }, { 1, -1 }, { -1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 } }, // 16X32 { { 0, -1 }, { -1, 0 }, { 2, -1 }, { -1, -1 }, { -1, 1 }, { 0, -3 }, { -3, 0 }, { -3, -3 } }, // 32X16 { { -1, 0 }, { 0, -1 }, { -1, 2 }, { -1, -1 }, { 1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 } }, // 32X32 { { -1, 1 }, { 1, -1 }, { -1, 2 }, { 2, -1 }, { -1, -1 }, { -3, 0 }, { 0, -3 }, { -3, -3 } }, // 32X64 { { 0, -1 }, { -1, 0 }, { 4, -1 }, { -1, 2 }, { -1, -1 }, { 0, -3 }, { -3, 0 }, { 2, -1 } }, // 64X32 { { -1, 0 }, { 0, -1 }, { -1, 4 }, { 2, -1 }, { -1, -1 }, { -3, 0 }, { 0, -3 }, { -1, 2 } }, // 64X64 { { -1, 3 }, { 3, -1 }, { -1, 4 }, { 4, -1 }, { -1, -1 }, { -1, 0 }, { 0, -1 }, { -1, 6 } } }; static const int idx_n_column_to_subblock[4][2] = { { 1, 2 }, { 1, 3 }, { 3, 2 }, { 3, 3 } }; // clamp_mv_ref #define MV_BORDER (16 << 3) // Allow 16 pels in 1/8th pel units static INLINE void clamp_mv_ref(MV *mv, const MACROBLOCKD *xd) { clamp_mv(mv, xd->mb_to_left_edge - MV_BORDER, xd->mb_to_right_edge + MV_BORDER, xd->mb_to_top_edge - MV_BORDER, xd->mb_to_bottom_edge + MV_BORDER); } // This function returns either the appropriate sub-block mv or the block's // mv, depending on whether block_size < 8x8 and check_sub_blocks is set. static INLINE int_mv get_sub_block_mv(const MODE_INFO *candidate, int which_mv, int search_col, int block_idx) { return block_idx >= 0 && candidate->sb_type < BLOCK_8X8 ? candidate ->bmi[idx_n_column_to_subblock[block_idx][search_col == 0]] .as_mv[which_mv] : candidate->mv[which_mv]; } // Performs mv sign inversion if indicated by the reference frame combination. static INLINE int_mv scale_mv(const MODE_INFO *mi, int ref, const MV_REFERENCE_FRAME this_ref_frame, const int *ref_sign_bias) { int_mv mv = mi->mv[ref]; if (ref_sign_bias[mi->ref_frame[ref]] != ref_sign_bias[this_ref_frame]) { mv.as_mv.row *= -1; mv.as_mv.col *= -1; } return mv; } // This macro is used to add a motion vector to the mv_ref list if it isn't // already in the list. If it's the second motion vector it will also // skip all additional processing and jump to Done!
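// (mv_ref_list holds at most MAX_MV_REF_CANDIDATES == 2 entries, so finding // a second distinct mv completes the search.)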
#define ADD_MV_REF_LIST(mv, refmv_count, mv_ref_list, Done) \ do { \ if (refmv_count) { \ if ((mv).as_int != (mv_ref_list)[0].as_int) { \ (mv_ref_list)[(refmv_count)] = (mv); \ goto Done; \ } \ } else { \ (mv_ref_list)[(refmv_count)++] = (mv); \ } \ } while (0) // If either reference frame is different, not INTRA, and they // are different from each other, scale and add the mv to our list. #define IF_DIFF_REF_FRAME_ADD_MV(mbmi, ref_frame, ref_sign_bias, refmv_count, \ mv_ref_list, Done) \ do { \ if (is_inter_block(mbmi)) { \ if ((mbmi)->ref_frame[0] != (ref_frame)) \ ADD_MV_REF_LIST(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ refmv_count, mv_ref_list, Done); \ if (has_second_ref(mbmi) && (mbmi)->ref_frame[1] != (ref_frame) && \ (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ ADD_MV_REF_LIST(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ refmv_count, mv_ref_list, Done); \ } \ } while (0) // Checks that the given mi_row, mi_col and search point // are inside the borders of the tile. static INLINE int is_inside(const TileInfo *const tile, int mi_col, int mi_row, int mi_rows, const POSITION *mi_pos) { return !(mi_row + mi_pos->row < 0 || mi_col + mi_pos->col < tile->mi_col_start || mi_row + mi_pos->row >= mi_rows || mi_col + mi_pos->col >= tile->mi_col_end); } // TODO(jingning): this mv clamping function should be block size dependent. static INLINE void clamp_mv2(MV *mv, const MACROBLOCKD *xd) { clamp_mv(mv, xd->mb_to_left_edge - LEFT_TOP_MARGIN, xd->mb_to_right_edge + RIGHT_BOTTOM_MARGIN, xd->mb_to_top_edge - LEFT_TOP_MARGIN, xd->mb_to_bottom_edge + RIGHT_BOTTOM_MARGIN); } static INLINE void lower_mv_precision(MV *mv, int allow_hp) { const int use_hp = allow_hp && use_mv_hp(mv); if (!use_hp) { if (mv->row & 1) mv->row += (mv->row > 0 ? -1 : 1); if (mv->col & 1) mv->col += (mv->col > 0 ? -1 : 1); } } typedef void (*find_mv_refs_sync)(void *const data, int mi_row); void vp9_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame, int_mv *mv_ref_list, int mi_row, int mi_col, uint8_t *mode_context); // Check a list of motion vectors by sad score, using a number of rows of // pixels above and a number of columns of pixels to the left, to select the // one with the best score to use as the ref motion vector. void vp9_find_best_ref_mvs(MACROBLOCKD *xd, int allow_hp, int_mv *mvlist, int_mv *nearest_mv, int_mv *near_mv); void vp9_append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, int block, int ref, int mi_row, int mi_col, int_mv *nearest_mv, int_mv *near_mv, uint8_t *mode_context); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_MVREF_COMMON_H_ libvpx-1.8.2/vp9/common/vp9_onyxc_int.h000066400000000000000000000320071357355204000200230ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #ifndef VPX_VP9_COMMON_VP9_ONYXC_INT_H_ #define VPX_VP9_COMMON_VP9_ONYXC_INT_H_ #include "./vpx_config.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx_util/vpx_thread.h" #include "./vp9_rtcd.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_frame_buffers.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_tile_common.h" #if CONFIG_VP9_POSTPROC #include "vp9/common/vp9_postproc.h" #endif #ifdef __cplusplus extern "C" { #endif #define REFS_PER_FRAME 3 #define REF_FRAMES_LOG2 3 #define REF_FRAMES (1 << REF_FRAMES_LOG2) // 1 scratch frame for the new frame, REFS_PER_FRAME for scaled references on // the encoder. #define FRAME_BUFFERS (REF_FRAMES + 1 + REFS_PER_FRAME) #define FRAME_CONTEXTS_LOG2 2 #define FRAME_CONTEXTS (1 << FRAME_CONTEXTS_LOG2) #define NUM_PING_PONG_BUFFERS 2 extern const struct { PARTITION_CONTEXT above; PARTITION_CONTEXT left; } partition_context_lookup[BLOCK_SIZES]; typedef enum { SINGLE_REFERENCE = 0, COMPOUND_REFERENCE = 1, REFERENCE_MODE_SELECT = 2, REFERENCE_MODES = 3, } REFERENCE_MODE; typedef struct { int_mv mv[2]; MV_REFERENCE_FRAME ref_frame[2]; } MV_REF; typedef struct { int ref_count; MV_REF *mvs; int mi_rows; int mi_cols; uint8_t released; int frame_index; vpx_codec_frame_buffer_t raw_frame_buffer; YV12_BUFFER_CONFIG buf; } RefCntBuffer; typedef struct BufferPool { // Private data associated with the frame buffer callbacks. void *cb_priv; vpx_get_frame_buffer_cb_fn_t get_fb_cb; vpx_release_frame_buffer_cb_fn_t release_fb_cb; RefCntBuffer frame_bufs[FRAME_BUFFERS]; // Frame buffers allocated internally by the codec. InternalFrameBufferList int_frame_buffers; } BufferPool; typedef struct VP9Common { struct vpx_internal_error_info error; vpx_color_space_t color_space; vpx_color_range_t color_range; int width; int height; int render_width; int render_height; int last_width; int last_height; // TODO(jkoleszar): this implies chroma ss right now, but could vary per // plane. Revisit as part of the future change to YV12_BUFFER_CONFIG to // support additional planes. int subsampling_x; int subsampling_y; #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth; // Marks if we need to use 16bit frame buffers. #endif YV12_BUFFER_CONFIG *frame_to_show; RefCntBuffer *prev_frame; // TODO(hkuang): Combine this with cur_buf in macroblockd. RefCntBuffer *cur_frame; int ref_frame_map[REF_FRAMES]; /* maps fb_idx to reference slot */ // Prepare ref_frame_map for the next frame. // Only used in frame parallel decode. int next_ref_frame_map[REF_FRAMES]; // TODO(jkoleszar): could expand active_ref_idx to 4, with 0 as intra, and // roll new_fb_idx into it. // Each frame can reference REFS_PER_FRAME buffers RefBuffer frame_refs[REFS_PER_FRAME]; int new_fb_idx; int cur_show_frame_fb_idx; #if CONFIG_VP9_POSTPROC YV12_BUFFER_CONFIG post_proc_buffer; YV12_BUFFER_CONFIG post_proc_buffer_int; #endif FRAME_TYPE last_frame_type; /* last frame's frame type for motion search.*/ FRAME_TYPE frame_type; int show_frame; int last_show_frame; int show_existing_frame; // Flag signaling that the frame is encoded using only INTRA modes. uint8_t intra_only; uint8_t last_intra_only; int allow_high_precision_mv; // Flag signaling that the frame context should be reset to default values. // 0 or 1 implies don't reset, 2 reset just the context specified in the // frame header, 3 reset all contexts. 
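// A consumer of this field might act as in the following sketch
// (illustrative only; default_fc stands for a default-initialized
// FRAME_CONTEXT and is hypothetical here):
//   if (cm->reset_frame_context == 3) {
//     for (i = 0; i < FRAME_CONTEXTS; ++i) cm->frame_contexts[i] = default_fc;
//   } else if (cm->reset_frame_context == 2) {
//     cm->frame_contexts[cm->frame_context_idx] = default_fc;
//   }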
int reset_frame_context; // MBs, mb_rows/cols is in 16-pixel units; mi_rows/cols is in // MODE_INFO (8-pixel) units. int MBs; int mb_rows, mi_rows; int mb_cols, mi_cols; int mi_stride; /* profile settings */ TX_MODE tx_mode; int base_qindex; int y_dc_delta_q; int uv_dc_delta_q; int uv_ac_delta_q; int16_t y_dequant[MAX_SEGMENTS][2]; int16_t uv_dequant[MAX_SEGMENTS][2]; /* We allocate a MODE_INFO struct for each macroblock, together with an extra row on top and column on the left to simplify prediction. */ int mi_alloc_size; MODE_INFO *mip; /* Base of allocated array */ MODE_INFO *mi; /* Corresponds to upper left visible macroblock */ // TODO(agrange): Move prev_mi into encoder structure. // prev_mip and prev_mi will only be allocated in VP9 encoder. MODE_INFO *prev_mip; /* MODE_INFO array 'mip' from last decoded frame */ MODE_INFO *prev_mi; /* 'mi' from last frame (points into prev_mip) */ // Separate mi functions between encoder and decoder. int (*alloc_mi)(struct VP9Common *cm, int mi_size); void (*free_mi)(struct VP9Common *cm); void (*setup_mi)(struct VP9Common *cm); // Grid of pointers to 8x8 MODE_INFO structs. Any 8x8 not in the visible // area will be NULL. MODE_INFO **mi_grid_base; MODE_INFO **mi_grid_visible; MODE_INFO **prev_mi_grid_base; MODE_INFO **prev_mi_grid_visible; // Whether to use previous frame's motion vectors for prediction. int use_prev_frame_mvs; // Persistent mb segment id map used in prediction. int seg_map_idx; int prev_seg_map_idx; uint8_t *seg_map_array[NUM_PING_PONG_BUFFERS]; uint8_t *last_frame_seg_map; uint8_t *current_frame_seg_map; int seg_map_alloc_size; INTERP_FILTER interp_filter; loop_filter_info_n lf_info; int refresh_frame_context; /* Two state 0 = NO, 1 = YES */ int ref_frame_sign_bias[MAX_REF_FRAMES]; /* Two state 0, 1 */ struct loopfilter lf; struct segmentation seg; // Context probabilities for reference frame prediction MV_REFERENCE_FRAME comp_fixed_ref; MV_REFERENCE_FRAME comp_var_ref[2]; REFERENCE_MODE reference_mode; FRAME_CONTEXT *fc; /* this frame entropy */ FRAME_CONTEXT *frame_contexts; // FRAME_CONTEXTS unsigned int frame_context_idx; /* Context to use/update */ FRAME_COUNTS counts; unsigned int current_video_frame; BITSTREAM_PROFILE profile; // VPX_BITS_8 in profile 0 or 1, VPX_BITS_10 or VPX_BITS_12 in profile 2 or 3. vpx_bit_depth_t bit_depth; vpx_bit_depth_t dequant_bit_depth; // bit_depth of current dequantizer #if CONFIG_VP9_POSTPROC struct postproc_state postproc_state; #endif int error_resilient_mode; int frame_parallel_decoding_mode; int log2_tile_cols, log2_tile_rows; int byte_alignment; int skip_loop_filter; // External BufferPool passed from outside. 
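// The pool is referenced by pointer rather than embedded so that the same
// pool (and its ref-counted frame_bufs) can be shared across codec
// instances, e.g. by frame-parallel decoding.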
BufferPool *buffer_pool; PARTITION_CONTEXT *above_seg_context; ENTROPY_CONTEXT *above_context; int above_context_alloc_cols; int lf_row; } VP9_COMMON; typedef struct { int frame_width; int frame_height; int render_frame_width; int render_frame_height; int mi_rows; int mi_cols; int mb_rows; int mb_cols; int num_mbs; vpx_bit_depth_t bit_depth; } FRAME_INFO; static INLINE void init_frame_info(FRAME_INFO *frame_info, const VP9_COMMON *cm) { frame_info->frame_width = cm->width; frame_info->frame_height = cm->height; frame_info->render_frame_width = cm->render_width; frame_info->render_frame_height = cm->render_height; frame_info->mi_cols = cm->mi_cols; frame_info->mi_rows = cm->mi_rows; frame_info->mb_cols = cm->mb_cols; frame_info->mb_rows = cm->mb_rows; frame_info->num_mbs = cm->MBs; frame_info->bit_depth = cm->bit_depth; // TODO(angiebird): Figure out how to get subsampling_x/y here } static INLINE YV12_BUFFER_CONFIG *get_buf_frame(VP9_COMMON *cm, int index) { if (index < 0 || index >= FRAME_BUFFERS) return NULL; if (cm->error.error_code != VPX_CODEC_OK) return NULL; return &cm->buffer_pool->frame_bufs[index].buf; } static INLINE YV12_BUFFER_CONFIG *get_ref_frame(VP9_COMMON *cm, int index) { if (index < 0 || index >= REF_FRAMES) return NULL; if (cm->ref_frame_map[index] < 0) return NULL; assert(cm->ref_frame_map[index] < FRAME_BUFFERS); return &cm->buffer_pool->frame_bufs[cm->ref_frame_map[index]].buf; } static INLINE YV12_BUFFER_CONFIG *get_frame_new_buffer(VP9_COMMON *cm) { return &cm->buffer_pool->frame_bufs[cm->new_fb_idx].buf; } static INLINE int get_free_fb(VP9_COMMON *cm) { RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; int i; for (i = 0; i < FRAME_BUFFERS; ++i) if (frame_bufs[i].ref_count == 0) break; if (i != FRAME_BUFFERS) { frame_bufs[i].ref_count = 1; } else { // Reset i to be INVALID_IDX to indicate no free buffer found. i = INVALID_IDX; } return i; } static INLINE void ref_cnt_fb(RefCntBuffer *bufs, int *idx, int new_idx) { const int ref_index = *idx; if (ref_index >= 0 && bufs[ref_index].ref_count > 0) bufs[ref_index].ref_count--; *idx = new_idx; bufs[new_idx].ref_count++; } static INLINE int mi_cols_aligned_to_sb(int n_mis) { return ALIGN_POWER_OF_TWO(n_mis, MI_BLOCK_SIZE_LOG2); } static INLINE int frame_is_intra_only(const VP9_COMMON *const cm) { return cm->frame_type == KEY_FRAME || cm->intra_only; } static INLINE void set_partition_probs(const VP9_COMMON *const cm, MACROBLOCKD *const xd) { xd->partition_probs = frame_is_intra_only(cm) ? 
&vp9_kf_partition_probs[0] : (const vpx_prob(*)[PARTITION_TYPES - 1]) cm->fc->partition_prob; } static INLINE void vp9_init_macroblockd(VP9_COMMON *cm, MACROBLOCKD *xd, tran_low_t *dqcoeff) { int i; for (i = 0; i < MAX_MB_PLANE; ++i) { xd->plane[i].dqcoeff = dqcoeff; xd->above_context[i] = cm->above_context + i * sizeof(*cm->above_context) * 2 * mi_cols_aligned_to_sb(cm->mi_cols); if (get_plane_type(i) == PLANE_TYPE_Y) { memcpy(xd->plane[i].seg_dequant, cm->y_dequant, sizeof(cm->y_dequant)); } else { memcpy(xd->plane[i].seg_dequant, cm->uv_dequant, sizeof(cm->uv_dequant)); } xd->fc = cm->fc; } xd->above_seg_context = cm->above_seg_context; xd->mi_stride = cm->mi_stride; xd->error_info = &cm->error; set_partition_probs(cm, xd); } static INLINE const vpx_prob *get_partition_probs(const MACROBLOCKD *xd, int ctx) { return xd->partition_probs[ctx]; } static INLINE void set_skip_context(MACROBLOCKD *xd, int mi_row, int mi_col) { const int above_idx = mi_col * 2; const int left_idx = (mi_row * 2) & 15; int i; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane *const pd = &xd->plane[i]; pd->above_context = &xd->above_context[i][above_idx >> pd->subsampling_x]; pd->left_context = &xd->left_context[i][left_idx >> pd->subsampling_y]; } } static INLINE int calc_mi_size(int len) { // len is in mi units. return len + MI_BLOCK_SIZE; } static INLINE void set_mi_row_col(MACROBLOCKD *xd, const TileInfo *const tile, int mi_row, int bh, int mi_col, int bw, int mi_rows, int mi_cols) { xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); xd->mb_to_bottom_edge = ((mi_rows - bh - mi_row) * MI_SIZE) * 8; xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); xd->mb_to_right_edge = ((mi_cols - bw - mi_col) * MI_SIZE) * 8; // Are edges available for intra prediction? xd->above_mi = (mi_row != 0) ? xd->mi[-xd->mi_stride] : NULL; xd->left_mi = (mi_col > tile->mi_col_start) ? xd->mi[-1] : NULL; } static INLINE void update_partition_context(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE subsize, BLOCK_SIZE bsize) { PARTITION_CONTEXT *const above_ctx = xd->above_seg_context + mi_col; PARTITION_CONTEXT *const left_ctx = xd->left_seg_context + (mi_row & MI_MASK); // num_4x4_blocks_wide_lookup[bsize] / 2 const int bs = num_8x8_blocks_wide_lookup[bsize]; // update the partition context at the end notes. set partition bits // of block sizes larger than the current one to be one, and partition // bits of smaller block sizes to be zero. memset(above_ctx, partition_context_lookup[subsize].above, bs); memset(left_ctx, partition_context_lookup[subsize].left, bs); } static INLINE int partition_plane_context(const MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize) { const PARTITION_CONTEXT *above_ctx = xd->above_seg_context + mi_col; const PARTITION_CONTEXT *left_ctx = xd->left_seg_context + (mi_row & MI_MASK); const int bsl = mi_width_log2_lookup[bsize]; int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1; assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); assert(bsl >= 0); return (left * 2 + above) + bsl * PARTITION_PLOFFSET; } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_ONYXC_INT_H_ libvpx-1.8.2/vp9/common/vp9_postproc.c000066400000000000000000000346171357355204000176660ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <math.h> #include <stdio.h> #include <stdlib.h> #include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" #include "./vpx_scale_rtcd.h" #include "./vp9_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/postproc.h" #include "vpx_ports/mem.h" #include "vpx_ports/system_state.h" #include "vpx_scale/vpx_scale.h" #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_postproc.h" #if CONFIG_VP9_POSTPROC static const uint8_t q_diff_thresh = 20; static const uint8_t last_q_thresh = 170; extern const int16_t vpx_rv[]; #if CONFIG_VP9_HIGHBITDEPTH static const int16_t kernel5[] = { 1, 1, 4, 1, 1 }; void vp9_highbd_post_proc_down_and_across_c(const uint16_t *src_ptr, uint16_t *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int rows, int cols, int flimit) { uint16_t const *p_src; uint16_t *p_dst; int row, col, i, v, kernel; int pitch = src_pixels_per_line; uint16_t d[8]; for (row = 0; row < rows; row++) { // post_proc_down for one row. p_src = src_ptr; p_dst = dst_ptr; for (col = 0; col < cols; col++) { kernel = 4; v = p_src[col]; for (i = -2; i <= 2; i++) { if (abs(v - p_src[col + i * pitch]) > flimit) goto down_skip_convolve; kernel += kernel5[2 + i] * p_src[col + i * pitch]; } v = (kernel >> 3); down_skip_convolve: p_dst[col] = v; } /* now post_proc_across */ p_src = dst_ptr; p_dst = dst_ptr; for (i = 0; i < 8; i++) d[i] = p_src[i]; for (col = 0; col < cols; col++) { kernel = 4; v = p_src[col]; d[col & 7] = v; for (i = -2; i <= 2; i++) { if (abs(v - p_src[col + i]) > flimit) goto across_skip_convolve; kernel += kernel5[2 + i] * p_src[col + i]; } d[col & 7] = (kernel >> 3); across_skip_convolve: if (col >= 2) p_dst[col - 2] = d[(col - 2) & 7]; } /* handle the last two pixels */ p_dst[col - 2] = d[(col - 2) & 7]; p_dst[col - 1] = d[(col - 1) & 7]; /* next row */ src_ptr += pitch; dst_ptr += dst_pixels_per_line; } } #endif // CONFIG_VP9_HIGHBITDEPTH static int q2mbl(int x) { if (x < 20) x = 20; x = 50 + (x - 50) * 10 / 8; return x * x / 3; } #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_mbpost_proc_across_ip_c(uint16_t *src, int pitch, int rows, int cols, int flimit) { int r, c, i; uint16_t *s = src; uint16_t d[16]; for (r = 0; r < rows; r++) { int sumsq = 0; int sum = 0; for (i = -8; i <= 6; i++) { sumsq += s[i] * s[i]; sum += s[i]; d[i + 8] = 0; } for (c = 0; c < cols + 8; c++) { int x = s[c + 7] - s[c - 8]; int y = s[c + 7] + s[c - 8]; sum += x; sumsq += x * y; d[c & 15] = s[c]; if (sumsq * 15 - sum * sum < flimit) { d[c & 15] = (8 + sum + s[c]) >> 4; } s[c - 8] = d[(c - 8) & 15]; } s += pitch; } } #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_mbpost_proc_down_c(uint16_t *dst, int pitch, int rows, int cols, int flimit) { int r, c, i; const int16_t *rv3 = &vpx_rv[63 & rand()]; // NOLINT for (c = 0; c < cols; c++) { uint16_t *s = &dst[c]; int sumsq = 0; int sum = 0; uint16_t d[16]; const int16_t *rv2 = rv3 + ((c * 17) & 127); for (i = -8; i <= 6; i++) { sumsq += s[i * pitch] * s[i * pitch]; sum += s[i * pitch]; } for (r = 0; r < rows + 8; r++) { sumsq += s[7 * pitch] * s[7 * pitch] - s[-8 * pitch] * s[-8 * pitch]; sum += s[7 * pitch] - s[-8 * pitch]; d[r & 15] = s[0]; if (sumsq * 15 - sum * sum < flimit) { d[r & 15] = (rv2[r & 127] + sum + s[0]) >> 4; } s[-8 * pitch] = d[(r - 8) & 15]; s += pitch; } } } #endif // CONFIG_VP9_HIGHBITDEPTH static
void deblock_and_de_macro_block(VP9_COMMON *cm, YV12_BUFFER_CONFIG *source, YV12_BUFFER_CONFIG *post, int q, int low_var_thresh, int flag, uint8_t *limits) { (void)low_var_thresh; (void)flag; #if CONFIG_VP9_HIGHBITDEPTH if (source->flags & YV12_FLAG_HIGHBITDEPTH) { double level = 6.0e-05 * q * q * q - .0067 * q * q + .306 * q + .0065; int ppl = (int)(level + .5); vp9_highbd_post_proc_down_and_across( CONVERT_TO_SHORTPTR(source->y_buffer), CONVERT_TO_SHORTPTR(post->y_buffer), source->y_stride, post->y_stride, source->y_height, source->y_width, ppl); vp9_highbd_mbpost_proc_across_ip(CONVERT_TO_SHORTPTR(post->y_buffer), post->y_stride, post->y_height, post->y_width, q2mbl(q)); vp9_highbd_mbpost_proc_down(CONVERT_TO_SHORTPTR(post->y_buffer), post->y_stride, post->y_height, post->y_width, q2mbl(q)); vp9_highbd_post_proc_down_and_across( CONVERT_TO_SHORTPTR(source->u_buffer), CONVERT_TO_SHORTPTR(post->u_buffer), source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); vp9_highbd_post_proc_down_and_across( CONVERT_TO_SHORTPTR(source->v_buffer), CONVERT_TO_SHORTPTR(post->v_buffer), source->uv_stride, post->uv_stride, source->uv_height, source->uv_width, ppl); } else { #endif // CONFIG_VP9_HIGHBITDEPTH vp9_deblock(cm, source, post, q, limits); vpx_mbpost_proc_across_ip(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); vpx_mbpost_proc_down(post->y_buffer, post->y_stride, post->y_height, post->y_width, q2mbl(q)); #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH } void vp9_deblock(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits) { const int ppl = (int)(6.0e-05 * q * q * q - 0.0067 * q * q + 0.306 * q + 0.0065 + 0.5); #if CONFIG_VP9_HIGHBITDEPTH if (src->flags & YV12_FLAG_HIGHBITDEPTH) { int i; const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer, src->v_buffer }; const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride }; const int src_widths[3] = { src->y_width, src->uv_width, src->uv_width }; const int src_heights[3] = { src->y_height, src->uv_height, src->uv_height }; uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer }; const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride }; for (i = 0; i < MAX_MB_PLANE; ++i) { vp9_highbd_post_proc_down_and_across( CONVERT_TO_SHORTPTR(srcs[i]), CONVERT_TO_SHORTPTR(dsts[i]), src_strides[i], dst_strides[i], src_heights[i], src_widths[i], ppl); } } else { #endif // CONFIG_VP9_HIGHBITDEPTH int mbr; const int mb_rows = cm->mb_rows; const int mb_cols = cm->mb_cols; memset(limits, (unsigned char)ppl, 16 * mb_cols); for (mbr = 0; mbr < mb_rows; mbr++) { vpx_post_proc_down_and_across_mb_row( src->y_buffer + 16 * mbr * src->y_stride, dst->y_buffer + 16 * mbr * dst->y_stride, src->y_stride, dst->y_stride, src->y_width, limits, 16); vpx_post_proc_down_and_across_mb_row( src->u_buffer + 8 * mbr * src->uv_stride, dst->u_buffer + 8 * mbr * dst->uv_stride, src->uv_stride, dst->uv_stride, src->uv_width, limits, 8); vpx_post_proc_down_and_across_mb_row( src->v_buffer + 8 * mbr * src->uv_stride, dst->v_buffer + 8 * mbr * dst->uv_stride, src->uv_stride, dst->uv_stride, src->uv_width, limits, 8); } #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH } void vp9_denoise(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits) { vp9_deblock(cm, src, dst, q, limits); } static void swap_mi_and_prev_mi(VP9_COMMON *cm) { // Current mip will be the prev_mip 
for the next frame. MODE_INFO *temp = cm->postproc_state.prev_mip; cm->postproc_state.prev_mip = cm->mip; cm->mip = temp; // Update the upper left visible macroblock ptrs. cm->mi = cm->mip + cm->mi_stride + 1; cm->postproc_state.prev_mi = cm->postproc_state.prev_mip + cm->mi_stride + 1; } int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *ppflags, int unscaled_width) { const int q = VPXMIN(105, cm->lf.filter_level * 2); const int flags = ppflags->post_proc_flag; YV12_BUFFER_CONFIG *const ppbuf = &cm->post_proc_buffer; struct postproc_state *const ppstate = &cm->postproc_state; if (!cm->frame_to_show) return -1; if (!flags) { *dest = *cm->frame_to_show; return 0; } vpx_clear_system_state(); // Alloc memory for prev_mip in the first frame. if (cm->current_video_frame == 1) { ppstate->last_base_qindex = cm->base_qindex; ppstate->last_frame_valid = 1; } if ((flags & VP9D_MFQE) && ppstate->prev_mip == NULL) { ppstate->prev_mip = vpx_calloc(cm->mi_alloc_size, sizeof(*cm->mip)); if (!ppstate->prev_mip) { return 1; } ppstate->prev_mi = ppstate->prev_mip + cm->mi_stride + 1; } // Allocate post_proc_buffer_int if needed. if ((flags & VP9D_MFQE) && !cm->post_proc_buffer_int.buffer_alloc) { if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) { const int width = ALIGN_POWER_OF_TWO(cm->width, 4); const int height = ALIGN_POWER_OF_TWO(cm->height, 4); if (vpx_alloc_frame_buffer(&cm->post_proc_buffer_int, width, height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif // CONFIG_VP9_HIGHBITDEPTH VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment) < 0) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate MFQE framebuffer"); } // Ensure that postproc is set to all 0s so that post proc // doesn't pull random data in from edge. memset(cm->post_proc_buffer_int.buffer_alloc, 128, cm->post_proc_buffer.frame_size); } } if (vpx_realloc_frame_buffer(&cm->post_proc_buffer, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_DEC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL) < 0) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate post-processing buffer"); if (flags & (VP9D_DEMACROBLOCK | VP9D_DEBLOCK)) { if (!cm->postproc_state.limits) { cm->postproc_state.limits = vpx_calloc(unscaled_width, sizeof(*cm->postproc_state.limits)); } } if (flags & VP9D_ADDNOISE) { if (!cm->postproc_state.generated_noise) { cm->postproc_state.generated_noise = vpx_calloc( cm->width + 256, sizeof(*cm->postproc_state.generated_noise)); if (!cm->postproc_state.generated_noise) return 1; } } if ((flags & VP9D_MFQE) && cm->current_video_frame >= 2 && ppstate->last_frame_valid && cm->bit_depth == 8 && ppstate->last_base_qindex <= last_q_thresh && cm->base_qindex - ppstate->last_base_qindex >= q_diff_thresh) { vp9_mfqe(cm); // TODO(jackychen): Consider whether enable deblocking by default // if mfqe is enabled. Need to take both the quality and the speed // into consideration. 
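// In the MFQE branch below, the MFQE output is first copied into
// post_proc_buffer_int when further filtering was requested, then
// de-macro-blocked, deblocked, or copied straight back, in that priority
// order.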
if ((flags & VP9D_DEMACROBLOCK) || (flags & VP9D_DEBLOCK)) { vpx_yv12_copy_frame(ppbuf, &cm->post_proc_buffer_int); } if ((flags & VP9D_DEMACROBLOCK) && cm->post_proc_buffer_int.buffer_alloc) { deblock_and_de_macro_block(cm, &cm->post_proc_buffer_int, ppbuf, q + (ppflags->deblocking_level - 5) * 10, 1, 0, cm->postproc_state.limits); } else if (flags & VP9D_DEBLOCK) { vp9_deblock(cm, &cm->post_proc_buffer_int, ppbuf, q, cm->postproc_state.limits); } else { vpx_yv12_copy_frame(&cm->post_proc_buffer_int, ppbuf); } } else if (flags & VP9D_DEMACROBLOCK) { deblock_and_de_macro_block(cm, cm->frame_to_show, ppbuf, q + (ppflags->deblocking_level - 5) * 10, 1, 0, cm->postproc_state.limits); } else if (flags & VP9D_DEBLOCK) { vp9_deblock(cm, cm->frame_to_show, ppbuf, q, cm->postproc_state.limits); } else { vpx_yv12_copy_frame(cm->frame_to_show, ppbuf); } ppstate->last_base_qindex = cm->base_qindex; ppstate->last_frame_valid = 1; if (flags & VP9D_ADDNOISE) { const int noise_level = ppflags->noise_level; if (ppstate->last_q != q || ppstate->last_noise != noise_level) { double sigma; vpx_clear_system_state(); sigma = noise_level + .5 + .6 * q / 63.0; ppstate->clamp = vpx_setup_noise(sigma, ppstate->generated_noise, cm->width + 256); ppstate->last_q = q; ppstate->last_noise = noise_level; } vpx_plane_add_noise(ppbuf->y_buffer, ppstate->generated_noise, ppstate->clamp, ppstate->clamp, ppbuf->y_width, ppbuf->y_height, ppbuf->y_stride); } *dest = *ppbuf; /* handle problem with extending borders */ dest->y_width = cm->width; dest->y_height = cm->height; dest->uv_width = dest->y_width >> cm->subsampling_x; dest->uv_height = dest->y_height >> cm->subsampling_y; if (flags & VP9D_MFQE) swap_mi_and_prev_mi(cm); return 0; } #endif // CONFIG_VP9_POSTPROC libvpx-1.8.2/vp9/common/vp9_postproc.h000066400000000000000000000026621357355204000176660ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_POSTPROC_H_ #define VPX_VP9_COMMON_VP9_POSTPROC_H_ #include "vpx_ports/mem.h" #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_mfqe.h" #include "vp9/common/vp9_ppflags.h" #ifdef __cplusplus extern "C" { #endif struct postproc_state { int last_q; int last_noise; int last_base_qindex; int last_frame_valid; MODE_INFO *prev_mip; MODE_INFO *prev_mi; int clamp; uint8_t *limits; int8_t *generated_noise; }; struct VP9Common; #define MFQE_PRECISION 4 int vp9_post_proc_frame(struct VP9Common *cm, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *ppflags, int unscaled_width); void vp9_denoise(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits); void vp9_deblock(struct VP9Common *cm, const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int q, uint8_t *limits); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_POSTPROC_H_ libvpx-1.8.2/vp9/common/vp9_ppflags.h000066400000000000000000000015231357355204000174440ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_PPFLAGS_H_ #define VPX_VP9_COMMON_VP9_PPFLAGS_H_ #ifdef __cplusplus extern "C" { #endif enum { VP9D_NOFILTERING = 0, VP9D_DEBLOCK = 1 << 0, VP9D_DEMACROBLOCK = 1 << 1, VP9D_ADDNOISE = 1 << 2, VP9D_MFQE = 1 << 3 }; typedef struct { int post_proc_flag; int deblocking_level; int noise_level; } vp9_ppflags_t; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_PPFLAGS_H_ libvpx-1.8.2/vp9/common/vp9_pred_common.c000066400000000000000000000310251357355204000203050ustar00rootroot00000000000000 /* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_seg_common.h" int vp9_compound_reference_allowed(const VP9_COMMON *cm) { int i; for (i = 1; i < REFS_PER_FRAME; ++i) if (cm->ref_frame_sign_bias[i + 1] != cm->ref_frame_sign_bias[1]) return 1; return 0; } void vp9_setup_compound_reference_mode(VP9_COMMON *cm) { if (cm->ref_frame_sign_bias[LAST_FRAME] == cm->ref_frame_sign_bias[GOLDEN_FRAME]) { cm->comp_fixed_ref = ALTREF_FRAME; cm->comp_var_ref[0] = LAST_FRAME; cm->comp_var_ref[1] = GOLDEN_FRAME; } else if (cm->ref_frame_sign_bias[LAST_FRAME] == cm->ref_frame_sign_bias[ALTREF_FRAME]) { cm->comp_fixed_ref = GOLDEN_FRAME; cm->comp_var_ref[0] = LAST_FRAME; cm->comp_var_ref[1] = ALTREF_FRAME; } else { cm->comp_fixed_ref = LAST_FRAME; cm->comp_var_ref[0] = GOLDEN_FRAME; cm->comp_var_ref[1] = ALTREF_FRAME; } } int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd) { int ctx; const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; const int has_above = !!above_mi; const int has_left = !!left_mi; // Note: // The mode info data structure has a one element border above and to the // left of the entries corresponding to real macroblocks. // The prediction flags in these dummy entries are initialized to 0. if (has_above && has_left) { // both edges available if (!has_second_ref(above_mi) && !has_second_ref(left_mi)) // neither edge uses comp pred (0/1) ctx = (above_mi->ref_frame[0] == cm->comp_fixed_ref) ^ (left_mi->ref_frame[0] == cm->comp_fixed_ref); else if (!has_second_ref(above_mi)) // one of two edges uses comp pred (2/3) ctx = 2 + (above_mi->ref_frame[0] == cm->comp_fixed_ref || !is_inter_block(above_mi)); else if (!has_second_ref(left_mi)) // one of two edges uses comp pred (2/3) ctx = 2 + (left_mi->ref_frame[0] == cm->comp_fixed_ref || !is_inter_block(left_mi)); else // both edges use comp pred (4) ctx = 4; } else if (has_above || has_left) { // one edge available const MODE_INFO *edge_mi = has_above ? 
above_mi : left_mi; if (!has_second_ref(edge_mi)) // edge does not use comp pred (0/1) ctx = edge_mi->ref_frame[0] == cm->comp_fixed_ref; else // edge uses comp pred (3) ctx = 3; } else { // no edges available (1) ctx = 1; } assert(ctx >= 0 && ctx < COMP_INTER_CONTEXTS); return ctx; } // Returns a context number for the given MB prediction signal int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, const MACROBLOCKD *xd) { int pred_context; const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; const int above_in_image = !!above_mi; const int left_in_image = !!left_mi; // Note: // The mode info data structure has a one element border above and to the // left of the entries corresponding to real macroblocks. // The prediction flags in these dummy entries are initialized to 0. const int fix_ref_idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; const int var_ref_idx = !fix_ref_idx; if (above_in_image && left_in_image) { // both edges available const int above_intra = !is_inter_block(above_mi); const int left_intra = !is_inter_block(left_mi); if (above_intra && left_intra) { // intra/intra (2) pred_context = 2; } else if (above_intra || left_intra) { // intra/inter const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; if (!has_second_ref(edge_mi)) // single pred (1/3) pred_context = 1 + 2 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); else // comp pred (1/3) pred_context = 1 + 2 * (edge_mi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]); } else { // inter/inter const int l_sg = !has_second_ref(left_mi); const int a_sg = !has_second_ref(above_mi); const MV_REFERENCE_FRAME vrfa = a_sg ? above_mi->ref_frame[0] : above_mi->ref_frame[var_ref_idx]; const MV_REFERENCE_FRAME vrfl = l_sg ? left_mi->ref_frame[0] : left_mi->ref_frame[var_ref_idx]; if (vrfa == vrfl && cm->comp_var_ref[1] == vrfa) { pred_context = 0; } else if (l_sg && a_sg) { // single/single if ((vrfa == cm->comp_fixed_ref && vrfl == cm->comp_var_ref[0]) || (vrfl == cm->comp_fixed_ref && vrfa == cm->comp_var_ref[0])) pred_context = 4; else if (vrfa == vrfl) pred_context = 3; else pred_context = 1; } else if (l_sg || a_sg) { // single/comp const MV_REFERENCE_FRAME vrfc = l_sg ? vrfa : vrfl; const MV_REFERENCE_FRAME rfs = a_sg ? vrfa : vrfl; if (vrfc == cm->comp_var_ref[1] && rfs != cm->comp_var_ref[1]) pred_context = 1; else if (rfs == cm->comp_var_ref[1] && vrfc != cm->comp_var_ref[1]) pred_context = 2; else pred_context = 4; } else if (vrfa == vrfl) { // comp/comp pred_context = 4; } else { pred_context = 2; } } } else if (above_in_image || left_in_image) { // one edge available const MODE_INFO *edge_mi = above_in_image ? above_mi : left_mi; if (!is_inter_block(edge_mi)) { pred_context = 2; } else { if (has_second_ref(edge_mi)) pred_context = 4 * (edge_mi->ref_frame[var_ref_idx] != cm->comp_var_ref[1]); else pred_context = 3 * (edge_mi->ref_frame[0] != cm->comp_var_ref[1]); } } else { // no edges available (2) pred_context = 2; } assert(pred_context >= 0 && pred_context < REF_CONTEXTS); return pred_context; } int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd) { int pred_context; const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; const int has_above = !!above_mi; const int has_left = !!left_mi; // Note: // The mode info data structure has a one element border above and to the // left of the entries corresponding to real macroblocks. // The prediction flags in these dummy entries are initialized to 0. 
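// Context derivation for the first single-reference bit (is the reference
// LAST_FRAME or not): the returned value indexes fc->single_ref_prob[ctx][0]
// via vp9_get_pred_prob_single_ref_p1() in vp9_pred_common.h.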
if (has_above && has_left) { // both edges available const int above_intra = !is_inter_block(above_mi); const int left_intra = !is_inter_block(left_mi); if (above_intra && left_intra) { // intra/intra pred_context = 2; } else if (above_intra || left_intra) { // intra/inter or inter/intra const MODE_INFO *edge_mi = above_intra ? left_mi : above_mi; if (!has_second_ref(edge_mi)) pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); else pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || edge_mi->ref_frame[1] == LAST_FRAME); } else { // inter/inter const int above_has_second = has_second_ref(above_mi); const int left_has_second = has_second_ref(left_mi); const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; if (above_has_second && left_has_second) { pred_context = 1 + (above0 == LAST_FRAME || above1 == LAST_FRAME || left0 == LAST_FRAME || left1 == LAST_FRAME); } else if (above_has_second || left_has_second) { const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; if (rfs == LAST_FRAME) pred_context = 3 + (crf1 == LAST_FRAME || crf2 == LAST_FRAME); else pred_context = (crf1 == LAST_FRAME || crf2 == LAST_FRAME); } else { pred_context = 2 * (above0 == LAST_FRAME) + 2 * (left0 == LAST_FRAME); } } } else if (has_above || has_left) { // one edge available const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; if (!is_inter_block(edge_mi)) { // intra pred_context = 2; } else { // inter if (!has_second_ref(edge_mi)) pred_context = 4 * (edge_mi->ref_frame[0] == LAST_FRAME); else pred_context = 1 + (edge_mi->ref_frame[0] == LAST_FRAME || edge_mi->ref_frame[1] == LAST_FRAME); } } else { // no edges available pred_context = 2; } assert(pred_context >= 0 && pred_context < REF_CONTEXTS); return pred_context; } int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd) { int pred_context; const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; const int has_above = !!above_mi; const int has_left = !!left_mi; // Note: // The mode info data structure has a one element border above and to the // left of the entries corresponding to real macroblocks. // The prediction flags in these dummy entries are initialized to 0. if (has_above && has_left) { // both edges available const int above_intra = !is_inter_block(above_mi); const int left_intra = !is_inter_block(left_mi); if (above_intra && left_intra) { // intra/intra pred_context = 2; } else if (above_intra || left_intra) { // intra/inter or inter/intra const MODE_INFO *edge_mi = above_intra ? 
left_mi : above_mi; if (!has_second_ref(edge_mi)) { if (edge_mi->ref_frame[0] == LAST_FRAME) pred_context = 3; else pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); } else { pred_context = 1 + 2 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || edge_mi->ref_frame[1] == GOLDEN_FRAME); } } else { // inter/inter const int above_has_second = has_second_ref(above_mi); const int left_has_second = has_second_ref(left_mi); const MV_REFERENCE_FRAME above0 = above_mi->ref_frame[0]; const MV_REFERENCE_FRAME above1 = above_mi->ref_frame[1]; const MV_REFERENCE_FRAME left0 = left_mi->ref_frame[0]; const MV_REFERENCE_FRAME left1 = left_mi->ref_frame[1]; if (above_has_second && left_has_second) { if (above0 == left0 && above1 == left1) pred_context = 3 * (above0 == GOLDEN_FRAME || above1 == GOLDEN_FRAME || left0 == GOLDEN_FRAME || left1 == GOLDEN_FRAME); else pred_context = 2; } else if (above_has_second || left_has_second) { const MV_REFERENCE_FRAME rfs = !above_has_second ? above0 : left0; const MV_REFERENCE_FRAME crf1 = above_has_second ? above0 : left0; const MV_REFERENCE_FRAME crf2 = above_has_second ? above1 : left1; if (rfs == GOLDEN_FRAME) pred_context = 3 + (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); else if (rfs == ALTREF_FRAME) pred_context = crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME; else pred_context = 1 + 2 * (crf1 == GOLDEN_FRAME || crf2 == GOLDEN_FRAME); } else { if (above0 == LAST_FRAME && left0 == LAST_FRAME) { pred_context = 3; } else if (above0 == LAST_FRAME || left0 == LAST_FRAME) { const MV_REFERENCE_FRAME edge0 = (above0 == LAST_FRAME) ? left0 : above0; pred_context = 4 * (edge0 == GOLDEN_FRAME); } else { pred_context = 2 * (above0 == GOLDEN_FRAME) + 2 * (left0 == GOLDEN_FRAME); } } } } else if (has_above || has_left) { // one edge available const MODE_INFO *edge_mi = has_above ? above_mi : left_mi; if (!is_inter_block(edge_mi) || (edge_mi->ref_frame[0] == LAST_FRAME && !has_second_ref(edge_mi))) pred_context = 2; else if (!has_second_ref(edge_mi)) pred_context = 4 * (edge_mi->ref_frame[0] == GOLDEN_FRAME); else pred_context = 3 * (edge_mi->ref_frame[0] == GOLDEN_FRAME || edge_mi->ref_frame[1] == GOLDEN_FRAME); } else { // no edges available (2) pred_context = 2; } assert(pred_context >= 0 && pred_context < REF_CONTEXTS); return pred_context; } libvpx-1.8.2/vp9/common/vp9_pred_common.h000066400000000000000000000166771357355204000203320ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_COMMON_VP9_PRED_COMMON_H_ #define VPX_VP9_COMMON_VP9_PRED_COMMON_H_ #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_onyxc_int.h" #include "vpx_dsp/vpx_dsp_common.h" #ifdef __cplusplus extern "C" { #endif static INLINE int get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids, BLOCK_SIZE bsize, int mi_row, int mi_col) { const int mi_offset = mi_row * cm->mi_cols + mi_col; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); int x, y, segment_id = MAX_SEGMENTS; for (y = 0; y < ymis; ++y) for (x = 0; x < xmis; ++x) segment_id = VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); return segment_id; } static INLINE int vp9_get_pred_context_seg_id(const MACROBLOCKD *xd) { const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; const int above_sip = (above_mi != NULL) ? above_mi->seg_id_predicted : 0; const int left_sip = (left_mi != NULL) ? left_mi->seg_id_predicted : 0; return above_sip + left_sip; } static INLINE vpx_prob vp9_get_pred_prob_seg_id(const struct segmentation *seg, const MACROBLOCKD *xd) { return seg->pred_probs[vp9_get_pred_context_seg_id(xd)]; } static INLINE int vp9_get_skip_context(const MACROBLOCKD *xd) { const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; const int above_skip = (above_mi != NULL) ? above_mi->skip : 0; const int left_skip = (left_mi != NULL) ? left_mi->skip : 0; return above_skip + left_skip; } static INLINE vpx_prob vp9_get_skip_prob(const VP9_COMMON *cm, const MACROBLOCKD *xd) { return cm->fc->skip_probs[vp9_get_skip_context(xd)]; } // Returns a context number for the given MB prediction signal static INLINE int get_pred_context_switchable_interp(const MACROBLOCKD *xd) { // Note: // The mode info data structure has a one element border above and to the // left of the entries corresponding to real macroblocks. // The prediction flags in these dummy entries are initialized to 0. const MODE_INFO *const left_mi = xd->left_mi; const int left_type = left_mi ? left_mi->interp_filter : SWITCHABLE_FILTERS; const MODE_INFO *const above_mi = xd->above_mi; const int above_type = above_mi ? above_mi->interp_filter : SWITCHABLE_FILTERS; if (left_type == above_type) return left_type; else if (left_type == SWITCHABLE_FILTERS) return above_type; else if (above_type == SWITCHABLE_FILTERS) return left_type; else return SWITCHABLE_FILTERS; } // The mode info data structure has a one element border above and to the // left of the entries corresponding to real macroblocks. // The prediction flags in these dummy entries are initialized to 0. // 0 - inter/inter, inter/--, --/inter, --/-- // 1 - intra/inter, inter/intra // 2 - intra/--, --/intra // 3 - intra/intra static INLINE int get_intra_inter_context(const MACROBLOCKD *xd) { const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; const int has_above = !!above_mi; const int has_left = !!left_mi; if (has_above && has_left) { // both edges available const int above_intra = !is_inter_block(above_mi); const int left_intra = !is_inter_block(left_mi); return left_intra && above_intra ? 3 : left_intra || above_intra; } else if (has_above || has_left) { // one edge available return 2 * !is_inter_block(has_above ? 
above_mi : left_mi); } return 0; } static INLINE vpx_prob vp9_get_intra_inter_prob(const VP9_COMMON *cm, const MACROBLOCKD *xd) { return cm->fc->intra_inter_prob[get_intra_inter_context(xd)]; } int vp9_get_reference_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd); static INLINE vpx_prob vp9_get_reference_mode_prob(const VP9_COMMON *cm, const MACROBLOCKD *xd) { return cm->fc->comp_inter_prob[vp9_get_reference_mode_context(cm, xd)]; } int vp9_get_pred_context_comp_ref_p(const VP9_COMMON *cm, const MACROBLOCKD *xd); static INLINE vpx_prob vp9_get_pred_prob_comp_ref_p(const VP9_COMMON *cm, const MACROBLOCKD *xd) { const int pred_context = vp9_get_pred_context_comp_ref_p(cm, xd); return cm->fc->comp_ref_prob[pred_context]; } int vp9_get_pred_context_single_ref_p1(const MACROBLOCKD *xd); static INLINE vpx_prob vp9_get_pred_prob_single_ref_p1(const VP9_COMMON *cm, const MACROBLOCKD *xd) { return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p1(xd)][0]; } int vp9_get_pred_context_single_ref_p2(const MACROBLOCKD *xd); static INLINE vpx_prob vp9_get_pred_prob_single_ref_p2(const VP9_COMMON *cm, const MACROBLOCKD *xd) { return cm->fc->single_ref_prob[vp9_get_pred_context_single_ref_p2(xd)][1]; } int vp9_compound_reference_allowed(const VP9_COMMON *cm); void vp9_setup_compound_reference_mode(VP9_COMMON *cm); // Returns a context number for the given MB prediction signal // The mode info data structure has a one element border above and to the // left of the entries corresponding to real blocks. // The prediction flags in these dummy entries are initialized to 0. static INLINE int get_tx_size_context(const MACROBLOCKD *xd) { const int max_tx_size = max_txsize_lookup[xd->mi[0]->sb_type]; const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; const int has_above = !!above_mi; const int has_left = !!left_mi; int above_ctx = (has_above && !above_mi->skip) ? (int)above_mi->tx_size : max_tx_size; int left_ctx = (has_left && !left_mi->skip) ? (int)left_mi->tx_size : max_tx_size; if (!has_left) left_ctx = above_ctx; if (!has_above) above_ctx = left_ctx; return (above_ctx + left_ctx) > max_tx_size; } static INLINE const vpx_prob *get_tx_probs(TX_SIZE max_tx_size, int ctx, const struct tx_probs *tx_probs) { switch (max_tx_size) { case TX_8X8: return tx_probs->p8x8[ctx]; case TX_16X16: return tx_probs->p16x16[ctx]; case TX_32X32: return tx_probs->p32x32[ctx]; default: assert(0 && "Invalid max_tx_size."); return NULL; } } static INLINE unsigned int *get_tx_counts(TX_SIZE max_tx_size, int ctx, struct tx_counts *tx_counts) { switch (max_tx_size) { case TX_8X8: return tx_counts->p8x8[ctx]; case TX_16X16: return tx_counts->p16x16[ctx]; case TX_32X32: return tx_counts->p32x32[ctx]; default: assert(0 && "Invalid max_tx_size."); return NULL; } } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_PRED_COMMON_H_ libvpx-1.8.2/vp9/common/vp9_quant_common.c000066400000000000000000000276401357355204000205130ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_seg_common.h" static const int16_t dc_qlookup[QINDEX_RANGE] = { 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42, 43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53, 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65, 66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76, 77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88, 90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110, 111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164, 166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202, 205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247, 250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300, 304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364, 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441, 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549, 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736, 755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336, }; #if CONFIG_VP9_HIGHBITDEPTH static const int16_t dc_qlookup_10[QINDEX_RANGE] = { 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37, 40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82, 86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132, 136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182, 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230, 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276, 280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321, 324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387, 394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466, 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567, 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687, 698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831, 844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347, }; static const int16_t dc_qlookup_12[QINDEX_RANGE] = { 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91, 103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237, 251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405, 421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580, 596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752, 768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919, 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334, 2367, 2400, 2434, 
2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387, }; #endif static const int16_t ac_qlookup[QINDEX_RANGE] = { 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179, 182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280, 285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353, 359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448, 456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571, 582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729, 743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933, 951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828, }; #if CONFIG_VP9_HIGHBITDEPTH static const int16_t ac_qlookup_10[QINDEX_RANGE] = { 4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40, 44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92, 96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149, 154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208, 213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267, 271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324, 328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379, 384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466, 474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571, 579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713, 725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889, 905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312, }; static const int16_t ac_qlookup_12[QINDEX_RANGE] = { 4, 13, 19, 27, 35, 44, 54, 
64, 75, 87, 99, 112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263, 280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456, 475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660, 679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865, 884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247, }; #endif int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { #if CONFIG_VP9_HIGHBITDEPTH switch (bit_depth) { case VPX_BITS_8: return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; case VPX_BITS_10: return dc_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; case VPX_BITS_12: return dc_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; default: assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); return -1; } #else (void)bit_depth; return dc_qlookup[clamp(qindex + delta, 0, MAXQ)]; #endif } int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth) { #if CONFIG_VP9_HIGHBITDEPTH switch (bit_depth) { case VPX_BITS_8: return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; case VPX_BITS_10: return ac_qlookup_10[clamp(qindex + delta, 0, MAXQ)]; case VPX_BITS_12: return ac_qlookup_12[clamp(qindex + delta, 0, MAXQ)]; default: assert(0 && "bit_depth should be VPX_BITS_8, VPX_BITS_10 or VPX_BITS_12"); return -1; } #else (void)bit_depth; return ac_qlookup[clamp(qindex + delta, 0, MAXQ)]; #endif } int vp9_get_qindex(const struct segmentation *seg, int segment_id, int base_qindex) { if (segfeature_active(seg, segment_id, SEG_LVL_ALT_Q)) { const int data = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); const int seg_qindex = seg->abs_delta == SEGMENT_ABSDATA ? data : base_qindex + data; return clamp(seg_qindex, 0, MAXQ); } else { return base_qindex; } } libvpx-1.8.2/vp9/common/vp9_quant_common.h000066400000000000000000000020201357355204000205010ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_COMMON_VP9_QUANT_COMMON_H_ #define VPX_VP9_COMMON_VP9_QUANT_COMMON_H_ #include "vpx/vpx_codec.h" #include "vp9/common/vp9_seg_common.h" #ifdef __cplusplus extern "C" { #endif #define MINQ 0 #define MAXQ 255 #define QINDEX_RANGE (MAXQ - MINQ + 1) #define QINDEX_BITS 8 int16_t vp9_dc_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); int16_t vp9_ac_quant(int qindex, int delta, vpx_bit_depth_t bit_depth); int vp9_get_qindex(const struct segmentation *seg, int segment_id, int base_qindex); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_QUANT_COMMON_H_ libvpx-1.8.2/vp9/common/vp9_reconinter.c000066400000000000000000000276131357355204000201610ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include "./vpx_scale_rtcd.h" #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_build_inter_predictor( const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref, const InterpKernel *kernel, enum mv_precision precision, int x, int y, int bd) { const int is_q4 = precision == MV_PRECISION_Q4; const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, is_q4 ? src_mv->col : src_mv->col * 2 }; MV32 mv = vp9_scale_mv(&mv_q4, x, y, sf); const int subpel_x = mv.col & SUBPEL_MASK; const int subpel_y = mv.row & SUBPEL_MASK; src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); highbd_inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4, bd); } #endif // CONFIG_VP9_HIGHBITDEPTH void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref, const InterpKernel *kernel, enum mv_precision precision, int x, int y) { const int is_q4 = precision == MV_PRECISION_Q4; const MV mv_q4 = { is_q4 ? src_mv->row : src_mv->row * 2, is_q4 ? src_mv->col : src_mv->col * 2 }; MV32 mv = vp9_scale_mv(&mv_q4, x, y, sf); const int subpel_x = mv.col & SUBPEL_MASK; const int subpel_y = mv.row & SUBPEL_MASK; src += (mv.row >> SUBPEL_BITS) * src_stride + (mv.col >> SUBPEL_BITS); inter_predictor(src, src_stride, dst, dst_stride, subpel_x, subpel_y, sf, w, h, ref, kernel, sf->x_step_q4, sf->y_step_q4); } static INLINE int round_mv_comp_q4(int value) { return (value < 0 ? value - 2 : value + 2) / 4; } static MV mi_mv_pred_q4(const MODE_INFO *mi, int idx) { MV res = { round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.row + mi->bmi[1].as_mv[idx].as_mv.row + mi->bmi[2].as_mv[idx].as_mv.row + mi->bmi[3].as_mv[idx].as_mv.row), round_mv_comp_q4(mi->bmi[0].as_mv[idx].as_mv.col + mi->bmi[1].as_mv[idx].as_mv.col + mi->bmi[2].as_mv[idx].as_mv.col + mi->bmi[3].as_mv[idx].as_mv.col) }; return res; } static INLINE int round_mv_comp_q2(int value) { return (value < 0 ?
value - 1 : value + 1) / 2; } static MV mi_mv_pred_q2(const MODE_INFO *mi, int idx, int block0, int block1) { MV res = { round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.row + mi->bmi[block1].as_mv[idx].as_mv.row), round_mv_comp_q2(mi->bmi[block0].as_mv[idx].as_mv.col + mi->bmi[block1].as_mv[idx].as_mv.col) }; return res; } // TODO(jkoleszar): yet another mv clamping function :-( MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, int bw, int bh, int ss_x, int ss_y) { // If the MV points so far into the UMV border that no visible pixels // are used for reconstruction, the subpel part of the MV can be // discarded and the MV limited to 16 pixels with equivalent results. const int spel_left = (VP9_INTERP_EXTEND + bw) << SUBPEL_BITS; const int spel_right = spel_left - SUBPEL_SHIFTS; const int spel_top = (VP9_INTERP_EXTEND + bh) << SUBPEL_BITS; const int spel_bottom = spel_top - SUBPEL_SHIFTS; MV clamped_mv = { (short)(src_mv->row * (1 << (1 - ss_y))), (short)(src_mv->col * (1 << (1 - ss_x))) }; assert(ss_x <= 1); assert(ss_y <= 1); clamp_mv(&clamped_mv, xd->mb_to_left_edge * (1 << (1 - ss_x)) - spel_left, xd->mb_to_right_edge * (1 << (1 - ss_x)) + spel_right, xd->mb_to_top_edge * (1 << (1 - ss_y)) - spel_top, xd->mb_to_bottom_edge * (1 << (1 - ss_y)) + spel_bottom); return clamped_mv; } MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi, int ref, int block) { const int ss_idx = ((pd->subsampling_x > 0) << 1) | (pd->subsampling_y > 0); MV res = { 0, 0 }; switch (ss_idx) { case 0: res = mi->bmi[block].as_mv[ref].as_mv; break; case 1: res = mi_mv_pred_q2(mi, ref, block, block + 2); break; case 2: res = mi_mv_pred_q2(mi, ref, block, block + 1); break; case 3: res = mi_mv_pred_q4(mi, ref); break; default: assert(ss_idx <= 3 && ss_idx >= 0); } return res; } static void build_inter_predictors(MACROBLOCKD *xd, int plane, int block, int bw, int bh, int x, int y, int w, int h, int mi_x, int mi_y) { struct macroblockd_plane *const pd = &xd->plane[plane]; const MODE_INFO *mi = xd->mi[0]; const int is_compound = has_second_ref(mi); const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; int ref; for (ref = 0; ref < 1 + is_compound; ++ref) { const struct scale_factors *const sf = &xd->block_refs[ref]->sf; struct buf_2d *const pre_buf = &pd->pre[ref]; struct buf_2d *const dst_buf = &pd->dst; uint8_t *const dst = dst_buf->buf + (int64_t)dst_buf->stride * y + x; const MV mv = mi->sb_type < BLOCK_8X8 ? average_split_mvs(pd, mi, ref, block) : mi->mv[ref].as_mv; // TODO(jkoleszar): This clamping is done in the incorrect place for the // scaling case. It needs to be done on the scaled MV, not the pre-scaling // MV. Note however that it performs the subsampling aware scaling so // that the result is always q4. // mv_precision precision is MV_PRECISION_Q4. const MV mv_q4 = clamp_mv_to_umv_border_sb( xd, &mv, bw, bh, pd->subsampling_x, pd->subsampling_y); uint8_t *pre; MV32 scaled_mv; int xs, ys, subpel_x, subpel_y; const int is_scaled = vp9_is_scaled(sf); if (is_scaled) { // Co-ordinate of containing block to pixel precision. 
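/* Illustrative arithmetic (values not from the source, assuming the usual
   VP9 convention that mb_to_left_edge is stored in 1/8-pel units): a block
   whose containing mode-info block sits 32 luma pixels from the left frame
   edge has mb_to_left_edge == -256, so for a 4:2:0 chroma plane
   (subsampling_x == 1) the line below yields
   x_start = 256 >> (3 + 1) = 16, the matching chroma-pixel column. */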
const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); #if 0 // CONFIG_BETTER_HW_COMPATIBILITY assert(xd->mi[0]->sb_type != BLOCK_4X8 && xd->mi[0]->sb_type != BLOCK_8X4); assert(mv_q4.row == mv.row * (1 << (1 - pd->subsampling_y)) && mv_q4.col == mv.col * (1 << (1 - pd->subsampling_x))); #endif if (plane == 0) pre_buf->buf = xd->block_refs[ref]->buf->y_buffer; else if (plane == 1) pre_buf->buf = xd->block_refs[ref]->buf->u_buffer; else pre_buf->buf = xd->block_refs[ref]->buf->v_buffer; pre_buf->buf += scaled_buffer_offset(x_start + x, y_start + y, pre_buf->stride, sf); pre = pre_buf->buf; scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); xs = sf->x_step_q4; ys = sf->y_step_q4; } else { pre = pre_buf->buf + ((int64_t)y * pre_buf->stride + x); scaled_mv.row = mv_q4.row; scaled_mv.col = mv_q4.col; xs = ys = 16; } subpel_x = scaled_mv.col & SUBPEL_MASK; subpel_y = scaled_mv.row & SUBPEL_MASK; pre += (scaled_mv.row >> SUBPEL_BITS) * pre_buf->stride + (scaled_mv.col >> SUBPEL_BITS); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { highbd_inter_predictor(CONVERT_TO_SHORTPTR(pre), pre_buf->stride, CONVERT_TO_SHORTPTR(dst), dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); } else { inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); } #else inter_predictor(pre, pre_buf->stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); #endif // CONFIG_VP9_HIGHBITDEPTH } } static void build_inter_predictors_for_planes(MACROBLOCKD *xd, BLOCK_SIZE bsize, int mi_row, int mi_col, int plane_from, int plane_to) { int plane; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; for (plane = plane_from; plane <= plane_to; ++plane) { const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, &xd->plane[plane]); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; if (xd->mi[0]->sb_type < BLOCK_8X8) { int i = 0, x, y; assert(bsize == BLOCK_8X8); for (y = 0; y < num_4x4_h; ++y) for (x = 0; x < num_4x4_w; ++x) build_inter_predictors(xd, plane, i++, bw, bh, 4 * x, 4 * y, 4, 4, mi_x, mi_y); } else { build_inter_predictors(xd, plane, 0, bw, bh, 0, 0, bw, bh, mi_x, mi_y); } } } void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize) { build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, 0); } void vp9_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize, int plane) { build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, plane, plane); } void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize) { build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 1, MAX_MB_PLANE - 1); } void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize) { build_inter_predictors_for_planes(xd, bsize, mi_row, mi_col, 0, MAX_MB_PLANE - 1); } void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col) { uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, src->v_buffer }; const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, src->uv_stride }; int i; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane 
*const pd = &planes[i]; setup_pred_plane(&pd->dst, buffers[i], strides[i], mi_row, mi_col, NULL, pd->subsampling_x, pd->subsampling_y); } } void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, const struct scale_factors *sf) { if (src != NULL) { int i; uint8_t *const buffers[MAX_MB_PLANE] = { src->y_buffer, src->u_buffer, src->v_buffer }; const int strides[MAX_MB_PLANE] = { src->y_stride, src->uv_stride, src->uv_stride }; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblockd_plane *const pd = &xd->plane[i]; setup_pred_plane(&pd->pre[idx], buffers[i], strides[i], mi_row, mi_col, sf, pd->subsampling_x, pd->subsampling_y); } } } libvpx-1.8.2/vp9/common/vp9_reconinter.h000066400000000000000000000107671357355204000201720ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_RECONINTER_H_ #define VPX_VP9_COMMON_VP9_RECONINTER_H_ #include "vp9/common/vp9_filter.h" #include "vp9/common/vp9_onyxc_int.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_filter.h" #ifdef __cplusplus extern "C" { #endif static INLINE void inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const int subpel_x, const int subpel_y, const struct scale_factors *sf, int w, int h, int ref, const InterpKernel *kernel, int xs, int ys) { sf->predict[subpel_x != 0][subpel_y != 0][ref](src, src_stride, dst, dst_stride, kernel, subpel_x, xs, subpel_y, ys, w, h); } #if CONFIG_VP9_HIGHBITDEPTH static INLINE void highbd_inter_predictor( const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, const int subpel_x, const int subpel_y, const struct scale_factors *sf, int w, int h, int ref, const InterpKernel *kernel, int xs, int ys, int bd) { sf->highbd_predict[subpel_x != 0][subpel_y != 0][ref]( src, src_stride, dst, dst_stride, kernel, subpel_x, xs, subpel_y, ys, w, h, bd); } #endif // CONFIG_VP9_HIGHBITDEPTH MV average_split_mvs(const struct macroblockd_plane *pd, const MODE_INFO *mi, int ref, int block); MV clamp_mv_to_umv_border_sb(const MACROBLOCKD *xd, const MV *src_mv, int bw, int bh, int ss_x, int ss_y); void vp9_build_inter_predictors_sby(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); void vp9_build_inter_predictors_sbp(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize, int plane); void vp9_build_inter_predictors_sbuv(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); void vp9_build_inter_predictors_sb(MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize); void vp9_build_inter_predictor(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref, const InterpKernel *kernel, enum mv_precision precision, int x, int y); #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_build_inter_predictor( const uint16_t *src, int src_stride, uint16_t *dst, int dst_stride, const MV *src_mv, const struct scale_factors *sf, int w, int h, int ref, const InterpKernel *kernel, enum mv_precision precision, int x, int y, int bd); #endif static INLINE int64_t scaled_buffer_offset(int x_offset, int y_offset, int stride, const struct scale_factors *sf) { const int 
x = sf ? sf->scale_value_x(x_offset, sf) : x_offset; const int y = sf ? sf->scale_value_y(y_offset, sf) : y_offset; return (int64_t)y * stride + x; } static INLINE void setup_pred_plane(struct buf_2d *dst, uint8_t *src, int stride, int mi_row, int mi_col, const struct scale_factors *scale, int subsampling_x, int subsampling_y) { const int x = (MI_SIZE * mi_col) >> subsampling_x; const int y = (MI_SIZE * mi_row) >> subsampling_y; dst->buf = src + scaled_buffer_offset(x, y, stride, scale); dst->stride = stride; } void vp9_setup_dst_planes(struct macroblockd_plane planes[MAX_MB_PLANE], const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col); void vp9_setup_pre_planes(MACROBLOCKD *xd, int idx, const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, const struct scale_factors *sf); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_RECONINTER_H_ libvpx-1.8.2/vp9/common/vp9_reconintra.c000066400000000000000000000362231357355204000201540ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #if CONFIG_VP9_HIGHBITDEPTH #include "vpx_dsp/vpx_dsp_common.h" #endif // CONFIG_VP9_HIGHBITDEPTH #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_ports/vpx_once.h" #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_onyxc_int.h" const TX_TYPE intra_mode_to_tx_type_lookup[INTRA_MODES] = { DCT_DCT, // DC ADST_DCT, // V DCT_ADST, // H DCT_DCT, // D45 ADST_ADST, // D135 ADST_DCT, // D117 DCT_ADST, // D153 DCT_ADST, // D207 ADST_DCT, // D63 ADST_ADST, // TM }; enum { NEED_LEFT = 1 << 1, NEED_ABOVE = 1 << 2, NEED_ABOVERIGHT = 1 << 3, }; static const uint8_t extend_modes[INTRA_MODES] = { NEED_ABOVE | NEED_LEFT, // DC NEED_ABOVE, // V NEED_LEFT, // H NEED_ABOVERIGHT, // D45 NEED_LEFT | NEED_ABOVE, // D135 NEED_LEFT | NEED_ABOVE, // D117 NEED_LEFT | NEED_ABOVE, // D153 NEED_LEFT, // D207 NEED_ABOVERIGHT, // D63 NEED_LEFT | NEED_ABOVE, // TM }; typedef void (*intra_pred_fn)(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left); static intra_pred_fn pred[INTRA_MODES][TX_SIZES]; static intra_pred_fn dc_pred[2][2][TX_SIZES]; #if CONFIG_VP9_HIGHBITDEPTH typedef void (*intra_high_pred_fn)(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd); static intra_high_pred_fn pred_high[INTRA_MODES][4]; static intra_high_pred_fn dc_pred_high[2][2][4]; #endif // CONFIG_VP9_HIGHBITDEPTH static void vp9_init_intra_predictors_internal(void) { #define INIT_ALL_SIZES(p, type) \ p[TX_4X4] = vpx_##type##_predictor_4x4; \ p[TX_8X8] = vpx_##type##_predictor_8x8; \ p[TX_16X16] = vpx_##type##_predictor_16x16; \ p[TX_32X32] = vpx_##type##_predictor_32x32 INIT_ALL_SIZES(pred[V_PRED], v); INIT_ALL_SIZES(pred[H_PRED], h); INIT_ALL_SIZES(pred[D207_PRED], d207); INIT_ALL_SIZES(pred[D45_PRED], d45); INIT_ALL_SIZES(pred[D63_PRED], d63); INIT_ALL_SIZES(pred[D117_PRED], d117); INIT_ALL_SIZES(pred[D135_PRED], d135); INIT_ALL_SIZES(pred[D153_PRED], d153); INIT_ALL_SIZES(pred[TM_PRED], tm); INIT_ALL_SIZES(dc_pred[0][0], dc_128); INIT_ALL_SIZES(dc_pred[0][1], dc_top); INIT_ALL_SIZES(dc_pred[1][0], dc_left); INIT_ALL_SIZES(dc_pred[1][1], 
dc); #if CONFIG_VP9_HIGHBITDEPTH INIT_ALL_SIZES(pred_high[V_PRED], highbd_v); INIT_ALL_SIZES(pred_high[H_PRED], highbd_h); INIT_ALL_SIZES(pred_high[D207_PRED], highbd_d207); INIT_ALL_SIZES(pred_high[D45_PRED], highbd_d45); INIT_ALL_SIZES(pred_high[D63_PRED], highbd_d63); INIT_ALL_SIZES(pred_high[D117_PRED], highbd_d117); INIT_ALL_SIZES(pred_high[D135_PRED], highbd_d135); INIT_ALL_SIZES(pred_high[D153_PRED], highbd_d153); INIT_ALL_SIZES(pred_high[TM_PRED], highbd_tm); INIT_ALL_SIZES(dc_pred_high[0][0], highbd_dc_128); INIT_ALL_SIZES(dc_pred_high[0][1], highbd_dc_top); INIT_ALL_SIZES(dc_pred_high[1][0], highbd_dc_left); INIT_ALL_SIZES(dc_pred_high[1][1], highbd_dc); #endif // CONFIG_VP9_HIGHBITDEPTH #undef intra_pred_allsizes } #if CONFIG_VP9_HIGHBITDEPTH static void build_intra_predictors_high( const MACROBLOCKD *xd, const uint8_t *ref8, int ref_stride, uint8_t *dst8, int dst_stride, PREDICTION_MODE mode, TX_SIZE tx_size, int up_available, int left_available, int right_available, int x, int y, int plane, int bd) { int i; uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); DECLARE_ALIGNED(16, uint16_t, left_col[32]); DECLARE_ALIGNED(16, uint16_t, above_data[64 + 16]); uint16_t *above_row = above_data + 16; const uint16_t *const_above_row = above_row; const int bs = 4 << tx_size; int frame_width, frame_height; int x0, y0; const struct macroblockd_plane *const pd = &xd->plane[plane]; const int need_left = extend_modes[mode] & NEED_LEFT; const int need_above = extend_modes[mode] & NEED_ABOVE; const int need_aboveright = extend_modes[mode] & NEED_ABOVERIGHT; int base = 128 << (bd - 8); // 127 127 127 .. 127 127 127 127 127 127 // 129 A B .. Y Z // 129 C D .. W X // 129 E F .. U V // 129 G H .. S T T T T T // For 10 bit and 12 bit, 127 and 129 are replaced by base -1 and base + 1. // Get current frame pointer, width and height. if (plane == 0) { frame_width = xd->cur_buf->y_width; frame_height = xd->cur_buf->y_height; } else { frame_width = xd->cur_buf->uv_width; frame_height = xd->cur_buf->uv_height; } // Get block position in current frame. 
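/* x0/y0 computed below are in pixels of this plane, so the border checks
   further down (e.g. x0 + bs <= frame_width) detect blocks that overlap the
   right or bottom frame edge and need border extension. As a worked example
   of the fill value above: for bd == 10, base = 128 << 2 = 512, so the
   unavailable-edge constants become base - 1 = 511 and base + 1 = 513,
   mirroring the 127/129 constants of the 8-bit path. */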
x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; // NEED_LEFT if (need_left) { if (left_available) { if (xd->mb_to_bottom_edge < 0) { /* slower path if the block needs border extension */ if (y0 + bs <= frame_height) { for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; } else { const int extend_bottom = frame_height - y0; for (i = 0; i < extend_bottom; ++i) left_col[i] = ref[i * ref_stride - 1]; for (; i < bs; ++i) left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; } } else { /* faster path if the block does not need extension */ for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; } } else { vpx_memset16(left_col, base + 1, bs); } } // NEED_ABOVE if (need_above) { if (up_available) { const uint16_t *above_ref = ref - ref_stride; if (xd->mb_to_right_edge < 0) { /* slower path if the block needs border extension */ if (x0 + bs <= frame_width) { memcpy(above_row, above_ref, bs * sizeof(above_row[0])); } else if (x0 <= frame_width) { const int r = frame_width - x0; memcpy(above_row, above_ref, r * sizeof(above_row[0])); vpx_memset16(above_row + r, above_row[r - 1], x0 + bs - frame_width); } } else { /* faster path if the block does not need extension */ if (bs == 4 && right_available && left_available) { const_above_row = above_ref; } else { memcpy(above_row, above_ref, bs * sizeof(above_row[0])); } } above_row[-1] = left_available ? above_ref[-1] : (base + 1); } else { vpx_memset16(above_row, base - 1, bs); above_row[-1] = base - 1; } } // NEED_ABOVERIGHT if (need_aboveright) { if (up_available) { const uint16_t *above_ref = ref - ref_stride; if (xd->mb_to_right_edge < 0) { /* slower path if the block needs border extension */ if (x0 + 2 * bs <= frame_width) { if (right_available && bs == 4) { memcpy(above_row, above_ref, 2 * bs * sizeof(above_row[0])); } else { memcpy(above_row, above_ref, bs * sizeof(above_row[0])); vpx_memset16(above_row + bs, above_row[bs - 1], bs); } } else if (x0 + bs <= frame_width) { const int r = frame_width - x0; if (right_available && bs == 4) { memcpy(above_row, above_ref, r * sizeof(above_row[0])); vpx_memset16(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); } else { memcpy(above_row, above_ref, bs * sizeof(above_row[0])); vpx_memset16(above_row + bs, above_row[bs - 1], bs); } } else if (x0 <= frame_width) { const int r = frame_width - x0; memcpy(above_row, above_ref, r * sizeof(above_row[0])); vpx_memset16(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); } above_row[-1] = left_available ? above_ref[-1] : (base + 1); } else { /* faster path if the block does not need extension */ if (bs == 4 && right_available && left_available) { const_above_row = above_ref; } else { memcpy(above_row, above_ref, bs * sizeof(above_row[0])); if (bs == 4 && right_available) memcpy(above_row + bs, above_ref + bs, bs * sizeof(above_row[0])); else vpx_memset16(above_row + bs, above_row[bs - 1], bs); above_row[-1] = left_available ? 
above_ref[-1] : (base + 1); } } } else { vpx_memset16(above_row, base - 1, bs * 2); above_row[-1] = base - 1; } } // predict if (mode == DC_PRED) { dc_pred_high[left_available][up_available][tx_size]( dst, dst_stride, const_above_row, left_col, xd->bd); } else { pred_high[mode][tx_size](dst, dst_stride, const_above_row, left_col, xd->bd); } } #endif // CONFIG_VP9_HIGHBITDEPTH static void build_intra_predictors(const MACROBLOCKD *xd, const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride, PREDICTION_MODE mode, TX_SIZE tx_size, int up_available, int left_available, int right_available, int x, int y, int plane) { int i; DECLARE_ALIGNED(16, uint8_t, left_col[32]); DECLARE_ALIGNED(16, uint8_t, above_data[64 + 16]); uint8_t *above_row = above_data + 16; const uint8_t *const_above_row = above_row; const int bs = 4 << tx_size; int frame_width, frame_height; int x0, y0; const struct macroblockd_plane *const pd = &xd->plane[plane]; // 127 127 127 .. 127 127 127 127 127 127 // 129 A B .. Y Z // 129 C D .. W X // 129 E F .. U V // 129 G H .. S T T T T T // .. // Get current frame pointer, width and height. if (plane == 0) { frame_width = xd->cur_buf->y_width; frame_height = xd->cur_buf->y_height; } else { frame_width = xd->cur_buf->uv_width; frame_height = xd->cur_buf->uv_height; } // Get block position in current frame. x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; // NEED_LEFT if (extend_modes[mode] & NEED_LEFT) { if (left_available) { if (xd->mb_to_bottom_edge < 0) { /* slower path if the block needs border extension */ if (y0 + bs <= frame_height) { for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; } else { const int extend_bottom = frame_height - y0; for (i = 0; i < extend_bottom; ++i) left_col[i] = ref[i * ref_stride - 1]; for (; i < bs; ++i) left_col[i] = ref[(extend_bottom - 1) * ref_stride - 1]; } } else { /* faster path if the block does not need extension */ for (i = 0; i < bs; ++i) left_col[i] = ref[i * ref_stride - 1]; } } else { memset(left_col, 129, bs); } } // NEED_ABOVE if (extend_modes[mode] & NEED_ABOVE) { if (up_available) { const uint8_t *above_ref = ref - ref_stride; if (xd->mb_to_right_edge < 0) { /* slower path if the block needs border extension */ if (x0 + bs <= frame_width) { memcpy(above_row, above_ref, bs); } else if (x0 <= frame_width) { const int r = frame_width - x0; memcpy(above_row, above_ref, r); memset(above_row + r, above_row[r - 1], x0 + bs - frame_width); } } else { /* faster path if the block does not need extension */ if (bs == 4 && right_available && left_available) { const_above_row = above_ref; } else { memcpy(above_row, above_ref, bs); } } above_row[-1] = left_available ? 
above_ref[-1] : 129; } else { memset(above_row, 127, bs); above_row[-1] = 127; } } // NEED_ABOVERIGHT if (extend_modes[mode] & NEED_ABOVERIGHT) { if (up_available) { const uint8_t *above_ref = ref - ref_stride; if (xd->mb_to_right_edge < 0) { /* slower path if the block needs border extension */ if (x0 + 2 * bs <= frame_width) { if (right_available && bs == 4) { memcpy(above_row, above_ref, 2 * bs); } else { memcpy(above_row, above_ref, bs); memset(above_row + bs, above_row[bs - 1], bs); } } else if (x0 + bs <= frame_width) { const int r = frame_width - x0; if (right_available && bs == 4) { memcpy(above_row, above_ref, r); memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); } else { memcpy(above_row, above_ref, bs); memset(above_row + bs, above_row[bs - 1], bs); } } else if (x0 <= frame_width) { const int r = frame_width - x0; memcpy(above_row, above_ref, r); memset(above_row + r, above_row[r - 1], x0 + 2 * bs - frame_width); } } else { /* faster path if the block does not need extension */ if (bs == 4 && right_available && left_available) { const_above_row = above_ref; } else { memcpy(above_row, above_ref, bs); if (bs == 4 && right_available) memcpy(above_row + bs, above_ref + bs, bs); else memset(above_row + bs, above_row[bs - 1], bs); } } above_row[-1] = left_available ? above_ref[-1] : 129; } else { memset(above_row, 127, bs * 2); above_row[-1] = 127; } } // predict if (mode == DC_PRED) { dc_pred[left_available][up_available][tx_size](dst, dst_stride, const_above_row, left_col); } else { pred[mode][tx_size](dst, dst_stride, const_above_row, left_col); } } void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, TX_SIZE tx_size, PREDICTION_MODE mode, const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride, int aoff, int loff, int plane) { const int bw = (1 << bwl_in); const int txw = (1 << tx_size); const int have_top = loff || (xd->above_mi != NULL); const int have_left = aoff || (xd->left_mi != NULL); const int have_right = (aoff + txw) < bw; const int x = aoff * 4; const int y = loff * 4; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { build_intra_predictors_high(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, have_top, have_left, have_right, x, y, plane, xd->bd); return; } #endif build_intra_predictors(xd, ref, ref_stride, dst, dst_stride, mode, tx_size, have_top, have_left, have_right, x, y, plane); } void vp9_init_intra_predictors(void) { once(vp9_init_intra_predictors_internal); } libvpx-1.8.2/vp9/common/vp9_reconintra.h000066400000000000000000000017671357355204000201660ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_COMMON_VP9_RECONINTRA_H_ #define VPX_VP9_COMMON_VP9_RECONINTRA_H_ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" #ifdef __cplusplus extern "C" { #endif void vp9_init_intra_predictors(void); void vp9_predict_intra_block(const MACROBLOCKD *xd, int bwl_in, TX_SIZE tx_size, PREDICTION_MODE mode, const uint8_t *ref, int ref_stride, uint8_t *dst, int dst_stride, int aoff, int loff, int plane); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_RECONINTRA_H_ libvpx-1.8.2/vp9/common/vp9_rtcd.c000066400000000000000000000012541357355204000167400ustar00rootroot00000000000000/* * Copyright (c) 2011 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #define RTCD_C #include "./vp9_rtcd.h" #include "vpx_ports/vpx_once.h" void vp9_rtcd() { // TODO(JBB): Remove this once, by ensuring that both the encoder and // decoder setup functions are protected by once(); once(setup_rtcd_internal); } libvpx-1.8.2/vp9/common/vp9_rtcd_defs.pl000066400000000000000000000233731357355204000201340ustar00rootroot00000000000000## ## Copyright (c) 2017 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## sub vp9_common_forward_decls() { print <<EOF ... libvpx-1.8.2/vp9/common/vp9_scale.c ... static INLINE int scaled_x(int val, const struct scale_factors *sf) { return (int)((int64_t)val * sf->x_scale_fp >> REF_SCALE_SHIFT); } static INLINE int scaled_y(int val, const struct scale_factors *sf) { return (int)((int64_t)val * sf->y_scale_fp >> REF_SCALE_SHIFT); } static int unscaled_value(int val, const struct scale_factors *sf) { (void)sf; return val; } static int get_fixed_point_scale_factor(int other_size, int this_size) { // Calculate scaling factor once for each reference frame // and use fixed point scaling factors in decoding and encoding routines. // Hardware implementations can calculate scale factor in device driver // and use multiplication and shifting on hardware instead of division.
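/* Worked example (REF_SCALE_SHIFT is 14, per vp9_scale.h): a 1920-wide
   reference consumed by a 960-wide frame yields
   (1920 << 14) / 960 = 32768, i.e. 2.0 in Q14. scaled_x() above then
   computes val * 32768 >> 14, which equals 2 * val, with one multiply and
   one shift; an unscaled reference produces exactly
   REF_NO_SCALE == 1 << 14. */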
return (other_size << REF_SCALE_SHIFT) / this_size; } MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf) { const int x_off_q4 = scaled_x(x << SUBPEL_BITS, sf) & SUBPEL_MASK; const int y_off_q4 = scaled_y(y << SUBPEL_BITS, sf) & SUBPEL_MASK; const MV32 res = { scaled_y(mv->row, sf) + y_off_q4, scaled_x(mv->col, sf) + x_off_q4 }; return res; } #if CONFIG_VP9_HIGHBITDEPTH void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, int other_h, int this_w, int this_h, int use_highbd) { #else void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, int other_h, int this_w, int this_h) { #endif if (!valid_ref_frame_size(other_w, other_h, this_w, this_h)) { sf->x_scale_fp = REF_INVALID_SCALE; sf->y_scale_fp = REF_INVALID_SCALE; return; } sf->x_scale_fp = get_fixed_point_scale_factor(other_w, this_w); sf->y_scale_fp = get_fixed_point_scale_factor(other_h, this_h); sf->x_step_q4 = scaled_x(16, sf); sf->y_step_q4 = scaled_y(16, sf); if (vp9_is_scaled(sf)) { sf->scale_value_x = scaled_x; sf->scale_value_y = scaled_y; } else { sf->scale_value_x = unscaled_value; sf->scale_value_y = unscaled_value; } // TODO(agrange): Investigate the best choice of functions to use here // for EIGHTTAP_SMOOTH. Since it is not interpolating, need to choose what // to do at full-pel offsets. The current selection, where the filter is // applied in one direction only, and not at all for 0,0, seems to give the // best quality, but it may be worth trying an additional mode that does // do the filtering on full-pel. if (sf->x_step_q4 == 16) { if (sf->y_step_q4 == 16) { // No scaling in either direction. sf->predict[0][0][0] = vpx_convolve_copy; sf->predict[0][0][1] = vpx_convolve_avg; sf->predict[0][1][0] = vpx_convolve8_vert; sf->predict[0][1][1] = vpx_convolve8_avg_vert; sf->predict[1][0][0] = vpx_convolve8_horiz; sf->predict[1][0][1] = vpx_convolve8_avg_horiz; } else { // No scaling in x direction. Must always scale in the y direction. sf->predict[0][0][0] = vpx_scaled_vert; sf->predict[0][0][1] = vpx_scaled_avg_vert; sf->predict[0][1][0] = vpx_scaled_vert; sf->predict[0][1][1] = vpx_scaled_avg_vert; sf->predict[1][0][0] = vpx_scaled_2d; sf->predict[1][0][1] = vpx_scaled_avg_2d; } } else { if (sf->y_step_q4 == 16) { // No scaling in the y direction. Must always scale in the x direction. sf->predict[0][0][0] = vpx_scaled_horiz; sf->predict[0][0][1] = vpx_scaled_avg_horiz; sf->predict[0][1][0] = vpx_scaled_2d; sf->predict[0][1][1] = vpx_scaled_avg_2d; sf->predict[1][0][0] = vpx_scaled_horiz; sf->predict[1][0][1] = vpx_scaled_avg_horiz; } else { // Must always scale in both directions. sf->predict[0][0][0] = vpx_scaled_2d; sf->predict[0][0][1] = vpx_scaled_avg_2d; sf->predict[0][1][0] = vpx_scaled_2d; sf->predict[0][1][1] = vpx_scaled_avg_2d; sf->predict[1][0][0] = vpx_scaled_2d; sf->predict[1][0][1] = vpx_scaled_avg_2d; } } // 2D subpel motion always gets filtered in both directions if ((sf->x_step_q4 != 16) || (sf->y_step_q4 != 16)) { sf->predict[1][1][0] = vpx_scaled_2d; sf->predict[1][1][1] = vpx_scaled_avg_2d; } else { sf->predict[1][1][0] = vpx_convolve8; sf->predict[1][1][1] = vpx_convolve8_avg; } #if CONFIG_VP9_HIGHBITDEPTH if (use_highbd) { if (sf->x_step_q4 == 16) { if (sf->y_step_q4 == 16) { // No scaling in either direction. 
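/* The table is indexed as highbd_predict[subpel_x != 0][subpel_y != 0][ref],
   as called from highbd_inter_predictor(). For instance, [1][0][1] filters
   horizontally only and averages the result into dst, which is how the
   second reference of a compound block is blended. */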
sf->highbd_predict[0][0][0] = vpx_highbd_convolve_copy; sf->highbd_predict[0][0][1] = vpx_highbd_convolve_avg; sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert; sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert; sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz; sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz; } else { // No scaling in x direction. Must always scale in the y direction. sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_vert; sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_vert; sf->highbd_predict[0][1][0] = vpx_highbd_convolve8_vert; sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg_vert; sf->highbd_predict[1][0][0] = vpx_highbd_convolve8; sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg; } } else { if (sf->y_step_q4 == 16) { // No scaling in the y direction. Must always scale in the x direction. sf->highbd_predict[0][0][0] = vpx_highbd_convolve8_horiz; sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg_horiz; sf->highbd_predict[0][1][0] = vpx_highbd_convolve8; sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg; sf->highbd_predict[1][0][0] = vpx_highbd_convolve8_horiz; sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg_horiz; } else { // Must always scale in both directions. sf->highbd_predict[0][0][0] = vpx_highbd_convolve8; sf->highbd_predict[0][0][1] = vpx_highbd_convolve8_avg; sf->highbd_predict[0][1][0] = vpx_highbd_convolve8; sf->highbd_predict[0][1][1] = vpx_highbd_convolve8_avg; sf->highbd_predict[1][0][0] = vpx_highbd_convolve8; sf->highbd_predict[1][0][1] = vpx_highbd_convolve8_avg; } } // 2D subpel motion always gets filtered in both directions. sf->highbd_predict[1][1][0] = vpx_highbd_convolve8; sf->highbd_predict[1][1][1] = vpx_highbd_convolve8_avg; } #endif } libvpx-1.8.2/vp9/common/vp9_scale.h000066400000000000000000000045041357355204000171010ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_COMMON_VP9_SCALE_H_ #define VPX_VP9_COMMON_VP9_SCALE_H_ #include "vp9/common/vp9_mv.h" #include "vpx_dsp/vpx_convolve.h" #ifdef __cplusplus extern "C" { #endif #define REF_SCALE_SHIFT 14 #define REF_NO_SCALE (1 << REF_SCALE_SHIFT) #define REF_INVALID_SCALE (-1) struct scale_factors { int x_scale_fp; // horizontal fixed point scale factor int y_scale_fp; // vertical fixed point scale factor int x_step_q4; int y_step_q4; int (*scale_value_x)(int val, const struct scale_factors *sf); int (*scale_value_y)(int val, const struct scale_factors *sf); convolve_fn_t predict[2][2][2]; // horiz, vert, avg #if CONFIG_VP9_HIGHBITDEPTH highbd_convolve_fn_t highbd_predict[2][2][2]; // horiz, vert, avg #endif }; MV32 vp9_scale_mv(const MV *mv, int x, int y, const struct scale_factors *sf); #if CONFIG_VP9_HIGHBITDEPTH void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, int other_h, int this_w, int this_h, int use_highbd); #else void vp9_setup_scale_factors_for_frame(struct scale_factors *sf, int other_w, int other_h, int this_w, int this_h); #endif static INLINE int vp9_is_valid_scale(const struct scale_factors *sf) { return sf->x_scale_fp != REF_INVALID_SCALE && sf->y_scale_fp != REF_INVALID_SCALE; } static INLINE int vp9_is_scaled(const struct scale_factors *sf) { return vp9_is_valid_scale(sf) && (sf->x_scale_fp != REF_NO_SCALE || sf->y_scale_fp != REF_NO_SCALE); } static INLINE int valid_ref_frame_size(int ref_width, int ref_height, int this_width, int this_height) { return 2 * this_width >= ref_width && 2 * this_height >= ref_height && this_width <= 16 * ref_width && this_height <= 16 * ref_height; } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_SCALE_H_ libvpx-1.8.2/vp9/common/vp9_scan.c000066400000000000000000001357561357355204000167470ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "vp9/common/vp9_scan.h" DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4[16]) = { 0, 4, 1, 5, 8, 2, 12, 9, 3, 6, 13, 10, 7, 14, 11, 15, }; DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4[16]) = { 0, 4, 8, 1, 12, 5, 9, 2, 13, 6, 10, 3, 7, 14, 11, 15, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4[16]) = { 0, 1, 4, 2, 5, 3, 6, 8, 9, 7, 12, 10, 13, 11, 14, 15, }; DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8[64]) = { 0, 8, 1, 16, 9, 2, 17, 24, 10, 3, 18, 25, 32, 11, 4, 26, 33, 19, 40, 12, 34, 27, 5, 41, 20, 48, 13, 35, 42, 28, 21, 6, 49, 56, 36, 43, 29, 7, 14, 50, 57, 44, 22, 37, 15, 51, 58, 30, 45, 23, 52, 59, 38, 31, 60, 53, 46, 39, 61, 54, 47, 62, 55, 63, }; DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8[64]) = { 0, 8, 16, 1, 24, 9, 32, 17, 2, 40, 25, 10, 33, 18, 48, 3, 26, 41, 11, 56, 19, 34, 4, 49, 27, 42, 12, 35, 20, 57, 50, 28, 5, 43, 13, 36, 58, 51, 21, 44, 6, 29, 59, 37, 14, 52, 22, 7, 45, 60, 30, 15, 38, 53, 23, 46, 31, 61, 39, 54, 47, 62, 55, 63, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8[64]) = { 0, 1, 2, 8, 9, 3, 16, 10, 4, 17, 11, 24, 5, 18, 25, 12, 19, 26, 32, 6, 13, 20, 33, 27, 7, 34, 40, 21, 28, 41, 14, 35, 48, 42, 29, 36, 49, 22, 43, 15, 56, 37, 50, 44, 30, 57, 23, 51, 58, 45, 38, 52, 31, 59, 53, 46, 60, 39, 61, 47, 54, 55, 62, 63, }; DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16[256]) = { 0, 16, 1, 32, 17, 2, 48, 33, 18, 3, 64, 34, 49, 19, 65, 80, 50, 4, 35, 66, 20, 81, 96, 51, 5, 36, 82, 97, 67, 112, 21, 52, 98, 37, 83, 113, 6, 68, 128, 53, 22, 99, 114, 84, 7, 129, 38, 69, 100, 115, 144, 130, 85, 54, 23, 8, 145, 39, 70, 116, 101, 131, 160, 146, 55, 86, 24, 71, 132, 117, 161, 40, 9, 102, 147, 176, 162, 87, 56, 25, 133, 118, 177, 148, 72, 103, 41, 163, 10, 192, 178, 88, 57, 134, 149, 119, 26, 164, 73, 104, 193, 42, 179, 208, 11, 135, 89, 165, 120, 150, 58, 194, 180, 27, 74, 209, 105, 151, 136, 43, 90, 224, 166, 195, 181, 121, 210, 59, 12, 152, 106, 167, 196, 75, 137, 225, 211, 240, 182, 122, 91, 28, 197, 13, 226, 168, 183, 153, 44, 212, 138, 107, 241, 60, 29, 123, 198, 184, 227, 169, 242, 76, 213, 154, 45, 92, 14, 199, 139, 61, 228, 214, 170, 185, 243, 108, 77, 155, 30, 15, 200, 229, 124, 215, 244, 93, 46, 186, 171, 201, 109, 140, 230, 62, 216, 245, 31, 125, 78, 156, 231, 47, 187, 202, 217, 94, 246, 141, 63, 232, 172, 110, 247, 157, 79, 218, 203, 126, 233, 188, 248, 95, 173, 142, 219, 111, 249, 234, 158, 127, 189, 204, 250, 235, 143, 174, 220, 205, 159, 251, 190, 221, 175, 236, 237, 191, 206, 252, 222, 253, 207, 238, 223, 254, 239, 255, }; DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16[256]) = { 0, 16, 32, 48, 1, 64, 17, 80, 33, 96, 49, 2, 65, 112, 18, 81, 34, 128, 50, 97, 3, 66, 144, 19, 113, 35, 82, 160, 98, 51, 129, 4, 67, 176, 20, 114, 145, 83, 36, 99, 130, 52, 192, 5, 161, 68, 115, 21, 146, 84, 208, 177, 37, 131, 100, 53, 162, 224, 69, 6, 116, 193, 147, 85, 22, 240, 132, 38, 178, 101, 163, 54, 209, 117, 70, 7, 148, 194, 86, 179, 225, 23, 133, 39, 164, 8, 102, 210, 241, 55, 195, 118, 149, 71, 180, 24, 87, 226, 134, 165, 211, 40, 103, 56, 72, 150, 196, 242, 119, 9, 181, 227, 88, 166, 25, 135, 41, 104, 212, 57, 151, 197, 120, 73, 243, 182, 136, 167, 213, 89, 10, 228, 105, 152, 198, 26, 42, 121, 183, 244, 168, 58, 137, 229, 74, 214, 90, 153, 199, 184, 11, 106, 245, 27, 122, 230, 169, 43, 215, 59, 200, 138, 185, 246, 75, 12, 91, 154, 216, 231, 107, 28, 44, 201, 123, 170, 60, 247, 232, 76, 139, 13, 92, 217, 186, 248, 155, 108, 29, 124, 45, 202, 233, 171, 61, 14, 77, 
140, 15, 249, 93, 30, 187, 156, 218, 46, 109, 125, 62, 172, 78, 203, 31, 141, 234, 94, 47, 188, 63, 157, 110, 250, 219, 79, 126, 204, 173, 142, 95, 189, 111, 235, 158, 220, 251, 127, 174, 143, 205, 236, 159, 190, 221, 252, 175, 206, 237, 191, 253, 222, 238, 207, 254, 223, 239, 255, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16[256]) = { 0, 1, 2, 16, 3, 17, 4, 18, 32, 5, 33, 19, 6, 34, 48, 20, 49, 7, 35, 21, 50, 64, 8, 36, 65, 22, 51, 37, 80, 9, 66, 52, 23, 38, 81, 67, 10, 53, 24, 82, 68, 96, 39, 11, 54, 83, 97, 69, 25, 98, 84, 40, 112, 55, 12, 70, 99, 113, 85, 26, 41, 56, 114, 100, 13, 71, 128, 86, 27, 115, 101, 129, 42, 57, 72, 116, 14, 87, 130, 102, 144, 73, 131, 117, 28, 58, 15, 88, 43, 145, 103, 132, 146, 118, 74, 160, 89, 133, 104, 29, 59, 147, 119, 44, 161, 148, 90, 105, 134, 162, 120, 176, 75, 135, 149, 30, 60, 163, 177, 45, 121, 91, 106, 164, 178, 150, 192, 136, 165, 179, 31, 151, 193, 76, 122, 61, 137, 194, 107, 152, 180, 208, 46, 166, 167, 195, 92, 181, 138, 209, 123, 153, 224, 196, 77, 168, 210, 182, 240, 108, 197, 62, 154, 225, 183, 169, 211, 47, 139, 93, 184, 226, 212, 241, 198, 170, 124, 155, 199, 78, 213, 185, 109, 227, 200, 63, 228, 242, 140, 214, 171, 186, 156, 229, 243, 125, 94, 201, 244, 215, 216, 230, 141, 187, 202, 79, 172, 110, 157, 245, 217, 231, 95, 246, 232, 126, 203, 247, 233, 173, 218, 142, 111, 158, 188, 248, 127, 234, 219, 249, 189, 204, 143, 174, 159, 250, 235, 205, 220, 175, 190, 251, 221, 191, 206, 236, 207, 237, 252, 222, 253, 223, 238, 239, 254, 255, }; DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32[1024]) = { 0, 32, 1, 64, 33, 2, 96, 65, 34, 128, 3, 97, 66, 160, 129, 35, 98, 4, 67, 130, 161, 192, 36, 99, 224, 5, 162, 193, 68, 131, 37, 100, 225, 194, 256, 163, 69, 132, 6, 226, 257, 288, 195, 101, 164, 38, 258, 7, 227, 289, 133, 320, 70, 196, 165, 290, 259, 228, 39, 321, 102, 352, 8, 197, 71, 134, 322, 291, 260, 353, 384, 229, 166, 103, 40, 354, 323, 292, 135, 385, 198, 261, 72, 9, 416, 167, 386, 355, 230, 324, 104, 293, 41, 417, 199, 136, 262, 387, 448, 325, 356, 10, 73, 418, 231, 168, 449, 294, 388, 105, 419, 263, 42, 200, 357, 450, 137, 480, 74, 326, 232, 11, 389, 169, 295, 420, 106, 451, 481, 358, 264, 327, 201, 43, 138, 512, 482, 390, 296, 233, 170, 421, 75, 452, 359, 12, 513, 265, 483, 328, 107, 202, 514, 544, 422, 391, 453, 139, 44, 234, 484, 297, 360, 171, 76, 515, 545, 266, 329, 454, 13, 423, 203, 108, 546, 485, 576, 298, 235, 140, 361, 330, 172, 547, 45, 455, 267, 577, 486, 77, 204, 362, 608, 14, 299, 578, 109, 236, 487, 609, 331, 141, 579, 46, 15, 173, 610, 363, 78, 205, 16, 110, 237, 611, 142, 47, 174, 79, 206, 17, 111, 238, 48, 143, 80, 175, 112, 207, 49, 18, 239, 81, 113, 19, 50, 82, 114, 51, 83, 115, 640, 516, 392, 268, 144, 20, 672, 641, 548, 517, 424, 393, 300, 269, 176, 145, 52, 21, 704, 673, 642, 580, 549, 518, 456, 425, 394, 332, 301, 270, 208, 177, 146, 84, 53, 22, 736, 705, 674, 643, 612, 581, 550, 519, 488, 457, 426, 395, 364, 333, 302, 271, 240, 209, 178, 147, 116, 85, 54, 23, 737, 706, 675, 613, 582, 551, 489, 458, 427, 365, 334, 303, 241, 210, 179, 117, 86, 55, 738, 707, 614, 583, 490, 459, 366, 335, 242, 211, 118, 87, 739, 615, 491, 367, 243, 119, 768, 644, 520, 396, 272, 148, 24, 800, 769, 676, 645, 552, 521, 428, 397, 304, 273, 180, 149, 56, 25, 832, 801, 770, 708, 677, 646, 584, 553, 522, 460, 429, 398, 336, 305, 274, 212, 181, 150, 88, 57, 26, 864, 833, 802, 771, 740, 709, 678, 647, 616, 585, 554, 523, 492, 461, 430, 399, 368, 337, 306, 275, 244, 213, 182, 151, 120, 89, 58, 27, 865, 834, 803, 741, 
710, 679, 617, 586, 555, 493, 462, 431, 369, 338, 307, 245, 214, 183, 121, 90, 59, 866, 835, 742, 711, 618, 587, 494, 463, 370, 339, 246, 215, 122, 91, 867, 743, 619, 495, 371, 247, 123, 896, 772, 648, 524, 400, 276, 152, 28, 928, 897, 804, 773, 680, 649, 556, 525, 432, 401, 308, 277, 184, 153, 60, 29, 960, 929, 898, 836, 805, 774, 712, 681, 650, 588, 557, 526, 464, 433, 402, 340, 309, 278, 216, 185, 154, 92, 61, 30, 992, 961, 930, 899, 868, 837, 806, 775, 744, 713, 682, 651, 620, 589, 558, 527, 496, 465, 434, 403, 372, 341, 310, 279, 248, 217, 186, 155, 124, 93, 62, 31, 993, 962, 931, 869, 838, 807, 745, 714, 683, 621, 590, 559, 497, 466, 435, 373, 342, 311, 249, 218, 187, 125, 94, 63, 994, 963, 870, 839, 746, 715, 622, 591, 498, 467, 374, 343, 250, 219, 126, 95, 995, 871, 747, 623, 499, 375, 251, 127, 900, 776, 652, 528, 404, 280, 156, 932, 901, 808, 777, 684, 653, 560, 529, 436, 405, 312, 281, 188, 157, 964, 933, 902, 840, 809, 778, 716, 685, 654, 592, 561, 530, 468, 437, 406, 344, 313, 282, 220, 189, 158, 996, 965, 934, 903, 872, 841, 810, 779, 748, 717, 686, 655, 624, 593, 562, 531, 500, 469, 438, 407, 376, 345, 314, 283, 252, 221, 190, 159, 997, 966, 935, 873, 842, 811, 749, 718, 687, 625, 594, 563, 501, 470, 439, 377, 346, 315, 253, 222, 191, 998, 967, 874, 843, 750, 719, 626, 595, 502, 471, 378, 347, 254, 223, 999, 875, 751, 627, 503, 379, 255, 904, 780, 656, 532, 408, 284, 936, 905, 812, 781, 688, 657, 564, 533, 440, 409, 316, 285, 968, 937, 906, 844, 813, 782, 720, 689, 658, 596, 565, 534, 472, 441, 410, 348, 317, 286, 1000, 969, 938, 907, 876, 845, 814, 783, 752, 721, 690, 659, 628, 597, 566, 535, 504, 473, 442, 411, 380, 349, 318, 287, 1001, 970, 939, 877, 846, 815, 753, 722, 691, 629, 598, 567, 505, 474, 443, 381, 350, 319, 1002, 971, 878, 847, 754, 723, 630, 599, 506, 475, 382, 351, 1003, 879, 755, 631, 507, 383, 908, 784, 660, 536, 412, 940, 909, 816, 785, 692, 661, 568, 537, 444, 413, 972, 941, 910, 848, 817, 786, 724, 693, 662, 600, 569, 538, 476, 445, 414, 1004, 973, 942, 911, 880, 849, 818, 787, 756, 725, 694, 663, 632, 601, 570, 539, 508, 477, 446, 415, 1005, 974, 943, 881, 850, 819, 757, 726, 695, 633, 602, 571, 509, 478, 447, 1006, 975, 882, 851, 758, 727, 634, 603, 510, 479, 1007, 883, 759, 635, 511, 912, 788, 664, 540, 944, 913, 820, 789, 696, 665, 572, 541, 976, 945, 914, 852, 821, 790, 728, 697, 666, 604, 573, 542, 1008, 977, 946, 915, 884, 853, 822, 791, 760, 729, 698, 667, 636, 605, 574, 543, 1009, 978, 947, 885, 854, 823, 761, 730, 699, 637, 606, 575, 1010, 979, 886, 855, 762, 731, 638, 607, 1011, 887, 763, 639, 916, 792, 668, 948, 917, 824, 793, 700, 669, 980, 949, 918, 856, 825, 794, 732, 701, 670, 1012, 981, 950, 919, 888, 857, 826, 795, 764, 733, 702, 671, 1013, 982, 951, 889, 858, 827, 765, 734, 703, 1014, 983, 890, 859, 766, 735, 1015, 891, 767, 920, 796, 952, 921, 828, 797, 984, 953, 922, 860, 829, 798, 1016, 985, 954, 923, 892, 861, 830, 799, 1017, 986, 955, 893, 862, 831, 1018, 987, 894, 863, 1019, 895, 924, 956, 925, 988, 957, 926, 1020, 989, 958, 927, 1021, 990, 959, 1022, 991, 1023, }; // Neighborhood 2-tuples for various scans and blocksizes, // in {top, left} order for each position in corresponding scan order. 
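/* For example, the 4th coefficient of default_scan_4x4 sits at raster
   position 5, and the 4th tuple of default_scan_4x4_neighbors below is
   { 1, 4 }: its entropy context is derived from the already-coded
   coefficients at raster positions 1 (above) and 4 (left). */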
DECLARE_ALIGNED(16, static const int16_t, default_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { 0, 0, 0, 0, 0, 0, 1, 4, 4, 4, 1, 1, 8, 8, 5, 8, 2, 2, 2, 5, 9, 12, 6, 9, 3, 6, 10, 13, 7, 10, 11, 14, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, col_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { 0, 0, 0, 0, 4, 4, 0, 0, 8, 8, 1, 1, 5, 5, 1, 1, 9, 9, 2, 2, 6, 6, 2, 2, 3, 3, 10, 10, 7, 7, 11, 11, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_4x4_neighbors[17 * MAX_NEIGHBORS]) = { 0, 0, 0, 0, 0, 0, 1, 1, 4, 4, 2, 2, 5, 5, 4, 4, 8, 8, 6, 6, 8, 8, 9, 9, 12, 12, 10, 10, 13, 13, 14, 14, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, col_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { 0, 0, 0, 0, 8, 8, 0, 0, 16, 16, 1, 1, 24, 24, 9, 9, 1, 1, 32, 32, 17, 17, 2, 2, 25, 25, 10, 10, 40, 40, 2, 2, 18, 18, 33, 33, 3, 3, 48, 48, 11, 11, 26, 26, 3, 3, 41, 41, 19, 19, 34, 34, 4, 4, 27, 27, 12, 12, 49, 49, 42, 42, 20, 20, 4, 4, 35, 35, 5, 5, 28, 28, 50, 50, 43, 43, 13, 13, 36, 36, 5, 5, 21, 21, 51, 51, 29, 29, 6, 6, 44, 44, 14, 14, 6, 6, 37, 37, 52, 52, 22, 22, 7, 7, 30, 30, 45, 45, 15, 15, 38, 38, 23, 23, 53, 53, 31, 31, 46, 46, 39, 39, 54, 54, 47, 47, 55, 55, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { 0, 0, 0, 0, 1, 1, 0, 0, 8, 8, 2, 2, 8, 8, 9, 9, 3, 3, 16, 16, 10, 10, 16, 16, 4, 4, 17, 17, 24, 24, 11, 11, 18, 18, 25, 25, 24, 24, 5, 5, 12, 12, 19, 19, 32, 32, 26, 26, 6, 6, 33, 33, 32, 32, 20, 20, 27, 27, 40, 40, 13, 13, 34, 34, 40, 40, 41, 41, 28, 28, 35, 35, 48, 48, 21, 21, 42, 42, 14, 14, 48, 48, 36, 36, 49, 49, 43, 43, 29, 29, 56, 56, 22, 22, 50, 50, 57, 57, 44, 44, 37, 37, 51, 51, 30, 30, 58, 58, 52, 52, 45, 45, 59, 59, 38, 38, 60, 60, 46, 46, 53, 53, 54, 54, 61, 61, 62, 62, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, default_scan_8x8_neighbors[65 * MAX_NEIGHBORS]) = { 0, 0, 0, 0, 0, 0, 8, 8, 1, 8, 1, 1, 9, 16, 16, 16, 2, 9, 2, 2, 10, 17, 17, 24, 24, 24, 3, 10, 3, 3, 18, 25, 25, 32, 11, 18, 32, 32, 4, 11, 26, 33, 19, 26, 4, 4, 33, 40, 12, 19, 40, 40, 5, 12, 27, 34, 34, 41, 20, 27, 13, 20, 5, 5, 41, 48, 48, 48, 28, 35, 35, 42, 21, 28, 6, 6, 6, 13, 42, 49, 49, 56, 36, 43, 14, 21, 29, 36, 7, 14, 43, 50, 50, 57, 22, 29, 37, 44, 15, 22, 44, 51, 51, 58, 30, 37, 23, 30, 52, 59, 45, 52, 38, 45, 31, 38, 53, 60, 46, 53, 39, 46, 54, 61, 47, 54, 55, 62, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, col_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { 0, 0, 0, 0, 16, 16, 32, 32, 0, 0, 48, 48, 1, 1, 64, 64, 17, 17, 80, 80, 33, 33, 1, 1, 49, 49, 96, 96, 2, 2, 65, 65, 18, 18, 112, 112, 34, 34, 81, 81, 2, 2, 50, 50, 128, 128, 3, 3, 97, 97, 19, 19, 66, 66, 144, 144, 82, 82, 35, 35, 113, 113, 3, 3, 51, 51, 160, 160, 4, 4, 98, 98, 129, 129, 67, 67, 20, 20, 83, 83, 114, 114, 36, 36, 176, 176, 4, 4, 145, 145, 52, 52, 99, 99, 5, 5, 130, 130, 68, 68, 192, 192, 161, 161, 21, 21, 115, 115, 84, 84, 37, 37, 146, 146, 208, 208, 53, 53, 5, 5, 100, 100, 177, 177, 131, 131, 69, 69, 6, 6, 224, 224, 116, 116, 22, 22, 162, 162, 85, 85, 147, 147, 38, 38, 193, 193, 101, 101, 54, 54, 6, 6, 132, 132, 178, 178, 70, 70, 163, 163, 209, 209, 7, 7, 117, 117, 23, 23, 148, 148, 7, 7, 86, 86, 194, 194, 225, 225, 39, 39, 179, 179, 102, 102, 133, 133, 55, 55, 164, 164, 8, 8, 71, 71, 210, 210, 118, 118, 149, 149, 195, 195, 24, 24, 87, 87, 40, 40, 56, 56, 134, 134, 180, 180, 226, 226, 103, 103, 8, 8, 165, 165, 211, 211, 72, 72, 150, 150, 9, 9, 119, 119, 25, 25, 88, 88, 196, 196, 41, 41, 135, 135, 181, 181, 104, 104, 57, 57, 227, 227, 166, 166, 120, 120, 151, 151, 197, 197, 
73, 73, 9, 9, 212, 212, 89, 89, 136, 136, 182, 182, 10, 10, 26, 26, 105, 105, 167, 167, 228, 228, 152, 152, 42, 42, 121, 121, 213, 213, 58, 58, 198, 198, 74, 74, 137, 137, 183, 183, 168, 168, 10, 10, 90, 90, 229, 229, 11, 11, 106, 106, 214, 214, 153, 153, 27, 27, 199, 199, 43, 43, 184, 184, 122, 122, 169, 169, 230, 230, 59, 59, 11, 11, 75, 75, 138, 138, 200, 200, 215, 215, 91, 91, 12, 12, 28, 28, 185, 185, 107, 107, 154, 154, 44, 44, 231, 231, 216, 216, 60, 60, 123, 123, 12, 12, 76, 76, 201, 201, 170, 170, 232, 232, 139, 139, 92, 92, 13, 13, 108, 108, 29, 29, 186, 186, 217, 217, 155, 155, 45, 45, 13, 13, 61, 61, 124, 124, 14, 14, 233, 233, 77, 77, 14, 14, 171, 171, 140, 140, 202, 202, 30, 30, 93, 93, 109, 109, 46, 46, 156, 156, 62, 62, 187, 187, 15, 15, 125, 125, 218, 218, 78, 78, 31, 31, 172, 172, 47, 47, 141, 141, 94, 94, 234, 234, 203, 203, 63, 63, 110, 110, 188, 188, 157, 157, 126, 126, 79, 79, 173, 173, 95, 95, 219, 219, 142, 142, 204, 204, 235, 235, 111, 111, 158, 158, 127, 127, 189, 189, 220, 220, 143, 143, 174, 174, 205, 205, 236, 236, 159, 159, 190, 190, 221, 221, 175, 175, 237, 237, 206, 206, 222, 222, 191, 191, 238, 238, 207, 207, 223, 223, 239, 239, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, row_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { 0, 0, 0, 0, 1, 1, 0, 0, 2, 2, 16, 16, 3, 3, 17, 17, 16, 16, 4, 4, 32, 32, 18, 18, 5, 5, 33, 33, 32, 32, 19, 19, 48, 48, 6, 6, 34, 34, 20, 20, 49, 49, 48, 48, 7, 7, 35, 35, 64, 64, 21, 21, 50, 50, 36, 36, 64, 64, 8, 8, 65, 65, 51, 51, 22, 22, 37, 37, 80, 80, 66, 66, 9, 9, 52, 52, 23, 23, 81, 81, 67, 67, 80, 80, 38, 38, 10, 10, 53, 53, 82, 82, 96, 96, 68, 68, 24, 24, 97, 97, 83, 83, 39, 39, 96, 96, 54, 54, 11, 11, 69, 69, 98, 98, 112, 112, 84, 84, 25, 25, 40, 40, 55, 55, 113, 113, 99, 99, 12, 12, 70, 70, 112, 112, 85, 85, 26, 26, 114, 114, 100, 100, 128, 128, 41, 41, 56, 56, 71, 71, 115, 115, 13, 13, 86, 86, 129, 129, 101, 101, 128, 128, 72, 72, 130, 130, 116, 116, 27, 27, 57, 57, 14, 14, 87, 87, 42, 42, 144, 144, 102, 102, 131, 131, 145, 145, 117, 117, 73, 73, 144, 144, 88, 88, 132, 132, 103, 103, 28, 28, 58, 58, 146, 146, 118, 118, 43, 43, 160, 160, 147, 147, 89, 89, 104, 104, 133, 133, 161, 161, 119, 119, 160, 160, 74, 74, 134, 134, 148, 148, 29, 29, 59, 59, 162, 162, 176, 176, 44, 44, 120, 120, 90, 90, 105, 105, 163, 163, 177, 177, 149, 149, 176, 176, 135, 135, 164, 164, 178, 178, 30, 30, 150, 150, 192, 192, 75, 75, 121, 121, 60, 60, 136, 136, 193, 193, 106, 106, 151, 151, 179, 179, 192, 192, 45, 45, 165, 165, 166, 166, 194, 194, 91, 91, 180, 180, 137, 137, 208, 208, 122, 122, 152, 152, 208, 208, 195, 195, 76, 76, 167, 167, 209, 209, 181, 181, 224, 224, 107, 107, 196, 196, 61, 61, 153, 153, 224, 224, 182, 182, 168, 168, 210, 210, 46, 46, 138, 138, 92, 92, 183, 183, 225, 225, 211, 211, 240, 240, 197, 197, 169, 169, 123, 123, 154, 154, 198, 198, 77, 77, 212, 212, 184, 184, 108, 108, 226, 226, 199, 199, 62, 62, 227, 227, 241, 241, 139, 139, 213, 213, 170, 170, 185, 185, 155, 155, 228, 228, 242, 242, 124, 124, 93, 93, 200, 200, 243, 243, 214, 214, 215, 215, 229, 229, 140, 140, 186, 186, 201, 201, 78, 78, 171, 171, 109, 109, 156, 156, 244, 244, 216, 216, 230, 230, 94, 94, 245, 245, 231, 231, 125, 125, 202, 202, 246, 246, 232, 232, 172, 172, 217, 217, 141, 141, 110, 110, 157, 157, 187, 187, 247, 247, 126, 126, 233, 233, 218, 218, 248, 248, 188, 188, 203, 203, 142, 142, 173, 173, 158, 158, 249, 249, 234, 234, 204, 204, 219, 219, 174, 174, 189, 189, 250, 250, 220, 220, 190, 190, 205, 205, 235, 235, 206, 206, 236, 236, 251, 251, 221, 221, 
252, 252, 222, 222, 237, 237, 238, 238, 253, 253, 254, 254, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, default_scan_16x16_neighbors[257 * MAX_NEIGHBORS]) = { 0, 0, 0, 0, 0, 0, 16, 16, 1, 16, 1, 1, 32, 32, 17, 32, 2, 17, 2, 2, 48, 48, 18, 33, 33, 48, 3, 18, 49, 64, 64, 64, 34, 49, 3, 3, 19, 34, 50, 65, 4, 19, 65, 80, 80, 80, 35, 50, 4, 4, 20, 35, 66, 81, 81, 96, 51, 66, 96, 96, 5, 20, 36, 51, 82, 97, 21, 36, 67, 82, 97, 112, 5, 5, 52, 67, 112, 112, 37, 52, 6, 21, 83, 98, 98, 113, 68, 83, 6, 6, 113, 128, 22, 37, 53, 68, 84, 99, 99, 114, 128, 128, 114, 129, 69, 84, 38, 53, 7, 22, 7, 7, 129, 144, 23, 38, 54, 69, 100, 115, 85, 100, 115, 130, 144, 144, 130, 145, 39, 54, 70, 85, 8, 23, 55, 70, 116, 131, 101, 116, 145, 160, 24, 39, 8, 8, 86, 101, 131, 146, 160, 160, 146, 161, 71, 86, 40, 55, 9, 24, 117, 132, 102, 117, 161, 176, 132, 147, 56, 71, 87, 102, 25, 40, 147, 162, 9, 9, 176, 176, 162, 177, 72, 87, 41, 56, 118, 133, 133, 148, 103, 118, 10, 25, 148, 163, 57, 72, 88, 103, 177, 192, 26, 41, 163, 178, 192, 192, 10, 10, 119, 134, 73, 88, 149, 164, 104, 119, 134, 149, 42, 57, 178, 193, 164, 179, 11, 26, 58, 73, 193, 208, 89, 104, 135, 150, 120, 135, 27, 42, 74, 89, 208, 208, 150, 165, 179, 194, 165, 180, 105, 120, 194, 209, 43, 58, 11, 11, 136, 151, 90, 105, 151, 166, 180, 195, 59, 74, 121, 136, 209, 224, 195, 210, 224, 224, 166, 181, 106, 121, 75, 90, 12, 27, 181, 196, 12, 12, 210, 225, 152, 167, 167, 182, 137, 152, 28, 43, 196, 211, 122, 137, 91, 106, 225, 240, 44, 59, 13, 28, 107, 122, 182, 197, 168, 183, 211, 226, 153, 168, 226, 241, 60, 75, 197, 212, 138, 153, 29, 44, 76, 91, 13, 13, 183, 198, 123, 138, 45, 60, 212, 227, 198, 213, 154, 169, 169, 184, 227, 242, 92, 107, 61, 76, 139, 154, 14, 29, 14, 14, 184, 199, 213, 228, 108, 123, 199, 214, 228, 243, 77, 92, 30, 45, 170, 185, 155, 170, 185, 200, 93, 108, 124, 139, 214, 229, 46, 61, 200, 215, 229, 244, 15, 30, 109, 124, 62, 77, 140, 155, 215, 230, 31, 46, 171, 186, 186, 201, 201, 216, 78, 93, 230, 245, 125, 140, 47, 62, 216, 231, 156, 171, 94, 109, 231, 246, 141, 156, 63, 78, 202, 217, 187, 202, 110, 125, 217, 232, 172, 187, 232, 247, 79, 94, 157, 172, 126, 141, 203, 218, 95, 110, 233, 248, 218, 233, 142, 157, 111, 126, 173, 188, 188, 203, 234, 249, 219, 234, 127, 142, 158, 173, 204, 219, 189, 204, 143, 158, 235, 250, 174, 189, 205, 220, 159, 174, 220, 235, 221, 236, 175, 190, 190, 205, 236, 251, 206, 221, 237, 252, 191, 206, 222, 237, 207, 222, 238, 253, 223, 238, 239, 254, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, default_scan_32x32_neighbors[1025 * MAX_NEIGHBORS]) = { 0, 0, 0, 0, 0, 0, 32, 32, 1, 32, 1, 1, 64, 64, 33, 64, 2, 33, 96, 96, 2, 2, 65, 96, 34, 65, 128, 128, 97, 128, 3, 34, 66, 97, 3, 3, 35, 66, 98, 129, 129, 160, 160, 160, 4, 35, 67, 98, 192, 192, 4, 4, 130, 161, 161, 192, 36, 67, 99, 130, 5, 36, 68, 99, 193, 224, 162, 193, 224, 224, 131, 162, 37, 68, 100, 131, 5, 5, 194, 225, 225, 256, 256, 256, 163, 194, 69, 100, 132, 163, 6, 37, 226, 257, 6, 6, 195, 226, 257, 288, 101, 132, 288, 288, 38, 69, 164, 195, 133, 164, 258, 289, 227, 258, 196, 227, 7, 38, 289, 320, 70, 101, 320, 320, 7, 7, 165, 196, 39, 70, 102, 133, 290, 321, 259, 290, 228, 259, 321, 352, 352, 352, 197, 228, 134, 165, 71, 102, 8, 39, 322, 353, 291, 322, 260, 291, 103, 134, 353, 384, 166, 197, 229, 260, 40, 71, 8, 8, 384, 384, 135, 166, 354, 385, 323, 354, 198, 229, 292, 323, 72, 103, 261, 292, 9, 40, 385, 416, 167, 198, 104, 135, 230, 261, 355, 386, 416, 416, 293, 324, 324, 355, 9, 9, 41, 72, 386, 417, 199, 230, 136, 167, 417, 448, 262, 293, 356, 
387, 73, 104, 387, 418, 231, 262, 10, 41, 168, 199, 325, 356, 418, 449, 105, 136, 448, 448, 42, 73, 294, 325, 200, 231, 10, 10, 357, 388, 137, 168, 263, 294, 388, 419, 74, 105, 419, 450, 449, 480, 326, 357, 232, 263, 295, 326, 169, 200, 11, 42, 106, 137, 480, 480, 450, 481, 358, 389, 264, 295, 201, 232, 138, 169, 389, 420, 43, 74, 420, 451, 327, 358, 11, 11, 481, 512, 233, 264, 451, 482, 296, 327, 75, 106, 170, 201, 482, 513, 512, 512, 390, 421, 359, 390, 421, 452, 107, 138, 12, 43, 202, 233, 452, 483, 265, 296, 328, 359, 139, 170, 44, 75, 483, 514, 513, 544, 234, 265, 297, 328, 422, 453, 12, 12, 391, 422, 171, 202, 76, 107, 514, 545, 453, 484, 544, 544, 266, 297, 203, 234, 108, 139, 329, 360, 298, 329, 140, 171, 515, 546, 13, 44, 423, 454, 235, 266, 545, 576, 454, 485, 45, 76, 172, 203, 330, 361, 576, 576, 13, 13, 267, 298, 546, 577, 77, 108, 204, 235, 455, 486, 577, 608, 299, 330, 109, 140, 547, 578, 14, 45, 14, 14, 141, 172, 578, 609, 331, 362, 46, 77, 173, 204, 15, 15, 78, 109, 205, 236, 579, 610, 110, 141, 15, 46, 142, 173, 47, 78, 174, 205, 16, 16, 79, 110, 206, 237, 16, 47, 111, 142, 48, 79, 143, 174, 80, 111, 175, 206, 17, 48, 17, 17, 207, 238, 49, 80, 81, 112, 18, 18, 18, 49, 50, 81, 82, 113, 19, 50, 51, 82, 83, 114, 608, 608, 484, 515, 360, 391, 236, 267, 112, 143, 19, 19, 640, 640, 609, 640, 516, 547, 485, 516, 392, 423, 361, 392, 268, 299, 237, 268, 144, 175, 113, 144, 20, 51, 20, 20, 672, 672, 641, 672, 610, 641, 548, 579, 517, 548, 486, 517, 424, 455, 393, 424, 362, 393, 300, 331, 269, 300, 238, 269, 176, 207, 145, 176, 114, 145, 52, 83, 21, 52, 21, 21, 704, 704, 673, 704, 642, 673, 611, 642, 580, 611, 549, 580, 518, 549, 487, 518, 456, 487, 425, 456, 394, 425, 363, 394, 332, 363, 301, 332, 270, 301, 239, 270, 208, 239, 177, 208, 146, 177, 115, 146, 84, 115, 53, 84, 22, 53, 22, 22, 705, 736, 674, 705, 643, 674, 581, 612, 550, 581, 519, 550, 457, 488, 426, 457, 395, 426, 333, 364, 302, 333, 271, 302, 209, 240, 178, 209, 147, 178, 85, 116, 54, 85, 23, 54, 706, 737, 675, 706, 582, 613, 551, 582, 458, 489, 427, 458, 334, 365, 303, 334, 210, 241, 179, 210, 86, 117, 55, 86, 707, 738, 583, 614, 459, 490, 335, 366, 211, 242, 87, 118, 736, 736, 612, 643, 488, 519, 364, 395, 240, 271, 116, 147, 23, 23, 768, 768, 737, 768, 644, 675, 613, 644, 520, 551, 489, 520, 396, 427, 365, 396, 272, 303, 241, 272, 148, 179, 117, 148, 24, 55, 24, 24, 800, 800, 769, 800, 738, 769, 676, 707, 645, 676, 614, 645, 552, 583, 521, 552, 490, 521, 428, 459, 397, 428, 366, 397, 304, 335, 273, 304, 242, 273, 180, 211, 149, 180, 118, 149, 56, 87, 25, 56, 25, 25, 832, 832, 801, 832, 770, 801, 739, 770, 708, 739, 677, 708, 646, 677, 615, 646, 584, 615, 553, 584, 522, 553, 491, 522, 460, 491, 429, 460, 398, 429, 367, 398, 336, 367, 305, 336, 274, 305, 243, 274, 212, 243, 181, 212, 150, 181, 119, 150, 88, 119, 57, 88, 26, 57, 26, 26, 833, 864, 802, 833, 771, 802, 709, 740, 678, 709, 647, 678, 585, 616, 554, 585, 523, 554, 461, 492, 430, 461, 399, 430, 337, 368, 306, 337, 275, 306, 213, 244, 182, 213, 151, 182, 89, 120, 58, 89, 27, 58, 834, 865, 803, 834, 710, 741, 679, 710, 586, 617, 555, 586, 462, 493, 431, 462, 338, 369, 307, 338, 214, 245, 183, 214, 90, 121, 59, 90, 835, 866, 711, 742, 587, 618, 463, 494, 339, 370, 215, 246, 91, 122, 864, 864, 740, 771, 616, 647, 492, 523, 368, 399, 244, 275, 120, 151, 27, 27, 896, 896, 865, 896, 772, 803, 741, 772, 648, 679, 617, 648, 524, 555, 493, 524, 400, 431, 369, 400, 276, 307, 245, 276, 152, 183, 121, 152, 28, 59, 28, 28, 928, 928, 897, 928, 866, 897, 804, 835, 773, 804, 
742, 773, 680, 711, 649, 680, 618, 649, 556, 587, 525, 556, 494, 525, 432, 463, 401, 432, 370, 401, 308, 339, 277, 308, 246, 277, 184, 215, 153, 184, 122, 153, 60, 91, 29, 60, 29, 29, 960, 960, 929, 960, 898, 929, 867, 898, 836, 867, 805, 836, 774, 805, 743, 774, 712, 743, 681, 712, 650, 681, 619, 650, 588, 619, 557, 588, 526, 557, 495, 526, 464, 495, 433, 464, 402, 433, 371, 402, 340, 371, 309, 340, 278, 309, 247, 278, 216, 247, 185, 216, 154, 185, 123, 154, 92, 123, 61, 92, 30, 61, 30, 30, 961, 992, 930, 961, 899, 930, 837, 868, 806, 837, 775, 806, 713, 744, 682, 713, 651, 682, 589, 620, 558, 589, 527, 558, 465, 496, 434, 465, 403, 434, 341, 372, 310, 341, 279, 310, 217, 248, 186, 217, 155, 186, 93, 124, 62, 93, 31, 62, 962, 993, 931, 962, 838, 869, 807, 838, 714, 745, 683, 714, 590, 621, 559, 590, 466, 497, 435, 466, 342, 373, 311, 342, 218, 249, 187, 218, 94, 125, 63, 94, 963, 994, 839, 870, 715, 746, 591, 622, 467, 498, 343, 374, 219, 250, 95, 126, 868, 899, 744, 775, 620, 651, 496, 527, 372, 403, 248, 279, 124, 155, 900, 931, 869, 900, 776, 807, 745, 776, 652, 683, 621, 652, 528, 559, 497, 528, 404, 435, 373, 404, 280, 311, 249, 280, 156, 187, 125, 156, 932, 963, 901, 932, 870, 901, 808, 839, 777, 808, 746, 777, 684, 715, 653, 684, 622, 653, 560, 591, 529, 560, 498, 529, 436, 467, 405, 436, 374, 405, 312, 343, 281, 312, 250, 281, 188, 219, 157, 188, 126, 157, 964, 995, 933, 964, 902, 933, 871, 902, 840, 871, 809, 840, 778, 809, 747, 778, 716, 747, 685, 716, 654, 685, 623, 654, 592, 623, 561, 592, 530, 561, 499, 530, 468, 499, 437, 468, 406, 437, 375, 406, 344, 375, 313, 344, 282, 313, 251, 282, 220, 251, 189, 220, 158, 189, 127, 158, 965, 996, 934, 965, 903, 934, 841, 872, 810, 841, 779, 810, 717, 748, 686, 717, 655, 686, 593, 624, 562, 593, 531, 562, 469, 500, 438, 469, 407, 438, 345, 376, 314, 345, 283, 314, 221, 252, 190, 221, 159, 190, 966, 997, 935, 966, 842, 873, 811, 842, 718, 749, 687, 718, 594, 625, 563, 594, 470, 501, 439, 470, 346, 377, 315, 346, 222, 253, 191, 222, 967, 998, 843, 874, 719, 750, 595, 626, 471, 502, 347, 378, 223, 254, 872, 903, 748, 779, 624, 655, 500, 531, 376, 407, 252, 283, 904, 935, 873, 904, 780, 811, 749, 780, 656, 687, 625, 656, 532, 563, 501, 532, 408, 439, 377, 408, 284, 315, 253, 284, 936, 967, 905, 936, 874, 905, 812, 843, 781, 812, 750, 781, 688, 719, 657, 688, 626, 657, 564, 595, 533, 564, 502, 533, 440, 471, 409, 440, 378, 409, 316, 347, 285, 316, 254, 285, 968, 999, 937, 968, 906, 937, 875, 906, 844, 875, 813, 844, 782, 813, 751, 782, 720, 751, 689, 720, 658, 689, 627, 658, 596, 627, 565, 596, 534, 565, 503, 534, 472, 503, 441, 472, 410, 441, 379, 410, 348, 379, 317, 348, 286, 317, 255, 286, 969, 1000, 938, 969, 907, 938, 845, 876, 814, 845, 783, 814, 721, 752, 690, 721, 659, 690, 597, 628, 566, 597, 535, 566, 473, 504, 442, 473, 411, 442, 349, 380, 318, 349, 287, 318, 970, 1001, 939, 970, 846, 877, 815, 846, 722, 753, 691, 722, 598, 629, 567, 598, 474, 505, 443, 474, 350, 381, 319, 350, 971, 1002, 847, 878, 723, 754, 599, 630, 475, 506, 351, 382, 876, 907, 752, 783, 628, 659, 504, 535, 380, 411, 908, 939, 877, 908, 784, 815, 753, 784, 660, 691, 629, 660, 536, 567, 505, 536, 412, 443, 381, 412, 940, 971, 909, 940, 878, 909, 816, 847, 785, 816, 754, 785, 692, 723, 661, 692, 630, 661, 568, 599, 537, 568, 506, 537, 444, 475, 413, 444, 382, 413, 972, 1003, 941, 972, 910, 941, 879, 910, 848, 879, 817, 848, 786, 817, 755, 786, 724, 755, 693, 724, 662, 693, 631, 662, 600, 631, 569, 600, 538, 569, 507, 538, 476, 507, 445, 476, 414, 445, 383, 414, 
973, 1004, 942, 973, 911, 942, 849, 880, 818, 849, 787, 818, 725, 756, 694, 725, 663, 694, 601, 632, 570, 601, 539, 570, 477, 508, 446, 477, 415, 446, 974, 1005, 943, 974, 850, 881, 819, 850, 726, 757, 695, 726, 602, 633, 571, 602, 478, 509, 447, 478, 975, 1006, 851, 882, 727, 758, 603, 634, 479, 510, 880, 911, 756, 787, 632, 663, 508, 539, 912, 943, 881, 912, 788, 819, 757, 788, 664, 695, 633, 664, 540, 571, 509, 540, 944, 975, 913, 944, 882, 913, 820, 851, 789, 820, 758, 789, 696, 727, 665, 696, 634, 665, 572, 603, 541, 572, 510, 541, 976, 1007, 945, 976, 914, 945, 883, 914, 852, 883, 821, 852, 790, 821, 759, 790, 728, 759, 697, 728, 666, 697, 635, 666, 604, 635, 573, 604, 542, 573, 511, 542, 977, 1008, 946, 977, 915, 946, 853, 884, 822, 853, 791, 822, 729, 760, 698, 729, 667, 698, 605, 636, 574, 605, 543, 574, 978, 1009, 947, 978, 854, 885, 823, 854, 730, 761, 699, 730, 606, 637, 575, 606, 979, 1010, 855, 886, 731, 762, 607, 638, 884, 915, 760, 791, 636, 667, 916, 947, 885, 916, 792, 823, 761, 792, 668, 699, 637, 668, 948, 979, 917, 948, 886, 917, 824, 855, 793, 824, 762, 793, 700, 731, 669, 700, 638, 669, 980, 1011, 949, 980, 918, 949, 887, 918, 856, 887, 825, 856, 794, 825, 763, 794, 732, 763, 701, 732, 670, 701, 639, 670, 981, 1012, 950, 981, 919, 950, 857, 888, 826, 857, 795, 826, 733, 764, 702, 733, 671, 702, 982, 1013, 951, 982, 858, 889, 827, 858, 734, 765, 703, 734, 983, 1014, 859, 890, 735, 766, 888, 919, 764, 795, 920, 951, 889, 920, 796, 827, 765, 796, 952, 983, 921, 952, 890, 921, 828, 859, 797, 828, 766, 797, 984, 1015, 953, 984, 922, 953, 891, 922, 860, 891, 829, 860, 798, 829, 767, 798, 985, 1016, 954, 985, 923, 954, 861, 892, 830, 861, 799, 830, 986, 1017, 955, 986, 862, 893, 831, 862, 987, 1018, 863, 894, 892, 923, 924, 955, 893, 924, 956, 987, 925, 956, 894, 925, 988, 1019, 957, 988, 926, 957, 895, 926, 989, 1020, 958, 989, 927, 958, 990, 1021, 959, 990, 991, 1022, 0, 0, }; DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_4x4[16]) = { 0, 2, 5, 8, 1, 3, 9, 12, 4, 7, 11, 14, 6, 10, 13, 15, }; DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_4x4[16]) = { 0, 3, 7, 11, 1, 5, 9, 12, 2, 6, 10, 14, 4, 8, 13, 15, }; DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_4x4[16]) = { 0, 1, 3, 5, 2, 4, 6, 9, 7, 8, 11, 13, 10, 12, 14, 15, }; DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_8x8[64]) = { 0, 3, 8, 15, 22, 32, 40, 47, 1, 5, 11, 18, 26, 34, 44, 51, 2, 7, 13, 20, 28, 38, 46, 54, 4, 10, 16, 24, 31, 41, 50, 56, 6, 12, 21, 27, 35, 43, 52, 58, 9, 17, 25, 33, 39, 48, 55, 60, 14, 23, 30, 37, 45, 53, 59, 62, 19, 29, 36, 42, 49, 57, 61, 63, }; DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_8x8[64]) = { 0, 1, 2, 5, 8, 12, 19, 24, 3, 4, 7, 10, 15, 20, 30, 39, 6, 9, 13, 16, 21, 27, 37, 46, 11, 14, 17, 23, 28, 34, 44, 52, 18, 22, 25, 31, 35, 41, 50, 57, 26, 29, 33, 38, 43, 49, 55, 59, 32, 36, 42, 47, 51, 54, 60, 61, 40, 45, 48, 53, 56, 58, 62, 63, }; DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_8x8[64]) = { 0, 2, 5, 9, 14, 22, 31, 37, 1, 4, 8, 13, 19, 26, 38, 44, 3, 6, 10, 17, 24, 30, 42, 49, 7, 11, 15, 21, 29, 36, 47, 53, 12, 16, 20, 27, 34, 43, 52, 57, 18, 23, 28, 35, 41, 48, 56, 60, 25, 32, 39, 45, 50, 55, 59, 62, 33, 40, 46, 51, 54, 58, 61, 63, }; DECLARE_ALIGNED(16, static const int16_t, vp9_col_iscan_16x16[256]) = { 0, 4, 11, 20, 31, 43, 59, 75, 85, 109, 130, 150, 165, 181, 195, 198, 1, 6, 14, 23, 34, 47, 64, 81, 95, 114, 135, 153, 171, 188, 201, 212, 2, 8, 16, 25, 38, 52, 67, 83, 101, 116, 136, 157, 172, 190, 205, 216, 3, 10, 
18, 29, 41, 55, 71, 89, 103, 119, 141, 159, 176, 194, 208, 218, 5, 12, 21, 32, 45, 58, 74, 93, 104, 123, 144, 164, 179, 196, 210, 223, 7, 15, 26, 37, 49, 63, 78, 96, 112, 129, 146, 166, 182, 200, 215, 228, 9, 19, 28, 39, 54, 69, 86, 102, 117, 132, 151, 170, 187, 206, 220, 230, 13, 24, 35, 46, 60, 73, 91, 108, 122, 137, 154, 174, 189, 207, 224, 235, 17, 30, 40, 53, 66, 82, 98, 115, 126, 142, 161, 180, 197, 213, 227, 237, 22, 36, 48, 62, 76, 92, 105, 120, 133, 147, 167, 186, 203, 219, 232, 240, 27, 44, 56, 70, 84, 99, 113, 127, 140, 156, 175, 193, 209, 226, 236, 244, 33, 51, 68, 79, 94, 110, 125, 138, 149, 162, 184, 202, 217, 229, 241, 247, 42, 61, 77, 90, 106, 121, 134, 148, 160, 173, 191, 211, 225, 238, 245, 251, 50, 72, 87, 100, 118, 128, 145, 158, 168, 183, 204, 222, 233, 242, 249, 253, 57, 80, 97, 111, 131, 143, 155, 169, 178, 192, 214, 231, 239, 246, 250, 254, 65, 88, 107, 124, 139, 152, 163, 177, 185, 199, 221, 234, 243, 248, 252, 255, }; DECLARE_ALIGNED(16, static const int16_t, vp9_row_iscan_16x16[256]) = { 0, 1, 2, 4, 6, 9, 12, 17, 22, 29, 36, 43, 54, 64, 76, 86, 3, 5, 7, 11, 15, 19, 25, 32, 38, 48, 59, 68, 84, 99, 115, 130, 8, 10, 13, 18, 23, 27, 33, 42, 51, 60, 72, 88, 103, 119, 142, 167, 14, 16, 20, 26, 31, 37, 44, 53, 61, 73, 85, 100, 116, 135, 161, 185, 21, 24, 30, 35, 40, 47, 55, 65, 74, 81, 94, 112, 133, 154, 179, 205, 28, 34, 39, 45, 50, 58, 67, 77, 87, 96, 106, 121, 146, 169, 196, 212, 41, 46, 49, 56, 63, 70, 79, 90, 98, 107, 122, 138, 159, 182, 207, 222, 52, 57, 62, 69, 75, 83, 93, 102, 110, 120, 134, 150, 176, 195, 215, 226, 66, 71, 78, 82, 91, 97, 108, 113, 127, 136, 148, 168, 188, 202, 221, 232, 80, 89, 92, 101, 105, 114, 125, 131, 139, 151, 162, 177, 192, 208, 223, 234, 95, 104, 109, 117, 123, 128, 143, 144, 155, 165, 175, 190, 206, 219, 233, 239, 111, 118, 124, 129, 140, 147, 157, 164, 170, 181, 191, 203, 224, 230, 240, 243, 126, 132, 137, 145, 153, 160, 174, 178, 184, 197, 204, 216, 231, 237, 244, 246, 141, 149, 156, 166, 172, 180, 189, 199, 200, 210, 220, 228, 238, 242, 249, 251, 152, 163, 171, 183, 186, 193, 201, 211, 214, 218, 227, 236, 245, 247, 252, 253, 158, 173, 187, 194, 198, 209, 213, 217, 225, 229, 235, 241, 248, 250, 254, 255, }; DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_16x16[256]) = { 0, 2, 5, 9, 17, 24, 36, 44, 55, 72, 88, 104, 128, 143, 166, 179, 1, 4, 8, 13, 20, 30, 40, 54, 66, 79, 96, 113, 141, 154, 178, 196, 3, 7, 11, 18, 25, 33, 46, 57, 71, 86, 101, 119, 148, 164, 186, 201, 6, 12, 16, 23, 31, 39, 53, 64, 78, 92, 110, 127, 153, 169, 193, 208, 10, 14, 19, 28, 37, 47, 58, 67, 84, 98, 114, 133, 161, 176, 198, 214, 15, 21, 26, 34, 43, 52, 65, 77, 91, 106, 120, 140, 165, 185, 205, 221, 22, 27, 32, 41, 48, 60, 73, 85, 99, 116, 130, 151, 175, 190, 211, 225, 29, 35, 42, 49, 59, 69, 81, 95, 108, 125, 139, 155, 182, 197, 217, 229, 38, 45, 51, 61, 68, 80, 93, 105, 118, 134, 150, 168, 191, 207, 223, 234, 50, 56, 63, 74, 83, 94, 109, 117, 129, 147, 163, 177, 199, 213, 228, 238, 62, 70, 76, 87, 97, 107, 122, 131, 145, 159, 172, 188, 210, 222, 235, 242, 75, 82, 90, 102, 112, 124, 138, 146, 157, 173, 187, 202, 219, 230, 240, 245, 89, 100, 111, 123, 132, 142, 156, 167, 180, 189, 203, 216, 231, 237, 246, 250, 103, 115, 126, 136, 149, 162, 171, 183, 194, 204, 215, 224, 236, 241, 248, 252, 121, 135, 144, 158, 170, 181, 192, 200, 209, 218, 227, 233, 243, 244, 251, 254, 137, 152, 160, 174, 184, 195, 206, 212, 220, 226, 232, 239, 247, 249, 253, 255, }; DECLARE_ALIGNED(16, static const int16_t, vp9_default_iscan_32x32[1024]) = { 0, 2, 5, 10, 17, 25, 38, 
47, 62, 83, 101, 121, 145, 170, 193, 204, 210, 219, 229, 233, 245, 257, 275, 299, 342, 356, 377, 405, 455, 471, 495, 527, 1, 4, 8, 15, 22, 30, 45, 58, 74, 92, 112, 133, 158, 184, 203, 215, 222, 228, 234, 237, 256, 274, 298, 317, 355, 376, 404, 426, 470, 494, 526, 551, 3, 7, 12, 18, 28, 36, 52, 64, 82, 102, 118, 142, 164, 189, 208, 217, 224, 231, 235, 238, 273, 297, 316, 329, 375, 403, 425, 440, 493, 525, 550, 567, 6, 11, 16, 23, 31, 43, 60, 73, 90, 109, 126, 150, 173, 196, 211, 220, 226, 232, 236, 239, 296, 315, 328, 335, 402, 424, 439, 447, 524, 549, 566, 575, 9, 14, 19, 29, 37, 50, 65, 78, 95, 116, 134, 157, 179, 201, 214, 223, 244, 255, 272, 295, 341, 354, 374, 401, 454, 469, 492, 523, 582, 596, 617, 645, 13, 20, 26, 35, 44, 54, 72, 85, 105, 123, 140, 163, 182, 205, 216, 225, 254, 271, 294, 314, 353, 373, 400, 423, 468, 491, 522, 548, 595, 616, 644, 666, 21, 27, 33, 42, 53, 63, 80, 94, 113, 132, 151, 172, 190, 209, 218, 227, 270, 293, 313, 327, 372, 399, 422, 438, 490, 521, 547, 565, 615, 643, 665, 680, 24, 32, 39, 48, 57, 71, 88, 104, 120, 139, 159, 178, 197, 212, 221, 230, 292, 312, 326, 334, 398, 421, 437, 446, 520, 546, 564, 574, 642, 664, 679, 687, 34, 40, 46, 56, 68, 81, 96, 111, 130, 147, 167, 186, 243, 253, 269, 291, 340, 352, 371, 397, 453, 467, 489, 519, 581, 594, 614, 641, 693, 705, 723, 747, 41, 49, 55, 67, 77, 91, 107, 124, 138, 161, 177, 194, 252, 268, 290, 311, 351, 370, 396, 420, 466, 488, 518, 545, 593, 613, 640, 663, 704, 722, 746, 765, 51, 59, 66, 76, 89, 99, 119, 131, 149, 168, 181, 200, 267, 289, 310, 325, 369, 395, 419, 436, 487, 517, 544, 563, 612, 639, 662, 678, 721, 745, 764, 777, 61, 69, 75, 87, 100, 114, 129, 144, 162, 180, 191, 207, 288, 309, 324, 333, 394, 418, 435, 445, 516, 543, 562, 573, 638, 661, 677, 686, 744, 763, 776, 783, 70, 79, 86, 97, 108, 122, 137, 155, 242, 251, 266, 287, 339, 350, 368, 393, 452, 465, 486, 515, 580, 592, 611, 637, 692, 703, 720, 743, 788, 798, 813, 833, 84, 93, 103, 110, 125, 141, 154, 171, 250, 265, 286, 308, 349, 367, 392, 417, 464, 485, 514, 542, 591, 610, 636, 660, 702, 719, 742, 762, 797, 812, 832, 848, 98, 106, 115, 127, 143, 156, 169, 185, 264, 285, 307, 323, 366, 391, 416, 434, 484, 513, 541, 561, 609, 635, 659, 676, 718, 741, 761, 775, 811, 831, 847, 858, 117, 128, 136, 148, 160, 175, 188, 198, 284, 306, 322, 332, 390, 415, 433, 444, 512, 540, 560, 572, 634, 658, 675, 685, 740, 760, 774, 782, 830, 846, 857, 863, 135, 146, 152, 165, 241, 249, 263, 283, 338, 348, 365, 389, 451, 463, 483, 511, 579, 590, 608, 633, 691, 701, 717, 739, 787, 796, 810, 829, 867, 875, 887, 903, 153, 166, 174, 183, 248, 262, 282, 305, 347, 364, 388, 414, 462, 482, 510, 539, 589, 607, 632, 657, 700, 716, 738, 759, 795, 809, 828, 845, 874, 886, 902, 915, 176, 187, 195, 202, 261, 281, 304, 321, 363, 387, 413, 432, 481, 509, 538, 559, 606, 631, 656, 674, 715, 737, 758, 773, 808, 827, 844, 856, 885, 901, 914, 923, 192, 199, 206, 213, 280, 303, 320, 331, 386, 412, 431, 443, 508, 537, 558, 571, 630, 655, 673, 684, 736, 757, 772, 781, 826, 843, 855, 862, 900, 913, 922, 927, 240, 247, 260, 279, 337, 346, 362, 385, 450, 461, 480, 507, 578, 588, 605, 629, 690, 699, 714, 735, 786, 794, 807, 825, 866, 873, 884, 899, 930, 936, 945, 957, 246, 259, 278, 302, 345, 361, 384, 411, 460, 479, 506, 536, 587, 604, 628, 654, 698, 713, 734, 756, 793, 806, 824, 842, 872, 883, 898, 912, 935, 944, 956, 966, 258, 277, 301, 319, 360, 383, 410, 430, 478, 505, 535, 557, 603, 627, 653, 672, 712, 733, 755, 771, 805, 823, 841, 854, 882, 897, 911, 921, 943, 955, 965, 972, 276, 300, 
318, 330, 382, 409, 429, 442, 504, 534, 556, 570, 626, 652, 671, 683, 732, 754, 770, 780, 822, 840, 853, 861, 896, 910, 920, 926, 954, 964, 971, 975, 336, 344, 359, 381, 449, 459, 477, 503, 577, 586, 602, 625, 689, 697, 711, 731, 785, 792, 804, 821, 865, 871, 881, 895, 929, 934, 942, 953, 977, 981, 987, 995, 343, 358, 380, 408, 458, 476, 502, 533, 585, 601, 624, 651, 696, 710, 730, 753, 791, 803, 820, 839, 870, 880, 894, 909, 933, 941, 952, 963, 980, 986, 994, 1001, 357, 379, 407, 428, 475, 501, 532, 555, 600, 623, 650, 670, 709, 729, 752, 769, 802, 819, 838, 852, 879, 893, 908, 919, 940, 951, 962, 970, 985, 993, 1000, 1005, 378, 406, 427, 441, 500, 531, 554, 569, 622, 649, 669, 682, 728, 751, 768, 779, 818, 837, 851, 860, 892, 907, 918, 925, 950, 961, 969, 974, 992, 999, 1004, 1007, 448, 457, 474, 499, 576, 584, 599, 621, 688, 695, 708, 727, 784, 790, 801, 817, 864, 869, 878, 891, 928, 932, 939, 949, 976, 979, 984, 991, 1008, 1010, 1013, 1017, 456, 473, 498, 530, 583, 598, 620, 648, 694, 707, 726, 750, 789, 800, 816, 836, 868, 877, 890, 906, 931, 938, 948, 960, 978, 983, 990, 998, 1009, 1012, 1016, 1020, 472, 497, 529, 553, 597, 619, 647, 668, 706, 725, 749, 767, 799, 815, 835, 850, 876, 889, 905, 917, 937, 947, 959, 968, 982, 989, 997, 1003, 1011, 1015, 1019, 1022, 496, 528, 552, 568, 618, 646, 667, 681, 724, 748, 766, 778, 814, 834, 849, 859, 888, 904, 916, 924, 946, 958, 967, 973, 988, 996, 1002, 1006, 1014, 1018, 1021, 1023, }; const scan_order vp9_default_scan_orders[TX_SIZES] = { { default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors }, { default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors }, { default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors }, { default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors }, }; const scan_order vp9_scan_orders[TX_SIZES][TX_TYPES] = { { // TX_4X4 { default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors }, { row_scan_4x4, vp9_row_iscan_4x4, row_scan_4x4_neighbors }, { col_scan_4x4, vp9_col_iscan_4x4, col_scan_4x4_neighbors }, { default_scan_4x4, vp9_default_iscan_4x4, default_scan_4x4_neighbors } }, { // TX_8X8 { default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors }, { row_scan_8x8, vp9_row_iscan_8x8, row_scan_8x8_neighbors }, { col_scan_8x8, vp9_col_iscan_8x8, col_scan_8x8_neighbors }, { default_scan_8x8, vp9_default_iscan_8x8, default_scan_8x8_neighbors } }, { // TX_16X16 { default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors }, { row_scan_16x16, vp9_row_iscan_16x16, row_scan_16x16_neighbors }, { col_scan_16x16, vp9_col_iscan_16x16, col_scan_16x16_neighbors }, { default_scan_16x16, vp9_default_iscan_16x16, default_scan_16x16_neighbors } }, { // TX_32X32 { default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors }, { default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors }, { default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors }, { default_scan_32x32, vp9_default_iscan_32x32, default_scan_32x32_neighbors } } }; libvpx-1.8.2/vp9/common/vp9_scan.h000066400000000000000000000032671357355204000167430ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_SCAN_H_ #define VPX_VP9_COMMON_VP9_SCAN_H_ #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" #include "vp9/common/vp9_enums.h" #include "vp9/common/vp9_blockd.h" #ifdef __cplusplus extern "C" { #endif #define MAX_NEIGHBORS 2 typedef struct { const int16_t *scan; const int16_t *iscan; const int16_t *neighbors; } scan_order; extern const scan_order vp9_default_scan_orders[TX_SIZES]; extern const scan_order vp9_scan_orders[TX_SIZES][TX_TYPES]; static INLINE int get_coef_context(const int16_t *neighbors, const uint8_t *token_cache, int c) { return (1 + token_cache[neighbors[MAX_NEIGHBORS * c + 0]] + token_cache[neighbors[MAX_NEIGHBORS * c + 1]]) >> 1; } static INLINE const scan_order *get_scan(const MACROBLOCKD *xd, TX_SIZE tx_size, PLANE_TYPE type, int block_idx) { const MODE_INFO *const mi = xd->mi[0]; if (is_inter_block(mi) || type != PLANE_TYPE_Y || xd->lossless) { return &vp9_default_scan_orders[tx_size]; } else { const PREDICTION_MODE mode = get_y_mode(mi, block_idx); return &vp9_scan_orders[tx_size][intra_mode_to_tx_type_lookup[mode]]; } } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_SCAN_H_ libvpx-1.8.2/vp9/common/vp9_seg_common.c000066400000000000000000000042001357355204000201240ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_quant_common.h" static const int seg_feature_data_signed[SEG_LVL_MAX] = { 1, 1, 0, 0 }; static const int seg_feature_data_max[SEG_LVL_MAX] = { MAXQ, MAX_LOOP_FILTER, 3, 0 }; // These functions provide access to new segment level features. // Eventually these functions may be "optimized out" but for the moment, // the coding mechanism is still subject to change so these provide a // convenient single point of change. void vp9_clearall_segfeatures(struct segmentation *seg) { vp9_zero(seg->feature_data); vp9_zero(seg->feature_mask); seg->aq_av_offset = 0; } void vp9_enable_segfeature(struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id) { seg->feature_mask[segment_id] |= 1 << feature_id; } int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id) { return seg_feature_data_max[feature_id]; } int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id) { return seg_feature_data_signed[feature_id]; } void vp9_set_segdata(struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id, int seg_data) { assert(seg_data <= seg_feature_data_max[feature_id]); if (seg_data < 0) { assert(seg_feature_data_signed[feature_id]); assert(-seg_data <= seg_feature_data_max[feature_id]); } seg->feature_data[segment_id][feature_id] = seg_data; } const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)] = { 2, 4, 6, 8, 10, 12, 0, -1, -2, -3, -4, -5, -6, -7 }; // TBD?
Functions to read and write segment data with range / validity checking libvpx-1.8.2/vp9/common/vp9_seg_common.h000066400000000000000000000046421357355204000201430ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_SEG_COMMON_H_ #define VPX_VP9_COMMON_VP9_SEG_COMMON_H_ #include "vpx_dsp/prob.h" #ifdef __cplusplus extern "C" { #endif #define SEGMENT_DELTADATA 0 #define SEGMENT_ABSDATA 1 #define MAX_SEGMENTS 8 #define SEG_TREE_PROBS (MAX_SEGMENTS - 1) #define PREDICTION_PROBS 3 // Segment level features. typedef enum { SEG_LVL_ALT_Q = 0, // Use alternate Quantizer .... SEG_LVL_ALT_LF = 1, // Use alternate loop filter value... SEG_LVL_REF_FRAME = 2, // Optional Segment reference frame SEG_LVL_SKIP = 3, // Optional Segment (0,0) + skip mode SEG_LVL_MAX = 4 // Number of features supported } SEG_LVL_FEATURES; struct segmentation { uint8_t enabled; uint8_t update_map; uint8_t update_data; uint8_t abs_delta; uint8_t temporal_update; vpx_prob tree_probs[SEG_TREE_PROBS]; vpx_prob pred_probs[PREDICTION_PROBS]; int16_t feature_data[MAX_SEGMENTS][SEG_LVL_MAX]; uint32_t feature_mask[MAX_SEGMENTS]; int aq_av_offset; }; static INLINE int segfeature_active(const struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id) { return seg->enabled && (seg->feature_mask[segment_id] & (1 << feature_id)); } void vp9_clearall_segfeatures(struct segmentation *seg); void vp9_enable_segfeature(struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id); int vp9_seg_feature_data_max(SEG_LVL_FEATURES feature_id); int vp9_is_segfeature_signed(SEG_LVL_FEATURES feature_id); void vp9_set_segdata(struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id, int seg_data); static INLINE int get_segdata(const struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id) { return seg->feature_data[segment_id][feature_id]; } extern const vpx_tree_index vp9_segment_tree[TREE_SIZE(MAX_SEGMENTS)]; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_SEG_COMMON_H_ libvpx-1.8.2/vp9/common/vp9_thread_common.c000066400000000000000000000467241357355204000206360ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <assert.h> #include <limits.h> #include "./vpx_config.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_thread_common.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_loopfilter.h" #if CONFIG_MULTITHREAD static INLINE void mutex_lock(pthread_mutex_t *const mutex) { const int kMaxTryLocks = 4000; int locked = 0; int i; for (i = 0; i < kMaxTryLocks; ++i) { if (!pthread_mutex_trylock(mutex)) { locked = 1; break; } } if (!locked) pthread_mutex_lock(mutex); } #endif // CONFIG_MULTITHREAD static INLINE void sync_read(VP9LfSync *const lf_sync, int r, int c) { #if CONFIG_MULTITHREAD const int nsync = lf_sync->sync_range; if (r && !(c & (nsync - 1))) { pthread_mutex_t *const mutex = &lf_sync->mutex[r - 1]; mutex_lock(mutex); while (c > lf_sync->cur_sb_col[r - 1] - nsync) { pthread_cond_wait(&lf_sync->cond[r - 1], mutex); } pthread_mutex_unlock(mutex); } #else (void)lf_sync; (void)r; (void)c; #endif // CONFIG_MULTITHREAD } static INLINE void sync_write(VP9LfSync *const lf_sync, int r, int c, const int sb_cols) { #if CONFIG_MULTITHREAD const int nsync = lf_sync->sync_range; int cur; // Only signal when there are enough filtered SB for next row to run. int sig = 1; if (c < sb_cols - 1) { cur = c; if (c % nsync) sig = 0; } else { cur = sb_cols + nsync; } if (sig) { mutex_lock(&lf_sync->mutex[r]); lf_sync->cur_sb_col[r] = cur; pthread_cond_signal(&lf_sync->cond[r]); pthread_mutex_unlock(&lf_sync->mutex[r]); } #else (void)lf_sync; (void)r; (void)c; (void)sb_cols; #endif // CONFIG_MULTITHREAD } // Implement row loopfiltering for each thread. static INLINE void thread_loop_filter_rows( const YV12_BUFFER_CONFIG *const frame_buffer, VP9_COMMON *const cm, struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only, VP9LfSync *const lf_sync) { const int num_planes = y_only ?
1 : MAX_MB_PLANE; const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; const int num_active_workers = lf_sync->num_active_workers; int mi_row, mi_col; enum lf_path path; if (y_only) path = LF_PATH_444; else if (planes[1].subsampling_y == 1 && planes[1].subsampling_x == 1) path = LF_PATH_420; else if (planes[1].subsampling_y == 0 && planes[1].subsampling_x == 0) path = LF_PATH_444; else path = LF_PATH_SLOW; assert(num_active_workers > 0); for (mi_row = start; mi_row < stop; mi_row += num_active_workers * MI_BLOCK_SIZE) { MODE_INFO **const mi = cm->mi_grid_visible + mi_row * cm->mi_stride; LOOP_FILTER_MASK *lfm = get_lfm(&cm->lf, mi_row, 0); for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE, ++lfm) { const int r = mi_row >> MI_BLOCK_SIZE_LOG2; const int c = mi_col >> MI_BLOCK_SIZE_LOG2; int plane; sync_read(lf_sync, r, c); vp9_setup_dst_planes(planes, frame_buffer, mi_row, mi_col); vp9_adjust_mask(cm, mi_row, mi_col, lfm); vp9_filter_block_plane_ss00(cm, &planes[0], mi_row, lfm); for (plane = 1; plane < num_planes; ++plane) { switch (path) { case LF_PATH_420: vp9_filter_block_plane_ss11(cm, &planes[plane], mi_row, lfm); break; case LF_PATH_444: vp9_filter_block_plane_ss00(cm, &planes[plane], mi_row, lfm); break; case LF_PATH_SLOW: vp9_filter_block_plane_non420(cm, &planes[plane], mi + mi_col, mi_row, mi_col); break; } } sync_write(lf_sync, r, c, sb_cols); } } } // Row-based multi-threaded loopfilter hook static int loop_filter_row_worker(void *arg1, void *arg2) { VP9LfSync *const lf_sync = (VP9LfSync *)arg1; LFWorkerData *const lf_data = (LFWorkerData *)arg2; thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, lf_data->start, lf_data->stop, lf_data->y_only, lf_sync); return 1; } static void loop_filter_rows_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int start, int stop, int y_only, VPxWorker *workers, int nworkers, VP9LfSync *lf_sync) { const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); // Number of superblock rows and cols const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; const int num_tile_cols = 1 << cm->log2_tile_cols; // Limit the number of workers to prevent changes in frame dimensions from // causing incorrect sync calculations when sb_rows < threads/tile_cols. // Further restrict them by the number of tile columns should the user // request more as this implementation doesn't scale well beyond that. const int num_workers = VPXMIN(nworkers, VPXMIN(num_tile_cols, sb_rows)); int i; if (!lf_sync->sync_range || sb_rows != lf_sync->rows || num_workers > lf_sync->num_workers) { vp9_loop_filter_dealloc(lf_sync); vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); } lf_sync->num_active_workers = num_workers; // Initialize cur_sb_col to -1 for all SB rows. memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); // Set up loopfilter thread data. // The decoder is capping num_workers because it has been observed that using // more threads on the loopfilter than there are cores will hurt performance // on Android. This is because the system will only schedule the tile decode // workers on cores equal to the number of tile columns. Then if the decoder // tries to use more threads for the loopfilter, it will hurt performance // because of contention. If the multithreading code changes in the future // then the number of workers used by the loopfilter should be revisited. 
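/* Worked example (illustrative only; the frame size, tile layout and thread count below are assumptions, not values taken from this file): for a 1920x1080 frame, cm->mi_rows is 135 in 8x8 units, which mi_cols_aligned_to_sb() rounds up to 136, giving sb_rows = 136 >> MI_BLOCK_SIZE_LOG2 = 17 superblock rows. If the bitstream used log2_tile_cols = 2 (4 tile columns) and the caller passed nworkers = 8, then num_workers = VPXMIN(8, VPXMIN(4, 17)) = 4, i.e. the loopfilter is clamped to one worker per tile column. */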
for (i = 0; i < num_workers; ++i) { VPxWorker *const worker = &workers[i]; LFWorkerData *const lf_data = &lf_sync->lfdata[i]; worker->hook = loop_filter_row_worker; worker->data1 = lf_sync; worker->data2 = lf_data; // Loopfilter data vp9_loop_filter_data_reset(lf_data, frame, cm, planes); lf_data->start = start + i * MI_BLOCK_SIZE; lf_data->stop = stop; lf_data->y_only = y_only; // Start loopfiltering if (i == num_workers - 1) { winterface->execute(worker); } else { winterface->launch(worker); } } // Wait till all rows are finished for (i = 0; i < num_workers; ++i) { winterface->sync(&workers[i]); } } void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, VP9_COMMON *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int frame_filter_level, int y_only, int partial_frame, VPxWorker *workers, int num_workers, VP9LfSync *lf_sync) { int start_mi_row, end_mi_row, mi_rows_to_filter; if (!frame_filter_level) return; start_mi_row = 0; mi_rows_to_filter = cm->mi_rows; if (partial_frame && cm->mi_rows > 8) { start_mi_row = cm->mi_rows >> 1; start_mi_row &= 0xfffffff8; mi_rows_to_filter = VPXMAX(cm->mi_rows / 8, 8); } end_mi_row = start_mi_row + mi_rows_to_filter; vp9_loop_filter_frame_init(cm, frame_filter_level); loop_filter_rows_mt(frame, cm, planes, start_mi_row, end_mi_row, y_only, workers, num_workers, lf_sync); } void vp9_lpf_mt_init(VP9LfSync *lf_sync, VP9_COMMON *cm, int frame_filter_level, int num_workers) { const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; if (!frame_filter_level) return; if (!lf_sync->sync_range || sb_rows != lf_sync->rows || num_workers > lf_sync->num_workers) { vp9_loop_filter_dealloc(lf_sync); vp9_loop_filter_alloc(lf_sync, cm, sb_rows, cm->width, num_workers); } // Initialize cur_sb_col to -1 for all SB rows. memset(lf_sync->cur_sb_col, -1, sizeof(*lf_sync->cur_sb_col) * sb_rows); lf_sync->corrupted = 0; memset(lf_sync->num_tiles_done, 0, sizeof(*lf_sync->num_tiles_done) * sb_rows); cm->lf_row = 0; } // Set up nsync by width. static INLINE int get_sync_range(int width) { // nsync numbers are picked by testing. For example, for 4k // video, using 4 gives best performance. 
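/* Illustrative mapping implied by the thresholds below (the widths are example values): 426 -> 1, 1280 -> 2, 1920 -> 4, 3840 -> 4, 7680 -> 8. A larger nsync makes sync_write() signal less often, at the cost of requiring the row above to be further ahead before sync_read() lets the current row proceed. */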
if (width < 640) return 1; else if (width <= 1280) return 2; else if (width <= 4096) return 4; else return 8; } // Allocate memory for lf row synchronization void vp9_loop_filter_alloc(VP9LfSync *lf_sync, VP9_COMMON *cm, int rows, int width, int num_workers) { lf_sync->rows = rows; #if CONFIG_MULTITHREAD { int i; CHECK_MEM_ERROR(cm, lf_sync->mutex, vpx_malloc(sizeof(*lf_sync->mutex) * rows)); if (lf_sync->mutex) { for (i = 0; i < rows; ++i) { pthread_mutex_init(&lf_sync->mutex[i], NULL); } } CHECK_MEM_ERROR(cm, lf_sync->cond, vpx_malloc(sizeof(*lf_sync->cond) * rows)); if (lf_sync->cond) { for (i = 0; i < rows; ++i) { pthread_cond_init(&lf_sync->cond[i], NULL); } } CHECK_MEM_ERROR(cm, lf_sync->lf_mutex, vpx_malloc(sizeof(*lf_sync->lf_mutex))); pthread_mutex_init(lf_sync->lf_mutex, NULL); CHECK_MEM_ERROR(cm, lf_sync->recon_done_mutex, vpx_malloc(sizeof(*lf_sync->recon_done_mutex) * rows)); if (lf_sync->recon_done_mutex) { int i; for (i = 0; i < rows; ++i) { pthread_mutex_init(&lf_sync->recon_done_mutex[i], NULL); } } CHECK_MEM_ERROR(cm, lf_sync->recon_done_cond, vpx_malloc(sizeof(*lf_sync->recon_done_cond) * rows)); if (lf_sync->recon_done_cond) { int i; for (i = 0; i < rows; ++i) { pthread_cond_init(&lf_sync->recon_done_cond[i], NULL); } } } #endif // CONFIG_MULTITHREAD CHECK_MEM_ERROR(cm, lf_sync->lfdata, vpx_malloc(num_workers * sizeof(*lf_sync->lfdata))); lf_sync->num_workers = num_workers; lf_sync->num_active_workers = lf_sync->num_workers; CHECK_MEM_ERROR(cm, lf_sync->cur_sb_col, vpx_malloc(sizeof(*lf_sync->cur_sb_col) * rows)); CHECK_MEM_ERROR(cm, lf_sync->num_tiles_done, vpx_malloc(sizeof(*lf_sync->num_tiles_done) * mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2)); // Set up nsync. lf_sync->sync_range = get_sync_range(width); } // Deallocate lf synchronization related mutex and data void vp9_loop_filter_dealloc(VP9LfSync *lf_sync) { assert(lf_sync != NULL); #if CONFIG_MULTITHREAD if (lf_sync->mutex != NULL) { int i; for (i = 0; i < lf_sync->rows; ++i) { pthread_mutex_destroy(&lf_sync->mutex[i]); } vpx_free(lf_sync->mutex); } if (lf_sync->cond != NULL) { int i; for (i = 0; i < lf_sync->rows; ++i) { pthread_cond_destroy(&lf_sync->cond[i]); } vpx_free(lf_sync->cond); } if (lf_sync->recon_done_mutex != NULL) { int i; for (i = 0; i < lf_sync->rows; ++i) { pthread_mutex_destroy(&lf_sync->recon_done_mutex[i]); } vpx_free(lf_sync->recon_done_mutex); } if (lf_sync->lf_mutex != NULL) { pthread_mutex_destroy(lf_sync->lf_mutex); vpx_free(lf_sync->lf_mutex); } if (lf_sync->recon_done_cond != NULL) { int i; for (i = 0; i < lf_sync->rows; ++i) { pthread_cond_destroy(&lf_sync->recon_done_cond[i]); } vpx_free(lf_sync->recon_done_cond); } #endif // CONFIG_MULTITHREAD vpx_free(lf_sync->lfdata); vpx_free(lf_sync->cur_sb_col); vpx_free(lf_sync->num_tiles_done); // clear the structure as the source of this call may be a resize in which // case this call will be followed by an _alloc() which may fail. vp9_zero(*lf_sync); } static int get_next_row(VP9_COMMON *cm, VP9LfSync *lf_sync) { int return_val = -1; int cur_row; const int max_rows = cm->mi_rows; #if CONFIG_MULTITHREAD const int tile_cols = 1 << cm->log2_tile_cols; pthread_mutex_lock(lf_sync->lf_mutex); if (cm->lf_row < max_rows) { cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2; return_val = cm->lf_row; cm->lf_row += MI_BLOCK_SIZE; if (cm->lf_row < max_rows) { /* If this is not the last row, make sure the next row is also decoded. 
* This is because the intra predict has to happen before loop filter */ cur_row += 1; } } pthread_mutex_unlock(lf_sync->lf_mutex); if (return_val == -1) return return_val; pthread_mutex_lock(&lf_sync->recon_done_mutex[cur_row]); if (lf_sync->num_tiles_done[cur_row] < tile_cols) { pthread_cond_wait(&lf_sync->recon_done_cond[cur_row], &lf_sync->recon_done_mutex[cur_row]); } pthread_mutex_unlock(&lf_sync->recon_done_mutex[cur_row]); pthread_mutex_lock(lf_sync->lf_mutex); if (lf_sync->corrupted) { int row = return_val >> MI_BLOCK_SIZE_LOG2; pthread_mutex_lock(&lf_sync->mutex[row]); lf_sync->cur_sb_col[row] = INT_MAX; pthread_cond_signal(&lf_sync->cond[row]); pthread_mutex_unlock(&lf_sync->mutex[row]); return_val = -1; } pthread_mutex_unlock(lf_sync->lf_mutex); #else (void)lf_sync; if (cm->lf_row < max_rows) { cur_row = cm->lf_row >> MI_BLOCK_SIZE_LOG2; return_val = cm->lf_row; cm->lf_row += MI_BLOCK_SIZE; if (cm->lf_row < max_rows) { /* If this is not the last row, make sure the next row is also decoded. * This is because the intra predict has to happen before loop filter */ cur_row += 1; } } #endif // CONFIG_MULTITHREAD return return_val; } void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync) { int mi_row; VP9_COMMON *cm = lf_data->cm; while ((mi_row = get_next_row(cm, lf_sync)) != -1 && mi_row < cm->mi_rows) { lf_data->start = mi_row; lf_data->stop = mi_row + MI_BLOCK_SIZE; thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, lf_data->start, lf_data->stop, lf_data->y_only, lf_sync); } } void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row, int corrupted) { #if CONFIG_MULTITHREAD pthread_mutex_lock(lf_sync->lf_mutex); lf_sync->corrupted |= corrupted; pthread_mutex_unlock(lf_sync->lf_mutex); pthread_mutex_lock(&lf_sync->recon_done_mutex[row]); lf_sync->num_tiles_done[row] += 1; if (num_tiles == lf_sync->num_tiles_done[row]) { if (is_last_row) { /* The last 2 rows wait on the last row to be done. * So, we have to broadcast the signal in this case. */ pthread_cond_broadcast(&lf_sync->recon_done_cond[row]); } else { pthread_cond_signal(&lf_sync->recon_done_cond[row]); } } pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]); #else (void)lf_sync; (void)num_tiles; (void)row; (void)is_last_row; (void)corrupted; #endif // CONFIG_MULTITHREAD } void vp9_loopfilter_job(LFWorkerData *lf_data, VP9LfSync *lf_sync) { thread_loop_filter_rows(lf_data->frame_buffer, lf_data->cm, lf_data->planes, lf_data->start, lf_data->stop, lf_data->y_only, lf_sync); } // Accumulate frame counts. 
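/* Usage sketch (illustrative only; `tile_worker_counts` and `num_workers` are hypothetical names, not part of this file): a decoder that keeps one private FRAME_COUNTS per tile worker can fold them into the common counts once all workers have synced, e.g. for (n = 0; n < num_workers; ++n) vp9_accumulate_frame_counts(&cm->counts, &tile_worker_counts[n], 1); where is_dec = 1 selects the decoder path that also merges the coef counts. */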
void vp9_accumulate_frame_counts(FRAME_COUNTS *accum, const FRAME_COUNTS *counts, int is_dec) { int i, j, k, l, m; for (i = 0; i < BLOCK_SIZE_GROUPS; i++) for (j = 0; j < INTRA_MODES; j++) accum->y_mode[i][j] += counts->y_mode[i][j]; for (i = 0; i < INTRA_MODES; i++) for (j = 0; j < INTRA_MODES; j++) accum->uv_mode[i][j] += counts->uv_mode[i][j]; for (i = 0; i < PARTITION_CONTEXTS; i++) for (j = 0; j < PARTITION_TYPES; j++) accum->partition[i][j] += counts->partition[i][j]; if (is_dec) { int n; for (i = 0; i < TX_SIZES; i++) for (j = 0; j < PLANE_TYPES; j++) for (k = 0; k < REF_TYPES; k++) for (l = 0; l < COEF_BANDS; l++) for (m = 0; m < COEFF_CONTEXTS; m++) { accum->eob_branch[i][j][k][l][m] += counts->eob_branch[i][j][k][l][m]; for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) accum->coef[i][j][k][l][m][n] += counts->coef[i][j][k][l][m][n]; } } else { for (i = 0; i < TX_SIZES; i++) for (j = 0; j < PLANE_TYPES; j++) for (k = 0; k < REF_TYPES; k++) for (l = 0; l < COEF_BANDS; l++) for (m = 0; m < COEFF_CONTEXTS; m++) accum->eob_branch[i][j][k][l][m] += counts->eob_branch[i][j][k][l][m]; // In the encoder, coef is only updated at frame // level, so there is no need to accumulate it here. // for (n = 0; n < UNCONSTRAINED_NODES + 1; n++) // accum->coef[i][j][k][l][m][n] += // counts->coef[i][j][k][l][m][n]; } for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) for (j = 0; j < SWITCHABLE_FILTERS; j++) accum->switchable_interp[i][j] += counts->switchable_interp[i][j]; for (i = 0; i < INTER_MODE_CONTEXTS; i++) for (j = 0; j < INTER_MODES; j++) accum->inter_mode[i][j] += counts->inter_mode[i][j]; for (i = 0; i < INTRA_INTER_CONTEXTS; i++) for (j = 0; j < 2; j++) accum->intra_inter[i][j] += counts->intra_inter[i][j]; for (i = 0; i < COMP_INTER_CONTEXTS; i++) for (j = 0; j < 2; j++) accum->comp_inter[i][j] += counts->comp_inter[i][j]; for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < 2; j++) for (k = 0; k < 2; k++) accum->single_ref[i][j][k] += counts->single_ref[i][j][k]; for (i = 0; i < REF_CONTEXTS; i++) for (j = 0; j < 2; j++) accum->comp_ref[i][j] += counts->comp_ref[i][j]; for (i = 0; i < TX_SIZE_CONTEXTS; i++) { for (j = 0; j < TX_SIZES; j++) accum->tx.p32x32[i][j] += counts->tx.p32x32[i][j]; for (j = 0; j < TX_SIZES - 1; j++) accum->tx.p16x16[i][j] += counts->tx.p16x16[i][j]; for (j = 0; j < TX_SIZES - 2; j++) accum->tx.p8x8[i][j] += counts->tx.p8x8[i][j]; } for (i = 0; i < TX_SIZES; i++) accum->tx.tx_totals[i] += counts->tx.tx_totals[i]; for (i = 0; i < SKIP_CONTEXTS; i++) for (j = 0; j < 2; j++) accum->skip[i][j] += counts->skip[i][j]; for (i = 0; i < MV_JOINTS; i++) accum->mv.joints[i] += counts->mv.joints[i]; for (k = 0; k < 2; k++) { nmv_component_counts *const comps = &accum->mv.comps[k]; const nmv_component_counts *const comps_t = &counts->mv.comps[k]; for (i = 0; i < 2; i++) { comps->sign[i] += comps_t->sign[i]; comps->class0_hp[i] += comps_t->class0_hp[i]; comps->hp[i] += comps_t->hp[i]; } for (i = 0; i < MV_CLASSES; i++) comps->classes[i] += comps_t->classes[i]; for (i = 0; i < CLASS0_SIZE; i++) { comps->class0[i] += comps_t->class0[i]; for (j = 0; j < MV_FP_SIZE; j++) comps->class0_fp[i][j] += comps_t->class0_fp[i][j]; } for (i = 0; i < MV_OFFSET_BITS; i++) for (j = 0; j < 2; j++) comps->bits[i][j] += comps_t->bits[i][j]; for (i = 0; i < MV_FP_SIZE; i++) comps->fp[i] += comps_t->fp[i]; } } libvpx-1.8.2/vp9/common/vp9_thread_common.h000066400000000000000000000054501357355204000206320ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_THREAD_COMMON_H_ #define VPX_VP9_COMMON_VP9_THREAD_COMMON_H_ #include "./vpx_config.h" #include "vp9/common/vp9_loopfilter.h" #include "vpx_util/vpx_thread.h" #ifdef __cplusplus extern "C" { #endif struct VP9Common; struct FRAME_COUNTS; // Loopfilter row synchronization typedef struct VP9LfSyncData { #if CONFIG_MULTITHREAD pthread_mutex_t *mutex; pthread_cond_t *cond; #endif // Allocate memory to store the loop-filtered superblock index in each row. int *cur_sb_col; // The optimal sync_range for different resolution and platform should be // determined by testing. Currently, it is chosen to be a power-of-2 number. int sync_range; int rows; // Row-based parallel loopfilter data LFWorkerData *lfdata; int num_workers; // number of allocated workers. int num_active_workers; // number of scheduled workers. #if CONFIG_MULTITHREAD pthread_mutex_t *lf_mutex; pthread_mutex_t *recon_done_mutex; pthread_cond_t *recon_done_cond; #endif int *num_tiles_done; int corrupted; } VP9LfSync; // Allocate memory for loopfilter row synchronization. void vp9_loop_filter_alloc(VP9LfSync *lf_sync, struct VP9Common *cm, int rows, int width, int num_workers); // Deallocate loopfilter synchronization related mutex and data. void vp9_loop_filter_dealloc(VP9LfSync *lf_sync); // Multi-threaded loopfilter that uses the tile threads. void vp9_loop_filter_frame_mt(YV12_BUFFER_CONFIG *frame, struct VP9Common *cm, struct macroblockd_plane planes[MAX_MB_PLANE], int frame_filter_level, int y_only, int partial_frame, VPxWorker *workers, int num_workers, VP9LfSync *lf_sync); // Multi-threaded loopfilter initialisations void vp9_lpf_mt_init(VP9LfSync *lf_sync, struct VP9Common *cm, int frame_filter_level, int num_workers); void vp9_loopfilter_rows(LFWorkerData *lf_data, VP9LfSync *lf_sync); void vp9_set_row(VP9LfSync *lf_sync, int num_tiles, int row, int is_last_row, int corrupted); void vp9_loopfilter_job(LFWorkerData *lf_data, VP9LfSync *lf_sync); void vp9_accumulate_frame_counts(struct FRAME_COUNTS *accum, const struct FRAME_COUNTS *counts, int is_dec); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_THREAD_COMMON_H_ libvpx-1.8.2/vp9/common/vp9_tile_common.c000066400000000000000000000041311357355204000203060ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "vp9/common/vp9_tile_common.h" #include "vp9/common/vp9_onyxc_int.h" #include "vpx_dsp/vpx_dsp_common.h" #define MIN_TILE_WIDTH_B64 4 #define MAX_TILE_WIDTH_B64 64 static int get_tile_offset(int idx, int mis, int log2) { const int sb_cols = mi_cols_aligned_to_sb(mis) >> MI_BLOCK_SIZE_LOG2; const int offset = ((idx * sb_cols) >> log2) << MI_BLOCK_SIZE_LOG2; return VPXMIN(offset, mis); } void vp9_tile_set_row(TileInfo *tile, const VP9_COMMON *cm, int row) { tile->mi_row_start = get_tile_offset(row, cm->mi_rows, cm->log2_tile_rows); tile->mi_row_end = get_tile_offset(row + 1, cm->mi_rows, cm->log2_tile_rows); } void vp9_tile_set_col(TileInfo *tile, const VP9_COMMON *cm, int col) { tile->mi_col_start = get_tile_offset(col, cm->mi_cols, cm->log2_tile_cols); tile->mi_col_end = get_tile_offset(col + 1, cm->mi_cols, cm->log2_tile_cols); } void vp9_tile_init(TileInfo *tile, const VP9_COMMON *cm, int row, int col) { vp9_tile_set_row(tile, cm, row); vp9_tile_set_col(tile, cm, col); } static int get_min_log2_tile_cols(const int sb64_cols) { int min_log2 = 0; while ((MAX_TILE_WIDTH_B64 << min_log2) < sb64_cols) ++min_log2; return min_log2; } static int get_max_log2_tile_cols(const int sb64_cols) { int max_log2 = 1; while ((sb64_cols >> max_log2) >= MIN_TILE_WIDTH_B64) ++max_log2; return max_log2 - 1; } void vp9_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols, int *max_log2_tile_cols) { const int sb64_cols = mi_cols_aligned_to_sb(mi_cols) >> MI_BLOCK_SIZE_LOG2; *min_log2_tile_cols = get_min_log2_tile_cols(sb64_cols); *max_log2_tile_cols = get_max_log2_tile_cols(sb64_cols); assert(*min_log2_tile_cols <= *max_log2_tile_cols); } libvpx-1.8.2/vp9/common/vp9_tile_common.h000066400000000000000000000023341357355204000203160ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_COMMON_VP9_TILE_COMMON_H_ #define VPX_VP9_COMMON_VP9_TILE_COMMON_H_ #ifdef __cplusplus extern "C" { #endif struct VP9Common; typedef struct TileInfo { int mi_row_start, mi_row_end; int mi_col_start, mi_col_end; } TileInfo; // initializes 'tile->mi_(row|col)_(start|end)' for (row, col) based on // 'cm->log2_tile_(rows|cols)' & 'cm->mi_(rows|cols)' void vp9_tile_init(TileInfo *tile, const struct VP9Common *cm, int row, int col); void vp9_tile_set_row(TileInfo *tile, const struct VP9Common *cm, int row); void vp9_tile_set_col(TileInfo *tile, const struct VP9Common *cm, int col); void vp9_get_tile_n_bits(int mi_cols, int *min_log2_tile_cols, int *max_log2_tile_cols); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_COMMON_VP9_TILE_COMMON_H_ libvpx-1.8.2/vp9/common/x86/000077500000000000000000000000001357355204000154655ustar00rootroot00000000000000libvpx-1.8.2/vp9/common/x86/vp9_highbd_iht16x16_add_sse4.c000066400000000000000000000367661357355204000230160ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp9_rtcd.h" #include "vp9/common/vp9_idct.h" #include "vpx_dsp/x86/highbd_inv_txfm_sse4.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in, const int c, __m128i *const s) { const __m128i pair_c = pair_set_epi32(4 * c, 0); __m128i x[2]; extend_64bit(in, x); s[0] = _mm_mul_epi32(pair_c, x[0]); s[1] = _mm_mul_epi32(pair_c, x[1]); } static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0, const __m128i in1, const int c0, const int c1, __m128i *const s0, __m128i *const s1) { const __m128i pair_c0 = pair_set_epi32(4 * c0, 0); const __m128i pair_c1 = pair_set_epi32(4 * c1, 0); __m128i t00[2], t01[2], t10[2], t11[2]; __m128i x0[2], x1[2]; extend_64bit(in0, x0); extend_64bit(in1, x1); t00[0] = _mm_mul_epi32(pair_c0, x0[0]); t00[1] = _mm_mul_epi32(pair_c0, x0[1]); t01[0] = _mm_mul_epi32(pair_c0, x1[0]); t01[1] = _mm_mul_epi32(pair_c0, x1[1]); t10[0] = _mm_mul_epi32(pair_c1, x0[0]); t10[1] = _mm_mul_epi32(pair_c1, x0[1]); t11[0] = _mm_mul_epi32(pair_c1, x1[0]); t11[1] = _mm_mul_epi32(pair_c1, x1[1]); s0[0] = _mm_add_epi64(t00[0], t11[0]); s0[1] = _mm_add_epi64(t00[1], t11[1]); s1[0] = _mm_sub_epi64(t10[0], t01[0]); s1[1] = _mm_sub_epi64(t10[1], t01[1]); } static void highbd_iadst16_4col_sse4_1(__m128i *const io /*io[16]*/) { __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2], s8[2], s9[2], s10[2], s11[2], s12[2], s13[2], s14[2], s15[2]; __m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2], x8[2], x9[2], x10[2], x11[2], x12[2], x13[2], x14[2], x15[2]; // stage 1 highbd_iadst_butterfly_sse4_1(io[15], io[0], cospi_1_64, cospi_31_64, s0, s1); highbd_iadst_butterfly_sse4_1(io[13], io[2], cospi_5_64, cospi_27_64, s2, s3); highbd_iadst_butterfly_sse4_1(io[11], io[4], cospi_9_64, cospi_23_64, s4, s5); highbd_iadst_butterfly_sse4_1(io[9], io[6], cospi_13_64, cospi_19_64, s6, s7); highbd_iadst_butterfly_sse4_1(io[7], io[8], cospi_17_64, cospi_15_64, s8, s9); highbd_iadst_butterfly_sse4_1(io[5], io[10], cospi_21_64, cospi_11_64, s10, s11); highbd_iadst_butterfly_sse4_1(io[3], io[12], cospi_25_64, cospi_7_64, s12, s13); highbd_iadst_butterfly_sse4_1(io[1], io[14], cospi_29_64, cospi_3_64, s14, s15); x0[0] = _mm_add_epi64(s0[0], s8[0]); x0[1] = _mm_add_epi64(s0[1], s8[1]); x1[0] = _mm_add_epi64(s1[0], s9[0]); x1[1] = _mm_add_epi64(s1[1], s9[1]); x2[0] = _mm_add_epi64(s2[0], s10[0]); x2[1] = _mm_add_epi64(s2[1], s10[1]); x3[0] = _mm_add_epi64(s3[0], s11[0]); x3[1] = _mm_add_epi64(s3[1], s11[1]); x4[0] = _mm_add_epi64(s4[0], s12[0]); x4[1] = _mm_add_epi64(s4[1], s12[1]); x5[0] = _mm_add_epi64(s5[0], s13[0]); x5[1] = _mm_add_epi64(s5[1], s13[1]); x6[0] = _mm_add_epi64(s6[0], s14[0]); x6[1] = _mm_add_epi64(s6[1], s14[1]); x7[0] = _mm_add_epi64(s7[0], s15[0]); x7[1] = _mm_add_epi64(s7[1], s15[1]); x8[0] = _mm_sub_epi64(s0[0], s8[0]); x8[1] = _mm_sub_epi64(s0[1], s8[1]); x9[0] = _mm_sub_epi64(s1[0], s9[0]); x9[1] = _mm_sub_epi64(s1[1], s9[1]); x10[0] = _mm_sub_epi64(s2[0], s10[0]); x10[1] = _mm_sub_epi64(s2[1], s10[1]); x11[0] = _mm_sub_epi64(s3[0], s11[0]); x11[1] = _mm_sub_epi64(s3[1], s11[1]); x12[0] = _mm_sub_epi64(s4[0], s12[0]); x12[1] = _mm_sub_epi64(s4[1], s12[1]); x13[0] = _mm_sub_epi64(s5[0], s13[0]); x13[1] = _mm_sub_epi64(s5[1], s13[1]); x14[0] = _mm_sub_epi64(s6[0], s14[0]); x14[1] = _mm_sub_epi64(s6[1], s14[1]); x15[0] 
= _mm_sub_epi64(s7[0], s15[0]); x15[1] = _mm_sub_epi64(s7[1], s15[1]); x0[0] = dct_const_round_shift_64bit(x0[0]); x0[1] = dct_const_round_shift_64bit(x0[1]); x1[0] = dct_const_round_shift_64bit(x1[0]); x1[1] = dct_const_round_shift_64bit(x1[1]); x2[0] = dct_const_round_shift_64bit(x2[0]); x2[1] = dct_const_round_shift_64bit(x2[1]); x3[0] = dct_const_round_shift_64bit(x3[0]); x3[1] = dct_const_round_shift_64bit(x3[1]); x4[0] = dct_const_round_shift_64bit(x4[0]); x4[1] = dct_const_round_shift_64bit(x4[1]); x5[0] = dct_const_round_shift_64bit(x5[0]); x5[1] = dct_const_round_shift_64bit(x5[1]); x6[0] = dct_const_round_shift_64bit(x6[0]); x6[1] = dct_const_round_shift_64bit(x6[1]); x7[0] = dct_const_round_shift_64bit(x7[0]); x7[1] = dct_const_round_shift_64bit(x7[1]); x8[0] = dct_const_round_shift_64bit(x8[0]); x8[1] = dct_const_round_shift_64bit(x8[1]); x9[0] = dct_const_round_shift_64bit(x9[0]); x9[1] = dct_const_round_shift_64bit(x9[1]); x10[0] = dct_const_round_shift_64bit(x10[0]); x10[1] = dct_const_round_shift_64bit(x10[1]); x11[0] = dct_const_round_shift_64bit(x11[0]); x11[1] = dct_const_round_shift_64bit(x11[1]); x12[0] = dct_const_round_shift_64bit(x12[0]); x12[1] = dct_const_round_shift_64bit(x12[1]); x13[0] = dct_const_round_shift_64bit(x13[0]); x13[1] = dct_const_round_shift_64bit(x13[1]); x14[0] = dct_const_round_shift_64bit(x14[0]); x14[1] = dct_const_round_shift_64bit(x14[1]); x15[0] = dct_const_round_shift_64bit(x15[0]); x15[1] = dct_const_round_shift_64bit(x15[1]); x0[0] = pack_4(x0[0], x0[1]); x1[0] = pack_4(x1[0], x1[1]); x2[0] = pack_4(x2[0], x2[1]); x3[0] = pack_4(x3[0], x3[1]); x4[0] = pack_4(x4[0], x4[1]); x5[0] = pack_4(x5[0], x5[1]); x6[0] = pack_4(x6[0], x6[1]); x7[0] = pack_4(x7[0], x7[1]); x8[0] = pack_4(x8[0], x8[1]); x9[0] = pack_4(x9[0], x9[1]); x10[0] = pack_4(x10[0], x10[1]); x11[0] = pack_4(x11[0], x11[1]); x12[0] = pack_4(x12[0], x12[1]); x13[0] = pack_4(x13[0], x13[1]); x14[0] = pack_4(x14[0], x14[1]); x15[0] = pack_4(x15[0], x15[1]); // stage 2 s0[0] = x0[0]; s1[0] = x1[0]; s2[0] = x2[0]; s3[0] = x3[0]; s4[0] = x4[0]; s5[0] = x5[0]; s6[0] = x6[0]; s7[0] = x7[0]; x0[0] = _mm_add_epi32(s0[0], s4[0]); x1[0] = _mm_add_epi32(s1[0], s5[0]); x2[0] = _mm_add_epi32(s2[0], s6[0]); x3[0] = _mm_add_epi32(s3[0], s7[0]); x4[0] = _mm_sub_epi32(s0[0], s4[0]); x5[0] = _mm_sub_epi32(s1[0], s5[0]); x6[0] = _mm_sub_epi32(s2[0], s6[0]); x7[0] = _mm_sub_epi32(s3[0], s7[0]); highbd_iadst_butterfly_sse4_1(x8[0], x9[0], cospi_4_64, cospi_28_64, s8, s9); highbd_iadst_butterfly_sse4_1(x10[0], x11[0], cospi_20_64, cospi_12_64, s10, s11); highbd_iadst_butterfly_sse4_1(x13[0], x12[0], cospi_28_64, cospi_4_64, s13, s12); highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_12_64, cospi_20_64, s15, s14); x8[0] = _mm_add_epi64(s8[0], s12[0]); x8[1] = _mm_add_epi64(s8[1], s12[1]); x9[0] = _mm_add_epi64(s9[0], s13[0]); x9[1] = _mm_add_epi64(s9[1], s13[1]); x10[0] = _mm_add_epi64(s10[0], s14[0]); x10[1] = _mm_add_epi64(s10[1], s14[1]); x11[0] = _mm_add_epi64(s11[0], s15[0]); x11[1] = _mm_add_epi64(s11[1], s15[1]); x12[0] = _mm_sub_epi64(s8[0], s12[0]); x12[1] = _mm_sub_epi64(s8[1], s12[1]); x13[0] = _mm_sub_epi64(s9[0], s13[0]); x13[1] = _mm_sub_epi64(s9[1], s13[1]); x14[0] = _mm_sub_epi64(s10[0], s14[0]); x14[1] = _mm_sub_epi64(s10[1], s14[1]); x15[0] = _mm_sub_epi64(s11[0], s15[0]); x15[1] = _mm_sub_epi64(s11[1], s15[1]); x8[0] = dct_const_round_shift_64bit(x8[0]); x8[1] = dct_const_round_shift_64bit(x8[1]); x9[0] = dct_const_round_shift_64bit(x9[0]); x9[1] = 
dct_const_round_shift_64bit(x9[1]); x10[0] = dct_const_round_shift_64bit(x10[0]); x10[1] = dct_const_round_shift_64bit(x10[1]); x11[0] = dct_const_round_shift_64bit(x11[0]); x11[1] = dct_const_round_shift_64bit(x11[1]); x12[0] = dct_const_round_shift_64bit(x12[0]); x12[1] = dct_const_round_shift_64bit(x12[1]); x13[0] = dct_const_round_shift_64bit(x13[0]); x13[1] = dct_const_round_shift_64bit(x13[1]); x14[0] = dct_const_round_shift_64bit(x14[0]); x14[1] = dct_const_round_shift_64bit(x14[1]); x15[0] = dct_const_round_shift_64bit(x15[0]); x15[1] = dct_const_round_shift_64bit(x15[1]); x8[0] = pack_4(x8[0], x8[1]); x9[0] = pack_4(x9[0], x9[1]); x10[0] = pack_4(x10[0], x10[1]); x11[0] = pack_4(x11[0], x11[1]); x12[0] = pack_4(x12[0], x12[1]); x13[0] = pack_4(x13[0], x13[1]); x14[0] = pack_4(x14[0], x14[1]); x15[0] = pack_4(x15[0], x15[1]); // stage 3 s0[0] = x0[0]; s1[0] = x1[0]; s2[0] = x2[0]; s3[0] = x3[0]; highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5); highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6); s8[0] = x8[0]; s9[0] = x9[0]; s10[0] = x10[0]; s11[0] = x11[0]; highbd_iadst_butterfly_sse4_1(x12[0], x13[0], cospi_8_64, cospi_24_64, s12, s13); highbd_iadst_butterfly_sse4_1(x15[0], x14[0], cospi_24_64, cospi_8_64, s15, s14); x0[0] = _mm_add_epi32(s0[0], s2[0]); x1[0] = _mm_add_epi32(s1[0], s3[0]); x2[0] = _mm_sub_epi32(s0[0], s2[0]); x3[0] = _mm_sub_epi32(s1[0], s3[0]); x4[0] = _mm_add_epi64(s4[0], s6[0]); x4[1] = _mm_add_epi64(s4[1], s6[1]); x5[0] = _mm_add_epi64(s5[0], s7[0]); x5[1] = _mm_add_epi64(s5[1], s7[1]); x6[0] = _mm_sub_epi64(s4[0], s6[0]); x6[1] = _mm_sub_epi64(s4[1], s6[1]); x7[0] = _mm_sub_epi64(s5[0], s7[0]); x7[1] = _mm_sub_epi64(s5[1], s7[1]); x4[0] = dct_const_round_shift_64bit(x4[0]); x4[1] = dct_const_round_shift_64bit(x4[1]); x5[0] = dct_const_round_shift_64bit(x5[0]); x5[1] = dct_const_round_shift_64bit(x5[1]); x6[0] = dct_const_round_shift_64bit(x6[0]); x6[1] = dct_const_round_shift_64bit(x6[1]); x7[0] = dct_const_round_shift_64bit(x7[0]); x7[1] = dct_const_round_shift_64bit(x7[1]); x4[0] = pack_4(x4[0], x4[1]); x5[0] = pack_4(x5[0], x5[1]); x6[0] = pack_4(x6[0], x6[1]); x7[0] = pack_4(x7[0], x7[1]); x8[0] = _mm_add_epi32(s8[0], s10[0]); x9[0] = _mm_add_epi32(s9[0], s11[0]); x10[0] = _mm_sub_epi32(s8[0], s10[0]); x11[0] = _mm_sub_epi32(s9[0], s11[0]); x12[0] = _mm_add_epi64(s12[0], s14[0]); x12[1] = _mm_add_epi64(s12[1], s14[1]); x13[0] = _mm_add_epi64(s13[0], s15[0]); x13[1] = _mm_add_epi64(s13[1], s15[1]); x14[0] = _mm_sub_epi64(s12[0], s14[0]); x14[1] = _mm_sub_epi64(s12[1], s14[1]); x15[0] = _mm_sub_epi64(s13[0], s15[0]); x15[1] = _mm_sub_epi64(s13[1], s15[1]); x12[0] = dct_const_round_shift_64bit(x12[0]); x12[1] = dct_const_round_shift_64bit(x12[1]); x13[0] = dct_const_round_shift_64bit(x13[0]); x13[1] = dct_const_round_shift_64bit(x13[1]); x14[0] = dct_const_round_shift_64bit(x14[0]); x14[1] = dct_const_round_shift_64bit(x14[1]); x15[0] = dct_const_round_shift_64bit(x15[0]); x15[1] = dct_const_round_shift_64bit(x15[1]); x12[0] = pack_4(x12[0], x12[1]); x13[0] = pack_4(x13[0], x13[1]); x14[0] = pack_4(x14[0], x14[1]); x15[0] = pack_4(x15[0], x15[1]); // stage 4 s2[0] = _mm_add_epi32(x2[0], x3[0]); s3[0] = _mm_sub_epi32(x2[0], x3[0]); s6[0] = _mm_add_epi32(x7[0], x6[0]); s7[0] = _mm_sub_epi32(x7[0], x6[0]); s10[0] = _mm_add_epi32(x11[0], x10[0]); s11[0] = _mm_sub_epi32(x11[0], x10[0]); s14[0] = _mm_add_epi32(x14[0], x15[0]); s15[0] = _mm_sub_epi32(x14[0], x15[0]); highbd_iadst_half_butterfly_sse4_1(s2[0], 
-cospi_16_64, s2); highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3); highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6); highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7); highbd_iadst_half_butterfly_sse4_1(s10[0], cospi_16_64, s10); highbd_iadst_half_butterfly_sse4_1(s11[0], cospi_16_64, s11); highbd_iadst_half_butterfly_sse4_1(s14[0], -cospi_16_64, s14); highbd_iadst_half_butterfly_sse4_1(s15[0], cospi_16_64, s15); x2[0] = dct_const_round_shift_64bit(s2[0]); x2[1] = dct_const_round_shift_64bit(s2[1]); x3[0] = dct_const_round_shift_64bit(s3[0]); x3[1] = dct_const_round_shift_64bit(s3[1]); x6[0] = dct_const_round_shift_64bit(s6[0]); x6[1] = dct_const_round_shift_64bit(s6[1]); x7[0] = dct_const_round_shift_64bit(s7[0]); x7[1] = dct_const_round_shift_64bit(s7[1]); x10[0] = dct_const_round_shift_64bit(s10[0]); x10[1] = dct_const_round_shift_64bit(s10[1]); x11[0] = dct_const_round_shift_64bit(s11[0]); x11[1] = dct_const_round_shift_64bit(s11[1]); x14[0] = dct_const_round_shift_64bit(s14[0]); x14[1] = dct_const_round_shift_64bit(s14[1]); x15[0] = dct_const_round_shift_64bit(s15[0]); x15[1] = dct_const_round_shift_64bit(s15[1]); x2[0] = pack_4(x2[0], x2[1]); x3[0] = pack_4(x3[0], x3[1]); x6[0] = pack_4(x6[0], x6[1]); x7[0] = pack_4(x7[0], x7[1]); x10[0] = pack_4(x10[0], x10[1]); x11[0] = pack_4(x11[0], x11[1]); x14[0] = pack_4(x14[0], x14[1]); x15[0] = pack_4(x15[0], x15[1]); io[0] = x0[0]; io[1] = _mm_sub_epi32(_mm_setzero_si128(), x8[0]); io[2] = x12[0]; io[3] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]); io[4] = x6[0]; io[5] = x14[0]; io[6] = x10[0]; io[7] = x2[0]; io[8] = x3[0]; io[9] = x11[0]; io[10] = x15[0]; io[11] = x7[0]; io[12] = x5[0]; io[13] = _mm_sub_epi32(_mm_setzero_si128(), x13[0]); io[14] = x9[0]; io[15] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]); } void vp9_highbd_iht16x16_256_add_sse4_1(const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd) { int i; __m128i out[16], *in; if (bd == 8) { __m128i l[16], r[16]; in = l; for (i = 0; i < 2; i++) { highbd_load_pack_transpose_32bit_8x8(&input[0], 16, &in[0]); highbd_load_pack_transpose_32bit_8x8(&input[8], 16, &in[8]); if (tx_type == DCT_DCT || tx_type == ADST_DCT) { idct16_8col(in, in); } else { vpx_iadst16_8col_sse2(in); } in = r; input += 128; } for (i = 0; i < 16; i += 8) { int j; transpose_16bit_8x8(l + i, out); transpose_16bit_8x8(r + i, out + 8); if (tx_type == DCT_DCT || tx_type == DCT_ADST) { idct16_8col(out, out); } else { vpx_iadst16_8col_sse2(out); } for (j = 0; j < 16; ++j) { highbd_write_buffer_8(dest + j * stride, out[j], bd); } dest += 8; } } else { __m128i all[4][16]; for (i = 0; i < 4; i++) { in = all[i]; highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]); highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]); if (tx_type == DCT_DCT || tx_type == ADST_DCT) { vpx_highbd_idct16_4col_sse4_1(in); } else { highbd_iadst16_4col_sse4_1(in); } input += 4 * 16; } for (i = 0; i < 16; i += 4) { int j; transpose_32bit_4x4(all[0] + i, out + 0); transpose_32bit_4x4(all[1] + i, out + 4); transpose_32bit_4x4(all[2] + i, out + 8); transpose_32bit_4x4(all[3] + i, out + 12); if (tx_type == DCT_DCT || tx_type == DCT_ADST) { vpx_highbd_idct16_4col_sse4_1(out); } else { highbd_iadst16_4col_sse4_1(out); } for (j = 0; j < 16; ++j) { highbd_write_buffer_4(dest + j * stride, out[j], bd); } dest += 4; } } } libvpx-1.8.2/vp9/common/x86/vp9_highbd_iht4x4_add_sse4.c000066400000000000000000000107641357355204000226360ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. 
All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp9_rtcd.h" #include "vp9/common/vp9_idct.h" #include "vpx_dsp/x86/highbd_inv_txfm_sse4.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" static INLINE void highbd_iadst4_sse4_1(__m128i *const io) { const __m128i pair_c1 = pair_set_epi32(4 * sinpi_1_9, 0); const __m128i pair_c2 = pair_set_epi32(4 * sinpi_2_9, 0); const __m128i pair_c3 = pair_set_epi32(4 * sinpi_3_9, 0); const __m128i pair_c4 = pair_set_epi32(4 * sinpi_4_9, 0); __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], t0[2], t1[2], t2[2]; __m128i temp[2]; transpose_32bit_4x4(io, io); extend_64bit(io[0], temp); s0[0] = _mm_mul_epi32(pair_c1, temp[0]); s0[1] = _mm_mul_epi32(pair_c1, temp[1]); s1[0] = _mm_mul_epi32(pair_c2, temp[0]); s1[1] = _mm_mul_epi32(pair_c2, temp[1]); extend_64bit(io[1], temp); s2[0] = _mm_mul_epi32(pair_c3, temp[0]); s2[1] = _mm_mul_epi32(pair_c3, temp[1]); extend_64bit(io[2], temp); s3[0] = _mm_mul_epi32(pair_c4, temp[0]); s3[1] = _mm_mul_epi32(pair_c4, temp[1]); s4[0] = _mm_mul_epi32(pair_c1, temp[0]); s4[1] = _mm_mul_epi32(pair_c1, temp[1]); extend_64bit(io[3], temp); s5[0] = _mm_mul_epi32(pair_c2, temp[0]); s5[1] = _mm_mul_epi32(pair_c2, temp[1]); s6[0] = _mm_mul_epi32(pair_c4, temp[0]); s6[1] = _mm_mul_epi32(pair_c4, temp[1]); t0[0] = _mm_add_epi64(s0[0], s3[0]); t0[1] = _mm_add_epi64(s0[1], s3[1]); t0[0] = _mm_add_epi64(t0[0], s5[0]); t0[1] = _mm_add_epi64(t0[1], s5[1]); t1[0] = _mm_sub_epi64(s1[0], s4[0]); t1[1] = _mm_sub_epi64(s1[1], s4[1]); t1[0] = _mm_sub_epi64(t1[0], s6[0]); t1[1] = _mm_sub_epi64(t1[1], s6[1]); temp[0] = _mm_sub_epi32(io[0], io[2]); temp[0] = _mm_add_epi32(temp[0], io[3]); extend_64bit(temp[0], temp); t2[0] = _mm_mul_epi32(pair_c3, temp[0]); t2[1] = _mm_mul_epi32(pair_c3, temp[1]); s0[0] = _mm_add_epi64(t0[0], s2[0]); s0[1] = _mm_add_epi64(t0[1], s2[1]); s1[0] = _mm_add_epi64(t1[0], s2[0]); s1[1] = _mm_add_epi64(t1[1], s2[1]); s3[0] = _mm_add_epi64(t0[0], t1[0]); s3[1] = _mm_add_epi64(t0[1], t1[1]); s3[0] = _mm_sub_epi64(s3[0], s2[0]); s3[1] = _mm_sub_epi64(s3[1], s2[1]); s0[0] = dct_const_round_shift_64bit(s0[0]); s0[1] = dct_const_round_shift_64bit(s0[1]); s1[0] = dct_const_round_shift_64bit(s1[0]); s1[1] = dct_const_round_shift_64bit(s1[1]); s2[0] = dct_const_round_shift_64bit(t2[0]); s2[1] = dct_const_round_shift_64bit(t2[1]); s3[0] = dct_const_round_shift_64bit(s3[0]); s3[1] = dct_const_round_shift_64bit(s3[1]); io[0] = pack_4(s0[0], s0[1]); io[1] = pack_4(s1[0], s1[1]); io[2] = pack_4(s2[0], s2[1]); io[3] = pack_4(s3[0], s3[1]); } void vp9_highbd_iht4x4_16_add_sse4_1(const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd) { __m128i io[4]; io[0] = _mm_load_si128((const __m128i *)(input + 0)); io[1] = _mm_load_si128((const __m128i *)(input + 4)); io[2] = _mm_load_si128((const __m128i *)(input + 8)); io[3] = _mm_load_si128((const __m128i *)(input + 12)); if (bd == 8) { __m128i io_short[2]; io_short[0] = _mm_packs_epi32(io[0], io[1]); io_short[1] = _mm_packs_epi32(io[2], io[3]); if (tx_type == DCT_DCT || tx_type == ADST_DCT) { idct4_sse2(io_short); } else { iadst4_sse2(io_short); } if (tx_type == DCT_DCT || tx_type == DCT_ADST) { 
idct4_sse2(io_short); } else { iadst4_sse2(io_short); } io_short[0] = _mm_add_epi16(io_short[0], _mm_set1_epi16(8)); io_short[1] = _mm_add_epi16(io_short[1], _mm_set1_epi16(8)); io[0] = _mm_srai_epi16(io_short[0], 4); io[1] = _mm_srai_epi16(io_short[1], 4); } else { if (tx_type == DCT_DCT || tx_type == ADST_DCT) { highbd_idct4_sse4_1(io); } else { highbd_iadst4_sse4_1(io); } if (tx_type == DCT_DCT || tx_type == DCT_ADST) { highbd_idct4_sse4_1(io); } else { highbd_iadst4_sse4_1(io); } io[0] = wraplow_16bit_shift4(io[0], io[1], _mm_set1_epi32(8)); io[1] = wraplow_16bit_shift4(io[2], io[3], _mm_set1_epi32(8)); } recon_and_store_4x4(io, dest, stride, bd); } libvpx-1.8.2/vp9/common/x86/vp9_highbd_iht8x8_add_sse4.c000066400000000000000000000225061357355204000226430ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp9_rtcd.h" #include "vp9/common/vp9_idct.h" #include "vpx_dsp/x86/highbd_inv_txfm_sse4.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" static INLINE void highbd_iadst_half_butterfly_sse4_1(const __m128i in, const int c, __m128i *const s) { const __m128i pair_c = pair_set_epi32(4 * c, 0); __m128i x[2]; extend_64bit(in, x); s[0] = _mm_mul_epi32(pair_c, x[0]); s[1] = _mm_mul_epi32(pair_c, x[1]); } static INLINE void highbd_iadst_butterfly_sse4_1(const __m128i in0, const __m128i in1, const int c0, const int c1, __m128i *const s0, __m128i *const s1) { const __m128i pair_c0 = pair_set_epi32(4 * c0, 0); const __m128i pair_c1 = pair_set_epi32(4 * c1, 0); __m128i t00[2], t01[2], t10[2], t11[2]; __m128i x0[2], x1[2]; extend_64bit(in0, x0); extend_64bit(in1, x1); t00[0] = _mm_mul_epi32(pair_c0, x0[0]); t00[1] = _mm_mul_epi32(pair_c0, x0[1]); t01[0] = _mm_mul_epi32(pair_c0, x1[0]); t01[1] = _mm_mul_epi32(pair_c0, x1[1]); t10[0] = _mm_mul_epi32(pair_c1, x0[0]); t10[1] = _mm_mul_epi32(pair_c1, x0[1]); t11[0] = _mm_mul_epi32(pair_c1, x1[0]); t11[1] = _mm_mul_epi32(pair_c1, x1[1]); s0[0] = _mm_add_epi64(t00[0], t11[0]); s0[1] = _mm_add_epi64(t00[1], t11[1]); s1[0] = _mm_sub_epi64(t10[0], t01[0]); s1[1] = _mm_sub_epi64(t10[1], t01[1]); } static void highbd_iadst8_sse4_1(__m128i *const io) { __m128i s0[2], s1[2], s2[2], s3[2], s4[2], s5[2], s6[2], s7[2]; __m128i x0[2], x1[2], x2[2], x3[2], x4[2], x5[2], x6[2], x7[2]; transpose_32bit_4x4x2(io, io); // stage 1 highbd_iadst_butterfly_sse4_1(io[7], io[0], cospi_2_64, cospi_30_64, s0, s1); highbd_iadst_butterfly_sse4_1(io[3], io[4], cospi_18_64, cospi_14_64, s4, s5); x0[0] = _mm_add_epi64(s0[0], s4[0]); x0[1] = _mm_add_epi64(s0[1], s4[1]); x1[0] = _mm_add_epi64(s1[0], s5[0]); x1[1] = _mm_add_epi64(s1[1], s5[1]); x4[0] = _mm_sub_epi64(s0[0], s4[0]); x4[1] = _mm_sub_epi64(s0[1], s4[1]); x5[0] = _mm_sub_epi64(s1[0], s5[0]); x5[1] = _mm_sub_epi64(s1[1], s5[1]); highbd_iadst_butterfly_sse4_1(io[5], io[2], cospi_10_64, cospi_22_64, s2, s3); highbd_iadst_butterfly_sse4_1(io[1], io[6], cospi_26_64, cospi_6_64, s6, s7); x2[0] = _mm_add_epi64(s2[0], s6[0]); x2[1] = _mm_add_epi64(s2[1], s6[1]); x3[0] = _mm_add_epi64(s3[0], s7[0]); x3[1] = _mm_add_epi64(s3[1], s7[1]); x6[0] = _mm_sub_epi64(s2[0], s6[0]); x6[1] = 
_mm_sub_epi64(s2[1], s6[1]); x7[0] = _mm_sub_epi64(s3[0], s7[0]); x7[1] = _mm_sub_epi64(s3[1], s7[1]); x0[0] = dct_const_round_shift_64bit(x0[0]); x0[1] = dct_const_round_shift_64bit(x0[1]); x1[0] = dct_const_round_shift_64bit(x1[0]); x1[1] = dct_const_round_shift_64bit(x1[1]); x2[0] = dct_const_round_shift_64bit(x2[0]); x2[1] = dct_const_round_shift_64bit(x2[1]); x3[0] = dct_const_round_shift_64bit(x3[0]); x3[1] = dct_const_round_shift_64bit(x3[1]); x4[0] = dct_const_round_shift_64bit(x4[0]); x4[1] = dct_const_round_shift_64bit(x4[1]); x5[0] = dct_const_round_shift_64bit(x5[0]); x5[1] = dct_const_round_shift_64bit(x5[1]); x6[0] = dct_const_round_shift_64bit(x6[0]); x6[1] = dct_const_round_shift_64bit(x6[1]); x7[0] = dct_const_round_shift_64bit(x7[0]); x7[1] = dct_const_round_shift_64bit(x7[1]); s0[0] = pack_4(x0[0], x0[1]); // s0 = x0; s1[0] = pack_4(x1[0], x1[1]); // s1 = x1; s2[0] = pack_4(x2[0], x2[1]); // s2 = x2; s3[0] = pack_4(x3[0], x3[1]); // s3 = x3; x4[0] = pack_4(x4[0], x4[1]); x5[0] = pack_4(x5[0], x5[1]); x6[0] = pack_4(x6[0], x6[1]); x7[0] = pack_4(x7[0], x7[1]); // stage 2 x0[0] = _mm_add_epi32(s0[0], s2[0]); x1[0] = _mm_add_epi32(s1[0], s3[0]); x2[0] = _mm_sub_epi32(s0[0], s2[0]); x3[0] = _mm_sub_epi32(s1[0], s3[0]); highbd_iadst_butterfly_sse4_1(x4[0], x5[0], cospi_8_64, cospi_24_64, s4, s5); highbd_iadst_butterfly_sse4_1(x7[0], x6[0], cospi_24_64, cospi_8_64, s7, s6); x4[0] = _mm_add_epi64(s4[0], s6[0]); x4[1] = _mm_add_epi64(s4[1], s6[1]); x5[0] = _mm_add_epi64(s5[0], s7[0]); x5[1] = _mm_add_epi64(s5[1], s7[1]); x6[0] = _mm_sub_epi64(s4[0], s6[0]); x6[1] = _mm_sub_epi64(s4[1], s6[1]); x7[0] = _mm_sub_epi64(s5[0], s7[0]); x7[1] = _mm_sub_epi64(s5[1], s7[1]); x4[0] = dct_const_round_shift_64bit(x4[0]); x4[1] = dct_const_round_shift_64bit(x4[1]); x5[0] = dct_const_round_shift_64bit(x5[0]); x5[1] = dct_const_round_shift_64bit(x5[1]); x6[0] = dct_const_round_shift_64bit(x6[0]); x6[1] = dct_const_round_shift_64bit(x6[1]); x7[0] = dct_const_round_shift_64bit(x7[0]); x7[1] = dct_const_round_shift_64bit(x7[1]); x4[0] = pack_4(x4[0], x4[1]); x5[0] = pack_4(x5[0], x5[1]); x6[0] = pack_4(x6[0], x6[1]); x7[0] = pack_4(x7[0], x7[1]); // stage 3 s2[0] = _mm_add_epi32(x2[0], x3[0]); s3[0] = _mm_sub_epi32(x2[0], x3[0]); s6[0] = _mm_add_epi32(x6[0], x7[0]); s7[0] = _mm_sub_epi32(x6[0], x7[0]); highbd_iadst_half_butterfly_sse4_1(s2[0], cospi_16_64, s2); highbd_iadst_half_butterfly_sse4_1(s3[0], cospi_16_64, s3); highbd_iadst_half_butterfly_sse4_1(s6[0], cospi_16_64, s6); highbd_iadst_half_butterfly_sse4_1(s7[0], cospi_16_64, s7); x2[0] = dct_const_round_shift_64bit(s2[0]); x2[1] = dct_const_round_shift_64bit(s2[1]); x3[0] = dct_const_round_shift_64bit(s3[0]); x3[1] = dct_const_round_shift_64bit(s3[1]); x6[0] = dct_const_round_shift_64bit(s6[0]); x6[1] = dct_const_round_shift_64bit(s6[1]); x7[0] = dct_const_round_shift_64bit(s7[0]); x7[1] = dct_const_round_shift_64bit(s7[1]); x2[0] = pack_4(x2[0], x2[1]); x3[0] = pack_4(x3[0], x3[1]); x6[0] = pack_4(x6[0], x6[1]); x7[0] = pack_4(x7[0], x7[1]); io[0] = x0[0]; io[1] = _mm_sub_epi32(_mm_setzero_si128(), x4[0]); io[2] = x6[0]; io[3] = _mm_sub_epi32(_mm_setzero_si128(), x2[0]); io[4] = x3[0]; io[5] = _mm_sub_epi32(_mm_setzero_si128(), x7[0]); io[6] = x5[0]; io[7] = _mm_sub_epi32(_mm_setzero_si128(), x1[0]); } void vp9_highbd_iht8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest, int stride, int tx_type, int bd) { __m128i io[16]; io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0)); io[4] = _mm_load_si128((const __m128i *)(input + 
0 * 8 + 4)); io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0)); io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4)); io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0)); io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4)); io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0)); io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4)); io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0)); io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4)); io[9] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 0)); io[13] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 4)); io[10] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 0)); io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4)); io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0)); io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4)); if (bd == 8) { __m128i io_short[8]; io_short[0] = _mm_packs_epi32(io[0], io[4]); io_short[1] = _mm_packs_epi32(io[1], io[5]); io_short[2] = _mm_packs_epi32(io[2], io[6]); io_short[3] = _mm_packs_epi32(io[3], io[7]); io_short[4] = _mm_packs_epi32(io[8], io[12]); io_short[5] = _mm_packs_epi32(io[9], io[13]); io_short[6] = _mm_packs_epi32(io[10], io[14]); io_short[7] = _mm_packs_epi32(io[11], io[15]); if (tx_type == DCT_DCT || tx_type == ADST_DCT) { vpx_idct8_sse2(io_short); } else { iadst8_sse2(io_short); } if (tx_type == DCT_DCT || tx_type == DCT_ADST) { vpx_idct8_sse2(io_short); } else { iadst8_sse2(io_short); } round_shift_8x8(io_short, io); } else { __m128i temp[4]; if (tx_type == DCT_DCT || tx_type == ADST_DCT) { vpx_highbd_idct8x8_half1d_sse4_1(io); vpx_highbd_idct8x8_half1d_sse4_1(&io[8]); } else { highbd_iadst8_sse4_1(io); highbd_iadst8_sse4_1(&io[8]); } temp[0] = io[4]; temp[1] = io[5]; temp[2] = io[6]; temp[3] = io[7]; io[4] = io[8]; io[5] = io[9]; io[6] = io[10]; io[7] = io[11]; if (tx_type == DCT_DCT || tx_type == DCT_ADST) { vpx_highbd_idct8x8_half1d_sse4_1(io); io[8] = temp[0]; io[9] = temp[1]; io[10] = temp[2]; io[11] = temp[3]; vpx_highbd_idct8x8_half1d_sse4_1(&io[8]); } else { highbd_iadst8_sse4_1(io); io[8] = temp[0]; io[9] = temp[1]; io[10] = temp[2]; io[11] = temp[3]; highbd_iadst8_sse4_1(&io[8]); } highbd_idct8x8_final_round(io); } recon_and_store_8x8(io, dest, stride, bd); } libvpx-1.8.2/vp9/common/x86/vp9_idct_intrin_sse2.c000066400000000000000000000161501357355204000216740ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vp9_rtcd.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" void vp9_iht4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { __m128i in[2]; const __m128i eight = _mm_set1_epi16(8); in[0] = load_input_data8(input); in[1] = load_input_data8(input + 8); switch (tx_type) { case DCT_DCT: idct4_sse2(in); idct4_sse2(in); break; case ADST_DCT: idct4_sse2(in); iadst4_sse2(in); break; case DCT_ADST: iadst4_sse2(in); idct4_sse2(in); break; default: assert(tx_type == ADST_ADST); iadst4_sse2(in); iadst4_sse2(in); break; } // Final round and shift in[0] = _mm_add_epi16(in[0], eight); in[1] = _mm_add_epi16(in[1], eight); in[0] = _mm_srai_epi16(in[0], 4); in[1] = _mm_srai_epi16(in[1], 4); recon_and_store4x4_sse2(in, dest, stride); } void vp9_iht8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { __m128i in[8]; const __m128i final_rounding = _mm_set1_epi16(1 << 4); // load input data in[0] = load_input_data8(input); in[1] = load_input_data8(input + 8 * 1); in[2] = load_input_data8(input + 8 * 2); in[3] = load_input_data8(input + 8 * 3); in[4] = load_input_data8(input + 8 * 4); in[5] = load_input_data8(input + 8 * 5); in[6] = load_input_data8(input + 8 * 6); in[7] = load_input_data8(input + 8 * 7); switch (tx_type) { case DCT_DCT: vpx_idct8_sse2(in); vpx_idct8_sse2(in); break; case ADST_DCT: vpx_idct8_sse2(in); iadst8_sse2(in); break; case DCT_ADST: iadst8_sse2(in); vpx_idct8_sse2(in); break; default: assert(tx_type == ADST_ADST); iadst8_sse2(in); iadst8_sse2(in); break; } // Final rounding and shift in[0] = _mm_adds_epi16(in[0], final_rounding); in[1] = _mm_adds_epi16(in[1], final_rounding); in[2] = _mm_adds_epi16(in[2], final_rounding); in[3] = _mm_adds_epi16(in[3], final_rounding); in[4] = _mm_adds_epi16(in[4], final_rounding); in[5] = _mm_adds_epi16(in[5], final_rounding); in[6] = _mm_adds_epi16(in[6], final_rounding); in[7] = _mm_adds_epi16(in[7], final_rounding); in[0] = _mm_srai_epi16(in[0], 5); in[1] = _mm_srai_epi16(in[1], 5); in[2] = _mm_srai_epi16(in[2], 5); in[3] = _mm_srai_epi16(in[3], 5); in[4] = _mm_srai_epi16(in[4], 5); in[5] = _mm_srai_epi16(in[5], 5); in[6] = _mm_srai_epi16(in[6], 5); in[7] = _mm_srai_epi16(in[7], 5); recon_and_store(dest + 0 * stride, in[0]); recon_and_store(dest + 1 * stride, in[1]); recon_and_store(dest + 2 * stride, in[2]); recon_and_store(dest + 3 * stride, in[3]); recon_and_store(dest + 4 * stride, in[4]); recon_and_store(dest + 5 * stride, in[5]); recon_and_store(dest + 6 * stride, in[6]); recon_and_store(dest + 7 * stride, in[7]); } static INLINE void load_buffer_8x16(const tran_low_t *const input, __m128i *const in) { in[0] = load_input_data8(input + 0 * 16); in[1] = load_input_data8(input + 1 * 16); in[2] = load_input_data8(input + 2 * 16); in[3] = load_input_data8(input + 3 * 16); in[4] = load_input_data8(input + 4 * 16); in[5] = load_input_data8(input + 5 * 16); in[6] = load_input_data8(input + 6 * 16); in[7] = load_input_data8(input + 7 * 16); in[8] = load_input_data8(input + 8 * 16); in[9] = load_input_data8(input + 9 * 16); in[10] = load_input_data8(input + 10 * 16); in[11] = load_input_data8(input + 11 * 16); in[12] = load_input_data8(input + 12 * 16); in[13] = load_input_data8(input + 13 * 16); in[14] = load_input_data8(input + 14 * 16); in[15] = load_input_data8(input + 15 * 16); } static INLINE void write_buffer_8x16(uint8_t *const dest, __m128i *const in, const int stride) { const __m128i final_rounding = _mm_set1_epi16(1 << 5); // Final rounding and shift in[0] = _mm_adds_epi16(in[0], 
final_rounding); in[1] = _mm_adds_epi16(in[1], final_rounding); in[2] = _mm_adds_epi16(in[2], final_rounding); in[3] = _mm_adds_epi16(in[3], final_rounding); in[4] = _mm_adds_epi16(in[4], final_rounding); in[5] = _mm_adds_epi16(in[5], final_rounding); in[6] = _mm_adds_epi16(in[6], final_rounding); in[7] = _mm_adds_epi16(in[7], final_rounding); in[8] = _mm_adds_epi16(in[8], final_rounding); in[9] = _mm_adds_epi16(in[9], final_rounding); in[10] = _mm_adds_epi16(in[10], final_rounding); in[11] = _mm_adds_epi16(in[11], final_rounding); in[12] = _mm_adds_epi16(in[12], final_rounding); in[13] = _mm_adds_epi16(in[13], final_rounding); in[14] = _mm_adds_epi16(in[14], final_rounding); in[15] = _mm_adds_epi16(in[15], final_rounding); in[0] = _mm_srai_epi16(in[0], 6); in[1] = _mm_srai_epi16(in[1], 6); in[2] = _mm_srai_epi16(in[2], 6); in[3] = _mm_srai_epi16(in[3], 6); in[4] = _mm_srai_epi16(in[4], 6); in[5] = _mm_srai_epi16(in[5], 6); in[6] = _mm_srai_epi16(in[6], 6); in[7] = _mm_srai_epi16(in[7], 6); in[8] = _mm_srai_epi16(in[8], 6); in[9] = _mm_srai_epi16(in[9], 6); in[10] = _mm_srai_epi16(in[10], 6); in[11] = _mm_srai_epi16(in[11], 6); in[12] = _mm_srai_epi16(in[12], 6); in[13] = _mm_srai_epi16(in[13], 6); in[14] = _mm_srai_epi16(in[14], 6); in[15] = _mm_srai_epi16(in[15], 6); recon_and_store(dest + 0 * stride, in[0]); recon_and_store(dest + 1 * stride, in[1]); recon_and_store(dest + 2 * stride, in[2]); recon_and_store(dest + 3 * stride, in[3]); recon_and_store(dest + 4 * stride, in[4]); recon_and_store(dest + 5 * stride, in[5]); recon_and_store(dest + 6 * stride, in[6]); recon_and_store(dest + 7 * stride, in[7]); recon_and_store(dest + 8 * stride, in[8]); recon_and_store(dest + 9 * stride, in[9]); recon_and_store(dest + 10 * stride, in[10]); recon_and_store(dest + 11 * stride, in[11]); recon_and_store(dest + 12 * stride, in[12]); recon_and_store(dest + 13 * stride, in[13]); recon_and_store(dest + 14 * stride, in[14]); recon_and_store(dest + 15 * stride, in[15]); } void vp9_iht16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int stride, int tx_type) { __m128i in0[16], in1[16]; load_buffer_8x16(input, in0); input += 8; load_buffer_8x16(input, in1); switch (tx_type) { case DCT_DCT: idct16_sse2(in0, in1); idct16_sse2(in0, in1); break; case ADST_DCT: idct16_sse2(in0, in1); iadst16_sse2(in0, in1); break; case DCT_ADST: iadst16_sse2(in0, in1); idct16_sse2(in0, in1); break; default: assert(tx_type == ADST_ADST); iadst16_sse2(in0, in1); iadst16_sse2(in0, in1); break; } write_buffer_8x16(dest, in0, stride); dest += 8; write_buffer_8x16(dest, in1, stride); } libvpx-1.8.2/vp9/common/x86/vp9_mfqe_sse2.asm000066400000000000000000000165671357355204000206700ustar00rootroot00000000000000; ; Copyright (c) 2015 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; ; This file is a duplicate of mfqe_sse2.asm in VP8. ; TODO(jackychen): Find a way to fix the duplicate. 
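;
; Illustrative note (added; not part of the original file): with
; MFQE_PRECISION = 4, tMFQE below holds 1 << 4 = 16 and tMFQE_round
; holds 8, so each filter_by_weight routine computes, per pixel,
;
;   dst = (src * src_weight + dst * (16 - src_weight) + 8) >> 4
;
; Worked example: src = 200, dst = 100, src_weight = 12 gives
; (2400 + 400 + 8) >> 4 = 2808 >> 4 = 175.
;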
%include "vpx_ports/x86_abi_support.asm" SECTION .text ;void vp9_filter_by_weight16x16_sse2 ;( ; unsigned char *src, ; int src_stride, ; unsigned char *dst, ; int dst_stride, ; int src_weight ;) global sym(vp9_filter_by_weight16x16_sse2) PRIVATE sym(vp9_filter_by_weight16x16_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 SAVE_XMM 6 GET_GOT rbx push rsi push rdi ; end prolog movd xmm0, arg(4) ; src_weight pshuflw xmm0, xmm0, 0x0 ; replicate to all low words punpcklqdq xmm0, xmm0 ; replicate to all hi words movdqa xmm1, [GLOBAL(tMFQE)] psubw xmm1, xmm0 ; dst_weight mov rax, arg(0) ; src mov rsi, arg(1) ; src_stride mov rdx, arg(2) ; dst mov rdi, arg(3) ; dst_stride mov rcx, 16 ; loop count pxor xmm6, xmm6 .combine: movdqa xmm2, [rax] movdqa xmm4, [rdx] add rax, rsi ; src * src_weight movdqa xmm3, xmm2 punpcklbw xmm2, xmm6 punpckhbw xmm3, xmm6 pmullw xmm2, xmm0 pmullw xmm3, xmm0 ; dst * dst_weight movdqa xmm5, xmm4 punpcklbw xmm4, xmm6 punpckhbw xmm5, xmm6 pmullw xmm4, xmm1 pmullw xmm5, xmm1 ; sum, round and shift paddw xmm2, xmm4 paddw xmm3, xmm5 paddw xmm2, [GLOBAL(tMFQE_round)] paddw xmm3, [GLOBAL(tMFQE_round)] psrlw xmm2, 4 psrlw xmm3, 4 packuswb xmm2, xmm3 movdqa [rdx], xmm2 add rdx, rdi dec rcx jnz .combine ; begin epilog pop rdi pop rsi RESTORE_GOT RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vp9_filter_by_weight8x8_sse2 ;( ; unsigned char *src, ; int src_stride, ; unsigned char *dst, ; int dst_stride, ; int src_weight ;) global sym(vp9_filter_by_weight8x8_sse2) PRIVATE sym(vp9_filter_by_weight8x8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 GET_GOT rbx push rsi push rdi ; end prolog movd xmm0, arg(4) ; src_weight pshuflw xmm0, xmm0, 0x0 ; replicate to all low words punpcklqdq xmm0, xmm0 ; replicate to all hi words movdqa xmm1, [GLOBAL(tMFQE)] psubw xmm1, xmm0 ; dst_weight mov rax, arg(0) ; src mov rsi, arg(1) ; src_stride mov rdx, arg(2) ; dst mov rdi, arg(3) ; dst_stride mov rcx, 8 ; loop count pxor xmm4, xmm4 .combine: movq xmm2, [rax] movq xmm3, [rdx] add rax, rsi ; src * src_weight punpcklbw xmm2, xmm4 pmullw xmm2, xmm0 ; dst * dst_weight punpcklbw xmm3, xmm4 pmullw xmm3, xmm1 ; sum, round and shift paddw xmm2, xmm3 paddw xmm2, [GLOBAL(tMFQE_round)] psrlw xmm2, 4 packuswb xmm2, xmm4 movq [rdx], xmm2 add rdx, rdi dec rcx jnz .combine ; begin epilog pop rdi pop rsi RESTORE_GOT UNSHADOW_ARGS pop rbp ret ;void vp9_variance_and_sad_16x16_sse2 | arg ;( ; unsigned char *src1, 0 ; int stride1, 1 ; unsigned char *src2, 2 ; int stride2, 3 ; unsigned int *variance, 4 ; unsigned int *sad, 5 ;) global sym(vp9_variance_and_sad_16x16_sse2) PRIVATE sym(vp9_variance_and_sad_16x16_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 GET_GOT rbx push rsi push rdi ; end prolog mov rax, arg(0) ; src1 mov rcx, arg(1) ; stride1 mov rdx, arg(2) ; src2 mov rdi, arg(3) ; stride2 mov rsi, 16 ; block height ; Prep accumulator registers pxor xmm3, xmm3 ; SAD pxor xmm4, xmm4 ; sum of src2 pxor xmm5, xmm5 ; sum of src2^2 ; Because we're working with the actual output frames ; we can't depend on any kind of data alignment. .accumulate: movdqa xmm0, [rax] ; src1 movdqa xmm1, [rdx] ; src2 add rax, rcx ; src1 + stride1 add rdx, rdi ; src2 + stride2 ; SAD(src1, src2) psadbw xmm0, xmm1 paddusw xmm3, xmm0 ; SUM(src2) pxor xmm2, xmm2 psadbw xmm2, xmm1 ; sum src2 by misusing SAD against 0 paddusw xmm4, xmm2 ; pmaddubsw would be ideal if it took two unsigned values. instead, ; it expects a signed and an unsigned value. so instead we zero extend ; and operate on words. 
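    ; Added note on the widening below: interleaving the src2 bytes with
    ; a zeroed register (punpcklbw/punpckhbw against xmm2 = 0) produces
    ; two registers of eight unsigned 16-bit words, and pmaddwd squares
    ; each word and sums adjacent pairs into 32-bit lanes, i.e. a sum of
    ; squares. Overflow is impossible here: one pair contributes at most
    ; 2 * 255^2 = 130050, and only 16 rows are accumulated per call.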
pxor xmm2, xmm2 movdqa xmm0, xmm1 punpcklbw xmm0, xmm2 punpckhbw xmm1, xmm2 pmaddwd xmm0, xmm0 pmaddwd xmm1, xmm1 paddd xmm5, xmm0 paddd xmm5, xmm1 sub rsi, 1 jnz .accumulate ; phaddd only operates on adjacent double words. ; Finalize SAD and store movdqa xmm0, xmm3 psrldq xmm0, 8 paddusw xmm0, xmm3 paddd xmm0, [GLOBAL(t128)] psrld xmm0, 8 mov rax, arg(5) movd [rax], xmm0 ; Accumulate sum of src2 movdqa xmm0, xmm4 psrldq xmm0, 8 paddusw xmm0, xmm4 ; Square src2. Ignore high value pmuludq xmm0, xmm0 psrld xmm0, 8 ; phaddw could be used to sum adjacent values but we want ; all the values summed. promote to doubles, accumulate, ; shift and sum pxor xmm2, xmm2 movdqa xmm1, xmm5 punpckldq xmm1, xmm2 punpckhdq xmm5, xmm2 paddd xmm1, xmm5 movdqa xmm2, xmm1 psrldq xmm1, 8 paddd xmm1, xmm2 psubd xmm1, xmm0 ; (variance + 128) >> 8 paddd xmm1, [GLOBAL(t128)] psrld xmm1, 8 mov rax, arg(4) movd [rax], xmm1 ; begin epilog pop rdi pop rsi RESTORE_GOT UNSHADOW_ARGS pop rbp ret SECTION_RODATA align 16 t128: %ifndef __NASM_VER__ ddq 128 %elif CONFIG_BIG_ENDIAN dq 0, 128 %else dq 128, 0 %endif align 16 tMFQE: ; 1 << MFQE_PRECISION times 8 dw 0x10 align 16 tMFQE_round: ; 1 << (MFQE_PRECISION - 1) times 8 dw 0x08 libvpx-1.8.2/vp9/decoder/000077500000000000000000000000001357355204000151555ustar00rootroot00000000000000libvpx-1.8.2/vp9/decoder/vp9_decodeframe.c000066400000000000000000003400471357355204000203650ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <assert.h>
#include <stdlib.h>  // qsort()

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"

#include "vpx_dsp/bitreader_buffer.h"
#include "vpx_dsp/bitreader.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/mem_ops.h"
#include "vpx_scale/vpx_scale.h"
#include "vpx_util/vpx_thread.h"
#if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG
#include "vpx_util/vpx_debug_util.h"
#endif  // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG

#include "vp9/common/vp9_alloccommon.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_thread_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_tile_common.h"

#include "vp9/decoder/vp9_decodeframe.h"
#include "vp9/decoder/vp9_detokenize.h"
#include "vp9/decoder/vp9_decodemv.h"
#include "vp9/decoder/vp9_decoder.h"
#include "vp9/decoder/vp9_dsubexp.h"
#include "vp9/decoder/vp9_job_queue.h"

#define MAX_VP9_HEADER_SIZE 80

typedef int (*predict_recon_func)(TileWorkerData *twd, MODE_INFO *const mi,
                                  int plane, int row, int col,
                                  TX_SIZE tx_size);

typedef void (*intra_recon_func)(TileWorkerData *twd, MODE_INFO *const mi,
                                 int plane, int row, int col,
                                 TX_SIZE tx_size);

static int read_is_valid(const uint8_t *start, size_t len,
                         const uint8_t *end) {
  return len != 0 && len <= (size_t)(end - start);
}

static int decode_unsigned_max(struct vpx_read_bit_buffer *rb, int max) {
  const int data = vpx_rb_read_literal(rb, get_unsigned_bits(max));
  return data > max ? max : data;
}

static TX_MODE read_tx_mode(vpx_reader *r) {
  TX_MODE tx_mode = vpx_read_literal(r, 2);
  if (tx_mode == ALLOW_32X32) tx_mode += vpx_read_bit(r);
  return tx_mode;
}

static void read_tx_mode_probs(struct tx_probs *tx_probs, vpx_reader *r) {
  int i, j;

  for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
    for (j = 0; j < TX_SIZES - 3; ++j)
      vp9_diff_update_prob(r, &tx_probs->p8x8[i][j]);
  for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
    for (j = 0; j < TX_SIZES - 2; ++j)
      vp9_diff_update_prob(r, &tx_probs->p16x16[i][j]);
  for (i = 0; i < TX_SIZE_CONTEXTS; ++i)
    for (j = 0; j < TX_SIZES - 1; ++j)
      vp9_diff_update_prob(r, &tx_probs->p32x32[i][j]);
}

static void read_switchable_interp_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
  int i, j;
  for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
    for (i = 0; i < SWITCHABLE_FILTERS - 1; ++i)
      vp9_diff_update_prob(r, &fc->switchable_interp_prob[j][i]);
}

static void read_inter_mode_probs(FRAME_CONTEXT *fc, vpx_reader *r) {
  int i, j;
  for (i = 0; i < INTER_MODE_CONTEXTS; ++i)
    for (j = 0; j < INTER_MODES - 1; ++j)
      vp9_diff_update_prob(r, &fc->inter_mode_probs[i][j]);
}

static REFERENCE_MODE read_frame_reference_mode(const VP9_COMMON *cm,
                                                vpx_reader *r) {
  if (vp9_compound_reference_allowed(cm)) {
    return vpx_read_bit(r) ? (vpx_read_bit(r) ?
REFERENCE_MODE_SELECT : COMPOUND_REFERENCE) : SINGLE_REFERENCE; } else { return SINGLE_REFERENCE; } } static void read_frame_reference_mode_probs(VP9_COMMON *cm, vpx_reader *r) { FRAME_CONTEXT *const fc = cm->fc; int i; if (cm->reference_mode == REFERENCE_MODE_SELECT) for (i = 0; i < COMP_INTER_CONTEXTS; ++i) vp9_diff_update_prob(r, &fc->comp_inter_prob[i]); if (cm->reference_mode != COMPOUND_REFERENCE) for (i = 0; i < REF_CONTEXTS; ++i) { vp9_diff_update_prob(r, &fc->single_ref_prob[i][0]); vp9_diff_update_prob(r, &fc->single_ref_prob[i][1]); } if (cm->reference_mode != SINGLE_REFERENCE) for (i = 0; i < REF_CONTEXTS; ++i) vp9_diff_update_prob(r, &fc->comp_ref_prob[i]); } static void update_mv_probs(vpx_prob *p, int n, vpx_reader *r) { int i; for (i = 0; i < n; ++i) if (vpx_read(r, MV_UPDATE_PROB)) p[i] = (vpx_read_literal(r, 7) << 1) | 1; } static void read_mv_probs(nmv_context *ctx, int allow_hp, vpx_reader *r) { int i, j; update_mv_probs(ctx->joints, MV_JOINTS - 1, r); for (i = 0; i < 2; ++i) { nmv_component *const comp_ctx = &ctx->comps[i]; update_mv_probs(&comp_ctx->sign, 1, r); update_mv_probs(comp_ctx->classes, MV_CLASSES - 1, r); update_mv_probs(comp_ctx->class0, CLASS0_SIZE - 1, r); update_mv_probs(comp_ctx->bits, MV_OFFSET_BITS, r); } for (i = 0; i < 2; ++i) { nmv_component *const comp_ctx = &ctx->comps[i]; for (j = 0; j < CLASS0_SIZE; ++j) update_mv_probs(comp_ctx->class0_fp[j], MV_FP_SIZE - 1, r); update_mv_probs(comp_ctx->fp, 3, r); } if (allow_hp) { for (i = 0; i < 2; ++i) { nmv_component *const comp_ctx = &ctx->comps[i]; update_mv_probs(&comp_ctx->class0_hp, 1, r); update_mv_probs(&comp_ctx->hp, 1, r); } } } static void inverse_transform_block_inter(MACROBLOCKD *xd, int plane, const TX_SIZE tx_size, uint8_t *dst, int stride, int eob) { struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *const dqcoeff = pd->dqcoeff; assert(eob > 0); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst); if (xd->lossless) { vp9_highbd_iwht4x4_add(dqcoeff, dst16, stride, eob, xd->bd); } else { switch (tx_size) { case TX_4X4: vp9_highbd_idct4x4_add(dqcoeff, dst16, stride, eob, xd->bd); break; case TX_8X8: vp9_highbd_idct8x8_add(dqcoeff, dst16, stride, eob, xd->bd); break; case TX_16X16: vp9_highbd_idct16x16_add(dqcoeff, dst16, stride, eob, xd->bd); break; case TX_32X32: vp9_highbd_idct32x32_add(dqcoeff, dst16, stride, eob, xd->bd); break; default: assert(0 && "Invalid transform size"); } } } else { if (xd->lossless) { vp9_iwht4x4_add(dqcoeff, dst, stride, eob); } else { switch (tx_size) { case TX_4X4: vp9_idct4x4_add(dqcoeff, dst, stride, eob); break; case TX_8X8: vp9_idct8x8_add(dqcoeff, dst, stride, eob); break; case TX_16X16: vp9_idct16x16_add(dqcoeff, dst, stride, eob); break; case TX_32X32: vp9_idct32x32_add(dqcoeff, dst, stride, eob); break; default: assert(0 && "Invalid transform size"); return; } } } #else if (xd->lossless) { vp9_iwht4x4_add(dqcoeff, dst, stride, eob); } else { switch (tx_size) { case TX_4X4: vp9_idct4x4_add(dqcoeff, dst, stride, eob); break; case TX_8X8: vp9_idct8x8_add(dqcoeff, dst, stride, eob); break; case TX_16X16: vp9_idct16x16_add(dqcoeff, dst, stride, eob); break; case TX_32X32: vp9_idct32x32_add(dqcoeff, dst, stride, eob); break; default: assert(0 && "Invalid transform size"); return; } } #endif // CONFIG_VP9_HIGHBITDEPTH if (eob == 1) { dqcoeff[0] = 0; } else { if (tx_size <= TX_16X16 && eob <= 10) memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); else if 
(tx_size == TX_32X32 && eob <= 34) memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); else memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); } } static void inverse_transform_block_intra(MACROBLOCKD *xd, int plane, const TX_TYPE tx_type, const TX_SIZE tx_size, uint8_t *dst, int stride, int eob) { struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *const dqcoeff = pd->dqcoeff; assert(eob > 0); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst); if (xd->lossless) { vp9_highbd_iwht4x4_add(dqcoeff, dst16, stride, eob, xd->bd); } else { switch (tx_size) { case TX_4X4: vp9_highbd_iht4x4_add(tx_type, dqcoeff, dst16, stride, eob, xd->bd); break; case TX_8X8: vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, stride, eob, xd->bd); break; case TX_16X16: vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, stride, eob, xd->bd); break; case TX_32X32: vp9_highbd_idct32x32_add(dqcoeff, dst16, stride, eob, xd->bd); break; default: assert(0 && "Invalid transform size"); } } } else { if (xd->lossless) { vp9_iwht4x4_add(dqcoeff, dst, stride, eob); } else { switch (tx_size) { case TX_4X4: vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_8X8: vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_16X16: vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_32X32: vp9_idct32x32_add(dqcoeff, dst, stride, eob); break; default: assert(0 && "Invalid transform size"); return; } } } #else if (xd->lossless) { vp9_iwht4x4_add(dqcoeff, dst, stride, eob); } else { switch (tx_size) { case TX_4X4: vp9_iht4x4_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_8X8: vp9_iht8x8_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_16X16: vp9_iht16x16_add(tx_type, dqcoeff, dst, stride, eob); break; case TX_32X32: vp9_idct32x32_add(dqcoeff, dst, stride, eob); break; default: assert(0 && "Invalid transform size"); return; } } #endif // CONFIG_VP9_HIGHBITDEPTH if (eob == 1) { dqcoeff[0] = 0; } else { if (tx_type == DCT_DCT && tx_size <= TX_16X16 && eob <= 10) memset(dqcoeff, 0, 4 * (4 << tx_size) * sizeof(dqcoeff[0])); else if (tx_size == TX_32X32 && eob <= 34) memset(dqcoeff, 0, 256 * sizeof(dqcoeff[0])); else memset(dqcoeff, 0, (16 << (tx_size << 1)) * sizeof(dqcoeff[0])); } } static void predict_and_reconstruct_intra_block(TileWorkerData *twd, MODE_INFO *const mi, int plane, int row, int col, TX_SIZE tx_size) { MACROBLOCKD *const xd = &twd->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode; uint8_t *dst; dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; if (mi->sb_type < BLOCK_8X8) if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; vp9_predict_intra_block(xd, pd->n4_wl, tx_size, mode, dst, pd->dst.stride, dst, pd->dst.stride, col, row, plane); if (!mi->skip) { const TX_TYPE tx_type = (plane || xd->lossless) ? DCT_DCT : intra_mode_to_tx_type_lookup[mode]; const scan_order *sc = (plane || xd->lossless) ? &vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type]; const int eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size, mi->segment_id); if (eob > 0) { inverse_transform_block_intra(xd, plane, tx_type, tx_size, dst, pd->dst.stride, eob); } } } static void parse_intra_block_row_mt(TileWorkerData *twd, MODE_INFO *const mi, int plane, int row, int col, TX_SIZE tx_size) { MACROBLOCKD *const xd = &twd->xd; PREDICTION_MODE mode = (plane == 0) ? 
mi->mode : mi->uv_mode; if (mi->sb_type < BLOCK_8X8) if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; if (!mi->skip) { struct macroblockd_plane *const pd = &xd->plane[plane]; const TX_TYPE tx_type = (plane || xd->lossless) ? DCT_DCT : intra_mode_to_tx_type_lookup[mode]; const scan_order *sc = (plane || xd->lossless) ? &vp9_default_scan_orders[tx_size] : &vp9_scan_orders[tx_size][tx_type]; *pd->eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size, mi->segment_id); /* Keep the alignment to 16 */ pd->dqcoeff += (16 << (tx_size << 1)); pd->eob++; } } static void predict_and_reconstruct_intra_block_row_mt(TileWorkerData *twd, MODE_INFO *const mi, int plane, int row, int col, TX_SIZE tx_size) { MACROBLOCKD *const xd = &twd->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; PREDICTION_MODE mode = (plane == 0) ? mi->mode : mi->uv_mode; uint8_t *dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; if (mi->sb_type < BLOCK_8X8) if (plane == 0) mode = xd->mi[0]->bmi[(row << 1) + col].as_mode; vp9_predict_intra_block(xd, pd->n4_wl, tx_size, mode, dst, pd->dst.stride, dst, pd->dst.stride, col, row, plane); if (!mi->skip) { const TX_TYPE tx_type = (plane || xd->lossless) ? DCT_DCT : intra_mode_to_tx_type_lookup[mode]; if (*pd->eob > 0) { inverse_transform_block_intra(xd, plane, tx_type, tx_size, dst, pd->dst.stride, *pd->eob); } /* Keep the alignment to 16 */ pd->dqcoeff += (16 << (tx_size << 1)); pd->eob++; } } static int reconstruct_inter_block(TileWorkerData *twd, MODE_INFO *const mi, int plane, int row, int col, TX_SIZE tx_size, int mi_row, int mi_col) { MACROBLOCKD *const xd = &twd->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; const scan_order *sc = &vp9_default_scan_orders[tx_size]; const int eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size, mi->segment_id); uint8_t *dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; if (eob > 0) { inverse_transform_block_inter(xd, plane, tx_size, dst, pd->dst.stride, eob); } #if CONFIG_MISMATCH_DEBUG { int pixel_c, pixel_r; int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2); int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2); mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row, pd->subsampling_x, pd->subsampling_y); mismatch_check_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r, blk_w, blk_h, xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); } #else (void)mi_row; (void)mi_col; #endif return eob; } static int parse_inter_block_row_mt(TileWorkerData *twd, MODE_INFO *const mi, int plane, int row, int col, TX_SIZE tx_size) { MACROBLOCKD *const xd = &twd->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; const scan_order *sc = &vp9_default_scan_orders[tx_size]; const int eob = vp9_decode_block_tokens(twd, plane, sc, col, row, tx_size, mi->segment_id); *pd->eob = eob; pd->dqcoeff += (16 << (tx_size << 1)); pd->eob++; return eob; } static int reconstruct_inter_block_row_mt(TileWorkerData *twd, MODE_INFO *const mi, int plane, int row, int col, TX_SIZE tx_size) { MACROBLOCKD *const xd = &twd->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; const int eob = *pd->eob; (void)mi; if (eob > 0) { inverse_transform_block_inter( xd, plane, tx_size, &pd->dst.buf[4 * row * pd->dst.stride + 4 * col], pd->dst.stride, eob); } pd->dqcoeff += (16 << (tx_size << 1)); pd->eob++; return eob; } static void build_mc_border(const uint8_t *src, int src_stride, uint8_t *dst, int dst_stride, int x, int y, int b_w, int b_h, int w, int h) { // Get a pointer to the start of the real data for this row. 
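  // Added note describing the logic below: (x, y) place the b_w x b_h
  // patch relative to the w x h reference plane and may fall partly or
  // wholly outside it. Each output row is assembled in three pieces:
  // 'left' columns replicate the row's first pixel, 'copy' columns are
  // memcpy'd from the frame, and 'right' columns replicate its last
  // pixel. Rows above or below the frame reuse the nearest valid row,
  // which is why ref_row is clamped first and then only advanced while
  // 0 < y < h.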
const uint8_t *ref_row = src - x - y * src_stride; if (y >= h) ref_row += (h - 1) * src_stride; else if (y > 0) ref_row += y * src_stride; do { int right = 0, copy; int left = x < 0 ? -x : 0; if (left > b_w) left = b_w; if (x + b_w > w) right = x + b_w - w; if (right > b_w) right = b_w; copy = b_w - left - right; if (left) memset(dst, ref_row[0], left); if (copy) memcpy(dst + left, ref_row + x + left, copy); if (right) memset(dst + left + copy, ref_row[w - 1], right); dst += dst_stride; ++y; if (y > 0 && y < h) ref_row += src_stride; } while (--b_h); } #if CONFIG_VP9_HIGHBITDEPTH static void high_build_mc_border(const uint8_t *src8, int src_stride, uint16_t *dst, int dst_stride, int x, int y, int b_w, int b_h, int w, int h) { // Get a pointer to the start of the real data for this row. const uint16_t *src = CONVERT_TO_SHORTPTR(src8); const uint16_t *ref_row = src - x - y * src_stride; if (y >= h) ref_row += (h - 1) * src_stride; else if (y > 0) ref_row += y * src_stride; do { int right = 0, copy; int left = x < 0 ? -x : 0; if (left > b_w) left = b_w; if (x + b_w > w) right = x + b_w - w; if (right > b_w) right = b_w; copy = b_w - left - right; if (left) vpx_memset16(dst, ref_row[0], left); if (copy) memcpy(dst + left, ref_row + x + left, copy * sizeof(uint16_t)); if (right) vpx_memset16(dst + left + copy, ref_row[w - 1], right); dst += dst_stride; ++y; if (y > 0 && y < h) ref_row += src_stride; } while (--b_h); } #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH static void extend_and_predict(TileWorkerData *twd, const uint8_t *buf_ptr1, int pre_buf_stride, int x0, int y0, int b_w, int b_h, int frame_width, int frame_height, int border_offset, uint8_t *const dst, int dst_buf_stride, int subpel_x, int subpel_y, const InterpKernel *kernel, const struct scale_factors *sf, MACROBLOCKD *xd, int w, int h, int ref, int xs, int ys) { uint16_t *mc_buf_high = twd->extend_and_predict_buf; if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { high_build_mc_border(buf_ptr1, pre_buf_stride, mc_buf_high, b_w, x0, y0, b_w, b_h, frame_width, frame_height); highbd_inter_predictor(mc_buf_high + border_offset, b_w, CONVERT_TO_SHORTPTR(dst), dst_buf_stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); } else { build_mc_border(buf_ptr1, pre_buf_stride, (uint8_t *)mc_buf_high, b_w, x0, y0, b_w, b_h, frame_width, frame_height); inter_predictor(((uint8_t *)mc_buf_high) + border_offset, b_w, dst, dst_buf_stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); } } #else static void extend_and_predict(TileWorkerData *twd, const uint8_t *buf_ptr1, int pre_buf_stride, int x0, int y0, int b_w, int b_h, int frame_width, int frame_height, int border_offset, uint8_t *const dst, int dst_buf_stride, int subpel_x, int subpel_y, const InterpKernel *kernel, const struct scale_factors *sf, int w, int h, int ref, int xs, int ys) { uint8_t *mc_buf = (uint8_t *)twd->extend_and_predict_buf; const uint8_t *buf_ptr; build_mc_border(buf_ptr1, pre_buf_stride, mc_buf, b_w, x0, y0, b_w, b_h, frame_width, frame_height); buf_ptr = mc_buf + border_offset; inter_predictor(buf_ptr, b_w, dst, dst_buf_stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); } #endif // CONFIG_VP9_HIGHBITDEPTH static void dec_build_inter_predictors( TileWorkerData *twd, MACROBLOCKD *xd, int plane, int bw, int bh, int x, int y, int w, int h, int mi_x, int mi_y, const InterpKernel *kernel, const struct scale_factors *sf, struct buf_2d *pre_buf, struct buf_2d *dst_buf, const MV *mv, RefCntBuffer *ref_frame_buf, int is_scaled, int ref) 
{ struct macroblockd_plane *const pd = &xd->plane[plane]; uint8_t *const dst = dst_buf->buf + dst_buf->stride * y + x; MV32 scaled_mv; int xs, ys, x0, y0, x0_16, y0_16, frame_width, frame_height, buf_stride, subpel_x, subpel_y; uint8_t *ref_frame, *buf_ptr; // Get reference frame pointer, width and height. if (plane == 0) { frame_width = ref_frame_buf->buf.y_crop_width; frame_height = ref_frame_buf->buf.y_crop_height; ref_frame = ref_frame_buf->buf.y_buffer; } else { frame_width = ref_frame_buf->buf.uv_crop_width; frame_height = ref_frame_buf->buf.uv_crop_height; ref_frame = plane == 1 ? ref_frame_buf->buf.u_buffer : ref_frame_buf->buf.v_buffer; } if (is_scaled) { const MV mv_q4 = clamp_mv_to_umv_border_sb( xd, mv, bw, bh, pd->subsampling_x, pd->subsampling_y); // Co-ordinate of containing block to pixel precision. int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); #if 0 // CONFIG_BETTER_HW_COMPATIBILITY assert(xd->mi[0]->sb_type != BLOCK_4X8 && xd->mi[0]->sb_type != BLOCK_8X4); assert(mv_q4.row == mv->row * (1 << (1 - pd->subsampling_y)) && mv_q4.col == mv->col * (1 << (1 - pd->subsampling_x))); #endif // Co-ordinate of the block to 1/16th pixel precision. x0_16 = (x_start + x) << SUBPEL_BITS; y0_16 = (y_start + y) << SUBPEL_BITS; // Co-ordinate of current block in reference frame // to 1/16th pixel precision. x0_16 = sf->scale_value_x(x0_16, sf); y0_16 = sf->scale_value_y(y0_16, sf); // Map the top left corner of the block into the reference frame. x0 = sf->scale_value_x(x_start + x, sf); y0 = sf->scale_value_y(y_start + y, sf); // Scale the MV and incorporate the sub-pixel offset of the block // in the reference frame. scaled_mv = vp9_scale_mv(&mv_q4, mi_x + x, mi_y + y, sf); xs = sf->x_step_q4; ys = sf->y_step_q4; } else { // Co-ordinate of containing block to pixel precision. x0 = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)) + x; y0 = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)) + y; // Co-ordinate of the block to 1/16th pixel precision. x0_16 = x0 << SUBPEL_BITS; y0_16 = y0 << SUBPEL_BITS; scaled_mv.row = mv->row * (1 << (1 - pd->subsampling_y)); scaled_mv.col = mv->col * (1 << (1 - pd->subsampling_x)); xs = ys = 16; } subpel_x = scaled_mv.col & SUBPEL_MASK; subpel_y = scaled_mv.row & SUBPEL_MASK; // Calculate the top left corner of the best matching block in the // reference frame. x0 += scaled_mv.col >> SUBPEL_BITS; y0 += scaled_mv.row >> SUBPEL_BITS; x0_16 += scaled_mv.col; y0_16 += scaled_mv.row; // Get reference block pointer. buf_ptr = ref_frame + y0 * pre_buf->stride + x0; buf_stride = pre_buf->stride; // Do border extension if there is motion or the // width/height is not a multiple of 8 pixels. if (is_scaled || scaled_mv.col || scaled_mv.row || (frame_width & 0x7) || (frame_height & 0x7)) { int y1 = ((y0_16 + (h - 1) * ys) >> SUBPEL_BITS) + 1; // Get reference block bottom right horizontal coordinate. int x1 = ((x0_16 + (w - 1) * xs) >> SUBPEL_BITS) + 1; int x_pad = 0, y_pad = 0; if (subpel_x || (sf->x_step_q4 != SUBPEL_SHIFTS)) { x0 -= VP9_INTERP_EXTEND - 1; x1 += VP9_INTERP_EXTEND; x_pad = 1; } if (subpel_y || (sf->y_step_q4 != SUBPEL_SHIFTS)) { y0 -= VP9_INTERP_EXTEND - 1; y1 += VP9_INTERP_EXTEND; y_pad = 1; } // Skip border extension if block is inside the frame. if (x0 < 0 || x0 > frame_width - 1 || x1 < 0 || x1 > frame_width - 1 || y0 < 0 || y0 > frame_height - 1 || y1 < 0 || y1 > frame_height - 1) { // Extend the border. 
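      // Added note describing the constants below: x0/y0 were moved back
      // by VP9_INTERP_EXTEND - 1 (= 3) pixels and x1/y1 forward by
      // VP9_INTERP_EXTEND (= 4) on each sub-pel axis, so the b_w x b_h
      // patch covers the full 8-tap interpolation filter support.
      // border_offset then skips the 3 leading padded rows/columns so
      // extend_and_predict filters from the nominal block origin inside
      // the bordered buffer.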
const uint8_t *const buf_ptr1 = ref_frame + y0 * buf_stride + x0; const int b_w = x1 - x0 + 1; const int b_h = y1 - y0 + 1; const int border_offset = y_pad * 3 * b_w + x_pad * 3; extend_and_predict(twd, buf_ptr1, buf_stride, x0, y0, b_w, b_h, frame_width, frame_height, border_offset, dst, dst_buf->stride, subpel_x, subpel_y, kernel, sf, #if CONFIG_VP9_HIGHBITDEPTH xd, #endif w, h, ref, xs, ys); return; } } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { highbd_inter_predictor(CONVERT_TO_SHORTPTR(buf_ptr), buf_stride, CONVERT_TO_SHORTPTR(dst), dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys, xd->bd); } else { inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); } #else inter_predictor(buf_ptr, buf_stride, dst, dst_buf->stride, subpel_x, subpel_y, sf, w, h, ref, kernel, xs, ys); #endif // CONFIG_VP9_HIGHBITDEPTH } static void dec_build_inter_predictors_sb(TileWorkerData *twd, VP9Decoder *const pbi, MACROBLOCKD *xd, int mi_row, int mi_col) { int plane; const int mi_x = mi_col * MI_SIZE; const int mi_y = mi_row * MI_SIZE; const MODE_INFO *mi = xd->mi[0]; const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; const BLOCK_SIZE sb_type = mi->sb_type; const int is_compound = has_second_ref(mi); int ref; int is_scaled; for (ref = 0; ref < 1 + is_compound; ++ref) { const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; RefBuffer *ref_buf = &pbi->common.frame_refs[frame - LAST_FRAME]; const struct scale_factors *const sf = &ref_buf->sf; const int idx = ref_buf->idx; BufferPool *const pool = pbi->common.buffer_pool; RefCntBuffer *const ref_frame_buf = &pool->frame_bufs[idx]; if (!vp9_is_valid_scale(sf)) vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, "Reference frame has invalid dimensions"); is_scaled = vp9_is_scaled(sf); vp9_setup_pre_planes(xd, ref, ref_buf->buf, mi_row, mi_col, is_scaled ? 
sf : NULL); xd->block_refs[ref] = ref_buf; if (sb_type < BLOCK_8X8) { for (plane = 0; plane < MAX_MB_PLANE; ++plane) { struct macroblockd_plane *const pd = &xd->plane[plane]; struct buf_2d *const dst_buf = &pd->dst; const int num_4x4_w = pd->n4_w; const int num_4x4_h = pd->n4_h; const int n4w_x4 = 4 * num_4x4_w; const int n4h_x4 = 4 * num_4x4_h; struct buf_2d *const pre_buf = &pd->pre[ref]; int i = 0, x, y; for (y = 0; y < num_4x4_h; ++y) { for (x = 0; x < num_4x4_w; ++x) { const MV mv = average_split_mvs(pd, mi, ref, i++); dec_build_inter_predictors(twd, xd, plane, n4w_x4, n4h_x4, 4 * x, 4 * y, 4, 4, mi_x, mi_y, kernel, sf, pre_buf, dst_buf, &mv, ref_frame_buf, is_scaled, ref); } } } } else { const MV mv = mi->mv[ref].as_mv; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { struct macroblockd_plane *const pd = &xd->plane[plane]; struct buf_2d *const dst_buf = &pd->dst; const int num_4x4_w = pd->n4_w; const int num_4x4_h = pd->n4_h; const int n4w_x4 = 4 * num_4x4_w; const int n4h_x4 = 4 * num_4x4_h; struct buf_2d *const pre_buf = &pd->pre[ref]; dec_build_inter_predictors(twd, xd, plane, n4w_x4, n4h_x4, 0, 0, n4w_x4, n4h_x4, mi_x, mi_y, kernel, sf, pre_buf, dst_buf, &mv, ref_frame_buf, is_scaled, ref); } } } } static INLINE void dec_reset_skip_context(MACROBLOCKD *xd) { int i; for (i = 0; i < MAX_MB_PLANE; i++) { struct macroblockd_plane *const pd = &xd->plane[i]; memset(pd->above_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_w); memset(pd->left_context, 0, sizeof(ENTROPY_CONTEXT) * pd->n4_h); } } static void set_plane_n4(MACROBLOCKD *const xd, int bw, int bh, int bwl, int bhl) { int i; for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].n4_w = (bw << 1) >> xd->plane[i].subsampling_x; xd->plane[i].n4_h = (bh << 1) >> xd->plane[i].subsampling_y; xd->plane[i].n4_wl = bwl - xd->plane[i].subsampling_x; xd->plane[i].n4_hl = bhl - xd->plane[i].subsampling_y; } } static MODE_INFO *set_offsets_recon(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, int bw, int bh, int bwl, int bhl) { const int offset = mi_row * cm->mi_stride + mi_col; const TileInfo *const tile = &xd->tile; xd->mi = cm->mi_grid_visible + offset; set_plane_n4(xd, bw, bh, bwl, bhl); set_skip_context(xd, mi_row, mi_col); // Distance of Mb to the various image edges. These are specified to 8th pel // as they are always compared to values that are in 1/8th pel units set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); return xd->mi[0]; } static MODE_INFO *set_offsets(VP9_COMMON *const cm, MACROBLOCKD *const xd, BLOCK_SIZE bsize, int mi_row, int mi_col, int bw, int bh, int x_mis, int y_mis, int bwl, int bhl) { const int offset = mi_row * cm->mi_stride + mi_col; int x, y; const TileInfo *const tile = &xd->tile; xd->mi = cm->mi_grid_visible + offset; xd->mi[0] = &cm->mi[offset]; // TODO(slavarnway): Generate sb_type based on bwl and bhl, instead of // passing bsize from decode_partition(). xd->mi[0]->sb_type = bsize; for (y = 0; y < y_mis; ++y) for (x = !y; x < x_mis; ++x) { xd->mi[y * cm->mi_stride + x] = xd->mi[0]; } set_plane_n4(xd, bw, bh, bwl, bhl); set_skip_context(xd, mi_row, mi_col); // Distance of Mb to the various image edges. 
These are specified to 8th pel // as they are always compared to values that are in 1/8th pel units set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); return xd->mi[0]; } static INLINE int predict_recon_inter(MACROBLOCKD *xd, MODE_INFO *mi, TileWorkerData *twd, predict_recon_func func) { int eobtotal = 0; int plane; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *const pd = &xd->plane[plane]; const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size; const int num_4x4_w = pd->n4_w; const int num_4x4_h = pd->n4_h; const int step = (1 << tx_size); int row, col; const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; for (row = 0; row < max_blocks_high; row += step) for (col = 0; col < max_blocks_wide; col += step) eobtotal += func(twd, mi, plane, row, col, tx_size); } return eobtotal; } static INLINE void predict_recon_intra(MACROBLOCKD *xd, MODE_INFO *mi, TileWorkerData *twd, intra_recon_func func) { int plane; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *const pd = &xd->plane[plane]; const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size; const int num_4x4_w = pd->n4_w; const int num_4x4_h = pd->n4_h; const int step = (1 << tx_size); int row, col; const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; for (row = 0; row < max_blocks_high; row += step) for (col = 0; col < max_blocks_wide; col += step) func(twd, mi, plane, row, col, tx_size); } } static void decode_block(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, int mi_col, BLOCK_SIZE bsize, int bwl, int bhl) { VP9_COMMON *const cm = &pbi->common; const int less8x8 = bsize < BLOCK_8X8; const int bw = 1 << (bwl - 1); const int bh = 1 << (bhl - 1); const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); vpx_reader *r = &twd->bit_reader; MACROBLOCKD *const xd = &twd->xd; MODE_INFO *mi = set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis, y_mis, bwl, bhl); if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) { const BLOCK_SIZE uv_subsize = ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y]; if (uv_subsize == BLOCK_INVALID) vpx_internal_error(xd->error_info, VPX_CODEC_CORRUPT_FRAME, "Invalid block size."); } vp9_read_mode_info(twd, pbi, mi_row, mi_col, x_mis, y_mis); if (mi->skip) { dec_reset_skip_context(xd); } if (!is_inter_block(mi)) { int plane; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *const pd = &xd->plane[plane]; const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size; const int num_4x4_w = pd->n4_w; const int num_4x4_h = pd->n4_h; const int step = (1 << tx_size); int row, col; const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 
0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 0 : max_blocks_high; for (row = 0; row < max_blocks_high; row += step) for (col = 0; col < max_blocks_wide; col += step) predict_and_reconstruct_intra_block(twd, mi, plane, row, col, tx_size); } } else { // Prediction dec_build_inter_predictors_sb(twd, pbi, xd, mi_row, mi_col); #if CONFIG_MISMATCH_DEBUG { int plane; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; int pixel_c, pixel_r; const BLOCK_SIZE plane_bsize = get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), &xd->plane[plane]); const int bw = get_block_width(plane_bsize); const int bh = get_block_height(plane_bsize); mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, pd->subsampling_x, pd->subsampling_y); mismatch_check_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c, pixel_r, bw, bh, xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); } } #endif // Reconstruction if (!mi->skip) { int eobtotal = 0; int plane; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *const pd = &xd->plane[plane]; const TX_SIZE tx_size = plane ? get_uv_tx_size(mi, pd) : mi->tx_size; const int num_4x4_w = pd->n4_w; const int num_4x4_h = pd->n4_h; const int step = (1 << tx_size); int row, col; const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); xd->max_blocks_wide = xd->mb_to_right_edge >= 0 ? 0 : max_blocks_wide; xd->max_blocks_high = xd->mb_to_bottom_edge >= 0 ? 
0 : max_blocks_high;
      for (row = 0; row < max_blocks_high; row += step)
        for (col = 0; col < max_blocks_wide; col += step)
          eobtotal += reconstruct_inter_block(twd, mi, plane, row, col,
                                              tx_size, mi_row, mi_col);
      }
      if (!less8x8 && eobtotal == 0) mi->skip = 1;  // skip loopfilter
    }
  }

  xd->corrupted |= vpx_reader_has_error(r);

  if (cm->lf.filter_level) {
    vp9_build_mask(cm, mi, mi_row, mi_col, bw, bh);
  }
}

static void recon_block(TileWorkerData *twd, VP9Decoder *const pbi,
                        int mi_row, int mi_col, BLOCK_SIZE bsize, int bwl,
                        int bhl) {
  VP9_COMMON *const cm = &pbi->common;
  const int bw = 1 << (bwl - 1);
  const int bh = 1 << (bhl - 1);
  MACROBLOCKD *const xd = &twd->xd;
  MODE_INFO *mi = set_offsets_recon(cm, xd, mi_row, mi_col, bw, bh, bwl, bhl);

  if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) {
    const BLOCK_SIZE uv_subsize =
        ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y];
    if (uv_subsize == BLOCK_INVALID)
      vpx_internal_error(xd->error_info, VPX_CODEC_CORRUPT_FRAME,
                         "Invalid block size.");
  }

  if (!is_inter_block(mi)) {
    predict_recon_intra(xd, mi, twd,
                        predict_and_reconstruct_intra_block_row_mt);
  } else {
    // Prediction
    dec_build_inter_predictors_sb(twd, pbi, xd, mi_row, mi_col);

    // Reconstruction
    if (!mi->skip) {
      predict_recon_inter(xd, mi, twd, reconstruct_inter_block_row_mt);
    }
  }

  vp9_build_mask(cm, mi, mi_row, mi_col, bw, bh);
}

static void parse_block(TileWorkerData *twd, VP9Decoder *const pbi,
                        int mi_row, int mi_col, BLOCK_SIZE bsize, int bwl,
                        int bhl) {
  VP9_COMMON *const cm = &pbi->common;
  const int bw = 1 << (bwl - 1);
  const int bh = 1 << (bhl - 1);
  const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col);
  const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row);
  vpx_reader *r = &twd->bit_reader;
  MACROBLOCKD *const xd = &twd->xd;
  MODE_INFO *mi = set_offsets(cm, xd, bsize, mi_row, mi_col, bw, bh, x_mis,
                              y_mis, bwl, bhl);

  if (bsize >= BLOCK_8X8 && (cm->subsampling_x || cm->subsampling_y)) {
    const BLOCK_SIZE uv_subsize =
        ss_size_lookup[bsize][cm->subsampling_x][cm->subsampling_y];
    if (uv_subsize == BLOCK_INVALID)
      vpx_internal_error(xd->error_info, VPX_CODEC_CORRUPT_FRAME,
                         "Invalid block size.");
  }

  vp9_read_mode_info(twd, pbi, mi_row, mi_col, x_mis, y_mis);

  if (mi->skip) {
    dec_reset_skip_context(xd);
  }

  if (!is_inter_block(mi)) {
    predict_recon_intra(xd, mi, twd, parse_intra_block_row_mt);
  } else {
    if (!mi->skip) {
      tran_low_t *dqcoeff[MAX_MB_PLANE];
      int *eob[MAX_MB_PLANE];
      int plane;
      int eobtotal;
      // Based on eobtotal and bsize, mi->skip may be set to 1 below. In that
      // case dqcoeff and eob need to be backed up and restored, as
      // recon_block will not increment these pointers for skip cases.
      for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
        const struct macroblockd_plane *const pd = &xd->plane[plane];
        dqcoeff[plane] = pd->dqcoeff;
        eob[plane] = pd->eob;
      }
      eobtotal = predict_recon_inter(xd, mi, twd, parse_inter_block_row_mt);
      if (bsize >= BLOCK_8X8 && eobtotal == 0) {
        mi->skip = 1;  // skip loopfilter
        for (plane = 0; plane < MAX_MB_PLANE; ++plane) {
          struct macroblockd_plane *pd = &xd->plane[plane];
          pd->dqcoeff = dqcoeff[plane];
          pd->eob = eob[plane];
        }
      }
    }
  }

  xd->corrupted |= vpx_reader_has_error(r);
}

static INLINE int dec_partition_plane_context(TileWorkerData *twd, int mi_row,
                                              int mi_col, int bsl) {
  const PARTITION_CONTEXT *above_ctx = twd->xd.above_seg_context + mi_col;
  const PARTITION_CONTEXT *left_ctx =
      twd->xd.left_seg_context + (mi_row & MI_MASK);
  int above = (*above_ctx >> bsl) & 1, left = (*left_ctx >> bsl) & 1;

  //  assert(bsl >= 0);

  return (left * 2 + above) + bsl *
PARTITION_PLOFFSET;
}

static INLINE void dec_update_partition_context(TileWorkerData *twd,
                                                int mi_row, int mi_col,
                                                BLOCK_SIZE subsize, int bw) {
  PARTITION_CONTEXT *const above_ctx = twd->xd.above_seg_context + mi_col;
  PARTITION_CONTEXT *const left_ctx =
      twd->xd.left_seg_context + (mi_row & MI_MASK);

  // Update the partition context at the end. Note: partition bits of block
  // sizes larger than the current one are set to one, and partition bits of
  // smaller block sizes to zero.
  memset(above_ctx, partition_context_lookup[subsize].above, bw);
  memset(left_ctx, partition_context_lookup[subsize].left, bw);
}

static PARTITION_TYPE read_partition(TileWorkerData *twd, int mi_row,
                                     int mi_col, int has_rows, int has_cols,
                                     int bsl) {
  const int ctx = dec_partition_plane_context(twd, mi_row, mi_col, bsl);
  const vpx_prob *const probs = twd->xd.partition_probs[ctx];
  FRAME_COUNTS *counts = twd->xd.counts;
  PARTITION_TYPE p;
  vpx_reader *r = &twd->bit_reader;

  if (has_rows && has_cols)
    p = (PARTITION_TYPE)vpx_read_tree(r, vp9_partition_tree, probs);
  else if (!has_rows && has_cols)
    p = vpx_read(r, probs[1]) ? PARTITION_SPLIT : PARTITION_HORZ;
  else if (has_rows && !has_cols)
    p = vpx_read(r, probs[2]) ? PARTITION_SPLIT : PARTITION_VERT;
  else
    p = PARTITION_SPLIT;

  if (counts) ++counts->partition[ctx][p];

  return p;
}

// TODO(slavarnway): eliminate bsize and subsize in future commits
static void decode_partition(TileWorkerData *twd, VP9Decoder *const pbi,
                             int mi_row, int mi_col, BLOCK_SIZE bsize,
                             int n4x4_l2) {
  VP9_COMMON *const cm = &pbi->common;
  const int n8x8_l2 = n4x4_l2 - 1;
  const int num_8x8_wh = 1 << n8x8_l2;
  const int hbs = num_8x8_wh >> 1;
  PARTITION_TYPE partition;
  BLOCK_SIZE subsize;
  const int has_rows = (mi_row + hbs) < cm->mi_rows;
  const int has_cols = (mi_col + hbs) < cm->mi_cols;
  MACROBLOCKD *const xd = &twd->xd;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  partition =
      read_partition(twd, mi_row, mi_col, has_rows, has_cols, n8x8_l2);
  subsize = subsize_lookup[partition][bsize];  // get_subsize(bsize, partition);
  if (!hbs) {
    // calculate bmode block dimensions (log 2)
    xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT);
    xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ);
    decode_block(twd, pbi, mi_row, mi_col, subsize, 1, 1);
  } else {
    switch (partition) {
      case PARTITION_NONE:
        decode_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n4x4_l2);
        break;
      case PARTITION_HORZ:
        decode_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n8x8_l2);
        if (has_rows)
          decode_block(twd, pbi, mi_row + hbs, mi_col, subsize, n4x4_l2,
                       n8x8_l2);
        break;
      case PARTITION_VERT:
        decode_block(twd, pbi, mi_row, mi_col, subsize, n8x8_l2, n4x4_l2);
        if (has_cols)
          decode_block(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2,
                       n4x4_l2);
        break;
      case PARTITION_SPLIT:
        decode_partition(twd, pbi, mi_row, mi_col, subsize, n8x8_l2);
        decode_partition(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2);
        decode_partition(twd, pbi, mi_row + hbs, mi_col, subsize, n8x8_l2);
        decode_partition(twd, pbi, mi_row + hbs, mi_col + hbs, subsize,
                         n8x8_l2);
        break;
      default: assert(0 && "Invalid partition type");
    }
  }

  // update partition context
  if (bsize >= BLOCK_8X8 &&
      (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT))
    dec_update_partition_context(twd, mi_row, mi_col, subsize, num_8x8_wh);
}

static void process_partition(TileWorkerData *twd, VP9Decoder *const pbi,
                              int mi_row, int mi_col, BLOCK_SIZE bsize,
                              int n4x4_l2, int parse_recon_flag,
                              process_block_fn_t process_block) {
  VP9_COMMON *const cm = &pbi->common;
  const int n8x8_l2 = n4x4_l2 -
1; const int num_8x8_wh = 1 << n8x8_l2; const int hbs = num_8x8_wh >> 1; PARTITION_TYPE partition; BLOCK_SIZE subsize; const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < cm->mi_cols; MACROBLOCKD *const xd = &twd->xd; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; if (parse_recon_flag & PARSE) { *xd->partition = read_partition(twd, mi_row, mi_col, has_rows, has_cols, n8x8_l2); } partition = *xd->partition; xd->partition++; subsize = get_subsize(bsize, partition); if (!hbs) { // calculate bmode block dimensions (log 2) xd->bmode_blocks_wl = 1 >> !!(partition & PARTITION_VERT); xd->bmode_blocks_hl = 1 >> !!(partition & PARTITION_HORZ); process_block(twd, pbi, mi_row, mi_col, subsize, 1, 1); } else { switch (partition) { case PARTITION_NONE: process_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n4x4_l2); break; case PARTITION_HORZ: process_block(twd, pbi, mi_row, mi_col, subsize, n4x4_l2, n8x8_l2); if (has_rows) process_block(twd, pbi, mi_row + hbs, mi_col, subsize, n4x4_l2, n8x8_l2); break; case PARTITION_VERT: process_block(twd, pbi, mi_row, mi_col, subsize, n8x8_l2, n4x4_l2); if (has_cols) process_block(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2, n4x4_l2); break; case PARTITION_SPLIT: process_partition(twd, pbi, mi_row, mi_col, subsize, n8x8_l2, parse_recon_flag, process_block); process_partition(twd, pbi, mi_row, mi_col + hbs, subsize, n8x8_l2, parse_recon_flag, process_block); process_partition(twd, pbi, mi_row + hbs, mi_col, subsize, n8x8_l2, parse_recon_flag, process_block); process_partition(twd, pbi, mi_row + hbs, mi_col + hbs, subsize, n8x8_l2, parse_recon_flag, process_block); break; default: assert(0 && "Invalid partition type"); } } if (parse_recon_flag & PARSE) { // update partition context if ((bsize == BLOCK_8X8 || partition != PARTITION_SPLIT) && bsize >= BLOCK_8X8) dec_update_partition_context(twd, mi_row, mi_col, subsize, num_8x8_wh); } } static void setup_token_decoder(const uint8_t *data, const uint8_t *data_end, size_t read_size, struct vpx_internal_error_info *error_info, vpx_reader *r, vpx_decrypt_cb decrypt_cb, void *decrypt_state) { // Validate the calculated partition length. If the buffer // described by the partition can't be fully read, then restrict // it to the portion that can be (for EC mode) or throw an error. 
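/* For illustration, a minimal sketch of the range check that read_is_valid()
 * (used just below) is assumed to perform: the partition must be non-empty
 * and its last byte must not lie beyond data_end. This is a hypothetical
 * restatement, not the decoder's own definition. */
static int read_is_valid_sketch(const uint8_t *start, size_t len,
                                const uint8_t *end) {
  // A zero-length partition is invalid; otherwise [start, start + len)
  // must fit entirely inside [start, end).
  return len != 0 && len <= (size_t)(end - start);
}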
if (!read_is_valid(data, read_size, data_end)) vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt tile length"); if (vpx_reader_init(r, data, read_size, decrypt_cb, decrypt_state)) vpx_internal_error(error_info, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder %d", 1); } static void read_coef_probs_common(vp9_coeff_probs_model *coef_probs, vpx_reader *r) { int i, j, k, l, m; if (vpx_read_bit(r)) for (i = 0; i < PLANE_TYPES; ++i) for (j = 0; j < REF_TYPES; ++j) for (k = 0; k < COEF_BANDS; ++k) for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) for (m = 0; m < UNCONSTRAINED_NODES; ++m) vp9_diff_update_prob(r, &coef_probs[i][j][k][l][m]); } static void read_coef_probs(FRAME_CONTEXT *fc, TX_MODE tx_mode, vpx_reader *r) { const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; TX_SIZE tx_size; for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) read_coef_probs_common(fc->coef_probs[tx_size], r); } static void setup_segmentation(struct segmentation *seg, struct vpx_read_bit_buffer *rb) { int i, j; seg->update_map = 0; seg->update_data = 0; seg->enabled = vpx_rb_read_bit(rb); if (!seg->enabled) return; // Segmentation map update seg->update_map = vpx_rb_read_bit(rb); if (seg->update_map) { for (i = 0; i < SEG_TREE_PROBS; i++) seg->tree_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8) : MAX_PROB; seg->temporal_update = vpx_rb_read_bit(rb); if (seg->temporal_update) { for (i = 0; i < PREDICTION_PROBS; i++) seg->pred_probs[i] = vpx_rb_read_bit(rb) ? vpx_rb_read_literal(rb, 8) : MAX_PROB; } else { for (i = 0; i < PREDICTION_PROBS; i++) seg->pred_probs[i] = MAX_PROB; } } // Segmentation data update seg->update_data = vpx_rb_read_bit(rb); if (seg->update_data) { seg->abs_delta = vpx_rb_read_bit(rb); vp9_clearall_segfeatures(seg); for (i = 0; i < MAX_SEGMENTS; i++) { for (j = 0; j < SEG_LVL_MAX; j++) { int data = 0; const int feature_enabled = vpx_rb_read_bit(rb); if (feature_enabled) { vp9_enable_segfeature(seg, i, j); data = decode_unsigned_max(rb, vp9_seg_feature_data_max(j)); if (vp9_is_segfeature_signed(j)) data = vpx_rb_read_bit(rb) ? -data : data; } vp9_set_segdata(seg, i, j, data); } } } } static void setup_loopfilter(struct loopfilter *lf, struct vpx_read_bit_buffer *rb) { lf->filter_level = vpx_rb_read_literal(rb, 6); lf->sharpness_level = vpx_rb_read_literal(rb, 3); // Read in loop filter deltas applied at the MB level based on mode or ref // frame. lf->mode_ref_delta_update = 0; lf->mode_ref_delta_enabled = vpx_rb_read_bit(rb); if (lf->mode_ref_delta_enabled) { lf->mode_ref_delta_update = vpx_rb_read_bit(rb); if (lf->mode_ref_delta_update) { int i; for (i = 0; i < MAX_REF_LF_DELTAS; i++) if (vpx_rb_read_bit(rb)) lf->ref_deltas[i] = vpx_rb_read_signed_literal(rb, 6); for (i = 0; i < MAX_MODE_LF_DELTAS; i++) if (vpx_rb_read_bit(rb)) lf->mode_deltas[i] = vpx_rb_read_signed_literal(rb, 6); } } } static INLINE int read_delta_q(struct vpx_read_bit_buffer *rb) { return vpx_rb_read_bit(rb) ? 
vpx_rb_read_signed_literal(rb, 4) : 0; } static void setup_quantization(VP9_COMMON *const cm, MACROBLOCKD *const xd, struct vpx_read_bit_buffer *rb) { cm->base_qindex = vpx_rb_read_literal(rb, QINDEX_BITS); cm->y_dc_delta_q = read_delta_q(rb); cm->uv_dc_delta_q = read_delta_q(rb); cm->uv_ac_delta_q = read_delta_q(rb); cm->dequant_bit_depth = cm->bit_depth; xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; #if CONFIG_VP9_HIGHBITDEPTH xd->bd = (int)cm->bit_depth; #endif } static void setup_segmentation_dequant(VP9_COMMON *const cm) { // Build y/uv dequant values based on segmentation. if (cm->seg.enabled) { int i; for (i = 0; i < MAX_SEGMENTS; ++i) { const int qindex = vp9_get_qindex(&cm->seg, i, cm->base_qindex); cm->y_dequant[i][0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, cm->bit_depth); cm->y_dequant[i][1] = vp9_ac_quant(qindex, 0, cm->bit_depth); cm->uv_dequant[i][0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth); cm->uv_dequant[i][1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth); } } else { const int qindex = cm->base_qindex; // When segmentation is disabled, only the first value is used. The // remaining are don't cares. cm->y_dequant[0][0] = vp9_dc_quant(qindex, cm->y_dc_delta_q, cm->bit_depth); cm->y_dequant[0][1] = vp9_ac_quant(qindex, 0, cm->bit_depth); cm->uv_dequant[0][0] = vp9_dc_quant(qindex, cm->uv_dc_delta_q, cm->bit_depth); cm->uv_dequant[0][1] = vp9_ac_quant(qindex, cm->uv_ac_delta_q, cm->bit_depth); } } static INTERP_FILTER read_interp_filter(struct vpx_read_bit_buffer *rb) { const INTERP_FILTER literal_to_filter[] = { EIGHTTAP_SMOOTH, EIGHTTAP, EIGHTTAP_SHARP, BILINEAR }; return vpx_rb_read_bit(rb) ? SWITCHABLE : literal_to_filter[vpx_rb_read_literal(rb, 2)]; } static void setup_render_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { cm->render_width = cm->width; cm->render_height = cm->height; if (vpx_rb_read_bit(rb)) vp9_read_frame_size(rb, &cm->render_width, &cm->render_height); } static void resize_mv_buffer(VP9_COMMON *cm) { vpx_free(cm->cur_frame->mvs); cm->cur_frame->mi_rows = cm->mi_rows; cm->cur_frame->mi_cols = cm->mi_cols; CHECK_MEM_ERROR(cm, cm->cur_frame->mvs, (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(*cm->cur_frame->mvs))); } static void resize_context_buffers(VP9_COMMON *cm, int width, int height) { #if CONFIG_SIZE_LIMIT if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Dimensions of %dx%d beyond allowed size of %dx%d.", width, height, DECODE_WIDTH_LIMIT, DECODE_HEIGHT_LIMIT); #endif if (cm->width != width || cm->height != height) { const int new_mi_rows = ALIGN_POWER_OF_TWO(height, MI_SIZE_LOG2) >> MI_SIZE_LOG2; const int new_mi_cols = ALIGN_POWER_OF_TWO(width, MI_SIZE_LOG2) >> MI_SIZE_LOG2; // Allocations in vp9_alloc_context_buffers() depend on individual // dimensions as well as the overall size. if (new_mi_cols > cm->mi_cols || new_mi_rows > cm->mi_rows) { if (vp9_alloc_context_buffers(cm, width, height)) { // The cm->mi_* values have been cleared and any existing context // buffers have been freed. Clear cm->width and cm->height to be // consistent and to force a realloc next time. 
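/* (Worked example of the mode-info (MI) grid arithmetic above, assuming the
 * usual VP9 constants MI_SIZE_LOG2 == 3 -- one MI unit covers 8 pixels --
 * and ALIGN_POWER_OF_TWO(v, n) == ((v + (1 << n) - 1) & ~((1 << n) - 1)):
 * for an 854x480 frame,
 *   new_mi_cols = ALIGN_POWER_OF_TWO(854, 3) >> 3 = 856 >> 3 = 107
 *   new_mi_rows = ALIGN_POWER_OF_TWO(480, 3) >> 3 = 480 >> 3 = 60
 * so context buffers are sized for a 107x60 MI grid even though the last
 * MI column is only partially covered by visible pixels.) */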
cm->width = 0; cm->height = 0; vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate context buffers"); } } else { vp9_set_mb_mi(cm, width, height); } vp9_init_context_buffers(cm); cm->width = width; cm->height = height; } if (cm->cur_frame->mvs == NULL || cm->mi_rows > cm->cur_frame->mi_rows || cm->mi_cols > cm->cur_frame->mi_cols) { resize_mv_buffer(cm); } } static void setup_frame_size(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { int width, height; BufferPool *const pool = cm->buffer_pool; vp9_read_frame_size(rb, &width, &height); resize_context_buffers(cm, width, height); setup_render_size(cm, rb); if (vpx_realloc_frame_buffer( get_frame_new_buffer(cm), cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_DEC_BORDER_IN_PIXELS, cm->byte_alignment, &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, pool->cb_priv)) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffer"); } pool->frame_bufs[cm->new_fb_idx].released = 0; pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x; pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; } static INLINE int valid_ref_frame_img_fmt(vpx_bit_depth_t ref_bit_depth, int ref_xss, int ref_yss, vpx_bit_depth_t this_bit_depth, int this_xss, int this_yss) { return ref_bit_depth == this_bit_depth && ref_xss == this_xss && ref_yss == this_yss; } static void setup_frame_size_with_refs(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { int width, height; int found = 0, i; int has_valid_ref_frame = 0; BufferPool *const pool = cm->buffer_pool; for (i = 0; i < REFS_PER_FRAME; ++i) { if (vpx_rb_read_bit(rb)) { if (cm->frame_refs[i].idx != INVALID_IDX) { YV12_BUFFER_CONFIG *const buf = cm->frame_refs[i].buf; width = buf->y_crop_width; height = buf->y_crop_height; found = 1; break; } else { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Failed to decode frame size"); } } } if (!found) vp9_read_frame_size(rb, &width, &height); if (width <= 0 || height <= 0) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Invalid frame size"); // Check to make sure at least one of frames that this frame references // has valid dimensions. 
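/* Sketch of the constraint that valid_ref_frame_size() (used just below) is
 * assumed to encode, stated here for illustration: VP9 permits a reference
 * frame to be at most 2x larger and at most 16x smaller than the current
 * frame in each dimension, keeping the scale factors within the range the
 * motion compensation supports. */
static INLINE int valid_ref_frame_size_sketch(int ref_width, int ref_height,
                                              int this_width,
                                              int this_height) {
  return 2 * this_width >= ref_width && 2 * this_height >= ref_height &&
         this_width <= 16 * ref_width && this_height <= 16 * ref_height;
}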
for (i = 0; i < REFS_PER_FRAME; ++i) { RefBuffer *const ref_frame = &cm->frame_refs[i]; has_valid_ref_frame |= (ref_frame->idx != INVALID_IDX && valid_ref_frame_size(ref_frame->buf->y_crop_width, ref_frame->buf->y_crop_height, width, height)); } if (!has_valid_ref_frame) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Referenced frame has invalid size"); for (i = 0; i < REFS_PER_FRAME; ++i) { RefBuffer *const ref_frame = &cm->frame_refs[i]; if (ref_frame->idx == INVALID_IDX || !valid_ref_frame_img_fmt(ref_frame->buf->bit_depth, ref_frame->buf->subsampling_x, ref_frame->buf->subsampling_y, cm->bit_depth, cm->subsampling_x, cm->subsampling_y)) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Referenced frame has incompatible color format"); } resize_context_buffers(cm, width, height); setup_render_size(cm, rb); if (vpx_realloc_frame_buffer( get_frame_new_buffer(cm), cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_DEC_BORDER_IN_PIXELS, cm->byte_alignment, &pool->frame_bufs[cm->new_fb_idx].raw_frame_buffer, pool->get_fb_cb, pool->cb_priv)) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffer"); } pool->frame_bufs[cm->new_fb_idx].released = 0; pool->frame_bufs[cm->new_fb_idx].buf.subsampling_x = cm->subsampling_x; pool->frame_bufs[cm->new_fb_idx].buf.subsampling_y = cm->subsampling_y; pool->frame_bufs[cm->new_fb_idx].buf.bit_depth = (unsigned int)cm->bit_depth; pool->frame_bufs[cm->new_fb_idx].buf.color_space = cm->color_space; pool->frame_bufs[cm->new_fb_idx].buf.color_range = cm->color_range; pool->frame_bufs[cm->new_fb_idx].buf.render_width = cm->render_width; pool->frame_bufs[cm->new_fb_idx].buf.render_height = cm->render_height; } static void setup_tile_info(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { int min_log2_tile_cols, max_log2_tile_cols, max_ones; vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); // columns max_ones = max_log2_tile_cols - min_log2_tile_cols; cm->log2_tile_cols = min_log2_tile_cols; while (max_ones-- && vpx_rb_read_bit(rb)) cm->log2_tile_cols++; if (cm->log2_tile_cols > 6) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Invalid number of tile columns"); // rows cm->log2_tile_rows = vpx_rb_read_bit(rb); if (cm->log2_tile_rows) cm->log2_tile_rows += vpx_rb_read_bit(rb); } // Reads the next tile returning its size and adjusting '*data' accordingly // based on 'is_last'. 
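/* Each tile except the last carries a 4-byte big-endian length prefix; the
 * last tile simply runs to the end of the frame data. A hedged sketch of the
 * byte unpacking that mem_get_be32() (used in get_tile_buffer below) is
 * assumed to perform on that prefix: */
static unsigned int mem_get_be32_sketch(const unsigned char *buf) {
  return ((unsigned int)buf[0] << 24) | ((unsigned int)buf[1] << 16) |
         ((unsigned int)buf[2] << 8) | (unsigned int)buf[3];
}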
static void get_tile_buffer(const uint8_t *const data_end, int is_last, struct vpx_internal_error_info *error_info, const uint8_t **data, vpx_decrypt_cb decrypt_cb, void *decrypt_state, TileBuffer *buf) { size_t size; if (!is_last) { if (!read_is_valid(*data, 4, data_end)) vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt tile length"); if (decrypt_cb) { uint8_t be_data[4]; decrypt_cb(decrypt_state, *data, be_data, 4); size = mem_get_be32(be_data); } else { size = mem_get_be32(*data); } *data += 4; if (size > (size_t)(data_end - *data)) vpx_internal_error(error_info, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt tile size"); } else { size = data_end - *data; } buf->data = *data; buf->size = size; *data += size; } static void get_tile_buffers(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end, int tile_cols, int tile_rows, TileBuffer (*tile_buffers)[1 << 6]) { int r, c; for (r = 0; r < tile_rows; ++r) { for (c = 0; c < tile_cols; ++c) { const int is_last = (r == tile_rows - 1) && (c == tile_cols - 1); TileBuffer *const buf = &tile_buffers[r][c]; buf->col = c; get_tile_buffer(data_end, is_last, &pbi->common.error, &data, pbi->decrypt_cb, pbi->decrypt_state, buf); } } } static void map_write(RowMTWorkerData *const row_mt_worker_data, int map_idx, int sync_idx) { #if CONFIG_MULTITHREAD pthread_mutex_lock(&row_mt_worker_data->recon_sync_mutex[sync_idx]); row_mt_worker_data->recon_map[map_idx] = 1; pthread_cond_signal(&row_mt_worker_data->recon_sync_cond[sync_idx]); pthread_mutex_unlock(&row_mt_worker_data->recon_sync_mutex[sync_idx]); #else (void)row_mt_worker_data; (void)map_idx; (void)sync_idx; #endif // CONFIG_MULTITHREAD } static void map_read(RowMTWorkerData *const row_mt_worker_data, int map_idx, int sync_idx) { #if CONFIG_MULTITHREAD volatile int8_t *map = row_mt_worker_data->recon_map + map_idx; pthread_mutex_t *const mutex = &row_mt_worker_data->recon_sync_mutex[sync_idx]; pthread_mutex_lock(mutex); while (!(*map)) { pthread_cond_wait(&row_mt_worker_data->recon_sync_cond[sync_idx], mutex); } pthread_mutex_unlock(mutex); #else (void)row_mt_worker_data; (void)map_idx; (void)sync_idx; #endif // CONFIG_MULTITHREAD } static int lpf_map_write_check(VP9LfSync *lf_sync, int row, int num_tile_cols) { int return_val = 0; #if CONFIG_MULTITHREAD int corrupted; pthread_mutex_lock(lf_sync->lf_mutex); corrupted = lf_sync->corrupted; pthread_mutex_unlock(lf_sync->lf_mutex); if (!corrupted) { pthread_mutex_lock(&lf_sync->recon_done_mutex[row]); lf_sync->num_tiles_done[row] += 1; if (num_tile_cols == lf_sync->num_tiles_done[row]) return_val = 1; pthread_mutex_unlock(&lf_sync->recon_done_mutex[row]); } #else (void)lf_sync; (void)row; (void)num_tile_cols; #endif return return_val; } static void vp9_tile_done(VP9Decoder *pbi) { #if CONFIG_MULTITHREAD int terminate; RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; const int all_parse_done = 1 << pbi->common.log2_tile_cols; pthread_mutex_lock(&row_mt_worker_data->recon_done_mutex); row_mt_worker_data->num_tiles_done++; terminate = all_parse_done == row_mt_worker_data->num_tiles_done; pthread_mutex_unlock(&row_mt_worker_data->recon_done_mutex); if (terminate) { vp9_jobq_terminate(&row_mt_worker_data->jobq); } #else (void)pbi; #endif } static void vp9_jobq_alloc(VP9Decoder *pbi) { VP9_COMMON *const cm = &pbi->common; RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); const int sb_rows = aligned_rows >> 
MI_BLOCK_SIZE_LOG2; const int tile_cols = 1 << cm->log2_tile_cols; const size_t jobq_size = (tile_cols * sb_rows * 2 + sb_rows) * sizeof(Job); if (jobq_size > row_mt_worker_data->jobq_size) { vpx_free(row_mt_worker_data->jobq_buf); CHECK_MEM_ERROR(cm, row_mt_worker_data->jobq_buf, vpx_calloc(1, jobq_size)); vp9_jobq_init(&row_mt_worker_data->jobq, row_mt_worker_data->jobq_buf, jobq_size); row_mt_worker_data->jobq_size = jobq_size; } } static void recon_tile_row(TileWorkerData *tile_data, VP9Decoder *pbi, int mi_row, int is_last_row, VP9LfSync *lf_sync, int cur_tile_col) { VP9_COMMON *const cm = &pbi->common; RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; const int tile_cols = 1 << cm->log2_tile_cols; const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2; const int cur_sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; int mi_col_start = tile_data->xd.tile.mi_col_start; int mi_col_end = tile_data->xd.tile.mi_col_end; int mi_col; vp9_zero(tile_data->xd.left_context); vp9_zero(tile_data->xd.left_seg_context); for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) { const int c = mi_col >> MI_BLOCK_SIZE_LOG2; int plane; const int sb_num = (cur_sb_row * (aligned_cols >> MI_BLOCK_SIZE_LOG2) + c); // Top Dependency if (cur_sb_row) { map_read(row_mt_worker_data, ((cur_sb_row - 1) * sb_cols) + c, ((cur_sb_row - 1) * tile_cols) + cur_tile_col); } for (plane = 0; plane < MAX_MB_PLANE; ++plane) { tile_data->xd.plane[plane].eob = row_mt_worker_data->eob[plane] + (sb_num << EOBS_PER_SB_LOG2); tile_data->xd.plane[plane].dqcoeff = row_mt_worker_data->dqcoeff[plane] + (sb_num << DQCOEFFS_PER_SB_LOG2); } tile_data->xd.partition = row_mt_worker_data->partition + (sb_num * PARTITIONS_PER_SB); process_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4, RECON, recon_block); if (cm->lf.filter_level && !cm->skip_loop_filter) { // Queue LPF_JOB int is_lpf_job_ready = 0; if (mi_col + MI_BLOCK_SIZE >= mi_col_end) { // Checks if this row has been decoded in all tiles is_lpf_job_ready = lpf_map_write_check(lf_sync, cur_sb_row, tile_cols); if (is_lpf_job_ready) { Job lpf_job; lpf_job.job_type = LPF_JOB; if (cur_sb_row > 0) { lpf_job.row_num = mi_row - MI_BLOCK_SIZE; vp9_jobq_queue(&row_mt_worker_data->jobq, &lpf_job, sizeof(lpf_job)); } if (is_last_row) { lpf_job.row_num = mi_row; vp9_jobq_queue(&row_mt_worker_data->jobq, &lpf_job, sizeof(lpf_job)); } } } } map_write(row_mt_worker_data, (cur_sb_row * sb_cols) + c, (cur_sb_row * tile_cols) + cur_tile_col); } } static void parse_tile_row(TileWorkerData *tile_data, VP9Decoder *pbi, int mi_row, int cur_tile_col, uint8_t **data_end) { int mi_col; VP9_COMMON *const cm = &pbi->common; RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; TileInfo *tile = &tile_data->xd.tile; TileBuffer *const buf = &pbi->tile_buffers[cur_tile_col]; const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); vp9_zero(tile_data->dqcoeff); vp9_tile_init(tile, cm, 0, cur_tile_col); /* Update reader only at the beginning of each row in a tile */ if (mi_row == 0) { setup_token_decoder(buf->data, *data_end, buf->size, &tile_data->error_info, &tile_data->bit_reader, pbi->decrypt_cb, pbi->decrypt_state); } vp9_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff); tile_data->xd.error_info = &tile_data->error_info; vp9_zero(tile_data->xd.left_context); vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { const 
int r = mi_row >> MI_BLOCK_SIZE_LOG2; const int c = mi_col >> MI_BLOCK_SIZE_LOG2; int plane; const int sb_num = (r * (aligned_cols >> MI_BLOCK_SIZE_LOG2) + c); for (plane = 0; plane < MAX_MB_PLANE; ++plane) { tile_data->xd.plane[plane].eob = row_mt_worker_data->eob[plane] + (sb_num << EOBS_PER_SB_LOG2); tile_data->xd.plane[plane].dqcoeff = row_mt_worker_data->dqcoeff[plane] + (sb_num << DQCOEFFS_PER_SB_LOG2); } tile_data->xd.partition = row_mt_worker_data->partition + sb_num * PARTITIONS_PER_SB; process_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4, PARSE, parse_block); } } static int row_decode_worker_hook(void *arg1, void *arg2) { ThreadData *const thread_data = (ThreadData *)arg1; uint8_t **data_end = (uint8_t **)arg2; VP9Decoder *const pbi = thread_data->pbi; VP9_COMMON *const cm = &pbi->common; RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2; const int tile_cols = 1 << cm->log2_tile_cols; Job job; LFWorkerData *lf_data = thread_data->lf_data; VP9LfSync *lf_sync = thread_data->lf_sync; volatile int corrupted = 0; TileWorkerData *volatile tile_data_recon = NULL; while (!vp9_jobq_dequeue(&row_mt_worker_data->jobq, &job, sizeof(job), 1)) { int mi_col; const int mi_row = job.row_num; if (job.job_type == LPF_JOB) { lf_data->start = mi_row; lf_data->stop = lf_data->start + MI_BLOCK_SIZE; if (cm->lf.filter_level && !cm->skip_loop_filter && mi_row < cm->mi_rows) { vp9_loopfilter_job(lf_data, lf_sync); } } else if (job.job_type == RECON_JOB) { const int cur_sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; const int is_last_row = sb_rows - 1 == cur_sb_row; int mi_col_start, mi_col_end; if (!tile_data_recon) CHECK_MEM_ERROR(cm, tile_data_recon, vpx_memalign(32, sizeof(TileWorkerData))); tile_data_recon->xd = pbi->mb; vp9_tile_init(&tile_data_recon->xd.tile, cm, 0, job.tile_col); vp9_init_macroblockd(cm, &tile_data_recon->xd, tile_data_recon->dqcoeff); mi_col_start = tile_data_recon->xd.tile.mi_col_start; mi_col_end = tile_data_recon->xd.tile.mi_col_end; if (setjmp(tile_data_recon->error_info.jmp)) { const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2; tile_data_recon->error_info.setjmp = 0; corrupted = 1; for (mi_col = mi_col_start; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE) { const int c = mi_col >> MI_BLOCK_SIZE_LOG2; map_write(row_mt_worker_data, (cur_sb_row * sb_cols) + c, (cur_sb_row * tile_cols) + job.tile_col); } if (is_last_row) { vp9_tile_done(pbi); } continue; } tile_data_recon->error_info.setjmp = 1; tile_data_recon->xd.error_info = &tile_data_recon->error_info; recon_tile_row(tile_data_recon, pbi, mi_row, is_last_row, lf_sync, job.tile_col); if (corrupted) vpx_internal_error(&tile_data_recon->error_info, VPX_CODEC_CORRUPT_FRAME, "Failed to decode tile data"); if (is_last_row) { vp9_tile_done(pbi); } } else if (job.job_type == PARSE_JOB) { TileWorkerData *const tile_data = &pbi->tile_worker_data[job.tile_col]; if (setjmp(tile_data->error_info.jmp)) { tile_data->error_info.setjmp = 0; corrupted = 1; vp9_tile_done(pbi); continue; } tile_data->xd = pbi->mb; tile_data->xd.counts = cm->frame_parallel_decoding_mode ? 
0 : &tile_data->counts; tile_data->error_info.setjmp = 1; parse_tile_row(tile_data, pbi, mi_row, job.tile_col, data_end); corrupted |= tile_data->xd.corrupted; if (corrupted) vpx_internal_error(&tile_data->error_info, VPX_CODEC_CORRUPT_FRAME, "Failed to decode tile data"); /* Queue in the recon_job for this row */ { Job recon_job; recon_job.row_num = mi_row; recon_job.tile_col = job.tile_col; recon_job.job_type = RECON_JOB; vp9_jobq_queue(&row_mt_worker_data->jobq, &recon_job, sizeof(recon_job)); } /* Queue next parse job */ if (mi_row + MI_BLOCK_SIZE < cm->mi_rows) { Job parse_job; parse_job.row_num = mi_row + MI_BLOCK_SIZE; parse_job.tile_col = job.tile_col; parse_job.job_type = PARSE_JOB; vp9_jobq_queue(&row_mt_worker_data->jobq, &parse_job, sizeof(parse_job)); } } } vpx_free(tile_data_recon); return !corrupted; } static const uint8_t *decode_tiles(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; TileBuffer tile_buffers[4][1 << 6]; int tile_row, tile_col; int mi_row, mi_col; TileWorkerData *tile_data = NULL; if (cm->lf.filter_level && !cm->skip_loop_filter && pbi->lf_worker.data1 == NULL) { CHECK_MEM_ERROR(cm, pbi->lf_worker.data1, vpx_memalign(32, sizeof(LFWorkerData))); pbi->lf_worker.hook = vp9_loop_filter_worker; if (pbi->max_threads > 1 && !winterface->reset(&pbi->lf_worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Loop filter thread creation failed"); } } if (cm->lf.filter_level && !cm->skip_loop_filter) { LFWorkerData *const lf_data = (LFWorkerData *)pbi->lf_worker.data1; // Be sure to sync as we might be resuming after a failed frame decode. winterface->sync(&pbi->lf_worker); vp9_loop_filter_data_reset(lf_data, get_frame_new_buffer(cm), cm, pbi->mb.plane); } assert(tile_rows <= 4); assert(tile_cols <= (1 << 6)); // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. memset(cm->above_context, 0, sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_cols); memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * aligned_cols); vp9_reset_lfm(cm); get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, tile_buffers); // Load all tile information into tile_data. for (tile_row = 0; tile_row < tile_rows; ++tile_row) { for (tile_col = 0; tile_col < tile_cols; ++tile_col) { const TileBuffer *const buf = &tile_buffers[tile_row][tile_col]; tile_data = pbi->tile_worker_data + tile_cols * tile_row + tile_col; tile_data->xd = pbi->mb; tile_data->xd.corrupted = 0; tile_data->xd.counts = cm->frame_parallel_decoding_mode ? NULL : &cm->counts; vp9_zero(tile_data->dqcoeff); vp9_tile_init(&tile_data->xd.tile, cm, tile_row, tile_col); setup_token_decoder(buf->data, data_end, buf->size, &cm->error, &tile_data->bit_reader, pbi->decrypt_cb, pbi->decrypt_state); vp9_init_macroblockd(cm, &tile_data->xd, tile_data->dqcoeff); } } for (tile_row = 0; tile_row < tile_rows; ++tile_row) { TileInfo tile; vp9_tile_set_row(&tile, cm, tile_row); for (mi_row = tile.mi_row_start; mi_row < tile.mi_row_end; mi_row += MI_BLOCK_SIZE) { for (tile_col = 0; tile_col < tile_cols; ++tile_col) { const int col = pbi->inv_tile_order ? 
tile_cols - tile_col - 1 : tile_col; tile_data = pbi->tile_worker_data + tile_cols * tile_row + col; vp9_tile_set_col(&tile, cm, col); vp9_zero(tile_data->xd.left_context); vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; mi_col += MI_BLOCK_SIZE) { if (pbi->row_mt == 1) { int plane; RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { tile_data->xd.plane[plane].eob = row_mt_worker_data->eob[plane]; tile_data->xd.plane[plane].dqcoeff = row_mt_worker_data->dqcoeff[plane]; } tile_data->xd.partition = row_mt_worker_data->partition; process_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4, PARSE, parse_block); for (plane = 0; plane < MAX_MB_PLANE; ++plane) { tile_data->xd.plane[plane].eob = row_mt_worker_data->eob[plane]; tile_data->xd.plane[plane].dqcoeff = row_mt_worker_data->dqcoeff[plane]; } tile_data->xd.partition = row_mt_worker_data->partition; process_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4, RECON, recon_block); } else { decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); } } pbi->mb.corrupted |= tile_data->xd.corrupted; if (pbi->mb.corrupted) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Failed to decode tile data"); } // Loopfilter one row. if (cm->lf.filter_level && !cm->skip_loop_filter) { const int lf_start = mi_row - MI_BLOCK_SIZE; LFWorkerData *const lf_data = (LFWorkerData *)pbi->lf_worker.data1; // delay the loopfilter by 1 macroblock row. if (lf_start < 0) continue; // decoding has completed: finish up the loop filter in this thread. if (mi_row + MI_BLOCK_SIZE >= cm->mi_rows) continue; winterface->sync(&pbi->lf_worker); lf_data->start = lf_start; lf_data->stop = mi_row; if (pbi->max_threads > 1) { winterface->launch(&pbi->lf_worker); } else { winterface->execute(&pbi->lf_worker); } } } } // Loopfilter remaining rows in the frame. if (cm->lf.filter_level && !cm->skip_loop_filter) { LFWorkerData *const lf_data = (LFWorkerData *)pbi->lf_worker.data1; winterface->sync(&pbi->lf_worker); lf_data->start = lf_data->stop; lf_data->stop = cm->mi_rows; winterface->execute(&pbi->lf_worker); } // Get last tile data. tile_data = pbi->tile_worker_data + tile_cols * tile_rows - 1; return vpx_reader_find_end(&tile_data->bit_reader); } static void set_rows_after_error(VP9LfSync *lf_sync, int start_row, int mi_rows, int num_tiles_left, int total_num_tiles) { do { int mi_row; const int aligned_rows = mi_cols_aligned_to_sb(mi_rows); const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2); const int corrupted = 1; for (mi_row = start_row; mi_row < mi_rows; mi_row += MI_BLOCK_SIZE) { const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2); vp9_set_row(lf_sync, total_num_tiles, mi_row >> MI_BLOCK_SIZE_LOG2, is_last_row, corrupted); } /* If there are multiple tiles, the second tile should start marking row * progress from row 0. */ start_row = 0; } while (num_tiles_left--); } // On entry 'tile_data->data_end' points to the end of the input frame, on exit // it is updated to reflect the bitreader position of the final tile column if // present in the tile buffer group or NULL otherwise. 
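/* tile_worker_hook() below follows the VPxWorker hook convention used
 * throughout this file: a hook receives the worker's two opaque pointers
 * (data1/data2) and returns nonzero on success. A loose sketch of the
 * synchronous dispatch that winterface->execute() is assumed to reduce to
 * (hypothetical restatement of the vpx_util/vpx_thread internals): */
static int execute_worker_sketch(int (*hook)(void *, void *), void *data1,
                                 void *data2) {
  // Callers below OR the negated return value into a corruption flag after
  // sync, so "nonzero" means the tile decoded cleanly.
  return hook != NULL ? hook(data1, data2) : 1;
}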
static int tile_worker_hook(void *arg1, void *arg2) { TileWorkerData *const tile_data = (TileWorkerData *)arg1; VP9Decoder *const pbi = (VP9Decoder *)arg2; TileInfo *volatile tile = &tile_data->xd.tile; const int final_col = (1 << pbi->common.log2_tile_cols) - 1; const uint8_t *volatile bit_reader_end = NULL; VP9_COMMON *cm = &pbi->common; LFWorkerData *lf_data = tile_data->lf_data; VP9LfSync *lf_sync = tile_data->lf_sync; volatile int mi_row = 0; volatile int n = tile_data->buf_start; tile_data->error_info.setjmp = 1; if (setjmp(tile_data->error_info.jmp)) { tile_data->error_info.setjmp = 0; tile_data->xd.corrupted = 1; tile_data->data_end = NULL; if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) { const int num_tiles_left = tile_data->buf_end - n; const int mi_row_start = mi_row; set_rows_after_error(lf_sync, mi_row_start, cm->mi_rows, num_tiles_left, 1 << cm->log2_tile_cols); } return 0; } tile_data->xd.corrupted = 0; do { int mi_col; const TileBuffer *const buf = pbi->tile_buffers + n; /* Initialize to 0 is safe since we do not deal with streams that have * more than one row of tiles. (So tile->mi_row_start will be 0) */ assert(cm->log2_tile_rows == 0); mi_row = 0; vp9_zero(tile_data->dqcoeff); vp9_tile_init(tile, &pbi->common, 0, buf->col); setup_token_decoder(buf->data, tile_data->data_end, buf->size, &tile_data->error_info, &tile_data->bit_reader, pbi->decrypt_cb, pbi->decrypt_state); vp9_init_macroblockd(&pbi->common, &tile_data->xd, tile_data->dqcoeff); // init resets xd.error_info tile_data->xd.error_info = &tile_data->error_info; for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { vp9_zero(tile_data->xd.left_context); vp9_zero(tile_data->xd.left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) { decode_partition(tile_data, pbi, mi_row, mi_col, BLOCK_64X64, 4); } if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) { const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); const int sb_rows = (aligned_rows >> MI_BLOCK_SIZE_LOG2); const int is_last_row = (sb_rows - 1 == mi_row >> MI_BLOCK_SIZE_LOG2); vp9_set_row(lf_sync, 1 << cm->log2_tile_cols, mi_row >> MI_BLOCK_SIZE_LOG2, is_last_row, tile_data->xd.corrupted); } } if (buf->col == final_col) { bit_reader_end = vpx_reader_find_end(&tile_data->bit_reader); } } while (!tile_data->xd.corrupted && ++n <= tile_data->buf_end); if (pbi->lpf_mt_opt && n < tile_data->buf_end && cm->lf.filter_level && !cm->skip_loop_filter) { /* This was not incremented in the tile loop, so increment before tiles left * calculation */ ++n; set_rows_after_error(lf_sync, 0, cm->mi_rows, tile_data->buf_end - n, 1 << cm->log2_tile_cols); } if (pbi->lpf_mt_opt && !tile_data->xd.corrupted && cm->lf.filter_level && !cm->skip_loop_filter) { vp9_loopfilter_rows(lf_data, lf_sync); } tile_data->data_end = bit_reader_end; return !tile_data->xd.corrupted; } // sorts in descending order static int compare_tile_buffers(const void *a, const void *b) { const TileBuffer *const buf_a = (const TileBuffer *)a; const TileBuffer *const buf_b = (const TileBuffer *)b; return (buf_a->size < buf_b->size) - (buf_a->size > buf_b->size); } static INLINE void init_mt(VP9Decoder *pbi) { int n; VP9_COMMON *const cm = &pbi->common; VP9LfSync *lf_row_sync = &pbi->lf_row_sync; const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); if (pbi->num_tile_workers == 0) { const int num_threads = 
pbi->max_threads; CHECK_MEM_ERROR(cm, pbi->tile_workers, vpx_malloc(num_threads * sizeof(*pbi->tile_workers))); for (n = 0; n < num_threads; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; ++pbi->num_tile_workers; winterface->init(worker); if (n < num_threads - 1 && !winterface->reset(worker)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Tile decoder thread creation failed"); } } } // Initialize LPF if ((pbi->lpf_mt_opt || pbi->row_mt) && cm->lf.filter_level && !cm->skip_loop_filter) { vp9_lpf_mt_init(lf_row_sync, cm, cm->lf.filter_level, pbi->num_tile_workers); } // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. memset(cm->above_context, 0, sizeof(*cm->above_context) * MAX_MB_PLANE * 2 * aligned_mi_cols); memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * aligned_mi_cols); vp9_reset_lfm(cm); } static const uint8_t *decode_tiles_row_wise_mt(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; RowMTWorkerData *const row_mt_worker_data = pbi->row_mt_worker_data; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; const int num_workers = pbi->max_threads; int i, n; int col; int corrupted = 0; const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; const int sb_cols = mi_cols_aligned_to_sb(cm->mi_cols) >> MI_BLOCK_SIZE_LOG2; VP9LfSync *lf_row_sync = &pbi->lf_row_sync; YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); assert(tile_cols <= (1 << 6)); assert(tile_rows == 1); (void)tile_rows; memset(row_mt_worker_data->recon_map, 0, sb_rows * sb_cols * sizeof(*row_mt_worker_data->recon_map)); init_mt(pbi); // Reset tile decoding hook for (n = 0; n < num_workers; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; ThreadData *const thread_data = &pbi->row_mt_worker_data->thread_data[n]; winterface->sync(worker); if (cm->lf.filter_level && !cm->skip_loop_filter) { thread_data->lf_sync = lf_row_sync; thread_data->lf_data = &thread_data->lf_sync->lfdata[n]; vp9_loop_filter_data_reset(thread_data->lf_data, new_fb, cm, pbi->mb.plane); } thread_data->pbi = pbi; worker->hook = row_decode_worker_hook; worker->data1 = thread_data; worker->data2 = (void *)&row_mt_worker_data->data_end; } for (col = 0; col < tile_cols; ++col) { TileWorkerData *const tile_data = &pbi->tile_worker_data[col]; tile_data->xd = pbi->mb; tile_data->xd.counts = cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts; } /* Reset the jobq to start of the jobq buffer */ vp9_jobq_reset(&row_mt_worker_data->jobq); row_mt_worker_data->num_tiles_done = 0; row_mt_worker_data->data_end = NULL; // Load tile data into tile_buffers get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, &pbi->tile_buffers); // Initialize thread frame counts. 
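/* Each tile worker accumulates symbol statistics into its own
 * tile_data->counts so the hot decode path needs no locking; after the
 * workers are synced, the per-thread histograms are summed into cm->counts
 * via vp9_accumulate_frame_counts(). A minimal sketch of that
 * zero-then-reduce pattern over a hypothetical flat counter array: */
static void accumulate_counts_sketch(unsigned int *total,
                                     const unsigned int *per_thread, int n) {
  int i;
  for (i = 0; i < n; ++i) total[i] += per_thread[i];  // element-wise sum
}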
if (!cm->frame_parallel_decoding_mode) { for (col = 0; col < tile_cols; ++col) { TileWorkerData *const tile_data = &pbi->tile_worker_data[col]; vp9_zero(tile_data->counts); } } // queue parse jobs for 0th row of every tile for (col = 0; col < tile_cols; ++col) { Job parse_job; parse_job.row_num = 0; parse_job.tile_col = col; parse_job.job_type = PARSE_JOB; vp9_jobq_queue(&row_mt_worker_data->jobq, &parse_job, sizeof(parse_job)); } for (i = 0; i < num_workers; ++i) { VPxWorker *const worker = &pbi->tile_workers[i]; worker->had_error = 0; if (i == num_workers - 1) { winterface->execute(worker); } else { winterface->launch(worker); } } for (; n > 0; --n) { VPxWorker *const worker = &pbi->tile_workers[n - 1]; // TODO(jzern): The tile may have specific error data associated with // its vpx_internal_error_info which could be propagated to the main info // in cm. Additionally once the threads have been synced and an error is // detected, there's no point in continuing to decode tiles. corrupted |= !winterface->sync(worker); } pbi->mb.corrupted = corrupted; { /* Set data end */ TileWorkerData *const tile_data = &pbi->tile_worker_data[tile_cols - 1]; row_mt_worker_data->data_end = vpx_reader_find_end(&tile_data->bit_reader); } // Accumulate thread frame counts. if (!cm->frame_parallel_decoding_mode) { for (i = 0; i < tile_cols; ++i) { TileWorkerData *const tile_data = &pbi->tile_worker_data[i]; vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1); } } return row_mt_worker_data->data_end; } static const uint8_t *decode_tiles_mt(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end) { VP9_COMMON *const cm = &pbi->common; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); const uint8_t *bit_reader_end = NULL; VP9LfSync *lf_row_sync = &pbi->lf_row_sync; YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; const int num_workers = VPXMIN(pbi->max_threads, tile_cols); int n; assert(tile_cols <= (1 << 6)); assert(tile_rows == 1); (void)tile_rows; init_mt(pbi); // Reset tile decoding hook for (n = 0; n < num_workers; ++n) { VPxWorker *const worker = &pbi->tile_workers[n]; TileWorkerData *const tile_data = &pbi->tile_worker_data[n + pbi->total_tiles]; winterface->sync(worker); if (pbi->lpf_mt_opt && cm->lf.filter_level && !cm->skip_loop_filter) { tile_data->lf_sync = lf_row_sync; tile_data->lf_data = &tile_data->lf_sync->lfdata[n]; vp9_loop_filter_data_reset(tile_data->lf_data, new_fb, cm, pbi->mb.plane); tile_data->lf_data->y_only = 0; } tile_data->xd = pbi->mb; tile_data->xd.counts = cm->frame_parallel_decoding_mode ? NULL : &tile_data->counts; worker->hook = tile_worker_hook; worker->data1 = tile_data; worker->data2 = pbi; } // Load tile data into tile_buffers get_tile_buffers(pbi, data, data_end, tile_cols, tile_rows, &pbi->tile_buffers); // Sort the buffers based on size in descending order. qsort(pbi->tile_buffers, tile_cols, sizeof(pbi->tile_buffers[0]), compare_tile_buffers); if (num_workers == tile_cols) { // Rearrange the tile buffers such that the largest, and // presumably the most difficult, tile will be decoded in the main thread. // This should help minimize the number of instances where the main thread // is waiting for a worker to complete. 
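/* The memmove below is a rotate-left-by-one: with the buffers sorted in
 * descending size order, the largest tile moves to the last slot, which is
 * the one handled via winterface->execute() on the main thread. On plain
 * ints the same step looks like: sizes {9, 5, 3, 1} -> {5, 3, 1, 9}.
 * Illustrative sketch: */
static void rotate_largest_to_back_sketch(int *sizes, int n) {
  const int largest = sizes[0]; /* assumes n >= 1 and descending order */
  int i;
  for (i = 0; i < n - 1; ++i) sizes[i] = sizes[i + 1];
  sizes[n - 1] = largest;
}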
const TileBuffer largest = pbi->tile_buffers[0]; memmove(pbi->tile_buffers, pbi->tile_buffers + 1, (tile_cols - 1) * sizeof(pbi->tile_buffers[0])); pbi->tile_buffers[tile_cols - 1] = largest; } else { int start = 0, end = tile_cols - 2; TileBuffer tmp; // Interleave the tiles to distribute the load between threads, assuming a // larger tile implies it is more difficult to decode. while (start < end) { tmp = pbi->tile_buffers[start]; pbi->tile_buffers[start] = pbi->tile_buffers[end]; pbi->tile_buffers[end] = tmp; start += 2; end -= 2; } } // Initialize thread frame counts. if (!cm->frame_parallel_decoding_mode) { for (n = 0; n < num_workers; ++n) { TileWorkerData *const tile_data = (TileWorkerData *)pbi->tile_workers[n].data1; vp9_zero(tile_data->counts); } } { const int base = tile_cols / num_workers; const int remain = tile_cols % num_workers; int buf_start = 0; for (n = 0; n < num_workers; ++n) { const int count = base + (remain + n) / num_workers; VPxWorker *const worker = &pbi->tile_workers[n]; TileWorkerData *const tile_data = (TileWorkerData *)worker->data1; tile_data->buf_start = buf_start; tile_data->buf_end = buf_start + count - 1; tile_data->data_end = data_end; buf_start += count; worker->had_error = 0; if (n == num_workers - 1) { assert(tile_data->buf_end == tile_cols - 1); winterface->execute(worker); } else { winterface->launch(worker); } } for (; n > 0; --n) { VPxWorker *const worker = &pbi->tile_workers[n - 1]; TileWorkerData *const tile_data = (TileWorkerData *)worker->data1; // TODO(jzern): The tile may have specific error data associated with // its vpx_internal_error_info which could be propagated to the main info // in cm. Additionally once the threads have been synced and an error is // detected, there's no point in continuing to decode tiles. pbi->mb.corrupted |= !winterface->sync(worker); if (!bit_reader_end) bit_reader_end = tile_data->data_end; } } // Accumulate thread frame counts. if (!cm->frame_parallel_decoding_mode) { for (n = 0; n < num_workers; ++n) { TileWorkerData *const tile_data = (TileWorkerData *)pbi->tile_workers[n].data1; vp9_accumulate_frame_counts(&cm->counts, &tile_data->counts, 1); } } assert(bit_reader_end || pbi->mb.corrupted); return bit_reader_end; } static void error_handler(void *data) { VP9_COMMON *const cm = (VP9_COMMON *)data; vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet"); } static void read_bitdepth_colorspace_sampling(VP9_COMMON *cm, struct vpx_read_bit_buffer *rb) { if (cm->profile >= PROFILE_2) { cm->bit_depth = vpx_rb_read_bit(rb) ? VPX_BITS_12 : VPX_BITS_10; #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth = 1; #endif } else { cm->bit_depth = VPX_BITS_8; #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth = 0; #endif } cm->color_space = vpx_rb_read_literal(rb, 3); if (cm->color_space != VPX_CS_SRGB) { cm->color_range = (vpx_color_range_t)vpx_rb_read_bit(rb); if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { cm->subsampling_x = vpx_rb_read_bit(rb); cm->subsampling_y = vpx_rb_read_bit(rb); if (cm->subsampling_x == 1 && cm->subsampling_y == 1) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "4:2:0 color not supported in profile 1 or 3"); if (vpx_rb_read_bit(rb)) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Reserved bit set"); } else { cm->subsampling_y = cm->subsampling_x = 1; } } else { cm->color_range = VPX_CR_FULL_RANGE; if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { // Note if colorspace is SRGB then 4:4:4 chroma sampling is assumed. 
// 4:2:2 or 4:4:0 chroma sampling is not allowed. cm->subsampling_y = cm->subsampling_x = 0; if (vpx_rb_read_bit(rb)) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Reserved bit set"); } else { vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "4:4:4 color not supported in profile 0 or 2"); } } } static INLINE void flush_all_fb_on_key(VP9_COMMON *cm) { if (cm->frame_type == KEY_FRAME && cm->current_video_frame > 0) { RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; BufferPool *const pool = cm->buffer_pool; int i; for (i = 0; i < FRAME_BUFFERS; ++i) { if (i == cm->new_fb_idx) continue; frame_bufs[i].ref_count = 0; if (!frame_bufs[i].released) { pool->release_fb_cb(pool->cb_priv, &frame_bufs[i].raw_frame_buffer); frame_bufs[i].released = 1; } } } } static size_t read_uncompressed_header(VP9Decoder *pbi, struct vpx_read_bit_buffer *rb) { VP9_COMMON *const cm = &pbi->common; BufferPool *const pool = cm->buffer_pool; RefCntBuffer *const frame_bufs = pool->frame_bufs; int i, mask, ref_index = 0; size_t sz; cm->last_frame_type = cm->frame_type; cm->last_intra_only = cm->intra_only; if (vpx_rb_read_literal(rb, 2) != VP9_FRAME_MARKER) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame marker"); cm->profile = vp9_read_profile(rb); #if CONFIG_VP9_HIGHBITDEPTH if (cm->profile >= MAX_PROFILES) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Unsupported bitstream profile"); #else if (cm->profile >= PROFILE_2) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Unsupported bitstream profile"); #endif cm->show_existing_frame = vpx_rb_read_bit(rb); if (cm->show_existing_frame) { // Show an existing frame directly. const int frame_to_show = cm->ref_frame_map[vpx_rb_read_literal(rb, 3)]; if (frame_to_show < 0 || frame_bufs[frame_to_show].ref_count < 1) { vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Buffer %d does not contain a decoded frame", frame_to_show); } ref_cnt_fb(frame_bufs, &cm->new_fb_idx, frame_to_show); pbi->refresh_frame_flags = 0; cm->lf.filter_level = 0; cm->show_frame = 1; return 0; } cm->frame_type = (FRAME_TYPE)vpx_rb_read_bit(rb); cm->show_frame = vpx_rb_read_bit(rb); cm->error_resilient_mode = vpx_rb_read_bit(rb); if (cm->frame_type == KEY_FRAME) { if (!vp9_read_sync_code(rb)) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame sync code"); read_bitdepth_colorspace_sampling(cm, rb); pbi->refresh_frame_flags = (1 << REF_FRAMES) - 1; for (i = 0; i < REFS_PER_FRAME; ++i) { cm->frame_refs[i].idx = INVALID_IDX; cm->frame_refs[i].buf = NULL; } setup_frame_size(cm, rb); if (pbi->need_resync) { memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); flush_all_fb_on_key(cm); pbi->need_resync = 0; } } else { cm->intra_only = cm->show_frame ? 0 : vpx_rb_read_bit(rb); cm->reset_frame_context = cm->error_resilient_mode ? 0 : vpx_rb_read_literal(rb, 2); if (cm->intra_only) { if (!vp9_read_sync_code(rb)) vpx_internal_error(&cm->error, VPX_CODEC_UNSUP_BITSTREAM, "Invalid frame sync code"); if (cm->profile > PROFILE_0) { read_bitdepth_colorspace_sampling(cm, rb); } else { // NOTE: The intra-only frame header does not include the specification // of either the color format or color sub-sampling in profile 0. VP9 // specifies that the default color format should be YUV 4:2:0 in this // case (normative). 
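/* (For context, the profile matrix the VP9 specification defines -- stated
 * here as background, not taken from this file:
 *   profile 0: 8-bit,     4:2:0 only
 *   profile 1: 8-bit,     4:2:2, 4:4:0 or 4:4:4
 *   profile 2: 10/12-bit, 4:2:0 only
 *   profile 3: 10/12-bit, 4:2:2, 4:4:0 or 4:4:4
 * which is why a profile-0 intra-only header may hard-code the 8-bit 4:2:0
 * defaults assigned just below.) */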
cm->color_space = VPX_CS_BT_601; cm->color_range = VPX_CR_STUDIO_RANGE; cm->subsampling_y = cm->subsampling_x = 1; cm->bit_depth = VPX_BITS_8; #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth = 0; #endif } pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES); setup_frame_size(cm, rb); if (pbi->need_resync) { memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); pbi->need_resync = 0; } } else if (pbi->need_resync != 1) { /* Skip if need resync */ pbi->refresh_frame_flags = vpx_rb_read_literal(rb, REF_FRAMES); for (i = 0; i < REFS_PER_FRAME; ++i) { const int ref = vpx_rb_read_literal(rb, REF_FRAMES_LOG2); const int idx = cm->ref_frame_map[ref]; RefBuffer *const ref_frame = &cm->frame_refs[i]; ref_frame->idx = idx; ref_frame->buf = &frame_bufs[idx].buf; cm->ref_frame_sign_bias[LAST_FRAME + i] = vpx_rb_read_bit(rb); } setup_frame_size_with_refs(cm, rb); cm->allow_high_precision_mv = vpx_rb_read_bit(rb); cm->interp_filter = read_interp_filter(rb); for (i = 0; i < REFS_PER_FRAME; ++i) { RefBuffer *const ref_buf = &cm->frame_refs[i]; #if CONFIG_VP9_HIGHBITDEPTH vp9_setup_scale_factors_for_frame( &ref_buf->sf, ref_buf->buf->y_crop_width, ref_buf->buf->y_crop_height, cm->width, cm->height, cm->use_highbitdepth); #else vp9_setup_scale_factors_for_frame( &ref_buf->sf, ref_buf->buf->y_crop_width, ref_buf->buf->y_crop_height, cm->width, cm->height); #endif } } } #if CONFIG_VP9_HIGHBITDEPTH get_frame_new_buffer(cm)->bit_depth = cm->bit_depth; #endif get_frame_new_buffer(cm)->color_space = cm->color_space; get_frame_new_buffer(cm)->color_range = cm->color_range; get_frame_new_buffer(cm)->render_width = cm->render_width; get_frame_new_buffer(cm)->render_height = cm->render_height; if (pbi->need_resync) { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Keyframe / intra-only frame required to reset decoder" " state"); } if (!cm->error_resilient_mode) { cm->refresh_frame_context = vpx_rb_read_bit(rb); cm->frame_parallel_decoding_mode = vpx_rb_read_bit(rb); if (!cm->frame_parallel_decoding_mode) vp9_zero(cm->counts); } else { cm->refresh_frame_context = 0; cm->frame_parallel_decoding_mode = 1; } // This flag will be overridden by the call to vp9_setup_past_independence // below, forcing the use of context 0 for those frame types. cm->frame_context_idx = vpx_rb_read_literal(rb, FRAME_CONTEXTS_LOG2); // Generate next_ref_frame_map. for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { if (mask & 1) { cm->next_ref_frame_map[ref_index] = cm->new_fb_idx; ++frame_bufs[cm->new_fb_idx].ref_count; } else { cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; } // Current thread holds the reference frame. if (cm->ref_frame_map[ref_index] >= 0) ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; ++ref_index; } for (; ref_index < REF_FRAMES; ++ref_index) { cm->next_ref_frame_map[ref_index] = cm->ref_frame_map[ref_index]; // Current thread holds the reference frame. 
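/* Sketch of the buffer reference counting that ref_cnt_fb() (used in the
 * show_existing_frame path above) is assumed to implement: drop the old
 * reference, then take the new one. Illustrative restatement only. */
static INLINE void ref_cnt_fb_sketch(RefCntBuffer *bufs, int *idx,
                                     int new_idx) {
  const int old_idx = *idx;
  if (old_idx >= 0 && bufs[old_idx].ref_count > 0) --bufs[old_idx].ref_count;
  *idx = new_idx;
  ++bufs[new_idx].ref_count;
}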
if (cm->ref_frame_map[ref_index] >= 0) ++frame_bufs[cm->ref_frame_map[ref_index]].ref_count; } pbi->hold_ref_buf = 1; if (frame_is_intra_only(cm) || cm->error_resilient_mode) vp9_setup_past_independence(cm); setup_loopfilter(&cm->lf, rb); setup_quantization(cm, &pbi->mb, rb); setup_segmentation(&cm->seg, rb); setup_segmentation_dequant(cm); setup_tile_info(cm, rb); if (pbi->row_mt == 1) { int num_sbs = 1; const int aligned_rows = mi_cols_aligned_to_sb(cm->mi_rows); const int sb_rows = aligned_rows >> MI_BLOCK_SIZE_LOG2; const int num_jobs = sb_rows << cm->log2_tile_cols; if (pbi->row_mt_worker_data == NULL) { CHECK_MEM_ERROR(cm, pbi->row_mt_worker_data, vpx_calloc(1, sizeof(*pbi->row_mt_worker_data))); #if CONFIG_MULTITHREAD pthread_mutex_init(&pbi->row_mt_worker_data->recon_done_mutex, NULL); #endif } if (pbi->max_threads > 1) { const int aligned_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int sb_cols = aligned_cols >> MI_BLOCK_SIZE_LOG2; num_sbs = sb_cols * sb_rows; } if (num_sbs > pbi->row_mt_worker_data->num_sbs || num_jobs > pbi->row_mt_worker_data->num_jobs) { vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data); vp9_dec_alloc_row_mt_mem(pbi->row_mt_worker_data, cm, num_sbs, pbi->max_threads, num_jobs); } vp9_jobq_alloc(pbi); } sz = vpx_rb_read_literal(rb, 16); if (sz == 0) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Invalid header size"); return sz; } static int read_compressed_header(VP9Decoder *pbi, const uint8_t *data, size_t partition_size) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; FRAME_CONTEXT *const fc = cm->fc; vpx_reader r; int k; if (vpx_reader_init(&r, data, partition_size, pbi->decrypt_cb, pbi->decrypt_state)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate bool decoder 0"); cm->tx_mode = xd->lossless ? 
ONLY_4X4 : read_tx_mode(&r); if (cm->tx_mode == TX_MODE_SELECT) read_tx_mode_probs(&fc->tx_probs, &r); read_coef_probs(fc, cm->tx_mode, &r); for (k = 0; k < SKIP_CONTEXTS; ++k) vp9_diff_update_prob(&r, &fc->skip_probs[k]); if (!frame_is_intra_only(cm)) { nmv_context *const nmvc = &fc->nmvc; int i, j; read_inter_mode_probs(fc, &r); if (cm->interp_filter == SWITCHABLE) read_switchable_interp_probs(fc, &r); for (i = 0; i < INTRA_INTER_CONTEXTS; i++) vp9_diff_update_prob(&r, &fc->intra_inter_prob[i]); cm->reference_mode = read_frame_reference_mode(cm, &r); if (cm->reference_mode != SINGLE_REFERENCE) vp9_setup_compound_reference_mode(cm); read_frame_reference_mode_probs(cm, &r); for (j = 0; j < BLOCK_SIZE_GROUPS; j++) for (i = 0; i < INTRA_MODES - 1; ++i) vp9_diff_update_prob(&r, &fc->y_mode_prob[j][i]); for (j = 0; j < PARTITION_CONTEXTS; ++j) for (i = 0; i < PARTITION_TYPES - 1; ++i) vp9_diff_update_prob(&r, &fc->partition_prob[j][i]); read_mv_probs(nmvc, cm->allow_high_precision_mv, &r); } return vpx_reader_has_error(&r); } static struct vpx_read_bit_buffer *init_read_bit_buffer( VP9Decoder *pbi, struct vpx_read_bit_buffer *rb, const uint8_t *data, const uint8_t *data_end, uint8_t clear_data[MAX_VP9_HEADER_SIZE]) { rb->bit_offset = 0; rb->error_handler = error_handler; rb->error_handler_data = &pbi->common; if (pbi->decrypt_cb) { const int n = (int)VPXMIN(MAX_VP9_HEADER_SIZE, data_end - data); pbi->decrypt_cb(pbi->decrypt_state, data, clear_data, n); rb->bit_buffer = clear_data; rb->bit_buffer_end = clear_data + n; } else { rb->bit_buffer = data; rb->bit_buffer_end = data_end; } return rb; } //------------------------------------------------------------------------------ int vp9_read_sync_code(struct vpx_read_bit_buffer *const rb) { return vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_0 && vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_1 && vpx_rb_read_literal(rb, 8) == VP9_SYNC_CODE_2; } void vp9_read_frame_size(struct vpx_read_bit_buffer *rb, int *width, int *height) { *width = vpx_rb_read_literal(rb, 16) + 1; *height = vpx_rb_read_literal(rb, 16) + 1; } BITSTREAM_PROFILE vp9_read_profile(struct vpx_read_bit_buffer *rb) { int profile = vpx_rb_read_bit(rb); profile |= vpx_rb_read_bit(rb) << 1; if (profile > 2) profile += vpx_rb_read_bit(rb); return (BITSTREAM_PROFILE)profile; } void vp9_decode_frame(VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end, const uint8_t **p_data_end) { VP9_COMMON *const cm = &pbi->common; MACROBLOCKD *const xd = &pbi->mb; struct vpx_read_bit_buffer rb; int context_updated = 0; uint8_t clear_data[MAX_VP9_HEADER_SIZE]; const size_t first_partition_size = read_uncompressed_header( pbi, init_read_bit_buffer(pbi, &rb, data, data_end, clear_data)); const int tile_rows = 1 << cm->log2_tile_rows; const int tile_cols = 1 << cm->log2_tile_cols; YV12_BUFFER_CONFIG *const new_fb = get_frame_new_buffer(cm); #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG bitstream_queue_set_frame_read(cm->current_video_frame * 2 + cm->show_frame); #endif #if CONFIG_MISMATCH_DEBUG mismatch_move_frame_idx_r(); #endif xd->cur_buf = new_fb; if (!first_partition_size) { // showing a frame directly *p_data_end = data + (cm->profile <= PROFILE_2 ? 
1 : 2); return; } data += vpx_rb_bytes_read(&rb); if (!read_is_valid(data, first_partition_size, data_end)) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Truncated packet or corrupt header length"); cm->use_prev_frame_mvs = !cm->error_resilient_mode && cm->width == cm->last_width && cm->height == cm->last_height && !cm->last_intra_only && cm->last_show_frame && (cm->last_frame_type != KEY_FRAME); vp9_setup_block_planes(xd, cm->subsampling_x, cm->subsampling_y); *cm->fc = cm->frame_contexts[cm->frame_context_idx]; if (!cm->fc->initialized) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Uninitialized entropy context."); xd->corrupted = 0; new_fb->corrupted = read_compressed_header(pbi, data, first_partition_size); if (new_fb->corrupted) vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Decode failed. Frame data header is corrupted."); if (cm->lf.filter_level && !cm->skip_loop_filter) { vp9_loop_filter_frame_init(cm, cm->lf.filter_level); } if (pbi->tile_worker_data == NULL || (tile_cols * tile_rows) != pbi->total_tiles) { const int num_tile_workers = tile_cols * tile_rows + ((pbi->max_threads > 1) ? pbi->max_threads : 0); const size_t twd_size = num_tile_workers * sizeof(*pbi->tile_worker_data); // Ensure tile data offsets will be properly aligned. This may fail on // platforms without DECLARE_ALIGNED(). assert((sizeof(*pbi->tile_worker_data) % 16) == 0); vpx_free(pbi->tile_worker_data); CHECK_MEM_ERROR(cm, pbi->tile_worker_data, vpx_memalign(32, twd_size)); pbi->total_tiles = tile_rows * tile_cols; } if (pbi->max_threads > 1 && tile_rows == 1 && (tile_cols > 1 || pbi->row_mt == 1)) { if (pbi->row_mt == 1) { *p_data_end = decode_tiles_row_wise_mt(pbi, data + first_partition_size, data_end); } else { // Multi-threaded tile decoder *p_data_end = decode_tiles_mt(pbi, data + first_partition_size, data_end); if (!pbi->lpf_mt_opt) { if (!xd->corrupted) { if (!cm->skip_loop_filter) { // If multiple threads are used to decode tiles, then we use those // threads to do parallel loopfiltering. vp9_loop_filter_frame_mt( new_fb, cm, pbi->mb.plane, cm->lf.filter_level, 0, 0, pbi->tile_workers, pbi->num_tile_workers, &pbi->lf_row_sync); } } else { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Decode failed. Frame data is corrupted."); } } } } else { *p_data_end = decode_tiles(pbi, data + first_partition_size, data_end); } if (!xd->corrupted) { if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { vp9_adapt_coef_probs(cm); if (!frame_is_intra_only(cm)) { vp9_adapt_mode_probs(cm); vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); } } } else { vpx_internal_error(&cm->error, VPX_CODEC_CORRUPT_FRAME, "Decode failed. Frame data is corrupted."); } // Non frame parallel update frame context here. if (cm->refresh_frame_context && !context_updated) cm->frame_contexts[cm->frame_context_idx] = *cm->fc; } libvpx-1.8.2/vp9/decoder/vp9_decodeframe.h000066400000000000000000000021071357355204000203620ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_DECODER_VP9_DECODEFRAME_H_ #define VPX_VP9_DECODER_VP9_DECODEFRAME_H_ #ifdef __cplusplus extern "C" { #endif #include "vp9/common/vp9_enums.h" struct VP9Decoder; struct vpx_read_bit_buffer; int vp9_read_sync_code(struct vpx_read_bit_buffer *const rb); void vp9_read_frame_size(struct vpx_read_bit_buffer *rb, int *width, int *height); BITSTREAM_PROFILE vp9_read_profile(struct vpx_read_bit_buffer *rb); void vp9_decode_frame(struct VP9Decoder *pbi, const uint8_t *data, const uint8_t *data_end, const uint8_t **p_data_end); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_DECODER_VP9_DECODEFRAME_H_ libvpx-1.8.2/vp9/decoder/vp9_decodemv.c000066400000000000000000000756361357355204000177260ustar00rootroot00000000000000/* Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_mvref_common.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/decoder/vp9_decodemv.h" #include "vp9/decoder/vp9_decodeframe.h" #include "vpx_dsp/vpx_dsp_common.h" static PREDICTION_MODE read_intra_mode(vpx_reader *r, const vpx_prob *p) { return (PREDICTION_MODE)vpx_read_tree(r, vp9_intra_mode_tree, p); } static PREDICTION_MODE read_intra_mode_y(VP9_COMMON *cm, MACROBLOCKD *xd, vpx_reader *r, int size_group) { const PREDICTION_MODE y_mode = read_intra_mode(r, cm->fc->y_mode_prob[size_group]); FRAME_COUNTS *counts = xd->counts; if (counts) ++counts->y_mode[size_group][y_mode]; return y_mode; } static PREDICTION_MODE read_intra_mode_uv(VP9_COMMON *cm, MACROBLOCKD *xd, vpx_reader *r, PREDICTION_MODE y_mode) { const PREDICTION_MODE uv_mode = read_intra_mode(r, cm->fc->uv_mode_prob[y_mode]); FRAME_COUNTS *counts = xd->counts; if (counts) ++counts->uv_mode[y_mode][uv_mode]; return uv_mode; } static PREDICTION_MODE read_inter_mode(VP9_COMMON *cm, MACROBLOCKD *xd, vpx_reader *r, int ctx) { const int mode = vpx_read_tree(r, vp9_inter_mode_tree, cm->fc->inter_mode_probs[ctx]); FRAME_COUNTS *counts = xd->counts; if (counts) ++counts->inter_mode[ctx][mode]; return NEARESTMV + mode; } static int read_segment_id(vpx_reader *r, const struct segmentation *seg) { return vpx_read_tree(r, vp9_segment_tree, seg->tree_probs); } static TX_SIZE read_selected_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, TX_SIZE max_tx_size, vpx_reader *r) { FRAME_COUNTS *counts = xd->counts; const int ctx = get_tx_size_context(xd); const vpx_prob *tx_probs = get_tx_probs(max_tx_size, ctx, &cm->fc->tx_probs); int tx_size = vpx_read(r, tx_probs[0]); if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { tx_size += vpx_read(r, tx_probs[1]); if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) tx_size += vpx_read(r, tx_probs[2]); } if (counts) ++get_tx_counts(max_tx_size, ctx, &counts->tx)[tx_size]; return (TX_SIZE)tx_size; } static INLINE TX_SIZE read_tx_size(VP9_COMMON *cm, MACROBLOCKD *xd, int allow_select, vpx_reader *r) { TX_MODE tx_mode = cm->tx_mode; BLOCK_SIZE bsize = xd->mi[0]->sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; if 
(allow_select && tx_mode == TX_MODE_SELECT && bsize >= BLOCK_8X8) return read_selected_tx_size(cm, xd, max_tx_size, r); else return VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[tx_mode]); } static int dec_get_segment_id(const VP9_COMMON *cm, const uint8_t *segment_ids, int mi_offset, int x_mis, int y_mis) { int x, y, segment_id = INT_MAX; for (y = 0; y < y_mis; y++) for (x = 0; x < x_mis; x++) segment_id = VPXMIN(segment_id, segment_ids[mi_offset + y * cm->mi_cols + x]); assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); return segment_id; } static void set_segment_id(VP9_COMMON *cm, int mi_offset, int x_mis, int y_mis, int segment_id) { int x, y; assert(segment_id >= 0 && segment_id < MAX_SEGMENTS); for (y = 0; y < y_mis; y++) for (x = 0; x < x_mis; x++) cm->current_frame_seg_map[mi_offset + y * cm->mi_cols + x] = segment_id; } static void copy_segment_id(const VP9_COMMON *cm, const uint8_t *last_segment_ids, uint8_t *current_segment_ids, int mi_offset, int x_mis, int y_mis) { int x, y; for (y = 0; y < y_mis; y++) for (x = 0; x < x_mis; x++) current_segment_ids[mi_offset + y * cm->mi_cols + x] = last_segment_ids ? last_segment_ids[mi_offset + y * cm->mi_cols + x] : 0; } static int read_intra_segment_id(VP9_COMMON *const cm, int mi_offset, int x_mis, int y_mis, vpx_reader *r) { struct segmentation *const seg = &cm->seg; int segment_id; if (!seg->enabled) return 0; // Default for disabled segmentation if (!seg->update_map) { copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map, mi_offset, x_mis, y_mis); return 0; } segment_id = read_segment_id(r, seg); set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id); return segment_id; } static int read_inter_segment_id(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vpx_reader *r, int x_mis, int y_mis) { struct segmentation *const seg = &cm->seg; MODE_INFO *const mi = xd->mi[0]; int predicted_segment_id, segment_id; const int mi_offset = mi_row * cm->mi_cols + mi_col; if (!seg->enabled) return 0; // Default for disabled segmentation predicted_segment_id = cm->last_frame_seg_map ? dec_get_segment_id(cm, cm->last_frame_seg_map, mi_offset, x_mis, y_mis) : 0; if (!seg->update_map) { copy_segment_id(cm, cm->last_frame_seg_map, cm->current_frame_seg_map, mi_offset, x_mis, y_mis); return predicted_segment_id; } if (seg->temporal_update) { const vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd); mi->seg_id_predicted = vpx_read(r, pred_prob); segment_id = mi->seg_id_predicted ? 
predicted_segment_id : read_segment_id(r, seg); } else { segment_id = read_segment_id(r, seg); } set_segment_id(cm, mi_offset, x_mis, y_mis, segment_id); return segment_id; } static int read_skip(VP9_COMMON *cm, const MACROBLOCKD *xd, int segment_id, vpx_reader *r) { if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { return 1; } else { const int ctx = vp9_get_skip_context(xd); const int skip = vpx_read(r, cm->fc->skip_probs[ctx]); FRAME_COUNTS *counts = xd->counts; if (counts) ++counts->skip[ctx][skip]; return skip; } } static void read_intra_frame_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, int mi_row, int mi_col, vpx_reader *r, int x_mis, int y_mis) { MODE_INFO *const mi = xd->mi[0]; const MODE_INFO *above_mi = xd->above_mi; const MODE_INFO *left_mi = xd->left_mi; const BLOCK_SIZE bsize = mi->sb_type; int i; const int mi_offset = mi_row * cm->mi_cols + mi_col; mi->segment_id = read_intra_segment_id(cm, mi_offset, x_mis, y_mis, r); mi->skip = read_skip(cm, xd, mi->segment_id, r); mi->tx_size = read_tx_size(cm, xd, 1, r); mi->ref_frame[0] = INTRA_FRAME; mi->ref_frame[1] = NONE; switch (bsize) { case BLOCK_4X4: for (i = 0; i < 4; ++i) mi->bmi[i].as_mode = read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, i)); mi->mode = mi->bmi[3].as_mode; break; case BLOCK_4X8: mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode = read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 1)); break; case BLOCK_8X4: mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode = read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 2)); break; default: mi->mode = read_intra_mode(r, get_y_mode_probs(mi, above_mi, left_mi, 0)); } mi->uv_mode = read_intra_mode(r, vp9_kf_uv_mode_prob[mi->mode]); } static int read_mv_component(vpx_reader *r, const nmv_component *mvcomp, int usehp) { int mag, d, fr, hp; const int sign = vpx_read(r, mvcomp->sign); const int mv_class = vpx_read_tree(r, vp9_mv_class_tree, mvcomp->classes); const int class0 = mv_class == MV_CLASS_0; // Integer part if (class0) { d = vpx_read(r, mvcomp->class0[0]); mag = 0; } else { int i; const int n = mv_class + CLASS0_BITS - 1; // number of bits d = 0; for (i = 0; i < n; ++i) d |= vpx_read(r, mvcomp->bits[i]) << i; mag = CLASS0_SIZE << (mv_class + 2); } // Fractional part fr = vpx_read_tree(r, vp9_mv_fp_tree, class0 ? mvcomp->class0_fp[d] : mvcomp->fp); // High precision part (if hp is not used, the default value of the hp is 1) hp = usehp ? vpx_read(r, class0 ? mvcomp->class0_hp : mvcomp->hp) : 1; // Result mag += ((d << 3) | (fr << 1) | hp) + 1; return sign ? 
-mag : mag; } static INLINE void read_mv(vpx_reader *r, MV *mv, const MV *ref, const nmv_context *ctx, nmv_context_counts *counts, int allow_hp) { const MV_JOINT_TYPE joint_type = (MV_JOINT_TYPE)vpx_read_tree(r, vp9_mv_joint_tree, ctx->joints); const int use_hp = allow_hp && use_mv_hp(ref); MV diff = { 0, 0 }; if (mv_joint_vertical(joint_type)) diff.row = read_mv_component(r, &ctx->comps[0], use_hp); if (mv_joint_horizontal(joint_type)) diff.col = read_mv_component(r, &ctx->comps[1], use_hp); vp9_inc_mv(&diff, counts); mv->row = ref->row + diff.row; mv->col = ref->col + diff.col; } static REFERENCE_MODE read_block_reference_mode(VP9_COMMON *cm, const MACROBLOCKD *xd, vpx_reader *r) { if (cm->reference_mode == REFERENCE_MODE_SELECT) { const int ctx = vp9_get_reference_mode_context(cm, xd); const REFERENCE_MODE mode = (REFERENCE_MODE)vpx_read(r, cm->fc->comp_inter_prob[ctx]); FRAME_COUNTS *counts = xd->counts; if (counts) ++counts->comp_inter[ctx][mode]; return mode; // SINGLE_REFERENCE or COMPOUND_REFERENCE } else { return cm->reference_mode; } } // Read the referncence frame static void read_ref_frames(VP9_COMMON *const cm, MACROBLOCKD *const xd, vpx_reader *r, int segment_id, MV_REFERENCE_FRAME ref_frame[2]) { FRAME_CONTEXT *const fc = cm->fc; FRAME_COUNTS *counts = xd->counts; if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { ref_frame[0] = (MV_REFERENCE_FRAME)get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME); ref_frame[1] = NONE; } else { const REFERENCE_MODE mode = read_block_reference_mode(cm, xd, r); // FIXME(rbultje) I'm pretty sure this breaks segmentation ref frame coding if (mode == COMPOUND_REFERENCE) { const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); const int bit = vpx_read(r, fc->comp_ref_prob[ctx]); if (counts) ++counts->comp_ref[ctx][bit]; ref_frame[idx] = cm->comp_fixed_ref; ref_frame[!idx] = cm->comp_var_ref[bit]; } else if (mode == SINGLE_REFERENCE) { const int ctx0 = vp9_get_pred_context_single_ref_p1(xd); const int bit0 = vpx_read(r, fc->single_ref_prob[ctx0][0]); if (counts) ++counts->single_ref[ctx0][0][bit0]; if (bit0) { const int ctx1 = vp9_get_pred_context_single_ref_p2(xd); const int bit1 = vpx_read(r, fc->single_ref_prob[ctx1][1]); if (counts) ++counts->single_ref[ctx1][1][bit1]; ref_frame[0] = bit1 ? 
ALTREF_FRAME : GOLDEN_FRAME; } else { ref_frame[0] = LAST_FRAME; } ref_frame[1] = NONE; } else { assert(0 && "Invalid prediction mode."); } } } static INLINE INTERP_FILTER read_switchable_interp_filter(VP9_COMMON *const cm, MACROBLOCKD *const xd, vpx_reader *r) { const int ctx = get_pred_context_switchable_interp(xd); const INTERP_FILTER type = (INTERP_FILTER)vpx_read_tree( r, vp9_switchable_interp_tree, cm->fc->switchable_interp_prob[ctx]); FRAME_COUNTS *counts = xd->counts; if (counts) ++counts->switchable_interp[ctx][type]; return type; } static void read_intra_block_mode_info(VP9_COMMON *const cm, MACROBLOCKD *const xd, MODE_INFO *mi, vpx_reader *r) { const BLOCK_SIZE bsize = mi->sb_type; int i; switch (bsize) { case BLOCK_4X4: for (i = 0; i < 4; ++i) mi->bmi[i].as_mode = read_intra_mode_y(cm, xd, r, 0); mi->mode = mi->bmi[3].as_mode; break; case BLOCK_4X8: mi->bmi[0].as_mode = mi->bmi[2].as_mode = read_intra_mode_y(cm, xd, r, 0); mi->bmi[1].as_mode = mi->bmi[3].as_mode = mi->mode = read_intra_mode_y(cm, xd, r, 0); break; case BLOCK_8X4: mi->bmi[0].as_mode = mi->bmi[1].as_mode = read_intra_mode_y(cm, xd, r, 0); mi->bmi[2].as_mode = mi->bmi[3].as_mode = mi->mode = read_intra_mode_y(cm, xd, r, 0); break; default: mi->mode = read_intra_mode_y(cm, xd, r, size_group_lookup[bsize]); } mi->uv_mode = read_intra_mode_uv(cm, xd, r, mi->mode); // Initialize interp_filter here so we do not have to check for inter block // modes in get_pred_context_switchable_interp() mi->interp_filter = SWITCHABLE_FILTERS; mi->ref_frame[0] = INTRA_FRAME; mi->ref_frame[1] = NONE; } static INLINE int is_mv_valid(const MV *mv) { return mv->row > MV_LOW && mv->row < MV_UPP && mv->col > MV_LOW && mv->col < MV_UPP; } static INLINE void copy_mv_pair(int_mv *dst, const int_mv *src) { memcpy(dst, src, sizeof(*dst) * 2); } static INLINE void zero_mv_pair(int_mv *dst) { memset(dst, 0, sizeof(*dst) * 2); } static INLINE int assign_mv(VP9_COMMON *cm, MACROBLOCKD *xd, PREDICTION_MODE mode, int_mv mv[2], int_mv ref_mv[2], int_mv near_nearest_mv[2], int is_compound, int allow_hp, vpx_reader *r) { int i; int ret = 1; switch (mode) { case NEWMV: { FRAME_COUNTS *counts = xd->counts; nmv_context_counts *const mv_counts = counts ? &counts->mv : NULL; for (i = 0; i < 1 + is_compound; ++i) { read_mv(r, &mv[i].as_mv, &ref_mv[i].as_mv, &cm->fc->nmvc, mv_counts, allow_hp); ret = ret && is_mv_valid(&mv[i].as_mv); } break; } case NEARMV: case NEARESTMV: { copy_mv_pair(mv, near_nearest_mv); break; } case ZEROMV: { zero_mv_pair(mv); break; } default: { return 0; } } return ret; } static int read_is_inter_block(VP9_COMMON *const cm, MACROBLOCKD *const xd, int segment_id, vpx_reader *r) { if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { return get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME) != INTRA_FRAME; } else { const int ctx = get_intra_inter_context(xd); const int is_inter = vpx_read(r, cm->fc->intra_inter_prob[ctx]); FRAME_COUNTS *counts = xd->counts; if (counts) ++counts->intra_inter[ctx][is_inter]; return is_inter; } } // This macro is used to add a motion vector mv_ref list if it isn't // already in the list. If it's the second motion vector or early_break // it will also skip all additional processing and jump to Done! 
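// For example: with early_break set (any mode other than NEARMV) the first
// candidate mv is stored in mv_ref_list[0] and control jumps straight to
// Done; with early_break clear (NEARMV) the search keeps going until a
// second, distinct mv has been found, filling up to MAX_MV_REF_CANDIDATES
// (2) slots.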
#define ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done) \ do { \ if (refmv_count) { \ if ((mv).as_int != (mv_ref_list)[0].as_int) { \ (mv_ref_list)[(refmv_count)] = (mv); \ refmv_count++; \ goto Done; \ } \ } else { \ (mv_ref_list)[(refmv_count)++] = (mv); \ if (early_break) goto Done; \ } \ } while (0) // If either reference frame is different, not INTRA, and they // are different from each other scale and add the mv to our list. #define IF_DIFF_REF_FRAME_ADD_MV_EB(mbmi, ref_frame, ref_sign_bias, \ refmv_count, mv_ref_list, Done) \ do { \ if (is_inter_block(mbmi)) { \ if ((mbmi)->ref_frame[0] != ref_frame) \ ADD_MV_REF_LIST_EB(scale_mv((mbmi), 0, ref_frame, ref_sign_bias), \ refmv_count, mv_ref_list, Done); \ if (has_second_ref(mbmi) && (mbmi)->ref_frame[1] != ref_frame && \ (mbmi)->mv[1].as_int != (mbmi)->mv[0].as_int) \ ADD_MV_REF_LIST_EB(scale_mv((mbmi), 1, ref_frame, ref_sign_bias), \ refmv_count, mv_ref_list, Done); \ } \ } while (0) // This function searches the neighborhood of a given MB/SB // to try and find candidate reference vectors. static int dec_find_mv_refs(const VP9_COMMON *cm, const MACROBLOCKD *xd, PREDICTION_MODE mode, MV_REFERENCE_FRAME ref_frame, const POSITION *const mv_ref_search, int_mv *mv_ref_list, int mi_row, int mi_col, int block) { const int *ref_sign_bias = cm->ref_frame_sign_bias; int i, refmv_count = 0; int different_ref_found = 0; const MV_REF *const prev_frame_mvs = cm->use_prev_frame_mvs ? cm->prev_frame->mvs + mi_row * cm->mi_cols + mi_col : NULL; const TileInfo *const tile = &xd->tile; // If mode is nearestmv or newmv (uses nearestmv as a reference) then stop // searching after the first mv is found. const int early_break = (mode != NEARMV); // Blank the reference vector list memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES); i = 0; if (block >= 0) { // If the size < 8x8 we get the mv from the bmi substructure for the // nearest two blocks. for (i = 0; i < 2; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; different_ref_found = 1; if (candidate_mi->ref_frame[0] == ref_frame) ADD_MV_REF_LIST_EB( get_sub_block_mv(candidate_mi, 0, mv_ref->col, block), refmv_count, mv_ref_list, Done); else if (candidate_mi->ref_frame[1] == ref_frame) ADD_MV_REF_LIST_EB( get_sub_block_mv(candidate_mi, 1, mv_ref->col, block), refmv_count, mv_ref_list, Done); } } } // Check the rest of the neighbors in much the same way // as before except we don't need to keep track of sub blocks or // mode counts. for (; i < MVREF_NEIGHBOURS; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate = xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; different_ref_found = 1; if (candidate->ref_frame[0] == ref_frame) ADD_MV_REF_LIST_EB(candidate->mv[0], refmv_count, mv_ref_list, Done); else if (candidate->ref_frame[1] == ref_frame) ADD_MV_REF_LIST_EB(candidate->mv[1], refmv_count, mv_ref_list, Done); } } // Check the last frame's mode and mv info. 
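// prev_frame_mvs is only non-NULL when cm->use_prev_frame_mvs is set, i.e.
// the previous frame was shown, had the same dimensions, was neither
// intra-only nor a keyframe, and error-resilient mode is off (see
// vp9_decode_frame()).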
if (prev_frame_mvs) { if (prev_frame_mvs->ref_frame[0] == ref_frame) { ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[0], refmv_count, mv_ref_list, Done); } else if (prev_frame_mvs->ref_frame[1] == ref_frame) { ADD_MV_REF_LIST_EB(prev_frame_mvs->mv[1], refmv_count, mv_ref_list, Done); } } // Since we couldn't find 2 mvs from the same reference frame // go back through the neighbors and find motion vectors from // different reference frames. if (different_ref_found) { for (i = 0; i < MVREF_NEIGHBOURS; ++i) { const POSITION *mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate = xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; // If the candidate is INTRA we don't want to consider its mv. IF_DIFF_REF_FRAME_ADD_MV_EB(candidate, ref_frame, ref_sign_bias, refmv_count, mv_ref_list, Done); } } } // Since we still don't have a candidate we'll try the last frame. if (prev_frame_mvs) { if (prev_frame_mvs->ref_frame[0] != ref_frame && prev_frame_mvs->ref_frame[0] > INTRA_FRAME) { int_mv mv = prev_frame_mvs->mv[0]; if (ref_sign_bias[prev_frame_mvs->ref_frame[0]] != ref_sign_bias[ref_frame]) { mv.as_mv.row *= -1; mv.as_mv.col *= -1; } ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done); } if (prev_frame_mvs->ref_frame[1] > INTRA_FRAME && prev_frame_mvs->ref_frame[1] != ref_frame && prev_frame_mvs->mv[1].as_int != prev_frame_mvs->mv[0].as_int) { int_mv mv = prev_frame_mvs->mv[1]; if (ref_sign_bias[prev_frame_mvs->ref_frame[1]] != ref_sign_bias[ref_frame]) { mv.as_mv.row *= -1; mv.as_mv.col *= -1; } ADD_MV_REF_LIST_EB(mv, refmv_count, mv_ref_list, Done); } } if (mode == NEARMV) refmv_count = MAX_MV_REF_CANDIDATES; else // we only care about the nearestmv for the remaining modes refmv_count = 1; Done: // Clamp vectors for (i = 0; i < refmv_count; ++i) clamp_mv_ref(&mv_ref_list[i].as_mv, xd); return refmv_count; } static void append_sub8x8_mvs_for_idx(VP9_COMMON *cm, MACROBLOCKD *xd, const POSITION *const mv_ref_search, PREDICTION_MODE b_mode, int block, int ref, int mi_row, int mi_col, int_mv *best_sub8x8) { int_mv mv_list[MAX_MV_REF_CANDIDATES]; MODE_INFO *const mi = xd->mi[0]; b_mode_info *bmi = mi->bmi; int n; int refmv_count; assert(MAX_MV_REF_CANDIDATES == 2); switch (block) { case 0: refmv_count = dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search, mv_list, mi_row, mi_col, block); best_sub8x8->as_int = mv_list[refmv_count - 1].as_int; break; case 1: case 2: if (b_mode == NEARESTMV) { best_sub8x8->as_int = bmi[0].as_mv[ref].as_int; } else { dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search, mv_list, mi_row, mi_col, block); best_sub8x8->as_int = 0; for (n = 0; n < 2; ++n) if (bmi[0].as_mv[ref].as_int != mv_list[n].as_int) { best_sub8x8->as_int = mv_list[n].as_int; break; } } break; case 3: if (b_mode == NEARESTMV) { best_sub8x8->as_int = bmi[2].as_mv[ref].as_int; } else { best_sub8x8->as_int = 0; if (bmi[2].as_mv[ref].as_int != bmi[1].as_mv[ref].as_int) { best_sub8x8->as_int = bmi[1].as_mv[ref].as_int; break; } if (bmi[2].as_mv[ref].as_int != bmi[0].as_mv[ref].as_int) { best_sub8x8->as_int = bmi[0].as_mv[ref].as_int; break; } dec_find_mv_refs(cm, xd, b_mode, mi->ref_frame[ref], mv_ref_search, mv_list, mi_row, mi_col, block); for (n = 0; n < 2; ++n) if (bmi[2].as_mv[ref].as_int != mv_list[n].as_int) { best_sub8x8->as_int = mv_list[n].as_int; break; } } break; default: assert(0 && "Invalid block index."); } } static uint8_t get_mode_context(const VP9_COMMON *cm, const MACROBLOCKD *xd, const POSITION *const 
mv_ref_search, int mi_row, int mi_col) { int i; int context_counter = 0; const TileInfo *const tile = &xd->tile; // Get mode count from nearest 2 blocks for (i = 0; i < 2; ++i) { const POSITION *const mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate = xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; // Keep counts for entropy encoding. context_counter += mode_2_counter[candidate->mode]; } } return counter_to_context[context_counter]; } static void read_inter_block_mode_info(VP9Decoder *const pbi, MACROBLOCKD *const xd, MODE_INFO *const mi, int mi_row, int mi_col, vpx_reader *r) { VP9_COMMON *const cm = &pbi->common; const BLOCK_SIZE bsize = mi->sb_type; const int allow_hp = cm->allow_high_precision_mv; int_mv best_ref_mvs[2] = { { 0 }, { 0 } }; int ref, is_compound; uint8_t inter_mode_ctx; const POSITION *const mv_ref_search = mv_ref_blocks[bsize]; read_ref_frames(cm, xd, r, mi->segment_id, mi->ref_frame); is_compound = has_second_ref(mi); inter_mode_ctx = get_mode_context(cm, xd, mv_ref_search, mi_row, mi_col); if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) { mi->mode = ZEROMV; if (bsize < BLOCK_8X8) { vpx_internal_error(xd->error_info, VPX_CODEC_UNSUP_BITSTREAM, "Invalid usage of segement feature on small blocks"); return; } } else { if (bsize >= BLOCK_8X8) mi->mode = read_inter_mode(cm, xd, r, inter_mode_ctx); } mi->interp_filter = (cm->interp_filter == SWITCHABLE) ? read_switchable_interp_filter(cm, xd, r) : cm->interp_filter; if (bsize < BLOCK_8X8) { const int num_4x4_w = 1 << xd->bmode_blocks_wl; const int num_4x4_h = 1 << xd->bmode_blocks_hl; int idx, idy; PREDICTION_MODE b_mode; int got_mv_refs_for_new = 0; int_mv best_sub8x8[2]; const uint32_t invalid_mv = 0x80008000; // Initialize the 2nd element as even though it won't be used meaningfully // if is_compound is false, copying/clamping it may trigger a MSan warning. 
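// 0x80008000 packs row = col = 0x8000 (-32768 as int16_t), which can never
// satisfy the MV_LOW < component < MV_UPP bounds, so is_mv_valid() rejects
// it if it is ever consumed by mistake.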
best_sub8x8[1].as_int = invalid_mv; for (idy = 0; idy < 2; idy += num_4x4_h) { for (idx = 0; idx < 2; idx += num_4x4_w) { const int j = idy * 2 + idx; b_mode = read_inter_mode(cm, xd, r, inter_mode_ctx); if (b_mode == NEARESTMV || b_mode == NEARMV) { for (ref = 0; ref < 1 + is_compound; ++ref) append_sub8x8_mvs_for_idx(cm, xd, mv_ref_search, b_mode, j, ref, mi_row, mi_col, &best_sub8x8[ref]); } else if (b_mode == NEWMV && !got_mv_refs_for_new) { for (ref = 0; ref < 1 + is_compound; ++ref) { int_mv tmp_mvs[MAX_MV_REF_CANDIDATES]; const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; dec_find_mv_refs(cm, xd, NEWMV, frame, mv_ref_search, tmp_mvs, mi_row, mi_col, -1); lower_mv_precision(&tmp_mvs[0].as_mv, allow_hp); best_ref_mvs[ref] = tmp_mvs[0]; got_mv_refs_for_new = 1; } } if (!assign_mv(cm, xd, b_mode, mi->bmi[j].as_mv, best_ref_mvs, best_sub8x8, is_compound, allow_hp, r)) { xd->corrupted |= 1; break; } if (num_4x4_h == 2) mi->bmi[j + 2] = mi->bmi[j]; if (num_4x4_w == 2) mi->bmi[j + 1] = mi->bmi[j]; } } mi->mode = b_mode; copy_mv_pair(mi->mv, mi->bmi[3].as_mv); } else { if (mi->mode != ZEROMV) { for (ref = 0; ref < 1 + is_compound; ++ref) { int_mv tmp_mvs[MAX_MV_REF_CANDIDATES]; const MV_REFERENCE_FRAME frame = mi->ref_frame[ref]; int refmv_count = dec_find_mv_refs(cm, xd, mi->mode, frame, mv_ref_search, tmp_mvs, mi_row, mi_col, -1); lower_mv_precision(&tmp_mvs[refmv_count - 1].as_mv, allow_hp); best_ref_mvs[ref] = tmp_mvs[refmv_count - 1]; } } xd->corrupted |= !assign_mv(cm, xd, mi->mode, mi->mv, best_ref_mvs, best_ref_mvs, is_compound, allow_hp, r); } } static void read_inter_frame_mode_info(VP9Decoder *const pbi, MACROBLOCKD *const xd, int mi_row, int mi_col, vpx_reader *r, int x_mis, int y_mis) { VP9_COMMON *const cm = &pbi->common; MODE_INFO *const mi = xd->mi[0]; int inter_block; mi->segment_id = read_inter_segment_id(cm, xd, mi_row, mi_col, r, x_mis, y_mis); mi->skip = read_skip(cm, xd, mi->segment_id, r); inter_block = read_is_inter_block(cm, xd, mi->segment_id, r); mi->tx_size = read_tx_size(cm, xd, !mi->skip || !inter_block, r); if (inter_block) read_inter_block_mode_info(pbi, xd, mi, mi_row, mi_col, r); else read_intra_block_mode_info(cm, xd, mi, r); } static INLINE void copy_ref_frame_pair(MV_REFERENCE_FRAME *dst, const MV_REFERENCE_FRAME *src) { memcpy(dst, src, sizeof(*dst) * 2); } void vp9_read_mode_info(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, int mi_col, int x_mis, int y_mis) { vpx_reader *r = &twd->bit_reader; MACROBLOCKD *const xd = &twd->xd; VP9_COMMON *const cm = &pbi->common; MODE_INFO *const mi = xd->mi[0]; MV_REF *frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; int w, h; if (frame_is_intra_only(cm)) { read_intra_frame_mode_info(cm, xd, mi_row, mi_col, r, x_mis, y_mis); } else { read_inter_frame_mode_info(pbi, xd, mi_row, mi_col, r, x_mis, y_mis); for (h = 0; h < y_mis; ++h) { for (w = 0; w < x_mis; ++w) { MV_REF *const mv = frame_mvs + w; copy_ref_frame_pair(mv->ref_frame, mi->ref_frame); copy_mv_pair(mv->mv, mi->mv); } frame_mvs += cm->mi_cols; } } #if 0 // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && (xd->above_mi == NULL || xd->left_mi == NULL) && !is_inter_block(mi) && need_top_left[mi->uv_mode]) assert(0); #endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH } libvpx-1.8.2/vp9/decoder/vp9_decodemv.h000066400000000000000000000014761357355204000177220ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_DECODER_VP9_DECODEMV_H_ #define VPX_VP9_DECODER_VP9_DECODEMV_H_ #include "vpx_dsp/bitreader.h" #include "vp9/decoder/vp9_decoder.h" #ifdef __cplusplus extern "C" { #endif void vp9_read_mode_info(TileWorkerData *twd, VP9Decoder *const pbi, int mi_row, int mi_col, int x_mis, int y_mis); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_DECODER_VP9_DECODEMV_H_ libvpx-1.8.2/vp9/decoder/vp9_decoder.c000066400000000000000000000450461357355204000175350ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/system_state.h" #include "vpx_ports/vpx_once.h" #include "vpx_ports/vpx_timer.h" #include "vpx_scale/vpx_scale.h" #include "vpx_util/vpx_thread.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_loopfilter.h" #include "vp9/common/vp9_onyxc_int.h" #if CONFIG_VP9_POSTPROC #include "vp9/common/vp9_postproc.h" #endif #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_reconintra.h" #include "vp9/decoder/vp9_decodeframe.h" #include "vp9/decoder/vp9_decoder.h" #include "vp9/decoder/vp9_detokenize.h" static void initialize_dec(void) { static volatile int init_done = 0; if (!init_done) { vp9_rtcd(); vpx_dsp_rtcd(); vpx_scale_rtcd(); vp9_init_intra_predictors(); init_done = 1; } } static void vp9_dec_setup_mi(VP9_COMMON *cm) { cm->mi = cm->mip + cm->mi_stride + 1; cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; memset(cm->mi_grid_base, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base)); } void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data, VP9_COMMON *cm, int num_sbs, int max_threads, int num_jobs) { int plane; const size_t dqcoeff_size = (num_sbs << DQCOEFFS_PER_SB_LOG2) * sizeof(*row_mt_worker_data->dqcoeff[0]); row_mt_worker_data->num_jobs = num_jobs; #if CONFIG_MULTITHREAD { int i; CHECK_MEM_ERROR( cm, row_mt_worker_data->recon_sync_mutex, vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_mutex) * num_jobs)); if (row_mt_worker_data->recon_sync_mutex) { for (i = 0; i < num_jobs; ++i) { pthread_mutex_init(&row_mt_worker_data->recon_sync_mutex[i], NULL); } } CHECK_MEM_ERROR( cm, row_mt_worker_data->recon_sync_cond, vpx_malloc(sizeof(*row_mt_worker_data->recon_sync_cond) * num_jobs)); if (row_mt_worker_data->recon_sync_cond) { for (i = 0; i < num_jobs; ++i) { pthread_cond_init(&row_mt_worker_data->recon_sync_cond[i], NULL); } } } #endif row_mt_worker_data->num_sbs = num_sbs; for (plane = 0; plane < 3; ++plane) { CHECK_MEM_ERROR(cm, row_mt_worker_data->dqcoeff[plane], vpx_memalign(16, dqcoeff_size)); memset(row_mt_worker_data->dqcoeff[plane], 0, dqcoeff_size); CHECK_MEM_ERROR(cm, row_mt_worker_data->eob[plane], vpx_calloc(num_sbs << EOBS_PER_SB_LOG2, 
sizeof(*row_mt_worker_data->eob[plane]))); } CHECK_MEM_ERROR(cm, row_mt_worker_data->partition, vpx_calloc(num_sbs * PARTITIONS_PER_SB, sizeof(*row_mt_worker_data->partition))); CHECK_MEM_ERROR(cm, row_mt_worker_data->recon_map, vpx_calloc(num_sbs, sizeof(*row_mt_worker_data->recon_map))); // allocate memory for thread_data if (row_mt_worker_data->thread_data == NULL) { const size_t thread_size = max_threads * sizeof(*row_mt_worker_data->thread_data); CHECK_MEM_ERROR(cm, row_mt_worker_data->thread_data, vpx_memalign(32, thread_size)); } } void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data) { if (row_mt_worker_data != NULL) { int plane; #if CONFIG_MULTITHREAD int i; if (row_mt_worker_data->recon_sync_mutex != NULL) { for (i = 0; i < row_mt_worker_data->num_jobs; ++i) { pthread_mutex_destroy(&row_mt_worker_data->recon_sync_mutex[i]); } vpx_free(row_mt_worker_data->recon_sync_mutex); row_mt_worker_data->recon_sync_mutex = NULL; } if (row_mt_worker_data->recon_sync_cond != NULL) { for (i = 0; i < row_mt_worker_data->num_jobs; ++i) { pthread_cond_destroy(&row_mt_worker_data->recon_sync_cond[i]); } vpx_free(row_mt_worker_data->recon_sync_cond); row_mt_worker_data->recon_sync_cond = NULL; } #endif for (plane = 0; plane < 3; ++plane) { vpx_free(row_mt_worker_data->eob[plane]); row_mt_worker_data->eob[plane] = NULL; vpx_free(row_mt_worker_data->dqcoeff[plane]); row_mt_worker_data->dqcoeff[plane] = NULL; } vpx_free(row_mt_worker_data->partition); row_mt_worker_data->partition = NULL; vpx_free(row_mt_worker_data->recon_map); row_mt_worker_data->recon_map = NULL; vpx_free(row_mt_worker_data->thread_data); row_mt_worker_data->thread_data = NULL; } } static int vp9_dec_alloc_mi(VP9_COMMON *cm, int mi_size) { cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip)); if (!cm->mip) return 1; cm->mi_alloc_size = mi_size; cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *)); if (!cm->mi_grid_base) return 1; return 0; } static void vp9_dec_free_mi(VP9_COMMON *cm) { vpx_free(cm->mip); cm->mip = NULL; vpx_free(cm->mi_grid_base); cm->mi_grid_base = NULL; cm->mi_alloc_size = 0; } VP9Decoder *vp9_decoder_create(BufferPool *const pool) { VP9Decoder *volatile const pbi = vpx_memalign(32, sizeof(*pbi)); VP9_COMMON *volatile const cm = pbi ? &pbi->common : NULL; if (!cm) return NULL; vp9_zero(*pbi); if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; vp9_decoder_remove(pbi); return NULL; } cm->error.setjmp = 1; CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc))); CHECK_MEM_ERROR( cm, cm->frame_contexts, (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts))); pbi->need_resync = 1; once(initialize_dec); // Initialize the references to not point to any frame buffers. 
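// Filling with memset(-1) sets every byte to 0xff, so each int entry reads
// back as -1; a negative map entry means "no buffer attached" (see the
// ">= 0" guards in read_uncompressed_header()).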
memset(&cm->ref_frame_map, -1, sizeof(cm->ref_frame_map)); memset(&cm->next_ref_frame_map, -1, sizeof(cm->next_ref_frame_map)); cm->current_video_frame = 0; pbi->ready_for_new_data = 1; pbi->common.buffer_pool = pool; cm->bit_depth = VPX_BITS_8; cm->dequant_bit_depth = VPX_BITS_8; cm->alloc_mi = vp9_dec_alloc_mi; cm->free_mi = vp9_dec_free_mi; cm->setup_mi = vp9_dec_setup_mi; vp9_loop_filter_init(cm); cm->error.setjmp = 0; vpx_get_worker_interface()->init(&pbi->lf_worker); return pbi; } void vp9_decoder_remove(VP9Decoder *pbi) { int i; if (!pbi) return; vpx_get_worker_interface()->end(&pbi->lf_worker); vpx_free(pbi->lf_worker.data1); for (i = 0; i < pbi->num_tile_workers; ++i) { VPxWorker *const worker = &pbi->tile_workers[i]; vpx_get_worker_interface()->end(worker); } vpx_free(pbi->tile_worker_data); vpx_free(pbi->tile_workers); if (pbi->num_tile_workers > 0) { vp9_loop_filter_dealloc(&pbi->lf_row_sync); } if (pbi->row_mt == 1) { vp9_dec_free_row_mt_mem(pbi->row_mt_worker_data); if (pbi->row_mt_worker_data != NULL) { vp9_jobq_deinit(&pbi->row_mt_worker_data->jobq); vpx_free(pbi->row_mt_worker_data->jobq_buf); #if CONFIG_MULTITHREAD pthread_mutex_destroy(&pbi->row_mt_worker_data->recon_done_mutex); #endif } vpx_free(pbi->row_mt_worker_data); } vp9_remove_common(&pbi->common); vpx_free(pbi); } static int equal_dimensions(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) { return a->y_height == b->y_height && a->y_width == b->y_width && a->uv_height == b->uv_height && a->uv_width == b->uv_width; } vpx_codec_err_t vp9_copy_reference_dec(VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { VP9_COMMON *cm = &pbi->common; /* TODO(jkoleszar): The decoder doesn't have any real knowledge of what the * encoder is using the frame buffers for. This is just a stub to keep the * vpxenc --test-decode functionality working, and will be replaced in a * later commit that adds VP9-specific controls for this functionality. */ if (ref_frame_flag == VP9_LAST_FLAG) { const YV12_BUFFER_CONFIG *const cfg = get_ref_frame(cm, 0); if (cfg == NULL) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "No 'last' reference frame"); return VPX_CODEC_ERROR; } if (!equal_dimensions(cfg, sd)) vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Incorrect buffer dimensions"); else vpx_yv12_copy_frame(cfg, sd); } else { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame"); } return cm->error.error_code; } vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { int idx; YV12_BUFFER_CONFIG *ref_buf = NULL; // TODO(jkoleszar): The decoder doesn't have any real knowledge of what the // encoder is using the frame buffers for. This is just a stub to keep the // vpxenc --test-decode functionality working, and will be replaced in a // later commit that adds VP9-specific controls for this functionality. // (Yunqing) The set_reference control depends on the following setting in // encoder. 
// cpi->lst_fb_idx = 0; // cpi->gld_fb_idx = 1; // cpi->alt_fb_idx = 2; if (ref_frame_flag == VP9_LAST_FLAG) { idx = cm->ref_frame_map[0]; } else if (ref_frame_flag == VP9_GOLD_FLAG) { idx = cm->ref_frame_map[1]; } else if (ref_frame_flag == VP9_ALT_FLAG) { idx = cm->ref_frame_map[2]; } else { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame"); return cm->error.error_code; } if (idx < 0 || idx >= FRAME_BUFFERS) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Invalid reference frame map"); return cm->error.error_code; } // Get the destination reference buffer. ref_buf = &cm->buffer_pool->frame_bufs[idx].buf; if (!equal_dimensions(ref_buf, sd)) { vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Incorrect buffer dimensions"); } else { // Overwrite the reference frame buffer. vpx_yv12_copy_frame(sd, ref_buf); } return cm->error.error_code; } /* If any buffer updating is signaled it should be done here. */ static void swap_frame_buffers(VP9Decoder *pbi) { int ref_index = 0, mask; VP9_COMMON *const cm = &pbi->common; BufferPool *const pool = cm->buffer_pool; RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { const int old_idx = cm->ref_frame_map[ref_index]; // Current thread releases the holding of reference frame. decrease_ref_count(old_idx, frame_bufs, pool); // Release the reference frame in reference map. if (mask & 1) { decrease_ref_count(old_idx, frame_bufs, pool); } cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; ++ref_index; } // Current thread releases the holding of reference frame. for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { const int old_idx = cm->ref_frame_map[ref_index]; decrease_ref_count(old_idx, frame_bufs, pool); cm->ref_frame_map[ref_index] = cm->next_ref_frame_map[ref_index]; } pbi->hold_ref_buf = 0; cm->frame_to_show = get_frame_new_buffer(cm); --frame_bufs[cm->new_fb_idx].ref_count; // Invalidate these references until the next frame starts. for (ref_index = 0; ref_index < 3; ref_index++) cm->frame_refs[ref_index].idx = -1; } static void release_fb_on_decoder_exit(VP9Decoder *pbi) { const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); VP9_COMMON *volatile const cm = &pbi->common; BufferPool *volatile const pool = cm->buffer_pool; RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs; int i; // Synchronize all threads immediately as a subsequent decode call may // cause a resize invalidating some allocations. winterface->sync(&pbi->lf_worker); for (i = 0; i < pbi->num_tile_workers; ++i) { winterface->sync(&pbi->tile_workers[i]); } // Release all the reference buffers if worker thread is holding them. if (pbi->hold_ref_buf == 1) { int ref_index = 0, mask; for (mask = pbi->refresh_frame_flags; mask; mask >>= 1) { const int old_idx = cm->ref_frame_map[ref_index]; // Current thread releases the holding of reference frame. decrease_ref_count(old_idx, frame_bufs, pool); // Release the reference frame in reference map. if (mask & 1) { decrease_ref_count(old_idx, frame_bufs, pool); } ++ref_index; } // Current thread releases the holding of reference frame. 
for (; ref_index < REF_FRAMES && !cm->show_existing_frame; ++ref_index) { const int old_idx = cm->ref_frame_map[ref_index]; decrease_ref_count(old_idx, frame_bufs, pool); } pbi->hold_ref_buf = 0; } } int vp9_receive_compressed_data(VP9Decoder *pbi, size_t size, const uint8_t **psource) { VP9_COMMON *volatile const cm = &pbi->common; BufferPool *volatile const pool = cm->buffer_pool; RefCntBuffer *volatile const frame_bufs = cm->buffer_pool->frame_bufs; const uint8_t *source = *psource; int retcode = 0; cm->error.error_code = VPX_CODEC_OK; if (size == 0) { // This is used to signal that we are missing frames. // We do not know if the missing frame(s) was supposed to update // any of the reference buffers, but we act conservative and // mark only the last buffer as corrupted. // // TODO(jkoleszar): Error concealment is undefined and non-normative // at this point, but if it becomes so, [0] may not always be the correct // thing to do here. if (cm->frame_refs[0].idx > 0) { assert(cm->frame_refs[0].buf != NULL); cm->frame_refs[0].buf->corrupted = 1; } } pbi->ready_for_new_data = 0; // Check if the previous frame was a frame without any references to it. if (cm->new_fb_idx >= 0 && frame_bufs[cm->new_fb_idx].ref_count == 0 && !frame_bufs[cm->new_fb_idx].released) { pool->release_fb_cb(pool->cb_priv, &frame_bufs[cm->new_fb_idx].raw_frame_buffer); frame_bufs[cm->new_fb_idx].released = 1; } // Find a free frame buffer. Return error if can not find any. cm->new_fb_idx = get_free_fb(cm); if (cm->new_fb_idx == INVALID_IDX) { pbi->ready_for_new_data = 1; release_fb_on_decoder_exit(pbi); vpx_clear_system_state(); vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Unable to find free frame buffer"); return cm->error.error_code; } // Assign a MV array to the frame buffer. cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; pbi->hold_ref_buf = 0; pbi->cur_buf = &frame_bufs[cm->new_fb_idx]; if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; pbi->ready_for_new_data = 1; release_fb_on_decoder_exit(pbi); // Release current frame. decrease_ref_count(cm->new_fb_idx, frame_bufs, pool); vpx_clear_system_state(); return -1; } cm->error.setjmp = 1; vp9_decode_frame(pbi, source, source + size, psource); swap_frame_buffers(pbi); vpx_clear_system_state(); if (!cm->show_existing_frame) { cm->last_show_frame = cm->show_frame; cm->prev_frame = cm->cur_frame; if (cm->seg.enabled) vp9_swap_current_and_last_seg_map(cm); } if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx; // Update progress in frame parallel decode. cm->last_width = cm->width; cm->last_height = cm->height; if (cm->show_frame) { cm->current_video_frame++; } cm->error.setjmp = 0; return retcode; } int vp9_get_raw_frame(VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, vp9_ppflags_t *flags) { VP9_COMMON *const cm = &pbi->common; int ret = -1; #if !CONFIG_VP9_POSTPROC (void)*flags; #endif if (pbi->ready_for_new_data == 1) return ret; pbi->ready_for_new_data = 1; /* no raw frame to show!!! 
*/ if (!cm->show_frame) return ret; pbi->ready_for_new_data = 1; #if CONFIG_VP9_POSTPROC if (!cm->show_existing_frame) { ret = vp9_post_proc_frame(cm, sd, flags, cm->width); } else { *sd = *cm->frame_to_show; ret = 0; } #else *sd = *cm->frame_to_show; ret = 0; #endif /*!CONFIG_POSTPROC*/ vpx_clear_system_state(); return ret; } vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, size_t data_sz, uint32_t sizes[8], int *count, vpx_decrypt_cb decrypt_cb, void *decrypt_state) { // A chunk ending with a byte matching 0xc0 is an invalid chunk unless // it is a super frame index. If the last byte of real video compression // data is 0xc0 the encoder must add a 0 byte. If we have the marker but // not the associated matching marker byte at the front of the index we have // an invalid bitstream and need to return an error. uint8_t marker; assert(data_sz); marker = read_marker(decrypt_cb, decrypt_state, data + data_sz - 1); *count = 0; if ((marker & 0xe0) == 0xc0) { const uint32_t frames = (marker & 0x7) + 1; const uint32_t mag = ((marker >> 3) & 0x3) + 1; const size_t index_sz = 2 + mag * frames; // This chunk is marked as having a superframe index but doesn't have // enough data for it, thus it's an invalid superframe index. if (data_sz < index_sz) return VPX_CODEC_CORRUPT_FRAME; { const uint8_t marker2 = read_marker(decrypt_cb, decrypt_state, data + data_sz - index_sz); // This chunk is marked as having a superframe index but doesn't have // the matching marker byte at the front of the index therefore it's an // invalid chunk. if (marker != marker2) return VPX_CODEC_CORRUPT_FRAME; } { // Found a valid superframe index. uint32_t i, j; const uint8_t *x = &data[data_sz - index_sz + 1]; // Frames has a maximum of 8 and mag has a maximum of 4. uint8_t clear_buffer[32]; assert(sizeof(clear_buffer) >= frames * mag); if (decrypt_cb) { decrypt_cb(decrypt_state, x, clear_buffer, frames * mag); x = clear_buffer; } for (i = 0; i < frames; ++i) { uint32_t this_sz = 0; for (j = 0; j < mag; ++j) this_sz |= ((uint32_t)(*x++)) << (j * 8); sizes[i] = this_sz; } *count = frames; } } return VPX_CODEC_OK; } libvpx-1.8.2/vp9/decoder/vp9_decoder.h000066400000000000000000000134041357355204000175330ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_DECODER_VP9_DECODER_H_ #define VPX_VP9_DECODER_VP9_DECODER_H_ #include "./vpx_config.h" #include "vpx/vpx_codec.h" #include "vpx_dsp/bitreader.h" #include "vpx_scale/yv12config.h" #include "vpx_util/vpx_thread.h" #include "vp9/common/vp9_thread_common.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_ppflags.h" #include "./vp9_job_queue.h" #ifdef __cplusplus extern "C" { #endif #define EOBS_PER_SB_LOG2 8 #define DQCOEFFS_PER_SB_LOG2 12 #define PARTITIONS_PER_SB 85 typedef enum JobType { PARSE_JOB, RECON_JOB, LPF_JOB } JobType; typedef struct ThreadData { struct VP9Decoder *pbi; LFWorkerData *lf_data; VP9LfSync *lf_sync; } ThreadData; typedef struct TileBuffer { const uint8_t *data; size_t size; int col; // only used with multi-threaded decoding } TileBuffer; typedef struct TileWorkerData { const uint8_t *data_end; int buf_start, buf_end; // pbi->tile_buffers to decode, inclusive vpx_reader bit_reader; FRAME_COUNTS counts; LFWorkerData *lf_data; VP9LfSync *lf_sync; DECLARE_ALIGNED(16, MACROBLOCKD, xd); /* dqcoeff are shared by all the planes. So planes must be decoded serially */ DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); DECLARE_ALIGNED(16, uint16_t, extend_and_predict_buf[80 * 2 * 80 * 2]); struct vpx_internal_error_info error_info; } TileWorkerData; typedef void (*process_block_fn_t)(TileWorkerData *twd, struct VP9Decoder *const pbi, int mi_row, int mi_col, BLOCK_SIZE bsize, int bwl, int bhl); typedef struct RowMTWorkerData { int num_sbs; int *eob[MAX_MB_PLANE]; PARTITION_TYPE *partition; tran_low_t *dqcoeff[MAX_MB_PLANE]; int8_t *recon_map; const uint8_t *data_end; uint8_t *jobq_buf; JobQueueRowMt jobq; size_t jobq_size; int num_tiles_done; int num_jobs; #if CONFIG_MULTITHREAD pthread_mutex_t recon_done_mutex; pthread_mutex_t *recon_sync_mutex; pthread_cond_t *recon_sync_cond; #endif ThreadData *thread_data; } RowMTWorkerData; /* Structure to queue and dequeue row decode jobs */ typedef struct Job { int row_num; int tile_col; JobType job_type; } Job; typedef struct VP9Decoder { DECLARE_ALIGNED(16, MACROBLOCKD, mb); DECLARE_ALIGNED(16, VP9_COMMON, common); int ready_for_new_data; int refresh_frame_flags; // TODO(hkuang): Combine this with cur_buf in macroblockd as they are // the same. RefCntBuffer *cur_buf; // Current decoding frame buffer. VPxWorker lf_worker; VPxWorker *tile_workers; TileWorkerData *tile_worker_data; TileBuffer tile_buffers[64]; int num_tile_workers; int total_tiles; VP9LfSync lf_row_sync; vpx_decrypt_cb decrypt_cb; void *decrypt_state; int max_threads; int inv_tile_order; int need_resync; // wait for key/intra-only frame. int hold_ref_buf; // hold the reference buffer. int row_mt; int lpf_mt_opt; RowMTWorkerData *row_mt_worker_data; } VP9Decoder; int vp9_receive_compressed_data(struct VP9Decoder *pbi, size_t size, const uint8_t **psource); int vp9_get_raw_frame(struct VP9Decoder *pbi, YV12_BUFFER_CONFIG *sd, vp9_ppflags_t *flags); vpx_codec_err_t vp9_copy_reference_dec(struct VP9Decoder *pbi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); vpx_codec_err_t vp9_set_reference_dec(VP9_COMMON *cm, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); static INLINE uint8_t read_marker(vpx_decrypt_cb decrypt_cb, void *decrypt_state, const uint8_t *data) { if (decrypt_cb) { uint8_t marker; decrypt_cb(decrypt_state, data, &marker, 1); return marker; } return *data; } // This function is exposed for use in tests, as well as the inlined function // "read_marker". 
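// A VP9 superframe packs several compressed frames into a single chunk and
// appends an index: a marker byte of the form 0b110mmfff (fff = frame count
// minus 1, mm = bytes per size field minus 1), one size field per frame, and
// a repeated copy of the marker byte at the very end. For example, marker
// 0xc9 (0b11001001) announces 2 frames with 2-byte size fields, so the index
// occupies the trailing 2 + 2 * 2 = 6 bytes of the chunk.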
vpx_codec_err_t vp9_parse_superframe_index(const uint8_t *data, size_t data_sz, uint32_t sizes[8], int *count, vpx_decrypt_cb decrypt_cb, void *decrypt_state); struct VP9Decoder *vp9_decoder_create(BufferPool *const pool); void vp9_decoder_remove(struct VP9Decoder *pbi); void vp9_dec_alloc_row_mt_mem(RowMTWorkerData *row_mt_worker_data, VP9_COMMON *cm, int num_sbs, int max_threads, int num_jobs); void vp9_dec_free_row_mt_mem(RowMTWorkerData *row_mt_worker_data); static INLINE void decrease_ref_count(int idx, RefCntBuffer *const frame_bufs, BufferPool *const pool) { if (idx >= 0 && frame_bufs[idx].ref_count > 0) { --frame_bufs[idx].ref_count; // A worker may only get a free framebuffer index when calling get_free_fb. // But the private buffer is not set up until finish decoding header. // So any error happens during decoding header, the frame_bufs will not // have valid priv buffer. if (!frame_bufs[idx].released && frame_bufs[idx].ref_count == 0 && frame_bufs[idx].raw_frame_buffer.priv) { pool->release_fb_cb(pool->cb_priv, &frame_bufs[idx].raw_frame_buffer); frame_bufs[idx].released = 1; } } } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_DECODER_VP9_DECODER_H_ libvpx-1.8.2/vp9/decoder/vp9_detokenize.c000066400000000000000000000256271357355204000202740ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropy.h" #if CONFIG_COEFFICIENT_RANGE_CHECKING #include "vp9/common/vp9_idct.h" #endif #include "vp9/decoder/vp9_detokenize.h" #define EOB_CONTEXT_NODE 0 #define ZERO_CONTEXT_NODE 1 #define ONE_CONTEXT_NODE 2 #define INCREMENT_COUNT(token) \ do { \ if (counts) ++coef_counts[band][ctx][token]; \ } while (0) static INLINE int read_bool(vpx_reader *r, int prob, BD_VALUE *value, int *count, unsigned int *range) { const unsigned int split = (*range * prob + (256 - prob)) >> CHAR_BIT; const BD_VALUE bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT); #if CONFIG_BITSTREAM_DEBUG const int queue_r = bitstream_queue_get_read(); const int frame_idx = bitstream_queue_get_frame_read(); int ref_result, ref_prob; bitstream_queue_pop(&ref_result, &ref_prob); if (prob != ref_prob) { fprintf(stderr, "\n *** [bit] prob error, frame_idx_r %d prob %d ref_prob %d " "queue_r %d\n", frame_idx, prob, ref_prob, queue_r); assert(0); } #endif if (*count < 0) { r->value = *value; r->count = *count; vpx_reader_fill(r); *value = r->value; *count = r->count; } if (*value >= bigsplit) { *range = *range - split; *value = *value - bigsplit; { const int shift = vpx_norm[*range]; *range <<= shift; *value <<= shift; *count -= shift; } #if CONFIG_BITSTREAM_DEBUG { const int bit = 1; if (bit != ref_result) { fprintf( stderr, "\n *** [bit] result error, frame_idx_r %d bit %d ref_result %d " "queue_r %d\n", frame_idx, bit, ref_result, queue_r); assert(0); } } #endif return 1; } *range = split; { const int shift = vpx_norm[*range]; *range <<= shift; *value <<= shift; *count -= shift; } #if CONFIG_BITSTREAM_DEBUG { const int bit = 0; if (bit != ref_result) { fprintf(stderr, "\n *** [bit] result 
error, frame_idx_r %d bit %d ref_result %d " "queue_r %d\n", frame_idx, bit, ref_result, queue_r); assert(0); } } #endif return 0; } static INLINE int read_coeff(vpx_reader *r, const vpx_prob *probs, int n, BD_VALUE *value, int *count, unsigned int *range) { int i, val = 0; for (i = 0; i < n; ++i) val = (val << 1) | read_bool(r, probs[i], value, count, range); return val; } static int decode_coefs(const MACROBLOCKD *xd, PLANE_TYPE type, tran_low_t *dqcoeff, TX_SIZE tx_size, const int16_t *dq, int ctx, const int16_t *scan, const int16_t *nb, vpx_reader *r) { FRAME_COUNTS *counts = xd->counts; const int max_eob = 16 << (tx_size << 1); const FRAME_CONTEXT *const fc = xd->fc; const int ref = is_inter_block(xd->mi[0]); int band, c = 0; const vpx_prob(*coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = fc->coef_probs[tx_size][type][ref]; const vpx_prob *prob; unsigned int(*coef_counts)[COEFF_CONTEXTS][UNCONSTRAINED_NODES + 1]; unsigned int(*eob_branch_count)[COEFF_CONTEXTS]; uint8_t token_cache[32 * 32]; const uint8_t *band_translate = get_band_translate(tx_size); const int dq_shift = (tx_size == TX_32X32); int v; int16_t dqv = dq[0]; const uint8_t *const cat6_prob = #if CONFIG_VP9_HIGHBITDEPTH (xd->bd == VPX_BITS_12) ? vp9_cat6_prob_high12 : (xd->bd == VPX_BITS_10) ? vp9_cat6_prob_high12 + 2 : #endif // CONFIG_VP9_HIGHBITDEPTH vp9_cat6_prob; const int cat6_bits = #if CONFIG_VP9_HIGHBITDEPTH (xd->bd == VPX_BITS_12) ? 18 : (xd->bd == VPX_BITS_10) ? 16 : #endif // CONFIG_VP9_HIGHBITDEPTH 14; // Keep value, range, and count as locals. The compiler produces better // results with the locals than using r directly. BD_VALUE value = r->value; unsigned int range = r->range; int count = r->count; if (counts) { coef_counts = counts->coef[tx_size][type][ref]; eob_branch_count = counts->eob_branch[tx_size][type][ref]; } while (c < max_eob) { int val = -1; band = *band_translate++; prob = coef_probs[band][ctx]; if (counts) ++eob_branch_count[band][ctx]; if (!read_bool(r, prob[EOB_CONTEXT_NODE], &value, &count, &range)) { INCREMENT_COUNT(EOB_MODEL_TOKEN); break; } while (!read_bool(r, prob[ZERO_CONTEXT_NODE], &value, &count, &range)) { INCREMENT_COUNT(ZERO_TOKEN); dqv = dq[1]; token_cache[scan[c]] = 0; ++c; if (c >= max_eob) { r->value = value; r->range = range; r->count = count; return c; // zero tokens at the end (no eob token) } ctx = get_coef_context(nb, token_cache, c); band = *band_translate++; prob = coef_probs[band][ctx]; } if (read_bool(r, prob[ONE_CONTEXT_NODE], &value, &count, &range)) { const vpx_prob *p = vp9_pareto8_full[prob[PIVOT_NODE] - 1]; INCREMENT_COUNT(TWO_TOKEN); if (read_bool(r, p[0], &value, &count, &range)) { if (read_bool(r, p[3], &value, &count, &range)) { token_cache[scan[c]] = 5; if (read_bool(r, p[5], &value, &count, &range)) { if (read_bool(r, p[7], &value, &count, &range)) { val = CAT6_MIN_VAL + read_coeff(r, cat6_prob, cat6_bits, &value, &count, &range); } else { val = CAT5_MIN_VAL + read_coeff(r, vp9_cat5_prob, 5, &value, &count, &range); } } else if (read_bool(r, p[6], &value, &count, &range)) { val = CAT4_MIN_VAL + read_coeff(r, vp9_cat4_prob, 4, &value, &count, &range); } else { val = CAT3_MIN_VAL + read_coeff(r, vp9_cat3_prob, 3, &value, &count, &range); } } else { token_cache[scan[c]] = 4; if (read_bool(r, p[4], &value, &count, &range)) { val = CAT2_MIN_VAL + read_coeff(r, vp9_cat2_prob, 2, &value, &count, &range); } else { val = CAT1_MIN_VAL + read_coeff(r, vp9_cat1_prob, 1, &value, &count, &range); } } #if CONFIG_VP9_HIGHBITDEPTH // val may use 18-bits v = 
(int)(((int64_t)val * dqv) >> dq_shift); #else v = (val * dqv) >> dq_shift; #endif } else { if (read_bool(r, p[1], &value, &count, &range)) { token_cache[scan[c]] = 3; v = ((3 + read_bool(r, p[2], &value, &count, &range)) * dqv) >> dq_shift; } else { token_cache[scan[c]] = 2; v = (2 * dqv) >> dq_shift; } } } else { INCREMENT_COUNT(ONE_TOKEN); token_cache[scan[c]] = 1; v = dqv >> dq_shift; } #if CONFIG_COEFFICIENT_RANGE_CHECKING #if CONFIG_VP9_HIGHBITDEPTH dqcoeff[scan[c]] = highbd_check_range( read_bool(r, 128, &value, &count, &range) ? -v : v, xd->bd); #else dqcoeff[scan[c]] = check_range(read_bool(r, 128, &value, &count, &range) ? -v : v); #endif // CONFIG_VP9_HIGHBITDEPTH #else if (read_bool(r, 128, &value, &count, &range)) { dqcoeff[scan[c]] = (tran_low_t)-v; } else { dqcoeff[scan[c]] = (tran_low_t)v; } #endif // CONFIG_COEFFICIENT_RANGE_CHECKING ++c; ctx = get_coef_context(nb, token_cache, c); dqv = dq[1]; } r->value = value; r->range = range; r->count = count; return c; } static void get_ctx_shift(MACROBLOCKD *xd, int *ctx_shift_a, int *ctx_shift_l, int x, int y, unsigned int tx_size_in_blocks) { if (xd->max_blocks_wide) { if (tx_size_in_blocks + x > xd->max_blocks_wide) *ctx_shift_a = (tx_size_in_blocks - (xd->max_blocks_wide - x)) * 8; } if (xd->max_blocks_high) { if (tx_size_in_blocks + y > xd->max_blocks_high) *ctx_shift_l = (tx_size_in_blocks - (xd->max_blocks_high - y)) * 8; } } int vp9_decode_block_tokens(TileWorkerData *twd, int plane, const scan_order *sc, int x, int y, TX_SIZE tx_size, int seg_id) { vpx_reader *r = &twd->bit_reader; MACROBLOCKD *xd = &twd->xd; struct macroblockd_plane *const pd = &xd->plane[plane]; const int16_t *const dequant = pd->seg_dequant[seg_id]; int eob; ENTROPY_CONTEXT *a = pd->above_context + x; ENTROPY_CONTEXT *l = pd->left_context + y; int ctx; int ctx_shift_a = 0; int ctx_shift_l = 0; switch (tx_size) { case TX_4X4: ctx = a[0] != 0; ctx += l[0] != 0; eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, dequant, ctx, sc->scan, sc->neighbors, r); a[0] = l[0] = (eob > 0); break; case TX_8X8: get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_8X8); ctx = !!*(const uint16_t *)a; ctx += !!*(const uint16_t *)l; eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, dequant, ctx, sc->scan, sc->neighbors, r); *(uint16_t *)a = ((eob > 0) * 0x0101) >> ctx_shift_a; *(uint16_t *)l = ((eob > 0) * 0x0101) >> ctx_shift_l; break; case TX_16X16: get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_16X16); ctx = !!*(const uint32_t *)a; ctx += !!*(const uint32_t *)l; eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size, dequant, ctx, sc->scan, sc->neighbors, r); *(uint32_t *)a = ((eob > 0) * 0x01010101) >> ctx_shift_a; *(uint32_t *)l = ((eob > 0) * 0x01010101) >> ctx_shift_l; break; case TX_32X32: get_ctx_shift(xd, &ctx_shift_a, &ctx_shift_l, x, y, 1 << TX_32X32); // NOTE: casting to uint64_t here is safe because the default memory // alignment is at least 8 bytes and the TX_32X32 is aligned on 8 byte // boundaries. 
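      // The context stores below broadcast (eob > 0) into one byte per 4x4
      // unit covered by the transform; the shifts from get_ctx_shift() then
      // zero the bytes that fall outside the visible frame. For example, if
      // only two of the eight 4x4 columns of a 32x32 block are inside
      // max_blocks_wide, ctx_shift_a is (8 - 2) * 8 = 48 and only the two low
      // context bytes are kept.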
      ctx = !!*(const uint64_t *)a;
      ctx += !!*(const uint64_t *)l;
      eob = decode_coefs(xd, get_plane_type(plane), pd->dqcoeff, tx_size,
                         dequant, ctx, sc->scan, sc->neighbors, r);
      *(uint64_t *)a = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_a;
      *(uint64_t *)l = ((eob > 0) * 0x0101010101010101ULL) >> ctx_shift_l;
      break;
    default: assert(0 && "Invalid transform size."); eob = 0; break;
  }

  return eob;
}
libvpx-1.8.2/vp9/decoder/vp9_detokenize.h000066400000000000000000000016171357355204000202720ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_DECODER_VP9_DETOKENIZE_H_
#define VPX_VP9_DECODER_VP9_DETOKENIZE_H_

#include "vpx_dsp/bitreader.h"

#include "vp9/decoder/vp9_decoder.h"
#include "vp9/common/vp9_scan.h"

#ifdef __cplusplus
extern "C" {
#endif

int vp9_decode_block_tokens(TileWorkerData *twd, int plane,
                            const scan_order *sc, int x, int y, TX_SIZE tx_size,
                            int seg_id);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_DECODER_VP9_DETOKENIZE_H_
libvpx-1.8.2/vp9/decoder/vp9_dsubexp.c000066400000000000000000000056241357355204000176000ustar00rootroot00000000000000/* Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "vp9/common/vp9_entropy.h"

#include "vp9/decoder/vp9_dsubexp.h"

static int inv_recenter_nonneg(int v, int m) {
  if (v > 2 * m) return v;

  return (v & 1) ? m - ((v + 1) >> 1) : m + (v >> 1);
}

static int decode_uniform(vpx_reader *r) {
  const int l = 8;
  const int m = (1 << l) - 191;
  const int v = vpx_read_literal(r, l - 1);
  return v < m ?
v : (v << 1) - m + vpx_read_bit(r); } static int inv_remap_prob(int v, int m) { static uint8_t inv_map_table[MAX_PROB] = { 7, 20, 33, 46, 59, 72, 85, 98, 111, 124, 137, 150, 163, 176, 189, 202, 215, 228, 241, 254, 1, 2, 3, 4, 5, 6, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 253 }; assert(v < (int)(sizeof(inv_map_table) / sizeof(inv_map_table[0]))); v = inv_map_table[v]; m--; if ((m << 1) <= MAX_PROB) { return 1 + inv_recenter_nonneg(v, m); } else { return MAX_PROB - inv_recenter_nonneg(v, MAX_PROB - 1 - m); } } static int decode_term_subexp(vpx_reader *r) { if (!vpx_read_bit(r)) return vpx_read_literal(r, 4); if (!vpx_read_bit(r)) return vpx_read_literal(r, 4) + 16; if (!vpx_read_bit(r)) return vpx_read_literal(r, 5) + 32; return decode_uniform(r) + 64; } void vp9_diff_update_prob(vpx_reader *r, vpx_prob *p) { if (vpx_read(r, DIFF_UPDATE_PROB)) { const int delp = decode_term_subexp(r); *p = (vpx_prob)inv_remap_prob(delp, *p); } } libvpx-1.8.2/vp9/decoder/vp9_dsubexp.h000066400000000000000000000013011357355204000175710ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_DECODER_VP9_DSUBEXP_H_ #define VPX_VP9_DECODER_VP9_DSUBEXP_H_ #include "vpx_dsp/bitreader.h" #ifdef __cplusplus extern "C" { #endif void vp9_diff_update_prob(vpx_reader *r, vpx_prob *p); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_DECODER_VP9_DSUBEXP_H_ libvpx-1.8.2/vp9/decoder/vp9_job_queue.c000066400000000000000000000060661357355204000201050ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/

#include <assert.h>
#include <string.h>

#include "vpx/vpx_integer.h"

#include "vp9/decoder/vp9_job_queue.h"

void vp9_jobq_init(JobQueueRowMt *jobq, uint8_t *buf, size_t buf_size) {
#if CONFIG_MULTITHREAD
  pthread_mutex_init(&jobq->mutex, NULL);
  pthread_cond_init(&jobq->cond, NULL);
#endif
  jobq->buf_base = buf;
  jobq->buf_wr = buf;
  jobq->buf_rd = buf;
  jobq->buf_end = buf + buf_size;
  jobq->terminate = 0;
}

void vp9_jobq_reset(JobQueueRowMt *jobq) {
#if CONFIG_MULTITHREAD
  pthread_mutex_lock(&jobq->mutex);
#endif
  jobq->buf_wr = jobq->buf_base;
  jobq->buf_rd = jobq->buf_base;
  jobq->terminate = 0;
#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&jobq->mutex);
#endif
}

void vp9_jobq_deinit(JobQueueRowMt *jobq) {
  vp9_jobq_reset(jobq);
#if CONFIG_MULTITHREAD
  pthread_mutex_destroy(&jobq->mutex);
  pthread_cond_destroy(&jobq->cond);
#endif
}

void vp9_jobq_terminate(JobQueueRowMt *jobq) {
#if CONFIG_MULTITHREAD
  pthread_mutex_lock(&jobq->mutex);
#endif
  jobq->terminate = 1;
#if CONFIG_MULTITHREAD
  pthread_cond_broadcast(&jobq->cond);
  pthread_mutex_unlock(&jobq->mutex);
#endif
}

int vp9_jobq_queue(JobQueueRowMt *jobq, void *job, size_t job_size) {
  int ret = 0;
#if CONFIG_MULTITHREAD
  pthread_mutex_lock(&jobq->mutex);
#endif
  if (jobq->buf_end >= jobq->buf_wr + job_size) {
    memcpy(jobq->buf_wr, job, job_size);
    jobq->buf_wr = jobq->buf_wr + job_size;
#if CONFIG_MULTITHREAD
    pthread_cond_signal(&jobq->cond);
#endif
    ret = 0;
  } else {
    /* Wrap around case is not supported */
    assert(0);
    ret = 1;
  }
#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&jobq->mutex);
#endif
  return ret;
}

int vp9_jobq_dequeue(JobQueueRowMt *jobq, void *job, size_t job_size,
                     int blocking) {
  int ret = 0;
#if CONFIG_MULTITHREAD
  pthread_mutex_lock(&jobq->mutex);
#endif
  if (jobq->buf_end >= jobq->buf_rd + job_size) {
    while (1) {
      if (jobq->buf_wr >= jobq->buf_rd + job_size) {
        memcpy(job, jobq->buf_rd, job_size);
        jobq->buf_rd = jobq->buf_rd + job_size;
        ret = 0;
        break;
      } else {
        /* If all the entries have been dequeued, then break and return */
        if (jobq->terminate == 1) {
          ret = 1;
          break;
        }
        if (blocking == 1) {
#if CONFIG_MULTITHREAD
          pthread_cond_wait(&jobq->cond, &jobq->mutex);
#endif
        } else {
          /* If there is no job available,
           * and this is non blocking call then return fail */
          ret = 1;
          break;
        }
      }
    }
  } else {
    /* Wrap around case is not supported */
    ret = 1;
  }
#if CONFIG_MULTITHREAD
  pthread_mutex_unlock(&jobq->mutex);
#endif
  return ret;
}
libvpx-1.8.2/vp9/decoder/vp9_job_queue.h000066400000000000000000000025651357355204000201060ustar00rootroot00000000000000/*
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
*/

#ifndef VPX_VP9_DECODER_VP9_JOB_QUEUE_H_
#define VPX_VP9_DECODER_VP9_JOB_QUEUE_H_

#include "vpx_util/vpx_thread.h"

typedef struct {
  // Pointer to buffer base which contains the jobs
  uint8_t *buf_base;

  // Pointer to current address where new job can be added
  uint8_t *volatile buf_wr;

  // Pointer to current address from where next job can be obtained
  uint8_t *volatile buf_rd;

  // Pointer to end of job buffer
  uint8_t *buf_end;

  int terminate;

#if CONFIG_MULTITHREAD
  pthread_mutex_t mutex;
  pthread_cond_t cond;
#endif
} JobQueueRowMt;

void vp9_jobq_init(JobQueueRowMt *jobq, uint8_t *buf, size_t buf_size);
void vp9_jobq_reset(JobQueueRowMt *jobq);
void vp9_jobq_deinit(JobQueueRowMt *jobq);
void vp9_jobq_terminate(JobQueueRowMt *jobq);
int vp9_jobq_queue(JobQueueRowMt *jobq, void *job, size_t job_size);
int vp9_jobq_dequeue(JobQueueRowMt *jobq, void *job, size_t job_size,
                     int blocking);

#endif  // VPX_VP9_DECODER_VP9_JOB_QUEUE_H_
libvpx-1.8.2/vp9/encoder/000077500000000000000000000000001357355204000151675ustar00rootroot00000000000000libvpx-1.8.2/vp9/encoder/arm/000077500000000000000000000000001357355204000157465ustar00rootroot00000000000000libvpx-1.8.2/vp9/encoder/arm/neon/000077500000000000000000000000001357355204000167055ustar00rootroot00000000000000libvpx-1.8.2/vp9/encoder/arm/neon/vp9_denoiser_neon.c000066400000000000000000000345741357355204000225110ustar00rootroot00000000000000/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

#include "./vpx_config.h"
#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/encoder/vp9_context_tree.h"
#include "vp9/encoder/vp9_denoiser.h"
#include "vpx_mem/vpx_mem.h"

// Compute the sum of all pixel differences of this MB.
static INLINE int horizontal_add_s8x16(const int8x16_t v_sum_diff_total) {
  const int16x8_t fe_dc_ba_98_76_54_32_10 = vpaddlq_s8(v_sum_diff_total);
  const int32x4_t fedc_ba98_7654_3210 = vpaddlq_s16(fe_dc_ba_98_76_54_32_10);
  const int64x2_t fedcba98_76543210 = vpaddlq_s32(fedc_ba98_7654_3210);
  const int64x1_t x = vqadd_s64(vget_high_s64(fedcba98_76543210),
                                vget_low_s64(fedcba98_76543210));
  const int sum_diff = vget_lane_s32(vreinterpret_s32_s64(x), 0);
  return sum_diff;
}

// Denoise a 16x1 vector.
static INLINE int8x16_t denoiser_16x1_neon(
    const uint8_t *sig, const uint8_t *mc_running_avg_y,
    uint8_t *running_avg_y, const uint8x16_t v_level1_threshold,
    const uint8x16_t v_level2_threshold, const uint8x16_t v_level3_threshold,
    const uint8x16_t v_level1_adjustment,
    const uint8x16_t v_delta_level_1_and_2,
    const uint8x16_t v_delta_level_2_and_3, int8x16_t v_sum_diff_total) {
  const uint8x16_t v_sig = vld1q_u8(sig);
  const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y);

  /* Calculate absolute difference and sign masks. */
  const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y);
  const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y);
  const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y);

  /* Figure out which level that put us in.
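   * With the defaults this means |diff| >= 4 (+shift_inc) selects level 1,
   * |diff| >= 8 level 2 and |diff| >= 16 level 3; each mask below is all-ones
   * in the lanes that reached at least that level.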
*/ const uint8x16_t v_level1_mask = vcleq_u8(v_level1_threshold, v_abs_diff); const uint8x16_t v_level2_mask = vcleq_u8(v_level2_threshold, v_abs_diff); const uint8x16_t v_level3_mask = vcleq_u8(v_level3_threshold, v_abs_diff); /* Calculate absolute adjustments for level 1, 2 and 3. */ const uint8x16_t v_level2_adjustment = vandq_u8(v_level2_mask, v_delta_level_1_and_2); const uint8x16_t v_level3_adjustment = vandq_u8(v_level3_mask, v_delta_level_2_and_3); const uint8x16_t v_level1and2_adjustment = vaddq_u8(v_level1_adjustment, v_level2_adjustment); const uint8x16_t v_level1and2and3_adjustment = vaddq_u8(v_level1and2_adjustment, v_level3_adjustment); /* Figure adjustment absolute value by selecting between the absolute * difference if in level0 or the value for level 1, 2 and 3. */ const uint8x16_t v_abs_adjustment = vbslq_u8(v_level1_mask, v_level1and2and3_adjustment, v_abs_diff); /* Calculate positive and negative adjustments. Apply them to the signal * and accumulate them. Adjustments are less than eight and the maximum * sum of them (7 * 16) can fit in a signed char. */ const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask, v_abs_adjustment); const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask, v_abs_adjustment); uint8x16_t v_running_avg_y = vqaddq_u8(v_sig, v_pos_adjustment); v_running_avg_y = vqsubq_u8(v_running_avg_y, v_neg_adjustment); /* Store results. */ vst1q_u8(running_avg_y, v_running_avg_y); /* Sum all the accumulators to have the sum of all pixel differences * for this macroblock. */ { const int8x16_t v_sum_diff = vqsubq_s8(vreinterpretq_s8_u8(v_pos_adjustment), vreinterpretq_s8_u8(v_neg_adjustment)); v_sum_diff_total = vaddq_s8(v_sum_diff_total, v_sum_diff); } return v_sum_diff_total; } static INLINE int8x16_t denoiser_adjust_16x1_neon( const uint8_t *sig, const uint8_t *mc_running_avg_y, uint8_t *running_avg_y, const uint8x16_t k_delta, int8x16_t v_sum_diff_total) { uint8x16_t v_running_avg_y = vld1q_u8(running_avg_y); const uint8x16_t v_sig = vld1q_u8(sig); const uint8x16_t v_mc_running_avg_y = vld1q_u8(mc_running_avg_y); /* Calculate absolute difference and sign masks. */ const uint8x16_t v_abs_diff = vabdq_u8(v_sig, v_mc_running_avg_y); const uint8x16_t v_diff_pos_mask = vcltq_u8(v_sig, v_mc_running_avg_y); const uint8x16_t v_diff_neg_mask = vcgtq_u8(v_sig, v_mc_running_avg_y); // Clamp absolute difference to delta to get the adjustment. const uint8x16_t v_abs_adjustment = vminq_u8(v_abs_diff, (k_delta)); const uint8x16_t v_pos_adjustment = vandq_u8(v_diff_pos_mask, v_abs_adjustment); const uint8x16_t v_neg_adjustment = vandq_u8(v_diff_neg_mask, v_abs_adjustment); v_running_avg_y = vqsubq_u8(v_running_avg_y, v_pos_adjustment); v_running_avg_y = vqaddq_u8(v_running_avg_y, v_neg_adjustment); /* Store results. */ vst1q_u8(running_avg_y, v_running_avg_y); { const int8x16_t v_sum_diff = vqsubq_s8(vreinterpretq_s8_u8(v_neg_adjustment), vreinterpretq_s8_u8(v_pos_adjustment)); v_sum_diff_total = vaddq_s8(v_sum_diff_total, v_sum_diff); } return v_sum_diff_total; } // Denoise 8x8 and 8x16 blocks. static int vp9_denoiser_8xN_neon(const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y, int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude, int width) { int sum_diff_thresh, r, sum_diff = 0; const int shift_inc = (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 
1 : 0; uint8_t sig_buffer[8][16], mc_running_buffer[8][16], running_buffer[8][16]; const uint8x16_t v_level1_adjustment = vmovq_n_u8( (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 + shift_inc : 3); const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); const uint8x16_t v_level1_threshold = vdupq_n_u8(4 + shift_inc); const uint8x16_t v_level2_threshold = vdupq_n_u8(8); const uint8x16_t v_level3_threshold = vdupq_n_u8(16); const int b_height = (4 << b_height_log2_lookup[bs]) >> 1; int8x16_t v_sum_diff_total = vdupq_n_s8(0); for (r = 0; r < b_height; ++r) { memcpy(sig_buffer[r], sig, width); memcpy(sig_buffer[r] + width, sig + sig_stride, width); memcpy(mc_running_buffer[r], mc_running_avg_y, width); memcpy(mc_running_buffer[r] + width, mc_running_avg_y + mc_avg_y_stride, width); memcpy(running_buffer[r], running_avg_y, width); memcpy(running_buffer[r] + width, running_avg_y + avg_y_stride, width); v_sum_diff_total = denoiser_16x1_neon( sig_buffer[r], mc_running_buffer[r], running_buffer[r], v_level1_threshold, v_level2_threshold, v_level3_threshold, v_level1_adjustment, v_delta_level_1_and_2, v_delta_level_2_and_3, v_sum_diff_total); { const uint8x16_t v_running_buffer = vld1q_u8(running_buffer[r]); const uint8x8_t v_running_buffer_high = vget_high_u8(v_running_buffer); const uint8x8_t v_running_buffer_low = vget_low_u8(v_running_buffer); vst1_u8(running_avg_y, v_running_buffer_low); vst1_u8(running_avg_y + avg_y_stride, v_running_buffer_high); } // Update pointers for next iteration. sig += (sig_stride << 1); mc_running_avg_y += (mc_avg_y_stride << 1); running_avg_y += (avg_y_stride << 1); } { sum_diff = horizontal_add_s8x16(v_sum_diff_total); sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising); if (abs(sum_diff) > sum_diff_thresh) { // Before returning to copy the block (i.e., apply no denoising), // check if we can still apply some (weaker) temporal filtering to // this block, that would otherwise not be denoised at all. Simplest // is to apply an additional adjustment to running_avg_y to bring it // closer to sig. The adjustment is capped by a maximum delta, and // chosen such that in most cases the resulting sum_diff will be // within the acceptable range given by sum_diff_thresh. // The delta is set by the excess of absolute pixel diff over the // threshold. const int delta = ((abs(sum_diff) - sum_diff_thresh) >> num_pels_log2_lookup[bs]) + 1; // Only apply the adjustment for max delta up to 3. if (delta < 4) { const uint8x16_t k_delta = vmovq_n_u8(delta); running_avg_y -= avg_y_stride * (b_height << 1); for (r = 0; r < b_height; ++r) { v_sum_diff_total = denoiser_adjust_16x1_neon( sig_buffer[r], mc_running_buffer[r], running_buffer[r], k_delta, v_sum_diff_total); { const uint8x16_t v_running_buffer = vld1q_u8(running_buffer[r]); const uint8x8_t v_running_buffer_high = vget_high_u8(v_running_buffer); const uint8x8_t v_running_buffer_low = vget_low_u8(v_running_buffer); vst1_u8(running_avg_y, v_running_buffer_low); vst1_u8(running_avg_y + avg_y_stride, v_running_buffer_high); } // Update pointers for next iteration. running_avg_y += (avg_y_stride << 1); } sum_diff = horizontal_add_s8x16(v_sum_diff_total); if (abs(sum_diff) > sum_diff_thresh) { return COPY_BLOCK; } } else { return COPY_BLOCK; } } } return FILTER_BLOCK; } // Denoise 16x16, 16x32, 32x16, 32x32, 32x64, 64x32 and 64x64 blocks. 
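// The NxM path below walks the block in 16-pixel-wide strips with one
// int8x16_t accumulator per strip per group of 16 rows, reducing to a scalar
// sum only at the end of each 16-row group (or after row 7 for BLOCK_16X8).
// If |sum_diff| exceeds the strong threshold it makes one more pass with a
// small clamped adjustment (delta < 4) before giving up and returning
// COPY_BLOCK.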
static int vp9_denoiser_NxM_neon(const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y, int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude) { const int shift_inc = (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0; const uint8x16_t v_level1_adjustment = vmovq_n_u8( (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 4 + shift_inc : 3); const uint8x16_t v_delta_level_1_and_2 = vdupq_n_u8(1); const uint8x16_t v_delta_level_2_and_3 = vdupq_n_u8(2); const uint8x16_t v_level1_threshold = vmovq_n_u8(4 + shift_inc); const uint8x16_t v_level2_threshold = vdupq_n_u8(8); const uint8x16_t v_level3_threshold = vdupq_n_u8(16); const int b_width = (4 << b_width_log2_lookup[bs]); const int b_height = (4 << b_height_log2_lookup[bs]); const int b_width_shift4 = b_width >> 4; int8x16_t v_sum_diff_total[4][4]; int r, c, sum_diff = 0; for (r = 0; r < 4; ++r) { for (c = 0; c < b_width_shift4; ++c) { v_sum_diff_total[c][r] = vdupq_n_s8(0); } } for (r = 0; r < b_height; ++r) { for (c = 0; c < b_width_shift4; ++c) { v_sum_diff_total[c][r >> 4] = denoiser_16x1_neon( sig, mc_running_avg_y, running_avg_y, v_level1_threshold, v_level2_threshold, v_level3_threshold, v_level1_adjustment, v_delta_level_1_and_2, v_delta_level_2_and_3, v_sum_diff_total[c][r >> 4]); // Update pointers for next iteration. sig += 16; mc_running_avg_y += 16; running_avg_y += 16; } if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) { for (c = 0; c < b_width_shift4; ++c) { sum_diff += horizontal_add_s8x16(v_sum_diff_total[c][r >> 4]); } } // Update pointers for next iteration. sig = sig - b_width + sig_stride; mc_running_avg_y = mc_running_avg_y - b_width + mc_avg_y_stride; running_avg_y = running_avg_y - b_width + avg_y_stride; } { const int sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising); if (abs(sum_diff) > sum_diff_thresh) { const int delta = ((abs(sum_diff) - sum_diff_thresh) >> num_pels_log2_lookup[bs]) + 1; // Only apply the adjustment for max delta up to 3. if (delta < 4) { const uint8x16_t k_delta = vdupq_n_u8(delta); sig -= sig_stride * b_height; mc_running_avg_y -= mc_avg_y_stride * b_height; running_avg_y -= avg_y_stride * b_height; sum_diff = 0; for (r = 0; r < b_height; ++r) { for (c = 0; c < b_width_shift4; ++c) { v_sum_diff_total[c][r >> 4] = denoiser_adjust_16x1_neon(sig, mc_running_avg_y, running_avg_y, k_delta, v_sum_diff_total[c][r >> 4]); // Update pointers for next iteration. sig += 16; mc_running_avg_y += 16; running_avg_y += 16; } if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) { for (c = 0; c < b_width_shift4; ++c) { sum_diff += horizontal_add_s8x16(v_sum_diff_total[c][r >> 4]); } } sig = sig - b_width + sig_stride; mc_running_avg_y = mc_running_avg_y - b_width + mc_avg_y_stride; running_avg_y = running_avg_y - b_width + avg_y_stride; } if (abs(sum_diff) > sum_diff_thresh) { return COPY_BLOCK; } } else { return COPY_BLOCK; } } } return FILTER_BLOCK; } int vp9_denoiser_filter_neon(const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude) { // Rank by frequency of the block type to have an early termination. 
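  // The remaining (sub-8x8) block sizes are not handled by either path and
  // fall through to COPY_BLOCK below, i.e. they are left undenoised.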
  if (bs == BLOCK_16X16 || bs == BLOCK_32X32 || bs == BLOCK_64X64 ||
      bs == BLOCK_16X32 || bs == BLOCK_16X8 || bs == BLOCK_32X16 ||
      bs == BLOCK_32X64 || bs == BLOCK_64X32) {
    return vp9_denoiser_NxM_neon(sig, sig_stride, mc_avg, mc_avg_stride, avg,
                                 avg_stride, increase_denoising, bs,
                                 motion_magnitude);
  } else if (bs == BLOCK_8X8 || bs == BLOCK_8X16) {
    return vp9_denoiser_8xN_neon(sig, sig_stride, mc_avg, mc_avg_stride, avg,
                                 avg_stride, increase_denoising, bs,
                                 motion_magnitude, 8);
  }
  return COPY_BLOCK;
}
libvpx-1.8.2/vp9/encoder/arm/neon/vp9_error_neon.c000066400000000000000000000026371357355204000220270ustar00rootroot00000000000000/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>
#include <assert.h>

#include "./vp9_rtcd.h"

int64_t vp9_block_error_fp_neon(const int16_t *coeff, const int16_t *dqcoeff,
                                int block_size) {
  int64x2_t error = vdupq_n_s64(0);

  assert(block_size >= 8);
  assert((block_size % 8) == 0);

  do {
    const int16x8_t c = vld1q_s16(coeff);
    const int16x8_t d = vld1q_s16(dqcoeff);
    const int16x8_t diff = vsubq_s16(c, d);
    const int16x4_t diff_lo = vget_low_s16(diff);
    const int16x4_t diff_hi = vget_high_s16(diff);
    // diff is 15-bits, the squares 30, so we can store 2 in 31-bits before
    // accumulating them in 64-bits.
    const int32x4_t err0 = vmull_s16(diff_lo, diff_lo);
    const int32x4_t err1 = vmlal_s16(err0, diff_hi, diff_hi);
    const int64x2_t err2 = vaddl_s32(vget_low_s32(err1), vget_high_s32(err1));
    error = vaddq_s64(error, err2);
    coeff += 8;
    dqcoeff += 8;
    block_size -= 8;
  } while (block_size != 0);

  return vgetq_lane_s64(error, 0) + vgetq_lane_s64(error, 1);
}
libvpx-1.8.2/vp9/encoder/arm/neon/vp9_frame_scale_neon.c000066400000000000000000000762551357355204000231440ustar00rootroot00000000000000/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vp9/common/vp9_blockd.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/arm/vpx_convolve8_neon.h"
#include "vpx_dsp/vpx_filter.h"
#include "vpx_scale/yv12config.h"

// Note: The scaling functions could write extra rows and columns in dst, which
// exceed the right and bottom boundaries of the destination frame. We rely on
// the following frame extension function to fix these rows and columns.
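// A phase-0 2:1 downscale needs no filtering at all: vld2q_u8() below
// de-interleaves 32 source bytes into even/odd lanes and only the even lanes
// are kept. A minimal scalar sketch of the same operation:
//   for (x = 0; x < w; ++x) dst[x] = src[2 * x];  // on every other row
// The bilinear and 8-tap paths further down are used when phase_scaler is
// nonzero or a real filter kernel is requested.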
static INLINE void scale_plane_2_to_1_phase_0(const uint8_t *src, const int src_stride, uint8_t *dst, const int dst_stride, const int w, const int h) { const int max_width = (w + 15) & ~15; int y = h; assert(w && h); do { int x = max_width; do { const uint8x16x2_t s = vld2q_u8(src); vst1q_u8(dst, s.val[0]); src += 32; dst += 16; x -= 16; } while (x); src += 2 * (src_stride - max_width); dst += dst_stride - max_width; } while (--y); } static INLINE void scale_plane_4_to_1_phase_0(const uint8_t *src, const int src_stride, uint8_t *dst, const int dst_stride, const int w, const int h) { const int max_width = (w + 15) & ~15; int y = h; assert(w && h); do { int x = max_width; do { const uint8x16x4_t s = vld4q_u8(src); vst1q_u8(dst, s.val[0]); src += 64; dst += 16; x -= 16; } while (x); src += 4 * (src_stride - max_width); dst += dst_stride - max_width; } while (--y); } static INLINE void scale_plane_bilinear_kernel( const uint8x16_t in0, const uint8x16_t in1, const uint8x16_t in2, const uint8x16_t in3, const uint8x8_t coef0, const uint8x8_t coef1, uint8_t *const dst) { const uint16x8_t h0 = vmull_u8(vget_low_u8(in0), coef0); const uint16x8_t h1 = vmull_u8(vget_high_u8(in0), coef0); const uint16x8_t h2 = vmull_u8(vget_low_u8(in2), coef0); const uint16x8_t h3 = vmull_u8(vget_high_u8(in2), coef0); const uint16x8_t h4 = vmlal_u8(h0, vget_low_u8(in1), coef1); const uint16x8_t h5 = vmlal_u8(h1, vget_high_u8(in1), coef1); const uint16x8_t h6 = vmlal_u8(h2, vget_low_u8(in3), coef1); const uint16x8_t h7 = vmlal_u8(h3, vget_high_u8(in3), coef1); const uint8x8_t hor0 = vrshrn_n_u16(h4, 7); // temp: 00 01 02 03 04 05 06 07 const uint8x8_t hor1 = vrshrn_n_u16(h5, 7); // temp: 08 09 0A 0B 0C 0D 0E 0F const uint8x8_t hor2 = vrshrn_n_u16(h6, 7); // temp: 10 11 12 13 14 15 16 17 const uint8x8_t hor3 = vrshrn_n_u16(h7, 7); // temp: 18 19 1A 1B 1C 1D 1E 1F const uint16x8_t v0 = vmull_u8(hor0, coef0); const uint16x8_t v1 = vmull_u8(hor1, coef0); const uint16x8_t v2 = vmlal_u8(v0, hor2, coef1); const uint16x8_t v3 = vmlal_u8(v1, hor3, coef1); // dst: 0 1 2 3 4 5 6 7 8 9 A B C D E F const uint8x16_t d = vcombine_u8(vrshrn_n_u16(v2, 7), vrshrn_n_u16(v3, 7)); vst1q_u8(dst, d); } static INLINE void scale_plane_2_to_1_bilinear( const uint8_t *const src, const int src_stride, uint8_t *dst, const int dst_stride, const int w, const int h, const int16_t c0, const int16_t c1) { const int max_width = (w + 15) & ~15; const uint8_t *src0 = src; const uint8_t *src1 = src + src_stride; const uint8x8_t coef0 = vdup_n_u8(c0); const uint8x8_t coef1 = vdup_n_u8(c1); int y = h; assert(w && h); do { int x = max_width; do { // 000 002 004 006 008 00A 00C 00E 010 012 014 016 018 01A 01C 01E // 001 003 005 007 009 00B 00D 00F 011 013 015 017 019 01B 01D 01F const uint8x16x2_t s0 = vld2q_u8(src0); // 100 102 104 106 108 10A 10C 10E 110 112 114 116 118 11A 11C 11E // 101 103 105 107 109 10B 10D 10F 111 113 115 117 119 11B 11D 11F const uint8x16x2_t s1 = vld2q_u8(src1); scale_plane_bilinear_kernel(s0.val[0], s0.val[1], s1.val[0], s1.val[1], coef0, coef1, dst); src0 += 32; src1 += 32; dst += 16; x -= 16; } while (x); src0 += 2 * (src_stride - max_width); src1 += 2 * (src_stride - max_width); dst += dst_stride - max_width; } while (--y); } static INLINE void scale_plane_4_to_1_bilinear( const uint8_t *const src, const int src_stride, uint8_t *dst, const int dst_stride, const int w, const int h, const int16_t c0, const int16_t c1) { const int max_width = (w + 15) & ~15; const uint8_t *src0 = src; const uint8_t *src1 = src + src_stride; const 
uint8x8_t coef0 = vdup_n_u8(c0); const uint8x8_t coef1 = vdup_n_u8(c1); int y = h; assert(w && h); do { int x = max_width; do { // (*) -- useless // 000 004 008 00C 010 014 018 01C 020 024 028 02C 030 034 038 03C // 001 005 009 00D 011 015 019 01D 021 025 029 02D 031 035 039 03D // 002 006 00A 00E 012 016 01A 01E 022 026 02A 02E 032 036 03A 03E (*) // 003 007 00B 00F 013 017 01B 01F 023 027 02B 02F 033 037 03B 03F (*) const uint8x16x4_t s0 = vld4q_u8(src0); // 100 104 108 10C 110 114 118 11C 120 124 128 12C 130 134 138 13C // 101 105 109 10D 111 115 119 11D 121 125 129 12D 131 135 139 13D // 102 106 10A 10E 112 116 11A 11E 122 126 12A 12E 132 136 13A 13E (*) // 103 107 10B 10F 113 117 11B 11F 123 127 12B 12F 133 137 13B 13F (*) const uint8x16x4_t s1 = vld4q_u8(src1); scale_plane_bilinear_kernel(s0.val[0], s0.val[1], s1.val[0], s1.val[1], coef0, coef1, dst); src0 += 64; src1 += 64; dst += 16; x -= 16; } while (x); src0 += 4 * (src_stride - max_width); src1 += 4 * (src_stride - max_width); dst += dst_stride - max_width; } while (--y); } static INLINE uint8x8_t scale_filter_bilinear(const uint8x8_t *const s, const uint8x8_t *const coef) { const uint16x8_t h0 = vmull_u8(s[0], coef[0]); const uint16x8_t h1 = vmlal_u8(h0, s[1], coef[1]); return vrshrn_n_u16(h1, 7); } static void scale_plane_2_to_1_general(const uint8_t *src, const int src_stride, uint8_t *dst, const int dst_stride, const int w, const int h, const int16_t *const coef, uint8_t *const temp_buffer) { const int width_hor = (w + 3) & ~3; const int width_ver = (w + 7) & ~7; const int height_hor = (2 * h + SUBPEL_TAPS - 2 + 7) & ~7; const int height_ver = (h + 3) & ~3; const int16x8_t filters = vld1q_s16(coef); int x, y = height_hor; uint8_t *t = temp_buffer; uint8x8_t s[14], d[4]; assert(w && h); src -= (SUBPEL_TAPS / 2 - 1) * src_stride + SUBPEL_TAPS / 2 + 1; // horizontal 4x8 // Note: processing 4x8 is about 20% faster than processing row by row using // vld4_u8(). 
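  // Each pass of the loop below keeps a sliding window of transposed source
  // columns: s[0..5] are carried over from the previous step, eight fresh
  // columns are loaded and transposed in, and the 8-tap filter is evaluated
  // at even column offsets (0, 2, 4, 6) to produce four horizontally scaled
  // outputs per row.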
do { load_u8_8x8(src + 2, src_stride, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); transpose_u8_8x8(&s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); x = width_hor; do { src += 8; load_u8_8x8(src, src_stride, &s[6], &s[7], &s[8], &s[9], &s[10], &s[11], &s[12], &s[13]); transpose_u8_8x8(&s[6], &s[7], &s[8], &s[9], &s[10], &s[11], &s[12], &s[13]); d[0] = scale_filter_8(&s[0], filters); // 00 10 20 30 40 50 60 70 d[1] = scale_filter_8(&s[2], filters); // 01 11 21 31 41 51 61 71 d[2] = scale_filter_8(&s[4], filters); // 02 12 22 32 42 52 62 72 d[3] = scale_filter_8(&s[6], filters); // 03 13 23 33 43 53 63 73 // 00 01 02 03 40 41 42 43 // 10 11 12 13 50 51 52 53 // 20 21 22 23 60 61 62 63 // 30 31 32 33 70 71 72 73 transpose_u8_8x4(&d[0], &d[1], &d[2], &d[3]); vst1_lane_u32((uint32_t *)(t + 0 * width_hor), vreinterpret_u32_u8(d[0]), 0); vst1_lane_u32((uint32_t *)(t + 1 * width_hor), vreinterpret_u32_u8(d[1]), 0); vst1_lane_u32((uint32_t *)(t + 2 * width_hor), vreinterpret_u32_u8(d[2]), 0); vst1_lane_u32((uint32_t *)(t + 3 * width_hor), vreinterpret_u32_u8(d[3]), 0); vst1_lane_u32((uint32_t *)(t + 4 * width_hor), vreinterpret_u32_u8(d[0]), 1); vst1_lane_u32((uint32_t *)(t + 5 * width_hor), vreinterpret_u32_u8(d[1]), 1); vst1_lane_u32((uint32_t *)(t + 6 * width_hor), vreinterpret_u32_u8(d[2]), 1); vst1_lane_u32((uint32_t *)(t + 7 * width_hor), vreinterpret_u32_u8(d[3]), 1); s[0] = s[8]; s[1] = s[9]; s[2] = s[10]; s[3] = s[11]; s[4] = s[12]; s[5] = s[13]; t += 4; x -= 4; } while (x); src += 8 * src_stride - 2 * width_hor; t += 7 * width_hor; y -= 8; } while (y); // vertical 8x4 x = width_ver; t = temp_buffer; do { load_u8_8x8(t, width_hor, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); t += 6 * width_hor; y = height_ver; do { load_u8_8x8(t, width_hor, &s[6], &s[7], &s[8], &s[9], &s[10], &s[11], &s[12], &s[13]); t += 8 * width_hor; d[0] = scale_filter_8(&s[0], filters); // 00 01 02 03 04 05 06 07 d[1] = scale_filter_8(&s[2], filters); // 10 11 12 13 14 15 16 17 d[2] = scale_filter_8(&s[4], filters); // 20 21 22 23 24 25 26 27 d[3] = scale_filter_8(&s[6], filters); // 30 31 32 33 34 35 36 37 vst1_u8(dst + 0 * dst_stride, d[0]); vst1_u8(dst + 1 * dst_stride, d[1]); vst1_u8(dst + 2 * dst_stride, d[2]); vst1_u8(dst + 3 * dst_stride, d[3]); s[0] = s[8]; s[1] = s[9]; s[2] = s[10]; s[3] = s[11]; s[4] = s[12]; s[5] = s[13]; dst += 4 * dst_stride; y -= 4; } while (y); t -= width_hor * (2 * height_ver + 6); t += 8; dst -= height_ver * dst_stride; dst += 8; x -= 8; } while (x); } static void scale_plane_4_to_1_general(const uint8_t *src, const int src_stride, uint8_t *dst, const int dst_stride, const int w, const int h, const int16_t *const coef, uint8_t *const temp_buffer) { const int width_hor = (w + 1) & ~1; const int width_ver = (w + 7) & ~7; const int height_hor = (4 * h + SUBPEL_TAPS - 2 + 7) & ~7; const int height_ver = (h + 1) & ~1; const int16x8_t filters = vld1q_s16(coef); int x, y = height_hor; uint8_t *t = temp_buffer; uint8x8_t s[12], d[2]; assert(w && h); src -= (SUBPEL_TAPS / 2 - 1) * src_stride + SUBPEL_TAPS / 2 + 3; // horizontal 2x8 // Note: processing 2x8 is about 20% faster than processing row by row using // vld4_u8(). 
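  // Here the window carries s[0..3] and only two outputs are produced per
  // step (filter offsets 0 and 4), matching the 4:1 horizontal ratio;
  // vtrn_u8() interleaves the two result vectors so each pair of output
  // pixels can be stored with a single 16-bit lane store.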
do { load_u8_8x8(src + 4, src_stride, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); transpose_u8_4x8(&s[0], &s[1], &s[2], &s[3], s[4], s[5], s[6], s[7]); x = width_hor; do { uint8x8x2_t dd; src += 8; load_u8_8x8(src, src_stride, &s[4], &s[5], &s[6], &s[7], &s[8], &s[9], &s[10], &s[11]); transpose_u8_8x8(&s[4], &s[5], &s[6], &s[7], &s[8], &s[9], &s[10], &s[11]); d[0] = scale_filter_8(&s[0], filters); // 00 10 20 30 40 50 60 70 d[1] = scale_filter_8(&s[4], filters); // 01 11 21 31 41 51 61 71 // dd.val[0]: 00 01 20 21 40 41 60 61 // dd.val[1]: 10 11 30 31 50 51 70 71 dd = vtrn_u8(d[0], d[1]); vst1_lane_u16((uint16_t *)(t + 0 * width_hor), vreinterpret_u16_u8(dd.val[0]), 0); vst1_lane_u16((uint16_t *)(t + 1 * width_hor), vreinterpret_u16_u8(dd.val[1]), 0); vst1_lane_u16((uint16_t *)(t + 2 * width_hor), vreinterpret_u16_u8(dd.val[0]), 1); vst1_lane_u16((uint16_t *)(t + 3 * width_hor), vreinterpret_u16_u8(dd.val[1]), 1); vst1_lane_u16((uint16_t *)(t + 4 * width_hor), vreinterpret_u16_u8(dd.val[0]), 2); vst1_lane_u16((uint16_t *)(t + 5 * width_hor), vreinterpret_u16_u8(dd.val[1]), 2); vst1_lane_u16((uint16_t *)(t + 6 * width_hor), vreinterpret_u16_u8(dd.val[0]), 3); vst1_lane_u16((uint16_t *)(t + 7 * width_hor), vreinterpret_u16_u8(dd.val[1]), 3); s[0] = s[8]; s[1] = s[9]; s[2] = s[10]; s[3] = s[11]; t += 2; x -= 2; } while (x); src += 8 * src_stride - 4 * width_hor; t += 7 * width_hor; y -= 8; } while (y); // vertical 8x2 x = width_ver; t = temp_buffer; do { load_u8_8x4(t, width_hor, &s[0], &s[1], &s[2], &s[3]); t += 4 * width_hor; y = height_ver; do { load_u8_8x8(t, width_hor, &s[4], &s[5], &s[6], &s[7], &s[8], &s[9], &s[10], &s[11]); t += 8 * width_hor; d[0] = scale_filter_8(&s[0], filters); // 00 01 02 03 04 05 06 07 d[1] = scale_filter_8(&s[4], filters); // 10 11 12 13 14 15 16 17 vst1_u8(dst + 0 * dst_stride, d[0]); vst1_u8(dst + 1 * dst_stride, d[1]); s[0] = s[8]; s[1] = s[9]; s[2] = s[10]; s[3] = s[11]; dst += 2 * dst_stride; y -= 2; } while (y); t -= width_hor * (4 * height_ver + 4); t += 8; dst -= height_ver * dst_stride; dst += 8; x -= 8; } while (x); } // Notes for 4 to 3 scaling: // // 1. 6 rows are calculated in each horizontal inner loop, so width_hor must be // multiple of 6, and no less than w. // // 2. 8 rows are calculated in each vertical inner loop, so width_ver must be // multiple of 8, and no less than w. // // 3. 8 columns are calculated in each horizontal inner loop for further // vertical scaling, so height_hor must be multiple of 8, and no less than // 4 * h / 3. // // 4. 6 columns are calculated in each vertical inner loop, so height_ver must // be multiple of 6, and no less than h. // // 5. The physical location of the last row of the 4 to 3 scaled frame is // decided by phase_scaler, and are always less than 1 pixel below the last row // of the original image. static void scale_plane_4_to_3_bilinear(const uint8_t *src, const int src_stride, uint8_t *dst, const int dst_stride, const int w, const int h, const int phase_scaler, uint8_t *const temp_buffer) { static const int step_q4 = 16 * 4 / 3; const int width_hor = (w + 5) - ((w + 5) % 6); const int stride_hor = width_hor + 2; // store 2 extra pixels const int width_ver = (w + 7) & ~7; // We only need 1 extra row below because there are only 2 bilinear // coefficients. 
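  // (4:3 scaling steps through the source in increments of
  // step_q4 = 16 * 4 / 3 = 21 sixteenth-pel units, so with phase_scaler == 0
  // the three outputs of each group of four source pixels sit at positions
  // 0, 21 and 42: whole-pixel offsets 0, 1 and 2 with subpel phases 0,
  // 21 & 15 = 5 and 42 & 15 = 10.)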
const int height_hor = (4 * h / 3 + 1 + 7) & ~7; const int height_ver = (h + 5) - ((h + 5) % 6); int x, y = height_hor; uint8_t *t = temp_buffer; uint8x8_t s[9], d[8], c[6]; assert(w && h); c[0] = vdup_n_u8((uint8_t)vp9_filter_kernels[BILINEAR][phase_scaler][3]); c[1] = vdup_n_u8((uint8_t)vp9_filter_kernels[BILINEAR][phase_scaler][4]); c[2] = vdup_n_u8( (uint8_t)vp9_filter_kernels[BILINEAR][(phase_scaler + 1 * step_q4) & SUBPEL_MASK][3]); c[3] = vdup_n_u8( (uint8_t)vp9_filter_kernels[BILINEAR][(phase_scaler + 1 * step_q4) & SUBPEL_MASK][4]); c[4] = vdup_n_u8( (uint8_t)vp9_filter_kernels[BILINEAR][(phase_scaler + 2 * step_q4) & SUBPEL_MASK][3]); c[5] = vdup_n_u8( (uint8_t)vp9_filter_kernels[BILINEAR][(phase_scaler + 2 * step_q4) & SUBPEL_MASK][4]); d[6] = vdup_n_u8(0); d[7] = vdup_n_u8(0); // horizontal 6x8 do { load_u8_8x8(src, src_stride, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); src += 1; transpose_u8_8x8(&s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); x = width_hor; do { load_u8_8x8(src, src_stride, &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7], &s[8]); src += 8; transpose_u8_8x8(&s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7], &s[8]); // 00 10 20 30 40 50 60 70 // 01 11 21 31 41 51 61 71 // 02 12 22 32 42 52 62 72 // 03 13 23 33 43 53 63 73 // 04 14 24 34 44 54 64 74 // 05 15 25 35 45 55 65 75 d[0] = scale_filter_bilinear(&s[0], &c[0]); d[1] = scale_filter_bilinear(&s[(phase_scaler + 1 * step_q4) >> 4], &c[2]); d[2] = scale_filter_bilinear(&s[(phase_scaler + 2 * step_q4) >> 4], &c[4]); d[3] = scale_filter_bilinear(&s[4], &c[0]); d[4] = scale_filter_bilinear(&s[4 + ((phase_scaler + 1 * step_q4) >> 4)], &c[2]); d[5] = scale_filter_bilinear(&s[4 + ((phase_scaler + 2 * step_q4) >> 4)], &c[4]); // 00 01 02 03 04 05 xx xx // 10 11 12 13 14 15 xx xx // 20 21 22 23 24 25 xx xx // 30 31 32 33 34 35 xx xx // 40 41 42 43 44 45 xx xx // 50 51 52 53 54 55 xx xx // 60 61 62 63 64 65 xx xx // 70 71 72 73 74 75 xx xx transpose_u8_8x8(&d[0], &d[1], &d[2], &d[3], &d[4], &d[5], &d[6], &d[7]); // store 2 extra pixels vst1_u8(t + 0 * stride_hor, d[0]); vst1_u8(t + 1 * stride_hor, d[1]); vst1_u8(t + 2 * stride_hor, d[2]); vst1_u8(t + 3 * stride_hor, d[3]); vst1_u8(t + 4 * stride_hor, d[4]); vst1_u8(t + 5 * stride_hor, d[5]); vst1_u8(t + 6 * stride_hor, d[6]); vst1_u8(t + 7 * stride_hor, d[7]); s[0] = s[8]; t += 6; x -= 6; } while (x); src += 8 * src_stride - 4 * width_hor / 3 - 1; t += 7 * stride_hor + 2; y -= 8; } while (y); // vertical 8x6 x = width_ver; t = temp_buffer; do { load_u8_8x8(t, stride_hor, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); t += stride_hor; y = height_ver; do { load_u8_8x8(t, stride_hor, &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7], &s[8]); t += 8 * stride_hor; d[0] = scale_filter_bilinear(&s[0], &c[0]); d[1] = scale_filter_bilinear(&s[(phase_scaler + 1 * step_q4) >> 4], &c[2]); d[2] = scale_filter_bilinear(&s[(phase_scaler + 2 * step_q4) >> 4], &c[4]); d[3] = scale_filter_bilinear(&s[4], &c[0]); d[4] = scale_filter_bilinear(&s[4 + ((phase_scaler + 1 * step_q4) >> 4)], &c[2]); d[5] = scale_filter_bilinear(&s[4 + ((phase_scaler + 2 * step_q4) >> 4)], &c[4]); vst1_u8(dst + 0 * dst_stride, d[0]); vst1_u8(dst + 1 * dst_stride, d[1]); vst1_u8(dst + 2 * dst_stride, d[2]); vst1_u8(dst + 3 * dst_stride, d[3]); vst1_u8(dst + 4 * dst_stride, d[4]); vst1_u8(dst + 5 * dst_stride, d[5]); s[0] = s[8]; dst += 6 * dst_stride; y -= 6; } while (y); t -= stride_hor * (4 * height_ver / 3 + 1); t += 8; dst -= height_ver * dst_stride; dst += 8; x -= 8; } while 
(x); } static void scale_plane_4_to_3_general(const uint8_t *src, const int src_stride, uint8_t *dst, const int dst_stride, const int w, const int h, const InterpKernel *const coef, const int phase_scaler, uint8_t *const temp_buffer) { static const int step_q4 = 16 * 4 / 3; const int width_hor = (w + 5) - ((w + 5) % 6); const int stride_hor = width_hor + 2; // store 2 extra pixels const int width_ver = (w + 7) & ~7; // We need (SUBPEL_TAPS - 1) extra rows: (SUBPEL_TAPS / 2 - 1) extra rows // above and (SUBPEL_TAPS / 2) extra rows below. const int height_hor = (4 * h / 3 + SUBPEL_TAPS - 1 + 7) & ~7; const int height_ver = (h + 5) - ((h + 5) % 6); const int16x8_t filters0 = vld1q_s16(coef[(phase_scaler + 0 * step_q4) & SUBPEL_MASK]); const int16x8_t filters1 = vld1q_s16(coef[(phase_scaler + 1 * step_q4) & SUBPEL_MASK]); const int16x8_t filters2 = vld1q_s16(coef[(phase_scaler + 2 * step_q4) & SUBPEL_MASK]); int x, y = height_hor; uint8_t *t = temp_buffer; uint8x8_t s[15], d[8]; assert(w && h); src -= (SUBPEL_TAPS / 2 - 1) * src_stride + SUBPEL_TAPS / 2; d[6] = vdup_n_u8(0); d[7] = vdup_n_u8(0); // horizontal 6x8 do { load_u8_8x8(src + 1, src_stride, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); transpose_u8_8x8(&s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); x = width_hor; do { src += 8; load_u8_8x8(src, src_stride, &s[7], &s[8], &s[9], &s[10], &s[11], &s[12], &s[13], &s[14]); transpose_u8_8x8(&s[7], &s[8], &s[9], &s[10], &s[11], &s[12], &s[13], &s[14]); // 00 10 20 30 40 50 60 70 // 01 11 21 31 41 51 61 71 // 02 12 22 32 42 52 62 72 // 03 13 23 33 43 53 63 73 // 04 14 24 34 44 54 64 74 // 05 15 25 35 45 55 65 75 d[0] = scale_filter_8(&s[0], filters0); d[1] = scale_filter_8(&s[(phase_scaler + 1 * step_q4) >> 4], filters1); d[2] = scale_filter_8(&s[(phase_scaler + 2 * step_q4) >> 4], filters2); d[3] = scale_filter_8(&s[4], filters0); d[4] = scale_filter_8(&s[4 + ((phase_scaler + 1 * step_q4) >> 4)], filters1); d[5] = scale_filter_8(&s[4 + ((phase_scaler + 2 * step_q4) >> 4)], filters2); // 00 01 02 03 04 05 xx xx // 10 11 12 13 14 15 xx xx // 20 21 22 23 24 25 xx xx // 30 31 32 33 34 35 xx xx // 40 41 42 43 44 45 xx xx // 50 51 52 53 54 55 xx xx // 60 61 62 63 64 65 xx xx // 70 71 72 73 74 75 xx xx transpose_u8_8x8(&d[0], &d[1], &d[2], &d[3], &d[4], &d[5], &d[6], &d[7]); // store 2 extra pixels vst1_u8(t + 0 * stride_hor, d[0]); vst1_u8(t + 1 * stride_hor, d[1]); vst1_u8(t + 2 * stride_hor, d[2]); vst1_u8(t + 3 * stride_hor, d[3]); vst1_u8(t + 4 * stride_hor, d[4]); vst1_u8(t + 5 * stride_hor, d[5]); vst1_u8(t + 6 * stride_hor, d[6]); vst1_u8(t + 7 * stride_hor, d[7]); s[0] = s[8]; s[1] = s[9]; s[2] = s[10]; s[3] = s[11]; s[4] = s[12]; s[5] = s[13]; s[6] = s[14]; t += 6; x -= 6; } while (x); src += 8 * src_stride - 4 * width_hor / 3; t += 7 * stride_hor + 2; y -= 8; } while (y); // vertical 8x6 x = width_ver; t = temp_buffer; do { load_u8_8x8(t, stride_hor, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); t += 7 * stride_hor; y = height_ver; do { load_u8_8x8(t, stride_hor, &s[7], &s[8], &s[9], &s[10], &s[11], &s[12], &s[13], &s[14]); t += 8 * stride_hor; d[0] = scale_filter_8(&s[0], filters0); d[1] = scale_filter_8(&s[(phase_scaler + 1 * step_q4) >> 4], filters1); d[2] = scale_filter_8(&s[(phase_scaler + 2 * step_q4) >> 4], filters2); d[3] = scale_filter_8(&s[4], filters0); d[4] = scale_filter_8(&s[4 + ((phase_scaler + 1 * step_q4) >> 4)], filters1); d[5] = scale_filter_8(&s[4 + ((phase_scaler + 2 * step_q4) >> 4)], filters2); vst1_u8(dst + 0 * dst_stride, d[0]); 
vst1_u8(dst + 1 * dst_stride, d[1]); vst1_u8(dst + 2 * dst_stride, d[2]); vst1_u8(dst + 3 * dst_stride, d[3]); vst1_u8(dst + 4 * dst_stride, d[4]); vst1_u8(dst + 5 * dst_stride, d[5]); s[0] = s[8]; s[1] = s[9]; s[2] = s[10]; s[3] = s[11]; s[4] = s[12]; s[5] = s[13]; s[6] = s[14]; dst += 6 * dst_stride; y -= 6; } while (y); t -= stride_hor * (4 * height_ver / 3 + 7); t += 8; dst -= height_ver * dst_stride; dst += 8; x -= 8; } while (x); } void vp9_scale_and_extend_frame_neon(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, INTERP_FILTER filter_type, int phase_scaler) { const int src_w = src->y_crop_width; const int src_h = src->y_crop_height; const int dst_w = dst->y_crop_width; const int dst_h = dst->y_crop_height; const int dst_uv_w = dst_w / 2; const int dst_uv_h = dst_h / 2; int scaled = 0; // phase_scaler is usually 0 or 8. assert(phase_scaler >= 0 && phase_scaler < 16); if (2 * dst_w == src_w && 2 * dst_h == src_h) { // 2 to 1 scaled = 1; if (phase_scaler == 0) { scale_plane_2_to_1_phase_0(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h); scale_plane_2_to_1_phase_0(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h); scale_plane_2_to_1_phase_0(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h); } else if (filter_type == BILINEAR) { const int16_t c0 = vp9_filter_kernels[BILINEAR][phase_scaler][3]; const int16_t c1 = vp9_filter_kernels[BILINEAR][phase_scaler][4]; scale_plane_2_to_1_bilinear(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h, c0, c1); scale_plane_2_to_1_bilinear(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, c0, c1); scale_plane_2_to_1_bilinear(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, c0, c1); } else { const int buffer_stride = (dst_w + 3) & ~3; const int buffer_height = (2 * dst_h + SUBPEL_TAPS - 2 + 7) & ~7; uint8_t *const temp_buffer = (uint8_t *)malloc(buffer_stride * buffer_height); if (temp_buffer) { scale_plane_2_to_1_general( src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h, vp9_filter_kernels[filter_type][phase_scaler], temp_buffer); scale_plane_2_to_1_general( src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, vp9_filter_kernels[filter_type][phase_scaler], temp_buffer); scale_plane_2_to_1_general( src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, vp9_filter_kernels[filter_type][phase_scaler], temp_buffer); free(temp_buffer); } else { scaled = 0; } } } else if (4 * dst_w == src_w && 4 * dst_h == src_h) { // 4 to 1 scaled = 1; if (phase_scaler == 0) { scale_plane_4_to_1_phase_0(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h); scale_plane_4_to_1_phase_0(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h); scale_plane_4_to_1_phase_0(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h); } else if (filter_type == BILINEAR) { const int16_t c0 = vp9_filter_kernels[BILINEAR][phase_scaler][3]; const int16_t c1 = vp9_filter_kernels[BILINEAR][phase_scaler][4]; scale_plane_4_to_1_bilinear(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h, c0, c1); scale_plane_4_to_1_bilinear(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, c0, c1); scale_plane_4_to_1_bilinear(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, c0, c1); } else { const 
int buffer_stride = (dst_w + 1) & ~1; const int buffer_height = (4 * dst_h + SUBPEL_TAPS - 2 + 7) & ~7; uint8_t *const temp_buffer = (uint8_t *)malloc(buffer_stride * buffer_height); if (temp_buffer) { scale_plane_4_to_1_general( src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h, vp9_filter_kernels[filter_type][phase_scaler], temp_buffer); scale_plane_4_to_1_general( src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, vp9_filter_kernels[filter_type][phase_scaler], temp_buffer); scale_plane_4_to_1_general( src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, vp9_filter_kernels[filter_type][phase_scaler], temp_buffer); free(temp_buffer); } else { scaled = 0; } } } else if (4 * dst_w == 3 * src_w && 4 * dst_h == 3 * src_h) { // 4 to 3 const int buffer_stride = (dst_w + 5) - ((dst_w + 5) % 6) + 2; const int buffer_height = (4 * dst_h / 3 + SUBPEL_TAPS - 1 + 7) & ~7; uint8_t *const temp_buffer = (uint8_t *)malloc(buffer_stride * buffer_height); if (temp_buffer) { scaled = 1; if (filter_type == BILINEAR) { scale_plane_4_to_3_bilinear(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h, phase_scaler, temp_buffer); scale_plane_4_to_3_bilinear(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, phase_scaler, temp_buffer); scale_plane_4_to_3_bilinear(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, phase_scaler, temp_buffer); } else { scale_plane_4_to_3_general( src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h, vp9_filter_kernels[filter_type], phase_scaler, temp_buffer); scale_plane_4_to_3_general(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, vp9_filter_kernels[filter_type], phase_scaler, temp_buffer); scale_plane_4_to_3_general(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, vp9_filter_kernels[filter_type], phase_scaler, temp_buffer); } free(temp_buffer); } } if (scaled) { vpx_extend_frame_borders(dst); } else { // Call c version for all other scaling ratios. vp9_scale_and_extend_frame_c(src, dst, filter_type, phase_scaler); } } libvpx-1.8.2/vp9/encoder/arm/neon/vp9_quantize_neon.c000066400000000000000000000244331357355204000225340ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <arm_neon.h> #include <assert.h> #include <math.h> #include "./vpx_config.h" #include "vpx_mem/vpx_mem.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_rd.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/vpx_dsp_common.h" static INLINE void calculate_dqcoeff_and_store(const int16x8_t qcoeff, const int16x8_t dequant, tran_low_t *dqcoeff) { const int32x4_t dqcoeff_0 = vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant)); const int32x4_t dqcoeff_1 = vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant)); #if CONFIG_VP9_HIGHBITDEPTH vst1q_s32(dqcoeff, dqcoeff_0); vst1q_s32(dqcoeff + 4, dqcoeff_1); #else vst1q_s16(dqcoeff, vcombine_s16(vmovn_s32(dqcoeff_0), vmovn_s32(dqcoeff_1))); #endif // CONFIG_VP9_HIGHBITDEPTH } void vp9_quantize_fp_neon(const tran_low_t *coeff_ptr, intptr_t count, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { // Quantization pass: All coefficients with index >= zero_flag are // skippable. Note: zero_flag can be zero. int i; const int16x8_t v_zero = vdupq_n_s16(0); const int16x8_t v_one = vdupq_n_s16(1); int16x8_t v_eobmax_76543210 = vdupq_n_s16(-1); int16x8_t v_round = vmovq_n_s16(round_ptr[1]); int16x8_t v_quant = vmovq_n_s16(quant_ptr[1]); int16x8_t v_dequant = vmovq_n_s16(dequant_ptr[1]); (void)scan; (void)skip_block; assert(!skip_block); // adjust for dc v_round = vsetq_lane_s16(round_ptr[0], v_round, 0); v_quant = vsetq_lane_s16(quant_ptr[0], v_quant, 0); v_dequant = vsetq_lane_s16(dequant_ptr[0], v_dequant, 0); // process dc and the first seven ac coeffs { const int16x8_t v_iscan = vld1q_s16(&iscan[0]); const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); const int16x8_t v_abs = vabsq_s16(v_coeff); const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round); const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant)); const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16)); const uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero); const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one); const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); calculate_dqcoeff_and_store(v_qcoeff, v_dequant, dqcoeff_ptr); v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); store_s16q_to_tran_low(qcoeff_ptr, v_qcoeff); v_round = vmovq_n_s16(round_ptr[1]); v_quant = vmovq_n_s16(quant_ptr[1]); v_dequant = vmovq_n_s16(dequant_ptr[1]); } // now process the rest of the ac coeffs for (i = 8; i < count; i += 8) { const int16x8_t v_iscan = vld1q_s16(&iscan[i]); const int16x8_t v_coeff = load_tran_low_to_s16q(coeff_ptr + i); const int16x8_t v_coeff_sign = vshrq_n_s16(v_coeff, 15); const int16x8_t v_abs = vabsq_s16(v_coeff); const int16x8_t v_tmp = vqaddq_s16(v_abs, v_round); const int32x4_t v_tmp_lo = vmull_s16(vget_low_s16(v_tmp), vget_low_s16(v_quant)); const int32x4_t v_tmp_hi = vmull_s16(vget_high_s16(v_tmp), vget_high_s16(v_quant)); const int16x8_t v_tmp2 = vcombine_s16(vshrn_n_s32(v_tmp_lo, 16), vshrn_n_s32(v_tmp_hi, 16)); const
uint16x8_t v_nz_mask = vceqq_s16(v_tmp2, v_zero); const int16x8_t v_iscan_plus1 = vaddq_s16(v_iscan, v_one); const int16x8_t v_nz_iscan = vbslq_s16(v_nz_mask, v_zero, v_iscan_plus1); const int16x8_t v_qcoeff_a = veorq_s16(v_tmp2, v_coeff_sign); const int16x8_t v_qcoeff = vsubq_s16(v_qcoeff_a, v_coeff_sign); calculate_dqcoeff_and_store(v_qcoeff, v_dequant, dqcoeff_ptr + i); v_eobmax_76543210 = vmaxq_s16(v_eobmax_76543210, v_nz_iscan); store_s16q_to_tran_low(qcoeff_ptr + i, v_qcoeff); } #ifdef __aarch64__ *eob_ptr = vmaxvq_s16(v_eobmax_76543210); #else { const int16x4_t v_eobmax_3210 = vmax_s16(vget_low_s16(v_eobmax_76543210), vget_high_s16(v_eobmax_76543210)); const int64x1_t v_eobmax_xx32 = vshr_n_s64(vreinterpret_s64_s16(v_eobmax_3210), 32); const int16x4_t v_eobmax_tmp = vmax_s16(v_eobmax_3210, vreinterpret_s16_s64(v_eobmax_xx32)); const int64x1_t v_eobmax_xxx3 = vshr_n_s64(vreinterpret_s64_s16(v_eobmax_tmp), 16); const int16x4_t v_eobmax_final = vmax_s16(v_eobmax_tmp, vreinterpret_s16_s64(v_eobmax_xxx3)); *eob_ptr = (uint16_t)vget_lane_s16(v_eobmax_final, 0); } #endif // __aarch64__ } static INLINE int32x4_t extract_sign_bit(int32x4_t a) { return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 31)); } void vp9_quantize_fp_32x32_neon(const tran_low_t *coeff_ptr, intptr_t count, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { const int16x8_t one = vdupq_n_s16(1); const int16x8_t neg_one = vdupq_n_s16(-1); // ROUND_POWER_OF_TWO(round_ptr[], 1) const int16x8_t round = vrshrq_n_s16(vld1q_s16(round_ptr), 1); const int16x8_t quant = vld1q_s16(quant_ptr); const int16x4_t dequant = vld1_s16(dequant_ptr); // dequant >> 2 is used similar to zbin as a threshold. const int16x8_t dequant_thresh = vshrq_n_s16(vld1q_s16(dequant_ptr), 2); // Process dc and the first seven ac coeffs. const uint16x8_t v_iscan = vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); const int16x8_t coeff_abs = vabsq_s16(coeff); const int16x8_t dequant_mask = vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh)); int16x8_t qcoeff = vqaddq_s16(coeff_abs, round); int32x4_t dqcoeff_0, dqcoeff_1; uint16x8_t eob_max; (void)scan; (void)count; (void)skip_block; assert(!skip_block); // coeff * quant_ptr[]) >> 15 qcoeff = vqdmulhq_s16(qcoeff, quant); // Restore sign. qcoeff = veorq_s16(qcoeff, coeff_sign); qcoeff = vsubq_s16(qcoeff, coeff_sign); qcoeff = vandq_s16(qcoeff, dequant_mask); // qcoeff * dequant[] / 2 dqcoeff_0 = vmull_s16(vget_low_s16(qcoeff), dequant); dqcoeff_1 = vmull_n_s16(vget_high_s16(qcoeff), dequant_ptr[1]); // Add 1 if negative to round towards zero because the C uses division. 
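// Editor's note (illustrative values only): the C reference computes
// dqcoeff = (qcoeff * dequant) / 2 with integer division, which rounds
// towards zero, whereas an arithmetic right shift rounds towards
// negative infinity. extract_sign_bit() yields 1 for negative lanes and
// 0 otherwise, so adding it before the shift makes the shift match the
// division. For example, for a product of -7:
//   C reference:   -7 / 2       = -3
//   plain shift:   -7 >> 1      = -4  (mismatch)
//   adjusted:     (-7 + 1) >> 1 = -3  (matches)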
dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0)); dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1)); #if CONFIG_VP9_HIGHBITDEPTH vst1q_s32(dqcoeff_ptr, vshrq_n_s32(dqcoeff_0, 1)); vst1q_s32(dqcoeff_ptr + 4, vshrq_n_s32(dqcoeff_1, 1)); #else store_s16q_to_tran_low(dqcoeff_ptr, vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1))); #endif eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan); store_s16q_to_tran_low(qcoeff_ptr, qcoeff); iscan += 8; coeff_ptr += 8; qcoeff_ptr += 8; dqcoeff_ptr += 8; { int i; const int16x8_t round = vrshrq_n_s16(vmovq_n_s16(round_ptr[1]), 1); const int16x8_t quant = vmovq_n_s16(quant_ptr[1]); const int16x8_t dequant_thresh = vshrq_n_s16(vmovq_n_s16(dequant_ptr[1]), 2); // Process the rest of the ac coeffs. for (i = 8; i < 32 * 32; i += 8) { const uint16x8_t v_iscan = vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); const int16x8_t coeff_abs = vabsq_s16(coeff); const int16x8_t dequant_mask = vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, dequant_thresh)); int16x8_t qcoeff = vqaddq_s16(coeff_abs, round); int32x4_t dqcoeff_0, dqcoeff_1; qcoeff = vqdmulhq_s16(qcoeff, quant); qcoeff = veorq_s16(qcoeff, coeff_sign); qcoeff = vsubq_s16(qcoeff, coeff_sign); qcoeff = vandq_s16(qcoeff, dequant_mask); dqcoeff_0 = vmull_n_s16(vget_low_s16(qcoeff), dequant_ptr[1]); dqcoeff_1 = vmull_n_s16(vget_high_s16(qcoeff), dequant_ptr[1]); dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0)); dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1)); #if CONFIG_VP9_HIGHBITDEPTH vst1q_s32(dqcoeff_ptr, vshrq_n_s32(dqcoeff_0, 1)); vst1q_s32(dqcoeff_ptr + 4, vshrq_n_s32(dqcoeff_1, 1)); #else store_s16q_to_tran_low( dqcoeff_ptr, vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1))); #endif eob_max = vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan)); store_s16q_to_tran_low(qcoeff_ptr, qcoeff); iscan += 8; coeff_ptr += 8; qcoeff_ptr += 8; dqcoeff_ptr += 8; } #ifdef __aarch64__ *eob_ptr = vmaxvq_u16(eob_max); #else { const uint16x4_t eob_max_0 = vmax_u16(vget_low_u16(eob_max), vget_high_u16(eob_max)); const uint16x4_t eob_max_1 = vpmax_u16(eob_max_0, eob_max_0); const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1); vst1_lane_u16(eob_ptr, eob_max_2, 0); } #endif // __aarch64__ } } libvpx-1.8.2/vp9/encoder/mips/000077500000000000000000000000001357355204000161375ustar00rootroot00000000000000libvpx-1.8.2/vp9/encoder/mips/msa/000077500000000000000000000000001357355204000167175ustar00rootroot00000000000000libvpx-1.8.2/vp9/encoder/mips/msa/vp9_error_msa.c000066400000000000000000000147761357355204000216710ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_config.h" #include "./vp9_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" #define BLOCK_ERROR_BLOCKSIZE_MSA(BSize) \ static int64_t block_error_##BSize##size_msa( \ const int16_t *coeff_ptr, const int16_t *dq_coeff_ptr, int64_t *ssz) { \ int64_t err = 0; \ uint32_t loop_cnt; \ v8i16 coeff, dq_coeff, coeff_r_h, coeff_l_h; \ v4i32 diff_r, diff_l, coeff_r_w, coeff_l_w; \ v2i64 sq_coeff_r, sq_coeff_l; \ v2i64 err0, err_dup0, err1, err_dup1; \ \ coeff = LD_SH(coeff_ptr); \ dq_coeff = LD_SH(dq_coeff_ptr); \ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ DOTP_SW2_SD(coeff_r_w, coeff_l_w, coeff_r_w, coeff_l_w, sq_coeff_r, \ sq_coeff_l); \ DOTP_SW2_SD(diff_r, diff_l, diff_r, diff_l, err0, err1); \ \ coeff = LD_SH(coeff_ptr + 8); \ dq_coeff = LD_SH(dq_coeff_ptr + 8); \ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ \ coeff_ptr += 16; \ dq_coeff_ptr += 16; \ \ for (loop_cnt = ((BSize >> 4) - 1); loop_cnt--;) { \ coeff = LD_SH(coeff_ptr); \ dq_coeff = LD_SH(dq_coeff_ptr); \ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ \ coeff = LD_SH(coeff_ptr + 8); \ dq_coeff = LD_SH(dq_coeff_ptr + 8); \ UNPCK_SH_SW(coeff, coeff_r_w, coeff_l_w); \ ILVRL_H2_SH(coeff, dq_coeff, coeff_r_h, coeff_l_h); \ HSUB_UH2_SW(coeff_r_h, coeff_l_h, diff_r, diff_l); \ DPADD_SD2_SD(coeff_r_w, coeff_l_w, sq_coeff_r, sq_coeff_l); \ DPADD_SD2_SD(diff_r, diff_l, err0, err1); \ \ coeff_ptr += 16; \ dq_coeff_ptr += 16; \ } \ \ err_dup0 = __msa_splati_d(sq_coeff_r, 1); \ err_dup1 = __msa_splati_d(sq_coeff_l, 1); \ sq_coeff_r += err_dup0; \ sq_coeff_l += err_dup1; \ *ssz = __msa_copy_s_d(sq_coeff_r, 0); \ *ssz += __msa_copy_s_d(sq_coeff_l, 0); \ \ err_dup0 = __msa_splati_d(err0, 1); \ err_dup1 = __msa_splati_d(err1, 1); \ err0 += err_dup0; \ err1 += err_dup1; \ err = __msa_copy_s_d(err0, 0); \ err += __msa_copy_s_d(err1, 0); \ \ return err; \ } #if !CONFIG_VP9_HIGHBITDEPTH BLOCK_ERROR_BLOCKSIZE_MSA(16); BLOCK_ERROR_BLOCKSIZE_MSA(64); BLOCK_ERROR_BLOCKSIZE_MSA(256); BLOCK_ERROR_BLOCKSIZE_MSA(1024); int64_t vp9_block_error_msa(const tran_low_t *coeff_ptr, const tran_low_t *dq_coeff_ptr, intptr_t blk_size, int64_t *ssz) { int64_t err; const int16_t *coeff = (const int16_t *)coeff_ptr; const int16_t *dq_coeff = (const int16_t *)dq_coeff_ptr; switch (blk_size) { case 16: err = block_error_16size_msa(coeff, dq_coeff, ssz); break; case 64: err = block_error_64size_msa(coeff, dq_coeff, ssz); break; case 256: err = block_error_256size_msa(coeff, dq_coeff, ssz); break; case 1024: err = block_error_1024size_msa(coeff, dq_coeff, ssz); break; default: err = vp9_block_error_c(coeff_ptr, dq_coeff_ptr, blk_size, ssz); break; } return err; } #endif // !CONFIG_VP9_HIGHBITDEPTH libvpx-1.8.2/vp9/encoder/mips/msa/vp9_fdct16x16_msa.c000066400000000000000000000425241357355204000221560ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include "./vp9_rtcd.h" #include "vp9/common/vp9_enums.h" #include "vp9/encoder/mips/msa/vp9_fdct_msa.h" #include "vpx_dsp/mips/fwd_txfm_msa.h" static void fadst16_cols_step1_msa(const int16_t *input, int32_t stride, const int32_t *const0, int16_t *int_buf) { v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; v8i16 tp0, tp1, tp2, tp3, g0, g1, g2, g3, g8, g9, g10, g11, h0, h1, h2, h3; v4i32 k0, k1, k2, k3; /* load input data */ r0 = LD_SH(input); r15 = LD_SH(input + 15 * stride); r7 = LD_SH(input + 7 * stride); r8 = LD_SH(input + 8 * stride); SLLI_4V(r0, r15, r7, r8, 2); /* stage 1 */ LD_SW2(const0, 4, k0, k1); LD_SW2(const0 + 8, 4, k2, k3); MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3); r3 = LD_SH(input + 3 * stride); r4 = LD_SH(input + 4 * stride); r11 = LD_SH(input + 11 * stride); r12 = LD_SH(input + 12 * stride); SLLI_4V(r3, r4, r11, r12, 2); LD_SW2(const0 + 4 * 4, 4, k0, k1); LD_SW2(const0 + 4 * 6, 4, k2, k3); MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11); /* stage 2 */ BUTTERFLY_4(g0, g2, g10, g8, tp0, tp2, tp3, tp1); ST_SH2(tp0, tp2, int_buf, 8); ST_SH2(tp1, tp3, int_buf + 4 * 8, 8); LD_SW2(const0 + 4 * 8, 4, k0, k1); k2 = LD_SW(const0 + 4 * 10); MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3); ST_SH2(h0, h1, int_buf + 8 * 8, 8); ST_SH2(h3, h2, int_buf + 12 * 8, 8); r9 = LD_SH(input + 9 * stride); r6 = LD_SH(input + 6 * stride); r1 = LD_SH(input + stride); r14 = LD_SH(input + 14 * stride); SLLI_4V(r9, r6, r1, r14, 2); LD_SW2(const0 + 4 * 11, 4, k0, k1); LD_SW2(const0 + 4 * 13, 4, k2, k3); MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g0, g1, g2, g3); ST_SH2(g1, g3, int_buf + 3 * 8, 4 * 8); r13 = LD_SH(input + 13 * stride); r2 = LD_SH(input + 2 * stride); r5 = LD_SH(input + 5 * stride); r10 = LD_SH(input + 10 * stride); SLLI_4V(r13, r2, r5, r10, 2); LD_SW2(const0 + 4 * 15, 4, k0, k1); LD_SW2(const0 + 4 * 17, 4, k2, k3); MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, h0, h1, h2, h3); ST_SH2(h1, h3, int_buf + 11 * 8, 4 * 8); BUTTERFLY_4(h0, h2, g2, g0, tp0, tp1, tp2, tp3); ST_SH4(tp0, tp1, tp2, tp3, int_buf + 2 * 8, 4 * 8); } static void fadst16_cols_step2_msa(int16_t *int_buf, const int32_t *const0, int16_t *out) { int16_t *out_ptr = out + 128; v8i16 tp0, tp1, tp2, tp3, g5, g7, g13, g15; v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h10, h11; v8i16 out0, out1, out2, out3, out4, out5, out6, out7; v8i16 out8, out9, out10, out11, out12, out13, out14, out15; v4i32 k0, k1, k2, k3; LD_SH2(int_buf + 3 * 8, 4 * 8, g13, g15); LD_SH2(int_buf + 11 * 8, 4 * 8, g5, g7); LD_SW2(const0 + 4 * 19, 4, k0, k1); k2 = LD_SW(const0 + 4 * 21); MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7); tp0 = LD_SH(int_buf + 4 * 8); tp1 = LD_SH(int_buf + 5 * 8); tp3 = LD_SH(int_buf + 10 * 8); tp2 = LD_SH(int_buf + 14 * 8); LD_SW2(const0 + 4 * 22, 4, k0, k1); k2 = LD_SW(const0 + 4 * 24); MADD_BF(tp0, tp1, tp2, tp3, k0, k1, k2, k0, out4, out6, out5, out7); out4 = -out4; ST_SH(out4, (out + 3 * 16)); ST_SH(out5, (out_ptr + 4 * 16)); h1 = LD_SH(int_buf + 9 * 8); h3 = LD_SH(int_buf + 12 * 8); MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15); out13 = -out13; ST_SH(out12, (out + 2 * 16)); ST_SH(out13, (out_ptr + 5 * 16)); tp0 = LD_SH(int_buf); tp1 = LD_SH(int_buf + 8); tp2 = LD_SH(int_buf + 2 * 8); tp3 = LD_SH(int_buf + 6 * 8); BUTTERFLY_4(tp0, tp1, tp3, tp2, out0,
out1, h11, h10); out1 = -out1; ST_SH(out0, (out)); ST_SH(out1, (out_ptr + 7 * 16)); h0 = LD_SH(int_buf + 8 * 8); h2 = LD_SH(int_buf + 13 * 8); BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10); out8 = -out8; ST_SH(out8, (out + 16)); ST_SH(out9, (out_ptr + 6 * 16)); /* stage 4 */ LD_SW2(const0 + 4 * 25, 4, k0, k1); LD_SW2(const0 + 4 * 27, 4, k2, k3); MADD_SHORT(h10, h11, k1, k2, out2, out3); ST_SH(out2, (out + 7 * 16)); ST_SH(out3, (out_ptr)); MADD_SHORT(out6, out7, k0, k3, out6, out7); ST_SH(out6, (out + 4 * 16)); ST_SH(out7, (out_ptr + 3 * 16)); MADD_SHORT(out10, out11, k0, k3, out10, out11); ST_SH(out10, (out + 6 * 16)); ST_SH(out11, (out_ptr + 16)); MADD_SHORT(out14, out15, k1, k2, out14, out15); ST_SH(out14, (out + 5 * 16)); ST_SH(out15, (out_ptr + 2 * 16)); } static void fadst16_transpose_postproc_msa(int16_t *input, int16_t *out) { v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15; /* load input data */ LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7); TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6, r7); FDCT_POSTPROC_2V_NEG_H(r0, r1); FDCT_POSTPROC_2V_NEG_H(r2, r3); FDCT_POSTPROC_2V_NEG_H(r4, r5); FDCT_POSTPROC_2V_NEG_H(r6, r7); ST_SH8(r0, r1, r2, r3, r4, r5, r6, r7, out, 8); out += 64; LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15); TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11, r12, r13, r14, r15); FDCT_POSTPROC_2V_NEG_H(r8, r9); FDCT_POSTPROC_2V_NEG_H(r10, r11); FDCT_POSTPROC_2V_NEG_H(r12, r13); FDCT_POSTPROC_2V_NEG_H(r14, r15); ST_SH8(r8, r9, r10, r11, r12, r13, r14, r15, out, 8); out += 64; /* load input data */ input += 128; LD_SH8(input, 16, l0, l1, l2, l3, l4, l5, l6, l7); TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6, r7); FDCT_POSTPROC_2V_NEG_H(r0, r1); FDCT_POSTPROC_2V_NEG_H(r2, r3); FDCT_POSTPROC_2V_NEG_H(r4, r5); FDCT_POSTPROC_2V_NEG_H(r6, r7); ST_SH8(r0, r1, r2, r3, r4, r5, r6, r7, out, 8); out += 64; LD_SH8(input + 8, 16, l8, l9, l10, l11, l12, l13, l14, l15); TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11, r12, r13, r14, r15); FDCT_POSTPROC_2V_NEG_H(r8, r9); FDCT_POSTPROC_2V_NEG_H(r10, r11); FDCT_POSTPROC_2V_NEG_H(r12, r13); FDCT_POSTPROC_2V_NEG_H(r14, r15); ST_SH8(r8, r9, r10, r11, r12, r13, r14, r15, out, 8); } static void fadst16_rows_step1_msa(int16_t *input, const int32_t *const0, int16_t *int_buf) { v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; v8i16 tp0, tp1, tp2, tp3, g0, g1, g2, g3, g8, g9, g10, g11, h0, h1, h2, h3; v4i32 k0, k1, k2, k3; /* load input data */ r0 = LD_SH(input); r7 = LD_SH(input + 7 * 8); r8 = LD_SH(input + 8 * 8); r15 = LD_SH(input + 15 * 8); /* stage 1 */ LD_SW2(const0, 4, k0, k1); LD_SW2(const0 + 4 * 2, 4, k2, k3); MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3); r3 = LD_SH(input + 3 * 8); r4 = LD_SH(input + 4 * 8); r11 = LD_SH(input + 11 * 8); r12 = LD_SH(input + 12 * 8); LD_SW2(const0 + 4 * 4, 4, k0, k1); LD_SW2(const0 + 4 * 6, 4, k2, k3); MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11); /* stage 2 */ BUTTERFLY_4(g0, g2, g10, g8, tp0, tp2, tp3, tp1); ST_SH2(tp0, tp1, int_buf, 4 * 8); ST_SH2(tp2, tp3, int_buf + 8, 4 * 8); LD_SW2(const0 + 4 * 8, 4, k0, k1); k2 = LD_SW(const0 + 4 * 10); MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3); ST_SH2(h0, h3, int_buf + 8 * 8, 4 * 8); ST_SH2(h1, h2, int_buf + 9 * 8, 4 * 8); r1 = LD_SH(input + 8); r6 = 
LD_SH(input + 6 * 8); r9 = LD_SH(input + 9 * 8); r14 = LD_SH(input + 14 * 8); LD_SW2(const0 + 4 * 11, 4, k0, k1); LD_SW2(const0 + 4 * 13, 4, k2, k3); MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g0, g1, g2, g3); ST_SH2(g1, g3, int_buf + 3 * 8, 4 * 8); r2 = LD_SH(input + 2 * 8); r5 = LD_SH(input + 5 * 8); r10 = LD_SH(input + 10 * 8); r13 = LD_SH(input + 13 * 8); LD_SW2(const0 + 4 * 15, 4, k0, k1); LD_SW2(const0 + 4 * 17, 4, k2, k3); MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, h0, h1, h2, h3); ST_SH2(h1, h3, int_buf + 11 * 8, 4 * 8); BUTTERFLY_4(h0, h2, g2, g0, tp0, tp1, tp2, tp3); ST_SH4(tp0, tp1, tp2, tp3, int_buf + 2 * 8, 4 * 8); } static void fadst16_rows_step2_msa(int16_t *int_buf, const int32_t *const0, int16_t *out) { int16_t *out_ptr = out + 8; v8i16 tp0, tp1, tp2, tp3, g5, g7, g13, g15; v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h10, h11; v8i16 out0, out1, out2, out3, out4, out5, out6, out7; v8i16 out8, out9, out10, out11, out12, out13, out14, out15; v4i32 k0, k1, k2, k3; g13 = LD_SH(int_buf + 3 * 8); g15 = LD_SH(int_buf + 7 * 8); g5 = LD_SH(int_buf + 11 * 8); g7 = LD_SH(int_buf + 15 * 8); LD_SW2(const0 + 4 * 19, 4, k0, k1); k2 = LD_SW(const0 + 4 * 21); MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7); tp0 = LD_SH(int_buf + 4 * 8); tp1 = LD_SH(int_buf + 5 * 8); tp3 = LD_SH(int_buf + 10 * 8); tp2 = LD_SH(int_buf + 14 * 8); LD_SW2(const0 + 4 * 22, 4, k0, k1); k2 = LD_SW(const0 + 4 * 24); MADD_BF(tp0, tp1, tp2, tp3, k0, k1, k2, k0, out4, out6, out5, out7); out4 = -out4; ST_SH(out4, (out + 3 * 16)); ST_SH(out5, (out_ptr + 4 * 16)); h1 = LD_SH(int_buf + 9 * 8); h3 = LD_SH(int_buf + 12 * 8); MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15); out13 = -out13; ST_SH(out12, (out + 2 * 16)); ST_SH(out13, (out_ptr + 5 * 16)); tp0 = LD_SH(int_buf); tp1 = LD_SH(int_buf + 8); tp2 = LD_SH(int_buf + 2 * 8); tp3 = LD_SH(int_buf + 6 * 8); BUTTERFLY_4(tp0, tp1, tp3, tp2, out0, out1, h11, h10); out1 = -out1; ST_SH(out0, (out)); ST_SH(out1, (out_ptr + 7 * 16)); h0 = LD_SH(int_buf + 8 * 8); h2 = LD_SH(int_buf + 13 * 8); BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10); out8 = -out8; ST_SH(out8, (out + 16)); ST_SH(out9, (out_ptr + 6 * 16)); /* stage 4 */ LD_SW2(const0 + 4 * 25, 4, k0, k1); LD_SW2(const0 + 4 * 27, 4, k2, k3); MADD_SHORT(h10, h11, k1, k2, out2, out3); ST_SH(out2, (out + 7 * 16)); ST_SH(out3, (out_ptr)); MADD_SHORT(out6, out7, k0, k3, out6, out7); ST_SH(out6, (out + 4 * 16)); ST_SH(out7, (out_ptr + 3 * 16)); MADD_SHORT(out10, out11, k0, k3, out10, out11); ST_SH(out10, (out + 6 * 16)); ST_SH(out11, (out_ptr + 16)); MADD_SHORT(out14, out15, k1, k2, out14, out15); ST_SH(out14, (out + 5 * 16)); ST_SH(out15, (out_ptr + 2 * 16)); } static void fadst16_transpose_msa(int16_t *input, int16_t *out) { v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15; /* load input data */ LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14, l7, l15); TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6, r7); TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11, r12, r13, r14, r15); ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8); ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8); out += 16 * 8; /* load input data */ input += 128; LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14, l7, l15); TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, r0, r1, r2, r3, r4, r5, r6, r7); 
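// Editor's note: TRANSPOSE8x8_SH_SH operates on eight v8i16 registers at
// a time, so the 16x16 transpose here is assembled from 8x8
// sub-transposes; the interleaved register order used by LD_SH16 above
// and ST_SH8 below stitches the partial results back into transposed
// order.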
TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, r8, r9, r10, r11, r12, r13, r14, r15); ST_SH8(r0, r8, r1, r9, r2, r10, r3, r11, out, 8); ST_SH8(r4, r12, r5, r13, r6, r14, r7, r15, (out + 64), 8); } static void postproc_fdct16x8_1d_row(int16_t *intermediate, int16_t *output) { int16_t *temp = intermediate; int16_t *out = output; v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; v8i16 in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11; v8i16 in12, in13, in14, in15; LD_SH8(temp, 16, in0, in1, in2, in3, in4, in5, in6, in7); temp = intermediate + 8; LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9, in10, in11, in12, in13, in14, in15); FDCT_POSTPROC_2V_NEG_H(in0, in1); FDCT_POSTPROC_2V_NEG_H(in2, in3); FDCT_POSTPROC_2V_NEG_H(in4, in5); FDCT_POSTPROC_2V_NEG_H(in6, in7); FDCT_POSTPROC_2V_NEG_H(in8, in9); FDCT_POSTPROC_2V_NEG_H(in10, in11); FDCT_POSTPROC_2V_NEG_H(in12, in13); FDCT_POSTPROC_2V_NEG_H(in14, in15); BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, in13, in14, in15, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, in8, in9, in10, in11, in12, in13, in14, in15); temp = intermediate; ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, temp, 16); FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); temp = intermediate; LD_SH8(temp, 16, in8, in9, in10, in11, in12, in13, in14, in15); FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3); ST_SH8(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, out, 16); TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7); out = output + 8; ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, out, 16); } void vp9_fht16x16_msa(const int16_t *input, int16_t *output, int32_t stride, int32_t tx_type) { DECLARE_ALIGNED(32, int16_t, tmp[256]); DECLARE_ALIGNED(32, int16_t, trans_buf[256]); DECLARE_ALIGNED(32, int16_t, tmp_buf[128]); int32_t i; int16_t *ptmpbuf = &tmp_buf[0]; int16_t *trans = &trans_buf[0]; const int32_t const_arr[29 * 4] = { 52707308, 52707308, 52707308, 52707308, -1072430300, -1072430300, -1072430300, -1072430300, 795618043, 795618043, 795618043, 795618043, -721080468, -721080468, -721080468, -721080468, 459094491, 459094491, 459094491, 459094491, -970646691, -970646691, -970646691, -970646691, 1010963856, 1010963856, 1010963856, 1010963856, -361743294, -361743294, -361743294, -361743294, 209469125, 209469125, 209469125, 209469125, -1053094788, -1053094788, -1053094788, -1053094788, 1053160324, 1053160324, 1053160324, 1053160324, 639644520, 639644520, 639644520, 639644520, -862444000, -862444000, -862444000, -862444000, 1062144356, 1062144356, 1062144356, 1062144356, -157532337, -157532337, -157532337, -157532337, 260914709, 260914709, 260914709, 260914709, -1041559667, -1041559667, -1041559667, -1041559667, 920985831, 920985831, 920985831, 920985831, -551995675, -551995675, -551995675, -551995675, 596522295, 596522295, 596522295, 596522295, 892853362, 892853362, 892853362, 892853362, -892787826, -892787826, -892787826, -892787826, 410925857, 410925857, 410925857, 410925857, -992012162, -992012162, -992012162, -992012162, 992077698, 992077698, 992077698, 
992077698, 759246145, 759246145, 759246145, 759246145, -759180609, -759180609, -759180609, -759180609, -759222975, -759222975, -759222975, -759222975, 759288511, 759288511, 759288511, 759288511 }; switch (tx_type) { case DCT_DCT: /* column transform */ for (i = 0; i < 2; ++i) { fdct8x16_1d_column(input + 8 * i, tmp + 8 * i, stride); } /* row transform */ for (i = 0; i < 2; ++i) { fdct16x8_1d_row(tmp + (128 * i), output + (128 * i)); } break; case ADST_DCT: /* column transform */ for (i = 0; i < 2; ++i) { fadst16_cols_step1_msa(input + (i << 3), stride, const_arr, ptmpbuf); fadst16_cols_step2_msa(ptmpbuf, const_arr, tmp + (i << 3)); } /* row transform */ for (i = 0; i < 2; ++i) { postproc_fdct16x8_1d_row(tmp + (128 * i), output + (128 * i)); } break; case DCT_ADST: /* column transform */ for (i = 0; i < 2; ++i) { fdct8x16_1d_column(input + 8 * i, tmp + 8 * i, stride); } fadst16_transpose_postproc_msa(tmp, trans); /* row transform */ for (i = 0; i < 2; ++i) { fadst16_rows_step1_msa(trans + (i << 7), const_arr, ptmpbuf); fadst16_rows_step2_msa(ptmpbuf, const_arr, tmp + (i << 7)); } fadst16_transpose_msa(tmp, output); break; case ADST_ADST: /* column transform */ for (i = 0; i < 2; ++i) { fadst16_cols_step1_msa(input + (i << 3), stride, const_arr, ptmpbuf); fadst16_cols_step2_msa(ptmpbuf, const_arr, tmp + (i << 3)); } fadst16_transpose_postproc_msa(tmp, trans); /* row transform */ for (i = 0; i < 2; ++i) { fadst16_rows_step1_msa(trans + (i << 7), const_arr, ptmpbuf); fadst16_rows_step2_msa(ptmpbuf, const_arr, tmp + (i << 7)); } fadst16_transpose_msa(tmp, output); break; default: assert(0); break; } } libvpx-1.8.2/vp9/encoder/mips/msa/vp9_fdct4x4_msa.c000066400000000000000000000055351357355204000220110ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vp9_rtcd.h" #include "vp9/common/vp9_enums.h" #include "vp9/encoder/mips/msa/vp9_fdct_msa.h" void vp9_fwht4x4_msa(const int16_t *input, int16_t *output, int32_t src_stride) { v8i16 in0, in1, in2, in3, in4; LD_SH4(input, src_stride, in0, in1, in2, in3); in0 += in1; in3 -= in2; in4 = (in0 - in3) >> 1; SUB2(in4, in1, in4, in2, in1, in2); in0 -= in2; in3 += in1; TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1); in0 += in2; in1 -= in3; in4 = (in0 - in1) >> 1; SUB2(in4, in2, in4, in3, in2, in3); in0 -= in3; in1 += in2; SLLI_4V(in0, in1, in2, in3, 2); TRANSPOSE4x4_SH_SH(in0, in3, in1, in2, in0, in3, in1, in2); ST4x2_UB(in0, output, 4); ST4x2_UB(in3, output + 4, 4); ST4x2_UB(in1, output + 8, 4); ST4x2_UB(in2, output + 12, 4); } void vp9_fht4x4_msa(const int16_t *input, int16_t *output, int32_t stride, int32_t tx_type) { v8i16 in0, in1, in2, in3; LD_SH4(input, stride, in0, in1, in2, in3); /* fdct4 pre-process */ { v8i16 temp, mask; v16i8 zero = { 0 }; v16i8 one = __msa_ldi_b(1); mask = (v8i16)__msa_sldi_b(zero, one, 15); SLLI_4V(in0, in1, in2, in3, 4); temp = __msa_ceqi_h(in0, 0); temp = (v8i16)__msa_xori_b((v16u8)temp, 255); temp = mask & temp; in0 += temp; } switch (tx_type) { case DCT_DCT: VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3); TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3); break; case ADST_DCT: VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3); TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3); break; case DCT_ADST: VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3); TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3); break; case ADST_ADST: VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3); TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VP9_FADST4(in0, in1, in2, in3, in0, in1, in2, in3); break; default: assert(0); break; } TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); ADD4(in0, 1, in1, 1, in2, 1, in3, 1, in0, in1, in2, in3); SRA_4V(in0, in1, in2, in3, 2); PCKEV_D2_SH(in1, in0, in3, in2, in0, in2); ST_SH2(in0, in2, output, 8); } libvpx-1.8.2/vp9/encoder/mips/msa/vp9_fdct8x8_msa.c000066400000000000000000000052061357355204000220140ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vp9_rtcd.h" #include "vp9/common/vp9_enums.h" #include "vp9/encoder/mips/msa/vp9_fdct_msa.h" void vp9_fht8x8_msa(const int16_t *input, int16_t *output, int32_t stride, int32_t tx_type) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; LD_SH8(input, stride, in0, in1, in2, in3, in4, in5, in6, in7); SLLI_4V(in0, in1, in2, in3, 2); SLLI_4V(in4, in5, in6, in7, 2); switch (tx_type) { case DCT_DCT: VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); break; case ADST_DCT: VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); break; case DCT_ADST: VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); break; case ADST_ADST: VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); break; default: assert(0); break; } TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7); ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8); } libvpx-1.8.2/vp9/encoder/mips/msa/vp9_fdct_msa.h000066400000000000000000000177311357355204000214570ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ #define VPX_VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ #include "vpx_dsp/mips/fwd_txfm_msa.h" #include "vpx_dsp/mips/txfm_macros_msa.h" #include "vpx_ports/mem.h" #define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \ out3, out4, out5, out6, out7) \ { \ v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \ v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \ v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \ cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \ v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \ cospi_24_64, -cospi_24_64, 0, 0 }; \ \ SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \ cnst2_m = -cnst0_m; \ ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \ SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \ cnst4_m = -cnst2_m; \ ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \ \ ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \ ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \ DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \ cnst2_m, cnst3_m, in7, in0, in4, in3); \ \ SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \ cnst2_m = -cnst0_m; \ ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \ SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \ cnst4_m = -cnst2_m; \ ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \ \ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ \ DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \ cnst2_m, cnst3_m, in5, in2, in6, in1); \ BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \ out7 = -s0_m; \ out0 = s1_m; \ \ SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \ \ ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \ cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ cnst1_m = cnst0_m; \ \ ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst2_m, \ cnst3_m, cnst1_m, out1, out6, s0_m, s1_m); \ \ SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ \ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \ out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \ out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \ \ out1 = -out1; \ out3 = -out3; \ out5 = -out5; \ } #define VP9_FADST4(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v4i32 s0_m, s1_m, s2_m, s3_m, constant_m; \ v4i32 in0_r_m, in1_r_m, in2_r_m, in3_r_m; \ \ UNPCK_R_SH_SW(in0, in0_r_m); \ UNPCK_R_SH_SW(in1, in1_r_m); \ UNPCK_R_SH_SW(in2, in2_r_m); \ UNPCK_R_SH_SW(in3, in3_r_m); \ \ constant_m = __msa_fill_w(sinpi_4_9); \ MUL2(in0_r_m, constant_m, in3_r_m, constant_m, s1_m, s0_m); \ \ constant_m = __msa_fill_w(sinpi_1_9); \ s0_m += in0_r_m * constant_m; \ s1_m -= in1_r_m * constant_m; \ \ constant_m = __msa_fill_w(sinpi_2_9); \ s0_m += in1_r_m * constant_m; \ s1_m += in3_r_m * constant_m; \ \ s2_m = in0_r_m + in1_r_m - in3_r_m; \ \ constant_m = __msa_fill_w(sinpi_3_9); \ MUL2(in2_r_m, constant_m, s2_m, constant_m, s3_m, in1_r_m); \ \ in0_r_m = s0_m + s3_m; \ s2_m = s1_m - s3_m; \ s3_m = s1_m - s0_m + s3_m; \ \ SRARI_W4_SW(in0_r_m, in1_r_m, s2_m, s3_m, DCT_CONST_BITS); \ PCKEV_H4_SH(in0_r_m, in0_r_m, in1_r_m, in1_r_m, s2_m, s2_m, s3_m, 
s3_m, \ out0, out1, out2, out3); \ } #endif // VPX_VP9_ENCODER_MIPS_MSA_VP9_FDCT_MSA_H_ libvpx-1.8.2/vp9/encoder/ppc/000077500000000000000000000000001357355204000157515ustar00rootroot00000000000000libvpx-1.8.2/vp9/encoder/ppc/vp9_quantize_vsx.c000066400000000000000000000250131357355204000214540ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "./vp9_rtcd.h" #include "vpx_dsp/ppc/types_vsx.h" // Multiply the packed 16-bit integers in a and b, producing intermediate 32-bit // integers, and return the high 16 bits of the intermediate integers. // (a * b) >> 16 // Note: Because this is done in 2 operations, a and b cannot both be UINT16_MIN static INLINE int16x8_t vec_mulhi(int16x8_t a, int16x8_t b) { // madds does ((A * B) >> 15) + C, we need >> 16, so we perform an extra right // shift. return vec_sra(vec_madds(a, b, vec_zeros_s16), vec_ones_u16); } // Negate 16-bit integers in a when the corresponding signed 16-bit // integer in b is negative. static INLINE int16x8_t vec_sign(int16x8_t a, int16x8_t b) { const int16x8_t mask = vec_sra(b, vec_shift_sign_s16); return vec_xor(vec_add(a, mask), mask); } // Compare packed 16-bit integers across a, and return the maximum value in // every element. Returns a vector containing the biggest value across vector a. static INLINE int16x8_t vec_max_across(int16x8_t a) { a = vec_max(a, vec_perm(a, a, vec_perm64)); a = vec_max(a, vec_perm(a, a, vec_perm32)); return vec_max(a, vec_perm(a, a, vec_perm16)); } void vp9_quantize_fp_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob; bool16x8_t zero_coeff0, zero_coeff1; int16x8_t round = vec_vsx_ld(0, round_ptr); int16x8_t quant = vec_vsx_ld(0, quant_ptr); int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr); int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); int16x8_t scan0 = vec_vsx_ld(0, iscan); int16x8_t scan1 = vec_vsx_ld(16, iscan); (void)scan; (void)skip_block; assert(!skip_block); // First set of 8 coeff starts with DC + 7 AC qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant); zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); qcoeff0 = vec_sign(qcoeff0, coeff0); vec_vsx_st(qcoeff0, 0, qcoeff_ptr); dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16); vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr); // Remove DC value from round and quant round = vec_splat(round, 1); quant = vec_splat(quant, 1); // Remove DC value from dequant dequant = vec_splat(dequant, 1); // Second set of 8 coeff starts with (all AC) qcoeff1 = vec_mulhi(vec_vaddshs(vec_abs(coeff1), round), quant); zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16); qcoeff1 = vec_sign(qcoeff1, coeff1); vec_vsx_st(qcoeff1, 16, qcoeff_ptr); dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16); vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr); eob = vec_max(vec_or(scan0, zero_coeff0), vec_or(scan1, zero_coeff1)); // We quantize 16 coeff up front (enough for a 4x4) and 
process 24 coeff per // loop iteration. // for 8x8: 16 + 2 x 24 = 64 // for 16x16: 16 + 10 x 24 = 256 if (n_coeffs > 16) { int16x8_t coeff2, qcoeff2, dqcoeff2, eob2, scan2; bool16x8_t zero_coeff2; int index = 16; int off0 = 32; int off1 = 48; int off2 = 64; do { coeff0 = vec_vsx_ld(off0, coeff_ptr); coeff1 = vec_vsx_ld(off1, coeff_ptr); coeff2 = vec_vsx_ld(off2, coeff_ptr); scan0 = vec_vsx_ld(off0, iscan); scan1 = vec_vsx_ld(off1, iscan); scan2 = vec_vsx_ld(off2, iscan); qcoeff0 = vec_mulhi(vec_vaddshs(vec_abs(coeff0), round), quant); zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); qcoeff0 = vec_sign(qcoeff0, coeff0); vec_vsx_st(qcoeff0, off0, qcoeff_ptr); dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16); vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr); qcoeff1 = vec_mulhi(vec_vaddshs(vec_abs(coeff1), round), quant); zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16); qcoeff1 = vec_sign(qcoeff1, coeff1); vec_vsx_st(qcoeff1, off1, qcoeff_ptr); dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16); vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr); qcoeff2 = vec_mulhi(vec_vaddshs(vec_abs(coeff2), round), quant); zero_coeff2 = vec_cmpeq(qcoeff2, vec_zeros_s16); qcoeff2 = vec_sign(qcoeff2, coeff2); vec_vsx_st(qcoeff2, off2, qcoeff_ptr); dqcoeff2 = vec_mladd(qcoeff2, dequant, vec_zeros_s16); vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr); eob = vec_max(eob, vec_or(scan0, zero_coeff0)); eob2 = vec_max(vec_or(scan1, zero_coeff1), vec_or(scan2, zero_coeff2)); eob = vec_max(eob, eob2); index += 24; off0 += 48; off1 += 48; off2 += 48; } while (index < n_coeffs); } eob = vec_max_across(eob); *eob_ptr = eob[0] + 1; } // Sets the value of a 32-bit integers to 1 when the corresponding value in a is // negative. static INLINE int32x4_t vec_is_neg(int32x4_t a) { return vec_sr(a, vec_shift_sign_s32); } // DeQuantization function used for 32x32 blocks. Quantized coeff of 32x32 // blocks are twice as big as for other block sizes. As such, using // vec_mladd results in overflow. static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff, int16x8_t dequant) { int32x4_t dqcoeffe = vec_mule(qcoeff, dequant); int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant); // Add 1 if negative to round towards zero because the C uses division. 
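// Editor's note (illustrative values only): vec_mule/vec_mulo above hold
// the even- and odd-indexed 32-bit products, so the sign fix-up is
// applied to each half before the halving shift. For example, for a
// product of -9 the C reference gives -9 / 2 = -4, a bare shift would
// give -9 >> 1 = -5, and (-9 + 1) >> 1 = -4 matches. vec_perm with
// vec_perm_odd_even_pack then re-interleaves the two halves back into
// coefficient order.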
dqcoeffe = vec_add(dqcoeffe, vec_is_neg(dqcoeffe)); dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo)); dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32); dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32); return (int16x8_t)vec_perm(dqcoeffe, dqcoeffo, vec_perm_odd_even_pack); } void vp9_quantize_fp_32x32_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { // In stage 1, we quantize 16 coeffs (DC + 15 AC) // In stage 2, we loop 42 times and quantize 24 coeffs per iteration // (32 * 32 - 16) / 24 = 42 int num_itr = 42; // Offsets are in bytes, 16 coeffs = 32 bytes int off0 = 32; int off1 = 48; int off2 = 64; int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob; bool16x8_t mask0, mask1, zero_coeff0, zero_coeff1; int16x8_t round = vec_vsx_ld(0, round_ptr); int16x8_t quant = vec_vsx_ld(0, quant_ptr); int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr); int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); int16x8_t scan0 = vec_vsx_ld(0, iscan); int16x8_t scan1 = vec_vsx_ld(16, iscan); int16x8_t thres = vec_sra(dequant, vec_splats((uint16_t)2)); int16x8_t abs_coeff0 = vec_abs(coeff0); int16x8_t abs_coeff1 = vec_abs(coeff1); (void)scan; (void)skip_block; (void)n_coeffs; assert(!skip_block); mask0 = vec_cmpge(abs_coeff0, thres); round = vec_sra(vec_add(round, vec_ones_s16), vec_ones_u16); // First set of 8 coeff starts with DC + 7 AC qcoeff0 = vec_madds(vec_vaddshs(abs_coeff0, round), quant, vec_zeros_s16); qcoeff0 = vec_and(qcoeff0, mask0); zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); qcoeff0 = vec_sign(qcoeff0, coeff0); vec_vsx_st(qcoeff0, 0, qcoeff_ptr); dqcoeff0 = dequantize_coeff_32(qcoeff0, dequant); vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr); // Remove DC value from thres, round, quant and dequant thres = vec_splat(thres, 1); round = vec_splat(round, 1); quant = vec_splat(quant, 1); dequant = vec_splat(dequant, 1); mask1 = vec_cmpge(abs_coeff1, thres); // Second set of 8 coeff starts with (all AC) qcoeff1 = vec_madds(vec_vaddshs(vec_abs(coeff1), round), quant, vec_zeros_s16); qcoeff1 = vec_and(qcoeff1, mask1); zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16); qcoeff1 = vec_sign(qcoeff1, coeff1); vec_vsx_st(qcoeff1, 16, qcoeff_ptr); dqcoeff1 = dequantize_coeff_32(qcoeff1, dequant); vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr); eob = vec_max(vec_or(scan0, zero_coeff0), vec_or(scan1, zero_coeff1)); do { int16x8_t coeff2, abs_coeff2, qcoeff2, dqcoeff2, eob2, scan2; bool16x8_t zero_coeff2, mask2; coeff0 = vec_vsx_ld(off0, coeff_ptr); coeff1 = vec_vsx_ld(off1, coeff_ptr); coeff2 = vec_vsx_ld(off2, coeff_ptr); scan0 = vec_vsx_ld(off0, iscan); scan1 = vec_vsx_ld(off1, iscan); scan2 = vec_vsx_ld(off2, iscan); abs_coeff0 = vec_abs(coeff0); abs_coeff1 = vec_abs(coeff1); abs_coeff2 = vec_abs(coeff2); qcoeff0 = vec_madds(vec_vaddshs(abs_coeff0, round), quant, vec_zeros_s16); qcoeff1 = vec_madds(vec_vaddshs(abs_coeff1, round), quant, vec_zeros_s16); qcoeff2 = vec_madds(vec_vaddshs(abs_coeff2, round), quant, vec_zeros_s16); mask0 = vec_cmpge(abs_coeff0, thres); mask1 = vec_cmpge(abs_coeff1, thres); mask2 = vec_cmpge(abs_coeff2, thres); qcoeff0 = vec_and(qcoeff0, mask0); qcoeff1 = vec_and(qcoeff1, mask1); qcoeff2 = vec_and(qcoeff2, mask2); zero_coeff0 = vec_cmpeq(qcoeff0, vec_zeros_s16); zero_coeff1 = vec_cmpeq(qcoeff1, vec_zeros_s16); zero_coeff2 = vec_cmpeq(qcoeff2, vec_zeros_s16); qcoeff0 = 
vec_sign(qcoeff0, coeff0); qcoeff1 = vec_sign(qcoeff1, coeff1); qcoeff2 = vec_sign(qcoeff2, coeff2); vec_vsx_st(qcoeff0, off0, qcoeff_ptr); vec_vsx_st(qcoeff1, off1, qcoeff_ptr); vec_vsx_st(qcoeff2, off2, qcoeff_ptr); dqcoeff0 = dequantize_coeff_32(qcoeff0, dequant); dqcoeff1 = dequantize_coeff_32(qcoeff1, dequant); dqcoeff2 = dequantize_coeff_32(qcoeff2, dequant); vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr); vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr); vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr); eob = vec_max(eob, vec_or(scan0, zero_coeff0)); eob2 = vec_max(vec_or(scan1, zero_coeff1), vec_or(scan2, zero_coeff2)); eob = vec_max(eob, eob2); off0 += 48; off1 += 48; off2 += 48; num_itr--; } while (num_itr != 0); eob = vec_max_across(eob); *eob_ptr = eob[0] + 1; } libvpx-1.8.2/vp9/encoder/vp9_alt_ref_aq.c000066400000000000000000000034131357355204000202270ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file in the root of the source tree. An additional * intellectual property rights grant can be found in the file PATENTS. * All contributing project authors may be found in the AUTHORS file in * the root of the source tree. */ #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_alt_ref_aq.h" struct ALT_REF_AQ { int dummy; }; struct ALT_REF_AQ *vp9_alt_ref_aq_create(void) { return (struct ALT_REF_AQ *)vpx_malloc(sizeof(struct ALT_REF_AQ)); } void vp9_alt_ref_aq_destroy(struct ALT_REF_AQ *const self) { vpx_free(self); } void vp9_alt_ref_aq_upload_map(struct ALT_REF_AQ *const self, const struct MATX_8U *segmentation_map) { (void)self; (void)segmentation_map; } void vp9_alt_ref_aq_set_nsegments(struct ALT_REF_AQ *const self, int nsegments) { (void)self; (void)nsegments; } void vp9_alt_ref_aq_setup_mode(struct ALT_REF_AQ *const self, struct VP9_COMP *const cpi) { (void)cpi; (void)self; } // set basic segmentation to the altref's one void vp9_alt_ref_aq_setup_map(struct ALT_REF_AQ *const self, struct VP9_COMP *const cpi) { (void)cpi; (void)self; } // restore cpi->aq_mode void vp9_alt_ref_aq_unset_all(struct ALT_REF_AQ *const self, struct VP9_COMP *const cpi) { (void)cpi; (void)self; } int vp9_alt_ref_aq_disable_if(const struct ALT_REF_AQ *self, int segmentation_overhead, int bandwidth) { (void)bandwidth; (void)self; (void)segmentation_overhead; return 0; } libvpx-1.8.2/vp9/encoder/vp9_alt_ref_aq.h000066400000000000000000000071511357355204000202370ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file in the root of the source tree. An additional * intellectual property rights grant can be found in the file PATENTS. * All contributing project authors may be found in the AUTHORS file in * the root of the source tree. */ /* * \file vp9_alt_ref_aq.h * * This file contains public interface for setting up adaptive segmentation * for altref frames. Go to alt_ref_aq_private.h for implmentation details. 
*/ #ifndef VPX_VP9_ENCODER_VP9_ALT_REF_AQ_H_ #define VPX_VP9_ENCODER_VP9_ALT_REF_AQ_H_ #include "vpx/vpx_integer.h" // Where to disable segmentation #define ALT_REF_AQ_LOW_BITRATE_BOUNDARY 150 // Last frame always has overall quality = 0, // so it is questionable if I can process it #define ALT_REF_AQ_APPLY_TO_LAST_FRAME 1 // If I should try to compare gain // against segmentation overhead #define ALT_REF_AQ_PROTECT_GAIN 0 // Threshold to disable segmentation #define ALT_REF_AQ_PROTECT_GAIN_THRESH 0.5 #ifdef __cplusplus extern "C" { #endif // Simple structure for storing images struct MATX_8U { int rows; int cols; int stride; uint8_t *data; }; struct VP9_COMP; struct ALT_REF_AQ; /*!\brief Constructor * * \return Instance of the class */ struct ALT_REF_AQ *vp9_alt_ref_aq_create(void); /*!\brief Upload segmentation_map to self object * * \param self Instance of the class * \param segmentation_map Segmentation map to upload */ void vp9_alt_ref_aq_upload_map(struct ALT_REF_AQ *const self, const struct MATX_8U *segmentation_map); /*!\brief Return pointer to the altref segmentation map * * \param self Instance of the class * \param segmentation_overhead Segmentation overhead in bytes * \param bandwidth Current frame bandwidth in bytes * * \return Boolean value to disable segmentation */ int vp9_alt_ref_aq_disable_if(const struct ALT_REF_AQ *self, int segmentation_overhead, int bandwidth); /*!\brief Set number of segments * * It is used for delta quantizer computations * and thus it can be larger than * maximum value of the segmentation map * * \param self Instance of the class * \param nsegments Maximum number of segments */ void vp9_alt_ref_aq_set_nsegments(struct ALT_REF_AQ *const self, int nsegments); /*!\brief Set up LOOKAHEAD_AQ segmentation mode * * Set up segmentation mode to LOOKAHEAD_AQ * (expected future frames prediction * quality refering to the current frame). * * \param self Instance of the class * \param cpi Encoder context */ void vp9_alt_ref_aq_setup_mode(struct ALT_REF_AQ *const self, struct VP9_COMP *const cpi); /*!\brief Set up LOOKAHEAD_AQ segmentation map and delta quantizers * * \param self Instance of the class * \param cpi Encoder context */ void vp9_alt_ref_aq_setup_map(struct ALT_REF_AQ *const self, struct VP9_COMP *const cpi); /*!\brief Restore main segmentation map mode and reset the class variables * * \param self Instance of the class * \param cpi Encoder context */ void vp9_alt_ref_aq_unset_all(struct ALT_REF_AQ *const self, struct VP9_COMP *const cpi); /*!\brief Destructor * * \param self Instance of the class */ void vp9_alt_ref_aq_destroy(struct ALT_REF_AQ *const self); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_ALT_REF_AQ_H_ libvpx-1.8.2/vp9/encoder/vp9_aq_360.c000066400000000000000000000046561357355204000171350ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "vpx_ports/mem.h" #include "vpx_ports/system_state.h" #include "vp9/encoder/vp9_aq_360.h" #include "vp9/encoder/vp9_aq_variance.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rd.h" #include "vp9/encoder/vp9_segmentation.h" static const double rate_ratio[MAX_SEGMENTS] = { 1.0, 0.75, 0.6, 0.5, 0.4, 0.3, 0.25 }; // Sets segment id 0 for the equatorial region, 1 for temperate region // and 2 for the polar regions unsigned int vp9_360aq_segment_id(int mi_row, int mi_rows) { if (mi_row < mi_rows / 8 || mi_row > mi_rows - mi_rows / 8) return 2; else if (mi_row < mi_rows / 4 || mi_row > mi_rows - mi_rows / 4) return 1; else return 0; } void vp9_360aq_frame_setup(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; struct segmentation *seg = &cm->seg; int i; if (frame_is_intra_only(cm) || cpi->force_update_segmentation || cm->error_resilient_mode) { vp9_enable_segmentation(seg); vp9_clearall_segfeatures(seg); seg->abs_delta = SEGMENT_DELTADATA; vpx_clear_system_state(); for (i = 0; i < MAX_SEGMENTS; ++i) { int qindex_delta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, rate_ratio[i], cm->bit_depth); // We don't allow qindex 0 in a segment if the base value is not 0. // Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment // Q delta is sometimes applied without going back around the rd loop. // This could lead to an illegal combination of partition size and q. if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) { qindex_delta = -cm->base_qindex + 1; } // No need to enable SEG_LVL_ALT_Q for this segment. if (rate_ratio[i] == 1.0) { continue; } vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, qindex_delta); vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); } } } libvpx-1.8.2/vp9/encoder/vp9_aq_360.h000066400000000000000000000013641357355204000171330ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_AQ_360_H_ #define VPX_VP9_ENCODER_VP9_AQ_360_H_ #include "vp9/encoder/vp9_encoder.h" #ifdef __cplusplus extern "C" { #endif unsigned int vp9_360aq_segment_id(int mi_row, int mi_rows); void vp9_360aq_frame_setup(VP9_COMP *cpi); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_AQ_360_H_ libvpx-1.8.2/vp9/encoder/vp9_aq_complexity.c000066400000000000000000000137061357355204000210160ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <limits.h> #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/system_state.h" #include "vp9/encoder/vp9_aq_complexity.h" #include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/encoder/vp9_segmentation.h" #define AQ_C_SEGMENTS 5 #define DEFAULT_AQ2_SEG 3 // Neutral Q segment #define AQ_C_STRENGTHS 3 static const double aq_c_q_adj_factor[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = { { 1.75, 1.25, 1.05, 1.00, 0.90 }, { 2.00, 1.50, 1.15, 1.00, 0.85 }, { 2.50, 1.75, 1.25, 1.00, 0.80 } }; static const double aq_c_transitions[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = { { 0.15, 0.30, 0.55, 2.00, 100.0 }, { 0.20, 0.40, 0.65, 2.00, 100.0 }, { 0.25, 0.50, 0.75, 2.00, 100.0 } }; static const double aq_c_var_thresholds[AQ_C_STRENGTHS][AQ_C_SEGMENTS] = { { -4.0, -3.0, -2.0, 100.00, 100.0 }, { -3.5, -2.5, -1.5, 100.00, 100.0 }, { -3.0, -2.0, -1.0, 100.00, 100.0 } }; static int get_aq_c_strength(int q_index, vpx_bit_depth_t bit_depth) { // Approximate base quantizer (truncated to int) const int base_quant = vp9_ac_quant(q_index, 0, bit_depth) / 4; return (base_quant > 10) + (base_quant > 25); } void vp9_setup_in_frame_q_adj(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; struct segmentation *const seg = &cm->seg; // Make SURE use of floating point in this function is safe. vpx_clear_system_state(); if (frame_is_intra_only(cm) || cm->error_resilient_mode || cpi->refresh_alt_ref_frame || cpi->force_update_segmentation || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { int segment; const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth); // Clear down the segment map. memset(cpi->segmentation_map, DEFAULT_AQ2_SEG, cm->mi_rows * cm->mi_cols); vp9_clearall_segfeatures(seg); // Segmentation only makes sense if the target bits per SB is above a // threshold. Below this the overheads will usually outweigh any benefit. if (cpi->rc.sb64_target_rate < 256) { vp9_disable_segmentation(seg); return; } vp9_enable_segmentation(seg); // Select delta coding method. seg->abs_delta = SEGMENT_DELTADATA; // Default segment "Q" feature is disabled so it defaults to the baseline Q. vp9_disable_segfeature(seg, DEFAULT_AQ2_SEG, SEG_LVL_ALT_Q); // Use some of the segments for in frame Q adjustment. for (segment = 0; segment < AQ_C_SEGMENTS; ++segment) { int qindex_delta; if (segment == DEFAULT_AQ2_SEG) continue; qindex_delta = vp9_compute_qdelta_by_rate( &cpi->rc, cm->frame_type, cm->base_qindex, aq_c_q_adj_factor[aq_strength][segment], cm->bit_depth); // For AQ complexity mode, we don't allow Q0 in a segment if the base // Q is not 0. Q0 (lossless) implies 4x4 only and in AQ mode 2 a segment // Q delta is sometimes applied without going back around the rd loop. // This could lead to an illegal combination of partition size and q. if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) { qindex_delta = -cm->base_qindex + 1; } if ((cm->base_qindex + qindex_delta) > 0) { vp9_enable_segfeature(seg, segment, SEG_LVL_ALT_Q); vp9_set_segdata(seg, segment, SEG_LVL_ALT_Q, qindex_delta); } } } } #define DEFAULT_LV_THRESH 10.0 #define MIN_DEFAULT_LV_THRESH 8.0 // Select a segment for the current block. // The choice of segment for a block depends on the ratio of the projected // bits for the block vs a target average and its spatial complexity.
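// Worked example from the tables above: at aq_strength 1, a block lands in
// segment 0 (the strongest q increase, adjustment factor 2.00) only if its
// projected rate is below 0.20 of the scaled SB64 target rate and its
// log-variance is at least 3.5 below low_var_thresh; otherwise the loop in
// the function below falls through to a higher segment index, which applies
// a weaker (or, for the last segment, a negative) q adjustment.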
void vp9_caq_select_segment(VP9_COMP *cpi, MACROBLOCK *mb, BLOCK_SIZE bs, int mi_row, int mi_col, int projected_rate) { VP9_COMMON *const cm = &cpi->common; const int mi_offset = mi_row * cm->mi_cols + mi_col; const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; const int xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[bs]); const int ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[bs]); int x, y; int i; unsigned char segment; if (0) { segment = DEFAULT_AQ2_SEG; } else { // Rate depends on fraction of a SB64 in frame ((xmis * ymis) / (bw * bh)). // It is converted to bits * 256 units. const int target_rate = (cpi->rc.sb64_target_rate * xmis * ymis * 256) / (bw * bh); double logvar; double low_var_thresh; const int aq_strength = get_aq_c_strength(cm->base_qindex, cm->bit_depth); vpx_clear_system_state(); low_var_thresh = (cpi->oxcf.pass == 2) ? VPXMAX(cpi->twopass.mb_av_energy, MIN_DEFAULT_LV_THRESH) : DEFAULT_LV_THRESH; vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col); logvar = vp9_log_block_var(cpi, mb, bs); segment = AQ_C_SEGMENTS - 1; // Just in case no break out below. for (i = 0; i < AQ_C_SEGMENTS; ++i) { // Test rate against a threshold value and variance against a threshold. // Increasing segment number (higher variance and complexity) = higher Q. if ((projected_rate < target_rate * aq_c_transitions[aq_strength][i]) && (logvar < (low_var_thresh + aq_c_var_thresholds[aq_strength][i]))) { segment = i; break; } } } // Fill in the entries in the segment map corresponding to this SB64. for (y = 0; y < ymis; y++) { for (x = 0; x < xmis; x++) { cpi->segmentation_map[mi_offset + y * cm->mi_cols + x] = segment; } } } libvpx-1.8.2/vp9/encoder/vp9_aq_complexity.h000066400000000000000000000021071357355204000210140ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ #define VPX_VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ #ifdef __cplusplus extern "C" { #endif #include "vp9/common/vp9_enums.h" struct VP9_COMP; struct macroblock; // Select a segment for the current Block. void vp9_caq_select_segment(struct VP9_COMP *cpi, struct macroblock *, BLOCK_SIZE bs, int mi_row, int mi_col, int projected_rate); // This function sets up a set of segments with delta Q values around // the baseline frame quantizer. void vp9_setup_in_frame_q_adj(struct VP9_COMP *cpi); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_AQ_COMPLEXITY_H_ libvpx-1.8.2/vp9/encoder/vp9_aq_cyclicrefresh.c000066400000000000000000000721651357355204000214500ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <limits.h> #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/system_state.h" #include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_segmentation.h" static const uint8_t VP9_VAR_OFFS[64] = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols) { size_t last_coded_q_map_size; CYCLIC_REFRESH *const cr = vpx_calloc(1, sizeof(*cr)); if (cr == NULL) return NULL; cr->map = vpx_calloc(mi_rows * mi_cols, sizeof(*cr->map)); if (cr->map == NULL) { vp9_cyclic_refresh_free(cr); return NULL; } last_coded_q_map_size = mi_rows * mi_cols * sizeof(*cr->last_coded_q_map); cr->last_coded_q_map = vpx_malloc(last_coded_q_map_size); if (cr->last_coded_q_map == NULL) { vp9_cyclic_refresh_free(cr); return NULL; } assert(MAXQ <= 255); memset(cr->last_coded_q_map, MAXQ, last_coded_q_map_size); cr->counter_encode_maxq_scene_change = 0; return cr; } void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr) { if (cr != NULL) { vpx_free(cr->map); vpx_free(cr->last_coded_q_map); vpx_free(cr); } } // Check if this coding block, of size bsize, should be considered for refresh // (lower-qp coding). Decision can be based on various factors, such as // size of the coding block (i.e., below min_block size rejected), coding // mode, and rate/distortion. static int candidate_refresh_aq(const CYCLIC_REFRESH *cr, const MODE_INFO *mi, int64_t rate, int64_t dist, int bsize) { MV mv = mi->mv[0].as_mv; // Reject the block for lower-qp coding if projected distortion // is above the threshold, and any of the following is true: // 1) mode uses large mv // 2) mode is an intra-mode // Otherwise accept for refresh. if (dist > cr->thresh_dist_sb && (mv.row > cr->motion_thresh || mv.row < -cr->motion_thresh || mv.col > cr->motion_thresh || mv.col < -cr->motion_thresh || !is_inter_block(mi))) return CR_SEGMENT_ID_BASE; else if (bsize >= BLOCK_16X16 && rate < cr->thresh_rate_sb && is_inter_block(mi) && mi->mv[0].as_int == 0 && cr->rate_boost_fac > 10) // More aggressive delta-q for bigger blocks with zero motion. return CR_SEGMENT_ID_BOOST2; else return CR_SEGMENT_ID_BOOST1; } // Compute delta-q for the segment. static int compute_deltaq(const VP9_COMP *cpi, int q, double rate_factor) { const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; const RATE_CONTROL *const rc = &cpi->rc; int deltaq = vp9_compute_qdelta_by_rate(rc, cpi->common.frame_type, q, rate_factor, cpi->common.bit_depth); if ((-deltaq) > cr->max_qdelta_perc * q / 100) { deltaq = -cr->max_qdelta_perc * q / 100; } return deltaq; } // For the just encoded frame, estimate the bits, incorporating the delta-q // from non-base segment. For now ignore effect of multiple segments // (with different delta-q). Note this function is called in the postencode // (called from rc_update_rate_correction_factors()). int vp9_cyclic_refresh_estimate_bits_at_q(const VP9_COMP *cpi, double correction_factor) { const VP9_COMMON *const cm = &cpi->common; const CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; int estimated_bits; int mbs = cm->MBs; int num8x8bl = mbs << 2; // Weight for non-base segments: use actual number of blocks refreshed in // previous/just encoded frame.
Note number of blocks here is in 8x8 units. double weight_segment1 = (double)cr->actual_num_seg1_blocks / num8x8bl; double weight_segment2 = (double)cr->actual_num_seg2_blocks / num8x8bl; // Take segment weighted average for estimated bits. estimated_bits = (int)((1.0 - weight_segment1 - weight_segment2) * vp9_estimate_bits_at_q(cm->frame_type, cm->base_qindex, mbs, correction_factor, cm->bit_depth) + weight_segment1 * vp9_estimate_bits_at_q(cm->frame_type, cm->base_qindex + cr->qindex_delta[1], mbs, correction_factor, cm->bit_depth) + weight_segment2 * vp9_estimate_bits_at_q(cm->frame_type, cm->base_qindex + cr->qindex_delta[2], mbs, correction_factor, cm->bit_depth)); return estimated_bits; } // Prior to encoding the frame, estimate the bits per mb, for a given q = i and // a corresponding delta-q (for segment 1). This function is called in the // rc_regulate_q() to set the base qp index. // Note: the segment map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or // to 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock, prior to encoding. int vp9_cyclic_refresh_rc_bits_per_mb(const VP9_COMP *cpi, int i, double correction_factor) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; int bits_per_mb; int deltaq = 0; if (cpi->oxcf.speed < 8) deltaq = compute_deltaq(cpi, i, cr->rate_ratio_qdelta); else deltaq = -(cr->max_qdelta_perc * i) / 200; // Take segment weighted average for bits per mb. bits_per_mb = (int)((1.0 - cr->weight_segment) * vp9_rc_bits_per_mb(cm->frame_type, i, correction_factor, cm->bit_depth) + cr->weight_segment * vp9_rc_bits_per_mb(cm->frame_type, i + deltaq, correction_factor, cm->bit_depth)); return bits_per_mb; } // Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), // check if we should reset the segment_id, and update the cyclic_refresh map // and segmentation map. void vp9_cyclic_refresh_update_segment(VP9_COMP *const cpi, MODE_INFO *const mi, int mi_row, int mi_col, BLOCK_SIZE bsize, int64_t rate, int64_t dist, int skip, struct macroblock_plane *const p) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); const int block_index = mi_row * cm->mi_cols + mi_col; int refresh_this_block = candidate_refresh_aq(cr, mi, rate, dist, bsize); // Default is to not update the refresh map. int new_map_value = cr->map[block_index]; int x = 0; int y = 0; int is_skin = 0; if (refresh_this_block == 0 && bsize <= BLOCK_16X16 && cpi->use_skin_detection) { is_skin = vp9_compute_skin_block(p[0].src.buf, p[1].src.buf, p[2].src.buf, p[0].src.stride, p[1].src.stride, bsize, 0, 0); if (is_skin) refresh_this_block = 1; } if (cpi->oxcf.rc_mode == VPX_VBR && mi->ref_frame[0] == GOLDEN_FRAME) refresh_this_block = 0; // If this block is labeled for refresh, check if we should reset the // segment_id. if (cpi->sf.use_nonrd_pick_mode && cyclic_refresh_segment_id_boosted(mi->segment_id)) { mi->segment_id = refresh_this_block; // Reset segment_id if it will be skipped. if (skip) mi->segment_id = CR_SEGMENT_ID_BASE; } // Update the cyclic refresh map, to be used for setting segmentation map // for the next frame. If the block will be refreshed this frame, mark it // as clean. The magnitude of the -ve influences how long before we consider // it for refresh again. 
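// Map values per 8x8 block: 1 means the block is not currently a refresh
// candidate, 0 means it is a candidate awaiting refresh, and a negative
// value marks a just-refreshed block that must first count back up to 0
// (one increment per pass in cyclic_refresh_update_map) before becoming a
// candidate again. With the current time_for_refresh of 0, a refreshed
// block returns to candidate state on the next pass.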
if (cyclic_refresh_segment_id_boosted(mi->segment_id)) { new_map_value = -cr->time_for_refresh; } else if (refresh_this_block) { // Else if it is accepted as candidate for refresh, and has not already // been refreshed (marked as 1) then mark it as a candidate for cleanup // for future time (marked as 0), otherwise don't update it. if (cr->map[block_index] == 1) new_map_value = 0; } else { // Leave it marked as block that is not candidate for refresh. new_map_value = 1; } // Update entries in the cyclic refresh map with new_map_value, and // copy mbmi->segment_id into global segmentation map. for (y = 0; y < ymis; y++) for (x = 0; x < xmis; x++) { int map_offset = block_index + y * cm->mi_cols + x; cr->map[map_offset] = new_map_value; cpi->segmentation_map[map_offset] = mi->segment_id; } } void vp9_cyclic_refresh_update_sb_postencode(VP9_COMP *const cpi, const MODE_INFO *const mi, int mi_row, int mi_col, BLOCK_SIZE bsize) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); const int block_index = mi_row * cm->mi_cols + mi_col; int x, y; for (y = 0; y < ymis; y++) for (x = 0; x < xmis; x++) { int map_offset = block_index + y * cm->mi_cols + x; // Inter skip blocks were clearly not coded at the current qindex, so // don't update the map for them. For cases where motion is non-zero or // the reference frame isn't the previous frame, the previous value in // the map for this spatial location is not entirely correct. if ((!is_inter_block(mi) || !mi->skip) && mi->segment_id <= CR_SEGMENT_ID_BOOST2) { cr->last_coded_q_map[map_offset] = clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id], 0, MAXQ); } else if (is_inter_block(mi) && mi->skip && mi->segment_id <= CR_SEGMENT_ID_BOOST2) { cr->last_coded_q_map[map_offset] = VPXMIN( clamp(cm->base_qindex + cr->qindex_delta[mi->segment_id], 0, MAXQ), cr->last_coded_q_map[map_offset]); } } } // From the just encoded frame: update the actual number of blocks that were // applied the segment delta q, and the amount of low motion in the frame. // Also check conditions for forcing golden update, or preventing golden // update if the period is up. void vp9_cyclic_refresh_postencode(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; MODE_INFO **mi = cm->mi_grid_visible; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; RATE_CONTROL *const rc = &cpi->rc; unsigned char *const seg_map = cpi->segmentation_map; double fraction_low = 0.0; int force_gf_refresh = 0; int low_content_frame = 0; int mi_row, mi_col; cr->actual_num_seg1_blocks = 0; cr->actual_num_seg2_blocks = 0; for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { MV mv = mi[0]->mv[0].as_mv; int map_index = mi_row * cm->mi_cols + mi_col; if (cyclic_refresh_segment_id(seg_map[map_index]) == CR_SEGMENT_ID_BOOST1) cr->actual_num_seg1_blocks++; else if (cyclic_refresh_segment_id(seg_map[map_index]) == CR_SEGMENT_ID_BOOST2) cr->actual_num_seg2_blocks++; // Accumulate low_content_frame. if (is_inter_block(mi[0]) && abs(mv.row) < 16 && abs(mv.col) < 16) low_content_frame++; mi++; } mi += 8; } // Check for golden frame update: only for non-SVC and non-golden boost. 
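// The decision below relies on low_content_avg, a recursive average that
// weights the current frame by 1/4: avg' = (fraction_low + 3 * avg) / 4,
// where fraction_low is the fraction of blocks that are inter coded with
// motion below 16 (in 1/8-pel units, i.e. under 2 pixels) in each direction.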
if (!cpi->use_svc && cpi->ext_refresh_frame_flags_pending == 0 && !cpi->oxcf.gf_cbr_boost_pct) { // Force this frame as a golden update frame if this frame changes the // resolution (resize_pending != 0). if (cpi->resize_pending != 0) { vp9_cyclic_refresh_set_golden_update(cpi); rc->frames_till_gf_update_due = rc->baseline_gf_interval; if (rc->frames_till_gf_update_due > rc->frames_to_key) rc->frames_till_gf_update_due = rc->frames_to_key; cpi->refresh_golden_frame = 1; force_gf_refresh = 1; } // Update average of low content/motion in the frame. fraction_low = (double)low_content_frame / (cm->mi_rows * cm->mi_cols); cr->low_content_avg = (fraction_low + 3 * cr->low_content_avg) / 4; if (!force_gf_refresh && cpi->refresh_golden_frame == 1 && rc->frames_since_key > rc->frames_since_golden + 1) { // Don't update golden reference if the amount of low_content for the // current encoded frame is small, or if the recursive average of the // low_content over the update interval window falls below threshold. if (fraction_low < 0.65 || cr->low_content_avg < 0.6) { cpi->refresh_golden_frame = 0; } // Reset for next interval. cr->low_content_avg = fraction_low; } } } // Set golden frame update interval, for non-svc 1 pass CBR mode. void vp9_cyclic_refresh_set_golden_update(VP9_COMP *const cpi) { RATE_CONTROL *const rc = &cpi->rc; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; // Set minimum gf_interval for GF update to a multiple of the refresh period, // with some max limit. Depending on past encoding stats, GF flag may be // reset and update may not occur until next baseline_gf_interval. if (cr->percent_refresh > 0) rc->baseline_gf_interval = VPXMIN(4 * (100 / cr->percent_refresh), 40); else rc->baseline_gf_interval = 40; if (cpi->oxcf.rc_mode == VPX_VBR) rc->baseline_gf_interval = 20; if (rc->avg_frame_low_motion < 50 && rc->frames_since_key > 40) rc->baseline_gf_interval = 10; } static int is_superblock_flat_static(VP9_COMP *const cpi, int sb_row_index, int sb_col_index) { unsigned int source_variance; const uint8_t *src_y = cpi->Source->y_buffer; const int ystride = cpi->Source->y_stride; unsigned int sse; const BLOCK_SIZE bsize = BLOCK_64X64; src_y += (sb_row_index << 6) * ystride + (sb_col_index << 6); source_variance = cpi->fn_ptr[bsize].vf(src_y, ystride, VP9_VAR_OFFS, 0, &sse); if (source_variance == 0) { uint64_t block_sad; const uint8_t *last_src_y = cpi->Last_Source->y_buffer; const int last_ystride = cpi->Last_Source->y_stride; last_src_y += (sb_row_index << 6) * last_ystride + (sb_col_index << 6); block_sad = cpi->fn_ptr[bsize].sdf(src_y, ystride, last_src_y, last_ystride); if (block_sad == 0) return 1; } return 0; } // Update the segmentation map, and related quantities: cyclic refresh map, // refresh sb_index, and target number of blocks to be refreshed. // The map is set to either 0/CR_SEGMENT_ID_BASE (no refresh) or to // 1/CR_SEGMENT_ID_BOOST1 (refresh) for each superblock. // Blocks labeled as BOOST1 may later get set to BOOST2 (during the // encoding of the superblock).
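// With the default percent_refresh of 10, roughly 10% of the 8x8 blocks are
// targeted per frame, so a full sweep of the frame takes about
// 100 / percent_refresh = 10 frames. A sweep can take longer when candidates
// are scarce, since the scan below stops after one full pass through the
// superblocks even if the target count was not reached.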
static void cyclic_refresh_update_map(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; unsigned char *const seg_map = cpi->segmentation_map; int i, block_count, bl_index, sb_rows, sb_cols, sbs_in_frame; int xmis, ymis, x, y; int consec_zero_mv_thresh = 0; int qindex_thresh = 0; int count_sel = 0; int count_tot = 0; memset(seg_map, CR_SEGMENT_ID_BASE, cm->mi_rows * cm->mi_cols); sb_cols = (cm->mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; sb_rows = (cm->mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; sbs_in_frame = sb_cols * sb_rows; // Number of target blocks to get the q delta (segment 1). block_count = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100; // Set the segmentation map: cycle through the superblocks, starting at // cr->sb_index, and stopping when either block_count blocks have been found // to be refreshed, or we have passed through whole frame. assert(cr->sb_index < sbs_in_frame); i = cr->sb_index; cr->target_num_seg_blocks = 0; if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) { consec_zero_mv_thresh = 100; } qindex_thresh = cpi->oxcf.content == VP9E_CONTENT_SCREEN ? vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST2, cm->base_qindex) : vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex); // More aggressive settings for noisy content. if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) { consec_zero_mv_thresh = 60; qindex_thresh = VPXMAX(vp9_get_qindex(&cm->seg, CR_SEGMENT_ID_BOOST1, cm->base_qindex), cm->base_qindex); } do { int sum_map = 0; int consec_zero_mv_thresh_block = consec_zero_mv_thresh; // Get the mi_row/mi_col corresponding to superblock index i. int sb_row_index = (i / sb_cols); int sb_col_index = i - sb_row_index * sb_cols; int mi_row = sb_row_index * MI_BLOCK_SIZE; int mi_col = sb_col_index * MI_BLOCK_SIZE; int flat_static_blocks = 0; int compute_content = 1; assert(mi_row >= 0 && mi_row < cm->mi_rows); assert(mi_col >= 0 && mi_col < cm->mi_cols); #if CONFIG_VP9_HIGHBITDEPTH if (cpi->common.use_highbitdepth) compute_content = 0; #endif if (cpi->Last_Source == NULL || cpi->Last_Source->y_width != cpi->Source->y_width || cpi->Last_Source->y_height != cpi->Source->y_height) compute_content = 0; bl_index = mi_row * cm->mi_cols + mi_col; // Loop through all 8x8 blocks in superblock and update map. xmis = VPXMIN(cm->mi_cols - mi_col, num_8x8_blocks_wide_lookup[BLOCK_64X64]); ymis = VPXMIN(cm->mi_rows - mi_row, num_8x8_blocks_high_lookup[BLOCK_64X64]); if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium && (xmis <= 2 || ymis <= 2)) consec_zero_mv_thresh_block = 4; for (y = 0; y < ymis; y++) { for (x = 0; x < xmis; x++) { const int bl_index2 = bl_index + y * cm->mi_cols + x; // If the block is a candidate for clean up then mark it // for possible boost/refresh (segment 1). The segment id may get // reset to 0 later depending on the coding mode. if (cr->map[bl_index2] == 0) { count_tot++; if (cr->last_coded_q_map[bl_index2] > qindex_thresh || cpi->consec_zero_mv[bl_index2] < consec_zero_mv_thresh_block) { sum_map++; count_sel++; } } else if (cr->map[bl_index2] < 0) { cr->map[bl_index2]++; } } } // Enforce constant segment over superblock. // If segment is at least half of superblock, set to 1. if (sum_map >= xmis * ymis / 2) { // This superblock is a candidate for refresh: // compute spatial variance and exclude blocks that are spatially flat // and stationary. Note: this is currently only done for screen content // mode.
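// A superblock is treated as flat and static when its source variance is
// zero and its SAD against the previous source frame is also zero (see
// is_superblock_flat_static above); such blocks are already perfectly
// predicted, so spending refresh bits on them would be wasted.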
if (compute_content && cr->skip_flat_static_blocks) flat_static_blocks = is_superblock_flat_static(cpi, sb_row_index, sb_col_index); if (!flat_static_blocks) { // Label this superblock as segment 1. for (y = 0; y < ymis; y++) for (x = 0; x < xmis; x++) { seg_map[bl_index + y * cm->mi_cols + x] = CR_SEGMENT_ID_BOOST1; } cr->target_num_seg_blocks += xmis * ymis; } } i++; if (i == sbs_in_frame) { i = 0; } } while (cr->target_num_seg_blocks < block_count && i != cr->sb_index); cr->sb_index = i; cr->reduce_refresh = 0; if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) if (count_sel < ((3 * count_tot) >> 2)) cr->reduce_refresh = 1; } // Set cyclic refresh parameters. void vp9_cyclic_refresh_update_parameters(VP9_COMP *const cpi) { const RATE_CONTROL *const rc = &cpi->rc; const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; int num8x8bl = cm->MBs << 2; int target_refresh = 0; double weight_segment_target = 0; double weight_segment = 0; int thresh_low_motion = 20; int qp_thresh = VPXMIN((cpi->oxcf.content == VP9E_CONTENT_SCREEN) ? 35 : 20, rc->best_quality << 1); int qp_max_thresh = 117 * MAXQ >> 7; cr->apply_cyclic_refresh = 1; if (frame_is_intra_only(cm) || cpi->svc.temporal_layer_id > 0 || is_lossless_requested(&cpi->oxcf) || rc->avg_frame_qindex[INTER_FRAME] < qp_thresh || (cpi->use_svc && cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) || (!cpi->use_svc && rc->avg_frame_low_motion < thresh_low_motion && rc->frames_since_key > 40) || (!cpi->use_svc && rc->avg_frame_qindex[INTER_FRAME] > qp_max_thresh && rc->frames_since_key > 20)) { cr->apply_cyclic_refresh = 0; return; } cr->percent_refresh = 10; if (cr->reduce_refresh) cr->percent_refresh = 5; cr->max_qdelta_perc = 60; cr->time_for_refresh = 0; cr->motion_thresh = 32; cr->rate_boost_fac = 15; // Use larger delta-qp (increase rate_ratio_qdelta) for first few (~4) // periods of the refresh cycle, after a key frame. // Account for larger interval on base layer for temporal layers. if (cr->percent_refresh > 0 && rc->frames_since_key < (4 * cpi->svc.number_temporal_layers) * (100 / cr->percent_refresh)) { cr->rate_ratio_qdelta = 3.0; } else { cr->rate_ratio_qdelta = 2.0; if (cpi->noise_estimate.enabled && cpi->noise_estimate.level >= kMedium) { // Reduce the delta-qp if the estimated source noise is above threshold. cr->rate_ratio_qdelta = 1.7; cr->rate_boost_fac = 13; } } // For screen-content: keep rate_ratio_qdelta to 2.0 (segment#1 boost) and // percent_refresh (refresh rate) to 10. But reduce rate boost for segment#2 // (rate_boost_fac = 10 disables segment#2). if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) { // Only enable feature of skipping flat_static blocks for top layer // under screen content mode. if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) cr->skip_flat_static_blocks = 1; cr->percent_refresh = (cr->skip_flat_static_blocks) ? 5 : 10; // Increase the amount of refresh on scene change that is encoded at max Q, // increase for a few cycles of the refresh period (~100 / percent_refresh). if (cr->counter_encode_maxq_scene_change < 30) cr->percent_refresh = (cr->skip_flat_static_blocks) ? 10 : 15; cr->rate_ratio_qdelta = 2.0; cr->rate_boost_fac = 10; } // Adjust some parameters for low resolutions.
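// "Low resolution" below means frames no larger than CIF (352x288).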
if (cm->width * cm->height <= 352 * 288) { if (rc->avg_frame_bandwidth < 3000) { cr->motion_thresh = 64; cr->rate_boost_fac = 13; } else { cr->max_qdelta_perc = 70; cr->rate_ratio_qdelta = VPXMAX(cr->rate_ratio_qdelta, 2.5); } } if (cpi->oxcf.rc_mode == VPX_VBR) { // To be adjusted for VBR mode, e.g., based on gf period and boost. // For now use smaller qp-delta (than CBR), no second boosted seg, and // turn-off (no refresh) on golden refresh (since it's already boosted). cr->percent_refresh = 10; cr->rate_ratio_qdelta = 1.5; cr->rate_boost_fac = 10; if (cpi->refresh_golden_frame == 1) { cr->percent_refresh = 0; cr->rate_ratio_qdelta = 1.0; } } // Weight for segment prior to encoding: take the average of the target // number for the frame to be encoded and the actual from the previous frame. // Use the target if it's less. To be used for setting the base qp for the // frame in vp9_rc_regulate_q. target_refresh = cr->percent_refresh * cm->mi_rows * cm->mi_cols / 100; weight_segment_target = (double)(target_refresh) / num8x8bl; weight_segment = (double)((target_refresh + cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) >> 1) / num8x8bl; if (weight_segment_target < 7 * weight_segment / 8) weight_segment = weight_segment_target; // For screen-content: don't include target for the weight segment, // since for all flat areas the segment is reset, so it's more accurate // to just use the previous actual number of seg blocks for the weight. if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) weight_segment = (double)(cr->actual_num_seg1_blocks + cr->actual_num_seg2_blocks) / num8x8bl; cr->weight_segment = weight_segment; } // Setup cyclic background refresh: set delta q and segmentation map. void vp9_cyclic_refresh_setup(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; struct segmentation *const seg = &cm->seg; int scene_change_detected = cpi->rc.high_source_sad || (cpi->use_svc && cpi->svc.high_source_sad_superframe); if (cm->current_video_frame == 0) cr->low_content_avg = 0.0; // Reset if resolution change has occurred. if (cpi->resize_pending != 0) vp9_cyclic_refresh_reset_resize(cpi); if (!cr->apply_cyclic_refresh || (cpi->force_update_segmentation) || scene_change_detected) { // Set segmentation map to 0 and disable. unsigned char *const seg_map = cpi->segmentation_map; memset(seg_map, 0, cm->mi_rows * cm->mi_cols); vp9_disable_segmentation(&cm->seg); if (cm->frame_type == KEY_FRAME || scene_change_detected) { memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); cr->sb_index = 0; cr->reduce_refresh = 0; cr->counter_encode_maxq_scene_change = 0; } return; } else { int qindex_delta = 0; int qindex2; const double q = vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth); cr->counter_encode_maxq_scene_change++; vpx_clear_system_state(); // Set rate threshold to some multiple (currently 4x) of the target // rate (target is given by sb64_target_rate and scaled by 256). cr->thresh_rate_sb = ((int64_t)(rc->sb64_target_rate) << 8) << 2; // Distortion threshold, quadratic in Q, scale factor to be adjusted. // q will not exceed 457, so (q * q) is within 32bit; see: // vp9_convert_qindex_to_q(), vp9_ac_quant(), ac_qlookup*[]. cr->thresh_dist_sb = ((int64_t)(q * q)) << 2; // Set up segmentation. // Clear down the segment map. vp9_enable_segmentation(&cm->seg); vp9_clearall_segfeatures(seg); // Select delta coding method.
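// SEGMENT_DELTADATA signals segment q values as deltas against the frame
// base q, as opposed to SEGMENT_ABSDATA, which would code them as absolute
// values.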
seg->abs_delta = SEGMENT_DELTADATA; // Note: setting temporal_update has no effect, as the seg-map coding method // (temporal or spatial) is determined in vp9_choose_segmap_coding_method(), // based on the coding cost of each method. For error_resilient mode the // last_frame_seg_map is set to 0, so if temporal coding is used, it is // relative to 0 previous map. // seg->temporal_update = 0; // Segment BASE "Q" feature is disabled so it defaults to the baseline Q. vp9_disable_segfeature(seg, CR_SEGMENT_ID_BASE, SEG_LVL_ALT_Q); // Use segment BOOST1 for in-frame Q adjustment. vp9_enable_segfeature(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q); // Use segment BOOST2 for more aggressive in-frame Q adjustment. vp9_enable_segfeature(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q); // Set the q delta for segment BOOST1. qindex_delta = compute_deltaq(cpi, cm->base_qindex, cr->rate_ratio_qdelta); cr->qindex_delta[1] = qindex_delta; // Compute rd-mult for segment BOOST1. qindex2 = clamp(cm->base_qindex + cm->y_dc_delta_q + qindex_delta, 0, MAXQ); cr->rdmult = vp9_compute_rd_mult(cpi, qindex2); vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST1, SEG_LVL_ALT_Q, qindex_delta); // Set a more aggressive (higher) q delta for segment BOOST2. qindex_delta = compute_deltaq( cpi, cm->base_qindex, VPXMIN(CR_MAX_RATE_TARGET_RATIO, 0.1 * cr->rate_boost_fac * cr->rate_ratio_qdelta)); cr->qindex_delta[2] = qindex_delta; vp9_set_segdata(seg, CR_SEGMENT_ID_BOOST2, SEG_LVL_ALT_Q, qindex_delta); // Update the segmentation and refresh map. cyclic_refresh_update_map(cpi); } } int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr) { return cr->rdmult; } void vp9_cyclic_refresh_reset_resize(VP9_COMP *const cpi) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; memset(cr->map, 0, cm->mi_rows * cm->mi_cols); memset(cr->last_coded_q_map, MAXQ, cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map)); cr->sb_index = 0; cpi->refresh_golden_frame = 1; cpi->refresh_alt_ref_frame = 1; cr->counter_encode_maxq_scene_change = 0; } void vp9_cyclic_refresh_limit_q(const VP9_COMP *cpi, int *q) { CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; // For now apply hard limit to frame-level decrease in q, if the cyclic // refresh is active (percent_refresh > 0). if (cr->percent_refresh > 0 && cpi->rc.q_1_frame - *q > 8) { *q = cpi->rc.q_1_frame - 8; } } libvpx-1.8.2/vp9/encoder/vp9_aq_cyclicrefresh.h000066400000000000000000000126651357355204000214550ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ #define VPX_VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ #include "vpx/vpx_integer.h" #include "vp9/common/vp9_blockd.h" #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_skin_detection.h" #ifdef __cplusplus extern "C" { #endif // The segment ids used in cyclic refresh: from base (no boost) to increasing // boost (higher delta-qp). #define CR_SEGMENT_ID_BASE 0 #define CR_SEGMENT_ID_BOOST1 1 #define CR_SEGMENT_ID_BOOST2 2 // Maximum rate target ratio for setting segment delta-qp.
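// The BOOST2 target ratio is derived as 0.1 * rate_boost_fac times the
// BOOST1 ratio (rate_ratio_qdelta) and is capped at this value; see
// vp9_cyclic_refresh_setup().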
#define CR_MAX_RATE_TARGET_RATIO 4.0 struct CYCLIC_REFRESH { // Percentage of blocks per frame that are targeted as candidates // for cyclic refresh. int percent_refresh; // Maximum q-delta as percentage of base q. int max_qdelta_perc; // Superblock starting index for cycling through the frame. int sb_index; // Controls how long block will need to wait to be refreshed again, in // excess of the cycle time, i.e., in the case of all zero motion, block // will be refreshed every (100/percent_refresh + time_for_refresh) frames. int time_for_refresh; // Target number of (8x8) blocks that are set for delta-q. int target_num_seg_blocks; // Actual number of (8x8) blocks that were applied delta-q. int actual_num_seg1_blocks; int actual_num_seg2_blocks; // RD mult. parameters for segment 1. int rdmult; // Cyclic refresh map. signed char *map; // Map of the last q a block was coded at. uint8_t *last_coded_q_map; // Thresholds applied to the projected rate/distortion of the coding block, // when deciding whether block should be refreshed. int64_t thresh_rate_sb; int64_t thresh_dist_sb; // Threshold applied to the motion vector (in units of 1/8 pel) of the // coding block, when deciding whether block should be refreshed. int16_t motion_thresh; // Rate target ratio to set q delta. double rate_ratio_qdelta; // Boost factor for rate target ratio, for segment CR_SEGMENT_ID_BOOST2. int rate_boost_fac; double low_content_avg; int qindex_delta[3]; int reduce_refresh; double weight_segment; int apply_cyclic_refresh; int counter_encode_maxq_scene_change; int skip_flat_static_blocks; }; struct VP9_COMP; typedef struct CYCLIC_REFRESH CYCLIC_REFRESH; CYCLIC_REFRESH *vp9_cyclic_refresh_alloc(int mi_rows, int mi_cols); void vp9_cyclic_refresh_free(CYCLIC_REFRESH *cr); // Estimate the bits, incorporating the delta-q from segment 1, after encoding // the frame. int vp9_cyclic_refresh_estimate_bits_at_q(const struct VP9_COMP *cpi, double correction_factor); // Estimate the bits per mb, for a given q = i and a corresponding delta-q // (for segment 1), prior to encoding the frame. int vp9_cyclic_refresh_rc_bits_per_mb(const struct VP9_COMP *cpi, int i, double correction_factor); // Prior to coding a given prediction block, of size bsize at (mi_row, mi_col), // check if we should reset the segment_id, and update the cyclic_refresh map // and segmentation map. void vp9_cyclic_refresh_update_segment(struct VP9_COMP *const cpi, MODE_INFO *const mi, int mi_row, int mi_col, BLOCK_SIZE bsize, int64_t rate, int64_t dist, int skip, struct macroblock_plane *const p); void vp9_cyclic_refresh_update_sb_postencode(struct VP9_COMP *const cpi, const MODE_INFO *const mi, int mi_row, int mi_col, BLOCK_SIZE bsize); // From the just encoded frame: update the actual number of blocks that were // applied the segment delta q, and the amount of low motion in the frame. // Also check conditions for forcing golden update, or preventing golden // update if the period is up. void vp9_cyclic_refresh_postencode(struct VP9_COMP *const cpi); // Set golden frame update interval, for non-svc 1 pass CBR mode. void vp9_cyclic_refresh_set_golden_update(struct VP9_COMP *const cpi); // Set/update global/frame level refresh parameters. void vp9_cyclic_refresh_update_parameters(struct VP9_COMP *const cpi); // Setup cyclic background refresh: set delta q and segmentation map. 
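// Roughly, the encoder drives these hooks once per frame:
// vp9_cyclic_refresh_update_parameters() before rate control picks the base
// q, vp9_cyclic_refresh_setup() once q is chosen (it applies the delta-q
// and segmentation map below), and vp9_cyclic_refresh_postencode() after
// the frame is coded; see vp9_encoder.c for the authoritative sequence.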
void vp9_cyclic_refresh_setup(struct VP9_COMP *const cpi); int vp9_cyclic_refresh_get_rdmult(const CYCLIC_REFRESH *cr); void vp9_cyclic_refresh_reset_resize(struct VP9_COMP *const cpi); static INLINE int cyclic_refresh_segment_id_boosted(int segment_id) { return segment_id == CR_SEGMENT_ID_BOOST1 || segment_id == CR_SEGMENT_ID_BOOST2; } static INLINE int cyclic_refresh_segment_id(int segment_id) { if (segment_id == CR_SEGMENT_ID_BOOST1) return CR_SEGMENT_ID_BOOST1; else if (segment_id == CR_SEGMENT_ID_BOOST2) return CR_SEGMENT_ID_BOOST2; else return CR_SEGMENT_ID_BASE; } void vp9_cyclic_refresh_limit_q(const struct VP9_COMP *cpi, int *q); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_AQ_CYCLICREFRESH_H_ libvpx-1.8.2/vp9/encoder/vp9_aq_variance.c000066400000000000000000000171551357355204000204130ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <math.h> #include "vpx_ports/mem.h" #include "vpx_ports/system_state.h" #include "vp9/encoder/vp9_aq_variance.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rd.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_segmentation.h" #define ENERGY_MIN (-4) #define ENERGY_MAX (1) #define ENERGY_SPAN (ENERGY_MAX - ENERGY_MIN + 1) #define ENERGY_IN_BOUNDS(energy) \ assert((energy) >= ENERGY_MIN && (energy) <= ENERGY_MAX) static const double rate_ratio[MAX_SEGMENTS] = { 2.5, 2.0, 1.5, 1.0, 0.75, 1.0, 1.0, 1.0 }; static const int segment_id[ENERGY_SPAN] = { 0, 1, 1, 2, 3, 4 }; #define SEGMENT_ID(i) segment_id[(i)-ENERGY_MIN] DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 }; #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, static const uint16_t, vp9_highbd_64_zeros[64]) = { 0 }; #endif unsigned int vp9_vaq_segment_id(int energy) { ENERGY_IN_BOUNDS(energy); return SEGMENT_ID(energy); } void vp9_vaq_frame_setup(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; struct segmentation *seg = &cm->seg; int i; if (frame_is_intra_only(cm) || cm->error_resilient_mode || cpi->refresh_alt_ref_frame || cpi->force_update_segmentation || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { vp9_enable_segmentation(seg); vp9_clearall_segfeatures(seg); seg->abs_delta = SEGMENT_DELTADATA; vpx_clear_system_state(); for (i = 0; i < MAX_SEGMENTS; ++i) { int qindex_delta = vp9_compute_qdelta_by_rate(&cpi->rc, cm->frame_type, cm->base_qindex, rate_ratio[i], cm->bit_depth); // We don't allow qindex 0 in a segment if the base value is not 0. // Q index 0 (lossless) implies 4x4 encoding only and in AQ mode a segment // Q delta is sometimes applied without going back around the rd loop. // This could lead to an illegal combination of partition size and q. if ((cm->base_qindex != 0) && ((cm->base_qindex + qindex_delta) == 0)) { qindex_delta = -cm->base_qindex + 1; } // No need to enable SEG_LVL_ALT_Q for this segment. if (rate_ratio[i] == 1.0) { continue; } vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, qindex_delta); vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); } } } /* TODO(agrange, paulwilkins): The block_variance calls the unoptimized versions * of variance() and highbd_8_variance(). It should not.
*/ static void aq_variance(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int w, int h, unsigned int *sse, int *sum) { int i, j; *sum = 0; *sse = 0; for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { const int diff = a[j] - b[j]; *sum += diff; *sse += diff * diff; } a += a_stride; b += b_stride; } } #if CONFIG_VP9_HIGHBITDEPTH static void aq_highbd_variance64(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, int w, int h, uint64_t *sse, int64_t *sum) { int i, j; uint16_t *a = CONVERT_TO_SHORTPTR(a8); uint16_t *b = CONVERT_TO_SHORTPTR(b8); *sum = 0; *sse = 0; for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { const int diff = a[j] - b[j]; *sum += diff; *sse += diff * diff; } a += a_stride; b += b_stride; } } #endif // CONFIG_VP9_HIGHBITDEPTH static unsigned int block_variance(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { MACROBLOCKD *xd = &x->e_mbd; unsigned int var, sse; int right_overflow = (xd->mb_to_right_edge < 0) ? ((-xd->mb_to_right_edge) >> 3) : 0; int bottom_overflow = (xd->mb_to_bottom_edge < 0) ? ((-xd->mb_to_bottom_edge) >> 3) : 0; if (right_overflow || bottom_overflow) { const int bw = 8 * num_8x8_blocks_wide_lookup[bs] - right_overflow; const int bh = 8 * num_8x8_blocks_high_lookup[bs] - bottom_overflow; int avg; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { uint64_t sse64 = 0; int64_t sum64 = 0; aq_highbd_variance64(x->plane[0].src.buf, x->plane[0].src.stride, CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, bw, bh, &sse64, &sum64); sse = (unsigned int)(sse64 >> (2 * (xd->bd - 8))); avg = (int)(sum64 >> (xd->bd - 8)); } else { aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0, bw, bh, &sse, &avg); } #else aq_variance(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0, bw, bh, &sse, &avg); #endif // CONFIG_VP9_HIGHBITDEPTH var = sse - (unsigned int)(((int64_t)avg * avg) / (bw * bh)); return (unsigned int)(((uint64_t)256 * var) / (bw * bh)); } else { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, CONVERT_TO_BYTEPTR(vp9_highbd_64_zeros), 0, &sse); } else { var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0, &sse); } #else var = cpi->fn_ptr[bs].vf(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0, &sse); #endif // CONFIG_VP9_HIGHBITDEPTH return (unsigned int)(((uint64_t)256 * var) >> num_pels_log2_lookup[bs]); } } double vp9_log_block_var(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { unsigned int var = block_variance(cpi, x, bs); vpx_clear_system_state(); return log(var + 1.0); } // Get the range of sub block energy values; void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bsize, int *min_e, int *max_e) { VP9_COMMON *const cm = &cpi->common; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); int x, y; if (xmis < bw || ymis < bh) { vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col); *min_e = vp9_block_energy(cpi, mb, bsize); *max_e = *min_e; } else { int energy; *min_e = ENERGY_MAX; *max_e = ENERGY_MIN; for (y = 0; y < ymis; ++y) { for (x = 0; x < xmis; ++x) { vp9_setup_src_planes(mb, cpi->Source, mi_row + y, mi_col + x); energy = vp9_block_energy(cpi, mb, BLOCK_8X8); *min_e = VPXMIN(*min_e, energy); *max_e = VPXMAX(*max_e, energy); } } } // 
Re-instate source pointers back to what they should have been on entry. vp9_setup_src_planes(mb, cpi->Source, mi_row, mi_col); } #define DEFAULT_E_MIDPOINT 10.0 int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs) { double energy; double energy_midpoint; vpx_clear_system_state(); energy_midpoint = (cpi->oxcf.pass == 2) ? cpi->twopass.mb_av_energy : DEFAULT_E_MIDPOINT; energy = vp9_log_block_var(cpi, x, bs) - energy_midpoint; return clamp((int)round(energy), ENERGY_MIN, ENERGY_MAX); } libvpx-1.8.2/vp9/encoder/vp9_aq_variance.h000066400000000000000000000020721357355204000204100ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_AQ_VARIANCE_H_ #define VPX_VP9_ENCODER_VP9_AQ_VARIANCE_H_ #include "vp9/encoder/vp9_encoder.h" #ifdef __cplusplus extern "C" { #endif unsigned int vp9_vaq_segment_id(int energy); void vp9_vaq_frame_setup(VP9_COMP *cpi); void vp9_get_sub_block_energy(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bsize, int *min_e, int *max_e); int vp9_block_energy(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); double vp9_log_block_var(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bs); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_AQ_VARIANCE_H_ libvpx-1.8.2/vp9/encoder/vp9_bitstream.c000066400000000000000000001415211357355204000201270ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <assert.h> #include <stdio.h> #include <limits.h> #include "vpx/vpx_encoder.h" #include "vpx_dsp/bitwriter_buffer.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem_ops.h" #include "vpx_ports/system_state.h" #if CONFIG_BITSTREAM_DEBUG #include "vpx_util/vpx_debug_util.h" #endif // CONFIG_BITSTREAM_DEBUG #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_mvref_common.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/common/vp9_tile_common.h" #include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_bitstream.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_subexp.h" #include "vp9/encoder/vp9_tokenize.h" static const struct vp9_token intra_mode_encodings[INTRA_MODES] = { { 0, 1 }, { 6, 3 }, { 28, 5 }, { 30, 5 }, { 58, 6 }, { 59, 6 }, { 126, 7 }, { 127, 7 }, { 62, 6 }, { 2, 2 } }; static const struct vp9_token switchable_interp_encodings[SWITCHABLE_FILTERS] = { { 0, 1 }, { 2, 2 }, { 3, 2 } }; static const struct vp9_token partition_encodings[PARTITION_TYPES] = { { 0, 1 }, { 2, 2 }, { 6, 3 }, { 7, 3 } }; static const struct vp9_token inter_mode_encodings[INTER_MODES] = { { 2, 2 }, { 6, 3 }, { 0, 1 }, { 7, 3 } }; static void write_intra_mode(vpx_writer *w, PREDICTION_MODE mode, const vpx_prob *probs) { vp9_write_token(w, vp9_intra_mode_tree, probs, &intra_mode_encodings[mode]); } static void write_inter_mode(vpx_writer *w, PREDICTION_MODE mode, const vpx_prob *probs) { assert(is_inter_mode(mode)); vp9_write_token(w, vp9_inter_mode_tree, probs, &inter_mode_encodings[INTER_OFFSET(mode)]); } static void encode_unsigned_max(struct vpx_write_bit_buffer *wb, int data, int max) { vpx_wb_write_literal(wb, data, get_unsigned_bits(max)); } static void prob_diff_update(const vpx_tree_index *tree, vpx_prob probs[/*n - 1*/], const unsigned int counts[/*n - 1*/], int n, vpx_writer *w) { int i; unsigned int branch_ct[32][2]; // Assuming max number of probabilities <= 32 assert(n <= 32); vp9_tree_probs_from_distribution(tree, branch_ct, counts); for (i = 0; i < n - 1; ++i) vp9_cond_prob_diff_update(w, &probs[i], branch_ct[i]); } static void write_selected_tx_size(const VP9_COMMON *cm, const MACROBLOCKD *const xd, vpx_writer *w) { TX_SIZE tx_size = xd->mi[0]->tx_size; BLOCK_SIZE bsize = xd->mi[0]->sb_type; const TX_SIZE max_tx_size = max_txsize_lookup[bsize]; const vpx_prob *const tx_probs = get_tx_probs(max_tx_size, get_tx_size_context(xd), &cm->fc->tx_probs); vpx_write(w, tx_size != TX_4X4, tx_probs[0]); if (tx_size != TX_4X4 && max_tx_size >= TX_16X16) { vpx_write(w, tx_size != TX_8X8, tx_probs[1]); if (tx_size != TX_8X8 && max_tx_size >= TX_32X32) vpx_write(w, tx_size != TX_16X16, tx_probs[2]); } } static int write_skip(const VP9_COMMON *cm, const MACROBLOCKD *const xd, int segment_id, const MODE_INFO *mi, vpx_writer *w) { if (segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)) { return 1; } else { const int skip = mi->skip; vpx_write(w, skip, vp9_get_skip_prob(cm, xd)); return skip; } } static void update_skip_probs(VP9_COMMON *cm, vpx_writer *w, FRAME_COUNTS *counts) { int k; for (k = 0; k < SKIP_CONTEXTS; ++k) vp9_cond_prob_diff_update(w, &cm->fc->skip_probs[k], counts->skip[k]); } static void update_switchable_interp_probs(VP9_COMMON *cm, vpx_writer *w, FRAME_COUNTS *counts) { int j; for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j)
prob_diff_update(vp9_switchable_interp_tree, cm->fc->switchable_interp_prob[j], counts->switchable_interp[j], SWITCHABLE_FILTERS, w); } static void pack_mb_tokens(vpx_writer *w, TOKENEXTRA **tp, const TOKENEXTRA *const stop, vpx_bit_depth_t bit_depth) { const TOKENEXTRA *p; const vp9_extra_bit *const extra_bits = #if CONFIG_VP9_HIGHBITDEPTH (bit_depth == VPX_BITS_12) ? vp9_extra_bits_high12 : (bit_depth == VPX_BITS_10) ? vp9_extra_bits_high10 : vp9_extra_bits; #else vp9_extra_bits; (void)bit_depth; #endif // CONFIG_VP9_HIGHBITDEPTH for (p = *tp; p < stop && p->token != EOSB_TOKEN; ++p) { if (p->token == EOB_TOKEN) { vpx_write(w, 0, p->context_tree[0]); continue; } vpx_write(w, 1, p->context_tree[0]); while (p->token == ZERO_TOKEN) { vpx_write(w, 0, p->context_tree[1]); ++p; if (p == stop || p->token == EOSB_TOKEN) { *tp = (TOKENEXTRA *)(uintptr_t)p + (p->token == EOSB_TOKEN); return; } } { const int t = p->token; const vpx_prob *const context_tree = p->context_tree; assert(t != ZERO_TOKEN); assert(t != EOB_TOKEN); assert(t != EOSB_TOKEN); vpx_write(w, 1, context_tree[1]); if (t == ONE_TOKEN) { vpx_write(w, 0, context_tree[2]); vpx_write_bit(w, p->extra & 1); } else { // t >= TWO_TOKEN && t < EOB_TOKEN const struct vp9_token *const a = &vp9_coef_encodings[t]; const int v = a->value; const int n = a->len; const int e = p->extra; vpx_write(w, 1, context_tree[2]); vp9_write_tree(w, vp9_coef_con_tree, vp9_pareto8_full[context_tree[PIVOT_NODE] - 1], v, n - UNCONSTRAINED_NODES, 0); if (t >= CATEGORY1_TOKEN) { const vp9_extra_bit *const b = &extra_bits[t]; const unsigned char *pb = b->prob; int v = e >> 1; int n = b->len; // number of bits in v, assumed nonzero do { const int bb = (v >> --n) & 1; vpx_write(w, bb, *pb++); } while (n); } vpx_write_bit(w, e & 1); } } } *tp = (TOKENEXTRA *)(uintptr_t)p + (p->token == EOSB_TOKEN); } static void write_segment_id(vpx_writer *w, const struct segmentation *seg, int segment_id) { if (seg->enabled && seg->update_map) vp9_write_tree(w, vp9_segment_tree, seg->tree_probs, segment_id, 3, 0); } // This function encodes the reference frame static void write_ref_frames(const VP9_COMMON *cm, const MACROBLOCKD *const xd, vpx_writer *w) { const MODE_INFO *const mi = xd->mi[0]; const int is_compound = has_second_ref(mi); const int segment_id = mi->segment_id; // If segment level coding of this signal is disabled... 
// or the segment allows multiple reference frame options if (segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME)) { assert(!is_compound); assert(mi->ref_frame[0] == get_segdata(&cm->seg, segment_id, SEG_LVL_REF_FRAME)); } else { // does the feature use compound prediction or not // (if not specified at the frame/segment level) if (cm->reference_mode == REFERENCE_MODE_SELECT) { vpx_write(w, is_compound, vp9_get_reference_mode_prob(cm, xd)); } else { assert((!is_compound) == (cm->reference_mode == SINGLE_REFERENCE)); } if (is_compound) { const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; vpx_write(w, mi->ref_frame[!idx] == cm->comp_var_ref[1], vp9_get_pred_prob_comp_ref_p(cm, xd)); } else { const int bit0 = mi->ref_frame[0] != LAST_FRAME; vpx_write(w, bit0, vp9_get_pred_prob_single_ref_p1(cm, xd)); if (bit0) { const int bit1 = mi->ref_frame[0] != GOLDEN_FRAME; vpx_write(w, bit1, vp9_get_pred_prob_single_ref_p2(cm, xd)); } } } } static void pack_inter_mode_mvs( VP9_COMP *cpi, const MACROBLOCKD *const xd, const MB_MODE_INFO_EXT *const mbmi_ext, vpx_writer *w, unsigned int *const max_mv_magnitude, int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) { VP9_COMMON *const cm = &cpi->common; const nmv_context *nmvc = &cm->fc->nmvc; const struct segmentation *const seg = &cm->seg; const MODE_INFO *const mi = xd->mi[0]; const PREDICTION_MODE mode = mi->mode; const int segment_id = mi->segment_id; const BLOCK_SIZE bsize = mi->sb_type; const int allow_hp = cm->allow_high_precision_mv; const int is_inter = is_inter_block(mi); const int is_compound = has_second_ref(mi); int skip, ref; if (seg->update_map) { if (seg->temporal_update) { const int pred_flag = mi->seg_id_predicted; vpx_prob pred_prob = vp9_get_pred_prob_seg_id(seg, xd); vpx_write(w, pred_flag, pred_prob); if (!pred_flag) write_segment_id(w, seg, segment_id); } else { write_segment_id(w, seg, segment_id); } } skip = write_skip(cm, xd, segment_id, mi, w); if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) vpx_write(w, is_inter, vp9_get_intra_inter_prob(cm, xd)); if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT && !(is_inter && skip)) { write_selected_tx_size(cm, xd, w); } if (!is_inter) { if (bsize >= BLOCK_8X8) { write_intra_mode(w, mode, cm->fc->y_mode_prob[size_group_lookup[bsize]]); } else { int idx, idy; const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; for (idy = 0; idy < 2; idy += num_4x4_h) { for (idx = 0; idx < 2; idx += num_4x4_w) { const PREDICTION_MODE b_mode = mi->bmi[idy * 2 + idx].as_mode; write_intra_mode(w, b_mode, cm->fc->y_mode_prob[0]); } } } write_intra_mode(w, mi->uv_mode, cm->fc->uv_mode_prob[mode]); } else { const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]]; const vpx_prob *const inter_probs = cm->fc->inter_mode_probs[mode_ctx]; write_ref_frames(cm, xd, w); // If segment skip is not enabled code the mode. 
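// When SEG_LVL_SKIP is active for the segment, the decoder infers the skip
// flag and a zero motion vector for the block, so neither the inter mode
// nor an mv needs to be written here.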
if (!segfeature_active(seg, segment_id, SEG_LVL_SKIP)) { if (bsize >= BLOCK_8X8) { write_inter_mode(w, mode, inter_probs); } } if (cm->interp_filter == SWITCHABLE) { const int ctx = get_pred_context_switchable_interp(xd); vp9_write_token(w, vp9_switchable_interp_tree, cm->fc->switchable_interp_prob[ctx], &switchable_interp_encodings[mi->interp_filter]); ++interp_filter_selected[0][mi->interp_filter]; } else { assert(mi->interp_filter == cm->interp_filter); } if (bsize < BLOCK_8X8) { const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; int idx, idy; for (idy = 0; idy < 2; idy += num_4x4_h) { for (idx = 0; idx < 2; idx += num_4x4_w) { const int j = idy * 2 + idx; const PREDICTION_MODE b_mode = mi->bmi[j].as_mode; write_inter_mode(w, b_mode, inter_probs); if (b_mode == NEWMV) { for (ref = 0; ref < 1 + is_compound; ++ref) vp9_encode_mv(cpi, w, &mi->bmi[j].as_mv[ref].as_mv, &mbmi_ext->ref_mvs[mi->ref_frame[ref]][0].as_mv, nmvc, allow_hp, max_mv_magnitude); } } } } else { if (mode == NEWMV) { for (ref = 0; ref < 1 + is_compound; ++ref) vp9_encode_mv(cpi, w, &mi->mv[ref].as_mv, &mbmi_ext->ref_mvs[mi->ref_frame[ref]][0].as_mv, nmvc, allow_hp, max_mv_magnitude); } } } } static void write_mb_modes_kf(const VP9_COMMON *cm, const MACROBLOCKD *xd, vpx_writer *w) { const struct segmentation *const seg = &cm->seg; const MODE_INFO *const mi = xd->mi[0]; const MODE_INFO *const above_mi = xd->above_mi; const MODE_INFO *const left_mi = xd->left_mi; const BLOCK_SIZE bsize = mi->sb_type; if (seg->update_map) write_segment_id(w, seg, mi->segment_id); write_skip(cm, xd, mi->segment_id, mi, w); if (bsize >= BLOCK_8X8 && cm->tx_mode == TX_MODE_SELECT) write_selected_tx_size(cm, xd, w); if (bsize >= BLOCK_8X8) { write_intra_mode(w, mi->mode, get_y_mode_probs(mi, above_mi, left_mi, 0)); } else { const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; int idx, idy; for (idy = 0; idy < 2; idy += num_4x4_h) { for (idx = 0; idx < 2; idx += num_4x4_w) { const int block = idy * 2 + idx; write_intra_mode(w, mi->bmi[block].as_mode, get_y_mode_probs(mi, above_mi, left_mi, block)); } } } write_intra_mode(w, mi->uv_mode, vp9_kf_uv_mode_prob[mi->mode]); } static void write_modes_b( VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile, vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, int mi_row, int mi_col, unsigned int *const max_mv_magnitude, int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) { const VP9_COMMON *const cm = &cpi->common; const MB_MODE_INFO_EXT *const mbmi_ext = cpi->td.mb.mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); MODE_INFO *m; xd->mi = cm->mi_grid_visible + (mi_row * cm->mi_stride + mi_col); m = xd->mi[0]; set_mi_row_col(xd, tile, mi_row, num_8x8_blocks_high_lookup[m->sb_type], mi_col, num_8x8_blocks_wide_lookup[m->sb_type], cm->mi_rows, cm->mi_cols); if (frame_is_intra_only(cm)) { write_mb_modes_kf(cm, xd, w); } else { pack_inter_mode_mvs(cpi, xd, mbmi_ext, w, max_mv_magnitude, interp_filter_selected); } assert(*tok < tok_end); pack_mb_tokens(w, tok, tok_end, cm->bit_depth); } static void write_partition(const VP9_COMMON *const cm, const MACROBLOCKD *const xd, int hbs, int mi_row, int mi_col, PARTITION_TYPE p, BLOCK_SIZE bsize, vpx_writer *w) { const int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); const vpx_prob *const probs = xd->partition_probs[ctx]; const int has_rows = (mi_row + hbs) < cm->mi_rows; const int has_cols = (mi_col + hbs) < 
cm->mi_cols; if (has_rows && has_cols) { vp9_write_token(w, vp9_partition_tree, probs, &partition_encodings[p]); } else if (!has_rows && has_cols) { assert(p == PARTITION_SPLIT || p == PARTITION_HORZ); vpx_write(w, p == PARTITION_SPLIT, probs[1]); } else if (has_rows && !has_cols) { assert(p == PARTITION_SPLIT || p == PARTITION_VERT); vpx_write(w, p == PARTITION_SPLIT, probs[2]); } else { assert(p == PARTITION_SPLIT); } } static void write_modes_sb( VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile, vpx_writer *w, TOKENEXTRA **tok, const TOKENEXTRA *const tok_end, int mi_row, int mi_col, BLOCK_SIZE bsize, unsigned int *const max_mv_magnitude, int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) { const VP9_COMMON *const cm = &cpi->common; const int bsl = b_width_log2_lookup[bsize]; const int bs = (1 << bsl) / 4; PARTITION_TYPE partition; BLOCK_SIZE subsize; const MODE_INFO *m = NULL; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; m = cm->mi_grid_visible[mi_row * cm->mi_stride + mi_col]; partition = partition_lookup[bsl][m->sb_type]; write_partition(cm, xd, bs, mi_row, mi_col, partition, bsize, w); subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col, max_mv_magnitude, interp_filter_selected); } else { switch (partition) { case PARTITION_NONE: write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col, max_mv_magnitude, interp_filter_selected); break; case PARTITION_HORZ: write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col, max_mv_magnitude, interp_filter_selected); if (mi_row + bs < cm->mi_rows) write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col, max_mv_magnitude, interp_filter_selected); break; case PARTITION_VERT: write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col, max_mv_magnitude, interp_filter_selected); if (mi_col + bs < cm->mi_cols) write_modes_b(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs, max_mv_magnitude, interp_filter_selected); break; default: assert(partition == PARTITION_SPLIT); write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col, subsize, max_mv_magnitude, interp_filter_selected); write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row, mi_col + bs, subsize, max_mv_magnitude, interp_filter_selected); write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col, subsize, max_mv_magnitude, interp_filter_selected); write_modes_sb(cpi, xd, tile, w, tok, tok_end, mi_row + bs, mi_col + bs, subsize, max_mv_magnitude, interp_filter_selected); break; } } // update partition context if (bsize >= BLOCK_8X8 && (bsize == BLOCK_8X8 || partition != PARTITION_SPLIT)) update_partition_context(xd, mi_row, mi_col, subsize, bsize); } static void write_modes( VP9_COMP *cpi, MACROBLOCKD *const xd, const TileInfo *const tile, vpx_writer *w, int tile_row, int tile_col, unsigned int *const max_mv_magnitude, int interp_filter_selected[MAX_REF_FRAMES][SWITCHABLE]) { const VP9_COMMON *const cm = &cpi->common; int mi_row, mi_col, tile_sb_row; TOKENEXTRA *tok = NULL; TOKENEXTRA *tok_end = NULL; set_partition_probs(cm, xd); for (mi_row = tile->mi_row_start; mi_row < tile->mi_row_end; mi_row += MI_BLOCK_SIZE) { tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile->mi_row_start) >> MI_BLOCK_SIZE_LOG2; tok = cpi->tplist[tile_row][tile_col][tile_sb_row].start; tok_end = tok + cpi->tplist[tile_row][tile_col][tile_sb_row].count; vp9_zero(xd->left_seg_context); for (mi_col = tile->mi_col_start; mi_col < tile->mi_col_end; mi_col += MI_BLOCK_SIZE) 
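// Each iteration advances one 64x64 superblock (MI_BLOCK_SIZE = 8 MI
// units); write_modes_sb() then recurses down that superblock's
// partition tree starting from BLOCK_64X64.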
write_modes_sb(cpi, xd, tile, w, &tok, tok_end, mi_row, mi_col, BLOCK_64X64, max_mv_magnitude, interp_filter_selected); assert(tok == cpi->tplist[tile_row][tile_col][tile_sb_row].stop); } } static void build_tree_distribution(VP9_COMP *cpi, TX_SIZE tx_size, vp9_coeff_stats *coef_branch_ct, vp9_coeff_probs_model *coef_probs) { vp9_coeff_count *coef_counts = cpi->td.rd_counts.coef_counts[tx_size]; unsigned int(*eob_branch_ct)[REF_TYPES][COEF_BANDS][COEFF_CONTEXTS] = cpi->common.counts.eob_branch[tx_size]; int i, j, k, l, m; for (i = 0; i < PLANE_TYPES; ++i) { for (j = 0; j < REF_TYPES; ++j) { for (k = 0; k < COEF_BANDS; ++k) { for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { vp9_tree_probs_from_distribution(vp9_coef_tree, coef_branch_ct[i][j][k][l], coef_counts[i][j][k][l]); coef_branch_ct[i][j][k][l][0][1] = eob_branch_ct[i][j][k][l] - coef_branch_ct[i][j][k][l][0][0]; for (m = 0; m < UNCONSTRAINED_NODES; ++m) coef_probs[i][j][k][l][m] = get_binary_prob(coef_branch_ct[i][j][k][l][m][0], coef_branch_ct[i][j][k][l][m][1]); } } } } } static void update_coef_probs_common(vpx_writer *const bc, VP9_COMP *cpi, TX_SIZE tx_size, vp9_coeff_stats *frame_branch_ct, vp9_coeff_probs_model *new_coef_probs) { vp9_coeff_probs_model *old_coef_probs = cpi->common.fc->coef_probs[tx_size]; const vpx_prob upd = DIFF_UPDATE_PROB; const int entropy_nodes_update = UNCONSTRAINED_NODES; int i, j, k, l, t; int stepsize = cpi->sf.coeff_prob_appx_step; switch (cpi->sf.use_fast_coef_updates) { case TWO_LOOP: { /* dry run to see if there is any update at all needed */ int savings = 0; int update[2] = { 0, 0 }; for (i = 0; i < PLANE_TYPES; ++i) { for (j = 0; j < REF_TYPES; ++j) { for (k = 0; k < COEF_BANDS; ++k) { for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { for (t = 0; t < entropy_nodes_update; ++t) { vpx_prob newp = new_coef_probs[i][j][k][l][t]; const vpx_prob oldp = old_coef_probs[i][j][k][l][t]; int s; int u = 0; if (t == PIVOT_NODE) s = vp9_prob_diff_update_savings_search_model( frame_branch_ct[i][j][k][l][0], oldp, &newp, upd, stepsize); else s = vp9_prob_diff_update_savings_search( frame_branch_ct[i][j][k][l][t], oldp, &newp, upd); if (s > 0 && newp != oldp) u = 1; if (u) savings += s - (int)(vp9_cost_zero(upd)); else savings -= (int)(vp9_cost_zero(upd)); update[u]++; } } } } } // printf("Update %d %d, savings %d\n", update[0], update[1], savings); /* Is coef updated at all */ if (update[1] == 0 || savings < 0) { vpx_write_bit(bc, 0); return; } vpx_write_bit(bc, 1); for (i = 0; i < PLANE_TYPES; ++i) { for (j = 0; j < REF_TYPES; ++j) { for (k = 0; k < COEF_BANDS; ++k) { for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { // calc probs and branch cts for this frame only for (t = 0; t < entropy_nodes_update; ++t) { vpx_prob newp = new_coef_probs[i][j][k][l][t]; vpx_prob *oldp = old_coef_probs[i][j][k][l] + t; const vpx_prob upd = DIFF_UPDATE_PROB; int s; int u = 0; if (t == PIVOT_NODE) s = vp9_prob_diff_update_savings_search_model( frame_branch_ct[i][j][k][l][0], *oldp, &newp, upd, stepsize); else s = vp9_prob_diff_update_savings_search( frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd); if (s > 0 && newp != *oldp) u = 1; vpx_write(bc, u, upd); if (u) { /* send/use new probability */ vp9_write_prob_diff_update(bc, newp, *oldp); *oldp = newp; } } } } } } return; } default: { int updates = 0; int noupdates_before_first = 0; assert(cpi->sf.use_fast_coef_updates == ONE_LOOP_REDUCED); for (i = 0; i < PLANE_TYPES; ++i) { for (j = 0; j < REF_TYPES; ++j) { for (k = 0; k < COEF_BANDS; ++k) { for (l = 0; l < BAND_COEFF_CONTEXTS(k); 
++l) { // calc probs and branch cts for this frame only for (t = 0; t < entropy_nodes_update; ++t) { vpx_prob newp = new_coef_probs[i][j][k][l][t]; vpx_prob *oldp = old_coef_probs[i][j][k][l] + t; int s; int u = 0; if (t == PIVOT_NODE) { s = vp9_prob_diff_update_savings_search_model( frame_branch_ct[i][j][k][l][0], *oldp, &newp, upd, stepsize); } else { s = vp9_prob_diff_update_savings_search( frame_branch_ct[i][j][k][l][t], *oldp, &newp, upd); } if (s > 0 && newp != *oldp) u = 1; updates += u; if (u == 0 && updates == 0) { noupdates_before_first++; continue; } if (u == 1 && updates == 1) { int v; // first update vpx_write_bit(bc, 1); for (v = 0; v < noupdates_before_first; ++v) vpx_write(bc, 0, upd); } vpx_write(bc, u, upd); if (u) { /* send/use new probability */ vp9_write_prob_diff_update(bc, newp, *oldp); *oldp = newp; } } } } } } if (updates == 0) { vpx_write_bit(bc, 0); // no updates } return; } } } static void update_coef_probs(VP9_COMP *cpi, vpx_writer *w) { const TX_MODE tx_mode = cpi->common.tx_mode; const TX_SIZE max_tx_size = tx_mode_to_biggest_tx_size[tx_mode]; TX_SIZE tx_size; for (tx_size = TX_4X4; tx_size <= max_tx_size; ++tx_size) { vp9_coeff_stats frame_branch_ct[PLANE_TYPES]; vp9_coeff_probs_model frame_coef_probs[PLANE_TYPES]; if (cpi->td.counts->tx.tx_totals[tx_size] <= 20 || (tx_size >= TX_16X16 && cpi->sf.tx_size_search_method == USE_TX_8X8)) { vpx_write_bit(w, 0); } else { build_tree_distribution(cpi, tx_size, frame_branch_ct, frame_coef_probs); update_coef_probs_common(w, cpi, tx_size, frame_branch_ct, frame_coef_probs); } } } static void encode_loopfilter(struct loopfilter *lf, struct vpx_write_bit_buffer *wb) { int i; // Encode the loop filter level and type vpx_wb_write_literal(wb, lf->filter_level, 6); vpx_wb_write_literal(wb, lf->sharpness_level, 3); // Write out loop filter deltas applied at the MB level based on mode or // ref frame (if they are enabled). 
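// Illustration of the delta syntax below: a ref delta that changed to -3
// costs 8 bits -- 1 (changed), 000011 (6-bit magnitude), 1 (sign) -- i.e.
//   vpx_wb_write_bit(wb, 1);
//   vpx_wb_write_literal(wb, 3, 6);
//   vpx_wb_write_bit(wb, 1);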
vpx_wb_write_bit(wb, lf->mode_ref_delta_enabled); if (lf->mode_ref_delta_enabled) { vpx_wb_write_bit(wb, lf->mode_ref_delta_update); if (lf->mode_ref_delta_update) { for (i = 0; i < MAX_REF_LF_DELTAS; i++) { const int delta = lf->ref_deltas[i]; const int changed = delta != lf->last_ref_deltas[i]; vpx_wb_write_bit(wb, changed); if (changed) { lf->last_ref_deltas[i] = delta; vpx_wb_write_literal(wb, abs(delta) & 0x3F, 6); vpx_wb_write_bit(wb, delta < 0); } } for (i = 0; i < MAX_MODE_LF_DELTAS; i++) { const int delta = lf->mode_deltas[i]; const int changed = delta != lf->last_mode_deltas[i]; vpx_wb_write_bit(wb, changed); if (changed) { lf->last_mode_deltas[i] = delta; vpx_wb_write_literal(wb, abs(delta) & 0x3F, 6); vpx_wb_write_bit(wb, delta < 0); } } } } } static void write_delta_q(struct vpx_write_bit_buffer *wb, int delta_q) { if (delta_q != 0) { vpx_wb_write_bit(wb, 1); vpx_wb_write_literal(wb, abs(delta_q), 4); vpx_wb_write_bit(wb, delta_q < 0); } else { vpx_wb_write_bit(wb, 0); } } static void encode_quantization(const VP9_COMMON *const cm, struct vpx_write_bit_buffer *wb) { vpx_wb_write_literal(wb, cm->base_qindex, QINDEX_BITS); write_delta_q(wb, cm->y_dc_delta_q); write_delta_q(wb, cm->uv_dc_delta_q); write_delta_q(wb, cm->uv_ac_delta_q); } static void encode_segmentation(VP9_COMMON *cm, MACROBLOCKD *xd, struct vpx_write_bit_buffer *wb) { int i, j; const struct segmentation *seg = &cm->seg; vpx_wb_write_bit(wb, seg->enabled); if (!seg->enabled) return; // Segmentation map vpx_wb_write_bit(wb, seg->update_map); if (seg->update_map) { // Select the coding strategy (temporal or spatial) vp9_choose_segmap_coding_method(cm, xd); // Write out probabilities used to decode unpredicted macro-block segments for (i = 0; i < SEG_TREE_PROBS; i++) { const int prob = seg->tree_probs[i]; const int update = prob != MAX_PROB; vpx_wb_write_bit(wb, update); if (update) vpx_wb_write_literal(wb, prob, 8); } // Write out the chosen coding method. 
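// (A set temporal_update bit means each segment id is coded relative to
// a prediction from the previous frame's segment map, so the pred_probs
// below are only transmitted in that case; otherwise every id is coded
// explicitly with tree_probs alone.)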
vpx_wb_write_bit(wb, seg->temporal_update); if (seg->temporal_update) { for (i = 0; i < PREDICTION_PROBS; i++) { const int prob = seg->pred_probs[i]; const int update = prob != MAX_PROB; vpx_wb_write_bit(wb, update); if (update) vpx_wb_write_literal(wb, prob, 8); } } } // Segmentation data vpx_wb_write_bit(wb, seg->update_data); if (seg->update_data) { vpx_wb_write_bit(wb, seg->abs_delta); for (i = 0; i < MAX_SEGMENTS; i++) { for (j = 0; j < SEG_LVL_MAX; j++) { const int active = segfeature_active(seg, i, j); vpx_wb_write_bit(wb, active); if (active) { const int data = get_segdata(seg, i, j); const int data_max = vp9_seg_feature_data_max(j); if (vp9_is_segfeature_signed(j)) { encode_unsigned_max(wb, abs(data), data_max); vpx_wb_write_bit(wb, data < 0); } else { encode_unsigned_max(wb, data, data_max); } } } } } } static void encode_txfm_probs(VP9_COMMON *cm, vpx_writer *w, FRAME_COUNTS *counts) { // Mode vpx_write_literal(w, VPXMIN(cm->tx_mode, ALLOW_32X32), 2); if (cm->tx_mode >= ALLOW_32X32) vpx_write_bit(w, cm->tx_mode == TX_MODE_SELECT); // Probabilities if (cm->tx_mode == TX_MODE_SELECT) { int i, j; unsigned int ct_8x8p[TX_SIZES - 3][2]; unsigned int ct_16x16p[TX_SIZES - 2][2]; unsigned int ct_32x32p[TX_SIZES - 1][2]; for (i = 0; i < TX_SIZE_CONTEXTS; i++) { tx_counts_to_branch_counts_8x8(counts->tx.p8x8[i], ct_8x8p); for (j = 0; j < TX_SIZES - 3; j++) vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p8x8[i][j], ct_8x8p[j]); } for (i = 0; i < TX_SIZE_CONTEXTS; i++) { tx_counts_to_branch_counts_16x16(counts->tx.p16x16[i], ct_16x16p); for (j = 0; j < TX_SIZES - 2; j++) vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p16x16[i][j], ct_16x16p[j]); } for (i = 0; i < TX_SIZE_CONTEXTS; i++) { tx_counts_to_branch_counts_32x32(counts->tx.p32x32[i], ct_32x32p); for (j = 0; j < TX_SIZES - 1; j++) vp9_cond_prob_diff_update(w, &cm->fc->tx_probs.p32x32[i][j], ct_32x32p[j]); } } } static void write_interp_filter(INTERP_FILTER filter, struct vpx_write_bit_buffer *wb) { const int filter_to_literal[] = { 1, 0, 2, 3 }; vpx_wb_write_bit(wb, filter == SWITCHABLE); if (filter != SWITCHABLE) vpx_wb_write_literal(wb, filter_to_literal[filter], 2); } static void fix_interp_filter(VP9_COMMON *cm, FRAME_COUNTS *counts) { if (cm->interp_filter == SWITCHABLE) { // Check to see if only one of the filters is actually used int count[SWITCHABLE_FILTERS]; int i, j, c = 0; for (i = 0; i < SWITCHABLE_FILTERS; ++i) { count[i] = 0; for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; ++j) count[i] += counts->switchable_interp[j][i]; c += (count[i] > 0); } if (c == 1) { // Only one filter is used. So set the filter at frame level for (i = 0; i < SWITCHABLE_FILTERS; ++i) { if (count[i]) { cm->interp_filter = i; break; } } } } } static void write_tile_info(const VP9_COMMON *const cm, struct vpx_write_bit_buffer *wb) { int min_log2_tile_cols, max_log2_tile_cols, ones; vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); // columns ones = cm->log2_tile_cols - min_log2_tile_cols; while (ones--) vpx_wb_write_bit(wb, 1); if (cm->log2_tile_cols < max_log2_tile_cols) vpx_wb_write_bit(wb, 0); // rows vpx_wb_write_bit(wb, cm->log2_tile_rows != 0); if (cm->log2_tile_rows != 0) vpx_wb_write_bit(wb, cm->log2_tile_rows != 1); } int vp9_get_refresh_mask(VP9_COMP *cpi) { if (vp9_preserve_existing_gf(cpi)) { // We have decided to preserve the previously existing golden frame as our // new ARF frame. 
However, in the short term we leave it in the GF slot and, // if we're updating the GF with the current decoded frame, we save it // instead to the ARF slot. // Later, in the function vp9_encoder.c:vp9_update_reference_frames() we // will swap gld_fb_idx and alt_fb_idx to achieve our objective. We do it // there so that it can be done outside of the recode loop. // Note: This is highly specific to the use of ARF as a forward reference, // and this needs to be generalized as other uses are implemented // (like RTC/temporal scalability). return (cpi->refresh_last_frame << cpi->lst_fb_idx) | (cpi->refresh_golden_frame << cpi->alt_fb_idx); } else { int arf_idx = cpi->alt_fb_idx; GF_GROUP *const gf_group = &cpi->twopass.gf_group; if (cpi->multi_layer_arf) { for (arf_idx = 0; arf_idx < REF_FRAMES; ++arf_idx) { if (arf_idx != cpi->alt_fb_idx && arf_idx != cpi->lst_fb_idx && arf_idx != cpi->gld_fb_idx) { int idx; for (idx = 0; idx < gf_group->stack_size; ++idx) if (arf_idx == gf_group->arf_index_stack[idx]) break; if (idx == gf_group->stack_size) break; } } } cpi->twopass.gf_group.top_arf_idx = arf_idx; if (cpi->use_svc && cpi->svc.use_set_ref_frame_config && cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) return cpi->svc.update_buffer_slot[cpi->svc.spatial_layer_id]; return (cpi->refresh_last_frame << cpi->lst_fb_idx) | (cpi->refresh_golden_frame << cpi->gld_fb_idx) | (cpi->refresh_alt_ref_frame << arf_idx); } } static int encode_tile_worker(void *arg1, void *arg2) { VP9_COMP *cpi = (VP9_COMP *)arg1; VP9BitstreamWorkerData *data = (VP9BitstreamWorkerData *)arg2; MACROBLOCKD *const xd = &data->xd; const int tile_row = 0; vpx_start_encode(&data->bit_writer, data->dest); write_modes(cpi, xd, &cpi->tile_data[data->tile_idx].tile_info, &data->bit_writer, tile_row, data->tile_idx, &data->max_mv_magnitude, data->interp_filter_selected); vpx_stop_encode(&data->bit_writer); return 1; } void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi) { if (cpi->vp9_bitstream_worker_data) { int i; for (i = 1; i < cpi->num_workers; ++i) { vpx_free(cpi->vp9_bitstream_worker_data[i].dest); } vpx_free(cpi->vp9_bitstream_worker_data); cpi->vp9_bitstream_worker_data = NULL; } } static int encode_tiles_buffer_alloc(VP9_COMP *const cpi) { int i; const size_t worker_data_size = cpi->num_workers * sizeof(*cpi->vp9_bitstream_worker_data); cpi->vp9_bitstream_worker_data = vpx_memalign(16, worker_data_size); memset(cpi->vp9_bitstream_worker_data, 0, worker_data_size); if (!cpi->vp9_bitstream_worker_data) return 1; for (i = 1; i < cpi->num_workers; ++i) { cpi->vp9_bitstream_worker_data[i].dest_size = cpi->oxcf.width * cpi->oxcf.height; cpi->vp9_bitstream_worker_data[i].dest = vpx_malloc(cpi->vp9_bitstream_worker_data[i].dest_size); if (!cpi->vp9_bitstream_worker_data[i].dest) return 1; } return 0; } static size_t encode_tiles_mt(VP9_COMP *cpi, uint8_t *data_ptr) { const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int num_workers = cpi->num_workers; size_t total_size = 0; int tile_col = 0; if (!cpi->vp9_bitstream_worker_data || cpi->vp9_bitstream_worker_data[1].dest_size > (cpi->oxcf.width * cpi->oxcf.height)) { vp9_bitstream_encode_tiles_buffer_dealloc(cpi); if (encode_tiles_buffer_alloc(cpi)) return 0; } while (tile_col < tile_cols) { int i, j; for (i = 0; i < num_workers && tile_col < tile_cols; ++i) { VPxWorker *const worker = &cpi->workers[i]; VP9BitstreamWorkerData *const data = 
&cpi->vp9_bitstream_worker_data[i]; // Populate the worker data. data->xd = cpi->td.mb.e_mbd; data->tile_idx = tile_col; data->max_mv_magnitude = cpi->max_mv_magnitude; memset(data->interp_filter_selected, 0, sizeof(data->interp_filter_selected[0][0]) * SWITCHABLE); // First thread can directly write into the output buffer. if (i == 0) { // If this worker happens to be for the last tile, then do not offset it // by 4 for the tile size. data->dest = data_ptr + total_size + (tile_col == tile_cols - 1 ? 0 : 4); } worker->data1 = cpi; worker->data2 = data; worker->hook = encode_tile_worker; worker->had_error = 0; if (i < num_workers - 1) { winterface->launch(worker); } else { winterface->execute(worker); } ++tile_col; } for (j = 0; j < i; ++j) { VPxWorker *const worker = &cpi->workers[j]; VP9BitstreamWorkerData *const data = (VP9BitstreamWorkerData *)worker->data2; uint32_t tile_size; int k; if (!winterface->sync(worker)) return 0; tile_size = data->bit_writer.pos; // Aggregate per-thread bitstream stats. cpi->max_mv_magnitude = VPXMAX(cpi->max_mv_magnitude, data->max_mv_magnitude); for (k = 0; k < SWITCHABLE; ++k) { cpi->interp_filter_selected[0][k] += data->interp_filter_selected[0][k]; } // Prefix the size of the tile on all but the last. if (tile_col != tile_cols || j < i - 1) { mem_put_be32(data_ptr + total_size, tile_size); total_size += 4; } if (j > 0) { memcpy(data_ptr + total_size, data->dest, tile_size); } total_size += tile_size; } } return total_size; } static size_t encode_tiles(VP9_COMP *cpi, uint8_t *data_ptr) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; vpx_writer residual_bc; int tile_row, tile_col; size_t total_size = 0; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; memset(cm->above_seg_context, 0, sizeof(*cm->above_seg_context) * mi_cols_aligned_to_sb(cm->mi_cols)); // Encoding tiles in parallel is done only for realtime mode now. In other // modes the speed up is insignificant and requires further testing to ensure // that it does not make the overall process worse in any case. 
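// Tile data layout for a frame with N column tiles (sizes big endian,
// written with mem_put_be32() below):
//   [size0 (4B)][tile0 bits][size1 (4B)][tile1 bits]...[tileN-1 bits]
// Only the last tile omits its 4-byte size prefix, since it extends to
// the end of the frame packet.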
if (cpi->oxcf.mode == REALTIME && cpi->num_workers > 1 && tile_rows == 1 && tile_cols > 1) { return encode_tiles_mt(cpi, data_ptr); } for (tile_row = 0; tile_row < tile_rows; tile_row++) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { int tile_idx = tile_row * tile_cols + tile_col; if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) vpx_start_encode(&residual_bc, data_ptr + total_size + 4); else vpx_start_encode(&residual_bc, data_ptr + total_size); write_modes(cpi, xd, &cpi->tile_data[tile_idx].tile_info, &residual_bc, tile_row, tile_col, &cpi->max_mv_magnitude, cpi->interp_filter_selected); vpx_stop_encode(&residual_bc); if (tile_col < tile_cols - 1 || tile_row < tile_rows - 1) { // size of this tile mem_put_be32(data_ptr + total_size, residual_bc.pos); total_size += 4; } total_size += residual_bc.pos; } } return total_size; } static void write_render_size(const VP9_COMMON *cm, struct vpx_write_bit_buffer *wb) { const int scaling_active = cm->width != cm->render_width || cm->height != cm->render_height; vpx_wb_write_bit(wb, scaling_active); if (scaling_active) { vpx_wb_write_literal(wb, cm->render_width - 1, 16); vpx_wb_write_literal(wb, cm->render_height - 1, 16); } } static void write_frame_size(const VP9_COMMON *cm, struct vpx_write_bit_buffer *wb) { vpx_wb_write_literal(wb, cm->width - 1, 16); vpx_wb_write_literal(wb, cm->height - 1, 16); write_render_size(cm, wb); } static void write_frame_size_with_refs(VP9_COMP *cpi, struct vpx_write_bit_buffer *wb) { VP9_COMMON *const cm = &cpi->common; int found = 0; MV_REFERENCE_FRAME ref_frame; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, ref_frame); // Set "found" to 0 for temporal svc and for spatial svc key frame if (cpi->use_svc && ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || (cpi->svc.number_spatial_layers > 1 && cpi->svc.layer_context[cpi->svc.spatial_layer_id].is_key_frame))) { found = 0; } else if (cfg != NULL) { found = cm->width == cfg->y_crop_width && cm->height == cfg->y_crop_height; } vpx_wb_write_bit(wb, found); if (found) { break; } } if (!found) { vpx_wb_write_literal(wb, cm->width - 1, 16); vpx_wb_write_literal(wb, cm->height - 1, 16); } write_render_size(cm, wb); } static void write_sync_code(struct vpx_write_bit_buffer *wb) { vpx_wb_write_literal(wb, VP9_SYNC_CODE_0, 8); vpx_wb_write_literal(wb, VP9_SYNC_CODE_1, 8); vpx_wb_write_literal(wb, VP9_SYNC_CODE_2, 8); } static void write_profile(BITSTREAM_PROFILE profile, struct vpx_write_bit_buffer *wb) { switch (profile) { case PROFILE_0: vpx_wb_write_literal(wb, 0, 2); break; case PROFILE_1: vpx_wb_write_literal(wb, 2, 2); break; case PROFILE_2: vpx_wb_write_literal(wb, 1, 2); break; default: assert(profile == PROFILE_3); vpx_wb_write_literal(wb, 6, 3); break; } } static void write_bitdepth_colorspace_sampling( VP9_COMMON *const cm, struct vpx_write_bit_buffer *wb) { if (cm->profile >= PROFILE_2) { assert(cm->bit_depth > VPX_BITS_8); vpx_wb_write_bit(wb, cm->bit_depth == VPX_BITS_10 ? 0 : 1); } vpx_wb_write_literal(wb, cm->color_space, 3); if (cm->color_space != VPX_CS_SRGB) { // 0: [16, 235] (i.e. 
xvYCC), 1: [0, 255] vpx_wb_write_bit(wb, cm->color_range); if (cm->profile == PROFILE_1 || cm->profile == PROFILE_3) { assert(cm->subsampling_x != 1 || cm->subsampling_y != 1); vpx_wb_write_bit(wb, cm->subsampling_x); vpx_wb_write_bit(wb, cm->subsampling_y); vpx_wb_write_bit(wb, 0); // unused } else { assert(cm->subsampling_x == 1 && cm->subsampling_y == 1); } } else { assert(cm->profile == PROFILE_1 || cm->profile == PROFILE_3); vpx_wb_write_bit(wb, 0); // unused } } static void write_uncompressed_header(VP9_COMP *cpi, struct vpx_write_bit_buffer *wb) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; vpx_wb_write_literal(wb, VP9_FRAME_MARKER, 2); write_profile(cm->profile, wb); // If to use show existing frame. vpx_wb_write_bit(wb, cm->show_existing_frame); if (cm->show_existing_frame) { vpx_wb_write_literal(wb, cpi->alt_fb_idx, 3); return; } vpx_wb_write_bit(wb, cm->frame_type); vpx_wb_write_bit(wb, cm->show_frame); vpx_wb_write_bit(wb, cm->error_resilient_mode); if (cm->frame_type == KEY_FRAME) { write_sync_code(wb); write_bitdepth_colorspace_sampling(cm, wb); write_frame_size(cm, wb); } else { if (!cm->show_frame) vpx_wb_write_bit(wb, cm->intra_only); if (!cm->error_resilient_mode) vpx_wb_write_literal(wb, cm->reset_frame_context, 2); if (cm->intra_only) { write_sync_code(wb); // Note for profile 0, 420 8bpp is assumed. if (cm->profile > PROFILE_0) { write_bitdepth_colorspace_sampling(cm, wb); } vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES); write_frame_size(cm, wb); } else { MV_REFERENCE_FRAME ref_frame; vpx_wb_write_literal(wb, vp9_get_refresh_mask(cpi), REF_FRAMES); for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { assert(get_ref_frame_map_idx(cpi, ref_frame) != INVALID_IDX); vpx_wb_write_literal(wb, get_ref_frame_map_idx(cpi, ref_frame), REF_FRAMES_LOG2); vpx_wb_write_bit(wb, cm->ref_frame_sign_bias[ref_frame]); } write_frame_size_with_refs(cpi, wb); vpx_wb_write_bit(wb, cm->allow_high_precision_mv); fix_interp_filter(cm, cpi->td.counts); write_interp_filter(cm->interp_filter, wb); } } if (!cm->error_resilient_mode) { vpx_wb_write_bit(wb, cm->refresh_frame_context); vpx_wb_write_bit(wb, cm->frame_parallel_decoding_mode); } vpx_wb_write_literal(wb, cm->frame_context_idx, FRAME_CONTEXTS_LOG2); encode_loopfilter(&cm->lf, wb); encode_quantization(cm, wb); encode_segmentation(cm, xd, wb); write_tile_info(cm, wb); } static size_t write_compressed_header(VP9_COMP *cpi, uint8_t *data) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; FRAME_CONTEXT *const fc = cm->fc; FRAME_COUNTS *counts = cpi->td.counts; vpx_writer header_bc; vpx_start_encode(&header_bc, data); if (xd->lossless) cm->tx_mode = ONLY_4X4; else encode_txfm_probs(cm, &header_bc, counts); update_coef_probs(cpi, &header_bc); update_skip_probs(cm, &header_bc, counts); if (!frame_is_intra_only(cm)) { int i; for (i = 0; i < INTER_MODE_CONTEXTS; ++i) prob_diff_update(vp9_inter_mode_tree, cm->fc->inter_mode_probs[i], counts->inter_mode[i], INTER_MODES, &header_bc); if (cm->interp_filter == SWITCHABLE) update_switchable_interp_probs(cm, &header_bc, counts); for (i = 0; i < INTRA_INTER_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->intra_inter_prob[i], counts->intra_inter[i]); if (cpi->allow_comp_inter_inter) { const int use_compound_pred = cm->reference_mode != SINGLE_REFERENCE; const int use_hybrid_pred = cm->reference_mode == REFERENCE_MODE_SELECT; vpx_write_bit(&header_bc, use_compound_pred); if (use_compound_pred) { 
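// Reference-mode syntax: the first bit signals that compound prediction
// is possible at all; when set, a second bit chooses per-block selection
// (REFERENCE_MODE_SELECT) over always-compound (COMPOUND_REFERENCE).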
vpx_write_bit(&header_bc, use_hybrid_pred); if (use_hybrid_pred) for (i = 0; i < COMP_INTER_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->comp_inter_prob[i], counts->comp_inter[i]); } } if (cm->reference_mode != COMPOUND_REFERENCE) { for (i = 0; i < REF_CONTEXTS; i++) { vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][0], counts->single_ref[i][0]); vp9_cond_prob_diff_update(&header_bc, &fc->single_ref_prob[i][1], counts->single_ref[i][1]); } } if (cm->reference_mode != SINGLE_REFERENCE) for (i = 0; i < REF_CONTEXTS; i++) vp9_cond_prob_diff_update(&header_bc, &fc->comp_ref_prob[i], counts->comp_ref[i]); for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) prob_diff_update(vp9_intra_mode_tree, cm->fc->y_mode_prob[i], counts->y_mode[i], INTRA_MODES, &header_bc); for (i = 0; i < PARTITION_CONTEXTS; ++i) prob_diff_update(vp9_partition_tree, fc->partition_prob[i], counts->partition[i], PARTITION_TYPES, &header_bc); vp9_write_nmv_probs(cm, cm->allow_high_precision_mv, &header_bc, &counts->mv); } vpx_stop_encode(&header_bc); assert(header_bc.pos <= 0xffff); return header_bc.pos; } void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size) { uint8_t *data = dest; size_t first_part_size, uncompressed_hdr_size; struct vpx_write_bit_buffer wb = { data, 0 }; struct vpx_write_bit_buffer saved_wb; #if CONFIG_BITSTREAM_DEBUG bitstream_queue_reset_write(); #endif write_uncompressed_header(cpi, &wb); // Skip the rest coding process if use show existing frame. if (cpi->common.show_existing_frame) { uncompressed_hdr_size = vpx_wb_bytes_written(&wb); data += uncompressed_hdr_size; *size = data - dest; return; } saved_wb = wb; vpx_wb_write_literal(&wb, 0, 16); // don't know in advance first part. size uncompressed_hdr_size = vpx_wb_bytes_written(&wb); data += uncompressed_hdr_size; vpx_clear_system_state(); first_part_size = write_compressed_header(cpi, data); data += first_part_size; // TODO(jbb): Figure out what to do if first_part_size > 16 bits. vpx_wb_write_literal(&saved_wb, (int)first_part_size, 16); data += encode_tiles(cpi, data); *size = data - dest; } libvpx-1.8.2/vp9/encoder/vp9_bitstream.h000066400000000000000000000027451357355204000201400ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_BITSTREAM_H_ #define VPX_VP9_ENCODER_VP9_BITSTREAM_H_ #ifdef __cplusplus extern "C" { #endif #include "vp9/encoder/vp9_encoder.h" typedef struct VP9BitstreamWorkerData { uint8_t *dest; int dest_size; vpx_writer bit_writer; int tile_idx; unsigned int max_mv_magnitude; // The size of interp_filter_selected in VP9_COMP is actually // MAX_REFERENCE_FRAMES x SWITCHABLE. But when encoding tiles, all we ever do // is increment the very first index (index 0) for the first dimension. Hence // this is sufficient. 
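// (encode_tiles_mt() folds these per-thread counts back into
// cpi->interp_filter_selected[0] after each worker is synced.)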
int interp_filter_selected[1][SWITCHABLE]; DECLARE_ALIGNED(16, MACROBLOCKD, xd); } VP9BitstreamWorkerData; int vp9_get_refresh_mask(VP9_COMP *cpi); void vp9_bitstream_encode_tiles_buffer_dealloc(VP9_COMP *const cpi); void vp9_pack_bitstream(VP9_COMP *cpi, uint8_t *dest, size_t *size); static INLINE int vp9_preserve_existing_gf(VP9_COMP *cpi) { return cpi->refresh_golden_frame && cpi->rc.is_src_frame_alt_ref && !cpi->use_svc; } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_BITSTREAM_H_ libvpx-1.8.2/vp9/encoder/vp9_block.h000066400000000000000000000132151357355204000172320ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_BLOCK_H_ #define VPX_VP9_ENCODER_VP9_BLOCK_H_ #include "vpx_util/vpx_thread.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_entropy.h" #ifdef __cplusplus extern "C" { #endif typedef struct { unsigned int sse; int sum; unsigned int var; } diff; struct macroblock_plane { DECLARE_ALIGNED(16, int16_t, src_diff[64 * 64]); tran_low_t *qcoeff; tran_low_t *coeff; uint16_t *eobs; struct buf_2d src; // Quantizer setings DECLARE_ALIGNED(16, int16_t, round_fp[8]); int16_t *quant_fp; int16_t *quant; int16_t *quant_shift; int16_t *zbin; int16_t *round; int64_t quant_thred[2]; }; /* The [2] dimension is for whether we skip the EOB node (i.e. if previous * coefficient in this block was zero) or not. */ typedef unsigned int vp9_coeff_cost[PLANE_TYPES][REF_TYPES][COEF_BANDS][2] [COEFF_CONTEXTS][ENTROPY_TOKENS]; typedef struct { int_mv ref_mvs[MAX_REF_FRAMES][MAX_MV_REF_CANDIDATES]; uint8_t mode_context[MAX_REF_FRAMES]; } MB_MODE_INFO_EXT; typedef struct { int col_min; int col_max; int row_min; int row_max; } MvLimits; typedef struct macroblock MACROBLOCK; struct macroblock { // cf. https://bugs.chromium.org/p/webm/issues/detail?id=1054 #if defined(_MSC_VER) && _MSC_VER < 1900 int64_t bsse[MAX_MB_PLANE << 2]; #endif struct macroblock_plane plane[MAX_MB_PLANE]; MACROBLOCKD e_mbd; MB_MODE_INFO_EXT *mbmi_ext; MB_MODE_INFO_EXT *mbmi_ext_base; int skip_block; int select_tx_size; int skip_recode; int skip_optimize; int q_index; int block_qcoeff_opt; int block_tx_domain; // The equivalent error at the current rdmult of one whole bit (not one // bitcost unit). int errorperbit; // The equivalend SAD error of one (whole) bit at the current quantizer // for large blocks. int sadperbit16; // The equivalend SAD error of one (whole) bit at the current quantizer // for sub-8x8 blocks. int sadperbit4; int rddiv; int rdmult; int cb_rdmult; int segment_id; int mb_energy; // These are set to their default values at the beginning, and then adjusted // further in the encoding process. 
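// (When the auto min/max partition speed feature is enabled, the encoder
// narrows this range from the block sizes observed in neighboring
// superblocks before each partition search.)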
BLOCK_SIZE min_partition_size; BLOCK_SIZE max_partition_size; int mv_best_ref_index[MAX_REF_FRAMES]; unsigned int max_mv_context[MAX_REF_FRAMES]; unsigned int source_variance; unsigned int pred_sse[MAX_REF_FRAMES]; int pred_mv_sad[MAX_REF_FRAMES]; int nmvjointcost[MV_JOINTS]; int *nmvcost[2]; int *nmvcost_hp[2]; int **mvcost; int nmvjointsadcost[MV_JOINTS]; int *nmvsadcost[2]; int *nmvsadcost_hp[2]; int **mvsadcost; // sharpness is used to disable skip mode and change rd_mult int sharpness; // aq mode is used to adjust rd based on segment. int adjust_rdmult_by_segment; // These define limits to motion vector components to prevent them // from extending outside the UMV borders MvLimits mv_limits; // Notes transform blocks where no coefficents are coded. // Set during mode selection. Read during block encoding. uint8_t zcoeff_blk[TX_SIZES][256]; // Accumulate the tx block eobs in a partition block. int32_t sum_y_eobs[TX_SIZES]; int skip; int encode_breakout; // note that token_costs is the cost when eob node is skipped vp9_coeff_cost token_costs[TX_SIZES]; int optimize; // indicate if it is in the rd search loop or encoding process int use_lp32x32fdct; int skip_encode; // In first pass, intra prediction is done based on source pixels // at tile boundaries int fp_src_pred; // use fast quantization process int quant_fp; // skip forward transform and quantization uint8_t skip_txfm[MAX_MB_PLANE << 2]; #define SKIP_TXFM_NONE 0 #define SKIP_TXFM_AC_DC 1 #define SKIP_TXFM_AC_ONLY 2 // cf. https://bugs.chromium.org/p/webm/issues/detail?id=1054 #if !defined(_MSC_VER) || _MSC_VER >= 1900 int64_t bsse[MAX_MB_PLANE << 2]; #endif // Used to store sub partition's choices. MV pred_mv[MAX_REF_FRAMES]; // Strong color activity detection. Used in RTC coding mode to enhance // the visual quality at the boundary of moving color objects. uint8_t color_sensitivity[2]; uint8_t sb_is_skin; uint8_t skip_low_source_sad; uint8_t lowvar_highsumdiff; uint8_t last_sb_high_content; int sb_use_mv_part; int sb_mvcol_part; int sb_mvrow_part; int sb_pickmode_part; int zero_temp_sad_source; // For each superblock: saves the content value (e.g., low/high sad/sumdiff) // based on source sad, prior to encoding the frame. uint8_t content_state_sb; // Used to save the status of whether a block has a low variance in // choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for // 32x32, 9~24 for 16x16. uint8_t variance_low[25]; uint8_t arf_frame_usage; uint8_t lastgolden_frame_usage; void (*fwd_txfm4x4)(const int16_t *input, tran_low_t *output, int stride); void (*inv_txfm_add)(const tran_low_t *input, uint8_t *dest, int stride, int eob); #if CONFIG_VP9_HIGHBITDEPTH void (*highbd_inv_txfm_add)(const tran_low_t *input, uint16_t *dest, int stride, int eob, int bd); #endif DECLARE_ALIGNED(16, uint8_t, est_pred[64 * 64]); struct scale_factors *me_sf; }; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_BLOCK_H_ libvpx-1.8.2/vp9/encoder/vp9_blockiness.c000066400000000000000000000105421357355204000202670ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <stdlib.h> #include "vpx/vpx_integer.h" #include "vpx_ports/system_state.h" #include "vp9/encoder/vp9_blockiness.h" static int horizontal_filter(const uint8_t *s) { return (s[1] - s[-2]) * 2 + (s[-1] - s[0]) * 6; } static int vertical_filter(const uint8_t *s, int p) { return (s[p] - s[-2 * p]) * 2 + (s[-p] - s[0]) * 6; } static int variance(int sum, int sum_squared, int size) { return sum_squared / size - (sum / size) * (sum / size); } // Calculate a blockiness level for a vertical block edge. // This function returns a new blockiness metric that's defined as // p0 p1 p2 p3 // q0 q1 q2 q3 // block edge -> // r0 r1 r2 r3 // s0 s1 s2 s3 // blockiness = p0*-2+q0*6+r0*-6+s0*2 + // p1*-2+q1*6+r1*-6+s1*2 + // p2*-2+q2*6+r2*-6+s2*2 + // p3*-2+q3*6+r3*-6+s3*2 ; // reconstructed_blockiness = abs(blockiness from reconstructed buffer - // blockiness from source buffer,0) // // I make the assumption that flat blocks are much more visible than high // contrast blocks. As such, I scale the result of the blockiness calc // by dividing the blockiness by the variance of the pixels on either side // of the edge as follows: // var_0 = (q0^2+q1^2+q2^2+q3^2) - ((q0 + q1 + q2 + q3) / 4 )^2 // var_1 = (r0^2+r1^2+r2^2+r3^2) - ((r0 + r1 + r2 + r3) / 4 )^2 // The returned blockiness is the scaled value // Reconstructed blockiness / ( 1 + var_0 + var_1 ) ; static int blockiness_vertical(const uint8_t *s, int sp, const uint8_t *r, int rp, int size) { int s_blockiness = 0; int r_blockiness = 0; int sum_0 = 0; int sum_sq_0 = 0; int sum_1 = 0; int sum_sq_1 = 0; int i; int var_0; int var_1; for (i = 0; i < size; ++i, s += sp, r += rp) { s_blockiness += horizontal_filter(s); r_blockiness += horizontal_filter(r); sum_0 += s[0]; sum_sq_0 += s[0] * s[0]; sum_1 += s[-1]; sum_sq_1 += s[-1] * s[-1]; } var_0 = variance(sum_0, sum_sq_0, size); var_1 = variance(sum_1, sum_sq_1, size); r_blockiness = abs(r_blockiness); s_blockiness = abs(s_blockiness); if (r_blockiness > s_blockiness) return (r_blockiness - s_blockiness) / (1 + var_0 + var_1); else return 0; } // Calculate a blockiness level for a horizontal block edge // same as above. static int blockiness_horizontal(const uint8_t *s, int sp, const uint8_t *r, int rp, int size) { int s_blockiness = 0; int r_blockiness = 0; int sum_0 = 0; int sum_sq_0 = 0; int sum_1 = 0; int sum_sq_1 = 0; int i; int var_0; int var_1; for (i = 0; i < size; ++i, ++s, ++r) { s_blockiness += vertical_filter(s, sp); r_blockiness += vertical_filter(r, rp); sum_0 += s[0]; sum_sq_0 += s[0] * s[0]; sum_1 += s[-sp]; sum_sq_1 += s[-sp] * s[-sp]; } var_0 = variance(sum_0, sum_sq_0, size); var_1 = variance(sum_1, sum_sq_1, size); r_blockiness = abs(r_blockiness); s_blockiness = abs(s_blockiness); if (r_blockiness > s_blockiness) return (r_blockiness - s_blockiness) / (1 + var_0 + var_1); else return 0; } // This function returns the blockiness for the entire frame currently by // looking at all borders in steps of 4.
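// Worked example for one 4-sample vertical edge: with a flat source (its
// filter sum and both side variances are 0) and a reconstruction that
// steps from 100 to 108 across the edge, each of the 4 rows contributes
// (108 - 100) * 2 + (100 - 108) * 6 = -32, so r_blockiness = |4 * -32| =
// 128 and blockiness_vertical() returns (128 - 0) / (1 + 0 + 0) = 128.
//
// Minimal usage sketch (hypothetical src/recon buffers, not part of this
// file):
//   double b = vp9_get_blockiness(src->y_buffer, src->y_stride,
//                                 recon->y_buffer, recon->y_stride,
//                                 src->y_crop_width, src->y_crop_height);
// Larger values indicate more visible block-edge artifacts.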
double vp9_get_blockiness(const uint8_t *img1, int img1_pitch, const uint8_t *img2, int img2_pitch, int width, int height) { double blockiness = 0; int i, j; vpx_clear_system_state(); for (i = 0; i < height; i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) { for (j = 0; j < width; j += 4) { if (i > 0 && i < height && j > 0 && j < width) { blockiness += blockiness_vertical(img1 + j, img1_pitch, img2 + j, img2_pitch, 4); blockiness += blockiness_horizontal(img1 + j, img1_pitch, img2 + j, img2_pitch, 4); } } } blockiness /= width * height / 16; return blockiness; } libvpx-1.8.2/vp9/encoder/vp9_blockiness.h000066400000000000000000000014431357355204000202740ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_BLOCKINESS_H_ #define VPX_VP9_ENCODER_VP9_BLOCKINESS_H_ #ifdef __cplusplus extern "C" { #endif double vp9_get_blockiness(const uint8_t *img1, int img1_pitch, const uint8_t *img2, int img2_pitch, int width, int height); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_BLOCKINESS_H_ libvpx-1.8.2/vp9/encoder/vp9_context_tree.c000066400000000000000000000125541357355204000206430ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp9/encoder/vp9_context_tree.h" #include "vp9/encoder/vp9_encoder.h" static const BLOCK_SIZE square[] = { BLOCK_8X8, BLOCK_16X16, BLOCK_32X32, BLOCK_64X64, }; static void alloc_mode_context(VP9_COMMON *cm, int num_4x4_blk, PICK_MODE_CONTEXT *ctx) { const int num_blk = (num_4x4_blk < 4 ? 
4 : num_4x4_blk); const int num_pix = num_blk << 4; int i, k; ctx->num_4x4_blk = num_blk; CHECK_MEM_ERROR(cm, ctx->zcoeff_blk, vpx_calloc(num_blk, sizeof(uint8_t))); for (i = 0; i < MAX_MB_PLANE; ++i) { for (k = 0; k < 3; ++k) { CHECK_MEM_ERROR(cm, ctx->coeff[i][k], vpx_memalign(32, num_pix * sizeof(*ctx->coeff[i][k]))); CHECK_MEM_ERROR(cm, ctx->qcoeff[i][k], vpx_memalign(32, num_pix * sizeof(*ctx->qcoeff[i][k]))); CHECK_MEM_ERROR(cm, ctx->dqcoeff[i][k], vpx_memalign(32, num_pix * sizeof(*ctx->dqcoeff[i][k]))); CHECK_MEM_ERROR(cm, ctx->eobs[i][k], vpx_memalign(32, num_blk * sizeof(*ctx->eobs[i][k]))); ctx->coeff_pbuf[i][k] = ctx->coeff[i][k]; ctx->qcoeff_pbuf[i][k] = ctx->qcoeff[i][k]; ctx->dqcoeff_pbuf[i][k] = ctx->dqcoeff[i][k]; ctx->eobs_pbuf[i][k] = ctx->eobs[i][k]; } } } static void free_mode_context(PICK_MODE_CONTEXT *ctx) { int i, k; vpx_free(ctx->zcoeff_blk); ctx->zcoeff_blk = 0; for (i = 0; i < MAX_MB_PLANE; ++i) { for (k = 0; k < 3; ++k) { vpx_free(ctx->coeff[i][k]); ctx->coeff[i][k] = 0; vpx_free(ctx->qcoeff[i][k]); ctx->qcoeff[i][k] = 0; vpx_free(ctx->dqcoeff[i][k]); ctx->dqcoeff[i][k] = 0; vpx_free(ctx->eobs[i][k]); ctx->eobs[i][k] = 0; } } } static void alloc_tree_contexts(VP9_COMMON *cm, PC_TREE *tree, int num_4x4_blk) { alloc_mode_context(cm, num_4x4_blk, &tree->none); alloc_mode_context(cm, num_4x4_blk / 2, &tree->horizontal[0]); alloc_mode_context(cm, num_4x4_blk / 2, &tree->vertical[0]); if (num_4x4_blk > 4) { alloc_mode_context(cm, num_4x4_blk / 2, &tree->horizontal[1]); alloc_mode_context(cm, num_4x4_blk / 2, &tree->vertical[1]); } else { memset(&tree->horizontal[1], 0, sizeof(tree->horizontal[1])); memset(&tree->vertical[1], 0, sizeof(tree->vertical[1])); } } static void free_tree_contexts(PC_TREE *tree) { free_mode_context(&tree->none); free_mode_context(&tree->horizontal[0]); free_mode_context(&tree->horizontal[1]); free_mode_context(&tree->vertical[0]); free_mode_context(&tree->vertical[1]); } // This function sets up a tree of contexts such that at each square // partition level. There are contexts for none, horizontal, vertical, and // split. Along with a block_size value and a selected block_size which // represents the state of our search. void vp9_setup_pc_tree(VP9_COMMON *cm, ThreadData *td) { int i, j; const int leaf_nodes = 64; const int tree_nodes = 64 + 16 + 4 + 1; int pc_tree_index = 0; PC_TREE *this_pc; PICK_MODE_CONTEXT *this_leaf; int square_index = 1; int nodes; vpx_free(td->leaf_tree); CHECK_MEM_ERROR(cm, td->leaf_tree, vpx_calloc(leaf_nodes, sizeof(*td->leaf_tree))); vpx_free(td->pc_tree); CHECK_MEM_ERROR(cm, td->pc_tree, vpx_calloc(tree_nodes, sizeof(*td->pc_tree))); this_pc = &td->pc_tree[0]; this_leaf = &td->leaf_tree[0]; // 4x4 blocks smaller than 8x8 but in the same 8x8 block share the same // context so we only need to allocate 1 for each 8x8 block. for (i = 0; i < leaf_nodes; ++i) alloc_mode_context(cm, 1, &td->leaf_tree[i]); // Sets up all the leaf nodes in the tree. for (pc_tree_index = 0; pc_tree_index < leaf_nodes; ++pc_tree_index) { PC_TREE *const tree = &td->pc_tree[pc_tree_index]; tree->block_size = square[0]; alloc_tree_contexts(cm, tree, 4); tree->leaf_split[0] = this_leaf++; for (j = 1; j < 4; j++) tree->leaf_split[j] = tree->leaf_split[0]; } // Each node has 4 leaf nodes, fill each block_size level of the tree // from leafs to the root. 
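// That is: 16 nodes at BLOCK_16X16, 4 at BLOCK_32X32, then the single
// BLOCK_64X64 root; together with the 64 BLOCK_8X8 nodes set up above
// this accounts for all 64 + 16 + 4 + 1 == tree_nodes entries.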
for (nodes = 16; nodes > 0; nodes >>= 2) { for (i = 0; i < nodes; ++i) { PC_TREE *const tree = &td->pc_tree[pc_tree_index]; alloc_tree_contexts(cm, tree, 4 << (2 * square_index)); tree->block_size = square[square_index]; for (j = 0; j < 4; j++) tree->split[j] = this_pc++; ++pc_tree_index; } ++square_index; } td->pc_root = &td->pc_tree[tree_nodes - 1]; td->pc_root[0].none.best_mode_index = 2; } void vp9_free_pc_tree(ThreadData *td) { int i; if (td == NULL) return; if (td->leaf_tree != NULL) { // Set up all 4x4 mode contexts for (i = 0; i < 64; ++i) free_mode_context(&td->leaf_tree[i]); vpx_free(td->leaf_tree); td->leaf_tree = NULL; } if (td->pc_tree != NULL) { const int tree_nodes = 64 + 16 + 4 + 1; // Sets up all the leaf nodes in the tree. for (i = 0; i < tree_nodes; ++i) free_tree_contexts(&td->pc_tree[i]); vpx_free(td->pc_tree); td->pc_tree = NULL; } } libvpx-1.8.2/vp9/encoder/vp9_context_tree.h000066400000000000000000000057471357355204000206560ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_CONTEXT_TREE_H_ #define VPX_VP9_ENCODER_VP9_CONTEXT_TREE_H_ #include "vp9/common/vp9_blockd.h" #include "vp9/encoder/vp9_block.h" #ifdef __cplusplus extern "C" { #endif struct VP9_COMP; struct VP9Common; struct ThreadData; // Structure to hold snapshot of coding context during the mode picking process typedef struct { MODE_INFO mic; MB_MODE_INFO_EXT mbmi_ext; uint8_t *zcoeff_blk; tran_low_t *coeff[MAX_MB_PLANE][3]; tran_low_t *qcoeff[MAX_MB_PLANE][3]; tran_low_t *dqcoeff[MAX_MB_PLANE][3]; uint16_t *eobs[MAX_MB_PLANE][3]; // dual buffer pointers, 0: in use, 1: best in store tran_low_t *coeff_pbuf[MAX_MB_PLANE][3]; tran_low_t *qcoeff_pbuf[MAX_MB_PLANE][3]; tran_low_t *dqcoeff_pbuf[MAX_MB_PLANE][3]; uint16_t *eobs_pbuf[MAX_MB_PLANE][3]; int is_coded; int num_4x4_blk; int skip; int pred_pixel_ready; // For current partition, only if all Y, U, and V transform blocks' // coefficients are quantized to 0, skippable is set to 0. int skippable; uint8_t skip_txfm[MAX_MB_PLANE << 2]; int best_mode_index; int hybrid_pred_diff; int comp_pred_diff; int single_pred_diff; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; // TODO(jingning) Use RD_COST struct here instead. This involves a boarder // scope of refactoring. int rate; int64_t dist; int64_t rdcost; #if CONFIG_VP9_TEMPORAL_DENOISING unsigned int newmv_sse; unsigned int zeromv_sse; unsigned int zeromv_lastref_sse; PREDICTION_MODE best_sse_inter_mode; int_mv best_sse_mv; MV_REFERENCE_FRAME best_reference_frame; MV_REFERENCE_FRAME best_zeromv_reference_frame; int sb_skip_denoising; #endif // motion vector cache for adaptive motion search control in partition // search loop MV pred_mv[MAX_REF_FRAMES]; INTERP_FILTER pred_interp_filter; // Used for the machine learning-based early termination int32_t sum_y_eobs; // Skip certain ref frames during RD search of rectangular partitions. 
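// Each set bit (1 << ref_frame) excludes that MV_REFERENCE_FRAME from
// the search, e.g. (1 << GOLDEN_FRAME) prunes the golden frame.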
uint8_t skip_ref_frame_mask; } PICK_MODE_CONTEXT; typedef struct PC_TREE { int index; PARTITION_TYPE partitioning; BLOCK_SIZE block_size; PICK_MODE_CONTEXT none; PICK_MODE_CONTEXT horizontal[2]; PICK_MODE_CONTEXT vertical[2]; union { struct PC_TREE *split[4]; PICK_MODE_CONTEXT *leaf_split[4]; }; // Obtained from a simple motion search. Used by the ML based partition search // speed feature. MV mv; } PC_TREE; void vp9_setup_pc_tree(struct VP9Common *cm, struct ThreadData *td); void vp9_free_pc_tree(struct ThreadData *td); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_CONTEXT_TREE_H_ libvpx-1.8.2/vp9/encoder/vp9_cost.c000066400000000000000000000054651357355204000171130ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "vp9/encoder/vp9_cost.h" /* round(-log2(i/256.) * (1 << VP9_PROB_COST_SHIFT)) Begins with a bogus entry for simpler addressing. */ const uint16_t vp9_prob_cost[256] = { 4096, 4096, 3584, 3284, 3072, 2907, 2772, 2659, 2560, 2473, 2395, 2325, 2260, 2201, 2147, 2096, 2048, 2003, 1961, 1921, 1883, 1847, 1813, 1780, 1748, 1718, 1689, 1661, 1635, 1609, 1584, 1559, 1536, 1513, 1491, 1470, 1449, 1429, 1409, 1390, 1371, 1353, 1335, 1318, 1301, 1284, 1268, 1252, 1236, 1221, 1206, 1192, 1177, 1163, 1149, 1136, 1123, 1110, 1097, 1084, 1072, 1059, 1047, 1036, 1024, 1013, 1001, 990, 979, 968, 958, 947, 937, 927, 917, 907, 897, 887, 878, 868, 859, 850, 841, 832, 823, 814, 806, 797, 789, 780, 772, 764, 756, 748, 740, 732, 724, 717, 709, 702, 694, 687, 680, 673, 665, 658, 651, 644, 637, 631, 624, 617, 611, 604, 598, 591, 585, 578, 572, 566, 560, 554, 547, 541, 535, 530, 524, 518, 512, 506, 501, 495, 489, 484, 478, 473, 467, 462, 456, 451, 446, 441, 435, 430, 425, 420, 415, 410, 405, 400, 395, 390, 385, 380, 375, 371, 366, 361, 356, 352, 347, 343, 338, 333, 329, 324, 320, 316, 311, 307, 302, 298, 294, 289, 285, 281, 277, 273, 268, 264, 260, 256, 252, 248, 244, 240, 236, 232, 228, 224, 220, 216, 212, 209, 205, 201, 197, 194, 190, 186, 182, 179, 175, 171, 168, 164, 161, 157, 153, 150, 146, 143, 139, 136, 132, 129, 125, 122, 119, 115, 112, 109, 105, 102, 99, 95, 92, 89, 86, 82, 79, 76, 73, 70, 66, 63, 60, 57, 54, 51, 48, 45, 42, 38, 35, 32, 29, 26, 23, 20, 18, 15, 12, 9, 6, 3 }; static void cost(int *costs, vpx_tree tree, const vpx_prob *probs, int i, int c) { const vpx_prob prob = probs[i / 2]; int b; assert(prob != 0); for (b = 0; b <= 1; ++b) { const int cc = c + vp9_cost_bit(prob, b); const vpx_tree_index ii = tree[i + b]; if (ii <= 0) costs[-ii] = cc; else cost(costs, tree, probs, ii, cc); } } void vp9_cost_tokens(int *costs, const vpx_prob *probs, vpx_tree tree) { cost(costs, tree, probs, 0, 0); } void vp9_cost_tokens_skip(int *costs, const vpx_prob *probs, vpx_tree tree) { assert(tree[0] <= 0 && tree[1] > 0); costs[-tree[0]] = vp9_cost_bit(probs[0], 0); cost(costs, tree, probs, 2, 0); } libvpx-1.8.2/vp9/encoder/vp9_cost.h000066400000000000000000000031551357355204000171120ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_COST_H_ #define VPX_VP9_ENCODER_VP9_COST_H_ #include "vpx_dsp/prob.h" #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif extern const uint16_t vp9_prob_cost[256]; // The factor to scale from cost in bits to cost in vp9_prob_cost units. #define VP9_PROB_COST_SHIFT 9 #define vp9_cost_zero(prob) (vp9_prob_cost[prob]) #define vp9_cost_one(prob) vp9_cost_zero(256 - (prob)) #define vp9_cost_bit(prob, bit) vp9_cost_zero((bit) ? 256 - (prob) : (prob)) static INLINE unsigned int cost_branch256(const unsigned int ct[2], vpx_prob p) { return ct[0] * vp9_cost_zero(p) + ct[1] * vp9_cost_one(p); } static INLINE int treed_cost(vpx_tree tree, const vpx_prob *probs, int bits, int len) { int cost = 0; vpx_tree_index i = 0; do { const int bit = (bits >> --len) & 1; cost += vp9_cost_bit(probs[i >> 1], bit); i = tree[i + bit]; } while (len); return cost; } void vp9_cost_tokens(int *costs, const vpx_prob *probs, vpx_tree tree); void vp9_cost_tokens_skip(int *costs, const vpx_prob *probs, vpx_tree tree); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_COST_H_ libvpx-1.8.2/vp9/encoder/vp9_dct.c000066400000000000000000000464371357355204000167210ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <assert.h> #include <math.h> #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_idct.h" #include "vpx_dsp/fwd_txfm.h" #include "vpx_ports/mem.h" static void fdct4(const tran_low_t *input, tran_low_t *output) { tran_high_t step[4]; tran_high_t temp1, temp2; step[0] = input[0] + input[3]; step[1] = input[1] + input[2]; step[2] = input[1] - input[2]; step[3] = input[0] - input[3]; temp1 = (step[0] + step[1]) * cospi_16_64; temp2 = (step[0] - step[1]) * cospi_16_64; output[0] = (tran_low_t)fdct_round_shift(temp1); output[2] = (tran_low_t)fdct_round_shift(temp2); temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; output[1] = (tran_low_t)fdct_round_shift(temp1); output[3] = (tran_low_t)fdct_round_shift(temp2); } static void fdct8(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 tran_high_t t0, t1, t2, t3; // needs32 tran_high_t x0, x1, x2, x3; // canbe16 // stage 1 s0 = input[0] + input[7]; s1 = input[1] + input[6]; s2 = input[2] + input[5]; s3 = input[3] + input[4]; s4 = input[3] - input[4]; s5 = input[2] - input[5]; s6 = input[1] - input[6]; s7 = input[0] - input[7]; // fdct4(step, step); x0 = s0 + s3; x1 = s1 + s2; x2 = s1 - s2; x3 = s0 - s3; t0 = (x0 + x1) * cospi_16_64; t1 = (x0 - x1) * cospi_16_64; t2 = x2 * cospi_24_64 + x3 * cospi_8_64; t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; output[0] = (tran_low_t)fdct_round_shift(t0); output[2] = (tran_low_t)fdct_round_shift(t2); output[4] = (tran_low_t)fdct_round_shift(t1); output[6] = (tran_low_t)fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; t1 = (s6 + s5) * cospi_16_64; t2 = (tran_low_t)fdct_round_shift(t0); t3 = (tran_low_t)fdct_round_shift(t1); // Stage 3 x0 = s4 + t2; x1 = s4 - t2; x2 = s7 - t3; x3 = s7 + t3; // Stage 4 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; output[1] = (tran_low_t)fdct_round_shift(t0); output[3] = (tran_low_t)fdct_round_shift(t2); output[5] = (tran_low_t)fdct_round_shift(t1); output[7] = (tran_low_t)fdct_round_shift(t3); } static void fdct16(const tran_low_t in[16], tran_low_t out[16]) { tran_high_t step1[8]; // canbe16 tran_high_t step2[8]; // canbe16 tran_high_t step3[8]; // canbe16 tran_high_t input[8]; // canbe16 tran_high_t temp1, temp2; // needs32 // step 1 input[0] = in[0] + in[15]; input[1] = in[1] + in[14]; input[2] = in[2] + in[13]; input[3] = in[3] + in[12]; input[4] = in[4] + in[11]; input[5] = in[5] + in[10]; input[6] = in[6] + in[9]; input[7] = in[7] + in[8]; step1[0] = in[7] - in[8]; step1[1] = in[6] - in[9]; step1[2] = in[5] - in[10]; step1[3] = in[4] - in[11]; step1[4] = in[3] - in[12]; step1[5] = in[2] - in[13]; step1[6] = in[1] - in[14]; step1[7] = in[0] - in[15]; // fdct8(step, step); { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 tran_high_t t0, t1, t2, t3; // needs32 tran_high_t x0, x1, x2, x3; // canbe16 // stage 1 s0 = input[0] + input[7]; s1 = input[1] + input[6]; s2 = input[2] + input[5]; s3 = input[3] + input[4]; s4 = input[3] - input[4]; s5 = input[2] - input[5]; s6 = input[1] - input[6]; s7 = input[0] - input[7]; // fdct4(step, step); x0 = s0 + s3; x1 = s1 + s2; x2 = s1 - s2; x3 = s0 - s3; t0 = (x0 + x1) * cospi_16_64; t1 = (x0 - x1) * cospi_16_64; t2 = x3 * cospi_8_64 + x2 * cospi_24_64; t3 = x3 * cospi_24_64 - x2 * cospi_8_64; out[0] =
(tran_low_t)fdct_round_shift(t0); out[4] = (tran_low_t)fdct_round_shift(t2); out[8] = (tran_low_t)fdct_round_shift(t1); out[12] = (tran_low_t)fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; t1 = (s6 + s5) * cospi_16_64; t2 = fdct_round_shift(t0); t3 = fdct_round_shift(t1); // Stage 3 x0 = s4 + t2; x1 = s4 - t2; x2 = s7 - t3; x3 = s7 + t3; // Stage 4 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; out[2] = (tran_low_t)fdct_round_shift(t0); out[6] = (tran_low_t)fdct_round_shift(t2); out[10] = (tran_low_t)fdct_round_shift(t1); out[14] = (tran_low_t)fdct_round_shift(t3); } // step 2 temp1 = (step1[5] - step1[2]) * cospi_16_64; temp2 = (step1[4] - step1[3]) * cospi_16_64; step2[2] = fdct_round_shift(temp1); step2[3] = fdct_round_shift(temp2); temp1 = (step1[4] + step1[3]) * cospi_16_64; temp2 = (step1[5] + step1[2]) * cospi_16_64; step2[4] = fdct_round_shift(temp1); step2[5] = fdct_round_shift(temp2); // step 3 step3[0] = step1[0] + step2[3]; step3[1] = step1[1] + step2[2]; step3[2] = step1[1] - step2[2]; step3[3] = step1[0] - step2[3]; step3[4] = step1[7] - step2[4]; step3[5] = step1[6] - step2[5]; step3[6] = step1[6] + step2[5]; step3[7] = step1[7] + step2[4]; // step 4 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64; step2[1] = fdct_round_shift(temp1); step2[2] = fdct_round_shift(temp2); temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64; temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; step2[5] = fdct_round_shift(temp1); step2[6] = fdct_round_shift(temp2); // step 5 step1[0] = step3[0] + step2[1]; step1[1] = step3[0] - step2[1]; step1[2] = step3[3] + step2[2]; step1[3] = step3[3] - step2[2]; step1[4] = step3[4] - step2[5]; step1[5] = step3[4] + step2[5]; step1[6] = step3[7] - step2[6]; step1[7] = step3[7] + step2[6]; // step 6 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; out[1] = (tran_low_t)fdct_round_shift(temp1); out[9] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; temp2 = step1[3] * cospi_6_64 + step1[4] * cospi_26_64; out[5] = (tran_low_t)fdct_round_shift(temp1); out[13] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; out[3] = (tran_low_t)fdct_round_shift(temp1); out[11] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; out[7] = (tran_low_t)fdct_round_shift(temp1); out[15] = (tran_low_t)fdct_round_shift(temp2); } static void fadst4(const tran_low_t *input, tran_low_t *output) { tran_high_t x0, x1, x2, x3; tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; x0 = input[0]; x1 = input[1]; x2 = input[2]; x3 = input[3]; if (!(x0 | x1 | x2 | x3)) { output[0] = output[1] = output[2] = output[3] = 0; return; } s0 = sinpi_1_9 * x0; s1 = sinpi_4_9 * x0; s2 = sinpi_2_9 * x1; s3 = sinpi_1_9 * x1; s4 = sinpi_3_9 * x2; s5 = sinpi_4_9 * x3; s6 = sinpi_2_9 * x3; s7 = x0 + x1 - x3; x0 = s0 + s2 + s5; x1 = sinpi_3_9 * s7; x2 = s1 - s3 + s6; x3 = s4; s0 = x0 + x3; s1 = x1; s2 = x2 - x3; s3 = x2 - x0 + x3; // 1-D transform scaling factor is sqrt(2). 
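  // A worked note on the fixed-point rounding used throughout this file
  // (based on the libvpx definitions: DCT_CONST_BITS is 14 and
  // fdct_round_shift(x) == ROUND_POWER_OF_TWO(x, DCT_CONST_BITS), i.e.
  //   fdct_round_shift(x) == (x + (1 << 13)) >> 14,
  // with cospi_16_64 == 11585 ~= 2^14 * cos(pi / 4) and the sinpi_*_9
  // constants similarly pre-scaled to 14 fractional bits). Each product
  // above therefore carries 14 fractional bits, and the shifts below return
  // the outputs to coefficient precision.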
output[0] = (tran_low_t)fdct_round_shift(s0); output[1] = (tran_low_t)fdct_round_shift(s1); output[2] = (tran_low_t)fdct_round_shift(s2); output[3] = (tran_low_t)fdct_round_shift(s3); } static void fadst8(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; tran_high_t x0 = input[7]; tran_high_t x1 = input[0]; tran_high_t x2 = input[5]; tran_high_t x3 = input[2]; tran_high_t x4 = input[3]; tran_high_t x5 = input[4]; tran_high_t x6 = input[1]; tran_high_t x7 = input[6]; // stage 1 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; s1 = cospi_30_64 * x0 - cospi_2_64 * x1; s2 = cospi_10_64 * x2 + cospi_22_64 * x3; s3 = cospi_22_64 * x2 - cospi_10_64 * x3; s4 = cospi_18_64 * x4 + cospi_14_64 * x5; s5 = cospi_14_64 * x4 - cospi_18_64 * x5; s6 = cospi_26_64 * x6 + cospi_6_64 * x7; s7 = cospi_6_64 * x6 - cospi_26_64 * x7; x0 = fdct_round_shift(s0 + s4); x1 = fdct_round_shift(s1 + s5); x2 = fdct_round_shift(s2 + s6); x3 = fdct_round_shift(s3 + s7); x4 = fdct_round_shift(s0 - s4); x5 = fdct_round_shift(s1 - s5); x6 = fdct_round_shift(s2 - s6); x7 = fdct_round_shift(s3 - s7); // stage 2 s0 = x0; s1 = x1; s2 = x2; s3 = x3; s4 = cospi_8_64 * x4 + cospi_24_64 * x5; s5 = cospi_24_64 * x4 - cospi_8_64 * x5; s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; s7 = cospi_8_64 * x6 + cospi_24_64 * x7; x0 = s0 + s2; x1 = s1 + s3; x2 = s0 - s2; x3 = s1 - s3; x4 = fdct_round_shift(s4 + s6); x5 = fdct_round_shift(s5 + s7); x6 = fdct_round_shift(s4 - s6); x7 = fdct_round_shift(s5 - s7); // stage 3 s2 = cospi_16_64 * (x2 + x3); s3 = cospi_16_64 * (x2 - x3); s6 = cospi_16_64 * (x6 + x7); s7 = cospi_16_64 * (x6 - x7); x2 = fdct_round_shift(s2); x3 = fdct_round_shift(s3); x6 = fdct_round_shift(s6); x7 = fdct_round_shift(s7); output[0] = (tran_low_t)x0; output[1] = (tran_low_t)-x4; output[2] = (tran_low_t)x6; output[3] = (tran_low_t)-x2; output[4] = (tran_low_t)x3; output[5] = (tran_low_t)-x7; output[6] = (tran_low_t)x5; output[7] = (tran_low_t)-x1; } static void fadst16(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; tran_high_t s9, s10, s11, s12, s13, s14, s15; tran_high_t x0 = input[15]; tran_high_t x1 = input[0]; tran_high_t x2 = input[13]; tran_high_t x3 = input[2]; tran_high_t x4 = input[11]; tran_high_t x5 = input[4]; tran_high_t x6 = input[9]; tran_high_t x7 = input[6]; tran_high_t x8 = input[7]; tran_high_t x9 = input[8]; tran_high_t x10 = input[5]; tran_high_t x11 = input[10]; tran_high_t x12 = input[3]; tran_high_t x13 = input[12]; tran_high_t x14 = input[1]; tran_high_t x15 = input[14]; // stage 1 s0 = x0 * cospi_1_64 + x1 * cospi_31_64; s1 = x0 * cospi_31_64 - x1 * cospi_1_64; s2 = x2 * cospi_5_64 + x3 * cospi_27_64; s3 = x2 * cospi_27_64 - x3 * cospi_5_64; s4 = x4 * cospi_9_64 + x5 * cospi_23_64; s5 = x4 * cospi_23_64 - x5 * cospi_9_64; s6 = x6 * cospi_13_64 + x7 * cospi_19_64; s7 = x6 * cospi_19_64 - x7 * cospi_13_64; s8 = x8 * cospi_17_64 + x9 * cospi_15_64; s9 = x8 * cospi_15_64 - x9 * cospi_17_64; s10 = x10 * cospi_21_64 + x11 * cospi_11_64; s11 = x10 * cospi_11_64 - x11 * cospi_21_64; s12 = x12 * cospi_25_64 + x13 * cospi_7_64; s13 = x12 * cospi_7_64 - x13 * cospi_25_64; s14 = x14 * cospi_29_64 + x15 * cospi_3_64; s15 = x14 * cospi_3_64 - x15 * cospi_29_64; x0 = fdct_round_shift(s0 + s8); x1 = fdct_round_shift(s1 + s9); x2 = fdct_round_shift(s2 + s10); x3 = fdct_round_shift(s3 + s11); x4 = fdct_round_shift(s4 + s12); x5 = fdct_round_shift(s5 + s13); x6 = fdct_round_shift(s6 + s14); x7 = fdct_round_shift(s7 + s15); x8 = 
fdct_round_shift(s0 - s8); x9 = fdct_round_shift(s1 - s9); x10 = fdct_round_shift(s2 - s10); x11 = fdct_round_shift(s3 - s11); x12 = fdct_round_shift(s4 - s12); x13 = fdct_round_shift(s5 - s13); x14 = fdct_round_shift(s6 - s14); x15 = fdct_round_shift(s7 - s15); // stage 2 s0 = x0; s1 = x1; s2 = x2; s3 = x3; s4 = x4; s5 = x5; s6 = x6; s7 = x7; s8 = x8 * cospi_4_64 + x9 * cospi_28_64; s9 = x8 * cospi_28_64 - x9 * cospi_4_64; s10 = x10 * cospi_20_64 + x11 * cospi_12_64; s11 = x10 * cospi_12_64 - x11 * cospi_20_64; s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; s13 = x12 * cospi_4_64 + x13 * cospi_28_64; s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; s15 = x14 * cospi_20_64 + x15 * cospi_12_64; x0 = s0 + s4; x1 = s1 + s5; x2 = s2 + s6; x3 = s3 + s7; x4 = s0 - s4; x5 = s1 - s5; x6 = s2 - s6; x7 = s3 - s7; x8 = fdct_round_shift(s8 + s12); x9 = fdct_round_shift(s9 + s13); x10 = fdct_round_shift(s10 + s14); x11 = fdct_round_shift(s11 + s15); x12 = fdct_round_shift(s8 - s12); x13 = fdct_round_shift(s9 - s13); x14 = fdct_round_shift(s10 - s14); x15 = fdct_round_shift(s11 - s15); // stage 3 s0 = x0; s1 = x1; s2 = x2; s3 = x3; s4 = x4 * cospi_8_64 + x5 * cospi_24_64; s5 = x4 * cospi_24_64 - x5 * cospi_8_64; s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; s7 = x6 * cospi_8_64 + x7 * cospi_24_64; s8 = x8; s9 = x9; s10 = x10; s11 = x11; s12 = x12 * cospi_8_64 + x13 * cospi_24_64; s13 = x12 * cospi_24_64 - x13 * cospi_8_64; s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; s15 = x14 * cospi_8_64 + x15 * cospi_24_64; x0 = s0 + s2; x1 = s1 + s3; x2 = s0 - s2; x3 = s1 - s3; x4 = fdct_round_shift(s4 + s6); x5 = fdct_round_shift(s5 + s7); x6 = fdct_round_shift(s4 - s6); x7 = fdct_round_shift(s5 - s7); x8 = s8 + s10; x9 = s9 + s11; x10 = s8 - s10; x11 = s9 - s11; x12 = fdct_round_shift(s12 + s14); x13 = fdct_round_shift(s13 + s15); x14 = fdct_round_shift(s12 - s14); x15 = fdct_round_shift(s13 - s15); // stage 4 s2 = (-cospi_16_64) * (x2 + x3); s3 = cospi_16_64 * (x2 - x3); s6 = cospi_16_64 * (x6 + x7); s7 = cospi_16_64 * (-x6 + x7); s10 = cospi_16_64 * (x10 + x11); s11 = cospi_16_64 * (-x10 + x11); s14 = (-cospi_16_64) * (x14 + x15); s15 = cospi_16_64 * (x14 - x15); x2 = fdct_round_shift(s2); x3 = fdct_round_shift(s3); x6 = fdct_round_shift(s6); x7 = fdct_round_shift(s7); x10 = fdct_round_shift(s10); x11 = fdct_round_shift(s11); x14 = fdct_round_shift(s14); x15 = fdct_round_shift(s15); output[0] = (tran_low_t)x0; output[1] = (tran_low_t)-x8; output[2] = (tran_low_t)x12; output[3] = (tran_low_t)-x4; output[4] = (tran_low_t)x6; output[5] = (tran_low_t)x14; output[6] = (tran_low_t)x10; output[7] = (tran_low_t)x2; output[8] = (tran_low_t)x3; output[9] = (tran_low_t)x11; output[10] = (tran_low_t)x15; output[11] = (tran_low_t)x7; output[12] = (tran_low_t)x5; output[13] = (tran_low_t)-x13; output[14] = (tran_low_t)x9; output[15] = (tran_low_t)-x1; } static const transform_2d FHT_4[] = { { fdct4, fdct4 }, // DCT_DCT = 0 { fadst4, fdct4 }, // ADST_DCT = 1 { fdct4, fadst4 }, // DCT_ADST = 2 { fadst4, fadst4 } // ADST_ADST = 3 }; static const transform_2d FHT_8[] = { { fdct8, fdct8 }, // DCT_DCT = 0 { fadst8, fdct8 }, // ADST_DCT = 1 { fdct8, fadst8 }, // DCT_ADST = 2 { fadst8, fadst8 } // ADST_ADST = 3 }; static const transform_2d FHT_16[] = { { fdct16, fdct16 }, // DCT_DCT = 0 { fadst16, fdct16 }, // ADST_DCT = 1 { fdct16, fadst16 }, // DCT_ADST = 2 { fadst16, fadst16 } // ADST_ADST = 3 }; void vp9_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { if (tx_type == DCT_DCT) { vpx_fdct4x4_c(input, output, 
stride); } else { tran_low_t out[4 * 4]; int i, j; tran_low_t temp_in[4], temp_out[4]; const transform_2d ht = FHT_4[tx_type]; // Columns for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) temp_in[j] = input[j * stride + i] * 16; if (i == 0 && temp_in[0]) temp_in[0] += 1; ht.cols(temp_in, temp_out); for (j = 0; j < 4; ++j) out[j * 4 + i] = temp_out[j]; } // Rows for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) temp_in[j] = out[j + i * 4]; ht.rows(temp_in, temp_out); for (j = 0; j < 4; ++j) output[j + i * 4] = (temp_out[j] + 1) >> 2; } } } void vp9_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { if (tx_type == DCT_DCT) { vpx_fdct8x8_c(input, output, stride); } else { tran_low_t out[64]; int i, j; tran_low_t temp_in[8], temp_out[8]; const transform_2d ht = FHT_8[tx_type]; // Columns for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = input[j * stride + i] * 4; ht.cols(temp_in, temp_out); for (j = 0; j < 8; ++j) out[j * 8 + i] = temp_out[j]; } // Rows for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j + i * 8]; ht.rows(temp_in, temp_out); for (j = 0; j < 8; ++j) output[j + i * 8] = (temp_out[j] + (temp_out[j] < 0)) >> 1; } } } /* 4-point reversible, orthonormal Walsh-Hadamard in 3.5 adds, 0.5 shifts per pixel. */ void vp9_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { int i; tran_high_t a1, b1, c1, d1, e1; const int16_t *ip_pass0 = input; const tran_low_t *ip = NULL; tran_low_t *op = output; for (i = 0; i < 4; i++) { a1 = ip_pass0[0 * stride]; b1 = ip_pass0[1 * stride]; c1 = ip_pass0[2 * stride]; d1 = ip_pass0[3 * stride]; a1 += b1; d1 = d1 - c1; e1 = (a1 - d1) >> 1; b1 = e1 - b1; c1 = e1 - c1; a1 -= c1; d1 += b1; op[0] = (tran_low_t)a1; op[4] = (tran_low_t)c1; op[8] = (tran_low_t)d1; op[12] = (tran_low_t)b1; ip_pass0++; op++; } ip = output; op = output; for (i = 0; i < 4; i++) { a1 = ip[0]; b1 = ip[1]; c1 = ip[2]; d1 = ip[3]; a1 += b1; d1 -= c1; e1 = (a1 - d1) >> 1; b1 = e1 - b1; c1 = e1 - c1; a1 -= c1; d1 += b1; op[0] = (tran_low_t)(a1 * UNIT_QUANT_FACTOR); op[1] = (tran_low_t)(c1 * UNIT_QUANT_FACTOR); op[2] = (tran_low_t)(d1 * UNIT_QUANT_FACTOR); op[3] = (tran_low_t)(b1 * UNIT_QUANT_FACTOR); ip += 4; op += 4; } } void vp9_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { if (tx_type == DCT_DCT) { vpx_fdct16x16_c(input, output, stride); } else { tran_low_t out[256]; int i, j; tran_low_t temp_in[16], temp_out[16]; const transform_2d ht = FHT_16[tx_type]; // Columns for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = input[j * stride + i] * 4; ht.cols(temp_in, temp_out); for (j = 0; j < 16; ++j) out[j * 16 + i] = (temp_out[j] + 1 + (temp_out[j] < 0)) >> 2; } // Rows for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j + i * 16]; ht.rows(temp_in, temp_out); for (j = 0; j < 16; ++j) output[j + i * 16] = temp_out[j]; } } } #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_fht4x4_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { vp9_fht4x4_c(input, output, stride, tx_type); } void vp9_highbd_fht8x8_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { vp9_fht8x8_c(input, output, stride, tx_type); } void vp9_highbd_fwht4x4_c(const int16_t *input, tran_low_t *output, int stride) { vp9_fwht4x4_c(input, output, stride); } void vp9_highbd_fht16x16_c(const int16_t *input, tran_low_t *output, int stride, int tx_type) { vp9_fht16x16_c(input, output, stride, tx_type); } #endif // CONFIG_VP9_HIGHBITDEPTH 
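/* Usage sketch (illustrative only, not part of the original libvpx file):
 * how the hybrid transforms above are driven. tx_type indexes the FHT_*
 * tables (DCT_DCT = 0, ADST_DCT, DCT_ADST, ADST_ADST) to pick the column
 * and row kernels; each wrapper scales the residual up before the column
 * pass and rounds the extra precision away after the row pass. */
#if 0
{
  int16_t residual[4 * 4];   /* pixel-domain residual, stride 4 */
  tran_low_t coeffs[4 * 4];  /* transform coefficients out */
  vp9_fht4x4_c(residual, coeffs, 4, ADST_ADST);  /* ADST in both directions */
}
#endif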
libvpx-1.8.2/vp9/encoder/vp9_denoiser.c000066400000000000000000000750141357355204000177500ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_scale/yv12config.h" #include "vpx/vpx_integer.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/encoder/vp9_context_tree.h" #include "vp9/encoder/vp9_denoiser.h" #include "vp9/encoder/vp9_encoder.h" #ifdef OUTPUT_YUV_DENOISED static void make_grayscale(YV12_BUFFER_CONFIG *yuv); #endif static int absdiff_thresh(BLOCK_SIZE bs, int increase_denoising) { (void)bs; return 3 + (increase_denoising ? 1 : 0); } static int delta_thresh(BLOCK_SIZE bs, int increase_denoising) { (void)bs; (void)increase_denoising; return 4; } static int noise_motion_thresh(BLOCK_SIZE bs, int increase_denoising) { (void)bs; (void)increase_denoising; return 625; } static unsigned int sse_thresh(BLOCK_SIZE bs, int increase_denoising) { return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 80 : 40); } static int sse_diff_thresh(BLOCK_SIZE bs, int increase_denoising, int motion_magnitude) { if (motion_magnitude > noise_motion_thresh(bs, increase_denoising)) { if (increase_denoising) return (1 << num_pels_log2_lookup[bs]) << 2; else return 0; } else { return (1 << num_pels_log2_lookup[bs]) << 4; } } static int total_adj_weak_thresh(BLOCK_SIZE bs, int increase_denoising) { return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 3 : 2); } // TODO(jackychen): If increase_denoising is enabled in the future, // we might need to update the code for calculating 'total_adj' in // case the C code is not bit-exact with corresponding sse2 code. int vp9_denoiser_filter_c(const uint8_t *sig, int sig_stride, const uint8_t *mc_avg, int mc_avg_stride, uint8_t *avg, int avg_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude) { int r, c; const uint8_t *sig_start = sig; const uint8_t *mc_avg_start = mc_avg; uint8_t *avg_start = avg; int diff, adj, absdiff, delta; int adj_val[] = { 3, 4, 6 }; int total_adj = 0; int shift_inc = 1; // If motion_magnitude is small, making the denoiser more aggressive by // increasing the adjustment for each level. Add another increment for // blocks that are labeled for increase denoising. if (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) { if (increase_denoising) { shift_inc = 2; } adj_val[0] += shift_inc; adj_val[1] += shift_inc; adj_val[2] += shift_inc; } // First attempt to apply a strong temporal denoising filter. 
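  // In outline: a pixel whose motion-compensated average is within
  // absdiff_thresh() (3, or 4 with increase_denoising) of the source is
  // replaced by that average outright; larger differences are only nudged
  // toward the average by adj_val[] (3/4/6, biased upward when motion is
  // small), while total_adj accumulates the net change so the second pass
  // further down can back off if the block was altered too much.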
for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) { for (c = 0; c < (4 << b_width_log2_lookup[bs]); ++c) { diff = mc_avg[c] - sig[c]; absdiff = abs(diff); if (absdiff <= absdiff_thresh(bs, increase_denoising)) { avg[c] = mc_avg[c]; total_adj += diff; } else { switch (absdiff) { case 4: case 5: case 6: case 7: adj = adj_val[0]; break; case 8: case 9: case 10: case 11: case 12: case 13: case 14: case 15: adj = adj_val[1]; break; default: adj = adj_val[2]; } if (diff > 0) { avg[c] = VPXMIN(UINT8_MAX, sig[c] + adj); total_adj += adj; } else { avg[c] = VPXMAX(0, sig[c] - adj); total_adj -= adj; } } } sig += sig_stride; avg += avg_stride; mc_avg += mc_avg_stride; } // If the strong filter did not modify the signal too much, we're all set. if (abs(total_adj) <= total_adj_strong_thresh(bs, increase_denoising)) { return FILTER_BLOCK; } // Otherwise, we try to dampen the filter if the delta is not too high. delta = ((abs(total_adj) - total_adj_strong_thresh(bs, increase_denoising)) >> num_pels_log2_lookup[bs]) + 1; if (delta >= delta_thresh(bs, increase_denoising)) { return COPY_BLOCK; } mc_avg = mc_avg_start; avg = avg_start; sig = sig_start; for (r = 0; r < (4 << b_height_log2_lookup[bs]); ++r) { for (c = 0; c < (4 << b_width_log2_lookup[bs]); ++c) { diff = mc_avg[c] - sig[c]; adj = abs(diff); if (adj > delta) { adj = delta; } if (diff > 0) { // Diff positive means we made positive adjustment above // (in first try/attempt), so now make negative adjustment to bring // denoised signal down. avg[c] = VPXMAX(0, avg[c] - adj); total_adj -= adj; } else { // Diff negative means we made negative adjustment above // (in first try/attempt), so now make positive adjustment to bring // denoised signal up. avg[c] = VPXMIN(UINT8_MAX, avg[c] + adj); total_adj += adj; } } sig += sig_stride; avg += avg_stride; mc_avg += mc_avg_stride; } // We can use the filter if it has been sufficiently dampened if (abs(total_adj) <= total_adj_weak_thresh(bs, increase_denoising)) { return FILTER_BLOCK; } return COPY_BLOCK; } static uint8_t *block_start(uint8_t *framebuf, int stride, int mi_row, int mi_col) { return framebuf + (stride * mi_row << 3) + (mi_col << 3); } static VP9_DENOISER_DECISION perform_motion_compensation( VP9_COMMON *const cm, VP9_DENOISER *denoiser, MACROBLOCK *mb, BLOCK_SIZE bs, int increase_denoising, int mi_row, int mi_col, PICK_MODE_CONTEXT *ctx, int motion_magnitude, int is_skin, int *zeromv_filter, int consec_zeromv, int num_spatial_layers, int width, int lst_fb_idx, int gld_fb_idx, int use_svc, int spatial_layer, int use_gf_temporal_ref) { const int sse_diff = (ctx->newmv_sse == UINT_MAX) ? 0 : ((int)ctx->zeromv_sse - (int)ctx->newmv_sse); int frame; int denoise_layer_idx = 0; MACROBLOCKD *filter_mbd = &mb->e_mbd; MODE_INFO *mi = filter_mbd->mi[0]; MODE_INFO saved_mi; int i; struct buf_2d saved_dst[MAX_MB_PLANE]; struct buf_2d saved_pre[MAX_MB_PLANE]; const RefBuffer *saved_block_refs[2]; MV_REFERENCE_FRAME saved_frame; frame = ctx->best_reference_frame; saved_mi = *mi; if (is_skin && (motion_magnitude > 0 || consec_zeromv < 4)) return COPY_BLOCK; // Avoid denoising small blocks. When noise > kDenLow or frame width > 480, // denoise 16x16 blocks. if (bs == BLOCK_8X8 || bs == BLOCK_8X16 || bs == BLOCK_16X8 || (bs == BLOCK_16X16 && width > 480 && denoiser->denoising_level <= kDenLow)) return COPY_BLOCK; // If the best reference frame uses inter-prediction and there is enough of a // difference in sum-squared-error, use it. 
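  // As a concrete example of the thresholds involved: for a 16x16 block,
  // 1 << num_pels_log2_lookup[BLOCK_16X16] == 256, so sse_diff_thresh()
  // is 256 << 4 == 4096 at low motion, dropping to 256 << 2 == 1024 (or 0
  // without increase_denoising) once motion_magnitude exceeds
  // noise_motion_thresh() == 625.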
if (frame != INTRA_FRAME && frame != ALTREF_FRAME && frame != GOLDEN_FRAME && sse_diff > sse_diff_thresh(bs, increase_denoising, motion_magnitude)) { mi->ref_frame[0] = ctx->best_reference_frame; mi->mode = ctx->best_sse_inter_mode; mi->mv[0] = ctx->best_sse_mv; } else { // Otherwise, use the zero reference frame. frame = ctx->best_zeromv_reference_frame; ctx->newmv_sse = ctx->zeromv_sse; // Bias to last reference. if ((num_spatial_layers > 1 && !use_gf_temporal_ref) || frame == ALTREF_FRAME || (frame == GOLDEN_FRAME && use_gf_temporal_ref) || (frame != LAST_FRAME && ((ctx->zeromv_lastref_sse<(5 * ctx->zeromv_sse)>> 2) || denoiser->denoising_level >= kDenHigh))) { frame = LAST_FRAME; ctx->newmv_sse = ctx->zeromv_lastref_sse; } mi->ref_frame[0] = frame; mi->mode = ZEROMV; mi->mv[0].as_int = 0; ctx->best_sse_inter_mode = ZEROMV; ctx->best_sse_mv.as_int = 0; *zeromv_filter = 1; if (denoiser->denoising_level > kDenMedium) { motion_magnitude = 0; } } saved_frame = frame; // When using SVC, we need to map REF_FRAME to the frame buffer index. if (use_svc) { if (frame == LAST_FRAME) frame = lst_fb_idx + 1; else if (frame == GOLDEN_FRAME) frame = gld_fb_idx + 1; // Shift for the second spatial layer. if (num_spatial_layers - spatial_layer == 2) frame = frame + denoiser->num_ref_frames; denoise_layer_idx = num_spatial_layers - spatial_layer - 1; } // Force copy (no denoise, copy source in denoised buffer) if // running_avg_y[frame] is NULL. if (denoiser->running_avg_y[frame].buffer_alloc == NULL) { // Restore everything to its original state *mi = saved_mi; return COPY_BLOCK; } if (ctx->newmv_sse > sse_thresh(bs, increase_denoising)) { // Restore everything to its original state *mi = saved_mi; return COPY_BLOCK; } if (motion_magnitude > (noise_motion_thresh(bs, increase_denoising) << 3)) { // Restore everything to its original state *mi = saved_mi; return COPY_BLOCK; } // We will restore these after motion compensation. for (i = 0; i < MAX_MB_PLANE; ++i) { saved_pre[i] = filter_mbd->plane[i].pre[0]; saved_dst[i] = filter_mbd->plane[i].dst; } saved_block_refs[0] = filter_mbd->block_refs[0]; // Set the pointers in the MACROBLOCKD to point to the buffers in the denoiser // struct. 
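  // pre[0] is pointed at the previously denoised reference frame
  // (running_avg_y) and dst at the per-layer scratch buffer
  // (mc_running_avg_y), so vp9_build_inter_predictors_sby() below produces
  // a motion-compensated prediction built from denoised pixels rather than
  // from the actual reference frame.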
filter_mbd->plane[0].pre[0].buf = block_start(denoiser->running_avg_y[frame].y_buffer, denoiser->running_avg_y[frame].y_stride, mi_row, mi_col); filter_mbd->plane[0].pre[0].stride = denoiser->running_avg_y[frame].y_stride; filter_mbd->plane[1].pre[0].buf = block_start(denoiser->running_avg_y[frame].u_buffer, denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col); filter_mbd->plane[1].pre[0].stride = denoiser->running_avg_y[frame].uv_stride; filter_mbd->plane[2].pre[0].buf = block_start(denoiser->running_avg_y[frame].v_buffer, denoiser->running_avg_y[frame].uv_stride, mi_row, mi_col); filter_mbd->plane[2].pre[0].stride = denoiser->running_avg_y[frame].uv_stride; filter_mbd->plane[0].dst.buf = block_start( denoiser->mc_running_avg_y[denoise_layer_idx].y_buffer, denoiser->mc_running_avg_y[denoise_layer_idx].y_stride, mi_row, mi_col); filter_mbd->plane[0].dst.stride = denoiser->mc_running_avg_y[denoise_layer_idx].y_stride; filter_mbd->plane[1].dst.buf = block_start( denoiser->mc_running_avg_y[denoise_layer_idx].u_buffer, denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col); filter_mbd->plane[1].dst.stride = denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride; filter_mbd->plane[2].dst.buf = block_start( denoiser->mc_running_avg_y[denoise_layer_idx].v_buffer, denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride, mi_row, mi_col); filter_mbd->plane[2].dst.stride = denoiser->mc_running_avg_y[denoise_layer_idx].uv_stride; set_ref_ptrs(cm, filter_mbd, saved_frame, NONE); vp9_build_inter_predictors_sby(filter_mbd, mi_row, mi_col, bs); // Restore everything to its original state *mi = saved_mi; filter_mbd->block_refs[0] = saved_block_refs[0]; for (i = 0; i < MAX_MB_PLANE; ++i) { filter_mbd->plane[i].pre[0] = saved_pre[i]; filter_mbd->plane[i].dst = saved_dst[i]; } return FILTER_BLOCK; } void vp9_denoiser_denoise(VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx, VP9_DENOISER_DECISION *denoiser_decision, int use_gf_temporal_ref) { int mv_col, mv_row; int motion_magnitude = 0; int zeromv_filter = 0; VP9_DENOISER *denoiser = &cpi->denoiser; VP9_DENOISER_DECISION decision = COPY_BLOCK; const int shift = cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id == 2 ? denoiser->num_ref_frames : 0; YV12_BUFFER_CONFIG avg = denoiser->running_avg_y[INTRA_FRAME + shift]; const int denoise_layer_index = cpi->svc.number_spatial_layers - cpi->svc.spatial_layer_id - 1; YV12_BUFFER_CONFIG mc_avg = denoiser->mc_running_avg_y[denoise_layer_index]; uint8_t *avg_start = block_start(avg.y_buffer, avg.y_stride, mi_row, mi_col); uint8_t *mc_avg_start = block_start(mc_avg.y_buffer, mc_avg.y_stride, mi_row, mi_col); struct buf_2d src = mb->plane[0].src; int is_skin = 0; int increase_denoising = 0; int consec_zeromv = 0; int last_is_reference = cpi->ref_frame_flags & VP9_LAST_FLAG; mv_col = ctx->best_sse_mv.as_mv.col; mv_row = ctx->best_sse_mv.as_mv.row; motion_magnitude = mv_row * mv_row + mv_col * mv_col; if (cpi->use_skin_detection && bs <= BLOCK_32X32 && denoiser->denoising_level < kDenHigh) { int motion_level = (motion_magnitude < 16) ? 0 : 1; // If motion for current block is small/zero, compute consec_zeromv for // skin detection (early exit in skin detection is done for large // consec_zeromv when current block has small/zero motion). consec_zeromv = 0; if (motion_level == 0) { VP9_COMMON *const cm = &cpi->common; int j, i; // Loop through the 8x8 sub-blocks. 
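      // consec_zeromv ends up as the minimum zero-motion count over the 8x8
      // sub-blocks of this partition; the early exit in the loop fires once
      // that minimum falls below 4, because the skin detector only cares
      // whether the block has been static for at least that long.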
const int bw = num_8x8_blocks_wide_lookup[bs]; const int bh = num_8x8_blocks_high_lookup[bs]; const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); const int block_index = mi_row * cm->mi_cols + mi_col; consec_zeromv = 100; for (i = 0; i < ymis; i++) { for (j = 0; j < xmis; j++) { int bl_index = block_index + i * cm->mi_cols + j; consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], consec_zeromv); // No need to keep checking 8x8 blocks if any of the sub-blocks // has small consec_zeromv (since threshold for no_skin based on // zero/small motion in skin detection is high, i.e, > 4). if (consec_zeromv < 4) { i = ymis; break; } } } } // TODO(marpan): Compute skin detection over sub-blocks. is_skin = vp9_compute_skin_block( mb->plane[0].src.buf, mb->plane[1].src.buf, mb->plane[2].src.buf, mb->plane[0].src.stride, mb->plane[1].src.stride, bs, consec_zeromv, motion_level); } if (!is_skin && denoiser->denoising_level == kDenHigh) increase_denoising = 1; // Copy block if LAST_FRAME is not a reference. // Last doesn't always exist when SVC layers are dynamically changed, e.g. top // spatial layer doesn't have last reference when it's brought up for the // first time on the fly. if (last_is_reference && denoiser->denoising_level >= kDenLow && !ctx->sb_skip_denoising) decision = perform_motion_compensation( &cpi->common, denoiser, mb, bs, increase_denoising, mi_row, mi_col, ctx, motion_magnitude, is_skin, &zeromv_filter, consec_zeromv, cpi->svc.number_spatial_layers, cpi->Source->y_width, cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->use_svc, cpi->svc.spatial_layer_id, use_gf_temporal_ref); if (decision == FILTER_BLOCK) { decision = vp9_denoiser_filter(src.buf, src.stride, mc_avg_start, mc_avg.y_stride, avg_start, avg.y_stride, increase_denoising, bs, motion_magnitude); } if (decision == FILTER_BLOCK) { vpx_convolve_copy(avg_start, avg.y_stride, src.buf, src.stride, NULL, 0, 0, 0, 0, num_4x4_blocks_wide_lookup[bs] << 2, num_4x4_blocks_high_lookup[bs] << 2); } else { // COPY_BLOCK vpx_convolve_copy(src.buf, src.stride, avg_start, avg.y_stride, NULL, 0, 0, 0, 0, num_4x4_blocks_wide_lookup[bs] << 2, num_4x4_blocks_high_lookup[bs] << 2); } *denoiser_decision = decision; if (decision == FILTER_BLOCK && zeromv_filter == 1) *denoiser_decision = FILTER_ZEROMV_BLOCK; } static void copy_frame(YV12_BUFFER_CONFIG *const dest, const YV12_BUFFER_CONFIG *const src) { int r; const uint8_t *srcbuf = src->y_buffer; uint8_t *destbuf = dest->y_buffer; assert(dest->y_width == src->y_width); assert(dest->y_height == src->y_height); for (r = 0; r < dest->y_height; ++r) { memcpy(destbuf, srcbuf, dest->y_width); destbuf += dest->y_stride; srcbuf += src->y_stride; } } static void swap_frame_buffer(YV12_BUFFER_CONFIG *const dest, YV12_BUFFER_CONFIG *const src) { uint8_t *tmp_buf = dest->y_buffer; assert(dest->y_width == src->y_width); assert(dest->y_height == src->y_height); dest->y_buffer = src->y_buffer; src->y_buffer = tmp_buf; } void vp9_denoiser_update_frame_info( VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, struct SVC *svc, FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized, int svc_refresh_denoiser_buffers, int second_spatial_layer) { const int shift = second_spatial_layer ? denoiser->num_ref_frames : 0; // Copy source into denoised reference buffers on KEY_FRAME or // if the just encoded frame was resized. 
For SVC, copy source if the base // spatial layer was key frame. if (frame_type == KEY_FRAME || resized != 0 || denoiser->reset || svc_refresh_denoiser_buffers) { int i; // Start at 1 so as not to overwrite the INTRA_FRAME for (i = 1; i < denoiser->num_ref_frames; ++i) { if (denoiser->running_avg_y[i + shift].buffer_alloc != NULL) copy_frame(&denoiser->running_avg_y[i + shift], &src); } denoiser->reset = 0; return; } if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->use_set_ref_frame_config) { int i; for (i = 0; i < REF_FRAMES; i++) { if (svc->update_buffer_slot[svc->spatial_layer_id] & (1 << i)) copy_frame(&denoiser->running_avg_y[i + 1 + shift], &denoiser->running_avg_y[INTRA_FRAME + shift]); } } else { // If more than one refresh occurs, must copy frame buffer. if ((refresh_alt_ref_frame + refresh_golden_frame + refresh_last_frame) > 1) { if (refresh_alt_ref_frame) { copy_frame(&denoiser->running_avg_y[alt_fb_idx + 1 + shift], &denoiser->running_avg_y[INTRA_FRAME + shift]); } if (refresh_golden_frame) { copy_frame(&denoiser->running_avg_y[gld_fb_idx + 1 + shift], &denoiser->running_avg_y[INTRA_FRAME + shift]); } if (refresh_last_frame) { copy_frame(&denoiser->running_avg_y[lst_fb_idx + 1 + shift], &denoiser->running_avg_y[INTRA_FRAME + shift]); } } else { if (refresh_alt_ref_frame) { swap_frame_buffer(&denoiser->running_avg_y[alt_fb_idx + 1 + shift], &denoiser->running_avg_y[INTRA_FRAME + shift]); } if (refresh_golden_frame) { swap_frame_buffer(&denoiser->running_avg_y[gld_fb_idx + 1 + shift], &denoiser->running_avg_y[INTRA_FRAME + shift]); } if (refresh_last_frame) { swap_frame_buffer(&denoiser->running_avg_y[lst_fb_idx + 1 + shift], &denoiser->running_avg_y[INTRA_FRAME + shift]); } } } } void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx) { ctx->zeromv_sse = UINT_MAX; ctx->newmv_sse = UINT_MAX; ctx->zeromv_lastref_sse = UINT_MAX; ctx->best_sse_mv.as_int = 0; } void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse, PREDICTION_MODE mode, PICK_MODE_CONTEXT *ctx) { if (mi->mv[0].as_int == 0 && sse < ctx->zeromv_sse) { ctx->zeromv_sse = sse; ctx->best_zeromv_reference_frame = mi->ref_frame[0]; if (mi->ref_frame[0] == LAST_FRAME) ctx->zeromv_lastref_sse = sse; } if (mi->mv[0].as_int != 0 && sse < ctx->newmv_sse) { ctx->newmv_sse = sse; ctx->best_sse_inter_mode = mode; ctx->best_sse_mv = mi->mv[0]; ctx->best_reference_frame = mi->ref_frame[0]; } } static int vp9_denoiser_realloc_svc_helper(VP9_COMMON *cm, VP9_DENOISER *denoiser, int fb_idx) { int fail = 0; if (denoiser->running_avg_y[fb_idx].buffer_alloc == NULL) { fail = vpx_alloc_frame_buffer(&denoiser->running_avg_y[fb_idx], cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, 0); if (fail) { vp9_denoiser_free(denoiser); return 1; } } return 0; } int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser, struct SVC *svc, int svc_buf_shift, int refresh_alt, int refresh_gld, int refresh_lst, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx) { int fail = 0; if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->use_set_ref_frame_config) { int i; for (i = 0; i < REF_FRAMES; i++) { if (cm->frame_type == KEY_FRAME || svc->update_buffer_slot[svc->spatial_layer_id] & (1 << i)) { fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, i + 1 + svc_buf_shift); } } } else { if (refresh_alt) { // Increase the frame buffer index by 1 to map it to the buffer index in // the denoiser. 
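      // (Slot 0 of running_avg_y is the current denoised frame -- the
      // INTRA_FRAME entry -- so reference buffer i lives at slot i + 1,
      // shifted by svc_buf_shift when denoising the second spatial layer.)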
fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, alt_fb_idx + 1 + svc_buf_shift); if (fail) return 1; } if (refresh_gld) { fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, gld_fb_idx + 1 + svc_buf_shift); if (fail) return 1; } if (refresh_lst) { fail = vp9_denoiser_realloc_svc_helper(cm, denoiser, lst_fb_idx + 1 + svc_buf_shift); if (fail) return 1; } } return 0; } int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser, int use_svc, int noise_sen, int width, int height, int ssx, int ssy, #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth, #endif int border) { int i, layer, fail, init_num_ref_frames; const int legacy_byte_alignment = 0; int num_layers = 1; int scaled_width = width; int scaled_height = height; if (use_svc) { LAYER_CONTEXT *lc = &svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers + svc->temporal_layer_id]; get_layer_resolution(width, height, lc->scaling_factor_num, lc->scaling_factor_den, &scaled_width, &scaled_height); // For SVC: only denoise at most 2 spatial (highest) layers. if (noise_sen >= 2) // Denoise from one spatial layer below the top. svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 2, 0); else // Only denoise the top spatial layer. svc->first_layer_denoise = VPXMAX(svc->number_spatial_layers - 1, 0); num_layers = svc->number_spatial_layers - svc->first_layer_denoise; } assert(denoiser != NULL); denoiser->num_ref_frames = use_svc ? SVC_REF_FRAMES : NONSVC_REF_FRAMES; init_num_ref_frames = use_svc ? MAX_REF_FRAMES : NONSVC_REF_FRAMES; denoiser->num_layers = num_layers; CHECK_MEM_ERROR(cm, denoiser->running_avg_y, vpx_calloc(denoiser->num_ref_frames * num_layers, sizeof(denoiser->running_avg_y[0]))); CHECK_MEM_ERROR( cm, denoiser->mc_running_avg_y, vpx_calloc(num_layers, sizeof(denoiser->mc_running_avg_y[0]))); for (layer = 0; layer < num_layers; ++layer) { const int denoise_width = (layer == 0) ? width : scaled_width; const int denoise_height = (layer == 0) ? height : scaled_height; for (i = 0; i < init_num_ref_frames; ++i) { fail = vpx_alloc_frame_buffer( &denoiser->running_avg_y[i + denoiser->num_ref_frames * layer], denoise_width, denoise_height, ssx, ssy, #if CONFIG_VP9_HIGHBITDEPTH use_highbitdepth, #endif border, legacy_byte_alignment); if (fail) { vp9_denoiser_free(denoiser); return 1; } #ifdef OUTPUT_YUV_DENOISED make_grayscale(&denoiser->running_avg_y[i]); #endif } fail = vpx_alloc_frame_buffer(&denoiser->mc_running_avg_y[layer], denoise_width, denoise_height, ssx, ssy, #if CONFIG_VP9_HIGHBITDEPTH use_highbitdepth, #endif border, legacy_byte_alignment); if (fail) { vp9_denoiser_free(denoiser); return 1; } } // denoiser->last_source only used for noise_estimation, so only for top // layer. 
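  // Sizing sketch for the allocations above: with two denoised SVC layers,
  // running_avg_y holds num_ref_frames * num_layers == SVC_REF_FRAMES (9)
  // * 2 == 18 frame buffers (layer 0 at full resolution, layer 1 at the
  // scaled resolution), plus one mc_running_avg_y scratch buffer per layer.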
fail = vpx_alloc_frame_buffer(&denoiser->last_source, width, height, ssx, ssy, #if CONFIG_VP9_HIGHBITDEPTH use_highbitdepth, #endif border, legacy_byte_alignment); if (fail) { vp9_denoiser_free(denoiser); return 1; } #ifdef OUTPUT_YUV_DENOISED make_grayscale(&denoiser->running_avg_y[i]); #endif denoiser->frame_buffer_initialized = 1; denoiser->denoising_level = kDenMedium; denoiser->prev_denoising_level = kDenMedium; denoiser->reset = 0; denoiser->current_denoiser_frame = 0; return 0; } void vp9_denoiser_free(VP9_DENOISER *denoiser) { int i; if (denoiser == NULL) { return; } denoiser->frame_buffer_initialized = 0; for (i = 0; i < denoiser->num_ref_frames * denoiser->num_layers; ++i) { vpx_free_frame_buffer(&denoiser->running_avg_y[i]); } vpx_free(denoiser->running_avg_y); denoiser->running_avg_y = NULL; for (i = 0; i < denoiser->num_layers; ++i) { vpx_free_frame_buffer(&denoiser->mc_running_avg_y[i]); } vpx_free(denoiser->mc_running_avg_y); denoiser->mc_running_avg_y = NULL; vpx_free_frame_buffer(&denoiser->last_source); } static void force_refresh_longterm_ref(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; // If long term reference is used, force refresh of that slot, so // denoiser buffer for long term reference stays in sync. if (svc->use_gf_temporal_ref_current_layer) { int index = svc->spatial_layer_id; if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1; assert(index >= 0); cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx; cpi->refresh_alt_ref_frame = 1; } } void vp9_denoiser_set_noise_level(VP9_COMP *const cpi, int noise_level) { VP9_DENOISER *const denoiser = &cpi->denoiser; denoiser->denoising_level = noise_level; if (denoiser->denoising_level > kDenLowLow && denoiser->prev_denoising_level == kDenLowLow) { denoiser->reset = 1; force_refresh_longterm_ref(cpi); } else { denoiser->reset = 0; } denoiser->prev_denoising_level = denoiser->denoising_level; } // Scale/increase the partition threshold // for denoiser speed-up. int64_t vp9_scale_part_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level, int content_state, int temporal_layer_id) { if ((content_state == kLowSadLowSumdiff) || (content_state == kHighSadLowSumdiff) || (content_state == kLowVarHighSumdiff) || (noise_level == kDenHigh) || (temporal_layer_id != 0)) { int64_t scaled_thr = (temporal_layer_id < 2) ? (3 * threshold) >> 1 : (7 * threshold) >> 2; return scaled_thr; } else { return (5 * threshold) >> 2; } } // Scale/increase the ac skip threshold for // denoiser speed-up. int64_t vp9_scale_acskip_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level, int abs_sumdiff, int temporal_layer_id) { if (noise_level >= kDenLow && abs_sumdiff < 5) return threshold *= (noise_level == kDenLow) ? 2 : (temporal_layer_id == 2) ? 10 : 6; else return threshold; } void vp9_denoiser_reset_on_first_frame(VP9_COMP *const cpi) { if (vp9_denoise_svc_non_key(cpi) && cpi->denoiser.current_denoiser_frame == 0) { cpi->denoiser.reset = 1; force_refresh_longterm_ref(cpi); } } void vp9_denoiser_update_ref_frame(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; SVC *const svc = &cpi->svc; if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && cpi->denoiser.denoising_level > kDenLowLow) { int svc_refresh_denoiser_buffers = 0; int denoise_svc_second_layer = 0; FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type; cpi->denoiser.current_denoiser_frame++; if (cpi->use_svc) { const int svc_buf_shift = svc->number_spatial_layers - svc->spatial_layer_id == 2 ? 
cpi->denoiser.num_ref_frames : 0; int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id, svc->number_temporal_layers); LAYER_CONTEXT *const lc = &svc->layer_context[layer]; svc_refresh_denoiser_buffers = lc->is_key_frame || svc->spatial_layer_sync[svc->spatial_layer_id]; denoise_svc_second_layer = svc->number_spatial_layers - svc->spatial_layer_id == 2 ? 1 : 0; // Check if we need to allocate extra buffers in the denoiser // for refreshed frames. if (vp9_denoiser_realloc_svc(cm, &cpi->denoiser, svc, svc_buf_shift, cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx, cpi->lst_fb_idx)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to re-allocate denoiser for SVC"); } vp9_denoiser_update_frame_info( &cpi->denoiser, *cpi->Source, svc, frame_type, cpi->refresh_alt_ref_frame, cpi->refresh_golden_frame, cpi->refresh_last_frame, cpi->alt_fb_idx, cpi->gld_fb_idx, cpi->lst_fb_idx, cpi->resize_pending, svc_refresh_denoiser_buffers, denoise_svc_second_layer); } } #ifdef OUTPUT_YUV_DENOISED static void make_grayscale(YV12_BUFFER_CONFIG *yuv) { int r, c; uint8_t *u = yuv->u_buffer; uint8_t *v = yuv->v_buffer; for (r = 0; r < yuv->uv_height; ++r) { for (c = 0; c < yuv->uv_width; ++c) { u[c] = UINT8_MAX / 2; v[c] = UINT8_MAX / 2; } u += yuv->uv_stride; v += yuv->uv_stride; } } #endif libvpx-1.8.2/vp9/encoder/vp9_denoiser.h000066400000000000000000000106401357355204000177470ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_DENOISER_H_ #define VPX_VP9_ENCODER_VP9_DENOISER_H_ #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_skin_detection.h" #include "vpx_scale/yv12config.h" #ifdef __cplusplus extern "C" { #endif #define MOTION_MAGNITUDE_THRESHOLD (8 * 3) // Denoiser is used in non svc real-time mode which does not use alt-ref, so no // need to allocate for it, and hence we need MAX_REF_FRAME - 1 #define NONSVC_REF_FRAMES MAX_REF_FRAMES - 1 // Number of frame buffers when SVC is used. 
[0] for current denoised buffer and // [1..8] for REF_FRAMES #define SVC_REF_FRAMES 9 typedef enum vp9_denoiser_decision { COPY_BLOCK, FILTER_BLOCK, FILTER_ZEROMV_BLOCK } VP9_DENOISER_DECISION; typedef enum vp9_denoiser_level { kDenLowLow, kDenLow, kDenMedium, kDenHigh } VP9_DENOISER_LEVEL; typedef struct vp9_denoiser { YV12_BUFFER_CONFIG *running_avg_y; YV12_BUFFER_CONFIG *mc_running_avg_y; YV12_BUFFER_CONFIG last_source; int frame_buffer_initialized; int reset; int num_ref_frames; int num_layers; unsigned int current_denoiser_frame; VP9_DENOISER_LEVEL denoising_level; VP9_DENOISER_LEVEL prev_denoising_level; } VP9_DENOISER; typedef struct { int64_t zero_last_cost_orig; int *ref_frame_cost; int_mv (*frame_mv)[MAX_REF_FRAMES]; int reuse_inter_pred; TX_SIZE best_tx_size; PREDICTION_MODE best_mode; MV_REFERENCE_FRAME best_ref_frame; INTERP_FILTER best_pred_filter; uint8_t best_mode_skip_txfm; } VP9_PICKMODE_CTX_DEN; struct VP9_COMP; struct SVC; void vp9_denoiser_update_frame_info( VP9_DENOISER *denoiser, YV12_BUFFER_CONFIG src, struct SVC *svc, FRAME_TYPE frame_type, int refresh_alt_ref_frame, int refresh_golden_frame, int refresh_last_frame, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx, int resized, int svc_refresh_denoiser_buffers, int second_spatial_layer); void vp9_denoiser_denoise(struct VP9_COMP *cpi, MACROBLOCK *mb, int mi_row, int mi_col, BLOCK_SIZE bs, PICK_MODE_CONTEXT *ctx, VP9_DENOISER_DECISION *denoiser_decision, int use_gf_temporal_ref); void vp9_denoiser_reset_frame_stats(PICK_MODE_CONTEXT *ctx); void vp9_denoiser_update_frame_stats(MODE_INFO *mi, unsigned int sse, PREDICTION_MODE mode, PICK_MODE_CONTEXT *ctx); int vp9_denoiser_realloc_svc(VP9_COMMON *cm, VP9_DENOISER *denoiser, struct SVC *svc, int svc_buf_shift, int refresh_alt, int refresh_gld, int refresh_lst, int alt_fb_idx, int gld_fb_idx, int lst_fb_idx); int vp9_denoiser_alloc(VP9_COMMON *cm, struct SVC *svc, VP9_DENOISER *denoiser, int use_svc, int noise_sen, int width, int height, int ssx, int ssy, #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth, #endif int border); #if CONFIG_VP9_TEMPORAL_DENOISING // This function is used by both c and sse2 denoiser implementations. // Define it as a static function within the scope where vp9_denoiser.h // is referenced. static INLINE int total_adj_strong_thresh(BLOCK_SIZE bs, int increase_denoising) { return (1 << num_pels_log2_lookup[bs]) * (increase_denoising ? 3 : 2); } #endif void vp9_denoiser_free(VP9_DENOISER *denoiser); void vp9_denoiser_set_noise_level(struct VP9_COMP *const cpi, int noise_level); void vp9_denoiser_reset_on_first_frame(struct VP9_COMP *const cpi); int64_t vp9_scale_part_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level, int content_state, int temporal_layer_id); int64_t vp9_scale_acskip_thresh(int64_t threshold, VP9_DENOISER_LEVEL noise_level, int abs_sumdiff, int temporal_layer_id); void vp9_denoiser_update_ref_frame(struct VP9_COMP *const cpi); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_DENOISER_H_ libvpx-1.8.2/vp9/encoder/vp9_encodeframe.c000066400000000000000000007445051357355204000204200ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/

#include <float.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/vpx_timer.h"
#include "vpx_ports/system_state.h"

#if CONFIG_MISMATCH_DEBUG
#include "vpx_util/vpx_debug_util.h"
#endif  // CONFIG_MISMATCH_DEBUG

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_seg_common.h"
#include "vp9/common/vp9_tile_common.h"

#if !CONFIG_REALTIME_ONLY
#include "vp9/encoder/vp9_aq_360.h"
#include "vp9/encoder/vp9_aq_complexity.h"
#endif
#include "vp9/encoder/vp9_aq_cyclicrefresh.h"
#if !CONFIG_REALTIME_ONLY
#include "vp9/encoder/vp9_aq_variance.h"
#endif
#include "vp9/encoder/vp9_encodeframe.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_ethread.h"
#include "vp9/encoder/vp9_extend.h"
#include "vp9/encoder/vp9_multi_thread.h"
#include "vp9/encoder/vp9_partition_models.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vp9/encoder/vp9_segmentation.h"
#include "vp9/encoder/vp9_tokenize.h"

static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t,
                              int output_enabled, int mi_row, int mi_col,
                              BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx);

// This is used as a reference when computing the source variance for the
// purpose of activity masking.
// Eventually this should be replaced by custom no-reference routines,
// which will be faster.
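// Measuring "variance" against this flat all-128 buffer works because the
// variance kernels return sse - sum^2 / N of the differences: with a
// constant reference, diff = src - 128 and
//   sum((src - 128)^2) - (sum(src - 128))^2 / N
// equals N times the per-pixel variance of the block, independent of the
// 128 offset.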
static const uint8_t VP9_VAR_OFFS[64] = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; #if CONFIG_VP9_HIGHBITDEPTH static const uint16_t VP9_HIGH_VAR_OFFS_8[64] = { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }; static const uint16_t VP9_HIGH_VAR_OFFS_10[64] = { 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4, 128 * 4 }; static const uint16_t VP9_HIGH_VAR_OFFS_12[64] = { 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16, 128 * 16 }; #endif // CONFIG_VP9_HIGHBITDEPTH unsigned int vp9_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs) { unsigned int sse; const unsigned int var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, VP9_VAR_OFFS, 0, &sse); return var; } #if CONFIG_VP9_HIGHBITDEPTH unsigned int vp9_high_get_sby_variance(VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd) { unsigned int var, sse; switch (bd) { case 10: var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10), 0, &sse); break; case 12: var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12), 0, &sse); break; case 8: default: var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8), 0, &sse); break; } return var; } #endif // CONFIG_VP9_HIGHBITDEPTH unsigned int vp9_get_sby_perpixel_variance(VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs) { return ROUND_POWER_OF_TWO(vp9_get_sby_variance(cpi, ref, bs), num_pels_log2_lookup[bs]); } #if CONFIG_VP9_HIGHBITDEPTH unsigned int vp9_high_get_sby_perpixel_variance(VP9_COMP *cpi, const struct buf_2d *ref, BLOCK_SIZE bs, int bd) { return (unsigned int)ROUND64_POWER_OF_TWO( (int64_t)vp9_high_get_sby_variance(cpi, ref, bs, bd), num_pels_log2_lookup[bs]); } #endif // CONFIG_VP9_HIGHBITDEPTH #if !CONFIG_REALTIME_ONLY static unsigned int get_sby_perpixel_diff_variance(VP9_COMP *cpi, const struct buf_2d *ref, int mi_row, int mi_col, 
BLOCK_SIZE bs) { unsigned int sse, var; uint8_t *last_y; const YV12_BUFFER_CONFIG *last = get_ref_frame_buffer(cpi, LAST_FRAME); assert(last != NULL); last_y = &last->y_buffer[mi_row * MI_SIZE * last->y_stride + mi_col * MI_SIZE]; var = cpi->fn_ptr[bs].vf(ref->buf, ref->stride, last_y, last->y_stride, &sse); return ROUND_POWER_OF_TWO(var, num_pels_log2_lookup[bs]); } static BLOCK_SIZE get_rd_var_based_fixed_partition(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col) { unsigned int var = get_sby_perpixel_diff_variance( cpi, &x->plane[0].src, mi_row, mi_col, BLOCK_64X64); if (var < 8) return BLOCK_64X64; else if (var < 128) return BLOCK_32X32; else if (var < 2048) return BLOCK_16X16; else return BLOCK_8X8; } #endif // !CONFIG_REALTIME_ONLY static void set_segment_index(VP9_COMP *cpi, MACROBLOCK *const x, int mi_row, int mi_col, BLOCK_SIZE bsize, int segment_index) { VP9_COMMON *const cm = &cpi->common; const struct segmentation *const seg = &cm->seg; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; const AQ_MODE aq_mode = cpi->oxcf.aq_mode; const uint8_t *const map = seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; // Initialize the segmentation index as 0. mi->segment_id = 0; // Skip the rest if AQ mode is disabled. if (!seg->enabled) return; switch (aq_mode) { case CYCLIC_REFRESH_AQ: mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); break; #if !CONFIG_REALTIME_ONLY case VARIANCE_AQ: if (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || cpi->force_update_segmentation || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref)) { int min_energy; int max_energy; // Get sub block energy range if (bsize >= BLOCK_32X32) { vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, &max_energy); } else { min_energy = bsize <= BLOCK_16X16 ? x->mb_energy : vp9_block_energy(cpi, x, bsize); } mi->segment_id = vp9_vaq_segment_id(min_energy); } else { mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); } break; case EQUATOR360_AQ: if (cm->frame_type == KEY_FRAME || cpi->force_update_segmentation) mi->segment_id = vp9_360aq_segment_id(mi_row, cm->mi_rows); else mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); break; #endif case LOOKAHEAD_AQ: mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); break; case PSNR_AQ: mi->segment_id = segment_index; break; case PERCEPTUAL_AQ: mi->segment_id = x->segment_id; break; default: // NO_AQ or PSNR_AQ break; } // Set segment index from ROI map if it's enabled. if (cpi->roi.enabled) mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); vp9_init_plane_quantizers(cpi, x); } // Lighter version of set_offsets that only sets the mode info // pointers. 
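// The mode-info grid is laid out row-major with stride xd->mi_stride, so
// the block at (mi_row, mi_col) sits at index mi_row * mi_stride + mi_col;
// xd->mi is pointed into cm->mi_grid_visible at that offset and xd->mi[0]
// at the corresponding entry of cm->mi.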
static INLINE void set_mode_info_offsets(VP9_COMMON *const cm, MACROBLOCK *const x, MACROBLOCKD *const xd, int mi_row, int mi_col) { const int idx_str = xd->mi_stride * mi_row + mi_col; xd->mi = cm->mi_grid_visible + idx_str; xd->mi[0] = cm->mi + idx_str; x->mbmi_ext = x->mbmi_ext_base + (mi_row * cm->mi_cols + mi_col); } static void set_ssim_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, const BLOCK_SIZE bsize, const int mi_row, const int mi_col, int *const rdmult) { const VP9_COMMON *const cm = &cpi->common; const int bsize_base = BLOCK_16X16; const int num_8x8_w = num_8x8_blocks_wide_lookup[bsize_base]; const int num_8x8_h = num_8x8_blocks_high_lookup[bsize_base]; const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w; const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h; const int num_bcols = (num_8x8_blocks_wide_lookup[bsize] + num_8x8_w - 1) / num_8x8_w; const int num_brows = (num_8x8_blocks_high_lookup[bsize] + num_8x8_h - 1) / num_8x8_h; int row, col; double num_of_mi = 0.0; double geom_mean_of_scale = 0.0; assert(cpi->oxcf.tuning == VP8_TUNE_SSIM); for (row = mi_row / num_8x8_w; row < num_rows && row < mi_row / num_8x8_w + num_brows; ++row) { for (col = mi_col / num_8x8_h; col < num_cols && col < mi_col / num_8x8_h + num_bcols; ++col) { const int index = row * num_cols + col; geom_mean_of_scale += log(cpi->mi_ssim_rdmult_scaling_factors[index]); num_of_mi += 1.0; } } geom_mean_of_scale = exp(geom_mean_of_scale / num_of_mi); *rdmult = (int)((double)(*rdmult) * geom_mean_of_scale); *rdmult = VPXMAX(*rdmult, 0); set_error_per_bit(x, *rdmult); vpx_clear_system_state(); } static void set_offsets(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *const x, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; MACROBLOCKD *const xd = &x->e_mbd; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; MvLimits *const mv_limits = &x->mv_limits; set_skip_context(xd, mi_row, mi_col); set_mode_info_offsets(cm, x, xd, mi_row, mi_col); // Set up destination pointers. vp9_setup_dst_planes(xd->plane, get_frame_new_buffer(cm), mi_row, mi_col); // Set up limit values for MV components. // Mv beyond the range do not produce new/different prediction block. mv_limits->row_min = -(((mi_row + mi_height) * MI_SIZE) + VP9_INTERP_EXTEND); mv_limits->col_min = -(((mi_col + mi_width) * MI_SIZE) + VP9_INTERP_EXTEND); mv_limits->row_max = (cm->mi_rows - mi_row) * MI_SIZE + VP9_INTERP_EXTEND; mv_limits->col_max = (cm->mi_cols - mi_col) * MI_SIZE + VP9_INTERP_EXTEND; // Set up distance of MB to edge of frame in 1/8th pel units. assert(!(mi_col & (mi_width - 1)) && !(mi_row & (mi_height - 1))); set_mi_row_col(xd, tile, mi_row, mi_height, mi_col, mi_width, cm->mi_rows, cm->mi_cols); // Set up source buffers. vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); // R/D setup. 
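  // rdmult is the Lagrange multiplier weighting rate against distortion in
  // the rd cost, so raising it for a block makes extra bits look more
  // expensive there. The SSIM path below rescales it by the geometric mean
  // of the per-16x16 scaling factors covered by this block.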
x->rddiv = cpi->rd.RDDIV; x->rdmult = cpi->rd.RDMULT; if (oxcf->tuning == VP8_TUNE_SSIM) { set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); } // required by vp9_append_sub8x8_mvs_for_idx() and vp9_find_best_ref_mvs() xd->tile = *tile; } static void duplicate_mode_info_in_sb(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize) { const int block_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize], cm->mi_cols - mi_col); const int block_height = VPXMIN(num_8x8_blocks_high_lookup[bsize], cm->mi_rows - mi_row); const int mi_stride = xd->mi_stride; MODE_INFO *const src_mi = xd->mi[0]; int i, j; for (j = 0; j < block_height; ++j) for (i = 0; i < block_width; ++i) xd->mi[j * mi_stride + i] = src_mi; } static void set_block_size(VP9_COMP *const cpi, MACROBLOCK *const x, MACROBLOCKD *const xd, int mi_row, int mi_col, BLOCK_SIZE bsize) { if (cpi->common.mi_cols > mi_col && cpi->common.mi_rows > mi_row) { set_mode_info_offsets(&cpi->common, x, xd, mi_row, mi_col); xd->mi[0]->sb_type = bsize; } } typedef struct { // This struct is used for computing variance in choose_partitioning(), where // the max number of samples within a superblock is 16x16 (with 4x4 avg). Even // in high bitdepth, uint32_t is enough for sum_square_error (2^12 * 2^12 * 16 // * 16 = 2^32). uint32_t sum_square_error; int32_t sum_error; int log2_count; int variance; } var; typedef struct { var none; var horz[2]; var vert[2]; } partition_variance; typedef struct { partition_variance part_variances; var split[4]; } v4x4; typedef struct { partition_variance part_variances; v4x4 split[4]; } v8x8; typedef struct { partition_variance part_variances; v8x8 split[4]; } v16x16; typedef struct { partition_variance part_variances; v16x16 split[4]; } v32x32; typedef struct { partition_variance part_variances; v32x32 split[4]; } v64x64; typedef struct { partition_variance *part_variances; var *split[4]; } variance_node; typedef enum { V16X16, V32X32, V64X64, } TREE_LEVEL; static void tree_to_node(void *data, BLOCK_SIZE bsize, variance_node *node) { int i; node->part_variances = NULL; switch (bsize) { case BLOCK_64X64: { v64x64 *vt = (v64x64 *)data; node->part_variances = &vt->part_variances; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i].part_variances.none; break; } case BLOCK_32X32: { v32x32 *vt = (v32x32 *)data; node->part_variances = &vt->part_variances; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i].part_variances.none; break; } case BLOCK_16X16: { v16x16 *vt = (v16x16 *)data; node->part_variances = &vt->part_variances; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i].part_variances.none; break; } case BLOCK_8X8: { v8x8 *vt = (v8x8 *)data; node->part_variances = &vt->part_variances; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i].part_variances.none; break; } default: { v4x4 *vt = (v4x4 *)data; assert(bsize == BLOCK_4X4); node->part_variances = &vt->part_variances; for (i = 0; i < 4; i++) node->split[i] = &vt->split[i]; break; } } } // Set variance values given sum square error, sum error, count. 
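// get_variance() then derives from these fields
//   variance = 256 * (sum_square_error - sum_error^2 / 2^log2_count)
//                  / 2^log2_count,
// i.e. the per-sample variance scaled by 256 so that small partitions keep
// some fractional precision.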
static void fill_variance(uint32_t s2, int32_t s, int c, var *v) { v->sum_square_error = s2; v->sum_error = s; v->log2_count = c; } static void get_variance(var *v) { v->variance = (int)(256 * (v->sum_square_error - (uint32_t)(((int64_t)v->sum_error * v->sum_error) >> v->log2_count)) >> v->log2_count); } static void sum_2_variances(const var *a, const var *b, var *r) { assert(a->log2_count == b->log2_count); fill_variance(a->sum_square_error + b->sum_square_error, a->sum_error + b->sum_error, a->log2_count + 1, r); } static void fill_variance_tree(void *data, BLOCK_SIZE bsize) { variance_node node; memset(&node, 0, sizeof(node)); tree_to_node(data, bsize, &node); sum_2_variances(node.split[0], node.split[1], &node.part_variances->horz[0]); sum_2_variances(node.split[2], node.split[3], &node.part_variances->horz[1]); sum_2_variances(node.split[0], node.split[2], &node.part_variances->vert[0]); sum_2_variances(node.split[1], node.split[3], &node.part_variances->vert[1]); sum_2_variances(&node.part_variances->vert[0], &node.part_variances->vert[1], &node.part_variances->none); } static int set_vt_partitioning(VP9_COMP *cpi, MACROBLOCK *const x, MACROBLOCKD *const xd, void *data, BLOCK_SIZE bsize, int mi_row, int mi_col, int64_t threshold, BLOCK_SIZE bsize_min, int force_split) { VP9_COMMON *const cm = &cpi->common; variance_node vt; const int block_width = num_8x8_blocks_wide_lookup[bsize]; const int block_height = num_8x8_blocks_high_lookup[bsize]; assert(block_height == block_width); tree_to_node(data, bsize, &vt); if (force_split == 1) return 0; // For bsize=bsize_min (16x16/8x8 for 8x8/4x4 downsampling), select if // variance is below threshold, otherwise split will be selected. // No check for vert/horiz split as too few samples for variance. if (bsize == bsize_min) { // Variance already computed to set the force_split. if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); if (mi_col + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows && vt.part_variances->none.variance < threshold) { set_block_size(cpi, x, xd, mi_row, mi_col, bsize); return 1; } return 0; } else if (bsize > bsize_min) { // Variance already computed to set the force_split. if (frame_is_intra_only(cm)) get_variance(&vt.part_variances->none); // For key frame: take split for bsize above 32X32 or very high variance. if (frame_is_intra_only(cm) && (bsize > BLOCK_32X32 || vt.part_variances->none.variance > (threshold << 4))) { return 0; } // If variance is low, take the bsize (no split). if (mi_col + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows && vt.part_variances->none.variance < threshold) { set_block_size(cpi, x, xd, mi_row, mi_col, bsize); return 1; } // Check vertical split. if (mi_row + block_height / 2 < cm->mi_rows) { BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_VERT); get_variance(&vt.part_variances->vert[0]); get_variance(&vt.part_variances->vert[1]); if (vt.part_variances->vert[0].variance < threshold && vt.part_variances->vert[1].variance < threshold && get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { set_block_size(cpi, x, xd, mi_row, mi_col, subsize); set_block_size(cpi, x, xd, mi_row, mi_col + block_width / 2, subsize); return 1; } } // Check horizontal split. 
if (mi_col + block_width / 2 < cm->mi_cols) { BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_HORZ); get_variance(&vt.part_variances->horz[0]); get_variance(&vt.part_variances->horz[1]); if (vt.part_variances->horz[0].variance < threshold && vt.part_variances->horz[1].variance < threshold && get_plane_block_size(subsize, &xd->plane[1]) < BLOCK_INVALID) { set_block_size(cpi, x, xd, mi_row, mi_col, subsize); set_block_size(cpi, x, xd, mi_row + block_height / 2, mi_col, subsize); return 1; } } return 0; } return 0; } static int64_t scale_part_thresh_sumdiff(int64_t threshold_base, int speed, int width, int height, int content_state) { if (speed >= 8) { if (width <= 640 && height <= 480) return (5 * threshold_base) >> 2; else if ((content_state == kLowSadLowSumdiff) || (content_state == kHighSadLowSumdiff) || (content_state == kLowVarHighSumdiff)) return (5 * threshold_base) >> 2; } else if (speed == 7) { if ((content_state == kLowSadLowSumdiff) || (content_state == kHighSadLowSumdiff) || (content_state == kLowVarHighSumdiff)) { return (5 * threshold_base) >> 2; } } return threshold_base; } // Set the variance split thresholds for following the block sizes: // 0 - threshold_64x64, 1 - threshold_32x32, 2 - threshold_16x16, // 3 - vbp_threshold_8x8. vbp_threshold_8x8 (to split to 4x4 partition) is // currently only used on key frame. static void set_vbp_thresholds(VP9_COMP *cpi, int64_t thresholds[], int q, int content_state) { VP9_COMMON *const cm = &cpi->common; const int is_key_frame = frame_is_intra_only(cm); const int threshold_multiplier = is_key_frame ? 20 : cpi->sf.variance_part_thresh_mult; int64_t threshold_base = (int64_t)(threshold_multiplier * cpi->y_dequant[q][1]); if (is_key_frame) { thresholds[0] = threshold_base; thresholds[1] = threshold_base >> 2; thresholds[2] = threshold_base >> 2; thresholds[3] = threshold_base << 2; } else { // Increase base variance threshold based on estimated noise level. if (cpi->noise_estimate.enabled && cm->width >= 640 && cm->height >= 480) { NOISE_LEVEL noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate); if (noise_level == kHigh) threshold_base = 3 * threshold_base; else if (noise_level == kMedium) threshold_base = threshold_base << 1; else if (noise_level < kLow) threshold_base = (7 * threshold_base) >> 3; } #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && cpi->oxcf.speed > 5 && cpi->denoiser.denoising_level >= kDenLow) threshold_base = vp9_scale_part_thresh(threshold_base, cpi->denoiser.denoising_level, content_state, cpi->svc.temporal_layer_id); else threshold_base = scale_part_thresh_sumdiff(threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state); #else // Increase base variance threshold based on content_state/sum_diff level. 
threshold_base = scale_part_thresh_sumdiff( threshold_base, cpi->oxcf.speed, cm->width, cm->height, content_state); #endif thresholds[0] = threshold_base; thresholds[2] = threshold_base << cpi->oxcf.speed; if (cm->width >= 1280 && cm->height >= 720 && cpi->oxcf.speed < 7) thresholds[2] = thresholds[2] << 1; if (cm->width <= 352 && cm->height <= 288) { thresholds[0] = threshold_base >> 3; thresholds[1] = threshold_base >> 1; thresholds[2] = threshold_base << 3; } else if (cm->width < 1280 && cm->height < 720) { thresholds[1] = (5 * threshold_base) >> 2; } else if (cm->width < 1920 && cm->height < 1080) { thresholds[1] = threshold_base << 1; } else { thresholds[1] = (5 * threshold_base) >> 1; } if (cpi->sf.disable_16x16part_nonkey) thresholds[2] = INT64_MAX; } } void vp9_set_variance_partition_thresholds(VP9_COMP *cpi, int q, int content_state) { VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; const int is_key_frame = frame_is_intra_only(cm); if (sf->partition_search_type != VAR_BASED_PARTITION && sf->partition_search_type != REFERENCE_PARTITION) { return; } else { set_vbp_thresholds(cpi, cpi->vbp_thresholds, q, content_state); // The thresholds below are not changed locally. if (is_key_frame) { cpi->vbp_threshold_sad = 0; cpi->vbp_threshold_copy = 0; cpi->vbp_bsize_min = BLOCK_8X8; } else { if (cm->width <= 352 && cm->height <= 288) cpi->vbp_threshold_sad = 10; else cpi->vbp_threshold_sad = (cpi->y_dequant[q][1] << 1) > 1000 ? (cpi->y_dequant[q][1] << 1) : 1000; cpi->vbp_bsize_min = BLOCK_16X16; if (cm->width <= 352 && cm->height <= 288) cpi->vbp_threshold_copy = 4000; else if (cm->width <= 640 && cm->height <= 360) cpi->vbp_threshold_copy = 8000; else cpi->vbp_threshold_copy = (cpi->y_dequant[q][1] << 3) > 8000 ? (cpi->y_dequant[q][1] << 3) : 8000; if (cpi->rc.high_source_sad || (cpi->use_svc && cpi->svc.high_source_sad_superframe)) { cpi->vbp_threshold_sad = 0; cpi->vbp_threshold_copy = 0; } } cpi->vbp_threshold_minmax = 15 + (q >> 3); } } // Compute the minmax over the 8x8 subblocks. static int compute_minmax_8x8(const uint8_t *s, int sp, const uint8_t *d, int dp, int x16_idx, int y16_idx, #if CONFIG_VP9_HIGHBITDEPTH int highbd_flag, #endif int pixels_wide, int pixels_high) { int k; int minmax_max = 0; int minmax_min = 255; // Loop over the 4 8x8 subblocks. 
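// The subblocks are visited in raster order: for index k the offsets are
// x = (k & 1) * 8 and y = (k >> 1) * 8, so k = 0..3 maps to the top-left,
// top-right, bottom-left and bottom-right 8x8 block respectively.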
for (k = 0; k < 4; k++) { int x8_idx = x16_idx + ((k & 1) << 3); int y8_idx = y16_idx + ((k >> 1) << 3); int min = 0; int max = 0; if (x8_idx < pixels_wide && y8_idx < pixels_high) { #if CONFIG_VP9_HIGHBITDEPTH if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { vpx_highbd_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp, &min, &max); } else { vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp, &min, &max); } #else vpx_minmax_8x8(s + y8_idx * sp + x8_idx, sp, d + y8_idx * dp + x8_idx, dp, &min, &max); #endif if ((max - min) > minmax_max) minmax_max = (max - min); if ((max - min) < minmax_min) minmax_min = (max - min); } } return (minmax_max - minmax_min); } static void fill_variance_4x4avg(const uint8_t *s, int sp, const uint8_t *d, int dp, int x8_idx, int y8_idx, v8x8 *vst, #if CONFIG_VP9_HIGHBITDEPTH int highbd_flag, #endif int pixels_wide, int pixels_high, int is_key_frame) { int k; for (k = 0; k < 4; k++) { int x4_idx = x8_idx + ((k & 1) << 2); int y4_idx = y8_idx + ((k >> 1) << 2); unsigned int sse = 0; int sum = 0; if (x4_idx < pixels_wide && y4_idx < pixels_high) { int s_avg; int d_avg = 128; #if CONFIG_VP9_HIGHBITDEPTH if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { s_avg = vpx_highbd_avg_4x4(s + y4_idx * sp + x4_idx, sp); if (!is_key_frame) d_avg = vpx_highbd_avg_4x4(d + y4_idx * dp + x4_idx, dp); } else { s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); } #else s_avg = vpx_avg_4x4(s + y4_idx * sp + x4_idx, sp); if (!is_key_frame) d_avg = vpx_avg_4x4(d + y4_idx * dp + x4_idx, dp); #endif sum = s_avg - d_avg; sse = sum * sum; } fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); } } static void fill_variance_8x8avg(const uint8_t *s, int sp, const uint8_t *d, int dp, int x16_idx, int y16_idx, v16x16 *vst, #if CONFIG_VP9_HIGHBITDEPTH int highbd_flag, #endif int pixels_wide, int pixels_high, int is_key_frame) { int k; for (k = 0; k < 4; k++) { int x8_idx = x16_idx + ((k & 1) << 3); int y8_idx = y16_idx + ((k >> 1) << 3); unsigned int sse = 0; int sum = 0; if (x8_idx < pixels_wide && y8_idx < pixels_high) { int s_avg; int d_avg = 128; #if CONFIG_VP9_HIGHBITDEPTH if (highbd_flag & YV12_FLAG_HIGHBITDEPTH) { s_avg = vpx_highbd_avg_8x8(s + y8_idx * sp + x8_idx, sp); if (!is_key_frame) d_avg = vpx_highbd_avg_8x8(d + y8_idx * dp + x8_idx, dp); } else { s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); } #else s_avg = vpx_avg_8x8(s + y8_idx * sp + x8_idx, sp); if (!is_key_frame) d_avg = vpx_avg_8x8(d + y8_idx * dp + x8_idx, dp); #endif sum = s_avg - d_avg; sse = sum * sum; } fill_variance(sse, sum, 0, &vst->split[k].part_variances.none); } } // Check if most of the superblock is skin content, and if so, force split to // 32x32, and set x->sb_is_skin for use in mode selection. static int skin_sb_split(VP9_COMP *cpi, MACROBLOCK *x, const int low_res, int mi_row, int mi_col, int *force_split) { VP9_COMMON *const cm = &cpi->common; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) return 0; #endif // Avoid checking superblocks on/near boundary and avoid low resolutions. // Note superblock may still pick 64X64 if y_sad is very small // (i.e., y_sad < cpi->vbp_threshold_sad) below. For now leave this as is. 
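// The check below walks the (up to) sixteen 16x16 sub-blocks of the
// superblock in steps of 2 mi units, counting skin blocks from
// cpi->skin_map. It exits early once four non-skin blocks have been seen,
// and forces a 32x32 split when more than 12 of the 16 blocks are skin.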
if (!low_res && (mi_col >= 8 && mi_col + 8 < cm->mi_cols && mi_row >= 8 && mi_row + 8 < cm->mi_rows)) { int num_16x16_skin = 0; int num_16x16_nonskin = 0; uint8_t *ysignal = x->plane[0].src.buf; uint8_t *usignal = x->plane[1].src.buf; uint8_t *vsignal = x->plane[2].src.buf; int sp = x->plane[0].src.stride; int spuv = x->plane[1].src.stride; const int block_index = mi_row * cm->mi_cols + mi_col; const int bw = num_8x8_blocks_wide_lookup[BLOCK_64X64]; const int bh = num_8x8_blocks_high_lookup[BLOCK_64X64]; const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); // Loop through the 16x16 sub-blocks. int i, j; for (i = 0; i < ymis; i += 2) { for (j = 0; j < xmis; j += 2) { int bl_index = block_index + i * cm->mi_cols + j; int is_skin = cpi->skin_map[bl_index]; num_16x16_skin += is_skin; num_16x16_nonskin += (1 - is_skin); if (num_16x16_nonskin > 3) { // Exit loop if at least 4 of the 16x16 blocks are not skin. i = ymis; break; } ysignal += 16; usignal += 8; vsignal += 8; } ysignal += (sp << 4) - 64; usignal += (spuv << 3) - 32; vsignal += (spuv << 3) - 32; } if (num_16x16_skin > 12) { *force_split = 1; return 1; } } return 0; } static void set_low_temp_var_flag(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, v64x64 *vt, int64_t thresholds[], MV_REFERENCE_FRAME ref_frame_partition, int mi_col, int mi_row) { int i, j; VP9_COMMON *const cm = &cpi->common; const int mv_thr = cm->width > 640 ? 8 : 4; // Check temporal variance for bsize >= 16x16, if LAST_FRAME was selected and // int_pro mv is small. If the temporal variance is small set the flag // variance_low for the block. The variance threshold can be adjusted, the // higher the more aggressive. if (ref_frame_partition == LAST_FRAME && (cpi->sf.short_circuit_low_temp_var == 1 || (xd->mi[0]->mv[0].as_mv.col < mv_thr && xd->mi[0]->mv[0].as_mv.col > -mv_thr && xd->mi[0]->mv[0].as_mv.row < mv_thr && xd->mi[0]->mv[0].as_mv.row > -mv_thr))) { if (xd->mi[0]->sb_type == BLOCK_64X64) { if ((vt->part_variances).none.variance < (thresholds[0] >> 1)) x->variance_low[0] = 1; } else if (xd->mi[0]->sb_type == BLOCK_64X32) { for (i = 0; i < 2; i++) { if (vt->part_variances.horz[i].variance < (thresholds[0] >> 2)) x->variance_low[i + 1] = 1; } } else if (xd->mi[0]->sb_type == BLOCK_32X64) { for (i = 0; i < 2; i++) { if (vt->part_variances.vert[i].variance < (thresholds[0] >> 2)) x->variance_low[i + 3] = 1; } } else { for (i = 0; i < 4; i++) { const int idx[4][2] = { { 0, 0 }, { 0, 4 }, { 4, 0 }, { 4, 4 } }; const int idx_str = cm->mi_stride * (mi_row + idx[i][0]) + mi_col + idx[i][1]; MODE_INFO **this_mi = cm->mi_grid_visible + idx_str; if (cm->mi_cols <= mi_col + idx[i][1] || cm->mi_rows <= mi_row + idx[i][0]) continue; if ((*this_mi)->sb_type == BLOCK_32X32) { int64_t threshold_32x32 = (cpi->sf.short_circuit_low_temp_var == 1 || cpi->sf.short_circuit_low_temp_var == 3) ? ((5 * thresholds[1]) >> 3) : (thresholds[1] >> 1); if (vt->split[i].part_variances.none.variance < threshold_32x32) x->variance_low[i + 5] = 1; } else if (cpi->sf.short_circuit_low_temp_var >= 2) { // For 32x16 and 16x32 blocks, the flag is set on each 16x16 block // inside. 
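// For reference, the 25 entries of x->variance_low are laid out as:
// [0] 64x64, [1..2] 64x32 halves, [3..4] 32x64 halves, [5..8] the four
// 32x32 quarters, and [9..24] the sixteen 16x16 blocks (hence the
// (i << 2) + j + 9 index below).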
if ((*this_mi)->sb_type == BLOCK_16X16 || (*this_mi)->sb_type == BLOCK_32X16 || (*this_mi)->sb_type == BLOCK_16X32) { for (j = 0; j < 4; j++) { if (vt->split[i].split[j].part_variances.none.variance < (thresholds[2] >> 8)) x->variance_low[(i << 2) + j + 9] = 1; } } } } } } } static void copy_partitioning_helper(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, BLOCK_SIZE bsize, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; BLOCK_SIZE *prev_part = cpi->prev_partition; int start_pos = mi_row * cm->mi_stride + mi_col; const int bsl = b_width_log2_lookup[bsize]; const int bs = (1 << bsl) >> 2; BLOCK_SIZE subsize; PARTITION_TYPE partition; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; partition = partition_lookup[bsl][prev_part[start_pos]]; subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { set_block_size(cpi, x, xd, mi_row, mi_col, bsize); } else { switch (partition) { case PARTITION_NONE: set_block_size(cpi, x, xd, mi_row, mi_col, bsize); break; case PARTITION_HORZ: set_block_size(cpi, x, xd, mi_row, mi_col, subsize); set_block_size(cpi, x, xd, mi_row + bs, mi_col, subsize); break; case PARTITION_VERT: set_block_size(cpi, x, xd, mi_row, mi_col, subsize); set_block_size(cpi, x, xd, mi_row, mi_col + bs, subsize); break; default: assert(partition == PARTITION_SPLIT); copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col); copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col); copy_partitioning_helper(cpi, x, xd, subsize, mi_row, mi_col + bs); copy_partitioning_helper(cpi, x, xd, subsize, mi_row + bs, mi_col + bs); break; } } } static int copy_partitioning(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, int mi_row, int mi_col, int segment_id, int sb_offset) { int svc_copy_allowed = 1; int frames_since_key_thresh = 1; if (cpi->use_svc) { // For SVC, don't allow copy if base spatial layer is key frame, or if // frame is not a temporal enhancement layer frame. int layer = LAYER_IDS_TO_IDX(0, cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers); const LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; if (lc->is_key_frame || !cpi->svc.non_reference_frame) svc_copy_allowed = 0; frames_since_key_thresh = cpi->svc.number_spatial_layers << 1; } if (cpi->rc.frames_since_key > frames_since_key_thresh && svc_copy_allowed && !cpi->resize_pending && segment_id == CR_SEGMENT_ID_BASE && cpi->prev_segment_id[sb_offset] == CR_SEGMENT_ID_BASE && cpi->copied_frame_cnt[sb_offset] < cpi->max_copied_frame) { if (cpi->prev_partition != NULL) { copy_partitioning_helper(cpi, x, xd, BLOCK_64X64, mi_row, mi_col); cpi->copied_frame_cnt[sb_offset] += 1; memcpy(x->variance_low, &(cpi->prev_variance_low[sb_offset * 25]), sizeof(x->variance_low)); return 1; } } return 0; } static int scale_partitioning_svc(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, BLOCK_SIZE bsize, int mi_row, int mi_col, int mi_row_high, int mi_col_high) { VP9_COMMON *const cm = &cpi->common; SVC *const svc = &cpi->svc; BLOCK_SIZE *prev_part = svc->prev_partition_svc; // Variables with _high are for higher resolution. 
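// With the fixed 2x2 scale factor between spatial layers, adding 3 to a
// square BLOCK_SIZE advances it one square level (e.g. BLOCK_16X16 ->
// BLOCK_32X32); the boundary tables below adjust that step for blocks that
// would not fit at the frame edges.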
int bsize_high = 0; int subsize_high = 0; const int bsl_high = b_width_log2_lookup[bsize]; const int bs_high = (1 << bsl_high) >> 2; const int has_rows = (mi_row_high + bs_high) < cm->mi_rows; const int has_cols = (mi_col_high + bs_high) < cm->mi_cols; const int row_boundary_block_scale_factor[BLOCK_SIZES] = { 13, 13, 13, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0 }; const int col_boundary_block_scale_factor[BLOCK_SIZES] = { 13, 13, 13, 2, 2, 0, 2, 2, 0, 2, 2, 0, 0 }; int start_pos; BLOCK_SIZE bsize_low; PARTITION_TYPE partition_high; if (mi_row_high >= cm->mi_rows || mi_col_high >= cm->mi_cols) return 0; if (mi_row >= svc->mi_rows[svc->spatial_layer_id - 1] || mi_col >= svc->mi_cols[svc->spatial_layer_id - 1]) return 0; // Find corresponding (mi_col/mi_row) block down-scaled by 2x2. start_pos = mi_row * (svc->mi_stride[svc->spatial_layer_id - 1]) + mi_col; bsize_low = prev_part[start_pos]; // The block size is too big for boundaries. Do variance based partitioning. if ((!has_rows || !has_cols) && bsize_low > BLOCK_16X16) return 1; // For reference frames: return 1 (do variance-based partitioning) if the // superblock is not low source sad and lower-resoln bsize is below 32x32. if (!cpi->svc.non_reference_frame && !x->skip_low_source_sad && bsize_low < BLOCK_32X32) return 1; // Scale up block size by 2x2. Force 64x64 for size larger than 32x32. if (bsize_low < BLOCK_32X32) { bsize_high = bsize_low + 3; } else if (bsize_low >= BLOCK_32X32) { bsize_high = BLOCK_64X64; } // Scale up blocks on boundary. if (!has_cols && has_rows) { bsize_high = bsize_low + row_boundary_block_scale_factor[bsize_low]; } else if (has_cols && !has_rows) { bsize_high = bsize_low + col_boundary_block_scale_factor[bsize_low]; } else if (!has_cols && !has_rows) { bsize_high = bsize_low; } partition_high = partition_lookup[bsl_high][bsize_high]; subsize_high = get_subsize(bsize, partition_high); if (subsize_high < BLOCK_8X8) { set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high); } else { const int bsl = b_width_log2_lookup[bsize]; const int bs = (1 << bsl) >> 2; switch (partition_high) { case PARTITION_NONE: set_block_size(cpi, x, xd, mi_row_high, mi_col_high, bsize_high); break; case PARTITION_HORZ: set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high); if (subsize_high < BLOCK_64X64) set_block_size(cpi, x, xd, mi_row_high + bs_high, mi_col_high, subsize_high); break; case PARTITION_VERT: set_block_size(cpi, x, xd, mi_row_high, mi_col_high, subsize_high); if (subsize_high < BLOCK_64X64) set_block_size(cpi, x, xd, mi_row_high, mi_col_high + bs_high, subsize_high); break; default: assert(partition_high == PARTITION_SPLIT); if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col, mi_row_high, mi_col_high)) return 1; if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1), mi_col, mi_row_high + bs_high, mi_col_high)) return 1; if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row, mi_col + (bs >> 1), mi_row_high, mi_col_high + bs_high)) return 1; if (scale_partitioning_svc(cpi, x, xd, subsize_high, mi_row + (bs >> 1), mi_col + (bs >> 1), mi_row_high + bs_high, mi_col_high + bs_high)) return 1; break; } } return 0; } static void update_partition_svc(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; BLOCK_SIZE *prev_part = cpi->svc.prev_partition_svc; int start_pos = mi_row * cm->mi_stride + mi_col; const int bsl = b_width_log2_lookup[bsize]; const int bs = (1 << bsl) >> 2; BLOCK_SIZE subsize; PARTITION_TYPE partition; const 
MODE_INFO *mi = NULL; int xx, yy; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; mi = cm->mi_grid_visible[start_pos]; partition = partition_lookup[bsl][mi->sb_type]; subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { prev_part[start_pos] = bsize; } else { switch (partition) { case PARTITION_NONE: prev_part[start_pos] = bsize; if (bsize == BLOCK_64X64) { for (xx = 0; xx < 8; xx += 4) for (yy = 0; yy < 8; yy += 4) { if ((mi_row + xx < cm->mi_rows) && (mi_col + yy < cm->mi_cols)) prev_part[start_pos + xx * cm->mi_stride + yy] = bsize; } } break; case PARTITION_HORZ: prev_part[start_pos] = subsize; if (mi_row + bs < cm->mi_rows) prev_part[start_pos + bs * cm->mi_stride] = subsize; break; case PARTITION_VERT: prev_part[start_pos] = subsize; if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; break; default: assert(partition == PARTITION_SPLIT); update_partition_svc(cpi, subsize, mi_row, mi_col); update_partition_svc(cpi, subsize, mi_row + bs, mi_col); update_partition_svc(cpi, subsize, mi_row, mi_col + bs); update_partition_svc(cpi, subsize, mi_row + bs, mi_col + bs); break; } } } static void update_prev_partition_helper(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; BLOCK_SIZE *prev_part = cpi->prev_partition; int start_pos = mi_row * cm->mi_stride + mi_col; const int bsl = b_width_log2_lookup[bsize]; const int bs = (1 << bsl) >> 2; BLOCK_SIZE subsize; PARTITION_TYPE partition; const MODE_INFO *mi = NULL; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; mi = cm->mi_grid_visible[start_pos]; partition = partition_lookup[bsl][mi->sb_type]; subsize = get_subsize(bsize, partition); if (subsize < BLOCK_8X8) { prev_part[start_pos] = bsize; } else { switch (partition) { case PARTITION_NONE: prev_part[start_pos] = bsize; break; case PARTITION_HORZ: prev_part[start_pos] = subsize; if (mi_row + bs < cm->mi_rows) prev_part[start_pos + bs * cm->mi_stride] = subsize; break; case PARTITION_VERT: prev_part[start_pos] = subsize; if (mi_col + bs < cm->mi_cols) prev_part[start_pos + bs] = subsize; break; default: assert(partition == PARTITION_SPLIT); update_prev_partition_helper(cpi, subsize, mi_row, mi_col); update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col); update_prev_partition_helper(cpi, subsize, mi_row, mi_col + bs); update_prev_partition_helper(cpi, subsize, mi_row + bs, mi_col + bs); break; } } } static void update_prev_partition(VP9_COMP *cpi, MACROBLOCK *x, int segment_id, int mi_row, int mi_col, int sb_offset) { update_prev_partition_helper(cpi, BLOCK_64X64, mi_row, mi_col); cpi->prev_segment_id[sb_offset] = segment_id; memcpy(&(cpi->prev_variance_low[sb_offset * 25]), x->variance_low, sizeof(x->variance_low)); // Reset the counter for copy partitioning cpi->copied_frame_cnt[sb_offset] = 0; } static void chroma_check(VP9_COMP *cpi, MACROBLOCK *x, int bsize, unsigned int y_sad, int is_key_frame) { int i; MACROBLOCKD *xd = &x->e_mbd; if (is_key_frame) return; // For speed > 8, avoid the chroma check if y_sad is above threshold. 
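// When the check does run, each chroma plane's SAD is compared against the
// luma SAD: uv_sad > y_sad / 4 marks that plane as color sensitive
// (x->color_sensitivity) for this superblock, for use in mode selection.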
if (cpi->oxcf.speed > 8) { if (y_sad > cpi->vbp_thresholds[1] && (!cpi->noise_estimate.enabled || vp9_noise_estimate_extract_level(&cpi->noise_estimate) < kMedium)) return; } for (i = 1; i <= 2; ++i) { unsigned int uv_sad = UINT_MAX; struct macroblock_plane *p = &x->plane[i]; struct macroblockd_plane *pd = &xd->plane[i]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); if (bs != BLOCK_INVALID) uv_sad = cpi->fn_ptr[bs].sdf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride); // TODO(marpan): Investigate if we should lower this threshold if // superblock is detected as skin. x->color_sensitivity[i - 1] = uv_sad > (y_sad >> 2); } } static uint64_t avg_source_sad(VP9_COMP *cpi, MACROBLOCK *x, int shift, int sb_offset) { unsigned int tmp_sse; uint64_t tmp_sad; unsigned int tmp_variance; const BLOCK_SIZE bsize = BLOCK_64X64; uint8_t *src_y = cpi->Source->y_buffer; int src_ystride = cpi->Source->y_stride; uint8_t *last_src_y = cpi->Last_Source->y_buffer; int last_src_ystride = cpi->Last_Source->y_stride; uint64_t avg_source_sad_threshold = 10000; uint64_t avg_source_sad_threshold2 = 12000; #if CONFIG_VP9_HIGHBITDEPTH if (cpi->common.use_highbitdepth) return 0; #endif src_y += shift; last_src_y += shift; tmp_sad = cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride); tmp_variance = vpx_variance64x64(src_y, src_ystride, last_src_y, last_src_ystride, &tmp_sse); // Note: tmp_sse - tmp_variance = ((sum * sum) >> 12) if (tmp_sad < avg_source_sad_threshold) x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kLowSadLowSumdiff : kLowSadHighSumdiff; else x->content_state_sb = ((tmp_sse - tmp_variance) < 25) ? kHighSadLowSumdiff : kHighSadHighSumdiff; // Detect large lighting change. if (cpi->oxcf.content != VP9E_CONTENT_SCREEN && cpi->oxcf.rc_mode == VPX_CBR && tmp_variance < (tmp_sse >> 3) && (tmp_sse - tmp_variance) > 10000) x->content_state_sb = kLowVarHighSumdiff; else if (tmp_sad > (avg_source_sad_threshold << 1)) x->content_state_sb = kVeryHighSad; if (cpi->content_state_sb_fd != NULL) { if (tmp_sad < avg_source_sad_threshold2) { // Cap the increment to 255. if (cpi->content_state_sb_fd[sb_offset] < 255) cpi->content_state_sb_fd[sb_offset]++; } else { cpi->content_state_sb_fd[sb_offset] = 0; } } if (tmp_sad == 0) x->zero_temp_sad_source = 1; return tmp_sad; } // This function chooses partitioning based on the variance between source and // reconstructed last, where variance is computed for down-sampled inputs. static int choose_partitioning(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *x, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; int i, j, k, m; v64x64 vt; v16x16 *vt2 = NULL; int force_split[21]; int avg_32x32; int max_var_32x32 = 0; int min_var_32x32 = INT_MAX; int var_32x32; int avg_16x16[4]; int maxvar_16x16[4]; int minvar_16x16[4]; int64_t threshold_4x4avg; NOISE_LEVEL noise_level = kLow; int content_state = 0; uint8_t *s; const uint8_t *d; int sp; int dp; int compute_minmax_variance = 1; unsigned int y_sad = UINT_MAX; BLOCK_SIZE bsize = BLOCK_64X64; // Ref frame used in partitioning. 
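// LAST_FRAME is the default partition reference; it is switched to
// GOLDEN_FRAME below when the golden SAD beats the (biased) last-frame SAD.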
MV_REFERENCE_FRAME ref_frame_partition = LAST_FRAME; int pixels_wide = 64, pixels_high = 64; int64_t thresholds[4] = { cpi->vbp_thresholds[0], cpi->vbp_thresholds[1], cpi->vbp_thresholds[2], cpi->vbp_thresholds[3] }; int force_64_split = cpi->rc.high_source_sad || (cpi->use_svc && cpi->svc.high_source_sad_superframe) || (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->compute_source_sad_onepass && cpi->sf.use_source_sad && !x->zero_temp_sad_source); // For the variance computation under SVC mode, we treat the frame as key if // the reference (base layer frame) is a key frame (i.e., is_key_frame == 1). int is_key_frame = (frame_is_intra_only(cm) || (is_one_pass_cbr_svc(cpi) && cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame)); // Always use 4x4 partition for key frame. const int use_4x4_partition = frame_is_intra_only(cm); const int low_res = (cm->width <= 352 && cm->height <= 288); int variance4x4downsample[16]; int segment_id; int sb_offset = (cm->mi_stride >> 3) * (mi_row >> 3) + (mi_col >> 3); // For SVC: check if LAST frame is NULL or if the resolution of LAST is // different than the current frame resolution, and if so, treat this frame // as a key frame, for the purpose of the superblock partitioning. // LAST == NULL can happen in some cases where enhancement spatial layers are // enabled dynamically in the stream and the only reference is the spatial // reference (GOLDEN). if (cpi->use_svc) { const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, LAST_FRAME); if (ref == NULL || ref->y_crop_height != cm->height || ref->y_crop_width != cm->width) is_key_frame = 1; } set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); set_segment_index(cpi, x, mi_row, mi_col, BLOCK_64X64, 0); segment_id = xd->mi[0]->segment_id; if (cpi->oxcf.speed >= 8 || (cpi->use_svc && cpi->svc.non_reference_frame)) compute_minmax_variance = 0; memset(x->variance_low, 0, sizeof(x->variance_low)); if (cpi->sf.use_source_sad && !is_key_frame) { int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); content_state = x->content_state_sb; x->skip_low_source_sad = (content_state == kLowSadLowSumdiff || content_state == kLowSadHighSumdiff) ? 1 : 0; x->lowvar_highsumdiff = (content_state == kLowVarHighSumdiff) ? 1 : 0; if (cpi->content_state_sb_fd != NULL) x->last_sb_high_content = cpi->content_state_sb_fd[sb_offset2]; // For SVC on top spatial layer: use/scale the partition from // the lower spatial resolution if svc_use_lowres_part is enabled. if (cpi->sf.svc_use_lowres_part && cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 && cpi->svc.prev_partition_svc != NULL && content_state != kVeryHighSad) { if (!scale_partitioning_svc(cpi, x, xd, BLOCK_64X64, mi_row >> 1, mi_col >> 1, mi_row, mi_col)) { if (cpi->sf.copy_partition_flag) { update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); } return 0; } } // If source_sad is low copy the partition without computing the y_sad.
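// copy_partitioning() only succeeds when enough frames have passed since
// the last key frame, no resize is pending, the superblock is in the base
// cyclic-refresh segment in both frames, and fewer than
// cpi->max_copied_frame consecutive copies have been made for it.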
if (x->skip_low_source_sad && cpi->sf.copy_partition_flag && !force_64_split && copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) { x->sb_use_mv_part = 1; if (cpi->sf.svc_use_lowres_part && cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); return 0; } } if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && cyclic_refresh_segment_id_boosted(segment_id)) { int q = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); set_vbp_thresholds(cpi, thresholds, q, content_state); } else { set_vbp_thresholds(cpi, thresholds, cm->base_qindex, content_state); } // Decrease 32x32 split threshold for screen on base layer, for scene // change/high motion frames. if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->svc.spatial_layer_id == 0 && force_64_split) thresholds[1] = 3 * thresholds[1] >> 2; // For non keyframes, disable 4x4 average for low resolution when speed = 8 threshold_4x4avg = (cpi->oxcf.speed < 8) ? thresholds[1] << 1 : INT64_MAX; if (xd->mb_to_right_edge < 0) pixels_wide += (xd->mb_to_right_edge >> 3); if (xd->mb_to_bottom_edge < 0) pixels_high += (xd->mb_to_bottom_edge >> 3); s = x->plane[0].src.buf; sp = x->plane[0].src.stride; // Index for force_split: 0 for 64x64, 1-4 for 32x32 blocks, // 5-20 for the 16x16 blocks. force_split[0] = force_64_split; if (!is_key_frame) { // In the case of spatial/temporal scalable coding, the assumption here is // that the temporal reference frame will always be of type LAST_FRAME. // TODO(marpan): If that assumption is broken, we need to revisit this code. MODE_INFO *mi = xd->mi[0]; YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); const YV12_BUFFER_CONFIG *yv12_g = NULL; unsigned int y_sad_g, y_sad_thr, y_sad_last; bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows); assert(yv12 != NULL); if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) || cpi->svc.use_gf_temporal_ref_current_layer) { // For now, GOLDEN will not be used for non-zero spatial layers, since // it may not be a temporal reference. yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); } // Only compute y_sad_g (sad for golden reference) for speed < 8. 
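// For speed >= 8 (and likewise in the alt-ref case below) y_sad_g stays at
// UINT_MAX, so GOLDEN can never be picked as the partition reference.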
if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 && (cpi->ref_frame_flags & VP9_GOLD_FLAG)) { vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, &cm->frame_refs[GOLDEN_FRAME - 1].sf); y_sad_g = cpi->fn_ptr[bsize].sdf( x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); } else { y_sad_g = UINT_MAX; } if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.is_src_frame_alt_ref) { yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME); vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, &cm->frame_refs[ALTREF_FRAME - 1].sf); mi->ref_frame[0] = ALTREF_FRAME; y_sad_g = UINT_MAX; } else { vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, &cm->frame_refs[LAST_FRAME - 1].sf); mi->ref_frame[0] = LAST_FRAME; } mi->ref_frame[1] = NONE; mi->sb_type = BLOCK_64X64; mi->mv[0].as_int = 0; mi->interp_filter = BILINEAR; if (cpi->oxcf.speed >= 8 && !low_res && x->content_state_sb != kVeryHighSad) { y_sad = cpi->fn_ptr[bsize].sdf( x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); } else { const MV dummy_mv = { 0, 0 }; y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col, &dummy_mv); x->sb_use_mv_part = 1; x->sb_mvcol_part = mi->mv[0].as_mv.col; x->sb_mvrow_part = mi->mv[0].as_mv.row; if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode && cpi->svc.high_num_blocks_with_motion && !x->zero_temp_sad_source && cm->width > 640 && cm->height > 480) { // Disable split below 16x16 block size when scroll motion (horz or // vert) is detected. // TODO(marpan/jianj): Improve this condition: issue is that search // range is hard-coded/limited in vp9_int_pro_motion_estimation() so // scroll motion may not be detected here. if (((abs(x->sb_mvrow_part) >= 48 && abs(x->sb_mvcol_part) <= 8) || (abs(x->sb_mvcol_part) >= 48 && abs(x->sb_mvrow_part) <= 8)) && y_sad < 100000) { compute_minmax_variance = 0; thresholds[2] = INT64_MAX; } } } y_sad_last = y_sad; // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad // are close if short_circuit_low_temp_var is on. y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad; if (y_sad_g < y_sad_thr) { vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, &cm->frame_refs[GOLDEN_FRAME - 1].sf); mi->ref_frame[0] = GOLDEN_FRAME; mi->mv[0].as_int = 0; y_sad = y_sad_g; ref_frame_partition = GOLDEN_FRAME; } else { x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; ref_frame_partition = LAST_FRAME; } set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); if (cpi->use_skin_detection) x->sb_is_skin = skin_sb_split(cpi, x, low_res, mi_row, mi_col, force_split); d = xd->plane[0].dst.buf; dp = xd->plane[0].dst.stride; // If the y_sad is very small, take 64x64 as partition and exit. // Don't check on boosted segment for now, as 64x64 is suppressed there. 
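// On this early exit the 64x64 decision is still propagated to the
// copy-partition and lower-resolution SVC partition state so that later
// frames and layers can reuse it.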
if (segment_id == CR_SEGMENT_ID_BASE && y_sad < cpi->vbp_threshold_sad) { const int block_width = num_8x8_blocks_wide_lookup[BLOCK_64X64]; const int block_height = num_8x8_blocks_high_lookup[BLOCK_64X64]; if (mi_col + block_width / 2 < cm->mi_cols && mi_row + block_height / 2 < cm->mi_rows) { set_block_size(cpi, x, xd, mi_row, mi_col, BLOCK_64X64); x->variance_low[0] = 1; chroma_check(cpi, x, bsize, y_sad, is_key_frame); if (cpi->sf.svc_use_lowres_part && cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); if (cpi->sf.copy_partition_flag) { update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); } return 0; } } // If the y_sad is small enough, copy the partition of the superblock in the // last frame to current frame only if the last frame is not a keyframe. // Stop the copy every cpi->max_copied_frame to refresh the partition. // TODO(jianj) : tune the threshold. if (cpi->sf.copy_partition_flag && y_sad_last < cpi->vbp_threshold_copy && copy_partitioning(cpi, x, xd, mi_row, mi_col, segment_id, sb_offset)) { chroma_check(cpi, x, bsize, y_sad, is_key_frame); if (cpi->sf.svc_use_lowres_part && cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); return 0; } } else { d = VP9_VAR_OFFS; dp = 0; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { switch (xd->bd) { case 10: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_10); break; case 12: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_12); break; case 8: default: d = CONVERT_TO_BYTEPTR(VP9_HIGH_VAR_OFFS_8); break; } } #endif // CONFIG_VP9_HIGHBITDEPTH } if (low_res && threshold_4x4avg < INT64_MAX) CHECK_MEM_ERROR(cm, vt2, vpx_calloc(16, sizeof(*vt2))); // Fill in the entire tree of 8x8 (or 4x4 under some conditions) variances // for splits. for (i = 0; i < 4; i++) { const int x32_idx = ((i & 1) << 5); const int y32_idx = ((i >> 1) << 5); const int i2 = i << 2; force_split[i + 1] = 0; avg_16x16[i] = 0; maxvar_16x16[i] = 0; minvar_16x16[i] = INT_MAX; for (j = 0; j < 4; j++) { const int x16_idx = x32_idx + ((j & 1) << 4); const int y16_idx = y32_idx + ((j >> 1) << 4); const int split_index = 5 + i2 + j; v16x16 *vst = &vt.split[i].split[j]; force_split[split_index] = 0; variance4x4downsample[i2 + j] = 0; if (!is_key_frame) { fill_variance_8x8avg(s, sp, d, dp, x16_idx, y16_idx, vst, #if CONFIG_VP9_HIGHBITDEPTH xd->cur_buf->flags, #endif pixels_wide, pixels_high, is_key_frame); fill_variance_tree(&vt.split[i].split[j], BLOCK_16X16); get_variance(&vt.split[i].split[j].part_variances.none); avg_16x16[i] += vt.split[i].split[j].part_variances.none.variance; if (vt.split[i].split[j].part_variances.none.variance < minvar_16x16[i]) minvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance; if (vt.split[i].split[j].part_variances.none.variance > maxvar_16x16[i]) maxvar_16x16[i] = vt.split[i].split[j].part_variances.none.variance; if (vt.split[i].split[j].part_variances.none.variance > thresholds[2]) { // 16X16 variance is above threshold for split, so force split to 8x8 // for this 16x16 block (this also forces splits for upper levels). 
force_split[split_index] = 1; force_split[i + 1] = 1; force_split[0] = 1; } else if (compute_minmax_variance && vt.split[i].split[j].part_variances.none.variance > thresholds[1] && !cyclic_refresh_segment_id_boosted(segment_id)) { // We have some nominal amount of 16x16 variance (based on average), // compute the minmax over the 8x8 sub-blocks, and if above threshold, // force split to 8x8 block for this 16x16 block. int minmax = compute_minmax_8x8(s, sp, d, dp, x16_idx, y16_idx, #if CONFIG_VP9_HIGHBITDEPTH xd->cur_buf->flags, #endif pixels_wide, pixels_high); int thresh_minmax = (int)cpi->vbp_threshold_minmax; if (x->content_state_sb == kVeryHighSad) thresh_minmax = thresh_minmax << 1; if (minmax > thresh_minmax) { force_split[split_index] = 1; force_split[i + 1] = 1; force_split[0] = 1; } } } if (is_key_frame || (low_res && vt.split[i].split[j].part_variances.none.variance > threshold_4x4avg)) { force_split[split_index] = 0; // Go down to 4x4 down-sampling for variance. variance4x4downsample[i2 + j] = 1; for (k = 0; k < 4; k++) { int x8_idx = x16_idx + ((k & 1) << 3); int y8_idx = y16_idx + ((k >> 1) << 3); v8x8 *vst2 = is_key_frame ? &vst->split[k] : &vt2[i2 + j].split[k]; fill_variance_4x4avg(s, sp, d, dp, x8_idx, y8_idx, vst2, #if CONFIG_VP9_HIGHBITDEPTH xd->cur_buf->flags, #endif pixels_wide, pixels_high, is_key_frame); } } } } if (cpi->noise_estimate.enabled) noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate); // Fill the rest of the variance tree by summing split partition values. avg_32x32 = 0; for (i = 0; i < 4; i++) { const int i2 = i << 2; for (j = 0; j < 4; j++) { if (variance4x4downsample[i2 + j] == 1) { v16x16 *vtemp = (!is_key_frame) ? &vt2[i2 + j] : &vt.split[i].split[j]; for (m = 0; m < 4; m++) fill_variance_tree(&vtemp->split[m], BLOCK_8X8); fill_variance_tree(vtemp, BLOCK_16X16); // If variance of this 16x16 block is above the threshold, force block // to split. This also forces a split on the upper levels. get_variance(&vtemp->part_variances.none); if (vtemp->part_variances.none.variance > thresholds[2]) { force_split[5 + i2 + j] = 1; force_split[i + 1] = 1; force_split[0] = 1; } } } fill_variance_tree(&vt.split[i], BLOCK_32X32); // If variance of this 32x32 block is above the threshold, or if its above // (some threshold of) the average variance over the sub-16x16 blocks, then // force this block to split. This also forces a split on the upper // (64x64) level. if (!force_split[i + 1]) { get_variance(&vt.split[i].part_variances.none); var_32x32 = vt.split[i].part_variances.none.variance; max_var_32x32 = VPXMAX(var_32x32, max_var_32x32); min_var_32x32 = VPXMIN(var_32x32, min_var_32x32); if (vt.split[i].part_variances.none.variance > thresholds[1] || (!is_key_frame && vt.split[i].part_variances.none.variance > (thresholds[1] >> 1) && vt.split[i].part_variances.none.variance > (avg_16x16[i] >> 1))) { force_split[i + 1] = 1; force_split[0] = 1; } else if (!is_key_frame && noise_level < kLow && cm->height <= 360 && (maxvar_16x16[i] - minvar_16x16[i]) > (thresholds[1] >> 1) && maxvar_16x16[i] > thresholds[1]) { force_split[i + 1] = 1; force_split[0] = 1; } avg_32x32 += var_32x32; } } if (!force_split[0]) { fill_variance_tree(&vt, BLOCK_64X64); get_variance(&vt.part_variances.none); // If variance of this 64x64 block is above (some threshold of) the average // variance over the sub-32x32 blocks, then force this block to split. // Only checking this for noise level >= medium for now. 
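// avg_32x32 accumulates the (up to four) non-forced 32x32 variances, so
// when all four contribute, (9 * avg_32x32) >> 5 equals (9 / 8) * (sum / 4):
// the 64x64 split triggers when its variance exceeds 112.5% of the mean
// 32x32 variance.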
if (!is_key_frame && noise_level >= kMedium && vt.part_variances.none.variance > (9 * avg_32x32) >> 5) force_split[0] = 1; // Else if the maximum 32x32 variance minus the minimum 32x32 variance in // a 64x64 block is greater than the threshold and the maximum 32x32 variance // is above a minimum threshold, then force the split of the 64x64 block. // Only check this for low noise. else if (!is_key_frame && noise_level < kMedium && (max_var_32x32 - min_var_32x32) > 3 * (thresholds[0] >> 3) && max_var_32x32 > thresholds[0] >> 1) force_split[0] = 1; } // Now go through the entire structure, splitting every block size until // we get to one that's got a variance lower than our threshold. if (mi_col + 8 > cm->mi_cols || mi_row + 8 > cm->mi_rows || !set_vt_partitioning(cpi, x, xd, &vt, BLOCK_64X64, mi_row, mi_col, thresholds[0], BLOCK_16X16, force_split[0])) { for (i = 0; i < 4; ++i) { const int x32_idx = ((i & 1) << 2); const int y32_idx = ((i >> 1) << 2); const int i2 = i << 2; if (!set_vt_partitioning(cpi, x, xd, &vt.split[i], BLOCK_32X32, (mi_row + y32_idx), (mi_col + x32_idx), thresholds[1], BLOCK_16X16, force_split[i + 1])) { for (j = 0; j < 4; ++j) { const int x16_idx = ((j & 1) << 1); const int y16_idx = ((j >> 1) << 1); // For inter frames: if variance4x4downsample[] == 1 for this 16x16 // block, then the variance is based on 4x4 down-sampling, so use vt2 // in set_vt_partitioning(), otherwise use vt. v16x16 *vtemp = (!is_key_frame && variance4x4downsample[i2 + j] == 1) ? &vt2[i2 + j] : &vt.split[i].split[j]; if (!set_vt_partitioning( cpi, x, xd, vtemp, BLOCK_16X16, mi_row + y32_idx + y16_idx, mi_col + x32_idx + x16_idx, thresholds[2], cpi->vbp_bsize_min, force_split[5 + i2 + j])) { for (k = 0; k < 4; ++k) { const int x8_idx = (k & 1); const int y8_idx = (k >> 1); if (use_4x4_partition) { if (!set_vt_partitioning(cpi, x, xd, &vtemp->split[k], BLOCK_8X8, mi_row + y32_idx + y16_idx + y8_idx, mi_col + x32_idx + x16_idx + x8_idx, thresholds[3], BLOCK_8X8, 0)) { set_block_size( cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx), (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_4X4); } } else { set_block_size( cpi, x, xd, (mi_row + y32_idx + y16_idx + y8_idx), (mi_col + x32_idx + x16_idx + x8_idx), BLOCK_8X8); } } } } } } } if (!frame_is_intra_only(cm) && cpi->sf.copy_partition_flag) { update_prev_partition(cpi, x, segment_id, mi_row, mi_col, sb_offset); } if (!frame_is_intra_only(cm) && cpi->sf.svc_use_lowres_part && cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 2) update_partition_svc(cpi, BLOCK_64X64, mi_row, mi_col); if (cpi->sf.short_circuit_low_temp_var) { set_low_temp_var_flag(cpi, x, xd, &vt, thresholds, ref_frame_partition, mi_col, mi_row); } chroma_check(cpi, x, bsize, y_sad, is_key_frame); if (vt2) vpx_free(vt2); return 0; } #if !CONFIG_REALTIME_ONLY static void update_state(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, BLOCK_SIZE bsize, int output_enabled) { int i, x_idx, y; VP9_COMMON *const cm = &cpi->common; RD_COUNTS *const rdc = &td->rd_counts; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; MODE_INFO *mi = &ctx->mic; MODE_INFO *const xdmi = xd->mi[0]; MODE_INFO *mi_addr = xd->mi[0]; const struct segmentation *const seg = &cm->seg; const int bw = num_8x8_blocks_wide_lookup[mi->sb_type]; const int bh = num_8x8_blocks_high_lookup[mi->sb_type]; const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); const int y_mis = VPXMIN(bh,
cm->mi_rows - mi_row); MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; int w, h; const int mis = cm->mi_stride; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; int max_plane; assert(mi->sb_type == bsize); *mi_addr = *mi; *x->mbmi_ext = ctx->mbmi_ext; // If segmentation is in use. if (seg->enabled) { // For in frame complexity AQ copy the segment id from the segment map. if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) { const uint8_t *const map = seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; mi_addr->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); } // Else for cyclic refresh mode update the segment map, set the segment id // and then update the quantizer. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { vp9_cyclic_refresh_update_segment(cpi, xd->mi[0], mi_row, mi_col, bsize, ctx->rate, ctx->dist, x->skip, p); } } max_plane = is_inter_block(xdmi) ? MAX_MB_PLANE : 1; for (i = 0; i < max_plane; ++i) { p[i].coeff = ctx->coeff_pbuf[i][1]; p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; p[i].eobs = ctx->eobs_pbuf[i][1]; } for (i = max_plane; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][2]; p[i].qcoeff = ctx->qcoeff_pbuf[i][2]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][2]; p[i].eobs = ctx->eobs_pbuf[i][2]; } // Restore the coding context of the MB to that which was in place // when the mode was picked for it. for (y = 0; y < mi_height; y++) for (x_idx = 0; x_idx < mi_width; x_idx++) if ((xd->mb_to_right_edge >> (3 + MI_SIZE_LOG2)) + mi_width > x_idx && (xd->mb_to_bottom_edge >> (3 + MI_SIZE_LOG2)) + mi_height > y) { xd->mi[x_idx + y * mis] = mi_addr; } if (cpi->oxcf.aq_mode != NO_AQ) vp9_init_plane_quantizers(cpi, x); if (is_inter_block(xdmi) && xdmi->sb_type < BLOCK_8X8) { xdmi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; xdmi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; } x->skip = ctx->skip; memcpy(x->zcoeff_blk[xdmi->tx_size], ctx->zcoeff_blk, sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); if (!output_enabled) return; #if CONFIG_INTERNAL_STATS if (frame_is_intra_only(cm)) { static const int kf_mode_index[] = { THR_DC /*DC_PRED*/, THR_V_PRED /*V_PRED*/, THR_H_PRED /*H_PRED*/, THR_D45_PRED /*D45_PRED*/, THR_D135_PRED /*D135_PRED*/, THR_D117_PRED /*D117_PRED*/, THR_D153_PRED /*D153_PRED*/, THR_D207_PRED /*D207_PRED*/, THR_D63_PRED /*D63_PRED*/, THR_TM /*TM_PRED*/, }; ++cpi->mode_chosen_counts[kf_mode_index[xdmi->mode]]; } else { // Note how often each mode is chosen as best. ++cpi->mode_chosen_counts[ctx->best_mode_index]; } #endif if (!frame_is_intra_only(cm)) { if (is_inter_block(xdmi)) { vp9_update_mv_count(td); if (cm->interp_filter == SWITCHABLE) { const int ctx = get_pred_context_switchable_interp(xd); ++td->counts->switchable_interp[ctx][xdmi->interp_filter]; } } rdc->comp_pred_diff[SINGLE_REFERENCE] += ctx->single_pred_diff; rdc->comp_pred_diff[COMPOUND_REFERENCE] += ctx->comp_pred_diff; rdc->comp_pred_diff[REFERENCE_MODE_SELECT] += ctx->hybrid_pred_diff; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) rdc->filter_diff[i] += ctx->best_filter_diff[i]; } for (h = 0; h < y_mis; ++h) { MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; for (w = 0; w < x_mis; ++w) { MV_REF *const mv = frame_mv + w; mv->ref_frame[0] = mi->ref_frame[0]; mv->ref_frame[1] = mi->ref_frame[1]; mv->mv[0].as_int = mi->mv[0].as_int; mv->mv[1].as_int = mi->mv[1].as_int; } } } #endif // !CONFIG_REALTIME_ONLY void vp9_setup_src_planes(MACROBLOCK *x, const YV12_BUFFER_CONFIG
*src, int mi_row, int mi_col) { uint8_t *const buffers[3] = { src->y_buffer, src->u_buffer, src->v_buffer }; const int strides[3] = { src->y_stride, src->uv_stride, src->uv_stride }; int i; // Set current frame pointer. x->e_mbd.cur_buf = src; for (i = 0; i < MAX_MB_PLANE; i++) setup_pred_plane(&x->plane[i].src, buffers[i], strides[i], mi_row, mi_col, NULL, x->e_mbd.plane[i].subsampling_x, x->e_mbd.plane[i].subsampling_y); } static void set_mode_info_seg_skip(MACROBLOCK *x, TX_MODE tx_mode, RD_COST *rd_cost, BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; INTERP_FILTER filter_ref; filter_ref = get_pred_context_switchable_interp(xd); if (filter_ref == SWITCHABLE_FILTERS) filter_ref = EIGHTTAP; mi->sb_type = bsize; mi->mode = ZEROMV; mi->tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[tx_mode]); mi->skip = 1; mi->uv_mode = DC_PRED; mi->ref_frame[0] = LAST_FRAME; mi->ref_frame[1] = NONE; mi->mv[0].as_int = 0; mi->interp_filter = filter_ref; xd->mi[0]->bmi[0].as_mv[0].as_int = 0; x->skip = 1; vp9_rd_cost_init(rd_cost); } #if !CONFIG_REALTIME_ONLY static void set_segment_rdmult(VP9_COMP *const cpi, MACROBLOCK *const x, int mi_row, int mi_col, BLOCK_SIZE bsize, AQ_MODE aq_mode) { VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const uint8_t *const map = cm->seg.update_map ? cpi->segmentation_map : cm->last_frame_seg_map; vp9_init_plane_quantizers(cpi, x); vpx_clear_system_state(); if (aq_mode == NO_AQ || aq_mode == PSNR_AQ) { if (cpi->sf.enable_tpl_model) x->rdmult = x->cb_rdmult; } else if (aq_mode == PERCEPTUAL_AQ) { x->rdmult = x->cb_rdmult; } else if (aq_mode == CYCLIC_REFRESH_AQ) { // If segment is boosted, use rdmult for that segment. if (cyclic_refresh_segment_id_boosted( get_segment_id(cm, map, bsize, mi_row, mi_col))) x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); } else { x->rdmult = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); } if (oxcf->tuning == VP8_TUNE_SSIM) { set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); } } static void rd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *const x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int rate_in_best_rd, int64_t dist_in_best_rd) { VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *mi; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; const AQ_MODE aq_mode = cpi->oxcf.aq_mode; int i, orig_rdmult; int64_t best_rd = INT64_MAX; vpx_clear_system_state(); // Use the lower precision, but faster, 32x32 fdct for mode selection. 
x->use_lp32x32fdct = 1; set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); mi = xd->mi[0]; mi->sb_type = bsize; for (i = 0; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][0]; p[i].qcoeff = ctx->qcoeff_pbuf[i][0]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; p[i].eobs = ctx->eobs_pbuf[i][0]; } ctx->is_coded = 0; ctx->skippable = 0; ctx->pred_pixel_ready = 0; x->skip_recode = 0; // Set to zero to make sure we do not use the previous encoded frame stats. mi->skip = 0; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { x->source_variance = vp9_high_get_sby_perpixel_variance( cpi, &x->plane[0].src, bsize, xd->bd); } else { x->source_variance = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); } #else x->source_variance = vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize); #endif // CONFIG_VP9_HIGHBITDEPTH // Save rdmult before it might be changed, so it can be restored later. orig_rdmult = x->rdmult; if ((cpi->sf.tx_domain_thresh > 0.0) || (cpi->sf.quant_opt_thresh > 0.0)) { double logvar = vp9_log_block_var(cpi, x, bsize); // Check block complexity as part of decision on using pixel or transform // domain distortion in rd tests. x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion && (logvar >= cpi->sf.tx_domain_thresh); // Check block complexity as part of decision on using quantized // coefficient optimisation inside the rd loop. x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt && (logvar <= cpi->sf.quant_opt_thresh); } else { x->block_tx_domain = cpi->sf.allow_txfm_domain_distortion; x->block_qcoeff_opt = cpi->sf.allow_quant_coeff_opt; } set_segment_index(cpi, x, mi_row, mi_col, bsize, 0); set_segment_rdmult(cpi, x, mi_row, mi_col, bsize, aq_mode); if (rate_in_best_rd < INT_MAX && dist_in_best_rd < INT64_MAX) { best_rd = vp9_calculate_rd_cost(x->rdmult, x->rddiv, rate_in_best_rd, dist_in_best_rd); } // Find best coding mode & reconstruct the MB so it is available // as a predictor for MBs that follow in the SB. if (frame_is_intra_only(cm)) { vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, best_rd); } else { if (bsize >= BLOCK_8X8) { if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) vp9_rd_pick_inter_mode_sb_seg_skip(cpi, tile_data, x, rd_cost, bsize, ctx, best_rd); else vp9_rd_pick_inter_mode_sb(cpi, tile_data, x, mi_row, mi_col, rd_cost, bsize, ctx, best_rd); } else { vp9_rd_pick_inter_mode_sub8x8(cpi, tile_data, x, mi_row, mi_col, rd_cost, bsize, ctx, best_rd); } } // Examine the resulting rate and for AQ mode 2 make a segment choice. if ((rd_cost->rate != INT_MAX) && (aq_mode == COMPLEXITY_AQ) && (bsize >= BLOCK_16X16) && (cm->frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref))) { vp9_caq_select_segment(cpi, x, bsize, mi_row, mi_col, rd_cost->rate); } // TODO(jingning) The rate-distortion optimization flow needs to be // refactored to provide proper exit/return handling.
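// RDCOST() combines the two terms using the multipliers set up earlier:
// roughly rate * rdmult (normalized by a constant shift) plus distortion
// scaled by 2^rddiv. INT64_MAX marks an invalid/aborted search.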
if (rd_cost->rate == INT_MAX || rd_cost->dist == INT64_MAX) rd_cost->rdcost = INT64_MAX; else rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); x->rdmult = orig_rdmult; ctx->rate = rd_cost->rate; ctx->dist = rd_cost->dist; } #endif // !CONFIG_REALTIME_ONLY static void update_stats(VP9_COMMON *cm, ThreadData *td) { const MACROBLOCK *x = &td->mb; const MACROBLOCKD *const xd = &x->e_mbd; const MODE_INFO *const mi = xd->mi[0]; const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; const BLOCK_SIZE bsize = mi->sb_type; if (!frame_is_intra_only(cm)) { FRAME_COUNTS *const counts = td->counts; const int inter_block = is_inter_block(mi); const int seg_ref_active = segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_REF_FRAME); if (!seg_ref_active) { counts->intra_inter[get_intra_inter_context(xd)][inter_block]++; // If the segment reference feature is enabled we have only a single // reference frame allowed for the segment so exclude it from // the reference frame counts used to work out probabilities. if (inter_block) { const MV_REFERENCE_FRAME ref0 = mi->ref_frame[0]; if (cm->reference_mode == REFERENCE_MODE_SELECT) counts->comp_inter[vp9_get_reference_mode_context(cm, xd)] [has_second_ref(mi)]++; if (has_second_ref(mi)) { const int idx = cm->ref_frame_sign_bias[cm->comp_fixed_ref]; const int ctx = vp9_get_pred_context_comp_ref_p(cm, xd); const int bit = mi->ref_frame[!idx] == cm->comp_var_ref[1]; counts->comp_ref[ctx][bit]++; } else { counts->single_ref[vp9_get_pred_context_single_ref_p1(xd)][0] [ref0 != LAST_FRAME]++; if (ref0 != LAST_FRAME) counts->single_ref[vp9_get_pred_context_single_ref_p2(xd)][1] [ref0 != GOLDEN_FRAME]++; } } } if (inter_block && !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) { const int mode_ctx = mbmi_ext->mode_context[mi->ref_frame[0]]; if (bsize >= BLOCK_8X8) { const PREDICTION_MODE mode = mi->mode; ++counts->inter_mode[mode_ctx][INTER_OFFSET(mode)]; } else { const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; int idx, idy; for (idy = 0; idy < 2; idy += num_4x4_h) { for (idx = 0; idx < 2; idx += num_4x4_w) { const int j = idy * 2 + idx; const PREDICTION_MODE b_mode = mi->bmi[j].as_mode; ++counts->inter_mode[mode_ctx][INTER_OFFSET(b_mode)]; } } } } } } #if !CONFIG_REALTIME_ONLY static void restore_context(MACROBLOCK *const x, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], BLOCK_SIZE bsize) { MACROBLOCKD *const xd = &x->e_mbd; int p; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int mi_width = num_8x8_blocks_wide_lookup[bsize]; int mi_height = num_8x8_blocks_high_lookup[bsize]; for (p = 0; p < MAX_MB_PLANE; p++) { memcpy(xd->above_context[p] + ((mi_col * 2) >> xd->plane[p].subsampling_x), a + num_4x4_blocks_wide * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); memcpy(xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), l + num_4x4_blocks_high * p, (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } memcpy(xd->above_seg_context + mi_col, sa, sizeof(*xd->above_seg_context) * mi_width); memcpy(xd->left_seg_context + (mi_row & MI_MASK), sl, sizeof(xd->left_seg_context[0]) * mi_height); } static void save_context(MACROBLOCK *const x, int mi_row, int mi_col, ENTROPY_CONTEXT a[16 * MAX_MB_PLANE], 
ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], PARTITION_CONTEXT sa[8], PARTITION_CONTEXT sl[8], BLOCK_SIZE bsize) { const MACROBLOCKD *const xd = &x->e_mbd; int p; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int mi_width = num_8x8_blocks_wide_lookup[bsize]; int mi_height = num_8x8_blocks_high_lookup[bsize]; // buffer the above/left context information of the block in search. for (p = 0; p < MAX_MB_PLANE; ++p) { memcpy(a + num_4x4_blocks_wide * p, xd->above_context[p] + (mi_col * 2 >> xd->plane[p].subsampling_x), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_wide) >> xd->plane[p].subsampling_x); memcpy(l + num_4x4_blocks_high * p, xd->left_context[p] + ((mi_row & MI_MASK) * 2 >> xd->plane[p].subsampling_y), (sizeof(ENTROPY_CONTEXT) * num_4x4_blocks_high) >> xd->plane[p].subsampling_y); } memcpy(sa, xd->above_seg_context + mi_col, sizeof(*xd->above_seg_context) * mi_width); memcpy(sl, xd->left_seg_context + (mi_row & MI_MASK), sizeof(xd->left_seg_context[0]) * mi_height); } static void encode_b(VP9_COMP *cpi, const TileInfo *const tile, ThreadData *td, TOKENEXTRA **tp, int mi_row, int mi_col, int output_enabled, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { MACROBLOCK *const x = &td->mb; set_offsets(cpi, tile, x, mi_row, mi_col, bsize); if (cpi->sf.enable_tpl_model && (cpi->oxcf.aq_mode == NO_AQ || cpi->oxcf.aq_mode == PERCEPTUAL_AQ)) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; x->rdmult = x->cb_rdmult; if (oxcf->tuning == VP8_TUNE_SSIM) { set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &x->rdmult); } } update_state(cpi, td, ctx, mi_row, mi_col, bsize, output_enabled); encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); if (output_enabled) { update_stats(&cpi->common, td); (*tp)->token = EOSB_TOKEN; (*tp)++; } } static void encode_sb(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile, TOKENEXTRA **tp, int mi_row, int mi_col, int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; int ctx; PARTITION_TYPE partition; BLOCK_SIZE subsize = bsize; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; if (bsize >= BLOCK_8X8) { ctx = partition_plane_context(xd, mi_row, mi_col, bsize); subsize = get_subsize(bsize, pc_tree->partitioning); } else { ctx = 0; subsize = BLOCK_4X4; } partition = partition_lookup[bsl][subsize]; if (output_enabled && bsize != BLOCK_4X4) td->counts->partition[ctx][partition]++; switch (partition) { case PARTITION_NONE: encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->none); break; case PARTITION_VERT: encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->vertical[0]); if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { encode_b(cpi, tile, td, tp, mi_row, mi_col + hbs, output_enabled, subsize, &pc_tree->vertical[1]); } break; case PARTITION_HORZ: encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->horizontal[0]); if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { encode_b(cpi, tile, td, tp, mi_row + hbs, mi_col, output_enabled, subsize, &pc_tree->horizontal[1]); } break; default: assert(partition == PARTITION_SPLIT); if (bsize == BLOCK_8X8) { encode_b(cpi, tile, td, tp, mi_row, mi_col, output_enabled, subsize, pc_tree->leaf_split[0]); } else { encode_sb(cpi, td, tile, tp, mi_row, mi_col, output_enabled, 
subsize, pc_tree->split[0]); encode_sb(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize, pc_tree->split[1]); encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize, pc_tree->split[2]); encode_sb(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs, output_enabled, subsize, pc_tree->split[3]); } break; } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) update_partition_context(xd, mi_row, mi_col, subsize, bsize); } #endif // !CONFIG_REALTIME_ONLY // Check to see if the given partition size is allowed for a specified number // of 8x8 block rows and columns remaining in the image. // If not then return the largest allowed partition size static BLOCK_SIZE find_partition_size(BLOCK_SIZE bsize, int rows_left, int cols_left, int *bh, int *bw) { if (rows_left <= 0 || cols_left <= 0) { return VPXMIN(bsize, BLOCK_8X8); } else { for (; bsize > 0; bsize -= 3) { *bh = num_8x8_blocks_high_lookup[bsize]; *bw = num_8x8_blocks_wide_lookup[bsize]; if ((*bh <= rows_left) && (*bw <= cols_left)) { break; } } } return bsize; } static void set_partial_b64x64_partition(MODE_INFO *mi, int mis, int bh_in, int bw_in, int row8x8_remaining, int col8x8_remaining, BLOCK_SIZE bsize, MODE_INFO **mi_8x8) { int bh = bh_in; int r, c; for (r = 0; r < MI_BLOCK_SIZE; r += bh) { int bw = bw_in; for (c = 0; c < MI_BLOCK_SIZE; c += bw) { const int index = r * mis + c; mi_8x8[index] = mi + index; mi_8x8[index]->sb_type = find_partition_size( bsize, row8x8_remaining - r, col8x8_remaining - c, &bh, &bw); } } } // This function attempts to set all mode info entries in a given SB64 // to the same block partition size. // However, at the bottom and right borders of the image the requested size // may not be allowed in which case this code attempts to choose the largest // allowable partition. static void set_fixed_partitioning(VP9_COMP *cpi, const TileInfo *const tile, MODE_INFO **mi_8x8, int mi_row, int mi_col, BLOCK_SIZE bsize) { VP9_COMMON *const cm = &cpi->common; const int mis = cm->mi_stride; const int row8x8_remaining = tile->mi_row_end - mi_row; const int col8x8_remaining = tile->mi_col_end - mi_col; int block_row, block_col; MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; int bh = num_8x8_blocks_high_lookup[bsize]; int bw = num_8x8_blocks_wide_lookup[bsize]; assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); // Apply the requested partition size to the SB64 if it is all "in image" if ((col8x8_remaining >= MI_BLOCK_SIZE) && (row8x8_remaining >= MI_BLOCK_SIZE)) { for (block_row = 0; block_row < MI_BLOCK_SIZE; block_row += bh) { for (block_col = 0; block_col < MI_BLOCK_SIZE; block_col += bw) { int index = block_row * mis + block_col; mi_8x8[index] = mi_upper_left + index; mi_8x8[index]->sb_type = bsize; } } } else { // Else this is a partial SB64. 
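    // Example of the fallback below: with a requested BLOCK_32X32
    // (bw = bh = 4 in 8x8-block units) but only three 8x8 columns left at
    // the right edge, find_partition_size() steps down through the square
    // sizes (the "bsize -= 3" stride walks 64x64 -> 32x32 -> 16x16 -> 8x8
    // in the BLOCK_SIZE enum), so those entries get BLOCK_16X16 with
    // BLOCK_8X8 in the final column.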
set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining, col8x8_remaining, bsize, mi_8x8); } } static const struct { int row; int col; } coord_lookup[16] = { // 32x32 index = 0 { 0, 0 }, { 0, 2 }, { 2, 0 }, { 2, 2 }, // 32x32 index = 1 { 0, 4 }, { 0, 6 }, { 2, 4 }, { 2, 6 }, // 32x32 index = 2 { 4, 0 }, { 4, 2 }, { 6, 0 }, { 6, 2 }, // 32x32 index = 3 { 4, 4 }, { 4, 6 }, { 6, 4 }, { 6, 6 }, }; static void set_source_var_based_partition(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *const x, MODE_INFO **mi_8x8, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; const int mis = cm->mi_stride; const int row8x8_remaining = tile->mi_row_end - mi_row; const int col8x8_remaining = tile->mi_col_end - mi_col; MODE_INFO *mi_upper_left = cm->mi + mi_row * mis + mi_col; vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); assert((row8x8_remaining > 0) && (col8x8_remaining > 0)); // In-image SB64 if ((col8x8_remaining >= MI_BLOCK_SIZE) && (row8x8_remaining >= MI_BLOCK_SIZE)) { int i, j; int index; diff d32[4]; const int offset = (mi_row >> 1) * cm->mb_cols + (mi_col >> 1); int is_larger_better = 0; int use32x32 = 0; unsigned int thr = cpi->source_var_thresh; memset(d32, 0, 4 * sizeof(diff)); for (i = 0; i < 4; i++) { diff *d16[4]; for (j = 0; j < 4; j++) { int b_mi_row = coord_lookup[i * 4 + j].row; int b_mi_col = coord_lookup[i * 4 + j].col; int boffset = b_mi_row / 2 * cm->mb_cols + b_mi_col / 2; d16[j] = cpi->source_diff_var + offset + boffset; index = b_mi_row * mis + b_mi_col; mi_8x8[index] = mi_upper_left + index; mi_8x8[index]->sb_type = BLOCK_16X16; // TODO(yunqingwang): If d16[j].var is very large, use 8x8 partition // size to further improve quality. } is_larger_better = (d16[0]->var < thr) && (d16[1]->var < thr) && (d16[2]->var < thr) && (d16[3]->var < thr); // Use 32x32 partition if (is_larger_better) { use32x32 += 1; for (j = 0; j < 4; j++) { d32[i].sse += d16[j]->sse; d32[i].sum += d16[j]->sum; } d32[i].var = (unsigned int)(d32[i].sse - (unsigned int)(((int64_t)d32[i].sum * d32[i].sum) >> 10)); index = coord_lookup[i * 4].row * mis + coord_lookup[i * 4].col; mi_8x8[index] = mi_upper_left + index; mi_8x8[index]->sb_type = BLOCK_32X32; } } if (use32x32 == 4) { thr <<= 1; is_larger_better = (d32[0].var < thr) && (d32[1].var < thr) && (d32[2].var < thr) && (d32[3].var < thr); // Use 64x64 partition if (is_larger_better) { mi_8x8[0] = mi_upper_left; mi_8x8[0]->sb_type = BLOCK_64X64; } } } else { // partial in-image SB64 int bh = num_8x8_blocks_high_lookup[BLOCK_16X16]; int bw = num_8x8_blocks_wide_lookup[BLOCK_16X16]; set_partial_b64x64_partition(mi_upper_left, mis, bh, bw, row8x8_remaining, col8x8_remaining, BLOCK_16X16, mi_8x8); } } static void update_state_rt(VP9_COMP *cpi, ThreadData *td, PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, int bsize) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; struct macroblock_plane *const p = x->plane; const struct segmentation *const seg = &cm->seg; const int bw = num_8x8_blocks_wide_lookup[mi->sb_type]; const int bh = num_8x8_blocks_high_lookup[mi->sb_type]; const int x_mis = VPXMIN(bw, cm->mi_cols - mi_col); const int y_mis = VPXMIN(bh, cm->mi_rows - mi_row); *(xd->mi[0]) = ctx->mic; *(x->mbmi_ext) = ctx->mbmi_ext; if (seg->enabled && (cpi->oxcf.aq_mode != NO_AQ || cpi->roi.enabled)) { // Setting segmentation map for cyclic_refresh. 
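    // Two update paths: cyclic refresh maintains its own segmentation map
    // and updates it per block as coding decisions are made, while the
    // other AQ / ROI modes simply read the segment id back from the active
    // map (the newly coded map when it is being updated this frame,
    // otherwise the map inherited from the previous frame). Either way the
    // plane quantizers are then re-initialized for the chosen segment.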
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { vp9_cyclic_refresh_update_segment(cpi, mi, mi_row, mi_col, bsize, ctx->rate, ctx->dist, x->skip, p); } else { const uint8_t *const map = seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; mi->segment_id = get_segment_id(cm, map, bsize, mi_row, mi_col); } vp9_init_plane_quantizers(cpi, x); } if (is_inter_block(mi)) { vp9_update_mv_count(td); if (cm->interp_filter == SWITCHABLE) { const int pred_ctx = get_pred_context_switchable_interp(xd); ++td->counts->switchable_interp[pred_ctx][mi->interp_filter]; } if (mi->sb_type < BLOCK_8X8) { mi->mv[0].as_int = mi->bmi[3].as_mv[0].as_int; mi->mv[1].as_int = mi->bmi[3].as_mv[1].as_int; } } if (cm->use_prev_frame_mvs || !cm->error_resilient_mode || (cpi->svc.use_base_mv && cpi->svc.number_spatial_layers > 1 && cpi->svc.spatial_layer_id != cpi->svc.number_spatial_layers - 1)) { MV_REF *const frame_mvs = cm->cur_frame->mvs + mi_row * cm->mi_cols + mi_col; int w, h; for (h = 0; h < y_mis; ++h) { MV_REF *const frame_mv = frame_mvs + h * cm->mi_cols; for (w = 0; w < x_mis; ++w) { MV_REF *const mv = frame_mv + w; mv->ref_frame[0] = mi->ref_frame[0]; mv->ref_frame[1] = mi->ref_frame[1]; mv->mv[0].as_int = mi->mv[0].as_int; mv->mv[1].as_int = mi->mv[1].as_int; } } } x->skip = ctx->skip; x->skip_txfm[0] = (mi->segment_id || xd->lossless) ? 0 : ctx->skip_txfm[0]; } static void encode_b_rt(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile, TOKENEXTRA **tp, int mi_row, int mi_col, int output_enabled, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { MACROBLOCK *const x = &td->mb; set_offsets(cpi, tile, x, mi_row, mi_col, bsize); update_state_rt(cpi, td, ctx, mi_row, mi_col, bsize); encode_superblock(cpi, td, tp, output_enabled, mi_row, mi_col, bsize, ctx); update_stats(&cpi->common, td); (*tp)->token = EOSB_TOKEN; (*tp)++; } static void encode_sb_rt(VP9_COMP *cpi, ThreadData *td, const TileInfo *const tile, TOKENEXTRA **tp, int mi_row, int mi_col, int output_enabled, BLOCK_SIZE bsize, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; int ctx; PARTITION_TYPE partition; BLOCK_SIZE subsize; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; if (bsize >= BLOCK_8X8) { const int idx_str = xd->mi_stride * mi_row + mi_col; MODE_INFO **mi_8x8 = cm->mi_grid_visible + idx_str; ctx = partition_plane_context(xd, mi_row, mi_col, bsize); subsize = mi_8x8[0]->sb_type; } else { ctx = 0; subsize = BLOCK_4X4; } partition = partition_lookup[bsl][subsize]; if (output_enabled && bsize != BLOCK_4X4) td->counts->partition[ctx][partition]++; switch (partition) { case PARTITION_NONE: encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->none); break; case PARTITION_VERT: encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->vertical[0]); if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { encode_b_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled, subsize, &pc_tree->vertical[1]); } break; case PARTITION_HORZ: encode_b_rt(cpi, td, tile, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->horizontal[0]); if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { encode_b_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled, subsize, &pc_tree->horizontal[1]); } break; default: assert(partition == PARTITION_SPLIT); subsize = get_subsize(bsize, PARTITION_SPLIT); encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col, 
                   output_enabled, subsize, pc_tree->split[0]);
      encode_sb_rt(cpi, td, tile, tp, mi_row, mi_col + hbs, output_enabled,
                   subsize, pc_tree->split[1]);
      encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col, output_enabled,
                   subsize, pc_tree->split[2]);
      encode_sb_rt(cpi, td, tile, tp, mi_row + hbs, mi_col + hbs,
                   output_enabled, subsize, pc_tree->split[3]);
      break;
  }

  if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8)
    update_partition_context(xd, mi_row, mi_col, subsize, bsize);
}

#if !CONFIG_REALTIME_ONLY
static void rd_use_partition(VP9_COMP *cpi, ThreadData *td,
                             TileDataEnc *tile_data, MODE_INFO **mi_8x8,
                             TOKENEXTRA **tp, int mi_row, int mi_col,
                             BLOCK_SIZE bsize, int *rate, int64_t *dist,
                             int do_recon, PC_TREE *pc_tree) {
  VP9_COMMON *const cm = &cpi->common;
  TileInfo *const tile_info = &tile_data->tile_info;
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  const int mis = cm->mi_stride;
  const int bsl = b_width_log2_lookup[bsize];
  const int mi_step = num_4x4_blocks_wide_lookup[bsize] / 2;
  const int bss = (1 << bsl) / 4;
  int i, pl;
  PARTITION_TYPE partition = PARTITION_NONE;
  BLOCK_SIZE subsize;
  ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE];
  PARTITION_CONTEXT sl[8], sa[8];
  RD_COST last_part_rdc, none_rdc, chosen_rdc;
  BLOCK_SIZE sub_subsize = BLOCK_4X4;
  int splits_below = 0;
  BLOCK_SIZE bs_type = mi_8x8[0]->sb_type;
  int do_partition_search = 1;
  PICK_MODE_CONTEXT *ctx = &pc_tree->none;

  if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return;

  assert(num_4x4_blocks_wide_lookup[bsize] ==
         num_4x4_blocks_high_lookup[bsize]);

  vp9_rd_cost_reset(&last_part_rdc);
  vp9_rd_cost_reset(&none_rdc);
  vp9_rd_cost_reset(&chosen_rdc);

  partition = partition_lookup[bsl][bs_type];
  subsize = get_subsize(bsize, partition);

  pc_tree->partitioning = partition;
  save_context(x, mi_row, mi_col, a, l, sa, sl, bsize);

  if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ) {
    set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize);
    x->mb_energy = vp9_block_energy(cpi, x, bsize);
  }

  if (do_partition_search &&
      cpi->sf.partition_search_type == SEARCH_PARTITION &&
      cpi->sf.adjust_partitioning_from_last_frame) {
    // Check if any of the sub blocks are further split.
    if (partition == PARTITION_SPLIT && subsize > BLOCK_8X8) {
      sub_subsize = get_subsize(subsize, PARTITION_SPLIT);
      splits_below = 1;
      for (i = 0; i < 4; i++) {
        int jj = i >> 1, ii = i & 0x01;
        MODE_INFO *this_mi = mi_8x8[jj * bss * mis + ii * bss];
        if (this_mi && this_mi->sb_type >= sub_subsize) {
          splits_below = 0;
        }
      }
    }

    // If the partition is not PARTITION_NONE, try PARTITION_NONE, unless
    // each of the 4 splits is split even further.
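    // Rationale for the check below: when all four quadrants of the last
    // frame's partitioning were themselves split further, PARTITION_NONE at
    // this level is very unlikely to win, so the extra rd_pick_sb_modes()
    // call for it is skipped. The edge checks also make sure the whole
    // block lies inside the frame, since PARTITION_NONE cannot code a
    // partial block.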
if (partition != PARTITION_NONE && !splits_below && mi_row + (mi_step >> 1) < cm->mi_rows && mi_col + (mi_step >> 1) < cm->mi_cols) { pc_tree->partitioning = PARTITION_NONE; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &none_rdc, bsize, ctx, INT_MAX, INT64_MAX); pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (none_rdc.rate < INT_MAX) { none_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; none_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, none_rdc.rate, none_rdc.dist); } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); mi_8x8[0]->sb_type = bs_type; pc_tree->partitioning = partition; } } switch (partition) { case PARTITION_NONE: rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, bsize, ctx, INT_MAX, INT64_MAX); break; case PARTITION_HORZ: pc_tree->horizontal[0].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, subsize, &pc_tree->horizontal[0], INT_MAX, INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_row + (mi_step >> 1) < cm->mi_rows) { RD_COST tmp_rdc; PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; vp9_rd_cost_init(&tmp_rdc); update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); pc_tree->horizontal[1].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row + (mi_step >> 1), mi_col, &tmp_rdc, subsize, &pc_tree->horizontal[1], INT_MAX, INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp9_rd_cost_reset(&last_part_rdc); break; } last_part_rdc.rate += tmp_rdc.rate; last_part_rdc.dist += tmp_rdc.dist; last_part_rdc.rdcost += tmp_rdc.rdcost; } break; case PARTITION_VERT: pc_tree->vertical[0].skip_ref_frame_mask = 0; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, subsize, &pc_tree->vertical[0], INT_MAX, INT64_MAX); if (last_part_rdc.rate != INT_MAX && bsize >= BLOCK_8X8 && mi_col + (mi_step >> 1) < cm->mi_cols) { RD_COST tmp_rdc; PICK_MODE_CONTEXT *ctx = &pc_tree->vertical[0]; vp9_rd_cost_init(&tmp_rdc); update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); pc_tree->vertical[bsize > BLOCK_8X8].skip_ref_frame_mask = 0; rd_pick_sb_modes( cpi, tile_data, x, mi_row, mi_col + (mi_step >> 1), &tmp_rdc, subsize, &pc_tree->vertical[bsize > BLOCK_8X8], INT_MAX, INT64_MAX); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp9_rd_cost_reset(&last_part_rdc); break; } last_part_rdc.rate += tmp_rdc.rate; last_part_rdc.dist += tmp_rdc.dist; last_part_rdc.rdcost += tmp_rdc.rdcost; } break; default: assert(partition == PARTITION_SPLIT); if (bsize == BLOCK_8X8) { rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &last_part_rdc, subsize, pc_tree->leaf_split[0], INT_MAX, INT64_MAX); break; } last_part_rdc.rate = 0; last_part_rdc.dist = 0; last_part_rdc.rdcost = 0; for (i = 0; i < 4; i++) { int x_idx = (i & 1) * (mi_step >> 1); int y_idx = (i >> 1) * (mi_step >> 1); int jj = i >> 1, ii = i & 0x01; RD_COST tmp_rdc; if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; vp9_rd_cost_init(&tmp_rdc); rd_use_partition(cpi, td, tile_data, mi_8x8 + jj * bss * mis + ii * bss, tp, mi_row + y_idx, mi_col + x_idx, subsize, &tmp_rdc.rate, &tmp_rdc.dist, i != 3, pc_tree->split[i]); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp9_rd_cost_reset(&last_part_rdc); break; } last_part_rdc.rate += tmp_rdc.rate; last_part_rdc.dist += tmp_rdc.dist; } break; } pl = 
partition_plane_context(xd, mi_row, mi_col, bsize); if (last_part_rdc.rate < INT_MAX) { last_part_rdc.rate += cpi->partition_cost[pl][partition]; last_part_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, last_part_rdc.rate, last_part_rdc.dist); } if (do_partition_search && cpi->sf.adjust_partitioning_from_last_frame && cpi->sf.partition_search_type == SEARCH_PARTITION && partition != PARTITION_SPLIT && bsize > BLOCK_8X8 && (mi_row + mi_step < cm->mi_rows || mi_row + (mi_step >> 1) == cm->mi_rows) && (mi_col + mi_step < cm->mi_cols || mi_col + (mi_step >> 1) == cm->mi_cols)) { BLOCK_SIZE split_subsize = get_subsize(bsize, PARTITION_SPLIT); chosen_rdc.rate = 0; chosen_rdc.dist = 0; restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); pc_tree->partitioning = PARTITION_SPLIT; // Split partition. for (i = 0; i < 4; i++) { int x_idx = (i & 1) * (mi_step >> 1); int y_idx = (i >> 1) * (mi_step >> 1); RD_COST tmp_rdc; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; if ((mi_row + y_idx >= cm->mi_rows) || (mi_col + x_idx >= cm->mi_cols)) continue; save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); pc_tree->split[i]->partitioning = PARTITION_NONE; rd_pick_sb_modes(cpi, tile_data, x, mi_row + y_idx, mi_col + x_idx, &tmp_rdc, split_subsize, &pc_tree->split[i]->none, INT_MAX, INT64_MAX); restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); if (tmp_rdc.rate == INT_MAX || tmp_rdc.dist == INT64_MAX) { vp9_rd_cost_reset(&chosen_rdc); break; } chosen_rdc.rate += tmp_rdc.rate; chosen_rdc.dist += tmp_rdc.dist; if (i != 3) encode_sb(cpi, td, tile_info, tp, mi_row + y_idx, mi_col + x_idx, 0, split_subsize, pc_tree->split[i]); pl = partition_plane_context(xd, mi_row + y_idx, mi_col + x_idx, split_subsize); chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; } pl = partition_plane_context(xd, mi_row, mi_col, bsize); if (chosen_rdc.rate < INT_MAX) { chosen_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; chosen_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, chosen_rdc.rate, chosen_rdc.dist); } } // If last_part is better set the partitioning to that. if (last_part_rdc.rdcost < chosen_rdc.rdcost) { mi_8x8[0]->sb_type = bsize; if (bsize >= BLOCK_8X8) pc_tree->partitioning = partition; chosen_rdc = last_part_rdc; } // If none was better set the partitioning to that. if (none_rdc.rdcost < chosen_rdc.rdcost) { if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; chosen_rdc = none_rdc; } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); // We must have chosen a partitioning and encoding or we'll fail later on. // No other opportunities for success. if (bsize == BLOCK_64X64) assert(chosen_rdc.rate < INT_MAX && chosen_rdc.dist < INT64_MAX); if (do_recon) { int output_enabled = (bsize == BLOCK_64X64); encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); } *rate = chosen_rdc.rate; *dist = chosen_rdc.dist; } static const BLOCK_SIZE min_partition_size[BLOCK_SIZES] = { BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16 }; static const BLOCK_SIZE max_partition_size[BLOCK_SIZES] = { BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64, BLOCK_64X64 }; // Look at all the mode_info entries for blocks that are part of this // partition and find the min and max values for sb_type. 
// At the moment this is designed to work on a 64x64 SB but could be // adjusted to use a size parameter. // // The min and max are assumed to have been initialized prior to calling this // function so repeat calls can accumulate a min and max of more than one sb64. static void get_sb_partition_size_range(MACROBLOCKD *xd, MODE_INFO **mi_8x8, BLOCK_SIZE *min_block_size, BLOCK_SIZE *max_block_size, int bs_hist[BLOCK_SIZES]) { int sb_width_in_blocks = MI_BLOCK_SIZE; int sb_height_in_blocks = MI_BLOCK_SIZE; int i, j; int index = 0; // Check the sb_type for each block that belongs to this region. for (i = 0; i < sb_height_in_blocks; ++i) { for (j = 0; j < sb_width_in_blocks; ++j) { MODE_INFO *mi = mi_8x8[index + j]; BLOCK_SIZE sb_type = mi ? mi->sb_type : 0; bs_hist[sb_type]++; *min_block_size = VPXMIN(*min_block_size, sb_type); *max_block_size = VPXMAX(*max_block_size, sb_type); } index += xd->mi_stride; } } // Next square block size less or equal than current block size. static const BLOCK_SIZE next_square_size[BLOCK_SIZES] = { BLOCK_4X4, BLOCK_4X4, BLOCK_4X4, BLOCK_8X8, BLOCK_8X8, BLOCK_8X8, BLOCK_16X16, BLOCK_16X16, BLOCK_16X16, BLOCK_32X32, BLOCK_32X32, BLOCK_32X32, BLOCK_64X64 }; // Look at neighboring blocks and set a min and max partition size based on // what they chose. static void rd_auto_partition_range(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCKD *const xd, int mi_row, int mi_col, BLOCK_SIZE *min_block_size, BLOCK_SIZE *max_block_size) { VP9_COMMON *const cm = &cpi->common; MODE_INFO **mi = xd->mi; const int left_in_image = !!xd->left_mi; const int above_in_image = !!xd->above_mi; const int row8x8_remaining = tile->mi_row_end - mi_row; const int col8x8_remaining = tile->mi_col_end - mi_col; int bh, bw; BLOCK_SIZE min_size = BLOCK_4X4; BLOCK_SIZE max_size = BLOCK_64X64; int bs_hist[BLOCK_SIZES] = { 0 }; // Trap case where we do not have a prediction. if (left_in_image || above_in_image || cm->frame_type != KEY_FRAME) { // Default "min to max" and "max to min" min_size = BLOCK_64X64; max_size = BLOCK_4X4; // NOTE: each call to get_sb_partition_size_range() uses the previous // passed in values for min and max as a starting point. // Find the min and max partition used in previous frame at this location if (cm->frame_type != KEY_FRAME) { MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[mi_row * xd->mi_stride + mi_col]; get_sb_partition_size_range(xd, prev_mi, &min_size, &max_size, bs_hist); } // Find the min and max partition sizes used in the left SB64 if (left_in_image) { MODE_INFO **left_sb64_mi = &mi[-MI_BLOCK_SIZE]; get_sb_partition_size_range(xd, left_sb64_mi, &min_size, &max_size, bs_hist); } // Find the min and max partition sizes used in the above SB64. if (above_in_image) { MODE_INFO **above_sb64_mi = &mi[-xd->mi_stride * MI_BLOCK_SIZE]; get_sb_partition_size_range(xd, above_sb64_mi, &min_size, &max_size, bs_hist); } // Adjust observed min and max for "relaxed" auto partition case. if (cpi->sf.auto_min_max_partition_size == RELAXED_NEIGHBORING_MIN_MAX) { min_size = min_partition_size[min_size]; max_size = max_partition_size[max_size]; } } // Check border cases where max and min from neighbors may not be legal. max_size = find_partition_size(max_size, row8x8_remaining, col8x8_remaining, &bh, &bw); // Test for blocks at the edge of the active image. // This may be the actual edge of the image or where there are formatting // bars. 
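  // Along such an edge the partitioning is dictated by the boundary rather
  // than by neighboring-block statistics, so the lower bound is relaxed all
  // the way down to 4x4 instead of being derived from the neighbors.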
if (vp9_active_edge_sb(cpi, mi_row, mi_col)) { min_size = BLOCK_4X4; } else { min_size = VPXMIN(cpi->sf.rd_auto_partition_min_limit, VPXMIN(min_size, max_size)); } // When use_square_partition_only is true, make sure at least one square // partition is allowed by selecting the next smaller square size as // *min_block_size. if (cpi->sf.use_square_partition_only && next_square_size[max_size] < min_size) { min_size = next_square_size[max_size]; } *min_block_size = min_size; *max_block_size = max_size; } // TODO(jingning) refactor functions setting partition search range static void set_partition_range(VP9_COMMON *cm, MACROBLOCKD *xd, int mi_row, int mi_col, BLOCK_SIZE bsize, BLOCK_SIZE *min_bs, BLOCK_SIZE *max_bs) { int mi_width = num_8x8_blocks_wide_lookup[bsize]; int mi_height = num_8x8_blocks_high_lookup[bsize]; int idx, idy; MODE_INFO *mi; const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[idx_str]; BLOCK_SIZE bs, min_size, max_size; min_size = BLOCK_64X64; max_size = BLOCK_4X4; if (prev_mi) { for (idy = 0; idy < mi_height; ++idy) { for (idx = 0; idx < mi_width; ++idx) { mi = prev_mi[idy * cm->mi_stride + idx]; bs = mi ? mi->sb_type : bsize; min_size = VPXMIN(min_size, bs); max_size = VPXMAX(max_size, bs); } } } if (xd->left_mi) { for (idy = 0; idy < mi_height; ++idy) { mi = xd->mi[idy * cm->mi_stride - 1]; bs = mi ? mi->sb_type : bsize; min_size = VPXMIN(min_size, bs); max_size = VPXMAX(max_size, bs); } } if (xd->above_mi) { for (idx = 0; idx < mi_width; ++idx) { mi = xd->mi[idx - cm->mi_stride]; bs = mi ? mi->sb_type : bsize; min_size = VPXMIN(min_size, bs); max_size = VPXMAX(max_size, bs); } } if (min_size == max_size) { min_size = min_partition_size[min_size]; max_size = max_partition_size[max_size]; } *min_bs = min_size; *max_bs = max_size; } #endif // !CONFIG_REALTIME_ONLY static INLINE void store_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { memcpy(ctx->pred_mv, x->pred_mv, sizeof(x->pred_mv)); } static INLINE void load_pred_mv(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx) { memcpy(x->pred_mv, ctx->pred_mv, sizeof(x->pred_mv)); } #if CONFIG_FP_MB_STATS const int num_16x16_blocks_wide_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 4, 4 }; const int num_16x16_blocks_high_lookup[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1, 2, 1, 2, 4, 2, 4 }; const int qindex_skip_threshold_lookup[BLOCK_SIZES] = { 0, 10, 10, 30, 40, 40, 60, 80, 80, 90, 100, 100, 120 }; const int qindex_split_threshold_lookup[BLOCK_SIZES] = { 0, 3, 3, 7, 15, 15, 30, 40, 40, 60, 80, 80, 120 }; const int complexity_16x16_blocks_threshold[BLOCK_SIZES] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 4, 4, 6 }; typedef enum { MV_ZERO = 0, MV_LEFT = 1, MV_UP = 2, MV_RIGHT = 3, MV_DOWN = 4, MV_INVALID } MOTION_DIRECTION; static INLINE MOTION_DIRECTION get_motion_direction_fp(uint8_t fp_byte) { if (fp_byte & FPMB_MOTION_ZERO_MASK) { return MV_ZERO; } else if (fp_byte & FPMB_MOTION_LEFT_MASK) { return MV_LEFT; } else if (fp_byte & FPMB_MOTION_RIGHT_MASK) { return MV_RIGHT; } else if (fp_byte & FPMB_MOTION_UP_MASK) { return MV_UP; } else { return MV_DOWN; } } static INLINE int get_motion_inconsistency(MOTION_DIRECTION this_mv, MOTION_DIRECTION that_mv) { if (this_mv == that_mv) { return 0; } else { return abs(this_mv - that_mv) == 2 ? 2 : 1; } } #endif // Calculate prediction based on the given input features and neural net config. // Assume there are no more than NN_MAX_NODES_PER_LAYER nodes in each hidden // layer. 
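// A worked example of the propagation below (illustrative numbers only;
// the real configs live in vp9_partition_models.h). With one hidden layer
// of two nodes:
//   cfg.num_inputs = 2;  cfg.num_outputs = 1;  cfg.num_hidden_layers = 1;
//   cfg.num_hidden_nodes[0] = 2;
//   cfg.weights[0] -> { 0.5f, -0.25f, 0.1f, 0.3f }  // one row per node
//   cfg.bias[0]    -> { 0.0f, 0.1f }
//   cfg.weights[1] -> { 1.0f, -1.0f }
//   cfg.bias[1]    -> { 0.0f }
// then for features = { 1, 2 }:
//   hidden = relu(W1 * x + b1) = { 0.0, 0.8 }
//   output = W2 * hidden + b2  = -0.8
// The two halves of buf[] ping-pong as input/output between layers.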
static void nn_predict(const float *features, const NN_CONFIG *nn_config, float *output) { int num_input_nodes = nn_config->num_inputs; int buf_index = 0; float buf[2][NN_MAX_NODES_PER_LAYER]; const float *input_nodes = features; // Propagate hidden layers. const int num_layers = nn_config->num_hidden_layers; int layer, node, i; assert(num_layers <= NN_MAX_HIDDEN_LAYERS); for (layer = 0; layer < num_layers; ++layer) { const float *weights = nn_config->weights[layer]; const float *bias = nn_config->bias[layer]; float *output_nodes = buf[buf_index]; const int num_output_nodes = nn_config->num_hidden_nodes[layer]; assert(num_output_nodes < NN_MAX_NODES_PER_LAYER); for (node = 0; node < num_output_nodes; ++node) { float val = 0.0f; for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i]; val += bias[node]; // ReLU as activation function. val = VPXMAX(val, 0.0f); output_nodes[node] = val; weights += num_input_nodes; } num_input_nodes = num_output_nodes; input_nodes = output_nodes; buf_index = 1 - buf_index; } // Final output layer. { const float *weights = nn_config->weights[num_layers]; for (node = 0; node < nn_config->num_outputs; ++node) { const float *bias = nn_config->bias[num_layers]; float val = 0.0f; for (i = 0; i < num_input_nodes; ++i) val += weights[i] * input_nodes[i]; output[node] = val + bias[node]; weights += num_input_nodes; } } } #if !CONFIG_REALTIME_ONLY #define FEATURES 7 // Machine-learning based partition search early termination. // Return 1 to skip split and rect partitions. static int ml_pruning_partition(VP9_COMMON *const cm, MACROBLOCKD *const xd, PICK_MODE_CONTEXT *ctx, int mi_row, int mi_col, BLOCK_SIZE bsize) { const int mag_mv = abs(ctx->mic.mv[0].as_mv.col) + abs(ctx->mic.mv[0].as_mv.row); const int left_in_image = !!xd->left_mi; const int above_in_image = !!xd->above_mi; MODE_INFO **prev_mi = &cm->prev_mi_grid_visible[mi_col + cm->mi_stride * mi_row]; int above_par = 0; // above_partitioning int left_par = 0; // left_partitioning int last_par = 0; // last_partitioning int offset = 0; int i; BLOCK_SIZE context_size; const NN_CONFIG *nn_config = NULL; const float *mean, *sd, *linear_weights; float nn_score, linear_score; float features[FEATURES]; assert(b_width_log2_lookup[bsize] == b_height_log2_lookup[bsize]); vpx_clear_system_state(); switch (bsize) { case BLOCK_64X64: offset = 0; nn_config = &vp9_partition_nnconfig_64x64; break; case BLOCK_32X32: offset = 8; nn_config = &vp9_partition_nnconfig_32x32; break; case BLOCK_16X16: offset = 16; nn_config = &vp9_partition_nnconfig_16x16; break; default: assert(0 && "Unexpected block size."); return 0; } if (above_in_image) { context_size = xd->above_mi->sb_type; if (context_size < bsize) above_par = 2; else if (context_size == bsize) above_par = 1; } if (left_in_image) { context_size = xd->left_mi->sb_type; if (context_size < bsize) left_par = 2; else if (context_size == bsize) left_par = 1; } if (prev_mi) { context_size = prev_mi[0]->sb_type; if (context_size < bsize) last_par = 2; else if (context_size == bsize) last_par = 1; } mean = &vp9_partition_feature_mean[offset]; sd = &vp9_partition_feature_std[offset]; features[0] = ((float)ctx->rate - mean[0]) / sd[0]; features[1] = ((float)ctx->dist - mean[1]) / sd[1]; features[2] = ((float)mag_mv / 2 - mean[2]) * sd[2]; features[3] = ((float)(left_par + above_par) / 2 - mean[3]) * sd[3]; features[4] = ((float)ctx->sum_y_eobs - mean[4]) / sd[4]; features[5] = ((float)cm->base_qindex - mean[5]) * sd[5]; features[6] = ((float)last_par - mean[6]) * sd[6]; // 
Predict using linear model. linear_weights = &vp9_partition_linear_weights[offset]; linear_score = linear_weights[FEATURES]; for (i = 0; i < FEATURES; ++i) linear_score += linear_weights[i] * features[i]; if (linear_score > 0.1f) return 0; // Predict using neural net model. nn_predict(features, nn_config, &nn_score); if (linear_score < -0.0f && nn_score < 0.1f) return 1; if (nn_score < -0.0f && linear_score < 0.1f) return 1; return 0; } #undef FEATURES #define FEATURES 4 // ML-based partition search breakout. static int ml_predict_breakout(VP9_COMP *const cpi, BLOCK_SIZE bsize, const MACROBLOCK *const x, const RD_COST *const rd_cost) { DECLARE_ALIGNED(16, static const uint8_t, vp9_64_zeros[64]) = { 0 }; const VP9_COMMON *const cm = &cpi->common; float features[FEATURES]; const float *linear_weights = NULL; // Linear model weights. float linear_score = 0.0f; const int qindex = cm->base_qindex; const int q_ctx = qindex >= 200 ? 0 : (qindex >= 150 ? 1 : 2); const int is_720p_or_larger = VPXMIN(cm->width, cm->height) >= 720; const int resolution_ctx = is_720p_or_larger ? 1 : 0; switch (bsize) { case BLOCK_64X64: linear_weights = vp9_partition_breakout_weights_64[resolution_ctx][q_ctx]; break; case BLOCK_32X32: linear_weights = vp9_partition_breakout_weights_32[resolution_ctx][q_ctx]; break; case BLOCK_16X16: linear_weights = vp9_partition_breakout_weights_16[resolution_ctx][q_ctx]; break; case BLOCK_8X8: linear_weights = vp9_partition_breakout_weights_8[resolution_ctx][q_ctx]; break; default: assert(0 && "Unexpected block size."); return 0; } if (!linear_weights) return 0; { // Generate feature values. #if CONFIG_VP9_HIGHBITDEPTH const int ac_q = vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8); #else const int ac_q = vp9_ac_quant(qindex, 0, cm->bit_depth); #endif // CONFIG_VP9_HIGHBITDEPTH const int num_pels_log2 = num_pels_log2_lookup[bsize]; int feature_index = 0; unsigned int var, sse; float rate_f, dist_f; #if CONFIG_VP9_HIGHBITDEPTH if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { var = vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, x->e_mbd.bd); } else { var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0, &sse); } #else var = cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, vp9_64_zeros, 0, &sse); #endif var = var >> num_pels_log2; vpx_clear_system_state(); rate_f = (float)VPXMIN(rd_cost->rate, INT_MAX); dist_f = (float)(VPXMIN(rd_cost->dist, INT_MAX) >> num_pels_log2); rate_f = ((float)x->rdmult / 128.0f / 512.0f / (float)(1 << num_pels_log2)) * rate_f; features[feature_index++] = rate_f; features[feature_index++] = dist_f; features[feature_index++] = (float)var; features[feature_index++] = (float)ac_q; assert(feature_index == FEATURES); } { // Calculate the output score. 
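    // As in ml_pruning_partition() above, the bias term is stored after the
    // FEATURES per-feature weights, so the score computed below is
    //   score = w[FEATURES] + sum_i w[i] * features[i]
    // and splitting stops early only when it clears the resolution- and
    // qindex-specific breakout threshold.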
int i; linear_score = linear_weights[FEATURES]; for (i = 0; i < FEATURES; ++i) linear_score += linear_weights[i] * features[i]; } return linear_score >= cpi->sf.rd_ml_partition.search_breakout_thresh[q_ctx]; } #undef FEATURES #define FEATURES 8 #define LABELS 4 static void ml_prune_rect_partition(VP9_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, const PC_TREE *const pc_tree, int *allow_horz, int *allow_vert, int64_t ref_rd) { const NN_CONFIG *nn_config = NULL; float score[LABELS] = { 0.0f, }; int thresh = -1; int i; (void)x; if (ref_rd <= 0 || ref_rd > 1000000000) return; switch (bsize) { case BLOCK_8X8: break; case BLOCK_16X16: nn_config = &vp9_rect_part_nnconfig_16; thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[1]; break; case BLOCK_32X32: nn_config = &vp9_rect_part_nnconfig_32; thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[2]; break; case BLOCK_64X64: nn_config = &vp9_rect_part_nnconfig_64; thresh = cpi->sf.rd_ml_partition.prune_rect_thresh[3]; break; default: assert(0 && "Unexpected block size."); return; } if (!nn_config || thresh < 0) return; // Feature extraction and model score calculation. { const VP9_COMMON *const cm = &cpi->common; #if CONFIG_VP9_HIGHBITDEPTH const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (x->e_mbd.bd - 8); #else const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); #endif // CONFIG_VP9_HIGHBITDEPTH const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; int feature_index = 0; float features[FEATURES]; features[feature_index++] = logf((float)dc_q + 1.0f); features[feature_index++] = (float)(pc_tree->partitioning == PARTITION_NONE); features[feature_index++] = logf((float)ref_rd / bs / bs + 1.0f); { const float norm_factor = 1.0f / ((float)ref_rd + 1.0f); const int64_t none_rdcost = pc_tree->none.rdcost; float rd_ratio = 2.0f; if (none_rdcost > 0 && none_rdcost < 1000000000) rd_ratio = (float)none_rdcost * norm_factor; features[feature_index++] = VPXMIN(rd_ratio, 2.0f); for (i = 0; i < 4; ++i) { const int64_t this_rd = pc_tree->split[i]->none.rdcost; const int rd_valid = this_rd > 0 && this_rd < 1000000000; // Ratio between sub-block RD and whole block RD. features[feature_index++] = rd_valid ? (float)this_rd * norm_factor : 1.0f; } } assert(feature_index == FEATURES); nn_predict(features, nn_config, score); } // Make decisions based on the model score. { int max_score = -1000; int horz = 0, vert = 0; int int_score[LABELS]; for (i = 0; i < LABELS; ++i) { int_score[i] = (int)(100 * score[i]); max_score = VPXMAX(int_score[i], max_score); } thresh = max_score - thresh; for (i = 0; i < LABELS; ++i) { if (int_score[i] >= thresh) { if ((i >> 0) & 1) horz = 1; if ((i >> 1) & 1) vert = 1; } } *allow_horz = *allow_horz && horz; *allow_vert = *allow_vert && vert; } } #undef FEATURES #undef LABELS // Perform fast and coarse motion search for the given block. This is a // pre-processing step for the ML based partition search speedup. 
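// The search is deliberately cheap: a single NSTEP full-pel search seeded
// from ref_mv (rounded from 1/8-pel down to full-pel precision), with no
// sub-pel refinement. The winning vector is scaled back by 8 into 1/8-pel
// units, and the resulting inter prediction is written to pred_buf with a
// fixed stride of 64, so one 64x64 buffer can serve every block size.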
static void simple_motion_search(const VP9_COMP *const cpi, MACROBLOCK *const x, BLOCK_SIZE bsize, int mi_row, int mi_col, MV ref_mv, MV_REFERENCE_FRAME ref, uint8_t *const pred_buf) { const VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; const YV12_BUFFER_CONFIG *const yv12 = get_ref_frame_buffer(cpi, ref); const int step_param = 1; const MvLimits tmp_mv_limits = x->mv_limits; const SEARCH_METHODS search_method = NSTEP; const int sadpb = x->sadperbit16; MV ref_mv_full = { ref_mv.row >> 3, ref_mv.col >> 3 }; MV best_mv = { 0, 0 }; int cost_list[5]; assert(yv12 != NULL); if (!yv12) return; vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, &cm->frame_refs[ref - 1].sf); mi->ref_frame[0] = ref; mi->ref_frame[1] = NONE; mi->sb_type = bsize; vp9_set_mv_search_range(&x->mv_limits, &ref_mv); vp9_full_pixel_search(cpi, x, bsize, &ref_mv_full, step_param, search_method, sadpb, cond_cost_list(cpi, cost_list), &ref_mv, &best_mv, 0, 0); best_mv.row *= 8; best_mv.col *= 8; x->mv_limits = tmp_mv_limits; mi->mv[0].as_mv = best_mv; set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); xd->plane[0].dst.buf = pred_buf; xd->plane[0].dst.stride = 64; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); } // Use a neural net model to prune partition-none and partition-split search. // Features used: QP; spatial block size contexts; variance of prediction // residue after simple_motion_search. #define FEATURES 12 static void ml_predict_var_rd_paritioning(const VP9_COMP *const cpi, MACROBLOCK *const x, PC_TREE *const pc_tree, BLOCK_SIZE bsize, int mi_row, int mi_col, int *none, int *split) { const VP9_COMMON *const cm = &cpi->common; const NN_CONFIG *nn_config = NULL; #if CONFIG_VP9_HIGHBITDEPTH MACROBLOCKD *xd = &x->e_mbd; DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64 * 2]); uint8_t *const pred_buf = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? (CONVERT_TO_BYTEPTR(pred_buffer)) : pred_buffer; #else DECLARE_ALIGNED(16, uint8_t, pred_buffer[64 * 64]); uint8_t *const pred_buf = pred_buffer; #endif // CONFIG_VP9_HIGHBITDEPTH const int speed = cpi->oxcf.speed; float thresh = 0.0f; switch (bsize) { case BLOCK_64X64: nn_config = &vp9_part_split_nnconfig_64; thresh = speed > 0 ? 2.8f : 3.0f; break; case BLOCK_32X32: nn_config = &vp9_part_split_nnconfig_32; thresh = speed > 0 ? 3.5f : 3.0f; break; case BLOCK_16X16: nn_config = &vp9_part_split_nnconfig_16; thresh = speed > 0 ? 3.8f : 4.0f; break; case BLOCK_8X8: nn_config = &vp9_part_split_nnconfig_8; if (cm->width >= 720 && cm->height >= 720) thresh = speed > 0 ? 2.5f : 2.0f; else thresh = speed > 0 ? 3.8f : 2.0f; break; default: assert(0 && "Unexpected block size."); return; } if (!nn_config) return; // Do a simple single motion search to find a prediction for current block. // The variance of the residue will be used as input features. { MV ref_mv; const MV_REFERENCE_FRAME ref = cpi->rc.is_src_frame_alt_ref ? ALTREF_FRAME : LAST_FRAME; // If bsize is 64x64, use zero MV as reference; otherwise, use MV result // of previous(larger) block as reference. 
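    // Reusing the parent block's result keeps the per-level searches cheap:
    // motion is usually coherent within a 64x64 region, so each child
    // search only refines around its parent's vector instead of starting
    // from scratch at every level of the tree.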
if (bsize == BLOCK_64X64) ref_mv.row = ref_mv.col = 0; else ref_mv = pc_tree->mv; vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); simple_motion_search(cpi, x, bsize, mi_row, mi_col, ref_mv, ref, pred_buf); pc_tree->mv = x->e_mbd.mi[0]->mv[0].as_mv; } vpx_clear_system_state(); { float features[FEATURES] = { 0.0f }; #if CONFIG_VP9_HIGHBITDEPTH const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) >> (xd->bd - 8); #else const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); #endif // CONFIG_VP9_HIGHBITDEPTH int feature_idx = 0; float score; // Generate model input features. features[feature_idx++] = logf((float)dc_q + 1.0f); // Get the variance of the residue as input features. { const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); const uint8_t *pred = pred_buf; const uint8_t *src = x->plane[0].src.buf; const int src_stride = x->plane[0].src.stride; const int pred_stride = 64; unsigned int sse; // Variance of whole block. const unsigned int var = cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); const MACROBLOCKD *const xd = &x->e_mbd; const int has_above = !!xd->above_mi; const int has_left = !!xd->left_mi; const BLOCK_SIZE above_bsize = has_above ? xd->above_mi->sb_type : bsize; const BLOCK_SIZE left_bsize = has_left ? xd->left_mi->sb_type : bsize; int i; features[feature_idx++] = (float)has_above; features[feature_idx++] = (float)b_width_log2_lookup[above_bsize]; features[feature_idx++] = (float)b_height_log2_lookup[above_bsize]; features[feature_idx++] = (float)has_left; features[feature_idx++] = (float)b_width_log2_lookup[left_bsize]; features[feature_idx++] = (float)b_height_log2_lookup[left_bsize]; features[feature_idx++] = logf((float)var + 1.0f); for (i = 0; i < 4; ++i) { const int x_idx = (i & 1) * bs / 2; const int y_idx = (i >> 1) * bs / 2; const int src_offset = y_idx * src_stride + x_idx; const int pred_offset = y_idx * pred_stride + x_idx; // Variance of quarter block. const unsigned int sub_var = cpi->fn_ptr[subsize].vf(src + src_offset, src_stride, pred + pred_offset, pred_stride, &sse); const float var_ratio = (var == 0) ? 1.0f : factor * (float)sub_var; features[feature_idx++] = var_ratio; } } assert(feature_idx == FEATURES); // Feed the features into the model to get the confidence score. nn_predict(features, nn_config, &score); // Higher score means that the model has higher confidence that the split // partition is better than the non-split partition. So if the score is // high enough, we skip the none-split partition search; if the score is // low enough, we skip the split partition search. 
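      // For example, at the 64x64 speed-0 threshold of 3.0: a score above
      // 3.0 prunes the PARTITION_NONE search, a score below -3.0 prunes the
      // split search, and anything in between leaves both candidates to the
      // regular RD search, so the model only overrides when it is confident
      // either way.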
if (score > thresh) *none = 0; if (score < -thresh) *split = 0; } } #undef FEATURES #endif // !CONFIG_REALTIME_ONLY static double log_wiener_var(int64_t wiener_variance) { return log(1.0 + wiener_variance) / log(2.0); } static void build_kmeans_segmentation(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; BLOCK_SIZE bsize = BLOCK_64X64; KMEANS_DATA *kmeans_data; vp9_disable_segmentation(&cm->seg); if (cm->show_frame) { int mi_row, mi_col; cpi->kmeans_data_size = 0; cpi->kmeans_ctr_num = 8; for (mi_row = 0; mi_row < cm->mi_rows; mi_row += MI_BLOCK_SIZE) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col += MI_BLOCK_SIZE) { int mb_row_start = mi_row >> 1; int mb_col_start = mi_col >> 1; int mb_row_end = VPXMIN( (mi_row + num_8x8_blocks_high_lookup[bsize]) >> 1, cm->mb_rows); int mb_col_end = VPXMIN( (mi_col + num_8x8_blocks_wide_lookup[bsize]) >> 1, cm->mb_cols); int row, col; int64_t wiener_variance = 0; for (row = mb_row_start; row < mb_row_end; ++row) for (col = mb_col_start; col < mb_col_end; ++col) wiener_variance += cpi->mb_wiener_variance[row * cm->mb_cols + col]; wiener_variance /= (mb_row_end - mb_row_start) * (mb_col_end - mb_col_start); #if CONFIG_MULTITHREAD pthread_mutex_lock(&cpi->kmeans_mutex); #endif // CONFIG_MULTITHREAD kmeans_data = &cpi->kmeans_data_arr[cpi->kmeans_data_size++]; kmeans_data->value = log_wiener_var(wiener_variance); kmeans_data->pos = mi_row * cpi->kmeans_data_stride + mi_col; #if CONFIG_MULTITHREAD pthread_mutex_unlock(&cpi->kmeans_mutex); #endif // CONFIG_MULTITHREAD } } vp9_kmeans(cpi->kmeans_ctr_ls, cpi->kmeans_boundary_ls, cpi->kmeans_count_ls, cpi->kmeans_ctr_num, cpi->kmeans_data_arr, cpi->kmeans_data_size); vp9_perceptual_aq_mode_setup(cpi, &cm->seg); } } #if !CONFIG_REALTIME_ONLY static int wiener_var_segment(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col) { VP9_COMMON *cm = &cpi->common; int mb_row_start = mi_row >> 1; int mb_col_start = mi_col >> 1; int mb_row_end = VPXMIN((mi_row + num_8x8_blocks_high_lookup[bsize]) >> 1, cm->mb_rows); int mb_col_end = VPXMIN((mi_col + num_8x8_blocks_wide_lookup[bsize]) >> 1, cm->mb_cols); int row, col, idx; int64_t wiener_variance = 0; int segment_id; int8_t seg_hist[MAX_SEGMENTS] = { 0 }; int8_t max_count = 0, max_index = -1; vpx_clear_system_state(); assert(cpi->norm_wiener_variance > 0); for (row = mb_row_start; row < mb_row_end; ++row) { for (col = mb_col_start; col < mb_col_end; ++col) { wiener_variance = cpi->mb_wiener_variance[row * cm->mb_cols + col]; segment_id = vp9_get_group_idx(log_wiener_var(wiener_variance), cpi->kmeans_boundary_ls, cpi->kmeans_ctr_num); ++seg_hist[segment_id]; } } for (idx = 0; idx < cpi->kmeans_ctr_num; ++idx) { if (seg_hist[idx] > max_count) { max_count = seg_hist[idx]; max_index = idx; } } assert(max_index >= 0); segment_id = max_index; return segment_id; } static int get_rdmult_delta(VP9_COMP *cpi, BLOCK_SIZE bsize, int mi_row, int mi_col, int orig_rdmult) { const int gf_group_index = cpi->twopass.gf_group.index; TplDepFrame *tpl_frame = &cpi->tpl_stats[gf_group_index]; TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; int tpl_stride = tpl_frame->stride; int64_t intra_cost = 0; int64_t mc_dep_cost = 0; int mi_wide = num_8x8_blocks_wide_lookup[bsize]; int mi_high = num_8x8_blocks_high_lookup[bsize]; int row, col; int dr = 0; int count = 0; double r0, rk, beta; if (tpl_frame->is_valid == 0) return orig_rdmult; if (cpi->twopass.gf_group.layer_depth[gf_group_index] > 1) return orig_rdmult; if (gf_group_index >= MAX_ARF_GOP_SIZE) return orig_rdmult; for (row = mi_row; row < 
mi_row + mi_high; ++row) { for (col = mi_col; col < mi_col + mi_wide; ++col) { TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; if (row >= cpi->common.mi_rows || col >= cpi->common.mi_cols) continue; intra_cost += this_stats->intra_cost; mc_dep_cost += this_stats->mc_dep_cost; ++count; } } vpx_clear_system_state(); r0 = cpi->rd.r0; rk = (double)intra_cost / mc_dep_cost; beta = r0 / rk; dr = vp9_get_adaptive_rdmult(cpi, beta); dr = VPXMIN(dr, orig_rdmult * 3 / 2); dr = VPXMAX(dr, orig_rdmult * 1 / 2); dr = VPXMAX(1, dr); return dr; } #endif // !CONFIG_REALTIME_ONLY #if !CONFIG_REALTIME_ONLY // TODO(jingning,jimbankoski,rbultje): properly skip partition types that are // unlikely to be selected depending on previous rate-distortion optimization // results, for encoding speed-up. static int rd_pick_partition(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, RD_COST *rd_cost, RD_COST best_rdc, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int mi_step = num_8x8_blocks_wide_lookup[bsize] / 2; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; PARTITION_CONTEXT sl[8], sa[8]; TOKENEXTRA *tp_orig = *tp; PICK_MODE_CONTEXT *const ctx = &pc_tree->none; int i; const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); BLOCK_SIZE subsize; RD_COST this_rdc, sum_rdc; int do_split = bsize >= BLOCK_8X8; int do_rect = 1; INTERP_FILTER pred_interp_filter; // Override skipping rectangular partition operations for edge blocks const int force_horz_split = (mi_row + mi_step >= cm->mi_rows); const int force_vert_split = (mi_col + mi_step >= cm->mi_cols); const int xss = x->e_mbd.plane[1].subsampling_x; const int yss = x->e_mbd.plane[1].subsampling_y; BLOCK_SIZE min_size = x->min_partition_size; BLOCK_SIZE max_size = x->max_partition_size; #if CONFIG_FP_MB_STATS unsigned int src_diff_var = UINT_MAX; int none_complexity = 0; #endif int partition_none_allowed = !force_horz_split && !force_vert_split; int partition_horz_allowed = !force_vert_split && yss <= xss && bsize >= BLOCK_8X8; int partition_vert_allowed = !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; int64_t dist_breakout_thr = cpi->sf.partition_search_breakout_thr.dist; int rate_breakout_thr = cpi->sf.partition_search_breakout_thr.rate; int must_split = 0; int should_encode_sb = 0; // Ref frames picked in the [i_th] quarter subblock during square partition // RD search. It may be used to prune ref frame selection of rect partitions. 
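  // Each entry is a bitmask over reference frames: bit (1 << ref) is set
  // when quarter subblock i picked ref during the square-partition search.
  // Quarters are indexed row-major (0 = top-left .. 3 = bottom-right), so
  // the rectangular partitions combine them as top half = 0|1, bottom
  // half = 2|3, left half = 0|2 and right half = 1|3 when the masks are
  // applied further down.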
uint8_t ref_frames_used[4] = { 0, 0, 0, 0 }; int partition_mul = x->cb_rdmult; (void)*tp_orig; assert(num_8x8_blocks_wide_lookup[bsize] == num_8x8_blocks_high_lookup[bsize]); dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); rate_breakout_thr *= num_pels_log2_lookup[bsize]; vp9_rd_cost_init(&this_rdc); vp9_rd_cost_init(&sum_rdc); set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); if (oxcf->tuning == VP8_TUNE_SSIM) { set_ssim_rdmult(cpi, x, bsize, mi_row, mi_col, &partition_mul); } vp9_rd_cost_update(partition_mul, x->rddiv, &best_rdc); if (bsize == BLOCK_16X16 && cpi->oxcf.aq_mode != NO_AQ && cpi->oxcf.aq_mode != LOOKAHEAD_AQ) x->mb_energy = vp9_block_energy(cpi, x, bsize); if (cpi->sf.cb_partition_search && bsize == BLOCK_16X16) { int cb_partition_search_ctrl = ((pc_tree->index == 0 || pc_tree->index == 3) + get_chessboard_index(cm->current_video_frame)) & 0x1; if (cb_partition_search_ctrl && bsize > min_size && bsize < max_size) set_partition_range(cm, xd, mi_row, mi_col, bsize, &min_size, &max_size); } // Get sub block energy range if (bsize >= BLOCK_16X16) { int min_energy, max_energy; vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, &max_energy); must_split = (min_energy < -3) && (max_energy - min_energy > 2); } // Determine partition types in search according to the speed features. // The threshold set here has to be of square block size. if (cpi->sf.auto_min_max_partition_size) { partition_none_allowed &= (bsize <= max_size); partition_horz_allowed &= ((bsize <= max_size && bsize > min_size) || force_horz_split); partition_vert_allowed &= ((bsize <= max_size && bsize > min_size) || force_vert_split); do_split &= bsize > min_size; } if (cpi->sf.use_square_partition_only && (bsize > cpi->sf.use_square_only_thresh_high || bsize < cpi->sf.use_square_only_thresh_low)) { if (cpi->use_svc) { if (!vp9_active_h_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) partition_horz_allowed &= force_horz_split; if (!vp9_active_v_edge(cpi, mi_row, mi_step) || x->e_mbd.lossless) partition_vert_allowed &= force_vert_split; } else { partition_horz_allowed &= force_horz_split; partition_vert_allowed &= force_vert_split; } } save_context(x, mi_row, mi_col, a, l, sa, sl, bsize); #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); src_diff_var = get_sby_perpixel_diff_variance(cpi, &x->plane[0].src, mi_row, mi_col, bsize); } #endif #if CONFIG_FP_MB_STATS // Decide whether we shall split directly and skip searching NONE by using // the first pass block statistics if (cpi->use_fp_mb_stats && bsize >= BLOCK_32X32 && do_split && partition_none_allowed && src_diff_var > 4 && cm->base_qindex < qindex_split_threshold_lookup[bsize]) { int mb_row = mi_row >> 1; int mb_col = mi_col >> 1; int mb_row_end = VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); int mb_col_end = VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); int r, c; // compute a complexity measure, basically measure inconsistency of motion // vectors obtained from the first pass in the current block for (r = mb_row; r < mb_row_end; r++) { for (c = mb_col; c < mb_col_end; c++) { const int mb_index = r * cm->mb_cols + c; MOTION_DIRECTION this_mv; MOTION_DIRECTION right_mv; MOTION_DIRECTION bottom_mv; this_mv = get_motion_direction_fp(cpi->twopass.this_frame_mb_stats[mb_index]); // to its right if (c != mb_col_end - 1) { right_mv = get_motion_direction_fp( cpi->twopass.this_frame_mb_stats[mb_index + 1]); 
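        // get_motion_inconsistency() scores each pair of neighbors: 0 when
        // the directions match, 2 when the direction codes differ by
        // exactly two (LEFT vs RIGHT, UP vs DOWN, and also ZERO vs UP given
        // the enum ordering), and 1 for any other mismatch.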
none_complexity += get_motion_inconsistency(this_mv, right_mv); } // to its bottom if (r != mb_row_end - 1) { bottom_mv = get_motion_direction_fp( cpi->twopass.this_frame_mb_stats[mb_index + cm->mb_cols]); none_complexity += get_motion_inconsistency(this_mv, bottom_mv); } // do not count its left and top neighbors to avoid double counting } } if (none_complexity > complexity_16x16_blocks_threshold[bsize]) { partition_none_allowed = 0; } } #endif pc_tree->partitioning = PARTITION_NONE; if (cpi->sf.rd_ml_partition.var_pruning && !frame_is_intra_only(cm)) { const int do_rd_ml_partition_var_pruning = partition_none_allowed && do_split && mi_row + num_8x8_blocks_high_lookup[bsize] <= cm->mi_rows && mi_col + num_8x8_blocks_wide_lookup[bsize] <= cm->mi_cols; if (do_rd_ml_partition_var_pruning) { ml_predict_var_rd_paritioning(cpi, x, pc_tree, bsize, mi_row, mi_col, &partition_none_allowed, &do_split); } else { vp9_zero(pc_tree->mv); } if (bsize > BLOCK_8X8) { // Store MV result as reference for subblocks. for (i = 0; i < 4; ++i) pc_tree->split[i]->mv = pc_tree->mv; } } // PARTITION_NONE if (partition_none_allowed) { rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx, best_rdc.rate, best_rdc.dist); ctx->rdcost = this_rdc.rdcost; if (this_rdc.rate != INT_MAX) { if (cpi->sf.prune_ref_frame_for_rect_partitions) { const int ref1 = ctx->mic.ref_frame[0]; const int ref2 = ctx->mic.ref_frame[1]; for (i = 0; i < 4; ++i) { ref_frames_used[i] |= (1 << ref1); if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); } } if (bsize >= BLOCK_8X8) { this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; vp9_rd_cost_update(partition_mul, x->rddiv, &this_rdc); } if (this_rdc.rdcost < best_rdc.rdcost) { MODE_INFO *mi = xd->mi[0]; best_rdc = this_rdc; should_encode_sb = 1; if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; if (cpi->sf.rd_ml_partition.search_early_termination) { // Currently, the machine-learning based partition search early // termination is only used while bsize is 16x16, 32x32 or 64x64, // VPXMIN(cm->width, cm->height) >= 480, and speed = 0. if (!x->e_mbd.lossless && !segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP) && ctx->mic.mode >= INTRA_MODES && bsize >= BLOCK_16X16) { if (ml_pruning_partition(cm, xd, ctx, mi_row, mi_col, bsize)) { do_split = 0; do_rect = 0; } } } if ((do_split || do_rect) && !x->e_mbd.lossless && ctx->skippable) { const int use_ml_based_breakout = cpi->sf.rd_ml_partition.search_breakout && cm->base_qindex >= 100; if (use_ml_based_breakout) { if (ml_predict_breakout(cpi, bsize, x, &this_rdc)) { do_split = 0; do_rect = 0; } } else { if (!cpi->sf.rd_ml_partition.search_early_termination) { if ((best_rdc.dist < (dist_breakout_thr >> 2)) || (best_rdc.dist < dist_breakout_thr && best_rdc.rate < rate_breakout_thr)) { do_split = 0; do_rect = 0; } } } } #if CONFIG_FP_MB_STATS // Check if every 16x16 first pass block statistics has zero // motion and the corresponding first pass residue is small enough. // If that is the case, check the difference variance between the // current frame and the last frame. 
If the variance is small enough, // stop further splitting in RD optimization if (cpi->use_fp_mb_stats && do_split != 0 && cm->base_qindex > qindex_skip_threshold_lookup[bsize]) { int mb_row = mi_row >> 1; int mb_col = mi_col >> 1; int mb_row_end = VPXMIN(mb_row + num_16x16_blocks_high_lookup[bsize], cm->mb_rows); int mb_col_end = VPXMIN(mb_col + num_16x16_blocks_wide_lookup[bsize], cm->mb_cols); int r, c; int skip = 1; for (r = mb_row; r < mb_row_end; r++) { for (c = mb_col; c < mb_col_end; c++) { const int mb_index = r * cm->mb_cols + c; if (!(cpi->twopass.this_frame_mb_stats[mb_index] & FPMB_MOTION_ZERO_MASK) || !(cpi->twopass.this_frame_mb_stats[mb_index] & FPMB_ERROR_SMALL_MASK)) { skip = 0; break; } } if (skip == 0) { break; } } if (skip) { if (src_diff_var == UINT_MAX) { set_offsets(cpi, tile_info, x, mi_row, mi_col, bsize); src_diff_var = get_sby_perpixel_diff_variance( cpi, &x->plane[0].src, mi_row, mi_col, bsize); } if (src_diff_var < 8) { do_split = 0; do_rect = 0; } } } #endif } } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } else { vp9_zero(ctx->pred_mv); ctx->mic.interp_filter = EIGHTTAP; } // store estimated motion vector store_pred_mv(x, ctx); // If the interp_filter is marked as SWITCHABLE_FILTERS, it was for an // intra block and used for context purposes. if (ctx->mic.interp_filter == SWITCHABLE_FILTERS) { pred_interp_filter = EIGHTTAP; } else { pred_interp_filter = ctx->mic.interp_filter; } // PARTITION_SPLIT // TODO(jingning): use the motion vectors given by the above search as // the starting point of motion search in the following partition type check. pc_tree->split[0]->none.rdcost = 0; pc_tree->split[1]->none.rdcost = 0; pc_tree->split[2]->none.rdcost = 0; pc_tree->split[3]->none.rdcost = 0; if (do_split || must_split) { subsize = get_subsize(bsize, PARTITION_SPLIT); load_pred_mv(x, ctx); if (bsize == BLOCK_8X8) { i = 4; if (cpi->sf.adaptive_pred_interp_filter && partition_none_allowed) pc_tree->leaf_split[0]->pred_interp_filter = pred_interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, pc_tree->leaf_split[0], best_rdc.rate, best_rdc.dist); if (sum_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; } else { if (cpi->sf.prune_ref_frame_for_rect_partitions) { const int ref1 = pc_tree->leaf_split[0]->mic.ref_frame[0]; const int ref2 = pc_tree->leaf_split[0]->mic.ref_frame[1]; for (i = 0; i < 4; ++i) { ref_frames_used[i] |= (1 << ref1); if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); } } } } else { for (i = 0; (i < 4) && ((sum_rdc.rdcost < best_rdc.rdcost) || must_split); ++i) { const int x_idx = (i & 1) * mi_step; const int y_idx = (i >> 1) * mi_step; int found_best_rd = 0; RD_COST best_rdc_split; vp9_rd_cost_reset(&best_rdc_split); if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX) { // A must split test here increases the number of sub // partitions but hurts metrics results quite a bit, // so this extra test is commented out pending // further tests on whether it adds much in terms of // visual quality. // (must_split) ? best_rdc.rate // : best_rdc.rate - sum_rdc.rate, // (must_split) ? 
best_rdc.dist // : best_rdc.dist - sum_rdc.dist, best_rdc_split.rate = best_rdc.rate - sum_rdc.rate; best_rdc_split.dist = best_rdc.dist - sum_rdc.dist; } if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; pc_tree->split[i]->index = i; if (cpi->sf.prune_ref_frame_for_rect_partitions) pc_tree->split[i]->none.rate = INT_MAX; found_best_rd = rd_pick_partition( cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rdc, best_rdc_split, pc_tree->split[i]); if (found_best_rd == 0) { sum_rdc.rdcost = INT64_MAX; break; } else { if (cpi->sf.prune_ref_frame_for_rect_partitions && pc_tree->split[i]->none.rate != INT_MAX) { const int ref1 = pc_tree->split[i]->none.mic.ref_frame[0]; const int ref2 = pc_tree->split[i]->none.mic.ref_frame[1]; ref_frames_used[i] |= (1 << ref1); if (ref2 > 0) ref_frames_used[i] |= (1 << ref2); } sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); } } } if (((sum_rdc.rdcost < best_rdc.rdcost) || must_split) && i == 4) { sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); if ((sum_rdc.rdcost < best_rdc.rdcost) || (must_split && (sum_rdc.dist < best_rdc.dist))) { best_rdc = sum_rdc; should_encode_sb = 1; pc_tree->partitioning = PARTITION_SPLIT; // Rate and distortion based partition search termination clause. if (!cpi->sf.rd_ml_partition.search_early_termination && !x->e_mbd.lossless && ((best_rdc.dist < (dist_breakout_thr >> 2)) || (best_rdc.dist < dist_breakout_thr && best_rdc.rate < rate_breakout_thr))) { do_rect = 0; } } } else { // skip rectangular partition test when larger block size // gives better rd cost if (cpi->sf.less_rectangular_check && (bsize > cpi->sf.use_square_only_thresh_high || best_rdc.dist < dist_breakout_thr)) do_rect &= !partition_none_allowed; } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } pc_tree->horizontal[0].skip_ref_frame_mask = 0; pc_tree->horizontal[1].skip_ref_frame_mask = 0; pc_tree->vertical[0].skip_ref_frame_mask = 0; pc_tree->vertical[1].skip_ref_frame_mask = 0; if (cpi->sf.prune_ref_frame_for_rect_partitions) { uint8_t used_frames; used_frames = ref_frames_used[0] | ref_frames_used[1]; if (used_frames) { pc_tree->horizontal[0].skip_ref_frame_mask = ~used_frames & 0xff; } used_frames = ref_frames_used[2] | ref_frames_used[3]; if (used_frames) { pc_tree->horizontal[1].skip_ref_frame_mask = ~used_frames & 0xff; } used_frames = ref_frames_used[0] | ref_frames_used[2]; if (used_frames) { pc_tree->vertical[0].skip_ref_frame_mask = ~used_frames & 0xff; } used_frames = ref_frames_used[1] | ref_frames_used[3]; if (used_frames) { pc_tree->vertical[1].skip_ref_frame_mask = ~used_frames & 0xff; } } { const int do_ml_rect_partition_pruning = !frame_is_intra_only(cm) && !force_horz_split && !force_vert_split && (partition_horz_allowed || partition_vert_allowed) && bsize > BLOCK_8X8; if (do_ml_rect_partition_pruning) { ml_prune_rect_partition(cpi, x, bsize, pc_tree, &partition_horz_allowed, &partition_vert_allowed, best_rdc.rdcost); } } // PARTITION_HORZ if (partition_horz_allowed && (do_rect || vp9_active_h_edge(cpi, mi_row, mi_step))) { const int part_mode_rate = cpi->partition_cost[pl][PARTITION_HORZ]; subsize = get_subsize(bsize, PARTITION_HORZ); load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->horizontal[0].pred_interp_filter = pred_interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, 
&sum_rdc, subsize, &pc_tree->horizontal[0], best_rdc.rate - part_mode_rate, best_rdc.dist); if (sum_rdc.rdcost < INT64_MAX) { sum_rdc.rate += part_mode_rate; vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); } if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + mi_step < cm->mi_rows && bsize > BLOCK_8X8) { PICK_MODE_CONTEXT *ctx = &pc_tree->horizontal[0]; update_state(cpi, td, ctx, mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->horizontal[1].pred_interp_filter = pred_interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row + mi_step, mi_col, &this_rdc, subsize, &pc_tree->horizontal[1], best_rdc.rate - sum_rdc.rate, best_rdc.dist - sum_rdc.dist); if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); } } if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; should_encode_sb = 1; pc_tree->partitioning = PARTITION_HORZ; if (cpi->sf.less_rectangular_check && bsize > cpi->sf.use_square_only_thresh_high) do_rect = 0; } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } // PARTITION_VERT if (partition_vert_allowed && (do_rect || vp9_active_v_edge(cpi, mi_col, mi_step))) { const int part_mode_rate = cpi->partition_cost[pl][PARTITION_VERT]; subsize = get_subsize(bsize, PARTITION_VERT); load_pred_mv(x, ctx); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->vertical[0].pred_interp_filter = pred_interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->vertical[0], best_rdc.rate - part_mode_rate, best_rdc.dist); if (sum_rdc.rdcost < INT64_MAX) { sum_rdc.rate += part_mode_rate; vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); } if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + mi_step < cm->mi_cols && bsize > BLOCK_8X8) { update_state(cpi, td, &pc_tree->vertical[0], mi_row, mi_col, subsize, 0); encode_superblock(cpi, td, tp, 0, mi_row, mi_col, subsize, &pc_tree->vertical[0]); if (cpi->sf.adaptive_pred_interp_filter && bsize == BLOCK_8X8 && partition_none_allowed) pc_tree->vertical[1].pred_interp_filter = pred_interp_filter; rd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + mi_step, &this_rdc, subsize, &pc_tree->vertical[1], best_rdc.rate - sum_rdc.rate, best_rdc.dist - sum_rdc.dist); if (this_rdc.rate == INT_MAX) { sum_rdc.rdcost = INT64_MAX; } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; vp9_rd_cost_update(partition_mul, x->rddiv, &sum_rdc); } } if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; should_encode_sb = 1; pc_tree->partitioning = PARTITION_VERT; } restore_context(x, mi_row, mi_col, a, l, sa, sl, bsize); } *rd_cost = best_rdc; if (should_encode_sb && pc_tree->index != 3) { int output_enabled = (bsize == BLOCK_64X64); encode_sb(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); } if (bsize == BLOCK_64X64) { assert(tp_orig < *tp); assert(best_rdc.rate < INT_MAX); assert(best_rdc.dist < INT64_MAX); } else { assert(tp_orig == *tp); } return should_encode_sb; } static void encode_rd_sb_row(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, int mi_row, TOKENEXTRA **tp) { VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; SPEED_FEATURES *const sf = &cpi->sf; 
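// Per-superblock flow for this row: wait on the row-mt read sync, reset
// the predicted MVs and per-SB state, pick a partitioning strategy
// (fixed, skippable frame, variance-based, or the full rd_pick_partition
// recursion), then signal the write sync so dependent rows can proceed.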
const int mi_col_start = tile_info->mi_col_start; const int mi_col_end = tile_info->mi_col_end; int mi_col; const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; const int num_sb_cols = get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2); int sb_col_in_tile; // Initialize the left context for the new SB row memset(&xd->left_context, 0, sizeof(xd->left_context)); memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE, sb_col_in_tile++) { const struct segmentation *const seg = &cm->seg; int dummy_rate; int64_t dummy_dist; RD_COST dummy_rdc; int i; int seg_skip = 0; int orig_rdmult = cpi->rd.RDMULT; const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **mi = cm->mi_grid_visible + idx_str; vp9_rd_cost_reset(&dummy_rdc); (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, sb_col_in_tile); if (sf->adaptive_pred_interp_filter) { for (i = 0; i < 64; ++i) td->leaf_tree[i].pred_interp_filter = SWITCHABLE; for (i = 0; i < 64; ++i) { td->pc_tree[i].vertical[0].pred_interp_filter = SWITCHABLE; td->pc_tree[i].vertical[1].pred_interp_filter = SWITCHABLE; td->pc_tree[i].horizontal[0].pred_interp_filter = SWITCHABLE; td->pc_tree[i].horizontal[1].pred_interp_filter = SWITCHABLE; } } for (i = 0; i < MAX_REF_FRAMES; ++i) { x->pred_mv[i].row = INT16_MAX; x->pred_mv[i].col = INT16_MAX; } td->pc_root->index = 0; if (seg->enabled) { const uint8_t *const map = seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); } x->source_variance = UINT_MAX; x->cb_rdmult = orig_rdmult; if (sf->partition_search_type == FIXED_PARTITION || seg_skip) { const BLOCK_SIZE bsize = seg_skip ? 
BLOCK_64X64 : sf->always_this_block_size; set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); } else if (cpi->partition_search_skippable_frame) { BLOCK_SIZE bsize; set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); bsize = get_rd_var_based_fixed_partition(cpi, x, mi_row, mi_col); set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); } else if (sf->partition_search_type == VAR_BASED_PARTITION && cm->frame_type != KEY_FRAME) { choose_partitioning(cpi, tile_info, x, mi_row, mi_col); rd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rate, &dummy_dist, 1, td->pc_root); } else { if (cpi->twopass.gf_group.index > 0 && cpi->sf.enable_tpl_model) { int dr = get_rdmult_delta(cpi, BLOCK_64X64, mi_row, mi_col, orig_rdmult); x->cb_rdmult = dr; } if (cpi->oxcf.aq_mode == PERCEPTUAL_AQ && cm->show_frame) { x->segment_id = wiener_var_segment(cpi, BLOCK_64X64, mi_row, mi_col); x->cb_rdmult = vp9_compute_rd_mult( cpi, vp9_get_qindex(&cm->seg, x->segment_id, cm->base_qindex)); } // If required set upper and lower partition size limits if (sf->auto_min_max_partition_size) { set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); rd_auto_partition_range(cpi, tile_info, xd, mi_row, mi_col, &x->min_partition_size, &x->max_partition_size); } td->pc_root->none.rdcost = 0; rd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rdc, dummy_rdc, td->pc_root); } (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, sb_col_in_tile, num_sb_cols); } } #endif // !CONFIG_REALTIME_ONLY static void init_encode_frame_mb_context(VP9_COMP *cpi) { MACROBLOCK *const x = &cpi->td.mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; const int aligned_mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); // Copy data over into macro block data structures. vp9_setup_src_planes(x, cpi->Source, 0, 0); vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y); // Note: this memset assumes above_context[0], [1] and [2] // are allocated as part of the same buffer. 
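// (Hence one memset over 2 * aligned_mi_cols * MAX_MB_PLANE entries
// clears the above context for every plane in a single call.)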
memset(xd->above_context[0], 0, sizeof(*xd->above_context[0]) * 2 * aligned_mi_cols * MAX_MB_PLANE); memset(xd->above_seg_context, 0, sizeof(*xd->above_seg_context) * aligned_mi_cols); } static int check_dual_ref_flags(VP9_COMP *cpi) { const int ref_flags = cpi->ref_frame_flags; if (segfeature_active(&cpi->common.seg, 1, SEG_LVL_REF_FRAME)) { return 0; } else { return (!!(ref_flags & VP9_GOLD_FLAG) + !!(ref_flags & VP9_LAST_FLAG) + !!(ref_flags & VP9_ALT_FLAG)) >= 2; } } static void reset_skip_tx_size(VP9_COMMON *cm, TX_SIZE max_tx_size) { int mi_row, mi_col; const int mis = cm->mi_stride; MODE_INFO **mi_ptr = cm->mi_grid_visible; for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row, mi_ptr += mis) { for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { if (mi_ptr[mi_col]->tx_size > max_tx_size) mi_ptr[mi_col]->tx_size = max_tx_size; } } } static MV_REFERENCE_FRAME get_frame_type(const VP9_COMP *cpi) { if (frame_is_intra_only(&cpi->common)) return INTRA_FRAME; else if (cpi->rc.is_src_frame_alt_ref && cpi->refresh_golden_frame) return ALTREF_FRAME; else if (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame) return GOLDEN_FRAME; else return LAST_FRAME; } static TX_MODE select_tx_mode(const VP9_COMP *cpi, MACROBLOCKD *const xd) { if (xd->lossless) return ONLY_4X4; if (cpi->common.frame_type == KEY_FRAME && cpi->sf.use_nonrd_pick_mode) return ALLOW_16X16; if (cpi->sf.tx_size_search_method == USE_LARGESTALL) return ALLOW_32X32; else if (cpi->sf.tx_size_search_method == USE_FULL_RD || cpi->sf.tx_size_search_method == USE_TX_8X8) return TX_MODE_SELECT; else return cpi->common.tx_mode; } static void hybrid_intra_mode_search(VP9_COMP *cpi, MACROBLOCK *const x, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { if (!cpi->sf.nonrd_keyframe && bsize < BLOCK_16X16) vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); else vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx); } static void hybrid_search_svc_baseiskey(VP9_COMP *cpi, MACROBLOCK *const x, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, TileDataEnc *tile_data, int mi_row, int mi_col) { if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) { vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); } else { if (cpi->svc.disable_inter_layer_pred == INTER_LAYER_PRED_OFF) vp9_pick_intra_mode(cpi, x, rd_cost, bsize, ctx); else if (bsize >= BLOCK_8X8) vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx); else vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx); } } static void hybrid_search_scene_change(VP9_COMP *cpi, MACROBLOCK *const x, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, TileDataEnc *tile_data, int mi_row, int mi_col) { if (!cpi->sf.nonrd_keyframe && bsize <= BLOCK_8X8) { vp9_rd_pick_intra_mode_sb(cpi, x, rd_cost, bsize, ctx, INT64_MAX); } else { vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx); } } static void nonrd_pick_sb_modes(VP9_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *const x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *mi; ENTROPY_CONTEXT l[16 * MAX_MB_PLANE], a[16 * MAX_MB_PLANE]; BLOCK_SIZE bs = VPXMAX(bsize, BLOCK_8X8); // processing unit block size const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bs]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bs]; int plane; set_offsets(cpi, tile_info, x, mi_row, 
mi_col, bsize); set_segment_index(cpi, x, mi_row, mi_col, bsize, 0); mi = xd->mi[0]; mi->sb_type = bsize; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { struct macroblockd_plane *pd = &xd->plane[plane]; memcpy(a + num_4x4_blocks_wide * plane, pd->above_context, (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x); memcpy(l + num_4x4_blocks_high * plane, pd->left_context, (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y); } if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled) if (cyclic_refresh_segment_id_boosted(mi->segment_id)) x->rdmult = vp9_cyclic_refresh_get_rdmult(cpi->cyclic_refresh); if (frame_is_intra_only(cm)) hybrid_intra_mode_search(cpi, x, rd_cost, bsize, ctx); else if (cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame) hybrid_search_svc_baseiskey(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row, mi_col); else if (segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP)) set_mode_info_seg_skip(x, cm->tx_mode, rd_cost, bsize); else if (bsize >= BLOCK_8X8) { if (cpi->rc.hybrid_intra_scene_change) hybrid_search_scene_change(cpi, x, rd_cost, bsize, ctx, tile_data, mi_row, mi_col); else vp9_pick_inter_mode(cpi, x, tile_data, mi_row, mi_col, rd_cost, bsize, ctx); } else { vp9_pick_inter_mode_sub8x8(cpi, x, mi_row, mi_col, rd_cost, bsize, ctx); } duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); for (plane = 0; plane < MAX_MB_PLANE; ++plane) { struct macroblockd_plane *pd = &xd->plane[plane]; memcpy(pd->above_context, a + num_4x4_blocks_wide * plane, (sizeof(a[0]) * num_4x4_blocks_wide) >> pd->subsampling_x); memcpy(pd->left_context, l + num_4x4_blocks_high * plane, (sizeof(l[0]) * num_4x4_blocks_high) >> pd->subsampling_y); } if (rd_cost->rate == INT_MAX) vp9_rd_cost_reset(rd_cost); ctx->rate = rd_cost->rate; ctx->dist = rd_cost->dist; } static void fill_mode_info_sb(VP9_COMMON *cm, MACROBLOCK *x, int mi_row, int mi_col, BLOCK_SIZE bsize, PC_TREE *pc_tree) { MACROBLOCKD *xd = &x->e_mbd; int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; PARTITION_TYPE partition = pc_tree->partitioning; BLOCK_SIZE subsize = get_subsize(bsize, partition); assert(bsize >= BLOCK_8X8); if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; switch (partition) { case PARTITION_NONE: set_mode_info_offsets(cm, x, xd, mi_row, mi_col); *(xd->mi[0]) = pc_tree->none.mic; *(x->mbmi_ext) = pc_tree->none.mbmi_ext; duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, bsize); break; case PARTITION_VERT: set_mode_info_offsets(cm, x, xd, mi_row, mi_col); *(xd->mi[0]) = pc_tree->vertical[0].mic; *(x->mbmi_ext) = pc_tree->vertical[0].mbmi_ext; duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize); if (mi_col + hbs < cm->mi_cols) { set_mode_info_offsets(cm, x, xd, mi_row, mi_col + hbs); *(xd->mi[0]) = pc_tree->vertical[1].mic; *(x->mbmi_ext) = pc_tree->vertical[1].mbmi_ext; duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col + hbs, subsize); } break; case PARTITION_HORZ: set_mode_info_offsets(cm, x, xd, mi_row, mi_col); *(xd->mi[0]) = pc_tree->horizontal[0].mic; *(x->mbmi_ext) = pc_tree->horizontal[0].mbmi_ext; duplicate_mode_info_in_sb(cm, xd, mi_row, mi_col, subsize); if (mi_row + hbs < cm->mi_rows) { set_mode_info_offsets(cm, x, xd, mi_row + hbs, mi_col); *(xd->mi[0]) = pc_tree->horizontal[1].mic; *(x->mbmi_ext) = pc_tree->horizontal[1].mbmi_ext; duplicate_mode_info_in_sb(cm, xd, mi_row + hbs, mi_col, subsize); } break; case PARTITION_SPLIT: { fill_mode_info_sb(cm, x, mi_row, mi_col, subsize, pc_tree->split[0]); fill_mode_info_sb(cm, x, mi_row, mi_col + hbs, 
subsize, pc_tree->split[1]); fill_mode_info_sb(cm, x, mi_row + hbs, mi_col, subsize, pc_tree->split[2]); fill_mode_info_sb(cm, x, mi_row + hbs, mi_col + hbs, subsize, pc_tree->split[3]); break; } default: break; } } // Reset the prediction pixel ready flag recursively. static void pred_pixel_ready_reset(PC_TREE *pc_tree, BLOCK_SIZE bsize) { pc_tree->none.pred_pixel_ready = 0; pc_tree->horizontal[0].pred_pixel_ready = 0; pc_tree->horizontal[1].pred_pixel_ready = 0; pc_tree->vertical[0].pred_pixel_ready = 0; pc_tree->vertical[1].pred_pixel_ready = 0; if (bsize > BLOCK_8X8) { BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); int i; for (i = 0; i < 4; ++i) pred_pixel_ready_reset(pc_tree->split[i], subsize); } } #define FEATURES 6 #define LABELS 2 static int ml_predict_var_paritioning(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; const NN_CONFIG *nn_config = NULL; switch (bsize) { case BLOCK_64X64: nn_config = &vp9_var_part_nnconfig_64; break; case BLOCK_32X32: nn_config = &vp9_var_part_nnconfig_32; break; case BLOCK_16X16: nn_config = &vp9_var_part_nnconfig_16; break; case BLOCK_8X8: break; default: assert(0 && "Unexpected block size."); return -1; } if (!nn_config) return -1; vpx_clear_system_state(); { const float thresh = cpi->oxcf.speed <= 5 ? 1.25f : 0.0f; float features[FEATURES] = { 0.0f }; const int dc_q = vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth); int feature_idx = 0; float score[LABELS]; features[feature_idx++] = logf((float)(dc_q * dc_q) / 256.0f + 1.0f); vp9_setup_src_planes(x, cpi->Source, mi_row, mi_col); { const int bs = 4 * num_4x4_blocks_wide_lookup[bsize]; const BLOCK_SIZE subsize = get_subsize(bsize, PARTITION_SPLIT); const int sb_offset_row = 8 * (mi_row & 7); const int sb_offset_col = 8 * (mi_col & 7); const uint8_t *pred = x->est_pred + sb_offset_row * 64 + sb_offset_col; const uint8_t *src = x->plane[0].src.buf; const int src_stride = x->plane[0].src.stride; const int pred_stride = 64; unsigned int sse; int i; // Variance of whole block. const unsigned int var = cpi->fn_ptr[bsize].vf(src, src_stride, pred, pred_stride, &sse); const float factor = (var == 0) ? 1.0f : (1.0f / (float)var); features[feature_idx++] = logf((float)var + 1.0f); for (i = 0; i < 4; ++i) { const int x_idx = (i & 1) * bs / 2; const int y_idx = (i >> 1) * bs / 2; const int src_offset = y_idx * src_stride + x_idx; const int pred_offset = y_idx * pred_stride + x_idx; // Variance of quarter block. const unsigned int sub_var = cpi->fn_ptr[subsize].vf(src + src_offset, src_stride, pred + pred_offset, pred_stride, &sse); const float var_ratio = (var == 0) ? 
1.0f : factor * (float)sub_var; features[feature_idx++] = var_ratio; } } assert(feature_idx == FEATURES); nn_predict(features, nn_config, score); if (score[0] > thresh) return PARTITION_SPLIT; if (score[0] < -thresh) return PARTITION_NONE; return -1; } } #undef FEATURES #undef LABELS static void nonrd_pick_partition(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, RD_COST *rd_cost, int do_recon, int64_t best_rd, PC_TREE *pc_tree) { const SPEED_FEATURES *const sf = &cpi->sf; VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int ms = num_8x8_blocks_wide_lookup[bsize] / 2; TOKENEXTRA *tp_orig = *tp; PICK_MODE_CONTEXT *ctx = &pc_tree->none; int i; BLOCK_SIZE subsize = bsize; RD_COST this_rdc, sum_rdc, best_rdc; int do_split = bsize >= BLOCK_8X8; int do_rect = 1; // Override skipping rectangular partition operations for edge blocks const int force_horz_split = (mi_row + ms >= cm->mi_rows); const int force_vert_split = (mi_col + ms >= cm->mi_cols); const int xss = x->e_mbd.plane[1].subsampling_x; const int yss = x->e_mbd.plane[1].subsampling_y; int partition_none_allowed = !force_horz_split && !force_vert_split; int partition_horz_allowed = !force_vert_split && yss <= xss && bsize >= BLOCK_8X8; int partition_vert_allowed = !force_horz_split && xss <= yss && bsize >= BLOCK_8X8; const int use_ml_based_partitioning = sf->partition_search_type == ML_BASED_PARTITION; (void)*tp_orig; // Avoid checking for rectangular partitions for speed >= 6. if (cpi->oxcf.speed >= 6) do_rect = 0; assert(num_8x8_blocks_wide_lookup[bsize] == num_8x8_blocks_high_lookup[bsize]); vp9_rd_cost_init(&sum_rdc); vp9_rd_cost_reset(&best_rdc); best_rdc.rdcost = best_rd; // Determine partition types in search according to the speed features. // The threshold set here has to be of square block size. 
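// In effect: NONE is searched only within [min, max]; HORZ/VERT are
// searched strictly above the minimum square size unless a frame edge
// forces the split; and recursion stops at the minimum square size.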
if (sf->auto_min_max_partition_size) { partition_none_allowed &= (bsize <= x->max_partition_size && bsize >= x->min_partition_size); partition_horz_allowed &= ((bsize <= x->max_partition_size && bsize > x->min_partition_size) || force_horz_split); partition_vert_allowed &= ((bsize <= x->max_partition_size && bsize > x->min_partition_size) || force_vert_split); do_split &= bsize > x->min_partition_size; } if (sf->use_square_partition_only) { partition_horz_allowed &= force_horz_split; partition_vert_allowed &= force_vert_split; } if (use_ml_based_partitioning) { if (partition_none_allowed || do_split) do_rect = 0; if (partition_none_allowed && do_split) { const int ml_predicted_partition = ml_predict_var_paritioning(cpi, x, bsize, mi_row, mi_col); if (ml_predicted_partition == PARTITION_NONE) do_split = 0; if (ml_predicted_partition == PARTITION_SPLIT) partition_none_allowed = 0; } } if (!partition_none_allowed && !do_split) do_rect = 1; ctx->pred_pixel_ready = !(partition_vert_allowed || partition_horz_allowed || do_split); // PARTITION_NONE if (partition_none_allowed) { nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &this_rdc, bsize, ctx); ctx->mic = *xd->mi[0]; ctx->mbmi_ext = *x->mbmi_ext; ctx->skip_txfm[0] = x->skip_txfm[0]; ctx->skip = x->skip; if (this_rdc.rate != INT_MAX) { const int pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rdc.rate += cpi->partition_cost[pl][PARTITION_NONE]; this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); if (this_rdc.rdcost < best_rdc.rdcost) { best_rdc = this_rdc; if (bsize >= BLOCK_8X8) pc_tree->partitioning = PARTITION_NONE; if (!use_ml_based_partitioning) { int64_t dist_breakout_thr = sf->partition_search_breakout_thr.dist; int64_t rate_breakout_thr = sf->partition_search_breakout_thr.rate; dist_breakout_thr >>= 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); rate_breakout_thr *= num_pels_log2_lookup[bsize]; if (!x->e_mbd.lossless && this_rdc.rate < rate_breakout_thr && this_rdc.dist < dist_breakout_thr) { do_split = 0; do_rect = 0; } } } } } // store estimated motion vector store_pred_mv(x, ctx); // PARTITION_SPLIT if (do_split) { int pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_SPLIT]; sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); subsize = get_subsize(bsize, PARTITION_SPLIT); for (i = 0; i < 4 && sum_rdc.rdcost < best_rdc.rdcost; ++i) { const int x_idx = (i & 1) * ms; const int y_idx = (i >> 1) * ms; if (mi_row + y_idx >= cm->mi_rows || mi_col + x_idx >= cm->mi_cols) continue; load_pred_mv(x, ctx); nonrd_pick_partition(cpi, td, tile_data, tp, mi_row + y_idx, mi_col + x_idx, subsize, &this_rdc, 0, best_rdc.rdcost - sum_rdc.rdcost, pc_tree->split[i]); if (this_rdc.rate == INT_MAX) { vp9_rd_cost_reset(&sum_rdc); } else { sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; sum_rdc.rdcost += this_rdc.rdcost; } } if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_SPLIT; } else { // skip rectangular partition test when larger block size // gives better rd cost if (sf->less_rectangular_check) do_rect &= !partition_none_allowed; } } // PARTITION_HORZ if (partition_horz_allowed && do_rect) { subsize = get_subsize(bsize, PARTITION_HORZ); load_pred_mv(x, ctx); pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[0].mic = *xd->mi[0]; 
pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; if (sum_rdc.rdcost < best_rdc.rdcost && mi_row + ms < cm->mi_rows) { load_pred_mv(x, ctx); pc_tree->horizontal[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + ms, mi_col, &this_rdc, subsize, &pc_tree->horizontal[1]); pc_tree->horizontal[1].mic = *xd->mi[0]; pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; if (this_rdc.rate == INT_MAX) { vp9_rd_cost_reset(&sum_rdc); } else { int pl = partition_plane_context(xd, mi_row, mi_col, bsize); this_rdc.rate += cpi->partition_cost[pl][PARTITION_HORZ]; sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); } } if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_HORZ; } else { pred_pixel_ready_reset(pc_tree, bsize); } } // PARTITION_VERT if (partition_vert_allowed && do_rect) { subsize = get_subsize(bsize, PARTITION_VERT); load_pred_mv(x, ctx); pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, &sum_rdc, subsize, &pc_tree->vertical[0]); pc_tree->vertical[0].mic = *xd->mi[0]; pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; if (sum_rdc.rdcost < best_rdc.rdcost && mi_col + ms < cm->mi_cols) { load_pred_mv(x, ctx); pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + ms, &this_rdc, subsize, &pc_tree->vertical[1]); pc_tree->vertical[1].mic = *xd->mi[0]; pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; if (this_rdc.rate == INT_MAX) { vp9_rd_cost_reset(&sum_rdc); } else { int pl = partition_plane_context(xd, mi_row, mi_col, bsize); sum_rdc.rate += cpi->partition_cost[pl][PARTITION_VERT]; sum_rdc.rate += this_rdc.rate; sum_rdc.dist += this_rdc.dist; sum_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, sum_rdc.rate, sum_rdc.dist); } } if (sum_rdc.rdcost < best_rdc.rdcost) { best_rdc = sum_rdc; pc_tree->partitioning = PARTITION_VERT; } else { pred_pixel_ready_reset(pc_tree, bsize); } } *rd_cost = best_rdc; if (best_rdc.rate == INT_MAX) { vp9_rd_cost_reset(rd_cost); return; } // update mode info array fill_mode_info_sb(cm, x, mi_row, mi_col, bsize, pc_tree); if (best_rdc.rate < INT_MAX && best_rdc.dist < INT64_MAX && do_recon) { int output_enabled = (bsize == BLOCK_64X64); encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, bsize, pc_tree); } if (bsize == BLOCK_64X64 && do_recon) { assert(tp_orig < *tp); assert(best_rdc.rate < INT_MAX); assert(best_rdc.dist < INT64_MAX); } else { assert(tp_orig == *tp); } } static void nonrd_select_partition(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, MODE_INFO **mi, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int output_enabled, RD_COST *rd_cost, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; const int mis = cm->mi_stride; PARTITION_TYPE partition; BLOCK_SIZE subsize; RD_COST this_rdc; BLOCK_SIZE subsize_ref = (cpi->sf.adapt_partition_source_sad) ? 
BLOCK_8X8 : BLOCK_16X16; vp9_rd_cost_reset(&this_rdc); if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4; partition = partition_lookup[bsl][subsize]; if (bsize == BLOCK_32X32 && subsize == BLOCK_32X32) { x->max_partition_size = BLOCK_32X32; x->min_partition_size = BLOCK_16X16; nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, 0, INT64_MAX, pc_tree); } else if (bsize == BLOCK_32X32 && partition != PARTITION_NONE && subsize >= subsize_ref) { x->max_partition_size = BLOCK_32X32; x->min_partition_size = BLOCK_8X8; nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, 0, INT64_MAX, pc_tree); } else if (bsize == BLOCK_16X16 && partition != PARTITION_NONE) { x->max_partition_size = BLOCK_16X16; x->min_partition_size = BLOCK_8X8; nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, bsize, rd_cost, 0, INT64_MAX, pc_tree); } else { switch (partition) { case PARTITION_NONE: pc_tree->none.pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->none); pc_tree->none.mic = *xd->mi[0]; pc_tree->none.mbmi_ext = *x->mbmi_ext; pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; pc_tree->none.skip = x->skip; break; case PARTITION_VERT: pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->vertical[0]); pc_tree->vertical[0].mic = *xd->mi[0]; pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; if (mi_col + hbs < cm->mi_cols) { pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, &this_rdc, subsize, &pc_tree->vertical[1]); pc_tree->vertical[1].mic = *xd->mi[0]; pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { rd_cost->rate += this_rdc.rate; rd_cost->dist += this_rdc.dist; } } break; case PARTITION_HORZ: pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, rd_cost, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[0].mic = *xd->mi[0]; pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; if (mi_row + hbs < cm->mi_rows) { pc_tree->horizontal[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, &this_rdc, subsize, &pc_tree->horizontal[1]); pc_tree->horizontal[1].mic = *xd->mi[0]; pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { rd_cost->rate += this_rdc.rate; rd_cost->dist += this_rdc.dist; } } break; default: assert(partition == PARTITION_SPLIT); subsize = get_subsize(bsize, PARTITION_SPLIT); nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize, output_enabled, rd_cost, pc_tree->split[0]); nonrd_select_partition(cpi, td, tile_data, mi + hbs, tp, mi_row, mi_col + hbs, subsize, output_enabled, &this_rdc, pc_tree->split[1]); if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { 
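// Accumulate only while both the running total and this sub-block's
// result are valid; INT_MAX / INT64_MAX act as "no result" sentinels.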
rd_cost->rate += this_rdc.rate; rd_cost->dist += this_rdc.dist; } nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis, tp, mi_row + hbs, mi_col, subsize, output_enabled, &this_rdc, pc_tree->split[2]); if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { rd_cost->rate += this_rdc.rate; rd_cost->dist += this_rdc.dist; } nonrd_select_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp, mi_row + hbs, mi_col + hbs, subsize, output_enabled, &this_rdc, pc_tree->split[3]); if (this_rdc.rate != INT_MAX && this_rdc.dist != INT64_MAX && rd_cost->rate != INT_MAX && rd_cost->dist != INT64_MAX) { rd_cost->rate += this_rdc.rate; rd_cost->dist += this_rdc.dist; } break; } } if (bsize == BLOCK_64X64 && output_enabled) encode_sb_rt(cpi, td, tile_info, tp, mi_row, mi_col, 1, bsize, pc_tree); } static void nonrd_use_partition(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, MODE_INFO **mi, TOKENEXTRA **tp, int mi_row, int mi_col, BLOCK_SIZE bsize, int output_enabled, RD_COST *dummy_cost, PC_TREE *pc_tree) { VP9_COMMON *const cm = &cpi->common; TileInfo *tile_info = &tile_data->tile_info; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int bsl = b_width_log2_lookup[bsize], hbs = (1 << bsl) / 4; const int mis = cm->mi_stride; PARTITION_TYPE partition; BLOCK_SIZE subsize; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; subsize = (bsize >= BLOCK_8X8) ? mi[0]->sb_type : BLOCK_4X4; partition = partition_lookup[bsl][subsize]; if (output_enabled && bsize != BLOCK_4X4) { int ctx = partition_plane_context(xd, mi_row, mi_col, bsize); td->counts->partition[ctx][partition]++; } switch (partition) { case PARTITION_NONE: pc_tree->none.pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, subsize, &pc_tree->none); pc_tree->none.mic = *xd->mi[0]; pc_tree->none.mbmi_ext = *x->mbmi_ext; pc_tree->none.skip_txfm[0] = x->skip_txfm[0]; pc_tree->none.skip = x->skip; encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->none); break; case PARTITION_VERT: pc_tree->vertical[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, subsize, &pc_tree->vertical[0]); pc_tree->vertical[0].mic = *xd->mi[0]; pc_tree->vertical[0].mbmi_ext = *x->mbmi_ext; pc_tree->vertical[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[0].skip = x->skip; encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->vertical[0]); if (mi_col + hbs < cm->mi_cols && bsize > BLOCK_8X8) { pc_tree->vertical[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col + hbs, dummy_cost, subsize, &pc_tree->vertical[1]); pc_tree->vertical[1].mic = *xd->mi[0]; pc_tree->vertical[1].mbmi_ext = *x->mbmi_ext; pc_tree->vertical[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->vertical[1].skip = x->skip; encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col + hbs, output_enabled, subsize, &pc_tree->vertical[1]); } break; case PARTITION_HORZ: pc_tree->horizontal[0].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, subsize, &pc_tree->horizontal[0]); pc_tree->horizontal[0].mic = *xd->mi[0]; pc_tree->horizontal[0].mbmi_ext = *x->mbmi_ext; pc_tree->horizontal[0].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[0].skip = x->skip; encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, subsize, &pc_tree->horizontal[0]); if (mi_row + hbs < cm->mi_rows && bsize > BLOCK_8X8) { 
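// The bottom half of the horizontal split lies inside the frame, so
// pick a mode for it and encode it as well.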
pc_tree->horizontal[1].pred_pixel_ready = 1; nonrd_pick_sb_modes(cpi, tile_data, x, mi_row + hbs, mi_col, dummy_cost, subsize, &pc_tree->horizontal[1]); pc_tree->horizontal[1].mic = *xd->mi[0]; pc_tree->horizontal[1].mbmi_ext = *x->mbmi_ext; pc_tree->horizontal[1].skip_txfm[0] = x->skip_txfm[0]; pc_tree->horizontal[1].skip = x->skip; encode_b_rt(cpi, td, tile_info, tp, mi_row + hbs, mi_col, output_enabled, subsize, &pc_tree->horizontal[1]); } break; default: assert(partition == PARTITION_SPLIT); subsize = get_subsize(bsize, PARTITION_SPLIT); if (bsize == BLOCK_8X8) { nonrd_pick_sb_modes(cpi, tile_data, x, mi_row, mi_col, dummy_cost, subsize, pc_tree->leaf_split[0]); encode_b_rt(cpi, td, tile_info, tp, mi_row, mi_col, output_enabled, subsize, pc_tree->leaf_split[0]); } else { nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, subsize, output_enabled, dummy_cost, pc_tree->split[0]); nonrd_use_partition(cpi, td, tile_data, mi + hbs, tp, mi_row, mi_col + hbs, subsize, output_enabled, dummy_cost, pc_tree->split[1]); nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis, tp, mi_row + hbs, mi_col, subsize, output_enabled, dummy_cost, pc_tree->split[2]); nonrd_use_partition(cpi, td, tile_data, mi + hbs * mis + hbs, tp, mi_row + hbs, mi_col + hbs, subsize, output_enabled, dummy_cost, pc_tree->split[3]); } break; } if (partition != PARTITION_SPLIT || bsize == BLOCK_8X8) update_partition_context(xd, mi_row, mi_col, subsize, bsize); } // Get a prediction (stored in x->est_pred) for the whole 64x64 superblock. static void get_estimated_pred(VP9_COMP *cpi, const TileInfo *const tile, MACROBLOCK *x, int mi_row, int mi_col) { VP9_COMMON *const cm = &cpi->common; const int is_key_frame = frame_is_intra_only(cm); MACROBLOCKD *xd = &x->e_mbd; set_offsets(cpi, tile, x, mi_row, mi_col, BLOCK_64X64); if (!is_key_frame) { MODE_INFO *mi = xd->mi[0]; YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); const YV12_BUFFER_CONFIG *yv12_g = NULL; const BLOCK_SIZE bsize = BLOCK_32X32 + (mi_col + 4 < cm->mi_cols) * 2 + (mi_row + 4 < cm->mi_rows); unsigned int y_sad_g, y_sad_thr; unsigned int y_sad = UINT_MAX; assert(yv12 != NULL); if (!(is_one_pass_cbr_svc(cpi) && cpi->svc.spatial_layer_id) || cpi->svc.use_gf_temporal_ref_current_layer) { // For now, GOLDEN will not be used for non-zero spatial layers, since // it may not be a temporal reference. yv12_g = get_ref_frame_buffer(cpi, GOLDEN_FRAME); } // Only compute y_sad_g (sad for golden reference) for speed < 8.
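// (When the computation is skipped, y_sad_g stays UINT_MAX, so GOLDEN
// can never win the reference comparison further below.)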
if (cpi->oxcf.speed < 8 && yv12_g && yv12_g != yv12 && (cpi->ref_frame_flags & VP9_GOLD_FLAG)) { vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, &cm->frame_refs[GOLDEN_FRAME - 1].sf); y_sad_g = cpi->fn_ptr[bsize].sdf( x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); } else { y_sad_g = UINT_MAX; } if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.is_src_frame_alt_ref) { yv12 = get_ref_frame_buffer(cpi, ALTREF_FRAME); vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, &cm->frame_refs[ALTREF_FRAME - 1].sf); mi->ref_frame[0] = ALTREF_FRAME; y_sad_g = UINT_MAX; } else { vp9_setup_pre_planes(xd, 0, yv12, mi_row, mi_col, &cm->frame_refs[LAST_FRAME - 1].sf); mi->ref_frame[0] = LAST_FRAME; } mi->ref_frame[1] = NONE; mi->sb_type = BLOCK_64X64; mi->mv[0].as_int = 0; mi->interp_filter = BILINEAR; { const MV dummy_mv = { 0, 0 }; y_sad = vp9_int_pro_motion_estimation(cpi, x, bsize, mi_row, mi_col, &dummy_mv); x->sb_use_mv_part = 1; x->sb_mvcol_part = mi->mv[0].as_mv.col; x->sb_mvrow_part = mi->mv[0].as_mv.row; } // Pick ref frame for partitioning, bias last frame when y_sad_g and y_sad // are close if short_circuit_low_temp_var is on. y_sad_thr = cpi->sf.short_circuit_low_temp_var ? (y_sad * 7) >> 3 : y_sad; if (y_sad_g < y_sad_thr) { vp9_setup_pre_planes(xd, 0, yv12_g, mi_row, mi_col, &cm->frame_refs[GOLDEN_FRAME - 1].sf); mi->ref_frame[0] = GOLDEN_FRAME; mi->mv[0].as_int = 0; } else { x->pred_mv[LAST_FRAME] = mi->mv[0].as_mv; } set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); xd->plane[0].dst.buf = x->est_pred; xd->plane[0].dst.stride = 64; vp9_build_inter_predictors_sb(xd, mi_row, mi_col, BLOCK_64X64); } else { #if CONFIG_VP9_HIGHBITDEPTH switch (xd->bd) { case 8: memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); break; case 10: memset(x->est_pred, 128 * 4, 64 * 64 * sizeof(x->est_pred[0])); break; case 12: memset(x->est_pred, 128 * 16, 64 * 64 * sizeof(x->est_pred[0])); break; } #else memset(x->est_pred, 128, 64 * 64 * sizeof(x->est_pred[0])); #endif // CONFIG_VP9_HIGHBITDEPTH } } static void encode_nonrd_sb_row(VP9_COMP *cpi, ThreadData *td, TileDataEnc *tile_data, int mi_row, TOKENEXTRA **tp) { SPEED_FEATURES *const sf = &cpi->sf; VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; const int mi_col_start = tile_info->mi_col_start; const int mi_col_end = tile_info->mi_col_end; int mi_col; const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; const int num_sb_cols = get_num_cols(tile_data->tile_info, MI_BLOCK_SIZE_LOG2); int sb_col_in_tile; // Initialize the left context for the new SB row memset(&xd->left_context, 0, sizeof(xd->left_context)); memset(xd->left_seg_context, 0, sizeof(xd->left_seg_context)); // Code each SB in the row for (mi_col = mi_col_start, sb_col_in_tile = 0; mi_col < mi_col_end; mi_col += MI_BLOCK_SIZE, ++sb_col_in_tile) { const struct segmentation *const seg = &cm->seg; RD_COST dummy_rdc; const int idx_str = cm->mi_stride * mi_row + mi_col; MODE_INFO **mi = cm->mi_grid_visible + idx_str; PARTITION_SEARCH_TYPE partition_search_type = sf->partition_search_type; BLOCK_SIZE bsize = BLOCK_64X64; int seg_skip = 0; int i; (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, sb_row, sb_col_in_tile); if (cpi->use_skin_detection) { vp9_compute_skin_sb(cpi, BLOCK_16X16, mi_row, mi_col); } x->source_variance = UINT_MAX; for (i = 0; i < MAX_REF_FRAMES; ++i) { x->pred_mv[i].row = INT16_MAX; 
x->pred_mv[i].col = INT16_MAX; } vp9_rd_cost_init(&dummy_rdc); x->color_sensitivity[0] = 0; x->color_sensitivity[1] = 0; x->sb_is_skin = 0; x->skip_low_source_sad = 0; x->lowvar_highsumdiff = 0; x->content_state_sb = 0; x->zero_temp_sad_source = 0; x->sb_use_mv_part = 0; x->sb_mvcol_part = 0; x->sb_mvrow_part = 0; x->sb_pickmode_part = 0; x->arf_frame_usage = 0; x->lastgolden_frame_usage = 0; if (seg->enabled) { const uint8_t *const map = seg->update_map ? cpi->segmentation_map : cm->last_frame_seg_map; int segment_id = get_segment_id(cm, map, BLOCK_64X64, mi_row, mi_col); seg_skip = segfeature_active(seg, segment_id, SEG_LVL_SKIP); if (seg_skip) { partition_search_type = FIXED_PARTITION; } } if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) { int shift = cpi->Source->y_stride * (mi_row << 3) + (mi_col << 3); int sb_offset2 = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); int64_t source_sad = avg_source_sad(cpi, x, shift, sb_offset2); if (sf->adapt_partition_source_sad && (cpi->oxcf.rc_mode == VPX_VBR && !cpi->rc.is_src_frame_alt_ref && source_sad > sf->adapt_partition_thresh && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) partition_search_type = REFERENCE_PARTITION; } // Set the partition type of the 64X64 block switch (partition_search_type) { case VAR_BASED_PARTITION: // TODO(jingning, marpan): The mode decision and encoding process // support both intra and inter sub8x8 block coding for RTC mode. // Tune the thresholds accordingly to use sub8x8 block coding for // coding performance improvement. choose_partitioning(cpi, tile_info, x, mi_row, mi_col); nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rdc, td->pc_root); break; case ML_BASED_PARTITION: get_estimated_pred(cpi, tile_info, x, mi_row, mi_col); x->max_partition_size = BLOCK_64X64; x->min_partition_size = BLOCK_8X8; x->sb_pickmode_part = 1; nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, td->pc_root); break; case SOURCE_VAR_BASED_PARTITION: set_source_var_based_partition(cpi, tile_info, x, mi, mi_row, mi_col); nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rdc, td->pc_root); break; case FIXED_PARTITION: if (!seg_skip) bsize = sf->always_this_block_size; set_fixed_partitioning(cpi, tile_info, mi, mi_row, mi_col, bsize); nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rdc, td->pc_root); break; default: assert(partition_search_type == REFERENCE_PARTITION); x->sb_pickmode_part = 1; set_offsets(cpi, tile_info, x, mi_row, mi_col, BLOCK_64X64); // Use nonrd_pick_partition on scene-cut for VBR mode. // nonrd_pick_partition does not support 4x4 partition, so avoid it // on key frame for now. if ((cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.high_source_sad && cpi->oxcf.speed < 6 && !frame_is_intra_only(cm) && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) { // Use lower max_partition_size for low resolutions. if (cm->width <= 352 && cm->height <= 288) x->max_partition_size = BLOCK_32X32; else x->max_partition_size = BLOCK_64X64; x->min_partition_size = BLOCK_8X8; nonrd_pick_partition(cpi, td, tile_data, tp, mi_row, mi_col, BLOCK_64X64, &dummy_rdc, 1, INT64_MAX, td->pc_root); } else { choose_partitioning(cpi, tile_info, x, mi_row, mi_col); // TODO(marpan): Seems like nonrd_select_partition does not support // 4x4 partition. Since 4x4 is used on key frame, use this switch // for now.
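// So key frames take the nonrd_use_partition path (which can reach
// 4x4), while inter frames refine the partitioning chosen above via
// nonrd_select_partition.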
if (frame_is_intra_only(cm)) nonrd_use_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rdc, td->pc_root); else nonrd_select_partition(cpi, td, tile_data, mi, tp, mi_row, mi_col, BLOCK_64X64, 1, &dummy_rdc, td->pc_root); } break; } // Update ref_frame usage for inter frame if this group is ARF group. if (!cpi->rc.is_src_frame_alt_ref && !cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame && cpi->rc.alt_ref_gf_group && cpi->sf.use_altref_onepass) { int sboffset = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3); if (cpi->count_arf_frame_usage != NULL) cpi->count_arf_frame_usage[sboffset] = x->arf_frame_usage; if (cpi->count_lastgolden_frame_usage != NULL) cpi->count_lastgolden_frame_usage[sboffset] = x->lastgolden_frame_usage; } (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, sb_row, sb_col_in_tile, num_sb_cols); } } // end RTC play code static INLINE uint32_t variance(const diff *const d) { return d->sse - (uint32_t)(((int64_t)d->sum * d->sum) >> 8); } #if CONFIG_VP9_HIGHBITDEPTH static INLINE uint32_t variance_highbd(diff *const d) { const int64_t var = (int64_t)d->sse - (((int64_t)d->sum * d->sum) >> 8); return (var >= 0) ? (uint32_t)var : 0; } #endif // CONFIG_VP9_HIGHBITDEPTH static int set_var_thresh_from_histogram(VP9_COMP *cpi) { const SPEED_FEATURES *const sf = &cpi->sf; const VP9_COMMON *const cm = &cpi->common; const uint8_t *src = cpi->Source->y_buffer; const uint8_t *last_src = cpi->Last_Source->y_buffer; const int src_stride = cpi->Source->y_stride; const int last_stride = cpi->Last_Source->y_stride; // Pick cutoff threshold const int cutoff = (VPXMIN(cm->width, cm->height) >= 720) ? (cm->MBs * VAR_HIST_LARGE_CUT_OFF / 100) : (cm->MBs * VAR_HIST_SMALL_CUT_OFF / 100); DECLARE_ALIGNED(16, int, hist[VAR_HIST_BINS]); diff *var16 = cpi->source_diff_var; int sum = 0; int i, j; memset(hist, 0, VAR_HIST_BINS * sizeof(hist[0])); for (i = 0; i < cm->mb_rows; i++) { for (j = 0; j < cm->mb_cols; j++) { #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { switch (cm->bit_depth) { case VPX_BITS_8: vpx_highbd_8_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); var16->var = variance(var16); break; case VPX_BITS_10: vpx_highbd_10_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); var16->var = variance_highbd(var16); break; default: assert(cm->bit_depth == VPX_BITS_12); vpx_highbd_12_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); var16->var = variance_highbd(var16); break; } } else { vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); var16->var = variance(var16); } #else vpx_get16x16var(src, src_stride, last_src, last_stride, &var16->sse, &var16->sum); var16->var = variance(var16); #endif // CONFIG_VP9_HIGHBITDEPTH if (var16->var >= VAR_HIST_MAX_BG_VAR) hist[VAR_HIST_BINS - 1]++; else hist[var16->var / VAR_HIST_FACTOR]++; src += 16; last_src += 16; var16++; } src = src - cm->mb_cols * 16 + 16 * src_stride; last_src = last_src - cm->mb_cols * 16 + 16 * last_stride; } cpi->source_var_thresh = 0; if (hist[VAR_HIST_BINS - 1] < cutoff) { for (i = 0; i < VAR_HIST_BINS - 1; i++) { sum += hist[i]; if (sum > cutoff) { cpi->source_var_thresh = (i + 1) * VAR_HIST_FACTOR; return 0; } } } return sf->search_type_check_frequency; } static void source_var_based_partition_search_method(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; if (cm->frame_type == KEY_FRAME) { // For key frame, use 
SEARCH_PARTITION. sf->partition_search_type = SEARCH_PARTITION; } else if (cm->intra_only) { sf->partition_search_type = FIXED_PARTITION; } else { if (cm->last_width != cm->width || cm->last_height != cm->height) { if (cpi->source_diff_var) vpx_free(cpi->source_diff_var); CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff))); } if (!cpi->frames_till_next_var_check) cpi->frames_till_next_var_check = set_var_thresh_from_histogram(cpi); if (cpi->frames_till_next_var_check > 0) { sf->partition_search_type = FIXED_PARTITION; cpi->frames_till_next_var_check--; } } } static int get_skip_encode_frame(const VP9_COMMON *cm, ThreadData *const td) { unsigned int intra_count = 0, inter_count = 0; int j; for (j = 0; j < INTRA_INTER_CONTEXTS; ++j) { intra_count += td->counts->intra_inter[j][0]; inter_count += td->counts->intra_inter[j][1]; } return (intra_count << 2) < inter_count && cm->frame_type != KEY_FRAME && cm->show_frame; } void vp9_init_tile_data(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; int tile_col, tile_row; TOKENEXTRA *pre_tok = cpi->tile_tok[0][0]; TOKENLIST *tplist = cpi->tplist[0][0]; int tile_tok = 0; int tplist_count = 0; if (cpi->tile_data == NULL || cpi->allocated_tiles < tile_cols * tile_rows) { if (cpi->tile_data != NULL) vpx_free(cpi->tile_data); CHECK_MEM_ERROR( cm, cpi->tile_data, vpx_malloc(tile_cols * tile_rows * sizeof(*cpi->tile_data))); cpi->allocated_tiles = tile_cols * tile_rows; for (tile_row = 0; tile_row < tile_rows; ++tile_row) for (tile_col = 0; tile_col < tile_cols; ++tile_col) { TileDataEnc *tile_data = &cpi->tile_data[tile_row * tile_cols + tile_col]; int i, j; for (i = 0; i < BLOCK_SIZES; ++i) { for (j = 0; j < MAX_MODES; ++j) { tile_data->thresh_freq_fact[i][j] = RD_THRESH_INIT_FACT; #if CONFIG_CONSISTENT_RECODE tile_data->thresh_freq_fact_prev[i][j] = RD_THRESH_INIT_FACT; #endif tile_data->mode_map[i][j] = j; } } #if CONFIG_MULTITHREAD tile_data->row_base_thresh_freq_fact = NULL; #endif } } for (tile_row = 0; tile_row < tile_rows; ++tile_row) { for (tile_col = 0; tile_col < tile_cols; ++tile_col) { TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; TileInfo *tile_info = &this_tile->tile_info; if (cpi->sf.adaptive_rd_thresh_row_mt && this_tile->row_base_thresh_freq_fact == NULL) vp9_row_mt_alloc_rd_thresh(cpi, this_tile); vp9_tile_init(tile_info, cm, tile_row, tile_col); cpi->tile_tok[tile_row][tile_col] = pre_tok + tile_tok; pre_tok = cpi->tile_tok[tile_row][tile_col]; tile_tok = allocated_tokens(*tile_info); cpi->tplist[tile_row][tile_col] = tplist + tplist_count; tplist = cpi->tplist[tile_row][tile_col]; tplist_count = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2); } } } void vp9_encode_sb_row(VP9_COMP *cpi, ThreadData *td, int tile_row, int tile_col, int mi_row) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; const TileInfo *const tile_info = &this_tile->tile_info; TOKENEXTRA *tok = NULL; int tile_sb_row; int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1; tile_sb_row = mi_cols_aligned_to_sb(mi_row - tile_info->mi_row_start) >> MI_BLOCK_SIZE_LOG2; get_start_tok(cpi, tile_row, tile_col, mi_row, &tok); cpi->tplist[tile_row][tile_col][tile_sb_row].start = tok; if (cpi->sf.use_nonrd_pick_mode) encode_nonrd_sb_row(cpi, td, this_tile, mi_row, &tok); #if 
!CONFIG_REALTIME_ONLY else encode_rd_sb_row(cpi, td, this_tile, mi_row, &tok); #endif cpi->tplist[tile_row][tile_col][tile_sb_row].stop = tok; cpi->tplist[tile_row][tile_col][tile_sb_row].count = (unsigned int)(cpi->tplist[tile_row][tile_col][tile_sb_row].stop - cpi->tplist[tile_row][tile_col][tile_sb_row].start); assert(tok - cpi->tplist[tile_row][tile_col][tile_sb_row].start <= get_token_alloc(MI_BLOCK_SIZE >> 1, tile_mb_cols)); (void)tile_mb_cols; } void vp9_encode_tile(VP9_COMP *cpi, ThreadData *td, int tile_row, int tile_col) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; const TileInfo *const tile_info = &this_tile->tile_info; const int mi_row_start = tile_info->mi_row_start; const int mi_row_end = tile_info->mi_row_end; int mi_row; for (mi_row = mi_row_start; mi_row < mi_row_end; mi_row += MI_BLOCK_SIZE) vp9_encode_sb_row(cpi, td, tile_row, tile_col, mi_row); } static void encode_tiles(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; int tile_col, tile_row; vp9_init_tile_data(cpi); for (tile_row = 0; tile_row < tile_rows; ++tile_row) for (tile_col = 0; tile_col < tile_cols; ++tile_col) vp9_encode_tile(cpi, &cpi->td, tile_row, tile_col); } #if CONFIG_FP_MB_STATS static int input_fpmb_stats(FIRSTPASS_MB_STATS *firstpass_mb_stats, VP9_COMMON *cm, uint8_t **this_frame_mb_stats) { uint8_t *mb_stats_in = firstpass_mb_stats->mb_stats_start + cm->current_video_frame * cm->MBs * sizeof(uint8_t); if (mb_stats_in > firstpass_mb_stats->mb_stats_end) return EOF; *this_frame_mb_stats = mb_stats_in; return 1; } #endif static int compare_kmeans_data(const void *a, const void *b) { if (((const KMEANS_DATA *)a)->value > ((const KMEANS_DATA *)b)->value) { return 1; } else if (((const KMEANS_DATA *)a)->value < ((const KMEANS_DATA *)b)->value) { return -1; } else { return 0; } } static void compute_boundary_ls(const double *ctr_ls, int k, double *boundary_ls) { // boundary_ls[j] is the upper bound of data centered at ctr_ls[j] int j; for (j = 0; j < k - 1; ++j) { boundary_ls[j] = (ctr_ls[j] + ctr_ls[j + 1]) / 2.; } boundary_ls[k - 1] = DBL_MAX; } int vp9_get_group_idx(double value, double *boundary_ls, int k) { int group_idx = 0; while (value >= boundary_ls[group_idx]) { ++group_idx; if (group_idx == k - 1) { break; } } return group_idx; } void vp9_kmeans(double *ctr_ls, double *boundary_ls, int *count_ls, int k, KMEANS_DATA *arr, int size) { int i, j; int itr; int group_idx; double sum[MAX_KMEANS_GROUPS]; int count[MAX_KMEANS_GROUPS]; vpx_clear_system_state(); assert(k >= 2 && k <= MAX_KMEANS_GROUPS); qsort(arr, size, sizeof(*arr), compare_kmeans_data); // initialize the center points for (j = 0; j < k; ++j) { ctr_ls[j] = arr[(size * (2 * j + 1)) / (2 * k)].value; } for (itr = 0; itr < 10; ++itr) { compute_boundary_ls(ctr_ls, k, boundary_ls); for (i = 0; i < MAX_KMEANS_GROUPS; ++i) { sum[i] = 0; count[i] = 0; } // Both the data and centers are sorted in ascending order. // As each data point is processed in order, its corresponding group index // can only increase. So we only need to reset the group index to zero here. 
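// Illustrative example (added note, hypothetical values): with k = 2 and
// sorted centers ctr_ls = { 1.0, 9.0 }, compute_boundary_ls() yields
// boundary_ls = { 5.0, DBL_MAX }; a sample of 4.2 then stays in group 0,
// while 6.8 advances group_idx and is counted in group 1.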
group_idx = 0; for (i = 0; i < size; ++i) { while (arr[i].value >= boundary_ls[group_idx]) { // place samples into clusters ++group_idx; if (group_idx == k - 1) { break; } } sum[group_idx] += arr[i].value; ++count[group_idx]; } for (group_idx = 0; group_idx < k; ++group_idx) { if (count[group_idx] > 0) ctr_ls[group_idx] = sum[group_idx] / count[group_idx]; sum[group_idx] = 0; count[group_idx] = 0; } } // compute group_idx, boundary_ls and count_ls for (j = 0; j < k; ++j) { count_ls[j] = 0; } compute_boundary_ls(ctr_ls, k, boundary_ls); group_idx = 0; for (i = 0; i < size; ++i) { while (arr[i].value >= boundary_ls[group_idx]) { ++group_idx; if (group_idx == k - 1) { break; } } arr[i].group_idx = group_idx; ++count_ls[group_idx]; } } static void encode_frame_internal(VP9_COMP *cpi) { SPEED_FEATURES *const sf = &cpi->sf; ThreadData *const td = &cpi->td; MACROBLOCK *const x = &td->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; const int gf_group_index = cpi->twopass.gf_group.index; xd->mi = cm->mi_grid_visible; xd->mi[0] = cm->mi; vp9_zero(*td->counts); vp9_zero(cpi->td.rd_counts); xd->lossless = cm->base_qindex == 0 && cm->y_dc_delta_q == 0 && cm->uv_dc_delta_q == 0 && cm->uv_ac_delta_q == 0; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) x->fwd_txfm4x4 = xd->lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4; else x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; x->highbd_inv_txfm_add = xd->lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add; #else x->fwd_txfm4x4 = xd->lossless ? vp9_fwht4x4 : vpx_fdct4x4; #endif // CONFIG_VP9_HIGHBITDEPTH x->inv_txfm_add = xd->lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; #if CONFIG_CONSISTENT_RECODE x->optimize = sf->optimize_coefficients == 1 && cpi->oxcf.pass != 1; #endif if (xd->lossless) x->optimize = 0; x->sharpness = cpi->oxcf.sharpness; x->adjust_rdmult_by_segment = (cpi->oxcf.aq_mode == VARIANCE_AQ); cm->tx_mode = select_tx_mode(cpi, xd); vp9_frame_init_quantizer(cpi); vp9_initialize_rd_consts(cpi); vp9_initialize_me_consts(cpi, x, cm->base_qindex); init_encode_frame_mb_context(cpi); cm->use_prev_frame_mvs = !cm->error_resilient_mode && cm->width == cm->last_width && cm->height == cm->last_height && !cm->intra_only && cm->last_show_frame; // Special case: set prev_mi to NULL when the previous mode info // context cannot be used. cm->prev_mi = cm->use_prev_frame_mvs ? cm->prev_mip + cm->mi_stride + 1 : NULL; x->quant_fp = cpi->sf.use_quant_fp; vp9_zero(x->skip_txfm); if (sf->use_nonrd_pick_mode) { // Initialize internal buffer pointers for rtc coding, where non-RD // mode decision is used and hence no buffer pointer swap needed. 
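// The loop below aliases each plane's coeff/qcoeff/dqcoeff/eobs pointers to
// the root PICK_MODE_CONTEXT buffers (index [i][0]), so the coefficients
// chosen during non-RD mode decision are already in place when the
// superblock is finally encoded.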
int i; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; PICK_MODE_CONTEXT *ctx = &cpi->td.pc_root->none; for (i = 0; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][0]; p[i].qcoeff = ctx->qcoeff_pbuf[i][0]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][0]; p[i].eobs = ctx->eobs_pbuf[i][0]; } vp9_zero(x->zcoeff_blk); if (cm->frame_type != KEY_FRAME && cpi->rc.frames_since_golden == 0 && !(cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) && !cpi->use_svc) cpi->ref_frame_flags &= (~VP9_GOLD_FLAG); if (sf->partition_search_type == SOURCE_VAR_BASED_PARTITION) source_var_based_partition_search_method(cpi); } else if (gf_group_index && gf_group_index < MAX_ARF_GOP_SIZE && cpi->sf.enable_tpl_model) { TplDepFrame *tpl_frame = &cpi->tpl_stats[cpi->twopass.gf_group.index]; TplDepStats *tpl_stats = tpl_frame->tpl_stats_ptr; int tpl_stride = tpl_frame->stride; int64_t intra_cost_base = 0; int64_t mc_dep_cost_base = 0; int row, col; for (row = 0; row < cm->mi_rows && tpl_frame->is_valid; ++row) { for (col = 0; col < cm->mi_cols; ++col) { TplDepStats *this_stats = &tpl_stats[row * tpl_stride + col]; intra_cost_base += this_stats->intra_cost; mc_dep_cost_base += this_stats->mc_dep_cost; } } vpx_clear_system_state(); if (tpl_frame->is_valid) cpi->rd.r0 = (double)intra_cost_base / mc_dep_cost_base; } // Frame segmentation if (cpi->oxcf.aq_mode == PERCEPTUAL_AQ) build_kmeans_segmentation(cpi); { struct vpx_usec_timer emr_timer; vpx_usec_timer_start(&emr_timer); #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { input_fpmb_stats(&cpi->twopass.firstpass_mb_stats, cm, &cpi->twopass.this_frame_mb_stats); } #endif if (!cpi->row_mt) { cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy; cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy; // If allowed, encoding tiles in parallel with one thread handling one // tile when row based multi-threading is disabled. if (VPXMIN(cpi->oxcf.max_threads, 1 << cm->log2_tile_cols) > 1) vp9_encode_tiles_mt(cpi); else encode_tiles(cpi); } else { cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read; cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write; vp9_encode_tiles_row_mt(cpi); } vpx_usec_timer_mark(&emr_timer); cpi->time_encode_sb_row += vpx_usec_timer_elapsed(&emr_timer); } sf->skip_encode_frame = sf->skip_encode_sb ? 
get_skip_encode_frame(cm, td) : 0; #if 0 // Keep record of the total distortion this time around for future use cpi->last_frame_distortion = cpi->frame_distortion; #endif } static INTERP_FILTER get_interp_filter( const int64_t threshes[SWITCHABLE_FILTER_CONTEXTS], int is_alt_ref) { if (!is_alt_ref && threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP] && threshes[EIGHTTAP_SMOOTH] > threshes[EIGHTTAP_SHARP] && threshes[EIGHTTAP_SMOOTH] > threshes[SWITCHABLE - 1]) { return EIGHTTAP_SMOOTH; } else if (threshes[EIGHTTAP_SHARP] > threshes[EIGHTTAP] && threshes[EIGHTTAP_SHARP] > threshes[SWITCHABLE - 1]) { return EIGHTTAP_SHARP; } else if (threshes[EIGHTTAP] > threshes[SWITCHABLE - 1]) { return EIGHTTAP; } else { return SWITCHABLE; } } static int compute_frame_aq_offset(struct VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible; struct segmentation *const seg = &cm->seg; int mi_row, mi_col; int sum_delta = 0; int map_index = 0; int qdelta_index; int segment_id; for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { MODE_INFO **mi_8x8 = mi_8x8_ptr; for (mi_col = 0; mi_col < cm->mi_cols; mi_col++, mi_8x8++) { segment_id = mi_8x8[0]->segment_id; qdelta_index = get_segdata(seg, segment_id, SEG_LVL_ALT_Q); sum_delta += qdelta_index; map_index++; } mi_8x8_ptr += cm->mi_stride; } return sum_delta / (cm->mi_rows * cm->mi_cols); } #if CONFIG_CONSISTENT_RECODE static void restore_encode_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; int tile_col, tile_row; int i, j; RD_OPT *rd_opt = &cpi->rd; for (i = 0; i < MAX_REF_FRAMES; i++) { for (j = 0; j < REFERENCE_MODES; j++) rd_opt->prediction_type_threshes[i][j] = rd_opt->prediction_type_threshes_prev[i][j]; for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++) rd_opt->filter_threshes[i][j] = rd_opt->filter_threshes_prev[i][j]; } if (cpi->tile_data != NULL) { for (tile_row = 0; tile_row < tile_rows; ++tile_row) for (tile_col = 0; tile_col < tile_cols; ++tile_col) { TileDataEnc *tile_data = &cpi->tile_data[tile_row * tile_cols + tile_col]; for (i = 0; i < BLOCK_SIZES; ++i) { for (j = 0; j < MAX_MODES; ++j) { tile_data->thresh_freq_fact[i][j] = tile_data->thresh_freq_fact_prev[i][j]; } } } } cm->interp_filter = cpi->sf.default_interp_filter; } #endif void vp9_encode_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; #if CONFIG_CONSISTENT_RECODE restore_encode_params(cpi); #endif #if CONFIG_MISMATCH_DEBUG mismatch_reset_frame(MAX_MB_PLANE); #endif // In the longer term the encoder should be generalized to match the // decoder such that we allow compound where one of the 3 buffers has a // different sign bias and that buffer is then the fixed ref. However, this // requires further work in the rd loop. For now the only supported encoder // side behavior is where the ALT ref buffer has opposite sign bias to // the other two. if (!frame_is_intra_only(cm)) { if (vp9_compound_reference_allowed(cm)) { cpi->allow_comp_inter_inter = 1; vp9_setup_compound_reference_mode(cm); } else { cpi->allow_comp_inter_inter = 0; } } if (cpi->sf.frame_parameter_update) { int i; RD_OPT *const rd_opt = &cpi->rd; FRAME_COUNTS *counts = cpi->td.counts; RD_COUNTS *const rdc = &cpi->td.rd_counts; // This code does a single RD pass over the whole frame assuming // either compound, single or hybrid prediction as per whatever has // worked best for that type of frame in the past. 
// It also predicts whether another coding mode would have worked // better than this coding mode. If that is the case, it remembers // that for subsequent frames. // It also does the same analysis for transform size selection. const MV_REFERENCE_FRAME frame_type = get_frame_type(cpi); int64_t *const mode_thrs = rd_opt->prediction_type_threshes[frame_type]; int64_t *const filter_thrs = rd_opt->filter_threshes[frame_type]; const int is_alt_ref = frame_type == ALTREF_FRAME; /* prediction (compound, single or hybrid) mode selection */ if (is_alt_ref || !cpi->allow_comp_inter_inter) cm->reference_mode = SINGLE_REFERENCE; else if (mode_thrs[COMPOUND_REFERENCE] > mode_thrs[SINGLE_REFERENCE] && mode_thrs[COMPOUND_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT] && check_dual_ref_flags(cpi) && cpi->static_mb_pct == 100) cm->reference_mode = COMPOUND_REFERENCE; else if (mode_thrs[SINGLE_REFERENCE] > mode_thrs[REFERENCE_MODE_SELECT]) cm->reference_mode = SINGLE_REFERENCE; else cm->reference_mode = REFERENCE_MODE_SELECT; if (cm->interp_filter == SWITCHABLE) cm->interp_filter = get_interp_filter(filter_thrs, is_alt_ref); encode_frame_internal(cpi); for (i = 0; i < REFERENCE_MODES; ++i) mode_thrs[i] = (mode_thrs[i] + rdc->comp_pred_diff[i] / cm->MBs) / 2; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_thrs[i] = (filter_thrs[i] + rdc->filter_diff[i] / cm->MBs) / 2; if (cm->reference_mode == REFERENCE_MODE_SELECT) { int single_count_zero = 0; int comp_count_zero = 0; for (i = 0; i < COMP_INTER_CONTEXTS; i++) { single_count_zero += counts->comp_inter[i][0]; comp_count_zero += counts->comp_inter[i][1]; } if (comp_count_zero == 0) { cm->reference_mode = SINGLE_REFERENCE; vp9_zero(counts->comp_inter); } else if (single_count_zero == 0) { cm->reference_mode = COMPOUND_REFERENCE; vp9_zero(counts->comp_inter); } } if (cm->tx_mode == TX_MODE_SELECT) { int count4x4 = 0; int count8x8_lp = 0, count8x8_8x8p = 0; int count16x16_16x16p = 0, count16x16_lp = 0; int count32x32 = 0; for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { count4x4 += counts->tx.p32x32[i][TX_4X4]; count4x4 += counts->tx.p16x16[i][TX_4X4]; count4x4 += counts->tx.p8x8[i][TX_4X4]; count8x8_lp += counts->tx.p32x32[i][TX_8X8]; count8x8_lp += counts->tx.p16x16[i][TX_8X8]; count8x8_8x8p += counts->tx.p8x8[i][TX_8X8]; count16x16_16x16p += counts->tx.p16x16[i][TX_16X16]; count16x16_lp += counts->tx.p32x32[i][TX_16X16]; count32x32 += counts->tx.p32x32[i][TX_32X32]; } if (count4x4 == 0 && count16x16_lp == 0 && count16x16_16x16p == 0 && count32x32 == 0) { cm->tx_mode = ALLOW_8X8; reset_skip_tx_size(cm, TX_8X8); } else if (count8x8_8x8p == 0 && count16x16_16x16p == 0 && count8x8_lp == 0 && count16x16_lp == 0 && count32x32 == 0) { cm->tx_mode = ONLY_4X4; reset_skip_tx_size(cm, TX_4X4); } else if (count8x8_lp == 0 && count16x16_lp == 0 && count4x4 == 0) { cm->tx_mode = ALLOW_32X32; } else if (count32x32 == 0 && count8x8_lp == 0 && count4x4 == 0) { cm->tx_mode = ALLOW_16X16; reset_skip_tx_size(cm, TX_16X16); } } } else { FRAME_COUNTS *counts = cpi->td.counts; cm->reference_mode = SINGLE_REFERENCE; if (cpi->allow_comp_inter_inter && cpi->sf.use_compound_nonrd_pickmode && cpi->rc.alt_ref_gf_group && !cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) cm->reference_mode = REFERENCE_MODE_SELECT; encode_frame_internal(cpi); if (cm->reference_mode == REFERENCE_MODE_SELECT) { int single_count_zero = 0; int comp_count_zero = 0; int i; for (i = 0; i < COMP_INTER_CONTEXTS; i++) { single_count_zero += counts->comp_inter[i][0]; comp_count_zero += 
counts->comp_inter[i][1]; } if (comp_count_zero == 0) { cm->reference_mode = SINGLE_REFERENCE; vp9_zero(counts->comp_inter); } else if (single_count_zero == 0) { cm->reference_mode = COMPOUND_REFERENCE; vp9_zero(counts->comp_inter); } } } // If segmented AQ is enabled compute the average AQ weighting. if (cm->seg.enabled && (cpi->oxcf.aq_mode != NO_AQ) && (cm->seg.update_map || cm->seg.update_data)) { cm->seg.aq_av_offset = compute_frame_aq_offset(cpi); } } static void sum_intra_stats(FRAME_COUNTS *counts, const MODE_INFO *mi) { const PREDICTION_MODE y_mode = mi->mode; const PREDICTION_MODE uv_mode = mi->uv_mode; const BLOCK_SIZE bsize = mi->sb_type; if (bsize < BLOCK_8X8) { int idx, idy; const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; for (idy = 0; idy < 2; idy += num_4x4_h) for (idx = 0; idx < 2; idx += num_4x4_w) ++counts->y_mode[0][mi->bmi[idy * 2 + idx].as_mode]; } else { ++counts->y_mode[size_group_lookup[bsize]][y_mode]; } ++counts->uv_mode[y_mode][uv_mode]; } static void update_zeromv_cnt(VP9_COMP *const cpi, const MODE_INFO *const mi, int mi_row, int mi_col, BLOCK_SIZE bsize) { const VP9_COMMON *const cm = &cpi->common; MV mv = mi->mv[0].as_mv; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; const int xmis = VPXMIN(cm->mi_cols - mi_col, bw); const int ymis = VPXMIN(cm->mi_rows - mi_row, bh); const int block_index = mi_row * cm->mi_cols + mi_col; int x, y; for (y = 0; y < ymis; y++) for (x = 0; x < xmis; x++) { int map_offset = block_index + y * cm->mi_cols + x; if (mi->ref_frame[0] == LAST_FRAME && is_inter_block(mi) && mi->segment_id <= CR_SEGMENT_ID_BOOST2) { if (abs(mv.row) < 8 && abs(mv.col) < 8) { if (cpi->consec_zero_mv[map_offset] < 255) cpi->consec_zero_mv[map_offset]++; } else { cpi->consec_zero_mv[map_offset] = 0; } } } } static void encode_superblock(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, int output_enabled, int mi_row, int mi_col, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; const int seg_skip = segfeature_active(&cm->seg, mi->segment_id, SEG_LVL_SKIP); x->skip_recode = !x->select_tx_size && mi->sb_type >= BLOCK_8X8 && cpi->oxcf.aq_mode != COMPLEXITY_AQ && cpi->oxcf.aq_mode != CYCLIC_REFRESH_AQ && cpi->sf.allow_skip_recode; if (!x->skip_recode && !cpi->sf.use_nonrd_pick_mode) memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); x->skip_optimize = ctx->is_coded; ctx->is_coded = 1; x->use_lp32x32fdct = cpi->sf.use_lp32x32fdct; x->skip_encode = (!output_enabled && cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH); if (x->skip_encode) return; if (!is_inter_block(mi)) { int plane; #if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && (xd->above_mi == NULL || xd->left_mi == NULL) && need_top_left[mi->uv_mode]) assert(0); #endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH mi->skip = 1; for (plane = 0; plane < MAX_MB_PLANE; ++plane) vp9_encode_intra_block_plane(x, VPXMAX(bsize, BLOCK_8X8), plane, 1); if (output_enabled) sum_intra_stats(td->counts, mi); vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, VPXMAX(bsize, BLOCK_8X8)); } else { int ref; const int is_compound = has_second_ref(mi); set_ref_ptrs(cm, xd, mi->ref_frame[0], mi->ref_frame[1]); for (ref = 0; ref < 1 + is_compound; ++ref) { YV12_BUFFER_CONFIG *cfg = get_ref_frame_buffer(cpi, 
mi->ref_frame[ref]); assert(cfg != NULL); vp9_setup_pre_planes(xd, ref, cfg, mi_row, mi_col, &xd->block_refs[ref]->sf); } if (!(cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready) || seg_skip) vp9_build_inter_predictors_sby(xd, mi_row, mi_col, VPXMAX(bsize, BLOCK_8X8)); vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, VPXMAX(bsize, BLOCK_8X8)); #if CONFIG_MISMATCH_DEBUG if (output_enabled) { int plane; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { const struct macroblockd_plane *pd = &xd->plane[plane]; int pixel_c, pixel_r; const BLOCK_SIZE plane_bsize = get_plane_block_size(VPXMAX(bsize, BLOCK_8X8), &xd->plane[plane]); const int bw = get_block_width(plane_bsize); const int bh = get_block_height(plane_bsize); mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, 0, 0, pd->subsampling_x, pd->subsampling_y); mismatch_record_block_pre(pd->dst.buf, pd->dst.stride, plane, pixel_c, pixel_r, bw, bh, xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); } } #endif vp9_encode_sb(x, VPXMAX(bsize, BLOCK_8X8), mi_row, mi_col, output_enabled); vp9_tokenize_sb(cpi, td, t, !output_enabled, seg_skip, VPXMAX(bsize, BLOCK_8X8)); } if (seg_skip) { assert(mi->skip); } if (output_enabled) { if (cm->tx_mode == TX_MODE_SELECT && mi->sb_type >= BLOCK_8X8 && !(is_inter_block(mi) && mi->skip)) { ++get_tx_counts(max_txsize_lookup[bsize], get_tx_size_context(xd), &td->counts->tx)[mi->tx_size]; } else { // The new intra coding scheme requires no change of transform size if (is_inter_block(mi)) { mi->tx_size = VPXMIN(tx_mode_to_biggest_tx_size[cm->tx_mode], max_txsize_lookup[bsize]); } else { mi->tx_size = (bsize >= BLOCK_8X8) ? mi->tx_size : TX_4X4; } } ++td->counts->tx.tx_totals[mi->tx_size]; ++td->counts->tx.tx_totals[get_uv_tx_size(mi, &xd->plane[1])]; if (cm->seg.enabled && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_update_sb_postencode(cpi, mi, mi_row, mi_col, bsize); if (cpi->oxcf.pass == 0 && cpi->svc.temporal_layer_id == 0 && (!cpi->use_svc || (cpi->use_svc && !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1))) update_zeromv_cnt(cpi, mi, mi_row, mi_col, bsize); } } libvpx-1.8.2/vp9/encoder/vp9_encodeframe.h000066400000000000000000000034611357355204000204120ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_ENCODER_VP9_ENCODEFRAME_H_ #define VPX_VP9_ENCODER_VP9_ENCODEFRAME_H_ #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif struct macroblock; struct yv12_buffer_config; struct VP9_COMP; struct ThreadData; // Constants used in SOURCE_VAR_BASED_PARTITION #define VAR_HIST_MAX_BG_VAR 1000 #define VAR_HIST_FACTOR 10 #define VAR_HIST_BINS (VAR_HIST_MAX_BG_VAR / VAR_HIST_FACTOR + 1) #define VAR_HIST_LARGE_CUT_OFF 75 #define VAR_HIST_SMALL_CUT_OFF 45 void vp9_setup_src_planes(struct macroblock *x, const struct yv12_buffer_config *src, int mi_row, int mi_col); void vp9_encode_frame(struct VP9_COMP *cpi); void vp9_init_tile_data(struct VP9_COMP *cpi); void vp9_encode_tile(struct VP9_COMP *cpi, struct ThreadData *td, int tile_row, int tile_col); void vp9_encode_sb_row(struct VP9_COMP *cpi, struct ThreadData *td, int tile_row, int tile_col, int mi_row); void vp9_set_variance_partition_thresholds(struct VP9_COMP *cpi, int q, int content_state); struct KMEANS_DATA; void vp9_kmeans(double *ctr_ls, double *boundary_ls, int *count_ls, int k, struct KMEANS_DATA *arr, int size); int vp9_get_group_idx(double value, double *boundary_ls, int k); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_ENCODEFRAME_H_ libvpx-1.8.2/vp9/encoder/vp9_encodemb.c000066400000000000000000001214421357355204000177110ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/quantize.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #if CONFIG_MISMATCH_DEBUG #include "vpx_util/vpx_debug_util.h" #endif #include "vp9/common/vp9_idct.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_scan.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_rd.h" #include "vp9/encoder/vp9_tokenize.h" struct optimize_ctx { ENTROPY_CONTEXT ta[MAX_MB_PLANE][16]; ENTROPY_CONTEXT tl[MAX_MB_PLANE][16]; }; void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &x->e_mbd.plane[plane]; const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); const int bw = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int bh = 4 * num_4x4_blocks_high_lookup[plane_bsize]; #if CONFIG_VP9_HIGHBITDEPTH if (x->e_mbd.cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, x->e_mbd.bd); return; } #endif // CONFIG_VP9_HIGHBITDEPTH vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride); } static const int plane_rd_mult[REF_TYPES][PLANE_TYPES] = { { 10, 6 }, { 8, 5 }, }; // 'num' can be negative, but 'shift' must be non-negative. #define RIGHT_SHIFT_POSSIBLY_NEGATIVE(num, shift) \ (((num) >= 0) ? 
(num) >> (shift) : -((-(num)) >> (shift))) int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, int ctx) { MACROBLOCKD *const xd = &mb->e_mbd; struct macroblock_plane *const p = &mb->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; const int ref = is_inter_block(xd->mi[0]); uint8_t token_cache[1024]; const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); const int eob = p->eobs[block]; const PLANE_TYPE plane_type = get_plane_type(plane); const int default_eob = 16 << (tx_size << 1); const int shift = (tx_size == TX_32X32); const int16_t *const dequant_ptr = pd->dequant; const uint8_t *const band_translate = get_band_translate(tx_size); const scan_order *const so = get_scan(xd, tx_size, plane_type, block); const int16_t *const scan = so->scan; const int16_t *const nb = so->neighbors; const MODE_INFO *mbmi = xd->mi[0]; const int sharpness = mb->sharpness; const int64_t rdadj = (int64_t)mb->rdmult * plane_rd_mult[ref][plane_type]; const int64_t rdmult = (sharpness == 0 ? rdadj >> 1 : (rdadj * (8 - sharpness + mbmi->segment_id)) >> 4); const int64_t rddiv = mb->rddiv; int64_t rd_cost0, rd_cost1; int64_t rate0, rate1; int16_t t0, t1; int i, final_eob; int count_high_values_after_eob = 0; #if CONFIG_VP9_HIGHBITDEPTH const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); #else const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8); #endif unsigned int(*const token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = mb->token_costs[tx_size][plane_type][ref]; unsigned int(*token_costs_cur)[2][COEFF_CONTEXTS][ENTROPY_TOKENS]; int64_t eob_cost0, eob_cost1; const int ctx0 = ctx; int64_t accu_rate = 0; // Initialized to the worst possible error for the largest transform size. // This ensures that it never goes negative. int64_t accu_error = ((int64_t)1) << 50; int64_t best_block_rd_cost = INT64_MAX; int x_prev = 1; tran_low_t before_best_eob_qc = 0; tran_low_t before_best_eob_dqc = 0; assert((!plane_type && !plane) || (plane_type && plane)); assert(eob <= default_eob); for (i = 0; i < eob; i++) { const int rc = scan[i]; token_cache[rc] = vp9_pt_energy_class[vp9_get_token(qcoeff[rc])]; } final_eob = 0; // Initial RD cost. token_costs_cur = token_costs + band_translate[0]; rate0 = (*token_costs_cur)[0][ctx0][EOB_TOKEN]; best_block_rd_cost = RDCOST(rdmult, rddiv, rate0, accu_error); // For each token, pick one of two choices greedily: // (i) First candidate: Keep current quantized value, OR // (ii) Second candidate: Reduce quantized value by 1. for (i = 0; i < eob; i++) { const int rc = scan[i]; const int x = qcoeff[rc]; const int band_cur = band_translate[i]; const int ctx_cur = (i == 0) ? ctx : get_coef_context(nb, token_cache, i); const int token_tree_sel_cur = (x_prev == 0); token_costs_cur = token_costs + band_cur; if (x == 0) { // No need to search const int token = vp9_get_token(x); rate0 = (*token_costs_cur)[token_tree_sel_cur][ctx_cur][token]; accu_rate += rate0; x_prev = 0; // Note: accu_error does not change. } else { const int dqv = dequant_ptr[rc != 0]; // Compute the distortion for quantizing to 0. const int diff_for_zero_raw = (0 - coeff[rc]) * (1 << shift); const int diff_for_zero = #if CONFIG_VP9_HIGHBITDEPTH (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff_for_zero_raw, xd->bd - 8) : #endif diff_for_zero_raw; const int64_t distortion_for_zero = (int64_t)diff_for_zero * diff_for_zero; // Compute the distortion for the first candidate const int diff0_raw = (dqcoeff[rc] - coeff[rc]) * (1 << shift); const int diff0 = #if CONFIG_VP9_HIGHBITDEPTH (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? RIGHT_SHIFT_POSSIBLY_NEGATIVE(diff0_raw, xd->bd - 8) : #endif // CONFIG_VP9_HIGHBITDEPTH diff0_raw; const int64_t distortion0 = (int64_t)diff0 * diff0; // Compute the distortion for the second candidate const int sign = -(x < 0); // -1 if x is negative and 0 otherwise. const int x1 = x - 2 * sign - 1; // abs(x1) = abs(x) - 1. int64_t distortion1; if (x1 != 0) { const int dqv_step = #if CONFIG_VP9_HIGHBITDEPTH (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? dqv >> (xd->bd - 8) : #endif // CONFIG_VP9_HIGHBITDEPTH dqv; const int diff_step = (dqv_step + sign) ^ sign; const int diff1 = diff0 - diff_step; assert(dqv > 0); // We aren't right shifting a negative number above. distortion1 = (int64_t)diff1 * diff1; } else { distortion1 = distortion_for_zero; } { // Calculate RDCost for current coeff for the two candidates. const int64_t base_bits0 = vp9_get_token_cost(x, &t0, cat6_high_cost); const int64_t base_bits1 = vp9_get_token_cost(x1, &t1, cat6_high_cost); rate0 = base_bits0 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t0]; rate1 = base_bits1 + (*token_costs_cur)[token_tree_sel_cur][ctx_cur][t1]; } { int rdcost_better_for_x1, eob_rdcost_better_for_x1; int dqc0, dqc1; int64_t best_eob_cost_cur; int use_x1; // Calculate RD Cost effect on the next coeff for the two candidates. int64_t next_bits0 = 0; int64_t next_bits1 = 0; int64_t next_eob_bits0 = 0; int64_t next_eob_bits1 = 0; if (i < default_eob - 1) { int ctx_next, token_tree_sel_next; const int band_next = band_translate[i + 1]; const int token_next = (i + 1 != eob) ? vp9_get_token(qcoeff[scan[i + 1]]) : EOB_TOKEN; unsigned int(*const token_costs_next)[2][COEFF_CONTEXTS] [ENTROPY_TOKENS] = token_costs + band_next; token_cache[rc] = vp9_pt_energy_class[t0]; ctx_next = get_coef_context(nb, token_cache, i + 1); token_tree_sel_next = (x == 0); next_bits0 = (*token_costs_next)[token_tree_sel_next][ctx_next][token_next]; next_eob_bits0 = (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN]; token_cache[rc] = vp9_pt_energy_class[t1]; ctx_next = get_coef_context(nb, token_cache, i + 1); token_tree_sel_next = (x1 == 0); next_bits1 = (*token_costs_next)[token_tree_sel_next][ctx_next][token_next]; if (x1 != 0) { next_eob_bits1 = (*token_costs_next)[token_tree_sel_next][ctx_next][EOB_TOKEN]; } } // Compare the total RD costs for two candidates. rd_cost0 = RDCOST(rdmult, rddiv, (rate0 + next_bits0), distortion0); rd_cost1 = RDCOST(rdmult, rddiv, (rate1 + next_bits1), distortion1); rdcost_better_for_x1 = (rd_cost1 < rd_cost0); eob_cost0 = RDCOST(rdmult, rddiv, (accu_rate + rate0 + next_eob_bits0), (accu_error + distortion0 - distortion_for_zero)); eob_cost1 = eob_cost0; if (x1 != 0) { eob_cost1 = RDCOST(rdmult, rddiv, (accu_rate + rate1 + next_eob_bits1), (accu_error + distortion1 - distortion_for_zero)); eob_rdcost_better_for_x1 = (eob_cost1 < eob_cost0); } else { eob_rdcost_better_for_x1 = 0; } // Calculate the two candidate de-quantized values. 
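// dqc0 keeps the dequantized value the quantizer already produced; dqc1
// re-derives it for the lowered coefficient as x1 * dqv, right-shifted by
// one for TX_32X32 (shift == 1). Illustrative values: x1 = -3, dqv = 52,
// shift = 1 gives RIGHT_SHIFT_POSSIBLY_NEGATIVE(-156, 1) = -78.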
dqc0 = dqcoeff[rc]; dqc1 = 0; if (rdcost_better_for_x1 + eob_rdcost_better_for_x1) { if (x1 != 0) { dqc1 = RIGHT_SHIFT_POSSIBLY_NEGATIVE(x1 * dqv, shift); } else { dqc1 = 0; } } // Pick and record the better quantized and de-quantized values. if (rdcost_better_for_x1) { qcoeff[rc] = x1; dqcoeff[rc] = dqc1; accu_rate += rate1; accu_error += distortion1 - distortion_for_zero; assert(distortion1 <= distortion_for_zero); token_cache[rc] = vp9_pt_energy_class[t1]; } else { accu_rate += rate0; accu_error += distortion0 - distortion_for_zero; assert(distortion0 <= distortion_for_zero); token_cache[rc] = vp9_pt_energy_class[t0]; } if (sharpness > 0 && abs(qcoeff[rc]) > 1) count_high_values_after_eob++; assert(accu_error >= 0); x_prev = qcoeff[rc]; // Update based on selected quantized value. use_x1 = (x1 != 0) && eob_rdcost_better_for_x1; best_eob_cost_cur = use_x1 ? eob_cost1 : eob_cost0; // Determine whether to move the eob position to i+1 if (best_eob_cost_cur < best_block_rd_cost) { best_block_rd_cost = best_eob_cost_cur; final_eob = i + 1; count_high_values_after_eob = 0; if (use_x1) { before_best_eob_qc = x1; before_best_eob_dqc = dqc1; } else { before_best_eob_qc = x; before_best_eob_dqc = dqc0; } } } } } if (count_high_values_after_eob > 0) { final_eob = eob - 1; for (; final_eob >= 0; final_eob--) { const int rc = scan[final_eob]; const int x = qcoeff[rc]; if (x) { break; } } final_eob++; } else { assert(final_eob <= eob); if (final_eob > 0) { int rc; assert(before_best_eob_qc != 0); i = final_eob - 1; rc = scan[i]; qcoeff[rc] = before_best_eob_qc; dqcoeff[rc] = before_best_eob_dqc; } for (i = final_eob; i < eob; i++) { int rc = scan[i]; qcoeff[rc] = 0; dqcoeff[rc] = 0; } } mb->plane[plane].eobs[block] = final_eob; return final_eob; } #undef RIGHT_SHIFT_POSSIBLY_NEGATIVE static INLINE void fdct32x32(int rd_transform, const int16_t *src, tran_low_t *dst, int src_stride) { if (rd_transform) vpx_fdct32x32_rd(src, dst, src_stride); else vpx_fdct32x32(src, dst, src_stride); } #if CONFIG_VP9_HIGHBITDEPTH static INLINE void highbd_fdct32x32(int rd_transform, const int16_t *src, tran_low_t *dst, int src_stride) { if (rd_transform) vpx_highbd_fdct32x32_rd(src, dst, src_stride); else vpx_highbd_fdct32x32(src, dst, src_stride); } #endif // CONFIG_VP9_HIGHBITDEPTH void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; const scan_order *const scan_order = &vp9_default_scan_orders[tx_size]; tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int16_t *src_diff; src_diff = &p->src_diff[4 * (row * diff_stride + col)]; // skip block condition should be handled before this is called. 
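// Fast-path ("fp") quantization used for RTC coding: each transform size
// pairs its forward transform with vp9_quantize_fp*, whose argument list
// carries only round_fp/quant_fp, i.e. it omits the zbin and quant_shift
// stages of the full vpx_quantize_b() path in vp9_xform_quant() below.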
assert(!x->skip_block); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { switch (tx_size) { case TX_32X32: highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vp9_highbd_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); vp9_highbd_quantize_fp(coeff, 256, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); vp9_highbd_quantize_fp(coeff, 64, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; default: assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vp9_highbd_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; } return; } #endif // CONFIG_VP9_HIGHBITDEPTH switch (tx_size) { case TX_32X32: fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vp9_quantize_fp_32x32(coeff, 1024, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: vpx_fdct16x16(src_diff, coeff, diff_stride); vp9_quantize_fp(coeff, 256, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: vpx_fdct8x8(src_diff, coeff, diff_stride); vp9_quantize_fp(coeff, 64, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; default: assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; } } void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int16_t *src_diff; src_diff = &p->src_diff[4 * (row * diff_stride + col)]; // skip block condition should be handled before this is called. 
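// DC-only variant: the vpx_fdct*_1() transforms produce just the (0, 0)
// coefficient (effectively a scaled sum of the residual block), and
// vpx_quantize_dc*() quantizes only that coefficient, so eob is at most 1.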
assert(!x->skip_block); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { switch (tx_size) { case TX_32X32: vpx_highbd_fdct32x32_1(src_diff, coeff, diff_stride); vpx_highbd_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; case TX_16X16: vpx_highbd_fdct16x16_1(src_diff, coeff, diff_stride); vpx_highbd_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; case TX_8X8: vpx_highbd_fdct8x8_1(src_diff, coeff, diff_stride); vpx_highbd_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; default: assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_highbd_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; } return; } #endif // CONFIG_VP9_HIGHBITDEPTH switch (tx_size) { case TX_32X32: vpx_fdct32x32_1(src_diff, coeff, diff_stride); vpx_quantize_dc_32x32(coeff, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; case TX_16X16: vpx_fdct16x16_1(src_diff, coeff, diff_stride); vpx_quantize_dc(coeff, 256, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; case TX_8X8: vpx_fdct8x8_1(src_diff, coeff, diff_stride); vpx_quantize_dc(coeff, 64, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; default: assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_quantize_dc(coeff, 16, x->skip_block, p->round, p->quant_fp[0], qcoeff, dqcoeff, pd->dequant[0], eob); break; } } void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; const scan_order *const scan_order = &vp9_default_scan_orders[tx_size]; tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int16_t *src_diff; src_diff = &p->src_diff[4 * (row * diff_stride + col)]; // skip block condition should be handled before this is called. 
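// Full quantization path: vpx_quantize_b*() applies the zbin (dead-zone),
// round, quant and quant_shift stages per coefficient. The element counts
// 1024/256/64/16 below are the coefficient totals for the 32x32, 16x16,
// 8x8 and 4x4 transform sizes respectively.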
assert(!x->skip_block); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { switch (tx_size) { case TX_32X32: highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; default: assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; } return; } #endif // CONFIG_VP9_HIGHBITDEPTH switch (tx_size) { case TX_32X32: fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_16X16: vpx_fdct16x16(src_diff, coeff, diff_stride); vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: vpx_fdct8x8(src_diff, coeff, diff_stride); vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; default: assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; } } static void encode_block(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args *const args = arg; #if CONFIG_MISMATCH_DEBUG int mi_row = args->mi_row; int mi_col = args->mi_col; int output_enabled = args->output_enabled; #endif MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint8_t *dst; ENTROPY_CONTEXT *a, *l; dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; a = &args->ta[col]; l = &args->tl[row]; // TODO(jingning): per transformed block zero forcing only enabled for // luma component. will integrate chroma components as well. 
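// If the RD search already established that this luma block quantizes to
// all zeros, skip the transform/quantize stage outright and just clear the
// entropy contexts rather than recompute a known-zero result.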
if (x->zcoeff_blk[tx_size][block] && plane == 0) { p->eobs[block] = 0; *a = *l = 0; #if CONFIG_MISMATCH_DEBUG goto encode_block_end; #else return; #endif } if (!x->skip_recode) { if (x->quant_fp) { // Encoding process for rtc mode if (x->skip_txfm[0] == SKIP_TXFM_AC_DC && plane == 0) { // skip forward transform p->eobs[block] = 0; *a = *l = 0; #if CONFIG_MISMATCH_DEBUG goto encode_block_end; #else return; #endif } else { vp9_xform_quant_fp(x, plane, block, row, col, plane_bsize, tx_size); } } else { if (max_txsize_lookup[plane_bsize] == tx_size) { int txfm_blk_index = (plane << 2) + (block >> (tx_size << 1)); if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_NONE) { // full forward transform and quantization vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); } else if (x->skip_txfm[txfm_blk_index] == SKIP_TXFM_AC_ONLY) { // fast path forward transform and quantization vp9_xform_quant_dc(x, plane, block, row, col, plane_bsize, tx_size); } else { // skip forward transform p->eobs[block] = 0; *a = *l = 0; #if CONFIG_MISMATCH_DEBUG goto encode_block_end; #else return; #endif } } else { vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); } } } if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { const int ctx = combine_entropy_contexts(*a, *l); *a = *l = vp9_optimize_b(x, plane, block, tx_size, ctx) > 0; } else { *a = *l = p->eobs[block] > 0; } if (p->eobs[block]) *(args->skip) = 0; if (x->skip_encode || p->eobs[block] == 0) { #if CONFIG_MISMATCH_DEBUG goto encode_block_end; #else return; #endif } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst); switch (tx_size) { case TX_32X32: vp9_highbd_idct32x32_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], xd->bd); break; case TX_16X16: vp9_highbd_idct16x16_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], xd->bd); break; case TX_8X8: vp9_highbd_idct8x8_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], xd->bd); break; default: assert(tx_size == TX_4X4); // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. x->highbd_inv_txfm_add(dqcoeff, dst16, pd->dst.stride, p->eobs[block], xd->bd); break; } #if CONFIG_MISMATCH_DEBUG goto encode_block_end; #else return; #endif } #endif // CONFIG_VP9_HIGHBITDEPTH switch (tx_size) { case TX_32X32: vp9_idct32x32_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; case TX_16X16: vp9_idct16x16_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; case TX_8X8: vp9_idct8x8_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; default: assert(tx_size == TX_4X4); // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. 
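// In lossless mode inv_txfm_add is the inverse 4x4 Walsh-Hadamard transform
// (vp9_iwht4x4_add, selected in encode_frame_internal()), so the eob <= 1
// special case changes the reconstruction itself, not just the speed.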
x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); break; } #if CONFIG_MISMATCH_DEBUG encode_block_end: if (output_enabled) { int pixel_c, pixel_r; int blk_w = 1 << (tx_size + TX_UNIT_SIZE_LOG2); int blk_h = 1 << (tx_size + TX_UNIT_SIZE_LOG2); mi_to_pixel_loc(&pixel_c, &pixel_r, mi_col, mi_row, col, row, pd->subsampling_x, pd->subsampling_y); mismatch_record_block_tx(dst, pd->dst.stride, plane, pixel_c, pixel_r, blk_w, blk_h, xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH); } #endif } static void encode_block_pass1(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { MACROBLOCK *const x = (MACROBLOCK *)arg; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint8_t *dst; dst = &pd->dst.buf[4 * row * pd->dst.stride + 4 * col]; vp9_xform_quant(x, plane, block, row, col, plane_bsize, tx_size); if (p->eobs[block] > 0) { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { x->highbd_inv_txfm_add(dqcoeff, CONVERT_TO_SHORTPTR(dst), pd->dst.stride, p->eobs[block], xd->bd); return; } #endif // CONFIG_VP9_HIGHBITDEPTH x->inv_txfm_add(dqcoeff, dst, pd->dst.stride, p->eobs[block]); } } void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize) { vp9_subtract_plane(x, bsize, 0); vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, 0, encode_block_pass1, x); } void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, int output_enabled) { MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; MODE_INFO *mi = xd->mi[0]; int plane; #if CONFIG_MISMATCH_DEBUG struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip, mi_row, mi_col, output_enabled }; #else struct encode_b_args arg = { x, 1, NULL, NULL, &mi->skip }; (void)mi_row; (void)mi_col; (void)output_enabled; #endif mi->skip = 1; if (x->skip) return; for (plane = 0; plane < MAX_MB_PLANE; ++plane) { if (!x->skip_recode) vp9_subtract_plane(x, bsize, plane); if (x->optimize && (!x->skip_recode || !x->skip_optimize)) { const struct macroblockd_plane *const pd = &xd->plane[plane]; const TX_SIZE tx_size = plane ? 
get_uv_tx_size(mi, pd) : mi->tx_size; vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]); arg.enable_coeff_opt = 1; } else { arg.enable_coeff_opt = 0; } arg.ta = ctx.ta[plane]; arg.tl = ctx.tl[plane]; vp9_foreach_transformed_block_in_plane(xd, bsize, plane, encode_block, &arg); } } void vp9_encode_block_intra(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct encode_b_args *const args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; tran_low_t *coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); const scan_order *scan_order; TX_TYPE tx_type = DCT_DCT; PREDICTION_MODE mode; const int bwl = b_width_log2_lookup[plane_bsize]; const int diff_stride = 4 * (1 << bwl); uint8_t *src, *dst; int16_t *src_diff; uint16_t *eob = &p->eobs[block]; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; ENTROPY_CONTEXT *a = NULL; ENTROPY_CONTEXT *l = NULL; int entropy_ctx = 0; dst = &pd->dst.buf[4 * (row * dst_stride + col)]; src = &p->src.buf[4 * (row * src_stride + col)]; src_diff = &p->src_diff[4 * (row * diff_stride + col)]; if (args->enable_coeff_opt) { a = &args->ta[col]; l = &args->tl[row]; entropy_ctx = combine_entropy_contexts(*a, *l); } if (tx_size == TX_4X4) { tx_type = get_tx_type_4x4(get_plane_type(plane), xd, block); scan_order = &vp9_scan_orders[TX_4X4][tx_type]; mode = plane == 0 ? get_y_mode(xd->mi[0], block) : mi->uv_mode; } else { mode = plane == 0 ? mi->mode : mi->uv_mode; if (tx_size == TX_32X32) { scan_order = &vp9_default_scan_orders[TX_32X32]; } else { tx_type = get_tx_type(get_plane_type(plane), xd); scan_order = &vp9_scan_orders[tx_size][tx_type]; } } vp9_predict_intra_block( xd, bwl, tx_size, mode, (x->skip_encode || x->fp_src_pred) ? src : dst, (x->skip_encode || x->fp_src_pred) ? src_stride : dst_stride, dst, dst_stride, col, row, plane); // skip block condition should be handled before this is called. 
assert(!x->skip_block); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst); switch (tx_size) { case TX_32X32: if (!x->skip_recode) { vpx_highbd_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst, dst_stride, xd->bd); highbd_fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vpx_highbd_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (args->enable_coeff_opt && !x->skip_recode) { *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; } if (!x->skip_encode && *eob) { vp9_highbd_idct32x32_add(dqcoeff, dst16, dst_stride, *eob, xd->bd); } break; case TX_16X16: if (!x->skip_recode) { vpx_highbd_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst, dst_stride, xd->bd); if (tx_type == DCT_DCT) vpx_highbd_fdct16x16(src_diff, coeff, diff_stride); else vp9_highbd_fht16x16(src_diff, coeff, diff_stride, tx_type); vpx_highbd_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (args->enable_coeff_opt && !x->skip_recode) { *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; } if (!x->skip_encode && *eob) { vp9_highbd_iht16x16_add(tx_type, dqcoeff, dst16, dst_stride, *eob, xd->bd); } break; case TX_8X8: if (!x->skip_recode) { vpx_highbd_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst, dst_stride, xd->bd); if (tx_type == DCT_DCT) vpx_highbd_fdct8x8(src_diff, coeff, diff_stride); else vp9_highbd_fht8x8(src_diff, coeff, diff_stride, tx_type); vpx_highbd_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (args->enable_coeff_opt && !x->skip_recode) { *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; } if (!x->skip_encode && *eob) { vp9_highbd_iht8x8_add(tx_type, dqcoeff, dst16, dst_stride, *eob, xd->bd); } break; default: assert(tx_size == TX_4X4); if (!x->skip_recode) { vpx_highbd_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst, dst_stride, xd->bd); if (tx_type != DCT_DCT) vp9_highbd_fht4x4(src_diff, coeff, diff_stride, tx_type); else x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_highbd_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (args->enable_coeff_opt && !x->skip_recode) { *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; } if (!x->skip_encode && *eob) { if (tx_type == DCT_DCT) { // this is like vp9_short_idct4x4 but has a special case around // eob<=1 which is significant (not just an optimization) for the // lossless case. 
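// Likewise for high bit depth: in lossless mode highbd_inv_txfm_add is
// vp9_highbd_iwht4x4_add (also selected in encode_frame_internal()).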
x->highbd_inv_txfm_add(dqcoeff, dst16, dst_stride, *eob, xd->bd); } else { vp9_highbd_iht4x4_16_add(dqcoeff, dst16, dst_stride, tx_type, xd->bd); } } break; } if (*eob) *(args->skip) = 0; return; } #endif // CONFIG_VP9_HIGHBITDEPTH switch (tx_size) { case TX_32X32: if (!x->skip_recode) { vpx_subtract_block(32, 32, src_diff, diff_stride, src, src_stride, dst, dst_stride); fdct32x32(x->use_lp32x32fdct, src_diff, coeff, diff_stride); vpx_quantize_b_32x32(coeff, 1024, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (args->enable_coeff_opt && !x->skip_recode) { *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; } if (!x->skip_encode && *eob) vp9_idct32x32_add(dqcoeff, dst, dst_stride, *eob); break; case TX_16X16: if (!x->skip_recode) { vpx_subtract_block(16, 16, src_diff, diff_stride, src, src_stride, dst, dst_stride); vp9_fht16x16(src_diff, coeff, diff_stride, tx_type); vpx_quantize_b(coeff, 256, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (args->enable_coeff_opt && !x->skip_recode) { *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; } if (!x->skip_encode && *eob) vp9_iht16x16_add(tx_type, dqcoeff, dst, dst_stride, *eob); break; case TX_8X8: if (!x->skip_recode) { vpx_subtract_block(8, 8, src_diff, diff_stride, src, src_stride, dst, dst_stride); vp9_fht8x8(src_diff, coeff, diff_stride, tx_type); vpx_quantize_b(coeff, 64, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (args->enable_coeff_opt && !x->skip_recode) { *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; } if (!x->skip_encode && *eob) vp9_iht8x8_add(tx_type, dqcoeff, dst, dst_stride, *eob); break; default: assert(tx_size == TX_4X4); if (!x->skip_recode) { vpx_subtract_block(4, 4, src_diff, diff_stride, src, src_stride, dst, dst_stride); if (tx_type != DCT_DCT) vp9_fht4x4(src_diff, coeff, diff_stride, tx_type); else x->fwd_txfm4x4(src_diff, coeff, diff_stride); vpx_quantize_b(coeff, 16, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); } if (args->enable_coeff_opt && !x->skip_recode) { *a = *l = vp9_optimize_b(x, plane, block, tx_size, entropy_ctx) > 0; } if (!x->skip_encode && *eob) { if (tx_type == DCT_DCT) // this is like vp9_short_idct4x4 but has a special case around eob<=1 // which is significant (not just an optimization) for the lossless // case. x->inv_txfm_add(dqcoeff, dst, dst_stride, *eob); else vp9_iht4x4_16_add(dqcoeff, dst, dst_stride, tx_type); } break; } if (*eob) *(args->skip) = 0; } void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane, int enable_optimize_b) { const MACROBLOCKD *const xd = &x->e_mbd; struct optimize_ctx ctx; #if CONFIG_MISMATCH_DEBUG // TODO(angiebird): make mismatch_debug support intra mode struct encode_b_args arg = { x, enable_optimize_b, ctx.ta[plane], ctx.tl[plane], &xd->mi[0]->skip, 0, 0, 0 }; #else struct encode_b_args arg = { x, enable_optimize_b, ctx.ta[plane], ctx.tl[plane], &xd->mi[0]->skip }; #endif if (enable_optimize_b && x->optimize && (!x->skip_recode || !x->skip_optimize)) { const struct macroblockd_plane *const pd = &xd->plane[plane]; const TX_SIZE tx_size = plane ? 
get_uv_tx_size(xd->mi[0], pd) : xd->mi[0]->tx_size; vp9_get_entropy_contexts(bsize, tx_size, pd, ctx.ta[plane], ctx.tl[plane]); } else { arg.enable_coeff_opt = 0; } vp9_foreach_transformed_block_in_plane(xd, bsize, plane, vp9_encode_block_intra, &arg); } libvpx-1.8.2/vp9/encoder/vp9_encodemb.h000066400000000000000000000036501357355204000177160ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_ENCODEMB_H_ #define VPX_VP9_ENCODER_VP9_ENCODEMB_H_ #include "./vpx_config.h" #include "vp9/encoder/vp9_block.h" #ifdef __cplusplus extern "C" { #endif struct encode_b_args { MACROBLOCK *x; int enable_coeff_opt; ENTROPY_CONTEXT *ta; ENTROPY_CONTEXT *tl; int8_t *skip; #if CONFIG_MISMATCH_DEBUG int mi_row; int mi_col; int output_enabled; #endif }; int vp9_optimize_b(MACROBLOCK *mb, int plane, int block, TX_SIZE tx_size, int ctx); void vp9_encode_sb(MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, int output_enabled); void vp9_encode_sby_pass1(MACROBLOCK *x, BLOCK_SIZE bsize); void vp9_xform_quant_fp(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); void vp9_xform_quant_dc(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); void vp9_xform_quant(MACROBLOCK *x, int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size); void vp9_subtract_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); void vp9_encode_block_intra(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg); void vp9_encode_intra_block_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane, int enable_optimize_b); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_ENCODEMB_H_ libvpx-1.8.2/vp9/encoder/vp9_encodemv.c000066400000000000000000000225741357355204000177430ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_encodemv.h" #include "vpx_dsp/vpx_dsp_common.h" static struct vp9_token mv_joint_encodings[MV_JOINTS]; static struct vp9_token mv_class_encodings[MV_CLASSES]; static struct vp9_token mv_fp_encodings[MV_FP_SIZE]; void vp9_entropy_mv_init(void) { vp9_tokens_from_tree(mv_joint_encodings, vp9_mv_joint_tree); vp9_tokens_from_tree(mv_class_encodings, vp9_mv_class_tree); vp9_tokens_from_tree(mv_fp_encodings, vp9_mv_fp_tree); } static void encode_mv_component(vpx_writer *w, int comp, const nmv_component *mvcomp, int usehp) { int offset; const int sign = comp < 0; const int mag = sign ? 
-comp : comp; const int mv_class = vp9_get_mv_class(mag - 1, &offset); const int d = offset >> 3; // int mv data const int fr = (offset >> 1) & 3; // fractional mv data const int hp = offset & 1; // high precision mv data assert(comp != 0); // Sign vpx_write(w, sign, mvcomp->sign); // Class vp9_write_token(w, vp9_mv_class_tree, mvcomp->classes, &mv_class_encodings[mv_class]); // Integer bits if (mv_class == MV_CLASS_0) { vpx_write(w, d, mvcomp->class0[0]); } else { int i; const int n = mv_class + CLASS0_BITS - 1; // number of bits for (i = 0; i < n; ++i) vpx_write(w, (d >> i) & 1, mvcomp->bits[i]); } // Fractional bits vp9_write_token(w, vp9_mv_fp_tree, mv_class == MV_CLASS_0 ? mvcomp->class0_fp[d] : mvcomp->fp, &mv_fp_encodings[fr]); // High precision bit if (usehp) vpx_write(w, hp, mv_class == MV_CLASS_0 ? mvcomp->class0_hp : mvcomp->hp); } static void build_nmv_component_cost_table(int *mvcost, const nmv_component *const mvcomp, int usehp) { int sign_cost[2], class_cost[MV_CLASSES], class0_cost[CLASS0_SIZE]; int bits_cost[MV_OFFSET_BITS][2]; int class0_fp_cost[CLASS0_SIZE][MV_FP_SIZE], fp_cost[MV_FP_SIZE]; int class0_hp_cost[2], hp_cost[2]; int i; int c, o; sign_cost[0] = vp9_cost_zero(mvcomp->sign); sign_cost[1] = vp9_cost_one(mvcomp->sign); vp9_cost_tokens(class_cost, mvcomp->classes, vp9_mv_class_tree); vp9_cost_tokens(class0_cost, mvcomp->class0, vp9_mv_class0_tree); for (i = 0; i < MV_OFFSET_BITS; ++i) { bits_cost[i][0] = vp9_cost_zero(mvcomp->bits[i]); bits_cost[i][1] = vp9_cost_one(mvcomp->bits[i]); } for (i = 0; i < CLASS0_SIZE; ++i) vp9_cost_tokens(class0_fp_cost[i], mvcomp->class0_fp[i], vp9_mv_fp_tree); vp9_cost_tokens(fp_cost, mvcomp->fp, vp9_mv_fp_tree); // Always build the hp costs to avoid an uninitialized warning from gcc class0_hp_cost[0] = vp9_cost_zero(mvcomp->class0_hp); class0_hp_cost[1] = vp9_cost_one(mvcomp->class0_hp); hp_cost[0] = vp9_cost_zero(mvcomp->hp); hp_cost[1] = vp9_cost_one(mvcomp->hp); mvcost[0] = 0; // MV_CLASS_0 for (o = 0; o < (CLASS0_SIZE << 3); ++o) { int d, e, f; int cost = class_cost[MV_CLASS_0]; int v = o + 1; d = (o >> 3); /* int mv data */ f = (o >> 1) & 3; /* fractional pel mv data */ cost += class0_cost[d]; cost += class0_fp_cost[d][f]; if (usehp) { e = (o & 1); /* high precision mv data */ cost += class0_hp_cost[e]; } mvcost[v] = cost + sign_cost[0]; mvcost[-v] = cost + sign_cost[1]; } for (c = MV_CLASS_1; c < MV_CLASSES; ++c) { int d; for (d = 0; d < (1 << c); ++d) { int f; int whole_cost = class_cost[c]; int b = c + CLASS0_BITS - 1; /* number of bits */ for (i = 0; i < b; ++i) whole_cost += bits_cost[i][((d >> i) & 1)]; for (f = 0; f < 4; ++f) { int cost = whole_cost + fp_cost[f]; int v = (CLASS0_SIZE << (c + 2)) + d * 8 + f * 2 /* + e */ + 1; if (usehp) { mvcost[v] = cost + hp_cost[0] + sign_cost[0]; mvcost[-v] = cost + hp_cost[0] + sign_cost[1]; if (v + 1 > MV_MAX) break; mvcost[v + 1] = cost + hp_cost[1] + sign_cost[0]; mvcost[-v - 1] = cost + hp_cost[1] + sign_cost[1]; } else { mvcost[v] = cost + sign_cost[0]; mvcost[-v] = cost + sign_cost[1]; if (v + 1 > MV_MAX) break; mvcost[v + 1] = cost + sign_cost[0]; mvcost[-v - 1] = cost + sign_cost[1]; } } } } } static int update_mv(vpx_writer *w, const unsigned int ct[2], vpx_prob *cur_p, vpx_prob upd_p) { const vpx_prob new_p = get_binary_prob(ct[0], ct[1]) | 1; const int update = cost_branch256(ct, *cur_p) + vp9_cost_zero(upd_p) > cost_branch256(ct, new_p) + vp9_cost_one(upd_p) + (7 << VP9_PROB_COST_SHIFT); vpx_write(w, update, upd_p); if (update) { *cur_p = new_p; vpx_write_literal(w, 
new_p >> 1, 7); } return update; } static void write_mv_update(const vpx_tree_index *tree, vpx_prob probs[/*n - 1*/], const unsigned int counts[/*n - 1*/], int n, vpx_writer *w) { int i; unsigned int branch_ct[32][2]; // Assuming max number of probabilities <= 32 assert(n <= 32); vp9_tree_probs_from_distribution(tree, branch_ct, counts); for (i = 0; i < n - 1; ++i) update_mv(w, branch_ct[i], &probs[i], MV_UPDATE_PROB); } void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vpx_writer *w, nmv_context_counts *const counts) { int i, j; nmv_context *const mvc = &cm->fc->nmvc; write_mv_update(vp9_mv_joint_tree, mvc->joints, counts->joints, MV_JOINTS, w); for (i = 0; i < 2; ++i) { nmv_component *comp = &mvc->comps[i]; nmv_component_counts *comp_counts = &counts->comps[i]; update_mv(w, comp_counts->sign, &comp->sign, MV_UPDATE_PROB); write_mv_update(vp9_mv_class_tree, comp->classes, comp_counts->classes, MV_CLASSES, w); write_mv_update(vp9_mv_class0_tree, comp->class0, comp_counts->class0, CLASS0_SIZE, w); for (j = 0; j < MV_OFFSET_BITS; ++j) update_mv(w, comp_counts->bits[j], &comp->bits[j], MV_UPDATE_PROB); } for (i = 0; i < 2; ++i) { for (j = 0; j < CLASS0_SIZE; ++j) write_mv_update(vp9_mv_fp_tree, mvc->comps[i].class0_fp[j], counts->comps[i].class0_fp[j], MV_FP_SIZE, w); write_mv_update(vp9_mv_fp_tree, mvc->comps[i].fp, counts->comps[i].fp, MV_FP_SIZE, w); } if (usehp) { for (i = 0; i < 2; ++i) { update_mv(w, counts->comps[i].class0_hp, &mvc->comps[i].class0_hp, MV_UPDATE_PROB); update_mv(w, counts->comps[i].hp, &mvc->comps[i].hp, MV_UPDATE_PROB); } } } void vp9_encode_mv(VP9_COMP *cpi, vpx_writer *w, const MV *mv, const MV *ref, const nmv_context *mvctx, int usehp, unsigned int *const max_mv_magnitude) { const MV diff = { mv->row - ref->row, mv->col - ref->col }; const MV_JOINT_TYPE j = vp9_get_mv_joint(&diff); usehp = usehp && use_mv_hp(ref); vp9_write_token(w, vp9_mv_joint_tree, mvctx->joints, &mv_joint_encodings[j]); if (mv_joint_vertical(j)) encode_mv_component(w, diff.row, &mvctx->comps[0], usehp); if (mv_joint_horizontal(j)) encode_mv_component(w, diff.col, &mvctx->comps[1], usehp); // If auto_mv_step_size is enabled then keep track of the largest // motion vector component used. 
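// Worked example (illustrative, not from the original source): mv holds
// 1/8-pel units, so an mv of { row = -36, col = 20 } gives
// VPXMAX(abs(-36), abs(20)) >> 3 == 4 full pels in the guarded update below.
#if 0
  /* Equivalent scalar form of the guarded update that follows; mv and
   * max_mv_magnitude are the function parameters already in scope. */
  unsigned int maxv_sketch =
      (abs(mv->row) > abs(mv->col) ? abs(mv->row) : abs(mv->col)) >> 3;
  if (maxv_sketch > *max_mv_magnitude) *max_mv_magnitude = maxv_sketch;
#endif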
if (cpi->sf.mv.auto_mv_step_size) { const unsigned int maxv = VPXMAX(abs(mv->row), abs(mv->col)) >> 3; *max_mv_magnitude = VPXMAX(maxv, *max_mv_magnitude); } } void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], const nmv_context *ctx, int usehp) { vp9_cost_tokens(mvjoint, ctx->joints, vp9_mv_joint_tree); build_nmv_component_cost_table(mvcost[0], &ctx->comps[0], usehp); build_nmv_component_cost_table(mvcost[1], &ctx->comps[1], usehp); } static void inc_mvs(const MODE_INFO *mi, const MB_MODE_INFO_EXT *mbmi_ext, const int_mv mvs[2], nmv_context_counts *counts) { int i; for (i = 0; i < 1 + has_second_ref(mi); ++i) { const MV *ref = &mbmi_ext->ref_mvs[mi->ref_frame[i]][0].as_mv; const MV diff = { mvs[i].as_mv.row - ref->row, mvs[i].as_mv.col - ref->col }; vp9_inc_mv(&diff, counts); } } void vp9_update_mv_count(ThreadData *td) { const MACROBLOCKD *xd = &td->mb.e_mbd; const MODE_INFO *mi = xd->mi[0]; const MB_MODE_INFO_EXT *mbmi_ext = td->mb.mbmi_ext; if (mi->sb_type < BLOCK_8X8) { const int num_4x4_w = num_4x4_blocks_wide_lookup[mi->sb_type]; const int num_4x4_h = num_4x4_blocks_high_lookup[mi->sb_type]; int idx, idy; for (idy = 0; idy < 2; idy += num_4x4_h) { for (idx = 0; idx < 2; idx += num_4x4_w) { const int i = idy * 2 + idx; if (mi->bmi[i].as_mode == NEWMV) inc_mvs(mi, mbmi_ext, mi->bmi[i].as_mv, &td->counts->mv); } } } else { if (mi->mode == NEWMV) inc_mvs(mi, mbmi_ext, mi->mv, &td->counts->mv); } } libvpx-1.8.2/vp9/encoder/vp9_encodemv.h000066400000000000000000000022361357355204000177410ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_ENCODEMV_H_ #define VPX_VP9_ENCODER_VP9_ENCODEMV_H_ #include "vp9/encoder/vp9_encoder.h" #ifdef __cplusplus extern "C" { #endif void vp9_entropy_mv_init(void); void vp9_write_nmv_probs(VP9_COMMON *cm, int usehp, vpx_writer *w, nmv_context_counts *const counts); void vp9_encode_mv(VP9_COMP *cpi, vpx_writer *w, const MV *mv, const MV *ref, const nmv_context *mvctx, int usehp, unsigned int *const max_mv_magnitude); void vp9_build_nmv_cost_table(int *mvjoint, int *mvcost[2], const nmv_context *ctx, int usehp); void vp9_update_mv_count(ThreadData *td); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_ENCODEMV_H_ libvpx-1.8.2/vp9/encoder/vp9_encoder.c000066400000000000000000010473211357355204000175600ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <limits.h> #include <math.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx_dsp/psnr.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_filter.h" #if CONFIG_INTERNAL_STATS #include "vpx_dsp/ssim.h" #endif #include "vpx_ports/mem.h" #include "vpx_ports/system_state.h" #include "vpx_ports/vpx_timer.h" #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG #include "vpx_util/vpx_debug_util.h" #endif // CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_filter.h" #include "vp9/common/vp9_idct.h" #if CONFIG_NON_GREEDY_MV #include "vp9/common/vp9_mvref_common.h" #endif #if CONFIG_VP9_POSTPROC #include "vp9/common/vp9_postproc.h" #endif #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" #include "vp9/common/vp9_tile_common.h" #include "vp9/common/vp9_scan.h" #if !CONFIG_REALTIME_ONLY #include "vp9/encoder/vp9_alt_ref_aq.h" #include "vp9/encoder/vp9_aq_360.h" #include "vp9/encoder/vp9_aq_complexity.h" #endif #include "vp9/encoder/vp9_aq_cyclicrefresh.h" #if !CONFIG_REALTIME_ONLY #include "vp9/encoder/vp9_aq_variance.h" #endif #include "vp9/encoder/vp9_bitstream.h" #if CONFIG_INTERNAL_STATS #include "vp9/encoder/vp9_blockiness.h" #endif #include "vp9/encoder/vp9_context_tree.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_ethread.h" #include "vp9/encoder/vp9_extend.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_mbgraph.h" #if CONFIG_NON_GREEDY_MV #include "vp9/encoder/vp9_mcomp.h" #endif #include "vp9/encoder/vp9_multi_thread.h" #include "vp9/encoder/vp9_noise_estimate.h" #include "vp9/encoder/vp9_picklpf.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rd.h" #include "vp9/encoder/vp9_resize.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_skin_detection.h" #include "vp9/encoder/vp9_speed_features.h" #include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_temporal_filter.h" #include "vp9/vp9_cx_iface.h" #define AM_SEGMENT_ID_INACTIVE 7 #define AM_SEGMENT_ID_ACTIVE 0 // Whether to use high precision mv for altref computation. #define ALTREF_HIGH_PRECISION_MV 1 // Q threshold for high precision mv. Choose a very high value for now so that // HIGH_PRECISION is always chosen. #define HIGH_PRECISION_MV_QTHRESH 200 #define FRAME_SIZE_FACTOR 128 // empirical params for context model threshold #define FRAME_RATE_FACTOR 8 #ifdef OUTPUT_YUV_DENOISED FILE *yuv_denoised_file = NULL; #endif #ifdef OUTPUT_YUV_SKINMAP static FILE *yuv_skinmap_file = NULL; #endif #ifdef OUTPUT_YUV_REC FILE *yuv_rec_file; #endif #ifdef OUTPUT_YUV_SVC_SRC FILE *yuv_svc_src[3] = { NULL, NULL, NULL }; #endif #if 0 FILE *framepsnr; FILE *kf_list; FILE *keyfile; #endif #ifdef ENABLE_KF_DENOISE // Test condition for spatial denoise of source.
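// Illustration (not from the original source, mirrors the test below): in a
// two-pass encode the source denoise step is considered only in the second
// pass (oxcf->pass != 1), never when lossless coding was requested, and only
// on intra-only frames such as keyframes.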
static int is_spatial_denoise_enabled(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; return (oxcf->pass != 1) && !is_lossless_requested(&cpi->oxcf) && frame_is_intra_only(cm); } #endif #if CONFIG_VP9_HIGHBITDEPTH void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, TX_SIZE tx_size); #endif void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, TX_SIZE tx_size); #if !CONFIG_REALTIME_ONLY // compute adaptive threshold for skip recoding static int compute_context_model_thresh(const VP9_COMP *const cpi) { const VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const int frame_size = (cm->width * cm->height) >> 10; const int bitrate = (int)(oxcf->target_bandwidth >> 10); const int qindex_factor = cm->base_qindex + (MAXQ >> 1); // This equation makes the threshold adaptive to frame size. // Coding gain obtained by recoding comes from alternate frames of large // content change. We skip recoding if the difference of previous and current // frame context probability model is less than a certain threshold. // The first component is the most critical part to guarantee adaptivity. // Other parameters are estimated based on normal setting of hd resolution // parameters. e.g frame_size = 1920x1080, bitrate = 8000, qindex_factor < 50 const int thresh = ((FRAME_SIZE_FACTOR * frame_size - FRAME_RATE_FACTOR * bitrate) * qindex_factor) >> 9; return thresh; } // compute the total cost difference between current // and previous frame context prob model. static int compute_context_model_diff(const VP9_COMMON *const cm) { const FRAME_CONTEXT *const pre_fc = &cm->frame_contexts[cm->frame_context_idx]; const FRAME_CONTEXT *const cur_fc = cm->fc; const FRAME_COUNTS *counts = &cm->counts; vpx_prob pre_last_prob, cur_last_prob; int diff = 0; int i, j, k, l, m, n; // y_mode_prob for (i = 0; i < BLOCK_SIZE_GROUPS; ++i) { for (j = 0; j < INTRA_MODES - 1; ++j) { diff += (int)counts->y_mode[i][j] * (pre_fc->y_mode_prob[i][j] - cur_fc->y_mode_prob[i][j]); } pre_last_prob = MAX_PROB - pre_fc->y_mode_prob[i][INTRA_MODES - 2]; cur_last_prob = MAX_PROB - cur_fc->y_mode_prob[i][INTRA_MODES - 2]; diff += (int)counts->y_mode[i][INTRA_MODES - 1] * (pre_last_prob - cur_last_prob); } // uv_mode_prob for (i = 0; i < INTRA_MODES; ++i) { for (j = 0; j < INTRA_MODES - 1; ++j) { diff += (int)counts->uv_mode[i][j] * (pre_fc->uv_mode_prob[i][j] - cur_fc->uv_mode_prob[i][j]); } pre_last_prob = MAX_PROB - pre_fc->uv_mode_prob[i][INTRA_MODES - 2]; cur_last_prob = MAX_PROB - cur_fc->uv_mode_prob[i][INTRA_MODES - 2]; diff += (int)counts->uv_mode[i][INTRA_MODES - 1] * (pre_last_prob - cur_last_prob); } // partition_prob for (i = 0; i < PARTITION_CONTEXTS; ++i) { for (j = 0; j < PARTITION_TYPES - 1; ++j) { diff += (int)counts->partition[i][j] * (pre_fc->partition_prob[i][j] - cur_fc->partition_prob[i][j]); } pre_last_prob = MAX_PROB - pre_fc->partition_prob[i][PARTITION_TYPES - 2]; cur_last_prob = MAX_PROB - cur_fc->partition_prob[i][PARTITION_TYPES - 2]; diff += (int)counts->partition[i][PARTITION_TYPES - 1] * (pre_last_prob - cur_last_prob); } // coef_probs for (i = 0; i < TX_SIZES; ++i) { for (j = 0; j < PLANE_TYPES; ++j) { for (k = 0; k < REF_TYPES; ++k) { for (l = 0; l < COEF_BANDS; ++l) { for (m = 0; m < BAND_COEFF_CONTEXTS(l); ++m) { for (n = 0; n < UNCONSTRAINED_NODES; ++n) { diff += (int)counts->coef[i][j][k][l][m][n] * (pre_fc->coef_probs[i][j][k][l][m][n] - cur_fc->coef_probs[i][j][k][l][m][n]); } pre_last_prob = 
MAX_PROB - pre_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1]; cur_last_prob = MAX_PROB - cur_fc->coef_probs[i][j][k][l][m][UNCONSTRAINED_NODES - 1]; diff += (int)counts->coef[i][j][k][l][m][UNCONSTRAINED_NODES] * (pre_last_prob - cur_last_prob); } } } } } // switchable_interp_prob for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) { for (j = 0; j < SWITCHABLE_FILTERS - 1; ++j) { diff += (int)counts->switchable_interp[i][j] * (pre_fc->switchable_interp_prob[i][j] - cur_fc->switchable_interp_prob[i][j]); } pre_last_prob = MAX_PROB - pre_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2]; cur_last_prob = MAX_PROB - cur_fc->switchable_interp_prob[i][SWITCHABLE_FILTERS - 2]; diff += (int)counts->switchable_interp[i][SWITCHABLE_FILTERS - 1] * (pre_last_prob - cur_last_prob); } // inter_mode_probs for (i = 0; i < INTER_MODE_CONTEXTS; ++i) { for (j = 0; j < INTER_MODES - 1; ++j) { diff += (int)counts->inter_mode[i][j] * (pre_fc->inter_mode_probs[i][j] - cur_fc->inter_mode_probs[i][j]); } pre_last_prob = MAX_PROB - pre_fc->inter_mode_probs[i][INTER_MODES - 2]; cur_last_prob = MAX_PROB - cur_fc->inter_mode_probs[i][INTER_MODES - 2]; diff += (int)counts->inter_mode[i][INTER_MODES - 1] * (pre_last_prob - cur_last_prob); } // intra_inter_prob for (i = 0; i < INTRA_INTER_CONTEXTS; ++i) { diff += (int)counts->intra_inter[i][0] * (pre_fc->intra_inter_prob[i] - cur_fc->intra_inter_prob[i]); pre_last_prob = MAX_PROB - pre_fc->intra_inter_prob[i]; cur_last_prob = MAX_PROB - cur_fc->intra_inter_prob[i]; diff += (int)counts->intra_inter[i][1] * (pre_last_prob - cur_last_prob); } // comp_inter_prob for (i = 0; i < COMP_INTER_CONTEXTS; ++i) { diff += (int)counts->comp_inter[i][0] * (pre_fc->comp_inter_prob[i] - cur_fc->comp_inter_prob[i]); pre_last_prob = MAX_PROB - pre_fc->comp_inter_prob[i]; cur_last_prob = MAX_PROB - cur_fc->comp_inter_prob[i]; diff += (int)counts->comp_inter[i][1] * (pre_last_prob - cur_last_prob); } // single_ref_prob for (i = 0; i < REF_CONTEXTS; ++i) { for (j = 0; j < 2; ++j) { diff += (int)counts->single_ref[i][j][0] * (pre_fc->single_ref_prob[i][j] - cur_fc->single_ref_prob[i][j]); pre_last_prob = MAX_PROB - pre_fc->single_ref_prob[i][j]; cur_last_prob = MAX_PROB - cur_fc->single_ref_prob[i][j]; diff += (int)counts->single_ref[i][j][1] * (pre_last_prob - cur_last_prob); } } // comp_ref_prob for (i = 0; i < REF_CONTEXTS; ++i) { diff += (int)counts->comp_ref[i][0] * (pre_fc->comp_ref_prob[i] - cur_fc->comp_ref_prob[i]); pre_last_prob = MAX_PROB - pre_fc->comp_ref_prob[i]; cur_last_prob = MAX_PROB - cur_fc->comp_ref_prob[i]; diff += (int)counts->comp_ref[i][1] * (pre_last_prob - cur_last_prob); } // tx_probs for (i = 0; i < TX_SIZE_CONTEXTS; ++i) { // p32x32 for (j = 0; j < TX_SIZES - 1; ++j) { diff += (int)counts->tx.p32x32[i][j] * (pre_fc->tx_probs.p32x32[i][j] - cur_fc->tx_probs.p32x32[i][j]); } pre_last_prob = MAX_PROB - pre_fc->tx_probs.p32x32[i][TX_SIZES - 2]; cur_last_prob = MAX_PROB - cur_fc->tx_probs.p32x32[i][TX_SIZES - 2]; diff += (int)counts->tx.p32x32[i][TX_SIZES - 1] * (pre_last_prob - cur_last_prob); // p16x16 for (j = 0; j < TX_SIZES - 2; ++j) { diff += (int)counts->tx.p16x16[i][j] * (pre_fc->tx_probs.p16x16[i][j] - cur_fc->tx_probs.p16x16[i][j]); } pre_last_prob = MAX_PROB - pre_fc->tx_probs.p16x16[i][TX_SIZES - 3]; cur_last_prob = MAX_PROB - cur_fc->tx_probs.p16x16[i][TX_SIZES - 3]; diff += (int)counts->tx.p16x16[i][TX_SIZES - 2] * (pre_last_prob - cur_last_prob); // p8x8 for (j = 0; j < TX_SIZES - 3; ++j) { diff += (int)counts->tx.p8x8[i][j] * 
(pre_fc->tx_probs.p8x8[i][j] - cur_fc->tx_probs.p8x8[i][j]); } pre_last_prob = MAX_PROB - pre_fc->tx_probs.p8x8[i][TX_SIZES - 4]; cur_last_prob = MAX_PROB - cur_fc->tx_probs.p8x8[i][TX_SIZES - 4]; diff += (int)counts->tx.p8x8[i][TX_SIZES - 3] * (pre_last_prob - cur_last_prob); } // skip_probs for (i = 0; i < SKIP_CONTEXTS; ++i) { diff += (int)counts->skip[i][0] * (pre_fc->skip_probs[i] - cur_fc->skip_probs[i]); pre_last_prob = MAX_PROB - pre_fc->skip_probs[i]; cur_last_prob = MAX_PROB - cur_fc->skip_probs[i]; diff += (int)counts->skip[i][1] * (pre_last_prob - cur_last_prob); } // mv for (i = 0; i < MV_JOINTS - 1; ++i) { diff += (int)counts->mv.joints[i] * (pre_fc->nmvc.joints[i] - cur_fc->nmvc.joints[i]); } pre_last_prob = MAX_PROB - pre_fc->nmvc.joints[MV_JOINTS - 2]; cur_last_prob = MAX_PROB - cur_fc->nmvc.joints[MV_JOINTS - 2]; diff += (int)counts->mv.joints[MV_JOINTS - 1] * (pre_last_prob - cur_last_prob); for (i = 0; i < 2; ++i) { const nmv_component_counts *nmv_count = &counts->mv.comps[i]; const nmv_component *pre_nmv_prob = &pre_fc->nmvc.comps[i]; const nmv_component *cur_nmv_prob = &cur_fc->nmvc.comps[i]; // sign diff += (int)nmv_count->sign[0] * (pre_nmv_prob->sign - cur_nmv_prob->sign); pre_last_prob = MAX_PROB - pre_nmv_prob->sign; cur_last_prob = MAX_PROB - cur_nmv_prob->sign; diff += (int)nmv_count->sign[1] * (pre_last_prob - cur_last_prob); // classes for (j = 0; j < MV_CLASSES - 1; ++j) { diff += (int)nmv_count->classes[j] * (pre_nmv_prob->classes[j] - cur_nmv_prob->classes[j]); } pre_last_prob = MAX_PROB - pre_nmv_prob->classes[MV_CLASSES - 2]; cur_last_prob = MAX_PROB - cur_nmv_prob->classes[MV_CLASSES - 2]; diff += (int)nmv_count->classes[MV_CLASSES - 1] * (pre_last_prob - cur_last_prob); // class0 for (j = 0; j < CLASS0_SIZE - 1; ++j) { diff += (int)nmv_count->class0[j] * (pre_nmv_prob->class0[j] - cur_nmv_prob->class0[j]); } pre_last_prob = MAX_PROB - pre_nmv_prob->class0[CLASS0_SIZE - 2]; cur_last_prob = MAX_PROB - cur_nmv_prob->class0[CLASS0_SIZE - 2]; diff += (int)nmv_count->class0[CLASS0_SIZE - 1] * (pre_last_prob - cur_last_prob); // bits for (j = 0; j < MV_OFFSET_BITS; ++j) { diff += (int)nmv_count->bits[j][0] * (pre_nmv_prob->bits[j] - cur_nmv_prob->bits[j]); pre_last_prob = MAX_PROB - pre_nmv_prob->bits[j]; cur_last_prob = MAX_PROB - cur_nmv_prob->bits[j]; diff += (int)nmv_count->bits[j][1] * (pre_last_prob - cur_last_prob); } // class0_fp for (j = 0; j < CLASS0_SIZE; ++j) { for (k = 0; k < MV_FP_SIZE - 1; ++k) { diff += (int)nmv_count->class0_fp[j][k] * (pre_nmv_prob->class0_fp[j][k] - cur_nmv_prob->class0_fp[j][k]); } pre_last_prob = MAX_PROB - pre_nmv_prob->class0_fp[j][MV_FP_SIZE - 2]; cur_last_prob = MAX_PROB - cur_nmv_prob->class0_fp[j][MV_FP_SIZE - 2]; diff += (int)nmv_count->class0_fp[j][MV_FP_SIZE - 1] * (pre_last_prob - cur_last_prob); } // fp for (j = 0; j < MV_FP_SIZE - 1; ++j) { diff += (int)nmv_count->fp[j] * (pre_nmv_prob->fp[j] - cur_nmv_prob->fp[j]); } pre_last_prob = MAX_PROB - pre_nmv_prob->fp[MV_FP_SIZE - 2]; cur_last_prob = MAX_PROB - cur_nmv_prob->fp[MV_FP_SIZE - 2]; diff += (int)nmv_count->fp[MV_FP_SIZE - 1] * (pre_last_prob - cur_last_prob); // class0_hp diff += (int)nmv_count->class0_hp[0] * (pre_nmv_prob->class0_hp - cur_nmv_prob->class0_hp); pre_last_prob = MAX_PROB - pre_nmv_prob->class0_hp; cur_last_prob = MAX_PROB - cur_nmv_prob->class0_hp; diff += (int)nmv_count->class0_hp[1] * (pre_last_prob - cur_last_prob); // hp diff += (int)nmv_count->hp[0] * (pre_nmv_prob->hp - cur_nmv_prob->hp); pre_last_prob = MAX_PROB - pre_nmv_prob->hp; 
cur_last_prob = MAX_PROB - cur_nmv_prob->hp; diff += (int)nmv_count->hp[1] * (pre_last_prob - cur_last_prob); } return -diff; } #endif // !CONFIG_REALTIME_ONLY // Test for whether to calculate metrics for the frame. static int is_psnr_calc_enabled(const VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; return cpi->b_calculate_psnr && (oxcf->pass != 1) && cm->show_frame; } /* clang-format off */ const Vp9LevelSpec vp9_level_defs[VP9_LEVELS] = { // sample rate size breadth bitrate cpb { LEVEL_1, 829440, 36864, 512, 200, 400, 2, 1, 4, 8 }, { LEVEL_1_1, 2764800, 73728, 768, 800, 1000, 2, 1, 4, 8 }, { LEVEL_2, 4608000, 122880, 960, 1800, 1500, 2, 1, 4, 8 }, { LEVEL_2_1, 9216000, 245760, 1344, 3600, 2800, 2, 2, 4, 8 }, { LEVEL_3, 20736000, 552960, 2048, 7200, 6000, 2, 4, 4, 8 }, { LEVEL_3_1, 36864000, 983040, 2752, 12000, 10000, 2, 4, 4, 8 }, { LEVEL_4, 83558400, 2228224, 4160, 18000, 16000, 4, 4, 4, 8 }, { LEVEL_4_1, 160432128, 2228224, 4160, 30000, 18000, 4, 4, 5, 6 }, { LEVEL_5, 311951360, 8912896, 8384, 60000, 36000, 6, 8, 6, 4 }, { LEVEL_5_1, 588251136, 8912896, 8384, 120000, 46000, 8, 8, 10, 4 }, // TODO(huisu): update max_cpb_size for level 5_2 ~ 6_2 when // they are finalized (currently tentative). { LEVEL_5_2, 1176502272, 8912896, 8384, 180000, 90000, 8, 8, 10, 4 }, { LEVEL_6, 1176502272, 35651584, 16832, 180000, 90000, 8, 16, 10, 4 }, { LEVEL_6_1, 2353004544u, 35651584, 16832, 240000, 180000, 8, 16, 10, 4 }, { LEVEL_6_2, 4706009088u, 35651584, 16832, 480000, 360000, 8, 16, 10, 4 }, }; /* clang-format on */ static const char *level_fail_messages[TARGET_LEVEL_FAIL_IDS] = { "The average bit-rate is too high.", "The picture size is too large.", "The picture width/height is too large.", "The luma sample rate is too large.", "The CPB size is too large.", "The compression ratio is too small", "Too many column tiles are used.", "The alt-ref distance is too small.", "Too many reference buffers are used." }; static INLINE void Scale2Ratio(VPX_SCALING mode, int *hr, int *hs) { switch (mode) { case NORMAL: *hr = 1; *hs = 1; break; case FOURFIVE: *hr = 4; *hs = 5; break; case THREEFIVE: *hr = 3; *hs = 5; break; default: assert(mode == ONETWO); *hr = 1; *hs = 2; break; } } // Mark all inactive blocks as active. Other segmentation features may be set // so memset cannot be used, instead only inactive blocks should be reset. 
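// Example (illustrative values): with AM_SEGMENT_ID_INACTIVE == 7, a segment
// map of { 0, 3, 7, 1 } must become { 0, 3, 0, 1 }; a blanket memset() would
// also wipe the cyclic-refresh segment ids 3 and 1, hence the per-entry loop.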
static void suppress_active_map(VP9_COMP *cpi) { unsigned char *const seg_map = cpi->segmentation_map; if (cpi->active_map.enabled || cpi->active_map.update) { const int rows = cpi->common.mi_rows; const int cols = cpi->common.mi_cols; int i; for (i = 0; i < rows * cols; ++i) if (seg_map[i] == AM_SEGMENT_ID_INACTIVE) seg_map[i] = AM_SEGMENT_ID_ACTIVE; } } static void apply_active_map(VP9_COMP *cpi) { struct segmentation *const seg = &cpi->common.seg; unsigned char *const seg_map = cpi->segmentation_map; const unsigned char *const active_map = cpi->active_map.map; int i; assert(AM_SEGMENT_ID_ACTIVE == CR_SEGMENT_ID_BASE); if (frame_is_intra_only(&cpi->common)) { cpi->active_map.enabled = 0; cpi->active_map.update = 1; } if (cpi->active_map.update) { if (cpi->active_map.enabled) { for (i = 0; i < cpi->common.mi_rows * cpi->common.mi_cols; ++i) if (seg_map[i] == AM_SEGMENT_ID_ACTIVE) seg_map[i] = active_map[i]; vp9_enable_segmentation(seg); vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP); vp9_enable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF); // Setting the data to -MAX_LOOP_FILTER will result in the computed loop // filter level being zero regardless of the value of seg->abs_delta. vp9_set_segdata(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF, -MAX_LOOP_FILTER); } else { vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_SKIP); vp9_disable_segfeature(seg, AM_SEGMENT_ID_INACTIVE, SEG_LVL_ALT_LF); if (seg->enabled) { seg->update_data = 1; seg->update_map = 1; } } cpi->active_map.update = 0; } } static void apply_roi_map(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; struct segmentation *const seg = &cm->seg; vpx_roi_map_t *roi = &cpi->roi; const int *delta_q = roi->delta_q; const int *delta_lf = roi->delta_lf; const int *skip = roi->skip; int ref_frame[8]; int internal_delta_q[MAX_SEGMENTS]; int i; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; // TODO(jianj): Investigate why ROI not working in speed < 5 or in non // realtime mode. if (cpi->oxcf.mode != REALTIME || cpi->oxcf.speed < 5) return; if (!roi->enabled) return; memcpy(&ref_frame, roi->ref_frame, sizeof(ref_frame)); vp9_enable_segmentation(seg); vp9_clearall_segfeatures(seg); // Select delta coding method; seg->abs_delta = SEGMENT_DELTADATA; memcpy(cpi->segmentation_map, roi->roi_map, (cm->mi_rows * cm->mi_cols)); for (i = 0; i < MAX_SEGMENTS; ++i) { // Translate the external delta q values to internal values. internal_delta_q[i] = vp9_quantizer_to_qindex(abs(delta_q[i])); if (delta_q[i] < 0) internal_delta_q[i] = -internal_delta_q[i]; vp9_disable_segfeature(seg, i, SEG_LVL_ALT_Q); vp9_disable_segfeature(seg, i, SEG_LVL_ALT_LF); if (internal_delta_q[i] != 0) { vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, internal_delta_q[i]); } if (delta_lf[i] != 0) { vp9_enable_segfeature(seg, i, SEG_LVL_ALT_LF); vp9_set_segdata(seg, i, SEG_LVL_ALT_LF, delta_lf[i]); } if (skip[i] != 0) { vp9_enable_segfeature(seg, i, SEG_LVL_SKIP); vp9_set_segdata(seg, i, SEG_LVL_SKIP, skip[i]); } if (ref_frame[i] >= 0) { int valid_ref = 1; // ALTREF is not used as reference for nonrd_pickmode with 0 lag. if (ref_frame[i] == ALTREF_FRAME && cpi->sf.use_nonrd_pick_mode) valid_ref = 0; // If GOLDEN is selected, make sure it's set as reference. if (ref_frame[i] == GOLDEN_FRAME && !(cpi->ref_frame_flags & flag_list[ref_frame[i]])) { valid_ref = 0; } // GOLDEN was updated in previous encoded frame, so GOLDEN and LAST are // same reference. 
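// Example (illustrative): immediately after a GOLDEN update
// (rc.frames_since_golden == 0) a segment requesting GOLDEN_FRAME is remapped
// to LAST_FRAME below, since both ids then resolve to the same buffer.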
if (ref_frame[i] == GOLDEN_FRAME && cpi->rc.frames_since_golden == 0) ref_frame[i] = LAST_FRAME; if (valid_ref) { vp9_enable_segfeature(seg, i, SEG_LVL_REF_FRAME); vp9_set_segdata(seg, i, SEG_LVL_REF_FRAME, ref_frame[i]); } } } roi->enabled = 1; } static void init_level_info(Vp9LevelInfo *level_info) { Vp9LevelStats *const level_stats = &level_info->level_stats; Vp9LevelSpec *const level_spec = &level_info->level_spec; memset(level_stats, 0, sizeof(*level_stats)); memset(level_spec, 0, sizeof(*level_spec)); level_spec->level = LEVEL_UNKNOWN; level_spec->min_altref_distance = INT_MAX; } static int check_seg_range(int seg_data[8], int range) { return !(abs(seg_data[0]) > range || abs(seg_data[1]) > range || abs(seg_data[2]) > range || abs(seg_data[3]) > range || abs(seg_data[4]) > range || abs(seg_data[5]) > range || abs(seg_data[6]) > range || abs(seg_data[7]) > range); } VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec) { int i; const Vp9LevelSpec *this_level; vpx_clear_system_state(); for (i = 0; i < VP9_LEVELS; ++i) { this_level = &vp9_level_defs[i]; if ((double)level_spec->max_luma_sample_rate > (double)this_level->max_luma_sample_rate * (1 + SAMPLE_RATE_GRACE_P) || level_spec->max_luma_picture_size > this_level->max_luma_picture_size || level_spec->max_luma_picture_breadth > this_level->max_luma_picture_breadth || level_spec->average_bitrate > this_level->average_bitrate || level_spec->max_cpb_size > this_level->max_cpb_size || level_spec->compression_ratio < this_level->compression_ratio || level_spec->max_col_tiles > this_level->max_col_tiles || level_spec->min_altref_distance < this_level->min_altref_distance || level_spec->max_ref_frame_buffers > this_level->max_ref_frame_buffers) continue; break; } return (i == VP9_LEVELS) ? LEVEL_UNKNOWN : vp9_level_defs[i].level; } int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[8], int delta_lf[8], int skip[8], int ref_frame[8]) { VP9_COMMON *cm = &cpi->common; vpx_roi_map_t *roi = &cpi->roi; const int range = 63; const int ref_frame_range = 3; // Alt-ref const int skip_range = 1; const int frame_rows = cpi->common.mi_rows; const int frame_cols = cpi->common.mi_cols; // Check number of rows and columns match if (frame_rows != (int)rows || frame_cols != (int)cols) { return -1; } if (!check_seg_range(delta_q, range) || !check_seg_range(delta_lf, range) || !check_seg_range(ref_frame, ref_frame_range) || !check_seg_range(skip, skip_range)) return -1; // Also disable segmentation if no deltas are specified. if (!map || (!(delta_q[0] | delta_q[1] | delta_q[2] | delta_q[3] | delta_q[4] | delta_q[5] | delta_q[6] | delta_q[7] | delta_lf[0] | delta_lf[1] | delta_lf[2] | delta_lf[3] | delta_lf[4] | delta_lf[5] | delta_lf[6] | delta_lf[7] | skip[0] | skip[1] | skip[2] | skip[3] | skip[4] | skip[5] | skip[6] | skip[7]) && (ref_frame[0] == -1 && ref_frame[1] == -1 && ref_frame[2] == -1 && ref_frame[3] == -1 && ref_frame[4] == -1 && ref_frame[5] == -1 && ref_frame[6] == -1 && ref_frame[7] == -1))) { vp9_disable_segmentation(&cm->seg); cpi->roi.enabled = 0; return 0; } if (roi->roi_map) { vpx_free(roi->roi_map); roi->roi_map = NULL; } CHECK_MEM_ERROR(cm, roi->roi_map, vpx_malloc(rows * cols)); // Copy to ROI structure in the compressor.
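// Sizing illustration (not from the original source): for a 640x480 stream
// the map spans mi_rows x mi_cols = 60 x 80 one-byte entries, one per 8x8
// block, so the vpx_malloc() above reserves 4800 bytes and the memcpy below
// fills them.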
memcpy(roi->roi_map, map, rows * cols); memcpy(&roi->delta_q, delta_q, MAX_SEGMENTS * sizeof(delta_q[0])); memcpy(&roi->delta_lf, delta_lf, MAX_SEGMENTS * sizeof(delta_lf[0])); memcpy(&roi->skip, skip, MAX_SEGMENTS * sizeof(skip[0])); memcpy(&roi->ref_frame, ref_frame, MAX_SEGMENTS * sizeof(ref_frame[0])); roi->enabled = 1; roi->rows = rows; roi->cols = cols; return 0; } int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows, int cols) { if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols) { unsigned char *const active_map_8x8 = cpi->active_map.map; const int mi_rows = cpi->common.mi_rows; const int mi_cols = cpi->common.mi_cols; cpi->active_map.update = 1; if (new_map_16x16) { int r, c; for (r = 0; r < mi_rows; ++r) { for (c = 0; c < mi_cols; ++c) { active_map_8x8[r * mi_cols + c] = new_map_16x16[(r >> 1) * cols + (c >> 1)] ? AM_SEGMENT_ID_ACTIVE : AM_SEGMENT_ID_INACTIVE; } } cpi->active_map.enabled = 1; } else { cpi->active_map.enabled = 0; } return 0; } else { return -1; } } int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows, int cols) { if (rows == cpi->common.mb_rows && cols == cpi->common.mb_cols && new_map_16x16) { unsigned char *const seg_map_8x8 = cpi->segmentation_map; const int mi_rows = cpi->common.mi_rows; const int mi_cols = cpi->common.mi_cols; memset(new_map_16x16, !cpi->active_map.enabled, rows * cols); if (cpi->active_map.enabled) { int r, c; for (r = 0; r < mi_rows; ++r) { for (c = 0; c < mi_cols; ++c) { // Cyclic refresh segments are considered active despite not having // AM_SEGMENT_ID_ACTIVE new_map_16x16[(r >> 1) * cols + (c >> 1)] |= seg_map_8x8[r * mi_cols + c] != AM_SEGMENT_ID_INACTIVE; } } } return 0; } else { return -1; } } void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv) { MACROBLOCK *const mb = &cpi->td.mb; cpi->common.allow_high_precision_mv = allow_high_precision_mv; if (cpi->common.allow_high_precision_mv) { mb->mvcost = mb->nmvcost_hp; mb->mvsadcost = mb->nmvsadcost_hp; } else { mb->mvcost = mb->nmvcost; mb->mvsadcost = mb->nmvsadcost; } } static void setup_frame(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; // Set up entropy context depending on frame type. The decoder mandates // the use of the default context, index 0, for keyframes and inter // frames where the error_resilient_mode or intra_only flag is set. For // other inter-frames the encoder currently uses only two contexts; // context 1 for ALTREF frames and context 0 for the others. if (frame_is_intra_only(cm) || cm->error_resilient_mode) { vp9_setup_past_independence(cm); } else { if (!cpi->use_svc) cm->frame_context_idx = cpi->refresh_alt_ref_frame; } // TODO(jingning): Overwrite the frame_context_idx index in multi-layer ARF // case. Need some further investigation on if we could apply this to single // layer ARF case as well. 
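#if 0
/* Pure-function restatement of the context selection below (illustrative
 * sketch only; the function name is invented and is not part of libvpx). */
static int frame_context_for(int intra_only, int boost_frame, int std_arf,
                             int layer_depth) {
  if (intra_only) return 0;
  if (!boost_frame) return 3;
  if (std_arf) return 0;
  return layer_depth <= 3 ? 1 : 2;
}
#endif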
if (cpi->multi_layer_arf && !cpi->use_svc) { GF_GROUP *const gf_group = &cpi->twopass.gf_group; const int gf_group_index = gf_group->index; const int boost_frame = !cpi->rc.is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame); // frame_context_idx Frame Type // 0 Intra only frame, base layer ARF // 1 ARFs with layer depth = 2,3 // 2 ARFs with layer depth > 3 // 3 Non-boosted frames if (frame_is_intra_only(cm)) { cm->frame_context_idx = 0; } else if (boost_frame) { if (gf_group->rf_level[gf_group_index] == GF_ARF_STD) cm->frame_context_idx = 0; else if (gf_group->layer_depth[gf_group_index] <= 3) cm->frame_context_idx = 1; else cm->frame_context_idx = 2; } else { cm->frame_context_idx = 3; } } if (cm->frame_type == KEY_FRAME) { cpi->refresh_golden_frame = 1; cpi->refresh_alt_ref_frame = 1; vp9_zero(cpi->interp_filter_selected); } else { *cm->fc = cm->frame_contexts[cm->frame_context_idx]; vp9_zero(cpi->interp_filter_selected[0]); } } static void vp9_enc_setup_mi(VP9_COMMON *cm) { int i; cm->mi = cm->mip + cm->mi_stride + 1; memset(cm->mip, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mip)); cm->prev_mi = cm->prev_mip + cm->mi_stride + 1; // Clear top border row memset(cm->prev_mip, 0, sizeof(*cm->prev_mip) * cm->mi_stride); // Clear left border column for (i = 1; i < cm->mi_rows + 1; ++i) memset(&cm->prev_mip[i * cm->mi_stride], 0, sizeof(*cm->prev_mip)); cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; memset(cm->mi_grid_base, 0, cm->mi_stride * (cm->mi_rows + 1) * sizeof(*cm->mi_grid_base)); } static int vp9_enc_alloc_mi(VP9_COMMON *cm, int mi_size) { cm->mip = vpx_calloc(mi_size, sizeof(*cm->mip)); if (!cm->mip) return 1; cm->prev_mip = vpx_calloc(mi_size, sizeof(*cm->prev_mip)); if (!cm->prev_mip) return 1; cm->mi_alloc_size = mi_size; cm->mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *)); if (!cm->mi_grid_base) return 1; cm->prev_mi_grid_base = (MODE_INFO **)vpx_calloc(mi_size, sizeof(MODE_INFO *)); if (!cm->prev_mi_grid_base) return 1; return 0; } static void vp9_enc_free_mi(VP9_COMMON *cm) { vpx_free(cm->mip); cm->mip = NULL; vpx_free(cm->prev_mip); cm->prev_mip = NULL; vpx_free(cm->mi_grid_base); cm->mi_grid_base = NULL; vpx_free(cm->prev_mi_grid_base); cm->prev_mi_grid_base = NULL; cm->mi_alloc_size = 0; } static void vp9_swap_mi_and_prev_mi(VP9_COMMON *cm) { // Current mip will be the prev_mip for the next frame. MODE_INFO **temp_base = cm->prev_mi_grid_base; MODE_INFO *temp = cm->prev_mip; // Skip update prev_mi frame in show_existing_frame mode. if (cm->show_existing_frame) return; cm->prev_mip = cm->mip; cm->mip = temp; // Update the upper left visible macroblock ptrs. 
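// Note (illustrative): the swap above is pointer-only double buffering; no
// MODE_INFO payload is copied, and two consecutive swaps restore the original
// pointers. The visible pointers below are simply re-derived from the swapped
// bases, offset past the one-row/one-column border (mi_stride + 1).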
cm->mi = cm->mip + cm->mi_stride + 1; cm->prev_mi = cm->prev_mip + cm->mi_stride + 1; cm->prev_mi_grid_base = cm->mi_grid_base; cm->mi_grid_base = temp_base; cm->mi_grid_visible = cm->mi_grid_base + cm->mi_stride + 1; cm->prev_mi_grid_visible = cm->prev_mi_grid_base + cm->mi_stride + 1; } void vp9_initialize_enc(void) { static volatile int init_done = 0; if (!init_done) { vp9_rtcd(); vpx_dsp_rtcd(); vpx_scale_rtcd(); vp9_init_intra_predictors(); vp9_init_me_luts(); vp9_rc_init_minq_luts(); vp9_entropy_mv_init(); #if !CONFIG_REALTIME_ONLY vp9_temporal_filter_init(); #endif init_done = 1; } } static void dealloc_compressor_data(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; int i; vpx_free(cpi->mbmi_ext_base); cpi->mbmi_ext_base = NULL; vpx_free(cpi->tile_data); cpi->tile_data = NULL; vpx_free(cpi->segmentation_map); cpi->segmentation_map = NULL; vpx_free(cpi->coding_context.last_frame_seg_map_copy); cpi->coding_context.last_frame_seg_map_copy = NULL; vpx_free(cpi->nmvcosts[0]); vpx_free(cpi->nmvcosts[1]); cpi->nmvcosts[0] = NULL; cpi->nmvcosts[1] = NULL; vpx_free(cpi->nmvcosts_hp[0]); vpx_free(cpi->nmvcosts_hp[1]); cpi->nmvcosts_hp[0] = NULL; cpi->nmvcosts_hp[1] = NULL; vpx_free(cpi->nmvsadcosts[0]); vpx_free(cpi->nmvsadcosts[1]); cpi->nmvsadcosts[0] = NULL; cpi->nmvsadcosts[1] = NULL; vpx_free(cpi->nmvsadcosts_hp[0]); vpx_free(cpi->nmvsadcosts_hp[1]); cpi->nmvsadcosts_hp[0] = NULL; cpi->nmvsadcosts_hp[1] = NULL; vpx_free(cpi->skin_map); cpi->skin_map = NULL; vpx_free(cpi->prev_partition); cpi->prev_partition = NULL; vpx_free(cpi->svc.prev_partition_svc); cpi->svc.prev_partition_svc = NULL; vpx_free(cpi->prev_segment_id); cpi->prev_segment_id = NULL; vpx_free(cpi->prev_variance_low); cpi->prev_variance_low = NULL; vpx_free(cpi->copied_frame_cnt); cpi->copied_frame_cnt = NULL; vpx_free(cpi->content_state_sb_fd); cpi->content_state_sb_fd = NULL; vpx_free(cpi->count_arf_frame_usage); cpi->count_arf_frame_usage = NULL; vpx_free(cpi->count_lastgolden_frame_usage); cpi->count_lastgolden_frame_usage = NULL; vp9_cyclic_refresh_free(cpi->cyclic_refresh); cpi->cyclic_refresh = NULL; vpx_free(cpi->active_map.map); cpi->active_map.map = NULL; vpx_free(cpi->roi.roi_map); cpi->roi.roi_map = NULL; vpx_free(cpi->consec_zero_mv); cpi->consec_zero_mv = NULL; vpx_free(cpi->mb_wiener_variance); cpi->mb_wiener_variance = NULL; vpx_free(cpi->mi_ssim_rdmult_scaling_factors); cpi->mi_ssim_rdmult_scaling_factors = NULL; vp9_free_ref_frame_buffers(cm->buffer_pool); #if CONFIG_VP9_POSTPROC vp9_free_postproc_buffers(cm); #endif vp9_free_context_buffers(cm); vpx_free_frame_buffer(&cpi->last_frame_uf); vpx_free_frame_buffer(&cpi->scaled_source); vpx_free_frame_buffer(&cpi->scaled_last_source); vpx_free_frame_buffer(&cpi->alt_ref_buffer); #ifdef ENABLE_KF_DENOISE vpx_free_frame_buffer(&cpi->raw_unscaled_source); vpx_free_frame_buffer(&cpi->raw_scaled_source); #endif vp9_lookahead_destroy(cpi->lookahead); vpx_free(cpi->tile_tok[0][0]); cpi->tile_tok[0][0] = 0; vpx_free(cpi->tplist[0][0]); cpi->tplist[0][0] = NULL; vp9_free_pc_tree(&cpi->td); for (i = 0; i < cpi->svc.number_spatial_layers; ++i) { LAYER_CONTEXT *const lc = &cpi->svc.layer_context[i]; vpx_free(lc->rc_twopass_stats_in.buf); lc->rc_twopass_stats_in.buf = NULL; lc->rc_twopass_stats_in.sz = 0; } if (cpi->source_diff_var != NULL) { vpx_free(cpi->source_diff_var); cpi->source_diff_var = NULL; } for (i = 0; i < MAX_LAG_BUFFERS; ++i) { vpx_free_frame_buffer(&cpi->svc.scaled_frames[i]); } memset(&cpi->svc.scaled_frames[0], 0, MAX_LAG_BUFFERS * 
sizeof(cpi->svc.scaled_frames[0])); vpx_free_frame_buffer(&cpi->svc.scaled_temp); memset(&cpi->svc.scaled_temp, 0, sizeof(cpi->svc.scaled_temp)); vpx_free_frame_buffer(&cpi->svc.empty_frame.img); memset(&cpi->svc.empty_frame, 0, sizeof(cpi->svc.empty_frame)); vp9_free_svc_cyclic_refresh(cpi); } static void save_coding_context(VP9_COMP *cpi) { CODING_CONTEXT *const cc = &cpi->coding_context; VP9_COMMON *cm = &cpi->common; // Stores a snapshot of key state variables which can subsequently be // restored with a call to vp9_restore_coding_context. These functions are // intended for use in a re-code loop in vp9_compress_frame where the // quantizer value is adjusted between loop iterations. vp9_copy(cc->nmvjointcost, cpi->td.mb.nmvjointcost); memcpy(cc->nmvcosts[0], cpi->nmvcosts[0], MV_VALS * sizeof(*cpi->nmvcosts[0])); memcpy(cc->nmvcosts[1], cpi->nmvcosts[1], MV_VALS * sizeof(*cpi->nmvcosts[1])); memcpy(cc->nmvcosts_hp[0], cpi->nmvcosts_hp[0], MV_VALS * sizeof(*cpi->nmvcosts_hp[0])); memcpy(cc->nmvcosts_hp[1], cpi->nmvcosts_hp[1], MV_VALS * sizeof(*cpi->nmvcosts_hp[1])); vp9_copy(cc->segment_pred_probs, cm->seg.pred_probs); memcpy(cpi->coding_context.last_frame_seg_map_copy, cm->last_frame_seg_map, (cm->mi_rows * cm->mi_cols)); vp9_copy(cc->last_ref_lf_deltas, cm->lf.last_ref_deltas); vp9_copy(cc->last_mode_lf_deltas, cm->lf.last_mode_deltas); cc->fc = *cm->fc; } static void restore_coding_context(VP9_COMP *cpi) { CODING_CONTEXT *const cc = &cpi->coding_context; VP9_COMMON *cm = &cpi->common; // Restore key state variables to the snapshot state stored in the // previous call to vp9_save_coding_context. vp9_copy(cpi->td.mb.nmvjointcost, cc->nmvjointcost); memcpy(cpi->nmvcosts[0], cc->nmvcosts[0], MV_VALS * sizeof(*cc->nmvcosts[0])); memcpy(cpi->nmvcosts[1], cc->nmvcosts[1], MV_VALS * sizeof(*cc->nmvcosts[1])); memcpy(cpi->nmvcosts_hp[0], cc->nmvcosts_hp[0], MV_VALS * sizeof(*cc->nmvcosts_hp[0])); memcpy(cpi->nmvcosts_hp[1], cc->nmvcosts_hp[1], MV_VALS * sizeof(*cc->nmvcosts_hp[1])); vp9_copy(cm->seg.pred_probs, cc->segment_pred_probs); memcpy(cm->last_frame_seg_map, cpi->coding_context.last_frame_seg_map_copy, (cm->mi_rows * cm->mi_cols)); vp9_copy(cm->lf.last_ref_deltas, cc->last_ref_lf_deltas); vp9_copy(cm->lf.last_mode_deltas, cc->last_mode_lf_deltas); *cm->fc = cc->fc; } #if !CONFIG_REALTIME_ONLY static void configure_static_seg_features(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; struct segmentation *const seg = &cm->seg; int high_q = (int)(rc->avg_q > 48.0); int qi_delta; // Disable and clear down for KF if (cm->frame_type == KEY_FRAME) { // Clear down the global segmentation map memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); seg->update_map = 0; seg->update_data = 0; cpi->static_mb_pct = 0; // Disable segmentation vp9_disable_segmentation(seg); // Clear down the segment features. vp9_clearall_segfeatures(seg); } else if (cpi->refresh_alt_ref_frame) { // If this is an alt ref frame // Clear down the global segmentation map memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); seg->update_map = 0; seg->update_data = 0; cpi->static_mb_pct = 0; // Disable segmentation and individual segment features by default vp9_disable_segmentation(seg); vp9_clearall_segfeatures(seg); // Scan frames from current to arf frame. // This function re-enables segmentation if appropriate. vp9_update_mbgraph_stats(cpi); // If segmentation was enabled set those features needed for the // arf itself. 
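// Example (illustrative): with rc->avg_q == 40.0 the call below targets a
// quantizer of 40.0 * 0.875 == 35.0 for segment 1 on the ARF itself;
// vp9_compute_qdelta() returns the negative qindex delta that reaches it,
// and a further -2 is applied when the segment data is set.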
if (seg->enabled) { seg->update_map = 1; seg->update_data = 1; qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 0.875, cm->bit_depth); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta - 2); vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF); // Where relevant assume segment data is delta data seg->abs_delta = SEGMENT_DELTADATA; } } else if (seg->enabled) { // All other frames if segmentation has been enabled // First normal frame in a valid gf or alt ref group if (rc->frames_since_golden == 0) { // Set up segment features for normal frames in an arf group if (rc->source_alt_ref_active) { seg->update_map = 0; seg->update_data = 1; seg->abs_delta = SEGMENT_DELTADATA; qi_delta = vp9_compute_qdelta(rc, rc->avg_q, rc->avg_q * 1.125, cm->bit_depth); vp9_set_segdata(seg, 1, SEG_LVL_ALT_Q, qi_delta + 2); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_Q); vp9_set_segdata(seg, 1, SEG_LVL_ALT_LF, -2); vp9_enable_segfeature(seg, 1, SEG_LVL_ALT_LF); // Segment coding disabled for compred testing if (high_q || (cpi->static_mb_pct == 100)) { vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME); vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP); } } else { // Disable segmentation and clear down features if alt ref // is not active for this group vp9_disable_segmentation(seg); memset(cpi->segmentation_map, 0, cm->mi_rows * cm->mi_cols); seg->update_map = 0; seg->update_data = 0; vp9_clearall_segfeatures(seg); } } else if (rc->is_src_frame_alt_ref) { // Special case where we are coding over the top of a previous // alt ref frame. // Segment coding disabled for compred testing // Enable ref frame features for segment 0 as well vp9_enable_segfeature(seg, 0, SEG_LVL_REF_FRAME); vp9_enable_segfeature(seg, 1, SEG_LVL_REF_FRAME); // All mbs should use ALTREF_FRAME vp9_clear_segdata(seg, 0, SEG_LVL_REF_FRAME); vp9_set_segdata(seg, 0, SEG_LVL_REF_FRAME, ALTREF_FRAME); vp9_clear_segdata(seg, 1, SEG_LVL_REF_FRAME); vp9_set_segdata(seg, 1, SEG_LVL_REF_FRAME, ALTREF_FRAME); // Skip all MBs if high Q (0,0 mv and skip coeffs) if (high_q) { vp9_enable_segfeature(seg, 0, SEG_LVL_SKIP); vp9_enable_segfeature(seg, 1, SEG_LVL_SKIP); } // Enable data update seg->update_data = 1; } else { // All other frames. // No updates.. leave things as they are. seg->update_map = 0; seg->update_data = 0; } } } #endif // !CONFIG_REALTIME_ONLY static void update_reference_segmentation_map(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MODE_INFO **mi_8x8_ptr = cm->mi_grid_visible; uint8_t *cache_ptr = cm->last_frame_seg_map; int row, col; for (row = 0; row < cm->mi_rows; row++) { MODE_INFO **mi_8x8 = mi_8x8_ptr; uint8_t *cache = cache_ptr; for (col = 0; col < cm->mi_cols; col++, mi_8x8++, cache++) cache[0] = mi_8x8[0]->segment_id; mi_8x8_ptr += cm->mi_stride; cache_ptr += cm->mi_cols; } } static void alloc_raw_frame_buffers(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; const VP9EncoderConfig *oxcf = &cpi->oxcf; if (!cpi->lookahead) cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif oxcf->lag_in_frames); if (!cpi->lookahead) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate lag buffers"); // TODO(agrange) Check if ARF is enabled and skip allocation if not. 
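#if 0
  /* One possible gating for the TODO above (illustrative sketch only; it
   * assumes the existing oxcf->enable_auto_arf and oxcf->lag_in_frames
   * fields and is not part of the library). */
  if (!oxcf->enable_auto_arf || oxcf->lag_in_frames == 0) return;
#endif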
if (vpx_realloc_frame_buffer(&cpi->alt_ref_buffer, oxcf->width, oxcf->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate altref buffer"); } static void alloc_util_frame_buffers(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (vpx_realloc_frame_buffer(&cpi->last_frame_uf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate last frame buffer"); if (vpx_realloc_frame_buffer(&cpi->scaled_source, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled source buffer"); // For 1 pass cbr: allocate scaled_frame that may be used as an intermediate // buffer for a 2 stage down-sampling: two stages of 1:2 down-sampling for a // target of 1/4x1/4. number_spatial_layers must be greater than 2. if (is_one_pass_cbr_svc(cpi) && !cpi->svc.scaled_temp_is_alloc && cpi->svc.number_spatial_layers > 2) { cpi->svc.scaled_temp_is_alloc = 1; if (vpx_realloc_frame_buffer( &cpi->svc.scaled_temp, cm->width >> 1, cm->height >> 1, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled_frame for svc "); } if (vpx_realloc_frame_buffer(&cpi->scaled_last_source, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled last source buffer"); #ifdef ENABLE_KF_DENOISE if (vpx_realloc_frame_buffer(&cpi->raw_unscaled_source, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate unscaled raw source frame buffer"); if (vpx_realloc_frame_buffer(&cpi->raw_scaled_source, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate scaled raw source frame buffer"); #endif } static int alloc_context_buffers_ext(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; int mi_size = cm->mi_cols * cm->mi_rows; cpi->mbmi_ext_base = vpx_calloc(mi_size, sizeof(*cpi->mbmi_ext_base)); if (!cpi->mbmi_ext_base) return 1; return 0; } static void alloc_compressor_data(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; int sb_rows; vp9_alloc_context_buffers(cm, cm->width, cm->height); alloc_context_buffers_ext(cpi); vpx_free(cpi->tile_tok[0][0]); { unsigned int tokens = get_token_alloc(cm->mb_rows, cm->mb_cols); CHECK_MEM_ERROR(cm, cpi->tile_tok[0][0], vpx_calloc(tokens, sizeof(*cpi->tile_tok[0][0]))); } sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; 
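// Example (illustrative): at 1080p, cm->mi_rows == 135 8x8 rows;
// mi_cols_aligned_to_sb() rounds that up to 136 and the >> 3 above yields
// sb_rows == 17 64x64-superblock rows, from which the tplist allocation
// below is sized.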
vpx_free(cpi->tplist[0][0]); CHECK_MEM_ERROR( cm, cpi->tplist[0][0], vpx_calloc(sb_rows * 4 * (1 << 6), sizeof(*cpi->tplist[0][0]))); vp9_setup_pc_tree(&cpi->common, &cpi->td); } void vp9_new_framerate(VP9_COMP *cpi, double framerate) { cpi->framerate = framerate < 0.1 ? 30 : framerate; vp9_rc_update_framerate(cpi); } static void set_tile_limits(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; int min_log2_tile_cols, max_log2_tile_cols; vp9_get_tile_n_bits(cm->mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); cm->log2_tile_cols = clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols); cm->log2_tile_rows = cpi->oxcf.tile_rows; if (cpi->oxcf.target_level == LEVEL_AUTO) { const int level_tile_cols = log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height); if (cm->log2_tile_cols > level_tile_cols) { cm->log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols); } } } static void update_frame_size(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; vp9_set_mb_mi(cm, cm->width, cm->height); vp9_init_context_buffers(cm); vp9_init_macroblockd(cm, xd, NULL); cpi->td.mb.mbmi_ext_base = cpi->mbmi_ext_base; memset(cpi->mbmi_ext_base, 0, cm->mi_rows * cm->mi_cols * sizeof(*cpi->mbmi_ext_base)); set_tile_limits(cpi); } static void init_buffer_indices(VP9_COMP *cpi) { int ref_frame; for (ref_frame = 0; ref_frame < REF_FRAMES; ++ref_frame) cpi->ref_fb_idx[ref_frame] = ref_frame; cpi->lst_fb_idx = cpi->ref_fb_idx[LAST_FRAME - 1]; cpi->gld_fb_idx = cpi->ref_fb_idx[GOLDEN_FRAME - 1]; cpi->alt_fb_idx = cpi->ref_fb_idx[ALTREF_FRAME - 1]; } static void init_level_constraint(LevelConstraint *lc) { lc->level_index = -1; lc->max_cpb_size = INT_MAX; lc->max_frame_size = INT_MAX; lc->fail_flag = 0; } static void set_level_constraint(LevelConstraint *ls, int8_t level_index) { vpx_clear_system_state(); ls->level_index = level_index; if (level_index >= 0) { ls->max_cpb_size = vp9_level_defs[level_index].max_cpb_size * (double)1000; } } static void init_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { VP9_COMMON *const cm = &cpi->common; cpi->oxcf = *oxcf; cpi->framerate = oxcf->init_framerate; cm->profile = oxcf->profile; cm->bit_depth = oxcf->bit_depth; #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth = oxcf->use_highbitdepth; #endif cm->color_space = oxcf->color_space; cm->color_range = oxcf->color_range; cpi->target_level = oxcf->target_level; cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX; set_level_constraint(&cpi->level_constraint, get_level_index(cpi->target_level)); cm->width = oxcf->width; cm->height = oxcf->height; alloc_compressor_data(cpi); cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode; // Single thread case: use counts in common. cpi->td.counts = &cm->counts; // Spatial scalability. cpi->svc.number_spatial_layers = oxcf->ss_number_layers; // Temporal scalability. 
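// Example (illustrative): a 3-spatial x 3-temporal CBR configuration yields
// number_spatial_layers * number_temporal_layers == 9 layer contexts, which
// vp9_init_layer_context() just below allocates and initializes.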
cpi->svc.number_temporal_layers = oxcf->ts_number_layers; if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || ((cpi->svc.number_temporal_layers > 1 || cpi->svc.number_spatial_layers > 1) && cpi->oxcf.pass != 1)) { vp9_init_layer_context(cpi); } // change includes all joint functionality vp9_change_config(cpi, oxcf); cpi->static_mb_pct = 0; cpi->ref_frame_flags = 0; init_buffer_indices(cpi); vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height); } static void set_rc_buffer_sizes(RATE_CONTROL *rc, const VP9EncoderConfig *oxcf) { const int64_t bandwidth = oxcf->target_bandwidth; const int64_t starting = oxcf->starting_buffer_level_ms; const int64_t optimal = oxcf->optimal_buffer_level_ms; const int64_t maximum = oxcf->maximum_buffer_size_ms; rc->starting_buffer_level = starting * bandwidth / 1000; rc->optimal_buffer_level = (optimal == 0) ? bandwidth / 8 : optimal * bandwidth / 1000; rc->maximum_buffer_size = (maximum == 0) ? bandwidth / 8 : maximum * bandwidth / 1000; } #if CONFIG_VP9_HIGHBITDEPTH // TODO(angiebird): make sdx8f available for highbitdepth if needed #define HIGHBD_BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF) \ cpi->fn_ptr[BT].sdf = SDF; \ cpi->fn_ptr[BT].sdaf = SDAF; \ cpi->fn_ptr[BT].vf = VF; \ cpi->fn_ptr[BT].svf = SVF; \ cpi->fn_ptr[BT].svaf = SVAF; \ cpi->fn_ptr[BT].sdx4df = SDX4DF; \ cpi->fn_ptr[BT].sdx8f = NULL; #define MAKE_BFP_SAD_WRAPPER(fnname) \ static unsigned int fnname##_bits8(const uint8_t *src_ptr, \ int source_stride, \ const uint8_t *ref_ptr, int ref_stride) { \ return fnname(src_ptr, source_stride, ref_ptr, ref_stride); \ } \ static unsigned int fnname##_bits10( \ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ int ref_stride) { \ return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 2; \ } \ static unsigned int fnname##_bits12( \ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ int ref_stride) { \ return fnname(src_ptr, source_stride, ref_ptr, ref_stride) >> 4; \ } #define MAKE_BFP_SADAVG_WRAPPER(fnname) \ static unsigned int fnname##_bits8( \ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ int ref_stride, const uint8_t *second_pred) { \ return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred); \ } \ static unsigned int fnname##_bits10( \ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ int ref_stride, const uint8_t *second_pred) { \ return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \ 2; \ } \ static unsigned int fnname##_bits12( \ const uint8_t *src_ptr, int source_stride, const uint8_t *ref_ptr, \ int ref_stride, const uint8_t *second_pred) { \ return fnname(src_ptr, source_stride, ref_ptr, ref_stride, second_pred) >> \ 4; \ } #define MAKE_BFP_SAD4D_WRAPPER(fnname) \ static void fnname##_bits8(const uint8_t *src_ptr, int source_stride, \ const uint8_t *const ref_ptr[], int ref_stride, \ unsigned int *sad_array) { \ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ } \ static void fnname##_bits10(const uint8_t *src_ptr, int source_stride, \ const uint8_t *const ref_ptr[], int ref_stride, \ unsigned int *sad_array) { \ int i; \ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ for (i = 0; i < 4; i++) sad_array[i] >>= 2; \ } \ static void fnname##_bits12(const uint8_t *src_ptr, int source_stride, \ const uint8_t *const ref_ptr[], int ref_stride, \ unsigned int *sad_array) { \ int i; \ fnname(src_ptr, source_stride, ref_ptr, ref_stride, sad_array); \ for (i = 0; i 
< 4; i++) sad_array[i] >>= 4; \ } MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x16) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x16_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x16x4d) MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x32) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x32_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x32x4d) MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x32) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x32_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x32x4d) MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x64) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x64_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x64x4d) MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad32x32) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad32x32_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad32x32x4d) MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad64x64) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad64x64_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad64x64x4d) MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x16) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x16_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x16x4d) MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad16x8) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad16x8_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad16x8x4d) MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x16) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x16_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x16x4d) MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x8) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x8_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x8x4d) MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad8x4) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad8x4_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad8x4x4d) MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x8) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x8_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x8x4d) MAKE_BFP_SAD_WRAPPER(vpx_highbd_sad4x4) MAKE_BFP_SADAVG_WRAPPER(vpx_highbd_sad4x4_avg) MAKE_BFP_SAD4D_WRAPPER(vpx_highbd_sad4x4x4d) static void highbd_set_var_fns(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; if (cm->use_highbitdepth) { switch (cm->bit_depth) { case VPX_BITS_8: HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits8, vpx_highbd_sad32x16_avg_bits8, vpx_highbd_8_variance32x16, vpx_highbd_8_sub_pixel_variance32x16, vpx_highbd_8_sub_pixel_avg_variance32x16, vpx_highbd_sad32x16x4d_bits8) HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits8, vpx_highbd_sad16x32_avg_bits8, vpx_highbd_8_variance16x32, vpx_highbd_8_sub_pixel_variance16x32, vpx_highbd_8_sub_pixel_avg_variance16x32, vpx_highbd_sad16x32x4d_bits8) HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits8, vpx_highbd_sad64x32_avg_bits8, vpx_highbd_8_variance64x32, vpx_highbd_8_sub_pixel_variance64x32, vpx_highbd_8_sub_pixel_avg_variance64x32, vpx_highbd_sad64x32x4d_bits8) HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits8, vpx_highbd_sad32x64_avg_bits8, vpx_highbd_8_variance32x64, vpx_highbd_8_sub_pixel_variance32x64, vpx_highbd_8_sub_pixel_avg_variance32x64, vpx_highbd_sad32x64x4d_bits8) HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits8, vpx_highbd_sad32x32_avg_bits8, vpx_highbd_8_variance32x32, vpx_highbd_8_sub_pixel_variance32x32, vpx_highbd_8_sub_pixel_avg_variance32x32, vpx_highbd_sad32x32x4d_bits8) HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits8, vpx_highbd_sad64x64_avg_bits8, vpx_highbd_8_variance64x64, vpx_highbd_8_sub_pixel_variance64x64, vpx_highbd_8_sub_pixel_avg_variance64x64, vpx_highbd_sad64x64x4d_bits8) HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits8, vpx_highbd_sad16x16_avg_bits8, vpx_highbd_8_variance16x16, vpx_highbd_8_sub_pixel_variance16x16, vpx_highbd_8_sub_pixel_avg_variance16x16, vpx_highbd_sad16x16x4d_bits8) HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits8, 
vpx_highbd_sad16x8_avg_bits8, vpx_highbd_8_variance16x8, vpx_highbd_8_sub_pixel_variance16x8, vpx_highbd_8_sub_pixel_avg_variance16x8, vpx_highbd_sad16x8x4d_bits8) HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits8, vpx_highbd_sad8x16_avg_bits8, vpx_highbd_8_variance8x16, vpx_highbd_8_sub_pixel_variance8x16, vpx_highbd_8_sub_pixel_avg_variance8x16, vpx_highbd_sad8x16x4d_bits8) HIGHBD_BFP( BLOCK_8X8, vpx_highbd_sad8x8_bits8, vpx_highbd_sad8x8_avg_bits8, vpx_highbd_8_variance8x8, vpx_highbd_8_sub_pixel_variance8x8, vpx_highbd_8_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x4d_bits8) HIGHBD_BFP( BLOCK_8X4, vpx_highbd_sad8x4_bits8, vpx_highbd_sad8x4_avg_bits8, vpx_highbd_8_variance8x4, vpx_highbd_8_sub_pixel_variance8x4, vpx_highbd_8_sub_pixel_avg_variance8x4, vpx_highbd_sad8x4x4d_bits8) HIGHBD_BFP( BLOCK_4X8, vpx_highbd_sad4x8_bits8, vpx_highbd_sad4x8_avg_bits8, vpx_highbd_8_variance4x8, vpx_highbd_8_sub_pixel_variance4x8, vpx_highbd_8_sub_pixel_avg_variance4x8, vpx_highbd_sad4x8x4d_bits8) HIGHBD_BFP( BLOCK_4X4, vpx_highbd_sad4x4_bits8, vpx_highbd_sad4x4_avg_bits8, vpx_highbd_8_variance4x4, vpx_highbd_8_sub_pixel_variance4x4, vpx_highbd_8_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits8) break; case VPX_BITS_10: HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits10, vpx_highbd_sad32x16_avg_bits10, vpx_highbd_10_variance32x16, vpx_highbd_10_sub_pixel_variance32x16, vpx_highbd_10_sub_pixel_avg_variance32x16, vpx_highbd_sad32x16x4d_bits10) HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits10, vpx_highbd_sad16x32_avg_bits10, vpx_highbd_10_variance16x32, vpx_highbd_10_sub_pixel_variance16x32, vpx_highbd_10_sub_pixel_avg_variance16x32, vpx_highbd_sad16x32x4d_bits10) HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits10, vpx_highbd_sad64x32_avg_bits10, vpx_highbd_10_variance64x32, vpx_highbd_10_sub_pixel_variance64x32, vpx_highbd_10_sub_pixel_avg_variance64x32, vpx_highbd_sad64x32x4d_bits10) HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits10, vpx_highbd_sad32x64_avg_bits10, vpx_highbd_10_variance32x64, vpx_highbd_10_sub_pixel_variance32x64, vpx_highbd_10_sub_pixel_avg_variance32x64, vpx_highbd_sad32x64x4d_bits10) HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits10, vpx_highbd_sad32x32_avg_bits10, vpx_highbd_10_variance32x32, vpx_highbd_10_sub_pixel_variance32x32, vpx_highbd_10_sub_pixel_avg_variance32x32, vpx_highbd_sad32x32x4d_bits10) HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits10, vpx_highbd_sad64x64_avg_bits10, vpx_highbd_10_variance64x64, vpx_highbd_10_sub_pixel_variance64x64, vpx_highbd_10_sub_pixel_avg_variance64x64, vpx_highbd_sad64x64x4d_bits10) HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits10, vpx_highbd_sad16x16_avg_bits10, vpx_highbd_10_variance16x16, vpx_highbd_10_sub_pixel_variance16x16, vpx_highbd_10_sub_pixel_avg_variance16x16, vpx_highbd_sad16x16x4d_bits10) HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits10, vpx_highbd_sad16x8_avg_bits10, vpx_highbd_10_variance16x8, vpx_highbd_10_sub_pixel_variance16x8, vpx_highbd_10_sub_pixel_avg_variance16x8, vpx_highbd_sad16x8x4d_bits10) HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits10, vpx_highbd_sad8x16_avg_bits10, vpx_highbd_10_variance8x16, vpx_highbd_10_sub_pixel_variance8x16, vpx_highbd_10_sub_pixel_avg_variance8x16, vpx_highbd_sad8x16x4d_bits10) HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits10, vpx_highbd_sad8x8_avg_bits10, vpx_highbd_10_variance8x8, vpx_highbd_10_sub_pixel_variance8x8, vpx_highbd_10_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x4d_bits10) HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits10, vpx_highbd_sad8x4_avg_bits10, 
vpx_highbd_10_variance8x4, vpx_highbd_10_sub_pixel_variance8x4, vpx_highbd_10_sub_pixel_avg_variance8x4, vpx_highbd_sad8x4x4d_bits10) HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits10, vpx_highbd_sad4x8_avg_bits10, vpx_highbd_10_variance4x8, vpx_highbd_10_sub_pixel_variance4x8, vpx_highbd_10_sub_pixel_avg_variance4x8, vpx_highbd_sad4x8x4d_bits10) HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits10, vpx_highbd_sad4x4_avg_bits10, vpx_highbd_10_variance4x4, vpx_highbd_10_sub_pixel_variance4x4, vpx_highbd_10_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits10) break; default: assert(cm->bit_depth == VPX_BITS_12); HIGHBD_BFP(BLOCK_32X16, vpx_highbd_sad32x16_bits12, vpx_highbd_sad32x16_avg_bits12, vpx_highbd_12_variance32x16, vpx_highbd_12_sub_pixel_variance32x16, vpx_highbd_12_sub_pixel_avg_variance32x16, vpx_highbd_sad32x16x4d_bits12) HIGHBD_BFP(BLOCK_16X32, vpx_highbd_sad16x32_bits12, vpx_highbd_sad16x32_avg_bits12, vpx_highbd_12_variance16x32, vpx_highbd_12_sub_pixel_variance16x32, vpx_highbd_12_sub_pixel_avg_variance16x32, vpx_highbd_sad16x32x4d_bits12) HIGHBD_BFP(BLOCK_64X32, vpx_highbd_sad64x32_bits12, vpx_highbd_sad64x32_avg_bits12, vpx_highbd_12_variance64x32, vpx_highbd_12_sub_pixel_variance64x32, vpx_highbd_12_sub_pixel_avg_variance64x32, vpx_highbd_sad64x32x4d_bits12) HIGHBD_BFP(BLOCK_32X64, vpx_highbd_sad32x64_bits12, vpx_highbd_sad32x64_avg_bits12, vpx_highbd_12_variance32x64, vpx_highbd_12_sub_pixel_variance32x64, vpx_highbd_12_sub_pixel_avg_variance32x64, vpx_highbd_sad32x64x4d_bits12) HIGHBD_BFP(BLOCK_32X32, vpx_highbd_sad32x32_bits12, vpx_highbd_sad32x32_avg_bits12, vpx_highbd_12_variance32x32, vpx_highbd_12_sub_pixel_variance32x32, vpx_highbd_12_sub_pixel_avg_variance32x32, vpx_highbd_sad32x32x4d_bits12) HIGHBD_BFP(BLOCK_64X64, vpx_highbd_sad64x64_bits12, vpx_highbd_sad64x64_avg_bits12, vpx_highbd_12_variance64x64, vpx_highbd_12_sub_pixel_variance64x64, vpx_highbd_12_sub_pixel_avg_variance64x64, vpx_highbd_sad64x64x4d_bits12) HIGHBD_BFP(BLOCK_16X16, vpx_highbd_sad16x16_bits12, vpx_highbd_sad16x16_avg_bits12, vpx_highbd_12_variance16x16, vpx_highbd_12_sub_pixel_variance16x16, vpx_highbd_12_sub_pixel_avg_variance16x16, vpx_highbd_sad16x16x4d_bits12) HIGHBD_BFP(BLOCK_16X8, vpx_highbd_sad16x8_bits12, vpx_highbd_sad16x8_avg_bits12, vpx_highbd_12_variance16x8, vpx_highbd_12_sub_pixel_variance16x8, vpx_highbd_12_sub_pixel_avg_variance16x8, vpx_highbd_sad16x8x4d_bits12) HIGHBD_BFP(BLOCK_8X16, vpx_highbd_sad8x16_bits12, vpx_highbd_sad8x16_avg_bits12, vpx_highbd_12_variance8x16, vpx_highbd_12_sub_pixel_variance8x16, vpx_highbd_12_sub_pixel_avg_variance8x16, vpx_highbd_sad8x16x4d_bits12) HIGHBD_BFP(BLOCK_8X8, vpx_highbd_sad8x8_bits12, vpx_highbd_sad8x8_avg_bits12, vpx_highbd_12_variance8x8, vpx_highbd_12_sub_pixel_variance8x8, vpx_highbd_12_sub_pixel_avg_variance8x8, vpx_highbd_sad8x8x4d_bits12) HIGHBD_BFP(BLOCK_8X4, vpx_highbd_sad8x4_bits12, vpx_highbd_sad8x4_avg_bits12, vpx_highbd_12_variance8x4, vpx_highbd_12_sub_pixel_variance8x4, vpx_highbd_12_sub_pixel_avg_variance8x4, vpx_highbd_sad8x4x4d_bits12) HIGHBD_BFP(BLOCK_4X8, vpx_highbd_sad4x8_bits12, vpx_highbd_sad4x8_avg_bits12, vpx_highbd_12_variance4x8, vpx_highbd_12_sub_pixel_variance4x8, vpx_highbd_12_sub_pixel_avg_variance4x8, vpx_highbd_sad4x8x4d_bits12) HIGHBD_BFP(BLOCK_4X4, vpx_highbd_sad4x4_bits12, vpx_highbd_sad4x4_avg_bits12, vpx_highbd_12_variance4x4, vpx_highbd_12_sub_pixel_variance4x4, vpx_highbd_12_sub_pixel_avg_variance4x4, vpx_highbd_sad4x4x4d_bits12) break; } } } #endif // CONFIG_VP9_HIGHBITDEPTH static void 
realloc_segmentation_maps(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; // Create the encoder segmentation map and set all entries to 0 vpx_free(cpi->segmentation_map); CHECK_MEM_ERROR(cm, cpi->segmentation_map, vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); // Create a map used for cyclic background refresh. if (cpi->cyclic_refresh) vp9_cyclic_refresh_free(cpi->cyclic_refresh); CHECK_MEM_ERROR(cm, cpi->cyclic_refresh, vp9_cyclic_refresh_alloc(cm->mi_rows, cm->mi_cols)); // Create a map used to mark inactive areas. vpx_free(cpi->active_map.map); CHECK_MEM_ERROR(cm, cpi->active_map.map, vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); // And a place holder structure is the coding context // for use if we want to save and restore it vpx_free(cpi->coding_context.last_frame_seg_map_copy); CHECK_MEM_ERROR(cm, cpi->coding_context.last_frame_seg_map_copy, vpx_calloc(cm->mi_rows * cm->mi_cols, 1)); } static void alloc_copy_partition_data(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (cpi->prev_partition == NULL) { CHECK_MEM_ERROR(cm, cpi->prev_partition, (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows, sizeof(*cpi->prev_partition))); } if (cpi->prev_segment_id == NULL) { CHECK_MEM_ERROR( cm, cpi->prev_segment_id, (int8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(*cpi->prev_segment_id))); } if (cpi->prev_variance_low == NULL) { CHECK_MEM_ERROR(cm, cpi->prev_variance_low, (uint8_t *)vpx_calloc( (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) * 25, sizeof(*cpi->prev_variance_low))); } if (cpi->copied_frame_cnt == NULL) { CHECK_MEM_ERROR( cm, cpi->copied_frame_cnt, (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(*cpi->copied_frame_cnt))); } } void vp9_change_config(struct VP9_COMP *cpi, const VP9EncoderConfig *oxcf) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int last_w = cpi->oxcf.width; int last_h = cpi->oxcf.height; vp9_init_quantizer(cpi); if (cm->profile != oxcf->profile) cm->profile = oxcf->profile; cm->bit_depth = oxcf->bit_depth; cm->color_space = oxcf->color_space; cm->color_range = oxcf->color_range; cpi->target_level = oxcf->target_level; cpi->keep_level_stats = oxcf->target_level != LEVEL_MAX; set_level_constraint(&cpi->level_constraint, get_level_index(cpi->target_level)); if (cm->profile <= PROFILE_1) assert(cm->bit_depth == VPX_BITS_8); else assert(cm->bit_depth > VPX_BITS_8); cpi->oxcf = *oxcf; #if CONFIG_VP9_HIGHBITDEPTH cpi->td.mb.e_mbd.bd = (int)cm->bit_depth; #endif // CONFIG_VP9_HIGHBITDEPTH if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) { rc->baseline_gf_interval = FIXED_GF_INTERVAL; } else { rc->baseline_gf_interval = (MIN_GF_INTERVAL + MAX_GF_INTERVAL) / 2; } cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 1; cm->refresh_frame_context = 1; cm->reset_frame_context = 0; vp9_reset_segment_features(&cm->seg); vp9_set_high_precision_mv(cpi, 0); { int i; for (i = 0; i < MAX_SEGMENTS; i++) cpi->segment_encode_breakout[i] = cpi->oxcf.encode_breakout; } cpi->encode_breakout = cpi->oxcf.encode_breakout; set_rc_buffer_sizes(rc, &cpi->oxcf); // Under a configuration change, where maximum_buffer_size may change, // keep buffer level clipped to the maximum allowed buffer size. rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size); rc->buffer_level = VPXMIN(rc->buffer_level, rc->maximum_buffer_size); // Set up frame rate and related parameters rate control values. 
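  // (Illustrative note, not from the original source: vp9_new_framerate()
  // derives the average per-frame bit budget from these settings; e.g. a
  // target of 800000 bps at 30 fps leaves roughly 800000 / 30 ~= 26667 bits
  // per frame.)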
vp9_new_framerate(cpi, cpi->framerate); // Set absolute upper and lower quality limits rc->worst_quality = cpi->oxcf.worst_allowed_q; rc->best_quality = cpi->oxcf.best_allowed_q; cm->interp_filter = cpi->sf.default_interp_filter; if (cpi->oxcf.render_width > 0 && cpi->oxcf.render_height > 0) { cm->render_width = cpi->oxcf.render_width; cm->render_height = cpi->oxcf.render_height; } else { cm->render_width = cpi->oxcf.width; cm->render_height = cpi->oxcf.height; } if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) { cm->width = cpi->oxcf.width; cm->height = cpi->oxcf.height; cpi->external_resize = 1; } if (cpi->initial_width) { int new_mi_size = 0; vp9_set_mb_mi(cm, cm->width, cm->height); new_mi_size = cm->mi_stride * calc_mi_size(cm->mi_rows); if (cm->mi_alloc_size < new_mi_size) { vp9_free_context_buffers(cm); alloc_compressor_data(cpi); realloc_segmentation_maps(cpi); cpi->initial_width = cpi->initial_height = 0; cpi->external_resize = 0; } else if (cm->mi_alloc_size == new_mi_size && (cpi->oxcf.width > last_w || cpi->oxcf.height > last_h)) { vp9_alloc_loop_filter(cm); } } if (cm->current_video_frame == 0 || last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) update_frame_size(cpi); if (last_w != cpi->oxcf.width || last_h != cpi->oxcf.height) { memset(cpi->consec_zero_mv, 0, cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv)); if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_reset_resize(cpi); rc->rc_1_frame = 0; rc->rc_2_frame = 0; } if ((cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) || ((cpi->svc.number_temporal_layers > 1 || cpi->svc.number_spatial_layers > 1) && cpi->oxcf.pass != 1)) { vp9_update_layer_context_change_config(cpi, (int)cpi->oxcf.target_bandwidth); } // Check for resetting the rc flags (rc_1_frame, rc_2_frame) if the // configuration change has a large change in avg_frame_bandwidth. // For SVC check for resetting based on spatial layer average bandwidth. // Also reset buffer level to optimal level. if (cm->current_video_frame > 0) { if (cpi->use_svc) { vp9_svc_check_reset_layer_rc_flag(cpi); } else { if (rc->avg_frame_bandwidth > (3 * rc->last_avg_frame_bandwidth >> 1) || rc->avg_frame_bandwidth < (rc->last_avg_frame_bandwidth >> 1)) { rc->rc_1_frame = 0; rc->rc_2_frame = 0; rc->bits_off_target = rc->optimal_buffer_level; rc->buffer_level = rc->optimal_buffer_level; } } } cpi->alt_ref_source = NULL; rc->is_src_frame_alt_ref = 0; #if 0 // Experimental RD Code cpi->frame_distortion = 0; cpi->last_frame_distortion = 0; #endif set_tile_limits(cpi); cpi->ext_refresh_frame_flags_pending = 0; cpi->ext_refresh_frame_context_pending = 0; #if CONFIG_VP9_HIGHBITDEPTH highbd_set_var_fns(cpi); #endif vp9_set_row_mt(cpi); } #ifndef M_LOG2_E #define M_LOG2_E 0.693147180559945309417 #endif #define log2f(x) (log(x) / (float)M_LOG2_E) /*********************************************************************** * Read before modifying 'cal_nmvjointsadcost' or 'cal_nmvsadcosts' * *********************************************************************** * The following 2 functions ('cal_nmvjointsadcost' and * * 'cal_nmvsadcosts') are used to calculate cost lookup tables * * used by 'vp9_diamond_search_sad'. 
The C implementation of the * * function is generic, but the AVX intrinsics optimised version * * relies on the following properties of the computed tables: * * For cal_nmvjointsadcost: * * - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] * * For cal_nmvsadcosts: * * - For all i: mvsadcost[0][i] == mvsadcost[1][i] * * (Equal costs for both components) * * - For all i: mvsadcost[0][i] == mvsadcost[0][-i] * * (Cost function is even) * * If these do not hold, then the AVX optimised version of the * * 'vp9_diamond_search_sad' function cannot be used as it is, in which * * case you can revert to using the C function instead. * ***********************************************************************/ static void cal_nmvjointsadcost(int *mvjointsadcost) { /********************************************************************* * Warning: Read the comments above before modifying this function * *********************************************************************/ mvjointsadcost[0] = 600; mvjointsadcost[1] = 300; mvjointsadcost[2] = 300; mvjointsadcost[3] = 300; } static void cal_nmvsadcosts(int *mvsadcost[2]) { /********************************************************************* * Warning: Read the comments above before modifying this function * *********************************************************************/ int i = 1; mvsadcost[0][0] = 0; mvsadcost[1][0] = 0; do { double z = 256 * (2 * (log2f(8 * i) + .6)); mvsadcost[0][i] = (int)z; mvsadcost[1][i] = (int)z; mvsadcost[0][-i] = (int)z; mvsadcost[1][-i] = (int)z; } while (++i <= MV_MAX); } static void cal_nmvsadcosts_hp(int *mvsadcost[2]) { int i = 1; mvsadcost[0][0] = 0; mvsadcost[1][0] = 0; do { double z = 256 * (2 * (log2f(8 * i) + .6)); mvsadcost[0][i] = (int)z; mvsadcost[1][i] = (int)z; mvsadcost[0][-i] = (int)z; mvsadcost[1][-i] = (int)z; } while (++i <= MV_MAX); } static void init_ref_frame_bufs(VP9_COMMON *cm) { int i; BufferPool *const pool = cm->buffer_pool; cm->new_fb_idx = INVALID_IDX; for (i = 0; i < REF_FRAMES; ++i) { cm->ref_frame_map[i] = INVALID_IDX; } for (i = 0; i < FRAME_BUFFERS; ++i) { pool->frame_bufs[i].ref_count = 0; } } static void update_initial_width(VP9_COMP *cpi, int use_highbitdepth, int subsampling_x, int subsampling_y) { VP9_COMMON *const cm = &cpi->common; #if !CONFIG_VP9_HIGHBITDEPTH (void)use_highbitdepth; assert(use_highbitdepth == 0); #endif if (!cpi->initial_width || #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth != use_highbitdepth || #endif cm->subsampling_x != subsampling_x || cm->subsampling_y != subsampling_y) { cm->subsampling_x = subsampling_x; cm->subsampling_y = subsampling_y; #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth = use_highbitdepth; #endif alloc_util_frame_buffers(cpi); cpi->initial_width = cm->width; cpi->initial_height = cm->height; cpi->initial_mbs = cm->MBs; } } // TODO(angiebird): Check whether we can move this function to vpx_image.c static INLINE void vpx_img_chroma_subsampling(vpx_img_fmt_t fmt, unsigned int *subsampling_x, unsigned int *subsampling_y) { switch (fmt) { case VPX_IMG_FMT_I420: case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_I422: case VPX_IMG_FMT_I42016: case VPX_IMG_FMT_I42216: *subsampling_x = 1; break; default: *subsampling_x = 0; break; } switch (fmt) { case VPX_IMG_FMT_I420: case VPX_IMG_FMT_I440: case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_I42016: case VPX_IMG_FMT_I44016: *subsampling_y = 1; break; default: *subsampling_y = 0; break; } } // TODO(angiebird): Check whether we can move this function to vpx_image.c static INLINE int 
vpx_img_use_highbitdepth(vpx_img_fmt_t fmt) { return fmt & VPX_IMG_FMT_HIGHBITDEPTH; } #if CONFIG_VP9_TEMPORAL_DENOISING static void setup_denoiser_buffer(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (cpi->oxcf.noise_sensitivity > 0 && !cpi->denoiser.frame_buffer_initialized) { if (vp9_denoiser_alloc(cm, &cpi->svc, &cpi->denoiser, cpi->use_svc, cpi->oxcf.noise_sensitivity, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate denoiser"); } } #endif void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt) { const VP9EncoderConfig *oxcf = &cpi->oxcf; unsigned int subsampling_x, subsampling_y; const int use_highbitdepth = vpx_img_use_highbitdepth(img_fmt); vpx_img_chroma_subsampling(img_fmt, &subsampling_x, &subsampling_y); update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y); #if CONFIG_VP9_TEMPORAL_DENOISING setup_denoiser_buffer(cpi); #endif assert(cpi->lookahead == NULL); cpi->lookahead = vp9_lookahead_init(oxcf->width, oxcf->height, subsampling_x, subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH use_highbitdepth, #endif oxcf->lag_in_frames); alloc_raw_frame_buffers(cpi); } VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf, BufferPool *const pool) { unsigned int i; VP9_COMP *volatile const cpi = vpx_memalign(32, sizeof(VP9_COMP)); VP9_COMMON *volatile const cm = cpi != NULL ? &cpi->common : NULL; if (!cm) return NULL; vp9_zero(*cpi); if (setjmp(cm->error.jmp)) { cm->error.setjmp = 0; vp9_remove_compressor(cpi); return 0; } cm->error.setjmp = 1; cm->alloc_mi = vp9_enc_alloc_mi; cm->free_mi = vp9_enc_free_mi; cm->setup_mi = vp9_enc_setup_mi; CHECK_MEM_ERROR(cm, cm->fc, (FRAME_CONTEXT *)vpx_calloc(1, sizeof(*cm->fc))); CHECK_MEM_ERROR( cm, cm->frame_contexts, (FRAME_CONTEXT *)vpx_calloc(FRAME_CONTEXTS, sizeof(*cm->frame_contexts))); cpi->use_svc = 0; cpi->resize_state = ORIG; cpi->external_resize = 0; cpi->resize_avg_qp = 0; cpi->resize_buffer_underflow = 0; cpi->use_skin_detection = 0; cpi->common.buffer_pool = pool; init_ref_frame_bufs(cm); cpi->force_update_segmentation = 0; init_config(cpi, oxcf); cpi->frame_info = vp9_get_frame_info(oxcf); vp9_rc_init(&cpi->oxcf, oxcf->pass, &cpi->rc); cm->current_video_frame = 0; cpi->partition_search_skippable_frame = 0; cpi->tile_data = NULL; realloc_segmentation_maps(cpi); CHECK_MEM_ERROR( cm, cpi->skin_map, vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(cpi->skin_map[0]))); #if !CONFIG_REALTIME_ONLY CHECK_MEM_ERROR(cm, cpi->alt_ref_aq, vp9_alt_ref_aq_create()); #endif CHECK_MEM_ERROR( cm, cpi->consec_zero_mv, vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(*cpi->consec_zero_mv))); CHECK_MEM_ERROR(cm, cpi->nmvcosts[0], vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[0]))); CHECK_MEM_ERROR(cm, cpi->nmvcosts[1], vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts[1]))); CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[0], vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[0]))); CHECK_MEM_ERROR(cm, cpi->nmvcosts_hp[1], vpx_calloc(MV_VALS, sizeof(*cpi->nmvcosts_hp[1]))); CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[0], vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[0]))); CHECK_MEM_ERROR(cm, cpi->nmvsadcosts[1], vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts[1]))); CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[0], vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[0]))); CHECK_MEM_ERROR(cm, cpi->nmvsadcosts_hp[1], vpx_calloc(MV_VALS, sizeof(*cpi->nmvsadcosts_hp[1]))); for (i = 0; i < 
(sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0])); i++) { CHECK_MEM_ERROR( cm, cpi->mbgraph_stats[i].mb_stats, vpx_calloc(cm->MBs * sizeof(*cpi->mbgraph_stats[i].mb_stats), 1)); } #if CONFIG_FP_MB_STATS cpi->use_fp_mb_stats = 0; if (cpi->use_fp_mb_stats) { // a place holder used to store the first pass mb stats in the first pass CHECK_MEM_ERROR(cm, cpi->twopass.frame_mb_stats_buf, vpx_calloc(cm->MBs * sizeof(uint8_t), 1)); } else { cpi->twopass.frame_mb_stats_buf = NULL; } #endif cpi->refresh_alt_ref_frame = 0; cpi->b_calculate_psnr = CONFIG_INTERNAL_STATS; init_level_info(&cpi->level_info); init_level_constraint(&cpi->level_constraint); #if CONFIG_INTERNAL_STATS cpi->b_calculate_blockiness = 1; cpi->b_calculate_consistency = 1; cpi->total_inconsistency = 0; cpi->psnr.worst = 100.0; cpi->worst_ssim = 100.0; cpi->count = 0; cpi->bytes = 0; if (cpi->b_calculate_psnr) { cpi->total_sq_error = 0; cpi->total_samples = 0; cpi->totalp_sq_error = 0; cpi->totalp_samples = 0; cpi->tot_recode_hits = 0; cpi->summed_quality = 0; cpi->summed_weights = 0; cpi->summedp_quality = 0; cpi->summedp_weights = 0; } cpi->fastssim.worst = 100.0; cpi->psnrhvs.worst = 100.0; if (cpi->b_calculate_blockiness) { cpi->total_blockiness = 0; cpi->worst_blockiness = 0.0; } if (cpi->b_calculate_consistency) { CHECK_MEM_ERROR(cm, cpi->ssim_vars, vpx_calloc(cpi->common.mi_rows * cpi->common.mi_cols, sizeof(*cpi->ssim_vars) * 4)); cpi->worst_consistency = 100.0; } else { cpi->ssim_vars = NULL; } #endif cpi->first_time_stamp_ever = INT64_MAX; /********************************************************************* * Warning: Read the comments around 'cal_nmvjointsadcost' and * * 'cal_nmvsadcosts' before modifying how these tables are computed. * *********************************************************************/ cal_nmvjointsadcost(cpi->td.mb.nmvjointsadcost); cpi->td.mb.nmvcost[0] = &cpi->nmvcosts[0][MV_MAX]; cpi->td.mb.nmvcost[1] = &cpi->nmvcosts[1][MV_MAX]; cpi->td.mb.nmvsadcost[0] = &cpi->nmvsadcosts[0][MV_MAX]; cpi->td.mb.nmvsadcost[1] = &cpi->nmvsadcosts[1][MV_MAX]; cal_nmvsadcosts(cpi->td.mb.nmvsadcost); cpi->td.mb.nmvcost_hp[0] = &cpi->nmvcosts_hp[0][MV_MAX]; cpi->td.mb.nmvcost_hp[1] = &cpi->nmvcosts_hp[1][MV_MAX]; cpi->td.mb.nmvsadcost_hp[0] = &cpi->nmvsadcosts_hp[0][MV_MAX]; cpi->td.mb.nmvsadcost_hp[1] = &cpi->nmvsadcosts_hp[1][MV_MAX]; cal_nmvsadcosts_hp(cpi->td.mb.nmvsadcost_hp); #if CONFIG_VP9_TEMPORAL_DENOISING #ifdef OUTPUT_YUV_DENOISED yuv_denoised_file = fopen("denoised.yuv", "ab"); #endif #endif #ifdef OUTPUT_YUV_SKINMAP yuv_skinmap_file = fopen("skinmap.yuv", "wb"); #endif #ifdef OUTPUT_YUV_REC yuv_rec_file = fopen("rec.yuv", "wb"); #endif #ifdef OUTPUT_YUV_SVC_SRC yuv_svc_src[0] = fopen("svc_src_0.yuv", "wb"); yuv_svc_src[1] = fopen("svc_src_1.yuv", "wb"); yuv_svc_src[2] = fopen("svc_src_2.yuv", "wb"); #endif #if 0 framepsnr = fopen("framepsnr.stt", "a"); kf_list = fopen("kf_list.stt", "w"); #endif cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; #if !CONFIG_REALTIME_ONLY if (oxcf->pass == 1) { vp9_init_first_pass(cpi); } else if (oxcf->pass == 2) { const size_t packet_sz = sizeof(FIRSTPASS_STATS); const int packets = (int)(oxcf->two_pass_stats_in.sz / packet_sz); if (cpi->svc.number_spatial_layers > 1 || cpi->svc.number_temporal_layers > 1) { FIRSTPASS_STATS *const stats = oxcf->two_pass_stats_in.buf; FIRSTPASS_STATS *stats_copy[VPX_SS_MAX_LAYERS] = { 0 }; int i; for (i = 0; i < oxcf->ss_number_layers; ++i) { FIRSTPASS_STATS *const last_packet_for_layer = &stats[packets - 
oxcf->ss_number_layers + i]; const int layer_id = (int)last_packet_for_layer->spatial_layer_id; const int packets_in_layer = (int)last_packet_for_layer->count + 1; if (layer_id >= 0 && layer_id < oxcf->ss_number_layers) { int num_frames; LAYER_CONTEXT *const lc = &cpi->svc.layer_context[layer_id]; vpx_free(lc->rc_twopass_stats_in.buf); lc->rc_twopass_stats_in.sz = packets_in_layer * packet_sz; CHECK_MEM_ERROR(cm, lc->rc_twopass_stats_in.buf, vpx_malloc(lc->rc_twopass_stats_in.sz)); lc->twopass.stats_in_start = lc->rc_twopass_stats_in.buf; lc->twopass.stats_in = lc->twopass.stats_in_start; lc->twopass.stats_in_end = lc->twopass.stats_in_start + packets_in_layer - 1; // Note the last packet is cumulative first pass stats. // So the number of frames is packet number minus one num_frames = packets_in_layer - 1; fps_init_first_pass_info(&lc->twopass.first_pass_info, lc->rc_twopass_stats_in.buf, num_frames); stats_copy[layer_id] = lc->rc_twopass_stats_in.buf; } } for (i = 0; i < packets; ++i) { const int layer_id = (int)stats[i].spatial_layer_id; if (layer_id >= 0 && layer_id < oxcf->ss_number_layers && stats_copy[layer_id] != NULL) { *stats_copy[layer_id] = stats[i]; ++stats_copy[layer_id]; } } vp9_init_second_pass_spatial_svc(cpi); } else { int num_frames; #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { const size_t psz = cpi->common.MBs * sizeof(uint8_t); const int ps = (int)(oxcf->firstpass_mb_stats_in.sz / psz); cpi->twopass.firstpass_mb_stats.mb_stats_start = oxcf->firstpass_mb_stats_in.buf; cpi->twopass.firstpass_mb_stats.mb_stats_end = cpi->twopass.firstpass_mb_stats.mb_stats_start + (ps - 1) * cpi->common.MBs * sizeof(uint8_t); } #endif cpi->twopass.stats_in_start = oxcf->two_pass_stats_in.buf; cpi->twopass.stats_in = cpi->twopass.stats_in_start; cpi->twopass.stats_in_end = &cpi->twopass.stats_in[packets - 1]; // Note the last packet is cumulative first pass stats. // So the number of frames is packet number minus one num_frames = packets - 1; fps_init_first_pass_info(&cpi->twopass.first_pass_info, oxcf->two_pass_stats_in.buf, num_frames); vp9_init_second_pass(cpi); } } #endif // !CONFIG_REALTIME_ONLY cpi->mb_wiener_var_cols = 0; cpi->mb_wiener_var_rows = 0; cpi->mb_wiener_variance = NULL; vp9_set_speed_features_framesize_independent(cpi, oxcf->speed); vp9_set_speed_features_framesize_dependent(cpi, oxcf->speed); { const int bsize = BLOCK_16X16; const int w = num_8x8_blocks_wide_lookup[bsize]; const int h = num_8x8_blocks_high_lookup[bsize]; const int num_cols = (cm->mi_cols + w - 1) / w; const int num_rows = (cm->mi_rows + h - 1) / h; CHECK_MEM_ERROR(cm, cpi->mi_ssim_rdmult_scaling_factors, vpx_calloc(num_rows * num_cols, sizeof(*cpi->mi_ssim_rdmult_scaling_factors))); } cpi->kmeans_data_arr_alloc = 0; #if CONFIG_NON_GREEDY_MV cpi->tpl_ready = 0; #endif // CONFIG_NON_GREEDY_MV for (i = 0; i < MAX_ARF_GOP_SIZE; ++i) cpi->tpl_stats[i].tpl_stats_ptr = NULL; // Allocate memory to store variances for a frame. 
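  // (Illustrative note, not from the original source: cm->MBs counts 16x16
  // macroblocks, so a 640x480 frame allocates (640 / 16) * (480 / 16) = 1200
  // 'diff' entries here.)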
CHECK_MEM_ERROR(cm, cpi->source_diff_var, vpx_calloc(cm->MBs, sizeof(diff))); cpi->source_var_thresh = 0; cpi->frames_till_next_var_check = 0; #define BFP(BT, SDF, SDAF, VF, SVF, SVAF, SDX4DF, SDX8F) \ cpi->fn_ptr[BT].sdf = SDF; \ cpi->fn_ptr[BT].sdaf = SDAF; \ cpi->fn_ptr[BT].vf = VF; \ cpi->fn_ptr[BT].svf = SVF; \ cpi->fn_ptr[BT].svaf = SVAF; \ cpi->fn_ptr[BT].sdx4df = SDX4DF; \ cpi->fn_ptr[BT].sdx8f = SDX8F; // TODO(angiebird): make sdx8f available for every block size BFP(BLOCK_32X16, vpx_sad32x16, vpx_sad32x16_avg, vpx_variance32x16, vpx_sub_pixel_variance32x16, vpx_sub_pixel_avg_variance32x16, vpx_sad32x16x4d, NULL) BFP(BLOCK_16X32, vpx_sad16x32, vpx_sad16x32_avg, vpx_variance16x32, vpx_sub_pixel_variance16x32, vpx_sub_pixel_avg_variance16x32, vpx_sad16x32x4d, NULL) BFP(BLOCK_64X32, vpx_sad64x32, vpx_sad64x32_avg, vpx_variance64x32, vpx_sub_pixel_variance64x32, vpx_sub_pixel_avg_variance64x32, vpx_sad64x32x4d, NULL) BFP(BLOCK_32X64, vpx_sad32x64, vpx_sad32x64_avg, vpx_variance32x64, vpx_sub_pixel_variance32x64, vpx_sub_pixel_avg_variance32x64, vpx_sad32x64x4d, NULL) BFP(BLOCK_32X32, vpx_sad32x32, vpx_sad32x32_avg, vpx_variance32x32, vpx_sub_pixel_variance32x32, vpx_sub_pixel_avg_variance32x32, vpx_sad32x32x4d, vpx_sad32x32x8) BFP(BLOCK_64X64, vpx_sad64x64, vpx_sad64x64_avg, vpx_variance64x64, vpx_sub_pixel_variance64x64, vpx_sub_pixel_avg_variance64x64, vpx_sad64x64x4d, NULL) BFP(BLOCK_16X16, vpx_sad16x16, vpx_sad16x16_avg, vpx_variance16x16, vpx_sub_pixel_variance16x16, vpx_sub_pixel_avg_variance16x16, vpx_sad16x16x4d, vpx_sad16x16x8) BFP(BLOCK_16X8, vpx_sad16x8, vpx_sad16x8_avg, vpx_variance16x8, vpx_sub_pixel_variance16x8, vpx_sub_pixel_avg_variance16x8, vpx_sad16x8x4d, vpx_sad16x8x8) BFP(BLOCK_8X16, vpx_sad8x16, vpx_sad8x16_avg, vpx_variance8x16, vpx_sub_pixel_variance8x16, vpx_sub_pixel_avg_variance8x16, vpx_sad8x16x4d, vpx_sad8x16x8) BFP(BLOCK_8X8, vpx_sad8x8, vpx_sad8x8_avg, vpx_variance8x8, vpx_sub_pixel_variance8x8, vpx_sub_pixel_avg_variance8x8, vpx_sad8x8x4d, vpx_sad8x8x8) BFP(BLOCK_8X4, vpx_sad8x4, vpx_sad8x4_avg, vpx_variance8x4, vpx_sub_pixel_variance8x4, vpx_sub_pixel_avg_variance8x4, vpx_sad8x4x4d, NULL) BFP(BLOCK_4X8, vpx_sad4x8, vpx_sad4x8_avg, vpx_variance4x8, vpx_sub_pixel_variance4x8, vpx_sub_pixel_avg_variance4x8, vpx_sad4x8x4d, NULL) BFP(BLOCK_4X4, vpx_sad4x4, vpx_sad4x4_avg, vpx_variance4x4, vpx_sub_pixel_variance4x4, vpx_sub_pixel_avg_variance4x4, vpx_sad4x4x4d, vpx_sad4x4x8) #if CONFIG_VP9_HIGHBITDEPTH highbd_set_var_fns(cpi); #endif /* vp9_init_quantizer() is first called here. Add check in * vp9_frame_init_quantizer() so that vp9_init_quantizer is only * called later when needed. This will avoid unnecessary calls of * vp9_init_quantizer() for every frame. */ vp9_init_quantizer(cpi); vp9_loop_filter_init(cm); // Set up the unit scaling factor used during motion search. 
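  // (Note added for clarity: the source and destination dimensions passed
  // below are identical, so the resulting scale factors describe an identity
  // 1:1 mapping.)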
#if CONFIG_VP9_HIGHBITDEPTH vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height, cm->width, cm->height, cm->use_highbitdepth); #else vp9_setup_scale_factors_for_frame(&cpi->me_sf, cm->width, cm->height, cm->width, cm->height); #endif // CONFIG_VP9_HIGHBITDEPTH cpi->td.mb.me_sf = &cpi->me_sf; cm->error.setjmp = 0; #if CONFIG_RATE_CTRL encode_command_init(&cpi->encode_command); #endif return cpi; } #if CONFIG_INTERNAL_STATS #define SNPRINT(H, T) snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T)) #define SNPRINT2(H, T, V) \ snprintf((H) + strlen(H), sizeof(H) - strlen(H), (T), (V)) #endif // CONFIG_INTERNAL_STATS static void free_tpl_buffer(VP9_COMP *cpi); void vp9_remove_compressor(VP9_COMP *cpi) { VP9_COMMON *cm; unsigned int i; int t; if (!cpi) return; #if CONFIG_INTERNAL_STATS vpx_free(cpi->ssim_vars); #endif cm = &cpi->common; if (cm->current_video_frame > 0) { #if CONFIG_INTERNAL_STATS vpx_clear_system_state(); if (cpi->oxcf.pass != 1) { char headings[512] = { 0 }; char results[512] = { 0 }; FILE *f = fopen("opsnr.stt", "a"); double time_encoded = (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) / 10000000.000; double total_encode_time = (cpi->time_receive_data + cpi->time_compress_data) / 1000.000; const double dr = (double)cpi->bytes * (double)8 / (double)1000 / time_encoded; const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1); const double target_rate = (double)cpi->oxcf.target_bandwidth / 1000; const double rate_err = ((100.0 * (dr - target_rate)) / target_rate); if (cpi->b_calculate_psnr) { const double total_psnr = vpx_sse_to_psnr( (double)cpi->total_samples, peak, (double)cpi->total_sq_error); const double totalp_psnr = vpx_sse_to_psnr( (double)cpi->totalp_samples, peak, (double)cpi->totalp_sq_error); const double total_ssim = 100 * pow(cpi->summed_quality / cpi->summed_weights, 8.0); const double totalp_ssim = 100 * pow(cpi->summedp_quality / cpi->summedp_weights, 8.0); snprintf(headings, sizeof(headings), "Bitrate\tAVGPsnr\tGLBPsnr\tAVPsnrP\tGLPsnrP\t" "VPXSSIM\tVPSSIMP\tFASTSIM\tPSNRHVS\t" "WstPsnr\tWstSsim\tWstFast\tWstHVS\t" "AVPsnrY\tAPsnrCb\tAPsnrCr"); snprintf(results, sizeof(results), "%7.2f\t%7.3f\t%7.3f\t%7.3f\t%7.3f\t" "%7.3f\t%7.3f\t%7.3f\t%7.3f\t" "%7.3f\t%7.3f\t%7.3f\t%7.3f\t" "%7.3f\t%7.3f\t%7.3f", dr, cpi->psnr.stat[ALL] / cpi->count, total_psnr, cpi->psnrp.stat[ALL] / cpi->count, totalp_psnr, total_ssim, totalp_ssim, cpi->fastssim.stat[ALL] / cpi->count, cpi->psnrhvs.stat[ALL] / cpi->count, cpi->psnr.worst, cpi->worst_ssim, cpi->fastssim.worst, cpi->psnrhvs.worst, cpi->psnr.stat[Y] / cpi->count, cpi->psnr.stat[U] / cpi->count, cpi->psnr.stat[V] / cpi->count); if (cpi->b_calculate_blockiness) { SNPRINT(headings, "\t Block\tWstBlck"); SNPRINT2(results, "\t%7.3f", cpi->total_blockiness / cpi->count); SNPRINT2(results, "\t%7.3f", cpi->worst_blockiness); } if (cpi->b_calculate_consistency) { double consistency = vpx_sse_to_psnr((double)cpi->totalp_samples, peak, (double)cpi->total_inconsistency); SNPRINT(headings, "\tConsist\tWstCons"); SNPRINT2(results, "\t%7.3f", consistency); SNPRINT2(results, "\t%7.3f", cpi->worst_consistency); } SNPRINT(headings, "\t Time\tRcErr\tAbsErr"); SNPRINT2(results, "\t%8.0f", total_encode_time); SNPRINT2(results, "\t%7.2f", rate_err); SNPRINT2(results, "\t%7.2f", fabs(rate_err)); fprintf(f, "%s\tAPsnr611\n", headings); fprintf( f, "%s\t%7.3f\n", results, (6 * cpi->psnr.stat[Y] + cpi->psnr.stat[U] + cpi->psnr.stat[V]) / (cpi->count * 8)); } fclose(f); } #endif #if 0 { 
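      // (Disabled debug block: dumps cumulative per-stage encoder timings,
      // converted from microseconds to milliseconds, at compressor teardown.)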
printf("\n_pick_loop_filter_level:%d\n", cpi->time_pick_lpf / 1000);
      printf("\n_frames receive_data encode_mb_row compress_frame Total\n");
      printf("%6d %10ld %10ld %10ld %10ld\n", cpi->common.current_video_frame,
             cpi->time_receive_data / 1000, cpi->time_encode_sb_row / 1000,
             cpi->time_compress_data / 1000,
             (cpi->time_receive_data + cpi->time_compress_data) / 1000);
    }
#endif
  }

#if CONFIG_VP9_TEMPORAL_DENOISING
  vp9_denoiser_free(&(cpi->denoiser));
#endif

  if (cpi->kmeans_data_arr_alloc) {
#if CONFIG_MULTITHREAD
    pthread_mutex_destroy(&cpi->kmeans_mutex);
#endif
    vpx_free(cpi->kmeans_data_arr);
  }

  free_tpl_buffer(cpi);

  for (t = 0; t < cpi->num_workers; ++t) {
    VPxWorker *const worker = &cpi->workers[t];
    EncWorkerData *const thread_data = &cpi->tile_thr_data[t];

    // Shut down the worker thread.
    vpx_get_worker_interface()->end(worker);

    // Free the allocated thread data (the last worker reuses the main
    // thread's data, so it is not freed here).
    if (t < cpi->num_workers - 1) {
      vpx_free(thread_data->td->counts);
      vp9_free_pc_tree(thread_data->td);
      vpx_free(thread_data->td);
    }
  }
  vpx_free(cpi->tile_thr_data);
  vpx_free(cpi->workers);

  vp9_row_mt_mem_dealloc(cpi);

  if (cpi->num_workers > 1) {
    vp9_loop_filter_dealloc(&cpi->lf_row_sync);
    vp9_bitstream_encode_tiles_buffer_dealloc(cpi);
  }

#if !CONFIG_REALTIME_ONLY
  vp9_alt_ref_aq_destroy(cpi->alt_ref_aq);
#endif

  dealloc_compressor_data(cpi);

  for (i = 0; i < sizeof(cpi->mbgraph_stats) / sizeof(cpi->mbgraph_stats[0]);
       ++i) {
    vpx_free(cpi->mbgraph_stats[i].mb_stats);
  }

#if CONFIG_FP_MB_STATS
  if (cpi->use_fp_mb_stats) {
    vpx_free(cpi->twopass.frame_mb_stats_buf);
    cpi->twopass.frame_mb_stats_buf = NULL;
  }
#endif

  vp9_remove_common(cm);
  vp9_free_ref_frame_buffers(cm->buffer_pool);
#if CONFIG_VP9_POSTPROC
  vp9_free_postproc_buffers(cm);
#endif
  vpx_free(cpi);

#if CONFIG_VP9_TEMPORAL_DENOISING
#ifdef OUTPUT_YUV_DENOISED
  fclose(yuv_denoised_file);
#endif
#endif
#ifdef OUTPUT_YUV_SKINMAP
  fclose(yuv_skinmap_file);
#endif
#ifdef OUTPUT_YUV_REC
  fclose(yuv_rec_file);
#endif
#ifdef OUTPUT_YUV_SVC_SRC
  fclose(yuv_svc_src[0]);
  fclose(yuv_svc_src[1]);
  fclose(yuv_svc_src[2]);
#endif

#if 0
  if (keyfile) fclose(keyfile);
  if (framepsnr) fclose(framepsnr);
  if (kf_list) fclose(kf_list);
#endif
}

int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr) {
  if (is_psnr_calc_enabled(cpi)) {
#if CONFIG_VP9_HIGHBITDEPTH
    vpx_calc_highbd_psnr(cpi->raw_source_frame, cpi->common.frame_to_show,
                         psnr, cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth);
#else
    vpx_calc_psnr(cpi->raw_source_frame, cpi->common.frame_to_show, psnr);
#endif
    return 1;
  } else {
    vp9_zero(*psnr);
    return 0;
  }
}

int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags) {
  if (ref_frame_flags > 7) return -1;

  cpi->ref_frame_flags = ref_frame_flags;
  return 0;
}

void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags) {
  cpi->ext_refresh_golden_frame = (ref_frame_flags & VP9_GOLD_FLAG) != 0;
  cpi->ext_refresh_alt_ref_frame = (ref_frame_flags & VP9_ALT_FLAG) != 0;
  cpi->ext_refresh_last_frame = (ref_frame_flags & VP9_LAST_FLAG) != 0;
  cpi->ext_refresh_frame_flags_pending = 1;
}

static YV12_BUFFER_CONFIG *get_vp9_ref_frame_buffer(
    VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag) {
  MV_REFERENCE_FRAME ref_frame = NONE;
  if (ref_frame_flag == VP9_LAST_FLAG)
    ref_frame = LAST_FRAME;
  else if (ref_frame_flag == VP9_GOLD_FLAG)
    ref_frame = GOLDEN_FRAME;
  else if (ref_frame_flag == VP9_ALT_FLAG)
    ref_frame = ALTREF_FRAME;

  return ref_frame == NONE ?
NULL : get_ref_frame_buffer(cpi, ref_frame); } int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag); if (cfg) { vpx_yv12_copy_frame(cfg, sd); return 0; } else { return -1; } } int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd) { YV12_BUFFER_CONFIG *cfg = get_vp9_ref_frame_buffer(cpi, ref_frame_flag); if (cfg) { vpx_yv12_copy_frame(sd, cfg); return 0; } else { return -1; } } int vp9_update_entropy(VP9_COMP *cpi, int update) { cpi->ext_refresh_frame_context = update; cpi->ext_refresh_frame_context_pending = 1; return 0; } #ifdef OUTPUT_YUV_REC void vp9_write_yuv_rec_frame(VP9_COMMON *cm) { YV12_BUFFER_CONFIG *s = cm->frame_to_show; uint8_t *src = s->y_buffer; int h = cm->height; #if CONFIG_VP9_HIGHBITDEPTH if (s->flags & YV12_FLAG_HIGHBITDEPTH) { uint16_t *src16 = CONVERT_TO_SHORTPTR(s->y_buffer); do { fwrite(src16, s->y_width, 2, yuv_rec_file); src16 += s->y_stride; } while (--h); src16 = CONVERT_TO_SHORTPTR(s->u_buffer); h = s->uv_height; do { fwrite(src16, s->uv_width, 2, yuv_rec_file); src16 += s->uv_stride; } while (--h); src16 = CONVERT_TO_SHORTPTR(s->v_buffer); h = s->uv_height; do { fwrite(src16, s->uv_width, 2, yuv_rec_file); src16 += s->uv_stride; } while (--h); fflush(yuv_rec_file); return; } #endif // CONFIG_VP9_HIGHBITDEPTH do { fwrite(src, s->y_width, 1, yuv_rec_file); src += s->y_stride; } while (--h); src = s->u_buffer; h = s->uv_height; do { fwrite(src, s->uv_width, 1, yuv_rec_file); src += s->uv_stride; } while (--h); src = s->v_buffer; h = s->uv_height; do { fwrite(src, s->uv_width, 1, yuv_rec_file); src += s->uv_stride; } while (--h); fflush(yuv_rec_file); } #endif #if CONFIG_VP9_HIGHBITDEPTH static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int bd) { #else static void scale_and_extend_frame_nonnormative(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst) { #endif // CONFIG_VP9_HIGHBITDEPTH // TODO(dkovalev): replace YV12_BUFFER_CONFIG with vpx_image_t int i; const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer, src->v_buffer }; const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride }; const int src_widths[3] = { src->y_crop_width, src->uv_crop_width, src->uv_crop_width }; const int src_heights[3] = { src->y_crop_height, src->uv_crop_height, src->uv_crop_height }; uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer }; const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride }; const int dst_widths[3] = { dst->y_crop_width, dst->uv_crop_width, dst->uv_crop_width }; const int dst_heights[3] = { dst->y_crop_height, dst->uv_crop_height, dst->uv_crop_height }; for (i = 0; i < MAX_MB_PLANE; ++i) { #if CONFIG_VP9_HIGHBITDEPTH if (src->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_highbd_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i], dsts[i], dst_heights[i], dst_widths[i], dst_strides[i], bd); } else { vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i], dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]); } #else vp9_resize_plane(srcs[i], src_heights[i], src_widths[i], src_strides[i], dsts[i], dst_heights[i], dst_widths[i], dst_strides[i]); #endif // CONFIG_VP9_HIGHBITDEPTH } vpx_extend_frame_borders(dst); } #if CONFIG_VP9_HIGHBITDEPTH static void scale_and_extend_frame(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int bd, INTERP_FILTER filter_type, int 
phase_scaler) {
  const int src_w = src->y_crop_width;
  const int src_h = src->y_crop_height;
  const int dst_w = dst->y_crop_width;
  const int dst_h = dst->y_crop_height;
  const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer,
                                   src->v_buffer };
  const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride };
  uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer };
  const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride };
  const InterpKernel *const kernel = vp9_filter_kernels[filter_type];
  int x, y, i;

  for (i = 0; i < MAX_MB_PLANE; ++i) {
    const int factor = (i == 0 || i == 3 ? 1 : 2);
    const int src_stride = src_strides[i];
    const int dst_stride = dst_strides[i];
    for (y = 0; y < dst_h; y += 16) {
      const int y_q4 = y * (16 / factor) * src_h / dst_h + phase_scaler;
      for (x = 0; x < dst_w; x += 16) {
        const int x_q4 = x * (16 / factor) * src_w / dst_w + phase_scaler;
        const uint8_t *src_ptr = srcs[i] +
                                 (y / factor) * src_h / dst_h * src_stride +
                                 (x / factor) * src_w / dst_w;
        uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor);

        if (src->flags & YV12_FLAG_HIGHBITDEPTH) {
          vpx_highbd_convolve8(CONVERT_TO_SHORTPTR(src_ptr), src_stride,
                               CONVERT_TO_SHORTPTR(dst_ptr), dst_stride,
                               kernel, x_q4 & 0xf, 16 * src_w / dst_w,
                               y_q4 & 0xf, 16 * src_h / dst_h, 16 / factor,
                               16 / factor, bd);
        } else {
          vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, kernel,
                        x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf,
                        16 * src_h / dst_h, 16 / factor, 16 / factor);
        }
      }
    }
  }
  vpx_extend_frame_borders(dst);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if !CONFIG_REALTIME_ONLY
static int scale_down(VP9_COMP *cpi, int q) {
  RATE_CONTROL *const rc = &cpi->rc;
  GF_GROUP *const gf_group = &cpi->twopass.gf_group;
  int scale = 0;
  assert(frame_is_kf_gf_arf(cpi));

  if (rc->frame_size_selector == UNSCALED &&
      q >= rc->rf_level_maxq[gf_group->rf_level[gf_group->index]]) {
    const int max_size_thresh =
        (int)(rate_thresh_mult[SCALE_STEP1] *
              VPXMAX(rc->this_frame_target, rc->avg_frame_bandwidth));
    scale = rc->projected_frame_size > max_size_thresh ? 1 : 0;
  }
  return scale;
}

static int big_rate_miss_high_threshold(VP9_COMP *cpi) {
  const RATE_CONTROL *const rc = &cpi->rc;
  int big_miss_high;

  if (frame_is_kf_gf_arf(cpi))
    big_miss_high = rc->this_frame_target * 3 / 2;
  else
    big_miss_high = rc->this_frame_target * 2;

  return big_miss_high;
}

static int big_rate_miss(VP9_COMP *cpi) {
  const RATE_CONTROL *const rc = &cpi->rc;
  int big_miss_high;
  int big_miss_low;

  // Ignore for overlay frames
  if (rc->is_src_frame_alt_ref) {
    return 0;
  } else {
    big_miss_low = (rc->this_frame_target / 2);
    big_miss_high = big_rate_miss_high_threshold(cpi);

    return (rc->projected_frame_size > big_miss_high) ||
           (rc->projected_frame_size < big_miss_low);
  }
}

// In two-pass mode, test whether the current frame is the first inter
// (LF_UPDATE) frame of its golden frame group.
static int two_pass_first_group_inter(VP9_COMP *cpi) {
  if (cpi->oxcf.pass == 2) {
    TWO_PASS *const twopass = &cpi->twopass;
    GF_GROUP *const gf_group = &twopass->gf_group;
    const int gfg_index = gf_group->index;

    if (gfg_index == 0) return gf_group->update_type[gfg_index] == LF_UPDATE;
    return gf_group->update_type[gfg_index - 1] != LF_UPDATE &&
           gf_group->update_type[gfg_index] == LF_UPDATE;
  } else {
    return 0;
  }
}

// Function to test for conditions that indicate we should loop
// back and recode a frame.
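// (Summary added for clarity: a recode is forced when the projected size
// reaches the hard per-frame maximum, when the rate miss is "big" as defined
// above (e.g. with this_frame_target = 20000 bits, a kf/gf/arf frame misses
// big beyond 30000 bits or below 10000 bits), when the speed features allow
// unconditional recoding, or when a kf/gf/arf frame falls outside its
// [low_limit, high_limit] size window; kf/gf/arf frames under dynamic
// resizing may be scaled down instead.)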
static int recode_loop_test(VP9_COMP *cpi, int high_limit, int low_limit, int q, int maxq, int minq) { const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const int frame_is_kfgfarf = frame_is_kf_gf_arf(cpi); int force_recode = 0; if ((rc->projected_frame_size >= rc->max_frame_bandwidth) || big_rate_miss(cpi) || (cpi->sf.recode_loop == ALLOW_RECODE) || (two_pass_first_group_inter(cpi) && (cpi->sf.recode_loop == ALLOW_RECODE_FIRST)) || (frame_is_kfgfarf && (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF))) { if (frame_is_kfgfarf && (oxcf->resize_mode == RESIZE_DYNAMIC) && scale_down(cpi, q)) { // Code this group at a lower resolution. cpi->resize_pending = 1; return 1; } // Force recode for extreme overshoot. if ((rc->projected_frame_size >= rc->max_frame_bandwidth) || (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF && rc->projected_frame_size >= big_rate_miss_high_threshold(cpi))) { return 1; } // TODO(agrange) high_limit could be greater than the scale-down threshold. if ((rc->projected_frame_size > high_limit && q < maxq) || (rc->projected_frame_size < low_limit && q > minq)) { force_recode = 1; } else if (cpi->oxcf.rc_mode == VPX_CQ) { // Deal with frame undershoot and whether or not we are // below the automatically set cq level. if (q > oxcf->cq_level && rc->projected_frame_size < ((rc->this_frame_target * 7) >> 3)) { force_recode = 1; } } } return force_recode; } #endif // !CONFIG_REALTIME_ONLY static void update_ref_frames(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; BufferPool *const pool = cm->buffer_pool; GF_GROUP *const gf_group = &cpi->twopass.gf_group; if (cpi->rc.show_arf_as_gld) { int tmp = cpi->alt_fb_idx; cpi->alt_fb_idx = cpi->gld_fb_idx; cpi->gld_fb_idx = tmp; } else if (cm->show_existing_frame) { // Pop ARF. cpi->lst_fb_idx = cpi->alt_fb_idx; cpi->alt_fb_idx = stack_pop(gf_group->arf_index_stack, gf_group->stack_size); --gf_group->stack_size; } // At this point the new frame has been encoded. // If any buffer copy / swapping is signaled it should be done here. if (cm->frame_type == KEY_FRAME) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); } else if (vp9_preserve_existing_gf(cpi)) { // We have decided to preserve the previously existing golden frame as our // new ARF frame. However, in the short term in function // vp9_get_refresh_mask() we left it in the GF slot and, if // we're updating the GF with the current decoded frame, we save it to the // ARF slot instead. // We now have to update the ARF with the current frame and swap gld_fb_idx // and alt_fb_idx so that, overall, we've stored the old GF in the new ARF // slot and, if we're updating the GF, the current frame becomes the new GF. int tmp; ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->alt_fb_idx], cm->new_fb_idx); tmp = cpi->alt_fb_idx; cpi->alt_fb_idx = cpi->gld_fb_idx; cpi->gld_fb_idx = tmp; } else { /* For non key/golden frames */ if (cpi->refresh_alt_ref_frame) { int arf_idx = gf_group->top_arf_idx; // Push new ARF into stack. 
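      // (The arf_index_stack tracks the buffer indices of nested ARFs in
      // multi-layer golden frame groups; the matching stack_pop() calls
      // appear above, on show_existing_frame and on MID_OVERLAY_UPDATE.)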
stack_push(gf_group->arf_index_stack, cpi->alt_fb_idx, gf_group->stack_size); ++gf_group->stack_size; assert(arf_idx < REF_FRAMES); ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[arf_idx], cm->new_fb_idx); memcpy(cpi->interp_filter_selected[ALTREF_FRAME], cpi->interp_filter_selected[0], sizeof(cpi->interp_filter_selected[0])); cpi->alt_fb_idx = arf_idx; } if (cpi->refresh_golden_frame) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx], cm->new_fb_idx); if (!cpi->rc.is_src_frame_alt_ref) memcpy(cpi->interp_filter_selected[GOLDEN_FRAME], cpi->interp_filter_selected[0], sizeof(cpi->interp_filter_selected[0])); else memcpy(cpi->interp_filter_selected[GOLDEN_FRAME], cpi->interp_filter_selected[ALTREF_FRAME], sizeof(cpi->interp_filter_selected[ALTREF_FRAME])); } } if (cpi->refresh_last_frame) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx], cm->new_fb_idx); if (!cpi->rc.is_src_frame_alt_ref) memcpy(cpi->interp_filter_selected[LAST_FRAME], cpi->interp_filter_selected[0], sizeof(cpi->interp_filter_selected[0])); } if (gf_group->update_type[gf_group->index] == MID_OVERLAY_UPDATE) { cpi->alt_fb_idx = stack_pop(gf_group->arf_index_stack, gf_group->stack_size); --gf_group->stack_size; } } void vp9_update_reference_frames(VP9_COMP *cpi) { update_ref_frames(cpi); #if CONFIG_VP9_TEMPORAL_DENOISING vp9_denoiser_update_ref_frame(cpi); #endif if (is_one_pass_cbr_svc(cpi)) vp9_svc_update_ref_frame(cpi); } static void loopfilter_frame(VP9_COMP *cpi, VP9_COMMON *cm) { MACROBLOCKD *xd = &cpi->td.mb.e_mbd; struct loopfilter *lf = &cm->lf; int is_reference_frame = (cm->frame_type == KEY_FRAME || cpi->refresh_last_frame || cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame); if (cpi->use_svc && cpi->svc.temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) is_reference_frame = !cpi->svc.non_reference_frame; // Skip loop filter in show_existing_frame mode. 
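  // (The displayed frame is an existing, already-filtered reference buffer,
  // so it is shown as-is and no further filtering is applied.)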
if (cm->show_existing_frame) { lf->filter_level = 0; return; } if (xd->lossless) { lf->filter_level = 0; lf->last_filt_level = 0; } else { struct vpx_usec_timer timer; vpx_clear_system_state(); vpx_usec_timer_start(&timer); if (!cpi->rc.is_src_frame_alt_ref) { if ((cpi->common.frame_type == KEY_FRAME) && (!cpi->rc.this_key_frame_forced)) { lf->last_filt_level = 0; } vp9_pick_filter_level(cpi->Source, cpi, cpi->sf.lpf_pick); lf->last_filt_level = lf->filter_level; } else { lf->filter_level = 0; } vpx_usec_timer_mark(&timer); cpi->time_pick_lpf += vpx_usec_timer_elapsed(&timer); } if (lf->filter_level > 0 && is_reference_frame) { vp9_build_mask_frame(cm, lf->filter_level, 0); if (cpi->num_workers > 1) vp9_loop_filter_frame_mt(cm->frame_to_show, cm, xd->plane, lf->filter_level, 0, 0, cpi->workers, cpi->num_workers, &cpi->lf_row_sync); else vp9_loop_filter_frame(cm->frame_to_show, cm, xd, lf->filter_level, 0, 0); } vpx_extend_frame_inner_borders(cm->frame_to_show); } static INLINE void alloc_frame_mvs(VP9_COMMON *const cm, int buffer_idx) { RefCntBuffer *const new_fb_ptr = &cm->buffer_pool->frame_bufs[buffer_idx]; if (new_fb_ptr->mvs == NULL || new_fb_ptr->mi_rows < cm->mi_rows || new_fb_ptr->mi_cols < cm->mi_cols) { vpx_free(new_fb_ptr->mvs); CHECK_MEM_ERROR(cm, new_fb_ptr->mvs, (MV_REF *)vpx_calloc(cm->mi_rows * cm->mi_cols, sizeof(*new_fb_ptr->mvs))); new_fb_ptr->mi_rows = cm->mi_rows; new_fb_ptr->mi_cols = cm->mi_cols; } } void vp9_scale_references(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; MV_REFERENCE_FRAME ref_frame; const VP9_REFFRAME ref_mask[3] = { VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { // Need to convert from VP9_REFFRAME to index into ref_mask (subtract 1). if (cpi->ref_frame_flags & ref_mask[ref_frame - 1]) { BufferPool *const pool = cm->buffer_pool; const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, ref_frame); if (ref == NULL) { cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX; continue; } #if CONFIG_VP9_HIGHBITDEPTH if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { RefCntBuffer *new_fb_ptr = NULL; int force_scaling = 0; int new_fb = cpi->scaled_ref_idx[ref_frame - 1]; if (new_fb == INVALID_IDX) { new_fb = get_free_fb(cm); force_scaling = 1; } if (new_fb == INVALID_IDX) return; new_fb_ptr = &pool->frame_bufs[new_fb]; if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width || new_fb_ptr->buf.y_crop_height != cm->height) { if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, cm->use_highbitdepth, VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffer"); scale_and_extend_frame(ref, &new_fb_ptr->buf, (int)cm->bit_depth, EIGHTTAP, 0); cpi->scaled_ref_idx[ref_frame - 1] = new_fb; alloc_frame_mvs(cm, new_fb); } #else if (ref->y_crop_width != cm->width || ref->y_crop_height != cm->height) { RefCntBuffer *new_fb_ptr = NULL; int force_scaling = 0; int new_fb = cpi->scaled_ref_idx[ref_frame - 1]; if (new_fb == INVALID_IDX) { new_fb = get_free_fb(cm); force_scaling = 1; } if (new_fb == INVALID_IDX) return; new_fb_ptr = &pool->frame_bufs[new_fb]; if (force_scaling || new_fb_ptr->buf.y_crop_width != cm->width || new_fb_ptr->buf.y_crop_height != cm->height) { if (vpx_realloc_frame_buffer(&new_fb_ptr->buf, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, 
NULL))
          vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                             "Failed to allocate frame buffer");
        vp9_scale_and_extend_frame(ref, &new_fb_ptr->buf, EIGHTTAP, 0);
        cpi->scaled_ref_idx[ref_frame - 1] = new_fb;
        alloc_frame_mvs(cm, new_fb);
      }
#endif  // CONFIG_VP9_HIGHBITDEPTH
    } else {
      int buf_idx;
      RefCntBuffer *buf = NULL;
      if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
        // Check for release of scaled reference.
        buf_idx = cpi->scaled_ref_idx[ref_frame - 1];
        if (buf_idx != INVALID_IDX) {
          buf = &pool->frame_bufs[buf_idx];
          --buf->ref_count;
          cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
        }
      }
      buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
      buf = &pool->frame_bufs[buf_idx];
      buf->buf.y_crop_width = ref->y_crop_width;
      buf->buf.y_crop_height = ref->y_crop_height;
      cpi->scaled_ref_idx[ref_frame - 1] = buf_idx;
      ++buf->ref_count;
    }
  } else {
    if (cpi->oxcf.pass != 0 || cpi->use_svc)
      cpi->scaled_ref_idx[ref_frame - 1] = INVALID_IDX;
  }
  }
}

static void release_scaled_references(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  int i;
  if (cpi->oxcf.pass == 0 && !cpi->use_svc) {
    // Only release scaled references under certain conditions:
    // if the reference will be updated, or if the scaled reference has the
    // same resolution.
    int refresh[3];
    refresh[0] = (cpi->refresh_last_frame) ? 1 : 0;
    refresh[1] = (cpi->refresh_golden_frame) ? 1 : 0;
    refresh[2] = (cpi->refresh_alt_ref_frame) ? 1 : 0;
    for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) {
      const int idx = cpi->scaled_ref_idx[i - 1];
      if (idx != INVALID_IDX) {
        RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
        const YV12_BUFFER_CONFIG *const ref = get_ref_frame_buffer(cpi, i);
        if (refresh[i - 1] || (buf->buf.y_crop_width == ref->y_crop_width &&
                               buf->buf.y_crop_height == ref->y_crop_height)) {
          --buf->ref_count;
          cpi->scaled_ref_idx[i - 1] = INVALID_IDX;
        }
      }
    }
  } else {
    for (i = 0; i < REFS_PER_FRAME; ++i) {
      const int idx = cpi->scaled_ref_idx[i];
      if (idx != INVALID_IDX) {
        RefCntBuffer *const buf = &cm->buffer_pool->frame_bufs[idx];
        --buf->ref_count;
        cpi->scaled_ref_idx[i] = INVALID_IDX;
      }
    }
  }
}

static void full_to_model_count(unsigned int *model_count,
                                unsigned int *full_count) {
  int n;
  model_count[ZERO_TOKEN] = full_count[ZERO_TOKEN];
  model_count[ONE_TOKEN] = full_count[ONE_TOKEN];
  model_count[TWO_TOKEN] = full_count[TWO_TOKEN];
  for (n = THREE_TOKEN; n < EOB_TOKEN; ++n)
    model_count[TWO_TOKEN] += full_count[n];
  model_count[EOB_MODEL_TOKEN] = full_count[EOB_TOKEN];
}

static void full_to_model_counts(vp9_coeff_count_model *model_count,
                                 vp9_coeff_count *full_count) {
  int i, j, k, l;

  for (i = 0; i < PLANE_TYPES; ++i)
    for (j = 0; j < REF_TYPES; ++j)
      for (k = 0; k < COEF_BANDS; ++k)
        for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l)
          full_to_model_count(model_count[i][j][k][l], full_count[i][j][k][l]);
}

#if 0 && CONFIG_INTERNAL_STATS
static void output_frame_level_debug_stats(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  FILE *const f = fopen("tmp.stt", cm->current_video_frame ? "a" : "w");
  int64_t recon_err;

  vpx_clear_system_state();

#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth) {
    recon_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
  } else {
    recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
  }
#else
  recon_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#endif  // CONFIG_VP9_HIGHBITDEPTH

  if (cpi->twopass.total_left_stats.coded_error != 0.0) {
    double dc_quant_divisor;
#if CONFIG_VP9_HIGHBITDEPTH
    switch (cm->bit_depth) {
      case VPX_BITS_8: dc_quant_divisor = 4.0; break;
      case VPX_BITS_10: dc_quant_divisor = 16.0; break;
      default:
        assert(cm->bit_depth == VPX_BITS_12);
        dc_quant_divisor = 64.0;
        break;
    }
#else
    dc_quant_divisor = 4.0;
#endif

    if (!cm->current_video_frame) {
      fprintf(f, "frame, width, height, last ts, last end ts, "
          "source_alt_ref_pending, source_alt_ref_active, "
          "this_frame_target, projected_frame_size, "
          "projected_frame_size / MBs, "
          "projected_frame_size - this_frame_target, "
          "vbr_bits_off_target, vbr_bits_off_target_fast, "
          "twopass.extend_minq, twopass.extend_minq_fast, "
          "total_target_vs_actual, "
          "starting_buffer_level - bits_off_target, "
          "total_actual_bits, base_qindex, q for base_qindex, "
          "dc quant, q for active_worst_quality, avg_q, q for oxcf.cq_level, "
          "refresh_last_frame, refresh_golden_frame, refresh_alt_ref_frame, "
          "frame_type, gfu_boost, "
          "twopass.bits_left, "
          "twopass.total_left_stats.coded_error, "
          "twopass.bits_left / (1 + twopass.total_left_stats.coded_error), "
          "tot_recode_hits, recon_err, kf_boost, "
          "twopass.kf_zeromotion_pct, twopass.fr_content_type, "
          "filter_level, seg.aq_av_offset\n");
    }

    fprintf(f,
        "%10u, %d, %d, %10"PRId64", %10"PRId64", %d, %d, %10d, %10d, "
        "%10d, %10d, %10"PRId64", %10"PRId64", %5d, %5d, %10"PRId64", "
        "%10"PRId64", %10"PRId64", %10d, %7.2lf, %7.2lf, %7.2lf, %7.2lf, "
        "%7.2lf, %6d, %6d, %5d, %5d, %5d, %10"PRId64", %10.3lf, %10lf, %8u, "
        "%10"PRId64", %10d, %10d, %10d, %10d, %10d\n",
        cpi->common.current_video_frame, cm->width, cm->height,
        cpi->last_time_stamp_seen, cpi->last_end_time_stamp_seen,
        cpi->rc.source_alt_ref_pending, cpi->rc.source_alt_ref_active,
        cpi->rc.this_frame_target, cpi->rc.projected_frame_size,
        cpi->rc.projected_frame_size / cpi->common.MBs,
        (cpi->rc.projected_frame_size - cpi->rc.this_frame_target),
        cpi->rc.vbr_bits_off_target, cpi->rc.vbr_bits_off_target_fast,
        cpi->twopass.extend_minq, cpi->twopass.extend_minq_fast,
        cpi->rc.total_target_vs_actual,
        (cpi->rc.starting_buffer_level - cpi->rc.bits_off_target),
        cpi->rc.total_actual_bits, cm->base_qindex,
        vp9_convert_qindex_to_q(cm->base_qindex, cm->bit_depth),
        (double)vp9_dc_quant(cm->base_qindex, 0, cm->bit_depth) /
            dc_quant_divisor,
        vp9_convert_qindex_to_q(cpi->twopass.active_worst_quality,
                                cm->bit_depth),
        cpi->rc.avg_q,
        vp9_convert_qindex_to_q(cpi->oxcf.cq_level, cm->bit_depth),
        cpi->refresh_last_frame, cpi->refresh_golden_frame,
        cpi->refresh_alt_ref_frame, cm->frame_type, cpi->rc.gfu_boost,
        cpi->twopass.bits_left,
        cpi->twopass.total_left_stats.coded_error,
        cpi->twopass.bits_left /
            (1 + cpi->twopass.total_left_stats.coded_error),
        cpi->tot_recode_hits, recon_err, cpi->rc.kf_boost,
        cpi->twopass.kf_zeromotion_pct, cpi->twopass.fr_content_type,
        cm->lf.filter_level, cm->seg.aq_av_offset);
  }
  fclose(f);

  if (0) {
    FILE *const fmodes = fopen("Modes.stt", "a");
    int i;

    fprintf(fmodes, "%6d:%1d:%1d:%1d ", cpi->common.current_video_frame,
            cm->frame_type, cpi->refresh_golden_frame,
            cpi->refresh_alt_ref_frame);

    for (i = 0; i < MAX_MODES; ++i)
      fprintf(fmodes, "%5d ", cpi->mode_chosen_counts[i]);
fprintf(fmodes, "\n"); fclose(fmodes); } } #endif static void set_mv_search_params(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; const unsigned int max_mv_def = VPXMIN(cm->width, cm->height); // Default based on max resolution. cpi->mv_step_param = vp9_init_search_range(max_mv_def); if (cpi->sf.mv.auto_mv_step_size) { if (frame_is_intra_only(cm)) { // Initialize max_mv_magnitude for use in the first INTER frame // after a key/intra-only frame. cpi->max_mv_magnitude = max_mv_def; } else { if (cm->show_frame) { // Allow mv_steps to correspond to twice the max mv magnitude found // in the previous frame, capped by the default max_mv_magnitude based // on resolution. cpi->mv_step_param = vp9_init_search_range( VPXMIN(max_mv_def, 2 * cpi->max_mv_magnitude)); } cpi->max_mv_magnitude = 0; } } } static void set_size_independent_vars(VP9_COMP *cpi) { vp9_set_speed_features_framesize_independent(cpi, cpi->oxcf.speed); vp9_set_rd_speed_thresholds(cpi); vp9_set_rd_speed_thresholds_sub8x8(cpi); cpi->common.interp_filter = cpi->sf.default_interp_filter; } static void set_size_dependent_vars(VP9_COMP *cpi, int *q, int *bottom_index, int *top_index) { VP9_COMMON *const cm = &cpi->common; // Setup variables that depend on the dimensions of the frame. vp9_set_speed_features_framesize_dependent(cpi, cpi->oxcf.speed); // Decide q and q bounds. *q = vp9_rc_pick_q_and_bounds(cpi, bottom_index, top_index); if (cpi->oxcf.rc_mode == VPX_CBR && cpi->rc.force_max_q) { *q = cpi->rc.worst_quality; cpi->rc.force_max_q = 0; } if (!frame_is_intra_only(cm)) { vp9_set_high_precision_mv(cpi, (*q) < HIGH_PRECISION_MV_QTHRESH); } #if !CONFIG_REALTIME_ONLY // Configure experimental use of segmentation for enhanced coding of // static regions if indicated. // Only allowed in the second pass of a two pass encode, as it requires // lagged coding, and if the relevant speed feature flag is set. if (cpi->oxcf.pass == 2 && cpi->sf.static_segmentation) configure_static_seg_features(cpi); #endif // !CONFIG_REALTIME_ONLY #if CONFIG_VP9_POSTPROC && !(CONFIG_VP9_TEMPORAL_DENOISING) if (cpi->oxcf.noise_sensitivity > 0) { int l = 0; switch (cpi->oxcf.noise_sensitivity) { case 1: l = 20; break; case 2: l = 40; break; case 3: l = 60; break; case 4: case 5: l = 100; break; case 6: l = 150; break; } if (!cpi->common.postproc_state.limits) { cpi->common.postproc_state.limits = vpx_calloc(cpi->un_scaled_source->y_width, sizeof(*cpi->common.postproc_state.limits)); } vp9_denoise(&cpi->common, cpi->Source, cpi->Source, l, cpi->common.postproc_state.limits); } #endif // CONFIG_VP9_POSTPROC } static void init_motion_estimation(VP9_COMP *cpi) { int y_stride = cpi->scaled_source.y_stride; if (cpi->sf.mv.search_method == NSTEP) { vp9_init3smotion_compensation(&cpi->ss_cfg, y_stride); } else if (cpi->sf.mv.search_method == DIAMOND) { vp9_init_dsmotion_compensation(&cpi->ss_cfg, y_stride); } } static void set_frame_size(VP9_COMP *cpi) { int ref_frame; VP9_COMMON *const cm = &cpi->common; VP9EncoderConfig *const oxcf = &cpi->oxcf; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; #if !CONFIG_REALTIME_ONLY if (oxcf->pass == 2 && oxcf->rc_mode == VPX_VBR && ((oxcf->resize_mode == RESIZE_FIXED && cm->current_video_frame == 0) || (oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending))) { calculate_coded_size(cpi, &oxcf->scaled_frame_width, &oxcf->scaled_frame_height); // There has been a change in frame size. 
vp9_set_size_literal(cpi, oxcf->scaled_frame_width, oxcf->scaled_frame_height); } #endif // !CONFIG_REALTIME_ONLY if (oxcf->pass == 0 && oxcf->rc_mode == VPX_CBR && !cpi->use_svc && oxcf->resize_mode == RESIZE_DYNAMIC && cpi->resize_pending != 0) { oxcf->scaled_frame_width = (oxcf->width * cpi->resize_scale_num) / cpi->resize_scale_den; oxcf->scaled_frame_height = (oxcf->height * cpi->resize_scale_num) / cpi->resize_scale_den; // There has been a change in frame size. vp9_set_size_literal(cpi, oxcf->scaled_frame_width, oxcf->scaled_frame_height); // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed. set_mv_search_params(cpi); vp9_noise_estimate_init(&cpi->noise_estimate, cm->width, cm->height); #if CONFIG_VP9_TEMPORAL_DENOISING // Reset the denoiser on the resized frame. if (cpi->oxcf.noise_sensitivity > 0) { vp9_denoiser_free(&(cpi->denoiser)); setup_denoiser_buffer(cpi); // Dynamic resize is only triggered for non-SVC, so we can force // golden frame update here as temporary fix to denoiser. cpi->refresh_golden_frame = 1; } #endif } if ((oxcf->pass == 2) && !cpi->use_svc) { vp9_set_target_rate(cpi); } alloc_frame_mvs(cm, cm->new_fb_idx); // Reset the frame pointers to the current frame size. if (vpx_realloc_frame_buffer(get_frame_new_buffer(cm), cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate frame buffer"); alloc_util_frame_buffers(cpi); init_motion_estimation(cpi); for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { RefBuffer *const ref_buf = &cm->frame_refs[ref_frame - 1]; const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); ref_buf->idx = buf_idx; if (buf_idx != INVALID_IDX) { YV12_BUFFER_CONFIG *const buf = &cm->buffer_pool->frame_bufs[buf_idx].buf; ref_buf->buf = buf; #if CONFIG_VP9_HIGHBITDEPTH vp9_setup_scale_factors_for_frame( &ref_buf->sf, buf->y_crop_width, buf->y_crop_height, cm->width, cm->height, (buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
                                          1 : 0);
#else
      vp9_setup_scale_factors_for_frame(&ref_buf->sf, buf->y_crop_width,
                                        buf->y_crop_height, cm->width,
                                        cm->height);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      if (vp9_is_scaled(&ref_buf->sf)) vpx_extend_frame_borders(buf);
    } else {
      ref_buf->buf = NULL;
    }
  }

  set_ref_ptrs(cm, xd, LAST_FRAME, LAST_FRAME);
}

#if CONFIG_CONSISTENT_RECODE
static void save_encode_params(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  const int tile_cols = 1 << cm->log2_tile_cols;
  const int tile_rows = 1 << cm->log2_tile_rows;
  int tile_col, tile_row;
  int i, j;
  RD_OPT *rd_opt = &cpi->rd;
  for (i = 0; i < MAX_REF_FRAMES; i++) {
    for (j = 0; j < REFERENCE_MODES; j++)
      rd_opt->prediction_type_threshes_prev[i][j] =
          rd_opt->prediction_type_threshes[i][j];

    for (j = 0; j < SWITCHABLE_FILTER_CONTEXTS; j++)
      rd_opt->filter_threshes_prev[i][j] = rd_opt->filter_threshes[i][j];
  }

  if (cpi->tile_data != NULL) {
    for (tile_row = 0; tile_row < tile_rows; ++tile_row)
      for (tile_col = 0; tile_col < tile_cols; ++tile_col) {
        TileDataEnc *tile_data =
            &cpi->tile_data[tile_row * tile_cols + tile_col];
        for (i = 0; i < BLOCK_SIZES; ++i) {
          for (j = 0; j < MAX_MODES; ++j) {
            tile_data->thresh_freq_fact_prev[i][j] =
                tile_data->thresh_freq_fact[i][j];
          }
        }
      }
  }
}
#endif

static INLINE void set_raw_source_frame(VP9_COMP *cpi) {
#ifdef ENABLE_KF_DENOISE
  if (is_spatial_denoise_enabled(cpi)) {
    cpi->raw_source_frame = vp9_scale_if_required(
        &cpi->common, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
        (cpi->oxcf.pass == 0), EIGHTTAP, 0);
  } else {
    cpi->raw_source_frame = cpi->Source;
  }
#else
  cpi->raw_source_frame = cpi->Source;
#endif
}

static int encode_without_recode_loop(VP9_COMP *cpi, size_t *size,
                                      uint8_t *dest) {
  VP9_COMMON *const cm = &cpi->common;
  SVC *const svc = &cpi->svc;
  int q = 0, bottom_index = 0, top_index = 0;
  int no_drop_scene_change = 0;
  const INTERP_FILTER filter_scaler =
      (is_one_pass_cbr_svc(cpi))
          ? svc->downsample_filter_type[svc->spatial_layer_id]
          : EIGHTTAP;
  const int phase_scaler =
      (is_one_pass_cbr_svc(cpi))
          ? svc->downsample_filter_phase[svc->spatial_layer_id]
          : 0;

  if (cm->show_existing_frame) {
    cpi->rc.this_frame_target = 0;
    if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi);
    return 1;
  }

  svc->time_stamp_prev[svc->spatial_layer_id] = svc->time_stamp_superframe;

  // Flag to check if it's valid to compute the source sad (used for
  // scene detection and for superblock content state in CBR mode).
  // The flag may get reset below based on SVC or resizing state.
  cpi->compute_source_sad_onepass = cpi->oxcf.mode == REALTIME;

  vpx_clear_system_state();

  set_frame_size(cpi);

  if (is_one_pass_cbr_svc(cpi) &&
      cpi->un_scaled_source->y_width == cm->width << 2 &&
      cpi->un_scaled_source->y_height == cm->height << 2 &&
      svc->scaled_temp.y_width == cm->width << 1 &&
      svc->scaled_temp.y_height == cm->height << 1) {
    // For svc, if it is a 1/4x1/4 downscaling, do a two-stage scaling to take
    // advantage of the 1:2 optimized scaler. In the process, the 1/2x1/2
    // result will be saved in scaled_temp and might be used later.
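    // Worked example, assuming a 1280x720 input with a 320x180 target layer:
    // stage one scales 1280x720 to 640x360 with the optimized 1:2 scaler and
    // keeps the result in svc->scaled_temp; stage two scales 640x360 down to
    // the final 320x180. A 1/2x1/2 sibling layer can then pick up
    // svc->scaled_temp directly (see the branch below) instead of rescaling
    // from the full resolution.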
    const INTERP_FILTER filter_scaler2 = svc->downsample_filter_type[1];
    const int phase_scaler2 = svc->downsample_filter_phase[1];
    cpi->Source = vp9_svc_twostage_scale(
        cm, cpi->un_scaled_source, &cpi->scaled_source, &svc->scaled_temp,
        filter_scaler, phase_scaler, filter_scaler2, phase_scaler2);
    svc->scaled_one_half = 1;
  } else if (is_one_pass_cbr_svc(cpi) &&
             cpi->un_scaled_source->y_width == cm->width << 1 &&
             cpi->un_scaled_source->y_height == cm->height << 1 &&
             svc->scaled_one_half) {
    // If the spatial layer is 1/2x1/2 and the scaling is already done in the
    // two-stage scaling, use the result directly.
    cpi->Source = &svc->scaled_temp;
    svc->scaled_one_half = 0;
  } else {
    cpi->Source = vp9_scale_if_required(
        cm, cpi->un_scaled_source, &cpi->scaled_source, (cpi->oxcf.pass == 0),
        filter_scaler, phase_scaler);
  }
#ifdef OUTPUT_YUV_SVC_SRC
  // Write out at most 3 spatial layers.
  if (is_one_pass_cbr_svc(cpi) && svc->spatial_layer_id < 3) {
    vpx_write_yuv_frame(yuv_svc_src[svc->spatial_layer_id], cpi->Source);
  }
#endif
  // Unfiltered raw source used in metrics calculation if the source
  // has been filtered.
  if (is_psnr_calc_enabled(cpi)) {
#ifdef ENABLE_KF_DENOISE
    if (is_spatial_denoise_enabled(cpi)) {
      cpi->raw_source_frame = vp9_scale_if_required(
          cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source,
          (cpi->oxcf.pass == 0), EIGHTTAP, phase_scaler);
    } else {
      cpi->raw_source_frame = cpi->Source;
    }
#else
    cpi->raw_source_frame = cpi->Source;
#endif
  }

  if ((cpi->use_svc &&
       (svc->spatial_layer_id < svc->number_spatial_layers - 1 ||
        svc->temporal_layer_id < svc->number_temporal_layers - 1 ||
        svc->current_superframe < 1)) ||
      cpi->resize_pending || cpi->resize_state || cpi->external_resize ||
      cpi->resize_state != ORIG) {
    cpi->compute_source_sad_onepass = 0;
    if (cpi->content_state_sb_fd != NULL)
      memset(cpi->content_state_sb_fd, 0,
             (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1) *
                 sizeof(*cpi->content_state_sb_fd));
  }

  // Avoid scaling last_source unless it's needed.
  // Last source is needed if avg_source_sad() is used, or if
  // partition_search_type == SOURCE_VAR_BASED_PARTITION, or if noise
  // estimation is enabled.
  if (cpi->unscaled_last_source != NULL &&
      (cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
       (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_VBR &&
        cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5) ||
       cpi->sf.partition_search_type == SOURCE_VAR_BASED_PARTITION ||
       (cpi->noise_estimate.enabled && !cpi->oxcf.noise_sensitivity) ||
       cpi->compute_source_sad_onepass))
    cpi->Last_Source = vp9_scale_if_required(
        cm, cpi->unscaled_last_source, &cpi->scaled_last_source,
        (cpi->oxcf.pass == 0), EIGHTTAP, 0);

  if (cpi->Last_Source == NULL ||
      cpi->Last_Source->y_width != cpi->Source->y_width ||
      cpi->Last_Source->y_height != cpi->Source->y_height)
    cpi->compute_source_sad_onepass = 0;

  if (frame_is_intra_only(cm) || cpi->resize_pending != 0) {
    memset(cpi->consec_zero_mv, 0,
           cm->mi_rows * cm->mi_cols * sizeof(*cpi->consec_zero_mv));
  }

#if CONFIG_VP9_TEMPORAL_DENOISING
  if (cpi->oxcf.noise_sensitivity > 0 && cpi->use_svc)
    vp9_denoiser_reset_on_first_frame(cpi);
#endif

  // Scene detection is always used for VBR mode or screen-content case.
  // For other cases (e.g., CBR mode) use it for 5 <= speed < 8 for now
  // (need to check encoding time cost for doing this for speed 8).
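#if 0
  // A minimal sketch of the gating implied by the comment above; it mirrors
  // the condition checked before vp9_scene_detection_onepass() below.
  const int use_scene_detection =
      cm->show_frame && cpi->oxcf.mode == REALTIME &&
      (cpi->oxcf.rc_mode == VPX_VBR ||
       cpi->oxcf.content == VP9E_CONTENT_SCREEN ||
       (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8));
#endif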
cpi->rc.high_source_sad = 0; cpi->rc.hybrid_intra_scene_change = 0; cpi->rc.re_encode_maxq_scene_change = 0; if (cm->show_frame && cpi->oxcf.mode == REALTIME && (cpi->oxcf.rc_mode == VPX_VBR || cpi->oxcf.content == VP9E_CONTENT_SCREEN || (cpi->oxcf.speed >= 5 && cpi->oxcf.speed < 8))) vp9_scene_detection_onepass(cpi); if (svc->spatial_layer_id == svc->first_spatial_layer_to_encode) { svc->high_source_sad_superframe = cpi->rc.high_source_sad; svc->high_num_blocks_with_motion = cpi->rc.high_num_blocks_with_motion; // On scene change reset temporal layer pattern to TL0. // Note that if the base/lower spatial layers are skipped: instead of // inserting base layer here, we force max-q for the next superframe // with lower spatial layers: this is done in vp9_encodedframe_overshoot() // when max-q is decided for the current layer. // Only do this reset for bypass/flexible mode. if (svc->high_source_sad_superframe && svc->temporal_layer_id > 0 && svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { // rc->high_source_sad will get reset so copy it to restore it. int tmp_high_source_sad = cpi->rc.high_source_sad; vp9_svc_reset_temporal_layers(cpi, cm->frame_type == KEY_FRAME); cpi->rc.high_source_sad = tmp_high_source_sad; } } vp9_update_noise_estimate(cpi); // For 1 pass CBR, check if we are dropping this frame. // Never drop on key frame, if base layer is key for svc, // on scene change, or if superframe has layer sync. if ((cpi->rc.high_source_sad || svc->high_source_sad_superframe) && !(cpi->rc.use_post_encode_drop && svc->last_layer_dropped[0])) no_drop_scene_change = 1; if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && !frame_is_intra_only(cm) && !no_drop_scene_change && !svc->superframe_has_layer_sync && (!cpi->use_svc || !svc->layer_context[svc->temporal_layer_id].is_key_frame)) { if (vp9_rc_drop_frame(cpi)) return 0; } // For 1 pass CBR SVC, only ZEROMV is allowed for spatial reference frame // when svc->force_zero_mode_spatial_ref = 1. Under those conditions we can // avoid this frame-level upsampling (for non intra_only frames). if (frame_is_intra_only(cm) == 0 && !(is_one_pass_cbr_svc(cpi) && svc->force_zero_mode_spatial_ref)) { vp9_scale_references(cpi); } set_size_independent_vars(cpi); set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); // search method and step parameter might be changed in speed settings. init_motion_estimation(cpi); if (cpi->sf.copy_partition_flag) alloc_copy_partition_data(cpi); if (cpi->sf.svc_use_lowres_part && svc->spatial_layer_id == svc->number_spatial_layers - 2) { if (svc->prev_partition_svc == NULL) { CHECK_MEM_ERROR( cm, svc->prev_partition_svc, (BLOCK_SIZE *)vpx_calloc(cm->mi_stride * cm->mi_rows, sizeof(*svc->prev_partition_svc))); } } // TODO(jianj): Look into issue of skin detection with high bitdepth. if (cm->bit_depth == 8 && cpi->oxcf.speed >= 5 && cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.content != VP9E_CONTENT_SCREEN && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) { cpi->use_skin_detection = 1; } // Enable post encode frame dropping for CBR on non key frame, when // ext_use_post_encode_drop is specified by user. cpi->rc.use_post_encode_drop = cpi->rc.ext_use_post_encode_drop && cpi->oxcf.rc_mode == VPX_CBR && cm->frame_type != KEY_FRAME; vp9_set_quantizer(cm, q); vp9_set_variance_partition_thresholds(cpi, q, 0); setup_frame(cpi); suppress_active_map(cpi); if (cpi->use_svc) { // On non-zero spatial layer, check for disabling inter-layer // prediction. 
    if (svc->spatial_layer_id > 0) vp9_svc_constrain_inter_layer_pred(cpi);
    vp9_svc_assert_constraints_pattern(cpi);
  }

  if (cpi->rc.last_post_encode_dropped_scene_change) {
    cpi->rc.high_source_sad = 1;
    svc->high_source_sad_superframe = 1;
    // For now disable use_source_sad since Last_Source will not be the
    // previously encoded frame but the dropped one.
    cpi->sf.use_source_sad = 0;
    cpi->rc.last_post_encode_dropped_scene_change = 0;
  }
  // Check if this high_source_sad (scene/slide change) frame should be
  // encoded at high/max QP, and if so, set the q and adjust some rate
  // control parameters.
  if (cpi->sf.overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ &&
      (cpi->rc.high_source_sad ||
       (cpi->use_svc && svc->high_source_sad_superframe))) {
    if (vp9_encodedframe_overshoot(cpi, -1, &q)) {
      vp9_set_quantizer(cm, q);
      vp9_set_variance_partition_thresholds(cpi, q, 0);
    }
  }

#if !CONFIG_REALTIME_ONLY
  // Variance adaptive and in frame q adjustment experiments are mutually
  // exclusive.
  if (cpi->oxcf.aq_mode == VARIANCE_AQ) {
    vp9_vaq_frame_setup(cpi);
  } else if (cpi->oxcf.aq_mode == EQUATOR360_AQ) {
    vp9_360aq_frame_setup(cpi);
  } else if (cpi->oxcf.aq_mode == COMPLEXITY_AQ) {
    vp9_setup_in_frame_q_adj(cpi);
  } else if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) {
    // Note: this may interact badly with rate control and still needs
    // proper handling.
    vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi);
  } else {
#endif
    if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
      vp9_cyclic_refresh_setup(cpi);
    } else if (cpi->roi.enabled && !frame_is_intra_only(cm)) {
      apply_roi_map(cpi);
    }
#if !CONFIG_REALTIME_ONLY
  }
#endif

  apply_active_map(cpi);

  vp9_encode_frame(cpi);

  // Check if we should re-encode this frame at high Q because of high
  // overshoot based on the encoded frame size. Only for frames where
  // high temporal-source SAD is detected.
  // For SVC: all spatial layers are checked for re-encoding.
  if (cpi->sf.overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
      (cpi->rc.high_source_sad ||
       (cpi->use_svc && svc->high_source_sad_superframe))) {
    int frame_size = 0;
    // Get an estimate of the encoded frame size.
    save_coding_context(cpi);
    vp9_pack_bitstream(cpi, dest, size);
    restore_coding_context(cpi);
    frame_size = (int)(*size) << 3;
    // Check if encoded frame will overshoot too much, and if so, set the q
    // and adjust some rate control parameters, and return to re-encode the
    // frame.
    if (vp9_encodedframe_overshoot(cpi, frame_size, &q)) {
      vpx_clear_system_state();
      vp9_set_quantizer(cm, q);
      vp9_set_variance_partition_thresholds(cpi, q, 0);
      suppress_active_map(cpi);
      // Turn off cyclic refresh for the re-encoded frame.
      if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) {
        CYCLIC_REFRESH *const cr = cpi->cyclic_refresh;
        unsigned char *const seg_map = cpi->segmentation_map;
        memset(seg_map, 0, cm->mi_rows * cm->mi_cols);
        memset(cr->last_coded_q_map, MAXQ,
               cm->mi_rows * cm->mi_cols * sizeof(*cr->last_coded_q_map));
        cr->sb_index = 0;
        vp9_disable_segmentation(&cm->seg);
      }
      apply_active_map(cpi);
      vp9_encode_frame(cpi);
    }
  }

  // Update some stats from cyclic refresh, and check for golden frame update.
  if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
      !frame_is_intra_only(cm))
    vp9_cyclic_refresh_postencode(cpi);

  // Update the skip mb flag probabilities based on the distribution
  // seen in the last encoder iteration.
  // update_base_skip_probs(cpi);
  vpx_clear_system_state();
  return 1;
}

#if !CONFIG_REALTIME_ONLY
#define MAX_QSTEP_ADJ 4
static int get_qstep_adj(int rate_excess, int rate_limit) {
  int qstep = rate_limit ?
((rate_excess + rate_limit / 2) / rate_limit) : INT_MAX; return VPXMIN(qstep, MAX_QSTEP_ADJ); } static void encode_with_recode_loop(VP9_COMP *cpi, size_t *size, uint8_t *dest) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int bottom_index, top_index; int loop_count = 0; int loop_at_this_size = 0; int loop = 0; int overshoot_seen = 0; int undershoot_seen = 0; int frame_over_shoot_limit; int frame_under_shoot_limit; int q = 0, q_low = 0, q_high = 0; int enable_acl; #ifdef AGGRESSIVE_VBR int qrange_adj = 1; #endif if (cm->show_existing_frame) { rc->this_frame_target = 0; if (is_psnr_calc_enabled(cpi)) set_raw_source_frame(cpi); return; } set_size_independent_vars(cpi); enable_acl = cpi->sf.allow_acl ? (cm->frame_type == KEY_FRAME) || (cpi->twopass.gf_group.index == 1) : 0; do { vpx_clear_system_state(); set_frame_size(cpi); if (loop_count == 0 || cpi->resize_pending != 0) { set_size_dependent_vars(cpi, &q, &bottom_index, &top_index); #ifdef AGGRESSIVE_VBR if (two_pass_first_group_inter(cpi)) { // Adjustment limits for min and max q qrange_adj = VPXMAX(1, (top_index - bottom_index) / 2); bottom_index = VPXMAX(bottom_index - qrange_adj / 2, oxcf->best_allowed_q); top_index = VPXMIN(oxcf->worst_allowed_q, top_index + qrange_adj / 2); } #endif // TODO(agrange) Scale cpi->max_mv_magnitude if frame-size has changed. set_mv_search_params(cpi); // Reset the loop state for new frame size. overshoot_seen = 0; undershoot_seen = 0; // Reconfiguration for change in frame size has concluded. cpi->resize_pending = 0; q_low = bottom_index; q_high = top_index; loop_at_this_size = 0; } // Decide frame size bounds first time through. if (loop_count == 0) { vp9_rc_compute_frame_size_bounds(cpi, rc->this_frame_target, &frame_under_shoot_limit, &frame_over_shoot_limit); } cpi->Source = vp9_scale_if_required(cm, cpi->un_scaled_source, &cpi->scaled_source, (oxcf->pass == 0), EIGHTTAP, 0); // Unfiltered raw source used in metrics calculation if the source // has been filtered. if (is_psnr_calc_enabled(cpi)) { #ifdef ENABLE_KF_DENOISE if (is_spatial_denoise_enabled(cpi)) { cpi->raw_source_frame = vp9_scale_if_required( cm, &cpi->raw_unscaled_source, &cpi->raw_scaled_source, (oxcf->pass == 0), EIGHTTAP, 0); } else { cpi->raw_source_frame = cpi->Source; } #else cpi->raw_source_frame = cpi->Source; #endif } if (cpi->unscaled_last_source != NULL) cpi->Last_Source = vp9_scale_if_required(cm, cpi->unscaled_last_source, &cpi->scaled_last_source, (oxcf->pass == 0), EIGHTTAP, 0); if (frame_is_intra_only(cm) == 0) { if (loop_count > 0) { release_scaled_references(cpi); } vp9_scale_references(cpi); } #if CONFIG_RATE_CTRL // TODO(angiebird): This is a hack for making sure the encoder use the // external_quantize_index exactly. Avoid this kind of hack later. if (cpi->encode_command.use_external_quantize_index) { q = cpi->encode_command.external_quantize_index; } #endif vp9_set_quantizer(cm, q); if (loop_count == 0) setup_frame(cpi); // Variance adaptive and in frame q adjustment experiments are mutually // exclusive. 
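    // Exactly one of the AQ setup paths below runs per iteration: variance
    // AQ, 360-video (equator) AQ, complexity AQ, lookahead AQ, or PSNR AQ.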
if (oxcf->aq_mode == VARIANCE_AQ) { vp9_vaq_frame_setup(cpi); } else if (oxcf->aq_mode == EQUATOR360_AQ) { vp9_360aq_frame_setup(cpi); } else if (oxcf->aq_mode == COMPLEXITY_AQ) { vp9_setup_in_frame_q_adj(cpi); } else if (oxcf->aq_mode == LOOKAHEAD_AQ) { vp9_alt_ref_aq_setup_map(cpi->alt_ref_aq, cpi); } else if (oxcf->aq_mode == PSNR_AQ) { vp9_psnr_aq_mode_setup(&cm->seg); } vp9_encode_frame(cpi); // Update the skip mb flag probabilities based on the distribution // seen in the last encoder iteration. // update_base_skip_probs(cpi); vpx_clear_system_state(); // Dummy pack of the bitstream using up to date stats to get an // accurate estimate of output frame size to determine if we need // to recode. if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { save_coding_context(cpi); if (!cpi->sf.use_nonrd_pick_mode) vp9_pack_bitstream(cpi, dest, size); rc->projected_frame_size = (int)(*size) << 3; if (frame_over_shoot_limit == 0) frame_over_shoot_limit = 1; } #if CONFIG_RATE_CTRL // This part needs to be after save_coding_context() because // restore_coding_context will be called in the end of this function. // TODO(angiebird): This is a hack for making sure the encoder use the // external_quantize_index exactly. Avoid this kind of hack later. if (cpi->encode_command.use_external_quantize_index) { break; } #endif if (oxcf->rc_mode == VPX_Q) { loop = 0; } else { if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced && (rc->projected_frame_size < rc->max_frame_bandwidth)) { int last_q = q; int64_t kf_err; int64_t high_err_target = cpi->ambient_err; int64_t low_err_target = cpi->ambient_err >> 1; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { kf_err = vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); } else { kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); } #else kf_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm)); #endif // CONFIG_VP9_HIGHBITDEPTH // Prevent possible divide by zero error below for perfect KF kf_err += !kf_err; // The key frame is not good enough or we can afford // to make it better without undue risk of popping. if ((kf_err > high_err_target && rc->projected_frame_size <= frame_over_shoot_limit) || (kf_err > low_err_target && rc->projected_frame_size <= frame_under_shoot_limit)) { // Lower q_high q_high = q > q_low ? q - 1 : q_low; // Adjust Q q = (int)((q * high_err_target) / kf_err); q = VPXMIN(q, (q_high + q_low) >> 1); } else if (kf_err < low_err_target && rc->projected_frame_size >= frame_under_shoot_limit) { // The key frame is much better than the previous frame // Raise q_low q_low = q < q_high ? q + 1 : q_high; // Adjust Q q = (int)((q * low_err_target) / kf_err); q = VPXMIN(q, (q_high + q_low + 1) >> 1); } // Clamp Q to upper and lower limits: q = clamp(q, q_low, q_high); loop = q != last_q; } else if (recode_loop_test(cpi, frame_over_shoot_limit, frame_under_shoot_limit, q, VPXMAX(q_high, top_index), bottom_index)) { // Is the projected frame size out of range and are we allowed // to attempt to recode. int last_q = q; int retries = 0; int qstep; if (cpi->resize_pending == 1) { // Change in frame size so go back around the recode loop. cpi->rc.frame_size_selector = SCALE_STEP1 - cpi->rc.frame_size_selector; cpi->rc.next_frame_size_selector = cpi->rc.frame_size_selector; #if CONFIG_INTERNAL_STATS ++cpi->tot_recode_hits; #endif ++loop_count; loop = 1; continue; } // Frame size out of permitted range: // Update correction factor & compute new Q to try... 
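        // For example, with this_frame_target = 12000 bits and
        // projected_frame_size = 18000 bits, get_qstep_adj() returns
        // VPXMIN((18000 + 6000) / 12000, MAX_QSTEP_ADJ) = 2, so on the
        // overshoot path below q_low is raised to q + 2 (capped by q_high)
        // before the new q is picked.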
        // Frame is too large
        if (rc->projected_frame_size > rc->this_frame_target) {
          // Special case if the projected size is > the max allowed.
          if ((q == q_high) &&
              ((rc->projected_frame_size >= rc->max_frame_bandwidth) ||
               (!rc->is_src_frame_alt_ref &&
                (rc->projected_frame_size >=
                 big_rate_miss_high_threshold(cpi))))) {
            int max_rate = VPXMAX(1, VPXMIN(rc->max_frame_bandwidth,
                                            big_rate_miss_high_threshold(cpi)));
            double q_val_high;
            q_val_high = vp9_convert_qindex_to_q(q_high, cm->bit_depth);
            q_val_high =
                q_val_high * ((double)rc->projected_frame_size / max_rate);
            q_high = vp9_convert_q_to_qindex(q_val_high, cm->bit_depth);
            q_high = clamp(q_high, rc->best_quality, rc->worst_quality);
          }

          // Raise q_low to at least the current value.
          qstep =
              get_qstep_adj(rc->projected_frame_size, rc->this_frame_target);
          q_low = VPXMIN(q + qstep, q_high);

          if (undershoot_seen || loop_at_this_size > 1) {
            // Update the rate correction factor.
            vp9_rc_update_rate_correction_factors(cpi);

            q = (q_high + q_low + 1) / 2;
          } else {
            // Update the rate correction factor.
            vp9_rc_update_rate_correction_factors(cpi);

            q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
                                  VPXMAX(q_high, top_index));

            while (q < q_low && retries < 10) {
              vp9_rc_update_rate_correction_factors(cpi);
              q = vp9_rc_regulate_q(cpi, rc->this_frame_target, bottom_index,
                                    VPXMAX(q_high, top_index));
              retries++;
            }
          }

          overshoot_seen = 1;
        } else {
          // Frame is too small
          qstep =
              get_qstep_adj(rc->this_frame_target, rc->projected_frame_size);
          q_high = VPXMAX(q - qstep, q_low);

          if (overshoot_seen || loop_at_this_size > 1) {
            vp9_rc_update_rate_correction_factors(cpi);
            q = (q_high + q_low) / 2;
          } else {
            vp9_rc_update_rate_correction_factors(cpi);
            q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
                                  VPXMIN(q_low, bottom_index), top_index);
            // Special case reset for qlow for constrained quality.
            // This should only trigger where there is very substantial
            // undershoot on a frame and the auto cq level is above
            // the user-passed-in value.
            if (oxcf->rc_mode == VPX_CQ && q < q_low) {
              q_low = q;
            }

            while (q > q_high && retries < 10) {
              vp9_rc_update_rate_correction_factors(cpi);
              q = vp9_rc_regulate_q(cpi, rc->this_frame_target,
                                    VPXMIN(q_low, bottom_index), top_index);
              retries++;
            }
          }
          undershoot_seen = 1;
        }

        // Clamp Q to upper and lower limits:
        q = clamp(q, q_low, q_high);

        loop = (q != last_q);
      } else {
        loop = 0;
      }
    }

    // Special case for overlay frame.
    if (rc->is_src_frame_alt_ref &&
        rc->projected_frame_size < rc->max_frame_bandwidth)
      loop = 0;

    if (loop) {
      ++loop_count;
      ++loop_at_this_size;

#if CONFIG_INTERNAL_STATS
      ++cpi->tot_recode_hits;
#endif
    }

    if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF)
      if (loop) restore_coding_context(cpi);
  } while (loop);

#ifdef AGGRESSIVE_VBR
  if (two_pass_first_group_inter(cpi)) {
    cpi->twopass.active_worst_quality =
        VPXMIN(q + qrange_adj, oxcf->worst_allowed_q);
  } else if (!frame_is_kf_gf_arf(cpi)) {
#else
  if (!frame_is_kf_gf_arf(cpi)) {
#endif
    // Have we been forced to adapt Q outside the expected range by an extreme
    // rate miss. If so adjust the active maxQ for the subsequent frames.
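    // For example, if active_worst_quality was 44 but the loop had to settle
    // at q = 50 to meet the rate target, active_worst_quality becomes 50 so
    // that subsequent frames start from a more realistic ceiling.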
if (!rc->is_src_frame_alt_ref && (q > cpi->twopass.active_worst_quality)) { cpi->twopass.active_worst_quality = q; } else if (oxcf->vbr_corpus_complexity && q == q_low && rc->projected_frame_size < rc->this_frame_target) { cpi->twopass.active_worst_quality = VPXMAX(q, cpi->twopass.active_worst_quality - 1); } } if (enable_acl) { // Skip recoding, if model diff is below threshold const int thresh = compute_context_model_thresh(cpi); const int diff = compute_context_model_diff(cm); if (diff >= thresh) { vp9_encode_frame(cpi); } } if (cpi->sf.recode_loop >= ALLOW_RECODE_KFARFGF) { vpx_clear_system_state(); restore_coding_context(cpi); } } #endif // !CONFIG_REALTIME_ONLY static int get_ref_frame_flags(const VP9_COMP *cpi) { const int *const map = cpi->common.ref_frame_map; const int gold_is_last = map[cpi->gld_fb_idx] == map[cpi->lst_fb_idx]; const int alt_is_last = map[cpi->alt_fb_idx] == map[cpi->lst_fb_idx]; const int gold_is_alt = map[cpi->gld_fb_idx] == map[cpi->alt_fb_idx]; int flags = VP9_ALT_FLAG | VP9_GOLD_FLAG | VP9_LAST_FLAG; if (gold_is_last) flags &= ~VP9_GOLD_FLAG; if (cpi->rc.frames_till_gf_update_due == INT_MAX && (cpi->svc.number_temporal_layers == 1 && cpi->svc.number_spatial_layers == 1)) flags &= ~VP9_GOLD_FLAG; if (alt_is_last) flags &= ~VP9_ALT_FLAG; if (gold_is_alt) flags &= ~VP9_ALT_FLAG; return flags; } static void set_ext_overrides(VP9_COMP *cpi) { // Overrides the defaults with the externally supplied values with // vp9_update_reference() and vp9_update_entropy() calls // Note: The overrides are valid only for the next frame passed // to encode_frame_to_data_rate() function if (cpi->ext_refresh_frame_context_pending) { cpi->common.refresh_frame_context = cpi->ext_refresh_frame_context; cpi->ext_refresh_frame_context_pending = 0; } if (cpi->ext_refresh_frame_flags_pending) { cpi->refresh_last_frame = cpi->ext_refresh_last_frame; cpi->refresh_golden_frame = cpi->ext_refresh_golden_frame; cpi->refresh_alt_ref_frame = cpi->ext_refresh_alt_ref_frame; } } YV12_BUFFER_CONFIG *vp9_svc_twostage_scale( VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled, YV12_BUFFER_CONFIG *scaled_temp, INTERP_FILTER filter_type, int phase_scaler, INTERP_FILTER filter_type2, int phase_scaler2) { if (cm->mi_cols * MI_SIZE != unscaled->y_width || cm->mi_rows * MI_SIZE != unscaled->y_height) { #if CONFIG_VP9_HIGHBITDEPTH if (cm->bit_depth == VPX_BITS_8) { vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2, phase_scaler2); vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type, phase_scaler); } else { scale_and_extend_frame(unscaled, scaled_temp, (int)cm->bit_depth, filter_type2, phase_scaler2); scale_and_extend_frame(scaled_temp, scaled, (int)cm->bit_depth, filter_type, phase_scaler); } #else vp9_scale_and_extend_frame(unscaled, scaled_temp, filter_type2, phase_scaler2); vp9_scale_and_extend_frame(scaled_temp, scaled, filter_type, phase_scaler); #endif // CONFIG_VP9_HIGHBITDEPTH return scaled; } else { return unscaled; } } YV12_BUFFER_CONFIG *vp9_scale_if_required( VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled, int use_normative_scaler, INTERP_FILTER filter_type, int phase_scaler) { if (cm->mi_cols * MI_SIZE != unscaled->y_width || cm->mi_rows * MI_SIZE != unscaled->y_height) { #if CONFIG_VP9_HIGHBITDEPTH if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) && unscaled->y_height <= (scaled->y_height << 1)) if (cm->bit_depth == VPX_BITS_8) vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler); 
      else
        scale_and_extend_frame(unscaled, scaled, (int)cm->bit_depth,
                               filter_type, phase_scaler);
    else
      scale_and_extend_frame_nonnormative(unscaled, scaled,
                                          (int)cm->bit_depth);
#else
    if (use_normative_scaler && unscaled->y_width <= (scaled->y_width << 1) &&
        unscaled->y_height <= (scaled->y_height << 1))
      vp9_scale_and_extend_frame(unscaled, scaled, filter_type, phase_scaler);
    else
      scale_and_extend_frame_nonnormative(unscaled, scaled);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    return scaled;
  } else {
    return unscaled;
  }
}

static void set_ref_sign_bias(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);
  const int cur_frame_index = ref_buffer->frame_index;
  MV_REFERENCE_FRAME ref_frame;

  for (ref_frame = LAST_FRAME; ref_frame < MAX_REF_FRAMES; ++ref_frame) {
    const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame);
    const RefCntBuffer *const ref_cnt_buf =
        get_ref_cnt_buffer(&cpi->common, buf_idx);
    if (ref_cnt_buf) {
      cm->ref_frame_sign_bias[ref_frame] =
          cur_frame_index < ref_cnt_buf->frame_index;
    }
  }
}

static int setup_interp_filter_search_mask(VP9_COMP *cpi) {
  INTERP_FILTER ifilter;
  int ref_total[MAX_REF_FRAMES] = { 0 };
  MV_REFERENCE_FRAME ref;
  int mask = 0;
  if (cpi->common.last_frame_type == KEY_FRAME || cpi->refresh_alt_ref_frame)
    return mask;
  for (ref = LAST_FRAME; ref <= ALTREF_FRAME; ++ref)
    for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter)
      ref_total[ref] += cpi->interp_filter_selected[ref][ifilter];

  for (ifilter = EIGHTTAP; ifilter <= EIGHTTAP_SHARP; ++ifilter) {
    if ((ref_total[LAST_FRAME] &&
         cpi->interp_filter_selected[LAST_FRAME][ifilter] == 0) &&
        (ref_total[GOLDEN_FRAME] == 0 ||
         cpi->interp_filter_selected[GOLDEN_FRAME][ifilter] * 50 <
             ref_total[GOLDEN_FRAME]) &&
        (ref_total[ALTREF_FRAME] == 0 ||
         cpi->interp_filter_selected[ALTREF_FRAME][ifilter] * 50 <
             ref_total[ALTREF_FRAME]))
      mask |= 1 << ifilter;
  }
  return mask;
}

#ifdef ENABLE_KF_DENOISE
// Baseline kernel weights for denoise
static uint8_t dn_kernel_3[9] = { 1, 2, 1, 2, 4, 2, 1, 2, 1 };
static uint8_t dn_kernel_5[25] = { 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 2, 4,
                                   2, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1 };

static INLINE void add_denoise_point(int centre_val, int data_val, int thresh,
                                     uint8_t point_weight, int *sum_val,
                                     int *sum_weight) {
  if (abs(centre_val - data_val) <= thresh) {
    *sum_weight += point_weight;
    *sum_val += (int)data_val * (int)point_weight;
  }
}

static void spatial_denoise_point(uint8_t *src_ptr, const int stride,
                                  const int strength) {
  int sum_weight = 0;
  int sum_val = 0;
  int thresh = strength;
  int kernel_size = 5;
  int half_k_size = 2;
  int i, j;
  int max_diff = 0;
  uint8_t *tmp_ptr;
  uint8_t *kernel_ptr;

  // Find the maximum deviation from the source point in the locale.
  tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
  for (i = 0; i < kernel_size + 2; ++i) {
    for (j = 0; j < kernel_size + 2; ++j) {
      max_diff = VPXMAX(max_diff, abs((int)*src_ptr - (int)tmp_ptr[j]));
    }
    tmp_ptr += stride;
  }

  // Select the kernel size.
  if (max_diff > (strength + (strength >> 1))) {
    kernel_size = 3;
    half_k_size = 1;
    thresh = thresh >> 1;
  }
  kernel_ptr = (kernel_size == 3) ? dn_kernel_3 : dn_kernel_5;

  // Apply the kernel.
  tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
  for (i = 0; i < kernel_size; ++i) {
    for (j = 0; j < kernel_size; ++j) {
      add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernel_ptr,
                        &sum_val, &sum_weight);
      ++kernel_ptr;
    }
    tmp_ptr += stride;
  }

  // Update the source value with the new filtered value.
  *src_ptr = (uint8_t)((sum_val + (sum_weight >> 1)) / sum_weight);
}

#if CONFIG_VP9_HIGHBITDEPTH
static void highbd_spatial_denoise_point(uint16_t *src_ptr, const int stride,
                                         const int strength) {
  int sum_weight = 0;
  int sum_val = 0;
  int thresh = strength;
  int kernel_size = 5;
  int half_k_size = 2;
  int i, j;
  int max_diff = 0;
  uint16_t *tmp_ptr;
  uint8_t *kernel_ptr;

  // Find the maximum deviation from the source point in the locale.
  tmp_ptr = src_ptr - (stride * (half_k_size + 1)) - (half_k_size + 1);
  for (i = 0; i < kernel_size + 2; ++i) {
    for (j = 0; j < kernel_size + 2; ++j) {
      max_diff = VPXMAX(max_diff, abs((int)*src_ptr - (int)tmp_ptr[j]));
    }
    tmp_ptr += stride;
  }

  // Select the kernel size.
  if (max_diff > (strength + (strength >> 1))) {
    kernel_size = 3;
    half_k_size = 1;
    thresh = thresh >> 1;
  }
  kernel_ptr = (kernel_size == 3) ? dn_kernel_3 : dn_kernel_5;

  // Apply the kernel.
  tmp_ptr = src_ptr - (stride * half_k_size) - half_k_size;
  for (i = 0; i < kernel_size; ++i) {
    for (j = 0; j < kernel_size; ++j) {
      add_denoise_point((int)*src_ptr, (int)tmp_ptr[j], thresh, *kernel_ptr,
                        &sum_val, &sum_weight);
      ++kernel_ptr;
    }
    tmp_ptr += stride;
  }

  // Update the source value with the new filtered value.
  *src_ptr = (uint16_t)((sum_val + (sum_weight >> 1)) / sum_weight);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

// Apply thresholded spatial noise suppression to a given buffer.
static void spatial_denoise_buffer(VP9_COMP *cpi, uint8_t *buffer,
                                   const int stride, const int width,
                                   const int height, const int strength) {
  VP9_COMMON *const cm = &cpi->common;
  uint8_t *src_ptr = buffer;
  int row;
  int col;

  for (row = 0; row < height; ++row) {
    for (col = 0; col < width; ++col) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (cm->use_highbitdepth)
        highbd_spatial_denoise_point(CONVERT_TO_SHORTPTR(&src_ptr[col]),
                                     stride, strength);
      else
        spatial_denoise_point(&src_ptr[col], stride, strength);
#else
      spatial_denoise_point(&src_ptr[col], stride, strength);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    }
    src_ptr += stride;
  }
}

// Apply thresholded spatial noise suppression to the source.
static void spatial_denoise_frame(VP9_COMP *cpi) {
  YV12_BUFFER_CONFIG *src = cpi->Source;
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  TWO_PASS *const twopass = &cpi->twopass;
  VP9_COMMON *const cm = &cpi->common;

  // Base the filter strength on the current active max Q.
  const int q = (int)(vp9_convert_qindex_to_q(twopass->active_worst_quality,
                                              cm->bit_depth));
  int strength =
      VPXMAX(oxcf->arnr_strength >> 2, VPXMIN(oxcf->arnr_strength, (q >> 4)));

  // Denoise each of the Y, U and V buffers.
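  // The per-point filter above defaults to the 5x5 kernel and falls back to
  // the 3x3 kernel with a halved threshold when the local deviation exceeds
  // 1.5x the strength; e.g. with strength = 8, a local max_diff > 12 selects
  // the 3x3 kernel and thresh = 4. Chroma is filtered harder than luma: the
  // strength is scaled by 1.5 and then doubled for the U and V calls.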
  spatial_denoise_buffer(cpi, src->y_buffer, src->y_stride, src->y_width,
                         src->y_height, strength);

  strength += (strength >> 1);
  spatial_denoise_buffer(cpi, src->u_buffer, src->uv_stride, src->uv_width,
                         src->uv_height, strength << 1);

  spatial_denoise_buffer(cpi, src->v_buffer, src->uv_stride, src->uv_width,
                         src->uv_height, strength << 1);
}
#endif  // ENABLE_KF_DENOISE

#if !CONFIG_REALTIME_ONLY
static void vp9_try_disable_lookahead_aq(VP9_COMP *cpi, size_t *size,
                                         uint8_t *dest) {
  if (cpi->common.seg.enabled)
    if (ALT_REF_AQ_PROTECT_GAIN) {
      size_t nsize = *size;
      int overhead;

      // TODO(yuryg): optimize this, as we don't really need to repack.
      save_coding_context(cpi);
      vp9_disable_segmentation(&cpi->common.seg);
      vp9_pack_bitstream(cpi, dest, &nsize);
      restore_coding_context(cpi);

      overhead = (int)*size - (int)nsize;

      if (vp9_alt_ref_aq_disable_if(cpi->alt_ref_aq, overhead, (int)*size))
        vp9_encode_frame(cpi);
      else
        vp9_enable_segmentation(&cpi->common.seg);
    }
}
#endif

static void set_frame_index(VP9_COMP *cpi, VP9_COMMON *cm) {
  RefCntBuffer *const ref_buffer = get_ref_cnt_buffer(cm, cm->new_fb_idx);

  if (ref_buffer) {
    const GF_GROUP *const gf_group = &cpi->twopass.gf_group;
    ref_buffer->frame_index =
        cm->current_video_frame + gf_group->arf_src_offset[gf_group->index];
  }
}

static void set_mb_ssim_rdmult_scaling(VP9_COMP *cpi) {
  VP9_COMMON *cm = &cpi->common;
  ThreadData *td = &cpi->td;
  MACROBLOCK *x = &td->mb;
  MACROBLOCKD *xd = &x->e_mbd;
  uint8_t *y_buffer = cpi->Source->y_buffer;
  const int y_stride = cpi->Source->y_stride;
  const int block_size = BLOCK_16X16;

  const int num_8x8_w = num_8x8_blocks_wide_lookup[block_size];
  const int num_8x8_h = num_8x8_blocks_high_lookup[block_size];
  const int num_cols = (cm->mi_cols + num_8x8_w - 1) / num_8x8_w;
  const int num_rows = (cm->mi_rows + num_8x8_h - 1) / num_8x8_h;
  double log_sum = 0.0;
  int row, col;

  // Loop through each 16x16 block.
  for (row = 0; row < num_rows; ++row) {
    for (col = 0; col < num_cols; ++col) {
      int mi_row, mi_col;
      double var = 0.0, num_of_var = 0.0;
      const int index = row * num_cols + col;

      for (mi_row = row * num_8x8_h;
           mi_row < cm->mi_rows && mi_row < (row + 1) * num_8x8_h; ++mi_row) {
        for (mi_col = col * num_8x8_w;
             mi_col < cm->mi_cols && mi_col < (col + 1) * num_8x8_w;
             ++mi_col) {
          struct buf_2d buf;
          const int row_offset_y = mi_row << 3;
          const int col_offset_y = mi_col << 3;

          buf.buf = y_buffer + row_offset_y * y_stride + col_offset_y;
          buf.stride = y_stride;

          // In order to keep SSIM_VAR_SCALE on the same scale for both 8-bit
          // and high-bit-depth videos, the variance needs to be divided by
          // 2.0 or 64.0 separately.
          // TODO(sdeng): need to tune for 12bit videos.
#if CONFIG_VP9_HIGHBITDEPTH
          if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
            var += vp9_high_get_sby_variance(cpi, &buf, BLOCK_8X8, xd->bd);
          else
#endif
            var += vp9_get_sby_variance(cpi, &buf, BLOCK_8X8);

          num_of_var += 1.0;
        }
      }
      var = var / num_of_var / 64.0;

      // Curve fitting with an exponential model on all 16x16 blocks from the
      // Midres dataset.
      var = 67.035434 * (1 - exp(-0.0021489 * var)) + 17.492222;
      cpi->mi_ssim_rdmult_scaling_factors[index] = var;
      log_sum += log(var);
    }
  }
  log_sum = exp(log_sum / (double)(num_rows * num_cols));

  for (row = 0; row < num_rows; ++row) {
    for (col = 0; col < num_cols; ++col) {
      const int index = row * num_cols + col;
      cpi->mi_ssim_rdmult_scaling_factors[index] /= log_sum;
    }
  }

  (void)xd;
}

// Process the Wiener variance on a 16x16 block basis.
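// For each 16x16 block, the routines below transform the raw source block
// (the residual against a zero prediction), take (approximately) the median
// absolute transform coefficient as a noise estimate m, and shrink each AC
// coefficient c with the Wiener-style gain c' = c * c^2 / (c^2 + m^2); for
// example, c = 10 with m = 5 gives c' = 10 * 100 / 125 = 8. The average
// energy of the shrunken coefficients becomes the block's mb_wiener_variance
// entry.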
static int qsort_comp(const void *elem1, const void *elem2) { int a = *((const int *)elem1); int b = *((const int *)elem2); if (a > b) return 1; if (a < b) return -1; return 0; } static void init_mb_wiener_var_buffer(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; if (cpi->mb_wiener_variance && cpi->mb_wiener_var_rows >= cm->mb_rows && cpi->mb_wiener_var_cols >= cm->mb_cols) return; vpx_free(cpi->mb_wiener_variance); cpi->mb_wiener_variance = NULL; CHECK_MEM_ERROR( cm, cpi->mb_wiener_variance, vpx_calloc(cm->mb_rows * cm->mb_cols, sizeof(*cpi->mb_wiener_variance))); cpi->mb_wiener_var_rows = cm->mb_rows; cpi->mb_wiener_var_cols = cm->mb_cols; } static void set_mb_wiener_variance(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; uint8_t *buffer = cpi->Source->y_buffer; int buf_stride = cpi->Source->y_stride; #if CONFIG_VP9_HIGHBITDEPTH ThreadData *td = &cpi->td; MACROBLOCK *x = &td->mb; MACROBLOCKD *xd = &x->e_mbd; DECLARE_ALIGNED(16, uint16_t, zero_pred16[32 * 32]); DECLARE_ALIGNED(16, uint8_t, zero_pred8[32 * 32]); uint8_t *zero_pred; #else DECLARE_ALIGNED(16, uint8_t, zero_pred[32 * 32]); #endif DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]); DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]); int mb_row, mb_col, count = 0; // Hard coded operating block size const int block_size = 16; const int coeff_count = block_size * block_size; const TX_SIZE tx_size = TX_16X16; #if CONFIG_VP9_HIGHBITDEPTH xd->cur_buf = cpi->Source; if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { zero_pred = CONVERT_TO_BYTEPTR(zero_pred16); memset(zero_pred16, 0, sizeof(*zero_pred16) * coeff_count); } else { zero_pred = zero_pred8; memset(zero_pred8, 0, sizeof(*zero_pred8) * coeff_count); } #else memset(zero_pred, 0, sizeof(*zero_pred) * coeff_count); #endif cpi->norm_wiener_variance = 0; for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { int idx; int16_t median_val = 0; uint8_t *mb_buffer = buffer + mb_row * block_size * buf_stride + mb_col * block_size; int64_t wiener_variance = 0; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vpx_highbd_subtract_block(block_size, block_size, src_diff, block_size, mb_buffer, buf_stride, zero_pred, block_size, xd->bd); highbd_wht_fwd_txfm(src_diff, block_size, coeff, tx_size); } else { vpx_subtract_block(block_size, block_size, src_diff, block_size, mb_buffer, buf_stride, zero_pred, block_size); wht_fwd_txfm(src_diff, block_size, coeff, tx_size); } #else vpx_subtract_block(block_size, block_size, src_diff, block_size, mb_buffer, buf_stride, zero_pred, block_size); wht_fwd_txfm(src_diff, block_size, coeff, tx_size); #endif // CONFIG_VP9_HIGHBITDEPTH coeff[0] = 0; for (idx = 1; idx < coeff_count; ++idx) coeff[idx] = abs(coeff[idx]); qsort(coeff, coeff_count - 1, sizeof(*coeff), qsort_comp); // Noise level estimation median_val = coeff[coeff_count / 2]; // Wiener filter for (idx = 1; idx < coeff_count; ++idx) { int64_t sqr_coeff = (int64_t)coeff[idx] * coeff[idx]; int64_t tmp_coeff = (int64_t)coeff[idx]; if (median_val) { tmp_coeff = (sqr_coeff * coeff[idx]) / (sqr_coeff + (int64_t)median_val * median_val); } wiener_variance += tmp_coeff * tmp_coeff; } cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col] = wiener_variance / coeff_count; cpi->norm_wiener_variance += cpi->mb_wiener_variance[mb_row * cm->mb_cols + mb_col]; ++count; } } if (count) cpi->norm_wiener_variance /= count; cpi->norm_wiener_variance = VPXMAX(1, cpi->norm_wiener_variance); } static void encode_frame_to_data_rate(VP9_COMP *cpi, 
size_t *size, uint8_t *dest, unsigned int *frame_flags) { VP9_COMMON *const cm = &cpi->common; const VP9EncoderConfig *const oxcf = &cpi->oxcf; struct segmentation *const seg = &cm->seg; TX_SIZE t; // SVC: skip encoding of enhancement layer if the layer target bandwidth = 0. // No need to set svc.skip_enhancement_layer if whole superframe will be // dropped. if (cpi->use_svc && cpi->svc.spatial_layer_id > 0 && cpi->oxcf.target_bandwidth == 0 && !(cpi->svc.framedrop_mode != LAYER_DROP && (cpi->svc.framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP || cpi->svc .force_drop_constrained_from_above[cpi->svc.number_spatial_layers - 1]) && cpi->svc.drop_spatial_layer[0])) { cpi->svc.skip_enhancement_layer = 1; vp9_rc_postencode_update_drop_frame(cpi); cpi->ext_refresh_frame_flags_pending = 0; cpi->last_frame_dropped = 1; cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 1; cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id] = 1; vp9_inc_frame_in_layer(cpi); return; } set_ext_overrides(cpi); vpx_clear_system_state(); #ifdef ENABLE_KF_DENOISE // Spatial denoise of key frame. if (is_spatial_denoise_enabled(cpi)) spatial_denoise_frame(cpi); #endif if (cm->show_existing_frame == 0) { // Update frame index set_frame_index(cpi, cm); // Set the arf sign bias for this frame. set_ref_sign_bias(cpi); } // Set default state for segment based loop filter update flags. cm->lf.mode_ref_delta_update = 0; if (cpi->oxcf.pass == 2 && cpi->sf.adaptive_interp_filter_search) cpi->sf.interp_filter_search_mask = setup_interp_filter_search_mask(cpi); // Set various flags etc to special state if it is a key frame. if (frame_is_intra_only(cm)) { // Reset the loop filter deltas and segmentation map. vp9_reset_segment_features(&cm->seg); // If segmentation is enabled force a map update for key frames. if (seg->enabled) { seg->update_map = 1; seg->update_data = 1; } // The alternate reference frame cannot be active for a key frame. cpi->rc.source_alt_ref_active = 0; cm->error_resilient_mode = oxcf->error_resilient_mode; cm->frame_parallel_decoding_mode = oxcf->frame_parallel_decoding_mode; // By default, encoder assumes decoder can use prev_mi. if (cm->error_resilient_mode) { cm->frame_parallel_decoding_mode = 1; cm->reset_frame_context = 0; cm->refresh_frame_context = 0; } else if (cm->intra_only) { // Only reset the current context. cm->reset_frame_context = 2; } } if (oxcf->tuning == VP8_TUNE_SSIM) set_mb_ssim_rdmult_scaling(cpi); if (oxcf->aq_mode == PERCEPTUAL_AQ) { init_mb_wiener_var_buffer(cpi); set_mb_wiener_variance(cpi); } vpx_clear_system_state(); #if CONFIG_INTERNAL_STATS memset(cpi->mode_chosen_counts, 0, MAX_MODES * sizeof(*cpi->mode_chosen_counts)); #endif #if CONFIG_CONSISTENT_RECODE // Backup to ensure consistency between recodes save_encode_params(cpi); #endif if (cpi->sf.recode_loop == DISALLOW_RECODE) { if (!encode_without_recode_loop(cpi, size, dest)) return; } else { #if !CONFIG_REALTIME_ONLY encode_with_recode_loop(cpi, size, dest); #endif } // TODO(jingning): When using show existing frame mode, we assume that the // current ARF will be directly used as the final reconstructed frame. This is // an encoder control scheme. One could in principle explore other // possibilities to arrange the reference frame buffer and their coding order. 
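#if 0
  // A minimal sketch of the reference-count bookkeeping performed by
  // ref_cnt_fb() on this path (the real helper is defined in common code):
  // the buffer currently referenced by cm->new_fb_idx gives up one
  // reference, new_fb_idx is repointed at the ARF buffer, and that buffer
  // gains one reference.
  {
    const int old_idx = cm->new_fb_idx;
    if (old_idx >= 0) --cm->buffer_pool->frame_bufs[old_idx].ref_count;
    cm->new_fb_idx = cm->ref_frame_map[cpi->alt_fb_idx];
    ++cm->buffer_pool->frame_bufs[cm->new_fb_idx].ref_count;
  }
#endif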
  if (cm->show_existing_frame) {
    ref_cnt_fb(cm->buffer_pool->frame_bufs, &cm->new_fb_idx,
               cm->ref_frame_map[cpi->alt_fb_idx]);
  }

#if !CONFIG_REALTIME_ONLY
  // Disable segmentation if it decreases the rate/distortion ratio.
  if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ)
    vp9_try_disable_lookahead_aq(cpi, size, dest);
#endif

#if CONFIG_VP9_TEMPORAL_DENOISING
#ifdef OUTPUT_YUV_DENOISED
  if (oxcf->noise_sensitivity > 0 && denoise_svc(cpi)) {
    vpx_write_yuv_frame(yuv_denoised_file,
                        &cpi->denoiser.running_avg_y[INTRA_FRAME]);
  }
#endif
#endif
#ifdef OUTPUT_YUV_SKINMAP
  if (cpi->common.current_video_frame > 1) {
    vp9_output_skin_map(cpi, yuv_skinmap_file);
  }
#endif

  // Special case code to reduce pulsing when key frames are forced at a
  // fixed interval. Note the reconstruction error if it is the frame before
  // the forced key frame.
  if (cpi->rc.next_key_frame_forced && cpi->rc.frames_to_key == 1) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (cm->use_highbitdepth) {
      cpi->ambient_err =
          vpx_highbd_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
    } else {
      cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
    }
#else
    cpi->ambient_err = vpx_get_y_sse(cpi->Source, get_frame_new_buffer(cm));
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }

  // If the encoder forced a KEY_FRAME decision, refresh the last frame.
  if (cm->frame_type == KEY_FRAME) cpi->refresh_last_frame = 1;

  cm->frame_to_show = get_frame_new_buffer(cm);
  cm->frame_to_show->color_space = cm->color_space;
  cm->frame_to_show->color_range = cm->color_range;
  cm->frame_to_show->render_width = cm->render_width;
  cm->frame_to_show->render_height = cm->render_height;

  // Pick the loop filter level for the frame.
  loopfilter_frame(cpi, cm);

  if (cpi->rc.use_post_encode_drop) save_coding_context(cpi);

  // Build the bitstream.
  vp9_pack_bitstream(cpi, dest, size);

  if (cpi->rc.use_post_encode_drop && cm->base_qindex < cpi->rc.worst_quality &&
      cpi->svc.spatial_layer_id == 0 && post_encode_drop_cbr(cpi, size)) {
    restore_coding_context(cpi);
    return;
  }

  cpi->last_frame_dropped = 0;
  cpi->svc.last_layer_dropped[cpi->svc.spatial_layer_id] = 0;
  if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)
    cpi->svc.num_encoded_top_layer++;

  // Keep track of the frame buffer index updated/refreshed for the
  // current encoded TL0 superframe.
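  // For example, if refresh_last_frame was set for this TL0 frame the slot
  // recorded below is lst_fb_idx; golden and alt-ref refreshes record
  // gld_fb_idx and alt_fb_idx respectively.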
if (cpi->svc.temporal_layer_id == 0) { if (cpi->refresh_last_frame) cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->lst_fb_idx; else if (cpi->refresh_golden_frame) cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->gld_fb_idx; else if (cpi->refresh_alt_ref_frame) cpi->svc.fb_idx_upd_tl0[cpi->svc.spatial_layer_id] = cpi->alt_fb_idx; } if (cm->seg.update_map) update_reference_segmentation_map(cpi); if (frame_is_intra_only(cm) == 0) { release_scaled_references(cpi); } vp9_update_reference_frames(cpi); if (!cm->show_existing_frame) { for (t = TX_4X4; t <= TX_32X32; ++t) { full_to_model_counts(cpi->td.counts->coef[t], cpi->td.rd_counts.coef_counts[t]); } if (!cm->error_resilient_mode && !cm->frame_parallel_decoding_mode) { if (!frame_is_intra_only(cm)) { vp9_adapt_mode_probs(cm); vp9_adapt_mv_probs(cm, cm->allow_high_precision_mv); } vp9_adapt_coef_probs(cm); } } cpi->ext_refresh_frame_flags_pending = 0; if (cpi->refresh_golden_frame == 1) cpi->frame_flags |= FRAMEFLAGS_GOLDEN; else cpi->frame_flags &= ~FRAMEFLAGS_GOLDEN; if (cpi->refresh_alt_ref_frame == 1) cpi->frame_flags |= FRAMEFLAGS_ALTREF; else cpi->frame_flags &= ~FRAMEFLAGS_ALTREF; cpi->ref_frame_flags = get_ref_frame_flags(cpi); cm->last_frame_type = cm->frame_type; vp9_rc_postencode_update(cpi, *size); *size = VPXMAX(1, *size); #if 0 output_frame_level_debug_stats(cpi); #endif if (cm->frame_type == KEY_FRAME) { // Tell the caller that the frame was coded as a key frame *frame_flags = cpi->frame_flags | FRAMEFLAGS_KEY; } else { *frame_flags = cpi->frame_flags & ~FRAMEFLAGS_KEY; } // Clear the one shot update flags for segmentation map and mode/ref loop // filter deltas. cm->seg.update_map = 0; cm->seg.update_data = 0; cm->lf.mode_ref_delta_update = 0; // keep track of the last coded dimensions cm->last_width = cm->width; cm->last_height = cm->height; // reset to normal state now that we are done. if (!cm->show_existing_frame) { cm->last_show_frame = cm->show_frame; cm->prev_frame = cm->cur_frame; } if (cm->show_frame) { vp9_swap_mi_and_prev_mi(cm); // Don't increment frame counters if this was an altref buffer // update not a real frame ++cm->current_video_frame; if (cpi->use_svc) vp9_inc_frame_in_layer(cpi); } if (cpi->use_svc) { cpi->svc .layer_context[cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id] .last_frame_type = cm->frame_type; // Reset layer_sync back to 0 for next frame. 
cpi->svc.spatial_layer_sync[cpi->svc.spatial_layer_id] = 0; } cpi->force_update_segmentation = 0; #if !CONFIG_REALTIME_ONLY if (cpi->oxcf.aq_mode == LOOKAHEAD_AQ) vp9_alt_ref_aq_unset_all(cpi->alt_ref_aq, cpi); #endif cpi->svc.previous_frame_is_intra_only = cm->intra_only; cpi->svc.set_intra_only_frame = 0; } static void SvcEncode(VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags) { vp9_rc_get_svc_params(cpi); encode_frame_to_data_rate(cpi, size, dest, frame_flags); } static void Pass0Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags) { if (cpi->oxcf.rc_mode == VPX_CBR) { vp9_rc_get_one_pass_cbr_params(cpi); } else { vp9_rc_get_one_pass_vbr_params(cpi); } encode_frame_to_data_rate(cpi, size, dest, frame_flags); } #if !CONFIG_REALTIME_ONLY static void Pass2Encode(VP9_COMP *cpi, size_t *size, uint8_t *dest, unsigned int *frame_flags) { cpi->allow_encode_breakout = ENCODE_BREAKOUT_ENABLED; #if CONFIG_MISMATCH_DEBUG mismatch_move_frame_idx_w(); #endif encode_frame_to_data_rate(cpi, size, dest, frame_flags); } #endif // !CONFIG_REALTIME_ONLY int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time) { VP9_COMMON *const cm = &cpi->common; struct vpx_usec_timer timer; int res = 0; const int subsampling_x = sd->subsampling_x; const int subsampling_y = sd->subsampling_y; #if CONFIG_VP9_HIGHBITDEPTH const int use_highbitdepth = (sd->flags & YV12_FLAG_HIGHBITDEPTH) != 0; #else const int use_highbitdepth = 0; #endif update_initial_width(cpi, use_highbitdepth, subsampling_x, subsampling_y); #if CONFIG_VP9_TEMPORAL_DENOISING setup_denoiser_buffer(cpi); #endif alloc_raw_frame_buffers(cpi); vpx_usec_timer_start(&timer); if (vp9_lookahead_push(cpi->lookahead, sd, time_stamp, end_time, use_highbitdepth, frame_flags)) res = -1; vpx_usec_timer_mark(&timer); cpi->time_receive_data += vpx_usec_timer_elapsed(&timer); if ((cm->profile == PROFILE_0 || cm->profile == PROFILE_2) && (subsampling_x != 1 || subsampling_y != 1)) { vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM, "Non-4:2:0 color format requires profile 1 or 3"); res = -1; } if ((cm->profile == PROFILE_1 || cm->profile == PROFILE_3) && (subsampling_x == 1 && subsampling_y == 1)) { vpx_internal_error(&cm->error, VPX_CODEC_INVALID_PARAM, "4:2:0 color format requires profile 0 or 2"); res = -1; } return res; } static int frame_is_reference(const VP9_COMP *cpi) { const VP9_COMMON *cm = &cpi->common; return cm->frame_type == KEY_FRAME || cpi->refresh_last_frame || cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame || cm->refresh_frame_context || cm->lf.mode_ref_delta_update || cm->seg.update_map || cm->seg.update_data; } static void adjust_frame_rate(VP9_COMP *cpi, const struct lookahead_entry *source) { int64_t this_duration; int step = 0; if (source->ts_start == cpi->first_time_stamp_ever) { this_duration = source->ts_end - source->ts_start; step = 1; } else { int64_t last_duration = cpi->last_end_time_stamp_seen - cpi->last_time_stamp_seen; this_duration = source->ts_end - cpi->last_end_time_stamp_seen; // do a step update if the duration changes by 10% if (last_duration) step = (int)((this_duration - last_duration) * 10 / last_duration); } if (this_duration) { if (step) { vp9_new_framerate(cpi, 10000000.0 / this_duration); } else { // Average this frame's rate into the last second's average // frame rate. If we haven't seen 1 second yet, then average // over the whole interval seen. 
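      // Worked example: with a 50 fps average (avg_duration = 200000 ticks)
      // over a full one-second window (interval = 10000000 ticks), a frame
      // of this_duration = 250000 ticks gives
      //   avg_duration = 200000 * (10000000 - 200000 + 250000) / 10000000
      //                = 201000,
      // easing the reported framerate from 50.0 to about 49.75 fps.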
const double interval = VPXMIN( (double)(source->ts_end - cpi->first_time_stamp_ever), 10000000.0); double avg_duration = 10000000.0 / cpi->framerate; avg_duration *= (interval - avg_duration + this_duration); avg_duration /= interval; vp9_new_framerate(cpi, 10000000.0 / avg_duration); } } cpi->last_time_stamp_seen = source->ts_start; cpi->last_end_time_stamp_seen = source->ts_end; } // Returns 0 if this is not an alt ref else the offset of the source frame // used as the arf midpoint. static int get_arf_src_index(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; int arf_src_index = 0; if (is_altref_enabled(cpi)) { if (cpi->oxcf.pass == 2) { const GF_GROUP *const gf_group = &cpi->twopass.gf_group; if (gf_group->update_type[gf_group->index] == ARF_UPDATE) { arf_src_index = gf_group->arf_src_offset[gf_group->index]; } } else if (rc->source_alt_ref_pending) { arf_src_index = rc->frames_till_gf_update_due; } } return arf_src_index; } static void check_src_altref(VP9_COMP *cpi, const struct lookahead_entry *source) { RATE_CONTROL *const rc = &cpi->rc; if (cpi->oxcf.pass == 2) { const GF_GROUP *const gf_group = &cpi->twopass.gf_group; rc->is_src_frame_alt_ref = (gf_group->update_type[gf_group->index] == OVERLAY_UPDATE); } else { rc->is_src_frame_alt_ref = cpi->alt_ref_source && (source == cpi->alt_ref_source); } if (rc->is_src_frame_alt_ref) { // Current frame is an ARF overlay frame. cpi->alt_ref_source = NULL; // Don't refresh the last buffer for an ARF overlay frame. It will // become the GF so preserve last as an alternative prediction option. cpi->refresh_last_frame = 0; } } #if CONFIG_INTERNAL_STATS static void adjust_image_stat(double y, double u, double v, double all, ImageStat *s) { s->stat[Y] += y; s->stat[U] += u; s->stat[V] += v; s->stat[ALL] += all; s->worst = VPXMIN(s->worst, all); } #endif // CONFIG_INTERNAL_STATS // Adjust the maximum allowable frame size for the target level. 
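// The helper below caps the next frame's bit budget at a fraction of the
// level's CPB size: 50% for intra-only frames, 40% for ARF frames
// (arf_src_index > 0), and 20% for ordinary inter frames.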
static void level_rc_framerate(VP9_COMP *cpi, int arf_src_index) { RATE_CONTROL *const rc = &cpi->rc; LevelConstraint *const ls = &cpi->level_constraint; VP9_COMMON *const cm = &cpi->common; const double max_cpb_size = ls->max_cpb_size; vpx_clear_system_state(); rc->max_frame_bandwidth = VPXMIN(rc->max_frame_bandwidth, ls->max_frame_size); if (frame_is_intra_only(cm)) { rc->max_frame_bandwidth = VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.5)); } else if (arf_src_index > 0) { rc->max_frame_bandwidth = VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.4)); } else { rc->max_frame_bandwidth = VPXMIN(rc->max_frame_bandwidth, (int)(max_cpb_size * 0.2)); } } static void update_level_info(VP9_COMP *cpi, size_t *size, int arf_src_index) { VP9_COMMON *const cm = &cpi->common; Vp9LevelInfo *const level_info = &cpi->level_info; Vp9LevelSpec *const level_spec = &level_info->level_spec; Vp9LevelStats *const level_stats = &level_info->level_stats; int i, idx; uint64_t luma_samples, dur_end; const uint32_t luma_pic_size = cm->width * cm->height; const uint32_t luma_pic_breadth = VPXMAX(cm->width, cm->height); LevelConstraint *const level_constraint = &cpi->level_constraint; const int8_t level_index = level_constraint->level_index; double cpb_data_size; vpx_clear_system_state(); // update level_stats level_stats->total_compressed_size += *size; if (cm->show_frame) { level_stats->total_uncompressed_size += luma_pic_size + 2 * (luma_pic_size >> (cm->subsampling_x + cm->subsampling_y)); level_stats->time_encoded = (cpi->last_end_time_stamp_seen - cpi->first_time_stamp_ever) / (double)TICKS_PER_SEC; } if (arf_src_index > 0) { if (!level_stats->seen_first_altref) { level_stats->seen_first_altref = 1; } else if (level_stats->frames_since_last_altref < level_spec->min_altref_distance) { level_spec->min_altref_distance = level_stats->frames_since_last_altref; } level_stats->frames_since_last_altref = 0; } else { ++level_stats->frames_since_last_altref; } if (level_stats->frame_window_buffer.len < FRAME_WINDOW_SIZE - 1) { idx = (level_stats->frame_window_buffer.start + level_stats->frame_window_buffer.len++) % FRAME_WINDOW_SIZE; } else { idx = level_stats->frame_window_buffer.start; level_stats->frame_window_buffer.start = (idx + 1) % FRAME_WINDOW_SIZE; } level_stats->frame_window_buffer.buf[idx].ts = cpi->last_time_stamp_seen; level_stats->frame_window_buffer.buf[idx].size = (uint32_t)(*size); level_stats->frame_window_buffer.buf[idx].luma_samples = luma_pic_size; if (cm->frame_type == KEY_FRAME) { level_stats->ref_refresh_map = 0; } else { int count = 0; level_stats->ref_refresh_map |= vp9_get_refresh_mask(cpi); // Also need to consider the case where the encoder refers to a buffer // that has been implicitly refreshed after encoding a keyframe. 
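    // After a keyframe, last/golden/alt all point at the keyframe's
    // reconstruction, so those three slots are counted as referenced here
    // even though no explicit refresh flag is set for this frame.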
    if (!cm->intra_only) {
      level_stats->ref_refresh_map |= (1 << cpi->lst_fb_idx);
      level_stats->ref_refresh_map |= (1 << cpi->gld_fb_idx);
      level_stats->ref_refresh_map |= (1 << cpi->alt_fb_idx);
    }

    for (i = 0; i < REF_FRAMES; ++i) {
      count += (level_stats->ref_refresh_map >> i) & 1;
    }
    if (count > level_spec->max_ref_frame_buffers) {
      level_spec->max_ref_frame_buffers = count;
    }
  }

  // update average_bitrate
  level_spec->average_bitrate = (double)level_stats->total_compressed_size /
                                125.0 / level_stats->time_encoded;

  // update max_luma_sample_rate
  luma_samples = 0;
  for (i = 0; i < level_stats->frame_window_buffer.len; ++i) {
    idx = (level_stats->frame_window_buffer.start +
           level_stats->frame_window_buffer.len - 1 - i) %
          FRAME_WINDOW_SIZE;
    if (i == 0) {
      dur_end = level_stats->frame_window_buffer.buf[idx].ts;
    }
    if (dur_end - level_stats->frame_window_buffer.buf[idx].ts >=
        TICKS_PER_SEC) {
      break;
    }
    luma_samples += level_stats->frame_window_buffer.buf[idx].luma_samples;
  }
  if (luma_samples > level_spec->max_luma_sample_rate) {
    level_spec->max_luma_sample_rate = luma_samples;
  }

  // update max_cpb_size
  cpb_data_size = 0;
  for (i = 0; i < CPB_WINDOW_SIZE; ++i) {
    if (i >= level_stats->frame_window_buffer.len) break;
    idx = (level_stats->frame_window_buffer.start +
           level_stats->frame_window_buffer.len - 1 - i) %
          FRAME_WINDOW_SIZE;
    cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
  }
  cpb_data_size = cpb_data_size / 125.0;
  if (cpb_data_size > level_spec->max_cpb_size) {
    level_spec->max_cpb_size = cpb_data_size;
  }

  // update max_luma_picture_size
  if (luma_pic_size > level_spec->max_luma_picture_size) {
    level_spec->max_luma_picture_size = luma_pic_size;
  }

  // update max_luma_picture_breadth
  if (luma_pic_breadth > level_spec->max_luma_picture_breadth) {
    level_spec->max_luma_picture_breadth = luma_pic_breadth;
  }

  // update compression_ratio
  level_spec->compression_ratio =
      (double)level_stats->total_uncompressed_size * cm->bit_depth /
      level_stats->total_compressed_size / 8.0;

  // update max_col_tiles
  if (level_spec->max_col_tiles < (1 << cm->log2_tile_cols)) {
    level_spec->max_col_tiles = (1 << cm->log2_tile_cols);
  }

  if (level_index >= 0 && level_constraint->fail_flag == 0) {
    if (level_spec->max_luma_picture_size >
        vp9_level_defs[level_index].max_luma_picture_size) {
      level_constraint->fail_flag |= (1 << LUMA_PIC_SIZE_TOO_LARGE);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[LUMA_PIC_SIZE_TOO_LARGE]);
    }

    if (level_spec->max_luma_picture_breadth >
        vp9_level_defs[level_index].max_luma_picture_breadth) {
      level_constraint->fail_flag |= (1 << LUMA_PIC_BREADTH_TOO_LARGE);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[LUMA_PIC_BREADTH_TOO_LARGE]);
    }

    if ((double)level_spec->max_luma_sample_rate >
        (double)vp9_level_defs[level_index].max_luma_sample_rate *
            (1 + SAMPLE_RATE_GRACE_P)) {
      level_constraint->fail_flag |= (1 << LUMA_SAMPLE_RATE_TOO_LARGE);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[LUMA_SAMPLE_RATE_TOO_LARGE]);
    }

    if (level_spec->max_col_tiles > vp9_level_defs[level_index].max_col_tiles) {
      level_constraint->fail_flag |= (1 << TOO_MANY_COLUMN_TILE);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[TOO_MANY_COLUMN_TILE]);
    }

    if (level_spec->min_altref_distance <
        vp9_level_defs[level_index].min_altref_distance) {
      level_constraint->fail_flag |= (1 << ALTREF_DIST_TOO_SMALL);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[ALTREF_DIST_TOO_SMALL]);
    }

    if (level_spec->max_ref_frame_buffers >
        vp9_level_defs[level_index].max_ref_frame_buffers) {
      level_constraint->fail_flag |= (1 << TOO_MANY_REF_BUFFER);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[TOO_MANY_REF_BUFFER]);
    }

    if (level_spec->max_cpb_size > vp9_level_defs[level_index].max_cpb_size) {
      level_constraint->fail_flag |= (1 << CPB_TOO_LARGE);
      vpx_internal_error(&cm->error, VPX_CODEC_ERROR,
                         "Failed to encode to the target level %d. %s",
                         vp9_level_defs[level_index].level,
                         level_fail_messages[CPB_TOO_LARGE]);
    }

    // Set an upper bound for the next frame size. It will be used in
    // level_rc_framerate() before encoding the next frame.
    cpb_data_size = 0;
    for (i = 0; i < CPB_WINDOW_SIZE - 1; ++i) {
      if (i >= level_stats->frame_window_buffer.len) break;
      idx = (level_stats->frame_window_buffer.start +
             level_stats->frame_window_buffer.len - 1 - i) %
            FRAME_WINDOW_SIZE;
      cpb_data_size += level_stats->frame_window_buffer.buf[idx].size;
    }
    cpb_data_size = cpb_data_size / 125.0;
    level_constraint->max_frame_size =
        (int)((vp9_level_defs[level_index].max_cpb_size - cpb_data_size) *
              1000.0);
    if (level_stats->frame_window_buffer.len < CPB_WINDOW_SIZE - 1)
      level_constraint->max_frame_size >>= 1;
  }
}

typedef struct GF_PICTURE {
  YV12_BUFFER_CONFIG *frame;
  int ref_frame[3];
  FRAME_UPDATE_TYPE update_type;
} GF_PICTURE;

static void init_gop_frames(VP9_COMP *cpi, GF_PICTURE *gf_picture,
                            const GF_GROUP *gf_group, int *tpl_group_frames) {
  VP9_COMMON *cm = &cpi->common;
  int frame_idx = 0;
  int i;
  int gld_index = -1;
  int alt_index = -1;
  int lst_index = -1;
  int arf_index_stack[MAX_ARF_LAYERS];
  int arf_stack_size = 0;
  int extend_frame_count = 0;
  int pframe_qindex = cpi->tpl_stats[2].base_qindex;
  int frame_gop_offset = 0;

  RefCntBuffer *frame_bufs = cm->buffer_pool->frame_bufs;
  int8_t recon_frame_index[REFS_PER_FRAME + MAX_ARF_LAYERS];

  memset(recon_frame_index, -1, sizeof(recon_frame_index));
  stack_init(arf_index_stack, MAX_ARF_LAYERS);

  // TODO(jingning): To be used later for gf frame type parsing.
  (void)gf_group;

  for (i = 0; i < FRAME_BUFFERS; ++i) {
    if (frame_bufs[i].ref_count == 0) {
      alloc_frame_mvs(cm, i);
      if (vpx_realloc_frame_buffer(&frame_bufs[i].buf, cm->width, cm->height,
                                   cm->subsampling_x, cm->subsampling_y,
#if CONFIG_VP9_HIGHBITDEPTH
                                   cm->use_highbitdepth,
#endif
                                   VP9_ENC_BORDER_IN_PIXELS,
                                   cm->byte_alignment, NULL, NULL, NULL))
        vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR,
                           "Failed to allocate frame buffer");

      recon_frame_index[frame_idx] = i;
      ++frame_idx;

      if (frame_idx >= REFS_PER_FRAME + cpi->oxcf.enable_auto_arf) break;
    }
  }

  for (i = 0; i < REFS_PER_FRAME + 1; ++i) {
    assert(recon_frame_index[i] >= 0);
    cpi->tpl_recon_frames[i] = &frame_bufs[recon_frame_index[i]].buf;
  }

  *tpl_group_frames = 0;

  // Initialize Golden reference frame.
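  // gf_picture[] layout: slot 0 is the golden frame carried over from the
  // previous GOP, slot 1 is the base layer ARF (the current source), and
  // slots 2 onward are filled from the lookahead according to the gf_group
  // update types.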
  gf_picture[0].frame = get_ref_frame_buffer(cpi, GOLDEN_FRAME);
  for (i = 0; i < 3; ++i) gf_picture[0].ref_frame[i] = -1;
  gf_picture[0].update_type = gf_group->update_type[0];
  gld_index = 0;
  ++*tpl_group_frames;

  // Initialize base layer ARF frame
  gf_picture[1].frame = cpi->Source;
  gf_picture[1].ref_frame[0] = gld_index;
  gf_picture[1].ref_frame[1] = lst_index;
  gf_picture[1].ref_frame[2] = alt_index;
  gf_picture[1].update_type = gf_group->update_type[1];
  alt_index = 1;
  ++*tpl_group_frames;

  // Initialize P frames
  for (frame_idx = 2; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
    struct lookahead_entry *buf;
    frame_gop_offset = gf_group->frame_gop_index[frame_idx];
    buf = vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = gf_group->update_type[frame_idx];

    switch (gf_group->update_type[frame_idx]) {
      case ARF_UPDATE:
        stack_push(arf_index_stack, alt_index, arf_stack_size);
        ++arf_stack_size;
        alt_index = frame_idx;
        break;
      case LF_UPDATE: lst_index = frame_idx; break;
      case OVERLAY_UPDATE:
        gld_index = frame_idx;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      case USE_BUF_FRAME:
        lst_index = alt_index;
        alt_index = stack_pop(arf_index_stack, arf_stack_size);
        --arf_stack_size;
        break;
      default: break;
    }

    ++*tpl_group_frames;

    // The length of group of pictures is baseline_gf_interval, plus the
    // beginning golden frame from last GOP, plus the last overlay frame in
    // the same GOP.
    if (frame_idx == gf_group->gf_group_size) break;
  }

  alt_index = -1;
  ++frame_idx;
  ++frame_gop_offset;

  // Extend two frames outside the current gf group.
  for (; frame_idx < MAX_LAG_BUFFERS && extend_frame_count < 2; ++frame_idx) {
    struct lookahead_entry *buf =
        vp9_lookahead_peek(cpi->lookahead, frame_gop_offset - 1);

    if (buf == NULL) break;

    cpi->tpl_stats[frame_idx].base_qindex = pframe_qindex;

    gf_picture[frame_idx].frame = &buf->img;
    gf_picture[frame_idx].ref_frame[0] = gld_index;
    gf_picture[frame_idx].ref_frame[1] = lst_index;
    gf_picture[frame_idx].ref_frame[2] = alt_index;
    gf_picture[frame_idx].update_type = LF_UPDATE;
    lst_index = frame_idx;
    ++*tpl_group_frames;
    ++extend_frame_count;
    ++frame_gop_offset;
  }
}

static void init_tpl_stats(VP9_COMP *cpi) {
  int frame_idx;
  for (frame_idx = 0; frame_idx < MAX_ARF_GOP_SIZE; ++frame_idx) {
    TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx];
    memset(tpl_frame->tpl_stats_ptr, 0,
           tpl_frame->height * tpl_frame->width *
               sizeof(*tpl_frame->tpl_stats_ptr));
    tpl_frame->is_valid = 0;
  }
}

#if CONFIG_NON_GREEDY_MV
static uint32_t full_pixel_motion_search(VP9_COMP *cpi, ThreadData *td,
                                         MotionField *motion_field,
                                         int frame_idx, uint8_t *cur_frame_buf,
                                         uint8_t *ref_frame_buf, int stride,
                                         BLOCK_SIZE bsize, int mi_row,
                                         int mi_col, MV *mv) {
  MACROBLOCK *const x = &td->mb;
  MACROBLOCKD *const xd = &x->e_mbd;
  MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv;
  int step_param;
  uint32_t bestsme = UINT_MAX;
  const MvLimits tmp_mv_limits = x->mv_limits;
  // lambda is used to adjust the importance of motion vector consistency.
  // TODO(angiebird): Figure out lambda's proper value.
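  // lambda weights the motion-vector-consistency term against the SAD term
  // in vp9_full_pixel_diamond_new(); build_motion_field() sets it per frame
  // to (block_width * block_height) >> 2.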
const int lambda = cpi->tpl_stats[frame_idx].lambda; int_mv nb_full_mvs[NB_MVS_NUM]; int nb_full_mv_num; MV best_ref_mv1 = { 0, 0 }; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ best_ref_mv1_full.col = best_ref_mv1.col >> 3; best_ref_mv1_full.row = best_ref_mv1.row >> 3; // Setup frame pointers x->plane[0].src.buf = cur_frame_buf; x->plane[0].src.stride = stride; xd->plane[0].pre[0].buf = ref_frame_buf; xd->plane[0].pre[0].stride = stride; step_param = mv_sf->reduce_first_step_size; step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); nb_full_mv_num = vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs); vp9_full_pixel_diamond_new(cpi, x, bsize, &best_ref_mv1_full, step_param, lambda, 1, nb_full_mvs, nb_full_mv_num, mv); /* restore UMV window */ x->mv_limits = tmp_mv_limits; return bestsme; } static uint32_t sub_pixel_motion_search(VP9_COMP *cpi, ThreadData *td, uint8_t *cur_frame_buf, uint8_t *ref_frame_buf, int stride, BLOCK_SIZE bsize, MV *mv) { MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; uint32_t bestsme = UINT_MAX; uint32_t distortion; uint32_t sse; int cost_list[5]; MV best_ref_mv1 = { 0, 0 }; // Setup frame pointers x->plane[0].src.buf = cur_frame_buf; x->plane[0].src.stride = stride; xd->plane[0].pre[0].buf = ref_frame_buf; xd->plane[0].pre[0].stride = stride; // TODO(yunqing): may use higher tap interp filter than 2 taps. // Ignore mv costing by sending NULL pointer instead of cost array bestsme = cpi->find_fractional_mv_step( x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level, cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0, USE_2_TAPS); return bestsme; } #else // CONFIG_NON_GREEDY_MV static uint32_t motion_compensated_prediction(VP9_COMP *cpi, ThreadData *td, uint8_t *cur_frame_buf, uint8_t *ref_frame_buf, int stride, BLOCK_SIZE bsize, MV *mv) { MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; const SEARCH_METHODS search_method = NSTEP; int step_param; int sadpb = x->sadperbit16; uint32_t bestsme = UINT_MAX; uint32_t distortion; uint32_t sse; int cost_list[5]; const MvLimits tmp_mv_limits = x->mv_limits; MV best_ref_mv1 = { 0, 0 }; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ best_ref_mv1_full.col = best_ref_mv1.col >> 3; best_ref_mv1_full.row = best_ref_mv1.row >> 3; // Setup frame pointers x->plane[0].src.buf = cur_frame_buf; x->plane[0].src.stride = stride; xd->plane[0].pre[0].buf = ref_frame_buf; xd->plane[0].pre[0].stride = stride; step_param = mv_sf->reduce_first_step_size; step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); vp9_full_pixel_search(cpi, x, bsize, &best_ref_mv1_full, step_param, search_method, sadpb, cond_cost_list(cpi, cost_list), &best_ref_mv1, mv, 0, 0); /* restore UMV window */ x->mv_limits = tmp_mv_limits; // TODO(yunqing): may use higher tap interp filter than 2 taps. 
// Ignore mv costing by sending NULL pointer instead of cost array bestsme = cpi->find_fractional_mv_step( x, mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], 0, mv_sf->subpel_search_level, cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0, USE_2_TAPS); return bestsme; } #endif static int get_overlap_area(int grid_pos_row, int grid_pos_col, int ref_pos_row, int ref_pos_col, int block, BLOCK_SIZE bsize) { int width = 0, height = 0; int bw = 4 << b_width_log2_lookup[bsize]; int bh = 4 << b_height_log2_lookup[bsize]; switch (block) { case 0: width = grid_pos_col + bw - ref_pos_col; height = grid_pos_row + bh - ref_pos_row; break; case 1: width = ref_pos_col + bw - grid_pos_col; height = grid_pos_row + bh - ref_pos_row; break; case 2: width = grid_pos_col + bw - ref_pos_col; height = ref_pos_row + bh - grid_pos_row; break; case 3: width = ref_pos_col + bw - grid_pos_col; height = ref_pos_row + bh - grid_pos_row; break; default: assert(0); } return width * height; } static int round_floor(int ref_pos, int bsize_pix) { int round; if (ref_pos < 0) round = -(1 + (-ref_pos - 1) / bsize_pix); else round = ref_pos / bsize_pix; return round; } static void tpl_model_store(TplDepStats *tpl_stats, int mi_row, int mi_col, BLOCK_SIZE bsize, int stride) { const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const TplDepStats *src_stats = &tpl_stats[mi_row * stride + mi_col]; int idx, idy; for (idy = 0; idy < mi_height; ++idy) { for (idx = 0; idx < mi_width; ++idx) { TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * stride + mi_col + idx]; const int64_t mc_flow = tpl_ptr->mc_flow; const int64_t mc_ref_cost = tpl_ptr->mc_ref_cost; *tpl_ptr = *src_stats; tpl_ptr->mc_flow = mc_flow; tpl_ptr->mc_ref_cost = mc_ref_cost; tpl_ptr->mc_dep_cost = tpl_ptr->intra_cost + tpl_ptr->mc_flow; } } } static void tpl_model_update_b(TplDepFrame *tpl_frame, TplDepStats *tpl_stats, int mi_row, int mi_col, const BLOCK_SIZE bsize) { TplDepFrame *ref_tpl_frame = &tpl_frame[tpl_stats->ref_frame_index]; TplDepStats *ref_stats = ref_tpl_frame->tpl_stats_ptr; MV mv = tpl_stats->mv.as_mv; int mv_row = mv.row >> 3; int mv_col = mv.col >> 3; int ref_pos_row = mi_row * MI_SIZE + mv_row; int ref_pos_col = mi_col * MI_SIZE + mv_col; const int bw = 4 << b_width_log2_lookup[bsize]; const int bh = 4 << b_height_log2_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int pix_num = bw * bh; // top-left on grid block location in pixel int grid_pos_row_base = round_floor(ref_pos_row, bh) * bh; int grid_pos_col_base = round_floor(ref_pos_col, bw) * bw; int block; for (block = 0; block < 4; ++block) { int grid_pos_row = grid_pos_row_base + bh * (block >> 1); int grid_pos_col = grid_pos_col_base + bw * (block & 0x01); if (grid_pos_row >= 0 && grid_pos_row < ref_tpl_frame->mi_rows * MI_SIZE && grid_pos_col >= 0 && grid_pos_col < ref_tpl_frame->mi_cols * MI_SIZE) { int overlap_area = get_overlap_area( grid_pos_row, grid_pos_col, ref_pos_row, ref_pos_col, block, bsize); int ref_mi_row = round_floor(grid_pos_row, bh) * mi_height; int ref_mi_col = round_floor(grid_pos_col, bw) * mi_width; int64_t mc_flow = tpl_stats->mc_dep_cost - (tpl_stats->mc_dep_cost * tpl_stats->inter_cost) / tpl_stats->intra_cost; int idx, idy; for (idy = 0; idy < mi_height; ++idy) { for (idx = 0; idx < mi_width; ++idx) { TplDepStats *des_stats = &ref_stats[(ref_mi_row + idy) 
* ref_tpl_frame->stride + (ref_mi_col + idx)]; des_stats->mc_flow += (mc_flow * overlap_area) / pix_num; des_stats->mc_ref_cost += ((tpl_stats->intra_cost - tpl_stats->inter_cost) * overlap_area) / pix_num; assert(overlap_area >= 0); } } } } } static void tpl_model_update(TplDepFrame *tpl_frame, TplDepStats *tpl_stats, int mi_row, int mi_col, const BLOCK_SIZE bsize) { int idx, idy; const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; for (idy = 0; idy < mi_height; ++idy) { for (idx = 0; idx < mi_width; ++idx) { TplDepStats *tpl_ptr = &tpl_stats[(mi_row + idy) * tpl_frame->stride + (mi_col + idx)]; tpl_model_update_b(tpl_frame, tpl_ptr, mi_row + idy, mi_col + idx, BLOCK_8X8); } } } static void get_quantize_error(MACROBLOCK *x, int plane, tran_low_t *coeff, tran_low_t *qcoeff, tran_low_t *dqcoeff, TX_SIZE tx_size, int64_t *recon_error, int64_t *sse) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblock_plane *const p = &x->plane[plane]; const struct macroblockd_plane *const pd = &xd->plane[plane]; const scan_order *const scan_order = &vp9_default_scan_orders[tx_size]; uint16_t eob; int pix_num = 1 << num_pels_log2_lookup[txsize_to_bsize[tx_size]]; const int shift = tx_size == TX_32X32 ? 0 : 2; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_highbd_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan, scan_order->iscan); } else { vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan, scan_order->iscan); } #else vp9_quantize_fp_32x32(coeff, pix_num, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, &eob, scan_order->scan, scan_order->iscan); #endif // CONFIG_VP9_HIGHBITDEPTH *recon_error = vp9_block_error(coeff, dqcoeff, pix_num, sse) >> shift; *recon_error = VPXMAX(*recon_error, 1); *sse = (*sse) >> shift; *sse = VPXMAX(*sse, 1); } #if CONFIG_VP9_HIGHBITDEPTH void highbd_wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, TX_SIZE tx_size) { // TODO(sdeng): Implement SIMD based high bit-depth Hadamard transforms. 
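  // The Hadamard transform is used as a cheap stand-in for the full forward
  // transform: it needs only additions and subtractions, and the SATD of the
  // transformed residual serves as the intra/inter cost estimate in
  // mode_estimation().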
switch (tx_size) { case TX_8X8: vpx_highbd_hadamard_8x8(src_diff, bw, coeff); break; case TX_16X16: vpx_highbd_hadamard_16x16(src_diff, bw, coeff); break; case TX_32X32: vpx_highbd_hadamard_32x32(src_diff, bw, coeff); break; default: assert(0); } } #endif // CONFIG_VP9_HIGHBITDEPTH void wht_fwd_txfm(int16_t *src_diff, int bw, tran_low_t *coeff, TX_SIZE tx_size) { switch (tx_size) { case TX_8X8: vpx_hadamard_8x8(src_diff, bw, coeff); break; case TX_16X16: vpx_hadamard_16x16(src_diff, bw, coeff); break; case TX_32X32: vpx_hadamard_32x32(src_diff, bw, coeff); break; default: assert(0); } } static void set_mv_limits(const VP9_COMMON *cm, MACROBLOCK *x, int mi_row, int mi_col) { x->mv_limits.row_min = -((mi_row * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND)); x->mv_limits.row_max = (cm->mi_rows - 1 - mi_row) * MI_SIZE + (17 - 2 * VP9_INTERP_EXTEND); x->mv_limits.col_min = -((mi_col * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND)); x->mv_limits.col_max = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) + (17 - 2 * VP9_INTERP_EXTEND); } static void mode_estimation(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, struct scale_factors *sf, GF_PICTURE *gf_picture, int frame_idx, TplDepFrame *tpl_frame, int16_t *src_diff, tran_low_t *coeff, tran_low_t *qcoeff, tran_low_t *dqcoeff, int mi_row, int mi_col, BLOCK_SIZE bsize, TX_SIZE tx_size, YV12_BUFFER_CONFIG *ref_frame[], uint8_t *predictor, int64_t *recon_error, int64_t *sse) { VP9_COMMON *cm = &cpi->common; ThreadData *td = &cpi->td; const int bw = 4 << b_width_log2_lookup[bsize]; const int bh = 4 << b_height_log2_lookup[bsize]; const int pix_num = bw * bh; int best_rf_idx = -1; int_mv best_mv; int64_t best_inter_cost = INT64_MAX; int64_t inter_cost; int rf_idx; const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP]; int64_t best_intra_cost = INT64_MAX; int64_t intra_cost; PREDICTION_MODE mode; int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; MODE_INFO mi_above, mi_left; const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; TplDepStats *tpl_stats = &tpl_frame->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; xd->mb_to_top_edge = -((mi_row * MI_SIZE) * 8); xd->mb_to_bottom_edge = ((cm->mi_rows - 1 - mi_row) * MI_SIZE) * 8; xd->mb_to_left_edge = -((mi_col * MI_SIZE) * 8); xd->mb_to_right_edge = ((cm->mi_cols - 1 - mi_col) * MI_SIZE) * 8; xd->above_mi = (mi_row > 0) ? &mi_above : NULL; xd->left_mi = (mi_col > 0) ? 
&mi_left : NULL; // Intra prediction search for (mode = DC_PRED; mode <= TM_PRED; ++mode) { uint8_t *src, *dst; int src_stride, dst_stride; src = xd->cur_buf->y_buffer + mb_y_offset; src_stride = xd->cur_buf->y_stride; dst = &predictor[0]; dst_stride = bw; xd->mi[0]->sb_type = bsize; xd->mi[0]->ref_frame[0] = INTRA_FRAME; vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, src, src_stride, dst, dst_stride, 0, 0, 0); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vpx_highbd_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride, xd->bd); highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size); intra_cost = vpx_highbd_satd(coeff, pix_num); } else { vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride); wht_fwd_txfm(src_diff, bw, coeff, tx_size); intra_cost = vpx_satd(coeff, pix_num); } #else vpx_subtract_block(bh, bw, src_diff, bw, src, src_stride, dst, dst_stride); wht_fwd_txfm(src_diff, bw, coeff, tx_size); intra_cost = vpx_satd(coeff, pix_num); #endif // CONFIG_VP9_HIGHBITDEPTH if (intra_cost < best_intra_cost) best_intra_cost = intra_cost; } // Motion compensated prediction best_mv.as_int = 0; set_mv_limits(cm, x, mi_row, mi_col); for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { int_mv mv; #if CONFIG_NON_GREEDY_MV MotionField *motion_field; #endif if (ref_frame[rf_idx] == NULL) continue; #if CONFIG_NON_GREEDY_MV (void)td; motion_field = vp9_motion_field_info_get_motion_field( &cpi->motion_field_info, frame_idx, rf_idx, bsize); mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col); #else motion_compensated_prediction(cpi, td, xd->cur_buf->y_buffer + mb_y_offset, ref_frame[rf_idx]->y_buffer + mb_y_offset, xd->cur_buf->y_stride, bsize, &mv.as_mv); #endif #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_highbd_build_inter_predictor( CONVERT_TO_SHORTPTR(ref_frame[rf_idx]->y_buffer + mb_y_offset), ref_frame[rf_idx]->y_stride, CONVERT_TO_SHORTPTR(&predictor[0]), bw, &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd); vpx_highbd_subtract_block( bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, &predictor[0], bw, xd->bd); highbd_wht_fwd_txfm(src_diff, bw, coeff, tx_size); inter_cost = vpx_highbd_satd(coeff, pix_num); } else { vp9_build_inter_predictor( ref_frame[rf_idx]->y_buffer + mb_y_offset, ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); vpx_subtract_block(bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, &predictor[0], bw); wht_fwd_txfm(src_diff, bw, coeff, tx_size); inter_cost = vpx_satd(coeff, pix_num); } #else vp9_build_inter_predictor(ref_frame[rf_idx]->y_buffer + mb_y_offset, ref_frame[rf_idx]->y_stride, &predictor[0], bw, &mv.as_mv, sf, bw, bh, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); vpx_subtract_block(bh, bw, src_diff, bw, xd->cur_buf->y_buffer + mb_y_offset, xd->cur_buf->y_stride, &predictor[0], bw); wht_fwd_txfm(src_diff, bw, coeff, tx_size); inter_cost = vpx_satd(coeff, pix_num); #endif if (inter_cost < best_inter_cost) { best_rf_idx = rf_idx; best_inter_cost = inter_cost; best_mv.as_int = mv.as_int; get_quantize_error(x, 0, coeff, qcoeff, dqcoeff, tx_size, recon_error, sse); } } best_intra_cost = VPXMAX(best_intra_cost, 1); best_inter_cost = VPXMIN(best_intra_cost, best_inter_cost); tpl_stats->inter_cost = VPXMAX( 1, 
(best_inter_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width)); tpl_stats->intra_cost = VPXMAX( 1, (best_intra_cost << TPL_DEP_COST_SCALE_LOG2) / (mi_height * mi_width)); tpl_stats->ref_frame_index = gf_picture[frame_idx].ref_frame[best_rf_idx]; tpl_stats->mv.as_int = best_mv.as_int; } #if CONFIG_NON_GREEDY_MV static int get_block_src_pred_buf(MACROBLOCKD *xd, GF_PICTURE *gf_picture, int frame_idx, int rf_idx, int mi_row, int mi_col, struct buf_2d *src, struct buf_2d *pre) { const int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; YV12_BUFFER_CONFIG *ref_frame = NULL; int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx]; if (ref_frame_idx != -1) { ref_frame = gf_picture[ref_frame_idx].frame; src->buf = xd->cur_buf->y_buffer + mb_y_offset; src->stride = xd->cur_buf->y_stride; pre->buf = ref_frame->y_buffer + mb_y_offset; pre->stride = ref_frame->y_stride; assert(src->stride == pre->stride); return 1; } else { printf("invalid ref_frame_idx"); assert(ref_frame_idx != -1); return 0; } } #define kMvPreCheckLines 5 #define kMvPreCheckSize 15 #define MV_REF_POS_NUM 3 POSITION mv_ref_pos[MV_REF_POS_NUM] = { { -1, 0 }, { 0, -1 }, { -1, -1 }, }; static int_mv *get_select_mv(VP9_COMP *cpi, TplDepFrame *tpl_frame, int mi_row, int mi_col) { return &cpi->select_mv_arr[mi_row * tpl_frame->stride + mi_col]; } static int_mv find_ref_mv(int mv_mode, VP9_COMP *cpi, TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row, int mi_col) { int i; const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; int_mv nearest_mv, near_mv, invalid_mv; nearest_mv.as_int = INVALID_MV; near_mv.as_int = INVALID_MV; invalid_mv.as_int = INVALID_MV; for (i = 0; i < MV_REF_POS_NUM; ++i) { int nb_row = mi_row + mv_ref_pos[i].row * mi_height; int nb_col = mi_col + mv_ref_pos[i].col * mi_width; assert(mv_ref_pos[i].row <= 0); assert(mv_ref_pos[i].col <= 0); if (nb_row >= 0 && nb_col >= 0) { if (nearest_mv.as_int == INVALID_MV) { nearest_mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col); } else { int_mv mv = *get_select_mv(cpi, tpl_frame, nb_row, nb_col); if (mv.as_int == nearest_mv.as_int) { continue; } else { near_mv = mv; break; } } } } if (nearest_mv.as_int == INVALID_MV) { nearest_mv.as_mv.row = 0; nearest_mv.as_mv.col = 0; } if (near_mv.as_int == INVALID_MV) { near_mv.as_mv.row = 0; near_mv.as_mv.col = 0; } if (mv_mode == NEAREST_MV_MODE) { return nearest_mv; } if (mv_mode == NEAR_MV_MODE) { return near_mv; } assert(0); return invalid_mv; } static int_mv get_mv_from_mv_mode(int mv_mode, VP9_COMP *cpi, MotionField *motion_field, TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row, int mi_col) { int_mv mv; switch (mv_mode) { case ZERO_MV_MODE: mv.as_mv.row = 0; mv.as_mv.col = 0; break; case NEW_MV_MODE: mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col); break; case NEAREST_MV_MODE: mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col); break; case NEAR_MV_MODE: mv = find_ref_mv(mv_mode, cpi, tpl_frame, bsize, mi_row, mi_col); break; default: mv.as_int = INVALID_MV; assert(0); break; } return mv; } static double get_mv_dist(int mv_mode, VP9_COMP *cpi, MACROBLOCKD *xd, GF_PICTURE *gf_picture, MotionField *motion_field, int frame_idx, TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *mv) { uint32_t sse; struct buf_2d src; struct buf_2d pre; MV full_mv; *mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col); full_mv = get_full_mv(&mv->as_mv); if 
(get_block_src_pred_buf(xd, gf_picture, frame_idx, rf_idx, mi_row, mi_col, &src, &pre)) { // TODO(angiebird): Consider subpixel when computing the sse. cpi->fn_ptr[bsize].vf(src.buf, src.stride, get_buf_from_mv(&pre, &full_mv), pre.stride, &sse); return (double)(sse << VP9_DIST_SCALE_LOG2); } else { assert(0); return 0; } } static int get_mv_mode_cost(int mv_mode) { // TODO(angiebird): The probabilities are roughly inferred from // default_inter_mode_probs. Check if there is a better way to set the // probabilities. const int zero_mv_prob = 16; const int new_mv_prob = 24 * 1; const int ref_mv_prob = 256 - zero_mv_prob - new_mv_prob; assert(zero_mv_prob + new_mv_prob + ref_mv_prob == 256); switch (mv_mode) { case ZERO_MV_MODE: return vp9_prob_cost[zero_mv_prob]; break; case NEW_MV_MODE: return vp9_prob_cost[new_mv_prob]; break; case NEAREST_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break; case NEAR_MV_MODE: return vp9_prob_cost[ref_mv_prob]; break; default: assert(0); return -1; } } static INLINE double get_mv_diff_cost(MV *new_mv, MV *ref_mv) { double mv_diff_cost = log2(1 + abs(new_mv->row - ref_mv->row)) + log2(1 + abs(new_mv->col - ref_mv->col)); mv_diff_cost *= (1 << VP9_PROB_COST_SHIFT); return mv_diff_cost; } static double get_mv_cost(int mv_mode, VP9_COMP *cpi, MotionField *motion_field, TplDepFrame *tpl_frame, BLOCK_SIZE bsize, int mi_row, int mi_col) { double mv_cost = get_mv_mode_cost(mv_mode); if (mv_mode == NEW_MV_MODE) { MV new_mv = get_mv_from_mv_mode(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col) .as_mv; MV nearest_mv = get_mv_from_mv_mode(NEAREST_MV_MODE, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col) .as_mv; MV near_mv = get_mv_from_mv_mode(NEAR_MV_MODE, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col) .as_mv; double nearest_cost = get_mv_diff_cost(&new_mv, &nearest_mv); double near_cost = get_mv_diff_cost(&new_mv, &near_mv); mv_cost += nearest_cost < near_cost ? 
nearest_cost : near_cost; } return mv_cost; } static double eval_mv_mode(int mv_mode, VP9_COMP *cpi, MACROBLOCK *x, GF_PICTURE *gf_picture, MotionField *motion_field, int frame_idx, TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *mv) { MACROBLOCKD *xd = &x->e_mbd; double mv_dist = get_mv_dist(mv_mode, cpi, xd, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx, bsize, mi_row, mi_col, mv); double mv_cost = get_mv_cost(mv_mode, cpi, motion_field, tpl_frame, bsize, mi_row, mi_col); double mult = 180; return mv_cost + mult * log2f(1 + mv_dist); } static int find_best_ref_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, GF_PICTURE *gf_picture, MotionField *motion_field, int frame_idx, TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize, int mi_row, int mi_col, double *rd, int_mv *mv) { int best_mv_mode = ZERO_MV_MODE; int update = 0; int mv_mode; *rd = 0; for (mv_mode = 0; mv_mode < MAX_MV_MODE; ++mv_mode) { double this_rd; int_mv this_mv; if (mv_mode == NEW_MV_MODE) { continue; } this_rd = eval_mv_mode(mv_mode, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx, bsize, mi_row, mi_col, &this_mv); if (update == 0) { *rd = this_rd; *mv = this_mv; best_mv_mode = mv_mode; update = 1; } else { if (this_rd < *rd) { *rd = this_rd; *mv = this_mv; best_mv_mode = mv_mode; } } } return best_mv_mode; } static void predict_mv_mode(VP9_COMP *cpi, MACROBLOCK *x, GF_PICTURE *gf_picture, MotionField *motion_field, int frame_idx, TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize, int mi_row, int mi_col) { const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; int tmp_mv_mode_arr[kMvPreCheckSize]; int *mv_mode_arr = tpl_frame->mv_mode_arr[rf_idx]; double *rd_diff_arr = tpl_frame->rd_diff_arr[rf_idx]; int_mv *select_mv_arr = cpi->select_mv_arr; int_mv tmp_select_mv_arr[kMvPreCheckSize]; int stride = tpl_frame->stride; double new_mv_rd = 0; double no_new_mv_rd = 0; double this_new_mv_rd = 0; double this_no_new_mv_rd = 0; int idx; int tmp_idx; assert(kMvPreCheckSize == (kMvPreCheckLines * (kMvPreCheckLines + 1)) >> 1); // no new mv // diagnal scan order tmp_idx = 0; for (idx = 0; idx < kMvPreCheckLines; ++idx) { int r; for (r = 0; r <= idx; ++r) { int c = idx - r; int nb_row = mi_row + r * mi_height; int nb_col = mi_col + c * mi_width; if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { double this_rd; int_mv *mv = &select_mv_arr[nb_row * stride + nb_col]; mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode( cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx, bsize, nb_row, nb_col, &this_rd, mv); if (r == 0 && c == 0) { this_no_new_mv_rd = this_rd; } no_new_mv_rd += this_rd; tmp_mv_mode_arr[tmp_idx] = mv_mode_arr[nb_row * stride + nb_col]; tmp_select_mv_arr[tmp_idx] = select_mv_arr[nb_row * stride + nb_col]; ++tmp_idx; } } } // new mv mv_mode_arr[mi_row * stride + mi_col] = NEW_MV_MODE; this_new_mv_rd = eval_mv_mode( NEW_MV_MODE, cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx, bsize, mi_row, mi_col, &select_mv_arr[mi_row * stride + mi_col]); new_mv_rd = this_new_mv_rd; // We start from idx = 1 because idx = 0 is evaluated as NEW_MV_MODE // beforehand. 
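  // Re-evaluate the same diagonal neighborhood with NEW_MV_MODE fixed at
  // (mi_row, mi_col); new_mv_rd accumulates the neighbors' best RD under that
  // assumption so it can be compared against no_new_mv_rd below.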
for (idx = 1; idx < kMvPreCheckLines; ++idx) { int r; for (r = 0; r <= idx; ++r) { int c = idx - r; int nb_row = mi_row + r * mi_height; int nb_col = mi_col + c * mi_width; if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { double this_rd; int_mv *mv = &select_mv_arr[nb_row * stride + nb_col]; mv_mode_arr[nb_row * stride + nb_col] = find_best_ref_mv_mode( cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx, bsize, nb_row, nb_col, &this_rd, mv); new_mv_rd += this_rd; } } } // update best_mv_mode tmp_idx = 0; if (no_new_mv_rd < new_mv_rd) { for (idx = 0; idx < kMvPreCheckLines; ++idx) { int r; for (r = 0; r <= idx; ++r) { int c = idx - r; int nb_row = mi_row + r * mi_height; int nb_col = mi_col + c * mi_width; if (nb_row < tpl_frame->mi_rows && nb_col < tpl_frame->mi_cols) { mv_mode_arr[nb_row * stride + nb_col] = tmp_mv_mode_arr[tmp_idx]; select_mv_arr[nb_row * stride + nb_col] = tmp_select_mv_arr[tmp_idx]; ++tmp_idx; } } } rd_diff_arr[mi_row * stride + mi_col] = 0; } else { rd_diff_arr[mi_row * stride + mi_col] = (no_new_mv_rd - this_no_new_mv_rd) - (new_mv_rd - this_new_mv_rd); } } static void predict_mv_mode_arr(VP9_COMP *cpi, MACROBLOCK *x, GF_PICTURE *gf_picture, MotionField *motion_field, int frame_idx, TplDepFrame *tpl_frame, int rf_idx, BLOCK_SIZE bsize) { const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int unit_rows = tpl_frame->mi_rows / mi_height; const int unit_cols = tpl_frame->mi_cols / mi_width; const int max_diagonal_lines = unit_rows + unit_cols - 1; int idx; for (idx = 0; idx < max_diagonal_lines; ++idx) { int r; for (r = VPXMAX(idx - unit_cols + 1, 0); r <= VPXMIN(idx, unit_rows - 1); ++r) { int c = idx - r; int mi_row = r * mi_height; int mi_col = c * mi_width; assert(c >= 0 && c < unit_cols); assert(mi_row >= 0 && mi_row < tpl_frame->mi_rows); assert(mi_col >= 0 && mi_col < tpl_frame->mi_cols); predict_mv_mode(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx, bsize, mi_row, mi_col); } } } static void do_motion_search(VP9_COMP *cpi, ThreadData *td, MotionField *motion_field, int frame_idx, YV12_BUFFER_CONFIG *ref_frame, BLOCK_SIZE bsize, int mi_row, int mi_col) { VP9_COMMON *cm = &cpi->common; MACROBLOCK *x = &td->mb; MACROBLOCKD *xd = &x->e_mbd; const int mb_y_offset = mi_row * MI_SIZE * xd->cur_buf->y_stride + mi_col * MI_SIZE; assert(ref_frame != NULL); set_mv_limits(cm, x, mi_row, mi_col); { int_mv mv = vp9_motion_field_mi_get_mv(motion_field, mi_row, mi_col); uint8_t *cur_frame_buf = xd->cur_buf->y_buffer + mb_y_offset; uint8_t *ref_frame_buf = ref_frame->y_buffer + mb_y_offset; const int stride = xd->cur_buf->y_stride; full_pixel_motion_search(cpi, td, motion_field, frame_idx, cur_frame_buf, ref_frame_buf, stride, bsize, mi_row, mi_col, &mv.as_mv); sub_pixel_motion_search(cpi, td, cur_frame_buf, ref_frame_buf, stride, bsize, &mv.as_mv); vp9_motion_field_mi_set_mv(motion_field, mi_row, mi_col, mv); } } static void build_motion_field( VP9_COMP *cpi, int frame_idx, YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES], BLOCK_SIZE bsize) { VP9_COMMON *cm = &cpi->common; ThreadData *td = &cpi->td; TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int pw = num_4x4_blocks_wide_lookup[bsize] << 2; const int ph = num_4x4_blocks_high_lookup[bsize] << 2; int mi_row, mi_col; int rf_idx; tpl_frame->lambda = (pw * ph) >> 2; assert(pw * ph == 
tpl_frame->lambda << 2); for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { MotionField *motion_field = vp9_motion_field_info_get_motion_field( &cpi->motion_field_info, frame_idx, rf_idx, bsize); if (ref_frame[rf_idx] == NULL) { continue; } vp9_motion_field_reset_mvs(motion_field); for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { do_motion_search(cpi, td, motion_field, frame_idx, ref_frame[rf_idx], bsize, mi_row, mi_col); } } } } #endif // CONFIG_NON_GREEDY_MV static void mc_flow_dispenser(VP9_COMP *cpi, GF_PICTURE *gf_picture, int frame_idx, BLOCK_SIZE bsize) { TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; YV12_BUFFER_CONFIG *this_frame = gf_picture[frame_idx].frame; YV12_BUFFER_CONFIG *ref_frame[MAX_INTER_REF_FRAMES] = { NULL, NULL, NULL }; VP9_COMMON *cm = &cpi->common; struct scale_factors sf; int rdmult, idx; ThreadData *td = &cpi->td; MACROBLOCK *x = &td->mb; MACROBLOCKD *xd = &x->e_mbd; int mi_row, mi_col; #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, predictor16[32 * 32 * 3]); DECLARE_ALIGNED(16, uint8_t, predictor8[32 * 32 * 3]); uint8_t *predictor; #else DECLARE_ALIGNED(16, uint8_t, predictor[32 * 32 * 3]); #endif DECLARE_ALIGNED(16, int16_t, src_diff[32 * 32]); DECLARE_ALIGNED(16, tran_low_t, coeff[32 * 32]); DECLARE_ALIGNED(16, tran_low_t, qcoeff[32 * 32]); DECLARE_ALIGNED(16, tran_low_t, dqcoeff[32 * 32]); const TX_SIZE tx_size = max_txsize_lookup[bsize]; const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; int64_t recon_error, sse; #if CONFIG_NON_GREEDY_MV int square_block_idx; int rf_idx; #endif // Setup scaling factor #if CONFIG_VP9_HIGHBITDEPTH vp9_setup_scale_factors_for_frame( &sf, this_frame->y_crop_width, this_frame->y_crop_height, this_frame->y_crop_width, this_frame->y_crop_height, cpi->common.use_highbitdepth); if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) predictor = CONVERT_TO_BYTEPTR(predictor16); else predictor = predictor8; #else vp9_setup_scale_factors_for_frame( &sf, this_frame->y_crop_width, this_frame->y_crop_height, this_frame->y_crop_width, this_frame->y_crop_height); #endif // CONFIG_VP9_HIGHBITDEPTH // Prepare reference frame pointers. If any reference frame slot is // unavailable, the pointer will be set to Null. for (idx = 0; idx < MAX_INTER_REF_FRAMES; ++idx) { int rf_idx = gf_picture[frame_idx].ref_frame[idx]; if (rf_idx != -1) ref_frame[idx] = gf_picture[rf_idx].frame; } xd->mi = cm->mi_grid_visible; xd->mi[0] = cm->mi; xd->cur_buf = this_frame; // Get rd multiplier set up. 
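  // Derive the RD multiplier and motion-estimation constants from this
  // frame's base Q index so the per-block cost estimates below reflect the
  // quantizer the frame is expected to be coded at.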
rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, tpl_frame->base_qindex); set_error_per_bit(&cpi->td.mb, rdmult); vp9_initialize_me_consts(cpi, &cpi->td.mb, tpl_frame->base_qindex); tpl_frame->is_valid = 1; cm->base_qindex = tpl_frame->base_qindex; vp9_frame_init_quantizer(cpi); #if CONFIG_NON_GREEDY_MV for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES; ++square_block_idx) { BLOCK_SIZE square_bsize = square_block_idx_to_bsize(square_block_idx); build_motion_field(cpi, frame_idx, ref_frame, square_bsize); } for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { int ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx]; if (ref_frame_idx != -1) { MotionField *motion_field = vp9_motion_field_info_get_motion_field( &cpi->motion_field_info, frame_idx, rf_idx, bsize); predict_mv_mode_arr(cpi, x, gf_picture, motion_field, frame_idx, tpl_frame, rf_idx, bsize); } } #endif for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { mode_estimation(cpi, x, xd, &sf, gf_picture, frame_idx, tpl_frame, src_diff, coeff, qcoeff, dqcoeff, mi_row, mi_col, bsize, tx_size, ref_frame, predictor, &recon_error, &sse); // Motion flow dependency dispenser. tpl_model_store(tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize, tpl_frame->stride); tpl_model_update(cpi->tpl_stats, tpl_frame->tpl_stats_ptr, mi_row, mi_col, bsize); } } } #if CONFIG_NON_GREEDY_MV #define DUMP_TPL_STATS 0 #if DUMP_TPL_STATS static void dump_buf(uint8_t *buf, int stride, int row, int col, int h, int w) { int i, j; printf("%d %d\n", h, w); for (i = 0; i < h; ++i) { for (j = 0; j < w; ++j) { printf("%d ", buf[(row + i) * stride + col + j]); } } printf("\n"); } static void dump_frame_buf(const YV12_BUFFER_CONFIG *frame_buf) { dump_buf(frame_buf->y_buffer, frame_buf->y_stride, 0, 0, frame_buf->y_height, frame_buf->y_width); dump_buf(frame_buf->u_buffer, frame_buf->uv_stride, 0, 0, frame_buf->uv_height, frame_buf->uv_width); dump_buf(frame_buf->v_buffer, frame_buf->uv_stride, 0, 0, frame_buf->uv_height, frame_buf->uv_width); } static void dump_tpl_stats(const VP9_COMP *cpi, int tpl_group_frames, const GF_GROUP *gf_group, const GF_PICTURE *gf_picture, BLOCK_SIZE bsize) { int frame_idx; const VP9_COMMON *cm = &cpi->common; int rf_idx; for (frame_idx = 1; frame_idx < tpl_group_frames; ++frame_idx) { for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { const TplDepFrame *tpl_frame = &cpi->tpl_stats[frame_idx]; int mi_row, mi_col; int ref_frame_idx; const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; ref_frame_idx = gf_picture[frame_idx].ref_frame[rf_idx]; if (ref_frame_idx != -1) { YV12_BUFFER_CONFIG *ref_frame_buf = gf_picture[ref_frame_idx].frame; const int gf_frame_offset = gf_group->frame_gop_index[frame_idx]; const int ref_gf_frame_offset = gf_group->frame_gop_index[ref_frame_idx]; printf("=\n"); printf( "frame_idx %d mi_rows %d mi_cols %d bsize %d ref_frame_idx %d " "rf_idx %d gf_frame_offset %d ref_gf_frame_offset %d\n", frame_idx, cm->mi_rows, cm->mi_cols, mi_width * MI_SIZE, ref_frame_idx, rf_idx, gf_frame_offset, ref_gf_frame_offset); for (mi_row = 0; mi_row < cm->mi_rows; ++mi_row) { for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) { int_mv mv = vp9_motion_field_info_get_mv(&cpi->motion_field_info, frame_idx, rf_idx, bsize, mi_row, mi_col); printf("%d %d %d %d\n", mi_row, mi_col, mv.as_mv.row, mv.as_mv.col); } } } for (mi_row = 0; 
mi_row < cm->mi_rows; ++mi_row) { for (mi_col = 0; mi_col < cm->mi_cols; ++mi_col) { if ((mi_row % mi_height) == 0 && (mi_col % mi_width) == 0) { const TplDepStats *tpl_ptr = &tpl_frame ->tpl_stats_ptr[mi_row * tpl_frame->stride + mi_col]; printf("%f ", tpl_ptr->feature_score); } } } printf("\n"); for (mi_row = 0; mi_row < cm->mi_rows; mi_row += mi_height) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col += mi_width) { const int mv_mode = tpl_frame ->mv_mode_arr[rf_idx][mi_row * tpl_frame->stride + mi_col]; printf("%d ", mv_mode); } } printf("\n"); dump_frame_buf(gf_picture[frame_idx].frame); dump_frame_buf(ref_frame_buf); } } } } #endif // DUMP_TPL_STATS #endif // CONFIG_NON_GREEDY_MV static void init_tpl_buffer(VP9_COMP *cpi) { VP9_COMMON *cm = &cpi->common; int frame; const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); #if CONFIG_NON_GREEDY_MV int rf_idx; vpx_free(cpi->select_mv_arr); CHECK_MEM_ERROR( cm, cpi->select_mv_arr, vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->select_mv_arr))); #endif // TODO(jingning): Reduce the actual memory use for tpl model build up. for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { if (cpi->tpl_stats[frame].width >= mi_cols && cpi->tpl_stats[frame].height >= mi_rows && cpi->tpl_stats[frame].tpl_stats_ptr) continue; #if CONFIG_NON_GREEDY_MV for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]); CHECK_MEM_ERROR( cm, cpi->tpl_stats[frame].mv_mode_arr[rf_idx], vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->tpl_stats[frame].mv_mode_arr[rf_idx]))); vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]); CHECK_MEM_ERROR( cm, cpi->tpl_stats[frame].rd_diff_arr[rf_idx], vpx_calloc(mi_rows * mi_cols * 4, sizeof(*cpi->tpl_stats[frame].rd_diff_arr[rf_idx]))); } #endif vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); CHECK_MEM_ERROR(cm, cpi->tpl_stats[frame].tpl_stats_ptr, vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->tpl_stats[frame].tpl_stats_ptr))); cpi->tpl_stats[frame].is_valid = 0; cpi->tpl_stats[frame].width = mi_cols; cpi->tpl_stats[frame].height = mi_rows; cpi->tpl_stats[frame].stride = mi_cols; cpi->tpl_stats[frame].mi_rows = cm->mi_rows; cpi->tpl_stats[frame].mi_cols = cm->mi_cols; } for (frame = 0; frame < REF_FRAMES; ++frame) { cpi->enc_frame_buf[frame].mem_valid = 0; cpi->enc_frame_buf[frame].released = 1; } } static void free_tpl_buffer(VP9_COMP *cpi) { int frame; #if CONFIG_NON_GREEDY_MV vp9_free_motion_field_info(&cpi->motion_field_info); vpx_free(cpi->select_mv_arr); #endif for (frame = 0; frame < MAX_ARF_GOP_SIZE; ++frame) { #if CONFIG_NON_GREEDY_MV int rf_idx; for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { vpx_free(cpi->tpl_stats[frame].mv_mode_arr[rf_idx]); vpx_free(cpi->tpl_stats[frame].rd_diff_arr[rf_idx]); } #endif vpx_free(cpi->tpl_stats[frame].tpl_stats_ptr); cpi->tpl_stats[frame].is_valid = 0; } } static void setup_tpl_stats(VP9_COMP *cpi) { GF_PICTURE gf_picture[MAX_ARF_GOP_SIZE]; const GF_GROUP *gf_group = &cpi->twopass.gf_group; int tpl_group_frames = 0; int frame_idx; cpi->tpl_bsize = BLOCK_32X32; init_gop_frames(cpi, gf_picture, gf_group, &tpl_group_frames); init_tpl_stats(cpi); // Backward propagation from tpl_group_frames to 1. 
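  // Frames are visited from the end of the group toward its start, so each
  // frame's accumulated mc_flow is final before being propagated into the
  // reference frames it predicts from. USE_BUF_FRAME entries are skipped
  // since they only re-show an existing buffer.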
  for (frame_idx = tpl_group_frames - 1; frame_idx > 0; --frame_idx) {
    if (gf_picture[frame_idx].update_type == USE_BUF_FRAME) continue;
    mc_flow_dispenser(cpi, gf_picture, frame_idx, cpi->tpl_bsize);
  }
#if CONFIG_NON_GREEDY_MV
  cpi->tpl_ready = 1;
#if DUMP_TPL_STATS
  dump_tpl_stats(cpi, tpl_group_frames, gf_group, gf_picture, cpi->tpl_bsize);
#endif  // DUMP_TPL_STATS
#endif  // CONFIG_NON_GREEDY_MV
}

static void init_encode_frame_result(ENCODE_FRAME_RESULT *encode_frame_result) {
  encode_frame_result->show_idx = -1;  // Actual encoding doesn't happen.
}

#if !CONFIG_REALTIME_ONLY
static void update_encode_frame_result(
    ENCODE_FRAME_RESULT *encode_frame_result, int show_idx,
    FRAME_UPDATE_TYPE update_type, const YV12_BUFFER_CONFIG *source_frame,
    const YV12_BUFFER_CONFIG *coded_frame, int quantize_index,
    uint32_t bit_depth, uint32_t input_bit_depth) {
  PSNR_STATS psnr;
#if CONFIG_VP9_HIGHBITDEPTH
  vpx_calc_highbd_psnr(source_frame, coded_frame, &psnr, bit_depth,
                       input_bit_depth);
#else
  (void)bit_depth;
  (void)input_bit_depth;
  vpx_calc_psnr(source_frame, coded_frame, &psnr);
#endif
  encode_frame_result->psnr = psnr.psnr[0];
  encode_frame_result->sse = psnr.sse[0];
  encode_frame_result->show_idx = show_idx;
  encode_frame_result->update_type = update_type;
  encode_frame_result->quantize_index = quantize_index;
}
#endif  // !CONFIG_REALTIME_ONLY

int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags,
                            size_t *size, uint8_t *dest, int64_t *time_stamp,
                            int64_t *time_end, int flush,
                            ENCODE_FRAME_RESULT *encode_frame_result) {
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  VP9_COMMON *const cm = &cpi->common;
  BufferPool *const pool = cm->buffer_pool;
  RATE_CONTROL *const rc = &cpi->rc;
  struct vpx_usec_timer cmptimer;
  YV12_BUFFER_CONFIG *force_src_buffer = NULL;
  struct lookahead_entry *last_source = NULL;
  struct lookahead_entry *source = NULL;
  int arf_src_index;
  const int gf_group_index = cpi->twopass.gf_group.index;
  int i;

  init_encode_frame_result(encode_frame_result);

  if (is_one_pass_cbr_svc(cpi)) {
    vp9_one_pass_cbr_svc_start_layer(cpi);
  }

  vpx_usec_timer_start(&cmptimer);

  vp9_set_high_precision_mv(cpi, ALTREF_HIGH_PRECISION_MV);

  // Is multi-arf enabled?
  // Note that at the moment multi_arf is only configured for 2 pass VBR and
  // will not work properly with svc.
  // Enable Jingning's new "multi_layer_arf" code if "enable_auto_arf" is
  // greater than or equal to 2.
  if ((oxcf->pass == 2) && !cpi->use_svc && (cpi->oxcf.enable_auto_arf >= 2))
    cpi->multi_layer_arf = 1;
  else
    cpi->multi_layer_arf = 0;

  // Normal defaults
  cm->reset_frame_context = 0;
  cm->refresh_frame_context = 1;
  if (!is_one_pass_cbr_svc(cpi)) {
    cpi->refresh_last_frame = 1;
    cpi->refresh_golden_frame = 0;
    cpi->refresh_alt_ref_frame = 0;
  }

  // Should we encode an arf frame.
  arf_src_index = get_arf_src_index(cpi);
  if (arf_src_index) {
    for (i = 0; i <= arf_src_index; ++i) {
      struct lookahead_entry *e = vp9_lookahead_peek(cpi->lookahead, i);
      // Avoid creating an alt-ref if there's a forced keyframe pending.
      if (e == NULL) {
        break;
      } else if (e->flags == VPX_EFLAG_FORCE_KF) {
        arf_src_index = 0;
        flush = 1;
        break;
      }
    }
  }

  // Clear arf index stack before group of pictures processing starts.
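  // gf_group.index == 1 is where a new group's ARF is about to be coded (the
  // tpl setup further below also keys off index 1), so the ARF index stack is
  // reset before this group pushes anything onto it.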
if (gf_group_index == 1) { stack_init(cpi->twopass.gf_group.arf_index_stack, MAX_LAG_BUFFERS * 2); cpi->twopass.gf_group.stack_size = 0; } if (arf_src_index) { assert(arf_src_index <= rc->frames_to_key); if ((source = vp9_lookahead_peek(cpi->lookahead, arf_src_index)) != NULL) { cpi->alt_ref_source = source; #if !CONFIG_REALTIME_ONLY if ((oxcf->mode != REALTIME) && (oxcf->arnr_max_frames > 0) && (oxcf->arnr_strength > 0)) { int bitrate = cpi->rc.avg_frame_bandwidth / 40; int not_low_bitrate = bitrate > ALT_REF_AQ_LOW_BITRATE_BOUNDARY; int not_last_frame = (cpi->lookahead->sz - arf_src_index > 1); not_last_frame |= ALT_REF_AQ_APPLY_TO_LAST_FRAME; // Produce the filtered ARF frame. vp9_temporal_filter(cpi, arf_src_index); vpx_extend_frame_borders(&cpi->alt_ref_buffer); // for small bitrates segmentation overhead usually // eats all bitrate gain from enabling delta quantizers if (cpi->oxcf.alt_ref_aq != 0 && not_low_bitrate && not_last_frame) vp9_alt_ref_aq_setup_mode(cpi->alt_ref_aq, cpi); force_src_buffer = &cpi->alt_ref_buffer; } #endif cm->show_frame = 0; cm->intra_only = 0; cpi->refresh_alt_ref_frame = 1; cpi->refresh_golden_frame = 0; cpi->refresh_last_frame = 0; rc->is_src_frame_alt_ref = 0; rc->source_alt_ref_pending = 0; } else { rc->source_alt_ref_pending = 0; } } if (!source) { // Get last frame source. if (cm->current_video_frame > 0) { if ((last_source = vp9_lookahead_peek(cpi->lookahead, -1)) == NULL) return -1; } // Read in the source frame. if (cpi->use_svc || cpi->svc.set_intra_only_frame) source = vp9_svc_lookahead_pop(cpi, cpi->lookahead, flush); else source = vp9_lookahead_pop(cpi->lookahead, flush); if (source != NULL) { cm->show_frame = 1; cm->intra_only = 0; // If the flags indicate intra frame, but if the current picture is for // spatial layer above first_spatial_layer_to_encode, it should not be an // intra picture. if ((source->flags & VPX_EFLAG_FORCE_KF) && cpi->use_svc && cpi->svc.spatial_layer_id > cpi->svc.first_spatial_layer_to_encode) { source->flags &= ~(unsigned int)(VPX_EFLAG_FORCE_KF); } // Check to see if the frame should be encoded as an arf overlay. check_src_altref(cpi, source); } } if (source) { cpi->un_scaled_source = cpi->Source = force_src_buffer ? force_src_buffer : &source->img; #ifdef ENABLE_KF_DENOISE // Copy of raw source for metrics calculation. if (is_psnr_calc_enabled(cpi)) vp9_copy_and_extend_frame(cpi->Source, &cpi->raw_unscaled_source); #endif cpi->unscaled_last_source = last_source != NULL ? &last_source->img : NULL; *time_stamp = source->ts_start; *time_end = source->ts_end; *frame_flags = (source->flags & VPX_EFLAG_FORCE_KF) ? FRAMEFLAGS_KEY : 0; } else { *size = 0; return -1; } if (source->ts_start < cpi->first_time_stamp_ever) { cpi->first_time_stamp_ever = source->ts_start; cpi->last_end_time_stamp_seen = source->ts_start; } // Clear down mmx registers vpx_clear_system_state(); // adjust frame rates based on timestamps given if (cm->show_frame) { if (cpi->use_svc && cpi->svc.use_set_ref_frame_config && cpi->svc.duration[cpi->svc.spatial_layer_id] > 0) vp9_svc_adjust_frame_rate(cpi); else adjust_frame_rate(cpi, source); } if (is_one_pass_cbr_svc(cpi)) { vp9_update_temporal_layer_framerate(cpi); vp9_restore_layer_context(cpi); } // Find a free buffer for the new frame, releasing the reference previously // held. 
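  // Drop the reference to the previously held buffer before taking a new one
  // from the pool; get_free_fb() returns INVALID_IDX when no buffer is
  // available, which is treated as a hard failure.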
if (cm->new_fb_idx != INVALID_IDX) { --pool->frame_bufs[cm->new_fb_idx].ref_count; } cm->new_fb_idx = get_free_fb(cm); if (cm->new_fb_idx == INVALID_IDX) return -1; cm->cur_frame = &pool->frame_bufs[cm->new_fb_idx]; // Start with a 0 size frame. *size = 0; cpi->frame_flags = *frame_flags; #if !CONFIG_REALTIME_ONLY if ((oxcf->pass == 2) && !cpi->use_svc) { vp9_rc_get_second_pass_params(cpi); } else if (oxcf->pass == 1) { set_frame_size(cpi); } #endif // !CONFIG_REALTIME_ONLY if (oxcf->pass != 1 && cpi->level_constraint.level_index >= 0 && cpi->level_constraint.fail_flag == 0) level_rc_framerate(cpi, arf_src_index); if (cpi->oxcf.pass != 0 || cpi->use_svc || frame_is_intra_only(cm) == 1) { for (i = 0; i < REFS_PER_FRAME; ++i) cpi->scaled_ref_idx[i] = INVALID_IDX; } if (cpi->kmeans_data_arr_alloc == 0) { const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); #if CONFIG_MULTITHREAD pthread_mutex_init(&cpi->kmeans_mutex, NULL); #endif CHECK_MEM_ERROR( cm, cpi->kmeans_data_arr, vpx_calloc(mi_rows * mi_cols, sizeof(*cpi->kmeans_data_arr))); cpi->kmeans_data_stride = mi_cols; cpi->kmeans_data_arr_alloc = 1; } #if CONFIG_NON_GREEDY_MV { const int mi_cols = mi_cols_aligned_to_sb(cm->mi_cols); const int mi_rows = mi_cols_aligned_to_sb(cm->mi_rows); Status status = vp9_alloc_motion_field_info( &cpi->motion_field_info, MAX_ARF_GOP_SIZE, mi_rows, mi_cols); if (status == STATUS_FAILED) { vpx_internal_error(&(cm)->error, VPX_CODEC_MEM_ERROR, "vp9_alloc_motion_field_info failed"); } } #endif // CONFIG_NON_GREEDY_MV if (gf_group_index == 1 && cpi->twopass.gf_group.update_type[gf_group_index] == ARF_UPDATE && cpi->sf.enable_tpl_model) { init_tpl_buffer(cpi); vp9_estimate_qp_gop(cpi); setup_tpl_stats(cpi); } #if CONFIG_BITSTREAM_DEBUG assert(cpi->oxcf.max_threads == 0 && "bitstream debug tool does not support multithreading"); bitstream_queue_record_write(); #endif #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG bitstream_queue_set_frame_write(cm->current_video_frame * 2 + cm->show_frame); #endif cpi->td.mb.fp_src_pred = 0; #if CONFIG_REALTIME_ONLY if (cpi->use_svc) { SvcEncode(cpi, size, dest, frame_flags); } else { // One pass encode Pass0Encode(cpi, size, dest, frame_flags); } #else // !CONFIG_REALTIME_ONLY if (oxcf->pass == 1 && !cpi->use_svc) { const int lossless = is_lossless_requested(oxcf); #if CONFIG_VP9_HIGHBITDEPTH if (cpi->oxcf.use_highbitdepth) cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_highbd_fwht4x4 : vpx_highbd_fdct4x4; else cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4; cpi->td.mb.highbd_inv_txfm_add = lossless ? vp9_highbd_iwht4x4_add : vp9_highbd_idct4x4_add; #else cpi->td.mb.fwd_txfm4x4 = lossless ? vp9_fwht4x4 : vpx_fdct4x4; #endif // CONFIG_VP9_HIGHBITDEPTH cpi->td.mb.inv_txfm_add = lossless ? vp9_iwht4x4_add : vp9_idct4x4_add; vp9_first_pass(cpi, source); } else if (oxcf->pass == 2 && !cpi->use_svc) { Pass2Encode(cpi, size, dest, frame_flags); // update_encode_frame_result() depends on twopass.gf_group.index, // cm->new_fb_idx and cpi->Source being properly updated for the current // frame and not yet updated for the next frame. // The update locations are as follows. // 1) twopass.gf_group.index is initialized at define_gf_group by vp9_zero() // for the first frame in the gf_group and is updated for the next frame at // vp9_twopass_postencode_update(). // 2) cpi->Source is updated at the beginning of this function, i.e.
// vp9_get_compressed_data() // 3) cm->new_fb_idx is updated at the beginning of this function by // get_free_fb(cm) // TODO(angiebird): Improve the codebase to make the update of frame // dependent variables more robust. update_encode_frame_result( encode_frame_result, source->show_idx, cpi->twopass.gf_group.update_type[cpi->twopass.gf_group.index], cpi->Source, get_frame_new_buffer(cm), vp9_get_quantizer(cpi), cm->bit_depth, cpi->oxcf.input_bit_depth); vp9_twopass_postencode_update(cpi); } else if (cpi->use_svc) { SvcEncode(cpi, size, dest, frame_flags); } else { // One pass encode Pass0Encode(cpi, size, dest, frame_flags); } #endif // CONFIG_REALTIME_ONLY if (cm->show_frame) cm->cur_show_frame_fb_idx = cm->new_fb_idx; if (cm->refresh_frame_context) cm->frame_contexts[cm->frame_context_idx] = *cm->fc; // No frame encoded, or frame was dropped, release scaled references. if ((*size == 0) && (frame_is_intra_only(cm) == 0)) { release_scaled_references(cpi); } if (*size > 0) { cpi->droppable = !frame_is_reference(cpi); } // Save layer specific state. if (is_one_pass_cbr_svc(cpi) || ((cpi->svc.number_temporal_layers > 1 || cpi->svc.number_spatial_layers > 1) && oxcf->pass == 2)) { vp9_save_layer_context(cpi); } vpx_usec_timer_mark(&cmptimer); cpi->time_compress_data += vpx_usec_timer_elapsed(&cmptimer); if (cpi->keep_level_stats && oxcf->pass != 1) update_level_info(cpi, size, arf_src_index); #if CONFIG_INTERNAL_STATS if (oxcf->pass != 1) { double samples = 0.0; cpi->bytes += (int)(*size); if (cm->show_frame) { uint32_t bit_depth = 8; uint32_t in_bit_depth = 8; cpi->count++; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { in_bit_depth = cpi->oxcf.input_bit_depth; bit_depth = cm->bit_depth; } #endif if (cpi->b_calculate_psnr) { YV12_BUFFER_CONFIG *orig = cpi->raw_source_frame; YV12_BUFFER_CONFIG *recon = cpi->common.frame_to_show; YV12_BUFFER_CONFIG *pp = &cm->post_proc_buffer; PSNR_STATS psnr; #if CONFIG_VP9_HIGHBITDEPTH vpx_calc_highbd_psnr(orig, recon, &psnr, cpi->td.mb.e_mbd.bd, in_bit_depth); #else vpx_calc_psnr(orig, recon, &psnr); #endif // CONFIG_VP9_HIGHBITDEPTH adjust_image_stat(psnr.psnr[1], psnr.psnr[2], psnr.psnr[3], psnr.psnr[0], &cpi->psnr); cpi->total_sq_error += psnr.sse[0]; cpi->total_samples += psnr.samples[0]; samples = psnr.samples[0]; { PSNR_STATS psnr2; double frame_ssim2 = 0, weight = 0; #if CONFIG_VP9_POSTPROC if (vpx_alloc_frame_buffer( pp, recon->y_crop_width, recon->y_crop_height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment) < 0) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to allocate post processing buffer"); } { vp9_ppflags_t ppflags; ppflags.post_proc_flag = VP9D_DEBLOCK; ppflags.deblocking_level = 0; // not used in vp9_post_proc_frame() ppflags.noise_level = 0; // not used in vp9_post_proc_frame() vp9_post_proc_frame(cm, pp, &ppflags, cpi->un_scaled_source->y_width); } #endif vpx_clear_system_state(); #if CONFIG_VP9_HIGHBITDEPTH vpx_calc_highbd_psnr(orig, pp, &psnr2, cpi->td.mb.e_mbd.bd, cpi->oxcf.input_bit_depth); #else vpx_calc_psnr(orig, pp, &psnr2); #endif // CONFIG_VP9_HIGHBITDEPTH cpi->totalp_sq_error += psnr2.sse[0]; cpi->totalp_samples += psnr2.samples[0]; adjust_image_stat(psnr2.psnr[1], psnr2.psnr[2], psnr2.psnr[3], psnr2.psnr[0], &cpi->psnrp); #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { frame_ssim2 = vpx_highbd_calc_ssim(orig, recon, &weight, bit_depth, in_bit_depth); } else { frame_ssim2 = vpx_calc_ssim(orig, recon,
&weight); } #else frame_ssim2 = vpx_calc_ssim(orig, recon, &weight); #endif // CONFIG_VP9_HIGHBITDEPTH cpi->worst_ssim = VPXMIN(cpi->worst_ssim, frame_ssim2); cpi->summed_quality += frame_ssim2 * weight; cpi->summed_weights += weight; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { frame_ssim2 = vpx_highbd_calc_ssim(orig, pp, &weight, bit_depth, in_bit_depth); } else { frame_ssim2 = vpx_calc_ssim(orig, pp, &weight); } #else frame_ssim2 = vpx_calc_ssim(orig, pp, &weight); #endif // CONFIG_VP9_HIGHBITDEPTH cpi->summedp_quality += frame_ssim2 * weight; cpi->summedp_weights += weight; #if 0 if (cm->show_frame) { FILE *f = fopen("q_used.stt", "a"); fprintf(f, "%5d : Y%f7.3:U%f7.3:V%f7.3:F%f7.3:S%7.3f\n", cpi->common.current_video_frame, psnr2.psnr[1], psnr2.psnr[2], psnr2.psnr[3], psnr2.psnr[0], frame_ssim2); fclose(f); } #endif } } if (cpi->b_calculate_blockiness) { #if CONFIG_VP9_HIGHBITDEPTH if (!cm->use_highbitdepth) #endif { double frame_blockiness = vp9_get_blockiness( cpi->Source->y_buffer, cpi->Source->y_stride, cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride, cpi->Source->y_width, cpi->Source->y_height); cpi->worst_blockiness = VPXMAX(cpi->worst_blockiness, frame_blockiness); cpi->total_blockiness += frame_blockiness; } } if (cpi->b_calculate_consistency) { #if CONFIG_VP9_HIGHBITDEPTH if (!cm->use_highbitdepth) #endif { double this_inconsistency = vpx_get_ssim_metrics( cpi->Source->y_buffer, cpi->Source->y_stride, cm->frame_to_show->y_buffer, cm->frame_to_show->y_stride, cpi->Source->y_width, cpi->Source->y_height, cpi->ssim_vars, &cpi->metrics, 1); const double peak = (double)((1 << cpi->oxcf.input_bit_depth) - 1); double consistency = vpx_sse_to_psnr(samples, peak, (double)cpi->total_inconsistency); if (consistency > 0.0) cpi->worst_consistency = VPXMIN(cpi->worst_consistency, consistency); cpi->total_inconsistency += this_inconsistency; } } { double y, u, v, frame_all; frame_all = vpx_calc_fastssim(cpi->Source, cm->frame_to_show, &y, &u, &v, bit_depth, in_bit_depth); adjust_image_stat(y, u, v, frame_all, &cpi->fastssim); } { double y, u, v, frame_all; frame_all = vpx_psnrhvs(cpi->Source, cm->frame_to_show, &y, &u, &v, bit_depth, in_bit_depth); adjust_image_stat(y, u, v, frame_all, &cpi->psnrhvs); } } } #endif if (is_one_pass_cbr_svc(cpi)) { if (cm->show_frame) { ++cpi->svc.spatial_layer_to_encode; if (cpi->svc.spatial_layer_to_encode >= cpi->svc.number_spatial_layers) cpi->svc.spatial_layer_to_encode = 0; } } vpx_clear_system_state(); return 0; } int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *flags) { VP9_COMMON *cm = &cpi->common; #if !CONFIG_VP9_POSTPROC (void)flags; #endif if (!cm->show_frame) { return -1; } else { int ret; #if CONFIG_VP9_POSTPROC ret = vp9_post_proc_frame(cm, dest, flags, cpi->un_scaled_source->y_width); #else if (cm->frame_to_show) { *dest = *cm->frame_to_show; dest->y_width = cm->width; dest->y_height = cm->height; dest->uv_width = cm->width >> cm->subsampling_x; dest->uv_height = cm->height >> cm->subsampling_y; ret = 0; } else { ret = -1; } #endif // !CONFIG_VP9_POSTPROC vpx_clear_system_state(); return ret; } } int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode, VPX_SCALING vert_mode) { VP9_COMMON *cm = &cpi->common; int hr = 0, hs = 0, vr = 0, vs = 0; if (horiz_mode > ONETWO || vert_mode > ONETWO) return -1; Scale2Ratio(horiz_mode, &hr, &hs); Scale2Ratio(vert_mode, &vr, &vs); // always go to the next whole number cm->width = (hs - 1 + cpi->oxcf.width * hr) / hs; cm->height = (vs - 1 + 
cpi->oxcf.height * vr) / vs; if (cm->current_video_frame) { assert(cm->width <= cpi->initial_width); assert(cm->height <= cpi->initial_height); } update_frame_size(cpi); return 0; } int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, unsigned int height) { VP9_COMMON *cm = &cpi->common; #if CONFIG_VP9_HIGHBITDEPTH update_initial_width(cpi, cm->use_highbitdepth, 1, 1); #else update_initial_width(cpi, 0, 1, 1); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_TEMPORAL_DENOISING setup_denoiser_buffer(cpi); #endif alloc_raw_frame_buffers(cpi); if (width) { cm->width = width; if (cm->width > cpi->initial_width) { cm->width = cpi->initial_width; printf("Warning: Desired width too large, changed to %d\n", cm->width); } } if (height) { cm->height = height; if (cm->height > cpi->initial_height) { cm->height = cpi->initial_height; printf("Warning: Desired height too large, changed to %d\n", cm->height); } } assert(cm->width <= cpi->initial_width); assert(cm->height <= cpi->initial_height); update_frame_size(cpi); return 0; } void vp9_set_svc(VP9_COMP *cpi, int use_svc) { cpi->use_svc = use_svc; return; } int vp9_get_quantizer(const VP9_COMP *cpi) { return cpi->common.base_qindex; } void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags) { if (flags & (VP8_EFLAG_NO_REF_LAST | VP8_EFLAG_NO_REF_GF | VP8_EFLAG_NO_REF_ARF)) { int ref = 7; if (flags & VP8_EFLAG_NO_REF_LAST) ref ^= VP9_LAST_FLAG; if (flags & VP8_EFLAG_NO_REF_GF) ref ^= VP9_GOLD_FLAG; if (flags & VP8_EFLAG_NO_REF_ARF) ref ^= VP9_ALT_FLAG; vp9_use_as_reference(cpi, ref); } if (flags & (VP8_EFLAG_NO_UPD_LAST | VP8_EFLAG_NO_UPD_GF | VP8_EFLAG_NO_UPD_ARF | VP8_EFLAG_FORCE_GF | VP8_EFLAG_FORCE_ARF)) { int upd = 7; if (flags & VP8_EFLAG_NO_UPD_LAST) upd ^= VP9_LAST_FLAG; if (flags & VP8_EFLAG_NO_UPD_GF) upd ^= VP9_GOLD_FLAG; if (flags & VP8_EFLAG_NO_UPD_ARF) upd ^= VP9_ALT_FLAG; vp9_update_reference(cpi, upd); } if (flags & VP8_EFLAG_NO_UPD_ENTROPY) { vp9_update_entropy(cpi, 0); } } void vp9_set_row_mt(VP9_COMP *cpi) { // Enable row based multi-threading for supported modes of encoding cpi->row_mt = 0; if (((cpi->oxcf.mode == GOOD || cpi->oxcf.mode == BEST) && cpi->oxcf.speed < 5 && cpi->oxcf.pass == 1) && cpi->oxcf.row_mt && !cpi->use_svc) cpi->row_mt = 1; if (cpi->oxcf.mode == GOOD && cpi->oxcf.speed < 5 && (cpi->oxcf.pass == 0 || cpi->oxcf.pass == 2) && cpi->oxcf.row_mt && !cpi->use_svc) cpi->row_mt = 1; // In realtime mode, enable row based multi-threading for all the speed levels // where non-rd path is used. if (cpi->oxcf.mode == REALTIME && cpi->oxcf.speed >= 5 && cpi->oxcf.row_mt) { cpi->row_mt = 1; } if (cpi->row_mt) cpi->row_mt_bit_exact = 1; else cpi->row_mt_bit_exact = 0; } libvpx-1.8.2/vp9/encoder/vp9_encoder.h000066400000000000000000000763001357355204000175630ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */ #ifndef VPX_VP9_ENCODER_VP9_ENCODER_H_ #define VPX_VP9_ENCODER_VP9_ENCODER_H_ #include <stdio.h> #include "./vpx_config.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx/vp8cx.h" #if CONFIG_INTERNAL_STATS #include "vpx_dsp/ssim.h" #endif #include "vpx_dsp/variance.h" #include "vpx_dsp/psnr.h" #include "vpx_ports/system_state.h" #include "vpx_util/vpx_thread.h" #include "vpx_util/vpx_timestamp.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_ppflags.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_thread_common.h" #include "vp9/common/vp9_onyxc_int.h" #if !CONFIG_REALTIME_ONLY #include "vp9/encoder/vp9_alt_ref_aq.h" #endif #include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/encoder/vp9_context_tree.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_ethread.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_job_queue.h" #include "vp9/encoder/vp9_lookahead.h" #include "vp9/encoder/vp9_mbgraph.h" #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_noise_estimate.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_rd.h" #include "vp9/encoder/vp9_speed_features.h" #include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_tokenize.h" #if CONFIG_VP9_TEMPORAL_DENOISING #include "vp9/encoder/vp9_denoiser.h" #endif #ifdef __cplusplus extern "C" { #endif // vp9 uses 10,000,000 ticks/second as time stamp #define TICKS_PER_SEC 10000000 typedef struct { int nmvjointcost[MV_JOINTS]; int nmvcosts[2][MV_VALS]; int nmvcosts_hp[2][MV_VALS]; vpx_prob segment_pred_probs[PREDICTION_PROBS]; unsigned char *last_frame_seg_map_copy; // 0 = Intra, Last, GF, ARF signed char last_ref_lf_deltas[MAX_REF_LF_DELTAS]; // 0 = ZERO_MV, MV signed char last_mode_lf_deltas[MAX_MODE_LF_DELTAS]; FRAME_CONTEXT fc; } CODING_CONTEXT; typedef enum { // encode_breakout is disabled. ENCODE_BREAKOUT_DISABLED = 0, // encode_breakout is enabled. ENCODE_BREAKOUT_ENABLED = 1, // encode_breakout is enabled with small max_thresh limit. ENCODE_BREAKOUT_LIMITED = 2 } ENCODE_BREAKOUT_TYPE; typedef enum { NORMAL = 0, FOURFIVE = 1, THREEFIVE = 2, ONETWO = 3 } VPX_SCALING; typedef enum { // Good Quality Fast Encoding. The encoder balances quality with the amount of // time it takes to encode the output. Speed setting controls how fast. GOOD, // The encoder places priority on the quality of the output over encoding // speed. The output is compressed at the highest possible quality. This // option takes the longest amount of time to encode. Speed setting ignored. BEST, // Realtime/Live Encoding. This mode is optimized for realtime encoding (for // example, capturing a television signal or feed from a live camera). Speed // setting controls how fast. REALTIME } MODE; typedef enum { FRAMEFLAGS_KEY = 1 << 0, FRAMEFLAGS_GOLDEN = 1 << 1, FRAMEFLAGS_ALTREF = 1 << 2, } FRAMETYPE_FLAGS; typedef enum { NO_AQ = 0, VARIANCE_AQ = 1, COMPLEXITY_AQ = 2, CYCLIC_REFRESH_AQ = 3, EQUATOR360_AQ = 4, PERCEPTUAL_AQ = 5, PSNR_AQ = 6, // AQ based on lookahead temporal // variance (only valid for altref frames) LOOKAHEAD_AQ = 7, AQ_MODE_COUNT // This should always be the last member of the enum } AQ_MODE; typedef enum { RESIZE_NONE = 0, // No frame resizing allowed (except for SVC). RESIZE_FIXED = 1, // All frames are coded at the specified dimension. RESIZE_DYNAMIC = 2 // Coded size of each frame is determined by the codec.
} RESIZE_TYPE; typedef enum { kInvalid = 0, kLowSadLowSumdiff = 1, kLowSadHighSumdiff = 2, kHighSadLowSumdiff = 3, kHighSadHighSumdiff = 4, kLowVarHighSumdiff = 5, kVeryHighSad = 6, } CONTENT_STATE_SB; typedef struct VP9EncoderConfig { BITSTREAM_PROFILE profile; vpx_bit_depth_t bit_depth; // Codec bit-depth. int width; // width of data passed to the compressor int height; // height of data passed to the compressor unsigned int input_bit_depth; // Input bit depth. double init_framerate; // set to passed in framerate vpx_rational_t g_timebase; // equivalent to g_timebase in vpx_codec_enc_cfg_t vpx_rational64_t g_timebase_in_ts; // g_timebase * TICKS_PER_SEC int64_t target_bandwidth; // bandwidth to be used in bits per second int noise_sensitivity; // pre processing blur: recommendation 0 int sharpness; // sharpening output: recommendation 0: int speed; // maximum allowed bitrate for any intra frame in % of bitrate target. unsigned int rc_max_intra_bitrate_pct; // maximum allowed bitrate for any inter frame in % of bitrate target. unsigned int rc_max_inter_bitrate_pct; // percent of rate boost for golden frame in CBR mode. unsigned int gf_cbr_boost_pct; MODE mode; int pass; // Key Framing Operations int auto_key; // autodetect cut scenes and set the keyframes int key_freq; // maximum distance to key frame. int lag_in_frames; // how many frames lag before we start encoding // ---------------------------------------------------------------- // DATARATE CONTROL OPTIONS // vbr, cbr, constrained quality or constant quality enum vpx_rc_mode rc_mode; // buffer targeting aggressiveness int under_shoot_pct; int over_shoot_pct; // buffering parameters int64_t starting_buffer_level_ms; int64_t optimal_buffer_level_ms; int64_t maximum_buffer_size_ms; // Frame drop threshold. int drop_frames_water_mark; // controlling quality int fixed_q; int worst_allowed_q; int best_allowed_q; int cq_level; AQ_MODE aq_mode; // Adaptive Quantization mode // Special handling of Adaptive Quantization for AltRef frames int alt_ref_aq; // Internal frame size scaling. RESIZE_TYPE resize_mode; int scaled_frame_width; int scaled_frame_height; // Enable feature to reduce the frame quantization every x frames. int frame_periodic_boost; // two pass datarate control int two_pass_vbrbias; // two pass datarate control tweaks int two_pass_vbrmin_section; int two_pass_vbrmax_section; int vbr_corpus_complexity; // 0 indicates corpus vbr disabled // END DATARATE CONTROL OPTIONS // ---------------------------------------------------------------- // Spatial and temporal scalability. int ss_number_layers; // Number of spatial layers. int ts_number_layers; // Number of temporal layers. // Bitrate allocation for spatial layers. int layer_target_bitrate[VPX_MAX_LAYERS]; int ss_target_bitrate[VPX_SS_MAX_LAYERS]; int ss_enable_auto_arf[VPX_SS_MAX_LAYERS]; // Bitrate allocation (CBR mode) and framerate factor, for temporal layers. int ts_rate_decimator[VPX_TS_MAX_LAYERS]; int enable_auto_arf; int encode_breakout; // early breakout : for video conf recommend 800 /* Bitfield defining the error resiliency features to enable. * Can provide decodable frames after losses in previous * frames and decodable partitions after losses in the same frame. */ unsigned int error_resilient_mode; /* Bitfield defining the parallel decoding mode where the * decoding in successive frames may be conducted in parallel * just by decoding the frame headers. 
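 * (Presumably this works because backward-adaptive entropy updates are
 * skipped in this mode, so each frame's probabilities are fully determined
 * by its own header.)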
*/ unsigned int frame_parallel_decoding_mode; int arnr_max_frames; int arnr_strength; int min_gf_interval; int max_gf_interval; int tile_columns; int tile_rows; int enable_tpl_model; int max_threads; unsigned int target_level; vpx_fixed_buf_t two_pass_stats_in; #if CONFIG_FP_MB_STATS vpx_fixed_buf_t firstpass_mb_stats_in; #endif vp8e_tuning tuning; vp9e_tune_content content; #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth; #endif vpx_color_space_t color_space; vpx_color_range_t color_range; int render_width; int render_height; VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; int row_mt; unsigned int motion_vector_unit_test; } VP9EncoderConfig; static INLINE int is_lossless_requested(const VP9EncoderConfig *cfg) { return cfg->best_allowed_q == 0 && cfg->worst_allowed_q == 0; } typedef struct TplDepStats { int64_t intra_cost; int64_t inter_cost; int64_t mc_flow; int64_t mc_dep_cost; int64_t mc_ref_cost; int ref_frame_index; int_mv mv; } TplDepStats; #if CONFIG_NON_GREEDY_MV #define ZERO_MV_MODE 0 #define NEW_MV_MODE 1 #define NEAREST_MV_MODE 2 #define NEAR_MV_MODE 3 #define MAX_MV_MODE 4 #endif typedef struct TplDepFrame { uint8_t is_valid; TplDepStats *tpl_stats_ptr; int stride; int width; int height; int mi_rows; int mi_cols; int base_qindex; #if CONFIG_NON_GREEDY_MV int lambda; int *mv_mode_arr[3]; double *rd_diff_arr[3]; #endif } TplDepFrame; #define TPL_DEP_COST_SCALE_LOG2 4 // TODO(jingning) All spatially adaptive variables should go to TileDataEnc. typedef struct TileDataEnc { TileInfo tile_info; int thresh_freq_fact[BLOCK_SIZES][MAX_MODES]; #if CONFIG_CONSISTENT_RECODE int thresh_freq_fact_prev[BLOCK_SIZES][MAX_MODES]; #endif int8_t mode_map[BLOCK_SIZES][MAX_MODES]; FIRSTPASS_DATA fp_data; VP9RowMTSync row_mt_sync; // Used for adaptive_rd_thresh with row multithreading int *row_base_thresh_freq_fact; } TileDataEnc; typedef struct RowMTInfo { JobQueueHandle job_queue_hdl; #if CONFIG_MULTITHREAD pthread_mutex_t job_mutex; #endif } RowMTInfo; typedef struct { TOKENEXTRA *start; TOKENEXTRA *stop; unsigned int count; } TOKENLIST; typedef struct MultiThreadHandle { int allocated_tile_rows; int allocated_tile_cols; int allocated_vert_unit_rows; // Frame level params int num_tile_vert_sbs[MAX_NUM_TILE_ROWS]; // Job Queue structure and handles JobQueue *job_queue; int jobs_per_tile_col; RowMTInfo row_mt_info[MAX_NUM_TILE_COLS]; int thread_id_to_tile_id[MAX_NUM_THREADS]; // Mapping of threads to tiles } MultiThreadHandle; typedef struct RD_COUNTS { vp9_coeff_count coef_counts[TX_SIZES][PLANE_TYPES]; int64_t comp_pred_diff[REFERENCE_MODES]; int64_t filter_diff[SWITCHABLE_FILTER_CONTEXTS]; } RD_COUNTS; typedef struct ThreadData { MACROBLOCK mb; RD_COUNTS rd_counts; FRAME_COUNTS *counts; PICK_MODE_CONTEXT *leaf_tree; PC_TREE *pc_tree; PC_TREE *pc_root; } ThreadData; struct EncWorkerData; typedef struct ActiveMap { int enabled; int update; unsigned char *map; } ActiveMap; typedef enum { Y, U, V, ALL } STAT_TYPE; typedef struct IMAGE_STAT { double stat[ALL + 1]; double worst; } ImageStat; // Kf noise filtering currently disabled by default in build. 
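// To experiment with it, uncomment the define below and rebuild: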
// #define ENABLE_KF_DENOISE 1 #define CPB_WINDOW_SIZE 4 #define FRAME_WINDOW_SIZE 128 #define SAMPLE_RATE_GRACE_P 0.015 #define VP9_LEVELS 14 typedef enum { LEVEL_UNKNOWN = 0, LEVEL_AUTO = 1, LEVEL_1 = 10, LEVEL_1_1 = 11, LEVEL_2 = 20, LEVEL_2_1 = 21, LEVEL_3 = 30, LEVEL_3_1 = 31, LEVEL_4 = 40, LEVEL_4_1 = 41, LEVEL_5 = 50, LEVEL_5_1 = 51, LEVEL_5_2 = 52, LEVEL_6 = 60, LEVEL_6_1 = 61, LEVEL_6_2 = 62, LEVEL_MAX = 255 } VP9_LEVEL; typedef struct { VP9_LEVEL level; uint64_t max_luma_sample_rate; uint32_t max_luma_picture_size; uint32_t max_luma_picture_breadth; double average_bitrate; // in kilobits per second double max_cpb_size; // in kilobits double compression_ratio; uint8_t max_col_tiles; uint32_t min_altref_distance; uint8_t max_ref_frame_buffers; } Vp9LevelSpec; extern const Vp9LevelSpec vp9_level_defs[VP9_LEVELS]; typedef struct { int64_t ts; // timestamp uint32_t luma_samples; uint32_t size; // in bytes } FrameRecord; typedef struct { FrameRecord buf[FRAME_WINDOW_SIZE]; uint8_t start; uint8_t len; } FrameWindowBuffer; typedef struct { uint8_t seen_first_altref; uint32_t frames_since_last_altref; uint64_t total_compressed_size; uint64_t total_uncompressed_size; double time_encoded; // in seconds FrameWindowBuffer frame_window_buffer; int ref_refresh_map; } Vp9LevelStats; typedef struct { Vp9LevelStats level_stats; Vp9LevelSpec level_spec; } Vp9LevelInfo; typedef enum { BITRATE_TOO_LARGE = 0, LUMA_PIC_SIZE_TOO_LARGE, LUMA_PIC_BREADTH_TOO_LARGE, LUMA_SAMPLE_RATE_TOO_LARGE, CPB_TOO_LARGE, COMPRESSION_RATIO_TOO_SMALL, TOO_MANY_COLUMN_TILE, ALTREF_DIST_TOO_SMALL, TOO_MANY_REF_BUFFER, TARGET_LEVEL_FAIL_IDS } TARGET_LEVEL_FAIL_ID; typedef struct { int8_t level_index; uint8_t fail_flag; int max_frame_size; // in bits double max_cpb_size; // in bits } LevelConstraint; typedef struct ARNRFilterData { YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS]; int strength; int frame_count; int alt_ref_index; struct scale_factors sf; } ARNRFilterData; typedef struct EncFrameBuf { int mem_valid; int released; YV12_BUFFER_CONFIG frame; } EncFrameBuf; // Maximum operating frame buffer size needed for a GOP using ARF reference. 
#define MAX_ARF_GOP_SIZE (2 * MAX_LAG_BUFFERS) #define MAX_KMEANS_GROUPS 8 typedef struct KMEANS_DATA { double value; int pos; int group_idx; } KMEANS_DATA; #if CONFIG_RATE_CTRL typedef struct ENCODE_COMMAND { int use_external_quantize_index; int external_quantize_index; } ENCODE_COMMAND; static INLINE void encode_command_init(ENCODE_COMMAND *encode_command) { vp9_zero(*encode_command); encode_command->use_external_quantize_index = 0; encode_command->external_quantize_index = -1; } static INLINE void encode_command_set_external_quantize_index( ENCODE_COMMAND *encode_command, int quantize_index) { encode_command->use_external_quantize_index = 1; encode_command->external_quantize_index = quantize_index; } static INLINE void encode_command_reset_external_quantize_index( ENCODE_COMMAND *encode_command) { encode_command->use_external_quantize_index = 0; encode_command->external_quantize_index = -1; } #endif // CONFIG_RATE_CTRL typedef struct VP9_COMP { FRAME_INFO frame_info; QUANTS quants; ThreadData td; MB_MODE_INFO_EXT *mbmi_ext_base; DECLARE_ALIGNED(16, int16_t, y_dequant[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, uv_dequant[QINDEX_RANGE][8]); VP9_COMMON common; VP9EncoderConfig oxcf; struct lookahead_ctx *lookahead; struct lookahead_entry *alt_ref_source; YV12_BUFFER_CONFIG *Source; YV12_BUFFER_CONFIG *Last_Source; // NULL for first frame and alt_ref frames YV12_BUFFER_CONFIG *un_scaled_source; YV12_BUFFER_CONFIG scaled_source; YV12_BUFFER_CONFIG *unscaled_last_source; YV12_BUFFER_CONFIG scaled_last_source; #ifdef ENABLE_KF_DENOISE YV12_BUFFER_CONFIG raw_unscaled_source; YV12_BUFFER_CONFIG raw_scaled_source; #endif YV12_BUFFER_CONFIG *raw_source_frame; BLOCK_SIZE tpl_bsize; TplDepFrame tpl_stats[MAX_ARF_GOP_SIZE]; YV12_BUFFER_CONFIG *tpl_recon_frames[REF_FRAMES]; EncFrameBuf enc_frame_buf[REF_FRAMES]; #if CONFIG_MULTITHREAD pthread_mutex_t kmeans_mutex; #endif int kmeans_data_arr_alloc; KMEANS_DATA *kmeans_data_arr; int kmeans_data_size; int kmeans_data_stride; double kmeans_ctr_ls[MAX_KMEANS_GROUPS]; double kmeans_boundary_ls[MAX_KMEANS_GROUPS]; int kmeans_count_ls[MAX_KMEANS_GROUPS]; int kmeans_ctr_num; #if CONFIG_NON_GREEDY_MV MotionFieldInfo motion_field_info; int tpl_ready; int_mv *select_mv_arr; #endif TileDataEnc *tile_data; int allocated_tiles; // Keep track of memory allocated for tiles. // For a still frame, this flag is set to 1 to skip partition search. 
int partition_search_skippable_frame; int scaled_ref_idx[REFS_PER_FRAME]; int lst_fb_idx; int gld_fb_idx; int alt_fb_idx; int ref_fb_idx[REF_FRAMES]; int refresh_last_frame; int refresh_golden_frame; int refresh_alt_ref_frame; int ext_refresh_frame_flags_pending; int ext_refresh_last_frame; int ext_refresh_golden_frame; int ext_refresh_alt_ref_frame; int ext_refresh_frame_context_pending; int ext_refresh_frame_context; int64_t norm_wiener_variance; int64_t *mb_wiener_variance; int mb_wiener_var_rows; int mb_wiener_var_cols; double *mi_ssim_rdmult_scaling_factors; YV12_BUFFER_CONFIG last_frame_uf; TOKENEXTRA *tile_tok[4][1 << 6]; TOKENLIST *tplist[4][1 << 6]; // Ambient reconstruction err target for force key frames int64_t ambient_err; RD_OPT rd; CODING_CONTEXT coding_context; int *nmvcosts[2]; int *nmvcosts_hp[2]; int *nmvsadcosts[2]; int *nmvsadcosts_hp[2]; int64_t last_time_stamp_seen; int64_t last_end_time_stamp_seen; int64_t first_time_stamp_ever; RATE_CONTROL rc; double framerate; int interp_filter_selected[REF_FRAMES][SWITCHABLE]; struct vpx_codec_pkt_list *output_pkt_list; MBGRAPH_FRAME_STATS mbgraph_stats[MAX_LAG_BUFFERS]; int mbgraph_n_frames; // number of frames filled in the above int static_mb_pct; // % forced skip mbs by segmentation int ref_frame_flags; SPEED_FEATURES sf; uint32_t max_mv_magnitude; int mv_step_param; int allow_comp_inter_inter; // Default value is 1. From first pass stats, encode_breakout may be disabled. ENCODE_BREAKOUT_TYPE allow_encode_breakout; // Get threshold from external input. A suggested threshold is 800 for HD // clips, and 300 for < HD clips. int encode_breakout; uint8_t *segmentation_map; uint8_t *skin_map; // segment threshold for encode breakout int segment_encode_breakout[MAX_SEGMENTS]; CYCLIC_REFRESH *cyclic_refresh; ActiveMap active_map; fractional_mv_step_fp *find_fractional_mv_step; struct scale_factors me_sf; vp9_diamond_search_fn_t diamond_search_sad; vp9_variance_fn_ptr_t fn_ptr[BLOCK_SIZES]; uint64_t time_receive_data; uint64_t time_compress_data; uint64_t time_pick_lpf; uint64_t time_encode_sb_row; #if CONFIG_FP_MB_STATS int use_fp_mb_stats; #endif TWO_PASS twopass; // Force recalculation of segment_ids for each mode info uint8_t force_update_segmentation; YV12_BUFFER_CONFIG alt_ref_buffer; // class responsible for adaptive // quantization of altref frames struct ALT_REF_AQ *alt_ref_aq; #if CONFIG_INTERNAL_STATS unsigned int mode_chosen_counts[MAX_MODES]; int count; uint64_t total_sq_error; uint64_t total_samples; ImageStat psnr; uint64_t totalp_sq_error; uint64_t totalp_samples; ImageStat psnrp; double total_blockiness; double worst_blockiness; int bytes; double summed_quality; double summed_weights; double summedp_quality; double summedp_weights; unsigned int tot_recode_hits; double worst_ssim; ImageStat ssimg; ImageStat fastssim; ImageStat psnrhvs; int b_calculate_ssimg; int b_calculate_blockiness; int b_calculate_consistency; double total_inconsistency; double worst_consistency; Ssimv *ssim_vars; Metrics metrics; #endif int b_calculate_psnr; int droppable; int initial_width; int initial_height; int initial_mbs; // Number of MBs in the full-size frame; to be used to // normalize the firstpass stats. This will differ from the // number of MBs in the current frame when the frame is // scaled. int use_svc; SVC svc; // Store frame variance info in SOURCE_VAR_BASED_PARTITION search type. diff *source_diff_var; // The threshold used in SOURCE_VAR_BASED_PARTITION search type.
unsigned int source_var_thresh; int frames_till_next_var_check; int frame_flags; search_site_config ss_cfg; int mbmode_cost[INTRA_MODES]; unsigned int inter_mode_cost[INTER_MODE_CONTEXTS][INTER_MODES]; int intra_uv_mode_cost[FRAME_TYPES][INTRA_MODES][INTRA_MODES]; int y_mode_costs[INTRA_MODES][INTRA_MODES][INTRA_MODES]; int switchable_interp_costs[SWITCHABLE_FILTER_CONTEXTS][SWITCHABLE_FILTERS]; int partition_cost[PARTITION_CONTEXTS][PARTITION_TYPES]; // Indices are: max_tx_size-1, tx_size_ctx, tx_size int tx_size_cost[TX_SIZES - 1][TX_SIZE_CONTEXTS][TX_SIZES]; #if CONFIG_VP9_TEMPORAL_DENOISING VP9_DENOISER denoiser; #endif int resize_pending; RESIZE_STATE resize_state; int external_resize; int resize_scale_num; int resize_scale_den; int resize_avg_qp; int resize_buffer_underflow; int resize_count; int use_skin_detection; int target_level; NOISE_ESTIMATE noise_estimate; // Count of how many consecutive times a block uses small/zeromv for encoding. uint8_t *consec_zero_mv; // VAR_BASED_PARTITION thresholds // 0 - threshold_64x64; 1 - threshold_32x32; // 2 - threshold_16x16; 3 - vbp_threshold_8x8; int64_t vbp_thresholds[4]; int64_t vbp_threshold_minmax; int64_t vbp_threshold_sad; // Threshold used for partition copy int64_t vbp_threshold_copy; BLOCK_SIZE vbp_bsize_min; // Multi-threading int num_workers; VPxWorker *workers; struct EncWorkerData *tile_thr_data; VP9LfSync lf_row_sync; struct VP9BitstreamWorkerData *vp9_bitstream_worker_data; int keep_level_stats; Vp9LevelInfo level_info; MultiThreadHandle multi_thread_ctxt; void (*row_mt_sync_read_ptr)(VP9RowMTSync *const, int, int); void (*row_mt_sync_write_ptr)(VP9RowMTSync *const, int, int, const int); ARNRFilterData arnr_filter_data; int row_mt; unsigned int row_mt_bit_exact; // Previous Partition Info BLOCK_SIZE *prev_partition; int8_t *prev_segment_id; // Used to save the status of whether a block has a low variance in // choose_partitioning. 0 for 64x64, 1~2 for 64x32, 3~4 for 32x64, 5~8 for // 32x32, 9~24 for 16x16. // This is for the last frame and is copied to the current frame // when partition copy happens. uint8_t *prev_variance_low; uint8_t *copied_frame_cnt; uint8_t max_copied_frame; // If the last frame is dropped, we don't copy partition. uint8_t last_frame_dropped; // For each superblock: keeps track of the last time (in frame distance) the // superblock did not have low source sad. uint8_t *content_state_sb_fd; int compute_source_sad_onepass; LevelConstraint level_constraint; uint8_t *count_arf_frame_usage; uint8_t *count_lastgolden_frame_usage; int multi_layer_arf; vpx_roi_map_t roi; #if CONFIG_RATE_CTRL ENCODE_COMMAND encode_command; #endif } VP9_COMP; typedef struct ENCODE_FRAME_RESULT { int show_idx; FRAME_UPDATE_TYPE update_type; double psnr; uint64_t sse; int quantize_index; } ENCODE_FRAME_RESULT; void vp9_initialize_enc(void); void vp9_update_compressor_with_img_fmt(VP9_COMP *cpi, vpx_img_fmt_t img_fmt); struct VP9_COMP *vp9_create_compressor(const VP9EncoderConfig *oxcf, BufferPool *const pool); void vp9_remove_compressor(VP9_COMP *cpi); void vp9_change_config(VP9_COMP *cpi, const VP9EncoderConfig *oxcf); // Receive a frame's worth of data. The caller can assume that a copy of this // frame is made and not just a copy of the pointer.
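// A typical calling sequence for the entry points declared below, sketched
// for illustration only: error handling, timestamp bookkeeping and lookahead
// capacity are elided, and the helpers marked "hypothetical" are not part of
// this codebase.
#if 0
  VP9_COMP *cpi = vp9_create_compressor(&oxcf, pool);
  while (read_input_frame(&raw, &ts_start, &ts_end)) /* hypothetical */
    vp9_receive_raw_frame(cpi, 0, &raw, ts_start, ts_end);
  while (vp9_get_compressed_data(cpi, &flags, &size, dest, &ts_start, &ts_end,
                                 /*flush=*/1, &result) == 0) {
    if (size > 0) write_output_frame(dest, size); /* hypothetical */
  }
  vp9_remove_compressor(cpi);
#endif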
int vp9_receive_raw_frame(VP9_COMP *cpi, vpx_enc_frame_flags_t frame_flags, YV12_BUFFER_CONFIG *sd, int64_t time_stamp, int64_t end_time); int vp9_get_compressed_data(VP9_COMP *cpi, unsigned int *frame_flags, size_t *size, uint8_t *dest, int64_t *time_stamp, int64_t *time_end, int flush, ENCODE_FRAME_RESULT *encode_frame_result); int vp9_get_preview_raw_frame(VP9_COMP *cpi, YV12_BUFFER_CONFIG *dest, vp9_ppflags_t *flags); int vp9_use_as_reference(VP9_COMP *cpi, int ref_frame_flags); void vp9_update_reference(VP9_COMP *cpi, int ref_frame_flags); int vp9_copy_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); int vp9_set_reference_enc(VP9_COMP *cpi, VP9_REFFRAME ref_frame_flag, YV12_BUFFER_CONFIG *sd); int vp9_update_entropy(VP9_COMP *cpi, int update); int vp9_set_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows, int cols); int vp9_get_active_map(VP9_COMP *cpi, unsigned char *new_map_16x16, int rows, int cols); int vp9_set_internal_size(VP9_COMP *cpi, VPX_SCALING horiz_mode, VPX_SCALING vert_mode); int vp9_set_size_literal(VP9_COMP *cpi, unsigned int width, unsigned int height); void vp9_set_svc(VP9_COMP *cpi, int use_svc); static INLINE int stack_pop(int *stack, int stack_size) { int idx; const int r = stack[0]; for (idx = 1; idx < stack_size; ++idx) stack[idx - 1] = stack[idx]; return r; } static INLINE int stack_top(const int *stack) { return stack[0]; } static INLINE void stack_push(int *stack, int new_item, int stack_size) { int idx; for (idx = stack_size; idx > 0; --idx) stack[idx] = stack[idx - 1]; stack[0] = new_item; } static INLINE void stack_init(int *stack, int length) { int idx; for (idx = 0; idx < length; ++idx) stack[idx] = -1; } int vp9_get_quantizer(const VP9_COMP *cpi); static INLINE int frame_is_kf_gf_arf(const VP9_COMP *cpi) { return frame_is_intra_only(&cpi->common) || cpi->refresh_alt_ref_frame || (cpi->refresh_golden_frame && !cpi->rc.is_src_frame_alt_ref); } static INLINE int get_ref_frame_map_idx(const VP9_COMP *cpi, MV_REFERENCE_FRAME ref_frame) { if (ref_frame == LAST_FRAME) { return cpi->lst_fb_idx; } else if (ref_frame == GOLDEN_FRAME) { return cpi->gld_fb_idx; } else { return cpi->alt_fb_idx; } } static INLINE int get_ref_frame_buf_idx(const VP9_COMP *const cpi, int ref_frame) { const VP9_COMMON *const cm = &cpi->common; const int map_idx = get_ref_frame_map_idx(cpi, ref_frame); return (map_idx != INVALID_IDX) ? cm->ref_frame_map[map_idx] : INVALID_IDX; } static INLINE RefCntBuffer *get_ref_cnt_buffer(VP9_COMMON *cm, int fb_idx) { return fb_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[fb_idx] : NULL; } static INLINE YV12_BUFFER_CONFIG *get_ref_frame_buffer( const VP9_COMP *const cpi, MV_REFERENCE_FRAME ref_frame) { const VP9_COMMON *const cm = &cpi->common; const int buf_idx = get_ref_frame_buf_idx(cpi, ref_frame); return buf_idx != INVALID_IDX ? &cm->buffer_pool->frame_bufs[buf_idx].buf : NULL; } static INLINE int get_token_alloc(int mb_rows, int mb_cols) { // TODO(JBB): double check we can't exceed this token count if we have a // 32x32 transform crossing a boundary at a multiple of 16. // mb_rows, cols are in units of 16 pixels. We assume 3 planes all at full // resolution. We assume up to 1 token per pixel, and then allow // a head room of 4. return mb_rows * mb_cols * (16 * 16 * 3 + 4); } // Get the allocated token size for a tile. It does the same calculation as in // the frame token allocation. 
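// For instance, a 64x64-pixel tile spans 4x4 macroblocks, so
// get_token_alloc(4, 4) reserves 16 * (16 * 16 * 3 + 4) = 12352 TOKENEXTRA
// entries.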
static INLINE int allocated_tokens(TileInfo tile) { int tile_mb_rows = (tile.mi_row_end - tile.mi_row_start + 1) >> 1; int tile_mb_cols = (tile.mi_col_end - tile.mi_col_start + 1) >> 1; return get_token_alloc(tile_mb_rows, tile_mb_cols); } static INLINE void get_start_tok(VP9_COMP *cpi, int tile_row, int tile_col, int mi_row, TOKENEXTRA **tok) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; const TileInfo *const tile_info = &this_tile->tile_info; int tile_mb_cols = (tile_info->mi_col_end - tile_info->mi_col_start + 1) >> 1; const int mb_row = (mi_row - tile_info->mi_row_start) >> 1; *tok = cpi->tile_tok[tile_row][tile_col] + get_token_alloc(mb_row, tile_mb_cols); } int64_t vp9_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); #if CONFIG_VP9_HIGHBITDEPTH int64_t vp9_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); #endif // CONFIG_VP9_HIGHBITDEPTH void vp9_scale_references(VP9_COMP *cpi); void vp9_update_reference_frames(VP9_COMP *cpi); void vp9_set_high_precision_mv(VP9_COMP *cpi, int allow_high_precision_mv); YV12_BUFFER_CONFIG *vp9_svc_twostage_scale( VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled, YV12_BUFFER_CONFIG *scaled_temp, INTERP_FILTER filter_type, int phase_scaler, INTERP_FILTER filter_type2, int phase_scaler2); YV12_BUFFER_CONFIG *vp9_scale_if_required( VP9_COMMON *cm, YV12_BUFFER_CONFIG *unscaled, YV12_BUFFER_CONFIG *scaled, int use_normative_scaler, INTERP_FILTER filter_type, int phase_scaler); void vp9_apply_encoding_flags(VP9_COMP *cpi, vpx_enc_frame_flags_t flags); static INLINE int is_one_pass_cbr_svc(const struct VP9_COMP *const cpi) { return (cpi->use_svc && cpi->oxcf.pass == 0); } #if CONFIG_VP9_TEMPORAL_DENOISING static INLINE int denoise_svc(const struct VP9_COMP *const cpi) { return (!cpi->use_svc || (cpi->use_svc && cpi->svc.spatial_layer_id >= cpi->svc.first_layer_denoise)); } #endif #define MIN_LOOKAHEAD_FOR_ARFS 4 static INLINE int is_altref_enabled(const VP9_COMP *const cpi) { return !(cpi->oxcf.mode == REALTIME && cpi->oxcf.rc_mode == VPX_CBR) && cpi->oxcf.lag_in_frames >= MIN_LOOKAHEAD_FOR_ARFS && cpi->oxcf.enable_auto_arf; } static INLINE void set_ref_ptrs(const VP9_COMMON *const cm, MACROBLOCKD *xd, MV_REFERENCE_FRAME ref0, MV_REFERENCE_FRAME ref1) { xd->block_refs[0] = &cm->frame_refs[ref0 >= LAST_FRAME ? ref0 - LAST_FRAME : 0]; xd->block_refs[1] = &cm->frame_refs[ref1 >= LAST_FRAME ? ref1 - LAST_FRAME : 0]; } static INLINE int get_chessboard_index(const int frame_index) { return frame_index & 0x1; } static INLINE int *cond_cost_list(const struct VP9_COMP *cpi, int *cost_list) { return cpi->sf.mv.subpel_search_method != SUBPEL_TREE ? cost_list : NULL; } static INLINE int get_num_vert_units(TileInfo tile, int shift) { int num_vert_units = (tile.mi_row_end - tile.mi_row_start + (1 << shift) - 1) >> shift; return num_vert_units; } static INLINE int get_num_cols(TileInfo tile, int shift) { int num_cols = (tile.mi_col_end - tile.mi_col_start + (1 << shift) - 1) >> shift; return num_cols; } static INLINE int get_level_index(VP9_LEVEL level) { int i; for (i = 0; i < VP9_LEVELS; ++i) { if (level == vp9_level_defs[i].level) return i; } return -1; } // Return the log2 value of max column tiles corresponding to the level that // the picture size fits into. 
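// For instance, per the VP9 level definitions a 1920x1080 picture (2073600
// luma samples) first fits LEVEL_4, whose max_col_tiles is 8, so the helper
// below would return get_msb(8) = 3.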
static INLINE int log_tile_cols_from_picsize_level(uint32_t width, uint32_t height) { int i; const uint32_t pic_size = width * height; const uint32_t pic_breadth = VPXMAX(width, height); for (i = LEVEL_1; i < LEVEL_MAX; ++i) { if (vp9_level_defs[i].max_luma_picture_size >= pic_size && vp9_level_defs[i].max_luma_picture_breadth >= pic_breadth) { return get_msb(vp9_level_defs[i].max_col_tiles); } } return INT_MAX; } VP9_LEVEL vp9_get_level(const Vp9LevelSpec *const level_spec); int vp9_set_roi_map(VP9_COMP *cpi, unsigned char *map, unsigned int rows, unsigned int cols, int delta_q[8], int delta_lf[8], int skip[8], int ref_frame[8]); void vp9_new_framerate(VP9_COMP *cpi, double framerate); void vp9_set_row_mt(VP9_COMP *cpi); int vp9_get_psnr(const VP9_COMP *cpi, PSNR_STATS *psnr); #define LAYER_IDS_TO_IDX(sl, tl, num_tl) ((sl) * (num_tl) + (tl)) #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_ENCODER_H_ libvpx-1.8.2/vp9/encoder/vp9_ethread.c000066400000000000000000000534171357355204000175570ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_ethread.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_multi_thread.h" #include "vp9/encoder/vp9_temporal_filter.h" #include "vpx_dsp/vpx_dsp_common.h" static void accumulate_rd_opt(ThreadData *td, ThreadData *td_t) { int i, j, k, l, m, n; for (i = 0; i < REFERENCE_MODES; i++) td->rd_counts.comp_pred_diff[i] += td_t->rd_counts.comp_pred_diff[i]; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) td->rd_counts.filter_diff[i] += td_t->rd_counts.filter_diff[i]; for (i = 0; i < TX_SIZES; i++) for (j = 0; j < PLANE_TYPES; j++) for (k = 0; k < REF_TYPES; k++) for (l = 0; l < COEF_BANDS; l++) for (m = 0; m < COEFF_CONTEXTS; m++) for (n = 0; n < ENTROPY_TOKENS; n++) td->rd_counts.coef_counts[i][j][k][l][m][n] += td_t->rd_counts.coef_counts[i][j][k][l][m][n]; } static int enc_worker_hook(void *arg1, void *unused) { EncWorkerData *const thread_data = (EncWorkerData *)arg1; VP9_COMP *const cpi = thread_data->cpi; const VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; int t; (void)unused; for (t = thread_data->start; t < tile_rows * tile_cols; t += cpi->num_workers) { int tile_row = t / tile_cols; int tile_col = t % tile_cols; vp9_encode_tile(cpi, thread_data->td, tile_row, tile_col); } return 0; } static int get_max_tile_cols(VP9_COMP *cpi) { const int aligned_width = ALIGN_POWER_OF_TWO(cpi->oxcf.width, MI_SIZE_LOG2); int mi_cols = aligned_width >> MI_SIZE_LOG2; int min_log2_tile_cols, max_log2_tile_cols; int log2_tile_cols; vp9_get_tile_n_bits(mi_cols, &min_log2_tile_cols, &max_log2_tile_cols); log2_tile_cols = clamp(cpi->oxcf.tile_columns, min_log2_tile_cols, max_log2_tile_cols); if (cpi->oxcf.target_level == LEVEL_AUTO) { const int level_tile_cols = log_tile_cols_from_picsize_level(cpi->common.width, cpi->common.height); if (log2_tile_cols > level_tile_cols) { log2_tile_cols = VPXMAX(level_tile_cols, min_log2_tile_cols); } } return (1 << log2_tile_cols); } static void 
create_enc_workers(VP9_COMP *cpi, int num_workers) { VP9_COMMON *const cm = &cpi->common; const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); int i; // Only run once to create threads and allocate thread data. if (cpi->num_workers == 0) { int allocated_workers = num_workers; // While using SVC, we need to allocate threads according to the highest // resolution. When row based multithreading is enabled, it is OK to // allocate more threads than the number of max tile columns. if (cpi->use_svc && !cpi->row_mt) { int max_tile_cols = get_max_tile_cols(cpi); allocated_workers = VPXMIN(cpi->oxcf.max_threads, max_tile_cols); } CHECK_MEM_ERROR(cm, cpi->workers, vpx_malloc(allocated_workers * sizeof(*cpi->workers))); CHECK_MEM_ERROR(cm, cpi->tile_thr_data, vpx_calloc(allocated_workers, sizeof(*cpi->tile_thr_data))); for (i = 0; i < allocated_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *thread_data = &cpi->tile_thr_data[i]; ++cpi->num_workers; winterface->init(worker); if (i < allocated_workers - 1) { thread_data->cpi = cpi; // Allocate thread data. CHECK_MEM_ERROR(cm, thread_data->td, vpx_memalign(32, sizeof(*thread_data->td))); vp9_zero(*thread_data->td); // Set up pc_tree. thread_data->td->leaf_tree = NULL; thread_data->td->pc_tree = NULL; vp9_setup_pc_tree(cm, thread_data->td); // Allocate frame counters in thread data. CHECK_MEM_ERROR(cm, thread_data->td->counts, vpx_calloc(1, sizeof(*thread_data->td->counts))); // Create threads if (!winterface->reset(worker)) vpx_internal_error(&cm->error, VPX_CODEC_ERROR, "Tile encoder thread creation failed"); } else { // Main thread acts as a worker and uses the thread data in cpi. thread_data->cpi = cpi; thread_data->td = &cpi->td; } winterface->sync(worker); } } } static void launch_enc_workers(VP9_COMP *cpi, VPxWorkerHook hook, void *data2, int num_workers) { const VPxWorkerInterface *const winterface = vpx_get_worker_interface(); int i; for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; worker->hook = hook; worker->data1 = &cpi->tile_thr_data[i]; worker->data2 = data2; } // Encode a frame for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; // Set the starting tile for each thread. thread_data->start = i; if (i == cpi->num_workers - 1) winterface->execute(worker); else winterface->launch(worker); } // Encoding ends. for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; winterface->sync(worker); } } void vp9_encode_tiles_mt(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int num_workers = VPXMIN(cpi->oxcf.max_threads, tile_cols); int i; vp9_init_tile_data(cpi); create_enc_workers(cpi, num_workers); for (i = 0; i < num_workers; i++) { EncWorkerData *thread_data; thread_data = &cpi->tile_thr_data[i]; // Before encoding a frame, copy the thread data from cpi. if (thread_data->td != &cpi->td) { thread_data->td->mb = cpi->td.mb; thread_data->td->rd_counts = cpi->td.rd_counts; } if (thread_data->td->counts != &cpi->common.counts) { memcpy(thread_data->td->counts, &cpi->common.counts, sizeof(cpi->common.counts)); } // Handle use_nonrd_pick_mode case. 
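// Point each worker's macroblock plane buffers (coeff/qcoeff/dqcoeff/eobs)
// at that worker's root pick-mode context, which is the buffer layout the
// nonrd pick-mode path expects.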
if (cpi->sf.use_nonrd_pick_mode) { MACROBLOCK *const x = &thread_data->td->mb; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none; int j; for (j = 0; j < MAX_MB_PLANE; ++j) { p[j].coeff = ctx->coeff_pbuf[j][0]; p[j].qcoeff = ctx->qcoeff_pbuf[j][0]; pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0]; p[j].eobs = ctx->eobs_pbuf[j][0]; } } } launch_enc_workers(cpi, enc_worker_hook, NULL, num_workers); for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; // Accumulate counters. if (i < cpi->num_workers - 1) { vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0); accumulate_rd_opt(&cpi->td, thread_data->td); } } } #if !CONFIG_REALTIME_ONLY static void accumulate_fp_tile_stat(TileDataEnc *tile_data, TileDataEnc *tile_data_t) { tile_data->fp_data.intra_factor += tile_data_t->fp_data.intra_factor; tile_data->fp_data.brightness_factor += tile_data_t->fp_data.brightness_factor; tile_data->fp_data.coded_error += tile_data_t->fp_data.coded_error; tile_data->fp_data.sr_coded_error += tile_data_t->fp_data.sr_coded_error; tile_data->fp_data.frame_noise_energy += tile_data_t->fp_data.frame_noise_energy; tile_data->fp_data.intra_error += tile_data_t->fp_data.intra_error; tile_data->fp_data.intercount += tile_data_t->fp_data.intercount; tile_data->fp_data.second_ref_count += tile_data_t->fp_data.second_ref_count; tile_data->fp_data.neutral_count += tile_data_t->fp_data.neutral_count; tile_data->fp_data.intra_count_low += tile_data_t->fp_data.intra_count_low; tile_data->fp_data.intra_count_high += tile_data_t->fp_data.intra_count_high; tile_data->fp_data.intra_skip_count += tile_data_t->fp_data.intra_skip_count; tile_data->fp_data.mvcount += tile_data_t->fp_data.mvcount; tile_data->fp_data.sum_mvr += tile_data_t->fp_data.sum_mvr; tile_data->fp_data.sum_mvr_abs += tile_data_t->fp_data.sum_mvr_abs; tile_data->fp_data.sum_mvc += tile_data_t->fp_data.sum_mvc; tile_data->fp_data.sum_mvc_abs += tile_data_t->fp_data.sum_mvc_abs; tile_data->fp_data.sum_mvrs += tile_data_t->fp_data.sum_mvrs; tile_data->fp_data.sum_mvcs += tile_data_t->fp_data.sum_mvcs; tile_data->fp_data.sum_in_vectors += tile_data_t->fp_data.sum_in_vectors; tile_data->fp_data.intra_smooth_count += tile_data_t->fp_data.intra_smooth_count; tile_data->fp_data.image_data_start_row = VPXMIN(tile_data->fp_data.image_data_start_row, tile_data_t->fp_data.image_data_start_row) == INVALID_ROW ? VPXMAX(tile_data->fp_data.image_data_start_row, tile_data_t->fp_data.image_data_start_row) : VPXMIN(tile_data->fp_data.image_data_start_row, tile_data_t->fp_data.image_data_start_row); } #endif // !CONFIG_REALTIME_ONLY // Allocate memory for row synchronization void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, VP9_COMMON *cm, int rows) { row_mt_sync->rows = rows; #if CONFIG_MULTITHREAD { int i; CHECK_MEM_ERROR(cm, row_mt_sync->mutex, vpx_malloc(sizeof(*row_mt_sync->mutex) * rows)); if (row_mt_sync->mutex) { for (i = 0; i < rows; ++i) { pthread_mutex_init(&row_mt_sync->mutex[i], NULL); } } CHECK_MEM_ERROR(cm, row_mt_sync->cond, vpx_malloc(sizeof(*row_mt_sync->cond) * rows)); if (row_mt_sync->cond) { for (i = 0; i < rows; ++i) { pthread_cond_init(&row_mt_sync->cond[i], NULL); } } } #endif // CONFIG_MULTITHREAD CHECK_MEM_ERROR(cm, row_mt_sync->cur_col, vpx_malloc(sizeof(*row_mt_sync->cur_col) * rows)); // Set up nsync. 
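// A sync_range of 1 is the finest granularity: with nsync == 1 the wait
// condition in vp9_row_mt_sync_read() lets row r advance to column c only
// once row r - 1 has signalled completion of that column.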
row_mt_sync->sync_range = 1; } // Deallocate row based multi-threading synchronization related mutex and data void vp9_row_mt_sync_mem_dealloc(VP9RowMTSync *row_mt_sync) { if (row_mt_sync != NULL) { #if CONFIG_MULTITHREAD int i; if (row_mt_sync->mutex != NULL) { for (i = 0; i < row_mt_sync->rows; ++i) { pthread_mutex_destroy(&row_mt_sync->mutex[i]); } vpx_free(row_mt_sync->mutex); } if (row_mt_sync->cond != NULL) { for (i = 0; i < row_mt_sync->rows; ++i) { pthread_cond_destroy(&row_mt_sync->cond[i]); } vpx_free(row_mt_sync->cond); } #endif // CONFIG_MULTITHREAD vpx_free(row_mt_sync->cur_col); // clear the structure as the source of this call may be dynamic change // in tiles in which case this call will be followed by an _alloc() // which may fail. vp9_zero(*row_mt_sync); } } void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c) { #if CONFIG_MULTITHREAD const int nsync = row_mt_sync->sync_range; if (r && !(c & (nsync - 1))) { pthread_mutex_t *const mutex = &row_mt_sync->mutex[r - 1]; pthread_mutex_lock(mutex); while (c > row_mt_sync->cur_col[r - 1] - nsync + 1) { pthread_cond_wait(&row_mt_sync->cond[r - 1], mutex); } pthread_mutex_unlock(mutex); } #else (void)row_mt_sync; (void)r; (void)c; #endif // CONFIG_MULTITHREAD } void vp9_row_mt_sync_read_dummy(VP9RowMTSync *const row_mt_sync, int r, int c) { (void)row_mt_sync; (void)r; (void)c; return; } void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c, const int cols) { #if CONFIG_MULTITHREAD const int nsync = row_mt_sync->sync_range; int cur; // Only signal when there are enough encoded blocks for next row to run. int sig = 1; if (c < cols - 1) { cur = c; if (c % nsync != nsync - 1) sig = 0; } else { cur = cols + nsync; } if (sig) { pthread_mutex_lock(&row_mt_sync->mutex[r]); row_mt_sync->cur_col[r] = cur; pthread_cond_signal(&row_mt_sync->cond[r]); pthread_mutex_unlock(&row_mt_sync->mutex[r]); } #else (void)row_mt_sync; (void)r; (void)c; (void)cols; #endif // CONFIG_MULTITHREAD } void vp9_row_mt_sync_write_dummy(VP9RowMTSync *const row_mt_sync, int r, int c, const int cols) { (void)row_mt_sync; (void)r; (void)c; (void)cols; return; } #if !CONFIG_REALTIME_ONLY static int first_pass_worker_hook(void *arg1, void *arg2) { EncWorkerData *const thread_data = (EncWorkerData *)arg1; MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2; VP9_COMP *const cpi = thread_data->cpi; const VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; int tile_row, tile_col; TileDataEnc *this_tile; int end_of_frame; int thread_id = thread_data->thread_id; int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id]; JobNode *proc_job = NULL; FIRSTPASS_DATA fp_acc_data; MV zero_mv = { 0, 0 }; MV best_ref_mv; int mb_row; end_of_frame = 0; while (0 == end_of_frame) { // Get the next job in the queue proc_job = (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id); if (NULL == proc_job) { // Query for the status of other tiles end_of_frame = vp9_get_tiles_proc_status( multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id, tile_cols); } else { tile_col = proc_job->tile_col_id; tile_row = proc_job->tile_row_id; this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; mb_row = proc_job->vert_unit_row_num; best_ref_mv = zero_mv; vp9_zero(fp_acc_data); fp_acc_data.image_data_start_row = INVALID_ROW; vp9_first_pass_encode_tile_mb_row(cpi, thread_data->td, &fp_acc_data, this_tile, &best_ref_mv, mb_row); } } return 0; } void vp9_encode_fp_row_mt(VP9_COMP 
*cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt; TileDataEnc *first_tile_col; int num_workers = VPXMAX(cpi->oxcf.max_threads, 1); int i; if (multi_thread_ctxt->allocated_tile_cols < tile_cols || multi_thread_ctxt->allocated_tile_rows < tile_rows || multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) { vp9_row_mt_mem_dealloc(cpi); vp9_init_tile_data(cpi); vp9_row_mt_mem_alloc(cpi); } else { vp9_init_tile_data(cpi); } create_enc_workers(cpi, num_workers); vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers); vp9_prepare_job_queue(cpi, FIRST_PASS_JOB); vp9_multi_thread_tile_init(cpi); for (i = 0; i < num_workers; i++) { EncWorkerData *thread_data; thread_data = &cpi->tile_thr_data[i]; // Before encoding a frame, copy the thread data from cpi. if (thread_data->td != &cpi->td) { thread_data->td->mb = cpi->td.mb; } } launch_enc_workers(cpi, first_pass_worker_hook, multi_thread_ctxt, num_workers); first_tile_col = &cpi->tile_data[0]; for (i = 1; i < tile_cols; i++) { TileDataEnc *this_tile = &cpi->tile_data[i]; accumulate_fp_tile_stat(first_tile_col, this_tile); } } static int temporal_filter_worker_hook(void *arg1, void *arg2) { EncWorkerData *const thread_data = (EncWorkerData *)arg1; MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2; VP9_COMP *const cpi = thread_data->cpi; const VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; int tile_row, tile_col; int mb_col_start, mb_col_end; TileDataEnc *this_tile; int end_of_frame; int thread_id = thread_data->thread_id; int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id]; JobNode *proc_job = NULL; int mb_row; end_of_frame = 0; while (0 == end_of_frame) { // Get the next job in the queue proc_job = (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id); if (NULL == proc_job) { // Query for the status of other tiles end_of_frame = vp9_get_tiles_proc_status( multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id, tile_cols); } else { tile_col = proc_job->tile_col_id; tile_row = proc_job->tile_row_id; this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; mb_col_start = (this_tile->tile_info.mi_col_start) >> TF_SHIFT; mb_col_end = (this_tile->tile_info.mi_col_end + TF_ROUND) >> TF_SHIFT; mb_row = proc_job->vert_unit_row_num; vp9_temporal_filter_iterate_row_c(cpi, thread_data->td, mb_row, mb_col_start, mb_col_end); } } return 0; } void vp9_temporal_filter_row_mt(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt; int num_workers = cpi->num_workers ? cpi->num_workers : 1; int i; if (multi_thread_ctxt->allocated_tile_cols < tile_cols || multi_thread_ctxt->allocated_tile_rows < tile_rows || multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) { vp9_row_mt_mem_dealloc(cpi); vp9_init_tile_data(cpi); vp9_row_mt_mem_alloc(cpi); } else { vp9_init_tile_data(cpi); } create_enc_workers(cpi, num_workers); vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers); vp9_prepare_job_queue(cpi, ARNR_JOB); for (i = 0; i < num_workers; i++) { EncWorkerData *thread_data; thread_data = &cpi->tile_thr_data[i]; // Before encoding a frame, copy the thread data from cpi. 
if (thread_data->td != &cpi->td) { thread_data->td->mb = cpi->td.mb; } } launch_enc_workers(cpi, temporal_filter_worker_hook, multi_thread_ctxt, num_workers); } #endif // !CONFIG_REALTIME_ONLY static int enc_row_mt_worker_hook(void *arg1, void *arg2) { EncWorkerData *const thread_data = (EncWorkerData *)arg1; MultiThreadHandle *multi_thread_ctxt = (MultiThreadHandle *)arg2; VP9_COMP *const cpi = thread_data->cpi; const VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; int tile_row, tile_col; int end_of_frame; int thread_id = thread_data->thread_id; int cur_tile_id = multi_thread_ctxt->thread_id_to_tile_id[thread_id]; JobNode *proc_job = NULL; int mi_row; end_of_frame = 0; while (0 == end_of_frame) { // Get the next job in the queue proc_job = (JobNode *)vp9_enc_grp_get_next_job(multi_thread_ctxt, cur_tile_id); if (NULL == proc_job) { // Query for the status of other tiles end_of_frame = vp9_get_tiles_proc_status( multi_thread_ctxt, thread_data->tile_completion_status, &cur_tile_id, tile_cols); } else { tile_col = proc_job->tile_col_id; tile_row = proc_job->tile_row_id; mi_row = proc_job->vert_unit_row_num * MI_BLOCK_SIZE; vp9_encode_sb_row(cpi, thread_data->td, tile_row, tile_col, mi_row); } } return 0; } void vp9_encode_tiles_row_mt(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt; int num_workers = VPXMAX(cpi->oxcf.max_threads, 1); int i; if (multi_thread_ctxt->allocated_tile_cols < tile_cols || multi_thread_ctxt->allocated_tile_rows < tile_rows || multi_thread_ctxt->allocated_vert_unit_rows < cm->mb_rows) { vp9_row_mt_mem_dealloc(cpi); vp9_init_tile_data(cpi); vp9_row_mt_mem_alloc(cpi); } else { vp9_init_tile_data(cpi); } create_enc_workers(cpi, num_workers); vp9_assign_tile_to_thread(multi_thread_ctxt, tile_cols, cpi->num_workers); vp9_prepare_job_queue(cpi, ENCODE_JOB); vp9_multi_thread_tile_init(cpi); for (i = 0; i < num_workers; i++) { EncWorkerData *thread_data; thread_data = &cpi->tile_thr_data[i]; // Before encoding a frame, copy the thread data from cpi. if (thread_data->td != &cpi->td) { thread_data->td->mb = cpi->td.mb; thread_data->td->rd_counts = cpi->td.rd_counts; } if (thread_data->td->counts != &cpi->common.counts) { memcpy(thread_data->td->counts, &cpi->common.counts, sizeof(cpi->common.counts)); } // Handle use_nonrd_pick_mode case. if (cpi->sf.use_nonrd_pick_mode) { MACROBLOCK *const x = &thread_data->td->mb; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; PICK_MODE_CONTEXT *ctx = &thread_data->td->pc_root->none; int j; for (j = 0; j < MAX_MB_PLANE; ++j) { p[j].coeff = ctx->coeff_pbuf[j][0]; p[j].qcoeff = ctx->qcoeff_pbuf[j][0]; pd[j].dqcoeff = ctx->dqcoeff_pbuf[j][0]; p[j].eobs = ctx->eobs_pbuf[j][0]; } } } launch_enc_workers(cpi, enc_row_mt_worker_hook, multi_thread_ctxt, num_workers); for (i = 0; i < num_workers; i++) { VPxWorker *const worker = &cpi->workers[i]; EncWorkerData *const thread_data = (EncWorkerData *)worker->data1; // Accumulate counters. if (i < cpi->num_workers - 1) { vp9_accumulate_frame_counts(&cm->counts, thread_data->td->counts, 0); accumulate_rd_opt(&cpi->td, thread_data->td); } } } libvpx-1.8.2/vp9/encoder/vp9_ethread.h000066400000000000000000000041321357355204000175520ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_ETHREAD_H_ #define VPX_VP9_ENCODER_VP9_ETHREAD_H_ #ifdef __cplusplus extern "C" { #endif #define MAX_NUM_TILE_COLS (1 << 6) #define MAX_NUM_TILE_ROWS 4 #define MAX_NUM_THREADS 80 struct VP9_COMP; struct ThreadData; typedef struct EncWorkerData { struct VP9_COMP *cpi; struct ThreadData *td; int start; int thread_id; int tile_completion_status[MAX_NUM_TILE_COLS]; } EncWorkerData; // Encoder row synchronization typedef struct VP9RowMTSyncData { #if CONFIG_MULTITHREAD pthread_mutex_t *mutex; pthread_cond_t *cond; #endif // Allocate memory to store the sb/mb block index in each row. int *cur_col; int sync_range; int rows; } VP9RowMTSync; void vp9_encode_tiles_mt(struct VP9_COMP *cpi); void vp9_encode_tiles_row_mt(struct VP9_COMP *cpi); void vp9_encode_fp_row_mt(struct VP9_COMP *cpi); void vp9_row_mt_sync_read(VP9RowMTSync *const row_mt_sync, int r, int c); void vp9_row_mt_sync_write(VP9RowMTSync *const row_mt_sync, int r, int c, const int cols); void vp9_row_mt_sync_read_dummy(VP9RowMTSync *const row_mt_sync, int r, int c); void vp9_row_mt_sync_write_dummy(VP9RowMTSync *const row_mt_sync, int r, int c, const int cols); // Allocate memory for row based multi-threading synchronization. void vp9_row_mt_sync_mem_alloc(VP9RowMTSync *row_mt_sync, struct VP9Common *cm, int rows); // Deallocate row based multi-threading synchronization related mutex and data. void vp9_row_mt_sync_mem_dealloc(VP9RowMTSync *row_mt_sync); void vp9_temporal_filter_row_mt(struct VP9_COMP *cpi); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_ETHREAD_H_ libvpx-1.8.2/vp9/encoder/vp9_extend.c000066400000000000000000000170211357355204000174210ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/encoder/vp9_extend.h" static void copy_and_extend_plane(const uint8_t *src, int src_pitch, uint8_t *dst, int dst_pitch, int w, int h, int extend_top, int extend_left, int extend_bottom, int extend_right) { int i, linesize; // copy the left and right most columns out const uint8_t *src_ptr1 = src; const uint8_t *src_ptr2 = src + w - 1; uint8_t *dst_ptr1 = dst - extend_left; uint8_t *dst_ptr2 = dst + w; for (i = 0; i < h; i++) { memset(dst_ptr1, src_ptr1[0], extend_left); memcpy(dst_ptr1 + extend_left, src_ptr1, w); memset(dst_ptr2, src_ptr2[0], extend_right); src_ptr1 += src_pitch; src_ptr2 += src_pitch; dst_ptr1 += dst_pitch; dst_ptr2 += dst_pitch; } // Now copy the top and bottom lines into each line of the respective // borders src_ptr1 = dst - extend_left; src_ptr2 = dst + dst_pitch * (h - 1) - extend_left; dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left; dst_ptr2 = dst + dst_pitch * (h)-extend_left; linesize = extend_left + extend_right + w; for (i = 0; i < extend_top; i++) { memcpy(dst_ptr1, src_ptr1, linesize); dst_ptr1 += dst_pitch; } for (i = 0; i < extend_bottom; i++) { memcpy(dst_ptr2, src_ptr2, linesize); dst_ptr2 += dst_pitch; } } #if CONFIG_VP9_HIGHBITDEPTH static void highbd_copy_and_extend_plane(const uint8_t *src8, int src_pitch, uint8_t *dst8, int dst_pitch, int w, int h, int extend_top, int extend_left, int extend_bottom, int extend_right) { int i, linesize; uint16_t *src = CONVERT_TO_SHORTPTR(src8); uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); // copy the left and right most columns out const uint16_t *src_ptr1 = src; const uint16_t *src_ptr2 = src + w - 1; uint16_t *dst_ptr1 = dst - extend_left; uint16_t *dst_ptr2 = dst + w; for (i = 0; i < h; i++) { vpx_memset16(dst_ptr1, src_ptr1[0], extend_left); memcpy(dst_ptr1 + extend_left, src_ptr1, w * sizeof(src_ptr1[0])); vpx_memset16(dst_ptr2, src_ptr2[0], extend_right); src_ptr1 += src_pitch; src_ptr2 += src_pitch; dst_ptr1 += dst_pitch; dst_ptr2 += dst_pitch; } // Now copy the top and bottom lines into each line of the respective // borders src_ptr1 = dst - extend_left; src_ptr2 = dst + dst_pitch * (h - 1) - extend_left; dst_ptr1 = dst + dst_pitch * (-extend_top) - extend_left; dst_ptr2 = dst + dst_pitch * (h)-extend_left; linesize = extend_left + extend_right + w; for (i = 0; i < extend_top; i++) { memcpy(dst_ptr1, src_ptr1, linesize * sizeof(src_ptr1[0])); dst_ptr1 += dst_pitch; } for (i = 0; i < extend_bottom; i++) { memcpy(dst_ptr2, src_ptr2, linesize * sizeof(src_ptr2[0])); dst_ptr2 += dst_pitch; } } #endif // CONFIG_VP9_HIGHBITDEPTH void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst) { // Extend src frame in buffer // Altref filtering assumes 16 pixel extension const int et_y = 16; const int el_y = 16; // Motion estimation may use src block variance with the block size up // to 64x64, so the right and bottom need to be extended to 64 multiple // or up to 16, whichever is greater. 
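// For example: a 636 pixel wide crop with an aligned y_width of 640 gives
// er_y = VPXMAX(640 + 16, ALIGN_POWER_OF_TWO(640, 6)) - 636 = 656 - 636 = 20,
// i.e. at least 16 pixels of extension beyond the aligned right edge.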
const int er_y = VPXMAX(src->y_width + 16, ALIGN_POWER_OF_TWO(src->y_width, 6)) - src->y_crop_width; const int eb_y = VPXMAX(src->y_height + 16, ALIGN_POWER_OF_TWO(src->y_height, 6)) - src->y_crop_height; const int uv_width_subsampling = (src->uv_width != src->y_width); const int uv_height_subsampling = (src->uv_height != src->y_height); const int et_uv = et_y >> uv_height_subsampling; const int el_uv = el_y >> uv_width_subsampling; const int eb_uv = eb_y >> uv_height_subsampling; const int er_uv = er_y >> uv_width_subsampling; #if CONFIG_VP9_HIGHBITDEPTH if (src->flags & YV12_FLAG_HIGHBITDEPTH) { highbd_copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, src->y_crop_width, src->y_crop_height, et_y, el_y, eb_y, er_y); highbd_copy_and_extend_plane( src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv); highbd_copy_and_extend_plane( src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv); return; } #endif // CONFIG_VP9_HIGHBITDEPTH copy_and_extend_plane(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, src->y_crop_width, src->y_crop_height, et_y, el_y, eb_y, er_y); copy_and_extend_plane(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv); copy_and_extend_plane(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, src->uv_crop_width, src->uv_crop_height, et_uv, el_uv, eb_uv, er_uv); } void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int srcy, int srcx, int srch, int srcw) { // If the side is not touching the border then don't extend. const int et_y = srcy ? 0 : dst->border; const int el_y = srcx ? 0 : dst->border; const int eb_y = srcy + srch != src->y_height ? 0 : dst->border + dst->y_height - src->y_height; const int er_y = srcx + srcw != src->y_width ? 0 : dst->border + dst->y_width - src->y_width; const int src_y_offset = srcy * src->y_stride + srcx; const int dst_y_offset = srcy * dst->y_stride + srcx; const int et_uv = ROUND_POWER_OF_TWO(et_y, 1); const int el_uv = ROUND_POWER_OF_TWO(el_y, 1); const int eb_uv = ROUND_POWER_OF_TWO(eb_y, 1); const int er_uv = ROUND_POWER_OF_TWO(er_y, 1); const int src_uv_offset = ((srcy * src->uv_stride) >> 1) + (srcx >> 1); const int dst_uv_offset = ((srcy * dst->uv_stride) >> 1) + (srcx >> 1); const int srch_uv = ROUND_POWER_OF_TWO(srch, 1); const int srcw_uv = ROUND_POWER_OF_TWO(srcw, 1); copy_and_extend_plane(src->y_buffer + src_y_offset, src->y_stride, dst->y_buffer + dst_y_offset, dst->y_stride, srcw, srch, et_y, el_y, eb_y, er_y); copy_and_extend_plane(src->u_buffer + src_uv_offset, src->uv_stride, dst->u_buffer + dst_uv_offset, dst->uv_stride, srcw_uv, srch_uv, et_uv, el_uv, eb_uv, er_uv); copy_and_extend_plane(src->v_buffer + src_uv_offset, src->uv_stride, dst->v_buffer + dst_uv_offset, dst->uv_stride, srcw_uv, srch_uv, et_uv, el_uv, eb_uv, er_uv); } libvpx-1.8.2/vp9/encoder/vp9_extend.h000066400000000000000000000017721357355204000174340ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS.
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_EXTEND_H_ #define VPX_VP9_ENCODER_VP9_EXTEND_H_ #include "vpx_scale/yv12config.h" #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif void vp9_copy_and_extend_frame(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst); void vp9_copy_and_extend_frame_with_rect(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, int srcy, int srcx, int srch, int srcw); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_EXTEND_H_ libvpx-1.8.2/vp9/encoder/vp9_firstpass.c000066400000000000000000004246751357355204000201670ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <limits.h> #include <math.h> #include <stdio.h> #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_ports/system_state.h" #include "vpx_scale/vpx_scale.h" #include "vpx_scale/yv12config.h" #include "vp9/common/vp9_entropymv.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_reconinter.h" // vp9_setup_dst_planes() #include "vp9/encoder/vp9_aq_variance.h" #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_encodemb.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_ethread.h" #include "vp9/encoder/vp9_extend.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_rd.h" #include "vpx_dsp/variance.h" #define OUTPUT_FPF 0 #define ARF_STATS_OUTPUT 0 #define COMPLEXITY_STATS_OUTPUT 0 #define FIRST_PASS_Q 10.0 #define NORMAL_BOOST 100 #define MIN_ARF_GF_BOOST 250 #define MIN_DECAY_FACTOR 0.01 #define NEW_MV_MODE_PENALTY 32 #define DARK_THRESH 64 #define LOW_I_THRESH 24000 #define NCOUNT_INTRA_THRESH 8192 #define NCOUNT_INTRA_FACTOR 3 #define DOUBLE_DIVIDE_CHECK(x) ((x) < 0 ? (x)-0.000001 : (x) + 0.000001) #if ARF_STATS_OUTPUT unsigned int arf_count = 0; #endif // Resets the first pass file to the given position using a relative seek from // the current position. static void reset_fpf_position(TWO_PASS *p, const FIRSTPASS_STATS *position) { p->stats_in = position; } // Read frame stats at an offset from the current position.
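// The offset may be negative (looking back toward stats_in_start) or positive
// (looking ahead toward stats_in_end); NULL signals that the requested frame
// lies outside the buffered stats.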
static const FIRSTPASS_STATS *read_frame_stats(const TWO_PASS *p, int offset) { if ((offset >= 0 && p->stats_in + offset >= p->stats_in_end) || (offset < 0 && p->stats_in + offset < p->stats_in_start)) { return NULL; } return &p->stats_in[offset]; } static int input_stats(TWO_PASS *p, FIRSTPASS_STATS *fps) { if (p->stats_in >= p->stats_in_end) return EOF; *fps = *p->stats_in; ++p->stats_in; return 1; } static void output_stats(FIRSTPASS_STATS *stats) { (void)stats; // TEMP debug code #if OUTPUT_FPF { FILE *fpfile; fpfile = fopen("firstpass.stt", "a"); fprintf(fpfile, "%12.0lf %12.4lf %12.2lf %12.2lf %12.2lf %12.0lf %12.4lf %12.4lf" "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.4lf" "%12.4lf %12.4lf %12.4lf %12.4lf %12.4lf %12.0lf %12.4lf %12.0lf" "%12.4lf" "\n", stats->frame, stats->weight, stats->intra_error, stats->coded_error, stats->sr_coded_error, stats->frame_noise_energy, stats->pcnt_inter, stats->pcnt_motion, stats->pcnt_second_ref, stats->pcnt_neutral, stats->pcnt_intra_low, stats->pcnt_intra_high, stats->intra_skip_pct, stats->intra_smooth_pct, stats->inactive_zone_rows, stats->inactive_zone_cols, stats->MVr, stats->mvr_abs, stats->MVc, stats->mvc_abs, stats->MVrv, stats->MVcv, stats->mv_in_out_count, stats->count, stats->duration); fclose(fpfile); } #endif } #if CONFIG_FP_MB_STATS static void output_fpmb_stats(uint8_t *this_frame_mb_stats, VP9_COMMON *cm, struct vpx_codec_pkt_list *pktlist) { struct vpx_codec_cx_pkt pkt; pkt.kind = VPX_CODEC_FPMB_STATS_PKT; pkt.data.firstpass_mb_stats.buf = this_frame_mb_stats; pkt.data.firstpass_mb_stats.sz = cm->initial_mbs * sizeof(uint8_t); vpx_codec_pkt_list_add(pktlist, &pkt); } #endif static void zero_stats(FIRSTPASS_STATS *section) { section->frame = 0.0; section->weight = 0.0; section->intra_error = 0.0; section->coded_error = 0.0; section->sr_coded_error = 0.0; section->frame_noise_energy = 0.0; section->pcnt_inter = 0.0; section->pcnt_motion = 0.0; section->pcnt_second_ref = 0.0; section->pcnt_neutral = 0.0; section->intra_skip_pct = 0.0; section->intra_smooth_pct = 0.0; section->pcnt_intra_low = 0.0; section->pcnt_intra_high = 0.0; section->inactive_zone_rows = 0.0; section->inactive_zone_cols = 0.0; section->MVr = 0.0; section->mvr_abs = 0.0; section->MVc = 0.0; section->mvc_abs = 0.0; section->MVrv = 0.0; section->MVcv = 0.0; section->mv_in_out_count = 0.0; section->count = 0.0; section->duration = 1.0; section->spatial_layer_id = 0; } static void accumulate_stats(FIRSTPASS_STATS *section, const FIRSTPASS_STATS *frame) { section->frame += frame->frame; section->weight += frame->weight; section->spatial_layer_id = frame->spatial_layer_id; section->intra_error += frame->intra_error; section->coded_error += frame->coded_error; section->sr_coded_error += frame->sr_coded_error; section->frame_noise_energy += frame->frame_noise_energy; section->pcnt_inter += frame->pcnt_inter; section->pcnt_motion += frame->pcnt_motion; section->pcnt_second_ref += frame->pcnt_second_ref; section->pcnt_neutral += frame->pcnt_neutral; section->intra_skip_pct += frame->intra_skip_pct; section->intra_smooth_pct += frame->intra_smooth_pct; section->pcnt_intra_low += frame->pcnt_intra_low; section->pcnt_intra_high += frame->pcnt_intra_high; section->inactive_zone_rows += frame->inactive_zone_rows; section->inactive_zone_cols += frame->inactive_zone_cols; section->MVr += frame->MVr; section->mvr_abs += frame->mvr_abs; section->MVc += frame->MVc; section->mvc_abs += frame->mvc_abs; section->MVrv += frame->MVrv; section->MVcv += frame->MVcv; 
section->mv_in_out_count += frame->mv_in_out_count; section->count += frame->count; section->duration += frame->duration; } static void subtract_stats(FIRSTPASS_STATS *section, const FIRSTPASS_STATS *frame) { section->frame -= frame->frame; section->weight -= frame->weight; section->intra_error -= frame->intra_error; section->coded_error -= frame->coded_error; section->sr_coded_error -= frame->sr_coded_error; section->frame_noise_energy -= frame->frame_noise_energy; section->pcnt_inter -= frame->pcnt_inter; section->pcnt_motion -= frame->pcnt_motion; section->pcnt_second_ref -= frame->pcnt_second_ref; section->pcnt_neutral -= frame->pcnt_neutral; section->intra_skip_pct -= frame->intra_skip_pct; section->intra_smooth_pct -= frame->intra_smooth_pct; section->pcnt_intra_low -= frame->pcnt_intra_low; section->pcnt_intra_high -= frame->pcnt_intra_high; section->inactive_zone_rows -= frame->inactive_zone_rows; section->inactive_zone_cols -= frame->inactive_zone_cols; section->MVr -= frame->MVr; section->mvr_abs -= frame->mvr_abs; section->MVc -= frame->MVc; section->mvc_abs -= frame->mvc_abs; section->MVrv -= frame->MVrv; section->MVcv -= frame->MVcv; section->mv_in_out_count -= frame->mv_in_out_count; section->count -= frame->count; section->duration -= frame->duration; } // Calculate an active area of the image that discounts formatting // bars and partially discounts other 0 energy areas. #define MIN_ACTIVE_AREA 0.5 #define MAX_ACTIVE_AREA 1.0 static double calculate_active_area(const FRAME_INFO *frame_info, const FIRSTPASS_STATS *this_frame) { double active_pct; active_pct = 1.0 - ((this_frame->intra_skip_pct / 2) + ((this_frame->inactive_zone_rows * 2) / (double)frame_info->mb_rows)); return fclamp(active_pct, MIN_ACTIVE_AREA, MAX_ACTIVE_AREA); } // Get the average weighted error for the clip (or corpus) static double get_distribution_av_err(VP9_COMP *cpi, TWO_PASS *const twopass) { const double av_weight = twopass->total_stats.weight / twopass->total_stats.count; if (cpi->oxcf.vbr_corpus_complexity) return av_weight * twopass->mean_mod_score; else return (twopass->total_stats.coded_error * av_weight) / twopass->total_stats.count; } #define ACT_AREA_CORRECTION 0.5 // Calculate a modified Error used in distributing bits between easier and // harder frames. static double calculate_mod_frame_score(const VP9_COMP *cpi, const VP9EncoderConfig *oxcf, const FIRSTPASS_STATS *this_frame, const double av_err) { double modified_score = av_err * pow(this_frame->coded_error * this_frame->weight / DOUBLE_DIVIDE_CHECK(av_err), oxcf->two_pass_vbrbias / 100.0); // Correction for active area. Frames with a reduced active area // (eg due to formatting bars) have a higher error per mb for the // remaining active MBs. The correction here assumes that coding // 0.5N blocks of complexity 2X is a little easier than coding N // blocks of complexity X. modified_score *= pow(calculate_active_area(&cpi->frame_info, this_frame), ACT_AREA_CORRECTION); return modified_score; } static double calc_norm_frame_score(const VP9EncoderConfig *oxcf, const FRAME_INFO *frame_info, const FIRSTPASS_STATS *this_frame, double mean_mod_score, double av_err) { double modified_score = av_err * pow(this_frame->coded_error * this_frame->weight / DOUBLE_DIVIDE_CHECK(av_err), oxcf->two_pass_vbrbias / 100.0); const double min_score = (double)(oxcf->two_pass_vbrmin_section) / 100.0; const double max_score = (double)(oxcf->two_pass_vbrmax_section) / 100.0; // Correction for active area. 
Frames with a reduced active area // (eg due to formatting bars) have a higher error per mb for the // remaining active MBs. The correction here assumes that coding // 0.5N blocks of complexity 2X is a little easier than coding N // blocks of complexity X. modified_score *= pow(calculate_active_area(frame_info, this_frame), ACT_AREA_CORRECTION); // Normalize to a midpoint score. modified_score /= DOUBLE_DIVIDE_CHECK(mean_mod_score); return fclamp(modified_score, min_score, max_score); } static double calculate_norm_frame_score(const VP9_COMP *cpi, const TWO_PASS *twopass, const VP9EncoderConfig *oxcf, const FIRSTPASS_STATS *this_frame, const double av_err) { return calc_norm_frame_score(oxcf, &cpi->frame_info, this_frame, twopass->mean_mod_score, av_err); } // This function returns the maximum target rate per frame. static int frame_max_bits(const RATE_CONTROL *rc, const VP9EncoderConfig *oxcf) { int64_t max_bits = ((int64_t)rc->avg_frame_bandwidth * (int64_t)oxcf->two_pass_vbrmax_section) / 100; if (max_bits < 0) max_bits = 0; else if (max_bits > rc->max_frame_bandwidth) max_bits = rc->max_frame_bandwidth; return (int)max_bits; } void vp9_init_first_pass(VP9_COMP *cpi) { zero_stats(&cpi->twopass.total_stats); } void vp9_end_first_pass(VP9_COMP *cpi) { output_stats(&cpi->twopass.total_stats); cpi->twopass.first_pass_done = 1; vpx_free(cpi->twopass.fp_mb_float_stats); cpi->twopass.fp_mb_float_stats = NULL; } static vpx_variance_fn_t get_block_variance_fn(BLOCK_SIZE bsize) { switch (bsize) { case BLOCK_8X8: return vpx_mse8x8; case BLOCK_16X8: return vpx_mse16x8; case BLOCK_8X16: return vpx_mse8x16; default: return vpx_mse16x16; } } static unsigned int get_prediction_error(BLOCK_SIZE bsize, const struct buf_2d *src, const struct buf_2d *ref) { unsigned int sse; const vpx_variance_fn_t fn = get_block_variance_fn(bsize); fn(src->buf, src->stride, ref->buf, ref->stride, &sse); return sse; } #if CONFIG_VP9_HIGHBITDEPTH static vpx_variance_fn_t highbd_get_block_variance_fn(BLOCK_SIZE bsize, int bd) { switch (bd) { default: switch (bsize) { case BLOCK_8X8: return vpx_highbd_8_mse8x8; case BLOCK_16X8: return vpx_highbd_8_mse16x8; case BLOCK_8X16: return vpx_highbd_8_mse8x16; default: return vpx_highbd_8_mse16x16; } break; case 10: switch (bsize) { case BLOCK_8X8: return vpx_highbd_10_mse8x8; case BLOCK_16X8: return vpx_highbd_10_mse16x8; case BLOCK_8X16: return vpx_highbd_10_mse8x16; default: return vpx_highbd_10_mse16x16; } break; case 12: switch (bsize) { case BLOCK_8X8: return vpx_highbd_12_mse8x8; case BLOCK_16X8: return vpx_highbd_12_mse16x8; case BLOCK_8X16: return vpx_highbd_12_mse8x16; default: return vpx_highbd_12_mse16x16; } break; } } static unsigned int highbd_get_prediction_error(BLOCK_SIZE bsize, const struct buf_2d *src, const struct buf_2d *ref, int bd) { unsigned int sse; const vpx_variance_fn_t fn = highbd_get_block_variance_fn(bsize, bd); fn(src->buf, src->stride, ref->buf, ref->stride, &sse); return sse; } #endif // CONFIG_VP9_HIGHBITDEPTH // Refine the motion search range according to the frame dimension // for first pass test. 
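// Worked example (assuming MAX_FULL_PEL_VAL is 2047 full pels, its value in
// vp9_mcomp.h at the time of writing): a 640x360 clip has dim = 360, and
// 360 << 3 is the first shift to reach 2047, so sr = 3 and the caller's
// step_param grows from 3 to 6 while further_steps shrinks by the same amount.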
static int get_search_range(const VP9_COMP *cpi) { int sr = 0; const int dim = VPXMIN(cpi->initial_width, cpi->initial_height); while ((dim << sr) < MAX_FULL_PEL_VAL) ++sr; return sr; } static void first_pass_motion_search(VP9_COMP *cpi, MACROBLOCK *x, const MV *ref_mv, MV *best_mv, int *best_motion_err) { MACROBLOCKD *const xd = &x->e_mbd; MV tmp_mv = { 0, 0 }; MV ref_mv_full = { ref_mv->row >> 3, ref_mv->col >> 3 }; int num00, tmp_err, n; const BLOCK_SIZE bsize = xd->mi[0]->sb_type; vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; const int new_mv_mode_penalty = NEW_MV_MODE_PENALTY; int step_param = 3; int further_steps = (MAX_MVSEARCH_STEPS - 1) - step_param; const int sr = get_search_range(cpi); step_param += sr; further_steps -= sr; // Override the default variance function to use MSE. v_fn_ptr.vf = get_block_variance_fn(bsize); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, xd->bd); } #endif // CONFIG_VP9_HIGHBITDEPTH // Center the initial step/diamond search on best mv. tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv, step_param, x->sadperbit16, &num00, &v_fn_ptr, ref_mv); if (tmp_err < INT_MAX) tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1); if (tmp_err < INT_MAX - new_mv_mode_penalty) tmp_err += new_mv_mode_penalty; if (tmp_err < *best_motion_err) { *best_motion_err = tmp_err; *best_mv = tmp_mv; } // Carry out further step/diamond searches as necessary. n = num00; num00 = 0; while (n < further_steps) { ++n; if (num00) { --num00; } else { tmp_err = cpi->diamond_search_sad(x, &cpi->ss_cfg, &ref_mv_full, &tmp_mv, step_param + n, x->sadperbit16, &num00, &v_fn_ptr, ref_mv); if (tmp_err < INT_MAX) tmp_err = vp9_get_mvpred_var(x, &tmp_mv, ref_mv, &v_fn_ptr, 1); if (tmp_err < INT_MAX - new_mv_mode_penalty) tmp_err += new_mv_mode_penalty; if (tmp_err < *best_motion_err) { *best_motion_err = tmp_err; *best_mv = tmp_mv; } } } } static BLOCK_SIZE get_bsize(const VP9_COMMON *cm, int mb_row, int mb_col) { if (2 * mb_col + 1 < cm->mi_cols) { return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_16X16 : BLOCK_16X8; } else { return 2 * mb_row + 1 < cm->mi_rows ? BLOCK_8X16 : BLOCK_8X8; } } static int find_fp_qindex(vpx_bit_depth_t bit_depth) { int i; for (i = 0; i < QINDEX_RANGE; ++i) if (vp9_convert_qindex_to_q(i, bit_depth) >= FIRST_PASS_Q) break; if (i == QINDEX_RANGE) i--; return i; } static void set_first_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; if (!cpi->refresh_alt_ref_frame && (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY))) { cm->frame_type = KEY_FRAME; } else { cm->frame_type = INTER_FRAME; } // Do not use periodic key frames. cpi->rc.frames_to_key = INT_MAX; } // Scale an sse threshold to account for 8/10/12 bit. static int scale_sse_threshold(VP9_COMMON *cm, int thresh) { int ret_val = thresh; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { switch (cm->bit_depth) { case VPX_BITS_8: ret_val = thresh; break; case VPX_BITS_10: ret_val = thresh << 4; break; default: assert(cm->bit_depth == VPX_BITS_12); ret_val = thresh << 8; break; } } #else (void)cm; #endif // CONFIG_VP9_HIGHBITDEPTH return ret_val; } // This threshold is used to track blocks where to all intents and purposes // the intra prediction error is 0.
Though the metric we test against // is technically a sse, we are mainly interested in blocks where all the pixels // in the 8 bit domain have an error of <= 1 (where error = sse), so a // linear scaling for 10 and 12 bit gives similar results. #define UL_INTRA_THRESH 50 static int get_ul_intra_threshold(VP9_COMMON *cm) { int ret_val = UL_INTRA_THRESH; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { switch (cm->bit_depth) { case VPX_BITS_8: ret_val = UL_INTRA_THRESH; break; case VPX_BITS_10: ret_val = UL_INTRA_THRESH << 2; break; default: assert(cm->bit_depth == VPX_BITS_12); ret_val = UL_INTRA_THRESH << 4; break; } } #else (void)cm; #endif // CONFIG_VP9_HIGHBITDEPTH return ret_val; } #define SMOOTH_INTRA_THRESH 4000 static int get_smooth_intra_threshold(VP9_COMMON *cm) { int ret_val = SMOOTH_INTRA_THRESH; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { switch (cm->bit_depth) { case VPX_BITS_8: ret_val = SMOOTH_INTRA_THRESH; break; case VPX_BITS_10: ret_val = SMOOTH_INTRA_THRESH << 4; break; default: assert(cm->bit_depth == VPX_BITS_12); ret_val = SMOOTH_INTRA_THRESH << 8; break; } } #else (void)cm; #endif // CONFIG_VP9_HIGHBITDEPTH return ret_val; } #define FP_DN_THRESH 8 #define FP_MAX_DN_THRESH 24 #define KERNEL_SIZE 3 // Baseline kernel weights for first pass noise metric static uint8_t fp_dn_kernal_3[KERNEL_SIZE * KERNEL_SIZE] = { 1, 2, 1, 2, 4, 2, 1, 2, 1 }; // Estimate noise at a single point based on the impact of a spatial kernel // on the point value static int fp_estimate_point_noise(uint8_t *src_ptr, const int stride) { int sum_weight = 0; int sum_val = 0; int i, j; int max_diff = 0; int diff; int dn_diff; uint8_t *tmp_ptr; uint8_t *kernal_ptr; uint8_t dn_val; uint8_t centre_val = *src_ptr; kernal_ptr = fp_dn_kernal_3; // Apply the kernel tmp_ptr = src_ptr - stride - 1; for (i = 0; i < KERNEL_SIZE; ++i) { for (j = 0; j < KERNEL_SIZE; ++j) { diff = abs((int)centre_val - (int)tmp_ptr[j]); max_diff = VPXMAX(max_diff, diff); if (diff <= FP_DN_THRESH) { sum_weight += *kernal_ptr; sum_val += (int)tmp_ptr[j] * (int)*kernal_ptr; } ++kernal_ptr; } tmp_ptr += stride; } if (max_diff < FP_MAX_DN_THRESH) // Update the source value with the new filtered value dn_val = (sum_val + (sum_weight >> 1)) / sum_weight; else dn_val = *src_ptr; // return the noise energy as the square of the difference between the // denoised and raw value. dn_diff = (int)*src_ptr - (int)dn_val; return dn_diff * dn_diff; } #if CONFIG_VP9_HIGHBITDEPTH static int fp_highbd_estimate_point_noise(uint8_t *src_ptr, const int stride) { int sum_weight = 0; int sum_val = 0; int i, j; int max_diff = 0; int diff; int dn_diff; uint8_t *tmp_ptr; uint16_t *tmp_ptr16; uint8_t *kernal_ptr; uint16_t dn_val; uint16_t centre_val = *CONVERT_TO_SHORTPTR(src_ptr); kernal_ptr = fp_dn_kernal_3; // Apply the kernel tmp_ptr = src_ptr - stride - 1; for (i = 0; i < KERNEL_SIZE; ++i) { tmp_ptr16 = CONVERT_TO_SHORTPTR(tmp_ptr); for (j = 0; j < KERNEL_SIZE; ++j) { diff = abs((int)centre_val - (int)tmp_ptr16[j]); max_diff = VPXMAX(max_diff, diff); if (diff <= FP_DN_THRESH) { sum_weight += *kernal_ptr; sum_val += (int)tmp_ptr16[j] * (int)*kernal_ptr; } ++kernal_ptr; } tmp_ptr += stride; } if (max_diff < FP_MAX_DN_THRESH) // Update the source value with the new filtered value dn_val = (sum_val + (sum_weight >> 1)) / sum_weight; else dn_val = *CONVERT_TO_SHORTPTR(src_ptr); // return the noise energy as the square of the difference between the // denoised and raw value.
dn_diff = (int)(*CONVERT_TO_SHORTPTR(src_ptr)) - (int)dn_val; return dn_diff * dn_diff; } #endif // Estimate noise for a block. static int fp_estimate_block_noise(MACROBLOCK *x, BLOCK_SIZE bsize) { #if CONFIG_VP9_HIGHBITDEPTH MACROBLOCKD *xd = &x->e_mbd; #endif uint8_t *src_ptr = &x->plane[0].src.buf[0]; const int width = num_4x4_blocks_wide_lookup[bsize] * 4; const int height = num_4x4_blocks_high_lookup[bsize] * 4; int w, h; int stride = x->plane[0].src.stride; int block_noise = 0; // Sampled points to reduce cost overhead. for (h = 0; h < height; h += 2) { for (w = 0; w < width; w += 2) { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) block_noise += fp_highbd_estimate_point_noise(src_ptr, stride); else block_noise += fp_estimate_point_noise(src_ptr, stride); #else block_noise += fp_estimate_point_noise(src_ptr, stride); #endif ++src_ptr; } src_ptr += (stride - width); } return block_noise << 2; // Scale << 2 to account for sampling. } // This function is called to test the functionality of row-based // multi-threading in unit tests for bit-exactness. static void accumulate_floating_point_stats(VP9_COMP *cpi, TileDataEnc *first_tile_col) { VP9_COMMON *const cm = &cpi->common; int mb_row, mb_col; first_tile_col->fp_data.intra_factor = 0; first_tile_col->fp_data.brightness_factor = 0; first_tile_col->fp_data.neutral_count = 0; for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { for (mb_col = 0; mb_col < cm->mb_cols; ++mb_col) { const int mb_index = mb_row * cm->mb_cols + mb_col; first_tile_col->fp_data.intra_factor += cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor; first_tile_col->fp_data.brightness_factor += cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor; first_tile_col->fp_data.neutral_count += cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count; } } } static void first_pass_stat_calc(VP9_COMP *cpi, FIRSTPASS_STATS *fps, FIRSTPASS_DATA *fp_acc_data) { VP9_COMMON *const cm = &cpi->common; // The minimum error here ensures some bit allocation to frames even // in static regions. The allocation per MB declines for larger formats // where the typical "real" energy per MB also falls. // Initial estimate here uses sqrt(mbs) to define the min_err, where the // number of mbs is proportional to the image area. const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE) ? cpi->initial_mbs : cpi->common.MBs; const double min_err = 200 * sqrt(num_mbs); // Clamp the image start to rows/2. This number of rows is discarded top // and bottom as dead data so rows / 2 means the frame is blank.
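// For example, at 1080p cm->mb_rows is 68, so a reported start row above 34
// (or an INVALID_ROW marker) is clamped to 34, i.e. an effectively blank frame.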
if ((fp_acc_data->image_data_start_row > cm->mb_rows / 2) || (fp_acc_data->image_data_start_row == INVALID_ROW)) { fp_acc_data->image_data_start_row = cm->mb_rows / 2; } // Exclude any image dead zone if (fp_acc_data->image_data_start_row > 0) { fp_acc_data->intra_skip_count = VPXMAX(0, fp_acc_data->intra_skip_count - (fp_acc_data->image_data_start_row * cm->mb_cols * 2)); } fp_acc_data->intra_factor = fp_acc_data->intra_factor / (double)num_mbs; fp_acc_data->brightness_factor = fp_acc_data->brightness_factor / (double)num_mbs; fps->weight = fp_acc_data->intra_factor * fp_acc_data->brightness_factor; fps->frame = cm->current_video_frame; fps->spatial_layer_id = cpi->svc.spatial_layer_id; fps->coded_error = ((double)(fp_acc_data->coded_error >> 8) + min_err) / num_mbs; fps->sr_coded_error = ((double)(fp_acc_data->sr_coded_error >> 8) + min_err) / num_mbs; fps->intra_error = ((double)(fp_acc_data->intra_error >> 8) + min_err) / num_mbs; fps->frame_noise_energy = (double)(fp_acc_data->frame_noise_energy) / (double)num_mbs; fps->count = 1.0; fps->pcnt_inter = (double)(fp_acc_data->intercount) / num_mbs; fps->pcnt_second_ref = (double)(fp_acc_data->second_ref_count) / num_mbs; fps->pcnt_neutral = (double)(fp_acc_data->neutral_count) / num_mbs; fps->pcnt_intra_low = (double)(fp_acc_data->intra_count_low) / num_mbs; fps->pcnt_intra_high = (double)(fp_acc_data->intra_count_high) / num_mbs; fps->intra_skip_pct = (double)(fp_acc_data->intra_skip_count) / num_mbs; fps->intra_smooth_pct = (double)(fp_acc_data->intra_smooth_count) / num_mbs; fps->inactive_zone_rows = (double)(fp_acc_data->image_data_start_row); // Currently set to 0 as most issues relate to letter boxing. fps->inactive_zone_cols = (double)0; if (fp_acc_data->mvcount > 0) { fps->MVr = (double)(fp_acc_data->sum_mvr) / fp_acc_data->mvcount; fps->mvr_abs = (double)(fp_acc_data->sum_mvr_abs) / fp_acc_data->mvcount; fps->MVc = (double)(fp_acc_data->sum_mvc) / fp_acc_data->mvcount; fps->mvc_abs = (double)(fp_acc_data->sum_mvc_abs) / fp_acc_data->mvcount; fps->MVrv = ((double)(fp_acc_data->sum_mvrs) - ((double)(fp_acc_data->sum_mvr) * (fp_acc_data->sum_mvr) / fp_acc_data->mvcount)) / fp_acc_data->mvcount; fps->MVcv = ((double)(fp_acc_data->sum_mvcs) - ((double)(fp_acc_data->sum_mvc) * (fp_acc_data->sum_mvc) / fp_acc_data->mvcount)) / fp_acc_data->mvcount; fps->mv_in_out_count = (double)(fp_acc_data->sum_in_vectors) / (fp_acc_data->mvcount * 2); fps->pcnt_motion = (double)(fp_acc_data->mvcount) / num_mbs; } else { fps->MVr = 0.0; fps->mvr_abs = 0.0; fps->MVc = 0.0; fps->mvc_abs = 0.0; fps->MVrv = 0.0; fps->MVcv = 0.0; fps->mv_in_out_count = 0.0; fps->pcnt_motion = 0.0; } } static void accumulate_fp_mb_row_stat(TileDataEnc *this_tile, FIRSTPASS_DATA *fp_acc_data) { this_tile->fp_data.intra_factor += fp_acc_data->intra_factor; this_tile->fp_data.brightness_factor += fp_acc_data->brightness_factor; this_tile->fp_data.coded_error += fp_acc_data->coded_error; this_tile->fp_data.sr_coded_error += fp_acc_data->sr_coded_error; this_tile->fp_data.frame_noise_energy += fp_acc_data->frame_noise_energy; this_tile->fp_data.intra_error += fp_acc_data->intra_error; this_tile->fp_data.intercount += fp_acc_data->intercount; this_tile->fp_data.second_ref_count += fp_acc_data->second_ref_count; this_tile->fp_data.neutral_count += fp_acc_data->neutral_count; this_tile->fp_data.intra_count_low += fp_acc_data->intra_count_low; this_tile->fp_data.intra_count_high += fp_acc_data->intra_count_high; this_tile->fp_data.intra_skip_count += fp_acc_data->intra_skip_count; 
this_tile->fp_data.mvcount += fp_acc_data->mvcount; this_tile->fp_data.sum_mvr += fp_acc_data->sum_mvr; this_tile->fp_data.sum_mvr_abs += fp_acc_data->sum_mvr_abs; this_tile->fp_data.sum_mvc += fp_acc_data->sum_mvc; this_tile->fp_data.sum_mvc_abs += fp_acc_data->sum_mvc_abs; this_tile->fp_data.sum_mvrs += fp_acc_data->sum_mvrs; this_tile->fp_data.sum_mvcs += fp_acc_data->sum_mvcs; this_tile->fp_data.sum_in_vectors += fp_acc_data->sum_in_vectors; this_tile->fp_data.intra_smooth_count += fp_acc_data->intra_smooth_count; this_tile->fp_data.image_data_start_row = VPXMIN(this_tile->fp_data.image_data_start_row, fp_acc_data->image_data_start_row) == INVALID_ROW ? VPXMAX(this_tile->fp_data.image_data_start_row, fp_acc_data->image_data_start_row) : VPXMIN(this_tile->fp_data.image_data_start_row, fp_acc_data->image_data_start_row); } #define NZ_MOTION_PENALTY 128 #define INTRA_MODE_PENALTY 1024 void vp9_first_pass_encode_tile_mb_row(VP9_COMP *cpi, ThreadData *td, FIRSTPASS_DATA *fp_acc_data, TileDataEnc *tile_data, MV *best_ref_mv, int mb_row) { int mb_col; MACROBLOCK *const x = &td->mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; TileInfo tile = tile_data->tile_info; const int mb_col_start = ROUND_POWER_OF_TWO(tile.mi_col_start, 1); const int mb_col_end = ROUND_POWER_OF_TWO(tile.mi_col_end, 1); struct macroblock_plane *const p = x->plane; struct macroblockd_plane *const pd = xd->plane; const PICK_MODE_CONTEXT *ctx = &td->pc_root->none; int i, c; int num_mb_cols = get_num_cols(tile_data->tile_info, 1); int recon_yoffset, recon_uvoffset; const int intrapenalty = INTRA_MODE_PENALTY; const MV zero_mv = { 0, 0 }; int recon_y_stride, recon_uv_stride, uv_mb_height; YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; MODE_INFO mi_above, mi_left; double mb_intra_factor; double mb_brightness_factor; double mb_neutral_count; int scaled_low_intra_thresh = scale_sse_threshold(cm, LOW_I_THRESH); // First pass code requires valid last and new frame buffers. assert(new_yv12 != NULL); assert(frame_is_intra_only(cm) || (lst_yv12 != NULL)); xd->mi = cm->mi_grid_visible + xd->mi_stride * (mb_row << 1) + mb_col_start; xd->mi[0] = cm->mi + xd->mi_stride * (mb_row << 1) + mb_col_start; for (i = 0; i < MAX_MB_PLANE; ++i) { p[i].coeff = ctx->coeff_pbuf[i][1]; p[i].qcoeff = ctx->qcoeff_pbuf[i][1]; pd[i].dqcoeff = ctx->dqcoeff_pbuf[i][1]; p[i].eobs = ctx->eobs_pbuf[i][1]; } recon_y_stride = new_yv12->y_stride; recon_uv_stride = new_yv12->uv_stride; uv_mb_height = 16 >> (new_yv12->y_height > new_yv12->uv_height); // Reset above block coeffs. recon_yoffset = (mb_row * recon_y_stride * 16) + mb_col_start * 16; recon_uvoffset = (mb_row * recon_uv_stride * uv_mb_height) + mb_col_start * uv_mb_height; // Set up limit values for motion vectors to prevent them extending // outside the UMV borders. 
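// The limits are in full pels; the BORDER_MV_PIXELS_B16 slack lets the search
// probe into the frame's extended border region rather than stopping at the
// visible frame edge.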
x->mv_limits.row_min = -((mb_row * 16) + BORDER_MV_PIXELS_B16); x->mv_limits.row_max = ((cm->mb_rows - 1 - mb_row) * 16) + BORDER_MV_PIXELS_B16; for (mb_col = mb_col_start, c = 0; mb_col < mb_col_end; ++mb_col, c++) { int this_error; int this_intra_error; const int use_dc_pred = (mb_col || mb_row) && (!mb_col || !mb_row); const BLOCK_SIZE bsize = get_bsize(cm, mb_row, mb_col); double log_intra; int level_sample; const int mb_index = mb_row * cm->mb_cols + mb_col; (*(cpi->row_mt_sync_read_ptr))(&tile_data->row_mt_sync, mb_row, c); // Adjust to the next column of MBs. x->plane[0].src.buf = cpi->Source->y_buffer + mb_row * 16 * x->plane[0].src.stride + mb_col * 16; x->plane[1].src.buf = cpi->Source->u_buffer + mb_row * uv_mb_height * x->plane[1].src.stride + mb_col * uv_mb_height; x->plane[2].src.buf = cpi->Source->v_buffer + mb_row * uv_mb_height * x->plane[1].src.stride + mb_col * uv_mb_height; vpx_clear_system_state(); xd->plane[0].dst.buf = new_yv12->y_buffer + recon_yoffset; xd->plane[1].dst.buf = new_yv12->u_buffer + recon_uvoffset; xd->plane[2].dst.buf = new_yv12->v_buffer + recon_uvoffset; xd->mi[0]->sb_type = bsize; xd->mi[0]->ref_frame[0] = INTRA_FRAME; set_mi_row_col(xd, &tile, mb_row << 1, num_8x8_blocks_high_lookup[bsize], mb_col << 1, num_8x8_blocks_wide_lookup[bsize], cm->mi_rows, cm->mi_cols); // Are edges available for intra prediction? // Since the firstpass does not populate the mi_grid_visible, // above_mi/left_mi must be overwritten with a nonzero value when edges // are available. Required by vp9_predict_intra_block(). xd->above_mi = (mb_row != 0) ? &mi_above : NULL; xd->left_mi = ((mb_col << 1) > tile.mi_col_start) ? &mi_left : NULL; // Do intra 16x16 prediction. x->skip_encode = 0; x->fp_src_pred = 0; // Do intra prediction based on source pixels for tile boundaries. if (mb_col == mb_col_start && mb_col != 0) { xd->left_mi = &mi_left; x->fp_src_pred = 1; } xd->mi[0]->mode = DC_PRED; xd->mi[0]->tx_size = use_dc_pred ? (bsize >= BLOCK_16X16 ? TX_16X16 : TX_8X8) : TX_4X4; // Fix - zero the 16x16 block first. This ensures correct this_error for // block sizes smaller than 16x16. vp9_zero_array(x->plane[0].src_diff, 256); vp9_encode_intra_block_plane(x, bsize, 0, 0); this_error = vpx_get_mb_ss(x->plane[0].src_diff); this_intra_error = this_error; // Keep a record of blocks that have very low intra error residual // (i.e. are in effect completely flat and untextured in the intra // domain). In natural videos this is uncommon, but it is much more // common in animations, graphics and screen content, so may be used // as a signal to detect these types of content. if (this_error < get_ul_intra_threshold(cm)) { ++(fp_acc_data->intra_skip_count); } else if ((mb_col > 0) && (fp_acc_data->image_data_start_row == INVALID_ROW)) { fp_acc_data->image_data_start_row = mb_row; } // Blocks that are mainly smooth in the intra domain. // Some special accounting for CQ but also these are better for testing // noise levels. if (this_error < get_smooth_intra_threshold(cm)) { ++(fp_acc_data->intra_smooth_count); } // Special case noise measurement for first frame.
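// The first frame has no reference to diff against, so noise is estimated
// from the intra residual alone: low intra error blocks use the spatial
// kernel estimate, everything else charges the default SECTION_NOISE_DEF.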
if (cm->current_video_frame == 0) { if (this_intra_error < scale_sse_threshold(cm, LOW_I_THRESH)) { fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize); } else { fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF; } } #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) { switch (cm->bit_depth) { case VPX_BITS_8: break; case VPX_BITS_10: this_error >>= 4; break; default: assert(cm->bit_depth == VPX_BITS_12); this_error >>= 8; break; } } #endif // CONFIG_VP9_HIGHBITDEPTH vpx_clear_system_state(); log_intra = log(this_error + 1.0); if (log_intra < 10.0) { mb_intra_factor = 1.0 + ((10.0 - log_intra) * 0.05); fp_acc_data->intra_factor += mb_intra_factor; if (cpi->row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = mb_intra_factor; } else { fp_acc_data->intra_factor += 1.0; if (cpi->row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_intra_factor = 1.0; } #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) level_sample = CONVERT_TO_SHORTPTR(x->plane[0].src.buf)[0]; else level_sample = x->plane[0].src.buf[0]; #else level_sample = x->plane[0].src.buf[0]; #endif if ((level_sample < DARK_THRESH) && (log_intra < 9.0)) { mb_brightness_factor = 1.0 + (0.01 * (DARK_THRESH - level_sample)); fp_acc_data->brightness_factor += mb_brightness_factor; if (cpi->row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor = mb_brightness_factor; } else { fp_acc_data->brightness_factor += 1.0; if (cpi->row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_brightness_factor = 1.0; } // Intrapenalty below deals with situations where the intra and inter // error scores are very low (e.g. a plain black frame). // We do not have special cases in first pass for 0,0 and nearest etc so // all inter modes carry an overhead cost estimate for the mv. // When the error score is very low this causes us to pick all or lots of // INTRA modes and throw lots of key frames. // This penalty adds a cost matching that of a 0,0 mv to the intra case. this_error += intrapenalty; // Accumulate the intra error. fp_acc_data->intra_error += (int64_t)this_error; #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { // initialization cpi->twopass.frame_mb_stats_buf[mb_index] = 0; } #endif // Set up limit values for motion vectors to prevent them extending // outside the UMV borders. x->mv_limits.col_min = -((mb_col * 16) + BORDER_MV_PIXELS_B16); x->mv_limits.col_max = ((cm->mb_cols - 1 - mb_col) * 16) + BORDER_MV_PIXELS_B16; // Other than for the first frame do a motion search. if (cm->current_video_frame > 0) { int tmp_err, motion_error, this_motion_error, raw_motion_error; // Assume 0,0 motion with no mv overhead. 
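// Three errors are tracked below: motion_error (0,0 prediction from the
// reconstructed last frame, refined by the searches that follow),
// raw_motion_error (0,0 prediction from the unscaled last source), and
// this_motion_error (an 8 bit depth variant of motion_error kept for the
// noise/intra classification further down).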
MV mv = { 0, 0 }, tmp_mv = { 0, 0 }; struct buf_2d unscaled_last_source_buf_2d; vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[bsize]; xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { motion_error = highbd_get_prediction_error( bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd); this_motion_error = highbd_get_prediction_error( bsize, &x->plane[0].src, &xd->plane[0].pre[0], 8); } else { motion_error = get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]); this_motion_error = motion_error; } #else motion_error = get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]); this_motion_error = motion_error; #endif // CONFIG_VP9_HIGHBITDEPTH // Compute the motion error of the 0,0 motion using the last source // frame as the reference. Skip the further motion search on // reconstructed frame if this error is very small. unscaled_last_source_buf_2d.buf = cpi->unscaled_last_source->y_buffer + recon_yoffset; unscaled_last_source_buf_2d.stride = cpi->unscaled_last_source->y_stride; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { raw_motion_error = highbd_get_prediction_error( bsize, &x->plane[0].src, &unscaled_last_source_buf_2d, xd->bd); } else { raw_motion_error = get_prediction_error(bsize, &x->plane[0].src, &unscaled_last_source_buf_2d); } #else raw_motion_error = get_prediction_error(bsize, &x->plane[0].src, &unscaled_last_source_buf_2d); #endif // CONFIG_VP9_HIGHBITDEPTH if (raw_motion_error > NZ_MOTION_PENALTY) { // Test last reference frame using the previous best mv as the // starting point (best reference) for the search. first_pass_motion_search(cpi, x, best_ref_mv, &mv, &motion_error); v_fn_ptr.vf = get_block_variance_fn(bsize); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { v_fn_ptr.vf = highbd_get_block_variance_fn(bsize, 8); } #endif // CONFIG_VP9_HIGHBITDEPTH this_motion_error = vp9_get_mvpred_var(x, &mv, best_ref_mv, &v_fn_ptr, 0); // If the current best reference mv is not centered on 0,0 then do a // 0,0 based search as well. if (!is_zero_mv(best_ref_mv)) { tmp_err = INT_MAX; first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &tmp_err); if (tmp_err < motion_error) { motion_error = tmp_err; mv = tmp_mv; this_motion_error = vp9_get_mvpred_var(x, &tmp_mv, &zero_mv, &v_fn_ptr, 0); } } // Search in an older reference frame. if ((cm->current_video_frame > 1) && gld_yv12 != NULL) { // Assume 0,0 motion with no mv overhead. int gf_motion_error; xd->plane[0].pre[0].buf = gld_yv12->y_buffer + recon_yoffset; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { gf_motion_error = highbd_get_prediction_error( bsize, &x->plane[0].src, &xd->plane[0].pre[0], xd->bd); } else { gf_motion_error = get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]); } #else gf_motion_error = get_prediction_error(bsize, &x->plane[0].src, &xd->plane[0].pre[0]); #endif // CONFIG_VP9_HIGHBITDEPTH first_pass_motion_search(cpi, x, &zero_mv, &tmp_mv, &gf_motion_error); if (gf_motion_error < motion_error && gf_motion_error < this_error) ++(fp_acc_data->second_ref_count); // Reset to last frame as reference buffer. 
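// The golden frame search above repointed pre[0] at gld_yv12; restore all
// three planes to the last frame reconstruction before the block is encoded.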
xd->plane[0].pre[0].buf = first_ref_buf->y_buffer + recon_yoffset; xd->plane[1].pre[0].buf = first_ref_buf->u_buffer + recon_uvoffset; xd->plane[2].pre[0].buf = first_ref_buf->v_buffer + recon_uvoffset; // In accumulating a score for the older reference frame take the // best of the motion predicted score and the intra coded error // (just as will be done for) accumulation of "coded_error" for // the last frame. if (gf_motion_error < this_error) fp_acc_data->sr_coded_error += gf_motion_error; else fp_acc_data->sr_coded_error += this_error; } else { fp_acc_data->sr_coded_error += motion_error; } } else { fp_acc_data->sr_coded_error += motion_error; } // Start by assuming that intra mode is best. best_ref_mv->row = 0; best_ref_mv->col = 0; #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { // intra prediction statistics cpi->twopass.frame_mb_stats_buf[mb_index] = 0; cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_DCINTRA_MASK; cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK; if (this_error > FPMB_ERROR_LARGE_TH) { cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK; } else if (this_error < FPMB_ERROR_SMALL_TH) { cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK; } } #endif if (motion_error <= this_error) { vpx_clear_system_state(); // Keep a count of cases where the inter and intra were very close // and very low. This helps with scene cut detection for example in // cropped clips with black bars at the sides or top and bottom. if (((this_error - intrapenalty) * 9 <= motion_error * 10) && (this_error < (2 * intrapenalty))) { fp_acc_data->neutral_count += 1.0; if (cpi->row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count = 1.0; // Also track cases where the intra is not much worse than the inter // and use this in limiting the GF/arf group length. 
} else if ((this_error > NCOUNT_INTRA_THRESH) && (this_error < (NCOUNT_INTRA_FACTOR * motion_error))) { mb_neutral_count = (double)motion_error / DOUBLE_DIVIDE_CHECK((double)this_error); fp_acc_data->neutral_count += mb_neutral_count; if (cpi->row_mt_bit_exact) cpi->twopass.fp_mb_float_stats[mb_index].frame_mb_neutral_count = mb_neutral_count; } mv.row *= 8; mv.col *= 8; this_error = motion_error; xd->mi[0]->mode = NEWMV; xd->mi[0]->mv[0].as_mv = mv; xd->mi[0]->tx_size = TX_4X4; xd->mi[0]->ref_frame[0] = LAST_FRAME; xd->mi[0]->ref_frame[1] = NONE; vp9_build_inter_predictors_sby(xd, mb_row << 1, mb_col << 1, bsize); vp9_encode_sby_pass1(x, bsize); fp_acc_data->sum_mvr += mv.row; fp_acc_data->sum_mvr_abs += abs(mv.row); fp_acc_data->sum_mvc += mv.col; fp_acc_data->sum_mvc_abs += abs(mv.col); fp_acc_data->sum_mvrs += mv.row * mv.row; fp_acc_data->sum_mvcs += mv.col * mv.col; ++(fp_acc_data->intercount); *best_ref_mv = mv; #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { // inter prediction statistics cpi->twopass.frame_mb_stats_buf[mb_index] = 0; cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_DCINTRA_MASK; cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_ZERO_MASK; if (this_error > FPMB_ERROR_LARGE_TH) { cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_LARGE_MASK; } else if (this_error < FPMB_ERROR_SMALL_TH) { cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_ERROR_SMALL_MASK; } } #endif if (!is_zero_mv(&mv)) { ++(fp_acc_data->mvcount); #if CONFIG_FP_MB_STATS if (cpi->use_fp_mb_stats) { cpi->twopass.frame_mb_stats_buf[mb_index] &= ~FPMB_MOTION_ZERO_MASK; // check estimated motion direction if (mv.col > 0 && mv.col >= abs(mv.row)) { // right direction cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_RIGHT_MASK; } else if (mv.row < 0 && abs(mv.row) >= abs(mv.col)) { // up direction cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_UP_MASK; } else if (mv.col < 0 && abs(mv.col) >= abs(mv.row)) { // left direction cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_LEFT_MASK; } else { // down direction cpi->twopass.frame_mb_stats_buf[mb_index] |= FPMB_MOTION_DOWN_MASK; } } #endif // Does the row vector point inwards or outwards? if (mb_row < cm->mb_rows / 2) { if (mv.row > 0) --(fp_acc_data->sum_in_vectors); else if (mv.row < 0) ++(fp_acc_data->sum_in_vectors); } else if (mb_row > cm->mb_rows / 2) { if (mv.row > 0) ++(fp_acc_data->sum_in_vectors); else if (mv.row < 0) --(fp_acc_data->sum_in_vectors); } // Does the col vector point inwards or outwards?
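// Mirror of the row test above; the net count is later normalized into
// mv_in_out_count (range [-1, 1]) in first_pass_stat_calc().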
if (mb_col < cm->mb_cols / 2) { if (mv.col > 0) --(fp_acc_data->sum_in_vectors); else if (mv.col < 0) ++(fp_acc_data->sum_in_vectors); } else if (mb_col > cm->mb_cols / 2) { if (mv.col > 0) ++(fp_acc_data->sum_in_vectors); else if (mv.col < 0) --(fp_acc_data->sum_in_vectors); } } if (this_intra_error < scaled_low_intra_thresh) { fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize); } else { fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF; } } else { // Intra < inter error if (this_intra_error < scaled_low_intra_thresh) { fp_acc_data->frame_noise_energy += fp_estimate_block_noise(x, bsize); if (this_motion_error < scaled_low_intra_thresh) { fp_acc_data->intra_count_low += 1.0; } else { fp_acc_data->intra_count_high += 1.0; } } else { fp_acc_data->frame_noise_energy += (int64_t)SECTION_NOISE_DEF; fp_acc_data->intra_count_high += 1.0; } } } else { fp_acc_data->sr_coded_error += (int64_t)this_error; } fp_acc_data->coded_error += (int64_t)this_error; recon_yoffset += 16; recon_uvoffset += uv_mb_height; // Accumulate row level stats to the corresponding tile stats if (cpi->row_mt && mb_col == mb_col_end - 1) accumulate_fp_mb_row_stat(tile_data, fp_acc_data); (*(cpi->row_mt_sync_write_ptr))(&tile_data->row_mt_sync, mb_row, c, num_mb_cols); } vpx_clear_system_state(); } static void first_pass_encode(VP9_COMP *cpi, FIRSTPASS_DATA *fp_acc_data) { VP9_COMMON *const cm = &cpi->common; int mb_row; TileDataEnc tile_data; TileInfo *tile = &tile_data.tile_info; MV zero_mv = { 0, 0 }; MV best_ref_mv; // Tiling is ignored in the first pass. vp9_tile_init(tile, cm, 0, 0); for (mb_row = 0; mb_row < cm->mb_rows; ++mb_row) { best_ref_mv = zero_mv; vp9_first_pass_encode_tile_mb_row(cpi, &cpi->td, fp_acc_data, &tile_data, &best_ref_mv, mb_row); } } void vp9_first_pass(VP9_COMP *cpi, const struct lookahead_entry *source) { MACROBLOCK *const x = &cpi->td.mb; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; TWO_PASS *twopass = &cpi->twopass; YV12_BUFFER_CONFIG *const lst_yv12 = get_ref_frame_buffer(cpi, LAST_FRAME); YV12_BUFFER_CONFIG *gld_yv12 = get_ref_frame_buffer(cpi, GOLDEN_FRAME); YV12_BUFFER_CONFIG *const new_yv12 = get_frame_new_buffer(cm); const YV12_BUFFER_CONFIG *first_ref_buf = lst_yv12; BufferPool *const pool = cm->buffer_pool; FIRSTPASS_DATA fp_temp_data; FIRSTPASS_DATA *fp_acc_data = &fp_temp_data; vpx_clear_system_state(); vp9_zero(fp_temp_data); fp_acc_data->image_data_start_row = INVALID_ROW; // First pass code requires valid last and new frame buffers. 
  assert(new_yv12 != NULL);
  assert(frame_is_intra_only(cm) || (lst_yv12 != NULL));

#if CONFIG_FP_MB_STATS
  if (cpi->use_fp_mb_stats) {
    vp9_zero_array(cpi->twopass.frame_mb_stats_buf, cm->initial_mbs);
  }
#endif

  set_first_pass_params(cpi);
  vp9_set_quantizer(cm, find_fp_qindex(cm->bit_depth));

  vp9_setup_block_planes(&x->e_mbd, cm->subsampling_x, cm->subsampling_y);

  vp9_setup_src_planes(x, cpi->Source, 0, 0);
  vp9_setup_dst_planes(xd->plane, new_yv12, 0, 0);

  if (!frame_is_intra_only(cm)) {
    vp9_setup_pre_planes(xd, 0, first_ref_buf, 0, 0, NULL);
  }

  xd->mi = cm->mi_grid_visible;
  xd->mi[0] = cm->mi;

  vp9_frame_init_quantizer(cpi);

  x->skip_recode = 0;

  vp9_init_mv_probs(cm);
  vp9_initialize_rd_consts(cpi);

  cm->log2_tile_rows = 0;

  if (cpi->row_mt_bit_exact && cpi->twopass.fp_mb_float_stats == NULL)
    CHECK_MEM_ERROR(
        cm, cpi->twopass.fp_mb_float_stats,
        vpx_calloc(cm->MBs * sizeof(*cpi->twopass.fp_mb_float_stats), 1));

  {
    FIRSTPASS_STATS fps;
    TileDataEnc *first_tile_col;
    if (!cpi->row_mt) {
      cm->log2_tile_cols = 0;
      cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read_dummy;
      cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write_dummy;
      first_pass_encode(cpi, fp_acc_data);
      first_pass_stat_calc(cpi, &fps, fp_acc_data);
    } else {
      cpi->row_mt_sync_read_ptr = vp9_row_mt_sync_read;
      cpi->row_mt_sync_write_ptr = vp9_row_mt_sync_write;
      if (cpi->row_mt_bit_exact) {
        cm->log2_tile_cols = 0;
        vp9_zero_array(cpi->twopass.fp_mb_float_stats, cm->MBs);
      }
      vp9_encode_fp_row_mt(cpi);
      first_tile_col = &cpi->tile_data[0];
      if (cpi->row_mt_bit_exact)
        accumulate_floating_point_stats(cpi, first_tile_col);
      first_pass_stat_calc(cpi, &fps, &(first_tile_col->fp_data));
    }

    // Don't allow a value of 0 for duration.
    // (Section duration is also defaulted to a minimum of 1.0.)
    fps.duration = VPXMAX(1.0, (double)(source->ts_end - source->ts_start));

    // Don't want to do output stats with a stack variable!
    twopass->this_frame_stats = fps;
    output_stats(&twopass->this_frame_stats);
    accumulate_stats(&twopass->total_stats, &fps);

#if CONFIG_FP_MB_STATS
    if (cpi->use_fp_mb_stats) {
      output_fpmb_stats(twopass->frame_mb_stats_buf, cm, cpi->output_pkt_list);
    }
#endif
  }

  // Copy the previous Last Frame back into gf and arf buffers if
  // the prediction is good enough... but also don't allow it to lag too far.
  if ((twopass->sr_update_lag > 3) ||
      ((cm->current_video_frame > 0) &&
       (twopass->this_frame_stats.pcnt_inter > 0.20) &&
       ((twopass->this_frame_stats.intra_error /
         DOUBLE_DIVIDE_CHECK(twopass->this_frame_stats.coded_error)) > 2.0))) {
    if (gld_yv12 != NULL) {
      ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
                 cm->ref_frame_map[cpi->lst_fb_idx]);
    }
    twopass->sr_update_lag = 1;
  } else {
    ++twopass->sr_update_lag;
  }

  vpx_extend_frame_borders(new_yv12);

  // The frame we just compressed now becomes the last frame.
  ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->lst_fb_idx],
             cm->new_fb_idx);

  // Special case for the first frame. Copy into the GF buffer as a second
  // reference.
  if (cm->current_video_frame == 0 && cpi->gld_fb_idx != INVALID_IDX) {
    ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[cpi->gld_fb_idx],
               cm->ref_frame_map[cpi->lst_fb_idx]);
  }

  // Use this to see what the first pass reconstruction looks like.
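  // (Viewing hint, not part of the encoder: change the `if (0)` below to
  // `if (1)` to dump the reconstruction. buffer_alloc holds the padded frame
  // including borders, so a raw YUV viewer must be given the padded buffer
  // dimensions rather than the nominal display size; assuming an 8-bit 4:2:0
  // encode, something like
  //   ffplay -f rawvideo -pixel_format yuv420p -video_size <padded WxH> \
  //       enc0000.yuv
  // can be used.)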
  if (0) {
    char filename[512];
    FILE *recon_file;
    snprintf(filename, sizeof(filename), "enc%04d.yuv",
             (int)cm->current_video_frame);

    if (cm->current_video_frame == 0)
      recon_file = fopen(filename, "wb");
    else
      recon_file = fopen(filename, "ab");

    (void)fwrite(lst_yv12->buffer_alloc, lst_yv12->frame_size, 1, recon_file);
    fclose(recon_file);
  }

  ++cm->current_video_frame;
  if (cpi->use_svc) vp9_inc_frame_in_layer(cpi);
}

static const double q_pow_term[(QINDEX_RANGE >> 5) + 1] = { 0.65, 0.70, 0.75,
                                                            0.85, 0.90, 0.90,
                                                            0.90, 1.00, 1.25 };

static double calc_correction_factor(double err_per_mb, double err_divisor,
                                     int q) {
  const double error_term = err_per_mb / DOUBLE_DIVIDE_CHECK(err_divisor);
  const int index = q >> 5;
  double power_term;

  assert((index >= 0) && (index < (QINDEX_RANGE >> 5)));

  // Adjustment based on quantizer to the power term.
  power_term =
      q_pow_term[index] +
      (((q_pow_term[index + 1] - q_pow_term[index]) * (q % 32)) / 32.0);

  // Calculate correction factor.
  if (power_term < 1.0) assert(error_term >= 0.0);

  return fclamp(pow(error_term, power_term), 0.05, 5.0);
}

static double wq_err_divisor(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  unsigned int screen_area = (cm->width * cm->height);

  // Use a different error per mb factor for calculating boost for
  // different formats.
  if (screen_area <= 640 * 360) {
    return 115.0;
  } else if (screen_area < 1280 * 720) {
    return 125.0;
  } else if (screen_area <= 1920 * 1080) {
    return 130.0;
  } else if (screen_area < 3840 * 2160) {
    return 150.0;
  }

  // Fall through to here only for 4K and above.
  return 200.0;
}

#define NOISE_FACTOR_MIN 0.9
#define NOISE_FACTOR_MAX 1.1

static int get_twopass_worst_quality(VP9_COMP *cpi, const double section_err,
                                     double inactive_zone,
                                     double section_noise,
                                     int section_target_bandwidth) {
  const RATE_CONTROL *const rc = &cpi->rc;
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  TWO_PASS *const twopass = &cpi->twopass;
  double last_group_rate_err;

  // Clamp the target rate to VBR min / max limits.
  const int target_rate =
      vp9_rc_clamp_pframe_target_size(cpi, section_target_bandwidth);
  double noise_factor = pow((section_noise / SECTION_NOISE_DEF), 0.5);
  noise_factor = fclamp(noise_factor, NOISE_FACTOR_MIN, NOISE_FACTOR_MAX);
  inactive_zone = fclamp(inactive_zone, 0.0, 1.0);

// TODO(jimbankoski): remove #if here or below when this has been
// well tested.
#if CONFIG_ALWAYS_ADJUST_BPM
  // Based on recent history, adjust expectations of bits per macroblock.
  last_group_rate_err =
      (double)twopass->rolling_arf_group_actual_bits /
      DOUBLE_DIVIDE_CHECK((double)twopass->rolling_arf_group_target_bits);
  last_group_rate_err = VPXMAX(0.25, VPXMIN(4.0, last_group_rate_err));
  twopass->bpm_factor *= (3.0 + last_group_rate_err) / 4.0;
  twopass->bpm_factor = VPXMAX(0.25, VPXMIN(4.0, twopass->bpm_factor));
#endif

  if (target_rate <= 0) {
    return rc->worst_quality;  // Highest value allowed
  } else {
    const int num_mbs = (cpi->oxcf.resize_mode != RESIZE_NONE)
                            ? cpi->initial_mbs
                            : cpi->common.MBs;
    const double active_pct = VPXMAX(0.01, 1.0 - inactive_zone);
    const int active_mbs = (int)VPXMAX(1, (double)num_mbs * active_pct);
    const double av_err_per_mb = section_err / active_pct;
    const double speed_term = 1.0 + 0.04 * oxcf->speed;
    const int target_norm_bits_per_mb =
        (int)(((uint64_t)target_rate << BPER_MB_NORMBITS) / active_mbs);
    int q;

// TODO(jimbankoski): remove #if here or above when this has been
// well tested.
#if !CONFIG_ALWAYS_ADJUST_BPM
    // Based on recent history, adjust expectations of bits per macroblock.
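    // Illustrative arithmetic for the adjustment below: if the previous ARF
    // group used twice its target bits, last_group_rate_err is 2.0 (within
    // the [0.25, 4.0] clamp), so bpm_factor is scaled by
    // (3.0 + 2.0) / 4.0 = 1.25, i.e. we expect roughly 25% more bits per
    // macroblock, and the max Q search further down compensates accordingly.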
    last_group_rate_err =
        (double)twopass->rolling_arf_group_actual_bits /
        DOUBLE_DIVIDE_CHECK((double)twopass->rolling_arf_group_target_bits);
    last_group_rate_err = VPXMAX(0.25, VPXMIN(4.0, last_group_rate_err));
    twopass->bpm_factor *= (3.0 + last_group_rate_err) / 4.0;
    twopass->bpm_factor = VPXMAX(0.25, VPXMIN(4.0, twopass->bpm_factor));
#endif

    // Try to pick a max Q that will be high enough to encode the
    // content at the given rate.
    for (q = rc->best_quality; q < rc->worst_quality; ++q) {
      const double factor =
          calc_correction_factor(av_err_per_mb, wq_err_divisor(cpi), q);
      const int bits_per_mb = vp9_rc_bits_per_mb(
          INTER_FRAME, q,
          factor * speed_term * cpi->twopass.bpm_factor * noise_factor,
          cpi->common.bit_depth);
      if (bits_per_mb <= target_norm_bits_per_mb) break;
    }

    // Restriction on active max q for constrained quality mode.
    if (cpi->oxcf.rc_mode == VPX_CQ) q = VPXMAX(q, oxcf->cq_level);
    return q;
  }
}

static void setup_rf_level_maxq(VP9_COMP *cpi) {
  int i;
  RATE_CONTROL *const rc = &cpi->rc;
  for (i = INTER_NORMAL; i < RATE_FACTOR_LEVELS; ++i) {
    int qdelta = vp9_frame_type_qdelta(cpi, i, rc->worst_quality);
    rc->rf_level_maxq[i] = VPXMAX(rc->worst_quality + qdelta, rc->best_quality);
  }
}

static void init_subsampling(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  RATE_CONTROL *const rc = &cpi->rc;
  const int w = cm->width;
  const int h = cm->height;
  int i;

  for (i = 0; i < FRAME_SCALE_STEPS; ++i) {
    // Note: Frames with odd-sized dimensions may result from this scaling.
    rc->frame_width[i] = (w * 16) / frame_scale_factor[i];
    rc->frame_height[i] = (h * 16) / frame_scale_factor[i];
  }

  setup_rf_level_maxq(cpi);
}

void calculate_coded_size(VP9_COMP *cpi, int *scaled_frame_width,
                          int *scaled_frame_height) {
  RATE_CONTROL *const rc = &cpi->rc;
  *scaled_frame_width = rc->frame_width[rc->frame_size_selector];
  *scaled_frame_height = rc->frame_height[rc->frame_size_selector];
}

void vp9_init_second_pass(VP9_COMP *cpi) {
  VP9EncoderConfig *const oxcf = &cpi->oxcf;
  RATE_CONTROL *const rc = &cpi->rc;
  TWO_PASS *const twopass = &cpi->twopass;
  double frame_rate;
  FIRSTPASS_STATS *stats;

  zero_stats(&twopass->total_stats);
  zero_stats(&twopass->total_left_stats);

  if (!twopass->stats_in_end) return;

  stats = &twopass->total_stats;

  *stats = *twopass->stats_in_end;
  twopass->total_left_stats = *stats;

  // Scan the first pass file and calculate a modified score for each
  // frame that is used to distribute bits. The modified score is assumed
  // to provide a linear basis for bit allocation. I.e., a frame A with a
  // score that is double that of frame B will be allocated 2x as many bits.
  {
    double modified_score_total = 0.0;
    const FIRSTPASS_STATS *s = twopass->stats_in;
    double av_err;

    if (oxcf->vbr_corpus_complexity) {
      twopass->mean_mod_score = (double)oxcf->vbr_corpus_complexity / 10.0;
      av_err = get_distribution_av_err(cpi, twopass);
    } else {
      av_err = get_distribution_av_err(cpi, twopass);
      // The first scan is unclamped and gives a raw average.
      while (s < twopass->stats_in_end) {
        modified_score_total += calculate_mod_frame_score(cpi, oxcf, s, av_err);
        ++s;
      }

      // The average error from this first scan is used to define the midpoint
      // error for the rate distribution function.
      twopass->mean_mod_score =
          modified_score_total / DOUBLE_DIVIDE_CHECK(stats->count);
    }

    // Second scan using clamps based on the previous cycle average.
    // This may modify the total and average somewhat but we don't bother
    // with further iterations.
    modified_score_total = 0.0;
    s = twopass->stats_in;
    while (s < twopass->stats_in_end) {
      modified_score_total +=
          calculate_norm_frame_score(cpi, twopass, oxcf, s, av_err);
      ++s;
    }
    twopass->normalized_score_left = modified_score_total;

    // If using Corpus wide VBR mode then update the clip target bandwidth to
    // reflect how the clip compares to the rest of the corpus.
    if (oxcf->vbr_corpus_complexity) {
      oxcf->target_bandwidth =
          (int64_t)((double)oxcf->target_bandwidth *
                    (twopass->normalized_score_left / stats->count));
    }

#if COMPLEXITY_STATS_OUTPUT
    {
      FILE *compstats;
      compstats = fopen("complexity_stats.stt", "a");
      fprintf(compstats, "%10.3lf\n",
              twopass->normalized_score_left / stats->count);
      fclose(compstats);
    }
#endif
  }

  frame_rate = 10000000.0 * stats->count / stats->duration;
  // Each frame can have a different duration, as the frame rate in the source
  // isn't guaranteed to be constant. The frame rate prior to the first frame
  // encoded in the second pass is a guess. However, the sum duration is not.
  // It is calculated based on the actual durations of all frames from the
  // first pass.
  vp9_new_framerate(cpi, frame_rate);
  twopass->bits_left =
      (int64_t)(stats->duration * oxcf->target_bandwidth / 10000000.0);

  // This variable monitors how far behind the second ref update is lagging.
  twopass->sr_update_lag = 1;

  // Reset the vbr bits off target counters
  rc->vbr_bits_off_target = 0;
  rc->vbr_bits_off_target_fast = 0;
  rc->rate_error_estimate = 0;

  // Static sequence monitor variables.
  twopass->kf_zeromotion_pct = 100;
  twopass->last_kfgroup_zeromotion_pct = 100;

  // Initialize bits per macroblock estimate correction factor.
  twopass->bpm_factor = 1.0;
  // Initialize actual and target bits counters for ARF groups so that
  // at the start we have a neutral bpm adjustment.
  twopass->rolling_arf_group_target_bits = 1;
  twopass->rolling_arf_group_actual_bits = 1;

  if (oxcf->resize_mode != RESIZE_NONE) {
    init_subsampling(cpi);
  }

  // Initialize the arnr strength adjustment to 0
  twopass->arnr_strength_adjustment = 0;
}

#define SR_DIFF_PART 0.0015
#define INTRA_PART 0.005
#define DEFAULT_DECAY_LIMIT 0.75
#define LOW_SR_DIFF_TRHESH 0.1
#define SR_DIFF_MAX 128.0
#define LOW_CODED_ERR_PER_MB 10.0
#define NCOUNT_FRAME_II_THRESH 6.0

static double get_sr_decay_rate(const FRAME_INFO *frame_info,
                                const FIRSTPASS_STATS *frame) {
  double sr_diff = (frame->sr_coded_error - frame->coded_error);
  double sr_decay = 1.0;
  double modified_pct_inter;
  double modified_pcnt_intra;
  const double motion_amplitude_part =
      frame->pcnt_motion *
      ((frame->mvc_abs + frame->mvr_abs) /
       (frame_info->frame_height + frame_info->frame_width));

  modified_pct_inter = frame->pcnt_inter;
  if ((frame->coded_error > LOW_CODED_ERR_PER_MB) &&
      ((frame->intra_error / DOUBLE_DIVIDE_CHECK(frame->coded_error)) <
       (double)NCOUNT_FRAME_II_THRESH)) {
    modified_pct_inter =
        frame->pcnt_inter + frame->pcnt_intra_low - frame->pcnt_neutral;
  }
  modified_pcnt_intra = 100 * (1.0 - modified_pct_inter);

  if ((sr_diff > LOW_SR_DIFF_TRHESH)) {
    sr_diff = VPXMIN(sr_diff, SR_DIFF_MAX);
    sr_decay = 1.0 - (SR_DIFF_PART * sr_diff) - motion_amplitude_part -
               (INTRA_PART * modified_pcnt_intra);
  }
  return VPXMAX(sr_decay, DEFAULT_DECAY_LIMIT);
}

// This function gives an estimate of how badly we believe the prediction
// quality is decaying from frame to frame.
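// In other words it returns min(sr_decay, pcnt_inter - pcnt_motion): the
// fraction of the frame coded inter with a zero motion vector, capped by the
// second reference decay estimate computed above.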
static double get_zero_motion_factor(const FRAME_INFO *frame_info,
                                     const FIRSTPASS_STATS *frame_stats) {
  const double zero_motion_pct =
      frame_stats->pcnt_inter - frame_stats->pcnt_motion;
  double sr_decay = get_sr_decay_rate(frame_info, frame_stats);
  return VPXMIN(sr_decay, zero_motion_pct);
}

#define ZM_POWER_FACTOR 0.75

static double get_prediction_decay_rate(const FRAME_INFO *frame_info,
                                        const FIRSTPASS_STATS *frame_stats) {
  const double sr_decay_rate = get_sr_decay_rate(frame_info, frame_stats);
  const double zero_motion_factor =
      (0.95 * pow((frame_stats->pcnt_inter - frame_stats->pcnt_motion),
                  ZM_POWER_FACTOR));

  return VPXMAX(zero_motion_factor,
                (sr_decay_rate + ((1.0 - sr_decay_rate) * zero_motion_factor)));
}

static int get_show_idx(const TWO_PASS *twopass) {
  return (int)(twopass->stats_in - twopass->stats_in_start);
}

// Function to test for a condition where a complex transition is followed
// by a static section. For example in slide shows where there is a fade
// between slides. This is to help with more optimal kf and gf positioning.
static int check_transition_to_still(const FIRST_PASS_INFO *first_pass_info,
                                     int show_idx, int still_interval) {
  int j;
  int num_frames = fps_get_num_frames(first_pass_info);
  if (show_idx + still_interval > num_frames) {
    return 0;
  }

  // Look ahead a few frames to see if static condition persists...
  for (j = 0; j < still_interval; ++j) {
    const FIRSTPASS_STATS *stats =
        fps_get_frame_stats(first_pass_info, show_idx + j);
    if (stats->pcnt_inter - stats->pcnt_motion < 0.999) break;
  }

  // Only if it does do we signal a transition to still.
  return j == still_interval;
}

// This function detects a flash through the high relative pcnt_second_ref
// score in the frame following a flash frame. The offset passed in should
// reflect this.
static int detect_flash_from_frame_stats(const FIRSTPASS_STATS *frame_stats) {
  // What we are looking for here is a situation where there is a
  // brief break in prediction (such as a flash) but subsequent frames
  // are reasonably well predicted by an earlier (pre flash) frame.
  // The recovery after a flash is indicated by a high pcnt_second_ref
  // usage or a second ref coded error notably lower than the last
  // frame coded error.
  if (frame_stats == NULL) {
    return 0;
  }
  return (frame_stats->sr_coded_error < frame_stats->coded_error) ||
         ((frame_stats->pcnt_second_ref > frame_stats->pcnt_inter) &&
          (frame_stats->pcnt_second_ref >= 0.5));
}

static int detect_flash(const TWO_PASS *twopass, int offset) {
  const FIRSTPASS_STATS *const next_frame = read_frame_stats(twopass, offset);
  return detect_flash_from_frame_stats(next_frame);
}

// Update the motion related elements of the GF/arf boost calculation.
static void accumulate_frame_motion_stats(const FIRSTPASS_STATS *stats,
                                          double *mv_in_out,
                                          double *mv_in_out_accumulator,
                                          double *abs_mv_in_out_accumulator,
                                          double *mv_ratio_accumulator) {
  const double pct = stats->pcnt_motion;

  // Accumulate Motion In/Out of frame stats.
  *mv_in_out = stats->mv_in_out_count * pct;
  *mv_in_out_accumulator += *mv_in_out;
  *abs_mv_in_out_accumulator += fabs(*mv_in_out);

  // Accumulate a measure of how uniform (or conversely how random) the motion
  // field is (a ratio of abs(mv) / mv).
  if (pct > 0.05) {
    const double mvr_ratio =
        fabs(stats->mvr_abs) / DOUBLE_DIVIDE_CHECK(fabs(stats->MVr));
    const double mvc_ratio =
        fabs(stats->mvc_abs) / DOUBLE_DIVIDE_CHECK(fabs(stats->MVc));

    *mv_ratio_accumulator +=
        pct * (mvr_ratio < stats->mvr_abs ? mvr_ratio : stats->mvr_abs);
    *mv_ratio_accumulator +=
        pct * (mvc_ratio < stats->mvc_abs ? mvc_ratio : stats->mvc_abs);
  }
}

#define BASELINE_ERR_PER_MB 12500.0
#define GF_MAX_BOOST 96.0
static double calc_frame_boost(const FRAME_INFO *frame_info,
                               const FIRSTPASS_STATS *this_frame,
                               int avg_frame_qindex,
                               double this_frame_mv_in_out) {
  double frame_boost;
  const double lq =
      vp9_convert_qindex_to_q(avg_frame_qindex, frame_info->bit_depth);
  const double boost_q_correction = VPXMIN((0.5 + (lq * 0.015)), 1.5);
  const double active_area = calculate_active_area(frame_info, this_frame);

  // Underlying boost factor is based on inter error ratio.
  frame_boost = (BASELINE_ERR_PER_MB * active_area) /
                DOUBLE_DIVIDE_CHECK(this_frame->coded_error);

  // Small adjustment for cases where there is a zoom out
  if (this_frame_mv_in_out > 0.0)
    frame_boost += frame_boost * (this_frame_mv_in_out * 2.0);

  // Q correction and scaling
  frame_boost = frame_boost * boost_q_correction;

  return VPXMIN(frame_boost, GF_MAX_BOOST * boost_q_correction);
}

static double kf_err_per_mb(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  unsigned int screen_area = (cm->width * cm->height);

  // Use a different error per mb factor for calculating boost for
  // different formats.
  if (screen_area < 1280 * 720) {
    return 2000.0;
  } else if (screen_area < 1920 * 1080) {
    return 500.0;
  }
  return 250.0;
}

static double calc_kf_frame_boost(VP9_COMP *cpi,
                                  const FIRSTPASS_STATS *this_frame,
                                  double *sr_accumulator,
                                  double this_frame_mv_in_out,
                                  double max_boost) {
  double frame_boost;
  const double lq = vp9_convert_qindex_to_q(
      cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth);
  const double boost_q_correction = VPXMIN((0.50 + (lq * 0.015)), 2.00);
  const double active_area =
      calculate_active_area(&cpi->frame_info, this_frame);

  // Underlying boost factor is based on inter error ratio.
  frame_boost = (kf_err_per_mb(cpi) * active_area) /
                DOUBLE_DIVIDE_CHECK(this_frame->coded_error + *sr_accumulator);

  // Update the accumulator for second ref error difference.
  // This is intended to give an indication of how much the coded error is
  // increasing over time.
  *sr_accumulator += (this_frame->sr_coded_error - this_frame->coded_error);
  *sr_accumulator = VPXMAX(0.0, *sr_accumulator);

  // Small adjustment for cases where there is a zoom out
  if (this_frame_mv_in_out > 0.0)
    frame_boost += frame_boost * (this_frame_mv_in_out * 2.0);

  // Q correction and scaling
  // The 40.0 value here is an experimentally derived baseline minimum.
  // This value is in line with the minimum per frame boost in the alt_ref
  // boost calculation.
  frame_boost = ((frame_boost + 40.0) * boost_q_correction);

  return VPXMIN(frame_boost, max_boost * boost_q_correction);
}

static int compute_arf_boost(const FRAME_INFO *frame_info,
                             const FIRST_PASS_INFO *first_pass_info,
                             int arf_show_idx, int f_frames, int b_frames,
                             int avg_frame_qindex) {
  int i;
  double boost_score = 0.0;
  double mv_ratio_accumulator = 0.0;
  double decay_accumulator = 1.0;
  double this_frame_mv_in_out = 0.0;
  double mv_in_out_accumulator = 0.0;
  double abs_mv_in_out_accumulator = 0.0;
  int arf_boost;
  int flash_detected = 0;

  // Search forward from the proposed arf/next gf position.
  for (i = 0; i < f_frames; ++i) {
    const FIRSTPASS_STATS *this_frame =
        fps_get_frame_stats(first_pass_info, arf_show_idx + i);
    const FIRSTPASS_STATS *next_frame =
        fps_get_frame_stats(first_pass_info, arf_show_idx + i + 1);
    if (this_frame == NULL) break;

    // Update the motion related elements of the boost calculation.
    accumulate_frame_motion_stats(
        this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
        &abs_mv_in_out_accumulator, &mv_ratio_accumulator);

    // We want to discount the flash frame itself and the recovery
    // frame that follows as both will have poor scores.
    flash_detected = detect_flash_from_frame_stats(this_frame) ||
                     detect_flash_from_frame_stats(next_frame);

    // Accumulate the effect of prediction quality decay.
    if (!flash_detected) {
      decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame);
      decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
                              ? MIN_DECAY_FACTOR
                              : decay_accumulator;
    }
    boost_score += decay_accumulator *
                   calc_frame_boost(frame_info, this_frame, avg_frame_qindex,
                                    this_frame_mv_in_out);
  }
  arf_boost = (int)boost_score;

  // Reset for backward looking loop.
  boost_score = 0.0;
  mv_ratio_accumulator = 0.0;
  decay_accumulator = 1.0;
  this_frame_mv_in_out = 0.0;
  mv_in_out_accumulator = 0.0;
  abs_mv_in_out_accumulator = 0.0;

  // Search backward towards the last gf position.
  for (i = -1; i >= -b_frames; --i) {
    const FIRSTPASS_STATS *this_frame =
        fps_get_frame_stats(first_pass_info, arf_show_idx + i);
    const FIRSTPASS_STATS *next_frame =
        fps_get_frame_stats(first_pass_info, arf_show_idx + i + 1);
    if (this_frame == NULL) break;

    // Update the motion related elements of the boost calculation.
    accumulate_frame_motion_stats(
        this_frame, &this_frame_mv_in_out, &mv_in_out_accumulator,
        &abs_mv_in_out_accumulator, &mv_ratio_accumulator);

    // We want to discount the flash frame itself and the recovery
    // frame that follows as both will have poor scores.
    flash_detected = detect_flash_from_frame_stats(this_frame) ||
                     detect_flash_from_frame_stats(next_frame);

    // Cumulative effect of prediction quality decay.
    if (!flash_detected) {
      decay_accumulator *= get_prediction_decay_rate(frame_info, this_frame);
      decay_accumulator = decay_accumulator < MIN_DECAY_FACTOR
                              ? MIN_DECAY_FACTOR
                              : decay_accumulator;
    }
    boost_score += decay_accumulator *
                   calc_frame_boost(frame_info, this_frame, avg_frame_qindex,
                                    this_frame_mv_in_out);
  }
  arf_boost += (int)boost_score;

  if (arf_boost < ((b_frames + f_frames) * 40))
    arf_boost = ((b_frames + f_frames) * 40);
  arf_boost = VPXMAX(arf_boost, MIN_ARF_GF_BOOST);

  return arf_boost;
}

static int calc_arf_boost(VP9_COMP *cpi, int f_frames, int b_frames) {
  const FRAME_INFO *frame_info = &cpi->frame_info;
  TWO_PASS *const twopass = &cpi->twopass;
  const int avg_inter_frame_qindex = cpi->rc.avg_frame_qindex[INTER_FRAME];
  int arf_show_idx = get_show_idx(twopass);
  return compute_arf_boost(frame_info, &twopass->first_pass_info, arf_show_idx,
                           f_frames, b_frames, avg_inter_frame_qindex);
}

// Calculate a section intra ratio used in setting max loop filter.
static int calculate_section_intra_ratio(const FIRSTPASS_STATS *begin,
                                         const FIRSTPASS_STATS *end,
                                         int section_length) {
  const FIRSTPASS_STATS *s = begin;
  double intra_error = 0.0;
  double coded_error = 0.0;
  int i = 0;

  while (s < end && i < section_length) {
    intra_error += s->intra_error;
    coded_error += s->coded_error;
    ++s;
    ++i;
  }

  return (int)(intra_error / DOUBLE_DIVIDE_CHECK(coded_error));
}

// Calculate the total bits to allocate in this GF/ARF group.
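// In outline: the group receives a share of the remaining key frame group
// budget in proportion to its share of the remaining modified error score,
// roughly kf_group_bits * (gf_group_err / kf_group_error_left), plus a small
// bias that favours GF groups lying further from the next key frame. The
// result is clamped to [0, kf_group_bits] and to the per-frame max-bits
// limit.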
static int64_t calculate_total_gf_group_bits(VP9_COMP *cpi,
                                             double gf_group_err) {
  VP9_COMMON *const cm = &cpi->common;
  const RATE_CONTROL *const rc = &cpi->rc;
  const TWO_PASS *const twopass = &cpi->twopass;
  const int max_bits = frame_max_bits(rc, &cpi->oxcf);
  int64_t total_group_bits;
  const int is_key_frame = frame_is_intra_only(cm);
  const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active;
  int gop_frames =
      rc->baseline_gf_interval + rc->source_alt_ref_pending - arf_active_or_kf;

  // Calculate the bits to be allocated to the group as a whole.
  if ((twopass->kf_group_bits > 0) && (twopass->kf_group_error_left > 0.0)) {
    int key_frame_interval = rc->frames_since_key + rc->frames_to_key;
    int distance_from_next_key_frame =
        rc->frames_to_key -
        (rc->baseline_gf_interval + rc->source_alt_ref_pending);
    int max_gf_bits_bias = rc->avg_frame_bandwidth;
    double gf_interval_bias_bits_normalize_factor =
        (double)rc->baseline_gf_interval / 16;
    total_group_bits = (int64_t)(twopass->kf_group_bits *
                                 (gf_group_err / twopass->kf_group_error_left));
    // TODO(ravi): Experiment with different values of max_gf_bits_bias
    total_group_bits +=
        (int64_t)((double)distance_from_next_key_frame / key_frame_interval *
                  max_gf_bits_bias * gf_interval_bias_bits_normalize_factor);
  } else {
    total_group_bits = 0;
  }

  // Clamp odd edge cases.
  total_group_bits = (total_group_bits < 0)
                         ? 0
                         : (total_group_bits > twopass->kf_group_bits)
                               ? twopass->kf_group_bits
                               : total_group_bits;

  // Clip based on user supplied data rate variability limit.
  if (total_group_bits > (int64_t)max_bits * gop_frames)
    total_group_bits = (int64_t)max_bits * gop_frames;

  return total_group_bits;
}

// Calculate the number of extra bits to assign to boosted frames in a group.
static int calculate_boost_bits(int frame_count, int boost,
                                int64_t total_group_bits) {
  int allocation_chunks;

  // Return 0 for invalid inputs (could arise, e.g., through rounding errors).
  if (!boost || (total_group_bits <= 0) || (frame_count < 0)) return 0;

  allocation_chunks = (frame_count * NORMAL_BOOST) + boost;

  // Prevent overflow.
  if (boost > 1023) {
    int divisor = boost >> 10;
    boost /= divisor;
    allocation_chunks /= divisor;
  }

  // Calculate the number of extra bits for use in the boosted frame or frames.
  return VPXMAX((int)(((int64_t)boost * total_group_bits) / allocation_chunks),
                0);
}

// Used in corpus vbr: Calculates the total normalized group complexity score
// for a given number of frames starting at the current position in the stats
// file.
static double calculate_group_score(VP9_COMP *cpi, double av_score,
                                    int frame_count) {
  VP9EncoderConfig *const oxcf = &cpi->oxcf;
  TWO_PASS *const twopass = &cpi->twopass;
  const FIRSTPASS_STATS *s = twopass->stats_in;
  double score_total = 0.0;
  int i = 0;

  // We don't ever want to return a 0 score here.
if (frame_count == 0) return 1.0; while ((i < frame_count) && (s < twopass->stats_in_end)) { score_total += calculate_norm_frame_score(cpi, twopass, oxcf, s, av_score); ++s; ++i; } return score_total; } static void find_arf_order(VP9_COMP *cpi, GF_GROUP *gf_group, int *index_counter, int depth, int start, int end) { TWO_PASS *twopass = &cpi->twopass; const FIRSTPASS_STATS *const start_pos = twopass->stats_in; FIRSTPASS_STATS fpf_frame; const int mid = (start + end + 1) >> 1; const int min_frame_interval = 2; int idx; // Process regular P frames if ((end - start < min_frame_interval) || (depth > gf_group->allowed_max_layer_depth)) { for (idx = start; idx <= end; ++idx) { gf_group->update_type[*index_counter] = LF_UPDATE; gf_group->arf_src_offset[*index_counter] = 0; gf_group->frame_gop_index[*index_counter] = idx; gf_group->rf_level[*index_counter] = INTER_NORMAL; gf_group->layer_depth[*index_counter] = depth; gf_group->gfu_boost[*index_counter] = NORMAL_BOOST; ++(*index_counter); } gf_group->max_layer_depth = VPXMAX(gf_group->max_layer_depth, depth); return; } assert(abs(mid - start) >= 1 && abs(mid - end) >= 1); // Process ARF frame gf_group->layer_depth[*index_counter] = depth; gf_group->update_type[*index_counter] = ARF_UPDATE; gf_group->arf_src_offset[*index_counter] = mid - start; gf_group->frame_gop_index[*index_counter] = mid; gf_group->rf_level[*index_counter] = GF_ARF_LOW; for (idx = 0; idx <= mid; ++idx) if (EOF == input_stats(twopass, &fpf_frame)) break; gf_group->gfu_boost[*index_counter] = VPXMAX(MIN_ARF_GF_BOOST, calc_arf_boost(cpi, end - mid + 1, mid - start) >> depth); reset_fpf_position(twopass, start_pos); ++(*index_counter); find_arf_order(cpi, gf_group, index_counter, depth + 1, start, mid - 1); gf_group->update_type[*index_counter] = USE_BUF_FRAME; gf_group->arf_src_offset[*index_counter] = 0; gf_group->frame_gop_index[*index_counter] = mid; gf_group->rf_level[*index_counter] = INTER_NORMAL; gf_group->layer_depth[*index_counter] = depth; ++(*index_counter); find_arf_order(cpi, gf_group, index_counter, depth + 1, mid + 1, end); } static INLINE void set_gf_overlay_frame_type(GF_GROUP *gf_group, int frame_index, int source_alt_ref_active) { if (source_alt_ref_active) { gf_group->update_type[frame_index] = OVERLAY_UPDATE; gf_group->rf_level[frame_index] = INTER_NORMAL; gf_group->layer_depth[frame_index] = MAX_ARF_LAYERS - 1; gf_group->gfu_boost[frame_index] = NORMAL_BOOST; } else { gf_group->update_type[frame_index] = GF_UPDATE; gf_group->rf_level[frame_index] = GF_ARF_STD; gf_group->layer_depth[frame_index] = 0; } } static void define_gf_group_structure(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = &cpi->twopass; GF_GROUP *const gf_group = &twopass->gf_group; int frame_index = 0; int key_frame = cpi->common.frame_type == KEY_FRAME; int layer_depth = 1; int gop_frames = rc->baseline_gf_interval - (key_frame || rc->source_alt_ref_pending); gf_group->frame_start = cpi->common.current_video_frame; gf_group->frame_end = gf_group->frame_start + rc->baseline_gf_interval; gf_group->max_layer_depth = 0; gf_group->allowed_max_layer_depth = 0; // For key frames the frame target rate is already set and it // is also the golden frame. 
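  // Coding-order layout produced below, sketched for the common case where an
  // alt-ref is in use: slot 0 is the golden/overlay frame, slot 1 the
  // top-level ARF spanning the whole interval, then the recursive pyramid of
  // lower-level ARFs and leaf P frames laid out by find_arf_order(), and
  // finally the overlay slot that closes the group.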
  // === [frame_index == 0] ===
  if (!key_frame)
    set_gf_overlay_frame_type(gf_group, frame_index,
                              rc->source_alt_ref_active);

  ++frame_index;

  // === [frame_index == 1] ===
  if (rc->source_alt_ref_pending) {
    gf_group->update_type[frame_index] = ARF_UPDATE;
    gf_group->rf_level[frame_index] = GF_ARF_STD;
    gf_group->layer_depth[frame_index] = layer_depth;
    gf_group->arf_src_offset[frame_index] =
        (unsigned char)(rc->baseline_gf_interval - 1);
    gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval;
    gf_group->max_layer_depth = 1;
    ++frame_index;
    ++layer_depth;
    gf_group->allowed_max_layer_depth = cpi->oxcf.enable_auto_arf;
  }

  find_arf_order(cpi, gf_group, &frame_index, layer_depth, 1, gop_frames);

  set_gf_overlay_frame_type(gf_group, frame_index, rc->source_alt_ref_pending);
  gf_group->arf_src_offset[frame_index] = 0;
  gf_group->frame_gop_index[frame_index] = rc->baseline_gf_interval;

  // Record the size of the GF group.
  gf_group->gf_group_size = frame_index;
}

static void allocate_gf_group_bits(VP9_COMP *cpi, int64_t gf_group_bits,
                                   int gf_arf_bits) {
  VP9EncoderConfig *const oxcf = &cpi->oxcf;
  RATE_CONTROL *const rc = &cpi->rc;
  TWO_PASS *const twopass = &cpi->twopass;
  GF_GROUP *const gf_group = &twopass->gf_group;
  FIRSTPASS_STATS frame_stats;
  int i;
  int frame_index = 0;
  int target_frame_size;
  int key_frame;
  const int max_bits = frame_max_bits(&cpi->rc, oxcf);
  int64_t total_group_bits = gf_group_bits;
  int mid_frame_idx;
  int normal_frames;
  int normal_frame_bits;
  int last_frame_reduction = 0;
  double av_score = 1.0;
  double tot_norm_frame_score = 1.0;
  double this_frame_score = 1.0;

  // The GF group structure has already been defined; read back its size.
  int gop_frames = gf_group->gf_group_size;

  key_frame = cpi->common.frame_type == KEY_FRAME;

  // For key frames the frame target rate is already set and it
  // is also the golden frame.
  // === [frame_index == 0] ===
  if (!key_frame) {
    gf_group->bit_allocation[frame_index] =
        rc->source_alt_ref_active ? 0 : gf_arf_bits;
  }

  // Deduct the boost bits for arf (or gf if it is not a key frame)
  // from the group total.
  if (rc->source_alt_ref_pending || !key_frame) total_group_bits -= gf_arf_bits;

  ++frame_index;

  // === [frame_index == 1] ===
  // Store the bits to spend on the ARF if there is one.
  if (rc->source_alt_ref_pending) {
    gf_group->bit_allocation[frame_index] = gf_arf_bits;
    ++frame_index;
  }

  // Define the middle frame of the group.
  mid_frame_idx = frame_index + (rc->baseline_gf_interval >> 1) - 1;

  normal_frames = (rc->baseline_gf_interval - 1);
  if (normal_frames > 1)
    normal_frame_bits = (int)(total_group_bits / normal_frames);
  else
    normal_frame_bits = (int)total_group_bits;

  gf_group->gfu_boost[1] = rc->gfu_boost;

  if (cpi->multi_layer_arf) {
    int idx;
    int arf_depth_bits[MAX_ARF_LAYERS] = { 0 };
    int arf_depth_count[MAX_ARF_LAYERS] = { 0 };
    int arf_depth_boost[MAX_ARF_LAYERS] = { 0 };
    int total_arfs = 1;  // Account for the base layer ARF.
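    // Multi-layer ARF bit allocation, in outline: first accumulate the boost
    // contributed by the ARFs at each pyramid depth, then carve a per-depth
    // bit pool out of the group budget with calculate_boost_bits(); each ARF
    // later receives a share of its depth's pool weighted by its own
    // gfu_boost.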
    for (idx = 0; idx < gop_frames; ++idx) {
      if (gf_group->update_type[idx] == ARF_UPDATE) {
        arf_depth_boost[gf_group->layer_depth[idx]] += gf_group->gfu_boost[idx];
        ++arf_depth_count[gf_group->layer_depth[idx]];
      }
    }

    for (idx = 2; idx < MAX_ARF_LAYERS; ++idx) {
      if (arf_depth_boost[idx] == 0) break;
      arf_depth_bits[idx] = calculate_boost_bits(
          rc->baseline_gf_interval - total_arfs - arf_depth_count[idx],
          arf_depth_boost[idx], total_group_bits);

      total_group_bits -= arf_depth_bits[idx];
      total_arfs += arf_depth_count[idx];
    }

    // Exclude the mid-pyramid ARFs from the normal frame count; the base
    // layer ARF is accounted for separately.
    normal_frames -= (total_arfs - 1);
    if (normal_frames > 1)
      normal_frame_bits = (int)(total_group_bits / normal_frames);
    else
      normal_frame_bits = (int)total_group_bits;

    target_frame_size = normal_frame_bits;
    target_frame_size =
        clamp(target_frame_size, 0, VPXMIN(max_bits, (int)total_group_bits));

    // The first layer ARF has its bit allocation assigned.
    for (idx = frame_index; idx < gop_frames; ++idx) {
      switch (gf_group->update_type[idx]) {
        case ARF_UPDATE:
          gf_group->bit_allocation[idx] =
              (int)(((int64_t)arf_depth_bits[gf_group->layer_depth[idx]] *
                     gf_group->gfu_boost[idx]) /
                    arf_depth_boost[gf_group->layer_depth[idx]]);
          break;
        case USE_BUF_FRAME: gf_group->bit_allocation[idx] = 0; break;
        default: gf_group->bit_allocation[idx] = target_frame_size; break;
      }
    }
    gf_group->bit_allocation[idx] = 0;

    return;
  }

  if (oxcf->vbr_corpus_complexity) {
    av_score = get_distribution_av_err(cpi, twopass);
    tot_norm_frame_score = calculate_group_score(cpi, av_score, normal_frames);
  }

  // Allocate bits to the other frames in the group.
  for (i = 0; i < normal_frames; ++i) {
    if (EOF == input_stats(twopass, &frame_stats)) break;

    if (oxcf->vbr_corpus_complexity) {
      this_frame_score =
          calculate_norm_frame_score(cpi, twopass, oxcf, &frame_stats, av_score);
      normal_frame_bits = (int)((double)total_group_bits *
                                (this_frame_score / tot_norm_frame_score));
    }

    target_frame_size = normal_frame_bits;
    if ((i == (normal_frames - 1)) && (i >= 1)) {
      last_frame_reduction = normal_frame_bits / 16;
      target_frame_size -= last_frame_reduction;
    }

    target_frame_size =
        clamp(target_frame_size, 0, VPXMIN(max_bits, (int)total_group_bits));

    gf_group->bit_allocation[frame_index] = target_frame_size;
    ++frame_index;
  }

  // Add in some extra bits for the middle frame in the group.
  gf_group->bit_allocation[mid_frame_idx] += last_frame_reduction;

  // Note:
  // We need to configure the frame at the end of the sequence + 1 that will be
  // the start frame for the next group. Otherwise prior to the call to
  // vp9_rc_get_second_pass_params() the data will be undefined.
}

// Adjusts the ARNR filter for a GF group.
static void adjust_group_arnr_filter(VP9_COMP *cpi, double section_noise,
                                     double section_inter,
                                     double section_motion) {
  TWO_PASS *const twopass = &cpi->twopass;
  double section_zeromv = section_inter - section_motion;

  twopass->arnr_strength_adjustment = 0;

  if (section_noise < 150) {
    twopass->arnr_strength_adjustment -= 1;
    if (section_noise < 75) twopass->arnr_strength_adjustment -= 1;
  } else if (section_noise > 250)
    twopass->arnr_strength_adjustment += 1;

  if (section_zeromv > 0.50) twopass->arnr_strength_adjustment += 1;
}

// Analyse and define a gf/arf group.
#define ARF_ABS_ZOOM_THRESH 4.0 #define MAX_GF_BOOST 5400 typedef struct RANGE { int min; int max; } RANGE; static int get_gop_coding_frame_num( int *use_alt_ref, const FRAME_INFO *frame_info, const FIRST_PASS_INFO *first_pass_info, const RATE_CONTROL *rc, int gf_start_show_idx, const RANGE *active_gf_interval, double gop_intra_factor, int lag_in_frames) { double loop_decay_rate = 1.00; double mv_ratio_accumulator = 0.0; double this_frame_mv_in_out = 0.0; double mv_in_out_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; double sr_accumulator = 0.0; // Motion breakout threshold for loop below depends on image size. double mv_ratio_accumulator_thresh = (frame_info->frame_height + frame_info->frame_width) / 4.0; double zero_motion_accumulator = 1.0; int gop_coding_frames; *use_alt_ref = 1; gop_coding_frames = 0; while (gop_coding_frames < rc->static_scene_max_gf_interval && gop_coding_frames < rc->frames_to_key) { const FIRSTPASS_STATS *next_next_frame; const FIRSTPASS_STATS *next_frame; int flash_detected; ++gop_coding_frames; next_frame = fps_get_frame_stats(first_pass_info, gf_start_show_idx + gop_coding_frames); if (next_frame == NULL) { break; } // Test for the case where there is a brief flash but the prediction // quality back to an earlier frame is then restored. next_next_frame = fps_get_frame_stats( first_pass_info, gf_start_show_idx + gop_coding_frames + 1); flash_detected = detect_flash_from_frame_stats(next_next_frame); // Update the motion related elements to the boost calculation. accumulate_frame_motion_stats( next_frame, &this_frame_mv_in_out, &mv_in_out_accumulator, &abs_mv_in_out_accumulator, &mv_ratio_accumulator); // Monitor for static sections. if ((rc->frames_since_key + gop_coding_frames - 1) > 1) { zero_motion_accumulator = VPXMIN(zero_motion_accumulator, get_zero_motion_factor(frame_info, next_frame)); } // Accumulate the effect of prediction quality decay. if (!flash_detected) { double last_loop_decay_rate = loop_decay_rate; loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame); // Break clause to detect very still sections after motion. For example, // a static image after a fade or other transition. if (gop_coding_frames > rc->min_gf_interval && loop_decay_rate >= 0.999 && last_loop_decay_rate < 0.9) { int still_interval = 5; if (check_transition_to_still(first_pass_info, gf_start_show_idx + gop_coding_frames, still_interval)) { *use_alt_ref = 0; break; } } // Update the accumulator for second ref error difference. // This is intended to give an indication of how much the coded error is // increasing over time. if (gop_coding_frames == 1) { sr_accumulator += next_frame->coded_error; } else { sr_accumulator += (next_frame->sr_coded_error - next_frame->coded_error); } } // Break out conditions. // Break at maximum of active_gf_interval->max unless almost totally // static. // // Note that the addition of a test of rc->source_alt_ref_active is // deliberate. The effect of this is that after a normal altref group even // if the material is static there will be one normal length GF group // before allowing longer GF groups. The reason for this is that in cases // such as slide shows where slides are separated by a complex transition // such as a fade, the arf group spanning the transition may not be coded // at a very high quality and hence this frame (with its overlay) is a // poor golden frame to use for an extended group. 
    if ((gop_coding_frames >= active_gf_interval->max) &&
        ((zero_motion_accumulator < 0.995) || (rc->source_alt_ref_active))) {
      break;
    }
    if (
        // Don't break out with a very short interval.
        (gop_coding_frames >= active_gf_interval->min) &&
        // If possible don't break very close to a kf.
        ((rc->frames_to_key - gop_coding_frames) >= rc->min_gf_interval) &&
        (gop_coding_frames & 0x01) && (!flash_detected) &&
        ((mv_ratio_accumulator > mv_ratio_accumulator_thresh) ||
         (abs_mv_in_out_accumulator > ARF_ABS_ZOOM_THRESH) ||
         (sr_accumulator > gop_intra_factor * next_frame->intra_error))) {
      break;
    }
  }
  *use_alt_ref &= zero_motion_accumulator < 0.995;
  *use_alt_ref &= gop_coding_frames < lag_in_frames;
  *use_alt_ref &= gop_coding_frames >= rc->min_gf_interval;
  return gop_coding_frames;
}

static RANGE get_active_gf_inverval_range(
    const FRAME_INFO *frame_info, const RATE_CONTROL *rc, int arf_active_or_kf,
    int gf_start_show_idx, int active_worst_quality, int last_boosted_qindex) {
  RANGE active_gf_interval;
#if CONFIG_RATE_CTRL
  (void)frame_info;
  (void)gf_start_show_idx;
  (void)active_worst_quality;
  (void)last_boosted_qindex;
  active_gf_interval.min = rc->min_gf_interval + arf_active_or_kf + 2;

  active_gf_interval.max = 16 + arf_active_or_kf;

  if ((active_gf_interval.max <= rc->frames_to_key) &&
      (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) {
    active_gf_interval.min = rc->frames_to_key / 2;
    active_gf_interval.max = rc->frames_to_key / 2;
  }
#else
  int int_max_q = (int)(vp9_convert_qindex_to_q(active_worst_quality,
                                                frame_info->bit_depth));
  int q_term = (gf_start_show_idx == 0)
                   ? int_max_q / 32
                   : (int)(vp9_convert_qindex_to_q(last_boosted_qindex,
                                                   frame_info->bit_depth) /
                           6);
  active_gf_interval.min =
      rc->min_gf_interval + arf_active_or_kf + VPXMIN(2, int_max_q / 200);
  active_gf_interval.min =
      VPXMIN(active_gf_interval.min, rc->max_gf_interval + arf_active_or_kf);

  // The value chosen depends on the active Q range. At low Q we have
  // bits to spare and are better with a smaller interval and smaller boost.
  // At high Q when there are few bits to spare we are better with a longer
  // interval to spread the cost of the GF.
  active_gf_interval.max = 11 + arf_active_or_kf + VPXMIN(5, q_term);

  // Force max GF interval to be odd.
  active_gf_interval.max = active_gf_interval.max | 0x01;

  // We have: active_gf_interval.min <=
  // rc->max_gf_interval + arf_active_or_kf.
  if (active_gf_interval.max < active_gf_interval.min) {
    active_gf_interval.max = active_gf_interval.min;
  } else {
    active_gf_interval.max =
        VPXMIN(active_gf_interval.max, rc->max_gf_interval + arf_active_or_kf);
  }

  // Would the active max drop us out just before the next kf?
if ((active_gf_interval.max <= rc->frames_to_key) && (active_gf_interval.max >= (rc->frames_to_key - rc->min_gf_interval))) { active_gf_interval.max = rc->frames_to_key / 2; } active_gf_interval.max = VPXMAX(active_gf_interval.max, active_gf_interval.min); #endif return active_gf_interval; } static int get_arf_layers(int multi_layer_arf, int max_layers, int coding_frame_num) { assert(max_layers <= MAX_ARF_LAYERS); if (multi_layer_arf) { int layers = 0; int i; for (i = coding_frame_num; i > 0; i >>= 1) { ++layers; } layers = VPXMIN(max_layers, layers); return layers; } else { return 1; } } static void define_gf_group(VP9_COMP *cpi, int gf_start_show_idx) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; VP9EncoderConfig *const oxcf = &cpi->oxcf; TWO_PASS *const twopass = &cpi->twopass; const FRAME_INFO *frame_info = &cpi->frame_info; const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info; const FIRSTPASS_STATS *const start_pos = twopass->stats_in; int gop_coding_frames; double gf_group_err = 0.0; double gf_group_raw_error = 0.0; double gf_group_noise = 0.0; double gf_group_skip_pct = 0.0; double gf_group_inactive_zone_rows = 0.0; double gf_group_inter = 0.0; double gf_group_motion = 0.0; int allow_alt_ref = is_altref_enabled(cpi); int use_alt_ref; int64_t gf_group_bits; int gf_arf_bits; const int is_key_frame = frame_is_intra_only(cm); // If this is a key frame or the overlay from a previous arf then // the error score / cost of this frame has already been accounted for. const int arf_active_or_kf = is_key_frame || rc->source_alt_ref_active; int is_alt_ref_flash = 0; double gop_intra_factor; int gop_frames; RANGE active_gf_interval; // Reset the GF group data structures unless this is a key // frame in which case it will already have been done. if (is_key_frame == 0) { vp9_zero(twopass->gf_group); } vpx_clear_system_state(); active_gf_interval = get_active_gf_inverval_range( frame_info, rc, arf_active_or_kf, gf_start_show_idx, twopass->active_worst_quality, rc->last_boosted_qindex); if (cpi->multi_layer_arf) { int arf_layers = get_arf_layers(cpi->multi_layer_arf, oxcf->enable_auto_arf, active_gf_interval.max); gop_intra_factor = 1.0 + 0.25 * arf_layers; } else { gop_intra_factor = 1.0; } { gop_coding_frames = get_gop_coding_frame_num( &use_alt_ref, frame_info, first_pass_info, rc, gf_start_show_idx, &active_gf_interval, gop_intra_factor, cpi->oxcf.lag_in_frames); use_alt_ref &= allow_alt_ref; } // Was the group length constrained by the requirement for a new KF? rc->constrained_gf_group = (gop_coding_frames >= rc->frames_to_key) ? 1 : 0; // Should we use the alternate reference frame. if (use_alt_ref) { const int f_frames = (rc->frames_to_key - gop_coding_frames >= gop_coding_frames - 1) ? gop_coding_frames - 1 : VPXMAX(0, rc->frames_to_key - gop_coding_frames); const int b_frames = gop_coding_frames - 1; const int avg_inter_frame_qindex = rc->avg_frame_qindex[INTER_FRAME]; // TODO(angiebird): figure out why arf's location is assigned this way const int arf_show_idx = VPXMIN(gf_start_show_idx + gop_coding_frames + 1, fps_get_num_frames(first_pass_info)); // Calculate the boost for alt ref. 
rc->gfu_boost = compute_arf_boost(frame_info, first_pass_info, arf_show_idx, f_frames, b_frames, avg_inter_frame_qindex); rc->source_alt_ref_pending = 1; } else { const int f_frames = gop_coding_frames - 1; const int b_frames = 0; const int avg_inter_frame_qindex = rc->avg_frame_qindex[INTER_FRAME]; // TODO(angiebird): figure out why arf's location is assigned this way const int gld_show_idx = VPXMIN(gf_start_show_idx + 1, fps_get_num_frames(first_pass_info)); const int arf_boost = compute_arf_boost(frame_info, first_pass_info, gld_show_idx, f_frames, b_frames, avg_inter_frame_qindex); rc->gfu_boost = VPXMIN(MAX_GF_BOOST, arf_boost); rc->source_alt_ref_pending = 0; } #define LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR 0.2 rc->arf_active_best_quality_adjustment_factor = 1.0; rc->arf_increase_active_best_quality = 0; if (!is_lossless_requested(&cpi->oxcf)) { if (rc->frames_since_key >= rc->frames_to_key) { // Increase the active best quality in the second half of key frame // interval. rc->arf_active_best_quality_adjustment_factor = LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR + (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) * (rc->frames_to_key - gop_coding_frames) / (VPXMAX(1, ((rc->frames_to_key + rc->frames_since_key) / 2 - gop_coding_frames))); rc->arf_increase_active_best_quality = 1; } else if ((rc->frames_to_key - gop_coding_frames) > 0) { // Reduce the active best quality in the first half of key frame interval. rc->arf_active_best_quality_adjustment_factor = LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR + (1.0 - LAST_ALR_ACTIVE_BEST_QUALITY_ADJUSTMENT_FACTOR) * (rc->frames_since_key + gop_coding_frames) / (VPXMAX(1, (rc->frames_to_key + rc->frames_since_key) / 2 + gop_coding_frames)); rc->arf_increase_active_best_quality = -1; } } #ifdef AGGRESSIVE_VBR // Limit maximum boost based on interval length. rc->gfu_boost = VPXMIN((int)rc->gfu_boost, gop_coding_frames * 140); #else rc->gfu_boost = VPXMIN((int)rc->gfu_boost, gop_coding_frames * 200); #endif // Cap the ARF boost when perceptual quality AQ mode is enabled. This is // designed to improve the perceptual quality of high value content and to // make consistent quality across consecutive frames. It will hurt objective // quality. if (oxcf->aq_mode == PERCEPTUAL_AQ) rc->gfu_boost = VPXMIN(rc->gfu_boost, MIN_ARF_GF_BOOST); rc->baseline_gf_interval = gop_coding_frames - rc->source_alt_ref_pending; if (rc->source_alt_ref_pending) is_alt_ref_flash = detect_flash(twopass, rc->baseline_gf_interval); { const double av_err = get_distribution_av_err(cpi, twopass); const double mean_mod_score = twopass->mean_mod_score; // If the first frame is a key frame or the overlay from a previous arf then // the error score / cost of this frame has already been accounted for. int start_idx = arf_active_or_kf ? 1 : 0; int j; for (j = start_idx; j < gop_coding_frames; ++j) { int show_idx = gf_start_show_idx + j; const FIRSTPASS_STATS *frame_stats = fps_get_frame_stats(first_pass_info, show_idx); // Accumulate error score of frames in this gf group. 
      gf_group_err += calc_norm_frame_score(oxcf, frame_info, frame_stats,
                                            mean_mod_score, av_err);
      gf_group_raw_error += frame_stats->coded_error;
      gf_group_noise += frame_stats->frame_noise_energy;
      gf_group_skip_pct += frame_stats->intra_skip_pct;
      gf_group_inactive_zone_rows += frame_stats->inactive_zone_rows;
      gf_group_inter += frame_stats->pcnt_inter;
      gf_group_motion += frame_stats->pcnt_motion;
    }
  }

  // Calculate the bits to be allocated to the gf/arf group as a whole.
  gf_group_bits = calculate_total_gf_group_bits(cpi, gf_group_err);

  gop_frames =
      rc->baseline_gf_interval + rc->source_alt_ref_pending - arf_active_or_kf;

  // Store the average noise level measured for the group.
  // TODO(any): Experiment with removal of else condition (gop_frames = 0) so
  // that consumption of group noise energy is based on previous gf group
  if (gop_frames > 0)
    twopass->gf_group.group_noise_energy = (int)(gf_group_noise / gop_frames);
  else
    twopass->gf_group.group_noise_energy = 0;

  // Calculate an estimate of the maxq needed for the group.
  // We are more aggressive about correcting for sections
  // where there could be significant overshoot than for easier
  // sections where we do not wish to risk creating an overshoot
  // of the allocated bit budget.
  if ((cpi->oxcf.rc_mode != VPX_Q) && (rc->baseline_gf_interval > 1)) {
    const int vbr_group_bits_per_frame = (int)(gf_group_bits / gop_frames);
    const double group_av_err = gf_group_raw_error / gop_frames;
    const double group_av_noise = gf_group_noise / gop_frames;
    const double group_av_skip_pct = gf_group_skip_pct / gop_frames;
    const double group_av_inactive_zone =
        ((gf_group_inactive_zone_rows * 2) /
         (gop_frames * (double)cm->mb_rows));
    int tmp_q = get_twopass_worst_quality(
        cpi, group_av_err, (group_av_skip_pct + group_av_inactive_zone),
        group_av_noise, vbr_group_bits_per_frame);
    twopass->active_worst_quality =
        (tmp_q + (twopass->active_worst_quality * 3)) >> 2;

#if CONFIG_ALWAYS_ADJUST_BPM
    // Reset rolling actual and target bits counters for ARF groups.
    twopass->rolling_arf_group_target_bits = 0;
    twopass->rolling_arf_group_actual_bits = 0;
#endif
  }

  // Context adjustment of ARNR filter strength.
  if (rc->baseline_gf_interval > 1) {
    adjust_group_arnr_filter(cpi, (gf_group_noise / gop_frames),
                             (gf_group_inter / gop_frames),
                             (gf_group_motion / gop_frames));
  } else {
    twopass->arnr_strength_adjustment = 0;
  }

  // Calculate the extra bits to be used for boosted frame(s).
  gf_arf_bits = calculate_boost_bits((rc->baseline_gf_interval - 1),
                                     rc->gfu_boost, gf_group_bits);

  // Adjust KF group bits and error remaining.
  twopass->kf_group_error_left -= gf_group_err;

  // Decide GOP structure.
  define_gf_group_structure(cpi);

  // Allocate bits to each of the frames in the GF group.
  allocate_gf_group_bits(cpi, gf_group_bits, gf_arf_bits);

  // Reset the file position.
  reset_fpf_position(twopass, start_pos);

  // Calculate a section intra ratio used in setting max loop filter.
  twopass->section_intra_rating = calculate_section_intra_ratio(
      start_pos, twopass->stats_in_end, rc->baseline_gf_interval);

  if (oxcf->resize_mode == RESIZE_DYNAMIC) {
    // Default to starting GF groups at normal frame size.
    cpi->rc.next_frame_size_selector = UNSCALED;
  }

#if !CONFIG_ALWAYS_ADJUST_BPM
  // Reset rolling actual and target bits counters for ARF groups.
  twopass->rolling_arf_group_target_bits = 0;
  twopass->rolling_arf_group_actual_bits = 0;
#endif

  rc->preserve_arf_as_gld = rc->preserve_next_arf_as_gld;
  rc->preserve_next_arf_as_gld = 0;
  // If the alt ref frame is a flash, do not set preserve_arf_as_gld.
  if (!is_lossless_requested(&cpi->oxcf) && !cpi->use_svc &&
      cpi->oxcf.aq_mode == NO_AQ && cpi->multi_layer_arf && !is_alt_ref_flash)
    rc->preserve_next_arf_as_gld = 1;
}

// Intra / Inter threshold very low
#define VERY_LOW_II 1.5
// For clean slide transitions we expect a sharp single frame spike in error.
#define ERROR_SPIKE 5.0

// Slide show transition detection.
// Tests for the case where there is very low error on either side of the
// current frame but much higher error just for this frame. This can help
// detect key frames in slide shows even where the slides are pictures of
// different sizes.
// Also requires that intra and inter errors are very similar to help eliminate
// harmful false positives.
// It will not help if the transition is a fade or other multi-frame effect.
static int slide_transition(const FIRSTPASS_STATS *this_frame,
                            const FIRSTPASS_STATS *last_frame,
                            const FIRSTPASS_STATS *next_frame) {
  return (this_frame->intra_error < (this_frame->coded_error * VERY_LOW_II)) &&
         (this_frame->coded_error > (last_frame->coded_error * ERROR_SPIKE)) &&
         (this_frame->coded_error > (next_frame->coded_error * ERROR_SPIKE));
}

// This test looks for anomalous changes in the nature of the intra signal
// related to the previous and next frame as an indicator for coding a key
// frame. This test serves to detect some additional scene cuts,
// especially in lowish motion and low contrast sections, that are missed
// by the other tests.
static int intra_step_transition(const FIRSTPASS_STATS *this_frame,
                                 const FIRSTPASS_STATS *last_frame,
                                 const FIRSTPASS_STATS *next_frame) {
  double last_ii_ratio;
  double this_ii_ratio;
  double next_ii_ratio;
  double last_pcnt_intra = 1.0 - last_frame->pcnt_inter;
  double this_pcnt_intra = 1.0 - this_frame->pcnt_inter;
  double next_pcnt_intra = 1.0 - next_frame->pcnt_inter;
  double mod_this_intra = this_pcnt_intra + this_frame->pcnt_neutral;

  // Calculate ii ratio for this frame, last frame and next frame.
  last_ii_ratio =
      last_frame->intra_error / DOUBLE_DIVIDE_CHECK(last_frame->coded_error);
  this_ii_ratio =
      this_frame->intra_error / DOUBLE_DIVIDE_CHECK(this_frame->coded_error);
  next_ii_ratio =
      next_frame->intra_error / DOUBLE_DIVIDE_CHECK(next_frame->coded_error);

  // Return true if the intra/inter ratio for the current frame is low but
  // better in the next and previous frames, and the relative usage of intra
  // in the current frame is markedly higher than in the last and next frames.
  if ((this_ii_ratio < 2.0) && (last_ii_ratio > 2.25) &&
      (next_ii_ratio > 2.25) && (this_pcnt_intra > (3 * last_pcnt_intra)) &&
      (this_pcnt_intra > (3 * next_pcnt_intra)) &&
      ((this_pcnt_intra > 0.075) || (mod_this_intra > 0.85))) {
    return 1;
    // Very low inter intra ratio (i.e. not much gain from inter coding), most
    // blocks neutral on coding method and better inter prediction either side.
  } else if ((this_ii_ratio < 1.25) && (mod_this_intra > 0.85) &&
             (this_ii_ratio < last_ii_ratio * 0.9) &&
             (this_ii_ratio < next_ii_ratio * 0.9)) {
    return 1;
  } else {
    return 0;
  }
}

// Minimum % intra coding observed in first pass (1.0 = 100%)
#define MIN_INTRA_LEVEL 0.25
// Threshold for use of the lagging second reference frame. Scene cuts do not
// usually have a high second ref usage.
#define SECOND_REF_USEAGE_THRESH 0.2
// Hard threshold where the first pass chooses intra for almost all blocks.
// In such a case even if the frame is not a scene cut coding a key frame
// may be a good option.
#define VERY_LOW_INTER_THRESH 0.05
// Maximum threshold for the relative ratio of intra error score vs best
// inter error score.
#define KF_II_ERR_THRESHOLD 2.5
#define KF_II_MAX 128.0
#define II_FACTOR 12.5
// Test for very low intra complexity which could cause false key frames.
#define V_LOW_INTRA 0.5

static int test_candidate_kf(const FIRST_PASS_INFO *first_pass_info,
                             int show_idx) {
  const FIRSTPASS_STATS *last_frame =
      fps_get_frame_stats(first_pass_info, show_idx - 1);
  const FIRSTPASS_STATS *this_frame =
      fps_get_frame_stats(first_pass_info, show_idx);
  const FIRSTPASS_STATS *next_frame =
      fps_get_frame_stats(first_pass_info, show_idx + 1);
  int is_viable_kf = 0;
  double pcnt_intra = 1.0 - this_frame->pcnt_inter;

  // Does the frame satisfy the primary criteria of a key frame?
  // See above for an explanation of the test criteria.
  // If so, then examine how well it predicts subsequent frames.
  if (!detect_flash_from_frame_stats(this_frame) &&
      !detect_flash_from_frame_stats(next_frame) &&
      (this_frame->pcnt_second_ref < SECOND_REF_USEAGE_THRESH) &&
      ((this_frame->pcnt_inter < VERY_LOW_INTER_THRESH) ||
       (slide_transition(this_frame, last_frame, next_frame)) ||
       (intra_step_transition(this_frame, last_frame, next_frame)) ||
       (((this_frame->coded_error > (next_frame->coded_error * 1.2)) &&
         (this_frame->coded_error > (last_frame->coded_error * 1.2))) &&
        (pcnt_intra > MIN_INTRA_LEVEL) &&
        ((pcnt_intra + this_frame->pcnt_neutral) > 0.5) &&
        ((this_frame->intra_error /
          DOUBLE_DIVIDE_CHECK(this_frame->coded_error)) <
         KF_II_ERR_THRESHOLD)))) {
    int i;
    double boost_score = 0.0;
    double old_boost_score = 0.0;
    double decay_accumulator = 1.0;

    // Examine how well the key frame predicts subsequent frames.
    for (i = 0; i < 16; ++i) {
      const FIRSTPASS_STATS *frame_stats =
          fps_get_frame_stats(first_pass_info, show_idx + 1 + i);
      double next_iiratio = (II_FACTOR * frame_stats->intra_error /
                             DOUBLE_DIVIDE_CHECK(frame_stats->coded_error));

      if (next_iiratio > KF_II_MAX) next_iiratio = KF_II_MAX;

      // Cumulative effect of decay in prediction quality.
      if (frame_stats->pcnt_inter > 0.85)
        decay_accumulator *= frame_stats->pcnt_inter;
      else
        decay_accumulator *= (0.85 + frame_stats->pcnt_inter) / 2.0;

      // Keep a running total.
      boost_score += (decay_accumulator * next_iiratio);

      // Test various breakout clauses.
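      // The clauses, in order: almost nothing is predicted inter; the
      // intra/inter ratio gain has collapsed; coding is mostly neutral with
      // only a modest ratio; the boost has effectively stopped growing; or
      // the intra error itself is trivially small.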
if ((frame_stats->pcnt_inter < 0.05) || (next_iiratio < 1.5) || (((frame_stats->pcnt_inter - frame_stats->pcnt_neutral) < 0.20) && (next_iiratio < 3.0)) || ((boost_score - old_boost_score) < 3.0) || (frame_stats->intra_error < V_LOW_INTRA)) { break; } old_boost_score = boost_score; // Stop if we have reached the last frame for which stats are available. if (show_idx + 1 + i == fps_get_num_frames(first_pass_info) - 1) break; } // If there is tolerable prediction for at least the next 3 frames then // break out, else discard this potential key frame and move on. if (boost_score > 30.0 && (i > 3)) { is_viable_kf = 1; } else { is_viable_kf = 0; } } return is_viable_kf; } #define FRAMES_TO_CHECK_DECAY 8 #define MIN_KF_TOT_BOOST 300 #define DEFAULT_SCAN_FRAMES_FOR_KF_BOOST 32 #define MAX_SCAN_FRAMES_FOR_KF_BOOST 48 #define MIN_SCAN_FRAMES_FOR_KF_BOOST 32 #define KF_ABS_ZOOM_THRESH 6.0 #ifdef AGGRESSIVE_VBR #define KF_MAX_FRAME_BOOST 80.0 #define MAX_KF_TOT_BOOST 4800 #else #define KF_MAX_FRAME_BOOST 96.0 #define MAX_KF_TOT_BOOST 5400 #endif int vp9_get_frames_to_next_key(const VP9EncoderConfig *oxcf, const FRAME_INFO *frame_info, const FIRST_PASS_INFO *first_pass_info, int kf_show_idx, int min_gf_interval) { double recent_loop_decay[FRAMES_TO_CHECK_DECAY]; int j; int frames_to_key; int max_frames_to_key = first_pass_info->num_frames - kf_show_idx; max_frames_to_key = VPXMIN(max_frames_to_key, oxcf->key_freq); // Initialize the decay rates for the recent frames to check. for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) recent_loop_decay[j] = 1.0; // Find the next keyframe. if (!oxcf->auto_key) { frames_to_key = max_frames_to_key; } else { frames_to_key = 1; while (frames_to_key < max_frames_to_key) { // Provided that we are not at the end of the file... if (kf_show_idx + frames_to_key + 1 < first_pass_info->num_frames) { double loop_decay_rate; double decay_accumulator; const FIRSTPASS_STATS *next_frame = fps_get_frame_stats( first_pass_info, kf_show_idx + frames_to_key + 1); // Check for a scene cut. if (test_candidate_kf(first_pass_info, kf_show_idx + frames_to_key)) break; // How fast is the prediction quality decaying? loop_decay_rate = get_prediction_decay_rate(frame_info, next_frame); // We want to know something about the recent past... rather than, as // used elsewhere, the decay in prediction quality since the last GF // or KF. recent_loop_decay[(frames_to_key - 1) % FRAMES_TO_CHECK_DECAY] = loop_decay_rate; decay_accumulator = 1.0; for (j = 0; j < FRAMES_TO_CHECK_DECAY; ++j) decay_accumulator *= recent_loop_decay[j]; // Special check for transition or high motion followed by a // static scene.
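/* For illustration: loop_decay_rate is the per-frame prediction quality (close to 1.0 means the next frame is predicted almost perfectly), while decay_accumulator is the product over the last FRAMES_TO_CHECK_DECAY (8) frames. The test below therefore fires only on a "motion, then sudden stillness" pattern; e.g. two recent rates of 0.95 and 0.9 pull the accumulator under 0.9 even though the current rate is >= 0.999. */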
if ((frames_to_key - 1) > min_gf_interval && loop_decay_rate >= 0.999 && decay_accumulator < 0.9) { int still_interval = oxcf->key_freq - (frames_to_key - 1); // TODO(angiebird): Figure out why we use "+1" here int show_idx = kf_show_idx + frames_to_key; if (check_transition_to_still(first_pass_info, show_idx, still_interval)) { break; } } } ++frames_to_key; } } return frames_to_key; } static void find_next_key_frame(VP9_COMP *cpi, int kf_show_idx) { int i; RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = &cpi->twopass; GF_GROUP *const gf_group = &twopass->gf_group; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const FIRST_PASS_INFO *first_pass_info = &twopass->first_pass_info; const FRAME_INFO *frame_info = &cpi->frame_info; const FIRSTPASS_STATS *const start_position = twopass->stats_in; const FIRSTPASS_STATS *keyframe_stats = fps_get_frame_stats(first_pass_info, kf_show_idx); FIRSTPASS_STATS next_frame; int kf_bits = 0; int64_t max_kf_bits; double zero_motion_accumulator = 1.0; double zero_motion_sum = 0.0; double zero_motion_avg; double motion_compensable_sum = 0.0; double motion_compensable_avg; int num_frames = 0; int kf_boost_scan_frames = DEFAULT_SCAN_FRAMES_FOR_KF_BOOST; double boost_score = 0.0; double kf_mod_err = 0.0; double kf_raw_err = 0.0; double kf_group_err = 0.0; double sr_accumulator = 0.0; double abs_mv_in_out_accumulator = 0.0; const double av_err = get_distribution_av_err(cpi, twopass); const double mean_mod_score = twopass->mean_mod_score; vp9_zero(next_frame); cpi->common.frame_type = KEY_FRAME; rc->frames_since_key = 0; // Reset the GF group data structures. vp9_zero(*gf_group); // Is this a forced key frame by interval? rc->this_key_frame_forced = rc->next_key_frame_forced; // Clear the alt ref active flag and last group multi arf flags as they // can never be set for a key frame. rc->source_alt_ref_active = 0; // KF is always a GF so clear frames till next gf counter. rc->frames_till_gf_update_due = 0; rc->frames_to_key = 1; twopass->kf_group_bits = 0; // Total bits available to kf group twopass->kf_group_error_left = 0.0; // Group modified error score. kf_raw_err = keyframe_stats->intra_error; kf_mod_err = calc_norm_frame_score(oxcf, frame_info, keyframe_stats, mean_mod_score, av_err); rc->frames_to_key = vp9_get_frames_to_next_key( oxcf, frame_info, first_pass_info, kf_show_idx, rc->min_gf_interval); // If there is a max kf interval set by the user we must obey it. // We already break out of the loop above at 2x max. // This code centers the extra kf if the actual natural interval // is between 1x and 2x. if (rc->frames_to_key >= cpi->oxcf.key_freq) { rc->next_key_frame_forced = 1; } else { rc->next_key_frame_forced = 0; } for (i = 0; i < rc->frames_to_key; ++i) { const FIRSTPASS_STATS *frame_stats = fps_get_frame_stats(first_pass_info, kf_show_idx + i); // Accumulate kf group error. kf_group_err += calc_norm_frame_score(oxcf, frame_info, frame_stats, mean_mod_score, av_err); } // Calculate the number of bits that should be assigned to the kf group. if (twopass->bits_left > 0 && twopass->normalized_score_left > 0.0) { // Maximum number of bits for a single normal frame (not key frame). const int max_bits = frame_max_bits(rc, &cpi->oxcf); // Maximum number of bits allocated to the key frame group. int64_t max_grp_bits; // Default allocation based on bits left and relative // complexity of the section.
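/* Worked example (hypothetical numbers): with bits_left = 10,000,000, kf_group_err = 150.0 and normalized_score_left = 600.0 the group holds a quarter of the remaining complexity, so the line below assigns kf_group_bits = 10,000,000 * (150.0 / 600.0) = 2,500,000 before the per-frame max_bits clip is applied. */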
twopass->kf_group_bits = (int64_t)( twopass->bits_left * (kf_group_err / twopass->normalized_score_left)); // Clip based on maximum per frame rate defined by the user. max_grp_bits = (int64_t)max_bits * (int64_t)rc->frames_to_key; if (twopass->kf_group_bits > max_grp_bits) twopass->kf_group_bits = max_grp_bits; } else { twopass->kf_group_bits = 0; } twopass->kf_group_bits = VPXMAX(0, twopass->kf_group_bits); // Scan through the kf group collating various stats used to determine // how many bits to spend on it. boost_score = 0.0; for (i = 0; i < VPXMIN(MAX_SCAN_FRAMES_FOR_KF_BOOST, (rc->frames_to_key - 1)); ++i) { if (EOF == input_stats(twopass, &next_frame)) break; zero_motion_sum += next_frame.pcnt_inter - next_frame.pcnt_motion; motion_compensable_sum += 1 - (double)next_frame.coded_error / next_frame.intra_error; num_frames++; } if (num_frames >= MIN_SCAN_FRAMES_FOR_KF_BOOST) { zero_motion_avg = zero_motion_sum / num_frames; motion_compensable_avg = motion_compensable_sum / num_frames; kf_boost_scan_frames = (int)(VPXMAX(64 * zero_motion_avg - 16, 160 * motion_compensable_avg - 112)); kf_boost_scan_frames = VPXMAX(VPXMIN(kf_boost_scan_frames, MAX_SCAN_FRAMES_FOR_KF_BOOST), MIN_SCAN_FRAMES_FOR_KF_BOOST); } reset_fpf_position(twopass, start_position); for (i = 0; i < (rc->frames_to_key - 1); ++i) { if (EOF == input_stats(twopass, &next_frame)) break; // The zero motion test here ensures that if we mark a kf group as static // it is static throughout, not just over the first kf_boost_scan_frames. // It also allows for a larger boost on long static groups. if ((i <= kf_boost_scan_frames) || (zero_motion_accumulator >= 0.99)) { double frame_boost; double zm_factor; // Monitor for static sections. // For the first frame in the kf group the second ref indicator is // invalid. if (i > 0) { zero_motion_accumulator = VPXMIN(zero_motion_accumulator, get_zero_motion_factor(&cpi->frame_info, &next_frame)); } else { zero_motion_accumulator = next_frame.pcnt_inter - next_frame.pcnt_motion; } // Factor 0.75-1.25 based on how much of the frame is static. zm_factor = (0.75 + (zero_motion_accumulator / 2.0)); // The second (lagging) ref error is not valid immediately after // a key frame because either the lag has not built up (in the case of // the first key frame) or it points to a reference before the new key // frame. if (i < 2) sr_accumulator = 0.0; frame_boost = calc_kf_frame_boost(cpi, &next_frame, &sr_accumulator, 0, KF_MAX_FRAME_BOOST * zm_factor); boost_score += frame_boost; // Measure of zoom. Large zoom tends to indicate reduced boost. abs_mv_in_out_accumulator += fabs(next_frame.mv_in_out_count * next_frame.pcnt_motion); if ((frame_boost < 25.00) || (abs_mv_in_out_accumulator > KF_ABS_ZOOM_THRESH) || (sr_accumulator > (kf_raw_err * 1.50))) break; } else { break; } } reset_fpf_position(twopass, start_position); // Store the zero motion percentage. twopass->kf_zeromotion_pct = (int)(zero_motion_accumulator * 100.0); // Calculate a section intra ratio used in setting max loop filter. twopass->key_frame_section_intra_rating = calculate_section_intra_ratio( start_position, twopass->stats_in_end, rc->frames_to_key); // Special case for static / slide show content but don't apply // if the kf group is very short.
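/* For illustration: zero_motion_accumulator is a running minimum of the per-frame zero-motion factor, so it only stays above 0.99 when every scanned frame in the group is essentially static; the (rc->frames_to_key > 8) guard below stops one brief still moment in normal content from being treated as a slide show. */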
if ((zero_motion_accumulator > 0.99) && (rc->frames_to_key > 8)) { rc->kf_boost = MAX_KF_TOT_BOOST; } else { // Apply various clamps for min and max boost rc->kf_boost = VPXMAX((int)boost_score, (rc->frames_to_key * 3)); rc->kf_boost = VPXMAX(rc->kf_boost, MIN_KF_TOT_BOOST); rc->kf_boost = VPXMIN(rc->kf_boost, MAX_KF_TOT_BOOST); } // Work out how many bits to allocate for the key frame itself. kf_bits = calculate_boost_bits((rc->frames_to_key - 1), rc->kf_boost, twopass->kf_group_bits); // Based on the spatial complexity, increase the bits allocated to the key // frame. kf_bits += (int)((twopass->kf_group_bits - kf_bits) * (kf_mod_err / kf_group_err)); max_kf_bits = twopass->kf_group_bits - (rc->frames_to_key - 1) * FRAME_OVERHEAD_BITS; max_kf_bits = lclamp(max_kf_bits, 0, INT_MAX); kf_bits = VPXMIN(kf_bits, (int)max_kf_bits); twopass->kf_group_bits -= kf_bits; // Save the bits to spend on the key frame. gf_group->bit_allocation[0] = kf_bits; gf_group->update_type[0] = KF_UPDATE; gf_group->rf_level[0] = KF_STD; gf_group->layer_depth[0] = 0; // Note the total error score of the kf group minus the key frame itself. twopass->kf_group_error_left = (kf_group_err - kf_mod_err); // Adjust the count of total modified error left. // The count of bits left is adjusted elsewhere based on real coded frame // sizes. twopass->normalized_score_left -= kf_group_err; if (oxcf->resize_mode == RESIZE_DYNAMIC) { // Default to normal-sized frame on keyframes. cpi->rc.next_frame_size_selector = UNSCALED; } } static int is_skippable_frame(const VP9_COMP *cpi) { // If the current frame does not have a non-zero motion vector detected in // the first pass, and neither do its previous and forward frames, then this // frame can be skipped for the partition check, and the partition size is // assigned according to the variance. const TWO_PASS *const twopass = &cpi->twopass; return (!frame_is_intra_only(&cpi->common) && twopass->stats_in - 2 > twopass->stats_in_start && twopass->stats_in < twopass->stats_in_end && (twopass->stats_in - 1)->pcnt_inter - (twopass->stats_in - 1)->pcnt_motion == 1 && (twopass->stats_in - 2)->pcnt_inter - (twopass->stats_in - 2)->pcnt_motion == 1 && twopass->stats_in->pcnt_inter - twopass->stats_in->pcnt_motion == 1); } void vp9_rc_get_second_pass_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; TWO_PASS *const twopass = &cpi->twopass; GF_GROUP *const gf_group = &twopass->gf_group; FIRSTPASS_STATS this_frame; const int show_idx = cm->current_video_frame; if (!twopass->stats_in) return; // If this is an arf frame then we don't want to read the stats file or // advance the input pointer as we already have what we need. if (gf_group->update_type[gf_group->index] == ARF_UPDATE) { int target_rate; vp9_zero(this_frame); this_frame = cpi->twopass.stats_in_start[cm->current_video_frame + gf_group->arf_src_offset[gf_group->index]]; vp9_configure_buffer_updates(cpi, gf_group->index); target_rate = gf_group->bit_allocation[gf_group->index]; target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate); rc->base_frame_target = target_rate; cm->frame_type = INTER_FRAME; // Do the firstpass stats indicate that this frame is skippable for the // partition search? if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 && !cpi->use_svc) { cpi->partition_search_skippable_frame = is_skippable_frame(cpi); } // The multiplication by 256 reverses a scaling factor of (>> 8) // applied when combining MB error values for the frame.
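/* For example: a frame whose accumulated (pre-scaled) intra_error is 2000 yields log(2000 * 256.0 + 1.0), roughly 13.1 in natural-log units; the + 1.0 keeps the result defined for a zero-error frame. */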
twopass->mb_av_energy = log((this_frame.intra_error * 256.0) + 1.0); twopass->mb_smooth_pct = this_frame.intra_smooth_pct; return; } vpx_clear_system_state(); if (cpi->oxcf.rc_mode == VPX_Q) { twopass->active_worst_quality = cpi->oxcf.cq_level; } else if (cm->current_video_frame == 0) { const int frames_left = (int)(twopass->total_stats.count - cm->current_video_frame); // Special case code for first frame. const int section_target_bandwidth = (int)(twopass->bits_left / frames_left); const double section_length = twopass->total_left_stats.count; const double section_error = twopass->total_left_stats.coded_error / section_length; const double section_intra_skip = twopass->total_left_stats.intra_skip_pct / section_length; const double section_inactive_zone = (twopass->total_left_stats.inactive_zone_rows * 2) / ((double)cm->mb_rows * section_length); const double section_noise = twopass->total_left_stats.frame_noise_energy / section_length; int tmp_q; tmp_q = get_twopass_worst_quality( cpi, section_error, section_intra_skip + section_inactive_zone, section_noise, section_target_bandwidth); twopass->active_worst_quality = tmp_q; twopass->baseline_active_worst_quality = tmp_q; rc->ni_av_qi = tmp_q; rc->last_q[INTER_FRAME] = tmp_q; rc->avg_q = vp9_convert_qindex_to_q(tmp_q, cm->bit_depth); rc->avg_frame_qindex[INTER_FRAME] = tmp_q; rc->last_q[KEY_FRAME] = (tmp_q + cpi->oxcf.best_allowed_q) / 2; rc->avg_frame_qindex[KEY_FRAME] = rc->last_q[KEY_FRAME]; } vp9_zero(this_frame); if (EOF == input_stats(twopass, &this_frame)) return; // Set the frame content type flag. if (this_frame.intra_skip_pct >= FC_ANIMATION_THRESH) twopass->fr_content_type = FC_GRAPHICS_ANIMATION; else twopass->fr_content_type = FC_NORMAL; // Keyframe and section processing. if (rc->frames_to_key == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY)) { // Define next KF group and assign bits to it. find_next_key_frame(cpi, show_idx); } else { cm->frame_type = INTER_FRAME; } // Define a new GF/ARF group. (Should always enter here for key frames). if (rc->frames_till_gf_update_due == 0) { define_gf_group(cpi, show_idx); rc->frames_till_gf_update_due = rc->baseline_gf_interval; #if ARF_STATS_OUTPUT { FILE *fpfile; fpfile = fopen("arf.stt", "a"); ++arf_count; fprintf(fpfile, "%10d %10ld %10d %10d %10ld %10ld\n", cm->current_video_frame, rc->frames_till_gf_update_due, rc->kf_boost, arf_count, rc->gfu_boost, cm->frame_type); fclose(fpfile); } #endif } vp9_configure_buffer_updates(cpi, gf_group->index); // Do the firstpass stats indicate that this frame is skippable for the // partition search? if (cpi->sf.allow_partition_search_skip && cpi->oxcf.pass == 2 && !cpi->use_svc) { cpi->partition_search_skippable_frame = is_skippable_frame(cpi); } rc->base_frame_target = gf_group->bit_allocation[gf_group->index]; // The multiplication by 256 reverses a scaling factor of (>> 8) // applied when combining MB error values for the frame. twopass->mb_av_energy = log((this_frame.intra_error * 256.0) + 1.0); twopass->mb_smooth_pct = this_frame.intra_smooth_pct; // Update the total stats remaining structure. subtract_stats(&twopass->total_left_stats, &this_frame); } #define MINQ_ADJ_LIMIT 48 #define MINQ_ADJ_LIMIT_CQ 20 #define HIGH_UNDERSHOOT_RATIO 2 void vp9_twopass_postencode_update(VP9_COMP *cpi) { TWO_PASS *const twopass = &cpi->twopass; RATE_CONTROL *const rc = &cpi->rc; VP9_COMMON *const cm = &cpi->common; const int bits_used = rc->base_frame_target; // VBR correction is done through rc->vbr_bits_off_target. 
Based on the // sign of this value, a limited % adjustment is made to the target rate // of subsequent frames, to try to push it back towards 0. This method // is designed to prevent extreme behaviour at the end of a clip // or group of frames. rc->vbr_bits_off_target += rc->base_frame_target - rc->projected_frame_size; twopass->bits_left = VPXMAX(twopass->bits_left - bits_used, 0); // Target vs actual bits for this arf group. twopass->rolling_arf_group_target_bits += rc->this_frame_target; twopass->rolling_arf_group_actual_bits += rc->projected_frame_size; // Calculate the pct rc error. if (rc->total_actual_bits) { rc->rate_error_estimate = (int)((rc->vbr_bits_off_target * 100) / rc->total_actual_bits); rc->rate_error_estimate = clamp(rc->rate_error_estimate, -100, 100); } else { rc->rate_error_estimate = 0; } if (cpi->common.frame_type != KEY_FRAME) { twopass->kf_group_bits -= bits_used; twopass->last_kfgroup_zeromotion_pct = twopass->kf_zeromotion_pct; } twopass->kf_group_bits = VPXMAX(twopass->kf_group_bits, 0); // Increment the gf group index ready for the next frame. ++twopass->gf_group.index; // If the rate control is drifting consider adjustment to min or maxq. if ((cpi->oxcf.rc_mode != VPX_Q) && !cpi->rc.is_src_frame_alt_ref) { const int maxq_adj_limit = rc->worst_quality - twopass->active_worst_quality; const int minq_adj_limit = (cpi->oxcf.rc_mode == VPX_CQ ? MINQ_ADJ_LIMIT_CQ : MINQ_ADJ_LIMIT); int aq_extend_min = 0; int aq_extend_max = 0; // Extend min or max Q range to account for imbalance from the base // value when using AQ. if (cpi->oxcf.aq_mode != NO_AQ && cpi->oxcf.aq_mode != PSNR_AQ && cpi->oxcf.aq_mode != PERCEPTUAL_AQ) { if (cm->seg.aq_av_offset < 0) { // The balance of the AQ map tends towards lowering the average Q. aq_extend_min = 0; aq_extend_max = VPXMIN(maxq_adj_limit, -cm->seg.aq_av_offset); } else { // The balance of the AQ map tends towards raising the average Q. aq_extend_min = VPXMIN(minq_adj_limit, cm->seg.aq_av_offset); aq_extend_max = 0; } } // Undershoot. if (rc->rate_error_estimate > cpi->oxcf.under_shoot_pct) { --twopass->extend_maxq; if (rc->rolling_target_bits >= rc->rolling_actual_bits) ++twopass->extend_minq; // Overshoot. } else if (rc->rate_error_estimate < -cpi->oxcf.over_shoot_pct) { --twopass->extend_minq; if (rc->rolling_target_bits < rc->rolling_actual_bits) ++twopass->extend_maxq; } else { // Adjustment for extreme local overshoot. if (rc->projected_frame_size > (2 * rc->base_frame_target) && rc->projected_frame_size > (2 * rc->avg_frame_bandwidth)) ++twopass->extend_maxq; // Unwind undershoot or overshoot adjustment. if (rc->rolling_target_bits < rc->rolling_actual_bits) --twopass->extend_minq; else if (rc->rolling_target_bits > rc->rolling_actual_bits) --twopass->extend_maxq; } twopass->extend_minq = clamp(twopass->extend_minq, aq_extend_min, minq_adj_limit); twopass->extend_maxq = clamp(twopass->extend_maxq, aq_extend_max, maxq_adj_limit); // If there is a big and unexpected undershoot then feed the extra // bits back in quickly. One situation where this may happen is if a // frame is unexpectedly almost perfectly predicted by the ARF or GF // but not very well predicted by the previous frame.
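/* Worked example (hypothetical numbers): with base_frame_target = 100,000 bits the fast-extra threshold below is 100,000 / HIGH_UNDERSHOOT_RATIO = 50,000; a frame coded at 20,000 bits banks 30,000 bits into vbr_bits_off_target_fast (capped at 4 * avg_frame_bandwidth), and with avg_frame_bandwidth = 80,000 that maps to extend_minq_fast = 30,000 * 8 / 80,000 = 3 Q steps. */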
if (!frame_is_kf_gf_arf(cpi) && !cpi->rc.is_src_frame_alt_ref) { int fast_extra_thresh = rc->base_frame_target / HIGH_UNDERSHOOT_RATIO; if (rc->projected_frame_size < fast_extra_thresh) { rc->vbr_bits_off_target_fast += fast_extra_thresh - rc->projected_frame_size; rc->vbr_bits_off_target_fast = VPXMIN(rc->vbr_bits_off_target_fast, (4 * rc->avg_frame_bandwidth)); // Fast adaptation of minQ if necessary to use up the extra bits. if (rc->avg_frame_bandwidth) { twopass->extend_minq_fast = (int)(rc->vbr_bits_off_target_fast * 8 / rc->avg_frame_bandwidth); } twopass->extend_minq_fast = VPXMIN( twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq); } else if (rc->vbr_bits_off_target_fast) { twopass->extend_minq_fast = VPXMIN( twopass->extend_minq_fast, minq_adj_limit - twopass->extend_minq); } else { twopass->extend_minq_fast = 0; } } } } #if CONFIG_RATE_CTRL // Under CONFIG_RATE_CTRL, once the first_pass_info is ready, the number of // coding frames (including show frames and alt refs) can be determined. int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf, const FRAME_INFO *frame_info, const FIRST_PASS_INFO *first_pass_info, int multi_layer_arf, int allow_alt_ref) { int coding_frame_num = 0; RATE_CONTROL rc; RANGE active_gf_interval; int arf_layers; double gop_intra_factor; int use_alt_ref; int gop_coding_frames; int gop_show_frames; int show_idx = 0; int arf_active_or_kf = 1; rc.static_scene_max_gf_interval = 250; vp9_rc_init(oxcf, 1, &rc); while (show_idx < first_pass_info->num_frames) { if (rc.frames_to_key == 0) { rc.frames_to_key = vp9_get_frames_to_next_key( oxcf, frame_info, first_pass_info, show_idx, rc.min_gf_interval); arf_active_or_kf = 1; } { int dummy = 0; active_gf_interval = get_active_gf_inverval_range( frame_info, &rc, arf_active_or_kf, show_idx, dummy, dummy); } arf_layers = get_arf_layers(multi_layer_arf, oxcf->enable_auto_arf, active_gf_interval.max); if (multi_layer_arf) { gop_intra_factor = 1.0 + 0.25 * arf_layers; } else { gop_intra_factor = 1.0; } gop_coding_frames = get_gop_coding_frame_num( &use_alt_ref, frame_info, first_pass_info, &rc, show_idx, &active_gf_interval, gop_intra_factor, oxcf->lag_in_frames); use_alt_ref &= allow_alt_ref; rc.source_alt_ref_active = use_alt_ref; arf_active_or_kf = use_alt_ref; gop_show_frames = gop_coding_frames - use_alt_ref; rc.frames_to_key -= gop_show_frames; rc.frames_since_key += gop_show_frames; show_idx += gop_show_frames; coding_frame_num += gop_show_frames + use_alt_ref; } return coding_frame_num; } #endif FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *twopass) { return twopass->this_frame_stats; } FIRSTPASS_STATS vp9_get_total_stats(const TWO_PASS *twopass) { return twopass->total_stats; } libvpx-1.8.2/vp9/encoder/vp9_firstpass.h000066400000000000000000000171141357355204000201600ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #ifndef VPX_VP9_ENCODER_VP9_FIRSTPASS_H_ #define VPX_VP9_ENCODER_VP9_FIRSTPASS_H_ #include <assert.h> #include "vp9/common/vp9_onyxc_int.h" #include "vp9/encoder/vp9_lookahead.h" #include "vp9/encoder/vp9_ratectrl.h" #ifdef __cplusplus extern "C" { #endif #if CONFIG_FP_MB_STATS #define FPMB_DCINTRA_MASK 0x01 #define FPMB_MOTION_ZERO_MASK 0x02 #define FPMB_MOTION_LEFT_MASK 0x04 #define FPMB_MOTION_RIGHT_MASK 0x08 #define FPMB_MOTION_UP_MASK 0x10 #define FPMB_MOTION_DOWN_MASK 0x20 #define FPMB_ERROR_SMALL_MASK 0x40 #define FPMB_ERROR_LARGE_MASK 0x80 #define FPMB_ERROR_SMALL_TH 2000 #define FPMB_ERROR_LARGE_TH 48000 typedef struct { uint8_t *mb_stats_start; uint8_t *mb_stats_end; } FIRSTPASS_MB_STATS; #endif #define INVALID_ROW (-1) #define MAX_ARF_LAYERS 6 #define SECTION_NOISE_DEF 250.0 typedef struct { double frame_mb_intra_factor; double frame_mb_brightness_factor; double frame_mb_neutral_count; } FP_MB_FLOAT_STATS; typedef struct { double intra_factor; double brightness_factor; int64_t coded_error; int64_t sr_coded_error; int64_t frame_noise_energy; int64_t intra_error; int intercount; int second_ref_count; double neutral_count; double intra_count_low; // Coded intra but low variance double intra_count_high; // Coded intra high variance int intra_skip_count; int image_data_start_row; int mvcount; int sum_mvr; int sum_mvr_abs; int sum_mvc; int sum_mvc_abs; int64_t sum_mvrs; int64_t sum_mvcs; int sum_in_vectors; int intra_smooth_count; } FIRSTPASS_DATA; typedef struct { double frame; double weight; double intra_error; double coded_error; double sr_coded_error; double frame_noise_energy; double pcnt_inter; double pcnt_motion; double pcnt_second_ref; double pcnt_neutral; double pcnt_intra_low; // Coded intra but low variance double pcnt_intra_high; // Coded intra high variance double intra_skip_pct; double intra_smooth_pct; // % of blocks that are smooth double inactive_zone_rows; // Image mask rows top and bottom. double inactive_zone_cols; // Image mask columns at left and right edges. double MVr; double mvr_abs; double MVc; double mvc_abs; double MVrv; double MVcv; double mv_in_out_count; double duration; double count; int64_t spatial_layer_id; } FIRSTPASS_STATS; typedef enum { KF_UPDATE = 0, LF_UPDATE = 1, GF_UPDATE = 2, ARF_UPDATE = 3, OVERLAY_UPDATE = 4, MID_OVERLAY_UPDATE = 5, USE_BUF_FRAME = 6, // Use show existing frame, no ref buffer update FRAME_UPDATE_TYPES = 7 } FRAME_UPDATE_TYPE; #define FC_ANIMATION_THRESH 0.15 typedef enum { FC_NORMAL = 0, FC_GRAPHICS_ANIMATION = 1, FRAME_CONTENT_TYPES = 2 } FRAME_CONTENT_TYPE; typedef struct { unsigned char index; RATE_FACTOR_LEVEL rf_level[MAX_STATIC_GF_GROUP_LENGTH + 2]; FRAME_UPDATE_TYPE update_type[MAX_STATIC_GF_GROUP_LENGTH + 2]; unsigned char arf_src_offset[MAX_STATIC_GF_GROUP_LENGTH + 2]; unsigned char layer_depth[MAX_STATIC_GF_GROUP_LENGTH + 2]; unsigned char frame_gop_index[MAX_STATIC_GF_GROUP_LENGTH + 2]; int bit_allocation[MAX_STATIC_GF_GROUP_LENGTH + 2]; int gfu_boost[MAX_STATIC_GF_GROUP_LENGTH + 2]; int frame_start; int frame_end; // TODO(jingning): The array size of arf_stack could be reduced.
int arf_index_stack[MAX_LAG_BUFFERS * 2]; int top_arf_idx; int stack_size; int gf_group_size; int max_layer_depth; int allowed_max_layer_depth; int group_noise_energy; } GF_GROUP; typedef struct { const FIRSTPASS_STATS *stats; int num_frames; } FIRST_PASS_INFO; static INLINE void fps_init_first_pass_info(FIRST_PASS_INFO *first_pass_info, const FIRSTPASS_STATS *stats, int num_frames) { first_pass_info->stats = stats; first_pass_info->num_frames = num_frames; } static INLINE int fps_get_num_frames(const FIRST_PASS_INFO *first_pass_info) { return first_pass_info->num_frames; } static INLINE const FIRSTPASS_STATS *fps_get_frame_stats( const FIRST_PASS_INFO *first_pass_info, int show_idx) { if (show_idx < 0 || show_idx >= first_pass_info->num_frames) { return NULL; } return &first_pass_info->stats[show_idx]; } typedef struct { unsigned int section_intra_rating; unsigned int key_frame_section_intra_rating; FIRSTPASS_STATS total_stats; FIRSTPASS_STATS this_frame_stats; const FIRSTPASS_STATS *stats_in; const FIRSTPASS_STATS *stats_in_start; const FIRSTPASS_STATS *stats_in_end; FIRST_PASS_INFO first_pass_info; FIRSTPASS_STATS total_left_stats; int first_pass_done; int64_t bits_left; double mean_mod_score; double normalized_score_left; double mb_av_energy; double mb_smooth_pct; #if CONFIG_FP_MB_STATS uint8_t *frame_mb_stats_buf; uint8_t *this_frame_mb_stats; FIRSTPASS_MB_STATS firstpass_mb_stats; #endif FP_MB_FLOAT_STATS *fp_mb_float_stats; // An indication of the content type of the current frame FRAME_CONTENT_TYPE fr_content_type; // Projected total bits available for a key frame group of frames int64_t kf_group_bits; // Error score of frames still to be coded in kf group double kf_group_error_left; double bpm_factor; int rolling_arf_group_target_bits; int rolling_arf_group_actual_bits; int sr_update_lag; int kf_zeromotion_pct; int last_kfgroup_zeromotion_pct; int active_worst_quality; int baseline_active_worst_quality; int extend_minq; int extend_maxq; int extend_minq_fast; int arnr_strength_adjustment; int last_qindex_of_arf_layer[MAX_ARF_LAYERS]; GF_GROUP gf_group; } TWO_PASS; struct VP9_COMP; struct ThreadData; struct TileDataEnc; void vp9_init_first_pass(struct VP9_COMP *cpi); void vp9_first_pass(struct VP9_COMP *cpi, const struct lookahead_entry *source); void vp9_end_first_pass(struct VP9_COMP *cpi); void vp9_first_pass_encode_tile_mb_row(struct VP9_COMP *cpi, struct ThreadData *td, FIRSTPASS_DATA *fp_acc_data, struct TileDataEnc *tile_data, MV *best_ref_mv, int mb_row); void vp9_init_second_pass(struct VP9_COMP *cpi); void vp9_rc_get_second_pass_params(struct VP9_COMP *cpi); // Post encode update of the rate control parameters for 2-pass void vp9_twopass_postencode_update(struct VP9_COMP *cpi); void calculate_coded_size(struct VP9_COMP *cpi, int *scaled_frame_width, int *scaled_frame_height); struct VP9EncoderConfig; int vp9_get_frames_to_next_key(const struct VP9EncoderConfig *oxcf, const FRAME_INFO *frame_info, const FIRST_PASS_INFO *first_pass_info, int kf_show_idx, int min_gf_interval); #if CONFIG_RATE_CTRL int vp9_get_coding_frame_num(const struct VP9EncoderConfig *oxcf, const FRAME_INFO *frame_info, const FIRST_PASS_INFO *first_pass_info, int multi_layer_arf, int allow_alt_ref); #endif FIRSTPASS_STATS vp9_get_frame_stats(const TWO_PASS *two_pass); FIRSTPASS_STATS vp9_get_total_stats(const TWO_PASS *two_pass); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_FIRSTPASS_H_ 
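/* Illustrative usage sketch (not part of the library; stats_array, num_frames and show_idx are assumed to be supplied by the caller): the FIRST_PASS_INFO accessors above are bounds checked, so a caller can walk the stats safely: FIRST_PASS_INFO info; fps_init_first_pass_info(&info, stats_array, num_frames); { const FIRSTPASS_STATS *s = fps_get_frame_stats(&info, show_idx); if (s != NULL) { const double pcnt_intra = 1.0 - s->pcnt_inter; (void)pcnt_intra; } } */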
libvpx-1.8.2/vp9/encoder/vp9_frame_scale.c000066400000000000000000000123051357355204000203730ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "./vpx_scale_rtcd.h" #include "vp9/common/vp9_blockd.h" #include "vpx_dsp/vpx_filter.h" #include "vpx_scale/yv12config.h" void vp9_scale_and_extend_frame_c(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, INTERP_FILTER filter_type, int phase_scaler) { const int src_w = src->y_crop_width; const int src_h = src->y_crop_height; const uint8_t *const srcs[3] = { src->y_buffer, src->u_buffer, src->v_buffer }; const int src_strides[3] = { src->y_stride, src->uv_stride, src->uv_stride }; uint8_t *const dsts[3] = { dst->y_buffer, dst->u_buffer, dst->v_buffer }; const int dst_strides[3] = { dst->y_stride, dst->uv_stride, dst->uv_stride }; const InterpKernel *const kernel = vp9_filter_kernels[filter_type]; int x, y, i; #if HAVE_SSSE3 || HAVE_NEON // TODO(linfengz): The 4:3 specialized C code is disabled by default since // it's much slower than the general version which calls vpx_scaled_2d() even // if vpx_scaled_2d() is not optimized. It will only be enabled as a reference // for the platforms which have faster optimization. if (4 * dst->y_crop_width == 3 * src_w && 4 * dst->y_crop_height == 3 * src_h) { // Specialize 4 to 3 scaling. // Example pixel locations. // (O: Original pixel. S: Scaled pixel. X: Overlapped pixel.) // phase_scaler = 0 | phase_scaler = 8 // | // X O S O S O X | O O O O O // | // | // | S S S // | // | // O O O O O | O O O O O // | // S S S S | // | // | // | S S S // O O O O O | O O O O O // | // | // | // S S S S | // | // O O O O O | O O O O O // | S S S // | // | // | // | // X O S O S O X | O O O O O const int dst_ws[3] = { dst->y_crop_width, dst->uv_crop_width, dst->uv_crop_width }; const int dst_hs[3] = { dst->y_crop_height, dst->uv_crop_height, dst->uv_crop_height }; for (i = 0; i < MAX_MB_PLANE; ++i) { const int dst_w = dst_ws[i]; const int dst_h = dst_hs[i]; const int src_stride = src_strides[i]; const int dst_stride = dst_strides[i]; for (y = 0; y < dst_h; y += 3) { for (x = 0; x < dst_w; x += 3) { const uint8_t *src_ptr = srcs[i] + 4 * y / 3 * src_stride + 4 * x / 3; uint8_t *dst_ptr = dsts[i] + y * dst_stride + x; // Must call c function because its optimization doesn't support 3x3. vpx_scaled_2d_c(src_ptr, src_stride, dst_ptr, dst_stride, kernel, phase_scaler, 64 / 3, phase_scaler, 64 / 3, 3, 3); } } } } else #endif { const int dst_w = dst->y_crop_width; const int dst_h = dst->y_crop_height; for (i = 0; i < MAX_MB_PLANE; ++i) { const int factor = (i == 0 || i == 3 ? 
1 : 2); const int src_stride = src_strides[i]; const int dst_stride = dst_strides[i]; for (y = 0; y < dst_h; y += 16) { const int y_q4 = y * (16 / factor) * src_h / dst_h + phase_scaler; for (x = 0; x < dst_w; x += 16) { const int x_q4 = x * (16 / factor) * src_w / dst_w + phase_scaler; const uint8_t *src_ptr = srcs[i] + (y / factor) * src_h / dst_h * src_stride + (x / factor) * src_w / dst_w; uint8_t *dst_ptr = dsts[i] + (y / factor) * dst_stride + (x / factor); vpx_scaled_2d(src_ptr, src_stride, dst_ptr, dst_stride, kernel, x_q4 & 0xf, 16 * src_w / dst_w, y_q4 & 0xf, 16 * src_h / dst_h, 16 / factor, 16 / factor); } } } } vpx_extend_frame_borders(dst); } libvpx-1.8.2/vp9/encoder/vp9_job_queue.h000066400000000000000000000023401357355204000201130ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_JOB_QUEUE_H_ #define VPX_VP9_ENCODER_VP9_JOB_QUEUE_H_ typedef enum { FIRST_PASS_JOB, ENCODE_JOB, ARNR_JOB, NUM_JOB_TYPES, } JOB_TYPE; // Encode job parameters typedef struct { int vert_unit_row_num; // Index of the vertical unit row int tile_col_id; // tile col id within a tile int tile_row_id; // tile row id within a tile } JobNode; // Job queue element parameters typedef struct { // Pointer to the next link in the job queue void *next; // Job information context of the module JobNode job_info; } JobQueue; // Job queue handle typedef struct { // Pointer to the next link in the job queue void *next; // Counter to store the number of jobs picked up for processing int num_jobs_acquired; } JobQueueHandle; #endif // VPX_VP9_ENCODER_VP9_JOB_QUEUE_H_ libvpx-1.8.2/vp9/encoder/vp9_lookahead.c000066400000000000000000000162111357355204000200610ustar00rootroot00000000000000/* * Copyright (c) 2011 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <assert.h> #include <stdlib.h> #include "./vpx_config.h" #include "vp9/common/vp9_common.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_extend.h" #include "vp9/encoder/vp9_lookahead.h" /* Return the buffer at the given absolute index and increment the index */ static struct lookahead_entry *pop(struct lookahead_ctx *ctx, int *idx) { int index = *idx; struct lookahead_entry *buf = ctx->buf + index; assert(index < ctx->max_sz); if (++index >= ctx->max_sz) index -= ctx->max_sz; *idx = index; return buf; } void vp9_lookahead_destroy(struct lookahead_ctx *ctx) { if (ctx) { if (ctx->buf) { int i; for (i = 0; i < ctx->max_sz; i++) vpx_free_frame_buffer(&ctx->buf[i].img); free(ctx->buf); } free(ctx); } } struct lookahead_ctx *vp9_lookahead_init(unsigned int width, unsigned int height, unsigned int subsampling_x, unsigned int subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth, #endif unsigned int depth) { struct lookahead_ctx *ctx = NULL; // Clamp the lookahead queue depth depth = clamp(depth, 1, MAX_LAG_BUFFERS); // Allocate memory to keep previous source frames available. depth += MAX_PRE_FRAMES; // Allocate the lookahead structures ctx = calloc(1, sizeof(*ctx)); if (ctx) { const int legacy_byte_alignment = 0; unsigned int i; ctx->max_sz = depth; ctx->buf = calloc(depth, sizeof(*ctx->buf)); ctx->next_show_idx = 0; if (!ctx->buf) goto bail; for (i = 0; i < depth; i++) if (vpx_alloc_frame_buffer( &ctx->buf[i].img, width, height, subsampling_x, subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, legacy_byte_alignment)) goto bail; } return ctx; bail: vp9_lookahead_destroy(ctx); return NULL; } #define USE_PARTIAL_COPY 0 int vp9_lookahead_full(const struct lookahead_ctx *ctx) { return ctx->sz + 1 + MAX_PRE_FRAMES > ctx->max_sz; } int vp9_lookahead_next_show_idx(const struct lookahead_ctx *ctx) { return ctx->next_show_idx; } int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int64_t ts_start, int64_t ts_end, int use_highbitdepth, vpx_enc_frame_flags_t flags) { struct lookahead_entry *buf; #if USE_PARTIAL_COPY int row, col, active_end; int mb_rows = (src->y_height + 15) >> 4; int mb_cols = (src->y_width + 15) >> 4; #endif int width = src->y_crop_width; int height = src->y_crop_height; int uv_width = src->uv_crop_width; int uv_height = src->uv_crop_height; int subsampling_x = src->subsampling_x; int subsampling_y = src->subsampling_y; int larger_dimensions, new_dimensions; #if !CONFIG_VP9_HIGHBITDEPTH (void)use_highbitdepth; assert(use_highbitdepth == 0); #endif if (vp9_lookahead_full(ctx)) return 1; ctx->sz++; buf = pop(ctx, &ctx->write_idx); new_dimensions = width != buf->img.y_crop_width || height != buf->img.y_crop_height || uv_width != buf->img.uv_crop_width || uv_height != buf->img.uv_crop_height; larger_dimensions = width > buf->img.y_width || height > buf->img.y_height || uv_width > buf->img.uv_width || uv_height > buf->img.uv_height; assert(!larger_dimensions || new_dimensions); #if USE_PARTIAL_COPY // TODO(jkoleszar): This is disabled for now, as // vp9_copy_and_extend_frame_with_rect is not subsampling/alpha aware. // Only do this partial copy if the following conditions are all met: // 1. Lookahead queue has a size of 1. // 2. Active map is provided. // 3. This is not a key frame, golden frame, nor altref frame. if (!new_dimensions && ctx->max_sz == 1 && active_map && !flags) { for (row = 0; row < mb_rows; ++row) { col = 0; while (1) { // Find the first active macroblock in this row.
for (; col < mb_cols; ++col) { if (active_map[col]) break; } // No more active macroblocks in this row. if (col == mb_cols) break; // Find the end of active region in this row. active_end = col; for (; active_end < mb_cols; ++active_end) { if (!active_map[active_end]) break; } // Only copy this active region. vp9_copy_and_extend_frame_with_rect(src, &buf->img, row << 4, col << 4, 16, (active_end - col) << 4); // Start again from the end of this active region. col = active_end; } active_map += mb_cols; } } else { #endif if (larger_dimensions) { YV12_BUFFER_CONFIG new_img; memset(&new_img, 0, sizeof(new_img)); if (vpx_alloc_frame_buffer(&new_img, width, height, subsampling_x, subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, 0)) return 1; vpx_free_frame_buffer(&buf->img); buf->img = new_img; } else if (new_dimensions) { buf->img.y_crop_width = src->y_crop_width; buf->img.y_crop_height = src->y_crop_height; buf->img.uv_crop_width = src->uv_crop_width; buf->img.uv_crop_height = src->uv_crop_height; buf->img.subsampling_x = src->subsampling_x; buf->img.subsampling_y = src->subsampling_y; } // Partial copy not implemented yet vp9_copy_and_extend_frame(src, &buf->img); #if USE_PARTIAL_COPY } #endif buf->ts_start = ts_start; buf->ts_end = ts_end; buf->flags = flags; buf->show_idx = ctx->next_show_idx; ++ctx->next_show_idx; return 0; } struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx, int drain) { struct lookahead_entry *buf = NULL; if (ctx && ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { buf = pop(ctx, &ctx->read_idx); ctx->sz--; } return buf; } struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx, int index) { struct lookahead_entry *buf = NULL; if (index >= 0) { // Forward peek if (index < ctx->sz) { index += ctx->read_idx; if (index >= ctx->max_sz) index -= ctx->max_sz; buf = ctx->buf + index; } } else if (index < 0) { // Backward peek if (-index <= MAX_PRE_FRAMES) { index += ctx->read_idx; if (index < 0) index += ctx->max_sz; buf = ctx->buf + index; } } return buf; } unsigned int vp9_lookahead_depth(struct lookahead_ctx *ctx) { return ctx->sz; } libvpx-1.8.2/vp9/encoder/vp9_lookahead.h000066400000000000000000000105331357355204000200670ustar00rootroot00000000000000/* * Copyright (c) 2011 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_LOOKAHEAD_H_ #define VPX_VP9_ENCODER_VP9_LOOKAHEAD_H_ #include "vpx_scale/yv12config.h" #include "vpx/vpx_encoder.h" #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif #define MAX_LAG_BUFFERS 25 struct lookahead_entry { YV12_BUFFER_CONFIG img; int64_t ts_start; int64_t ts_end; int show_idx; /* The show_idx of this frame */ vpx_enc_frame_flags_t flags; }; // The max of past frames we want to keep in the queue.
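/* For illustration: vp9_lookahead_full() treats the queue as full once sz + 1 + MAX_PRE_FRAMES > max_sz, i.e. one slot is reserved for the incoming frame and MAX_PRE_FRAMES slots keep already-shown source frames available. With a requested depth of 19 (max_sz = 20 once vp9_lookahead_init() adds MAX_PRE_FRAMES), pushes succeed until 19 frames are queued. */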
#define MAX_PRE_FRAMES 1 struct lookahead_ctx { int max_sz; /* Absolute size of the queue */ int sz; /* Number of buffers currently in the queue */ int read_idx; /* Read index */ int write_idx; /* Write index */ int next_show_idx; /* The show_idx that will be assigned to the next frame being pushed in the queue */ struct lookahead_entry *buf; /* Buffer list */ }; /**\brief Initializes the lookahead stage * * The lookahead stage is a queue of frame buffers on which some analysis * may be done when buffers are enqueued. */ struct lookahead_ctx *vp9_lookahead_init(unsigned int width, unsigned int height, unsigned int subsampling_x, unsigned int subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth, #endif unsigned int depth); /**\brief Destroys the lookahead stage */ void vp9_lookahead_destroy(struct lookahead_ctx *ctx); /**\brief Check if lookahead is full * * \param[in] ctx Pointer to the lookahead context * * Return 1 if lookahead is full, otherwise return 0. */ int vp9_lookahead_full(const struct lookahead_ctx *ctx); /**\brief Return the next_show_idx * * \param[in] ctx Pointer to the lookahead context * * Return the show_idx that will be assigned to the next * frame pushed by vp9_lookahead_push() */ int vp9_lookahead_next_show_idx(const struct lookahead_ctx *ctx); /**\brief Enqueue a source buffer * * This function will copy the source image into a new framebuffer with * the expected stride/border. * * \param[in] ctx Pointer to the lookahead context * \param[in] src Pointer to the image to enqueue * \param[in] ts_start Timestamp for the start of this frame * \param[in] ts_end Timestamp for the end of this frame * \param[in] use_highbitdepth Whether the source buffer uses high bitdepth * \param[in] flags Flags set on this frame */ int vp9_lookahead_push(struct lookahead_ctx *ctx, YV12_BUFFER_CONFIG *src, int64_t ts_start, int64_t ts_end, int use_highbitdepth, vpx_enc_frame_flags_t flags); /**\brief Get the next source buffer to encode * * * \param[in] ctx Pointer to the lookahead context * \param[in] drain Flag indicating the buffer should be drained * (return a buffer regardless of the current queue depth) * * \retval NULL, if drain set and queue is empty * \retval NULL, if drain not set and queue not of the configured depth */ struct lookahead_entry *vp9_lookahead_pop(struct lookahead_ctx *ctx, int drain); /**\brief Get a future source buffer to encode * * \param[in] ctx Pointer to the lookahead context * \param[in] index Index of the frame to be returned, 0 == next frame * * \retval NULL, if no buffer exists at the specified index */ struct lookahead_entry *vp9_lookahead_peek(struct lookahead_ctx *ctx, int index); /**\brief Get the number of frames currently in the lookahead queue * * \param[in] ctx Pointer to the lookahead context */ unsigned int vp9_lookahead_depth(struct lookahead_ctx *ctx); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_LOOKAHEAD_H_ libvpx-1.8.2/vp9/encoder/vp9_mbgraph.c000066400000000000000000000334011357355204000175520ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS.
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <limits.h> #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/system_state.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_mcomp.h" #include "vp9/common/vp9_blockd.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/common/vp9_reconintra.h" static unsigned int do_16x16_motion_iteration(VP9_COMP *cpi, const MV *ref_mv, MV *dst_mv, int mb_row, int mb_col) { MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; const SEARCH_METHODS old_search_method = mv_sf->search_method; const vp9_variance_fn_ptr_t v_fn_ptr = cpi->fn_ptr[BLOCK_16X16]; const MvLimits tmp_mv_limits = x->mv_limits; MV ref_full; int cost_list[5]; // Further step/diamond searches as necessary int step_param = mv_sf->reduce_first_step_size; step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); vp9_set_mv_search_range(&x->mv_limits, ref_mv); ref_full.col = ref_mv->col >> 3; ref_full.row = ref_mv->row >> 3; mv_sf->search_method = HEX; vp9_full_pixel_search(cpi, x, BLOCK_16X16, &ref_full, step_param, cpi->sf.mv.search_method, x->errorperbit, cond_cost_list(cpi, cost_list), ref_mv, dst_mv, 0, 0); mv_sf->search_method = old_search_method; /* restore UMV window */ x->mv_limits = tmp_mv_limits; // Try sub-pixel MC // if (bestsme > error_thresh && bestsme < INT_MAX) { uint32_t distortion; uint32_t sse; // TODO(yunqing): may use higher tap interp filter than 2 taps if needed. cpi->find_fractional_mv_step( x, dst_mv, ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &v_fn_ptr, 0, mv_sf->subpel_search_level, cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, 0, 0, USE_2_TAPS); } xd->mi[0]->mode = NEWMV; xd->mi[0]->mv[0].as_mv = *dst_mv; vp9_build_inter_predictors_sby(xd, mb_row, mb_col, BLOCK_16X16); return vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride); } static int do_16x16_motion_search(VP9_COMP *cpi, const MV *ref_mv, int_mv *dst_mv, int mb_row, int mb_col) { MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; unsigned int err, tmp_err; MV tmp_mv; // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); dst_mv->as_int = 0; // Test last reference frame using the previous best mv as the // starting point (best reference) for the search tmp_err = do_16x16_motion_iteration(cpi, ref_mv, &tmp_mv, mb_row, mb_col); if (tmp_err < err) { err = tmp_err; dst_mv->as_mv = tmp_mv; } // If the current best reference mv is not centered on 0,0 then do a 0,0 // based search as well.
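/* For illustration: the predicted ref_mv can drag the diamond search into a local minimum away from the origin, so when ref_mv is non-zero the code below repeats the search seeded from (0, 0) and keeps whichever candidate yields the lower SAD. */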
if (ref_mv->row != 0 || ref_mv->col != 0) { unsigned int tmp_err; MV zero_ref_mv = { 0, 0 }, tmp_mv; tmp_err = do_16x16_motion_iteration(cpi, &zero_ref_mv, &tmp_mv, mb_row, mb_col); if (tmp_err < err) { dst_mv->as_mv = tmp_mv; err = tmp_err; } } return err; } static int do_16x16_zerozero_search(VP9_COMP *cpi, int_mv *dst_mv) { MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; unsigned int err; // Try zero MV first // FIXME should really use something like near/nearest MV and/or MV prediction err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].pre[0].buf, xd->plane[0].pre[0].stride); dst_mv->as_int = 0; return err; } static int find_best_16x16_intra(VP9_COMP *cpi, PREDICTION_MODE *pbest_mode) { MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; PREDICTION_MODE best_mode = -1, mode; unsigned int best_err = INT_MAX; // Calculate the SAD for each intra prediction mode; // we're intentionally not doing 4x4, we just want a rough estimate for (mode = DC_PRED; mode <= TM_PRED; mode++) { unsigned int err; xd->mi[0]->mode = mode; vp9_predict_intra_block(xd, 2, TX_16X16, mode, x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride, 0, 0, 0); err = vpx_sad16x16(x->plane[0].src.buf, x->plane[0].src.stride, xd->plane[0].dst.buf, xd->plane[0].dst.stride); // Find the best mode. if (err < best_err) { best_err = err; best_mode = mode; } } if (pbest_mode) *pbest_mode = best_mode; return best_err; } static void update_mbgraph_mb_stats(VP9_COMP *cpi, MBGRAPH_MB_STATS *stats, YV12_BUFFER_CONFIG *buf, int mb_y_offset, YV12_BUFFER_CONFIG *golden_ref, const MV *prev_golden_ref_mv, YV12_BUFFER_CONFIG *alt_ref, int mb_row, int mb_col) { MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; int intra_error; VP9_COMMON *cm = &cpi->common; // FIXME in practice we're completely ignoring chroma here x->plane[0].src.buf = buf->y_buffer + mb_y_offset; x->plane[0].src.stride = buf->y_stride; xd->plane[0].dst.buf = get_frame_new_buffer(cm)->y_buffer + mb_y_offset; xd->plane[0].dst.stride = get_frame_new_buffer(cm)->y_stride; // do intra 16x16 prediction intra_error = find_best_16x16_intra(cpi, &stats->ref[INTRA_FRAME].m.mode); if (intra_error <= 0) intra_error = 1; stats->ref[INTRA_FRAME].err = intra_error; // Golden frame MV search, if it exists and is different than last frame if (golden_ref) { int g_motion_error; xd->plane[0].pre[0].buf = golden_ref->y_buffer + mb_y_offset; xd->plane[0].pre[0].stride = golden_ref->y_stride; g_motion_error = do_16x16_motion_search(cpi, prev_golden_ref_mv, &stats->ref[GOLDEN_FRAME].m.mv, mb_row, mb_col); stats->ref[GOLDEN_FRAME].err = g_motion_error; } else { stats->ref[GOLDEN_FRAME].err = INT_MAX; stats->ref[GOLDEN_FRAME].m.mv.as_int = 0; } // Do an Alt-ref frame MV search, if it exists and is different than // last/golden frame.
if (alt_ref) { int a_motion_error; xd->plane[0].pre[0].buf = alt_ref->y_buffer + mb_y_offset; xd->plane[0].pre[0].stride = alt_ref->y_stride; a_motion_error = do_16x16_zerozero_search(cpi, &stats->ref[ALTREF_FRAME].m.mv); stats->ref[ALTREF_FRAME].err = a_motion_error; } else { stats->ref[ALTREF_FRAME].err = INT_MAX; stats->ref[ALTREF_FRAME].m.mv.as_int = 0; } } static void update_mbgraph_frame_stats(VP9_COMP *cpi, MBGRAPH_FRAME_STATS *stats, YV12_BUFFER_CONFIG *buf, YV12_BUFFER_CONFIG *golden_ref, YV12_BUFFER_CONFIG *alt_ref) { MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &x->e_mbd; VP9_COMMON *const cm = &cpi->common; int mb_col, mb_row, offset = 0; int mb_y_offset = 0, arf_y_offset = 0, gld_y_offset = 0; MV gld_top_mv = { 0, 0 }; MODE_INFO mi_local; MODE_INFO mi_above, mi_left; vp9_zero(mi_local); // Set up limit values for motion vectors to prevent them extending outside // the UMV borders. x->mv_limits.row_min = -BORDER_MV_PIXELS_B16; x->mv_limits.row_max = (cm->mb_rows - 1) * 8 + BORDER_MV_PIXELS_B16; // Signal to vp9_predict_intra_block() that above is not available xd->above_mi = NULL; xd->plane[0].dst.stride = buf->y_stride; xd->plane[0].pre[0].stride = buf->y_stride; xd->plane[1].dst.stride = buf->uv_stride; xd->mi[0] = &mi_local; mi_local.sb_type = BLOCK_16X16; mi_local.ref_frame[0] = LAST_FRAME; mi_local.ref_frame[1] = NONE; for (mb_row = 0; mb_row < cm->mb_rows; mb_row++) { MV gld_left_mv = gld_top_mv; int mb_y_in_offset = mb_y_offset; int arf_y_in_offset = arf_y_offset; int gld_y_in_offset = gld_y_offset; // Set up limit values for motion vectors to prevent them extending outside // the UMV borders. x->mv_limits.col_min = -BORDER_MV_PIXELS_B16; x->mv_limits.col_max = (cm->mb_cols - 1) * 8 + BORDER_MV_PIXELS_B16; // Signal to vp9_predict_intra_block() that left is not available xd->left_mi = NULL; for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { MBGRAPH_MB_STATS *mb_stats = &stats->mb_stats[offset + mb_col]; update_mbgraph_mb_stats(cpi, mb_stats, buf, mb_y_in_offset, golden_ref, &gld_left_mv, alt_ref, mb_row, mb_col); gld_left_mv = mb_stats->ref[GOLDEN_FRAME].m.mv.as_mv; if (mb_col == 0) { gld_top_mv = gld_left_mv; } // Signal to vp9_predict_intra_block() that left is available xd->left_mi = &mi_left; mb_y_in_offset += 16; gld_y_in_offset += 16; arf_y_in_offset += 16; x->mv_limits.col_min -= 16; x->mv_limits.col_max -= 16; } // Signal to vp9_predict_intra_block() that above is available xd->above_mi = &mi_above; mb_y_offset += buf->y_stride * 16; gld_y_offset += golden_ref->y_stride * 16; if (alt_ref) arf_y_offset += alt_ref->y_stride * 16; x->mv_limits.row_min -= 16; x->mv_limits.row_max -= 16; offset += cm->mb_cols; } } // void separate_arf_mbs_byzz static void separate_arf_mbs(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; int mb_col, mb_row, offset, i; int mi_row, mi_col; int ncnt[4] = { 0 }; int n_frames = cpi->mbgraph_n_frames; int *arf_not_zz; CHECK_MEM_ERROR( cm, arf_not_zz, vpx_calloc(cm->mb_rows * cm->mb_cols * sizeof(*arf_not_zz), 1)); // We are not interested in results beyond the alt ref itself. 
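/* For illustration (hypothetical numbers): if the ARF group ends 10 frames ahead (frames_till_gf_update_due = 10) but 16 frames sit in the lookahead, only the first 10 are scanned; stats past the ARF belong to the next group and would bias the static-region segmentation. */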
if (n_frames > cpi->rc.frames_till_gf_update_due) n_frames = cpi->rc.frames_till_gf_update_due; // defer cost to reference frames for (i = n_frames - 1; i >= 0; i--) { MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i]; for (offset = 0, mb_row = 0; mb_row < cm->mb_rows; offset += cm->mb_cols, mb_row++) { for (mb_col = 0; mb_col < cm->mb_cols; mb_col++) { MBGRAPH_MB_STATS *mb_stats = &frame_stats->mb_stats[offset + mb_col]; int altref_err = mb_stats->ref[ALTREF_FRAME].err; int intra_err = mb_stats->ref[INTRA_FRAME].err; int golden_err = mb_stats->ref[GOLDEN_FRAME].err; // Test for altref vs intra and gf and that its mv was 0,0. if (altref_err > 1000 || altref_err > intra_err || altref_err > golden_err) { arf_not_zz[offset + mb_col]++; } } } } // arf_not_zz is indexed by MB, but this loop is indexed by MI to avoid out // of bound access in segmentation_map for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { // If any of the blocks in the sequence failed then the MB // goes in segment 0 if (arf_not_zz[mi_row / 2 * cm->mb_cols + mi_col / 2]) { ncnt[0]++; cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 0; } else { cpi->segmentation_map[mi_row * cm->mi_cols + mi_col] = 1; ncnt[1]++; } } } // Only bother with segmentation if over 10% of the MBs in static segment // if ( ncnt[1] && (ncnt[0] / ncnt[1] < 10) ) if (1) { // Note % of blocks that are marked as static if (cm->MBs) cpi->static_mb_pct = (ncnt[1] * 100) / (cm->mi_rows * cm->mi_cols); // This error case should not be reachable as this function should // never be called with the common data structure uninitialized. else cpi->static_mb_pct = 0; vp9_enable_segmentation(&cm->seg); } else { cpi->static_mb_pct = 0; vp9_disable_segmentation(&cm->seg); } // Free locally allocated storage vpx_free(arf_not_zz); } void vp9_update_mbgraph_stats(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; int i, n_frames = vp9_lookahead_depth(cpi->lookahead); YV12_BUFFER_CONFIG *golden_ref = get_ref_frame_buffer(cpi, GOLDEN_FRAME); assert(golden_ref != NULL); // we need to look ahead beyond where the ARF transitions into // being a GF - so exit if we don't look ahead beyond that if (n_frames <= cpi->rc.frames_till_gf_update_due) return; if (n_frames > MAX_LAG_BUFFERS) n_frames = MAX_LAG_BUFFERS; cpi->mbgraph_n_frames = n_frames; for (i = 0; i < n_frames; i++) { MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i]; memset(frame_stats->mb_stats, 0, cm->mb_rows * cm->mb_cols * sizeof(*cpi->mbgraph_stats[i].mb_stats)); } // do motion search to find contribution of each reference to data // later on in this GF group // FIXME really, the GF/last MC search should be done forward, and // the ARF MC search backwards, to get optimal results for MV caching for (i = 0; i < n_frames; i++) { MBGRAPH_FRAME_STATS *frame_stats = &cpi->mbgraph_stats[i]; struct lookahead_entry *q_cur = vp9_lookahead_peek(cpi->lookahead, i); assert(q_cur != NULL); update_mbgraph_frame_stats(cpi, frame_stats, &q_cur->img, golden_ref, cpi->Source); } vpx_clear_system_state(); separate_arf_mbs(cpi); } libvpx-1.8.2/vp9/encoder/vp9_mbgraph.h000066400000000000000000000016211357355204000175600ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS.
 *  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_ENCODER_VP9_MBGRAPH_H_
#define VPX_VP9_ENCODER_VP9_MBGRAPH_H_

#ifdef __cplusplus
extern "C" {
#endif

typedef struct {
  struct {
    int err;
    union {
      int_mv mv;
      PREDICTION_MODE mode;
    } m;
  } ref[MAX_REF_FRAMES];
} MBGRAPH_MB_STATS;

typedef struct {
  MBGRAPH_MB_STATS *mb_stats;
} MBGRAPH_FRAME_STATS;

struct VP9_COMP;

void vp9_update_mbgraph_stats(struct VP9_COMP *cpi);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_ENCODER_VP9_MBGRAPH_H_
libvpx-1.8.2/vp9/encoder/vp9_mcomp.c000066400000000000000000003440021357355204000172470ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <limits.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_reconinter.h"

#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_mcomp.h"

// #define NEW_DIAMOND_SEARCH

void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv) {
  int col_min = (mv->col >> 3) - MAX_FULL_PEL_VAL + (mv->col & 7 ? 1 : 0);
  int row_min = (mv->row >> 3) - MAX_FULL_PEL_VAL + (mv->row & 7 ? 1 : 0);
  int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL;
  int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;

  col_min = VPXMAX(col_min, (MV_LOW >> 3) + 1);
  row_min = VPXMAX(row_min, (MV_LOW >> 3) + 1);
  col_max = VPXMIN(col_max, (MV_UPP >> 3) - 1);
  row_max = VPXMIN(row_max, (MV_UPP >> 3) - 1);

  // Get intersection of UMV window and valid MV window to reduce # of checks
  // in diamond search.
  if (mv_limits->col_min < col_min) mv_limits->col_min = col_min;
  if (mv_limits->col_max > col_max) mv_limits->col_max = col_max;
  if (mv_limits->row_min < row_min) mv_limits->row_min = row_min;
  if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
}

void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits,
                                    const MvLimits *umv_window_limits,
                                    const MV *ref_mv) {
  subpel_mv_limits->col_min = VPXMAX(umv_window_limits->col_min * 8,
                                     ref_mv->col - MAX_FULL_PEL_VAL * 8);
  subpel_mv_limits->col_max = VPXMIN(umv_window_limits->col_max * 8,
                                     ref_mv->col + MAX_FULL_PEL_VAL * 8);
  subpel_mv_limits->row_min = VPXMAX(umv_window_limits->row_min * 8,
                                     ref_mv->row - MAX_FULL_PEL_VAL * 8);
  subpel_mv_limits->row_max = VPXMIN(umv_window_limits->row_max * 8,
                                     ref_mv->row + MAX_FULL_PEL_VAL * 8);

  subpel_mv_limits->col_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->col_min);
  subpel_mv_limits->col_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->col_max);
  subpel_mv_limits->row_min = VPXMAX(MV_LOW + 1, subpel_mv_limits->row_min);
  subpel_mv_limits->row_max = VPXMIN(MV_UPP - 1, subpel_mv_limits->row_max);
}

int vp9_init_search_range(int size) {
  int sr = 0;
  // Minimum search size no matter what the passed in value.
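  // Illustrative trace, assuming the default MAX_FULL_PEL_VAL of 1023 from
  // vp9_mcomp.h: size = 64 clamps to 64, and 64 << 4 = 1024 is the first
  // shift reaching 1023, so sr = 4 initial diamond steps can be skipped.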
size = VPXMAX(16, size); while ((size << sr) < MAX_FULL_PEL_VAL) sr++; sr = VPXMIN(sr, MAX_MVSEARCH_STEPS - 2); return sr; } static INLINE int mv_cost(const MV *mv, const int *joint_cost, int *const comp_cost[2]) { assert(mv->row >= -MV_MAX && mv->row < MV_MAX); assert(mv->col >= -MV_MAX && mv->col < MV_MAX); return joint_cost[vp9_get_mv_joint(mv)] + comp_cost[0][mv->row] + comp_cost[1][mv->col]; } int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost, int *mvcost[2], int weight) { const MV diff = { mv->row - ref->row, mv->col - ref->col }; return ROUND_POWER_OF_TWO(mv_cost(&diff, mvjcost, mvcost) * weight, 7); } #define PIXEL_TRANSFORM_ERROR_SCALE 4 static int mv_err_cost(const MV *mv, const MV *ref, const int *mvjcost, int *mvcost[2], int error_per_bit) { if (mvcost) { const MV diff = { mv->row - ref->row, mv->col - ref->col }; return (int)ROUND64_POWER_OF_TWO( (int64_t)mv_cost(&diff, mvjcost, mvcost) * error_per_bit, RDDIV_BITS + VP9_PROB_COST_SHIFT - RD_EPB_SHIFT + PIXEL_TRANSFORM_ERROR_SCALE); } return 0; } static int mvsad_err_cost(const MACROBLOCK *x, const MV *mv, const MV *ref, int sad_per_bit) { const MV diff = { mv->row - ref->row, mv->col - ref->col }; return ROUND_POWER_OF_TWO( (unsigned)mv_cost(&diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit, VP9_PROB_COST_SHIFT); } void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride) { int len; int ss_count = 0; for (len = MAX_FIRST_STEP; len > 0; len /= 2) { // Generate offsets for 4 search sites per step. const MV ss_mvs[] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len } }; int i; for (i = 0; i < 4; ++i, ++ss_count) { cfg->ss_mv[ss_count] = ss_mvs[i]; cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col; } } cfg->searches_per_step = 4; cfg->total_steps = ss_count / cfg->searches_per_step; } void vp9_init3smotion_compensation(search_site_config *cfg, int stride) { int len; int ss_count = 0; for (len = MAX_FIRST_STEP; len > 0; len /= 2) { // Generate offsets for 8 search sites per step. 
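    // The eight sites form a square ring around the centre at distance len:
    //   (-len,-len) (-len,  0) (-len,+len)
    //   (   0,-len)   centre   (   0,+len)
    //   (+len,-len) (+len,  0) (+len,+len)
    // Halving len each pass steps the ring down to single-pel spacing.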
const MV ss_mvs[8] = { { -len, 0 }, { len, 0 }, { 0, -len }, { 0, len }, { -len, -len }, { -len, len }, { len, -len }, { len, len } }; int i; for (i = 0; i < 8; ++i, ++ss_count) { cfg->ss_mv[ss_count] = ss_mvs[i]; cfg->ss_os[ss_count] = ss_mvs[i].row * stride + ss_mvs[i].col; } } cfg->searches_per_step = 8; cfg->total_steps = ss_count / cfg->searches_per_step; } // convert motion vector component to offset for sv[a]f calc static INLINE int sp(int x) { return x & 7; } static INLINE const uint8_t *pre(const uint8_t *buf, int stride, int r, int c) { return &buf[(r >> 3) * stride + (c >> 3)]; } #if CONFIG_VP9_HIGHBITDEPTH /* checks if (r, c) has better score than previous best */ #define CHECK_BETTER(v, r, c) \ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ int64_t tmpmse; \ const MV mv = { r, c }; \ const MV ref_mv = { rr, rc }; \ if (second_pred == NULL) { \ thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ src_stride, &sse); \ } else { \ thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ src_stride, &sse, second_pred); \ } \ tmpmse = thismse; \ tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \ if (tmpmse >= INT_MAX) { \ v = INT_MAX; \ } else if ((v = (uint32_t)tmpmse) < besterr) { \ besterr = v; \ br = r; \ bc = c; \ *distortion = thismse; \ *sse1 = sse; \ } \ } else { \ v = INT_MAX; \ } #else /* checks if (r, c) has better score than previous best */ #define CHECK_BETTER(v, r, c) \ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ const MV mv = { r, c }; \ const MV ref_mv = { rr, rc }; \ if (second_pred == NULL) \ thismse = vfp->svf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ src_stride, &sse); \ else \ thismse = vfp->svaf(pre(y, y_stride, r, c), y_stride, sp(c), sp(r), z, \ src_stride, &sse, second_pred); \ if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \ thismse) < besterr) { \ besterr = v; \ br = r; \ bc = c; \ *distortion = thismse; \ *sse1 = sse; \ } \ } else { \ v = INT_MAX; \ } #endif #define FIRST_LEVEL_CHECKS \ { \ unsigned int left, right, up, down, diag; \ CHECK_BETTER(left, tr, tc - hstep); \ CHECK_BETTER(right, tr, tc + hstep); \ CHECK_BETTER(up, tr - hstep, tc); \ CHECK_BETTER(down, tr + hstep, tc); \ whichdir = (left < right ? 0 : 1) + (up < down ? 
0 : 2); \ switch (whichdir) { \ case 0: CHECK_BETTER(diag, tr - hstep, tc - hstep); break; \ case 1: CHECK_BETTER(diag, tr - hstep, tc + hstep); break; \ case 2: CHECK_BETTER(diag, tr + hstep, tc - hstep); break; \ case 3: CHECK_BETTER(diag, tr + hstep, tc + hstep); break; \ } \ } #define SECOND_LEVEL_CHECKS \ { \ int kr, kc; \ unsigned int second; \ if (tr != br && tc != bc) { \ kr = br - tr; \ kc = bc - tc; \ CHECK_BETTER(second, tr + kr, tc + 2 * kc); \ CHECK_BETTER(second, tr + 2 * kr, tc + kc); \ } else if (tr == br && tc != bc) { \ kc = bc - tc; \ CHECK_BETTER(second, tr + hstep, tc + 2 * kc); \ CHECK_BETTER(second, tr - hstep, tc + 2 * kc); \ switch (whichdir) { \ case 0: \ case 1: CHECK_BETTER(second, tr + hstep, tc + kc); break; \ case 2: \ case 3: CHECK_BETTER(second, tr - hstep, tc + kc); break; \ } \ } else if (tr != br && tc == bc) { \ kr = br - tr; \ CHECK_BETTER(second, tr + 2 * kr, tc + hstep); \ CHECK_BETTER(second, tr + 2 * kr, tc - hstep); \ switch (whichdir) { \ case 0: \ case 2: CHECK_BETTER(second, tr + kr, tc + hstep); break; \ case 1: \ case 3: CHECK_BETTER(second, tr + kr, tc - hstep); break; \ } \ } \ } #define SETUP_SUBPEL_SEARCH \ const uint8_t *const z = x->plane[0].src.buf; \ const int src_stride = x->plane[0].src.stride; \ const MACROBLOCKD *xd = &x->e_mbd; \ unsigned int besterr = UINT_MAX; \ unsigned int sse; \ unsigned int whichdir; \ int thismse; \ const unsigned int halfiters = iters_per_step; \ const unsigned int quarteriters = iters_per_step; \ const unsigned int eighthiters = iters_per_step; \ const int y_stride = xd->plane[0].pre[0].stride; \ const int offset = bestmv->row * y_stride + bestmv->col; \ const uint8_t *const y = xd->plane[0].pre[0].buf; \ \ int rr = ref_mv->row; \ int rc = ref_mv->col; \ int br = bestmv->row * 8; \ int bc = bestmv->col * 8; \ int hstep = 4; \ int minc, maxc, minr, maxr; \ int tr = br; \ int tc = bc; \ MvLimits subpel_mv_limits; \ \ vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); \ minc = subpel_mv_limits.col_min; \ maxc = subpel_mv_limits.col_max; \ minr = subpel_mv_limits.row_min; \ maxr = subpel_mv_limits.row_max; \ \ bestmv->row *= 8; \ bestmv->col *= 8; static unsigned int setup_center_error( const MACROBLOCKD *xd, const MV *bestmv, const MV *ref_mv, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, const uint8_t *const src, const int src_stride, const uint8_t *const y, int y_stride, const uint8_t *second_pred, int w, int h, int offset, int *mvjcost, int *mvcost[2], uint32_t *sse1, uint32_t *distortion) { #if CONFIG_VP9_HIGHBITDEPTH uint64_t besterr; if (second_pred != NULL) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w, h, CONVERT_TO_SHORTPTR(y + offset), y_stride); besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src, src_stride, sse1); } else { DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr = vfp->vf(comp_pred, w, src, src_stride, sse1); } } else { besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); } *distortion = (uint32_t)besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); if (besterr >= UINT_MAX) return UINT_MAX; return (uint32_t)besterr; #else uint32_t besterr; (void)xd; if (second_pred != NULL) { DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); vpx_comp_avg_pred(comp_pred, second_pred, w, h, y + offset, y_stride); besterr 
= vfp->vf(comp_pred, w, src, src_stride, sse1); } else { besterr = vfp->vf(y + offset, y_stride, src, src_stride, sse1); } *distortion = besterr; besterr += mv_err_cost(bestmv, ref_mv, mvjcost, mvcost, error_per_bit); return besterr; #endif // CONFIG_VP9_HIGHBITDEPTH } static INLINE int64_t divide_and_round(const int64_t n, const int64_t d) { return ((n < 0) ^ (d < 0)) ? ((n - d / 2) / d) : ((n + d / 2) / d); } static INLINE int is_cost_list_wellbehaved(int *cost_list) { return cost_list[0] < cost_list[1] && cost_list[0] < cost_list[2] && cost_list[0] < cost_list[3] && cost_list[0] < cost_list[4]; } // Returns surface minima estimate at given precision in 1/2^n bits. // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C // For a given set of costs S0, S1, S2, S3, S4 at points // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively, // the solution for the location of the minima (x0, y0) is given by: // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0), // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0). // The code below is an integerized version of that. static void get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) { const int64_t x0 = (int64_t)cost_list[1] - cost_list[3]; const int64_t y0 = cost_list[1] - 2 * (int64_t)cost_list[0] + cost_list[3]; const int64_t x1 = (int64_t)cost_list[4] - cost_list[2]; const int64_t y1 = cost_list[4] - 2 * (int64_t)cost_list[0] + cost_list[2]; const int b = 1 << (bits - 1); *ic = (int)divide_and_round(x0 * b, y0); *ir = (int)divide_and_round(x1 * b, y1); } uint32_t vp9_skip_sub_pixel_tree( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); (void)halfiters; (void)quarteriters; (void)eighthiters; (void)whichdir; (void)allow_hp; (void)forced_stop; (void)hstep; (void)rr; (void)rc; (void)minr; (void)minc; (void)maxr; (void)maxc; (void)tr; (void)tc; (void)sse; (void)thismse; (void)cost_list; (void)use_accurate_subpel_search; return besterr; } uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); (void)halfiters; (void)quarteriters; (void)eighthiters; (void)whichdir; (void)allow_hp; (void)forced_stop; (void)hstep; (void)use_accurate_subpel_search; if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { int ir, ic; unsigned int minpt = INT_MAX; get_cost_surf_min(cost_list, &ir, &ic, 2); if (ir != 0 || ic != 0) { CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic); } } else { FIRST_LEVEL_CHECKS; if (halfiters > 1) { SECOND_LEVEL_CHECKS; } tr = br; tc = bc; // Each subsequent iteration checks at 
least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only if (forced_stop != 2) { hstep >>= 1; FIRST_LEVEL_CHECKS; if (quarteriters > 1) { SECOND_LEVEL_CHECKS; } } } tr = br; tc = bc; if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { hstep >>= 1; FIRST_LEVEL_CHECKS; if (eighthiters > 1) { SECOND_LEVEL_CHECKS; } } bestmv->row = br; bestmv->col = bc; return besterr; } uint32_t vp9_find_best_sub_pixel_tree_pruned_more( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; (void)use_accurate_subpel_search; besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { unsigned int minpt; int ir, ic; get_cost_surf_min(cost_list, &ir, &ic, 1); if (ir != 0 || ic != 0) { CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep); } } else { FIRST_LEVEL_CHECKS; if (halfiters > 1) { SECOND_LEVEL_CHECKS; } } // Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only if (forced_stop != 2) { tr = br; tc = bc; hstep >>= 1; FIRST_LEVEL_CHECKS; if (quarteriters > 1) { SECOND_LEVEL_CHECKS; } } if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { tr = br; tc = bc; hstep >>= 1; FIRST_LEVEL_CHECKS; if (eighthiters > 1) { SECOND_LEVEL_CHECKS; } } // These lines insure static analysis doesn't warn that // tr and tc aren't used after the above point. (void)tr; (void)tc; bestmv->row = br; bestmv->col = bc; return besterr; } uint32_t vp9_find_best_sub_pixel_tree_pruned( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) { SETUP_SUBPEL_SEARCH; (void)use_accurate_subpel_search; besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX && cost_list[2] != INT_MAX && cost_list[3] != INT_MAX && cost_list[4] != INT_MAX) { unsigned int left, right, up, down, diag; whichdir = (cost_list[1] < cost_list[3] ? 0 : 1) + (cost_list[2] < cost_list[4] ? 
0 : 2); switch (whichdir) { case 0: CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(down, tr + hstep, tc); CHECK_BETTER(diag, tr + hstep, tc - hstep); break; case 1: CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(down, tr + hstep, tc); CHECK_BETTER(diag, tr + hstep, tc + hstep); break; case 2: CHECK_BETTER(left, tr, tc - hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(diag, tr - hstep, tc - hstep); break; case 3: CHECK_BETTER(right, tr, tc + hstep); CHECK_BETTER(up, tr - hstep, tc); CHECK_BETTER(diag, tr - hstep, tc + hstep); break; } } else { FIRST_LEVEL_CHECKS; if (halfiters > 1) { SECOND_LEVEL_CHECKS; } } tr = br; tc = bc; // Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only if (forced_stop != 2) { hstep >>= 1; FIRST_LEVEL_CHECKS; if (quarteriters > 1) { SECOND_LEVEL_CHECKS; } tr = br; tc = bc; } if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) { hstep >>= 1; FIRST_LEVEL_CHECKS; if (eighthiters > 1) { SECOND_LEVEL_CHECKS; } tr = br; tc = bc; } // These lines insure static analysis doesn't warn that // tr and tc aren't used after the above point. (void)tr; (void)tc; bestmv->row = br; bestmv->col = bc; return besterr; } /* clang-format off */ static const MV search_step_table[12] = { // left, right, up, down { 0, -4 }, { 0, 4 }, { -4, 0 }, { 4, 0 }, { 0, -2 }, { 0, 2 }, { -2, 0 }, { 2, 0 }, { 0, -1 }, { 0, 1 }, { -1, 0 }, { 1, 0 } }; /* clang-format on */ static int accurate_sub_pel_search( const MACROBLOCKD *xd, const MV *this_mv, const struct scale_factors *sf, const InterpKernel *kernel, const vp9_variance_fn_ptr_t *vfp, const uint8_t *const src_address, const int src_stride, const uint8_t *const pre_address, int y_stride, const uint8_t *second_pred, int w, int h, uint32_t *sse) { #if CONFIG_VP9_HIGHBITDEPTH uint64_t besterr; assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16); assert(w != 0 && h != 0); if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { DECLARE_ALIGNED(16, uint16_t, pred16[64 * 64]); vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(pre_address), y_stride, pred16, w, this_mv, sf, w, h, 0, kernel, MV_PRECISION_Q3, 0, 0, xd->bd); if (second_pred != NULL) { DECLARE_ALIGNED(16, uint16_t, comp_pred16[64 * 64]); vpx_highbd_comp_avg_pred(comp_pred16, CONVERT_TO_SHORTPTR(second_pred), w, h, pred16, w); besterr = vfp->vf(CONVERT_TO_BYTEPTR(comp_pred16), w, src_address, src_stride, sse); } else { besterr = vfp->vf(CONVERT_TO_BYTEPTR(pred16), w, src_address, src_stride, sse); } } else { DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h, 0, kernel, MV_PRECISION_Q3, 0, 0); if (second_pred != NULL) { DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w); besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse); } else { besterr = vfp->vf(pred, w, src_address, src_stride, sse); } } if (besterr >= UINT_MAX) return UINT_MAX; return (int)besterr; #else int besterr; DECLARE_ALIGNED(16, uint8_t, pred[64 * 64]); assert(sf->x_step_q4 == 16 && sf->y_step_q4 == 16); assert(w != 0 && h != 0); (void)xd; vp9_build_inter_predictor(pre_address, y_stride, pred, w, this_mv, sf, w, h, 0, kernel, MV_PRECISION_Q3, 0, 0); if (second_pred != NULL) { DECLARE_ALIGNED(16, uint8_t, comp_pred[64 * 64]); vpx_comp_avg_pred(comp_pred, second_pred, w, h, pred, w); besterr = vfp->vf(comp_pred, w, src_address, src_stride, sse); } else 
{ besterr = vfp->vf(pred, w, src_address, src_stride, sse); } return besterr; #endif // CONFIG_VP9_HIGHBITDEPTH } // TODO(yunqing): this part can be further refactored. #if CONFIG_VP9_HIGHBITDEPTH /* checks if (r, c) has better score than previous best */ #define CHECK_BETTER1(v, r, c) \ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ int64_t tmpmse; \ const MV mv = { r, c }; \ const MV ref_mv = { rr, rc }; \ thismse = \ accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \ y, y_stride, second_pred, w, h, &sse); \ tmpmse = thismse; \ tmpmse += mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit); \ if (tmpmse >= INT_MAX) { \ v = INT_MAX; \ } else if ((v = (uint32_t)tmpmse) < besterr) { \ besterr = v; \ br = r; \ bc = c; \ *distortion = thismse; \ *sse1 = sse; \ } \ } else { \ v = INT_MAX; \ } #else /* checks if (r, c) has better score than previous best */ #define CHECK_BETTER1(v, r, c) \ if (c >= minc && c <= maxc && r >= minr && r <= maxr) { \ const MV mv = { r, c }; \ const MV ref_mv = { rr, rc }; \ thismse = \ accurate_sub_pel_search(xd, &mv, x->me_sf, kernel, vfp, z, src_stride, \ y, y_stride, second_pred, w, h, &sse); \ if ((v = mv_err_cost(&mv, &ref_mv, mvjcost, mvcost, error_per_bit) + \ thismse) < besterr) { \ besterr = v; \ br = r; \ bc = c; \ *distortion = thismse; \ *sse1 = sse; \ } \ } else { \ v = INT_MAX; \ } #endif uint32_t vp9_find_best_sub_pixel_tree( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) { const uint8_t *const z = x->plane[0].src.buf; const uint8_t *const src_address = z; const int src_stride = x->plane[0].src.stride; const MACROBLOCKD *xd = &x->e_mbd; unsigned int besterr = UINT_MAX; unsigned int sse; int thismse; const int y_stride = xd->plane[0].pre[0].stride; const int offset = bestmv->row * y_stride + bestmv->col; const uint8_t *const y = xd->plane[0].pre[0].buf; int rr = ref_mv->row; int rc = ref_mv->col; int br = bestmv->row * 8; int bc = bestmv->col * 8; int hstep = 4; int iter, round = 3 - forced_stop; int minc, maxc, minr, maxr; int tr = br; int tc = bc; const MV *search_step = search_step_table; int idx, best_idx = -1; unsigned int cost_array[5]; int kr, kc; MvLimits subpel_mv_limits; // TODO(yunqing): need to add 4-tap filter optimization to speed up the // encoder. const InterpKernel *kernel = (use_accurate_subpel_search > 0) ? ((use_accurate_subpel_search == USE_4_TAPS) ? vp9_filter_kernels[FOURTAP] : ((use_accurate_subpel_search == USE_8_TAPS) ? vp9_filter_kernels[EIGHTTAP] : vp9_filter_kernels[EIGHTTAP_SHARP])) : vp9_filter_kernels[BILINEAR]; vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); minc = subpel_mv_limits.col_min; maxc = subpel_mv_limits.col_max; minr = subpel_mv_limits.row_min; maxr = subpel_mv_limits.row_max; if (!(allow_hp && use_mv_hp(ref_mv))) if (round == 3) round = 2; bestmv->row *= 8; bestmv->col *= 8; besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z, src_stride, y, y_stride, second_pred, w, h, offset, mvjcost, mvcost, sse1, distortion); (void)cost_list; // to silence compiler warning for (iter = 0; iter < round; ++iter) { // Check vertical and horizontal sub-pixel positions. 
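    // hstep starts at 4 (1/2 pel in 1/8-pel MV units) and is halved each
    // round to 2 (1/4 pel) and then 1 (1/8 pel); search_step points at the
    // row of search_step_table that matches the current step size.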
for (idx = 0; idx < 4; ++idx) { tr = br + search_step[idx].row; tc = bc + search_step[idx].col; if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { MV this_mv; this_mv.row = tr; this_mv.col = tc; if (use_accurate_subpel_search) { thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp, src_address, src_stride, y, y_stride, second_pred, w, h, &sse); } else { const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); if (second_pred == NULL) thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse); else thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse, second_pred); } cost_array[idx] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); if (cost_array[idx] < besterr) { best_idx = idx; besterr = cost_array[idx]; *distortion = thismse; *sse1 = sse; } } else { cost_array[idx] = UINT_MAX; } } // Check diagonal sub-pixel position kc = (cost_array[0] <= cost_array[1] ? -hstep : hstep); kr = (cost_array[2] <= cost_array[3] ? -hstep : hstep); tc = bc + kc; tr = br + kr; if (tc >= minc && tc <= maxc && tr >= minr && tr <= maxr) { MV this_mv = { tr, tc }; if (use_accurate_subpel_search) { thismse = accurate_sub_pel_search(xd, &this_mv, x->me_sf, kernel, vfp, src_address, src_stride, y, y_stride, second_pred, w, h, &sse); } else { const uint8_t *const pre_address = y + (tr >> 3) * y_stride + (tc >> 3); if (second_pred == NULL) thismse = vfp->svf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse); else thismse = vfp->svaf(pre_address, y_stride, sp(tc), sp(tr), src_address, src_stride, &sse, second_pred); } cost_array[4] = thismse + mv_err_cost(&this_mv, ref_mv, mvjcost, mvcost, error_per_bit); if (cost_array[4] < besterr) { best_idx = 4; besterr = cost_array[4]; *distortion = thismse; *sse1 = sse; } } else { cost_array[idx] = UINT_MAX; } if (best_idx < 4 && best_idx >= 0) { br += search_step[best_idx].row; bc += search_step[best_idx].col; } else if (best_idx == 4) { br = tr; bc = tc; } if (iters_per_step > 0 && best_idx != -1) { unsigned int second; const int br0 = br; const int bc0 = bc; assert(tr == br || tc == bc); if (tr == br && tc != bc) { kc = bc - tc; if (iters_per_step == 1) { if (use_accurate_subpel_search) { CHECK_BETTER1(second, br0, bc0 + kc); } else { CHECK_BETTER(second, br0, bc0 + kc); } } } else if (tr != br && tc == bc) { kr = br - tr; if (iters_per_step == 1) { if (use_accurate_subpel_search) { CHECK_BETTER1(second, br0 + kr, bc0); } else { CHECK_BETTER(second, br0 + kr, bc0); } } } if (iters_per_step > 1) { if (use_accurate_subpel_search) { CHECK_BETTER1(second, br0 + kr, bc0); CHECK_BETTER1(second, br0, bc0 + kc); if (br0 != br || bc0 != bc) { CHECK_BETTER1(second, br0 + kr, bc0 + kc); } } else { CHECK_BETTER(second, br0 + kr, bc0); CHECK_BETTER(second, br0, bc0 + kc); if (br0 != br || bc0 != bc) { CHECK_BETTER(second, br0 + kr, bc0 + kc); } } } } search_step += 4; hstep >>= 1; best_idx = -1; } // Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel // These lines insure static analysis doesn't warn that // tr and tc aren't used after the above point. 
  (void)tr;
  (void)tc;

  bestmv->row = br;
  bestmv->col = bc;

  return besterr;
}

#undef CHECK_BETTER
#undef CHECK_BETTER1

static INLINE int check_bounds(const MvLimits *mv_limits, int row, int col,
                               int range) {
  return ((row - range) >= mv_limits->row_min) &
         ((row + range) <= mv_limits->row_max) &
         ((col - range) >= mv_limits->col_min) &
         ((col + range) <= mv_limits->col_max);
}

static INLINE int is_mv_in(const MvLimits *mv_limits, const MV *mv) {
  return (mv->col >= mv_limits->col_min) && (mv->col <= mv_limits->col_max) &&
         (mv->row >= mv_limits->row_min) && (mv->row <= mv_limits->row_max);
}

#define CHECK_BETTER                                                      \
  {                                                                       \
    if (thissad < bestsad) {                                              \
      if (use_mvcost)                                                     \
        thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); \
      if (thissad < bestsad) {                                            \
        bestsad = thissad;                                                \
        best_site = i;                                                    \
      }                                                                   \
    }                                                                     \
  }

#define MAX_PATTERN_SCALES 11
#define MAX_PATTERN_CANDIDATES 8  // max number of candidates per scale
#define PATTERN_CANDIDATES_REF 3  // number of refinement candidates

// Calculate and return a sad+mvcost list around an integer best pel.
static INLINE void calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv,
                                      int sadpb,
                                      const vp9_variance_fn_ptr_t *fn_ptr,
                                      const MV *best_mv, int *cost_list) {
  static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } };
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0];
  const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 };
  int br = best_mv->row;
  int bc = best_mv->col;
  MV this_mv;
  int i;
  unsigned int sse;

  this_mv.row = br;
  this_mv.col = bc;
  cost_list[0] =
      fn_ptr->vf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv),
                 in_what->stride, &sse) +
      mvsad_err_cost(x, &this_mv, &fcenter_mv, sadpb);
  if (check_bounds(&x->mv_limits, br, bc, 1)) {
    for (i = 0; i < 4; i++) {
      const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
      cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
                                    get_buf_from_mv(in_what, &this_mv),
                                    in_what->stride, &sse) +
                         mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
                                     x->mvcost, x->errorperbit);
    }
  } else {
    for (i = 0; i < 4; i++) {
      const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
      if (!is_mv_in(&x->mv_limits, &this_mv))
        cost_list[i + 1] = INT_MAX;
      else
        cost_list[i + 1] = fn_ptr->vf(what->buf, what->stride,
                                      get_buf_from_mv(in_what, &this_mv),
                                      in_what->stride, &sse) +
                           mv_err_cost(&this_mv, &fcenter_mv, x->nmvjointcost,
                                       x->mvcost, x->errorperbit);
    }
  }
}

// Generic pattern search function that searches over multiple scales.
// Each scale can have a different number of candidates and shape of // candidates as indicated in the num_candidates and candidates arrays // passed into this function // static int vp9_pattern_search( const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv, const int num_candidates[MAX_PATTERN_SCALES], const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { const MACROBLOCKD *const xd = &x->e_mbd; static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, }; int i, s, t; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; int br, bc; int bestsad = INT_MAX; int thissad; int k = -1; const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; int best_init_s = search_param_to_steps[search_param]; // adjust ref_mv to make sure it is within MV range clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max); br = ref_mv->row; bc = ref_mv->col; // Work out the start point for the search bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of // further steps around it. if (do_init_search) { s = best_init_s; best_init_s = -1; for (t = 0; t <= s; ++t) { int best_site = -1; if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { for (i = 0; i < num_candidates[t]; i++) { const MV this_mv = { br + candidates[t][i].row, bc + candidates[t][i].col }; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } else { for (i = 0; i < num_candidates[t]; i++) { const MV this_mv = { br + candidates[t][i].row, bc + candidates[t][i].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } if (best_site == -1) { continue; } else { best_init_s = t; k = best_site; } } if (best_init_s != -1) { br += candidates[best_init_s][k].row; bc += candidates[best_init_s][k].col; } } // If the center point is still the best, just skip this and move to // the refinement step. if (best_init_s != -1) { int best_site = -1; s = best_init_s; do { // No need to search all 6 points the 1st time if initial search was used if (!do_init_search || s != best_init_s) { if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = { br + candidates[s][i].row, bc + candidates[s][i].col }; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } else { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = { br + candidates[s][i].row, bc + candidates[s][i].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } if (best_site == -1) { continue; } else { br += candidates[s][best_site].row; bc += candidates[s][best_site].col; k = best_site; } } do { int next_chkpts_indices[PATTERN_CANDIDATES_REF]; best_site = -1; next_chkpts_indices[0] = (k == 0) ? 
num_candidates[s] - 1 : k - 1; next_chkpts_indices[1] = k; next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = { br + candidates[s][next_chkpts_indices[i]].row, bc + candidates[s][next_chkpts_indices[i]].col }; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } else { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = { br + candidates[s][next_chkpts_indices[i]].row, bc + candidates[s][next_chkpts_indices[i]].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } if (best_site != -1) { k = next_chkpts_indices[best_site]; br += candidates[s][k].row; bc += candidates[s][k].col; } } while (best_site != -1); } while (s--); } // Returns the one-away integer pel sad values around the best as follows: // cost_list[0]: cost at the best integer pel // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel if (cost_list) { const MV best_mv = { br, bc }; calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, &best_mv, cost_list); } best_mv->row = br; best_mv->col = bc; return bestsad; } // A specialized function where the smallest scale search candidates // are 4 1-away neighbors, and cost_list is non-null // TODO(debargha): Merge this function with the one above. Also remove // use_mvcost option since it is always 1, to save unnecessary branches. static int vp9_pattern_search_sad( const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv, const int num_candidates[MAX_PATTERN_SCALES], const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { const MACROBLOCKD *const xd = &x->e_mbd; static const int search_param_to_steps[MAX_MVSEARCH_STEPS] = { 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, }; int i, s, t; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; int br, bc; int bestsad = INT_MAX; int thissad; int k = -1; const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; int best_init_s = search_param_to_steps[search_param]; // adjust ref_mv to make sure it is within MV range clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max); br = ref_mv->row; bc = ref_mv->col; if (cost_list != NULL) { cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; } // Work out the start point for the search bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit); // Search all possible scales upto the search param around the center point // pick the scale of the point that is best as the starting scale of // further steps around it. 
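  // (search_param_to_steps[] inverts search_param, so a larger search_param
  // starts the scan at a smaller scale and visits fewer candidate rings.)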
if (do_init_search) { s = best_init_s; best_init_s = -1; for (t = 0; t <= s; ++t) { int best_site = -1; if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { for (i = 0; i < num_candidates[t]; i++) { const MV this_mv = { br + candidates[t][i].row, bc + candidates[t][i].col }; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } else { for (i = 0; i < num_candidates[t]; i++) { const MV this_mv = { br + candidates[t][i].row, bc + candidates[t][i].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } if (best_site == -1) { continue; } else { best_init_s = t; k = best_site; } } if (best_init_s != -1) { br += candidates[best_init_s][k].row; bc += candidates[best_init_s][k].col; } } // If the center point is still the best, just skip this and move to // the refinement step. if (best_init_s != -1) { int do_sad = (num_candidates[0] == 4 && cost_list != NULL); int best_site = -1; s = best_init_s; for (; s >= do_sad; s--) { if (!do_init_search || s != best_init_s) { if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = { br + candidates[s][i].row, bc + candidates[s][i].col }; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } else { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = { br + candidates[s][i].row, bc + candidates[s][i].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } if (best_site == -1) { continue; } else { br += candidates[s][best_site].row; bc += candidates[s][best_site].col; k = best_site; } } do { int next_chkpts_indices[PATTERN_CANDIDATES_REF]; best_site = -1; next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; next_chkpts_indices[1] = k; next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = { br + candidates[s][next_chkpts_indices[i]].row, bc + candidates[s][next_chkpts_indices[i]].col }; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } else { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = { br + candidates[s][next_chkpts_indices[i]].row, bc + candidates[s][next_chkpts_indices[i]].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) continue; thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } if (best_site != -1) { k = next_chkpts_indices[best_site]; br += candidates[s][k].row; bc += candidates[s][k].col; } } while (best_site != -1); } // Note: If we enter the if below, then cost_list must be non-NULL. 
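    // (With do_sad == 1 the loop above stopped before the 1-away scale, so
    // the four remaining neighbor sads can be written directly into
    // cost_list[1..4] while they are evaluated.)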
if (s == 0) { cost_list[0] = bestsad; if (!do_init_search || s != best_init_s) { if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = { br + candidates[s][i].row, bc + candidates[s][i].col }; cost_list[i + 1] = thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } else { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = { br + candidates[s][i].row, bc + candidates[s][i].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) continue; cost_list[i + 1] = thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } if (best_site != -1) { br += candidates[s][best_site].row; bc += candidates[s][best_site].col; k = best_site; } } while (best_site != -1) { int next_chkpts_indices[PATTERN_CANDIDATES_REF]; best_site = -1; next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1; next_chkpts_indices[1] = k; next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1; cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] = INT_MAX; cost_list[((k + 2) % 4) + 1] = cost_list[0]; cost_list[0] = bestsad; if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = { br + candidates[s][next_chkpts_indices[i]].row, bc + candidates[s][next_chkpts_indices[i]].col }; cost_list[next_chkpts_indices[i] + 1] = thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } else { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = { br + candidates[s][next_chkpts_indices[i]].row, bc + candidates[s][next_chkpts_indices[i]].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) { cost_list[next_chkpts_indices[i] + 1] = INT_MAX; continue; } cost_list[next_chkpts_indices[i] + 1] = thissad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); CHECK_BETTER } } if (best_site != -1) { k = next_chkpts_indices[best_site]; br += candidates[s][k].row; bc += candidates[s][k].col; } } } } // Returns the one-away integer pel sad values around the best as follows: // cost_list[0]: sad at the best integer pel // cost_list[1]: sad at delta {0, -1} (left) from the best integer pel // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel // cost_list[3]: sad at delta { 0, 1} (right) from the best integer pel // cost_list[4]: sad at delta {-1, 0} (top) from the best integer pel if (cost_list) { static const MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; if (cost_list[0] == INT_MAX) { cost_list[0] = bestsad; if (check_bounds(&x->mv_limits, br, bc, 1)) { for (i = 0; i < 4; i++) { const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; cost_list[i + 1] = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); } } else { for (i = 0; i < 4; i++) { const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) cost_list[i + 1] = INT_MAX; else cost_list[i + 1] = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &this_mv), in_what->stride); } } } else { if (use_mvcost) { for (i = 0; i < 4; i++) { const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; if (cost_list[i + 1] != INT_MAX) { cost_list[i + 1] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); } } } } } best_mv->row = br; best_mv->col = bc; return bestsad; } int vp9_get_mvpred_var(const 
MACROBLOCK *x, const MV *best_mv, const MV *center_mv, const vp9_variance_fn_ptr_t *vfp, int use_mvcost) { const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV mv = { best_mv->row * 8, best_mv->col * 8 }; uint32_t unused; #if CONFIG_VP9_HIGHBITDEPTH uint64_t err = vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), in_what->stride, &unused); err += (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, x->errorperbit) : 0); if (err >= INT_MAX) return INT_MAX; return (int)err; #else return vfp->vf(what->buf, what->stride, get_buf_from_mv(in_what, best_mv), in_what->stride, &unused) + (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, x->errorperbit) : 0); #endif } int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv, const MV *center_mv, const uint8_t *second_pred, const vp9_variance_fn_ptr_t *vfp, int use_mvcost) { const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV mv = { best_mv->row * 8, best_mv->col * 8 }; unsigned int unused; return vfp->svaf(get_buf_from_mv(in_what, best_mv), in_what->stride, 0, 0, what->buf, what->stride, &unused, second_pred) + (use_mvcost ? mv_err_cost(&mv, center_mv, x->nmvjointcost, x->mvcost, x->errorperbit) : 0); } static int hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { // First scale has 8-closest points, the rest have 6 points in hex shape // at increasing scales static const int hex_num_candidates[MAX_PATTERN_SCALES] = { 8, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6 }; // Note that the largest candidate step at each scale is 2^scale /* clang-format off */ static const MV hex_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 }, { -1, 0 } }, { { -1, -2 }, { 1, -2 }, { 2, 0 }, { 1, 2 }, { -1, 2 }, { -2, 0 } }, { { -2, -4 }, { 2, -4 }, { 4, 0 }, { 2, 4 }, { -2, 4 }, { -4, 0 } }, { { -4, -8 }, { 4, -8 }, { 8, 0 }, { 4, 8 }, { -4, 8 }, { -8, 0 } }, { { -8, -16 }, { 8, -16 }, { 16, 0 }, { 8, 16 }, { -8, 16 }, { -16, 0 } }, { { -16, -32 }, { 16, -32 }, { 32, 0 }, { 16, 32 }, { -16, 32 }, { -32, 0 } }, { { -32, -64 }, { 32, -64 }, { 64, 0 }, { 32, 64 }, { -32, 64 }, { -64, 0 } }, { { -64, -128 }, { 64, -128 }, { 128, 0 }, { 64, 128 }, { -64, 128 }, { -128, 0 } }, { { -128, -256 }, { 128, -256 }, { 256, 0 }, { 128, 256 }, { -128, 256 }, { -256, 0 } }, { { -256, -512 }, { 256, -512 }, { 512, 0 }, { 256, 512 }, { -256, 512 }, { -512, 0 } }, { { -512, -1024 }, { 512, -1024 }, { 1024, 0 }, { 512, 1024 }, { -512, 1024 }, { -1024, 0 } } }; /* clang-format on */ return vp9_pattern_search( x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv, hex_num_candidates, hex_candidates); } static int bigdia_search(const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { // First scale has 4-closest points, the rest have 8 points in diamond // shape at increasing scales static const int bigdia_num_candidates[MAX_PATTERN_SCALES] = { 4, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, }; // Note that the largest candidate step at 
each scale is 2^scale /* clang-format off */ static const MV bigdia_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }, { { -1, -1 }, { 0, -2 }, { 1, -1 }, { 2, 0 }, { 1, 1 }, { 0, 2 }, { -1, 1 }, { -2, 0 } }, { { -2, -2 }, { 0, -4 }, { 2, -2 }, { 4, 0 }, { 2, 2 }, { 0, 4 }, { -2, 2 }, { -4, 0 } }, { { -4, -4 }, { 0, -8 }, { 4, -4 }, { 8, 0 }, { 4, 4 }, { 0, 8 }, { -4, 4 }, { -8, 0 } }, { { -8, -8 }, { 0, -16 }, { 8, -8 }, { 16, 0 }, { 8, 8 }, { 0, 16 }, { -8, 8 }, { -16, 0 } }, { { -16, -16 }, { 0, -32 }, { 16, -16 }, { 32, 0 }, { 16, 16 }, { 0, 32 }, { -16, 16 }, { -32, 0 } }, { { -32, -32 }, { 0, -64 }, { 32, -32 }, { 64, 0 }, { 32, 32 }, { 0, 64 }, { -32, 32 }, { -64, 0 } }, { { -64, -64 }, { 0, -128 }, { 64, -64 }, { 128, 0 }, { 64, 64 }, { 0, 128 }, { -64, 64 }, { -128, 0 } }, { { -128, -128 }, { 0, -256 }, { 128, -128 }, { 256, 0 }, { 128, 128 }, { 0, 256 }, { -128, 128 }, { -256, 0 } }, { { -256, -256 }, { 0, -512 }, { 256, -256 }, { 512, 0 }, { 256, 256 }, { 0, 512 }, { -256, 256 }, { -512, 0 } }, { { -512, -512 }, { 0, -1024 }, { 512, -512 }, { 1024, 0 }, { 512, 512 }, { 0, 1024 }, { -512, 512 }, { -1024, 0 } } }; /* clang-format on */ return vp9_pattern_search_sad( x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv, bigdia_num_candidates, bigdia_candidates); } static int square_search(const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { // All scales have 8 closest points in square shape static const int square_num_candidates[MAX_PATTERN_SCALES] = { 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, }; // Note that the largest candidate step at each scale is 2^scale /* clang-format off */ static const MV square_candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES] = { { { -1, -1 }, { 0, -1 }, { 1, -1 }, { 1, 0 }, { 1, 1 }, { 0, 1 }, { -1, 1 }, { -1, 0 } }, { { -2, -2 }, { 0, -2 }, { 2, -2 }, { 2, 0 }, { 2, 2 }, { 0, 2 }, { -2, 2 }, { -2, 0 } }, { { -4, -4 }, { 0, -4 }, { 4, -4 }, { 4, 0 }, { 4, 4 }, { 0, 4 }, { -4, 4 }, { -4, 0 } }, { { -8, -8 }, { 0, -8 }, { 8, -8 }, { 8, 0 }, { 8, 8 }, { 0, 8 }, { -8, 8 }, { -8, 0 } }, { { -16, -16 }, { 0, -16 }, { 16, -16 }, { 16, 0 }, { 16, 16 }, { 0, 16 }, { -16, 16 }, { -16, 0 } }, { { -32, -32 }, { 0, -32 }, { 32, -32 }, { 32, 0 }, { 32, 32 }, { 0, 32 }, { -32, 32 }, { -32, 0 } }, { { -64, -64 }, { 0, -64 }, { 64, -64 }, { 64, 0 }, { 64, 64 }, { 0, 64 }, { -64, 64 }, { -64, 0 } }, { { -128, -128 }, { 0, -128 }, { 128, -128 }, { 128, 0 }, { 128, 128 }, { 0, 128 }, { -128, 128 }, { -128, 0 } }, { { -256, -256 }, { 0, -256 }, { 256, -256 }, { 256, 0 }, { 256, 256 }, { 0, 256 }, { -256, 256 }, { -256, 0 } }, { { -512, -512 }, { 0, -512 }, { 512, -512 }, { 512, 0 }, { 512, 512 }, { 0, 512 }, { -512, 512 }, { -512, 0 } }, { { -1024, -1024 }, { 0, -1024 }, { 1024, -1024 }, { 1024, 0 }, { 1024, 1024 }, { 0, 1024 }, { -1024, 1024 }, { -1024, 0 } } }; /* clang-format on */ return vp9_pattern_search( x, ref_mv, search_param, sad_per_bit, do_init_search, cost_list, vfp, use_mvcost, center_mv, best_mv, square_num_candidates, square_candidates); } static int fast_hex_search(const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, int do_init_search, // must be zero for fast_hex int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv) { return hex_search(x, ref_mv, 
                    VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param), sad_per_bit,
                    do_init_search, cost_list, vfp, use_mvcost, center_mv,
                    best_mv);
}

static int fast_dia_search(const MACROBLOCK *x, MV *ref_mv, int search_param,
                           int sad_per_bit, int do_init_search, int *cost_list,
                           const vp9_variance_fn_ptr_t *vfp, int use_mvcost,
                           const MV *center_mv, MV *best_mv) {
  return bigdia_search(x, ref_mv, VPXMAX(MAX_MVSEARCH_STEPS - 2, search_param),
                       sad_per_bit, do_init_search, cost_list, vfp, use_mvcost,
                       center_mv, best_mv);
}

#undef CHECK_BETTER

// Exhaustive motion search around a given centre position with a given
// step size.
static int exhaustive_mesh_search(const MACROBLOCK *x, MV *ref_mv, MV *best_mv,
                                  int range, int step, int sad_per_bit,
                                  const vp9_variance_fn_ptr_t *fn_ptr,
                                  const MV *center_mv) {
  const MACROBLOCKD *const xd = &x->e_mbd;
  const struct buf_2d *const what = &x->plane[0].src;
  const struct buf_2d *const in_what = &xd->plane[0].pre[0];
  MV fcenter_mv = { center_mv->row, center_mv->col };
  unsigned int best_sad = INT_MAX;
  int r, c, i;
  int start_col, end_col, start_row, end_row;
  int col_step = (step > 1) ? step : 4;

  assert(step >= 1);

  clamp_mv(&fcenter_mv, x->mv_limits.col_min, x->mv_limits.col_max,
           x->mv_limits.row_min, x->mv_limits.row_max);
  *best_mv = fcenter_mv;
  best_sad =
      fn_ptr->sdf(what->buf, what->stride,
                  get_buf_from_mv(in_what, &fcenter_mv), in_what->stride) +
      mvsad_err_cost(x, &fcenter_mv, ref_mv, sad_per_bit);
  start_row = VPXMAX(-range, x->mv_limits.row_min - fcenter_mv.row);
  start_col = VPXMAX(-range, x->mv_limits.col_min - fcenter_mv.col);
  end_row = VPXMIN(range, x->mv_limits.row_max - fcenter_mv.row);
  end_col = VPXMIN(range, x->mv_limits.col_max - fcenter_mv.col);

  for (r = start_row; r <= end_row; r += step) {
    for (c = start_col; c <= end_col; c += col_step) {
      // Step > 1 means we are not checking every location in this pass.
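      // (Coarse passes sample every step-th column; only the step == 1 pass
      // below visits contiguous columns and can batch them through sdx4df.)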
if (step > 1) { const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c }; unsigned int sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); if (sad < best_sad) { best_sad = sad; *best_mv = mv; } } } else { // 4 sads in a single call if we are checking every location if (c + 3 <= end_col) { unsigned int sads[4]; const uint8_t *addrs[4]; for (i = 0; i < 4; ++i) { const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; addrs[i] = get_buf_from_mv(in_what, &mv); } fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads); for (i = 0; i < 4; ++i) { if (sads[i] < best_sad) { const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; const unsigned int sad = sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); if (sad < best_sad) { best_sad = sad; *best_mv = mv; } } } } else { for (i = 0; i < end_col - c; ++i) { const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; unsigned int sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); if (sad < best_sad) { best_sad = sad; *best_mv = mv; } } } } } } } return best_sad; } #define MIN_RANGE 7 #define MAX_RANGE 256 #define MIN_INTERVAL 1 #if CONFIG_NON_GREEDY_MV static int64_t exhaustive_mesh_search_multi_step( MV *best_mv, const MV *center_mv, int range, int step, const struct buf_2d *src, const struct buf_2d *pre, int lambda, const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits, const vp9_variance_fn_ptr_t *fn_ptr) { int64_t best_sad; int r, c; int start_col, end_col, start_row, end_row; *best_mv = *center_mv; best_sad = ((int64_t)fn_ptr->sdf(src->buf, src->stride, get_buf_from_mv(pre, center_mv), pre->stride) << LOG2_PRECISION) + lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num); start_row = VPXMAX(center_mv->row - range, mv_limits->row_min); start_col = VPXMAX(center_mv->col - range, mv_limits->col_min); end_row = VPXMIN(center_mv->row + range, mv_limits->row_max); end_col = VPXMIN(center_mv->col + range, mv_limits->col_max); for (r = start_row; r <= end_row; r += step) { for (c = start_col; c <= end_col; c += step) { const MV mv = { r, c }; int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride, get_buf_from_mv(pre, &mv), pre->stride) << LOG2_PRECISION; if (sad < best_sad) { sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); if (sad < best_sad) { best_sad = sad; *best_mv = mv; } } } } return best_sad; } static int64_t exhaustive_mesh_search_single_step( MV *best_mv, const MV *center_mv, int range, const struct buf_2d *src, const struct buf_2d *pre, int lambda, const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits, const vp9_variance_fn_ptr_t *fn_ptr) { int64_t best_sad; int r, c, i; int start_col, end_col, start_row, end_row; *best_mv = *center_mv; best_sad = ((int64_t)fn_ptr->sdf(src->buf, src->stride, get_buf_from_mv(pre, center_mv), pre->stride) << LOG2_PRECISION) + lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num); start_row = VPXMAX(center_mv->row - range, mv_limits->row_min); start_col = VPXMAX(center_mv->col - range, mv_limits->col_min); end_row = VPXMIN(center_mv->row + range, mv_limits->row_max); end_col = VPXMIN(center_mv->col + range, mv_limits->col_max); for (r = start_row; r <= end_row; r += 1) { c = start_col; // sdx8f may not be available some block size if (fn_ptr->sdx8f) { while (c + 7 <= 
end_col) { unsigned int sads[8]; const MV mv = { r, c }; const uint8_t *buf = get_buf_from_mv(pre, &mv); fn_ptr->sdx8f(src->buf, src->stride, buf, pre->stride, sads); for (i = 0; i < 8; ++i) { int64_t sad = (int64_t)sads[i] << LOG2_PRECISION; if (sad < best_sad) { const MV mv = { r, c + i }; sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); if (sad < best_sad) { best_sad = sad; *best_mv = mv; } } } c += 8; } } while (c + 3 <= end_col) { unsigned int sads[4]; const uint8_t *addrs[4]; for (i = 0; i < 4; ++i) { const MV mv = { r, c + i }; addrs[i] = get_buf_from_mv(pre, &mv); } fn_ptr->sdx4df(src->buf, src->stride, addrs, pre->stride, sads); for (i = 0; i < 4; ++i) { int64_t sad = (int64_t)sads[i] << LOG2_PRECISION; if (sad < best_sad) { const MV mv = { r, c + i }; sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); if (sad < best_sad) { best_sad = sad; *best_mv = mv; } } } c += 4; } while (c <= end_col) { const MV mv = { r, c }; int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride, get_buf_from_mv(pre, &mv), pre->stride) << LOG2_PRECISION; if (sad < best_sad) { sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); if (sad < best_sad) { best_sad = sad; *best_mv = mv; } } c += 1; } } return best_sad; } static int64_t exhaustive_mesh_search_new(const MACROBLOCK *x, MV *best_mv, int range, int step, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv, int lambda, const int_mv *nb_full_mvs, int full_mv_num) { const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *src = &x->plane[0].src; const struct buf_2d *pre = &xd->plane[0].pre[0]; assert(step >= 1); assert(is_mv_in(&x->mv_limits, center_mv)); if (step == 1) { return exhaustive_mesh_search_single_step( best_mv, center_mv, range, src, pre, lambda, nb_full_mvs, full_mv_num, &x->mv_limits, fn_ptr); } return exhaustive_mesh_search_multi_step(best_mv, center_mv, range, step, src, pre, lambda, nb_full_mvs, full_mv_num, &x->mv_limits, fn_ptr); } static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x, MV *centre_mv_full, const vp9_variance_fn_ptr_t *fn_ptr, MV *dst_mv, int lambda, const int_mv *nb_full_mvs, int full_mv_num) { const SPEED_FEATURES *const sf = &cpi->sf; MV temp_mv = { centre_mv_full->row, centre_mv_full->col }; int64_t bestsme; int i; int interval = sf->mesh_patterns[0].interval; int range = sf->mesh_patterns[0].range; int baseline_interval_divisor; // Trap illegal values for interval and range for this function. if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) || (interval > range)) { printf("ERROR: invalid range\n"); assert(0); } baseline_interval_divisor = range / interval; // Check size of proposed first range against magnitude of the centre // value used as a starting point. range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4); range = VPXMIN(range, MAX_RANGE); interval = VPXMAX(interval, range / baseline_interval_divisor); // initial search bestsme = exhaustive_mesh_search_new(x, &temp_mv, range, interval, fn_ptr, &temp_mv, lambda, nb_full_mvs, full_mv_num); if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { // Progressive searches with range and step size decreasing each time // till we reach a step size of 1. Then break out. 
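    // (e.g. an illustrative mesh_patterns list of (range, interval) pairs
    // (64, 4), (32, 2), (16, 1) would re-search at 32/2 and then 16/1
    // before stopping at interval 1.)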
for (i = 1; i < MAX_MESH_STEP; ++i) { // First pass with coarser step and longer range bestsme = exhaustive_mesh_search_new( x, &temp_mv, sf->mesh_patterns[i].range, sf->mesh_patterns[i].interval, fn_ptr, &temp_mv, lambda, nb_full_mvs, full_mv_num); if (sf->mesh_patterns[i].interval == 1) break; } } *dst_mv = temp_mv; return bestsme; } static int64_t diamond_search_sad_new(const MACROBLOCK *x, const search_site_config *cfg, const MV *init_full_mv, MV *best_full_mv, int search_param, int lambda, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, const int_mv *nb_full_mvs, int full_mv_num) { int i, j, step; const MACROBLOCKD *const xd = &x->e_mbd; uint8_t *what = x->plane[0].src.buf; const int what_stride = x->plane[0].src.stride; const uint8_t *in_what; const int in_what_stride = xd->plane[0].pre[0].stride; const uint8_t *best_address; int64_t bestsad; int best_site = -1; int last_site = -1; // search_param determines the length of the initial step and hence the number // of iterations. // 0 = initial step (MAX_FIRST_STEP) pel // 1 = (MAX_FIRST_STEP/2) pel, // 2 = (MAX_FIRST_STEP/4) pel... // const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step]; const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step]; const int tot_steps = cfg->total_steps - search_param; vpx_clear_system_state(); *best_full_mv = *init_full_mv; clamp_mv(best_full_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max); *num00 = 0; // Work out the start point for the search in_what = xd->plane[0].pre[0].buf + best_full_mv->row * in_what_stride + best_full_mv->col; best_address = in_what; // Check the starting position { const int64_t mv_dist = (int64_t)fn_ptr->sdf(what, what_stride, in_what, in_what_stride) << LOG2_PRECISION; const int64_t mv_cost = vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); bestsad = mv_dist + lambda * mv_cost; } i = 0; for (step = 0; step < tot_steps; step++) { int all_in = 1, t; // All_in is true if every one of the points we are checking are within // the bounds of the image. all_in &= ((best_full_mv->row + ss_mv[i].row) > x->mv_limits.row_min); all_in &= ((best_full_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max); all_in &= ((best_full_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min); all_in &= ((best_full_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max); // If all the pixels are within the bounds we don't check whether the // search point is valid in this loop, otherwise we check each point // for validity.. 
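// A sketch of the fast path taken below: when the whole step pattern fits
// inside the frame, each group of four candidate offsets is handed to
// fn_ptr->sdx4df() in a single call, which returns four SADs at once
// (typically via SIMD), instead of issuing four separate fn_ptr->sdf()
// calls with per-point bounds checks.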
if (all_in) { unsigned int sad_array[4]; for (j = 0; j < cfg->searches_per_step; j += 4) { unsigned char const *block_offset[4]; for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address; fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); for (t = 0; t < 4; t++, i++) { const int64_t mv_dist = (int64_t)sad_array[t] << LOG2_PRECISION; if (mv_dist < bestsad) { const MV this_mv = { best_full_mv->row + ss_mv[i].row, best_full_mv->col + ss_mv[i].col }; const int64_t mv_cost = vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); const int64_t thissad = mv_dist + lambda * mv_cost; if (thissad < bestsad) { bestsad = thissad; best_site = i; } } } } } else { for (j = 0; j < cfg->searches_per_step; j++) { // Trap illegal vectors const MV this_mv = { best_full_mv->row + ss_mv[i].row, best_full_mv->col + ss_mv[i].col }; if (is_mv_in(&x->mv_limits, &this_mv)) { const uint8_t *const check_here = ss_os[i] + best_address; const int64_t mv_dist = (int64_t)fn_ptr->sdf(what, what_stride, check_here, in_what_stride) << LOG2_PRECISION; if (mv_dist < bestsad) { const int64_t mv_cost = vp9_nb_mvs_inconsistency(&this_mv, nb_full_mvs, full_mv_num); const int64_t thissad = mv_dist + lambda * mv_cost; if (thissad < bestsad) { bestsad = thissad; best_site = i; } } } i++; } } if (best_site != last_site) { best_full_mv->row += ss_mv[best_site].row; best_full_mv->col += ss_mv[best_site].col; best_address += ss_os[best_site]; last_site = best_site; } else if (best_address == in_what) { (*num00)++; } } return bestsad; } int vp9_prepare_nb_full_mvs(const MotionField *motion_field, int mi_row, int mi_col, int_mv *nb_full_mvs) { const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize]; const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize]; const int dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; int nb_full_mv_num = 0; int i; assert(mi_row % mi_height == 0); assert(mi_col % mi_width == 0); for (i = 0; i < NB_MVS_NUM; ++i) { int r = dirs[i][0]; int c = dirs[i][1]; int brow = mi_row / mi_height + r; int bcol = mi_col / mi_width + c; if (brow >= 0 && brow < motion_field->block_rows && bcol >= 0 && bcol < motion_field->block_cols) { if (vp9_motion_field_is_mv_set(motion_field, brow, bcol)) { int_mv mv = vp9_motion_field_get_mv(motion_field, brow, bcol); nb_full_mvs[nb_full_mv_num].as_mv = get_full_mv(&mv.as_mv); ++nb_full_mv_num; } } } return nb_full_mv_num; } #endif // CONFIG_NON_GREEDY_MV int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv) { int i, j, step; const MACROBLOCKD *const xd = &x->e_mbd; uint8_t *what = x->plane[0].src.buf; const int what_stride = x->plane[0].src.stride; const uint8_t *in_what; const int in_what_stride = xd->plane[0].pre[0].stride; const uint8_t *best_address; unsigned int bestsad = INT_MAX; int best_site = -1; int last_site = -1; int ref_row; int ref_col; // search_param determines the length of the initial step and hence the number // of iterations. // 0 = initial step (MAX_FIRST_STEP) pel // 1 = (MAX_FIRST_STEP/2) pel, // 2 = (MAX_FIRST_STEP/4) pel... 
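// For example, with MAX_MVSEARCH_STEPS == 11, MAX_FIRST_STEP is
// 1 << 10 = 1024 full pels, so search_param == 3 starts with a
// 1024 / 8 = 128 pel step and runs cfg->total_steps - 3 halving
// iterations.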
// const search_site *ss = &cfg->ss[search_param * cfg->searches_per_step]; const MV *ss_mv = &cfg->ss_mv[search_param * cfg->searches_per_step]; const intptr_t *ss_os = &cfg->ss_os[search_param * cfg->searches_per_step]; const int tot_steps = cfg->total_steps - search_param; const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max); ref_row = ref_mv->row; ref_col = ref_mv->col; *num00 = 0; best_mv->row = ref_row; best_mv->col = ref_col; // Work out the start point for the search in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; best_address = in_what; // Check the starting position bestsad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride) + mvsad_err_cost(x, best_mv, &fcenter_mv, sad_per_bit); i = 0; for (step = 0; step < tot_steps; step++) { int all_in = 1, t; // All_in is true if every one of the points we are checking are within // the bounds of the image. all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_limits.row_min); all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max); all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min); all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max); // If all the pixels are within the bounds we don't check whether the // search point is valid in this loop, otherwise we check each point // for validity.. if (all_in) { unsigned int sad_array[4]; for (j = 0; j < cfg->searches_per_step; j += 4) { unsigned char const *block_offset[4]; for (t = 0; t < 4; t++) block_offset[t] = ss_os[i + t] + best_address; fn_ptr->sdx4df(what, what_stride, block_offset, in_what_stride, sad_array); for (t = 0; t < 4; t++, i++) { if (sad_array[t] < bestsad) { const MV this_mv = { best_mv->row + ss_mv[i].row, best_mv->col + ss_mv[i].col }; sad_array[t] += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (sad_array[t] < bestsad) { bestsad = sad_array[t]; best_site = i; } } } } } else { for (j = 0; j < cfg->searches_per_step; j++) { // Trap illegal vectors const MV this_mv = { best_mv->row + ss_mv[i].row, best_mv->col + ss_mv[i].col }; if (is_mv_in(&x->mv_limits, &this_mv)) { const uint8_t *const check_here = ss_os[i] + best_address; unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); if (thissad < bestsad) { thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_site = i; } } } i++; } } if (best_site != last_site) { best_mv->row += ss_mv[best_site].row; best_mv->col += ss_mv[best_site].col; best_address += ss_os[best_site]; last_site = best_site; #if defined(NEW_DIAMOND_SEARCH) while (1) { const MV this_mv = { best_mv->row + ss_mv[best_site].row, best_mv->col + ss_mv[best_site].col }; if (is_mv_in(&x->mv_limits, &this_mv)) { const uint8_t *const check_here = ss_os[best_site] + best_address; unsigned int thissad = fn_ptr->sdf(what, what_stride, check_here, in_what_stride); if (thissad < bestsad) { thissad += mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit); if (thissad < bestsad) { bestsad = thissad; best_mv->row += ss_mv[best_site].row; best_mv->col += ss_mv[best_site].col; best_address += ss_os[best_site]; continue; } } } break; } #endif } else if (best_address == in_what) { (*num00)++; } } return bestsad; } static int vector_match(int16_t *ref, int16_t *src, int bwl) { int best_sad = INT_MAX; int this_sad; int d; int center, offset = 0; int bw = 4 << bwl; // redundant variable, to be 
changed in the experiments. for (d = 0; d <= bw; d += 16) { this_sad = vpx_vector_var(&ref[d], src, bwl); if (this_sad < best_sad) { best_sad = this_sad; offset = d; } } center = offset; for (d = -8; d <= 8; d += 16) { int this_pos = offset + d; // check limit if (this_pos < 0 || this_pos > bw) continue; this_sad = vpx_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) { best_sad = this_sad; center = this_pos; } } offset = center; for (d = -4; d <= 4; d += 8) { int this_pos = offset + d; // check limit if (this_pos < 0 || this_pos > bw) continue; this_sad = vpx_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) { best_sad = this_sad; center = this_pos; } } offset = center; for (d = -2; d <= 2; d += 4) { int this_pos = offset + d; // check limit if (this_pos < 0 || this_pos > bw) continue; this_sad = vpx_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) { best_sad = this_sad; center = this_pos; } } offset = center; for (d = -1; d <= 1; d += 2) { int this_pos = offset + d; // check limit if (this_pos < 0 || this_pos > bw) continue; this_sad = vpx_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) { best_sad = this_sad; center = this_pos; } } return (center - (bw >> 1)); } static const MV search_pos[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 }, }; unsigned int vp9_int_pro_motion_estimation(const VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, const MV *ref_mv) { MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } }; DECLARE_ALIGNED(16, int16_t, hbuf[128]); DECLARE_ALIGNED(16, int16_t, vbuf[128]); DECLARE_ALIGNED(16, int16_t, src_hbuf[64]); DECLARE_ALIGNED(16, int16_t, src_vbuf[64]); int idx; const int bw = 4 << b_width_log2_lookup[bsize]; const int bh = 4 << b_height_log2_lookup[bsize]; const int search_width = bw << 1; const int search_height = bh << 1; const int src_stride = x->plane[0].src.stride; const int ref_stride = xd->plane[0].pre[0].stride; uint8_t const *ref_buf, *src_buf; MV *tmp_mv = &xd->mi[0]->mv[0].as_mv; unsigned int best_sad, tmp_sad, this_sad[4]; MV this_mv; const int norm_factor = 3 + (bw >> 5); const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]); MvLimits subpel_mv_limits; if (scaled_ref_frame) { int i; // Swap out the reference frame for a version that's been scaled to // match the resolution of the current frame, allowing the existing // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); } #if CONFIG_VP9_HIGHBITDEPTH // TODO(jingning): Implement integral projection functions for high bit-depth // setting and remove this part of code. 
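// A sketch of the fallback below: for bit depths other than 8 the 1-D
// projection helpers are not implemented, so the function leaves the MV
// at zero and simply returns the SAD at that position.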
if (xd->bd != 8) { unsigned int this_sad; tmp_mv->row = 0; tmp_mv->col = 0; this_sad = cpi->fn_ptr[bsize].sdf(x->plane[0].src.buf, src_stride, xd->plane[0].pre[0].buf, ref_stride); if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } return this_sad; } #endif // Set up prediction 1-D reference set ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); for (idx = 0; idx < search_width; idx += 16) { vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh); ref_buf += 16; } ref_buf = xd->plane[0].pre[0].buf - (bh >> 1) * ref_stride; for (idx = 0; idx < search_height; ++idx) { vbuf[idx] = vpx_int_pro_col(ref_buf, bw) >> norm_factor; ref_buf += ref_stride; } // Set up src 1-D reference set for (idx = 0; idx < bw; idx += 16) { src_buf = x->plane[0].src.buf + idx; vpx_int_pro_row(&src_hbuf[idx], src_buf, src_stride, bh); } src_buf = x->plane[0].src.buf; for (idx = 0; idx < bh; ++idx) { src_vbuf[idx] = vpx_int_pro_col(src_buf, bw) >> norm_factor; src_buf += src_stride; } // Find the best match per 1-D search tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]); tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]); this_mv = *tmp_mv; src_buf = x->plane[0].src.buf; ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col; best_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); { const uint8_t *const pos[4] = { ref_buf - ref_stride, ref_buf - 1, ref_buf + 1, ref_buf + ref_stride, }; cpi->fn_ptr[bsize].sdx4df(src_buf, src_stride, pos, ref_stride, this_sad); } for (idx = 0; idx < 4; ++idx) { if (this_sad[idx] < best_sad) { best_sad = this_sad[idx]; tmp_mv->row = search_pos[idx].row + this_mv.row; tmp_mv->col = search_pos[idx].col + this_mv.col; } } if (this_sad[0] < this_sad[3]) this_mv.row -= 1; else this_mv.row += 1; if (this_sad[1] < this_sad[2]) this_mv.col -= 1; else this_mv.col += 1; ref_buf = xd->plane[0].pre[0].buf + this_mv.row * ref_stride + this_mv.col; tmp_sad = cpi->fn_ptr[bsize].sdf(src_buf, src_stride, ref_buf, ref_stride); if (best_sad > tmp_sad) { *tmp_mv = this_mv; best_sad = tmp_sad; } tmp_mv->row *= 8; tmp_mv->col *= 8; vp9_set_subpel_mv_search_range(&subpel_mv_limits, &x->mv_limits, ref_mv); clamp_mv(tmp_mv, subpel_mv_limits.col_min, subpel_mv_limits.col_max, subpel_mv_limits.row_min, subpel_mv_limits.row_max); if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } return best_sad; } static int get_exhaustive_threshold(int exhaustive_searches_thresh, BLOCK_SIZE bsize) { return exhaustive_searches_thresh >> (8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize])); } #if CONFIG_NON_GREEDY_MV // Runs sequence of diamond searches in smaller steps for RD. 
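// Roughly, the non-greedy search below minimizes
//   score = (SAD << LOG2_PRECISION) + lambda * vp9_nb_mvs_inconsistency(mv)
// i.e. distortion plus a lambda-weighted penalty for disagreeing with the
// motion vectors of neighboring blocks, so the resulting motion field
// stays spatially coherent instead of being greedily SAD-optimal per
// block.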
/* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int lambda, int do_refine, const int_mv *nb_full_mvs, int full_mv_num, MV *best_mv) { const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; const SPEED_FEATURES *const sf = &cpi->sf; int n, num00 = 0; int thissme; int bestsme; const int further_steps = MAX_MVSEARCH_STEPS - 1 - step_param; const MV center_mv = { 0, 0 }; vpx_clear_system_state(); diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param, lambda, &n, fn_ptr, nb_full_mvs, full_mv_num); bestsme = vp9_get_mvpred_var(x, best_mv, ¢er_mv, fn_ptr, 0); // If there won't be more n-step search, check to see if refining search is // needed. if (n > further_steps) do_refine = 0; while (n < further_steps) { ++n; if (num00) { num00--; } else { MV temp_mv; diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, &temp_mv, step_param + n, lambda, &num00, fn_ptr, nb_full_mvs, full_mv_num); thissme = vp9_get_mvpred_var(x, &temp_mv, ¢er_mv, fn_ptr, 0); // check to see if refining search is needed. if (num00 > further_steps - n) do_refine = 0; if (thissme < bestsme) { bestsme = thissme; *best_mv = temp_mv; } } } // final 1-away diamond refining search if (do_refine) { const int search_range = 8; MV temp_mv = *best_mv; vp9_refining_search_sad_new(x, &temp_mv, lambda, search_range, fn_ptr, nb_full_mvs, full_mv_num); thissme = vp9_get_mvpred_var(x, &temp_mv, ¢er_mv, fn_ptr, 0); if (thissme < bestsme) { bestsme = thissme; *best_mv = temp_mv; } } if (sf->exhaustive_searches_thresh < INT_MAX && !cpi->rc.is_src_frame_alt_ref) { const int64_t exhaustive_thr = get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize); if (bestsme > exhaustive_thr) { full_pixel_exhaustive_new(cpi, x, best_mv, fn_ptr, best_mv, lambda, nb_full_mvs, full_mv_num); bestsme = vp9_get_mvpred_var(x, best_mv, ¢er_mv, fn_ptr, 0); } } return bestsme; } #endif // CONFIG_NON_GREEDY_MV // Runs sequence of diamond searches in smaller steps for RD. /* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond refining search */ static int full_pixel_diamond(const VP9_COMP *const cpi, const MACROBLOCK *const x, MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, int *cost_list, const vp9_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv) { MV temp_mv; int thissme, n, num00 = 0; int bestsme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, step_param, sadpb, &n, fn_ptr, ref_mv); if (bestsme < INT_MAX) bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); *dst_mv = temp_mv; // If there won't be more n-step search, check to see if refining search is // needed. if (n > further_steps) do_refine = 0; while (n < further_steps) { ++n; if (num00) { num00--; } else { thissme = cpi->diamond_search_sad(x, &cpi->ss_cfg, mvp_full, &temp_mv, step_param + n, sadpb, &num00, fn_ptr, ref_mv); if (thissme < INT_MAX) thissme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); // check to see if refining search is needed. 
if (num00 > further_steps - n) do_refine = 0; if (thissme < bestsme) { bestsme = thissme; *dst_mv = temp_mv; } } } // final 1-away diamond refining search if (do_refine) { const int search_range = 8; MV best_mv = *dst_mv; thissme = vp9_refining_search_sad(x, &best_mv, sadpb, search_range, fn_ptr, ref_mv); if (thissme < INT_MAX) thissme = vp9_get_mvpred_var(x, &best_mv, ref_mv, fn_ptr, 1); if (thissme < bestsme) { bestsme = thissme; *dst_mv = best_mv; } } // Return cost list. if (cost_list) { calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); } return bestsme; } // Runs a limited-range exhaustive mesh search using a pattern set // according to the encode speed profile. static int full_pixel_exhaustive(const VP9_COMP *const cpi, const MACROBLOCK *const x, MV *centre_mv_full, int sadpb, int *cost_list, const vp9_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv) { const SPEED_FEATURES *const sf = &cpi->sf; MV temp_mv = { centre_mv_full->row, centre_mv_full->col }; MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; int bestsme; int i; int interval = sf->mesh_patterns[0].interval; int range = sf->mesh_patterns[0].range; int baseline_interval_divisor; // Trap illegal values for interval and range for this function. if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) || (interval > range)) return INT_MAX; baseline_interval_divisor = range / interval; // Check size of proposed first range against magnitude of the centre // value used as a starting point. range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4); range = VPXMIN(range, MAX_RANGE); interval = VPXMAX(interval, range / baseline_interval_divisor); // initial search bestsme = exhaustive_mesh_search(x, &f_ref_mv, &temp_mv, range, interval, sadpb, fn_ptr, &temp_mv); if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { // Progressive searches with range and step size decreasing each time // till we reach a step size of 1. Then break out. for (i = 1; i < MAX_MESH_STEP; ++i) { // First pass with coarser step and longer range bestsme = exhaustive_mesh_search( x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range, sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv); if (sf->mesh_patterns[i].interval == 1) break; } } if (bestsme < INT_MAX) bestsme = vp9_get_mvpred_var(x, &temp_mv, ref_mv, fn_ptr, 1); *dst_mv = temp_mv; // Return cost list.
if (cost_list) { calc_int_cost_list(x, ref_mv, sadpb, fn_ptr, dst_mv, cost_list); } return bestsme; } #if CONFIG_NON_GREEDY_MV int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, int lambda, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, const int_mv *nb_full_mvs, int full_mv_num) { const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const uint8_t *best_address = get_buf_from_mv(in_what, best_full_mv); int64_t best_sad; int i, j; vpx_clear_system_state(); { const int64_t mv_dist = (int64_t)fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) << LOG2_PRECISION; const int64_t mv_cost = vp9_nb_mvs_inconsistency(best_full_mv, nb_full_mvs, full_mv_num); best_sad = mv_dist + lambda * mv_cost; } for (i = 0; i < search_range; i++) { int best_site = -1; const int all_in = ((best_full_mv->row - 1) > x->mv_limits.row_min) & ((best_full_mv->row + 1) < x->mv_limits.row_max) & ((best_full_mv->col - 1) > x->mv_limits.col_min) & ((best_full_mv->col + 1) < x->mv_limits.col_max); if (all_in) { unsigned int sads[4]; const uint8_t *const positions[4] = { best_address - in_what->stride, best_address - 1, best_address + 1, best_address + in_what->stride }; fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); for (j = 0; j < 4; ++j) { const MV mv = { best_full_mv->row + neighbors[j].row, best_full_mv->col + neighbors[j].col }; const int64_t mv_dist = (int64_t)sads[j] << LOG2_PRECISION; const int64_t mv_cost = vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); const int64_t thissad = mv_dist + lambda * mv_cost; if (thissad < best_sad) { best_sad = thissad; best_site = j; } } } else { for (j = 0; j < 4; ++j) { const MV mv = { best_full_mv->row + neighbors[j].row, best_full_mv->col + neighbors[j].col }; if (is_mv_in(&x->mv_limits, &mv)) { const int64_t mv_dist = (int64_t)fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride) << LOG2_PRECISION; const int64_t mv_cost = vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); const int64_t thissad = mv_dist + lambda * mv_cost; if (thissad < best_sad) { best_sad = thissad; best_site = j; } } } } if (best_site == -1) { break; } else { best_full_mv->row += neighbors[best_site].row; best_full_mv->col += neighbors[best_site].col; best_address = get_buf_from_mv(in_what, best_full_mv); } } return best_sad; } #endif // CONFIG_NON_GREEDY_MV int vp9_refining_search_sad(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv) { const MACROBLOCKD *const xd = &x->e_mbd; const MV neighbors[4] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 } }; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; const uint8_t *best_address = get_buf_from_mv(in_what, ref_mv); unsigned int best_sad = fn_ptr->sdf(what->buf, what->stride, best_address, in_what->stride) + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); int i, j; for (i = 0; i < search_range; i++) { int best_site = -1; const int all_in = ((ref_mv->row - 1) > x->mv_limits.row_min) & ((ref_mv->row + 1) < x->mv_limits.row_max) & ((ref_mv->col - 1) > x->mv_limits.col_min) & ((ref_mv->col + 1) < x->mv_limits.col_max); if (all_in) { unsigned int sads[4]; const uint8_t *const 
positions[4] = { best_address - in_what->stride, best_address - 1, best_address + 1, best_address + in_what->stride }; fn_ptr->sdx4df(what->buf, what->stride, positions, in_what->stride, sads); for (j = 0; j < 4; ++j) { if (sads[j] < best_sad) { const MV mv = { ref_mv->row + neighbors[j].row, ref_mv->col + neighbors[j].col }; sads[j] += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sads[j] < best_sad) { best_sad = sads[j]; best_site = j; } } } } else { for (j = 0; j < 4; ++j) { const MV mv = { ref_mv->row + neighbors[j].row, ref_mv->col + neighbors[j].col }; if (is_mv_in(&x->mv_limits, &mv)) { unsigned int sad = fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { best_sad = sad; best_site = j; } } } } } if (best_site == -1) { break; } else { ref_mv->row += neighbors[best_site].row; ref_mv->col += neighbors[best_site].col; best_address = get_buf_from_mv(in_what, ref_mv); } } return best_sad; } // This function is called when we do joint motion search in comp_inter_inter // mode. int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv, const uint8_t *second_pred) { const MV neighbors[8] = { { -1, 0 }, { 0, -1 }, { 0, 1 }, { 1, 0 }, { -1, -1 }, { 1, -1 }, { -1, 1 }, { 1, 1 } }; const MACROBLOCKD *const xd = &x->e_mbd; const struct buf_2d *const what = &x->plane[0].src; const struct buf_2d *const in_what = &xd->plane[0].pre[0]; const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; unsigned int best_sad = INT_MAX; int i, j; clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max, x->mv_limits.row_min, x->mv_limits.row_max); best_sad = fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv), in_what->stride, second_pred) + mvsad_err_cost(x, ref_mv, &fcenter_mv, error_per_bit); for (i = 0; i < search_range; ++i) { int best_site = -1; for (j = 0; j < 8; ++j) { const MV mv = { ref_mv->row + neighbors[j].row, ref_mv->col + neighbors[j].col }; if (is_mv_in(&x->mv_limits, &mv)) { unsigned int sad = fn_ptr->sdaf(what->buf, what->stride, get_buf_from_mv(in_what, &mv), in_what->stride, second_pred); if (sad < best_sad) { sad += mvsad_err_cost(x, &mv, &fcenter_mv, error_per_bit); if (sad < best_sad) { best_sad = sad; best_site = j; } } } } if (best_site == -1) { break; } else { ref_mv->row += neighbors[best_site].row; ref_mv->col += neighbors[best_site].col; } } return best_sad; } int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int search_method, int error_per_bit, int *cost_list, const MV *ref_mv, MV *tmp_mv, int var_max, int rd) { const SPEED_FEATURES *const sf = &cpi->sf; const SEARCH_METHODS method = (SEARCH_METHODS)search_method; const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; int var = 0; int run_exhaustive_search = 0; if (cost_list) { cost_list[0] = INT_MAX; cost_list[1] = INT_MAX; cost_list[2] = INT_MAX; cost_list[3] = INT_MAX; cost_list[4] = INT_MAX; } switch (method) { case FAST_DIAMOND: var = fast_dia_search(x, mvp_full, step_param, error_per_bit, 0, cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case FAST_HEX: var = fast_hex_search(x, mvp_full, step_param, error_per_bit, 0, cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case HEX: var = hex_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, fn_ptr, 1, ref_mv, tmp_mv); 
break; case SQUARE: var = square_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case BIGDIA: var = bigdia_search(x, mvp_full, step_param, error_per_bit, 1, cost_list, fn_ptr, 1, ref_mv, tmp_mv); break; case NSTEP: case MESH: var = full_pixel_diamond(cpi, x, mvp_full, step_param, error_per_bit, MAX_MVSEARCH_STEPS - 1 - step_param, 1, cost_list, fn_ptr, ref_mv, tmp_mv); break; default: assert(0 && "Unknown search method"); } if (method == NSTEP) { if (sf->exhaustive_searches_thresh < INT_MAX && !cpi->rc.is_src_frame_alt_ref) { const int64_t exhaustive_thr = get_exhaustive_threshold(sf->exhaustive_searches_thresh, bsize); if (var > exhaustive_thr) { run_exhaustive_search = 1; } } } else if (method == MESH) { run_exhaustive_search = 1; } if (run_exhaustive_search) { int var_ex; MV tmp_mv_ex; var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, cost_list, fn_ptr, ref_mv, &tmp_mv_ex); if (var_ex < var) { var = var_ex; *tmp_mv = tmp_mv_ex; } } if (method != NSTEP && method != MESH && rd && var < var_max) var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1); return var; } // Note(yunqingwang): The following 2 functions are only used in the motion // vector unit test, which return extreme motion vectors allowed by the MV // limits. #define COMMON_MV_TEST \ SETUP_SUBPEL_SEARCH; \ \ (void)error_per_bit; \ (void)vfp; \ (void)z; \ (void)src_stride; \ (void)y; \ (void)y_stride; \ (void)second_pred; \ (void)w; \ (void)h; \ (void)offset; \ (void)mvjcost; \ (void)mvcost; \ (void)sse1; \ (void)distortion; \ \ (void)halfiters; \ (void)quarteriters; \ (void)eighthiters; \ (void)whichdir; \ (void)allow_hp; \ (void)forced_stop; \ (void)hstep; \ (void)rr; \ (void)rc; \ \ (void)tr; \ (void)tc; \ (void)sse; \ (void)thismse; \ (void)cost_list; \ (void)use_accurate_subpel_search; // Return the maximum MV. uint32_t vp9_return_max_sub_pixel_mv( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) { COMMON_MV_TEST; (void)minr; (void)minc; bestmv->row = maxr; bestmv->col = maxc; besterr = 0; // In the sub-pel motion search, if hp is not used, then the last bit of mv // has to be 0. lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv)); return besterr; } // Return the minimum MV. uint32_t vp9_return_min_sub_pixel_mv( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) { COMMON_MV_TEST; (void)maxr; (void)maxc; bestmv->row = minr; bestmv->col = minc; besterr = 0; // In the sub-pel motion search, if hp is not used, then the last bit of mv // has to be 0. lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv)); return besterr; } libvpx-1.8.2/vp9/encoder/vp9_mcomp.h000066400000000000000000000165261357355204000172630ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_MCOMP_H_ #define VPX_VP9_ENCODER_VP9_MCOMP_H_ #include "vp9/encoder/vp9_block.h" #if CONFIG_NON_GREEDY_MV #include "vp9/encoder/vp9_non_greedy_mv.h" #endif // CONFIG_NON_GREEDY_MV #include "vpx_dsp/variance.h" #ifdef __cplusplus extern "C" { #endif // The maximum number of steps in a step search given the largest // allowed initial step #define MAX_MVSEARCH_STEPS 11 // Max full pel mv specified in the unit of full pixel // Enable the use of motion vector in range [-1023, 1023]. #define MAX_FULL_PEL_VAL ((1 << (MAX_MVSEARCH_STEPS - 1)) - 1) // Maximum size of the first step in full pel units #define MAX_FIRST_STEP (1 << (MAX_MVSEARCH_STEPS - 1)) // Allowed motion vector pixel distance outside image border // for Block_16x16 #define BORDER_MV_PIXELS_B16 (16 + VP9_INTERP_EXTEND) typedef struct search_site_config { // motion search sites MV ss_mv[8 * MAX_MVSEARCH_STEPS]; // Motion vector intptr_t ss_os[8 * MAX_MVSEARCH_STEPS]; // Offset int searches_per_step; int total_steps; } search_site_config; static INLINE const uint8_t *get_buf_from_mv(const struct buf_2d *buf, const MV *mv) { return &buf->buf[mv->row * buf->stride + mv->col]; } void vp9_init_dsmotion_compensation(search_site_config *cfg, int stride); void vp9_init3smotion_compensation(search_site_config *cfg, int stride); void vp9_set_mv_search_range(MvLimits *mv_limits, const MV *mv); int vp9_mv_bit_cost(const MV *mv, const MV *ref, const int *mvjcost, int *mvcost[2], int weight); // Utility to compute variance + MV rate cost for a given MV int vp9_get_mvpred_var(const MACROBLOCK *x, const MV *best_mv, const MV *center_mv, const vp9_variance_fn_ptr_t *vfp, int use_mvcost); int vp9_get_mvpred_av_var(const MACROBLOCK *x, const MV *best_mv, const MV *center_mv, const uint8_t *second_pred, const vp9_variance_fn_ptr_t *vfp, int use_mvcost); struct VP9_COMP; struct SPEED_FEATURES; int vp9_init_search_range(int size); int vp9_refining_search_sad(const struct macroblock *x, struct mv *ref_mv, int error_per_bit, int search_range, const struct vp9_variance_vtable *fn_ptr, const struct mv *center_mv); // Perform integral projection based motion estimation. 
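// The estimator reduces the 2-D match to two 1-D matches: it projects the
// reference window and the source block onto row and column sums, finds
// the best offset of each projection independently to get a coarse MV,
// then refines that MV with a handful of full-pel SAD checks (see the
// definition in vp9_mcomp.c).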
unsigned int vp9_int_pro_motion_estimation(const struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, const MV *ref_mv); typedef uint32_t(fractional_mv_step_fp)( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, // 0 - full, 1 - qtr only, 2 - half only int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2], uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search); extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_more; extern fractional_mv_step_fp vp9_find_best_sub_pixel_tree_pruned_evenmore; extern fractional_mv_step_fp vp9_skip_sub_pixel_tree; extern fractional_mv_step_fp vp9_return_max_sub_pixel_mv; extern fractional_mv_step_fp vp9_return_min_sub_pixel_mv; typedef int (*vp9_full_search_fn_t)(const MACROBLOCK *x, const MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv, MV *best_mv); typedef int (*vp9_refining_search_fn_t)(const MACROBLOCK *x, MV *ref_mv, int sad_per_bit, int distance, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv); typedef int (*vp9_diamond_search_fn_t)( const MACROBLOCK *x, const search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv); int vp9_refining_search_8p_c(const MACROBLOCK *x, MV *ref_mv, int error_per_bit, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv, const uint8_t *second_pred); struct VP9_COMP; // "mvp_full" is the MV search starting point; // "ref_mv" is the context reference MV; // "tmp_mv" is the searched best MV. 
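// Illustrative call, assuming an encoder context providing cpi, x, bsize,
// step_param, sadpb and ref_mv (names here are hypothetical):
//   MV start = { 0, 0 }, best;
//   int cost_list[5];
//   int var = vp9_full_pixel_search(cpi, x, bsize, &start, step_param,
//                                   sf->mv.search_method, sadpb, cost_list,
//                                   &ref_mv, &best, INT_MAX, /*rd=*/1);
// On return, best holds the full-pel winner and cost_list[] the costs of
// the center point and its four neighbors, used for sub-pel pruning.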
int vp9_full_pixel_search(const struct VP9_COMP *const cpi, const MACROBLOCK *const x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int search_method, int error_per_bit, int *cost_list, const MV *ref_mv, MV *tmp_mv, int var_max, int rd); void vp9_set_subpel_mv_search_range(MvLimits *subpel_mv_limits, const MvLimits *umv_window_limits, const MV *ref_mv); #if CONFIG_NON_GREEDY_MV struct TplDepStats; int64_t vp9_refining_search_sad_new(const MACROBLOCK *x, MV *best_full_mv, int lambda, int search_range, const vp9_variance_fn_ptr_t *fn_ptr, const int_mv *nb_full_mvs, int full_mv_num); int vp9_full_pixel_diamond_new(const struct VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, MV *mvp_full, int step_param, int lambda, int do_refine, const int_mv *nb_full_mvs, int full_mv_num, MV *best_mv); static INLINE MV get_full_mv(const MV *mv) { MV out_mv; out_mv.row = mv->row >> 3; out_mv.col = mv->col >> 3; return out_mv; } struct TplDepFrame; int vp9_prepare_nb_full_mvs(const struct MotionField *motion_field, int mi_row, int mi_col, int_mv *nb_full_mvs); static INLINE BLOCK_SIZE get_square_block_size(BLOCK_SIZE bsize) { BLOCK_SIZE square_bsize; switch (bsize) { case BLOCK_4X4: case BLOCK_4X8: case BLOCK_8X4: square_bsize = BLOCK_4X4; break; case BLOCK_8X8: case BLOCK_8X16: case BLOCK_16X8: square_bsize = BLOCK_8X8; break; case BLOCK_16X16: case BLOCK_16X32: case BLOCK_32X16: square_bsize = BLOCK_16X16; break; case BLOCK_32X32: case BLOCK_32X64: case BLOCK_64X32: case BLOCK_64X64: square_bsize = BLOCK_32X32; break; default: square_bsize = BLOCK_INVALID; assert(0 && "ERROR: invalid block size"); break; } return square_bsize; } #endif // CONFIG_NON_GREEDY_MV #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_MCOMP_H_ libvpx-1.8.2/vp9/encoder/vp9_multi_thread.c000066400000000000000000000260011357355204000206110ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <assert.h> #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_ethread.h" #include "vp9/encoder/vp9_multi_thread.h" #include "vp9/encoder/vp9_temporal_filter.h" void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt, int tile_id) { RowMTInfo *row_mt_info; JobQueueHandle *job_queue_hdl = NULL; void *next = NULL; JobNode *job_info = NULL; #if CONFIG_MULTITHREAD pthread_mutex_t *mutex_handle = NULL; #endif row_mt_info = (RowMTInfo *)(&multi_thread_ctxt->row_mt_info[tile_id]); job_queue_hdl = (JobQueueHandle *)&row_mt_info->job_queue_hdl; #if CONFIG_MULTITHREAD mutex_handle = &row_mt_info->job_mutex; #endif // lock the mutex for queue access #if CONFIG_MULTITHREAD pthread_mutex_lock(mutex_handle); #endif next = job_queue_hdl->next; if (NULL != next) { JobQueue *job_queue = (JobQueue *)next; job_info = &job_queue->job_info; // Update the next job in the queue job_queue_hdl->next = job_queue->next; job_queue_hdl->num_jobs_acquired++; } #if CONFIG_MULTITHREAD pthread_mutex_unlock(mutex_handle); #endif return job_info; } void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi, TileDataEnc *const this_tile) { VP9_COMMON *const cm = &cpi->common; const int sb_rows = (mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2) + 1; int i; this_tile->row_base_thresh_freq_fact = (int *)vpx_calloc(sb_rows * BLOCK_SIZES * MAX_MODES, sizeof(*(this_tile->row_base_thresh_freq_fact))); for (i = 0; i < sb_rows * BLOCK_SIZES * MAX_MODES; i++) this_tile->row_base_thresh_freq_fact[i] = RD_THRESH_INIT_FACT; } void vp9_row_mt_mem_alloc(VP9_COMP *cpi) { struct VP9Common *cm = &cpi->common; MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt; int tile_row, tile_col; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; int jobs_per_tile_col, total_jobs; // Allocate memory that is large enough for all row_mt stages. First pass // uses 16x16 block size.
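// For example, a 1080p frame has cm->mb_rows = 68 16x16 rows but
// sb_rows = 17 64x64 rows, so the VPXMAX() below sizes the queue for the
// 68 first-pass jobs per tile column, which also covers the 17
// encode-stage jobs.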
jobs_per_tile_col = VPXMAX(cm->mb_rows, sb_rows); // Calculate the total number of jobs total_jobs = jobs_per_tile_col * tile_cols; multi_thread_ctxt->allocated_tile_cols = tile_cols; multi_thread_ctxt->allocated_tile_rows = tile_rows; multi_thread_ctxt->allocated_vert_unit_rows = jobs_per_tile_col; multi_thread_ctxt->job_queue = (JobQueue *)vpx_memalign(32, total_jobs * sizeof(JobQueue)); #if CONFIG_MULTITHREAD // Create mutex for each tile for (tile_col = 0; tile_col < tile_cols; tile_col++) { RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col]; pthread_mutex_init(&row_mt_info->job_mutex, NULL); } #endif // Allocate memory for row based multi-threading for (tile_col = 0; tile_col < tile_cols; tile_col++) { TileDataEnc *this_tile = &cpi->tile_data[tile_col]; vp9_row_mt_sync_mem_alloc(&this_tile->row_mt_sync, cm, jobs_per_tile_col); if (cpi->sf.adaptive_rd_thresh_row_mt) { if (this_tile->row_base_thresh_freq_fact != NULL) { vpx_free(this_tile->row_base_thresh_freq_fact); this_tile->row_base_thresh_freq_fact = NULL; } vp9_row_mt_alloc_rd_thresh(cpi, this_tile); } } // Assign the sync pointer of tile row zero for every tile row > 0 for (tile_row = 1; tile_row < tile_rows; tile_row++) { for (tile_col = 0; tile_col < tile_cols; tile_col++) { TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols + tile_col]; TileDataEnc *this_col_tile = &cpi->tile_data[tile_col]; this_tile->row_mt_sync = this_col_tile->row_mt_sync; } } // Calculate the number of vertical units in the given tile row for (tile_row = 0; tile_row < tile_rows; tile_row++) { TileDataEnc *this_tile = &cpi->tile_data[tile_row * tile_cols]; TileInfo *tile_info = &this_tile->tile_info; multi_thread_ctxt->num_tile_vert_sbs[tile_row] = get_num_vert_units(*tile_info, MI_BLOCK_SIZE_LOG2); } } void vp9_row_mt_mem_dealloc(VP9_COMP *cpi) { MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt; int tile_col; #if CONFIG_MULTITHREAD int tile_row; #endif // Deallocate memory for job queue if (multi_thread_ctxt->job_queue) vpx_free(multi_thread_ctxt->job_queue); #if CONFIG_MULTITHREAD // Destroy mutex for each tile for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols; tile_col++) { RowMTInfo *row_mt_info = &multi_thread_ctxt->row_mt_info[tile_col]; if (row_mt_info) pthread_mutex_destroy(&row_mt_info->job_mutex); } #endif // Free row based multi-threading sync memory for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols; tile_col++) { TileDataEnc *this_tile = &cpi->tile_data[tile_col]; vp9_row_mt_sync_mem_dealloc(&this_tile->row_mt_sync); } #if CONFIG_MULTITHREAD for (tile_row = 0; tile_row < multi_thread_ctxt->allocated_tile_rows; tile_row++) { for (tile_col = 0; tile_col < multi_thread_ctxt->allocated_tile_cols; tile_col++) { TileDataEnc *this_tile = &cpi->tile_data[tile_row * multi_thread_ctxt->allocated_tile_cols + tile_col]; if (this_tile->row_base_thresh_freq_fact != NULL) { vpx_free(this_tile->row_base_thresh_freq_fact); this_tile->row_base_thresh_freq_fact = NULL; } } } #endif } void vp9_multi_thread_tile_init(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; int i; for (i = 0; i < tile_cols; i++) { TileDataEnc *this_tile = &cpi->tile_data[i]; int jobs_per_tile_col = cpi->oxcf.pass == 1 ? cm->mb_rows : sb_rows; // Initialize cur_col to -1 for all rows. 
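// A brief note on the sentinel: cur_col[r] tracks the last column
// completed in row r, so the -1 written below means "nothing done yet";
// the row_mt dependency checks then block until the neighboring row's
// counter passes the required column.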
memset(this_tile->row_mt_sync.cur_col, -1, sizeof(*this_tile->row_mt_sync.cur_col) * jobs_per_tile_col); vp9_zero(this_tile->fp_data); this_tile->fp_data.image_data_start_row = INVALID_ROW; } } void vp9_assign_tile_to_thread(MultiThreadHandle *multi_thread_ctxt, int tile_cols, int num_workers) { int tile_id = 0; int i; // Allocating the threads for the tiles for (i = 0; i < num_workers; i++) { multi_thread_ctxt->thread_id_to_tile_id[i] = tile_id++; if (tile_id == tile_cols) tile_id = 0; } } int vp9_get_job_queue_status(MultiThreadHandle *multi_thread_ctxt, int cur_tile_id) { RowMTInfo *row_mt_info; JobQueueHandle *job_queue_hndl; #if CONFIG_MULTITHREAD pthread_mutex_t *mutex; #endif int num_jobs_remaining; row_mt_info = &multi_thread_ctxt->row_mt_info[cur_tile_id]; job_queue_hndl = &row_mt_info->job_queue_hdl; #if CONFIG_MULTITHREAD mutex = &row_mt_info->job_mutex; #endif #if CONFIG_MULTITHREAD pthread_mutex_lock(mutex); #endif num_jobs_remaining = multi_thread_ctxt->jobs_per_tile_col - job_queue_hndl->num_jobs_acquired; #if CONFIG_MULTITHREAD pthread_mutex_unlock(mutex); #endif return (num_jobs_remaining); } void vp9_prepare_job_queue(VP9_COMP *cpi, JOB_TYPE job_type) { VP9_COMMON *const cm = &cpi->common; MultiThreadHandle *multi_thread_ctxt = &cpi->multi_thread_ctxt; JobQueue *job_queue = multi_thread_ctxt->job_queue; const int tile_cols = 1 << cm->log2_tile_cols; int job_row_num, jobs_per_tile, jobs_per_tile_col = 0, total_jobs; const int sb_rows = mi_cols_aligned_to_sb(cm->mi_rows) >> MI_BLOCK_SIZE_LOG2; int tile_col, i; switch (job_type) { case ENCODE_JOB: jobs_per_tile_col = sb_rows; break; case FIRST_PASS_JOB: jobs_per_tile_col = cm->mb_rows; break; case ARNR_JOB: jobs_per_tile_col = ((cm->mi_rows + TF_ROUND) >> TF_SHIFT); break; default: assert(0); } total_jobs = jobs_per_tile_col * tile_cols; multi_thread_ctxt->jobs_per_tile_col = jobs_per_tile_col; // memset the entire job queue buffer to zero memset(job_queue, 0, total_jobs * sizeof(JobQueue)); // Job queue preparation for (tile_col = 0; tile_col < tile_cols; tile_col++) { RowMTInfo *tile_ctxt = &multi_thread_ctxt->row_mt_info[tile_col]; JobQueue *job_queue_curr, *job_queue_temp; int tile_row = 0; tile_ctxt->job_queue_hdl.next = (void *)job_queue; tile_ctxt->job_queue_hdl.num_jobs_acquired = 0; job_queue_curr = job_queue; job_queue_temp = job_queue; // loop over all the vertical rows for (job_row_num = 0, jobs_per_tile = 0; job_row_num < jobs_per_tile_col; job_row_num++, jobs_per_tile++) { job_queue_curr->job_info.vert_unit_row_num = job_row_num; job_queue_curr->job_info.tile_col_id = tile_col; job_queue_curr->job_info.tile_row_id = tile_row; job_queue_curr->next = (void *)(job_queue_temp + 1); job_queue_curr = ++job_queue_temp; if (ENCODE_JOB == job_type) { if (jobs_per_tile >= multi_thread_ctxt->num_tile_vert_sbs[tile_row] - 1) { tile_row++; jobs_per_tile = -1; } } } // Set the last pointer to NULL job_queue_curr += -1; job_queue_curr->next = (void *)NULL; // Move to the next tile job_queue += jobs_per_tile_col; } for (i = 0; i < cpi->num_workers; i++) { EncWorkerData *thread_data; thread_data = &cpi->tile_thr_data[i]; thread_data->thread_id = i; for (tile_col = 0; tile_col < tile_cols; tile_col++) thread_data->tile_completion_status[tile_col] = 0; } } int vp9_get_tiles_proc_status(MultiThreadHandle *multi_thread_ctxt, int *tile_completion_status, int *cur_tile_id, int tile_cols) { int tile_col; int tile_id = -1; // Stores the tile ID with minimum proc done int max_num_jobs_remaining = 0; int num_jobs_remaining; // Mark the 
completion to avoid a recheck in the loop tile_completion_status[*cur_tile_id] = 1; // Check for the status of all the tiles for (tile_col = 0; tile_col < tile_cols; tile_col++) { if (tile_completion_status[tile_col] == 0) { num_jobs_remaining = vp9_get_job_queue_status(multi_thread_ctxt, tile_col); // Mark the completion to avoid checks during future switches across tiles if (num_jobs_remaining == 0) tile_completion_status[tile_col] = 1; if (num_jobs_remaining > max_num_jobs_remaining) { max_num_jobs_remaining = num_jobs_remaining; tile_id = tile_col; } } } if (-1 == tile_id) { return 1; } else { // Update the cur ID to the next tile ID that will be processed, // which will be the least processed tile *cur_tile_id = tile_id; return 0; } } libvpx-1.8.2/vp9/encoder/vp9_multi_thread.h000066400000000000000000000027321357355204000206230ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_MULTI_THREAD_H_ #define VPX_VP9_ENCODER_VP9_MULTI_THREAD_H_ #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_job_queue.h" void *vp9_enc_grp_get_next_job(MultiThreadHandle *multi_thread_ctxt, int tile_id); void vp9_prepare_job_queue(VP9_COMP *cpi, JOB_TYPE job_type); int vp9_get_job_queue_status(MultiThreadHandle *multi_thread_ctxt, int cur_tile_id); void vp9_assign_tile_to_thread(MultiThreadHandle *multi_thread_ctxt, int tile_cols, int num_workers); void vp9_multi_thread_tile_init(VP9_COMP *cpi); void vp9_row_mt_mem_alloc(VP9_COMP *cpi); void vp9_row_mt_alloc_rd_thresh(VP9_COMP *const cpi, TileDataEnc *const this_tile); void vp9_row_mt_mem_dealloc(VP9_COMP *cpi); int vp9_get_tiles_proc_status(MultiThreadHandle *multi_thread_ctxt, int *tile_completion_status, int *cur_tile_id, int tile_cols); #endif // VPX_VP9_ENCODER_VP9_MULTI_THREAD_H_ libvpx-1.8.2/vp9/encoder/vp9_noise_estimate.c000066400000000000000000000263721357355204000211530ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <limits.h> #include <math.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_scale/yv12config.h" #include "vpx/vpx_integer.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/encoder/vp9_context_tree.h" #include "vp9/encoder/vp9_noise_estimate.h" #include "vp9/encoder/vp9_encoder.h" #if CONFIG_VP9_TEMPORAL_DENOISING // For SVC: only do noise estimation on top spatial layer. static INLINE int noise_est_svc(const struct VP9_COMP *const cpi) { return (!cpi->use_svc || (cpi->use_svc && cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)); } #endif void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height) { ne->enabled = 0; ne->level = (width * height < 1280 * 720) ?
kLowLow : kLow; ne->value = 0; ne->count = 0; ne->thresh = 90; ne->last_w = 0; ne->last_h = 0; if (width * height >= 1920 * 1080) { ne->thresh = 200; } else if (width * height >= 1280 * 720) { ne->thresh = 140; } else if (width * height >= 640 * 360) { ne->thresh = 115; } ne->num_frames_estimate = 15; ne->adapt_thresh = (3 * ne->thresh) >> 1; } static int enable_noise_estimation(VP9_COMP *const cpi) { #if CONFIG_VP9_HIGHBITDEPTH if (cpi->common.use_highbitdepth) return 0; #endif // Enable noise estimation if denoising is on. #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) && cpi->common.width >= 320 && cpi->common.height >= 180) return 1; #endif // Only allow noise estimate under certain encoding mode. // Enabled for 1 pass CBR, speed >=5, and if resolution is same as original. // Not enabled for SVC mode and screen_content_mode. // Not enabled for low resolutions. if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.speed >= 5 && cpi->resize_state == ORIG && cpi->resize_pending == 0 && !cpi->use_svc && cpi->oxcf.content != VP9E_CONTENT_SCREEN && cpi->common.width * cpi->common.height >= 640 * 360) return 1; else return 0; } #if CONFIG_VP9_TEMPORAL_DENOISING static void copy_frame(YV12_BUFFER_CONFIG *const dest, const YV12_BUFFER_CONFIG *const src) { int r; const uint8_t *srcbuf = src->y_buffer; uint8_t *destbuf = dest->y_buffer; assert(dest->y_width == src->y_width); assert(dest->y_height == src->y_height); for (r = 0; r < dest->y_height; ++r) { memcpy(destbuf, srcbuf, dest->y_width); destbuf += dest->y_stride; srcbuf += src->y_stride; } } #endif // CONFIG_VP9_TEMPORAL_DENOISING NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne) { int noise_level = kLowLow; if (ne->value > (ne->thresh << 1)) { noise_level = kHigh; } else { if (ne->value > ne->thresh) noise_level = kMedium; else if (ne->value > (ne->thresh >> 1)) noise_level = kLow; else noise_level = kLowLow; } return noise_level; } void vp9_update_noise_estimate(VP9_COMP *const cpi) { const VP9_COMMON *const cm = &cpi->common; NOISE_ESTIMATE *const ne = &cpi->noise_estimate; const int low_res = (cm->width <= 352 && cm->height <= 288); // Estimate of noise level every frame_period frames. int frame_period = 8; int thresh_consec_zeromv = 6; int frame_counter = cm->current_video_frame; // Estimate is between current source and last source. YV12_BUFFER_CONFIG *last_source = cpi->Last_Source; #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) { last_source = &cpi->denoiser.last_source; // Tune these thresholds for different resolutions when denoising is // enabled. 
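// A sketch of the tuning below: for widths in (640, 1920] the consecutive
// zero-MV requirement drops from 6 to 2 frames, presumably because fewer
// blocks at higher resolutions survive the stricter steadiness test.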
if (cm->width > 640 && cm->width <= 1920) { thresh_consec_zeromv = 2; } } #endif ne->enabled = enable_noise_estimation(cpi); if (cpi->svc.number_spatial_layers > 1) frame_counter = cpi->svc.current_superframe; if (!ne->enabled || frame_counter % frame_period != 0 || last_source == NULL || (cpi->svc.number_spatial_layers == 1 && (ne->last_w != cm->width || ne->last_h != cm->height))) { #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) copy_frame(&cpi->denoiser.last_source, cpi->Source); #endif if (last_source != NULL) { ne->last_w = cm->width; ne->last_h = cm->height; } return; } else if (frame_counter > 60 && cpi->svc.num_encoded_top_layer > 1 && cpi->rc.frames_since_key > cpi->svc.number_spatial_layers && cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1 && cpi->rc.avg_frame_low_motion < (low_res ? 60 : 40)) { // Force noise estimation to 0 and denoiser off if content has high motion. ne->level = kLowLow; ne->count = 0; ne->num_frames_estimate = 10; #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi) && cpi->svc.current_superframe > 1) { vp9_denoiser_set_noise_level(cpi, ne->level); copy_frame(&cpi->denoiser.last_source, cpi->Source); } #endif return; } else { unsigned int bin_size = 100; unsigned int hist[MAX_VAR_HIST_BINS] = { 0 }; unsigned int hist_avg[MAX_VAR_HIST_BINS]; unsigned int max_bin = 0; unsigned int max_bin_count = 0; unsigned int bin_cnt; int bsize = BLOCK_16X16; // Loop over sub-sample of 16x16 blocks of frame, and for blocks that have // been encoded as zero/small mv at least x consecutive frames, compute // the variance to update estimate of noise in the source. const uint8_t *src_y = cpi->Source->y_buffer; const int src_ystride = cpi->Source->y_stride; const uint8_t *last_src_y = last_source->y_buffer; const int last_src_ystride = last_source->y_stride; const uint8_t *src_u = cpi->Source->u_buffer; const uint8_t *src_v = cpi->Source->v_buffer; const int src_uvstride = cpi->Source->uv_stride; int mi_row, mi_col; int num_low_motion = 0; int frame_low_motion = 1; for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { int bl_index = mi_row * cm->mi_cols + mi_col; if (cpi->consec_zero_mv[bl_index] > thresh_consec_zeromv) num_low_motion++; } } if (num_low_motion < ((3 * cm->mi_rows * cm->mi_cols) >> 3)) frame_low_motion = 0; for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) { for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) { // 16x16 blocks, 1/4 sample of frame. if (mi_row % 4 == 0 && mi_col % 4 == 0 && mi_row < cm->mi_rows - 1 && mi_col < cm->mi_cols - 1) { int bl_index = mi_row * cm->mi_cols + mi_col; int bl_index1 = bl_index + 1; int bl_index2 = bl_index + cm->mi_cols; int bl_index3 = bl_index2 + 1; int consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], VPXMIN(cpi->consec_zero_mv[bl_index1], VPXMIN(cpi->consec_zero_mv[bl_index2], cpi->consec_zero_mv[bl_index3]))); // Only consider blocks that are likely steady background. i.e, have // been encoded as zero/low motion x (= thresh_consec_zeromv) frames // in a row. consec_zero_mv[] defined for 8x8 blocks, so consider all // 4 sub-blocks for 16x16 block. And exclude this frame if // high_source_sad is true (i.e., scene/content change). 
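// Illustrative numbers: with bin_size = 100, a co-located-block variance
// of 437 lands in hist[4]; once the frame has been scanned, the peak bin
// of the smoothed histogram (max_bin) is scaled by 40 and blended into
// ne->value with 1/4 weight, so a peak at bin 4 pulls the estimate
// toward 160.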
if (frame_low_motion && consec_zeromv > thresh_consec_zeromv && !cpi->rc.high_source_sad && !cpi->svc.high_source_sad_superframe) { int is_skin = 0; if (cpi->use_skin_detection) { is_skin = vp9_compute_skin_block(src_y, src_u, src_v, src_ystride, src_uvstride, bsize, consec_zeromv, 0); } if (!is_skin) { unsigned int sse; // Compute variance between co-located blocks from current and // last input frames. unsigned int variance = cpi->fn_ptr[bsize].vf( src_y, src_ystride, last_src_y, last_src_ystride, &sse); unsigned int hist_index = variance / bin_size; if (hist_index < MAX_VAR_HIST_BINS) hist[hist_index]++; else if (hist_index < 3 * (MAX_VAR_HIST_BINS >> 1)) hist[MAX_VAR_HIST_BINS - 1]++; // Account for the tail } } } src_y += 8; last_src_y += 8; src_u += 4; src_v += 4; } src_y += (src_ystride << 3) - (cm->mi_cols << 3); last_src_y += (last_src_ystride << 3) - (cm->mi_cols << 3); src_u += (src_uvstride << 2) - (cm->mi_cols << 2); src_v += (src_uvstride << 2) - (cm->mi_cols << 2); } ne->last_w = cm->width; ne->last_h = cm->height; // Adjust histogram to account for effect that histogram flattens // and shifts to zero as scene darkens. if (hist[0] > 10 && (hist[MAX_VAR_HIST_BINS - 1] > hist[0] >> 2)) { hist[0] = 0; hist[1] >>= 2; hist[2] >>= 2; hist[3] >>= 2; hist[4] >>= 1; hist[5] >>= 1; hist[6] = 3 * hist[6] >> 1; hist[MAX_VAR_HIST_BINS - 1] >>= 1; } // Average hist[] and find largest bin for (bin_cnt = 0; bin_cnt < MAX_VAR_HIST_BINS; bin_cnt++) { if (bin_cnt == 0) hist_avg[bin_cnt] = (hist[0] + hist[1] + hist[2]) / 3; else if (bin_cnt == MAX_VAR_HIST_BINS - 1) hist_avg[bin_cnt] = hist[MAX_VAR_HIST_BINS - 1] >> 2; else if (bin_cnt == MAX_VAR_HIST_BINS - 2) hist_avg[bin_cnt] = (hist[bin_cnt - 1] + 2 * hist[bin_cnt] + (hist[bin_cnt + 1] >> 1) + 2) >> 2; else hist_avg[bin_cnt] = (hist[bin_cnt - 1] + 2 * hist[bin_cnt] + hist[bin_cnt + 1] + 2) >> 2; if (hist_avg[bin_cnt] > max_bin_count) { max_bin_count = hist_avg[bin_cnt]; max_bin = bin_cnt; } } // Scale by 40 to work with existing thresholds ne->value = (int)((3 * ne->value + max_bin * 40) >> 2); // Quickly increase VNR strength when the noise level increases suddenly. if (ne->level < kMedium && ne->value > ne->adapt_thresh) { ne->count = ne->num_frames_estimate; } else { ne->count++; } if (ne->count == ne->num_frames_estimate) { // Reset counter and check noise level condition. ne->num_frames_estimate = 30; ne->count = 0; ne->level = vp9_noise_estimate_extract_level(ne); #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) vp9_denoiser_set_noise_level(cpi, ne->level); #endif } } #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && noise_est_svc(cpi)) copy_frame(&cpi->denoiser.last_source, cpi->Source); #endif } libvpx-1.8.2/vp9/encoder/vp9_noise_estimate.h000066400000000000000000000025311357355204000211470ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_ENCODER_VP9_NOISE_ESTIMATE_H_ #define VPX_VP9_ENCODER_VP9_NOISE_ESTIMATE_H_ #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_skin_detection.h" #include "vpx_scale/yv12config.h" #if CONFIG_VP9_TEMPORAL_DENOISING #include "vp9/encoder/vp9_denoiser.h" #endif #ifdef __cplusplus extern "C" { #endif #define MAX_VAR_HIST_BINS 20 typedef enum noise_level { kLowLow, kLow, kMedium, kHigh } NOISE_LEVEL; typedef struct noise_estimate { int enabled; NOISE_LEVEL level; int value; int thresh; int adapt_thresh; int count; int last_w; int last_h; int num_frames_estimate; } NOISE_ESTIMATE; struct VP9_COMP; void vp9_noise_estimate_init(NOISE_ESTIMATE *const ne, int width, int height); NOISE_LEVEL vp9_noise_estimate_extract_level(NOISE_ESTIMATE *const ne); void vp9_update_noise_estimate(struct VP9_COMP *const cpi); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_NOISE_ESTIMATE_H_ libvpx-1.8.2/vp9/encoder/vp9_non_greedy_mv.c000066400000000000000000000617161357355204000207770ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp9/common/vp9_mv.h" #include "vp9/encoder/vp9_non_greedy_mv.h" // TODO(angiebird): move non_greedy_mv related functions to this file #define LOG2_TABLE_SIZE 1024 static const int log2_table[LOG2_TABLE_SIZE] = { 0, // This is a dummy value 0, 1048576, 1661954, 2097152, 2434718, 2710530, 2943725, 3145728, 3323907, 3483294, 3627477, 3759106, 3880192, 3992301, 4096672, 4194304, 4286015, 4372483, 4454275, 4531870, 4605679, 4676053, 4743299, 4807682, 4869436, 4928768, 4985861, 5040877, 5093962, 5145248, 5194851, 5242880, 5289431, 5334591, 5378443, 5421059, 5462508, 5502851, 5542146, 5580446, 5617800, 5654255, 5689851, 5724629, 5758625, 5791875, 5824409, 5856258, 5887450, 5918012, 5947969, 5977344, 6006160, 6034437, 6062195, 6089453, 6116228, 6142538, 6168398, 6193824, 6218829, 6243427, 6267632, 6291456, 6314910, 6338007, 6360756, 6383167, 6405252, 6427019, 6448477, 6469635, 6490501, 6511084, 6531390, 6551427, 6571202, 6590722, 6609993, 6629022, 6647815, 6666376, 6684713, 6702831, 6720734, 6738427, 6755916, 6773205, 6790299, 6807201, 6823917, 6840451, 6856805, 6872985, 6888993, 6904834, 6920510, 6936026, 6951384, 6966588, 6981641, 6996545, 7011304, 7025920, 7040397, 7054736, 7068940, 7083013, 7096956, 7110771, 7124461, 7138029, 7151476, 7164804, 7178017, 7191114, 7204100, 7216974, 7229740, 7242400, 7254954, 7267405, 7279754, 7292003, 7304154, 7316208, 7328167, 7340032, 7351805, 7363486, 7375079, 7386583, 7398000, 7409332, 7420579, 7431743, 7442826, 7453828, 7464751, 7475595, 7486362, 7497053, 7507669, 7518211, 7528680, 7539077, 7549404, 7559660, 7569847, 7579966, 7590017, 7600003, 7609923, 7619778, 7629569, 7639298, 7648964, 7658569, 7668114, 7677598, 7687023, 7696391, 7705700, 7714952, 7724149, 7733289, 7742375, 7751407, 7760385, 7769310, 7778182, 7787003, 7795773, 7804492, 7813161, 7821781, 7830352, 7838875, 7847350, 7855777, 7864158, 7872493, 7880782, 7889027, 7897226, 7905381, 7913492, 7921561, 7929586, 7937569, 7945510, 7953410, 7961268, 7969086, 7976864, 7984602, 7992301, 7999960, 8007581, 8015164, 8022709, 8030217, 8037687, 8045121, 8052519, 8059880, 8067206, 
8074496, 8081752, 8088973, 8096159, 8103312, 8110431, 8117516, 8124569, 8131589, 8138576, 8145532, 8152455, 8159347, 8166208, 8173037, 8179836, 8186605, 8193343, 8200052, 8206731, 8213380, 8220001, 8226593, 8233156, 8239690, 8246197, 8252676, 8259127, 8265550, 8271947, 8278316, 8284659, 8290976, 8297266, 8303530, 8309768, 8315981, 8322168, 8328330, 8334467, 8340579, 8346667, 8352730, 8358769, 8364784, 8370775, 8376743, 8382687, 8388608, 8394506, 8400381, 8406233, 8412062, 8417870, 8423655, 8429418, 8435159, 8440878, 8446576, 8452252, 8457908, 8463542, 8469155, 8474748, 8480319, 8485871, 8491402, 8496913, 8502404, 8507875, 8513327, 8518759, 8524171, 8529564, 8534938, 8540293, 8545629, 8550947, 8556245, 8561525, 8566787, 8572031, 8577256, 8582464, 8587653, 8592825, 8597980, 8603116, 8608236, 8613338, 8618423, 8623491, 8628542, 8633576, 8638593, 8643594, 8648579, 8653547, 8658499, 8663434, 8668354, 8673258, 8678145, 8683017, 8687874, 8692715, 8697540, 8702350, 8707145, 8711925, 8716690, 8721439, 8726174, 8730894, 8735599, 8740290, 8744967, 8749628, 8754276, 8758909, 8763528, 8768134, 8772725, 8777302, 8781865, 8786415, 8790951, 8795474, 8799983, 8804478, 8808961, 8813430, 8817886, 8822328, 8826758, 8831175, 8835579, 8839970, 8844349, 8848715, 8853068, 8857409, 8861737, 8866053, 8870357, 8874649, 8878928, 8883195, 8887451, 8891694, 8895926, 8900145, 8904353, 8908550, 8912734, 8916908, 8921069, 8925220, 8929358, 8933486, 8937603, 8941708, 8945802, 8949885, 8953957, 8958018, 8962068, 8966108, 8970137, 8974155, 8978162, 8982159, 8986145, 8990121, 8994086, 8998041, 9001986, 9005920, 9009844, 9013758, 9017662, 9021556, 9025440, 9029314, 9033178, 9037032, 9040877, 9044711, 9048536, 9052352, 9056157, 9059953, 9063740, 9067517, 9071285, 9075044, 9078793, 9082533, 9086263, 9089985, 9093697, 9097400, 9101095, 9104780, 9108456, 9112123, 9115782, 9119431, 9123072, 9126704, 9130328, 9133943, 9137549, 9141146, 9144735, 9148316, 9151888, 9155452, 9159007, 9162554, 9166092, 9169623, 9173145, 9176659, 9180165, 9183663, 9187152, 9190634, 9194108, 9197573, 9201031, 9204481, 9207923, 9211357, 9214784, 9218202, 9221613, 9225017, 9228412, 9231800, 9235181, 9238554, 9241919, 9245277, 9248628, 9251971, 9255307, 9258635, 9261956, 9265270, 9268577, 9271876, 9275169, 9278454, 9281732, 9285002, 9288266, 9291523, 9294773, 9298016, 9301252, 9304481, 9307703, 9310918, 9314126, 9317328, 9320523, 9323711, 9326892, 9330067, 9333235, 9336397, 9339552, 9342700, 9345842, 9348977, 9352106, 9355228, 9358344, 9361454, 9364557, 9367654, 9370744, 9373828, 9376906, 9379978, 9383043, 9386102, 9389155, 9392202, 9395243, 9398278, 9401306, 9404329, 9407345, 9410356, 9413360, 9416359, 9419351, 9422338, 9425319, 9428294, 9431263, 9434226, 9437184, 9440136, 9443082, 9446022, 9448957, 9451886, 9454809, 9457726, 9460638, 9463545, 9466446, 9469341, 9472231, 9475115, 9477994, 9480867, 9483735, 9486597, 9489454, 9492306, 9495152, 9497993, 9500828, 9503659, 9506484, 9509303, 9512118, 9514927, 9517731, 9520530, 9523324, 9526112, 9528895, 9531674, 9534447, 9537215, 9539978, 9542736, 9545489, 9548237, 9550980, 9553718, 9556451, 9559179, 9561903, 9564621, 9567335, 9570043, 9572747, 9575446, 9578140, 9580830, 9583514, 9586194, 9588869, 9591540, 9594205, 9596866, 9599523, 9602174, 9604821, 9607464, 9610101, 9612735, 9615363, 9617987, 9620607, 9623222, 9625832, 9628438, 9631040, 9633637, 9636229, 9638818, 9641401, 9643981, 9646556, 9649126, 9651692, 9654254, 9656812, 9659365, 9661914, 9664459, 9666999, 9669535, 9672067, 9674594, 9677118, 9679637, 9682152, 
9684663, 9687169, 9689672, 9692170, 9694665, 9697155, 9699641, 9702123, 9704601, 9707075, 9709545, 9712010, 9714472, 9716930, 9719384, 9721834, 9724279, 9726721, 9729159, 9731593, 9734024, 9736450, 9738872, 9741291, 9743705, 9746116, 9748523, 9750926, 9753326, 9755721, 9758113, 9760501, 9762885, 9765266, 9767642, 9770015, 9772385, 9774750, 9777112, 9779470, 9781825, 9784175, 9786523, 9788866, 9791206, 9793543, 9795875, 9798204, 9800530, 9802852, 9805170, 9807485, 9809797, 9812104, 9814409, 9816710, 9819007, 9821301, 9823591, 9825878, 9828161, 9830441, 9832718, 9834991, 9837261, 9839527, 9841790, 9844050, 9846306, 9848559, 9850808, 9853054, 9855297, 9857537, 9859773, 9862006, 9864235, 9866462, 9868685, 9870904, 9873121, 9875334, 9877544, 9879751, 9881955, 9884155, 9886352, 9888546, 9890737, 9892925, 9895109, 9897291, 9899469, 9901644, 9903816, 9905985, 9908150, 9910313, 9912473, 9914629, 9916783, 9918933, 9921080, 9923225, 9925366, 9927504, 9929639, 9931771, 9933900, 9936027, 9938150, 9940270, 9942387, 9944502, 9946613, 9948721, 9950827, 9952929, 9955029, 9957126, 9959219, 9961310, 9963398, 9965484, 9967566, 9969645, 9971722, 9973796, 9975866, 9977934, 9980000, 9982062, 9984122, 9986179, 9988233, 9990284, 9992332, 9994378, 9996421, 9998461, 10000498, 10002533, 10004565, 10006594, 10008621, 10010644, 10012665, 10014684, 10016700, 10018713, 10020723, 10022731, 10024736, 10026738, 10028738, 10030735, 10032729, 10034721, 10036710, 10038697, 10040681, 10042662, 10044641, 10046617, 10048591, 10050562, 10052530, 10054496, 10056459, 10058420, 10060379, 10062334, 10064287, 10066238, 10068186, 10070132, 10072075, 10074016, 10075954, 10077890, 10079823, 10081754, 10083682, 10085608, 10087532, 10089453, 10091371, 10093287, 10095201, 10097112, 10099021, 10100928, 10102832, 10104733, 10106633, 10108529, 10110424, 10112316, 10114206, 10116093, 10117978, 10119861, 10121742, 10123620, 10125495, 10127369, 10129240, 10131109, 10132975, 10134839, 10136701, 10138561, 10140418, 10142273, 10144126, 10145976, 10147825, 10149671, 10151514, 10153356, 10155195, 10157032, 10158867, 10160699, 10162530, 10164358, 10166184, 10168007, 10169829, 10171648, 10173465, 10175280, 10177093, 10178904, 10180712, 10182519, 10184323, 10186125, 10187925, 10189722, 10191518, 10193311, 10195103, 10196892, 10198679, 10200464, 10202247, 10204028, 10205806, 10207583, 10209357, 10211130, 10212900, 10214668, 10216435, 10218199, 10219961, 10221721, 10223479, 10225235, 10226989, 10228741, 10230491, 10232239, 10233985, 10235728, 10237470, 10239210, 10240948, 10242684, 10244417, 10246149, 10247879, 10249607, 10251333, 10253057, 10254779, 10256499, 10258217, 10259933, 10261647, 10263360, 10265070, 10266778, 10268485, 10270189, 10271892, 10273593, 10275292, 10276988, 10278683, 10280376, 10282068, 10283757, 10285444, 10287130, 10288814, 10290495, 10292175, 10293853, 10295530, 10297204, 10298876, 10300547, 10302216, 10303883, 10305548, 10307211, 10308873, 10310532, 10312190, 10313846, 10315501, 10317153, 10318804, 10320452, 10322099, 10323745, 10325388, 10327030, 10328670, 10330308, 10331944, 10333578, 10335211, 10336842, 10338472, 10340099, 10341725, 10343349, 10344971, 10346592, 10348210, 10349828, 10351443, 10353057, 10354668, 10356279, 10357887, 10359494, 10361099, 10362702, 10364304, 10365904, 10367502, 10369099, 10370694, 10372287, 10373879, 10375468, 10377057, 10378643, 10380228, 10381811, 10383393, 10384973, 10386551, 10388128, 10389703, 10391276, 10392848, 10394418, 10395986, 10397553, 10399118, 10400682, 10402244, 10403804, 10405363, 
10406920, 10408476, 10410030, 10411582, 10413133, 10414682, 10416230, 10417776, 10419320, 10420863, 10422404, 10423944, 10425482, 10427019, 10428554, 10430087, 10431619, 10433149, 10434678, 10436206, 10437731, 10439256, 10440778, 10442299, 10443819, 10445337, 10446854, 10448369, 10449882, 10451394, 10452905, 10454414, 10455921, 10457427, 10458932, 10460435, 10461936, 10463436, 10464935, 10466432, 10467927, 10469422, 10470914, 10472405, 10473895, 10475383, 10476870, 10478355, 10479839, 10481322, 10482802, 10484282, }; static int mi_size_to_block_size(int mi_bsize, int mi_num) { return (mi_num % mi_bsize) ? mi_num / mi_bsize + 1 : mi_num / mi_bsize; } Status vp9_alloc_motion_field_info(MotionFieldInfo *motion_field_info, int frame_num, int mi_rows, int mi_cols) { int frame_idx, rf_idx, square_block_idx; if (motion_field_info->allocated) { // TODO(angiebird): Avoid re-allocate buffer if possible vp9_free_motion_field_info(motion_field_info); } motion_field_info->frame_num = frame_num; motion_field_info->motion_field_array = vpx_calloc(frame_num, sizeof(*motion_field_info->motion_field_array)); for (frame_idx = 0; frame_idx < frame_num; ++frame_idx) { for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES; ++square_block_idx) { BLOCK_SIZE bsize = square_block_idx_to_bsize(square_block_idx); const int mi_height = num_8x8_blocks_high_lookup[bsize]; const int mi_width = num_8x8_blocks_wide_lookup[bsize]; const int block_rows = mi_size_to_block_size(mi_height, mi_rows); const int block_cols = mi_size_to_block_size(mi_width, mi_cols); MotionField *motion_field = &motion_field_info ->motion_field_array[frame_idx][rf_idx][square_block_idx]; Status status = vp9_alloc_motion_field(motion_field, bsize, block_rows, block_cols); if (status == STATUS_FAILED) { return STATUS_FAILED; } } } } motion_field_info->allocated = 1; return STATUS_OK; } Status vp9_alloc_motion_field(MotionField *motion_field, BLOCK_SIZE bsize, int block_rows, int block_cols) { Status status = STATUS_OK; motion_field->ready = 0; motion_field->bsize = bsize; motion_field->block_rows = block_rows; motion_field->block_cols = block_cols; motion_field->block_num = block_rows * block_cols; motion_field->mf = vpx_calloc(motion_field->block_num, sizeof(*motion_field->mf)); if (motion_field->mf == NULL) { status = STATUS_FAILED; } motion_field->set_mv = vpx_calloc(motion_field->block_num, sizeof(*motion_field->set_mv)); if (motion_field->set_mv == NULL) { vpx_free(motion_field->mf); motion_field->mf = NULL; status = STATUS_FAILED; } motion_field->local_structure = vpx_calloc( motion_field->block_num, sizeof(*motion_field->local_structure)); if (motion_field->local_structure == NULL) { vpx_free(motion_field->mf); motion_field->mf = NULL; vpx_free(motion_field->set_mv); motion_field->set_mv = NULL; status = STATUS_FAILED; } return status; } void vp9_free_motion_field(MotionField *motion_field) { vpx_free(motion_field->mf); vpx_free(motion_field->set_mv); vpx_free(motion_field->local_structure); vp9_zero(*motion_field); } void vp9_free_motion_field_info(MotionFieldInfo *motion_field_info) { if (motion_field_info->allocated) { int frame_idx, rf_idx, square_block_idx; for (frame_idx = 0; frame_idx < motion_field_info->frame_num; ++frame_idx) { for (rf_idx = 0; rf_idx < MAX_INTER_REF_FRAMES; ++rf_idx) { for (square_block_idx = 0; square_block_idx < SQUARE_BLOCK_SIZES; ++square_block_idx) { MotionField *motion_field = &motion_field_info 
->motion_field_array[frame_idx][rf_idx][square_block_idx]; vp9_free_motion_field(motion_field); } } } vpx_free(motion_field_info->motion_field_array); motion_field_info->motion_field_array = NULL; motion_field_info->frame_num = 0; motion_field_info->allocated = 0; } } MotionField *vp9_motion_field_info_get_motion_field( MotionFieldInfo *motion_field_info, int frame_idx, int rf_idx, BLOCK_SIZE bsize) { int square_block_idx = get_square_block_idx(bsize); assert(frame_idx < motion_field_info->frame_num); assert(motion_field_info->allocated == 1); return &motion_field_info ->motion_field_array[frame_idx][rf_idx][square_block_idx]; } int vp9_motion_field_is_mv_set(const MotionField *motion_field, int brow, int bcol) { assert(brow >= 0 && brow < motion_field->block_rows); assert(bcol >= 0 && bcol < motion_field->block_cols); return motion_field->set_mv[brow * motion_field->block_cols + bcol]; } int_mv vp9_motion_field_get_mv(const MotionField *motion_field, int brow, int bcol) { assert(brow >= 0 && brow < motion_field->block_rows); assert(bcol >= 0 && bcol < motion_field->block_cols); return motion_field->mf[brow * motion_field->block_cols + bcol]; } int_mv vp9_motion_field_mi_get_mv(const MotionField *motion_field, int mi_row, int mi_col) { const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize]; const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize]; const int brow = mi_row / mi_height; const int bcol = mi_col / mi_width; assert(mi_row % mi_height == 0); assert(mi_col % mi_width == 0); return vp9_motion_field_get_mv(motion_field, brow, bcol); } void vp9_motion_field_mi_set_mv(MotionField *motion_field, int mi_row, int mi_col, int_mv mv) { const int mi_height = num_8x8_blocks_high_lookup[motion_field->bsize]; const int mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize]; const int brow = mi_row / mi_height; const int bcol = mi_col / mi_width; assert(mi_row % mi_height == 0); assert(mi_col % mi_width == 0); assert(brow >= 0 && brow < motion_field->block_rows); assert(bcol >= 0 && bcol < motion_field->block_cols); motion_field->mf[brow * motion_field->block_cols + bcol] = mv; motion_field->set_mv[brow * motion_field->block_cols + bcol] = 1; } void vp9_motion_field_reset_mvs(MotionField *motion_field) { memset(motion_field->set_mv, 0, motion_field->block_num * sizeof(*motion_field->set_mv)); } static int64_t log2_approximation(int64_t v) { assert(v > 0); if (v < LOG2_TABLE_SIZE) { return log2_table[v]; } else { // use linear approximation when v >= 2^10 const int slope = 1477; // slope = 1 / (log(2) * 1024) * (1 << LOG2_PRECISION) assert(LOG2_TABLE_SIZE == 1 << 10); return slope * (v - LOG2_TABLE_SIZE) + (10 << LOG2_PRECISION); } } int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs, int mv_num) { // The behavior of this function is to compute log2 of mv difference, // i.e. min log2(1 + row_diff * row_diff + col_diff * col_diff) // against available neighbor mvs. // Since the log2 is monotonically increasing, we can compute // min row_diff * row_diff + col_diff * col_diff first // then apply log2 in the end. 
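// Worked example (illustrative, not in the original source): for mv = {4, 4}
// with neighbor mvs {0, 0} and {4, 2}, the squared distances are
// 4*4 + 4*4 = 32 and 0 + 2*2 = 4. The minimum is 4, so the returned cost is
// log2_approximation(1 + 4) = log2_table[5] = 2434718, i.e. log2(5) ~= 2.32
// in Q20 (LOG2_PRECISION) fixed point.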
int i; int64_t min_abs_diff = INT64_MAX; int cnt = 0; assert(mv_num <= NB_MVS_NUM); for (i = 0; i < mv_num; ++i) { MV nb_mv = nb_full_mvs[i].as_mv; const int64_t row_diff = abs(mv->row - nb_mv.row); const int64_t col_diff = abs(mv->col - nb_mv.col); const int64_t abs_diff = row_diff * row_diff + col_diff * col_diff; assert(nb_full_mvs[i].as_int != INVALID_MV); min_abs_diff = VPXMIN(abs_diff, min_abs_diff); ++cnt; } if (cnt) { return log2_approximation(1 + min_abs_diff); } return 0; } static FloatMV get_smooth_motion_vector(const FloatMV scaled_search_mv, const FloatMV *tmp_mf, const int (*M)[MF_LOCAL_STRUCTURE_SIZE], int rows, int cols, int row, int col, float alpha) { const FloatMV tmp_mv = tmp_mf[row * cols + col]; int idx_row, idx_col; FloatMV avg_nb_mv = { 0.0f, 0.0f }; FloatMV mv = { 0.0f, 0.0f }; float filter[3][3] = { { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f }, { 1.0f / 6.0f, 0.0f, 1.0f / 6.0f }, { 1.0f / 12.0f, 1.0f / 6.0f, 1.0f / 12.0f } }; for (idx_row = 0; idx_row < 3; ++idx_row) { int nb_row = row + idx_row - 1; for (idx_col = 0; idx_col < 3; ++idx_col) { int nb_col = col + idx_col - 1; if (nb_row < 0 || nb_col < 0 || nb_row >= rows || nb_col >= cols) { avg_nb_mv.row += (tmp_mv.row) * filter[idx_row][idx_col]; avg_nb_mv.col += (tmp_mv.col) * filter[idx_row][idx_col]; } else { const FloatMV nb_mv = tmp_mf[nb_row * cols + nb_col]; avg_nb_mv.row += (nb_mv.row) * filter[idx_row][idx_col]; avg_nb_mv.col += (nb_mv.col) * filter[idx_row][idx_col]; } } } { // M is the local variance of reference frame float M00 = M[row * cols + col][0]; float M01 = M[row * cols + col][1]; float M10 = M[row * cols + col][2]; float M11 = M[row * cols + col][3]; float det = (M00 + alpha) * (M11 + alpha) - M01 * M10; float inv_M00 = (M11 + alpha) / det; float inv_M01 = -M01 / det; float inv_M10 = -M10 / det; float inv_M11 = (M00 + alpha) / det; float inv_MM00 = inv_M00 * M00 + inv_M01 * M10; float inv_MM01 = inv_M00 * M01 + inv_M01 * M11; float inv_MM10 = inv_M10 * M00 + inv_M11 * M10; float inv_MM11 = inv_M10 * M01 + inv_M11 * M11; mv.row = inv_M00 * avg_nb_mv.row * alpha + inv_M01 * avg_nb_mv.col * alpha + inv_MM00 * scaled_search_mv.row + inv_MM01 * scaled_search_mv.col; mv.col = inv_M10 * avg_nb_mv.row * alpha + inv_M11 * avg_nb_mv.col * alpha + inv_MM10 * scaled_search_mv.row + inv_MM11 * scaled_search_mv.col; } return mv; } void vp9_get_smooth_motion_field(const MV *search_mf, const int (*M)[MF_LOCAL_STRUCTURE_SIZE], int rows, int cols, BLOCK_SIZE bsize, float alpha, int num_iters, MV *smooth_mf) { // M is the local variation of reference frame // build two buffers FloatMV *input = (FloatMV *)malloc(rows * cols * sizeof(FloatMV)); FloatMV *output = (FloatMV *)malloc(rows * cols * sizeof(FloatMV)); int idx; int row, col; int bw = 4 << b_width_log2_lookup[bsize]; int bh = 4 << b_height_log2_lookup[bsize]; // copy search results to input buffer for (idx = 0; idx < rows * cols; ++idx) { input[idx].row = (float)search_mf[idx].row / bh; input[idx].col = (float)search_mf[idx].col / bw; } for (idx = 0; idx < num_iters; ++idx) { FloatMV *tmp; for (row = 0; row < rows; ++row) { for (col = 0; col < cols; ++col) { // note: the scaled_search_mf and smooth_mf are all scaled by macroblock // size const MV search_mv = search_mf[row * cols + col]; FloatMV scaled_search_mv = { (float)search_mv.row / bh, (float)search_mv.col / bw }; output[row * cols + col] = get_smooth_motion_vector( scaled_search_mv, input, M, rows, cols, row, col, alpha); } } // swap buffers tmp = input; input = output; output = tmp; } // copy 
smoothed results to output for (idx = 0; idx < rows * cols; ++idx) { smooth_mf[idx].row = (int)(input[idx].row * bh); smooth_mf[idx].col = (int)(input[idx].col * bw); } free(input); free(output); } void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame, const YV12_BUFFER_CONFIG *ref_frame, const MV *search_mf, const vp9_variance_fn_ptr_t *fn_ptr, int rows, int cols, BLOCK_SIZE bsize, int (*M)[MF_LOCAL_STRUCTURE_SIZE]) { const int bw = 4 << b_width_log2_lookup[bsize]; const int bh = 4 << b_height_log2_lookup[bsize]; const int cur_stride = cur_frame->y_stride; const int ref_stride = ref_frame->y_stride; const int width = ref_frame->y_width; const int height = ref_frame->y_height; int row, col; for (row = 0; row < rows; ++row) { for (col = 0; col < cols; ++col) { int cur_offset = row * bh * cur_stride + col * bw; uint8_t *center = cur_frame->y_buffer + cur_offset; int ref_h = row * bh + search_mf[row * cols + col].row; int ref_w = col * bw + search_mf[row * cols + col].col; int ref_offset; uint8_t *target; uint8_t *nb; int search_dist; int nb_dist; int I_row = 0, I_col = 0; // TODO(Dan): handle the case where the reference block lies beyond the // frame boundary ref_h = ref_h < 0 ? 0 : (ref_h >= height - bh ? height - bh - 1 : ref_h); ref_w = ref_w < 0 ? 0 : (ref_w >= width - bw ? width - bw - 1 : ref_w); // compute the search result's distortion // TODO(Dan): maybe use a vp9 helper to find the reference block; the // offset is computed directly here so the results stay comparable with // the original python prototype ref_offset = ref_h * ref_stride + ref_w; target = ref_frame->y_buffer + ref_offset; search_dist = fn_ptr->sdf(center, cur_stride, target, ref_stride); // compute target's neighbors' distortions // TODO(Dan): if using padding, the boundary condition may vary // up if (ref_h - bh >= 0) { nb = target - ref_stride * bh; nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride); I_row += nb_dist - search_dist; } // down if (ref_h + bh < height - bh) { nb = target + ref_stride * bh; nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride); I_row += nb_dist - search_dist; } if (ref_h - bh >= 0 && ref_h + bh < height - bh) { I_row /= 2; } I_row /= (bw * bh); // left if (ref_w - bw >= 0) { nb = target - bw; nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride); I_col += nb_dist - search_dist; } // right if (ref_w + bw < width - bw) { nb = target + bw; nb_dist = fn_ptr->sdf(center, cur_stride, nb, ref_stride); I_col += nb_dist - search_dist; } if (ref_w - bw >= 0 && ref_w + bw < width - bw) { I_col /= 2; } I_col /= (bw * bh); M[row * cols + col][0] = I_row * I_row; M[row * cols + col][1] = I_row * I_col; M[row * cols + col][2] = I_col * I_row; M[row * cols + col][3] = I_col * I_col; } } } libvpx-1.8.2/vp9/encoder/vp9_non_greedy_mv.h000066400000000000000000000076471357355204000210020ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #ifndef VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_ #define VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_ #include "vp9/common/vp9_enums.h" #include "vp9/common/vp9_blockd.h" #include "vpx_scale/yv12config.h" #include "vpx_dsp/variance.h" #ifdef __cplusplus extern "C" { #endif #define NB_MVS_NUM 4 #define LOG2_PRECISION 20 #define MF_LOCAL_STRUCTURE_SIZE 4 #define SQUARE_BLOCK_SIZES 4 typedef enum Status { STATUS_OK = 0, STATUS_FAILED = 1 } Status; typedef struct MotionField { int ready; BLOCK_SIZE bsize; int block_rows; int block_cols; int block_num; // block_num == block_rows * block_cols int (*local_structure)[MF_LOCAL_STRUCTURE_SIZE]; int_mv *mf; int *set_mv; int mv_log_scale; } MotionField; typedef struct MotionFieldInfo { int frame_num; int allocated; MotionField (*motion_field_array)[MAX_INTER_REF_FRAMES][SQUARE_BLOCK_SIZES]; } MotionFieldInfo; typedef struct { float row, col; } FloatMV; static INLINE int get_square_block_idx(BLOCK_SIZE bsize) { if (bsize == BLOCK_4X4) { return 0; } if (bsize == BLOCK_8X8) { return 1; } if (bsize == BLOCK_16X16) { return 2; } if (bsize == BLOCK_32X32) { return 3; } assert(0 && "ERROR: non-square block size"); return -1; } static INLINE BLOCK_SIZE square_block_idx_to_bsize(int square_block_idx) { if (square_block_idx == 0) { return BLOCK_4X4; } if (square_block_idx == 1) { return BLOCK_8X8; } if (square_block_idx == 2) { return BLOCK_16X16; } if (square_block_idx == 3) { return BLOCK_32X32; } assert(0 && "ERROR: invalid square_block_idx"); return BLOCK_INVALID; } Status vp9_alloc_motion_field_info(MotionFieldInfo *motion_field_info, int frame_num, int mi_rows, int mi_cols); Status vp9_alloc_motion_field(MotionField *motion_field, BLOCK_SIZE bsize, int block_rows, int block_cols); void vp9_free_motion_field(MotionField *motion_field); void vp9_free_motion_field_info(MotionFieldInfo *motion_field_info); int64_t vp9_nb_mvs_inconsistency(const MV *mv, const int_mv *nb_full_mvs, int mv_num); void vp9_get_smooth_motion_field(const MV *search_mf, const int (*M)[MF_LOCAL_STRUCTURE_SIZE], int rows, int cols, BLOCK_SIZE bsize, float alpha, int num_iters, MV *smooth_mf); void vp9_get_local_structure(const YV12_BUFFER_CONFIG *cur_frame, const YV12_BUFFER_CONFIG *ref_frame, const MV *search_mf, const vp9_variance_fn_ptr_t *fn_ptr, int rows, int cols, BLOCK_SIZE bsize, int (*M)[MF_LOCAL_STRUCTURE_SIZE]); MotionField *vp9_motion_field_info_get_motion_field( MotionFieldInfo *motion_field_info, int frame_idx, int rf_idx, BLOCK_SIZE bsize); void vp9_motion_field_mi_set_mv(MotionField *motion_field, int mi_row, int mi_col, int_mv mv); void vp9_motion_field_reset_mvs(MotionField *motion_field); int_mv vp9_motion_field_get_mv(const MotionField *motion_field, int brow, int bcol); int_mv vp9_motion_field_mi_get_mv(const MotionField *motion_field, int mi_row, int mi_col); int vp9_motion_field_is_mv_set(const MotionField *motion_field, int brow, int bcol); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_NON_GREEDY_MV_H_ libvpx-1.8.2/vp9/encoder/vp9_partition_models.h000066400000000000000000001121531357355204000215150ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #ifndef VPX_VP9_ENCODER_VP9_PARTITION_MODELS_H_ #define VPX_VP9_ENCODER_VP9_PARTITION_MODELS_H_ #ifdef __cplusplus extern "C" { #endif #define NN_MAX_HIDDEN_LAYERS 10 #define NN_MAX_NODES_PER_LAYER 128 // Neural net model config. It defines the layout of a neural net model, such as // the number of inputs/outputs, number of layers, the number of nodes in each // layer, as well as the weights and bias of each node. typedef struct { int num_inputs; // Number of input nodes, i.e. features. int num_outputs; // Number of output nodes. int num_hidden_layers; // Number of hidden layers, maximum 10. // Number of nodes for each hidden layer. int num_hidden_nodes[NN_MAX_HIDDEN_LAYERS]; // Weight parameters, indexed by layer. const float *weights[NN_MAX_HIDDEN_LAYERS + 1]; // Bias parameters, indexed by layer. const float *bias[NN_MAX_HIDDEN_LAYERS + 1]; } NN_CONFIG; // Partition search breakout model. #define FEATURES 4 #define Q_CTX 3 #define RESOLUTION_CTX 2 static const float vp9_partition_breakout_weights_64[RESOLUTION_CTX][Q_CTX][FEATURES + 1] = { { { -0.016673f, -0.001025f, -0.000032f, 0.000833f, 1.94261885f - 2.1f, }, { -0.160867f, -0.002101f, 0.000011f, 0.002448f, 1.65738142f - 2.5f, }, { -0.628934f, -0.011459f, -0.000009f, 0.013833f, 1.47982645f - 1.6f, }, }, { { -0.064309f, -0.006121f, 0.000232f, 0.005778f, 0.7989465f - 5.0f, }, { -0.314957f, -0.009346f, -0.000225f, 0.010072f, 2.80695581f - 5.5f, }, { -0.635535f, -0.015135f, 0.000091f, 0.015247f, 2.90381241f - 5.0f, }, }, }; static const float vp9_partition_breakout_weights_32[RESOLUTION_CTX][Q_CTX][FEATURES + 1] = { { { -0.010554f, -0.003081f, -0.000134f, 0.004491f, 1.68445992f - 3.5f, }, { -0.051489f, -0.007609f, 0.000016f, 0.009792f, 1.28089404f - 2.5f, }, { -0.163097f, -0.013081f, 0.000022f, 0.019006f, 1.36129403f - 3.2f, }, }, { { -0.024629f, -0.006492f, -0.000254f, 0.004895f, 1.27919173f - 4.5f, }, { -0.083936f, -0.009827f, -0.000200f, 0.010399f, 2.73731065f - 4.5f, }, { -0.279052f, -0.013334f, 0.000289f, 0.023203f, 2.43595719f - 3.5f, }, }, }; static const float vp9_partition_breakout_weights_16[RESOLUTION_CTX][Q_CTX][FEATURES + 1] = { { { -0.013154f, -0.002404f, -0.000977f, 0.008450f, 2.57404566f - 5.5f, }, { -0.019146f, -0.004018f, 0.000064f, 0.008187f, 2.15043926f - 2.5f, }, { -0.075755f, -0.010858f, 0.000030f, 0.024505f, 2.06848121f - 2.5f, }, }, { { -0.007636f, -0.002751f, -0.000682f, 0.005968f, 0.19225763f - 4.5f, }, { -0.047306f, -0.009113f, -0.000518f, 0.016007f, 2.61068869f - 4.0f, }, { -0.069336f, -0.010448f, -0.001120f, 0.023083f, 1.47591054f - 5.5f, }, }, }; static const float vp9_partition_breakout_weights_8[RESOLUTION_CTX][Q_CTX] [FEATURES + 1] = { { { -0.011807f, -0.009873f, -0.000931f, 0.034768f, 1.32254851f - 2.0f, }, { -0.003861f, -0.002701f, 0.000100f, 0.013876f, 1.96755111f - 1.5f, }, { -0.013522f, -0.008677f, -0.000562f, 0.034468f, 1.53440356f - 1.5f, }, }, { { -0.003221f, -0.002125f, 0.000993f, 0.012768f, 0.03541421f - 2.0f, }, { -0.006069f, -0.007335f, 0.000229f, 0.026104f, 0.17135315f - 1.5f, }, { -0.039894f, -0.011419f, 0.000070f, 0.061817f, 0.6739977f - 1.5f, }, }, }; #undef FEATURES #undef Q_CTX #undef RESOLUTION_CTX // Rectangular partition search pruning model. 
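// Before the rectangular-partition models that follow, an illustrative
// sketch (not part of libvpx) of how a model in the NN_CONFIG format above
// could be evaluated. It assumes fully-connected layers, ReLU activations
// on the hidden nodes and a linear output layer; the encoder's own
// evaluation routine may differ in detail, so treat this as a reading aid
// rather than the reference implementation.
static void nn_forward_sketch(const NN_CONFIG *nn_config,
                              const float *features, float *output) {
  float buf[2][NN_MAX_NODES_PER_LAYER];
  const float *input_nodes = features;
  int num_input_nodes = nn_config->num_inputs;
  int layer, node, i;
  // Propagate through the hidden layers, ping-ponging between the two
  // scratch buffers.
  for (layer = 0; layer < nn_config->num_hidden_layers; ++layer) {
    const float *layer_weights = nn_config->weights[layer];
    const float *layer_bias = nn_config->bias[layer];
    const int num_output_nodes = nn_config->num_hidden_nodes[layer];
    float *output_nodes = buf[layer & 1];
    for (node = 0; node < num_output_nodes; ++node) {
      float val = layer_bias[node];
      for (i = 0; i < num_input_nodes; ++i)
        val += layer_weights[node * num_input_nodes + i] * input_nodes[i];
      output_nodes[node] = val > 0.0f ? val : 0.0f;  // ReLU (assumed)
    }
    input_nodes = output_nodes;
    num_input_nodes = num_output_nodes;
  }
  // Final, linear output layer.
  {
    const int last = nn_config->num_hidden_layers;
    const float *layer_weights = nn_config->weights[last];
    const float *layer_bias = nn_config->bias[last];
    for (node = 0; node < nn_config->num_outputs; ++node) {
      float val = layer_bias[node];
      for (i = 0; i < num_input_nodes; ++i)
        val += layer_weights[node * num_input_nodes + i] * input_nodes[i];
      output[node] = val;
    }
  }
}
// All models in this file use a single hidden layer, so one evaluation
// costs roughly num_inputs * num_hidden_nodes[0] +
// num_hidden_nodes[0] * num_outputs multiply-adds.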
#define FEATURES 8 #define LABELS 4 #define NODES 16 static const float vp9_rect_part_nn_weights_16_layer0[FEATURES * NODES] = { -0.432522f, 0.133070f, -0.169187f, 0.768340f, 0.891228f, 0.554458f, 0.356000f, 0.403621f, 0.809165f, 0.778214f, -0.520357f, 0.301451f, -0.386972f, -0.314402f, 0.021878f, 1.148746f, -0.462258f, -0.175524f, -0.344589f, -0.475159f, -0.232322f, 0.471147f, -0.489948f, 0.467740f, -0.391550f, 0.208601f, 0.054138f, 0.076859f, -0.309497f, -0.095927f, 0.225917f, 0.011582f, -0.520730f, -0.585497f, 0.174036f, 0.072521f, 0.120771f, -0.517234f, -0.581908f, -0.034003f, -0.694722f, -0.364368f, 0.290584f, 0.038373f, 0.685654f, 0.394019f, 0.759667f, 1.257502f, -0.610516f, -0.185434f, 0.211997f, -0.172458f, 0.044605f, 0.145316f, -0.182525f, -0.147376f, 0.578742f, 0.312412f, -0.446135f, -0.389112f, 0.454033f, 0.260490f, 0.664285f, 0.395856f, -0.231827f, 0.215228f, 0.014856f, -0.395462f, 0.479646f, -0.391445f, -0.357788f, 0.166238f, -0.056818f, -0.027783f, 0.060880f, -1.604710f, 0.531268f, 0.282184f, 0.714944f, 0.093523f, -0.218312f, -0.095546f, -0.285621f, -0.190871f, -0.448340f, -0.016611f, 0.413913f, -0.286720f, -0.158828f, -0.092635f, -0.279551f, 0.166509f, -0.088162f, 0.446543f, -0.276830f, -0.065642f, -0.176346f, -0.984754f, 0.338738f, 0.403809f, 0.738065f, 1.154439f, 0.750764f, 0.770959f, -0.269403f, 0.295651f, -0.331858f, 0.367144f, 0.279279f, 0.157419f, -0.348227f, -0.168608f, -0.956000f, -0.647136f, 0.250516f, 0.858084f, 0.809802f, 0.492408f, 0.804841f, 0.282802f, 0.079395f, -0.291771f, -0.024382f, -1.615880f, -0.445166f, -0.407335f, -0.483044f, 0.141126f, }; static const float vp9_rect_part_nn_bias_16_layer0[NODES] = { 0.275384f, -0.053745f, 0.000000f, 0.000000f, -0.178103f, 0.513965f, -0.161352f, 0.228551f, 0.000000f, 1.013712f, 0.000000f, 0.000000f, -1.144009f, -0.000006f, -0.241727f, 2.048764f, }; static const float vp9_rect_part_nn_weights_16_layer1[NODES * LABELS] = { -1.435278f, 2.204691f, -0.410718f, 0.202708f, 0.109208f, 1.059142f, -0.306360f, 0.845906f, 0.489654f, -1.121915f, -0.169133f, -0.003385f, 0.660590f, -0.018711f, 1.227158f, -2.967504f, 1.407345f, -1.293243f, -0.386921f, 0.300492f, 0.338824f, -0.083250f, -0.069454f, -1.001827f, -0.327891f, 0.899353f, 0.367397f, -0.118601f, -0.171936f, -0.420646f, -0.803319f, 2.029634f, 0.940268f, -0.664484f, 0.339916f, 0.315944f, 0.157374f, -0.402482f, -0.491695f, 0.595827f, 0.015031f, 0.255887f, -0.466327f, -0.212598f, 0.136485f, 0.033363f, -0.796921f, 1.414304f, -0.282185f, -2.673571f, -0.280994f, 0.382658f, -0.350902f, 0.227926f, 0.062602f, -1.000199f, 0.433731f, 1.176439f, -0.163216f, -0.229015f, -0.640098f, -0.438852f, -0.947700f, 2.203434f, }; static const float vp9_rect_part_nn_bias_16_layer1[LABELS] = { -0.875510f, 0.982408f, 0.560854f, -0.415209f, }; static const NN_CONFIG vp9_rect_part_nnconfig_16 = { FEATURES, // num_inputs LABELS, // num_outputs 1, // num_hidden_layers { NODES, }, // num_hidden_nodes { vp9_rect_part_nn_weights_16_layer0, vp9_rect_part_nn_weights_16_layer1, }, { vp9_rect_part_nn_bias_16_layer0, vp9_rect_part_nn_bias_16_layer1, }, }; static const float vp9_rect_part_nn_weights_32_layer0[FEATURES * NODES] = { -0.147312f, -0.753248f, 0.540206f, 0.661415f, 0.484117f, -0.341609f, 0.016183f, 0.064177f, 0.781580f, 0.902232f, -0.505342f, 0.325183f, -0.231072f, -0.120107f, -0.076216f, 0.120038f, 0.403695f, -0.463301f, -0.192158f, 0.407442f, 0.106633f, 1.072371f, -0.446779f, 0.467353f, 0.318812f, -0.505996f, -0.008768f, -0.239598f, 0.085480f, 0.284640f, -0.365045f, -0.048083f, -0.112090f, -0.067089f, 
0.304138f, -0.228809f, 0.383651f, -0.196882f, 0.477039f, -0.217978f, -0.506931f, -0.125675f, 0.050456f, 1.086598f, 0.732128f, 0.326941f, 0.103952f, 0.121769f, -0.154487f, -0.255514f, 0.030591f, -0.382797f, -0.019981f, -0.326570f, 0.149691f, -0.435633f, -0.070795f, 0.167691f, 0.251413f, -0.153405f, 0.160347f, 0.455107f, -0.968580f, -0.575879f, 0.623115f, -0.069793f, -0.379768f, -0.965807f, -0.062057f, 0.071312f, 0.457098f, 0.350372f, -0.460659f, -0.985393f, 0.359963f, -0.093677f, 0.404272f, -0.326896f, -0.277752f, 0.609322f, -0.114193f, -0.230701f, 0.089208f, 0.645381f, 0.494485f, 0.467876f, -0.166187f, 0.251044f, -0.394661f, 0.192895f, -0.344777f, -0.041893f, -0.111163f, 0.066347f, 0.378158f, -0.455465f, 0.339839f, -0.418207f, -0.356515f, -0.227536f, -0.211091f, -0.122945f, 0.361772f, -0.338095f, 0.004564f, -0.398510f, 0.060876f, -2.132504f, -0.086776f, -0.029166f, 0.039241f, 0.222534f, -0.188565f, -0.288792f, -0.160789f, -0.123905f, 0.397916f, -0.063779f, 0.167210f, -0.445004f, 0.056889f, 0.207280f, 0.000101f, 0.384507f, -1.721239f, -2.036402f, -2.084403f, -2.060483f, }; static const float vp9_rect_part_nn_bias_32_layer0[NODES] = { -0.859251f, -0.109938f, 0.091838f, 0.187817f, -0.728265f, 0.253080f, 0.000000f, -0.357195f, -0.031290f, -1.373237f, -0.761086f, 0.000000f, -0.024504f, 1.765711f, 0.000000f, 1.505390f, }; static const float vp9_rect_part_nn_weights_32_layer1[NODES * LABELS] = { 0.680940f, 1.367178f, 0.403075f, 0.029957f, 0.500917f, 1.407776f, -0.354002f, 0.011667f, 1.663767f, 0.959155f, 0.428323f, -0.205345f, -0.081850f, -3.920103f, -0.243802f, -4.253933f, -0.034020f, -1.361057f, 0.128236f, -0.138422f, -0.025790f, -0.563518f, -0.148715f, -0.344381f, -1.677389f, -0.868332f, -0.063792f, 0.052052f, 0.359591f, 2.739808f, -0.414304f, 3.036597f, -0.075368f, -1.019680f, 0.642501f, 0.209779f, -0.374539f, -0.718294f, -0.116616f, -0.043212f, -1.787809f, -0.773262f, 0.068734f, 0.508309f, 0.099334f, 1.802239f, -0.333538f, 2.708645f, -0.447682f, -2.355555f, -0.506674f, -0.061028f, -0.310305f, -0.375475f, 0.194572f, 0.431788f, -0.789624f, -0.031962f, 0.358353f, 0.382937f, 0.232002f, 2.321813f, -0.037523f, 2.104652f, }; static const float vp9_rect_part_nn_bias_32_layer1[LABELS] = { -0.693383f, 0.773661f, 0.426878f, -0.070619f, }; static const NN_CONFIG vp9_rect_part_nnconfig_32 = { FEATURES, // num_inputs LABELS, // num_outputs 1, // num_hidden_layers { NODES, }, // num_hidden_nodes { vp9_rect_part_nn_weights_32_layer0, vp9_rect_part_nn_weights_32_layer1, }, { vp9_rect_part_nn_bias_32_layer0, vp9_rect_part_nn_bias_32_layer1, }, }; #undef NODES #define NODES 24 static const float vp9_rect_part_nn_weights_64_layer0[FEATURES * NODES] = { 0.024671f, -0.220610f, -0.284362f, -0.069556f, -0.315700f, 0.187861f, 0.139782f, 0.063110f, 0.796561f, 0.172868f, -0.662194f, -1.393074f, 0.085003f, 0.393381f, 0.358477f, -0.187268f, -0.370745f, 0.218287f, 0.027271f, -0.254089f, -0.048236f, -0.459137f, 0.253171f, 0.122598f, -0.550107f, -0.568456f, 0.159866f, -0.246534f, 0.096384f, -0.255460f, 0.077864f, -0.334837f, 0.026921f, -0.697252f, 0.345262f, 1.343578f, 0.815984f, 1.118211f, 1.574016f, 0.578476f, -0.285967f, -0.508672f, 0.118137f, 0.037695f, 1.540510f, 1.256648f, 1.163819f, 1.172027f, 0.661551f, -0.111980f, -0.434204f, -0.894217f, 0.570524f, 0.050292f, -0.113680f, 0.000784f, -0.211554f, -0.369394f, 0.158306f, -0.512505f, -0.238696f, 0.091498f, -0.448490f, -0.491268f, -0.353112f, -0.303315f, -0.428438f, 0.127998f, -0.406790f, -0.401786f, -0.279888f, -0.384223f, 0.026100f, 0.041621f, -0.315818f, -0.087888f, 
0.353497f, 0.163123f, -0.380128f, -0.090334f, -0.216647f, -0.117849f, -0.173502f, 0.301871f, 0.070854f, 0.114627f, -0.050545f, -0.160381f, 0.595294f, 0.492696f, -0.453858f, -1.154139f, 0.126000f, 0.034550f, 0.456665f, -0.236618f, -0.112640f, 0.050759f, -0.449162f, 0.110059f, 0.147116f, 0.249358f, -0.049894f, 0.063351f, -0.004467f, 0.057242f, -0.482015f, -0.174335f, -0.085617f, -0.333808f, -0.358440f, -0.069006f, 0.099260f, -1.243430f, -0.052963f, 0.112088f, -2.661115f, -2.445893f, -2.688174f, -2.624232f, 0.030494f, 0.161311f, 0.012136f, 0.207564f, -2.776856f, -2.791940f, -2.623962f, -2.918820f, 1.231619f, -0.376692f, -0.698078f, 0.110336f, -0.285378f, 0.258367f, -0.180159f, -0.376608f, -0.034348f, -0.130206f, 0.160020f, 0.852977f, 0.580573f, 1.450782f, 1.357596f, 0.787382f, -0.544004f, -0.014795f, 0.032121f, -0.557696f, 0.159994f, -0.540908f, 0.180380f, -0.398045f, 0.705095f, 0.515103f, -0.511521f, -1.271374f, -0.231019f, 0.423647f, 0.064907f, -0.255338f, -0.877748f, -0.667205f, 0.267847f, 0.135229f, 0.617844f, 1.349849f, 1.012623f, 0.730506f, -0.078571f, 0.058401f, 0.053221f, -2.426146f, -0.098808f, -0.138508f, -0.153299f, 0.149116f, -0.444243f, 0.301807f, 0.065066f, 0.092929f, -0.372784f, -0.095540f, 0.192269f, 0.237894f, 0.080228f, -0.214074f, -0.011426f, -2.352367f, -0.085394f, -0.190361f, -0.001177f, 0.089197f, }; static const float vp9_rect_part_nn_bias_64_layer0[NODES] = { 0.000000f, -0.057652f, -0.175413f, -0.175389f, -1.084097f, -1.423801f, -0.076307f, -0.193803f, 0.000000f, -0.066474f, -0.050318f, -0.019832f, -0.038814f, -0.144184f, 2.652451f, 2.415006f, 0.197464f, -0.729842f, -0.173774f, 0.239171f, 0.486425f, 2.463304f, -0.175279f, 2.352637f, }; static const float vp9_rect_part_nn_weights_64_layer1[NODES * LABELS] = { -0.063237f, 1.925696f, -0.182145f, -0.226687f, 0.602941f, -0.941140f, 0.814598f, -0.117063f, 0.282988f, 0.066369f, 0.096951f, 1.049735f, -0.188188f, -0.281227f, -4.836746f, -5.047797f, 0.892358f, 0.417145f, -0.279849f, 1.335945f, 0.660338f, -2.757938f, -0.115714f, -1.862183f, -0.045980f, -1.597624f, -0.586822f, -0.615589f, -0.330537f, 1.068496f, -0.167290f, 0.141290f, -0.112100f, 0.232761f, 0.252307f, -0.399653f, 0.353118f, 0.241583f, 2.635241f, 4.026119f, -1.137327f, -0.052446f, -0.139814f, -1.104256f, -0.759391f, 2.508457f, -0.526297f, 2.095348f, -0.444473f, -1.090452f, 0.584122f, 0.468729f, -0.368865f, 1.041425f, -1.079504f, 0.348837f, 0.390091f, 0.416191f, 0.212906f, -0.660255f, 0.053630f, 0.209476f, 3.595525f, 2.257293f, -0.514030f, 0.074203f, -0.375862f, -1.998307f, -0.930310f, 1.866686f, -0.247137f, 1.087789f, 0.100186f, 0.298150f, 0.165265f, 0.050478f, 0.249167f, 0.371789f, -0.294497f, 0.202954f, 0.037310f, 0.193159f, 0.161551f, 0.301597f, 0.299286f, 0.185946f, 0.822976f, 2.066130f, -1.724588f, 0.055977f, -0.330747f, -0.067747f, -0.475801f, 1.555958f, -0.025808f, -0.081516f, }; static const float vp9_rect_part_nn_bias_64_layer1[LABELS] = { -0.090723f, 0.894968f, 0.844754f, -3.496194f, }; static const NN_CONFIG vp9_rect_part_nnconfig_64 = { FEATURES, // num_inputs LABELS, // num_outputs 1, // num_hidden_layers { NODES, }, // num_hidden_nodes { vp9_rect_part_nn_weights_64_layer0, vp9_rect_part_nn_weights_64_layer1, }, { vp9_rect_part_nn_bias_64_layer0, vp9_rect_part_nn_bias_64_layer1, }, }; #undef FEATURES #undef LABELS #undef NODES #define FEATURES 7 // Partition pruning model(neural nets). 
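// Usage sketch (illustrative, not part of libvpx) for the single-output
// pruning models defined below: a caller could prune the partition search
// by thresholding the model score. The feature vector and the threshold
// are hypothetical stand-ins for whatever the encoder actually supplies;
// nn_forward_sketch is the illustrative evaluator defined earlier in this
// file.
static int prune_partition_sketch(const NN_CONFIG *nn_config,
                                  const float features[FEATURES],
                                  float thresh) {
  float score;
  nn_forward_sketch(nn_config, features, &score);
  return score < thresh;  // prune when the model score falls below thresh
}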
static const float vp9_partition_nn_weights_64x64_layer0[FEATURES * 8] = { -3.571348f, 0.014835f, -3.255393f, -0.098090f, -0.013120f, 0.000221f, 0.056273f, 0.190179f, -0.268130f, -1.828242f, -0.010655f, 0.937244f, -0.435120f, 0.512125f, 1.610679f, 0.190816f, -0.799075f, -0.377348f, -0.144232f, 0.614383f, -0.980388f, 1.754150f, -0.185603f, -0.061854f, -0.807172f, 1.240177f, 1.419531f, -0.438544f, -5.980774f, 0.139045f, -0.032359f, -0.068887f, -1.237918f, 0.115706f, 0.003164f, 2.924212f, 1.246838f, -0.035833f, 0.810011f, -0.805894f, 0.010966f, 0.076463f, -4.226380f, -2.437764f, -0.010619f, -0.020935f, -0.451494f, 0.300079f, -0.168961f, -3.326450f, -2.731094f, 0.002518f, 0.018840f, -1.656815f, 0.068039f, 0.010586f, }; static const float vp9_partition_nn_bias_64x64_layer0[8] = { -3.469882f, 0.683989f, 0.194010f, 0.313782f, -3.153335f, 2.245849f, -1.946190f, -3.740020f, }; static const float vp9_partition_nn_weights_64x64_layer1[8] = { -8.058566f, 0.108306f, -0.280620f, -0.818823f, -6.445117f, 0.865364f, -1.127127f, -8.808660f, }; static const float vp9_partition_nn_bias_64x64_layer1[1] = { 6.46909416f, }; static const NN_CONFIG vp9_partition_nnconfig_64x64 = { FEATURES, // num_inputs 1, // num_outputs 1, // num_hidden_layers { 8, }, // num_hidden_nodes { vp9_partition_nn_weights_64x64_layer0, vp9_partition_nn_weights_64x64_layer1, }, { vp9_partition_nn_bias_64x64_layer0, vp9_partition_nn_bias_64x64_layer1, }, }; static const float vp9_partition_nn_weights_32x32_layer0[FEATURES * 8] = { -0.295437f, -4.002648f, -0.205399f, -0.060919f, 0.708037f, 0.027221f, -0.039137f, -0.907724f, -3.151662f, 0.007106f, 0.018726f, -0.534928f, 0.022744f, 0.000159f, -1.717189f, -3.229031f, -0.027311f, 0.269863f, -0.400747f, -0.394366f, -0.108878f, 0.603027f, 0.455369f, -0.197170f, 1.241746f, -1.347820f, -0.575636f, -0.462879f, -2.296426f, 0.196696f, -0.138347f, -0.030754f, -0.200774f, 0.453795f, 0.055625f, -3.163116f, -0.091003f, -0.027028f, -0.042984f, -0.605185f, 0.143240f, -0.036439f, -0.801228f, 0.313409f, -0.159942f, 0.031267f, 0.886454f, -1.531644f, -0.089655f, 0.037683f, -0.163441f, -0.130454f, -0.058344f, 0.060011f, 0.275387f, 1.552226f, }; static const float vp9_partition_nn_bias_32x32_layer0[8] = { -0.838372f, -2.609089f, -0.055763f, 1.329485f, -1.297638f, -2.636622f, -0.826909f, 1.012644f, }; static const float vp9_partition_nn_weights_32x32_layer1[8] = { -1.792632f, -7.322353f, -0.683386f, 0.676564f, -1.488118f, -7.527719f, 1.240163f, 0.614309f, }; static const float vp9_partition_nn_bias_32x32_layer1[1] = { 4.97422546f, }; static const NN_CONFIG vp9_partition_nnconfig_32x32 = { FEATURES, // num_inputs 1, // num_outputs 1, // num_hidden_layers { 8, }, // num_hidden_nodes { vp9_partition_nn_weights_32x32_layer0, vp9_partition_nn_weights_32x32_layer1, }, { vp9_partition_nn_bias_32x32_layer0, vp9_partition_nn_bias_32x32_layer1, }, }; static const float vp9_partition_nn_weights_16x16_layer0[FEATURES * 8] = { -1.717673f, -4.718130f, -0.125725f, -0.183427f, -0.511764f, 0.035328f, 0.130891f, -3.096753f, 0.174968f, -0.188769f, -0.640796f, 1.305661f, 1.700638f, -0.073806f, -4.006781f, -1.630999f, -0.064863f, -0.086410f, -0.148617f, 0.172733f, -0.018619f, 2.152595f, 0.778405f, -0.156455f, 0.612995f, -0.467878f, 0.152022f, -0.236183f, 0.339635f, -0.087119f, -3.196610f, -1.080401f, -0.637704f, -0.059974f, 1.706298f, -0.793705f, -6.399260f, 0.010624f, -0.064199f, -0.650621f, 0.338087f, -0.001531f, 1.023655f, -3.700272f, -0.055281f, -0.386884f, 0.375504f, -0.898678f, 0.281156f, -0.314611f, 0.863354f, -0.040582f, 
-0.145019f, 0.029329f, -2.197880f, -0.108733f, }; static const float vp9_partition_nn_bias_16x16_layer0[8] = { 0.411516f, -2.143737f, -3.693192f, 2.123142f, -1.356910f, -3.561016f, -0.765045f, -2.417082f, }; static const float vp9_partition_nn_weights_16x16_layer1[8] = { -0.619755f, -2.202391f, -4.337171f, 0.611319f, 0.377677f, -4.998723f, -1.052235f, 1.949922f, }; static const float vp9_partition_nn_bias_16x16_layer1[1] = { 3.20981717f, }; static const NN_CONFIG vp9_partition_nnconfig_16x16 = { FEATURES, // num_inputs 1, // num_outputs 1, // num_hidden_layers { 8, }, // num_hidden_nodes { vp9_partition_nn_weights_16x16_layer0, vp9_partition_nn_weights_16x16_layer1, }, { vp9_partition_nn_bias_16x16_layer0, vp9_partition_nn_bias_16x16_layer1, }, }; #undef FEATURES #define FEATURES 6 static const float vp9_var_part_nn_weights_64_layer0[FEATURES * 8] = { -0.249572f, 0.205532f, -2.175608f, 1.094836f, -2.986370f, 0.193160f, -0.143823f, 0.378511f, -1.997788f, -2.166866f, -1.930158f, -1.202127f, -0.611875f, -0.506422f, -0.432487f, 0.071205f, 0.578172f, -0.154285f, -0.051830f, 0.331681f, -1.457177f, -2.443546f, -2.000302f, -1.389283f, 0.372084f, -0.464917f, 2.265235f, 2.385787f, 2.312722f, 2.127868f, -0.403963f, -0.177860f, -0.436751f, -0.560539f, 0.254903f, 0.193976f, -0.305611f, 0.256632f, 0.309388f, -0.437439f, 1.702640f, -5.007069f, -0.323450f, 0.294227f, 1.267193f, 1.056601f, 0.387181f, -0.191215f, }; static const float vp9_var_part_nn_bias_64_layer0[8] = { -0.044396f, -0.938166f, 0.000000f, -0.916375f, 1.242299f, 0.000000f, -0.405734f, 0.014206f, }; static const float vp9_var_part_nn_weights_64_layer1[8] = { 1.635945f, 0.979557f, 0.455315f, 1.197199f, -2.251024f, -0.464953f, 1.378676f, -0.111927f, }; static const float vp9_var_part_nn_bias_64_layer1[1] = { -0.37972447f, }; static const NN_CONFIG vp9_var_part_nnconfig_64 = { FEATURES, // num_inputs 1, // num_outputs 1, // num_hidden_layers { 8, }, // num_hidden_nodes { vp9_var_part_nn_weights_64_layer0, vp9_var_part_nn_weights_64_layer1, }, { vp9_var_part_nn_bias_64_layer0, vp9_var_part_nn_bias_64_layer1, }, }; static const float vp9_var_part_nn_weights_32_layer0[FEATURES * 8] = { 0.067243f, -0.083598f, -2.191159f, 2.726434f, -3.324013f, 3.477977f, 0.323736f, -0.510199f, 2.960693f, 2.937661f, 2.888476f, 2.938315f, -0.307602f, -0.503353f, -0.080725f, -0.473909f, -0.417162f, 0.457089f, 0.665153f, -0.273210f, 0.028279f, 0.972220f, -0.445596f, 1.756611f, -0.177892f, -0.091758f, 0.436661f, -0.521506f, 0.133786f, 0.266743f, 0.637367f, -0.160084f, -1.396269f, 1.020841f, -1.112971f, 0.919496f, -0.235883f, 0.651954f, 0.109061f, -0.429463f, 0.740839f, -0.962060f, 0.299519f, -0.386298f, 1.550231f, 2.464915f, 1.311969f, 2.561612f, }; static const float vp9_var_part_nn_bias_32_layer0[8] = { 0.368242f, 0.736617f, 0.000000f, 0.757287f, 0.000000f, 0.613248f, -0.776390f, 0.928497f, }; static const float vp9_var_part_nn_weights_32_layer1[8] = { 0.939884f, -2.420850f, -0.410489f, -0.186690f, 0.063287f, -0.522011f, 0.484527f, -0.639625f, }; static const float vp9_var_part_nn_bias_32_layer1[1] = { -0.6455006f, }; static const NN_CONFIG vp9_var_part_nnconfig_32 = { FEATURES, // num_inputs 1, // num_outputs 1, // num_hidden_layers { 8, }, // num_hidden_nodes { vp9_var_part_nn_weights_32_layer0, vp9_var_part_nn_weights_32_layer1, }, { vp9_var_part_nn_bias_32_layer0, vp9_var_part_nn_bias_32_layer1, }, }; static const float vp9_var_part_nn_weights_16_layer0[FEATURES * 8] = { 0.742567f, -0.580624f, -0.244528f, 0.331661f, -0.113949f, -0.559295f, -0.386061f, 0.438653f, 
1.467463f, 0.211589f, 0.513972f, 1.067855f, -0.876679f, 0.088560f, -0.687483f, -0.380304f, -0.016412f, 0.146380f, 0.015318f, 0.000351f, -2.764887f, 3.269717f, 2.752428f, -2.236754f, 0.561539f, -0.852050f, -0.084667f, 0.202057f, 0.197049f, 0.364922f, -0.463801f, 0.431790f, 1.872096f, -0.091887f, -0.055034f, 2.443492f, -0.156958f, -0.189571f, -0.542424f, -0.589804f, -0.354422f, 0.401605f, 0.642021f, -0.875117f, 2.040794f, 1.921070f, 1.792413f, 1.839727f, }; static const float vp9_var_part_nn_bias_16_layer0[8] = { 2.901234f, -1.940932f, -0.198970f, -0.406524f, 0.059422f, -1.879207f, -0.232340f, 2.979821f, }; static const float vp9_var_part_nn_weights_16_layer1[8] = { -0.528731f, 0.375234f, -0.088422f, 0.668629f, 0.870449f, 0.578735f, 0.546103f, -1.957207f, }; static const float vp9_var_part_nn_bias_16_layer1[1] = { -1.95769405f, }; static const NN_CONFIG vp9_var_part_nnconfig_16 = { FEATURES, // num_inputs 1, // num_outputs 1, // num_hidden_layers { 8, }, // num_hidden_nodes { vp9_var_part_nn_weights_16_layer0, vp9_var_part_nn_weights_16_layer1, }, { vp9_var_part_nn_bias_16_layer0, vp9_var_part_nn_bias_16_layer1, }, }; #undef FEATURES #define FEATURES 12 #define LABELS 1 #define NODES 8 static const float vp9_part_split_nn_weights_64_layer0[FEATURES * NODES] = { -0.609728f, -0.409099f, -0.472449f, 0.183769f, -0.457740f, 0.081089f, 0.171003f, 0.578696f, -0.019043f, -0.856142f, 0.557369f, -1.779424f, -0.274044f, -0.320632f, -0.392531f, -0.359462f, -0.404106f, -0.288357f, 0.200620f, 0.038013f, -0.430093f, 0.235083f, -0.487442f, 0.424814f, -0.232758f, -0.442943f, 0.229397f, -0.540301f, -0.648421f, -0.649747f, -0.171638f, 0.603824f, 0.468497f, -0.421580f, 0.178840f, -0.533838f, -0.029471f, -0.076296f, 0.197426f, -0.187908f, -0.003950f, -0.065740f, 0.085165f, -0.039674f, -5.640702f, 1.909538f, -1.434604f, 3.294606f, -0.788812f, 0.196864f, 0.057012f, -0.019757f, 0.336233f, 0.075378f, 0.081503f, 0.491864f, -1.899470f, -1.764173f, -1.888137f, -1.762343f, 0.845542f, 0.202285f, 0.381948f, -0.150996f, 0.556893f, -0.305354f, 0.561482f, -0.021974f, -0.703117f, 0.268638f, -0.665736f, 1.191005f, -0.081568f, -0.115653f, 0.272029f, -0.140074f, 0.072683f, 0.092651f, -0.472287f, -0.055790f, -0.434425f, 0.352055f, 0.048246f, 0.372865f, 0.111499f, -0.338304f, 0.739133f, 0.156519f, -0.594644f, 0.137295f, 0.613350f, -0.165102f, -1.003731f, 0.043070f, -0.887896f, -0.174202f, }; static const float vp9_part_split_nn_bias_64_layer0[NODES] = { 1.182714f, 0.000000f, 0.902019f, 0.953115f, -1.372486f, -1.288740f, -0.155144f, -3.041362f, }; static const float vp9_part_split_nn_weights_64_layer1[NODES * LABELS] = { 0.841214f, 0.456016f, 0.869270f, 1.692999f, -1.700494f, -0.911761f, 0.030111f, -1.447548f, }; static const float vp9_part_split_nn_bias_64_layer1[LABELS] = { 1.17782545f, }; static const NN_CONFIG vp9_part_split_nnconfig_64 = { FEATURES, // num_inputs LABELS, // num_outputs 1, // num_hidden_layers { NODES, }, // num_hidden_nodes { vp9_part_split_nn_weights_64_layer0, vp9_part_split_nn_weights_64_layer1, }, { vp9_part_split_nn_bias_64_layer0, vp9_part_split_nn_bias_64_layer1, }, }; static const float vp9_part_split_nn_weights_32_layer0[FEATURES * NODES] = { -0.105488f, -0.218662f, 0.010980f, -0.226979f, 0.028076f, 0.743430f, 0.789266f, 0.031907f, -1.464200f, 0.222336f, -1.068493f, -0.052712f, -0.176181f, -0.102654f, -0.973932f, -0.182637f, -0.198000f, 0.335977f, 0.271346f, 0.133005f, 1.674203f, 0.689567f, 0.657133f, 0.283524f, 0.115529f, 0.738327f, 0.317184f, -0.179736f, 0.403691f, 0.679350f, 0.048925f, 0.271338f, 
-1.538921f, -0.900737f, -1.377845f, 0.084245f, 0.803122f, -0.107806f, 0.103045f, -0.023335f, -0.098116f, -0.127809f, 0.037665f, -0.523225f, 1.622185f, 1.903999f, 1.358889f, 1.680785f, 0.027743f, 0.117906f, -0.158810f, 0.057775f, 0.168257f, 0.062414f, 0.086228f, -0.087381f, -3.066082f, 3.021855f, -4.092155f, 2.550104f, -0.230022f, -0.207445f, -0.000347f, 0.034042f, 0.097057f, 0.220088f, -0.228841f, -0.029405f, -1.507174f, -1.455184f, 2.624904f, 2.643355f, 0.319912f, 0.585531f, -1.018225f, -0.699606f, 1.026490f, 0.169952f, -0.093579f, -0.142352f, -0.107256f, 0.059598f, 0.043190f, 0.507543f, -0.138617f, 0.030197f, 0.059574f, -0.634051f, -0.586724f, -0.148020f, -0.334380f, 0.459547f, 1.620600f, 0.496850f, 0.639480f, -0.465715f, }; static const float vp9_part_split_nn_bias_32_layer0[NODES] = { -1.125885f, 0.753197f, -0.825808f, 0.004839f, 0.583920f, 0.718062f, 0.976741f, 0.796188f, }; static const float vp9_part_split_nn_weights_32_layer1[NODES * LABELS] = { -0.458745f, 0.724624f, -0.479720f, -2.199872f, 1.162661f, 1.194153f, -0.716896f, 0.824080f, }; static const float vp9_part_split_nn_bias_32_layer1[LABELS] = { 0.71644074f, }; static const NN_CONFIG vp9_part_split_nnconfig_32 = { FEATURES, // num_inputs LABELS, // num_outputs 1, // num_hidden_layers { NODES, }, // num_hidden_nodes { vp9_part_split_nn_weights_32_layer0, vp9_part_split_nn_weights_32_layer1, }, { vp9_part_split_nn_bias_32_layer0, vp9_part_split_nn_bias_32_layer1, }, }; static const float vp9_part_split_nn_weights_16_layer0[FEATURES * NODES] = { -0.003629f, -0.046852f, 0.220428f, -0.033042f, 0.049365f, 0.112818f, -0.306149f, -0.005872f, 1.066947f, -2.290226f, 2.159505f, -0.618714f, -0.213294f, 0.451372f, -0.199459f, 0.223730f, -0.321709f, 0.063364f, 0.148704f, -0.293371f, 0.077225f, -0.421947f, -0.515543f, -0.240975f, -0.418516f, 1.036523f, -0.009165f, 0.032484f, 1.086549f, 0.220322f, -0.247585f, -0.221232f, -0.225050f, 0.993051f, 0.285907f, 1.308846f, 0.707456f, 0.335152f, 0.234556f, 0.264590f, -0.078033f, 0.542226f, 0.057777f, 0.163471f, 0.039245f, -0.725960f, 0.963780f, -0.972001f, 0.252237f, -0.192745f, -0.836571f, -0.460539f, -0.528713f, -0.160198f, -0.621108f, 0.486405f, -0.221923f, 1.519426f, -0.857871f, 0.411595f, 0.947188f, 0.203339f, 0.174526f, 0.016382f, 0.256879f, 0.049818f, 0.057836f, -0.659096f, 0.459894f, 0.174695f, 0.379359f, 0.062530f, -0.210201f, -0.355788f, -0.208432f, -0.401723f, -0.115373f, 0.191336f, -0.109342f, 0.002455f, -0.078746f, -0.391871f, 0.149892f, -0.239615f, -0.520709f, 0.118568f, -0.437975f, 0.118116f, -0.565426f, -0.206446f, 0.113407f, 0.558894f, 0.534627f, 1.154350f, -0.116833f, 1.723311f, }; static const float vp9_part_split_nn_bias_16_layer0[NODES] = { 0.013109f, -0.034341f, 0.679845f, -0.035781f, -0.104183f, 0.098055f, -0.041130f, 0.160107f, }; static const float vp9_part_split_nn_weights_16_layer1[NODES * LABELS] = { 1.499564f, -0.403259f, 1.366532f, -0.469868f, 0.482227f, -2.076697f, 0.527691f, 0.540495f, }; static const float vp9_part_split_nn_bias_16_layer1[LABELS] = { 0.01134653f, }; static const NN_CONFIG vp9_part_split_nnconfig_16 = { FEATURES, // num_inputs LABELS, // num_outputs 1, // num_hidden_layers { NODES, }, // num_hidden_nodes { vp9_part_split_nn_weights_16_layer0, vp9_part_split_nn_weights_16_layer1, }, { vp9_part_split_nn_bias_16_layer0, vp9_part_split_nn_bias_16_layer1, }, }; static const float vp9_part_split_nn_weights_8_layer0[FEATURES * NODES] = { -0.668875f, -0.159078f, -0.062663f, -0.483785f, -0.146814f, -0.608975f, -0.589145f, 0.203704f, -0.051007f, -0.113769f, 
-0.477511f, -0.122603f, -1.329890f, 1.403386f, 0.199636f, -0.161139f, 2.182090f, -0.014307f, 0.015755f, -0.208468f, 0.884353f, 0.815920f, 0.632464f, 0.838225f, 1.369483f, -0.029068f, 0.570213f, -0.573546f, 0.029617f, 0.562054f, -0.653093f, -0.211910f, -0.661013f, -0.384418f, -0.574038f, -0.510069f, 0.173047f, -0.274231f, -1.044008f, -0.422040f, -0.810296f, 0.144069f, -0.406704f, 0.411230f, -0.144023f, 0.745651f, -0.595091f, 0.111787f, 0.840651f, 0.030123f, -0.242155f, 0.101486f, -0.017889f, -0.254467f, -0.285407f, -0.076675f, -0.549542f, -0.013544f, -0.686566f, -0.755150f, 1.623949f, -0.286369f, 0.170976f, 0.016442f, -0.598353f, -0.038540f, 0.202597f, -0.933582f, 0.599510f, 0.362273f, 0.577722f, 0.477603f, 0.767097f, 0.431532f, 0.457034f, 0.223279f, 0.381349f, 0.033777f, 0.423923f, -0.664762f, 0.385662f, 0.075744f, 0.182681f, 0.024118f, 0.319408f, -0.528864f, 0.976537f, -0.305971f, -0.189380f, -0.241689f, -1.318092f, 0.088647f, -0.109030f, -0.945654f, 1.082797f, 0.184564f, }; static const float vp9_part_split_nn_bias_8_layer0[NODES] = { -0.237472f, 2.051396f, 0.297062f, -0.730194f, 0.060472f, -0.565959f, 0.560869f, -0.395448f, }; static const float vp9_part_split_nn_weights_8_layer1[NODES * LABELS] = { 0.568121f, 1.575915f, -0.544309f, 0.751595f, -0.117911f, -1.340730f, -0.739671f, 0.661216f, }; static const float vp9_part_split_nn_bias_8_layer1[LABELS] = { -0.63375306f, }; static const NN_CONFIG vp9_part_split_nnconfig_8 = { FEATURES, // num_inputs LABELS, // num_outputs 1, // num_hidden_layers { NODES, }, // num_hidden_nodes { vp9_part_split_nn_weights_8_layer0, vp9_part_split_nn_weights_8_layer1, }, { vp9_part_split_nn_bias_8_layer0, vp9_part_split_nn_bias_8_layer1, }, }; #undef NODES #undef FEATURES #undef LABELS // Partition pruning model(linear). static const float vp9_partition_feature_mean[24] = { 303501.697372f, 3042630.372158f, 24.694696f, 1.392182f, 689.413511f, 162.027012f, 1.478213f, 0.0, 135382.260230f, 912738.513263f, 28.845217f, 1.515230f, 544.158492f, 131.807995f, 1.436863f, 0.0f, 43682.377587f, 208131.711766f, 28.084737f, 1.356677f, 138.254122f, 119.522553f, 1.252322f, 0.0f, }; static const float vp9_partition_feature_std[24] = { 673689.212982f, 5996652.516628f, 0.024449f, 1.989792f, 985.880847f, 0.014638f, 2.001898f, 0.0f, 208798.775332f, 1812548.443284f, 0.018693f, 1.838009f, 396.986910f, 0.015657f, 1.332541f, 0.0f, 55888.847031f, 448587.962714f, 0.017900f, 1.904776f, 98.652832f, 0.016598f, 1.320992f, 0.0f, }; // Error tolerance: 0.01%-0.0.05%-0.1% static const float vp9_partition_linear_weights[24] = { 0.111736f, 0.289977f, 0.042219f, 0.204765f, 0.120410f, -0.143863f, 0.282376f, 0.847811f, 0.637161f, 0.131570f, 0.018636f, 0.202134f, 0.112797f, 0.028162f, 0.182450f, 1.124367f, 0.386133f, 0.083700f, 0.050028f, 0.150873f, 0.061119f, 0.109318f, 0.127255f, 0.625211f, }; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_PARTITION_MODELS_H_ libvpx-1.8.2/vp9/encoder/vp9_picklpf.c000066400000000000000000000156311357355204000175670ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <assert.h>
#include <limits.h>

#include "./vpx_scale_rtcd.h"

#include "vpx_dsp/psnr.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_loopfilter.h"
#include "vp9/common/vp9_onyxc_int.h"
#include "vp9/common/vp9_quant_common.h"

#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_picklpf.h"
#include "vp9/encoder/vp9_quantize.h"

static unsigned int get_section_intra_rating(const VP9_COMP *cpi) {
  unsigned int section_intra_rating;

  section_intra_rating = (cpi->common.frame_type == KEY_FRAME)
                             ? cpi->twopass.key_frame_section_intra_rating
                             : cpi->twopass.section_intra_rating;

  return section_intra_rating;
}

static int get_max_filter_level(const VP9_COMP *cpi) {
  if (cpi->oxcf.pass == 2) {
    unsigned int section_intra_rating = get_section_intra_rating(cpi);
    return section_intra_rating > 8 ? MAX_LOOP_FILTER * 3 / 4
                                    : MAX_LOOP_FILTER;
  } else {
    return MAX_LOOP_FILTER;
  }
}

static int64_t try_filter_frame(const YV12_BUFFER_CONFIG *sd,
                                VP9_COMP *const cpi, int filt_level,
                                int partial_frame) {
  VP9_COMMON *const cm = &cpi->common;
  int64_t filt_err;

  vp9_build_mask_frame(cm, filt_level, partial_frame);

  if (cpi->num_workers > 1)
    vp9_loop_filter_frame_mt(cm->frame_to_show, cm, cpi->td.mb.e_mbd.plane,
                             filt_level, 1, partial_frame, cpi->workers,
                             cpi->num_workers, &cpi->lf_row_sync);
  else
    vp9_loop_filter_frame(cm->frame_to_show, cm, &cpi->td.mb.e_mbd, filt_level,
                          1, partial_frame);

#if CONFIG_VP9_HIGHBITDEPTH
  if (cm->use_highbitdepth) {
    filt_err = vpx_highbd_get_y_sse(sd, cm->frame_to_show);
  } else {
    filt_err = vpx_get_y_sse(sd, cm->frame_to_show);
  }
#else
  filt_err = vpx_get_y_sse(sd, cm->frame_to_show);
#endif  // CONFIG_VP9_HIGHBITDEPTH

  // Re-instate the unfiltered frame
  vpx_yv12_copy_y(&cpi->last_frame_uf, cm->frame_to_show);

  return filt_err;
}

static int search_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
                               int partial_frame) {
  const VP9_COMMON *const cm = &cpi->common;
  const struct loopfilter *const lf = &cm->lf;
  const int min_filter_level = 0;
  const int max_filter_level = get_max_filter_level(cpi);
  int filt_direction = 0;
  int64_t best_err;
  int filt_best;

  // Start the search at the previous frame filter level unless it is now out
  // of range.
  int filt_mid =
      clamp(lf->last_filt_level, min_filter_level, max_filter_level);
  int filter_step = filt_mid < 16 ? 4 : filt_mid / 4;
  // Sum squared error at each filter level
  int64_t ss_err[MAX_LOOP_FILTER + 1];

  unsigned int section_intra_rating = get_section_intra_rating(cpi);

  // Set each entry to -1
  memset(ss_err, 0xFF, sizeof(ss_err));

  //  Make a copy of the unfiltered / processed recon buffer
  vpx_yv12_copy_y(cm->frame_to_show, &cpi->last_frame_uf);

  best_err = try_filter_frame(sd, cpi, filt_mid, partial_frame);
  filt_best = filt_mid;
  ss_err[filt_mid] = best_err;

  while (filter_step > 0) {
    const int filt_high = VPXMIN(filt_mid + filter_step, max_filter_level);
    const int filt_low = VPXMAX(filt_mid - filter_step, min_filter_level);

    // Bias against raising loop filter in favor of lowering it.
    int64_t bias = (best_err >> (15 - (filt_mid / 8))) * filter_step;

    if ((cpi->oxcf.pass == 2) && (section_intra_rating < 20))
      bias = (bias * section_intra_rating) / 20;

    // yx, bias less for large block size
    if (cm->tx_mode != ONLY_4X4) bias >>= 1;

    if (filt_direction <= 0 && filt_low != filt_mid) {
      // Get Low filter error score
      if (ss_err[filt_low] < 0) {
        ss_err[filt_low] = try_filter_frame(sd, cpi, filt_low, partial_frame);
      }
      // If value is close to the best so far then bias towards a lower loop
      // filter value.
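      // Added note (illustrative, not from upstream): with best_err = 1 << 20
      // and filt_mid = 16, bias = ((1 << 20) >> (15 - 16 / 8)) * filter_step
      // = 128 * filter_step, so a lower level is accepted below even when its
      // SSE is up to `bias` worse than the current best.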
      if ((ss_err[filt_low] - bias) < best_err) {
        // Was it actually better than the previous best?
        if (ss_err[filt_low] < best_err) best_err = ss_err[filt_low];

        filt_best = filt_low;
      }
    }

    // Now look at filt_high
    if (filt_direction >= 0 && filt_high != filt_mid) {
      if (ss_err[filt_high] < 0) {
        ss_err[filt_high] = try_filter_frame(sd, cpi, filt_high, partial_frame);
      }
      // Was it better than the previous best?
      if (ss_err[filt_high] < (best_err - bias)) {
        best_err = ss_err[filt_high];
        filt_best = filt_high;
      }
    }

    // Half the step distance if the best filter value was the same as last
    // time
    if (filt_best == filt_mid) {
      filter_step /= 2;
      filt_direction = 0;
    } else {
      filt_direction = (filt_best < filt_mid) ? -1 : 1;
      filt_mid = filt_best;
    }
  }

  return filt_best;
}

void vp9_pick_filter_level(const YV12_BUFFER_CONFIG *sd, VP9_COMP *cpi,
                           LPF_PICK_METHOD method) {
  VP9_COMMON *const cm = &cpi->common;
  struct loopfilter *const lf = &cm->lf;

  lf->sharpness_level = 0;

  if (method == LPF_PICK_MINIMAL_LPF && lf->filter_level) {
    lf->filter_level = 0;
  } else if (method >= LPF_PICK_FROM_Q) {
    const int min_filter_level = 0;
    const int max_filter_level = get_max_filter_level(cpi);
    const int q = vp9_ac_quant(cm->base_qindex, 0, cm->bit_depth);
    // These values were determined by linear fitting the result of the
    // searched level, filt_guess = q * 0.316206 + 3.87252
#if CONFIG_VP9_HIGHBITDEPTH
    int filt_guess;
    switch (cm->bit_depth) {
      case VPX_BITS_8:
        filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
        break;
      case VPX_BITS_10:
        filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 4060632, 20);
        break;
      default:
        assert(cm->bit_depth == VPX_BITS_12);
        filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 16242526, 22);
        break;
    }
#else
    int filt_guess = ROUND_POWER_OF_TWO(q * 20723 + 1015158, 18);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    if (cpi->oxcf.pass == 0 && cpi->oxcf.rc_mode == VPX_CBR &&
        cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled &&
        (cm->base_qindex < 200 || cm->width * cm->height > 320 * 240) &&
        cpi->oxcf.content != VP9E_CONTENT_SCREEN && cm->frame_type != KEY_FRAME)
      filt_guess = 5 * filt_guess >> 3;

    if (cm->frame_type == KEY_FRAME) filt_guess -= 4;
    lf->filter_level = clamp(filt_guess, min_filter_level, max_filter_level);
  } else {
    lf->filter_level =
        search_filter_level(sd, cpi, method == LPF_PICK_FROM_SUBIMAGE);
  }
}
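// Added illustration (not part of libvpx): a standalone sketch of the 8-bit
// filt_guess fixed-point evaluation in vp9_pick_filter_level() above,
// assuming ROUND_POWER_OF_TWO(v, n) expands to ((v) + (1 << ((n)-1))) >> (n).
// Example: q = 100 gives (2072300 + 1015158 + 131072) >> 18 == 12.
static int example_filt_guess_8bit(int q) {
  return (q * 20723 + 1015158 + (1 << 17)) >> 18;
}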
libvpx-1.8.2/vp9/encoder/vp9_picklpf.h

/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_ENCODER_VP9_PICKLPF_H_
#define VPX_VP9_ENCODER_VP9_PICKLPF_H_

#ifdef __cplusplus
extern "C" {
#endif

#include "vp9/encoder/vp9_encoder.h"

struct yv12_buffer_config;
struct VP9_COMP;

void vp9_pick_filter_level(const struct yv12_buffer_config *sd,
                           struct VP9_COMP *cpi, LPF_PICK_METHOD method);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_ENCODER_VP9_PICKLPF_H_

libvpx-1.8.2/vp9/encoder/vp9_pickmode.c

/*
 * Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx/vpx_codec.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_blockd.h"
#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_pickmode.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"

typedef struct {
  uint8_t *data;
  int stride;
  int in_use;
} PRED_BUFFER;

typedef struct {
  PRED_BUFFER *best_pred;
  PREDICTION_MODE best_mode;
  TX_SIZE best_tx_size;
  TX_SIZE best_intra_tx_size;
  MV_REFERENCE_FRAME best_ref_frame;
  MV_REFERENCE_FRAME best_second_ref_frame;
  uint8_t best_mode_skip_txfm;
  INTERP_FILTER best_pred_filter;
} BEST_PICKMODE;

static const int pos_shift_16x16[4][4] = {
  { 9, 10, 13, 14 }, { 11, 12, 15, 16 }, { 17, 18, 21, 22 }, { 19, 20, 23, 24 }
};

static int mv_refs_rt(VP9_COMP *cpi, const VP9_COMMON *cm, const MACROBLOCK *x,
                      const MACROBLOCKD *xd, const TileInfo *const tile,
                      MODE_INFO *mi, MV_REFERENCE_FRAME ref_frame,
                      int_mv *mv_ref_list, int_mv *base_mv, int mi_row,
                      int mi_col, int use_base_mv) {
  const int *ref_sign_bias = cm->ref_frame_sign_bias;
  int i, refmv_count = 0;

  const POSITION *const mv_ref_search = mv_ref_blocks[mi->sb_type];

  int different_ref_found = 0;
  int context_counter = 0;
  int const_motion = 0;

  // Blank the reference vector list
  memset(mv_ref_list, 0, sizeof(*mv_ref_list) * MAX_MV_REF_CANDIDATES);

  // The nearest 2 blocks are treated differently
  // if the size < 8x8 we get the mv from the bmi substructure,
  // and we also need to keep a mode count.
  for (i = 0; i < 2; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MODE_INFO *const candidate_mi =
          xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
      // Keep counts for entropy encoding.
      context_counter += mode_2_counter[candidate_mi->mode];
      different_ref_found = 1;

      if (candidate_mi->ref_frame[0] == ref_frame)
        ADD_MV_REF_LIST(get_sub_block_mv(candidate_mi, 0, mv_ref->col, -1),
                        refmv_count, mv_ref_list, Done);
    }
  }

  const_motion = 1;

  // Check the rest of the neighbors in much the same way
  // as before except we don't need to keep track of sub blocks or
  // mode counts.
  for (; i < MVREF_NEIGHBOURS && !refmv_count; ++i) {
    const POSITION *const mv_ref = &mv_ref_search[i];
    if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) {
      const MODE_INFO *const candidate_mi =
          xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride];
      different_ref_found = 1;

      if (candidate_mi->ref_frame[0] == ref_frame)
        ADD_MV_REF_LIST(candidate_mi->mv[0], refmv_count, mv_ref_list, Done);
    }
  }

  // Since we couldn't find 2 mvs from the same reference frame
  // go back through the neighbors and find motion vectors from
  // different reference frames.
if (different_ref_found && !refmv_count) { for (i = 0; i < MVREF_NEIGHBOURS; ++i) { const POSITION *mv_ref = &mv_ref_search[i]; if (is_inside(tile, mi_col, mi_row, cm->mi_rows, mv_ref)) { const MODE_INFO *const candidate_mi = xd->mi[mv_ref->col + mv_ref->row * xd->mi_stride]; // If the candidate is INTRA we don't want to consider its mv. IF_DIFF_REF_FRAME_ADD_MV(candidate_mi, ref_frame, ref_sign_bias, refmv_count, mv_ref_list, Done); } } } if (use_base_mv && !cpi->svc.layer_context[cpi->svc.temporal_layer_id].is_key_frame && ref_frame == LAST_FRAME) { // Get base layer mv. MV_REF *candidate = &cm->prev_frame ->mvs[(mi_col >> 1) + (mi_row >> 1) * (cm->mi_cols >> 1)]; if (candidate->mv[0].as_int != INVALID_MV) { base_mv->as_mv.row = (candidate->mv[0].as_mv.row * 2); base_mv->as_mv.col = (candidate->mv[0].as_mv.col * 2); clamp_mv_ref(&base_mv->as_mv, xd); } else { base_mv->as_int = INVALID_MV; } } Done: x->mbmi_ext->mode_context[ref_frame] = counter_to_context[context_counter]; // Clamp vectors for (i = 0; i < MAX_MV_REF_CANDIDATES; ++i) clamp_mv_ref(&mv_ref_list[i].as_mv, xd); return const_motion; } static int combined_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv, int64_t best_rd_sofar, int use_base_mv) { MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } }; const int step_param = cpi->sf.mv.fullpel_search_step_param; const int sadpb = x->sadperbit16; MV mvp_full; const int ref = mi->ref_frame[0]; const MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv; MV center_mv; uint32_t dis; int rate_mode; const MvLimits tmp_mv_limits = x->mv_limits; int rv = 0; int cost_list[5]; int search_subpel = 1; const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); if (scaled_ref_frame) { int i; // Swap out the reference frame for a version that's been scaled to // match the resolution of the current frame, allowing the existing // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); } vp9_set_mv_search_range(&x->mv_limits, &ref_mv); // Limit motion vector for large lightning change. 
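  // Added note (illustrative, not from upstream): the clamps below shrink the
  // full-pel search window to +/-10 columns/rows (e.g. a col_min of -64
  // becomes -10), so a suspected lighting change cannot drag the search far
  // from the predicted position.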
if (cpi->oxcf.speed > 5 && x->lowvar_highsumdiff) { x->mv_limits.col_min = VPXMAX(x->mv_limits.col_min, -10); x->mv_limits.row_min = VPXMAX(x->mv_limits.row_min, -10); x->mv_limits.col_max = VPXMIN(x->mv_limits.col_max, 10); x->mv_limits.row_max = VPXMIN(x->mv_limits.row_max, 10); } assert(x->mv_best_ref_index[ref] <= 2); if (x->mv_best_ref_index[ref] < 2) mvp_full = x->mbmi_ext->ref_mvs[ref][x->mv_best_ref_index[ref]].as_mv; else mvp_full = x->pred_mv[ref]; mvp_full.col >>= 3; mvp_full.row >>= 3; if (!use_base_mv) center_mv = ref_mv; else center_mv = tmp_mv->as_mv; if (x->sb_use_mv_part) { tmp_mv->as_mv.row = x->sb_mvrow_part >> 3; tmp_mv->as_mv.col = x->sb_mvcol_part >> 3; } else { vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb, cond_cost_list(cpi, cost_list), ¢er_mv, &tmp_mv->as_mv, INT_MAX, 0); } x->mv_limits = tmp_mv_limits; // calculate the bit cost on motion vector mvp_full.row = tmp_mv->as_mv.row * 8; mvp_full.col = tmp_mv->as_mv.col * 8; *rate_mv = vp9_mv_bit_cost(&mvp_full, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); rate_mode = cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref]][INTER_OFFSET(NEWMV)]; rv = !(RDCOST(x->rdmult, x->rddiv, (*rate_mv + rate_mode), 0) > best_rd_sofar); // For SVC on non-reference frame, avoid subpel for (0, 0) motion. if (cpi->use_svc && cpi->svc.non_reference_frame) { if (mvp_full.row == 0 && mvp_full.col == 0) search_subpel = 0; } if (rv && search_subpel) { SUBPEL_FORCE_STOP subpel_force_stop = cpi->sf.mv.subpel_force_stop; if (use_base_mv && cpi->sf.base_mv_aggressive) subpel_force_stop = HALF_PEL; if (cpi->sf.mv.enable_adaptive_subpel_force_stop) { const int mv_thresh = cpi->sf.mv.adapt_subpel_force_stop.mv_thresh; if (abs(tmp_mv->as_mv.row) >= mv_thresh || abs(tmp_mv->as_mv.col) >= mv_thresh) subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_above; else subpel_force_stop = cpi->sf.mv.adapt_subpel_force_stop.force_stop_below; } cpi->find_fractional_mv_step( x, &tmp_mv->as_mv, &ref_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], subpel_force_stop, cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, 0, 0, cpi->sf.use_accurate_subpel_search); *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } return rv; } static void block_variance(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, int w, int h, unsigned int *sse, int *sum, int block_size, #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth, vpx_bit_depth_t bd, #endif uint32_t *sse8x8, int *sum8x8, uint32_t *var8x8) { int i, j, k = 0; *sse = 0; *sum = 0; for (i = 0; i < h; i += block_size) { for (j = 0; j < w; j += block_size) { #if CONFIG_VP9_HIGHBITDEPTH if (use_highbitdepth) { switch (bd) { case VPX_BITS_8: vpx_highbd_8_get8x8var(src + src_stride * i + j, src_stride, ref + ref_stride * i + j, ref_stride, &sse8x8[k], &sum8x8[k]); break; case VPX_BITS_10: vpx_highbd_10_get8x8var(src + src_stride * i + j, src_stride, ref + ref_stride * i + j, ref_stride, &sse8x8[k], &sum8x8[k]); break; case VPX_BITS_12: vpx_highbd_12_get8x8var(src + src_stride * i + j, src_stride, ref + ref_stride * i + j, ref_stride, &sse8x8[k], &sum8x8[k]); break; } } else { vpx_get8x8var(src + src_stride * i + j, src_stride, ref + ref_stride * i + j, ref_stride, &sse8x8[k], &sum8x8[k]); } 
#else vpx_get8x8var(src + src_stride * i + j, src_stride, ref + ref_stride * i + j, ref_stride, &sse8x8[k], &sum8x8[k]); #endif *sse += sse8x8[k]; *sum += sum8x8[k]; var8x8[k] = sse8x8[k] - (uint32_t)(((int64_t)sum8x8[k] * sum8x8[k]) >> 6); k++; } } } static void calculate_variance(int bw, int bh, TX_SIZE tx_size, unsigned int *sse_i, int *sum_i, unsigned int *var_o, unsigned int *sse_o, int *sum_o) { const BLOCK_SIZE unit_size = txsize_to_bsize[tx_size]; const int nw = 1 << (bw - b_width_log2_lookup[unit_size]); const int nh = 1 << (bh - b_height_log2_lookup[unit_size]); int i, j, k = 0; for (i = 0; i < nh; i += 2) { for (j = 0; j < nw; j += 2) { sse_o[k] = sse_i[i * nw + j] + sse_i[i * nw + j + 1] + sse_i[(i + 1) * nw + j] + sse_i[(i + 1) * nw + j + 1]; sum_o[k] = sum_i[i * nw + j] + sum_i[i * nw + j + 1] + sum_i[(i + 1) * nw + j] + sum_i[(i + 1) * nw + j + 1]; var_o[k] = sse_o[k] - (uint32_t)(((int64_t)sum_o[k] * sum_o[k]) >> (b_width_log2_lookup[unit_size] + b_height_log2_lookup[unit_size] + 6)); k++; } } } // Adjust the ac_thr according to speed, width, height and normalized sum static int ac_thr_factor(const int speed, const int width, const int height, const int norm_sum) { if (speed >= 8 && norm_sum < 5) { if (width <= 640 && height <= 480) return 4; else return 2; } return 1; } static TX_SIZE calculate_tx_size(VP9_COMP *const cpi, BLOCK_SIZE bsize, MACROBLOCKD *const xd, unsigned int var, unsigned int sse, int64_t ac_thr, unsigned int source_variance, int is_intra) { // TODO(marpan): Tune selection for intra-modes, screen content, etc. TX_SIZE tx_size; unsigned int var_thresh = is_intra ? (unsigned int)ac_thr : 1; int limit_tx = 1; if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && (source_variance == 0 || var < var_thresh)) limit_tx = 0; if (cpi->common.tx_mode == TX_MODE_SELECT) { if (sse > (var << 2)) tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); else tx_size = TX_8X8; if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && limit_tx && cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id)) tx_size = TX_8X8; else if (tx_size > TX_16X16 && limit_tx) tx_size = TX_16X16; // For screen-content force 4X4 tx_size over 8X8, for large variance. if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && tx_size == TX_8X8 && bsize <= BLOCK_16X16 && ((var >> 5) > (unsigned int)ac_thr)) tx_size = TX_4X4; } else { tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); } return tx_size; } static void compute_intra_yprediction(PREDICTION_MODE mode, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd) { struct macroblockd_plane *const pd = &xd->plane[0]; struct macroblock_plane *const p = &x->plane[0]; uint8_t *const src_buf_base = p->src.buf; uint8_t *const dst_buf_base = pd->dst.buf; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; // block and transform sizes, in number of 4x4 blocks log 2 ("*_b") // 4x4=0, 8x8=2, 16x16=4, 32x32=6, 64x64=8 const TX_SIZE tx_size = max_txsize_lookup[bsize]; const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; int row, col; // If mb_to_right_edge is < 0 we are in a situation in which // the current block size extends into the UMV and we won't // visit the sub blocks that are wholly within the UMV. const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> (5 + pd->subsampling_x)); const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 
0 : xd->mb_to_bottom_edge >> (5 + pd->subsampling_y)); // Keep track of the row and column of the blocks we use so that we know // if we are in the unrestricted motion border. for (row = 0; row < max_blocks_high; row += (1 << tx_size)) { // Skip visiting the sub blocks that are wholly within the UMV. for (col = 0; col < max_blocks_wide; col += (1 << tx_size)) { p->src.buf = &src_buf_base[4 * (row * (int64_t)src_stride + col)]; pd->dst.buf = &dst_buf_base[4 * (row * (int64_t)dst_stride + col)]; vp9_predict_intra_block(xd, b_width_log2_lookup[bsize], tx_size, mode, x->skip_encode ? p->src.buf : pd->dst.buf, x->skip_encode ? src_stride : dst_stride, pd->dst.buf, dst_stride, col, row, 0); } } p->src.buf = src_buf_base; pd->dst.buf = dst_buf_base; } static void model_rd_for_sb_y_large(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, int *out_rate_sum, int64_t *out_dist_sum, unsigned int *var_y, unsigned int *sse_y, int mi_row, int mi_col, int *early_term, int *flag_preduv_computed) { // Note our transform coeffs are 8 times an orthogonal transform. // Hence quantizer step is also 8 times. To get effective quantizer // we need to divide by 8 before sending to modeling function. unsigned int sse; int rate; int64_t dist; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; const uint32_t dc_quant = pd->dequant[0]; const uint32_t ac_quant = pd->dequant[1]; int64_t dc_thr = dc_quant * dc_quant >> 6; int64_t ac_thr = ac_quant * ac_quant >> 6; unsigned int var; int sum; int skip_dc = 0; const int bw = b_width_log2_lookup[bsize]; const int bh = b_height_log2_lookup[bsize]; const int num8x8 = 1 << (bw + bh - 2); unsigned int sse8x8[64] = { 0 }; int sum8x8[64] = { 0 }; unsigned int var8x8[64] = { 0 }; TX_SIZE tx_size; int i, k; #if CONFIG_VP9_HIGHBITDEPTH const vpx_bit_depth_t bd = cpi->common.bit_depth; #endif // Calculate variance for whole partition, and also save 8x8 blocks' variance // to be used in following transform skipping test. block_variance(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, 4 << bw, 4 << bh, &sse, &sum, 8, #if CONFIG_VP9_HIGHBITDEPTH cpi->common.use_highbitdepth, bd, #endif sse8x8, sum8x8, var8x8); var = sse - (unsigned int)(((int64_t)sum * sum) >> (bw + bh + 4)); *var_y = var; *sse_y = sse; #if CONFIG_VP9_TEMPORAL_DENOISING if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc(cpi) && cpi->oxcf.speed > 5) ac_thr = vp9_scale_acskip_thresh(ac_thr, cpi->denoiser.denoising_level, (abs(sum) >> (bw + bh)), cpi->svc.temporal_layer_id); else ac_thr *= ac_thr_factor(cpi->oxcf.speed, cpi->common.width, cpi->common.height, abs(sum) >> (bw + bh)); #else ac_thr *= ac_thr_factor(cpi->oxcf.speed, cpi->common.width, cpi->common.height, abs(sum) >> (bw + bh)); #endif tx_size = calculate_tx_size(cpi, bsize, xd, var, sse, ac_thr, x->source_variance, 0); // The code below for setting skip flag assumes tranform size of at least 8x8, // so force this lower limit on transform. if (tx_size < TX_8X8) tx_size = TX_8X8; xd->mi[0]->tx_size = tx_size; if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && x->zero_temp_sad_source && x->source_variance == 0) dc_thr = dc_thr << 1; // Evaluate if the partition block is a skippable block in Y plane. 
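  // Added note (illustrative, not from upstream): because the transform gain
  // is 8, the effective quantizer step is dequant / 8 and the thresholds
  // above are (dequant / 8)^2 = dequant^2 >> 6; e.g. ac_quant = 32 gives
  // ac_thr = (32 * 32) >> 6 = 16 before the content-dependent scaling.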
{ unsigned int sse16x16[16] = { 0 }; int sum16x16[16] = { 0 }; unsigned int var16x16[16] = { 0 }; const int num16x16 = num8x8 >> 2; unsigned int sse32x32[4] = { 0 }; int sum32x32[4] = { 0 }; unsigned int var32x32[4] = { 0 }; const int num32x32 = num8x8 >> 4; int ac_test = 1; int dc_test = 1; const int num = (tx_size == TX_8X8) ? num8x8 : ((tx_size == TX_16X16) ? num16x16 : num32x32); const unsigned int *sse_tx = (tx_size == TX_8X8) ? sse8x8 : ((tx_size == TX_16X16) ? sse16x16 : sse32x32); const unsigned int *var_tx = (tx_size == TX_8X8) ? var8x8 : ((tx_size == TX_16X16) ? var16x16 : var32x32); // Calculate variance if tx_size > TX_8X8 if (tx_size >= TX_16X16) calculate_variance(bw, bh, TX_8X8, sse8x8, sum8x8, var16x16, sse16x16, sum16x16); if (tx_size == TX_32X32) calculate_variance(bw, bh, TX_16X16, sse16x16, sum16x16, var32x32, sse32x32, sum32x32); // Skipping test x->skip_txfm[0] = SKIP_TXFM_NONE; for (k = 0; k < num; k++) // Check if all ac coefficients can be quantized to zero. if (!(var_tx[k] < ac_thr || var == 0)) { ac_test = 0; break; } for (k = 0; k < num; k++) // Check if dc coefficient can be quantized to zero. if (!(sse_tx[k] - var_tx[k] < dc_thr || sse == var)) { dc_test = 0; break; } if (ac_test) { x->skip_txfm[0] = SKIP_TXFM_AC_ONLY; if (dc_test) x->skip_txfm[0] = SKIP_TXFM_AC_DC; } else if (dc_test) { skip_dc = 1; } } if (x->skip_txfm[0] == SKIP_TXFM_AC_DC) { int skip_uv[2] = { 0 }; unsigned int var_uv[2]; unsigned int sse_uv[2]; *out_rate_sum = 0; *out_dist_sum = sse << 4; // Transform skipping test in UV planes. for (i = 1; i <= 2; i++) { struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; const TX_SIZE uv_tx_size = get_uv_tx_size(xd->mi[0], pd); const BLOCK_SIZE unit_size = txsize_to_bsize[uv_tx_size]; const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, pd); const int uv_bw = b_width_log2_lookup[uv_bsize]; const int uv_bh = b_height_log2_lookup[uv_bsize]; const int sf = (uv_bw - b_width_log2_lookup[unit_size]) + (uv_bh - b_height_log2_lookup[unit_size]); const uint32_t uv_dc_thr = pd->dequant[0] * pd->dequant[0] >> (6 - sf); const uint32_t uv_ac_thr = pd->dequant[1] * pd->dequant[1] >> (6 - sf); int j = i - 1; vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, i); flag_preduv_computed[i - 1] = 1; var_uv[j] = cpi->fn_ptr[uv_bsize].vf( p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse_uv[j]); if ((var_uv[j] < uv_ac_thr || var_uv[j] == 0) && (sse_uv[j] - var_uv[j] < uv_dc_thr || sse_uv[j] == var_uv[j])) skip_uv[j] = 1; else break; } // If the transform in YUV planes are skippable, the mode search checks // fewer inter modes and doesn't check intra modes. 
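    // Added note (illustrative): skip_uv[] holds 0/1 flags, so the bitwise
    // `&` below acts as a logical AND over both chroma planes; only when both
    // planes pass the skip test is early termination signalled.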
if (skip_uv[0] & skip_uv[1]) { *early_term = 1; } return; } if (!skip_dc) { #if CONFIG_VP9_HIGHBITDEPTH vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], dc_quant >> (xd->bd - 5), &rate, &dist); #else vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], dc_quant >> 3, &rate, &dist); #endif // CONFIG_VP9_HIGHBITDEPTH } if (!skip_dc) { *out_rate_sum = rate >> 1; *out_dist_sum = dist << 3; } else { *out_rate_sum = 0; *out_dist_sum = (sse - var) << 4; } #if CONFIG_VP9_HIGHBITDEPTH vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], ac_quant >> (xd->bd - 5), &rate, &dist); #else vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], ac_quant >> 3, &rate, &dist); #endif // CONFIG_VP9_HIGHBITDEPTH *out_rate_sum += rate; *out_dist_sum += dist << 4; } static void model_rd_for_sb_y(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, int *out_rate_sum, int64_t *out_dist_sum, unsigned int *var_y, unsigned int *sse_y, int is_intra) { // Note our transform coeffs are 8 times an orthogonal transform. // Hence quantizer step is also 8 times. To get effective quantizer // we need to divide by 8 before sending to modeling function. unsigned int sse; int rate; int64_t dist; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; const int64_t dc_thr = p->quant_thred[0] >> 6; const int64_t ac_thr = p->quant_thred[1] >> 6; const uint32_t dc_quant = pd->dequant[0]; const uint32_t ac_quant = pd->dequant[1]; unsigned int var = cpi->fn_ptr[bsize].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse); int skip_dc = 0; *var_y = var; *sse_y = sse; xd->mi[0]->tx_size = calculate_tx_size(cpi, bsize, xd, var, sse, ac_thr, x->source_variance, is_intra); // Evaluate if the partition block is a skippable block in Y plane. { const BLOCK_SIZE unit_size = txsize_to_bsize[xd->mi[0]->tx_size]; const unsigned int num_blk_log2 = (b_width_log2_lookup[bsize] - b_width_log2_lookup[unit_size]) + (b_height_log2_lookup[bsize] - b_height_log2_lookup[unit_size]); const unsigned int sse_tx = sse >> num_blk_log2; const unsigned int var_tx = var >> num_blk_log2; x->skip_txfm[0] = SKIP_TXFM_NONE; // Check if all ac coefficients can be quantized to zero. if (var_tx < ac_thr || var == 0) { x->skip_txfm[0] = SKIP_TXFM_AC_ONLY; // Check if dc coefficient can be quantized to zero. 
if (sse_tx - var_tx < dc_thr || sse == var) x->skip_txfm[0] = SKIP_TXFM_AC_DC; } else { if (sse_tx - var_tx < dc_thr || sse == var) skip_dc = 1; } } if (x->skip_txfm[0] == SKIP_TXFM_AC_DC) { *out_rate_sum = 0; *out_dist_sum = sse << 4; return; } if (!skip_dc) { #if CONFIG_VP9_HIGHBITDEPTH vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], dc_quant >> (xd->bd - 5), &rate, &dist); #else vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bsize], dc_quant >> 3, &rate, &dist); #endif // CONFIG_VP9_HIGHBITDEPTH } if (!skip_dc) { *out_rate_sum = rate >> 1; *out_dist_sum = dist << 3; } else { *out_rate_sum = 0; *out_dist_sum = (sse - var) << 4; } #if CONFIG_VP9_HIGHBITDEPTH vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], ac_quant >> (xd->bd - 5), &rate, &dist); #else vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bsize], ac_quant >> 3, &rate, &dist); #endif // CONFIG_VP9_HIGHBITDEPTH *out_rate_sum += rate; *out_dist_sum += dist << 4; } static void block_yrd(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, int *skippable, int64_t *sse, BLOCK_SIZE bsize, TX_SIZE tx_size, int rd_computed, int is_intra) { MACROBLOCKD *xd = &x->e_mbd; const struct macroblockd_plane *pd = &xd->plane[0]; struct macroblock_plane *const p = &x->plane[0]; const int num_4x4_w = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[bsize]; const int step = 1 << (tx_size << 1); const int block_step = (1 << tx_size); int block = 0, r, c; const int max_blocks_wide = num_4x4_w + (xd->mb_to_right_edge >= 0 ? 0 : xd->mb_to_right_edge >> 5); const int max_blocks_high = num_4x4_h + (xd->mb_to_bottom_edge >= 0 ? 0 : xd->mb_to_bottom_edge >> 5); int eob_cost = 0; const int bw = 4 * num_4x4_w; const int bh = 4 * num_4x4_h; if (cpi->sf.use_simple_block_yrd && cpi->common.frame_type != KEY_FRAME && (bsize < BLOCK_32X32 || (cpi->use_svc && (bsize < BLOCK_32X32 || cpi->svc.temporal_layer_id > 0)))) { unsigned int var_y, sse_y; (void)tx_size; if (!rd_computed) model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc->rate, &this_rdc->dist, &var_y, &sse_y, is_intra); *sse = INT_MAX; *skippable = 0; return; } (void)cpi; // The max tx_size passed in is TX_16X16. assert(tx_size != TX_32X32); #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vpx_highbd_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, x->e_mbd.bd); } else { vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride); } #else vpx_subtract_block(bh, bw, p->src_diff, bw, p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride); #endif *skippable = 1; // Keep track of the row and column of the blocks we use so that we know // if we are in the unrestricted motion border. 
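  // Added note (illustrative, not from upstream): the loop below uses a
  // Hadamard transform plus vp9_quantize_fp as a fast stand-in for the real
  // transform/quantization; rate is then approximated from the SATD of the
  // quantized coefficients and the per-block eob counts rather than from
  // full token costing.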
for (r = 0; r < max_blocks_high; r += block_step) { for (c = 0; c < num_4x4_w; c += block_step) { if (c < max_blocks_wide) { const scan_order *const scan_order = &vp9_default_scan_orders[tx_size]; tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; const int diff_stride = bw; const int16_t *src_diff; src_diff = &p->src_diff[(r * diff_stride + c) << 2]; switch (tx_size) { case TX_16X16: vpx_hadamard_16x16(src_diff, diff_stride, coeff); vp9_quantize_fp(coeff, 256, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; case TX_8X8: vpx_hadamard_8x8(src_diff, diff_stride, coeff); vp9_quantize_fp(coeff, 64, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; default: assert(tx_size == TX_4X4); x->fwd_txfm4x4(src_diff, coeff, diff_stride); vp9_quantize_fp(coeff, 16, x->skip_block, p->round_fp, p->quant_fp, qcoeff, dqcoeff, pd->dequant, eob, scan_order->scan, scan_order->iscan); break; } *skippable &= (*eob == 0); eob_cost += 1; } block += step; } } this_rdc->rate = 0; if (*sse < INT64_MAX) { *sse = (*sse << 6) >> 2; if (*skippable) { this_rdc->dist = *sse; return; } } block = 0; this_rdc->dist = 0; for (r = 0; r < max_blocks_high; r += block_step) { for (c = 0; c < num_4x4_w; c += block_step) { if (c < max_blocks_wide) { tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block); tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); uint16_t *const eob = &p->eobs[block]; if (*eob == 1) this_rdc->rate += (int)abs(qcoeff[0]); else if (*eob > 1) this_rdc->rate += vpx_satd(qcoeff, step << 4); this_rdc->dist += vp9_block_error_fp(coeff, dqcoeff, step << 4) >> 2; } block += step; } } // If skippable is set, rate gets clobbered later. this_rdc->rate <<= (2 + VP9_PROB_COST_SHIFT); this_rdc->rate += (eob_cost << VP9_PROB_COST_SHIFT); } static void model_rd_for_sb_uv(VP9_COMP *cpi, BLOCK_SIZE plane_bsize, MACROBLOCK *x, MACROBLOCKD *xd, RD_COST *this_rdc, unsigned int *var_y, unsigned int *sse_y, int start_plane, int stop_plane) { // Note our transform coeffs are 8 times an orthogonal transform. // Hence quantizer step is also 8 times. To get effective quantizer // we need to divide by 8 before sending to modeling function. 
unsigned int sse; int rate; int64_t dist; int i; #if CONFIG_VP9_HIGHBITDEPTH uint64_t tot_var = *var_y; uint64_t tot_sse = *sse_y; #else uint32_t tot_var = *var_y; uint32_t tot_sse = *sse_y; #endif this_rdc->rate = 0; this_rdc->dist = 0; for (i = start_plane; i <= stop_plane; ++i) { struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; const uint32_t dc_quant = pd->dequant[0]; const uint32_t ac_quant = pd->dequant[1]; const BLOCK_SIZE bs = plane_bsize; unsigned int var; if (!x->color_sensitivity[i - 1]) continue; var = cpi->fn_ptr[bs].vf(p->src.buf, p->src.stride, pd->dst.buf, pd->dst.stride, &sse); assert(sse >= var); tot_var += var; tot_sse += sse; #if CONFIG_VP9_HIGHBITDEPTH vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs], dc_quant >> (xd->bd - 5), &rate, &dist); #else vp9_model_rd_from_var_lapndz(sse - var, num_pels_log2_lookup[bs], dc_quant >> 3, &rate, &dist); #endif // CONFIG_VP9_HIGHBITDEPTH this_rdc->rate += rate >> 1; this_rdc->dist += dist << 3; #if CONFIG_VP9_HIGHBITDEPTH vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs], ac_quant >> (xd->bd - 5), &rate, &dist); #else vp9_model_rd_from_var_lapndz(var, num_pels_log2_lookup[bs], ac_quant >> 3, &rate, &dist); #endif // CONFIG_VP9_HIGHBITDEPTH this_rdc->rate += rate; this_rdc->dist += dist << 4; } #if CONFIG_VP9_HIGHBITDEPTH *var_y = tot_var > UINT32_MAX ? UINT32_MAX : (uint32_t)tot_var; *sse_y = tot_sse > UINT32_MAX ? UINT32_MAX : (uint32_t)tot_sse; #else *var_y = tot_var; *sse_y = tot_sse; #endif } static int get_pred_buffer(PRED_BUFFER *p, int len) { int i; for (i = 0; i < len; i++) { if (!p[i].in_use) { p[i].in_use = 1; return i; } } return -1; } static void free_pred_buffer(PRED_BUFFER *p) { if (p != NULL) p->in_use = 0; } static void encode_breakout_test( VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, MV_REFERENCE_FRAME ref_frame, PREDICTION_MODE this_mode, unsigned int var_y, unsigned int sse_y, struct buf_2d yv12_mb[][MAX_MB_PLANE], int *rate, int64_t *dist, int *flag_preduv_computed) { MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; const BLOCK_SIZE uv_size = get_plane_block_size(bsize, &xd->plane[1]); unsigned int var = var_y, sse = sse_y; // Skipping threshold for ac. unsigned int thresh_ac; // Skipping threshold for dc. unsigned int thresh_dc; int motion_low = 1; if (cpi->use_svc && ref_frame == GOLDEN_FRAME) return; if (mi->mv[0].as_mv.row > 64 || mi->mv[0].as_mv.row < -64 || mi->mv[0].as_mv.col > 64 || mi->mv[0].as_mv.col < -64) motion_low = 0; if (x->encode_breakout > 0 && motion_low == 1) { // Set a maximum for threshold to avoid big PSNR loss in low bit rate // case. Use extreme low threshold for static frames to limit // skipping. const unsigned int max_thresh = 36000; // The encode_breakout input const unsigned int min_thresh = VPXMIN(((unsigned int)x->encode_breakout << 4), max_thresh); #if CONFIG_VP9_HIGHBITDEPTH const int shift = (xd->bd << 1) - 16; #endif // Calculate threshold according to dequant value. thresh_ac = (xd->plane[0].dequant[1] * xd->plane[0].dequant[1]) >> 3; #if CONFIG_VP9_HIGHBITDEPTH if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) { thresh_ac = ROUND_POWER_OF_TWO(thresh_ac, shift); } #endif // CONFIG_VP9_HIGHBITDEPTH thresh_ac = clamp(thresh_ac, min_thresh, max_thresh); // Adjust ac threshold according to partition size. 
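    // Added note (illustrative): b_width_log2 + b_height_log2 is 8 for
    // BLOCK_64X64 and 4 for BLOCK_16X16, so the shift below leaves a 64x64
    // threshold unchanged and divides a 16x16 threshold by 16, keeping
    // thresh_ac roughly proportional to block area.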
thresh_ac >>= 8 - (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); thresh_dc = (xd->plane[0].dequant[0] * xd->plane[0].dequant[0] >> 6); #if CONFIG_VP9_HIGHBITDEPTH if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && shift > 0) { thresh_dc = ROUND_POWER_OF_TWO(thresh_dc, shift); } #endif // CONFIG_VP9_HIGHBITDEPTH } else { thresh_ac = 0; thresh_dc = 0; } // Y skipping condition checking for ac and dc. if (var <= thresh_ac && (sse - var) <= thresh_dc) { unsigned int sse_u, sse_v; unsigned int var_u, var_v; unsigned int thresh_ac_uv = thresh_ac; unsigned int thresh_dc_uv = thresh_dc; if (x->sb_is_skin) { thresh_ac_uv = 0; thresh_dc_uv = 0; } if (!flag_preduv_computed[0] || !flag_preduv_computed[1]) { xd->plane[1].pre[0] = yv12_mb[ref_frame][1]; xd->plane[2].pre[0] = yv12_mb[ref_frame][2]; vp9_build_inter_predictors_sbuv(xd, mi_row, mi_col, bsize); } var_u = cpi->fn_ptr[uv_size].vf(x->plane[1].src.buf, x->plane[1].src.stride, xd->plane[1].dst.buf, xd->plane[1].dst.stride, &sse_u); // U skipping condition checking if (((var_u << 2) <= thresh_ac_uv) && (sse_u - var_u <= thresh_dc_uv)) { var_v = cpi->fn_ptr[uv_size].vf( x->plane[2].src.buf, x->plane[2].src.stride, xd->plane[2].dst.buf, xd->plane[2].dst.stride, &sse_v); // V skipping condition checking if (((var_v << 2) <= thresh_ac_uv) && (sse_v - var_v <= thresh_dc_uv)) { x->skip = 1; // The cost of skip bit needs to be added. *rate = cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]] [INTER_OFFSET(this_mode)]; // More on this part of rate // rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); // Scaling factor for SSE from spatial domain to frequency // domain is 16. Adjust distortion accordingly. // TODO(yunqingwang): In this function, only y-plane dist is // calculated. *dist = (sse << 4); // + ((sse_u + sse_v) << 4); // *disable_skip = 1; } } } } struct estimate_block_intra_args { VP9_COMP *cpi; MACROBLOCK *x; PREDICTION_MODE mode; int skippable; RD_COST *rdc; }; static void estimate_block_intra(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct estimate_block_intra_args *const args = arg; VP9_COMP *const cpi = args->cpi; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *const p = &x->plane[plane]; struct macroblockd_plane *const pd = &xd->plane[plane]; const BLOCK_SIZE bsize_tx = txsize_to_bsize[tx_size]; uint8_t *const src_buf_base = p->src.buf; uint8_t *const dst_buf_base = pd->dst.buf; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; RD_COST this_rdc; (void)block; p->src.buf = &src_buf_base[4 * (row * (int64_t)src_stride + col)]; pd->dst.buf = &dst_buf_base[4 * (row * (int64_t)dst_stride + col)]; // Use source buffer as an approximation for the fully reconstructed buffer. vp9_predict_intra_block(xd, b_width_log2_lookup[plane_bsize], tx_size, args->mode, x->skip_encode ? p->src.buf : pd->dst.buf, x->skip_encode ? 
src_stride : dst_stride, pd->dst.buf, dst_stride, col, row, plane); if (plane == 0) { int64_t this_sse = INT64_MAX; block_yrd(cpi, x, &this_rdc, &args->skippable, &this_sse, bsize_tx, VPXMIN(tx_size, TX_16X16), 0, 1); } else { unsigned int var = 0; unsigned int sse = 0; model_rd_for_sb_uv(cpi, bsize_tx, x, xd, &this_rdc, &var, &sse, plane, plane); } p->src.buf = src_buf_base; pd->dst.buf = dst_buf_base; args->rdc->rate += this_rdc.rate; args->rdc->dist += this_rdc.dist; } static const THR_MODES mode_idx[MAX_REF_FRAMES][4] = { { THR_DC, THR_V_PRED, THR_H_PRED, THR_TM }, { THR_NEARESTMV, THR_NEARMV, THR_ZEROMV, THR_NEWMV }, { THR_NEARESTG, THR_NEARG, THR_ZEROG, THR_NEWG }, { THR_NEARESTA, THR_NEARA, THR_ZEROA, THR_NEWA }, }; static const PREDICTION_MODE intra_mode_list[] = { DC_PRED, V_PRED, H_PRED, TM_PRED }; static int mode_offset(const PREDICTION_MODE mode) { if (mode >= NEARESTMV) { return INTER_OFFSET(mode); } else { switch (mode) { case DC_PRED: return 0; case V_PRED: return 1; case H_PRED: return 2; case TM_PRED: return 3; default: return -1; } } } static INLINE int rd_less_than_thresh_row_mt(int64_t best_rd, int thresh, const int *const thresh_fact) { int is_rd_less_than_thresh; is_rd_less_than_thresh = best_rd < ((int64_t)thresh * (*thresh_fact) >> 5) || thresh == INT_MAX; return is_rd_less_than_thresh; } static INLINE void update_thresh_freq_fact_row_mt( VP9_COMP *cpi, TileDataEnc *tile_data, int source_variance, int thresh_freq_fact_idx, MV_REFERENCE_FRAME ref_frame, THR_MODES best_mode_idx, PREDICTION_MODE mode) { THR_MODES thr_mode_idx = mode_idx[ref_frame][mode_offset(mode)]; int freq_fact_idx = thresh_freq_fact_idx + thr_mode_idx; int *freq_fact = &tile_data->row_base_thresh_freq_fact[freq_fact_idx]; if (thr_mode_idx == best_mode_idx) *freq_fact -= (*freq_fact >> 4); else if (cpi->sf.limit_newmv_early_exit && mode == NEWMV && ref_frame == LAST_FRAME && source_variance < 5) { *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC, 32); } else { *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC, cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); } } static INLINE void update_thresh_freq_fact( VP9_COMP *cpi, TileDataEnc *tile_data, int source_variance, BLOCK_SIZE bsize, MV_REFERENCE_FRAME ref_frame, THR_MODES best_mode_idx, PREDICTION_MODE mode) { THR_MODES thr_mode_idx = mode_idx[ref_frame][mode_offset(mode)]; int *freq_fact = &tile_data->thresh_freq_fact[bsize][thr_mode_idx]; if (thr_mode_idx == best_mode_idx) *freq_fact -= (*freq_fact >> 4); else if (cpi->sf.limit_newmv_early_exit && mode == NEWMV && ref_frame == LAST_FRAME && source_variance < 5) { *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC, 32); } else { *freq_fact = VPXMIN(*freq_fact + RD_THRESH_INC, cpi->sf.adaptive_rd_thresh * RD_THRESH_MAX_FACT); } } void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; RD_COST this_rdc, best_rdc; PREDICTION_MODE this_mode; struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 }; const TX_SIZE intra_tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); MODE_INFO *const mic = xd->mi[0]; int *bmode_costs; const MODE_INFO *above_mi = xd->above_mi; const MODE_INFO *left_mi = xd->left_mi; const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0); const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0); bmode_costs = cpi->y_mode_costs[A][L]; (void)ctx; vp9_rd_cost_reset(&best_rdc); vp9_rd_cost_reset(&this_rdc); 
mi->ref_frame[0] = INTRA_FRAME; // Initialize interp_filter here so we do not have to check for inter block // modes in get_pred_context_switchable_interp() mi->interp_filter = SWITCHABLE_FILTERS; mi->mv[0].as_int = INVALID_MV; mi->uv_mode = DC_PRED; memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); // Change the limit of this loop to add other intra prediction // mode tests. for (this_mode = DC_PRED; this_mode <= H_PRED; ++this_mode) { this_rdc.dist = this_rdc.rate = 0; args.mode = this_mode; args.skippable = 1; args.rdc = &this_rdc; mi->tx_size = intra_tx_size; vp9_foreach_transformed_block_in_plane(xd, bsize, 0, estimate_block_intra, &args); if (args.skippable) { x->skip_txfm[0] = SKIP_TXFM_AC_DC; this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1); } else { x->skip_txfm[0] = SKIP_TXFM_NONE; this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0); } this_rdc.rate += bmode_costs[this_mode]; this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); if (this_rdc.rdcost < best_rdc.rdcost) { best_rdc = this_rdc; mi->mode = this_mode; } } *rd_cost = best_rdc; } static void init_ref_frame_cost(VP9_COMMON *const cm, MACROBLOCKD *const xd, int ref_frame_cost[MAX_REF_FRAMES]) { vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd); vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd); vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd); ref_frame_cost[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0); ref_frame_cost[LAST_FRAME] = ref_frame_cost[GOLDEN_FRAME] = ref_frame_cost[ALTREF_FRAME] = vp9_cost_bit(intra_inter_p, 1); ref_frame_cost[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0); ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p1, 1); ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1); ref_frame_cost[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0); ref_frame_cost[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1); } typedef struct { MV_REFERENCE_FRAME ref_frame; PREDICTION_MODE pred_mode; } REF_MODE; #define RT_INTER_MODES 12 static const REF_MODE ref_mode_set[RT_INTER_MODES] = { { LAST_FRAME, ZEROMV }, { LAST_FRAME, NEARESTMV }, { GOLDEN_FRAME, ZEROMV }, { LAST_FRAME, NEARMV }, { LAST_FRAME, NEWMV }, { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV }, { GOLDEN_FRAME, NEWMV }, { ALTREF_FRAME, ZEROMV }, { ALTREF_FRAME, NEARESTMV }, { ALTREF_FRAME, NEARMV }, { ALTREF_FRAME, NEWMV } }; #define RT_INTER_MODES_SVC 8 static const REF_MODE ref_mode_set_svc[RT_INTER_MODES_SVC] = { { LAST_FRAME, ZEROMV }, { LAST_FRAME, NEARESTMV }, { LAST_FRAME, NEARMV }, { GOLDEN_FRAME, ZEROMV }, { GOLDEN_FRAME, NEARESTMV }, { GOLDEN_FRAME, NEARMV }, { LAST_FRAME, NEWMV }, { GOLDEN_FRAME, NEWMV } }; static INLINE void find_predictors( VP9_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int const_motion[MAX_REF_FRAMES], int *ref_frame_skip_mask, const int flag_list[4], TileDataEnc *tile_data, int mi_row, int mi_col, struct buf_2d yv12_mb[4][MAX_MB_PLANE], BLOCK_SIZE bsize, int force_skip_low_temp_var, int comp_pred_allowed) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); TileInfo *const tile_info = &tile_data->tile_info; // TODO(jingning) placeholder for inter-frame non-RD mode decision. x->pred_mv_sad[ref_frame] = INT_MAX; frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; // this needs various further optimizations. to be continued.. 
if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) { int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame]; const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); if (cm->use_prev_frame_mvs || comp_pred_allowed) { vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame, candidates, mi_row, mi_col, x->mbmi_ext->mode_context); } else { const_motion[ref_frame] = mv_refs_rt(cpi, cm, x, xd, tile_info, xd->mi[0], ref_frame, candidates, &frame_mv[NEWMV][ref_frame], mi_row, mi_col, (int)(cpi->svc.use_base_mv && cpi->svc.spatial_layer_id)); } vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, &frame_mv[NEARESTMV][ref_frame], &frame_mv[NEARMV][ref_frame]); // Early exit for golden frame if force_skip_low_temp_var is set. if (!vp9_is_scaled(sf) && bsize >= BLOCK_8X8 && !(force_skip_low_temp_var && ref_frame == GOLDEN_FRAME)) { vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame, bsize); } } else { *ref_frame_skip_mask |= (1 << ref_frame); } } static void vp9_NEWMV_diff_bias(const NOISE_ESTIMATE *ne, MACROBLOCKD *xd, PREDICTION_MODE this_mode, RD_COST *this_rdc, BLOCK_SIZE bsize, int mv_row, int mv_col, int is_last_frame, int lowvar_highsumdiff, int is_skin) { // Bias against MVs associated with NEWMV mode that are very different from // top/left neighbors. if (this_mode == NEWMV) { int al_mv_average_row; int al_mv_average_col; int left_row, left_col; int row_diff, col_diff; int above_mv_valid = 0; int left_mv_valid = 0; int above_row = 0; int above_col = 0; if (xd->above_mi) { above_mv_valid = xd->above_mi->mv[0].as_int != INVALID_MV; above_row = xd->above_mi->mv[0].as_mv.row; above_col = xd->above_mi->mv[0].as_mv.col; } if (xd->left_mi) { left_mv_valid = xd->left_mi->mv[0].as_int != INVALID_MV; left_row = xd->left_mi->mv[0].as_mv.row; left_col = xd->left_mi->mv[0].as_mv.col; } if (above_mv_valid && left_mv_valid) { al_mv_average_row = (above_row + left_row + 1) >> 1; al_mv_average_col = (above_col + left_col + 1) >> 1; } else if (above_mv_valid) { al_mv_average_row = above_row; al_mv_average_col = above_col; } else if (left_mv_valid) { al_mv_average_row = left_row; al_mv_average_col = left_col; } else { al_mv_average_row = al_mv_average_col = 0; } row_diff = (al_mv_average_row - mv_row); col_diff = (al_mv_average_col - mv_col); if (row_diff > 48 || row_diff < -48 || col_diff > 48 || col_diff < -48) { if (bsize > BLOCK_32X32) this_rdc->rdcost = this_rdc->rdcost << 1; else this_rdc->rdcost = 3 * this_rdc->rdcost >> 1; } } // If noise estimation is enabled, and estimated level is above threshold, // add a bias to LAST reference with small motion, for large blocks. 
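  // Added note (illustrative): 7 * (rdcost >> 3) rescales the RD cost by
  // roughly 7/8 (e.g. 1000 -> 875), i.e. a ~12.5% bias toward the
  // small-motion LAST candidates selected by the conditions below.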
if (ne->enabled && ne->level >= kMedium && bsize >= BLOCK_32X32 && is_last_frame && mv_row < 8 && mv_row > -8 && mv_col < 8 && mv_col > -8) this_rdc->rdcost = 7 * (this_rdc->rdcost >> 3); else if (lowvar_highsumdiff && !is_skin && bsize >= BLOCK_16X16 && is_last_frame && mv_row < 16 && mv_row > -16 && mv_col < 16 && mv_col > -16) this_rdc->rdcost = 7 * (this_rdc->rdcost >> 3); } #if CONFIG_VP9_TEMPORAL_DENOISING static void vp9_pickmode_ctx_den_update( VP9_PICKMODE_CTX_DEN *ctx_den, int64_t zero_last_cost_orig, int ref_frame_cost[MAX_REF_FRAMES], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int reuse_inter_pred, BEST_PICKMODE *bp) { ctx_den->zero_last_cost_orig = zero_last_cost_orig; ctx_den->ref_frame_cost = ref_frame_cost; ctx_den->frame_mv = frame_mv; ctx_den->reuse_inter_pred = reuse_inter_pred; ctx_den->best_tx_size = bp->best_tx_size; ctx_den->best_mode = bp->best_mode; ctx_den->best_ref_frame = bp->best_ref_frame; ctx_den->best_pred_filter = bp->best_pred_filter; ctx_den->best_mode_skip_txfm = bp->best_mode_skip_txfm; } static void recheck_zeromv_after_denoising( VP9_COMP *cpi, MODE_INFO *const mi, MACROBLOCK *x, MACROBLOCKD *const xd, VP9_DENOISER_DECISION decision, VP9_PICKMODE_CTX_DEN *ctx_den, struct buf_2d yv12_mb[4][MAX_MB_PLANE], RD_COST *best_rdc, BLOCK_SIZE bsize, int mi_row, int mi_col) { // If INTRA or GOLDEN reference was selected, re-evaluate ZEROMV on // denoised result. Only do this under noise conditions, and if rdcost of // ZEROMV onoriginal source is not significantly higher than rdcost of best // mode. if (cpi->noise_estimate.enabled && cpi->noise_estimate.level > kLow && ctx_den->zero_last_cost_orig < (best_rdc->rdcost << 3) && ((ctx_den->best_ref_frame == INTRA_FRAME && decision >= FILTER_BLOCK) || (ctx_den->best_ref_frame == GOLDEN_FRAME && cpi->svc.number_spatial_layers == 1 && decision == FILTER_ZEROMV_BLOCK))) { // Check if we should pick ZEROMV on denoised signal. VP9_COMMON *const cm = &cpi->common; int rate = 0; int64_t dist = 0; uint32_t var_y = UINT_MAX; uint32_t sse_y = UINT_MAX; RD_COST this_rdc; mi->mode = ZEROMV; mi->ref_frame[0] = LAST_FRAME; mi->ref_frame[1] = NONE; set_ref_ptrs(cm, xd, mi->ref_frame[0], NONE); mi->mv[0].as_int = 0; mi->interp_filter = EIGHTTAP; if (cpi->sf.default_interp_filter == BILINEAR) mi->interp_filter = BILINEAR; xd->plane[0].pre[0] = yv12_mb[LAST_FRAME][0]; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); model_rd_for_sb_y(cpi, bsize, x, xd, &rate, &dist, &var_y, &sse_y, 0); this_rdc.rate = rate + ctx_den->ref_frame_cost[LAST_FRAME] + cpi->inter_mode_cost[x->mbmi_ext->mode_context[LAST_FRAME]] [INTER_OFFSET(ZEROMV)]; this_rdc.dist = dist; this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, rate, dist); // Don't switch to ZEROMV if the rdcost for ZEROMV on denoised source // is higher than best_ref mode (on original source). 
if (this_rdc.rdcost > best_rdc->rdcost) { this_rdc = *best_rdc; mi->mode = ctx_den->best_mode; mi->ref_frame[0] = ctx_den->best_ref_frame; set_ref_ptrs(cm, xd, mi->ref_frame[0], NONE); mi->interp_filter = ctx_den->best_pred_filter; if (ctx_den->best_ref_frame == INTRA_FRAME) { mi->mv[0].as_int = INVALID_MV; mi->interp_filter = SWITCHABLE_FILTERS; } else if (ctx_den->best_ref_frame == GOLDEN_FRAME) { mi->mv[0].as_int = ctx_den->frame_mv[ctx_den->best_mode][ctx_den->best_ref_frame] .as_int; if (ctx_den->reuse_inter_pred) { xd->plane[0].pre[0] = yv12_mb[GOLDEN_FRAME][0]; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); } } mi->tx_size = ctx_den->best_tx_size; x->skip_txfm[0] = ctx_den->best_mode_skip_txfm; } else { ctx_den->best_ref_frame = LAST_FRAME; *best_rdc = this_rdc; } } } #endif // CONFIG_VP9_TEMPORAL_DENOISING static INLINE int get_force_skip_low_temp_var(uint8_t *variance_low, int mi_row, int mi_col, BLOCK_SIZE bsize) { const int i = (mi_row & 0x7) >> 1; const int j = (mi_col & 0x7) >> 1; int force_skip_low_temp_var = 0; // Set force_skip_low_temp_var based on the block size and block offset. if (bsize == BLOCK_64X64) { force_skip_low_temp_var = variance_low[0]; } else if (bsize == BLOCK_64X32) { if (!(mi_col & 0x7) && !(mi_row & 0x7)) { force_skip_low_temp_var = variance_low[1]; } else if (!(mi_col & 0x7) && (mi_row & 0x7)) { force_skip_low_temp_var = variance_low[2]; } } else if (bsize == BLOCK_32X64) { if (!(mi_col & 0x7) && !(mi_row & 0x7)) { force_skip_low_temp_var = variance_low[3]; } else if ((mi_col & 0x7) && !(mi_row & 0x7)) { force_skip_low_temp_var = variance_low[4]; } } else if (bsize == BLOCK_32X32) { if (!(mi_col & 0x7) && !(mi_row & 0x7)) { force_skip_low_temp_var = variance_low[5]; } else if ((mi_col & 0x7) && !(mi_row & 0x7)) { force_skip_low_temp_var = variance_low[6]; } else if (!(mi_col & 0x7) && (mi_row & 0x7)) { force_skip_low_temp_var = variance_low[7]; } else if ((mi_col & 0x7) && (mi_row & 0x7)) { force_skip_low_temp_var = variance_low[8]; } } else if (bsize == BLOCK_16X16) { force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]]; } else if (bsize == BLOCK_32X16) { // The col shift index for the second 16x16 block. const int j2 = ((mi_col + 2) & 0x7) >> 1; // Only if each 16x16 block inside has low temporal variance. force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] && variance_low[pos_shift_16x16[i][j2]]; } else if (bsize == BLOCK_16X32) { // The row shift index for the second 16x16 block. 
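      // Added note (illustrative): mirroring the BLOCK_32X16 case above, a
      // 16x32 block is treated as low-variance only if both stacked 16x16
      // halves (rows i and i2) have low temporal variance.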
const int i2 = ((mi_row + 2) & 0x7) >> 1; force_skip_low_temp_var = variance_low[pos_shift_16x16[i][j]] && variance_low[pos_shift_16x16[i2][j]]; } return force_skip_low_temp_var; } static void search_filter_ref(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *this_rdc, int mi_row, int mi_col, PRED_BUFFER *tmp, BLOCK_SIZE bsize, int reuse_inter_pred, PRED_BUFFER **this_mode_pred, unsigned int *var_y, unsigned int *sse_y, int force_smooth_filter, int *this_early_term, int *flag_preduv_computed, int use_model_yrd_large) { MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; struct macroblockd_plane *const pd = &xd->plane[0]; const int bw = num_4x4_blocks_wide_lookup[bsize] << 2; int pf_rate[3] = { 0 }; int64_t pf_dist[3] = { 0 }; int curr_rate[3] = { 0 }; unsigned int pf_var[3] = { 0 }; unsigned int pf_sse[3] = { 0 }; TX_SIZE pf_tx_size[3] = { 0 }; int64_t best_cost = INT64_MAX; INTERP_FILTER best_filter = SWITCHABLE, filter; PRED_BUFFER *current_pred = *this_mode_pred; uint8_t skip_txfm = SKIP_TXFM_NONE; int best_early_term = 0; int best_flag_preduv_computed[2] = { 0 }; INTERP_FILTER filter_start = force_smooth_filter ? EIGHTTAP_SMOOTH : EIGHTTAP; INTERP_FILTER filter_end = EIGHTTAP_SMOOTH; for (filter = filter_start; filter <= filter_end; ++filter) { int64_t cost; mi->interp_filter = filter; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); // For large partition blocks, extra testing is done. if (use_model_yrd_large) model_rd_for_sb_y_large(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter], &pf_var[filter], &pf_sse[filter], mi_row, mi_col, this_early_term, flag_preduv_computed); else model_rd_for_sb_y(cpi, bsize, x, xd, &pf_rate[filter], &pf_dist[filter], &pf_var[filter], &pf_sse[filter], 0); curr_rate[filter] = pf_rate[filter]; pf_rate[filter] += vp9_get_switchable_rate(cpi, xd); cost = RDCOST(x->rdmult, x->rddiv, pf_rate[filter], pf_dist[filter]); pf_tx_size[filter] = mi->tx_size; if (cost < best_cost) { best_filter = filter; best_cost = cost; skip_txfm = x->skip_txfm[0]; best_early_term = *this_early_term; best_flag_preduv_computed[0] = flag_preduv_computed[0]; best_flag_preduv_computed[1] = flag_preduv_computed[1]; if (reuse_inter_pred) { if (*this_mode_pred != current_pred) { free_pred_buffer(*this_mode_pred); *this_mode_pred = current_pred; } if (filter != filter_end) { current_pred = &tmp[get_pred_buffer(tmp, 3)]; pd->dst.buf = current_pred->data; pd->dst.stride = bw; } } } } if (reuse_inter_pred && *this_mode_pred != current_pred) free_pred_buffer(current_pred); mi->interp_filter = best_filter; mi->tx_size = pf_tx_size[best_filter]; this_rdc->rate = curr_rate[best_filter]; this_rdc->dist = pf_dist[best_filter]; *var_y = pf_var[best_filter]; *sse_y = pf_sse[best_filter]; x->skip_txfm[0] = skip_txfm; *this_early_term = best_early_term; flag_preduv_computed[0] = best_flag_preduv_computed[0]; flag_preduv_computed[1] = best_flag_preduv_computed[1]; if (reuse_inter_pred) { pd->dst.buf = (*this_mode_pred)->data; pd->dst.stride = (*this_mode_pred)->stride; } else if (best_filter < filter_end) { mi->interp_filter = best_filter; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); } } static int search_new_mv(VP9_COMP *cpi, MACROBLOCK *x, int_mv frame_mv[][MAX_REF_FRAMES], MV_REFERENCE_FRAME ref_frame, int gf_temporal_ref, BLOCK_SIZE bsize, int mi_row, int mi_col, int best_pred_sad, int *rate_mv, unsigned int best_sse_sofar, RD_COST *best_rdc) { SVC *const svc = &cpi->svc; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; SPEED_FEATURES *const sf = 
&cpi->sf; if (ref_frame > LAST_FRAME && gf_temporal_ref && cpi->oxcf.rc_mode == VPX_CBR) { int tmp_sad; uint32_t dis; int cost_list[5] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, INT_MAX }; if (bsize < BLOCK_16X16) return -1; tmp_sad = vp9_int_pro_motion_estimation( cpi, x, bsize, mi_row, mi_col, &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv); if (tmp_sad > x->pred_mv_sad[LAST_FRAME]) return -1; if (tmp_sad + (num_pels_log2_lookup[bsize] << 4) > best_pred_sad) return -1; frame_mv[NEWMV][ref_frame].as_int = mi->mv[0].as_int; *rate_mv = vp9_mv_bit_cost(&frame_mv[NEWMV][ref_frame].as_mv, &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); frame_mv[NEWMV][ref_frame].as_mv.row >>= 3; frame_mv[NEWMV][ref_frame].as_mv.col >>= 3; cpi->find_fractional_mv_step( x, &frame_mv[NEWMV][ref_frame].as_mv, &x->mbmi_ext->ref_mvs[ref_frame][0].as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref_frame], NULL, 0, 0, cpi->sf.use_accurate_subpel_search); } else if (svc->use_base_mv && svc->spatial_layer_id) { if (frame_mv[NEWMV][ref_frame].as_int != INVALID_MV) { const int pre_stride = xd->plane[0].pre[0].stride; unsigned int base_mv_sse = UINT_MAX; int scale = (cpi->rc.avg_frame_low_motion > 60) ? 2 : 4; const uint8_t *const pre_buf = xd->plane[0].pre[0].buf + (frame_mv[NEWMV][ref_frame].as_mv.row >> 3) * pre_stride + (frame_mv[NEWMV][ref_frame].as_mv.col >> 3); cpi->fn_ptr[bsize].vf(x->plane[0].src.buf, x->plane[0].src.stride, pre_buf, pre_stride, &base_mv_sse); // Exit NEWMV search if base_mv is (0,0) && bsize < BLOCK_16x16, // for SVC encoding. if (cpi->use_svc && svc->use_base_mv && bsize < BLOCK_16X16 && frame_mv[NEWMV][ref_frame].as_mv.row == 0 && frame_mv[NEWMV][ref_frame].as_mv.col == 0) return -1; // Exit NEWMV search if base_mv_sse is large. if (sf->base_mv_aggressive && base_mv_sse > (best_sse_sofar << scale)) return -1; if (base_mv_sse < (best_sse_sofar << 1)) { // Base layer mv is good. // Exit NEWMV search if the base_mv is (0, 0) and sse is low, since // (0, 0) mode is already tested. 
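        // (Clarifying note: the b_width_log2_lookup/b_height_log2_lookup
        // tables count 4x4 units, so the shift below averages base_mv_sse
        // over the 4x4 sub-blocks of the partition.)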
unsigned int base_mv_sse_normalized = base_mv_sse >> (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); if (sf->base_mv_aggressive && base_mv_sse <= best_sse_sofar && base_mv_sse_normalized < 400 && frame_mv[NEWMV][ref_frame].as_mv.row == 0 && frame_mv[NEWMV][ref_frame].as_mv.col == 0) return -1; if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, &frame_mv[NEWMV][ref_frame], rate_mv, best_rdc->rdcost, 1)) { return -1; } } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, &frame_mv[NEWMV][ref_frame], rate_mv, best_rdc->rdcost, 0)) { return -1; } } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, &frame_mv[NEWMV][ref_frame], rate_mv, best_rdc->rdcost, 0)) { return -1; } } else if (!combined_motion_search(cpi, x, bsize, mi_row, mi_col, &frame_mv[NEWMV][ref_frame], rate_mv, best_rdc->rdcost, 0)) { return -1; } return 0; } static INLINE void init_best_pickmode(BEST_PICKMODE *bp) { bp->best_mode = ZEROMV; bp->best_ref_frame = LAST_FRAME; bp->best_tx_size = TX_SIZES; bp->best_intra_tx_size = TX_SIZES; bp->best_pred_filter = EIGHTTAP; bp->best_mode_skip_txfm = SKIP_TXFM_NONE; bp->best_second_ref_frame = NONE; bp->best_pred = NULL; } void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; SVC *const svc = &cpi->svc; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; struct macroblockd_plane *const pd = &xd->plane[0]; BEST_PICKMODE best_pickmode; MV_REFERENCE_FRAME ref_frame; MV_REFERENCE_FRAME usable_ref_frame, second_ref_frame; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; uint8_t mode_checked[MB_MODE_COUNT][MAX_REF_FRAMES]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; RD_COST this_rdc, best_rdc; // var_y and sse_y are saved to be used in skipping checking unsigned int var_y = UINT_MAX; unsigned int sse_y = UINT_MAX; const int intra_cost_penalty = vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q); int64_t inter_mode_thresh = RDCOST(x->rdmult, x->rddiv, intra_cost_penalty, 0); const int *const rd_threshes = cpi->rd.threshes[mi->segment_id][bsize]; const int sb_row = mi_row >> MI_BLOCK_SIZE_LOG2; int thresh_freq_fact_idx = (sb_row * BLOCK_SIZES + bsize) * MAX_MODES; const int *const rd_thresh_freq_fact = (cpi->sf.adaptive_rd_thresh_row_mt) ? &(tile_data->row_base_thresh_freq_fact[thresh_freq_fact_idx]) : tile_data->thresh_freq_fact[bsize]; #if CONFIG_VP9_TEMPORAL_DENOISING const int denoise_recheck_zeromv = 1; #endif INTERP_FILTER filter_ref; int pred_filter_search = cm->interp_filter == SWITCHABLE; int const_motion[MAX_REF_FRAMES] = { 0 }; const int bh = num_4x4_blocks_high_lookup[bsize] << 2; const int bw = num_4x4_blocks_wide_lookup[bsize] << 2; // For speed 6, the result of interp filter is reused later in actual encoding // process. // tmp[3] points to dst buffer, and the other 3 point to allocated buffers. 
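  // (Clarifying note: tmp[0..2] are scratch buffers carved out of pred_buf /
  // pred_buf_16 below; get_pred_buffer() hands out the first entry with
  // in_use == 0 and free_pred_buffer() clears the flag again.)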
PRED_BUFFER tmp[4]; DECLARE_ALIGNED(16, uint8_t, pred_buf[3 * 64 * 64] VPX_UNINITIALIZED); #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, pred_buf_16[3 * 64 * 64] VPX_UNINITIALIZED); #endif struct buf_2d orig_dst = pd->dst; PRED_BUFFER *this_mode_pred = NULL; const int pixels_in_block = bh * bw; int reuse_inter_pred = cpi->sf.reuse_inter_pred_sby && ctx->pred_pixel_ready; int ref_frame_skip_mask = 0; int idx; int best_pred_sad = INT_MAX; int best_early_term = 0; int ref_frame_cost[MAX_REF_FRAMES]; int svc_force_zero_mode[3] = { 0 }; int perform_intra_pred = 1; int use_golden_nonzeromv = 1; int force_skip_low_temp_var = 0; int skip_ref_find_pred[4] = { 0 }; unsigned int sse_zeromv_normalized = UINT_MAX; unsigned int best_sse_sofar = UINT_MAX; int gf_temporal_ref = 0; int force_test_gf_zeromv = 0; #if CONFIG_VP9_TEMPORAL_DENOISING VP9_PICKMODE_CTX_DEN ctx_den; int64_t zero_last_cost_orig = INT64_MAX; int denoise_svc_pickmode = 1; #endif INTERP_FILTER filter_gf_svc = EIGHTTAP; MV_REFERENCE_FRAME inter_layer_ref = GOLDEN_FRAME; const struct segmentation *const seg = &cm->seg; int comp_modes = 0; int num_inter_modes = (cpi->use_svc) ? RT_INTER_MODES_SVC : RT_INTER_MODES; int flag_svc_subpel = 0; int svc_mv_col = 0; int svc_mv_row = 0; int no_scaling = 0; int large_block = 0; int use_model_yrd_large = 0; unsigned int thresh_svc_skip_golden = 500; unsigned int thresh_skip_golden = 500; int force_smooth_filter = cpi->sf.force_smooth_interpol; int scene_change_detected = cpi->rc.high_source_sad || (cpi->use_svc && cpi->svc.high_source_sad_superframe); init_best_pickmode(&best_pickmode); x->encode_breakout = seg->enabled ? cpi->segment_encode_breakout[mi->segment_id] : cpi->encode_breakout; x->source_variance = UINT_MAX; if (cpi->sf.default_interp_filter == BILINEAR) { best_pickmode.best_pred_filter = BILINEAR; filter_gf_svc = BILINEAR; } if (cpi->use_svc && svc->spatial_layer_id > 0) { int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id - 1, svc->temporal_layer_id, svc->number_temporal_layers); LAYER_CONTEXT *const lc = &svc->layer_context[layer]; if (lc->scaling_factor_num == lc->scaling_factor_den) no_scaling = 1; } if (svc->spatial_layer_id > 0 && (svc->high_source_sad_superframe || no_scaling)) thresh_svc_skip_golden = 0; // Lower the skip threshold if lower spatial layer is better quality relative // to current layer. else if (svc->spatial_layer_id > 0 && cm->base_qindex > 150 && cm->base_qindex > svc->lower_layer_qindex + 15) thresh_svc_skip_golden = 100; // Increase skip threshold if lower spatial layer is lower quality relative // to current layer. else if (svc->spatial_layer_id > 0 && cm->base_qindex < 140 && cm->base_qindex < svc->lower_layer_qindex - 20) thresh_svc_skip_golden = 1000; if (!cpi->use_svc || (svc->use_gf_temporal_ref_current_layer && !svc->layer_context[svc->temporal_layer_id].is_key_frame)) { struct scale_factors *const sf_last = &cm->frame_refs[LAST_FRAME - 1].sf; struct scale_factors *const sf_golden = &cm->frame_refs[GOLDEN_FRAME - 1].sf; gf_temporal_ref = 1; // For temporal long term prediction, check that the golden reference // is same scale as last reference, otherwise disable. 
    if ((sf_last->x_scale_fp != sf_golden->x_scale_fp) ||
        (sf_last->y_scale_fp != sf_golden->y_scale_fp)) {
      gf_temporal_ref = 0;
    } else {
      if (cpi->rc.avg_frame_low_motion > 70)
        thresh_svc_skip_golden = 500;
      else
        thresh_svc_skip_golden = 0;
    }
  }

  init_ref_frame_cost(cm, xd, ref_frame_cost);
  memset(&mode_checked[0][0], 0, MB_MODE_COUNT * MAX_REF_FRAMES);

  if (reuse_inter_pred) {
    int i;
    for (i = 0; i < 3; i++) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (cm->use_highbitdepth)
        tmp[i].data = CONVERT_TO_BYTEPTR(&pred_buf_16[pixels_in_block * i]);
      else
        tmp[i].data = &pred_buf[pixels_in_block * i];
#else
      tmp[i].data = &pred_buf[pixels_in_block * i];
#endif  // CONFIG_VP9_HIGHBITDEPTH
      tmp[i].stride = bw;
      tmp[i].in_use = 0;
    }
    tmp[3].data = pd->dst.buf;
    tmp[3].stride = pd->dst.stride;
    tmp[3].in_use = 0;
  }

  x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH;
  x->skip = 0;

  if (cpi->sf.cb_pred_filter_search) {
    const int bsl = mi_width_log2_lookup[bsize];
    pred_filter_search = cm->interp_filter == SWITCHABLE
                             ? (((mi_row + mi_col) >> bsl) +
                                get_chessboard_index(cm->current_video_frame)) &
                                   0x1
                             : 0;
  }
  // Instead of using vp9_get_pred_context_switchable_interp(xd) to assign
  // filter_ref, we use a less strict condition on assigning filter_ref.
  // This is to reduce the probability of entering the flow of not assigning
  // filter_ref and then skipping the filter search.
  filter_ref = cm->interp_filter;
  if (cpi->sf.default_interp_filter != BILINEAR) {
    if (xd->above_mi && is_inter_block(xd->above_mi))
      filter_ref = xd->above_mi->interp_filter;
    else if (xd->left_mi && is_inter_block(xd->left_mi))
      filter_ref = xd->left_mi->interp_filter;
  }

  // initialize mode decisions
  vp9_rd_cost_reset(&best_rdc);
  vp9_rd_cost_reset(rd_cost);
  mi->sb_type = bsize;
  mi->ref_frame[0] = NONE;
  mi->ref_frame[1] = NONE;

  mi->tx_size =
      VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cm->tx_mode]);

  if (sf->short_circuit_flat_blocks || sf->limit_newmv_early_exit) {
#if CONFIG_VP9_HIGHBITDEPTH
    if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH)
      x->source_variance = vp9_high_get_sby_perpixel_variance(
          cpi, &x->plane[0].src, bsize, xd->bd);
    else
#endif  // CONFIG_VP9_HIGHBITDEPTH
      x->source_variance =
          vp9_get_sby_perpixel_variance(cpi, &x->plane[0].src, bsize);

    if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
        cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && mi->segment_id > 0 &&
        x->zero_temp_sad_source && x->source_variance == 0) {
      mi->segment_id = 0;
      vp9_init_plane_quantizers(cpi, x);
    }
  }

#if CONFIG_VP9_TEMPORAL_DENOISING
  if (cpi->oxcf.noise_sensitivity > 0) {
    if (cpi->use_svc) denoise_svc_pickmode = vp9_denoise_svc_non_key(cpi);
    if (cpi->denoiser.denoising_level > kDenLowLow && denoise_svc_pickmode)
      vp9_denoiser_reset_frame_stats(ctx);
  }
#endif

  if (cpi->rc.frames_since_golden == 0 && gf_temporal_ref &&
      !cpi->rc.alt_ref_gf_group && !cpi->rc.last_frame_is_src_altref) {
    usable_ref_frame = LAST_FRAME;
  } else {
    usable_ref_frame = GOLDEN_FRAME;
  }

  if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) {
    if (cpi->rc.alt_ref_gf_group || cpi->rc.is_src_frame_alt_ref)
      usable_ref_frame = ALTREF_FRAME;

    if (cpi->rc.is_src_frame_alt_ref) {
      skip_ref_find_pred[LAST_FRAME] = 1;
      skip_ref_find_pred[GOLDEN_FRAME] = 1;
    }
    if (!cm->show_frame) {
      if (cpi->rc.frames_since_key == 1) {
        usable_ref_frame = LAST_FRAME;
        skip_ref_find_pred[GOLDEN_FRAME] = 1;
        skip_ref_find_pred[ALTREF_FRAME] = 1;
      }
    }
  }

  // For svc mode, on spatial_layer_id > 0: if the reference has a different
  // scale, constrain the inter mode to only test zero motion.
if (cpi->use_svc && svc->force_zero_mode_spatial_ref && svc->spatial_layer_id > 0 && !gf_temporal_ref) { if (cpi->ref_frame_flags & flag_list[LAST_FRAME]) { struct scale_factors *const sf = &cm->frame_refs[LAST_FRAME - 1].sf; if (vp9_is_scaled(sf)) { svc_force_zero_mode[LAST_FRAME - 1] = 1; inter_layer_ref = LAST_FRAME; } } if (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) { struct scale_factors *const sf = &cm->frame_refs[GOLDEN_FRAME - 1].sf; if (vp9_is_scaled(sf)) { svc_force_zero_mode[GOLDEN_FRAME - 1] = 1; inter_layer_ref = GOLDEN_FRAME; } } } if (cpi->sf.short_circuit_low_temp_var) { force_skip_low_temp_var = get_force_skip_low_temp_var(&x->variance_low[0], mi_row, mi_col, bsize); // If force_skip_low_temp_var is set, and for short circuit mode = 1 and 3, // skip golden reference. if ((cpi->sf.short_circuit_low_temp_var == 1 || cpi->sf.short_circuit_low_temp_var == 3) && force_skip_low_temp_var) { usable_ref_frame = LAST_FRAME; } } if (sf->disable_golden_ref && (x->content_state_sb != kVeryHighSad || cpi->rc.avg_frame_low_motion < 60)) usable_ref_frame = LAST_FRAME; if (!((cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && !svc_force_zero_mode[GOLDEN_FRAME - 1] && !force_skip_low_temp_var)) use_golden_nonzeromv = 0; if (cpi->oxcf.speed >= 8 && !cpi->use_svc && ((cpi->rc.frames_since_golden + 1) < x->last_sb_high_content || x->last_sb_high_content > 40 || cpi->rc.frames_since_golden > 120)) usable_ref_frame = LAST_FRAME; // Compound prediction modes: (0,0) on LAST/GOLDEN and ARF. if (cm->reference_mode == REFERENCE_MODE_SELECT && cpi->sf.use_compound_nonrd_pickmode && usable_ref_frame == ALTREF_FRAME) comp_modes = 2; // If the segment reference frame feature is enabled and it's set to GOLDEN // reference, then make sure we don't skip checking GOLDEN, this is to // prevent possibility of not picking any mode. if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) && get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) == GOLDEN_FRAME) { usable_ref_frame = GOLDEN_FRAME; skip_ref_find_pred[GOLDEN_FRAME] = 0; thresh_svc_skip_golden = 0; } for (ref_frame = LAST_FRAME; ref_frame <= usable_ref_frame; ++ref_frame) { // Skip find_predictor if the reference frame is not in the // ref_frame_flags (i.e., not used as a reference for this frame). skip_ref_find_pred[ref_frame] = !(cpi->ref_frame_flags & flag_list[ref_frame]); if (!skip_ref_find_pred[ref_frame]) { find_predictors(cpi, x, ref_frame, frame_mv, const_motion, &ref_frame_skip_mask, flag_list, tile_data, mi_row, mi_col, yv12_mb, bsize, force_skip_low_temp_var, comp_modes > 0); } } if (cpi->use_svc || cpi->oxcf.speed <= 7 || bsize < BLOCK_32X32) x->sb_use_mv_part = 0; // Set the flag_svc_subpel to 1 for SVC if the lower spatial layer used // an averaging filter for downsampling (phase = 8). If so, we will test // a nonzero motion mode on the spatial reference. // The nonzero motion is half pixel shifted to left and top (-4, -4). if (cpi->use_svc && svc->spatial_layer_id > 0 && svc_force_zero_mode[inter_layer_ref - 1] && svc->downsample_filter_phase[svc->spatial_layer_id - 1] == 8 && !gf_temporal_ref) { svc_mv_col = -4; svc_mv_row = -4; flag_svc_subpel = 1; } // For SVC with quality layers, when QP of lower layer is lower // than current layer: force check of GF-ZEROMV before early exit // due to skip flag. 
if (svc->spatial_layer_id > 0 && no_scaling && (cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && cm->base_qindex > svc->lower_layer_qindex + 10) force_test_gf_zeromv = 1; // For low motion content use x->sb_is_skin in addition to VeryHighSad // for setting large_block. large_block = (x->content_state_sb == kVeryHighSad || (x->sb_is_skin && cpi->rc.avg_frame_low_motion > 70) || cpi->oxcf.speed < 7) ? bsize > BLOCK_32X32 : bsize >= BLOCK_32X32; use_model_yrd_large = cpi->oxcf.rc_mode == VPX_CBR && large_block && !cyclic_refresh_segment_id_boosted(xd->mi[0]->segment_id) && cm->base_qindex; for (idx = 0; idx < num_inter_modes + comp_modes; ++idx) { int rate_mv = 0; int mode_rd_thresh; int mode_index; int i; int64_t this_sse; int is_skippable; int this_early_term = 0; int rd_computed = 0; int flag_preduv_computed[2] = { 0 }; int inter_mv_mode = 0; int skip_this_mv = 0; int comp_pred = 0; int force_mv_inter_layer = 0; PREDICTION_MODE this_mode; second_ref_frame = NONE; if (idx < num_inter_modes) { this_mode = ref_mode_set[idx].pred_mode; ref_frame = ref_mode_set[idx].ref_frame; if (cpi->use_svc) { this_mode = ref_mode_set_svc[idx].pred_mode; ref_frame = ref_mode_set_svc[idx].ref_frame; } } else { // Add (0,0) compound modes. this_mode = ZEROMV; ref_frame = LAST_FRAME; if (idx == num_inter_modes + comp_modes - 1) ref_frame = GOLDEN_FRAME; second_ref_frame = ALTREF_FRAME; comp_pred = 1; } if (ref_frame > usable_ref_frame) continue; if (skip_ref_find_pred[ref_frame]) continue; if (svc->previous_frame_is_intra_only) { if (ref_frame != LAST_FRAME || frame_mv[this_mode][ref_frame].as_int != 0) continue; } // If the segment reference frame feature is enabled then do nothing if the // current ref frame is not allowed. if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) && get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) continue; if (flag_svc_subpel && ref_frame == inter_layer_ref) { force_mv_inter_layer = 1; // Only test mode if NEARESTMV/NEARMV is (svc_mv_col, svc_mv_row), // otherwise set NEWMV to (svc_mv_col, svc_mv_row). if (this_mode == NEWMV) { frame_mv[this_mode][ref_frame].as_mv.col = svc_mv_col; frame_mv[this_mode][ref_frame].as_mv.row = svc_mv_row; } else if (frame_mv[this_mode][ref_frame].as_mv.col != svc_mv_col || frame_mv[this_mode][ref_frame].as_mv.row != svc_mv_row) { continue; } } if (comp_pred) { if (!cpi->allow_comp_inter_inter) continue; // Skip compound inter modes if ARF is not available. if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue; // Do not allow compound prediction if the segment level reference frame // feature is in use as in this case there can only be one reference. if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) continue; } // For CBR mode: skip the golden reference search if sse of zeromv_last is // below threshold. if (ref_frame == GOLDEN_FRAME && cpi->oxcf.rc_mode == VPX_CBR && ((cpi->use_svc && sse_zeromv_normalized < thresh_svc_skip_golden) || (!cpi->use_svc && sse_zeromv_normalized < thresh_skip_golden))) continue; if (!(cpi->ref_frame_flags & flag_list[ref_frame])) continue; // For screen content. If zero_temp_sad source is computed: skip // non-zero motion check for stationary blocks. If the superblock is // non-stationary then for flat blocks skip the zero last check (keep golden // as it may be inter-layer reference). Otherwise (if zero_temp_sad_source // is not computed) skip non-zero motion check for flat blocks. // TODO(marpan): Compute zero_temp_sad_source per coding block. 
if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) { if (cpi->compute_source_sad_onepass && cpi->sf.use_source_sad) { if ((frame_mv[this_mode][ref_frame].as_int != 0 && x->zero_temp_sad_source) || (frame_mv[this_mode][ref_frame].as_int == 0 && x->source_variance == 0 && ref_frame == LAST_FRAME && !x->zero_temp_sad_source)) continue; } else if (frame_mv[this_mode][ref_frame].as_int != 0 && x->source_variance == 0) { continue; } } if (!(cpi->sf.inter_mode_mask[bsize] & (1 << this_mode))) continue; if (cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR) { if (cpi->rc.is_src_frame_alt_ref && (ref_frame != ALTREF_FRAME || frame_mv[this_mode][ref_frame].as_int != 0)) continue; if (!cm->show_frame && ref_frame == ALTREF_FRAME && frame_mv[this_mode][ref_frame].as_int != 0) continue; if (cpi->rc.alt_ref_gf_group && cm->show_frame && cpi->rc.frames_since_golden > (cpi->rc.baseline_gf_interval >> 1) && ref_frame == GOLDEN_FRAME && frame_mv[this_mode][ref_frame].as_int != 0) continue; if (cpi->rc.alt_ref_gf_group && cm->show_frame && cpi->rc.frames_since_golden > 0 && cpi->rc.frames_since_golden < (cpi->rc.baseline_gf_interval >> 1) && ref_frame == ALTREF_FRAME && frame_mv[this_mode][ref_frame].as_int != 0) continue; } if (const_motion[ref_frame] && this_mode == NEARMV) continue; // Skip non-zeromv mode search for golden frame if force_skip_low_temp_var // is set. If nearestmv for golden frame is 0, zeromv mode will be skipped // later. if (!force_mv_inter_layer && force_skip_low_temp_var && ref_frame == GOLDEN_FRAME && frame_mv[this_mode][ref_frame].as_int != 0) { continue; } if (x->content_state_sb != kVeryHighSad && (cpi->sf.short_circuit_low_temp_var >= 2 || (cpi->sf.short_circuit_low_temp_var == 1 && bsize == BLOCK_64X64)) && force_skip_low_temp_var && ref_frame == LAST_FRAME && this_mode == NEWMV) { continue; } if (cpi->use_svc) { if (!force_mv_inter_layer && svc_force_zero_mode[ref_frame - 1] && frame_mv[this_mode][ref_frame].as_int != 0) continue; } // Disable this drop out case if the ref frame segment level feature is // enabled for this segment. This is to prevent the possibility that we end // up unable to pick any mode. if (!segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME)) { if (sf->reference_masking && !(frame_mv[this_mode][ref_frame].as_int == 0 && ref_frame == LAST_FRAME)) { if (usable_ref_frame < ALTREF_FRAME) { if (!force_skip_low_temp_var && usable_ref_frame > LAST_FRAME) { i = (ref_frame == LAST_FRAME) ? GOLDEN_FRAME : LAST_FRAME; if ((cpi->ref_frame_flags & flag_list[i])) if (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[i] << 1)) ref_frame_skip_mask |= (1 << ref_frame); } } else if (!cpi->rc.is_src_frame_alt_ref && !(frame_mv[this_mode][ref_frame].as_int == 0 && ref_frame == ALTREF_FRAME)) { int ref1 = (ref_frame == GOLDEN_FRAME) ? LAST_FRAME : GOLDEN_FRAME; int ref2 = (ref_frame == ALTREF_FRAME) ? LAST_FRAME : ALTREF_FRAME; if (((cpi->ref_frame_flags & flag_list[ref1]) && (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref1] << 1))) || ((cpi->ref_frame_flags & flag_list[ref2]) && (x->pred_mv_sad[ref_frame] > (x->pred_mv_sad[ref2] << 1)))) ref_frame_skip_mask |= (1 << ref_frame); } } if (ref_frame_skip_mask & (1 << ref_frame)) continue; } // Select prediction reference frames. 
    for (i = 0; i < MAX_MB_PLANE; i++) {
      xd->plane[i].pre[0] = yv12_mb[ref_frame][i];
      if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i];
    }

    mi->ref_frame[0] = ref_frame;
    mi->ref_frame[1] = second_ref_frame;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);

    mode_index = mode_idx[ref_frame][INTER_OFFSET(this_mode)];
    mode_rd_thresh = best_pickmode.best_mode_skip_txfm
                         ? rd_threshes[mode_index] << 1
                         : rd_threshes[mode_index];

    // Increase mode_rd_thresh value for GOLDEN_FRAME for improved encoding
    // speed with little/no subjective quality loss.
    if (cpi->sf.bias_golden && ref_frame == GOLDEN_FRAME &&
        cpi->rc.frames_since_golden > 4)
      mode_rd_thresh = mode_rd_thresh << 3;

    if ((cpi->sf.adaptive_rd_thresh_row_mt &&
         rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh,
                                    &rd_thresh_freq_fact[mode_index])) ||
        (!cpi->sf.adaptive_rd_thresh_row_mt &&
         rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh,
                             &rd_thresh_freq_fact[mode_index])))
      if (frame_mv[this_mode][ref_frame].as_int != 0) continue;

    if (this_mode == NEWMV && !force_mv_inter_layer) {
      if (search_new_mv(cpi, x, frame_mv, ref_frame, gf_temporal_ref, bsize,
                        mi_row, mi_col, best_pred_sad, &rate_mv,
                        best_sse_sofar, &best_rdc))
        continue;
    }

    // TODO(jianj): Skipping the testing of (duplicate) non-zero motion vector
    // causes some regression, leave it for duplicate zero-mv for now, until
    // regression issue is resolved.
    for (inter_mv_mode = NEARESTMV; inter_mv_mode <= NEWMV; inter_mv_mode++) {
      if (inter_mv_mode == this_mode || comp_pred) continue;
      if (mode_checked[inter_mv_mode][ref_frame] &&
          frame_mv[this_mode][ref_frame].as_int ==
              frame_mv[inter_mv_mode][ref_frame].as_int &&
          frame_mv[inter_mv_mode][ref_frame].as_int == 0) {
        skip_this_mv = 1;
        break;
      }
    }

    if (skip_this_mv) continue;

    // If use_golden_nonzeromv is false, NEWMV mode is skipped for golden, no
    // need to compute best_pred_sad which is only used to skip golden NEWMV.
    if (use_golden_nonzeromv && this_mode == NEWMV && ref_frame == LAST_FRAME &&
        frame_mv[NEWMV][LAST_FRAME].as_int != INVALID_MV) {
      const int pre_stride = xd->plane[0].pre[0].stride;
      const uint8_t *const pre_buf =
          xd->plane[0].pre[0].buf +
          (frame_mv[NEWMV][LAST_FRAME].as_mv.row >> 3) * pre_stride +
          (frame_mv[NEWMV][LAST_FRAME].as_mv.col >> 3);
      best_pred_sad = cpi->fn_ptr[bsize].sdf(
          x->plane[0].src.buf, x->plane[0].src.stride, pre_buf, pre_stride);
      x->pred_mv_sad[LAST_FRAME] = best_pred_sad;
    }

    if (this_mode != NEARESTMV && !comp_pred &&
        frame_mv[this_mode][ref_frame].as_int ==
            frame_mv[NEARESTMV][ref_frame].as_int)
      continue;

    mi->mode = this_mode;
    mi->mv[0].as_int = frame_mv[this_mode][ref_frame].as_int;
    mi->mv[1].as_int = 0;

    // Search for the best prediction filter type, when the resulting
    // motion vector has sub-pixel accuracy for the luma component, i.e.,
    // the last three bits are not all zeros.
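    // (Clarifying note: VP9 motion vectors are stored in 1/8-pel units, so
    // (row | col) & 0x07 is nonzero exactly when the luma MV has a
    // fractional component.)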
if (reuse_inter_pred) { if (!this_mode_pred) { this_mode_pred = &tmp[3]; } else { this_mode_pred = &tmp[get_pred_buffer(tmp, 3)]; pd->dst.buf = this_mode_pred->data; pd->dst.stride = bw; } } if ((this_mode == NEWMV || filter_ref == SWITCHABLE) && pred_filter_search && (ref_frame == LAST_FRAME || (ref_frame == GOLDEN_FRAME && !force_mv_inter_layer && (cpi->use_svc || cpi->oxcf.rc_mode == VPX_VBR))) && (((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) != 0)) { rd_computed = 1; search_filter_ref(cpi, x, &this_rdc, mi_row, mi_col, tmp, bsize, reuse_inter_pred, &this_mode_pred, &var_y, &sse_y, force_smooth_filter, &this_early_term, flag_preduv_computed, use_model_yrd_large); } else { mi->interp_filter = (filter_ref == SWITCHABLE) ? EIGHTTAP : filter_ref; if (cpi->use_svc && ref_frame == GOLDEN_FRAME && svc_force_zero_mode[ref_frame - 1]) mi->interp_filter = filter_gf_svc; vp9_build_inter_predictors_sby(xd, mi_row, mi_col, bsize); // For large partition blocks, extra testing is done. if (use_model_yrd_large) { rd_computed = 1; model_rd_for_sb_y_large(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, &var_y, &sse_y, mi_row, mi_col, &this_early_term, flag_preduv_computed); } else { rd_computed = 1; model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, &var_y, &sse_y, 0); } // Save normalized sse (between current and last frame) for (0, 0) motion. if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0) { sse_zeromv_normalized = sse_y >> (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); } if (sse_y < best_sse_sofar) best_sse_sofar = sse_y; } if (!this_early_term) { this_sse = (int64_t)sse_y; block_yrd(cpi, x, &this_rdc, &is_skippable, &this_sse, bsize, VPXMIN(mi->tx_size, TX_16X16), rd_computed, 0); x->skip_txfm[0] = is_skippable; if (is_skippable) { this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); } else { if (RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist) < RDCOST(x->rdmult, x->rddiv, 0, this_sse)) { this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); } else { this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); this_rdc.dist = this_sse; x->skip_txfm[0] = SKIP_TXFM_AC_DC; } } if (cm->interp_filter == SWITCHABLE) { if ((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) this_rdc.rate += vp9_get_switchable_rate(cpi, xd); } } else { if (cm->interp_filter == SWITCHABLE) { if ((mi->mv[0].as_mv.row | mi->mv[0].as_mv.col) & 0x07) this_rdc.rate += vp9_get_switchable_rate(cpi, xd); } this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); } if (!this_early_term && (x->color_sensitivity[0] || x->color_sensitivity[1])) { RD_COST rdc_uv; const BLOCK_SIZE uv_bsize = get_plane_block_size(bsize, &xd->plane[1]); if (x->color_sensitivity[0] && !flag_preduv_computed[0]) { vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 1); flag_preduv_computed[0] = 1; } if (x->color_sensitivity[1] && !flag_preduv_computed[1]) { vp9_build_inter_predictors_sbp(xd, mi_row, mi_col, bsize, 2); flag_preduv_computed[1] = 1; } model_rd_for_sb_uv(cpi, uv_bsize, x, xd, &rdc_uv, &var_y, &sse_y, 1, 2); this_rdc.rate += rdc_uv.rate; this_rdc.dist += rdc_uv.dist; } this_rdc.rate += rate_mv; this_rdc.rate += cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]] [INTER_OFFSET(this_mode)]; // TODO(marpan): Add costing for compound mode. 
    this_rdc.rate += ref_frame_cost[ref_frame];
    this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);

    // Bias against NEWMV that is very different from its neighbors, and bias
    // to small motion-lastref for noisy input.
    if (cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.speed >= 5 &&
        cpi->oxcf.content != VP9E_CONTENT_SCREEN) {
      vp9_NEWMV_diff_bias(&cpi->noise_estimate, xd, this_mode, &this_rdc,
                          bsize, frame_mv[this_mode][ref_frame].as_mv.row,
                          frame_mv[this_mode][ref_frame].as_mv.col,
                          ref_frame == LAST_FRAME, x->lowvar_highsumdiff,
                          x->sb_is_skin);
    }

    // Skip checking: test to see if this block can be reconstructed by
    // prediction only.
    if (cpi->allow_encode_breakout && !xd->lossless && !scene_change_detected &&
        !svc->high_num_blocks_with_motion) {
      encode_breakout_test(cpi, x, bsize, mi_row, mi_col, ref_frame, this_mode,
                           var_y, sse_y, yv12_mb, &this_rdc.rate,
                           &this_rdc.dist, flag_preduv_computed);
      if (x->skip) {
        this_rdc.rate += rate_mv;
        this_rdc.rdcost =
            RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);
      }
    }

    // On spatially flat blocks for screen content: bias against zero-last
    // if the sse_y is non-zero. Only on scene change or high motion frames.
    if (cpi->oxcf.content == VP9E_CONTENT_SCREEN &&
        (scene_change_detected || svc->high_num_blocks_with_motion) &&
        ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0 &&
        svc->spatial_layer_id == 0 && x->source_variance == 0 && sse_y > 0) {
      this_rdc.rdcost = this_rdc.rdcost << 2;
    }

#if CONFIG_VP9_TEMPORAL_DENOISING
    if (cpi->oxcf.noise_sensitivity > 0 && denoise_svc_pickmode &&
        cpi->denoiser.denoising_level > kDenLowLow) {
      vp9_denoiser_update_frame_stats(mi, sse_y, this_mode, ctx);
      // Keep track of zero_last cost.
      if (ref_frame == LAST_FRAME && frame_mv[this_mode][ref_frame].as_int == 0)
        zero_last_cost_orig = this_rdc.rdcost;
    }
#else
    (void)ctx;
#endif

    mode_checked[this_mode][ref_frame] = 1;

    if (this_rdc.rdcost < best_rdc.rdcost || x->skip) {
      best_rdc = this_rdc;
      best_early_term = this_early_term;
      best_pickmode.best_mode = this_mode;
      best_pickmode.best_pred_filter = mi->interp_filter;
      best_pickmode.best_tx_size = mi->tx_size;
      best_pickmode.best_ref_frame = ref_frame;
      best_pickmode.best_mode_skip_txfm = x->skip_txfm[0];
      best_pickmode.best_second_ref_frame = second_ref_frame;

      if (reuse_inter_pred) {
        free_pred_buffer(best_pickmode.best_pred);
        best_pickmode.best_pred = this_mode_pred;
      }
    } else {
      if (reuse_inter_pred) free_pred_buffer(this_mode_pred);
    }

    if (x->skip &&
        (!force_test_gf_zeromv || mode_checked[ZEROMV][GOLDEN_FRAME]))
      break;

    // If early termination flag is 1 and at least 2 modes are checked,
    // the mode search is terminated.
    if (best_early_term && idx > 0 && !scene_change_detected &&
        (!force_test_gf_zeromv || mode_checked[ZEROMV][GOLDEN_FRAME])) {
      x->skip = 1;
      break;
    }
  }

  mi->mode = best_pickmode.best_mode;
  mi->interp_filter = best_pickmode.best_pred_filter;
  mi->tx_size = best_pickmode.best_tx_size;
  mi->ref_frame[0] = best_pickmode.best_ref_frame;
  mi->mv[0].as_int =
      frame_mv[best_pickmode.best_mode][best_pickmode.best_ref_frame].as_int;
  xd->mi[0]->bmi[0].as_mv[0].as_int = mi->mv[0].as_int;
  x->skip_txfm[0] = best_pickmode.best_mode_skip_txfm;
  mi->ref_frame[1] = best_pickmode.best_second_ref_frame;

  // For spatial enhancement layer: perform intra prediction only if base
  // layer is chosen as the reference. Always perform intra prediction if
  // LAST is the only reference, or is_key_frame is set, or on base
  // temporal layer.
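  // (Clarifying note: the shift-and-add on inter_mode_thresh below computes
  // (x << 1) + x, i.e. it triples the threshold for enhancement layers.)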
if (svc->spatial_layer_id && !gf_temporal_ref) { perform_intra_pred = svc->temporal_layer_id == 0 || svc->layer_context[svc->temporal_layer_id].is_key_frame || !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) || (!svc->layer_context[svc->temporal_layer_id].is_key_frame && svc_force_zero_mode[best_pickmode.best_ref_frame - 1]); inter_mode_thresh = (inter_mode_thresh << 1) + inter_mode_thresh; } if ((cpi->oxcf.lag_in_frames > 0 && cpi->oxcf.rc_mode == VPX_VBR && cpi->rc.is_src_frame_alt_ref) || svc->previous_frame_is_intra_only) perform_intra_pred = 0; // If the segment reference frame feature is enabled and set then // skip the intra prediction. if (segfeature_active(seg, mi->segment_id, SEG_LVL_REF_FRAME) && get_segdata(seg, mi->segment_id, SEG_LVL_REF_FRAME) > 0) perform_intra_pred = 0; // Perform intra prediction search, if the best SAD is above a certain // threshold. if (best_rdc.rdcost == INT64_MAX || (cpi->oxcf.content == VP9E_CONTENT_SCREEN && x->source_variance == 0) || (scene_change_detected && perform_intra_pred) || ((!force_skip_low_temp_var || bsize < BLOCK_32X32 || x->content_state_sb == kVeryHighSad) && perform_intra_pred && !x->skip && best_rdc.rdcost > inter_mode_thresh && bsize <= cpi->sf.max_intra_bsize && !x->skip_low_source_sad && !x->lowvar_highsumdiff)) { struct estimate_block_intra_args args = { cpi, x, DC_PRED, 1, 0 }; int64_t this_sse = INT64_MAX; int i; PRED_BUFFER *const best_pred = best_pickmode.best_pred; TX_SIZE intra_tx_size = VPXMIN(max_txsize_lookup[bsize], tx_mode_to_biggest_tx_size[cpi->common.tx_mode]); if (reuse_inter_pred && best_pred != NULL) { if (best_pred->data == orig_dst.buf) { this_mode_pred = &tmp[get_pred_buffer(tmp, 3)]; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) vpx_highbd_convolve_copy( CONVERT_TO_SHORTPTR(best_pred->data), best_pred->stride, CONVERT_TO_SHORTPTR(this_mode_pred->data), this_mode_pred->stride, NULL, 0, 0, 0, 0, bw, bh, xd->bd); else vpx_convolve_copy(best_pred->data, best_pred->stride, this_mode_pred->data, this_mode_pred->stride, NULL, 0, 0, 0, 0, bw, bh); #else vpx_convolve_copy(best_pred->data, best_pred->stride, this_mode_pred->data, this_mode_pred->stride, NULL, 0, 0, 0, 0, bw, bh); #endif // CONFIG_VP9_HIGHBITDEPTH best_pickmode.best_pred = this_mode_pred; } } pd->dst = orig_dst; for (i = 0; i < 4; ++i) { const PREDICTION_MODE this_mode = intra_mode_list[i]; THR_MODES mode_index = mode_idx[INTRA_FRAME][mode_offset(this_mode)]; int mode_rd_thresh = rd_threshes[mode_index]; // For spatially flat blocks, under short_circuit_flat_blocks flag: // only check DC mode for stationary blocks, otherwise also check // H and V mode. if (sf->short_circuit_flat_blocks && x->source_variance == 0 && ((x->zero_temp_sad_source && this_mode != DC_PRED) || i > 2)) { continue; } if (!((1 << this_mode) & cpi->sf.intra_y_mode_bsize_mask[bsize])) continue; if (cpi->sf.rt_intra_dc_only_low_content && this_mode != DC_PRED && x->content_state_sb != kVeryHighSad) continue; if ((cpi->sf.adaptive_rd_thresh_row_mt && rd_less_than_thresh_row_mt(best_rdc.rdcost, mode_rd_thresh, &rd_thresh_freq_fact[mode_index])) || (!cpi->sf.adaptive_rd_thresh_row_mt && rd_less_than_thresh(best_rdc.rdcost, mode_rd_thresh, &rd_thresh_freq_fact[mode_index]))) { // Avoid this early exit for screen on base layer, for scene // changes or high motion frames. 
        if (cpi->oxcf.content != VP9E_CONTENT_SCREEN ||
            svc->spatial_layer_id > 0 ||
            (!scene_change_detected && !svc->high_num_blocks_with_motion))
          continue;
      }

      mi->mode = this_mode;
      mi->ref_frame[0] = INTRA_FRAME;
      this_rdc.dist = this_rdc.rate = 0;
      args.mode = this_mode;
      args.skippable = 1;
      args.rdc = &this_rdc;
      mi->tx_size = intra_tx_size;

      compute_intra_yprediction(this_mode, bsize, x, xd);
      model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist,
                        &var_y, &sse_y, 1);
      block_yrd(cpi, x, &this_rdc, &args.skippable, &this_sse, bsize,
                VPXMIN(mi->tx_size, TX_16X16), 1, 1);

      // Check skip cost here since skippable is not set for uv; this
      // mirrors the behavior used by inter
      if (args.skippable) {
        x->skip_txfm[0] = SKIP_TXFM_AC_DC;
        this_rdc.rate = vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 1);
      } else {
        x->skip_txfm[0] = SKIP_TXFM_NONE;
        this_rdc.rate += vp9_cost_bit(vp9_get_skip_prob(&cpi->common, xd), 0);
      }
      // Inter and intra RD will mismatch in scale for non-screen content.
      if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) {
        if (x->color_sensitivity[0])
          vp9_foreach_transformed_block_in_plane(xd, bsize, 1,
                                                 estimate_block_intra, &args);
        if (x->color_sensitivity[1])
          vp9_foreach_transformed_block_in_plane(xd, bsize, 2,
                                                 estimate_block_intra, &args);
      }
      this_rdc.rate += cpi->mbmode_cost[this_mode];
      this_rdc.rate += ref_frame_cost[INTRA_FRAME];
      this_rdc.rate += intra_cost_penalty;
      this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist);

      if (this_rdc.rdcost < best_rdc.rdcost) {
        best_rdc = this_rdc;
        best_pickmode.best_mode = this_mode;
        best_pickmode.best_intra_tx_size = mi->tx_size;
        best_pickmode.best_ref_frame = INTRA_FRAME;
        best_pickmode.best_second_ref_frame = NONE;
        mi->uv_mode = this_mode;
        mi->mv[0].as_int = INVALID_MV;
        mi->mv[1].as_int = INVALID_MV;
        best_pickmode.best_mode_skip_txfm = x->skip_txfm[0];
      }
    }

    // Reset mb_mode_info to the best inter mode.
    if (best_pickmode.best_ref_frame != INTRA_FRAME) {
      mi->tx_size = best_pickmode.best_tx_size;
    } else {
      mi->tx_size = best_pickmode.best_intra_tx_size;
    }
  }

  pd->dst = orig_dst;
  mi->mode = best_pickmode.best_mode;
  mi->ref_frame[0] = best_pickmode.best_ref_frame;
  mi->ref_frame[1] = best_pickmode.best_second_ref_frame;
  x->skip_txfm[0] = best_pickmode.best_mode_skip_txfm;

  if (!is_inter_block(mi)) {
    mi->interp_filter = SWITCHABLE_FILTERS;
  }

  if (reuse_inter_pred && best_pickmode.best_pred != NULL) {
    PRED_BUFFER *const best_pred = best_pickmode.best_pred;
    if (best_pred->data != orig_dst.buf && is_inter_mode(mi->mode)) {
#if CONFIG_VP9_HIGHBITDEPTH
      if (cm->use_highbitdepth)
        vpx_highbd_convolve_copy(
            CONVERT_TO_SHORTPTR(best_pred->data), best_pred->stride,
            CONVERT_TO_SHORTPTR(pd->dst.buf), pd->dst.stride, NULL, 0, 0, 0, 0,
            bw, bh, xd->bd);
      else
        vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
                          pd->dst.stride, NULL, 0, 0, 0, 0, bw, bh);
#else
      vpx_convolve_copy(best_pred->data, best_pred->stride, pd->dst.buf,
                        pd->dst.stride, NULL, 0, 0, 0, 0, bw, bh);
#endif  // CONFIG_VP9_HIGHBITDEPTH
    }
  }

#if CONFIG_VP9_TEMPORAL_DENOISING
  if (cpi->oxcf.noise_sensitivity > 0 && cpi->resize_pending == 0 &&
      denoise_svc_pickmode && cpi->denoiser.denoising_level > kDenLowLow &&
      cpi->denoiser.reset == 0) {
    VP9_DENOISER_DECISION decision = COPY_BLOCK;
    ctx->sb_skip_denoising = 0;
    // TODO(marpan): There is an issue with denoising when the
    // superblock partitioning scheme is based on the pickmode.
    // Remove this condition when the issue is resolved.
if (x->sb_pickmode_part) ctx->sb_skip_denoising = 1; vp9_pickmode_ctx_den_update(&ctx_den, zero_last_cost_orig, ref_frame_cost, frame_mv, reuse_inter_pred, &best_pickmode); vp9_denoiser_denoise(cpi, x, mi_row, mi_col, bsize, ctx, &decision, gf_temporal_ref); if (denoise_recheck_zeromv) recheck_zeromv_after_denoising(cpi, mi, x, xd, decision, &ctx_den, yv12_mb, &best_rdc, bsize, mi_row, mi_col); best_pickmode.best_ref_frame = ctx_den.best_ref_frame; } #endif if (best_pickmode.best_ref_frame == ALTREF_FRAME || best_pickmode.best_second_ref_frame == ALTREF_FRAME) x->arf_frame_usage++; else if (best_pickmode.best_ref_frame != INTRA_FRAME) x->lastgolden_frame_usage++; if (cpi->sf.adaptive_rd_thresh) { THR_MODES best_mode_idx = mode_idx[best_pickmode.best_ref_frame][mode_offset(mi->mode)]; if (best_pickmode.best_ref_frame == INTRA_FRAME) { // Only consider the modes that are included in the intra_mode_list. int intra_modes = sizeof(intra_mode_list) / sizeof(PREDICTION_MODE); int i; // TODO(yunqingwang): Check intra mode mask and only update freq_fact // for those valid modes. for (i = 0; i < intra_modes; i++) { if (cpi->sf.adaptive_rd_thresh_row_mt) update_thresh_freq_fact_row_mt(cpi, tile_data, x->source_variance, thresh_freq_fact_idx, INTRA_FRAME, best_mode_idx, intra_mode_list[i]); else update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize, INTRA_FRAME, best_mode_idx, intra_mode_list[i]); } } else { for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) { PREDICTION_MODE this_mode; if (best_pickmode.best_ref_frame != ref_frame) continue; for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { if (cpi->sf.adaptive_rd_thresh_row_mt) update_thresh_freq_fact_row_mt(cpi, tile_data, x->source_variance, thresh_freq_fact_idx, ref_frame, best_mode_idx, this_mode); else update_thresh_freq_fact(cpi, tile_data, x->source_variance, bsize, ref_frame, best_mode_idx, this_mode); } } } } *rd_cost = best_rdc; } void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx) { VP9_COMMON *const cm = &cpi->common; SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; const struct segmentation *const seg = &cm->seg; MV_REFERENCE_FRAME ref_frame, second_ref_frame = NONE; MV_REFERENCE_FRAME best_ref_frame = NONE; unsigned char segment_id = mi->segment_id; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; int64_t best_rd = INT64_MAX; b_mode_info bsi[MAX_REF_FRAMES][4]; int ref_frame_skip_mask = 0; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; ctx->pred_pixel_ready = 0; for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) { const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); int_mv dummy_mv[2]; x->pred_mv_sad[ref_frame] = INT_MAX; if ((cpi->ref_frame_flags & flag_list[ref_frame]) && (yv12 != NULL)) { int_mv *const candidates = mbmi_ext->ref_mvs[ref_frame]; const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); vp9_find_mv_refs(cm, xd, xd->mi[0], ref_frame, candidates, mi_row, mi_col, mbmi_ext->mode_context); vp9_find_best_ref_mvs(xd, 
                            cm->allow_high_precision_mv, candidates,
                            &dummy_mv[0], &dummy_mv[1]);
    } else {
      ref_frame_skip_mask |= (1 << ref_frame);
    }
  }

  mi->sb_type = bsize;
  mi->tx_size = TX_4X4;
  mi->uv_mode = DC_PRED;
  mi->ref_frame[0] = LAST_FRAME;
  mi->ref_frame[1] = NONE;
  mi->interp_filter =
      cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter;

  for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ++ref_frame) {
    int64_t this_rd = 0;
    int plane;

    if (ref_frame_skip_mask & (1 << ref_frame)) continue;

#if CONFIG_BETTER_HW_COMPATIBILITY
    if ((bsize == BLOCK_8X4 || bsize == BLOCK_4X8) && ref_frame > INTRA_FRAME &&
        vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
      continue;
#endif

    // TODO(jingning, agrange): Scaling reference frame not supported for
    // sub8x8 blocks. Is this supported now?
    if (ref_frame > INTRA_FRAME &&
        vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf))
      continue;

    // If the segment reference frame feature is enabled, then do nothing if
    // the current ref frame is not allowed.
    if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) &&
        get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame)
      continue;

    mi->ref_frame[0] = ref_frame;
    x->skip = 0;
    set_ref_ptrs(cm, xd, ref_frame, second_ref_frame);

    // Select prediction reference frames.
    for (plane = 0; plane < MAX_MB_PLANE; plane++)
      xd->plane[plane].pre[0] = yv12_mb[ref_frame][plane];

    for (idy = 0; idy < 2; idy += num_4x4_blocks_high) {
      for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) {
        int_mv b_mv[MB_MODE_COUNT];
        int64_t b_best_rd = INT64_MAX;
        const int i = idy * 2 + idx;
        PREDICTION_MODE this_mode;
        RD_COST this_rdc;
        unsigned int var_y, sse_y;

        struct macroblock_plane *p = &x->plane[0];
        struct macroblockd_plane *pd = &xd->plane[0];

        const struct buf_2d orig_src = p->src;
        const struct buf_2d orig_dst = pd->dst;
        struct buf_2d orig_pre[2];
        memcpy(orig_pre, xd->plane[0].pre, sizeof(orig_pre));

        // set buffer pointers for sub8x8 motion search.
p->src.buf = &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)]; pd->dst.buf = &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)]; pd->pre[0].buf = &pd->pre[0] .buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)]; b_mv[ZEROMV].as_int = 0; b_mv[NEWMV].as_int = INVALID_MV; vp9_append_sub8x8_mvs_for_idx(cm, xd, i, 0, mi_row, mi_col, &b_mv[NEARESTMV], &b_mv[NEARMV], mbmi_ext->mode_context); for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) { int b_rate = 0; xd->mi[0]->bmi[i].as_mv[0].as_int = b_mv[this_mode].as_int; if (this_mode == NEWMV) { const int step_param = cpi->sf.mv.fullpel_search_step_param; MV mvp_full; MV tmp_mv; int cost_list[5]; const MvLimits tmp_mv_limits = x->mv_limits; uint32_t dummy_dist; if (i == 0) { mvp_full.row = b_mv[NEARESTMV].as_mv.row >> 3; mvp_full.col = b_mv[NEARESTMV].as_mv.col >> 3; } else { mvp_full.row = xd->mi[0]->bmi[0].as_mv[0].as_mv.row >> 3; mvp_full.col = xd->mi[0]->bmi[0].as_mv[0].as_mv.col >> 3; } vp9_set_mv_search_range(&x->mv_limits, &mbmi_ext->ref_mvs[ref_frame][0].as_mv); vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, x->sadperbit4, cond_cost_list(cpi, cost_list), &mbmi_ext->ref_mvs[ref_frame][0].as_mv, &tmp_mv, INT_MAX, 0); x->mv_limits = tmp_mv_limits; // calculate the bit cost on motion vector mvp_full.row = tmp_mv.row * 8; mvp_full.col = tmp_mv.col * 8; b_rate += vp9_mv_bit_cost( &mvp_full, &mbmi_ext->ref_mvs[ref_frame][0].as_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); b_rate += cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]] [INTER_OFFSET(NEWMV)]; if (RDCOST(x->rdmult, x->rddiv, b_rate, 0) > b_best_rd) continue; cpi->find_fractional_mv_step( x, &tmp_mv, &mbmi_ext->ref_mvs[ref_frame][0].as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &dummy_dist, &x->pred_sse[ref_frame], NULL, 0, 0, cpi->sf.use_accurate_subpel_search); xd->mi[0]->bmi[i].as_mv[0].as_mv = tmp_mv; } else { b_rate += cpi->inter_mode_cost[x->mbmi_ext->mode_context[ref_frame]] [INTER_OFFSET(this_mode)]; } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_highbd_build_inter_predictor( CONVERT_TO_SHORTPTR(pd->pre[0].buf), pd->pre[0].stride, CONVERT_TO_SHORTPTR(pd->dst.buf), pd->dst.stride, &xd->mi[0]->bmi[i].as_mv[0].as_mv, &xd->block_refs[0]->sf, 4 * num_4x4_blocks_wide, 4 * num_4x4_blocks_high, 0, vp9_filter_kernels[mi->interp_filter], MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i & 0x01), mi_row * MI_SIZE + 4 * (i >> 1), xd->bd); } else { #endif vp9_build_inter_predictor( pd->pre[0].buf, pd->pre[0].stride, pd->dst.buf, pd->dst.stride, &xd->mi[0]->bmi[i].as_mv[0].as_mv, &xd->block_refs[0]->sf, 4 * num_4x4_blocks_wide, 4 * num_4x4_blocks_high, 0, vp9_filter_kernels[mi->interp_filter], MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i & 0x01), mi_row * MI_SIZE + 4 * (i >> 1)); #if CONFIG_VP9_HIGHBITDEPTH } #endif model_rd_for_sb_y(cpi, bsize, x, xd, &this_rdc.rate, &this_rdc.dist, &var_y, &sse_y, 0); this_rdc.rate += b_rate; this_rdc.rdcost = RDCOST(x->rdmult, x->rddiv, this_rdc.rate, this_rdc.dist); if (this_rdc.rdcost < b_best_rd) { b_best_rd = this_rdc.rdcost; bsi[ref_frame][i].as_mode = this_mode; bsi[ref_frame][i].as_mv[0].as_mv = xd->mi[0]->bmi[i].as_mv[0].as_mv; } } // mode search // restore source and prediction buffer pointers. 
p->src = orig_src; pd->pre[0] = orig_pre[0]; pd->dst = orig_dst; this_rd += b_best_rd; xd->mi[0]->bmi[i] = bsi[ref_frame][i]; if (num_4x4_blocks_wide > 1) xd->mi[0]->bmi[i + 1] = xd->mi[0]->bmi[i]; if (num_4x4_blocks_high > 1) xd->mi[0]->bmi[i + 2] = xd->mi[0]->bmi[i]; } } // loop through sub8x8 blocks if (this_rd < best_rd) { best_rd = this_rd; best_ref_frame = ref_frame; } } // reference frames mi->tx_size = TX_4X4; mi->ref_frame[0] = best_ref_frame; for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { const int block = idy * 2 + idx; xd->mi[0]->bmi[block] = bsi[best_ref_frame][block]; if (num_4x4_blocks_wide > 1) xd->mi[0]->bmi[block + 1] = bsi[best_ref_frame][block]; if (num_4x4_blocks_high > 1) xd->mi[0]->bmi[block + 2] = bsi[best_ref_frame][block]; } } mi->mode = xd->mi[0]->bmi[3].as_mode; ctx->mic = *(xd->mi[0]); ctx->mbmi_ext = *x->mbmi_ext; ctx->skip_txfm[0] = SKIP_TXFM_NONE; ctx->skip = 0; // Dummy assignment for speed -5. No effect in speed -6. rd_cost->rdcost = best_rd; } libvpx-1.8.2/vp9/encoder/vp9_pickmode.h000066400000000000000000000023131357355204000177300ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_PICKMODE_H_ #define VPX_VP9_ENCODER_VP9_PICKMODE_H_ #include "vp9/encoder/vp9_encoder.h" #ifdef __cplusplus extern "C" { #endif void vp9_pick_intra_mode(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); void vp9_pick_inter_mode(VP9_COMP *cpi, MACROBLOCK *x, TileDataEnc *tile_data, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); void vp9_pick_inter_mode_sub8x8(VP9_COMP *cpi, MACROBLOCK *x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_PICKMODE_H_ libvpx-1.8.2/vp9/encoder/vp9_quantize.c000066400000000000000000000314771357355204000200050ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <assert.h>
#include <math.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_seg_common.h"

#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_rd.h"

void vp9_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                       int skip_block, const int16_t *round_ptr,
                       const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
                       tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr,
                       uint16_t *eob_ptr, const int16_t *scan,
                       const int16_t *iscan) {
  int i, eob = -1;
  (void)iscan;
  (void)skip_block;
  assert(!skip_block);

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  // Quantization pass: All coefficients with index >= zero_flag are
  // skippable. Note: zero_flag can be zero.
  for (i = 0; i < n_coeffs; i++) {
    const int rc = scan[i];
    const int coeff = coeff_ptr[rc];
    const int coeff_sign = (coeff >> 31);
    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;

    int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX);
    tmp = (tmp * quant_ptr[rc != 0]) >> 16;

    qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign;
    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];

    if (tmp) eob = i;
  }
  *eob_ptr = eob + 1;
}

#if CONFIG_VP9_HIGHBITDEPTH
void vp9_highbd_quantize_fp_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs,
                              int skip_block, const int16_t *round_ptr,
                              const int16_t *quant_ptr, tran_low_t *qcoeff_ptr,
                              tran_low_t *dqcoeff_ptr,
                              const int16_t *dequant_ptr, uint16_t *eob_ptr,
                              const int16_t *scan, const int16_t *iscan) {
  int i;
  int eob = -1;

  (void)iscan;
  (void)skip_block;
  assert(!skip_block);

  memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr));

  // Quantization pass: All coefficients with index >= zero_flag are
  // skippable. Note: zero_flag can be zero.
  for (i = 0; i < n_coeffs; i++) {
    const int rc = scan[i];
    const int coeff = coeff_ptr[rc];
    const int coeff_sign = (coeff >> 31);
    const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign;
    const int64_t tmp = abs_coeff + round_ptr[rc != 0];
    const int abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> 16);
    qcoeff_ptr[rc] = (tran_low_t)(abs_qcoeff ^ coeff_sign) - coeff_sign;
    dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0];
    if (abs_qcoeff) eob = i;
  }
  *eob_ptr = eob + 1;
}
#endif

// TODO(jingning) Refactor this file and combine functions with similar
// operations.
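// (Illustrative worked example, not part of the upstream file: the fast-path
// quantizers above compute qcoeff = ((|coeff| + round) * quant) >> 16, where
// quant is a Q16 reciprocal of the dequantizer step -- vp9_init_quantizer()
// stores (1 << 16) / quant in the *_quant_fp tables. Assuming a dequant step
// of 32, quant would be 2048; for coeff = +100 and round = 16:
//   tmp    = (100 + 16) * 2048 >> 16 = 3
//   qcoeff = 3, dqcoeff = 3 * 32 = 96,
// a reconstruction error of 4. The (x ^ sign) - sign idiom then folds the
// sign back in, negating x when sign == -1.)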
void vp9_quantize_fp_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int i, eob = -1; (void)iscan; (void)skip_block; assert(!skip_block); memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); for (i = 0; i < n_coeffs; i++) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); int tmp = 0; int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) { abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX); tmp = (abs_coeff * quant_ptr[rc != 0]) >> 15; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; } if (tmp) eob = i; } *eob_ptr = eob + 1; } #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_quantize_fp_32x32_c( const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int i, eob = -1; (void)iscan; (void)skip_block; assert(!skip_block); memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); for (i = 0; i < n_coeffs; i++) { int abs_qcoeff = 0; const int rc = scan[i]; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; if (abs_coeff >= (dequant_ptr[rc != 0] >> 2)) { const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); abs_qcoeff = (int)((tmp * quant_ptr[rc != 0]) >> 15); qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; } if (abs_qcoeff) eob = i; } *eob_ptr = eob + 1; } #endif void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, const int16_t *scan, const int16_t *iscan) { MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block), *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); const int n_coeffs = 4 * 4; if (x->skip_block) { memset(qcoeff, 0, n_coeffs * sizeof(*qcoeff)); memset(dqcoeff, 0, n_coeffs * sizeof(*dqcoeff)); return; } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vpx_highbd_quantize_b(BLOCK_OFFSET(p->coeff, block), n_coeffs, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, &p->eobs[block], scan, iscan); return; } #endif vpx_quantize_b(BLOCK_OFFSET(p->coeff, block), n_coeffs, x->skip_block, p->zbin, p->round, p->quant, p->quant_shift, qcoeff, dqcoeff, pd->dequant, &p->eobs[block], scan, iscan); } static void invert_quant(int16_t *quant, int16_t *shift, int d) { unsigned t; int l, m; t = d; for (l = 0; t > 1; l++) t >>= 1; m = 1 + (1 << (16 + l)) / d; *quant = (int16_t)(m - (1 << 16)); *shift = 1 << (16 - l); } static int get_qzbin_factor(int q, vpx_bit_depth_t bit_depth) { const int quant = vp9_dc_quant(q, 0, bit_depth); #if CONFIG_VP9_HIGHBITDEPTH switch (bit_depth) { case VPX_BITS_8: return q == 0 ? 64 : (quant < 148 ? 84 : 80); case VPX_BITS_10: return q == 0 ? 64 : (quant < 592 ? 
84 : 80); default: assert(bit_depth == VPX_BITS_12); return q == 0 ? 64 : (quant < 2368 ? 84 : 80); } #else (void)bit_depth; return q == 0 ? 64 : (quant < 148 ? 84 : 80); #endif } void vp9_init_quantizer(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; QUANTS *const quants = &cpi->quants; int i, q, quant; for (q = 0; q < QINDEX_RANGE; q++) { int qzbin_factor = get_qzbin_factor(q, cm->bit_depth); int qrounding_factor = q == 0 ? 64 : 48; const int sharpness_adjustment = 16 * (7 - cpi->oxcf.sharpness) / 7; if (cpi->oxcf.sharpness > 0 && q > 0) { qzbin_factor = 64 + sharpness_adjustment; qrounding_factor = 64 - sharpness_adjustment; } for (i = 0; i < 2; ++i) { int qrounding_factor_fp = i == 0 ? 48 : 42; if (q == 0) qrounding_factor_fp = 64; if (cpi->oxcf.sharpness > 0) qrounding_factor_fp = 64 - sharpness_adjustment; // y quant = i == 0 ? vp9_dc_quant(q, cm->y_dc_delta_q, cm->bit_depth) : vp9_ac_quant(q, 0, cm->bit_depth); invert_quant(&quants->y_quant[q][i], &quants->y_quant_shift[q][i], quant); quants->y_quant_fp[q][i] = (1 << 16) / quant; quants->y_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7; quants->y_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); quants->y_round[q][i] = (qrounding_factor * quant) >> 7; cpi->y_dequant[q][i] = quant; // uv quant = i == 0 ? vp9_dc_quant(q, cm->uv_dc_delta_q, cm->bit_depth) : vp9_ac_quant(q, cm->uv_ac_delta_q, cm->bit_depth); invert_quant(&quants->uv_quant[q][i], &quants->uv_quant_shift[q][i], quant); quants->uv_quant_fp[q][i] = (1 << 16) / quant; quants->uv_round_fp[q][i] = (qrounding_factor_fp * quant) >> 7; quants->uv_zbin[q][i] = ROUND_POWER_OF_TWO(qzbin_factor * quant, 7); quants->uv_round[q][i] = (qrounding_factor * quant) >> 7; cpi->uv_dequant[q][i] = quant; } for (i = 2; i < 8; i++) { quants->y_quant[q][i] = quants->y_quant[q][1]; quants->y_quant_fp[q][i] = quants->y_quant_fp[q][1]; quants->y_round_fp[q][i] = quants->y_round_fp[q][1]; quants->y_quant_shift[q][i] = quants->y_quant_shift[q][1]; quants->y_zbin[q][i] = quants->y_zbin[q][1]; quants->y_round[q][i] = quants->y_round[q][1]; cpi->y_dequant[q][i] = cpi->y_dequant[q][1]; quants->uv_quant[q][i] = quants->uv_quant[q][1]; quants->uv_quant_fp[q][i] = quants->uv_quant_fp[q][1]; quants->uv_round_fp[q][i] = quants->uv_round_fp[q][1]; quants->uv_quant_shift[q][i] = quants->uv_quant_shift[q][1]; quants->uv_zbin[q][i] = quants->uv_zbin[q][1]; quants->uv_round[q][i] = quants->uv_round[q][1]; cpi->uv_dequant[q][i] = cpi->uv_dequant[q][1]; } } } void vp9_init_plane_quantizers(VP9_COMP *cpi, MACROBLOCK *x) { const VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; QUANTS *const quants = &cpi->quants; const int segment_id = xd->mi[0]->segment_id; const int qindex = vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex); const int rdmult = vp9_compute_rd_mult(cpi, qindex + cm->y_dc_delta_q); int i; // Y x->plane[0].quant = quants->y_quant[qindex]; x->plane[0].quant_fp = quants->y_quant_fp[qindex]; memcpy(x->plane[0].round_fp, quants->y_round_fp[qindex], 8 * sizeof(*(x->plane[0].round_fp))); x->plane[0].quant_shift = quants->y_quant_shift[qindex]; x->plane[0].zbin = quants->y_zbin[qindex]; x->plane[0].round = quants->y_round[qindex]; xd->plane[0].dequant = cpi->y_dequant[qindex]; x->plane[0].quant_thred[0] = x->plane[0].zbin[0] * x->plane[0].zbin[0]; x->plane[0].quant_thred[1] = x->plane[0].zbin[1] * x->plane[0].zbin[1]; // UV for (i = 1; i < 3; i++) { x->plane[i].quant = quants->uv_quant[qindex]; x->plane[i].quant_fp = quants->uv_quant_fp[qindex]; 
memcpy(x->plane[i].round_fp, quants->uv_round_fp[qindex], 8 * sizeof(*(x->plane[i].round_fp))); x->plane[i].quant_shift = quants->uv_quant_shift[qindex]; x->plane[i].zbin = quants->uv_zbin[qindex]; x->plane[i].round = quants->uv_round[qindex]; xd->plane[i].dequant = cpi->uv_dequant[qindex]; x->plane[i].quant_thred[0] = x->plane[i].zbin[0] * x->plane[i].zbin[0]; x->plane[i].quant_thred[1] = x->plane[i].zbin[1] * x->plane[i].zbin[1]; } x->skip_block = segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP); x->q_index = qindex; set_error_per_bit(x, rdmult); vp9_initialize_me_consts(cpi, x, x->q_index); } void vp9_frame_init_quantizer(VP9_COMP *cpi) { vp9_init_plane_quantizers(cpi, &cpi->td.mb); } void vp9_set_quantizer(VP9_COMMON *cm, int q) { // The quantizer has to be reinitialized with vp9_init_quantizer() if any // delta_q changes. cm->base_qindex = q; cm->y_dc_delta_q = 0; cm->uv_dc_delta_q = 0; cm->uv_ac_delta_q = 0; } // Table that converts 0-63 Q-range values passed in from outside to the Qindex // range used internally. static const int quantizer_to_qindex[] = { 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92, 96, 100, 104, 108, 112, 116, 120, 124, 128, 132, 136, 140, 144, 148, 152, 156, 160, 164, 168, 172, 176, 180, 184, 188, 192, 196, 200, 204, 208, 212, 216, 220, 224, 228, 232, 236, 240, 244, 249, 255, }; int vp9_quantizer_to_qindex(int quantizer) { return quantizer_to_qindex[quantizer]; } int vp9_qindex_to_quantizer(int qindex) { int quantizer; for (quantizer = 0; quantizer < 64; ++quantizer) if (quantizer_to_qindex[quantizer] >= qindex) return quantizer; return 63; } libvpx-1.8.2/vp9/encoder/vp9_quantize.h000066400000000000000000000040211357355204000177730ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_QUANTIZE_H_ #define VPX_VP9_ENCODER_VP9_QUANTIZE_H_ #include "./vpx_config.h" #include "vp9/encoder/vp9_block.h" #ifdef __cplusplus extern "C" { #endif typedef struct { DECLARE_ALIGNED(16, int16_t, y_quant[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_quant_shift[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_zbin[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_round[QINDEX_RANGE][8]); // TODO(jingning): in progress of re-working the quantization. will decide // if we want to deprecate the current use of y_quant.
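// Each [QINDEX_RANGE][8] row holds the DC value at index 0 and the AC value
// at index 1; vp9_init_quantizer() replicates the AC value into indices 2..7,
// so a 16-byte aligned row can be consumed as one 8-lane vector by SIMD code.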
DECLARE_ALIGNED(16, int16_t, y_quant_fp[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, uv_quant_fp[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, y_round_fp[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, uv_round_fp[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, uv_quant[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, uv_quant_shift[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, uv_zbin[QINDEX_RANGE][8]); DECLARE_ALIGNED(16, int16_t, uv_round[QINDEX_RANGE][8]); } QUANTS; void vp9_regular_quantize_b_4x4(MACROBLOCK *x, int plane, int block, const int16_t *scan, const int16_t *iscan); struct VP9_COMP; struct VP9Common; void vp9_frame_init_quantizer(struct VP9_COMP *cpi); void vp9_init_plane_quantizers(struct VP9_COMP *cpi, MACROBLOCK *x); void vp9_init_quantizer(struct VP9_COMP *cpi); void vp9_set_quantizer(struct VP9Common *cm, int q); int vp9_quantizer_to_qindex(int quantizer); int vp9_qindex_to_quantizer(int qindex); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_QUANTIZE_H_ libvpx-1.8.2/vp9/encoder/vp9_ratectrl.c000066400000000000000000004005341357355204000177570ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <limits.h> #include <math.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_ports/system_state.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropymode.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_seg_common.h" #include "vp9/encoder/vp9_encodemv.h" #include "vp9/encoder/vp9_ratectrl.h" // Max rate per frame for 1080P and below encodes if no level requirement given. // For larger formats limit to MAX_MB_RATE bits per MB // 4Mbits is derived from the level requirement for level 4 (1080P 30) which // requires that HW can sustain a rate of 16Mbits over a 4 frame group. // If a lower level requirement is specified then this may override this value.
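// For example, sustaining 16 Mbits over a 4 frame group allows any single
// frame at most 16,000,000 / 4 = 4,000,000 bits, which is the MAXRATE_1080P
// value below.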
#define MAX_MB_RATE 250 #define MAXRATE_1080P 4000000 #define DEFAULT_KF_BOOST 2000 #define DEFAULT_GF_BOOST 2000 #define LIMIT_QRANGE_FOR_ALTREF_AND_KEY 1 #define MIN_BPB_FACTOR 0.005 #define MAX_BPB_FACTOR 50 #if CONFIG_VP9_HIGHBITDEPTH #define ASSIGN_MINQ_TABLE(bit_depth, name) \ do { \ switch (bit_depth) { \ case VPX_BITS_8: name = name##_8; break; \ case VPX_BITS_10: name = name##_10; break; \ default: \ assert(bit_depth == VPX_BITS_12); \ name = name##_12; \ break; \ } \ } while (0) #else #define ASSIGN_MINQ_TABLE(bit_depth, name) \ do { \ (void)bit_depth; \ name = name##_8; \ } while (0) #endif // Tables relating active max Q to active min Q static int kf_low_motion_minq_8[QINDEX_RANGE]; static int kf_high_motion_minq_8[QINDEX_RANGE]; static int arfgf_low_motion_minq_8[QINDEX_RANGE]; static int arfgf_high_motion_minq_8[QINDEX_RANGE]; static int inter_minq_8[QINDEX_RANGE]; static int rtc_minq_8[QINDEX_RANGE]; #if CONFIG_VP9_HIGHBITDEPTH static int kf_low_motion_minq_10[QINDEX_RANGE]; static int kf_high_motion_minq_10[QINDEX_RANGE]; static int arfgf_low_motion_minq_10[QINDEX_RANGE]; static int arfgf_high_motion_minq_10[QINDEX_RANGE]; static int inter_minq_10[QINDEX_RANGE]; static int rtc_minq_10[QINDEX_RANGE]; static int kf_low_motion_minq_12[QINDEX_RANGE]; static int kf_high_motion_minq_12[QINDEX_RANGE]; static int arfgf_low_motion_minq_12[QINDEX_RANGE]; static int arfgf_high_motion_minq_12[QINDEX_RANGE]; static int inter_minq_12[QINDEX_RANGE]; static int rtc_minq_12[QINDEX_RANGE]; #endif #ifdef AGGRESSIVE_VBR static int gf_high = 2400; static int gf_low = 400; static int kf_high = 4000; static int kf_low = 400; #else static int gf_high = 2000; static int gf_low = 400; static int kf_high = 4800; static int kf_low = 300; #endif // Functions to compute the active minq lookup table entries based on a // formulaic approach to facilitate easier adjustment of the Q tables. // The formulae were derived from computing a 3rd order polynomial best // fit to the original data (after plotting real maxq vs minq (not q index)) static int get_minq_index(double maxq, double x3, double x2, double x1, vpx_bit_depth_t bit_depth) { int i; const double minqtarget = VPXMIN(((x3 * maxq + x2) * maxq + x1) * maxq, maxq); // Special case handling to deal with the step from q2.0 // down to lossless mode represented by q 1.0. 
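// The Horner form ((x3 * maxq + x2) * maxq + x1) * maxq above evaluates
// x3 * maxq^3 + x2 * maxq^2 + x1 * maxq; index 0 is the lossless point, so
// any target at or below q 2.0 maps directly to index 0.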
if (minqtarget <= 2.0) return 0; for (i = 0; i < QINDEX_RANGE; i++) { if (minqtarget <= vp9_convert_qindex_to_q(i, bit_depth)) return i; } return QINDEX_RANGE - 1; } static void init_minq_luts(int *kf_low_m, int *kf_high_m, int *arfgf_low, int *arfgf_high, int *inter, int *rtc, vpx_bit_depth_t bit_depth) { int i; for (i = 0; i < QINDEX_RANGE; i++) { const double maxq = vp9_convert_qindex_to_q(i, bit_depth); kf_low_m[i] = get_minq_index(maxq, 0.000001, -0.0004, 0.150, bit_depth); kf_high_m[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.45, bit_depth); #ifdef AGGRESSIVE_VBR arfgf_low[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.275, bit_depth); inter[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.80, bit_depth); #else arfgf_low[i] = get_minq_index(maxq, 0.0000015, -0.0009, 0.30, bit_depth); inter[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.70, bit_depth); #endif arfgf_high[i] = get_minq_index(maxq, 0.0000021, -0.00125, 0.55, bit_depth); rtc[i] = get_minq_index(maxq, 0.00000271, -0.00113, 0.70, bit_depth); } } void vp9_rc_init_minq_luts(void) { init_minq_luts(kf_low_motion_minq_8, kf_high_motion_minq_8, arfgf_low_motion_minq_8, arfgf_high_motion_minq_8, inter_minq_8, rtc_minq_8, VPX_BITS_8); #if CONFIG_VP9_HIGHBITDEPTH init_minq_luts(kf_low_motion_minq_10, kf_high_motion_minq_10, arfgf_low_motion_minq_10, arfgf_high_motion_minq_10, inter_minq_10, rtc_minq_10, VPX_BITS_10); init_minq_luts(kf_low_motion_minq_12, kf_high_motion_minq_12, arfgf_low_motion_minq_12, arfgf_high_motion_minq_12, inter_minq_12, rtc_minq_12, VPX_BITS_12); #endif } // These functions use formulaic calculations to make playing with the // quantizer tables easier. If necessary they can be replaced by lookup // tables if and when things settle down in the experimental bitstream double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth) { // Convert the index to a real Q value (scaled down to match old Q values) #if CONFIG_VP9_HIGHBITDEPTH switch (bit_depth) { case VPX_BITS_8: return vp9_ac_quant(qindex, 0, bit_depth) / 4.0; case VPX_BITS_10: return vp9_ac_quant(qindex, 0, bit_depth) / 16.0; default: assert(bit_depth == VPX_BITS_12); return vp9_ac_quant(qindex, 0, bit_depth) / 64.0; } #else return vp9_ac_quant(qindex, 0, bit_depth) / 4.0; #endif } int vp9_convert_q_to_qindex(double q_val, vpx_bit_depth_t bit_depth) { int i; for (i = 0; i < QINDEX_RANGE; ++i) if (vp9_convert_qindex_to_q(i, bit_depth) >= q_val) break; if (i == QINDEX_RANGE) i--; return i; } int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, double correction_factor, vpx_bit_depth_t bit_depth) { const double q = vp9_convert_qindex_to_q(qindex, bit_depth); int enumerator = frame_type == KEY_FRAME ? 
2700000 : 1800000; assert(correction_factor <= MAX_BPB_FACTOR && correction_factor >= MIN_BPB_FACTOR); // q based adjustment to baseline enumerator enumerator += (int)(enumerator * q) >> 12; return (int)(enumerator * correction_factor / q); } int vp9_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs, double correction_factor, vpx_bit_depth_t bit_depth) { const int bpm = (int)(vp9_rc_bits_per_mb(frame_type, q, correction_factor, bit_depth)); return VPXMAX(FRAME_OVERHEAD_BITS, (int)(((uint64_t)bpm * mbs) >> BPER_MB_NORMBITS)); } int vp9_rc_clamp_pframe_target_size(const VP9_COMP *const cpi, int target) { const RATE_CONTROL *rc = &cpi->rc; const VP9EncoderConfig *oxcf = &cpi->oxcf; const int min_frame_target = VPXMAX(rc->min_frame_bandwidth, rc->avg_frame_bandwidth >> 5); if (target < min_frame_target) target = min_frame_target; if (cpi->refresh_golden_frame && rc->is_src_frame_alt_ref) { // If there is an active ARF at this location use the minimum // bits on this frame even if it is a constructed arf. // The active maximum quantizer ensures that an appropriate // number of bits will be spent if needed for constructed ARFs. target = min_frame_target; } // Clip the frame target to the maximum allowed value. if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth; if (oxcf->rc_max_inter_bitrate_pct) { const int max_rate = rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100; target = VPXMIN(target, max_rate); } return target; } int vp9_rc_clamp_iframe_target_size(const VP9_COMP *const cpi, int target) { const RATE_CONTROL *rc = &cpi->rc; const VP9EncoderConfig *oxcf = &cpi->oxcf; if (oxcf->rc_max_intra_bitrate_pct) { const int max_rate = rc->avg_frame_bandwidth * oxcf->rc_max_intra_bitrate_pct / 100; target = VPXMIN(target, max_rate); } if (target > rc->max_frame_bandwidth) target = rc->max_frame_bandwidth; return target; } // TODO(marpan/jianj): bits_off_target and buffer_level are used in the same // way for CBR mode, for the buffering updates below. Look into removing one // of these (i.e., bits_off_target). // Update the buffer level before encoding with the per-frame-bandwidth. static void update_buffer_level_preencode(VP9_COMP *cpi) { RATE_CONTROL *const rc = &cpi->rc; rc->bits_off_target += rc->avg_frame_bandwidth; // Clip the buffer level to the maximum specified buffer size. rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size); rc->buffer_level = rc->bits_off_target; } // Update the buffer level before encoding with the per-frame-bandwidth // for SVC. The current and all upper temporal layers are updated, needed // for the layered rate control which involves cumulative buffer levels for // the temporal layers. Allow for using the timestamp(pts) delta for the // framerate when the set_ref_frame_config is used. static void update_buffer_level_svc_preencode(VP9_COMP *cpi) { SVC *const svc = &cpi->svc; int i; // Set this to 1 to use timestamp delta for "framerate" under // ref_frame_config usage.
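// The pts values are in units of 1/10,000,000 second (see the framerate_pts
// computation below), so a constant 30 fps input gives ts_delta of roughly
// 333,333.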
int use_timestamp = 1; const int64_t ts_delta = svc->time_stamp_superframe - svc->time_stamp_prev[svc->spatial_layer_id]; for (i = svc->temporal_layer_id; i < svc->number_temporal_layers; ++i) { const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, svc->number_temporal_layers); LAYER_CONTEXT *const lc = &svc->layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; if (use_timestamp && cpi->svc.use_set_ref_frame_config && svc->number_temporal_layers == 1 && ts_delta > 0 && svc->current_superframe > 0) { // TODO(marpan): This may need to be modified for temporal layers. const double framerate_pts = 10000000.0 / ts_delta; lrc->bits_off_target += (int)(lc->target_bandwidth / framerate_pts); } else { lrc->bits_off_target += (int)(lc->target_bandwidth / lc->framerate); } // Clip buffer level to maximum buffer size for the layer. lrc->bits_off_target = VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size); lrc->buffer_level = lrc->bits_off_target; if (i == svc->temporal_layer_id) { cpi->rc.bits_off_target = lrc->bits_off_target; cpi->rc.buffer_level = lrc->buffer_level; } } } // Update the buffer level for higher temporal layers, given the encoded current // temporal layer. static void update_layer_buffer_level_postencode(SVC *svc, int encoded_frame_size) { int i = 0; const int current_temporal_layer = svc->temporal_layer_id; for (i = current_temporal_layer + 1; i < svc->number_temporal_layers; ++i) { const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i, svc->number_temporal_layers); LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; lrc->bits_off_target -= encoded_frame_size; // Clip buffer level to maximum buffer size for the layer. lrc->bits_off_target = VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size); lrc->buffer_level = lrc->bits_off_target; } } // Update the buffer level after encoding with encoded frame size. static void update_buffer_level_postencode(VP9_COMP *cpi, int encoded_frame_size) { RATE_CONTROL *const rc = &cpi->rc; rc->bits_off_target -= encoded_frame_size; // Clip the buffer level to the maximum specified buffer size. rc->bits_off_target = VPXMIN(rc->bits_off_target, rc->maximum_buffer_size); // For screen-content mode, and if frame-dropper is off, don't let buffer // level go below threshold, given here as -rc->maximum_buffer_size.
if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.drop_frames_water_mark == 0) rc->bits_off_target = VPXMAX(rc->bits_off_target, -rc->maximum_buffer_size); rc->buffer_level = rc->bits_off_target; if (is_one_pass_cbr_svc(cpi)) { update_layer_buffer_level_postencode(&cpi->svc, encoded_frame_size); } } int vp9_rc_get_default_min_gf_interval(int width, int height, double framerate) { // Assume we do not need any constraint lower than 4K 20 fps static const double factor_safe = 3840 * 2160 * 20.0; const double factor = width * height * framerate; const int default_interval = clamp((int)(framerate * 0.125), MIN_GF_INTERVAL, MAX_GF_INTERVAL); if (factor <= factor_safe) return default_interval; else return VPXMAX(default_interval, (int)(MIN_GF_INTERVAL * factor / factor_safe + 0.5)); // Note this logic makes: // 4K24: 5 // 4K30: 6 // 4K60: 12 } int vp9_rc_get_default_max_gf_interval(double framerate, int min_gf_interval) { int interval = VPXMIN(MAX_GF_INTERVAL, (int)(framerate * 0.75)); interval += (interval & 0x01); // Round to even value return VPXMAX(interval, min_gf_interval); } void vp9_rc_init(const VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc) { int i; if (pass == 0 && oxcf->rc_mode == VPX_CBR) { rc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q; rc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; } else { rc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2; rc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2; } rc->last_q[KEY_FRAME] = oxcf->best_allowed_q; rc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; rc->buffer_level = rc->starting_buffer_level; rc->bits_off_target = rc->starting_buffer_level; rc->rolling_target_bits = rc->avg_frame_bandwidth; rc->rolling_actual_bits = rc->avg_frame_bandwidth; rc->long_rolling_target_bits = rc->avg_frame_bandwidth; rc->long_rolling_actual_bits = rc->avg_frame_bandwidth; rc->total_actual_bits = 0; rc->total_target_bits = 0; rc->total_target_vs_actual = 0; rc->avg_frame_low_motion = 0; rc->count_last_scene_change = 0; rc->af_ratio_onepass_vbr = 10; rc->prev_avg_source_sad_lag = 0; rc->high_source_sad = 0; rc->reset_high_source_sad = 0; rc->high_source_sad_lagindex = -1; rc->high_num_blocks_with_motion = 0; rc->hybrid_intra_scene_change = 0; rc->re_encode_maxq_scene_change = 0; rc->alt_ref_gf_group = 0; rc->last_frame_is_src_altref = 0; rc->fac_active_worst_inter = 150; rc->fac_active_worst_gf = 100; rc->force_qpmin = 0; for (i = 0; i < MAX_LAG_BUFFERS; ++i) rc->avg_source_sad[i] = 0; rc->frames_since_key = 8; // Sensible default for first frame. 
rc->this_key_frame_forced = 0; rc->next_key_frame_forced = 0; rc->source_alt_ref_pending = 0; rc->source_alt_ref_active = 0; rc->frames_till_gf_update_due = 0; rc->ni_av_qi = oxcf->worst_allowed_q; rc->ni_tot_qi = 0; rc->ni_frames = 0; rc->tot_q = 0.0; rc->avg_q = vp9_convert_qindex_to_q(oxcf->worst_allowed_q, oxcf->bit_depth); for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { rc->rate_correction_factors[i] = 1.0; rc->damped_adjustment[i] = 0; } rc->min_gf_interval = oxcf->min_gf_interval; rc->max_gf_interval = oxcf->max_gf_interval; if (rc->min_gf_interval == 0) rc->min_gf_interval = vp9_rc_get_default_min_gf_interval( oxcf->width, oxcf->height, oxcf->init_framerate); if (rc->max_gf_interval == 0) rc->max_gf_interval = vp9_rc_get_default_max_gf_interval( oxcf->init_framerate, rc->min_gf_interval); rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2; rc->force_max_q = 0; rc->last_post_encode_dropped_scene_change = 0; rc->use_post_encode_drop = 0; rc->ext_use_post_encode_drop = 0; rc->arf_active_best_quality_adjustment_factor = 1.0; rc->arf_increase_active_best_quality = 0; rc->preserve_arf_as_gld = 0; rc->preserve_next_arf_as_gld = 0; rc->show_arf_as_gld = 0; } static int check_buffer_above_thresh(VP9_COMP *cpi, int drop_mark) { SVC *svc = &cpi->svc; if (!cpi->use_svc || cpi->svc.framedrop_mode != FULL_SUPERFRAME_DROP) { RATE_CONTROL *const rc = &cpi->rc; return (rc->buffer_level > drop_mark); } else { int i; // For SVC in the FULL_SUPERFRAME_DROP mode: the condition on // buffer (if it's above threshold, so no drop) is checked on current and // upper spatial layers. If any spatial layer is not above threshold then // we return 0. for (i = svc->spatial_layer_id; i < svc->number_spatial_layers; ++i) { const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id, svc->number_temporal_layers); LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; // Exclude check for layer whose bitrate is 0. if (lc->target_bandwidth > 0) { const int drop_mark_layer = (int)(cpi->svc.framedrop_thresh[i] * lrc->optimal_buffer_level / 100); if (!(lrc->buffer_level > drop_mark_layer)) return 0; } } return 1; } } static int check_buffer_below_thresh(VP9_COMP *cpi, int drop_mark) { SVC *svc = &cpi->svc; if (!cpi->use_svc || cpi->svc.framedrop_mode == LAYER_DROP) { RATE_CONTROL *const rc = &cpi->rc; return (rc->buffer_level <= drop_mark); } else { int i; // For SVC in the constrained framedrop mode (svc->framedrop_mode = // CONSTRAINED_LAYER_DROP or FULL_SUPERFRAME_DROP): the condition on // buffer (if it's below threshold, so drop frame) is checked on current // and upper spatial layers. For FULL_SUPERFRAME_DROP mode if any // spatial layer is <= threshold, then we return 1 (drop). for (i = svc->spatial_layer_id; i < svc->number_spatial_layers; ++i) { const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id, svc->number_temporal_layers); LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; // Exclude check for layer whose bitrate is 0.
if (lc->target_bandwidth > 0) { const int drop_mark_layer = (int)(cpi->svc.framedrop_thresh[i] * lrc->optimal_buffer_level / 100); if (cpi->svc.framedrop_mode == FULL_SUPERFRAME_DROP) { if (lrc->buffer_level <= drop_mark_layer) return 1; } else { if (!(lrc->buffer_level <= drop_mark_layer)) return 0; } } } if (cpi->svc.framedrop_mode == FULL_SUPERFRAME_DROP) return 0; else return 1; } } int vp9_test_drop(VP9_COMP *cpi) { const VP9EncoderConfig *oxcf = &cpi->oxcf; RATE_CONTROL *const rc = &cpi->rc; SVC *svc = &cpi->svc; int drop_frames_water_mark = oxcf->drop_frames_water_mark; if (cpi->use_svc) { // If we have dropped max_consec_drop frames, then we don't // drop this spatial layer, and reset counter to 0. if (svc->drop_count[svc->spatial_layer_id] == svc->max_consec_drop) { svc->drop_count[svc->spatial_layer_id] = 0; return 0; } else { drop_frames_water_mark = svc->framedrop_thresh[svc->spatial_layer_id]; } } if (!drop_frames_water_mark || (svc->spatial_layer_id > 0 && svc->framedrop_mode == FULL_SUPERFRAME_DROP)) { return 0; } else { if ((rc->buffer_level < 0 && svc->framedrop_mode != FULL_SUPERFRAME_DROP) || (check_buffer_below_thresh(cpi, -1) && svc->framedrop_mode == FULL_SUPERFRAME_DROP)) { // Always drop if buffer is below 0. return 1; } else { // If buffer is below drop_mark, for now just drop every other frame // (starting with the next frame) until it increases back over drop_mark. int drop_mark = (int)(drop_frames_water_mark * rc->optimal_buffer_level / 100); if (check_buffer_above_thresh(cpi, drop_mark) && (rc->decimation_factor > 0)) { --rc->decimation_factor; } else if (check_buffer_below_thresh(cpi, drop_mark) && rc->decimation_factor == 0) { rc->decimation_factor = 1; } if (rc->decimation_factor > 0) { if (rc->decimation_count > 0) { --rc->decimation_count; return 1; } else { rc->decimation_count = rc->decimation_factor; return 0; } } else { rc->decimation_count = 0; return 0; } } } } int post_encode_drop_cbr(VP9_COMP *cpi, size_t *size) { size_t frame_size = *size << 3; int64_t new_buffer_level = cpi->rc.buffer_level + cpi->rc.avg_frame_bandwidth - (int64_t)frame_size; // For now we drop if new buffer level (given the encoded frame size) goes // below 0. if (new_buffer_level < 0) { *size = 0; vp9_rc_postencode_update_drop_frame(cpi); // Update flag to use for next frame. if (cpi->rc.high_source_sad || (cpi->use_svc && cpi->svc.high_source_sad_superframe)) cpi->rc.last_post_encode_dropped_scene_change = 1; // Force max_q on next frame. cpi->rc.force_max_q = 1; cpi->rc.avg_frame_qindex[INTER_FRAME] = cpi->rc.worst_quality; cpi->last_frame_dropped = 1; cpi->ext_refresh_frame_flags_pending = 0; if (cpi->use_svc) { SVC *svc = &cpi->svc; int sl = 0; int tl = 0; svc->last_layer_dropped[svc->spatial_layer_id] = 1; svc->drop_spatial_layer[svc->spatial_layer_id] = 1; svc->drop_count[svc->spatial_layer_id]++; svc->skip_enhancement_layer = 1; // Postencode drop is only checked on base spatial layer, // for now if max-q is set on base we force it on all layers.
for (sl = 0; sl < svc->number_spatial_layers; ++sl) { for (tl = 0; tl < svc->number_temporal_layers; ++tl) { const int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers); LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; lrc->force_max_q = 1; lrc->avg_frame_qindex[INTER_FRAME] = cpi->rc.worst_quality; } } } return 1; } cpi->rc.force_max_q = 0; cpi->rc.last_post_encode_dropped_scene_change = 0; return 0; } int vp9_rc_drop_frame(VP9_COMP *cpi) { SVC *svc = &cpi->svc; int svc_prev_layer_dropped = 0; // In the constrained or full_superframe framedrop mode for svc // (framedrop_mode is neither LAYER_DROP nor CONSTRAINED_FROM_ABOVE_DROP), // if the previous spatial layer was dropped, drop the current spatial layer. if (cpi->use_svc && svc->spatial_layer_id > 0 && svc->drop_spatial_layer[svc->spatial_layer_id - 1]) svc_prev_layer_dropped = 1; if ((svc_prev_layer_dropped && svc->framedrop_mode != LAYER_DROP && svc->framedrop_mode != CONSTRAINED_FROM_ABOVE_DROP) || svc->force_drop_constrained_from_above[svc->spatial_layer_id] || vp9_test_drop(cpi)) { vp9_rc_postencode_update_drop_frame(cpi); cpi->ext_refresh_frame_flags_pending = 0; cpi->last_frame_dropped = 1; if (cpi->use_svc) { svc->last_layer_dropped[svc->spatial_layer_id] = 1; svc->drop_spatial_layer[svc->spatial_layer_id] = 1; svc->drop_count[svc->spatial_layer_id]++; svc->skip_enhancement_layer = 1; if (svc->framedrop_mode == LAYER_DROP || (svc->framedrop_mode == CONSTRAINED_FROM_ABOVE_DROP && svc->force_drop_constrained_from_above[svc->number_spatial_layers - 1] == 0) || svc->drop_spatial_layer[0] == 0) { // For the case of constrained drop mode where full superframe is // dropped, we don't increment the svc frame counters. // In particular temporal layer counter (which is incremented in // vp9_inc_frame_in_layer()) won't be incremented, so on a dropped // frame we try the same temporal_layer_id on next incoming frame. // This is to avoid an issue with temporal alignment with full // superframe dropping. vp9_inc_frame_in_layer(cpi); } if (svc->spatial_layer_id == svc->number_spatial_layers - 1) { int i; int all_layers_drop = 1; for (i = 0; i < svc->spatial_layer_id; i++) { if (svc->drop_spatial_layer[i] == 0) { all_layers_drop = 0; break; } } if (all_layers_drop == 1) svc->skip_enhancement_layer = 0; } } return 1; } return 0; } static int adjust_q_cbr(const VP9_COMP *cpi, int q) { // This makes sure q is between oscillating Qs to prevent resonance. if (!cpi->rc.reset_high_source_sad && (!cpi->oxcf.gf_cbr_boost_pct || !(cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame)) && (cpi->rc.rc_1_frame * cpi->rc.rc_2_frame == -1) && cpi->rc.q_1_frame != cpi->rc.q_2_frame) { int qclamp = clamp(q, VPXMIN(cpi->rc.q_1_frame, cpi->rc.q_2_frame), VPXMAX(cpi->rc.q_1_frame, cpi->rc.q_2_frame)); // If the previous frame had overshoot and the current q needs to increase // above the clamped value, reduce the clamp for faster reaction to // overshoot.
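// For example, with q_1_frame = 30 and q_2_frame = 50, q is clamped to
// [30, 50]; if the last frame overshot (rc_1_frame == -1) and q = 70 is
// requested, the midpoint (70 + 50) / 2 = 60 is used instead of 50.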
if (cpi->rc.rc_1_frame == -1 && q > qclamp) q = (q + qclamp) >> 1; else q = qclamp; } if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) vp9_cyclic_refresh_limit_q(cpi, &q); return VPXMAX(VPXMIN(q, cpi->rc.worst_quality), cpi->rc.best_quality); } static double get_rate_correction_factor(const VP9_COMP *cpi) { const RATE_CONTROL *const rc = &cpi->rc; const VP9_COMMON *const cm = &cpi->common; double rcf; if (frame_is_intra_only(cm)) { rcf = rc->rate_correction_factors[KF_STD]; } else if (cpi->oxcf.pass == 2) { RATE_FACTOR_LEVEL rf_lvl = cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index]; rcf = rc->rate_correction_factors[rf_lvl]; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && !rc->is_src_frame_alt_ref && !cpi->use_svc && (cpi->oxcf.rc_mode != VPX_CBR || cpi->oxcf.gf_cbr_boost_pct > 100)) rcf = rc->rate_correction_factors[GF_ARF_STD]; else rcf = rc->rate_correction_factors[INTER_NORMAL]; } rcf *= rcf_mult[rc->frame_size_selector]; return fclamp(rcf, MIN_BPB_FACTOR, MAX_BPB_FACTOR); } static void set_rate_correction_factor(VP9_COMP *cpi, double factor) { RATE_CONTROL *const rc = &cpi->rc; const VP9_COMMON *const cm = &cpi->common; // Normalize RCF to account for the size-dependent scaling factor. factor /= rcf_mult[cpi->rc.frame_size_selector]; factor = fclamp(factor, MIN_BPB_FACTOR, MAX_BPB_FACTOR); if (frame_is_intra_only(cm)) { rc->rate_correction_factors[KF_STD] = factor; } else if (cpi->oxcf.pass == 2) { RATE_FACTOR_LEVEL rf_lvl = cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index]; rc->rate_correction_factors[rf_lvl] = factor; } else { if ((cpi->refresh_alt_ref_frame || cpi->refresh_golden_frame) && !rc->is_src_frame_alt_ref && !cpi->use_svc && (cpi->oxcf.rc_mode != VPX_CBR || cpi->oxcf.gf_cbr_boost_pct > 100)) rc->rate_correction_factors[GF_ARF_STD] = factor; else rc->rate_correction_factors[INTER_NORMAL] = factor; } } void vp9_rc_update_rate_correction_factors(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; int correction_factor = 100; double rate_correction_factor = get_rate_correction_factor(cpi); double adjustment_limit; RATE_FACTOR_LEVEL rf_lvl = cpi->twopass.gf_group.rf_level[cpi->twopass.gf_group.index]; int projected_size_based_on_q = 0; // Do not update the rate factors for arf overlay frames. if (cpi->rc.is_src_frame_alt_ref) return; // Clear down mmx registers to allow floating point in what follows vpx_clear_system_state(); // Work out how big we would have expected the frame to be at this Q given // the current correction factor. // Stay in double to avoid int overflow when values are large if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->common.seg.enabled) { projected_size_based_on_q = vp9_cyclic_refresh_estimate_bits_at_q(cpi, rate_correction_factor); } else { FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type; projected_size_based_on_q = vp9_estimate_bits_at_q(frame_type, cm->base_qindex, cm->MBs, rate_correction_factor, cm->bit_depth); } // Work out a size correction factor. if (projected_size_based_on_q > FRAME_OVERHEAD_BITS) correction_factor = (int)((100 * (int64_t)cpi->rc.projected_frame_size) / projected_size_based_on_q); // Do not use damped adjustment for the first frame of each frame type if (!cpi->rc.damped_adjustment[rf_lvl]) { adjustment_limit = 1.0; cpi->rc.damped_adjustment[rf_lvl] = 1; } else { // More heavily damped adjustment used if we have been oscillating either // side of target. 
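// For example, a correction_factor of 200 (the frame came out at twice the
// predicted size) gives 0.25 + 0.5 * log10(2.0), roughly 0.40; a factor of
// 100 (exactly on target) gives the minimum damping of 0.25.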
adjustment_limit = 0.25 + 0.5 * VPXMIN(1, fabs(log10(0.01 * correction_factor))); } cpi->rc.q_2_frame = cpi->rc.q_1_frame; cpi->rc.q_1_frame = cm->base_qindex; cpi->rc.rc_2_frame = cpi->rc.rc_1_frame; if (correction_factor > 110) cpi->rc.rc_1_frame = -1; else if (correction_factor < 90) cpi->rc.rc_1_frame = 1; else cpi->rc.rc_1_frame = 0; // Turn off oscillation detection in the case of massive overshoot. if (cpi->rc.rc_1_frame == -1 && cpi->rc.rc_2_frame == 1 && correction_factor > 1000) { cpi->rc.rc_2_frame = 0; } if (correction_factor > 102) { // We are not already at the worst allowable quality correction_factor = (int)(100 + ((correction_factor - 100) * adjustment_limit)); rate_correction_factor = (rate_correction_factor * correction_factor) / 100; // Keep rate_correction_factor within limits if (rate_correction_factor > MAX_BPB_FACTOR) rate_correction_factor = MAX_BPB_FACTOR; } else if (correction_factor < 99) { // We are not already at the best allowable quality correction_factor = (int)(100 - ((100 - correction_factor) * adjustment_limit)); rate_correction_factor = (rate_correction_factor * correction_factor) / 100; // Keep rate_correction_factor within limits if (rate_correction_factor < MIN_BPB_FACTOR) rate_correction_factor = MIN_BPB_FACTOR; } set_rate_correction_factor(cpi, rate_correction_factor); } int vp9_rc_regulate_q(const VP9_COMP *cpi, int target_bits_per_frame, int active_best_quality, int active_worst_quality) { const VP9_COMMON *const cm = &cpi->common; CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; int q = active_worst_quality; int last_error = INT_MAX; int i, target_bits_per_mb, bits_per_mb_at_this_q; const double correction_factor = get_rate_correction_factor(cpi); // Calculate required scaling factor based on target frame size and size of // frame produced using previous Q. target_bits_per_mb = (int)(((uint64_t)target_bits_per_frame << BPER_MB_NORMBITS) / cm->MBs); i = active_best_quality; do { if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cm->seg.enabled && cr->apply_cyclic_refresh && (!cpi->oxcf.gf_cbr_boost_pct || !cpi->refresh_golden_frame)) { bits_per_mb_at_this_q = (int)vp9_cyclic_refresh_rc_bits_per_mb(cpi, i, correction_factor); } else { FRAME_TYPE frame_type = cm->intra_only ? KEY_FRAME : cm->frame_type; bits_per_mb_at_this_q = (int)vp9_rc_bits_per_mb( frame_type, i, correction_factor, cm->bit_depth); } if (bits_per_mb_at_this_q <= target_bits_per_mb) { if ((target_bits_per_mb - bits_per_mb_at_this_q) <= last_error) q = i; else q = i - 1; break; } else { last_error = bits_per_mb_at_this_q - target_bits_per_mb; } } while (++i <= active_worst_quality); // Adjustment to q for CBR mode.
if (cpi->oxcf.rc_mode == VPX_CBR) return adjust_q_cbr(cpi, q); return q; } static int get_active_quality(int q, int gfu_boost, int low, int high, int *low_motion_minq, int *high_motion_minq) { if (gfu_boost > high) { return low_motion_minq[q]; } else if (gfu_boost < low) { return high_motion_minq[q]; } else { const int gap = high - low; const int offset = high - gfu_boost; const int qdiff = high_motion_minq[q] - low_motion_minq[q]; const int adjustment = ((offset * qdiff) + (gap >> 1)) / gap; return low_motion_minq[q] + adjustment; } } static int get_kf_active_quality(const RATE_CONTROL *const rc, int q, vpx_bit_depth_t bit_depth) { int *kf_low_motion_minq; int *kf_high_motion_minq; ASSIGN_MINQ_TABLE(bit_depth, kf_low_motion_minq); ASSIGN_MINQ_TABLE(bit_depth, kf_high_motion_minq); return get_active_quality(q, rc->kf_boost, kf_low, kf_high, kf_low_motion_minq, kf_high_motion_minq); } static int get_gf_active_quality(const VP9_COMP *const cpi, int q, vpx_bit_depth_t bit_depth) { const GF_GROUP *const gf_group = &cpi->twopass.gf_group; const RATE_CONTROL *const rc = &cpi->rc; int *arfgf_low_motion_minq; int *arfgf_high_motion_minq; const int gfu_boost = cpi->multi_layer_arf ? gf_group->gfu_boost[gf_group->index] : rc->gfu_boost; ASSIGN_MINQ_TABLE(bit_depth, arfgf_low_motion_minq); ASSIGN_MINQ_TABLE(bit_depth, arfgf_high_motion_minq); return get_active_quality(q, gfu_boost, gf_low, gf_high, arfgf_low_motion_minq, arfgf_high_motion_minq); } static int calc_active_worst_quality_one_pass_vbr(const VP9_COMP *cpi) { const RATE_CONTROL *const rc = &cpi->rc; const unsigned int curr_frame = cpi->common.current_video_frame; int active_worst_quality; if (cpi->common.frame_type == KEY_FRAME) { active_worst_quality = curr_frame == 0 ? rc->worst_quality : rc->last_q[KEY_FRAME] << 1; } else { if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] * 5 >> 2 : rc->last_q[INTER_FRAME] * rc->fac_active_worst_gf / 100; } else { active_worst_quality = curr_frame == 1 ? rc->last_q[KEY_FRAME] << 1 : rc->avg_frame_qindex[INTER_FRAME] * rc->fac_active_worst_inter / 100; } } return VPXMIN(active_worst_quality, rc->worst_quality); } // Adjust active_worst_quality level based on buffer level. static int calc_active_worst_quality_one_pass_cbr(const VP9_COMP *cpi) { // Adjust active_worst_quality: If buffer is above the optimal/target level, // bring active_worst_quality down depending on fullness of buffer. // If buffer is below the optimal level, let the active_worst_quality go from // ambient Q (at buffer = optimal level) to worst_quality level // (at buffer = critical level). const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *rc = &cpi->rc; // Buffer level below which we push active_worst to worst_quality. int64_t critical_level = rc->optimal_buffer_level >> 3; int64_t buff_lvl_step = 0; int adjustment = 0; int active_worst_quality; int ambient_qp; unsigned int num_frames_weight_key = 5 * cpi->svc.number_temporal_layers; if (frame_is_intra_only(cm) || rc->reset_high_source_sad || rc->force_max_q) return rc->worst_quality; // For ambient_qp we use minimum of avg_frame_qindex[KEY_FRAME/INTER_FRAME] // for the first few frames following key frame. These are both initialized // to worst_quality and updated with (3/4, 1/4) average in postencode_update. // So for first few frames following key, the qp of that key frame is weighted // into the active_worst_quality setting. 
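// With a single temporal layer this weighting applies to the first 5 encoded
// frames (num_frames_weight_key above).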
ambient_qp = (cm->current_video_frame < num_frames_weight_key) ? VPXMIN(rc->avg_frame_qindex[INTER_FRAME], rc->avg_frame_qindex[KEY_FRAME]) : rc->avg_frame_qindex[INTER_FRAME]; active_worst_quality = VPXMIN(rc->worst_quality, (ambient_qp * 5) >> 2); // For SVC if the current base spatial layer was key frame, use the QP from // that base layer for ambient_qp. if (cpi->use_svc && cpi->svc.spatial_layer_id > 0) { int layer = LAYER_IDS_TO_IDX(0, cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers); const LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; if (lc->is_key_frame) { const RATE_CONTROL *lrc = &lc->rc; ambient_qp = VPXMIN(ambient_qp, lrc->last_q[KEY_FRAME]); active_worst_quality = VPXMIN(rc->worst_quality, (ambient_qp * 9) >> 3); } } if (rc->buffer_level > rc->optimal_buffer_level) { // Adjust down. // Maximum limit for down adjustment ~30%; make it lower for screen content. int max_adjustment_down = active_worst_quality / 3; if (cpi->oxcf.content == VP9E_CONTENT_SCREEN) max_adjustment_down = active_worst_quality >> 3; if (max_adjustment_down) { buff_lvl_step = ((rc->maximum_buffer_size - rc->optimal_buffer_level) / max_adjustment_down); if (buff_lvl_step) adjustment = (int)((rc->buffer_level - rc->optimal_buffer_level) / buff_lvl_step); active_worst_quality -= adjustment; } } else if (rc->buffer_level > critical_level) { // Adjust up from ambient Q. if (critical_level) { buff_lvl_step = (rc->optimal_buffer_level - critical_level); if (buff_lvl_step) { adjustment = (int)((rc->worst_quality - ambient_qp) * (rc->optimal_buffer_level - rc->buffer_level) / buff_lvl_step); } active_worst_quality = ambient_qp + adjustment; } } else { // Set to worst_quality if buffer is below critical level. active_worst_quality = rc->worst_quality; } return active_worst_quality; } static int rc_pick_q_and_bounds_one_pass_cbr(const VP9_COMP *cpi, int *bottom_index, int *top_index) { const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; int active_best_quality; int active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi); int q; int *rtc_minq; ASSIGN_MINQ_TABLE(cm->bit_depth, rtc_minq); if (frame_is_intra_only(cm)) { active_best_quality = rc->best_quality; // Handle the special case for key frames forced when we have reached // the maximum key frame interval. Here force the Q to a range // based on the ambient Q to reduce the risk of popping. if (rc->this_key_frame_forced) { int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); int delta_qindex = vp9_compute_qdelta( rc, last_boosted_q, (last_boosted_q * 0.75), cm->bit_depth); active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); } else if (cm->current_video_frame > 0) { // not first frame of one pass and kf_boost is set double q_adj_factor = 1.0; double q_val; active_best_quality = get_kf_active_quality( rc, rc->avg_frame_qindex[KEY_FRAME], cm->bit_depth); // Allow somewhat lower kf minq with small image formats. if ((cm->width * cm->height) <= (352 * 288)) { q_adj_factor -= 0.25; } // Convert the adjustment factor to a qindex delta // on active_best_quality. 
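// For example, the small-format case above leaves q_adj_factor at 0.75, so
// the target Q is lowered by 25% and vp9_compute_qdelta() maps that back to a
// (negative) qindex offset.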
q_val = vp9_convert_qindex_to_q(active_best_quality, cm->bit_depth); active_best_quality += vp9_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth); } } else if (!rc->is_src_frame_alt_ref && !cpi->use_svc && cpi->oxcf.gf_cbr_boost_pct && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { // Use the lower of active_worst_quality and recent // average Q as basis for GF/ARF best Q limit unless last frame was // a key frame. if (rc->frames_since_key > 1 && rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) { q = rc->avg_frame_qindex[INTER_FRAME]; } else { q = active_worst_quality; } active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth); } else { // Use the lower of active_worst_quality and recent/average Q. if (cm->current_video_frame > 1) { if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) active_best_quality = rtc_minq[rc->avg_frame_qindex[INTER_FRAME]]; else active_best_quality = rtc_minq[active_worst_quality]; } else { if (rc->avg_frame_qindex[KEY_FRAME] < active_worst_quality) active_best_quality = rtc_minq[rc->avg_frame_qindex[KEY_FRAME]]; else active_best_quality = rtc_minq[active_worst_quality]; } } // Clip the active best and worst quality values to limits active_best_quality = clamp(active_best_quality, rc->best_quality, rc->worst_quality); active_worst_quality = clamp(active_worst_quality, active_best_quality, rc->worst_quality); *top_index = active_worst_quality; *bottom_index = active_best_quality; // Special case code to try and match quality with forced key frames if (frame_is_intra_only(cm) && rc->this_key_frame_forced) { q = rc->last_boosted_qindex; } else { q = vp9_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; } } assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality); assert(*bottom_index <= rc->worst_quality && *bottom_index >= rc->best_quality); assert(q <= rc->worst_quality && q >= rc->best_quality); return q; } static int get_active_cq_level_one_pass(const RATE_CONTROL *rc, const VP9EncoderConfig *const oxcf) { static const double cq_adjust_threshold = 0.1; int active_cq_level = oxcf->cq_level; if (oxcf->rc_mode == VPX_CQ && rc->total_target_bits > 0) { const double x = (double)rc->total_actual_bits / rc->total_target_bits; if (x < cq_adjust_threshold) { active_cq_level = (int)(active_cq_level * x / cq_adjust_threshold); } } return active_cq_level; } #define SMOOTH_PCT_MIN 0.1 #define SMOOTH_PCT_DIV 0.05 static int get_active_cq_level_two_pass(const TWO_PASS *twopass, const RATE_CONTROL *rc, const VP9EncoderConfig *const oxcf) { static const double cq_adjust_threshold = 0.1; int active_cq_level = oxcf->cq_level; if (oxcf->rc_mode == VPX_CQ) { if (twopass->mb_smooth_pct > SMOOTH_PCT_MIN) { active_cq_level -= (int)((twopass->mb_smooth_pct - SMOOTH_PCT_MIN) / SMOOTH_PCT_DIV); active_cq_level = VPXMAX(active_cq_level, 0); } if (rc->total_target_bits > 0) { const double x = (double)rc->total_actual_bits / rc->total_target_bits; if (x < cq_adjust_threshold) { active_cq_level = (int)(active_cq_level * x / cq_adjust_threshold); } } } return active_cq_level; } static int rc_pick_q_and_bounds_one_pass_vbr(const VP9_COMP *cpi, int *bottom_index, int *top_index) { const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const int 
cq_level = get_active_cq_level_one_pass(rc, oxcf); int active_best_quality; int active_worst_quality = calc_active_worst_quality_one_pass_vbr(cpi); int q; int *inter_minq; ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq); if (frame_is_intra_only(cm)) { if (oxcf->rc_mode == VPX_Q) { int qindex = cq_level; double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); int delta_qindex = vp9_compute_qdelta(rc, q, q * 0.25, cm->bit_depth); active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); } else if (rc->this_key_frame_forced) { // Handle the special case for key frames forced when we have reached // the maximum key frame interval. Here force the Q to a range // based on the ambient Q to reduce the risk of popping. int qindex = rc->last_boosted_qindex; double last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); int delta_qindex = vp9_compute_qdelta( rc, last_boosted_q, last_boosted_q * 0.75, cm->bit_depth); active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); } else { // not first frame of one pass and kf_boost is set double q_adj_factor = 1.0; double q_val; active_best_quality = get_kf_active_quality( rc, rc->avg_frame_qindex[KEY_FRAME], cm->bit_depth); // Allow somewhat lower kf minq with small image formats. if ((cm->width * cm->height) <= (352 * 288)) { q_adj_factor -= 0.25; } // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality, cm->bit_depth); active_best_quality += vp9_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth); } } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { // Use the lower of active_worst_quality and recent // average Q as basis for GF/ARF best Q limit unless last frame was // a key frame. if (rc->frames_since_key > 1) { if (rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) { q = rc->avg_frame_qindex[INTER_FRAME]; } else { q = active_worst_quality; } } else { q = rc->avg_frame_qindex[KEY_FRAME]; } // For constrained quality don't allow Q less than the cq level if (oxcf->rc_mode == VPX_CQ) { if (q < cq_level) q = cq_level; active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth); // Constrained quality uses slightly lower active best. active_best_quality = active_best_quality * 15 / 16; } else if (oxcf->rc_mode == VPX_Q) { int qindex = cq_level; double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); int delta_qindex; if (cpi->refresh_alt_ref_frame) delta_qindex = vp9_compute_qdelta(rc, q, q * 0.40, cm->bit_depth); else delta_qindex = vp9_compute_qdelta(rc, q, q * 0.50, cm->bit_depth); active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); } else { active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth); } } else { if (oxcf->rc_mode == VPX_Q) { int qindex = cq_level; double q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); double delta_rate[FIXED_GF_INTERVAL] = { 0.50, 1.0, 0.85, 1.0, 0.70, 1.0, 0.85, 1.0 }; int delta_qindex = vp9_compute_qdelta( rc, q, q * delta_rate[cm->current_video_frame % FIXED_GF_INTERVAL], cm->bit_depth); active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); } else { // Use the min of the average Q and active_worst_quality as basis for // active_best.
if (cm->current_video_frame > 1) { q = VPXMIN(rc->avg_frame_qindex[INTER_FRAME], active_worst_quality); active_best_quality = inter_minq[q]; } else { active_best_quality = inter_minq[rc->avg_frame_qindex[KEY_FRAME]]; } // For the constrained quality mode we don't want // q to fall below the cq level. if ((oxcf->rc_mode == VPX_CQ) && (active_best_quality < cq_level)) { active_best_quality = cq_level; } } } // Clip the active best and worst quality values to limits active_best_quality = clamp(active_best_quality, rc->best_quality, rc->worst_quality); active_worst_quality = clamp(active_worst_quality, active_best_quality, rc->worst_quality); *top_index = active_worst_quality; *bottom_index = active_best_quality; #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY { int qdelta = 0; vpx_clear_system_state(); // Limit Q range for the adaptive loop. if (cm->frame_type == KEY_FRAME && !rc->this_key_frame_forced && !(cm->current_video_frame == 0)) { qdelta = vp9_compute_qdelta_by_rate( &cpi->rc, cm->frame_type, active_worst_quality, 2.0, cm->bit_depth); } else if (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) { qdelta = vp9_compute_qdelta_by_rate( &cpi->rc, cm->frame_type, active_worst_quality, 1.75, cm->bit_depth); } if (rc->high_source_sad && cpi->sf.use_altref_onepass) qdelta = 0; *top_index = active_worst_quality + qdelta; *top_index = (*top_index > *bottom_index) ? *top_index : *bottom_index; } #endif if (oxcf->rc_mode == VPX_Q) { q = active_best_quality; // Special case code to try and match quality with forced key frames } else if ((cm->frame_type == KEY_FRAME) && rc->this_key_frame_forced) { q = rc->last_boosted_qindex; } else { q = vp9_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality, active_worst_quality); if (q > *top_index) { // Special case when we are targeting the max allowed rate if (rc->this_frame_target >= rc->max_frame_bandwidth) *top_index = q; else q = *top_index; } } assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality); assert(*bottom_index <= rc->worst_quality && *bottom_index >= rc->best_quality); assert(q <= rc->worst_quality && q >= rc->best_quality); return q; } int vp9_frame_type_qdelta(const VP9_COMP *cpi, int rf_level, int q) { static const double rate_factor_deltas[RATE_FACTOR_LEVELS] = { 1.00, // INTER_NORMAL 1.00, // INTER_HIGH 1.50, // GF_ARF_LOW 1.75, // GF_ARF_STD 2.00, // KF_STD }; const VP9_COMMON *const cm = &cpi->common; int qdelta = vp9_compute_qdelta_by_rate( &cpi->rc, cm->frame_type, q, rate_factor_deltas[rf_level], cm->bit_depth); return qdelta; } #define STATIC_MOTION_THRESH 95 static void pick_kf_q_bound_two_pass(const VP9_COMP *cpi, int *bottom_index, int *top_index) { const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; int active_best_quality; int active_worst_quality = cpi->twopass.active_worst_quality; if (rc->this_key_frame_forced) { // Handle the special case for key frames forced when we have reached // the maximum key frame interval. Here force the Q to a range // based on the ambient Q to reduce the risk of popping. 
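// Both branches below anchor the range to the last boosted Q: in the static
// case the worst quality may rise to about 1.25x of it, otherwise the best
// quality may drop to about 0.75x of it.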
double last_boosted_q; int delta_qindex; int qindex; if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) { qindex = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex); active_best_quality = qindex; last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, last_boosted_q * 1.25, cm->bit_depth); active_worst_quality = VPXMIN(qindex + delta_qindex, active_worst_quality); } else { qindex = rc->last_boosted_qindex; last_boosted_q = vp9_convert_qindex_to_q(qindex, cm->bit_depth); delta_qindex = vp9_compute_qdelta(rc, last_boosted_q, last_boosted_q * 0.75, cm->bit_depth); active_best_quality = VPXMAX(qindex + delta_qindex, rc->best_quality); } } else { // Not forced keyframe. double q_adj_factor = 1.0; double q_val; // Baseline value derived from cpi->active_worst_quality and kf boost. active_best_quality = get_kf_active_quality(rc, active_worst_quality, cm->bit_depth); if (cpi->twopass.kf_zeromotion_pct >= STATIC_KF_GROUP_THRESH) { active_best_quality /= 4; } // Don't allow the active min to be lossless (q0) unless the max q // already indicates lossless. active_best_quality = VPXMIN(active_worst_quality, VPXMAX(1, active_best_quality)); // Allow somewhat lower kf minq with small image formats. if ((cm->width * cm->height) <= (352 * 288)) { q_adj_factor -= 0.25; } // Make a further adjustment based on the kf zero motion measure. q_adj_factor += 0.05 - (0.001 * (double)cpi->twopass.kf_zeromotion_pct); // Convert the adjustment factor to a qindex delta // on active_best_quality. q_val = vp9_convert_qindex_to_q(active_best_quality, cm->bit_depth); active_best_quality += vp9_compute_qdelta(rc, q_val, q_val * q_adj_factor, cm->bit_depth); } *top_index = active_worst_quality; *bottom_index = active_best_quality; } static int rc_constant_q(const VP9_COMP *cpi, int *bottom_index, int *top_index, int gf_group_index) { const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const GF_GROUP *gf_group = &cpi->twopass.gf_group; const int is_intra_frame = frame_is_intra_only(cm); const int cq_level = get_active_cq_level_two_pass(&cpi->twopass, rc, oxcf); int q = cq_level; int active_best_quality = cq_level; int active_worst_quality = cq_level; // Key frame qp decision if (is_intra_frame && rc->frames_to_key > 1) pick_kf_q_bound_two_pass(cpi, &active_best_quality, &active_worst_quality); // ARF / GF qp decision if (!is_intra_frame && !rc->is_src_frame_alt_ref && cpi->refresh_alt_ref_frame) { active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth); // Modify best quality for second level arfs. For mode VPX_Q this // becomes the baseline frame q. if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) { const int layer_depth = gf_group->layer_depth[gf_group_index]; // linearly fit the frame q depending on the layer depth index from // the base layer ARF.
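// For layer_depth 2 this works out to (cq_level + active_best_quality + 1) / 2,
// the rounded midpoint; deeper layers pull the frame q progressively closer
// to cq_level.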
active_best_quality = ((layer_depth - 1) * cq_level + active_best_quality + layer_depth / 2) / layer_depth; } } q = active_best_quality; *top_index = active_worst_quality; *bottom_index = active_best_quality; return q; } static int rc_pick_q_and_bounds_two_pass(const VP9_COMP *cpi, int *bottom_index, int *top_index, int gf_group_index) { const VP9_COMMON *const cm = &cpi->common; const RATE_CONTROL *const rc = &cpi->rc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const GF_GROUP *gf_group = &cpi->twopass.gf_group; const int cq_level = get_active_cq_level_two_pass(&cpi->twopass, rc, oxcf); int active_best_quality; int active_worst_quality = cpi->twopass.active_worst_quality; int q; int *inter_minq; int arf_active_best_quality_hl; int *arfgf_high_motion_minq, *arfgf_low_motion_minq; const int boost_frame = !rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame); ASSIGN_MINQ_TABLE(cm->bit_depth, inter_minq); if (oxcf->rc_mode == VPX_Q) return rc_constant_q(cpi, bottom_index, top_index, gf_group_index); if (frame_is_intra_only(cm)) { pick_kf_q_bound_two_pass(cpi, &active_best_quality, &active_worst_quality); } else if (boost_frame) { // Use the lower of active_worst_quality and recent // average Q as basis for GF/ARF best Q limit unless last frame was // a key frame. if (rc->frames_since_key > 1 && rc->avg_frame_qindex[INTER_FRAME] < active_worst_quality) { q = rc->avg_frame_qindex[INTER_FRAME]; } else { q = active_worst_quality; } // For constrained quality don't allow Q less than the cq level if (oxcf->rc_mode == VPX_CQ) { if (q < cq_level) q = cq_level; } active_best_quality = get_gf_active_quality(cpi, q, cm->bit_depth); arf_active_best_quality_hl = active_best_quality; if (rc->arf_increase_active_best_quality == 1) { ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_high_motion_minq); arf_active_best_quality_hl = arfgf_high_motion_minq[q]; } else if (rc->arf_increase_active_best_quality == -1) { ASSIGN_MINQ_TABLE(cm->bit_depth, arfgf_low_motion_minq); arf_active_best_quality_hl = arfgf_low_motion_minq[q]; } active_best_quality = (int)((double)active_best_quality * rc->arf_active_best_quality_adjustment_factor + (double)arf_active_best_quality_hl * (1.0 - rc->arf_active_best_quality_adjustment_factor)); // Modify best quality for second level arfs. For mode VPX_Q this // becomes the baseline frame q. if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) { const int layer_depth = gf_group->layer_depth[gf_group_index]; // linearly fit the frame q depending on the layer depth index from // the base layer ARF. active_best_quality = ((layer_depth - 1) * q + active_best_quality + layer_depth / 2) / layer_depth; } } else { active_best_quality = inter_minq[active_worst_quality]; // For the constrained quality mode we don't want // q to fall below the cq level. if ((oxcf->rc_mode == VPX_CQ) && (active_best_quality < cq_level)) { active_best_quality = cq_level; } } // Extension to max or min Q if undershoot or overshoot is outside // the permitted range.
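// Key frames and boosted (ARF/GF) frames get the full minq extension but
// only half the maxq extension; normal inter frames get the opposite
// weighting.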
if (frame_is_intra_only(cm) || boost_frame) { const int layer_depth = gf_group->layer_depth[gf_group_index]; active_best_quality -= (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast); active_worst_quality += (cpi->twopass.extend_maxq / 2); if (gf_group->rf_level[gf_group_index] == GF_ARF_LOW) { assert(layer_depth > 1); active_best_quality = VPXMAX(active_best_quality, cpi->twopass.last_qindex_of_arf_layer[layer_depth - 1]); } } else { const int max_layer_depth = gf_group->max_layer_depth; assert(max_layer_depth > 0); active_best_quality -= (cpi->twopass.extend_minq + cpi->twopass.extend_minq_fast) / 2; active_worst_quality += cpi->twopass.extend_maxq; // For normal frames do not allow an active minq lower than the q used for // the last boosted frame. active_best_quality = VPXMAX(active_best_quality, cpi->twopass.last_qindex_of_arf_layer[max_layer_depth - 1]); } #if LIMIT_QRANGE_FOR_ALTREF_AND_KEY vpx_clear_system_state(); // Static forced key frames Q restrictions dealt with elsewhere. if (!frame_is_intra_only(cm) || !rc->this_key_frame_forced || cpi->twopass.last_kfgroup_zeromotion_pct < STATIC_MOTION_THRESH) { int qdelta = vp9_frame_type_qdelta(cpi, gf_group->rf_level[gf_group_index], active_worst_quality); active_worst_quality = VPXMAX(active_worst_quality + qdelta, active_best_quality); } #endif // Modify active_best_quality for downscaled normal frames. if (rc->frame_size_selector != UNSCALED && !frame_is_kf_gf_arf(cpi)) { int qdelta = vp9_compute_qdelta_by_rate( rc, cm->frame_type, active_best_quality, 2.0, cm->bit_depth); active_best_quality = VPXMAX(active_best_quality + qdelta, rc->best_quality); } active_best_quality = clamp(active_best_quality, rc->best_quality, rc->worst_quality); active_worst_quality = clamp(active_worst_quality, active_best_quality, rc->worst_quality); if (frame_is_intra_only(cm) && rc->this_key_frame_forced) { // If static since last kf use better of last boosted and last kf q. if (cpi->twopass.last_kfgroup_zeromotion_pct >= STATIC_MOTION_THRESH) { q = VPXMIN(rc->last_kf_qindex, rc->last_boosted_qindex); } else { q = rc->last_boosted_qindex; } } else if (frame_is_intra_only(cm) && !rc->this_key_frame_forced) { q = active_best_quality; } else { q = vp9_rc_regulate_q(cpi, rc->this_frame_target, active_best_quality, active_worst_quality); if (q > active_worst_quality) { // Special case when we are targeting the max allowed rate. 
      if (rc->this_frame_target >= rc->max_frame_bandwidth)
        active_worst_quality = q;
      else
        q = active_worst_quality;
    }
  }
  q = clamp(q, active_best_quality, active_worst_quality);

  *top_index = active_worst_quality;
  *bottom_index = active_best_quality;

  assert(*top_index <= rc->worst_quality && *top_index >= rc->best_quality);
  assert(*bottom_index <= rc->worst_quality &&
         *bottom_index >= rc->best_quality);
  assert(q <= rc->worst_quality && q >= rc->best_quality);
  return q;
}

int vp9_rc_pick_q_and_bounds(const VP9_COMP *cpi, int *bottom_index,
                             int *top_index) {
  int q;
  const int gf_group_index = cpi->twopass.gf_group.index;
  if (cpi->oxcf.pass == 0) {
    if (cpi->oxcf.rc_mode == VPX_CBR)
      q = rc_pick_q_and_bounds_one_pass_cbr(cpi, bottom_index, top_index);
    else
      q = rc_pick_q_and_bounds_one_pass_vbr(cpi, bottom_index, top_index);
  } else {
    q = rc_pick_q_and_bounds_two_pass(cpi, bottom_index, top_index,
                                      gf_group_index);
  }
  if (cpi->sf.use_nonrd_pick_mode) {
    if (cpi->sf.force_frame_boost == 1) q -= cpi->sf.max_delta_qindex;

    if (q < *bottom_index)
      *bottom_index = q;
    else if (q > *top_index)
      *top_index = q;
  }
  return q;
}

void vp9_configure_buffer_updates(VP9_COMP *cpi, int gf_group_index) {
  VP9_COMMON *cm = &cpi->common;
  TWO_PASS *const twopass = &cpi->twopass;

  cpi->rc.is_src_frame_alt_ref = 0;
  cm->show_existing_frame = 0;
  cpi->rc.show_arf_as_gld = 0;
  switch (twopass->gf_group.update_type[gf_group_index]) {
    case KF_UPDATE:
      cpi->refresh_last_frame = 1;
      cpi->refresh_golden_frame = 1;
      cpi->refresh_alt_ref_frame = 1;
      break;
    case LF_UPDATE:
      cpi->refresh_last_frame = 1;
      cpi->refresh_golden_frame = 0;
      cpi->refresh_alt_ref_frame = 0;
      break;
    case GF_UPDATE:
      cpi->refresh_last_frame = 1;
      cpi->refresh_golden_frame = 1;
      cpi->refresh_alt_ref_frame = 0;
      break;
    case OVERLAY_UPDATE:
      cpi->refresh_last_frame = 0;
      cpi->refresh_golden_frame = 1;
      cpi->refresh_alt_ref_frame = 0;
      cpi->rc.is_src_frame_alt_ref = 1;
      if (cpi->rc.preserve_arf_as_gld) {
        cpi->rc.show_arf_as_gld = 1;
        cpi->refresh_golden_frame = 0;
        cm->show_existing_frame = 1;
        cm->refresh_frame_context = 0;
      }
      break;
    case MID_OVERLAY_UPDATE:
      cpi->refresh_last_frame = 1;
      cpi->refresh_golden_frame = 0;
      cpi->refresh_alt_ref_frame = 0;
      cpi->rc.is_src_frame_alt_ref = 1;
      break;
    case USE_BUF_FRAME:
      cpi->refresh_last_frame = 0;
      cpi->refresh_golden_frame = 0;
      cpi->refresh_alt_ref_frame = 0;
      cpi->rc.is_src_frame_alt_ref = 1;
      cm->show_existing_frame = 1;
      cm->refresh_frame_context = 0;
      break;
    default:
      assert(twopass->gf_group.update_type[gf_group_index] == ARF_UPDATE);
      cpi->refresh_last_frame = 0;
      cpi->refresh_golden_frame = 0;
      cpi->refresh_alt_ref_frame = 1;
      break;
  }
}

void vp9_estimate_qp_gop(VP9_COMP *cpi) {
  int gop_length = cpi->twopass.gf_group.gf_group_size;
  int bottom_index, top_index;
  int idx;
  const int gf_index = cpi->twopass.gf_group.index;
  const int is_src_frame_alt_ref = cpi->rc.is_src_frame_alt_ref;
  const int refresh_frame_context = cpi->common.refresh_frame_context;

  for (idx = 1; idx <= gop_length; ++idx) {
    TplDepFrame *tpl_frame = &cpi->tpl_stats[idx];
    int target_rate = cpi->twopass.gf_group.bit_allocation[idx];
    cpi->twopass.gf_group.index = idx;
    vp9_rc_set_frame_target(cpi, target_rate);
    vp9_configure_buffer_updates(cpi, idx);
    tpl_frame->base_qindex =
        rc_pick_q_and_bounds_two_pass(cpi, &bottom_index, &top_index, idx);
    tpl_frame->base_qindex = VPXMAX(tpl_frame->base_qindex, 1);
  }
  // Reset the actual index and frame update
  cpi->twopass.gf_group.index = gf_index;
  cpi->rc.is_src_frame_alt_ref = is_src_frame_alt_ref;
  cpi->common.refresh_frame_context = refresh_frame_context;
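  // Re-apply the buffer-update flags for the restored group index so the
  // encoder state matches the frame actually being coded next. (Added
  // descriptive comment.)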
  vp9_configure_buffer_updates(cpi, gf_index);
}

void vp9_rc_compute_frame_size_bounds(const VP9_COMP *cpi, int frame_target,
                                      int *frame_under_shoot_limit,
                                      int *frame_over_shoot_limit) {
  if (cpi->oxcf.rc_mode == VPX_Q) {
    *frame_under_shoot_limit = 0;
    *frame_over_shoot_limit = INT_MAX;
  } else {
    // For very small rate targets where the fractional adjustment
    // may be tiny make sure there is at least a minimum range.
    const int tol_low = (cpi->sf.recode_tolerance_low * frame_target) / 100;
    const int tol_high = (cpi->sf.recode_tolerance_high * frame_target) / 100;
    *frame_under_shoot_limit = VPXMAX(frame_target - tol_low - 100, 0);
    *frame_over_shoot_limit =
        VPXMIN(frame_target + tol_high + 100, cpi->rc.max_frame_bandwidth);
  }
}

void vp9_rc_set_frame_target(VP9_COMP *cpi, int target) {
  const VP9_COMMON *const cm = &cpi->common;
  RATE_CONTROL *const rc = &cpi->rc;

  rc->this_frame_target = target;

  // Modify frame size target when down-scaling.
  if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC &&
      rc->frame_size_selector != UNSCALED)
    rc->this_frame_target = (int)(rc->this_frame_target *
                                  rate_thresh_mult[rc->frame_size_selector]);

  // Target rate per SB64 (including partial SB64s).
  rc->sb64_target_rate = (int)(((int64_t)rc->this_frame_target * 64 * 64) /
                               (cm->width * cm->height));
}

static void update_alt_ref_frame_stats(VP9_COMP *cpi) {
  // this frame refreshes means next frames don't unless specified by user
  RATE_CONTROL *const rc = &cpi->rc;
  rc->frames_since_golden = 0;

  // Mark the alt ref as done (setting to 0 means no further alt refs pending).
  rc->source_alt_ref_pending = 0;

  // Set the alternate reference frame active flag
  rc->source_alt_ref_active = 1;
}

static void update_golden_frame_stats(VP9_COMP *cpi) {
  RATE_CONTROL *const rc = &cpi->rc;

  // Update the Golden frame usage counts.
  if (cpi->refresh_golden_frame) {
    // this frame refreshes means next frames don't unless specified by user
    rc->frames_since_golden = 0;

    // If we are not using alt ref in the up and coming group clear the arf
    // active flag. In multi arf group case, if the index is not 0 then
    // we are overlaying a mid group arf so should not reset the flag.
    if (cpi->oxcf.pass == 2) {
      if (!rc->source_alt_ref_pending && (cpi->twopass.gf_group.index == 0))
        rc->source_alt_ref_active = 0;
    } else if (!rc->source_alt_ref_pending) {
      rc->source_alt_ref_active = 0;
    }

    // Decrement count down till next gf
    if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;
  } else if (!cpi->refresh_alt_ref_frame) {
    // Decrement count down till next gf
    if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;

    rc->frames_since_golden++;

    if (rc->show_arf_as_gld) {
      rc->frames_since_golden = 0;
      // If we are not using alt ref in the up and coming group clear the arf
      // active flag. In multi arf group case, if the index is not 0 then
      // we are overlaying a mid group arf so should not reset the flag.
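      // (Added note: gf_group.index == 0 here identifies the overlay of the
      // group-level ARF; mid-group ARF overlays carry a non-zero index and
      // intentionally leave source_alt_ref_active set.)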
      if (!rc->source_alt_ref_pending && (cpi->twopass.gf_group.index == 0))
        rc->source_alt_ref_active = 0;
    }
  }
}

static void update_altref_usage(VP9_COMP *const cpi) {
  VP9_COMMON *const cm = &cpi->common;
  int sum_ref_frame_usage = 0;
  int arf_frame_usage = 0;
  int mi_row, mi_col;
  if (cpi->rc.alt_ref_gf_group && !cpi->rc.is_src_frame_alt_ref &&
      !cpi->refresh_golden_frame && !cpi->refresh_alt_ref_frame)
    for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8) {
      for (mi_col = 0; mi_col < cm->mi_cols; mi_col += 8) {
        int sboffset = ((cm->mi_cols + 7) >> 3) * (mi_row >> 3) + (mi_col >> 3);
        sum_ref_frame_usage += cpi->count_arf_frame_usage[sboffset] +
                               cpi->count_lastgolden_frame_usage[sboffset];
        arf_frame_usage += cpi->count_arf_frame_usage[sboffset];
      }
    }
  if (sum_ref_frame_usage > 0) {
    double altref_count = 100.0 * arf_frame_usage / sum_ref_frame_usage;
    cpi->rc.perc_arf_usage =
        0.75 * cpi->rc.perc_arf_usage + 0.25 * altref_count;
  }
}

static void compute_frame_low_motion(VP9_COMP *const cpi) {
  VP9_COMMON *const cm = &cpi->common;
  int mi_row, mi_col;
  MODE_INFO **mi = cm->mi_grid_visible;
  RATE_CONTROL *const rc = &cpi->rc;
  const int rows = cm->mi_rows, cols = cm->mi_cols;
  int cnt_zeromv = 0;
  for (mi_row = 0; mi_row < rows; mi_row++) {
    for (mi_col = 0; mi_col < cols; mi_col++) {
      if (mi[0]->ref_frame[0] == LAST_FRAME &&
          abs(mi[0]->mv[0].as_mv.row) < 16 && abs(mi[0]->mv[0].as_mv.col) < 16)
        cnt_zeromv++;
      mi++;
    }
    mi += 8;
  }
  cnt_zeromv = 100 * cnt_zeromv / (rows * cols);
  rc->avg_frame_low_motion = (3 * rc->avg_frame_low_motion + cnt_zeromv) >> 2;
}

void vp9_rc_postencode_update(VP9_COMP *cpi, uint64_t bytes_used) {
  const VP9_COMMON *const cm = &cpi->common;
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  RATE_CONTROL *const rc = &cpi->rc;
  SVC *const svc = &cpi->svc;
  const int qindex = cm->base_qindex;
  const GF_GROUP *gf_group = &cpi->twopass.gf_group;
  const int gf_group_index = cpi->twopass.gf_group.index;
  const int layer_depth = gf_group->layer_depth[gf_group_index];

  // Update rate control heuristics
  rc->projected_frame_size = (int)(bytes_used << 3);

  // Post encode loop adjustment of Q prediction.
  vp9_rc_update_rate_correction_factors(cpi);

  // Keep a record of last Q and ambient average Q.
  if (frame_is_intra_only(cm)) {
    rc->last_q[KEY_FRAME] = qindex;
    rc->avg_frame_qindex[KEY_FRAME] =
        ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[KEY_FRAME] + qindex, 2);
    if (cpi->use_svc) {
      int i = 0;
      SVC *svc = &cpi->svc;
      for (i = 0; i < svc->number_temporal_layers; ++i) {
        const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i,
                                           svc->number_temporal_layers);
        LAYER_CONTEXT *lc = &svc->layer_context[layer];
        RATE_CONTROL *lrc = &lc->rc;
        lrc->last_q[KEY_FRAME] = rc->last_q[KEY_FRAME];
        lrc->avg_frame_qindex[KEY_FRAME] = rc->avg_frame_qindex[KEY_FRAME];
      }
    }
  } else {
    if ((cpi->use_svc && oxcf->rc_mode == VPX_CBR) ||
        (!rc->is_src_frame_alt_ref &&
         !(cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame))) {
      rc->last_q[INTER_FRAME] = qindex;
      rc->avg_frame_qindex[INTER_FRAME] =
          ROUND_POWER_OF_TWO(3 * rc->avg_frame_qindex[INTER_FRAME] + qindex, 2);
      rc->ni_frames++;
      rc->tot_q += vp9_convert_qindex_to_q(qindex, cm->bit_depth);
      rc->avg_q = rc->tot_q / rc->ni_frames;
      // Calculate the average Q for normal inter frames (not key or GFU
      // frames).
      rc->ni_tot_qi += qindex;
      rc->ni_av_qi = rc->ni_tot_qi / rc->ni_frames;
    }
  }

  if (cpi->use_svc) vp9_svc_adjust_avg_frame_qindex(cpi);

  // Keep record of last boosted (KF/GF/ARF) Q value.
  // If the current frame is coded at a lower Q then we also update it.
  // If all mbs in this group are skipped only update if the Q value is
  // better than that already stored.
  // This is used to help set quality in forced key frames to reduce popping.
  if ((qindex < rc->last_boosted_qindex) || (cm->frame_type == KEY_FRAME) ||
      (!rc->constrained_gf_group &&
       (cpi->refresh_alt_ref_frame ||
        (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
    rc->last_boosted_qindex = qindex;
  }

  if ((qindex < cpi->twopass.last_qindex_of_arf_layer[layer_depth]) ||
      (cm->frame_type == KEY_FRAME) ||
      (!rc->constrained_gf_group &&
       (cpi->refresh_alt_ref_frame ||
        (cpi->refresh_golden_frame && !rc->is_src_frame_alt_ref)))) {
    cpi->twopass.last_qindex_of_arf_layer[layer_depth] = qindex;
  }

  if (frame_is_intra_only(cm)) rc->last_kf_qindex = qindex;

  update_buffer_level_postencode(cpi, rc->projected_frame_size);

  // Rolling monitors of whether we are over or underspending, used to help
  // regulate min and max Q in two pass.
  if (!frame_is_intra_only(cm)) {
    rc->rolling_target_bits = ROUND_POWER_OF_TWO(
        rc->rolling_target_bits * 3 + rc->this_frame_target, 2);
    rc->rolling_actual_bits = ROUND_POWER_OF_TWO(
        rc->rolling_actual_bits * 3 + rc->projected_frame_size, 2);
    rc->long_rolling_target_bits = ROUND_POWER_OF_TWO(
        rc->long_rolling_target_bits * 31 + rc->this_frame_target, 5);
    rc->long_rolling_actual_bits = ROUND_POWER_OF_TWO(
        rc->long_rolling_actual_bits * 31 + rc->projected_frame_size, 5);
  }

  // Actual bits spent
  rc->total_actual_bits += rc->projected_frame_size;
  rc->total_target_bits += cm->show_frame ? rc->avg_frame_bandwidth : 0;

  rc->total_target_vs_actual = rc->total_actual_bits - rc->total_target_bits;

  if (!cpi->use_svc) {
    if (is_altref_enabled(cpi) && cpi->refresh_alt_ref_frame &&
        (!frame_is_intra_only(cm)))
      // Update the alternate reference frame stats as appropriate.
      update_alt_ref_frame_stats(cpi);
    else
      // Update the Golden frame stats as appropriate.
      update_golden_frame_stats(cpi);
  }

  // If second (long term) temporal reference is used for SVC,
  // update the golden frame counter, only for base temporal layer.
  if (cpi->use_svc && svc->use_gf_temporal_ref_current_layer &&
      svc->temporal_layer_id == 0) {
    int i = 0;
    if (cpi->refresh_golden_frame)
      rc->frames_since_golden = 0;
    else
      rc->frames_since_golden++;
    // Decrement count down till next gf
    if (rc->frames_till_gf_update_due > 0) rc->frames_till_gf_update_due--;
    // Update the frames_since_golden for all upper temporal layers.
    for (i = 1; i < svc->number_temporal_layers; ++i) {
      const int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, i,
                                         svc->number_temporal_layers);
      LAYER_CONTEXT *const lc = &svc->layer_context[layer];
      RATE_CONTROL *const lrc = &lc->rc;
      lrc->frames_since_golden = rc->frames_since_golden;
    }
  }

  if (frame_is_intra_only(cm)) rc->frames_since_key = 0;
  if (cm->show_frame) {
    rc->frames_since_key++;
    rc->frames_to_key--;
  }

  // Trigger the resizing of the next frame if it is scaled.
  if (oxcf->pass != 0) {
    cpi->resize_pending =
        rc->next_frame_size_selector != rc->frame_size_selector;
    rc->frame_size_selector = rc->next_frame_size_selector;
  }

  if (oxcf->pass == 0) {
    if (!frame_is_intra_only(cm) &&
        (!cpi->use_svc ||
         (cpi->use_svc &&
          !svc->layer_context[svc->temporal_layer_id].is_key_frame &&
          svc->spatial_layer_id == svc->number_spatial_layers - 1))) {
      compute_frame_low_motion(cpi);
      if (cpi->sf.use_altref_onepass) update_altref_usage(cpi);
    }
    // For SVC: set avg_frame_low_motion (only computed on top spatial layer)
    // to all lower spatial layers.
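    // (Added note: avg_frame_low_motion is a 0-100 score; see
    // compute_frame_low_motion(), which counts the percentage of blocks
    // predicted from LAST_FRAME with near-zero motion and folds it into a
    // running average as (3 * old + new) >> 2.)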
if (cpi->use_svc && svc->spatial_layer_id == svc->number_spatial_layers - 1) { int i; for (i = 0; i < svc->number_spatial_layers - 1; ++i) { const int layer = LAYER_IDS_TO_IDX(i, svc->temporal_layer_id, svc->number_temporal_layers); LAYER_CONTEXT *const lc = &svc->layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; lrc->avg_frame_low_motion = rc->avg_frame_low_motion; } } cpi->rc.last_frame_is_src_altref = cpi->rc.is_src_frame_alt_ref; } if (!frame_is_intra_only(cm)) rc->reset_high_source_sad = 0; rc->last_avg_frame_bandwidth = rc->avg_frame_bandwidth; if (cpi->use_svc && svc->spatial_layer_id < svc->number_spatial_layers - 1) svc->lower_layer_qindex = cm->base_qindex; } void vp9_rc_postencode_update_drop_frame(VP9_COMP *cpi) { cpi->common.current_video_frame++; cpi->rc.frames_since_key++; cpi->rc.frames_to_key--; cpi->rc.rc_2_frame = 0; cpi->rc.rc_1_frame = 0; cpi->rc.last_avg_frame_bandwidth = cpi->rc.avg_frame_bandwidth; // For SVC on dropped frame when framedrop_mode != LAYER_DROP: // in this mode the whole superframe may be dropped if only a single layer // has buffer underflow (below threshold). Since this can then lead to // increasing buffer levels/overflow for certain layers even though whole // superframe is dropped, we cap buffer level if its already stable. if (cpi->use_svc && cpi->svc.framedrop_mode != LAYER_DROP && cpi->rc.buffer_level > cpi->rc.optimal_buffer_level) { cpi->rc.buffer_level = cpi->rc.optimal_buffer_level; cpi->rc.bits_off_target = cpi->rc.optimal_buffer_level; } } static int calc_pframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { const RATE_CONTROL *const rc = &cpi->rc; const int af_ratio = rc->af_ratio_onepass_vbr; int target = (!rc->is_src_frame_alt_ref && (cpi->refresh_golden_frame || cpi->refresh_alt_ref_frame)) ? (rc->avg_frame_bandwidth * rc->baseline_gf_interval * af_ratio) / (rc->baseline_gf_interval + af_ratio - 1) : (rc->avg_frame_bandwidth * rc->baseline_gf_interval) / (rc->baseline_gf_interval + af_ratio - 1); return vp9_rc_clamp_pframe_target_size(cpi, target); } static int calc_iframe_target_size_one_pass_vbr(const VP9_COMP *const cpi) { static const int kf_ratio = 25; const RATE_CONTROL *rc = &cpi->rc; const int target = rc->avg_frame_bandwidth * kf_ratio; return vp9_rc_clamp_iframe_target_size(cpi, target); } static void adjust_gfint_frame_constraint(VP9_COMP *cpi, int frame_constraint) { RATE_CONTROL *const rc = &cpi->rc; rc->constrained_gf_group = 0; // Reset gf interval to make more equal spacing for frame_constraint. if ((frame_constraint <= 7 * rc->baseline_gf_interval >> 2) && (frame_constraint > rc->baseline_gf_interval)) { rc->baseline_gf_interval = frame_constraint >> 1; if (rc->baseline_gf_interval < 5) rc->baseline_gf_interval = frame_constraint; rc->constrained_gf_group = 1; } else { // Reset to keep gf_interval <= frame_constraint. 
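    // (Added illustrative example: with baseline_gf_interval = 16, a
    // frame_constraint of 20 falls in the (16, 28] window handled above and
    // resets the interval to 20 >> 1 = 10, giving two roughly equal groups
    // before the constraint; this fallback simply caps the interval instead.)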
if (rc->baseline_gf_interval > frame_constraint) { rc->baseline_gf_interval = frame_constraint; rc->constrained_gf_group = 1; } } } void vp9_rc_get_one_pass_vbr_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int target; if (!cpi->refresh_alt_ref_frame && (cm->current_video_frame == 0 || (cpi->frame_flags & FRAMEFLAGS_KEY) || rc->frames_to_key == 0)) { cm->frame_type = KEY_FRAME; rc->this_key_frame_forced = cm->current_video_frame != 0 && rc->frames_to_key == 0; rc->frames_to_key = cpi->oxcf.key_freq; rc->kf_boost = DEFAULT_KF_BOOST; rc->source_alt_ref_active = 0; } else { cm->frame_type = INTER_FRAME; } if (rc->frames_till_gf_update_due == 0) { double rate_err = 1.0; rc->gfu_boost = DEFAULT_GF_BOOST; if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0) { vp9_cyclic_refresh_set_golden_update(cpi); } else { rc->baseline_gf_interval = VPXMIN( 20, VPXMAX(10, (rc->min_gf_interval + rc->max_gf_interval) / 2)); } rc->af_ratio_onepass_vbr = 10; if (rc->rolling_target_bits > 0) rate_err = (double)rc->rolling_actual_bits / (double)rc->rolling_target_bits; if (cm->current_video_frame > 30) { if (rc->avg_frame_qindex[INTER_FRAME] > (7 * rc->worst_quality) >> 3 && rate_err > 3.5) { rc->baseline_gf_interval = VPXMIN(15, (3 * rc->baseline_gf_interval) >> 1); } else if (rc->avg_frame_low_motion < 20) { // Decrease gf interval for high motion case. rc->baseline_gf_interval = VPXMAX(6, rc->baseline_gf_interval >> 1); } // Adjust boost and af_ratio based on avg_frame_low_motion, which varies // between 0 and 100 (stationary, 100% zero/small motion). rc->gfu_boost = VPXMAX(500, DEFAULT_GF_BOOST * (rc->avg_frame_low_motion << 1) / (rc->avg_frame_low_motion + 100)); rc->af_ratio_onepass_vbr = VPXMIN(15, VPXMAX(5, 3 * rc->gfu_boost / 400)); } adjust_gfint_frame_constraint(cpi, rc->frames_to_key); rc->frames_till_gf_update_due = rc->baseline_gf_interval; cpi->refresh_golden_frame = 1; rc->source_alt_ref_pending = 0; rc->alt_ref_gf_group = 0; if (cpi->sf.use_altref_onepass && cpi->oxcf.enable_auto_arf) { rc->source_alt_ref_pending = 1; rc->alt_ref_gf_group = 1; } } if (cm->frame_type == KEY_FRAME) target = calc_iframe_target_size_one_pass_vbr(cpi); else target = calc_pframe_target_size_one_pass_vbr(cpi); vp9_rc_set_frame_target(cpi, target); if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->oxcf.pass == 0) vp9_cyclic_refresh_update_parameters(cpi); } static int calc_pframe_target_size_one_pass_cbr(const VP9_COMP *cpi) { const VP9EncoderConfig *oxcf = &cpi->oxcf; const RATE_CONTROL *rc = &cpi->rc; const SVC *const svc = &cpi->svc; const int64_t diff = rc->optimal_buffer_level - rc->buffer_level; const int64_t one_pct_bits = 1 + rc->optimal_buffer_level / 100; int min_frame_target = VPXMAX(rc->avg_frame_bandwidth >> 4, FRAME_OVERHEAD_BITS); int target; if (oxcf->gf_cbr_boost_pct) { const int af_ratio_pct = oxcf->gf_cbr_boost_pct + 100; target = cpi->refresh_golden_frame ? (rc->avg_frame_bandwidth * rc->baseline_gf_interval * af_ratio_pct) / (rc->baseline_gf_interval * 100 + af_ratio_pct - 100) : (rc->avg_frame_bandwidth * rc->baseline_gf_interval * 100) / (rc->baseline_gf_interval * 100 + af_ratio_pct - 100); } else { target = rc->avg_frame_bandwidth; } if (is_one_pass_cbr_svc(cpi)) { // Note that for layers, avg_frame_bandwidth is the cumulative // per-frame-bandwidth. For the target size of this frame, use the // layer average frame size (i.e., non-cumulative per-frame-bw). 
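    // (Added illustrative example, numbers assumed: with two temporal layers
    // budgeted at 300 and 600 kbps cumulative, the top layer's
    // avg_frame_bandwidth reflects the full 600 kbps stream, while
    // lc->avg_frame_size below holds only the per-frame budget belonging to
    // that layer.)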
    int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
                                 svc->number_temporal_layers);
    const LAYER_CONTEXT *lc = &svc->layer_context[layer];
    target = lc->avg_frame_size;
    min_frame_target = VPXMAX(lc->avg_frame_size >> 4, FRAME_OVERHEAD_BITS);
  }
  if (diff > 0) {
    // Lower the target bandwidth for this frame.
    const int pct_low = (int)VPXMIN(diff / one_pct_bits, oxcf->under_shoot_pct);
    target -= (target * pct_low) / 200;
  } else if (diff < 0) {
    // Increase the target bandwidth for this frame.
    const int pct_high =
        (int)VPXMIN(-diff / one_pct_bits, oxcf->over_shoot_pct);
    target += (target * pct_high) / 200;
  }
  if (oxcf->rc_max_inter_bitrate_pct) {
    const int max_rate =
        rc->avg_frame_bandwidth * oxcf->rc_max_inter_bitrate_pct / 100;
    target = VPXMIN(target, max_rate);
  }
  return VPXMAX(min_frame_target, target);
}

static int calc_iframe_target_size_one_pass_cbr(const VP9_COMP *cpi) {
  const RATE_CONTROL *rc = &cpi->rc;
  const VP9EncoderConfig *oxcf = &cpi->oxcf;
  const SVC *const svc = &cpi->svc;
  int target;
  if (cpi->common.current_video_frame == 0) {
    target = ((rc->starting_buffer_level / 2) > INT_MAX)
                 ? INT_MAX
                 : (int)(rc->starting_buffer_level / 2);
  } else {
    int kf_boost = 32;
    double framerate = cpi->framerate;
    if (svc->number_temporal_layers > 1 && oxcf->rc_mode == VPX_CBR) {
      // Use the layer framerate for temporal layers CBR mode.
      const int layer =
          LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
                           svc->number_temporal_layers);
      const LAYER_CONTEXT *lc = &svc->layer_context[layer];
      framerate = lc->framerate;
    }
    kf_boost = VPXMAX(kf_boost, (int)(2 * framerate - 16));
    if (rc->frames_since_key < framerate / 2) {
      kf_boost = (int)(kf_boost * rc->frames_since_key / (framerate / 2));
    }
    target = ((16 + kf_boost) * rc->avg_frame_bandwidth) >> 4;
  }
  return vp9_rc_clamp_iframe_target_size(cpi, target);
}

static void set_intra_only_frame(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  SVC *const svc = &cpi->svc;
  // Don't allow intra_only frame for bypass/flexible SVC mode, or if number
  // of spatial layers is 1 or if number of spatial or temporal layers > 3.
  // Also if intra-only is inserted on very first frame, don't allow it if
  // number of temporal layers > 1. This is because on intra-only frame
  // only 3 reference buffers can be updated, but for temporal layers > 1
  // we generally need to use buffer slots 4 and 5.
  if ((cm->current_video_frame == 0 && svc->number_temporal_layers > 1) ||
      svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS ||
      svc->number_spatial_layers > 3 || svc->number_temporal_layers > 3 ||
      svc->number_spatial_layers == 1)
    return;
  cm->show_frame = 0;
  cm->intra_only = 1;
  cm->frame_type = INTER_FRAME;
  cpi->ext_refresh_frame_flags_pending = 1;
  cpi->ext_refresh_last_frame = 1;
  cpi->ext_refresh_golden_frame = 1;
  cpi->ext_refresh_alt_ref_frame = 1;
  if (cm->current_video_frame == 0) {
    cpi->lst_fb_idx = 0;
    cpi->gld_fb_idx = 1;
    cpi->alt_fb_idx = 2;
  } else {
    int i;
    int count = 0;
    cpi->lst_fb_idx = -1;
    cpi->gld_fb_idx = -1;
    cpi->alt_fb_idx = -1;
    // For intra-only frame we need to refresh all slots that were
    // being used for the base layer (fb_idx_base[i] == 1).
    // Start with assigning last first, then golden and then alt.
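    // (Added illustrative example: if fb_idx_base = {1, 0, 1, 1, 0, 0, 0, 0},
    // the loop below yields lst_fb_idx = 0, gld_fb_idx = 2 and
    // alt_fb_idx = 3, i.e. the first three slots flagged as base-layer
    // buffers.)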
    for (i = 0; i < REF_FRAMES; ++i) {
      if (svc->fb_idx_base[i] == 1) count++;
      if (count == 1 && cpi->lst_fb_idx == -1) cpi->lst_fb_idx = i;
      if (count == 2 && cpi->gld_fb_idx == -1) cpi->gld_fb_idx = i;
      if (count == 3 && cpi->alt_fb_idx == -1) cpi->alt_fb_idx = i;
    }
    // If golden or alt is not being used for base layer, then set them
    // to the lst_fb_idx.
    if (cpi->gld_fb_idx == -1) cpi->gld_fb_idx = cpi->lst_fb_idx;
    if (cpi->alt_fb_idx == -1) cpi->alt_fb_idx = cpi->lst_fb_idx;
  }
}

void vp9_rc_get_svc_params(VP9_COMP *cpi) {
  VP9_COMMON *const cm = &cpi->common;
  RATE_CONTROL *const rc = &cpi->rc;
  SVC *const svc = &cpi->svc;
  int target = rc->avg_frame_bandwidth;
  int layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
                               svc->number_temporal_layers);
  if (svc->first_spatial_layer_to_encode)
    svc->layer_context[svc->temporal_layer_id].is_key_frame = 0;
  // Periodic key frames are based on the super-frame counter
  // (svc.current_superframe); also, only the base spatial layer is a key
  // frame.
  // Key frame is set for any of the following: very first frame, frame flags
  // indicate key, superframe counter hits key frequency, or (non-intra) sync
  // flag is set for spatial layer 0.
  if ((cm->current_video_frame == 0 && !svc->previous_frame_is_intra_only) ||
      (cpi->frame_flags & FRAMEFLAGS_KEY) ||
      (cpi->oxcf.auto_key &&
       (svc->current_superframe % cpi->oxcf.key_freq == 0) &&
       !svc->previous_frame_is_intra_only && svc->spatial_layer_id == 0) ||
      (svc->spatial_layer_sync[0] == 1 && svc->spatial_layer_id == 0)) {
    cm->frame_type = KEY_FRAME;
    rc->source_alt_ref_active = 0;
    if (is_one_pass_cbr_svc(cpi)) {
      if (cm->current_video_frame > 0) vp9_svc_reset_temporal_layers(cpi, 1);
      layer = LAYER_IDS_TO_IDX(svc->spatial_layer_id, svc->temporal_layer_id,
                               svc->number_temporal_layers);
      svc->layer_context[layer].is_key_frame = 1;
      cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
      // Assumption here is that LAST_FRAME is being updated for a keyframe.
      // Thus no change in update flags.
      target = calc_iframe_target_size_one_pass_cbr(cpi);
    }
  } else {
    cm->frame_type = INTER_FRAME;
    if (is_one_pass_cbr_svc(cpi)) {
      LAYER_CONTEXT *lc = &svc->layer_context[layer];
      // Add condition current_video_frame > 0 for the case where first frame
      // is intra only followed by overlay/copy frame. In this case we don't
      // want to reset is_key_frame to 0 on overlay/copy frame.
      lc->is_key_frame =
          (svc->spatial_layer_id == 0 && cm->current_video_frame > 0)
              ? 0
              : svc->layer_context[svc->temporal_layer_id].is_key_frame;
      target = calc_pframe_target_size_one_pass_cbr(cpi);
    }
  }

  if (svc->simulcast_mode) {
    if (svc->spatial_layer_id > 0 &&
        svc->layer_context[layer].is_key_frame == 1) {
      cm->frame_type = KEY_FRAME;
      cpi->ref_frame_flags &= (~VP9_LAST_FLAG & ~VP9_GOLD_FLAG & ~VP9_ALT_FLAG);
      target = calc_iframe_target_size_one_pass_cbr(cpi);
    }
    // Set the buffer idx and refresh flags for key frames in simulcast mode.
    // Note the buffer slot for long-term reference is set below (line 2255),
    // and alt_ref is used for that on key frame. So use last and golden for
    // the other two normal slots.
    if (cm->frame_type == KEY_FRAME) {
      if (svc->number_spatial_layers == 2) {
        if (svc->spatial_layer_id == 0) {
          cpi->lst_fb_idx = 0;
          cpi->gld_fb_idx = 2;
          cpi->alt_fb_idx = 6;
        } else if (svc->spatial_layer_id == 1) {
          cpi->lst_fb_idx = 1;
          cpi->gld_fb_idx = 3;
          cpi->alt_fb_idx = 6;
        }
      } else if (svc->number_spatial_layers == 3) {
        if (svc->spatial_layer_id == 0) {
          cpi->lst_fb_idx = 0;
          cpi->gld_fb_idx = 3;
          cpi->alt_fb_idx = 6;
        } else if (svc->spatial_layer_id == 1) {
          cpi->lst_fb_idx = 1;
          cpi->gld_fb_idx = 4;
          cpi->alt_fb_idx = 6;
        } else if (svc->spatial_layer_id == 2) {
          cpi->lst_fb_idx = 2;
          cpi->gld_fb_idx = 5;
          cpi->alt_fb_idx = 7;
        }
      }
      cpi->ext_refresh_last_frame = 1;
      cpi->ext_refresh_golden_frame = 1;
      cpi->ext_refresh_alt_ref_frame = 1;
    }
  }

  // Check if superframe contains a sync layer request.
  vp9_svc_check_spatial_layer_sync(cpi);

  // If the long term temporal feature is enabled, set the period of the
  // update. The update/refresh of this reference frame is always on a base
  // temporal layer frame.
  if (svc->use_gf_temporal_ref_current_layer) {
    // Only use gf long-term prediction on non-key superframes.
    if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) {
      // Use golden for this reference, which will be used for prediction.
      int index = svc->spatial_layer_id;
      if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
      assert(index >= 0);
      cpi->gld_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
      // Enable prediction off LAST (last reference) and golden (which will
      // generally be further behind/long-term reference).
      cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG;
    }
    // Check for update/refresh of reference: only refresh on base temporal
    // layer.
    if (svc->temporal_layer_id == 0) {
      if (svc->layer_context[svc->temporal_layer_id].is_key_frame) {
        // On key frame we update the buffer index used for long term
        // reference. Use the alt_ref since it is not used or updated on key
        // frames.
        int index = svc->spatial_layer_id;
        if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1;
        assert(index >= 0);
        cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx;
        cpi->ext_refresh_alt_ref_frame = 1;
      } else if (rc->frames_till_gf_update_due == 0) {
        // Set the period of the next update. Make it a multiple of 10, as the
        // cyclic refresh is typically ~10%, and we'd like the update to
        // happen after a few cycles of the refresh (so it is a better
        // quality frame). Note the cyclic refresh for SVC only operates on
        // base temporal layer frames. Choose 20 as the period for now
        // (2 cycles).
        rc->baseline_gf_interval = 20;
        rc->frames_till_gf_update_due = rc->baseline_gf_interval;
        cpi->ext_refresh_golden_frame = 1;
        rc->gfu_boost = DEFAULT_GF_BOOST;
      }
    }
  } else if (!svc->use_gf_temporal_ref) {
    rc->frames_till_gf_update_due = INT_MAX;
    rc->baseline_gf_interval = INT_MAX;
  }

  if (svc->set_intra_only_frame) {
    set_intra_only_frame(cpi);
    target = calc_iframe_target_size_one_pass_cbr(cpi);
  }

  // Any update/change of global cyclic refresh parameters (amount/delta-qp)
  // should be done here, before the frame qp is selected.
if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_update_parameters(cpi); vp9_rc_set_frame_target(cpi, target); if (cm->show_frame) update_buffer_level_svc_preencode(cpi); } void vp9_rc_get_one_pass_cbr_params(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int target; if ((cm->current_video_frame == 0) || (cpi->frame_flags & FRAMEFLAGS_KEY) || (cpi->oxcf.auto_key && rc->frames_to_key == 0)) { cm->frame_type = KEY_FRAME; rc->frames_to_key = cpi->oxcf.key_freq; rc->kf_boost = DEFAULT_KF_BOOST; rc->source_alt_ref_active = 0; } else { cm->frame_type = INTER_FRAME; } if (rc->frames_till_gf_update_due == 0) { if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_set_golden_update(cpi); else rc->baseline_gf_interval = (rc->min_gf_interval + rc->max_gf_interval) / 2; rc->frames_till_gf_update_due = rc->baseline_gf_interval; // NOTE: frames_till_gf_update_due must be <= frames_to_key. if (rc->frames_till_gf_update_due > rc->frames_to_key) rc->frames_till_gf_update_due = rc->frames_to_key; cpi->refresh_golden_frame = 1; rc->gfu_boost = DEFAULT_GF_BOOST; } // Any update/change of global cyclic refresh parameters (amount/delta-qp) // should be done here, before the frame qp is selected. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ) vp9_cyclic_refresh_update_parameters(cpi); if (frame_is_intra_only(cm)) target = calc_iframe_target_size_one_pass_cbr(cpi); else target = calc_pframe_target_size_one_pass_cbr(cpi); vp9_rc_set_frame_target(cpi, target); if (cm->show_frame) update_buffer_level_preencode(cpi); if (cpi->oxcf.resize_mode == RESIZE_DYNAMIC) cpi->resize_pending = vp9_resize_one_pass_cbr(cpi); else cpi->resize_pending = 0; } int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget, vpx_bit_depth_t bit_depth) { int start_index = rc->worst_quality; int target_index = rc->worst_quality; int i; // Convert the average q value to an index. for (i = rc->best_quality; i < rc->worst_quality; ++i) { start_index = i; if (vp9_convert_qindex_to_q(i, bit_depth) >= qstart) break; } // Convert the q target to an index for (i = rc->best_quality; i < rc->worst_quality; ++i) { target_index = i; if (vp9_convert_qindex_to_q(i, bit_depth) >= qtarget) break; } return target_index - start_index; } int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type, int qindex, double rate_target_ratio, vpx_bit_depth_t bit_depth) { int target_index = rc->worst_quality; int i; // Look up the current projected bits per block for the base index const int base_bits_per_mb = vp9_rc_bits_per_mb(frame_type, qindex, 1.0, bit_depth); // Find the target bits per mb based on the base value and given ratio. 
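  // (Added illustrative note: with rate_target_ratio = 2.0 the search below
  // returns the smallest qindex whose projected bits/mb is at most twice the
  // bits/mb at the input qindex; since bits/mb falls as qindex rises, the
  // resulting qdelta is <= 0, i.e. a quality boost.)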
  const int target_bits_per_mb = (int)(rate_target_ratio * base_bits_per_mb);

  // Convert the q target to an index
  for (i = rc->best_quality; i < rc->worst_quality; ++i) {
    if (vp9_rc_bits_per_mb(frame_type, i, 1.0, bit_depth) <=
        target_bits_per_mb) {
      target_index = i;
      break;
    }
  }
  return target_index - qindex;
}

void vp9_rc_set_gf_interval_range(const VP9_COMP *const cpi,
                                  RATE_CONTROL *const rc) {
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  // Special case code for 1 pass fixed Q mode tests
  if ((oxcf->pass == 0) && (oxcf->rc_mode == VPX_Q)) {
    rc->max_gf_interval = FIXED_GF_INTERVAL;
    rc->min_gf_interval = FIXED_GF_INTERVAL;
    rc->static_scene_max_gf_interval = FIXED_GF_INTERVAL;
  } else {
    // Set Maximum gf/arf interval
    rc->max_gf_interval = oxcf->max_gf_interval;
    rc->min_gf_interval = oxcf->min_gf_interval;
#if CONFIG_RATE_CTRL
    if (rc->min_gf_interval == 0) {
      rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
          oxcf->width, oxcf->height, oxcf->init_framerate);
    }
    if (rc->max_gf_interval == 0) {
      rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
          oxcf->init_framerate, rc->min_gf_interval);
    }
#else
    if (rc->min_gf_interval == 0)
      rc->min_gf_interval = vp9_rc_get_default_min_gf_interval(
          oxcf->width, oxcf->height, cpi->framerate);
    if (rc->max_gf_interval == 0)
      rc->max_gf_interval = vp9_rc_get_default_max_gf_interval(
          cpi->framerate, rc->min_gf_interval);
#endif

    // Extended max interval for genuinely static scenes like slide shows.
    rc->static_scene_max_gf_interval = MAX_STATIC_GF_GROUP_LENGTH;

    if (rc->max_gf_interval > rc->static_scene_max_gf_interval)
      rc->max_gf_interval = rc->static_scene_max_gf_interval;

    // Clamp min to max
    rc->min_gf_interval = VPXMIN(rc->min_gf_interval, rc->max_gf_interval);

    if (oxcf->target_level == LEVEL_AUTO) {
      const uint32_t pic_size = cpi->common.width * cpi->common.height;
      const uint32_t pic_breadth =
          VPXMAX(cpi->common.width, cpi->common.height);
      int i;
      for (i = LEVEL_1; i < LEVEL_MAX; ++i) {
        if (vp9_level_defs[i].max_luma_picture_size >= pic_size &&
            vp9_level_defs[i].max_luma_picture_breadth >= pic_breadth) {
          if (rc->min_gf_interval <=
              (int)vp9_level_defs[i].min_altref_distance) {
            rc->min_gf_interval =
                (int)vp9_level_defs[i].min_altref_distance + 1;
            rc->max_gf_interval =
                VPXMAX(rc->max_gf_interval, rc->min_gf_interval);
          }
          break;
        }
      }
    }
  }
}

void vp9_rc_update_framerate(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  RATE_CONTROL *const rc = &cpi->rc;
  int vbr_max_bits;

  rc->avg_frame_bandwidth = (int)(oxcf->target_bandwidth / cpi->framerate);
  rc->min_frame_bandwidth =
      (int)(rc->avg_frame_bandwidth * oxcf->two_pass_vbrmin_section / 100);
  rc->min_frame_bandwidth =
      VPXMAX(rc->min_frame_bandwidth, FRAME_OVERHEAD_BITS);

  // A maximum bitrate for a frame is defined.
  // However this limit is extended if a very high rate is given on the
  // command line or the rate cannot be achieved because of a user specified
  // max q (e.g. when the user specifies lossless encode).
  //
  // If a level is specified that requires a lower maximum rate, then the
  // level value takes precedence.
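  // (Added worked example, illustrative numbers only: with
  // avg_frame_bandwidth = 100000 bits and two_pass_vbrmax_section = 800
  // (i.e. 800%), vbr_max_bits below is 800000; max_frame_bandwidth is then
  // the largest of this, cm->MBs * MAX_MB_RATE, and MAXRATE_1080P.)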
  vbr_max_bits =
      (int)(((int64_t)rc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) /
            100);
  rc->max_frame_bandwidth =
      VPXMAX(VPXMAX((cm->MBs * MAX_MB_RATE), MAXRATE_1080P), vbr_max_bits);

  vp9_rc_set_gf_interval_range(cpi, rc);
}

#define VBR_PCT_ADJUSTMENT_LIMIT 50
// For VBR...adjustment to the frame target based on error from previous frames
static void vbr_rate_correction(VP9_COMP *cpi, int *this_frame_target) {
  RATE_CONTROL *const rc = &cpi->rc;
  int64_t vbr_bits_off_target = rc->vbr_bits_off_target;
  int max_delta;
  int frame_window = VPXMIN(16, ((int)cpi->twopass.total_stats.count -
                                 cpi->common.current_video_frame));

  // Calculate the adjustment to rate for this frame.
  if (frame_window > 0) {
    max_delta = (vbr_bits_off_target > 0)
                    ? (int)(vbr_bits_off_target / frame_window)
                    : (int)(-vbr_bits_off_target / frame_window);
    max_delta = VPXMIN(max_delta,
                       ((*this_frame_target * VBR_PCT_ADJUSTMENT_LIMIT) / 100));

    // vbr_bits_off_target > 0 means we have extra bits to spend
    if (vbr_bits_off_target > 0) {
      *this_frame_target += (vbr_bits_off_target > max_delta)
                                ? max_delta
                                : (int)vbr_bits_off_target;
    } else {
      *this_frame_target -= (vbr_bits_off_target < -max_delta)
                                ? max_delta
                                : (int)-vbr_bits_off_target;
    }
  }

  // Fast redistribution of bits arising from massive local undershoot.
  // Don't do it for kf, arf, gf or overlay frames.
  if (!frame_is_kf_gf_arf(cpi) && !rc->is_src_frame_alt_ref &&
      rc->vbr_bits_off_target_fast) {
    int one_frame_bits = VPXMAX(rc->avg_frame_bandwidth, *this_frame_target);
    int fast_extra_bits;
    fast_extra_bits = (int)VPXMIN(rc->vbr_bits_off_target_fast, one_frame_bits);
    fast_extra_bits = (int)VPXMIN(
        fast_extra_bits,
        VPXMAX(one_frame_bits / 8, rc->vbr_bits_off_target_fast / 8));
    *this_frame_target += (int)fast_extra_bits;
    rc->vbr_bits_off_target_fast -= fast_extra_bits;
  }
}

void vp9_set_target_rate(VP9_COMP *cpi) {
  RATE_CONTROL *const rc = &cpi->rc;
  int target_rate = rc->base_frame_target;

  if (cpi->common.frame_type == KEY_FRAME)
    target_rate = vp9_rc_clamp_iframe_target_size(cpi, target_rate);
  else
    target_rate = vp9_rc_clamp_pframe_target_size(cpi, target_rate);

  if (!cpi->oxcf.vbr_corpus_complexity) {
    // Correction to rate target based on prior over or under shoot.
    if (cpi->oxcf.rc_mode == VPX_VBR || cpi->oxcf.rc_mode == VPX_CQ)
      vbr_rate_correction(cpi, &target_rate);
  }
  vp9_rc_set_frame_target(cpi, target_rate);
}

// Check if we should resize, based on average QP from past x frames.
// Only allow for resize at most one scale down for now, scaling factor is 2.
int vp9_resize_one_pass_cbr(VP9_COMP *cpi) {
  const VP9_COMMON *const cm = &cpi->common;
  RATE_CONTROL *const rc = &cpi->rc;
  RESIZE_ACTION resize_action = NO_RESIZE;
  int avg_qp_thr1 = 70;
  int avg_qp_thr2 = 50;
  int min_width = 180;
  int min_height = 180;
  int down_size_on = 1;
  cpi->resize_scale_num = 1;
  cpi->resize_scale_den = 1;
  // Don't resize on key frame; reset the counters on key frame.
  if (cm->frame_type == KEY_FRAME) {
    cpi->resize_avg_qp = 0;
    cpi->resize_count = 0;
    return 0;
  }

  // Check current frame resolution to avoid generating frames smaller than
  // the minimum resolution.
  if (ONEHALFONLY_RESIZE) {
    if ((cm->width >> 1) < min_width || (cm->height >> 1) < min_height)
      down_size_on = 0;
  } else {
    if (cpi->resize_state == ORIG &&
        (cm->width * 3 / 4 < min_width || cm->height * 3 / 4 < min_height))
      return 0;
    else if (cpi->resize_state == THREE_QUARTER &&
             ((cpi->oxcf.width >> 1) < min_width ||
              (cpi->oxcf.height >> 1) < min_height))
      down_size_on = 0;
  }

#if CONFIG_VP9_TEMPORAL_DENOISING
  // If denoiser is on, apply a smaller qp threshold.
  if (cpi->oxcf.noise_sensitivity > 0) {
    avg_qp_thr1 = 60;
    avg_qp_thr2 = 40;
  }
#endif

  // Resize based on average buffer underflow and QP over some window.
  // Ignore samples close to key frame, since QP is usually high after key.
  if (cpi->rc.frames_since_key > 2 * cpi->framerate) {
    const int window = (int)(4 * cpi->framerate);
    cpi->resize_avg_qp += cm->base_qindex;
    if (cpi->rc.buffer_level < (int)(30 * rc->optimal_buffer_level / 100))
      ++cpi->resize_buffer_underflow;
    ++cpi->resize_count;
    // Check for resize action every "window" frames.
    if (cpi->resize_count >= window) {
      int avg_qp = cpi->resize_avg_qp / cpi->resize_count;
      // Resize down if buffer level has underflowed sufficient amount in past
      // window, and we are at original or 3/4 of original resolution.
      // Resize back up if average QP is low, and we are currently in a resized
      // down state, i.e. 1/2 or 3/4 of original resolution.
      // Currently, use a flag to turn 3/4 resizing feature on/off.
      if (cpi->resize_buffer_underflow > (cpi->resize_count >> 2)) {
        if (cpi->resize_state == THREE_QUARTER && down_size_on) {
          resize_action = DOWN_ONEHALF;
          cpi->resize_state = ONE_HALF;
        } else if (cpi->resize_state == ORIG) {
          resize_action = ONEHALFONLY_RESIZE ? DOWN_ONEHALF : DOWN_THREEFOUR;
          cpi->resize_state = ONEHALFONLY_RESIZE ? ONE_HALF : THREE_QUARTER;
        }
      } else if (cpi->resize_state != ORIG &&
                 avg_qp < avg_qp_thr1 * cpi->rc.worst_quality / 100) {
        if (cpi->resize_state == THREE_QUARTER ||
            avg_qp < avg_qp_thr2 * cpi->rc.worst_quality / 100 ||
            ONEHALFONLY_RESIZE) {
          resize_action = UP_ORIG;
          cpi->resize_state = ORIG;
        } else if (cpi->resize_state == ONE_HALF) {
          resize_action = UP_THREEFOUR;
          cpi->resize_state = THREE_QUARTER;
        }
      }
      // Reset for next window measurement.
      cpi->resize_avg_qp = 0;
      cpi->resize_count = 0;
      cpi->resize_buffer_underflow = 0;
    }
  }
  // If the decision is to resize, reset some quantities, and check if we
  // should reduce the rate correction factor.
  if (resize_action != NO_RESIZE) {
    int target_bits_per_frame;
    int active_worst_quality;
    int qindex;
    int tot_scale_change;
    if (resize_action == DOWN_THREEFOUR || resize_action == UP_THREEFOUR) {
      cpi->resize_scale_num = 3;
      cpi->resize_scale_den = 4;
    } else if (resize_action == DOWN_ONEHALF) {
      cpi->resize_scale_num = 1;
      cpi->resize_scale_den = 2;
    } else {  // UP_ORIG or anything else
      cpi->resize_scale_num = 1;
      cpi->resize_scale_den = 1;
    }
    tot_scale_change = (cpi->resize_scale_den * cpi->resize_scale_den) /
                       (cpi->resize_scale_num * cpi->resize_scale_num);
    // Reset buffer level to optimal, update target size.
    rc->buffer_level = rc->optimal_buffer_level;
    rc->bits_off_target = rc->optimal_buffer_level;
    rc->this_frame_target = calc_pframe_target_size_one_pass_cbr(cpi);
    // Get the projected qindex, based on the scaled target frame size (scaled
    // so target_bits_per_mb in vp9_rc_regulate_q will be correct target).
    target_bits_per_frame = (resize_action >= 0) ?
rc->this_frame_target * tot_scale_change : rc->this_frame_target / tot_scale_change; active_worst_quality = calc_active_worst_quality_one_pass_cbr(cpi); qindex = vp9_rc_regulate_q(cpi, target_bits_per_frame, rc->best_quality, active_worst_quality); // If resize is down, check if projected q index is close to worst_quality, // and if so, reduce the rate correction factor (since likely can afford // lower q for resized frame). if (resize_action > 0 && qindex > 90 * cpi->rc.worst_quality / 100) { rc->rate_correction_factors[INTER_NORMAL] *= 0.85; } // If resize is back up, check if projected q index is too much above the // current base_qindex, and if so, reduce the rate correction factor // (since prefer to keep q for resized frame at least close to previous q). if (resize_action < 0 && qindex > 130 * cm->base_qindex / 100) { rc->rate_correction_factors[INTER_NORMAL] *= 0.9; } } return resize_action; } static void adjust_gf_boost_lag_one_pass_vbr(VP9_COMP *cpi, uint64_t avg_sad_current) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; int target; int found = 0; int found2 = 0; int frame; int i; uint64_t avg_source_sad_lag = avg_sad_current; int high_source_sad_lagindex = -1; int steady_sad_lagindex = -1; uint32_t sad_thresh1 = 70000; uint32_t sad_thresh2 = 120000; int low_content = 0; int high_content = 0; double rate_err = 1.0; // Get measure of complexity over the future frames, and get the first // future frame with high_source_sad/scene-change. int tot_frames = (int)vp9_lookahead_depth(cpi->lookahead) - 1; for (frame = tot_frames; frame >= 1; --frame) { const int lagframe_idx = tot_frames - frame + 1; uint64_t reference_sad = rc->avg_source_sad[0]; for (i = 1; i < lagframe_idx; ++i) { if (rc->avg_source_sad[i] > 0) reference_sad = (3 * reference_sad + rc->avg_source_sad[i]) >> 2; } // Detect up-coming scene change. if (!found && (rc->avg_source_sad[lagframe_idx] > VPXMAX(sad_thresh1, (unsigned int)(reference_sad << 1)) || rc->avg_source_sad[lagframe_idx] > VPXMAX(3 * sad_thresh1 >> 2, (unsigned int)(reference_sad << 2)))) { high_source_sad_lagindex = lagframe_idx; found = 1; } // Detect change from motion to steady. if (!found2 && lagframe_idx > 1 && lagframe_idx < tot_frames && rc->avg_source_sad[lagframe_idx - 1] > (sad_thresh1 >> 2)) { found2 = 1; for (i = lagframe_idx; i < tot_frames; ++i) { if (!(rc->avg_source_sad[i] > 0 && rc->avg_source_sad[i] < (sad_thresh1 >> 2) && rc->avg_source_sad[i] < (rc->avg_source_sad[lagframe_idx - 1] >> 1))) { found2 = 0; i = tot_frames; } } if (found2) steady_sad_lagindex = lagframe_idx; } avg_source_sad_lag += rc->avg_source_sad[lagframe_idx]; } if (tot_frames > 0) avg_source_sad_lag = avg_source_sad_lag / tot_frames; // Constrain distance between detected scene cuts. if (high_source_sad_lagindex != -1 && high_source_sad_lagindex != rc->high_source_sad_lagindex - 1 && abs(high_source_sad_lagindex - rc->high_source_sad_lagindex) < 4) rc->high_source_sad_lagindex = -1; else rc->high_source_sad_lagindex = high_source_sad_lagindex; // Adjust some factors for the next GF group, ignore initial key frame, // and only for lag_in_frames not too small. 
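  // (Added note: the backward scan above marks the first future frame whose
  // SAD jumps well above the running reference (an upcoming scene change)
  // and the first point where per-frame SAD settles below sad_thresh1 / 4
  // (motion turning steady); both indices are used below to constrain the
  // next GF interval.)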
  if (cpi->refresh_golden_frame == 1 && cm->current_video_frame > 30 &&
      cpi->oxcf.lag_in_frames > 8) {
    int frame_constraint;
    if (rc->rolling_target_bits > 0)
      rate_err =
          (double)rc->rolling_actual_bits / (double)rc->rolling_target_bits;
    high_content = high_source_sad_lagindex != -1 ||
                   avg_source_sad_lag > (rc->prev_avg_source_sad_lag << 1) ||
                   avg_source_sad_lag > sad_thresh2;
    low_content = high_source_sad_lagindex == -1 &&
                  ((avg_source_sad_lag < (rc->prev_avg_source_sad_lag >> 1)) ||
                   (avg_source_sad_lag < sad_thresh1));
    if (low_content) {
      rc->gfu_boost = DEFAULT_GF_BOOST;
      rc->baseline_gf_interval =
          VPXMIN(15, (3 * rc->baseline_gf_interval) >> 1);
    } else if (high_content) {
      rc->gfu_boost = DEFAULT_GF_BOOST >> 1;
      rc->baseline_gf_interval = (rate_err > 3.0)
                                     ? VPXMAX(10, rc->baseline_gf_interval >> 1)
                                     : VPXMAX(6, rc->baseline_gf_interval >> 1);
    }
    if (rc->baseline_gf_interval > cpi->oxcf.lag_in_frames - 1)
      rc->baseline_gf_interval = cpi->oxcf.lag_in_frames - 1;
    // Check for constraining gf_interval for up-coming scene/content changes,
    // or for up-coming key frame, whichever is closer.
    frame_constraint = rc->frames_to_key;
    if (rc->high_source_sad_lagindex > 0 &&
        frame_constraint > rc->high_source_sad_lagindex)
      frame_constraint = rc->high_source_sad_lagindex;
    if (steady_sad_lagindex > 3 && frame_constraint > steady_sad_lagindex)
      frame_constraint = steady_sad_lagindex;
    adjust_gfint_frame_constraint(cpi, frame_constraint);
    rc->frames_till_gf_update_due = rc->baseline_gf_interval;
    // Adjust factors for active_worst setting & af_ratio for next gf interval.
    rc->fac_active_worst_inter = 150;  // corresponds to 3/2 (= 150/100).
    rc->fac_active_worst_gf = 100;
    if (rate_err < 2.0 && !high_content) {
      rc->fac_active_worst_inter = 120;
      rc->fac_active_worst_gf = 90;
    } else if (rate_err > 8.0 && rc->avg_frame_qindex[INTER_FRAME] < 16) {
      // Increase active_worst faster at low Q if rate fluctuation is high.
      rc->fac_active_worst_inter = 200;
      if (rc->avg_frame_qindex[INTER_FRAME] < 8)
        rc->fac_active_worst_inter = 400;
    }
    if (low_content && rc->avg_frame_low_motion > 80) {
      rc->af_ratio_onepass_vbr = 15;
    } else if (high_content || rc->avg_frame_low_motion < 30) {
      rc->af_ratio_onepass_vbr = 5;
      rc->gfu_boost = DEFAULT_GF_BOOST >> 2;
    }
    if (cpi->sf.use_altref_onepass && cpi->oxcf.enable_auto_arf) {
      // Flag to disable usage of ARF based on past usage, only allow this
      // disabling if current frame/group does not start with key frame or
      // scene cut. Note perc_arf_usage is only computed for speed >= 5.
      int arf_usage_low =
          (cm->frame_type != KEY_FRAME && !rc->high_source_sad &&
           cpi->rc.perc_arf_usage < 15 && cpi->oxcf.speed >= 5);
      // Don't use alt-ref for this group under certain conditions.
      if (arf_usage_low ||
          (rc->high_source_sad_lagindex > 0 &&
           rc->high_source_sad_lagindex <= rc->frames_till_gf_update_due) ||
          (avg_source_sad_lag > 3 * sad_thresh1 >> 3)) {
        rc->source_alt_ref_pending = 0;
        rc->alt_ref_gf_group = 0;
      } else {
        rc->source_alt_ref_pending = 1;
        rc->alt_ref_gf_group = 1;
        // If alt-ref is used for this gf group, limit the interval.
        if (rc->baseline_gf_interval > 12) {
          rc->baseline_gf_interval = 12;
          rc->frames_till_gf_update_due = rc->baseline_gf_interval;
        }
      }
    }
    target = calc_pframe_target_size_one_pass_vbr(cpi);
    vp9_rc_set_frame_target(cpi, target);
  }
  rc->prev_avg_source_sad_lag = avg_source_sad_lag;
}

// Compute average source sad (temporal sad: between current source and
// previous source) over a subset of superblocks. Use this to detect big
// changes in content and allow rate control to react.
// This function also handles special case of lag_in_frames, to measure content // level in #future frames set by the lag_in_frames. void vp9_scene_detection_onepass(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; YV12_BUFFER_CONFIG const *unscaled_src = cpi->un_scaled_source; YV12_BUFFER_CONFIG const *unscaled_last_src = cpi->unscaled_last_source; uint8_t *src_y; int src_ystride; int src_width; int src_height; uint8_t *last_src_y; int last_src_ystride; int last_src_width; int last_src_height; if (cpi->un_scaled_source == NULL || cpi->unscaled_last_source == NULL || (cpi->use_svc && cpi->svc.current_superframe == 0)) return; src_y = unscaled_src->y_buffer; src_ystride = unscaled_src->y_stride; src_width = unscaled_src->y_width; src_height = unscaled_src->y_height; last_src_y = unscaled_last_src->y_buffer; last_src_ystride = unscaled_last_src->y_stride; last_src_width = unscaled_last_src->y_width; last_src_height = unscaled_last_src->y_height; #if CONFIG_VP9_HIGHBITDEPTH if (cm->use_highbitdepth) return; #endif rc->high_source_sad = 0; rc->high_num_blocks_with_motion = 0; // For SVC: scene detection is only checked on first spatial layer of // the superframe using the original/unscaled resolutions. if (cpi->svc.spatial_layer_id == cpi->svc.first_spatial_layer_to_encode && src_width == last_src_width && src_height == last_src_height) { YV12_BUFFER_CONFIG *frames[MAX_LAG_BUFFERS] = { NULL }; int num_mi_cols = cm->mi_cols; int num_mi_rows = cm->mi_rows; int start_frame = 0; int frames_to_buffer = 1; int frame = 0; int scene_cut_force_key_frame = 0; int num_zero_temp_sad = 0; uint64_t avg_sad_current = 0; uint32_t min_thresh = 10000; float thresh = 8.0f; uint32_t thresh_key = 140000; if (cpi->oxcf.speed <= 5) thresh_key = 240000; if (cpi->oxcf.content != VP9E_CONTENT_SCREEN) min_thresh = 65000; if (cpi->oxcf.rc_mode == VPX_VBR) thresh = 2.1f; if (cpi->use_svc && cpi->svc.number_spatial_layers > 1) { const int aligned_width = ALIGN_POWER_OF_TWO(src_width, MI_SIZE_LOG2); const int aligned_height = ALIGN_POWER_OF_TWO(src_height, MI_SIZE_LOG2); num_mi_cols = aligned_width >> MI_SIZE_LOG2; num_mi_rows = aligned_height >> MI_SIZE_LOG2; } if (cpi->oxcf.lag_in_frames > 0) { frames_to_buffer = (cm->current_video_frame == 1) ? (int)vp9_lookahead_depth(cpi->lookahead) - 1 : 2; start_frame = (int)vp9_lookahead_depth(cpi->lookahead) - 1; for (frame = 0; frame < frames_to_buffer; ++frame) { const int lagframe_idx = start_frame - frame; if (lagframe_idx >= 0) { struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead, lagframe_idx); frames[frame] = &buf->img; } } // The avg_sad for this current frame is the value of frame#1 // (first future frame) from previous frame. avg_sad_current = rc->avg_source_sad[1]; if (avg_sad_current > VPXMAX(min_thresh, (unsigned int)(rc->avg_source_sad[0] * thresh)) && cm->current_video_frame > (unsigned int)cpi->oxcf.lag_in_frames) rc->high_source_sad = 1; else rc->high_source_sad = 0; if (rc->high_source_sad && avg_sad_current > thresh_key) scene_cut_force_key_frame = 1; // Update recursive average for current frame. if (avg_sad_current > 0) rc->avg_source_sad[0] = (3 * rc->avg_source_sad[0] + avg_sad_current) >> 2; // Shift back data, starting at frame#1. 
for (frame = 1; frame < cpi->oxcf.lag_in_frames - 1; ++frame) rc->avg_source_sad[frame] = rc->avg_source_sad[frame + 1]; } for (frame = 0; frame < frames_to_buffer; ++frame) { if (cpi->oxcf.lag_in_frames == 0 || (frames[frame] != NULL && frames[frame + 1] != NULL && frames[frame]->y_width == frames[frame + 1]->y_width && frames[frame]->y_height == frames[frame + 1]->y_height)) { int sbi_row, sbi_col; const int lagframe_idx = (cpi->oxcf.lag_in_frames == 0) ? 0 : start_frame - frame + 1; const BLOCK_SIZE bsize = BLOCK_64X64; // Loop over sub-sample of frame, compute average sad over 64x64 blocks. uint64_t avg_sad = 0; uint64_t tmp_sad = 0; int num_samples = 0; int sb_cols = (num_mi_cols + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; int sb_rows = (num_mi_rows + MI_BLOCK_SIZE - 1) / MI_BLOCK_SIZE; if (cpi->oxcf.lag_in_frames > 0) { src_y = frames[frame]->y_buffer; src_ystride = frames[frame]->y_stride; last_src_y = frames[frame + 1]->y_buffer; last_src_ystride = frames[frame + 1]->y_stride; } num_zero_temp_sad = 0; for (sbi_row = 0; sbi_row < sb_rows; ++sbi_row) { for (sbi_col = 0; sbi_col < sb_cols; ++sbi_col) { // Checker-board pattern, ignore boundary. if (((sbi_row > 0 && sbi_col > 0) && (sbi_row < sb_rows - 1 && sbi_col < sb_cols - 1) && ((sbi_row % 2 == 0 && sbi_col % 2 == 0) || (sbi_row % 2 != 0 && sbi_col % 2 != 0)))) { tmp_sad = cpi->fn_ptr[bsize].sdf(src_y, src_ystride, last_src_y, last_src_ystride); avg_sad += tmp_sad; num_samples++; if (tmp_sad == 0) num_zero_temp_sad++; } src_y += 64; last_src_y += 64; } src_y += (src_ystride << 6) - (sb_cols << 6); last_src_y += (last_src_ystride << 6) - (sb_cols << 6); } if (num_samples > 0) avg_sad = avg_sad / num_samples; // Set high_source_sad flag if we detect very high increase in avg_sad // between current and previous frame value(s). Use minimum threshold // for cases where there is small change from content that is completely // static. if (lagframe_idx == 0) { if (avg_sad > VPXMAX(min_thresh, (unsigned int)(rc->avg_source_sad[0] * thresh)) && rc->frames_since_key > 1 + cpi->svc.number_spatial_layers && num_zero_temp_sad < 3 * (num_samples >> 2)) rc->high_source_sad = 1; else rc->high_source_sad = 0; if (rc->high_source_sad && avg_sad > thresh_key) scene_cut_force_key_frame = 1; if (avg_sad > 0 || cpi->oxcf.rc_mode == VPX_CBR) rc->avg_source_sad[0] = (3 * rc->avg_source_sad[0] + avg_sad) >> 2; } else { rc->avg_source_sad[lagframe_idx] = avg_sad; } if (num_zero_temp_sad < (3 * num_samples >> 2)) rc->high_num_blocks_with_motion = 1; } } // For CBR non-screen content mode, check if we should reset the rate // control. Reset is done if high_source_sad is detected and the rate // control is at very low QP with rate correction factor at min level. if (cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.content != VP9E_CONTENT_SCREEN && !cpi->use_svc) { if (rc->high_source_sad && rc->last_q[INTER_FRAME] == rc->best_quality && rc->avg_frame_qindex[INTER_FRAME] < (rc->best_quality << 1) && rc->rate_correction_factors[INTER_NORMAL] == MIN_BPB_FACTOR) { rc->rate_correction_factors[INTER_NORMAL] = 0.5; rc->avg_frame_qindex[INTER_FRAME] = rc->worst_quality; rc->buffer_level = rc->optimal_buffer_level; rc->bits_off_target = rc->optimal_buffer_level; rc->reset_high_source_sad = 1; } if (cm->frame_type != KEY_FRAME && rc->reset_high_source_sad) rc->this_frame_target = rc->avg_frame_bandwidth; } // For SVC the new (updated) avg_source_sad[0] for the current superframe // updates the setting for all layers. 
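    // (Added note: avg_source_sad[0] is maintained as a recursive average,
    // (3 * old + new) >> 2, i.e. an exponential moving average with weight
    // 1/4 on the newest frame; copying it below keeps every
    // spatial/temporal layer's rate control in sync.)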
    if (cpi->use_svc) {
      int sl, tl;
      SVC *const svc = &cpi->svc;
      for (sl = 0; sl < svc->number_spatial_layers; ++sl)
        for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
          int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
          LAYER_CONTEXT *const lc = &svc->layer_context[layer];
          RATE_CONTROL *const lrc = &lc->rc;
          lrc->avg_source_sad[0] = rc->avg_source_sad[0];
        }
    }
    // For VBR, under scene change/high content change, force golden refresh.
    if (cpi->oxcf.rc_mode == VPX_VBR && cm->frame_type != KEY_FRAME &&
        rc->high_source_sad && rc->frames_to_key > 3 &&
        rc->count_last_scene_change > 4 &&
        cpi->ext_refresh_frame_flags_pending == 0) {
      int target;
      cpi->refresh_golden_frame = 1;
      if (scene_cut_force_key_frame) cm->frame_type = KEY_FRAME;
      rc->source_alt_ref_pending = 0;
      if (cpi->sf.use_altref_onepass && cpi->oxcf.enable_auto_arf)
        rc->source_alt_ref_pending = 1;
      rc->gfu_boost = DEFAULT_GF_BOOST >> 1;
      rc->baseline_gf_interval =
          VPXMIN(20, VPXMAX(10, rc->baseline_gf_interval));
      adjust_gfint_frame_constraint(cpi, rc->frames_to_key);
      rc->frames_till_gf_update_due = rc->baseline_gf_interval;
      target = calc_pframe_target_size_one_pass_vbr(cpi);
      vp9_rc_set_frame_target(cpi, target);
      rc->count_last_scene_change = 0;
    } else {
      rc->count_last_scene_change++;
    }
    // If lag_in_frames is used, set the gf boost and interval.
    if (cpi->oxcf.lag_in_frames > 0)
      adjust_gf_boost_lag_one_pass_vbr(cpi, avg_sad_current);
  }
}

// Test if encoded frame will significantly overshoot the target bitrate, and
// if so, set the QP, reset/adjust some rate control parameters, and return 1.
// frame_size = -1 means frame has not been encoded.
int vp9_encodedframe_overshoot(VP9_COMP *cpi, int frame_size, int *q) {
  VP9_COMMON *const cm = &cpi->common;
  RATE_CONTROL *const rc = &cpi->rc;
  SPEED_FEATURES *const sf = &cpi->sf;
  int thresh_qp = 7 * (rc->worst_quality >> 3);
  int thresh_rate = rc->avg_frame_bandwidth << 3;
  // Lower thresh_qp for video (more overshoot at lower Q) to be
  // more conservative for video.
  if (cpi->oxcf.content != VP9E_CONTENT_SCREEN)
    thresh_qp = 3 * (rc->worst_quality >> 2);
  // If this decision is not based on an encoded frame size but just on
  // scene/slide change detection (i.e., re_encode_overshoot_cbr_rt ==
  // FAST_DETECTION_MAXQ), for now skip the (frame_size > thresh_rate)
  // condition in this case.
  // TODO(marpan): Use a better size/rate condition for this case and
  // adjust thresholds.
  if ((sf->overshoot_detection_cbr_rt == FAST_DETECTION_MAXQ ||
       frame_size > thresh_rate) &&
      cm->base_qindex < thresh_qp) {
    double rate_correction_factor =
        cpi->rc.rate_correction_factors[INTER_NORMAL];
    const int target_size = cpi->rc.avg_frame_bandwidth;
    double new_correction_factor;
    int target_bits_per_mb;
    double q2;
    int enumerator;
    // Force a re-encode, and for now use max-QP.
    *q = cpi->rc.worst_quality;
    cpi->cyclic_refresh->counter_encode_maxq_scene_change = 0;
    cpi->rc.re_encode_maxq_scene_change = 1;
    // If the frame_size is much larger than the threshold (big content
    // change) and the encoded frame used a lot of Intra modes, then force
    // hybrid_intra encoding for the re-encode on this scene change.
    // hybrid_intra will use rd-based intra mode selection for small blocks.
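    // (Added note: the scan below walks every 8x8 mode-info unit of the
    // visible grid -- the trailing mi += 8 is assumed to skip the grid's
    // stride padding -- and computes the percentage of blocks coded with
    // INTRA_FRAME.)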
    if (sf->overshoot_detection_cbr_rt == RE_ENCODE_MAXQ &&
        frame_size > (thresh_rate << 1) && cpi->svc.spatial_layer_id == 0) {
      MODE_INFO **mi = cm->mi_grid_visible;
      int sum_intra_usage = 0;
      int mi_row, mi_col;
      int tot = 0;
      for (mi_row = 0; mi_row < cm->mi_rows; mi_row++) {
        for (mi_col = 0; mi_col < cm->mi_cols; mi_col++) {
          if (mi[0]->ref_frame[0] == INTRA_FRAME) sum_intra_usage++;
          tot++;
          mi++;
        }
        mi += 8;
      }
      sum_intra_usage = 100 * sum_intra_usage / (cm->mi_rows * cm->mi_cols);
      if (sum_intra_usage > 60) cpi->rc.hybrid_intra_scene_change = 1;
    }
    // Adjust avg_frame_qindex, buffer_level, and rate correction factors, as
    // these parameters will affect QP selection for subsequent frames. If
    // they have settled down to a very different (low QP) state, then not
    // adjusting them may cause the next frame to select a low QP and
    // overshoot again.
    cpi->rc.avg_frame_qindex[INTER_FRAME] = *q;
    rc->buffer_level = rc->optimal_buffer_level;
    rc->bits_off_target = rc->optimal_buffer_level;
    // Reset rate under/over-shoot flags.
    cpi->rc.rc_1_frame = 0;
    cpi->rc.rc_2_frame = 0;
    // Adjust rate correction factor.
    target_bits_per_mb =
        (int)(((uint64_t)target_size << BPER_MB_NORMBITS) / cm->MBs);
    // Rate correction factor based on target_bits_per_mb and qp (==max_QP).
    // This comes from the inverse computation of vp9_rc_bits_per_mb().
    q2 = vp9_convert_qindex_to_q(*q, cm->bit_depth);
    enumerator = 1800000;  // Factor for inter frame.
    enumerator += (int)(enumerator * q2) >> 12;
    new_correction_factor = (double)target_bits_per_mb * q2 / enumerator;
    if (new_correction_factor > rate_correction_factor) {
      rate_correction_factor =
          VPXMIN(2.0 * rate_correction_factor, new_correction_factor);
      if (rate_correction_factor > MAX_BPB_FACTOR)
        rate_correction_factor = MAX_BPB_FACTOR;
      cpi->rc.rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
    }
    // For temporal layers, reset the rate control parameters across all
    // temporal layers. If first_spatial_layer_to_encode > 0, then this
    // superframe has skipped lower base layers. So in this case we should
    // also reset and force max-q for spatial layers <
    // first_spatial_layer_to_encode.
    if (cpi->use_svc) {
      int tl = 0;
      int sl = 0;
      SVC *svc = &cpi->svc;
      for (sl = 0; sl < svc->first_spatial_layer_to_encode; ++sl) {
        for (tl = 0; tl < svc->number_temporal_layers; ++tl) {
          const int layer =
              LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers);
          LAYER_CONTEXT *lc = &svc->layer_context[layer];
          RATE_CONTROL *lrc = &lc->rc;
          lrc->avg_frame_qindex[INTER_FRAME] = *q;
          lrc->buffer_level = lrc->optimal_buffer_level;
          lrc->bits_off_target = lrc->optimal_buffer_level;
          lrc->rc_1_frame = 0;
          lrc->rc_2_frame = 0;
          lrc->rate_correction_factors[INTER_NORMAL] = rate_correction_factor;
          lrc->force_max_q = 1;
        }
      }
    }
    return 1;
  } else {
    return 0;
  }
}

libvpx-1.8.2/vp9/encoder/vp9_ratectrl.h

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_ENCODER_VP9_RATECTRL_H_
#define VPX_VP9_ENCODER_VP9_RATECTRL_H_

#include "vpx/vpx_codec.h"
#include "vpx/vpx_integer.h"

#include "vp9/common/vp9_blockd.h"
#include "vp9/encoder/vp9_lookahead.h"

#ifdef __cplusplus
extern "C" {
#endif

// Used to control aggressive VBR mode.
// #define AGGRESSIVE_VBR 1

// Bits Per MB at different Q (Multiplied by 512)
#define BPER_MB_NORMBITS 9

#define MIN_GF_INTERVAL 4
#define MAX_GF_INTERVAL 16
#define FIXED_GF_INTERVAL 8  // Used in some testing modes only
#define ONEHALFONLY_RESIZE 0

#define FRAME_OVERHEAD_BITS 200

// Threshold used to define a KF group as static (e.g. a slide show).
// Essentially this means that no frame in the group has more than 1% of MBs
// that are not marked as coded with 0,0 motion in the first pass.
#define STATIC_KF_GROUP_THRESH 99

// The maximum duration of a GF group that is static (for example a slide
// show).
#define MAX_STATIC_GF_GROUP_LENGTH 250

typedef enum {
  INTER_NORMAL = 0,
  INTER_HIGH = 1,
  GF_ARF_LOW = 2,
  GF_ARF_STD = 3,
  KF_STD = 4,
  RATE_FACTOR_LEVELS = 5
} RATE_FACTOR_LEVEL;

// Internal frame scaling level.
typedef enum {
  UNSCALED = 0,     // Frame is unscaled.
  SCALE_STEP1 = 1,  // First-level down-scaling.
  FRAME_SCALE_STEPS
} FRAME_SCALE_LEVEL;

typedef enum {
  NO_RESIZE = 0,
  DOWN_THREEFOUR = 1,  // From orig to 3/4.
  DOWN_ONEHALF = 2,    // From orig or 3/4 to 1/2.
  UP_THREEFOUR = -1,   // From 1/2 to 3/4.
  UP_ORIG = -2,        // From 1/2 or 3/4 to orig.
} RESIZE_ACTION;

typedef enum { ORIG = 0, THREE_QUARTER = 1, ONE_HALF = 2 } RESIZE_STATE;

// Frame dimensions multiplier wrt the native frame size, in 1/16ths,
// specified for the scale-up case.
// e.g. 24 => 16/24 = 2/3 of native size. The restriction to 1/16th is
// intended to match the capabilities of the normative scaling filters,
// giving precedence to the up-scaling accuracy.
static const int frame_scale_factor[FRAME_SCALE_STEPS] = { 16, 24 };

// Multiplier of the target rate to be used as threshold for triggering
// scaling.
static const double rate_thresh_mult[FRAME_SCALE_STEPS] = { 1.0, 2.0 };

// Scale dependent Rate Correction Factor multipliers. Compensates for the
// greater number of bits per pixel generated in down-scaled frames.
static const double rcf_mult[FRAME_SCALE_STEPS] = { 1.0, 2.0 };

typedef struct {
  // Rate targeting variables
  int base_frame_target;  // A baseline frame target before adjustment
                          // for previous under or over shoot.
  int this_frame_target;  // Actual frame target after rc adjustment.
  int projected_frame_size;
  int sb64_target_rate;
  int last_q[FRAME_TYPES];  // Separate values for Intra/Inter
  int last_boosted_qindex;  // Last boosted GF/KF/ARF q
  int last_kf_qindex;       // Q index of the last key frame coded.

  int gfu_boost;
  int last_boost;
  int kf_boost;

  double rate_correction_factors[RATE_FACTOR_LEVELS];

  int frames_since_golden;
  int frames_till_gf_update_due;
  int min_gf_interval;
  int max_gf_interval;
  int static_scene_max_gf_interval;
  int baseline_gf_interval;
  int constrained_gf_group;
  int frames_to_key;
  int frames_since_key;
  int this_key_frame_forced;
  int next_key_frame_forced;
  int source_alt_ref_pending;
  int source_alt_ref_active;
  int is_src_frame_alt_ref;

  int avg_frame_bandwidth;  // Average frame size target for clip
  int min_frame_bandwidth;  // Minimum allocation used for any frame
  int max_frame_bandwidth;  // Maximum burst rate allowed for a frame.
int ni_av_qi; int ni_tot_qi; int ni_frames; int avg_frame_qindex[FRAME_TYPES]; double tot_q; double avg_q; int64_t buffer_level; int64_t bits_off_target; int64_t vbr_bits_off_target; int64_t vbr_bits_off_target_fast; int decimation_factor; int decimation_count; int rolling_target_bits; int rolling_actual_bits; int long_rolling_target_bits; int long_rolling_actual_bits; int rate_error_estimate; int64_t total_actual_bits; int64_t total_target_bits; int64_t total_target_vs_actual; int worst_quality; int best_quality; int64_t starting_buffer_level; int64_t optimal_buffer_level; int64_t maximum_buffer_size; // rate control history for last frame(1) and the frame before(2). // -1: undershot // 1: overshoot // 0: not initialized. int rc_1_frame; int rc_2_frame; int q_1_frame; int q_2_frame; // Keep track of the last target average frame bandwidth. int last_avg_frame_bandwidth; // Auto frame-scaling variables. FRAME_SCALE_LEVEL frame_size_selector; FRAME_SCALE_LEVEL next_frame_size_selector; int frame_width[FRAME_SCALE_STEPS]; int frame_height[FRAME_SCALE_STEPS]; int rf_level_maxq[RATE_FACTOR_LEVELS]; int fac_active_worst_inter; int fac_active_worst_gf; uint64_t avg_source_sad[MAX_LAG_BUFFERS]; uint64_t prev_avg_source_sad_lag; int high_source_sad_lagindex; int high_num_blocks_with_motion; int alt_ref_gf_group; int last_frame_is_src_altref; int high_source_sad; int count_last_scene_change; int hybrid_intra_scene_change; int re_encode_maxq_scene_change; int avg_frame_low_motion; int af_ratio_onepass_vbr; int force_qpmin; int reset_high_source_sad; double perc_arf_usage; int force_max_q; // Last frame was dropped post encode on scene change. int last_post_encode_dropped_scene_change; // Enable post encode frame dropping for screen content. Only enabled when // ext_use_post_encode_drop is enabled by user. int use_post_encode_drop; // External flag to enable post encode frame dropping, controlled by user. int ext_use_post_encode_drop; int damped_adjustment[RATE_FACTOR_LEVELS]; double arf_active_best_quality_adjustment_factor; int arf_increase_active_best_quality; int preserve_arf_as_gld; int preserve_next_arf_as_gld; int show_arf_as_gld; } RATE_CONTROL; struct VP9_COMP; struct VP9EncoderConfig; void vp9_rc_init(const struct VP9EncoderConfig *oxcf, int pass, RATE_CONTROL *rc); int vp9_estimate_bits_at_q(FRAME_TYPE frame_type, int q, int mbs, double correction_factor, vpx_bit_depth_t bit_depth); double vp9_convert_qindex_to_q(int qindex, vpx_bit_depth_t bit_depth); int vp9_convert_q_to_qindex(double q_val, vpx_bit_depth_t bit_depth); void vp9_rc_init_minq_luts(void); int vp9_rc_get_default_min_gf_interval(int width, int height, double framerate); // Note vp9_rc_get_default_max_gf_interval() requires the min_gf_interval to // be passed in to ensure that the max_gf_interval returned is at least as big // as that. int vp9_rc_get_default_max_gf_interval(double framerate, int min_gf_interval); // Generally at the high level, the following flow is expected // to be enforced for rate control: // First call per frame, one of: // vp9_rc_get_one_pass_vbr_params() // vp9_rc_get_one_pass_cbr_params() // vp9_rc_get_svc_params() // vp9_rc_get_first_pass_params() // vp9_rc_get_second_pass_params() // depending on the usage to set the rate control encode parameters desired. // // Then, call encode_frame_to_data_rate() to perform the // actual encode. 
This function will in turn call encode_frame() // one or more times, followed by one of: // vp9_rc_postencode_update() // vp9_rc_postencode_update_drop_frame() // // The majority of rate control parameters are only expected // to be set in the vp9_rc_get_..._params() functions and // updated during the vp9_rc_postencode_update...() functions. // The only exceptions are vp9_rc_drop_frame() and // vp9_rc_update_rate_correction_factors() functions. // Functions to set parameters for encoding before the actual // encode_frame_to_data_rate() function. void vp9_rc_get_one_pass_vbr_params(struct VP9_COMP *cpi); void vp9_rc_get_one_pass_cbr_params(struct VP9_COMP *cpi); void vp9_rc_get_svc_params(struct VP9_COMP *cpi); // Post encode update of the rate control parameters based // on bytes used void vp9_rc_postencode_update(struct VP9_COMP *cpi, uint64_t bytes_used); // Post encode update of the rate control parameters for dropped frames void vp9_rc_postencode_update_drop_frame(struct VP9_COMP *cpi); // Updates rate correction factors // Changes only the rate correction factors in the rate control structure. void vp9_rc_update_rate_correction_factors(struct VP9_COMP *cpi); // Post encode drop for CBR mode. int post_encode_drop_cbr(struct VP9_COMP *cpi, size_t *size); int vp9_test_drop(struct VP9_COMP *cpi); // Decide if we should drop this frame: For 1-pass CBR. // Changes only the decimation count in the rate control structure int vp9_rc_drop_frame(struct VP9_COMP *cpi); // Computes frame size bounds. void vp9_rc_compute_frame_size_bounds(const struct VP9_COMP *cpi, int frame_target, int *frame_under_shoot_limit, int *frame_over_shoot_limit); // Picks q and q bounds given the target for bits int vp9_rc_pick_q_and_bounds(const struct VP9_COMP *cpi, int *bottom_index, int *top_index); // Estimates q to achieve a target bits per frame int vp9_rc_regulate_q(const struct VP9_COMP *cpi, int target_bits_per_frame, int active_best_quality, int active_worst_quality); // Estimates bits per mb for a given qindex and correction factor. int vp9_rc_bits_per_mb(FRAME_TYPE frame_type, int qindex, double correction_factor, vpx_bit_depth_t bit_depth); // Clamping utilities for bitrate targets for iframes and pframes. int vp9_rc_clamp_iframe_target_size(const struct VP9_COMP *const cpi, int target); int vp9_rc_clamp_pframe_target_size(const struct VP9_COMP *const cpi, int target); // Utility to set frame_target into the RATE_CONTROL structure // This function is called only from the vp9_rc_get_..._params() functions. void vp9_rc_set_frame_target(struct VP9_COMP *cpi, int target); // Computes a q delta (in "q index" terms) to get from a starting q value // to a target q value int vp9_compute_qdelta(const RATE_CONTROL *rc, double qstart, double qtarget, vpx_bit_depth_t bit_depth); // Computes a q delta (in "q index" terms) to get from a starting q value // to a value that should equate to the given rate ratio. 
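// (For instance, as a hypothetical call: passing rate_target_ratio = 0.5
// asks for the qindex offset that roughly halves the bits-per-mb relative
// to coding the given frame type at 'qindex'.)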
int vp9_compute_qdelta_by_rate(const RATE_CONTROL *rc, FRAME_TYPE frame_type,
                               int qindex, double rate_target_ratio,
                               vpx_bit_depth_t bit_depth);

int vp9_frame_type_qdelta(const struct VP9_COMP *cpi, int rf_level, int q);

void vp9_rc_update_framerate(struct VP9_COMP *cpi);

void vp9_rc_set_gf_interval_range(const struct VP9_COMP *const cpi,
                                  RATE_CONTROL *const rc);

void vp9_set_target_rate(struct VP9_COMP *cpi);

int vp9_resize_one_pass_cbr(struct VP9_COMP *cpi);

void vp9_scene_detection_onepass(struct VP9_COMP *cpi);

int vp9_encodedframe_overshoot(struct VP9_COMP *cpi, int frame_size, int *q);

void vp9_configure_buffer_updates(struct VP9_COMP *cpi, int gf_group_index);

void vp9_estimate_qp_gop(struct VP9_COMP *cpi);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_ENCODER_VP9_RATECTRL_H_

libvpx-1.8.2/vp9/encoder/vp9_rd.c

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>
#include <stdio.h>

#include "./vp9_rtcd.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/bitops.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_seg_common.h"

#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_tokenize.h"

#define RD_THRESH_POW 1.25

// Factor to weigh the rate for switchable interp filters.
#define SWITCHABLE_INTERP_RATE_FACTOR 1

void vp9_rd_cost_reset(RD_COST *rd_cost) {
  rd_cost->rate = INT_MAX;
  rd_cost->dist = INT64_MAX;
  rd_cost->rdcost = INT64_MAX;
}

void vp9_rd_cost_init(RD_COST *rd_cost) {
  rd_cost->rate = 0;
  rd_cost->dist = 0;
  rd_cost->rdcost = 0;
}

int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist) {
  assert(mult >= 0);
  assert(div > 0);
  if (rate >= 0 && dist >= 0) {
    return RDCOST(mult, div, rate, dist);
  }
  if (rate >= 0 && dist < 0) {
    return RDCOST_NEG_D(mult, div, rate, -dist);
  }
  if (rate < 0 && dist >= 0) {
    return RDCOST_NEG_R(mult, div, -rate, dist);
  }
  return -RDCOST(mult, div, -rate, -dist);
}

void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost) {
  if (rd_cost->rate < INT_MAX && rd_cost->dist < INT64_MAX) {
    rd_cost->rdcost =
        vp9_calculate_rd_cost(mult, div, rd_cost->rate, rd_cost->dist);
  } else {
    vp9_rd_cost_reset(rd_cost);
  }
}

// The baseline rd thresholds for breaking out of the rd loop for
// certain modes are assumed to be based on 8x8 blocks.
// This table is used to correct for block size.
// The factors here are << 2 (2 = x0.5, 32 = x8 etc).
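// In other words, the table is in Q2 fixed point: the BLOCK_8X8 entry (4) is
// the 1.0x baseline, BLOCK_4X4 (2) halves the threshold, and BLOCK_64X64 (32)
// scales it by 8; set_block_thresholds() later divides the product by 4 to
// remove this scaling.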
static const uint8_t rd_thresh_block_size_factor[BLOCK_SIZES] = { 2, 3, 3, 4, 6, 6, 8, 12, 12, 16, 24, 24, 32 }; static void fill_mode_costs(VP9_COMP *cpi) { const FRAME_CONTEXT *const fc = cpi->common.fc; int i, j; for (i = 0; i < INTRA_MODES; ++i) { for (j = 0; j < INTRA_MODES; ++j) { vp9_cost_tokens(cpi->y_mode_costs[i][j], vp9_kf_y_mode_prob[i][j], vp9_intra_mode_tree); } } vp9_cost_tokens(cpi->mbmode_cost, fc->y_mode_prob[1], vp9_intra_mode_tree); for (i = 0; i < INTRA_MODES; ++i) { vp9_cost_tokens(cpi->intra_uv_mode_cost[KEY_FRAME][i], vp9_kf_uv_mode_prob[i], vp9_intra_mode_tree); vp9_cost_tokens(cpi->intra_uv_mode_cost[INTER_FRAME][i], fc->uv_mode_prob[i], vp9_intra_mode_tree); } for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) { vp9_cost_tokens(cpi->switchable_interp_costs[i], fc->switchable_interp_prob[i], vp9_switchable_interp_tree); } for (i = TX_8X8; i < TX_SIZES; ++i) { for (j = 0; j < TX_SIZE_CONTEXTS; ++j) { const vpx_prob *tx_probs = get_tx_probs(i, j, &fc->tx_probs); int k; for (k = 0; k <= i; ++k) { int cost = 0; int m; for (m = 0; m <= k - (k == i); ++m) { if (m == k) cost += vp9_cost_zero(tx_probs[m]); else cost += vp9_cost_one(tx_probs[m]); } cpi->tx_size_cost[i - 1][j][k] = cost; } } } } static void fill_token_costs(vp9_coeff_cost *c, vp9_coeff_probs_model (*p)[PLANE_TYPES]) { int i, j, k, l; TX_SIZE t; for (t = TX_4X4; t <= TX_32X32; ++t) for (i = 0; i < PLANE_TYPES; ++i) for (j = 0; j < REF_TYPES; ++j) for (k = 0; k < COEF_BANDS; ++k) for (l = 0; l < BAND_COEFF_CONTEXTS(k); ++l) { vpx_prob probs[ENTROPY_NODES]; vp9_model_to_full_probs(p[t][i][j][k][l], probs); vp9_cost_tokens((int *)c[t][i][j][k][0][l], probs, vp9_coef_tree); vp9_cost_tokens_skip((int *)c[t][i][j][k][1][l], probs, vp9_coef_tree); assert(c[t][i][j][k][0][l][EOB_TOKEN] == c[t][i][j][k][1][l][EOB_TOKEN]); } } // Values are now correlated to quantizer. static int sad_per_bit16lut_8[QINDEX_RANGE]; static int sad_per_bit4lut_8[QINDEX_RANGE]; #if CONFIG_VP9_HIGHBITDEPTH static int sad_per_bit16lut_10[QINDEX_RANGE]; static int sad_per_bit4lut_10[QINDEX_RANGE]; static int sad_per_bit16lut_12[QINDEX_RANGE]; static int sad_per_bit4lut_12[QINDEX_RANGE]; #endif static void init_me_luts_bd(int *bit16lut, int *bit4lut, int range, vpx_bit_depth_t bit_depth) { int i; // Initialize the sad lut tables using a formulaic calculation for now. // This is to make it easier to resolve the impact of experimental changes // to the quantizer tables. for (i = 0; i < range; i++) { const double q = vp9_convert_qindex_to_q(i, bit_depth); bit16lut[i] = (int)(0.0418 * q + 2.4107); bit4lut[i] = (int)(0.063 * q + 2.742); } } void vp9_init_me_luts(void) { init_me_luts_bd(sad_per_bit16lut_8, sad_per_bit4lut_8, QINDEX_RANGE, VPX_BITS_8); #if CONFIG_VP9_HIGHBITDEPTH init_me_luts_bd(sad_per_bit16lut_10, sad_per_bit4lut_10, QINDEX_RANGE, VPX_BITS_10); init_me_luts_bd(sad_per_bit16lut_12, sad_per_bit4lut_12, QINDEX_RANGE, VPX_BITS_12); #endif } static const int rd_boost_factor[16] = { 64, 32, 32, 32, 24, 16, 12, 12, 8, 8, 4, 4, 2, 2, 1, 0 }; // Note that the element below for frame type "USE_BUF_FRAME", which indicates // that the show frame flag is set, should not be used as no real frame // is encoded so we should not reach here. However, a dummy value // is inserted here to make sure the data structure has the right number // of values assigned. 
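// These factors are Q7 fixed point (128 => 1.0, 144 => 1.125) and are applied
// in modulate_rdmult() below as (rdmult * factor) >> 7.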
static const int rd_frame_type_factor[FRAME_UPDATE_TYPES] = { 128, 144, 128, 128, 144, 144 }; int vp9_compute_rd_mult_based_on_qindex(const VP9_COMP *cpi, int qindex) { // largest dc_quant is 21387, therefore rdmult should always fit in int32_t const int q = vp9_dc_quant(qindex, 0, cpi->common.bit_depth); uint32_t rdmult = q * q; if (cpi->common.frame_type != KEY_FRAME) { if (qindex < 128) rdmult = rdmult * 4; else if (qindex < 190) rdmult = rdmult * 4 + rdmult / 2; else rdmult = rdmult * 3; } else { if (qindex < 64) rdmult = rdmult * 4; else if (qindex <= 128) rdmult = rdmult * 3 + rdmult / 2; else if (qindex < 190) rdmult = rdmult * 4 + rdmult / 2; else rdmult = rdmult * 7 + rdmult / 2; } #if CONFIG_VP9_HIGHBITDEPTH switch (cpi->common.bit_depth) { case VPX_BITS_10: rdmult = ROUND_POWER_OF_TWO(rdmult, 4); break; case VPX_BITS_12: rdmult = ROUND_POWER_OF_TWO(rdmult, 8); break; default: break; } #endif // CONFIG_VP9_HIGHBITDEPTH return rdmult > 0 ? rdmult : 1; } static int modulate_rdmult(const VP9_COMP *cpi, int rdmult) { int64_t rdmult_64 = rdmult; if (cpi->oxcf.pass == 2 && (cpi->common.frame_type != KEY_FRAME)) { const GF_GROUP *const gf_group = &cpi->twopass.gf_group; const FRAME_UPDATE_TYPE frame_type = gf_group->update_type[gf_group->index]; const int gfu_boost = cpi->multi_layer_arf ? gf_group->gfu_boost[gf_group->index] : cpi->rc.gfu_boost; const int boost_index = VPXMIN(15, (gfu_boost / 100)); rdmult_64 = (rdmult_64 * rd_frame_type_factor[frame_type]) >> 7; rdmult_64 += ((rdmult_64 * rd_boost_factor[boost_index]) >> 7); } return (int)rdmult_64; } int vp9_compute_rd_mult(const VP9_COMP *cpi, int qindex) { int rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, qindex); return modulate_rdmult(cpi, rdmult); } int vp9_get_adaptive_rdmult(const VP9_COMP *cpi, double beta) { int rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, cpi->common.base_qindex); rdmult = (int)((double)rdmult / beta); rdmult = rdmult > 0 ? rdmult : 1; return modulate_rdmult(cpi, rdmult); } static int compute_rd_thresh_factor(int qindex, vpx_bit_depth_t bit_depth) { double q; #if CONFIG_VP9_HIGHBITDEPTH switch (bit_depth) { case VPX_BITS_8: q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; break; case VPX_BITS_10: q = vp9_dc_quant(qindex, 0, VPX_BITS_10) / 16.0; break; default: assert(bit_depth == VPX_BITS_12); q = vp9_dc_quant(qindex, 0, VPX_BITS_12) / 64.0; break; } #else (void)bit_depth; q = vp9_dc_quant(qindex, 0, VPX_BITS_8) / 4.0; #endif // CONFIG_VP9_HIGHBITDEPTH // TODO(debargha): Adjust the function below. 
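  // A quick worked example (with RD_THRESH_POW == 1.25): an effective q of 16
  // gives pow(16, 1.25) == 32, so the factor is (int)(32 * 5.12) == 163; the
  // VPXMAX() below clamps the result to a minimum of 8 for very small q.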
return VPXMAX((int)(pow(q, RD_THRESH_POW) * 5.12), 8); } void vp9_initialize_me_consts(VP9_COMP *cpi, MACROBLOCK *x, int qindex) { #if CONFIG_VP9_HIGHBITDEPTH switch (cpi->common.bit_depth) { case VPX_BITS_8: x->sadperbit16 = sad_per_bit16lut_8[qindex]; x->sadperbit4 = sad_per_bit4lut_8[qindex]; break; case VPX_BITS_10: x->sadperbit16 = sad_per_bit16lut_10[qindex]; x->sadperbit4 = sad_per_bit4lut_10[qindex]; break; default: assert(cpi->common.bit_depth == VPX_BITS_12); x->sadperbit16 = sad_per_bit16lut_12[qindex]; x->sadperbit4 = sad_per_bit4lut_12[qindex]; break; } #else (void)cpi; x->sadperbit16 = sad_per_bit16lut_8[qindex]; x->sadperbit4 = sad_per_bit4lut_8[qindex]; #endif // CONFIG_VP9_HIGHBITDEPTH } static void set_block_thresholds(const VP9_COMMON *cm, RD_OPT *rd) { int i, bsize, segment_id; for (segment_id = 0; segment_id < MAX_SEGMENTS; ++segment_id) { const int qindex = clamp(vp9_get_qindex(&cm->seg, segment_id, cm->base_qindex) + cm->y_dc_delta_q, 0, MAXQ); const int q = compute_rd_thresh_factor(qindex, cm->bit_depth); for (bsize = 0; bsize < BLOCK_SIZES; ++bsize) { // Threshold here seems unnecessarily harsh but fine given actual // range of values used for cpi->sf.thresh_mult[]. const int t = q * rd_thresh_block_size_factor[bsize]; const int thresh_max = INT_MAX / t; if (bsize >= BLOCK_8X8) { for (i = 0; i < MAX_MODES; ++i) rd->threshes[segment_id][bsize][i] = rd->thresh_mult[i] < thresh_max ? rd->thresh_mult[i] * t / 4 : INT_MAX; } else { for (i = 0; i < MAX_REFS; ++i) rd->threshes[segment_id][bsize][i] = rd->thresh_mult_sub8x8[i] < thresh_max ? rd->thresh_mult_sub8x8[i] * t / 4 : INT_MAX; } } } } void vp9_build_inter_mode_cost(VP9_COMP *cpi) { const VP9_COMMON *const cm = &cpi->common; int i; for (i = 0; i < INTER_MODE_CONTEXTS; ++i) { vp9_cost_tokens((int *)cpi->inter_mode_cost[i], cm->fc->inter_mode_probs[i], vp9_inter_mode_tree); } } void vp9_initialize_rd_consts(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; MACROBLOCK *const x = &cpi->td.mb; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; RD_OPT *const rd = &cpi->rd; int i; vpx_clear_system_state(); rd->RDDIV = RDDIV_BITS; // In bits (to multiply D by 128). rd->RDMULT = vp9_compute_rd_mult(cpi, cm->base_qindex + cm->y_dc_delta_q); set_error_per_bit(x, rd->RDMULT); x->select_tx_size = (cpi->sf.tx_size_search_method == USE_LARGESTALL && cm->frame_type != KEY_FRAME) ? 0 : 1; set_block_thresholds(cm, rd); set_partition_probs(cm, xd); if (cpi->oxcf.pass == 1) { if (!frame_is_intra_only(cm)) vp9_build_nmv_cost_table( x->nmvjointcost, cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, &cm->fc->nmvc, cm->allow_high_precision_mv); } else { if (!cpi->sf.use_nonrd_pick_mode || cm->frame_type == KEY_FRAME) fill_token_costs(x->token_costs, cm->fc->coef_probs); if (cpi->sf.partition_search_type != VAR_BASED_PARTITION || cm->frame_type == KEY_FRAME) { for (i = 0; i < PARTITION_CONTEXTS; ++i) vp9_cost_tokens(cpi->partition_cost[i], get_partition_probs(xd, i), vp9_partition_tree); } if (!cpi->sf.use_nonrd_pick_mode || (cm->current_video_frame & 0x07) == 1 || cm->frame_type == KEY_FRAME) { fill_mode_costs(cpi); if (!frame_is_intra_only(cm)) { vp9_build_nmv_cost_table( x->nmvjointcost, cm->allow_high_precision_mv ? x->nmvcost_hp : x->nmvcost, &cm->fc->nmvc, cm->allow_high_precision_mv); vp9_build_inter_mode_cost(cpi); } } } } // NOTE: The tables below must be of the same size. // The functions described below are sampled at the four most significant // bits of x^2 + 8 / 256. 
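// Concretely (see model_rd_norm() below): the lookup index is built from an
// exponent k = get_msb((xsq_q10 >> 2) + 8) - 3 plus the next three mantissa
// bits, giving eight sample points per octave of x^2, and values between
// neighboring entries of xsq_iq_q10[] are linearly interpolated.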
// Normalized rate: // This table models the rate for a Laplacian source with given variance // when quantized with a uniform quantizer with given stepsize. The // closed form expression is: // Rn(x) = H(sqrt(r)) + sqrt(r)*[1 + H(r)/(1 - r)], // where r = exp(-sqrt(2) * x) and x = qpstep / sqrt(variance), // and H(x) is the binary entropy function. static const int rate_tab_q10[] = { 65536, 6086, 5574, 5275, 5063, 4899, 4764, 4651, 4553, 4389, 4255, 4142, 4044, 3958, 3881, 3811, 3748, 3635, 3538, 3453, 3376, 3307, 3244, 3186, 3133, 3037, 2952, 2877, 2809, 2747, 2690, 2638, 2589, 2501, 2423, 2353, 2290, 2232, 2179, 2130, 2084, 2001, 1928, 1862, 1802, 1748, 1698, 1651, 1608, 1530, 1460, 1398, 1342, 1290, 1243, 1199, 1159, 1086, 1021, 963, 911, 864, 821, 781, 745, 680, 623, 574, 530, 490, 455, 424, 395, 345, 304, 269, 239, 213, 190, 171, 154, 126, 104, 87, 73, 61, 52, 44, 38, 28, 21, 16, 12, 10, 8, 6, 5, 3, 2, 1, 1, 1, 0, 0, }; // Normalized distortion: // This table models the normalized distortion for a Laplacian source // with given variance when quantized with a uniform quantizer // with given stepsize. The closed form expression is: // Dn(x) = 1 - 1/sqrt(2) * x / sinh(x/sqrt(2)) // where x = qpstep / sqrt(variance). // Note the actual distortion is Dn * variance. static const int dist_tab_q10[] = { 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 4, 5, 5, 6, 7, 7, 8, 9, 11, 12, 13, 15, 16, 17, 18, 21, 24, 26, 29, 31, 34, 36, 39, 44, 49, 54, 59, 64, 69, 73, 78, 88, 97, 106, 115, 124, 133, 142, 151, 167, 184, 200, 215, 231, 245, 260, 274, 301, 327, 351, 375, 397, 418, 439, 458, 495, 528, 559, 587, 613, 637, 659, 680, 717, 749, 777, 801, 823, 842, 859, 874, 899, 919, 936, 949, 960, 969, 977, 983, 994, 1001, 1006, 1010, 1013, 1015, 1017, 1018, 1020, 1022, 1022, 1023, 1023, 1023, 1024, }; static const int xsq_iq_q10[] = { 0, 4, 8, 12, 16, 20, 24, 28, 32, 40, 48, 56, 64, 72, 80, 88, 96, 112, 128, 144, 160, 176, 192, 208, 224, 256, 288, 320, 352, 384, 416, 448, 480, 544, 608, 672, 736, 800, 864, 928, 992, 1120, 1248, 1376, 1504, 1632, 1760, 1888, 2016, 2272, 2528, 2784, 3040, 3296, 3552, 3808, 4064, 4576, 5088, 5600, 6112, 6624, 7136, 7648, 8160, 9184, 10208, 11232, 12256, 13280, 14304, 15328, 16352, 18400, 20448, 22496, 24544, 26592, 28640, 30688, 32736, 36832, 40928, 45024, 49120, 53216, 57312, 61408, 65504, 73696, 81888, 90080, 98272, 106464, 114656, 122848, 131040, 147424, 163808, 180192, 196576, 212960, 229344, 245728, }; static void model_rd_norm(int xsq_q10, int *r_q10, int *d_q10) { const int tmp = (xsq_q10 >> 2) + 8; const int k = get_msb(tmp) - 3; const int xq = (k << 3) + ((tmp >> k) & 0x7); const int one_q10 = 1 << 10; const int a_q10 = ((xsq_q10 - xsq_iq_q10[xq]) << 10) >> (2 + k); const int b_q10 = one_q10 - a_q10; *r_q10 = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10; *d_q10 = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10; } static void model_rd_norm_vec(int xsq_q10[MAX_MB_PLANE], int r_q10[MAX_MB_PLANE], int d_q10[MAX_MB_PLANE]) { int i; const int one_q10 = 1 << 10; for (i = 0; i < MAX_MB_PLANE; ++i) { const int tmp = (xsq_q10[i] >> 2) + 8; const int k = get_msb(tmp) - 3; const int xq = (k << 3) + ((tmp >> k) & 0x7); const int a_q10 = ((xsq_q10[i] - xsq_iq_q10[xq]) << 10) >> (2 + k); const int b_q10 = one_q10 - a_q10; r_q10[i] = (rate_tab_q10[xq] * b_q10 + rate_tab_q10[xq + 1] * a_q10) >> 10; d_q10[i] = (dist_tab_q10[xq] * b_q10 + dist_tab_q10[xq + 1] * a_q10) >> 10; } } static const uint32_t MAX_XSQ_Q10 = 245727; void vp9_model_rd_from_var_lapndz(unsigned int 
var, unsigned int n_log2, unsigned int qstep, int *rate, int64_t *dist) { // This function models the rate and distortion for a Laplacian // source with given variance when quantized with a uniform quantizer // with given stepsize. The closed form expressions are in: // Hang and Chen, "Source Model for transform video coder and its // application - Part I: Fundamental Theory", IEEE Trans. Circ. // Sys. for Video Tech., April 1997. if (var == 0) { *rate = 0; *dist = 0; } else { int d_q10, r_q10; const uint64_t xsq_q10_64 = (((uint64_t)qstep * qstep << (n_log2 + 10)) + (var >> 1)) / var; const int xsq_q10 = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10); model_rd_norm(xsq_q10, &r_q10, &d_q10); *rate = ROUND_POWER_OF_TWO(r_q10 << n_log2, 10 - VP9_PROB_COST_SHIFT); *dist = (var * (int64_t)d_q10 + 512) >> 10; } } // Implements a fixed length vector form of vp9_model_rd_from_var_lapndz where // vectors are of length MAX_MB_PLANE and all elements of var are non-zero. void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE], unsigned int n_log2[MAX_MB_PLANE], unsigned int qstep[MAX_MB_PLANE], int64_t *rate_sum, int64_t *dist_sum) { int i; int xsq_q10[MAX_MB_PLANE], d_q10[MAX_MB_PLANE], r_q10[MAX_MB_PLANE]; for (i = 0; i < MAX_MB_PLANE; ++i) { const uint64_t xsq_q10_64 = (((uint64_t)qstep[i] * qstep[i] << (n_log2[i] + 10)) + (var[i] >> 1)) / var[i]; xsq_q10[i] = (int)VPXMIN(xsq_q10_64, MAX_XSQ_Q10); } model_rd_norm_vec(xsq_q10, r_q10, d_q10); for (i = 0; i < MAX_MB_PLANE; ++i) { int rate = ROUND_POWER_OF_TWO(r_q10[i] << n_log2[i], 10 - VP9_PROB_COST_SHIFT); int64_t dist = (var[i] * (int64_t)d_q10[i] + 512) >> 10; *rate_sum += rate; *dist_sum += dist; } } void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size, const struct macroblockd_plane *pd, ENTROPY_CONTEXT t_above[16], ENTROPY_CONTEXT t_left[16]) { const BLOCK_SIZE plane_bsize = get_plane_block_size(bsize, pd); const int num_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int num_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; const ENTROPY_CONTEXT *const above = pd->above_context; const ENTROPY_CONTEXT *const left = pd->left_context; int i; switch (tx_size) { case TX_4X4: memcpy(t_above, above, sizeof(ENTROPY_CONTEXT) * num_4x4_w); memcpy(t_left, left, sizeof(ENTROPY_CONTEXT) * num_4x4_h); break; case TX_8X8: for (i = 0; i < num_4x4_w; i += 2) t_above[i] = !!*(const uint16_t *)&above[i]; for (i = 0; i < num_4x4_h; i += 2) t_left[i] = !!*(const uint16_t *)&left[i]; break; case TX_16X16: for (i = 0; i < num_4x4_w; i += 4) t_above[i] = !!*(const uint32_t *)&above[i]; for (i = 0; i < num_4x4_h; i += 4) t_left[i] = !!*(const uint32_t *)&left[i]; break; default: assert(tx_size == TX_32X32); for (i = 0; i < num_4x4_w; i += 8) t_above[i] = !!*(const uint64_t *)&above[i]; for (i = 0; i < num_4x4_h; i += 8) t_left[i] = !!*(const uint64_t *)&left[i]; break; } } void vp9_mv_pred(VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer, int ref_y_stride, int ref_frame, BLOCK_SIZE block_size) { int i; int zero_seen = 0; int best_index = 0; int best_sad = INT_MAX; int this_sad = INT_MAX; int max_mv = 0; int near_same_nearest; uint8_t *src_y_ptr = x->plane[0].src.buf; uint8_t *ref_y_ptr; const int num_mv_refs = MAX_MV_REF_CANDIDATES + (block_size < x->max_partition_size); MV pred_mv[3]; pred_mv[0] = x->mbmi_ext->ref_mvs[ref_frame][0].as_mv; pred_mv[1] = x->mbmi_ext->ref_mvs[ref_frame][1].as_mv; pred_mv[2] = x->pred_mv[ref_frame]; assert(num_mv_refs <= (int)(sizeof(pred_mv) / sizeof(pred_mv[0]))); near_same_nearest = 
x->mbmi_ext->ref_mvs[ref_frame][0].as_int == x->mbmi_ext->ref_mvs[ref_frame][1].as_int; // Get the sad for each candidate reference mv. for (i = 0; i < num_mv_refs; ++i) { const MV *this_mv = &pred_mv[i]; int fp_row, fp_col; if (this_mv->row == INT16_MAX || this_mv->col == INT16_MAX) continue; if (i == 1 && near_same_nearest) continue; fp_row = (this_mv->row + 3 + (this_mv->row >= 0)) >> 3; fp_col = (this_mv->col + 3 + (this_mv->col >= 0)) >> 3; max_mv = VPXMAX(max_mv, VPXMAX(abs(this_mv->row), abs(this_mv->col)) >> 3); if (fp_row == 0 && fp_col == 0 && zero_seen) continue; zero_seen |= (fp_row == 0 && fp_col == 0); ref_y_ptr = &ref_y_buffer[ref_y_stride * fp_row + fp_col]; // Find sad for current vector. this_sad = cpi->fn_ptr[block_size].sdf(src_y_ptr, x->plane[0].src.stride, ref_y_ptr, ref_y_stride); // Note if it is the best so far. if (this_sad < best_sad) { best_sad = this_sad; best_index = i; } } // Note the index of the mv that worked best in the reference list. x->mv_best_ref_index[ref_frame] = best_index; x->max_mv_context[ref_frame] = max_mv; x->pred_mv_sad[ref_frame] = best_sad; } void vp9_setup_pred_block(const MACROBLOCKD *xd, struct buf_2d dst[MAX_MB_PLANE], const YV12_BUFFER_CONFIG *src, int mi_row, int mi_col, const struct scale_factors *scale, const struct scale_factors *scale_uv) { int i; dst[0].buf = src->y_buffer; dst[0].stride = src->y_stride; dst[1].buf = src->u_buffer; dst[2].buf = src->v_buffer; dst[1].stride = dst[2].stride = src->uv_stride; for (i = 0; i < MAX_MB_PLANE; ++i) { setup_pred_plane(dst + i, dst[i].buf, dst[i].stride, mi_row, mi_col, i ? scale_uv : scale, xd->plane[i].subsampling_x, xd->plane[i].subsampling_y); } } int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block, int stride) { const int bw = b_width_log2_lookup[plane_bsize]; const int y = 4 * (raster_block >> bw); const int x = 4 * (raster_block & ((1 << bw) - 1)); return y * stride + x; } int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize, int raster_block, int16_t *base) { const int stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; return base + vp9_raster_block_offset(plane_bsize, raster_block, stride); } YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const VP9_COMP *cpi, int ref_frame) { const VP9_COMMON *const cm = &cpi->common; const int scaled_idx = cpi->scaled_ref_idx[ref_frame - 1]; const int ref_idx = get_ref_frame_buf_idx(cpi, ref_frame); assert(ref_frame >= LAST_FRAME && ref_frame <= ALTREF_FRAME); return (scaled_idx != ref_idx && scaled_idx != INVALID_IDX) ? &cm->buffer_pool->frame_bufs[scaled_idx].buf : NULL; } int vp9_get_switchable_rate(const VP9_COMP *cpi, const MACROBLOCKD *const xd) { const MODE_INFO *const mi = xd->mi[0]; const int ctx = get_pred_context_switchable_interp(xd); return SWITCHABLE_INTERP_RATE_FACTOR * cpi->switchable_interp_costs[ctx][mi->interp_filter]; } void vp9_set_rd_speed_thresholds(VP9_COMP *cpi) { int i; RD_OPT *const rd = &cpi->rd; SPEED_FEATURES *const sf = &cpi->sf; // Set baseline threshold values. for (i = 0; i < MAX_MODES; ++i) rd->thresh_mult[i] = cpi->oxcf.mode == BEST ? 
-500 : 0;

  if (sf->adaptive_rd_thresh) {
    rd->thresh_mult[THR_NEARESTMV] = 300;
    rd->thresh_mult[THR_NEARESTG] = 300;
    rd->thresh_mult[THR_NEARESTA] = 300;
  } else {
    rd->thresh_mult[THR_NEARESTMV] = 0;
    rd->thresh_mult[THR_NEARESTG] = 0;
    rd->thresh_mult[THR_NEARESTA] = 0;
  }

  rd->thresh_mult[THR_DC] += 1000;
  rd->thresh_mult[THR_NEWMV] += 1000;
  rd->thresh_mult[THR_NEWA] += 1000;
  rd->thresh_mult[THR_NEWG] += 1000;
  rd->thresh_mult[THR_NEARMV] += 1000;
  rd->thresh_mult[THR_NEARA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTLA] += 1000;
  rd->thresh_mult[THR_COMP_NEARESTGA] += 1000;
  rd->thresh_mult[THR_TM] += 1000;
  rd->thresh_mult[THR_COMP_NEARLA] += 1500;
  rd->thresh_mult[THR_COMP_NEWLA] += 2000;
  rd->thresh_mult[THR_NEARG] += 1000;
  rd->thresh_mult[THR_COMP_NEARGA] += 1500;
  rd->thresh_mult[THR_COMP_NEWGA] += 2000;
  rd->thresh_mult[THR_ZEROMV] += 2000;
  rd->thresh_mult[THR_ZEROG] += 2000;
  rd->thresh_mult[THR_ZEROA] += 2000;
  rd->thresh_mult[THR_COMP_ZEROLA] += 2500;
  rd->thresh_mult[THR_COMP_ZEROGA] += 2500;
  rd->thresh_mult[THR_H_PRED] += 2000;
  rd->thresh_mult[THR_V_PRED] += 2000;
  rd->thresh_mult[THR_D45_PRED] += 2500;
  rd->thresh_mult[THR_D135_PRED] += 2500;
  rd->thresh_mult[THR_D117_PRED] += 2500;
  rd->thresh_mult[THR_D153_PRED] += 2500;
  rd->thresh_mult[THR_D207_PRED] += 2500;
  rd->thresh_mult[THR_D63_PRED] += 2500;
}

void vp9_set_rd_speed_thresholds_sub8x8(VP9_COMP *cpi) {
  static const int thresh_mult[2][MAX_REFS] = {
    { 2500, 2500, 2500, 4500, 4500, 2500 },
    { 2000, 2000, 2000, 4000, 4000, 2000 }
  };
  RD_OPT *const rd = &cpi->rd;
  const int idx = cpi->oxcf.mode == BEST;
  memcpy(rd->thresh_mult_sub8x8, thresh_mult[idx], sizeof(thresh_mult[idx]));
}

void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
                               int bsize, int best_mode_index) {
  if (rd_thresh > 0) {
    const int top_mode = bsize < BLOCK_8X8 ? MAX_REFS : MAX_MODES;
    int mode;
    for (mode = 0; mode < top_mode; ++mode) {
      const BLOCK_SIZE min_size = VPXMAX(bsize - 1, BLOCK_4X4);
      const BLOCK_SIZE max_size = VPXMIN(bsize + 2, BLOCK_64X64);
      BLOCK_SIZE bs;
      for (bs = min_size; bs <= max_size; ++bs) {
        int *const fact = &factor_buf[bs][mode];
        if (mode == best_mode_index) {
          *fact -= (*fact >> 4);
        } else {
          *fact =
              VPXMIN(*fact + RD_THRESH_INC, rd_thresh * RD_THRESH_MAX_FACT);
        }
      }
    }
  }
}

int vp9_get_intra_cost_penalty(const VP9_COMP *const cpi, BLOCK_SIZE bsize,
                               int qindex, int qdelta) {
  // Reduce the intra cost penalty for small blocks (<=16x16).
  int reduction_fac =
      (bsize <= BLOCK_16X16) ? ((bsize <= BLOCK_8X8) ? 4 : 2) : 0;
  if (cpi->noise_estimate.enabled && cpi->noise_estimate.level == kHigh)
    // Don't reduce intra cost penalty if estimated noise level is high.
    reduction_fac = 0;
  // Always use VPX_BITS_8 as input here because the penalty is applied
  // to rate not distortion so we want a consistent penalty for all bit
  // depths. If the actual bit depth were passed in here then the value
  // returned by vp9_dc_quant() would scale with the bit depth and we would
  // then need to apply inverse scaling to correct back to a bit depth
  // independent rate penalty.
  return (20 * vp9_dc_quant(qindex, qdelta, VPX_BITS_8)) >> reduction_fac;
}

libvpx-1.8.2/vp9/encoder/vp9_rd.h

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.
 *  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VP9_ENCODER_VP9_RD_H_
#define VPX_VP9_ENCODER_VP9_RD_H_

#include <limits.h>

#include "vp9/common/vp9_blockd.h"

#include "vp9/encoder/vp9_block.h"
#include "vp9/encoder/vp9_context_tree.h"
#include "vp9/encoder/vp9_cost.h"

#ifdef __cplusplus
extern "C" {
#endif

#define RDDIV_BITS 7
#define RD_EPB_SHIFT 6

#define RDCOST(RM, DM, R, D) \
  ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), VP9_PROB_COST_SHIFT) + ((D) << (DM))

#define RDCOST_NEG_R(RM, DM, R, D) \
  ((D) << (DM)) - ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), VP9_PROB_COST_SHIFT)

#define RDCOST_NEG_D(RM, DM, R, D) \
  ROUND_POWER_OF_TWO(((int64_t)(R)) * (RM), VP9_PROB_COST_SHIFT) - ((D) << (DM))

#define QIDX_SKIP_THRESH 115

#define MV_COST_WEIGHT 108
#define MV_COST_WEIGHT_SUB 120

#define MAX_MODES 30
#define MAX_REFS 6

#define RD_THRESH_INIT_FACT 32
#define RD_THRESH_MAX_FACT 64
#define RD_THRESH_INC 1

#define VP9_DIST_SCALE_LOG2 4
#define VP9_DIST_SCALE (1 << VP9_DIST_SCALE_LOG2)

// This enumerator type needs to be kept aligned with the mode order in
// const MODE_DEFINITION vp9_mode_order[MAX_MODES] used in the rd code.
typedef enum {
  THR_NEARESTMV,
  THR_NEARESTA,
  THR_NEARESTG,

  THR_DC,

  THR_NEWMV,
  THR_NEWA,
  THR_NEWG,

  THR_NEARMV,
  THR_NEARA,
  THR_NEARG,

  THR_ZEROMV,
  THR_ZEROG,
  THR_ZEROA,

  THR_COMP_NEARESTLA,
  THR_COMP_NEARESTGA,

  THR_TM,

  THR_COMP_NEARLA,
  THR_COMP_NEWLA,
  THR_COMP_NEARGA,
  THR_COMP_NEWGA,

  THR_COMP_ZEROLA,
  THR_COMP_ZEROGA,

  THR_H_PRED,
  THR_V_PRED,
  THR_D135_PRED,
  THR_D207_PRED,
  THR_D153_PRED,
  THR_D63_PRED,
  THR_D117_PRED,
  THR_D45_PRED,
} THR_MODES;

typedef enum {
  THR_LAST,
  THR_GOLD,
  THR_ALTR,
  THR_COMP_LA,
  THR_COMP_GA,
  THR_INTRA,
} THR_MODES_SUB8X8;

typedef struct RD_OPT {
  // Thresh_mult is used to set a threshold for the rd score. A higher value
  // means that we will accept the best mode so far more often. This number
  // is used in combination with the current block size and thresh_freq_fact
  // to pick a threshold.
  int thresh_mult[MAX_MODES];
  int thresh_mult_sub8x8[MAX_REFS];

  int threshes[MAX_SEGMENTS][BLOCK_SIZES][MAX_MODES];

  int64_t prediction_type_threshes[MAX_REF_FRAMES][REFERENCE_MODES];

  int64_t filter_threshes[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
#if CONFIG_CONSISTENT_RECODE
  int64_t prediction_type_threshes_prev[MAX_REF_FRAMES][REFERENCE_MODES];

  int64_t filter_threshes_prev[MAX_REF_FRAMES][SWITCHABLE_FILTER_CONTEXTS];
#endif
  int RDMULT;
  int RDDIV;

  double r0;
} RD_OPT;

typedef struct RD_COST {
  int rate;
  int64_t dist;
  int64_t rdcost;
} RD_COST;

// Reset the rate distortion cost values to maximum (invalid) value.
void vp9_rd_cost_reset(RD_COST *rd_cost);
// Initialize the rate distortion cost values to zero.
void vp9_rd_cost_init(RD_COST *rd_cost);
// Unlike RDCOST(), this supports negative rate and dist values.
int64_t vp9_calculate_rd_cost(int mult, int div, int rate, int64_t dist);
// Update the cost value based on its rate and distortion.
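// (Typical usage, as a sketch: fill in rd_cost->rate and rd_cost->dist,
// then call vp9_rd_cost_update(rd_opt->RDMULT, rd_opt->RDDIV, rd_cost);
// if either field is still at its INT_MAX / INT64_MAX sentinel, the cost
// is reset to invalid instead of being computed.)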
void vp9_rd_cost_update(int mult, int div, RD_COST *rd_cost);

struct TileInfo;
struct TileDataEnc;
struct VP9_COMP;
struct macroblock;

int vp9_compute_rd_mult_based_on_qindex(const struct VP9_COMP *cpi,
                                        int qindex);

int vp9_compute_rd_mult(const struct VP9_COMP *cpi, int qindex);

int vp9_get_adaptive_rdmult(const struct VP9_COMP *cpi, double beta);

void vp9_initialize_rd_consts(struct VP9_COMP *cpi);

void vp9_initialize_me_consts(struct VP9_COMP *cpi, MACROBLOCK *x, int qindex);

void vp9_model_rd_from_var_lapndz(unsigned int var, unsigned int n_log2,
                                  unsigned int qstep, int *rate,
                                  int64_t *dist);

void vp9_model_rd_from_var_lapndz_vec(unsigned int var[MAX_MB_PLANE],
                                      unsigned int n_log2[MAX_MB_PLANE],
                                      unsigned int qstep[MAX_MB_PLANE],
                                      int64_t *rate_sum, int64_t *dist_sum);

int vp9_get_switchable_rate(const struct VP9_COMP *cpi,
                            const MACROBLOCKD *const xd);

int vp9_raster_block_offset(BLOCK_SIZE plane_bsize, int raster_block,
                            int stride);

int16_t *vp9_raster_block_offset_int16(BLOCK_SIZE plane_bsize,
                                       int raster_block, int16_t *base);

YV12_BUFFER_CONFIG *vp9_get_scaled_ref_frame(const struct VP9_COMP *cpi,
                                             int ref_frame);

void vp9_init_me_luts(void);

void vp9_get_entropy_contexts(BLOCK_SIZE bsize, TX_SIZE tx_size,
                              const struct macroblockd_plane *pd,
                              ENTROPY_CONTEXT t_above[16],
                              ENTROPY_CONTEXT t_left[16]);

void vp9_set_rd_speed_thresholds(struct VP9_COMP *cpi);

void vp9_set_rd_speed_thresholds_sub8x8(struct VP9_COMP *cpi);

void vp9_update_rd_thresh_fact(int (*factor_buf)[MAX_MODES], int rd_thresh,
                               int bsize, int best_mode_index);

static INLINE int rd_less_than_thresh(int64_t best_rd, int thresh,
                                      const int *const thresh_fact) {
  return best_rd < ((int64_t)thresh * (*thresh_fact) >> 5) ||
         thresh == INT_MAX;
}

static INLINE void set_error_per_bit(MACROBLOCK *x, int rdmult) {
  x->errorperbit = rdmult >> RD_EPB_SHIFT;
  x->errorperbit += (x->errorperbit == 0);
}

void vp9_mv_pred(struct VP9_COMP *cpi, MACROBLOCK *x, uint8_t *ref_y_buffer,
                 int ref_y_stride, int ref_frame, BLOCK_SIZE block_size);

void vp9_setup_pred_block(const MACROBLOCKD *xd,
                          struct buf_2d dst[MAX_MB_PLANE],
                          const YV12_BUFFER_CONFIG *src, int mi_row,
                          int mi_col, const struct scale_factors *scale,
                          const struct scale_factors *scale_uv);

int vp9_get_intra_cost_penalty(const struct VP9_COMP *const cpi,
                               BLOCK_SIZE bsize, int qindex, int qdelta);

unsigned int vp9_get_sby_variance(struct VP9_COMP *cpi,
                                  const struct buf_2d *ref, BLOCK_SIZE bs);
unsigned int vp9_get_sby_perpixel_variance(struct VP9_COMP *cpi,
                                           const struct buf_2d *ref,
                                           BLOCK_SIZE bs);
#if CONFIG_VP9_HIGHBITDEPTH
unsigned int vp9_high_get_sby_variance(struct VP9_COMP *cpi,
                                       const struct buf_2d *ref, BLOCK_SIZE bs,
                                       int bd);
unsigned int vp9_high_get_sby_perpixel_variance(struct VP9_COMP *cpi,
                                                const struct buf_2d *ref,
                                                BLOCK_SIZE bs, int bd);
#endif

void vp9_build_inter_mode_cost(struct VP9_COMP *cpi);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VP9_ENCODER_VP9_RD_H_

libvpx-1.8.2/vp9/encoder/vp9_rdopt.c

/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"

#include "vp9/common/vp9_common.h"
#include "vp9/common/vp9_entropy.h"
#include "vp9/common/vp9_entropymode.h"
#include "vp9/common/vp9_idct.h"
#include "vp9/common/vp9_mvref_common.h"
#include "vp9/common/vp9_pred_common.h"
#include "vp9/common/vp9_quant_common.h"
#include "vp9/common/vp9_reconinter.h"
#include "vp9/common/vp9_reconintra.h"
#include "vp9/common/vp9_scan.h"
#include "vp9/common/vp9_seg_common.h"

#if !CONFIG_REALTIME_ONLY
#include "vp9/encoder/vp9_aq_variance.h"
#endif
#include "vp9/encoder/vp9_cost.h"
#include "vp9/encoder/vp9_encodemb.h"
#include "vp9/encoder/vp9_encodemv.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_mcomp.h"
#include "vp9/encoder/vp9_quantize.h"
#include "vp9/encoder/vp9_ratectrl.h"
#include "vp9/encoder/vp9_rd.h"
#include "vp9/encoder/vp9_rdopt.h"

#define LAST_FRAME_MODE_MASK \
  ((1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define GOLDEN_FRAME_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << ALTREF_FRAME) | (1 << INTRA_FRAME))
#define ALT_REF_MODE_MASK \
  ((1 << LAST_FRAME) | (1 << GOLDEN_FRAME) | (1 << INTRA_FRAME))

#define SECOND_REF_FRAME_MASK ((1 << ALTREF_FRAME) | 0x01)

#define MIN_EARLY_TERM_INDEX 3
#define NEW_MV_DISCOUNT_FACTOR 8

typedef struct {
  PREDICTION_MODE mode;
  MV_REFERENCE_FRAME ref_frame[2];
} MODE_DEFINITION;

typedef struct {
  MV_REFERENCE_FRAME ref_frame[2];
} REF_DEFINITION;

struct rdcost_block_args {
  const VP9_COMP *cpi;
  MACROBLOCK *x;
  ENTROPY_CONTEXT t_above[16];
  ENTROPY_CONTEXT t_left[16];
  int this_rate;
  int64_t this_dist;
  int64_t this_sse;
  int64_t this_rd;
  int64_t best_rd;
  int exit_early;
  int use_fast_coef_costing;
  const scan_order *so;
  uint8_t skippable;
  struct buf_2d *this_recon;
};

#define LAST_NEW_MV_INDEX 6

#if !CONFIG_REALTIME_ONLY
static const MODE_DEFINITION vp9_mode_order[MAX_MODES] = {
  { NEARESTMV, { LAST_FRAME, NONE } },
  { NEARESTMV, { ALTREF_FRAME, NONE } },
  { NEARESTMV, { GOLDEN_FRAME, NONE } },

  { DC_PRED, { INTRA_FRAME, NONE } },

  { NEWMV, { LAST_FRAME, NONE } },
  { NEWMV, { ALTREF_FRAME, NONE } },
  { NEWMV, { GOLDEN_FRAME, NONE } },

  { NEARMV, { LAST_FRAME, NONE } },
  { NEARMV, { ALTREF_FRAME, NONE } },
  { NEARMV, { GOLDEN_FRAME, NONE } },

  { ZEROMV, { LAST_FRAME, NONE } },
  { ZEROMV, { GOLDEN_FRAME, NONE } },
  { ZEROMV, { ALTREF_FRAME, NONE } },

  { NEARESTMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEARESTMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { TM_PRED, { INTRA_FRAME, NONE } },

  { NEARMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEWMV, { LAST_FRAME, ALTREF_FRAME } },
  { NEARMV, { GOLDEN_FRAME, ALTREF_FRAME } },
  { NEWMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { ZEROMV, { LAST_FRAME, ALTREF_FRAME } },
  { ZEROMV, { GOLDEN_FRAME, ALTREF_FRAME } },

  { H_PRED, { INTRA_FRAME, NONE } },
  { V_PRED, { INTRA_FRAME, NONE } },
  { D135_PRED, { INTRA_FRAME, NONE } },
  { D207_PRED, { INTRA_FRAME, NONE } },
  { D153_PRED, { INTRA_FRAME, NONE } },
  { D63_PRED, { INTRA_FRAME, NONE } },
  { D117_PRED, { INTRA_FRAME, NONE } },
  { D45_PRED, { INTRA_FRAME, NONE } },
};

static const REF_DEFINITION vp9_ref_order[MAX_REFS] = {
  { { LAST_FRAME, NONE } },           { { GOLDEN_FRAME, NONE } },
  { { ALTREF_FRAME, NONE } },         { { LAST_FRAME, ALTREF_FRAME } },
  { { GOLDEN_FRAME, ALTREF_FRAME } }, { { INTRA_FRAME, NONE } },
};
#endif  // !CONFIG_REALTIME_ONLY

static void swap_block_ptr(MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int m, int n,
                           int min_plane, int max_plane) {
  int i;
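  // Point the macroblock's coeff/qcoeff/dqcoeff/eob buffers at context slot
  // m, then exchange the slot m and slot n pointers, so that two candidate
  // sets of quantized coefficients can be kept alive without copying data.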
for (i = min_plane; i < max_plane; ++i) { struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &x->e_mbd.plane[i]; p->coeff = ctx->coeff_pbuf[i][m]; p->qcoeff = ctx->qcoeff_pbuf[i][m]; pd->dqcoeff = ctx->dqcoeff_pbuf[i][m]; p->eobs = ctx->eobs_pbuf[i][m]; ctx->coeff_pbuf[i][m] = ctx->coeff_pbuf[i][n]; ctx->qcoeff_pbuf[i][m] = ctx->qcoeff_pbuf[i][n]; ctx->dqcoeff_pbuf[i][m] = ctx->dqcoeff_pbuf[i][n]; ctx->eobs_pbuf[i][m] = ctx->eobs_pbuf[i][n]; ctx->coeff_pbuf[i][n] = p->coeff; ctx->qcoeff_pbuf[i][n] = p->qcoeff; ctx->dqcoeff_pbuf[i][n] = pd->dqcoeff; ctx->eobs_pbuf[i][n] = p->eobs; } } #if !CONFIG_REALTIME_ONLY static void model_rd_for_sb(VP9_COMP *cpi, BLOCK_SIZE bsize, MACROBLOCK *x, MACROBLOCKD *xd, int *out_rate_sum, int64_t *out_dist_sum, int *skip_txfm_sb, int64_t *skip_sse_sb) { // Note our transform coeffs are 8 times an orthogonal transform. // Hence quantizer step is also 8 times. To get effective quantizer // we need to divide by 8 before sending to modeling function. int i; int64_t rate_sum = 0; int64_t dist_sum = 0; const int ref = xd->mi[0]->ref_frame[0]; unsigned int sse; unsigned int var = 0; int64_t total_sse = 0; int skip_flag = 1; const int shift = 6; int64_t dist; const int dequant_shift = #if CONFIG_VP9_HIGHBITDEPTH (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd - 5 : #endif // CONFIG_VP9_HIGHBITDEPTH 3; unsigned int qstep_vec[MAX_MB_PLANE]; unsigned int nlog2_vec[MAX_MB_PLANE]; unsigned int sum_sse_vec[MAX_MB_PLANE]; int any_zero_sum_sse = 0; x->pred_sse[ref] = 0; for (i = 0; i < MAX_MB_PLANE; ++i) { struct macroblock_plane *const p = &x->plane[i]; struct macroblockd_plane *const pd = &xd->plane[i]; const BLOCK_SIZE bs = get_plane_block_size(bsize, pd); const TX_SIZE max_tx_size = max_txsize_lookup[bs]; const BLOCK_SIZE unit_size = txsize_to_bsize[max_tx_size]; const int64_t dc_thr = p->quant_thred[0] >> shift; const int64_t ac_thr = p->quant_thred[1] >> shift; unsigned int sum_sse = 0; // The low thresholds are used to measure if the prediction errors are // low enough so that we can skip the mode search. const int64_t low_dc_thr = VPXMIN(50, dc_thr >> 2); const int64_t low_ac_thr = VPXMIN(80, ac_thr >> 2); int bw = 1 << (b_width_log2_lookup[bs] - b_width_log2_lookup[unit_size]); int bh = 1 << (b_height_log2_lookup[bs] - b_width_log2_lookup[unit_size]); int idx, idy; int lw = b_width_log2_lookup[unit_size] + 2; int lh = b_height_log2_lookup[unit_size] + 2; for (idy = 0; idy < bh; ++idy) { for (idx = 0; idx < bw; ++idx) { uint8_t *src = p->src.buf + (idy * p->src.stride << lh) + (idx << lw); uint8_t *dst = pd->dst.buf + (idy * pd->dst.stride << lh) + (idx << lh); int block_idx = (idy << 1) + idx; int low_err_skip = 0; var = cpi->fn_ptr[unit_size].vf(src, p->src.stride, dst, pd->dst.stride, &sse); x->bsse[(i << 2) + block_idx] = sse; sum_sse += sse; x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_NONE; if (!x->select_tx_size) { // Check if all ac coefficients can be quantized to zero. if (var < ac_thr || var == 0) { x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_ONLY; // Check if dc coefficient can be quantized to zero. 
if (sse - var < dc_thr || sse == var) { x->skip_txfm[(i << 2) + block_idx] = SKIP_TXFM_AC_DC; if (!sse || (var < low_ac_thr && sse - var < low_dc_thr)) low_err_skip = 1; } } } if (skip_flag && !low_err_skip) skip_flag = 0; if (i == 0) x->pred_sse[ref] += sse; } } total_sse += sum_sse; sum_sse_vec[i] = sum_sse; any_zero_sum_sse = any_zero_sum_sse || (sum_sse == 0); qstep_vec[i] = pd->dequant[1] >> dequant_shift; nlog2_vec[i] = num_pels_log2_lookup[bs]; } // Fast approximate the modelling function. if (cpi->sf.simple_model_rd_from_var) { for (i = 0; i < MAX_MB_PLANE; ++i) { int64_t rate; const int64_t square_error = sum_sse_vec[i]; int quantizer = qstep_vec[i]; if (quantizer < 120) rate = (square_error * (280 - quantizer)) >> (16 - VP9_PROB_COST_SHIFT); else rate = 0; dist = (square_error * quantizer) >> 8; rate_sum += rate; dist_sum += dist; } } else { if (any_zero_sum_sse) { for (i = 0; i < MAX_MB_PLANE; ++i) { int rate; vp9_model_rd_from_var_lapndz(sum_sse_vec[i], nlog2_vec[i], qstep_vec[i], &rate, &dist); rate_sum += rate; dist_sum += dist; } } else { vp9_model_rd_from_var_lapndz_vec(sum_sse_vec, nlog2_vec, qstep_vec, &rate_sum, &dist_sum); } } *skip_txfm_sb = skip_flag; *skip_sse_sb = total_sse << VP9_DIST_SCALE_LOG2; *out_rate_sum = (int)rate_sum; *out_dist_sum = dist_sum << VP9_DIST_SCALE_LOG2; } #endif // !CONFIG_REALTIME_ONLY #if CONFIG_VP9_HIGHBITDEPTH int64_t vp9_highbd_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd) { int i; int64_t error = 0, sqcoeff = 0; int shift = 2 * (bd - 8); int rounding = shift > 0 ? 1 << (shift - 1) : 0; for (i = 0; i < block_size; i++) { const int64_t diff = coeff[i] - dqcoeff[i]; error += diff * diff; sqcoeff += (int64_t)coeff[i] * (int64_t)coeff[i]; } assert(error >= 0 && sqcoeff >= 0); error = (error + rounding) >> shift; sqcoeff = (sqcoeff + rounding) >> shift; *ssz = sqcoeff; return error; } static int64_t vp9_highbd_block_error_dispatch(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd) { if (bd == 8) { return vp9_block_error(coeff, dqcoeff, block_size, ssz); } else { return vp9_highbd_block_error(coeff, dqcoeff, block_size, ssz, bd); } } #endif // CONFIG_VP9_HIGHBITDEPTH int64_t vp9_block_error_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz) { int i; int64_t error = 0, sqcoeff = 0; for (i = 0; i < block_size; i++) { const int diff = coeff[i] - dqcoeff[i]; error += diff * diff; sqcoeff += coeff[i] * coeff[i]; } *ssz = sqcoeff; return error; } int64_t vp9_block_error_fp_c(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size) { int i; int64_t error = 0; for (i = 0; i < block_size; i++) { const int diff = coeff[i] - dqcoeff[i]; error += diff * diff; } return error; } /* The trailing '0' is a terminator which is used inside cost_coeffs() to * decide whether to include cost of a trailing EOB node or not (i.e. we * can skip this if the last coefficient in this transform block, e.g. the * 16th coefficient in a 4x4 block or the 64th coefficient in a 8x8 block, * were non-zero). 
*/ static const int16_t band_counts[TX_SIZES][8] = { { 1, 2, 3, 4, 3, 16 - 13, 0 }, { 1, 2, 3, 4, 11, 64 - 21, 0 }, { 1, 2, 3, 4, 11, 256 - 21, 0 }, { 1, 2, 3, 4, 11, 1024 - 21, 0 }, }; static int cost_coeffs(MACROBLOCK *x, int plane, int block, TX_SIZE tx_size, int pt, const int16_t *scan, const int16_t *nb, int use_fast_coef_costing) { MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; const struct macroblock_plane *p = &x->plane[plane]; const PLANE_TYPE type = get_plane_type(plane); const int16_t *band_count = &band_counts[tx_size][1]; const int eob = p->eobs[block]; const tran_low_t *const qcoeff = BLOCK_OFFSET(p->qcoeff, block); unsigned int(*token_costs)[2][COEFF_CONTEXTS][ENTROPY_TOKENS] = x->token_costs[tx_size][type][is_inter_block(mi)]; uint8_t token_cache[32 * 32]; int cost; #if CONFIG_VP9_HIGHBITDEPTH const uint16_t *cat6_high_cost = vp9_get_high_cost_table(xd->bd); #else const uint16_t *cat6_high_cost = vp9_get_high_cost_table(8); #endif // Check for consistency of tx_size with mode info assert(type == PLANE_TYPE_Y ? mi->tx_size == tx_size : get_uv_tx_size(mi, &xd->plane[plane]) == tx_size); if (eob == 0) { // single eob token cost = token_costs[0][0][pt][EOB_TOKEN]; } else { if (use_fast_coef_costing) { int band_left = *band_count++; int c; // dc token int v = qcoeff[0]; int16_t prev_t; cost = vp9_get_token_cost(v, &prev_t, cat6_high_cost); cost += (*token_costs)[0][pt][prev_t]; token_cache[0] = vp9_pt_energy_class[prev_t]; ++token_costs; // ac tokens for (c = 1; c < eob; c++) { const int rc = scan[c]; int16_t t; v = qcoeff[rc]; cost += vp9_get_token_cost(v, &t, cat6_high_cost); cost += (*token_costs)[!prev_t][!prev_t][t]; prev_t = t; if (!--band_left) { band_left = *band_count++; ++token_costs; } } // eob token if (band_left) cost += (*token_costs)[0][!prev_t][EOB_TOKEN]; } else { // !use_fast_coef_costing int band_left = *band_count++; int c; // dc token int v = qcoeff[0]; int16_t tok; unsigned int(*tok_cost_ptr)[COEFF_CONTEXTS][ENTROPY_TOKENS]; cost = vp9_get_token_cost(v, &tok, cat6_high_cost); cost += (*token_costs)[0][pt][tok]; token_cache[0] = vp9_pt_energy_class[tok]; ++token_costs; tok_cost_ptr = &((*token_costs)[!tok]); // ac tokens for (c = 1; c < eob; c++) { const int rc = scan[c]; v = qcoeff[rc]; cost += vp9_get_token_cost(v, &tok, cat6_high_cost); pt = get_coef_context(nb, token_cache, c); cost += (*tok_cost_ptr)[pt][tok]; token_cache[rc] = vp9_pt_energy_class[tok]; if (!--band_left) { band_left = *band_count++; ++token_costs; } tok_cost_ptr = &((*token_costs)[!tok]); } // eob token if (band_left) { pt = get_coef_context(nb, token_cache, c); cost += (*token_costs)[0][pt][EOB_TOKEN]; } } } return cost; } static INLINE int num_4x4_to_edge(int plane_4x4_dim, int mb_to_edge_dim, int subsampling_dim, int blk_dim) { return plane_4x4_dim + (mb_to_edge_dim >> (5 + subsampling_dim)) - blk_dim; } // Copy all visible 4x4s in the transform block. 
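// Here "visible" means the 4x4 sub-blocks that fall inside the frame:
// num_4x4_to_edge() above returns how many 4x4 units remain before the
// right/bottom frame edge (xd->mb_to_*_edge is negative when the block
// overhangs the edge), and sub-blocks beyond that are skipped.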
static void copy_block_visible(const MACROBLOCKD *xd, const struct macroblockd_plane *const pd, const uint8_t *src, const int src_stride, uint8_t *dst, const int dst_stride, int blk_row, int blk_col, const BLOCK_SIZE plane_bsize, const BLOCK_SIZE tx_bsize) { const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize]; const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize]; int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge, pd->subsampling_x, blk_col); int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge, pd->subsampling_y, blk_row); const int is_highbd = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH; if (tx_bsize == BLOCK_4X4 || (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) { const int w = tx_4x4_w << 2; const int h = tx_4x4_h << 2; #if CONFIG_VP9_HIGHBITDEPTH if (is_highbd) { vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(src), src_stride, CONVERT_TO_SHORTPTR(dst), dst_stride, NULL, 0, 0, 0, 0, w, h, xd->bd); } else { #endif vpx_convolve_copy(src, src_stride, dst, dst_stride, NULL, 0, 0, 0, 0, w, h); #if CONFIG_VP9_HIGHBITDEPTH } #endif } else { int r, c; int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h); int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w); // if we are in the unrestricted motion border. for (r = 0; r < max_r; ++r) { // Skip visiting the sub blocks that are wholly within the UMV. for (c = 0; c < max_c; ++c) { const uint8_t *src_ptr = src + r * src_stride * 4 + c * 4; uint8_t *dst_ptr = dst + r * dst_stride * 4 + c * 4; #if CONFIG_VP9_HIGHBITDEPTH if (is_highbd) { vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(src_ptr), src_stride, CONVERT_TO_SHORTPTR(dst_ptr), dst_stride, NULL, 0, 0, 0, 0, 4, 4, xd->bd); } else { #endif vpx_convolve_copy(src_ptr, src_stride, dst_ptr, dst_stride, NULL, 0, 0, 0, 0, 4, 4); #if CONFIG_VP9_HIGHBITDEPTH } #endif } } } (void)is_highbd; } // Compute the pixel domain sum square error on all visible 4x4s in the // transform block. static unsigned pixel_sse(const VP9_COMP *const cpi, const MACROBLOCKD *xd, const struct macroblockd_plane *const pd, const uint8_t *src, const int src_stride, const uint8_t *dst, const int dst_stride, int blk_row, int blk_col, const BLOCK_SIZE plane_bsize, const BLOCK_SIZE tx_bsize) { unsigned int sse = 0; const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize]; const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize]; const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize]; const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize]; int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge, pd->subsampling_x, blk_col); int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge, pd->subsampling_y, blk_row); if (tx_bsize == BLOCK_4X4 || (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) { cpi->fn_ptr[tx_bsize].vf(src, src_stride, dst, dst_stride, &sse); } else { const vpx_variance_fn_t vf_4x4 = cpi->fn_ptr[BLOCK_4X4].vf; int r, c; unsigned this_sse = 0; int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h); int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w); sse = 0; // if we are in the unrestricted motion border. for (r = 0; r < max_r; ++r) { // Skip visiting the sub blocks that are wholly within the UMV. 
      for (c = 0; c < max_c; ++c) {
        vf_4x4(src + r * src_stride * 4 + c * 4, src_stride,
               dst + r * dst_stride * 4 + c * 4, dst_stride, &this_sse);
        sse += this_sse;
      }
    }
  }
  return sse;
}

// Compute the sum of squares on all visible 4x4s in the transform block.
static int64_t sum_squares_visible(const MACROBLOCKD *xd,
                                   const struct macroblockd_plane *const pd,
                                   const int16_t *diff, const int diff_stride,
                                   int blk_row, int blk_col,
                                   const BLOCK_SIZE plane_bsize,
                                   const BLOCK_SIZE tx_bsize) {
  int64_t sse;
  const int plane_4x4_w = num_4x4_blocks_wide_lookup[plane_bsize];
  const int plane_4x4_h = num_4x4_blocks_high_lookup[plane_bsize];
  const int tx_4x4_w = num_4x4_blocks_wide_lookup[tx_bsize];
  const int tx_4x4_h = num_4x4_blocks_high_lookup[tx_bsize];
  int b4x4s_to_right_edge = num_4x4_to_edge(plane_4x4_w, xd->mb_to_right_edge,
                                            pd->subsampling_x, blk_col);
  int b4x4s_to_bottom_edge = num_4x4_to_edge(plane_4x4_h, xd->mb_to_bottom_edge,
                                             pd->subsampling_y, blk_row);
  if (tx_bsize == BLOCK_4X4 ||
      (b4x4s_to_right_edge >= tx_4x4_w && b4x4s_to_bottom_edge >= tx_4x4_h)) {
    assert(tx_4x4_w == tx_4x4_h);
    sse = (int64_t)vpx_sum_squares_2d_i16(diff, diff_stride, tx_4x4_w << 2);
  } else {
    int r, c;
    int max_r = VPXMIN(b4x4s_to_bottom_edge, tx_4x4_h);
    int max_c = VPXMIN(b4x4s_to_right_edge, tx_4x4_w);
    sse = 0;
    // if we are in the unrestricted motion border.
    for (r = 0; r < max_r; ++r) {
      // Skip visiting the sub blocks that are wholly within the UMV.
      for (c = 0; c < max_c; ++c) {
        sse += (int64_t)vpx_sum_squares_2d_i16(
            diff + r * diff_stride * 4 + c * 4, diff_stride, 4);
      }
    }
  }
  return sse;
}

static void dist_block(const VP9_COMP *cpi, MACROBLOCK *x, int plane,
                       BLOCK_SIZE plane_bsize, int block, int blk_row,
                       int blk_col, TX_SIZE tx_size, int64_t *out_dist,
                       int64_t *out_sse, struct buf_2d *out_recon) {
  MACROBLOCKD *const xd = &x->e_mbd;
  const struct macroblock_plane *const p = &x->plane[plane];
  const struct macroblockd_plane *const pd = &xd->plane[plane];
  const int eob = p->eobs[block];
  if (!out_recon && x->block_tx_domain && eob) {
    const int ss_txfrm_size = tx_size << 1;
    int64_t this_sse;
    const int shift = tx_size == TX_32X32 ? 0 : 2;
    const tran_low_t *const coeff = BLOCK_OFFSET(p->coeff, block);
    const tran_low_t *const dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block);
#if CONFIG_VP9_HIGHBITDEPTH
    const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? xd->bd : 8;
    *out_dist = vp9_highbd_block_error_dispatch(
                    coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse, bd) >>
                shift;
#else
    *out_dist =
        vp9_block_error(coeff, dqcoeff, 16 << ss_txfrm_size, &this_sse) >>
        shift;
#endif  // CONFIG_VP9_HIGHBITDEPTH
    *out_sse = this_sse >> shift;

    if (x->skip_encode && !is_inter_block(xd->mi[0])) {
      // TODO(jingning): tune the model to better capture the distortion.
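      // Rough model: when the encode of this intra block is skipped, add a
      // penalty derived from the AC dequantizer step so the estimated
      // distortion/SSE do not read as artificially low. Illustrative numbers
      // (assuming 8-bit): for TX_8X8 (ss_txfrm_size == 2, shift == 2) with
      // pd->dequant[1] == 32, p = (32 * 32 * 4) >> 4 = 256, so *out_sse grows
      // by 256 and *out_dist by 256 >> 4 = 16.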
const int64_t p = (pd->dequant[1] * pd->dequant[1] * (1 << ss_txfrm_size)) >> #if CONFIG_VP9_HIGHBITDEPTH (shift + 2 + (bd - 8) * 2); #else (shift + 2); #endif // CONFIG_VP9_HIGHBITDEPTH *out_dist += (p >> 4); *out_sse += p; } } else { const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; const int bs = 4 * num_4x4_blocks_wide_lookup[tx_bsize]; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; const int src_idx = 4 * (blk_row * src_stride + blk_col); const int dst_idx = 4 * (blk_row * dst_stride + blk_col); const uint8_t *src = &p->src.buf[src_idx]; const uint8_t *dst = &pd->dst.buf[dst_idx]; uint8_t *out_recon_ptr = 0; const tran_low_t *dqcoeff = BLOCK_OFFSET(pd->dqcoeff, block); unsigned int tmp; tmp = pixel_sse(cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row, blk_col, plane_bsize, tx_bsize); *out_sse = (int64_t)tmp * 16; if (out_recon) { const int out_recon_idx = 4 * (blk_row * out_recon->stride + blk_col); out_recon_ptr = &out_recon->buf[out_recon_idx]; copy_block_visible(xd, pd, dst, dst_stride, out_recon_ptr, out_recon->stride, blk_row, blk_col, plane_bsize, tx_bsize); } if (eob) { #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, recon16[1024]); uint8_t *recon = (uint8_t *)recon16; #else DECLARE_ALIGNED(16, uint8_t, recon[1024]); #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vpx_highbd_convolve_copy(CONVERT_TO_SHORTPTR(dst), dst_stride, recon16, 32, NULL, 0, 0, 0, 0, bs, bs, xd->bd); if (xd->lossless) { vp9_highbd_iwht4x4_add(dqcoeff, recon16, 32, eob, xd->bd); } else { switch (tx_size) { case TX_4X4: vp9_highbd_idct4x4_add(dqcoeff, recon16, 32, eob, xd->bd); break; case TX_8X8: vp9_highbd_idct8x8_add(dqcoeff, recon16, 32, eob, xd->bd); break; case TX_16X16: vp9_highbd_idct16x16_add(dqcoeff, recon16, 32, eob, xd->bd); break; default: assert(tx_size == TX_32X32); vp9_highbd_idct32x32_add(dqcoeff, recon16, 32, eob, xd->bd); break; } } recon = CONVERT_TO_BYTEPTR(recon16); } else { #endif // CONFIG_VP9_HIGHBITDEPTH vpx_convolve_copy(dst, dst_stride, recon, 32, NULL, 0, 0, 0, 0, bs, bs); switch (tx_size) { case TX_32X32: vp9_idct32x32_add(dqcoeff, recon, 32, eob); break; case TX_16X16: vp9_idct16x16_add(dqcoeff, recon, 32, eob); break; case TX_8X8: vp9_idct8x8_add(dqcoeff, recon, 32, eob); break; default: assert(tx_size == TX_4X4); // this is like vp9_short_idct4x4 but has a special case around // eob<=1, which is significant (not just an optimization) for // the lossless case. 
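            // x->inv_txfm_add is presumably wired to the WHT-based inverse
            // (vp9_iwht4x4_add) when xd->lossless is set and to the regular
            // 4x4 IDCT path otherwise, which is why one indirect call
            // suffices here.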
x->inv_txfm_add(dqcoeff, recon, 32, eob); break; } #if CONFIG_VP9_HIGHBITDEPTH } #endif // CONFIG_VP9_HIGHBITDEPTH tmp = pixel_sse(cpi, xd, pd, src, src_stride, recon, 32, blk_row, blk_col, plane_bsize, tx_bsize); if (out_recon) { copy_block_visible(xd, pd, recon, 32, out_recon_ptr, out_recon->stride, blk_row, blk_col, plane_bsize, tx_bsize); } } *out_dist = (int64_t)tmp * 16; } } static int rate_block(int plane, int block, TX_SIZE tx_size, int coeff_ctx, struct rdcost_block_args *args) { return cost_coeffs(args->x, plane, block, tx_size, coeff_ctx, args->so->scan, args->so->neighbors, args->use_fast_coef_costing); } static void block_rd_txfm(int plane, int block, int blk_row, int blk_col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct rdcost_block_args *args = arg; MACROBLOCK *const x = args->x; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; int64_t rd1, rd2, rd; int rate; int64_t dist; int64_t sse; const int coeff_ctx = combine_entropy_contexts(args->t_left[blk_row], args->t_above[blk_col]); struct buf_2d *recon = args->this_recon; const BLOCK_SIZE tx_bsize = txsize_to_bsize[tx_size]; const struct macroblockd_plane *const pd = &xd->plane[plane]; const int dst_stride = pd->dst.stride; const uint8_t *dst = &pd->dst.buf[4 * (blk_row * dst_stride + blk_col)]; if (args->exit_early) return; if (!is_inter_block(mi)) { #if CONFIG_MISMATCH_DEBUG struct encode_b_args intra_arg = { x, x->block_qcoeff_opt, args->t_above, args->t_left, &mi->skip, 0, 0, 0 }; #else struct encode_b_args intra_arg = { x, x->block_qcoeff_opt, args->t_above, args->t_left, &mi->skip }; #endif vp9_encode_block_intra(plane, block, blk_row, blk_col, plane_bsize, tx_size, &intra_arg); if (recon) { uint8_t *rec_ptr = &recon->buf[4 * (blk_row * recon->stride + blk_col)]; copy_block_visible(xd, pd, dst, dst_stride, rec_ptr, recon->stride, blk_row, blk_col, plane_bsize, tx_bsize); } if (x->block_tx_domain) { dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size, &dist, &sse, /*recon =*/0); } else { const struct macroblock_plane *const p = &x->plane[plane]; const int src_stride = p->src.stride; const int diff_stride = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const uint8_t *src = &p->src.buf[4 * (blk_row * src_stride + blk_col)]; const int16_t *diff = &p->src_diff[4 * (blk_row * diff_stride + blk_col)]; unsigned int tmp; sse = sum_squares_visible(xd, pd, diff, diff_stride, blk_row, blk_col, plane_bsize, tx_bsize); #if CONFIG_VP9_HIGHBITDEPTH if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && (xd->bd > 8)) sse = ROUND64_POWER_OF_TWO(sse, (xd->bd - 8) * 2); #endif // CONFIG_VP9_HIGHBITDEPTH sse = sse * 16; tmp = pixel_sse(args->cpi, xd, pd, src, src_stride, dst, dst_stride, blk_row, blk_col, plane_bsize, tx_bsize); dist = (int64_t)tmp * 16; } } else { int skip_txfm_flag = SKIP_TXFM_NONE; if (max_txsize_lookup[plane_bsize] == tx_size) skip_txfm_flag = x->skip_txfm[(plane << 2) + (block >> (tx_size << 1))]; if (skip_txfm_flag == SKIP_TXFM_NONE || (recon && skip_txfm_flag == SKIP_TXFM_AC_ONLY)) { // full forward transform and quantization vp9_xform_quant(x, plane, block, blk_row, blk_col, plane_bsize, tx_size); if (x->block_qcoeff_opt) vp9_optimize_b(x, plane, block, tx_size, coeff_ctx); dist_block(args->cpi, x, plane, plane_bsize, block, blk_row, blk_col, tx_size, &dist, &sse, recon); } else if (skip_txfm_flag == SKIP_TXFM_AC_ONLY) { // compute DC coefficient tran_low_t *const coeff = BLOCK_OFFSET(x->plane[plane].coeff, block); tran_low_t *const dqcoeff = 
BLOCK_OFFSET(xd->plane[plane].dqcoeff, block); vp9_xform_quant_dc(x, plane, block, blk_row, blk_col, plane_bsize, tx_size); sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; dist = sse; if (x->plane[plane].eobs[block]) { const int64_t orig_sse = (int64_t)coeff[0] * coeff[0]; const int64_t resd_sse = coeff[0] - dqcoeff[0]; int64_t dc_correct = orig_sse - resd_sse * resd_sse; #if CONFIG_VP9_HIGHBITDEPTH dc_correct >>= ((xd->bd - 8) * 2); #endif if (tx_size != TX_32X32) dc_correct >>= 2; dist = VPXMAX(0, sse - dc_correct); } } else { // SKIP_TXFM_AC_DC // skip forward transform. Because this is handled here, the quantization // does not need to do it. x->plane[plane].eobs[block] = 0; sse = x->bsse[(plane << 2) + (block >> (tx_size << 1))] << 4; dist = sse; if (recon) { uint8_t *rec_ptr = &recon->buf[4 * (blk_row * recon->stride + blk_col)]; copy_block_visible(xd, pd, dst, dst_stride, rec_ptr, recon->stride, blk_row, blk_col, plane_bsize, tx_bsize); } } } rd = RDCOST(x->rdmult, x->rddiv, 0, dist); if (args->this_rd + rd > args->best_rd) { args->exit_early = 1; return; } rate = rate_block(plane, block, tx_size, coeff_ctx, args); args->t_above[blk_col] = (x->plane[plane].eobs[block] > 0) ? 1 : 0; args->t_left[blk_row] = (x->plane[plane].eobs[block] > 0) ? 1 : 0; rd1 = RDCOST(x->rdmult, x->rddiv, rate, dist); rd2 = RDCOST(x->rdmult, x->rddiv, 0, sse); // TODO(jingning): temporarily enabled only for luma component rd = VPXMIN(rd1, rd2); if (plane == 0) { x->zcoeff_blk[tx_size][block] = !x->plane[plane].eobs[block] || (x->sharpness == 0 && rd1 > rd2 && !xd->lossless); x->sum_y_eobs[tx_size] += x->plane[plane].eobs[block]; } args->this_rate += rate; args->this_dist += dist; args->this_sse += sse; args->this_rd += rd; if (args->this_rd > args->best_rd) { args->exit_early = 1; return; } args->skippable &= !x->plane[plane].eobs[block]; } static void txfm_rd_in_plane(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skippable, int64_t *sse, int64_t ref_best_rd, int plane, BLOCK_SIZE bsize, TX_SIZE tx_size, int use_fast_coef_costing, struct buf_2d *recon) { MACROBLOCKD *const xd = &x->e_mbd; const struct macroblockd_plane *const pd = &xd->plane[plane]; struct rdcost_block_args args; vp9_zero(args); args.cpi = cpi; args.x = x; args.best_rd = ref_best_rd; args.use_fast_coef_costing = use_fast_coef_costing; args.skippable = 1; args.this_recon = recon; if (plane == 0) xd->mi[0]->tx_size = tx_size; vp9_get_entropy_contexts(bsize, tx_size, pd, args.t_above, args.t_left); args.so = get_scan(xd, tx_size, get_plane_type(plane), 0); vp9_foreach_transformed_block_in_plane(xd, bsize, plane, block_rd_txfm, &args); if (args.exit_early) { *rate = INT_MAX; *distortion = INT64_MAX; *sse = INT64_MAX; *skippable = 0; } else { *distortion = args.this_dist; *rate = args.this_rate; *sse = args.this_sse; *skippable = args.skippable; } } static void choose_largest_tx_size(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, int64_t *sse, int64_t ref_best_rd, BLOCK_SIZE bs, struct buf_2d *recon) { const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; const TX_SIZE largest_tx_size = tx_mode_to_biggest_tx_size[cm->tx_mode]; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; mi->tx_size = VPXMIN(max_tx_size, largest_tx_size); txfm_rd_in_plane(cpi, x, rate, distortion, skip, sse, ref_best_rd, 0, bs, mi->tx_size, cpi->sf.use_fast_coef_costing, recon); } static void choose_tx_size_from_rd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, 
int64_t *distortion, int *skip, int64_t *psse, int64_t ref_best_rd, BLOCK_SIZE bs, struct buf_2d *recon) { const TX_SIZE max_tx_size = max_txsize_lookup[bs]; VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); int r[TX_SIZES][2], s[TX_SIZES]; int64_t d[TX_SIZES], sse[TX_SIZES]; int64_t rd[TX_SIZES][2] = { { INT64_MAX, INT64_MAX }, { INT64_MAX, INT64_MAX }, { INT64_MAX, INT64_MAX }, { INT64_MAX, INT64_MAX } }; int n; int s0, s1; int64_t best_rd = ref_best_rd; TX_SIZE best_tx = max_tx_size; int start_tx, end_tx; const int tx_size_ctx = get_tx_size_context(xd); #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, recon_buf16[TX_SIZES][64 * 64]); uint8_t *recon_buf[TX_SIZES]; for (n = 0; n < TX_SIZES; ++n) { if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { recon_buf[n] = CONVERT_TO_BYTEPTR(recon_buf16[n]); } else { recon_buf[n] = (uint8_t *)recon_buf16[n]; } } #else DECLARE_ALIGNED(16, uint8_t, recon_buf[TX_SIZES][64 * 64]); #endif // CONFIG_VP9_HIGHBITDEPTH assert(skip_prob > 0); s0 = vp9_cost_bit(skip_prob, 0); s1 = vp9_cost_bit(skip_prob, 1); if (cm->tx_mode == TX_MODE_SELECT) { start_tx = max_tx_size; end_tx = VPXMAX(start_tx - cpi->sf.tx_size_search_depth, 0); if (bs > BLOCK_32X32) end_tx = VPXMIN(end_tx + 1, start_tx); } else { TX_SIZE chosen_tx_size = VPXMIN(max_tx_size, tx_mode_to_biggest_tx_size[cm->tx_mode]); start_tx = chosen_tx_size; end_tx = chosen_tx_size; } for (n = start_tx; n >= end_tx; n--) { const int r_tx_size = cpi->tx_size_cost[max_tx_size - 1][tx_size_ctx][n]; if (recon) { struct buf_2d this_recon; this_recon.buf = recon_buf[n]; this_recon.stride = recon->stride; txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, n, cpi->sf.use_fast_coef_costing, &this_recon); } else { txfm_rd_in_plane(cpi, x, &r[n][0], &d[n], &s[n], &sse[n], best_rd, 0, bs, n, cpi->sf.use_fast_coef_costing, 0); } r[n][1] = r[n][0]; if (r[n][0] < INT_MAX) { r[n][1] += r_tx_size; } if (d[n] == INT64_MAX || r[n][0] == INT_MAX) { rd[n][0] = rd[n][1] = INT64_MAX; } else if (s[n]) { if (is_inter_block(mi)) { rd[n][0] = rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]); r[n][1] -= r_tx_size; } else { rd[n][0] = RDCOST(x->rdmult, x->rddiv, s1, sse[n]); rd[n][1] = RDCOST(x->rdmult, x->rddiv, s1 + r_tx_size, sse[n]); } } else { rd[n][0] = RDCOST(x->rdmult, x->rddiv, r[n][0] + s0, d[n]); rd[n][1] = RDCOST(x->rdmult, x->rddiv, r[n][1] + s0, d[n]); } if (is_inter_block(mi) && !xd->lossless && !s[n] && sse[n] != INT64_MAX) { rd[n][0] = VPXMIN(rd[n][0], RDCOST(x->rdmult, x->rddiv, s1, sse[n])); rd[n][1] = VPXMIN(rd[n][1], RDCOST(x->rdmult, x->rddiv, s1, sse[n])); } // Early termination in transform size search. 
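    // Concretely, the breakout below stops the descent through transform
    // sizes once the rd cost is invalid (INT64_MAX), is worse than the cost
    // at the next larger size, or the block is fully skippable at this size.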
if (cpi->sf.tx_size_search_breakout && (rd[n][1] == INT64_MAX || (n < (int)max_tx_size && rd[n][1] > rd[n + 1][1]) || s[n] == 1)) break; if (rd[n][1] < best_rd) { best_tx = n; best_rd = rd[n][1]; } } mi->tx_size = best_tx; *distortion = d[mi->tx_size]; *rate = r[mi->tx_size][cm->tx_mode == TX_MODE_SELECT]; *skip = s[mi->tx_size]; *psse = sse[mi->tx_size]; if (recon) { #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { memcpy(CONVERT_TO_SHORTPTR(recon->buf), CONVERT_TO_SHORTPTR(recon_buf[mi->tx_size]), 64 * 64 * sizeof(uint16_t)); } else { #endif memcpy(recon->buf, recon_buf[mi->tx_size], 64 * 64); #if CONFIG_VP9_HIGHBITDEPTH } #endif } } static void super_block_yrd(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skip, int64_t *psse, BLOCK_SIZE bs, int64_t ref_best_rd, struct buf_2d *recon) { MACROBLOCKD *xd = &x->e_mbd; int64_t sse; int64_t *ret_sse = psse ? psse : &sse; assert(bs == xd->mi[0]->sb_type); if (cpi->sf.tx_size_search_method == USE_LARGESTALL || xd->lossless) { choose_largest_tx_size(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd, bs, recon); } else { choose_tx_size_from_rd(cpi, x, rate, distortion, skip, ret_sse, ref_best_rd, bs, recon); } } static int conditional_skipintra(PREDICTION_MODE mode, PREDICTION_MODE best_intra_mode) { if (mode == D117_PRED && best_intra_mode != V_PRED && best_intra_mode != D135_PRED) return 1; if (mode == D63_PRED && best_intra_mode != V_PRED && best_intra_mode != D45_PRED) return 1; if (mode == D207_PRED && best_intra_mode != H_PRED && best_intra_mode != D45_PRED) return 1; if (mode == D153_PRED && best_intra_mode != H_PRED && best_intra_mode != D135_PRED) return 1; return 0; } static int64_t rd_pick_intra4x4block(VP9_COMP *cpi, MACROBLOCK *x, int row, int col, PREDICTION_MODE *best_mode, const int *bmode_costs, ENTROPY_CONTEXT *a, ENTROPY_CONTEXT *l, int *bestrate, int *bestratey, int64_t *bestdistortion, BLOCK_SIZE bsize, int64_t rd_thresh) { PREDICTION_MODE mode; MACROBLOCKD *const xd = &x->e_mbd; int64_t best_rd = rd_thresh; struct macroblock_plane *p = &x->plane[0]; struct macroblockd_plane *pd = &xd->plane[0]; const int src_stride = p->src.stride; const int dst_stride = pd->dst.stride; const uint8_t *src_init = &p->src.buf[row * 4 * src_stride + col * 4]; uint8_t *dst_init = &pd->dst.buf[row * 4 * src_stride + col * 4]; ENTROPY_CONTEXT ta[2], tempa[2]; ENTROPY_CONTEXT tl[2], templ[2]; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; uint8_t best_dst[8 * 8]; #if CONFIG_VP9_HIGHBITDEPTH uint16_t best_dst16[8 * 8]; #endif memcpy(ta, a, num_4x4_blocks_wide * sizeof(a[0])); memcpy(tl, l, num_4x4_blocks_high * sizeof(l[0])); xd->mi[0]->tx_size = TX_4X4; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { for (mode = DC_PRED; mode <= TM_PRED; ++mode) { int64_t this_rd; int ratey = 0; int64_t distortion = 0; int rate = bmode_costs[mode]; if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue; // Only do the oblique modes if the best so far is // one of the neighboring directional modes if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { if (conditional_skipintra(mode, *best_mode)) continue; } memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0])); memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0])); for (idy = 0; idy < num_4x4_blocks_high; ++idy) { for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { const int block = (row + idy) * 2 + (col + 
idx); const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride]; uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride]; uint16_t *const dst16 = CONVERT_TO_SHORTPTR(dst); int16_t *const src_diff = vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff); tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); xd->mi[0]->bmi[block].as_mode = mode; vp9_predict_intra_block(xd, 1, TX_4X4, mode, x->skip_encode ? src : dst, x->skip_encode ? src_stride : dst_stride, dst, dst_stride, col + idx, row + idy, 0); vpx_highbd_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride, xd->bd); if (xd->lossless) { const scan_order *so = &vp9_default_scan_orders[TX_4X4]; const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]); vp9_highbd_fwht4x4(src_diff, coeff, 8); vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 1 : 0); if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next_highbd; vp9_highbd_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst16, dst_stride, p->eobs[block], xd->bd); } else { int64_t unused; const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block); const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type]; const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]); if (tx_type == DCT_DCT) vpx_highbd_fdct4x4(src_diff, coeff, 8); else vp9_highbd_fht4x4(src_diff, coeff, 8, tx_type); vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); distortion += vp9_highbd_block_error_dispatch( coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused, xd->bd) >> 2; tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0 ? 
1 : 0); if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next_highbd; vp9_highbd_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), dst16, dst_stride, p->eobs[block], xd->bd); } } } rate += ratey; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_rd) { *bestrate = rate; *bestratey = ratey; *bestdistortion = distortion; best_rd = this_rd; *best_mode = mode; memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0])); memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0])); for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) { memcpy(best_dst16 + idy * 8, CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride), num_4x4_blocks_wide * 4 * sizeof(uint16_t)); } } next_highbd : {} } if (best_rd >= rd_thresh || x->skip_encode) return best_rd; for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) { memcpy(CONVERT_TO_SHORTPTR(dst_init + idy * dst_stride), best_dst16 + idy * 8, num_4x4_blocks_wide * 4 * sizeof(uint16_t)); } return best_rd; } #endif // CONFIG_VP9_HIGHBITDEPTH for (mode = DC_PRED; mode <= TM_PRED; ++mode) { int64_t this_rd; int ratey = 0; int64_t distortion = 0; int rate = bmode_costs[mode]; if (!(cpi->sf.intra_y_mode_mask[TX_4X4] & (1 << mode))) continue; // Only do the oblique modes if the best so far is // one of the neighboring directional modes if (cpi->sf.mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { if (conditional_skipintra(mode, *best_mode)) continue; } memcpy(tempa, ta, num_4x4_blocks_wide * sizeof(ta[0])); memcpy(templ, tl, num_4x4_blocks_high * sizeof(tl[0])); for (idy = 0; idy < num_4x4_blocks_high; ++idy) { for (idx = 0; idx < num_4x4_blocks_wide; ++idx) { const int block = (row + idy) * 2 + (col + idx); const uint8_t *const src = &src_init[idx * 4 + idy * 4 * src_stride]; uint8_t *const dst = &dst_init[idx * 4 + idy * 4 * dst_stride]; int16_t *const src_diff = vp9_raster_block_offset_int16(BLOCK_8X8, block, p->src_diff); tran_low_t *const coeff = BLOCK_OFFSET(x->plane[0].coeff, block); xd->mi[0]->bmi[block].as_mode = mode; vp9_predict_intra_block(xd, 1, TX_4X4, mode, x->skip_encode ? src : dst, x->skip_encode ? src_stride : dst_stride, dst, dst_stride, col + idx, row + idy, 0); vpx_subtract_block(4, 4, src_diff, 8, src, src_stride, dst, dst_stride); if (xd->lossless) { const scan_order *so = &vp9_default_scan_orders[TX_4X4]; const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]); vp9_fwht4x4(src_diff, coeff, 8); vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 1 : 0; if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next; vp9_iwht4x4_add(BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride, p->eobs[block]); } else { int64_t unused; const TX_TYPE tx_type = get_tx_type_4x4(PLANE_TYPE_Y, xd, block); const scan_order *so = &vp9_scan_orders[TX_4X4][tx_type]; const int coeff_ctx = combine_entropy_contexts(tempa[idx], templ[idy]); vp9_fht4x4(src_diff, coeff, 8, tx_type); vp9_regular_quantize_b_4x4(x, 0, block, so->scan, so->iscan); ratey += cost_coeffs(x, 0, block, TX_4X4, coeff_ctx, so->scan, so->neighbors, cpi->sf.use_fast_coef_costing); tempa[idx] = templ[idy] = (x->plane[0].eobs[block] > 0) ? 
1 : 0; distortion += vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, block), 16, &unused) >> 2; if (RDCOST(x->rdmult, x->rddiv, ratey, distortion) >= best_rd) goto next; vp9_iht4x4_add(tx_type, BLOCK_OFFSET(pd->dqcoeff, block), dst, dst_stride, p->eobs[block]); } } } rate += ratey; this_rd = RDCOST(x->rdmult, x->rddiv, rate, distortion); if (this_rd < best_rd) { *bestrate = rate; *bestratey = ratey; *bestdistortion = distortion; best_rd = this_rd; *best_mode = mode; memcpy(a, tempa, num_4x4_blocks_wide * sizeof(tempa[0])); memcpy(l, templ, num_4x4_blocks_high * sizeof(templ[0])); for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) memcpy(best_dst + idy * 8, dst_init + idy * dst_stride, num_4x4_blocks_wide * 4); } next : {} } if (best_rd >= rd_thresh || x->skip_encode) return best_rd; for (idy = 0; idy < num_4x4_blocks_high * 4; ++idy) memcpy(dst_init + idy * dst_stride, best_dst + idy * 8, num_4x4_blocks_wide * 4); return best_rd; } static int64_t rd_pick_intra_sub_8x8_y_mode(VP9_COMP *cpi, MACROBLOCK *mb, int *rate, int *rate_y, int64_t *distortion, int64_t best_rd) { int i, j; const MACROBLOCKD *const xd = &mb->e_mbd; MODE_INFO *const mic = xd->mi[0]; const MODE_INFO *above_mi = xd->above_mi; const MODE_INFO *left_mi = xd->left_mi; const BLOCK_SIZE bsize = xd->mi[0]->sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; int idx, idy; int cost = 0; int64_t total_distortion = 0; int tot_rate_y = 0; int64_t total_rd = 0; const int *bmode_costs = cpi->mbmode_cost; // Pick modes for each sub-block (of size 4x4, 4x8, or 8x4) in an 8x8 block. for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { PREDICTION_MODE best_mode = DC_PRED; int r = INT_MAX, ry = INT_MAX; int64_t d = INT64_MAX, this_rd = INT64_MAX; i = idy * 2 + idx; if (cpi->common.frame_type == KEY_FRAME) { const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, i); const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, i); bmode_costs = cpi->y_mode_costs[A][L]; } this_rd = rd_pick_intra4x4block( cpi, mb, idy, idx, &best_mode, bmode_costs, xd->plane[0].above_context + idx, xd->plane[0].left_context + idy, &r, &ry, &d, bsize, best_rd - total_rd); if (this_rd >= best_rd - total_rd) return INT64_MAX; total_rd += this_rd; cost += r; total_distortion += d; tot_rate_y += ry; mic->bmi[i].as_mode = best_mode; for (j = 1; j < num_4x4_blocks_high; ++j) mic->bmi[i + j * 2].as_mode = best_mode; for (j = 1; j < num_4x4_blocks_wide; ++j) mic->bmi[i + j].as_mode = best_mode; if (total_rd >= best_rd) return INT64_MAX; } } *rate = cost; *rate_y = tot_rate_y; *distortion = total_distortion; mic->mode = mic->bmi[3].as_mode; return RDCOST(mb->rdmult, mb->rddiv, cost, total_distortion); } // This function is used only for intra_only frames static int64_t rd_pick_intra_sby_mode(VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize, int64_t best_rd) { PREDICTION_MODE mode; PREDICTION_MODE mode_selected = DC_PRED; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mic = xd->mi[0]; int this_rate, this_rate_tokenonly, s; int64_t this_distortion, this_rd; TX_SIZE best_tx = TX_4X4; int *bmode_costs; const MODE_INFO *above_mi = xd->above_mi; const MODE_INFO *left_mi = xd->left_mi; const PREDICTION_MODE A = vp9_above_block_mode(mic, above_mi, 0); const PREDICTION_MODE L = vp9_left_block_mode(mic, left_mi, 0); bmode_costs = cpi->y_mode_costs[A][L]; 
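  // Note the conditioning: bmode_costs depends on the above/left neighbour
  // modes (A, L), so the same intra mode can cost a different number of bits
  // in different neighbourhoods.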
memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); /* Y Search for intra prediction mode */ for (mode = DC_PRED; mode <= TM_PRED; mode++) { if (cpi->sf.use_nonrd_pick_mode) { // These speed features are turned on in hybrid non-RD and RD mode // for key frame coding in the context of real-time setting. if (conditional_skipintra(mode, mode_selected)) continue; if (*skippable) break; } mic->mode = mode; super_block_yrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, NULL, bsize, best_rd, /*recon = */ 0); if (this_rate_tokenonly == INT_MAX) continue; this_rate = this_rate_tokenonly + bmode_costs[mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { mode_selected = mode; best_rd = this_rd; best_tx = mic->tx_size; *rate = this_rate; *rate_tokenonly = this_rate_tokenonly; *distortion = this_distortion; *skippable = s; } } mic->mode = mode_selected; mic->tx_size = best_tx; return best_rd; } // Return value 0: early termination triggered, no valid rd cost available; // 1: rd cost values are valid. static int super_block_uvrd(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int64_t *distortion, int *skippable, int64_t *sse, BLOCK_SIZE bsize, int64_t ref_best_rd) { MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; const TX_SIZE uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]); int plane; int pnrate = 0, pnskip = 1; int64_t pndist = 0, pnsse = 0; int is_cost_valid = 1; if (ref_best_rd < 0) is_cost_valid = 0; if (is_inter_block(mi) && is_cost_valid) { int plane; for (plane = 1; plane < MAX_MB_PLANE; ++plane) vp9_subtract_plane(x, bsize, plane); } *rate = 0; *distortion = 0; *sse = 0; *skippable = 1; for (plane = 1; plane < MAX_MB_PLANE; ++plane) { txfm_rd_in_plane(cpi, x, &pnrate, &pndist, &pnskip, &pnsse, ref_best_rd, plane, bsize, uv_tx_size, cpi->sf.use_fast_coef_costing, /*recon = */ 0); if (pnrate == INT_MAX) { is_cost_valid = 0; break; } *rate += pnrate; *distortion += pndist; *sse += pnsse; *skippable &= pnskip; } if (!is_cost_valid) { // reset cost value *rate = INT_MAX; *distortion = INT64_MAX; *sse = INT64_MAX; *skippable = 0; } return is_cost_valid; } static int64_t rd_pick_intra_sbuv_mode(VP9_COMP *cpi, MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize, TX_SIZE max_tx_size) { MACROBLOCKD *xd = &x->e_mbd; PREDICTION_MODE mode; PREDICTION_MODE mode_selected = DC_PRED; int64_t best_rd = INT64_MAX, this_rd; int this_rate_tokenonly, this_rate, s; int64_t this_distortion, this_sse; memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); for (mode = DC_PRED; mode <= TM_PRED; ++mode) { if (!(cpi->sf.intra_uv_mode_mask[max_tx_size] & (1 << mode))) continue; #if CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH if ((xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) && (xd->above_mi == NULL || xd->left_mi == NULL) && need_top_left[mode]) continue; #endif // CONFIG_BETTER_HW_COMPATIBILITY && CONFIG_VP9_HIGHBITDEPTH xd->mi[0]->uv_mode = mode; if (!super_block_uvrd(cpi, x, &this_rate_tokenonly, &this_distortion, &s, &this_sse, bsize, best_rd)) continue; this_rate = this_rate_tokenonly + cpi->intra_uv_mode_cost[cpi->common.frame_type][xd->mi[0]->mode][mode]; this_rd = RDCOST(x->rdmult, x->rddiv, this_rate, this_distortion); if (this_rd < best_rd) { mode_selected = mode; best_rd = this_rd; *rate = this_rate; *rate_tokenonly = this_rate_tokenonly; *distortion = this_distortion; *skippable = s; if (!x->select_tx_size) swap_block_ptr(x, ctx, 2, 0, 1, 
MAX_MB_PLANE); } } xd->mi[0]->uv_mode = mode_selected; return best_rd; } #if !CONFIG_REALTIME_ONLY static int64_t rd_sbuv_dcpred(const VP9_COMP *cpi, MACROBLOCK *x, int *rate, int *rate_tokenonly, int64_t *distortion, int *skippable, BLOCK_SIZE bsize) { const VP9_COMMON *cm = &cpi->common; int64_t unused; x->e_mbd.mi[0]->uv_mode = DC_PRED; memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); super_block_uvrd(cpi, x, rate_tokenonly, distortion, skippable, &unused, bsize, INT64_MAX); *rate = *rate_tokenonly + cpi->intra_uv_mode_cost[cm->frame_type][x->e_mbd.mi[0]->mode][DC_PRED]; return RDCOST(x->rdmult, x->rddiv, *rate, *distortion); } static void choose_intra_uv_mode(VP9_COMP *cpi, MACROBLOCK *const x, PICK_MODE_CONTEXT *ctx, BLOCK_SIZE bsize, TX_SIZE max_tx_size, int *rate_uv, int *rate_uv_tokenonly, int64_t *dist_uv, int *skip_uv, PREDICTION_MODE *mode_uv) { // Use an estimated rd for uv_intra based on DC_PRED if the // appropriate speed flag is set. if (cpi->sf.use_uv_intra_rd_estimate) { rd_sbuv_dcpred(cpi, x, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize); // Else do a proper rd search for each possible transform size that may // be considered in the main rd loop. } else { rd_pick_intra_sbuv_mode(cpi, x, ctx, rate_uv, rate_uv_tokenonly, dist_uv, skip_uv, bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, max_tx_size); } *mode_uv = x->e_mbd.mi[0]->uv_mode; } static int cost_mv_ref(const VP9_COMP *cpi, PREDICTION_MODE mode, int mode_context) { assert(is_inter_mode(mode)); return cpi->inter_mode_cost[mode_context][INTER_OFFSET(mode)]; } static int set_and_cost_bmi_mvs(VP9_COMP *cpi, MACROBLOCK *x, MACROBLOCKD *xd, int i, PREDICTION_MODE mode, int_mv this_mv[2], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int_mv seg_mvs[MAX_REF_FRAMES], int_mv *best_ref_mv[2], const int *mvjcost, int *mvcost[2]) { MODE_INFO *const mi = xd->mi[0]; const MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; int thismvcost = 0; int idx, idy; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[mi->sb_type]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[mi->sb_type]; const int is_compound = has_second_ref(mi); switch (mode) { case NEWMV: this_mv[0].as_int = seg_mvs[mi->ref_frame[0]].as_int; thismvcost += vp9_mv_bit_cost(&this_mv[0].as_mv, &best_ref_mv[0]->as_mv, mvjcost, mvcost, MV_COST_WEIGHT_SUB); if (is_compound) { this_mv[1].as_int = seg_mvs[mi->ref_frame[1]].as_int; thismvcost += vp9_mv_bit_cost(&this_mv[1].as_mv, &best_ref_mv[1]->as_mv, mvjcost, mvcost, MV_COST_WEIGHT_SUB); } break; case NEARMV: case NEARESTMV: this_mv[0].as_int = frame_mv[mode][mi->ref_frame[0]].as_int; if (is_compound) this_mv[1].as_int = frame_mv[mode][mi->ref_frame[1]].as_int; break; default: assert(mode == ZEROMV); this_mv[0].as_int = 0; if (is_compound) this_mv[1].as_int = 0; break; } mi->bmi[i].as_mv[0].as_int = this_mv[0].as_int; if (is_compound) mi->bmi[i].as_mv[1].as_int = this_mv[1].as_int; mi->bmi[i].as_mode = mode; for (idy = 0; idy < num_4x4_blocks_high; ++idy) for (idx = 0; idx < num_4x4_blocks_wide; ++idx) memmove(&mi->bmi[i + idy * 2 + idx], &mi->bmi[i], sizeof(mi->bmi[i])); return cost_mv_ref(cpi, mode, mbmi_ext->mode_context[mi->ref_frame[0]]) + thismvcost; } static int64_t encode_inter_mb_segment(VP9_COMP *cpi, MACROBLOCK *x, int64_t best_yrd, int i, int *labelyrate, int64_t *distortion, int64_t *sse, ENTROPY_CONTEXT *ta, ENTROPY_CONTEXT *tl, int mi_row, int mi_col) { int k; MACROBLOCKD *xd = &x->e_mbd; struct macroblockd_plane *const pd = &xd->plane[0]; struct 
macroblock_plane *const p = &x->plane[0]; MODE_INFO *const mi = xd->mi[0]; const BLOCK_SIZE plane_bsize = get_plane_block_size(mi->sb_type, pd); const int width = 4 * num_4x4_blocks_wide_lookup[plane_bsize]; const int height = 4 * num_4x4_blocks_high_lookup[plane_bsize]; int idx, idy; const uint8_t *const src = &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)]; uint8_t *const dst = &pd->dst.buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->dst.stride)]; int64_t thisdistortion = 0, thissse = 0; int thisrate = 0, ref; const scan_order *so = &vp9_default_scan_orders[TX_4X4]; const int is_compound = has_second_ref(mi); const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; for (ref = 0; ref < 1 + is_compound; ++ref) { const int bw = b_width_log2_lookup[BLOCK_8X8]; const int h = 4 * (i >> bw); const int w = 4 * (i & ((1 << bw) - 1)); const struct scale_factors *sf = &xd->block_refs[ref]->sf; int y_stride = pd->pre[ref].stride; uint8_t *pre = pd->pre[ref].buf + (h * pd->pre[ref].stride + w); if (vp9_is_scaled(sf)) { const int x_start = (-xd->mb_to_left_edge >> (3 + pd->subsampling_x)); const int y_start = (-xd->mb_to_top_edge >> (3 + pd->subsampling_y)); y_stride = xd->block_refs[ref]->buf->y_stride; pre = xd->block_refs[ref]->buf->y_buffer; pre += scaled_buffer_offset(x_start + w, y_start + h, y_stride, sf); } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_highbd_build_inter_predictor( CONVERT_TO_SHORTPTR(pre), y_stride, CONVERT_TO_SHORTPTR(dst), pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2), xd->bd); } else { vp9_build_inter_predictor( pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2)); } #else vp9_build_inter_predictor( pre, y_stride, dst, pd->dst.stride, &mi->bmi[i].as_mv[ref].as_mv, &xd->block_refs[ref]->sf, width, height, ref, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE + 4 * (i % 2), mi_row * MI_SIZE + 4 * (i / 2)); #endif // CONFIG_VP9_HIGHBITDEPTH } #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vpx_highbd_subtract_block( height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8, src, p->src.stride, dst, pd->dst.stride, xd->bd); } else { vpx_subtract_block(height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8, src, p->src.stride, dst, pd->dst.stride); } #else vpx_subtract_block(height, width, vp9_raster_block_offset_int16(BLOCK_8X8, i, p->src_diff), 8, src, p->src.stride, dst, pd->dst.stride); #endif // CONFIG_VP9_HIGHBITDEPTH k = i; for (idy = 0; idy < height / 4; ++idy) { for (idx = 0; idx < width / 4; ++idx) { #if CONFIG_VP9_HIGHBITDEPTH const int bd = (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) ? 
                         xd->bd : 8;
#endif
      int64_t ssz, rd, rd1, rd2;
      tran_low_t *coeff;
      int coeff_ctx;
      k += (idy * 2 + idx);
      coeff_ctx = combine_entropy_contexts(ta[k & 1], tl[k >> 1]);
      coeff = BLOCK_OFFSET(p->coeff, k);
      x->fwd_txfm4x4(vp9_raster_block_offset_int16(BLOCK_8X8, k, p->src_diff),
                     coeff, 8);
      vp9_regular_quantize_b_4x4(x, 0, k, so->scan, so->iscan);
#if CONFIG_VP9_HIGHBITDEPTH
      thisdistortion += vp9_highbd_block_error_dispatch(
          coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz, bd);
#else
      thisdistortion +=
          vp9_block_error(coeff, BLOCK_OFFSET(pd->dqcoeff, k), 16, &ssz);
#endif  // CONFIG_VP9_HIGHBITDEPTH
      thissse += ssz;
      thisrate += cost_coeffs(x, 0, k, TX_4X4, coeff_ctx, so->scan,
                              so->neighbors, cpi->sf.use_fast_coef_costing);
      ta[k & 1] = tl[k >> 1] = (x->plane[0].eobs[k] > 0) ? 1 : 0;
      rd1 = RDCOST(x->rdmult, x->rddiv, thisrate, thisdistortion >> 2);
      rd2 = RDCOST(x->rdmult, x->rddiv, 0, thissse >> 2);
      rd = VPXMIN(rd1, rd2);
      if (rd >= best_yrd) return INT64_MAX;
    }
  }

  *distortion = thisdistortion >> 2;
  *labelyrate = thisrate;
  *sse = thissse >> 2;
  return RDCOST(x->rdmult, x->rddiv, *labelyrate, *distortion);
}
#endif  // !CONFIG_REALTIME_ONLY

typedef struct {
  int eobs;
  int brate;
  int byrate;
  int64_t bdist;
  int64_t bsse;
  int64_t brdcost;
  int_mv mvs[2];
  ENTROPY_CONTEXT ta[2];
  ENTROPY_CONTEXT tl[2];
} SEG_RDSTAT;

typedef struct {
  int_mv *ref_mv[2];
  int_mv mvp;

  int64_t segment_rd;
  int r;
  int64_t d;
  int64_t sse;
  int segment_yrate;
  PREDICTION_MODE modes[4];
  SEG_RDSTAT rdstat[4][INTER_MODES];
  int mvthresh;
} BEST_SEG_INFO;

#if !CONFIG_REALTIME_ONLY
static INLINE int mv_check_bounds(const MvLimits *mv_limits, const MV *mv) {
  return (mv->row >> 3) < mv_limits->row_min ||
         (mv->row >> 3) > mv_limits->row_max ||
         (mv->col >> 3) < mv_limits->col_min ||
         (mv->col >> 3) > mv_limits->col_max;
}

static INLINE void mi_buf_shift(MACROBLOCK *x, int i) {
  MODE_INFO *const mi = x->e_mbd.mi[0];
  struct macroblock_plane *const p = &x->plane[0];
  struct macroblockd_plane *const pd = &x->e_mbd.plane[0];

  p->src.buf =
      &p->src.buf[vp9_raster_block_offset(BLOCK_8X8, i, p->src.stride)];
  assert(((intptr_t)pd->pre[0].buf & 0x7) == 0);
  pd->pre[0].buf =
      &pd->pre[0].buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[0].stride)];
  if (has_second_ref(mi))
    pd->pre[1].buf =
        &pd->pre[1]
             .buf[vp9_raster_block_offset(BLOCK_8X8, i, pd->pre[1].stride)];
}

static INLINE void mi_buf_restore(MACROBLOCK *x, struct buf_2d orig_src,
                                  struct buf_2d orig_pre[2]) {
  MODE_INFO *mi = x->e_mbd.mi[0];
  x->plane[0].src = orig_src;
  x->e_mbd.plane[0].pre[0] = orig_pre[0];
  if (has_second_ref(mi)) x->e_mbd.plane[0].pre[1] = orig_pre[1];
}

static INLINE int mv_has_subpel(const MV *mv) {
  return (mv->row & 0x0F) || (mv->col & 0x0F);
}

// Check if NEARESTMV/NEARMV/ZEROMV is the cheapest way to encode zero motion.
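// In other words: when the mode's candidate mv is zero (for both references
// in the compound case), signalling one of the other two zero-motion-capable
// modes may describe the identical prediction in fewer bits, in which case
// this mode is rejected (return 0).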
// TODO(aconverse): Find out if this is still productive then clean up or remove static int check_best_zero_mv(const VP9_COMP *cpi, const uint8_t mode_context[MAX_REF_FRAMES], int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES], int this_mode, const MV_REFERENCE_FRAME ref_frames[2]) { if ((this_mode == NEARMV || this_mode == NEARESTMV || this_mode == ZEROMV) && frame_mv[this_mode][ref_frames[0]].as_int == 0 && (ref_frames[1] == NONE || frame_mv[this_mode][ref_frames[1]].as_int == 0)) { int rfc = mode_context[ref_frames[0]]; int c1 = cost_mv_ref(cpi, NEARMV, rfc); int c2 = cost_mv_ref(cpi, NEARESTMV, rfc); int c3 = cost_mv_ref(cpi, ZEROMV, rfc); if (this_mode == NEARMV) { if (c1 > c3) return 0; } else if (this_mode == NEARESTMV) { if (c2 > c3) return 0; } else { assert(this_mode == ZEROMV); if (ref_frames[1] == NONE) { if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0) || (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0)) return 0; } else { if ((c3 >= c2 && frame_mv[NEARESTMV][ref_frames[0]].as_int == 0 && frame_mv[NEARESTMV][ref_frames[1]].as_int == 0) || (c3 >= c1 && frame_mv[NEARMV][ref_frames[0]].as_int == 0 && frame_mv[NEARMV][ref_frames[1]].as_int == 0)) return 0; } } } return 1; } static void joint_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int_mv *frame_mv, int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES], int *rate_mv) { const VP9_COMMON *const cm = &cpi->common; const int pw = 4 * num_4x4_blocks_wide_lookup[bsize]; const int ph = 4 * num_4x4_blocks_high_lookup[bsize]; MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; const int refs[2] = { mi->ref_frame[0], mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1] }; int_mv ref_mv[2]; int ite, ref; const InterpKernel *kernel = vp9_filter_kernels[mi->interp_filter]; struct scale_factors sf; // Do joint motion search in compound mode to get more accurate mv. struct buf_2d backup_yv12[2][MAX_MB_PLANE]; uint32_t last_besterr[2] = { UINT_MAX, UINT_MAX }; const YV12_BUFFER_CONFIG *const scaled_ref_frame[2] = { vp9_get_scaled_ref_frame(cpi, mi->ref_frame[0]), vp9_get_scaled_ref_frame(cpi, mi->ref_frame[1]) }; // Prediction buffer from second frame. #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, second_pred_alloc_16[64 * 64]); uint8_t *second_pred; #else DECLARE_ALIGNED(16, uint8_t, second_pred[64 * 64]); #endif // CONFIG_VP9_HIGHBITDEPTH for (ref = 0; ref < 2; ++ref) { ref_mv[ref] = x->mbmi_ext->ref_mvs[refs[ref]][0]; if (scaled_ref_frame[ref]) { int i; // Swap out the reference frame for a version that's been scaled to // match the resolution of the current frame, allowing the existing // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[ref][i] = xd->plane[i].pre[ref]; vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL); } frame_mv[refs[ref]].as_int = single_newmv[refs[ref]].as_int; } // Since we have scaled the reference frames to match the size of the current // frame we must use a unit scaling factor during mode selection. #if CONFIG_VP9_HIGHBITDEPTH vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width, cm->height, cm->use_highbitdepth); #else vp9_setup_scale_factors_for_frame(&sf, cm->width, cm->height, cm->width, cm->height); #endif // CONFIG_VP9_HIGHBITDEPTH // Allow joint search multiple times iteratively for each reference frame // and break out of the search loop if it couldn't find a better mv. 
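  // At most four alternating passes are run; each pass must improve on
  // last_besterr[id] for its reference, otherwise the loop breaks early.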
for (ite = 0; ite < 4; ite++) { struct buf_2d ref_yv12[2]; uint32_t bestsme = UINT_MAX; int sadpb = x->sadperbit16; MV tmp_mv; int search_range = 3; const MvLimits tmp_mv_limits = x->mv_limits; int id = ite % 2; // Even iterations search in the first reference frame, // odd iterations search in the second. The predictor // found for the 'other' reference frame is factored in. // Initialized here because of compiler problem in Visual Studio. ref_yv12[0] = xd->plane[0].pre[0]; ref_yv12[1] = xd->plane[0].pre[1]; // Get the prediction block from the 'other' reference frame. #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { second_pred = CONVERT_TO_BYTEPTR(second_pred_alloc_16); vp9_highbd_build_inter_predictor( CONVERT_TO_SHORTPTR(ref_yv12[!id].buf), ref_yv12[!id].stride, second_pred_alloc_16, pw, &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE, xd->bd); } else { second_pred = (uint8_t *)second_pred_alloc_16; vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw, &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); } #else vp9_build_inter_predictor(ref_yv12[!id].buf, ref_yv12[!id].stride, second_pred, pw, &frame_mv[refs[!id]].as_mv, &sf, pw, ph, 0, kernel, MV_PRECISION_Q3, mi_col * MI_SIZE, mi_row * MI_SIZE); #endif // CONFIG_VP9_HIGHBITDEPTH // Do compound motion search on the current reference frame. if (id) xd->plane[0].pre[0] = ref_yv12[id]; vp9_set_mv_search_range(&x->mv_limits, &ref_mv[id].as_mv); // Use the mv result from the single mode as mv predictor. tmp_mv = frame_mv[refs[id]].as_mv; tmp_mv.col >>= 3; tmp_mv.row >>= 3; // Small-range full-pixel motion search. bestsme = vp9_refining_search_8p_c(x, &tmp_mv, sadpb, search_range, &cpi->fn_ptr[bsize], &ref_mv[id].as_mv, second_pred); if (bestsme < UINT_MAX) bestsme = vp9_get_mvpred_av_var(x, &tmp_mv, &ref_mv[id].as_mv, second_pred, &cpi->fn_ptr[bsize], 1); x->mv_limits = tmp_mv_limits; if (bestsme < UINT_MAX) { uint32_t dis; /* TODO: use dis in distortion calculation later. */ uint32_t sse; bestsme = cpi->find_fractional_mv_step( x, &tmp_mv, &ref_mv[id].as_mv, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], 0, cpi->sf.mv.subpel_search_level, NULL, x->nmvjointcost, x->mvcost, &dis, &sse, second_pred, pw, ph, cpi->sf.use_accurate_subpel_search); } // Restore the pointer to the first (possibly scaled) prediction buffer. if (id) xd->plane[0].pre[0] = ref_yv12[0]; if (bestsme < last_besterr[id]) { frame_mv[refs[id]].as_mv = tmp_mv; last_besterr[id] = bestsme; } else { break; } } *rate_mv = 0; for (ref = 0; ref < 2; ++ref) { if (scaled_ref_frame[ref]) { // Restore the prediction frame pointers to their unscaled versions. 
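      // (They were swapped for scaled versions at the top of this function.)
      // The vp9_mv_bit_cost() accumulation just below then prices each
      // refined mv against its original reference mv predictor.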
int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[ref] = backup_yv12[ref][i]; } *rate_mv += vp9_mv_bit_cost(&frame_mv[refs[ref]].as_mv, &x->mbmi_ext->ref_mvs[refs[ref]][0].as_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } } static int64_t rd_pick_best_sub8x8_mode( VP9_COMP *cpi, MACROBLOCK *x, int_mv *best_ref_mv, int_mv *second_best_ref_mv, int64_t best_rd, int *returntotrate, int *returnyrate, int64_t *returndistortion, int *skippable, int64_t *psse, int mvthresh, int_mv seg_mvs[4][MAX_REF_FRAMES], BEST_SEG_INFO *bsi_buf, int filter_idx, int mi_row, int mi_col) { int i; BEST_SEG_INFO *bsi = bsi_buf + filter_idx; MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; int mode_idx; int k, br = 0, idx, idy; int64_t bd = 0, block_sse = 0; PREDICTION_MODE this_mode; VP9_COMMON *cm = &cpi->common; struct macroblock_plane *const p = &x->plane[0]; struct macroblockd_plane *const pd = &xd->plane[0]; const int label_count = 4; int64_t this_segment_rd = 0; int label_mv_thresh; int segmentyrate = 0; const BLOCK_SIZE bsize = mi->sb_type; const int num_4x4_blocks_wide = num_4x4_blocks_wide_lookup[bsize]; const int num_4x4_blocks_high = num_4x4_blocks_high_lookup[bsize]; const int pw = num_4x4_blocks_wide << 2; const int ph = num_4x4_blocks_high << 2; ENTROPY_CONTEXT t_above[2], t_left[2]; int subpelmv = 1, have_ref = 0; SPEED_FEATURES *const sf = &cpi->sf; const int has_second_rf = has_second_ref(mi); const int inter_mode_mask = sf->inter_mode_mask[bsize]; MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; vp9_zero(*bsi); bsi->segment_rd = best_rd; bsi->ref_mv[0] = best_ref_mv; bsi->ref_mv[1] = second_best_ref_mv; bsi->mvp.as_int = best_ref_mv->as_int; bsi->mvthresh = mvthresh; for (i = 0; i < 4; i++) bsi->modes[i] = ZEROMV; memcpy(t_above, pd->above_context, sizeof(t_above)); memcpy(t_left, pd->left_context, sizeof(t_left)); // 64 makes this threshold really big effectively // making it so that we very rarely check mvs on // segments. setting this to 1 would make mv thresh // roughly equal to what it is for macroblocks label_mv_thresh = 1 * bsi->mvthresh / label_count; // Segmentation method overheads for (idy = 0; idy < 2; idy += num_4x4_blocks_high) { for (idx = 0; idx < 2; idx += num_4x4_blocks_wide) { // TODO(jingning,rbultje): rewrite the rate-distortion optimization // loop for 4x4/4x8/8x4 block coding. 
      // to be replaced with new rd loop
      int_mv mode_mv[MB_MODE_COUNT][2];
      int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES];
      PREDICTION_MODE mode_selected = ZEROMV;
      int64_t best_rd = INT64_MAX;
      const int i = idy * 2 + idx;
      int ref;

      for (ref = 0; ref < 1 + has_second_rf; ++ref) {
        const MV_REFERENCE_FRAME frame = mi->ref_frame[ref];
        frame_mv[ZEROMV][frame].as_int = 0;
        vp9_append_sub8x8_mvs_for_idx(
            cm, xd, i, ref, mi_row, mi_col, &frame_mv[NEARESTMV][frame],
            &frame_mv[NEARMV][frame], mbmi_ext->mode_context);
      }

      // search for the best motion vector on this segment
      for (this_mode = NEARESTMV; this_mode <= NEWMV; ++this_mode) {
        const struct buf_2d orig_src = x->plane[0].src;
        struct buf_2d orig_pre[2];

        mode_idx = INTER_OFFSET(this_mode);
        bsi->rdstat[i][mode_idx].brdcost = INT64_MAX;
        if (!(inter_mode_mask & (1 << this_mode))) continue;

        if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv,
                                this_mode, mi->ref_frame))
          continue;

        memcpy(orig_pre, pd->pre, sizeof(orig_pre));
        memcpy(bsi->rdstat[i][mode_idx].ta, t_above,
               sizeof(bsi->rdstat[i][mode_idx].ta));
        memcpy(bsi->rdstat[i][mode_idx].tl, t_left,
               sizeof(bsi->rdstat[i][mode_idx].tl));

        // motion search for newmv (single predictor case only)
        if (!has_second_rf && this_mode == NEWMV &&
            seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV) {
          MV *const new_mv = &mode_mv[NEWMV][0].as_mv;
          int step_param = 0;
          uint32_t bestsme = UINT_MAX;
          int sadpb = x->sadperbit4;
          MV mvp_full;
          int max_mv;
          int cost_list[5];
          const MvLimits tmp_mv_limits = x->mv_limits;

          /* Is the best so far sufficiently good that we can't justify doing
           * a new motion search. */
          if (best_rd < label_mv_thresh) break;

          if (cpi->oxcf.mode != BEST) {
            // use previous block's result as next block's MV predictor.
            if (i > 0) {
              bsi->mvp.as_int = mi->bmi[i - 1].as_mv[0].as_int;
              if (i == 2) bsi->mvp.as_int = mi->bmi[i - 2].as_mv[0].as_int;
            }
          }
          if (i == 0)
            max_mv = x->max_mv_context[mi->ref_frame[0]];
          else
            max_mv =
                VPXMAX(abs(bsi->mvp.as_mv.row), abs(bsi->mvp.as_mv.col)) >> 3;

          if (sf->mv.auto_mv_step_size && cm->show_frame) {
            // Take the weighted average of the step_params based on the last
            // frame's max mv magnitude and the best ref mvs of the current
            // block for the given reference.
            step_param =
                (vp9_init_search_range(max_mv) + cpi->mv_step_param) / 2;
          } else {
            step_param = cpi->mv_step_param;
          }

          mvp_full.row = bsi->mvp.as_mv.row >> 3;
          mvp_full.col = bsi->mvp.as_mv.col >> 3;

          if (sf->adaptive_motion_search) {
            if (x->pred_mv[mi->ref_frame[0]].row != INT16_MAX &&
                x->pred_mv[mi->ref_frame[0]].col != INT16_MAX) {
              mvp_full.row = x->pred_mv[mi->ref_frame[0]].row >> 3;
              mvp_full.col = x->pred_mv[mi->ref_frame[0]].col >> 3;
            }
            step_param = VPXMAX(step_param, 8);
          }

          // adjust src pointer for this block
          mi_buf_shift(x, i);

          vp9_set_mv_search_range(&x->mv_limits, &bsi->ref_mv[0]->as_mv);

          bestsme = vp9_full_pixel_search(
              cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method,
              sadpb, sf->mv.subpel_search_method != SUBPEL_TREE ?
cost_list : NULL, &bsi->ref_mv[0]->as_mv, new_mv, INT_MAX, 1); x->mv_limits = tmp_mv_limits; if (bestsme < UINT_MAX) { uint32_t distortion; cpi->find_fractional_mv_step( x, new_mv, &bsi->ref_mv[0]->as_mv, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], sf->mv.subpel_force_stop, sf->mv.subpel_search_level, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &distortion, &x->pred_sse[mi->ref_frame[0]], NULL, pw, ph, cpi->sf.use_accurate_subpel_search); // save motion search result for use in compound prediction seg_mvs[i][mi->ref_frame[0]].as_mv = *new_mv; } x->pred_mv[mi->ref_frame[0]] = *new_mv; // restore src pointers mi_buf_restore(x, orig_src, orig_pre); } if (has_second_rf) { if (seg_mvs[i][mi->ref_frame[1]].as_int == INVALID_MV || seg_mvs[i][mi->ref_frame[0]].as_int == INVALID_MV) continue; } if (has_second_rf && this_mode == NEWMV && mi->interp_filter == EIGHTTAP) { // adjust src pointers mi_buf_shift(x, i); if (sf->comp_inter_joint_search_thresh <= bsize) { int rate_mv; joint_motion_search(cpi, x, bsize, frame_mv[this_mode], mi_row, mi_col, seg_mvs[i], &rate_mv); seg_mvs[i][mi->ref_frame[0]].as_int = frame_mv[this_mode][mi->ref_frame[0]].as_int; seg_mvs[i][mi->ref_frame[1]].as_int = frame_mv[this_mode][mi->ref_frame[1]].as_int; } // restore src pointers mi_buf_restore(x, orig_src, orig_pre); } bsi->rdstat[i][mode_idx].brate = set_and_cost_bmi_mvs( cpi, x, xd, i, this_mode, mode_mv[this_mode], frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost, x->mvcost); for (ref = 0; ref < 1 + has_second_rf; ++ref) { bsi->rdstat[i][mode_idx].mvs[ref].as_int = mode_mv[this_mode][ref].as_int; if (num_4x4_blocks_wide > 1) bsi->rdstat[i + 1][mode_idx].mvs[ref].as_int = mode_mv[this_mode][ref].as_int; if (num_4x4_blocks_high > 1) bsi->rdstat[i + 2][mode_idx].mvs[ref].as_int = mode_mv[this_mode][ref].as_int; } // Trap vectors that reach beyond the UMV borders if (mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][0].as_mv) || (has_second_rf && mv_check_bounds(&x->mv_limits, &mode_mv[this_mode][1].as_mv))) continue; if (filter_idx > 0) { BEST_SEG_INFO *ref_bsi = bsi_buf; subpelmv = 0; have_ref = 1; for (ref = 0; ref < 1 + has_second_rf; ++ref) { subpelmv |= mv_has_subpel(&mode_mv[this_mode][ref].as_mv); have_ref &= mode_mv[this_mode][ref].as_int == ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int; } if (filter_idx > 1 && !subpelmv && !have_ref) { ref_bsi = bsi_buf + 1; have_ref = 1; for (ref = 0; ref < 1 + has_second_rf; ++ref) have_ref &= mode_mv[this_mode][ref].as_int == ref_bsi->rdstat[i][mode_idx].mvs[ref].as_int; } if (!subpelmv && have_ref && ref_bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { memcpy(&bsi->rdstat[i][mode_idx], &ref_bsi->rdstat[i][mode_idx], sizeof(SEG_RDSTAT)); if (num_4x4_blocks_wide > 1) bsi->rdstat[i + 1][mode_idx].eobs = ref_bsi->rdstat[i + 1][mode_idx].eobs; if (num_4x4_blocks_high > 1) bsi->rdstat[i + 2][mode_idx].eobs = ref_bsi->rdstat[i + 2][mode_idx].eobs; if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { mode_selected = this_mode; best_rd = bsi->rdstat[i][mode_idx].brdcost; } continue; } } bsi->rdstat[i][mode_idx].brdcost = encode_inter_mb_segment( cpi, x, bsi->segment_rd - this_segment_rd, i, &bsi->rdstat[i][mode_idx].byrate, &bsi->rdstat[i][mode_idx].bdist, &bsi->rdstat[i][mode_idx].bsse, bsi->rdstat[i][mode_idx].ta, bsi->rdstat[i][mode_idx].tl, mi_row, mi_col); if (bsi->rdstat[i][mode_idx].brdcost < INT64_MAX) { bsi->rdstat[i][mode_idx].brdcost += RDCOST(x->rdmult, x->rddiv, bsi->rdstat[i][mode_idx].brate, 0); bsi->rdstat[i][mode_idx].brate += 
bsi->rdstat[i][mode_idx].byrate; bsi->rdstat[i][mode_idx].eobs = p->eobs[i]; if (num_4x4_blocks_wide > 1) bsi->rdstat[i + 1][mode_idx].eobs = p->eobs[i + 1]; if (num_4x4_blocks_high > 1) bsi->rdstat[i + 2][mode_idx].eobs = p->eobs[i + 2]; } if (bsi->rdstat[i][mode_idx].brdcost < best_rd) { mode_selected = this_mode; best_rd = bsi->rdstat[i][mode_idx].brdcost; } } /*for each 4x4 mode*/ if (best_rd == INT64_MAX) { int iy, midx; for (iy = i + 1; iy < 4; ++iy) for (midx = 0; midx < INTER_MODES; ++midx) bsi->rdstat[iy][midx].brdcost = INT64_MAX; bsi->segment_rd = INT64_MAX; return INT64_MAX; } mode_idx = INTER_OFFSET(mode_selected); memcpy(t_above, bsi->rdstat[i][mode_idx].ta, sizeof(t_above)); memcpy(t_left, bsi->rdstat[i][mode_idx].tl, sizeof(t_left)); set_and_cost_bmi_mvs(cpi, x, xd, i, mode_selected, mode_mv[mode_selected], frame_mv, seg_mvs[i], bsi->ref_mv, x->nmvjointcost, x->mvcost); br += bsi->rdstat[i][mode_idx].brate; bd += bsi->rdstat[i][mode_idx].bdist; block_sse += bsi->rdstat[i][mode_idx].bsse; segmentyrate += bsi->rdstat[i][mode_idx].byrate; this_segment_rd += bsi->rdstat[i][mode_idx].brdcost; if (this_segment_rd > bsi->segment_rd) { int iy, midx; for (iy = i + 1; iy < 4; ++iy) for (midx = 0; midx < INTER_MODES; ++midx) bsi->rdstat[iy][midx].brdcost = INT64_MAX; bsi->segment_rd = INT64_MAX; return INT64_MAX; } } } /* for each label */ bsi->r = br; bsi->d = bd; bsi->segment_yrate = segmentyrate; bsi->segment_rd = this_segment_rd; bsi->sse = block_sse; // update the coding decisions for (k = 0; k < 4; ++k) bsi->modes[k] = mi->bmi[k].as_mode; if (bsi->segment_rd > best_rd) return INT64_MAX; /* set it to the best */ for (i = 0; i < 4; i++) { mode_idx = INTER_OFFSET(bsi->modes[i]); mi->bmi[i].as_mv[0].as_int = bsi->rdstat[i][mode_idx].mvs[0].as_int; if (has_second_ref(mi)) mi->bmi[i].as_mv[1].as_int = bsi->rdstat[i][mode_idx].mvs[1].as_int; x->plane[0].eobs[i] = bsi->rdstat[i][mode_idx].eobs; mi->bmi[i].as_mode = bsi->modes[i]; } /* * used to set mbmi->mv.as_int */ *returntotrate = bsi->r; *returndistortion = bsi->d; *returnyrate = bsi->segment_yrate; *skippable = vp9_is_skippable_in_plane(x, BLOCK_8X8, 0); *psse = bsi->sse; mi->mode = bsi->modes[3]; return bsi->segment_rd; } static void estimate_ref_frame_costs(const VP9_COMMON *cm, const MACROBLOCKD *xd, int segment_id, unsigned int *ref_costs_single, unsigned int *ref_costs_comp, vpx_prob *comp_mode_p) { int seg_ref_active = segfeature_active(&cm->seg, segment_id, SEG_LVL_REF_FRAME); if (seg_ref_active) { memset(ref_costs_single, 0, MAX_REF_FRAMES * sizeof(*ref_costs_single)); memset(ref_costs_comp, 0, MAX_REF_FRAMES * sizeof(*ref_costs_comp)); *comp_mode_p = 128; } else { vpx_prob intra_inter_p = vp9_get_intra_inter_prob(cm, xd); vpx_prob comp_inter_p = 128; if (cm->reference_mode == REFERENCE_MODE_SELECT) { comp_inter_p = vp9_get_reference_mode_prob(cm, xd); *comp_mode_p = comp_inter_p; } else { *comp_mode_p = 128; } ref_costs_single[INTRA_FRAME] = vp9_cost_bit(intra_inter_p, 0); if (cm->reference_mode != COMPOUND_REFERENCE) { vpx_prob ref_single_p1 = vp9_get_pred_prob_single_ref_p1(cm, xd); vpx_prob ref_single_p2 = vp9_get_pred_prob_single_ref_p2(cm, xd); unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); if (cm->reference_mode == REFERENCE_MODE_SELECT) base_cost += vp9_cost_bit(comp_inter_p, 0); ref_costs_single[LAST_FRAME] = ref_costs_single[GOLDEN_FRAME] = ref_costs_single[ALTREF_FRAME] = base_cost; ref_costs_single[LAST_FRAME] += vp9_cost_bit(ref_single_p1, 0); ref_costs_single[GOLDEN_FRAME] += 
vp9_cost_bit(ref_single_p1, 1); ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p1, 1); ref_costs_single[GOLDEN_FRAME] += vp9_cost_bit(ref_single_p2, 0); ref_costs_single[ALTREF_FRAME] += vp9_cost_bit(ref_single_p2, 1); } else { ref_costs_single[LAST_FRAME] = 512; ref_costs_single[GOLDEN_FRAME] = 512; ref_costs_single[ALTREF_FRAME] = 512; } if (cm->reference_mode != SINGLE_REFERENCE) { vpx_prob ref_comp_p = vp9_get_pred_prob_comp_ref_p(cm, xd); unsigned int base_cost = vp9_cost_bit(intra_inter_p, 1); if (cm->reference_mode == REFERENCE_MODE_SELECT) base_cost += vp9_cost_bit(comp_inter_p, 1); ref_costs_comp[LAST_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 0); ref_costs_comp[GOLDEN_FRAME] = base_cost + vp9_cost_bit(ref_comp_p, 1); } else { ref_costs_comp[LAST_FRAME] = 512; ref_costs_comp[GOLDEN_FRAME] = 512; } } } static void store_coding_context( MACROBLOCK *x, PICK_MODE_CONTEXT *ctx, int mode_index, int64_t comp_pred_diff[REFERENCE_MODES], int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS], int skippable) { MACROBLOCKD *const xd = &x->e_mbd; // Take a snapshot of the coding context so it can be // restored if we decide to encode this way ctx->skip = x->skip; ctx->skippable = skippable; ctx->best_mode_index = mode_index; ctx->mic = *xd->mi[0]; ctx->mbmi_ext = *x->mbmi_ext; ctx->single_pred_diff = (int)comp_pred_diff[SINGLE_REFERENCE]; ctx->comp_pred_diff = (int)comp_pred_diff[COMPOUND_REFERENCE]; ctx->hybrid_pred_diff = (int)comp_pred_diff[REFERENCE_MODE_SELECT]; memcpy(ctx->best_filter_diff, best_filter_diff, sizeof(*best_filter_diff) * SWITCHABLE_FILTER_CONTEXTS); } static void setup_buffer_inter(VP9_COMP *cpi, MACROBLOCK *x, MV_REFERENCE_FRAME ref_frame, BLOCK_SIZE block_size, int mi_row, int mi_col, int_mv frame_nearest_mv[MAX_REF_FRAMES], int_mv frame_near_mv[MAX_REF_FRAMES], struct buf_2d yv12_mb[4][MAX_MB_PLANE]) { const VP9_COMMON *cm = &cpi->common; const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; int_mv *const candidates = x->mbmi_ext->ref_mvs[ref_frame]; const struct scale_factors *const sf = &cm->frame_refs[ref_frame - 1].sf; MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; assert(yv12 != NULL); // TODO(jkoleszar): Is the UV buffer ever used here? If so, need to make this // use the UV scaling factors. vp9_setup_pred_block(xd, yv12_mb[ref_frame], yv12, mi_row, mi_col, sf, sf); // Gets an initial list of candidate vectors from neighbours and orders them vp9_find_mv_refs(cm, xd, mi, ref_frame, candidates, mi_row, mi_col, mbmi_ext->mode_context); // Candidate refinement carried out at encoder and decoder vp9_find_best_ref_mvs(xd, cm->allow_high_precision_mv, candidates, &frame_nearest_mv[ref_frame], &frame_near_mv[ref_frame]); // Further refinement that is encode side only to test the top few candidates // in full and choose the best as the centre point for subsequent searches. // The current implementation doesn't support scaling. 
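/* Illustrative sketch, not part of the build: the encoder-side refinement
 * invoked below amounts to ranking the candidate vectors by the SAD of the
 * prediction each one produces and keeping the best as the centre for the
 * later full-pixel search. Assuming a hypothetical sad_of() helper, the
 * idea is roughly:
 *   best = candidates[0];
 *   for (k = 1; k < num_candidates; ++k)
 *     if (sad_of(candidates[k]) < sad_of(best)) best = candidates[k];
 * with the winner recorded for use as the search centre (see vp9_mv_pred). */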
if (!vp9_is_scaled(sf) && block_size >= BLOCK_8X8) vp9_mv_pred(cpi, x, yv12_mb[ref_frame][0].buf, yv12->y_stride, ref_frame, block_size); } #if CONFIG_NON_GREEDY_MV static int ref_frame_to_gf_rf_idx(int ref_frame) { if (ref_frame == GOLDEN_FRAME) { return 0; } if (ref_frame == LAST_FRAME) { return 1; } if (ref_frame == ALTREF_FRAME) { return 2; } assert(0); return -1; } #endif static void single_motion_search(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int mi_row, int mi_col, int_mv *tmp_mv, int *rate_mv) { MACROBLOCKD *xd = &x->e_mbd; const VP9_COMMON *cm = &cpi->common; MODE_INFO *mi = xd->mi[0]; struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0 } }; int step_param; MV mvp_full; int ref = mi->ref_frame[0]; MV ref_mv = x->mbmi_ext->ref_mvs[ref][0].as_mv; const MvLimits tmp_mv_limits = x->mv_limits; int cost_list[5]; const int best_predmv_idx = x->mv_best_ref_index[ref]; const YV12_BUFFER_CONFIG *scaled_ref_frame = vp9_get_scaled_ref_frame(cpi, ref); const int pw = num_4x4_blocks_wide_lookup[bsize] << 2; const int ph = num_4x4_blocks_high_lookup[bsize] << 2; MV pred_mv[3]; int bestsme = INT_MAX; #if CONFIG_NON_GREEDY_MV int gf_group_idx = cpi->twopass.gf_group.index; int gf_rf_idx = ref_frame_to_gf_rf_idx(ref); BLOCK_SIZE square_bsize = get_square_block_size(bsize); int_mv nb_full_mvs[NB_MVS_NUM] = { 0 }; MotionField *motion_field = vp9_motion_field_info_get_motion_field( &cpi->motion_field_info, gf_group_idx, gf_rf_idx, square_bsize); const int nb_full_mv_num = vp9_prepare_nb_full_mvs(motion_field, mi_row, mi_col, nb_full_mvs); const int lambda = (pw * ph) / 4; assert(pw * ph == lambda << 2); #else // CONFIG_NON_GREEDY_MV int sadpb = x->sadperbit16; #endif // CONFIG_NON_GREEDY_MV pred_mv[0] = x->mbmi_ext->ref_mvs[ref][0].as_mv; pred_mv[1] = x->mbmi_ext->ref_mvs[ref][1].as_mv; pred_mv[2] = x->pred_mv[ref]; if (scaled_ref_frame) { int i; // Swap out the reference frame for a version that's been scaled to // match the resolution of the current frame, allowing the existing // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0]; vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL); } // Work out the size of the first step in the mv step search. // 0 here is maximum length first step. 1 is VPXMAX >> 1 etc. if (cpi->sf.mv.auto_mv_step_size && cm->show_frame) { // Take wtd average of the step_params based on the last frame's // max mv magnitude and that based on the best ref mvs of the current // block for the given reference. step_param = (vp9_init_search_range(x->max_mv_context[ref]) + cpi->mv_step_param) / 2; } else { step_param = cpi->mv_step_param; } if (cpi->sf.adaptive_motion_search && bsize < BLOCK_64X64) { const int boffset = 2 * (b_width_log2_lookup[BLOCK_64X64] - VPXMIN(b_height_log2_lookup[bsize], b_width_log2_lookup[bsize])); step_param = VPXMAX(step_param, boffset); } if (cpi->sf.adaptive_motion_search) { int bwl = b_width_log2_lookup[bsize]; int bhl = b_height_log2_lookup[bsize]; int tlevel = x->pred_mv_sad[ref] >> (bwl + bhl + 4); if (tlevel < 5) step_param += 2; // prev_mv_sad is not setup for dynamically scaled frames. 
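/* Illustrative worked example, not part of the build: the guard below
 * abandons the NEWMV search for a reference whose predicted-MV SAD is much
 * worse than another active reference's. With hypothetical values
 *   pred_mv_sad[ref] = 4000, pred_mv_sad[other] = 400
 * the test (4000 >> 3) = 500 > 400 is true, so the search is given up and
 * tmp_mv is returned as INVALID_MV; the caller then treats NEWMV for this
 * reference as unavailable. */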
if (cpi->oxcf.resize_mode != RESIZE_DYNAMIC) { int i; for (i = LAST_FRAME; i <= ALTREF_FRAME && cm->show_frame; ++i) { if ((x->pred_mv_sad[ref] >> 3) > x->pred_mv_sad[i]) { x->pred_mv[ref].row = INT16_MAX; x->pred_mv[ref].col = INT16_MAX; tmp_mv->as_int = INVALID_MV; if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; ++i) xd->plane[i].pre[0] = backup_yv12[i]; } return; } } } // Note: MV limits are modified here. Always restore the original values // after full-pixel motion search. vp9_set_mv_search_range(&x->mv_limits, &ref_mv); mvp_full = pred_mv[best_predmv_idx]; mvp_full.col >>= 3; mvp_full.row >>= 3; #if CONFIG_NON_GREEDY_MV bestsme = vp9_full_pixel_diamond_new(cpi, x, bsize, &mvp_full, step_param, lambda, 1, nb_full_mvs, nb_full_mv_num, &tmp_mv->as_mv); #else // CONFIG_NON_GREEDY_MV bestsme = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, step_param, cpi->sf.mv.search_method, sadpb, cond_cost_list(cpi, cost_list), &ref_mv, &tmp_mv->as_mv, INT_MAX, 1); #endif // CONFIG_NON_GREEDY_MV if (cpi->sf.enhanced_full_pixel_motion_search) { int i; for (i = 0; i < 3; ++i) { int this_me; MV this_mv; int diff_row; int diff_col; int step; if (pred_mv[i].row == INT16_MAX || pred_mv[i].col == INT16_MAX) continue; if (i == best_predmv_idx) continue; diff_row = ((int)pred_mv[i].row - pred_mv[i > 0 ? (i - 1) : best_predmv_idx].row) >> 3; diff_col = ((int)pred_mv[i].col - pred_mv[i > 0 ? (i - 1) : best_predmv_idx].col) >> 3; if (diff_row == 0 && diff_col == 0) continue; if (diff_row < 0) diff_row = -diff_row; if (diff_col < 0) diff_col = -diff_col; step = get_msb((diff_row + diff_col + 1) >> 1); if (step <= 0) continue; mvp_full = pred_mv[i]; mvp_full.col >>= 3; mvp_full.row >>= 3; #if CONFIG_NON_GREEDY_MV this_me = vp9_full_pixel_diamond_new( cpi, x, bsize, &mvp_full, VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), lambda, 1, nb_full_mvs, nb_full_mv_num, &this_mv); #else // CONFIG_NON_GREEDY_MV this_me = vp9_full_pixel_search( cpi, x, bsize, &mvp_full, VPXMAX(step_param, MAX_MVSEARCH_STEPS - step), cpi->sf.mv.search_method, sadpb, cond_cost_list(cpi, cost_list), &ref_mv, &this_mv, INT_MAX, 1); #endif // CONFIG_NON_GREEDY_MV if (this_me < bestsme) { tmp_mv->as_mv = this_mv; bestsme = this_me; } } } x->mv_limits = tmp_mv_limits; if (bestsme < INT_MAX) { uint32_t dis; /* TODO: use dis in distortion calculation later. */ cpi->find_fractional_mv_step( x, &tmp_mv->as_mv, &ref_mv, cm->allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[bsize], cpi->sf.mv.subpel_force_stop, cpi->sf.mv.subpel_search_level, cond_cost_list(cpi, cost_list), x->nmvjointcost, x->mvcost, &dis, &x->pred_sse[ref], NULL, pw, ph, cpi->sf.use_accurate_subpel_search); } *rate_mv = vp9_mv_bit_cost(&tmp_mv->as_mv, &ref_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); x->pred_mv[ref] = tmp_mv->as_mv; if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i]; } } static INLINE void restore_dst_buf(MACROBLOCKD *xd, uint8_t *orig_dst[MAX_MB_PLANE], int orig_dst_stride[MAX_MB_PLANE]) { int i; for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].dst.buf = orig_dst[i]; xd->plane[i].dst.stride = orig_dst_stride[i]; } } // In some situations we want to discount the apparent cost of a new motion // vector. Where there is a subtle motion field and especially where there is // low spatial complexity then it can be hard to cover the cost of a new motion // vector in a single block, even if that motion vector reduces distortion.
// However, once established that vector may be usable through the nearest and // near mv modes to reduce distortion in subsequent blocks and also improve // visual quality. static int discount_newmv_test(VP9_COMP *cpi, int this_mode, int_mv this_mv, int_mv (*mode_mv)[MAX_REF_FRAMES], int ref_frame, int mi_row, int mi_col, BLOCK_SIZE bsize) { #if CONFIG_NON_GREEDY_MV (void)mode_mv; (void)this_mv; if (this_mode == NEWMV && bsize >= BLOCK_8X8 && cpi->tpl_ready) { const int gf_group_idx = cpi->twopass.gf_group.index; const int gf_rf_idx = ref_frame_to_gf_rf_idx(ref_frame); const TplDepFrame tpl_frame = cpi->tpl_stats[gf_group_idx]; const MotionField *motion_field = vp9_motion_field_info_get_motion_field( &cpi->motion_field_info, gf_group_idx, gf_rf_idx, cpi->tpl_bsize); const int tpl_block_mi_h = num_8x8_blocks_high_lookup[cpi->tpl_bsize]; const int tpl_block_mi_w = num_8x8_blocks_wide_lookup[cpi->tpl_bsize]; const int tpl_mi_row = mi_row - (mi_row % tpl_block_mi_h); const int tpl_mi_col = mi_col - (mi_col % tpl_block_mi_w); const int mv_mode = tpl_frame .mv_mode_arr[gf_rf_idx][tpl_mi_row * tpl_frame.stride + tpl_mi_col]; if (mv_mode == NEW_MV_MODE) { int_mv tpl_new_mv = vp9_motion_field_mi_get_mv(motion_field, tpl_mi_row, tpl_mi_col); int row_diff = abs(tpl_new_mv.as_mv.row - this_mv.as_mv.row); int col_diff = abs(tpl_new_mv.as_mv.col - this_mv.as_mv.col); if (VPXMAX(row_diff, col_diff) <= 8) { return 1; } else { return 0; } } else { return 0; } } else { return 0; } #else (void)mi_row; (void)mi_col; (void)bsize; return (!cpi->rc.is_src_frame_alt_ref && (this_mode == NEWMV) && (this_mv.as_int != 0) && ((mode_mv[NEARESTMV][ref_frame].as_int == 0) || (mode_mv[NEARESTMV][ref_frame].as_int == INVALID_MV)) && ((mode_mv[NEARMV][ref_frame].as_int == 0) || (mode_mv[NEARMV][ref_frame].as_int == INVALID_MV))); #endif } static int64_t handle_inter_mode( VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int *rate2, int64_t *distortion, int *skippable, int *rate_y, int *rate_uv, struct buf_2d *recon, int *disable_skip, int_mv (*mode_mv)[MAX_REF_FRAMES], int mi_row, int mi_col, int_mv single_newmv[MAX_REF_FRAMES], INTERP_FILTER (*single_filter)[MAX_REF_FRAMES], int (*single_skippable)[MAX_REF_FRAMES], int64_t *psse, const int64_t ref_best_rd, int64_t *mask_filter, int64_t filter_cache[]) { VP9_COMMON *cm = &cpi->common; MACROBLOCKD *xd = &x->e_mbd; MODE_INFO *mi = xd->mi[0]; MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; const int is_comp_pred = has_second_ref(mi); const int this_mode = mi->mode; int_mv *frame_mv = mode_mv[this_mode]; int i; int refs[2] = { mi->ref_frame[0], (mi->ref_frame[1] < 0 ? 0 : mi->ref_frame[1]) }; int_mv cur_mv[2]; #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, tmp_buf16[MAX_MB_PLANE * 64 * 64]); uint8_t *tmp_buf; #else DECLARE_ALIGNED(16, uint8_t, tmp_buf[MAX_MB_PLANE * 64 * 64]); #endif // CONFIG_VP9_HIGHBITDEPTH int pred_exists = 0; int intpel_mv; int64_t rd, tmp_rd, best_rd = INT64_MAX; int best_needs_copy = 0; uint8_t *orig_dst[MAX_MB_PLANE]; int orig_dst_stride[MAX_MB_PLANE]; int rs = 0; INTERP_FILTER best_filter = SWITCHABLE; uint8_t skip_txfm[MAX_MB_PLANE << 2] = { 0 }; int64_t bsse[MAX_MB_PLANE << 2] = { 0 }; int bsl = mi_width_log2_lookup[bsize]; int pred_filter_search = cpi->sf.cb_pred_filter_search ? 
(((mi_row + mi_col) >> bsl) + get_chessboard_index(cm->current_video_frame)) & 0x1 : 0; int skip_txfm_sb = 0; int64_t skip_sse_sb = INT64_MAX; int64_t distortion_y = 0, distortion_uv = 0; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { tmp_buf = CONVERT_TO_BYTEPTR(tmp_buf16); } else { tmp_buf = (uint8_t *)tmp_buf16; } #endif // CONFIG_VP9_HIGHBITDEPTH if (pred_filter_search) { INTERP_FILTER af = SWITCHABLE, lf = SWITCHABLE; if (xd->above_mi && is_inter_block(xd->above_mi)) af = xd->above_mi->interp_filter; if (xd->left_mi && is_inter_block(xd->left_mi)) lf = xd->left_mi->interp_filter; if ((this_mode != NEWMV) || (af == lf)) best_filter = af; } if (is_comp_pred) { if (frame_mv[refs[0]].as_int == INVALID_MV || frame_mv[refs[1]].as_int == INVALID_MV) return INT64_MAX; if (cpi->sf.adaptive_mode_search) { if (single_filter[this_mode][refs[0]] == single_filter[this_mode][refs[1]]) best_filter = single_filter[this_mode][refs[0]]; } } if (this_mode == NEWMV) { int rate_mv; if (is_comp_pred) { // Initialize mv using single prediction mode result. frame_mv[refs[0]].as_int = single_newmv[refs[0]].as_int; frame_mv[refs[1]].as_int = single_newmv[refs[1]].as_int; if (cpi->sf.comp_inter_joint_search_thresh <= bsize) { joint_motion_search(cpi, x, bsize, frame_mv, mi_row, mi_col, single_newmv, &rate_mv); } else { rate_mv = vp9_mv_bit_cost(&frame_mv[refs[0]].as_mv, &x->mbmi_ext->ref_mvs[refs[0]][0].as_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); rate_mv += vp9_mv_bit_cost(&frame_mv[refs[1]].as_mv, &x->mbmi_ext->ref_mvs[refs[1]][0].as_mv, x->nmvjointcost, x->mvcost, MV_COST_WEIGHT); } *rate2 += rate_mv; } else { int_mv tmp_mv; single_motion_search(cpi, x, bsize, mi_row, mi_col, &tmp_mv, &rate_mv); if (tmp_mv.as_int == INVALID_MV) return INT64_MAX; frame_mv[refs[0]].as_int = xd->mi[0]->bmi[0].as_mv[0].as_int = tmp_mv.as_int; single_newmv[refs[0]].as_int = tmp_mv.as_int; // Estimate the rate implications of a new mv but discount this // under certain circumstances where we want to help initiate a weak // motion field, where the distortion gain for a single block may not // be enough to overcome the cost of a new mv. if (discount_newmv_test(cpi, this_mode, tmp_mv, mode_mv, refs[0], mi_row, mi_col, bsize)) { *rate2 += VPXMAX((rate_mv / NEW_MV_DISCOUNT_FACTOR), 1); } else { *rate2 += rate_mv; } } } for (i = 0; i < is_comp_pred + 1; ++i) { cur_mv[i] = frame_mv[refs[i]]; // Clip "next_nearest" so that it does not extend too far out of the image if (this_mode != NEWMV) clamp_mv2(&cur_mv[i].as_mv, xd); if (mv_check_bounds(&x->mv_limits, &cur_mv[i].as_mv)) return INT64_MAX; mi->mv[i].as_int = cur_mv[i].as_int; } // do first prediction into the destination buffer. Do the next // prediction into a temporary buffer. Then keep track of which one // of these currently holds the best predictor, and use the other // one for future predictions. In the end, copy from tmp_buf to // dst if necessary. for (i = 0; i < MAX_MB_PLANE; i++) { orig_dst[i] = xd->plane[i].dst.buf; orig_dst_stride[i] = xd->plane[i].dst.stride; } // We don't include the cost of the second reference here, because there // are only two options: Last/ARF or Golden/ARF; The second one is always // known, which is ARF. // // Under some circumstances we discount the cost of new mv mode to encourage // initiation of a motion field.
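/* Illustrative sketch, not part of the build: when the discount test
 * passes, the mode cost charged for NEWMV is capped at the cheapest
 * nearby-vector mode, i.e. roughly
 *   mode_cost = VPXMIN(cost_mv_ref(NEWMV, ctx), cost_mv_ref(NEARESTMV, ctx));
 * so establishing a genuinely new vector is not penalized relative to
 * reusing a predictor. */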
if (discount_newmv_test(cpi, this_mode, frame_mv[refs[0]], mode_mv, refs[0], mi_row, mi_col, bsize)) { *rate2 += VPXMIN(cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]), cost_mv_ref(cpi, NEARESTMV, mbmi_ext->mode_context[refs[0]])); } else { *rate2 += cost_mv_ref(cpi, this_mode, mbmi_ext->mode_context[refs[0]]); } if (RDCOST(x->rdmult, x->rddiv, *rate2, 0) > ref_best_rd && mi->mode != NEARESTMV) return INT64_MAX; pred_exists = 0; // Are all MVs integer pel for Y and UV intpel_mv = !mv_has_subpel(&mi->mv[0].as_mv); if (is_comp_pred) intpel_mv &= !mv_has_subpel(&mi->mv[1].as_mv); // Search for best switchable filter by checking the variance of // pred error irrespective of whether the filter will be used for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; if (cm->interp_filter != BILINEAR) { if (x->source_variance < cpi->sf.disable_filter_search_var_thresh) { best_filter = EIGHTTAP; } else if (best_filter == SWITCHABLE) { int newbest; int tmp_rate_sum = 0; int64_t tmp_dist_sum = 0; for (i = 0; i < SWITCHABLE_FILTERS; ++i) { int j; int64_t rs_rd; int tmp_skip_sb = 0; int64_t tmp_skip_sse = INT64_MAX; mi->interp_filter = i; rs = vp9_get_switchable_rate(cpi, xd); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); if (i > 0 && intpel_mv) { rd = RDCOST(x->rdmult, x->rddiv, tmp_rate_sum, tmp_dist_sum); filter_cache[i] = rd; filter_cache[SWITCHABLE_FILTERS] = VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); if (cm->interp_filter == SWITCHABLE) rd += rs_rd; *mask_filter = VPXMAX(*mask_filter, rd); } else { int rate_sum = 0; int64_t dist_sum = 0; if (i > 0 && cpi->sf.adaptive_interp_filter_search && (cpi->sf.interp_filter_search_mask & (1 << i))) { rate_sum = INT_MAX; dist_sum = INT64_MAX; continue; } if ((cm->interp_filter == SWITCHABLE && (!i || best_needs_copy)) || (cm->interp_filter != SWITCHABLE && (cm->interp_filter == mi->interp_filter || (i == 0 && intpel_mv)))) { restore_dst_buf(xd, orig_dst, orig_dst_stride); } else { for (j = 0; j < MAX_MB_PLANE; j++) { xd->plane[j].dst.buf = tmp_buf + j * 64 * 64; xd->plane[j].dst.stride = 64; } } vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); model_rd_for_sb(cpi, bsize, x, xd, &rate_sum, &dist_sum, &tmp_skip_sb, &tmp_skip_sse); rd = RDCOST(x->rdmult, x->rddiv, rate_sum, dist_sum); filter_cache[i] = rd; filter_cache[SWITCHABLE_FILTERS] = VPXMIN(filter_cache[SWITCHABLE_FILTERS], rd + rs_rd); if (cm->interp_filter == SWITCHABLE) rd += rs_rd; *mask_filter = VPXMAX(*mask_filter, rd); if (i == 0 && intpel_mv) { tmp_rate_sum = rate_sum; tmp_dist_sum = dist_sum; } } if (i == 0 && cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { if (rd / 2 > ref_best_rd) { restore_dst_buf(xd, orig_dst, orig_dst_stride); return INT64_MAX; } } newbest = i == 0 || rd < best_rd; if (newbest) { best_rd = rd; best_filter = mi->interp_filter; if (cm->interp_filter == SWITCHABLE && i && !intpel_mv) best_needs_copy = !best_needs_copy; } if ((cm->interp_filter == SWITCHABLE && newbest) || (cm->interp_filter != SWITCHABLE && cm->interp_filter == mi->interp_filter)) { pred_exists = 1; tmp_rd = best_rd; skip_txfm_sb = tmp_skip_sb; skip_sse_sb = tmp_skip_sse; memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); memcpy(bsse, x->bsse, sizeof(bsse)); } } restore_dst_buf(xd, orig_dst, orig_dst_stride); } } // Set the appropriate filter mi->interp_filter = cm->interp_filter != SWITCHABLE ? cm->interp_filter : best_filter; rs = cm->interp_filter == SWITCHABLE ? 
vp9_get_switchable_rate(cpi, xd) : 0; if (pred_exists) { if (best_needs_copy) { // again temporarily set the buffers to local memory to prevent a memcpy for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].dst.buf = tmp_buf + i * 64 * 64; xd->plane[i].dst.stride = 64; } } rd = tmp_rd + RDCOST(x->rdmult, x->rddiv, rs, 0); } else { int tmp_rate; int64_t tmp_dist; // Handles the special case when a filter that is not in the // switchable list (ex. bilinear) is indicated at the frame level, or // skip condition holds. vp9_build_inter_predictors_sb(xd, mi_row, mi_col, bsize); model_rd_for_sb(cpi, bsize, x, xd, &tmp_rate, &tmp_dist, &skip_txfm_sb, &skip_sse_sb); rd = RDCOST(x->rdmult, x->rddiv, rs + tmp_rate, tmp_dist); memcpy(skip_txfm, x->skip_txfm, sizeof(skip_txfm)); memcpy(bsse, x->bsse, sizeof(bsse)); } if (!is_comp_pred) single_filter[this_mode][refs[0]] = mi->interp_filter; if (cpi->sf.adaptive_mode_search) if (is_comp_pred) if (single_skippable[this_mode][refs[0]] && single_skippable[this_mode][refs[1]]) memset(skip_txfm, SKIP_TXFM_AC_DC, sizeof(skip_txfm)); if (cpi->sf.use_rd_breakout && ref_best_rd < INT64_MAX) { // if current pred_error modeled rd is substantially more than the best // so far, do not bother doing full rd if (rd / 2 > ref_best_rd) { restore_dst_buf(xd, orig_dst, orig_dst_stride); return INT64_MAX; } } if (cm->interp_filter == SWITCHABLE) *rate2 += rs; memcpy(x->skip_txfm, skip_txfm, sizeof(skip_txfm)); memcpy(x->bsse, bsse, sizeof(bsse)); if (!skip_txfm_sb || xd->lossless) { int skippable_y, skippable_uv; int64_t sseuv = INT64_MAX; int64_t rdcosty = INT64_MAX; // Y cost and distortion vp9_subtract_plane(x, bsize, 0); super_block_yrd(cpi, x, rate_y, &distortion_y, &skippable_y, psse, bsize, ref_best_rd, recon); if (*rate_y == INT_MAX) { *rate2 = INT_MAX; *distortion = INT64_MAX; restore_dst_buf(xd, orig_dst, orig_dst_stride); return INT64_MAX; } *rate2 += *rate_y; *distortion += distortion_y; rdcosty = RDCOST(x->rdmult, x->rddiv, *rate2, *distortion); rdcosty = VPXMIN(rdcosty, RDCOST(x->rdmult, x->rddiv, 0, *psse)); if (!super_block_uvrd(cpi, x, rate_uv, &distortion_uv, &skippable_uv, &sseuv, bsize, ref_best_rd - rdcosty)) { *rate2 = INT_MAX; *distortion = INT64_MAX; restore_dst_buf(xd, orig_dst, orig_dst_stride); return INT64_MAX; } *psse += sseuv; *rate2 += *rate_uv; *distortion += distortion_uv; *skippable = skippable_y && skippable_uv; } else { x->skip = 1; *disable_skip = 1; // The cost of skip bit needs to be added. *rate2 += vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); *distortion = skip_sse_sb; } if (!is_comp_pred) single_skippable[this_mode][refs[0]] = *skippable; restore_dst_buf(xd, orig_dst, orig_dst_stride); return 0; // The rate-distortion cost will be re-calculated by caller. 
} #endif // !CONFIG_REALTIME_ONLY void vp9_rd_pick_intra_mode_sb(VP9_COMP *cpi, MACROBLOCK *x, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; struct macroblockd_plane *const pd = xd->plane; int rate_y = 0, rate_uv = 0, rate_y_tokenonly = 0, rate_uv_tokenonly = 0; int y_skip = 0, uv_skip = 0; int64_t dist_y = 0, dist_uv = 0; TX_SIZE max_uv_tx_size; x->skip_encode = 0; ctx->skip = 0; xd->mi[0]->ref_frame[0] = INTRA_FRAME; xd->mi[0]->ref_frame[1] = NONE; // Initialize interp_filter here so we do not have to check for inter block // modes in get_pred_context_switchable_interp() xd->mi[0]->interp_filter = SWITCHABLE_FILTERS; if (bsize >= BLOCK_8X8) { if (rd_pick_intra_sby_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, &y_skip, bsize, best_rd) >= best_rd) { rd_cost->rate = INT_MAX; return; } } else { y_skip = 0; if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate_y, &rate_y_tokenonly, &dist_y, best_rd) >= best_rd) { rd_cost->rate = INT_MAX; return; } } max_uv_tx_size = uv_txsize_lookup[bsize][xd->mi[0]->tx_size] [pd[1].subsampling_x][pd[1].subsampling_y]; rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv, &rate_uv_tokenonly, &dist_uv, &uv_skip, VPXMAX(BLOCK_8X8, bsize), max_uv_tx_size); if (y_skip && uv_skip) { rd_cost->rate = rate_y + rate_uv - rate_y_tokenonly - rate_uv_tokenonly + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 1); rd_cost->dist = dist_y + dist_uv; } else { rd_cost->rate = rate_y + rate_uv + vp9_cost_bit(vp9_get_skip_prob(cm, xd), 0); rd_cost->dist = dist_y + dist_uv; } ctx->mic = *xd->mi[0]; ctx->mbmi_ext = *x->mbmi_ext; rd_cost->rdcost = RDCOST(x->rdmult, x->rddiv, rd_cost->rate, rd_cost->dist); } #if !CONFIG_REALTIME_ONLY // This function is designed to apply a bias or adjustment to an rd value based // on the relative variance of the source and reconstruction. #define LOW_VAR_THRESH 250 #define VAR_MULT 250 static unsigned int max_var_adjust[VP9E_CONTENT_INVALID] = { 16, 16, 250 }; static void rd_variance_adjustment(VP9_COMP *cpi, MACROBLOCK *x, BLOCK_SIZE bsize, int64_t *this_rd, struct buf_2d *recon, MV_REFERENCE_FRAME ref_frame, MV_REFERENCE_FRAME second_ref_frame, PREDICTION_MODE this_mode) { MACROBLOCKD *const xd = &x->e_mbd; unsigned int rec_variance; unsigned int src_variance; unsigned int src_rec_min; unsigned int var_diff = 0; unsigned int var_factor = 0; unsigned int adj_max; unsigned int low_var_thresh = LOW_VAR_THRESH; const int bw = num_8x8_blocks_wide_lookup[bsize]; const int bh = num_8x8_blocks_high_lookup[bsize]; vp9e_tune_content content_type = cpi->oxcf.content; if (*this_rd == INT64_MAX) return; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { rec_variance = vp9_high_get_sby_variance(cpi, recon, bsize, xd->bd); src_variance = vp9_high_get_sby_variance(cpi, &x->plane[0].src, bsize, xd->bd); } else { rec_variance = vp9_get_sby_variance(cpi, recon, bsize); src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize); } #else rec_variance = vp9_get_sby_variance(cpi, recon, bsize); src_variance = vp9_get_sby_variance(cpi, &x->plane[0].src, bsize); #endif // CONFIG_VP9_HIGHBITDEPTH // Scale based on area in 8x8 blocks rec_variance /= (bw * bh); src_variance /= (bw * bh); if (content_type == VP9E_CONTENT_FILM) { if (cpi->oxcf.pass == 2) { // Adjust low variance threshold based on estimated group noise energy.
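/* Illustrative worked example, not part of the build: with
 * group_noise_energy equal to twice SECTION_NOISE_DEF, noise_factor is 2.0
 * and low_var_thresh doubles. For an intra block the code below applies a
 * further x2, and x5 more for DC_PRED, so a DC intra block on noisy film
 * content sees a 2 * 2 * 5 = 20x larger threshold than the default. */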
double noise_factor = (double)cpi->twopass.gf_group.group_noise_energy / SECTION_NOISE_DEF; low_var_thresh = (unsigned int)(low_var_thresh * noise_factor); if (ref_frame == INTRA_FRAME) { low_var_thresh *= 2; if (this_mode == DC_PRED) low_var_thresh *= 5; } else if (second_ref_frame > INTRA_FRAME) { low_var_thresh *= 2; } } } else { low_var_thresh = LOW_VAR_THRESH / 2; } // Lower of source (raw per pixel value) and recon variance. Note that // if the source per pixel is 0 then the recon value here will not be per // pixel (see above) so will likely be much larger. src_rec_min = VPXMIN(src_variance, rec_variance); if (src_rec_min > low_var_thresh) return; // We care more when the reconstruction has lower variance so give this case // a stronger weighting. var_diff = (src_variance > rec_variance) ? (src_variance - rec_variance) * 2 : (rec_variance - src_variance) / 2; adj_max = max_var_adjust[content_type]; var_factor = (unsigned int)((int64_t)VAR_MULT * var_diff) / VPXMAX(1, src_variance); var_factor = VPXMIN(adj_max, var_factor); if ((content_type == VP9E_CONTENT_FILM) && ((ref_frame == INTRA_FRAME) || (second_ref_frame > INTRA_FRAME))) { var_factor *= 2; } *this_rd += (*this_rd * var_factor) / 100; (void)xd; } #endif // !CONFIG_REALTIME_ONLY // Do we have an internal image edge (e.g. formatting bars). int vp9_internal_image_edge(VP9_COMP *cpi) { return (cpi->oxcf.pass == 2) && ((cpi->twopass.this_frame_stats.inactive_zone_rows > 0) || (cpi->twopass.this_frame_stats.inactive_zone_cols > 0)); } // Checks to see if a super block is on a horizontal image edge. // In most cases this is the "real" edge unless there are formatting // bars embedded in the stream. int vp9_active_h_edge(VP9_COMP *cpi, int mi_row, int mi_step) { int top_edge = 0; int bottom_edge = cpi->common.mi_rows; int is_active_h_edge = 0; // For two pass account for any formatting bars detected. if (cpi->oxcf.pass == 2) { TWO_PASS *twopass = &cpi->twopass; // The inactive region is specified in MBs not mi units. // The image edge is in the following MB row. top_edge += (int)(twopass->this_frame_stats.inactive_zone_rows * 2); bottom_edge -= (int)(twopass->this_frame_stats.inactive_zone_rows * 2); bottom_edge = VPXMAX(top_edge, bottom_edge); } if (((top_edge >= mi_row) && (top_edge < (mi_row + mi_step))) || ((bottom_edge >= mi_row) && (bottom_edge < (mi_row + mi_step)))) { is_active_h_edge = 1; } return is_active_h_edge; } // Checks to see if a super block is on a vertical image edge. // In most cases this is the "real" edge unless there are formatting // bars embedded in the stream. int vp9_active_v_edge(VP9_COMP *cpi, int mi_col, int mi_step) { int left_edge = 0; int right_edge = cpi->common.mi_cols; int is_active_v_edge = 0; // For two pass account for any formatting bars detected. if (cpi->oxcf.pass == 2) { TWO_PASS *twopass = &cpi->twopass; // The inactive region is specified in MBs not mi units. // The image edge is in the following MB row. left_edge += (int)(twopass->this_frame_stats.inactive_zone_cols * 2); right_edge -= (int)(twopass->this_frame_stats.inactive_zone_cols * 2); right_edge = VPXMAX(left_edge, right_edge); } if (((left_edge >= mi_col) && (left_edge < (mi_col + mi_step))) || ((right_edge >= mi_col) && (right_edge < (mi_col + mi_step)))) { is_active_v_edge = 1; } return is_active_v_edge; } // Checks to see if a super block is at the edge of the active image. // In most cases this is the "real" edge unless there are formatting // bars embedded in the stream. 
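/* Illustrative worked example, not part of the build: the inactive zones
 * computed above are reported by the first pass in 16x16 MB units, while
 * mi units are 8x8, hence the "* 2" conversions. With
 * inactive_zone_rows = 2 the active image starts at top_edge = 4 in mi
 * units; a superblock at mi_row = 0 with mi_step = 8 satisfies
 * (4 >= 0 && 4 < 8) and is therefore flagged as straddling the edge. */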
int vp9_active_edge_sb(VP9_COMP *cpi, int mi_row, int mi_col) { return vp9_active_h_edge(cpi, mi_row, MI_BLOCK_SIZE) || vp9_active_v_edge(cpi, mi_col, MI_BLOCK_SIZE); } #if !CONFIG_REALTIME_ONLY void vp9_rd_pick_inter_mode_sb(VP9_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { VP9_COMMON *const cm = &cpi->common; TileInfo *const tile_info = &tile_data->tile_info; RD_OPT *const rd_opt = &cpi->rd; SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; MB_MODE_INFO_EXT *const mbmi_ext = x->mbmi_ext; const struct segmentation *const seg = &cm->seg; PREDICTION_MODE this_mode; MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mi->segment_id; int comp_pred, i, k; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; int_mv single_newmv[MAX_REF_FRAMES] = { { 0 } }; INTERP_FILTER single_inter_filter[MB_MODE_COUNT][MAX_REF_FRAMES]; int single_skippable[MB_MODE_COUNT][MAX_REF_FRAMES]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; int64_t best_rd = best_rd_so_far; int64_t best_pred_diff[REFERENCE_MODES]; int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; MODE_INFO best_mbmode; int best_mode_skippable = 0; int midx, best_mode_index = -1; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vpx_prob comp_mode_p; int64_t best_intra_rd = INT64_MAX; unsigned int best_pred_sse = UINT_MAX; PREDICTION_MODE best_intra_mode = DC_PRED; int rate_uv_intra[TX_SIZES], rate_uv_tokenonly[TX_SIZES]; int64_t dist_uv[TX_SIZES]; int skip_uv[TX_SIZES]; PREDICTION_MODE mode_uv[TX_SIZES]; const int intra_cost_penalty = vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q); int best_skip2 = 0; uint8_t ref_frame_skip_mask[2] = { 0, 1 }; uint16_t mode_skip_mask[MAX_REF_FRAMES] = { 0 }; int mode_skip_start = sf->mode_skip_start + 1; const int *const rd_threshes = rd_opt->threshes[segment_id][bsize]; const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize]; int64_t mode_threshold[MAX_MODES]; int8_t *tile_mode_map = tile_data->mode_map[bsize]; int8_t mode_map[MAX_MODES]; // Maintain mode_map information locally to avoid // lock mechanism involved with reads from // tile_mode_map const int mode_search_skip_flags = sf->mode_search_skip_flags; const int is_rect_partition = num_4x4_blocks_wide_lookup[bsize] != num_4x4_blocks_high_lookup[bsize]; int64_t mask_filter = 0; int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; struct buf_2d *recon; struct buf_2d recon_buf; #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, recon16[64 * 64]); recon_buf.buf = xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH ? CONVERT_TO_BYTEPTR(recon16) : (uint8_t *)recon16; #else DECLARE_ALIGNED(16, uint8_t, recon8[64 * 64]); recon_buf.buf = recon8; #endif // CONFIG_VP9_HIGHBITDEPTH recon_buf.stride = 64; recon = cpi->oxcf.content == VP9E_CONTENT_FILM ? 
&recon_buf : 0; vp9_zero(best_mbmode); x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) best_filter_rd[i] = INT64_MAX; for (i = 0; i < TX_SIZES; i++) rate_uv_intra[i] = INT_MAX; for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX; for (i = 0; i < MB_MODE_COUNT; ++i) { for (k = 0; k < MAX_REF_FRAMES; ++k) { single_inter_filter[i][k] = SWITCHABLE; single_skippable[i][k] = 0; } } rd_cost->rate = INT_MAX; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { x->pred_mv_sad[ref_frame] = INT_MAX; if ((cpi->ref_frame_flags & flag_list[ref_frame]) && !(is_rect_partition && (ctx->skip_ref_frame_mask & (1 << ref_frame)))) { assert(get_ref_frame_buffer(cpi, ref_frame) != NULL); setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; } for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { if (!(cpi->ref_frame_flags & flag_list[ref_frame])) { // Skip checking missing references in both single and compound reference // modes. Note that a mode will be skipped if both reference frames // are masked out. ref_frame_skip_mask[0] |= (1 << ref_frame); ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; } else if (sf->reference_masking) { for (i = LAST_FRAME; i <= ALTREF_FRAME; ++i) { // Skip fixed mv modes for poor references if ((x->pred_mv_sad[ref_frame] >> 2) > x->pred_mv_sad[i]) { mode_skip_mask[ref_frame] |= INTER_NEAREST_NEAR_ZERO; break; } } } // If the segment reference frame feature is enabled.... // then do nothing if the current ref frame is not allowed.. if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { ref_frame_skip_mask[0] |= (1 << ref_frame); ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; } } // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. This is to // prevent the possibility that we end up unable to pick any mode. if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, // unless ARNR filtering is enabled in which case we want // an unfiltered alternative. We allow near/nearest as well // because they may result in zero-zero MVs but be cheaper. 
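/* Illustrative sketch, not part of the build: two mask levels cooperate
 * here. ref_frame_skip_mask[0] and [1] are bitmaps over the first and
 * second reference frame respectively, while mode_skip_mask[ref] is a
 * bitmap over prediction modes for that reference. For the overlay-frame
 * case handled below this becomes roughly:
 *   ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME);
 *   mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO;
 * leaving only near-zero-MV ALTREF modes to be searched. */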
if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) { ref_frame_skip_mask[0] = (1 << LAST_FRAME) | (1 << GOLDEN_FRAME); ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; mode_skip_mask[ALTREF_FRAME] = ~INTER_NEAREST_NEAR_ZERO; if (frame_mv[NEARMV][ALTREF_FRAME].as_int != 0) mode_skip_mask[ALTREF_FRAME] |= (1 << NEARMV); if (frame_mv[NEARESTMV][ALTREF_FRAME].as_int != 0) mode_skip_mask[ALTREF_FRAME] |= (1 << NEARESTMV); } } if (cpi->rc.is_src_frame_alt_ref) { if (sf->alt_ref_search_fp) { mode_skip_mask[ALTREF_FRAME] = 0; ref_frame_skip_mask[0] = ~(1 << ALTREF_FRAME) & 0xff; ref_frame_skip_mask[1] = SECOND_REF_FRAME_MASK; } } if (sf->alt_ref_search_fp) if (!cm->show_frame && x->pred_mv_sad[GOLDEN_FRAME] < INT_MAX) if (x->pred_mv_sad[ALTREF_FRAME] > (x->pred_mv_sad[GOLDEN_FRAME] << 1)) mode_skip_mask[ALTREF_FRAME] |= INTER_ALL; if (sf->adaptive_mode_search) { if (cm->show_frame && !cpi->rc.is_src_frame_alt_ref && cpi->rc.frames_since_golden >= 3) if (x->pred_mv_sad[GOLDEN_FRAME] > (x->pred_mv_sad[LAST_FRAME] << 1)) mode_skip_mask[GOLDEN_FRAME] |= INTER_ALL; } if (bsize > sf->max_intra_bsize) { ref_frame_skip_mask[0] |= (1 << INTRA_FRAME); ref_frame_skip_mask[1] |= (1 << INTRA_FRAME); } mode_skip_mask[INTRA_FRAME] |= ~(sf->intra_y_mode_mask[max_txsize_lookup[bsize]]); for (i = 0; i <= LAST_NEW_MV_INDEX; ++i) mode_threshold[i] = 0; for (i = LAST_NEW_MV_INDEX + 1; i < MAX_MODES; ++i) mode_threshold[i] = ((int64_t)rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5; midx = sf->schedule_mode_search ? mode_skip_start : 0; while (midx > 4) { uint8_t end_pos = 0; for (i = 5; i < midx; ++i) { if (mode_threshold[tile_mode_map[i - 1]] > mode_threshold[tile_mode_map[i]]) { uint8_t tmp = tile_mode_map[i]; tile_mode_map[i] = tile_mode_map[i - 1]; tile_mode_map[i - 1] = tmp; end_pos = i; } } midx = end_pos; } memcpy(mode_map, tile_mode_map, sizeof(mode_map)); for (midx = 0; midx < MAX_MODES; ++midx) { int mode_index = mode_map[midx]; int mode_excluded = 0; int64_t this_rd = INT64_MAX; int disable_skip = 0; int compmode_cost = 0; int rate2 = 0, rate_y = 0, rate_uv = 0; int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; int skippable = 0; int this_skip2 = 0; int64_t total_sse = INT64_MAX; int early_term = 0; this_mode = vp9_mode_order[mode_index].mode; ref_frame = vp9_mode_order[mode_index].ref_frame[0]; second_ref_frame = vp9_mode_order[mode_index].ref_frame[1]; vp9_zero(x->sum_y_eobs); if (is_rect_partition) { if (ctx->skip_ref_frame_mask & (1 << ref_frame)) continue; if (second_ref_frame > 0 && (ctx->skip_ref_frame_mask & (1 << second_ref_frame))) continue; } // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. if (midx == mode_skip_start && best_mode_index >= 0) { switch (best_mbmode.ref_frame[0]) { case INTRA_FRAME: break; case LAST_FRAME: ref_frame_skip_mask[0] |= LAST_FRAME_MODE_MASK; break; case GOLDEN_FRAME: ref_frame_skip_mask[0] |= GOLDEN_FRAME_MODE_MASK; break; case ALTREF_FRAME: ref_frame_skip_mask[0] |= ALT_REF_MODE_MASK; break; case NONE: case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break; } } if ((ref_frame_skip_mask[0] & (1 << ref_frame)) && (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame)))) continue; if (mode_skip_mask[ref_frame] & (1 << this_mode)) continue; // Test best rd so far against threshold for trying this mode. 
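/* Illustrative worked example, not part of the build: mode_threshold[] was
 * filled above as (rd_threshes[i] * rd_thresh_freq_fact[i]) >> 5, so a
 * frequency factor of 32 reproduces the base threshold and larger factors
 * scale it up (the factors adapt as modes win or lose; see
 * vp9_update_rd_thresh_fact). Once best_rd falls below a mode's scaled
 * threshold, that mode is skipped without any search. */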
if (best_mode_skippable && sf->schedule_mode_search) mode_threshold[mode_index] <<= 1; if (best_rd < mode_threshold[mode_index]) continue; // This is only used in motion vector unit test. if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue; if (sf->motion_field_mode_search) { const int mi_width = VPXMIN(num_8x8_blocks_wide_lookup[bsize], tile_info->mi_col_end - mi_col); const int mi_height = VPXMIN(num_8x8_blocks_high_lookup[bsize], tile_info->mi_row_end - mi_row); const int bsl = mi_width_log2_lookup[bsize]; int cb_partition_search_ctrl = (((mi_row + mi_col) >> bsl) + get_chessboard_index(cm->current_video_frame)) & 0x1; MODE_INFO *ref_mi; int const_motion = 1; int skip_ref_frame = !cb_partition_search_ctrl; MV_REFERENCE_FRAME rf = NONE; int_mv ref_mv; ref_mv.as_int = INVALID_MV; if ((mi_row - 1) >= tile_info->mi_row_start) { ref_mv = xd->mi[-xd->mi_stride]->mv[0]; rf = xd->mi[-xd->mi_stride]->ref_frame[0]; for (i = 0; i < mi_width; ++i) { ref_mi = xd->mi[-xd->mi_stride + i]; const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) && (ref_frame == ref_mi->ref_frame[0]); skip_ref_frame &= (rf == ref_mi->ref_frame[0]); } } if ((mi_col - 1) >= tile_info->mi_col_start) { if (ref_mv.as_int == INVALID_MV) ref_mv = xd->mi[-1]->mv[0]; if (rf == NONE) rf = xd->mi[-1]->ref_frame[0]; for (i = 0; i < mi_height; ++i) { ref_mi = xd->mi[i * xd->mi_stride - 1]; const_motion &= (ref_mv.as_int == ref_mi->mv[0].as_int) && (ref_frame == ref_mi->ref_frame[0]); skip_ref_frame &= (rf == ref_mi->ref_frame[0]); } } if (skip_ref_frame && this_mode != NEARESTMV && this_mode != NEWMV) if (rf > INTRA_FRAME) if (ref_frame != rf) continue; if (const_motion) if (this_mode == NEARMV || this_mode == ZEROMV) continue; } comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { if (!cpi->allow_comp_inter_inter) continue; if (cm->ref_frame_sign_bias[ref_frame] == cm->ref_frame_sign_bias[second_ref_frame]) continue; // Skip compound inter modes if ARF is not available. if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue; // Do not allow compound prediction if the segment level reference frame // feature is in use as in this case there can only be one reference. if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue; if ((mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && best_mode_index >= 0 && best_mbmode.ref_frame[0] == INTRA_FRAME) continue; mode_excluded = cm->reference_mode == SINGLE_REFERENCE; } else { if (ref_frame != INTRA_FRAME) mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; } if (ref_frame == INTRA_FRAME) { if (sf->adaptive_mode_search) if ((x->source_variance << num_pels_log2_lookup[bsize]) > best_pred_sse) continue; if (this_mode != DC_PRED) { // Disable intra modes other than DC_PRED for blocks with low variance // Threshold for intra skipping based on source variance // TODO(debargha): Specialize the threshold for super block sizes const unsigned int skip_intra_var_thresh = (cpi->oxcf.content == VP9E_CONTENT_FILM) ? 
0 : 64; if ((mode_search_skip_flags & FLAG_SKIP_INTRA_LOWVAR) && x->source_variance < skip_intra_var_thresh) continue; // Only search the oblique modes if the best so far is // one of the neighboring directional modes if ((mode_search_skip_flags & FLAG_SKIP_INTRA_BESTINTER) && (this_mode >= D45_PRED && this_mode <= TM_PRED)) { if (best_mode_index >= 0 && best_mbmode.ref_frame[0] > INTRA_FRAME) continue; } if (mode_search_skip_flags & FLAG_SKIP_INTRA_DIRMISMATCH) { if (conditional_skipintra(this_mode, best_intra_mode)) continue; } } } else { const MV_REFERENCE_FRAME ref_frames[2] = { ref_frame, second_ref_frame }; if (!check_best_zero_mv(cpi, mbmi_ext->mode_context, frame_mv, this_mode, ref_frames)) continue; } mi->mode = this_mode; mi->uv_mode = DC_PRED; mi->ref_frame[0] = ref_frame; mi->ref_frame[1] = second_ref_frame; // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. mi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter; mi->mv[0].as_int = mi->mv[1].as_int = 0; x->skip = 0; set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); // Select prediction reference frames. for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; } if (ref_frame == INTRA_FRAME) { TX_SIZE uv_tx; struct macroblockd_plane *const pd = &xd->plane[1]; memset(x->skip_txfm, 0, sizeof(x->skip_txfm)); super_block_yrd(cpi, x, &rate_y, &distortion_y, &skippable, NULL, bsize, best_rd, recon); if (rate_y == INT_MAX) continue; uv_tx = uv_txsize_lookup[bsize][mi->tx_size][pd->subsampling_x] [pd->subsampling_y]; if (rate_uv_intra[uv_tx] == INT_MAX) { choose_intra_uv_mode(cpi, x, ctx, bsize, uv_tx, &rate_uv_intra[uv_tx], &rate_uv_tokenonly[uv_tx], &dist_uv[uv_tx], &skip_uv[uv_tx], &mode_uv[uv_tx]); } rate_uv = rate_uv_tokenonly[uv_tx]; distortion_uv = dist_uv[uv_tx]; skippable = skippable && skip_uv[uv_tx]; mi->uv_mode = mode_uv[uv_tx]; rate2 = rate_y + cpi->mbmode_cost[mi->mode] + rate_uv_intra[uv_tx]; if (this_mode != DC_PRED && this_mode != TM_PRED) rate2 += intra_cost_penalty; distortion2 = distortion_y + distortion_uv; } else { this_rd = handle_inter_mode( cpi, x, bsize, &rate2, &distortion2, &skippable, &rate_y, &rate_uv, recon, &disable_skip, frame_mv, mi_row, mi_col, single_newmv, single_inter_filter, single_skippable, &total_sse, best_rd, &mask_filter, filter_cache); if (this_rd == INT64_MAX) continue; compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost; } // Estimate the reference frame signaling cost and add it // to the rolling cost variable. if (comp_pred) { rate2 += ref_costs_comp[ref_frame]; } else { rate2 += ref_costs_single[ref_frame]; } if (!disable_skip) { const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); const int skip_cost0 = vp9_cost_bit(skip_prob, 0); const int skip_cost1 = vp9_cost_bit(skip_prob, 1); if (skippable) { // Back out the coefficient coding costs rate2 -= (rate_y + rate_uv); // Cost the skip mb case rate2 += skip_cost1; } else if (ref_frame != INTRA_FRAME && !xd->lossless && !cpi->oxcf.sharpness) { if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0, distortion2) < RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) { // Add in the cost of the no skip flag. 
rate2 += skip_cost0; } else { // FIXME(rbultje) make this work for splitmv also assert(total_sse >= 0); rate2 += skip_cost1; distortion2 = total_sse; rate2 -= (rate_y + rate_uv); this_skip2 = 1; } } else { // Add in the cost of the no skip flag. rate2 += skip_cost0; } // Calculate the final RD estimate for this mode. this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); } if (recon) { // In film mode bias against DC pred and other intra if there is a // significant difference between the variance of the sub blocks in // the source. Also apply some bias against compound modes which also // tend to blur fine texture such as film grain over time. // // The sub block test here acts in the case where one or more sub // blocks have relatively high variance but others relatively low // variance. Here the high variance sub blocks may push the // total variance for the current block size over the thresholds // used in rd_variance_adjustment() below. if (cpi->oxcf.content == VP9E_CONTENT_FILM) { if (bsize >= BLOCK_16X16) { int min_energy, max_energy; vp9_get_sub_block_energy(cpi, x, mi_row, mi_col, bsize, &min_energy, &max_energy); if (max_energy > min_energy) { if (ref_frame == INTRA_FRAME) { if (this_mode == DC_PRED) this_rd += (this_rd * (max_energy - min_energy)); else this_rd += (this_rd * (max_energy - min_energy)) / 4; } else if (second_ref_frame > INTRA_FRAME) { this_rd += this_rd / 4; } } } } // Apply an adjustment to the rd value based on the similarity of the // source variance and reconstructed variance. rd_variance_adjustment(cpi, x, bsize, &this_rd, recon, ref_frame, second_ref_frame, this_mode); } if (ref_frame == INTRA_FRAME) { // Keep record of best intra rd if (this_rd < best_intra_rd) { best_intra_rd = this_rd; best_intra_mode = mi->mode; } } if (!disable_skip && ref_frame == INTRA_FRAME) { for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd); } // Did this mode help.. i.e.
is it the new best mode if (this_rd < best_rd || x->skip) { int max_plane = MAX_MB_PLANE; if (!mode_excluded) { // Note index of best mode so far best_mode_index = mode_index; if (ref_frame == INTRA_FRAME) { /* required for left and above block mv */ mi->mv[0].as_int = 0; max_plane = 1; // Initialize interp_filter here so we do not have to check for // inter block modes in get_pred_context_switchable_interp() mi->interp_filter = SWITCHABLE_FILTERS; } else { best_pred_sse = x->pred_sse[ref_frame]; } rd_cost->rate = rate2; rd_cost->dist = distortion2; rd_cost->rdcost = this_rd; best_rd = this_rd; best_mbmode = *mi; best_skip2 = this_skip2; best_mode_skippable = skippable; if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); memcpy(ctx->zcoeff_blk, x->zcoeff_blk[mi->tx_size], sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); ctx->sum_y_eobs = x->sum_y_eobs[mi->tx_size]; // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history if ((mode_search_skip_flags & FLAG_EARLY_TERMINATE) && (mode_index > MIN_EARLY_TERM_INDEX)) { int qstep = xd->plane[0].dequant[1]; // TODO(debargha): Enhance this by specializing for each mode_index int scale = 4; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { qstep >>= (xd->bd - 8); } #endif // CONFIG_VP9_HIGHBITDEPTH if (x->source_variance < UINT_MAX) { const int var_adjust = (x->source_variance < 16); scale -= var_adjust; } if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) { early_term = 1; } } } } /* keep record of best compound/single-only prediction */ if (!disable_skip && ref_frame != INTRA_FRAME) { int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; if (cm->reference_mode == REFERENCE_MODE_SELECT) { single_rate = rate2 - compmode_cost; hybrid_rate = rate2; } else { single_rate = rate2; hybrid_rate = rate2 + compmode_cost; } single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); if (!comp_pred) { if (single_rd < best_pred_rd[SINGLE_REFERENCE]) best_pred_rd[SINGLE_REFERENCE] = single_rd; } else { if (single_rd < best_pred_rd[COMPOUND_REFERENCE]) best_pred_rd[COMPOUND_REFERENCE] = single_rd; } if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; /* keep record of best filter type */ if (!mode_excluded && cm->interp_filter != BILINEAR) { int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS : cm->interp_filter]; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { int64_t adj_rd; if (ref == INT64_MAX) adj_rd = 0; else if (filter_cache[i] == INT64_MAX) // when early termination is triggered, the encoder does not have // access to the rate-distortion cost. it only knows that the cost // should be above the maximum valid value. hence it takes the known // maximum plus an arbitrary constant as the rate-distortion cost. adj_rd = mask_filter - ref + 10; else adj_rd = filter_cache[i] - ref; adj_rd += this_rd; best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd); } } } if (early_term) break; if (x->skip && !comp_pred) break; } // The inter modes' rate costs are not calculated precisely in some cases. // Therefore, sometimes, NEWMV is chosen instead of NEARESTMV, NEARMV, and // ZEROMV. Here, checks are added for those cases, and the mode decisions // are corrected. 
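/* Illustrative sketch, not part of the build: the correction below
 * rewrites a winning NEWMV whose vector happens to equal a predictor as
 * the cheaper mode that signals the same vector:
 *   mv == nearest mv -> NEARESTMV
 *   mv == near mv    -> NEARMV
 *   mv == (0, 0)     -> ZEROMV
 * For compound prediction both vectors must match before the mode is
 * rewritten. */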
if (best_mbmode.mode == NEWMV) { const MV_REFERENCE_FRAME refs[2] = { best_mbmode.ref_frame[0], best_mbmode.ref_frame[1] }; int comp_pred_mode = refs[1] > INTRA_FRAME; if (frame_mv[NEARESTMV][refs[0]].as_int == best_mbmode.mv[0].as_int && ((comp_pred_mode && frame_mv[NEARESTMV][refs[1]].as_int == best_mbmode.mv[1].as_int) || !comp_pred_mode)) best_mbmode.mode = NEARESTMV; else if (frame_mv[NEARMV][refs[0]].as_int == best_mbmode.mv[0].as_int && ((comp_pred_mode && frame_mv[NEARMV][refs[1]].as_int == best_mbmode.mv[1].as_int) || !comp_pred_mode)) best_mbmode.mode = NEARMV; else if (best_mbmode.mv[0].as_int == 0 && ((comp_pred_mode && best_mbmode.mv[1].as_int == 0) || !comp_pred_mode)) best_mbmode.mode = ZEROMV; } if (best_mode_index < 0 || best_rd >= best_rd_so_far) { // If adaptive interp filter is enabled, then the current leaf node of 8x8 // data is needed for sub8x8. Hence preserve the context. #if CONFIG_CONSISTENT_RECODE if (bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; #else if (cpi->row_mt && bsize == BLOCK_8X8) ctx->mic = *xd->mi[0]; #endif rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; } // If we used an estimate for the uv intra rd in the loop above... if (sf->use_uv_intra_rd_estimate) { // Do Intra UV best rd mode selection if best mode choice above was intra. if (best_mbmode.ref_frame[0] == INTRA_FRAME) { TX_SIZE uv_tx_size; *mi = best_mbmode; uv_tx_size = get_uv_tx_size(mi, &xd->plane[1]); rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra[uv_tx_size], &rate_uv_tokenonly[uv_tx_size], &dist_uv[uv_tx_size], &skip_uv[uv_tx_size], bsize < BLOCK_8X8 ? BLOCK_8X8 : bsize, uv_tx_size); } } assert((cm->interp_filter == SWITCHABLE) || (cm->interp_filter == best_mbmode.interp_filter) || !is_inter_block(&best_mbmode)); if (!cpi->rc.is_src_frame_alt_ref) vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, sf->adaptive_rd_thresh, bsize, best_mode_index); // macroblock modes *mi = best_mbmode; x->skip |= best_skip2; for (i = 0; i < REFERENCE_MODES; ++i) { if (best_pred_rd[i] == INT64_MAX) best_pred_diff[i] = INT_MIN; else best_pred_diff[i] = best_rd - best_pred_rd[i]; } if (!x->skip) { for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { if (best_filter_rd[i] == INT64_MAX) best_filter_diff[i] = 0; else best_filter_diff[i] = best_rd - best_filter_rd[i]; } if (cm->interp_filter == SWITCHABLE) assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); } else { vp9_zero(best_filter_diff); } // TODO(yunqingwang): Moving this line in front of the above best_filter_diff // updating code causes PSNR loss. Need to figure out the conflict. x->skip |= best_mode_skippable; if (!x->skip && !x->select_tx_size) { int has_high_freq_coeff = 0; int plane; int max_plane = is_inter_block(xd->mi[0]) ?
MAX_MB_PLANE : 1; for (plane = 0; plane < max_plane; ++plane) { x->plane[plane].eobs = ctx->eobs_pbuf[plane][1]; has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane); } for (plane = max_plane; plane < MAX_MB_PLANE; ++plane) { x->plane[plane].eobs = ctx->eobs_pbuf[plane][2]; has_high_freq_coeff |= vp9_has_high_freq_in_plane(x, bsize, plane); } best_mode_skippable |= !has_high_freq_coeff; } assert(best_mode_index >= 0); store_coding_context(x, ctx, best_mode_index, best_pred_diff, best_filter_diff, best_mode_skippable); } void vp9_rd_pick_inter_mode_sb_seg_skip(VP9_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { VP9_COMMON *const cm = &cpi->common; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; unsigned char segment_id = mi->segment_id; const int comp_pred = 0; int i; int64_t best_pred_diff[REFERENCE_MODES]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vpx_prob comp_mode_p; INTERP_FILTER best_filter = SWITCHABLE; int64_t this_rd = INT64_MAX; int rate2 = 0; const int64_t distortion2 = 0; x->skip_encode = cpi->sf.skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); for (i = 0; i < MAX_REF_FRAMES; ++i) x->pred_sse[i] = INT_MAX; for (i = LAST_FRAME; i < MAX_REF_FRAMES; ++i) x->pred_mv_sad[i] = INT_MAX; rd_cost->rate = INT_MAX; assert(segfeature_active(&cm->seg, segment_id, SEG_LVL_SKIP)); mi->mode = ZEROMV; mi->uv_mode = DC_PRED; mi->ref_frame[0] = LAST_FRAME; mi->ref_frame[1] = NONE; mi->mv[0].as_int = 0; x->skip = 1; ctx->sum_y_eobs = 0; if (cm->interp_filter != BILINEAR) { best_filter = EIGHTTAP; if (cm->interp_filter == SWITCHABLE && x->source_variance >= cpi->sf.disable_filter_search_var_thresh) { int rs; int best_rs = INT_MAX; for (i = 0; i < SWITCHABLE_FILTERS; ++i) { mi->interp_filter = i; rs = vp9_get_switchable_rate(cpi, xd); if (rs < best_rs) { best_rs = rs; best_filter = mi->interp_filter; } } } } // Set the appropriate filter if (cm->interp_filter == SWITCHABLE) { mi->interp_filter = best_filter; rate2 += vp9_get_switchable_rate(cpi, xd); } else { mi->interp_filter = cm->interp_filter; } if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += vp9_cost_bit(comp_mode_p, comp_pred); // Estimate the reference frame signaling cost and add it // to the rolling cost variable. 
rate2 += ref_costs_single[LAST_FRAME]; this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); rd_cost->rate = rate2; rd_cost->dist = distortion2; rd_cost->rdcost = this_rd; if (this_rd >= best_rd_so_far) { rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; } assert((cm->interp_filter == SWITCHABLE) || (cm->interp_filter == mi->interp_filter)); vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, cpi->sf.adaptive_rd_thresh, bsize, THR_ZEROMV); vp9_zero(best_pred_diff); vp9_zero(best_filter_diff); if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, MAX_MB_PLANE); store_coding_context(x, ctx, THR_ZEROMV, best_pred_diff, best_filter_diff, 0); } void vp9_rd_pick_inter_mode_sub8x8(VP9_COMP *cpi, TileDataEnc *tile_data, MACROBLOCK *x, int mi_row, int mi_col, RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far) { VP9_COMMON *const cm = &cpi->common; RD_OPT *const rd_opt = &cpi->rd; SPEED_FEATURES *const sf = &cpi->sf; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; const struct segmentation *const seg = &cm->seg; MV_REFERENCE_FRAME ref_frame, second_ref_frame; unsigned char segment_id = mi->segment_id; int comp_pred, i; int_mv frame_mv[MB_MODE_COUNT][MAX_REF_FRAMES]; struct buf_2d yv12_mb[4][MAX_MB_PLANE]; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; int64_t best_rd = best_rd_so_far; int64_t best_yrd = best_rd_so_far; // FIXME(rbultje) more precise int64_t best_pred_diff[REFERENCE_MODES]; int64_t best_pred_rd[REFERENCE_MODES]; int64_t best_filter_rd[SWITCHABLE_FILTER_CONTEXTS]; int64_t best_filter_diff[SWITCHABLE_FILTER_CONTEXTS]; MODE_INFO best_mbmode; int ref_index, best_ref_index = 0; unsigned int ref_costs_single[MAX_REF_FRAMES], ref_costs_comp[MAX_REF_FRAMES]; vpx_prob comp_mode_p; INTERP_FILTER tmp_best_filter = SWITCHABLE; int rate_uv_intra, rate_uv_tokenonly; int64_t dist_uv; int skip_uv; PREDICTION_MODE mode_uv = DC_PRED; const int intra_cost_penalty = vp9_get_intra_cost_penalty(cpi, bsize, cm->base_qindex, cm->y_dc_delta_q); int_mv seg_mvs[4][MAX_REF_FRAMES]; b_mode_info best_bmodes[4]; int best_skip2 = 0; int ref_frame_skip_mask[2] = { 0 }; int64_t mask_filter = 0; int64_t filter_cache[SWITCHABLE_FILTER_CONTEXTS]; int internal_active_edge = vp9_active_edge_sb(cpi, mi_row, mi_col) && vp9_internal_image_edge(cpi); const int *const rd_thresh_freq_fact = tile_data->thresh_freq_fact[bsize]; x->skip_encode = sf->skip_encode_frame && x->q_index < QIDX_SKIP_THRESH; memset(x->zcoeff_blk[TX_4X4], 0, 4); vp9_zero(best_mbmode); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; for (i = 0; i < 4; i++) { int j; for (j = 0; j < MAX_REF_FRAMES; j++) seg_mvs[i][j].as_int = INVALID_MV; } estimate_ref_frame_costs(cm, xd, segment_id, ref_costs_single, ref_costs_comp, &comp_mode_p); for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = INT64_MAX; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) best_filter_rd[i] = INT64_MAX; rate_uv_intra = INT_MAX; rd_cost->rate = INT_MAX; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { setup_buffer_inter(cpi, x, ref_frame, bsize, mi_row, mi_col, frame_mv[NEARESTMV], frame_mv[NEARMV], yv12_mb); } else { ref_frame_skip_mask[0] |= (1 << ref_frame); ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; } frame_mv[NEWMV][ref_frame].as_int = INVALID_MV; frame_mv[ZEROMV][ref_frame].as_int = 0; } for (ref_index = 0; ref_index < MAX_REFS; ++ref_index) { int mode_excluded = 0; 
int64_t this_rd = INT64_MAX; int disable_skip = 0; int compmode_cost = 0; int rate2 = 0, rate_y = 0, rate_uv = 0; int64_t distortion2 = 0, distortion_y = 0, distortion_uv = 0; int skippable = 0; int i; int this_skip2 = 0; int64_t total_sse = INT_MAX; int early_term = 0; struct buf_2d backup_yv12[2][MAX_MB_PLANE]; ref_frame = vp9_ref_order[ref_index].ref_frame[0]; second_ref_frame = vp9_ref_order[ref_index].ref_frame[1]; vp9_zero(x->sum_y_eobs); #if CONFIG_BETTER_HW_COMPATIBILITY // forbid 8X4 and 4X8 partitions if any reference frame is scaled. if (bsize == BLOCK_8X4 || bsize == BLOCK_4X8) { int ref_scaled = ref_frame > INTRA_FRAME && vp9_is_scaled(&cm->frame_refs[ref_frame - 1].sf); if (second_ref_frame > INTRA_FRAME) ref_scaled += vp9_is_scaled(&cm->frame_refs[second_ref_frame - 1].sf); if (ref_scaled) continue; } #endif // Look at the reference frame of the best mode so far and set the // skip mask to look at a subset of the remaining modes. if (ref_index > 2 && sf->mode_skip_start < MAX_MODES) { if (ref_index == 3) { switch (best_mbmode.ref_frame[0]) { case INTRA_FRAME: break; case LAST_FRAME: ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << ALTREF_FRAME); ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; break; case GOLDEN_FRAME: ref_frame_skip_mask[0] |= (1 << LAST_FRAME) | (1 << ALTREF_FRAME); ref_frame_skip_mask[1] |= SECOND_REF_FRAME_MASK; break; case ALTREF_FRAME: ref_frame_skip_mask[0] |= (1 << GOLDEN_FRAME) | (1 << LAST_FRAME); break; case NONE: case MAX_REF_FRAMES: assert(0 && "Invalid Reference frame"); break; } } } if ((ref_frame_skip_mask[0] & (1 << ref_frame)) && (ref_frame_skip_mask[1] & (1 << VPXMAX(0, second_ref_frame)))) continue; // Test best rd so far against threshold for trying this mode. if (!internal_active_edge && rd_less_than_thresh(best_rd, rd_opt->threshes[segment_id][bsize][ref_index], &rd_thresh_freq_fact[ref_index])) continue; // This is only used in motion vector unit test. if (cpi->oxcf.motion_vector_unit_test && ref_frame == INTRA_FRAME) continue; comp_pred = second_ref_frame > INTRA_FRAME; if (comp_pred) { if (!cpi->allow_comp_inter_inter) continue; if (cm->ref_frame_sign_bias[ref_frame] == cm->ref_frame_sign_bias[second_ref_frame]) continue; if (!(cpi->ref_frame_flags & flag_list[second_ref_frame])) continue; // Do not allow compound prediction if the segment level reference frame // feature is in use as in this case there can only be one reference. if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) continue; if ((sf->mode_search_skip_flags & FLAG_SKIP_COMP_BESTINTRA) && best_mbmode.ref_frame[0] == INTRA_FRAME) continue; } if (comp_pred) mode_excluded = cm->reference_mode == SINGLE_REFERENCE; else if (ref_frame != INTRA_FRAME) mode_excluded = cm->reference_mode == COMPOUND_REFERENCE; // If the segment reference frame feature is enabled.... // then do nothing if the current ref frame is not allowed.. if (segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME) && get_segdata(seg, segment_id, SEG_LVL_REF_FRAME) != (int)ref_frame) { continue; // Disable this drop out case if the ref frame // segment level feature is enabled for this segment. This is to // prevent the possibility that we end up unable to pick any mode. } else if (!segfeature_active(seg, segment_id, SEG_LVL_REF_FRAME)) { // Only consider ZEROMV/ALTREF_FRAME for alt ref frame, // unless ARNR filtering is enabled in which case we want // an unfiltered alternative. We allow near/nearest as well // because they may result in zero-zero MVs but be cheaper. 
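// (ARNR refers to the temporal filtering used to build the alt-ref frame;
// when arnr_max_frames is 0 the alt-ref is an unfiltered source frame, which
// is what the is_src_frame_alt_ref check below detects.)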
if (cpi->rc.is_src_frame_alt_ref && (cpi->oxcf.arnr_max_frames == 0)) continue; } mi->tx_size = TX_4X4; mi->uv_mode = DC_PRED; mi->ref_frame[0] = ref_frame; mi->ref_frame[1] = second_ref_frame; // Evaluate all sub-pel filters irrespective of whether we can use // them for this frame. mi->interp_filter = cm->interp_filter == SWITCHABLE ? EIGHTTAP : cm->interp_filter; x->skip = 0; set_ref_ptrs(cm, xd, ref_frame, second_ref_frame); // Select prediction reference frames. for (i = 0; i < MAX_MB_PLANE; i++) { xd->plane[i].pre[0] = yv12_mb[ref_frame][i]; if (comp_pred) xd->plane[i].pre[1] = yv12_mb[second_ref_frame][i]; } if (ref_frame == INTRA_FRAME) { int rate; if (rd_pick_intra_sub_8x8_y_mode(cpi, x, &rate, &rate_y, &distortion_y, best_rd) >= best_rd) continue; rate2 += rate; rate2 += intra_cost_penalty; distortion2 += distortion_y; if (rate_uv_intra == INT_MAX) { choose_intra_uv_mode(cpi, x, ctx, bsize, TX_4X4, &rate_uv_intra, &rate_uv_tokenonly, &dist_uv, &skip_uv, &mode_uv); } rate2 += rate_uv_intra; rate_uv = rate_uv_tokenonly; distortion2 += dist_uv; distortion_uv = dist_uv; mi->uv_mode = mode_uv; } else { int rate; int64_t distortion; int64_t this_rd_thresh; int64_t tmp_rd, tmp_best_rd = INT64_MAX, tmp_best_rdu = INT64_MAX; int tmp_best_rate = INT_MAX, tmp_best_ratey = INT_MAX; int64_t tmp_best_distortion = INT_MAX, tmp_best_sse, uv_sse; int tmp_best_skippable = 0; int switchable_filter_index; int_mv *second_ref = comp_pred ? &x->mbmi_ext->ref_mvs[second_ref_frame][0] : NULL; b_mode_info tmp_best_bmodes[16]; MODE_INFO tmp_best_mbmode; BEST_SEG_INFO bsi[SWITCHABLE_FILTERS]; int pred_exists = 0; int uv_skippable; YV12_BUFFER_CONFIG *scaled_ref_frame[2] = { NULL, NULL }; int ref; for (ref = 0; ref < 2; ++ref) { scaled_ref_frame[ref] = mi->ref_frame[ref] > INTRA_FRAME ? vp9_get_scaled_ref_frame(cpi, mi->ref_frame[ref]) : NULL; if (scaled_ref_frame[ref]) { int i; // Swap out the reference frame for a version that's been scaled to // match the resolution of the current frame, allowing the existing // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[ref][i] = xd->plane[i].pre[ref]; vp9_setup_pre_planes(xd, ref, scaled_ref_frame[ref], mi_row, mi_col, NULL); } } this_rd_thresh = (ref_frame == LAST_FRAME) ? rd_opt->threshes[segment_id][bsize][THR_LAST] : rd_opt->threshes[segment_id][bsize][THR_ALTR]; this_rd_thresh = (ref_frame == GOLDEN_FRAME) ? rd_opt->threshes[segment_id][bsize][THR_GOLD] : this_rd_thresh; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; ++i) filter_cache[i] = INT64_MAX; if (cm->interp_filter != BILINEAR) { tmp_best_filter = EIGHTTAP; if (x->source_variance < sf->disable_filter_search_var_thresh) { tmp_best_filter = EIGHTTAP; } else if (sf->adaptive_pred_interp_filter == 1 && ctx->pred_interp_filter < SWITCHABLE) { tmp_best_filter = ctx->pred_interp_filter; } else if (sf->adaptive_pred_interp_filter == 2) { tmp_best_filter = ctx->pred_interp_filter < SWITCHABLE ? 
ctx->pred_interp_filter : 0; } else { for (switchable_filter_index = 0; switchable_filter_index < SWITCHABLE_FILTERS; ++switchable_filter_index) { int newbest, rs; int64_t rs_rd; MB_MODE_INFO_EXT *mbmi_ext = x->mbmi_ext; mi->interp_filter = switchable_filter_index; tmp_rd = rd_pick_best_sub8x8_mode( cpi, x, &mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd, &rate, &rate_y, &distortion, &skippable, &total_sse, (int)this_rd_thresh, seg_mvs, bsi, switchable_filter_index, mi_row, mi_col); if (tmp_rd == INT64_MAX) continue; rs = vp9_get_switchable_rate(cpi, xd); rs_rd = RDCOST(x->rdmult, x->rddiv, rs, 0); filter_cache[switchable_filter_index] = tmp_rd; filter_cache[SWITCHABLE_FILTERS] = VPXMIN(filter_cache[SWITCHABLE_FILTERS], tmp_rd + rs_rd); if (cm->interp_filter == SWITCHABLE) tmp_rd += rs_rd; mask_filter = VPXMAX(mask_filter, tmp_rd); newbest = (tmp_rd < tmp_best_rd); if (newbest) { tmp_best_filter = mi->interp_filter; tmp_best_rd = tmp_rd; } if ((newbest && cm->interp_filter == SWITCHABLE) || (mi->interp_filter == cm->interp_filter && cm->interp_filter != SWITCHABLE)) { tmp_best_rdu = tmp_rd; tmp_best_rate = rate; tmp_best_ratey = rate_y; tmp_best_distortion = distortion; tmp_best_sse = total_sse; tmp_best_skippable = skippable; tmp_best_mbmode = *mi; for (i = 0; i < 4; i++) { tmp_best_bmodes[i] = xd->mi[0]->bmi[i]; x->zcoeff_blk[TX_4X4][i] = !x->plane[0].eobs[i]; x->sum_y_eobs[TX_4X4] += x->plane[0].eobs[i]; } pred_exists = 1; if (switchable_filter_index == 0 && sf->use_rd_breakout && best_rd < INT64_MAX) { if (tmp_best_rdu / 2 > best_rd) { // skip searching the other filters if the first is // already substantially larger than the best so far tmp_best_filter = mi->interp_filter; tmp_best_rdu = INT64_MAX; break; } } } } // switchable_filter_index loop } } if (tmp_best_rdu == INT64_MAX && pred_exists) continue; mi->interp_filter = (cm->interp_filter == SWITCHABLE ? tmp_best_filter : cm->interp_filter); if (!pred_exists) { // Handles the special case when a filter that is not in the // switchable list (bilinear, 6-tap) is indicated at the frame level tmp_rd = rd_pick_best_sub8x8_mode( cpi, x, &x->mbmi_ext->ref_mvs[ref_frame][0], second_ref, best_yrd, &rate, &rate_y, &distortion, &skippable, &total_sse, (int)this_rd_thresh, seg_mvs, bsi, 0, mi_row, mi_col); if (tmp_rd == INT64_MAX) continue; } else { total_sse = tmp_best_sse; rate = tmp_best_rate; rate_y = tmp_best_ratey; distortion = tmp_best_distortion; skippable = tmp_best_skippable; *mi = tmp_best_mbmode; for (i = 0; i < 4; i++) xd->mi[0]->bmi[i] = tmp_best_bmodes[i]; } rate2 += rate; distortion2 += distortion; if (cm->interp_filter == SWITCHABLE) rate2 += vp9_get_switchable_rate(cpi, xd); if (!mode_excluded) mode_excluded = comp_pred ? 
cm->reference_mode == SINGLE_REFERENCE : cm->reference_mode == COMPOUND_REFERENCE; compmode_cost = vp9_cost_bit(comp_mode_p, comp_pred); tmp_best_rdu = best_rd - VPXMIN(RDCOST(x->rdmult, x->rddiv, rate2, distortion2), RDCOST(x->rdmult, x->rddiv, 0, total_sse)); if (tmp_best_rdu > 0) { // If even the 'Y' rd value of split is higher than best so far // then don't bother looking at UV vp9_build_inter_predictors_sbuv(&x->e_mbd, mi_row, mi_col, BLOCK_8X8); memset(x->skip_txfm, SKIP_TXFM_NONE, sizeof(x->skip_txfm)); if (!super_block_uvrd(cpi, x, &rate_uv, &distortion_uv, &uv_skippable, &uv_sse, BLOCK_8X8, tmp_best_rdu)) { for (ref = 0; ref < 2; ++ref) { if (scaled_ref_frame[ref]) { int i; for (i = 0; i < MAX_MB_PLANE; ++i) xd->plane[i].pre[ref] = backup_yv12[ref][i]; } } continue; } rate2 += rate_uv; distortion2 += distortion_uv; skippable = skippable && uv_skippable; total_sse += uv_sse; } for (ref = 0; ref < 2; ++ref) { if (scaled_ref_frame[ref]) { // Restore the prediction frame pointers to their unscaled versions. int i; for (i = 0; i < MAX_MB_PLANE; ++i) xd->plane[i].pre[ref] = backup_yv12[ref][i]; } } } if (cm->reference_mode == REFERENCE_MODE_SELECT) rate2 += compmode_cost; // Estimate the reference frame signaling cost and add it // to the rolling cost variable. if (second_ref_frame > INTRA_FRAME) { rate2 += ref_costs_comp[ref_frame]; } else { rate2 += ref_costs_single[ref_frame]; } if (!disable_skip) { const vpx_prob skip_prob = vp9_get_skip_prob(cm, xd); const int skip_cost0 = vp9_cost_bit(skip_prob, 0); const int skip_cost1 = vp9_cost_bit(skip_prob, 1); // Skip is never coded at the segment level for sub8x8 blocks and instead // always coded in the bitstream at the mode info level. if (ref_frame != INTRA_FRAME && !xd->lossless) { if (RDCOST(x->rdmult, x->rddiv, rate_y + rate_uv + skip_cost0, distortion2) < RDCOST(x->rdmult, x->rddiv, skip_cost1, total_sse)) { // Add in the cost of the no skip flag. rate2 += skip_cost0; } else { // FIXME(rbultje) make this work for splitmv also rate2 += skip_cost1; distortion2 = total_sse; assert(total_sse >= 0); rate2 -= (rate_y + rate_uv); rate_y = 0; rate_uv = 0; this_skip2 = 1; } } else { // Add in the cost of the no skip flag. rate2 += skip_cost0; } // Calculate the final RD estimate for this mode. this_rd = RDCOST(x->rdmult, x->rddiv, rate2, distortion2); } if (!disable_skip && ref_frame == INTRA_FRAME) { for (i = 0; i < REFERENCE_MODES; ++i) best_pred_rd[i] = VPXMIN(best_pred_rd[i], this_rd); for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) best_filter_rd[i] = VPXMIN(best_filter_rd[i], this_rd); } // Did this mode help, i.e.
is it the new best mode if (this_rd < best_rd || x->skip) { if (!mode_excluded) { int max_plane = MAX_MB_PLANE; // Note index of best mode so far best_ref_index = ref_index; if (ref_frame == INTRA_FRAME) { /* required for left and above block mv */ mi->mv[0].as_int = 0; max_plane = 1; // Initialize interp_filter here so we do not have to check for // inter block modes in get_pred_context_switchable_interp() mi->interp_filter = SWITCHABLE_FILTERS; } rd_cost->rate = rate2; rd_cost->dist = distortion2; rd_cost->rdcost = this_rd; best_rd = this_rd; best_yrd = best_rd - RDCOST(x->rdmult, x->rddiv, rate_uv, distortion_uv); best_mbmode = *mi; best_skip2 = this_skip2; if (!x->select_tx_size) swap_block_ptr(x, ctx, 1, 0, 0, max_plane); memcpy(ctx->zcoeff_blk, x->zcoeff_blk[TX_4X4], sizeof(ctx->zcoeff_blk[0]) * ctx->num_4x4_blk); ctx->sum_y_eobs = x->sum_y_eobs[TX_4X4]; for (i = 0; i < 4; i++) best_bmodes[i] = xd->mi[0]->bmi[i]; // TODO(debargha): enhance this test with a better distortion prediction // based on qp, activity mask and history if ((sf->mode_search_skip_flags & FLAG_EARLY_TERMINATE) && (ref_index > MIN_EARLY_TERM_INDEX)) { int qstep = xd->plane[0].dequant[1]; // TODO(debargha): Enhance this by specializing for each mode_index int scale = 4; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { qstep >>= (xd->bd - 8); } #endif // CONFIG_VP9_HIGHBITDEPTH if (x->source_variance < UINT_MAX) { const int var_adjust = (x->source_variance < 16); scale -= var_adjust; } if (ref_frame > INTRA_FRAME && distortion2 * scale < qstep * qstep) { early_term = 1; } } } } /* keep record of best compound/single-only prediction */ if (!disable_skip && ref_frame != INTRA_FRAME) { int64_t single_rd, hybrid_rd, single_rate, hybrid_rate; if (cm->reference_mode == REFERENCE_MODE_SELECT) { single_rate = rate2 - compmode_cost; hybrid_rate = rate2; } else { single_rate = rate2; hybrid_rate = rate2 + compmode_cost; } single_rd = RDCOST(x->rdmult, x->rddiv, single_rate, distortion2); hybrid_rd = RDCOST(x->rdmult, x->rddiv, hybrid_rate, distortion2); if (!comp_pred && single_rd < best_pred_rd[SINGLE_REFERENCE]) best_pred_rd[SINGLE_REFERENCE] = single_rd; else if (comp_pred && single_rd < best_pred_rd[COMPOUND_REFERENCE]) best_pred_rd[COMPOUND_REFERENCE] = single_rd; if (hybrid_rd < best_pred_rd[REFERENCE_MODE_SELECT]) best_pred_rd[REFERENCE_MODE_SELECT] = hybrid_rd; } /* keep record of best filter type */ if (!mode_excluded && !disable_skip && ref_frame != INTRA_FRAME && cm->interp_filter != BILINEAR) { int64_t ref = filter_cache[cm->interp_filter == SWITCHABLE ? SWITCHABLE_FILTERS : cm->interp_filter]; int64_t adj_rd; for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { if (ref == INT64_MAX) adj_rd = 0; else if (filter_cache[i] == INT64_MAX) // when early termination is triggered, the encoder does not have // access to the rate-distortion cost. it only knows that the cost // should be above the maximum valid value. hence it takes the known // maximum plus an arbitrary constant as the rate-distortion cost. adj_rd = mask_filter - ref + 10; else adj_rd = filter_cache[i] - ref; adj_rd += this_rd; best_filter_rd[i] = VPXMIN(best_filter_rd[i], adj_rd); } } if (early_term) break; if (x->skip && !comp_pred) break; } if (best_rd >= best_rd_so_far) { rd_cost->rate = INT_MAX; rd_cost->rdcost = INT64_MAX; return; } // If we used an estimate for the uv intra rd in the loop above... if (sf->use_uv_intra_rd_estimate) { // Do Intra UV best rd mode selection if best mode choice above was intra. 
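// (For sub8x8 blocks chroma is predicted at the 8x8 level with 4x4
// transforms, hence the fixed BLOCK_8X8 / TX_4X4 arguments passed below.)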
if (best_mbmode.ref_frame[0] == INTRA_FRAME) { *mi = best_mbmode; rd_pick_intra_sbuv_mode(cpi, x, ctx, &rate_uv_intra, &rate_uv_tokenonly, &dist_uv, &skip_uv, BLOCK_8X8, TX_4X4); } } if (best_rd == INT64_MAX) { rd_cost->rate = INT_MAX; rd_cost->dist = INT64_MAX; rd_cost->rdcost = INT64_MAX; return; } assert((cm->interp_filter == SWITCHABLE) || (cm->interp_filter == best_mbmode.interp_filter) || !is_inter_block(&best_mbmode)); vp9_update_rd_thresh_fact(tile_data->thresh_freq_fact, sf->adaptive_rd_thresh, bsize, best_ref_index); // macroblock modes *mi = best_mbmode; x->skip |= best_skip2; if (!is_inter_block(&best_mbmode)) { for (i = 0; i < 4; i++) xd->mi[0]->bmi[i].as_mode = best_bmodes[i].as_mode; } else { for (i = 0; i < 4; ++i) memcpy(&xd->mi[0]->bmi[i], &best_bmodes[i], sizeof(b_mode_info)); mi->mv[0].as_int = xd->mi[0]->bmi[3].as_mv[0].as_int; mi->mv[1].as_int = xd->mi[0]->bmi[3].as_mv[1].as_int; } for (i = 0; i < REFERENCE_MODES; ++i) { if (best_pred_rd[i] == INT64_MAX) best_pred_diff[i] = INT_MIN; else best_pred_diff[i] = best_rd - best_pred_rd[i]; } if (!x->skip) { for (i = 0; i < SWITCHABLE_FILTER_CONTEXTS; i++) { if (best_filter_rd[i] == INT64_MAX) best_filter_diff[i] = 0; else best_filter_diff[i] = best_rd - best_filter_rd[i]; } if (cm->interp_filter == SWITCHABLE) assert(best_filter_diff[SWITCHABLE_FILTERS] == 0); } else { vp9_zero(best_filter_diff); } store_coding_context(x, ctx, best_ref_index, best_pred_diff, best_filter_diff, 0); } #endif // !CONFIG_REALTIME_ONLY libvpx-1.8.2/vp9/encoder/vp9_rdopt.h000066400000000000000000000044141357355204000172710ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_ENCODER_VP9_RDOPT_H_ #define VPX_VP9_ENCODER_VP9_RDOPT_H_ #include "vp9/common/vp9_blockd.h" #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_context_tree.h" #ifdef __cplusplus extern "C" { #endif struct TileInfo; struct VP9_COMP; struct macroblock; struct RD_COST; void vp9_rd_pick_intra_mode_sb(struct VP9_COMP *cpi, struct macroblock *x, struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd); #if !CONFIG_REALTIME_ONLY void vp9_rd_pick_inter_mode_sb(struct VP9_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x, int mi_row, int mi_col, struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far); void vp9_rd_pick_inter_mode_sb_seg_skip( struct VP9_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x, struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far); #endif int vp9_internal_image_edge(struct VP9_COMP *cpi); int vp9_active_h_edge(struct VP9_COMP *cpi, int mi_row, int mi_step); int vp9_active_v_edge(struct VP9_COMP *cpi, int mi_col, int mi_step); int vp9_active_edge_sb(struct VP9_COMP *cpi, int mi_row, int mi_col); #if !CONFIG_REALTIME_ONLY void vp9_rd_pick_inter_mode_sub8x8(struct VP9_COMP *cpi, struct TileDataEnc *tile_data, struct macroblock *x, int mi_row, int mi_col, struct RD_COST *rd_cost, BLOCK_SIZE bsize, PICK_MODE_CONTEXT *ctx, int64_t best_rd_so_far); #endif #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_RDOPT_H_ libvpx-1.8.2/vp9/encoder/vp9_resize.c000066400000000000000000000772411357355204000174430ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <limits.h> #include <math.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "./vpx_config.h" #if CONFIG_VP9_HIGHBITDEPTH #include "vpx_dsp/vpx_dsp_common.h" #endif // CONFIG_VP9_HIGHBITDEPTH #include "vpx_ports/mem.h" #include "vp9/common/vp9_common.h" #include "vp9/encoder/vp9_resize.h" #define FILTER_BITS 7 #define INTERP_TAPS 8 #define SUBPEL_BITS 5 #define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1) #define INTERP_PRECISION_BITS 32 typedef int16_t interp_kernel[INTERP_TAPS]; // Filters for interpolation (0.5-band) - note this also filters integer pels.
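// Each of the kernel tables below stores one 8-tap filter per 1/32-pel phase
// ((1 << SUBPEL_BITS) rows of INTERP_TAPS coefficients). Coefficients are in
// Q7, so every row sums to 128 (1 << FILTER_BITS) for unity DC gain; e.g. the
// first 0.5-band row: -3 + 0 + 35 + 64 + 35 + 0 - 3 + 0 = 128.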
static const interp_kernel filteredinterp_filters500[(1 << SUBPEL_BITS)] = { { -3, 0, 35, 64, 35, 0, -3, 0 }, { -3, -1, 34, 64, 36, 1, -3, 0 }, { -3, -1, 32, 64, 38, 1, -3, 0 }, { -2, -2, 31, 63, 39, 2, -3, 0 }, { -2, -2, 29, 63, 41, 2, -3, 0 }, { -2, -2, 28, 63, 42, 3, -4, 0 }, { -2, -3, 27, 63, 43, 4, -4, 0 }, { -2, -3, 25, 62, 45, 5, -4, 0 }, { -2, -3, 24, 62, 46, 5, -4, 0 }, { -2, -3, 23, 61, 47, 6, -4, 0 }, { -2, -3, 21, 60, 49, 7, -4, 0 }, { -1, -4, 20, 60, 50, 8, -4, -1 }, { -1, -4, 19, 59, 51, 9, -4, -1 }, { -1, -4, 17, 58, 52, 10, -4, 0 }, { -1, -4, 16, 57, 53, 12, -4, -1 }, { -1, -4, 15, 56, 54, 13, -4, -1 }, { -1, -4, 14, 55, 55, 14, -4, -1 }, { -1, -4, 13, 54, 56, 15, -4, -1 }, { -1, -4, 12, 53, 57, 16, -4, -1 }, { 0, -4, 10, 52, 58, 17, -4, -1 }, { -1, -4, 9, 51, 59, 19, -4, -1 }, { -1, -4, 8, 50, 60, 20, -4, -1 }, { 0, -4, 7, 49, 60, 21, -3, -2 }, { 0, -4, 6, 47, 61, 23, -3, -2 }, { 0, -4, 5, 46, 62, 24, -3, -2 }, { 0, -4, 5, 45, 62, 25, -3, -2 }, { 0, -4, 4, 43, 63, 27, -3, -2 }, { 0, -4, 3, 42, 63, 28, -2, -2 }, { 0, -3, 2, 41, 63, 29, -2, -2 }, { 0, -3, 2, 39, 63, 31, -2, -2 }, { 0, -3, 1, 38, 64, 32, -1, -3 }, { 0, -3, 1, 36, 64, 34, -1, -3 } }; // Filters for interpolation (0.625-band) - note this also filters integer pels. static const interp_kernel filteredinterp_filters625[(1 << SUBPEL_BITS)] = { { -1, -8, 33, 80, 33, -8, -1, 0 }, { -1, -8, 30, 80, 35, -8, -1, 1 }, { -1, -8, 28, 80, 37, -7, -2, 1 }, { 0, -8, 26, 79, 39, -7, -2, 1 }, { 0, -8, 24, 79, 41, -7, -2, 1 }, { 0, -8, 22, 78, 43, -6, -2, 1 }, { 0, -8, 20, 78, 45, -5, -3, 1 }, { 0, -8, 18, 77, 48, -5, -3, 1 }, { 0, -8, 16, 76, 50, -4, -3, 1 }, { 0, -8, 15, 75, 52, -3, -4, 1 }, { 0, -7, 13, 74, 54, -3, -4, 1 }, { 0, -7, 11, 73, 56, -2, -4, 1 }, { 0, -7, 10, 71, 58, -1, -4, 1 }, { 1, -7, 8, 70, 60, 0, -5, 1 }, { 1, -6, 6, 68, 62, 1, -5, 1 }, { 1, -6, 5, 67, 63, 2, -5, 1 }, { 1, -6, 4, 65, 65, 4, -6, 1 }, { 1, -5, 2, 63, 67, 5, -6, 1 }, { 1, -5, 1, 62, 68, 6, -6, 1 }, { 1, -5, 0, 60, 70, 8, -7, 1 }, { 1, -4, -1, 58, 71, 10, -7, 0 }, { 1, -4, -2, 56, 73, 11, -7, 0 }, { 1, -4, -3, 54, 74, 13, -7, 0 }, { 1, -4, -3, 52, 75, 15, -8, 0 }, { 1, -3, -4, 50, 76, 16, -8, 0 }, { 1, -3, -5, 48, 77, 18, -8, 0 }, { 1, -3, -5, 45, 78, 20, -8, 0 }, { 1, -2, -6, 43, 78, 22, -8, 0 }, { 1, -2, -7, 41, 79, 24, -8, 0 }, { 1, -2, -7, 39, 79, 26, -8, 0 }, { 1, -2, -7, 37, 80, 28, -8, -1 }, { 1, -1, -8, 35, 80, 30, -8, -1 }, }; // Filters for interpolation (0.75-band) - note this also filters integer pels. 
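// (The band fraction in these table names is roughly the share of the source
// Nyquist band the kernel keeps; choose_interp_filter() further below picks
// the table whose band best matches the output/input length ratio.)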
static const interp_kernel filteredinterp_filters750[(1 << SUBPEL_BITS)] = { { 2, -11, 25, 96, 25, -11, 2, 0 }, { 2, -11, 22, 96, 28, -11, 2, 0 }, { 2, -10, 19, 95, 31, -11, 2, 0 }, { 2, -10, 17, 95, 34, -12, 2, 0 }, { 2, -9, 14, 94, 37, -12, 2, 0 }, { 2, -8, 12, 93, 40, -12, 1, 0 }, { 2, -8, 9, 92, 43, -12, 1, 1 }, { 2, -7, 7, 91, 46, -12, 1, 0 }, { 2, -7, 5, 90, 49, -12, 1, 0 }, { 2, -6, 3, 88, 52, -12, 0, 1 }, { 2, -5, 1, 86, 55, -12, 0, 1 }, { 2, -5, -1, 84, 58, -11, 0, 1 }, { 2, -4, -2, 82, 61, -11, -1, 1 }, { 2, -4, -4, 80, 64, -10, -1, 1 }, { 1, -3, -5, 77, 67, -9, -1, 1 }, { 1, -3, -6, 75, 70, -8, -2, 1 }, { 1, -2, -7, 72, 72, -7, -2, 1 }, { 1, -2, -8, 70, 75, -6, -3, 1 }, { 1, -1, -9, 67, 77, -5, -3, 1 }, { 1, -1, -10, 64, 80, -4, -4, 2 }, { 1, -1, -11, 61, 82, -2, -4, 2 }, { 1, 0, -11, 58, 84, -1, -5, 2 }, { 1, 0, -12, 55, 86, 1, -5, 2 }, { 1, 0, -12, 52, 88, 3, -6, 2 }, { 0, 1, -12, 49, 90, 5, -7, 2 }, { 0, 1, -12, 46, 91, 7, -7, 2 }, { 1, 1, -12, 43, 92, 9, -8, 2 }, { 0, 1, -12, 40, 93, 12, -8, 2 }, { 0, 2, -12, 37, 94, 14, -9, 2 }, { 0, 2, -12, 34, 95, 17, -10, 2 }, { 0, 2, -11, 31, 95, 19, -10, 2 }, { 0, 2, -11, 28, 96, 22, -11, 2 } }; // Filters for interpolation (0.875-band) - note this also filters integer pels. static const interp_kernel filteredinterp_filters875[(1 << SUBPEL_BITS)] = { { 3, -8, 13, 112, 13, -8, 3, 0 }, { 3, -7, 10, 112, 17, -9, 3, -1 }, { 2, -6, 7, 111, 21, -9, 3, -1 }, { 2, -5, 4, 111, 24, -10, 3, -1 }, { 2, -4, 1, 110, 28, -11, 3, -1 }, { 1, -3, -1, 108, 32, -12, 4, -1 }, { 1, -2, -3, 106, 36, -13, 4, -1 }, { 1, -1, -6, 105, 40, -14, 4, -1 }, { 1, -1, -7, 102, 44, -14, 4, -1 }, { 1, 0, -9, 100, 48, -15, 4, -1 }, { 1, 1, -11, 97, 53, -16, 4, -1 }, { 0, 1, -12, 95, 57, -16, 4, -1 }, { 0, 2, -13, 91, 61, -16, 4, -1 }, { 0, 2, -14, 88, 65, -16, 4, -1 }, { 0, 3, -15, 84, 69, -17, 4, 0 }, { 0, 3, -16, 81, 73, -16, 3, 0 }, { 0, 3, -16, 77, 77, -16, 3, 0 }, { 0, 3, -16, 73, 81, -16, 3, 0 }, { 0, 4, -17, 69, 84, -15, 3, 0 }, { -1, 4, -16, 65, 88, -14, 2, 0 }, { -1, 4, -16, 61, 91, -13, 2, 0 }, { -1, 4, -16, 57, 95, -12, 1, 0 }, { -1, 4, -16, 53, 97, -11, 1, 1 }, { -1, 4, -15, 48, 100, -9, 0, 1 }, { -1, 4, -14, 44, 102, -7, -1, 1 }, { -1, 4, -14, 40, 105, -6, -1, 1 }, { -1, 4, -13, 36, 106, -3, -2, 1 }, { -1, 4, -12, 32, 108, -1, -3, 1 }, { -1, 3, -11, 28, 110, 1, -4, 2 }, { -1, 3, -10, 24, 111, 4, -5, 2 }, { -1, 3, -9, 21, 111, 7, -6, 2 }, { -1, 3, -9, 17, 112, 10, -7, 3 } }; // Filters for interpolation (full-band) - no filtering for integer pixels static const interp_kernel filteredinterp_filters1000[(1 << SUBPEL_BITS)] = { { 0, 0, 0, 128, 0, 0, 0, 0 }, { 0, 1, -3, 128, 3, -1, 0, 0 }, { -1, 2, -6, 127, 7, -2, 1, 0 }, { -1, 3, -9, 126, 12, -4, 1, 0 }, { -1, 4, -12, 125, 16, -5, 1, 0 }, { -1, 4, -14, 123, 20, -6, 2, 0 }, { -1, 5, -15, 120, 25, -8, 2, 0 }, { -1, 5, -17, 118, 30, -9, 3, -1 }, { -1, 6, -18, 114, 35, -10, 3, -1 }, { -1, 6, -19, 111, 41, -12, 3, -1 }, { -1, 6, -20, 107, 46, -13, 4, -1 }, { -1, 6, -21, 103, 52, -14, 4, -1 }, { -1, 6, -21, 99, 57, -16, 5, -1 }, { -1, 6, -21, 94, 63, -17, 5, -1 }, { -1, 6, -20, 89, 68, -18, 5, -1 }, { -1, 6, -20, 84, 73, -19, 6, -1 }, { -1, 6, -20, 79, 79, -20, 6, -1 }, { -1, 6, -19, 73, 84, -20, 6, -1 }, { -1, 5, -18, 68, 89, -20, 6, -1 }, { -1, 5, -17, 63, 94, -21, 6, -1 }, { -1, 5, -16, 57, 99, -21, 6, -1 }, { -1, 4, -14, 52, 103, -21, 6, -1 }, { -1, 4, -13, 46, 107, -20, 6, -1 }, { -1, 3, -12, 41, 111, -19, 6, -1 }, { -1, 3, -10, 35, 114, -18, 6, -1 }, { -1, 3, -9, 30, 118, -17, 5, -1 }, { 0, 2, -8, 25, 120, -15, 
5, -1 }, { 0, 2, -6, 20, 123, -14, 4, -1 }, { 0, 1, -5, 16, 125, -12, 4, -1 }, { 0, 1, -4, 12, 126, -9, 3, -1 }, { 0, 1, -2, 7, 127, -6, 2, -1 }, { 0, 0, -1, 3, 128, -3, 1, 0 } }; // Filters for factor of 2 downsampling. static const int16_t vp9_down2_symeven_half_filter[] = { 56, 12, -3, -1 }; static const int16_t vp9_down2_symodd_half_filter[] = { 64, 35, 0, -3 }; static const interp_kernel *choose_interp_filter(int inlength, int outlength) { int outlength16 = outlength * 16; if (outlength16 >= inlength * 16) return filteredinterp_filters1000; else if (outlength16 >= inlength * 13) return filteredinterp_filters875; else if (outlength16 >= inlength * 11) return filteredinterp_filters750; else if (outlength16 >= inlength * 9) return filteredinterp_filters625; else return filteredinterp_filters500; } static void interpolate(const uint8_t *const input, int inlength, uint8_t *output, int outlength) { const int64_t delta = (((uint64_t)inlength << 32) + outlength / 2) / outlength; const int64_t offset = inlength > outlength ? (((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength : -(((int64_t)(outlength - inlength) << 31) + outlength / 2) / outlength; uint8_t *optr = output; int x, x1, x2, sum, k, int_pel, sub_pel; int64_t y; const interp_kernel *interp_filters = choose_interp_filter(inlength, outlength); x = 0; y = offset; while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) { x++; y += delta; } x1 = x; x = outlength - 1; y = delta * x + offset; while ((y >> INTERP_PRECISION_BITS) + (int64_t)(INTERP_TAPS / 2) >= inlength) { x--; y -= delta; } x2 = x; if (x1 > x2) { for (x = 0, y = offset; x < outlength; ++x, y += delta) { const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) { const int pk = int_pel - INTERP_TAPS / 2 + 1 + k; sum += filter[k] * input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))]; } *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); } } else { // Initial part. for (x = 0, y = offset; x < x1; ++x, y += delta) { const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ? 0 : int_pel - INTERP_TAPS / 2 + 1 + k)]; *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); } // Middle part. for (; x <= x2; ++x, y += delta) { const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k]; *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); } // End part. for (; x < outlength; ++x, y += delta) { const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= inlength ? inlength - 1 : int_pel - INTERP_TAPS / 2 + 1 + k)]; *optr++ = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); } } } static void down2_symeven(const uint8_t *const input, int length, uint8_t *output) { // Actual filter len = 2 * filter_len_half. 
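// The full kernel is symmetric, so only half is stored: {56, 12, -3, -1}
// expands to {-1, -3, 12, 56, 56, 12, -3, -1}, whose taps sum to 128 (unity
// gain in Q7); one output sample is produced for every two input samples.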
const int16_t *filter = vp9_down2_symeven_half_filter; const int filter_len_half = sizeof(vp9_down2_symeven_half_filter) / 2; int i, j; uint8_t *optr = output; int l1 = filter_len_half; int l2 = (length - filter_len_half); l1 += (l1 & 1); l2 += (l2 & 1); if (l1 > l2) { // Short input length. for (i = 0; i < length; i += 2) { int sum = (1 << (FILTER_BITS - 1)); for (j = 0; j < filter_len_half; ++j) { sum += (input[(i - j < 0 ? 0 : i - j)] + input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel(sum); } } else { // Initial part. for (i = 0; i < l1; i += 2) { int sum = (1 << (FILTER_BITS - 1)); for (j = 0; j < filter_len_half; ++j) { sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel(sum); } // Middle part. for (; i < l2; i += 2) { int sum = (1 << (FILTER_BITS - 1)); for (j = 0; j < filter_len_half; ++j) { sum += (input[i - j] + input[i + 1 + j]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel(sum); } // End part. for (; i < length; i += 2) { int sum = (1 << (FILTER_BITS - 1)); for (j = 0; j < filter_len_half; ++j) { sum += (input[i - j] + input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel(sum); } } } static void down2_symodd(const uint8_t *const input, int length, uint8_t *output) { // Actual filter len = 2 * filter_len_half - 1. const int16_t *filter = vp9_down2_symodd_half_filter; const int filter_len_half = sizeof(vp9_down2_symodd_half_filter) / 2; int i, j; uint8_t *optr = output; int l1 = filter_len_half - 1; int l2 = (length - filter_len_half + 1); l1 += (l1 & 1); l2 += (l2 & 1); if (l1 > l2) { // Short input length. for (i = 0; i < length; i += 2) { int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; for (j = 1; j < filter_len_half; ++j) { sum += (input[(i - j < 0 ? 0 : i - j)] + input[(i + j >= length ? length - 1 : i + j)]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel(sum); } } else { // Initial part. for (i = 0; i < l1; i += 2) { int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; for (j = 1; j < filter_len_half; ++j) { sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel(sum); } // Middle part. for (; i < l2; i += 2) { int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; for (j = 1; j < filter_len_half; ++j) { sum += (input[i - j] + input[i + j]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel(sum); } // End part. for (; i < length; i += 2) { int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; for (j = 1; j < filter_len_half; ++j) { sum += (input[i - j] + input[(i + j >= length ? 
length - 1 : i + j)]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel(sum); } } } static int get_down2_length(int length, int steps) { int s; for (s = 0; s < steps; ++s) length = (length + 1) >> 1; return length; } static int get_down2_steps(int in_length, int out_length) { int steps = 0; int proj_in_length; while ((proj_in_length = get_down2_length(in_length, 1)) >= out_length) { ++steps; in_length = proj_in_length; } return steps; } static void resize_multistep(const uint8_t *const input, int length, uint8_t *output, int olength, uint8_t *otmp) { int steps; if (length == olength) { memcpy(output, input, sizeof(output[0]) * length); return; } steps = get_down2_steps(length, olength); if (steps > 0) { int s; uint8_t *out = NULL; uint8_t *otmp2; int filteredlength = length; assert(otmp != NULL); otmp2 = otmp + get_down2_length(length, 1); for (s = 0; s < steps; ++s) { const int proj_filteredlength = get_down2_length(filteredlength, 1); const uint8_t *const in = (s == 0 ? input : out); if (s == steps - 1 && proj_filteredlength == olength) out = output; else out = (s & 1 ? otmp2 : otmp); if (filteredlength & 1) down2_symodd(in, filteredlength, out); else down2_symeven(in, filteredlength, out); filteredlength = proj_filteredlength; } if (filteredlength != olength) { interpolate(out, filteredlength, output, olength); } } else { interpolate(input, length, output, olength); } } static void fill_col_to_arr(uint8_t *img, int stride, int len, uint8_t *arr) { int i; uint8_t *iptr = img; uint8_t *aptr = arr; for (i = 0; i < len; ++i, iptr += stride) { *aptr++ = *iptr; } } static void fill_arr_to_col(uint8_t *img, int stride, int len, uint8_t *arr) { int i; uint8_t *iptr = img; uint8_t *aptr = arr; for (i = 0; i < len; ++i, iptr += stride) { *iptr = *aptr++; } } void vp9_resize_plane(const uint8_t *const input, int height, int width, int in_stride, uint8_t *output, int height2, int width2, int out_stride) { int i; uint8_t *intbuf = (uint8_t *)calloc(width2 * height, sizeof(*intbuf)); uint8_t *tmpbuf = (uint8_t *)calloc(width < height ? height : width, sizeof(*tmpbuf)); uint8_t *arrbuf = (uint8_t *)calloc(height, sizeof(*arrbuf)); uint8_t *arrbuf2 = (uint8_t *)calloc(height2, sizeof(*arrbuf2)); if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error; assert(width > 0); assert(height > 0); assert(width2 > 0); assert(height2 > 0); for (i = 0; i < height; ++i) resize_multistep(input + in_stride * i, width, intbuf + width2 * i, width2, tmpbuf); for (i = 0; i < width2; ++i) { fill_col_to_arr(intbuf + i, width2, height, arrbuf); resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf); fill_arr_to_col(output + i, out_stride, height2, arrbuf2); } Error: free(intbuf); free(tmpbuf); free(arrbuf); free(arrbuf2); } #if CONFIG_VP9_HIGHBITDEPTH static void highbd_interpolate(const uint16_t *const input, int inlength, uint16_t *output, int outlength, int bd) { const int64_t delta = (((uint64_t)inlength << 32) + outlength / 2) / outlength; const int64_t offset = inlength > outlength ? 
(((int64_t)(inlength - outlength) << 31) + outlength / 2) / outlength : -(((int64_t)(outlength - inlength) << 31) + outlength / 2) / outlength; uint16_t *optr = output; int x, x1, x2, sum, k, int_pel, sub_pel; int64_t y; const interp_kernel *interp_filters = choose_interp_filter(inlength, outlength); x = 0; y = offset; while ((y >> INTERP_PRECISION_BITS) < (INTERP_TAPS / 2 - 1)) { x++; y += delta; } x1 = x; x = outlength - 1; y = delta * x + offset; while ((y >> INTERP_PRECISION_BITS) + (int64_t)(INTERP_TAPS / 2) >= inlength) { x--; y -= delta; } x2 = x; if (x1 > x2) { for (x = 0, y = offset; x < outlength; ++x, y += delta) { const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) { const int pk = int_pel - INTERP_TAPS / 2 + 1 + k; sum += filter[k] * input[(pk < 0 ? 0 : (pk >= inlength ? inlength - 1 : pk))]; } *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); } } else { // Initial part. for (x = 0, y = offset; x < x1; ++x, y += delta) { const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) { assert(int_pel - INTERP_TAPS / 2 + 1 + k < inlength); sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k < 0 ? 0 : int_pel - INTERP_TAPS / 2 + 1 + k)]; } *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); } // Middle part. for (; x <= x2; ++x, y += delta) { const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) sum += filter[k] * input[int_pel - INTERP_TAPS / 2 + 1 + k]; *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); } // End part. for (; x < outlength; ++x, y += delta) { const int16_t *filter; int_pel = y >> INTERP_PRECISION_BITS; sub_pel = (y >> (INTERP_PRECISION_BITS - SUBPEL_BITS)) & SUBPEL_MASK; filter = interp_filters[sub_pel]; sum = 0; for (k = 0; k < INTERP_TAPS; ++k) sum += filter[k] * input[(int_pel - INTERP_TAPS / 2 + 1 + k >= inlength ? inlength - 1 : int_pel - INTERP_TAPS / 2 + 1 + k)]; *optr++ = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); } } } static void highbd_down2_symeven(const uint16_t *const input, int length, uint16_t *output, int bd) { // Actual filter len = 2 * filter_len_half. static const int16_t *filter = vp9_down2_symeven_half_filter; const int filter_len_half = sizeof(vp9_down2_symeven_half_filter) / 2; int i, j; uint16_t *optr = output; int l1 = filter_len_half; int l2 = (length - filter_len_half); l1 += (l1 & 1); l2 += (l2 & 1); if (l1 > l2) { // Short input length. for (i = 0; i < length; i += 2) { int sum = (1 << (FILTER_BITS - 1)); for (j = 0; j < filter_len_half; ++j) { sum += (input[(i - j < 0 ? 0 : i - j)] + input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel_highbd(sum, bd); } } else { // Initial part. for (i = 0; i < l1; i += 2) { int sum = (1 << (FILTER_BITS - 1)); for (j = 0; j < filter_len_half; ++j) { sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + 1 + j]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel_highbd(sum, bd); } // Middle part. 
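// (All filter taps fall inside [0, length) for this range of i, so no edge
// clamping is needed here.)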
for (; i < l2; i += 2) { int sum = (1 << (FILTER_BITS - 1)); for (j = 0; j < filter_len_half; ++j) { sum += (input[i - j] + input[i + 1 + j]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel_highbd(sum, bd); } // End part. for (; i < length; i += 2) { int sum = (1 << (FILTER_BITS - 1)); for (j = 0; j < filter_len_half; ++j) { sum += (input[i - j] + input[(i + 1 + j >= length ? length - 1 : i + 1 + j)]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel_highbd(sum, bd); } } } static void highbd_down2_symodd(const uint16_t *const input, int length, uint16_t *output, int bd) { // Actual filter len = 2 * filter_len_half - 1. static const int16_t *filter = vp9_down2_symodd_half_filter; const int filter_len_half = sizeof(vp9_down2_symodd_half_filter) / 2; int i, j; uint16_t *optr = output; int l1 = filter_len_half - 1; int l2 = (length - filter_len_half + 1); l1 += (l1 & 1); l2 += (l2 & 1); if (l1 > l2) { // Short input length. for (i = 0; i < length; i += 2) { int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; for (j = 1; j < filter_len_half; ++j) { sum += (input[(i - j < 0 ? 0 : i - j)] + input[(i + j >= length ? length - 1 : i + j)]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel_highbd(sum, bd); } } else { // Initial part. for (i = 0; i < l1; i += 2) { int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; for (j = 1; j < filter_len_half; ++j) { sum += (input[(i - j < 0 ? 0 : i - j)] + input[i + j]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel_highbd(sum, bd); } // Middle part. for (; i < l2; i += 2) { int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; for (j = 1; j < filter_len_half; ++j) { sum += (input[i - j] + input[i + j]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel_highbd(sum, bd); } // End part. for (; i < length; i += 2) { int sum = (1 << (FILTER_BITS - 1)) + input[i] * filter[0]; for (j = 1; j < filter_len_half; ++j) { sum += (input[i - j] + input[(i + j >= length ? length - 1 : i + j)]) * filter[j]; } sum >>= FILTER_BITS; *optr++ = clip_pixel_highbd(sum, bd); } } } static void highbd_resize_multistep(const uint16_t *const input, int length, uint16_t *output, int olength, uint16_t *otmp, int bd) { int steps; if (length == olength) { memcpy(output, input, sizeof(output[0]) * length); return; } steps = get_down2_steps(length, olength); if (steps > 0) { int s; uint16_t *out = NULL; uint16_t *otmp2; int filteredlength = length; assert(otmp != NULL); otmp2 = otmp + get_down2_length(length, 1); for (s = 0; s < steps; ++s) { const int proj_filteredlength = get_down2_length(filteredlength, 1); const uint16_t *const in = (s == 0 ? input : out); if (s == steps - 1 && proj_filteredlength == olength) out = output; else out = (s & 1 ? 
otmp2 : otmp); if (filteredlength & 1) highbd_down2_symodd(in, filteredlength, out, bd); else highbd_down2_symeven(in, filteredlength, out, bd); filteredlength = proj_filteredlength; } if (filteredlength != olength) { highbd_interpolate(out, filteredlength, output, olength, bd); } } else { highbd_interpolate(input, length, output, olength, bd); } } static void highbd_fill_col_to_arr(uint16_t *img, int stride, int len, uint16_t *arr) { int i; uint16_t *iptr = img; uint16_t *aptr = arr; for (i = 0; i < len; ++i, iptr += stride) { *aptr++ = *iptr; } } static void highbd_fill_arr_to_col(uint16_t *img, int stride, int len, uint16_t *arr) { int i; uint16_t *iptr = img; uint16_t *aptr = arr; for (i = 0; i < len; ++i, iptr += stride) { *iptr = *aptr++; } } void vp9_highbd_resize_plane(const uint8_t *const input, int height, int width, int in_stride, uint8_t *output, int height2, int width2, int out_stride, int bd) { int i; uint16_t *intbuf = (uint16_t *)malloc(sizeof(uint16_t) * width2 * height); uint16_t *tmpbuf = (uint16_t *)malloc(sizeof(uint16_t) * (width < height ? height : width)); uint16_t *arrbuf = (uint16_t *)malloc(sizeof(uint16_t) * height); uint16_t *arrbuf2 = (uint16_t *)malloc(sizeof(uint16_t) * height2); if (intbuf == NULL || tmpbuf == NULL || arrbuf == NULL || arrbuf2 == NULL) goto Error; assert(width > 0); assert(height > 0); assert(width2 > 0); assert(height2 > 0); for (i = 0; i < height; ++i) { highbd_resize_multistep(CONVERT_TO_SHORTPTR(input + in_stride * i), width, intbuf + width2 * i, width2, tmpbuf, bd); } for (i = 0; i < width2; ++i) { highbd_fill_col_to_arr(intbuf + i, width2, height, arrbuf); highbd_resize_multistep(arrbuf, height, arrbuf2, height2, tmpbuf, bd); highbd_fill_arr_to_col(CONVERT_TO_SHORTPTR(output + i), out_stride, height2, arrbuf2); } Error: free(intbuf); free(tmpbuf); free(arrbuf); free(arrbuf2); } #endif // CONFIG_VP9_HIGHBITDEPTH void vp9_resize_frame420(const uint8_t *const y, int y_stride, const uint8_t *const u, const uint8_t *const v, int uv_stride, int height, int width, uint8_t *oy, int oy_stride, uint8_t *ou, uint8_t *ov, int ouv_stride, int oheight, int owidth) { vp9_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride); vp9_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2, owidth / 2, ouv_stride); vp9_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2, owidth / 2, ouv_stride); } void vp9_resize_frame422(const uint8_t *const y, int y_stride, const uint8_t *const u, const uint8_t *const v, int uv_stride, int height, int width, uint8_t *oy, int oy_stride, uint8_t *ou, uint8_t *ov, int ouv_stride, int oheight, int owidth) { vp9_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride); vp9_resize_plane(u, height, width / 2, uv_stride, ou, oheight, owidth / 2, ouv_stride); vp9_resize_plane(v, height, width / 2, uv_stride, ov, oheight, owidth / 2, ouv_stride); } void vp9_resize_frame444(const uint8_t *const y, int y_stride, const uint8_t *const u, const uint8_t *const v, int uv_stride, int height, int width, uint8_t *oy, int oy_stride, uint8_t *ou, uint8_t *ov, int ouv_stride, int oheight, int owidth) { vp9_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride); vp9_resize_plane(u, height, width, uv_stride, ou, oheight, owidth, ouv_stride); vp9_resize_plane(v, height, width, uv_stride, ov, oheight, owidth, ouv_stride); } #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_resize_frame420(const uint8_t *const y, int y_stride, const uint8_t *const u, const uint8_t *const v, int 
uv_stride, int height, int width, uint8_t *oy, int oy_stride, uint8_t *ou, uint8_t *ov, int ouv_stride, int oheight, int owidth, int bd) { vp9_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride, bd); vp9_highbd_resize_plane(u, height / 2, width / 2, uv_stride, ou, oheight / 2, owidth / 2, ouv_stride, bd); vp9_highbd_resize_plane(v, height / 2, width / 2, uv_stride, ov, oheight / 2, owidth / 2, ouv_stride, bd); } void vp9_highbd_resize_frame422(const uint8_t *const y, int y_stride, const uint8_t *const u, const uint8_t *const v, int uv_stride, int height, int width, uint8_t *oy, int oy_stride, uint8_t *ou, uint8_t *ov, int ouv_stride, int oheight, int owidth, int bd) { vp9_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride, bd); vp9_highbd_resize_plane(u, height, width / 2, uv_stride, ou, oheight, owidth / 2, ouv_stride, bd); vp9_highbd_resize_plane(v, height, width / 2, uv_stride, ov, oheight, owidth / 2, ouv_stride, bd); } void vp9_highbd_resize_frame444(const uint8_t *const y, int y_stride, const uint8_t *const u, const uint8_t *const v, int uv_stride, int height, int width, uint8_t *oy, int oy_stride, uint8_t *ou, uint8_t *ov, int ouv_stride, int oheight, int owidth, int bd) { vp9_highbd_resize_plane(y, height, width, y_stride, oy, oheight, owidth, oy_stride, bd); vp9_highbd_resize_plane(u, height, width, uv_stride, ou, oheight, owidth, ouv_stride, bd); vp9_highbd_resize_plane(v, height, width, uv_stride, ov, oheight, owidth, ouv_stride, bd); } #endif // CONFIG_VP9_HIGHBITDEPTH libvpx-1.8.2/vp9/encoder/vp9_resize.h000066400000000000000000000065521357355204000174470ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_ENCODER_VP9_RESIZE_H_ #define VPX_VP9_ENCODER_VP9_RESIZE_H_ #include <stdio.h> #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif void vp9_resize_plane(const uint8_t *const input, int height, int width, int in_stride, uint8_t *output, int height2, int width2, int out_stride); void vp9_resize_frame420(const uint8_t *const y, int y_stride, const uint8_t *const u, const uint8_t *const v, int uv_stride, int height, int width, uint8_t *oy, int oy_stride, uint8_t *ou, uint8_t *ov, int ouv_stride, int oheight, int owidth); void vp9_resize_frame422(const uint8_t *const y, int y_stride, const uint8_t *const u, const uint8_t *const v, int uv_stride, int height, int width, uint8_t *oy, int oy_stride, uint8_t *ou, uint8_t *ov, int ouv_stride, int oheight, int owidth); void vp9_resize_frame444(const uint8_t *const y, int y_stride, const uint8_t *const u, const uint8_t *const v, int uv_stride, int height, int width, uint8_t *oy, int oy_stride, uint8_t *ou, uint8_t *ov, int ouv_stride, int oheight, int owidth); #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_resize_plane(const uint8_t *const input, int height, int width, int in_stride, uint8_t *output, int height2, int width2, int out_stride, int bd); void vp9_highbd_resize_frame420(const uint8_t *const y, int y_stride, const uint8_t *const u, const uint8_t *const v, int uv_stride, int height, int width, uint8_t *oy, int oy_stride, uint8_t *ou, uint8_t *ov, int ouv_stride, int oheight, int owidth, int bd); void vp9_highbd_resize_frame422(const uint8_t *const y, int y_stride, const uint8_t *const u, const uint8_t *const v, int uv_stride, int height, int width, uint8_t *oy, int oy_stride, uint8_t *ou, uint8_t *ov, int ouv_stride, int oheight, int owidth, int bd); void vp9_highbd_resize_frame444(const uint8_t *const y, int y_stride, const uint8_t *const u, const uint8_t *const v, int uv_stride, int height, int width, uint8_t *oy, int oy_stride, uint8_t *ou, uint8_t *ov, int ouv_stride, int oheight, int owidth, int bd); #endif // CONFIG_VP9_HIGHBITDEPTH #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_RESIZE_H_ libvpx-1.8.2/vp9/encoder/vp9_segmentation.c000066400000000000000000000270571357355204000206370ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include #include #include "vpx_mem/vpx_mem.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_tile_common.h" #include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_segmentation.h" void vp9_enable_segmentation(struct segmentation *seg) { seg->enabled = 1; seg->update_map = 1; seg->update_data = 1; } void vp9_disable_segmentation(struct segmentation *seg) { seg->enabled = 0; seg->update_map = 0; seg->update_data = 0; } void vp9_set_segment_data(struct segmentation *seg, signed char *feature_data, unsigned char abs_delta) { seg->abs_delta = abs_delta; memcpy(seg->feature_data, feature_data, sizeof(seg->feature_data)); } void vp9_disable_segfeature(struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id) { seg->feature_mask[segment_id] &= ~(1 << feature_id); } void vp9_clear_segdata(struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id) { seg->feature_data[segment_id][feature_id] = 0; } void vp9_psnr_aq_mode_setup(struct segmentation *seg) { int i; vp9_enable_segmentation(seg); vp9_clearall_segfeatures(seg); seg->abs_delta = SEGMENT_DELTADATA; for (i = 0; i < MAX_SEGMENTS; ++i) { vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, 2 * (i - (MAX_SEGMENTS / 2))); vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); } } void vp9_perceptual_aq_mode_setup(struct VP9_COMP *cpi, struct segmentation *seg) { const VP9_COMMON *cm = &cpi->common; const int seg_counts = cpi->kmeans_ctr_num; const int base_qindex = cm->base_qindex; const double base_qstep = vp9_convert_qindex_to_q(base_qindex, cm->bit_depth); const double mid_ctr = cpi->kmeans_ctr_ls[seg_counts / 2]; const double var_diff_scale = 4.0; int i; assert(seg_counts <= MAX_SEGMENTS); vp9_enable_segmentation(seg); vp9_clearall_segfeatures(seg); seg->abs_delta = SEGMENT_DELTADATA; for (i = 0; i < seg_counts / 2; ++i) { double wiener_var_diff = mid_ctr - cpi->kmeans_ctr_ls[i]; double target_qstep = base_qstep / (1.0 + wiener_var_diff / var_diff_scale); int target_qindex = vp9_convert_q_to_qindex(target_qstep, cm->bit_depth); assert(wiener_var_diff >= 0.0); vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, target_qindex - base_qindex); vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); } vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, 0); vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); for (; i < seg_counts; ++i) { double wiener_var_diff = cpi->kmeans_ctr_ls[i] - mid_ctr; double target_qstep = base_qstep * (1.0 + wiener_var_diff / var_diff_scale); int target_qindex = vp9_convert_q_to_qindex(target_qstep, cm->bit_depth); assert(wiener_var_diff >= 0.0); vp9_set_segdata(seg, i, SEG_LVL_ALT_Q, target_qindex - base_qindex); vp9_enable_segfeature(seg, i, SEG_LVL_ALT_Q); } } // Based on set of segment counts calculate a probability tree static void calc_segtree_probs(int *segcounts, vpx_prob *segment_tree_probs) { // Work out probabilities of each segment const int c01 = segcounts[0] + segcounts[1]; const int c23 = segcounts[2] + segcounts[3]; const int c45 = segcounts[4] + segcounts[5]; const int c67 = segcounts[6] + segcounts[7]; segment_tree_probs[0] = get_binary_prob(c01 + c23, c45 + c67); segment_tree_probs[1] = get_binary_prob(c01, c23); segment_tree_probs[2] = get_binary_prob(c45, c67); segment_tree_probs[3] = get_binary_prob(segcounts[0], segcounts[1]); segment_tree_probs[4] = get_binary_prob(segcounts[2], segcounts[3]); segment_tree_probs[5] = get_binary_prob(segcounts[4], segcounts[5]); segment_tree_probs[6] = get_binary_prob(segcounts[6], segcounts[7]); } // Based on set of segment counts and probabilities calculate a cost estimate 
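// Each of the 8 possible segment ids is signalled with a 3-level binary tree
// (7 node probabilities); the estimate below sums count * bit cost over every
// tree node a symbol would traverse, mirroring how the map is entropy coded.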
static int cost_segmap(int *segcounts, vpx_prob *probs) { const int c01 = segcounts[0] + segcounts[1]; const int c23 = segcounts[2] + segcounts[3]; const int c45 = segcounts[4] + segcounts[5]; const int c67 = segcounts[6] + segcounts[7]; const int c0123 = c01 + c23; const int c4567 = c45 + c67; // Cost the top node of the tree int cost = c0123 * vp9_cost_zero(probs[0]) + c4567 * vp9_cost_one(probs[0]); // Cost subsequent levels if (c0123 > 0) { cost += c01 * vp9_cost_zero(probs[1]) + c23 * vp9_cost_one(probs[1]); if (c01 > 0) cost += segcounts[0] * vp9_cost_zero(probs[3]) + segcounts[1] * vp9_cost_one(probs[3]); if (c23 > 0) cost += segcounts[2] * vp9_cost_zero(probs[4]) + segcounts[3] * vp9_cost_one(probs[4]); } if (c4567 > 0) { cost += c45 * vp9_cost_zero(probs[2]) + c67 * vp9_cost_one(probs[2]); if (c45 > 0) cost += segcounts[4] * vp9_cost_zero(probs[5]) + segcounts[5] * vp9_cost_one(probs[5]); if (c67 > 0) cost += segcounts[6] * vp9_cost_zero(probs[6]) + segcounts[7] * vp9_cost_one(probs[6]); } return cost; } static void count_segs(const VP9_COMMON *cm, MACROBLOCKD *xd, const TileInfo *tile, MODE_INFO **mi, int *no_pred_segcounts, int (*temporal_predictor_count)[2], int *t_unpred_seg_counts, int bw, int bh, int mi_row, int mi_col) { int segment_id; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; xd->mi = mi; segment_id = xd->mi[0]->segment_id; set_mi_row_col(xd, tile, mi_row, bh, mi_col, bw, cm->mi_rows, cm->mi_cols); // Count the number of hits on each segment with no prediction no_pred_segcounts[segment_id]++; // Temporal prediction not allowed on key frames if (cm->frame_type != KEY_FRAME) { const BLOCK_SIZE bsize = xd->mi[0]->sb_type; // Test to see if the segment id matches the predicted value. const int pred_segment_id = get_segment_id(cm, cm->last_frame_seg_map, bsize, mi_row, mi_col); const int pred_flag = pred_segment_id == segment_id; const int pred_context = vp9_get_pred_context_seg_id(xd); // Store the prediction status for this mb and update counts // as appropriate xd->mi[0]->seg_id_predicted = pred_flag; temporal_predictor_count[pred_context][pred_flag]++; // Update the "unpredicted" segment count if (!pred_flag) t_unpred_seg_counts[segment_id]++; } } static void count_segs_sb(const VP9_COMMON *cm, MACROBLOCKD *xd, const TileInfo *tile, MODE_INFO **mi, int *no_pred_segcounts, int (*temporal_predictor_count)[2], int *t_unpred_seg_counts, int mi_row, int mi_col, BLOCK_SIZE bsize) { const int mis = cm->mi_stride; int bw, bh; const int bs = num_8x8_blocks_wide_lookup[bsize], hbs = bs / 2; if (mi_row >= cm->mi_rows || mi_col >= cm->mi_cols) return; bw = num_8x8_blocks_wide_lookup[mi[0]->sb_type]; bh = num_8x8_blocks_high_lookup[mi[0]->sb_type]; if (bw == bs && bh == bs) { count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, bs, bs, mi_row, mi_col); } else if (bw == bs && bh < bs) { count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, bs, hbs, mi_row, mi_col); count_segs(cm, xd, tile, mi + hbs * mis, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, bs, hbs, mi_row + hbs, mi_col); } else if (bw < bs && bh == bs) { count_segs(cm, xd, tile, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row, mi_col); count_segs(cm, xd, tile, mi + hbs, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, hbs, bs, mi_row, mi_col + hbs); } else { const BLOCK_SIZE subsize = subsize_lookup[PARTITION_SPLIT][bsize]; int n; assert(bw < bs && bh < 
bs); for (n = 0; n < 4; n++) { const int mi_dc = hbs * (n & 1); const int mi_dr = hbs * (n >> 1); count_segs_sb(cm, xd, tile, &mi[mi_dr * mis + mi_dc], no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, mi_row + mi_dr, mi_col + mi_dc, subsize); } } } void vp9_choose_segmap_coding_method(VP9_COMMON *cm, MACROBLOCKD *xd) { struct segmentation *seg = &cm->seg; int no_pred_cost; int t_pred_cost = INT_MAX; int i, tile_col, mi_row, mi_col; int temporal_predictor_count[PREDICTION_PROBS][2] = { { 0 } }; int no_pred_segcounts[MAX_SEGMENTS] = { 0 }; int t_unpred_seg_counts[MAX_SEGMENTS] = { 0 }; vpx_prob no_pred_tree[SEG_TREE_PROBS]; vpx_prob t_pred_tree[SEG_TREE_PROBS]; vpx_prob t_nopred_prob[PREDICTION_PROBS]; // Set default state for the segment tree probabilities and the // temporal coding probabilities memset(seg->tree_probs, 255, sizeof(seg->tree_probs)); memset(seg->pred_probs, 255, sizeof(seg->pred_probs)); // First of all generate stats regarding how well the last segment map // predicts this one for (tile_col = 0; tile_col < 1 << cm->log2_tile_cols; tile_col++) { TileInfo tile; MODE_INFO **mi_ptr; vp9_tile_init(&tile, cm, 0, tile_col); mi_ptr = cm->mi_grid_visible + tile.mi_col_start; for (mi_row = 0; mi_row < cm->mi_rows; mi_row += 8, mi_ptr += 8 * cm->mi_stride) { MODE_INFO **mi = mi_ptr; for (mi_col = tile.mi_col_start; mi_col < tile.mi_col_end; mi_col += 8, mi += 8) count_segs_sb(cm, xd, &tile, mi, no_pred_segcounts, temporal_predictor_count, t_unpred_seg_counts, mi_row, mi_col, BLOCK_64X64); } } // Work out probability tree for coding segments without prediction // and the cost. calc_segtree_probs(no_pred_segcounts, no_pred_tree); no_pred_cost = cost_segmap(no_pred_segcounts, no_pred_tree); // Key frames cannot use temporal prediction if (!frame_is_intra_only(cm)) { // Work out probability tree for coding those segments not // predicted using the temporal method and the cost. calc_segtree_probs(t_unpred_seg_counts, t_pred_tree); t_pred_cost = cost_segmap(t_unpred_seg_counts, t_pred_tree); // Add in the cost of the signaling for each prediction context. for (i = 0; i < PREDICTION_PROBS; i++) { const int count0 = temporal_predictor_count[i][0]; const int count1 = temporal_predictor_count[i][1]; t_nopred_prob[i] = get_binary_prob(count0, count1); // Add in the predictor signaling cost t_pred_cost += count0 * vp9_cost_zero(t_nopred_prob[i]) + count1 * vp9_cost_one(t_nopred_prob[i]); } } // Now choose which coding method to use. if (t_pred_cost < no_pred_cost) { seg->temporal_update = 1; memcpy(seg->tree_probs, t_pred_tree, sizeof(t_pred_tree)); memcpy(seg->pred_probs, t_nopred_prob, sizeof(t_nopred_prob)); } else { seg->temporal_update = 0; memcpy(seg->tree_probs, no_pred_tree, sizeof(no_pred_tree)); } } void vp9_reset_segment_features(struct segmentation *seg) { // Set up default state for MB feature flags seg->enabled = 0; seg->update_map = 0; seg->update_data = 0; memset(seg->tree_probs, 255, sizeof(seg->tree_probs)); vp9_clearall_segfeatures(seg); } libvpx-1.8.2/vp9/encoder/vp9_segmentation.h000066400000000000000000000036041357355204000206360ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/
#ifndef VPX_VP9_ENCODER_VP9_SEGMENTATION_H_
#define VPX_VP9_ENCODER_VP9_SEGMENTATION_H_
#include "vp9/common/vp9_blockd.h"
#include "vp9/encoder/vp9_encoder.h"
#ifdef __cplusplus
extern "C" {
#endif
void vp9_enable_segmentation(struct segmentation *seg);
void vp9_disable_segmentation(struct segmentation *seg);
void vp9_disable_segfeature(struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id);
void vp9_clear_segdata(struct segmentation *seg, int segment_id, SEG_LVL_FEATURES feature_id);
void vp9_psnr_aq_mode_setup(struct segmentation *seg);
void vp9_perceptual_aq_mode_setup(struct VP9_COMP *cpi, struct segmentation *seg);
// The values given for each segment can be either deltas (from the default
// value chosen for the frame) or absolute values.
//
// Valid range for abs values is (0-127 for MB_LVL_ALT_Q), (0-63 for
// SEGMENT_ALT_LF)
// Valid range for delta values is (+/-127 for MB_LVL_ALT_Q), (+/-63 for
// SEGMENT_ALT_LF)
//
// abs_delta = SEGMENT_DELTADATA (deltas) abs_delta = SEGMENT_ABSDATA (use
// the absolute values given).
void vp9_set_segment_data(struct segmentation *seg, signed char *feature_data, unsigned char abs_delta);
void vp9_choose_segmap_coding_method(VP9_COMMON *cm, MACROBLOCKD *xd);
void vp9_reset_segment_features(struct segmentation *seg);
#ifdef __cplusplus
}  // extern "C"
#endif
#endif  // VPX_VP9_ENCODER_VP9_SEGMENTATION_H_
libvpx-1.8.2/vp9/encoder/vp9_skin_detection.c000066400000000000000000000155711357355204000211430ustar00rootroot00000000000000/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
#include <limits.h>
#include <math.h>
#include "vp9/common/vp9_blockd.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_skin_detection.h"
int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, int stride, int strideuv, int bsize, int consec_zeromv, int curr_motion_magn) {
  // No skin if the block has had zero/small motion for a long consecutive time.
  if (consec_zeromv > 60 && curr_motion_magn == 0) {
    return 0;
  } else {
    int motion = 1;
    // Take center pixel in block to determine is_skin.
    const int y_width_shift = (4 << b_width_log2_lookup[bsize]) >> 1;
    const int y_height_shift = (4 << b_height_log2_lookup[bsize]) >> 1;
    const int uv_width_shift = y_width_shift >> 1;
    const int uv_height_shift = y_height_shift >> 1;
    const uint8_t ysource = y[y_height_shift * stride + y_width_shift];
    const uint8_t usource = u[uv_height_shift * strideuv + uv_width_shift];
    const uint8_t vsource = v[uv_height_shift * strideuv + uv_width_shift];
    if (consec_zeromv > 25 && curr_motion_magn == 0) motion = 0;
    return vpx_skin_pixel(ysource, usource, vsource, motion);
  }
}
void vp9_compute_skin_sb(VP9_COMP *const cpi, BLOCK_SIZE bsize, int mi_row, int mi_col) {
  int i, j, num_bl;
  VP9_COMMON *const cm = &cpi->common;
  const uint8_t *src_y = cpi->Source->y_buffer;
  const uint8_t *src_u = cpi->Source->u_buffer;
  const uint8_t *src_v = cpi->Source->v_buffer;
  const int src_ystride = cpi->Source->y_stride;
  const int src_uvstride = cpi->Source->uv_stride;
  const int y_bsize = 4 << b_width_log2_lookup[bsize];
  const int uv_bsize = y_bsize >> 1;
  const int shy = (y_bsize == 8) ?
3 : 4; const int shuv = shy - 1; const int fac = y_bsize / 8; const int y_shift = src_ystride * (mi_row << 3) + (mi_col << 3); const int uv_shift = src_uvstride * (mi_row << 2) + (mi_col << 2); const int mi_row_limit = VPXMIN(mi_row + 8, cm->mi_rows - 2); const int mi_col_limit = VPXMIN(mi_col + 8, cm->mi_cols - 2); src_y += y_shift; src_u += uv_shift; src_v += uv_shift; for (i = mi_row; i < mi_row_limit; i += fac) { num_bl = 0; for (j = mi_col; j < mi_col_limit; j += fac) { int consec_zeromv = 0; int bl_index = i * cm->mi_cols + j; int bl_index1 = bl_index + 1; int bl_index2 = bl_index + cm->mi_cols; int bl_index3 = bl_index2 + 1; // Don't detect skin on the boundary. if (i == 0 || j == 0) continue; if (bsize == BLOCK_8X8) consec_zeromv = cpi->consec_zero_mv[bl_index]; else consec_zeromv = VPXMIN(cpi->consec_zero_mv[bl_index], VPXMIN(cpi->consec_zero_mv[bl_index1], VPXMIN(cpi->consec_zero_mv[bl_index2], cpi->consec_zero_mv[bl_index3]))); cpi->skin_map[bl_index] = vp9_compute_skin_block(src_y, src_u, src_v, src_ystride, src_uvstride, bsize, consec_zeromv, 0); num_bl++; src_y += y_bsize; src_u += uv_bsize; src_v += uv_bsize; } src_y += (src_ystride << shy) - (num_bl << shy); src_u += (src_uvstride << shuv) - (num_bl << shuv); src_v += (src_uvstride << shuv) - (num_bl << shuv); } // Remove isolated skin blocks (none of its neighbors are skin) and isolated // non-skin blocks (all of its neighbors are skin). // Skip 4 corner blocks which have only 3 neighbors to remove isolated skin // blocks. Skip superblock borders to remove isolated non-skin blocks. for (i = mi_row; i < mi_row_limit; i += fac) { for (j = mi_col; j < mi_col_limit; j += fac) { int bl_index = i * cm->mi_cols + j; int num_neighbor = 0; int mi, mj; int non_skin_threshold = 8; // Skip 4 corners. if ((i == mi_row && (j == mi_col || j == mi_col_limit - fac)) || (i == mi_row_limit - fac && (j == mi_col || j == mi_col_limit - fac))) continue; // There are only 5 neighbors for non-skin blocks on the border. if (i == mi_row || i == mi_row_limit - fac || j == mi_col || j == mi_col_limit - fac) non_skin_threshold = 5; for (mi = -fac; mi <= fac; mi += fac) { for (mj = -fac; mj <= fac; mj += fac) { if (i + mi >= mi_row && i + mi < mi_row_limit && j + mj >= mi_col && j + mj < mi_col_limit) { int bl_neighbor_index = (i + mi) * cm->mi_cols + j + mj; if (cpi->skin_map[bl_neighbor_index]) num_neighbor++; } } } if (cpi->skin_map[bl_index] && num_neighbor < 2) cpi->skin_map[bl_index] = 0; if (!cpi->skin_map[bl_index] && num_neighbor == non_skin_threshold) cpi->skin_map[bl_index] = 1; } } } #ifdef OUTPUT_YUV_SKINMAP // For viewing skin map on input source. void vp9_output_skin_map(VP9_COMP *const cpi, FILE *yuv_skinmap_file) { int i, j, mi_row, mi_col, num_bl; VP9_COMMON *const cm = &cpi->common; uint8_t *y; const uint8_t *src_y = cpi->Source->y_buffer; const int src_ystride = cpi->Source->y_stride; const int y_bsize = 16; // Use 8x8 or 16x16. const int shy = (y_bsize == 8) ? 3 : 4; const int fac = y_bsize / 8; YV12_BUFFER_CONFIG skinmap; memset(&skinmap, 0, sizeof(YV12_BUFFER_CONFIG)); if (vpx_alloc_frame_buffer(&skinmap, cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment)) { vpx_free_frame_buffer(&skinmap); return; } memset(skinmap.buffer_alloc, 128, skinmap.frame_size); y = skinmap.y_buffer; // Loop through blocks and set skin map based on center pixel of block. // Set y to white for skin block, otherwise set to source with gray scale. // Ignore rightmost/bottom boundary blocks. 
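/* Note added for clarity (not in the original source): with y_bsize = 16
 * each skin decision below paints a full 16x16 luma patch, so a skin block
 * becomes a solid white (255) square while a non-skin block keeps its
 * source luma; both chroma planes stay at the 128 fill from the memset
 * above, which is what makes the rest of the output gray-scale. */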
for (mi_row = 0; mi_row < cm->mi_rows - 1; mi_row += fac) {
    num_bl = 0;
    for (mi_col = 0; mi_col < cm->mi_cols - 1; mi_col += fac) {
      const int block_index = mi_row * cm->mi_cols + mi_col;
      const int is_skin = cpi->skin_map[block_index];
      for (i = 0; i < y_bsize; i++) {
        for (j = 0; j < y_bsize; j++) {
          y[i * src_ystride + j] = is_skin ? 255 : src_y[i * src_ystride + j];
        }
      }
      num_bl++;
      y += y_bsize;
      src_y += y_bsize;
    }
    y += (src_ystride << shy) - (num_bl << shy);
    src_y += (src_ystride << shy) - (num_bl << shy);
  }
  vpx_write_yuv_frame(yuv_skinmap_file, &skinmap);
  vpx_free_frame_buffer(&skinmap);
}
#endif
libvpx-1.8.2/vp9/encoder/vp9_skin_detection.h000066400000000000000000000023541357355204000211440ustar00rootroot00000000000000/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
#ifndef VPX_VP9_ENCODER_VP9_SKIN_DETECTION_H_
#define VPX_VP9_ENCODER_VP9_SKIN_DETECTION_H_
#include "vp9/common/vp9_blockd.h"
#include "vpx_dsp/skin_detection.h"
#include "vpx_util/vpx_write_yuv_frame.h"
#ifdef __cplusplus
extern "C" {
#endif
struct VP9_COMP;
int vp9_compute_skin_block(const uint8_t *y, const uint8_t *u, const uint8_t *v, int stride, int strideuv, int bsize, int consec_zeromv, int curr_motion_magn);
void vp9_compute_skin_sb(struct VP9_COMP *const cpi, BLOCK_SIZE bsize, int mi_row, int mi_col);
#ifdef OUTPUT_YUV_SKINMAP
// For viewing skin map on input source.
void vp9_output_skin_map(struct VP9_COMP *const cpi, FILE *yuv_skinmap_file);
#endif
#ifdef __cplusplus
}  // extern "C"
#endif
#endif  // VPX_VP9_ENCODER_VP9_SKIN_DETECTION_H_
libvpx-1.8.2/vp9/encoder/vp9_speed_features.c000066400000000000000000001172041357355204000211340ustar00rootroot00000000000000/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
#include <limits.h>
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_speed_features.h"
#include "vp9/encoder/vp9_rdopt.h"
#include "vpx_dsp/vpx_dsp_common.h"
// Mesh search patterns for various speed settings
static MESH_PATTERN best_quality_mesh_pattern[MAX_MESH_STEP] = { { 64, 4 }, { 28, 2 }, { 15, 1 }, { 7, 1 } };
#if !CONFIG_REALTIME_ONLY
// Define 3 mesh density levels to control the number of searches.
#define MESH_DENSITY_LEVELS 3
static MESH_PATTERN good_quality_mesh_patterns[MESH_DENSITY_LEVELS][MAX_MESH_STEP] = { { { 64, 8 }, { 28, 4 }, { 15, 1 }, { 7, 1 } }, { { 64, 8 }, { 14, 2 }, { 7, 1 }, { 7, 1 } }, { { 64, 16 }, { 24, 8 }, { 12, 4 }, { 7, 1 } }, };
// Intra only frames, golden frames (except alt ref overlays) and
// alt ref frames tend to be coded at a higher than ambient quality
static int frame_is_boosted(const VP9_COMP *cpi) { return frame_is_kf_gf_arf(cpi); }
// Sets a partition size down to which the auto partition code will always
// search (can go lower), based on the image dimensions. The logic here
// is that the extent to which ringing artefacts are offensive depends
// partly on the screen area over which they propagate. Propagation is
// limited by transform block size, but the screen area taken up by a given
// block size will be larger for a small image format stretched to full screen.
static BLOCK_SIZE set_partition_min_limit(VP9_COMMON *const cm) {
  unsigned int screen_area = (cm->width * cm->height);
  // Select block size based on image format size.
  if (screen_area < 1280 * 720) {
    // Formats smaller in area than 720P
    return BLOCK_4X4;
  } else if (screen_area < 1920 * 1080) {
    // Format >= 720P and < 1080P
    return BLOCK_8X8;
  } else {
    // Formats 1080P and up
    return BLOCK_16X16;
  }
}
static void set_good_speed_feature_framesize_dependent(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed) {
  VP9_COMMON *const cm = &cpi->common;
  const int min_frame_size = VPXMIN(cm->width, cm->height);
  const int is_480p_or_larger = min_frame_size >= 480;
  const int is_720p_or_larger = min_frame_size >= 720;
  const int is_1080p_or_larger = min_frame_size >= 1080;
  const int is_2160p_or_larger = min_frame_size >= 2160;
  // speed 0 features
  sf->partition_search_breakout_thr.dist = (1 << 20);
  sf->partition_search_breakout_thr.rate = 80;
  sf->use_square_only_thresh_high = BLOCK_SIZES;
  sf->use_square_only_thresh_low = BLOCK_4X4;
  if (is_480p_or_larger) {
    // Currently, the machine-learning based partition search early termination
    // is only used while VPXMIN(cm->width, cm->height) >= 480 and speed = 0.
    sf->rd_ml_partition.search_early_termination = 1;
  } else {
    sf->use_square_only_thresh_high = BLOCK_32X32;
  }
  if (!is_1080p_or_larger) {
    sf->rd_ml_partition.search_breakout = 1;
    if (is_720p_or_larger) {
      sf->rd_ml_partition.search_breakout_thresh[0] = 0.0f;
      sf->rd_ml_partition.search_breakout_thresh[1] = 0.0f;
      sf->rd_ml_partition.search_breakout_thresh[2] = 0.0f;
    } else {
      sf->rd_ml_partition.search_breakout_thresh[0] = 2.5f;
      sf->rd_ml_partition.search_breakout_thresh[1] = 1.5f;
      sf->rd_ml_partition.search_breakout_thresh[2] = 1.5f;
    }
  }
  if (speed >= 1) {
    sf->rd_ml_partition.search_early_termination = 0;
    sf->rd_ml_partition.search_breakout = 1;
    if (is_480p_or_larger)
      sf->use_square_only_thresh_high = BLOCK_64X64;
    else
      sf->use_square_only_thresh_high = BLOCK_32X32;
    sf->use_square_only_thresh_low = BLOCK_16X16;
    if (is_720p_or_larger) {
      sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT;
      sf->partition_search_breakout_thr.dist = (1 << 22);
      sf->rd_ml_partition.search_breakout_thresh[0] = -5.0f;
      sf->rd_ml_partition.search_breakout_thresh[1] = -5.0f;
      sf->rd_ml_partition.search_breakout_thresh[2] = -9.0f;
    } else {
      sf->disable_split_mask = DISABLE_COMPOUND_SPLIT;
      sf->partition_search_breakout_thr.dist = (1 << 21);
      sf->rd_ml_partition.search_breakout_thresh[0] = -1.0f;
      sf->rd_ml_partition.search_breakout_thresh[1] = -1.0f;
      sf->rd_ml_partition.search_breakout_thresh[2] = -1.0f;
    }
#if CONFIG_VP9_HIGHBITDEPTH
    if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH) {
      sf->rd_ml_partition.search_breakout_thresh[0] -= 1.0f;
      sf->rd_ml_partition.search_breakout_thresh[1] -= 1.0f;
      sf->rd_ml_partition.search_breakout_thresh[2] -= 1.0f;
    }
#endif  // CONFIG_VP9_HIGHBITDEPTH
  }
  if (speed >= 2) {
    sf->use_square_only_thresh_high = BLOCK_4X4;
    sf->use_square_only_thresh_low = BLOCK_SIZES;
    if (is_720p_or_larger) {
      sf->disable_split_mask = cm->show_frame ?
DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; sf->adaptive_pred_interp_filter = 0; sf->partition_search_breakout_thr.dist = (1 << 24); sf->partition_search_breakout_thr.rate = 120; sf->rd_ml_partition.search_breakout = 0; } else { sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; sf->partition_search_breakout_thr.dist = (1 << 22); sf->partition_search_breakout_thr.rate = 100; sf->rd_ml_partition.search_breakout_thresh[0] = 0.0f; sf->rd_ml_partition.search_breakout_thresh[1] = -1.0f; sf->rd_ml_partition.search_breakout_thresh[2] = -4.0f; } sf->rd_auto_partition_min_limit = set_partition_min_limit(cm); // Use a set of speed features for 4k videos. if (is_2160p_or_larger) { sf->use_square_partition_only = 1; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC; sf->alt_ref_search_fp = 1; sf->cb_pred_filter_search = 1; sf->adaptive_interp_filter_search = 1; sf->disable_split_mask = DISABLE_ALL_SPLIT; } } if (speed >= 3) { sf->rd_ml_partition.search_breakout = 0; if (is_720p_or_larger) { sf->disable_split_mask = DISABLE_ALL_SPLIT; sf->schedule_mode_search = cm->base_qindex < 220 ? 1 : 0; sf->partition_search_breakout_thr.dist = (1 << 25); sf->partition_search_breakout_thr.rate = 200; } else { sf->max_intra_bsize = BLOCK_32X32; sf->disable_split_mask = DISABLE_ALL_INTER_SPLIT; sf->schedule_mode_search = cm->base_qindex < 175 ? 1 : 0; sf->partition_search_breakout_thr.dist = (1 << 23); sf->partition_search_breakout_thr.rate = 120; } } // If this is a two pass clip that fits the criteria for animated or // graphics content then reset disable_split_mask for speeds 1-4. // Also if the image edge is internal to the coded area. if ((speed >= 1) && (cpi->oxcf.pass == 2) && ((cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) || (vp9_internal_image_edge(cpi)))) { sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; } if (speed >= 4) { sf->partition_search_breakout_thr.rate = 300; if (is_720p_or_larger) { sf->partition_search_breakout_thr.dist = (1 << 26); } else { sf->partition_search_breakout_thr.dist = (1 << 24); } sf->disable_split_mask = DISABLE_ALL_SPLIT; } if (speed >= 5) { sf->partition_search_breakout_thr.rate = 500; } } static double tx_dom_thresholds[6] = { 99.0, 14.0, 12.0, 8.0, 4.0, 0.0 }; static double qopt_thresholds[6] = { 99.0, 12.0, 10.0, 4.0, 2.0, 0.0 }; static void set_good_speed_feature_framesize_independent(VP9_COMP *cpi, VP9_COMMON *cm, SPEED_FEATURES *sf, int speed) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; const int boosted = frame_is_boosted(cpi); int i; sf->tx_size_search_breakout = 1; sf->adaptive_rd_thresh = 1; sf->adaptive_rd_thresh_row_mt = 0; sf->allow_skip_recode = 1; sf->less_rectangular_check = 1; sf->use_square_partition_only = !boosted; sf->prune_ref_frame_for_rect_partitions = 1; sf->rd_ml_partition.var_pruning = 1; sf->rd_ml_partition.prune_rect_thresh[0] = -1; sf->rd_ml_partition.prune_rect_thresh[1] = 350; sf->rd_ml_partition.prune_rect_thresh[2] = 325; sf->rd_ml_partition.prune_rect_thresh[3] = 250; if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { sf->exhaustive_searches_thresh = (1 << 22); } else { sf->exhaustive_searches_thresh = INT_MAX; } for (i = 0; i < MAX_MESH_STEP; ++i) { const int mesh_density_level = 0; sf->mesh_patterns[i].range = good_quality_mesh_patterns[mesh_density_level][i].range; sf->mesh_patterns[i].interval = good_quality_mesh_patterns[mesh_density_level][i].interval; } if (speed >= 1) { sf->temporal_filter_search_method = NSTEP; sf->rd_ml_partition.var_pruning = !boosted; 
sf->rd_ml_partition.prune_rect_thresh[1] = 225; sf->rd_ml_partition.prune_rect_thresh[2] = 225; sf->rd_ml_partition.prune_rect_thresh[3] = 225; if (oxcf->pass == 2) { TWO_PASS *const twopass = &cpi->twopass; if ((twopass->fr_content_type == FC_GRAPHICS_ANIMATION) || vp9_internal_image_edge(cpi)) { sf->use_square_partition_only = !boosted; } else { sf->use_square_partition_only = !frame_is_intra_only(cm); } } else { sf->use_square_partition_only = !frame_is_intra_only(cm); } sf->allow_txfm_domain_distortion = 1; sf->tx_domain_thresh = tx_dom_thresholds[(speed < 6) ? speed : 5]; sf->allow_quant_coeff_opt = sf->optimize_coefficients; sf->quant_opt_thresh = qopt_thresholds[(speed < 6) ? speed : 5]; sf->less_rectangular_check = 1; sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; sf->mv.auto_mv_step_size = 1; sf->adaptive_rd_thresh = 2; sf->mv.subpel_search_level = 1; if (cpi->oxcf.content != VP9E_CONTENT_FILM) sf->mode_skip_start = 10; sf->adaptive_pred_interp_filter = 1; sf->allow_acl = 0; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; if (cpi->oxcf.content != VP9E_CONTENT_FILM) { sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; } sf->recode_tolerance_low = 15; sf->recode_tolerance_high = 30; sf->exhaustive_searches_thresh = (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 23) : INT_MAX; sf->use_accurate_subpel_search = USE_4_TAPS; } if (speed >= 2) { sf->rd_ml_partition.var_pruning = 0; if (oxcf->vbr_corpus_complexity) sf->recode_loop = ALLOW_RECODE_FIRST; else sf->recode_loop = ALLOW_RECODE_KFARFGF; sf->tx_size_search_method = frame_is_boosted(cpi) ? USE_FULL_RD : USE_LARGESTALL; // Reference masking is not supported in dynamic scaling mode. sf->reference_masking = oxcf->resize_mode != RESIZE_DYNAMIC ? 1 : 0; sf->mode_search_skip_flags = (cm->frame_type == KEY_FRAME) ? 0 : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR; sf->disable_filter_search_var_thresh = 100; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->recode_tolerance_low = 15; sf->recode_tolerance_high = 45; sf->enhanced_full_pixel_motion_search = 0; sf->prune_ref_frame_for_rect_partitions = 0; sf->rd_ml_partition.prune_rect_thresh[1] = -1; sf->rd_ml_partition.prune_rect_thresh[2] = -1; sf->rd_ml_partition.prune_rect_thresh[3] = -1; sf->mv.subpel_search_level = 0; if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { for (i = 0; i < MAX_MESH_STEP; ++i) { int mesh_density_level = 1; sf->mesh_patterns[i].range = good_quality_mesh_patterns[mesh_density_level][i].range; sf->mesh_patterns[i].interval = good_quality_mesh_patterns[mesh_density_level][i].interval; } } sf->use_accurate_subpel_search = USE_2_TAPS; } if (speed >= 3) { sf->use_square_partition_only = !frame_is_intra_only(cm); sf->tx_size_search_method = frame_is_intra_only(cm) ? 
USE_FULL_RD : USE_LARGESTALL; sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED; sf->adaptive_pred_interp_filter = 0; sf->adaptive_mode_search = 1; sf->cb_partition_search = !boosted; sf->cb_pred_filter_search = 1; sf->alt_ref_search_fp = 1; sf->recode_loop = ALLOW_RECODE_KFMAXBW; sf->adaptive_rd_thresh = 3; sf->mode_skip_start = 6; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC; sf->adaptive_interp_filter_search = 1; sf->allow_partition_search_skip = 1; if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) { for (i = 0; i < MAX_MESH_STEP; ++i) { int mesh_density_level = 2; sf->mesh_patterns[i].range = good_quality_mesh_patterns[mesh_density_level][i].range; sf->mesh_patterns[i].interval = good_quality_mesh_patterns[mesh_density_level][i].interval; } } } if (speed >= 4) { sf->use_square_partition_only = 1; sf->tx_size_search_method = USE_LARGESTALL; sf->mv.search_method = BIGDIA; sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_MORE; sf->adaptive_rd_thresh = 4; if (cm->frame_type != KEY_FRAME) sf->mode_search_skip_flags |= FLAG_EARLY_TERMINATE; sf->disable_filter_search_var_thresh = 200; sf->use_lp32x32fdct = 1; sf->use_fast_coef_updates = ONE_LOOP_REDUCED; sf->use_fast_coef_costing = 1; sf->motion_field_mode_search = !boosted; } if (speed >= 5) { int i; sf->optimize_coefficients = 0; sf->mv.search_method = HEX; sf->disable_filter_search_var_thresh = 500; for (i = 0; i < TX_SIZES; ++i) { sf->intra_y_mode_mask[i] = INTRA_DC; sf->intra_uv_mode_mask[i] = INTRA_DC; } sf->mv.reduce_first_step_size = 1; sf->simple_model_rd_from_var = 1; } } #endif // !CONFIG_REALTIME_ONLY static void set_rt_speed_feature_framesize_dependent(VP9_COMP *cpi, SPEED_FEATURES *sf, int speed) { VP9_COMMON *const cm = &cpi->common; if (speed >= 1) { if (VPXMIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; } else { sf->disable_split_mask = DISABLE_COMPOUND_SPLIT; } } if (speed >= 2) { if (VPXMIN(cm->width, cm->height) >= 720) { sf->disable_split_mask = cm->show_frame ? DISABLE_ALL_SPLIT : DISABLE_ALL_INTER_SPLIT; } else { sf->disable_split_mask = LAST_AND_INTRA_SPLIT_ONLY; } } if (speed >= 5) { sf->partition_search_breakout_thr.rate = 200; if (VPXMIN(cm->width, cm->height) >= 720) { sf->partition_search_breakout_thr.dist = (1 << 25); } else { sf->partition_search_breakout_thr.dist = (1 << 23); } } if (speed >= 7) { sf->encode_breakout_thresh = (VPXMIN(cm->width, cm->height) >= 720) ? 800 : 300; } } static void set_rt_speed_feature_framesize_independent( VP9_COMP *cpi, SPEED_FEATURES *sf, int speed, vp9e_tune_content content) { VP9_COMMON *const cm = &cpi->common; SVC *const svc = &cpi->svc; const int is_keyframe = cm->frame_type == KEY_FRAME; const int frames_since_key = is_keyframe ? 
0 : cpi->rc.frames_since_key; sf->static_segmentation = 0; sf->adaptive_rd_thresh = 1; sf->adaptive_rd_thresh_row_mt = 0; sf->use_fast_coef_costing = 1; sf->exhaustive_searches_thresh = INT_MAX; sf->allow_acl = 0; sf->copy_partition_flag = 0; sf->use_source_sad = 0; sf->use_simple_block_yrd = 0; sf->adapt_partition_source_sad = 0; sf->use_altref_onepass = 0; sf->use_compound_nonrd_pickmode = 0; sf->nonrd_keyframe = 0; sf->svc_use_lowres_part = 0; sf->overshoot_detection_cbr_rt = NO_DETECTION; sf->disable_16x16part_nonkey = 0; sf->disable_golden_ref = 0; sf->enable_tpl_model = 0; sf->enhanced_full_pixel_motion_search = 0; sf->use_accurate_subpel_search = USE_2_TAPS; sf->nonrd_use_ml_partition = 0; sf->variance_part_thresh_mult = 1; sf->cb_pred_filter_search = 0; sf->force_smooth_interpol = 0; sf->rt_intra_dc_only_low_content = 0; if (speed >= 1) { sf->allow_txfm_domain_distortion = 1; sf->tx_domain_thresh = 0.0; sf->allow_quant_coeff_opt = 0; sf->quant_opt_thresh = 0.0; sf->use_square_partition_only = !frame_is_intra_only(cm); sf->less_rectangular_check = 1; sf->tx_size_search_method = frame_is_intra_only(cm) ? USE_FULL_RD : USE_LARGESTALL; sf->use_rd_breakout = 1; sf->adaptive_motion_search = 1; sf->adaptive_pred_interp_filter = 1; sf->mv.auto_mv_step_size = 1; sf->adaptive_rd_thresh = 2; sf->intra_y_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_32X32] = INTRA_DC_H_V; sf->intra_uv_mode_mask[TX_16X16] = INTRA_DC_H_V; } if (speed >= 2) { sf->mode_search_skip_flags = (cm->frame_type == KEY_FRAME) ? 0 : FLAG_SKIP_INTRA_DIRMISMATCH | FLAG_SKIP_INTRA_BESTINTER | FLAG_SKIP_COMP_BESTINTRA | FLAG_SKIP_INTRA_LOWVAR; sf->adaptive_pred_interp_filter = 2; // Reference masking only enabled for 1 spatial layer, and if none of the // references have been scaled. The latter condition needs to be checked // for external or internal dynamic resize. 
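/* Added summary of the block below (not in the original source): masking
 * starts out enabled only for single-spatial-layer encodes, and is then
 * revoked again if any active reference frame carries a scale factor,
 * i.e. if vp9_is_scaled() holds for a reference that is selected via
 * cpi->ref_frame_flags. */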
sf->reference_masking = (svc->number_spatial_layers == 1); if (sf->reference_masking == 1 && (cpi->external_resize == 1 || cpi->oxcf.resize_mode == RESIZE_DYNAMIC)) { MV_REFERENCE_FRAME ref_frame; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) { const struct scale_factors *const scale_fac = &cm->frame_refs[ref_frame - 1].sf; if (vp9_is_scaled(scale_fac)) sf->reference_masking = 0; } } } sf->disable_filter_search_var_thresh = 50; sf->comp_inter_joint_search_thresh = BLOCK_SIZES; sf->auto_min_max_partition_size = RELAXED_NEIGHBORING_MIN_MAX; sf->lf_motion_threshold = LOW_MOTION_THRESHOLD; sf->adjust_partitioning_from_last_frame = 1; sf->last_partitioning_redo_frequency = 3; sf->use_lp32x32fdct = 1; sf->mode_skip_start = 11; sf->intra_y_mode_mask[TX_16X16] = INTRA_DC_H_V; } if (speed >= 3) { sf->use_square_partition_only = 1; sf->disable_filter_search_var_thresh = 100; sf->use_uv_intra_rd_estimate = 1; sf->skip_encode_sb = 1; sf->mv.subpel_search_level = 0; sf->adaptive_rd_thresh = 4; sf->mode_skip_start = 6; sf->allow_skip_recode = 0; sf->optimize_coefficients = 0; sf->disable_split_mask = DISABLE_ALL_SPLIT; sf->lpf_pick = LPF_PICK_FROM_Q; } if (speed >= 4) { int i; if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0) sf->use_altref_onepass = 1; sf->mv.subpel_force_stop = QUARTER_PEL; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_DC_H_V; sf->intra_uv_mode_mask[i] = INTRA_DC; } sf->intra_y_mode_mask[TX_32X32] = INTRA_DC; sf->frame_parameter_update = 0; sf->mv.search_method = FAST_HEX; sf->allow_skip_recode = 0; sf->max_intra_bsize = BLOCK_32X32; sf->use_fast_coef_costing = 0; sf->use_quant_fp = !is_keyframe; sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO; sf->adaptive_rd_thresh = 2; sf->use_fast_coef_updates = is_keyframe ? TWO_LOOP : ONE_LOOP_REDUCED; sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH; sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8; sf->partition_search_type = VAR_BASED_PARTITION; } if (speed >= 5) { sf->use_altref_onepass = 0; sf->use_quant_fp = !is_keyframe; sf->auto_min_max_partition_size = is_keyframe ? RELAXED_NEIGHBORING_MIN_MAX : STRICT_NEIGHBORING_MIN_MAX; sf->default_max_partition_size = BLOCK_32X32; sf->default_min_partition_size = BLOCK_8X8; sf->force_frame_boost = is_keyframe || (frames_since_key % (sf->last_partitioning_redo_frequency << 1) == 1); sf->max_delta_qindex = is_keyframe ? 20 : 15; sf->partition_search_type = REFERENCE_PARTITION; if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 && cpi->rc.is_src_frame_alt_ref) { sf->partition_search_type = VAR_BASED_PARTITION; } sf->use_nonrd_pick_mode = 1; sf->allow_skip_recode = 0; sf->inter_mode_mask[BLOCK_32X32] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_32X64] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X32] = INTER_NEAREST_NEW_ZERO; sf->inter_mode_mask[BLOCK_64X64] = INTER_NEAREST_NEW_ZERO; sf->adaptive_rd_thresh = 2; // This feature is only enabled when partition search is disabled. sf->reuse_inter_pred_sby = 1; sf->coeff_prob_appx_step = 4; sf->use_fast_coef_updates = is_keyframe ? 
TWO_LOOP : ONE_LOOP_REDUCED; sf->mode_search_skip_flags = FLAG_SKIP_INTRA_DIRMISMATCH; sf->tx_size_search_method = is_keyframe ? USE_LARGESTALL : USE_TX_8X8; sf->simple_model_rd_from_var = 1; if (cpi->oxcf.rc_mode == VPX_VBR) sf->mv.search_method = NSTEP; if (!is_keyframe) { int i; if (content == VP9E_CONTENT_SCREEN) { for (i = 0; i < BLOCK_SIZES; ++i) if (i >= BLOCK_32X32) sf->intra_y_mode_bsize_mask[i] = INTRA_DC_H_V; else sf->intra_y_mode_bsize_mask[i] = INTRA_DC_TM_H_V; } else { for (i = 0; i < BLOCK_SIZES; ++i) if (i > BLOCK_16X16) sf->intra_y_mode_bsize_mask[i] = INTRA_DC; else // Use H and V intra mode for block sizes <= 16X16. sf->intra_y_mode_bsize_mask[i] = INTRA_DC_H_V; } } if (content == VP9E_CONTENT_SCREEN) { sf->short_circuit_flat_blocks = 1; } if (cpi->oxcf.rc_mode == VPX_CBR && cpi->oxcf.content != VP9E_CONTENT_SCREEN) { sf->limit_newmv_early_exit = 1; if (!cpi->use_svc) sf->bias_golden = 1; } // Keep nonrd_keyframe = 1 for non-base spatial layers to prevent // increase in encoding time. if (cpi->use_svc && svc->spatial_layer_id > 0) sf->nonrd_keyframe = 1; if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG && cpi->oxcf.rc_mode == VPX_CBR) { if (cm->width * cm->height <= 352 * 288 && !cpi->use_svc && cpi->oxcf.content != VP9E_CONTENT_SCREEN) sf->overshoot_detection_cbr_rt = RE_ENCODE_MAXQ; else sf->overshoot_detection_cbr_rt = FAST_DETECTION_MAXQ; } if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0 && cm->width <= 1280 && cm->height <= 720) { sf->use_altref_onepass = 1; sf->use_compound_nonrd_pickmode = 1; } if (cm->width * cm->height > 1280 * 720) sf->cb_pred_filter_search = 1; } if (speed >= 6) { if (cpi->oxcf.rc_mode == VPX_VBR && cpi->oxcf.lag_in_frames > 0) { sf->use_altref_onepass = 1; sf->use_compound_nonrd_pickmode = 1; } sf->partition_search_type = VAR_BASED_PARTITION; sf->mv.search_method = NSTEP; sf->mv.reduce_first_step_size = 1; sf->skip_encode_sb = 0; if (!cpi->external_resize) sf->use_source_sad = 1; if (sf->use_source_sad) { sf->adapt_partition_source_sad = 1; sf->adapt_partition_thresh = (cm->width * cm->height <= 640 * 360) ? 40000 : 60000; if (cpi->content_state_sb_fd == NULL && (!cpi->use_svc || svc->spatial_layer_id == svc->number_spatial_layers - 1)) { cpi->content_state_sb_fd = (uint8_t *)vpx_calloc( (cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1), sizeof(uint8_t)); } } if (cpi->oxcf.rc_mode == VPX_CBR && content != VP9E_CONTENT_SCREEN) { // Enable short circuit for low temporal variance. sf->short_circuit_low_temp_var = 1; } if (svc->temporal_layer_id > 0) { sf->adaptive_rd_thresh = 4; sf->limit_newmv_early_exit = 0; sf->base_mv_aggressive = 1; } if (cm->frame_type != KEY_FRAME && cpi->resize_state == ORIG && cpi->oxcf.rc_mode == VPX_CBR) sf->overshoot_detection_cbr_rt = FAST_DETECTION_MAXQ; } if (speed >= 7) { sf->adapt_partition_source_sad = 0; sf->adaptive_rd_thresh = 3; sf->mv.search_method = FAST_DIAMOND; sf->mv.fullpel_search_step_param = 10; // For SVC: use better mv search on base temporal layer, and only // on base spatial layer if highest resolution is above 640x360. 
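/* Worked reading of the condition below (comment added; not in the
 * original source): with 3 temporal layers and e.g. a 1280x720 top
 * resolution, only TL0 frames of the base spatial layer get the stronger
 * NSTEP search with fullpel_search_step_param = 6; every other layer keeps
 * the FAST_DIAMOND / step_param = 10 speed-7 defaults set above. */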
if (svc->number_temporal_layers > 2 && svc->temporal_layer_id == 0 &&
        (svc->spatial_layer_id == 0 ||
         cpi->oxcf.width * cpi->oxcf.height <= 640 * 360)) {
      sf->mv.search_method = NSTEP;
      sf->mv.fullpel_search_step_param = 6;
    }
    if (svc->temporal_layer_id > 0 || svc->spatial_layer_id > 1) {
      sf->use_simple_block_yrd = 1;
      if (svc->non_reference_frame)
        sf->mv.subpel_search_method = SUBPEL_TREE_PRUNED_EVENMORE;
    }
    if (cpi->use_svc && cpi->row_mt && cpi->oxcf.max_threads > 1)
      sf->adaptive_rd_thresh_row_mt = 1;
    // Enable partition copy. For SVC only enabled for top spatial resolution
    // layer.
    cpi->max_copied_frame = 0;
    if (!cpi->last_frame_dropped && cpi->resize_state == ORIG &&
        !cpi->external_resize &&
        (!cpi->use_svc ||
         (svc->spatial_layer_id == svc->number_spatial_layers - 1 &&
          !svc->last_layer_dropped[svc->number_spatial_layers - 1]))) {
      sf->copy_partition_flag = 1;
      cpi->max_copied_frame = 2;
      // Frames in the top temporal enhancement layer (for number of temporal
      // layers > 1) are non-reference frames, so use a large/max value for
      // max_copied_frame.
      if (svc->number_temporal_layers > 1 &&
          svc->temporal_layer_id == svc->number_temporal_layers - 1)
        cpi->max_copied_frame = 255;
    }
    // For SVC: enable use of lower resolution partition for higher resolution,
    // only for 3 spatial layers and when config/top resolution is above VGA.
    // Enable only for non-base temporal layer frames.
    if (cpi->use_svc && svc->use_partition_reuse &&
        svc->number_spatial_layers == 3 && svc->temporal_layer_id > 0 &&
        cpi->oxcf.width * cpi->oxcf.height > 640 * 480)
      sf->svc_use_lowres_part = 1;
    // For SVC when golden is used as second temporal reference: to avoid an
    // encode time increase, only use this feature on the base temporal layer
    // (i.e. remove golden flag from frame_flags for temporal_layer_id > 0).
    if (cpi->use_svc && svc->use_gf_temporal_ref_current_layer &&
        svc->temporal_layer_id > 0)
      cpi->ref_frame_flags &= (~VP9_GOLD_FLAG);
    if (cm->width * cm->height > 640 * 480) sf->cb_pred_filter_search = 1;
  }
  if (speed >= 8) {
    sf->adaptive_rd_thresh = 4;
    sf->skip_encode_sb = 1;
    sf->nonrd_keyframe = 1;
    if (!cpi->use_svc) cpi->max_copied_frame = 4;
    if (cpi->row_mt && cpi->oxcf.max_threads > 1)
      sf->adaptive_rd_thresh_row_mt = 1;
    // Enable ML based partition for low res.
    if (!frame_is_intra_only(cm) && cm->width * cm->height <= 352 * 288) {
      sf->nonrd_use_ml_partition = 1;
    }
#if CONFIG_VP9_HIGHBITDEPTH
    if (cpi->Source->flags & YV12_FLAG_HIGHBITDEPTH)
      sf->nonrd_use_ml_partition = 0;
#endif
    if (content == VP9E_CONTENT_SCREEN) sf->mv.subpel_force_stop = HALF_PEL;
    sf->rt_intra_dc_only_low_content = 1;
    if (!cpi->use_svc && cpi->oxcf.rc_mode == VPX_CBR &&
        content != VP9E_CONTENT_SCREEN) {
      // More aggressive short circuit for speed 8.
      sf->short_circuit_low_temp_var = 3;
      // Use level 2 for noisy cases as there is a regression in some
      // noisy clips with level 3.
      if (cpi->noise_estimate.enabled && cm->width >= 1280 &&
          cm->height >= 720) {
        NOISE_LEVEL noise_level = vp9_noise_estimate_extract_level(&cpi->noise_estimate);
        if (noise_level >= kMedium) sf->short_circuit_low_temp_var = 2;
      }
      // Since the short_circuit_low_temp_var is used, reduce the
      // adaptive_rd_thresh level.
      if (cm->width * cm->height > 352 * 288)
        sf->adaptive_rd_thresh = 1;
      else
        sf->adaptive_rd_thresh = 2;
    }
    sf->limit_newmv_early_exit = 0;
    sf->use_simple_block_yrd = 1;
    if (cm->width * cm->height > 352 * 288) sf->cb_pred_filter_search = 1;
  }
  if (speed >= 9) {
    // Only keep INTRA_DC mode for speed 9.
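/* Note added for clarity (not in the original source): the loop below
 * masks intra prediction on non-key frames down to the DC predictor for
 * every block size, i.e. intra_y_mode_bsize_mask[bsize] == INTRA_DC ==
 * (1 << DC_PRED) as defined in vp9_speed_features.h. */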
if (!is_keyframe) {
      int i = 0;
      for (i = 0; i < BLOCK_SIZES; ++i)
        sf->intra_y_mode_bsize_mask[i] = INTRA_DC;
    }
    sf->cb_pred_filter_search = 1;
    sf->mv.enable_adaptive_subpel_force_stop = 1;
    sf->mv.adapt_subpel_force_stop.mv_thresh = 1;
    sf->mv.adapt_subpel_force_stop.force_stop_below = QUARTER_PEL;
    sf->mv.adapt_subpel_force_stop.force_stop_above = HALF_PEL;
    // Disable partition blocks below 16x16, except for low resolutions.
    if (cm->frame_type != KEY_FRAME && cm->width >= 320 && cm->height >= 240)
      sf->disable_16x16part_nonkey = 1;
    // Allow for disabling GOLDEN reference, for CBR mode.
    if (cpi->oxcf.rc_mode == VPX_CBR) sf->disable_golden_ref = 1;
    if (cpi->rc.avg_frame_low_motion < 70) sf->default_interp_filter = BILINEAR;
    if (cm->width * cm->height >= 640 * 360) sf->variance_part_thresh_mult = 2;
  }
  if (sf->nonrd_use_ml_partition)
    sf->partition_search_type = ML_BASED_PARTITION;
  if (sf->use_altref_onepass) {
    if (cpi->rc.is_src_frame_alt_ref && cm->frame_type != KEY_FRAME) {
      sf->partition_search_type = FIXED_PARTITION;
      sf->always_this_block_size = BLOCK_64X64;
    }
    if (cpi->count_arf_frame_usage == NULL)
      cpi->count_arf_frame_usage =
          (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
                                sizeof(*cpi->count_arf_frame_usage));
    if (cpi->count_lastgolden_frame_usage == NULL)
      cpi->count_lastgolden_frame_usage =
          (uint8_t *)vpx_calloc((cm->mi_stride >> 3) * ((cm->mi_rows >> 3) + 1),
                                sizeof(*cpi->count_lastgolden_frame_usage));
  }
  if (svc->previous_frame_is_intra_only) {
    sf->partition_search_type = FIXED_PARTITION;
    sf->always_this_block_size = BLOCK_64X64;
  }
  // Special case for screen content: increase motion search on base spatial
  // layer when high motion is detected or previous SL0 frame was dropped.
  if (cpi->oxcf.content == VP9E_CONTENT_SCREEN && cpi->oxcf.speed >= 5 &&
      (svc->high_num_blocks_with_motion || svc->last_layer_dropped[0])) {
    sf->mv.search_method = NSTEP;
    // TODO(marpan/jianj): Tune this setting for screensharing. For now use
    // small step_param for all spatial layers.
    sf->mv.fullpel_search_step_param = 2;
  }
  // TODO(marpan): There is a regression for aq-mode=3 speed <= 4, force it
  // off for now.
  if (speed <= 3 && cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ)
    cpi->oxcf.aq_mode = 0;
}
void vp9_set_speed_features_framesize_dependent(VP9_COMP *cpi, int speed) {
  SPEED_FEATURES *const sf = &cpi->sf;
  const VP9EncoderConfig *const oxcf = &cpi->oxcf;
  RD_OPT *const rd = &cpi->rd;
  int i;
  // best quality defaults
  // Some speed-up features are used even for best quality as they have
  // minimal impact on quality.
  sf->partition_search_breakout_thr.dist = (1 << 19);
  sf->partition_search_breakout_thr.rate = 80;
  sf->rd_ml_partition.search_early_termination = 0;
  sf->rd_ml_partition.search_breakout = 0;
  if (oxcf->mode == REALTIME)
    set_rt_speed_feature_framesize_dependent(cpi, sf, speed);
#if !CONFIG_REALTIME_ONLY
  else if (oxcf->mode == GOOD)
    set_good_speed_feature_framesize_dependent(cpi, sf, speed);
#endif
  if (sf->disable_split_mask == DISABLE_ALL_SPLIT) {
    sf->adaptive_pred_interp_filter = 0;
  }
  if (cpi->encode_breakout && oxcf->mode == REALTIME &&
      sf->encode_breakout_thresh > cpi->encode_breakout) {
    cpi->encode_breakout = sf->encode_breakout_thresh;
  }
  // Check for masked out split cases.
  for (i = 0; i < MAX_REFS; ++i) {
    if (sf->disable_split_mask & (1 << i)) {
      rd->thresh_mult_sub8x8[i] = INT_MAX;
    }
  }
  // With row based multi-threading, the following speed features
  // have to be disabled to guarantee that bitstreams encoded with single thread
  // and multiple threads match.
// It can be used in realtime when adaptive_rd_thresh_row_mt is enabled since // adaptive_rd_thresh is defined per-row for non-rd pickmode. if (!sf->adaptive_rd_thresh_row_mt && cpi->row_mt_bit_exact && oxcf->max_threads > 1) sf->adaptive_rd_thresh = 0; } void vp9_set_speed_features_framesize_independent(VP9_COMP *cpi, int speed) { SPEED_FEATURES *const sf = &cpi->sf; #if !CONFIG_REALTIME_ONLY VP9_COMMON *const cm = &cpi->common; #endif MACROBLOCK *const x = &cpi->td.mb; const VP9EncoderConfig *const oxcf = &cpi->oxcf; int i; // best quality defaults sf->frame_parameter_update = 1; sf->mv.search_method = NSTEP; sf->recode_loop = ALLOW_RECODE_FIRST; sf->mv.subpel_search_method = SUBPEL_TREE; sf->mv.subpel_search_level = 2; sf->mv.subpel_force_stop = EIGHTH_PEL; sf->optimize_coefficients = !is_lossless_requested(&cpi->oxcf); sf->mv.reduce_first_step_size = 0; sf->coeff_prob_appx_step = 1; sf->mv.auto_mv_step_size = 0; sf->mv.fullpel_search_step_param = 6; sf->comp_inter_joint_search_thresh = BLOCK_4X4; sf->tx_size_search_method = USE_FULL_RD; sf->use_lp32x32fdct = 0; sf->adaptive_motion_search = 0; sf->enhanced_full_pixel_motion_search = 1; sf->adaptive_pred_interp_filter = 0; sf->adaptive_mode_search = 0; sf->cb_pred_filter_search = 0; sf->cb_partition_search = 0; sf->motion_field_mode_search = 0; sf->alt_ref_search_fp = 0; sf->use_quant_fp = 0; sf->reference_masking = 0; sf->partition_search_type = SEARCH_PARTITION; sf->less_rectangular_check = 0; sf->use_square_partition_only = 0; sf->use_square_only_thresh_high = BLOCK_SIZES; sf->use_square_only_thresh_low = BLOCK_4X4; sf->auto_min_max_partition_size = NOT_IN_USE; sf->rd_auto_partition_min_limit = BLOCK_4X4; sf->default_max_partition_size = BLOCK_64X64; sf->default_min_partition_size = BLOCK_4X4; sf->adjust_partitioning_from_last_frame = 0; sf->last_partitioning_redo_frequency = 4; sf->disable_split_mask = 0; sf->mode_search_skip_flags = 0; sf->force_frame_boost = 0; sf->max_delta_qindex = 0; sf->disable_filter_search_var_thresh = 0; sf->adaptive_interp_filter_search = 0; sf->allow_partition_search_skip = 0; sf->allow_txfm_domain_distortion = 0; sf->tx_domain_thresh = 99.0; sf->allow_quant_coeff_opt = sf->optimize_coefficients; sf->quant_opt_thresh = 99.0; sf->allow_acl = 1; sf->enable_tpl_model = oxcf->enable_tpl_model; sf->prune_ref_frame_for_rect_partitions = 0; sf->temporal_filter_search_method = MESH; for (i = 0; i < TX_SIZES; i++) { sf->intra_y_mode_mask[i] = INTRA_ALL; sf->intra_uv_mode_mask[i] = INTRA_ALL; } sf->use_rd_breakout = 0; sf->skip_encode_sb = 0; sf->use_uv_intra_rd_estimate = 0; sf->allow_skip_recode = 0; sf->lpf_pick = LPF_PICK_FROM_FULL_IMAGE; sf->use_fast_coef_updates = TWO_LOOP; sf->use_fast_coef_costing = 0; sf->mode_skip_start = MAX_MODES; // Mode index at which mode skip mask set sf->schedule_mode_search = 0; sf->use_nonrd_pick_mode = 0; for (i = 0; i < BLOCK_SIZES; ++i) sf->inter_mode_mask[i] = INTER_ALL; sf->max_intra_bsize = BLOCK_64X64; sf->reuse_inter_pred_sby = 0; // This setting only takes effect when partition_search_type is set // to FIXED_PARTITION. sf->always_this_block_size = BLOCK_16X16; sf->search_type_check_frequency = 50; sf->encode_breakout_thresh = 0; // Recode loop tolerance %. 
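/* Hedged note (added; not in the original source): these percentages
 * appear to bound the acceptable deviation of a frame's projected size
 * from its rate target, e.g. with recode_tolerance_high = 25 an overshoot
 * beyond roughly 25% makes the frame a candidate for re-encoding at an
 * adjusted Q, subject to the recode_loop policy selected elsewhere. */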
sf->recode_tolerance_low = 12;
  sf->recode_tolerance_high = 25;
  sf->default_interp_filter = SWITCHABLE;
  sf->simple_model_rd_from_var = 0;
  sf->short_circuit_flat_blocks = 0;
  sf->short_circuit_low_temp_var = 0;
  sf->limit_newmv_early_exit = 0;
  sf->bias_golden = 0;
  sf->base_mv_aggressive = 0;
  sf->rd_ml_partition.prune_rect_thresh[0] = -1;
  sf->rd_ml_partition.prune_rect_thresh[1] = -1;
  sf->rd_ml_partition.prune_rect_thresh[2] = -1;
  sf->rd_ml_partition.prune_rect_thresh[3] = -1;
  sf->rd_ml_partition.var_pruning = 0;
  sf->use_accurate_subpel_search = USE_8_TAPS;
  // Some speed-up features are used even for best quality as they have
  // minimal impact on quality.
  sf->adaptive_rd_thresh = 1;
  sf->tx_size_search_breakout = 1;
  sf->tx_size_search_depth = 2;
  sf->exhaustive_searches_thresh =
      (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) ? (1 << 20)
                                                              : INT_MAX;
  if (cpi->twopass.fr_content_type == FC_GRAPHICS_ANIMATION) {
    for (i = 0; i < MAX_MESH_STEP; ++i) {
      sf->mesh_patterns[i].range = best_quality_mesh_pattern[i].range;
      sf->mesh_patterns[i].interval = best_quality_mesh_pattern[i].interval;
    }
  }
  if (oxcf->mode == REALTIME)
    set_rt_speed_feature_framesize_independent(cpi, sf, speed, oxcf->content);
#if !CONFIG_REALTIME_ONLY
  else if (oxcf->mode == GOOD)
    set_good_speed_feature_framesize_independent(cpi, cm, sf, speed);
#endif
  cpi->diamond_search_sad = vp9_diamond_search_sad;
  // Slow quant, dct and trellis not worthwhile for first pass
  // so make sure they are always turned off.
  if (oxcf->pass == 1) sf->optimize_coefficients = 0;
  // No recode for 1 pass.
  if (oxcf->pass == 0) {
    sf->recode_loop = DISALLOW_RECODE;
    sf->optimize_coefficients = 0;
  }
  if (sf->mv.subpel_force_stop == FULL_PEL) {
    // Whole pel only
    cpi->find_fractional_mv_step = vp9_skip_sub_pixel_tree;
  } else if (sf->mv.subpel_search_method == SUBPEL_TREE) {
    cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree;
  } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED) {
    cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned;
  } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_MORE) {
    cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_more;
  } else if (sf->mv.subpel_search_method == SUBPEL_TREE_PRUNED_EVENMORE) {
    cpi->find_fractional_mv_step = vp9_find_best_sub_pixel_tree_pruned_evenmore;
  }
  // This is only used in the motion vector unit test.
  if (cpi->oxcf.motion_vector_unit_test == 1)
    cpi->find_fractional_mv_step = vp9_return_max_sub_pixel_mv;
  else if (cpi->oxcf.motion_vector_unit_test == 2)
    cpi->find_fractional_mv_step = vp9_return_min_sub_pixel_mv;
  x->optimize = sf->optimize_coefficients == 1 && oxcf->pass != 1;
  x->min_partition_size = sf->default_min_partition_size;
  x->max_partition_size = sf->default_max_partition_size;
  if (!cpi->oxcf.frame_periodic_boost) {
    sf->max_delta_qindex = 0;
  }
  // With row based multi-threading, the following speed features
  // have to be disabled to guarantee that bitstreams encoded with single thread
  // and multiple threads match.
  // It can be used in realtime when adaptive_rd_thresh_row_mt is enabled since
  // adaptive_rd_thresh is defined per-row for non-rd pickmode.
  if (!sf->adaptive_rd_thresh_row_mt && cpi->row_mt_bit_exact &&
      oxcf->max_threads > 1)
    sf->adaptive_rd_thresh = 0;
}
libvpx-1.8.2/vp9/encoder/vp9_speed_features.h000066400000000000000000000524661357355204000211450ustar00rootroot00000000000000/*
 * Copyright (c) 2010 The WebM project authors. All Rights Reserved.
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_SPEED_FEATURES_H_ #define VPX_VP9_ENCODER_VP9_SPEED_FEATURES_H_ #include "vp9/common/vp9_enums.h" #ifdef __cplusplus extern "C" { #endif enum { INTRA_ALL = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED) | (1 << D45_PRED) | (1 << D135_PRED) | (1 << D117_PRED) | (1 << D153_PRED) | (1 << D207_PRED) | (1 << D63_PRED) | (1 << TM_PRED), INTRA_DC = (1 << DC_PRED), INTRA_DC_TM = (1 << DC_PRED) | (1 << TM_PRED), INTRA_DC_H_V = (1 << DC_PRED) | (1 << V_PRED) | (1 << H_PRED), INTRA_DC_TM_H_V = (1 << DC_PRED) | (1 << TM_PRED) | (1 << V_PRED) | (1 << H_PRED) }; enum { INTER_ALL = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV) | (1 << NEWMV), INTER_NEAREST = (1 << NEARESTMV), INTER_NEAREST_NEW = (1 << NEARESTMV) | (1 << NEWMV), INTER_NEAREST_ZERO = (1 << NEARESTMV) | (1 << ZEROMV), INTER_NEAREST_NEW_ZERO = (1 << NEARESTMV) | (1 << ZEROMV) | (1 << NEWMV), INTER_NEAREST_NEAR_NEW = (1 << NEARESTMV) | (1 << NEARMV) | (1 << NEWMV), INTER_NEAREST_NEAR_ZERO = (1 << NEARESTMV) | (1 << NEARMV) | (1 << ZEROMV), }; enum { DISABLE_ALL_INTER_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) | (1 << THR_ALTR) | (1 << THR_GOLD) | (1 << THR_LAST), DISABLE_ALL_SPLIT = (1 << THR_INTRA) | DISABLE_ALL_INTER_SPLIT, DISABLE_COMPOUND_SPLIT = (1 << THR_COMP_GA) | (1 << THR_COMP_LA), LAST_AND_INTRA_SPLIT_ONLY = (1 << THR_COMP_GA) | (1 << THR_COMP_LA) | (1 << THR_ALTR) | (1 << THR_GOLD) }; typedef enum { DIAMOND = 0, NSTEP = 1, HEX = 2, BIGDIA = 3, SQUARE = 4, FAST_HEX = 5, FAST_DIAMOND = 6, MESH = 7 } SEARCH_METHODS; typedef enum { // No recode. DISALLOW_RECODE = 0, // Allow recode for KF and exceeding maximum frame bandwidth. ALLOW_RECODE_KFMAXBW = 1, // Allow recode only for KF/ARF/GF frames. ALLOW_RECODE_KFARFGF = 2, // Allow recode for ARF/GF/KF and first normal frame in each group. ALLOW_RECODE_FIRST = 3, // Allow recode for all frames based on bitrate constraints. ALLOW_RECODE = 4, } RECODE_LOOP_TYPE; typedef enum { SUBPEL_TREE = 0, SUBPEL_TREE_PRUNED = 1, // Prunes 1/2-pel searches SUBPEL_TREE_PRUNED_MORE = 2, // Prunes 1/2-pel searches more aggressively SUBPEL_TREE_PRUNED_EVENMORE = 3, // Prunes 1/2- and 1/4-pel searches // Other methods to come } SUBPEL_SEARCH_METHODS; typedef enum { NO_MOTION_THRESHOLD = 0, LOW_MOTION_THRESHOLD = 7 } MOTION_THRESHOLD; typedef enum { USE_FULL_RD = 0, USE_LARGESTALL, USE_TX_8X8 } TX_SIZE_SEARCH_METHOD; typedef enum { NOT_IN_USE = 0, RELAXED_NEIGHBORING_MIN_MAX = 1, STRICT_NEIGHBORING_MIN_MAX = 2 } AUTO_MIN_MAX_MODE; typedef enum { // Try the full image with different values. LPF_PICK_FROM_FULL_IMAGE, // Try a small portion of the image with different values. LPF_PICK_FROM_SUBIMAGE, // Estimate the level based on quantizer and frame type LPF_PICK_FROM_Q, // Pick 0 to disable LPF if LPF was enabled last frame LPF_PICK_MINIMAL_LPF } LPF_PICK_METHOD; typedef enum { // Terminate search early based on distortion so far compared to // qp step, distortion in the neighborhood of the frame, etc. FLAG_EARLY_TERMINATE = 1 << 0, // Skips comp inter modes if the best so far is an intra mode. FLAG_SKIP_COMP_BESTINTRA = 1 << 1, // Skips oblique intra modes if the best so far is an inter mode. 
FLAG_SKIP_INTRA_BESTINTER = 1 << 3, // Skips oblique intra modes at angles 27, 63, 117, 153 if the best // intra so far is not one of the neighboring directions. FLAG_SKIP_INTRA_DIRMISMATCH = 1 << 4, // Skips intra modes other than DC_PRED if the source variance is small FLAG_SKIP_INTRA_LOWVAR = 1 << 5, } MODE_SEARCH_SKIP_LOGIC; typedef enum { FLAG_SKIP_EIGHTTAP = 1 << EIGHTTAP, FLAG_SKIP_EIGHTTAP_SMOOTH = 1 << EIGHTTAP_SMOOTH, FLAG_SKIP_EIGHTTAP_SHARP = 1 << EIGHTTAP_SHARP, } INTERP_FILTER_MASK; typedef enum { // Search partitions using RD/NONRD criterion. SEARCH_PARTITION, // Always use a fixed size partition. FIXED_PARTITION, REFERENCE_PARTITION, // Use an arbitrary partitioning scheme based on source variance within // a 64X64 SB. VAR_BASED_PARTITION, // Use non-fixed partitions based on source variance. SOURCE_VAR_BASED_PARTITION, // Make partition decisions with machine learning models. ML_BASED_PARTITION } PARTITION_SEARCH_TYPE; typedef enum { // Does a dry run to see if any of the contexts need to be updated or not, // before the final run. TWO_LOOP = 0, // No dry run, also only half the coef contexts and bands are updated. // The rest are not updated at all. ONE_LOOP_REDUCED = 1 } FAST_COEFF_UPDATE; typedef enum { EIGHTH_PEL, QUARTER_PEL, HALF_PEL, FULL_PEL } SUBPEL_FORCE_STOP; typedef struct ADAPT_SUBPEL_FORCE_STOP { // Threshold for full pixel motion vector; int mv_thresh; // subpel_force_stop if full pixel MV is below the threshold. SUBPEL_FORCE_STOP force_stop_below; // subpel_force_stop if full pixel MV is equal to or above the threshold. SUBPEL_FORCE_STOP force_stop_above; } ADAPT_SUBPEL_FORCE_STOP; typedef struct MV_SPEED_FEATURES { // Motion search method (Diamond, NSTEP, Hex, Big Diamond, Square, etc). SEARCH_METHODS search_method; // This parameter controls which step in the n-step process we start at. // It's changed adaptively based on circumstances. int reduce_first_step_size; // If this is set to 1, we limit the motion search range to 2 times the // largest motion vector found in the last frame. int auto_mv_step_size; // Subpel_search_method can only be subpel_tree which does a subpixel // logarithmic search that keeps stepping at 1/2 pixel units until // you stop getting a gain, and then goes on to 1/4 and repeats // the same process. Along the way it skips many diagonals. SUBPEL_SEARCH_METHODS subpel_search_method; // Subpel MV search level. Can take values 0 - 2. Higher values mean more // extensive subpel search. int subpel_search_level; // When to stop subpel motion search. SUBPEL_FORCE_STOP subpel_force_stop; // If it's enabled, different subpel_force_stop will be used for different MV. int enable_adaptive_subpel_force_stop; ADAPT_SUBPEL_FORCE_STOP adapt_subpel_force_stop; // This variable sets the step_param used in full pel motion search. int fullpel_search_step_param; } MV_SPEED_FEATURES; typedef struct PARTITION_SEARCH_BREAKOUT_THR { int64_t dist; int rate; } PARTITION_SEARCH_BREAKOUT_THR; #define MAX_MESH_STEP 4 typedef struct MESH_PATTERN { int range; int interval; } MESH_PATTERN; typedef enum { // No reaction to rate control on a detected slide/scene change. NO_DETECTION = 0, // Set to larger Q (max_q set by user) based only on the // detected slide/scene change and current/past Q. FAST_DETECTION_MAXQ = 1, // Based on (first pass) encoded frame, if large frame size is detected // then set to higher Q for the second re-encode. This involves 2 pass // encoding on slide change, so slower than 1, but more accurate for // detecting overshoot. 
RE_ENCODE_MAXQ = 2 } OVERSHOOT_DETECTION_CBR_RT; typedef enum { USE_2_TAPS = 0, USE_4_TAPS, USE_8_TAPS, USE_8_TAPS_SHARP, } SUBPEL_SEARCH_TYPE; typedef struct SPEED_FEATURES { MV_SPEED_FEATURES mv; // Frame level coding parameter update int frame_parameter_update; RECODE_LOOP_TYPE recode_loop; // Trellis (dynamic programming) optimization of quantized values (+1, 0). int optimize_coefficients; // Always set to 0. If on it enables 0 cost background transmission // (except for the initial transmission of the segmentation). The feature is // disabled because the addition of very large block sizes make the // backgrounds very cheap to encode, and the segmentation we have // adds overhead. int static_segmentation; // If 1 we iterate finding a best reference for 2 ref frames together - via // a log search that iterates 4 times (check around mv for last for best // error of combined predictor then check around mv for alt). If 0 we // just use the best motion vector found for each frame by itself. BLOCK_SIZE comp_inter_joint_search_thresh; // This variable is used to cap the maximum number of times we skip testing a // mode to be evaluated. A high value means we will be faster. // Turned off when (row_mt_bit_exact == 1 && adaptive_rd_thresh_row_mt == 0). int adaptive_rd_thresh; // Flag to use adaptive_rd_thresh when row-mt is enabled, only for non-rd // pickmode. int adaptive_rd_thresh_row_mt; // Enables skipping the reconstruction step (idct, recon) in the // intermediate steps assuming the last frame didn't have too many intra // blocks and the q is less than a threshold. int skip_encode_sb; int skip_encode_frame; // Speed feature to allow or disallow skipping of recode at block // level within a frame. int allow_skip_recode; // Coefficient probability model approximation step size int coeff_prob_appx_step; // Enable uniform quantizer followed by trellis coefficient optimization int allow_quant_coeff_opt; double quant_opt_thresh; // Enable asymptotic closed-loop encoding decision for key frame and // alternate reference frames. int allow_acl; // Temporal dependency model based encoding mode optimization int enable_tpl_model; // Use transform domain distortion. Use pixel domain distortion in speed 0 // and certain situations in higher speed to improve the RD model precision. int allow_txfm_domain_distortion; double tx_domain_thresh; // The threshold determines how slow the motion is; it is used when // use_lastframe_partitioning is set to LAST_FRAME_PARTITION_LOW_MOTION MOTION_THRESHOLD lf_motion_threshold; // Determines which method we use to select transform size. We can choose // between options like full rd, largest for prediction size, largest // for intra and model coefs for the rest. TX_SIZE_SEARCH_METHOD tx_size_search_method; // How many levels of tx size to search, starting from the largest. int tx_size_search_depth; // Low precision 32x32 fdct keeps everything in 16 bits and thus is less // precise but significantly faster than the non lp version. int use_lp32x32fdct; // After looking at the first set of modes (set by index here), skip // checking modes for reference frames that don't match the reference frame // of the best so far. int mode_skip_start; // TODO(JBB): Remove this. int reference_masking; PARTITION_SEARCH_TYPE partition_search_type; // Used if partition_search_type = FIXED_PARTITION BLOCK_SIZE always_this_block_size; // Skip rectangular partition test when partition type none gives better // rd than partition type split.
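// A minimal sketch (not the actual libvpx recode test): under the
// ALLOW_RECODE* policies selected by recode_loop above, a frame is
// re-encoded when its coded size misses the target by more than the
// recode_tolerance_low/high percentages declared later in this struct.
#if 0
static int needs_recode(int64_t actual_bits, int64_t target_bits,
                        int tolerance_low_pct, int tolerance_high_pct) {
  return actual_bits > target_bits + target_bits * tolerance_high_pct / 100 ||
         actual_bits < target_bits - target_bits * tolerance_low_pct / 100;
}
#endif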
int less_rectangular_check; // Disable testing non-square partitions (e.g., 16x32) for block sizes larger // than use_square_only_thresh_high or smaller than use_square_only_thresh_low. int use_square_partition_only; BLOCK_SIZE use_square_only_thresh_high; BLOCK_SIZE use_square_only_thresh_low; // Prune reference frames for rectangular partitions. int prune_ref_frame_for_rect_partitions; // Sets min and max partition sizes for this 64x64 region based on the // same 64x64 in last encoded frame, and the left and above neighbor. AUTO_MIN_MAX_MODE auto_min_max_partition_size; // Ensures the rd based auto partition search will always // go down at least to the specified level. BLOCK_SIZE rd_auto_partition_min_limit; // Min and max partition size we enable (block_size) as per auto // min max, but also used by adjust partitioning, and pick_partitioning. BLOCK_SIZE default_min_partition_size; BLOCK_SIZE default_max_partition_size; // Whether or not we allow partitions one smaller or one greater than the last // frame's partitioning. Only used if use_lastframe_partitioning is set. int adjust_partitioning_from_last_frame; // How frequently we redo the partitioning from scratch. Only used if // use_lastframe_partitioning is set. int last_partitioning_redo_frequency; // Disables sub 8x8 blocksizes in different scenarios: Choices are to disable // it always, to allow it for only Last frame and Intra, disable it for all // inter modes or to enable it always. int disable_split_mask; // TODO(jingning): combine the related motion search speed features // This allows us to use motion search at other sizes as a starting // point for this motion search and limits the search range around it. int adaptive_motion_search; // Do extra full pixel motion search to obtain better motion vector. int enhanced_full_pixel_motion_search; // Threshold for allowing exhaustive motion search. int exhaustive_searches_thresh; // Pattern to be used for any exhaustive mesh searches. MESH_PATTERN mesh_patterns[MAX_MESH_STEP]; int schedule_mode_search; // Allows sub 8x8 modes to use the prediction filter that was determined // best for 8x8 mode. If set to 0 we always re-check all the filters for // sizes less than 8x8, 1 means we check all filter modes if no 8x8 filter // was selected, and 2 means we use 8 tap if no 8x8 filter mode was selected. int adaptive_pred_interp_filter; // Adaptive prediction mode search int adaptive_mode_search; // Chessboard pattern prediction filter type search int cb_pred_filter_search; int cb_partition_search; int motion_field_mode_search; int alt_ref_search_fp; // Fast quantization process path int use_quant_fp; // Use a finer quantizer every few frames that run the variable block // partition type search. int force_frame_boost; // Maximally allowed base quantization index fluctuation. int max_delta_qindex; // Implements various heuristics to skip searching modes. // The heuristics selected are based on flags // defined in the MODE_SEARCH_SKIP_LOGIC enum. unsigned int mode_search_skip_flags; // A source variance threshold below which filter search is disabled // Choose a very large value (UINT_MAX) to use 8-tap always unsigned int disable_filter_search_var_thresh; // These bit masks allow you to enable or disable intra modes for each // transform size separately. int intra_y_mode_mask[TX_SIZES]; int intra_uv_mode_mask[TX_SIZES]; // These bit masks allow you to enable or disable intra modes for each // prediction block size separately.
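// A minimal sketch (not libvpx code) of the coarse-to-fine mesh search that
// the mesh_patterns[] member above configures: each MESH_PATTERN contributes
// a square of half-width 'range' sampled every 'interval' pixels around the
// best point so far. mesh_cost() is a hypothetical error callback; MV is the
// (int16_t row, col) pair from vp9/common/vp9_mv.h.
#if 0
static MV mesh_search(MV best, const MESH_PATTERN *pat, int num_steps) {
  int s;
  for (s = 0; s < num_steps; ++s) {
    const MV center = best;
    int64_t best_cost = mesh_cost(best);
    int r, c;
    for (r = -pat[s].range; r <= pat[s].range; r += pat[s].interval) {
      for (c = -pat[s].range; c <= pat[s].range; c += pat[s].interval) {
        const MV cand = { center.row + r, center.col + c };
        const int64_t cost = mesh_cost(cand);
        if (cost < best_cost) {
          best_cost = cost;
          best = cand;
        }
      }
    }
  }
  return best;
}
#endif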
int intra_y_mode_bsize_mask[BLOCK_SIZES]; // This variable enables an early break out of mode testing if the model for // rd built from the prediction signal indicates a value that's much // higher than the best rd we've seen so far. int use_rd_breakout; // This enables us to use an estimate for intra rd based on dc mode rather // than choosing an actual uv mode in the stage of encoding before the actual // final encode. int use_uv_intra_rd_estimate; // This feature controls how the loop filter level is determined. LPF_PICK_METHOD lpf_pick; // This feature limits the number of coefficient updates we actually do // by only looking at counts from 1/2 the bands. FAST_COEFF_UPDATE use_fast_coef_updates; // This flag controls the use of non-RD mode decision. int use_nonrd_pick_mode; // A binary mask indicating if NEARESTMV, NEARMV, ZEROMV, NEWMV // modes are used in order from LSB to MSB for each BLOCK_SIZE. int inter_mode_mask[BLOCK_SIZES]; // This feature controls whether we do the expensive context update and // calculation in the rd coefficient costing loop. int use_fast_coef_costing; // This feature controls the tolerance vs target used in deciding whether to // recode a frame. It has no meaning if recode is disabled. int recode_tolerance_low; int recode_tolerance_high; // This variable controls the maximum block size where intra blocks can be // used in inter frames. // TODO(aconverse): Fold this into one of the other many mode skips BLOCK_SIZE max_intra_bsize; // The frequency that we check if SOURCE_VAR_BASED_PARTITION or // FIXED_PARTITION search type should be used. int search_type_check_frequency; // When partition is pre-set, the inter prediction result from pick_inter_mode // can be reused in the final block encoding process. It is enabled only for // real-time mode speed 6. int reuse_inter_pred_sby; // This variable sets the encode_breakout threshold. Currently, it is only // enabled in real time mode. int encode_breakout_thresh; // default interp filter choice INTERP_FILTER default_interp_filter; // Early termination in transform size search, which only applies while // tx_size_search_method is USE_FULL_RD. int tx_size_search_breakout; // adaptive interp_filter search to allow skip of certain filter types. int adaptive_interp_filter_search; // mask for skip evaluation of certain interp_filter type. INTERP_FILTER_MASK interp_filter_search_mask; // Partition search early breakout thresholds. PARTITION_SEARCH_BREAKOUT_THR partition_search_breakout_thr; struct { // Use ML-based partition search early breakout. int search_breakout; // Higher values mean more aggressiveness for partition search breakout that // results in better encoding speed but worse compression performance. float search_breakout_thresh[3]; // Machine-learning based partition search early termination int search_early_termination; // Machine-learning based partition search pruning using prediction residue // variance. int var_pruning; // Threshold values used for ML based rectangular partition search pruning. // If < 0, the feature is turned off. // Higher values mean more aggressiveness to skip rectangular partition // search that results in better encoding speed but worse coding // performance. int prune_rect_thresh[4]; } rd_ml_partition; // Allow skipping partition search for still image frame int allow_partition_search_skip; // Fast approximation of vp9_model_rd_from_var_lapndz int simple_model_rd_from_var; // Skip a number of expensive mode evaluations for blocks with zero source // variance.
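// A minimal sketch (not libvpx code): inter_mode_mask[] above is read the
// same way as the INTER_* masks at the top of this header, one prediction
// mode per bit.
#if 0
static int inter_mode_allowed(int inter_mode_mask_for_bsize,
                              PREDICTION_MODE mode) {
  // mode is one of NEARESTMV..NEWMV; the mask is sf->inter_mode_mask[bsize].
  return (inter_mode_mask_for_bsize >> mode) & 1;
}
#endif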
int short_circuit_flat_blocks; // Skip a number of expensive mode evaluations for blocks with very low // temporal variance. If the low temporal variance flag is set for a block, // do the following: // 1: Skip all golden modes and ALL INTRA for bsize >= 32x32. // 2: Skip golden non-zeromv and newmv-last for bsize >= 16x16, skip ALL // INTRA for bsize >= 32x32 and vert/horz INTRA for bsize 16x16, 16x32 and // 32x16. // 3: Same as (2), but also skip golden zeromv. int short_circuit_low_temp_var; // Limits the rd-threshold update for early exit for the newmv-last mode, // for non-rd mode. int limit_newmv_early_exit; // Adds a bias against golden reference, for non-rd mode. int bias_golden; // Bias to use base mv and skip 1/4 subpel search when using base mv in // the enhancement layer. int base_mv_aggressive; // Global flag to enable partition copy from the previous frame. int copy_partition_flag; // Compute the source sad for every superblock of the frame, // prior to encoding the frame, to be used to bypass some encoder decisions. int use_source_sad; int use_simple_block_yrd; // If source sad of superblock is high (> adapt_partition_thresh), will switch // from VARIANCE_PARTITION to REFERENCE_PARTITION (which selects partition // based on the nonrd-pickmode). int adapt_partition_source_sad; int adapt_partition_thresh; // Enable use of alt-refs in 1 pass VBR. int use_altref_onepass; // Enable use of compound prediction, for nonrd_pickmode with nonzero lag. int use_compound_nonrd_pickmode; // Always use nonrd_pick_intra for all block sizes on keyframes. int nonrd_keyframe; // For SVC: enables use of partition from lower spatial resolution. int svc_use_lowres_part; // Flag to indicate process for handling overshoot on slide/scene change, // for real-time CBR mode. OVERSHOOT_DETECTION_CBR_RT overshoot_detection_cbr_rt; // Disable partitioning of 16x16 blocks. int disable_16x16part_nonkey; // Allow for disabling golden reference. int disable_golden_ref; // Allow sub-pixel search to use interpolation filters with different taps in // order to achieve accurate motion search result. SUBPEL_SEARCH_TYPE use_accurate_subpel_search; // Search method used by temporal filtering in full_pixel_motion_search. SEARCH_METHODS temporal_filter_search_method; // Use machine learning based partition search. int nonrd_use_ml_partition; // Multiplier for the base threshold for variance partitioning. int variance_part_thresh_mult; // Force subpel motion filter to always use SMOOTH_FILTER. int force_smooth_interpol; // For real-time mode: force DC only under intra search when content // does not have high source SAD. int rt_intra_dc_only_low_content; } SPEED_FEATURES; struct VP9_COMP; void vp9_set_speed_features_framesize_independent(struct VP9_COMP *cpi, int speed); void vp9_set_speed_features_framesize_dependent(struct VP9_COMP *cpi, int speed); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_SPEED_FEATURES_H_ libvpx-1.8.2/vp9/encoder/vp9_subexp.c000066400000000000000000000165231357355204000174460ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include "vpx_dsp/bitwriter.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_entropy.h" #include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_subexp.h" static const uint8_t update_bits[255] = { 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 11, 0, }; #define MIN_DELP_BITS 5 static int recenter_nonneg(int v, int m) { if (v > (m << 1)) return v; else if (v >= m) return ((v - m) << 1); else return ((m - v) << 1) - 1; } static int remap_prob(int v, int m) { int i; static const uint8_t map_table[MAX_PROB - 1] = { // generated by: // map_table[j] = split_index(j, MAX_PROB - 1, MODULUS_PARAM); 20, 21, 22, 23, 24, 25, 0, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 1, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 2, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 3, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 4, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 5, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 6, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 7, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 8, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 9, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 10, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 11, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 12, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 13, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 14, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 15, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 16, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 17, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 18, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 19, }; v--; m--; if ((m << 1) <= MAX_PROB) i = recenter_nonneg(v, m) - 1; else i = recenter_nonneg(MAX_PROB - 1 - v, MAX_PROB - 1 - m) - 1; assert(i >= 0 && (size_t)i < sizeof(map_table)); i = map_table[i]; return i; } static int prob_diff_update_cost(vpx_prob newp, vpx_prob oldp) { int delp = remap_prob(newp, oldp); return update_bits[delp] << VP9_PROB_COST_SHIFT; } static void encode_uniform(vpx_writer *w, int v) { const int l = 8; const int m = (1 << l) - 191; if (v < m) { vpx_write_literal(w, v, l - 1); } else { vpx_write_literal(w, m + ((v - m) >> 1), l - 1); vpx_write_literal(w, (v - m) & 1, 1); } } static INLINE int write_bit_gte(vpx_writer *w, int word, int test) { vpx_write_literal(w, word >= test, 1); return word >= test; } static void encode_term_subexp(vpx_writer *w, int word) { if (!write_bit_gte(w, word, 16)) { vpx_write_literal(w, word, 4); } else if 
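/* Worked example (illustrative, not libvpx code): recenter_nonneg() above
   maps v to a non-negative index measuring its distance from m; e.g. for
   m = 5: v = 5 -> 0, 6 -> 2, 4 -> 1, 7 -> 4, 3 -> 3, while any v > 2 * m
   maps to itself. encode_term_subexp() then codes that index in one of four
   buckets -- [0, 16) in 4 bits, [16, 32) in 4 bits, [32, 64) in 5 bits, and
   64+ via encode_uniform() -- each prefixed by the write_bit_gte() flag
   bits, which is exactly where the 5/6/8/10/11-bit entries of the
   update_bits[] cost table above come from. */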
(!write_bit_gte(w, word, 32)) { vpx_write_literal(w, word - 16, 4); } else if (!write_bit_gte(w, word, 64)) { vpx_write_literal(w, word - 32, 5); } else { encode_uniform(w, word - 64); } } void vp9_write_prob_diff_update(vpx_writer *w, vpx_prob newp, vpx_prob oldp) { const int delp = remap_prob(newp, oldp); encode_term_subexp(w, delp); } int vp9_prob_diff_update_savings_search(const unsigned int *ct, vpx_prob oldp, vpx_prob *bestp, vpx_prob upd) { const int old_b = cost_branch256(ct, oldp); int bestsavings = 0; vpx_prob newp, bestnewp = oldp; const int step = *bestp > oldp ? -1 : 1; const int upd_cost = vp9_cost_one(upd) - vp9_cost_zero(upd); if (old_b > upd_cost + (MIN_DELP_BITS << VP9_PROB_COST_SHIFT)) { for (newp = *bestp; newp != oldp; newp += step) { const int new_b = cost_branch256(ct, newp); const int update_b = prob_diff_update_cost(newp, oldp) + upd_cost; const int savings = old_b - new_b - update_b; if (savings > bestsavings) { bestsavings = savings; bestnewp = newp; } } } *bestp = bestnewp; return bestsavings; } int vp9_prob_diff_update_savings_search_model(const unsigned int *ct, const vpx_prob oldp, vpx_prob *bestp, vpx_prob upd, int stepsize) { int i, old_b, new_b, update_b, savings, bestsavings; int newp; const int step_sign = *bestp > oldp ? -1 : 1; const int step = stepsize * step_sign; const int upd_cost = vp9_cost_one(upd) - vp9_cost_zero(upd); const vpx_prob *newplist, *oldplist; vpx_prob bestnewp; oldplist = vp9_pareto8_full[oldp - 1]; old_b = cost_branch256(ct + 2 * PIVOT_NODE, oldp); for (i = UNCONSTRAINED_NODES; i < ENTROPY_NODES; ++i) old_b += cost_branch256(ct + 2 * i, oldplist[i - UNCONSTRAINED_NODES]); bestsavings = 0; bestnewp = oldp; assert(stepsize > 0); if (old_b > upd_cost + (MIN_DELP_BITS << VP9_PROB_COST_SHIFT)) { for (newp = *bestp; (newp - oldp) * step_sign < 0; newp += step) { if (newp < 1 || newp > 255) continue; newplist = vp9_pareto8_full[newp - 1]; new_b = cost_branch256(ct + 2 * PIVOT_NODE, newp); for (i = UNCONSTRAINED_NODES; i < ENTROPY_NODES; ++i) new_b += cost_branch256(ct + 2 * i, newplist[i - UNCONSTRAINED_NODES]); update_b = prob_diff_update_cost(newp, oldp) + upd_cost; savings = old_b - new_b - update_b; if (savings > bestsavings) { bestsavings = savings; bestnewp = newp; } } } *bestp = bestnewp; return bestsavings; } void vp9_cond_prob_diff_update(vpx_writer *w, vpx_prob *oldp, const unsigned int ct[2]) { const vpx_prob upd = DIFF_UPDATE_PROB; vpx_prob newp = get_binary_prob(ct[0], ct[1]); const int savings = vp9_prob_diff_update_savings_search(ct, *oldp, &newp, upd); assert(newp >= 1); if (savings > 0) { vpx_write(w, 1, upd); vp9_write_prob_diff_update(w, newp, *oldp); *oldp = newp; } else { vpx_write(w, 0, upd); } } libvpx-1.8.2/vp9/encoder/vp9_subexp.h000066400000000000000000000024671357355204000174550ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_ENCODER_VP9_SUBEXP_H_ #define VPX_VP9_ENCODER_VP9_SUBEXP_H_ #ifdef __cplusplus extern "C" { #endif #include "vpx_dsp/prob.h" struct vpx_writer; void vp9_write_prob_diff_update(struct vpx_writer *w, vpx_prob newp, vpx_prob oldp); void vp9_cond_prob_diff_update(struct vpx_writer *w, vpx_prob *oldp, const unsigned int ct[2]); int vp9_prob_diff_update_savings_search(const unsigned int *ct, vpx_prob oldp, vpx_prob *bestp, vpx_prob upd); int vp9_prob_diff_update_savings_search_model(const unsigned int *ct, const vpx_prob oldp, vpx_prob *bestp, vpx_prob upd, int stepsize); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_SUBEXP_H_ libvpx-1.8.2/vp9/encoder/vp9_svc_layercontext.c000066400000000000000000001527211357355204000215350ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <math.h> #include "vp9/encoder/vp9_aq_cyclicrefresh.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_svc_layercontext.h" #include "vp9/encoder/vp9_extend.h" #include "vpx_dsp/vpx_dsp_common.h" #define SMALL_FRAME_WIDTH 32 #define SMALL_FRAME_HEIGHT 16 static void swap_ptr(void *a, void *b) { void **a_p = (void **)a; void **b_p = (void **)b; void *c = *a_p; *a_p = *b_p; *b_p = c; } void vp9_init_layer_context(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; int mi_rows = cpi->common.mi_rows; int mi_cols = cpi->common.mi_cols; int sl, tl, i; int alt_ref_idx = svc->number_spatial_layers; svc->spatial_layer_id = 0; svc->temporal_layer_id = 0; svc->force_zero_mode_spatial_ref = 0; svc->use_base_mv = 0; svc->use_partition_reuse = 0; svc->use_gf_temporal_ref = 1; svc->use_gf_temporal_ref_current_layer = 0; svc->scaled_temp_is_alloc = 0; svc->scaled_one_half = 0; svc->current_superframe = 0; svc->non_reference_frame = 0; svc->skip_enhancement_layer = 0; svc->disable_inter_layer_pred = INTER_LAYER_PRED_ON; svc->framedrop_mode = CONSTRAINED_LAYER_DROP; svc->set_intra_only_frame = 0; svc->previous_frame_is_intra_only = 0; svc->superframe_has_layer_sync = 0; svc->use_set_ref_frame_config = 0; svc->num_encoded_top_layer = 0; svc->simulcast_mode = 0; for (i = 0; i < REF_FRAMES; ++i) { svc->fb_idx_spatial_layer_id[i] = 0xff; svc->fb_idx_temporal_layer_id[i] = 0xff; svc->fb_idx_base[i] = 0; } for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { svc->last_layer_dropped[sl] = 0; svc->drop_spatial_layer[sl] = 0; svc->ext_frame_flags[sl] = 0; svc->lst_fb_idx[sl] = 0; svc->gld_fb_idx[sl] = 1; svc->alt_fb_idx[sl] = 2; svc->downsample_filter_type[sl] = BILINEAR; svc->downsample_filter_phase[sl] = 8; // Set to 8 for averaging filter.
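// Illustrative note (not libvpx code): the layer_context[] array is laid out
// spatial-major, so LAYER_IDS_TO_IDX(sl, tl, ts_number_layers) is simply
// sl * ts_number_layers + tl; e.g. with 3 spatial x 3 temporal layers,
// (sl = 1, tl = 2) lives at index 5.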
svc->framedrop_thresh[sl] = oxcf->drop_frames_water_mark; svc->fb_idx_upd_tl0[sl] = -1; svc->drop_count[sl] = 0; svc->spatial_layer_sync[sl] = 0; svc->force_drop_constrained_from_above[sl] = 0; } svc->max_consec_drop = INT_MAX; svc->buffer_gf_temporal_ref[1].idx = 7; svc->buffer_gf_temporal_ref[0].idx = 6; svc->buffer_gf_temporal_ref[1].is_used = 0; svc->buffer_gf_temporal_ref[0].is_used = 0; if (cpi->oxcf.error_resilient_mode == 0 && cpi->oxcf.pass == 2) { if (vpx_realloc_frame_buffer(&cpi->svc.empty_frame.img, SMALL_FRAME_WIDTH, SMALL_FRAME_HEIGHT, cpi->common.subsampling_x, cpi->common.subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cpi->common.use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cpi->common.byte_alignment, NULL, NULL, NULL)) vpx_internal_error(&cpi->common.error, VPX_CODEC_MEM_ERROR, "Failed to allocate empty frame for multiple frame " "contexts"); memset(cpi->svc.empty_frame.img.buffer_alloc, 0x80, cpi->svc.empty_frame.img.buffer_alloc_sz); } for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers); LAYER_CONTEXT *const lc = &svc->layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; int i; lc->current_video_frame_in_layer = 0; lc->layer_size = 0; lc->frames_from_key_frame = 0; lc->last_frame_type = FRAME_TYPES; lrc->ni_av_qi = oxcf->worst_allowed_q; lrc->total_actual_bits = 0; lrc->total_target_vs_actual = 0; lrc->ni_tot_qi = 0; lrc->tot_q = 0.0; lrc->avg_q = 0.0; lrc->ni_frames = 0; lrc->decimation_count = 0; lrc->decimation_factor = 0; lrc->worst_quality = oxcf->worst_allowed_q; lrc->best_quality = oxcf->best_allowed_q; for (i = 0; i < RATE_FACTOR_LEVELS; ++i) { lrc->rate_correction_factors[i] = 1.0; } if (cpi->oxcf.rc_mode == VPX_CBR) { lc->target_bandwidth = oxcf->layer_target_bitrate[layer]; lrc->last_q[INTER_FRAME] = oxcf->worst_allowed_q; lrc->avg_frame_qindex[INTER_FRAME] = oxcf->worst_allowed_q; lrc->avg_frame_qindex[KEY_FRAME] = oxcf->worst_allowed_q; } else { lc->target_bandwidth = oxcf->layer_target_bitrate[layer]; lrc->last_q[KEY_FRAME] = oxcf->best_allowed_q; lrc->last_q[INTER_FRAME] = oxcf->best_allowed_q; lrc->avg_frame_qindex[KEY_FRAME] = (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2; lrc->avg_frame_qindex[INTER_FRAME] = (oxcf->worst_allowed_q + oxcf->best_allowed_q) / 2; if (oxcf->ss_enable_auto_arf[sl]) lc->alt_ref_idx = alt_ref_idx++; else lc->alt_ref_idx = INVALID_IDX; lc->gold_ref_idx = INVALID_IDX; } lrc->buffer_level = oxcf->starting_buffer_level_ms * lc->target_bandwidth / 1000; lrc->bits_off_target = lrc->buffer_level; // Initialize the cyclic refresh parameters. If spatial layers are used // (i.e., ss_number_layers > 1), these need to be updated per spatial // layer. // Cyclic refresh is only applied on base temporal layer. 
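// (Worked example, illustrative, for the buffer seeding a few lines up:
// starting_buffer_level_ms is in milliseconds and target_bandwidth in bits
// per second, so 600 ms at an 800 kbps layer target seeds
// buffer_level = 600 * 800000 / 1000 = 480000 bits.)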
if (oxcf->ss_number_layers > 1 && tl == 0) { size_t last_coded_q_map_size; size_t consec_zero_mv_size; VP9_COMMON *const cm = &cpi->common; lc->sb_index = 0; lc->actual_num_seg1_blocks = 0; lc->actual_num_seg2_blocks = 0; lc->counter_encode_maxq_scene_change = 0; CHECK_MEM_ERROR(cm, lc->map, vpx_malloc(mi_rows * mi_cols * sizeof(*lc->map))); memset(lc->map, 0, mi_rows * mi_cols); last_coded_q_map_size = mi_rows * mi_cols * sizeof(*lc->last_coded_q_map); CHECK_MEM_ERROR(cm, lc->last_coded_q_map, vpx_malloc(last_coded_q_map_size)); assert(MAXQ <= 255); memset(lc->last_coded_q_map, MAXQ, last_coded_q_map_size); consec_zero_mv_size = mi_rows * mi_cols * sizeof(*lc->consec_zero_mv); CHECK_MEM_ERROR(cm, lc->consec_zero_mv, vpx_malloc(consec_zero_mv_size)); memset(lc->consec_zero_mv, 0, consec_zero_mv_size); } } } // Still have extra buffer for base layer golden frame if (!(svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) && alt_ref_idx < REF_FRAMES) svc->layer_context[0].gold_ref_idx = alt_ref_idx; } // Update the layer context from a change_config() call. void vp9_update_layer_context_change_config(VP9_COMP *const cpi, const int target_bandwidth) { SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; const RATE_CONTROL *const rc = &cpi->rc; int sl, tl, layer = 0, spatial_layer_target; float bitrate_alloc = 1.0; cpi->svc.temporal_layering_mode = oxcf->temporal_layering_mode; if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers); svc->layer_context[layer].target_bandwidth = oxcf->layer_target_bitrate[layer]; } layer = LAYER_IDS_TO_IDX( sl, ((oxcf->ts_number_layers - 1) < 0 ? 0 : (oxcf->ts_number_layers - 1)), oxcf->ts_number_layers); spatial_layer_target = svc->layer_context[layer].target_bandwidth = oxcf->layer_target_bitrate[layer]; for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { LAYER_CONTEXT *const lc = &svc->layer_context[sl * oxcf->ts_number_layers + tl]; RATE_CONTROL *const lrc = &lc->rc; lc->spatial_layer_target_bandwidth = spatial_layer_target; bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; lrc->starting_buffer_level = (int64_t)(rc->starting_buffer_level * bitrate_alloc); lrc->optimal_buffer_level = (int64_t)(rc->optimal_buffer_level * bitrate_alloc); lrc->maximum_buffer_size = (int64_t)(rc->maximum_buffer_size * bitrate_alloc); lrc->bits_off_target = VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size); lrc->buffer_level = VPXMIN(lrc->buffer_level, lrc->maximum_buffer_size); lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl]; lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = rc->max_frame_bandwidth; lrc->worst_quality = rc->worst_quality; lrc->best_quality = rc->best_quality; } } } else { int layer_end; if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { layer_end = svc->number_temporal_layers; } else { layer_end = svc->number_spatial_layers; } for (layer = 0; layer < layer_end; ++layer) { LAYER_CONTEXT *const lc = &svc->layer_context[layer]; RATE_CONTROL *const lrc = &lc->rc; lc->target_bandwidth = oxcf->layer_target_bitrate[layer]; bitrate_alloc = (float)lc->target_bandwidth / target_bandwidth; // Update buffer-related quantities. 
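// (Worked example, illustrative: bitrate_alloc is the layer's share of the
// total target, e.g. a 300 kbps layer within a 1200 kbps stream gives
// bitrate_alloc = 0.25, and the buffer levels below are scaled by that
// share.)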
lrc->starting_buffer_level = (int64_t)(rc->starting_buffer_level * bitrate_alloc); lrc->optimal_buffer_level = (int64_t)(rc->optimal_buffer_level * bitrate_alloc); lrc->maximum_buffer_size = (int64_t)(rc->maximum_buffer_size * bitrate_alloc); lrc->bits_off_target = VPXMIN(lrc->bits_off_target, lrc->maximum_buffer_size); lrc->buffer_level = VPXMIN(lrc->buffer_level, lrc->maximum_buffer_size); // Update framerate-related quantities. if (svc->number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) { lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[layer]; } else { lc->framerate = cpi->framerate; } lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = rc->max_frame_bandwidth; // Update qp-related quantities. lrc->worst_quality = rc->worst_quality; lrc->best_quality = rc->best_quality; } } } static LAYER_CONTEXT *get_layer_context(VP9_COMP *const cpi) { if (is_one_pass_cbr_svc(cpi)) return &cpi->svc.layer_context[cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id]; else return (cpi->svc.number_temporal_layers > 1 && cpi->oxcf.rc_mode == VPX_CBR) ? &cpi->svc.layer_context[cpi->svc.temporal_layer_id] : &cpi->svc.layer_context[cpi->svc.spatial_layer_id]; } void vp9_update_temporal_layer_framerate(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; LAYER_CONTEXT *const lc = get_layer_context(cpi); RATE_CONTROL *const lrc = &lc->rc; // Index into spatial+temporal arrays. const int st_idx = svc->spatial_layer_id * svc->number_temporal_layers + svc->temporal_layer_id; const int tl = svc->temporal_layer_id; lc->framerate = cpi->framerate / oxcf->ts_rate_decimator[tl]; lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->max_frame_bandwidth = cpi->rc.max_frame_bandwidth; // Update the average layer frame size (non-cumulative per-frame-bw). if (tl == 0) { lc->avg_frame_size = lrc->avg_frame_bandwidth; } else { const double prev_layer_framerate = cpi->framerate / oxcf->ts_rate_decimator[tl - 1]; const int prev_layer_target_bandwidth = oxcf->layer_target_bitrate[st_idx - 1]; lc->avg_frame_size = (int)((lc->target_bandwidth - prev_layer_target_bandwidth) / (lc->framerate - prev_layer_framerate)); } } void vp9_update_spatial_layer_framerate(VP9_COMP *const cpi, double framerate) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; LAYER_CONTEXT *const lc = get_layer_context(cpi); RATE_CONTROL *const lrc = &lc->rc; lc->framerate = framerate; lrc->avg_frame_bandwidth = (int)(lc->target_bandwidth / lc->framerate); lrc->min_frame_bandwidth = (int)(lrc->avg_frame_bandwidth * oxcf->two_pass_vbrmin_section / 100); lrc->max_frame_bandwidth = (int)(((int64_t)lrc->avg_frame_bandwidth * oxcf->two_pass_vbrmax_section) / 100); vp9_rc_set_gf_interval_range(cpi, lrc); } void vp9_restore_layer_context(VP9_COMP *const cpi) { LAYER_CONTEXT *const lc = get_layer_context(cpi); const int old_frame_since_key = cpi->rc.frames_since_key; const int old_frame_to_key = cpi->rc.frames_to_key; const int old_ext_use_post_encode_drop = cpi->rc.ext_use_post_encode_drop; cpi->rc = lc->rc; cpi->twopass = lc->twopass; cpi->oxcf.target_bandwidth = lc->target_bandwidth; cpi->alt_ref_source = lc->alt_ref_source; // Check if it is one_pass_cbr_svc mode and lc->speed > 0 (real-time mode // does not use speed = 0). 
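// (Worked example, illustrative: in vp9_update_temporal_layer_framerate()
// above, a non-base temporal layer's avg_frame_size is the incremental rate
// over the incremental frame rate; e.g. with cumulative targets of 400/600
// kbps for TL0/TL1 at 15/30 fps, a TL1 frame averages
// (600000 - 400000) / (30 - 15) = 13333 bits.)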
if (is_one_pass_cbr_svc(cpi) && lc->speed > 0) { cpi->oxcf.speed = lc->speed; } // Reset the frames_since_key and frames_to_key counters to their values // before the layer restore. Keep these defined for the stream (not layer). if (cpi->svc.number_temporal_layers > 1 || cpi->svc.number_spatial_layers > 1) { cpi->rc.frames_since_key = old_frame_since_key; cpi->rc.frames_to_key = old_frame_to_key; } cpi->rc.ext_use_post_encode_drop = old_ext_use_post_encode_drop; // For spatial-svc, allow cyclic-refresh to be applied on the spatial layers, // for the base temporal layer. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->svc.number_spatial_layers > 1 && cpi->svc.temporal_layer_id == 0) { CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; swap_ptr(&cr->map, &lc->map); swap_ptr(&cr->last_coded_q_map, &lc->last_coded_q_map); swap_ptr(&cpi->consec_zero_mv, &lc->consec_zero_mv); cr->sb_index = lc->sb_index; cr->actual_num_seg1_blocks = lc->actual_num_seg1_blocks; cr->actual_num_seg2_blocks = lc->actual_num_seg2_blocks; cr->counter_encode_maxq_scene_change = lc->counter_encode_maxq_scene_change; } } void vp9_save_layer_context(VP9_COMP *const cpi) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; LAYER_CONTEXT *const lc = get_layer_context(cpi); lc->rc = cpi->rc; lc->twopass = cpi->twopass; lc->target_bandwidth = (int)oxcf->target_bandwidth; lc->alt_ref_source = cpi->alt_ref_source; // For spatial-svc, allow cyclic-refresh to be applied on the spatial layers, // for the base temporal layer. if (cpi->oxcf.aq_mode == CYCLIC_REFRESH_AQ && cpi->svc.number_spatial_layers > 1 && cpi->svc.temporal_layer_id == 0) { CYCLIC_REFRESH *const cr = cpi->cyclic_refresh; signed char *temp = lc->map; uint8_t *temp2 = lc->last_coded_q_map; uint8_t *temp3 = lc->consec_zero_mv; lc->map = cr->map; cr->map = temp; lc->last_coded_q_map = cr->last_coded_q_map; cr->last_coded_q_map = temp2; lc->consec_zero_mv = cpi->consec_zero_mv; cpi->consec_zero_mv = temp3; lc->sb_index = cr->sb_index; lc->actual_num_seg1_blocks = cr->actual_num_seg1_blocks; lc->actual_num_seg2_blocks = cr->actual_num_seg2_blocks; lc->counter_encode_maxq_scene_change = cr->counter_encode_maxq_scene_change; } } #if !CONFIG_REALTIME_ONLY void vp9_init_second_pass_spatial_svc(VP9_COMP *cpi) { SVC *const svc = &cpi->svc; int i; for (i = 0; i < svc->number_spatial_layers; ++i) { TWO_PASS *const twopass = &svc->layer_context[i].twopass; svc->spatial_layer_id = i; vp9_init_second_pass(cpi); twopass->total_stats.spatial_layer_id = i; twopass->total_left_stats.spatial_layer_id = i; } svc->spatial_layer_id = 0; } #endif // !CONFIG_REALTIME_ONLY void vp9_inc_frame_in_layer(VP9_COMP *const cpi) { LAYER_CONTEXT *const lc = &cpi->svc.layer_context[cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers]; ++lc->current_video_frame_in_layer; ++lc->frames_from_key_frame; if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) ++cpi->svc.current_superframe; } void get_layer_resolution(const int width_org, const int height_org, const int num, const int den, int *width_out, int *height_out) { int w, h; if (width_out == NULL || height_out == NULL || den == 0) return; w = width_org * num / den; h = height_org * num / den; // make height and width even to make chrome player happy w += w % 2; h += h % 2; *width_out = w; *height_out = h; } static void reset_fb_idx_unused(VP9_COMP *const cpi) { // If a reference frame is not referenced or refreshed, then set the // fb_idx for that reference to the first one used/referenced. 
// This is to avoid setting fb_idx for a reference to a slot that is not // used/needed (i.e., since that reference is not referenced or refreshed). static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; MV_REFERENCE_FRAME ref_frame; MV_REFERENCE_FRAME first_ref = 0; int first_fb_idx = 0; int fb_idx[3] = { cpi->lst_fb_idx, cpi->gld_fb_idx, cpi->alt_fb_idx }; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ref_frame++) { if (cpi->ref_frame_flags & flag_list[ref_frame]) { first_ref = ref_frame; first_fb_idx = fb_idx[ref_frame - 1]; break; } } if (first_ref > 0) { if (first_ref != LAST_FRAME && !(cpi->ref_frame_flags & flag_list[LAST_FRAME]) && !cpi->ext_refresh_last_frame) cpi->lst_fb_idx = first_fb_idx; else if (first_ref != GOLDEN_FRAME && !(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]) && !cpi->ext_refresh_golden_frame) cpi->gld_fb_idx = first_fb_idx; else if (first_ref != ALTREF_FRAME && !(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]) && !cpi->ext_refresh_alt_ref_frame) cpi->alt_fb_idx = first_fb_idx; } } // Never refresh any reference frame buffers on top temporal layers in // simulcast mode, which has interlayer prediction disabled. static void non_reference_frame_simulcast(VP9_COMP *const cpi) { if (cpi->svc.temporal_layer_id == cpi->svc.number_temporal_layers - 1 && cpi->svc.temporal_layer_id > 0) { cpi->ext_refresh_last_frame = 0; cpi->ext_refresh_golden_frame = 0; cpi->ext_refresh_alt_ref_frame = 0; } } // The function sets proper ref_frame_flags, buffer indices, and buffer update // variables for temporal layering mode 3 - that does 0-2-1-2 temporal layering // scheme. static void set_flags_and_fb_idx_for_temporal_mode3(VP9_COMP *const cpi) { int frame_num_within_temporal_struct = 0; int spatial_id, temporal_id; spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; frame_num_within_temporal_struct = cpi->svc .layer_context[cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers] .current_video_frame_in_layer % 4; temporal_id = cpi->svc.temporal_layer_id = (frame_num_within_temporal_struct & 1) ? 2 : (frame_num_within_temporal_struct >> 1); cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame = cpi->ext_refresh_alt_ref_frame = 0; if (!temporal_id) { cpi->ext_refresh_frame_flags_pending = 1; cpi->ext_refresh_last_frame = 1; if (!spatial_id) { cpi->ref_frame_flags = VP9_LAST_FLAG; } else if (cpi->svc.layer_context[temporal_id].is_key_frame) { // base layer is a key frame. 
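// (Worked example, illustrative: at the top of this function
// frame_num_within_temporal_struct cycles 0..3, and the expression
// (f & 1) ? 2 : (f >> 1) yields the 0-2-1-2 pattern, i.e.
// f = 0 -> TL0, f = 1 -> TL2, f = 2 -> TL1, f = 3 -> TL2.)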
cpi->ref_frame_flags = VP9_LAST_FLAG; cpi->ext_refresh_last_frame = 0; cpi->ext_refresh_golden_frame = 1; } else { cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } } else if (temporal_id == 1) { cpi->ext_refresh_frame_flags_pending = 1; cpi->ext_refresh_alt_ref_frame = 1; if (!spatial_id) { cpi->ref_frame_flags = VP9_LAST_FLAG; } else { cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } } else { if (frame_num_within_temporal_struct == 1) { // the first tl2 picture if (spatial_id == cpi->svc.number_spatial_layers - 1) { // top layer cpi->ext_refresh_frame_flags_pending = 1; if (!spatial_id) cpi->ref_frame_flags = VP9_LAST_FLAG; else cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } else if (!spatial_id) { cpi->ext_refresh_frame_flags_pending = 1; cpi->ext_refresh_alt_ref_frame = 1; cpi->ref_frame_flags = VP9_LAST_FLAG; } else if (spatial_id < cpi->svc.number_spatial_layers - 1) { cpi->ext_refresh_frame_flags_pending = 1; cpi->ext_refresh_alt_ref_frame = 1; cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } } else { // The second tl2 picture if (spatial_id == cpi->svc.number_spatial_layers - 1) { // top layer cpi->ext_refresh_frame_flags_pending = 1; if (!spatial_id) cpi->ref_frame_flags = VP9_LAST_FLAG; else cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } else if (!spatial_id) { cpi->ext_refresh_frame_flags_pending = 1; cpi->ref_frame_flags = VP9_LAST_FLAG; cpi->ext_refresh_alt_ref_frame = 1; } else { // top layer cpi->ext_refresh_frame_flags_pending = 1; cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; cpi->ext_refresh_alt_ref_frame = 1; } } } if (temporal_id == 0) { cpi->lst_fb_idx = spatial_id; if (spatial_id) { if (cpi->svc.layer_context[temporal_id].is_key_frame) { cpi->lst_fb_idx = spatial_id - 1; cpi->gld_fb_idx = spatial_id; } else { cpi->gld_fb_idx = spatial_id - 1; } } else { cpi->gld_fb_idx = 0; } cpi->alt_fb_idx = 0; } else if (temporal_id == 1) { cpi->lst_fb_idx = spatial_id; cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; } else if (frame_num_within_temporal_struct == 1) { cpi->lst_fb_idx = spatial_id; cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; } else { cpi->lst_fb_idx = cpi->svc.number_spatial_layers + spatial_id; cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; } if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi); reset_fb_idx_unused(cpi); } // The function sets proper ref_frame_flags, buffer indices, and buffer update // variables for temporal layering mode 2 - that does 0-1-0-1 temporal layering // scheme. static void set_flags_and_fb_idx_for_temporal_mode2(VP9_COMP *const cpi) { int spatial_id, temporal_id; spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; temporal_id = cpi->svc.temporal_layer_id = cpi->svc .layer_context[cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers] .current_video_frame_in_layer & 1; cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame = cpi->ext_refresh_alt_ref_frame = 0; if (!temporal_id) { cpi->ext_refresh_frame_flags_pending = 1; cpi->ext_refresh_last_frame = 1; if (!spatial_id) { cpi->ref_frame_flags = VP9_LAST_FLAG; } else if (cpi->svc.layer_context[temporal_id].is_key_frame) { // base layer is a key frame. 
cpi->ref_frame_flags = VP9_LAST_FLAG; cpi->ext_refresh_last_frame = 0; cpi->ext_refresh_golden_frame = 1; } else { cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } } else if (temporal_id == 1) { cpi->ext_refresh_frame_flags_pending = 1; cpi->ext_refresh_alt_ref_frame = 1; if (!spatial_id) { cpi->ref_frame_flags = VP9_LAST_FLAG; } else { if (spatial_id == cpi->svc.number_spatial_layers - 1) cpi->ext_refresh_alt_ref_frame = 0; cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } } if (temporal_id == 0) { cpi->lst_fb_idx = spatial_id; if (spatial_id) { if (cpi->svc.layer_context[temporal_id].is_key_frame) { cpi->lst_fb_idx = spatial_id - 1; cpi->gld_fb_idx = spatial_id; } else { cpi->gld_fb_idx = spatial_id - 1; } } else { cpi->gld_fb_idx = 0; } cpi->alt_fb_idx = 0; } else if (temporal_id == 1) { cpi->lst_fb_idx = spatial_id; cpi->gld_fb_idx = cpi->svc.number_spatial_layers + spatial_id - 1; cpi->alt_fb_idx = cpi->svc.number_spatial_layers + spatial_id; } if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi); reset_fb_idx_unused(cpi); } // The function sets proper ref_frame_flags, buffer indices, and buffer update // variables for temporal layering mode 0 - that has no temporal layering. static void set_flags_and_fb_idx_for_temporal_mode_noLayering( VP9_COMP *const cpi) { int spatial_id; spatial_id = cpi->svc.spatial_layer_id = cpi->svc.spatial_layer_to_encode; cpi->ext_refresh_last_frame = cpi->ext_refresh_golden_frame = cpi->ext_refresh_alt_ref_frame = 0; cpi->ext_refresh_frame_flags_pending = 1; cpi->ext_refresh_last_frame = 1; if (!spatial_id) { cpi->ref_frame_flags = VP9_LAST_FLAG; } else if (cpi->svc.layer_context[0].is_key_frame) { cpi->ref_frame_flags = VP9_LAST_FLAG; cpi->ext_refresh_last_frame = 0; cpi->ext_refresh_golden_frame = 1; } else { cpi->ref_frame_flags = VP9_LAST_FLAG | VP9_GOLD_FLAG; } cpi->lst_fb_idx = spatial_id; if (spatial_id) { if (cpi->svc.layer_context[0].is_key_frame) { cpi->lst_fb_idx = spatial_id - 1; cpi->gld_fb_idx = spatial_id; } else { cpi->gld_fb_idx = spatial_id - 1; } } else { cpi->gld_fb_idx = 0; } if (cpi->svc.simulcast_mode) non_reference_frame_simulcast(cpi); reset_fb_idx_unused(cpi); } static void set_flags_and_fb_idx_bypass_via_set_ref_frame_config( VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; int sl = svc->spatial_layer_id = svc->spatial_layer_to_encode; cpi->svc.temporal_layer_id = cpi->svc.temporal_layer_id_per_spatial[sl]; cpi->ext_refresh_frame_flags_pending = 1; cpi->lst_fb_idx = svc->lst_fb_idx[sl]; cpi->gld_fb_idx = svc->gld_fb_idx[sl]; cpi->alt_fb_idx = svc->alt_fb_idx[sl]; cpi->ext_refresh_last_frame = 0; cpi->ext_refresh_golden_frame = 0; cpi->ext_refresh_alt_ref_frame = 0; cpi->ref_frame_flags = 0; if (svc->reference_last[sl]) cpi->ref_frame_flags |= VP9_LAST_FLAG; if (svc->reference_golden[sl]) cpi->ref_frame_flags |= VP9_GOLD_FLAG; if (svc->reference_altref[sl]) cpi->ref_frame_flags |= VP9_ALT_FLAG; } void vp9_copy_flags_ref_update_idx(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; int sl = svc->spatial_layer_id; svc->lst_fb_idx[sl] = cpi->lst_fb_idx; svc->gld_fb_idx[sl] = cpi->gld_fb_idx; svc->alt_fb_idx[sl] = cpi->alt_fb_idx; // For the fixed SVC mode: pass the refresh_lst/gld/alt_frame flags to the // update_buffer_slot, this is needed for the GET_SVC_REF_FRAME_CONFIG api. 
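// (Illustrative aside: update_buffer_slot[sl] is a REF_FRAMES-wide bit mask
// over frame-buffer slots, so e.g. a value of 0x05 after the loop below
// means slots 0 and 2 are refreshed by this layer's frame.)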
if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { int ref; for (ref = 0; ref < REF_FRAMES; ++ref) { svc->update_buffer_slot[sl] &= ~(1 << ref); if ((ref == svc->lst_fb_idx[sl] && cpi->refresh_last_frame) || (ref == svc->gld_fb_idx[sl] && cpi->refresh_golden_frame) || (ref == svc->alt_fb_idx[sl] && cpi->refresh_alt_ref_frame)) svc->update_buffer_slot[sl] |= (1 << ref); } } // TODO(jianj): Remove these 3, deprecated. svc->update_last[sl] = (uint8_t)cpi->refresh_last_frame; svc->update_golden[sl] = (uint8_t)cpi->refresh_golden_frame; svc->update_altref[sl] = (uint8_t)cpi->refresh_alt_ref_frame; svc->reference_last[sl] = (uint8_t)(cpi->ref_frame_flags & flag_list[LAST_FRAME]); svc->reference_golden[sl] = (uint8_t)(cpi->ref_frame_flags & flag_list[GOLDEN_FRAME]); svc->reference_altref[sl] = (uint8_t)(cpi->ref_frame_flags & flag_list[ALTREF_FRAME]); } int vp9_one_pass_cbr_svc_start_layer(VP9_COMP *const cpi) { int width = 0, height = 0; SVC *const svc = &cpi->svc; LAYER_CONTEXT *lc = NULL; svc->skip_enhancement_layer = 0; if (svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF && svc->number_spatial_layers > 1 && svc->number_spatial_layers <= 3 && svc->number_temporal_layers <= 3 && !(svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->use_set_ref_frame_config)) svc->simulcast_mode = 1; else svc->simulcast_mode = 0; if (svc->number_spatial_layers > 1) { svc->use_base_mv = 1; svc->use_partition_reuse = 1; } svc->force_zero_mode_spatial_ref = 1; svc->mi_stride[svc->spatial_layer_id] = cpi->common.mi_stride; svc->mi_rows[svc->spatial_layer_id] = cpi->common.mi_rows; svc->mi_cols[svc->spatial_layer_id] = cpi->common.mi_cols; // For constrained_from_above drop mode: before encoding superframe (i.e., // at SL0 frame) check all spatial layers (starting from top) for possible // drop, and if so, set a flag to force drop of that layer and all its lower // layers. if (svc->spatial_layer_to_encode == svc->first_spatial_layer_to_encode) { int sl; for (sl = 0; sl < svc->number_spatial_layers; sl++) svc->force_drop_constrained_from_above[sl] = 0; if (svc->framedrop_mode == CONSTRAINED_FROM_ABOVE_DROP) { for (sl = svc->number_spatial_layers - 1; sl >= svc->first_spatial_layer_to_encode; sl--) { int layer = sl * svc->number_temporal_layers + svc->temporal_layer_id; LAYER_CONTEXT *const lc = &svc->layer_context[layer]; cpi->rc = lc->rc; cpi->oxcf.target_bandwidth = lc->target_bandwidth; if (vp9_test_drop(cpi)) { int sl2; // Set flag to force drop in encoding for this mode. 
for (sl2 = sl; sl2 >= svc->first_spatial_layer_to_encode; sl2--) svc->force_drop_constrained_from_above[sl2] = 1; break; } } } } if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { set_flags_and_fb_idx_for_temporal_mode3(cpi); } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi); } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0101) { set_flags_and_fb_idx_for_temporal_mode2(cpi); } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->use_set_ref_frame_config) { set_flags_and_fb_idx_bypass_via_set_ref_frame_config(cpi); } if (cpi->lst_fb_idx == svc->buffer_gf_temporal_ref[0].idx || cpi->gld_fb_idx == svc->buffer_gf_temporal_ref[0].idx || cpi->alt_fb_idx == svc->buffer_gf_temporal_ref[0].idx) svc->buffer_gf_temporal_ref[0].is_used = 1; if (cpi->lst_fb_idx == svc->buffer_gf_temporal_ref[1].idx || cpi->gld_fb_idx == svc->buffer_gf_temporal_ref[1].idx || cpi->alt_fb_idx == svc->buffer_gf_temporal_ref[1].idx) svc->buffer_gf_temporal_ref[1].is_used = 1; // For the fixed (non-flexible, non-bypass) SVC mode: // If long term temporal reference is enabled at the sequence level // (use_gf_temporal_ref == 1), and inter_layer is disabled (on inter-frames), // we can use golden as a second temporal reference // (since the spatial/inter-layer reference is disabled). // We check that the fb_idx for this reference (buffer_gf_temporal_ref.idx) is // unused (slots 7 and 6 should be available for a 3x3 layer system). // For now this second temporal reference will only be used for the // highest and next-to-highest spatial layers (i.e., top and middle layer for // 3 spatial layers). svc->use_gf_temporal_ref_current_layer = 0; if (svc->use_gf_temporal_ref && !svc->buffer_gf_temporal_ref[0].is_used && !svc->buffer_gf_temporal_ref[1].is_used && svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->disable_inter_layer_pred != INTER_LAYER_PRED_ON && svc->number_spatial_layers <= 3 && svc->number_temporal_layers <= 3 && svc->spatial_layer_id >= svc->number_spatial_layers - 2) { // Enable the second (long-term) temporal reference at the frame-level. svc->use_gf_temporal_ref_current_layer = 1; } // Check if current superframe has any layer sync, only check once on // base layer. if (svc->spatial_layer_id == 0) { int sl = 0; // Default is no sync. svc->superframe_has_layer_sync = 0; for (sl = 0; sl < svc->number_spatial_layers; ++sl) { if (cpi->svc.spatial_layer_sync[sl]) svc->superframe_has_layer_sync = 1; } } // Reset the drop flags for all spatial layers, on the base layer. if (svc->spatial_layer_id == 0) { vp9_zero(svc->drop_spatial_layer); // TODO(jianj/marpan): Investigate why setting svc->lst/gld/alt_fb_idx // causes an issue with frame dropping and temporal layers, when the frame // flags are passed via the encode call (bypass mode). Issue is that we're // resetting ext_refresh_frame_flags_pending to 0 on frame drops. if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { memset(&svc->lst_fb_idx, -1, sizeof(svc->lst_fb_idx)); memset(&svc->gld_fb_idx, -1, sizeof(svc->lst_fb_idx)); memset(&svc->alt_fb_idx, -1, sizeof(svc->lst_fb_idx)); // These are set by API before the superframe is encoded and they are // passed to encoder layer by layer. Don't reset them on layer 0 in bypass // mode.
vp9_zero(svc->update_buffer_slot); vp9_zero(svc->reference_last); vp9_zero(svc->reference_golden); vp9_zero(svc->reference_altref); // TODO(jianj): Remove these 3, deprecated. vp9_zero(svc->update_last); vp9_zero(svc->update_golden); vp9_zero(svc->update_altref); } } lc = &svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers + svc->temporal_layer_id]; // Setting the worst/best_quality via the encoder control: SET_SVC_PARAMETERS, // only for non-BYPASS mode (or when use_set_ref_frame_config is set). if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS || svc->use_set_ref_frame_config) { RATE_CONTROL *const lrc = &lc->rc; lrc->worst_quality = vp9_quantizer_to_qindex(lc->max_q); lrc->best_quality = vp9_quantizer_to_qindex(lc->min_q); } get_layer_resolution(cpi->oxcf.width, cpi->oxcf.height, lc->scaling_factor_num, lc->scaling_factor_den, &width, &height); // Use EIGHTTAP_SMOOTH for low resolutions. if (width * height <= 320 * 240) svc->downsample_filter_type[svc->spatial_layer_id] = EIGHTTAP_SMOOTH; // For scale factors > 0.75, set the phase to 0 (aligns decimated pixel // to source pixel). lc = &svc->layer_context[svc->spatial_layer_id * svc->number_temporal_layers + svc->temporal_layer_id]; if (lc->scaling_factor_num > (3 * lc->scaling_factor_den) >> 2) svc->downsample_filter_phase[svc->spatial_layer_id] = 0; // The usage of use_base_mv or partition_reuse assumes down-scale of 2x2. // For now, turn off use of base motion vectors and partition reuse if the // spatial scale factors for any layers are not 2, // keep the case of 3 spatial layers with scale factor of 4x4 for base layer. // TODO(marpan): Fix this to allow for use_base_mv for scale factors != 2. if (svc->number_spatial_layers > 1) { int sl; for (sl = 0; sl < svc->number_spatial_layers - 1; ++sl) { lc = &svc->layer_context[sl * svc->number_temporal_layers + svc->temporal_layer_id]; if ((lc->scaling_factor_num != lc->scaling_factor_den >> 1) && !(lc->scaling_factor_num == lc->scaling_factor_den >> 2 && sl == 0 && svc->number_spatial_layers == 3)) { svc->use_base_mv = 0; svc->use_partition_reuse = 0; break; } } // For non-zero spatial layers: if the previous spatial layer was dropped // disable the base_mv and partition_reuse features. if (svc->spatial_layer_id > 0 && svc->drop_spatial_layer[svc->spatial_layer_id - 1]) { svc->use_base_mv = 0; svc->use_partition_reuse = 0; } } svc->non_reference_frame = 0; if (cpi->common.frame_type != KEY_FRAME && !cpi->ext_refresh_last_frame && !cpi->ext_refresh_golden_frame && !cpi->ext_refresh_alt_ref_frame) svc->non_reference_frame = 1; // For flexible (bypass) mode, where update_buffer_slot is used, need to // check if all buffer slots are not refreshed. if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS) { if (svc->update_buffer_slot[svc->spatial_layer_id] != 0) svc->non_reference_frame = 0; } if (svc->spatial_layer_id == 0) { svc->high_source_sad_superframe = 0; svc->high_num_blocks_with_motion = 0; } if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->last_layer_dropped[svc->spatial_layer_id] && svc->fb_idx_upd_tl0[svc->spatial_layer_id] != -1 && !svc->layer_context[svc->temporal_layer_id].is_key_frame) { // For fixed/non-flexible mode, if the previous frame (same spatial layer // from previous superframe) was dropped, make sure the lst_fb_idx // for this frame corresponds to the buffer index updated on (last) encoded // TL0 frame (with same spatial layer).
cpi->lst_fb_idx = svc->fb_idx_upd_tl0[svc->spatial_layer_id]; } if (vp9_set_size_literal(cpi, width, height) != 0) return VPX_CODEC_INVALID_PARAM; return 0; } struct lookahead_entry *vp9_svc_lookahead_pop(VP9_COMP *const cpi, struct lookahead_ctx *ctx, int drain) { struct lookahead_entry *buf = NULL; if (ctx->sz && (drain || ctx->sz == ctx->max_sz - MAX_PRE_FRAMES)) { buf = vp9_lookahead_peek(ctx, 0); if (buf != NULL) { // Only remove the buffer when popping the highest layer. if (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1) { vp9_lookahead_pop(ctx, drain); } } } return buf; } void vp9_free_svc_cyclic_refresh(VP9_COMP *const cpi) { int sl, tl; SVC *const svc = &cpi->svc; const VP9EncoderConfig *const oxcf = &cpi->oxcf; for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { int layer = LAYER_IDS_TO_IDX(sl, tl, oxcf->ts_number_layers); LAYER_CONTEXT *const lc = &svc->layer_context[layer]; if (lc->map) vpx_free(lc->map); if (lc->last_coded_q_map) vpx_free(lc->last_coded_q_map); if (lc->consec_zero_mv) vpx_free(lc->consec_zero_mv); } } } // Reset on key frame: reset counters, references and buffer updates. void vp9_svc_reset_temporal_layers(VP9_COMP *const cpi, int is_key) { int sl, tl; SVC *const svc = &cpi->svc; LAYER_CONTEXT *lc = NULL; for (sl = 0; sl < svc->number_spatial_layers; ++sl) { for (tl = 0; tl < svc->number_temporal_layers; ++tl) { lc = &cpi->svc.layer_context[sl * svc->number_temporal_layers + tl]; lc->current_video_frame_in_layer = 0; if (is_key) lc->frames_from_key_frame = 0; } } if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0212) { set_flags_and_fb_idx_for_temporal_mode3(cpi); } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING) { set_flags_and_fb_idx_for_temporal_mode_noLayering(cpi); } else if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_0101) { set_flags_and_fb_idx_for_temporal_mode2(cpi); } vp9_update_temporal_layer_framerate(cpi); vp9_restore_layer_context(cpi); } void vp9_svc_check_reset_layer_rc_flag(VP9_COMP *const cpi) { SVC *svc = &cpi->svc; int sl, tl; for (sl = 0; sl < svc->number_spatial_layers; ++sl) { // Check for reset based on avg_frame_bandwidth for spatial layer sl. int layer = LAYER_IDS_TO_IDX(sl, svc->number_temporal_layers - 1, svc->number_temporal_layers); LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; if (lrc->avg_frame_bandwidth > (3 * lrc->last_avg_frame_bandwidth >> 1) || lrc->avg_frame_bandwidth < (lrc->last_avg_frame_bandwidth >> 1)) { // Reset for all temporal layers with spatial layer sl. for (tl = 0; tl < svc->number_temporal_layers; ++tl) { int layer = LAYER_IDS_TO_IDX(sl, tl, svc->number_temporal_layers); LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; lrc->rc_1_frame = 0; lrc->rc_2_frame = 0; lrc->bits_off_target = lrc->optimal_buffer_level; lrc->buffer_level = lrc->optimal_buffer_level; } } } } void vp9_svc_constrain_inter_layer_pred(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; SVC *const svc = &cpi->svc; const int sl = svc->spatial_layer_id; // Check for disabling inter-layer (spatial) prediction, if // svc.disable_inter_layer_pred is set. If the previous spatial layer was // dropped then disable the prediction from this (scaled) reference. // For INTER_LAYER_PRED_OFF_NONKEY: inter-layer prediction is disabled, // except on key frames or when any spatial layer is a sync layer.
if ((svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF_NONKEY && !svc->layer_context[svc->temporal_layer_id].is_key_frame && !svc->superframe_has_layer_sync) || svc->disable_inter_layer_pred == INTER_LAYER_PRED_OFF || svc->drop_spatial_layer[sl - 1]) { MV_REFERENCE_FRAME ref_frame; static const int flag_list[4] = { 0, VP9_LAST_FLAG, VP9_GOLD_FLAG, VP9_ALT_FLAG }; for (ref_frame = LAST_FRAME; ref_frame <= ALTREF_FRAME; ++ref_frame) { const YV12_BUFFER_CONFIG *yv12 = get_ref_frame_buffer(cpi, ref_frame); if (yv12 != NULL && (cpi->ref_frame_flags & flag_list[ref_frame])) { const struct scale_factors *const scale_fac = &cm->frame_refs[ref_frame - 1].sf; if (vp9_is_scaled(scale_fac)) { cpi->ref_frame_flags &= (~flag_list[ref_frame]); // Point golden/altref frame buffer index to last. if (!svc->simulcast_mode) { if (ref_frame == GOLDEN_FRAME) cpi->gld_fb_idx = cpi->lst_fb_idx; else if (ref_frame == ALTREF_FRAME) cpi->alt_fb_idx = cpi->lst_fb_idx; } } } } } // For fixed/non-flexible SVC: check for disabling inter-layer prediction. // If the reference for inter-layer prediction (the reference that is scaled) // is not the previous spatial layer from the same superframe, then we disable // inter-layer prediction. Only need to check when inter_layer prediction is // not set to OFF mode. if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->disable_inter_layer_pred != INTER_LAYER_PRED_OFF) { // We only use LAST and GOLDEN for prediction in real-time mode, so we // check both here. MV_REFERENCE_FRAME ref_frame; for (ref_frame = LAST_FRAME; ref_frame <= GOLDEN_FRAME; ref_frame++) { struct scale_factors *scale_fac = &cm->frame_refs[ref_frame - 1].sf; if (vp9_is_scaled(scale_fac)) { // If this reference was updated on the previous spatial layer of the // current superframe, then we keep this reference (don't disable). // Otherwise we disable the inter-layer prediction. // This condition is verified by checking if the current frame buffer // index is equal to any of the slots for the previous spatial layer, // and if so, check if that slot was updated/refreshed. If that is the // case, then this reference is valid for inter-layer prediction under // the mode INTER_LAYER_PRED_ON_CONSTRAINED. int fb_idx = ref_frame == LAST_FRAME ? cpi->lst_fb_idx : cpi->gld_fb_idx; int ref_flag = ref_frame == LAST_FRAME ? VP9_LAST_FLAG : VP9_GOLD_FLAG; int disable = 1; if (fb_idx < 0) continue; if ((fb_idx == svc->lst_fb_idx[sl - 1] && (svc->update_buffer_slot[sl - 1] & (1 << fb_idx))) || (fb_idx == svc->gld_fb_idx[sl - 1] && (svc->update_buffer_slot[sl - 1] & (1 << fb_idx))) || (fb_idx == svc->alt_fb_idx[sl - 1] && (svc->update_buffer_slot[sl - 1] & (1 << fb_idx)))) disable = 0; if (disable) cpi->ref_frame_flags &= (~ref_flag); } } } } void vp9_svc_assert_constraints_pattern(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; // For fixed/non-flexible mode, the following constraint are expected, // when inter-layer prediciton is on (default). if (svc->temporal_layering_mode != VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->disable_inter_layer_pred == INTER_LAYER_PRED_ON && svc->framedrop_mode != LAYER_DROP) { if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) { // On non-key frames: LAST is always temporal reference, GOLDEN is // spatial reference. if (svc->temporal_layer_id == 0) // Base temporal only predicts from base temporal. assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] == 0); else // Non-base temporal only predicts from lower temporal layer. 
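// ---------------------------------------------------------------------------
// Illustrative sketch (not part of libvpx): update_buffer_slot[] above acts as
// a per-spatial-layer bitmask with one bit per reference buffer slot. The
// INTER_LAYER_PRED_ON_CONSTRAINED check keeps a scaled reference only when the
// previous spatial layer actually refreshed the slot the current frame points
// at. demo_slot_was_refreshed is a hypothetical name.
#include <stdio.h>

static int demo_slot_was_refreshed(int update_buffer_slot_mask, int fb_idx) {
  return (update_buffer_slot_mask & (1 << fb_idx)) != 0;
}

int main(void) {
  // Suppose the lower layer refreshed slots 0 and 3: mask = 0b1001.
  const int mask = (1 << 0) | (1 << 3);
  printf("slot 3: %d\n", demo_slot_was_refreshed(mask, 3));  // 1: keep ref
  printf("slot 1: %d\n", demo_slot_was_refreshed(mask, 1));  // 0: disable
  return 0;
}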
assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] < svc->temporal_layer_id); if (svc->spatial_layer_id > 0 && cpi->ref_frame_flags & VP9_GOLD_FLAG && svc->spatial_layer_id > svc->first_spatial_layer_to_encode) { // Non-base spatial only predicts from lower spatial layer with same // temporal_id. assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] == svc->spatial_layer_id - 1); assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == svc->temporal_layer_id); } } else if (svc->spatial_layer_id > 0 && svc->spatial_layer_id > svc->first_spatial_layer_to_encode) { // Only 1 reference for frame whose base is key; reference may be LAST // or GOLDEN, so we check both. if (cpi->ref_frame_flags & VP9_LAST_FLAG) { assert(svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] == svc->spatial_layer_id - 1); assert(svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] == svc->temporal_layer_id); } else if (cpi->ref_frame_flags & VP9_GOLD_FLAG) { assert(svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] == svc->spatial_layer_id - 1); assert(svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] == svc->temporal_layer_id); } } } else if (svc->use_gf_temporal_ref_current_layer && !svc->layer_context[svc->temporal_layer_id].is_key_frame) { // For the usage of golden as second long term reference: the // temporal_layer_id of that reference must be base temporal layer 0, and // spatial_layer_id of that reference must be same as current // spatial_layer_id. If not, disable feature. // TODO(marpan): Investigate when this can happen, and maybe put this check // and reset in a different place. if (svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] != svc->spatial_layer_id || svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] != 0) svc->use_gf_temporal_ref_current_layer = 0; } } #if CONFIG_VP9_TEMPORAL_DENOISING int vp9_denoise_svc_non_key(VP9_COMP *const cpi) { int layer = LAYER_IDS_TO_IDX(cpi->svc.spatial_layer_id, cpi->svc.temporal_layer_id, cpi->svc.number_temporal_layers); LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; return denoise_svc(cpi) && !lc->is_key_frame; } #endif void vp9_svc_check_spatial_layer_sync(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; // Only for superframes whose base is not key, as those are // already sync frames. if (!svc->layer_context[svc->temporal_layer_id].is_key_frame) { if (svc->spatial_layer_id == 0) { // On base spatial layer: if the current superframe has a layer sync then // reset the pattern counters and reset to base temporal layer. if (svc->superframe_has_layer_sync) vp9_svc_reset_temporal_layers(cpi, cpi->common.frame_type == KEY_FRAME); } // If the layer sync is set for this current spatial layer then // disable the temporal reference. if (svc->spatial_layer_id > 0 && svc->spatial_layer_sync[svc->spatial_layer_id]) { cpi->ref_frame_flags &= (~VP9_LAST_FLAG); if (svc->use_gf_temporal_ref_current_layer) { int index = svc->spatial_layer_id; // If golden is used as second reference: need to remove it from // prediction, reset refresh period to 0, and update the reference. svc->use_gf_temporal_ref_current_layer = 0; cpi->rc.baseline_gf_interval = 0; cpi->rc.frames_till_gf_update_due = 0; // On layer sync frame we must update the buffer index used for long // term reference. Use the alt_ref since it is not used or updated on // sync frames. 
if (svc->number_spatial_layers == 3) index = svc->spatial_layer_id - 1; assert(index >= 0); cpi->alt_fb_idx = svc->buffer_gf_temporal_ref[index].idx; cpi->ext_refresh_alt_ref_frame = 1; } } } } void vp9_svc_update_ref_frame_buffer_idx(VP9_COMP *const cpi) { SVC *const svc = &cpi->svc; // Update the usage of frame buffer index for base spatial layers. if (svc->spatial_layer_id == 0) { if ((cpi->ref_frame_flags & VP9_LAST_FLAG) || cpi->refresh_last_frame) svc->fb_idx_base[cpi->lst_fb_idx] = 1; if ((cpi->ref_frame_flags & VP9_GOLD_FLAG) || cpi->refresh_golden_frame) svc->fb_idx_base[cpi->gld_fb_idx] = 1; if ((cpi->ref_frame_flags & VP9_ALT_FLAG) || cpi->refresh_alt_ref_frame) svc->fb_idx_base[cpi->alt_fb_idx] = 1; } } static void vp9_svc_update_ref_frame_bypass_mode(VP9_COMP *const cpi) { // For non-flexible/bypass SVC mode: check for refreshing other buffer // slots. SVC *const svc = &cpi->svc; VP9_COMMON *const cm = &cpi->common; BufferPool *const pool = cm->buffer_pool; int i; for (i = 0; i < REF_FRAMES; i++) { if (cm->frame_type == KEY_FRAME || svc->update_buffer_slot[svc->spatial_layer_id] & (1 << i)) { ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx); svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id; svc->fb_idx_temporal_layer_id[i] = svc->temporal_layer_id; } } } void vp9_svc_update_ref_frame(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; SVC *const svc = &cpi->svc; BufferPool *const pool = cm->buffer_pool; if (svc->temporal_layering_mode == VP9E_TEMPORAL_LAYERING_MODE_BYPASS && svc->use_set_ref_frame_config) { vp9_svc_update_ref_frame_bypass_mode(cpi); } else if (cm->frame_type == KEY_FRAME && !svc->simulcast_mode) { // Keep track of frame index for each reference frame. int i; // On key frame update all reference frame slots. for (i = 0; i < REF_FRAMES; i++) { svc->fb_idx_spatial_layer_id[i] = svc->spatial_layer_id; svc->fb_idx_temporal_layer_id[i] = svc->temporal_layer_id; // LAST/GOLDEN/ALTREF is already updated above. if (i != cpi->lst_fb_idx && i != cpi->gld_fb_idx && i != cpi->alt_fb_idx) ref_cnt_fb(pool->frame_bufs, &cm->ref_frame_map[i], cm->new_fb_idx); } } else { if (cpi->refresh_last_frame) { svc->fb_idx_spatial_layer_id[cpi->lst_fb_idx] = svc->spatial_layer_id; svc->fb_idx_temporal_layer_id[cpi->lst_fb_idx] = svc->temporal_layer_id; } if (cpi->refresh_golden_frame) { svc->fb_idx_spatial_layer_id[cpi->gld_fb_idx] = svc->spatial_layer_id; svc->fb_idx_temporal_layer_id[cpi->gld_fb_idx] = svc->temporal_layer_id; } if (cpi->refresh_alt_ref_frame) { svc->fb_idx_spatial_layer_id[cpi->alt_fb_idx] = svc->spatial_layer_id; svc->fb_idx_temporal_layer_id[cpi->alt_fb_idx] = svc->temporal_layer_id; } } // Copy flags from encoder to SVC struct. vp9_copy_flags_ref_update_idx(cpi); vp9_svc_update_ref_frame_buffer_idx(cpi); } void vp9_svc_adjust_frame_rate(VP9_COMP *const cpi) { int64_t this_duration = cpi->svc.timebase_fac * cpi->svc.duration[cpi->svc.spatial_layer_id]; vp9_new_framerate(cpi, 10000000.0 / this_duration); } void vp9_svc_adjust_avg_frame_qindex(VP9_COMP *const cpi) { VP9_COMMON *const cm = &cpi->common; SVC *const svc = &cpi->svc; RATE_CONTROL *const rc = &cpi->rc; // On key frames in CBR mode: reset the avg_frame_index for base layer // (to level closer to worst_quality) if the overshoot is significant. // Reset it for all temporal layers on base spatial layer. 
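// ---------------------------------------------------------------------------
// Illustrative sketch (not part of libvpx) of the overshoot rule implemented
// just below: a key frame counts as a significant overshoot when its coded
// size exceeds 3x the average frame bandwidth, and the running average Q index
// is then raised at least to the midpoint of the frame's base Q and
// worst_quality. The demo_* names are hypothetical.
static int demo_max(int a, int b) { return a > b ? a : b; }

static int demo_adjusted_avg_qindex(int avg_qindex, int base_qindex,
                                    int worst_quality, int projected_size,
                                    int avg_frame_bandwidth) {
  if (projected_size > 3 * avg_frame_bandwidth)
    return demo_max(avg_qindex, (base_qindex + worst_quality) >> 1);
  return avg_qindex;
}

int main(void) {
  // 4x overshoot: the average Q index jumps from 60 to (40 + 255) >> 1 = 147.
  return demo_adjusted_avg_qindex(60, 40, 255, 4000, 1000) == 147 ? 0 : 1;
}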
if (cm->frame_type == KEY_FRAME && cpi->oxcf.rc_mode == VPX_CBR && !svc->simulcast_mode && rc->projected_frame_size > 3 * rc->avg_frame_bandwidth) { int tl; rc->avg_frame_qindex[INTER_FRAME] = VPXMAX(rc->avg_frame_qindex[INTER_FRAME], (cm->base_qindex + rc->worst_quality) >> 1); for (tl = 0; tl < svc->number_temporal_layers; ++tl) { const int layer = LAYER_IDS_TO_IDX(0, tl, svc->number_temporal_layers); LAYER_CONTEXT *lc = &svc->layer_context[layer]; RATE_CONTROL *lrc = &lc->rc; lrc->avg_frame_qindex[INTER_FRAME] = rc->avg_frame_qindex[INTER_FRAME]; } } } libvpx-1.8.2/vp9/encoder/vp9_svc_layercontext.h000066400000000000000000000224301357355204000215330ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_ #define VPX_VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_ #include "vpx/vpx_encoder.h" #include "vp9/encoder/vp9_ratectrl.h" #ifdef __cplusplus extern "C" { #endif typedef enum { // Inter-layer prediction is on on all frames. INTER_LAYER_PRED_ON, // Inter-layer prediction is off on all frames. INTER_LAYER_PRED_OFF, // Inter-layer prediction is off on non-key frames and non-sync frames. INTER_LAYER_PRED_OFF_NONKEY, // Inter-layer prediction is on on all frames, but constrained such // that any layer S (> 0) can only predict from previous spatial // layer S-1, from the same superframe. INTER_LAYER_PRED_ON_CONSTRAINED } INTER_LAYER_PRED; typedef struct BUFFER_LONGTERM_REF { int idx; int is_used; } BUFFER_LONGTERM_REF; typedef struct { RATE_CONTROL rc; int target_bandwidth; int spatial_layer_target_bandwidth; // Target for the spatial layer. double framerate; int avg_frame_size; int max_q; int min_q; int scaling_factor_num; int scaling_factor_den; TWO_PASS twopass; vpx_fixed_buf_t rc_twopass_stats_in; unsigned int current_video_frame_in_layer; int is_key_frame; int frames_from_key_frame; FRAME_TYPE last_frame_type; struct lookahead_entry *alt_ref_source; int alt_ref_idx; int gold_ref_idx; int has_alt_frame; size_t layer_size; // Cyclic refresh parameters (aq-mode=3), that need to be updated per-frame. // TODO(jianj/marpan): Is it better to use the full cyclic refresh struct. int sb_index; signed char *map; uint8_t *last_coded_q_map; uint8_t *consec_zero_mv; int actual_num_seg1_blocks; int actual_num_seg2_blocks; int counter_encode_maxq_scene_change; uint8_t speed; } LAYER_CONTEXT; typedef struct SVC { int spatial_layer_id; int temporal_layer_id; int number_spatial_layers; int number_temporal_layers; int spatial_layer_to_encode; // Workaround for multiple frame contexts enum { ENCODED = 0, ENCODING, NEED_TO_ENCODE } encode_empty_frame_state; struct lookahead_entry empty_frame; int encode_intra_empty_frame; // Store scaled source frames to be used for temporal filter to generate // a alt ref frame. YV12_BUFFER_CONFIG scaled_frames[MAX_LAG_BUFFERS]; // Temp buffer used for 2-stage down-sampling, for real-time mode. YV12_BUFFER_CONFIG scaled_temp; int scaled_one_half; int scaled_temp_is_alloc; // Layer context used for rate control in one pass temporal CBR mode or // two pass spatial mode. LAYER_CONTEXT layer_context[VPX_MAX_LAYERS]; // Indicates what sort of temporal layering is used. 
// Currently, this only works for CBR mode. VP9E_TEMPORAL_LAYERING_MODE temporal_layering_mode; // Frame flags and buffer indexes for each spatial layer, set by the // application (external settings). int ext_frame_flags[VPX_MAX_LAYERS]; int lst_fb_idx[VPX_MAX_LAYERS]; int gld_fb_idx[VPX_MAX_LAYERS]; int alt_fb_idx[VPX_MAX_LAYERS]; int force_zero_mode_spatial_ref; // Sequence level flag to enable second (long term) temporal reference. int use_gf_temporal_ref; // Frame level flag to enable second (long term) temporal reference. int use_gf_temporal_ref_current_layer; // Allow second reference for at most 2 top highest resolution layers. BUFFER_LONGTERM_REF buffer_gf_temporal_ref[2]; int current_superframe; int non_reference_frame; int use_base_mv; int use_partition_reuse; // Used to control the downscaling filter for source scaling, for 1 pass CBR. // downsample_filter_phase: = 0 will do sub-sampling (no weighted average), // = 8 will center the target pixel and get a symmetric averaging filter. // downsample_filter_type: 4 filters may be used: eighttap_regular, // eighttap_smooth, eighttap_sharp, and bilinear. INTERP_FILTER downsample_filter_type[VPX_SS_MAX_LAYERS]; int downsample_filter_phase[VPX_SS_MAX_LAYERS]; BLOCK_SIZE *prev_partition_svc; int mi_stride[VPX_MAX_LAYERS]; int mi_rows[VPX_MAX_LAYERS]; int mi_cols[VPX_MAX_LAYERS]; int first_layer_denoise; int skip_enhancement_layer; int lower_layer_qindex; int last_layer_dropped[VPX_MAX_LAYERS]; int drop_spatial_layer[VPX_MAX_LAYERS]; int framedrop_thresh[VPX_MAX_LAYERS]; int drop_count[VPX_MAX_LAYERS]; int force_drop_constrained_from_above[VPX_MAX_LAYERS]; int max_consec_drop; SVC_LAYER_DROP_MODE framedrop_mode; INTER_LAYER_PRED disable_inter_layer_pred; // Flag to indicate scene change and high num of motion blocks at current // superframe, scene detection is currently checked for each superframe prior // to encoding, on the full resolution source. int high_source_sad_superframe; int high_num_blocks_with_motion; // Flags used to get SVC pattern info. int update_buffer_slot[VPX_SS_MAX_LAYERS]; uint8_t reference_last[VPX_SS_MAX_LAYERS]; uint8_t reference_golden[VPX_SS_MAX_LAYERS]; uint8_t reference_altref[VPX_SS_MAX_LAYERS]; // TODO(jianj): Remove these last 3, deprecated. uint8_t update_last[VPX_SS_MAX_LAYERS]; uint8_t update_golden[VPX_SS_MAX_LAYERS]; uint8_t update_altref[VPX_SS_MAX_LAYERS]; // Keep track of the frame buffer index updated/refreshed on the base // temporal superframe. int fb_idx_upd_tl0[VPX_SS_MAX_LAYERS]; // Keep track of the spatial and temporal layer id of the frame that last // updated the frame buffer index. uint8_t fb_idx_spatial_layer_id[REF_FRAMES]; uint8_t fb_idx_temporal_layer_id[REF_FRAMES]; int spatial_layer_sync[VPX_SS_MAX_LAYERS]; uint8_t set_intra_only_frame; uint8_t previous_frame_is_intra_only; uint8_t superframe_has_layer_sync; uint8_t fb_idx_base[REF_FRAMES]; int use_set_ref_frame_config; int temporal_layer_id_per_spatial[VPX_SS_MAX_LAYERS]; int first_spatial_layer_to_encode; // Parameters for allowing framerate per spatial layer, and buffer // update based on timestamps. int64_t duration[VPX_SS_MAX_LAYERS]; int64_t timebase_fac; int64_t time_stamp_superframe; int64_t time_stamp_prev[VPX_SS_MAX_LAYERS]; int num_encoded_top_layer; // Every spatial layer on a superframe whose base is key is key too. int simulcast_mode; } SVC; struct VP9_COMP; // Initialize layer context data from init_config(). 
void vp9_init_layer_context(struct VP9_COMP *const cpi); // Update the layer context from a change_config() call. void vp9_update_layer_context_change_config(struct VP9_COMP *const cpi, const int target_bandwidth); // Prior to encoding the frame, update framerate-related quantities // for the current temporal layer. void vp9_update_temporal_layer_framerate(struct VP9_COMP *const cpi); // Update framerate-related quantities for the current spatial layer. void vp9_update_spatial_layer_framerate(struct VP9_COMP *const cpi, double framerate); // Prior to encoding the frame, set the layer context, for the current layer // to be encoded, to the cpi struct. void vp9_restore_layer_context(struct VP9_COMP *const cpi); // Save the layer context after encoding the frame. void vp9_save_layer_context(struct VP9_COMP *const cpi); // Initialize second pass rc for spatial svc. void vp9_init_second_pass_spatial_svc(struct VP9_COMP *cpi); void get_layer_resolution(const int width_org, const int height_org, const int num, const int den, int *width_out, int *height_out); // Increment number of video frames in layer void vp9_inc_frame_in_layer(struct VP9_COMP *const cpi); // Check if current layer is key frame in spatial upper layer int vp9_is_upper_layer_key_frame(const struct VP9_COMP *const cpi); // Get the next source buffer to encode struct lookahead_entry *vp9_svc_lookahead_pop(struct VP9_COMP *const cpi, struct lookahead_ctx *ctx, int drain); // Start a frame and initialize svc parameters int vp9_svc_start_frame(struct VP9_COMP *const cpi); #if CONFIG_VP9_TEMPORAL_DENOISING int vp9_denoise_svc_non_key(struct VP9_COMP *const cpi); #endif void vp9_copy_flags_ref_update_idx(struct VP9_COMP *const cpi); int vp9_one_pass_cbr_svc_start_layer(struct VP9_COMP *const cpi); void vp9_free_svc_cyclic_refresh(struct VP9_COMP *const cpi); void vp9_svc_reset_temporal_layers(struct VP9_COMP *const cpi, int is_key); void vp9_svc_check_reset_layer_rc_flag(struct VP9_COMP *const cpi); void vp9_svc_constrain_inter_layer_pred(struct VP9_COMP *const cpi); void vp9_svc_assert_constraints_pattern(struct VP9_COMP *const cpi); void vp9_svc_check_spatial_layer_sync(struct VP9_COMP *const cpi); void vp9_svc_update_ref_frame_buffer_idx(struct VP9_COMP *const cpi); void vp9_svc_update_ref_frame_key_simulcast(struct VP9_COMP *const cpi); void vp9_svc_update_ref_frame(struct VP9_COMP *const cpi); void vp9_svc_adjust_frame_rate(struct VP9_COMP *const cpi); void vp9_svc_adjust_avg_frame_qindex(struct VP9_COMP *const cpi); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_SVC_LAYERCONTEXT_H_ libvpx-1.8.2/vp9/encoder/vp9_temporal_filter.c000066400000000000000000001255621357355204000213340ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <assert.h> #include <limits.h> #include <stdlib.h> #include <string.h> #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_common.h" #include "vp9/common/vp9_onyxc_int.h" #include "vp9/common/vp9_quant_common.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/encoder/vp9_encodeframe.h" #include "vp9/encoder/vp9_ethread.h" #include "vp9/encoder/vp9_extend.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_mcomp.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_quantize.h" #include "vp9/encoder/vp9_ratectrl.h" #include "vp9/encoder/vp9_segmentation.h" #include "vp9/encoder/vp9_temporal_filter.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_ports/vpx_timer.h" #include "vpx_scale/vpx_scale.h" static int fixed_divide[512]; static unsigned int index_mult[14] = { 0, 0, 0, 0, 49152, 39322, 32768, 28087, 24576, 21846, 19661, 17874, 0, 15124 }; #if CONFIG_VP9_HIGHBITDEPTH static int64_t highbd_index_mult[14] = { 0U, 0U, 0U, 0U, 3221225472U, 2576980378U, 2147483648U, 1840700270U, 1610612736U, 1431655766U, 1288490189U, 1171354718U, 0U, 991146300U }; #endif // CONFIG_VP9_HIGHBITDEPTH static void temporal_filter_predictors_mb_c( MACROBLOCKD *xd, uint8_t *y_mb_ptr, uint8_t *u_mb_ptr, uint8_t *v_mb_ptr, int stride, int uv_block_width, int uv_block_height, int mv_row, int mv_col, uint8_t *pred, struct scale_factors *scale, int x, int y, MV *blk_mvs, int use_32x32) { const int which_mv = 0; const InterpKernel *const kernel = vp9_filter_kernels[EIGHTTAP_SHARP]; int i, j, k = 0, ys = (BH >> 1), xs = (BW >> 1); enum mv_precision mv_precision_uv; int uv_stride; if (uv_block_width == (BW >> 1)) { uv_stride = (stride + 1) >> 1; mv_precision_uv = MV_PRECISION_Q4; } else { uv_stride = stride; mv_precision_uv = MV_PRECISION_Q3; } #if !CONFIG_VP9_HIGHBITDEPTH (void)xd; #endif if (use_32x32) { const MV mv = { mv_row, mv_col }; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_highbd_build_inter_predictor(CONVERT_TO_SHORTPTR(y_mb_ptr), stride, CONVERT_TO_SHORTPTR(&pred[0]), BW, &mv, scale, BW, BH, which_mv, kernel, MV_PRECISION_Q3, x, y, xd->bd); vp9_highbd_build_inter_predictor( CONVERT_TO_SHORTPTR(u_mb_ptr), uv_stride, CONVERT_TO_SHORTPTR(&pred[BLK_PELS]), uv_block_width, &mv, scale, uv_block_width, uv_block_height, which_mv, kernel, mv_precision_uv, x, y, xd->bd); vp9_highbd_build_inter_predictor( CONVERT_TO_SHORTPTR(v_mb_ptr), uv_stride, CONVERT_TO_SHORTPTR(&pred[(BLK_PELS << 1)]), uv_block_width, &mv, scale, uv_block_width, uv_block_height, which_mv, kernel, mv_precision_uv, x, y, xd->bd); return; } #endif // CONFIG_VP9_HIGHBITDEPTH vp9_build_inter_predictor(y_mb_ptr, stride, &pred[0], BW, &mv, scale, BW, BH, which_mv, kernel, MV_PRECISION_Q3, x, y); vp9_build_inter_predictor(u_mb_ptr, uv_stride, &pred[BLK_PELS], uv_block_width, &mv, scale, uv_block_width, uv_block_height, which_mv, kernel, mv_precision_uv, x, y); vp9_build_inter_predictor(v_mb_ptr, uv_stride, &pred[(BLK_PELS << 1)], uv_block_width, &mv, scale, uv_block_width, uv_block_height, which_mv, kernel, mv_precision_uv, x, y); return; } // When use_32x32 is 0, construct the 32x32 predictor using 4 16x16 // predictors.
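// ---------------------------------------------------------------------------
// Illustrative sketch (not part of libvpx): the loops below split the 32x32
// block into four 16x16 quarters (ys = BH >> 1, xs = BW >> 1); quarter k reads
// the source at offset i * stride + j, writes the prediction at i * BW + j,
// and takes its own motion vector from blk_mvs[k]. A standalone demo of that
// offset arithmetic (demo-only constants):
#include <stdio.h>

int main(void) {
  const int bh = 32, bw = 32, stride = 1280;  // stand-ins for BH, BW
  const int ys = bh >> 1, xs = bw >> 1;       // 16x16 sub-blocks
  int i, j, k = 0;
  for (i = 0; i < bh; i += ys) {
    for (j = 0; j < bw; j += xs) {
      printf("sub-block %d: src offset %d, pred offset %d\n", k,
             i * stride + j, i * bw + j);
      ++k;
    }
  }
  return 0;
}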
// Y predictor for (i = 0; i < BH; i += ys) { for (j = 0; j < BW; j += xs) { const MV mv = blk_mvs[k]; const int y_offset = i * stride + j; const int p_offset = i * BW + j; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_highbd_build_inter_predictor( CONVERT_TO_SHORTPTR(y_mb_ptr + y_offset), stride, CONVERT_TO_SHORTPTR(&pred[p_offset]), BW, &mv, scale, xs, ys, which_mv, kernel, MV_PRECISION_Q3, x, y, xd->bd); } else { vp9_build_inter_predictor(y_mb_ptr + y_offset, stride, &pred[p_offset], BW, &mv, scale, xs, ys, which_mv, kernel, MV_PRECISION_Q3, x, y); } #else vp9_build_inter_predictor(y_mb_ptr + y_offset, stride, &pred[p_offset], BW, &mv, scale, xs, ys, which_mv, kernel, MV_PRECISION_Q3, x, y); #endif // CONFIG_VP9_HIGHBITDEPTH k++; } } // U and V predictors ys = (uv_block_height >> 1); xs = (uv_block_width >> 1); k = 0; for (i = 0; i < uv_block_height; i += ys) { for (j = 0; j < uv_block_width; j += xs) { const MV mv = blk_mvs[k]; const int uv_offset = i * uv_stride + j; const int p_offset = i * uv_block_width + j; #if CONFIG_VP9_HIGHBITDEPTH if (xd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { vp9_highbd_build_inter_predictor( CONVERT_TO_SHORTPTR(u_mb_ptr + uv_offset), uv_stride, CONVERT_TO_SHORTPTR(&pred[BLK_PELS + p_offset]), uv_block_width, &mv, scale, xs, ys, which_mv, kernel, mv_precision_uv, x, y, xd->bd); vp9_highbd_build_inter_predictor( CONVERT_TO_SHORTPTR(v_mb_ptr + uv_offset), uv_stride, CONVERT_TO_SHORTPTR(&pred[(BLK_PELS << 1) + p_offset]), uv_block_width, &mv, scale, xs, ys, which_mv, kernel, mv_precision_uv, x, y, xd->bd); } else { vp9_build_inter_predictor(u_mb_ptr + uv_offset, uv_stride, &pred[BLK_PELS + p_offset], uv_block_width, &mv, scale, xs, ys, which_mv, kernel, mv_precision_uv, x, y); vp9_build_inter_predictor(v_mb_ptr + uv_offset, uv_stride, &pred[(BLK_PELS << 1) + p_offset], uv_block_width, &mv, scale, xs, ys, which_mv, kernel, mv_precision_uv, x, y); } #else vp9_build_inter_predictor(u_mb_ptr + uv_offset, uv_stride, &pred[BLK_PELS + p_offset], uv_block_width, &mv, scale, xs, ys, which_mv, kernel, mv_precision_uv, x, y); vp9_build_inter_predictor(v_mb_ptr + uv_offset, uv_stride, &pred[(BLK_PELS << 1) + p_offset], uv_block_width, &mv, scale, xs, ys, which_mv, kernel, mv_precision_uv, x, y); #endif // CONFIG_VP9_HIGHBITDEPTH k++; } } } void vp9_temporal_filter_init(void) { int i; fixed_divide[0] = 0; for (i = 1; i < 512; ++i) fixed_divide[i] = 0x80000 / i; } static INLINE int mod_index(int sum_dist, int index, int rounding, int strength, int filter_weight) { int mod; assert(index >= 0 && index <= 13); assert(index_mult[index] != 0); mod = ((unsigned int)clamp(sum_dist, 0, UINT16_MAX) * index_mult[index]) >> 16; mod += rounding; mod >>= strength; mod = VPXMIN(16, mod); mod = 16 - mod; mod *= filter_weight; return mod; } #if CONFIG_VP9_HIGHBITDEPTH static INLINE int highbd_mod_index(int sum_dist, int index, int rounding, int strength, int filter_weight) { int mod; assert(index >= 0 && index <= 13); assert(highbd_index_mult[index] != 0); mod = (int)((clamp(sum_dist, 0, INT32_MAX) * highbd_index_mult[index]) >> 32); mod += rounding; mod >>= strength; mod = VPXMIN(16, mod); mod = 16 - mod; mod *= filter_weight; return mod; } #endif // CONFIG_VP9_HIGHBITDEPTH static INLINE int get_filter_weight(unsigned int i, unsigned int j, unsigned int block_height, unsigned int block_width, const int *const blk_fw, int use_32x32) { // blk_fw[0] ~ blk_fw[3] are the same. 
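// ---------------------------------------------------------------------------
// Illustrative sketch (not part of libvpx): mod_index() above turns an
// accumulated squared difference into a filter weight without dividing in the
// inner loop. index_mult[i] approximates (3 << 16) / i (the table stores the
// rounded values), so (sum * index_mult[i]) >> 16 is roughly 3 * sum / i; the
// result is scaled down by `strength`, capped at 16 and inverted, so small
// differences keep large weights. demo_weight is a hypothetical name.
#include <stdio.h>

static int demo_weight(int sum_dist, int index, int strength,
                       int filter_weight) {
  const int mult = (3 << 16) / index;  // truncated; the real table rounds
  const int rounding = (1 << strength) >> 1;
  int mod = (sum_dist * mult) >> 16;   // ~ 3 * sum_dist / index
  mod = (mod + rounding) >> strength;
  if (mod > 16) mod = 16;
  return (16 - mod) * filter_weight;
}

int main(void) {
  // 11 samples (3x3 luma neighborhood + 2 chroma), strength 6, weight 2.
  printf("perfect match: %d\n", demo_weight(0, 11, 6, 2));     // 32
  printf("large error:   %d\n", demo_weight(2000, 11, 6, 2));  // 14
  return 0;
}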
if (use_32x32) { return blk_fw[0]; } if (i < block_height / 2) { if (j < block_width / 2) { return blk_fw[0]; } return blk_fw[1]; } if (j < block_width / 2) { return blk_fw[2]; } return blk_fw[3]; } void vp9_apply_temporal_filter_c( const uint8_t *y_frame1, int y_stride, const uint8_t *y_pred, int y_buf_stride, const uint8_t *u_frame1, const uint8_t *v_frame1, int uv_stride, const uint8_t *u_pred, const uint8_t *v_pred, int uv_buf_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accumulator, uint16_t *y_count, uint32_t *u_accumulator, uint16_t *u_count, uint32_t *v_accumulator, uint16_t *v_count) { unsigned int i, j, k, m; int modifier; const int rounding = (1 << strength) >> 1; const unsigned int uv_block_width = block_width >> ss_x; const unsigned int uv_block_height = block_height >> ss_y; DECLARE_ALIGNED(16, uint16_t, y_diff_sse[BLK_PELS]); DECLARE_ALIGNED(16, uint16_t, u_diff_sse[BLK_PELS]); DECLARE_ALIGNED(16, uint16_t, v_diff_sse[BLK_PELS]); int idx = 0, idy; assert(strength >= 0); assert(strength <= 6); memset(y_diff_sse, 0, BLK_PELS * sizeof(uint16_t)); memset(u_diff_sse, 0, BLK_PELS * sizeof(uint16_t)); memset(v_diff_sse, 0, BLK_PELS * sizeof(uint16_t)); // Calculate diff^2 for each pixel of the 16x16 block. // TODO(yunqing): the following code needs to be optimized. for (i = 0; i < block_height; i++) { for (j = 0; j < block_width; j++) { const int16_t diff = y_frame1[i * (int)y_stride + j] - y_pred[i * (int)block_width + j]; y_diff_sse[idx++] = diff * diff; } } idx = 0; for (i = 0; i < uv_block_height; i++) { for (j = 0; j < uv_block_width; j++) { const int16_t diffu = u_frame1[i * uv_stride + j] - u_pred[i * uv_buf_stride + j]; const int16_t diffv = v_frame1[i * uv_stride + j] - v_pred[i * uv_buf_stride + j]; u_diff_sse[idx] = diffu * diffu; v_diff_sse[idx] = diffv * diffv; idx++; } } for (i = 0, k = 0, m = 0; i < block_height; i++) { for (j = 0; j < block_width; j++) { const int pixel_value = y_pred[i * y_buf_stride + j]; const int filter_weight = get_filter_weight(i, j, block_height, block_width, blk_fw, use_32x32); // non-local mean approach int y_index = 0; const int uv_r = i >> ss_y; const int uv_c = j >> ss_x; modifier = 0; for (idy = -1; idy <= 1; ++idy) { for (idx = -1; idx <= 1; ++idx) { const int row = (int)i + idy; const int col = (int)j + idx; if (row >= 0 && row < (int)block_height && col >= 0 && col < (int)block_width) { modifier += y_diff_sse[row * (int)block_width + col]; ++y_index; } } } assert(y_index > 0); modifier += u_diff_sse[uv_r * uv_block_width + uv_c]; modifier += v_diff_sse[uv_r * uv_block_width + uv_c]; y_index += 2; modifier = mod_index(modifier, y_index, rounding, strength, filter_weight); y_count[k] += modifier; y_accumulator[k] += modifier * pixel_value; ++k; // Process chroma component if (!(i & ss_y) && !(j & ss_x)) { const int u_pixel_value = u_pred[uv_r * uv_buf_stride + uv_c]; const int v_pixel_value = v_pred[uv_r * uv_buf_stride + uv_c]; // non-local mean approach int cr_index = 0; int u_mod = 0, v_mod = 0; int y_diff = 0; for (idy = -1; idy <= 1; ++idy) { for (idx = -1; idx <= 1; ++idx) { const int row = uv_r + idy; const int col = uv_c + idx; if (row >= 0 && row < (int)uv_block_height && col >= 0 && col < (int)uv_block_width) { u_mod += u_diff_sse[row * uv_block_width + col]; v_mod += v_diff_sse[row * uv_block_width + col]; ++cr_index; } } } assert(cr_index > 0); for (idy = 0; idy < 1 + ss_y; ++idy) { for (idx = 0; idx < 1 + ss_x; ++idx) { 
const int row = (uv_r << ss_y) + idy; const int col = (uv_c << ss_x) + idx; y_diff += y_diff_sse[row * (int)block_width + col]; ++cr_index; } } u_mod += y_diff; v_mod += y_diff; u_mod = mod_index(u_mod, cr_index, rounding, strength, filter_weight); v_mod = mod_index(v_mod, cr_index, rounding, strength, filter_weight); u_count[m] += u_mod; u_accumulator[m] += u_mod * u_pixel_value; v_count[m] += v_mod; v_accumulator[m] += v_mod * v_pixel_value; ++m; } // Complete YUV pixel } } } #if CONFIG_VP9_HIGHBITDEPTH void vp9_highbd_apply_temporal_filter_c( const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_32x32, uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count) { const int uv_block_width = block_width >> ss_x; const int uv_block_height = block_height >> ss_y; const int y_diff_stride = BW; const int uv_diff_stride = BW; DECLARE_ALIGNED(16, uint32_t, y_diff_sse[BLK_PELS]); DECLARE_ALIGNED(16, uint32_t, u_diff_sse[BLK_PELS]); DECLARE_ALIGNED(16, uint32_t, v_diff_sse[BLK_PELS]); const int rounding = (1 << strength) >> 1; // Loop variables int row, col; int uv_row, uv_col; int row_step, col_step; memset(y_diff_sse, 0, BLK_PELS * sizeof(uint32_t)); memset(u_diff_sse, 0, BLK_PELS * sizeof(uint32_t)); memset(v_diff_sse, 0, BLK_PELS * sizeof(uint32_t)); // Get the square diffs for (row = 0; row < (int)block_height; row++) { for (col = 0; col < (int)block_width; col++) { const int diff = y_src[row * y_src_stride + col] - y_pre[row * y_pre_stride + col]; y_diff_sse[row * y_diff_stride + col] = diff * diff; } } for (row = 0; row < uv_block_height; row++) { for (col = 0; col < uv_block_width; col++) { const int u_diff = u_src[row * uv_src_stride + col] - u_pre[row * uv_pre_stride + col]; const int v_diff = v_src[row * uv_src_stride + col] - v_pre[row * uv_pre_stride + col]; u_diff_sse[row * uv_diff_stride + col] = u_diff * u_diff; v_diff_sse[row * uv_diff_stride + col] = v_diff * v_diff; } } // Apply the filter to luma for (row = 0; row < (int)block_height; row++) { for (col = 0; col < (int)block_width; col++) { const int uv_row = row >> ss_y; const int uv_col = col >> ss_x; const int filter_weight = get_filter_weight( row, col, block_height, block_width, blk_fw, use_32x32); // First we get the modifier for the current y pixel const int y_pixel = y_pre[row * y_pre_stride + col]; int y_num_used = 0; int y_mod = 0; // Sum the neighboring 3x3 y pixels for (row_step = -1; row_step <= 1; row_step++) { for (col_step = -1; col_step <= 1; col_step++) { const int sub_row = row + row_step; const int sub_col = col + col_step; if (sub_row >= 0 && sub_row < (int)block_height && sub_col >= 0 && sub_col < (int)block_width) { y_mod += y_diff_sse[sub_row * y_diff_stride + sub_col]; y_num_used++; } } } // Sum the corresponding uv pixels to the current y modifier // Note we are rounding down instead of rounding to the nearest pixel. 
y_mod += u_diff_sse[uv_row * uv_diff_stride + uv_col]; y_mod += v_diff_sse[uv_row * uv_diff_stride + uv_col]; y_num_used += 2; // Set the modifier y_mod = highbd_mod_index(y_mod, y_num_used, rounding, strength, filter_weight); // Accumulate the result y_count[row * block_width + col] += y_mod; y_accum[row * block_width + col] += y_mod * y_pixel; } } // Apply the filter to chroma for (uv_row = 0; uv_row < uv_block_height; uv_row++) { for (uv_col = 0; uv_col < uv_block_width; uv_col++) { const int y_row = uv_row << ss_y; const int y_col = uv_col << ss_x; const int filter_weight = get_filter_weight( uv_row, uv_col, uv_block_height, uv_block_width, blk_fw, use_32x32); const int u_pixel = u_pre[uv_row * uv_pre_stride + uv_col]; const int v_pixel = v_pre[uv_row * uv_pre_stride + uv_col]; int uv_num_used = 0; int u_mod = 0, v_mod = 0; // Sum the neighboring 3x3 chromal pixels to the chroma modifier for (row_step = -1; row_step <= 1; row_step++) { for (col_step = -1; col_step <= 1; col_step++) { const int sub_row = uv_row + row_step; const int sub_col = uv_col + col_step; if (sub_row >= 0 && sub_row < uv_block_height && sub_col >= 0 && sub_col < uv_block_width) { u_mod += u_diff_sse[sub_row * uv_diff_stride + sub_col]; v_mod += v_diff_sse[sub_row * uv_diff_stride + sub_col]; uv_num_used++; } } } // Sum all the luma pixels associated with the current luma pixel for (row_step = 0; row_step < 1 + ss_y; row_step++) { for (col_step = 0; col_step < 1 + ss_x; col_step++) { const int sub_row = y_row + row_step; const int sub_col = y_col + col_step; const int y_diff = y_diff_sse[sub_row * y_diff_stride + sub_col]; u_mod += y_diff; v_mod += y_diff; uv_num_used++; } } // Set the modifier u_mod = highbd_mod_index(u_mod, uv_num_used, rounding, strength, filter_weight); v_mod = highbd_mod_index(v_mod, uv_num_used, rounding, strength, filter_weight); // Accumulate the result u_count[uv_row * uv_block_width + uv_col] += u_mod; u_accum[uv_row * uv_block_width + uv_col] += u_mod * u_pixel; v_count[uv_row * uv_block_width + uv_col] += v_mod; v_accum[uv_row * uv_block_width + uv_col] += v_mod * v_pixel; } } } #endif // CONFIG_VP9_HIGHBITDEPTH static uint32_t temporal_filter_find_matching_mb_c( VP9_COMP *cpi, ThreadData *td, uint8_t *arf_frame_buf, uint8_t *frame_ptr_buf, int stride, MV *ref_mv, MV *blk_mvs, int *blk_bestsme) { MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MV_SPEED_FEATURES *const mv_sf = &cpi->sf.mv; const SEARCH_METHODS search_method = MESH; const SEARCH_METHODS search_method_16 = cpi->sf.temporal_filter_search_method; int step_param; int sadpb = x->sadperbit16; uint32_t bestsme = UINT_MAX; uint32_t distortion; uint32_t sse; int cost_list[5]; const MvLimits tmp_mv_limits = x->mv_limits; MV best_ref_mv1 = { 0, 0 }; MV best_ref_mv1_full; /* full-pixel value of best_ref_mv1 */ // Save input state struct buf_2d src = x->plane[0].src; struct buf_2d pre = xd->plane[0].pre[0]; int i, j, k = 0; best_ref_mv1_full.col = best_ref_mv1.col >> 3; best_ref_mv1_full.row = best_ref_mv1.row >> 3; // Setup frame pointers x->plane[0].src.buf = arf_frame_buf; x->plane[0].src.stride = stride; xd->plane[0].pre[0].buf = frame_ptr_buf; xd->plane[0].pre[0].stride = stride; step_param = mv_sf->reduce_first_step_size; step_param = VPXMIN(step_param, MAX_MVSEARCH_STEPS - 2); vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); vp9_full_pixel_search(cpi, x, TF_BLOCK, &best_ref_mv1_full, step_param, search_method, sadpb, cond_cost_list(cpi, cost_list), &best_ref_mv1, ref_mv, 0, 0); /* restore UMV window */ 
x->mv_limits = tmp_mv_limits; // find_fractional_mv_step parameters: best_ref_mv1 is for mv rate cost // calculation. The start full mv and the search result are stored in // ref_mv. bestsme = cpi->find_fractional_mv_step( x, ref_mv, &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[TF_BLOCK], 0, mv_sf->subpel_search_level, cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, BW, BH, USE_8_TAPS_SHARP); // DO motion search on 4 16x16 sub_blocks. best_ref_mv1.row = ref_mv->row; best_ref_mv1.col = ref_mv->col; best_ref_mv1_full.col = best_ref_mv1.col >> 3; best_ref_mv1_full.row = best_ref_mv1.row >> 3; for (i = 0; i < BH; i += SUB_BH) { for (j = 0; j < BW; j += SUB_BW) { // Setup frame pointers x->plane[0].src.buf = arf_frame_buf + i * stride + j; x->plane[0].src.stride = stride; xd->plane[0].pre[0].buf = frame_ptr_buf + i * stride + j; xd->plane[0].pre[0].stride = stride; vp9_set_mv_search_range(&x->mv_limits, &best_ref_mv1); vp9_full_pixel_search(cpi, x, TF_SUB_BLOCK, &best_ref_mv1_full, step_param, search_method_16, sadpb, cond_cost_list(cpi, cost_list), &best_ref_mv1, &blk_mvs[k], 0, 0); /* restore UMV window */ x->mv_limits = tmp_mv_limits; blk_bestsme[k] = cpi->find_fractional_mv_step( x, &blk_mvs[k], &best_ref_mv1, cpi->common.allow_high_precision_mv, x->errorperbit, &cpi->fn_ptr[TF_SUB_BLOCK], 0, mv_sf->subpel_search_level, cond_cost_list(cpi, cost_list), NULL, NULL, &distortion, &sse, NULL, SUB_BW, SUB_BH, USE_8_TAPS_SHARP); k++; } } // Restore input state x->plane[0].src = src; xd->plane[0].pre[0] = pre; return bestsme; } void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, int mb_row, int mb_col_start, int mb_col_end) { ARNRFilterData *arnr_filter_data = &cpi->arnr_filter_data; YV12_BUFFER_CONFIG **frames = arnr_filter_data->frames; int frame_count = arnr_filter_data->frame_count; int alt_ref_index = arnr_filter_data->alt_ref_index; int strength = arnr_filter_data->strength; struct scale_factors *scale = &arnr_filter_data->sf; int byte; int frame; int mb_col; int mb_cols = (frames[alt_ref_index]->y_crop_width + BW - 1) >> BW_LOG2; int mb_rows = (frames[alt_ref_index]->y_crop_height + BH - 1) >> BH_LOG2; DECLARE_ALIGNED(16, uint32_t, accumulator[BLK_PELS * 3]); DECLARE_ALIGNED(16, uint16_t, count[BLK_PELS * 3]); MACROBLOCKD *mbd = &td->mb.e_mbd; YV12_BUFFER_CONFIG *f = frames[alt_ref_index]; uint8_t *dst1, *dst2; #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(16, uint16_t, predictor16[BLK_PELS * 3]); DECLARE_ALIGNED(16, uint8_t, predictor8[BLK_PELS * 3]); uint8_t *predictor; #else DECLARE_ALIGNED(16, uint8_t, predictor[BLK_PELS * 3]); #endif const int mb_uv_height = BH >> mbd->plane[1].subsampling_y; const int mb_uv_width = BW >> mbd->plane[1].subsampling_x; // Addition of the tile col level offsets int mb_y_offset = mb_row * BH * (f->y_stride) + BW * mb_col_start; int mb_uv_offset = mb_row * mb_uv_height * f->uv_stride + mb_uv_width * mb_col_start; #if CONFIG_VP9_HIGHBITDEPTH if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { predictor = CONVERT_TO_BYTEPTR(predictor16); } else { predictor = predictor8; } #endif // Source frames are extended to 16 pixels. This is different than // L/A/G reference frames that have a border of 32 (VP9ENCBORDERINPIXELS) // A 6/8 tap filter is used for motion search. This requires 2 pixels // before and 3 pixels after. So the largest Y mv on a border would // then be 16 - VP9_INTERP_EXTEND. The UV blocks are half the size of the // Y and therefore only extended by 8. 
The largest mv that a UV block // can support is 8 - VP9_INTERP_EXTEND. A UV mv is half of a Y mv. // (16 - VP9_INTERP_EXTEND) >> 1 which is greater than // 8 - VP9_INTERP_EXTEND. // To keep the mv in play for both Y and UV planes the max that it // can be on a border is therefore 16 - (2*VP9_INTERP_EXTEND+1). td->mb.mv_limits.row_min = -((mb_row * BH) + (17 - 2 * VP9_INTERP_EXTEND)); td->mb.mv_limits.row_max = ((mb_rows - 1 - mb_row) * BH) + (17 - 2 * VP9_INTERP_EXTEND); for (mb_col = mb_col_start; mb_col < mb_col_end; mb_col++) { int i, j, k; int stride; MV ref_mv; vp9_zero_array(accumulator, BLK_PELS * 3); vp9_zero_array(count, BLK_PELS * 3); td->mb.mv_limits.col_min = -((mb_col * BW) + (17 - 2 * VP9_INTERP_EXTEND)); td->mb.mv_limits.col_max = ((mb_cols - 1 - mb_col) * BW) + (17 - 2 * VP9_INTERP_EXTEND); if (cpi->oxcf.content == VP9E_CONTENT_FILM) { unsigned int src_variance; struct buf_2d src; src.buf = f->y_buffer + mb_y_offset; src.stride = f->y_stride; #if CONFIG_VP9_HIGHBITDEPTH if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { src_variance = vp9_high_get_sby_perpixel_variance(cpi, &src, TF_BLOCK, mbd->bd); } else { src_variance = vp9_get_sby_perpixel_variance(cpi, &src, TF_BLOCK); } #else src_variance = vp9_get_sby_perpixel_variance(cpi, &src, TF_BLOCK); #endif // CONFIG_VP9_HIGHBITDEPTH if (src_variance <= 2) { strength = VPXMAX(0, arnr_filter_data->strength - 2); } } for (frame = 0; frame < frame_count; frame++) { // MVs for 4 16x16 sub blocks. MV blk_mvs[4]; // Filter weights for 4 16x16 sub blocks. int blk_fw[4] = { 0, 0, 0, 0 }; int use_32x32 = 0; if (frames[frame] == NULL) continue; ref_mv.row = 0; ref_mv.col = 0; blk_mvs[0] = kZeroMv; blk_mvs[1] = kZeroMv; blk_mvs[2] = kZeroMv; blk_mvs[3] = kZeroMv; if (frame == alt_ref_index) { blk_fw[0] = blk_fw[1] = blk_fw[2] = blk_fw[3] = 2; use_32x32 = 1; } else { const int thresh_low = 10000; const int thresh_high = 20000; int blk_bestsme[4] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX }; // Find best match in this frame by MC int err = temporal_filter_find_matching_mb_c( cpi, td, frames[alt_ref_index]->y_buffer + mb_y_offset, frames[frame]->y_buffer + mb_y_offset, frames[frame]->y_stride, &ref_mv, blk_mvs, blk_bestsme); int err16 = blk_bestsme[0] + blk_bestsme[1] + blk_bestsme[2] + blk_bestsme[3]; int max_err = INT_MIN, min_err = INT_MAX; for (k = 0; k < 4; k++) { if (min_err > blk_bestsme[k]) min_err = blk_bestsme[k]; if (max_err < blk_bestsme[k]) max_err = blk_bestsme[k]; } if (((err * 15 < (err16 << 4)) && max_err - min_err < 10000) || ((err * 14 < (err16 << 4)) && max_err - min_err < 5000)) { use_32x32 = 1; // Assign higher weight to matching MB if it's error // score is lower. If not applying MC default behavior // is to weight all MBs equal. blk_fw[0] = err < (thresh_low << THR_SHIFT) ? 2 : err < (thresh_high << THR_SHIFT) ? 1 : 0; blk_fw[1] = blk_fw[2] = blk_fw[3] = blk_fw[0]; } else { use_32x32 = 0; for (k = 0; k < 4; k++) blk_fw[k] = blk_bestsme[k] < thresh_low ? 2 : blk_bestsme[k] < thresh_high ? 
1 : 0; } for (k = 0; k < 4; k++) { switch (abs(frame - alt_ref_index)) { case 1: blk_fw[k] = VPXMIN(blk_fw[k], 2); break; case 2: case 3: blk_fw[k] = VPXMIN(blk_fw[k], 1); break; default: break; } } } if (blk_fw[0] | blk_fw[1] | blk_fw[2] | blk_fw[3]) { // Construct the predictors temporal_filter_predictors_mb_c( mbd, frames[frame]->y_buffer + mb_y_offset, frames[frame]->u_buffer + mb_uv_offset, frames[frame]->v_buffer + mb_uv_offset, frames[frame]->y_stride, mb_uv_width, mb_uv_height, ref_mv.row, ref_mv.col, predictor, scale, mb_col * BW, mb_row * BH, blk_mvs, use_32x32); #if CONFIG_VP9_HIGHBITDEPTH if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { int adj_strength = strength + 2 * (mbd->bd - 8); // Apply the filter (YUV) vp9_highbd_apply_temporal_filter( CONVERT_TO_SHORTPTR(f->y_buffer + mb_y_offset), f->y_stride, CONVERT_TO_SHORTPTR(predictor), BW, CONVERT_TO_SHORTPTR(f->u_buffer + mb_uv_offset), CONVERT_TO_SHORTPTR(f->v_buffer + mb_uv_offset), f->uv_stride, CONVERT_TO_SHORTPTR(predictor + BLK_PELS), CONVERT_TO_SHORTPTR(predictor + (BLK_PELS << 1)), mb_uv_width, BW, BH, mbd->plane[1].subsampling_x, mbd->plane[1].subsampling_y, adj_strength, blk_fw, use_32x32, accumulator, count, accumulator + BLK_PELS, count + BLK_PELS, accumulator + (BLK_PELS << 1), count + (BLK_PELS << 1)); } else { // Apply the filter (YUV) vp9_apply_temporal_filter( f->y_buffer + mb_y_offset, f->y_stride, predictor, BW, f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset, f->uv_stride, predictor + BLK_PELS, predictor + (BLK_PELS << 1), mb_uv_width, BW, BH, mbd->plane[1].subsampling_x, mbd->plane[1].subsampling_y, strength, blk_fw, use_32x32, accumulator, count, accumulator + BLK_PELS, count + BLK_PELS, accumulator + (BLK_PELS << 1), count + (BLK_PELS << 1)); } #else // Apply the filter (YUV) vp9_apply_temporal_filter( f->y_buffer + mb_y_offset, f->y_stride, predictor, BW, f->u_buffer + mb_uv_offset, f->v_buffer + mb_uv_offset, f->uv_stride, predictor + BLK_PELS, predictor + (BLK_PELS << 1), mb_uv_width, BW, BH, mbd->plane[1].subsampling_x, mbd->plane[1].subsampling_y, strength, blk_fw, use_32x32, accumulator, count, accumulator + BLK_PELS, count + BLK_PELS, accumulator + (BLK_PELS << 1), count + (BLK_PELS << 1)); #endif // CONFIG_VP9_HIGHBITDEPTH } } #if CONFIG_VP9_HIGHBITDEPTH if (mbd->cur_buf->flags & YV12_FLAG_HIGHBITDEPTH) { uint16_t *dst1_16; uint16_t *dst2_16; // Normalize filter output to produce AltRef frame dst1 = cpi->alt_ref_buffer.y_buffer; dst1_16 = CONVERT_TO_SHORTPTR(dst1); stride = cpi->alt_ref_buffer.y_stride; byte = mb_y_offset; for (i = 0, k = 0; i < BH; i++) { for (j = 0; j < BW; j++, k++) { unsigned int pval = accumulator[k] + (count[k] >> 1); pval *= fixed_divide[count[k]]; pval >>= 19; dst1_16[byte] = (uint16_t)pval; // move to next pixel byte++; } byte += stride - BW; } dst1 = cpi->alt_ref_buffer.u_buffer; dst2 = cpi->alt_ref_buffer.v_buffer; dst1_16 = CONVERT_TO_SHORTPTR(dst1); dst2_16 = CONVERT_TO_SHORTPTR(dst2); stride = cpi->alt_ref_buffer.uv_stride; byte = mb_uv_offset; for (i = 0, k = BLK_PELS; i < mb_uv_height; i++) { for (j = 0; j < mb_uv_width; j++, k++) { int m = k + BLK_PELS; // U unsigned int pval = accumulator[k] + (count[k] >> 1); pval *= fixed_divide[count[k]]; pval >>= 19; dst1_16[byte] = (uint16_t)pval; // V pval = accumulator[m] + (count[m] >> 1); pval *= fixed_divide[count[m]]; pval >>= 19; dst2_16[byte] = (uint16_t)pval; // move to next pixel byte++; } byte += stride - mb_uv_width; } } else { // Normalize filter output to produce AltRef frame dst1 = 
cpi->alt_ref_buffer.y_buffer; stride = cpi->alt_ref_buffer.y_stride; byte = mb_y_offset; for (i = 0, k = 0; i < BH; i++) { for (j = 0; j < BW; j++, k++) { unsigned int pval = accumulator[k] + (count[k] >> 1); pval *= fixed_divide[count[k]]; pval >>= 19; dst1[byte] = (uint8_t)pval; // move to next pixel byte++; } byte += stride - BW; } dst1 = cpi->alt_ref_buffer.u_buffer; dst2 = cpi->alt_ref_buffer.v_buffer; stride = cpi->alt_ref_buffer.uv_stride; byte = mb_uv_offset; for (i = 0, k = BLK_PELS; i < mb_uv_height; i++) { for (j = 0; j < mb_uv_width; j++, k++) { int m = k + BLK_PELS; // U unsigned int pval = accumulator[k] + (count[k] >> 1); pval *= fixed_divide[count[k]]; pval >>= 19; dst1[byte] = (uint8_t)pval; // V pval = accumulator[m] + (count[m] >> 1); pval *= fixed_divide[count[m]]; pval >>= 19; dst2[byte] = (uint8_t)pval; // move to next pixel byte++; } byte += stride - mb_uv_width; } } #else // Normalize filter output to produce AltRef frame dst1 = cpi->alt_ref_buffer.y_buffer; stride = cpi->alt_ref_buffer.y_stride; byte = mb_y_offset; for (i = 0, k = 0; i < BH; i++) { for (j = 0; j < BW; j++, k++) { unsigned int pval = accumulator[k] + (count[k] >> 1); pval *= fixed_divide[count[k]]; pval >>= 19; dst1[byte] = (uint8_t)pval; // move to next pixel byte++; } byte += stride - BW; } dst1 = cpi->alt_ref_buffer.u_buffer; dst2 = cpi->alt_ref_buffer.v_buffer; stride = cpi->alt_ref_buffer.uv_stride; byte = mb_uv_offset; for (i = 0, k = BLK_PELS; i < mb_uv_height; i++) { for (j = 0; j < mb_uv_width; j++, k++) { int m = k + BLK_PELS; // U unsigned int pval = accumulator[k] + (count[k] >> 1); pval *= fixed_divide[count[k]]; pval >>= 19; dst1[byte] = (uint8_t)pval; // V pval = accumulator[m] + (count[m] >> 1); pval *= fixed_divide[count[m]]; pval >>= 19; dst2[byte] = (uint8_t)pval; // move to next pixel byte++; } byte += stride - mb_uv_width; } #endif // CONFIG_VP9_HIGHBITDEPTH mb_y_offset += BW; mb_uv_offset += mb_uv_width; } } static void temporal_filter_iterate_tile_c(VP9_COMP *cpi, int tile_row, int tile_col) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; TileInfo *tile_info = &cpi->tile_data[tile_row * tile_cols + tile_col].tile_info; const int mb_row_start = (tile_info->mi_row_start) >> TF_SHIFT; const int mb_row_end = (tile_info->mi_row_end + TF_ROUND) >> TF_SHIFT; const int mb_col_start = (tile_info->mi_col_start) >> TF_SHIFT; const int mb_col_end = (tile_info->mi_col_end + TF_ROUND) >> TF_SHIFT; int mb_row; for (mb_row = mb_row_start; mb_row < mb_row_end; mb_row++) { vp9_temporal_filter_iterate_row_c(cpi, &cpi->td, mb_row, mb_col_start, mb_col_end); } } static void temporal_filter_iterate_c(VP9_COMP *cpi) { VP9_COMMON *const cm = &cpi->common; const int tile_cols = 1 << cm->log2_tile_cols; const int tile_rows = 1 << cm->log2_tile_rows; int tile_row, tile_col; vp9_init_tile_data(cpi); for (tile_row = 0; tile_row < tile_rows; ++tile_row) { for (tile_col = 0; tile_col < tile_cols; ++tile_col) { temporal_filter_iterate_tile_c(cpi, tile_row, tile_col); } } } // Apply buffer limits and context specific adjustments to arnr filter. 
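// ---------------------------------------------------------------------------
// Illustrative sketch (not part of libvpx): the normalization loops above
// divide accumulator[k] by count[k] using the Q19 reciprocal table filled in
// vp9_temporal_filter_init() (fixed_divide[i] = 0x80000 / i). Adding
// count >> 1 first makes the truncating shift behave like round-to-nearest.
// demo_normalize is a hypothetical name; the demo values stay small enough
// not to overflow 32 bits.
#include <stdio.h>

static unsigned int demo_normalize(unsigned int acc, unsigned int cnt) {
  const unsigned int reciprocal = 0x80000 / cnt;  // as in fixed_divide[cnt]
  unsigned int pval = acc + (cnt >> 1);           // rounding offset
  pval *= reciprocal;                             // Q19 multiply...
  pval >>= 19;                                    // ...and shift ~ division
  return pval;
}

int main(void) {
  printf("%u\n", demo_normalize(1200, 6));  // 200 (exact)
  printf("%u\n", demo_normalize(1205, 6));  // 201 (1205 / 6 rounded)
  return 0;
}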
static void adjust_arnr_filter(VP9_COMP *cpi, int distance, int group_boost, int *arnr_frames, int *arnr_strength) { const VP9EncoderConfig *const oxcf = &cpi->oxcf; const GF_GROUP *const gf_group = &cpi->twopass.gf_group; const int frames_after_arf = vp9_lookahead_depth(cpi->lookahead) - distance - 1; int frames_fwd = (cpi->oxcf.arnr_max_frames - 1) >> 1; int frames_bwd; int q, frames, base_strength, strength; // Context dependent two pass adjustment to strength. if (oxcf->pass == 2) { base_strength = oxcf->arnr_strength + cpi->twopass.arnr_strength_adjustment; // Clip to allowed range. base_strength = VPXMIN(6, VPXMAX(0, base_strength)); } else { base_strength = oxcf->arnr_strength; } // Define the forward and backwards filter limits for this arnr group. if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf; if (frames_fwd > distance) frames_fwd = distance; frames_bwd = frames_fwd; // For even length filter there is one more frame backward // than forward: e.g. len=6 ==> bbbAff, len=7 ==> bbbAfff. if (frames_bwd < distance) frames_bwd += (oxcf->arnr_max_frames + 1) & 0x1; // Set the baseline active filter size. frames = frames_bwd + 1 + frames_fwd; // Adjust the strength based on active max q. if (cpi->common.current_video_frame > 1) q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[INTER_FRAME], cpi->common.bit_depth)); else q = ((int)vp9_convert_qindex_to_q(cpi->rc.avg_frame_qindex[KEY_FRAME], cpi->common.bit_depth)); if (q > 16) { strength = base_strength; } else { strength = base_strength - ((16 - q) / 2); if (strength < 0) strength = 0; } // Adjust number of frames in filter and strength based on gf boost level. if (frames > group_boost / 150) { frames = group_boost / 150; frames += !(frames & 1); } if (strength > group_boost / 300) { strength = group_boost / 300; } // Adjustments for second level arf in multi arf case. // Leave commented out place holder for possible filtering adjustment with // new multi-layer arf code. // if (cpi->oxcf.pass == 2 && cpi->multi_arf_allowed) // if (gf_group->rf_level[gf_group->index] != GF_ARF_STD) strength >>= 1; // TODO(jingning): Skip temporal filtering for intermediate frames that will // be used as show_existing_frame. Need to further explore the possibility to // apply certain filter. if (gf_group->arf_src_offset[gf_group->index] < cpi->rc.baseline_gf_interval - 1) frames = 1; *arnr_frames = frames; *arnr_strength = strength; } void vp9_temporal_filter(VP9_COMP *cpi, int distance) { VP9_COMMON *const cm = &cpi->common; RATE_CONTROL *const rc = &cpi->rc; MACROBLOCKD *const xd = &cpi->td.mb.e_mbd; ARNRFilterData *arnr_filter_data = &cpi->arnr_filter_data; int frame; int frames_to_blur; int start_frame; int strength; int frames_to_blur_backward; int frames_to_blur_forward; struct scale_factors *sf = &arnr_filter_data->sf; YV12_BUFFER_CONFIG **frames = arnr_filter_data->frames; int rdmult; // Apply context specific adjustments to the arnr filter parameters. adjust_arnr_filter(cpi, distance, rc->gfu_boost, &frames_to_blur, &strength); frames_to_blur_backward = (frames_to_blur / 2); frames_to_blur_forward = ((frames_to_blur - 1) / 2); start_frame = distance + frames_to_blur_forward; arnr_filter_data->strength = strength; arnr_filter_data->frame_count = frames_to_blur; arnr_filter_data->alt_ref_index = frames_to_blur_backward; // Setup frame pointers, NULL indicates frame not included in filter. 
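// ---------------------------------------------------------------------------
// Illustrative sketch (not part of libvpx): a worked pass through the filter
// sizing in adjust_arnr_filter() above. arnr_max_frames = 7 gives
// (7 - 1) >> 1 = 3 forward frames, clipped by the frames available after the
// ARF and by `distance`; the total is frames_bwd + 1 + frames_fwd, and the
// group boost then caps it (frames <= boost / 150). demo_arnr_frames is a
// hypothetical name and models only the frame count, not the strength.
#include <stdio.h>

static int demo_arnr_frames(int arnr_max_frames, int distance,
                            int frames_after_arf, int group_boost) {
  int frames_fwd = (arnr_max_frames - 1) >> 1;
  int frames_bwd, frames;
  if (frames_fwd > frames_after_arf) frames_fwd = frames_after_arf;
  if (frames_fwd > distance) frames_fwd = distance;
  frames_bwd = frames_fwd;
  // Even filter lengths take one extra backward frame: bbbAff for length 6.
  if (frames_bwd < distance) frames_bwd += (arnr_max_frames + 1) & 0x1;
  frames = frames_bwd + 1 + frames_fwd;
  if (frames > group_boost / 150) {
    frames = group_boost / 150;
    frames += !(frames & 1);  // keep the filter length odd
  }
  return frames;
}

int main(void) {
  printf("%d\n", demo_arnr_frames(7, 8, 8, 2000));  // 7: full-length filter
  printf("%d\n", demo_arnr_frames(7, 8, 8, 500));   // 3: capped by low boost
  return 0;
}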
for (frame = 0; frame < frames_to_blur; ++frame) { const int which_buffer = start_frame - frame; struct lookahead_entry *buf = vp9_lookahead_peek(cpi->lookahead, which_buffer); frames[frames_to_blur - 1 - frame] = &buf->img; } if (frames_to_blur > 0) { // Setup scaling factors. Scaling on each of the arnr frames is not // supported. if (cpi->use_svc) { // In spatial svc the scaling factors might be less then 1/2. // So we will use non-normative scaling. int frame_used = 0; #if CONFIG_VP9_HIGHBITDEPTH vp9_setup_scale_factors_for_frame( sf, get_frame_new_buffer(cm)->y_crop_width, get_frame_new_buffer(cm)->y_crop_height, get_frame_new_buffer(cm)->y_crop_width, get_frame_new_buffer(cm)->y_crop_height, cm->use_highbitdepth); #else vp9_setup_scale_factors_for_frame( sf, get_frame_new_buffer(cm)->y_crop_width, get_frame_new_buffer(cm)->y_crop_height, get_frame_new_buffer(cm)->y_crop_width, get_frame_new_buffer(cm)->y_crop_height); #endif // CONFIG_VP9_HIGHBITDEPTH for (frame = 0; frame < frames_to_blur; ++frame) { if (cm->mi_cols * MI_SIZE != frames[frame]->y_width || cm->mi_rows * MI_SIZE != frames[frame]->y_height) { if (vpx_realloc_frame_buffer(&cpi->svc.scaled_frames[frame_used], cm->width, cm->height, cm->subsampling_x, cm->subsampling_y, #if CONFIG_VP9_HIGHBITDEPTH cm->use_highbitdepth, #endif VP9_ENC_BORDER_IN_PIXELS, cm->byte_alignment, NULL, NULL, NULL)) { vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to reallocate alt_ref_buffer"); } frames[frame] = vp9_scale_if_required( cm, frames[frame], &cpi->svc.scaled_frames[frame_used], 0, EIGHTTAP, 0); ++frame_used; } } cm->mi = cm->mip + cm->mi_stride + 1; xd->mi = cm->mi_grid_visible; xd->mi[0] = cm->mi; } else { // ARF is produced at the native frame size and resized when coded. #if CONFIG_VP9_HIGHBITDEPTH vp9_setup_scale_factors_for_frame( sf, frames[0]->y_crop_width, frames[0]->y_crop_height, frames[0]->y_crop_width, frames[0]->y_crop_height, cm->use_highbitdepth); #else vp9_setup_scale_factors_for_frame( sf, frames[0]->y_crop_width, frames[0]->y_crop_height, frames[0]->y_crop_width, frames[0]->y_crop_height); #endif // CONFIG_VP9_HIGHBITDEPTH } } // Initialize errorperbit and sabperbit. rdmult = vp9_compute_rd_mult_based_on_qindex(cpi, ARNR_FILT_QINDEX); set_error_per_bit(&cpi->td.mb, rdmult); vp9_initialize_me_consts(cpi, &cpi->td.mb, ARNR_FILT_QINDEX); if (!cpi->row_mt) temporal_filter_iterate_c(cpi); else vp9_temporal_filter_row_mt(cpi); } libvpx-1.8.2/vp9/encoder/vp9_temporal_filter.h000066400000000000000000000024411357355204000213270ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ #define VPX_VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ #ifdef __cplusplus extern "C" { #endif #define ARNR_FILT_QINDEX 128 static const MV kZeroMv = { 0, 0 }; // Block size used in temporal filtering #define TF_BLOCK BLOCK_32X32 #define BH 32 #define BH_LOG2 5 #define BW 32 #define BW_LOG2 5 #define BLK_PELS ((BH) * (BW)) // Pixels in the block #define TF_SHIFT 2 #define TF_ROUND 3 #define THR_SHIFT 2 #define TF_SUB_BLOCK BLOCK_16X16 #define SUB_BH 16 #define SUB_BW 16 void vp9_temporal_filter_init(void); void vp9_temporal_filter(VP9_COMP *cpi, int distance); void vp9_temporal_filter_iterate_row_c(VP9_COMP *cpi, ThreadData *td, int mb_row, int mb_col_start, int mb_col_end); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_TEMPORAL_FILTER_H_ libvpx-1.8.2/vp9/encoder/vp9_tokenize.c000066400000000000000000000615111357355204000177650ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include #include "vpx_mem/vpx_mem.h" #include "vp9/common/vp9_entropy.h" #include "vp9/common/vp9_pred_common.h" #include "vp9/common/vp9_scan.h" #include "vp9/encoder/vp9_cost.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_tokenize.h" static const TOKENVALUE dct_cat_lt_10_value_tokens[] = { { 9, 63 }, { 9, 61 }, { 9, 59 }, { 9, 57 }, { 9, 55 }, { 9, 53 }, { 9, 51 }, { 9, 49 }, { 9, 47 }, { 9, 45 }, { 9, 43 }, { 9, 41 }, { 9, 39 }, { 9, 37 }, { 9, 35 }, { 9, 33 }, { 9, 31 }, { 9, 29 }, { 9, 27 }, { 9, 25 }, { 9, 23 }, { 9, 21 }, { 9, 19 }, { 9, 17 }, { 9, 15 }, { 9, 13 }, { 9, 11 }, { 9, 9 }, { 9, 7 }, { 9, 5 }, { 9, 3 }, { 9, 1 }, { 8, 31 }, { 8, 29 }, { 8, 27 }, { 8, 25 }, { 8, 23 }, { 8, 21 }, { 8, 19 }, { 8, 17 }, { 8, 15 }, { 8, 13 }, { 8, 11 }, { 8, 9 }, { 8, 7 }, { 8, 5 }, { 8, 3 }, { 8, 1 }, { 7, 15 }, { 7, 13 }, { 7, 11 }, { 7, 9 }, { 7, 7 }, { 7, 5 }, { 7, 3 }, { 7, 1 }, { 6, 7 }, { 6, 5 }, { 6, 3 }, { 6, 1 }, { 5, 3 }, { 5, 1 }, { 4, 1 }, { 3, 1 }, { 2, 1 }, { 1, 1 }, { 0, 0 }, { 1, 0 }, { 2, 0 }, { 3, 0 }, { 4, 0 }, { 5, 0 }, { 5, 2 }, { 6, 0 }, { 6, 2 }, { 6, 4 }, { 6, 6 }, { 7, 0 }, { 7, 2 }, { 7, 4 }, { 7, 6 }, { 7, 8 }, { 7, 10 }, { 7, 12 }, { 7, 14 }, { 8, 0 }, { 8, 2 }, { 8, 4 }, { 8, 6 }, { 8, 8 }, { 8, 10 }, { 8, 12 }, { 8, 14 }, { 8, 16 }, { 8, 18 }, { 8, 20 }, { 8, 22 }, { 8, 24 }, { 8, 26 }, { 8, 28 }, { 8, 30 }, { 9, 0 }, { 9, 2 }, { 9, 4 }, { 9, 6 }, { 9, 8 }, { 9, 10 }, { 9, 12 }, { 9, 14 }, { 9, 16 }, { 9, 18 }, { 9, 20 }, { 9, 22 }, { 9, 24 }, { 9, 26 }, { 9, 28 }, { 9, 30 }, { 9, 32 }, { 9, 34 }, { 9, 36 }, { 9, 38 }, { 9, 40 }, { 9, 42 }, { 9, 44 }, { 9, 46 }, { 9, 48 }, { 9, 50 }, { 9, 52 }, { 9, 54 }, { 9, 56 }, { 9, 58 }, { 9, 60 }, { 9, 62 } }; const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens = dct_cat_lt_10_value_tokens + (sizeof(dct_cat_lt_10_value_tokens) / sizeof(*dct_cat_lt_10_value_tokens)) / 2; // The corresponding costs of the extrabits for the tokens in the above table // are stored in the table below. The values are obtained from looking up the // entry for the specified extrabits in the table corresponding to the token // (as defined in cost element vp9_extra_bits) // e.g. 
{9, 63} maps to cat5_cost[63 >> 1], {1, 1} maps to sign_cost[1 >> 1] static const int dct_cat_lt_10_value_cost[] = { 3773, 3750, 3704, 3681, 3623, 3600, 3554, 3531, 3432, 3409, 3363, 3340, 3282, 3259, 3213, 3190, 3136, 3113, 3067, 3044, 2986, 2963, 2917, 2894, 2795, 2772, 2726, 2703, 2645, 2622, 2576, 2553, 3197, 3116, 3058, 2977, 2881, 2800, 2742, 2661, 2615, 2534, 2476, 2395, 2299, 2218, 2160, 2079, 2566, 2427, 2334, 2195, 2023, 1884, 1791, 1652, 1893, 1696, 1453, 1256, 1229, 864, 512, 512, 512, 512, 0, 512, 512, 512, 512, 864, 1229, 1256, 1453, 1696, 1893, 1652, 1791, 1884, 2023, 2195, 2334, 2427, 2566, 2079, 2160, 2218, 2299, 2395, 2476, 2534, 2615, 2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197, 2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795, 2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136, 3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432, 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773, }; const int *vp9_dct_cat_lt_10_value_cost = dct_cat_lt_10_value_cost + (sizeof(dct_cat_lt_10_value_cost) / sizeof(*dct_cat_lt_10_value_cost)) / 2; // Array indices are identical to previously-existing CONTEXT_NODE indices /* clang-format off */ const vpx_tree_index vp9_coef_tree[TREE_SIZE(ENTROPY_TOKENS)] = { -EOB_TOKEN, 2, // 0 = EOB -ZERO_TOKEN, 4, // 1 = ZERO -ONE_TOKEN, 6, // 2 = ONE 8, 12, // 3 = LOW_VAL -TWO_TOKEN, 10, // 4 = TWO -THREE_TOKEN, -FOUR_TOKEN, // 5 = THREE 14, 16, // 6 = HIGH_LOW -CATEGORY1_TOKEN, -CATEGORY2_TOKEN, // 7 = CAT_ONE 18, 20, // 8 = CAT_THREEFOUR -CATEGORY3_TOKEN, -CATEGORY4_TOKEN, // 9 = CAT_THREE -CATEGORY5_TOKEN, -CATEGORY6_TOKEN // 10 = CAT_FIVE }; /* clang-format on */ static const int16_t zero_cost[] = { 0 }; static const int16_t sign_cost[1] = { 512 }; static const int16_t cat1_cost[1 << 1] = { 864, 1229 }; static const int16_t cat2_cost[1 << 2] = { 1256, 1453, 1696, 1893 }; static const int16_t cat3_cost[1 << 3] = { 1652, 1791, 1884, 2023, 2195, 2334, 2427, 2566 }; static const int16_t cat4_cost[1 << 4] = { 2079, 2160, 2218, 2299, 2395, 2476, 2534, 2615, 2661, 2742, 2800, 2881, 2977, 3058, 3116, 3197 }; static const int16_t cat5_cost[1 << 5] = { 2553, 2576, 2622, 2645, 2703, 2726, 2772, 2795, 2894, 2917, 2963, 2986, 3044, 3067, 3113, 3136, 3190, 3213, 3259, 3282, 3340, 3363, 3409, 3432, 3531, 3554, 3600, 3623, 3681, 3704, 3750, 3773 }; const int16_t vp9_cat6_low_cost[256] = { 3378, 3390, 3401, 3413, 3435, 3447, 3458, 3470, 3517, 3529, 3540, 3552, 3574, 3586, 3597, 3609, 3671, 3683, 3694, 3706, 3728, 3740, 3751, 3763, 3810, 3822, 3833, 3845, 3867, 3879, 3890, 3902, 3973, 3985, 3996, 4008, 4030, 4042, 4053, 4065, 4112, 4124, 4135, 4147, 4169, 4181, 4192, 4204, 4266, 4278, 4289, 4301, 4323, 4335, 4346, 4358, 4405, 4417, 4428, 4440, 4462, 4474, 4485, 4497, 4253, 4265, 4276, 4288, 4310, 4322, 4333, 4345, 4392, 4404, 4415, 4427, 4449, 4461, 4472, 4484, 4546, 4558, 4569, 4581, 4603, 4615, 4626, 4638, 4685, 4697, 4708, 4720, 4742, 4754, 4765, 4777, 4848, 4860, 4871, 4883, 4905, 4917, 4928, 4940, 4987, 4999, 5010, 5022, 5044, 5056, 5067, 5079, 5141, 5153, 5164, 5176, 5198, 5210, 5221, 5233, 5280, 5292, 5303, 5315, 5337, 5349, 5360, 5372, 4988, 5000, 5011, 5023, 5045, 5057, 5068, 5080, 5127, 5139, 5150, 5162, 5184, 5196, 5207, 5219, 5281, 5293, 5304, 5316, 5338, 5350, 5361, 5373, 5420, 5432, 5443, 5455, 5477, 5489, 5500, 5512, 5583, 5595, 5606, 5618, 5640, 5652, 5663, 5675, 5722, 5734, 5745, 5757, 5779, 5791, 5802, 5814, 5876, 5888, 5899, 5911, 5933, 5945, 5956, 5968, 6015, 6027, 6038, 6050, 6072, 6084, 6095, 6107, 5863, 5875, 5886, 5898, 5920, 5932, 5943, 5955, 6002, 6014, 
6025, 6037, 6059, 6071, 6082, 6094, 6156, 6168, 6179, 6191, 6213, 6225, 6236, 6248, 6295, 6307, 6318, 6330, 6352, 6364, 6375, 6387, 6458, 6470, 6481, 6493, 6515, 6527, 6538, 6550, 6597, 6609, 6620, 6632, 6654, 6666, 6677, 6689, 6751, 6763, 6774, 6786, 6808, 6820, 6831, 6843, 6890, 6902, 6913, 6925, 6947, 6959, 6970, 6982 }; const uint16_t vp9_cat6_high_cost[64] = { 88, 2251, 2727, 4890, 3148, 5311, 5787, 7950, 3666, 5829, 6305, 8468, 6726, 8889, 9365, 11528, 3666, 5829, 6305, 8468, 6726, 8889, 9365, 11528, 7244, 9407, 9883, 12046, 10304, 12467, 12943, 15106, 3666, 5829, 6305, 8468, 6726, 8889, 9365, 11528, 7244, 9407, 9883, 12046, 10304, 12467, 12943, 15106, 7244, 9407, 9883, 12046, 10304, 12467, 12943, 15106, 10822, 12985, 13461, 15624, 13882, 16045, 16521, 18684 }; #if CONFIG_VP9_HIGHBITDEPTH const uint16_t vp9_cat6_high10_high_cost[256] = { 94, 2257, 2733, 4896, 3154, 5317, 5793, 7956, 3672, 5835, 6311, 8474, 6732, 8895, 9371, 11534, 3672, 5835, 6311, 8474, 6732, 8895, 9371, 11534, 7250, 9413, 9889, 12052, 10310, 12473, 12949, 15112, 3672, 5835, 6311, 8474, 6732, 8895, 9371, 11534, 7250, 9413, 9889, 12052, 10310, 12473, 12949, 15112, 7250, 9413, 9889, 12052, 10310, 12473, 12949, 15112, 10828, 12991, 13467, 15630, 13888, 16051, 16527, 18690, 4187, 6350, 6826, 8989, 7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921, 17084, 17560, 19723, 17981, 20144, 20620, 22783, 4187, 6350, 6826, 8989, 7247, 9410, 9886, 12049, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 7765, 9928, 10404, 12567, 10825, 12988, 13464, 15627, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 11343, 13506, 13982, 16145, 14403, 16566, 17042, 19205, 14921, 17084, 17560, 19723, 17981, 20144, 20620, 22783, 8280, 10443, 10919, 13082, 11340, 13503, 13979, 16142, 11858, 14021, 14497, 16660, 14918, 17081, 17557, 19720, 11858, 14021, 14497, 16660, 14918, 17081, 17557, 19720, 15436, 17599, 18075, 20238, 18496, 20659, 21135, 23298, 11858, 14021, 14497, 16660, 14918, 17081, 17557, 19720, 15436, 17599, 18075, 20238, 18496, 20659, 21135, 23298, 15436, 17599, 18075, 20238, 18496, 20659, 21135, 23298, 19014, 21177, 21653, 23816, 22074, 24237, 24713, 26876 }; const uint16_t vp9_cat6_high12_high_cost[1024] = { 100, 2263, 2739, 4902, 3160, 5323, 5799, 7962, 3678, 5841, 6317, 8480, 6738, 8901, 9377, 11540, 3678, 5841, 6317, 8480, 6738, 8901, 9377, 11540, 7256, 9419, 9895, 12058, 10316, 12479, 12955, 15118, 3678, 5841, 6317, 8480, 6738, 8901, 9377, 11540, 7256, 9419, 9895, 12058, 10316, 12479, 12955, 15118, 7256, 9419, 9895, 12058, 10316, 12479, 12955, 15118, 10834, 12997, 13473, 15636, 13894, 16057, 16533, 18696, 4193, 6356, 6832, 8995, 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729, 17987, 20150, 20626, 22789, 4193, 6356, 6832, 8995, 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 
10831, 12994, 13470, 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729, 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 4193, 6356, 6832, 8995, 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729, 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018, 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752, 27915, 26173, 28336, 28812, 30975, 4193, 6356, 6832, 8995, 7253, 9416, 9892, 12055, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 7771, 9934, 10410, 12573, 10831, 12994, 13470, 15633, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 11349, 13512, 13988, 16151, 14409, 16572, 17048, 19211, 14927, 17090, 17566, 19729, 17987, 20150, 20626, 22789, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 
14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018, 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752, 27915, 26173, 28336, 28812, 30975, 8286, 10449, 10925, 13088, 11346, 13509, 13985, 16148, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 11864, 14027, 14503, 16666, 14924, 17087, 17563, 19726, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 15442, 17605, 18081, 20244, 18502, 20665, 21141, 23304, 19020, 21183, 21659, 23822, 22080, 24243, 24719, 26882, 12379, 14542, 15018, 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752, 27915, 26173, 28336, 28812, 30975, 12379, 14542, 15018, 17181, 15439, 17602, 18078, 20241, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 15957, 18120, 18596, 20759, 19017, 21180, 21656, 23819, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 19535, 21698, 22174, 24337, 22595, 24758, 25234, 27397, 23113, 25276, 25752, 27915, 26173, 28336, 28812, 30975, 16472, 18635, 19111, 21274, 19532, 21695, 22171, 24334, 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 23628, 25791, 26267, 28430, 26688, 28851, 29327, 31490, 20050, 22213, 22689, 24852, 23110, 25273, 25749, 27912, 23628, 25791, 26267, 28430, 26688, 28851, 29327, 31490, 23628, 25791, 26267, 28430, 26688, 28851, 29327, 31490, 27206, 29369, 29845, 32008, 30266, 32429, 32905, 35068 }; #endif const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS] = { { 0, 0, 0, zero_cost }, // ZERO_TOKEN { 0, 0, 1, sign_cost }, // ONE_TOKEN { 0, 0, 2, sign_cost }, // TWO_TOKEN { 0, 0, 3, sign_cost }, // THREE_TOKEN { 0, 0, 4, sign_cost }, // FOUR_TOKEN { vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost }, // CATEGORY1_TOKEN { vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost }, // CATEGORY2_TOKEN { vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost }, // CATEGORY3_TOKEN { vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost }, // CATEGORY4_TOKEN { vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost }, // CATEGORY5_TOKEN { vp9_cat6_prob, 14, CAT6_MIN_VAL, 0 }, // CATEGORY6_TOKEN { 0, 0, 0, zero_cost } // EOB_TOKEN }; #if CONFIG_VP9_HIGHBITDEPTH const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS] = { { 0, 0, 0, zero_cost }, // ZERO { 0, 0, 1, sign_cost }, // ONE { 0, 0, 2, sign_cost }, // TWO { 0, 0, 3, sign_cost }, // THREE { 0, 0, 4, sign_cost }, // FOUR { vp9_cat1_prob, 1, 
CAT1_MIN_VAL, cat1_cost }, // CAT1 { vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost }, // CAT2 { vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost }, // CAT3 { vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost }, // CAT4 { vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost }, // CAT5 { vp9_cat6_prob_high12 + 2, 16, CAT6_MIN_VAL, 0 }, // CAT6 { 0, 0, 0, zero_cost } // EOB }; const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS] = { { 0, 0, 0, zero_cost }, // ZERO { 0, 0, 1, sign_cost }, // ONE { 0, 0, 2, sign_cost }, // TWO { 0, 0, 3, sign_cost }, // THREE { 0, 0, 4, sign_cost }, // FOUR { vp9_cat1_prob, 1, CAT1_MIN_VAL, cat1_cost }, // CAT1 { vp9_cat2_prob, 2, CAT2_MIN_VAL, cat2_cost }, // CAT2 { vp9_cat3_prob, 3, CAT3_MIN_VAL, cat3_cost }, // CAT3 { vp9_cat4_prob, 4, CAT4_MIN_VAL, cat4_cost }, // CAT4 { vp9_cat5_prob, 5, CAT5_MIN_VAL, cat5_cost }, // CAT5 { vp9_cat6_prob_high12, 18, CAT6_MIN_VAL, 0 }, // CAT6 { 0, 0, 0, zero_cost } // EOB }; #endif const struct vp9_token vp9_coef_encodings[ENTROPY_TOKENS] = { { 2, 2 }, { 6, 3 }, { 28, 5 }, { 58, 6 }, { 59, 6 }, { 60, 6 }, { 61, 6 }, { 124, 7 }, { 125, 7 }, { 126, 7 }, { 127, 7 }, { 0, 1 } }; struct tokenize_b_args { VP9_COMP *cpi; ThreadData *td; TOKENEXTRA **tp; }; static void set_entropy_context_b(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct tokenize_b_args *const args = arg; ThreadData *const td = args->td; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; vp9_set_contexts(xd, pd, plane_bsize, tx_size, p->eobs[block] > 0, col, row); } static INLINE void add_token(TOKENEXTRA **t, const vpx_prob *context_tree, int16_t token, EXTRABIT extra, unsigned int *counts) { (*t)->context_tree = context_tree; (*t)->token = token; (*t)->extra = extra; (*t)++; ++counts[token]; } static INLINE void add_token_no_extra(TOKENEXTRA **t, const vpx_prob *context_tree, int16_t token, unsigned int *counts) { (*t)->context_tree = context_tree; (*t)->token = token; (*t)++; ++counts[token]; } static void tokenize_b(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *arg) { struct tokenize_b_args *const args = arg; VP9_COMP *cpi = args->cpi; ThreadData *const td = args->td; MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; TOKENEXTRA **tp = args->tp; uint8_t token_cache[32 * 32]; struct macroblock_plane *p = &x->plane[plane]; struct macroblockd_plane *pd = &xd->plane[plane]; MODE_INFO *mi = xd->mi[0]; int pt; /* near block/prev token context index */ int c; TOKENEXTRA *t = *tp; /* store tokens starting here */ int eob = p->eobs[block]; const PLANE_TYPE type = get_plane_type(plane); const tran_low_t *qcoeff = BLOCK_OFFSET(p->qcoeff, block); const int16_t *scan, *nb; const scan_order *so; const int ref = is_inter_block(mi); unsigned int(*const counts)[COEFF_CONTEXTS][ENTROPY_TOKENS] = td->rd_counts.coef_counts[tx_size][type][ref]; vpx_prob(*const coef_probs)[COEFF_CONTEXTS][UNCONSTRAINED_NODES] = cpi->common.fc->coef_probs[tx_size][type][ref]; unsigned int(*const eob_branch)[COEFF_CONTEXTS] = td->counts->eob_branch[tx_size][type][ref]; const uint8_t *const band = get_band_translate(tx_size); const int tx_eob = 16 << (tx_size << 1); int16_t token; EXTRABIT extra; pt = get_entropy_context(tx_size, pd->above_context + col, pd->left_context + row); so = get_scan(xd, tx_size, type, block); scan = so->scan; nb = so->neighbors; c = 0; while (c < eob) { int v = 0; v = 
qcoeff[scan[c]]; ++eob_branch[band[c]][pt]; while (!v) { add_token_no_extra(&t, coef_probs[band[c]][pt], ZERO_TOKEN, counts[band[c]][pt]); token_cache[scan[c]] = 0; ++c; pt = get_coef_context(nb, token_cache, c); v = qcoeff[scan[c]]; } vp9_get_token_extra(v, &token, &extra); add_token(&t, coef_probs[band[c]][pt], token, extra, counts[band[c]][pt]); token_cache[scan[c]] = vp9_pt_energy_class[token]; ++c; pt = get_coef_context(nb, token_cache, c); } if (c < tx_eob) { ++eob_branch[band[c]][pt]; add_token_no_extra(&t, coef_probs[band[c]][pt], EOB_TOKEN, counts[band[c]][pt]); } *tp = t; vp9_set_contexts(xd, pd, plane_bsize, tx_size, c > 0, col, row); } struct is_skippable_args { uint16_t *eobs; int *skippable; }; static void is_skippable(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) { struct is_skippable_args *args = argv; (void)plane; (void)plane_bsize; (void)tx_size; (void)row; (void)col; args->skippable[0] &= (!args->eobs[block]); } // TODO(yaowu): rewrite and optimize this function to remove the usage of // vp9_foreach_transform_block() and simplify is_skippable(). int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { int result = 1; struct is_skippable_args args = { x->plane[plane].eobs, &result }; vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane, is_skippable, &args); return result; } static void has_high_freq_coeff(int plane, int block, int row, int col, BLOCK_SIZE plane_bsize, TX_SIZE tx_size, void *argv) { struct is_skippable_args *args = argv; int eobs = (tx_size == TX_4X4) ? 3 : 10; (void)plane; (void)plane_bsize; (void)row; (void)col; *(args->skippable) |= (args->eobs[block] > eobs); } int vp9_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane) { int result = 0; struct is_skippable_args args = { x->plane[plane].eobs, &result }; vp9_foreach_transformed_block_in_plane(&x->e_mbd, bsize, plane, has_high_freq_coeff, &args); return result; } void vp9_tokenize_sb(VP9_COMP *cpi, ThreadData *td, TOKENEXTRA **t, int dry_run, int seg_skip, BLOCK_SIZE bsize) { MACROBLOCK *const x = &td->mb; MACROBLOCKD *const xd = &x->e_mbd; MODE_INFO *const mi = xd->mi[0]; const int ctx = vp9_get_skip_context(xd); struct tokenize_b_args arg = { cpi, td, t }; if (seg_skip) { assert(mi->skip); } if (mi->skip) { if (!dry_run && !seg_skip) ++td->counts->skip[ctx][1]; reset_skip_context(xd, bsize); return; } if (!dry_run) { ++td->counts->skip[ctx][0]; vp9_foreach_transformed_block(xd, bsize, tokenize_b, &arg); } else { vp9_foreach_transformed_block(xd, bsize, set_entropy_context_b, &arg); } } libvpx-1.8.2/vp9/encoder/vp9_tokenize.h000066400000000000000000000076521357355204000200000ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_ENCODER_VP9_TOKENIZE_H_ #define VPX_VP9_ENCODER_VP9_TOKENIZE_H_ #include "vp9/common/vp9_entropy.h" #include "vp9/encoder/vp9_block.h" #include "vp9/encoder/vp9_treewriter.h" #ifdef __cplusplus extern "C" { #endif #define EOSB_TOKEN 127 // Not signalled, encoder only #if CONFIG_VP9_HIGHBITDEPTH typedef int32_t EXTRABIT; #else typedef int16_t EXTRABIT; #endif typedef struct { int16_t token; EXTRABIT extra; } TOKENVALUE; typedef struct { const vpx_prob *context_tree; int16_t token; EXTRABIT extra; } TOKENEXTRA; extern const vpx_tree_index vp9_coef_tree[]; extern const vpx_tree_index vp9_coef_con_tree[]; extern const struct vp9_token vp9_coef_encodings[]; int vp9_is_skippable_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); int vp9_has_high_freq_in_plane(MACROBLOCK *x, BLOCK_SIZE bsize, int plane); struct VP9_COMP; struct ThreadData; void vp9_tokenize_sb(struct VP9_COMP *cpi, struct ThreadData *td, TOKENEXTRA **t, int dry_run, int seg_skip, BLOCK_SIZE bsize); typedef struct { const vpx_prob *prob; int len; int base_val; const int16_t *cost; } vp9_extra_bit; // indexed by token value extern const vp9_extra_bit vp9_extra_bits[ENTROPY_TOKENS]; #if CONFIG_VP9_HIGHBITDEPTH extern const vp9_extra_bit vp9_extra_bits_high10[ENTROPY_TOKENS]; extern const vp9_extra_bit vp9_extra_bits_high12[ENTROPY_TOKENS]; #endif // CONFIG_VP9_HIGHBITDEPTH extern const int16_t *vp9_dct_value_cost_ptr; /* TODO: The Token field should be broken out into a separate char array to * improve cache locality, since it's needed for costing when the rest of the * fields are not. */ extern const TOKENVALUE *vp9_dct_value_tokens_ptr; extern const TOKENVALUE *vp9_dct_cat_lt_10_value_tokens; extern const int *vp9_dct_cat_lt_10_value_cost; extern const int16_t vp9_cat6_low_cost[256]; extern const uint16_t vp9_cat6_high_cost[64]; extern const uint16_t vp9_cat6_high10_high_cost[256]; extern const uint16_t vp9_cat6_high12_high_cost[1024]; #if CONFIG_VP9_HIGHBITDEPTH static INLINE const uint16_t *vp9_get_high_cost_table(int bit_depth) { return bit_depth == 8 ? vp9_cat6_high_cost : (bit_depth == 10 ? vp9_cat6_high10_high_cost : vp9_cat6_high12_high_cost); } #else static INLINE const uint16_t *vp9_get_high_cost_table(int bit_depth) { (void)bit_depth; return vp9_cat6_high_cost; } #endif // CONFIG_VP9_HIGHBITDEPTH static INLINE void vp9_get_token_extra(int v, int16_t *token, EXTRABIT *extra) { if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) { *token = CATEGORY6_TOKEN; if (v >= CAT6_MIN_VAL) *extra = 2 * v - 2 * CAT6_MIN_VAL; else *extra = -2 * v - 2 * CAT6_MIN_VAL + 1; return; } *token = vp9_dct_cat_lt_10_value_tokens[v].token; *extra = vp9_dct_cat_lt_10_value_tokens[v].extra; } static INLINE int16_t vp9_get_token(int v) { if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) return 10; return vp9_dct_cat_lt_10_value_tokens[v].token; } static INLINE int vp9_get_token_cost(int v, int16_t *token, const uint16_t *cat6_high_table) { if (v >= CAT6_MIN_VAL || v <= -CAT6_MIN_VAL) { EXTRABIT extrabits; *token = CATEGORY6_TOKEN; extrabits = abs(v) - CAT6_MIN_VAL; return vp9_cat6_low_cost[extrabits & 0xff] + cat6_high_table[extrabits >> 8]; } *token = vp9_dct_cat_lt_10_value_tokens[v].token; return vp9_dct_cat_lt_10_value_cost[v]; } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_TOKENIZE_H_ libvpx-1.8.2/vp9/encoder/vp9_treewriter.c000066400000000000000000000034321357355204000203270ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vp9/encoder/vp9_treewriter.h" static void tree2tok(struct vp9_token *tokens, const vpx_tree_index *tree, int i, int v, int l) { v += v; ++l; do { const vpx_tree_index j = tree[i++]; if (j <= 0) { tokens[-j].value = v; tokens[-j].len = l; } else { tree2tok(tokens, tree, j, v, l); } } while (++v & 1); } void vp9_tokens_from_tree(struct vp9_token *tokens, const vpx_tree_index *tree) { tree2tok(tokens, tree, 0, 0, 0); } static unsigned int convert_distribution(unsigned int i, vpx_tree tree, unsigned int branch_ct[][2], const unsigned int num_events[]) { unsigned int left, right; if (tree[i] <= 0) left = num_events[-tree[i]]; else left = convert_distribution(tree[i], tree, branch_ct, num_events); if (tree[i + 1] <= 0) right = num_events[-tree[i + 1]]; else right = convert_distribution(tree[i + 1], tree, branch_ct, num_events); branch_ct[i >> 1][0] = left; branch_ct[i >> 1][1] = right; return left + right; } void vp9_tree_probs_from_distribution(vpx_tree tree, unsigned int branch_ct[/* n-1 */][2], const unsigned int num_events[/* n */]) { convert_distribution(0, tree, branch_ct, num_events); } libvpx-1.8.2/vp9/encoder/vp9_treewriter.h000066400000000000000000000030721357355204000203340ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_VP9_TREEWRITER_H_ #define VPX_VP9_ENCODER_VP9_TREEWRITER_H_ #include "vpx_dsp/bitwriter.h" #ifdef __cplusplus extern "C" { #endif void vp9_tree_probs_from_distribution(vpx_tree tree, unsigned int branch_ct[/* n - 1 */][2], const unsigned int num_events[/* n */]); struct vp9_token { int value; int len; }; void vp9_tokens_from_tree(struct vp9_token *, const vpx_tree_index *); static INLINE void vp9_write_tree(vpx_writer *w, const vpx_tree_index *tree, const vpx_prob *probs, int bits, int len, vpx_tree_index i) { do { const int bit = (bits >> --len) & 1; vpx_write(w, bit, probs[i >> 1]); i = tree[i + bit]; } while (len); } static INLINE void vp9_write_token(vpx_writer *w, const vpx_tree_index *tree, const vpx_prob *probs, const struct vp9_token *token) { vp9_write_tree(w, tree, probs, token->value, token->len, 0); } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_ENCODER_VP9_TREEWRITER_H_ libvpx-1.8.2/vp9/encoder/x86/000077500000000000000000000000001357355204000156145ustar00rootroot00000000000000libvpx-1.8.2/vp9/encoder/x86/highbd_temporal_filter_sse4.c000066400000000000000000001134111357355204000234140ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/

#include <assert.h>
#include <smmintrin.h>

#include "./vp9_rtcd.h"
#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vp9/encoder/vp9_temporal_filter.h"
#include "vp9/encoder/x86/temporal_filter_constants.h"

// Compute (a-b)**2 for 8 pixels with size 16-bit
static INLINE void highbd_store_dist_8(const uint16_t *a, const uint16_t *b,
                                       uint32_t *dst) {
  const __m128i zero = _mm_setzero_si128();
  const __m128i a_reg = _mm_loadu_si128((const __m128i *)a);
  const __m128i b_reg = _mm_loadu_si128((const __m128i *)b);

  const __m128i a_first = _mm_cvtepu16_epi32(a_reg);
  const __m128i a_second = _mm_unpackhi_epi16(a_reg, zero);
  const __m128i b_first = _mm_cvtepu16_epi32(b_reg);
  const __m128i b_second = _mm_unpackhi_epi16(b_reg, zero);

  __m128i dist_first, dist_second;

  dist_first = _mm_sub_epi32(a_first, b_first);
  dist_second = _mm_sub_epi32(a_second, b_second);
  dist_first = _mm_mullo_epi32(dist_first, dist_first);
  dist_second = _mm_mullo_epi32(dist_second, dist_second);

  _mm_storeu_si128((__m128i *)dst, dist_first);
  _mm_storeu_si128((__m128i *)(dst + 4), dist_second);
}

// Sum up three neighboring distortions for the pixels
static INLINE void highbd_get_sum_4(const uint32_t *dist, __m128i *sum) {
  __m128i dist_reg, dist_left, dist_right;

  dist_reg = _mm_loadu_si128((const __m128i *)dist);
  dist_left = _mm_loadu_si128((const __m128i *)(dist - 1));
  dist_right = _mm_loadu_si128((const __m128i *)(dist + 1));

  *sum = _mm_add_epi32(dist_reg, dist_left);
  *sum = _mm_add_epi32(*sum, dist_right);
}

static INLINE void highbd_get_sum_8(const uint32_t *dist, __m128i *sum_first,
                                    __m128i *sum_second) {
  highbd_get_sum_4(dist, sum_first);
  highbd_get_sum_4(dist + 4, sum_second);
}

// Average the value based on the number of values summed (9 for pixels away
// from the border, 4 for pixels in corners, and 6 for other edge values, plus
// however many values from the y/uv planes are used).
//
// Add in the rounding factor and shift, clamp to 16, invert and shift.
// Multiply by weight.
static INLINE void highbd_average_4(__m128i *output, const __m128i *sum,
                                    const __m128i *mul_constants,
                                    const int strength, const int rounding,
                                    const int weight) {
  // _mm_srl_epi16 uses the lower 64 bit value for the shift.
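  // For reference, a scalar sketch of what this routine computes per 32-bit
  // lane (assumed equivalent, not used by the build):
  //   uint32_t m = (uint32_t)(((uint64_t)sum * mul_const) >> 32);  // ~3/index
  //   m = (m + rounding) >> strength;
  //   if (m > 16) m = 16;
  //   out = (16 - m) * weight;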
const __m128i strength_u128 = _mm_set_epi32(0, 0, 0, strength); const __m128i rounding_u32 = _mm_set1_epi32(rounding); const __m128i weight_u32 = _mm_set1_epi32(weight); const __m128i sixteen = _mm_set1_epi32(16); const __m128i zero = _mm_setzero_si128(); // modifier * 3 / index; const __m128i sum_lo = _mm_unpacklo_epi32(*sum, zero); const __m128i sum_hi = _mm_unpackhi_epi32(*sum, zero); const __m128i const_lo = _mm_unpacklo_epi32(*mul_constants, zero); const __m128i const_hi = _mm_unpackhi_epi32(*mul_constants, zero); const __m128i mul_lo = _mm_mul_epu32(sum_lo, const_lo); const __m128i mul_lo_div = _mm_srli_epi64(mul_lo, 32); const __m128i mul_hi = _mm_mul_epu32(sum_hi, const_hi); const __m128i mul_hi_div = _mm_srli_epi64(mul_hi, 32); // Now we have // mul_lo: 00 a1 00 a0 // mul_hi: 00 a3 00 a2 // Unpack as 64 bit words to get even and odd elements // unpack_lo: 00 a2 00 a0 // unpack_hi: 00 a3 00 a1 // Then we can shift and OR the results to get everything in 32-bits const __m128i mul_even = _mm_unpacklo_epi64(mul_lo_div, mul_hi_div); const __m128i mul_odd = _mm_unpackhi_epi64(mul_lo_div, mul_hi_div); const __m128i mul_odd_shift = _mm_slli_si128(mul_odd, 4); const __m128i mul = _mm_or_si128(mul_even, mul_odd_shift); // Round *output = _mm_add_epi32(mul, rounding_u32); *output = _mm_srl_epi32(*output, strength_u128); // Multiply with the weight *output = _mm_min_epu32(*output, sixteen); *output = _mm_sub_epi32(sixteen, *output); *output = _mm_mullo_epi32(*output, weight_u32); } static INLINE void highbd_average_8(__m128i *output_0, __m128i *output_1, const __m128i *sum_0_u32, const __m128i *sum_1_u32, const __m128i *mul_constants_0, const __m128i *mul_constants_1, const int strength, const int rounding, const int weight) { highbd_average_4(output_0, sum_0_u32, mul_constants_0, strength, rounding, weight); highbd_average_4(output_1, sum_1_u32, mul_constants_1, strength, rounding, weight); } // Add 'sum_u32' to 'count'. Multiply by 'pred' and add to 'accumulator.' 
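// For reference, a scalar sketch of the accumulation scheme used throughout
// this file (assumed form; the SSE4.1 routine below handles 8 pixels at a
// time):
//   static void accumulate_and_store(uint32_t mod, uint16_t pred,
//                                    uint16_t *count, uint32_t *accum) {
//     *count += (uint16_t)mod;  // running sum of filter weights
//     *accum += mod * pred;     // running weighted sum of predictions
//   }
// Once every frame in the ARNR group has been accumulated, the filtered
// pixel is reconstructed as roughly accum / count, with rounding.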
static INLINE void highbd_accumulate_and_store_8(const __m128i sum_first_u32, const __m128i sum_second_u32, const uint16_t *pred, uint16_t *count, uint32_t *accumulator) { // Cast down to 16-bit ints const __m128i sum_u16 = _mm_packus_epi32(sum_first_u32, sum_second_u32); const __m128i zero = _mm_setzero_si128(); __m128i pred_u16 = _mm_loadu_si128((const __m128i *)pred); __m128i count_u16 = _mm_loadu_si128((const __m128i *)count); __m128i pred_0_u32, pred_1_u32; __m128i accum_0_u32, accum_1_u32; count_u16 = _mm_adds_epu16(count_u16, sum_u16); _mm_storeu_si128((__m128i *)count, count_u16); pred_u16 = _mm_mullo_epi16(sum_u16, pred_u16); pred_0_u32 = _mm_cvtepu16_epi32(pred_u16); pred_1_u32 = _mm_unpackhi_epi16(pred_u16, zero); accum_0_u32 = _mm_loadu_si128((const __m128i *)accumulator); accum_1_u32 = _mm_loadu_si128((const __m128i *)(accumulator + 4)); accum_0_u32 = _mm_add_epi32(pred_0_u32, accum_0_u32); accum_1_u32 = _mm_add_epi32(pred_1_u32, accum_1_u32); _mm_storeu_si128((__m128i *)accumulator, accum_0_u32); _mm_storeu_si128((__m128i *)(accumulator + 4), accum_1_u32); } static INLINE void highbd_read_dist_4(const uint32_t *dist, __m128i *dist_reg) { *dist_reg = _mm_loadu_si128((const __m128i *)dist); } static INLINE void highbd_read_dist_8(const uint32_t *dist, __m128i *reg_first, __m128i *reg_second) { highbd_read_dist_4(dist, reg_first); highbd_read_dist_4(dist + 4, reg_second); } static INLINE void highbd_read_chroma_dist_row_8( int ss_x, const uint32_t *u_dist, const uint32_t *v_dist, __m128i *u_first, __m128i *u_second, __m128i *v_first, __m128i *v_second) { if (!ss_x) { // If there is no chroma subsampling in the horizontal direction, then we // need to load 8 entries from chroma. highbd_read_dist_8(u_dist, u_first, u_second); highbd_read_dist_8(v_dist, v_first, v_second); } else { // ss_x == 1 // Otherwise, we only need to load 8 entries __m128i u_reg, v_reg; highbd_read_dist_4(u_dist, &u_reg); *u_first = _mm_unpacklo_epi32(u_reg, u_reg); *u_second = _mm_unpackhi_epi32(u_reg, u_reg); highbd_read_dist_4(v_dist, &v_reg); *v_first = _mm_unpacklo_epi32(v_reg, v_reg); *v_second = _mm_unpackhi_epi32(v_reg, v_reg); } } static void vp9_highbd_apply_temporal_filter_luma_8( const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, int use_whole_blk, uint32_t *y_accum, uint16_t *y_count, const uint32_t *y_dist, const uint32_t *u_dist, const uint32_t *v_dist, const uint32_t *const *neighbors_first, const uint32_t *const *neighbors_second, int top_weight, int bottom_weight) { const int rounding = (1 << strength) >> 1; int weight = top_weight; __m128i mul_first, mul_second; __m128i sum_row_1_first, sum_row_1_second; __m128i sum_row_2_first, sum_row_2_second; __m128i sum_row_3_first, sum_row_3_second; __m128i u_first, u_second; __m128i v_first, v_second; __m128i sum_row_first; __m128i sum_row_second; // Loop variables unsigned int h; assert(strength >= 4 && strength <= 14 && "invalid adjusted temporal filter strength"); assert(block_width == 8); (void)block_width; // First row mul_first = _mm_load_si128((const __m128i *)neighbors_first[0]); mul_second = _mm_load_si128((const __m128i *)neighbors_second[0]); // Add luma values highbd_get_sum_8(y_dist, &sum_row_2_first, &sum_row_2_second); highbd_get_sum_8(y_dist + DIST_STRIDE, &sum_row_3_first, 
&sum_row_3_second); // We don't need to saturate here because the maximum value is UINT12_MAX ** 2 // * 9 ~= 2**24 * 9 < 2 ** 28 < INT32_MAX sum_row_first = _mm_add_epi32(sum_row_2_first, sum_row_3_first); sum_row_second = _mm_add_epi32(sum_row_2_second, sum_row_3_second); // Add chroma values highbd_read_chroma_dist_row_8(ss_x, u_dist, v_dist, &u_first, &u_second, &v_first, &v_second); // Max value here is 2 ** 24 * (9 + 2), so no saturation is needed sum_row_first = _mm_add_epi32(sum_row_first, u_first); sum_row_second = _mm_add_epi32(sum_row_second, u_second); sum_row_first = _mm_add_epi32(sum_row_first, v_first); sum_row_second = _mm_add_epi32(sum_row_second, v_second); // Get modifier and store result highbd_average_8(&sum_row_first, &sum_row_second, &sum_row_first, &sum_row_second, &mul_first, &mul_second, strength, rounding, weight); highbd_accumulate_and_store_8(sum_row_first, sum_row_second, y_pre, y_count, y_accum); y_src += y_src_stride; y_pre += y_pre_stride; y_count += y_pre_stride; y_accum += y_pre_stride; y_dist += DIST_STRIDE; u_src += uv_src_stride; u_pre += uv_pre_stride; u_dist += DIST_STRIDE; v_src += uv_src_stride; v_pre += uv_pre_stride; v_dist += DIST_STRIDE; // Then all the rows except the last one mul_first = _mm_load_si128((const __m128i *)neighbors_first[1]); mul_second = _mm_load_si128((const __m128i *)neighbors_second[1]); for (h = 1; h < block_height - 1; ++h) { // Move the weight to bottom half if (!use_whole_blk && h == block_height / 2) { weight = bottom_weight; } // Shift the rows up sum_row_1_first = sum_row_2_first; sum_row_1_second = sum_row_2_second; sum_row_2_first = sum_row_3_first; sum_row_2_second = sum_row_3_second; // Add luma values to the modifier sum_row_first = _mm_add_epi32(sum_row_1_first, sum_row_2_first); sum_row_second = _mm_add_epi32(sum_row_1_second, sum_row_2_second); highbd_get_sum_8(y_dist + DIST_STRIDE, &sum_row_3_first, &sum_row_3_second); sum_row_first = _mm_add_epi32(sum_row_first, sum_row_3_first); sum_row_second = _mm_add_epi32(sum_row_second, sum_row_3_second); // Add chroma values to the modifier if (ss_y == 0 || h % 2 == 0) { // Only calculate the new chroma distortion if we are at a pixel that // corresponds to a new chroma row highbd_read_chroma_dist_row_8(ss_x, u_dist, v_dist, &u_first, &u_second, &v_first, &v_second); u_src += uv_src_stride; u_pre += uv_pre_stride; u_dist += DIST_STRIDE; v_src += uv_src_stride; v_pre += uv_pre_stride; v_dist += DIST_STRIDE; } sum_row_first = _mm_add_epi32(sum_row_first, u_first); sum_row_second = _mm_add_epi32(sum_row_second, u_second); sum_row_first = _mm_add_epi32(sum_row_first, v_first); sum_row_second = _mm_add_epi32(sum_row_second, v_second); // Get modifier and store result highbd_average_8(&sum_row_first, &sum_row_second, &sum_row_first, &sum_row_second, &mul_first, &mul_second, strength, rounding, weight); highbd_accumulate_and_store_8(sum_row_first, sum_row_second, y_pre, y_count, y_accum); y_src += y_src_stride; y_pre += y_pre_stride; y_count += y_pre_stride; y_accum += y_pre_stride; y_dist += DIST_STRIDE; } // The last row mul_first = _mm_load_si128((const __m128i *)neighbors_first[0]); mul_second = _mm_load_si128((const __m128i *)neighbors_second[0]); // Shift the rows up sum_row_1_first = sum_row_2_first; sum_row_1_second = sum_row_2_second; sum_row_2_first = sum_row_3_first; sum_row_2_second = sum_row_3_second; // Add luma values to the modifier sum_row_first = _mm_add_epi32(sum_row_1_first, sum_row_2_first); sum_row_second = _mm_add_epi32(sum_row_1_second, 
sum_row_2_second); // Add chroma values to the modifier if (ss_y == 0) { // Only calculate the new chroma distortion if we are at a pixel that // corresponds to a new chroma row highbd_read_chroma_dist_row_8(ss_x, u_dist, v_dist, &u_first, &u_second, &v_first, &v_second); } sum_row_first = _mm_add_epi32(sum_row_first, u_first); sum_row_second = _mm_add_epi32(sum_row_second, u_second); sum_row_first = _mm_add_epi32(sum_row_first, v_first); sum_row_second = _mm_add_epi32(sum_row_second, v_second); // Get modifier and store result highbd_average_8(&sum_row_first, &sum_row_second, &sum_row_first, &sum_row_second, &mul_first, &mul_second, strength, rounding, weight); highbd_accumulate_and_store_8(sum_row_first, sum_row_second, y_pre, y_count, y_accum); } // Perform temporal filter for the luma component. static void vp9_highbd_apply_temporal_filter_luma( const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *blk_fw, int use_whole_blk, uint32_t *y_accum, uint16_t *y_count, const uint32_t *y_dist, const uint32_t *u_dist, const uint32_t *v_dist) { unsigned int blk_col = 0, uv_blk_col = 0; const unsigned int blk_col_step = 8, uv_blk_col_step = 8 >> ss_x; const unsigned int mid_width = block_width >> 1, last_width = block_width - blk_col_step; int top_weight = blk_fw[0], bottom_weight = use_whole_blk ? blk_fw[0] : blk_fw[2]; const uint32_t *const *neighbors_first; const uint32_t *const *neighbors_second; // Left neighbors_first = HIGHBD_LUMA_LEFT_COLUMN_NEIGHBORS; neighbors_second = HIGHBD_LUMA_MIDDLE_COLUMN_NEIGHBORS; vp9_highbd_apply_temporal_filter_luma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, blk_col_step, block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, bottom_weight); blk_col += blk_col_step; uv_blk_col += uv_blk_col_step; // Middle First neighbors_first = HIGHBD_LUMA_MIDDLE_COLUMN_NEIGHBORS; for (; blk_col < mid_width; blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { vp9_highbd_apply_temporal_filter_luma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, blk_col_step, block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, bottom_weight); } if (!use_whole_blk) { top_weight = blk_fw[1]; bottom_weight = blk_fw[3]; } // Middle Second for (; blk_col < last_width; blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { vp9_highbd_apply_temporal_filter_luma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, blk_col_step, block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, bottom_weight); } // Right neighbors_second = 
HIGHBD_LUMA_RIGHT_COLUMN_NEIGHBORS; vp9_highbd_apply_temporal_filter_luma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, blk_col_step, block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, bottom_weight); } // Add a row of luma distortion that corresponds to 8 chroma mods. If we are // subsampling in x direction, then we have 16 lumas, else we have 8. static INLINE void highbd_add_luma_dist_to_8_chroma_mod( const uint32_t *y_dist, int ss_x, int ss_y, __m128i *u_mod_fst, __m128i *u_mod_snd, __m128i *v_mod_fst, __m128i *v_mod_snd) { __m128i y_reg_fst, y_reg_snd; if (!ss_x) { highbd_read_dist_8(y_dist, &y_reg_fst, &y_reg_snd); if (ss_y == 1) { __m128i y_tmp_fst, y_tmp_snd; highbd_read_dist_8(y_dist + DIST_STRIDE, &y_tmp_fst, &y_tmp_snd); y_reg_fst = _mm_add_epi32(y_reg_fst, y_tmp_fst); y_reg_snd = _mm_add_epi32(y_reg_snd, y_tmp_snd); } } else { // Temporary __m128i y_fst, y_snd; // First 8 highbd_read_dist_8(y_dist, &y_fst, &y_snd); if (ss_y == 1) { __m128i y_tmp_fst, y_tmp_snd; highbd_read_dist_8(y_dist + DIST_STRIDE, &y_tmp_fst, &y_tmp_snd); y_fst = _mm_add_epi32(y_fst, y_tmp_fst); y_snd = _mm_add_epi32(y_snd, y_tmp_snd); } y_reg_fst = _mm_hadd_epi32(y_fst, y_snd); // Second 8 highbd_read_dist_8(y_dist + 8, &y_fst, &y_snd); if (ss_y == 1) { __m128i y_tmp_fst, y_tmp_snd; highbd_read_dist_8(y_dist + 8 + DIST_STRIDE, &y_tmp_fst, &y_tmp_snd); y_fst = _mm_add_epi32(y_fst, y_tmp_fst); y_snd = _mm_add_epi32(y_snd, y_tmp_snd); } y_reg_snd = _mm_hadd_epi32(y_fst, y_snd); } *u_mod_fst = _mm_add_epi32(*u_mod_fst, y_reg_fst); *u_mod_snd = _mm_add_epi32(*u_mod_snd, y_reg_snd); *v_mod_fst = _mm_add_epi32(*v_mod_fst, y_reg_fst); *v_mod_snd = _mm_add_epi32(*v_mod_snd, y_reg_snd); } // Apply temporal filter to the chroma components. This performs temporal // filtering on a chroma block of 8 X uv_height. If blk_fw is not NULL, use // blk_fw as an array of size 4 for the weights for each of the 4 subblocks, // else use top_weight for top half, and bottom weight for bottom half. 
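// For reference, the weight selection described above reduces to this scalar
// rule (a sketch; the vector code below keeps the weights in registers
// instead):
//   static int subblock_weight(const int *blk_fw, int top_weight,
//                              int bottom_weight, int is_right_half,
//                              int is_bottom_half) {
//     if (blk_fw)  // one weight per 8x8 quadrant of a 16x16 block
//       return blk_fw[2 * is_bottom_half + is_right_half];
//     return is_bottom_half ? bottom_weight : top_weight;
//   }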
static void vp9_highbd_apply_temporal_filter_chroma_8( const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, int uv_pre_stride, unsigned int uv_block_width, unsigned int uv_block_height, int ss_x, int ss_y, int strength, uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count, const uint32_t *y_dist, const uint32_t *u_dist, const uint32_t *v_dist, const uint32_t *const *neighbors_fst, const uint32_t *const *neighbors_snd, int top_weight, int bottom_weight, const int *blk_fw) { const int rounding = (1 << strength) >> 1; int weight = top_weight; __m128i mul_fst, mul_snd; __m128i u_sum_row_1_fst, u_sum_row_2_fst, u_sum_row_3_fst; __m128i v_sum_row_1_fst, v_sum_row_2_fst, v_sum_row_3_fst; __m128i u_sum_row_1_snd, u_sum_row_2_snd, u_sum_row_3_snd; __m128i v_sum_row_1_snd, v_sum_row_2_snd, v_sum_row_3_snd; __m128i u_sum_row_fst, v_sum_row_fst; __m128i u_sum_row_snd, v_sum_row_snd; // Loop variable unsigned int h; (void)uv_block_width; // First row mul_fst = _mm_load_si128((const __m128i *)neighbors_fst[0]); mul_snd = _mm_load_si128((const __m128i *)neighbors_snd[0]); // Add chroma values highbd_get_sum_8(u_dist, &u_sum_row_2_fst, &u_sum_row_2_snd); highbd_get_sum_8(u_dist + DIST_STRIDE, &u_sum_row_3_fst, &u_sum_row_3_snd); u_sum_row_fst = _mm_add_epi32(u_sum_row_2_fst, u_sum_row_3_fst); u_sum_row_snd = _mm_add_epi32(u_sum_row_2_snd, u_sum_row_3_snd); highbd_get_sum_8(v_dist, &v_sum_row_2_fst, &v_sum_row_2_snd); highbd_get_sum_8(v_dist + DIST_STRIDE, &v_sum_row_3_fst, &v_sum_row_3_snd); v_sum_row_fst = _mm_add_epi32(v_sum_row_2_fst, v_sum_row_3_fst); v_sum_row_snd = _mm_add_epi32(v_sum_row_2_snd, v_sum_row_3_snd); // Add luma values highbd_add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row_fst, &u_sum_row_snd, &v_sum_row_fst, &v_sum_row_snd); // Get modifier and store result if (blk_fw) { highbd_average_4(&u_sum_row_fst, &u_sum_row_fst, &mul_fst, strength, rounding, blk_fw[0]); highbd_average_4(&u_sum_row_snd, &u_sum_row_snd, &mul_snd, strength, rounding, blk_fw[1]); highbd_average_4(&v_sum_row_fst, &v_sum_row_fst, &mul_fst, strength, rounding, blk_fw[0]); highbd_average_4(&v_sum_row_snd, &v_sum_row_snd, &mul_snd, strength, rounding, blk_fw[1]); } else { highbd_average_8(&u_sum_row_fst, &u_sum_row_snd, &u_sum_row_fst, &u_sum_row_snd, &mul_fst, &mul_snd, strength, rounding, weight); highbd_average_8(&v_sum_row_fst, &v_sum_row_snd, &v_sum_row_fst, &v_sum_row_snd, &mul_fst, &mul_snd, strength, rounding, weight); } highbd_accumulate_and_store_8(u_sum_row_fst, u_sum_row_snd, u_pre, u_count, u_accum); highbd_accumulate_and_store_8(v_sum_row_fst, v_sum_row_snd, v_pre, v_count, v_accum); u_src += uv_src_stride; u_pre += uv_pre_stride; u_dist += DIST_STRIDE; v_src += uv_src_stride; v_pre += uv_pre_stride; v_dist += DIST_STRIDE; u_count += uv_pre_stride; u_accum += uv_pre_stride; v_count += uv_pre_stride; v_accum += uv_pre_stride; y_src += y_src_stride * (1 + ss_y); y_pre += y_pre_stride * (1 + ss_y); y_dist += DIST_STRIDE * (1 + ss_y); // Then all the rows except the last one mul_fst = _mm_load_si128((const __m128i *)neighbors_fst[1]); mul_snd = _mm_load_si128((const __m128i *)neighbors_snd[1]); for (h = 1; h < uv_block_height - 1; ++h) { // Move the weight pointer to the bottom half of the blocks if (h == uv_block_height / 2) { if (blk_fw) { blk_fw += 2; } else { weight = bottom_weight; } } // Shift the rows up u_sum_row_1_fst = 
u_sum_row_2_fst; u_sum_row_2_fst = u_sum_row_3_fst; u_sum_row_1_snd = u_sum_row_2_snd; u_sum_row_2_snd = u_sum_row_3_snd; v_sum_row_1_fst = v_sum_row_2_fst; v_sum_row_2_fst = v_sum_row_3_fst; v_sum_row_1_snd = v_sum_row_2_snd; v_sum_row_2_snd = v_sum_row_3_snd; // Add chroma values u_sum_row_fst = _mm_add_epi32(u_sum_row_1_fst, u_sum_row_2_fst); u_sum_row_snd = _mm_add_epi32(u_sum_row_1_snd, u_sum_row_2_snd); highbd_get_sum_8(u_dist + DIST_STRIDE, &u_sum_row_3_fst, &u_sum_row_3_snd); u_sum_row_fst = _mm_add_epi32(u_sum_row_fst, u_sum_row_3_fst); u_sum_row_snd = _mm_add_epi32(u_sum_row_snd, u_sum_row_3_snd); v_sum_row_fst = _mm_add_epi32(v_sum_row_1_fst, v_sum_row_2_fst); v_sum_row_snd = _mm_add_epi32(v_sum_row_1_snd, v_sum_row_2_snd); highbd_get_sum_8(v_dist + DIST_STRIDE, &v_sum_row_3_fst, &v_sum_row_3_snd); v_sum_row_fst = _mm_add_epi32(v_sum_row_fst, v_sum_row_3_fst); v_sum_row_snd = _mm_add_epi32(v_sum_row_snd, v_sum_row_3_snd); // Add luma values highbd_add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row_fst, &u_sum_row_snd, &v_sum_row_fst, &v_sum_row_snd); // Get modifier and store result if (blk_fw) { highbd_average_4(&u_sum_row_fst, &u_sum_row_fst, &mul_fst, strength, rounding, blk_fw[0]); highbd_average_4(&u_sum_row_snd, &u_sum_row_snd, &mul_snd, strength, rounding, blk_fw[1]); highbd_average_4(&v_sum_row_fst, &v_sum_row_fst, &mul_fst, strength, rounding, blk_fw[0]); highbd_average_4(&v_sum_row_snd, &v_sum_row_snd, &mul_snd, strength, rounding, blk_fw[1]); } else { highbd_average_8(&u_sum_row_fst, &u_sum_row_snd, &u_sum_row_fst, &u_sum_row_snd, &mul_fst, &mul_snd, strength, rounding, weight); highbd_average_8(&v_sum_row_fst, &v_sum_row_snd, &v_sum_row_fst, &v_sum_row_snd, &mul_fst, &mul_snd, strength, rounding, weight); } highbd_accumulate_and_store_8(u_sum_row_fst, u_sum_row_snd, u_pre, u_count, u_accum); highbd_accumulate_and_store_8(v_sum_row_fst, v_sum_row_snd, v_pre, v_count, v_accum); u_src += uv_src_stride; u_pre += uv_pre_stride; u_dist += DIST_STRIDE; v_src += uv_src_stride; v_pre += uv_pre_stride; v_dist += DIST_STRIDE; u_count += uv_pre_stride; u_accum += uv_pre_stride; v_count += uv_pre_stride; v_accum += uv_pre_stride; y_src += y_src_stride * (1 + ss_y); y_pre += y_pre_stride * (1 + ss_y); y_dist += DIST_STRIDE * (1 + ss_y); } // The last row mul_fst = _mm_load_si128((const __m128i *)neighbors_fst[0]); mul_snd = _mm_load_si128((const __m128i *)neighbors_snd[0]); // Shift the rows up u_sum_row_1_fst = u_sum_row_2_fst; u_sum_row_2_fst = u_sum_row_3_fst; u_sum_row_1_snd = u_sum_row_2_snd; u_sum_row_2_snd = u_sum_row_3_snd; v_sum_row_1_fst = v_sum_row_2_fst; v_sum_row_2_fst = v_sum_row_3_fst; v_sum_row_1_snd = v_sum_row_2_snd; v_sum_row_2_snd = v_sum_row_3_snd; // Add chroma values u_sum_row_fst = _mm_add_epi32(u_sum_row_1_fst, u_sum_row_2_fst); v_sum_row_fst = _mm_add_epi32(v_sum_row_1_fst, v_sum_row_2_fst); u_sum_row_snd = _mm_add_epi32(u_sum_row_1_snd, u_sum_row_2_snd); v_sum_row_snd = _mm_add_epi32(v_sum_row_1_snd, v_sum_row_2_snd); // Add luma values highbd_add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row_fst, &u_sum_row_snd, &v_sum_row_fst, &v_sum_row_snd); // Get modifier and store result if (blk_fw) { highbd_average_4(&u_sum_row_fst, &u_sum_row_fst, &mul_fst, strength, rounding, blk_fw[0]); highbd_average_4(&u_sum_row_snd, &u_sum_row_snd, &mul_snd, strength, rounding, blk_fw[1]); highbd_average_4(&v_sum_row_fst, &v_sum_row_fst, &mul_fst, strength, rounding, blk_fw[0]); highbd_average_4(&v_sum_row_snd, &v_sum_row_snd, &mul_snd, strength, 
rounding, blk_fw[1]); } else { highbd_average_8(&u_sum_row_fst, &u_sum_row_snd, &u_sum_row_fst, &u_sum_row_snd, &mul_fst, &mul_snd, strength, rounding, weight); highbd_average_8(&v_sum_row_fst, &v_sum_row_snd, &v_sum_row_fst, &v_sum_row_snd, &mul_fst, &mul_snd, strength, rounding, weight); } highbd_accumulate_and_store_8(u_sum_row_fst, u_sum_row_snd, u_pre, u_count, u_accum); highbd_accumulate_and_store_8(v_sum_row_fst, v_sum_row_snd, v_pre, v_count, v_accum); } // Perform temporal filter for the chroma components. static void vp9_highbd_apply_temporal_filter_chroma( const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *blk_fw, int use_whole_blk, uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count, const uint32_t *y_dist, const uint32_t *u_dist, const uint32_t *v_dist) { const unsigned int uv_width = block_width >> ss_x, uv_height = block_height >> ss_y; unsigned int blk_col = 0, uv_blk_col = 0; const unsigned int uv_blk_col_step = 8, blk_col_step = 8 << ss_x; const unsigned int uv_mid_width = uv_width >> 1, uv_last_width = uv_width - uv_blk_col_step; int top_weight = blk_fw[0], bottom_weight = use_whole_blk ? blk_fw[0] : blk_fw[2]; const uint32_t *const *neighbors_fst; const uint32_t *const *neighbors_snd; if (uv_width == 8) { // Special Case: We are subsampling in x direction on a 16x16 block. Since // we are operating on a row of 8 chroma pixels, we can't use the usual // left-middle-right pattern. assert(ss_x); if (ss_y) { neighbors_fst = HIGHBD_CHROMA_DOUBLE_SS_LEFT_COLUMN_NEIGHBORS; neighbors_snd = HIGHBD_CHROMA_DOUBLE_SS_RIGHT_COLUMN_NEIGHBORS; } else { neighbors_fst = HIGHBD_CHROMA_SINGLE_SS_LEFT_COLUMN_NEIGHBORS; neighbors_snd = HIGHBD_CHROMA_SINGLE_SS_RIGHT_COLUMN_NEIGHBORS; } if (use_whole_blk) { vp9_highbd_apply_temporal_filter_chroma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_fst, neighbors_snd, top_weight, bottom_weight, NULL); } else { vp9_highbd_apply_temporal_filter_chroma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_fst, neighbors_snd, 0, 0, blk_fw); } return; } // Left if (ss_x && ss_y) { neighbors_fst = HIGHBD_CHROMA_DOUBLE_SS_LEFT_COLUMN_NEIGHBORS; neighbors_snd = HIGHBD_CHROMA_DOUBLE_SS_MIDDLE_COLUMN_NEIGHBORS; } else if (ss_x || ss_y) { neighbors_fst = HIGHBD_CHROMA_SINGLE_SS_LEFT_COLUMN_NEIGHBORS; neighbors_snd = HIGHBD_CHROMA_SINGLE_SS_MIDDLE_COLUMN_NEIGHBORS; } else { neighbors_fst = HIGHBD_CHROMA_NO_SS_LEFT_COLUMN_NEIGHBORS; neighbors_snd = HIGHBD_CHROMA_NO_SS_MIDDLE_COLUMN_NEIGHBORS; } vp9_highbd_apply_temporal_filter_chroma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, 
uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_fst, neighbors_snd, top_weight, bottom_weight, NULL); blk_col += blk_col_step; uv_blk_col += uv_blk_col_step; // Middle First if (ss_x && ss_y) { neighbors_fst = HIGHBD_CHROMA_DOUBLE_SS_MIDDLE_COLUMN_NEIGHBORS; } else if (ss_x || ss_y) { neighbors_fst = HIGHBD_CHROMA_SINGLE_SS_MIDDLE_COLUMN_NEIGHBORS; } else { neighbors_fst = HIGHBD_CHROMA_NO_SS_MIDDLE_COLUMN_NEIGHBORS; } for (; uv_blk_col < uv_mid_width; blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { vp9_highbd_apply_temporal_filter_chroma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_fst, neighbors_snd, top_weight, bottom_weight, NULL); } if (!use_whole_blk) { top_weight = blk_fw[1]; bottom_weight = blk_fw[3]; } // Middle Second for (; uv_blk_col < uv_last_width; blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { vp9_highbd_apply_temporal_filter_chroma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_fst, neighbors_snd, top_weight, bottom_weight, NULL); } // Right if (ss_x && ss_y) { neighbors_snd = HIGHBD_CHROMA_DOUBLE_SS_RIGHT_COLUMN_NEIGHBORS; } else if (ss_x || ss_y) { neighbors_snd = HIGHBD_CHROMA_SINGLE_SS_RIGHT_COLUMN_NEIGHBORS; } else { neighbors_snd = HIGHBD_CHROMA_NO_SS_RIGHT_COLUMN_NEIGHBORS; } vp9_highbd_apply_temporal_filter_chroma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_fst, neighbors_snd, top_weight, bottom_weight, NULL); } void vp9_highbd_apply_temporal_filter_sse4_1( const uint16_t *y_src, int y_src_stride, const uint16_t *y_pre, int y_pre_stride, const uint16_t *u_src, const uint16_t *v_src, int uv_src_stride, const uint16_t *u_pre, const uint16_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_whole_blk, uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count) { const unsigned int chroma_height = block_height >> ss_y, chroma_width = block_width >> ss_x; DECLARE_ALIGNED(16, uint32_t, y_dist[BH * DIST_STRIDE]) = { 0 }; DECLARE_ALIGNED(16, uint32_t, u_dist[BH * DIST_STRIDE]) = { 0 }; DECLARE_ALIGNED(16, uint32_t, v_dist[BH * DIST_STRIDE]) = { 0 }; uint32_t *y_dist_ptr = y_dist + 1, *u_dist_ptr = u_dist + 1, *v_dist_ptr = v_dist + 1; const uint16_t *y_src_ptr = y_src, *u_src_ptr = u_src, *v_src_ptr = v_src; const 
uint16_t *y_pre_ptr = y_pre, *u_pre_ptr = u_pre, *v_pre_ptr = v_pre; // Loop variables unsigned int row, blk_col; assert(block_width <= BW && "block width too large"); assert(block_height <= BH && "block height too large"); assert(block_width % 16 == 0 && "block width must be multiple of 16"); assert(block_height % 2 == 0 && "block height must be even"); assert((ss_x == 0 || ss_x == 1) && (ss_y == 0 || ss_y == 1) && "invalid chroma subsampling"); assert(strength >= 4 && strength <= 14 && "invalid adjusted temporal filter strength"); assert(blk_fw[0] >= 0 && "filter weight must be positive"); assert( (use_whole_blk || (blk_fw[1] >= 0 && blk_fw[2] >= 0 && blk_fw[3] >= 0)) && "subblock filter weight must be positive"); assert(blk_fw[0] <= 2 && "sublock filter weight must be less than 2"); assert( (use_whole_blk || (blk_fw[1] <= 2 && blk_fw[2] <= 2 && blk_fw[3] <= 2)) && "subblock filter weight must be less than 2"); // Precompute the difference squared for (row = 0; row < block_height; row++) { for (blk_col = 0; blk_col < block_width; blk_col += 8) { highbd_store_dist_8(y_src_ptr + blk_col, y_pre_ptr + blk_col, y_dist_ptr + blk_col); } y_src_ptr += y_src_stride; y_pre_ptr += y_pre_stride; y_dist_ptr += DIST_STRIDE; } for (row = 0; row < chroma_height; row++) { for (blk_col = 0; blk_col < chroma_width; blk_col += 8) { highbd_store_dist_8(u_src_ptr + blk_col, u_pre_ptr + blk_col, u_dist_ptr + blk_col); highbd_store_dist_8(v_src_ptr + blk_col, v_pre_ptr + blk_col, v_dist_ptr + blk_col); } u_src_ptr += uv_src_stride; u_pre_ptr += uv_pre_stride; u_dist_ptr += DIST_STRIDE; v_src_ptr += uv_src_stride; v_pre_ptr += uv_pre_stride; v_dist_ptr += DIST_STRIDE; } y_dist_ptr = y_dist + 1; u_dist_ptr = u_dist + 1; v_dist_ptr = v_dist + 1; vp9_highbd_apply_temporal_filter_luma( y_src, y_src_stride, y_pre, y_pre_stride, u_src, v_src, uv_src_stride, u_pre, v_pre, uv_pre_stride, block_width, block_height, ss_x, ss_y, strength, blk_fw, use_whole_blk, y_accum, y_count, y_dist_ptr, u_dist_ptr, v_dist_ptr); vp9_highbd_apply_temporal_filter_chroma( y_src, y_src_stride, y_pre, y_pre_stride, u_src, v_src, uv_src_stride, u_pre, v_pre, uv_pre_stride, block_width, block_height, ss_x, ss_y, strength, blk_fw, use_whole_blk, u_accum, u_count, v_accum, v_count, y_dist_ptr, u_dist_ptr, v_dist_ptr); } libvpx-1.8.2/vp9/encoder/x86/temporal_filter_constants.h000066400000000000000000000371121357355204000232550ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_ENCODER_X86_TEMPORAL_FILTER_CONSTANTS_H_ #define VPX_VP9_ENCODER_X86_TEMPORAL_FILTER_CONSTANTS_H_ #include "./vpx_config.h" // Division using multiplication and shifting. The C implementation does: // modifier *= 3; // modifier /= index; // where 'modifier' is a set of summed values and 'index' is the number of // summed values. // // This equation works out to (m * 3) / i which reduces to: // m * 3/4 // m * 1/2 // m * 1/3 // // By pairing the multiply with a down shift by 16 (_mm_mulhi_epu16): // m * C / 65536 // we can create a C to replicate the division. 
// // m * 49152 / 65536 = m * 3/4 // m * 32758 / 65536 = m * 1/2 // m * 21846 / 65536 = m * 0.3333 // // These are loaded using an instruction expecting int16_t values but are used // with _mm_mulhi_epu16(), which treats them as unsigned. #define NEIGHBOR_CONSTANT_4 (int16_t)49152 #define NEIGHBOR_CONSTANT_5 (int16_t)39322 #define NEIGHBOR_CONSTANT_6 (int16_t)32768 #define NEIGHBOR_CONSTANT_7 (int16_t)28087 #define NEIGHBOR_CONSTANT_8 (int16_t)24576 #define NEIGHBOR_CONSTANT_9 (int16_t)21846 #define NEIGHBOR_CONSTANT_10 (int16_t)19661 #define NEIGHBOR_CONSTANT_11 (int16_t)17874 #define NEIGHBOR_CONSTANT_13 (int16_t)15124 DECLARE_ALIGNED(16, static const int16_t, LEFT_CORNER_NEIGHBORS_PLUS_1[8]) = { NEIGHBOR_CONSTANT_5, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7 }; DECLARE_ALIGNED(16, static const int16_t, RIGHT_CORNER_NEIGHBORS_PLUS_1[8]) = { NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_5 }; DECLARE_ALIGNED(16, static const int16_t, LEFT_EDGE_NEIGHBORS_PLUS_1[8]) = { NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10 }; DECLARE_ALIGNED(16, static const int16_t, RIGHT_EDGE_NEIGHBORS_PLUS_1[8]) = { NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_7 }; DECLARE_ALIGNED(16, static const int16_t, MIDDLE_EDGE_NEIGHBORS_PLUS_1[8]) = { NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7, NEIGHBOR_CONSTANT_7 }; DECLARE_ALIGNED(16, static const int16_t, MIDDLE_CENTER_NEIGHBORS_PLUS_1[8]) = { NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10 }; DECLARE_ALIGNED(16, static const int16_t, LEFT_CORNER_NEIGHBORS_PLUS_2[8]) = { NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8 }; DECLARE_ALIGNED(16, static const int16_t, RIGHT_CORNER_NEIGHBORS_PLUS_2[8]) = { NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_6 }; DECLARE_ALIGNED(16, static const int16_t, LEFT_EDGE_NEIGHBORS_PLUS_2[8]) = { NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11 }; DECLARE_ALIGNED(16, static const int16_t, RIGHT_EDGE_NEIGHBORS_PLUS_2[8]) = { NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_8 }; DECLARE_ALIGNED(16, static const int16_t, MIDDLE_EDGE_NEIGHBORS_PLUS_2[8]) = { NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8 }; DECLARE_ALIGNED(16, static const int16_t, MIDDLE_CENTER_NEIGHBORS_PLUS_2[8]) = { NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, 
NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11 }; DECLARE_ALIGNED(16, static const int16_t, TWO_CORNER_NEIGHBORS_PLUS_2[8]) = { NEIGHBOR_CONSTANT_6, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_6 }; DECLARE_ALIGNED(16, static const int16_t, TWO_EDGE_NEIGHBORS_PLUS_2[8]) = { NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_11, NEIGHBOR_CONSTANT_8 }; DECLARE_ALIGNED(16, static const int16_t, LEFT_CORNER_NEIGHBORS_PLUS_4[8]) = { NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10 }; DECLARE_ALIGNED(16, static const int16_t, RIGHT_CORNER_NEIGHBORS_PLUS_4[8]) = { NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_8 }; DECLARE_ALIGNED(16, static const int16_t, LEFT_EDGE_NEIGHBORS_PLUS_4[8]) = { NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13 }; DECLARE_ALIGNED(16, static const int16_t, RIGHT_EDGE_NEIGHBORS_PLUS_4[8]) = { NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_10 }; DECLARE_ALIGNED(16, static const int16_t, MIDDLE_EDGE_NEIGHBORS_PLUS_4[8]) = { NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10 }; DECLARE_ALIGNED(16, static const int16_t, MIDDLE_CENTER_NEIGHBORS_PLUS_4[8]) = { NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13 }; DECLARE_ALIGNED(16, static const int16_t, TWO_CORNER_NEIGHBORS_PLUS_4[8]) = { NEIGHBOR_CONSTANT_8, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_8 }; DECLARE_ALIGNED(16, static const int16_t, TWO_EDGE_NEIGHBORS_PLUS_4[8]) = { NEIGHBOR_CONSTANT_10, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_13, NEIGHBOR_CONSTANT_10 }; static const int16_t *const LUMA_LEFT_COLUMN_NEIGHBORS[2] = { LEFT_CORNER_NEIGHBORS_PLUS_2, LEFT_EDGE_NEIGHBORS_PLUS_2 }; static const int16_t *const LUMA_MIDDLE_COLUMN_NEIGHBORS[2] = { MIDDLE_EDGE_NEIGHBORS_PLUS_2, MIDDLE_CENTER_NEIGHBORS_PLUS_2 }; static const int16_t *const LUMA_RIGHT_COLUMN_NEIGHBORS[2] = { RIGHT_CORNER_NEIGHBORS_PLUS_2, RIGHT_EDGE_NEIGHBORS_PLUS_2 }; static const int16_t *const CHROMA_NO_SS_LEFT_COLUMN_NEIGHBORS[2] = { LEFT_CORNER_NEIGHBORS_PLUS_1, LEFT_EDGE_NEIGHBORS_PLUS_1 }; static const int16_t *const CHROMA_NO_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { MIDDLE_EDGE_NEIGHBORS_PLUS_1, MIDDLE_CENTER_NEIGHBORS_PLUS_1 }; static const int16_t *const CHROMA_NO_SS_RIGHT_COLUMN_NEIGHBORS[2] = { RIGHT_CORNER_NEIGHBORS_PLUS_1, RIGHT_EDGE_NEIGHBORS_PLUS_1 }; static const int16_t *const CHROMA_SINGLE_SS_LEFT_COLUMN_NEIGHBORS[2] = { LEFT_CORNER_NEIGHBORS_PLUS_2, 
LEFT_EDGE_NEIGHBORS_PLUS_2 }; static const int16_t *const CHROMA_SINGLE_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { MIDDLE_EDGE_NEIGHBORS_PLUS_2, MIDDLE_CENTER_NEIGHBORS_PLUS_2 }; static const int16_t *const CHROMA_SINGLE_SS_RIGHT_COLUMN_NEIGHBORS[2] = { RIGHT_CORNER_NEIGHBORS_PLUS_2, RIGHT_EDGE_NEIGHBORS_PLUS_2 }; static const int16_t *const CHROMA_SINGLE_SS_SINGLE_COLUMN_NEIGHBORS[2] = { TWO_CORNER_NEIGHBORS_PLUS_2, TWO_EDGE_NEIGHBORS_PLUS_2 }; static const int16_t *const CHROMA_DOUBLE_SS_LEFT_COLUMN_NEIGHBORS[2] = { LEFT_CORNER_NEIGHBORS_PLUS_4, LEFT_EDGE_NEIGHBORS_PLUS_4 }; static const int16_t *const CHROMA_DOUBLE_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { MIDDLE_EDGE_NEIGHBORS_PLUS_4, MIDDLE_CENTER_NEIGHBORS_PLUS_4 }; static const int16_t *const CHROMA_DOUBLE_SS_RIGHT_COLUMN_NEIGHBORS[2] = { RIGHT_CORNER_NEIGHBORS_PLUS_4, RIGHT_EDGE_NEIGHBORS_PLUS_4 }; static const int16_t *const CHROMA_DOUBLE_SS_SINGLE_COLUMN_NEIGHBORS[2] = { TWO_CORNER_NEIGHBORS_PLUS_4, TWO_EDGE_NEIGHBORS_PLUS_4 }; #if CONFIG_VP9_HIGHBITDEPTH #define HIGHBD_NEIGHBOR_CONSTANT_4 (uint32_t)3221225472U #define HIGHBD_NEIGHBOR_CONSTANT_5 (uint32_t)2576980378U #define HIGHBD_NEIGHBOR_CONSTANT_6 (uint32_t)2147483648U #define HIGHBD_NEIGHBOR_CONSTANT_7 (uint32_t)1840700270U #define HIGHBD_NEIGHBOR_CONSTANT_8 (uint32_t)1610612736U #define HIGHBD_NEIGHBOR_CONSTANT_9 (uint32_t)1431655766U #define HIGHBD_NEIGHBOR_CONSTANT_10 (uint32_t)1288490189U #define HIGHBD_NEIGHBOR_CONSTANT_11 (uint32_t)1171354718U #define HIGHBD_NEIGHBOR_CONSTANT_13 (uint32_t)991146300U DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_1[4]) = { HIGHBD_NEIGHBOR_CONSTANT_5, HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_7 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_1[4]) = { HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_5 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_1[4]) = { HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_1[4]) = { HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_7 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_1[4]) = { HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_7, HIGHBD_NEIGHBOR_CONSTANT_7 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_1[4]) = { HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_2[4]) = { HIGHBD_NEIGHBOR_CONSTANT_6, HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_8 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_2[4]) = { HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_6 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_2[4]) = { HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_11 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_2[4]) = { HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_8 }; 
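// The 32-bit constants above are the high-bitdepth counterparts of the
// 16-bit reciprocals at the top of this file: (m * 3) / index is computed as
// the high half of m * HIGHBD_NEIGHBOR_CONSTANT_index. A minimal scalar
// sketch of that trick (ours, for illustration only; not used by the filter):
#if 0
static INLINE uint32_t highbd_mulhi_div_model(uint32_t m, uint32_t recip) {
  // High 32 bits of a 32x32 -> 64-bit product, i.e. (m * recip) >> 32.
  // With recip = HIGHBD_NEIGHBOR_CONSTANT_9 (1431655766 ~= 2^32 / 3) this
  // returns roughly m / 3, i.e. (m * 3) / 9 for nine summed values.
  return (uint32_t)(((uint64_t)m * recip) >> 32);
}
#endif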
DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_2[4]) = { HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_8 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_2[4]) = { HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_11, HIGHBD_NEIGHBOR_CONSTANT_11 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_4[4]) = { HIGHBD_NEIGHBOR_CONSTANT_8, HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_4[4]) = { HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_8 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_4[4]) = { HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_13 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_4[4]) = { HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_10 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_4[4]) = { HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10, HIGHBD_NEIGHBOR_CONSTANT_10 }; DECLARE_ALIGNED(16, static const uint32_t, HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_4[4]) = { HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_13, HIGHBD_NEIGHBOR_CONSTANT_13 }; static const uint32_t *const HIGHBD_LUMA_LEFT_COLUMN_NEIGHBORS[2] = { HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_2, HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_2 }; static const uint32_t *const HIGHBD_LUMA_MIDDLE_COLUMN_NEIGHBORS[2] = { HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_2, HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_2 }; static const uint32_t *const HIGHBD_LUMA_RIGHT_COLUMN_NEIGHBORS[2] = { HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_2, HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_2 }; static const uint32_t *const HIGHBD_CHROMA_NO_SS_LEFT_COLUMN_NEIGHBORS[2] = { HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_1, HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_1 }; static const uint32_t *const HIGHBD_CHROMA_NO_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_1, HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_1 }; static const uint32_t *const HIGHBD_CHROMA_NO_SS_RIGHT_COLUMN_NEIGHBORS[2] = { HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_1, HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_1 }; static const uint32_t *const HIGHBD_CHROMA_SINGLE_SS_LEFT_COLUMN_NEIGHBORS[2] = { HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_2, HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_2 }; static const uint32_t *const HIGHBD_CHROMA_SINGLE_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_2, HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_2 }; static const uint32_t *const HIGHBD_CHROMA_SINGLE_SS_RIGHT_COLUMN_NEIGHBORS[2] = { HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_2, HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_2 }; static const uint32_t *const HIGHBD_CHROMA_DOUBLE_SS_LEFT_COLUMN_NEIGHBORS[2] = { HIGHBD_LEFT_CORNER_NEIGHBORS_PLUS_4, HIGHBD_LEFT_EDGE_NEIGHBORS_PLUS_4 }; static const uint32_t *const HIGHBD_CHROMA_DOUBLE_SS_MIDDLE_COLUMN_NEIGHBORS[2] = { HIGHBD_MIDDLE_EDGE_NEIGHBORS_PLUS_4, HIGHBD_MIDDLE_CENTER_NEIGHBORS_PLUS_4 }; static const uint32_t *const HIGHBD_CHROMA_DOUBLE_SS_RIGHT_COLUMN_NEIGHBORS[2] = { HIGHBD_RIGHT_CORNER_NEIGHBORS_PLUS_4, HIGHBD_RIGHT_EDGE_NEIGHBORS_PLUS_4 }; #endif // CONFIG_VP9_HIGHBITDEPTH #define DIST_STRIDE 
((BW) + 2) #endif // VPX_VP9_ENCODER_X86_TEMPORAL_FILTER_CONSTANTS_H_ libvpx-1.8.2/vp9/encoder/x86/temporal_filter_sse4.c000066400000000000000000001074511357355204000221160ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <smmintrin.h> #include "./vp9_rtcd.h" #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_temporal_filter.h" #include "vp9/encoder/x86/temporal_filter_constants.h" // Read in 8 pixels from a and b as 8-bit unsigned integers, compute the // difference squared, and store as unsigned 16-bit integer to dst. static INLINE void store_dist_8(const uint8_t *a, const uint8_t *b, uint16_t *dst) { const __m128i a_reg = _mm_loadl_epi64((const __m128i *)a); const __m128i b_reg = _mm_loadl_epi64((const __m128i *)b); const __m128i a_first = _mm_cvtepu8_epi16(a_reg); const __m128i b_first = _mm_cvtepu8_epi16(b_reg); __m128i dist_first; dist_first = _mm_sub_epi16(a_first, b_first); dist_first = _mm_mullo_epi16(dist_first, dist_first); _mm_storeu_si128((__m128i *)dst, dist_first); } static INLINE void store_dist_16(const uint8_t *a, const uint8_t *b, uint16_t *dst) { const __m128i zero = _mm_setzero_si128(); const __m128i a_reg = _mm_loadu_si128((const __m128i *)a); const __m128i b_reg = _mm_loadu_si128((const __m128i *)b); const __m128i a_first = _mm_cvtepu8_epi16(a_reg); const __m128i a_second = _mm_unpackhi_epi8(a_reg, zero); const __m128i b_first = _mm_cvtepu8_epi16(b_reg); const __m128i b_second = _mm_unpackhi_epi8(b_reg, zero); __m128i dist_first, dist_second; dist_first = _mm_sub_epi16(a_first, b_first); dist_second = _mm_sub_epi16(a_second, b_second); dist_first = _mm_mullo_epi16(dist_first, dist_first); dist_second = _mm_mullo_epi16(dist_second, dist_second); _mm_storeu_si128((__m128i *)dst, dist_first); _mm_storeu_si128((__m128i *)(dst + 8), dist_second); } static INLINE void read_dist_8(const uint16_t *dist, __m128i *dist_reg) { *dist_reg = _mm_loadu_si128((const __m128i *)dist); } static INLINE void read_dist_16(const uint16_t *dist, __m128i *reg_first, __m128i *reg_second) { read_dist_8(dist, reg_first); read_dist_8(dist + 8, reg_second); } // Average the value based on the number of values summed (9 for pixels away // from the border, 4 for pixels in corners, and 6 for other edge values). // // Add in the rounding factor and shift, clamp to 16, invert and shift. Multiply // by weight. static INLINE __m128i average_8(__m128i sum, const __m128i *mul_constants, const int strength, const int rounding, const __m128i *weight) { // _mm_srl_epi16 uses the lower 64 bit value for the shift.
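// As a scalar model, the steps below compute (descriptive comment, ours):
//   mod = (sum * 3 / index + rounding) >> strength;  // mulhi, adds, srl
//   mod = 16 - min(mod, 16);                         // min, sub
//   result = mod * weight;                           // mullo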
const __m128i strength_u128 = _mm_set_epi32(0, 0, 0, strength); const __m128i rounding_u16 = _mm_set1_epi16(rounding); const __m128i weight_u16 = *weight; const __m128i sixteen = _mm_set1_epi16(16); // modifier * 3 / index; sum = _mm_mulhi_epu16(sum, *mul_constants); sum = _mm_adds_epu16(sum, rounding_u16); sum = _mm_srl_epi16(sum, strength_u128); // The maximum input to this comparison is UINT16_MAX * NEIGHBOR_CONSTANT_4 // >> 16 (also NEIGHBOR_CONSTANT_4 -1) which is 49151 / 0xbfff / -16385 // So this needs to use the epu16 version which did not come until SSE4. sum = _mm_min_epu16(sum, sixteen); sum = _mm_sub_epi16(sixteen, sum); return _mm_mullo_epi16(sum, weight_u16); } // Add 'sum_u16' to 'count'. Multiply by 'pred' and add to 'accumulator.' static void accumulate_and_store_8(const __m128i sum_u16, const uint8_t *pred, uint16_t *count, uint32_t *accumulator) { const __m128i pred_u8 = _mm_loadl_epi64((const __m128i *)pred); const __m128i zero = _mm_setzero_si128(); __m128i count_u16 = _mm_loadu_si128((const __m128i *)count); __m128i pred_u16 = _mm_cvtepu8_epi16(pred_u8); __m128i pred_0_u32, pred_1_u32; __m128i accum_0_u32, accum_1_u32; count_u16 = _mm_adds_epu16(count_u16, sum_u16); _mm_storeu_si128((__m128i *)count, count_u16); pred_u16 = _mm_mullo_epi16(sum_u16, pred_u16); pred_0_u32 = _mm_cvtepu16_epi32(pred_u16); pred_1_u32 = _mm_unpackhi_epi16(pred_u16, zero); accum_0_u32 = _mm_loadu_si128((const __m128i *)accumulator); accum_1_u32 = _mm_loadu_si128((const __m128i *)(accumulator + 4)); accum_0_u32 = _mm_add_epi32(pred_0_u32, accum_0_u32); accum_1_u32 = _mm_add_epi32(pred_1_u32, accum_1_u32); _mm_storeu_si128((__m128i *)accumulator, accum_0_u32); _mm_storeu_si128((__m128i *)(accumulator + 4), accum_1_u32); } static INLINE void accumulate_and_store_16(const __m128i sum_0_u16, const __m128i sum_1_u16, const uint8_t *pred, uint16_t *count, uint32_t *accumulator) { const __m128i pred_u8 = _mm_loadu_si128((const __m128i *)pred); const __m128i zero = _mm_setzero_si128(); __m128i count_0_u16 = _mm_loadu_si128((const __m128i *)count), count_1_u16 = _mm_loadu_si128((const __m128i *)(count + 8)); __m128i pred_0_u16 = _mm_cvtepu8_epi16(pred_u8), pred_1_u16 = _mm_unpackhi_epi8(pred_u8, zero); __m128i pred_0_u32, pred_1_u32, pred_2_u32, pred_3_u32; __m128i accum_0_u32, accum_1_u32, accum_2_u32, accum_3_u32; count_0_u16 = _mm_adds_epu16(count_0_u16, sum_0_u16); _mm_storeu_si128((__m128i *)count, count_0_u16); count_1_u16 = _mm_adds_epu16(count_1_u16, sum_1_u16); _mm_storeu_si128((__m128i *)(count + 8), count_1_u16); pred_0_u16 = _mm_mullo_epi16(sum_0_u16, pred_0_u16); pred_1_u16 = _mm_mullo_epi16(sum_1_u16, pred_1_u16); pred_0_u32 = _mm_cvtepu16_epi32(pred_0_u16); pred_1_u32 = _mm_unpackhi_epi16(pred_0_u16, zero); pred_2_u32 = _mm_cvtepu16_epi32(pred_1_u16); pred_3_u32 = _mm_unpackhi_epi16(pred_1_u16, zero); accum_0_u32 = _mm_loadu_si128((const __m128i *)accumulator); accum_1_u32 = _mm_loadu_si128((const __m128i *)(accumulator + 4)); accum_2_u32 = _mm_loadu_si128((const __m128i *)(accumulator + 8)); accum_3_u32 = _mm_loadu_si128((const __m128i *)(accumulator + 12)); accum_0_u32 = _mm_add_epi32(pred_0_u32, accum_0_u32); accum_1_u32 = _mm_add_epi32(pred_1_u32, accum_1_u32); accum_2_u32 = _mm_add_epi32(pred_2_u32, accum_2_u32); accum_3_u32 = _mm_add_epi32(pred_3_u32, accum_3_u32); _mm_storeu_si128((__m128i *)accumulator, accum_0_u32); _mm_storeu_si128((__m128i *)(accumulator + 4), accum_1_u32); _mm_storeu_si128((__m128i *)(accumulator + 8), accum_2_u32); _mm_storeu_si128((__m128i *)(accumulator + 
12), accum_3_u32); } // Read in 8 pixels from y_dist. For each index i, compute y_dist[i-1] + // y_dist[i] + y_dist[i+1] and store in sum as 16-bit unsigned int. static INLINE void get_sum_8(const uint16_t *y_dist, __m128i *sum) { __m128i dist_reg, dist_left, dist_right; dist_reg = _mm_loadu_si128((const __m128i *)y_dist); dist_left = _mm_loadu_si128((const __m128i *)(y_dist - 1)); dist_right = _mm_loadu_si128((const __m128i *)(y_dist + 1)); *sum = _mm_adds_epu16(dist_reg, dist_left); *sum = _mm_adds_epu16(*sum, dist_right); } // Read in 16 pixels from y_dist. For each index i, compute y_dist[i-1] + // y_dist[i] + y_dist[i+1]. Store the result for first 8 pixels in sum_first and // the rest in sum_second. static INLINE void get_sum_16(const uint16_t *y_dist, __m128i *sum_first, __m128i *sum_second) { get_sum_8(y_dist, sum_first); get_sum_8(y_dist + 8, sum_second); } // Read in a row of chroma values corresponds to a row of 16 luma values. static INLINE void read_chroma_dist_row_16(int ss_x, const uint16_t *u_dist, const uint16_t *v_dist, __m128i *u_first, __m128i *u_second, __m128i *v_first, __m128i *v_second) { if (!ss_x) { // If there is no chroma subsampling in the horizontal direction, then we // need to load 16 entries from chroma. read_dist_16(u_dist, u_first, u_second); read_dist_16(v_dist, v_first, v_second); } else { // ss_x == 1 // Otherwise, we only need to load 8 entries __m128i u_reg, v_reg; read_dist_8(u_dist, &u_reg); *u_first = _mm_unpacklo_epi16(u_reg, u_reg); *u_second = _mm_unpackhi_epi16(u_reg, u_reg); read_dist_8(v_dist, &v_reg); *v_first = _mm_unpacklo_epi16(v_reg, v_reg); *v_second = _mm_unpackhi_epi16(v_reg, v_reg); } } // Horizontal add unsigned 16-bit ints in src and store them as signed 32-bit // int in dst. static INLINE void hadd_epu16(__m128i *src, __m128i *dst) { const __m128i zero = _mm_setzero_si128(); const __m128i shift_right = _mm_srli_si128(*src, 2); const __m128i odd = _mm_blend_epi16(shift_right, zero, 170); const __m128i even = _mm_blend_epi16(*src, zero, 170); *dst = _mm_add_epi32(even, odd); } // Add a row of luma distortion to 8 corresponding chroma mods. static INLINE void add_luma_dist_to_8_chroma_mod(const uint16_t *y_dist, int ss_x, int ss_y, __m128i *u_mod, __m128i *v_mod) { __m128i y_reg; if (!ss_x) { read_dist_8(y_dist, &y_reg); if (ss_y == 1) { __m128i y_tmp; read_dist_8(y_dist + DIST_STRIDE, &y_tmp); y_reg = _mm_adds_epu16(y_reg, y_tmp); } } else { __m128i y_first, y_second; read_dist_16(y_dist, &y_first, &y_second); if (ss_y == 1) { __m128i y_tmp_0, y_tmp_1; read_dist_16(y_dist + DIST_STRIDE, &y_tmp_0, &y_tmp_1); y_first = _mm_adds_epu16(y_first, y_tmp_0); y_second = _mm_adds_epu16(y_second, y_tmp_1); } hadd_epu16(&y_first, &y_first); hadd_epu16(&y_second, &y_second); y_reg = _mm_packus_epi32(y_first, y_second); } *u_mod = _mm_adds_epu16(*u_mod, y_reg); *v_mod = _mm_adds_epu16(*v_mod, y_reg); } // Apply temporal filter to the luma components. This performs temporal // filtering on a luma block of 16 X block_height. Use blk_fw as an array of // size 4 for the weights for each of the 4 subblocks if blk_fw is not NULL, // else use top_weight for top half, and bottom weight for bottom half. 
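// For example (values ours), on a 16x16 block blk_fw = { 2, 0, 1, 2 } filters
// the top-left 8x8 subblock with weight 2, the top-right with 0 (its
// prediction contributes nothing), the bottom-left with 1 and the
// bottom-right with 2; the layout follows from how blk_fw[0..3] is consumed
// below.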
static void vp9_apply_temporal_filter_luma_16( const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, int use_whole_blk, uint32_t *y_accum, uint16_t *y_count, const uint16_t *y_dist, const uint16_t *u_dist, const uint16_t *v_dist, const int16_t *const *neighbors_first, const int16_t *const *neighbors_second, int top_weight, int bottom_weight, const int *blk_fw) { const int rounding = (1 << strength) >> 1; __m128i weight_first, weight_second; __m128i mul_first, mul_second; __m128i sum_row_1_first, sum_row_1_second; __m128i sum_row_2_first, sum_row_2_second; __m128i sum_row_3_first, sum_row_3_second; __m128i u_first, u_second; __m128i v_first, v_second; __m128i sum_row_first; __m128i sum_row_second; // Loop variables unsigned int h; assert(strength >= 0); assert(strength <= 6); assert(block_width == 16); (void)block_width; // Initialize the weights if (blk_fw) { weight_first = _mm_set1_epi16(blk_fw[0]); weight_second = _mm_set1_epi16(blk_fw[1]); } else { weight_first = _mm_set1_epi16(top_weight); weight_second = weight_first; } // First row mul_first = _mm_load_si128((const __m128i *)neighbors_first[0]); mul_second = _mm_load_si128((const __m128i *)neighbors_second[0]); // Add luma values get_sum_16(y_dist, &sum_row_2_first, &sum_row_2_second); get_sum_16(y_dist + DIST_STRIDE, &sum_row_3_first, &sum_row_3_second); sum_row_first = _mm_adds_epu16(sum_row_2_first, sum_row_3_first); sum_row_second = _mm_adds_epu16(sum_row_2_second, sum_row_3_second); // Add chroma values read_chroma_dist_row_16(ss_x, u_dist, v_dist, &u_first, &u_second, &v_first, &v_second); sum_row_first = _mm_adds_epu16(sum_row_first, u_first); sum_row_second = _mm_adds_epu16(sum_row_second, u_second); sum_row_first = _mm_adds_epu16(sum_row_first, v_first); sum_row_second = _mm_adds_epu16(sum_row_second, v_second); // Get modifier and store result sum_row_first = average_8(sum_row_first, &mul_first, strength, rounding, &weight_first); sum_row_second = average_8(sum_row_second, &mul_second, strength, rounding, &weight_second); accumulate_and_store_16(sum_row_first, sum_row_second, y_pre, y_count, y_accum); y_src += y_src_stride; y_pre += y_pre_stride; y_count += y_pre_stride; y_accum += y_pre_stride; y_dist += DIST_STRIDE; u_src += uv_src_stride; u_pre += uv_pre_stride; u_dist += DIST_STRIDE; v_src += uv_src_stride; v_pre += uv_pre_stride; v_dist += DIST_STRIDE; // Then all the rows except the last one mul_first = _mm_load_si128((const __m128i *)neighbors_first[1]); mul_second = _mm_load_si128((const __m128i *)neighbors_second[1]); for (h = 1; h < block_height - 1; ++h) { // Move the weight to bottom half if (!use_whole_blk && h == block_height / 2) { if (blk_fw) { weight_first = _mm_set1_epi16(blk_fw[2]); weight_second = _mm_set1_epi16(blk_fw[3]); } else { weight_first = _mm_set1_epi16(bottom_weight); weight_second = weight_first; } } // Shift the rows up sum_row_1_first = sum_row_2_first; sum_row_1_second = sum_row_2_second; sum_row_2_first = sum_row_3_first; sum_row_2_second = sum_row_3_second; // Add luma values to the modifier sum_row_first = _mm_adds_epu16(sum_row_1_first, sum_row_2_first); sum_row_second = _mm_adds_epu16(sum_row_1_second, sum_row_2_second); get_sum_16(y_dist + DIST_STRIDE, &sum_row_3_first, &sum_row_3_second); sum_row_first = _mm_adds_epu16(sum_row_first, 
sum_row_3_first); sum_row_second = _mm_adds_epu16(sum_row_second, sum_row_3_second); // Add chroma values to the modifier if (ss_y == 0 || h % 2 == 0) { // Only calculate the new chroma distortion if we are at a pixel that // corresponds to a new chroma row read_chroma_dist_row_16(ss_x, u_dist, v_dist, &u_first, &u_second, &v_first, &v_second); u_src += uv_src_stride; u_pre += uv_pre_stride; u_dist += DIST_STRIDE; v_src += uv_src_stride; v_pre += uv_pre_stride; v_dist += DIST_STRIDE; } sum_row_first = _mm_adds_epu16(sum_row_first, u_first); sum_row_second = _mm_adds_epu16(sum_row_second, u_second); sum_row_first = _mm_adds_epu16(sum_row_first, v_first); sum_row_second = _mm_adds_epu16(sum_row_second, v_second); // Get modifier and store result sum_row_first = average_8(sum_row_first, &mul_first, strength, rounding, &weight_first); sum_row_second = average_8(sum_row_second, &mul_second, strength, rounding, &weight_second); accumulate_and_store_16(sum_row_first, sum_row_second, y_pre, y_count, y_accum); y_src += y_src_stride; y_pre += y_pre_stride; y_count += y_pre_stride; y_accum += y_pre_stride; y_dist += DIST_STRIDE; } // The last row mul_first = _mm_load_si128((const __m128i *)neighbors_first[0]); mul_second = _mm_load_si128((const __m128i *)neighbors_second[0]); // Shift the rows up sum_row_1_first = sum_row_2_first; sum_row_1_second = sum_row_2_second; sum_row_2_first = sum_row_3_first; sum_row_2_second = sum_row_3_second; // Add luma values to the modifier sum_row_first = _mm_adds_epu16(sum_row_1_first, sum_row_2_first); sum_row_second = _mm_adds_epu16(sum_row_1_second, sum_row_2_second); // Add chroma values to the modifier if (ss_y == 0) { // Only calculate the new chroma distortion if we are at a pixel that // corresponds to a new chroma row read_chroma_dist_row_16(ss_x, u_dist, v_dist, &u_first, &u_second, &v_first, &v_second); } sum_row_first = _mm_adds_epu16(sum_row_first, u_first); sum_row_second = _mm_adds_epu16(sum_row_second, u_second); sum_row_first = _mm_adds_epu16(sum_row_first, v_first); sum_row_second = _mm_adds_epu16(sum_row_second, v_second); // Get modifier and store result sum_row_first = average_8(sum_row_first, &mul_first, strength, rounding, &weight_first); sum_row_second = average_8(sum_row_second, &mul_second, strength, rounding, &weight_second); accumulate_and_store_16(sum_row_first, sum_row_second, y_pre, y_count, y_accum); } // Perform temporal filter for the luma component. static void vp9_apply_temporal_filter_luma( const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *blk_fw, int use_whole_blk, uint32_t *y_accum, uint16_t *y_count, const uint16_t *y_dist, const uint16_t *u_dist, const uint16_t *v_dist) { unsigned int blk_col = 0, uv_blk_col = 0; const unsigned int blk_col_step = 16, uv_blk_col_step = 16 >> ss_x; const unsigned int mid_width = block_width >> 1, last_width = block_width - blk_col_step; int top_weight = blk_fw[0], bottom_weight = use_whole_blk ? blk_fw[0] : blk_fw[2]; const int16_t *const *neighbors_first; const int16_t *const *neighbors_second; if (block_width == 16) { // Special Case: The blockwidth is 16 and we are operating on a row of 16 // chroma pixels. In this case, we can't use the usualy left-midle-right // pattern. We also don't support splitting now. 
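// With a single column, the first eight pixels use the left-edge constants
// and the last eight the right-edge constants, so one pass covers the whole
// 16-pixel row: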
neighbors_first = LUMA_LEFT_COLUMN_NEIGHBORS; neighbors_second = LUMA_RIGHT_COLUMN_NEIGHBORS; if (use_whole_blk) { vp9_apply_temporal_filter_luma_16( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16, block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, bottom_weight, NULL); } else { vp9_apply_temporal_filter_luma_16( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16, block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first, neighbors_second, 0, 0, blk_fw); } return; } // Left neighbors_first = LUMA_LEFT_COLUMN_NEIGHBORS; neighbors_second = LUMA_MIDDLE_COLUMN_NEIGHBORS; vp9_apply_temporal_filter_luma_16( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16, block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, bottom_weight, NULL); blk_col += blk_col_step; uv_blk_col += uv_blk_col_step; // Middle First neighbors_first = LUMA_MIDDLE_COLUMN_NEIGHBORS; for (; blk_col < mid_width; blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { vp9_apply_temporal_filter_luma_16( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16, block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, bottom_weight, NULL); } if (!use_whole_blk) { top_weight = blk_fw[1]; bottom_weight = blk_fw[3]; } // Middle Second for (; blk_col < last_width; blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { vp9_apply_temporal_filter_luma_16( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16, block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, bottom_weight, NULL); } // Right neighbors_second = LUMA_RIGHT_COLUMN_NEIGHBORS; vp9_apply_temporal_filter_luma_16( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, 16, block_height, ss_x, ss_y, strength, use_whole_blk, y_accum + blk_col, y_count + blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors_first, neighbors_second, top_weight, bottom_weight, NULL); } // Apply temporal filter to the chroma components. This performs temporal // filtering on a chroma block of 8 X uv_height. If blk_fw is not NULL, use // blk_fw as an array of size 4 for the weights for each of the 4 subblocks, // else use top_weight for top half, and bottom weight for bottom half. 
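// Unlike the luma path, a single 8-lane register spans both subblock columns
// here: the weight vector packs blk_fw[0] (or blk_fw[2]) into its low four
// lanes and blk_fw[1] (or blk_fw[3]) into its high four lanes via
// _mm_setr_epi16() below.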
static void vp9_apply_temporal_filter_chroma_8( const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, int uv_pre_stride, unsigned int uv_block_width, unsigned int uv_block_height, int ss_x, int ss_y, int strength, uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count, const uint16_t *y_dist, const uint16_t *u_dist, const uint16_t *v_dist, const int16_t *const *neighbors, int top_weight, int bottom_weight, const int *blk_fw) { const int rounding = (1 << strength) >> 1; __m128i weight; __m128i mul; __m128i u_sum_row_1, u_sum_row_2, u_sum_row_3; __m128i v_sum_row_1, v_sum_row_2, v_sum_row_3; __m128i u_sum_row, v_sum_row; // Loop variable unsigned int h; (void)uv_block_width; // Initilize weight if (blk_fw) { weight = _mm_setr_epi16(blk_fw[0], blk_fw[0], blk_fw[0], blk_fw[0], blk_fw[1], blk_fw[1], blk_fw[1], blk_fw[1]); } else { weight = _mm_set1_epi16(top_weight); } // First row mul = _mm_load_si128((const __m128i *)neighbors[0]); // Add chroma values get_sum_8(u_dist, &u_sum_row_2); get_sum_8(u_dist + DIST_STRIDE, &u_sum_row_3); u_sum_row = _mm_adds_epu16(u_sum_row_2, u_sum_row_3); get_sum_8(v_dist, &v_sum_row_2); get_sum_8(v_dist + DIST_STRIDE, &v_sum_row_3); v_sum_row = _mm_adds_epu16(v_sum_row_2, v_sum_row_3); // Add luma values add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row, &v_sum_row); // Get modifier and store result u_sum_row = average_8(u_sum_row, &mul, strength, rounding, &weight); v_sum_row = average_8(v_sum_row, &mul, strength, rounding, &weight); accumulate_and_store_8(u_sum_row, u_pre, u_count, u_accum); accumulate_and_store_8(v_sum_row, v_pre, v_count, v_accum); u_src += uv_src_stride; u_pre += uv_pre_stride; u_dist += DIST_STRIDE; v_src += uv_src_stride; v_pre += uv_pre_stride; v_dist += DIST_STRIDE; u_count += uv_pre_stride; u_accum += uv_pre_stride; v_count += uv_pre_stride; v_accum += uv_pre_stride; y_src += y_src_stride * (1 + ss_y); y_pre += y_pre_stride * (1 + ss_y); y_dist += DIST_STRIDE * (1 + ss_y); // Then all the rows except the last one mul = _mm_load_si128((const __m128i *)neighbors[1]); for (h = 1; h < uv_block_height - 1; ++h) { // Move the weight pointer to the bottom half of the blocks if (h == uv_block_height / 2) { if (blk_fw) { weight = _mm_setr_epi16(blk_fw[2], blk_fw[2], blk_fw[2], blk_fw[2], blk_fw[3], blk_fw[3], blk_fw[3], blk_fw[3]); } else { weight = _mm_set1_epi16(bottom_weight); } } // Shift the rows up u_sum_row_1 = u_sum_row_2; u_sum_row_2 = u_sum_row_3; v_sum_row_1 = v_sum_row_2; v_sum_row_2 = v_sum_row_3; // Add chroma values u_sum_row = _mm_adds_epu16(u_sum_row_1, u_sum_row_2); get_sum_8(u_dist + DIST_STRIDE, &u_sum_row_3); u_sum_row = _mm_adds_epu16(u_sum_row, u_sum_row_3); v_sum_row = _mm_adds_epu16(v_sum_row_1, v_sum_row_2); get_sum_8(v_dist + DIST_STRIDE, &v_sum_row_3); v_sum_row = _mm_adds_epu16(v_sum_row, v_sum_row_3); // Add luma values add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row, &v_sum_row); // Get modifier and store result u_sum_row = average_8(u_sum_row, &mul, strength, rounding, &weight); v_sum_row = average_8(v_sum_row, &mul, strength, rounding, &weight); accumulate_and_store_8(u_sum_row, u_pre, u_count, u_accum); accumulate_and_store_8(v_sum_row, v_pre, v_count, v_accum); u_src += uv_src_stride; u_pre += uv_pre_stride; u_dist += DIST_STRIDE; v_src += uv_src_stride; v_pre += uv_pre_stride; v_dist += DIST_STRIDE; u_count += uv_pre_stride; u_accum += 
uv_pre_stride; v_count += uv_pre_stride; v_accum += uv_pre_stride; y_src += y_src_stride * (1 + ss_y); y_pre += y_pre_stride * (1 + ss_y); y_dist += DIST_STRIDE * (1 + ss_y); } // The last row mul = _mm_load_si128((const __m128i *)neighbors[0]); // Shift the rows up u_sum_row_1 = u_sum_row_2; u_sum_row_2 = u_sum_row_3; v_sum_row_1 = v_sum_row_2; v_sum_row_2 = v_sum_row_3; // Add chroma values u_sum_row = _mm_adds_epu16(u_sum_row_1, u_sum_row_2); v_sum_row = _mm_adds_epu16(v_sum_row_1, v_sum_row_2); // Add luma values add_luma_dist_to_8_chroma_mod(y_dist, ss_x, ss_y, &u_sum_row, &v_sum_row); // Get modifier and store result u_sum_row = average_8(u_sum_row, &mul, strength, rounding, &weight); v_sum_row = average_8(v_sum_row, &mul, strength, rounding, &weight); accumulate_and_store_8(u_sum_row, u_pre, u_count, u_accum); accumulate_and_store_8(v_sum_row, v_pre, v_count, v_accum); } // Perform temporal filter for the chroma components. static void vp9_apply_temporal_filter_chroma( const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *blk_fw, int use_whole_blk, uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count, const uint16_t *y_dist, const uint16_t *u_dist, const uint16_t *v_dist) { const unsigned int uv_width = block_width >> ss_x, uv_height = block_height >> ss_y; unsigned int blk_col = 0, uv_blk_col = 0; const unsigned int uv_blk_col_step = 8, blk_col_step = 8 << ss_x; const unsigned int uv_mid_width = uv_width >> 1, uv_last_width = uv_width - uv_blk_col_step; int top_weight = blk_fw[0], bottom_weight = use_whole_blk ? blk_fw[0] : blk_fw[2]; const int16_t *const *neighbors; if (uv_width == 8) { // Special Case: We are subsampling in x direction on a 16x16 block. Since // we are operating on a row of 8 chroma pixels, we can't use the usual // left-middle-right pattern. 
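// The SINGLE_COLUMN tables place corner constants at both ends of the 8-wide
// register (see TWO_CORNER_*/TWO_EDGE_* in temporal_filter_constants.h),
// since this one row of chroma touches the left and right block edges at
// once.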
assert(ss_x); if (ss_y) { neighbors = CHROMA_DOUBLE_SS_SINGLE_COLUMN_NEIGHBORS; } else { neighbors = CHROMA_SINGLE_SS_SINGLE_COLUMN_NEIGHBORS; } if (use_whole_blk) { vp9_apply_temporal_filter_chroma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, top_weight, bottom_weight, NULL); } else { vp9_apply_temporal_filter_chroma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, 0, 0, blk_fw); } return; } // Left if (ss_x && ss_y) { neighbors = CHROMA_DOUBLE_SS_LEFT_COLUMN_NEIGHBORS; } else if (ss_x || ss_y) { neighbors = CHROMA_SINGLE_SS_LEFT_COLUMN_NEIGHBORS; } else { neighbors = CHROMA_NO_SS_LEFT_COLUMN_NEIGHBORS; } vp9_apply_temporal_filter_chroma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, top_weight, bottom_weight, NULL); blk_col += blk_col_step; uv_blk_col += uv_blk_col_step; // Middle First if (ss_x && ss_y) { neighbors = CHROMA_DOUBLE_SS_MIDDLE_COLUMN_NEIGHBORS; } else if (ss_x || ss_y) { neighbors = CHROMA_SINGLE_SS_MIDDLE_COLUMN_NEIGHBORS; } else { neighbors = CHROMA_NO_SS_MIDDLE_COLUMN_NEIGHBORS; } for (; uv_blk_col < uv_mid_width; blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { vp9_apply_temporal_filter_chroma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, top_weight, bottom_weight, NULL); } if (!use_whole_blk) { top_weight = blk_fw[1]; bottom_weight = blk_fw[3]; } // Middle Second for (; uv_blk_col < uv_last_width; blk_col += blk_col_step, uv_blk_col += uv_blk_col_step) { vp9_apply_temporal_filter_chroma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, top_weight, bottom_weight, NULL); } // Right if (ss_x && ss_y) { neighbors = CHROMA_DOUBLE_SS_RIGHT_COLUMN_NEIGHBORS; } else if (ss_x || ss_y) { neighbors = CHROMA_SINGLE_SS_RIGHT_COLUMN_NEIGHBORS; } else { neighbors = CHROMA_NO_SS_RIGHT_COLUMN_NEIGHBORS; } vp9_apply_temporal_filter_chroma_8( y_src + blk_col, y_src_stride, y_pre + blk_col, y_pre_stride, u_src + uv_blk_col, v_src + uv_blk_col, uv_src_stride, 
u_pre + uv_blk_col, v_pre + uv_blk_col, uv_pre_stride, uv_width, uv_height, ss_x, ss_y, strength, u_accum + uv_blk_col, u_count + uv_blk_col, v_accum + uv_blk_col, v_count + uv_blk_col, y_dist + blk_col, u_dist + uv_blk_col, v_dist + uv_blk_col, neighbors, top_weight, bottom_weight, NULL); } void vp9_apply_temporal_filter_sse4_1( const uint8_t *y_src, int y_src_stride, const uint8_t *y_pre, int y_pre_stride, const uint8_t *u_src, const uint8_t *v_src, int uv_src_stride, const uint8_t *u_pre, const uint8_t *v_pre, int uv_pre_stride, unsigned int block_width, unsigned int block_height, int ss_x, int ss_y, int strength, const int *const blk_fw, int use_whole_blk, uint32_t *y_accum, uint16_t *y_count, uint32_t *u_accum, uint16_t *u_count, uint32_t *v_accum, uint16_t *v_count) { const unsigned int chroma_height = block_height >> ss_y, chroma_width = block_width >> ss_x; DECLARE_ALIGNED(16, uint16_t, y_dist[BH * DIST_STRIDE]) = { 0 }; DECLARE_ALIGNED(16, uint16_t, u_dist[BH * DIST_STRIDE]) = { 0 }; DECLARE_ALIGNED(16, uint16_t, v_dist[BH * DIST_STRIDE]) = { 0 }; const int *blk_fw_ptr = blk_fw; uint16_t *y_dist_ptr = y_dist + 1, *u_dist_ptr = u_dist + 1, *v_dist_ptr = v_dist + 1; const uint8_t *y_src_ptr = y_src, *u_src_ptr = u_src, *v_src_ptr = v_src; const uint8_t *y_pre_ptr = y_pre, *u_pre_ptr = u_pre, *v_pre_ptr = v_pre; // Loop variables unsigned int row, blk_col; assert(block_width <= BW && "block width too large"); assert(block_height <= BH && "block height too large"); assert(block_width % 16 == 0 && "block width must be multiple of 16"); assert(block_height % 2 == 0 && "block height must be even"); assert((ss_x == 0 || ss_x == 1) && (ss_y == 0 || ss_y == 1) && "invalid chroma subsampling"); assert(strength >= 0 && strength <= 6 && "invalid temporal filter strength"); assert(blk_fw[0] >= 0 && "filter weight must be positive"); assert( (use_whole_blk || (blk_fw[1] >= 0 && blk_fw[2] >= 0 && blk_fw[3] >= 0)) && "subblock filter weight must be positive"); assert(blk_fw[0] <= 2 && "subblock filter weight must be less than 2"); assert( (use_whole_blk || (blk_fw[1] <= 2 && blk_fw[2] <= 2 && blk_fw[3] <= 2)) && "subblock filter weight must be less than 2"); // Precompute the difference squared for (row = 0; row < block_height; row++) { for (blk_col = 0; blk_col < block_width; blk_col += 16) { store_dist_16(y_src_ptr + blk_col, y_pre_ptr + blk_col, y_dist_ptr + blk_col); } y_src_ptr += y_src_stride; y_pre_ptr += y_pre_stride; y_dist_ptr += DIST_STRIDE; } for (row = 0; row < chroma_height; row++) { for (blk_col = 0; blk_col < chroma_width; blk_col += 8) { store_dist_8(u_src_ptr + blk_col, u_pre_ptr + blk_col, u_dist_ptr + blk_col); store_dist_8(v_src_ptr + blk_col, v_pre_ptr + blk_col, v_dist_ptr + blk_col); } u_src_ptr += uv_src_stride; u_pre_ptr += uv_pre_stride; u_dist_ptr += DIST_STRIDE; v_src_ptr += uv_src_stride; v_pre_ptr += uv_pre_stride; v_dist_ptr += DIST_STRIDE; } y_dist_ptr = y_dist + 1; u_dist_ptr = u_dist + 1; v_dist_ptr = v_dist + 1; vp9_apply_temporal_filter_luma( y_src, y_src_stride, y_pre, y_pre_stride, u_src, v_src, uv_src_stride, u_pre, v_pre, uv_pre_stride, block_width, block_height, ss_x, ss_y, strength, blk_fw_ptr, use_whole_blk, y_accum, y_count, y_dist_ptr, u_dist_ptr, v_dist_ptr); vp9_apply_temporal_filter_chroma( y_src, y_src_stride, y_pre, y_pre_stride, u_src, v_src, uv_src_stride, u_pre, v_pre, uv_pre_stride, block_width, block_height, ss_x, ss_y, strength, blk_fw_ptr, use_whole_blk, u_accum, u_count, v_accum, v_count, y_dist_ptr, u_dist_ptr, v_dist_ptr); }
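// A note on the dist buffers consumed above (ours, not original commentary):
// DIST_STRIDE is (BW + 2) and every *_dist_ptr starts at column 1, so a
// zeroed guard column sits on each side of the block. That is what lets
// get_sum_8() and get_sum_16() read dist[i - 1] and dist[i + 1] without
// edge-special cases. A scalar model of the interior accumulation
// (illustration only):
#if 0
static uint32_t scalar_sum_3x3(const uint16_t *dist, int row, int col) {
  // 'dist' points at column 1 of a BH x DIST_STRIDE buffer; for interior
  // rows the col - 1 / col + 1 reads land in the zeroed guard columns at
  // the block edges and therefore add nothing.
  uint32_t sum = 0;
  int r, c;
  for (r = row - 1; r <= row + 1; ++r)
    for (c = col - 1; c <= col + 1; ++c) sum += dist[r * DIST_STRIDE + c];
  return sum;
}
#endif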
libvpx-1.8.2/vp9/encoder/x86/vp9_dct_intrin_sse2.c000066400000000000000000001700251357355204000216540ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <emmintrin.h> // SSE2 #include "./vp9_rtcd.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/txfm_common.h" #include "vpx_dsp/x86/bitdepth_conversion_sse2.h" #include "vpx_dsp/x86/fwd_txfm_sse2.h" #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" #include "vpx_ports/mem.h" static INLINE void load_buffer_4x4(const int16_t *input, __m128i *in, int stride) { const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1); const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); __m128i mask; in[0] = _mm_loadl_epi64((const __m128i *)(input + 0 * stride)); in[1] = _mm_loadl_epi64((const __m128i *)(input + 1 * stride)); in[2] = _mm_loadl_epi64((const __m128i *)(input + 2 * stride)); in[3] = _mm_loadl_epi64((const __m128i *)(input + 3 * stride)); in[0] = _mm_slli_epi16(in[0], 4); in[1] = _mm_slli_epi16(in[1], 4); in[2] = _mm_slli_epi16(in[2], 4); in[3] = _mm_slli_epi16(in[3], 4); mask = _mm_cmpeq_epi16(in[0], k__nonzero_bias_a); in[0] = _mm_add_epi16(in[0], mask); in[0] = _mm_add_epi16(in[0], k__nonzero_bias_b); } static INLINE void write_buffer_4x4(tran_low_t *output, __m128i *res) { const __m128i kOne = _mm_set1_epi16(1); __m128i in01 = _mm_unpacklo_epi64(res[0], res[1]); __m128i in23 = _mm_unpacklo_epi64(res[2], res[3]); __m128i out01 = _mm_add_epi16(in01, kOne); __m128i out23 = _mm_add_epi16(in23, kOne); out01 = _mm_srai_epi16(out01, 2); out23 = _mm_srai_epi16(out23, 2); store_output(&out01, (output + 0 * 8)); store_output(&out23, (output + 1 * 8)); } static INLINE void transpose_4x4(__m128i *res) { // Combine and transpose // 00 01 02 03 20 21 22 23 // 10 11 12 13 30 31 32 33 const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]); const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]); // 00 10 01 11 02 12 03 13 // 20 30 21 31 22 32 23 33 res[0] = _mm_unpacklo_epi32(tr0_0, tr0_1); res[2] = _mm_unpackhi_epi32(tr0_0, tr0_1); // 00 10 20 30 01 11 21 31 // 02 12 22 32 03 13 23 33 // only use the first 4 16-bit integers res[1] = _mm_unpackhi_epi64(res[0], res[0]); res[3] = _mm_unpackhi_epi64(res[2], res[2]); } static void fdct4_sse2(__m128i *in) { const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); __m128i u[4], v[4]; u[0] = _mm_unpacklo_epi16(in[0], in[1]); u[1] = _mm_unpacklo_epi16(in[3], in[2]); v[0] = _mm_add_epi16(u[0], u[1]); v[1] = _mm_sub_epi16(u[0], u[1]); u[0] = _mm_madd_epi16(v[0], k__cospi_p16_p16); // 0 u[1] = _mm_madd_epi16(v[0], k__cospi_p16_m16); // 2 u[2] = _mm_madd_epi16(v[1], k__cospi_p08_p24); // 1 u[3] = _mm_madd_epi16(v[1], k__cospi_p24_m08); // 3 v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm_add_epi32(u[2],
k__DCT_CONST_ROUNDING); v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); in[0] = _mm_packs_epi32(u[0], u[1]); in[1] = _mm_packs_epi32(u[2], u[3]); transpose_4x4(in); } static void fadst4_sse2(__m128i *in) { const __m128i k__sinpi_p01_p02 = pair_set_epi16(sinpi_1_9, sinpi_2_9); const __m128i k__sinpi_p04_m01 = pair_set_epi16(sinpi_4_9, -sinpi_1_9); const __m128i k__sinpi_p03_p04 = pair_set_epi16(sinpi_3_9, sinpi_4_9); const __m128i k__sinpi_m03_p02 = pair_set_epi16(-sinpi_3_9, sinpi_2_9); const __m128i k__sinpi_p03_p03 = _mm_set1_epi16((int16_t)sinpi_3_9); const __m128i kZero = _mm_set1_epi16(0); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); __m128i u[8], v[8]; __m128i in7 = _mm_add_epi16(in[0], in[1]); u[0] = _mm_unpacklo_epi16(in[0], in[1]); u[1] = _mm_unpacklo_epi16(in[2], in[3]); u[2] = _mm_unpacklo_epi16(in7, kZero); u[3] = _mm_unpacklo_epi16(in[2], kZero); u[4] = _mm_unpacklo_epi16(in[3], kZero); v[0] = _mm_madd_epi16(u[0], k__sinpi_p01_p02); // s0 + s2 v[1] = _mm_madd_epi16(u[1], k__sinpi_p03_p04); // s4 + s5 v[2] = _mm_madd_epi16(u[2], k__sinpi_p03_p03); // x1 v[3] = _mm_madd_epi16(u[0], k__sinpi_p04_m01); // s1 - s3 v[4] = _mm_madd_epi16(u[1], k__sinpi_m03_p02); // -s4 + s6 v[5] = _mm_madd_epi16(u[3], k__sinpi_p03_p03); // s4 v[6] = _mm_madd_epi16(u[4], k__sinpi_p03_p03); u[0] = _mm_add_epi32(v[0], v[1]); u[1] = _mm_sub_epi32(v[2], v[6]); u[2] = _mm_add_epi32(v[3], v[4]); u[3] = _mm_sub_epi32(u[2], u[0]); u[4] = _mm_slli_epi32(v[5], 2); u[5] = _mm_sub_epi32(u[4], v[5]); u[6] = _mm_add_epi32(u[3], u[5]); v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); in[0] = _mm_packs_epi32(u[0], u[2]); in[1] = _mm_packs_epi32(u[1], u[3]); transpose_4x4(in); } void vp9_fht4x4_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type) { __m128i in[4]; switch (tx_type) { case DCT_DCT: vpx_fdct4x4_sse2(input, output, stride); break; case ADST_DCT: load_buffer_4x4(input, in, stride); fadst4_sse2(in); fdct4_sse2(in); write_buffer_4x4(output, in); break; case DCT_ADST: load_buffer_4x4(input, in, stride); fdct4_sse2(in); fadst4_sse2(in); write_buffer_4x4(output, in); break; default: assert(tx_type == ADST_ADST); load_buffer_4x4(input, in, stride); fadst4_sse2(in); fadst4_sse2(in); write_buffer_4x4(output, in); break; } } // load 8x8 array static INLINE void load_buffer_8x8(const int16_t *input, __m128i *in, int stride) { in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride)); in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride)); in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride)); in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride)); in[4] = _mm_load_si128((const __m128i *)(input + 4 * stride)); in[5] = _mm_load_si128((const __m128i *)(input + 5 * stride)); in[6] = _mm_load_si128((const __m128i *)(input + 6 * stride)); in[7] = _mm_load_si128((const __m128i *)(input + 7 * stride)); in[0] = _mm_slli_epi16(in[0], 2); in[1] = _mm_slli_epi16(in[1], 2); in[2] = _mm_slli_epi16(in[2], 2); in[3] = _mm_slli_epi16(in[3], 2); in[4] = 
_mm_slli_epi16(in[4], 2); in[5] = _mm_slli_epi16(in[5], 2); in[6] = _mm_slli_epi16(in[6], 2); in[7] = _mm_slli_epi16(in[7], 2); } // right shift and rounding static INLINE void right_shift_8x8(__m128i *res, const int bit) { __m128i sign0 = _mm_srai_epi16(res[0], 15); __m128i sign1 = _mm_srai_epi16(res[1], 15); __m128i sign2 = _mm_srai_epi16(res[2], 15); __m128i sign3 = _mm_srai_epi16(res[3], 15); __m128i sign4 = _mm_srai_epi16(res[4], 15); __m128i sign5 = _mm_srai_epi16(res[5], 15); __m128i sign6 = _mm_srai_epi16(res[6], 15); __m128i sign7 = _mm_srai_epi16(res[7], 15); if (bit == 2) { const __m128i const_rounding = _mm_set1_epi16(1); res[0] = _mm_add_epi16(res[0], const_rounding); res[1] = _mm_add_epi16(res[1], const_rounding); res[2] = _mm_add_epi16(res[2], const_rounding); res[3] = _mm_add_epi16(res[3], const_rounding); res[4] = _mm_add_epi16(res[4], const_rounding); res[5] = _mm_add_epi16(res[5], const_rounding); res[6] = _mm_add_epi16(res[6], const_rounding); res[7] = _mm_add_epi16(res[7], const_rounding); } res[0] = _mm_sub_epi16(res[0], sign0); res[1] = _mm_sub_epi16(res[1], sign1); res[2] = _mm_sub_epi16(res[2], sign2); res[3] = _mm_sub_epi16(res[3], sign3); res[4] = _mm_sub_epi16(res[4], sign4); res[5] = _mm_sub_epi16(res[5], sign5); res[6] = _mm_sub_epi16(res[6], sign6); res[7] = _mm_sub_epi16(res[7], sign7); if (bit == 1) { res[0] = _mm_srai_epi16(res[0], 1); res[1] = _mm_srai_epi16(res[1], 1); res[2] = _mm_srai_epi16(res[2], 1); res[3] = _mm_srai_epi16(res[3], 1); res[4] = _mm_srai_epi16(res[4], 1); res[5] = _mm_srai_epi16(res[5], 1); res[6] = _mm_srai_epi16(res[6], 1); res[7] = _mm_srai_epi16(res[7], 1); } else { res[0] = _mm_srai_epi16(res[0], 2); res[1] = _mm_srai_epi16(res[1], 2); res[2] = _mm_srai_epi16(res[2], 2); res[3] = _mm_srai_epi16(res[3], 2); res[4] = _mm_srai_epi16(res[4], 2); res[5] = _mm_srai_epi16(res[5], 2); res[6] = _mm_srai_epi16(res[6], 2); res[7] = _mm_srai_epi16(res[7], 2); } } // write 8x8 array static INLINE void write_buffer_8x8(tran_low_t *output, __m128i *res, int stride) { store_output(&res[0], (output + 0 * stride)); store_output(&res[1], (output + 1 * stride)); store_output(&res[2], (output + 2 * stride)); store_output(&res[3], (output + 3 * stride)); store_output(&res[4], (output + 4 * stride)); store_output(&res[5], (output + 5 * stride)); store_output(&res[6], (output + 6 * stride)); store_output(&res[7], (output + 7 * stride)); } static void fdct8_sse2(__m128i *in) { // constants const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64); const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64); const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); __m128i u0, u1, u2, u3, u4, u5, u6, u7; __m128i v0, v1, v2, v3, v4, v5, v6, v7; __m128i s0, s1, s2, s3, s4, s5, s6, s7; // stage 1 s0 = _mm_add_epi16(in[0], in[7]); s1 = _mm_add_epi16(in[1], in[6]); s2 = _mm_add_epi16(in[2], in[5]); s3 = _mm_add_epi16(in[3], in[4]); s4 = _mm_sub_epi16(in[3], in[4]); s5 = _mm_sub_epi16(in[2], in[5]); s6 = _mm_sub_epi16(in[1], in[6]); s7 = _mm_sub_epi16(in[0], in[7]); u0 = 
_mm_add_epi16(s0, s3); u1 = _mm_add_epi16(s1, s2); u2 = _mm_sub_epi16(s1, s2); u3 = _mm_sub_epi16(s0, s3); // interleave and perform butterfly multiplication/addition v0 = _mm_unpacklo_epi16(u0, u1); v1 = _mm_unpackhi_epi16(u0, u1); v2 = _mm_unpacklo_epi16(u2, u3); v3 = _mm_unpackhi_epi16(u2, u3); u0 = _mm_madd_epi16(v0, k__cospi_p16_p16); u1 = _mm_madd_epi16(v1, k__cospi_p16_p16); u2 = _mm_madd_epi16(v0, k__cospi_p16_m16); u3 = _mm_madd_epi16(v1, k__cospi_p16_m16); u4 = _mm_madd_epi16(v2, k__cospi_p24_p08); u5 = _mm_madd_epi16(v3, k__cospi_p24_p08); u6 = _mm_madd_epi16(v2, k__cospi_m08_p24); u7 = _mm_madd_epi16(v3, k__cospi_m08_p24); // shift and rounding v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING); v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING); v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING); u0 = _mm_srai_epi32(v0, DCT_CONST_BITS); u1 = _mm_srai_epi32(v1, DCT_CONST_BITS); u2 = _mm_srai_epi32(v2, DCT_CONST_BITS); u3 = _mm_srai_epi32(v3, DCT_CONST_BITS); u4 = _mm_srai_epi32(v4, DCT_CONST_BITS); u5 = _mm_srai_epi32(v5, DCT_CONST_BITS); u6 = _mm_srai_epi32(v6, DCT_CONST_BITS); u7 = _mm_srai_epi32(v7, DCT_CONST_BITS); in[0] = _mm_packs_epi32(u0, u1); in[2] = _mm_packs_epi32(u4, u5); in[4] = _mm_packs_epi32(u2, u3); in[6] = _mm_packs_epi32(u6, u7); // stage 2 // interleave and perform butterfly multiplication/addition u0 = _mm_unpacklo_epi16(s6, s5); u1 = _mm_unpackhi_epi16(s6, s5); v0 = _mm_madd_epi16(u0, k__cospi_p16_m16); v1 = _mm_madd_epi16(u1, k__cospi_p16_m16); v2 = _mm_madd_epi16(u0, k__cospi_p16_p16); v3 = _mm_madd_epi16(u1, k__cospi_p16_p16); // shift and rounding u0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING); u1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING); u2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING); u3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING); v0 = _mm_srai_epi32(u0, DCT_CONST_BITS); v1 = _mm_srai_epi32(u1, DCT_CONST_BITS); v2 = _mm_srai_epi32(u2, DCT_CONST_BITS); v3 = _mm_srai_epi32(u3, DCT_CONST_BITS); u0 = _mm_packs_epi32(v0, v1); u1 = _mm_packs_epi32(v2, v3); // stage 3 s0 = _mm_add_epi16(s4, u0); s1 = _mm_sub_epi16(s4, u0); s2 = _mm_sub_epi16(s7, u1); s3 = _mm_add_epi16(s7, u1); // stage 4 u0 = _mm_unpacklo_epi16(s0, s3); u1 = _mm_unpackhi_epi16(s0, s3); u2 = _mm_unpacklo_epi16(s1, s2); u3 = _mm_unpackhi_epi16(s1, s2); v0 = _mm_madd_epi16(u0, k__cospi_p28_p04); v1 = _mm_madd_epi16(u1, k__cospi_p28_p04); v2 = _mm_madd_epi16(u2, k__cospi_p12_p20); v3 = _mm_madd_epi16(u3, k__cospi_p12_p20); v4 = _mm_madd_epi16(u2, k__cospi_m20_p12); v5 = _mm_madd_epi16(u3, k__cospi_m20_p12); v6 = _mm_madd_epi16(u0, k__cospi_m04_p28); v7 = _mm_madd_epi16(u1, k__cospi_m04_p28); // shift and rounding u0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING); u1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING); u2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING); u3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING); u4 = _mm_add_epi32(v4, k__DCT_CONST_ROUNDING); u5 = _mm_add_epi32(v5, k__DCT_CONST_ROUNDING); u6 = _mm_add_epi32(v6, k__DCT_CONST_ROUNDING); u7 = _mm_add_epi32(v7, k__DCT_CONST_ROUNDING); v0 = _mm_srai_epi32(u0, DCT_CONST_BITS); v1 = _mm_srai_epi32(u1, DCT_CONST_BITS); v2 = _mm_srai_epi32(u2, DCT_CONST_BITS); v3 = _mm_srai_epi32(u3, DCT_CONST_BITS); v4 = _mm_srai_epi32(u4, DCT_CONST_BITS); v5 = _mm_srai_epi32(u5, DCT_CONST_BITS); v6 = _mm_srai_epi32(u6, DCT_CONST_BITS); v7 = 
_mm_srai_epi32(u7, DCT_CONST_BITS); in[1] = _mm_packs_epi32(v0, v1); in[3] = _mm_packs_epi32(v4, v5); in[5] = _mm_packs_epi32(v2, v3); in[7] = _mm_packs_epi32(v6, v7); // transpose transpose_16bit_8x8(in, in); } static void fadst8_sse2(__m128i *in) { // Constants const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__const_0 = _mm_set1_epi16(0); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); __m128i u0, u1, u2, u3, u4, u5, u6, u7, u8, u9, u10, u11, u12, u13, u14, u15; __m128i v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, v13, v14, v15; __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9, w10, w11, w12, w13, w14, w15; __m128i s0, s1, s2, s3, s4, s5, s6, s7; __m128i in0, in1, in2, in3, in4, in5, in6, in7; // properly aligned for butterfly input in0 = in[7]; in1 = in[0]; in2 = in[5]; in3 = in[2]; in4 = in[3]; in5 = in[4]; in6 = in[1]; in7 = in[6]; // column transformation // stage 1 // interleave and multiply/add into 32-bit integer s0 = _mm_unpacklo_epi16(in0, in1); s1 = _mm_unpackhi_epi16(in0, in1); s2 = _mm_unpacklo_epi16(in2, in3); s3 = _mm_unpackhi_epi16(in2, in3); s4 = _mm_unpacklo_epi16(in4, in5); s5 = _mm_unpackhi_epi16(in4, in5); s6 = _mm_unpacklo_epi16(in6, in7); s7 = _mm_unpackhi_epi16(in6, in7); u0 = _mm_madd_epi16(s0, k__cospi_p02_p30); u1 = _mm_madd_epi16(s1, k__cospi_p02_p30); u2 = _mm_madd_epi16(s0, k__cospi_p30_m02); u3 = _mm_madd_epi16(s1, k__cospi_p30_m02); u4 = _mm_madd_epi16(s2, k__cospi_p10_p22); u5 = _mm_madd_epi16(s3, k__cospi_p10_p22); u6 = _mm_madd_epi16(s2, k__cospi_p22_m10); u7 = _mm_madd_epi16(s3, k__cospi_p22_m10); u8 = _mm_madd_epi16(s4, k__cospi_p18_p14); u9 = _mm_madd_epi16(s5, k__cospi_p18_p14); u10 = _mm_madd_epi16(s4, k__cospi_p14_m18); u11 = _mm_madd_epi16(s5, k__cospi_p14_m18); u12 = _mm_madd_epi16(s6, k__cospi_p26_p06); u13 = _mm_madd_epi16(s7, k__cospi_p26_p06); u14 = _mm_madd_epi16(s6, k__cospi_p06_m26); u15 = _mm_madd_epi16(s7, k__cospi_p06_m26); // addition w0 = _mm_add_epi32(u0, u8); w1 = _mm_add_epi32(u1, u9); w2 = _mm_add_epi32(u2, u10); w3 = _mm_add_epi32(u3, u11); w4 = _mm_add_epi32(u4, u12); w5 = _mm_add_epi32(u5, u13); w6 = _mm_add_epi32(u6, u14); w7 = _mm_add_epi32(u7, u15); w8 = _mm_sub_epi32(u0, u8); w9 = _mm_sub_epi32(u1, u9); w10 = _mm_sub_epi32(u2, u10); w11 = _mm_sub_epi32(u3, u11); w12 = _mm_sub_epi32(u4, u12); w13 = _mm_sub_epi32(u5, u13); w14 = _mm_sub_epi32(u6, u14); w15 = _mm_sub_epi32(u7, u15); // shift and rounding v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING); v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING); v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING); v3 = _mm_add_epi32(w3, 
k__DCT_CONST_ROUNDING); v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING); v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING); v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING); v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING); v8 = _mm_add_epi32(w8, k__DCT_CONST_ROUNDING); v9 = _mm_add_epi32(w9, k__DCT_CONST_ROUNDING); v10 = _mm_add_epi32(w10, k__DCT_CONST_ROUNDING); v11 = _mm_add_epi32(w11, k__DCT_CONST_ROUNDING); v12 = _mm_add_epi32(w12, k__DCT_CONST_ROUNDING); v13 = _mm_add_epi32(w13, k__DCT_CONST_ROUNDING); v14 = _mm_add_epi32(w14, k__DCT_CONST_ROUNDING); v15 = _mm_add_epi32(w15, k__DCT_CONST_ROUNDING); u0 = _mm_srai_epi32(v0, DCT_CONST_BITS); u1 = _mm_srai_epi32(v1, DCT_CONST_BITS); u2 = _mm_srai_epi32(v2, DCT_CONST_BITS); u3 = _mm_srai_epi32(v3, DCT_CONST_BITS); u4 = _mm_srai_epi32(v4, DCT_CONST_BITS); u5 = _mm_srai_epi32(v5, DCT_CONST_BITS); u6 = _mm_srai_epi32(v6, DCT_CONST_BITS); u7 = _mm_srai_epi32(v7, DCT_CONST_BITS); u8 = _mm_srai_epi32(v8, DCT_CONST_BITS); u9 = _mm_srai_epi32(v9, DCT_CONST_BITS); u10 = _mm_srai_epi32(v10, DCT_CONST_BITS); u11 = _mm_srai_epi32(v11, DCT_CONST_BITS); u12 = _mm_srai_epi32(v12, DCT_CONST_BITS); u13 = _mm_srai_epi32(v13, DCT_CONST_BITS); u14 = _mm_srai_epi32(v14, DCT_CONST_BITS); u15 = _mm_srai_epi32(v15, DCT_CONST_BITS); // back to 16-bit and pack 8 integers into __m128i in[0] = _mm_packs_epi32(u0, u1); in[1] = _mm_packs_epi32(u2, u3); in[2] = _mm_packs_epi32(u4, u5); in[3] = _mm_packs_epi32(u6, u7); in[4] = _mm_packs_epi32(u8, u9); in[5] = _mm_packs_epi32(u10, u11); in[6] = _mm_packs_epi32(u12, u13); in[7] = _mm_packs_epi32(u14, u15); // stage 2 s0 = _mm_add_epi16(in[0], in[2]); s1 = _mm_add_epi16(in[1], in[3]); s2 = _mm_sub_epi16(in[0], in[2]); s3 = _mm_sub_epi16(in[1], in[3]); u0 = _mm_unpacklo_epi16(in[4], in[5]); u1 = _mm_unpackhi_epi16(in[4], in[5]); u2 = _mm_unpacklo_epi16(in[6], in[7]); u3 = _mm_unpackhi_epi16(in[6], in[7]); v0 = _mm_madd_epi16(u0, k__cospi_p08_p24); v1 = _mm_madd_epi16(u1, k__cospi_p08_p24); v2 = _mm_madd_epi16(u0, k__cospi_p24_m08); v3 = _mm_madd_epi16(u1, k__cospi_p24_m08); v4 = _mm_madd_epi16(u2, k__cospi_m24_p08); v5 = _mm_madd_epi16(u3, k__cospi_m24_p08); v6 = _mm_madd_epi16(u2, k__cospi_p08_p24); v7 = _mm_madd_epi16(u3, k__cospi_p08_p24); w0 = _mm_add_epi32(v0, v4); w1 = _mm_add_epi32(v1, v5); w2 = _mm_add_epi32(v2, v6); w3 = _mm_add_epi32(v3, v7); w4 = _mm_sub_epi32(v0, v4); w5 = _mm_sub_epi32(v1, v5); w6 = _mm_sub_epi32(v2, v6); w7 = _mm_sub_epi32(v3, v7); v0 = _mm_add_epi32(w0, k__DCT_CONST_ROUNDING); v1 = _mm_add_epi32(w1, k__DCT_CONST_ROUNDING); v2 = _mm_add_epi32(w2, k__DCT_CONST_ROUNDING); v3 = _mm_add_epi32(w3, k__DCT_CONST_ROUNDING); v4 = _mm_add_epi32(w4, k__DCT_CONST_ROUNDING); v5 = _mm_add_epi32(w5, k__DCT_CONST_ROUNDING); v6 = _mm_add_epi32(w6, k__DCT_CONST_ROUNDING); v7 = _mm_add_epi32(w7, k__DCT_CONST_ROUNDING); u0 = _mm_srai_epi32(v0, DCT_CONST_BITS); u1 = _mm_srai_epi32(v1, DCT_CONST_BITS); u2 = _mm_srai_epi32(v2, DCT_CONST_BITS); u3 = _mm_srai_epi32(v3, DCT_CONST_BITS); u4 = _mm_srai_epi32(v4, DCT_CONST_BITS); u5 = _mm_srai_epi32(v5, DCT_CONST_BITS); u6 = _mm_srai_epi32(v6, DCT_CONST_BITS); u7 = _mm_srai_epi32(v7, DCT_CONST_BITS); // back to 16-bit integers s4 = _mm_packs_epi32(u0, u1); s5 = _mm_packs_epi32(u2, u3); s6 = _mm_packs_epi32(u4, u5); s7 = _mm_packs_epi32(u6, u7); // stage 3 u0 = _mm_unpacklo_epi16(s2, s3); u1 = _mm_unpackhi_epi16(s2, s3); u2 = _mm_unpacklo_epi16(s6, s7); u3 = _mm_unpackhi_epi16(s6, s7); v0 = _mm_madd_epi16(u0, k__cospi_p16_p16); v1 = _mm_madd_epi16(u1, k__cospi_p16_p16); v2 =
_mm_madd_epi16(u0, k__cospi_p16_m16); v3 = _mm_madd_epi16(u1, k__cospi_p16_m16); v4 = _mm_madd_epi16(u2, k__cospi_p16_p16); v5 = _mm_madd_epi16(u3, k__cospi_p16_p16); v6 = _mm_madd_epi16(u2, k__cospi_p16_m16); v7 = _mm_madd_epi16(u3, k__cospi_p16_m16); u0 = _mm_add_epi32(v0, k__DCT_CONST_ROUNDING); u1 = _mm_add_epi32(v1, k__DCT_CONST_ROUNDING); u2 = _mm_add_epi32(v2, k__DCT_CONST_ROUNDING); u3 = _mm_add_epi32(v3, k__DCT_CONST_ROUNDING); u4 = _mm_add_epi32(v4, k__DCT_CONST_ROUNDING); u5 = _mm_add_epi32(v5, k__DCT_CONST_ROUNDING); u6 = _mm_add_epi32(v6, k__DCT_CONST_ROUNDING); u7 = _mm_add_epi32(v7, k__DCT_CONST_ROUNDING); v0 = _mm_srai_epi32(u0, DCT_CONST_BITS); v1 = _mm_srai_epi32(u1, DCT_CONST_BITS); v2 = _mm_srai_epi32(u2, DCT_CONST_BITS); v3 = _mm_srai_epi32(u3, DCT_CONST_BITS); v4 = _mm_srai_epi32(u4, DCT_CONST_BITS); v5 = _mm_srai_epi32(u5, DCT_CONST_BITS); v6 = _mm_srai_epi32(u6, DCT_CONST_BITS); v7 = _mm_srai_epi32(u7, DCT_CONST_BITS); s2 = _mm_packs_epi32(v0, v1); s3 = _mm_packs_epi32(v2, v3); s6 = _mm_packs_epi32(v4, v5); s7 = _mm_packs_epi32(v6, v7); // FIXME(jingning): do subtract using bit inversion? in[0] = s0; in[1] = _mm_sub_epi16(k__const_0, s4); in[2] = s6; in[3] = _mm_sub_epi16(k__const_0, s2); in[4] = s3; in[5] = _mm_sub_epi16(k__const_0, s7); in[6] = s5; in[7] = _mm_sub_epi16(k__const_0, s1); // transpose transpose_16bit_8x8(in, in); } void vp9_fht8x8_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type) { __m128i in[8]; switch (tx_type) { case DCT_DCT: vpx_fdct8x8_sse2(input, output, stride); break; case ADST_DCT: load_buffer_8x8(input, in, stride); fadst8_sse2(in); fdct8_sse2(in); right_shift_8x8(in, 1); write_buffer_8x8(output, in, 8); break; case DCT_ADST: load_buffer_8x8(input, in, stride); fdct8_sse2(in); fadst8_sse2(in); right_shift_8x8(in, 1); write_buffer_8x8(output, in, 8); break; default: assert(tx_type == ADST_ADST); load_buffer_8x8(input, in, stride); fadst8_sse2(in); fadst8_sse2(in); right_shift_8x8(in, 1); write_buffer_8x8(output, in, 8); break; } } static INLINE void load_buffer_16x16(const int16_t *input, __m128i *in0, __m128i *in1, int stride) { // load first 8 columns load_buffer_8x8(input, in0, stride); load_buffer_8x8(input + 8 * stride, in0 + 8, stride); input += 8; // load second 8 columns load_buffer_8x8(input, in1, stride); load_buffer_8x8(input + 8 * stride, in1 + 8, stride); } static INLINE void write_buffer_16x16(tran_low_t *output, __m128i *in0, __m128i *in1, int stride) { // write first 8 columns write_buffer_8x8(output, in0, stride); write_buffer_8x8(output + 8 * stride, in0 + 8, stride); // write second 8 columns output += 8; write_buffer_8x8(output, in1, stride); write_buffer_8x8(output + 8 * stride, in1 + 8, stride); } static INLINE void right_shift_16x16(__m128i *res0, __m128i *res1) { // perform rounding operations right_shift_8x8(res0, 2); right_shift_8x8(res0 + 8, 2); right_shift_8x8(res1, 2); right_shift_8x8(res1 + 8, 2); } static void fdct16_8col(__m128i *in) { // perform 16x16 1-D DCT for 8 columns __m128i i[8], s[8], p[8], t[8], u[16], v[16]; const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64); const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i k__cospi_p28_p04 = 
pair_set_epi16(cospi_28_64, cospi_4_64); const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64); const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64); const __m128i k__cospi_p30_p02 = pair_set_epi16(cospi_30_64, cospi_2_64); const __m128i k__cospi_p14_p18 = pair_set_epi16(cospi_14_64, cospi_18_64); const __m128i k__cospi_m02_p30 = pair_set_epi16(-cospi_2_64, cospi_30_64); const __m128i k__cospi_m18_p14 = pair_set_epi16(-cospi_18_64, cospi_14_64); const __m128i k__cospi_p22_p10 = pair_set_epi16(cospi_22_64, cospi_10_64); const __m128i k__cospi_p06_p26 = pair_set_epi16(cospi_6_64, cospi_26_64); const __m128i k__cospi_m10_p22 = pair_set_epi16(-cospi_10_64, cospi_22_64); const __m128i k__cospi_m26_p06 = pair_set_epi16(-cospi_26_64, cospi_6_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); // stage 1 i[0] = _mm_add_epi16(in[0], in[15]); i[1] = _mm_add_epi16(in[1], in[14]); i[2] = _mm_add_epi16(in[2], in[13]); i[3] = _mm_add_epi16(in[3], in[12]); i[4] = _mm_add_epi16(in[4], in[11]); i[5] = _mm_add_epi16(in[5], in[10]); i[6] = _mm_add_epi16(in[6], in[9]); i[7] = _mm_add_epi16(in[7], in[8]); s[0] = _mm_sub_epi16(in[7], in[8]); s[1] = _mm_sub_epi16(in[6], in[9]); s[2] = _mm_sub_epi16(in[5], in[10]); s[3] = _mm_sub_epi16(in[4], in[11]); s[4] = _mm_sub_epi16(in[3], in[12]); s[5] = _mm_sub_epi16(in[2], in[13]); s[6] = _mm_sub_epi16(in[1], in[14]); s[7] = _mm_sub_epi16(in[0], in[15]); p[0] = _mm_add_epi16(i[0], i[7]); p[1] = _mm_add_epi16(i[1], i[6]); p[2] = _mm_add_epi16(i[2], i[5]); p[3] = _mm_add_epi16(i[3], i[4]); p[4] = _mm_sub_epi16(i[3], i[4]); p[5] = _mm_sub_epi16(i[2], i[5]); p[6] = _mm_sub_epi16(i[1], i[6]); p[7] = _mm_sub_epi16(i[0], i[7]); u[0] = _mm_add_epi16(p[0], p[3]); u[1] = _mm_add_epi16(p[1], p[2]); u[2] = _mm_sub_epi16(p[1], p[2]); u[3] = _mm_sub_epi16(p[0], p[3]); v[0] = _mm_unpacklo_epi16(u[0], u[1]); v[1] = _mm_unpackhi_epi16(u[0], u[1]); v[2] = _mm_unpacklo_epi16(u[2], u[3]); v[3] = _mm_unpackhi_epi16(u[2], u[3]); u[0] = _mm_madd_epi16(v[0], k__cospi_p16_p16); u[1] = _mm_madd_epi16(v[1], k__cospi_p16_p16); u[2] = _mm_madd_epi16(v[0], k__cospi_p16_m16); u[3] = _mm_madd_epi16(v[1], k__cospi_p16_m16); u[4] = _mm_madd_epi16(v[2], k__cospi_p24_p08); u[5] = _mm_madd_epi16(v[3], k__cospi_p24_p08); u[6] = _mm_madd_epi16(v[2], k__cospi_m08_p24); u[7] = _mm_madd_epi16(v[3], k__cospi_m08_p24); v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); in[0] = _mm_packs_epi32(u[0], u[1]); in[4] = _mm_packs_epi32(u[4], u[5]); in[8] = _mm_packs_epi32(u[2], u[3]); in[12] = _mm_packs_epi32(u[6], u[7]); u[0] = _mm_unpacklo_epi16(p[5], p[6]); u[1] = _mm_unpackhi_epi16(p[5], p[6]); v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16); v[1] = _mm_madd_epi16(u[1], 
k__cospi_m16_p16); v[2] = _mm_madd_epi16(u[0], k__cospi_p16_p16); v[3] = _mm_madd_epi16(u[1], k__cospi_p16_p16); u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); u[0] = _mm_packs_epi32(v[0], v[1]); u[1] = _mm_packs_epi32(v[2], v[3]); t[0] = _mm_add_epi16(p[4], u[0]); t[1] = _mm_sub_epi16(p[4], u[0]); t[2] = _mm_sub_epi16(p[7], u[1]); t[3] = _mm_add_epi16(p[7], u[1]); u[0] = _mm_unpacklo_epi16(t[0], t[3]); u[1] = _mm_unpackhi_epi16(t[0], t[3]); u[2] = _mm_unpacklo_epi16(t[1], t[2]); u[3] = _mm_unpackhi_epi16(t[1], t[2]); v[0] = _mm_madd_epi16(u[0], k__cospi_p28_p04); v[1] = _mm_madd_epi16(u[1], k__cospi_p28_p04); v[2] = _mm_madd_epi16(u[2], k__cospi_p12_p20); v[3] = _mm_madd_epi16(u[3], k__cospi_p12_p20); v[4] = _mm_madd_epi16(u[2], k__cospi_m20_p12); v[5] = _mm_madd_epi16(u[3], k__cospi_m20_p12); v[6] = _mm_madd_epi16(u[0], k__cospi_m04_p28); v[7] = _mm_madd_epi16(u[1], k__cospi_m04_p28); u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); in[2] = _mm_packs_epi32(v[0], v[1]); in[6] = _mm_packs_epi32(v[4], v[5]); in[10] = _mm_packs_epi32(v[2], v[3]); in[14] = _mm_packs_epi32(v[6], v[7]); // stage 2 u[0] = _mm_unpacklo_epi16(s[2], s[5]); u[1] = _mm_unpackhi_epi16(s[2], s[5]); u[2] = _mm_unpacklo_epi16(s[3], s[4]); u[3] = _mm_unpackhi_epi16(s[3], s[4]); v[0] = _mm_madd_epi16(u[0], k__cospi_m16_p16); v[1] = _mm_madd_epi16(u[1], k__cospi_m16_p16); v[2] = _mm_madd_epi16(u[2], k__cospi_m16_p16); v[3] = _mm_madd_epi16(u[3], k__cospi_m16_p16); v[4] = _mm_madd_epi16(u[2], k__cospi_p16_p16); v[5] = _mm_madd_epi16(u[3], k__cospi_p16_p16); v[6] = _mm_madd_epi16(u[0], k__cospi_p16_p16); v[7] = _mm_madd_epi16(u[1], k__cospi_p16_p16); u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); t[2] = _mm_packs_epi32(v[0], v[1]); t[3] = _mm_packs_epi32(v[2], v[3]); t[4] = 
_mm_packs_epi32(v[4], v[5]); t[5] = _mm_packs_epi32(v[6], v[7]); // stage 3 p[0] = _mm_add_epi16(s[0], t[3]); p[1] = _mm_add_epi16(s[1], t[2]); p[2] = _mm_sub_epi16(s[1], t[2]); p[3] = _mm_sub_epi16(s[0], t[3]); p[4] = _mm_sub_epi16(s[7], t[4]); p[5] = _mm_sub_epi16(s[6], t[5]); p[6] = _mm_add_epi16(s[6], t[5]); p[7] = _mm_add_epi16(s[7], t[4]); // stage 4 u[0] = _mm_unpacklo_epi16(p[1], p[6]); u[1] = _mm_unpackhi_epi16(p[1], p[6]); u[2] = _mm_unpacklo_epi16(p[2], p[5]); u[3] = _mm_unpackhi_epi16(p[2], p[5]); v[0] = _mm_madd_epi16(u[0], k__cospi_m08_p24); v[1] = _mm_madd_epi16(u[1], k__cospi_m08_p24); v[2] = _mm_madd_epi16(u[2], k__cospi_p24_p08); v[3] = _mm_madd_epi16(u[3], k__cospi_p24_p08); v[4] = _mm_madd_epi16(u[2], k__cospi_p08_m24); v[5] = _mm_madd_epi16(u[3], k__cospi_p08_m24); v[6] = _mm_madd_epi16(u[0], k__cospi_p24_p08); v[7] = _mm_madd_epi16(u[1], k__cospi_p24_p08); u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); t[1] = _mm_packs_epi32(v[0], v[1]); t[2] = _mm_packs_epi32(v[2], v[3]); t[5] = _mm_packs_epi32(v[4], v[5]); t[6] = _mm_packs_epi32(v[6], v[7]); // stage 5 s[0] = _mm_add_epi16(p[0], t[1]); s[1] = _mm_sub_epi16(p[0], t[1]); s[2] = _mm_add_epi16(p[3], t[2]); s[3] = _mm_sub_epi16(p[3], t[2]); s[4] = _mm_sub_epi16(p[4], t[5]); s[5] = _mm_add_epi16(p[4], t[5]); s[6] = _mm_sub_epi16(p[7], t[6]); s[7] = _mm_add_epi16(p[7], t[6]); // stage 6 u[0] = _mm_unpacklo_epi16(s[0], s[7]); u[1] = _mm_unpackhi_epi16(s[0], s[7]); u[2] = _mm_unpacklo_epi16(s[1], s[6]); u[3] = _mm_unpackhi_epi16(s[1], s[6]); u[4] = _mm_unpacklo_epi16(s[2], s[5]); u[5] = _mm_unpackhi_epi16(s[2], s[5]); u[6] = _mm_unpacklo_epi16(s[3], s[4]); u[7] = _mm_unpackhi_epi16(s[3], s[4]); v[0] = _mm_madd_epi16(u[0], k__cospi_p30_p02); v[1] = _mm_madd_epi16(u[1], k__cospi_p30_p02); v[2] = _mm_madd_epi16(u[2], k__cospi_p14_p18); v[3] = _mm_madd_epi16(u[3], k__cospi_p14_p18); v[4] = _mm_madd_epi16(u[4], k__cospi_p22_p10); v[5] = _mm_madd_epi16(u[5], k__cospi_p22_p10); v[6] = _mm_madd_epi16(u[6], k__cospi_p06_p26); v[7] = _mm_madd_epi16(u[7], k__cospi_p06_p26); v[8] = _mm_madd_epi16(u[6], k__cospi_m26_p06); v[9] = _mm_madd_epi16(u[7], k__cospi_m26_p06); v[10] = _mm_madd_epi16(u[4], k__cospi_m10_p22); v[11] = _mm_madd_epi16(u[5], k__cospi_m10_p22); v[12] = _mm_madd_epi16(u[2], k__cospi_m18_p14); v[13] = _mm_madd_epi16(u[3], k__cospi_m18_p14); v[14] = _mm_madd_epi16(u[0], k__cospi_m02_p30); v[15] = _mm_madd_epi16(u[1], k__cospi_m02_p30); u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); u[7] = _mm_add_epi32(v[7], 
k__DCT_CONST_ROUNDING); u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); in[1] = _mm_packs_epi32(v[0], v[1]); in[9] = _mm_packs_epi32(v[2], v[3]); in[5] = _mm_packs_epi32(v[4], v[5]); in[13] = _mm_packs_epi32(v[6], v[7]); in[3] = _mm_packs_epi32(v[8], v[9]); in[11] = _mm_packs_epi32(v[10], v[11]); in[7] = _mm_packs_epi32(v[12], v[13]); in[15] = _mm_packs_epi32(v[14], v[15]); } static void fadst16_8col(__m128i *in) { // perform 16x16 1-D ADST for 8 columns __m128i s[16], x[16], u[32], v[32]; const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); const __m128i k__cospi_p31_m01 = pair_set_epi16(cospi_31_64, -cospi_1_64); const __m128i k__cospi_p05_p27 = pair_set_epi16(cospi_5_64, cospi_27_64); const __m128i k__cospi_p27_m05 = pair_set_epi16(cospi_27_64, -cospi_5_64); const __m128i k__cospi_p09_p23 = pair_set_epi16(cospi_9_64, cospi_23_64); const __m128i k__cospi_p23_m09 = pair_set_epi16(cospi_23_64, -cospi_9_64); const __m128i k__cospi_p13_p19 = pair_set_epi16(cospi_13_64, cospi_19_64); const __m128i k__cospi_p19_m13 = pair_set_epi16(cospi_19_64, -cospi_13_64); const __m128i k__cospi_p17_p15 = pair_set_epi16(cospi_17_64, cospi_15_64); const __m128i k__cospi_p15_m17 = pair_set_epi16(cospi_15_64, -cospi_17_64); const __m128i k__cospi_p21_p11 = pair_set_epi16(cospi_21_64, cospi_11_64); const __m128i k__cospi_p11_m21 = pair_set_epi16(cospi_11_64, -cospi_21_64); const __m128i k__cospi_p25_p07 = pair_set_epi16(cospi_25_64, cospi_7_64); const __m128i k__cospi_p07_m25 = pair_set_epi16(cospi_7_64, -cospi_25_64); const __m128i k__cospi_p29_p03 = pair_set_epi16(cospi_29_64, cospi_3_64); const __m128i k__cospi_p03_m29 = pair_set_epi16(cospi_3_64, -cospi_29_64); const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64); const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); const __m128i k__cospi_m16_m16 = _mm_set1_epi16(-cospi_16_64); const __m128i 
k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i kZero = _mm_set1_epi16(0); u[0] = _mm_unpacklo_epi16(in[15], in[0]); u[1] = _mm_unpackhi_epi16(in[15], in[0]); u[2] = _mm_unpacklo_epi16(in[13], in[2]); u[3] = _mm_unpackhi_epi16(in[13], in[2]); u[4] = _mm_unpacklo_epi16(in[11], in[4]); u[5] = _mm_unpackhi_epi16(in[11], in[4]); u[6] = _mm_unpacklo_epi16(in[9], in[6]); u[7] = _mm_unpackhi_epi16(in[9], in[6]); u[8] = _mm_unpacklo_epi16(in[7], in[8]); u[9] = _mm_unpackhi_epi16(in[7], in[8]); u[10] = _mm_unpacklo_epi16(in[5], in[10]); u[11] = _mm_unpackhi_epi16(in[5], in[10]); u[12] = _mm_unpacklo_epi16(in[3], in[12]); u[13] = _mm_unpackhi_epi16(in[3], in[12]); u[14] = _mm_unpacklo_epi16(in[1], in[14]); u[15] = _mm_unpackhi_epi16(in[1], in[14]); v[0] = _mm_madd_epi16(u[0], k__cospi_p01_p31); v[1] = _mm_madd_epi16(u[1], k__cospi_p01_p31); v[2] = _mm_madd_epi16(u[0], k__cospi_p31_m01); v[3] = _mm_madd_epi16(u[1], k__cospi_p31_m01); v[4] = _mm_madd_epi16(u[2], k__cospi_p05_p27); v[5] = _mm_madd_epi16(u[3], k__cospi_p05_p27); v[6] = _mm_madd_epi16(u[2], k__cospi_p27_m05); v[7] = _mm_madd_epi16(u[3], k__cospi_p27_m05); v[8] = _mm_madd_epi16(u[4], k__cospi_p09_p23); v[9] = _mm_madd_epi16(u[5], k__cospi_p09_p23); v[10] = _mm_madd_epi16(u[4], k__cospi_p23_m09); v[11] = _mm_madd_epi16(u[5], k__cospi_p23_m09); v[12] = _mm_madd_epi16(u[6], k__cospi_p13_p19); v[13] = _mm_madd_epi16(u[7], k__cospi_p13_p19); v[14] = _mm_madd_epi16(u[6], k__cospi_p19_m13); v[15] = _mm_madd_epi16(u[7], k__cospi_p19_m13); v[16] = _mm_madd_epi16(u[8], k__cospi_p17_p15); v[17] = _mm_madd_epi16(u[9], k__cospi_p17_p15); v[18] = _mm_madd_epi16(u[8], k__cospi_p15_m17); v[19] = _mm_madd_epi16(u[9], k__cospi_p15_m17); v[20] = _mm_madd_epi16(u[10], k__cospi_p21_p11); v[21] = _mm_madd_epi16(u[11], k__cospi_p21_p11); v[22] = _mm_madd_epi16(u[10], k__cospi_p11_m21); v[23] = _mm_madd_epi16(u[11], k__cospi_p11_m21); v[24] = _mm_madd_epi16(u[12], k__cospi_p25_p07); v[25] = _mm_madd_epi16(u[13], k__cospi_p25_p07); v[26] = _mm_madd_epi16(u[12], k__cospi_p07_m25); v[27] = _mm_madd_epi16(u[13], k__cospi_p07_m25); v[28] = _mm_madd_epi16(u[14], k__cospi_p29_p03); v[29] = _mm_madd_epi16(u[15], k__cospi_p29_p03); v[30] = _mm_madd_epi16(u[14], k__cospi_p03_m29); v[31] = _mm_madd_epi16(u[15], k__cospi_p03_m29); u[0] = _mm_add_epi32(v[0], v[16]); u[1] = _mm_add_epi32(v[1], v[17]); u[2] = _mm_add_epi32(v[2], v[18]); u[3] = _mm_add_epi32(v[3], v[19]); u[4] = _mm_add_epi32(v[4], v[20]); u[5] = _mm_add_epi32(v[5], v[21]); u[6] = _mm_add_epi32(v[6], v[22]); u[7] = _mm_add_epi32(v[7], v[23]); u[8] = _mm_add_epi32(v[8], v[24]); u[9] = _mm_add_epi32(v[9], v[25]); u[10] = _mm_add_epi32(v[10], v[26]); u[11] = _mm_add_epi32(v[11], v[27]); u[12] = _mm_add_epi32(v[12], v[28]); u[13] = _mm_add_epi32(v[13], v[29]); u[14] = _mm_add_epi32(v[14], v[30]); u[15] = _mm_add_epi32(v[15], v[31]); u[16] = _mm_sub_epi32(v[0], v[16]); u[17] = _mm_sub_epi32(v[1], v[17]); u[18] = _mm_sub_epi32(v[2], v[18]); u[19] = _mm_sub_epi32(v[3], v[19]); u[20] = _mm_sub_epi32(v[4], v[20]); u[21] = _mm_sub_epi32(v[5], v[21]); u[22] = _mm_sub_epi32(v[6], v[22]); u[23] = _mm_sub_epi32(v[7], v[23]); u[24] = _mm_sub_epi32(v[8], v[24]); u[25] = _mm_sub_epi32(v[9], v[25]); u[26] = _mm_sub_epi32(v[10], v[26]); u[27] = _mm_sub_epi32(v[11], v[27]); u[28] = 
_mm_sub_epi32(v[12], v[28]); u[29] = _mm_sub_epi32(v[13], v[29]); u[30] = _mm_sub_epi32(v[14], v[30]); u[31] = _mm_sub_epi32(v[15], v[31]); v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); v[16] = _mm_add_epi32(u[16], k__DCT_CONST_ROUNDING); v[17] = _mm_add_epi32(u[17], k__DCT_CONST_ROUNDING); v[18] = _mm_add_epi32(u[18], k__DCT_CONST_ROUNDING); v[19] = _mm_add_epi32(u[19], k__DCT_CONST_ROUNDING); v[20] = _mm_add_epi32(u[20], k__DCT_CONST_ROUNDING); v[21] = _mm_add_epi32(u[21], k__DCT_CONST_ROUNDING); v[22] = _mm_add_epi32(u[22], k__DCT_CONST_ROUNDING); v[23] = _mm_add_epi32(u[23], k__DCT_CONST_ROUNDING); v[24] = _mm_add_epi32(u[24], k__DCT_CONST_ROUNDING); v[25] = _mm_add_epi32(u[25], k__DCT_CONST_ROUNDING); v[26] = _mm_add_epi32(u[26], k__DCT_CONST_ROUNDING); v[27] = _mm_add_epi32(u[27], k__DCT_CONST_ROUNDING); v[28] = _mm_add_epi32(u[28], k__DCT_CONST_ROUNDING); v[29] = _mm_add_epi32(u[29], k__DCT_CONST_ROUNDING); v[30] = _mm_add_epi32(u[30], k__DCT_CONST_ROUNDING); v[31] = _mm_add_epi32(u[31], k__DCT_CONST_ROUNDING); u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS); u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); u[16] = _mm_srai_epi32(v[16], DCT_CONST_BITS); u[17] = _mm_srai_epi32(v[17], DCT_CONST_BITS); u[18] = _mm_srai_epi32(v[18], DCT_CONST_BITS); u[19] = _mm_srai_epi32(v[19], DCT_CONST_BITS); u[20] = _mm_srai_epi32(v[20], DCT_CONST_BITS); u[21] = _mm_srai_epi32(v[21], DCT_CONST_BITS); u[22] = _mm_srai_epi32(v[22], DCT_CONST_BITS); u[23] = _mm_srai_epi32(v[23], DCT_CONST_BITS); u[24] = _mm_srai_epi32(v[24], DCT_CONST_BITS); u[25] = _mm_srai_epi32(v[25], DCT_CONST_BITS); u[26] = _mm_srai_epi32(v[26], DCT_CONST_BITS); u[27] = _mm_srai_epi32(v[27], DCT_CONST_BITS); u[28] = _mm_srai_epi32(v[28], DCT_CONST_BITS); u[29] = _mm_srai_epi32(v[29], DCT_CONST_BITS); u[30] = _mm_srai_epi32(v[30], DCT_CONST_BITS); u[31] = _mm_srai_epi32(v[31], DCT_CONST_BITS); s[0] = _mm_packs_epi32(u[0], u[1]); s[1] = _mm_packs_epi32(u[2], u[3]); s[2] = _mm_packs_epi32(u[4], u[5]); s[3] = _mm_packs_epi32(u[6], u[7]); s[4] = _mm_packs_epi32(u[8], u[9]); s[5] = _mm_packs_epi32(u[10], u[11]); s[6] = _mm_packs_epi32(u[12], 
u[13]); s[7] = _mm_packs_epi32(u[14], u[15]); s[8] = _mm_packs_epi32(u[16], u[17]); s[9] = _mm_packs_epi32(u[18], u[19]); s[10] = _mm_packs_epi32(u[20], u[21]); s[11] = _mm_packs_epi32(u[22], u[23]); s[12] = _mm_packs_epi32(u[24], u[25]); s[13] = _mm_packs_epi32(u[26], u[27]); s[14] = _mm_packs_epi32(u[28], u[29]); s[15] = _mm_packs_epi32(u[30], u[31]); // stage 2 u[0] = _mm_unpacklo_epi16(s[8], s[9]); u[1] = _mm_unpackhi_epi16(s[8], s[9]); u[2] = _mm_unpacklo_epi16(s[10], s[11]); u[3] = _mm_unpackhi_epi16(s[10], s[11]); u[4] = _mm_unpacklo_epi16(s[12], s[13]); u[5] = _mm_unpackhi_epi16(s[12], s[13]); u[6] = _mm_unpacklo_epi16(s[14], s[15]); u[7] = _mm_unpackhi_epi16(s[14], s[15]); v[0] = _mm_madd_epi16(u[0], k__cospi_p04_p28); v[1] = _mm_madd_epi16(u[1], k__cospi_p04_p28); v[2] = _mm_madd_epi16(u[0], k__cospi_p28_m04); v[3] = _mm_madd_epi16(u[1], k__cospi_p28_m04); v[4] = _mm_madd_epi16(u[2], k__cospi_p20_p12); v[5] = _mm_madd_epi16(u[3], k__cospi_p20_p12); v[6] = _mm_madd_epi16(u[2], k__cospi_p12_m20); v[7] = _mm_madd_epi16(u[3], k__cospi_p12_m20); v[8] = _mm_madd_epi16(u[4], k__cospi_m28_p04); v[9] = _mm_madd_epi16(u[5], k__cospi_m28_p04); v[10] = _mm_madd_epi16(u[4], k__cospi_p04_p28); v[11] = _mm_madd_epi16(u[5], k__cospi_p04_p28); v[12] = _mm_madd_epi16(u[6], k__cospi_m12_p20); v[13] = _mm_madd_epi16(u[7], k__cospi_m12_p20); v[14] = _mm_madd_epi16(u[6], k__cospi_p20_p12); v[15] = _mm_madd_epi16(u[7], k__cospi_p20_p12); u[0] = _mm_add_epi32(v[0], v[8]); u[1] = _mm_add_epi32(v[1], v[9]); u[2] = _mm_add_epi32(v[2], v[10]); u[3] = _mm_add_epi32(v[3], v[11]); u[4] = _mm_add_epi32(v[4], v[12]); u[5] = _mm_add_epi32(v[5], v[13]); u[6] = _mm_add_epi32(v[6], v[14]); u[7] = _mm_add_epi32(v[7], v[15]); u[8] = _mm_sub_epi32(v[0], v[8]); u[9] = _mm_sub_epi32(v[1], v[9]); u[10] = _mm_sub_epi32(v[2], v[10]); u[11] = _mm_sub_epi32(v[3], v[11]); u[12] = _mm_sub_epi32(v[4], v[12]); u[13] = _mm_sub_epi32(v[5], v[13]); u[14] = _mm_sub_epi32(v[6], v[14]); u[15] = _mm_sub_epi32(v[7], v[15]); v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS); u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); u[15] = _mm_srai_epi32(v[15], 
DCT_CONST_BITS); x[0] = _mm_add_epi16(s[0], s[4]); x[1] = _mm_add_epi16(s[1], s[5]); x[2] = _mm_add_epi16(s[2], s[6]); x[3] = _mm_add_epi16(s[3], s[7]); x[4] = _mm_sub_epi16(s[0], s[4]); x[5] = _mm_sub_epi16(s[1], s[5]); x[6] = _mm_sub_epi16(s[2], s[6]); x[7] = _mm_sub_epi16(s[3], s[7]); x[8] = _mm_packs_epi32(u[0], u[1]); x[9] = _mm_packs_epi32(u[2], u[3]); x[10] = _mm_packs_epi32(u[4], u[5]); x[11] = _mm_packs_epi32(u[6], u[7]); x[12] = _mm_packs_epi32(u[8], u[9]); x[13] = _mm_packs_epi32(u[10], u[11]); x[14] = _mm_packs_epi32(u[12], u[13]); x[15] = _mm_packs_epi32(u[14], u[15]); // stage 3 u[0] = _mm_unpacklo_epi16(x[4], x[5]); u[1] = _mm_unpackhi_epi16(x[4], x[5]); u[2] = _mm_unpacklo_epi16(x[6], x[7]); u[3] = _mm_unpackhi_epi16(x[6], x[7]); u[4] = _mm_unpacklo_epi16(x[12], x[13]); u[5] = _mm_unpackhi_epi16(x[12], x[13]); u[6] = _mm_unpacklo_epi16(x[14], x[15]); u[7] = _mm_unpackhi_epi16(x[14], x[15]); v[0] = _mm_madd_epi16(u[0], k__cospi_p08_p24); v[1] = _mm_madd_epi16(u[1], k__cospi_p08_p24); v[2] = _mm_madd_epi16(u[0], k__cospi_p24_m08); v[3] = _mm_madd_epi16(u[1], k__cospi_p24_m08); v[4] = _mm_madd_epi16(u[2], k__cospi_m24_p08); v[5] = _mm_madd_epi16(u[3], k__cospi_m24_p08); v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24); v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24); v[8] = _mm_madd_epi16(u[4], k__cospi_p08_p24); v[9] = _mm_madd_epi16(u[5], k__cospi_p08_p24); v[10] = _mm_madd_epi16(u[4], k__cospi_p24_m08); v[11] = _mm_madd_epi16(u[5], k__cospi_p24_m08); v[12] = _mm_madd_epi16(u[6], k__cospi_m24_p08); v[13] = _mm_madd_epi16(u[7], k__cospi_m24_p08); v[14] = _mm_madd_epi16(u[6], k__cospi_p08_p24); v[15] = _mm_madd_epi16(u[7], k__cospi_p08_p24); u[0] = _mm_add_epi32(v[0], v[4]); u[1] = _mm_add_epi32(v[1], v[5]); u[2] = _mm_add_epi32(v[2], v[6]); u[3] = _mm_add_epi32(v[3], v[7]); u[4] = _mm_sub_epi32(v[0], v[4]); u[5] = _mm_sub_epi32(v[1], v[5]); u[6] = _mm_sub_epi32(v[2], v[6]); u[7] = _mm_sub_epi32(v[3], v[7]); u[8] = _mm_add_epi32(v[8], v[12]); u[9] = _mm_add_epi32(v[9], v[13]); u[10] = _mm_add_epi32(v[10], v[14]); u[11] = _mm_add_epi32(v[11], v[15]); u[12] = _mm_sub_epi32(v[8], v[12]); u[13] = _mm_sub_epi32(v[9], v[13]); u[14] = _mm_sub_epi32(v[10], v[14]); u[15] = _mm_sub_epi32(v[11], v[15]); u[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); u[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); u[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); u[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); u[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); u[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); u[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); u[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); u[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); u[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); u[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); u[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); u[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); u[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); u[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); u[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); v[10] = _mm_srai_epi32(u[10], 
DCT_CONST_BITS); v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); s[0] = _mm_add_epi16(x[0], x[2]); s[1] = _mm_add_epi16(x[1], x[3]); s[2] = _mm_sub_epi16(x[0], x[2]); s[3] = _mm_sub_epi16(x[1], x[3]); s[4] = _mm_packs_epi32(v[0], v[1]); s[5] = _mm_packs_epi32(v[2], v[3]); s[6] = _mm_packs_epi32(v[4], v[5]); s[7] = _mm_packs_epi32(v[6], v[7]); s[8] = _mm_add_epi16(x[8], x[10]); s[9] = _mm_add_epi16(x[9], x[11]); s[10] = _mm_sub_epi16(x[8], x[10]); s[11] = _mm_sub_epi16(x[9], x[11]); s[12] = _mm_packs_epi32(v[8], v[9]); s[13] = _mm_packs_epi32(v[10], v[11]); s[14] = _mm_packs_epi32(v[12], v[13]); s[15] = _mm_packs_epi32(v[14], v[15]); // stage 4 u[0] = _mm_unpacklo_epi16(s[2], s[3]); u[1] = _mm_unpackhi_epi16(s[2], s[3]); u[2] = _mm_unpacklo_epi16(s[6], s[7]); u[3] = _mm_unpackhi_epi16(s[6], s[7]); u[4] = _mm_unpacklo_epi16(s[10], s[11]); u[5] = _mm_unpackhi_epi16(s[10], s[11]); u[6] = _mm_unpacklo_epi16(s[14], s[15]); u[7] = _mm_unpackhi_epi16(s[14], s[15]); v[0] = _mm_madd_epi16(u[0], k__cospi_m16_m16); v[1] = _mm_madd_epi16(u[1], k__cospi_m16_m16); v[2] = _mm_madd_epi16(u[0], k__cospi_p16_m16); v[3] = _mm_madd_epi16(u[1], k__cospi_p16_m16); v[4] = _mm_madd_epi16(u[2], k__cospi_p16_p16); v[5] = _mm_madd_epi16(u[3], k__cospi_p16_p16); v[6] = _mm_madd_epi16(u[2], k__cospi_m16_p16); v[7] = _mm_madd_epi16(u[3], k__cospi_m16_p16); v[8] = _mm_madd_epi16(u[4], k__cospi_p16_p16); v[9] = _mm_madd_epi16(u[5], k__cospi_p16_p16); v[10] = _mm_madd_epi16(u[4], k__cospi_m16_p16); v[11] = _mm_madd_epi16(u[5], k__cospi_m16_p16); v[12] = _mm_madd_epi16(u[6], k__cospi_m16_m16); v[13] = _mm_madd_epi16(u[7], k__cospi_m16_m16); v[14] = _mm_madd_epi16(u[6], k__cospi_p16_m16); v[15] = _mm_madd_epi16(u[7], k__cospi_p16_m16); u[0] = _mm_add_epi32(v[0], k__DCT_CONST_ROUNDING); u[1] = _mm_add_epi32(v[1], k__DCT_CONST_ROUNDING); u[2] = _mm_add_epi32(v[2], k__DCT_CONST_ROUNDING); u[3] = _mm_add_epi32(v[3], k__DCT_CONST_ROUNDING); u[4] = _mm_add_epi32(v[4], k__DCT_CONST_ROUNDING); u[5] = _mm_add_epi32(v[5], k__DCT_CONST_ROUNDING); u[6] = _mm_add_epi32(v[6], k__DCT_CONST_ROUNDING); u[7] = _mm_add_epi32(v[7], k__DCT_CONST_ROUNDING); u[8] = _mm_add_epi32(v[8], k__DCT_CONST_ROUNDING); u[9] = _mm_add_epi32(v[9], k__DCT_CONST_ROUNDING); u[10] = _mm_add_epi32(v[10], k__DCT_CONST_ROUNDING); u[11] = _mm_add_epi32(v[11], k__DCT_CONST_ROUNDING); u[12] = _mm_add_epi32(v[12], k__DCT_CONST_ROUNDING); u[13] = _mm_add_epi32(v[13], k__DCT_CONST_ROUNDING); u[14] = _mm_add_epi32(v[14], k__DCT_CONST_ROUNDING); u[15] = _mm_add_epi32(v[15], k__DCT_CONST_ROUNDING); v[0] = _mm_srai_epi32(u[0], DCT_CONST_BITS); v[1] = _mm_srai_epi32(u[1], DCT_CONST_BITS); v[2] = _mm_srai_epi32(u[2], DCT_CONST_BITS); v[3] = _mm_srai_epi32(u[3], DCT_CONST_BITS); v[4] = _mm_srai_epi32(u[4], DCT_CONST_BITS); v[5] = _mm_srai_epi32(u[5], DCT_CONST_BITS); v[6] = _mm_srai_epi32(u[6], DCT_CONST_BITS); v[7] = _mm_srai_epi32(u[7], DCT_CONST_BITS); v[8] = _mm_srai_epi32(u[8], DCT_CONST_BITS); v[9] = _mm_srai_epi32(u[9], DCT_CONST_BITS); v[10] = _mm_srai_epi32(u[10], DCT_CONST_BITS); v[11] = _mm_srai_epi32(u[11], DCT_CONST_BITS); v[12] = _mm_srai_epi32(u[12], DCT_CONST_BITS); v[13] = _mm_srai_epi32(u[13], DCT_CONST_BITS); v[14] = _mm_srai_epi32(u[14], DCT_CONST_BITS); v[15] = _mm_srai_epi32(u[15], DCT_CONST_BITS); in[0] = s[0]; in[1] = _mm_sub_epi16(kZero, s[8]); in[2] = s[12]; in[3] = 
_mm_sub_epi16(kZero, s[4]); in[4] = _mm_packs_epi32(v[4], v[5]); in[5] = _mm_packs_epi32(v[12], v[13]); in[6] = _mm_packs_epi32(v[8], v[9]); in[7] = _mm_packs_epi32(v[0], v[1]); in[8] = _mm_packs_epi32(v[2], v[3]); in[9] = _mm_packs_epi32(v[10], v[11]); in[10] = _mm_packs_epi32(v[14], v[15]); in[11] = _mm_packs_epi32(v[6], v[7]); in[12] = s[5]; in[13] = _mm_sub_epi16(kZero, s[13]); in[14] = s[9]; in[15] = _mm_sub_epi16(kZero, s[1]); } static void fdct16_sse2(__m128i *in0, __m128i *in1) { fdct16_8col(in0); fdct16_8col(in1); transpose_16bit_16x16(in0, in1); } static void fadst16_sse2(__m128i *in0, __m128i *in1) { fadst16_8col(in0); fadst16_8col(in1); transpose_16bit_16x16(in0, in1); } void vp9_fht16x16_sse2(const int16_t *input, tran_low_t *output, int stride, int tx_type) { __m128i in0[16], in1[16]; switch (tx_type) { case DCT_DCT: vpx_fdct16x16_sse2(input, output, stride); break; case ADST_DCT: load_buffer_16x16(input, in0, in1, stride); fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); fdct16_sse2(in0, in1); write_buffer_16x16(output, in0, in1, 16); break; case DCT_ADST: load_buffer_16x16(input, in0, in1, stride); fdct16_sse2(in0, in1); right_shift_16x16(in0, in1); fadst16_sse2(in0, in1); write_buffer_16x16(output, in0, in1, 16); break; default: assert(tx_type == ADST_ADST); load_buffer_16x16(input, in0, in1, stride); fadst16_sse2(in0, in1); right_shift_16x16(in0, in1); fadst16_sse2(in0, in1); write_buffer_16x16(output, in0, in1, 16); break; } } libvpx-1.8.2/vp9/encoder/x86/vp9_dct_sse2.asm000066400000000000000000000041561357355204000206300ustar00rootroot00000000000000; ; Copyright (c) 2016 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %define private_prefix vp9 %include "third_party/x86inc/x86inc.asm" %include "vpx_dsp/x86/bitdepth_conversion_sse2.asm" SECTION .text %macro TRANSFORM_COLS 0 paddw m0, m1 movq m4, m0 psubw m3, m2 psubw m4, m3 psraw m4, 1 movq m5, m4 psubw m5, m1 ;b1 psubw m4, m2 ;c1 psubw m0, m4 paddw m3, m5 ; m0 a0 SWAP 1, 4 ; m1 c1 SWAP 2, 3 ; m2 d1 SWAP 3, 5 ; m3 b1 %endmacro %macro TRANSPOSE_4X4 0 ; 00 01 02 03 ; 10 11 12 13 ; 20 21 22 23 ; 30 31 32 33 punpcklwd m0, m1 ; 00 10 01 11 02 12 03 13 punpcklwd m2, m3 ; 20 30 21 31 22 32 23 33 mova m1, m0 punpckldq m0, m2 ; 00 10 20 30 01 11 21 31 punpckhdq m1, m2 ; 02 12 22 32 03 13 23 33 %endmacro INIT_XMM sse2 cglobal fwht4x4, 3, 4, 8, input, output, stride lea r3q, [inputq + strideq*4] movq m0, [inputq] ;a1 movq m1, [inputq + strideq*2] ;b1 movq m2, [r3q] ;c1 movq m3, [r3q + strideq*2] ;d1 TRANSFORM_COLS TRANSPOSE_4X4 SWAP 1, 2 psrldq m1, m0, 8 psrldq m3, m2, 8 TRANSFORM_COLS TRANSPOSE_4X4 psllw m0, 2 psllw m1, 2 STORE_TRAN_LOW 0, outputq, 0, 2, 3 STORE_TRAN_LOW 1, outputq, 8, 2, 3 RET libvpx-1.8.2/vp9/encoder/x86/vp9_denoiser_sse2.c000066400000000000000000000331361357355204000213300ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <emmintrin.h> #include "./vpx_config.h" #include "./vp9_rtcd.h" #include "vpx/vpx_integer.h" #include "vp9/common/vp9_reconinter.h" #include "vp9/encoder/vp9_context_tree.h" #include "vp9/encoder/vp9_denoiser.h" #include "vpx_mem/vpx_mem.h" // Compute the sum of all pixel differences of this MB. static INLINE int sum_diff_16x1(__m128i acc_diff) { const __m128i k_1 = _mm_set1_epi16(1); const __m128i acc_diff_lo = _mm_srai_epi16(_mm_unpacklo_epi8(acc_diff, acc_diff), 8); const __m128i acc_diff_hi = _mm_srai_epi16(_mm_unpackhi_epi8(acc_diff, acc_diff), 8); const __m128i acc_diff_16 = _mm_add_epi16(acc_diff_lo, acc_diff_hi); const __m128i hg_fe_dc_ba = _mm_madd_epi16(acc_diff_16, k_1); const __m128i hgfe_dcba = _mm_add_epi32(hg_fe_dc_ba, _mm_srli_si128(hg_fe_dc_ba, 8)); const __m128i hgfedcba = _mm_add_epi32(hgfe_dcba, _mm_srli_si128(hgfe_dcba, 4)); return _mm_cvtsi128_si32(hgfedcba); } // Denoise a 16x1 vector. static INLINE __m128i vp9_denoiser_16x1_sse2( const uint8_t *sig, const uint8_t *mc_running_avg_y, uint8_t *running_avg_y, const __m128i *k_0, const __m128i *k_4, const __m128i *k_8, const __m128i *k_16, const __m128i *l3, const __m128i *l32, const __m128i *l21, __m128i acc_diff) { // Calculate differences const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0])); const __m128i v_mc_running_avg_y = _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0])); __m128i v_running_avg_y; const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); // Obtain the sign. FF if diff is negative. const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, *k_0); // Clamp absolute difference to 16 to be used to get mask. Doing this // allows us to use _mm_cmpgt_epi8, which operates on signed byte. const __m128i clamped_absdiff = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), *k_16); // Get masks for l2 l1 and l0 adjustments. const __m128i mask2 = _mm_cmpgt_epi8(*k_16, clamped_absdiff); const __m128i mask1 = _mm_cmpgt_epi8(*k_8, clamped_absdiff); const __m128i mask0 = _mm_cmpgt_epi8(*k_4, clamped_absdiff); // Get adjustments for l2, l1, and l0. __m128i adj2 = _mm_and_si128(mask2, *l32); const __m128i adj1 = _mm_and_si128(mask1, *l21); const __m128i adj0 = _mm_and_si128(mask0, clamped_absdiff); __m128i adj, padj, nadj; // Combine the adjustments and get absolute adjustments. adj2 = _mm_add_epi8(adj2, adj1); adj = _mm_sub_epi8(*l3, adj2); adj = _mm_andnot_si128(mask0, adj); adj = _mm_or_si128(adj, adj0); // Restore the sign and get positive and negative adjustments. padj = _mm_andnot_si128(diff_sign, adj); nadj = _mm_and_si128(diff_sign, adj); // Calculate filtered value. v_running_avg_y = _mm_adds_epu8(v_sig, padj); v_running_avg_y = _mm_subs_epu8(v_running_avg_y, nadj); _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y); // Adjustments <=7, and each element in acc_diff can fit in signed // char. acc_diff = _mm_adds_epi8(acc_diff, padj); acc_diff = _mm_subs_epi8(acc_diff, nadj); return acc_diff; } // Denoise a 16x1 vector with a weaker filter. static INLINE __m128i vp9_denoiser_adj_16x1_sse2( const uint8_t *sig, const uint8_t *mc_running_avg_y, uint8_t *running_avg_y, const __m128i k_0, const __m128i k_delta, __m128i acc_diff) { __m128i v_running_avg_y = _mm_loadu_si128((__m128i *)(&running_avg_y[0])); // Calculate differences.
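// The pair of saturating subtractions below splits the per-byte difference
// by sign: pdiff = max(mc_avg - sig, 0) and ndiff = max(sig - mc_avg, 0), so
// at most one of the two is nonzero and (pdiff | ndiff) equals
// |mc_avg - sig|. Worked example for one byte: sig = 100, mc_avg = 97 gives
// pdiff = 0, ndiff = 3, and diff_sign = 0xFF (the "diff is negative" mask).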
const __m128i v_sig = _mm_loadu_si128((const __m128i *)(&sig[0])); const __m128i v_mc_running_avg_y = _mm_loadu_si128((const __m128i *)(&mc_running_avg_y[0])); const __m128i pdiff = _mm_subs_epu8(v_mc_running_avg_y, v_sig); const __m128i ndiff = _mm_subs_epu8(v_sig, v_mc_running_avg_y); // Obtain the sign. FF if diff is negative. const __m128i diff_sign = _mm_cmpeq_epi8(pdiff, k_0); // Clamp absolute difference to delta to get the adjustment. const __m128i adj = _mm_min_epu8(_mm_or_si128(pdiff, ndiff), k_delta); // Restore the sign and get positive and negative adjustments. __m128i padj, nadj; padj = _mm_andnot_si128(diff_sign, adj); nadj = _mm_and_si128(diff_sign, adj); // Calculate filtered value. v_running_avg_y = _mm_subs_epu8(v_running_avg_y, padj); v_running_avg_y = _mm_adds_epu8(v_running_avg_y, nadj); _mm_storeu_si128((__m128i *)running_avg_y, v_running_avg_y); // Accumulate the adjustments. acc_diff = _mm_subs_epi8(acc_diff, padj); acc_diff = _mm_adds_epi8(acc_diff, nadj); return acc_diff; } // Denoise 8x8 and 8x16 blocks. static int vp9_denoiser_NxM_sse2_small(const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y, int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude, int width) { int sum_diff_thresh, r, sum_diff = 0; const int shift_inc = (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0; uint8_t sig_buffer[8][16], mc_running_buffer[8][16], running_buffer[8][16]; __m128i acc_diff = _mm_setzero_si128(); const __m128i k_0 = _mm_setzero_si128(); const __m128i k_4 = _mm_set1_epi8(4 + shift_inc); const __m128i k_8 = _mm_set1_epi8(8); const __m128i k_16 = _mm_set1_epi8(16); // Modify each level's adjustment according to motion_magnitude. const __m128i l3 = _mm_set1_epi8( (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6); // Difference between level 3 and level 2 is 2. const __m128i l32 = _mm_set1_epi8(2); // Difference between level 2 and level 1 is 1. const __m128i l21 = _mm_set1_epi8(1); const int b_height = (4 << b_height_log2_lookup[bs]) >> 1; for (r = 0; r < b_height; ++r) { memcpy(sig_buffer[r], sig, width); memcpy(sig_buffer[r] + width, sig + sig_stride, width); memcpy(mc_running_buffer[r], mc_running_avg_y, width); memcpy(mc_running_buffer[r] + width, mc_running_avg_y + mc_avg_y_stride, width); memcpy(running_buffer[r], running_avg_y, width); memcpy(running_buffer[r] + width, running_avg_y + avg_y_stride, width); acc_diff = vp9_denoiser_16x1_sse2(sig_buffer[r], mc_running_buffer[r], running_buffer[r], &k_0, &k_4, &k_8, &k_16, &l3, &l32, &l21, acc_diff); memcpy(running_avg_y, running_buffer[r], width); memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width); // Update pointers for next iteration. sig += (sig_stride << 1); mc_running_avg_y += (mc_avg_y_stride << 1); running_avg_y += (avg_y_stride << 1); } { sum_diff = sum_diff_16x1(acc_diff); sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising); if (abs(sum_diff) > sum_diff_thresh) { // Before returning to copy the block (i.e., apply no denoising), // check if we can still apply some (weaker) temporal filtering to // this block, that would otherwise not be denoised at all. Simplest // is to apply an additional adjustment to running_avg_y to bring it // closer to sig. The adjustment is capped by a maximum delta, and // chosen such that in most cases the resulting sum_diff will be // within the acceptable range given by sum_diff_thresh. 
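// As a rough scalar model of this capped adjustment (an illustrative
// sketch only, not part of the library; 'delta' is the cap described and
// computed just below, and VPXMIN is assumed available):
#if 0
      {
        int p;
        for (p = 0; p < 16; ++p) {
          const int diff = mc_running_avg_y[p] - sig[p];
          const int adj = VPXMIN(abs(diff), delta);  // cap by the max delta
          // Nudge the running average towards the source pixel.
          running_avg_y[p] -= (diff > 0) ? adj : -adj;
        }
      }
#endif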
// The delta is set by the excess of absolute pixel diff over the // threshold. const int delta = ((abs(sum_diff) - sum_diff_thresh) >> num_pels_log2_lookup[bs]) + 1; // Only apply the adjustment for max delta up to 3. if (delta < 4) { const __m128i k_delta = _mm_set1_epi8(delta); running_avg_y -= avg_y_stride * (b_height << 1); for (r = 0; r < b_height; ++r) { acc_diff = vp9_denoiser_adj_16x1_sse2( sig_buffer[r], mc_running_buffer[r], running_buffer[r], k_0, k_delta, acc_diff); memcpy(running_avg_y, running_buffer[r], width); memcpy(running_avg_y + avg_y_stride, running_buffer[r] + width, width); // Update pointers for next iteration. running_avg_y += (avg_y_stride << 1); } sum_diff = sum_diff_16x1(acc_diff); if (abs(sum_diff) > sum_diff_thresh) { return COPY_BLOCK; } } else { return COPY_BLOCK; } } } return FILTER_BLOCK; } // Denoise 16x16, 16x32, 32x16, 32x32, 32x64, 64x32 and 64x64 blocks. static int vp9_denoiser_NxM_sse2_big(const uint8_t *sig, int sig_stride, const uint8_t *mc_running_avg_y, int mc_avg_y_stride, uint8_t *running_avg_y, int avg_y_stride, int increase_denoising, BLOCK_SIZE bs, int motion_magnitude) { int sum_diff_thresh, r, c, sum_diff = 0; const int shift_inc = (increase_denoising && motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 1 : 0; __m128i acc_diff[4][4]; const __m128i k_0 = _mm_setzero_si128(); const __m128i k_4 = _mm_set1_epi8(4 + shift_inc); const __m128i k_8 = _mm_set1_epi8(8); const __m128i k_16 = _mm_set1_epi8(16); // Modify each level's adjustment according to motion_magnitude. const __m128i l3 = _mm_set1_epi8( (motion_magnitude <= MOTION_MAGNITUDE_THRESHOLD) ? 7 + shift_inc : 6); // Difference between level 3 and level 2 is 2. const __m128i l32 = _mm_set1_epi8(2); // Difference between level 2 and level 1 is 1. const __m128i l21 = _mm_set1_epi8(1); const int b_width = (4 << b_width_log2_lookup[bs]); const int b_height = (4 << b_height_log2_lookup[bs]); const int b_width_shift4 = b_width >> 4; for (r = 0; r < 4; ++r) { for (c = 0; c < b_width_shift4; ++c) { acc_diff[c][r] = _mm_setzero_si128(); } } for (r = 0; r < b_height; ++r) { for (c = 0; c < b_width_shift4; ++c) { acc_diff[c][r >> 4] = vp9_denoiser_16x1_sse2( sig, mc_running_avg_y, running_avg_y, &k_0, &k_4, &k_8, &k_16, &l3, &l32, &l21, acc_diff[c][r >> 4]); // Update pointers for next iteration. sig += 16; mc_running_avg_y += 16; running_avg_y += 16; } if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) { for (c = 0; c < b_width_shift4; ++c) { sum_diff += sum_diff_16x1(acc_diff[c][r >> 4]); } } // Update pointers for next iteration. sig = sig - b_width + sig_stride; mc_running_avg_y = mc_running_avg_y - b_width + mc_avg_y_stride; running_avg_y = running_avg_y - b_width + avg_y_stride; } { sum_diff_thresh = total_adj_strong_thresh(bs, increase_denoising); if (abs(sum_diff) > sum_diff_thresh) { const int delta = ((abs(sum_diff) - sum_diff_thresh) >> num_pels_log2_lookup[bs]) + 1; // Only apply the adjustment for max delta up to 3. if (delta < 4) { const __m128i k_delta = _mm_set1_epi8(delta); sig -= sig_stride * b_height; mc_running_avg_y -= mc_avg_y_stride * b_height; running_avg_y -= avg_y_stride * b_height; sum_diff = 0; for (r = 0; r < b_height; ++r) { for (c = 0; c < b_width_shift4; ++c) { acc_diff[c][r >> 4] = vp9_denoiser_adj_16x1_sse2(sig, mc_running_avg_y, running_avg_y, k_0, k_delta, acc_diff[c][r >> 4]); // Update pointers for next iteration. 
        sig += 16;
        mc_running_avg_y += 16;
        running_avg_y += 16;
      }
      if ((r & 0xf) == 0xf || (bs == BLOCK_16X8 && r == 7)) {
        for (c = 0; c < b_width_shift4; ++c) {
          sum_diff += sum_diff_16x1(acc_diff[c][r >> 4]);
        }
      }
      sig = sig - b_width + sig_stride;
      mc_running_avg_y = mc_running_avg_y - b_width + mc_avg_y_stride;
      running_avg_y = running_avg_y - b_width + avg_y_stride;
    }
    if (abs(sum_diff) > sum_diff_thresh) {
      return COPY_BLOCK;
    }
  } else {
    return COPY_BLOCK;
  }
}
}
return FILTER_BLOCK;
}

int vp9_denoiser_filter_sse2(const uint8_t *sig, int sig_stride,
                             const uint8_t *mc_avg, int mc_avg_stride,
                             uint8_t *avg, int avg_stride,
                             int increase_denoising, BLOCK_SIZE bs,
                             int motion_magnitude) {
  // Rank by frequency of the block type to have an early termination.
  if (bs == BLOCK_16X16 || bs == BLOCK_32X32 || bs == BLOCK_64X64 ||
      bs == BLOCK_16X32 || bs == BLOCK_16X8 || bs == BLOCK_32X16 ||
      bs == BLOCK_32X64 || bs == BLOCK_64X32) {
    return vp9_denoiser_NxM_sse2_big(sig, sig_stride, mc_avg, mc_avg_stride,
                                     avg, avg_stride, increase_denoising, bs,
                                     motion_magnitude);
  } else if (bs == BLOCK_8X8 || bs == BLOCK_8X16) {
    return vp9_denoiser_NxM_sse2_small(sig, sig_stride, mc_avg, mc_avg_stride,
                                       avg, avg_stride, increase_denoising, bs,
                                       motion_magnitude, 8);
  } else {
    return COPY_BLOCK;
  }
}
libvpx-1.8.2/vp9/encoder/x86/vp9_diamond_search_sad_avx.c000066400000000000000000000311471357355204000232310ustar00rootroot00000000000000/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#if defined(_MSC_VER)
#include <intrin.h>
#endif
#include <emmintrin.h>
#include <smmintrin.h>

#include "vpx_dsp/vpx_dsp_common.h"
#include "vp9/encoder/vp9_encoder.h"
#include "vpx_ports/mem.h"

#ifdef __GNUC__
#define LIKELY(v) __builtin_expect(v, 1)
#define UNLIKELY(v) __builtin_expect(v, 0)
#else
#define LIKELY(v) (v)
#define UNLIKELY(v) (v)
#endif

static INLINE int_mv pack_int_mv(int16_t row, int16_t col) {
  int_mv result;
  result.as_mv.row = row;
  result.as_mv.col = col;
  return result;
}

static INLINE MV_JOINT_TYPE get_mv_joint(const int_mv mv) {
  // This is simplified from the C implementation to utilise that
  //  x->nmvjointsadcost[1] == x->nmvjointsadcost[2]  and
  //  x->nmvjointsadcost[1] == x->nmvjointsadcost[3]
  return mv.as_int == 0 ? 0 : 1;
}

static INLINE int mv_cost(const int_mv mv, const int *joint_cost,
                          int *const comp_cost[2]) {
  return joint_cost[get_mv_joint(mv)] + comp_cost[0][mv.as_mv.row] +
         comp_cost[1][mv.as_mv.col];
}

static int mvsad_err_cost(const MACROBLOCK *x, const int_mv mv, const MV *ref,
                          int sad_per_bit) {
  const int_mv diff =
      pack_int_mv(mv.as_mv.row - ref->row, mv.as_mv.col - ref->col);
  return ROUND_POWER_OF_TWO(
      (unsigned)mv_cost(diff, x->nmvjointsadcost, x->nmvsadcost) * sad_per_bit,
      VP9_PROB_COST_SHIFT);
}

/*****************************************************************************
 * This function utilizes 3 properties of the cost function lookup tables,  *
 * constructed using 'cal_nmvjointsadcost' and 'cal_nmvsadcosts' in         *
 * vp9_encoder.c.
* * For the joint cost: * * - mvjointsadcost[1] == mvjointsadcost[2] == mvjointsadcost[3] * * For the component costs: * * - For all i: mvsadcost[0][i] == mvsadcost[1][i] * * (Equal costs for both components) * * - For all i: mvsadcost[0][i] == mvsadcost[0][-i] * * (Cost function is even) * * If these do not hold, then this function cannot be used without * * modification, in which case you can revert to using the C implementation, * * which does not rely on these properties. * *****************************************************************************/ int vp9_diamond_search_sad_avx(const MACROBLOCK *x, const search_site_config *cfg, MV *ref_mv, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, const MV *center_mv) { const int_mv maxmv = pack_int_mv(x->mv_limits.row_max, x->mv_limits.col_max); const __m128i v_max_mv_w = _mm_set1_epi32(maxmv.as_int); const int_mv minmv = pack_int_mv(x->mv_limits.row_min, x->mv_limits.col_min); const __m128i v_min_mv_w = _mm_set1_epi32(minmv.as_int); const __m128i v_spb_d = _mm_set1_epi32(sad_per_bit); const __m128i v_joint_cost_0_d = _mm_set1_epi32(x->nmvjointsadcost[0]); const __m128i v_joint_cost_1_d = _mm_set1_epi32(x->nmvjointsadcost[1]); // search_param determines the length of the initial step and hence the number // of iterations. // 0 = initial step (MAX_FIRST_STEP) pel // 1 = (MAX_FIRST_STEP/2) pel, // 2 = (MAX_FIRST_STEP/4) pel... const MV *ss_mv = &cfg->ss_mv[cfg->searches_per_step * search_param]; const intptr_t *ss_os = &cfg->ss_os[cfg->searches_per_step * search_param]; const int tot_steps = cfg->total_steps - search_param; const int_mv fcenter_mv = pack_int_mv(center_mv->row >> 3, center_mv->col >> 3); const __m128i vfcmv = _mm_set1_epi32(fcenter_mv.as_int); const int ref_row = clamp(ref_mv->row, minmv.as_mv.row, maxmv.as_mv.row); const int ref_col = clamp(ref_mv->col, minmv.as_mv.col, maxmv.as_mv.col); int_mv bmv = pack_int_mv(ref_row, ref_col); int_mv new_bmv = bmv; __m128i v_bmv_w = _mm_set1_epi32(bmv.as_int); const int what_stride = x->plane[0].src.stride; const int in_what_stride = x->e_mbd.plane[0].pre[0].stride; const uint8_t *const what = x->plane[0].src.buf; const uint8_t *const in_what = x->e_mbd.plane[0].pre[0].buf + ref_row * in_what_stride + ref_col; // Work out the start point for the search const uint8_t *best_address = in_what; const uint8_t *new_best_address = best_address; #if VPX_ARCH_X86_64 __m128i v_ba_q = _mm_set1_epi64x((intptr_t)best_address); #else __m128i v_ba_d = _mm_set1_epi32((intptr_t)best_address); #endif unsigned int best_sad; int i, j, step; // Check the prerequisite cost function properties that are easy to check // in an assert. See the function-level documentation for details on all // prerequisites. 
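// A debug-only sketch of how the cost-table properties that the assertions
// below cannot cheaply cover could also be verified (hypothetical check,
// not part of the library; it assumes the component tables are indexable
// over [-MV_MAX, MV_MAX], as set up in vp9_encoder.c):
#if 0
  {
    int k;
    for (k = 0; k <= MV_MAX; ++k) {
      assert(x->nmvsadcost[0][k] == x->nmvsadcost[1][k]);   // equal components
      assert(x->nmvsadcost[0][k] == x->nmvsadcost[0][-k]);  // even function
    }
  }
#endif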
assert(x->nmvjointsadcost[1] == x->nmvjointsadcost[2]); assert(x->nmvjointsadcost[1] == x->nmvjointsadcost[3]); // Check the starting position best_sad = fn_ptr->sdf(what, what_stride, in_what, in_what_stride); best_sad += mvsad_err_cost(x, bmv, &fcenter_mv.as_mv, sad_per_bit); *num00 = 0; for (i = 0, step = 0; step < tot_steps; step++) { for (j = 0; j < cfg->searches_per_step; j += 4, i += 4) { __m128i v_sad_d, v_cost_d, v_outside_d, v_inside_d, v_diff_mv_w; #if VPX_ARCH_X86_64 __m128i v_blocka[2]; #else __m128i v_blocka[1]; #endif // Compute the candidate motion vectors const __m128i v_ss_mv_w = _mm_loadu_si128((const __m128i *)&ss_mv[i]); const __m128i v_these_mv_w = _mm_add_epi16(v_bmv_w, v_ss_mv_w); // Clamp them to the search bounds __m128i v_these_mv_clamp_w = v_these_mv_w; v_these_mv_clamp_w = _mm_min_epi16(v_these_mv_clamp_w, v_max_mv_w); v_these_mv_clamp_w = _mm_max_epi16(v_these_mv_clamp_w, v_min_mv_w); // The ones that did not change are inside the search area v_inside_d = _mm_cmpeq_epi32(v_these_mv_clamp_w, v_these_mv_w); // If none of them are inside, then move on if (LIKELY(_mm_test_all_zeros(v_inside_d, v_inside_d))) { continue; } // The inverse mask indicates which of the MVs are outside v_outside_d = _mm_xor_si128(v_inside_d, _mm_set1_epi8((int8_t)0xff)); // Shift right to keep the sign bit clear, we will use this later // to set the cost to the maximum value. v_outside_d = _mm_srli_epi32(v_outside_d, 1); // Compute the difference MV v_diff_mv_w = _mm_sub_epi16(v_these_mv_clamp_w, vfcmv); // We utilise the fact that the cost function is even, and use the // absolute difference. This allows us to use unsigned indexes later // and reduces cache pressure somewhat as only a half of the table // is ever referenced. v_diff_mv_w = _mm_abs_epi16(v_diff_mv_w); // Compute the SIMD pointer offsets. 
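      // (Offsets of clamped-out candidates are masked to zero below so that
      // their computed addresses stay equal to the valid base pointer; their
      // SADs are still computed, but are later forced to maximum cost via
      // v_outside_d, so they can never be selected.)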
{ #if VPX_ARCH_X86_64 // sizeof(intptr_t) == 8 // Load the offsets __m128i v_bo10_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 0]); __m128i v_bo32_q = _mm_loadu_si128((const __m128i *)&ss_os[i + 2]); // Set the ones falling outside to zero v_bo10_q = _mm_and_si128(v_bo10_q, _mm_cvtepi32_epi64(v_inside_d)); v_bo32_q = _mm_and_si128(v_bo32_q, _mm_unpackhi_epi32(v_inside_d, v_inside_d)); // Compute the candidate addresses v_blocka[0] = _mm_add_epi64(v_ba_q, v_bo10_q); v_blocka[1] = _mm_add_epi64(v_ba_q, v_bo32_q); #else // VPX_ARCH_X86 // sizeof(intptr_t) == 4 __m128i v_bo_d = _mm_loadu_si128((const __m128i *)&ss_os[i]); v_bo_d = _mm_and_si128(v_bo_d, v_inside_d); v_blocka[0] = _mm_add_epi32(v_ba_d, v_bo_d); #endif } fn_ptr->sdx4df(what, what_stride, (const uint8_t **)&v_blocka[0], in_what_stride, (uint32_t *)&v_sad_d); // Look up the component cost of the residual motion vector { const int32_t row0 = _mm_extract_epi16(v_diff_mv_w, 0); const int32_t col0 = _mm_extract_epi16(v_diff_mv_w, 1); const int32_t row1 = _mm_extract_epi16(v_diff_mv_w, 2); const int32_t col1 = _mm_extract_epi16(v_diff_mv_w, 3); const int32_t row2 = _mm_extract_epi16(v_diff_mv_w, 4); const int32_t col2 = _mm_extract_epi16(v_diff_mv_w, 5); const int32_t row3 = _mm_extract_epi16(v_diff_mv_w, 6); const int32_t col3 = _mm_extract_epi16(v_diff_mv_w, 7); // Note: This is a use case for vpgather in AVX2 const uint32_t cost0 = x->nmvsadcost[0][row0] + x->nmvsadcost[0][col0]; const uint32_t cost1 = x->nmvsadcost[0][row1] + x->nmvsadcost[0][col1]; const uint32_t cost2 = x->nmvsadcost[0][row2] + x->nmvsadcost[0][col2]; const uint32_t cost3 = x->nmvsadcost[0][row3] + x->nmvsadcost[0][col3]; __m128i v_cost_10_d, v_cost_32_d; v_cost_10_d = _mm_cvtsi32_si128(cost0); v_cost_10_d = _mm_insert_epi32(v_cost_10_d, cost1, 1); v_cost_32_d = _mm_cvtsi32_si128(cost2); v_cost_32_d = _mm_insert_epi32(v_cost_32_d, cost3, 1); v_cost_d = _mm_unpacklo_epi64(v_cost_10_d, v_cost_32_d); } // Now add in the joint cost { const __m128i v_sel_d = _mm_cmpeq_epi32(v_diff_mv_w, _mm_setzero_si128()); const __m128i v_joint_cost_d = _mm_blendv_epi8(v_joint_cost_1_d, v_joint_cost_0_d, v_sel_d); v_cost_d = _mm_add_epi32(v_cost_d, v_joint_cost_d); } // Multiply by sad_per_bit v_cost_d = _mm_mullo_epi32(v_cost_d, v_spb_d); // ROUND_POWER_OF_TWO(v_cost_d, VP9_PROB_COST_SHIFT) v_cost_d = _mm_add_epi32(v_cost_d, _mm_set1_epi32(1 << (VP9_PROB_COST_SHIFT - 1))); v_cost_d = _mm_srai_epi32(v_cost_d, VP9_PROB_COST_SHIFT); // Add the cost to the sad v_sad_d = _mm_add_epi32(v_sad_d, v_cost_d); // Make the motion vectors outside the search area have max cost // by or'ing in the comparison mask, this way the minimum search won't // pick them. v_sad_d = _mm_or_si128(v_sad_d, v_outside_d); // Find the minimum value and index horizontally in v_sad_d { // Try speculatively on 16 bits, so we can use the minpos intrinsic const __m128i v_sad_w = _mm_packus_epi32(v_sad_d, v_sad_d); const __m128i v_minp_w = _mm_minpos_epu16(v_sad_w); uint32_t local_best_sad = _mm_extract_epi16(v_minp_w, 0); uint32_t local_best_idx = _mm_extract_epi16(v_minp_w, 1); // If the local best value is not saturated, just use it, otherwise // find the horizontal minimum again the hard way on 32 bits. // This is executed rarely. 
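        // (_mm_packus_epi32() saturates any 32-bit SAD-plus-cost value above
        // 0xffff to 0xffff, so a speculative 16-bit minimum of exactly 0xffff
        // is the only case in which the narrow result could misrepresent the
        // true 32-bit minimum.)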
        if (UNLIKELY(local_best_sad == 0xffff)) {
          __m128i v_loval_d, v_hival_d, v_loidx_d, v_hiidx_d, v_sel_d;

          v_loval_d = v_sad_d;
          v_loidx_d = _mm_set_epi32(3, 2, 1, 0);
          v_hival_d = _mm_srli_si128(v_loval_d, 8);
          v_hiidx_d = _mm_srli_si128(v_loidx_d, 8);

          v_sel_d = _mm_cmplt_epi32(v_hival_d, v_loval_d);

          v_loval_d = _mm_blendv_epi8(v_loval_d, v_hival_d, v_sel_d);
          v_loidx_d = _mm_blendv_epi8(v_loidx_d, v_hiidx_d, v_sel_d);
          v_hival_d = _mm_srli_si128(v_loval_d, 4);
          v_hiidx_d = _mm_srli_si128(v_loidx_d, 4);

          v_sel_d = _mm_cmplt_epi32(v_hival_d, v_loval_d);

          v_loval_d = _mm_blendv_epi8(v_loval_d, v_hival_d, v_sel_d);
          v_loidx_d = _mm_blendv_epi8(v_loidx_d, v_hiidx_d, v_sel_d);

          local_best_sad = _mm_extract_epi32(v_loval_d, 0);
          local_best_idx = _mm_extract_epi32(v_loidx_d, 0);
        }

        // Update the global minimum if the local minimum is smaller
        if (LIKELY(local_best_sad < best_sad)) {
          new_bmv = ((const int_mv *)&v_these_mv_w)[local_best_idx];
          new_best_address = ((const uint8_t **)v_blocka)[local_best_idx];
          best_sad = local_best_sad;
        }
      }
    }

    bmv = new_bmv;
    best_address = new_best_address;

    v_bmv_w = _mm_set1_epi32(bmv.as_int);
#if VPX_ARCH_X86_64
    v_ba_q = _mm_set1_epi64x((intptr_t)best_address);
#else
    v_ba_d = _mm_set1_epi32((intptr_t)best_address);
#endif

    if (UNLIKELY(best_address == in_what)) {
      (*num00)++;
    }
  }

  *best_mv = bmv.as_mv;
  return best_sad;
}
libvpx-1.8.2/vp9/encoder/x86/vp9_error_avx2.c000066400000000000000000000150011357355204000206470ustar00rootroot00000000000000/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <immintrin.h>

#include "./vp9_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/x86/bitdepth_conversion_avx2.h"

int64_t vp9_block_error_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff,
                             intptr_t block_size, int64_t *ssz) {
  __m256i sse_256, ssz_256;
  __m256i exp_dqcoeff_lo, exp_dqcoeff_hi, exp_coeff_lo, exp_coeff_hi;
  __m256i sse_hi, ssz_hi;
  __m128i sse_128, ssz_128;
  int64_t sse;
  const __m256i zero = _mm256_setzero_si256();

  // If the block size is 16 then the results will fit in 32 bits.
  if (block_size == 16) {
    __m256i coeff_256, dqcoeff_256, coeff_hi, dqcoeff_hi;
    // Load 16 elements for coeff and dqcoeff.
    coeff_256 = load_tran_low(coeff);
    dqcoeff_256 = load_tran_low(dqcoeff);
    // dqcoeff - coeff
    dqcoeff_256 = _mm256_sub_epi16(dqcoeff_256, coeff_256);
    // madd (dqcoeff - coeff)
    dqcoeff_256 = _mm256_madd_epi16(dqcoeff_256, dqcoeff_256);
    // madd coeff
    coeff_256 = _mm256_madd_epi16(coeff_256, coeff_256);
    // Save the higher 64 bit of each 128 bit lane.
    dqcoeff_hi = _mm256_srli_si256(dqcoeff_256, 8);
    coeff_hi = _mm256_srli_si256(coeff_256, 8);
    // Add the higher 64 bit to the low 64 bit.
    dqcoeff_256 = _mm256_add_epi32(dqcoeff_256, dqcoeff_hi);
    coeff_256 = _mm256_add_epi32(coeff_256, coeff_hi);
    // Expand each double word in the lower 64 bits to quad word.
    sse_256 = _mm256_unpacklo_epi32(dqcoeff_256, zero);
    ssz_256 = _mm256_unpacklo_epi32(coeff_256, zero);
  } else {
    int i;
    assert(block_size % 32 == 0);
    sse_256 = zero;
    ssz_256 = zero;

    for (i = 0; i < block_size; i += 32) {
      __m256i coeff_0, coeff_1, dqcoeff_0, dqcoeff_1;
      // Load 32 elements for coeff and dqcoeff.
coeff_0 = load_tran_low(coeff + i); dqcoeff_0 = load_tran_low(dqcoeff + i); coeff_1 = load_tran_low(coeff + i + 16); dqcoeff_1 = load_tran_low(dqcoeff + i + 16); // dqcoeff - coeff dqcoeff_0 = _mm256_sub_epi16(dqcoeff_0, coeff_0); dqcoeff_1 = _mm256_sub_epi16(dqcoeff_1, coeff_1); // madd (dqcoeff - coeff) dqcoeff_0 = _mm256_madd_epi16(dqcoeff_0, dqcoeff_0); dqcoeff_1 = _mm256_madd_epi16(dqcoeff_1, dqcoeff_1); // madd coeff coeff_0 = _mm256_madd_epi16(coeff_0, coeff_0); coeff_1 = _mm256_madd_epi16(coeff_1, coeff_1); // Add the first madd (dqcoeff - coeff) with the second. dqcoeff_0 = _mm256_add_epi32(dqcoeff_0, dqcoeff_1); // Add the first madd (coeff) with the second. coeff_0 = _mm256_add_epi32(coeff_0, coeff_1); // Expand each double word of madd (dqcoeff - coeff) to quad word. exp_dqcoeff_lo = _mm256_unpacklo_epi32(dqcoeff_0, zero); exp_dqcoeff_hi = _mm256_unpackhi_epi32(dqcoeff_0, zero); // expand each double word of madd (coeff) to quad word exp_coeff_lo = _mm256_unpacklo_epi32(coeff_0, zero); exp_coeff_hi = _mm256_unpackhi_epi32(coeff_0, zero); // Add each quad word of madd (dqcoeff - coeff) and madd (coeff). sse_256 = _mm256_add_epi64(sse_256, exp_dqcoeff_lo); ssz_256 = _mm256_add_epi64(ssz_256, exp_coeff_lo); sse_256 = _mm256_add_epi64(sse_256, exp_dqcoeff_hi); ssz_256 = _mm256_add_epi64(ssz_256, exp_coeff_hi); } } // Save the higher 64 bit of each 128 bit lane. sse_hi = _mm256_srli_si256(sse_256, 8); ssz_hi = _mm256_srli_si256(ssz_256, 8); // Add the higher 64 bit to the low 64 bit. sse_256 = _mm256_add_epi64(sse_256, sse_hi); ssz_256 = _mm256_add_epi64(ssz_256, ssz_hi); // Add each 64 bit from each of the 128 bit lane of the 256 bit. sse_128 = _mm_add_epi64(_mm256_castsi256_si128(sse_256), _mm256_extractf128_si256(sse_256, 1)); ssz_128 = _mm_add_epi64(_mm256_castsi256_si128(ssz_256), _mm256_extractf128_si256(ssz_256, 1)); // Store the results. _mm_storel_epi64((__m128i *)(&sse), sse_128); _mm_storel_epi64((__m128i *)(ssz), ssz_128); return sse; } int64_t vp9_block_error_fp_avx2(const tran_low_t *coeff, const tran_low_t *dqcoeff, int block_size) { int i; const __m256i zero = _mm256_setzero_si256(); __m256i sse_256 = zero; __m256i sse_hi; __m128i sse_128; int64_t sse; if (block_size == 16) { // Load 16 elements for coeff and dqcoeff. const __m256i _coeff = load_tran_low(coeff); const __m256i _dqcoeff = load_tran_low(dqcoeff); // dqcoeff - coeff const __m256i diff = _mm256_sub_epi16(_dqcoeff, _coeff); // madd (dqcoeff - coeff) const __m256i error_lo = _mm256_madd_epi16(diff, diff); // Save the higher 64 bit of each 128 bit lane. const __m256i error_hi = _mm256_srli_si256(error_lo, 8); // Add the higher 64 bit to the low 64 bit. const __m256i error = _mm256_add_epi32(error_lo, error_hi); // Expand each double word in the lower 64 bits to quad word. sse_256 = _mm256_unpacklo_epi32(error, zero); } else { for (i = 0; i < block_size; i += 16) { // Load 16 elements for coeff and dqcoeff. const __m256i _coeff = load_tran_low(coeff); const __m256i _dqcoeff = load_tran_low(dqcoeff); const __m256i diff = _mm256_sub_epi16(_dqcoeff, _coeff); const __m256i error = _mm256_madd_epi16(diff, diff); // Expand each double word of madd (dqcoeff - coeff) to quad word. const __m256i exp_error_lo = _mm256_unpacklo_epi32(error, zero); const __m256i exp_error_hi = _mm256_unpackhi_epi32(error, zero); // Add each quad word of madd (dqcoeff - coeff). 
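      // (The unpacklo/unpackhi with zero above zero-extends each 32-bit
      // partial sum, so the 64-bit accumulators below cannot wrap for any
      // legal block size.)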
sse_256 = _mm256_add_epi64(sse_256, exp_error_lo); sse_256 = _mm256_add_epi64(sse_256, exp_error_hi); coeff += 16; dqcoeff += 16; } } // Save the higher 64 bit of each 128 bit lane. sse_hi = _mm256_srli_si256(sse_256, 8); // Add the higher 64 bit to the low 64 bit. sse_256 = _mm256_add_epi64(sse_256, sse_hi); // Add each 64 bit from each of the 128 bit lane of the 256 bit. sse_128 = _mm_add_epi64(_mm256_castsi256_si128(sse_256), _mm256_extractf128_si256(sse_256, 1)); // Store the results. _mm_storel_epi64((__m128i *)&sse, sse_128); return sse; } libvpx-1.8.2/vp9/encoder/x86/vp9_error_sse2.asm000066400000000000000000000062531357355204000212070ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %define private_prefix vp9 %include "third_party/x86inc/x86inc.asm" %include "vpx_dsp/x86/bitdepth_conversion_sse2.asm" SECTION .text ; int64_t vp9_block_error(int16_t *coeff, int16_t *dqcoeff, intptr_t block_size, ; int64_t *ssz) INIT_XMM sse2 cglobal block_error, 3, 3, 8, uqc, dqc, size, ssz pxor m4, m4 ; sse accumulator pxor m6, m6 ; ssz accumulator pxor m5, m5 ; dedicated zero register .loop: LOAD_TRAN_LOW 2, uqcq, 0 LOAD_TRAN_LOW 0, dqcq, 0 LOAD_TRAN_LOW 3, uqcq, 8 LOAD_TRAN_LOW 1, dqcq, 8 INCREMENT_ELEMENTS_TRAN_LOW uqcq, 16 INCREMENT_ELEMENTS_TRAN_LOW dqcq, 16 sub sizeq, 16 psubw m0, m2 psubw m1, m3 ; individual errors are max. 15bit+sign, so squares are 30bit, and ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit) pmaddwd m0, m0 pmaddwd m1, m1 pmaddwd m2, m2 pmaddwd m3, m3 ; the sum of 2 31bit integers will fit in a 32bit unsigned integer paddd m0, m1 paddd m2, m3 ; accumulate in 64bit punpckldq m7, m0, m5 punpckhdq m0, m5 paddq m4, m7 punpckldq m7, m2, m5 paddq m4, m0 punpckhdq m2, m5 paddq m6, m7 paddq m6, m2 jg .loop ; accumulate horizontally and store in return value movhlps m5, m4 movhlps m7, m6 paddq m4, m5 paddq m6, m7 %if VPX_ARCH_X86_64 movq rax, m4 movq [sszq], m6 %else mov eax, sszm pshufd m5, m4, 0x1 movq [eax], m6 movd eax, m4 movd edx, m5 %endif RET ; Compute the sum of squared difference between two tran_low_t vectors. ; Vectors are converted (if necessary) to int16_t for calculations. ; int64_t vp9_block_error_fp(tran_low_t *coeff, tran_low_t *dqcoeff, ; intptr_t block_size) INIT_XMM sse2 cglobal block_error_fp, 3, 3, 6, uqc, dqc, size pxor m4, m4 ; sse accumulator pxor m5, m5 ; dedicated zero register .loop: LOAD_TRAN_LOW 2, uqcq, 0 LOAD_TRAN_LOW 0, dqcq, 0 LOAD_TRAN_LOW 3, uqcq, 8 LOAD_TRAN_LOW 1, dqcq, 8 INCREMENT_ELEMENTS_TRAN_LOW uqcq, 16 INCREMENT_ELEMENTS_TRAN_LOW dqcq, 16 sub sizeq, 16 psubw m0, m2 psubw m1, m3 ; individual errors are max. 
15bit+sign, so squares are 30bit, and ; thus the sum of 2 should fit in a 31bit integer (+ unused sign bit) pmaddwd m0, m0 pmaddwd m1, m1 ; the sum of 2 31bit integers will fit in a 32bit unsigned integer paddd m0, m1 ; accumulate in 64bit punpckldq m3, m0, m5 punpckhdq m0, m5 paddq m4, m3 paddq m4, m0 jnz .loop ; accumulate horizontally and store in return value movhlps m5, m4 paddq m4, m5 %if VPX_ARCH_X86_64 movq rax, m4 %else pshufd m5, m4, 0x1 movd eax, m4 movd edx, m5 %endif RET
libvpx-1.8.2/vp9/encoder/x86/vp9_frame_scale_ssse3.c000066400000000000000000001077601357355204000221500ustar00rootroot00000000000000/*
 *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <tmmintrin.h>  // SSSE3

#include "./vp9_rtcd.h"
#include "./vpx_dsp_rtcd.h"
#include "./vpx_scale_rtcd.h"
#include "vpx_dsp/x86/convolve_ssse3.h"
#include "vpx_dsp/x86/mem_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_scale/yv12config.h"

static INLINE __m128i scale_plane_2_to_1_phase_0_kernel(
    const uint8_t *const src, const __m128i *const mask) {
  const __m128i a = _mm_loadu_si128((const __m128i *)(&src[0]));
  const __m128i b = _mm_loadu_si128((const __m128i *)(&src[16]));
  const __m128i a_and = _mm_and_si128(a, *mask);
  const __m128i b_and = _mm_and_si128(b, *mask);
  return _mm_packus_epi16(a_and, b_and);
}

static void scale_plane_2_to_1_phase_0(const uint8_t *src,
                                       const ptrdiff_t src_stride,
                                       uint8_t *dst,
                                       const ptrdiff_t dst_stride,
                                       const int dst_w, const int dst_h) {
  const int max_width = (dst_w + 15) & ~15;
  const __m128i mask = _mm_set1_epi16(0x00FF);
  int y = dst_h;

  do {
    int x = max_width;
    do {
      const __m128i d = scale_plane_2_to_1_phase_0_kernel(src, &mask);
      _mm_storeu_si128((__m128i *)dst, d);
      src += 32;
      dst += 16;
      x -= 16;
    } while (x);
    src += 2 * (src_stride - max_width);
    dst += dst_stride - max_width;
  } while (--y);
}

static void scale_plane_4_to_1_phase_0(const uint8_t *src,
                                       const ptrdiff_t src_stride,
                                       uint8_t *dst,
                                       const ptrdiff_t dst_stride,
                                       const int dst_w, const int dst_h) {
  const int max_width = (dst_w + 15) & ~15;
  const __m128i mask = _mm_set1_epi32(0x000000FF);
  int y = dst_h;

  do {
    int x = max_width;
    do {
      const __m128i d0 = scale_plane_2_to_1_phase_0_kernel(&src[0], &mask);
      const __m128i d1 = scale_plane_2_to_1_phase_0_kernel(&src[32], &mask);
      const __m128i d2 = _mm_packus_epi16(d0, d1);
      _mm_storeu_si128((__m128i *)dst, d2);
      src += 64;
      dst += 16;
      x -= 16;
    } while (x);
    src += 4 * (src_stride - max_width);
    dst += dst_stride - max_width;
  } while (--y);
}

static INLINE __m128i scale_plane_bilinear_kernel(const __m128i *const s,
                                                  const __m128i c0c1) {
  const __m128i k_64 = _mm_set1_epi16(1 << 6);
  const __m128i t0 = _mm_maddubs_epi16(s[0], c0c1);
  const __m128i t1 = _mm_maddubs_epi16(s[1], c0c1);
  // round and shift by 7 bit each 16 bit
  const __m128i t2 = _mm_adds_epi16(t0, k_64);
  const __m128i t3 = _mm_adds_epi16(t1, k_64);
  const __m128i t4 = _mm_srai_epi16(t2, 7);
  const __m128i t5 = _mm_srai_epi16(t3, 7);
  return _mm_packus_epi16(t4, t5);
}

static void scale_plane_2_to_1_bilinear(const uint8_t *src,
                                        const ptrdiff_t src_stride,
                                        uint8_t *dst,
                                        const ptrdiff_t dst_stride,
                                        const int dst_w, const int dst_h,
                                        const __m128i c0c1) {
  const int max_width = (dst_w + 15) & ~15;
  int y = dst_h;

  do
{ int x = max_width; do { __m128i s[2], d[2]; // Horizontal // Even rows s[0] = _mm_loadu_si128((const __m128i *)(src + 0)); s[1] = _mm_loadu_si128((const __m128i *)(src + 16)); d[0] = scale_plane_bilinear_kernel(s, c0c1); // odd rows s[0] = _mm_loadu_si128((const __m128i *)(src + src_stride + 0)); s[1] = _mm_loadu_si128((const __m128i *)(src + src_stride + 16)); d[1] = scale_plane_bilinear_kernel(s, c0c1); // Vertical s[0] = _mm_unpacklo_epi8(d[0], d[1]); s[1] = _mm_unpackhi_epi8(d[0], d[1]); d[0] = scale_plane_bilinear_kernel(s, c0c1); _mm_storeu_si128((__m128i *)dst, d[0]); src += 32; dst += 16; x -= 16; } while (x); src += 2 * (src_stride - max_width); dst += dst_stride - max_width; } while (--y); } static void scale_plane_4_to_1_bilinear(const uint8_t *src, const ptrdiff_t src_stride, uint8_t *dst, const ptrdiff_t dst_stride, const int dst_w, const int dst_h, const __m128i c0c1) { const int max_width = (dst_w + 15) & ~15; int y = dst_h; do { int x = max_width; do { __m128i s[8], d[8]; // Note: Using _mm_packus_epi32() in SSE4.1 could be faster. // Here we tried to not use shuffle instructions which would be slow // on some x86 CPUs. // Horizontal // 000 001 xx xx 004 005 xx xx 008 009 xx xx 00C 00D xx xx // 010 011 xx xx 014 015 xx xx 018 019 xx xx 01C 01D xx xx // 020 021 xx xx 024 025 xx xx 028 029 xx xx 02C 02D xx xx // 030 031 xx xx 034 035 xx xx 038 039 xx xx 03C 03D xx xx // 100 101 xx xx 104 105 xx xx 108 109 xx xx 10C 10D xx xx // 110 111 xx xx 114 115 xx xx 118 119 xx xx 11C 11D xx xx // 120 121 xx xx 124 125 xx xx 128 129 xx xx 12C 12D xx xx // 130 131 xx xx 134 135 xx xx 138 139 xx xx 13C 13D xx xx s[0] = _mm_loadu_si128((const __m128i *)(&src[0])); s[1] = _mm_loadu_si128((const __m128i *)(&src[16])); s[2] = _mm_loadu_si128((const __m128i *)(&src[32])); s[3] = _mm_loadu_si128((const __m128i *)(&src[48])); s[4] = _mm_loadu_si128((const __m128i *)(src + src_stride + 0)); s[5] = _mm_loadu_si128((const __m128i *)(src + src_stride + 16)); s[6] = _mm_loadu_si128((const __m128i *)(src + src_stride + 32)); s[7] = _mm_loadu_si128((const __m128i *)(src + src_stride + 48)); // 000 001 100 101 xx xx xx xx 004 005 104 105 xx xx xx xx // 008 009 108 109 xx xx xx xx 00C 00D 10C 10D xx xx xx xx // 010 011 110 111 xx xx xx xx 014 015 114 115 xx xx xx xx // 018 019 118 119 xx xx xx xx 01C 01D 11C 11D xx xx xx xx // 020 021 120 121 xx xx xx xx 024 025 124 125 xx xx xx xx // 028 029 128 129 xx xx xx xx 02C 02D 12C 12D xx xx xx xx // 030 031 130 131 xx xx xx xx 034 035 134 135 xx xx xx xx // 038 039 138 139 xx xx xx xx 03C 03D 13C 13D xx xx xx xx d[0] = _mm_unpacklo_epi16(s[0], s[4]); d[1] = _mm_unpackhi_epi16(s[0], s[4]); d[2] = _mm_unpacklo_epi16(s[1], s[5]); d[3] = _mm_unpackhi_epi16(s[1], s[5]); d[4] = _mm_unpacklo_epi16(s[2], s[6]); d[5] = _mm_unpackhi_epi16(s[2], s[6]); d[6] = _mm_unpacklo_epi16(s[3], s[7]); d[7] = _mm_unpackhi_epi16(s[3], s[7]); // 000 001 100 101 008 009 108 109 xx xx xx xx xx xx xx xx // 004 005 104 105 00C 00D 10C 10D xx xx xx xx xx xx xx xx // 010 011 110 111 018 019 118 119 xx xx xx xx xx xx xx xx // 014 015 114 115 01C 01D 11C 11D xx xx xx xx xx xx xx xx // 020 021 120 121 028 029 128 129 xx xx xx xx xx xx xx xx // 024 025 124 125 02C 02D 12C 12D xx xx xx xx xx xx xx xx // 030 031 130 131 038 039 138 139 xx xx xx xx xx xx xx xx // 034 035 134 135 03C 03D 13C 13D xx xx xx xx xx xx xx xx s[0] = _mm_unpacklo_epi32(d[0], d[1]); s[1] = _mm_unpackhi_epi32(d[0], d[1]); s[2] = _mm_unpacklo_epi32(d[2], d[3]); s[3] = _mm_unpackhi_epi32(d[2], d[3]); s[4] = 
_mm_unpacklo_epi32(d[4], d[5]); s[5] = _mm_unpackhi_epi32(d[4], d[5]); s[6] = _mm_unpacklo_epi32(d[6], d[7]); s[7] = _mm_unpackhi_epi32(d[6], d[7]); // 000 001 100 101 004 005 104 105 008 009 108 109 00C 00D 10C 10D // 010 011 110 111 014 015 114 115 018 019 118 119 01C 01D 11C 11D // 020 021 120 121 024 025 124 125 028 029 128 129 02C 02D 12C 12D // 030 031 130 131 034 035 134 135 038 039 138 139 03C 03D 13C 13D d[0] = _mm_unpacklo_epi32(s[0], s[1]); d[1] = _mm_unpacklo_epi32(s[2], s[3]); d[2] = _mm_unpacklo_epi32(s[4], s[5]); d[3] = _mm_unpacklo_epi32(s[6], s[7]); d[0] = scale_plane_bilinear_kernel(&d[0], c0c1); d[1] = scale_plane_bilinear_kernel(&d[2], c0c1); // Vertical d[0] = scale_plane_bilinear_kernel(d, c0c1); _mm_storeu_si128((__m128i *)dst, d[0]); src += 64; dst += 16; x -= 16; } while (x); src += 4 * (src_stride - max_width); dst += dst_stride - max_width; } while (--y); } static void scale_plane_2_to_1_general(const uint8_t *src, const int src_stride, uint8_t *dst, const int dst_stride, const int w, const int h, const int16_t *const coef, uint8_t *const temp_buffer) { const int width_hor = (w + 3) & ~3; const int width_ver = (w + 7) & ~7; const int height_hor = (2 * h + SUBPEL_TAPS - 2 + 7) & ~7; const int height_ver = (h + 3) & ~3; int x, y = height_hor; uint8_t *t = temp_buffer; __m128i s[11], d[4]; __m128i f[4]; assert(w && h); shuffle_filter_ssse3(coef, f); src -= (SUBPEL_TAPS / 2 - 1) * src_stride + SUBPEL_TAPS / 2 + 1; // horizontal 4x8 do { load_8bit_8x8(src + 2, src_stride, s); // 00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71 // 02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73 // 04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75 // 06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77 (overlapped) transpose_16bit_4x8(s, s); x = width_hor; do { src += 8; load_8bit_8x8(src, src_stride, &s[3]); // 06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77 // 08 09 18 19 28 29 38 39 48 49 58 59 68 69 78 79 // 0A 0B 1A 1B 2A 2B 3A 3B 4A 4B 5A 5B 6A 6B 7A 7B // 0C 0D 1C 1D 2C 2D 3C 3D 4C 4D 5C 5D 6C 6D 7C 7D transpose_16bit_4x8(&s[3], &s[3]); d[0] = convolve8_8_ssse3(&s[0], f); // 00 10 20 30 40 50 60 70 d[1] = convolve8_8_ssse3(&s[1], f); // 01 11 21 31 41 51 61 71 d[2] = convolve8_8_ssse3(&s[2], f); // 02 12 22 32 42 52 62 72 d[3] = convolve8_8_ssse3(&s[3], f); // 03 13 23 33 43 53 63 73 // 00 10 20 30 40 50 60 70 02 12 22 32 42 52 62 72 // 01 11 21 31 41 51 61 71 03 13 23 33 43 53 63 73 d[0] = _mm_packus_epi16(d[0], d[2]); d[1] = _mm_packus_epi16(d[1], d[3]); // 00 10 01 11 20 30 21 31 40 50 41 51 60 70 61 71 // 02 12 03 13 22 32 23 33 42 52 43 53 62 72 63 73 d[2] = _mm_unpacklo_epi16(d[0], d[1]); d[3] = _mm_unpackhi_epi16(d[0], d[1]); // 00 10 01 11 02 12 03 13 20 30 21 31 22 32 23 33 // 40 50 41 51 42 52 43 53 60 70 61 71 62 72 63 73 d[0] = _mm_unpacklo_epi32(d[2], d[3]); d[1] = _mm_unpackhi_epi32(d[2], d[3]); store_8bit_8x4_from_16x2(d, t, 2 * width_hor); s[0] = s[4]; s[1] = s[5]; s[2] = s[6]; t += 8; x -= 4; } while (x); src += 8 * src_stride - 2 * width_hor; t += 6 * width_hor; y -= 8; } while (y); // vertical 8x4 x = width_ver; t = temp_buffer; do { // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 s[0] = _mm_loadu_si128((const __m128i *)(t + 0 * width_hor)); s[1] = _mm_loadu_si128((const __m128i *)(t + 2 * width_hor)); s[2] = _mm_loadu_si128((const __m128i *)(t + 4 * width_hor)); t += 6 * width_hor; y = height_ver; do { // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 // 80 90 81 
91 82 92 83 93 84 94 85 95 86 96 87 97
      // A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
      // C0 D0 C1 D1 C2 D2 C3 D3 C4 D4 C5 D5 C6 D6 C7 D7
      loadu_8bit_16x4(t, 2 * width_hor, &s[3]);
      t += 8 * width_hor;
      d[0] = convolve8_8_ssse3(&s[0], f);  // 00 01 02 03 04 05 06 07
      d[1] = convolve8_8_ssse3(&s[1], f);  // 10 11 12 13 14 15 16 17
      d[2] = convolve8_8_ssse3(&s[2], f);  // 20 21 22 23 24 25 26 27
      d[3] = convolve8_8_ssse3(&s[3], f);  // 30 31 32 33 34 35 36 37
      // 00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17
      // 20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37
      d[0] = _mm_packus_epi16(d[0], d[1]);
      d[1] = _mm_packus_epi16(d[2], d[3]);
      store_8bit_8x4_from_16x2(d, dst, dst_stride);
      s[0] = s[4];
      s[1] = s[5];
      s[2] = s[6];
      dst += 4 * dst_stride;
      y -= 4;
    } while (y);

    t -= width_hor * (2 * height_ver + 6);
    t += 16;
    dst -= height_ver * dst_stride;
    dst += 8;
    x -= 8;
  } while (x);
}

static void scale_plane_4_to_1_general(const uint8_t *src, const int src_stride,
                                       uint8_t *dst, const int dst_stride,
                                       const int w, const int h,
                                       const int16_t *const coef,
                                       uint8_t *const temp_buffer) {
  const int width_hor = (w + 1) & ~1;
  const int width_ver = (w + 7) & ~7;
  const int height_hor = (4 * h + SUBPEL_TAPS - 2 + 7) & ~7;
  const int height_ver = (h + 1) & ~1;
  int x, y = height_hor;
  uint8_t *t = temp_buffer;
  __m128i s[11], d[4];
  __m128i f[4];

  assert(w && h);

  shuffle_filter_ssse3(coef, f);
  src -= (SUBPEL_TAPS / 2 - 1) * src_stride + SUBPEL_TAPS / 2 + 3;

  // horizontal 2x8
  do {
    load_8bit_8x8(src + 4, src_stride, s);
    // 00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71
    // 02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73
    // 04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75 (overlapped)
    // 06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77 (overlapped)
    transpose_16bit_4x8(s, s);
    x = width_hor;

    do {
      src += 8;
      load_8bit_8x8(src, src_stride, &s[2]);
      // 04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75
      // 06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77
      // 08 09 18 19 28 29 38 39 48 49 58 59 68 69 78 79
      // 0A 0B 1A 1B 2A 2B 3A 3B 4A 4B 5A 5B 6A 6B 7A 7B
      transpose_16bit_4x8(&s[2], &s[2]);
      d[0] = convolve8_8_ssse3(&s[0], f);  // 00 10 20 30 40 50 60 70
      d[1] = convolve8_8_ssse3(&s[2], f);  // 01 11 21 31 41 51 61 71
      // 00 10 20 30 40 50 60 70 xx xx xx xx xx xx xx xx
      // 01 11 21 31 41 51 61 71 xx xx xx xx xx xx xx xx
      d[0] = _mm_packus_epi16(d[0], d[0]);
      d[1] = _mm_packus_epi16(d[1], d[1]);
      // 00 10 01 11 20 30 21 31 40 50 41 51 60 70 61 71
      d[0] = _mm_unpacklo_epi16(d[0], d[1]);
      store_8bit_4x4_sse2(d[0], t, 2 * width_hor);
      s[0] = s[4];
      s[1] = s[5];
      t += 4;
      x -= 2;
    } while (x);
    src += 8 * src_stride - 4 * width_hor;
    t += 6 * width_hor;
    y -= 8;
  } while (y);

  // vertical 8x2
  x = width_ver;
  t = temp_buffer;
  do {
    // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17
    // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37
    s[0] = _mm_loadu_si128((const __m128i *)(t + 0 * width_hor));
    s[1] = _mm_loadu_si128((const __m128i *)(t + 2 * width_hor));
    t += 4 * width_hor;
    y = height_ver;

    do {
      // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57
      // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77
      // 80 90 81 91 82 92 83 93 84 94 85 95 86 96 87 97
      // A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7
      loadu_8bit_16x4(t, 2 * width_hor, &s[2]);
      t += 8 * width_hor;
      d[0] = convolve8_8_ssse3(&s[0], f);  // 00 01 02 03 04 05 06 07
      d[1] = convolve8_8_ssse3(&s[2], f);  // 10 11 12 13 14 15 16 17
      // 00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17
      d[0] = _mm_packus_epi16(d[0], d[1]);
      _mm_storel_epi64((__m128i *)(dst + 0 * dst_stride), d[0]);
      _mm_storeh_epi64((__m128i *)(dst + 1 * dst_stride), d[0]);
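      // (The low and high 8-byte halves of d[0] hold two consecutive output
      // rows, which is why a single 16-byte register feeds the storel/storeh
      // pair above.)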
s[0] = s[4]; s[1] = s[5]; dst += 2 * dst_stride; y -= 2; } while (y); t -= width_hor * (4 * height_ver + 4); t += 16; dst -= height_ver * dst_stride; dst += 8; x -= 8; } while (x); } typedef void (*shuffle_filter_funcs)(const int16_t *const filter, __m128i *const f); typedef __m128i (*convolve8_funcs)(const __m128i *const s, const __m128i *const f); static void scale_plane_4_to_3_general(const uint8_t *src, const int src_stride, uint8_t *dst, const int dst_stride, const int w, const int h, const InterpKernel *const coef, const int phase_scaler, uint8_t *const temp_buffer) { static const int step_q4 = 16 * 4 / 3; const int width_hor = (w + 5) - ((w + 5) % 6); const int stride_hor = 2 * width_hor + 4; // store 4 extra pixels const int width_ver = (w + 7) & ~7; // We need (SUBPEL_TAPS - 1) extra rows: (SUBPEL_TAPS / 2 - 1) extra rows // above and (SUBPEL_TAPS / 2) extra rows below. const int height_hor = (4 * h / 3 + SUBPEL_TAPS - 1 + 7) & ~7; const int height_ver = (h + 5) - ((h + 5) % 6); int x, y = height_hor; uint8_t *t = temp_buffer; __m128i s[12], d[6], dd[4]; __m128i f0[4], f1[5], f2[5]; // The offset of the first row is always less than 1 pixel. const int offset1_q4 = phase_scaler + 1 * step_q4; const int offset2_q4 = phase_scaler + 2 * step_q4; // offset_idxx indicates the pixel offset is even (0) or odd (1). // It's used to choose the src offset and filter coefficient offset. const int offset_idx1 = (offset1_q4 >> 4) & 1; const int offset_idx2 = (offset2_q4 >> 4) & 1; static const shuffle_filter_funcs shuffle_filter_funcs[2] = { shuffle_filter_ssse3, shuffle_filter_odd_ssse3 }; static const convolve8_funcs convolve8_funcs[2] = { convolve8_8_even_offset_ssse3, convolve8_8_odd_offset_ssse3 }; assert(w && h); shuffle_filter_ssse3(coef[(phase_scaler + 0 * step_q4) & SUBPEL_MASK], f0); shuffle_filter_funcs[offset_idx1](coef[offset1_q4 & SUBPEL_MASK], f1); shuffle_filter_funcs[offset_idx2](coef[offset2_q4 & SUBPEL_MASK], f2); // Sub 64 to avoid overflow. // Coef 128 would be treated as -128 in PMADDUBSW. Sub 64 here. // Coef 128 is in either fx[1] or fx[2] depending on the phase idx. // When filter phase idx is 1, the two biggest coefficients are shuffled // together, and the sum of them are always no less than 128. Sub 64 here. // After the subtraction, when the sum of all positive coefficients are no // larger than 128, and the sum of all negative coefficients are no // less than -128, there will be no overflow in the convolve8 functions. 
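// A numeric illustration of the PMADDUBSW headroom argument (tap values
// hypothetical, for exposition only; real kernels come from
// vp9_filter_kernels):
#if 0
  // PMADDUBSW computes u8*s8 + u8*s8 per 16-bit lane with signed saturation.
  // With the two largest taps, say 71 and 66, shuffled into one pair, the
  // worst case 255 * 71 + 255 * 66 = 34935 exceeds 32767 and would saturate;
  // a tap of exactly 128 would not even fit in a signed byte. After
  // subtracting 64 from each of those two taps:
  //   255 * (71 - 64) + 255 * (66 - 64) = 255 * 9 = 2295,
  // which is comfortably inside int16 range. The removed 64-weighted pixel
  // contribution is added back inside the convolve8 helpers.
#endif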
f0[1] = _mm_sub_epi8(f0[1], _mm_set1_epi8(64)); f1[1 + offset_idx1] = _mm_sub_epi8(f1[1 + offset_idx1], _mm_set1_epi8(64)); f2[1 + offset_idx2] = _mm_sub_epi8(f2[1 + offset_idx2], _mm_set1_epi8(64)); src -= (SUBPEL_TAPS / 2 - 1) * src_stride + SUBPEL_TAPS / 2 - 1; // horizontal 6x8 do { load_8bit_8x8(src, src_stride, s); // 00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71 // 02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73 // 04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75 // 06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77 transpose_16bit_4x8(s, s); x = width_hor; do { src += 8; load_8bit_8x8(src, src_stride, &s[4]); // 08 09 18 19 28 29 38 39 48 49 58 59 68 69 78 79 // 0A 0B 1A 1B 2A 2B 3A 3B 4A 4B 5A 5B 6A 6B 7A 7B // OC 0D 1C 1D 2C 2D 3C 3D 4C 4D 5C 5D 6C 6D 7C 7D // 0E 0F 1E 1F 2E 2F 3E 3F 4E 4F 5E 5F 6E 6F 7E 7F transpose_16bit_4x8(&s[4], &s[4]); // 00 10 20 30 40 50 60 70 // 01 11 21 31 41 51 61 71 // 02 12 22 32 42 52 62 72 // 03 13 23 33 43 53 63 73 // 04 14 24 34 44 54 64 74 // 05 15 25 35 45 55 65 75 d[0] = convolve8_8_even_offset_ssse3(&s[0], f0); d[1] = convolve8_funcs[offset_idx1](&s[offset1_q4 >> 5], f1); d[2] = convolve8_funcs[offset_idx2](&s[offset2_q4 >> 5], f2); d[3] = convolve8_8_even_offset_ssse3(&s[2], f0); d[4] = convolve8_funcs[offset_idx1](&s[2 + (offset1_q4 >> 5)], f1); d[5] = convolve8_funcs[offset_idx2](&s[2 + (offset2_q4 >> 5)], f2); // 00 10 20 30 40 50 60 70 02 12 22 32 42 52 62 72 // 01 11 21 31 41 51 61 71 03 13 23 33 43 53 63 73 // 04 14 24 34 44 54 64 74 xx xx xx xx xx xx xx xx // 05 15 25 35 45 55 65 75 xx xx xx xx xx xx xx xx dd[0] = _mm_packus_epi16(d[0], d[2]); dd[1] = _mm_packus_epi16(d[1], d[3]); dd[2] = _mm_packus_epi16(d[4], d[4]); dd[3] = _mm_packus_epi16(d[5], d[5]); // 00 10 01 11 20 30 21 31 40 50 41 51 60 70 61 71 // 02 12 03 13 22 32 23 33 42 52 43 53 62 72 63 73 // 04 14 05 15 24 34 25 35 44 54 45 55 64 74 65 75 d[0] = _mm_unpacklo_epi16(dd[0], dd[1]); d[1] = _mm_unpackhi_epi16(dd[0], dd[1]); d[2] = _mm_unpacklo_epi16(dd[2], dd[3]); // 00 10 01 11 02 12 03 13 20 30 21 31 22 32 23 33 // 40 50 41 51 42 52 43 53 60 70 61 71 62 72 63 73 // 04 14 05 15 xx xx xx xx 24 34 25 35 xx xx xx xx // 44 54 45 55 xx xx xx xx 64 74 65 75 xx xx xx xx dd[0] = _mm_unpacklo_epi32(d[0], d[1]); dd[1] = _mm_unpackhi_epi32(d[0], d[1]); dd[2] = _mm_unpacklo_epi32(d[2], d[2]); dd[3] = _mm_unpackhi_epi32(d[2], d[2]); // 00 10 01 11 02 12 03 13 04 14 05 15 xx xx xx xx // 20 30 21 31 22 32 23 33 24 34 25 35 xx xx xx xx // 40 50 41 51 42 52 43 53 44 54 45 55 xx xx xx xx // 60 70 61 71 62 72 63 73 64 74 65 75 xx xx xx xx d[0] = _mm_unpacklo_epi64(dd[0], dd[2]); d[1] = _mm_unpackhi_epi64(dd[0], dd[2]); d[2] = _mm_unpacklo_epi64(dd[1], dd[3]); d[3] = _mm_unpackhi_epi64(dd[1], dd[3]); // store 4 extra pixels storeu_8bit_16x4(d, t, stride_hor); s[0] = s[4]; s[1] = s[5]; s[2] = s[6]; s[3] = s[7]; t += 12; x -= 6; } while (x); src += 8 * src_stride - 4 * width_hor / 3; t += 3 * stride_hor + 4; y -= 8; } while (y); // vertical 8x6 x = width_ver; t = temp_buffer; do { // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 loadu_8bit_16x4(t, stride_hor, s); y = height_ver; do { // 80 90 81 91 82 92 83 93 84 94 85 95 86 96 87 97 // A0 B0 A1 B1 A2 B2 A3 B3 A4 B4 A5 B5 A6 B6 A7 B7 // C0 D0 C1 D1 C2 D2 C3 D3 C4 D4 C5 D5 C6 D6 C7 D7 // E0 F0 E1 F1 E2 F2 E3 F3 E4 F4 E5 F5 E6 F6 E7 F7 t += 4 * stride_hor; loadu_8bit_16x4(t, stride_hor, &s[4]); 
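      // The statements below produce 6 output rows from 8 input rows per
      // iteration: d[0]/d[3] use the even-offset kernel f0, d[1]/d[4] use f1,
      // and d[2]/d[5] use f2 -- the 4-to-3 step of 16 * 4 / 3 quarter-pel
      // units per output row set up above.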
d[0] = convolve8_8_even_offset_ssse3(&s[0], f0); d[1] = convolve8_funcs[offset_idx1](&s[offset1_q4 >> 5], f1); d[2] = convolve8_funcs[offset_idx2](&s[offset2_q4 >> 5], f2); d[3] = convolve8_8_even_offset_ssse3(&s[2], f0); d[4] = convolve8_funcs[offset_idx1](&s[2 + (offset1_q4 >> 5)], f1); d[5] = convolve8_funcs[offset_idx2](&s[2 + (offset2_q4 >> 5)], f2); // 00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17 // 20 21 22 23 24 25 26 27 30 31 32 33 34 35 36 37 // 40 41 42 43 44 45 46 47 50 51 52 53 54 55 56 57 d[0] = _mm_packus_epi16(d[0], d[1]); d[2] = _mm_packus_epi16(d[2], d[3]); d[4] = _mm_packus_epi16(d[4], d[5]); _mm_storel_epi64((__m128i *)(dst + 0 * dst_stride), d[0]); _mm_storeh_epi64((__m128i *)(dst + 1 * dst_stride), d[0]); _mm_storel_epi64((__m128i *)(dst + 2 * dst_stride), d[2]); _mm_storeh_epi64((__m128i *)(dst + 3 * dst_stride), d[2]); _mm_storel_epi64((__m128i *)(dst + 4 * dst_stride), d[4]); _mm_storeh_epi64((__m128i *)(dst + 5 * dst_stride), d[4]); s[0] = s[4]; s[1] = s[5]; s[2] = s[6]; s[3] = s[7]; dst += 6 * dst_stride; y -= 6; } while (y); t -= stride_hor * 2 * height_ver / 3; t += 16; dst -= height_ver * dst_stride; dst += 8; x -= 8; } while (x); } static INLINE __m128i scale_1_to_2_phase_0_kernel(const __m128i *const s, const __m128i *const f) { __m128i ss[4], temp; ss[0] = _mm_unpacklo_epi8(s[0], s[1]); ss[1] = _mm_unpacklo_epi8(s[2], s[3]); ss[2] = _mm_unpacklo_epi8(s[4], s[5]); ss[3] = _mm_unpacklo_epi8(s[6], s[7]); temp = convolve8_8_ssse3(ss, f); return _mm_packus_epi16(temp, temp); } // Only calculate odd columns since even columns are just src pixels' copies. static void scale_1_to_2_phase_0_row(const uint8_t *src, uint8_t *dst, const int w, const __m128i *const f) { int x = w; do { __m128i s[8], temp; s[0] = _mm_loadl_epi64((const __m128i *)(src + 0)); s[1] = _mm_loadl_epi64((const __m128i *)(src + 1)); s[2] = _mm_loadl_epi64((const __m128i *)(src + 2)); s[3] = _mm_loadl_epi64((const __m128i *)(src + 3)); s[4] = _mm_loadl_epi64((const __m128i *)(src + 4)); s[5] = _mm_loadl_epi64((const __m128i *)(src + 5)); s[6] = _mm_loadl_epi64((const __m128i *)(src + 6)); s[7] = _mm_loadl_epi64((const __m128i *)(src + 7)); temp = scale_1_to_2_phase_0_kernel(s, f); _mm_storel_epi64((__m128i *)dst, temp); src += 8; dst += 8; x -= 8; } while (x); } static void scale_plane_1_to_2_phase_0(const uint8_t *src, const ptrdiff_t src_stride, uint8_t *dst, const ptrdiff_t dst_stride, const int src_w, const int src_h, const int16_t *const coef, uint8_t *const temp_buffer) { int max_width; int y; uint8_t *tmp[9]; __m128i f[4]; max_width = (src_w + 7) & ~7; tmp[0] = temp_buffer + 0 * max_width; tmp[1] = temp_buffer + 1 * max_width; tmp[2] = temp_buffer + 2 * max_width; tmp[3] = temp_buffer + 3 * max_width; tmp[4] = temp_buffer + 4 * max_width; tmp[5] = temp_buffer + 5 * max_width; tmp[6] = temp_buffer + 6 * max_width; tmp[7] = temp_buffer + 7 * max_width; shuffle_filter_ssse3(coef, f); scale_1_to_2_phase_0_row(src - 3 * src_stride - 3, tmp[0], max_width, f); scale_1_to_2_phase_0_row(src - 2 * src_stride - 3, tmp[1], max_width, f); scale_1_to_2_phase_0_row(src - 1 * src_stride - 3, tmp[2], max_width, f); scale_1_to_2_phase_0_row(src + 0 * src_stride - 3, tmp[3], max_width, f); scale_1_to_2_phase_0_row(src + 1 * src_stride - 3, tmp[4], max_width, f); scale_1_to_2_phase_0_row(src + 2 * src_stride - 3, tmp[5], max_width, f); scale_1_to_2_phase_0_row(src + 3 * src_stride - 3, tmp[6], max_width, f); y = src_h; do { int x; scale_1_to_2_phase_0_row(src + 4 * src_stride - 3, tmp[7], max_width, f); for 
(x = 0; x < max_width; x += 8) { __m128i s[8], C, D, CD; // Even rows const __m128i a = _mm_loadl_epi64((const __m128i *)(src + x)); const __m128i b = _mm_loadl_epi64((const __m128i *)(tmp[3] + x)); const __m128i ab = _mm_unpacklo_epi8(a, b); _mm_storeu_si128((__m128i *)(dst + 2 * x), ab); // Odd rows // Even columns load_8bit_8x8(src + x - 3 * src_stride, src_stride, s); C = scale_1_to_2_phase_0_kernel(s, f); // Odd columns s[0] = _mm_loadl_epi64((const __m128i *)(tmp[0] + x)); s[1] = _mm_loadl_epi64((const __m128i *)(tmp[1] + x)); s[2] = _mm_loadl_epi64((const __m128i *)(tmp[2] + x)); s[3] = _mm_loadl_epi64((const __m128i *)(tmp[3] + x)); s[4] = _mm_loadl_epi64((const __m128i *)(tmp[4] + x)); s[5] = _mm_loadl_epi64((const __m128i *)(tmp[5] + x)); s[6] = _mm_loadl_epi64((const __m128i *)(tmp[6] + x)); s[7] = _mm_loadl_epi64((const __m128i *)(tmp[7] + x)); D = scale_1_to_2_phase_0_kernel(s, f); CD = _mm_unpacklo_epi8(C, D); _mm_storeu_si128((__m128i *)(dst + dst_stride + 2 * x), CD); } src += src_stride; dst += 2 * dst_stride; tmp[8] = tmp[0]; tmp[0] = tmp[1]; tmp[1] = tmp[2]; tmp[2] = tmp[3]; tmp[3] = tmp[4]; tmp[4] = tmp[5]; tmp[5] = tmp[6]; tmp[6] = tmp[7]; tmp[7] = tmp[8]; } while (--y); } void vp9_scale_and_extend_frame_ssse3(const YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, uint8_t filter_type, int phase_scaler) { const int src_w = src->y_crop_width; const int src_h = src->y_crop_height; const int dst_w = dst->y_crop_width; const int dst_h = dst->y_crop_height; const int dst_uv_w = dst_w / 2; const int dst_uv_h = dst_h / 2; int scaled = 0; // phase_scaler is usually 0 or 8. assert(phase_scaler >= 0 && phase_scaler < 16); if (dst_w * 2 == src_w && dst_h * 2 == src_h) { // 2 to 1 scaled = 1; if (phase_scaler == 0) { scale_plane_2_to_1_phase_0(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h); scale_plane_2_to_1_phase_0(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h); scale_plane_2_to_1_phase_0(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h); } else if (filter_type == BILINEAR) { const int16_t c0 = vp9_filter_kernels[BILINEAR][phase_scaler][3]; const int16_t c1 = vp9_filter_kernels[BILINEAR][phase_scaler][4]; const __m128i c0c1 = _mm_set1_epi16(c0 | (c1 << 8)); // c0 and c1 >= 0 scale_plane_2_to_1_bilinear(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h, c0c1); scale_plane_2_to_1_bilinear(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, c0c1); scale_plane_2_to_1_bilinear(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, c0c1); } else { const int buffer_stride = (dst_w + 3) & ~3; const int buffer_height = (2 * dst_h + SUBPEL_TAPS - 2 + 7) & ~7; uint8_t *const temp_buffer = (uint8_t *)malloc(buffer_stride * buffer_height); if (temp_buffer) { scale_plane_2_to_1_general( src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h, vp9_filter_kernels[filter_type][phase_scaler], temp_buffer); scale_plane_2_to_1_general( src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, vp9_filter_kernels[filter_type][phase_scaler], temp_buffer); scale_plane_2_to_1_general( src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, vp9_filter_kernels[filter_type][phase_scaler], temp_buffer); free(temp_buffer); } else { scaled = 0; } } } else if (4 * dst_w == src_w && 4 * dst_h == src_h) { // 4 to 1 scaled = 1; if (phase_scaler == 0) { 
scale_plane_4_to_1_phase_0(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h); scale_plane_4_to_1_phase_0(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h); scale_plane_4_to_1_phase_0(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h); } else if (filter_type == BILINEAR) { const int16_t c0 = vp9_filter_kernels[BILINEAR][phase_scaler][3]; const int16_t c1 = vp9_filter_kernels[BILINEAR][phase_scaler][4]; const __m128i c0c1 = _mm_set1_epi16(c0 | (c1 << 8)); // c0 and c1 >= 0 scale_plane_4_to_1_bilinear(src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h, c0c1); scale_plane_4_to_1_bilinear(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, c0c1); scale_plane_4_to_1_bilinear(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, c0c1); } else { const int buffer_stride = (dst_w + 1) & ~1; const int buffer_height = (4 * dst_h + SUBPEL_TAPS - 2 + 7) & ~7; // When dst_w is 1 or 2, we need extra padding to avoid heap read overflow const int extra_padding = 16; uint8_t *const temp_buffer = (uint8_t *)malloc(buffer_stride * buffer_height + extra_padding); if (temp_buffer) { scale_plane_4_to_1_general( src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h, vp9_filter_kernels[filter_type][phase_scaler], temp_buffer); scale_plane_4_to_1_general( src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, vp9_filter_kernels[filter_type][phase_scaler], temp_buffer); scale_plane_4_to_1_general( src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, vp9_filter_kernels[filter_type][phase_scaler], temp_buffer); free(temp_buffer); } else { scaled = 0; } } } else if (4 * dst_w == 3 * src_w && 4 * dst_h == 3 * src_h) { // 4 to 3 const int buffer_stride_hor = (dst_w + 5) - ((dst_w + 5) % 6) + 2; const int buffer_stride_ver = (dst_w + 7) & ~7; const int buffer_height = (4 * dst_h / 3 + SUBPEL_TAPS - 1 + 7) & ~7; // When the vertical filter reads more pixels than the horizontal filter // generated in each row, we need extra padding to avoid heap read overflow. // For example, the horizontal filter generates 18 pixels but the vertical // filter reads 24 pixels in a row. The difference is multiplied by 2 since // two rows are interlaced together in the optimization. const int extra_padding = (buffer_stride_ver > buffer_stride_hor) ? 
2 * (buffer_stride_ver - buffer_stride_hor) : 0; const int buffer_size = buffer_stride_hor * buffer_height + extra_padding; uint8_t *const temp_buffer = (uint8_t *)malloc(buffer_size); if (temp_buffer) { scaled = 1; scale_plane_4_to_3_general( src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, dst_w, dst_h, vp9_filter_kernels[filter_type], phase_scaler, temp_buffer); scale_plane_4_to_3_general(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, vp9_filter_kernels[filter_type], phase_scaler, temp_buffer); scale_plane_4_to_3_general(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, dst_uv_w, dst_uv_h, vp9_filter_kernels[filter_type], phase_scaler, temp_buffer); free(temp_buffer); } } else if (dst_w == src_w * 2 && dst_h == src_h * 2 && phase_scaler == 0) { // 1 to 2 uint8_t *const temp_buffer = (uint8_t *)malloc(8 * ((src_w + 7) & ~7)); if (temp_buffer) { scaled = 1; scale_plane_1_to_2_phase_0( src->y_buffer, src->y_stride, dst->y_buffer, dst->y_stride, src_w, src_h, vp9_filter_kernels[filter_type][8], temp_buffer); scale_plane_1_to_2_phase_0(src->u_buffer, src->uv_stride, dst->u_buffer, dst->uv_stride, src_w / 2, src_h / 2, vp9_filter_kernels[filter_type][8], temp_buffer); scale_plane_1_to_2_phase_0(src->v_buffer, src->uv_stride, dst->v_buffer, dst->uv_stride, src_w / 2, src_h / 2, vp9_filter_kernels[filter_type][8], temp_buffer); free(temp_buffer); } } if (scaled) { vpx_extend_frame_borders(dst); } else { // Call c version for all other scaling ratios. vp9_scale_and_extend_frame_c(src, dst, filter_type, phase_scaler); } } libvpx-1.8.2/vp9/encoder/x86/vp9_highbd_block_error_intrin_sse2.c000066400000000000000000000056171357355204000247160ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vp9_rtcd.h" #include "vp9/common/vp9_common.h" int64_t vp9_highbd_block_error_sse2(const tran_low_t *coeff, const tran_low_t *dqcoeff, intptr_t block_size, int64_t *ssz, int bd) { int i, j, test; uint32_t temp[4]; __m128i max, min, cmp0, cmp1, cmp2, cmp3; int64_t error = 0, sqcoeff = 0; const int shift = 2 * (bd - 8); const int rounding = shift > 0 ? 
1 << (shift - 1) : 0; for (i = 0; i < block_size; i += 8) { // Load the data into xmm registers __m128i mm_coeff = _mm_load_si128((const __m128i *)(coeff + i)); __m128i mm_coeff2 = _mm_load_si128((const __m128i *)(coeff + i + 4)); __m128i mm_dqcoeff = _mm_load_si128((const __m128i *)(dqcoeff + i)); __m128i mm_dqcoeff2 = _mm_load_si128((const __m128i *)(dqcoeff + i + 4)); // Check if any values require more than 15 bit max = _mm_set1_epi32(0x3fff); min = _mm_set1_epi32((int32_t)0xffffc000); cmp0 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff, max), _mm_cmplt_epi32(mm_coeff, min)); cmp1 = _mm_xor_si128(_mm_cmpgt_epi32(mm_coeff2, max), _mm_cmplt_epi32(mm_coeff2, min)); cmp2 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff, max), _mm_cmplt_epi32(mm_dqcoeff, min)); cmp3 = _mm_xor_si128(_mm_cmpgt_epi32(mm_dqcoeff2, max), _mm_cmplt_epi32(mm_dqcoeff2, min)); test = _mm_movemask_epi8( _mm_or_si128(_mm_or_si128(cmp0, cmp1), _mm_or_si128(cmp2, cmp3))); if (!test) { __m128i mm_diff, error_sse2, sqcoeff_sse2; mm_coeff = _mm_packs_epi32(mm_coeff, mm_coeff2); mm_dqcoeff = _mm_packs_epi32(mm_dqcoeff, mm_dqcoeff2); mm_diff = _mm_sub_epi16(mm_coeff, mm_dqcoeff); error_sse2 = _mm_madd_epi16(mm_diff, mm_diff); sqcoeff_sse2 = _mm_madd_epi16(mm_coeff, mm_coeff); _mm_storeu_si128((__m128i *)temp, error_sse2); error = error + temp[0] + temp[1] + temp[2] + temp[3]; _mm_storeu_si128((__m128i *)temp, sqcoeff_sse2); sqcoeff += temp[0] + temp[1] + temp[2] + temp[3]; } else { for (j = 0; j < 8; j++) { const int64_t diff = coeff[i + j] - dqcoeff[i + j]; error += diff * diff; sqcoeff += (int64_t)coeff[i + j] * (int64_t)coeff[i + j]; } } } assert(error >= 0 && sqcoeff >= 0); error = (error + rounding) >> shift; sqcoeff = (sqcoeff + rounding) >> shift; *ssz = sqcoeff; return error; } libvpx-1.8.2/vp9/encoder/x86/vp9_quantize_avx2.c000066400000000000000000000117231357355204000213620ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include // AVX2 #include "./vp9_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/x86/bitdepth_conversion_avx2.h" #include "vpx_dsp/x86/quantize_sse2.h" // Zero fill 8 positions in the output buffer. static INLINE void store_zero_tran_low(tran_low_t *a) { const __m256i zero = _mm256_setzero_si256(); #if CONFIG_VP9_HIGHBITDEPTH _mm256_storeu_si256((__m256i *)(a), zero); _mm256_storeu_si256((__m256i *)(a + 8), zero); #else _mm256_storeu_si256((__m256i *)(a), zero); #endif } static INLINE __m256i scan_eob_256(const __m256i *iscan_ptr, __m256i *coeff256) { const __m256i iscan = _mm256_loadu_si256(iscan_ptr); const __m256i zero256 = _mm256_setzero_si256(); #if CONFIG_VP9_HIGHBITDEPTH // The _mm256_packs_epi32() in load_tran_low() packs the 64 bit coeff as // B1 A1 B0 A0. Shuffle to B1 B0 A1 A0 in order to scan eob correctly. 
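  // The 0xd8 control byte below is 0b11011000, which selects the 64-bit
  // lanes 0, 2, 1, 3 (low to high), i.e. it swaps the two middle quadwords:
  //   input  (high..low): B1 A1 B0 A0
  //   output (high..low): B1 B0 A1 A0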
const __m256i _coeff256 = _mm256_permute4x64_epi64(*coeff256, 0xd8); const __m256i zero_coeff0 = _mm256_cmpeq_epi16(_coeff256, zero256); #else const __m256i zero_coeff0 = _mm256_cmpeq_epi16(*coeff256, zero256); #endif const __m256i nzero_coeff0 = _mm256_cmpeq_epi16(zero_coeff0, zero256); // Add one to convert from indices to counts const __m256i iscan_plus_one = _mm256_sub_epi16(iscan, nzero_coeff0); return _mm256_and_si256(iscan_plus_one, nzero_coeff0); } void vp9_quantize_fp_avx2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { __m128i eob; __m256i round256, quant256, dequant256; __m256i eob256, thr256; (void)scan; (void)skip_block; assert(!skip_block); coeff_ptr += n_coeffs; iscan += n_coeffs; qcoeff_ptr += n_coeffs; dqcoeff_ptr += n_coeffs; n_coeffs = -n_coeffs; { __m256i coeff256; // Setup global values { const __m128i round = _mm_load_si128((const __m128i *)round_ptr); const __m128i quant = _mm_load_si128((const __m128i *)quant_ptr); const __m128i dequant = _mm_load_si128((const __m128i *)dequant_ptr); round256 = _mm256_castsi128_si256(round); round256 = _mm256_permute4x64_epi64(round256, 0x54); quant256 = _mm256_castsi128_si256(quant); quant256 = _mm256_permute4x64_epi64(quant256, 0x54); dequant256 = _mm256_castsi128_si256(dequant); dequant256 = _mm256_permute4x64_epi64(dequant256, 0x54); } { __m256i qcoeff256; __m256i qtmp256; coeff256 = load_tran_low(coeff_ptr + n_coeffs); qcoeff256 = _mm256_abs_epi16(coeff256); qcoeff256 = _mm256_adds_epi16(qcoeff256, round256); qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256); qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256); store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs); coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256); store_tran_low(coeff256, dqcoeff_ptr + n_coeffs); } eob256 = scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256); n_coeffs += 8 * 2; } // remove dc constants dequant256 = _mm256_permute2x128_si256(dequant256, dequant256, 0x31); quant256 = _mm256_permute2x128_si256(quant256, quant256, 0x31); round256 = _mm256_permute2x128_si256(round256, round256, 0x31); thr256 = _mm256_srai_epi16(dequant256, 1); // AC only loop while (n_coeffs < 0) { __m256i coeff256 = load_tran_low(coeff_ptr + n_coeffs); __m256i qcoeff256 = _mm256_abs_epi16(coeff256); int32_t nzflag = _mm256_movemask_epi8(_mm256_cmpgt_epi16(qcoeff256, thr256)); if (nzflag) { __m256i qtmp256; qcoeff256 = _mm256_adds_epi16(qcoeff256, round256); qtmp256 = _mm256_mulhi_epi16(qcoeff256, quant256); qcoeff256 = _mm256_sign_epi16(qtmp256, coeff256); store_tran_low(qcoeff256, qcoeff_ptr + n_coeffs); coeff256 = _mm256_mullo_epi16(qcoeff256, dequant256); store_tran_low(coeff256, dqcoeff_ptr + n_coeffs); eob256 = _mm256_max_epi16( eob256, scan_eob_256((const __m256i *)(iscan + n_coeffs), &coeff256)); } else { store_zero_tran_low(qcoeff_ptr + n_coeffs); store_zero_tran_low(dqcoeff_ptr + n_coeffs); } n_coeffs += 8 * 2; } eob = _mm_max_epi16(_mm256_castsi256_si128(eob256), _mm256_extracti128_si256(eob256, 1)); *eob_ptr = accumulate_eob(eob); } libvpx-1.8.2/vp9/encoder/x86/vp9_quantize_sse2.c000066400000000000000000000156101357355204000213550ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include "./vp9_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/x86/bitdepth_conversion_sse2.h" void vp9_quantize_fp_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t *quant_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { __m128i zero; __m128i thr; int nzflag; __m128i eob; __m128i round, quant, dequant; (void)scan; (void)skip_block; assert(!skip_block); coeff_ptr += n_coeffs; iscan += n_coeffs; qcoeff_ptr += n_coeffs; dqcoeff_ptr += n_coeffs; n_coeffs = -n_coeffs; zero = _mm_setzero_si128(); { __m128i coeff0, coeff1; // Setup global values { round = _mm_load_si128((const __m128i *)round_ptr); quant = _mm_load_si128((const __m128i *)quant_ptr); dequant = _mm_load_si128((const __m128i *)dequant_ptr); } { __m128i coeff0_sign, coeff1_sign; __m128i qcoeff0, qcoeff1; __m128i qtmp0, qtmp1; // Do DC and first 15 AC coeff0 = load_tran_low(coeff_ptr + n_coeffs); coeff1 = load_tran_low(coeff_ptr + n_coeffs + 8); // Poor man's sign extract coeff0_sign = _mm_srai_epi16(coeff0, 15); coeff1_sign = _mm_srai_epi16(coeff1, 15); qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); qcoeff0 = _mm_adds_epi16(qcoeff0, round); round = _mm_unpackhi_epi64(round, round); qcoeff1 = _mm_adds_epi16(qcoeff1, round); qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); quant = _mm_unpackhi_epi64(quant, quant); qtmp1 = _mm_mulhi_epi16(qcoeff1, quant); // Reinsert signs qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); store_tran_low(qcoeff0, qcoeff_ptr + n_coeffs); store_tran_low(qcoeff1, qcoeff_ptr + n_coeffs + 8); coeff0 = _mm_mullo_epi16(qcoeff0, dequant); dequant = _mm_unpackhi_epi64(dequant, dequant); coeff1 = _mm_mullo_epi16(qcoeff1, dequant); store_tran_low(coeff0, dqcoeff_ptr + n_coeffs); store_tran_low(coeff1, dqcoeff_ptr + n_coeffs + 8); } { // Scan for eob __m128i zero_coeff0, zero_coeff1; __m128i nzero_coeff0, nzero_coeff1; __m128i iscan0, iscan1; __m128i eob1; zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero); zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs)); iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1); // Add one to convert from indices to counts iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); eob = _mm_and_si128(iscan0, nzero_coeff0); eob1 = _mm_and_si128(iscan1, nzero_coeff1); eob = _mm_max_epi16(eob, eob1); } n_coeffs += 8 * 2; } thr = _mm_srai_epi16(dequant, 1); // AC only loop while (n_coeffs < 0) { __m128i coeff0, coeff1; { __m128i coeff0_sign, coeff1_sign; __m128i qcoeff0, qcoeff1; __m128i qtmp0, qtmp1; coeff0 = load_tran_low(coeff_ptr + n_coeffs); coeff1 = load_tran_low(coeff_ptr + n_coeffs + 
8); // Poor man's sign extract coeff0_sign = _mm_srai_epi16(coeff0, 15); coeff1_sign = _mm_srai_epi16(coeff1, 15); qcoeff0 = _mm_xor_si128(coeff0, coeff0_sign); qcoeff1 = _mm_xor_si128(coeff1, coeff1_sign); qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); nzflag = _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff0, thr)) | _mm_movemask_epi8(_mm_cmpgt_epi16(qcoeff1, thr)); if (nzflag) { qcoeff0 = _mm_adds_epi16(qcoeff0, round); qcoeff1 = _mm_adds_epi16(qcoeff1, round); qtmp0 = _mm_mulhi_epi16(qcoeff0, quant); qtmp1 = _mm_mulhi_epi16(qcoeff1, quant); // Reinsert signs qcoeff0 = _mm_xor_si128(qtmp0, coeff0_sign); qcoeff1 = _mm_xor_si128(qtmp1, coeff1_sign); qcoeff0 = _mm_sub_epi16(qcoeff0, coeff0_sign); qcoeff1 = _mm_sub_epi16(qcoeff1, coeff1_sign); store_tran_low(qcoeff0, qcoeff_ptr + n_coeffs); store_tran_low(qcoeff1, qcoeff_ptr + n_coeffs + 8); coeff0 = _mm_mullo_epi16(qcoeff0, dequant); coeff1 = _mm_mullo_epi16(qcoeff1, dequant); store_tran_low(coeff0, dqcoeff_ptr + n_coeffs); store_tran_low(coeff1, dqcoeff_ptr + n_coeffs + 8); } else { store_zero_tran_low(qcoeff_ptr + n_coeffs); store_zero_tran_low(qcoeff_ptr + n_coeffs + 8); store_zero_tran_low(dqcoeff_ptr + n_coeffs); store_zero_tran_low(dqcoeff_ptr + n_coeffs + 8); } } if (nzflag) { // Scan for eob __m128i zero_coeff0, zero_coeff1; __m128i nzero_coeff0, nzero_coeff1; __m128i iscan0, iscan1; __m128i eob0, eob1; zero_coeff0 = _mm_cmpeq_epi16(coeff0, zero); zero_coeff1 = _mm_cmpeq_epi16(coeff1, zero); nzero_coeff0 = _mm_cmpeq_epi16(zero_coeff0, zero); nzero_coeff1 = _mm_cmpeq_epi16(zero_coeff1, zero); iscan0 = _mm_load_si128((const __m128i *)(iscan + n_coeffs)); iscan1 = _mm_load_si128((const __m128i *)(iscan + n_coeffs) + 1); // Add one to convert from indices to counts iscan0 = _mm_sub_epi16(iscan0, nzero_coeff0); iscan1 = _mm_sub_epi16(iscan1, nzero_coeff1); eob0 = _mm_and_si128(iscan0, nzero_coeff0); eob1 = _mm_and_si128(iscan1, nzero_coeff1); eob0 = _mm_max_epi16(eob0, eob1); eob = _mm_max_epi16(eob, eob0); } n_coeffs += 8 * 2; } // Accumulate EOB { __m128i eob_shuffled; eob_shuffled = _mm_shuffle_epi32(eob, 0xe); eob = _mm_max_epi16(eob, eob_shuffled); eob_shuffled = _mm_shufflelo_epi16(eob, 0xe); eob = _mm_max_epi16(eob, eob_shuffled); eob_shuffled = _mm_shufflelo_epi16(eob, 0x1); eob = _mm_max_epi16(eob, eob_shuffled); *eob_ptr = _mm_extract_epi16(eob, 1); } } libvpx-1.8.2/vp9/encoder/x86/vp9_quantize_ssse3_x86_64.asm000066400000000000000000000170601357355204000231160ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %define private_prefix vp9 %include "third_party/x86inc/x86inc.asm" %include "vpx_dsp/x86/bitdepth_conversion_sse2.asm" SECTION_RODATA pw_1: times 8 dw 1 SECTION .text %macro QUANTIZE_FP 2 cglobal quantize_%1, 0, %2, 15, coeff, ncoeff, skip, round, quant, \ qcoeff, dqcoeff, dequant, \ eob, scan, iscan ; actual quantize loop - setup pointers, rounders, etc. 
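; Scalar reference for the vector loop below (a sketch, not built):
;   tmp     = ((abs(coeff) + round) * quant) >> 16
;   qcoeff  = sign(coeff) * tmp
;   dqcoeff = qcoeff * dequant
; The fp_32x32 variant instead uses a halved round and a doubled quant
; (making the product an effective >> 15) and halves dqcoeff again, as
; set up by the %ifidn blocks below.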
movifnidn coeffq, coeffmp movifnidn ncoeffq, ncoeffmp mov r2, dequantmp movifnidn roundq, roundmp movifnidn quantq, quantmp mova m1, [roundq] ; m1 = round mova m2, [quantq] ; m2 = quant %ifidn %1, fp_32x32 pcmpeqw m5, m5 psrlw m5, 15 paddw m1, m5 psrlw m1, 1 ; m1 = (m1 + 1) / 2 %endif mova m3, [r2q] ; m3 = dequant mov r3, qcoeffmp mov r4, dqcoeffmp mov r5, iscanmp %ifidn %1, fp_32x32 psllw m2, 1 %endif pxor m5, m5 ; m5 = dedicated zero INCREMENT_ELEMENTS_TRAN_LOW coeffq, ncoeffq lea r5q, [r5q+ncoeffq*2] INCREMENT_ELEMENTS_TRAN_LOW r3q, ncoeffq INCREMENT_ELEMENTS_TRAN_LOW r4q, ncoeffq neg ncoeffq ; get DC and first 15 AC coeffs LOAD_TRAN_LOW 9, coeffq, ncoeffq ; m9 = c[i] LOAD_TRAN_LOW 10, coeffq, ncoeffq + 8 ; m10 = c[i] pabsw m6, m9 ; m6 = abs(m9) pabsw m11, m10 ; m11 = abs(m10) pcmpeqw m7, m7 paddsw m6, m1 ; m6 += round punpckhqdq m1, m1 paddsw m11, m1 ; m11 += round pmulhw m8, m6, m2 ; m8 = m6*q>>16 punpckhqdq m2, m2 pmulhw m13, m11, m2 ; m13 = m11*q>>16 psignw m8, m9 ; m8 = reinsert sign psignw m13, m10 ; m13 = reinsert sign STORE_TRAN_LOW 8, r3q, ncoeffq, 6, 11, 12 STORE_TRAN_LOW 13, r3q, ncoeffq + 8, 6, 11, 12 %ifidn %1, fp_32x32 pabsw m8, m8 pabsw m13, m13 %endif pmullw m8, m3 ; r4[i] = r3[i] * q punpckhqdq m3, m3 pmullw m13, m3 ; r4[i] = r3[i] * q %ifidn %1, fp_32x32 psrlw m8, 1 psrlw m13, 1 psignw m8, m9 psignw m13, m10 psrlw m0, m3, 2 %else psrlw m0, m3, 1 %endif STORE_TRAN_LOW 8, r4q, ncoeffq, 6, 11, 12 STORE_TRAN_LOW 13, r4q, ncoeffq + 8, 6, 11, 12 pcmpeqw m8, m5 ; m8 = c[i] == 0 pcmpeqw m13, m5 ; m13 = c[i] == 0 mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i] mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i] psubw m6, m7 ; m6 = scan[i] + 1 psubw m11, m7 ; m11 = scan[i] + 1 pandn m8, m6 ; m8 = max(eob) pandn m13, m11 ; m13 = max(eob) pmaxsw m8, m13 add ncoeffq, mmsize jz .accumulate_eob .ac_only_loop: LOAD_TRAN_LOW 9, coeffq, ncoeffq ; m9 = c[i] LOAD_TRAN_LOW 10, coeffq, ncoeffq + 8 ; m10 = c[i] pabsw m6, m9 ; m6 = abs(m9) pabsw m11, m10 ; m11 = abs(m10) pcmpgtw m7, m6, m0 pcmpgtw m12, m11, m0 pmovmskb r6d, m7 pmovmskb r2d, m12 or r6, r2 jz .skip_iter pcmpeqw m7, m7 paddsw m6, m1 ; m6 += round paddsw m11, m1 ; m11 += round pmulhw m14, m6, m2 ; m14 = m6*q>>16 pmulhw m13, m11, m2 ; m13 = m11*q>>16 psignw m14, m9 ; m14 = reinsert sign psignw m13, m10 ; m13 = reinsert sign STORE_TRAN_LOW 14, r3q, ncoeffq, 6, 11, 12 STORE_TRAN_LOW 13, r3q, ncoeffq + 8, 6, 11, 12 %ifidn %1, fp_32x32 pabsw m14, m14 pabsw m13, m13 %endif pmullw m14, m3 ; r4[i] = r3[i] * q pmullw m13, m3 ; r4[i] = r3[i] * q %ifidn %1, fp_32x32 psrlw m14, 1 psrlw m13, 1 psignw m14, m9 psignw m13, m10 %endif STORE_TRAN_LOW 14, r4q, ncoeffq, 6, 11, 12 STORE_TRAN_LOW 13, r4q, ncoeffq + 8, 6, 11, 12 pcmpeqw m14, m5 ; m14 = c[i] == 0 pcmpeqw m13, m5 ; m13 = c[i] == 0 mova m6, [ r5q+ncoeffq*2+ 0] ; m6 = scan[i] mova m11, [ r5q+ncoeffq*2+16] ; m11 = scan[i] psubw m6, m7 ; m6 = scan[i] + 1 psubw m11, m7 ; m11 = scan[i] + 1 pandn m14, m6 ; m14 = max(eob) pandn m13, m11 ; m13 = max(eob) pmaxsw m8, m14 pmaxsw m8, m13 add ncoeffq, mmsize jl .ac_only_loop jmp .accumulate_eob .skip_iter: STORE_ZERO_TRAN_LOW 5, r3q, ncoeffq STORE_ZERO_TRAN_LOW 5, r3q, ncoeffq + 8 STORE_ZERO_TRAN_LOW 5, r4q, ncoeffq STORE_ZERO_TRAN_LOW 5, r4q, ncoeffq + 8 add ncoeffq, mmsize jl .ac_only_loop .accumulate_eob: ; horizontally accumulate/max eobs and write into [eob] memory pointer mov r2, eobmp pshufd m7, m8, 0xe pmaxsw m8, m7 pshuflw m7, m8, 0xe pmaxsw m8, m7 pshuflw m7, m8, 0x1 pmaxsw m8, m7 pextrw r6, m8, 0 mov [r2], r6w RET %endmacro INIT_XMM ssse3 QUANTIZE_FP 
fp, 7 QUANTIZE_FP fp_32x32, 7 libvpx-1.8.2/vp9/exports_dec000066400000000000000000000000611357355204000160070ustar00rootroot00000000000000data vpx_codec_vp9_dx_algo text vpx_codec_vp9_dx libvpx-1.8.2/vp9/exports_enc000066400000000000000000000000611357355204000160210ustar00rootroot00000000000000data vpx_codec_vp9_cx_algo text vpx_codec_vp9_cx libvpx-1.8.2/vp9/simple_encode.cc000066400000000000000000000271611357355204000166740ustar00rootroot00000000000000#include #include "vp9/common/vp9_onyxc_int.h" #include "vp9/vp9_iface_common.h" #include "vp9/encoder/vp9_encoder.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/simple_encode.h" #include "vp9/vp9_cx_iface.h" namespace vp9 { // TODO(angiebird): Merge this function with vpx_img_plane_width() static int img_plane_width(const vpx_image_t *img, int plane) { if (plane > 0 && img->x_chroma_shift > 0) return (img->d_w + 1) >> img->x_chroma_shift; else return img->d_w; } // TODO(angiebird): Merge this function with vpx_img_plane_height() static int img_plane_height(const vpx_image_t *img, int plane) { if (plane > 0 && img->y_chroma_shift > 0) return (img->d_h + 1) >> img->y_chroma_shift; else return img->d_h; } // TODO(angiebird): Merge this function with vpx_img_read() static int img_read(vpx_image_t *img, FILE *file) { int plane; for (plane = 0; plane < 3; ++plane) { unsigned char *buf = img->planes[plane]; const int stride = img->stride[plane]; const int w = img_plane_width(img, plane) * ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1); const int h = img_plane_height(img, plane); int y; for (y = 0; y < h; ++y) { if (fread(buf, 1, w, file) != (size_t)w) return 0; buf += stride; } } return 1; } class SimpleEncode::EncodeImpl { public: VP9_COMP *cpi; vpx_img_fmt_t img_fmt; vpx_image_t tmp_img; std::vector first_pass_stats; }; static VP9_COMP *init_encoder(const VP9EncoderConfig *oxcf, vpx_img_fmt_t img_fmt) { VP9_COMP *cpi; BufferPool *buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(*buffer_pool)); vp9_initialize_enc(); cpi = vp9_create_compressor(oxcf, buffer_pool); vp9_update_compressor_with_img_fmt(cpi, img_fmt); return cpi; } static void free_encoder(VP9_COMP *cpi) { BufferPool *buffer_pool = cpi->common.buffer_pool; vp9_remove_compressor(cpi); // buffer_pool needs to be free after cpi because buffer_pool contains // allocated buffers that will be free in vp9_remove_compressor() vpx_free(buffer_pool); } static INLINE vpx_rational_t make_vpx_rational(int num, int den) { vpx_rational_t v; v.num = num; v.den = den; return v; } static INLINE FrameType get_frame_type_from_update_type(FRAME_UPDATE_TYPE update_type) { // TODO(angiebird): Figure out if we need frame type other than key frame, // alternate reference and inter frame switch (update_type) { case KF_UPDATE: return kKeyFrame; break; case ARF_UPDATE: return kAlternateReference; break; default: return kInterFrame; break; } } static void update_encode_frame_result( EncodeFrameResult *encode_frame_result, const ENCODE_FRAME_RESULT *encode_frame_info) { encode_frame_result->coding_data_bit_size = encode_frame_result->coding_data_byte_size * 8; encode_frame_result->show_idx = encode_frame_info->show_idx; encode_frame_result->frame_type = get_frame_type_from_update_type(encode_frame_info->update_type); encode_frame_result->psnr = encode_frame_info->psnr; encode_frame_result->sse = encode_frame_info->sse; encode_frame_result->quantize_index = encode_frame_info->quantize_index; } SimpleEncode::SimpleEncode(int frame_width, int frame_height, int frame_rate_num, int frame_rate_den, int 
target_bitrate, int num_frames, const char *infile_path) { impl_ptr_ = std::unique_ptr(new EncodeImpl()); frame_width_ = frame_width; frame_height_ = frame_height; frame_rate_num_ = frame_rate_num; frame_rate_den_ = frame_rate_den; target_bitrate_ = target_bitrate; num_frames_ = num_frames; // TODO(angirbid): Should we keep a file pointer here or keep the file_path? file_ = fopen(infile_path, "r"); impl_ptr_->cpi = NULL; impl_ptr_->img_fmt = VPX_IMG_FMT_I420; } void SimpleEncode::ComputeFirstPassStats() { vpx_rational_t frame_rate = make_vpx_rational(frame_rate_num_, frame_rate_den_); const VP9EncoderConfig oxcf = vp9_get_encoder_config(frame_width_, frame_height_, frame_rate, target_bitrate_, VPX_RC_FIRST_PASS); VP9_COMP *cpi = init_encoder(&oxcf, impl_ptr_->img_fmt); struct lookahead_ctx *lookahead = cpi->lookahead; int i; int use_highbitdepth = 0; #if CONFIG_VP9_HIGHBITDEPTH use_highbitdepth = cpi->common.use_highbitdepth; #endif vpx_image_t img; vpx_img_alloc(&img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1); rewind(file_); impl_ptr_->first_pass_stats.clear(); for (i = 0; i < num_frames_; ++i) { assert(!vp9_lookahead_full(lookahead)); if (img_read(&img, file_)) { int next_show_idx = vp9_lookahead_next_show_idx(lookahead); int64_t ts_start = timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx); int64_t ts_end = timebase_units_to_ticks(&oxcf.g_timebase_in_ts, next_show_idx + 1); YV12_BUFFER_CONFIG sd; image2yuvconfig(&img, &sd); vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0); { int64_t time_stamp; int64_t time_end; int flush = 1; // Makes vp9_get_compressed_data process a frame size_t size; unsigned int frame_flags = 0; ENCODE_FRAME_RESULT encode_frame_info; // TODO(angiebird): Call vp9_first_pass directly vp9_get_compressed_data(cpi, &frame_flags, &size, NULL, &time_stamp, &time_end, flush, &encode_frame_info); // vp9_get_compressed_data only generates first pass stats not // compresses data assert(size == 0); } impl_ptr_->first_pass_stats.push_back(vp9_get_frame_stats(&cpi->twopass)); } } vp9_end_first_pass(cpi); // TODO(angiebird): Store the total_stats apart form first_pass_stats impl_ptr_->first_pass_stats.push_back(vp9_get_total_stats(&cpi->twopass)); free_encoder(cpi); rewind(file_); vpx_img_free(&img); } std::vector> SimpleEncode::ObserveFirstPassStats() { std::vector> output_stats; // TODO(angiebird): This function make several assumptions of // FIRSTPASS_STATS. 1) All elements in FIRSTPASS_STATS are double except the // last one. 2) The last entry of first_pass_stats is the total_stats. // Change the code structure, so that we don't have to make these assumptions // Note the last entry of first_pass_stats is the total_stats, we don't need // it. 
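  // Put differently, the walk below assumes a layout along the lines of
  //   struct FIRSTPASS_STATS { double f0; /* ... */ double fn; int64_t last; };
  // (placeholder field names; the real struct lives in vp9_firstpass.h), so
  // each record can be read as sizeof(stats) / sizeof(double) - 1 doubles.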
for (size_t i = 0; i < impl_ptr_->first_pass_stats.size() - 1; ++i) { double *buf_start = reinterpret_cast(&impl_ptr_->first_pass_stats[i]); // We use - 1 here because the last member in FIRSTPASS_STATS is not double double *buf_end = buf_start + sizeof(impl_ptr_->first_pass_stats[i]) / sizeof(*buf_end) - 1; std::vector this_stats(buf_start, buf_end); output_stats.push_back(this_stats); } return output_stats; } void SimpleEncode::StartEncode() { assert(impl_ptr_->first_pass_stats.size() > 0); vpx_rational_t frame_rate = make_vpx_rational(frame_rate_num_, frame_rate_den_); VP9EncoderConfig oxcf = vp9_get_encoder_config(frame_width_, frame_height_, frame_rate, target_bitrate_, VPX_RC_LAST_PASS); vpx_fixed_buf_t stats; stats.buf = impl_ptr_->first_pass_stats.data(); stats.sz = sizeof(impl_ptr_->first_pass_stats[0]) * impl_ptr_->first_pass_stats.size(); vp9_set_first_pass_stats(&oxcf, &stats); assert(impl_ptr_->cpi == NULL); impl_ptr_->cpi = init_encoder(&oxcf, impl_ptr_->img_fmt); vpx_img_alloc(&impl_ptr_->tmp_img, impl_ptr_->img_fmt, frame_width_, frame_height_, 1); rewind(file_); } void SimpleEncode::EndEncode() { free_encoder(impl_ptr_->cpi); impl_ptr_->cpi = nullptr; vpx_img_free(&impl_ptr_->tmp_img); rewind(file_); } int SimpleEncode::GetKeyFrameGroupSize(int key_frame_index) const { const VP9_COMP *cpi = impl_ptr_->cpi; return vp9_get_frames_to_next_key(&cpi->oxcf, &cpi->frame_info, &cpi->twopass.first_pass_info, key_frame_index, cpi->rc.min_gf_interval); } void SimpleEncode::EncodeFrame(EncodeFrameResult *encode_frame_result) { VP9_COMP *cpi = impl_ptr_->cpi; struct lookahead_ctx *lookahead = cpi->lookahead; int use_highbitdepth = 0; #if CONFIG_VP9_HIGHBITDEPTH use_highbitdepth = cpi->common.use_highbitdepth; #endif // The lookahead's size is set to oxcf->lag_in_frames. // We want to fill lookahead to it's max capacity if possible so that the // encoder can construct alt ref frame in time. // In the other words, we hope vp9_get_compressed_data to encode a frame // every time in the function while (!vp9_lookahead_full(lookahead)) { // TODO(angiebird): Check whether we can move this file read logics to // lookahead if (img_read(&impl_ptr_->tmp_img, file_)) { int next_show_idx = vp9_lookahead_next_show_idx(lookahead); int64_t ts_start = timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, next_show_idx); int64_t ts_end = timebase_units_to_ticks(&cpi->oxcf.g_timebase_in_ts, next_show_idx + 1); YV12_BUFFER_CONFIG sd; image2yuvconfig(&impl_ptr_->tmp_img, &sd); vp9_lookahead_push(lookahead, &sd, ts_start, ts_end, use_highbitdepth, 0); } else { break; } } assert(encode_frame_result->coding_data.get() == nullptr); const size_t max_coding_data_byte_size = frame_width_ * frame_height_ * 3; encode_frame_result->coding_data = std::move( std::unique_ptr(new uint8_t[max_coding_data_byte_size])); int64_t time_stamp; int64_t time_end; int flush = 1; // Make vp9_get_compressed_data encode a frame unsigned int frame_flags = 0; ENCODE_FRAME_RESULT encode_frame_info; vp9_get_compressed_data(cpi, &frame_flags, &encode_frame_result->coding_data_byte_size, encode_frame_result->coding_data.get(), &time_stamp, &time_end, flush, &encode_frame_info); // vp9_get_compressed_data is expected to encode a frame every time, so the // data size should be greater than zero. 
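  // The upper-bound assert below also serves as a debug-build check that the
  // compressed frame actually fit in the max_coding_data_byte_size buffer
  // allocated above.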
assert(encode_frame_result->coding_data_byte_size > 0); assert(encode_frame_result->coding_data_byte_size < max_coding_data_byte_size); update_encode_frame_result(encode_frame_result, &encode_frame_info); } void SimpleEncode::EncodeFrameWithQuantizeIndex( EncodeFrameResult *encode_frame_result, int quantize_index) { encode_command_set_external_quantize_index(&impl_ptr_->cpi->encode_command, quantize_index); EncodeFrame(encode_frame_result); encode_command_reset_external_quantize_index(&impl_ptr_->cpi->encode_command); } int SimpleEncode::GetCodingFrameNum() const { assert(impl_ptr_->first_pass_stats.size() - 1 > 0); // These are the default settings for now. const int multi_layer_arf = 0; const int allow_alt_ref = 1; vpx_rational_t frame_rate = make_vpx_rational(frame_rate_num_, frame_rate_den_); const VP9EncoderConfig oxcf = vp9_get_encoder_config(frame_width_, frame_height_, frame_rate, target_bitrate_, VPX_RC_LAST_PASS); FRAME_INFO frame_info = vp9_get_frame_info(&oxcf); FIRST_PASS_INFO first_pass_info; fps_init_first_pass_info(&first_pass_info, impl_ptr_->first_pass_stats.data(), num_frames_); return vp9_get_coding_frame_num(&oxcf, &frame_info, &first_pass_info, multi_layer_arf, allow_alt_ref); } SimpleEncode::~SimpleEncode() { if (this->file_ != NULL) { fclose(this->file_); } } } // namespace vp9 libvpx-1.8.2/vp9/simple_encode.h000066400000000000000000000065221357355204000165340ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_SIMPLE_ENCODE_H_ #define VPX_VP9_SIMPLE_ENCODE_H_ #include #include #include #include #include namespace vp9 { enum FrameType { kKeyFrame = 0, kInterFrame, kAlternateReference, }; struct EncodeFrameResult { int show_idx; FrameType frame_type; size_t coding_data_bit_size; size_t coding_data_byte_size; // The EncodeFrame will allocate a buffer, write the coding data into the // buffer and give the ownership of the buffer to coding_data. std::unique_ptr coding_data; double psnr; uint64_t sse; int quantize_index; }; class SimpleEncode { public: SimpleEncode(int frame_width, int frame_height, int frame_rate_num, int frame_rate_den, int target_bitrate, int num_frames, const char *infile_path); ~SimpleEncode(); SimpleEncode(SimpleEncode &) = delete; SimpleEncode &operator=(const SimpleEncode &) = delete; // Makes encoder compute the first pass stats and store it internally for // future encode. void ComputeFirstPassStats(); // Outputs the first pass stats represented by a 2-D vector. // One can use the frame index at first dimension to retrieve the stats for // each video frame. The stats of each video frame is a vector of 25 double // values. For details, please check FIRSTPASS_STATS in vp9_firstpass.h std::vector> ObserveFirstPassStats(); // Initializes the encoder for actual encoding. // This function should be called after ComputeFirstPassStats(). void StartEncode(); // Frees the encoder. // This function should be called after StartEncode() or EncodeFrame(). void EndEncode(); // Given a key_frame_index, computes this key frame group's size. // The key frame group size includes one key frame plus the number of // following inter frames. 
Note that the key frame group size only counts the // show frames. The number of no show frames like alternate refereces are not // counted. int GetKeyFrameGroupSize(int key_frame_index) const; // Encodes a frame // This function should be called after StartEncode() and before EndEncode(). void EncodeFrame(EncodeFrameResult *encode_frame_result); // Encodes a frame with a specific quantize index. // This function should be called after StartEncode() and before EndEncode(). void EncodeFrameWithQuantizeIndex(EncodeFrameResult *encode_frame_result, int quantize_index); // Gets the number of coding frames for the video. The coding frames include // show frame and no show frame. // This function should be called after ComputeFirstPassStats(). int GetCodingFrameNum() const; private: class EncodeImpl; int frame_width_; int frame_height_; int frame_rate_num_; int frame_rate_den_; int target_bitrate_; int num_frames_; std::FILE *file_; std::unique_ptr impl_ptr_; }; } // namespace vp9 #endif // VPX_VP9_SIMPLE_ENCODE_H_ libvpx-1.8.2/vp9/vp9_common.mk000066400000000000000000000110261357355204000161670ustar00rootroot00000000000000## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## VP9_COMMON_SRCS-yes += vp9_common.mk VP9_COMMON_SRCS-yes += vp9_iface_common.h VP9_COMMON_SRCS-yes += vp9_iface_common.c VP9_COMMON_SRCS-yes += common/vp9_ppflags.h VP9_COMMON_SRCS-yes += common/vp9_alloccommon.c VP9_COMMON_SRCS-yes += common/vp9_blockd.c # VP9_COMMON_SRCS-yes += common/vp9_debugmodes.c VP9_COMMON_SRCS-yes += common/vp9_entropy.c VP9_COMMON_SRCS-yes += common/vp9_entropymode.c VP9_COMMON_SRCS-yes += common/vp9_entropymv.c VP9_COMMON_SRCS-yes += common/vp9_frame_buffers.c VP9_COMMON_SRCS-yes += common/vp9_frame_buffers.h VP9_COMMON_SRCS-yes += common/vp9_idct.c VP9_COMMON_SRCS-yes += common/vp9_alloccommon.h VP9_COMMON_SRCS-yes += common/vp9_blockd.h VP9_COMMON_SRCS-yes += common/vp9_common.h VP9_COMMON_SRCS-yes += common/vp9_entropy.h VP9_COMMON_SRCS-yes += common/vp9_entropymode.h VP9_COMMON_SRCS-yes += common/vp9_entropymv.h VP9_COMMON_SRCS-yes += common/vp9_enums.h VP9_COMMON_SRCS-yes += common/vp9_filter.h VP9_COMMON_SRCS-yes += common/vp9_filter.c VP9_COMMON_SRCS-yes += common/vp9_idct.h VP9_COMMON_SRCS-yes += common/vp9_loopfilter.h VP9_COMMON_SRCS-yes += common/vp9_thread_common.h VP9_COMMON_SRCS-yes += common/vp9_mv.h VP9_COMMON_SRCS-yes += common/vp9_onyxc_int.h VP9_COMMON_SRCS-yes += common/vp9_pred_common.h VP9_COMMON_SRCS-yes += common/vp9_pred_common.c VP9_COMMON_SRCS-yes += common/vp9_quant_common.h VP9_COMMON_SRCS-yes += common/vp9_reconinter.h VP9_COMMON_SRCS-yes += common/vp9_reconintra.h VP9_COMMON_SRCS-yes += common/vp9_rtcd.c VP9_COMMON_SRCS-yes += common/vp9_rtcd_defs.pl VP9_COMMON_SRCS-yes += common/vp9_scale.h VP9_COMMON_SRCS-yes += common/vp9_scale.c VP9_COMMON_SRCS-yes += common/vp9_seg_common.h VP9_COMMON_SRCS-yes += common/vp9_seg_common.c VP9_COMMON_SRCS-yes += common/vp9_tile_common.h VP9_COMMON_SRCS-yes += common/vp9_tile_common.c VP9_COMMON_SRCS-yes += common/vp9_loopfilter.c VP9_COMMON_SRCS-yes += common/vp9_thread_common.c VP9_COMMON_SRCS-yes += common/vp9_mvref_common.c VP9_COMMON_SRCS-yes += common/vp9_mvref_common.h 
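# Entries suffixed -yes are built unconditionally; entries guarded by
# -$(HAVE_*) or -$(CONFIG_*) further below are only appended when configure
# set the corresponding feature flag to yes.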
VP9_COMMON_SRCS-yes += common/vp9_quant_common.c VP9_COMMON_SRCS-yes += common/vp9_reconinter.c VP9_COMMON_SRCS-yes += common/vp9_reconintra.c VP9_COMMON_SRCS-yes += common/vp9_common_data.c VP9_COMMON_SRCS-yes += common/vp9_common_data.h VP9_COMMON_SRCS-yes += common/vp9_scan.c VP9_COMMON_SRCS-yes += common/vp9_scan.h VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.h VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_postproc.c VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.h VP9_COMMON_SRCS-$(CONFIG_VP9_POSTPROC) += common/vp9_mfqe.c ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct4x4_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct8x8_msa.c VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_idct16x16_msa.c endif # !CONFIG_VP9_HIGHBITDEPTH VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_idct_intrin_sse2.c VP9_COMMON_SRCS-$(HAVE_VSX) += common/ppc/vp9_idct_vsx.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht4x4_add_neon.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht8x8_add_neon.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht16x16_add_neon.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_iht_neon.h ifeq ($(CONFIG_VP9_POSTPROC),yes) VP9_COMMON_SRCS-$(HAVE_MSA) += common/mips/msa/vp9_mfqe_msa.c VP9_COMMON_SRCS-$(HAVE_SSE2) += common/x86/vp9_mfqe_sse2.asm endif ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans4_dspr2.c VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans8_dspr2.c VP9_COMMON_SRCS-$(HAVE_DSPR2) += common/mips/dspr2/vp9_itrans16_dspr2.c else VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_highbd_iht4x4_add_neon.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_highbd_iht8x8_add_neon.c VP9_COMMON_SRCS-$(HAVE_NEON) += common/arm/neon/vp9_highbd_iht16x16_add_neon.c VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht4x4_add_sse4.c VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht8x8_add_sse4.c VP9_COMMON_SRCS-$(HAVE_SSE4_1) += common/x86/vp9_highbd_iht16x16_add_sse4.c endif $(eval $(call rtcd_h_template,vp9_rtcd,vp9/common/vp9_rtcd_defs.pl)) libvpx-1.8.2/vp9/vp9_cx_iface.c000066400000000000000000002272441357355204000162660ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include "./vpx_config.h" #include "vpx/vpx_encoder.h" #include "vpx_dsp/psnr.h" #include "vpx_ports/vpx_once.h" #include "vpx_ports/system_state.h" #include "vpx_util/vpx_timestamp.h" #include "vpx/internal/vpx_codec_internal.h" #include "./vpx_version.h" #include "vp9/encoder/vp9_encoder.h" #include "vpx/vp8cx.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/vp9_cx_iface.h" #include "vp9/encoder/vp9_firstpass.h" #include "vp9/encoder/vp9_lookahead.h" #include "vp9/vp9_cx_iface.h" #include "vp9/vp9_iface_common.h" typedef struct vp9_extracfg { int cpu_used; // available cpu percentage in 1/16 unsigned int enable_auto_alt_ref; unsigned int noise_sensitivity; unsigned int sharpness; unsigned int static_thresh; unsigned int tile_columns; unsigned int tile_rows; unsigned int enable_tpl_model; unsigned int arnr_max_frames; unsigned int arnr_strength; unsigned int min_gf_interval; unsigned int max_gf_interval; vp8e_tuning tuning; unsigned int cq_level; // constrained quality level unsigned int rc_max_intra_bitrate_pct; unsigned int rc_max_inter_bitrate_pct; unsigned int gf_cbr_boost_pct; unsigned int lossless; unsigned int target_level; unsigned int frame_parallel_decoding_mode; AQ_MODE aq_mode; int alt_ref_aq; unsigned int frame_periodic_boost; vpx_bit_depth_t bit_depth; vp9e_tune_content content; vpx_color_space_t color_space; vpx_color_range_t color_range; int render_width; int render_height; unsigned int row_mt; unsigned int motion_vector_unit_test; } vp9_extracfg; static struct vp9_extracfg default_extra_cfg = { 0, // cpu_used 1, // enable_auto_alt_ref 0, // noise_sensitivity 0, // sharpness 0, // static_thresh 6, // tile_columns 0, // tile_rows 1, // enable_tpl_model 7, // arnr_max_frames 5, // arnr_strength 0, // min_gf_interval; 0 -> default decision 0, // max_gf_interval; 0 -> default decision VP8_TUNE_PSNR, // tuning 10, // cq_level 0, // rc_max_intra_bitrate_pct 0, // rc_max_inter_bitrate_pct 0, // gf_cbr_boost_pct 0, // lossless 255, // target_level 1, // frame_parallel_decoding_mode NO_AQ, // aq_mode 0, // alt_ref_aq 0, // frame_periodic_delta_q VPX_BITS_8, // Bit depth VP9E_CONTENT_DEFAULT, // content VPX_CS_UNKNOWN, // color space 0, // color range 0, // render width 0, // render height 0, // row_mt 0, // motion_vector_unit_test }; struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_enc_cfg_t cfg; struct vp9_extracfg extra_cfg; vpx_rational64_t timestamp_ratio; vpx_codec_pts_t pts_offset; unsigned char pts_offset_initialized; VP9EncoderConfig oxcf; VP9_COMP *cpi; unsigned char *cx_data; size_t cx_data_sz; unsigned char *pending_cx_data; size_t pending_cx_data_sz; int pending_frame_count; size_t pending_frame_sizes[8]; size_t pending_frame_magnitude; vpx_image_t preview_img; vpx_enc_frame_flags_t next_frame_flags; vp8_postproc_cfg_t preview_ppcfg; vpx_codec_pkt_list_decl(256) pkt_list; unsigned int fixed_kf_cntr; vpx_codec_priv_output_cx_pkt_cb_pair_t output_cx_pkt_cb; // BufferPool that holds all reference frames. BufferPool *buffer_pool; }; static vpx_codec_err_t update_error_state( vpx_codec_alg_priv_t *ctx, const struct vpx_internal_error_info *error) { const vpx_codec_err_t res = error->error_code; if (res != VPX_CODEC_OK) ctx->base.err_detail = error->has_detail ? 
error->detail : NULL; return res; } #undef ERROR #define ERROR(str) \ do { \ ctx->base.err_detail = str; \ return VPX_CODEC_INVALID_PARAM; \ } while (0) #define RANGE_CHECK(p, memb, lo, hi) \ do { \ if (!(((p)->memb == (lo) || (p)->memb > (lo)) && (p)->memb <= (hi))) \ ERROR(#memb " out of range [" #lo ".." #hi "]"); \ } while (0) #define RANGE_CHECK_HI(p, memb, hi) \ do { \ if (!((p)->memb <= (hi))) ERROR(#memb " out of range [.." #hi "]"); \ } while (0) #define RANGE_CHECK_LO(p, memb, lo) \ do { \ if (!((p)->memb >= (lo))) ERROR(#memb " out of range [" #lo "..]"); \ } while (0) #define RANGE_CHECK_BOOL(p, memb) \ do { \ if (!!((p)->memb) != (p)->memb) ERROR(#memb " expected boolean"); \ } while (0) #if defined(_MSC_VER) #define COMPILE_TIME_ASSERT(boolexp) \ do { \ char compile_time_assert[(boolexp) ? 1 : -1]; \ (void)compile_time_assert; \ } while (0) #else // !_MSC_VER #define COMPILE_TIME_ASSERT(boolexp) \ do { \ struct { \ unsigned int compile_time_assert : (boolexp) ? 1 : -1; \ } compile_time_assert; \ (void)compile_time_assert; \ } while (0) #endif // _MSC_VER static vpx_codec_err_t validate_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) { RANGE_CHECK(cfg, g_w, 1, 65535); // 16 bits available RANGE_CHECK(cfg, g_h, 1, 65535); // 16 bits available RANGE_CHECK(cfg, g_timebase.den, 1, 1000000000); RANGE_CHECK(cfg, g_timebase.num, 1, 1000000000); RANGE_CHECK_HI(cfg, g_profile, 3); RANGE_CHECK_HI(cfg, rc_max_quantizer, 63); RANGE_CHECK_HI(cfg, rc_min_quantizer, cfg->rc_max_quantizer); RANGE_CHECK_BOOL(extra_cfg, lossless); RANGE_CHECK_BOOL(extra_cfg, frame_parallel_decoding_mode); RANGE_CHECK(extra_cfg, aq_mode, 0, AQ_MODE_COUNT - 2); RANGE_CHECK(extra_cfg, alt_ref_aq, 0, 1); RANGE_CHECK(extra_cfg, frame_periodic_boost, 0, 1); RANGE_CHECK_HI(cfg, g_threads, 64); RANGE_CHECK_HI(cfg, g_lag_in_frames, MAX_LAG_BUFFERS); RANGE_CHECK(cfg, rc_end_usage, VPX_VBR, VPX_Q); RANGE_CHECK_HI(cfg, rc_undershoot_pct, 100); RANGE_CHECK_HI(cfg, rc_overshoot_pct, 100); RANGE_CHECK_HI(cfg, rc_2pass_vbr_bias_pct, 100); RANGE_CHECK(cfg, rc_2pass_vbr_corpus_complexity, 0, 10000); RANGE_CHECK(cfg, kf_mode, VPX_KF_DISABLED, VPX_KF_AUTO); RANGE_CHECK_BOOL(cfg, rc_resize_allowed); RANGE_CHECK_HI(cfg, rc_dropframe_thresh, 100); RANGE_CHECK_HI(cfg, rc_resize_up_thresh, 100); RANGE_CHECK_HI(cfg, rc_resize_down_thresh, 100); #if CONFIG_REALTIME_ONLY RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_ONE_PASS); #else RANGE_CHECK(cfg, g_pass, VPX_RC_ONE_PASS, VPX_RC_LAST_PASS); #endif RANGE_CHECK(extra_cfg, min_gf_interval, 0, (MAX_LAG_BUFFERS - 1)); RANGE_CHECK(extra_cfg, max_gf_interval, 0, (MAX_LAG_BUFFERS - 1)); if (extra_cfg->max_gf_interval > 0) { RANGE_CHECK(extra_cfg, max_gf_interval, 2, (MAX_LAG_BUFFERS - 1)); } if (extra_cfg->min_gf_interval > 0 && extra_cfg->max_gf_interval > 0) { RANGE_CHECK(extra_cfg, max_gf_interval, extra_cfg->min_gf_interval, (MAX_LAG_BUFFERS - 1)); } // For formation of valid ARF groups lag_in _frames should be 0 or greater // than the max_gf_interval + 2 if (cfg->g_lag_in_frames > 0 && extra_cfg->max_gf_interval > 0 && cfg->g_lag_in_frames < extra_cfg->max_gf_interval + 2) { ERROR("Set lag in frames to 0 (low delay) or >= (max-gf-interval + 2)"); } if (cfg->rc_resize_allowed == 1) { RANGE_CHECK(cfg, rc_scaled_width, 0, cfg->g_w); RANGE_CHECK(cfg, rc_scaled_height, 0, cfg->g_h); } RANGE_CHECK(cfg, ss_number_layers, 1, VPX_SS_MAX_LAYERS); RANGE_CHECK(cfg, ts_number_layers, 1, VPX_TS_MAX_LAYERS); { unsigned int level = 
extra_cfg->target_level; if (level != LEVEL_1 && level != LEVEL_1_1 && level != LEVEL_2 && level != LEVEL_2_1 && level != LEVEL_3 && level != LEVEL_3_1 && level != LEVEL_4 && level != LEVEL_4_1 && level != LEVEL_5 && level != LEVEL_5_1 && level != LEVEL_5_2 && level != LEVEL_6 && level != LEVEL_6_1 && level != LEVEL_6_2 && level != LEVEL_UNKNOWN && level != LEVEL_AUTO && level != LEVEL_MAX) ERROR("target_level is invalid"); } if (cfg->ss_number_layers * cfg->ts_number_layers > VPX_MAX_LAYERS) ERROR("ss_number_layers * ts_number_layers is out of range"); if (cfg->ts_number_layers > 1) { unsigned int sl, tl; for (sl = 1; sl < cfg->ss_number_layers; ++sl) { for (tl = 1; tl < cfg->ts_number_layers; ++tl) { const int layer = LAYER_IDS_TO_IDX(sl, tl, cfg->ts_number_layers); if (cfg->layer_target_bitrate[layer] < cfg->layer_target_bitrate[layer - 1]) ERROR("ts_target_bitrate entries are not increasing"); } } RANGE_CHECK(cfg, ts_rate_decimator[cfg->ts_number_layers - 1], 1, 1); for (tl = cfg->ts_number_layers - 2; tl > 0; --tl) if (cfg->ts_rate_decimator[tl - 1] != 2 * cfg->ts_rate_decimator[tl]) ERROR("ts_rate_decimator factors are not powers of 2"); } // VP9 does not support a lower bound on the keyframe interval in // automatic keyframe placement mode. if (cfg->kf_mode != VPX_KF_DISABLED && cfg->kf_min_dist != cfg->kf_max_dist && cfg->kf_min_dist > 0) ERROR( "kf_min_dist not supported in auto mode, use 0 " "or kf_max_dist instead."); RANGE_CHECK(extra_cfg, row_mt, 0, 1); RANGE_CHECK(extra_cfg, motion_vector_unit_test, 0, 2); RANGE_CHECK(extra_cfg, enable_auto_alt_ref, 0, MAX_ARF_LAYERS); RANGE_CHECK(extra_cfg, cpu_used, -9, 9); RANGE_CHECK_HI(extra_cfg, noise_sensitivity, 6); RANGE_CHECK(extra_cfg, tile_columns, 0, 6); RANGE_CHECK(extra_cfg, tile_rows, 0, 2); RANGE_CHECK_HI(extra_cfg, sharpness, 7); RANGE_CHECK(extra_cfg, arnr_max_frames, 0, 15); RANGE_CHECK_HI(extra_cfg, arnr_strength, 6); RANGE_CHECK(extra_cfg, cq_level, 0, 63); RANGE_CHECK(cfg, g_bit_depth, VPX_BITS_8, VPX_BITS_12); RANGE_CHECK(cfg, g_input_bit_depth, 8, 12); RANGE_CHECK(extra_cfg, content, VP9E_CONTENT_DEFAULT, VP9E_CONTENT_INVALID - 1); #if !CONFIG_REALTIME_ONLY if (cfg->g_pass == VPX_RC_LAST_PASS) { const size_t packet_sz = sizeof(FIRSTPASS_STATS); const int n_packets = (int)(cfg->rc_twopass_stats_in.sz / packet_sz); const FIRSTPASS_STATS *stats; if (cfg->rc_twopass_stats_in.buf == NULL) ERROR("rc_twopass_stats_in.buf not set."); if (cfg->rc_twopass_stats_in.sz % packet_sz) ERROR("rc_twopass_stats_in.sz indicates truncated packet."); if (cfg->ss_number_layers > 1 || cfg->ts_number_layers > 1) { int i; unsigned int n_packets_per_layer[VPX_SS_MAX_LAYERS] = { 0 }; stats = cfg->rc_twopass_stats_in.buf; for (i = 0; i < n_packets; ++i) { const int layer_id = (int)stats[i].spatial_layer_id; if (layer_id >= 0 && layer_id < (int)cfg->ss_number_layers) { ++n_packets_per_layer[layer_id]; } } for (i = 0; i < (int)cfg->ss_number_layers; ++i) { unsigned int layer_id; if (n_packets_per_layer[i] < 2) { ERROR( "rc_twopass_stats_in requires at least two packets for each " "layer."); } stats = (const FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + n_packets - cfg->ss_number_layers + i; layer_id = (int)stats->spatial_layer_id; if (layer_id >= cfg->ss_number_layers || (unsigned int)(stats->count + 0.5) != n_packets_per_layer[layer_id] - 1) ERROR("rc_twopass_stats_in missing EOS stats packet"); } } else { if (cfg->rc_twopass_stats_in.sz < 2 * packet_sz) ERROR("rc_twopass_stats_in requires at least two packets."); stats = (const 
FIRSTPASS_STATS *)cfg->rc_twopass_stats_in.buf + n_packets - 1; if ((int)(stats->count + 0.5) != n_packets - 1) ERROR("rc_twopass_stats_in missing EOS stats packet"); } } #endif // !CONFIG_REALTIME_ONLY #if !CONFIG_VP9_HIGHBITDEPTH if (cfg->g_profile > (unsigned int)PROFILE_1) { ERROR("Profile > 1 not supported in this build configuration"); } #endif if (cfg->g_profile <= (unsigned int)PROFILE_1 && cfg->g_bit_depth > VPX_BITS_8) { ERROR("Codec high bit-depth not supported in profile < 2"); } if (cfg->g_profile <= (unsigned int)PROFILE_1 && cfg->g_input_bit_depth > 8) { ERROR("Source high bit-depth not supported in profile < 2"); } if (cfg->g_profile > (unsigned int)PROFILE_1 && cfg->g_bit_depth == VPX_BITS_8) { ERROR("Codec bit-depth 8 not supported in profile > 1"); } RANGE_CHECK(extra_cfg, color_space, VPX_CS_UNKNOWN, VPX_CS_SRGB); RANGE_CHECK(extra_cfg, color_range, VPX_CR_STUDIO_RANGE, VPX_CR_FULL_RANGE); return VPX_CODEC_OK; } static vpx_codec_err_t validate_img(vpx_codec_alg_priv_t *ctx, const vpx_image_t *img) { switch (img->fmt) { case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_I420: case VPX_IMG_FMT_I42016: break; case VPX_IMG_FMT_I422: case VPX_IMG_FMT_I444: case VPX_IMG_FMT_I440: if (ctx->cfg.g_profile != (unsigned int)PROFILE_1) { ERROR( "Invalid image format. I422, I444, I440 images are " "not supported in profile."); } break; case VPX_IMG_FMT_I42216: case VPX_IMG_FMT_I44416: case VPX_IMG_FMT_I44016: if (ctx->cfg.g_profile != (unsigned int)PROFILE_1 && ctx->cfg.g_profile != (unsigned int)PROFILE_3) { ERROR( "Invalid image format. 16-bit I422, I444, I440 images are " "not supported in profile."); } break; default: ERROR( "Invalid image format. Only YV12, I420, I422, I444 images are " "supported."); break; } if (img->d_w != ctx->cfg.g_w || img->d_h != ctx->cfg.g_h) ERROR("Image size must match encoder init configuration size"); return VPX_CODEC_OK; } static int get_image_bps(const vpx_image_t *img) { switch (img->fmt) { case VPX_IMG_FMT_YV12: case VPX_IMG_FMT_I420: return 12; case VPX_IMG_FMT_I422: return 16; case VPX_IMG_FMT_I444: return 24; case VPX_IMG_FMT_I440: return 16; case VPX_IMG_FMT_I42016: return 24; case VPX_IMG_FMT_I42216: return 32; case VPX_IMG_FMT_I44416: return 48; case VPX_IMG_FMT_I44016: return 32; default: assert(0 && "Invalid image format"); break; } return 0; } // Modify the encoder config for the target level. static void config_target_level(VP9EncoderConfig *oxcf) { double max_average_bitrate; // in bits per second int max_over_shoot_pct; const int target_level_index = get_level_index(oxcf->target_level); vpx_clear_system_state(); assert(target_level_index >= 0); assert(target_level_index < VP9_LEVELS); // Maximum target bit-rate is level_limit * 80%. max_average_bitrate = vp9_level_defs[target_level_index].average_bitrate * 800.0; if ((double)oxcf->target_bandwidth > max_average_bitrate) oxcf->target_bandwidth = (int64_t)(max_average_bitrate); if (oxcf->ss_number_layers == 1 && oxcf->pass != 0) oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth; // Adjust max over-shoot percentage. max_over_shoot_pct = (int)((max_average_bitrate * 1.10 - (double)oxcf->target_bandwidth) * 100 / (double)(oxcf->target_bandwidth)); if (oxcf->over_shoot_pct > max_over_shoot_pct) oxcf->over_shoot_pct = max_over_shoot_pct; // Adjust worst allowed quantizer. oxcf->worst_allowed_q = vp9_quantizer_to_qindex(63); // Adjust minimum art-ref distance. 
// min_gf_interval should be no less than min_altref_distance + 1, // as the encoder may produce bitstream with alt-ref distance being // min_gf_interval - 1. if (oxcf->min_gf_interval <= (int)vp9_level_defs[target_level_index].min_altref_distance) { oxcf->min_gf_interval = (int)vp9_level_defs[target_level_index].min_altref_distance + 1; // If oxcf->max_gf_interval == 0, it will be assigned with a default value // in vp9_rc_set_gf_interval_range(). if (oxcf->max_gf_interval != 0) { oxcf->max_gf_interval = VPXMAX(oxcf->max_gf_interval, oxcf->min_gf_interval); } } // Adjust maximum column tiles. if (vp9_level_defs[target_level_index].max_col_tiles < (1 << oxcf->tile_columns)) { while (oxcf->tile_columns > 0 && vp9_level_defs[target_level_index].max_col_tiles < (1 << oxcf->tile_columns)) --oxcf->tile_columns; } } static vpx_rational64_t get_g_timebase_in_ts(vpx_rational_t g_timebase) { vpx_rational64_t g_timebase_in_ts; g_timebase_in_ts.den = g_timebase.den; g_timebase_in_ts.num = g_timebase.num; g_timebase_in_ts.num *= TICKS_PER_SEC; reduce_ratio(&g_timebase_in_ts); return g_timebase_in_ts; } static vpx_codec_err_t set_encoder_config( VP9EncoderConfig *oxcf, const vpx_codec_enc_cfg_t *cfg, const struct vp9_extracfg *extra_cfg) { const int is_vbr = cfg->rc_end_usage == VPX_VBR; int sl, tl; oxcf->profile = cfg->g_profile; oxcf->max_threads = (int)cfg->g_threads; oxcf->width = cfg->g_w; oxcf->height = cfg->g_h; oxcf->bit_depth = cfg->g_bit_depth; oxcf->input_bit_depth = cfg->g_input_bit_depth; // TODO(angiebird): Figure out if we can just use g_timebase to indicate the // inverse of framerate // guess a frame rate if out of whack, use 30 oxcf->init_framerate = (double)cfg->g_timebase.den / cfg->g_timebase.num; if (oxcf->init_framerate > 180) oxcf->init_framerate = 30; oxcf->g_timebase = cfg->g_timebase; oxcf->g_timebase_in_ts = get_g_timebase_in_ts(oxcf->g_timebase); oxcf->mode = GOOD; switch (cfg->g_pass) { case VPX_RC_ONE_PASS: oxcf->pass = 0; break; case VPX_RC_FIRST_PASS: oxcf->pass = 1; break; case VPX_RC_LAST_PASS: oxcf->pass = 2; break; } oxcf->lag_in_frames = cfg->g_pass == VPX_RC_FIRST_PASS ? 0 : cfg->g_lag_in_frames; oxcf->rc_mode = cfg->rc_end_usage; // Convert target bandwidth from Kbit/s to Bit/s oxcf->target_bandwidth = 1000 * cfg->rc_target_bitrate; oxcf->rc_max_intra_bitrate_pct = extra_cfg->rc_max_intra_bitrate_pct; oxcf->rc_max_inter_bitrate_pct = extra_cfg->rc_max_inter_bitrate_pct; oxcf->gf_cbr_boost_pct = extra_cfg->gf_cbr_boost_pct; oxcf->best_allowed_q = extra_cfg->lossless ? 0 : vp9_quantizer_to_qindex(cfg->rc_min_quantizer); oxcf->worst_allowed_q = extra_cfg->lossless ? 0 : vp9_quantizer_to_qindex(cfg->rc_max_quantizer); oxcf->cq_level = vp9_quantizer_to_qindex(extra_cfg->cq_level); oxcf->fixed_q = -1; oxcf->under_shoot_pct = cfg->rc_undershoot_pct; oxcf->over_shoot_pct = cfg->rc_overshoot_pct; oxcf->scaled_frame_width = cfg->rc_scaled_width; oxcf->scaled_frame_height = cfg->rc_scaled_height; if (cfg->rc_resize_allowed == 1) { oxcf->resize_mode = (oxcf->scaled_frame_width == 0 || oxcf->scaled_frame_height == 0) ? RESIZE_DYNAMIC : RESIZE_FIXED; } else { oxcf->resize_mode = RESIZE_NONE; } oxcf->maximum_buffer_size_ms = is_vbr ? 240000 : cfg->rc_buf_sz; oxcf->starting_buffer_level_ms = is_vbr ? 60000 : cfg->rc_buf_initial_sz; oxcf->optimal_buffer_level_ms = is_vbr ? 
60000 : cfg->rc_buf_optimal_sz; oxcf->drop_frames_water_mark = cfg->rc_dropframe_thresh; oxcf->two_pass_vbrbias = cfg->rc_2pass_vbr_bias_pct; oxcf->two_pass_vbrmin_section = cfg->rc_2pass_vbr_minsection_pct; oxcf->two_pass_vbrmax_section = cfg->rc_2pass_vbr_maxsection_pct; oxcf->vbr_corpus_complexity = cfg->rc_2pass_vbr_corpus_complexity; oxcf->auto_key = cfg->kf_mode == VPX_KF_AUTO && cfg->kf_min_dist != cfg->kf_max_dist; oxcf->key_freq = cfg->kf_max_dist; oxcf->speed = abs(extra_cfg->cpu_used); oxcf->encode_breakout = extra_cfg->static_thresh; oxcf->enable_auto_arf = extra_cfg->enable_auto_alt_ref; if (oxcf->bit_depth == VPX_BITS_8) { oxcf->noise_sensitivity = extra_cfg->noise_sensitivity; } else { // Disable denoiser for high bitdepth since vp9_denoiser_filter only works // for 8 bits. oxcf->noise_sensitivity = 0; } oxcf->sharpness = extra_cfg->sharpness; vp9_set_first_pass_stats(oxcf, &cfg->rc_twopass_stats_in); #if CONFIG_FP_MB_STATS oxcf->firstpass_mb_stats_in = cfg->rc_firstpass_mb_stats_in; #endif oxcf->color_space = extra_cfg->color_space; oxcf->color_range = extra_cfg->color_range; oxcf->render_width = extra_cfg->render_width; oxcf->render_height = extra_cfg->render_height; oxcf->arnr_max_frames = extra_cfg->arnr_max_frames; oxcf->arnr_strength = extra_cfg->arnr_strength; oxcf->min_gf_interval = extra_cfg->min_gf_interval; oxcf->max_gf_interval = extra_cfg->max_gf_interval; oxcf->tuning = extra_cfg->tuning; oxcf->content = extra_cfg->content; oxcf->tile_columns = extra_cfg->tile_columns; oxcf->enable_tpl_model = extra_cfg->enable_tpl_model; // TODO(yunqing): The dependencies between row tiles cause error in multi- // threaded encoding. For now, tile_rows is forced to be 0 in this case. // The further fix can be done by adding synchronizations after a tile row // is encoded. But this will hurt multi-threaded encoder performance. So, // it is recommended to use tile-rows=0 while encoding with threads > 1. if (oxcf->max_threads > 1 && oxcf->tile_columns > 0) oxcf->tile_rows = 0; else oxcf->tile_rows = extra_cfg->tile_rows; oxcf->error_resilient_mode = cfg->g_error_resilient; oxcf->frame_parallel_decoding_mode = extra_cfg->frame_parallel_decoding_mode; oxcf->aq_mode = extra_cfg->aq_mode; oxcf->alt_ref_aq = extra_cfg->alt_ref_aq; oxcf->frame_periodic_boost = extra_cfg->frame_periodic_boost; oxcf->ss_number_layers = cfg->ss_number_layers; oxcf->ts_number_layers = cfg->ts_number_layers; oxcf->temporal_layering_mode = (enum vp9e_temporal_layering_mode)cfg->temporal_layering_mode; oxcf->target_level = extra_cfg->target_level; oxcf->row_mt = extra_cfg->row_mt; oxcf->motion_vector_unit_test = extra_cfg->motion_vector_unit_test; for (sl = 0; sl < oxcf->ss_number_layers; ++sl) { for (tl = 0; tl < oxcf->ts_number_layers; ++tl) { oxcf->layer_target_bitrate[sl * oxcf->ts_number_layers + tl] = 1000 * cfg->layer_target_bitrate[sl * oxcf->ts_number_layers + tl]; } } if (oxcf->ss_number_layers == 1 && oxcf->pass != 0) { oxcf->ss_target_bitrate[0] = (int)oxcf->target_bandwidth; } if (oxcf->ts_number_layers > 1) { for (tl = 0; tl < VPX_TS_MAX_LAYERS; ++tl) { oxcf->ts_rate_decimator[tl] = cfg->ts_rate_decimator[tl] ? 
cfg->ts_rate_decimator[tl] : 1; } } else if (oxcf->ts_number_layers == 1) { oxcf->ts_rate_decimator[0] = 1; } if (get_level_index(oxcf->target_level) >= 0) config_target_level(oxcf); // vp9_dump_encoder_config(oxcf); return VPX_CODEC_OK; } static vpx_codec_err_t encoder_set_config(vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg) { vpx_codec_err_t res; int force_key = 0; if (cfg->g_w != ctx->cfg.g_w || cfg->g_h != ctx->cfg.g_h) { if (cfg->g_lag_in_frames > 1 || cfg->g_pass != VPX_RC_ONE_PASS) ERROR("Cannot change width or height after initialization"); if (!valid_ref_frame_size(ctx->cfg.g_w, ctx->cfg.g_h, cfg->g_w, cfg->g_h) || (ctx->cpi->initial_width && (int)cfg->g_w > ctx->cpi->initial_width) || (ctx->cpi->initial_height && (int)cfg->g_h > ctx->cpi->initial_height)) force_key = 1; } // Prevent increasing lag_in_frames. This check is stricter than it needs // to be -- the limit is not increasing past the first lag_in_frames // value, but we don't track the initial config, only the last successful // config. if (cfg->g_lag_in_frames > ctx->cfg.g_lag_in_frames) ERROR("Cannot increase lag_in_frames"); res = validate_config(ctx, cfg, &ctx->extra_cfg); if (res == VPX_CODEC_OK) { ctx->cfg = *cfg; set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); // On profile change, request a key frame force_key |= ctx->cpi->common.profile != ctx->oxcf.profile; vp9_change_config(ctx->cpi, &ctx->oxcf); } if (force_key) ctx->next_frame_flags |= VPX_EFLAG_FORCE_KF; return res; } static vpx_codec_err_t ctrl_get_quantizer(vpx_codec_alg_priv_t *ctx, va_list args) { int *const arg = va_arg(args, int *); if (arg == NULL) return VPX_CODEC_INVALID_PARAM; *arg = vp9_get_quantizer(ctx->cpi); return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_get_quantizer64(vpx_codec_alg_priv_t *ctx, va_list args) { int *const arg = va_arg(args, int *); if (arg == NULL) return VPX_CODEC_INVALID_PARAM; *arg = vp9_qindex_to_quantizer(vp9_get_quantizer(ctx->cpi)); return VPX_CODEC_OK; } static vpx_codec_err_t update_extra_cfg(vpx_codec_alg_priv_t *ctx, const struct vp9_extracfg *extra_cfg) { const vpx_codec_err_t res = validate_config(ctx, &ctx->cfg, extra_cfg); if (res == VPX_CODEC_OK) { ctx->extra_cfg = *extra_cfg; set_encoder_config(&ctx->oxcf, &ctx->cfg, &ctx->extra_cfg); vp9_change_config(ctx->cpi, &ctx->oxcf); } return res; } static vpx_codec_err_t ctrl_set_cpuused(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; // Use fastest speed setting (speed 9 or -9) if it's set beyond the range. 
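// Illustrative usage (not part of the original source): a call such as
//   vpx_codec_control(&codec, VP8E_SET_CPUUSED, 13);
// lands in this handler and is clamped into the [-9, 9] range below.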
extra_cfg.cpu_used = CAST(VP8E_SET_CPUUSED, args); extra_cfg.cpu_used = VPXMIN(9, extra_cfg.cpu_used); extra_cfg.cpu_used = VPXMAX(-9, extra_cfg.cpu_used); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_enable_auto_alt_ref(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.enable_auto_alt_ref = CAST(VP8E_SET_ENABLEAUTOALTREF, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_noise_sensitivity(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.noise_sensitivity = CAST(VP9E_SET_NOISE_SENSITIVITY, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_sharpness(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.sharpness = CAST(VP8E_SET_SHARPNESS, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_static_thresh(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.static_thresh = CAST(VP8E_SET_STATIC_THRESHOLD, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_tile_columns(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.tile_columns = CAST(VP9E_SET_TILE_COLUMNS, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_tile_rows(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.tile_rows = CAST(VP9E_SET_TILE_ROWS, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_tpl_model(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.enable_tpl_model = CAST(VP9E_SET_TPL, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_arnr_max_frames(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.arnr_max_frames = CAST(VP8E_SET_ARNR_MAXFRAMES, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_arnr_strength(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.arnr_strength = CAST(VP8E_SET_ARNR_STRENGTH, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_arnr_type(vpx_codec_alg_priv_t *ctx, va_list args) { (void)ctx; (void)args; return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_tuning(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.tuning = CAST(VP8E_SET_TUNING, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_cq_level(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.cq_level = CAST(VP8E_SET_CQ_LEVEL, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_rc_max_intra_bitrate_pct( vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.rc_max_intra_bitrate_pct = CAST(VP8E_SET_MAX_INTRA_BITRATE_PCT, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_rc_max_inter_bitrate_pct( vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.rc_max_inter_bitrate_pct = CAST(VP9E_SET_MAX_INTER_BITRATE_PCT, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t 
ctrl_set_rc_gf_cbr_boost_pct(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.gf_cbr_boost_pct = CAST(VP9E_SET_GF_CBR_BOOST_PCT, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_lossless(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.lossless = CAST(VP9E_SET_LOSSLESS, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_frame_parallel_decoding_mode( vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.frame_parallel_decoding_mode = CAST(VP9E_SET_FRAME_PARALLEL_DECODING, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_aq_mode(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.aq_mode = CAST(VP9E_SET_AQ_MODE, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_alt_ref_aq(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.alt_ref_aq = CAST(VP9E_SET_ALT_REF_AQ, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_min_gf_interval(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.min_gf_interval = CAST(VP9E_SET_MIN_GF_INTERVAL, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_max_gf_interval(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.max_gf_interval = CAST(VP9E_SET_MAX_GF_INTERVAL, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_frame_periodic_boost(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.frame_periodic_boost = CAST(VP9E_SET_FRAME_PERIODIC_BOOST, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_target_level(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.target_level = CAST(VP9E_SET_TARGET_LEVEL, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_row_mt(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.row_mt = CAST(VP9E_SET_ROW_MT, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_enable_motion_vector_unit_test( vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.motion_vector_unit_test = CAST(VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_get_level(vpx_codec_alg_priv_t *ctx, va_list args) { int *const arg = va_arg(args, int *); if (arg == NULL) return VPX_CODEC_INVALID_PARAM; *arg = (int)vp9_get_level(&ctx->cpi->level_info.level_spec); return VPX_CODEC_OK; } static vpx_codec_err_t encoder_init(vpx_codec_ctx_t *ctx, vpx_codec_priv_enc_mr_cfg_t *data) { vpx_codec_err_t res = VPX_CODEC_OK; (void)data; if (ctx->priv == NULL) { vpx_codec_alg_priv_t *const priv = vpx_calloc(1, sizeof(*priv)); if (priv == NULL) return VPX_CODEC_MEM_ERROR; ctx->priv = (vpx_codec_priv_t *)priv; ctx->priv->init_flags = ctx->init_flags; ctx->priv->enc.total_encoders = 1; priv->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool)); if (priv->buffer_pool == NULL) return VPX_CODEC_MEM_ERROR; if (ctx->config.enc) { // Update the reference to the config structure to 
an internal copy. priv->cfg = *ctx->config.enc; ctx->config.enc = &priv->cfg; } priv->extra_cfg = default_extra_cfg; once(vp9_initialize_enc); res = validate_config(priv, &priv->cfg, &priv->extra_cfg); if (res == VPX_CODEC_OK) { priv->pts_offset_initialized = 0; // TODO(angiebird): Replace priv->timestamp_ratio by // oxcf->g_timebase_in_ts priv->timestamp_ratio = get_g_timebase_in_ts(priv->cfg.g_timebase); set_encoder_config(&priv->oxcf, &priv->cfg, &priv->extra_cfg); #if CONFIG_VP9_HIGHBITDEPTH priv->oxcf.use_highbitdepth = (ctx->init_flags & VPX_CODEC_USE_HIGHBITDEPTH) ? 1 : 0; #endif priv->cpi = vp9_create_compressor(&priv->oxcf, priv->buffer_pool); if (priv->cpi == NULL) res = VPX_CODEC_MEM_ERROR; } } return res; } static vpx_codec_err_t encoder_destroy(vpx_codec_alg_priv_t *ctx) { free(ctx->cx_data); vp9_remove_compressor(ctx->cpi); vpx_free(ctx->buffer_pool); vpx_free(ctx); return VPX_CODEC_OK; } static void pick_quickcompress_mode(vpx_codec_alg_priv_t *ctx, unsigned long duration, unsigned long deadline) { MODE new_mode = BEST; #if CONFIG_REALTIME_ONLY (void)duration; deadline = VPX_DL_REALTIME; #else switch (ctx->cfg.g_pass) { case VPX_RC_ONE_PASS: if (deadline > 0) { // Convert duration parameter from stream timebase to microseconds. uint64_t duration_us; COMPILE_TIME_ASSERT(TICKS_PER_SEC > 1000000 && (TICKS_PER_SEC % 1000000) == 0); duration_us = duration * (uint64_t)ctx->timestamp_ratio.num / (ctx->timestamp_ratio.den * (TICKS_PER_SEC / 1000000)); // If the deadline is more than the duration this frame is to be shown, // use good quality mode. Otherwise use realtime mode. new_mode = (deadline > duration_us) ? GOOD : REALTIME; } else { new_mode = BEST; } break; case VPX_RC_FIRST_PASS: break; case VPX_RC_LAST_PASS: new_mode = deadline > 0 ?
GOOD : BEST; break; } #endif // CONFIG_REALTIME_ONLY if (deadline == VPX_DL_REALTIME) { ctx->oxcf.pass = 0; new_mode = REALTIME; } if (ctx->oxcf.mode != new_mode) { ctx->oxcf.mode = new_mode; vp9_change_config(ctx->cpi, &ctx->oxcf); } } // Turn on to test if supplemental superframe data breaks decoding // #define TEST_SUPPLEMENTAL_SUPERFRAME_DATA static int write_superframe_index(vpx_codec_alg_priv_t *ctx) { uint8_t marker = 0xc0; unsigned int mask; int mag, index_sz; assert(ctx->pending_frame_count); assert(ctx->pending_frame_count <= 8); // Add the number of frames to the marker byte marker |= ctx->pending_frame_count - 1; // Choose the magnitude for (mag = 0, mask = 0xff; mag < 4; mag++) { if (ctx->pending_frame_magnitude < mask) break; mask <<= 8; mask |= 0xff; } marker |= mag << 3; // Write the index index_sz = 2 + (mag + 1) * ctx->pending_frame_count; if (ctx->pending_cx_data_sz + index_sz < ctx->cx_data_sz) { uint8_t *x = ctx->pending_cx_data + ctx->pending_cx_data_sz; int i, j; #ifdef TEST_SUPPLEMENTAL_SUPERFRAME_DATA uint8_t marker_test = 0xc0; int mag_test = 2; // 1 - 4 int frames_test = 4; // 1 - 8 int index_sz_test = 2 + mag_test * frames_test; marker_test |= frames_test - 1; marker_test |= (mag_test - 1) << 3; *x++ = marker_test; for (i = 0; i < mag_test * frames_test; ++i) *x++ = 0; // fill up with arbitrary data *x++ = marker_test; ctx->pending_cx_data_sz += index_sz_test; printf("Added supplemental superframe data\n"); #endif *x++ = marker; for (i = 0; i < ctx->pending_frame_count; i++) { unsigned int this_sz = (unsigned int)ctx->pending_frame_sizes[i]; for (j = 0; j <= mag; j++) { *x++ = this_sz & 0xff; this_sz >>= 8; } } *x++ = marker; ctx->pending_cx_data_sz += index_sz; #ifdef TEST_SUPPLEMENTAL_SUPERFRAME_DATA index_sz += index_sz_test; #endif } return index_sz; } static vpx_codec_frame_flags_t get_frame_pkt_flags(const VP9_COMP *cpi, unsigned int lib_flags) { vpx_codec_frame_flags_t flags = lib_flags << 16; if (lib_flags & FRAMEFLAGS_KEY || (cpi->use_svc && cpi->svc .layer_context[cpi->svc.spatial_layer_id * cpi->svc.number_temporal_layers + cpi->svc.temporal_layer_id] .is_key_frame)) flags |= VPX_FRAME_IS_KEY; if (cpi->droppable) flags |= VPX_FRAME_IS_DROPPABLE; return flags; } static INLINE vpx_codec_cx_pkt_t get_psnr_pkt(const PSNR_STATS *psnr) { vpx_codec_cx_pkt_t pkt; pkt.kind = VPX_CODEC_PSNR_PKT; pkt.data.psnr = *psnr; return pkt; } #if !CONFIG_REALTIME_ONLY static INLINE vpx_codec_cx_pkt_t get_first_pass_stats_pkt(FIRSTPASS_STATS *stats) { // WARNING: This function assumes that stats will // exist and not be changed until the packet is processed. // TODO(angiebird): Refactor the code to avoid using the assumption.
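// (Illustrative note, not in the original comment: at the call sites below
// the packet's buf field ends up pointing at cpi-owned data such as
// cpi->twopass.this_frame_stats, which the encoder overwrites on later
// frames -- hence the warning above.)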
vpx_codec_cx_pkt_t pkt; pkt.kind = VPX_CODEC_STATS_PKT; pkt.data.twopass_stats.buf = stats; pkt.data.twopass_stats.sz = sizeof(*stats); return pkt; } #endif const size_t kMinCompressedSize = 8192; static vpx_codec_err_t encoder_encode(vpx_codec_alg_priv_t *ctx, const vpx_image_t *img, vpx_codec_pts_t pts_val, unsigned long duration, vpx_enc_frame_flags_t enc_flags, unsigned long deadline) { volatile vpx_codec_err_t res = VPX_CODEC_OK; volatile vpx_enc_frame_flags_t flags = enc_flags; volatile vpx_codec_pts_t pts = pts_val; VP9_COMP *const cpi = ctx->cpi; const vpx_rational64_t *const timestamp_ratio = &ctx->timestamp_ratio; size_t data_sz; vpx_codec_cx_pkt_t pkt; memset(&pkt, 0, sizeof(pkt)); if (cpi == NULL) return VPX_CODEC_INVALID_PARAM; if (img != NULL) { res = validate_img(ctx, img); if (res == VPX_CODEC_OK) { // There's no codec control for multiple alt-refs so check the encoder // instance for its status to determine the compressed data size. data_sz = ctx->cfg.g_w * ctx->cfg.g_h * get_image_bps(img) / 8 * (cpi->multi_layer_arf ? 8 : 2); if (data_sz < kMinCompressedSize) data_sz = kMinCompressedSize; if (ctx->cx_data == NULL || ctx->cx_data_sz < data_sz) { ctx->cx_data_sz = data_sz; free(ctx->cx_data); ctx->cx_data = (unsigned char *)malloc(ctx->cx_data_sz); if (ctx->cx_data == NULL) { return VPX_CODEC_MEM_ERROR; } } } } if (!ctx->pts_offset_initialized) { ctx->pts_offset = pts; ctx->pts_offset_initialized = 1; } pts -= ctx->pts_offset; pick_quickcompress_mode(ctx, duration, deadline); vpx_codec_pkt_list_init(&ctx->pkt_list); // Handle Flags if (((flags & VP8_EFLAG_NO_UPD_GF) && (flags & VP8_EFLAG_FORCE_GF)) || ((flags & VP8_EFLAG_NO_UPD_ARF) && (flags & VP8_EFLAG_FORCE_ARF))) { ctx->base.err_detail = "Conflicting flags."; return VPX_CODEC_INVALID_PARAM; } if (setjmp(cpi->common.error.jmp)) { cpi->common.error.setjmp = 0; res = update_error_state(ctx, &cpi->common.error); vpx_clear_system_state(); return res; } cpi->common.error.setjmp = 1; if (res == VPX_CODEC_OK) vp9_apply_encoding_flags(cpi, flags); // Handle fixed keyframe intervals if (ctx->cfg.kf_mode == VPX_KF_AUTO && ctx->cfg.kf_min_dist == ctx->cfg.kf_max_dist) { if (++ctx->fixed_kf_cntr > ctx->cfg.kf_min_dist) { flags |= VPX_EFLAG_FORCE_KF; ctx->fixed_kf_cntr = 1; } } if (res == VPX_CODEC_OK) { unsigned int lib_flags = 0; YV12_BUFFER_CONFIG sd; int64_t dst_time_stamp = timebase_units_to_ticks(timestamp_ratio, pts); int64_t dst_end_time_stamp = timebase_units_to_ticks(timestamp_ratio, pts + duration); size_t size, cx_data_sz; unsigned char *cx_data; cpi->svc.timebase_fac = timebase_units_to_ticks(timestamp_ratio, 1); cpi->svc.time_stamp_superframe = dst_time_stamp; // Set up internal flags if (ctx->base.init_flags & VPX_CODEC_USE_PSNR) cpi->b_calculate_psnr = 1; if (img != NULL) { res = image2yuvconfig(img, &sd); // Store the original flags in to the frame buffer. Will extract the // key frame flag when we actually encode this frame. if (vp9_receive_raw_frame(cpi, flags | ctx->next_frame_flags, &sd, dst_time_stamp, dst_end_time_stamp)) { res = update_error_state(ctx, &cpi->common.error); } ctx->next_frame_flags = 0; } cx_data = ctx->cx_data; cx_data_sz = ctx->cx_data_sz; /* Any pending invisible frames? */ if (ctx->pending_cx_data) { memmove(cx_data, ctx->pending_cx_data, ctx->pending_cx_data_sz); ctx->pending_cx_data = cx_data; cx_data += ctx->pending_cx_data_sz; cx_data_sz -= ctx->pending_cx_data_sz; /* TODO: this is a minimal check, the underlying codec doesn't respect * the buffer size anyway. 
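* (Illustrative note, not in the original comment: the same half-buffer
* threshold gates the vp9_get_compressed_data() loop below, which only
* continues while cx_data_sz >= ctx->cx_data_sz / 2.)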
*/ if (cx_data_sz < ctx->cx_data_sz / 2) { vpx_internal_error(&cpi->common.error, VPX_CODEC_ERROR, "Compressed data buffer too small"); return VPX_CODEC_ERROR; } } if (cpi->oxcf.pass == 1 && !cpi->use_svc) { #if !CONFIG_REALTIME_ONLY // compute first pass stats if (img) { int ret; ENCODE_FRAME_RESULT encode_frame_result; vpx_codec_cx_pkt_t fps_pkt; // TODO(angiebird): Call vp9_first_pass directly ret = vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data, &dst_time_stamp, &dst_end_time_stamp, !img, &encode_frame_result); assert(size == 0); // There is no compressed data in the first pass (void)ret; assert(ret == 0); fps_pkt = get_first_pass_stats_pkt(&cpi->twopass.this_frame_stats); vpx_codec_pkt_list_add(&ctx->pkt_list.head, &fps_pkt); } else { if (!cpi->twopass.first_pass_done) { vpx_codec_cx_pkt_t fps_pkt; vp9_end_first_pass(cpi); fps_pkt = get_first_pass_stats_pkt(&cpi->twopass.total_stats); vpx_codec_pkt_list_add(&ctx->pkt_list.head, &fps_pkt); } } #else // !CONFIG_REALTIME_ONLY assert(0); #endif // !CONFIG_REALTIME_ONLY } else { ENCODE_FRAME_RESULT encode_frame_result; while (cx_data_sz >= ctx->cx_data_sz / 2 && -1 != vp9_get_compressed_data(cpi, &lib_flags, &size, cx_data, &dst_time_stamp, &dst_end_time_stamp, !img, &encode_frame_result)) { // Pack psnr pkt if (size > 0 && !cpi->use_svc) { // TODO(angiebird): Figure out why we don't need psnr pkt when // use_svc is on PSNR_STATS psnr; if (vp9_get_psnr(cpi, &psnr)) { vpx_codec_cx_pkt_t psnr_pkt = get_psnr_pkt(&psnr); vpx_codec_pkt_list_add(&ctx->pkt_list.head, &psnr_pkt); } } if (size || (cpi->use_svc && cpi->svc.skip_enhancement_layer)) { // Pack invisible frames with the next visible frame if (!cpi->common.show_frame || (cpi->use_svc && cpi->svc.spatial_layer_id < cpi->svc.number_spatial_layers - 1)) { if (ctx->pending_cx_data == 0) ctx->pending_cx_data = cx_data; ctx->pending_cx_data_sz += size; if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; ctx->pending_frame_magnitude |= size; cx_data += size; cx_data_sz -= size; pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width; pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height; pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] = 1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id]; if (ctx->output_cx_pkt_cb.output_cx_pkt) { pkt.kind = VPX_CODEC_CX_FRAME_PKT; pkt.data.frame.pts = ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) + ctx->pts_offset; pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units( timestamp_ratio, dst_end_time_stamp - dst_time_stamp); pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags); pkt.data.frame.buf = ctx->pending_cx_data; pkt.data.frame.sz = size; ctx->pending_cx_data = NULL; ctx->pending_cx_data_sz = 0; ctx->pending_frame_count = 0; ctx->pending_frame_magnitude = 0; ctx->output_cx_pkt_cb.output_cx_pkt( &pkt, ctx->output_cx_pkt_cb.user_priv); } continue; } // Add the frame packet to the list of returned packets.
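// Worked example (hypothetical sizes, not in the original source) of the
// index that write_superframe_index() appends below: two pending frames of
// 120 and 4000 bytes give mag = 1 (two bytes per size), so the 6-byte
// index is 0xc9 0x78 0x00 0xa0 0x0f 0xc9
// (marker, little-endian frame sizes, marker repeated).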
pkt.kind = VPX_CODEC_CX_FRAME_PKT; pkt.data.frame.pts = ticks_to_timebase_units(timestamp_ratio, dst_time_stamp) + ctx->pts_offset; pkt.data.frame.duration = (unsigned long)ticks_to_timebase_units( timestamp_ratio, dst_end_time_stamp - dst_time_stamp); pkt.data.frame.flags = get_frame_pkt_flags(cpi, lib_flags); pkt.data.frame.width[cpi->svc.spatial_layer_id] = cpi->common.width; pkt.data.frame.height[cpi->svc.spatial_layer_id] = cpi->common.height; pkt.data.frame.spatial_layer_encoded[cpi->svc.spatial_layer_id] = 1 - cpi->svc.drop_spatial_layer[cpi->svc.spatial_layer_id]; if (ctx->pending_cx_data) { if (size) ctx->pending_frame_sizes[ctx->pending_frame_count++] = size; ctx->pending_frame_magnitude |= size; ctx->pending_cx_data_sz += size; // Write the superframe index only when no output packet callback is // registered; with a callback, frames are handed out individually. if (!ctx->output_cx_pkt_cb.output_cx_pkt) size += write_superframe_index(ctx); pkt.data.frame.buf = ctx->pending_cx_data; pkt.data.frame.sz = ctx->pending_cx_data_sz; ctx->pending_cx_data = NULL; ctx->pending_cx_data_sz = 0; ctx->pending_frame_count = 0; ctx->pending_frame_magnitude = 0; } else { pkt.data.frame.buf = cx_data; pkt.data.frame.sz = size; } pkt.data.frame.partition_id = -1; if (ctx->output_cx_pkt_cb.output_cx_pkt) ctx->output_cx_pkt_cb.output_cx_pkt( &pkt, ctx->output_cx_pkt_cb.user_priv); else vpx_codec_pkt_list_add(&ctx->pkt_list.head, &pkt); cx_data += size; cx_data_sz -= size; if (is_one_pass_cbr_svc(cpi) && (cpi->svc.spatial_layer_id == cpi->svc.number_spatial_layers - 1)) { // Encoded all spatial layers; exit loop. break; } } } } cpi->common.error.setjmp = 0; return res; } static const vpx_codec_cx_pkt_t *encoder_get_cxdata(vpx_codec_alg_priv_t *ctx, vpx_codec_iter_t *iter) { return vpx_codec_pkt_list_get(&ctx->pkt_list.head, iter); } static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *); if (frame != NULL) { YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); vp9_set_reference_enc(ctx->cpi, ref_frame_to_vp9_reframe(frame->frame_type), &sd); return VPX_CODEC_OK; } return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_ref_frame_t *const frame = va_arg(args, vpx_ref_frame_t *); if (frame != NULL) { YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); vp9_copy_reference_enc(ctx->cpi, ref_frame_to_vp9_reframe(frame->frame_type), &sd); return VPX_CODEC_OK; } return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vp9_ref_frame_t *const frame = va_arg(args, vp9_ref_frame_t *); if (frame != NULL) { const int fb_idx = ctx->cpi->common.cur_show_frame_fb_idx; YV12_BUFFER_CONFIG *fb = get_buf_frame(&ctx->cpi->common, fb_idx); if (fb == NULL) return VPX_CODEC_ERROR; yuvconfig2image(&frame->img, fb, NULL); return VPX_CODEC_OK; } return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_set_previewpp(vpx_codec_alg_priv_t *ctx, va_list args) { #if CONFIG_VP9_POSTPROC vp8_postproc_cfg_t *config = va_arg(args, vp8_postproc_cfg_t *); if (config != NULL) { ctx->preview_ppcfg = *config; return VPX_CODEC_OK; } return VPX_CODEC_INVALID_PARAM; #else (void)ctx; (void)args; return VPX_CODEC_INCAPABLE; #endif } static vpx_image_t *encoder_get_preview(vpx_codec_alg_priv_t *ctx) { YV12_BUFFER_CONFIG sd; vp9_ppflags_t flags; vp9_zero(flags); if (ctx->preview_ppcfg.post_proc_flag) { flags.post_proc_flag = ctx->preview_ppcfg.post_proc_flag; flags.deblocking_level =
ctx->preview_ppcfg.deblocking_level; flags.noise_level = ctx->preview_ppcfg.noise_level; } if (vp9_get_preview_raw_frame(ctx->cpi, &sd, &flags) == 0) { yuvconfig2image(&ctx->preview_img, &sd, NULL); return &ctx->preview_img; } return NULL; } static vpx_codec_err_t ctrl_set_roi_map(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_roi_map_t *data = va_arg(args, vpx_roi_map_t *); if (data) { vpx_roi_map_t *roi = (vpx_roi_map_t *)data; if (!vp9_set_roi_map(ctx->cpi, roi->roi_map, roi->rows, roi->cols, roi->delta_q, roi->delta_lf, roi->skip, roi->ref_frame)) { return VPX_CODEC_OK; } return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_set_active_map(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *); if (map) { if (!vp9_set_active_map(ctx->cpi, map->active_map, (int)map->rows, (int)map->cols)) return VPX_CODEC_OK; return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_get_active_map(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_active_map_t *const map = va_arg(args, vpx_active_map_t *); if (map) { if (!vp9_get_active_map(ctx->cpi, map->active_map, (int)map->rows, (int)map->cols)) return VPX_CODEC_OK; return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_set_scale_mode(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_scaling_mode_t *const mode = va_arg(args, vpx_scaling_mode_t *); if (mode) { const int res = vp9_set_internal_size(ctx->cpi, (VPX_SCALING)mode->h_scaling_mode, (VPX_SCALING)mode->v_scaling_mode); return (res == 0) ? VPX_CODEC_OK : VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_set_svc(vpx_codec_alg_priv_t *ctx, va_list args) { int data = va_arg(args, int); const vpx_codec_enc_cfg_t *cfg = &ctx->cfg; // Both one-pass and two-pass RC are supported now. // A caller enabling SVC has to ensure the following: // in the two-pass setting, either (but not both) // cfg->ss_number_layers > 1 or cfg->ts_number_layers > 1; // in the one-pass setting, either or both of // cfg->ss_number_layers > 1 and cfg->ts_number_layers > 1 may hold. vp9_set_svc(ctx->cpi, data); if (data == 1 && (cfg->g_pass == VPX_RC_FIRST_PASS || cfg->g_pass == VPX_RC_LAST_PASS) && cfg->ss_number_layers > 1 && cfg->ts_number_layers > 1) { return VPX_CODEC_INVALID_PARAM; } vp9_set_row_mt(ctx->cpi); return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_svc_layer_id(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_svc_layer_id_t *const data = va_arg(args, vpx_svc_layer_id_t *); VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; SVC *const svc = &cpi->svc; int sl; svc->spatial_layer_to_encode = data->spatial_layer_id; svc->first_spatial_layer_to_encode = data->spatial_layer_id; // TODO(jianj): Deprecated, to be removed. svc->temporal_layer_id = data->temporal_layer_id; // Allow for setting temporal layer per spatial layer for superframe. for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { svc->temporal_layer_id_per_spatial[sl] = data->temporal_layer_id_per_spatial[sl]; } // Checks on valid layer_id input.
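// (Illustrative usage, not in the original source: an application
// typically reaches this handler via
//   vpx_svc_layer_id_t id = { 0, 1, { 1, 0, 0, 0, 0 } };
//   vpx_codec_control(&codec, VP9E_SET_SVC_LAYER_ID, &id);
// after which the range check below rejects out-of-range temporal ids.)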
if (svc->temporal_layer_id < 0 || svc->temporal_layer_id >= (int)ctx->cfg.ts_number_layers) { return VPX_CODEC_INVALID_PARAM; } return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_get_svc_layer_id(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_svc_layer_id_t *data = va_arg(args, vpx_svc_layer_id_t *); VP9_COMP *const cpi = (VP9_COMP *)ctx->cpi; SVC *const svc = &cpi->svc; data->spatial_layer_id = svc->spatial_layer_id; data->temporal_layer_id = svc->temporal_layer_id; return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_svc_parameters(vpx_codec_alg_priv_t *ctx, va_list args) { VP9_COMP *const cpi = ctx->cpi; vpx_svc_extra_cfg_t *const params = va_arg(args, vpx_svc_extra_cfg_t *); int sl, tl; // Number of temporal layers and number of spatial layers have to be set // properly before calling this control function. for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { for (tl = 0; tl < cpi->svc.number_temporal_layers; ++tl) { const int layer = LAYER_IDS_TO_IDX(sl, tl, cpi->svc.number_temporal_layers); LAYER_CONTEXT *lc = &cpi->svc.layer_context[layer]; lc->max_q = params->max_quantizers[layer]; lc->min_q = params->min_quantizers[layer]; lc->scaling_factor_num = params->scaling_factor_num[sl]; lc->scaling_factor_den = params->scaling_factor_den[sl]; lc->speed = params->speed_per_layer[sl]; } } return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_get_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx, va_list args) { VP9_COMP *const cpi = ctx->cpi; vpx_svc_ref_frame_config_t *data = va_arg(args, vpx_svc_ref_frame_config_t *); int sl; for (sl = 0; sl <= cpi->svc.spatial_layer_id; sl++) { data->update_buffer_slot[sl] = cpi->svc.update_buffer_slot[sl]; data->reference_last[sl] = cpi->svc.reference_last[sl]; data->reference_golden[sl] = cpi->svc.reference_golden[sl]; data->reference_alt_ref[sl] = cpi->svc.reference_altref[sl]; data->lst_fb_idx[sl] = cpi->svc.lst_fb_idx[sl]; data->gld_fb_idx[sl] = cpi->svc.gld_fb_idx[sl]; data->alt_fb_idx[sl] = cpi->svc.alt_fb_idx[sl]; // TODO(jianj): Remove these 3, deprecated. 
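// (These three are superseded by the update_buffer_slot bitmask copied
// above.)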
data->update_last[sl] = cpi->svc.update_last[sl]; data->update_golden[sl] = cpi->svc.update_golden[sl]; data->update_alt_ref[sl] = cpi->svc.update_altref[sl]; } return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_svc_ref_frame_config(vpx_codec_alg_priv_t *ctx, va_list args) { VP9_COMP *const cpi = ctx->cpi; vpx_svc_ref_frame_config_t *data = va_arg(args, vpx_svc_ref_frame_config_t *); int sl; cpi->svc.use_set_ref_frame_config = 1; for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) { cpi->svc.update_buffer_slot[sl] = data->update_buffer_slot[sl]; cpi->svc.reference_last[sl] = data->reference_last[sl]; cpi->svc.reference_golden[sl] = data->reference_golden[sl]; cpi->svc.reference_altref[sl] = data->reference_alt_ref[sl]; cpi->svc.lst_fb_idx[sl] = data->lst_fb_idx[sl]; cpi->svc.gld_fb_idx[sl] = data->gld_fb_idx[sl]; cpi->svc.alt_fb_idx[sl] = data->alt_fb_idx[sl]; cpi->svc.duration[sl] = data->duration[sl]; } return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_svc_inter_layer_pred(vpx_codec_alg_priv_t *ctx, va_list args) { const int data = va_arg(args, int); VP9_COMP *const cpi = ctx->cpi; cpi->svc.disable_inter_layer_pred = data; return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_svc_frame_drop_layer(vpx_codec_alg_priv_t *ctx, va_list args) { VP9_COMP *const cpi = ctx->cpi; vpx_svc_frame_drop_t *data = va_arg(args, vpx_svc_frame_drop_t *); int sl; cpi->svc.framedrop_mode = data->framedrop_mode; for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) cpi->svc.framedrop_thresh[sl] = data->framedrop_thresh[sl]; // Don't allow max_consec_drop values below 1. cpi->svc.max_consec_drop = VPXMAX(1, data->max_consec_drop); return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_svc_gf_temporal_ref(vpx_codec_alg_priv_t *ctx, va_list args) { VP9_COMP *const cpi = ctx->cpi; const unsigned int data = va_arg(args, unsigned int); cpi->svc.use_gf_temporal_ref = data; return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_svc_spatial_layer_sync( vpx_codec_alg_priv_t *ctx, va_list args) { VP9_COMP *const cpi = ctx->cpi; vpx_svc_spatial_layer_sync_t *data = va_arg(args, vpx_svc_spatial_layer_sync_t *); int sl; for (sl = 0; sl < cpi->svc.number_spatial_layers; ++sl) cpi->svc.spatial_layer_sync[sl] = data->spatial_layer_sync[sl]; cpi->svc.set_intra_only_frame = data->base_layer_intra_only; return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_register_cx_callback(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_codec_priv_output_cx_pkt_cb_pair_t *cbp = (vpx_codec_priv_output_cx_pkt_cb_pair_t *)va_arg(args, void *); ctx->output_cx_pkt_cb.output_cx_pkt = cbp->output_cx_pkt; ctx->output_cx_pkt_cb.user_priv = cbp->user_priv; return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_tune_content(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.content = CAST(VP9E_SET_TUNE_CONTENT, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_color_space(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.color_space = CAST(VP9E_SET_COLOR_SPACE, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_color_range(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; extra_cfg.color_range = CAST(VP9E_SET_COLOR_RANGE, args); return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_render_size(vpx_codec_alg_priv_t *ctx, va_list args) { struct vp9_extracfg extra_cfg = ctx->extra_cfg; int *const render_size = 
va_arg(args, int *); extra_cfg.render_width = render_size[0]; extra_cfg.render_height = render_size[1]; return update_extra_cfg(ctx, &extra_cfg); } static vpx_codec_err_t ctrl_set_postencode_drop(vpx_codec_alg_priv_t *ctx, va_list args) { VP9_COMP *const cpi = ctx->cpi; const unsigned int data = va_arg(args, unsigned int); cpi->rc.ext_use_post_encode_drop = data; return VPX_CODEC_OK; } static vpx_codec_ctrl_fn_map_t encoder_ctrl_maps[] = { { VP8_COPY_REFERENCE, ctrl_copy_reference }, // Setters { VP8_SET_REFERENCE, ctrl_set_reference }, { VP8_SET_POSTPROC, ctrl_set_previewpp }, { VP9E_SET_ROI_MAP, ctrl_set_roi_map }, { VP8E_SET_ACTIVEMAP, ctrl_set_active_map }, { VP8E_SET_SCALEMODE, ctrl_set_scale_mode }, { VP8E_SET_CPUUSED, ctrl_set_cpuused }, { VP8E_SET_ENABLEAUTOALTREF, ctrl_set_enable_auto_alt_ref }, { VP8E_SET_SHARPNESS, ctrl_set_sharpness }, { VP8E_SET_STATIC_THRESHOLD, ctrl_set_static_thresh }, { VP9E_SET_TILE_COLUMNS, ctrl_set_tile_columns }, { VP9E_SET_TILE_ROWS, ctrl_set_tile_rows }, { VP9E_SET_TPL, ctrl_set_tpl_model }, { VP8E_SET_ARNR_MAXFRAMES, ctrl_set_arnr_max_frames }, { VP8E_SET_ARNR_STRENGTH, ctrl_set_arnr_strength }, { VP8E_SET_ARNR_TYPE, ctrl_set_arnr_type }, { VP8E_SET_TUNING, ctrl_set_tuning }, { VP8E_SET_CQ_LEVEL, ctrl_set_cq_level }, { VP8E_SET_MAX_INTRA_BITRATE_PCT, ctrl_set_rc_max_intra_bitrate_pct }, { VP9E_SET_MAX_INTER_BITRATE_PCT, ctrl_set_rc_max_inter_bitrate_pct }, { VP9E_SET_GF_CBR_BOOST_PCT, ctrl_set_rc_gf_cbr_boost_pct }, { VP9E_SET_LOSSLESS, ctrl_set_lossless }, { VP9E_SET_FRAME_PARALLEL_DECODING, ctrl_set_frame_parallel_decoding_mode }, { VP9E_SET_AQ_MODE, ctrl_set_aq_mode }, { VP9E_SET_ALT_REF_AQ, ctrl_set_alt_ref_aq }, { VP9E_SET_FRAME_PERIODIC_BOOST, ctrl_set_frame_periodic_boost }, { VP9E_SET_SVC, ctrl_set_svc }, { VP9E_SET_SVC_PARAMETERS, ctrl_set_svc_parameters }, { VP9E_REGISTER_CX_CALLBACK, ctrl_register_cx_callback }, { VP9E_SET_SVC_LAYER_ID, ctrl_set_svc_layer_id }, { VP9E_SET_TUNE_CONTENT, ctrl_set_tune_content }, { VP9E_SET_COLOR_SPACE, ctrl_set_color_space }, { VP9E_SET_COLOR_RANGE, ctrl_set_color_range }, { VP9E_SET_NOISE_SENSITIVITY, ctrl_set_noise_sensitivity }, { VP9E_SET_MIN_GF_INTERVAL, ctrl_set_min_gf_interval }, { VP9E_SET_MAX_GF_INTERVAL, ctrl_set_max_gf_interval }, { VP9E_SET_SVC_REF_FRAME_CONFIG, ctrl_set_svc_ref_frame_config }, { VP9E_SET_RENDER_SIZE, ctrl_set_render_size }, { VP9E_SET_TARGET_LEVEL, ctrl_set_target_level }, { VP9E_SET_ROW_MT, ctrl_set_row_mt }, { VP9E_SET_POSTENCODE_DROP, ctrl_set_postencode_drop }, { VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, ctrl_enable_motion_vector_unit_test }, { VP9E_SET_SVC_INTER_LAYER_PRED, ctrl_set_svc_inter_layer_pred }, { VP9E_SET_SVC_FRAME_DROP_LAYER, ctrl_set_svc_frame_drop_layer }, { VP9E_SET_SVC_GF_TEMPORAL_REF, ctrl_set_svc_gf_temporal_ref }, { VP9E_SET_SVC_SPATIAL_LAYER_SYNC, ctrl_set_svc_spatial_layer_sync }, // Getters { VP8E_GET_LAST_QUANTIZER, ctrl_get_quantizer }, { VP8E_GET_LAST_QUANTIZER_64, ctrl_get_quantizer64 }, { VP9_GET_REFERENCE, ctrl_get_reference }, { VP9E_GET_SVC_LAYER_ID, ctrl_get_svc_layer_id }, { VP9E_GET_ACTIVEMAP, ctrl_get_active_map }, { VP9E_GET_LEVEL, ctrl_get_level }, { VP9E_GET_SVC_REF_FRAME_CONFIG, ctrl_get_svc_ref_frame_config }, { -1, NULL }, }; static vpx_codec_enc_cfg_map_t encoder_usage_cfg_map[] = { { 0, { // NOLINT 0, // g_usage (unused) 8, // g_threads 0, // g_profile 320, // g_width 240, // g_height VPX_BITS_8, // g_bit_depth 8, // g_input_bit_depth { 1, 30 }, // g_timebase 0, // g_error_resilient VPX_RC_ONE_PASS, // g_pass 25, // g_lag_in_frames 0, 
// rc_dropframe_thresh 0, // rc_resize_allowed 0, // rc_scaled_width 0, // rc_scaled_height 60, // rc_resize_down_thresh 30, // rc_resize_up_thresh VPX_VBR, // rc_end_usage { NULL, 0 }, // rc_twopass_stats_in { NULL, 0 }, // rc_firstpass_mb_stats_in 256, // rc_target_bitrate 0, // rc_min_quantizer 63, // rc_max_quantizer 25, // rc_undershoot_pct 25, // rc_overshoot_pct 6000, // rc_max_buffer_size 4000, // rc_buffer_initial_size 5000, // rc_buffer_optimal_size 50, // rc_two_pass_vbrbias 0, // rc_two_pass_vbrmin_section 2000, // rc_two_pass_vbrmax_section 0, // rc_2pass_vbr_corpus_complexity (non 0 for corpus vbr) // keyframing settings (kf) VPX_KF_AUTO, // g_kfmode 0, // kf_min_dist 128, // kf_max_dist VPX_SS_DEFAULT_LAYERS, // ss_number_layers { 0 }, { 0 }, // ss_target_bitrate 1, // ts_number_layers { 0 }, // ts_target_bitrate { 0 }, // ts_rate_decimator 0, // ts_periodicity { 0 }, // ts_layer_id { 0 }, // layer_target_bitrate 0 // temporal_layering_mode } }, }; #ifndef VERSION_STRING #define VERSION_STRING #endif CODEC_INTERFACE(vpx_codec_vp9_cx) = { "WebM Project VP9 Encoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, #if CONFIG_VP9_HIGHBITDEPTH VPX_CODEC_CAP_HIGHBITDEPTH | #endif VPX_CODEC_CAP_ENCODER | VPX_CODEC_CAP_PSNR, // vpx_codec_caps_t encoder_init, // vpx_codec_init_fn_t encoder_destroy, // vpx_codec_destroy_fn_t encoder_ctrl_maps, // vpx_codec_ctrl_fn_map_t { // NOLINT NULL, // vpx_codec_peek_si_fn_t NULL, // vpx_codec_get_si_fn_t NULL, // vpx_codec_decode_fn_t NULL, // vpx_codec_frame_get_fn_t NULL // vpx_codec_set_fb_fn_t }, { // NOLINT 1, // 1 cfg map encoder_usage_cfg_map, // vpx_codec_enc_cfg_map_t encoder_encode, // vpx_codec_encode_fn_t encoder_get_cxdata, // vpx_codec_get_cx_data_fn_t encoder_set_config, // vpx_codec_enc_config_set_fn_t NULL, // vpx_codec_get_global_headers_fn_t encoder_get_preview, // vpx_codec_get_preview_frame_fn_t NULL // vpx_codec_enc_mr_get_mem_loc_fn_t } }; static vpx_codec_enc_cfg_t get_enc_cfg(int frame_width, int frame_height, vpx_rational_t frame_rate, int target_bitrate, vpx_enc_pass enc_pass) { vpx_codec_enc_cfg_t enc_cfg = encoder_usage_cfg_map[0].cfg; enc_cfg.g_w = frame_width; enc_cfg.g_h = frame_height; enc_cfg.rc_target_bitrate = target_bitrate; enc_cfg.g_pass = enc_pass; // g_timebase is the inverse of frame_rate enc_cfg.g_timebase.num = frame_rate.den; enc_cfg.g_timebase.den = frame_rate.num; return enc_cfg; } static vp9_extracfg get_extra_cfg(void) { vp9_extracfg extra_cfg = default_extra_cfg; return extra_cfg; } VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height, vpx_rational_t frame_rate, int target_bitrate, vpx_enc_pass enc_pass) { /* This function will generate the same VP9EncoderConfig used by the * vpxenc command given below. * The configs in the vpxenc command correspond to parameters of * vp9_get_encoder_config() as follows.
* * WIDTH: frame_width * HEIGHT: frame_height * FPS: frame_rate * BITRATE: target_bitrate * * INPUT, OUTPUT, LIMIT will not affect VP9EncoderConfig * * vpxenc command: * INPUT=bus_cif.y4m * OUTPUT=output.webm * WIDTH=352 * HEIGHT=288 * BITRATE=600 * FPS=30/1 * LIMIT=150 * ./vpxenc --limit=$LIMIT --width=$WIDTH --height=$HEIGHT --fps=$FPS * --lag-in-frames=25 \ * --codec=vp9 --good --cpu-used=0 --threads=0 --profile=0 \ * --min-q=0 --max-q=63 --auto-alt-ref=1 --passes=2 --kf-max-dist=150 \ * --kf-min-dist=0 --drop-frame=0 --static-thresh=0 --bias-pct=50 \ * --minsection-pct=0 --maxsection-pct=150 --arnr-maxframes=7 --psnr \ * --arnr-strength=5 --sharpness=0 --undershoot-pct=100 --overshoot-pct=100 \ * --frame-parallel=0 --tile-columns=0 --cpu-used=0 --end-usage=vbr \ * --target-bitrate=$BITRATE -o $OUTPUT $INPUT */ VP9EncoderConfig oxcf; vp9_extracfg extra_cfg = get_extra_cfg(); vpx_codec_enc_cfg_t enc_cfg = get_enc_cfg( frame_width, frame_height, frame_rate, target_bitrate, enc_pass); set_encoder_config(&oxcf, &enc_cfg, &extra_cfg); // These settings are made to match the settings of the vpxenc command. oxcf.key_freq = 150; oxcf.under_shoot_pct = 100; oxcf.over_shoot_pct = 100; oxcf.max_threads = 0; oxcf.tile_columns = 0; oxcf.frame_parallel_decoding_mode = 0; oxcf.two_pass_vbrmax_section = 150; return oxcf; } #define DUMP_STRUCT_VALUE(struct, value) \ printf(#value " %" PRId64 "\n", (int64_t)(struct)->value) void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf) { DUMP_STRUCT_VALUE(oxcf, profile); DUMP_STRUCT_VALUE(oxcf, bit_depth); DUMP_STRUCT_VALUE(oxcf, width); DUMP_STRUCT_VALUE(oxcf, height); DUMP_STRUCT_VALUE(oxcf, input_bit_depth); DUMP_STRUCT_VALUE(oxcf, init_framerate); // TODO(angiebird): dump g_timebase // TODO(angiebird): dump g_timebase_in_ts DUMP_STRUCT_VALUE(oxcf, target_bandwidth); DUMP_STRUCT_VALUE(oxcf, noise_sensitivity); DUMP_STRUCT_VALUE(oxcf, sharpness); DUMP_STRUCT_VALUE(oxcf, speed); DUMP_STRUCT_VALUE(oxcf, rc_max_intra_bitrate_pct); DUMP_STRUCT_VALUE(oxcf, rc_max_inter_bitrate_pct); DUMP_STRUCT_VALUE(oxcf, gf_cbr_boost_pct); DUMP_STRUCT_VALUE(oxcf, mode); DUMP_STRUCT_VALUE(oxcf, pass); // Key Framing Operations DUMP_STRUCT_VALUE(oxcf, auto_key); DUMP_STRUCT_VALUE(oxcf, key_freq); DUMP_STRUCT_VALUE(oxcf, lag_in_frames); // ---------------------------------------------------------------- // DATARATE CONTROL OPTIONS // vbr, cbr, constrained quality or constant quality DUMP_STRUCT_VALUE(oxcf, rc_mode); // buffer targeting aggressiveness DUMP_STRUCT_VALUE(oxcf, under_shoot_pct); DUMP_STRUCT_VALUE(oxcf, over_shoot_pct); // buffering parameters // TODO(angiebird): dump starting_buffer_level_ms // TODO(angiebird): dump optimal_buffer_level_ms // TODO(angiebird): dump maximum_buffer_size_ms // Frame drop threshold. DUMP_STRUCT_VALUE(oxcf, drop_frames_water_mark); // controlling quality DUMP_STRUCT_VALUE(oxcf, fixed_q); DUMP_STRUCT_VALUE(oxcf, worst_allowed_q); DUMP_STRUCT_VALUE(oxcf, best_allowed_q); DUMP_STRUCT_VALUE(oxcf, cq_level); DUMP_STRUCT_VALUE(oxcf, aq_mode); // Special handling of Adaptive Quantization for AltRef frames DUMP_STRUCT_VALUE(oxcf, alt_ref_aq); // Internal frame size scaling. DUMP_STRUCT_VALUE(oxcf, resize_mode); DUMP_STRUCT_VALUE(oxcf, scaled_frame_width); DUMP_STRUCT_VALUE(oxcf, scaled_frame_height); // Enable feature to reduce the frame quantization every x frames.
DUMP_STRUCT_VALUE(oxcf, frame_periodic_boost); // two pass datarate control DUMP_STRUCT_VALUE(oxcf, two_pass_vbrbias); DUMP_STRUCT_VALUE(oxcf, two_pass_vbrmin_section); DUMP_STRUCT_VALUE(oxcf, two_pass_vbrmax_section); DUMP_STRUCT_VALUE(oxcf, vbr_corpus_complexity); // END DATARATE CONTROL OPTIONS // ---------------------------------------------------------------- // Spatial and temporal scalability. DUMP_STRUCT_VALUE(oxcf, ss_number_layers); DUMP_STRUCT_VALUE(oxcf, ts_number_layers); // Bitrate allocation for spatial layers. // TODO(angiebird): dump layer_target_bitrate[VPX_MAX_LAYERS] // TODO(angiebird): dump ss_target_bitrate[VPX_SS_MAX_LAYERS] // TODO(angiebird): dump ss_enable_auto_arf[VPX_SS_MAX_LAYERS] // TODO(angiebird): dump ts_rate_decimator[VPX_TS_MAX_LAYERS] DUMP_STRUCT_VALUE(oxcf, enable_auto_arf); DUMP_STRUCT_VALUE(oxcf, encode_breakout); DUMP_STRUCT_VALUE(oxcf, error_resilient_mode); DUMP_STRUCT_VALUE(oxcf, frame_parallel_decoding_mode); DUMP_STRUCT_VALUE(oxcf, arnr_max_frames); DUMP_STRUCT_VALUE(oxcf, arnr_strength); DUMP_STRUCT_VALUE(oxcf, min_gf_interval); DUMP_STRUCT_VALUE(oxcf, max_gf_interval); DUMP_STRUCT_VALUE(oxcf, tile_columns); DUMP_STRUCT_VALUE(oxcf, tile_rows); DUMP_STRUCT_VALUE(oxcf, enable_tpl_model); DUMP_STRUCT_VALUE(oxcf, max_threads); DUMP_STRUCT_VALUE(oxcf, target_level); // TODO(angiebird): dump two_pass_stats_in #if CONFIG_FP_MB_STATS // TODO(angiebird): dump firstpass_mb_stats_in #endif DUMP_STRUCT_VALUE(oxcf, tuning); DUMP_STRUCT_VALUE(oxcf, content); #if CONFIG_VP9_HIGHBITDEPTH DUMP_STRUCT_VALUE(oxcf, use_highbitdepth); #endif DUMP_STRUCT_VALUE(oxcf, color_space); DUMP_STRUCT_VALUE(oxcf, color_range); DUMP_STRUCT_VALUE(oxcf, render_width); DUMP_STRUCT_VALUE(oxcf, render_height); DUMP_STRUCT_VALUE(oxcf, temporal_layering_mode); DUMP_STRUCT_VALUE(oxcf, row_mt); DUMP_STRUCT_VALUE(oxcf, motion_vector_unit_test); } FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf) { FRAME_INFO frame_info; int dummy; frame_info.frame_width = oxcf->width; frame_info.frame_height = oxcf->height; frame_info.render_frame_width = oxcf->width; frame_info.render_frame_height = oxcf->height; frame_info.bit_depth = oxcf->bit_depth; vp9_set_mi_size(&frame_info.mi_rows, &frame_info.mi_cols, &dummy, frame_info.frame_width, frame_info.frame_height); vp9_set_mb_size(&frame_info.mb_rows, &frame_info.mb_cols, &frame_info.num_mbs, frame_info.mi_rows, frame_info.mi_cols); // TODO(angiebird): Figure out how to get subsampling_x/y here return frame_info; } void vp9_set_first_pass_stats(VP9EncoderConfig *oxcf, const vpx_fixed_buf_t *stats) { oxcf->two_pass_stats_in = *stats; } libvpx-1.8.2/vp9/vp9_cx_iface.h000066400000000000000000000030531357355204000162610ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VP9_VP9_CX_IFACE_H_ #define VPX_VP9_VP9_CX_IFACE_H_ #include "vp9/encoder/vp9_encoder.h" #include "vp9/common/vp9_onyxc_int.h" #ifdef __cplusplus extern "C" { #endif VP9EncoderConfig vp9_get_encoder_config(int frame_width, int frame_height, vpx_rational_t frame_rate, int target_bitrate, vpx_enc_pass enc_pass); void vp9_dump_encoder_config(const VP9EncoderConfig *oxcf); FRAME_INFO vp9_get_frame_info(const VP9EncoderConfig *oxcf); static INLINE int64_t timebase_units_to_ticks(const vpx_rational64_t *timestamp_ratio, int64_t n) { return n * timestamp_ratio->num / timestamp_ratio->den; } static INLINE int64_t ticks_to_timebase_units(const vpx_rational64_t *timestamp_ratio, int64_t n) { int64_t round = timestamp_ratio->num / 2; if (round > 0) --round; return (n * timestamp_ratio->den + round) / timestamp_ratio->num; } void vp9_set_first_pass_stats(VP9EncoderConfig *oxcf, const vpx_fixed_buf_t *stats); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_VP9_CX_IFACE_H_ libvpx-1.8.2/vp9/vp9_dx_iface.c000066400000000000000000000565201357355204000162640ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <stdlib.h> #include <string.h> #include "./vpx_config.h" #include "./vpx_version.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx/vp8dx.h" #include "vpx/vpx_decoder.h" #include "vpx_dsp/bitreader_buffer.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_util/vpx_thread.h" #include "vp9/common/vp9_alloccommon.h" #include "vp9/common/vp9_frame_buffers.h" #include "vp9/decoder/vp9_decodeframe.h" #include "vp9/vp9_dx_iface.h" #include "vp9/vp9_iface_common.h" #define VP9_CAP_POSTPROC (CONFIG_VP9_POSTPROC ? VPX_CODEC_CAP_POSTPROC : 0) static vpx_codec_err_t decoder_init(vpx_codec_ctx_t *ctx, vpx_codec_priv_enc_mr_cfg_t *data) { // This function only allocates space for the vpx_codec_alg_priv_t // structure. More memory may be required at the time the stream // information becomes known. (void)data; if (!ctx->priv) { vpx_codec_alg_priv_t *const priv = (vpx_codec_alg_priv_t *)vpx_calloc(1, sizeof(*priv)); if (priv == NULL) return VPX_CODEC_MEM_ERROR; ctx->priv = (vpx_codec_priv_t *)priv; ctx->priv->init_flags = ctx->init_flags; priv->si.sz = sizeof(priv->si); priv->flushed = 0; if (ctx->config.dec) { priv->cfg = *ctx->config.dec; ctx->config.dec = &priv->cfg; } } return VPX_CODEC_OK; } static vpx_codec_err_t decoder_destroy(vpx_codec_alg_priv_t *ctx) { if (ctx->pbi != NULL) { vp9_decoder_remove(ctx->pbi); } if (ctx->buffer_pool) { vp9_free_ref_frame_buffers(ctx->buffer_pool); vp9_free_internal_frame_buffers(&ctx->buffer_pool->int_frame_buffers); } vpx_free(ctx->buffer_pool); vpx_free(ctx); return VPX_CODEC_OK; } static int parse_bitdepth_colorspace_sampling(BITSTREAM_PROFILE profile, struct vpx_read_bit_buffer *rb) { vpx_color_space_t color_space; if (profile >= PROFILE_2) rb->bit_offset += 1; // Bit-depth 10 or 12. color_space = (vpx_color_space_t)vpx_rb_read_literal(rb, 3); if (color_space != VPX_CS_SRGB) { rb->bit_offset += 1; // [16,235] (including xvycc) vs [0,255] range. if (profile == PROFILE_1 || profile == PROFILE_3) { rb->bit_offset += 2; // subsampling x/y. rb->bit_offset += 1; // unused.
} } else { if (profile == PROFILE_1 || profile == PROFILE_3) { rb->bit_offset += 1; // unused } else { // RGB is only available in version 1. return 0; } } return 1; } static vpx_codec_err_t decoder_peek_si_internal( const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si, int *is_intra_only, vpx_decrypt_cb decrypt_cb, void *decrypt_state) { int intra_only_flag = 0; uint8_t clear_buffer[11]; if (data + data_sz <= data) return VPX_CODEC_INVALID_PARAM; si->is_kf = 0; si->w = si->h = 0; if (decrypt_cb) { data_sz = VPXMIN(sizeof(clear_buffer), data_sz); decrypt_cb(decrypt_state, data, clear_buffer, data_sz); data = clear_buffer; } // A maximum of 6 bits are needed to read the frame marker, profile and // show_existing_frame. if (data_sz < 1) return VPX_CODEC_UNSUP_BITSTREAM; { int show_frame; int error_resilient; struct vpx_read_bit_buffer rb = { data, data + data_sz, 0, NULL, NULL }; const int frame_marker = vpx_rb_read_literal(&rb, 2); const BITSTREAM_PROFILE profile = vp9_read_profile(&rb); if (frame_marker != VP9_FRAME_MARKER) return VPX_CODEC_UNSUP_BITSTREAM; if (profile >= MAX_PROFILES) return VPX_CODEC_UNSUP_BITSTREAM; if (vpx_rb_read_bit(&rb)) { // show an existing frame // If profile is > 2 and show_existing_frame is true, then at least 1 more // byte (6+3=9 bits) is needed. if (profile > 2 && data_sz < 2) return VPX_CODEC_UNSUP_BITSTREAM; vpx_rb_read_literal(&rb, 3); // Frame buffer to show. return VPX_CODEC_OK; } // For the rest of the function, a maximum of 9 more bytes are needed // (computed by taking the maximum possible bits needed in each case). Note // that this has to be updated if we read any more bits in this function. if (data_sz < 10) return VPX_CODEC_UNSUP_BITSTREAM; si->is_kf = !vpx_rb_read_bit(&rb); show_frame = vpx_rb_read_bit(&rb); error_resilient = vpx_rb_read_bit(&rb); if (si->is_kf) { if (!vp9_read_sync_code(&rb)) return VPX_CODEC_UNSUP_BITSTREAM; if (!parse_bitdepth_colorspace_sampling(profile, &rb)) return VPX_CODEC_UNSUP_BITSTREAM; vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h); } else { intra_only_flag = show_frame ? 0 : vpx_rb_read_bit(&rb); rb.bit_offset += error_resilient ? 0 : 2; // reset_frame_context if (intra_only_flag) { if (!vp9_read_sync_code(&rb)) return VPX_CODEC_UNSUP_BITSTREAM; if (profile > PROFILE_0) { if (!parse_bitdepth_colorspace_sampling(profile, &rb)) return VPX_CODEC_UNSUP_BITSTREAM; // The colorspace info may cause vp9_read_frame_size() to need 11 // bytes. if (data_sz < 11) return VPX_CODEC_UNSUP_BITSTREAM; } rb.bit_offset += REF_FRAMES; // refresh_frame_flags vp9_read_frame_size(&rb, (int *)&si->w, (int *)&si->h); } } } if (is_intra_only != NULL) *is_intra_only = intra_only_flag; return VPX_CODEC_OK; } static vpx_codec_err_t decoder_peek_si(const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si) { return decoder_peek_si_internal(data, data_sz, si, NULL, NULL, NULL); } static vpx_codec_err_t decoder_get_si(vpx_codec_alg_priv_t *ctx, vpx_codec_stream_info_t *si) { const size_t sz = (si->sz >= sizeof(vp9_stream_info_t)) ? sizeof(vp9_stream_info_t) : sizeof(vpx_codec_stream_info_t); memcpy(si, &ctx->si, sz); si->sz = (unsigned int)sz; return VPX_CODEC_OK; } static void set_error_detail(vpx_codec_alg_priv_t *ctx, const char *const error) { ctx->base.err_detail = error; } static vpx_codec_err_t update_error_state( vpx_codec_alg_priv_t *ctx, const struct vpx_internal_error_info *error) { if (error->error_code) set_error_detail(ctx, error->has_detail ? 
error->detail : NULL); return error->error_code; } static void init_buffer_callbacks(vpx_codec_alg_priv_t *ctx) { VP9_COMMON *const cm = &ctx->pbi->common; BufferPool *const pool = cm->buffer_pool; cm->new_fb_idx = INVALID_IDX; cm->byte_alignment = ctx->byte_alignment; cm->skip_loop_filter = ctx->skip_loop_filter; if (ctx->get_ext_fb_cb != NULL && ctx->release_ext_fb_cb != NULL) { pool->get_fb_cb = ctx->get_ext_fb_cb; pool->release_fb_cb = ctx->release_ext_fb_cb; pool->cb_priv = ctx->ext_priv; } else { pool->get_fb_cb = vp9_get_frame_buffer; pool->release_fb_cb = vp9_release_frame_buffer; if (vp9_alloc_internal_frame_buffers(&pool->int_frame_buffers)) vpx_internal_error(&cm->error, VPX_CODEC_MEM_ERROR, "Failed to initialize internal frame buffers"); pool->cb_priv = &pool->int_frame_buffers; } } static void set_default_ppflags(vp8_postproc_cfg_t *cfg) { cfg->post_proc_flag = VP8_DEBLOCK | VP8_DEMACROBLOCK; cfg->deblocking_level = 4; cfg->noise_level = 0; } static void set_ppflags(const vpx_codec_alg_priv_t *ctx, vp9_ppflags_t *flags) { flags->post_proc_flag = ctx->postproc_cfg.post_proc_flag; flags->deblocking_level = ctx->postproc_cfg.deblocking_level; flags->noise_level = ctx->postproc_cfg.noise_level; } #undef ERROR #define ERROR(str) \ do { \ ctx->base.err_detail = str; \ return VPX_CODEC_INVALID_PARAM; \ } while (0) #define RANGE_CHECK(p, memb, lo, hi) \ do { \ if (!(((p)->memb == (lo) || (p)->memb > (lo)) && (p)->memb <= (hi))) \ ERROR(#memb " out of range [" #lo ".." #hi "]"); \ } while (0) static vpx_codec_err_t init_decoder(vpx_codec_alg_priv_t *ctx) { ctx->last_show_frame = -1; ctx->need_resync = 1; ctx->flushed = 0; ctx->buffer_pool = (BufferPool *)vpx_calloc(1, sizeof(BufferPool)); if (ctx->buffer_pool == NULL) return VPX_CODEC_MEM_ERROR; ctx->pbi = vp9_decoder_create(ctx->buffer_pool); if (ctx->pbi == NULL) { set_error_detail(ctx, "Failed to allocate decoder"); return VPX_CODEC_MEM_ERROR; } ctx->pbi->max_threads = ctx->cfg.threads; ctx->pbi->inv_tile_order = ctx->invert_tile_order; RANGE_CHECK(ctx, row_mt, 0, 1); ctx->pbi->row_mt = ctx->row_mt; RANGE_CHECK(ctx, lpf_opt, 0, 1); ctx->pbi->lpf_mt_opt = ctx->lpf_opt; // If postprocessing was enabled by the application and a // configuration has not been provided, default it. if (!ctx->postproc_cfg_set && (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC)) set_default_ppflags(&ctx->postproc_cfg); init_buffer_callbacks(ctx); return VPX_CODEC_OK; } static INLINE void check_resync(vpx_codec_alg_priv_t *const ctx, const VP9Decoder *const pbi) { // Clear resync flag if the decoder got a key frame or intra only frame. if (ctx->need_resync == 1 && pbi->need_resync == 0 && (pbi->common.intra_only || pbi->common.frame_type == KEY_FRAME)) ctx->need_resync = 0; } static vpx_codec_err_t decode_one(vpx_codec_alg_priv_t *ctx, const uint8_t **data, unsigned int data_sz, void *user_priv, int64_t deadline) { (void)deadline; // Determine the stream parameters. Note that we rely on peek_si to // validate that we have a buffer that does not wrap around the top // of the heap. if (!ctx->si.h) { int is_intra_only = 0; const vpx_codec_err_t res = decoder_peek_si_internal(*data, data_sz, &ctx->si, &is_intra_only, ctx->decrypt_cb, ctx->decrypt_state); if (res != VPX_CODEC_OK) return res; if (!ctx->si.is_kf && !is_intra_only) return VPX_CODEC_ERROR; } ctx->user_priv = user_priv; // Set these even if already initialized. The caller may have changed the // decrypt config between frames. 
ctx->pbi->decrypt_cb = ctx->decrypt_cb; ctx->pbi->decrypt_state = ctx->decrypt_state; if (vp9_receive_compressed_data(ctx->pbi, data_sz, data)) { ctx->pbi->cur_buf->buf.corrupted = 1; ctx->pbi->need_resync = 1; ctx->need_resync = 1; return update_error_state(ctx, &ctx->pbi->common.error); } check_resync(ctx, ctx->pbi); return VPX_CODEC_OK; } static vpx_codec_err_t decoder_decode(vpx_codec_alg_priv_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline) { const uint8_t *data_start = data; const uint8_t *const data_end = data + data_sz; vpx_codec_err_t res; uint32_t frame_sizes[8]; int frame_count; if (data == NULL && data_sz == 0) { ctx->flushed = 1; return VPX_CODEC_OK; } // Reset flushed when receiving a valid frame. ctx->flushed = 0; // Initialize the decoder on the first frame. if (ctx->pbi == NULL) { const vpx_codec_err_t res = init_decoder(ctx); if (res != VPX_CODEC_OK) return res; } res = vp9_parse_superframe_index(data, data_sz, frame_sizes, &frame_count, ctx->decrypt_cb, ctx->decrypt_state); if (res != VPX_CODEC_OK) return res; if (ctx->svc_decoding && ctx->svc_spatial_layer < frame_count - 1) frame_count = ctx->svc_spatial_layer + 1; // Decode in serial mode. if (frame_count > 0) { int i; for (i = 0; i < frame_count; ++i) { const uint8_t *data_start_copy = data_start; const uint32_t frame_size = frame_sizes[i]; vpx_codec_err_t res; if (data_start < data || frame_size > (uint32_t)(data_end - data_start)) { set_error_detail(ctx, "Invalid frame size in index"); return VPX_CODEC_CORRUPT_FRAME; } res = decode_one(ctx, &data_start_copy, frame_size, user_priv, deadline); if (res != VPX_CODEC_OK) return res; data_start += frame_size; } } else { while (data_start < data_end) { const uint32_t frame_size = (uint32_t)(data_end - data_start); const vpx_codec_err_t res = decode_one(ctx, &data_start, frame_size, user_priv, deadline); if (res != VPX_CODEC_OK) return res; // Account for suboptimal termination by the encoder. while (data_start < data_end) { const uint8_t marker = read_marker(ctx->decrypt_cb, ctx->decrypt_state, data_start); if (marker) break; ++data_start; } } } return res; } static vpx_image_t *decoder_get_frame(vpx_codec_alg_priv_t *ctx, vpx_codec_iter_t *iter) { vpx_image_t *img = NULL; // Legacy parameter carried over from VP8. Has no effect for VP9 since we // always return only 1 frame per decode call. (void)iter; if (ctx->pbi != NULL) { YV12_BUFFER_CONFIG sd; vp9_ppflags_t flags = { 0, 0, 0 }; if (ctx->base.init_flags & VPX_CODEC_USE_POSTPROC) set_ppflags(ctx, &flags); if (vp9_get_raw_frame(ctx->pbi, &sd, &flags) == 0) { VP9_COMMON *const cm = &ctx->pbi->common; RefCntBuffer *const frame_bufs = cm->buffer_pool->frame_bufs; ctx->last_show_frame = ctx->pbi->common.new_fb_idx; if (ctx->need_resync) return NULL; yuvconfig2image(&ctx->img, &sd, ctx->user_priv); ctx->img.fb_priv = frame_bufs[cm->new_fb_idx].raw_frame_buffer.priv; img = &ctx->img; return img; } } return NULL; } static vpx_codec_err_t decoder_set_fb_fn( vpx_codec_alg_priv_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get, vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { if (cb_get == NULL || cb_release == NULL) { return VPX_CODEC_INVALID_PARAM; } else if (ctx->pbi == NULL) { // If the decoder has already been initialized, do not accept changes to // the frame buffer functions. 
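// (Applications install these callbacks with
// vpx_codec_set_frame_buffer_functions(), which therefore has to be called
// before the first vpx_codec_decode() call.)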
ctx->get_ext_fb_cb = cb_get; ctx->release_ext_fb_cb = cb_release; ctx->ext_priv = cb_priv; return VPX_CODEC_OK; } return VPX_CODEC_ERROR; } static vpx_codec_err_t ctrl_set_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_ref_frame_t *const data = va_arg(args, vpx_ref_frame_t *); if (data) { vpx_ref_frame_t *const frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); return vp9_set_reference_dec( &ctx->pbi->common, ref_frame_to_vp9_reframe(frame->frame_type), &sd); } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_err_t ctrl_copy_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_ref_frame_t *data = va_arg(args, vpx_ref_frame_t *); if (data) { vpx_ref_frame_t *frame = (vpx_ref_frame_t *)data; YV12_BUFFER_CONFIG sd; image2yuvconfig(&frame->img, &sd); return vp9_copy_reference_dec(ctx->pbi, (VP9_REFFRAME)frame->frame_type, &sd); } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_err_t ctrl_get_reference(vpx_codec_alg_priv_t *ctx, va_list args) { vp9_ref_frame_t *data = va_arg(args, vp9_ref_frame_t *); if (data) { const int fb_idx = ctx->pbi->common.cur_show_frame_fb_idx; YV12_BUFFER_CONFIG *fb = get_buf_frame(&ctx->pbi->common, fb_idx); if (fb == NULL) return VPX_CODEC_ERROR; yuvconfig2image(&data->img, fb, NULL); return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; } } static vpx_codec_err_t ctrl_set_postproc(vpx_codec_alg_priv_t *ctx, va_list args) { #if CONFIG_VP9_POSTPROC vp8_postproc_cfg_t *data = va_arg(args, vp8_postproc_cfg_t *); if (data) { ctx->postproc_cfg_set = 1; ctx->postproc_cfg = *((vp8_postproc_cfg_t *)data); return VPX_CODEC_OK; } else { return VPX_CODEC_INVALID_PARAM; } #else (void)ctx; (void)args; return VPX_CODEC_INCAPABLE; #endif } static vpx_codec_err_t ctrl_get_quantizer(vpx_codec_alg_priv_t *ctx, va_list args) { int *const arg = va_arg(args, int *); if (arg == NULL || ctx->pbi == NULL) return VPX_CODEC_INVALID_PARAM; *arg = ctx->pbi->common.base_qindex; return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_get_last_ref_updates(vpx_codec_alg_priv_t *ctx, va_list args) { int *const update_info = va_arg(args, int *); if (update_info) { if (ctx->pbi != NULL) { *update_info = ctx->pbi->refresh_frame_flags; return VPX_CODEC_OK; } else { return VPX_CODEC_ERROR; } } return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_get_frame_corrupted(vpx_codec_alg_priv_t *ctx, va_list args) { int *corrupted = va_arg(args, int *); if (corrupted) { if (ctx->pbi != NULL) { RefCntBuffer *const frame_bufs = ctx->pbi->common.buffer_pool->frame_bufs; if (ctx->pbi->common.frame_to_show == NULL) return VPX_CODEC_ERROR; if (ctx->last_show_frame >= 0) *corrupted = frame_bufs[ctx->last_show_frame].buf.corrupted; return VPX_CODEC_OK; } else { return VPX_CODEC_ERROR; } } return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_get_frame_size(vpx_codec_alg_priv_t *ctx, va_list args) { int *const frame_size = va_arg(args, int *); if (frame_size) { if (ctx->pbi != NULL) { const VP9_COMMON *const cm = &ctx->pbi->common; frame_size[0] = cm->width; frame_size[1] = cm->height; return VPX_CODEC_OK; } else { return VPX_CODEC_ERROR; } } return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_get_render_size(vpx_codec_alg_priv_t *ctx, va_list args) { int *const render_size = va_arg(args, int *); if (render_size) { if (ctx->pbi != NULL) { const VP9_COMMON *const cm = &ctx->pbi->common; render_size[0] = cm->render_width; render_size[1] = cm->render_height; return VPX_CODEC_OK; } else { return VPX_CODEC_ERROR; } } 
return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_get_bit_depth(vpx_codec_alg_priv_t *ctx, va_list args) { unsigned int *const bit_depth = va_arg(args, unsigned int *); if (bit_depth) { if (ctx->pbi != NULL) { const VP9_COMMON *const cm = &ctx->pbi->common; *bit_depth = cm->bit_depth; return VPX_CODEC_OK; } else { return VPX_CODEC_ERROR; } } return VPX_CODEC_INVALID_PARAM; } static vpx_codec_err_t ctrl_set_invert_tile_order(vpx_codec_alg_priv_t *ctx, va_list args) { ctx->invert_tile_order = va_arg(args, int); return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_decryptor(vpx_codec_alg_priv_t *ctx, va_list args) { vpx_decrypt_init *init = va_arg(args, vpx_decrypt_init *); ctx->decrypt_cb = init ? init->decrypt_cb : NULL; ctx->decrypt_state = init ? init->decrypt_state : NULL; return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_byte_alignment(vpx_codec_alg_priv_t *ctx, va_list args) { const int legacy_byte_alignment = 0; const int min_byte_alignment = 32; const int max_byte_alignment = 1024; const int byte_alignment = va_arg(args, int); if (byte_alignment != legacy_byte_alignment && (byte_alignment < min_byte_alignment || byte_alignment > max_byte_alignment || (byte_alignment & (byte_alignment - 1)) != 0)) return VPX_CODEC_INVALID_PARAM; ctx->byte_alignment = byte_alignment; if (ctx->pbi != NULL) { ctx->pbi->common.byte_alignment = byte_alignment; } return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_skip_loop_filter(vpx_codec_alg_priv_t *ctx, va_list args) { ctx->skip_loop_filter = va_arg(args, int); if (ctx->pbi != NULL) { ctx->pbi->common.skip_loop_filter = ctx->skip_loop_filter; } return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_spatial_layer_svc(vpx_codec_alg_priv_t *ctx, va_list args) { ctx->svc_decoding = 1; ctx->svc_spatial_layer = va_arg(args, int); if (ctx->svc_spatial_layer < 0) return VPX_CODEC_INVALID_PARAM; else return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_set_row_mt(vpx_codec_alg_priv_t *ctx, va_list args) { ctx->row_mt = va_arg(args, int); return VPX_CODEC_OK; } static vpx_codec_err_t ctrl_enable_lpf_opt(vpx_codec_alg_priv_t *ctx, va_list args) { ctx->lpf_opt = va_arg(args, int); return VPX_CODEC_OK; } static vpx_codec_ctrl_fn_map_t decoder_ctrl_maps[] = { { VP8_COPY_REFERENCE, ctrl_copy_reference }, // Setters { VP8_SET_REFERENCE, ctrl_set_reference }, { VP8_SET_POSTPROC, ctrl_set_postproc }, { VP9_INVERT_TILE_DECODE_ORDER, ctrl_set_invert_tile_order }, { VPXD_SET_DECRYPTOR, ctrl_set_decryptor }, { VP9_SET_BYTE_ALIGNMENT, ctrl_set_byte_alignment }, { VP9_SET_SKIP_LOOP_FILTER, ctrl_set_skip_loop_filter }, { VP9_DECODE_SVC_SPATIAL_LAYER, ctrl_set_spatial_layer_svc }, { VP9D_SET_ROW_MT, ctrl_set_row_mt }, { VP9D_SET_LOOP_FILTER_OPT, ctrl_enable_lpf_opt }, // Getters { VPXD_GET_LAST_QUANTIZER, ctrl_get_quantizer }, { VP8D_GET_LAST_REF_UPDATES, ctrl_get_last_ref_updates }, { VP8D_GET_FRAME_CORRUPTED, ctrl_get_frame_corrupted }, { VP9_GET_REFERENCE, ctrl_get_reference }, { VP9D_GET_DISPLAY_SIZE, ctrl_get_render_size }, { VP9D_GET_BIT_DEPTH, ctrl_get_bit_depth }, { VP9D_GET_FRAME_SIZE, ctrl_get_frame_size }, { -1, NULL }, }; #ifndef VERSION_STRING #define VERSION_STRING #endif CODEC_INTERFACE(vpx_codec_vp9_dx) = { "WebM Project VP9 Decoder" VERSION_STRING, VPX_CODEC_INTERNAL_ABI_VERSION, #if CONFIG_VP9_HIGHBITDEPTH VPX_CODEC_CAP_HIGHBITDEPTH | #endif VPX_CODEC_CAP_DECODER | VP9_CAP_POSTPROC | VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER, // vpx_codec_caps_t decoder_init, // vpx_codec_init_fn_t decoder_destroy, // vpx_codec_destroy_fn_t decoder_ctrl_maps, // 
vpx_codec_ctrl_fn_map_t { // NOLINT decoder_peek_si, // vpx_codec_peek_si_fn_t decoder_get_si, // vpx_codec_get_si_fn_t decoder_decode, // vpx_codec_decode_fn_t decoder_get_frame, // vpx_codec_frame_get_fn_t decoder_set_fb_fn, // vpx_codec_set_fb_fn_t }, { // NOLINT 0, NULL, // vpx_codec_enc_cfg_map_t NULL, // vpx_codec_encode_fn_t NULL, // vpx_codec_get_cx_data_fn_t NULL, // vpx_codec_enc_config_set_fn_t NULL, // vpx_codec_get_global_headers_fn_t NULL, // vpx_codec_get_preview_frame_fn_t NULL // vpx_codec_enc_mr_get_mem_loc_fn_t } }; libvpx-1.8.2/vp9/vp9_dx_iface.h000066400000000000000000000030111357355204000162540ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_VP9_DX_IFACE_H_ #define VPX_VP9_VP9_DX_IFACE_H_ #include "vp9/decoder/vp9_decoder.h" typedef vpx_codec_stream_info_t vp9_stream_info_t; struct vpx_codec_alg_priv { vpx_codec_priv_t base; vpx_codec_dec_cfg_t cfg; vp9_stream_info_t si; VP9Decoder *pbi; void *user_priv; int postproc_cfg_set; vp8_postproc_cfg_t postproc_cfg; vpx_decrypt_cb decrypt_cb; void *decrypt_state; vpx_image_t img; int img_avail; int flushed; int invert_tile_order; int last_show_frame; // Index of last output frame. int byte_alignment; int skip_loop_filter; int need_resync; // wait for key/intra-only frame // BufferPool that holds all reference frames. BufferPool *buffer_pool; // External frame buffer info to save for VP9 common. void *ext_priv; // Private data associated with the external frame buffers. vpx_get_frame_buffer_cb_fn_t get_ext_fb_cb; vpx_release_frame_buffer_cb_fn_t release_ext_fb_cb; // Allow for decoding up to a given spatial layer for SVC stream. int svc_decoding; int svc_spatial_layer; int row_mt; int lpf_opt; }; #endif // VPX_VP9_VP9_DX_IFACE_H_ libvpx-1.8.2/vp9/vp9_iface_common.c000066400000000000000000000114411357355204000171320ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file in the root of the source tree. An additional * intellectual property rights grant can be found in the file PATENTS. * All contributing project authors may be found in the AUTHORS file in * the root of the source tree. 
*/ #include "vp9/vp9_iface_common.h" void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, void *user_priv) { /** vpx_img_wrap() doesn't allow specifying independent strides for * the Y, U, and V planes, nor other alignment adjustments that * might be representable by a YV12_BUFFER_CONFIG, so we just * initialize all the fields.*/ int bps; if (!yv12->subsampling_y) { if (!yv12->subsampling_x) { img->fmt = VPX_IMG_FMT_I444; bps = 24; } else { img->fmt = VPX_IMG_FMT_I422; bps = 16; } } else { if (!yv12->subsampling_x) { img->fmt = VPX_IMG_FMT_I440; bps = 16; } else { img->fmt = VPX_IMG_FMT_I420; bps = 12; } } img->cs = yv12->color_space; img->range = yv12->color_range; img->bit_depth = 8; img->w = yv12->y_stride; img->h = ALIGN_POWER_OF_TWO(yv12->y_height + 2 * VP9_ENC_BORDER_IN_PIXELS, 3); img->d_w = yv12->y_crop_width; img->d_h = yv12->y_crop_height; img->r_w = yv12->render_width; img->r_h = yv12->render_height; img->x_chroma_shift = yv12->subsampling_x; img->y_chroma_shift = yv12->subsampling_y; img->planes[VPX_PLANE_Y] = yv12->y_buffer; img->planes[VPX_PLANE_U] = yv12->u_buffer; img->planes[VPX_PLANE_V] = yv12->v_buffer; img->planes[VPX_PLANE_ALPHA] = NULL; img->stride[VPX_PLANE_Y] = yv12->y_stride; img->stride[VPX_PLANE_U] = yv12->uv_stride; img->stride[VPX_PLANE_V] = yv12->uv_stride; img->stride[VPX_PLANE_ALPHA] = yv12->y_stride; #if CONFIG_VP9_HIGHBITDEPTH if (yv12->flags & YV12_FLAG_HIGHBITDEPTH) { // vpx_image_t uses byte strides and a pointer to the first byte // of the image. img->fmt = (vpx_img_fmt_t)(img->fmt | VPX_IMG_FMT_HIGHBITDEPTH); img->bit_depth = yv12->bit_depth; img->planes[VPX_PLANE_Y] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->y_buffer); img->planes[VPX_PLANE_U] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->u_buffer); img->planes[VPX_PLANE_V] = (uint8_t *)CONVERT_TO_SHORTPTR(yv12->v_buffer); img->planes[VPX_PLANE_ALPHA] = NULL; img->stride[VPX_PLANE_Y] = 2 * yv12->y_stride; img->stride[VPX_PLANE_U] = 2 * yv12->uv_stride; img->stride[VPX_PLANE_V] = 2 * yv12->uv_stride; img->stride[VPX_PLANE_ALPHA] = 2 * yv12->y_stride; } #endif // CONFIG_VP9_HIGHBITDEPTH img->bps = bps; img->user_priv = user_priv; img->img_data = yv12->buffer_alloc; img->img_data_owner = 0; img->self_allocd = 0; } vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, YV12_BUFFER_CONFIG *yv12) { yv12->y_buffer = img->planes[VPX_PLANE_Y]; yv12->u_buffer = img->planes[VPX_PLANE_U]; yv12->v_buffer = img->planes[VPX_PLANE_V]; yv12->y_crop_width = img->d_w; yv12->y_crop_height = img->d_h; yv12->render_width = img->r_w; yv12->render_height = img->r_h; yv12->y_width = img->d_w; yv12->y_height = img->d_h; yv12->uv_width = img->x_chroma_shift == 1 ? (1 + yv12->y_width) / 2 : yv12->y_width; yv12->uv_height = img->y_chroma_shift == 1 ? (1 + yv12->y_height) / 2 : yv12->y_height; yv12->uv_crop_width = yv12->uv_width; yv12->uv_crop_height = yv12->uv_height; yv12->y_stride = img->stride[VPX_PLANE_Y]; yv12->uv_stride = img->stride[VPX_PLANE_U]; yv12->color_space = img->cs; yv12->color_range = img->range; #if CONFIG_VP9_HIGHBITDEPTH if (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) { // In vpx_image_t // planes point to uint8 address of start of data // stride counts uint8s to reach next row // In YV12_BUFFER_CONFIG // y_buffer, u_buffer, v_buffer point to uint16 address of data // stride and border counts in uint16s // This means that all the address calculations in the main body of code // should work correctly. 
// However, before we do any pixel operations we need to cast the address // to a uint16 ponter and double its value. yv12->y_buffer = CONVERT_TO_BYTEPTR(yv12->y_buffer); yv12->u_buffer = CONVERT_TO_BYTEPTR(yv12->u_buffer); yv12->v_buffer = CONVERT_TO_BYTEPTR(yv12->v_buffer); yv12->y_stride >>= 1; yv12->uv_stride >>= 1; yv12->flags = YV12_FLAG_HIGHBITDEPTH; } else { yv12->flags = 0; } yv12->border = (yv12->y_stride - img->w) / 2; #else yv12->border = (img->stride[VPX_PLANE_Y] - img->w) / 2; #endif // CONFIG_VP9_HIGHBITDEPTH yv12->subsampling_x = img->x_chroma_shift; yv12->subsampling_y = img->y_chroma_shift; return VPX_CODEC_OK; } libvpx-1.8.2/vp9/vp9_iface_common.h000066400000000000000000000024011357355204000171330ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VP9_VP9_IFACE_COMMON_H_ #define VPX_VP9_VP9_IFACE_COMMON_H_ #include #include "vpx_ports/mem.h" #include "vpx/vp8.h" #include "vpx_scale/yv12config.h" #include "common/vp9_enums.h" #ifdef __cplusplus extern "C" { #endif void yuvconfig2image(vpx_image_t *img, const YV12_BUFFER_CONFIG *yv12, void *user_priv); vpx_codec_err_t image2yuvconfig(const vpx_image_t *img, YV12_BUFFER_CONFIG *yv12); static INLINE VP9_REFFRAME ref_frame_to_vp9_reframe(vpx_ref_frame_type_t frame) { switch (frame) { case VP8_LAST_FRAME: return VP9_LAST_FLAG; case VP8_GOLD_FRAME: return VP9_GOLD_FLAG; case VP8_ALTR_FRAME: return VP9_ALT_FLAG; } assert(0 && "Invalid Reference Frame"); return VP9_LAST_FLAG; } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VP9_VP9_IFACE_COMMON_H_ libvpx-1.8.2/vp9/vp9cx.mk000066400000000000000000000166311357355204000151610ustar00rootroot00000000000000## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. 
## VP9_CX_EXPORTS += exports_enc VP9_CX_SRCS-yes += $(VP9_COMMON_SRCS-yes) VP9_CX_SRCS-no += $(VP9_COMMON_SRCS-no) VP9_CX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes) VP9_CX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) VP9_CX_SRCS-yes += vp9_cx_iface.c VP9_CX_SRCS-yes += vp9_cx_iface.h VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.cc VP9_CX_SRCS-$(CONFIG_RATE_CTRL) += simple_encode.h VP9_CX_SRCS-yes += encoder/vp9_bitstream.c VP9_CX_SRCS-yes += encoder/vp9_context_tree.c VP9_CX_SRCS-yes += encoder/vp9_context_tree.h VP9_CX_SRCS-yes += encoder/vp9_cost.h VP9_CX_SRCS-yes += encoder/vp9_cost.c VP9_CX_SRCS-yes += encoder/vp9_dct.c VP9_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/vp9_denoiser.c VP9_CX_SRCS-$(CONFIG_VP9_TEMPORAL_DENOISING) += encoder/vp9_denoiser.h VP9_CX_SRCS-yes += encoder/vp9_encodeframe.c VP9_CX_SRCS-yes += encoder/vp9_encodeframe.h VP9_CX_SRCS-yes += encoder/vp9_encodemb.c VP9_CX_SRCS-yes += encoder/vp9_encodemv.c VP9_CX_SRCS-yes += encoder/vp9_ethread.h VP9_CX_SRCS-yes += encoder/vp9_ethread.c VP9_CX_SRCS-yes += encoder/vp9_extend.c VP9_CX_SRCS-yes += encoder/vp9_firstpass.c VP9_CX_SRCS-yes += encoder/vp9_block.h VP9_CX_SRCS-yes += encoder/vp9_bitstream.h VP9_CX_SRCS-yes += encoder/vp9_encodemb.h VP9_CX_SRCS-yes += encoder/vp9_encodemv.h VP9_CX_SRCS-yes += encoder/vp9_extend.h VP9_CX_SRCS-yes += encoder/vp9_firstpass.h VP9_CX_SRCS-yes += encoder/vp9_frame_scale.c VP9_CX_SRCS-yes += encoder/vp9_job_queue.h VP9_CX_SRCS-yes += encoder/vp9_lookahead.c VP9_CX_SRCS-yes += encoder/vp9_lookahead.h VP9_CX_SRCS-yes += encoder/vp9_mcomp.h VP9_CX_SRCS-yes += encoder/vp9_multi_thread.c VP9_CX_SRCS-yes += encoder/vp9_multi_thread.h VP9_CX_SRCS-yes += encoder/vp9_encoder.h VP9_CX_SRCS-yes += encoder/vp9_quantize.h VP9_CX_SRCS-yes += encoder/vp9_ratectrl.h VP9_CX_SRCS-yes += encoder/vp9_rd.h VP9_CX_SRCS-yes += encoder/vp9_rdopt.h VP9_CX_SRCS-yes += encoder/vp9_pickmode.h VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.h VP9_CX_SRCS-yes += encoder/vp9_tokenize.h VP9_CX_SRCS-yes += encoder/vp9_treewriter.h VP9_CX_SRCS-yes += encoder/vp9_mcomp.c VP9_CX_SRCS-yes += encoder/vp9_encoder.c VP9_CX_SRCS-yes += encoder/vp9_picklpf.c VP9_CX_SRCS-yes += encoder/vp9_picklpf.h VP9_CX_SRCS-yes += encoder/vp9_quantize.c VP9_CX_SRCS-yes += encoder/vp9_ratectrl.c VP9_CX_SRCS-yes += encoder/vp9_rd.c VP9_CX_SRCS-yes += encoder/vp9_rdopt.c VP9_CX_SRCS-yes += encoder/vp9_pickmode.c VP9_CX_SRCS-yes += encoder/vp9_partition_models.h VP9_CX_SRCS-yes += encoder/vp9_segmentation.c VP9_CX_SRCS-yes += encoder/vp9_segmentation.h VP9_CX_SRCS-yes += encoder/vp9_speed_features.c VP9_CX_SRCS-yes += encoder/vp9_speed_features.h VP9_CX_SRCS-yes += encoder/vp9_subexp.c VP9_CX_SRCS-yes += encoder/vp9_subexp.h VP9_CX_SRCS-yes += encoder/vp9_svc_layercontext.c VP9_CX_SRCS-yes += encoder/vp9_resize.c VP9_CX_SRCS-yes += encoder/vp9_resize.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.c VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += encoder/vp9_blockiness.h VP9_CX_SRCS-$(CONFIG_NON_GREEDY_MV) += encoder/vp9_non_greedy_mv.c VP9_CX_SRCS-$(CONFIG_NON_GREEDY_MV) += encoder/vp9_non_greedy_mv.h VP9_CX_SRCS-yes += encoder/vp9_tokenize.c VP9_CX_SRCS-yes += encoder/vp9_treewriter.c VP9_CX_SRCS-yes += encoder/vp9_aq_variance.c VP9_CX_SRCS-yes += encoder/vp9_aq_variance.h VP9_CX_SRCS-yes += encoder/vp9_aq_360.c VP9_CX_SRCS-yes += encoder/vp9_aq_360.h VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.c VP9_CX_SRCS-yes += encoder/vp9_aq_cyclicrefresh.h VP9_CX_SRCS-yes += encoder/vp9_aq_complexity.c VP9_CX_SRCS-yes += 
encoder/vp9_aq_complexity.h VP9_CX_SRCS-yes += encoder/vp9_alt_ref_aq.h VP9_CX_SRCS-yes += encoder/vp9_alt_ref_aq.c VP9_CX_SRCS-yes += encoder/vp9_skin_detection.c VP9_CX_SRCS-yes += encoder/vp9_skin_detection.h VP9_CX_SRCS-yes += encoder/vp9_noise_estimate.c VP9_CX_SRCS-yes += encoder/vp9_noise_estimate.h ifeq ($(CONFIG_VP9_POSTPROC),yes) VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.h VP9_CX_SRCS-$(CONFIG_INTERNAL_STATS) += common/vp9_postproc.c endif VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.c VP9_CX_SRCS-yes += encoder/vp9_temporal_filter.h VP9_CX_SRCS-yes += encoder/vp9_mbgraph.c VP9_CX_SRCS-yes += encoder/vp9_mbgraph.h VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/temporal_filter_sse4.c VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/temporal_filter_constants.h VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_quantize_sse2.c VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_quantize_avx2.c VP9_CX_SRCS-$(HAVE_AVX) += encoder/x86/vp9_diamond_search_sad_avx.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_highbd_block_error_intrin_sse2.c VP9_CX_SRCS-$(HAVE_SSE4_1) += encoder/x86/highbd_temporal_filter_sse4.c endif VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_sse2.asm VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_error_sse2.asm ifeq ($(VPX_ARCH_X86_64),yes) VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_quantize_ssse3_x86_64.asm endif VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_dct_intrin_sse2.c VP9_CX_SRCS-$(HAVE_SSSE3) += encoder/x86/vp9_frame_scale_ssse3.c ifeq ($(CONFIG_VP9_TEMPORAL_DENOISING),yes) VP9_CX_SRCS-$(HAVE_SSE2) += encoder/x86/vp9_denoiser_sse2.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_denoiser_neon.c endif VP9_CX_SRCS-$(HAVE_AVX2) += encoder/x86/vp9_error_avx2.c ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_error_neon.c endif VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_frame_scale_neon.c VP9_CX_SRCS-$(HAVE_NEON) += encoder/arm/neon/vp9_quantize_neon.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_error_msa.c ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct4x4_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct8x8_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct16x16_msa.c VP9_CX_SRCS-$(HAVE_MSA) += encoder/mips/msa/vp9_fdct_msa.h endif # !CONFIG_VP9_HIGHBITDEPTH VP9_CX_SRCS-$(HAVE_VSX) += encoder/ppc/vp9_quantize_vsx.c # Strip unnecessary files with CONFIG_REALTIME_ONLY VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_firstpass.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_mbgraph.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_temporal_filter.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/x86/temporal_filter_sse4.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/x86/temporal_filter_constants.h VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/x86/highbd_temporal_filter_sse4.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_alt_ref_aq.h VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_alt_ref_aq.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_variance.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_variance.h VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_360.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_360.h VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_complexity.c VP9_CX_SRCS_REMOVE-$(CONFIG_REALTIME_ONLY) += encoder/vp9_aq_complexity.h VP9_CX_SRCS-yes := $(filter-out 
$(VP9_CX_SRCS_REMOVE-yes),$(VP9_CX_SRCS-yes)) libvpx-1.8.2/vp9/vp9dx.mk000066400000000000000000000024211357355204000151520ustar00rootroot00000000000000## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## VP9_DX_EXPORTS += exports_dec VP9_DX_SRCS-yes += $(VP9_COMMON_SRCS-yes) VP9_DX_SRCS-no += $(VP9_COMMON_SRCS-no) VP9_DX_SRCS_REMOVE-yes += $(VP9_COMMON_SRCS_REMOVE-yes) VP9_DX_SRCS_REMOVE-no += $(VP9_COMMON_SRCS_REMOVE-no) VP9_DX_SRCS-yes += vp9_dx_iface.c VP9_DX_SRCS-yes += vp9_dx_iface.h VP9_DX_SRCS-yes += decoder/vp9_decodemv.c VP9_DX_SRCS-yes += decoder/vp9_decodeframe.c VP9_DX_SRCS-yes += decoder/vp9_decodeframe.h VP9_DX_SRCS-yes += decoder/vp9_detokenize.c VP9_DX_SRCS-yes += decoder/vp9_decodemv.h VP9_DX_SRCS-yes += decoder/vp9_detokenize.h VP9_DX_SRCS-yes += decoder/vp9_decoder.c VP9_DX_SRCS-yes += decoder/vp9_decoder.h VP9_DX_SRCS-yes += decoder/vp9_dsubexp.c VP9_DX_SRCS-yes += decoder/vp9_dsubexp.h VP9_DX_SRCS-yes += decoder/vp9_job_queue.c VP9_DX_SRCS-yes += decoder/vp9_job_queue.h VP9_DX_SRCS-yes := $(filter-out $(VP9_DX_SRCS_REMOVE-yes),$(VP9_DX_SRCS-yes)) libvpx-1.8.2/vpx/000077500000000000000000000000001357355204000136475ustar00rootroot00000000000000libvpx-1.8.2/vpx/exports_com000066400000000000000000000005751357355204000161430ustar00rootroot00000000000000text vpx_codec_build_config text vpx_codec_control_ text vpx_codec_destroy text vpx_codec_err_to_string text vpx_codec_error text vpx_codec_error_detail text vpx_codec_get_caps text vpx_codec_iface_name text vpx_codec_version text vpx_codec_version_extra_str text vpx_codec_version_str text vpx_img_alloc text vpx_img_flip text vpx_img_free text vpx_img_set_rect text vpx_img_wrap libvpx-1.8.2/vpx/exports_dec000066400000000000000000000003761357355204000161170ustar00rootroot00000000000000text vpx_codec_dec_init_ver text vpx_codec_decode text vpx_codec_get_frame text vpx_codec_get_stream_info text vpx_codec_peek_stream_info text vpx_codec_register_put_frame_cb text vpx_codec_register_put_slice_cb text vpx_codec_set_frame_buffer_functions libvpx-1.8.2/vpx/exports_enc000066400000000000000000000004211357355204000161200ustar00rootroot00000000000000text vpx_codec_enc_config_default text vpx_codec_enc_config_set text vpx_codec_enc_init_multi_ver text vpx_codec_enc_init_ver text vpx_codec_encode text vpx_codec_get_cx_data text vpx_codec_get_global_headers text vpx_codec_get_preview_frame text vpx_codec_set_cx_data_buf libvpx-1.8.2/vpx/internal/000077500000000000000000000000001357355204000154635ustar00rootroot00000000000000libvpx-1.8.2/vpx/internal/vpx_codec_internal.h000066400000000000000000000424501357355204000215070ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /*!\file * \brief Describes the decoder algorithm interface for algorithm * implementations. 
* * This file defines the private structures and data types that are only * relevant to implementing an algorithm, as opposed to using it. * * To create a decoder algorithm class, an interface structure is put * into the global namespace: *
 *     <pre>
 *     my_codec.c:
 *       vpx_codec_iface_t my_codec = {
 *           "My Codec v1.0",
 *           VPX_CODEC_ALG_ABI_VERSION,
 *           ...
 *       };
 *     </pre>
* * An application instantiates a specific decoder instance by using * vpx_codec_init() and a pointer to the algorithm's interface structure: *     <pre>
 *     my_app.c:
 *       extern vpx_codec_iface_t my_codec;
 *       {
 *           vpx_codec_ctx_t algo;
 *           res = vpx_codec_init(&algo, &my_codec);
 *       }
 *     </pre>
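 *
 * A minimal decode loop built on the algo instance from the example above
 * might look like this sketch, assuming buf and buf_sz hold one compressed
 * frame:
 *     <pre>
 *     if (vpx_codec_decode(&algo, buf, buf_sz, NULL, 0) == VPX_CODEC_OK) {
 *         vpx_codec_iter_t iter = NULL;
 *         vpx_image_t *img;
 *         while ((img = vpx_codec_get_frame(&algo, &iter)) != NULL) {
 *             // consume img
 *         }
 *     }
 *     </pre>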
* * Once initialized, the instance is managed using other functions from * the vpx_codec_* family. */ #ifndef VPX_VPX_INTERNAL_VPX_CODEC_INTERNAL_H_ #define VPX_VPX_INTERNAL_VPX_CODEC_INTERNAL_H_ #include "../vpx_decoder.h" #include "../vpx_encoder.h" #include <stdarg.h> #ifdef __cplusplus extern "C" { #endif /*!\brief Current ABI version number * * \internal * If this file is altered in any way that changes the ABI, this value * must be bumped. Examples include, but are not limited to, changing * types, removing or reassigning enums, adding/removing/rearranging * fields to structures. */ #define VPX_CODEC_INTERNAL_ABI_VERSION (5) /**<\hideinitializer*/ typedef struct vpx_codec_alg_priv vpx_codec_alg_priv_t; typedef struct vpx_codec_priv_enc_mr_cfg vpx_codec_priv_enc_mr_cfg_t; /*!\brief init function pointer prototype * * Performs algorithm-specific initialization of the decoder context. This * function is called by the generic vpx_codec_init() wrapper function, so * plugins implementing this interface may trust the input parameters to be * properly initialized. * * \param[in] ctx Pointer to this instance's context * \retval #VPX_CODEC_OK * The input stream was recognized and decoder initialized. * \retval #VPX_CODEC_MEM_ERROR * Memory operation failed. */ typedef vpx_codec_err_t (*vpx_codec_init_fn_t)( vpx_codec_ctx_t *ctx, vpx_codec_priv_enc_mr_cfg_t *data); /*!\brief destroy function pointer prototype * * Performs algorithm-specific destruction of the decoder context. This * function is called by the generic vpx_codec_destroy() wrapper function, * so plugins implementing this interface may trust the input parameters * to be properly initialized. * * \param[in] ctx Pointer to this instance's context * \retval #VPX_CODEC_OK * The decoder context was destroyed. * \retval #VPX_CODEC_MEM_ERROR * Memory operation failed. */ typedef vpx_codec_err_t (*vpx_codec_destroy_fn_t)(vpx_codec_alg_priv_t *ctx); /*!\brief parse stream info function pointer prototype * * Performs high level parsing of the bitstream. This function is called by the * generic vpx_codec_peek_stream_info() wrapper function, so plugins * implementing this interface may trust the input parameters to be properly * initialized. * * \param[in] data Pointer to a block of data to parse * \param[in] data_sz Size of the data buffer * \param[in,out] si Pointer to stream info to update. The size member * \ref MUST be properly initialized, but \ref MAY be * clobbered by the algorithm. This parameter \ref MAY * be NULL. * * \retval #VPX_CODEC_OK * Bitstream is parsable and stream information updated */ typedef vpx_codec_err_t (*vpx_codec_peek_si_fn_t)(const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si); /*!\brief Return information about the current stream. * * Returns information about the stream that has been parsed during decoding. * * \param[in] ctx Pointer to this instance's context * \param[in,out] si Pointer to stream info to update. The size member * \ref MUST be properly initialized, but \ref MAY be * clobbered by the algorithm. This parameter \ref MAY * be NULL. * * \retval #VPX_CODEC_OK * Bitstream is parsable and stream information updated */ typedef vpx_codec_err_t (*vpx_codec_get_si_fn_t)(vpx_codec_alg_priv_t *ctx, vpx_codec_stream_info_t *si); /*!\brief control function pointer prototype * * This function is used to exchange algorithm specific data with the decoder * instance. This can be used to implement features specific to a particular * algorithm.
* * This function is called by the generic vpx_codec_control() wrapper * function, so plugins implementing this interface may trust the input * parameters to be properly initialized. However, this interface does not * provide type safety for the exchanged data or assign meanings to the * control codes. Those details should be specified in the algorithm's * header file. In particular, the ctrl_id parameter is guaranteed to exist * in the algorithm's control mapping table, and the data parameter may be NULL. * * * \param[in] ctx Pointer to this instance's context * \param[in] ctrl_id Algorithm specific control identifier * \param[in,out] data Data to exchange with algorithm instance. * * \retval #VPX_CODEC_OK * The internal state data was deserialized. */ typedef vpx_codec_err_t (*vpx_codec_control_fn_t)(vpx_codec_alg_priv_t *ctx, va_list ap); /*!\brief control function pointer mapping * * This structure stores the mapping between control identifiers and * implementing functions. Each algorithm provides a list of these * mappings. This list is searched by the vpx_codec_control() wrapper * function to determine which function to invoke. The special * value {0, NULL} is used to indicate end-of-list, and must be * present. The special value {0, } can be used as a catch-all * mapping. This implies that ctrl_id values chosen by the algorithm * \ref MUST be non-zero. */ typedef const struct vpx_codec_ctrl_fn_map { int ctrl_id; vpx_codec_control_fn_t fn; } vpx_codec_ctrl_fn_map_t; /*!\brief decode data function pointer prototype * * Processes a buffer of coded data. If the processing results in a new * decoded frame becoming available, #VPX_CODEC_CB_PUT_SLICE and * #VPX_CODEC_CB_PUT_FRAME events are generated as appropriate. This * function is called by the generic vpx_codec_decode() wrapper function, * so plugins implementing this interface may trust the input parameters * to be properly initialized. * * \param[in] ctx Pointer to this instance's context * \param[in] data Pointer to this block of new coded data. If * NULL, a #VPX_CODEC_CB_PUT_FRAME event is posted * for the previously decoded frame. * \param[in] data_sz Size of the coded data, in bytes. * * \return Returns #VPX_CODEC_OK if the coded data was processed completely * and future pictures can be decoded without error. Otherwise, * see the descriptions of the other error codes in ::vpx_codec_err_t * for recoverability capabilities. */ typedef vpx_codec_err_t (*vpx_codec_decode_fn_t)(vpx_codec_alg_priv_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline); /*!\brief Decoded frames iterator * * Iterates over a list of the frames available for display. The iterator * storage should be initialized to NULL to start the iteration. Iteration is * complete when this function returns NULL. * * The list of available frames becomes valid upon completion of the * vpx_codec_decode call, and remains valid until the next call to * vpx_codec_decode. * * \param[in] ctx Pointer to this instance's context * \param[in out] iter Iterator storage, initialized to NULL * * \return Returns a pointer to an image, if one is ready for display. Frames * produced will always be in PTS (presentation time stamp) order. */ typedef vpx_image_t *(*vpx_codec_get_frame_fn_t)(vpx_codec_alg_priv_t *ctx, vpx_codec_iter_t *iter); /*!\brief Pass in external frame buffers for the decoder to use. 
* * Registers functions to be called when libvpx needs a frame buffer * to decode the current frame and a function to be called when libvpx does * not internally reference the frame buffer. This set function must * be called before the first call to decode or libvpx will assume the * default behavior of allocating frame buffers internally. * * \param[in] ctx Pointer to this instance's context * \param[in] cb_get Pointer to the get callback function * \param[in] cb_release Pointer to the release callback function * \param[in] cb_priv Callback's private data * * \retval #VPX_CODEC_OK * External frame buffers will be used by libvpx. * \retval #VPX_CODEC_INVALID_PARAM * One or more of the callbacks were NULL. * \retval #VPX_CODEC_ERROR * Decoder context not initialized, or algorithm not capable of * using external frame buffers. * * \note * When decoding VP9, the application may be required to pass in at least * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame * buffers. */ typedef vpx_codec_err_t (*vpx_codec_set_fb_fn_t)( vpx_codec_alg_priv_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get, vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv); typedef vpx_codec_err_t (*vpx_codec_encode_fn_t)(vpx_codec_alg_priv_t *ctx, const vpx_image_t *img, vpx_codec_pts_t pts, unsigned long duration, vpx_enc_frame_flags_t flags, unsigned long deadline); typedef const vpx_codec_cx_pkt_t *(*vpx_codec_get_cx_data_fn_t)( vpx_codec_alg_priv_t *ctx, vpx_codec_iter_t *iter); typedef vpx_codec_err_t (*vpx_codec_enc_config_set_fn_t)( vpx_codec_alg_priv_t *ctx, const vpx_codec_enc_cfg_t *cfg); typedef vpx_fixed_buf_t *(*vpx_codec_get_global_headers_fn_t)( vpx_codec_alg_priv_t *ctx); typedef vpx_image_t *(*vpx_codec_get_preview_frame_fn_t)( vpx_codec_alg_priv_t *ctx); typedef vpx_codec_err_t (*vpx_codec_enc_mr_get_mem_loc_fn_t)( const vpx_codec_enc_cfg_t *cfg, void **mem_loc); /*!\brief usage configuration mapping * * This structure stores the mapping between usage identifiers and * configuration structures. Each algorithm provides a list of these * mappings. This list is searched by the vpx_codec_enc_config_default() * wrapper function to determine which config to return. The special value * {-1, {0}} is used to indicate end-of-list, and must be present. At least * one mapping must be present, in addition to the end-of-list. * */ typedef const struct vpx_codec_enc_cfg_map { int usage; vpx_codec_enc_cfg_t cfg; } vpx_codec_enc_cfg_map_t; /*!\brief Decoder algorithm interface interface * * All decoders \ref MUST expose a variable of this type. 
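 *
 * A decoder-only implementation fills in the dec sub-struct and leaves the
 * members of the enc sub-struct zero/NULL, as the vpx_codec_vp9_dx
 * definition in vp9_dx_iface.c above illustrates.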
*/ struct vpx_codec_iface { const char *name; /**< Identification String */ int abi_version; /**< Implemented ABI version */ vpx_codec_caps_t caps; /**< Decoder capabilities */ vpx_codec_init_fn_t init; /**< \copydoc ::vpx_codec_init_fn_t */ vpx_codec_destroy_fn_t destroy; /**< \copydoc ::vpx_codec_destroy_fn_t */ vpx_codec_ctrl_fn_map_t *ctrl_maps; /**< \copydoc ::vpx_codec_ctrl_fn_map_t */ struct vpx_codec_dec_iface { vpx_codec_peek_si_fn_t peek_si; /**< \copydoc ::vpx_codec_peek_si_fn_t */ vpx_codec_get_si_fn_t get_si; /**< \copydoc ::vpx_codec_get_si_fn_t */ vpx_codec_decode_fn_t decode; /**< \copydoc ::vpx_codec_decode_fn_t */ vpx_codec_get_frame_fn_t get_frame; /**< \copydoc ::vpx_codec_get_frame_fn_t */ vpx_codec_set_fb_fn_t set_fb_fn; /**< \copydoc ::vpx_codec_set_fb_fn_t */ } dec; struct vpx_codec_enc_iface { int cfg_map_count; vpx_codec_enc_cfg_map_t *cfg_maps; /**< \copydoc ::vpx_codec_enc_cfg_map_t */ vpx_codec_encode_fn_t encode; /**< \copydoc ::vpx_codec_encode_fn_t */ vpx_codec_get_cx_data_fn_t get_cx_data; /**< \copydoc ::vpx_codec_get_cx_data_fn_t */ vpx_codec_enc_config_set_fn_t cfg_set; /**< \copydoc ::vpx_codec_enc_config_set_fn_t */ vpx_codec_get_global_headers_fn_t get_glob_hdrs; /**< \copydoc ::vpx_codec_get_global_headers_fn_t */ vpx_codec_get_preview_frame_fn_t get_preview; /**< \copydoc ::vpx_codec_get_preview_frame_fn_t */ vpx_codec_enc_mr_get_mem_loc_fn_t mr_get_mem_loc; /**< \copydoc ::vpx_codec_enc_mr_get_mem_loc_fn_t */ } enc; }; /*!\brief Callback function pointer / user data pair storage */ typedef struct vpx_codec_priv_cb_pair { union { vpx_codec_put_frame_cb_fn_t put_frame; vpx_codec_put_slice_cb_fn_t put_slice; } u; void *user_priv; } vpx_codec_priv_cb_pair_t; /*!\brief Instance private storage * * This structure is allocated by the algorithm's init function. It can be * extended in one of two ways. First, a second, algorithm specific structure * can be allocated and the priv member pointed to it. Alternatively, this * structure can be made the first member of the algorithm specific structure, * and the pointer cast to the proper type. */ struct vpx_codec_priv { const char *err_detail; vpx_codec_flags_t init_flags; struct { vpx_codec_priv_cb_pair_t put_frame_cb; vpx_codec_priv_cb_pair_t put_slice_cb; } dec; struct { vpx_fixed_buf_t cx_data_dst_buf; unsigned int cx_data_pad_before; unsigned int cx_data_pad_after; vpx_codec_cx_pkt_t cx_data_pkt; unsigned int total_encoders; } enc; }; /* * Multi-resolution encoding internal configuration */ struct vpx_codec_priv_enc_mr_cfg { unsigned int mr_total_resolutions; unsigned int mr_encoder_id; struct vpx_rational mr_down_sampling_factor; void *mr_low_res_mode_info; }; #undef VPX_CTRL_USE_TYPE #define VPX_CTRL_USE_TYPE(id, typ) \ static VPX_INLINE typ id##__value(va_list args) { return va_arg(args, typ); } #undef VPX_CTRL_USE_TYPE_DEPRECATED #define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) \ static VPX_INLINE typ id##__value(va_list args) { return va_arg(args, typ); } #define CAST(id, arg) id##__value(arg) /* CODEC_INTERFACE convenience macro * * By convention, each codec interface is a struct with extern linkage, where * the symbol is suffixed with _algo. A getter function is also defined to * return a pointer to the struct, since in some cases it's easier to work * with text symbols than data symbols (see issue #169). This function has * the same name as the struct, less the _algo suffix. The CODEC_INTERFACE * macro is provided to define this getter function automatically. 
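 *
 * For example, CODEC_INTERFACE(vpx_codec_vp9_dx) expands to a definition of
 * the getter vpx_codec_iface_t *vpx_codec_vp9_dx(void), which returns the
 * address of the interface struct vpx_codec_vp9_dx_algo.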
*/ #define CODEC_INTERFACE(id) \ vpx_codec_iface_t *id(void) { return &id##_algo; } \ vpx_codec_iface_t id##_algo /* Internal Utility Functions * * The following functions are intended to be used inside algorithms as * utilities for manipulating vpx_codec_* data structures. */ struct vpx_codec_pkt_list { unsigned int cnt; unsigned int max; struct vpx_codec_cx_pkt pkts[1]; }; #define vpx_codec_pkt_list_decl(n) \ union { \ struct vpx_codec_pkt_list head; \ struct { \ struct vpx_codec_pkt_list head; \ struct vpx_codec_cx_pkt pkts[n]; \ } alloc; \ } #define vpx_codec_pkt_list_init(m) \ (m)->alloc.head.cnt = 0, \ (m)->alloc.head.max = sizeof((m)->alloc.pkts) / sizeof((m)->alloc.pkts[0]) int vpx_codec_pkt_list_add(struct vpx_codec_pkt_list *, const struct vpx_codec_cx_pkt *); const vpx_codec_cx_pkt_t *vpx_codec_pkt_list_get( struct vpx_codec_pkt_list *list, vpx_codec_iter_t *iter); #include #include struct vpx_internal_error_info { vpx_codec_err_t error_code; int has_detail; char detail[80]; int setjmp; jmp_buf jmp; }; #define CLANG_ANALYZER_NORETURN #if defined(__has_feature) #if __has_feature(attribute_analyzer_noreturn) #undef CLANG_ANALYZER_NORETURN #define CLANG_ANALYZER_NORETURN __attribute__((analyzer_noreturn)) #endif #endif void vpx_internal_error(struct vpx_internal_error_info *info, vpx_codec_err_t error, const char *fmt, ...) CLANG_ANALYZER_NORETURN; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_INTERNAL_VPX_CODEC_INTERNAL_H_ libvpx-1.8.2/vpx/src/000077500000000000000000000000001357355204000144365ustar00rootroot00000000000000libvpx-1.8.2/vpx/src/vpx_codec.c000066400000000000000000000073471357355204000165670ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /*!\file * \brief Provides the high level interface to wrap decoder algorithms. * */ #include #include #include "vpx/vpx_integer.h" #include "vpx/internal/vpx_codec_internal.h" #include "vpx_version.h" #define SAVE_STATUS(ctx, var) (ctx ? (ctx->err = var) : var) int vpx_codec_version(void) { return VERSION_PACKED; } const char *vpx_codec_version_str(void) { return VERSION_STRING_NOSP; } const char *vpx_codec_version_extra_str(void) { return VERSION_EXTRA; } const char *vpx_codec_iface_name(vpx_codec_iface_t *iface) { return iface ? iface->name : ""; } const char *vpx_codec_err_to_string(vpx_codec_err_t err) { switch (err) { case VPX_CODEC_OK: return "Success"; case VPX_CODEC_ERROR: return "Unspecified internal error"; case VPX_CODEC_MEM_ERROR: return "Memory allocation error"; case VPX_CODEC_ABI_MISMATCH: return "ABI version mismatch"; case VPX_CODEC_INCAPABLE: return "Codec does not implement requested capability"; case VPX_CODEC_UNSUP_BITSTREAM: return "Bitstream not supported by this decoder"; case VPX_CODEC_UNSUP_FEATURE: return "Bitstream required feature not supported by this decoder"; case VPX_CODEC_CORRUPT_FRAME: return "Corrupt frame detected"; case VPX_CODEC_INVALID_PARAM: return "Invalid parameter"; case VPX_CODEC_LIST_END: return "End of iterated list"; } return "Unrecognized error code"; } const char *vpx_codec_error(vpx_codec_ctx_t *ctx) { return (ctx) ? 
vpx_codec_err_to_string(ctx->err) : vpx_codec_err_to_string(VPX_CODEC_INVALID_PARAM); } const char *vpx_codec_error_detail(vpx_codec_ctx_t *ctx) { if (ctx && ctx->err) return ctx->priv ? ctx->priv->err_detail : ctx->err_detail; return NULL; } vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx) { vpx_codec_err_t res; if (!ctx) res = VPX_CODEC_INVALID_PARAM; else if (!ctx->iface || !ctx->priv) res = VPX_CODEC_ERROR; else { ctx->iface->destroy((vpx_codec_alg_priv_t *)ctx->priv); ctx->iface = NULL; ctx->name = NULL; ctx->priv = NULL; res = VPX_CODEC_OK; } return SAVE_STATUS(ctx, res); } vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface) { return (iface) ? iface->caps : 0; } vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx, int ctrl_id, ...) { vpx_codec_err_t res; if (!ctx || !ctrl_id) res = VPX_CODEC_INVALID_PARAM; else if (!ctx->iface || !ctx->priv || !ctx->iface->ctrl_maps) res = VPX_CODEC_ERROR; else { vpx_codec_ctrl_fn_map_t *entry; res = VPX_CODEC_INCAPABLE; for (entry = ctx->iface->ctrl_maps; entry && entry->fn; entry++) { if (!entry->ctrl_id || entry->ctrl_id == ctrl_id) { va_list ap; va_start(ap, ctrl_id); res = entry->fn((vpx_codec_alg_priv_t *)ctx->priv, ap); va_end(ap); break; } } } return SAVE_STATUS(ctx, res); } void vpx_internal_error(struct vpx_internal_error_info *info, vpx_codec_err_t error, const char *fmt, ...) { va_list ap; info->error_code = error; info->has_detail = 0; if (fmt) { size_t sz = sizeof(info->detail); info->has_detail = 1; va_start(ap, fmt); vsnprintf(info->detail, sz - 1, fmt, ap); va_end(ap); info->detail[sz - 1] = '\0'; } if (info->setjmp) longjmp(info->jmp, info->error_code); } libvpx-1.8.2/vpx/src/vpx_decoder.c000066400000000000000000000133721357355204000171120ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /*!\file * \brief Provides the high level interface to wrap decoder algorithms. * */ #include #include "vpx/internal/vpx_codec_internal.h" #define SAVE_STATUS(ctx, var) (ctx ? (ctx->err = var) : var) static vpx_codec_alg_priv_t *get_alg_priv(vpx_codec_ctx_t *ctx) { return (vpx_codec_alg_priv_t *)ctx->priv; } vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx, vpx_codec_iface_t *iface, const vpx_codec_dec_cfg_t *cfg, vpx_codec_flags_t flags, int ver) { vpx_codec_err_t res; if (ver != VPX_DECODER_ABI_VERSION) res = VPX_CODEC_ABI_MISMATCH; else if (!ctx || !iface) res = VPX_CODEC_INVALID_PARAM; else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION) res = VPX_CODEC_ABI_MISMATCH; else if ((flags & VPX_CODEC_USE_POSTPROC) && !(iface->caps & VPX_CODEC_CAP_POSTPROC)) res = VPX_CODEC_INCAPABLE; else if ((flags & VPX_CODEC_USE_ERROR_CONCEALMENT) && !(iface->caps & VPX_CODEC_CAP_ERROR_CONCEALMENT)) res = VPX_CODEC_INCAPABLE; else if ((flags & VPX_CODEC_USE_INPUT_FRAGMENTS) && !(iface->caps & VPX_CODEC_CAP_INPUT_FRAGMENTS)) res = VPX_CODEC_INCAPABLE; else if (!(iface->caps & VPX_CODEC_CAP_DECODER)) res = VPX_CODEC_INCAPABLE; else { memset(ctx, 0, sizeof(*ctx)); ctx->iface = iface; ctx->name = iface->name; ctx->priv = NULL; ctx->init_flags = flags; ctx->config.dec = cfg; res = ctx->iface->init(ctx, NULL); if (res) { ctx->err_detail = ctx->priv ? 
ctx->priv->err_detail : NULL; vpx_codec_destroy(ctx); } } return SAVE_STATUS(ctx, res); } vpx_codec_err_t vpx_codec_peek_stream_info(vpx_codec_iface_t *iface, const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si) { vpx_codec_err_t res; if (!iface || !data || !data_sz || !si || si->sz < sizeof(vpx_codec_stream_info_t)) res = VPX_CODEC_INVALID_PARAM; else { /* Set default/unknown values */ si->w = 0; si->h = 0; res = iface->dec.peek_si(data, data_sz, si); } return res; } vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx, vpx_codec_stream_info_t *si) { vpx_codec_err_t res; if (!ctx || !si || si->sz < sizeof(vpx_codec_stream_info_t)) res = VPX_CODEC_INVALID_PARAM; else if (!ctx->iface || !ctx->priv) res = VPX_CODEC_ERROR; else { /* Set default/unknown values */ si->w = 0; si->h = 0; res = ctx->iface->dec.get_si(get_alg_priv(ctx), si); } return SAVE_STATUS(ctx, res); } vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline) { vpx_codec_err_t res; /* Sanity checks */ /* NULL data ptr allowed if data_sz is 0 too */ if (!ctx || (!data && data_sz) || (data && !data_sz)) res = VPX_CODEC_INVALID_PARAM; else if (!ctx->iface || !ctx->priv) res = VPX_CODEC_ERROR; else { res = ctx->iface->dec.decode(get_alg_priv(ctx), data, data_sz, user_priv, deadline); } return SAVE_STATUS(ctx, res); } vpx_image_t *vpx_codec_get_frame(vpx_codec_ctx_t *ctx, vpx_codec_iter_t *iter) { vpx_image_t *img; if (!ctx || !iter || !ctx->iface || !ctx->priv) img = NULL; else img = ctx->iface->dec.get_frame(get_alg_priv(ctx), iter); return img; } vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx, vpx_codec_put_frame_cb_fn_t cb, void *user_priv) { vpx_codec_err_t res; if (!ctx || !cb) res = VPX_CODEC_INVALID_PARAM; else if (!ctx->iface || !ctx->priv || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_FRAME)) res = VPX_CODEC_ERROR; else { ctx->priv->dec.put_frame_cb.u.put_frame = cb; ctx->priv->dec.put_frame_cb.user_priv = user_priv; res = VPX_CODEC_OK; } return SAVE_STATUS(ctx, res); } vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx, vpx_codec_put_slice_cb_fn_t cb, void *user_priv) { vpx_codec_err_t res; if (!ctx || !cb) res = VPX_CODEC_INVALID_PARAM; else if (!ctx->iface || !ctx->priv || !(ctx->iface->caps & VPX_CODEC_CAP_PUT_SLICE)) res = VPX_CODEC_ERROR; else { ctx->priv->dec.put_slice_cb.u.put_slice = cb; ctx->priv->dec.put_slice_cb.user_priv = user_priv; res = VPX_CODEC_OK; } return SAVE_STATUS(ctx, res); } vpx_codec_err_t vpx_codec_set_frame_buffer_functions( vpx_codec_ctx_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get, vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv) { vpx_codec_err_t res; if (!ctx || !cb_get || !cb_release) { res = VPX_CODEC_INVALID_PARAM; } else if (!ctx->iface || !ctx->priv || !(ctx->iface->caps & VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER)) { res = VPX_CODEC_ERROR; } else { res = ctx->iface->dec.set_fb_fn(get_alg_priv(ctx), cb_get, cb_release, cb_priv); } return SAVE_STATUS(ctx, res); } libvpx-1.8.2/vpx/src/vpx_encoder.c000066400000000000000000000267251357355204000171320ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /*!\file * \brief Provides the high level interface to wrap encoder algorithms. * */ #include #include #include #include #include "vp8/common/blockd.h" #include "vpx_config.h" #include "vpx/internal/vpx_codec_internal.h" #define SAVE_STATUS(ctx, var) ((ctx) ? ((ctx)->err = (var)) : (var)) static vpx_codec_alg_priv_t *get_alg_priv(vpx_codec_ctx_t *ctx) { return (vpx_codec_alg_priv_t *)ctx->priv; } vpx_codec_err_t vpx_codec_enc_init_ver(vpx_codec_ctx_t *ctx, vpx_codec_iface_t *iface, const vpx_codec_enc_cfg_t *cfg, vpx_codec_flags_t flags, int ver) { vpx_codec_err_t res; if (ver != VPX_ENCODER_ABI_VERSION) res = VPX_CODEC_ABI_MISMATCH; else if (!ctx || !iface || !cfg) res = VPX_CODEC_INVALID_PARAM; else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION) res = VPX_CODEC_ABI_MISMATCH; else if (!(iface->caps & VPX_CODEC_CAP_ENCODER)) res = VPX_CODEC_INCAPABLE; else if ((flags & VPX_CODEC_USE_PSNR) && !(iface->caps & VPX_CODEC_CAP_PSNR)) res = VPX_CODEC_INCAPABLE; else if ((flags & VPX_CODEC_USE_OUTPUT_PARTITION) && !(iface->caps & VPX_CODEC_CAP_OUTPUT_PARTITION)) res = VPX_CODEC_INCAPABLE; else { ctx->iface = iface; ctx->name = iface->name; ctx->priv = NULL; ctx->init_flags = flags; ctx->config.enc = cfg; res = ctx->iface->init(ctx, NULL); if (res) { ctx->err_detail = ctx->priv ? ctx->priv->err_detail : NULL; vpx_codec_destroy(ctx); } } return SAVE_STATUS(ctx, res); } vpx_codec_err_t vpx_codec_enc_init_multi_ver( vpx_codec_ctx_t *ctx, vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *cfg, int num_enc, vpx_codec_flags_t flags, vpx_rational_t *dsf, int ver) { vpx_codec_err_t res = VPX_CODEC_OK; if (ver != VPX_ENCODER_ABI_VERSION) res = VPX_CODEC_ABI_MISMATCH; else if (!ctx || !iface || !cfg || (num_enc > 16 || num_enc < 1)) res = VPX_CODEC_INVALID_PARAM; else if (iface->abi_version != VPX_CODEC_INTERNAL_ABI_VERSION) res = VPX_CODEC_ABI_MISMATCH; else if (!(iface->caps & VPX_CODEC_CAP_ENCODER)) res = VPX_CODEC_INCAPABLE; else if ((flags & VPX_CODEC_USE_PSNR) && !(iface->caps & VPX_CODEC_CAP_PSNR)) res = VPX_CODEC_INCAPABLE; else if ((flags & VPX_CODEC_USE_OUTPUT_PARTITION) && !(iface->caps & VPX_CODEC_CAP_OUTPUT_PARTITION)) res = VPX_CODEC_INCAPABLE; else { int i; #if CONFIG_MULTI_RES_ENCODING int mem_loc_owned = 0; #endif void *mem_loc = NULL; if (iface->enc.mr_get_mem_loc == NULL) return VPX_CODEC_INCAPABLE; if (!(res = iface->enc.mr_get_mem_loc(cfg, &mem_loc))) { for (i = 0; i < num_enc; i++) { vpx_codec_priv_enc_mr_cfg_t mr_cfg; /* Validate down-sampling factor. */ if (dsf->num < 1 || dsf->num > 4096 || dsf->den < 1 || dsf->den > dsf->num) { res = VPX_CODEC_INVALID_PARAM; } else { mr_cfg.mr_low_res_mode_info = mem_loc; mr_cfg.mr_total_resolutions = num_enc; mr_cfg.mr_encoder_id = num_enc - 1 - i; mr_cfg.mr_down_sampling_factor.num = dsf->num; mr_cfg.mr_down_sampling_factor.den = dsf->den; ctx->iface = iface; ctx->name = iface->name; ctx->priv = NULL; ctx->init_flags = flags; ctx->config.enc = cfg; res = ctx->iface->init(ctx, &mr_cfg); } if (res) { const char *error_detail = ctx->priv ? 
ctx->priv->err_detail : NULL; /* Destroy current ctx */ ctx->err_detail = error_detail; vpx_codec_destroy(ctx); /* Destroy already allocated high-level ctx */ while (i) { ctx--; ctx->err_detail = error_detail; vpx_codec_destroy(ctx); i--; } #if CONFIG_MULTI_RES_ENCODING if (!mem_loc_owned) { assert(mem_loc); free(((LOWER_RES_FRAME_INFO *)mem_loc)->mb_info); free(mem_loc); } #endif return SAVE_STATUS(ctx, res); } #if CONFIG_MULTI_RES_ENCODING mem_loc_owned = 1; #endif ctx++; cfg++; dsf++; } ctx--; } } return SAVE_STATUS(ctx, res); } vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *cfg, unsigned int usage) { vpx_codec_err_t res; vpx_codec_enc_cfg_map_t *map; int i; if (!iface || !cfg || usage != 0) res = VPX_CODEC_INVALID_PARAM; else if (!(iface->caps & VPX_CODEC_CAP_ENCODER)) res = VPX_CODEC_INCAPABLE; else { res = VPX_CODEC_INVALID_PARAM; for (i = 0; i < iface->enc.cfg_map_count; ++i) { map = iface->enc.cfg_maps + i; *cfg = map->cfg; res = VPX_CODEC_OK; break; } } return res; } #if VPX_ARCH_X86 || VPX_ARCH_X86_64 /* On X86, disable the x87 unit's internal 80 bit precision for better * consistency with the SSE unit's 64 bit precision. */ #include "vpx_ports/x86.h" #define FLOATING_POINT_INIT() \ do { \ unsigned short x87_orig_mode = x87_set_double_precision(); #define FLOATING_POINT_RESTORE() \ x87_set_control_word(x87_orig_mode); \ } \ while (0) #else static void FLOATING_POINT_INIT() {} static void FLOATING_POINT_RESTORE() {} #endif vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx, const vpx_image_t *img, vpx_codec_pts_t pts, unsigned long duration, vpx_enc_frame_flags_t flags, unsigned long deadline) { vpx_codec_err_t res = VPX_CODEC_OK; if (!ctx || (img && !duration)) res = VPX_CODEC_INVALID_PARAM; else if (!ctx->iface || !ctx->priv) res = VPX_CODEC_ERROR; else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER)) res = VPX_CODEC_INCAPABLE; else { unsigned int num_enc = ctx->priv->enc.total_encoders; /* Execute in a normalized floating point environment, if the platform * requires it. */ FLOATING_POINT_INIT(); if (num_enc == 1) res = ctx->iface->enc.encode(get_alg_priv(ctx), img, pts, duration, flags, deadline); else { /* Multi-resolution encoding: * Encode multi-levels in reverse order. For example, * if mr_total_resolutions = 3, first encode level 2, * then encode level 1, and finally encode level 0. */ int i; ctx += num_enc - 1; if (img) img += num_enc - 1; for (i = num_enc - 1; i >= 0; i--) { if ((res = ctx->iface->enc.encode(get_alg_priv(ctx), img, pts, duration, flags, deadline))) break; ctx--; if (img) img--; } ctx++; } FLOATING_POINT_RESTORE(); } return SAVE_STATUS(ctx, res); } const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, vpx_codec_iter_t *iter) { const vpx_codec_cx_pkt_t *pkt = NULL; if (ctx) { if (!iter) ctx->err = VPX_CODEC_INVALID_PARAM; else if (!ctx->iface || !ctx->priv) ctx->err = VPX_CODEC_ERROR; else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER)) ctx->err = VPX_CODEC_INCAPABLE; else pkt = ctx->iface->enc.get_cx_data(get_alg_priv(ctx), iter); } if (pkt && pkt->kind == VPX_CODEC_CX_FRAME_PKT) { // If the application has specified a destination area for the // compressed data, and the codec has not placed the data there, // and it fits, copy it. 
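// The pad_before/pad_after amounts requested via vpx_codec_set_cx_data_buf()
// are included both in the fit check and in the size of the returned packet.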
vpx_codec_priv_t *const priv = ctx->priv; char *const dst_buf = (char *)priv->enc.cx_data_dst_buf.buf; if (dst_buf && pkt->data.raw.buf != dst_buf && pkt->data.raw.sz + priv->enc.cx_data_pad_before + priv->enc.cx_data_pad_after <= priv->enc.cx_data_dst_buf.sz) { vpx_codec_cx_pkt_t *modified_pkt = &priv->enc.cx_data_pkt; memcpy(dst_buf + priv->enc.cx_data_pad_before, pkt->data.raw.buf, pkt->data.raw.sz); *modified_pkt = *pkt; modified_pkt->data.raw.buf = dst_buf; modified_pkt->data.raw.sz += priv->enc.cx_data_pad_before + priv->enc.cx_data_pad_after; pkt = modified_pkt; } if (dst_buf == pkt->data.raw.buf) { priv->enc.cx_data_dst_buf.buf = dst_buf + pkt->data.raw.sz; priv->enc.cx_data_dst_buf.sz -= pkt->data.raw.sz; } } return pkt; } vpx_codec_err_t vpx_codec_set_cx_data_buf(vpx_codec_ctx_t *ctx, const vpx_fixed_buf_t *buf, unsigned int pad_before, unsigned int pad_after) { if (!ctx || !ctx->priv) return VPX_CODEC_INVALID_PARAM; if (buf) { ctx->priv->enc.cx_data_dst_buf = *buf; ctx->priv->enc.cx_data_pad_before = pad_before; ctx->priv->enc.cx_data_pad_after = pad_after; } else { ctx->priv->enc.cx_data_dst_buf.buf = NULL; ctx->priv->enc.cx_data_dst_buf.sz = 0; ctx->priv->enc.cx_data_pad_before = 0; ctx->priv->enc.cx_data_pad_after = 0; } return VPX_CODEC_OK; } const vpx_image_t *vpx_codec_get_preview_frame(vpx_codec_ctx_t *ctx) { vpx_image_t *img = NULL; if (ctx) { if (!ctx->iface || !ctx->priv) ctx->err = VPX_CODEC_ERROR; else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER)) ctx->err = VPX_CODEC_INCAPABLE; else if (!ctx->iface->enc.get_preview) ctx->err = VPX_CODEC_INCAPABLE; else img = ctx->iface->enc.get_preview(get_alg_priv(ctx)); } return img; } vpx_fixed_buf_t *vpx_codec_get_global_headers(vpx_codec_ctx_t *ctx) { vpx_fixed_buf_t *buf = NULL; if (ctx) { if (!ctx->iface || !ctx->priv) ctx->err = VPX_CODEC_ERROR; else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER)) ctx->err = VPX_CODEC_INCAPABLE; else if (!ctx->iface->enc.get_glob_hdrs) ctx->err = VPX_CODEC_INCAPABLE; else buf = ctx->iface->enc.get_glob_hdrs(get_alg_priv(ctx)); } return buf; } vpx_codec_err_t vpx_codec_enc_config_set(vpx_codec_ctx_t *ctx, const vpx_codec_enc_cfg_t *cfg) { vpx_codec_err_t res; if (!ctx || !ctx->iface || !ctx->priv || !cfg) res = VPX_CODEC_INVALID_PARAM; else if (!(ctx->iface->caps & VPX_CODEC_CAP_ENCODER)) res = VPX_CODEC_INCAPABLE; else res = ctx->iface->enc.cfg_set(get_alg_priv(ctx), cfg); return SAVE_STATUS(ctx, res); } int vpx_codec_pkt_list_add(struct vpx_codec_pkt_list *list, const struct vpx_codec_cx_pkt *pkt) { if (list->cnt < list->max) { list->pkts[list->cnt++] = *pkt; return 0; } return 1; } const vpx_codec_cx_pkt_t *vpx_codec_pkt_list_get( struct vpx_codec_pkt_list *list, vpx_codec_iter_t *iter) { const vpx_codec_cx_pkt_t *pkt; if (!(*iter)) { *iter = list->pkts; } pkt = (const vpx_codec_cx_pkt_t *)*iter; if ((size_t)(pkt - list->pkts) < list->cnt) *iter = pkt + 1; else pkt = NULL; return pkt; } libvpx-1.8.2/vpx/src/vpx_image.c000066400000000000000000000171021357355204000165620ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <stdlib.h>
#include <string.h>

#include "vpx/vpx_image.h"
#include "vpx/vpx_integer.h"
#include "vpx_mem/vpx_mem.h"

static vpx_image_t *img_alloc_helper(vpx_image_t *img, vpx_img_fmt_t fmt,
                                     unsigned int d_w, unsigned int d_h,
                                     unsigned int buf_align,
                                     unsigned int stride_align,
                                     unsigned char *img_data) {
  unsigned int h, w, s, xcs, ycs, bps;
  unsigned int stride_in_bytes;
  int align;

  /* Treat align==0 like align==1 */
  if (!buf_align) buf_align = 1;

  /* Validate alignment (must be power of 2) */
  if (buf_align & (buf_align - 1)) goto fail;

  /* Treat align==0 like align==1 */
  if (!stride_align) stride_align = 1;

  /* Validate alignment (must be power of 2) */
  if (stride_align & (stride_align - 1)) goto fail;

  /* Get sample size for this format */
  switch (fmt) {
    case VPX_IMG_FMT_I420:
    case VPX_IMG_FMT_YV12: bps = 12; break;
    case VPX_IMG_FMT_I422:
    case VPX_IMG_FMT_I440: bps = 16; break;
    case VPX_IMG_FMT_I444: bps = 24; break;
    case VPX_IMG_FMT_I42016: bps = 24; break;
    case VPX_IMG_FMT_I42216:
    case VPX_IMG_FMT_I44016: bps = 32; break;
    case VPX_IMG_FMT_I44416: bps = 48; break;
    default: bps = 16; break;
  }

  /* Get chroma shift values for this format */
  switch (fmt) {
    case VPX_IMG_FMT_I420:
    case VPX_IMG_FMT_YV12:
    case VPX_IMG_FMT_I422:
    case VPX_IMG_FMT_I42016:
    case VPX_IMG_FMT_I42216: xcs = 1; break;
    default: xcs = 0; break;
  }

  switch (fmt) {
    case VPX_IMG_FMT_I420:
    case VPX_IMG_FMT_I440:
    case VPX_IMG_FMT_YV12:
    case VPX_IMG_FMT_I42016:
    case VPX_IMG_FMT_I44016: ycs = 1; break;
    default: ycs = 0; break;
  }

  /* Calculate storage sizes. If the buffer was allocated externally, the
   * width and height shouldn't be adjusted. */
  w = d_w;
  h = d_h;
  s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8;
  s = (s + stride_align - 1) & ~(stride_align - 1);
  stride_in_bytes = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s;

  /* Allocate the new image */
  if (!img) {
    img = (vpx_image_t *)calloc(1, sizeof(vpx_image_t));
    if (!img) goto fail;
    img->self_allocd = 1;
  } else {
    memset(img, 0, sizeof(vpx_image_t));
  }

  img->img_data = img_data;

  if (!img_data) {
    uint64_t alloc_size;
    /* Calculate storage sizes given the chroma subsampling */
    align = (1 << xcs) - 1;
    w = (d_w + align) & ~align;
    align = (1 << ycs) - 1;
    h = (d_h + align) & ~align;

    s = (fmt & VPX_IMG_FMT_PLANAR) ? w : bps * w / 8;
    s = (s + stride_align - 1) & ~(stride_align - 1);
    stride_in_bytes = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? s * 2 : s;
    alloc_size = (fmt & VPX_IMG_FMT_PLANAR) ? (uint64_t)h * s * bps / 8
                                            : (uint64_t)h * s;

    if (alloc_size != (size_t)alloc_size) goto fail;

    img->img_data = (uint8_t *)vpx_memalign(buf_align, (size_t)alloc_size);
    img->img_data_owner = 1;
  }

  if (!img->img_data) goto fail;

  img->fmt = fmt;
  img->bit_depth = (fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 16 : 8;
  img->w = w;
  img->h = h;
  img->x_chroma_shift = xcs;
  img->y_chroma_shift = ycs;
  img->bps = bps;

  /* Calculate strides */
  img->stride[VPX_PLANE_Y] = img->stride[VPX_PLANE_ALPHA] = stride_in_bytes;
  img->stride[VPX_PLANE_U] = img->stride[VPX_PLANE_V] = stride_in_bytes >> xcs;

  /* Default viewport to entire image */
  if (!vpx_img_set_rect(img, 0, 0, d_w, d_h)) return img;

fail:
  vpx_img_free(img);
  return NULL;
}

vpx_image_t *vpx_img_alloc(vpx_image_t *img, vpx_img_fmt_t fmt,
                           unsigned int d_w, unsigned int d_h,
                           unsigned int align) {
  return img_alloc_helper(img, fmt, d_w, d_h, align, align, NULL);
}

vpx_image_t *vpx_img_wrap(vpx_image_t *img, vpx_img_fmt_t fmt, unsigned int d_w,
                          unsigned int d_h, unsigned int stride_align,
                          unsigned char *img_data) {
  /* By setting buf_align = 1, we don't change buffer alignment in this
   * function.
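   *
   * Editorial note: the wrapped img_data buffer remains owned by the caller.
   * img_data_owner is only set on the internal allocation path above, and
   * vpx_img_free() releases pixel memory only when that flag is set, so a
   * wrapped buffer is never freed by the library.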
*/ return img_alloc_helper(img, fmt, d_w, d_h, 1, stride_align, img_data); } int vpx_img_set_rect(vpx_image_t *img, unsigned int x, unsigned int y, unsigned int w, unsigned int h) { unsigned char *data; if (x + w <= img->w && y + h <= img->h) { img->d_w = w; img->d_h = h; /* Calculate plane pointers */ if (!(img->fmt & VPX_IMG_FMT_PLANAR)) { img->planes[VPX_PLANE_PACKED] = img->img_data + x * img->bps / 8 + y * img->stride[VPX_PLANE_PACKED]; } else { const int bytes_per_sample = (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1; data = img->img_data; if (img->fmt & VPX_IMG_FMT_HAS_ALPHA) { img->planes[VPX_PLANE_ALPHA] = data + x * bytes_per_sample + y * img->stride[VPX_PLANE_ALPHA]; data += img->h * img->stride[VPX_PLANE_ALPHA]; } img->planes[VPX_PLANE_Y] = data + x * bytes_per_sample + y * img->stride[VPX_PLANE_Y]; data += img->h * img->stride[VPX_PLANE_Y]; if (!(img->fmt & VPX_IMG_FMT_UV_FLIP)) { img->planes[VPX_PLANE_U] = data + (x >> img->x_chroma_shift) * bytes_per_sample + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; img->planes[VPX_PLANE_V] = data + (x >> img->x_chroma_shift) * bytes_per_sample + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V]; } else { img->planes[VPX_PLANE_V] = data + (x >> img->x_chroma_shift) * bytes_per_sample + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_V]; data += (img->h >> img->y_chroma_shift) * img->stride[VPX_PLANE_V]; img->planes[VPX_PLANE_U] = data + (x >> img->x_chroma_shift) * bytes_per_sample + (y >> img->y_chroma_shift) * img->stride[VPX_PLANE_U]; } } return 0; } return -1; } void vpx_img_flip(vpx_image_t *img) { /* Note: In the calculation pointer adjustment calculation, we want the * rhs to be promoted to a signed type. Section 6.3.1.8 of the ISO C99 * standard indicates that if the adjustment parameter is unsigned, the * stride parameter will be promoted to unsigned, causing errors when * the lhs is a larger type than the rhs. */ img->planes[VPX_PLANE_Y] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_Y]; img->stride[VPX_PLANE_Y] = -img->stride[VPX_PLANE_Y]; img->planes[VPX_PLANE_U] += (signed)((img->d_h >> img->y_chroma_shift) - 1) * img->stride[VPX_PLANE_U]; img->stride[VPX_PLANE_U] = -img->stride[VPX_PLANE_U]; img->planes[VPX_PLANE_V] += (signed)((img->d_h >> img->y_chroma_shift) - 1) * img->stride[VPX_PLANE_V]; img->stride[VPX_PLANE_V] = -img->stride[VPX_PLANE_V]; img->planes[VPX_PLANE_ALPHA] += (signed)(img->d_h - 1) * img->stride[VPX_PLANE_ALPHA]; img->stride[VPX_PLANE_ALPHA] = -img->stride[VPX_PLANE_ALPHA]; } void vpx_img_free(vpx_image_t *img) { if (img) { if (img->img_data && img->img_data_owner) vpx_free(img->img_data); if (img->self_allocd) free(img); } } libvpx-1.8.2/vpx/vp8.h000066400000000000000000000102301357355204000145310ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /*!\defgroup vp8 VP8 * \ingroup codecs * VP8 is a video compression algorithm that uses motion * compensated prediction, Discrete Cosine Transform (DCT) coding of the * prediction error signal and context dependent entropy coding techniques * based on arithmetic principles. 
It features: * - YUV 4:2:0 image format * - Macro-block based coding (16x16 luma plus two 8x8 chroma) * - 1/4 (1/8) pixel accuracy motion compensated prediction * - 4x4 DCT transform * - 128 level linear quantizer * - In loop deblocking filter * - Context-based entropy coding * * @{ */ /*!\file * \brief Provides controls common to both the VP8 encoder and decoder. */ #ifndef VPX_VPX_VP8_H_ #define VPX_VPX_VP8_H_ #include "./vpx_codec.h" #include "./vpx_image.h" #ifdef __cplusplus extern "C" { #endif /*!\brief Control functions * * The set of macros define the control functions of VP8 interface */ enum vp8_com_control_id { /*!\brief pass in an external frame into decoder to be used as reference frame */ VP8_SET_REFERENCE = 1, VP8_COPY_REFERENCE = 2, /**< get a copy of reference frame from the decoder */ VP8_SET_POSTPROC = 3, /**< set the decoder's post processing settings */ /* TODO(jkoleszar): The encoder incorrectly reuses some of these values (5+) * for its control ids. These should be migrated to something like the * VP8_DECODER_CTRL_ID_START range next time we're ready to break the ABI. */ VP9_GET_REFERENCE = 128, /**< get a pointer to a reference frame */ VP8_COMMON_CTRL_ID_MAX, VP8_DECODER_CTRL_ID_START = 256 }; /*!\brief post process flags * * The set of macros define VP8 decoder post processing flags */ enum vp8_postproc_level { VP8_NOFILTERING = 0, VP8_DEBLOCK = 1 << 0, VP8_DEMACROBLOCK = 1 << 1, VP8_ADDNOISE = 1 << 2, VP8_MFQE = 1 << 3 }; /*!\brief post process flags * * This define a structure that describe the post processing settings. For * the best objective measure (using the PSNR metric) set post_proc_flag * to VP8_DEBLOCK and deblocking_level to 1. */ typedef struct vp8_postproc_cfg { /*!\brief the types of post processing to be done, should be combination of * "vp8_postproc_level" */ int post_proc_flag; int deblocking_level; /**< the strength of deblocking, valid range [0, 16] */ int noise_level; /**< the strength of additive noise, valid range [0, 16] */ } vp8_postproc_cfg_t; /*!\brief reference frame type * * The set of macros define the type of VP8 reference frames */ typedef enum vpx_ref_frame_type { VP8_LAST_FRAME = 1, VP8_GOLD_FRAME = 2, VP8_ALTR_FRAME = 4 } vpx_ref_frame_type_t; /*!\brief reference frame data struct * * Define the data struct to access vp8 reference frames. */ typedef struct vpx_ref_frame { vpx_ref_frame_type_t frame_type; /**< which reference frame */ vpx_image_t img; /**< reference frame data in image format */ } vpx_ref_frame_t; /*!\brief VP9 specific reference frame data struct * * Define the data struct to access vp9 reference frames. */ typedef struct vp9_ref_frame { int idx; /**< frame index to get (input) */ vpx_image_t img; /**< img structure to populate (output) */ } vp9_ref_frame_t; /*!\cond */ /*!\brief vp8 decoder control function parameter type * * defines the data type for each of VP8 decoder control function requires */ VPX_CTRL_USE_TYPE(VP8_SET_REFERENCE, vpx_ref_frame_t *) #define VPX_CTRL_VP8_SET_REFERENCE VPX_CTRL_USE_TYPE(VP8_COPY_REFERENCE, vpx_ref_frame_t *) #define VPX_CTRL_VP8_COPY_REFERENCE VPX_CTRL_USE_TYPE(VP8_SET_POSTPROC, vp8_postproc_cfg_t *) #define VPX_CTRL_VP8_SET_POSTPROC VPX_CTRL_USE_TYPE(VP9_GET_REFERENCE, vp9_ref_frame_t *) #define VPX_CTRL_VP9_GET_REFERENCE /*!\endcond */ /*! 
@} - end defgroup vp8 */ #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_VP8_H_ libvpx-1.8.2/vpx/vp8cx.h000066400000000000000000001023051357355204000150710ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_VP8CX_H_ #define VPX_VPX_VP8CX_H_ /*!\defgroup vp8_encoder WebM VP8/VP9 Encoder * \ingroup vp8 * * @{ */ #include "./vp8.h" #include "./vpx_encoder.h" /*!\file * \brief Provides definitions for using VP8 or VP9 encoder algorithm within the * vpx Codec Interface. */ #ifdef __cplusplus extern "C" { #endif /*!\name Algorithm interface for VP8 * * This interface provides the capability to encode raw VP8 streams. * @{ */ extern vpx_codec_iface_t vpx_codec_vp8_cx_algo; extern vpx_codec_iface_t *vpx_codec_vp8_cx(void); /*!@} - end algorithm interface member group*/ /*!\name Algorithm interface for VP9 * * This interface provides the capability to encode raw VP9 streams. * @{ */ extern vpx_codec_iface_t vpx_codec_vp9_cx_algo; extern vpx_codec_iface_t *vpx_codec_vp9_cx(void); /*!@} - end algorithm interface member group*/ /* * Algorithm Flags */ /*!\brief Don't reference the last frame * * When this flag is set, the encoder will not use the last frame as a * predictor. When not set, the encoder will choose whether to use the * last frame or not automatically. */ #define VP8_EFLAG_NO_REF_LAST (1 << 16) /*!\brief Don't reference the golden frame * * When this flag is set, the encoder will not use the golden frame as a * predictor. When not set, the encoder will choose whether to use the * golden frame or not automatically. */ #define VP8_EFLAG_NO_REF_GF (1 << 17) /*!\brief Don't reference the alternate reference frame * * When this flag is set, the encoder will not use the alt ref frame as a * predictor. When not set, the encoder will choose whether to use the * alt ref frame or not automatically. */ #define VP8_EFLAG_NO_REF_ARF (1 << 21) /*!\brief Don't update the last frame * * When this flag is set, the encoder will not update the last frame with * the contents of the current frame. */ #define VP8_EFLAG_NO_UPD_LAST (1 << 18) /*!\brief Don't update the golden frame * * When this flag is set, the encoder will not update the golden frame with * the contents of the current frame. */ #define VP8_EFLAG_NO_UPD_GF (1 << 22) /*!\brief Don't update the alternate reference frame * * When this flag is set, the encoder will not update the alt ref frame with * the contents of the current frame. */ #define VP8_EFLAG_NO_UPD_ARF (1 << 23) /*!\brief Force golden frame update * * When this flag is set, the encoder copy the contents of the current frame * to the golden frame buffer. */ #define VP8_EFLAG_FORCE_GF (1 << 19) /*!\brief Force alternate reference frame update * * When this flag is set, the encoder copy the contents of the current frame * to the alternate reference frame buffer. */ #define VP8_EFLAG_FORCE_ARF (1 << 24) /*!\brief Disable entropy update * * When this flag is set, the encoder will not update its internal entropy * model based on the entropy of this frame. 
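 *
 * Like the other VP8_EFLAG_* bits above, this flag is OR-ed into the flags
 * argument of vpx_codec_encode(). A hypothetical sketch (names and values
 * illustrative only):
 *   res = vpx_codec_encode(&ctx, img, pts, duration,
 *                          VP8_EFLAG_NO_UPD_ENTROPY, VPX_DL_REALTIME);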
*/ #define VP8_EFLAG_NO_UPD_ENTROPY (1 << 20) /*!\brief VPx encoder control functions * * This set of macros define the control functions available for VPx * encoder interface. * * \sa #vpx_codec_control */ enum vp8e_enc_control_id { /*!\brief Codec control function to pass an ROI map to encoder. * * Supported in codecs: VP8 */ VP8E_SET_ROI_MAP = 8, /*!\brief Codec control function to pass an Active map to encoder. * * Supported in codecs: VP8, VP9 */ VP8E_SET_ACTIVEMAP, /*!\brief Codec control function to set encoder scaling mode. * * Supported in codecs: VP8, VP9 */ VP8E_SET_SCALEMODE = 11, /*!\brief Codec control function to set encoder internal speed settings. * * Changes in this value influences, among others, the encoder's selection * of motion estimation methods. Values greater than 0 will increase encoder * speed at the expense of quality. * * \note Valid range for VP8: -16..16 * \note Valid range for VP9: -9..9 * * Supported in codecs: VP8, VP9 */ VP8E_SET_CPUUSED = 13, /*!\brief Codec control function to enable automatic use of arf frames. * * \note Valid range for VP8: 0..1 * \note Valid range for VP9: 0..6 * * Supported in codecs: VP8, VP9 */ VP8E_SET_ENABLEAUTOALTREF, /*!\brief control function to set noise sensitivity * * 0: off, 1: OnYOnly, 2: OnYUV, * 3: OnYUVAggressive, 4: Adaptive * * Supported in codecs: VP8 */ VP8E_SET_NOISE_SENSITIVITY, /*!\brief Codec control function to set higher sharpness at the expense * of a lower PSNR. * * \note Valid range: 0..7 * * Supported in codecs: VP8, VP9 */ VP8E_SET_SHARPNESS, /*!\brief Codec control function to set the threshold for MBs treated static. * * Supported in codecs: VP8, VP9 */ VP8E_SET_STATIC_THRESHOLD, /*!\brief Codec control function to set the number of token partitions. * * Supported in codecs: VP8 */ VP8E_SET_TOKEN_PARTITIONS, /*!\brief Codec control function to get last quantizer chosen by the encoder. * * Return value uses internal quantizer scale defined by the codec. * * Supported in codecs: VP8, VP9 */ VP8E_GET_LAST_QUANTIZER, /*!\brief Codec control function to get last quantizer chosen by the encoder. * * Return value uses the 0..63 scale as used by the rc_*_quantizer config * parameters. * * Supported in codecs: VP8, VP9 */ VP8E_GET_LAST_QUANTIZER_64, /*!\brief Codec control function to set the max no of frames to create arf. * * Supported in codecs: VP8, VP9 */ VP8E_SET_ARNR_MAXFRAMES, /*!\brief Codec control function to set the filter strength for the arf. * * Supported in codecs: VP8, VP9 */ VP8E_SET_ARNR_STRENGTH, /*!\deprecated control function to set the filter type to use for the arf. */ VP8E_SET_ARNR_TYPE, /*!\brief Codec control function to set visual tuning. * * Supported in codecs: VP8, VP9 */ VP8E_SET_TUNING, /*!\brief Codec control function to set constrained / constant quality level. * * \attention For this value to be used vpx_codec_enc_cfg_t::rc_end_usage must * be set to #VPX_CQ or #VPX_Q * \note Valid range: 0..63 * * Supported in codecs: VP8, VP9 */ VP8E_SET_CQ_LEVEL, /*!\brief Codec control function to set Max data rate for Intra frames. * * This value controls additional clamping on the maximum size of a * keyframe. It is expressed as a percentage of the average * per-frame bitrate, with the special (and default) value 0 meaning * unlimited, or no additional clamping beyond the codec's built-in * algorithm. * * For example, to allocate no more than 4.5 frames worth of bitrate * to a keyframe, set this to 450. 
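 * An illustrative (hypothetical) call enforcing that cap:
 *   vpx_codec_control(&ctx, VP8E_SET_MAX_INTRA_BITRATE_PCT, 450);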
* * Supported in codecs: VP8, VP9 */ VP8E_SET_MAX_INTRA_BITRATE_PCT, /*!\brief Codec control function to set reference and update frame flags. * * Supported in codecs: VP8 */ VP8E_SET_FRAME_FLAGS, /*!\brief Codec control function to set max data rate for Inter frames. * * This value controls additional clamping on the maximum size of an * inter frame. It is expressed as a percentage of the average * per-frame bitrate, with the special (and default) value 0 meaning * unlimited, or no additional clamping beyond the codec's built-in * algorithm. * * For example, to allow no more than 4.5 frames worth of bitrate * to an inter frame, set this to 450. * * Supported in codecs: VP9 */ VP9E_SET_MAX_INTER_BITRATE_PCT, /*!\brief Boost percentage for Golden Frame in CBR mode. * * This value controls the amount of boost given to Golden Frame in * CBR mode. It is expressed as a percentage of the average * per-frame bitrate, with the special (and default) value 0 meaning * the feature is off, i.e., no golden frame boost in CBR mode and * average bitrate target is used. * * For example, to allow 100% more bits, i.e, 2X, in a golden frame * than average frame, set this to 100. * * Supported in codecs: VP9 */ VP9E_SET_GF_CBR_BOOST_PCT, /*!\brief Codec control function to set the temporal layer id. * * For temporal scalability: this control allows the application to set the * layer id for each frame to be encoded. Note that this control must be set * for every frame prior to encoding. The usage of this control function * supersedes the internal temporal pattern counter, which is now deprecated. * * Supported in codecs: VP8 */ VP8E_SET_TEMPORAL_LAYER_ID, /*!\brief Codec control function to set encoder screen content mode. * * 0: off, 1: On, 2: On with more aggressive rate control. * * Supported in codecs: VP8 */ VP8E_SET_SCREEN_CONTENT_MODE, /*!\brief Codec control function to set lossless encoding mode. * * VP9 can operate in lossless encoding mode, in which the bitstream * produced will be able to decode and reconstruct a perfect copy of * input source. This control function provides a mean to switch encoder * into lossless coding mode(1) or normal coding mode(0) that may be lossy. * 0 = lossy coding mode * 1 = lossless coding mode * * By default, encoder operates in normal coding mode (maybe lossy). * * Supported in codecs: VP9 */ VP9E_SET_LOSSLESS, /*!\brief Codec control function to set number of tile columns. * * In encoding and decoding, VP9 allows an input image frame be partitioned * into separated vertical tile columns, which can be encoded or decoded * independently. This enables easy implementation of parallel encoding and * decoding. This control requests the encoder to use column tiles in * encoding an input frame, with number of tile columns (in Log2 unit) as * the parameter: * 0 = 1 tile column * 1 = 2 tile columns * 2 = 4 tile columns * ..... * n = 2**n tile columns * The requested tile columns will be capped by the encoder based on image * size limitations (The minimum width of a tile column is 256 pixels, the * maximum is 4096). * * By default, the value is 6, i.e., the maximum number of tiles supported by * the resolution. * * Supported in codecs: VP9 */ VP9E_SET_TILE_COLUMNS, /*!\brief Codec control function to set number of tile rows. * * In encoding and decoding, VP9 allows an input image frame be partitioned * into separated horizontal tile rows. Tile rows are encoded or decoded * sequentially. 
Even though encoding/decoding of later tile rows depends on * earlier ones, this allows the encoder to output data packets for tile rows * prior to completely processing all tile rows in a frame, thereby reducing * the latency in processing between input and output. The parameter * for this control describes the number of tile rows, which has a valid * range [0, 2]: * 0 = 1 tile row * 1 = 2 tile rows * 2 = 4 tile rows * * By default, the value is 0, i.e. one single row tile for entire image. * * Supported in codecs: VP9 */ VP9E_SET_TILE_ROWS, /*!\brief Codec control function to enable frame parallel decoding feature. * * VP9 has a bitstream feature to reduce decoding dependency between frames * by turning off backward update of probability context used in encoding * and decoding. This allows staged parallel processing of more than one * video frame in the decoder. This control function provides a means to * turn this feature on or off for bitstreams produced by encoder. * * By default, this feature is on. * * Supported in codecs: VP9 */ VP9E_SET_FRAME_PARALLEL_DECODING, /*!\brief Codec control function to set adaptive quantization mode. * * VP9 has a segment based feature that allows encoder to adaptively change * quantization parameter for each segment within a frame to improve the * subjective quality. This control makes encoder operate in one of the * several AQ_modes supported. * * By default, encoder operates with AQ_Mode 0(adaptive quantization off). * * Supported in codecs: VP9 */ VP9E_SET_AQ_MODE, /*!\brief Codec control function to enable/disable periodic Q boost. * * One VP9 encoder speed feature is to enable quality boost by lowering * frame level Q periodically. This control function provides a mean to * turn on/off this feature. * 0 = off * 1 = on * * By default, the encoder is allowed to use this feature for appropriate * encoding modes. * * Supported in codecs: VP9 */ VP9E_SET_FRAME_PERIODIC_BOOST, /*!\brief Codec control function to set noise sensitivity. * * 0: off, 1: On(YOnly), 2: For SVC only, on top two spatial layers(YOnly) * * Supported in codecs: VP9 */ VP9E_SET_NOISE_SENSITIVITY, /*!\brief Codec control function to turn on/off SVC in encoder. * \note Return value is VPX_CODEC_INVALID_PARAM if the encoder does not * support SVC in its current encoding mode * 0: off, 1: on * * Supported in codecs: VP9 */ VP9E_SET_SVC, /*!\brief Codec control function to pass an ROI map to encoder. * * Supported in codecs: VP9 */ VP9E_SET_ROI_MAP, /*!\brief Codec control function to set parameters for SVC. * \note Parameters contain min_q, max_q, scaling factor for each of the * SVC layers. * * Supported in codecs: VP9 */ VP9E_SET_SVC_PARAMETERS, /*!\brief Codec control function to set svc layer for spatial and temporal. * \note Valid ranges: 0..#vpx_codec_enc_cfg::ss_number_layers for spatial * layer and 0..#vpx_codec_enc_cfg::ts_number_layers for * temporal layer. * * Supported in codecs: VP9 */ VP9E_SET_SVC_LAYER_ID, /*!\brief Codec control function to set content type. * \note Valid parameter range: * VP9E_CONTENT_DEFAULT = Regular video content (Default) * VP9E_CONTENT_SCREEN = Screen capture content * VP9E_CONTENT_FILM = Film content: improves grain retention * * Supported in codecs: VP9 */ VP9E_SET_TUNE_CONTENT, /*!\brief Codec control function to get svc layer ID. * \note The layer ID returned is for the data packet from the registered * callback function. 
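 * A hypothetical query, using the vpx_svc_layer_id_t pointer type this
 * control is declared with below:
 *   vpx_svc_layer_id_t layer_id;
 *   vpx_codec_control(&ctx, VP9E_GET_SVC_LAYER_ID, &layer_id);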
* * Supported in codecs: VP9 */ VP9E_GET_SVC_LAYER_ID, /*!\brief Codec control function to register callback to get per layer packet. * \note Parameter for this control function is a structure with a callback * function and a pointer to private data used by the callback. * * Supported in codecs: VP9 */ VP9E_REGISTER_CX_CALLBACK, /*!\brief Codec control function to set color space info. * \note Valid ranges: 0..7, default is "UNKNOWN". * 0 = UNKNOWN, * 1 = BT_601 * 2 = BT_709 * 3 = SMPTE_170 * 4 = SMPTE_240 * 5 = BT_2020 * 6 = RESERVED * 7 = SRGB * * Supported in codecs: VP9 */ VP9E_SET_COLOR_SPACE, /*!\brief Codec control function to set temporal layering mode. * \note Valid ranges: 0..3, default is "0" * (VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING). * 0 = VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING * 1 = VP9E_TEMPORAL_LAYERING_MODE_BYPASS * 2 = VP9E_TEMPORAL_LAYERING_MODE_0101 * 3 = VP9E_TEMPORAL_LAYERING_MODE_0212 * * Supported in codecs: VP9 */ VP9E_SET_TEMPORAL_LAYERING_MODE, /*!\brief Codec control function to set minimum interval between GF/ARF frames * * By default the value is set as 4. * * Supported in codecs: VP9 */ VP9E_SET_MIN_GF_INTERVAL, /*!\brief Codec control function to set minimum interval between GF/ARF frames * * By default the value is set as 16. * * Supported in codecs: VP9 */ VP9E_SET_MAX_GF_INTERVAL, /*!\brief Codec control function to get an Active map back from the encoder. * * Supported in codecs: VP9 */ VP9E_GET_ACTIVEMAP, /*!\brief Codec control function to set color range bit. * \note Valid ranges: 0..1, default is 0 * 0 = Limited range (16..235 or HBD equivalent) * 1 = Full range (0..255 or HBD equivalent) * * Supported in codecs: VP9 */ VP9E_SET_COLOR_RANGE, /*!\brief Codec control function to set the frame flags and buffer indices * for spatial layers. The frame flags and buffer indices are set using the * struct #vpx_svc_ref_frame_config defined below. * * Supported in codecs: VP9 */ VP9E_SET_SVC_REF_FRAME_CONFIG, /*!\brief Codec control function to set intended rendering image size. * * By default, this is identical to the image size in pixels. * * Supported in codecs: VP9 */ VP9E_SET_RENDER_SIZE, /*!\brief Codec control function to set target level. * * 255: off (default); 0: only keep level stats; 10: target for level 1.0; * 11: target for level 1.1; ... 62: target for level 6.2 * * Supported in codecs: VP9 */ VP9E_SET_TARGET_LEVEL, /*!\brief Codec control function to set row level multi-threading. * * 0 : off, 1 : on * * Supported in codecs: VP9 */ VP9E_SET_ROW_MT, /*!\brief Codec control function to get bitstream level. * * Supported in codecs: VP9 */ VP9E_GET_LEVEL, /*!\brief Codec control function to enable/disable special mode for altref * adaptive quantization. You can use it with --aq-mode concurrently. * * Enable special adaptive quantization for altref frames based on their * expected prediction quality for the future frames. * * Supported in codecs: VP9 */ VP9E_SET_ALT_REF_AQ, /*!\brief Boost percentage for Golden Frame in CBR mode. * * This value controls the amount of boost given to Golden Frame in * CBR mode. It is expressed as a percentage of the average * per-frame bitrate, with the special (and default) value 0 meaning * the feature is off, i.e., no golden frame boost in CBR mode and * average bitrate target is used. * * For example, to allow 100% more bits, i.e, 2X, in a golden frame * than average frame, set this to 100. 
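 * An illustrative (hypothetical) call requesting the 2X allocation above:
 *   vpx_codec_control(&ctx, VP8E_SET_GF_CBR_BOOST_PCT, 100);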
* * Supported in codecs: VP8 */ VP8E_SET_GF_CBR_BOOST_PCT, /*!\brief Codec control function to enable the extreme motion vector unit test * in VP9. Please note that this is only used in motion vector unit test. * * 0 : off, 1 : MAX_EXTREME_MV, 2 : MIN_EXTREME_MV * * Supported in codecs: VP9 */ VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, /*!\brief Codec control function to constrain the inter-layer prediction * (prediction of lower spatial resolution) in VP9 SVC. * * 0 : inter-layer prediction on, 1 : off, 2 : off only on non-key frames * * Supported in codecs: VP9 */ VP9E_SET_SVC_INTER_LAYER_PRED, /*!\brief Codec control function to set mode and thresholds for frame * dropping in SVC. Drop frame thresholds are set per-layer. Mode is set as: * 0 : layer-dependent dropping, 1 : constrained dropping, current layer drop * forces drop on all upper layers. Default mode is 0. * * Supported in codecs: VP9 */ VP9E_SET_SVC_FRAME_DROP_LAYER, /*!\brief Codec control function to get the refresh and reference flags and * the buffer indices, up to the last encoded spatial layer. * * Supported in codecs: VP9 */ VP9E_GET_SVC_REF_FRAME_CONFIG, /*!\brief Codec control function to enable/disable use of golden reference as * a second temporal reference for SVC. Only used when inter-layer prediction * is disabled on INTER frames. * * 0: Off, 1: Enabled (default) * * Supported in codecs: VP9 */ VP9E_SET_SVC_GF_TEMPORAL_REF, /*!\brief Codec control function to enable spatial layer sync frame, for any * spatial layer. Enabling it for layer k means spatial layer k will disable * all temporal prediction, but keep the inter-layer prediction. It will * refresh any temporal reference buffer for that layer, and reset the * temporal layer for the superframe to 0. Setting the layer sync for base * spatial layer forces a key frame. Default is off (0) for all spatial * layers. Spatial layer sync flag is reset to 0 after each encoded layer, * so when control is invoked it is only used for the current superframe. * * 0: Off (default), 1: Enabled * * Supported in codecs: VP9 */ VP9E_SET_SVC_SPATIAL_LAYER_SYNC, /*!\brief Codec control function to enable temporal dependency model. * * Vp9 allows the encoder to run temporal dependency model and use it to * improve the compression performance. To enable, set this parameter to be * 1. The default value is set to be 1. */ VP9E_SET_TPL, /*!\brief Codec control function to enable postencode frame drop. * * This will allow encoder to drop frame after it's encoded. * * 0: Off (default), 1: Enabled * * Supported in codecs: VP9 */ VP9E_SET_POSTENCODE_DROP, }; /*!\brief vpx 1-D scaling mode * * This set of constants define 1-D vpx scaling modes */ typedef enum vpx_scaling_mode_1d { VP8E_NORMAL = 0, VP8E_FOURFIVE = 1, VP8E_THREEFIVE = 2, VP8E_ONETWO = 3 } VPX_SCALING_MODE; /*!\brief Temporal layering mode enum for VP9 SVC. * * This set of macros define the different temporal layering modes. * Supported codecs: VP9 (in SVC mode) * */ typedef enum vp9e_temporal_layering_mode { /*!\brief No temporal layering. * Used when only spatial layering is used. */ VP9E_TEMPORAL_LAYERING_MODE_NOLAYERING = 0, /*!\brief Bypass mode. * Used when application needs to control temporal layering. * This will only work when the number of spatial layers equals 1. */ VP9E_TEMPORAL_LAYERING_MODE_BYPASS = 1, /*!\brief 0-1-0-1... temporal layering scheme with two temporal layers. */ VP9E_TEMPORAL_LAYERING_MODE_0101 = 2, /*!\brief 0-2-1-2... temporal layering scheme with three temporal layers. 
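 * (i.e. frames are assigned temporal layer ids 0, 2, 1, 2, repeating)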
*/ VP9E_TEMPORAL_LAYERING_MODE_0212 = 3 } VP9E_TEMPORAL_LAYERING_MODE; /*!\brief vpx region of interest map * * These defines the data structures for the region of interest map * */ typedef struct vpx_roi_map { /*! If ROI is enabled. */ uint8_t enabled; /*! An id between 0-3 (0-7 for vp9) for each 16x16 (8x8 for VP9) * region within a frame. */ unsigned char *roi_map; unsigned int rows; /**< Number of rows. */ unsigned int cols; /**< Number of columns. */ /*! VP8 only uses the first 4 segments. VP9 uses 8 segments. */ int delta_q[8]; /**< Quantizer deltas. */ int delta_lf[8]; /**< Loop filter deltas. */ /*! skip and ref frame segment is only used in VP9. */ int skip[8]; /**< Skip this block. */ int ref_frame[8]; /**< Reference frame for this block. */ /*! Static breakout threshold for each segment. Only used in VP8. */ unsigned int static_threshold[4]; } vpx_roi_map_t; /*!\brief vpx active region map * * These defines the data structures for active region map * */ typedef struct vpx_active_map { /*!\brief specify an on (1) or off (0) each 16x16 region within a frame */ unsigned char *active_map; unsigned int rows; /**< number of rows */ unsigned int cols; /**< number of cols */ } vpx_active_map_t; /*!\brief vpx image scaling mode * * This defines the data structure for image scaling mode * */ typedef struct vpx_scaling_mode { VPX_SCALING_MODE h_scaling_mode; /**< horizontal scaling mode */ VPX_SCALING_MODE v_scaling_mode; /**< vertical scaling mode */ } vpx_scaling_mode_t; /*!\brief VP8 token partition mode * * This defines VP8 partitioning mode for compressed data, i.e., the number of * sub-streams in the bitstream. Used for parallelized decoding. * */ typedef enum { VP8_ONE_TOKENPARTITION = 0, VP8_TWO_TOKENPARTITION = 1, VP8_FOUR_TOKENPARTITION = 2, VP8_EIGHT_TOKENPARTITION = 3 } vp8e_token_partitions; /*!brief VP9 encoder content type */ typedef enum { VP9E_CONTENT_DEFAULT, VP9E_CONTENT_SCREEN, VP9E_CONTENT_FILM, VP9E_CONTENT_INVALID } vp9e_tune_content; /*!\brief VP8 model tuning parameters * * Changes the encoder to tune for certain types of input material. * */ typedef enum { VP8_TUNE_PSNR, VP8_TUNE_SSIM } vp8e_tuning; /*!\brief vp9 svc layer parameters * * This defines the spatial and temporal layer id numbers for svc encoding. * This is used with the #VP9E_SET_SVC_LAYER_ID control to set the spatial and * temporal layer id for the current frame. * */ typedef struct vpx_svc_layer_id { int spatial_layer_id; /**< First spatial layer to start encoding. */ // TODO(jianj): Deprecated, to be removed. int temporal_layer_id; /**< Temporal layer id number. */ int temporal_layer_id_per_spatial[VPX_SS_MAX_LAYERS]; /**< Temp layer id. */ } vpx_svc_layer_id_t; /*!\brief vp9 svc frame flag parameters. * * This defines the frame flags and buffer indices for each spatial layer for * svc encoding. * This is used with the #VP9E_SET_SVC_REF_FRAME_CONFIG control to set frame * flags and buffer indices for each spatial layer for the current (super)frame. * */ typedef struct vpx_svc_ref_frame_config { int lst_fb_idx[VPX_SS_MAX_LAYERS]; /**< Last buffer index. */ int gld_fb_idx[VPX_SS_MAX_LAYERS]; /**< Golden buffer index. */ int alt_fb_idx[VPX_SS_MAX_LAYERS]; /**< Altref buffer index. */ int update_buffer_slot[VPX_SS_MAX_LAYERS]; /**< Update reference frames. */ // TODO(jianj): Remove update_last/golden/alt_ref, these are deprecated. int update_last[VPX_SS_MAX_LAYERS]; /**< Update last. */ int update_golden[VPX_SS_MAX_LAYERS]; /**< Update golden. 
*/ int update_alt_ref[VPX_SS_MAX_LAYERS]; /**< Update altref. */ int reference_last[VPX_SS_MAX_LAYERS]; /**< Last as reference. */ int reference_golden[VPX_SS_MAX_LAYERS]; /**< Golden as reference. */ int reference_alt_ref[VPX_SS_MAX_LAYERS]; /**< Altref as reference. */ int64_t duration[VPX_SS_MAX_LAYERS]; /**< Duration per spatial layer. */ } vpx_svc_ref_frame_config_t; /*!\brief VP9 svc frame dropping mode. * * This defines the frame drop mode for SVC. * */ typedef enum { CONSTRAINED_LAYER_DROP, /**< Upper layers are constrained to drop if current layer drops. */ LAYER_DROP, /**< Any spatial layer can drop. */ FULL_SUPERFRAME_DROP, /**< Only full superframe can drop. */ CONSTRAINED_FROM_ABOVE_DROP, /**< Lower layers are constrained to drop if current layer drops. */ } SVC_LAYER_DROP_MODE; /*!\brief vp9 svc frame dropping parameters. * * This defines the frame drop thresholds for each spatial layer, and * the frame dropping mode: 0 = layer based frame dropping (default), * 1 = constrained dropping where current layer drop forces all upper * spatial layers to drop. */ typedef struct vpx_svc_frame_drop { int framedrop_thresh[VPX_SS_MAX_LAYERS]; /**< Frame drop thresholds */ SVC_LAYER_DROP_MODE framedrop_mode; /**< Layer-based or constrained dropping. */ int max_consec_drop; /**< Maximum consecutive drops, for any layer. */ } vpx_svc_frame_drop_t; /*!\brief vp9 svc spatial layer sync parameters. * * This defines the spatial layer sync flag, defined per spatial layer. * */ typedef struct vpx_svc_spatial_layer_sync { int spatial_layer_sync[VPX_SS_MAX_LAYERS]; /**< Sync layer flags */ int base_layer_intra_only; /**< Flag for setting Intra-only frame on base */ } vpx_svc_spatial_layer_sync_t; /*!\cond */ /*!\brief VP8 encoder control function parameter type * * Defines the data types that VP8E control functions take. 
Note that * additional common controls are defined in vp8.h * */ VPX_CTRL_USE_TYPE(VP8E_SET_FRAME_FLAGS, int) #define VPX_CTRL_VP8E_SET_FRAME_FLAGS VPX_CTRL_USE_TYPE(VP8E_SET_TEMPORAL_LAYER_ID, int) #define VPX_CTRL_VP8E_SET_TEMPORAL_LAYER_ID VPX_CTRL_USE_TYPE(VP8E_SET_ROI_MAP, vpx_roi_map_t *) #define VPX_CTRL_VP8E_SET_ROI_MAP VPX_CTRL_USE_TYPE(VP9E_SET_ROI_MAP, vpx_roi_map_t *) #define VPX_CTRL_VP9E_SET_ROI_MAP VPX_CTRL_USE_TYPE(VP8E_SET_ACTIVEMAP, vpx_active_map_t *) #define VPX_CTRL_VP8E_SET_ACTIVEMAP VPX_CTRL_USE_TYPE(VP8E_SET_SCALEMODE, vpx_scaling_mode_t *) #define VPX_CTRL_VP8E_SET_SCALEMODE VPX_CTRL_USE_TYPE(VP9E_SET_SVC, int) #define VPX_CTRL_VP9E_SET_SVC VPX_CTRL_USE_TYPE(VP9E_SET_SVC_PARAMETERS, void *) #define VPX_CTRL_VP9E_SET_SVC_PARAMETERS VPX_CTRL_USE_TYPE(VP9E_REGISTER_CX_CALLBACK, void *) #define VPX_CTRL_VP9E_REGISTER_CX_CALLBACK VPX_CTRL_USE_TYPE(VP9E_SET_SVC_LAYER_ID, vpx_svc_layer_id_t *) #define VPX_CTRL_VP9E_SET_SVC_LAYER_ID VPX_CTRL_USE_TYPE(VP8E_SET_CPUUSED, int) #define VPX_CTRL_VP8E_SET_CPUUSED VPX_CTRL_USE_TYPE(VP8E_SET_ENABLEAUTOALTREF, unsigned int) #define VPX_CTRL_VP8E_SET_ENABLEAUTOALTREF VPX_CTRL_USE_TYPE(VP8E_SET_NOISE_SENSITIVITY, unsigned int) #define VPX_CTRL_VP8E_SET_NOISE_SENSITIVITY VPX_CTRL_USE_TYPE(VP8E_SET_SHARPNESS, unsigned int) #define VPX_CTRL_VP8E_SET_SHARPNESS VPX_CTRL_USE_TYPE(VP8E_SET_STATIC_THRESHOLD, unsigned int) #define VPX_CTRL_VP8E_SET_STATIC_THRESHOLD VPX_CTRL_USE_TYPE(VP8E_SET_TOKEN_PARTITIONS, int) /* vp8e_token_partitions */ #define VPX_CTRL_VP8E_SET_TOKEN_PARTITIONS VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_MAXFRAMES, unsigned int) #define VPX_CTRL_VP8E_SET_ARNR_MAXFRAMES VPX_CTRL_USE_TYPE(VP8E_SET_ARNR_STRENGTH, unsigned int) #define VPX_CTRL_VP8E_SET_ARNR_STRENGTH VPX_CTRL_USE_TYPE_DEPRECATED(VP8E_SET_ARNR_TYPE, unsigned int) #define VPX_CTRL_VP8E_SET_ARNR_TYPE VPX_CTRL_USE_TYPE(VP8E_SET_TUNING, int) /* vp8e_tuning */ #define VPX_CTRL_VP8E_SET_TUNING VPX_CTRL_USE_TYPE(VP8E_SET_CQ_LEVEL, unsigned int) #define VPX_CTRL_VP8E_SET_CQ_LEVEL VPX_CTRL_USE_TYPE(VP9E_SET_TILE_COLUMNS, int) #define VPX_CTRL_VP9E_SET_TILE_COLUMNS VPX_CTRL_USE_TYPE(VP9E_SET_TILE_ROWS, int) #define VPX_CTRL_VP9E_SET_TILE_ROWS VPX_CTRL_USE_TYPE(VP9E_SET_TPL, int) #define VPX_CTRL_VP9E_SET_TPL VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER, int *) #define VPX_CTRL_VP8E_GET_LAST_QUANTIZER VPX_CTRL_USE_TYPE(VP8E_GET_LAST_QUANTIZER_64, int *) #define VPX_CTRL_VP8E_GET_LAST_QUANTIZER_64 VPX_CTRL_USE_TYPE(VP9E_GET_SVC_LAYER_ID, vpx_svc_layer_id_t *) #define VPX_CTRL_VP9E_GET_SVC_LAYER_ID VPX_CTRL_USE_TYPE(VP8E_SET_MAX_INTRA_BITRATE_PCT, unsigned int) #define VPX_CTRL_VP8E_SET_MAX_INTRA_BITRATE_PCT VPX_CTRL_USE_TYPE(VP9E_SET_MAX_INTER_BITRATE_PCT, unsigned int) #define VPX_CTRL_VP9E_SET_MAX_INTER_BITRATE_PCT VPX_CTRL_USE_TYPE(VP8E_SET_GF_CBR_BOOST_PCT, unsigned int) #define VPX_CTRL_VP8E_SET_GF_CBR_BOOST_PCT VPX_CTRL_USE_TYPE(VP8E_SET_SCREEN_CONTENT_MODE, unsigned int) #define VPX_CTRL_VP8E_SET_SCREEN_CONTENT_MODE VPX_CTRL_USE_TYPE(VP9E_SET_GF_CBR_BOOST_PCT, unsigned int) #define VPX_CTRL_VP9E_SET_GF_CBR_BOOST_PCT VPX_CTRL_USE_TYPE(VP9E_SET_LOSSLESS, unsigned int) #define VPX_CTRL_VP9E_SET_LOSSLESS VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PARALLEL_DECODING, unsigned int) #define VPX_CTRL_VP9E_SET_FRAME_PARALLEL_DECODING VPX_CTRL_USE_TYPE(VP9E_SET_AQ_MODE, unsigned int) #define VPX_CTRL_VP9E_SET_AQ_MODE VPX_CTRL_USE_TYPE(VP9E_SET_ALT_REF_AQ, int) #define VPX_CTRL_VP9E_SET_ALT_REF_AQ VPX_CTRL_USE_TYPE(VP9E_SET_FRAME_PERIODIC_BOOST, unsigned int) #define 
VPX_CTRL_VP9E_SET_FRAME_PERIODIC_BOOST VPX_CTRL_USE_TYPE(VP9E_SET_NOISE_SENSITIVITY, unsigned int) #define VPX_CTRL_VP9E_SET_NOISE_SENSITIVITY VPX_CTRL_USE_TYPE(VP9E_SET_TUNE_CONTENT, int) /* vp9e_tune_content */ #define VPX_CTRL_VP9E_SET_TUNE_CONTENT VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_SPACE, int) #define VPX_CTRL_VP9E_SET_COLOR_SPACE VPX_CTRL_USE_TYPE(VP9E_SET_MIN_GF_INTERVAL, unsigned int) #define VPX_CTRL_VP9E_SET_MIN_GF_INTERVAL VPX_CTRL_USE_TYPE(VP9E_SET_MAX_GF_INTERVAL, unsigned int) #define VPX_CTRL_VP9E_SET_MAX_GF_INTERVAL VPX_CTRL_USE_TYPE(VP9E_GET_ACTIVEMAP, vpx_active_map_t *) #define VPX_CTRL_VP9E_GET_ACTIVEMAP VPX_CTRL_USE_TYPE(VP9E_SET_COLOR_RANGE, int) #define VPX_CTRL_VP9E_SET_COLOR_RANGE VPX_CTRL_USE_TYPE(VP9E_SET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *) #define VPX_CTRL_VP9E_SET_SVC_REF_FRAME_CONFIG VPX_CTRL_USE_TYPE(VP9E_SET_RENDER_SIZE, int *) #define VPX_CTRL_VP9E_SET_RENDER_SIZE VPX_CTRL_USE_TYPE(VP9E_SET_TARGET_LEVEL, unsigned int) #define VPX_CTRL_VP9E_SET_TARGET_LEVEL VPX_CTRL_USE_TYPE(VP9E_SET_ROW_MT, unsigned int) #define VPX_CTRL_VP9E_SET_ROW_MT VPX_CTRL_USE_TYPE(VP9E_GET_LEVEL, int *) #define VPX_CTRL_VP9E_GET_LEVEL VPX_CTRL_USE_TYPE(VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST, unsigned int) #define VPX_CTRL_VP9E_ENABLE_MOTION_VECTOR_UNIT_TEST VPX_CTRL_USE_TYPE(VP9E_SET_SVC_INTER_LAYER_PRED, unsigned int) #define VPX_CTRL_VP9E_SET_SVC_INTER_LAYER_PRED VPX_CTRL_USE_TYPE(VP9E_SET_SVC_FRAME_DROP_LAYER, vpx_svc_frame_drop_t *) #define VPX_CTRL_VP9E_SET_SVC_FRAME_DROP_LAYER VPX_CTRL_USE_TYPE(VP9E_GET_SVC_REF_FRAME_CONFIG, vpx_svc_ref_frame_config_t *) #define VPX_CTRL_VP9E_GET_SVC_REF_FRAME_CONFIG VPX_CTRL_USE_TYPE(VP9E_SET_SVC_GF_TEMPORAL_REF, unsigned int) #define VPX_CTRL_VP9E_SET_SVC_GF_TEMPORAL_REF VPX_CTRL_USE_TYPE(VP9E_SET_SVC_SPATIAL_LAYER_SYNC, vpx_svc_spatial_layer_sync_t *) #define VPX_CTRL_VP9E_SET_SVC_SPATIAL_LAYER_SYNC VPX_CTRL_USE_TYPE(VP9E_SET_POSTENCODE_DROP, unsigned int) #define VPX_CTRL_VP9E_SET_POSTENCODE_DROP /*!\endcond */ /*! @} - end defgroup vp8_encoder */ #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_VP8CX_H_ libvpx-1.8.2/vpx/vp8dx.h000066400000000000000000000153551357355204000151020ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /*!\defgroup vp8_decoder WebM VP8/VP9 Decoder * \ingroup vp8 * * @{ */ /*!\file * \brief Provides definitions for using VP8 or VP9 within the vpx Decoder * interface. */ #ifndef VPX_VPX_VP8DX_H_ #define VPX_VPX_VP8DX_H_ #ifdef __cplusplus extern "C" { #endif /* Include controls common to both the encoder and decoder */ #include "./vp8.h" /*!\name Algorithm interface for VP8 * * This interface provides the capability to decode VP8 streams. * @{ */ extern vpx_codec_iface_t vpx_codec_vp8_dx_algo; extern vpx_codec_iface_t *vpx_codec_vp8_dx(void); /*!@} - end algorithm interface member group*/ /*!\name Algorithm interface for VP9 * * This interface provides the capability to decode VP9 streams. 
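 *
 * A hypothetical initialization sketch:
 *   vpx_codec_ctx_t ctx;
 *   vpx_codec_err_t res = vpx_codec_dec_init(&ctx, vpx_codec_vp9_dx(),
 *                                            NULL, 0);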
* @{ */ extern vpx_codec_iface_t vpx_codec_vp9_dx_algo; extern vpx_codec_iface_t *vpx_codec_vp9_dx(void); /*!@} - end algorithm interface member group*/ /*!\enum vp8_dec_control_id * \brief VP8 decoder control functions * * This set of macros define the control functions available for the VP8 * decoder interface. * * \sa #vpx_codec_control */ enum vp8_dec_control_id { /** control function to get info on which reference frames were updated * by the last decode */ VP8D_GET_LAST_REF_UPDATES = VP8_DECODER_CTRL_ID_START, /** check if the indicated frame is corrupted */ VP8D_GET_FRAME_CORRUPTED, /** control function to get info on which reference frames were used * by the last decode */ VP8D_GET_LAST_REF_USED, /** decryption function to decrypt encoded buffer data immediately * before decoding. Takes a vpx_decrypt_init, which contains * a callback function and opaque context pointer. */ VPXD_SET_DECRYPTOR, VP8D_SET_DECRYPTOR = VPXD_SET_DECRYPTOR, /** control function to get the dimensions that the current frame is decoded * at. This may be different to the intended display size for the frame as * specified in the wrapper or frame header (see VP9D_GET_DISPLAY_SIZE). */ VP9D_GET_FRAME_SIZE, /** control function to get the current frame's intended display dimensions * (as specified in the wrapper or frame header). This may be different to * the decoded dimensions of this frame (see VP9D_GET_FRAME_SIZE). */ VP9D_GET_DISPLAY_SIZE, /** control function to get the bit depth of the stream. */ VP9D_GET_BIT_DEPTH, /** control function to set the byte alignment of the planes in the reference * buffers. Valid values are power of 2, from 32 to 1024. A value of 0 sets * legacy alignment. I.e. Y plane is aligned to 32 bytes, U plane directly * follows Y plane, and V plane directly follows U plane. Default value is 0. */ VP9_SET_BYTE_ALIGNMENT, /** control function to invert the decoding order to from right to left. The * function is used in a test to confirm the decoding independence of tile * columns. The function may be used in application where this order * of decoding is desired. * * TODO(yaowu): Rework the unit test that uses this control, and in a future * release, this test-only control shall be removed. */ VP9_INVERT_TILE_DECODE_ORDER, /** control function to set the skip loop filter flag. Valid values are * integers. The decoder will skip the loop filter when its value is set to * nonzero. If the loop filter is skipped the decoder may accumulate decode * artifacts. The default value is 0. */ VP9_SET_SKIP_LOOP_FILTER, /** control function to decode SVC stream up to the x spatial layers, * where x is passed in through the control, and is 0 for base layer. */ VP9_DECODE_SVC_SPATIAL_LAYER, /*!\brief Codec control function to get last decoded frame quantizer. * * Return value uses internal quantizer scale defined by the codec. * * Supported in codecs: VP8, VP9 */ VPXD_GET_LAST_QUANTIZER, /*!\brief Codec control function to set row level multi-threading. * * 0 : off, 1 : on * * Supported in codecs: VP9 */ VP9D_SET_ROW_MT, /*!\brief Codec control function to set loopfilter optimization. * * 0 : off, Loop filter is done after all tiles have been decoded * 1 : on, Loop filter is done immediately after decode without * waiting for all threads to sync. * * Supported in codecs: VP9 */ VP9D_SET_LOOP_FILTER_OPT, VP8_DECODER_CTRL_ID_MAX }; /** Decrypt n bytes of data from input -> output, using the decrypt_state * passed in VPXD_SET_DECRYPTOR. 
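 * A minimal pass-through callback (hypothetical sketch) would simply copy
 * the bytes:
 *   void my_decrypt(void *decrypt_state, const unsigned char *input,
 *                   unsigned char *output, int count) {
 *     (void)decrypt_state;
 *     memcpy(output, input, count);
 *   }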
*/ typedef void (*vpx_decrypt_cb)(void *decrypt_state, const unsigned char *input, unsigned char *output, int count); /*!\brief Structure to hold decryption state * * Defines a structure to hold the decryption state and access function. */ typedef struct vpx_decrypt_init { /*! Decrypt callback. */ vpx_decrypt_cb decrypt_cb; /*! Decryption state. */ void *decrypt_state; } vpx_decrypt_init; /*!\cond */ /*!\brief VP8 decoder control function parameter type * * Defines the data types that VP8D control functions take. Note that * additional common controls are defined in vp8.h * */ VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_UPDATES, int *) #define VPX_CTRL_VP8D_GET_LAST_REF_UPDATES VPX_CTRL_USE_TYPE(VP8D_GET_FRAME_CORRUPTED, int *) #define VPX_CTRL_VP8D_GET_FRAME_CORRUPTED VPX_CTRL_USE_TYPE(VP8D_GET_LAST_REF_USED, int *) #define VPX_CTRL_VP8D_GET_LAST_REF_USED VPX_CTRL_USE_TYPE(VPXD_GET_LAST_QUANTIZER, int *) #define VPX_CTRL_VPXD_GET_LAST_QUANTIZER VPX_CTRL_USE_TYPE(VPXD_SET_DECRYPTOR, vpx_decrypt_init *) #define VPX_CTRL_VPXD_SET_DECRYPTOR VPX_CTRL_USE_TYPE(VP8D_SET_DECRYPTOR, vpx_decrypt_init *) #define VPX_CTRL_VP8D_SET_DECRYPTOR VPX_CTRL_USE_TYPE(VP9D_GET_DISPLAY_SIZE, int *) #define VPX_CTRL_VP9D_GET_DISPLAY_SIZE VPX_CTRL_USE_TYPE(VP9D_GET_BIT_DEPTH, unsigned int *) #define VPX_CTRL_VP9D_GET_BIT_DEPTH VPX_CTRL_USE_TYPE(VP9D_GET_FRAME_SIZE, int *) #define VPX_CTRL_VP9D_GET_FRAME_SIZE VPX_CTRL_USE_TYPE(VP9_INVERT_TILE_DECODE_ORDER, int) #define VPX_CTRL_VP9_INVERT_TILE_DECODE_ORDER #define VPX_CTRL_VP9_DECODE_SVC_SPATIAL_LAYER VPX_CTRL_USE_TYPE(VP9_DECODE_SVC_SPATIAL_LAYER, int) #define VPX_CTRL_VP9_SET_SKIP_LOOP_FILTER VPX_CTRL_USE_TYPE(VP9_SET_SKIP_LOOP_FILTER, int) #define VPX_CTRL_VP9_DECODE_SET_ROW_MT VPX_CTRL_USE_TYPE(VP9D_SET_ROW_MT, int) #define VPX_CTRL_VP9_SET_LOOP_FILTER_OPT VPX_CTRL_USE_TYPE(VP9D_SET_LOOP_FILTER_OPT, int) /*!\endcond */ /*! @} - end defgroup vp8_decoder */ #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_VP8DX_H_ libvpx-1.8.2/vpx/vpx_codec.h000066400000000000000000000366141357355204000160040ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /*!\defgroup codec Common Algorithm Interface * This abstraction allows applications to easily support multiple video * formats with minimal code duplication. This section describes the interface * common to all codecs (both encoders and decoders). * @{ */ /*!\file * \brief Describes the codec algorithm interface to applications. * * This file describes the interface between an application and a * video codec algorithm. * * An application instantiates a specific codec instance by using * vpx_codec_init() and a pointer to the algorithm's interface structure: *
 *     <pre>
 *     my_app.c:
 *       extern vpx_codec_iface_t my_codec;
 *       {
 *           vpx_codec_ctx_t algo;
 *           res = vpx_codec_init(&algo, &my_codec);
 *       }
 *     </pre>
* * Once initialized, the instance is manged using other functions from * the vpx_codec_* family. */ #ifndef VPX_VPX_VPX_CODEC_H_ #define VPX_VPX_VPX_CODEC_H_ #ifdef __cplusplus extern "C" { #endif #include "./vpx_image.h" #include "./vpx_integer.h" /*!\brief Decorator indicating a function is deprecated */ #ifndef VPX_DEPRECATED #if defined(__GNUC__) && __GNUC__ #define VPX_DEPRECATED __attribute__((deprecated)) #elif defined(_MSC_VER) #define VPX_DEPRECATED #else #define VPX_DEPRECATED #endif #endif /* VPX_DEPRECATED */ #ifndef VPX_DECLSPEC_DEPRECATED #if defined(__GNUC__) && __GNUC__ #define VPX_DECLSPEC_DEPRECATED /**< \copydoc #VPX_DEPRECATED */ #elif defined(_MSC_VER) /*!\brief \copydoc #VPX_DEPRECATED */ #define VPX_DECLSPEC_DEPRECATED __declspec(deprecated) #else #define VPX_DECLSPEC_DEPRECATED /**< \copydoc #VPX_DEPRECATED */ #endif #endif /* VPX_DECLSPEC_DEPRECATED */ /*!\brief Decorator indicating a function is potentially unused */ #ifndef VPX_UNUSED #if defined(__GNUC__) || defined(__clang__) #define VPX_UNUSED __attribute__((unused)) #else #define VPX_UNUSED #endif #endif /* VPX_UNUSED */ /*!\brief Current ABI version number * * \internal * If this file is altered in any way that changes the ABI, this value * must be bumped. Examples include, but are not limited to, changing * types, removing or reassigning enums, adding/removing/rearranging * fields to structures */ #define VPX_CODEC_ABI_VERSION (4 + VPX_IMAGE_ABI_VERSION) /**<\hideinitializer*/ /*!\brief Algorithm return codes */ typedef enum { /*!\brief Operation completed without error */ VPX_CODEC_OK, /*!\brief Unspecified error */ VPX_CODEC_ERROR, /*!\brief Memory operation failed */ VPX_CODEC_MEM_ERROR, /*!\brief ABI version mismatch */ VPX_CODEC_ABI_MISMATCH, /*!\brief Algorithm does not have required capability */ VPX_CODEC_INCAPABLE, /*!\brief The given bitstream is not supported. * * The bitstream was unable to be parsed at the highest level. The decoder * is unable to proceed. This error \ref SHOULD be treated as fatal to the * stream. */ VPX_CODEC_UNSUP_BITSTREAM, /*!\brief Encoded bitstream uses an unsupported feature * * The decoder does not implement a feature required by the encoder. This * return code should only be used for features that prevent future * pictures from being properly decoded. This error \ref MAY be treated as * fatal to the stream or \ref MAY be treated as fatal to the current GOP. */ VPX_CODEC_UNSUP_FEATURE, /*!\brief The coded data for this stream is corrupt or incomplete * * There was a problem decoding the current frame. This return code * should only be used for failures that prevent future pictures from * being properly decoded. This error \ref MAY be treated as fatal to the * stream or \ref MAY be treated as fatal to the current GOP. If decoding * is continued for the current GOP, artifacts may be present. */ VPX_CODEC_CORRUPT_FRAME, /*!\brief An application-supplied parameter is not valid. * */ VPX_CODEC_INVALID_PARAM, /*!\brief An iterator reached the end of list. * */ VPX_CODEC_LIST_END } vpx_codec_err_t; /*! \brief Codec capabilities bitfield * * Each codec advertises the capabilities it supports as part of its * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces * or functionality, and are not required to be supported. * * The available flags are specified by VPX_CODEC_CAP_* defines. */ typedef long vpx_codec_caps_t; #define VPX_CODEC_CAP_DECODER 0x1 /**< Is a decoder */ #define VPX_CODEC_CAP_ENCODER 0x2 /**< Is an encoder */ /*! 
Can support images at greater than 8 bitdepth. */ #define VPX_CODEC_CAP_HIGHBITDEPTH 0x4 /*! \brief Initialization-time Feature Enabling * * Certain codec features must be known at initialization time, to allow for * proper memory allocation. * * The available flags are specified by VPX_CODEC_USE_* defines. */ typedef long vpx_codec_flags_t; /*!\brief Codec interface structure. * * Contains function pointers and other data private to the codec * implementation. This structure is opaque to the application. */ typedef const struct vpx_codec_iface vpx_codec_iface_t; /*!\brief Codec private data structure. * * Contains data private to the codec implementation. This structure is opaque * to the application. */ typedef struct vpx_codec_priv vpx_codec_priv_t; /*!\brief Iterator * * Opaque storage used for iterating over lists. */ typedef const void *vpx_codec_iter_t; /*!\brief Codec context structure * * All codecs \ref MUST support this context structure fully. In general, * this data should be considered private to the codec algorithm, and * not be manipulated or examined by the calling application. Applications * may reference the 'name' member to get a printable description of the * algorithm. */ typedef struct vpx_codec_ctx { const char *name; /**< Printable interface name */ vpx_codec_iface_t *iface; /**< Interface pointers */ vpx_codec_err_t err; /**< Last returned error */ const char *err_detail; /**< Detailed info, if available */ vpx_codec_flags_t init_flags; /**< Flags passed at init time */ union { /**< Decoder Configuration Pointer */ const struct vpx_codec_dec_cfg *dec; /**< Encoder Configuration Pointer */ const struct vpx_codec_enc_cfg *enc; const void *raw; } config; /**< Configuration pointer aliasing union */ vpx_codec_priv_t *priv; /**< Algorithm private storage */ } vpx_codec_ctx_t; /*!\brief Bit depth for codec * * * This enumeration determines the bit depth of the codec. */ typedef enum vpx_bit_depth { VPX_BITS_8 = 8, /**< 8 bits */ VPX_BITS_10 = 10, /**< 10 bits */ VPX_BITS_12 = 12, /**< 12 bits */ } vpx_bit_depth_t; /* * Library Version Number Interface * * For example, see the following sample return values: * vpx_codec_version() (1<<16 | 2<<8 | 3) * vpx_codec_version_str() "v1.2.3-rc1-16-gec6a1ba" * vpx_codec_version_extra_str() "rc1-16-gec6a1ba" */ /*!\brief Return the version information (as an integer) * * Returns a packed encoding of the library version number. This will only * include * the major.minor.patch component of the version number. Note that this encoded * value should be accessed through the macros provided, as the encoding may * change * in the future. * */ int vpx_codec_version(void); #define VPX_VERSION_MAJOR(v) \ (((v) >> 16) & 0xff) /**< extract major from packed version */ #define VPX_VERSION_MINOR(v) \ (((v) >> 8) & 0xff) /**< extract minor from packed version */ #define VPX_VERSION_PATCH(v) \ (((v) >> 0) & 0xff) /**< extract patch from packed version */ /*!\brief Return the version major number */ #define vpx_codec_version_major() ((vpx_codec_version() >> 16) & 0xff) /*!\brief Return the version minor number */ #define vpx_codec_version_minor() ((vpx_codec_version() >> 8) & 0xff) /*!\brief Return the version patch number */ #define vpx_codec_version_patch() ((vpx_codec_version() >> 0) & 0xff) /*!\brief Return the version information (as a string) * * Returns a printable string containing the full library version number. 
This * may * contain additional text following the three digit version number, to * indicate * release candidates, prerelease versions, etc. * */ const char *vpx_codec_version_str(void); /*!\brief Return the version information (as a string) * * Returns a printable "extra string". This is the component of the string * returned * by vpx_codec_version_str() following the three digit version number. * */ const char *vpx_codec_version_extra_str(void); /*!\brief Return the build configuration * * Returns a printable string containing an encoded version of the build * configuration. This may be useful to vpx support. * */ const char *vpx_codec_build_config(void); /*!\brief Return the name for a given interface * * Returns a human readable string for the name of the given codec interface. * * \param[in] iface Interface pointer * */ const char *vpx_codec_iface_name(vpx_codec_iface_t *iface); /*!\brief Convert error number to printable string * * Returns a human readable string for the last error returned by the * algorithm. The returned error will be one line and will not contain * any newline characters. * * * \param[in] err Error number. * */ const char *vpx_codec_err_to_string(vpx_codec_err_t err); /*!\brief Retrieve error synopsis for codec context * * Returns a human readable string for the last error returned by the * algorithm. The returned error will be one line and will not contain * any newline characters. * * * \param[in] ctx Pointer to this instance's context. * */ const char *vpx_codec_error(vpx_codec_ctx_t *ctx); /*!\brief Retrieve detailed error information for codec context * * Returns a human readable string providing detailed information about * the last error. * * \param[in] ctx Pointer to this instance's context. * * \retval NULL * No detailed information is available. */ const char *vpx_codec_error_detail(vpx_codec_ctx_t *ctx); /* REQUIRED FUNCTIONS * * The following functions are required to be implemented for all codecs. * They represent the base case functionality expected of all codecs. */ /*!\brief Destroy a codec instance * * Destroys a codec context, freeing any associated memory buffers. * * \param[in] ctx Pointer to this instance's context * * \retval #VPX_CODEC_OK * The codec instance was destroyed. * \retval #VPX_CODEC_MEM_ERROR * Memory allocation failed. */ vpx_codec_err_t vpx_codec_destroy(vpx_codec_ctx_t *ctx); /*!\brief Get the capabilities of an algorithm. * * Retrieves the capabilities bitfield from the algorithm's interface. * * \param[in] iface Pointer to the algorithm interface * */ vpx_codec_caps_t vpx_codec_get_caps(vpx_codec_iface_t *iface); /*!\brief Control algorithm * * This function is used to exchange algorithm specific data with the codec * instance. This can be used to implement features specific to a particular * algorithm. * * This wrapper function dispatches the request to the helper function * associated with the given ctrl_id. It tries to call this function * transparently, but will return #VPX_CODEC_ERROR if the request could not * be dispatched. * * Note that this function should not be used directly. Call the * #vpx_codec_control wrapper macro instead. * * \param[in] ctx Pointer to this instance's context * \param[in] ctrl_id Algorithm specific control identifier * * \retval #VPX_CODEC_OK * The control request was processed. * \retval #VPX_CODEC_ERROR * The control request was not processed. * \retval #VPX_CODEC_INVALID_PARAM * The data was not valid.
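 *
 * A minimal usage sketch (the control id and value are illustrative; typed
 * control ids such as VP8E_SET_CPUUSED are declared in vpx/vp8cx.h):
 * \code
 *   // Dispatches through the typed vpx_codec_control_##id wrapper.
 *   const vpx_codec_err_t res = vpx_codec_control(&ctx, VP8E_SET_CPUUSED, 4);
 * \endcode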
*/ vpx_codec_err_t vpx_codec_control_(vpx_codec_ctx_t *ctx, int ctrl_id, ...); #if defined(VPX_DISABLE_CTRL_TYPECHECKS) && VPX_DISABLE_CTRL_TYPECHECKS #define vpx_codec_control(ctx, id, data) vpx_codec_control_(ctx, id, data) #define VPX_CTRL_USE_TYPE(id, typ) #define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) #define VPX_CTRL_VOID(id, typ) #else /*!\brief vpx_codec_control wrapper macro * * This macro allows for type safe conversions across the variadic parameter * to vpx_codec_control_(). * * \internal * It works by dispatching the call to the control function through a wrapper * function named with the id parameter. */ #define vpx_codec_control(ctx, id, data) \ vpx_codec_control_##id(ctx, id, data) /**<\hideinitializer*/ /*!\brief vpx_codec_control type definition macro * * This macro allows for type safe conversions across the variadic parameter * to vpx_codec_control_(). It defines the type of the argument for a given * control identifier. * * \internal * It defines a static function with * the correctly typed arguments as a wrapper to the type-unsafe internal * function. */ #define VPX_CTRL_USE_TYPE(id, typ) \ static vpx_codec_err_t vpx_codec_control_##id(vpx_codec_ctx_t *, int, typ) \ VPX_UNUSED; \ \ static vpx_codec_err_t vpx_codec_control_##id(vpx_codec_ctx_t *ctx, \ int ctrl_id, typ data) { \ return vpx_codec_control_(ctx, ctrl_id, data); \ } /**<\hideinitializer*/ /*!\brief vpx_codec_control deprecated type definition macro * * Like #VPX_CTRL_USE_TYPE, but indicates that the specified control is * deprecated and should not be used. Consult the documentation for your * codec for more information. * * \internal * It defines a static function with the correctly typed arguments as a * wrapper to the type-unsafe internal function. */ #define VPX_CTRL_USE_TYPE_DEPRECATED(id, typ) \ VPX_DECLSPEC_DEPRECATED static vpx_codec_err_t vpx_codec_control_##id( \ vpx_codec_ctx_t *, int, typ) VPX_DEPRECATED VPX_UNUSED; \ \ VPX_DECLSPEC_DEPRECATED static vpx_codec_err_t vpx_codec_control_##id( \ vpx_codec_ctx_t *ctx, int ctrl_id, typ data) { \ return vpx_codec_control_(ctx, ctrl_id, data); \ } /**<\hideinitializer*/ /*!\brief vpx_codec_control void type definition macro * * This macro allows for type safe conversions across the variadic parameter * to vpx_codec_control_(). It indicates that a given control identifier takes * no argument. * * \internal * It defines a static function without a data argument as a wrapper to the * type-unsafe internal function. */ #define VPX_CTRL_VOID(id) \ static vpx_codec_err_t vpx_codec_control_##id(vpx_codec_ctx_t *, int) \ VPX_UNUSED; \ \ static vpx_codec_err_t vpx_codec_control_##id(vpx_codec_ctx_t *ctx, \ int ctrl_id) { \ return vpx_codec_control_(ctx, ctrl_id); \ } /**<\hideinitializer*/ #endif /*!@} - end defgroup codec*/ #ifdef __cplusplus } #endif #endif // VPX_VPX_VPX_CODEC_H_ libvpx-1.8.2/vpx/vpx_codec.mk ## ## Copyright (c) 2010 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree.
## API_EXPORTS += exports API_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h API_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8.h API_DOC_SRCS-$(CONFIG_VP8_ENCODER) += vp8cx.h API_SRCS-$(CONFIG_VP8_DECODER) += vp8.h API_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8.h API_DOC_SRCS-$(CONFIG_VP8_DECODER) += vp8dx.h API_DOC_SRCS-yes += vpx_codec.h API_DOC_SRCS-yes += vpx_decoder.h API_DOC_SRCS-yes += vpx_encoder.h API_DOC_SRCS-yes += vpx_frame_buffer.h API_DOC_SRCS-yes += vpx_image.h API_SRCS-yes += src/vpx_decoder.c API_SRCS-yes += vpx_decoder.h API_SRCS-yes += src/vpx_encoder.c API_SRCS-yes += vpx_encoder.h API_SRCS-yes += internal/vpx_codec_internal.h API_SRCS-yes += src/vpx_codec.c API_SRCS-yes += src/vpx_image.c API_SRCS-yes += vpx_codec.h API_SRCS-yes += vpx_codec.mk API_SRCS-yes += vpx_frame_buffer.h API_SRCS-yes += vpx_image.h API_SRCS-yes += vpx_integer.h libvpx-1.8.2/vpx/vpx_decoder.h /* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_VPX_DECODER_H_ #define VPX_VPX_VPX_DECODER_H_ /*!\defgroup decoder Decoder Algorithm Interface * \ingroup codec * This abstraction allows applications using this decoder to easily support * multiple video formats with minimal code duplication. This section describes * the interface common to all decoders. * @{ */ /*!\file * \brief Describes the decoder algorithm interface to applications. * * This file describes the interface between an application and a * video decoder algorithm. * */ #ifdef __cplusplus extern "C" { #endif #include "./vpx_codec.h" #include "./vpx_frame_buffer.h" /*!\brief Current ABI version number * * \internal * If this file is altered in any way that changes the ABI, this value * must be bumped. Examples include, but are not limited to, changing * types, removing or reassigning enums, adding/removing/rearranging * fields to structures */ #define VPX_DECODER_ABI_VERSION \ (3 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ /*! \brief Decoder capabilities bitfield * * Each decoder advertises the capabilities it supports as part of its * ::vpx_codec_iface_t interface structure. Capabilities are extra interfaces * or functionality, and are not required to be supported by a decoder. * * The available flags are specified by VPX_CODEC_CAP_* defines. */ #define VPX_CODEC_CAP_PUT_SLICE 0x10000 /**< Will issue put_slice callbacks */ #define VPX_CODEC_CAP_PUT_FRAME 0x20000 /**< Will issue put_frame callbacks */ #define VPX_CODEC_CAP_POSTPROC 0x40000 /**< Can postprocess decoded frame */ /*!\brief Can conceal errors due to packet loss */ #define VPX_CODEC_CAP_ERROR_CONCEALMENT 0x80000 /*!\brief Can receive encoded frames one fragment at a time */ #define VPX_CODEC_CAP_INPUT_FRAGMENTS 0x100000 /*! \brief Initialization-time Feature Enabling * * Certain codec features must be known at initialization time, to allow for * proper memory allocation. * * The available flags are specified by VPX_CODEC_USE_* defines.
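 *
 * For orientation, a minimal decode loop using this interface might look as
 * follows (a sketch only; vpx_codec_vp9_dx() comes from vpx/vp8dx.h, the
 * buf/buf_sz input is application-provided, and error handling is elided):
 * \code
 *   vpx_codec_ctx_t ctx;
 *   if (vpx_codec_dec_init(&ctx, vpx_codec_vp9_dx(), NULL, 0)) abort();
 *   if (vpx_codec_decode(&ctx, buf, buf_sz, NULL, 0)) abort();
 *   vpx_codec_iter_t iter = NULL;
 *   const vpx_image_t *img;
 *   while ((img = vpx_codec_get_frame(&ctx, &iter)) != NULL) {
 *     // Consume img->planes[] / img->stride[] here.
 *   }
 *   vpx_codec_destroy(&ctx);
 * \endcode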
*/ /*!\brief Can support frame-based multi-threading */ #define VPX_CODEC_CAP_FRAME_THREADING 0x200000 /*!\brief Can support external frame buffers */ #define VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER 0x400000 #define VPX_CODEC_USE_POSTPROC 0x10000 /**< Postprocess decoded frame */ /*!\brief Conceal errors in decoded frames */ #define VPX_CODEC_USE_ERROR_CONCEALMENT 0x20000 /*!\brief The input frame should be passed to the decoder one fragment at a * time */ #define VPX_CODEC_USE_INPUT_FRAGMENTS 0x40000 /*!\brief Enable frame-based multi-threading */ #define VPX_CODEC_USE_FRAME_THREADING 0x80000 /*!\brief Stream properties * * This structure is used to query or set properties of the decoded * stream. Algorithms may extend this structure with data specific * to their bitstream by setting the sz member appropriately. */ typedef struct vpx_codec_stream_info { unsigned int sz; /**< Size of this structure */ unsigned int w; /**< Width (or 0 for unknown/default) */ unsigned int h; /**< Height (or 0 for unknown/default) */ unsigned int is_kf; /**< Current frame is a keyframe */ } vpx_codec_stream_info_t; /* REQUIRED FUNCTIONS * * The following functions are required to be implemented for all decoders. * They represent the base case functionality expected of all decoders. */ /*!\brief Initialization Configurations * * This structure is used to pass init time configuration options to the * decoder. */ typedef struct vpx_codec_dec_cfg { unsigned int threads; /**< Maximum number of threads to use, default 1 */ unsigned int w; /**< Width */ unsigned int h; /**< Height */ } vpx_codec_dec_cfg_t; /**< alias for struct vpx_codec_dec_cfg */ /*!\brief Initialize a decoder instance * * Initializes a decoder context using the given interface. Applications * should call the vpx_codec_dec_init convenience macro instead of this * function directly, to ensure that the ABI version number parameter * is properly initialized. * * If the library was configured with --disable-multithread, this call * is not thread safe and should be guarded with a lock if being used * in a multithreaded context. * * \param[in] ctx Pointer to this instance's context. * \param[in] iface Pointer to the algorithm interface to use. * \param[in] cfg Configuration to use, if known. May be NULL. * \param[in] flags Bitfield of VPX_CODEC_USE_* flags * \param[in] ver ABI version number. Must be set to * VPX_DECODER_ABI_VERSION * \retval #VPX_CODEC_OK * The decoder algorithm initialized. * \retval #VPX_CODEC_MEM_ERROR * Memory allocation failed. */ vpx_codec_err_t vpx_codec_dec_init_ver(vpx_codec_ctx_t *ctx, vpx_codec_iface_t *iface, const vpx_codec_dec_cfg_t *cfg, vpx_codec_flags_t flags, int ver); /*!\brief Convenience macro for vpx_codec_dec_init_ver() * * Ensures the ABI version parameter is properly set. */ #define vpx_codec_dec_init(ctx, iface, cfg, flags) \ vpx_codec_dec_init_ver(ctx, iface, cfg, flags, VPX_DECODER_ABI_VERSION) /*!\brief Parse stream info from a buffer * * Performs high level parsing of the bitstream. Construction of a decoder * context is not necessary. Can be used to determine if the bitstream is * of the proper format, and to extract information from the stream. * * \param[in] iface Pointer to the algorithm interface * \param[in] data Pointer to a block of data to parse * \param[in] data_sz Size of the data buffer * \param[in,out] si Pointer to stream info to update. The size member * \ref MUST be properly initialized, but \ref MAY be * clobbered by the algorithm. This parameter \ref MAY * be NULL.
* * \retval #VPX_CODEC_OK * Bitstream is parsable and stream information updated */ vpx_codec_err_t vpx_codec_peek_stream_info(vpx_codec_iface_t *iface, const uint8_t *data, unsigned int data_sz, vpx_codec_stream_info_t *si); /*!\brief Return information about the current stream. * * Returns information about the stream that has been parsed during decoding. * * \param[in] ctx Pointer to this instance's context * \param[in,out] si Pointer to stream info to update. The size member * \ref MUST be properly initialized, but \ref MAY be * clobbered by the algorithm. This parameter \ref MAY * be NULL. * * \retval #VPX_CODEC_OK * Bitstream is parsable and stream information updated */ vpx_codec_err_t vpx_codec_get_stream_info(vpx_codec_ctx_t *ctx, vpx_codec_stream_info_t *si); /*!\brief Decode data * * Processes a buffer of coded data. If the processing results in a new * decoded frame becoming available, PUT_SLICE and PUT_FRAME events may be * generated, as appropriate. Encoded data \ref MUST be passed in DTS (decode * time stamp) order. Frames produced will always be in PTS (presentation * time stamp) order. * If the decoder is configured with VPX_CODEC_USE_INPUT_FRAGMENTS enabled, * data and data_sz can contain a fragment of the encoded frame. Fragment * \#n must contain at least partition \#n, but can also contain subsequent * partitions (\#n+1 - \#n+i), and if so, fragments \#n+1, .., \#n+i must * be empty. When no more data is available, this function should be called * with NULL as data and 0 as data_sz. The memory passed to this function * must be available until the frame has been decoded. * * \param[in] ctx Pointer to this instance's context * \param[in] data Pointer to this block of new coded data. If * NULL, a VPX_CODEC_CB_PUT_FRAME event is posted * for the previously decoded frame. * \param[in] data_sz Size of the coded data, in bytes. * \param[in] user_priv Application specific data to associate with * this frame. * \param[in] deadline Soft deadline the decoder should attempt to meet, * in us. Set to zero for unlimited. * * \return Returns #VPX_CODEC_OK if the coded data was processed completely * and future pictures can be decoded without error. Otherwise, * see the descriptions of the other error codes in ::vpx_codec_err_t * for recoverability capabilities. */ vpx_codec_err_t vpx_codec_decode(vpx_codec_ctx_t *ctx, const uint8_t *data, unsigned int data_sz, void *user_priv, long deadline); /*!\brief Decoded frames iterator * * Iterates over a list of the frames available for display. The iterator * storage should be initialized to NULL to start the iteration. Iteration is * complete when this function returns NULL. * * The list of available frames becomes valid upon completion of the * vpx_codec_decode call, and remains valid until the next call to * vpx_codec_decode. * * \param[in] ctx Pointer to this instance's context * \param[in,out] iter Iterator storage, initialized to NULL * * \return Returns a pointer to an image, if one is ready for display. Frames * produced will always be in PTS (presentation time stamp) order. */ vpx_image_t *vpx_codec_get_frame(vpx_codec_ctx_t *ctx, vpx_codec_iter_t *iter); /*!\defgroup cap_put_frame Frame-Based Decoding Functions * * The following functions are required to be implemented for all decoders * that advertise the VPX_CODEC_CAP_PUT_FRAME capability. 
Calling these * functions * for codecs that don't advertise this capability will result in an error * code being returned, usually VPX_CODEC_ERROR * @{ */ /*!\brief put frame callback prototype * * This callback is invoked by the decoder to notify the application of * the availability of decoded image data. */ typedef void (*vpx_codec_put_frame_cb_fn_t)(void *user_priv, const vpx_image_t *img); /*!\brief Register for notification of frame completion. * * Registers a given function to be called when a decoded frame is * available. * * \param[in] ctx Pointer to this instance's context * \param[in] cb Pointer to the callback function * \param[in] user_priv User's private data * * \retval #VPX_CODEC_OK * Callback successfully registered. * \retval #VPX_CODEC_ERROR * Decoder context not initialized, or algorithm not capable of * posting frame completion. */ vpx_codec_err_t vpx_codec_register_put_frame_cb(vpx_codec_ctx_t *ctx, vpx_codec_put_frame_cb_fn_t cb, void *user_priv); /*!@} - end defgroup cap_put_frame */ /*!\defgroup cap_put_slice Slice-Based Decoding Functions * * The following functions are required to be implemented for all decoders * that advertise the VPX_CODEC_CAP_PUT_SLICE capability. Calling these * functions * for codecs that don't advertise this capability will result in an error * code being returned, usually VPX_CODEC_ERROR * @{ */ /*!\brief put slice callback prototype * * This callback is invoked by the decoder to notify the application of * the availability of partially decoded image data. */ typedef void (*vpx_codec_put_slice_cb_fn_t)(void *user_priv, const vpx_image_t *img, const vpx_image_rect_t *valid, const vpx_image_rect_t *update); /*!\brief Register for notification of slice completion. * * Registers a given function to be called when a decoded slice is * available. * * \param[in] ctx Pointer to this instance's context * \param[in] cb Pointer to the callback function * \param[in] user_priv User's private data * * \retval #VPX_CODEC_OK * Callback successfully registered. * \retval #VPX_CODEC_ERROR * Decoder context not initialized, or algorithm not capable of * posting slice completion. */ vpx_codec_err_t vpx_codec_register_put_slice_cb(vpx_codec_ctx_t *ctx, vpx_codec_put_slice_cb_fn_t cb, void *user_priv); /*!@} - end defgroup cap_put_slice*/ /*!\defgroup cap_external_frame_buffer External Frame Buffer Functions * * The following section is required to be implemented for all decoders * that advertise the VPX_CODEC_CAP_EXTERNAL_FRAME_BUFFER capability. * Calling this function for codecs that don't advertise this capability * will result in an error code being returned, usually VPX_CODEC_ERROR. * * \note * Currently this only works with VP9. * @{ */ /*!\brief Pass in external frame buffers for the decoder to use. * * Registers functions to be called when libvpx needs a frame buffer * to decode the current frame and a function to be called when libvpx does * not internally reference the frame buffer. This set function must * be called before the first call to decode or libvpx will assume the * default behavior of allocating frame buffers internally. * * \param[in] ctx Pointer to this instance's context * \param[in] cb_get Pointer to the get callback function * \param[in] cb_release Pointer to the release callback function * \param[in] cb_priv Callback's private data * * \retval #VPX_CODEC_OK * External frame buffers will be used by libvpx. * \retval #VPX_CODEC_INVALID_PARAM * One or more of the callbacks were NULL.
* \retval #VPX_CODEC_ERROR * Decoder context not initialized, or algorithm not capable of * using external frame buffers. * * \note * When decoding VP9, the application may be required to pass in at least * #VP9_MAXIMUM_REF_BUFFERS + #VPX_MAXIMUM_WORK_BUFFERS external frame * buffers. */ vpx_codec_err_t vpx_codec_set_frame_buffer_functions( vpx_codec_ctx_t *ctx, vpx_get_frame_buffer_cb_fn_t cb_get, vpx_release_frame_buffer_cb_fn_t cb_release, void *cb_priv); /*!@} - end defgroup cap_external_frame_buffer */ /*!@} - end defgroup decoder*/ #ifdef __cplusplus } #endif #endif // VPX_VPX_VPX_DECODER_H_ libvpx-1.8.2/vpx/vpx_encoder.h /* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_VPX_ENCODER_H_ #define VPX_VPX_VPX_ENCODER_H_ /*!\defgroup encoder Encoder Algorithm Interface * \ingroup codec * This abstraction allows applications using this encoder to easily support * multiple video formats with minimal code duplication. This section describes * the interface common to all encoders. * @{ */ /*!\file * \brief Describes the encoder algorithm interface to applications. * * This file describes the interface between an application and a * video encoder algorithm. * */ #ifdef __cplusplus extern "C" { #endif #include "./vpx_codec.h" /*! Temporal Scalability: Maximum length of the sequence defining frame * layer membership */ #define VPX_TS_MAX_PERIODICITY 16 /*! Temporal Scalability: Maximum number of coding layers */ #define VPX_TS_MAX_LAYERS 5 /*! Temporal+Spatial Scalability: Maximum number of coding layers */ #define VPX_MAX_LAYERS 12 // 3 temporal + 4 spatial layers are allowed. /*! Spatial Scalability: Maximum number of coding layers */ #define VPX_SS_MAX_LAYERS 5 /*! Spatial Scalability: Default number of coding layers */ #define VPX_SS_DEFAULT_LAYERS 1 /*!\brief Current ABI version number * * \internal * If this file is altered in any way that changes the ABI, this value * must be bumped. Examples include, but are not limited to, changing * types, removing or reassigning enums, adding/removing/rearranging * fields to structures */ #define VPX_ENCODER_ABI_VERSION \ (14 + VPX_CODEC_ABI_VERSION) /**<\hideinitializer*/ /*! \brief Encoder capabilities bitfield * * Each encoder advertises the capabilities it supports as part of its * ::vpx_codec_iface_t interface structure. Capabilities are extra * interfaces or functionality, and are not required to be supported * by an encoder. * * The available flags are specified by VPX_CODEC_CAP_* defines. */ #define VPX_CODEC_CAP_PSNR 0x10000 /**< Can issue PSNR packets */ /*! Can output one partition at a time. Each partition is returned in its * own VPX_CODEC_CX_FRAME_PKT, with the FRAME_IS_FRAGMENT flag set for * every partition but the last. In this mode all frames are always * returned partition by partition. */ #define VPX_CODEC_CAP_OUTPUT_PARTITION 0x20000 /*! \brief Initialization-time Feature Enabling * * Certain codec features must be known at initialization time, to allow * for proper memory allocation. * * The available flags are specified by VPX_CODEC_USE_* defines.
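 *
 * For example (a sketch; cfg is assumed to be a populated
 * vpx_codec_enc_cfg_t and vpx_codec_vp8_cx() comes from vpx/vp8cx.h):
 * \code
 *   // Request PSNR packets alongside compressed frames at init time.
 *   vpx_codec_enc_init(&ctx, vpx_codec_vp8_cx(), &cfg, VPX_CODEC_USE_PSNR);
 * \endcode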
*/ #define VPX_CODEC_USE_PSNR 0x10000 /**< Calculate PSNR on each frame */ /*!\brief Make the encoder output one partition at a time. */ #define VPX_CODEC_USE_OUTPUT_PARTITION 0x20000 #define VPX_CODEC_USE_HIGHBITDEPTH 0x40000 /**< Use high bitdepth */ /*!\brief Generic fixed size buffer structure * * This structure is able to hold a reference to any fixed size buffer. */ typedef struct vpx_fixed_buf { void *buf; /**< Pointer to the data */ size_t sz; /**< Length of the buffer, in chars */ } vpx_fixed_buf_t; /**< alias for struct vpx_fixed_buf */ /*!\brief Time Stamp Type * * An integer, which when multiplied by the stream's time base, provides * the absolute time of a sample. */ typedef int64_t vpx_codec_pts_t; /*!\brief Compressed Frame Flags * * This type represents a bitfield containing information about a compressed * frame that may be useful to an application. The most significant 16 bits * can be used by an algorithm to provide additional detail, for example to * support frame types that are codec specific (MPEG-1 D-frames for example) */ typedef uint32_t vpx_codec_frame_flags_t; #define VPX_FRAME_IS_KEY 0x1 /**< frame is the start of a GOP */ /*!\brief frame can be dropped without affecting the stream (no future frame * depends on this one) */ #define VPX_FRAME_IS_DROPPABLE 0x2 /*!\brief frame should be decoded but will not be shown */ #define VPX_FRAME_IS_INVISIBLE 0x4 /*!\brief this is a fragment of the encoded frame */ #define VPX_FRAME_IS_FRAGMENT 0x8 /*!\brief Error Resilient flags * * These flags define which error resilient features to enable in the * encoder. The flags are specified through the * vpx_codec_enc_cfg::g_error_resilient variable. */ typedef uint32_t vpx_codec_er_flags_t; /*!\brief Improve resiliency against losses of whole frames */ #define VPX_ERROR_RESILIENT_DEFAULT 0x1 /*!\brief The frame partitions are independently decodable by the bool decoder, * meaning that partitions can be decoded even though earlier partitions have * been lost. Note that intra prediction is still done over the partition * boundary. */ #define VPX_ERROR_RESILIENT_PARTITIONS 0x2 /*!\brief Encoder output packet variants * * This enumeration lists the different kinds of data packets that can be * returned by calls to vpx_codec_get_cx_data(). Algorithms \ref MAY * extend this list to provide additional functionality. */ enum vpx_codec_cx_pkt_kind { VPX_CODEC_CX_FRAME_PKT, /**< Compressed video frame */ VPX_CODEC_STATS_PKT, /**< Two-pass statistics for this frame */ VPX_CODEC_FPMB_STATS_PKT, /**< first pass mb statistics for this frame */ VPX_CODEC_PSNR_PKT, /**< PSNR statistics for this frame */ VPX_CODEC_CUSTOM_PKT = 256 /**< Algorithm extensions */ }; /*!\brief Encoder output packet * * This structure contains the different kinds of output data the encoder * may produce while compressing a frame. */ typedef struct vpx_codec_cx_pkt { enum vpx_codec_cx_pkt_kind kind; /**< packet variant */ union { struct { void *buf; /**< compressed data buffer */ size_t sz; /**< length of compressed data */ /*!\brief time stamp to show frame (in timebase units) */ vpx_codec_pts_t pts; /*!\brief duration to show frame (in timebase units) */ unsigned long duration; vpx_codec_frame_flags_t flags; /**< flags for this frame */ /*!\brief the partition id defines the decoding order of the partitions. * Only applicable when "output partition" mode is enabled. First * partition has id 0.*/ int partition_id; /*!\brief Width and height of frames in this packet. 
VP8 will only use the * first one.*/ unsigned int width[VPX_SS_MAX_LAYERS]; /**< frame width */ unsigned int height[VPX_SS_MAX_LAYERS]; /**< frame height */ /*!\brief Flag to indicate if spatial layer frame in this packet is * encoded or dropped. For VP8 this will always be set to 1.*/ uint8_t spatial_layer_encoded[VPX_SS_MAX_LAYERS]; } frame; /**< data for compressed frame packet */ vpx_fixed_buf_t twopass_stats; /**< data for two-pass packet */ vpx_fixed_buf_t firstpass_mb_stats; /**< first pass mb packet */ struct vpx_psnr_pkt { unsigned int samples[4]; /**< Number of samples, total/y/u/v */ uint64_t sse[4]; /**< sum squared error, total/y/u/v */ double psnr[4]; /**< PSNR, total/y/u/v */ } psnr; /**< data for PSNR packet */ vpx_fixed_buf_t raw; /**< data for arbitrary packets */ /* This packet size is fixed to allow codecs to extend this * interface without having to manage storage for raw packets, * i.e., if it's smaller than 128 bytes, you can store it in the * packet list directly. */ char pad[128 - sizeof(enum vpx_codec_cx_pkt_kind)]; /**< fixed sz */ } data; /**< packet data */ } vpx_codec_cx_pkt_t; /**< alias for struct vpx_codec_cx_pkt */ /*!\brief Encoder return output buffer callback * * This callback function, when registered, returns with packets when each * spatial layer is encoded. */ typedef void (*vpx_codec_enc_output_cx_pkt_cb_fn_t)(vpx_codec_cx_pkt_t *pkt, void *user_data); /*!\brief Callback function pointer / user data pair storage */ typedef struct vpx_codec_enc_output_cx_cb_pair { vpx_codec_enc_output_cx_pkt_cb_fn_t output_cx_pkt; /**< Callback function */ void *user_priv; /**< Pointer to private data */ } vpx_codec_priv_output_cx_pkt_cb_pair_t; /*!\brief Rational Number * * This structure holds a fractional value. */ typedef struct vpx_rational { int num; /**< fraction numerator */ int den; /**< fraction denominator */ } vpx_rational_t; /**< alias for struct vpx_rational */ /*!\brief Multi-pass Encoding Pass */ typedef enum vpx_enc_pass { VPX_RC_ONE_PASS, /**< Single pass mode */ VPX_RC_FIRST_PASS, /**< First pass of multi-pass mode */ VPX_RC_LAST_PASS /**< Final pass of multi-pass mode */ } vpx_enc_pass; /*!\brief Rate control mode */ enum vpx_rc_mode { VPX_VBR, /**< Variable Bit Rate (VBR) mode */ VPX_CBR, /**< Constant Bit Rate (CBR) mode */ VPX_CQ, /**< Constrained Quality (CQ) mode */ VPX_Q, /**< Constant Quality (Q) mode */ }; /*!\brief Keyframe placement mode. * * This enumeration determines whether keyframes are placed automatically by * the encoder or whether this behavior is disabled. Older releases of this * SDK were implemented such that VPX_KF_FIXED meant keyframes were disabled. * This name is confusing for this behavior, so the new symbols to be used * are VPX_KF_AUTO and VPX_KF_DISABLED. */ enum vpx_kf_mode { VPX_KF_FIXED, /**< deprecated, implies VPX_KF_DISABLED */ VPX_KF_AUTO, /**< Encoder determines optimal placement automatically */ VPX_KF_DISABLED = 0 /**< Encoder does not place keyframes. */ }; /*!\brief Encoded Frame Flags * * This type indicates a bitfield to be passed to vpx_codec_encode(), defining * per-frame boolean values. By convention, bits common to all codecs will be * named VPX_EFLAG_*, and bits specific to an algorithm will be named * /algo/_eflag_*. The lower order 16 bits are reserved for common use.
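 *
 * For example (a sketch; see vpx_codec_encode() below, and ctx/img/pts/
 * duration are application state):
 * \code
 *   // Force the next frame to be coded as a keyframe.
 *   vpx_codec_encode(&ctx, img, pts, duration, VPX_EFLAG_FORCE_KF,
 *                    VPX_DL_GOOD_QUALITY);
 * \endcode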
*/ typedef long vpx_enc_frame_flags_t; #define VPX_EFLAG_FORCE_KF (1 << 0) /**< Force this frame to be a keyframe */ /*!\brief Encoder configuration structure * * This structure contains the encoder settings that have common representations * across all codecs. This doesn't imply that all codecs support all features, * however. */ typedef struct vpx_codec_enc_cfg { /* * generic settings (g) */ /*!\brief Deprecated: Algorithm specific "usage" value * * This value must be zero. */ unsigned int g_usage; /*!\brief Maximum number of threads to use * * For multi-threaded implementations, use no more than this number of * threads. The codec may use fewer threads than allowed. The value * 0 is equivalent to the value 1. */ unsigned int g_threads; /*!\brief Bitstream profile to use * * Some codecs support a notion of multiple bitstream profiles. Typically * this maps to a set of features that are turned on or off. Often the * profile to use is determined by the features of the intended decoder. * Consult the documentation for the codec to determine the valid values * for this parameter, or set to zero for a sane default. */ unsigned int g_profile; /**< profile of bitstream to use */ /*!\brief Width of the frame * * This value identifies the presentation resolution of the frame, * in pixels. Note that the frames passed as input to the encoder must * have this resolution. Frames will be presented by the decoder in this * resolution, independent of any spatial resampling the encoder may do. */ unsigned int g_w; /*!\brief Height of the frame * * This value identifies the presentation resolution of the frame, * in pixels. Note that the frames passed as input to the encoder must * have this resolution. Frames will be presented by the decoder in this * resolution, independent of any spatial resampling the encoder may do. */ unsigned int g_h; /*!\brief Bit-depth of the codec * * This value identifies the bit_depth of the codec, * Only certain bit-depths are supported as identified in the * vpx_bit_depth_t enum. */ vpx_bit_depth_t g_bit_depth; /*!\brief Bit-depth of the input frames * * This value identifies the bit_depth of the input frames in bits. * Note that the frames passed as input to the encoder must have * this bit-depth. */ unsigned int g_input_bit_depth; /*!\brief Stream timebase units * * Indicates the smallest interval of time, in seconds, used by the stream. * For fixed frame rate material, or variable frame rate material where * frames are timed at a multiple of a given clock (ex: video capture), * the \ref RECOMMENDED method is to set the timebase to the reciprocal * of the frame rate (ex: 1001/30000 for 29.970 Hz NTSC). This allows the * pts to correspond to the frame number, which can be handy. For * re-encoding video from containers with absolute time timestamps, the * \ref RECOMMENDED method is to set the timebase to that of the parent * container or multimedia framework (ex: 1/1000 for ms, as in FLV). */ struct vpx_rational g_timebase; /*!\brief Enable error resilient modes. * * The error resilient bitfield indicates to the encoder which features * it should enable to take measures for streaming over lossy or noisy * links. */ vpx_codec_er_flags_t g_error_resilient; /*!\brief Multi-pass Encoding Mode * * This value should be set to the current phase for multi-pass encoding. * For single pass, set to #VPX_RC_ONE_PASS. */ enum vpx_enc_pass g_pass; /*!\brief Allow lagged encoding * * If set, this value allows the encoder to consume a number of input * frames before producing output frames. 
This allows the encoder to * base decisions for the current frame on future frames. This does * increase the latency of the encoding pipeline, so it is not appropriate * in all situations (ex: realtime encoding). * * Note that this is a maximum value -- the encoder may produce frames * sooner than the given limit. Set this value to 0 to disable this * feature. */ unsigned int g_lag_in_frames; /* * rate control settings (rc) */ /*!\brief Temporal resampling configuration, if supported by the codec. * * Temporal resampling allows the codec to "drop" frames as a strategy to * meet its target data rate. This can cause temporal discontinuities in * the encoded video, which may appear as stuttering during playback. This * trade-off is often acceptable, but for many applications is not. It can * be disabled in these cases. * * This threshold is described as a percentage of the target data buffer. * When the data buffer falls below this percentage of fullness, a * dropped frame is indicated. Set the threshold to zero (0) to disable * this feature. */ unsigned int rc_dropframe_thresh; /*!\brief Enable/disable spatial resampling, if supported by the codec. * * Spatial resampling allows the codec to compress a lower resolution * version of the frame, which is then upscaled by the encoder to the * correct presentation resolution. This increases visual quality at * low data rates, at the expense of CPU time on the encoder/decoder. */ unsigned int rc_resize_allowed; /*!\brief Internal coded frame width. * * If spatial resampling is enabled this specifies the width of the * encoded frame. */ unsigned int rc_scaled_width; /*!\brief Internal coded frame height. * * If spatial resampling is enabled this specifies the height of the * encoded frame. */ unsigned int rc_scaled_height; /*!\brief Spatial resampling up watermark. * * This threshold is described as a percentage of the target data buffer. * When the data buffer rises above this percentage of fullness, the * encoder will step up to a higher resolution version of the frame. */ unsigned int rc_resize_up_thresh; /*!\brief Spatial resampling down watermark. * * This threshold is described as a percentage of the target data buffer. * When the data buffer falls below this percentage of fullness, the * encoder will step down to a lower resolution version of the frame. */ unsigned int rc_resize_down_thresh; /*!\brief Rate control algorithm to use. * * Indicates whether the end usage of this stream is to be streamed over * a bandwidth constrained link, indicating that Constant Bit Rate (CBR) * mode should be used, or whether it will be played back on a high * bandwidth link, as from a local disk, where higher variations in * bitrate are acceptable. */ enum vpx_rc_mode rc_end_usage; /*!\brief Two-pass stats buffer. * * A buffer containing all of the stats packets produced in the first * pass, concatenated. */ vpx_fixed_buf_t rc_twopass_stats_in; /*!\brief first pass mb stats buffer. * * A buffer containing all of the first pass mb stats packets produced * in the first pass, concatenated. */ vpx_fixed_buf_t rc_firstpass_mb_stats_in; /*!\brief Target data rate * * Target bandwidth to use for this stream, in kilobits per second. */ unsigned int rc_target_bitrate; /* * quantizer settings */ /*!\brief Minimum (Best Quality) Quantizer * * The quantizer is the most direct control over the quality of the * encoded image. The range of valid values for the quantizer is codec * specific. Consult the documentation for the codec to determine the * values to use. 
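 *
 * For example (a sketch only; the values are illustrative, not
 * recommendations for any particular codec, and rc_max_quantizer is
 * documented below):
 * \code
 *   cfg.rc_min_quantizer = 4;   // best quality the encoder may select
 *   cfg.rc_max_quantizer = 52;  // worst quality the encoder may select
 * \endcode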
*/ unsigned int rc_min_quantizer; /*!\brief Maximum (Worst Quality) Quantizer * * The quantizer is the most direct control over the quality of the * encoded image. The range of valid values for the quantizer is codec * specific. Consult the documentation for the codec to determine the * values to use. */ unsigned int rc_max_quantizer; /* * bitrate tolerance */ /*!\brief Rate control adaptation undershoot control * * VP8: Expressed as a percentage of the target bitrate, * controls the maximum allowed adaptation speed of the codec. * This factor controls the maximum amount of bits that can * be subtracted from the target bitrate in order to compensate * for prior overshoot. * VP9: Expressed as a percentage of the target bitrate, a threshold * undershoot level (current rate vs target) beyond which more aggressive * corrective measures are taken. * * * Valid values in the range VP8:0-1000 VP9: 0-100. */ unsigned int rc_undershoot_pct; /*!\brief Rate control adaptation overshoot control * * VP8: Expressed as a percentage of the target bitrate, * controls the maximum allowed adaptation speed of the codec. * This factor controls the maximum amount of bits that can * be added to the target bitrate in order to compensate for * prior undershoot. * VP9: Expressed as a percentage of the target bitrate, a threshold * overshoot level (current rate vs target) beyond which more aggressive * corrective measures are taken. * * Valid values in the range VP8:0-1000 VP9: 0-100. */ unsigned int rc_overshoot_pct; /* * decoder buffer model parameters */ /*!\brief Decoder Buffer Size * * This value indicates the amount of data that may be buffered by the * decoding application. Note that this value is expressed in units of * time (milliseconds). For example, a value of 5000 indicates that the * client will buffer (at least) 5000ms worth of encoded data. Use the * target bitrate (#rc_target_bitrate) to convert to bits/bytes, if * necessary. */ unsigned int rc_buf_sz; /*!\brief Decoder Buffer Initial Size * * This value indicates the amount of data that will be buffered by the * decoding application prior to beginning playback. This value is * expressed in units of time (milliseconds). Use the target bitrate * (#rc_target_bitrate) to convert to bits/bytes, if necessary. */ unsigned int rc_buf_initial_sz; /*!\brief Decoder Buffer Optimal Size * * This value indicates the amount of data that the encoder should try * to maintain in the decoder's buffer. This value is expressed in units * of time (milliseconds). Use the target bitrate (#rc_target_bitrate) * to convert to bits/bytes, if necessary. */ unsigned int rc_buf_optimal_sz; /* * 2 pass rate control parameters */ /*!\brief Two-pass mode CBR/VBR bias * * Bias, expressed on a scale of 0 to 100, for determining target size * for the current frame. The value 0 indicates the optimal CBR mode * value should be used. The value 100 indicates the optimal VBR mode * value should be used. Values in between indicate which way the * encoder should "lean." 
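 *
 * For context, a two-pass flow typically encodes once with
 * g_pass = #VPX_RC_FIRST_PASS, concatenates the emitted
 * #VPX_CODEC_STATS_PKT payloads, and then encodes again. A sketch of the
 * final-pass wiring (stats_buf and stats_sz are application-provided):
 * \code
 *   cfg.g_pass = VPX_RC_LAST_PASS;
 *   cfg.rc_twopass_stats_in.buf = stats_buf;
 *   cfg.rc_twopass_stats_in.sz = stats_sz;
 * \endcode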
*/ unsigned int rc_2pass_vbr_bias_pct; /*!\brief Two-pass mode per-GOP minimum bitrate * * This value, expressed as a percentage of the target bitrate, indicates * the minimum bitrate to be used for a single GOP (aka "section") */ unsigned int rc_2pass_vbr_minsection_pct; /*!\brief Two-pass mode per-GOP maximum bitrate * * This value, expressed as a percentage of the target bitrate, indicates * the maximum bitrate to be used for a single GOP (aka "section") */ unsigned int rc_2pass_vbr_maxsection_pct; /*!\brief Two-pass corpus vbr mode complexity control * Used only in VP9: A value representing the corpus midpoint complexity * for corpus vbr mode. This value defaults to 0 which disables corpus vbr * mode in favour of normal vbr mode. */ unsigned int rc_2pass_vbr_corpus_complexity; /* * keyframing settings (kf) */ /*!\brief Keyframe placement mode * * This value indicates whether the encoder should place keyframes at a * fixed interval, or determine the optimal placement automatically * (as governed by the #kf_min_dist and #kf_max_dist parameters) */ enum vpx_kf_mode kf_mode; /*!\brief Keyframe minimum interval * * This value, expressed as a number of frames, prevents the encoder from * placing a keyframe nearer than kf_min_dist to the previous keyframe. At * least kf_min_dist non-keyframes will be coded before the next * keyframe. Set kf_min_dist equal to kf_max_dist for a fixed interval. */ unsigned int kf_min_dist; /*!\brief Keyframe maximum interval * * This value, expressed as a number of frames, forces the encoder to code * a keyframe if one has not been coded in the last kf_max_dist frames. * A value of 0 implies all frames will be keyframes. Set kf_min_dist * equal to kf_max_dist for a fixed interval. */ unsigned int kf_max_dist; /* * Spatial scalability settings (ss) */ /*!\brief Number of spatial coding layers. * * This value specifies the number of spatial coding layers to be used. */ unsigned int ss_number_layers; /*!\brief Enable auto alt reference flags for each spatial layer. * * These values specify if auto alt reference frame is enabled for each * spatial layer. */ int ss_enable_auto_alt_ref[VPX_SS_MAX_LAYERS]; /*!\brief Target bitrate for each spatial layer. * * These values specify the target coding bitrate to be used for each * spatial layer. */ unsigned int ss_target_bitrate[VPX_SS_MAX_LAYERS]; /*!\brief Number of temporal coding layers. * * This value specifies the number of temporal layers to be used. */ unsigned int ts_number_layers; /*!\brief Target bitrate for each temporal layer. * * These values specify the target coding bitrate to be used for each * temporal layer. */ unsigned int ts_target_bitrate[VPX_TS_MAX_LAYERS]; /*!\brief Frame rate decimation factor for each temporal layer. * * These values specify the frame rate decimation factors to apply * to each temporal layer. */ unsigned int ts_rate_decimator[VPX_TS_MAX_LAYERS]; /*!\brief Length of the sequence defining frame temporal layer membership. * * This value specifies the length of the sequence that defines the * membership of frames to temporal layers. For example, if the * ts_periodicity = 8, then the frames are assigned to coding layers with a * repeated sequence of length 8. */ unsigned int ts_periodicity; /*!\brief Template defining the membership of frames to temporal layers. * * This array defines the membership of frames to temporal coding layers.
* For a 2-layer encoding that assigns even numbered frames to one temporal * layer (0) and odd numbered frames to a second temporal layer (1) with * ts_periodicity=8, then ts_layer_id = (0,1,0,1,0,1,0,1). */ unsigned int ts_layer_id[VPX_TS_MAX_PERIODICITY]; /*!\brief Target bitrate for each spatial/temporal layer. * * These values specify the target coding bitrate to be used for each * spatial/temporal layer. * */ unsigned int layer_target_bitrate[VPX_MAX_LAYERS]; /*!\brief Temporal layering mode indicating which temporal layering scheme to * use. * * The value (refer to VP9E_TEMPORAL_LAYERING_MODE) specifies the * temporal layering mode to use. * */ int temporal_layering_mode; } vpx_codec_enc_cfg_t; /**< alias for struct vpx_codec_enc_cfg */ /*!\brief vp9 svc extra configure parameters * * This defines max/min quantizers and scale factors for each layer * */ typedef struct vpx_svc_parameters { int max_quantizers[VPX_MAX_LAYERS]; /**< Max Q for each layer */ int min_quantizers[VPX_MAX_LAYERS]; /**< Min Q for each layer */ int scaling_factor_num[VPX_MAX_LAYERS]; /**< Scaling factor-numerator */ int scaling_factor_den[VPX_MAX_LAYERS]; /**< Scaling factor-denominator */ int speed_per_layer[VPX_MAX_LAYERS]; /**< Speed setting for each sl */ int temporal_layering_mode; /**< Temporal layering mode */ } vpx_svc_extra_cfg_t; /*!\brief Initialize an encoder instance * * Initializes an encoder context using the given interface. Applications * should call the vpx_codec_enc_init convenience macro instead of this * function directly, to ensure that the ABI version number parameter * is properly initialized. * * If the library was configured with --disable-multithread, this call * is not thread safe and should be guarded with a lock if being used * in a multithreaded context. * * \param[in] ctx Pointer to this instance's context. * \param[in] iface Pointer to the algorithm interface to use. * \param[in] cfg Configuration to use, if known. May be NULL. * \param[in] flags Bitfield of VPX_CODEC_USE_* flags * \param[in] ver ABI version number. Must be set to * VPX_ENCODER_ABI_VERSION * \retval #VPX_CODEC_OK * The encoder algorithm initialized. * \retval #VPX_CODEC_MEM_ERROR * Memory allocation failed. */ vpx_codec_err_t vpx_codec_enc_init_ver(vpx_codec_ctx_t *ctx, vpx_codec_iface_t *iface, const vpx_codec_enc_cfg_t *cfg, vpx_codec_flags_t flags, int ver); /*!\brief Convenience macro for vpx_codec_enc_init_ver() * * Ensures the ABI version parameter is properly set. */ #define vpx_codec_enc_init(ctx, iface, cfg, flags) \ vpx_codec_enc_init_ver(ctx, iface, cfg, flags, VPX_ENCODER_ABI_VERSION) /*!\brief Initialize multi-encoder instance * * Initializes a multi-encoder context using the given interface. * Applications should call the vpx_codec_enc_init_multi convenience macro * instead of this function directly, to ensure that the ABI version number * parameter is properly initialized. * * \param[in] ctx Pointer to this instance's context. * \param[in] iface Pointer to the algorithm interface to use. * \param[in] cfg Configuration to use, if known. May be NULL. * \param[in] num_enc Total number of encoders. * \param[in] flags Bitfield of VPX_CODEC_USE_* flags * \param[in] dsf Pointer to down-sampling factors. * \param[in] ver ABI version number. Must be set to * VPX_ENCODER_ABI_VERSION * \retval #VPX_CODEC_OK * The encoder algorithm initialized. * \retval #VPX_CODEC_MEM_ERROR * Memory allocation failed.
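 *
 * For the common single-encoder case, initialization typically looks like
 * the following sketch (vpx_codec_vp9_cx() comes from vpx/vp8cx.h, it uses
 * vpx_codec_enc_config_default() declared below, and error checks are
 * omitted; all values are illustrative):
 * \code
 *   vpx_codec_enc_cfg_t cfg;
 *   vpx_codec_enc_config_default(vpx_codec_vp9_cx(), &cfg, 0);
 *   cfg.g_w = 640;
 *   cfg.g_h = 480;
 *   cfg.g_timebase.num = 1;
 *   cfg.g_timebase.den = 30;      // 30 fps material
 *   cfg.rc_target_bitrate = 400;  // kilobits per second
 *   vpx_codec_ctx_t ctx;
 *   vpx_codec_enc_init(&ctx, vpx_codec_vp9_cx(), &cfg, 0);
 * \endcode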
*/ vpx_codec_err_t vpx_codec_enc_init_multi_ver( vpx_codec_ctx_t *ctx, vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *cfg, int num_enc, vpx_codec_flags_t flags, vpx_rational_t *dsf, int ver); /*!\brief Convenience macro for vpx_codec_enc_init_multi_ver() * * Ensures the ABI version parameter is properly set. */ #define vpx_codec_enc_init_multi(ctx, iface, cfg, num_enc, flags, dsf) \ vpx_codec_enc_init_multi_ver(ctx, iface, cfg, num_enc, flags, dsf, \ VPX_ENCODER_ABI_VERSION) /*!\brief Get a default configuration * * Initializes an encoder configuration structure with default values. Supports * the notion of "usages" so that an algorithm may offer different default * settings depending on the user's intended goal. This function \ref SHOULD * be called by all applications to initialize the configuration structure * before specializing the configuration with application specific values. * * \param[in] iface Pointer to the algorithm interface to use. * \param[out] cfg Configuration buffer to populate. * \param[in] usage Must be set to 0. * * \retval #VPX_CODEC_OK * The configuration was populated. * \retval #VPX_CODEC_INCAPABLE * Interface is not an encoder interface. * \retval #VPX_CODEC_INVALID_PARAM * A parameter was NULL, or the usage value was not recognized. */ vpx_codec_err_t vpx_codec_enc_config_default(vpx_codec_iface_t *iface, vpx_codec_enc_cfg_t *cfg, unsigned int usage); /*!\brief Set or change configuration * * Reconfigures an encoder instance according to the given configuration. * * \param[in] ctx Pointer to this instance's context * \param[in] cfg Configuration buffer to use * * \retval #VPX_CODEC_OK * The configuration was populated. * \retval #VPX_CODEC_INCAPABLE * Interface is not an encoder interface. * \retval #VPX_CODEC_INVALID_PARAM * A parameter was NULL, or the usage value was not recognized. */ vpx_codec_err_t vpx_codec_enc_config_set(vpx_codec_ctx_t *ctx, const vpx_codec_enc_cfg_t *cfg); /*!\brief Get global stream headers * * Retrieves a stream level global header packet, if supported by the codec. * * \param[in] ctx Pointer to this instance's context * * \retval NULL * Encoder does not support global header * \retval Non-NULL * Pointer to buffer containing global header packet */ vpx_fixed_buf_t *vpx_codec_get_global_headers(vpx_codec_ctx_t *ctx); /*!\brief deadline parameter analogous to VPx REALTIME mode. */ #define VPX_DL_REALTIME (1) /*!\brief deadline parameter analogous to VPx GOOD QUALITY mode. */ #define VPX_DL_GOOD_QUALITY (1000000) /*!\brief deadline parameter analogous to VPx BEST QUALITY mode. */ #define VPX_DL_BEST_QUALITY (0) /*!\brief Encode a frame * * Encodes a video frame at the given "presentation time." The presentation * time stamp (PTS) \ref MUST be strictly increasing. * * The encoder supports the notion of a soft real-time deadline. Given a * non-zero value to the deadline parameter, the encoder will make a "best * effort" guarantee to return before the given time slice expires. It is * implicit that limiting the available time to encode will degrade the * output quality. The encoder can be given an unlimited time to produce the * best possible frame by specifying a deadline of '0'. This deadline * supersedes the VPx notion of "best quality, good quality, realtime". * Applications that wish to map these former settings to the new deadline * based system can use the symbols #VPX_DL_REALTIME, #VPX_DL_GOOD_QUALITY, * and #VPX_DL_BEST_QUALITY.
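 *
 * For example (a sketch; img and pts are produced by the application):
 * \code
 *   // Encode one frame of duration 1 timebase unit with no special flags.
 *   vpx_codec_encode(&ctx, img, pts, 1, 0, VPX_DL_GOOD_QUALITY);
 * \endcode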
* * When the last frame has been passed to the encoder, this function should * continue to be called, with the img parameter set to NULL. This will * signal the end-of-stream condition to the encoder and allow it to encode * any held buffers. Encoding is complete when vpx_codec_encode() is called * and vpx_codec_get_cx_data() returns no data. * * \param[in] ctx Pointer to this instance's context * \param[in] img Image data to encode, NULL to flush. * \param[in] pts Presentation time stamp, in timebase units. * \param[in] duration Duration to show frame, in timebase units. * \param[in] flags Flags to use for encoding this frame. * \param[in] deadline Time to spend encoding, in microseconds. (0=infinite) * * \retval #VPX_CODEC_OK * The configuration was populated. * \retval #VPX_CODEC_INCAPABLE * Interface is not an encoder interface. * \retval #VPX_CODEC_INVALID_PARAM * A parameter was NULL, the image format is unsupported, etc. */ vpx_codec_err_t vpx_codec_encode(vpx_codec_ctx_t *ctx, const vpx_image_t *img, vpx_codec_pts_t pts, unsigned long duration, vpx_enc_frame_flags_t flags, unsigned long deadline); /*!\brief Set compressed data output buffer * * Sets the buffer that the codec should output the compressed data * into. This call effectively sets the buffer pointer returned in the * next VPX_CODEC_CX_FRAME_PKT packet. Subsequent packets will be * appended into this buffer. The buffer is preserved across frames, * so applications must periodically call this function after flushing * the accumulated compressed data to disk or to the network to reset * the pointer to the buffer's head. * * `pad_before` bytes will be skipped before writing the compressed * data, and `pad_after` bytes will be appended to the packet. The size * of the packet will be the sum of the size of the actual compressed * data, pad_before, and pad_after. The padding bytes will be preserved * (not overwritten). * * Note that calling this function does not guarantee that the returned * compressed data will be placed into the specified buffer. In the * event that the encoded data will not fit into the buffer provided, * the returned packet \ref MAY point to an internal buffer, as it would * if this call were never used. In this event, the output packet will * NOT have any padding, and the application must free space and copy it * to the proper place. This is of particular note in configurations * that may output multiple packets for a single encoded frame (e.g., lagged * encoding) or if the application does not reset the buffer periodically. * * Applications may restore the default behavior of the codec providing * the compressed data buffer by calling this function with a NULL * buffer. * * Applications \ref MUSTNOT call this function during iteration of * vpx_codec_get_cx_data(). * * \param[in] ctx Pointer to this instance's context * \param[in] buf Buffer to store compressed data into * \param[in] pad_before Bytes to skip before writing compressed data * \param[in] pad_after Bytes to skip after writing compressed data * * \retval #VPX_CODEC_OK * The buffer was set successfully. * \retval #VPX_CODEC_INVALID_PARAM * A parameter was NULL, the image format is unsupported, etc. */ vpx_codec_err_t vpx_codec_set_cx_data_buf(vpx_codec_ctx_t *ctx, const vpx_fixed_buf_t *buf, unsigned int pad_before, unsigned int pad_after); /*!\brief Encoded data iterator * * Iterates over a list of data packets to be passed from the encoder to the * application. 
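 *
 * A minimal iteration sketch (write_packet() is a hypothetical muxer
 * callback supplied by the application):
 * \code
 *   vpx_codec_iter_t iter = NULL;
 *   const vpx_codec_cx_pkt_t *pkt;
 *   while ((pkt = vpx_codec_get_cx_data(&ctx, &iter)) != NULL) {
 *     if (pkt->kind == VPX_CODEC_CX_FRAME_PKT)
 *       write_packet(pkt->data.frame.buf, pkt->data.frame.sz,
 *                    pkt->data.frame.pts);
 *   }
 * \endcode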
The different kinds of packets available are enumerated in * #vpx_codec_cx_pkt_kind. * * #VPX_CODEC_CX_FRAME_PKT packets should be passed to the application's * muxer. Multiple compressed frames may be in the list. * #VPX_CODEC_STATS_PKT packets should be appended to a global buffer. * * The application \ref MUST silently ignore any packet kinds that it does * not recognize or support. * * The data buffers returned from this function are only guaranteed to be * valid until the application makes another call to any vpx_codec_* function. * * \param[in] ctx Pointer to this instance's context * \param[in,out] iter Iterator storage, initialized to NULL * * \return Returns a pointer to an output data packet (compressed frame data, * two-pass statistics, etc.) or NULL to signal end-of-list. * */ const vpx_codec_cx_pkt_t *vpx_codec_get_cx_data(vpx_codec_ctx_t *ctx, vpx_codec_iter_t *iter); /*!\brief Get Preview Frame * * Returns an image that can be used as a preview. Shows the image as it would * exist at the decompressor. The application \ref MUST NOT write into this * image buffer. * * \param[in] ctx Pointer to this instance's context * * \return Returns a pointer to a preview image, or NULL if no image is * available. * */ const vpx_image_t *vpx_codec_get_preview_frame(vpx_codec_ctx_t *ctx); /*!@} - end defgroup encoder*/ #ifdef __cplusplus } #endif #endif // VPX_VPX_VPX_ENCODER_H_ libvpx-1.8.2/vpx/vpx_frame_buffer.h /* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_VPX_FRAME_BUFFER_H_ #define VPX_VPX_VPX_FRAME_BUFFER_H_ /*!\file * \brief Describes the decoder external frame buffer interface. */ #ifdef __cplusplus extern "C" { #endif #include "./vpx_integer.h" /*!\brief The maximum number of work buffers used by libvpx. * Support maximum 4 threads to decode video in parallel. * Each thread will use one work buffer. * TODO(hkuang): Add support to set number of worker threads dynamically. */ #define VPX_MAXIMUM_WORK_BUFFERS 8 /*!\brief The maximum number of reference buffers that a VP9 encoder may use. */ #define VP9_MAXIMUM_REF_BUFFERS 8 /*!\brief External frame buffer * * This structure holds allocated frame buffers used by the decoder. */ typedef struct vpx_codec_frame_buffer { uint8_t *data; /**< Pointer to the data buffer */ size_t size; /**< Size of data in bytes */ void *priv; /**< Frame's private data */ } vpx_codec_frame_buffer_t; /*!\brief get frame buffer callback prototype * * This callback is invoked by the decoder to retrieve data for the frame * buffer in order for the decode call to complete. The callback must * allocate at least min_size in bytes and assign it to fb->data. The callback * must zero out all the data allocated. Then the callback must set fb->size * to the allocated size. The application does not need to align the allocated * data. The callback is triggered when the decoder needs a frame buffer to * decode a compressed image into. This function may be called more than once * for every call to vpx_codec_decode. The application may set fb->priv to * some data which will be passed back in the vpx_image_t and the release * function call.
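 *
 * A minimal allocator sketch (assumes plain calloc() is acceptable; error
 * handling and the matching release callback are elided):
 * \code
 *   static int example_get_fb(void *priv, size_t min_size,
 *                             vpx_codec_frame_buffer_t *fb) {
 *     fb->data = (uint8_t *)calloc(min_size, 1);  // zeroed, as required
 *     if (!fb->data) return -1;
 *     fb->size = min_size;
 *     fb->priv = NULL;
 *     return 0;
 *   }
 * \endcode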
|fb| is guaranteed to not be NULL. On success the callback * must return 0. Any failure the callback must return a value less than 0. * * \param[in] priv Callback's private data * \param[in] min_size Size in bytes needed by the buffer * \param[in,out] fb Pointer to vpx_codec_frame_buffer_t */ typedef int (*vpx_get_frame_buffer_cb_fn_t)(void *priv, size_t min_size, vpx_codec_frame_buffer_t *fb); /*!\brief release frame buffer callback prototype * * This callback is invoked by the decoder when the frame buffer is not * referenced by any other buffers. |fb| is guaranteed to not be NULL. On * success the callback must return 0. Any failure the callback must return * a value less than 0. * * \param[in] priv Callback's private data * \param[in] fb Pointer to vpx_codec_frame_buffer_t */ typedef int (*vpx_release_frame_buffer_cb_fn_t)(void *priv, vpx_codec_frame_buffer_t *fb); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_VPX_FRAME_BUFFER_H_ libvpx-1.8.2/vpx/vpx_image.h000066400000000000000000000176171357355204000160130ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /*!\file * \brief Describes the vpx image descriptor and associated operations * */ #ifndef VPX_VPX_VPX_IMAGE_H_ #define VPX_VPX_VPX_IMAGE_H_ #ifdef __cplusplus extern "C" { #endif /*!\brief Current ABI version number * * \internal * If this file is altered in any way that changes the ABI, this value * must be bumped. Examples include, but are not limited to, changing * types, removing or reassigning enums, adding/removing/rearranging * fields to structures */ #define VPX_IMAGE_ABI_VERSION (5) /**<\hideinitializer*/ #define VPX_IMG_FMT_PLANAR 0x100 /**< Image is a planar format. */ #define VPX_IMG_FMT_UV_FLIP 0x200 /**< V plane precedes U in memory. */ #define VPX_IMG_FMT_HAS_ALPHA 0x400 /**< Image has an alpha channel. */ #define VPX_IMG_FMT_HIGHBITDEPTH 0x800 /**< Image uses 16bit framebuffer. 
*/ /*!\brief List of supported image formats */ typedef enum vpx_img_fmt { VPX_IMG_FMT_NONE, VPX_IMG_FMT_YV12 = VPX_IMG_FMT_PLANAR | VPX_IMG_FMT_UV_FLIP | 1, /**< planar YVU */ VPX_IMG_FMT_I420 = VPX_IMG_FMT_PLANAR | 2, VPX_IMG_FMT_I422 = VPX_IMG_FMT_PLANAR | 5, VPX_IMG_FMT_I444 = VPX_IMG_FMT_PLANAR | 6, VPX_IMG_FMT_I440 = VPX_IMG_FMT_PLANAR | 7, VPX_IMG_FMT_I42016 = VPX_IMG_FMT_I420 | VPX_IMG_FMT_HIGHBITDEPTH, VPX_IMG_FMT_I42216 = VPX_IMG_FMT_I422 | VPX_IMG_FMT_HIGHBITDEPTH, VPX_IMG_FMT_I44416 = VPX_IMG_FMT_I444 | VPX_IMG_FMT_HIGHBITDEPTH, VPX_IMG_FMT_I44016 = VPX_IMG_FMT_I440 | VPX_IMG_FMT_HIGHBITDEPTH } vpx_img_fmt_t; /**< alias for enum vpx_img_fmt */ /*!\brief List of supported color spaces */ typedef enum vpx_color_space { VPX_CS_UNKNOWN = 0, /**< Unknown */ VPX_CS_BT_601 = 1, /**< BT.601 */ VPX_CS_BT_709 = 2, /**< BT.709 */ VPX_CS_SMPTE_170 = 3, /**< SMPTE.170 */ VPX_CS_SMPTE_240 = 4, /**< SMPTE.240 */ VPX_CS_BT_2020 = 5, /**< BT.2020 */ VPX_CS_RESERVED = 6, /**< Reserved */ VPX_CS_SRGB = 7 /**< sRGB */ } vpx_color_space_t; /**< alias for enum vpx_color_space */ /*!\brief List of supported color range */ typedef enum vpx_color_range { VPX_CR_STUDIO_RANGE = 0, /**< Y [16..235], UV [16..240] */ VPX_CR_FULL_RANGE = 1 /**< YUV/RGB [0..255] */ } vpx_color_range_t; /**< alias for enum vpx_color_range */ /**\brief Image Descriptor */ typedef struct vpx_image { vpx_img_fmt_t fmt; /**< Image Format */ vpx_color_space_t cs; /**< Color Space */ vpx_color_range_t range; /**< Color Range */ /* Image storage dimensions */ unsigned int w; /**< Stored image width */ unsigned int h; /**< Stored image height */ unsigned int bit_depth; /**< Stored image bit-depth */ /* Image display dimensions */ unsigned int d_w; /**< Displayed image width */ unsigned int d_h; /**< Displayed image height */ /* Image intended rendering dimensions */ unsigned int r_w; /**< Intended rendering image width */ unsigned int r_h; /**< Intended rendering image height */ /* Chroma subsampling info */ unsigned int x_chroma_shift; /**< subsampling order, X */ unsigned int y_chroma_shift; /**< subsampling order, Y */ /* Image data pointers. */ #define VPX_PLANE_PACKED 0 /**< To be used for all packed formats */ #define VPX_PLANE_Y 0 /**< Y (Luminance) plane */ #define VPX_PLANE_U 1 /**< U (Chroma) plane */ #define VPX_PLANE_V 2 /**< V (Chroma) plane */ #define VPX_PLANE_ALPHA 3 /**< A (Transparency) plane */ unsigned char *planes[4]; /**< pointer to the top left pixel for each plane */ int stride[4]; /**< stride between rows for each plane */ int bps; /**< bits per sample (for packed formats) */ /*!\brief The following member may be set by the application to associate * data with this image. */ void *user_priv; /* The following members should be treated as private. */ unsigned char *img_data; /**< private */ int img_data_owner; /**< private */ int self_allocd; /**< private */ void *fb_priv; /**< Frame buffer data associated with the image. */ } vpx_image_t; /**< alias for struct vpx_image */ /**\brief Representation of a rectangle on a surface */ typedef struct vpx_image_rect { unsigned int x; /**< leftmost column */ unsigned int y; /**< topmost row */ unsigned int w; /**< width */ unsigned int h; /**< height */ } vpx_image_rect_t; /**< alias for struct vpx_image_rect */ /*!\brief Open a descriptor, allocating storage for the underlying image * * Returns a descriptor for storing an image of the given format. The * storage for the descriptor is allocated on the heap. * * \param[in] img Pointer to storage for descriptor. 
If this parameter * is NULL, the storage for the descriptor will be * allocated on the heap. * \param[in] fmt Format for the image * \param[in] d_w Width of the image * \param[in] d_h Height of the image * \param[in] align Alignment, in bytes, of the image buffer and * each row in the image(stride). * * \return Returns a pointer to the initialized image descriptor. If the img * parameter is non-null, the value of the img parameter will be * returned. */ vpx_image_t *vpx_img_alloc(vpx_image_t *img, vpx_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int align); /*!\brief Open a descriptor, using existing storage for the underlying image * * Returns a descriptor for storing an image of the given format. The * storage for the descriptor has been allocated elsewhere, and a descriptor is * desired to "wrap" that storage. * * \param[in] img Pointer to storage for descriptor. If this * parameter is NULL, the storage for the descriptor * will be allocated on the heap. * \param[in] fmt Format for the image * \param[in] d_w Width of the image * \param[in] d_h Height of the image * \param[in] stride_align Alignment, in bytes, of each row in the image. * \param[in] img_data Storage to use for the image * * \return Returns a pointer to the initialized image descriptor. If the img * parameter is non-null, the value of the img parameter will be * returned. */ vpx_image_t *vpx_img_wrap(vpx_image_t *img, vpx_img_fmt_t fmt, unsigned int d_w, unsigned int d_h, unsigned int stride_align, unsigned char *img_data); /*!\brief Set the rectangle identifying the displayed portion of the image * * Updates the displayed rectangle (aka viewport) on the image surface to * match the specified coordinates and size. * * \param[in] img Image descriptor * \param[in] x leftmost column * \param[in] y topmost row * \param[in] w width * \param[in] h height * * \return 0 if the requested rectangle is valid, nonzero otherwise. */ int vpx_img_set_rect(vpx_image_t *img, unsigned int x, unsigned int y, unsigned int w, unsigned int h); /*!\brief Flip the image vertically (top for bottom) * * Adjusts the image descriptor's pointers and strides to make the image * be referenced upside-down. * * \param[in] img Image descriptor */ void vpx_img_flip(vpx_image_t *img); /*!\brief Close an image descriptor * * Frees all allocated storage associated with an image descriptor. * * \param[in] img Image descriptor */ void vpx_img_free(vpx_image_t *img); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_VPX_IMAGE_H_ libvpx-1.8.2/vpx/vpx_integer.h000066400000000000000000000021511357355204000163510ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_VPX_INTEGER_H_ #define VPX_VPX_VPX_INTEGER_H_ /* get ptrdiff_t, size_t, wchar_t, NULL */ #include <stddef.h> #if defined(_MSC_VER) #define VPX_FORCE_INLINE __forceinline #define VPX_INLINE __inline #else #define VPX_FORCE_INLINE __inline__ __attribute__((always_inline)) // TODO(jbb): Allow a way to force inline off for older compilers. #define VPX_INLINE inline #endif /* Assume platforms have the C99 standard integer types.
*/ #if defined(__cplusplus) #if !defined(__STDC_FORMAT_MACROS) #define __STDC_FORMAT_MACROS #endif #if !defined(__STDC_LIMIT_MACROS) #define __STDC_LIMIT_MACROS #endif #endif // __cplusplus #include <inttypes.h> #include <stdint.h> #endif // VPX_VPX_VPX_INTEGER_H_ libvpx-1.8.2/vpx_dsp/000077500000000000000000000000001357355204000145155ustar00rootroot00000000000000libvpx-1.8.2/vpx_dsp/add_noise.c000066400000000000000000000040561357355204000166130ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <math.h> #include <stdlib.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/postproc.h" #include "vpx_ports/mem.h" void vpx_plane_add_noise_c(uint8_t *start, const int8_t *noise, int blackclamp, int whiteclamp, int width, int height, int pitch) { int i, j; int bothclamp = blackclamp + whiteclamp; for (i = 0; i < height; ++i) { uint8_t *pos = start + i * pitch; const int8_t *ref = (const int8_t *)(noise + (rand() & 0xff)); // NOLINT for (j = 0; j < width; ++j) { int v = pos[j]; v = clamp(v - blackclamp, 0, 255); v = clamp(v + bothclamp, 0, 255); v = clamp(v - whiteclamp, 0, 255); pos[j] = v + ref[j]; } } } static double gaussian(double sigma, double mu, double x) { return 1 / (sigma * sqrt(2.0 * 3.14159265)) * (exp(-(x - mu) * (x - mu) / (2 * sigma * sigma))); } int vpx_setup_noise(double sigma, int8_t *noise, int size) { int8_t char_dist[256]; int next = 0, i, j; // set up a 256 entry lookup that matches gaussian distribution for (i = -32; i < 32; ++i) { const int a_i = (int)(0.5 + 256 * gaussian(sigma, 0, i)); if (a_i) { for (j = 0; j < a_i; ++j) { if (next + j >= 256) goto set_noise; char_dist[next + j] = (int8_t)i; } next = next + j; } } // Rounding error - might mean we have less than 256. for (; next < 256; ++next) { char_dist[next] = 0; } set_noise: for (i = 0; i < size; ++i) { noise[i] = char_dist[rand() & 0xff]; // NOLINT } // Returns the highest non 0 value used in distribution. return -char_dist[0]; } libvpx-1.8.2/vpx_dsp/arm/000077500000000000000000000000001357355204000152745ustar00rootroot00000000000000libvpx-1.8.2/vpx_dsp/arm/avg_neon.c000066400000000000000000000211261357355204000172360ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <arm_neon.h> #include <assert.h> #include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/sum_neon.h" uint32_t vpx_avg_4x4_neon(const uint8_t *a, int a_stride) { const uint8x16_t b = load_unaligned_u8q(a, a_stride); const uint16x8_t c = vaddl_u8(vget_low_u8(b), vget_high_u8(b)); const uint32x2_t d = horizontal_add_uint16x8(c); return vget_lane_u32(vrshr_n_u32(d, 4), 0); } uint32_t vpx_avg_8x8_neon(const uint8_t *a, int a_stride) { int i; uint8x8_t b, c; uint16x8_t sum; uint32x2_t d; b = vld1_u8(a); a += a_stride; c = vld1_u8(a); a += a_stride; sum = vaddl_u8(b, c); for (i = 0; i < 6; ++i) { const uint8x8_t d = vld1_u8(a); a += a_stride; sum = vaddw_u8(sum, d); } d = horizontal_add_uint16x8(sum); return vget_lane_u32(vrshr_n_u32(d, 6), 0); } // coeff: 16 bits, dynamic range [-32640, 32640]. // length: value range {16, 64, 256, 1024}. int vpx_satd_neon(const tran_low_t *coeff, int length) { const int16x4_t zero = vdup_n_s16(0); int32x4_t accum = vdupq_n_s32(0); do { const int16x8_t src0 = load_tran_low_to_s16q(coeff); const int16x8_t src8 = load_tran_low_to_s16q(coeff + 8); accum = vabal_s16(accum, vget_low_s16(src0), zero); accum = vabal_s16(accum, vget_high_s16(src0), zero); accum = vabal_s16(accum, vget_low_s16(src8), zero); accum = vabal_s16(accum, vget_high_s16(src8), zero); length -= 16; coeff += 16; } while (length != 0); { // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024] const int64x2_t s0 = vpaddlq_s32(accum); // cascading summation of 'accum'. const int32x2_t s1 = vadd_s32(vreinterpret_s32_s64(vget_low_s64(s0)), vreinterpret_s32_s64(vget_high_s64(s0))); const int satd = vget_lane_s32(s1, 0); return satd; } } void vpx_int_pro_row_neon(int16_t hbuf[16], uint8_t const *ref, const int ref_stride, const int height) { int i; uint16x8_t vec_sum_lo = vdupq_n_u16(0); uint16x8_t vec_sum_hi = vdupq_n_u16(0); const int shift_factor = ((height >> 5) + 3) * -1; const int16x8_t vec_shift = vdupq_n_s16(shift_factor); for (i = 0; i < height; i += 8) { const uint8x16_t vec_row1 = vld1q_u8(ref); const uint8x16_t vec_row2 = vld1q_u8(ref + ref_stride); const uint8x16_t vec_row3 = vld1q_u8(ref + ref_stride * 2); const uint8x16_t vec_row4 = vld1q_u8(ref + ref_stride * 3); const uint8x16_t vec_row5 = vld1q_u8(ref + ref_stride * 4); const uint8x16_t vec_row6 = vld1q_u8(ref + ref_stride * 5); const uint8x16_t vec_row7 = vld1q_u8(ref + ref_stride * 6); const uint8x16_t vec_row8 = vld1q_u8(ref + ref_stride * 7); vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row1)); vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row1)); vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row2)); vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row2)); vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row3)); vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row3)); vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row4)); vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row4)); vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row5)); vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row5)); vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row6)); vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row6)); vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row7)); vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row7)); vec_sum_lo = vaddw_u8(vec_sum_lo, vget_low_u8(vec_row8)); vec_sum_hi = vaddw_u8(vec_sum_hi, vget_high_u8(vec_row8)); ref += ref_stride * 8; } vec_sum_lo = vshlq_u16(vec_sum_lo,
vec_shift); vec_sum_hi = vshlq_u16(vec_sum_hi, vec_shift); vst1q_s16(hbuf, vreinterpretq_s16_u16(vec_sum_lo)); hbuf += 8; vst1q_s16(hbuf, vreinterpretq_s16_u16(vec_sum_hi)); } int16_t vpx_int_pro_col_neon(uint8_t const *ref, const int width) { int i; uint16x8_t vec_sum = vdupq_n_u16(0); for (i = 0; i < width; i += 16) { const uint8x16_t vec_row = vld1q_u8(ref); vec_sum = vaddw_u8(vec_sum, vget_low_u8(vec_row)); vec_sum = vaddw_u8(vec_sum, vget_high_u8(vec_row)); ref += 16; } return vget_lane_s16(vreinterpret_s16_u32(horizontal_add_uint16x8(vec_sum)), 0); } // ref, src = [0, 510] - max diff = 16-bits // bwl = {2, 3, 4}, width = {16, 32, 64} int vpx_vector_var_neon(int16_t const *ref, int16_t const *src, const int bwl) { int width = 4 << bwl; int32x4_t sse = vdupq_n_s32(0); int16x8_t total = vdupq_n_s16(0); assert(width >= 8); assert((width % 8) == 0); do { const int16x8_t r = vld1q_s16(ref); const int16x8_t s = vld1q_s16(src); const int16x8_t diff = vsubq_s16(r, s); // [-510, 510], 10 bits. const int16x4_t diff_lo = vget_low_s16(diff); const int16x4_t diff_hi = vget_high_s16(diff); sse = vmlal_s16(sse, diff_lo, diff_lo); // dynamic range 26 bits. sse = vmlal_s16(sse, diff_hi, diff_hi); total = vaddq_s16(total, diff); // dynamic range 16 bits. ref += 8; src += 8; width -= 8; } while (width != 0); { // Note: 'total''s pairwise addition could be implemented similarly to // horizontal_add_uint16x8(), but one less vpaddl with 'total' when paired // with the summation of 'sse' performed better on a Cortex-A15. const int32x4_t t0 = vpaddlq_s16(total); // cascading summation of 'total' const int32x2_t t1 = vadd_s32(vget_low_s32(t0), vget_high_s32(t0)); const int32x2_t t2 = vpadd_s32(t1, t1); const int t = vget_lane_s32(t2, 0); const int64x2_t s0 = vpaddlq_s32(sse); // cascading summation of 'sse'. const int32x2_t s1 = vadd_s32(vreinterpret_s32_s64(vget_low_s64(s0)), vreinterpret_s32_s64(vget_high_s64(s0))); const int s = vget_lane_s32(s1, 0); const int shift_factor = bwl + 2; return s - ((t * t) >> shift_factor); } } void vpx_minmax_8x8_neon(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int *min, int *max) { // Load and concatenate. const uint8x16_t a01 = vcombine_u8(vld1_u8(a), vld1_u8(a + a_stride)); const uint8x16_t a23 = vcombine_u8(vld1_u8(a + 2 * a_stride), vld1_u8(a + 3 * a_stride)); const uint8x16_t a45 = vcombine_u8(vld1_u8(a + 4 * a_stride), vld1_u8(a + 5 * a_stride)); const uint8x16_t a67 = vcombine_u8(vld1_u8(a + 6 * a_stride), vld1_u8(a + 7 * a_stride)); const uint8x16_t b01 = vcombine_u8(vld1_u8(b), vld1_u8(b + b_stride)); const uint8x16_t b23 = vcombine_u8(vld1_u8(b + 2 * b_stride), vld1_u8(b + 3 * b_stride)); const uint8x16_t b45 = vcombine_u8(vld1_u8(b + 4 * b_stride), vld1_u8(b + 5 * b_stride)); const uint8x16_t b67 = vcombine_u8(vld1_u8(b + 6 * b_stride), vld1_u8(b + 7 * b_stride)); // Absolute difference. const uint8x16_t ab01_diff = vabdq_u8(a01, b01); const uint8x16_t ab23_diff = vabdq_u8(a23, b23); const uint8x16_t ab45_diff = vabdq_u8(a45, b45); const uint8x16_t ab67_diff = vabdq_u8(a67, b67); // Max values between the Q vectors. const uint8x16_t ab0123_max = vmaxq_u8(ab01_diff, ab23_diff); const uint8x16_t ab4567_max = vmaxq_u8(ab45_diff, ab67_diff); const uint8x16_t ab0123_min = vminq_u8(ab01_diff, ab23_diff); const uint8x16_t ab4567_min = vminq_u8(ab45_diff, ab67_diff); const uint8x16_t ab07_max = vmaxq_u8(ab0123_max, ab4567_max); const uint8x16_t ab07_min = vminq_u8(ab0123_min, ab4567_min); // Split to D and start doing pairwise. 
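// [Editor's note, illustration only] Three pairwise rounds below fold the
// 8-lane vectors so that every lane ends up holding the block-wide extremum,
// the vector analogue of reducing max(d[0..7]) / min(d[0..7]) in
// log2(8) = 3 steps.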
uint8x8_t ab_max = vmax_u8(vget_high_u8(ab07_max), vget_low_u8(ab07_max)); uint8x8_t ab_min = vmin_u8(vget_high_u8(ab07_min), vget_low_u8(ab07_min)); // Enough runs of vpmax/min propagate the max/min values to every position. ab_max = vpmax_u8(ab_max, ab_max); ab_min = vpmin_u8(ab_min, ab_min); ab_max = vpmax_u8(ab_max, ab_max); ab_min = vpmin_u8(ab_min, ab_min); ab_max = vpmax_u8(ab_max, ab_max); ab_min = vpmin_u8(ab_min, ab_min); *min = *max = 0; // Clear high bits // Store directly to avoid costly neon->gpr transfer. vst1_lane_u8((uint8_t *)max, ab_max, 0); vst1_lane_u8((uint8_t *)min, ab_min, 0); } libvpx-1.8.2/vpx_dsp/arm/avg_pred_neon.c000066400000000000000000000033671357355204000202550ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include <assert.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/mem_neon.h" void vpx_comp_avg_pred_neon(uint8_t *comp, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride) { if (width > 8) { int x, y = height; do { for (x = 0; x < width; x += 16) { const uint8x16_t p = vld1q_u8(pred + x); const uint8x16_t r = vld1q_u8(ref + x); const uint8x16_t avg = vrhaddq_u8(p, r); vst1q_u8(comp + x, avg); } comp += width; pred += width; ref += ref_stride; } while (--y); } else if (width == 8) { int i = width * height; do { const uint8x16_t p = vld1q_u8(pred); uint8x16_t r; const uint8x8_t r_0 = vld1_u8(ref); const uint8x8_t r_1 = vld1_u8(ref + ref_stride); r = vcombine_u8(r_0, r_1); ref += 2 * ref_stride; r = vrhaddq_u8(r, p); vst1q_u8(comp, r); pred += 16; comp += 16; i -= 16; } while (i); } else { int i = width * height; assert(width == 4); do { const uint8x16_t p = vld1q_u8(pred); uint8x16_t r; r = load_unaligned_u8q(ref, ref_stride); ref += 4 * ref_stride; r = vrhaddq_u8(r, p); vst1q_u8(comp, r); pred += 16; comp += 16; i -= 16; } while (i); } } libvpx-1.8.2/vpx_dsp/arm/deblock_neon.c000066400000000000000000000362511357355204000200710ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <arm_neon.h> #include <assert.h> #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/arm/transpose_neon.h" extern const int16_t vpx_rv[]; static uint8x8_t average_k_out(const uint8x8_t a2, const uint8x8_t a1, const uint8x8_t v0, const uint8x8_t b1, const uint8x8_t b2) { const uint8x8_t k1 = vrhadd_u8(a2, a1); const uint8x8_t k2 = vrhadd_u8(b2, b1); const uint8x8_t k3 = vrhadd_u8(k1, k2); return vrhadd_u8(k3, v0); } static uint8x8_t generate_mask(const uint8x8_t a2, const uint8x8_t a1, const uint8x8_t v0, const uint8x8_t b1, const uint8x8_t b2, const uint8x8_t filter) { const uint8x8_t a2_v0 = vabd_u8(a2, v0); const uint8x8_t a1_v0 = vabd_u8(a1, v0); const uint8x8_t b1_v0 = vabd_u8(b1, v0); const uint8x8_t b2_v0 = vabd_u8(b2, v0); uint8x8_t max = vmax_u8(a2_v0, a1_v0); max = vmax_u8(b1_v0, max); max = vmax_u8(b2_v0, max); return vclt_u8(max, filter); } static uint8x8_t generate_output(const uint8x8_t a2, const uint8x8_t a1, const uint8x8_t v0, const uint8x8_t b1, const uint8x8_t b2, const uint8x8_t filter) { const uint8x8_t k_out = average_k_out(a2, a1, v0, b1, b2); const uint8x8_t mask = generate_mask(a2, a1, v0, b1, b2, filter); return vbsl_u8(mask, k_out, v0); } // Same functions but for uint8x16_t. static uint8x16_t average_k_outq(const uint8x16_t a2, const uint8x16_t a1, const uint8x16_t v0, const uint8x16_t b1, const uint8x16_t b2) { const uint8x16_t k1 = vrhaddq_u8(a2, a1); const uint8x16_t k2 = vrhaddq_u8(b2, b1); const uint8x16_t k3 = vrhaddq_u8(k1, k2); return vrhaddq_u8(k3, v0); } static uint8x16_t generate_maskq(const uint8x16_t a2, const uint8x16_t a1, const uint8x16_t v0, const uint8x16_t b1, const uint8x16_t b2, const uint8x16_t filter) { const uint8x16_t a2_v0 = vabdq_u8(a2, v0); const uint8x16_t a1_v0 = vabdq_u8(a1, v0); const uint8x16_t b1_v0 = vabdq_u8(b1, v0); const uint8x16_t b2_v0 = vabdq_u8(b2, v0); uint8x16_t max = vmaxq_u8(a2_v0, a1_v0); max = vmaxq_u8(b1_v0, max); max = vmaxq_u8(b2_v0, max); return vcltq_u8(max, filter); } static uint8x16_t generate_outputq(const uint8x16_t a2, const uint8x16_t a1, const uint8x16_t v0, const uint8x16_t b1, const uint8x16_t b2, const uint8x16_t filter) { const uint8x16_t k_out = average_k_outq(a2, a1, v0, b1, b2); const uint8x16_t mask = generate_maskq(a2, a1, v0, b1, b2, filter); return vbslq_u8(mask, k_out, v0); } void vpx_post_proc_down_and_across_mb_row_neon(uint8_t *src_ptr, uint8_t *dst_ptr, int src_stride, int dst_stride, int cols, uint8_t *f, int size) { uint8_t *src, *dst; int row; int col; // While columns of length 16 can be processed, load them.
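// [Editor's note, illustration only] In scalar terms, each filtered pixel
// produced by generate_output()/generate_outputq() above is:
//   out = (|a2-v0| < f && |a1-v0| < f && |b1-v0| < f && |b2-v0| < f)
//             ? avg(avg(avg(a2, a1), avg(b2, b1)), v0)  // avg() rounds up.
//             : v0;
// where a2, a1 and b1, b2 are the two neighbors on each side of v0.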
for (col = 0; col < cols - 8; col += 16) { uint8x16_t a0, a1, a2, a3, a4, a5, a6, a7; src = src_ptr - 2 * src_stride; dst = dst_ptr; a0 = vld1q_u8(src); src += src_stride; a1 = vld1q_u8(src); src += src_stride; a2 = vld1q_u8(src); src += src_stride; a3 = vld1q_u8(src); src += src_stride; for (row = 0; row < size; row += 4) { uint8x16_t v_out_0, v_out_1, v_out_2, v_out_3; const uint8x16_t filterq = vld1q_u8(f + col); a4 = vld1q_u8(src); src += src_stride; a5 = vld1q_u8(src); src += src_stride; a6 = vld1q_u8(src); src += src_stride; a7 = vld1q_u8(src); src += src_stride; v_out_0 = generate_outputq(a0, a1, a2, a3, a4, filterq); v_out_1 = generate_outputq(a1, a2, a3, a4, a5, filterq); v_out_2 = generate_outputq(a2, a3, a4, a5, a6, filterq); v_out_3 = generate_outputq(a3, a4, a5, a6, a7, filterq); vst1q_u8(dst, v_out_0); dst += dst_stride; vst1q_u8(dst, v_out_1); dst += dst_stride; vst1q_u8(dst, v_out_2); dst += dst_stride; vst1q_u8(dst, v_out_3); dst += dst_stride; // Rotate over to the next slot. a0 = a4; a1 = a5; a2 = a6; a3 = a7; } src_ptr += 16; dst_ptr += 16; } // Clean up any left over column of length 8. if (col != cols) { uint8x8_t a0, a1, a2, a3, a4, a5, a6, a7; src = src_ptr - 2 * src_stride; dst = dst_ptr; a0 = vld1_u8(src); src += src_stride; a1 = vld1_u8(src); src += src_stride; a2 = vld1_u8(src); src += src_stride; a3 = vld1_u8(src); src += src_stride; for (row = 0; row < size; row += 4) { uint8x8_t v_out_0, v_out_1, v_out_2, v_out_3; const uint8x8_t filter = vld1_u8(f + col); a4 = vld1_u8(src); src += src_stride; a5 = vld1_u8(src); src += src_stride; a6 = vld1_u8(src); src += src_stride; a7 = vld1_u8(src); src += src_stride; v_out_0 = generate_output(a0, a1, a2, a3, a4, filter); v_out_1 = generate_output(a1, a2, a3, a4, a5, filter); v_out_2 = generate_output(a2, a3, a4, a5, a6, filter); v_out_3 = generate_output(a3, a4, a5, a6, a7, filter); vst1_u8(dst, v_out_0); dst += dst_stride; vst1_u8(dst, v_out_1); dst += dst_stride; vst1_u8(dst, v_out_2); dst += dst_stride; vst1_u8(dst, v_out_3); dst += dst_stride; // Rotate over to the next slot. a0 = a4; a1 = a5; a2 = a6; a3 = a7; } // Not strictly necessary but makes resetting dst_ptr easier. dst_ptr += 8; } dst_ptr -= cols; for (row = 0; row < size; row += 8) { uint8x8_t a0, a1, a2, a3; uint8x8_t b0, b1, b2, b3, b4, b5, b6, b7; src = dst_ptr; dst = dst_ptr; // Load 8 values, transpose 4 of them, and discard 2 because they will be // reloaded later. load_and_transpose_u8_4x8(src, dst_stride, &a0, &a1, &a2, &a3); a3 = a1; a2 = a1 = a0; // Extend left border. src += 2; for (col = 0; col < cols; col += 8) { uint8x8_t v_out_0, v_out_1, v_out_2, v_out_3, v_out_4, v_out_5, v_out_6, v_out_7; // Although the filter is meant to be applied vertically and is instead // being applied horizontally here it's OK because it's set in blocks of 8 // (or 16). const uint8x8_t filter = vld1_u8(f + col); load_and_transpose_u8_8x8(src, dst_stride, &b0, &b1, &b2, &b3, &b4, &b5, &b6, &b7); if (col + 8 == cols) { // Last row. Extend border (b5). 
b6 = b7 = b5; } v_out_0 = generate_output(a0, a1, a2, a3, b0, filter); v_out_1 = generate_output(a1, a2, a3, b0, b1, filter); v_out_2 = generate_output(a2, a3, b0, b1, b2, filter); v_out_3 = generate_output(a3, b0, b1, b2, b3, filter); v_out_4 = generate_output(b0, b1, b2, b3, b4, filter); v_out_5 = generate_output(b1, b2, b3, b4, b5, filter); v_out_6 = generate_output(b2, b3, b4, b5, b6, filter); v_out_7 = generate_output(b3, b4, b5, b6, b7, filter); transpose_and_store_u8_8x8(dst, dst_stride, v_out_0, v_out_1, v_out_2, v_out_3, v_out_4, v_out_5, v_out_6, v_out_7); a0 = b4; a1 = b5; a2 = b6; a3 = b7; src += 8; dst += 8; } dst_ptr += 8 * dst_stride; } } // sum += x; // sumsq += x * y; static void accumulate_sum_sumsq(const int16x4_t x, const int32x4_t xy, int16x4_t *const sum, int32x4_t *const sumsq) { const int16x4_t zero = vdup_n_s16(0); const int32x4_t zeroq = vdupq_n_s32(0); // Add in the first set because vext doesn't work with '0'. *sum = vadd_s16(*sum, x); *sumsq = vaddq_s32(*sumsq, xy); // Shift x and xy to the right and sum. vext requires an immediate. *sum = vadd_s16(*sum, vext_s16(zero, x, 1)); *sumsq = vaddq_s32(*sumsq, vextq_s32(zeroq, xy, 1)); *sum = vadd_s16(*sum, vext_s16(zero, x, 2)); *sumsq = vaddq_s32(*sumsq, vextq_s32(zeroq, xy, 2)); *sum = vadd_s16(*sum, vext_s16(zero, x, 3)); *sumsq = vaddq_s32(*sumsq, vextq_s32(zeroq, xy, 3)); } // Generate mask based on (sumsq * 15 - sum * sum < flimit) static uint16x4_t calculate_mask(const int16x4_t sum, const int32x4_t sumsq, const int32x4_t f, const int32x4_t fifteen) { const int32x4_t a = vmulq_s32(sumsq, fifteen); const int32x4_t b = vmlsl_s16(a, sum, sum); const uint32x4_t mask32 = vcltq_s32(b, f); return vmovn_u32(mask32); } static uint8x8_t combine_mask(const int16x4_t sum_low, const int16x4_t sum_high, const int32x4_t sumsq_low, const int32x4_t sumsq_high, const int32x4_t f) { const int32x4_t fifteen = vdupq_n_s32(15); const uint16x4_t mask16_low = calculate_mask(sum_low, sumsq_low, f, fifteen); const uint16x4_t mask16_high = calculate_mask(sum_high, sumsq_high, f, fifteen); return vmovn_u16(vcombine_u16(mask16_low, mask16_high)); } // Apply filter of (8 + sum + s[c]) >> 4. static uint8x8_t filter_pixels(const int16x8_t sum, const uint8x8_t s) { const int16x8_t s16 = vreinterpretq_s16_u16(vmovl_u8(s)); const int16x8_t sum_s = vaddq_s16(sum, s16); return vqrshrun_n_s16(sum_s, 4); } void vpx_mbpost_proc_across_ip_neon(uint8_t *src, int pitch, int rows, int cols, int flimit) { int row, col; const int32x4_t f = vdupq_n_s32(flimit); assert(cols % 8 == 0); for (row = 0; row < rows; ++row) { // Sum the first 8 elements, which are extended from s[0]. // sumsq gets primed with +16. int sumsq = src[0] * src[0] * 9 + 16; int sum = src[0] * 9; uint8x8_t left_context, s, right_context; int16x4_t sum_low, sum_high; int32x4_t sumsq_low, sumsq_high; // Sum (+square) the next 6 elements. // Skip [0] because it's included above. for (col = 1; col <= 6; ++col) { sumsq += src[col] * src[col]; sum += src[col]; } // Prime the sums. Later the loop uses the _high values to prime the new // vectors. sumsq_high = vdupq_n_s32(sumsq); sum_high = vdup_n_s16(sum); // Manually extend the left border. left_context = vdup_n_u8(src[0]); for (col = 0; col < cols; col += 8) { uint8x8_t mask, output; int16x8_t x, y; int32x4_t xy_low, xy_high; s = vld1_u8(src + col); if (col + 8 == cols) { // Last row. Extend border. 
right_context = vdup_n_u8(src[col + 7]); } else { right_context = vld1_u8(src + col + 7); } x = vreinterpretq_s16_u16(vsubl_u8(right_context, left_context)); y = vreinterpretq_s16_u16(vaddl_u8(right_context, left_context)); xy_low = vmull_s16(vget_low_s16(x), vget_low_s16(y)); xy_high = vmull_s16(vget_high_s16(x), vget_high_s16(y)); // Catch up to the last sum'd value. sum_low = vdup_lane_s16(sum_high, 3); sumsq_low = vdupq_lane_s32(vget_high_s32(sumsq_high), 1); accumulate_sum_sumsq(vget_low_s16(x), xy_low, &sum_low, &sumsq_low); // Need to do this sequentially because we need the max value from // sum_low. sum_high = vdup_lane_s16(sum_low, 3); sumsq_high = vdupq_lane_s32(vget_high_s32(sumsq_low), 1); accumulate_sum_sumsq(vget_high_s16(x), xy_high, &sum_high, &sumsq_high); mask = combine_mask(sum_low, sum_high, sumsq_low, sumsq_high, f); output = filter_pixels(vcombine_s16(sum_low, sum_high), s); output = vbsl_u8(mask, output, s); vst1_u8(src + col, output); left_context = s; } src += pitch; } } // Apply filter of (vpx_rv + sum + s[c]) >> 4. static uint8x8_t filter_pixels_rv(const int16x8_t sum, const uint8x8_t s, const int16x8_t rv) { const int16x8_t s16 = vreinterpretq_s16_u16(vmovl_u8(s)); const int16x8_t sum_s = vaddq_s16(sum, s16); const int16x8_t rounded = vaddq_s16(sum_s, rv); return vqshrun_n_s16(rounded, 4); } void vpx_mbpost_proc_down_neon(uint8_t *dst, int pitch, int rows, int cols, int flimit) { int row, col, i; const int32x4_t f = vdupq_n_s32(flimit); uint8x8_t below_context = vdup_n_u8(0); // 8 columns are processed at a time. // If rows is less than 8 the bottom border extension fails. assert(cols % 8 == 0); assert(rows >= 8); // Load and keep the first 8 values in memory. Process a vertical stripe that // is 8 wide. for (col = 0; col < cols; col += 8) { uint8x8_t s, above_context[8]; int16x8_t sum, sum_tmp; int32x4_t sumsq_low, sumsq_high; // Load and extend the top border. s = vld1_u8(dst); for (i = 0; i < 8; i++) { above_context[i] = s; } sum_tmp = vreinterpretq_s16_u16(vmovl_u8(s)); // sum * 9 sum = vmulq_n_s16(sum_tmp, 9); // (sum * 9) * sum == sum * sum * 9 sumsq_low = vmull_s16(vget_low_s16(sum), vget_low_s16(sum_tmp)); sumsq_high = vmull_s16(vget_high_s16(sum), vget_high_s16(sum_tmp)); // Load and discard the next 6 values to prime sum and sumsq. for (i = 1; i <= 6; ++i) { const uint8x8_t a = vld1_u8(dst + i * pitch); const int16x8_t b = vreinterpretq_s16_u16(vmovl_u8(a)); sum = vaddq_s16(sum, b); sumsq_low = vmlal_s16(sumsq_low, vget_low_s16(b), vget_low_s16(b)); sumsq_high = vmlal_s16(sumsq_high, vget_high_s16(b), vget_high_s16(b)); } for (row = 0; row < rows; ++row) { uint8x8_t mask, output; int16x8_t x, y; int32x4_t xy_low, xy_high; s = vld1_u8(dst + row * pitch); // Extend the bottom border. 
if (row + 7 < rows) { below_context = vld1_u8(dst + (row + 7) * pitch); } x = vreinterpretq_s16_u16(vsubl_u8(below_context, above_context[0])); y = vreinterpretq_s16_u16(vaddl_u8(below_context, above_context[0])); xy_low = vmull_s16(vget_low_s16(x), vget_low_s16(y)); xy_high = vmull_s16(vget_high_s16(x), vget_high_s16(y)); sum = vaddq_s16(sum, x); sumsq_low = vaddq_s32(sumsq_low, xy_low); sumsq_high = vaddq_s32(sumsq_high, xy_high); mask = combine_mask(vget_low_s16(sum), vget_high_s16(sum), sumsq_low, sumsq_high, f); output = filter_pixels_rv(sum, s, vld1q_s16(vpx_rv + (row & 127))); output = vbsl_u8(mask, output, s); vst1_u8(dst + row * pitch, output); above_context[0] = above_context[1]; above_context[1] = above_context[2]; above_context[2] = above_context[3]; above_context[3] = above_context[4]; above_context[4] = above_context[5]; above_context[5] = above_context[6]; above_context[6] = above_context[7]; above_context[7] = s; } dst += 8; } } libvpx-1.8.2/vpx_dsp/arm/fdct16x16_neon.c000066400000000000000000000354261357355204000201150ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/txfm_common.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/transpose_neon.h" // Some builds of gcc 4.9.2 and 4.9.3 have trouble with some of the inline // functions. #if !defined(__clang__) && !defined(__ANDROID__) && defined(__GNUC__) && \ __GNUC__ == 4 && __GNUC_MINOR__ == 9 && __GNUC_PATCHLEVEL__ < 4 void vpx_fdct16x16_neon(const int16_t *input, tran_low_t *output, int stride) { vpx_fdct16x16_c(input, output, stride); } #else static INLINE void load(const int16_t *a, int stride, int16x8_t *b /*[16]*/) { b[0] = vld1q_s16(a); a += stride; b[1] = vld1q_s16(a); a += stride; b[2] = vld1q_s16(a); a += stride; b[3] = vld1q_s16(a); a += stride; b[4] = vld1q_s16(a); a += stride; b[5] = vld1q_s16(a); a += stride; b[6] = vld1q_s16(a); a += stride; b[7] = vld1q_s16(a); a += stride; b[8] = vld1q_s16(a); a += stride; b[9] = vld1q_s16(a); a += stride; b[10] = vld1q_s16(a); a += stride; b[11] = vld1q_s16(a); a += stride; b[12] = vld1q_s16(a); a += stride; b[13] = vld1q_s16(a); a += stride; b[14] = vld1q_s16(a); a += stride; b[15] = vld1q_s16(a); } // Store 8 16x8 values, assuming stride == 16. static INLINE void store(tran_low_t *a, const int16x8_t *b /*[8]*/) { store_s16q_to_tran_low(a, b[0]); a += 16; store_s16q_to_tran_low(a, b[1]); a += 16; store_s16q_to_tran_low(a, b[2]); a += 16; store_s16q_to_tran_low(a, b[3]); a += 16; store_s16q_to_tran_low(a, b[4]); a += 16; store_s16q_to_tran_low(a, b[5]); a += 16; store_s16q_to_tran_low(a, b[6]); a += 16; store_s16q_to_tran_low(a, b[7]); } // Load step of each pass. Add and subtract clear across the input, requiring // all 16 values to be loaded. For the first pass it also multiplies by 4. // To maybe reduce register usage this could be combined with the load() step to // get the first 4 and last 4 values, cross those, then load the middle 8 values // and cross them. A scalar sketch of this cross step is given below for // illustration.
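// [Editor's illustration, not part of the original file; assumes only this
// file's includes] A scalar model of cross_input(): pass 0 applies the
// stage-1 butterfly and scales by 4, later passes apply the butterfly only.
static INLINE void cross_input_sketch(const int16_t *a /*[16]*/,
                                      int16_t *b /*[16]*/, const int pass) {
  int i;
  for (i = 0; i < 8; ++i) {
    int sum = a[i] + a[15 - i];   // Top half: sums.
    int diff = a[i] - a[15 - i];  // Bottom half: differences.
    if (pass == 0) {
      sum *= 4;
      diff *= 4;
    }
    b[i] = (int16_t)sum;
    b[15 - i] = (int16_t)diff;
  }
}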
static INLINE void cross_input(const int16x8_t *a /*[16]*/, int16x8_t *b /*[16]*/, const int pass) { if (pass == 0) { b[0] = vshlq_n_s16(vaddq_s16(a[0], a[15]), 2); b[1] = vshlq_n_s16(vaddq_s16(a[1], a[14]), 2); b[2] = vshlq_n_s16(vaddq_s16(a[2], a[13]), 2); b[3] = vshlq_n_s16(vaddq_s16(a[3], a[12]), 2); b[4] = vshlq_n_s16(vaddq_s16(a[4], a[11]), 2); b[5] = vshlq_n_s16(vaddq_s16(a[5], a[10]), 2); b[6] = vshlq_n_s16(vaddq_s16(a[6], a[9]), 2); b[7] = vshlq_n_s16(vaddq_s16(a[7], a[8]), 2); b[8] = vshlq_n_s16(vsubq_s16(a[7], a[8]), 2); b[9] = vshlq_n_s16(vsubq_s16(a[6], a[9]), 2); b[10] = vshlq_n_s16(vsubq_s16(a[5], a[10]), 2); b[11] = vshlq_n_s16(vsubq_s16(a[4], a[11]), 2); b[12] = vshlq_n_s16(vsubq_s16(a[3], a[12]), 2); b[13] = vshlq_n_s16(vsubq_s16(a[2], a[13]), 2); b[14] = vshlq_n_s16(vsubq_s16(a[1], a[14]), 2); b[15] = vshlq_n_s16(vsubq_s16(a[0], a[15]), 2); } else { b[0] = vaddq_s16(a[0], a[15]); b[1] = vaddq_s16(a[1], a[14]); b[2] = vaddq_s16(a[2], a[13]); b[3] = vaddq_s16(a[3], a[12]); b[4] = vaddq_s16(a[4], a[11]); b[5] = vaddq_s16(a[5], a[10]); b[6] = vaddq_s16(a[6], a[9]); b[7] = vaddq_s16(a[7], a[8]); b[8] = vsubq_s16(a[7], a[8]); b[9] = vsubq_s16(a[6], a[9]); b[10] = vsubq_s16(a[5], a[10]); b[11] = vsubq_s16(a[4], a[11]); b[12] = vsubq_s16(a[3], a[12]); b[13] = vsubq_s16(a[2], a[13]); b[14] = vsubq_s16(a[1], a[14]); b[15] = vsubq_s16(a[0], a[15]); } } // Quarter round at the beginning of the second pass. Can't use vrshr (rounding) // because this only adds 1, not 1 << 2. static INLINE void partial_round_shift(int16x8_t *a /*[16]*/) { const int16x8_t one = vdupq_n_s16(1); a[0] = vshrq_n_s16(vaddq_s16(a[0], one), 2); a[1] = vshrq_n_s16(vaddq_s16(a[1], one), 2); a[2] = vshrq_n_s16(vaddq_s16(a[2], one), 2); a[3] = vshrq_n_s16(vaddq_s16(a[3], one), 2); a[4] = vshrq_n_s16(vaddq_s16(a[4], one), 2); a[5] = vshrq_n_s16(vaddq_s16(a[5], one), 2); a[6] = vshrq_n_s16(vaddq_s16(a[6], one), 2); a[7] = vshrq_n_s16(vaddq_s16(a[7], one), 2); a[8] = vshrq_n_s16(vaddq_s16(a[8], one), 2); a[9] = vshrq_n_s16(vaddq_s16(a[9], one), 2); a[10] = vshrq_n_s16(vaddq_s16(a[10], one), 2); a[11] = vshrq_n_s16(vaddq_s16(a[11], one), 2); a[12] = vshrq_n_s16(vaddq_s16(a[12], one), 2); a[13] = vshrq_n_s16(vaddq_s16(a[13], one), 2); a[14] = vshrq_n_s16(vaddq_s16(a[14], one), 2); a[15] = vshrq_n_s16(vaddq_s16(a[15], one), 2); } // fdct_round_shift((a +/- b) * c) static INLINE void butterfly_one_coeff(const int16x8_t a, const int16x8_t b, const tran_high_t c, int16x8_t *add, int16x8_t *sub) { const int32x4_t a0 = vmull_n_s16(vget_low_s16(a), c); const int32x4_t a1 = vmull_n_s16(vget_high_s16(a), c); const int32x4_t sum0 = vmlal_n_s16(a0, vget_low_s16(b), c); const int32x4_t sum1 = vmlal_n_s16(a1, vget_high_s16(b), c); const int32x4_t diff0 = vmlsl_n_s16(a0, vget_low_s16(b), c); const int32x4_t diff1 = vmlsl_n_s16(a1, vget_high_s16(b), c); const int16x4_t rounded0 = vqrshrn_n_s32(sum0, 14); const int16x4_t rounded1 = vqrshrn_n_s32(sum1, 14); const int16x4_t rounded2 = vqrshrn_n_s32(diff0, 14); const int16x4_t rounded3 = vqrshrn_n_s32(diff1, 14); *add = vcombine_s16(rounded0, rounded1); *sub = vcombine_s16(rounded2, rounded3); } // fdct_round_shift(a * c0 +/- b * c1) static INLINE void butterfly_two_coeff(const int16x8_t a, const int16x8_t b, const tran_coef_t c0, const tran_coef_t c1, int16x8_t *add, int16x8_t *sub) { const int32x4_t a0 = vmull_n_s16(vget_low_s16(a), c0); const int32x4_t a1 = vmull_n_s16(vget_high_s16(a), c0); const int32x4_t a2 = vmull_n_s16(vget_low_s16(a), c1); const int32x4_t a3 = 
vmull_n_s16(vget_high_s16(a), c1); const int32x4_t sum0 = vmlal_n_s16(a2, vget_low_s16(b), c0); const int32x4_t sum1 = vmlal_n_s16(a3, vget_high_s16(b), c0); const int32x4_t diff0 = vmlsl_n_s16(a0, vget_low_s16(b), c1); const int32x4_t diff1 = vmlsl_n_s16(a1, vget_high_s16(b), c1); const int16x4_t rounded0 = vqrshrn_n_s32(sum0, 14); const int16x4_t rounded1 = vqrshrn_n_s32(sum1, 14); const int16x4_t rounded2 = vqrshrn_n_s32(diff0, 14); const int16x4_t rounded3 = vqrshrn_n_s32(diff1, 14); *add = vcombine_s16(rounded0, rounded1); *sub = vcombine_s16(rounded2, rounded3); } // Transpose 8x8 to a new location. Don't use transpose_neon.h because those // are all in-place. static INLINE void transpose_8x8(const int16x8_t *a /*[8]*/, int16x8_t *b /*[8]*/) { // Swap 16 bit elements. const int16x8x2_t c0 = vtrnq_s16(a[0], a[1]); const int16x8x2_t c1 = vtrnq_s16(a[2], a[3]); const int16x8x2_t c2 = vtrnq_s16(a[4], a[5]); const int16x8x2_t c3 = vtrnq_s16(a[6], a[7]); // Swap 32 bit elements. const int32x4x2_t d0 = vtrnq_s32(vreinterpretq_s32_s16(c0.val[0]), vreinterpretq_s32_s16(c1.val[0])); const int32x4x2_t d1 = vtrnq_s32(vreinterpretq_s32_s16(c0.val[1]), vreinterpretq_s32_s16(c1.val[1])); const int32x4x2_t d2 = vtrnq_s32(vreinterpretq_s32_s16(c2.val[0]), vreinterpretq_s32_s16(c3.val[0])); const int32x4x2_t d3 = vtrnq_s32(vreinterpretq_s32_s16(c2.val[1]), vreinterpretq_s32_s16(c3.val[1])); // Swap 64 bit elements const int16x8x2_t e0 = vpx_vtrnq_s64_to_s16(d0.val[0], d2.val[0]); const int16x8x2_t e1 = vpx_vtrnq_s64_to_s16(d1.val[0], d3.val[0]); const int16x8x2_t e2 = vpx_vtrnq_s64_to_s16(d0.val[1], d2.val[1]); const int16x8x2_t e3 = vpx_vtrnq_s64_to_s16(d1.val[1], d3.val[1]); b[0] = e0.val[0]; b[1] = e1.val[0]; b[2] = e2.val[0]; b[3] = e3.val[0]; b[4] = e0.val[1]; b[5] = e1.val[1]; b[6] = e2.val[1]; b[7] = e3.val[1]; } // Main body of fdct16x16. 
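// [Editor's illustration, not part of the original file] A scalar model of
// butterfly_one_coeff() above; vqrshrn_n_s32(x, 14) computes
// (x + (1 << 13)) >> 14 with saturating narrowing, matching
// fdct_round_shift():
static INLINE void butterfly_one_coeff_sketch(int16_t a, int16_t b, int c,
                                              int16_t *add, int16_t *sub) {
  const int32_t sum = ((int32_t)a + b) * c;   // (a + b) * c
  const int32_t diff = ((int32_t)a - b) * c;  // (a - b) * c
  *add = (int16_t)((sum + (1 << 13)) >> 14);
  *sub = (int16_t)((diff + (1 << 13)) >> 14);
}
// The NEON implementation of that main body: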
static void dct_body(const int16x8_t *in /*[16]*/, int16x8_t *out /*[16]*/) { int16x8_t s[8]; int16x8_t x[4]; int16x8_t step[8]; // stage 1 // From fwd_txfm.c: Work on the first eight values; fdct8(input, // even_results);" s[0] = vaddq_s16(in[0], in[7]); s[1] = vaddq_s16(in[1], in[6]); s[2] = vaddq_s16(in[2], in[5]); s[3] = vaddq_s16(in[3], in[4]); s[4] = vsubq_s16(in[3], in[4]); s[5] = vsubq_s16(in[2], in[5]); s[6] = vsubq_s16(in[1], in[6]); s[7] = vsubq_s16(in[0], in[7]); // fdct4(step, step); x[0] = vaddq_s16(s[0], s[3]); x[1] = vaddq_s16(s[1], s[2]); x[2] = vsubq_s16(s[1], s[2]); x[3] = vsubq_s16(s[0], s[3]); // out[0] = fdct_round_shift((x0 + x1) * cospi_16_64) // out[8] = fdct_round_shift((x0 - x1) * cospi_16_64) butterfly_one_coeff(x[0], x[1], cospi_16_64, &out[0], &out[8]); // out[4] = fdct_round_shift(x3 * cospi_8_64 + x2 * cospi_24_64); // out[12] = fdct_round_shift(x3 * cospi_24_64 - x2 * cospi_8_64); butterfly_two_coeff(x[3], x[2], cospi_24_64, cospi_8_64, &out[4], &out[12]); // Stage 2 // Re-using source s5/s6 // s5 = fdct_round_shift((s6 - s5) * cospi_16_64) // s6 = fdct_round_shift((s6 + s5) * cospi_16_64) butterfly_one_coeff(s[6], s[5], cospi_16_64, &s[6], &s[5]); // Stage 3 x[0] = vaddq_s16(s[4], s[5]); x[1] = vsubq_s16(s[4], s[5]); x[2] = vsubq_s16(s[7], s[6]); x[3] = vaddq_s16(s[7], s[6]); // Stage 4 // out[2] = fdct_round_shift(x0 * cospi_28_64 + x3 * cospi_4_64) // out[14] = fdct_round_shift(x3 * cospi_28_64 + x0 * -cospi_4_64) butterfly_two_coeff(x[3], x[0], cospi_28_64, cospi_4_64, &out[2], &out[14]); // out[6] = fdct_round_shift(x1 * cospi_12_64 + x2 * cospi_20_64) // out[10] = fdct_round_shift(x2 * cospi_12_64 + x1 * -cospi_20_64) butterfly_two_coeff(x[2], x[1], cospi_12_64, cospi_20_64, &out[10], &out[6]); // step 2 // From fwd_txfm.c: Work on the next eight values; step1 -> odd_results" // That file distinguished between "in_high" and "step1" but the only // difference is that "in_high" is the first 8 values and "step 1" is the // second. Here, since they are all in one array, "step1" values are += 8. 
// step2[2] = fdct_round_shift((step1[5] - step1[2]) * cospi_16_64) // step2[3] = fdct_round_shift((step1[4] - step1[3]) * cospi_16_64) // step2[4] = fdct_round_shift((step1[4] + step1[3]) * cospi_16_64) // step2[5] = fdct_round_shift((step1[5] + step1[2]) * cospi_16_64) butterfly_one_coeff(in[13], in[10], cospi_16_64, &s[5], &s[2]); butterfly_one_coeff(in[12], in[11], cospi_16_64, &s[4], &s[3]); // step 3 s[0] = vaddq_s16(in[8], s[3]); s[1] = vaddq_s16(in[9], s[2]); x[0] = vsubq_s16(in[9], s[2]); x[1] = vsubq_s16(in[8], s[3]); x[2] = vsubq_s16(in[15], s[4]); x[3] = vsubq_s16(in[14], s[5]); s[6] = vaddq_s16(in[14], s[5]); s[7] = vaddq_s16(in[15], s[4]); // step 4 // step2[1] = fdct_round_shift(step3[1] *-cospi_8_64 + step3[6] * cospi_24_64) // step2[6] = fdct_round_shift(step3[1] * cospi_24_64 + step3[6] * cospi_8_64) butterfly_two_coeff(s[6], s[1], cospi_24_64, cospi_8_64, &s[6], &s[1]); // step2[2] = fdct_round_shift(step3[2] * cospi_24_64 + step3[5] * cospi_8_64) // step2[5] = fdct_round_shift(step3[2] * cospi_8_64 - step3[5] * cospi_24_64) butterfly_two_coeff(x[0], x[3], cospi_8_64, cospi_24_64, &s[2], &s[5]); // step 5 step[0] = vaddq_s16(s[0], s[1]); step[1] = vsubq_s16(s[0], s[1]); step[2] = vaddq_s16(x[1], s[2]); step[3] = vsubq_s16(x[1], s[2]); step[4] = vsubq_s16(x[2], s[5]); step[5] = vaddq_s16(x[2], s[5]); step[6] = vsubq_s16(s[7], s[6]); step[7] = vaddq_s16(s[7], s[6]); // step 6 // out[1] = fdct_round_shift(step1[0] * cospi_30_64 + step1[7] * cospi_2_64) // out[9] = fdct_round_shift(step1[1] * cospi_14_64 + step1[6] * cospi_18_64) // out[5] = fdct_round_shift(step1[2] * cospi_22_64 + step1[5] * cospi_10_64) // out[13] = fdct_round_shift(step1[3] * cospi_6_64 + step1[4] * cospi_26_64) // out[3] = fdct_round_shift(step1[3] * -cospi_26_64 + step1[4] * cospi_6_64) // out[11] = fdct_round_shift(step1[2] * -cospi_10_64 + step1[5] * // cospi_22_64) // out[7] = fdct_round_shift(step1[1] * -cospi_18_64 + step1[6] * cospi_14_64) // out[15] = fdct_round_shift(step1[0] * -cospi_2_64 + step1[7] * cospi_30_64) butterfly_two_coeff(step[6], step[1], cospi_14_64, cospi_18_64, &out[9], &out[7]); butterfly_two_coeff(step[7], step[0], cospi_30_64, cospi_2_64, &out[1], &out[15]); butterfly_two_coeff(step[4], step[3], cospi_6_64, cospi_26_64, &out[13], &out[3]); butterfly_two_coeff(step[5], step[2], cospi_22_64, cospi_10_64, &out[5], &out[11]); } void vpx_fdct16x16_neon(const int16_t *input, tran_low_t *output, int stride) { int16x8_t temp0[16]; int16x8_t temp1[16]; int16x8_t temp2[16]; int16x8_t temp3[16]; // Left half. load(input, stride, temp0); cross_input(temp0, temp1, 0); dct_body(temp1, temp0); // Right half. load(input + 8, stride, temp1); cross_input(temp1, temp2, 0); dct_body(temp2, temp1); // Transpose top left and top right quarters into one contiguous location to // process to the top half. transpose_8x8(&temp0[0], &temp2[0]); transpose_8x8(&temp1[0], &temp2[8]); partial_round_shift(temp2); cross_input(temp2, temp3, 1); dct_body(temp3, temp2); transpose_s16_8x8(&temp2[0], &temp2[1], &temp2[2], &temp2[3], &temp2[4], &temp2[5], &temp2[6], &temp2[7]); transpose_s16_8x8(&temp2[8], &temp2[9], &temp2[10], &temp2[11], &temp2[12], &temp2[13], &temp2[14], &temp2[15]); store(output, temp2); store(output + 8, temp2 + 8); output += 8 * 16; // Transpose bottom left and bottom right quarters into one contiguous // location to process to the bottom half. 
transpose_8x8(&temp0[8], &temp1[0]); transpose_s16_8x8(&temp1[8], &temp1[9], &temp1[10], &temp1[11], &temp1[12], &temp1[13], &temp1[14], &temp1[15]); partial_round_shift(temp1); cross_input(temp1, temp0, 1); dct_body(temp0, temp1); transpose_s16_8x8(&temp1[0], &temp1[1], &temp1[2], &temp1[3], &temp1[4], &temp1[5], &temp1[6], &temp1[7]); transpose_s16_8x8(&temp1[8], &temp1[9], &temp1[10], &temp1[11], &temp1[12], &temp1[13], &temp1[14], &temp1[15]); store(output, temp1); store(output + 8, temp1 + 8); } #endif // !defined(__clang__) && !defined(__ANDROID__) && defined(__GNUC__) && // __GNUC__ == 4 && __GNUC_MINOR__ == 9 && __GNUC_PATCHLEVEL__ < 4 libvpx-1.8.2/vpx_dsp/arm/fdct32x32_neon.c000066400000000000000000001501261357355204000201060ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/txfm_common.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/transpose_neon.h" // Most gcc 4.9 distributions outside of Android do not generate correct code // for this function. #if !defined(__clang__) && !defined(__ANDROID__) && defined(__GNUC__) && \ __GNUC__ == 4 && __GNUC_MINOR__ <= 9 void vpx_fdct32x32_neon(const int16_t *input, tran_low_t *output, int stride) { vpx_fdct32x32_c(input, output, stride); } void vpx_fdct32x32_rd_neon(const int16_t *input, tran_low_t *output, int stride) { vpx_fdct32x32_rd_c(input, output, stride); } #else #define LOAD_INCREMENT(src, stride, dest, index) \ do { \ dest[index] = vld1q_s16(src); \ src += stride; \ } while (0) #define ADD_S16(src, index0, index1, dest, index3) \ do { \ dest[index3] = vaddq_s16(src[index0], src[index1]); \ } while (0) #define ADD_SHIFT_S16(src, index0, index1) \ do { \ src[index1] = vshlq_n_s16(vsubq_s16(src[index0], src[index1]), 2); \ } while (0) // Load, cross, and multiply by 4. Load the first 8 and last 8, then the middle // 16. Doing sets of 16 at a time. Maybe sets of 8 would be better? // An expansion example for these macros is given below for illustration.
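// [Editor's note, illustration only] For example, ADD_SHIFT_S16(b, 7, 24)
// above expands to
//   b[24] = vshlq_n_s16(vsubq_s16(b[7], b[24]), 2);
// i.e. b[24] = (b[7] - b[24]) * 4, the subtract-and-scale half of the
// stage-1 butterfly performed while loading.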
static INLINE void load(const int16_t *a, int stride, int16x8_t *b) { const int16_t *a_end = a + 24 * stride; int16x8_t c[8]; LOAD_INCREMENT(a, stride, b, 0); LOAD_INCREMENT(a, stride, b, 1); LOAD_INCREMENT(a, stride, b, 2); LOAD_INCREMENT(a, stride, b, 3); LOAD_INCREMENT(a, stride, b, 4); LOAD_INCREMENT(a, stride, b, 5); LOAD_INCREMENT(a, stride, b, 6); LOAD_INCREMENT(a, stride, b, 7); LOAD_INCREMENT(a_end, stride, b, 24); LOAD_INCREMENT(a_end, stride, b, 25); LOAD_INCREMENT(a_end, stride, b, 26); LOAD_INCREMENT(a_end, stride, b, 27); LOAD_INCREMENT(a_end, stride, b, 28); LOAD_INCREMENT(a_end, stride, b, 29); LOAD_INCREMENT(a_end, stride, b, 30); LOAD_INCREMENT(a_end, stride, b, 31); ADD_S16(b, 0, 31, c, 0); ADD_S16(b, 1, 30, c, 1); ADD_S16(b, 2, 29, c, 2); ADD_S16(b, 3, 28, c, 3); ADD_S16(b, 4, 27, c, 4); ADD_S16(b, 5, 26, c, 5); ADD_S16(b, 6, 25, c, 6); ADD_S16(b, 7, 24, c, 7); ADD_SHIFT_S16(b, 7, 24); ADD_SHIFT_S16(b, 6, 25); ADD_SHIFT_S16(b, 5, 26); ADD_SHIFT_S16(b, 4, 27); ADD_SHIFT_S16(b, 3, 28); ADD_SHIFT_S16(b, 2, 29); ADD_SHIFT_S16(b, 1, 30); ADD_SHIFT_S16(b, 0, 31); b[0] = vshlq_n_s16(c[0], 2); b[1] = vshlq_n_s16(c[1], 2); b[2] = vshlq_n_s16(c[2], 2); b[3] = vshlq_n_s16(c[3], 2); b[4] = vshlq_n_s16(c[4], 2); b[5] = vshlq_n_s16(c[5], 2); b[6] = vshlq_n_s16(c[6], 2); b[7] = vshlq_n_s16(c[7], 2); LOAD_INCREMENT(a, stride, b, 8); LOAD_INCREMENT(a, stride, b, 9); LOAD_INCREMENT(a, stride, b, 10); LOAD_INCREMENT(a, stride, b, 11); LOAD_INCREMENT(a, stride, b, 12); LOAD_INCREMENT(a, stride, b, 13); LOAD_INCREMENT(a, stride, b, 14); LOAD_INCREMENT(a, stride, b, 15); LOAD_INCREMENT(a, stride, b, 16); LOAD_INCREMENT(a, stride, b, 17); LOAD_INCREMENT(a, stride, b, 18); LOAD_INCREMENT(a, stride, b, 19); LOAD_INCREMENT(a, stride, b, 20); LOAD_INCREMENT(a, stride, b, 21); LOAD_INCREMENT(a, stride, b, 22); LOAD_INCREMENT(a, stride, b, 23); ADD_S16(b, 8, 23, c, 0); ADD_S16(b, 9, 22, c, 1); ADD_S16(b, 10, 21, c, 2); ADD_S16(b, 11, 20, c, 3); ADD_S16(b, 12, 19, c, 4); ADD_S16(b, 13, 18, c, 5); ADD_S16(b, 14, 17, c, 6); ADD_S16(b, 15, 16, c, 7); ADD_SHIFT_S16(b, 15, 16); ADD_SHIFT_S16(b, 14, 17); ADD_SHIFT_S16(b, 13, 18); ADD_SHIFT_S16(b, 12, 19); ADD_SHIFT_S16(b, 11, 20); ADD_SHIFT_S16(b, 10, 21); ADD_SHIFT_S16(b, 9, 22); ADD_SHIFT_S16(b, 8, 23); b[8] = vshlq_n_s16(c[0], 2); b[9] = vshlq_n_s16(c[1], 2); b[10] = vshlq_n_s16(c[2], 2); b[11] = vshlq_n_s16(c[3], 2); b[12] = vshlq_n_s16(c[4], 2); b[13] = vshlq_n_s16(c[5], 2); b[14] = vshlq_n_s16(c[6], 2); b[15] = vshlq_n_s16(c[7], 2); } #undef LOAD_INCREMENT #undef ADD_S16 #undef ADD_SHIFT_S16 #define STORE_S16(src, index, dest) \ do { \ store_s16q_to_tran_low(dest, src[index]); \ dest += 8; \ } while (0); // Store 32 16x8 values, assuming stride == 32. // Slight twist: store horizontally in blocks of 8. 
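// [Editor's note, illustration only] The output stride is 32 and each
// int16x8_t covers 8 coefficients, so a full row is written as four
// registers; the first row is b[0], b[8], b[16], b[24], which is why the
// STORE_S16 calls below are interleaved in that order.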
static INLINE void store(tran_low_t *a, const int16x8_t *b) { STORE_S16(b, 0, a); STORE_S16(b, 8, a); STORE_S16(b, 16, a); STORE_S16(b, 24, a); STORE_S16(b, 1, a); STORE_S16(b, 9, a); STORE_S16(b, 17, a); STORE_S16(b, 25, a); STORE_S16(b, 2, a); STORE_S16(b, 10, a); STORE_S16(b, 18, a); STORE_S16(b, 26, a); STORE_S16(b, 3, a); STORE_S16(b, 11, a); STORE_S16(b, 19, a); STORE_S16(b, 27, a); STORE_S16(b, 4, a); STORE_S16(b, 12, a); STORE_S16(b, 20, a); STORE_S16(b, 28, a); STORE_S16(b, 5, a); STORE_S16(b, 13, a); STORE_S16(b, 21, a); STORE_S16(b, 29, a); STORE_S16(b, 6, a); STORE_S16(b, 14, a); STORE_S16(b, 22, a); STORE_S16(b, 30, a); STORE_S16(b, 7, a); STORE_S16(b, 15, a); STORE_S16(b, 23, a); STORE_S16(b, 31, a); } #undef STORE_S16 // fdct_round_shift((a +/- b) * c) static INLINE void butterfly_one_coeff(const int16x8_t a, const int16x8_t b, const tran_high_t constant, int16x8_t *add, int16x8_t *sub) { const int32x4_t a0 = vmull_n_s16(vget_low_s16(a), constant); const int32x4_t a1 = vmull_n_s16(vget_high_s16(a), constant); const int32x4_t sum0 = vmlal_n_s16(a0, vget_low_s16(b), constant); const int32x4_t sum1 = vmlal_n_s16(a1, vget_high_s16(b), constant); const int32x4_t diff0 = vmlsl_n_s16(a0, vget_low_s16(b), constant); const int32x4_t diff1 = vmlsl_n_s16(a1, vget_high_s16(b), constant); const int16x4_t rounded0 = vqrshrn_n_s32(sum0, DCT_CONST_BITS); const int16x4_t rounded1 = vqrshrn_n_s32(sum1, DCT_CONST_BITS); const int16x4_t rounded2 = vqrshrn_n_s32(diff0, DCT_CONST_BITS); const int16x4_t rounded3 = vqrshrn_n_s32(diff1, DCT_CONST_BITS); *add = vcombine_s16(rounded0, rounded1); *sub = vcombine_s16(rounded2, rounded3); } // fdct_round_shift(a * c0 +/- b * c1) static INLINE void butterfly_two_coeff(const int16x8_t a, const int16x8_t b, const tran_coef_t constant0, const tran_coef_t constant1, int16x8_t *add, int16x8_t *sub) { const int32x4_t a0 = vmull_n_s16(vget_low_s16(a), constant0); const int32x4_t a1 = vmull_n_s16(vget_high_s16(a), constant0); const int32x4_t a2 = vmull_n_s16(vget_low_s16(a), constant1); const int32x4_t a3 = vmull_n_s16(vget_high_s16(a), constant1); const int32x4_t sum0 = vmlal_n_s16(a2, vget_low_s16(b), constant0); const int32x4_t sum1 = vmlal_n_s16(a3, vget_high_s16(b), constant0); const int32x4_t diff0 = vmlsl_n_s16(a0, vget_low_s16(b), constant1); const int32x4_t diff1 = vmlsl_n_s16(a1, vget_high_s16(b), constant1); const int16x4_t rounded0 = vqrshrn_n_s32(sum0, DCT_CONST_BITS); const int16x4_t rounded1 = vqrshrn_n_s32(sum1, DCT_CONST_BITS); const int16x4_t rounded2 = vqrshrn_n_s32(diff0, DCT_CONST_BITS); const int16x4_t rounded3 = vqrshrn_n_s32(diff1, DCT_CONST_BITS); *add = vcombine_s16(rounded0, rounded1); *sub = vcombine_s16(rounded2, rounded3); } // Add 2 if positive, 1 if negative, and shift by 2. // In practice, subtract the sign bit, then shift with rounding. static INLINE int16x8_t sub_round_shift(const int16x8_t a) { const uint16x8_t a_u16 = vreinterpretq_u16_s16(a); const uint16x8_t a_sign_u16 = vshrq_n_u16(a_u16, 15); const int16x8_t a_sign_s16 = vreinterpretq_s16_u16(a_sign_u16); return vrshrq_n_s16(vsubq_s16(a, a_sign_s16), 2); } static void dct_body_first_pass(const int16x8_t *in, int16x8_t *out) { int16x8_t a[32]; int16x8_t b[32]; // Stage 1: Done as part of the load. // Stage 2. // Mini cross. X the first 16 values and the middle 8 of the second half. 
a[0] = vaddq_s16(in[0], in[15]); a[1] = vaddq_s16(in[1], in[14]); a[2] = vaddq_s16(in[2], in[13]); a[3] = vaddq_s16(in[3], in[12]); a[4] = vaddq_s16(in[4], in[11]); a[5] = vaddq_s16(in[5], in[10]); a[6] = vaddq_s16(in[6], in[9]); a[7] = vaddq_s16(in[7], in[8]); a[8] = vsubq_s16(in[7], in[8]); a[9] = vsubq_s16(in[6], in[9]); a[10] = vsubq_s16(in[5], in[10]); a[11] = vsubq_s16(in[4], in[11]); a[12] = vsubq_s16(in[3], in[12]); a[13] = vsubq_s16(in[2], in[13]); a[14] = vsubq_s16(in[1], in[14]); a[15] = vsubq_s16(in[0], in[15]); a[16] = in[16]; a[17] = in[17]; a[18] = in[18]; a[19] = in[19]; butterfly_one_coeff(in[27], in[20], cospi_16_64, &a[27], &a[20]); butterfly_one_coeff(in[26], in[21], cospi_16_64, &a[26], &a[21]); butterfly_one_coeff(in[25], in[22], cospi_16_64, &a[25], &a[22]); butterfly_one_coeff(in[24], in[23], cospi_16_64, &a[24], &a[23]); a[28] = in[28]; a[29] = in[29]; a[30] = in[30]; a[31] = in[31]; // Stage 3. b[0] = vaddq_s16(a[0], a[7]); b[1] = vaddq_s16(a[1], a[6]); b[2] = vaddq_s16(a[2], a[5]); b[3] = vaddq_s16(a[3], a[4]); b[4] = vsubq_s16(a[3], a[4]); b[5] = vsubq_s16(a[2], a[5]); b[6] = vsubq_s16(a[1], a[6]); b[7] = vsubq_s16(a[0], a[7]); b[8] = a[8]; b[9] = a[9]; butterfly_one_coeff(a[13], a[10], cospi_16_64, &b[13], &b[10]); butterfly_one_coeff(a[12], a[11], cospi_16_64, &b[12], &b[11]); b[14] = a[14]; b[15] = a[15]; b[16] = vaddq_s16(in[16], a[23]); b[17] = vaddq_s16(in[17], a[22]); b[18] = vaddq_s16(in[18], a[21]); b[19] = vaddq_s16(in[19], a[20]); b[20] = vsubq_s16(in[19], a[20]); b[21] = vsubq_s16(in[18], a[21]); b[22] = vsubq_s16(in[17], a[22]); b[23] = vsubq_s16(in[16], a[23]); b[24] = vsubq_s16(in[31], a[24]); b[25] = vsubq_s16(in[30], a[25]); b[26] = vsubq_s16(in[29], a[26]); b[27] = vsubq_s16(in[28], a[27]); b[28] = vaddq_s16(in[28], a[27]); b[29] = vaddq_s16(in[29], a[26]); b[30] = vaddq_s16(in[30], a[25]); b[31] = vaddq_s16(in[31], a[24]); // Stage 4. a[0] = vaddq_s16(b[0], b[3]); a[1] = vaddq_s16(b[1], b[2]); a[2] = vsubq_s16(b[1], b[2]); a[3] = vsubq_s16(b[0], b[3]); a[4] = b[4]; butterfly_one_coeff(b[6], b[5], cospi_16_64, &a[6], &a[5]); a[7] = b[7]; a[8] = vaddq_s16(b[8], b[11]); a[9] = vaddq_s16(b[9], b[10]); a[10] = vsubq_s16(b[9], b[10]); a[11] = vsubq_s16(b[8], b[11]); a[12] = vsubq_s16(b[15], b[12]); a[13] = vsubq_s16(b[14], b[13]); a[14] = vaddq_s16(b[14], b[13]); a[15] = vaddq_s16(b[15], b[12]); a[16] = b[16]; a[17] = b[17]; butterfly_two_coeff(b[29], b[18], cospi_24_64, cospi_8_64, &a[29], &a[18]); butterfly_two_coeff(b[28], b[19], cospi_24_64, cospi_8_64, &a[28], &a[19]); butterfly_two_coeff(b[27], b[20], -cospi_8_64, cospi_24_64, &a[27], &a[20]); butterfly_two_coeff(b[26], b[21], -cospi_8_64, cospi_24_64, &a[26], &a[21]); a[22] = b[22]; a[23] = b[23]; a[24] = b[24]; a[25] = b[25]; a[30] = b[30]; a[31] = b[31]; // Stage 5. 
butterfly_one_coeff(a[0], a[1], cospi_16_64, &b[0], &b[1]); butterfly_two_coeff(a[3], a[2], cospi_24_64, cospi_8_64, &b[2], &b[3]); b[4] = vaddq_s16(a[4], a[5]); b[5] = vsubq_s16(a[4], a[5]); b[6] = vsubq_s16(a[7], a[6]); b[7] = vaddq_s16(a[7], a[6]); b[8] = a[8]; butterfly_two_coeff(a[14], a[9], cospi_24_64, cospi_8_64, &b[14], &b[9]); butterfly_two_coeff(a[13], a[10], -cospi_8_64, cospi_24_64, &b[13], &b[10]); b[11] = a[11]; b[12] = a[12]; b[15] = a[15]; b[16] = vaddq_s16(a[19], a[16]); b[17] = vaddq_s16(a[18], a[17]); b[18] = vsubq_s16(a[17], a[18]); b[19] = vsubq_s16(a[16], a[19]); b[20] = vsubq_s16(a[23], a[20]); b[21] = vsubq_s16(a[22], a[21]); b[22] = vaddq_s16(a[21], a[22]); b[23] = vaddq_s16(a[20], a[23]); b[24] = vaddq_s16(a[27], a[24]); b[25] = vaddq_s16(a[26], a[25]); b[26] = vsubq_s16(a[25], a[26]); b[27] = vsubq_s16(a[24], a[27]); b[28] = vsubq_s16(a[31], a[28]); b[29] = vsubq_s16(a[30], a[29]); b[30] = vaddq_s16(a[29], a[30]); b[31] = vaddq_s16(a[28], a[31]); // Stage 6. a[0] = b[0]; a[1] = b[1]; a[2] = b[2]; a[3] = b[3]; butterfly_two_coeff(b[7], b[4], cospi_28_64, cospi_4_64, &a[4], &a[7]); butterfly_two_coeff(b[6], b[5], cospi_12_64, cospi_20_64, &a[5], &a[6]); a[8] = vaddq_s16(b[8], b[9]); a[9] = vsubq_s16(b[8], b[9]); a[10] = vsubq_s16(b[11], b[10]); a[11] = vaddq_s16(b[11], b[10]); a[12] = vaddq_s16(b[12], b[13]); a[13] = vsubq_s16(b[12], b[13]); a[14] = vsubq_s16(b[15], b[14]); a[15] = vaddq_s16(b[15], b[14]); a[16] = b[16]; a[19] = b[19]; a[20] = b[20]; a[23] = b[23]; a[24] = b[24]; a[27] = b[27]; a[28] = b[28]; a[31] = b[31]; butterfly_two_coeff(b[30], b[17], cospi_28_64, cospi_4_64, &a[30], &a[17]); butterfly_two_coeff(b[29], b[18], -cospi_4_64, cospi_28_64, &a[29], &a[18]); butterfly_two_coeff(b[26], b[21], cospi_12_64, cospi_20_64, &a[26], &a[21]); butterfly_two_coeff(b[25], b[22], -cospi_20_64, cospi_12_64, &a[25], &a[22]); // Stage 7. b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3]; b[4] = a[4]; b[5] = a[5]; b[6] = a[6]; b[7] = a[7]; butterfly_two_coeff(a[15], a[8], cospi_30_64, cospi_2_64, &b[8], &b[15]); butterfly_two_coeff(a[14], a[9], cospi_14_64, cospi_18_64, &b[9], &b[14]); butterfly_two_coeff(a[13], a[10], cospi_22_64, cospi_10_64, &b[10], &b[13]); butterfly_two_coeff(a[12], a[11], cospi_6_64, cospi_26_64, &b[11], &b[12]); b[16] = vaddq_s16(a[16], a[17]); b[17] = vsubq_s16(a[16], a[17]); b[18] = vsubq_s16(a[19], a[18]); b[19] = vaddq_s16(a[19], a[18]); b[20] = vaddq_s16(a[20], a[21]); b[21] = vsubq_s16(a[20], a[21]); b[22] = vsubq_s16(a[23], a[22]); b[23] = vaddq_s16(a[23], a[22]); b[24] = vaddq_s16(a[24], a[25]); b[25] = vsubq_s16(a[24], a[25]); b[26] = vsubq_s16(a[27], a[26]); b[27] = vaddq_s16(a[27], a[26]); b[28] = vaddq_s16(a[28], a[29]); b[29] = vsubq_s16(a[28], a[29]); b[30] = vsubq_s16(a[31], a[30]); b[31] = vaddq_s16(a[31], a[30]); // Final stage. 
// Also compute partial rounding shift: // output[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; out[0] = sub_round_shift(b[0]); out[16] = sub_round_shift(b[1]); out[8] = sub_round_shift(b[2]); out[24] = sub_round_shift(b[3]); out[4] = sub_round_shift(b[4]); out[20] = sub_round_shift(b[5]); out[12] = sub_round_shift(b[6]); out[28] = sub_round_shift(b[7]); out[2] = sub_round_shift(b[8]); out[18] = sub_round_shift(b[9]); out[10] = sub_round_shift(b[10]); out[26] = sub_round_shift(b[11]); out[6] = sub_round_shift(b[12]); out[22] = sub_round_shift(b[13]); out[14] = sub_round_shift(b[14]); out[30] = sub_round_shift(b[15]); butterfly_two_coeff(b[31], b[16], cospi_31_64, cospi_1_64, &a[1], &a[31]); out[1] = sub_round_shift(a[1]); out[31] = sub_round_shift(a[31]); butterfly_two_coeff(b[30], b[17], cospi_15_64, cospi_17_64, &a[17], &a[15]); out[17] = sub_round_shift(a[17]); out[15] = sub_round_shift(a[15]); butterfly_two_coeff(b[29], b[18], cospi_23_64, cospi_9_64, &a[9], &a[23]); out[9] = sub_round_shift(a[9]); out[23] = sub_round_shift(a[23]); butterfly_two_coeff(b[28], b[19], cospi_7_64, cospi_25_64, &a[25], &a[7]); out[25] = sub_round_shift(a[25]); out[7] = sub_round_shift(a[7]); butterfly_two_coeff(b[27], b[20], cospi_27_64, cospi_5_64, &a[5], &a[27]); out[5] = sub_round_shift(a[5]); out[27] = sub_round_shift(a[27]); butterfly_two_coeff(b[26], b[21], cospi_11_64, cospi_21_64, &a[21], &a[11]); out[21] = sub_round_shift(a[21]); out[11] = sub_round_shift(a[11]); butterfly_two_coeff(b[25], b[22], cospi_19_64, cospi_13_64, &a[13], &a[19]); out[13] = sub_round_shift(a[13]); out[19] = sub_round_shift(a[19]); butterfly_two_coeff(b[24], b[23], cospi_3_64, cospi_29_64, &a[29], &a[3]); out[29] = sub_round_shift(a[29]); out[3] = sub_round_shift(a[3]); } #define PASS_THROUGH(src, dst, element) \ do { \ dst##_lo[element] = src##_lo[element]; \ dst##_hi[element] = src##_hi[element]; \ } while (0) #define ADD_S16_S32(a, left_index, right_index, b, b_index) \ do { \ b##_lo[b_index] = \ vaddl_s16(vget_low_s16(a[left_index]), vget_low_s16(a[right_index])); \ b##_hi[b_index] = vaddl_s16(vget_high_s16(a[left_index]), \ vget_high_s16(a[right_index])); \ } while (0) #define SUB_S16_S32(a, left_index, right_index, b, b_index) \ do { \ b##_lo[b_index] = \ vsubl_s16(vget_low_s16(a[left_index]), vget_low_s16(a[right_index])); \ b##_hi[b_index] = vsubl_s16(vget_high_s16(a[left_index]), \ vget_high_s16(a[right_index])); \ } while (0) #define ADDW_S16_S32(a, a_index, b, b_index, c, c_index) \ do { \ c##_lo[c_index] = vaddw_s16(a##_lo[a_index], vget_low_s16(b[b_index])); \ c##_hi[c_index] = vaddw_s16(a##_hi[a_index], vget_high_s16(b[b_index])); \ } while (0) #define SUBW_S16_S32(a, a_index, b, b_index, temp, temp_index, c, c_index) \ do { \ temp##_lo[temp_index] = vmovl_s16(vget_low_s16(a[a_index])); \ temp##_hi[temp_index] = vmovl_s16(vget_high_s16(a[a_index])); \ c##_lo[c_index] = vsubq_s32(temp##_lo[temp_index], b##_lo[b_index]); \ c##_hi[c_index] = vsubq_s32(temp##_hi[temp_index], b##_hi[b_index]); \ } while (0) #define ADD_S32(a, left_index, right_index, b, b_index) \ do { \ b##_lo[b_index] = vaddq_s32(a##_lo[left_index], a##_lo[right_index]); \ b##_hi[b_index] = vaddq_s32(a##_hi[left_index], a##_hi[right_index]); \ } while (0) #define SUB_S32(a, left_index, right_index, b, b_index) \ do { \ b##_lo[b_index] = vsubq_s32(a##_lo[left_index], a##_lo[right_index]); \ b##_hi[b_index] = vsubq_s32(a##_hi[left_index], a##_hi[right_index]); \ } while (0) // Like butterfly_one_coeff, but don't narrow results. 
static INLINE void butterfly_one_coeff_s16_s32( const int16x8_t a, const int16x8_t b, const tran_high_t constant, int32x4_t *add_lo, int32x4_t *add_hi, int32x4_t *sub_lo, int32x4_t *sub_hi) { const int32x4_t a0 = vmull_n_s16(vget_low_s16(a), constant); const int32x4_t a1 = vmull_n_s16(vget_high_s16(a), constant); const int32x4_t sum0 = vmlal_n_s16(a0, vget_low_s16(b), constant); const int32x4_t sum1 = vmlal_n_s16(a1, vget_high_s16(b), constant); const int32x4_t diff0 = vmlsl_n_s16(a0, vget_low_s16(b), constant); const int32x4_t diff1 = vmlsl_n_s16(a1, vget_high_s16(b), constant); *add_lo = vrshrq_n_s32(sum0, DCT_CONST_BITS); *add_hi = vrshrq_n_s32(sum1, DCT_CONST_BITS); *sub_lo = vrshrq_n_s32(diff0, DCT_CONST_BITS); *sub_hi = vrshrq_n_s32(diff1, DCT_CONST_BITS); } #define BUTTERFLY_ONE_S16_S32(a, left_index, right_index, constant, b, \ add_index, sub_index) \ do { \ butterfly_one_coeff_s16_s32(a[left_index], a[right_index], constant, \ &b##_lo[add_index], &b##_hi[add_index], \ &b##_lo[sub_index], &b##_hi[sub_index]); \ } while (0) // Like butterfly_one_coeff, but with s32. static INLINE void butterfly_one_coeff_s32( const int32x4_t a_lo, const int32x4_t a_hi, const int32x4_t b_lo, const int32x4_t b_hi, const int32_t constant, int32x4_t *add_lo, int32x4_t *add_hi, int32x4_t *sub_lo, int32x4_t *sub_hi) { const int32x4_t a_lo_0 = vmulq_n_s32(a_lo, constant); const int32x4_t a_hi_0 = vmulq_n_s32(a_hi, constant); const int32x4_t sum0 = vmlaq_n_s32(a_lo_0, b_lo, constant); const int32x4_t sum1 = vmlaq_n_s32(a_hi_0, b_hi, constant); const int32x4_t diff0 = vmlsq_n_s32(a_lo_0, b_lo, constant); const int32x4_t diff1 = vmlsq_n_s32(a_hi_0, b_hi, constant); *add_lo = vrshrq_n_s32(sum0, DCT_CONST_BITS); *add_hi = vrshrq_n_s32(sum1, DCT_CONST_BITS); *sub_lo = vrshrq_n_s32(diff0, DCT_CONST_BITS); *sub_hi = vrshrq_n_s32(diff1, DCT_CONST_BITS); } #define BUTTERFLY_ONE_S32(a, left_index, right_index, constant, b, add_index, \ sub_index) \ do { \ butterfly_one_coeff_s32(a##_lo[left_index], a##_hi[left_index], \ a##_lo[right_index], a##_hi[right_index], \ constant, &b##_lo[add_index], &b##_hi[add_index], \ &b##_lo[sub_index], &b##_hi[sub_index]); \ } while (0) // Like butterfly_two_coeff, but with s32. static INLINE void butterfly_two_coeff_s32( const int32x4_t a_lo, const int32x4_t a_hi, const int32x4_t b_lo, const int32x4_t b_hi, const int32_t constant0, const int32_t constant1, int32x4_t *add_lo, int32x4_t *add_hi, int32x4_t *sub_lo, int32x4_t *sub_hi) { const int32x4_t a0 = vmulq_n_s32(a_lo, constant0); const int32x4_t a1 = vmulq_n_s32(a_hi, constant0); const int32x4_t a2 = vmulq_n_s32(a_lo, constant1); const int32x4_t a3 = vmulq_n_s32(a_hi, constant1); const int32x4_t sum0 = vmlaq_n_s32(a2, b_lo, constant0); const int32x4_t sum1 = vmlaq_n_s32(a3, b_hi, constant0); const int32x4_t diff0 = vmlsq_n_s32(a0, b_lo, constant1); const int32x4_t diff1 = vmlsq_n_s32(a1, b_hi, constant1); *add_lo = vrshrq_n_s32(sum0, DCT_CONST_BITS); *add_hi = vrshrq_n_s32(sum1, DCT_CONST_BITS); *sub_lo = vrshrq_n_s32(diff0, DCT_CONST_BITS); *sub_hi = vrshrq_n_s32(diff1, DCT_CONST_BITS); } #define BUTTERFLY_TWO_S32(a, left_index, right_index, left_constant, \ right_constant, b, add_index, sub_index) \ do { \ butterfly_two_coeff_s32(a##_lo[left_index], a##_hi[left_index], \ a##_lo[right_index], a##_hi[right_index], \ left_constant, right_constant, &b##_lo[add_index], \ &b##_hi[add_index], &b##_lo[sub_index], \ &b##_hi[sub_index]); \ } while (0) // Add 1 if positive, 2 if negative, and shift by 2. 
// In practice, add 1, then add the sign bit, then shift without rounding. static INLINE int16x8_t add_round_shift_s32(const int32x4_t a_lo, const int32x4_t a_hi) { const int32x4_t one = vdupq_n_s32(1); const uint32x4_t a_lo_u32 = vreinterpretq_u32_s32(a_lo); const uint32x4_t a_lo_sign_u32 = vshrq_n_u32(a_lo_u32, 31); const int32x4_t a_lo_sign_s32 = vreinterpretq_s32_u32(a_lo_sign_u32); const int16x4_t b_lo = vshrn_n_s32(vqaddq_s32(vqaddq_s32(a_lo, a_lo_sign_s32), one), 2); const uint32x4_t a_hi_u32 = vreinterpretq_u32_s32(a_hi); const uint32x4_t a_hi_sign_u32 = vshrq_n_u32(a_hi_u32, 31); const int32x4_t a_hi_sign_s32 = vreinterpretq_s32_u32(a_hi_sign_u32); const int16x4_t b_hi = vshrn_n_s32(vqaddq_s32(vqaddq_s32(a_hi, a_hi_sign_s32), one), 2); return vcombine_s16(b_lo, b_hi); } static void dct_body_second_pass(const int16x8_t *in, int16x8_t *out) { int16x8_t a[32]; int16x8_t b[32]; int32x4_t c_lo[32]; int32x4_t c_hi[32]; int32x4_t d_lo[32]; int32x4_t d_hi[32]; // Stage 1. Done as part of the load for the first pass. a[0] = vaddq_s16(in[0], in[31]); a[1] = vaddq_s16(in[1], in[30]); a[2] = vaddq_s16(in[2], in[29]); a[3] = vaddq_s16(in[3], in[28]); a[4] = vaddq_s16(in[4], in[27]); a[5] = vaddq_s16(in[5], in[26]); a[6] = vaddq_s16(in[6], in[25]); a[7] = vaddq_s16(in[7], in[24]); a[8] = vaddq_s16(in[8], in[23]); a[9] = vaddq_s16(in[9], in[22]); a[10] = vaddq_s16(in[10], in[21]); a[11] = vaddq_s16(in[11], in[20]); a[12] = vaddq_s16(in[12], in[19]); a[13] = vaddq_s16(in[13], in[18]); a[14] = vaddq_s16(in[14], in[17]); a[15] = vaddq_s16(in[15], in[16]); a[16] = vsubq_s16(in[15], in[16]); a[17] = vsubq_s16(in[14], in[17]); a[18] = vsubq_s16(in[13], in[18]); a[19] = vsubq_s16(in[12], in[19]); a[20] = vsubq_s16(in[11], in[20]); a[21] = vsubq_s16(in[10], in[21]); a[22] = vsubq_s16(in[9], in[22]); a[23] = vsubq_s16(in[8], in[23]); a[24] = vsubq_s16(in[7], in[24]); a[25] = vsubq_s16(in[6], in[25]); a[26] = vsubq_s16(in[5], in[26]); a[27] = vsubq_s16(in[4], in[27]); a[28] = vsubq_s16(in[3], in[28]); a[29] = vsubq_s16(in[2], in[29]); a[30] = vsubq_s16(in[1], in[30]); a[31] = vsubq_s16(in[0], in[31]); // Stage 2. b[0] = vaddq_s16(a[0], a[15]); b[1] = vaddq_s16(a[1], a[14]); b[2] = vaddq_s16(a[2], a[13]); b[3] = vaddq_s16(a[3], a[12]); b[4] = vaddq_s16(a[4], a[11]); b[5] = vaddq_s16(a[5], a[10]); b[6] = vaddq_s16(a[6], a[9]); b[7] = vaddq_s16(a[7], a[8]); b[8] = vsubq_s16(a[7], a[8]); b[9] = vsubq_s16(a[6], a[9]); b[10] = vsubq_s16(a[5], a[10]); b[11] = vsubq_s16(a[4], a[11]); b[12] = vsubq_s16(a[3], a[12]); b[13] = vsubq_s16(a[2], a[13]); b[14] = vsubq_s16(a[1], a[14]); b[15] = vsubq_s16(a[0], a[15]); b[16] = a[16]; b[17] = a[17]; b[18] = a[18]; b[19] = a[19]; butterfly_one_coeff(a[27], a[20], cospi_16_64, &b[27], &b[20]); butterfly_one_coeff(a[26], a[21], cospi_16_64, &b[26], &b[21]); butterfly_one_coeff(a[25], a[22], cospi_16_64, &b[25], &b[22]); butterfly_one_coeff(a[24], a[23], cospi_16_64, &b[24], &b[23]); b[28] = a[28]; b[29] = a[29]; b[30] = a[30]; b[31] = a[31]; // Stage 3. With extreme values for input this calculation rolls over int16_t. // The sources for b[0] get added multiple times and, through testing, have // been shown to overflow starting here. 
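  // Note: from here on, the ADD_S16_S32 / SUB_S16_S32 macros widen each
  // int16x8_t into a low/high pair of int32x4_t accumulators so the remaining
  // stages cannot wrap. Per lane, ADD_S16_S32(b, i, j, c, k) is simply
  //   c[k] = (int32_t)b[i] + (int32_t)b[j]
  // split across c_lo[k] and c_hi[k].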
ADD_S16_S32(b, 0, 7, c, 0); ADD_S16_S32(b, 1, 6, c, 1); ADD_S16_S32(b, 2, 5, c, 2); ADD_S16_S32(b, 3, 4, c, 3); SUB_S16_S32(b, 3, 4, c, 4); SUB_S16_S32(b, 2, 5, c, 5); SUB_S16_S32(b, 1, 6, c, 6); SUB_S16_S32(b, 0, 7, c, 7); a[8] = b[8]; a[9] = b[9]; BUTTERFLY_ONE_S16_S32(b, 13, 10, cospi_16_64, c, 13, 10); BUTTERFLY_ONE_S16_S32(b, 12, 11, cospi_16_64, c, 12, 11); a[14] = b[14]; a[15] = b[15]; ADD_S16_S32(b, 16, 23, c, 16); ADD_S16_S32(b, 17, 22, c, 17); ADD_S16_S32(b, 18, 21, c, 18); ADD_S16_S32(b, 19, 20, c, 19); SUB_S16_S32(b, 19, 20, c, 20); SUB_S16_S32(b, 18, 21, c, 21); SUB_S16_S32(b, 17, 22, c, 22); SUB_S16_S32(b, 16, 23, c, 23); SUB_S16_S32(b, 31, 24, c, 24); SUB_S16_S32(b, 30, 25, c, 25); SUB_S16_S32(b, 29, 26, c, 26); SUB_S16_S32(b, 28, 27, c, 27); ADD_S16_S32(b, 28, 27, c, 28); ADD_S16_S32(b, 29, 26, c, 29); ADD_S16_S32(b, 30, 25, c, 30); ADD_S16_S32(b, 31, 24, c, 31); // Stage 4. ADD_S32(c, 0, 3, d, 0); ADD_S32(c, 1, 2, d, 1); SUB_S32(c, 1, 2, d, 2); SUB_S32(c, 0, 3, d, 3); PASS_THROUGH(c, d, 4); BUTTERFLY_ONE_S32(c, 6, 5, cospi_16_64, d, 6, 5); PASS_THROUGH(c, d, 7); ADDW_S16_S32(c, 11, a, 8, d, 8); ADDW_S16_S32(c, 10, a, 9, d, 9); SUBW_S16_S32(a, 9, c, 10, c, 9, d, 10); SUBW_S16_S32(a, 8, c, 11, c, 8, d, 11); SUBW_S16_S32(a, 15, c, 12, c, 15, d, 12); SUBW_S16_S32(a, 14, c, 13, c, 14, d, 13); ADDW_S16_S32(c, 13, b, 14, d, 14); ADDW_S16_S32(c, 12, b, 15, d, 15); PASS_THROUGH(c, d, 16); PASS_THROUGH(c, d, 17); BUTTERFLY_TWO_S32(c, 29, 18, cospi_24_64, cospi_8_64, d, 29, 18); BUTTERFLY_TWO_S32(c, 28, 19, cospi_24_64, cospi_8_64, d, 28, 19); BUTTERFLY_TWO_S32(c, 27, 20, -cospi_8_64, cospi_24_64, d, 27, 20); BUTTERFLY_TWO_S32(c, 26, 21, -cospi_8_64, cospi_24_64, d, 26, 21); PASS_THROUGH(c, d, 22); PASS_THROUGH(c, d, 23); PASS_THROUGH(c, d, 24); PASS_THROUGH(c, d, 25); PASS_THROUGH(c, d, 30); PASS_THROUGH(c, d, 31); // Stage 5. BUTTERFLY_ONE_S32(d, 0, 1, cospi_16_64, c, 0, 1); BUTTERFLY_TWO_S32(d, 3, 2, cospi_24_64, cospi_8_64, c, 2, 3); ADD_S32(d, 4, 5, c, 4); SUB_S32(d, 4, 5, c, 5); SUB_S32(d, 7, 6, c, 6); ADD_S32(d, 7, 6, c, 7); PASS_THROUGH(d, c, 8); BUTTERFLY_TWO_S32(d, 14, 9, cospi_24_64, cospi_8_64, c, 14, 9); BUTTERFLY_TWO_S32(d, 13, 10, -cospi_8_64, cospi_24_64, c, 13, 10); PASS_THROUGH(d, c, 11); PASS_THROUGH(d, c, 12); PASS_THROUGH(d, c, 15); ADD_S32(d, 16, 19, c, 16); ADD_S32(d, 17, 18, c, 17); SUB_S32(d, 17, 18, c, 18); SUB_S32(d, 16, 19, c, 19); SUB_S32(d, 23, 20, c, 20); SUB_S32(d, 22, 21, c, 21); ADD_S32(d, 22, 21, c, 22); ADD_S32(d, 23, 20, c, 23); ADD_S32(d, 24, 27, c, 24); ADD_S32(d, 25, 26, c, 25); SUB_S32(d, 25, 26, c, 26); SUB_S32(d, 24, 27, c, 27); SUB_S32(d, 31, 28, c, 28); SUB_S32(d, 30, 29, c, 29); ADD_S32(d, 30, 29, c, 30); ADD_S32(d, 31, 28, c, 31); // Stage 6. 
PASS_THROUGH(c, d, 0); PASS_THROUGH(c, d, 1); PASS_THROUGH(c, d, 2); PASS_THROUGH(c, d, 3); BUTTERFLY_TWO_S32(c, 7, 4, cospi_28_64, cospi_4_64, d, 4, 7); BUTTERFLY_TWO_S32(c, 6, 5, cospi_12_64, cospi_20_64, d, 5, 6); ADD_S32(c, 8, 9, d, 8); SUB_S32(c, 8, 9, d, 9); SUB_S32(c, 11, 10, d, 10); ADD_S32(c, 11, 10, d, 11); ADD_S32(c, 12, 13, d, 12); SUB_S32(c, 12, 13, d, 13); SUB_S32(c, 15, 14, d, 14); ADD_S32(c, 15, 14, d, 15); PASS_THROUGH(c, d, 16); PASS_THROUGH(c, d, 19); PASS_THROUGH(c, d, 20); PASS_THROUGH(c, d, 23); PASS_THROUGH(c, d, 24); PASS_THROUGH(c, d, 27); PASS_THROUGH(c, d, 28); PASS_THROUGH(c, d, 31); BUTTERFLY_TWO_S32(c, 30, 17, cospi_28_64, cospi_4_64, d, 30, 17); BUTTERFLY_TWO_S32(c, 29, 18, -cospi_4_64, cospi_28_64, d, 29, 18); BUTTERFLY_TWO_S32(c, 26, 21, cospi_12_64, cospi_20_64, d, 26, 21); BUTTERFLY_TWO_S32(c, 25, 22, -cospi_20_64, cospi_12_64, d, 25, 22); // Stage 7. PASS_THROUGH(d, c, 0); PASS_THROUGH(d, c, 1); PASS_THROUGH(d, c, 2); PASS_THROUGH(d, c, 3); PASS_THROUGH(d, c, 4); PASS_THROUGH(d, c, 5); PASS_THROUGH(d, c, 6); PASS_THROUGH(d, c, 7); BUTTERFLY_TWO_S32(d, 15, 8, cospi_30_64, cospi_2_64, c, 8, 15); BUTTERFLY_TWO_S32(d, 14, 9, cospi_14_64, cospi_18_64, c, 9, 14); BUTTERFLY_TWO_S32(d, 13, 10, cospi_22_64, cospi_10_64, c, 10, 13); BUTTERFLY_TWO_S32(d, 12, 11, cospi_6_64, cospi_26_64, c, 11, 12); ADD_S32(d, 16, 17, c, 16); SUB_S32(d, 16, 17, c, 17); SUB_S32(d, 19, 18, c, 18); ADD_S32(d, 19, 18, c, 19); ADD_S32(d, 20, 21, c, 20); SUB_S32(d, 20, 21, c, 21); SUB_S32(d, 23, 22, c, 22); ADD_S32(d, 23, 22, c, 23); ADD_S32(d, 24, 25, c, 24); SUB_S32(d, 24, 25, c, 25); SUB_S32(d, 27, 26, c, 26); ADD_S32(d, 27, 26, c, 27); ADD_S32(d, 28, 29, c, 28); SUB_S32(d, 28, 29, c, 29); SUB_S32(d, 31, 30, c, 30); ADD_S32(d, 31, 30, c, 31); // Final stage. // Roll rounding into this function so we can pass back int16x8. 
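  // Note: per lane, add_round_shift_s32() computes (modulo saturation)
  //   out = (int16_t)((a + 1 + (a < 0)) >> 2)
  // matching the reference rounding while narrowing back to 16 bits.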
out[0] = add_round_shift_s32(c_lo[0], c_hi[0]); out[16] = add_round_shift_s32(c_lo[1], c_hi[1]); out[8] = add_round_shift_s32(c_lo[2], c_hi[2]); out[24] = add_round_shift_s32(c_lo[3], c_hi[3]); out[4] = add_round_shift_s32(c_lo[4], c_hi[4]); out[20] = add_round_shift_s32(c_lo[5], c_hi[5]); out[12] = add_round_shift_s32(c_lo[6], c_hi[6]); out[28] = add_round_shift_s32(c_lo[7], c_hi[7]); out[2] = add_round_shift_s32(c_lo[8], c_hi[8]); out[18] = add_round_shift_s32(c_lo[9], c_hi[9]); out[10] = add_round_shift_s32(c_lo[10], c_hi[10]); out[26] = add_round_shift_s32(c_lo[11], c_hi[11]); out[6] = add_round_shift_s32(c_lo[12], c_hi[12]); out[22] = add_round_shift_s32(c_lo[13], c_hi[13]); out[14] = add_round_shift_s32(c_lo[14], c_hi[14]); out[30] = add_round_shift_s32(c_lo[15], c_hi[15]); BUTTERFLY_TWO_S32(c, 31, 16, cospi_31_64, cospi_1_64, d, 1, 31); out[1] = add_round_shift_s32(d_lo[1], d_hi[1]); out[31] = add_round_shift_s32(d_lo[31], d_hi[31]); BUTTERFLY_TWO_S32(c, 30, 17, cospi_15_64, cospi_17_64, d, 17, 15); out[17] = add_round_shift_s32(d_lo[17], d_hi[17]); out[15] = add_round_shift_s32(d_lo[15], d_hi[15]); BUTTERFLY_TWO_S32(c, 29, 18, cospi_23_64, cospi_9_64, d, 9, 23); out[9] = add_round_shift_s32(d_lo[9], d_hi[9]); out[23] = add_round_shift_s32(d_lo[23], d_hi[23]); BUTTERFLY_TWO_S32(c, 28, 19, cospi_7_64, cospi_25_64, d, 25, 7); out[25] = add_round_shift_s32(d_lo[25], d_hi[25]); out[7] = add_round_shift_s32(d_lo[7], d_hi[7]); BUTTERFLY_TWO_S32(c, 27, 20, cospi_27_64, cospi_5_64, d, 5, 27); out[5] = add_round_shift_s32(d_lo[5], d_hi[5]); out[27] = add_round_shift_s32(d_lo[27], d_hi[27]); BUTTERFLY_TWO_S32(c, 26, 21, cospi_11_64, cospi_21_64, d, 21, 11); out[21] = add_round_shift_s32(d_lo[21], d_hi[21]); out[11] = add_round_shift_s32(d_lo[11], d_hi[11]); BUTTERFLY_TWO_S32(c, 25, 22, cospi_19_64, cospi_13_64, d, 13, 19); out[13] = add_round_shift_s32(d_lo[13], d_hi[13]); out[19] = add_round_shift_s32(d_lo[19], d_hi[19]); BUTTERFLY_TWO_S32(c, 24, 23, cospi_3_64, cospi_29_64, d, 29, 3); out[29] = add_round_shift_s32(d_lo[29], d_hi[29]); out[3] = add_round_shift_s32(d_lo[3], d_hi[3]); } // Add 1 if positive, 2 if negative, and shift by 2. // In practice, add 1, then add the sign bit, then shift without rounding. static INLINE int16x8_t add_round_shift_s16(const int16x8_t a) { const int16x8_t one = vdupq_n_s16(1); const uint16x8_t a_u16 = vreinterpretq_u16_s16(a); const uint16x8_t a_sign_u16 = vshrq_n_u16(a_u16, 15); const int16x8_t a_sign_s16 = vreinterpretq_s16_u16(a_sign_u16); return vshrq_n_s16(vaddq_s16(vaddq_s16(a, a_sign_s16), one), 2); } static void dct_body_second_pass_rd(const int16x8_t *in, int16x8_t *out) { int16x8_t a[32]; int16x8_t b[32]; // Stage 1. Done as part of the load for the first pass. 
a[0] = vaddq_s16(in[0], in[31]); a[1] = vaddq_s16(in[1], in[30]); a[2] = vaddq_s16(in[2], in[29]); a[3] = vaddq_s16(in[3], in[28]); a[4] = vaddq_s16(in[4], in[27]); a[5] = vaddq_s16(in[5], in[26]); a[6] = vaddq_s16(in[6], in[25]); a[7] = vaddq_s16(in[7], in[24]); a[8] = vaddq_s16(in[8], in[23]); a[9] = vaddq_s16(in[9], in[22]); a[10] = vaddq_s16(in[10], in[21]); a[11] = vaddq_s16(in[11], in[20]); a[12] = vaddq_s16(in[12], in[19]); a[13] = vaddq_s16(in[13], in[18]); a[14] = vaddq_s16(in[14], in[17]); a[15] = vaddq_s16(in[15], in[16]); a[16] = vsubq_s16(in[15], in[16]); a[17] = vsubq_s16(in[14], in[17]); a[18] = vsubq_s16(in[13], in[18]); a[19] = vsubq_s16(in[12], in[19]); a[20] = vsubq_s16(in[11], in[20]); a[21] = vsubq_s16(in[10], in[21]); a[22] = vsubq_s16(in[9], in[22]); a[23] = vsubq_s16(in[8], in[23]); a[24] = vsubq_s16(in[7], in[24]); a[25] = vsubq_s16(in[6], in[25]); a[26] = vsubq_s16(in[5], in[26]); a[27] = vsubq_s16(in[4], in[27]); a[28] = vsubq_s16(in[3], in[28]); a[29] = vsubq_s16(in[2], in[29]); a[30] = vsubq_s16(in[1], in[30]); a[31] = vsubq_s16(in[0], in[31]); // Stage 2. // For the "rd" version, all the values are rounded down after stage 2 to keep // the values in 16 bits. b[0] = add_round_shift_s16(vaddq_s16(a[0], a[15])); b[1] = add_round_shift_s16(vaddq_s16(a[1], a[14])); b[2] = add_round_shift_s16(vaddq_s16(a[2], a[13])); b[3] = add_round_shift_s16(vaddq_s16(a[3], a[12])); b[4] = add_round_shift_s16(vaddq_s16(a[4], a[11])); b[5] = add_round_shift_s16(vaddq_s16(a[5], a[10])); b[6] = add_round_shift_s16(vaddq_s16(a[6], a[9])); b[7] = add_round_shift_s16(vaddq_s16(a[7], a[8])); b[8] = add_round_shift_s16(vsubq_s16(a[7], a[8])); b[9] = add_round_shift_s16(vsubq_s16(a[6], a[9])); b[10] = add_round_shift_s16(vsubq_s16(a[5], a[10])); b[11] = add_round_shift_s16(vsubq_s16(a[4], a[11])); b[12] = add_round_shift_s16(vsubq_s16(a[3], a[12])); b[13] = add_round_shift_s16(vsubq_s16(a[2], a[13])); b[14] = add_round_shift_s16(vsubq_s16(a[1], a[14])); b[15] = add_round_shift_s16(vsubq_s16(a[0], a[15])); b[16] = add_round_shift_s16(a[16]); b[17] = add_round_shift_s16(a[17]); b[18] = add_round_shift_s16(a[18]); b[19] = add_round_shift_s16(a[19]); butterfly_one_coeff(a[27], a[20], cospi_16_64, &b[27], &b[20]); butterfly_one_coeff(a[26], a[21], cospi_16_64, &b[26], &b[21]); butterfly_one_coeff(a[25], a[22], cospi_16_64, &b[25], &b[22]); butterfly_one_coeff(a[24], a[23], cospi_16_64, &b[24], &b[23]); b[20] = add_round_shift_s16(b[20]); b[21] = add_round_shift_s16(b[21]); b[22] = add_round_shift_s16(b[22]); b[23] = add_round_shift_s16(b[23]); b[24] = add_round_shift_s16(b[24]); b[25] = add_round_shift_s16(b[25]); b[26] = add_round_shift_s16(b[26]); b[27] = add_round_shift_s16(b[27]); b[28] = add_round_shift_s16(a[28]); b[29] = add_round_shift_s16(a[29]); b[30] = add_round_shift_s16(a[30]); b[31] = add_round_shift_s16(a[31]); // Stage 3. 
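  // Note: unlike dct_body_second_pass(), the "rd" variant can stay in 16 bits
  // from stage 3 onward because the add_round_shift_s16() calls above already
  // divided everything by 4, trading a little precision for range.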
a[0] = vaddq_s16(b[0], b[7]); a[1] = vaddq_s16(b[1], b[6]); a[2] = vaddq_s16(b[2], b[5]); a[3] = vaddq_s16(b[3], b[4]); a[4] = vsubq_s16(b[3], b[4]); a[5] = vsubq_s16(b[2], b[5]); a[6] = vsubq_s16(b[1], b[6]); a[7] = vsubq_s16(b[0], b[7]); a[8] = b[8]; a[9] = b[9]; butterfly_one_coeff(b[13], b[10], cospi_16_64, &a[13], &a[10]); butterfly_one_coeff(b[12], b[11], cospi_16_64, &a[12], &a[11]); a[14] = b[14]; a[15] = b[15]; a[16] = vaddq_s16(b[16], b[23]); a[17] = vaddq_s16(b[17], b[22]); a[18] = vaddq_s16(b[18], b[21]); a[19] = vaddq_s16(b[19], b[20]); a[20] = vsubq_s16(b[19], b[20]); a[21] = vsubq_s16(b[18], b[21]); a[22] = vsubq_s16(b[17], b[22]); a[23] = vsubq_s16(b[16], b[23]); a[24] = vsubq_s16(b[31], b[24]); a[25] = vsubq_s16(b[30], b[25]); a[26] = vsubq_s16(b[29], b[26]); a[27] = vsubq_s16(b[28], b[27]); a[28] = vaddq_s16(b[28], b[27]); a[29] = vaddq_s16(b[29], b[26]); a[30] = vaddq_s16(b[30], b[25]); a[31] = vaddq_s16(b[31], b[24]); // Stage 4. b[0] = vaddq_s16(a[0], a[3]); b[1] = vaddq_s16(a[1], a[2]); b[2] = vsubq_s16(a[1], a[2]); b[3] = vsubq_s16(a[0], a[3]); b[4] = a[4]; butterfly_one_coeff(a[6], a[5], cospi_16_64, &b[6], &b[5]); b[7] = a[7]; b[8] = vaddq_s16(a[8], a[11]); b[9] = vaddq_s16(a[9], a[10]); b[10] = vsubq_s16(a[9], a[10]); b[11] = vsubq_s16(a[8], a[11]); b[12] = vsubq_s16(a[15], a[12]); b[13] = vsubq_s16(a[14], a[13]); b[14] = vaddq_s16(a[14], a[13]); b[15] = vaddq_s16(a[15], a[12]); b[16] = a[16]; b[17] = a[17]; butterfly_two_coeff(a[29], a[18], cospi_24_64, cospi_8_64, &b[29], &b[18]); butterfly_two_coeff(a[28], a[19], cospi_24_64, cospi_8_64, &b[28], &b[19]); butterfly_two_coeff(a[27], a[20], -cospi_8_64, cospi_24_64, &b[27], &b[20]); butterfly_two_coeff(a[26], a[21], -cospi_8_64, cospi_24_64, &b[26], &b[21]); b[22] = a[22]; b[23] = a[23]; b[24] = a[24]; b[25] = a[25]; b[30] = a[30]; b[31] = a[31]; // Stage 5. butterfly_one_coeff(b[0], b[1], cospi_16_64, &a[0], &a[1]); butterfly_two_coeff(b[3], b[2], cospi_24_64, cospi_8_64, &a[2], &a[3]); a[4] = vaddq_s16(b[4], b[5]); a[5] = vsubq_s16(b[4], b[5]); a[6] = vsubq_s16(b[7], b[6]); a[7] = vaddq_s16(b[7], b[6]); a[8] = b[8]; butterfly_two_coeff(b[14], b[9], cospi_24_64, cospi_8_64, &a[14], &a[9]); butterfly_two_coeff(b[13], b[10], -cospi_8_64, cospi_24_64, &a[13], &a[10]); a[11] = b[11]; a[12] = b[12]; a[15] = b[15]; a[16] = vaddq_s16(b[19], b[16]); a[17] = vaddq_s16(b[18], b[17]); a[18] = vsubq_s16(b[17], b[18]); a[19] = vsubq_s16(b[16], b[19]); a[20] = vsubq_s16(b[23], b[20]); a[21] = vsubq_s16(b[22], b[21]); a[22] = vaddq_s16(b[21], b[22]); a[23] = vaddq_s16(b[20], b[23]); a[24] = vaddq_s16(b[27], b[24]); a[25] = vaddq_s16(b[26], b[25]); a[26] = vsubq_s16(b[25], b[26]); a[27] = vsubq_s16(b[24], b[27]); a[28] = vsubq_s16(b[31], b[28]); a[29] = vsubq_s16(b[30], b[29]); a[30] = vaddq_s16(b[29], b[30]); a[31] = vaddq_s16(b[28], b[31]); // Stage 6. 
b[0] = a[0]; b[1] = a[1]; b[2] = a[2]; b[3] = a[3]; butterfly_two_coeff(a[7], a[4], cospi_28_64, cospi_4_64, &b[4], &b[7]); butterfly_two_coeff(a[6], a[5], cospi_12_64, cospi_20_64, &b[5], &b[6]); b[8] = vaddq_s16(a[8], a[9]); b[9] = vsubq_s16(a[8], a[9]); b[10] = vsubq_s16(a[11], a[10]); b[11] = vaddq_s16(a[11], a[10]); b[12] = vaddq_s16(a[12], a[13]); b[13] = vsubq_s16(a[12], a[13]); b[14] = vsubq_s16(a[15], a[14]); b[15] = vaddq_s16(a[15], a[14]); b[16] = a[16]; b[19] = a[19]; b[20] = a[20]; b[23] = a[23]; b[24] = a[24]; b[27] = a[27]; b[28] = a[28]; b[31] = a[31]; butterfly_two_coeff(a[30], a[17], cospi_28_64, cospi_4_64, &b[30], &b[17]); butterfly_two_coeff(a[29], a[18], -cospi_4_64, cospi_28_64, &b[29], &b[18]); butterfly_two_coeff(a[26], a[21], cospi_12_64, cospi_20_64, &b[26], &b[21]); butterfly_two_coeff(a[25], a[22], -cospi_20_64, cospi_12_64, &b[25], &b[22]); // Stage 7. a[0] = b[0]; a[1] = b[1]; a[2] = b[2]; a[3] = b[3]; a[4] = b[4]; a[5] = b[5]; a[6] = b[6]; a[7] = b[7]; butterfly_two_coeff(b[15], b[8], cospi_30_64, cospi_2_64, &a[8], &a[15]); butterfly_two_coeff(b[14], b[9], cospi_14_64, cospi_18_64, &a[9], &a[14]); butterfly_two_coeff(b[13], b[10], cospi_22_64, cospi_10_64, &a[10], &a[13]); butterfly_two_coeff(b[12], b[11], cospi_6_64, cospi_26_64, &a[11], &a[12]); a[16] = vaddq_s16(b[16], b[17]); a[17] = vsubq_s16(b[16], b[17]); a[18] = vsubq_s16(b[19], b[18]); a[19] = vaddq_s16(b[19], b[18]); a[20] = vaddq_s16(b[20], b[21]); a[21] = vsubq_s16(b[20], b[21]); a[22] = vsubq_s16(b[23], b[22]); a[23] = vaddq_s16(b[23], b[22]); a[24] = vaddq_s16(b[24], b[25]); a[25] = vsubq_s16(b[24], b[25]); a[26] = vsubq_s16(b[27], b[26]); a[27] = vaddq_s16(b[27], b[26]); a[28] = vaddq_s16(b[28], b[29]); a[29] = vsubq_s16(b[28], b[29]); a[30] = vsubq_s16(b[31], b[30]); a[31] = vaddq_s16(b[31], b[30]); // Final stage. out[0] = a[0]; out[16] = a[1]; out[8] = a[2]; out[24] = a[3]; out[4] = a[4]; out[20] = a[5]; out[12] = a[6]; out[28] = a[7]; out[2] = a[8]; out[18] = a[9]; out[10] = a[10]; out[26] = a[11]; out[6] = a[12]; out[22] = a[13]; out[14] = a[14]; out[30] = a[15]; butterfly_two_coeff(a[31], a[16], cospi_31_64, cospi_1_64, &out[1], &out[31]); butterfly_two_coeff(a[30], a[17], cospi_15_64, cospi_17_64, &out[17], &out[15]); butterfly_two_coeff(a[29], a[18], cospi_23_64, cospi_9_64, &out[9], &out[23]); butterfly_two_coeff(a[28], a[19], cospi_7_64, cospi_25_64, &out[25], &out[7]); butterfly_two_coeff(a[27], a[20], cospi_27_64, cospi_5_64, &out[5], &out[27]); butterfly_two_coeff(a[26], a[21], cospi_11_64, cospi_21_64, &out[21], &out[11]); butterfly_two_coeff(a[25], a[22], cospi_19_64, cospi_13_64, &out[13], &out[19]); butterfly_two_coeff(a[24], a[23], cospi_3_64, cospi_29_64, &out[29], &out[3]); } #undef PASS_THROUGH #undef ADD_S16_S32 #undef SUB_S16_S32 #undef ADDW_S16_S32 #undef SUBW_S16_S32 #undef ADD_S32 #undef SUB_S32 #undef BUTTERFLY_ONE_S16_S32 #undef BUTTERFLY_ONE_S32 #undef BUTTERFLY_TWO_S32 // Transpose 8x8 to a new location. Don't use transpose_neon.h because those // are all in-place. // TODO(johannkoenig): share with other fdcts. static INLINE void transpose_8x8(const int16x8_t *a, int16x8_t *b) { // Swap 16 bit elements. const int16x8x2_t c0 = vtrnq_s16(a[0], a[1]); const int16x8x2_t c1 = vtrnq_s16(a[2], a[3]); const int16x8x2_t c2 = vtrnq_s16(a[4], a[5]); const int16x8x2_t c3 = vtrnq_s16(a[6], a[7]); // Swap 32 bit elements. 
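  // Note: this is the usual three-step NEON transpose. The vtrnq_s16 calls
  // above swapped 2x2 blocks of 16-bit lanes; the vtrnq_s32 calls below swap
  // 2x2 blocks of 32-bit pairs; the final vpx_vtrnq_s64_to_s16 step swaps
  // 64-bit halves, completing the 8x8 transpose.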
const int32x4x2_t d0 = vtrnq_s32(vreinterpretq_s32_s16(c0.val[0]), vreinterpretq_s32_s16(c1.val[0])); const int32x4x2_t d1 = vtrnq_s32(vreinterpretq_s32_s16(c0.val[1]), vreinterpretq_s32_s16(c1.val[1])); const int32x4x2_t d2 = vtrnq_s32(vreinterpretq_s32_s16(c2.val[0]), vreinterpretq_s32_s16(c3.val[0])); const int32x4x2_t d3 = vtrnq_s32(vreinterpretq_s32_s16(c2.val[1]), vreinterpretq_s32_s16(c3.val[1])); // Swap 64 bit elements const int16x8x2_t e0 = vpx_vtrnq_s64_to_s16(d0.val[0], d2.val[0]); const int16x8x2_t e1 = vpx_vtrnq_s64_to_s16(d1.val[0], d3.val[0]); const int16x8x2_t e2 = vpx_vtrnq_s64_to_s16(d0.val[1], d2.val[1]); const int16x8x2_t e3 = vpx_vtrnq_s64_to_s16(d1.val[1], d3.val[1]); b[0] = e0.val[0]; b[1] = e1.val[0]; b[2] = e2.val[0]; b[3] = e3.val[0]; b[4] = e0.val[1]; b[5] = e1.val[1]; b[6] = e2.val[1]; b[7] = e3.val[1]; } void vpx_fdct32x32_neon(const int16_t *input, tran_low_t *output, int stride) { int16x8_t temp0[32]; int16x8_t temp1[32]; int16x8_t temp2[32]; int16x8_t temp3[32]; int16x8_t temp4[32]; int16x8_t temp5[32]; // Process in 8x32 columns. load(input, stride, temp0); dct_body_first_pass(temp0, temp1); load(input + 8, stride, temp0); dct_body_first_pass(temp0, temp2); load(input + 16, stride, temp0); dct_body_first_pass(temp0, temp3); load(input + 24, stride, temp0); dct_body_first_pass(temp0, temp4); // Generate the top row by munging the first set of 8 from each one together. transpose_8x8(&temp1[0], &temp0[0]); transpose_8x8(&temp2[0], &temp0[8]); transpose_8x8(&temp3[0], &temp0[16]); transpose_8x8(&temp4[0], &temp0[24]); dct_body_second_pass(temp0, temp5); transpose_s16_8x8(&temp5[0], &temp5[1], &temp5[2], &temp5[3], &temp5[4], &temp5[5], &temp5[6], &temp5[7]); transpose_s16_8x8(&temp5[8], &temp5[9], &temp5[10], &temp5[11], &temp5[12], &temp5[13], &temp5[14], &temp5[15]); transpose_s16_8x8(&temp5[16], &temp5[17], &temp5[18], &temp5[19], &temp5[20], &temp5[21], &temp5[22], &temp5[23]); transpose_s16_8x8(&temp5[24], &temp5[25], &temp5[26], &temp5[27], &temp5[28], &temp5[29], &temp5[30], &temp5[31]); store(output, temp5); // Second row of 8x32. transpose_8x8(&temp1[8], &temp0[0]); transpose_8x8(&temp2[8], &temp0[8]); transpose_8x8(&temp3[8], &temp0[16]); transpose_8x8(&temp4[8], &temp0[24]); dct_body_second_pass(temp0, temp5); transpose_s16_8x8(&temp5[0], &temp5[1], &temp5[2], &temp5[3], &temp5[4], &temp5[5], &temp5[6], &temp5[7]); transpose_s16_8x8(&temp5[8], &temp5[9], &temp5[10], &temp5[11], &temp5[12], &temp5[13], &temp5[14], &temp5[15]); transpose_s16_8x8(&temp5[16], &temp5[17], &temp5[18], &temp5[19], &temp5[20], &temp5[21], &temp5[22], &temp5[23]); transpose_s16_8x8(&temp5[24], &temp5[25], &temp5[26], &temp5[27], &temp5[28], &temp5[29], &temp5[30], &temp5[31]); store(output + 8 * 32, temp5); // Third row of 8x32 transpose_8x8(&temp1[16], &temp0[0]); transpose_8x8(&temp2[16], &temp0[8]); transpose_8x8(&temp3[16], &temp0[16]); transpose_8x8(&temp4[16], &temp0[24]); dct_body_second_pass(temp0, temp5); transpose_s16_8x8(&temp5[0], &temp5[1], &temp5[2], &temp5[3], &temp5[4], &temp5[5], &temp5[6], &temp5[7]); transpose_s16_8x8(&temp5[8], &temp5[9], &temp5[10], &temp5[11], &temp5[12], &temp5[13], &temp5[14], &temp5[15]); transpose_s16_8x8(&temp5[16], &temp5[17], &temp5[18], &temp5[19], &temp5[20], &temp5[21], &temp5[22], &temp5[23]); transpose_s16_8x8(&temp5[24], &temp5[25], &temp5[26], &temp5[27], &temp5[28], &temp5[29], &temp5[30], &temp5[31]); store(output + 16 * 32, temp5); // Final row of 8x32. 
transpose_8x8(&temp1[24], &temp0[0]); transpose_8x8(&temp2[24], &temp0[8]); transpose_8x8(&temp3[24], &temp0[16]); transpose_8x8(&temp4[24], &temp0[24]); dct_body_second_pass(temp0, temp5); transpose_s16_8x8(&temp5[0], &temp5[1], &temp5[2], &temp5[3], &temp5[4], &temp5[5], &temp5[6], &temp5[7]); transpose_s16_8x8(&temp5[8], &temp5[9], &temp5[10], &temp5[11], &temp5[12], &temp5[13], &temp5[14], &temp5[15]); transpose_s16_8x8(&temp5[16], &temp5[17], &temp5[18], &temp5[19], &temp5[20], &temp5[21], &temp5[22], &temp5[23]); transpose_s16_8x8(&temp5[24], &temp5[25], &temp5[26], &temp5[27], &temp5[28], &temp5[29], &temp5[30], &temp5[31]); store(output + 24 * 32, temp5); } void vpx_fdct32x32_rd_neon(const int16_t *input, tran_low_t *output, int stride) { int16x8_t temp0[32]; int16x8_t temp1[32]; int16x8_t temp2[32]; int16x8_t temp3[32]; int16x8_t temp4[32]; int16x8_t temp5[32]; // Process in 8x32 columns. load(input, stride, temp0); dct_body_first_pass(temp0, temp1); load(input + 8, stride, temp0); dct_body_first_pass(temp0, temp2); load(input + 16, stride, temp0); dct_body_first_pass(temp0, temp3); load(input + 24, stride, temp0); dct_body_first_pass(temp0, temp4); // Generate the top row by munging the first set of 8 from each one together. transpose_8x8(&temp1[0], &temp0[0]); transpose_8x8(&temp2[0], &temp0[8]); transpose_8x8(&temp3[0], &temp0[16]); transpose_8x8(&temp4[0], &temp0[24]); dct_body_second_pass_rd(temp0, temp5); transpose_s16_8x8(&temp5[0], &temp5[1], &temp5[2], &temp5[3], &temp5[4], &temp5[5], &temp5[6], &temp5[7]); transpose_s16_8x8(&temp5[8], &temp5[9], &temp5[10], &temp5[11], &temp5[12], &temp5[13], &temp5[14], &temp5[15]); transpose_s16_8x8(&temp5[16], &temp5[17], &temp5[18], &temp5[19], &temp5[20], &temp5[21], &temp5[22], &temp5[23]); transpose_s16_8x8(&temp5[24], &temp5[25], &temp5[26], &temp5[27], &temp5[28], &temp5[29], &temp5[30], &temp5[31]); store(output, temp5); // Second row of 8x32. transpose_8x8(&temp1[8], &temp0[0]); transpose_8x8(&temp2[8], &temp0[8]); transpose_8x8(&temp3[8], &temp0[16]); transpose_8x8(&temp4[8], &temp0[24]); dct_body_second_pass_rd(temp0, temp5); transpose_s16_8x8(&temp5[0], &temp5[1], &temp5[2], &temp5[3], &temp5[4], &temp5[5], &temp5[6], &temp5[7]); transpose_s16_8x8(&temp5[8], &temp5[9], &temp5[10], &temp5[11], &temp5[12], &temp5[13], &temp5[14], &temp5[15]); transpose_s16_8x8(&temp5[16], &temp5[17], &temp5[18], &temp5[19], &temp5[20], &temp5[21], &temp5[22], &temp5[23]); transpose_s16_8x8(&temp5[24], &temp5[25], &temp5[26], &temp5[27], &temp5[28], &temp5[29], &temp5[30], &temp5[31]); store(output + 8 * 32, temp5); // Third row of 8x32 transpose_8x8(&temp1[16], &temp0[0]); transpose_8x8(&temp2[16], &temp0[8]); transpose_8x8(&temp3[16], &temp0[16]); transpose_8x8(&temp4[16], &temp0[24]); dct_body_second_pass_rd(temp0, temp5); transpose_s16_8x8(&temp5[0], &temp5[1], &temp5[2], &temp5[3], &temp5[4], &temp5[5], &temp5[6], &temp5[7]); transpose_s16_8x8(&temp5[8], &temp5[9], &temp5[10], &temp5[11], &temp5[12], &temp5[13], &temp5[14], &temp5[15]); transpose_s16_8x8(&temp5[16], &temp5[17], &temp5[18], &temp5[19], &temp5[20], &temp5[21], &temp5[22], &temp5[23]); transpose_s16_8x8(&temp5[24], &temp5[25], &temp5[26], &temp5[27], &temp5[28], &temp5[29], &temp5[30], &temp5[31]); store(output + 16 * 32, temp5); // Final row of 8x32. 
  transpose_8x8(&temp1[24], &temp0[0]);
  transpose_8x8(&temp2[24], &temp0[8]);
  transpose_8x8(&temp3[24], &temp0[16]);
  transpose_8x8(&temp4[24], &temp0[24]);
  dct_body_second_pass_rd(temp0, temp5);
  transpose_s16_8x8(&temp5[0], &temp5[1], &temp5[2], &temp5[3], &temp5[4],
                    &temp5[5], &temp5[6], &temp5[7]);
  transpose_s16_8x8(&temp5[8], &temp5[9], &temp5[10], &temp5[11], &temp5[12],
                    &temp5[13], &temp5[14], &temp5[15]);
  transpose_s16_8x8(&temp5[16], &temp5[17], &temp5[18], &temp5[19], &temp5[20],
                    &temp5[21], &temp5[22], &temp5[23]);
  transpose_s16_8x8(&temp5[24], &temp5[25], &temp5[26], &temp5[27], &temp5[28],
                    &temp5[29], &temp5[30], &temp5[31]);
  store(output + 24 * 32, temp5);
}
#endif  // !defined(__clang__) && !defined(__ANDROID__) && defined(__GNUC__) &&
        // __GNUC__ == 4 && __GNUC_MINOR__ <= 9
libvpx-1.8.2/vpx_dsp/arm/fdct_neon.c000066400000000000000000000065151357355204000174060ustar00rootroot00000000000000/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/txfm_common.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"

void vpx_fdct4x4_neon(const int16_t *input, tran_low_t *final_output,
                      int stride) {
  int i;
  // input[M * stride] * 16
  int16x4_t input_0 = vshl_n_s16(vld1_s16(input + 0 * stride), 4);
  int16x4_t input_1 = vshl_n_s16(vld1_s16(input + 1 * stride), 4);
  int16x4_t input_2 = vshl_n_s16(vld1_s16(input + 2 * stride), 4);
  int16x4_t input_3 = vshl_n_s16(vld1_s16(input + 3 * stride), 4);

  // If the very first value != 0, then add 1.
  if (input[0] != 0) {
    const int16x4_t one = vreinterpret_s16_s64(vdup_n_s64(1));
    input_0 = vadd_s16(input_0, one);
  }

  for (i = 0; i < 2; ++i) {
    const int16x8_t input_01 = vcombine_s16(input_0, input_1);
    const int16x8_t input_32 = vcombine_s16(input_3, input_2);

    // in_0 +/- in_3, in_1 +/- in_2
    const int16x8_t s_01 = vaddq_s16(input_01, input_32);
    const int16x8_t s_32 = vsubq_s16(input_01, input_32);

    // step_0 +/- step_1, step_2 +/- step_3
    const int16x4_t s_0 = vget_low_s16(s_01);
    const int16x4_t s_1 = vget_high_s16(s_01);
    const int16x4_t s_2 = vget_high_s16(s_32);
    const int16x4_t s_3 = vget_low_s16(s_32);

    // (s_0 +/- s_1) * cospi_16_64
    // Must expand all elements to s32. See 'needs32' comment in fwd_txfm.c.
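    // Per lane this is the scalar computation
    //   out_0 = ROUND_POWER_OF_TWO((s_0 + s_1) * cospi_16_64, DCT_CONST_BITS)
    //   out_2 = ROUND_POWER_OF_TWO((s_0 - s_1) * cospi_16_64, DCT_CONST_BITS)
    // The 16-bit sums are widened first because the product needs 32 bits.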
const int32x4_t s_0_p_s_1 = vaddl_s16(s_0, s_1); const int32x4_t s_0_m_s_1 = vsubl_s16(s_0, s_1); const int32x4_t temp1 = vmulq_n_s32(s_0_p_s_1, cospi_16_64); const int32x4_t temp2 = vmulq_n_s32(s_0_m_s_1, cospi_16_64); // fdct_round_shift int16x4_t out_0 = vrshrn_n_s32(temp1, DCT_CONST_BITS); int16x4_t out_2 = vrshrn_n_s32(temp2, DCT_CONST_BITS); // s_3 * cospi_8_64 + s_2 * cospi_24_64 // s_3 * cospi_24_64 - s_2 * cospi_8_64 const int32x4_t s_3_cospi_8_64 = vmull_n_s16(s_3, cospi_8_64); const int32x4_t s_3_cospi_24_64 = vmull_n_s16(s_3, cospi_24_64); const int32x4_t temp3 = vmlal_n_s16(s_3_cospi_8_64, s_2, cospi_24_64); const int32x4_t temp4 = vmlsl_n_s16(s_3_cospi_24_64, s_2, cospi_8_64); // fdct_round_shift int16x4_t out_1 = vrshrn_n_s32(temp3, DCT_CONST_BITS); int16x4_t out_3 = vrshrn_n_s32(temp4, DCT_CONST_BITS); transpose_s16_4x4d(&out_0, &out_1, &out_2, &out_3); input_0 = out_0; input_1 = out_1; input_2 = out_2; input_3 = out_3; } { // Not quite a rounding shift. Only add 1 despite shifting by 2. const int16x8_t one = vdupq_n_s16(1); int16x8_t out_01 = vcombine_s16(input_0, input_1); int16x8_t out_23 = vcombine_s16(input_2, input_3); out_01 = vshrq_n_s16(vaddq_s16(out_01, one), 2); out_23 = vshrq_n_s16(vaddq_s16(out_23, one), 2); store_s16q_to_tran_low(final_output + 0 * 8, out_01); store_s16q_to_tran_low(final_output + 1 * 8, out_23); } } libvpx-1.8.2/vpx_dsp/arm/fdct_partial_neon.c000066400000000000000000000060071357355204000211160ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <arm_neon.h>

#include "./vpx_dsp_rtcd.h"
#include "./vpx_config.h"

#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/sum_neon.h"

static INLINE tran_low_t get_lane(const int32x2_t a) {
#if CONFIG_VP9_HIGHBITDEPTH
  return vget_lane_s32(a, 0);
#else
  return vget_lane_s16(vreinterpret_s16_s32(a), 0);
#endif  // CONFIG_VP9_HIGHBITDEPTH
}

void vpx_fdct4x4_1_neon(const int16_t *input, tran_low_t *output, int stride) {
  int16x4_t a0, a1, a2, a3;
  int16x8_t b0, b1;
  int16x8_t c;
  int32x2_t d;

  a0 = vld1_s16(input);
  input += stride;
  a1 = vld1_s16(input);
  input += stride;
  a2 = vld1_s16(input);
  input += stride;
  a3 = vld1_s16(input);

  b0 = vcombine_s16(a0, a1);
  b1 = vcombine_s16(a2, a3);

  c = vaddq_s16(b0, b1);

  d = horizontal_add_int16x8(c);

  output[0] = get_lane(vshl_n_s32(d, 1));
  output[1] = 0;
}

void vpx_fdct8x8_1_neon(const int16_t *input, tran_low_t *output, int stride) {
  int r;
  int16x8_t sum = vld1q_s16(&input[0]);

  for (r = 1; r < 8; ++r) {
    const int16x8_t input_00 = vld1q_s16(&input[r * stride]);
    sum = vaddq_s16(sum, input_00);
  }

  output[0] = get_lane(horizontal_add_int16x8(sum));
  output[1] = 0;
}

void vpx_fdct16x16_1_neon(const int16_t *input, tran_low_t *output,
                          int stride) {
  int r;
  int16x8_t left = vld1q_s16(input);
  int16x8_t right = vld1q_s16(input + 8);
  int32x2_t sum;
  input += stride;

  for (r = 1; r < 16; ++r) {
    const int16x8_t a = vld1q_s16(input);
    const int16x8_t b = vld1q_s16(input + 8);
    input += stride;
    left = vaddq_s16(left, a);
    right = vaddq_s16(right, b);
  }

  sum = vadd_s32(horizontal_add_int16x8(left), horizontal_add_int16x8(right));

  output[0] = get_lane(vshr_n_s32(sum, 1));
  output[1] = 0;
}

void vpx_fdct32x32_1_neon(const int16_t *input, tran_low_t *output,
                          int stride) {
  int r;
  int16x8_t a0 = vld1q_s16(input);
  int16x8_t a1 = vld1q_s16(input + 8);
  int16x8_t a2 = vld1q_s16(input + 16);
  int16x8_t a3 = vld1q_s16(input + 24);
  int32x2_t sum;
  input += stride;

  for (r = 1; r < 32; ++r) {
    const int16x8_t b0 = vld1q_s16(input);
    const int16x8_t b1 = vld1q_s16(input + 8);
    const int16x8_t b2 = vld1q_s16(input + 16);
    const int16x8_t b3 = vld1q_s16(input + 24);
    input += stride;
    a0 = vaddq_s16(a0, b0);
    a1 = vaddq_s16(a1, b1);
    a2 = vaddq_s16(a2, b2);
    a3 = vaddq_s16(a3, b3);
  }

  sum = vadd_s32(horizontal_add_int16x8(a0), horizontal_add_int16x8(a1));
  sum = vadd_s32(sum, horizontal_add_int16x8(a2));
  sum = vadd_s32(sum, horizontal_add_int16x8(a3));

  output[0] = get_lane(vshr_n_s32(sum, 3));
  output[1] = 0;
}
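// Note: the four *_1_neon kernels above compute only the DC coefficient of
// the forward transform: sum the top-left N x N residual block, then apply a
// size-dependent scale. The following scalar sketch is editorial and not part
// of libvpx (the name fdct_dc_reference is hypothetical); it mirrors that
// behavior, assuming the arithmetic right-shift semantics the NEON code
// relies on.
static INLINE tran_low_t fdct_dc_reference(const int16_t *input, int stride,
                                           int size) {
  int r, c;
  int32_t sum = 0;
  for (r = 0; r < size; ++r) {
    for (c = 0; c < size; ++c) sum += input[r * stride + c];
  }
  // Matches the shifts above: << 1 for 4x4, none for 8x8, >> 1 for 16x16 and
  // >> 3 for 32x32.
  if (size == 4) return (tran_low_t)(sum << 1);
  if (size == 8) return (tran_low_t)sum;
  if (size == 16) return (tran_low_t)(sum >> 1);
  return (tran_low_t)(sum >> 3);
}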
libvpx-1.8.2/vpx_dsp/arm/fwd_txfm_neon.c000066400000000000000000000235351357355204000203050ustar00rootroot00000000000000/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/txfm_common.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"

void vpx_fdct8x8_neon(const int16_t *input, tran_low_t *final_output,
                      int stride) {
  int i;
  // stage 1
  int16x8_t input_0 = vshlq_n_s16(vld1q_s16(&input[0 * stride]), 2);
  int16x8_t input_1 = vshlq_n_s16(vld1q_s16(&input[1 * stride]), 2);
  int16x8_t input_2 = vshlq_n_s16(vld1q_s16(&input[2 * stride]), 2);
  int16x8_t input_3 = vshlq_n_s16(vld1q_s16(&input[3 * stride]), 2);
  int16x8_t input_4 = vshlq_n_s16(vld1q_s16(&input[4 * stride]), 2);
  int16x8_t input_5 = vshlq_n_s16(vld1q_s16(&input[5 * stride]), 2);
  int16x8_t input_6 = vshlq_n_s16(vld1q_s16(&input[6 * stride]), 2);
  int16x8_t input_7 = vshlq_n_s16(vld1q_s16(&input[7 * stride]), 2);
  for (i = 0; i < 2; ++i) {
    int16x8_t out_0, out_1, out_2, out_3, out_4, out_5, out_6, out_7;
    const int16x8_t v_s0 = vaddq_s16(input_0, input_7);
    const int16x8_t v_s1 = vaddq_s16(input_1, input_6);
    const int16x8_t v_s2 = vaddq_s16(input_2, input_5);
    const int16x8_t v_s3 = vaddq_s16(input_3, input_4);
    const int16x8_t v_s4 = vsubq_s16(input_3, input_4);
    const int16x8_t v_s5 = vsubq_s16(input_2, input_5);
    const int16x8_t v_s6 = vsubq_s16(input_1, input_6);
    const int16x8_t v_s7 = vsubq_s16(input_0, input_7);
    // fdct4(step, step);
    int16x8_t v_x0 = vaddq_s16(v_s0, v_s3);
    int16x8_t v_x1 = vaddq_s16(v_s1, v_s2);
    int16x8_t v_x2 = vsubq_s16(v_s1, v_s2);
    int16x8_t v_x3 = vsubq_s16(v_s0, v_s3);
    // fdct4(step, step);
    int32x4_t v_t0_lo = vaddl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1));
    int32x4_t v_t0_hi = vaddl_s16(vget_high_s16(v_x0), vget_high_s16(v_x1));
    int32x4_t v_t1_lo = vsubl_s16(vget_low_s16(v_x0), vget_low_s16(v_x1));
    int32x4_t v_t1_hi = vsubl_s16(vget_high_s16(v_x0), vget_high_s16(v_x1));
    int32x4_t v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), cospi_24_64);
    int32x4_t v_t2_hi = vmull_n_s16(vget_high_s16(v_x2), cospi_24_64);
    int32x4_t v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), cospi_24_64);
    int32x4_t v_t3_hi = vmull_n_s16(vget_high_s16(v_x3), cospi_24_64);
    v_t2_lo = vmlal_n_s16(v_t2_lo, vget_low_s16(v_x3), cospi_8_64);
    v_t2_hi = vmlal_n_s16(v_t2_hi, vget_high_s16(v_x3), cospi_8_64);
    v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x2), cospi_8_64);
    v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x2), cospi_8_64);
    v_t0_lo = vmulq_n_s32(v_t0_lo, cospi_16_64);
    v_t0_hi = vmulq_n_s32(v_t0_hi, cospi_16_64);
    v_t1_lo = vmulq_n_s32(v_t1_lo, cospi_16_64);
    v_t1_hi = vmulq_n_s32(v_t1_hi, cospi_16_64);
    {
      const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS);
      const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS);
      const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS);
      const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS);
      const int16x4_t e = vrshrn_n_s32(v_t2_lo, DCT_CONST_BITS);
      const int16x4_t f = vrshrn_n_s32(v_t2_hi, DCT_CONST_BITS);
      const int16x4_t g = vrshrn_n_s32(v_t3_lo, DCT_CONST_BITS);
      const int16x4_t h = vrshrn_n_s32(v_t3_hi, DCT_CONST_BITS);
      out_0 = vcombine_s16(a, c);  // 00 01 02 03 40 41 42 43
      out_2 = vcombine_s16(e, g);  // 20 21 22 23 60 61 62 63
      out_4 = vcombine_s16(b, d);  // 04 05 06 07 44 45 46 47
      out_6 = vcombine_s16(f, h);  // 24 25 26 27 64 65 66 67
    }
    // Stage 2
    v_x0 = vsubq_s16(v_s6, v_s5);
    v_x1 = vaddq_s16(v_s6, v_s5);
    v_t0_lo = vmull_n_s16(vget_low_s16(v_x0), cospi_16_64);
    v_t0_hi = vmull_n_s16(vget_high_s16(v_x0), cospi_16_64);
    v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), cospi_16_64);
    v_t1_hi = vmull_n_s16(vget_high_s16(v_x1), cospi_16_64);
    {
      const
int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS); const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS); const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS); const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS); const int16x8_t ab = vcombine_s16(a, b); const int16x8_t cd = vcombine_s16(c, d); // Stage 3 v_x0 = vaddq_s16(v_s4, ab); v_x1 = vsubq_s16(v_s4, ab); v_x2 = vsubq_s16(v_s7, cd); v_x3 = vaddq_s16(v_s7, cd); } // Stage 4 v_t0_lo = vmull_n_s16(vget_low_s16(v_x3), cospi_4_64); v_t0_hi = vmull_n_s16(vget_high_s16(v_x3), cospi_4_64); v_t0_lo = vmlal_n_s16(v_t0_lo, vget_low_s16(v_x0), cospi_28_64); v_t0_hi = vmlal_n_s16(v_t0_hi, vget_high_s16(v_x0), cospi_28_64); v_t1_lo = vmull_n_s16(vget_low_s16(v_x1), cospi_12_64); v_t1_hi = vmull_n_s16(vget_high_s16(v_x1), cospi_12_64); v_t1_lo = vmlal_n_s16(v_t1_lo, vget_low_s16(v_x2), cospi_20_64); v_t1_hi = vmlal_n_s16(v_t1_hi, vget_high_s16(v_x2), cospi_20_64); v_t2_lo = vmull_n_s16(vget_low_s16(v_x2), cospi_12_64); v_t2_hi = vmull_n_s16(vget_high_s16(v_x2), cospi_12_64); v_t2_lo = vmlsl_n_s16(v_t2_lo, vget_low_s16(v_x1), cospi_20_64); v_t2_hi = vmlsl_n_s16(v_t2_hi, vget_high_s16(v_x1), cospi_20_64); v_t3_lo = vmull_n_s16(vget_low_s16(v_x3), cospi_28_64); v_t3_hi = vmull_n_s16(vget_high_s16(v_x3), cospi_28_64); v_t3_lo = vmlsl_n_s16(v_t3_lo, vget_low_s16(v_x0), cospi_4_64); v_t3_hi = vmlsl_n_s16(v_t3_hi, vget_high_s16(v_x0), cospi_4_64); { const int16x4_t a = vrshrn_n_s32(v_t0_lo, DCT_CONST_BITS); const int16x4_t b = vrshrn_n_s32(v_t0_hi, DCT_CONST_BITS); const int16x4_t c = vrshrn_n_s32(v_t1_lo, DCT_CONST_BITS); const int16x4_t d = vrshrn_n_s32(v_t1_hi, DCT_CONST_BITS); const int16x4_t e = vrshrn_n_s32(v_t2_lo, DCT_CONST_BITS); const int16x4_t f = vrshrn_n_s32(v_t2_hi, DCT_CONST_BITS); const int16x4_t g = vrshrn_n_s32(v_t3_lo, DCT_CONST_BITS); const int16x4_t h = vrshrn_n_s32(v_t3_hi, DCT_CONST_BITS); out_1 = vcombine_s16(a, c); // 10 11 12 13 50 51 52 53 out_3 = vcombine_s16(e, g); // 30 31 32 33 70 71 72 73 out_5 = vcombine_s16(b, d); // 14 15 16 17 54 55 56 57 out_7 = vcombine_s16(f, h); // 34 35 36 37 74 75 76 77 } // transpose 8x8 // Can't use transpose_s16_8x8() because the values are arranged in two 4x8 // columns. 
{ // 00 01 02 03 40 41 42 43 // 10 11 12 13 50 51 52 53 // 20 21 22 23 60 61 62 63 // 30 31 32 33 70 71 72 73 // 04 05 06 07 44 45 46 47 // 14 15 16 17 54 55 56 57 // 24 25 26 27 64 65 66 67 // 34 35 36 37 74 75 76 77 const int32x4x2_t r02_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_0), vreinterpretq_s32_s16(out_2)); const int32x4x2_t r13_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_1), vreinterpretq_s32_s16(out_3)); const int32x4x2_t r46_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_4), vreinterpretq_s32_s16(out_6)); const int32x4x2_t r57_s32 = vtrnq_s32(vreinterpretq_s32_s16(out_5), vreinterpretq_s32_s16(out_7)); const int16x8x2_t r01_s16 = vtrnq_s16(vreinterpretq_s16_s32(r02_s32.val[0]), vreinterpretq_s16_s32(r13_s32.val[0])); const int16x8x2_t r23_s16 = vtrnq_s16(vreinterpretq_s16_s32(r02_s32.val[1]), vreinterpretq_s16_s32(r13_s32.val[1])); const int16x8x2_t r45_s16 = vtrnq_s16(vreinterpretq_s16_s32(r46_s32.val[0]), vreinterpretq_s16_s32(r57_s32.val[0])); const int16x8x2_t r67_s16 = vtrnq_s16(vreinterpretq_s16_s32(r46_s32.val[1]), vreinterpretq_s16_s32(r57_s32.val[1])); input_0 = r01_s16.val[0]; input_1 = r01_s16.val[1]; input_2 = r23_s16.val[0]; input_3 = r23_s16.val[1]; input_4 = r45_s16.val[0]; input_5 = r45_s16.val[1]; input_6 = r67_s16.val[0]; input_7 = r67_s16.val[1]; // 00 10 20 30 40 50 60 70 // 01 11 21 31 41 51 61 71 // 02 12 22 32 42 52 62 72 // 03 13 23 33 43 53 63 73 // 04 14 24 34 44 54 64 74 // 05 15 25 35 45 55 65 75 // 06 16 26 36 46 56 66 76 // 07 17 27 37 47 57 67 77 } } // for { // from vpx_dct_sse2.c // Post-condition (division by two) // division of two 16 bits signed numbers using shifts // n / 2 = (n - (n >> 15)) >> 1 const int16x8_t sign_in0 = vshrq_n_s16(input_0, 15); const int16x8_t sign_in1 = vshrq_n_s16(input_1, 15); const int16x8_t sign_in2 = vshrq_n_s16(input_2, 15); const int16x8_t sign_in3 = vshrq_n_s16(input_3, 15); const int16x8_t sign_in4 = vshrq_n_s16(input_4, 15); const int16x8_t sign_in5 = vshrq_n_s16(input_5, 15); const int16x8_t sign_in6 = vshrq_n_s16(input_6, 15); const int16x8_t sign_in7 = vshrq_n_s16(input_7, 15); input_0 = vhsubq_s16(input_0, sign_in0); input_1 = vhsubq_s16(input_1, sign_in1); input_2 = vhsubq_s16(input_2, sign_in2); input_3 = vhsubq_s16(input_3, sign_in3); input_4 = vhsubq_s16(input_4, sign_in4); input_5 = vhsubq_s16(input_5, sign_in5); input_6 = vhsubq_s16(input_6, sign_in6); input_7 = vhsubq_s16(input_7, sign_in7); // store results store_s16q_to_tran_low(final_output + 0 * 8, input_0); store_s16q_to_tran_low(final_output + 1 * 8, input_1); store_s16q_to_tran_low(final_output + 2 * 8, input_2); store_s16q_to_tran_low(final_output + 3 * 8, input_3); store_s16q_to_tran_low(final_output + 4 * 8, input_4); store_s16q_to_tran_low(final_output + 5 * 8, input_5); store_s16q_to_tran_low(final_output + 6 * 8, input_6); store_s16q_to_tran_low(final_output + 7 * 8, input_7); } } libvpx-1.8.2/vpx_dsp/arm/hadamard_neon.c000066400000000000000000000102771357355204000202270ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <arm_neon.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/mem_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"

static void hadamard8x8_one_pass(int16x8_t *a0, int16x8_t *a1, int16x8_t *a2,
                                 int16x8_t *a3, int16x8_t *a4, int16x8_t *a5,
                                 int16x8_t *a6, int16x8_t *a7) {
  const int16x8_t b0 = vaddq_s16(*a0, *a1);
  const int16x8_t b1 = vsubq_s16(*a0, *a1);
  const int16x8_t b2 = vaddq_s16(*a2, *a3);
  const int16x8_t b3 = vsubq_s16(*a2, *a3);
  const int16x8_t b4 = vaddq_s16(*a4, *a5);
  const int16x8_t b5 = vsubq_s16(*a4, *a5);
  const int16x8_t b6 = vaddq_s16(*a6, *a7);
  const int16x8_t b7 = vsubq_s16(*a6, *a7);

  const int16x8_t c0 = vaddq_s16(b0, b2);
  const int16x8_t c1 = vaddq_s16(b1, b3);
  const int16x8_t c2 = vsubq_s16(b0, b2);
  const int16x8_t c3 = vsubq_s16(b1, b3);
  const int16x8_t c4 = vaddq_s16(b4, b6);
  const int16x8_t c5 = vaddq_s16(b5, b7);
  const int16x8_t c6 = vsubq_s16(b4, b6);
  const int16x8_t c7 = vsubq_s16(b5, b7);

  *a0 = vaddq_s16(c0, c4);
  *a1 = vsubq_s16(c2, c6);
  *a2 = vsubq_s16(c0, c4);
  *a3 = vaddq_s16(c2, c6);
  *a4 = vaddq_s16(c3, c7);
  *a5 = vsubq_s16(c3, c7);
  *a6 = vsubq_s16(c1, c5);
  *a7 = vaddq_s16(c1, c5);
}

void vpx_hadamard_8x8_neon(const int16_t *src_diff, ptrdiff_t src_stride,
                           tran_low_t *coeff) {
  int16x8_t a0 = vld1q_s16(src_diff);
  int16x8_t a1 = vld1q_s16(src_diff + src_stride);
  int16x8_t a2 = vld1q_s16(src_diff + 2 * src_stride);
  int16x8_t a3 = vld1q_s16(src_diff + 3 * src_stride);
  int16x8_t a4 = vld1q_s16(src_diff + 4 * src_stride);
  int16x8_t a5 = vld1q_s16(src_diff + 5 * src_stride);
  int16x8_t a6 = vld1q_s16(src_diff + 6 * src_stride);
  int16x8_t a7 = vld1q_s16(src_diff + 7 * src_stride);

  hadamard8x8_one_pass(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7);

  transpose_s16_8x8(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7);

  hadamard8x8_one_pass(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7);

  // Skip the second transpose because it is not required.

  store_s16q_to_tran_low(coeff + 0, a0);
  store_s16q_to_tran_low(coeff + 8, a1);
  store_s16q_to_tran_low(coeff + 16, a2);
  store_s16q_to_tran_low(coeff + 24, a3);
  store_s16q_to_tran_low(coeff + 32, a4);
  store_s16q_to_tran_low(coeff + 40, a5);
  store_s16q_to_tran_low(coeff + 48, a6);
  store_s16q_to_tran_low(coeff + 56, a7);
}
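// Note: a scalar sketch of what hadamard8x8_one_pass() above computes
// independently in each of the eight 16-bit lanes, including its output
// permutation. This is editorial and not part of libvpx (the name
// hadamard8_one_pass_reference is hypothetical); it assumes the usual
// two's-complement narrowing on int16_t assignment, matching the wrapping
// vaddq_s16/vsubq_s16 arithmetic.
static INLINE void hadamard8_one_pass_reference(const int16_t a[8],
                                                int16_t out[8]) {
  const int16_t b0 = a[0] + a[1], b1 = a[0] - a[1];
  const int16_t b2 = a[2] + a[3], b3 = a[2] - a[3];
  const int16_t b4 = a[4] + a[5], b5 = a[4] - a[5];
  const int16_t b6 = a[6] + a[7], b7 = a[6] - a[7];
  const int16_t c0 = b0 + b2, c1 = b1 + b3, c2 = b0 - b2, c3 = b1 - b3;
  const int16_t c4 = b4 + b6, c5 = b5 + b7, c6 = b4 - b6, c7 = b5 - b7;
  out[0] = c0 + c4;
  out[1] = c2 - c6;
  out[2] = c0 - c4;
  out[3] = c2 + c6;
  out[4] = c3 + c7;
  out[5] = c3 - c7;
  out[6] = c1 - c5;
  out[7] = c1 + c5;
}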
void vpx_hadamard_16x16_neon(const int16_t *src_diff, ptrdiff_t src_stride,
                             tran_low_t *coeff) {
  int i;

  /* Rearrange 16x16 to 8x32 and remove stride.
   * Top left first. */
  vpx_hadamard_8x8_neon(src_diff + 0 + 0 * src_stride, src_stride, coeff + 0);
  /* Top right. */
  vpx_hadamard_8x8_neon(src_diff + 8 + 0 * src_stride, src_stride, coeff + 64);
  /* Bottom left. */
  vpx_hadamard_8x8_neon(src_diff + 0 + 8 * src_stride, src_stride,
                        coeff + 128);
  /* Bottom right. */
  vpx_hadamard_8x8_neon(src_diff + 8 + 8 * src_stride, src_stride,
                        coeff + 192);

  for (i = 0; i < 64; i += 8) {
    const int16x8_t a0 = load_tran_low_to_s16q(coeff + 0);
    const int16x8_t a1 = load_tran_low_to_s16q(coeff + 64);
    const int16x8_t a2 = load_tran_low_to_s16q(coeff + 128);
    const int16x8_t a3 = load_tran_low_to_s16q(coeff + 192);

    const int16x8_t b0 = vhaddq_s16(a0, a1);
    const int16x8_t b1 = vhsubq_s16(a0, a1);
    const int16x8_t b2 = vhaddq_s16(a2, a3);
    const int16x8_t b3 = vhsubq_s16(a2, a3);

    const int16x8_t c0 = vaddq_s16(b0, b2);
    const int16x8_t c1 = vaddq_s16(b1, b3);
    const int16x8_t c2 = vsubq_s16(b0, b2);
    const int16x8_t c3 = vsubq_s16(b1, b3);

    store_s16q_to_tran_low(coeff + 0, c0);
    store_s16q_to_tran_low(coeff + 64, c1);
    store_s16q_to_tran_low(coeff + 128, c2);
    store_s16q_to_tran_low(coeff + 192, c3);

    coeff += 8;
  }
}
libvpx-1.8.2/vpx_dsp/arm/highbd_idct16x16_add_neon.c000066400000000000000000001657771357355204000222500ustar00rootroot00000000000000/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/highbd_idct_neon.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/inv_txfm.h"

static INLINE int32x4_t dct_const_round_shift_high_4(const int64x2x2_t in) {
  int32x2x2_t t32;

  t32.val[0] = vrshrn_n_s64(in.val[0], DCT_CONST_BITS);
  t32.val[1] = vrshrn_n_s64(in.val[1], DCT_CONST_BITS);
  return vcombine_s32(t32.val[0], t32.val[1]);
}

static INLINE void dct_const_round_shift_high_4_dual(
    const int64x2x2_t *const in, int32x4_t *const d0, int32x4_t *const d1) {
  *d0 = dct_const_round_shift_high_4(in[0]);
  *d1 = dct_const_round_shift_high_4(in[1]);
}

static INLINE int32x4x2_t
dct_const_round_shift_high_4x2_int64x2x2(const int64x2x2_t *const in) {
  int32x4x2_t out;

  out.val[0] = dct_const_round_shift_high_4(in[0]);
  out.val[1] = dct_const_round_shift_high_4(in[1]);
  return out;
}

static INLINE void dct_const_round_shift_high_4x2x2(const int64x2x2_t *const in,
                                                    int32x4x2_t *const d0,
                                                    int32x4x2_t *const d1) {
  *d0 = dct_const_round_shift_high_4x2_int64x2x2(in + 0);
  *d1 = dct_const_round_shift_high_4x2_int64x2x2(in + 2);
}

static INLINE void highbd_idct_cospi_2_30(const int32x4x2_t s0,
                                          const int32x4x2_t s1,
                                          const int32x4_t cospi_2_30_10_22,
                                          int32x4x2_t *const d0,
                                          int32x4x2_t *const d1) {
  int64x2x2_t t[4];

  t[0].val[0] = vmull_lane_s32(vget_low_s32(s0.val[0]),
                               vget_low_s32(cospi_2_30_10_22), 1);
  t[0].val[1] = vmull_lane_s32(vget_high_s32(s0.val[0]),
                               vget_low_s32(cospi_2_30_10_22), 1);
  t[1].val[0] = vmull_lane_s32(vget_low_s32(s0.val[1]),
                               vget_low_s32(cospi_2_30_10_22), 1);
  t[1].val[1] = vmull_lane_s32(vget_high_s32(s0.val[1]),
                               vget_low_s32(cospi_2_30_10_22), 1);
  t[2].val[0] = vmull_lane_s32(vget_low_s32(s1.val[0]),
                               vget_low_s32(cospi_2_30_10_22), 1);
  t[2].val[1] = vmull_lane_s32(vget_high_s32(s1.val[0]),
                               vget_low_s32(cospi_2_30_10_22), 1);
  t[3].val[0] = vmull_lane_s32(vget_low_s32(s1.val[1]),
                               vget_low_s32(cospi_2_30_10_22), 1);
  t[3].val[1] = vmull_lane_s32(vget_high_s32(s1.val[1]),
                               vget_low_s32(cospi_2_30_10_22), 1);
  t[0].val[0] = vmlsl_lane_s32(t[0].val[0], vget_low_s32(s1.val[0]),
                               vget_low_s32(cospi_2_30_10_22), 0);
  t[0].val[1] = vmlsl_lane_s32(t[0].val[1], vget_high_s32(s1.val[0]),
vget_low_s32(cospi_2_30_10_22), 0); t[1].val[0] = vmlsl_lane_s32(t[1].val[0], vget_low_s32(s1.val[1]), vget_low_s32(cospi_2_30_10_22), 0); t[1].val[1] = vmlsl_lane_s32(t[1].val[1], vget_high_s32(s1.val[1]), vget_low_s32(cospi_2_30_10_22), 0); t[2].val[0] = vmlal_lane_s32(t[2].val[0], vget_low_s32(s0.val[0]), vget_low_s32(cospi_2_30_10_22), 0); t[2].val[1] = vmlal_lane_s32(t[2].val[1], vget_high_s32(s0.val[0]), vget_low_s32(cospi_2_30_10_22), 0); t[3].val[0] = vmlal_lane_s32(t[3].val[0], vget_low_s32(s0.val[1]), vget_low_s32(cospi_2_30_10_22), 0); t[3].val[1] = vmlal_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_low_s32(cospi_2_30_10_22), 0); dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_4_28(const int32x4x2_t s0, const int32x4x2_t s1, const int32x4_t cospi_4_12_20N_28, int32x4x2_t *const d0, int32x4x2_t *const d1) { int64x2x2_t t[4]; t[0].val[0] = vmull_lane_s32(vget_low_s32(s0.val[0]), vget_high_s32(cospi_4_12_20N_28), 1); t[0].val[1] = vmull_lane_s32(vget_high_s32(s0.val[0]), vget_high_s32(cospi_4_12_20N_28), 1); t[1].val[0] = vmull_lane_s32(vget_low_s32(s0.val[1]), vget_high_s32(cospi_4_12_20N_28), 1); t[1].val[1] = vmull_lane_s32(vget_high_s32(s0.val[1]), vget_high_s32(cospi_4_12_20N_28), 1); t[2].val[0] = vmull_lane_s32(vget_low_s32(s1.val[0]), vget_high_s32(cospi_4_12_20N_28), 1); t[2].val[1] = vmull_lane_s32(vget_high_s32(s1.val[0]), vget_high_s32(cospi_4_12_20N_28), 1); t[3].val[0] = vmull_lane_s32(vget_low_s32(s1.val[1]), vget_high_s32(cospi_4_12_20N_28), 1); t[3].val[1] = vmull_lane_s32(vget_high_s32(s1.val[1]), vget_high_s32(cospi_4_12_20N_28), 1); t[0].val[0] = vmlsl_lane_s32(t[0].val[0], vget_low_s32(s1.val[0]), vget_low_s32(cospi_4_12_20N_28), 0); t[0].val[1] = vmlsl_lane_s32(t[0].val[1], vget_high_s32(s1.val[0]), vget_low_s32(cospi_4_12_20N_28), 0); t[1].val[0] = vmlsl_lane_s32(t[1].val[0], vget_low_s32(s1.val[1]), vget_low_s32(cospi_4_12_20N_28), 0); t[1].val[1] = vmlsl_lane_s32(t[1].val[1], vget_high_s32(s1.val[1]), vget_low_s32(cospi_4_12_20N_28), 0); t[2].val[0] = vmlal_lane_s32(t[2].val[0], vget_low_s32(s0.val[0]), vget_low_s32(cospi_4_12_20N_28), 0); t[2].val[1] = vmlal_lane_s32(t[2].val[1], vget_high_s32(s0.val[0]), vget_low_s32(cospi_4_12_20N_28), 0); t[3].val[0] = vmlal_lane_s32(t[3].val[0], vget_low_s32(s0.val[1]), vget_low_s32(cospi_4_12_20N_28), 0); t[3].val[1] = vmlal_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_low_s32(cospi_4_12_20N_28), 0); dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_6_26(const int32x4x2_t s0, const int32x4x2_t s1, const int32x4_t cospi_6_26N_14_18N, int32x4x2_t *const d0, int32x4x2_t *const d1) { int64x2x2_t t[4]; t[0].val[0] = vmull_lane_s32(vget_low_s32(s0.val[0]), vget_low_s32(cospi_6_26N_14_18N), 0); t[0].val[1] = vmull_lane_s32(vget_high_s32(s0.val[0]), vget_low_s32(cospi_6_26N_14_18N), 0); t[1].val[0] = vmull_lane_s32(vget_low_s32(s0.val[1]), vget_low_s32(cospi_6_26N_14_18N), 0); t[1].val[1] = vmull_lane_s32(vget_high_s32(s0.val[1]), vget_low_s32(cospi_6_26N_14_18N), 0); t[2].val[0] = vmull_lane_s32(vget_low_s32(s1.val[0]), vget_low_s32(cospi_6_26N_14_18N), 0); t[2].val[1] = vmull_lane_s32(vget_high_s32(s1.val[0]), vget_low_s32(cospi_6_26N_14_18N), 0); t[3].val[0] = vmull_lane_s32(vget_low_s32(s1.val[1]), vget_low_s32(cospi_6_26N_14_18N), 0); t[3].val[1] = vmull_lane_s32(vget_high_s32(s1.val[1]), vget_low_s32(cospi_6_26N_14_18N), 0); t[0].val[0] = vmlal_lane_s32(t[0].val[0], vget_low_s32(s1.val[0]), vget_low_s32(cospi_6_26N_14_18N), 1); 
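  /* Illustrative note, not from the source: all the highbd_idct_cospi_*
   * helpers in this file expand the same lane-wise rotation. For
   * highbd_idct_cospi_6_26() the per-element math, with 64-bit
   * intermediates (int64x2x2_t t[4]) so the 32-bit high-bitdepth
   * coefficients cannot overflow before the narrowing round shift, is:
   *
   *   d0 = ROUND_POWER_OF_TWO(
   *       (int64_t)s0 * cospi_6_64 - (int64_t)s1 * cospi_26_64,
   *       DCT_CONST_BITS);
   *   d1 = ROUND_POWER_OF_TWO(
   *       (int64_t)s1 * cospi_6_64 + (int64_t)s0 * cospi_26_64,
   *       DCT_CONST_BITS);
   *
   * The kCospi32 table stores cospi_26_64 negated (the "26N" in the
   * vector name), which is why the add/subtract roles of vmlal and
   * vmlsl appear swapped in the intrinsics. */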
t[0].val[1] = vmlal_lane_s32(t[0].val[1], vget_high_s32(s1.val[0]), vget_low_s32(cospi_6_26N_14_18N), 1); t[1].val[0] = vmlal_lane_s32(t[1].val[0], vget_low_s32(s1.val[1]), vget_low_s32(cospi_6_26N_14_18N), 1); t[1].val[1] = vmlal_lane_s32(t[1].val[1], vget_high_s32(s1.val[1]), vget_low_s32(cospi_6_26N_14_18N), 1); t[2].val[0] = vmlsl_lane_s32(t[2].val[0], vget_low_s32(s0.val[0]), vget_low_s32(cospi_6_26N_14_18N), 1); t[2].val[1] = vmlsl_lane_s32(t[2].val[1], vget_high_s32(s0.val[0]), vget_low_s32(cospi_6_26N_14_18N), 1); t[3].val[0] = vmlsl_lane_s32(t[3].val[0], vget_low_s32(s0.val[1]), vget_low_s32(cospi_6_26N_14_18N), 1); t[3].val[1] = vmlsl_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_low_s32(cospi_6_26N_14_18N), 1); dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_10_22(const int32x4x2_t s0, const int32x4x2_t s1, const int32x4_t cospi_2_30_10_22, int32x4x2_t *const d0, int32x4x2_t *const d1) { int64x2x2_t t[4]; t[0].val[0] = vmull_lane_s32(vget_low_s32(s0.val[0]), vget_high_s32(cospi_2_30_10_22), 1); t[0].val[1] = vmull_lane_s32(vget_high_s32(s0.val[0]), vget_high_s32(cospi_2_30_10_22), 1); t[1].val[0] = vmull_lane_s32(vget_low_s32(s0.val[1]), vget_high_s32(cospi_2_30_10_22), 1); t[1].val[1] = vmull_lane_s32(vget_high_s32(s0.val[1]), vget_high_s32(cospi_2_30_10_22), 1); t[2].val[0] = vmull_lane_s32(vget_low_s32(s1.val[0]), vget_high_s32(cospi_2_30_10_22), 1); t[2].val[1] = vmull_lane_s32(vget_high_s32(s1.val[0]), vget_high_s32(cospi_2_30_10_22), 1); t[3].val[0] = vmull_lane_s32(vget_low_s32(s1.val[1]), vget_high_s32(cospi_2_30_10_22), 1); t[3].val[1] = vmull_lane_s32(vget_high_s32(s1.val[1]), vget_high_s32(cospi_2_30_10_22), 1); t[0].val[0] = vmlsl_lane_s32(t[0].val[0], vget_low_s32(s1.val[0]), vget_high_s32(cospi_2_30_10_22), 0); t[0].val[1] = vmlsl_lane_s32(t[0].val[1], vget_high_s32(s1.val[0]), vget_high_s32(cospi_2_30_10_22), 0); t[1].val[0] = vmlsl_lane_s32(t[1].val[0], vget_low_s32(s1.val[1]), vget_high_s32(cospi_2_30_10_22), 0); t[1].val[1] = vmlsl_lane_s32(t[1].val[1], vget_high_s32(s1.val[1]), vget_high_s32(cospi_2_30_10_22), 0); t[2].val[0] = vmlal_lane_s32(t[2].val[0], vget_low_s32(s0.val[0]), vget_high_s32(cospi_2_30_10_22), 0); t[2].val[1] = vmlal_lane_s32(t[2].val[1], vget_high_s32(s0.val[0]), vget_high_s32(cospi_2_30_10_22), 0); t[3].val[0] = vmlal_lane_s32(t[3].val[0], vget_low_s32(s0.val[1]), vget_high_s32(cospi_2_30_10_22), 0); t[3].val[1] = vmlal_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_high_s32(cospi_2_30_10_22), 0); dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_12_20(const int32x4x2_t s0, const int32x4x2_t s1, const int32x4_t cospi_4_12_20N_28, int32x4x2_t *const d0, int32x4x2_t *const d1) { int64x2x2_t t[4]; t[0].val[0] = vmull_lane_s32(vget_low_s32(s0.val[0]), vget_low_s32(cospi_4_12_20N_28), 1); t[0].val[1] = vmull_lane_s32(vget_high_s32(s0.val[0]), vget_low_s32(cospi_4_12_20N_28), 1); t[1].val[0] = vmull_lane_s32(vget_low_s32(s0.val[1]), vget_low_s32(cospi_4_12_20N_28), 1); t[1].val[1] = vmull_lane_s32(vget_high_s32(s0.val[1]), vget_low_s32(cospi_4_12_20N_28), 1); t[2].val[0] = vmull_lane_s32(vget_low_s32(s1.val[0]), vget_low_s32(cospi_4_12_20N_28), 1); t[2].val[1] = vmull_lane_s32(vget_high_s32(s1.val[0]), vget_low_s32(cospi_4_12_20N_28), 1); t[3].val[0] = vmull_lane_s32(vget_low_s32(s1.val[1]), vget_low_s32(cospi_4_12_20N_28), 1); t[3].val[1] = vmull_lane_s32(vget_high_s32(s1.val[1]), vget_low_s32(cospi_4_12_20N_28), 1); t[0].val[0] = vmlal_lane_s32(t[0].val[0], 
vget_low_s32(s1.val[0]), vget_high_s32(cospi_4_12_20N_28), 0); t[0].val[1] = vmlal_lane_s32(t[0].val[1], vget_high_s32(s1.val[0]), vget_high_s32(cospi_4_12_20N_28), 0); t[1].val[0] = vmlal_lane_s32(t[1].val[0], vget_low_s32(s1.val[1]), vget_high_s32(cospi_4_12_20N_28), 0); t[1].val[1] = vmlal_lane_s32(t[1].val[1], vget_high_s32(s1.val[1]), vget_high_s32(cospi_4_12_20N_28), 0); t[2].val[0] = vmlsl_lane_s32(t[2].val[0], vget_low_s32(s0.val[0]), vget_high_s32(cospi_4_12_20N_28), 0); t[2].val[1] = vmlsl_lane_s32(t[2].val[1], vget_high_s32(s0.val[0]), vget_high_s32(cospi_4_12_20N_28), 0); t[3].val[0] = vmlsl_lane_s32(t[3].val[0], vget_low_s32(s0.val[1]), vget_high_s32(cospi_4_12_20N_28), 0); t[3].val[1] = vmlsl_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_high_s32(cospi_4_12_20N_28), 0); dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_14_18(const int32x4x2_t s0, const int32x4x2_t s1, const int32x4_t cospi_6_26N_14_18N, int32x4x2_t *const d0, int32x4x2_t *const d1) { int64x2x2_t t[4]; t[0].val[0] = vmull_lane_s32(vget_low_s32(s0.val[0]), vget_high_s32(cospi_6_26N_14_18N), 0); t[0].val[1] = vmull_lane_s32(vget_high_s32(s0.val[0]), vget_high_s32(cospi_6_26N_14_18N), 0); t[1].val[0] = vmull_lane_s32(vget_low_s32(s0.val[1]), vget_high_s32(cospi_6_26N_14_18N), 0); t[1].val[1] = vmull_lane_s32(vget_high_s32(s0.val[1]), vget_high_s32(cospi_6_26N_14_18N), 0); t[2].val[0] = vmull_lane_s32(vget_low_s32(s1.val[0]), vget_high_s32(cospi_6_26N_14_18N), 0); t[2].val[1] = vmull_lane_s32(vget_high_s32(s1.val[0]), vget_high_s32(cospi_6_26N_14_18N), 0); t[3].val[0] = vmull_lane_s32(vget_low_s32(s1.val[1]), vget_high_s32(cospi_6_26N_14_18N), 0); t[3].val[1] = vmull_lane_s32(vget_high_s32(s1.val[1]), vget_high_s32(cospi_6_26N_14_18N), 0); t[0].val[0] = vmlal_lane_s32(t[0].val[0], vget_low_s32(s1.val[0]), vget_high_s32(cospi_6_26N_14_18N), 1); t[0].val[1] = vmlal_lane_s32(t[0].val[1], vget_high_s32(s1.val[0]), vget_high_s32(cospi_6_26N_14_18N), 1); t[1].val[0] = vmlal_lane_s32(t[1].val[0], vget_low_s32(s1.val[1]), vget_high_s32(cospi_6_26N_14_18N), 1); t[1].val[1] = vmlal_lane_s32(t[1].val[1], vget_high_s32(s1.val[1]), vget_high_s32(cospi_6_26N_14_18N), 1); t[2].val[0] = vmlsl_lane_s32(t[2].val[0], vget_low_s32(s0.val[0]), vget_high_s32(cospi_6_26N_14_18N), 1); t[2].val[1] = vmlsl_lane_s32(t[2].val[1], vget_high_s32(s0.val[0]), vget_high_s32(cospi_6_26N_14_18N), 1); t[3].val[0] = vmlsl_lane_s32(t[3].val[0], vget_low_s32(s0.val[1]), vget_high_s32(cospi_6_26N_14_18N), 1); t[3].val[1] = vmlsl_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_high_s32(cospi_6_26N_14_18N), 1); dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_8_24_q_kernel( const int32x4x2_t s0, const int32x4x2_t s1, const int32x4_t cospi_0_8_16_24, int64x2x2_t *const t) { t[0].val[0] = vmull_lane_s32(vget_low_s32(s0.val[0]), vget_high_s32(cospi_0_8_16_24), 1); t[0].val[1] = vmull_lane_s32(vget_high_s32(s0.val[0]), vget_high_s32(cospi_0_8_16_24), 1); t[1].val[0] = vmull_lane_s32(vget_low_s32(s0.val[1]), vget_high_s32(cospi_0_8_16_24), 1); t[1].val[1] = vmull_lane_s32(vget_high_s32(s0.val[1]), vget_high_s32(cospi_0_8_16_24), 1); t[2].val[0] = vmull_lane_s32(vget_low_s32(s1.val[0]), vget_high_s32(cospi_0_8_16_24), 1); t[2].val[1] = vmull_lane_s32(vget_high_s32(s1.val[0]), vget_high_s32(cospi_0_8_16_24), 1); t[3].val[0] = vmull_lane_s32(vget_low_s32(s1.val[1]), vget_high_s32(cospi_0_8_16_24), 1); t[3].val[1] = vmull_lane_s32(vget_high_s32(s1.val[1]), 
vget_high_s32(cospi_0_8_16_24), 1); t[0].val[0] = vmlsl_lane_s32(t[0].val[0], vget_low_s32(s1.val[0]), vget_low_s32(cospi_0_8_16_24), 1); t[0].val[1] = vmlsl_lane_s32(t[0].val[1], vget_high_s32(s1.val[0]), vget_low_s32(cospi_0_8_16_24), 1); t[1].val[0] = vmlsl_lane_s32(t[1].val[0], vget_low_s32(s1.val[1]), vget_low_s32(cospi_0_8_16_24), 1); t[1].val[1] = vmlsl_lane_s32(t[1].val[1], vget_high_s32(s1.val[1]), vget_low_s32(cospi_0_8_16_24), 1); t[2].val[0] = vmlal_lane_s32(t[2].val[0], vget_low_s32(s0.val[0]), vget_low_s32(cospi_0_8_16_24), 1); t[2].val[1] = vmlal_lane_s32(t[2].val[1], vget_high_s32(s0.val[0]), vget_low_s32(cospi_0_8_16_24), 1); t[3].val[0] = vmlal_lane_s32(t[3].val[0], vget_low_s32(s0.val[1]), vget_low_s32(cospi_0_8_16_24), 1); t[3].val[1] = vmlal_lane_s32(t[3].val[1], vget_high_s32(s0.val[1]), vget_low_s32(cospi_0_8_16_24), 1); } static INLINE void highbd_idct_cospi_8_24_d_kernel( const int32x4_t s0, const int32x4_t s1, const int32x4_t cospi_0_8_16_24, int64x2x2_t *const t) { t[0].val[0] = vmull_lane_s32(vget_low_s32(s0), vget_high_s32(cospi_0_8_16_24), 1); t[0].val[1] = vmull_lane_s32(vget_high_s32(s0), vget_high_s32(cospi_0_8_16_24), 1); t[1].val[0] = vmull_lane_s32(vget_low_s32(s1), vget_high_s32(cospi_0_8_16_24), 1); t[1].val[1] = vmull_lane_s32(vget_high_s32(s1), vget_high_s32(cospi_0_8_16_24), 1); t[0].val[0] = vmlsl_lane_s32(t[0].val[0], vget_low_s32(s1), vget_low_s32(cospi_0_8_16_24), 1); t[0].val[1] = vmlsl_lane_s32(t[0].val[1], vget_high_s32(s1), vget_low_s32(cospi_0_8_16_24), 1); t[1].val[0] = vmlal_lane_s32(t[1].val[0], vget_low_s32(s0), vget_low_s32(cospi_0_8_16_24), 1); t[1].val[1] = vmlal_lane_s32(t[1].val[1], vget_high_s32(s0), vget_low_s32(cospi_0_8_16_24), 1); } static INLINE void highbd_idct_cospi_8_24_q(const int32x4x2_t s0, const int32x4x2_t s1, const int32x4_t cospi_0_8_16_24, int32x4x2_t *const d0, int32x4x2_t *const d1) { int64x2x2_t t[4]; highbd_idct_cospi_8_24_q_kernel(s0, s1, cospi_0_8_16_24, t); dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_8_24_d(const int32x4_t s0, const int32x4_t s1, const int32x4_t cospi_0_8_16_24, int32x4_t *const d0, int32x4_t *const d1) { int64x2x2_t t[2]; highbd_idct_cospi_8_24_d_kernel(s0, s1, cospi_0_8_16_24, t); dct_const_round_shift_high_4_dual(t, d0, d1); } static INLINE void highbd_idct_cospi_8_24_neg_q(const int32x4x2_t s0, const int32x4x2_t s1, const int32x4_t cospi_0_8_16_24, int32x4x2_t *const d0, int32x4x2_t *const d1) { int64x2x2_t t[4]; highbd_idct_cospi_8_24_q_kernel(s0, s1, cospi_0_8_16_24, t); t[2].val[0] = vsubq_s64(vdupq_n_s64(0), t[2].val[0]); t[2].val[1] = vsubq_s64(vdupq_n_s64(0), t[2].val[1]); t[3].val[0] = vsubq_s64(vdupq_n_s64(0), t[3].val[0]); t[3].val[1] = vsubq_s64(vdupq_n_s64(0), t[3].val[1]); dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_8_24_neg_d(const int32x4_t s0, const int32x4_t s1, const int32x4_t cospi_0_8_16_24, int32x4_t *const d0, int32x4_t *const d1) { int64x2x2_t t[2]; highbd_idct_cospi_8_24_d_kernel(s0, s1, cospi_0_8_16_24, t); t[1].val[0] = vsubq_s64(vdupq_n_s64(0), t[1].val[0]); t[1].val[1] = vsubq_s64(vdupq_n_s64(0), t[1].val[1]); dct_const_round_shift_high_4_dual(t, d0, d1); } static INLINE void highbd_idct_cospi_16_16_q(const int32x4x2_t s0, const int32x4x2_t s1, const int32x4_t cospi_0_8_16_24, int32x4x2_t *const d0, int32x4x2_t *const d1) { int64x2x2_t t[6]; t[4].val[0] = vmull_lane_s32(vget_low_s32(s1.val[0]), vget_high_s32(cospi_0_8_16_24), 0); t[4].val[1] = 
vmull_lane_s32(vget_high_s32(s1.val[0]), vget_high_s32(cospi_0_8_16_24), 0); t[5].val[0] = vmull_lane_s32(vget_low_s32(s1.val[1]), vget_high_s32(cospi_0_8_16_24), 0); t[5].val[1] = vmull_lane_s32(vget_high_s32(s1.val[1]), vget_high_s32(cospi_0_8_16_24), 0); t[0].val[0] = vmlsl_lane_s32(t[4].val[0], vget_low_s32(s0.val[0]), vget_high_s32(cospi_0_8_16_24), 0); t[0].val[1] = vmlsl_lane_s32(t[4].val[1], vget_high_s32(s0.val[0]), vget_high_s32(cospi_0_8_16_24), 0); t[1].val[0] = vmlsl_lane_s32(t[5].val[0], vget_low_s32(s0.val[1]), vget_high_s32(cospi_0_8_16_24), 0); t[1].val[1] = vmlsl_lane_s32(t[5].val[1], vget_high_s32(s0.val[1]), vget_high_s32(cospi_0_8_16_24), 0); t[2].val[0] = vmlal_lane_s32(t[4].val[0], vget_low_s32(s0.val[0]), vget_high_s32(cospi_0_8_16_24), 0); t[2].val[1] = vmlal_lane_s32(t[4].val[1], vget_high_s32(s0.val[0]), vget_high_s32(cospi_0_8_16_24), 0); t[3].val[0] = vmlal_lane_s32(t[5].val[0], vget_low_s32(s0.val[1]), vget_high_s32(cospi_0_8_16_24), 0); t[3].val[1] = vmlal_lane_s32(t[5].val[1], vget_high_s32(s0.val[1]), vget_high_s32(cospi_0_8_16_24), 0); dct_const_round_shift_high_4x2x2(t, d0, d1); } static INLINE void highbd_idct_cospi_16_16_d(const int32x4_t s0, const int32x4_t s1, const int32x4_t cospi_0_8_16_24, int32x4_t *const d0, int32x4_t *const d1) { int64x2x2_t t[3]; t[2].val[0] = vmull_lane_s32(vget_low_s32(s1), vget_high_s32(cospi_0_8_16_24), 0); t[2].val[1] = vmull_lane_s32(vget_high_s32(s1), vget_high_s32(cospi_0_8_16_24), 0); t[0].val[0] = vmlsl_lane_s32(t[2].val[0], vget_low_s32(s0), vget_high_s32(cospi_0_8_16_24), 0); t[0].val[1] = vmlsl_lane_s32(t[2].val[1], vget_high_s32(s0), vget_high_s32(cospi_0_8_16_24), 0); t[1].val[0] = vmlal_lane_s32(t[2].val[0], vget_low_s32(s0), vget_high_s32(cospi_0_8_16_24), 0); t[1].val[1] = vmlal_lane_s32(t[2].val[1], vget_high_s32(s0), vget_high_s32(cospi_0_8_16_24), 0); dct_const_round_shift_high_4_dual(t, d0, d1); } static INLINE void highbd_idct16x16_add_stage7_dual( const int32x4x2_t *const step2, int32x4x2_t *const out) { out[0].val[0] = vaddq_s32(step2[0].val[0], step2[15].val[0]); out[0].val[1] = vaddq_s32(step2[0].val[1], step2[15].val[1]); out[1].val[0] = vaddq_s32(step2[1].val[0], step2[14].val[0]); out[1].val[1] = vaddq_s32(step2[1].val[1], step2[14].val[1]); out[2].val[0] = vaddq_s32(step2[2].val[0], step2[13].val[0]); out[2].val[1] = vaddq_s32(step2[2].val[1], step2[13].val[1]); out[3].val[0] = vaddq_s32(step2[3].val[0], step2[12].val[0]); out[3].val[1] = vaddq_s32(step2[3].val[1], step2[12].val[1]); out[4].val[0] = vaddq_s32(step2[4].val[0], step2[11].val[0]); out[4].val[1] = vaddq_s32(step2[4].val[1], step2[11].val[1]); out[5].val[0] = vaddq_s32(step2[5].val[0], step2[10].val[0]); out[5].val[1] = vaddq_s32(step2[5].val[1], step2[10].val[1]); out[6].val[0] = vaddq_s32(step2[6].val[0], step2[9].val[0]); out[6].val[1] = vaddq_s32(step2[6].val[1], step2[9].val[1]); out[7].val[0] = vaddq_s32(step2[7].val[0], step2[8].val[0]); out[7].val[1] = vaddq_s32(step2[7].val[1], step2[8].val[1]); out[8].val[0] = vsubq_s32(step2[7].val[0], step2[8].val[0]); out[8].val[1] = vsubq_s32(step2[7].val[1], step2[8].val[1]); out[9].val[0] = vsubq_s32(step2[6].val[0], step2[9].val[0]); out[9].val[1] = vsubq_s32(step2[6].val[1], step2[9].val[1]); out[10].val[0] = vsubq_s32(step2[5].val[0], step2[10].val[0]); out[10].val[1] = vsubq_s32(step2[5].val[1], step2[10].val[1]); out[11].val[0] = vsubq_s32(step2[4].val[0], step2[11].val[0]); out[11].val[1] = vsubq_s32(step2[4].val[1], step2[11].val[1]); out[12].val[0] = vsubq_s32(step2[3].val[0], 
step2[12].val[0]); out[12].val[1] = vsubq_s32(step2[3].val[1], step2[12].val[1]); out[13].val[0] = vsubq_s32(step2[2].val[0], step2[13].val[0]); out[13].val[1] = vsubq_s32(step2[2].val[1], step2[13].val[1]); out[14].val[0] = vsubq_s32(step2[1].val[0], step2[14].val[0]); out[14].val[1] = vsubq_s32(step2[1].val[1], step2[14].val[1]); out[15].val[0] = vsubq_s32(step2[0].val[0], step2[15].val[0]); out[15].val[1] = vsubq_s32(step2[0].val[1], step2[15].val[1]); } static INLINE void highbd_idct16x16_add_stage7(const int32x4_t *const step2, int32x4_t *const out) { out[0] = vaddq_s32(step2[0], step2[15]); out[1] = vaddq_s32(step2[1], step2[14]); out[2] = vaddq_s32(step2[2], step2[13]); out[3] = vaddq_s32(step2[3], step2[12]); out[4] = vaddq_s32(step2[4], step2[11]); out[5] = vaddq_s32(step2[5], step2[10]); out[6] = vaddq_s32(step2[6], step2[9]); out[7] = vaddq_s32(step2[7], step2[8]); out[8] = vsubq_s32(step2[7], step2[8]); out[9] = vsubq_s32(step2[6], step2[9]); out[10] = vsubq_s32(step2[5], step2[10]); out[11] = vsubq_s32(step2[4], step2[11]); out[12] = vsubq_s32(step2[3], step2[12]); out[13] = vsubq_s32(step2[2], step2[13]); out[14] = vsubq_s32(step2[1], step2[14]); out[15] = vsubq_s32(step2[0], step2[15]); } void vpx_highbd_idct16x16_256_add_half1d(const int32_t *input, int32_t *output, uint16_t *dest, const int stride, const int bd) { const int32x4_t cospi_0_8_16_24 = vld1q_s32(kCospi32 + 0); const int32x4_t cospi_4_12_20N_28 = vld1q_s32(kCospi32 + 4); const int32x4_t cospi_2_30_10_22 = vld1q_s32(kCospi32 + 8); const int32x4_t cospi_6_26N_14_18N = vld1q_s32(kCospi32 + 12); int32x4x2_t in[16], step1[16], step2[16], out[16]; // Load input (16x8) in[0].val[0] = vld1q_s32(input); in[0].val[1] = vld1q_s32(input + 4); input += 8; in[8].val[0] = vld1q_s32(input); in[8].val[1] = vld1q_s32(input + 4); input += 8; in[1].val[0] = vld1q_s32(input); in[1].val[1] = vld1q_s32(input + 4); input += 8; in[9].val[0] = vld1q_s32(input); in[9].val[1] = vld1q_s32(input + 4); input += 8; in[2].val[0] = vld1q_s32(input); in[2].val[1] = vld1q_s32(input + 4); input += 8; in[10].val[0] = vld1q_s32(input); in[10].val[1] = vld1q_s32(input + 4); input += 8; in[3].val[0] = vld1q_s32(input); in[3].val[1] = vld1q_s32(input + 4); input += 8; in[11].val[0] = vld1q_s32(input); in[11].val[1] = vld1q_s32(input + 4); input += 8; in[4].val[0] = vld1q_s32(input); in[4].val[1] = vld1q_s32(input + 4); input += 8; in[12].val[0] = vld1q_s32(input); in[12].val[1] = vld1q_s32(input + 4); input += 8; in[5].val[0] = vld1q_s32(input); in[5].val[1] = vld1q_s32(input + 4); input += 8; in[13].val[0] = vld1q_s32(input); in[13].val[1] = vld1q_s32(input + 4); input += 8; in[6].val[0] = vld1q_s32(input); in[6].val[1] = vld1q_s32(input + 4); input += 8; in[14].val[0] = vld1q_s32(input); in[14].val[1] = vld1q_s32(input + 4); input += 8; in[7].val[0] = vld1q_s32(input); in[7].val[1] = vld1q_s32(input + 4); input += 8; in[15].val[0] = vld1q_s32(input); in[15].val[1] = vld1q_s32(input + 4); // Transpose transpose_s32_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); transpose_s32_8x8(&in[8], &in[9], &in[10], &in[11], &in[12], &in[13], &in[14], &in[15]); // stage 1 step1[0] = in[0 / 2]; step1[1] = in[16 / 2]; step1[2] = in[8 / 2]; step1[3] = in[24 / 2]; step1[4] = in[4 / 2]; step1[5] = in[20 / 2]; step1[6] = in[12 / 2]; step1[7] = in[28 / 2]; step1[8] = in[2 / 2]; step1[9] = in[18 / 2]; step1[10] = in[10 / 2]; step1[11] = in[26 / 2]; step1[12] = in[6 / 2]; step1[13] = in[22 / 2]; step1[14] = in[14 / 2]; step1[15] = in[30 / 2]; // stage 
2 step2[0] = step1[0]; step2[1] = step1[1]; step2[2] = step1[2]; step2[3] = step1[3]; step2[4] = step1[4]; step2[5] = step1[5]; step2[6] = step1[6]; step2[7] = step1[7]; highbd_idct_cospi_2_30(step1[8], step1[15], cospi_2_30_10_22, &step2[8], &step2[15]); highbd_idct_cospi_14_18(step1[9], step1[14], cospi_6_26N_14_18N, &step2[9], &step2[14]); highbd_idct_cospi_10_22(step1[10], step1[13], cospi_2_30_10_22, &step2[10], &step2[13]); highbd_idct_cospi_6_26(step1[11], step1[12], cospi_6_26N_14_18N, &step2[11], &step2[12]); // stage 3 step1[0] = step2[0]; step1[1] = step2[1]; step1[2] = step2[2]; step1[3] = step2[3]; highbd_idct_cospi_4_28(step2[4], step2[7], cospi_4_12_20N_28, &step1[4], &step1[7]); highbd_idct_cospi_12_20(step2[5], step2[6], cospi_4_12_20N_28, &step1[5], &step1[6]); step1[8].val[0] = vaddq_s32(step2[8].val[0], step2[9].val[0]); step1[8].val[1] = vaddq_s32(step2[8].val[1], step2[9].val[1]); step1[9].val[0] = vsubq_s32(step2[8].val[0], step2[9].val[0]); step1[9].val[1] = vsubq_s32(step2[8].val[1], step2[9].val[1]); step1[10].val[0] = vsubq_s32(step2[11].val[0], step2[10].val[0]); step1[10].val[1] = vsubq_s32(step2[11].val[1], step2[10].val[1]); step1[11].val[0] = vaddq_s32(step2[11].val[0], step2[10].val[0]); step1[11].val[1] = vaddq_s32(step2[11].val[1], step2[10].val[1]); step1[12].val[0] = vaddq_s32(step2[12].val[0], step2[13].val[0]); step1[12].val[1] = vaddq_s32(step2[12].val[1], step2[13].val[1]); step1[13].val[0] = vsubq_s32(step2[12].val[0], step2[13].val[0]); step1[13].val[1] = vsubq_s32(step2[12].val[1], step2[13].val[1]); step1[14].val[0] = vsubq_s32(step2[15].val[0], step2[14].val[0]); step1[14].val[1] = vsubq_s32(step2[15].val[1], step2[14].val[1]); step1[15].val[0] = vaddq_s32(step2[15].val[0], step2[14].val[0]); step1[15].val[1] = vaddq_s32(step2[15].val[1], step2[14].val[1]); // stage 4 highbd_idct_cospi_16_16_q(step1[1], step1[0], cospi_0_8_16_24, &step2[1], &step2[0]); highbd_idct_cospi_8_24_q(step1[2], step1[3], cospi_0_8_16_24, &step2[2], &step2[3]); step2[4].val[0] = vaddq_s32(step1[4].val[0], step1[5].val[0]); step2[4].val[1] = vaddq_s32(step1[4].val[1], step1[5].val[1]); step2[5].val[0] = vsubq_s32(step1[4].val[0], step1[5].val[0]); step2[5].val[1] = vsubq_s32(step1[4].val[1], step1[5].val[1]); step2[6].val[0] = vsubq_s32(step1[7].val[0], step1[6].val[0]); step2[6].val[1] = vsubq_s32(step1[7].val[1], step1[6].val[1]); step2[7].val[0] = vaddq_s32(step1[7].val[0], step1[6].val[0]); step2[7].val[1] = vaddq_s32(step1[7].val[1], step1[6].val[1]); step2[8] = step1[8]; highbd_idct_cospi_8_24_q(step1[14], step1[9], cospi_0_8_16_24, &step2[9], &step2[14]); highbd_idct_cospi_8_24_neg_q(step1[13], step1[10], cospi_0_8_16_24, &step2[13], &step2[10]); step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; // stage 5 step1[0].val[0] = vaddq_s32(step2[0].val[0], step2[3].val[0]); step1[0].val[1] = vaddq_s32(step2[0].val[1], step2[3].val[1]); step1[1].val[0] = vaddq_s32(step2[1].val[0], step2[2].val[0]); step1[1].val[1] = vaddq_s32(step2[1].val[1], step2[2].val[1]); step1[2].val[0] = vsubq_s32(step2[1].val[0], step2[2].val[0]); step1[2].val[1] = vsubq_s32(step2[1].val[1], step2[2].val[1]); step1[3].val[0] = vsubq_s32(step2[0].val[0], step2[3].val[0]); step1[3].val[1] = vsubq_s32(step2[0].val[1], step2[3].val[1]); step1[4] = step2[4]; highbd_idct_cospi_16_16_q(step2[5], step2[6], cospi_0_8_16_24, &step1[5], &step1[6]); step1[7] = step2[7]; step1[8].val[0] = vaddq_s32(step2[8].val[0], step2[11].val[0]); step1[8].val[1] = vaddq_s32(step2[8].val[1], 
step2[11].val[1]); step1[9].val[0] = vaddq_s32(step2[9].val[0], step2[10].val[0]); step1[9].val[1] = vaddq_s32(step2[9].val[1], step2[10].val[1]); step1[10].val[0] = vsubq_s32(step2[9].val[0], step2[10].val[0]); step1[10].val[1] = vsubq_s32(step2[9].val[1], step2[10].val[1]); step1[11].val[0] = vsubq_s32(step2[8].val[0], step2[11].val[0]); step1[11].val[1] = vsubq_s32(step2[8].val[1], step2[11].val[1]); step1[12].val[0] = vsubq_s32(step2[15].val[0], step2[12].val[0]); step1[12].val[1] = vsubq_s32(step2[15].val[1], step2[12].val[1]); step1[13].val[0] = vsubq_s32(step2[14].val[0], step2[13].val[0]); step1[13].val[1] = vsubq_s32(step2[14].val[1], step2[13].val[1]); step1[14].val[0] = vaddq_s32(step2[14].val[0], step2[13].val[0]); step1[14].val[1] = vaddq_s32(step2[14].val[1], step2[13].val[1]); step1[15].val[0] = vaddq_s32(step2[15].val[0], step2[12].val[0]); step1[15].val[1] = vaddq_s32(step2[15].val[1], step2[12].val[1]); // stage 6 step2[0].val[0] = vaddq_s32(step1[0].val[0], step1[7].val[0]); step2[0].val[1] = vaddq_s32(step1[0].val[1], step1[7].val[1]); step2[1].val[0] = vaddq_s32(step1[1].val[0], step1[6].val[0]); step2[1].val[1] = vaddq_s32(step1[1].val[1], step1[6].val[1]); step2[2].val[0] = vaddq_s32(step1[2].val[0], step1[5].val[0]); step2[2].val[1] = vaddq_s32(step1[2].val[1], step1[5].val[1]); step2[3].val[0] = vaddq_s32(step1[3].val[0], step1[4].val[0]); step2[3].val[1] = vaddq_s32(step1[3].val[1], step1[4].val[1]); step2[4].val[0] = vsubq_s32(step1[3].val[0], step1[4].val[0]); step2[4].val[1] = vsubq_s32(step1[3].val[1], step1[4].val[1]); step2[5].val[0] = vsubq_s32(step1[2].val[0], step1[5].val[0]); step2[5].val[1] = vsubq_s32(step1[2].val[1], step1[5].val[1]); step2[6].val[0] = vsubq_s32(step1[1].val[0], step1[6].val[0]); step2[6].val[1] = vsubq_s32(step1[1].val[1], step1[6].val[1]); step2[7].val[0] = vsubq_s32(step1[0].val[0], step1[7].val[0]); step2[7].val[1] = vsubq_s32(step1[0].val[1], step1[7].val[1]); highbd_idct_cospi_16_16_q(step1[10], step1[13], cospi_0_8_16_24, &step2[10], &step2[13]); highbd_idct_cospi_16_16_q(step1[11], step1[12], cospi_0_8_16_24, &step2[11], &step2[12]); step2[8] = step1[8]; step2[9] = step1[9]; step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 highbd_idct16x16_add_stage7_dual(step2, out); if (output) { highbd_idct16x16_store_pass1(out, output); } else { highbd_idct16x16_add_store(out, dest, stride, bd); } } static INLINE int32x4x2_t highbd_idct_cospi_lane0_dual(const int32x4x2_t s, const int32x2_t coef) { int64x2x2_t t[2]; t[0].val[0] = vmull_lane_s32(vget_low_s32(s.val[0]), coef, 0); t[0].val[1] = vmull_lane_s32(vget_high_s32(s.val[0]), coef, 0); t[1].val[0] = vmull_lane_s32(vget_low_s32(s.val[1]), coef, 0); t[1].val[1] = vmull_lane_s32(vget_high_s32(s.val[1]), coef, 0); return dct_const_round_shift_high_4x2_int64x2x2(t); } static INLINE int32x4_t highbd_idct_cospi_lane0(const int32x4_t s, const int32x2_t coef) { int64x2x2_t t; t.val[0] = vmull_lane_s32(vget_low_s32(s), coef, 0); t.val[1] = vmull_lane_s32(vget_high_s32(s), coef, 0); return dct_const_round_shift_high_4(t); } static INLINE int32x4x2_t highbd_idct_cospi_lane1_dual(const int32x4x2_t s, const int32x2_t coef) { int64x2x2_t t[2]; t[0].val[0] = vmull_lane_s32(vget_low_s32(s.val[0]), coef, 1); t[0].val[1] = vmull_lane_s32(vget_high_s32(s.val[0]), coef, 1); t[1].val[0] = vmull_lane_s32(vget_low_s32(s.val[1]), coef, 1); t[1].val[1] = vmull_lane_s32(vget_high_s32(s.val[1]), coef, 1); return dct_const_round_shift_high_4x2_int64x2x2(t); } static INLINE int32x4_t 
highbd_idct_cospi_lane1(const int32x4_t s, const int32x2_t coef) { int64x2x2_t t; t.val[0] = vmull_lane_s32(vget_low_s32(s), coef, 1); t.val[1] = vmull_lane_s32(vget_high_s32(s), coef, 1); return dct_const_round_shift_high_4(t); } static void vpx_highbd_idct16x16_38_add_half1d(const int32_t *input, int32_t *output, uint16_t *dest, const int stride, const int bd) { const int32x4_t cospi_0_8_16_24 = vld1q_s32(kCospi32 + 0); const int32x4_t cospi_4_12_20N_28 = vld1q_s32(kCospi32 + 4); const int32x4_t cospi_2_30_10_22 = vld1q_s32(kCospi32 + 8); const int32x4_t cospi_6_26N_14_18N = vld1q_s32(kCospi32 + 12); int32x4x2_t in[8], step1[16], step2[16], out[16]; // Load input (8x8) in[0].val[0] = vld1q_s32(input); in[0].val[1] = vld1q_s32(input + 4); input += 16; in[1].val[0] = vld1q_s32(input); in[1].val[1] = vld1q_s32(input + 4); input += 16; in[2].val[0] = vld1q_s32(input); in[2].val[1] = vld1q_s32(input + 4); input += 16; in[3].val[0] = vld1q_s32(input); in[3].val[1] = vld1q_s32(input + 4); input += 16; in[4].val[0] = vld1q_s32(input); in[4].val[1] = vld1q_s32(input + 4); input += 16; in[5].val[0] = vld1q_s32(input); in[5].val[1] = vld1q_s32(input + 4); input += 16; in[6].val[0] = vld1q_s32(input); in[6].val[1] = vld1q_s32(input + 4); input += 16; in[7].val[0] = vld1q_s32(input); in[7].val[1] = vld1q_s32(input + 4); // Transpose transpose_s32_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); // stage 1 step1[0] = in[0 / 2]; step1[2] = in[8 / 2]; step1[4] = in[4 / 2]; step1[6] = in[12 / 2]; step1[8] = in[2 / 2]; step1[10] = in[10 / 2]; step1[12] = in[6 / 2]; step1[14] = in[14 / 2]; // 0 in pass 1 // stage 2 step2[0] = step1[0]; step2[2] = step1[2]; step2[4] = step1[4]; step2[6] = step1[6]; step2[8] = highbd_idct_cospi_lane1_dual(step1[8], vget_low_s32(cospi_2_30_10_22)); step2[9] = highbd_idct_cospi_lane1_dual(step1[14], vget_high_s32(cospi_6_26N_14_18N)); step2[10] = highbd_idct_cospi_lane1_dual(step1[10], vget_high_s32(cospi_2_30_10_22)); step2[11] = highbd_idct_cospi_lane1_dual(step1[12], vget_low_s32(cospi_6_26N_14_18N)); step2[12] = highbd_idct_cospi_lane0_dual(step1[12], vget_low_s32(cospi_6_26N_14_18N)); step2[13] = highbd_idct_cospi_lane0_dual(step1[10], vget_high_s32(cospi_2_30_10_22)); step2[14] = highbd_idct_cospi_lane0_dual(step1[14], vget_high_s32(cospi_6_26N_14_18N)); step2[15] = highbd_idct_cospi_lane0_dual(step1[8], vget_low_s32(cospi_2_30_10_22)); // stage 3 step1[0] = step2[0]; step1[2] = step2[2]; step1[4] = highbd_idct_cospi_lane1_dual(step2[4], vget_high_s32(cospi_4_12_20N_28)); step1[5] = highbd_idct_cospi_lane0_dual(step2[6], vget_high_s32(cospi_4_12_20N_28)); step1[6] = highbd_idct_cospi_lane1_dual(step2[6], vget_low_s32(cospi_4_12_20N_28)); step1[7] = highbd_idct_cospi_lane0_dual(step2[4], vget_low_s32(cospi_4_12_20N_28)); step1[8] = highbd_idct_add_dual(step2[8], step2[9]); step1[9] = highbd_idct_sub_dual(step2[8], step2[9]); step1[10] = highbd_idct_sub_dual(step2[11], step2[10]); step1[11] = highbd_idct_add_dual(step2[11], step2[10]); step1[12] = highbd_idct_add_dual(step2[12], step2[13]); step1[13] = highbd_idct_sub_dual(step2[12], step2[13]); step1[14] = highbd_idct_sub_dual(step2[15], step2[14]); step1[15] = highbd_idct_add_dual(step2[15], step2[14]); // stage 4 step2[0] = step2[1] = highbd_idct_cospi_lane0_dual(step1[0], vget_high_s32(cospi_0_8_16_24)); step2[2] = highbd_idct_cospi_lane1_dual(step1[2], vget_high_s32(cospi_0_8_16_24)); step2[3] = highbd_idct_cospi_lane1_dual(step1[2], vget_low_s32(cospi_0_8_16_24)); step2[4] = 
highbd_idct_add_dual(step1[4], step1[5]); step2[5] = highbd_idct_sub_dual(step1[4], step1[5]); step2[6] = highbd_idct_sub_dual(step1[7], step1[6]); step2[7] = highbd_idct_add_dual(step1[7], step1[6]); step2[8] = step1[8]; highbd_idct_cospi_8_24_q(step1[14], step1[9], cospi_0_8_16_24, &step2[9], &step2[14]); highbd_idct_cospi_8_24_neg_q(step1[13], step1[10], cospi_0_8_16_24, &step2[13], &step2[10]); step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; // stage 5 step1[0] = highbd_idct_add_dual(step2[0], step2[3]); step1[1] = highbd_idct_add_dual(step2[1], step2[2]); step1[2] = highbd_idct_sub_dual(step2[1], step2[2]); step1[3] = highbd_idct_sub_dual(step2[0], step2[3]); step1[4] = step2[4]; highbd_idct_cospi_16_16_q(step2[5], step2[6], cospi_0_8_16_24, &step1[5], &step1[6]); step1[7] = step2[7]; step1[8] = highbd_idct_add_dual(step2[8], step2[11]); step1[9] = highbd_idct_add_dual(step2[9], step2[10]); step1[10] = highbd_idct_sub_dual(step2[9], step2[10]); step1[11] = highbd_idct_sub_dual(step2[8], step2[11]); step1[12] = highbd_idct_sub_dual(step2[15], step2[12]); step1[13] = highbd_idct_sub_dual(step2[14], step2[13]); step1[14] = highbd_idct_add_dual(step2[14], step2[13]); step1[15] = highbd_idct_add_dual(step2[15], step2[12]); // stage 6 step2[0] = highbd_idct_add_dual(step1[0], step1[7]); step2[1] = highbd_idct_add_dual(step1[1], step1[6]); step2[2] = highbd_idct_add_dual(step1[2], step1[5]); step2[3] = highbd_idct_add_dual(step1[3], step1[4]); step2[4] = highbd_idct_sub_dual(step1[3], step1[4]); step2[5] = highbd_idct_sub_dual(step1[2], step1[5]); step2[6] = highbd_idct_sub_dual(step1[1], step1[6]); step2[7] = highbd_idct_sub_dual(step1[0], step1[7]); highbd_idct_cospi_16_16_q(step1[10], step1[13], cospi_0_8_16_24, &step2[10], &step2[13]); highbd_idct_cospi_16_16_q(step1[11], step1[12], cospi_0_8_16_24, &step2[11], &step2[12]); step2[8] = step1[8]; step2[9] = step1[9]; step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 highbd_idct16x16_add_stage7_dual(step2, out); if (output) { highbd_idct16x16_store_pass1(out, output); } else { highbd_idct16x16_add_store(out, dest, stride, bd); } } static void highbd_idct16x16_10_add_half1d_pass1(const tran_low_t *input, int32_t *output) { const int32x4_t cospi_0_8_16_24 = vld1q_s32(kCospi32 + 0); const int32x4_t cospi_4_12_20N_28 = vld1q_s32(kCospi32 + 4); const int32x4_t cospi_2_30_10_22 = vld1q_s32(kCospi32 + 8); const int32x4_t cospi_6_26N_14_18N = vld1q_s32(kCospi32 + 12); int32x4_t in[4], step1[16], step2[16], out[16]; // Load input (4x4) in[0] = vld1q_s32(input); input += 16; in[1] = vld1q_s32(input); input += 16; in[2] = vld1q_s32(input); input += 16; in[3] = vld1q_s32(input); // Transpose transpose_s32_4x4(&in[0], &in[1], &in[2], &in[3]); // stage 1 step1[0] = in[0 / 2]; step1[4] = in[4 / 2]; step1[8] = in[2 / 2]; step1[12] = in[6 / 2]; // stage 2 step2[0] = step1[0]; step2[4] = step1[4]; step2[8] = highbd_idct_cospi_lane1(step1[8], vget_low_s32(cospi_2_30_10_22)); step2[11] = highbd_idct_cospi_lane1(step1[12], vget_low_s32(cospi_6_26N_14_18N)); step2[12] = highbd_idct_cospi_lane0(step1[12], vget_low_s32(cospi_6_26N_14_18N)); step2[15] = highbd_idct_cospi_lane0(step1[8], vget_low_s32(cospi_2_30_10_22)); // stage 3 step1[0] = step2[0]; step1[4] = highbd_idct_cospi_lane1(step2[4], vget_high_s32(cospi_4_12_20N_28)); step1[7] = highbd_idct_cospi_lane0(step2[4], vget_low_s32(cospi_4_12_20N_28)); step1[8] = step2[8]; step1[9] = step2[8]; step1[10] = step2[11]; step1[11] = step2[11]; step1[12] = step2[12]; step1[13] = step2[12]; 
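  /* Illustrative note, not from the source: in this eob<=10 path only the
   * top-left 4x4 block of coefficients is nonzero, so the partner inputs
   * of the usual stage-3 add/sub butterflies are all zero and the
   * butterflies collapse to the plain copies seen here, e.g.
   * step1[9] = step2[8] + 0 and step1[14] = step2[15] - 0. */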
step1[14] = step2[15]; step1[15] = step2[15]; // stage 4 step2[0] = step2[1] = highbd_idct_cospi_lane0(step1[0], vget_high_s32(cospi_0_8_16_24)); step2[4] = step1[4]; step2[5] = step1[4]; step2[6] = step1[7]; step2[7] = step1[7]; step2[8] = step1[8]; highbd_idct_cospi_8_24_d(step1[14], step1[9], cospi_0_8_16_24, &step2[9], &step2[14]); highbd_idct_cospi_8_24_neg_d(step1[13], step1[10], cospi_0_8_16_24, &step2[13], &step2[10]); step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; // stage 5 step1[0] = step2[0]; step1[1] = step2[1]; step1[2] = step2[1]; step1[3] = step2[0]; step1[4] = step2[4]; highbd_idct_cospi_16_16_d(step2[5], step2[6], cospi_0_8_16_24, &step1[5], &step1[6]); step1[7] = step2[7]; step1[8] = vaddq_s32(step2[8], step2[11]); step1[9] = vaddq_s32(step2[9], step2[10]); step1[10] = vsubq_s32(step2[9], step2[10]); step1[11] = vsubq_s32(step2[8], step2[11]); step1[12] = vsubq_s32(step2[15], step2[12]); step1[13] = vsubq_s32(step2[14], step2[13]); step1[14] = vaddq_s32(step2[14], step2[13]); step1[15] = vaddq_s32(step2[15], step2[12]); // stage 6 step2[0] = vaddq_s32(step1[0], step1[7]); step2[1] = vaddq_s32(step1[1], step1[6]); step2[2] = vaddq_s32(step1[2], step1[5]); step2[3] = vaddq_s32(step1[3], step1[4]); step2[4] = vsubq_s32(step1[3], step1[4]); step2[5] = vsubq_s32(step1[2], step1[5]); step2[6] = vsubq_s32(step1[1], step1[6]); step2[7] = vsubq_s32(step1[0], step1[7]); highbd_idct_cospi_16_16_d(step1[10], step1[13], cospi_0_8_16_24, &step2[10], &step2[13]); highbd_idct_cospi_16_16_d(step1[11], step1[12], cospi_0_8_16_24, &step2[11], &step2[12]); step2[8] = step1[8]; step2[9] = step1[9]; step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 highbd_idct16x16_add_stage7(step2, out); // pass 1: save the result into output vst1q_s32(output, out[0]); output += 4; vst1q_s32(output, out[1]); output += 4; vst1q_s32(output, out[2]); output += 4; vst1q_s32(output, out[3]); output += 4; vst1q_s32(output, out[4]); output += 4; vst1q_s32(output, out[5]); output += 4; vst1q_s32(output, out[6]); output += 4; vst1q_s32(output, out[7]); output += 4; vst1q_s32(output, out[8]); output += 4; vst1q_s32(output, out[9]); output += 4; vst1q_s32(output, out[10]); output += 4; vst1q_s32(output, out[11]); output += 4; vst1q_s32(output, out[12]); output += 4; vst1q_s32(output, out[13]); output += 4; vst1q_s32(output, out[14]); output += 4; vst1q_s32(output, out[15]); } static void highbd_idct16x16_10_add_half1d_pass2(const int32_t *input, int32_t *const output, uint16_t *const dest, const int stride, const int bd) { const int32x4_t cospi_0_8_16_24 = vld1q_s32(kCospi32 + 0); const int32x4_t cospi_4_12_20N_28 = vld1q_s32(kCospi32 + 4); const int32x4_t cospi_2_30_10_22 = vld1q_s32(kCospi32 + 8); const int32x4_t cospi_6_26N_14_18N = vld1q_s32(kCospi32 + 12); int32x4x2_t in[4], step1[16], step2[16], out[16]; // Load input (4x8) in[0].val[0] = vld1q_s32(input); input += 4; in[0].val[1] = vld1q_s32(input); input += 4; in[1].val[0] = vld1q_s32(input); input += 4; in[1].val[1] = vld1q_s32(input); input += 4; in[2].val[0] = vld1q_s32(input); input += 4; in[2].val[1] = vld1q_s32(input); input += 4; in[3].val[0] = vld1q_s32(input); input += 4; in[3].val[1] = vld1q_s32(input); // Transpose transpose_s32_4x8(&in[0].val[0], &in[0].val[1], &in[1].val[0], &in[1].val[1], &in[2].val[0], &in[2].val[1], &in[3].val[0], &in[3].val[1]); // stage 1 step1[0] = in[0 / 2]; step1[4] = in[4 / 2]; step1[8] = in[2 / 2]; step1[12] = in[6 / 2]; // stage 2 step2[0] = step1[0]; step2[4] = step1[4]; step2[8] = 
highbd_idct_cospi_lane1_dual(step1[8], vget_low_s32(cospi_2_30_10_22)); step2[11] = highbd_idct_cospi_lane1_dual(step1[12], vget_low_s32(cospi_6_26N_14_18N)); step2[12] = highbd_idct_cospi_lane0_dual(step1[12], vget_low_s32(cospi_6_26N_14_18N)); step2[15] = highbd_idct_cospi_lane0_dual(step1[8], vget_low_s32(cospi_2_30_10_22)); // stage 3 step1[0] = step2[0]; step1[4] = highbd_idct_cospi_lane1_dual(step2[4], vget_high_s32(cospi_4_12_20N_28)); step1[7] = highbd_idct_cospi_lane0_dual(step2[4], vget_low_s32(cospi_4_12_20N_28)); step1[8] = step2[8]; step1[9] = step2[8]; step1[10] = step2[11]; step1[11] = step2[11]; step1[12] = step2[12]; step1[13] = step2[12]; step1[14] = step2[15]; step1[15] = step2[15]; // stage 4 step2[0] = step2[1] = highbd_idct_cospi_lane0_dual(step1[0], vget_high_s32(cospi_0_8_16_24)); step2[4] = step1[4]; step2[5] = step1[4]; step2[6] = step1[7]; step2[7] = step1[7]; step2[8] = step1[8]; highbd_idct_cospi_8_24_q(step1[14], step1[9], cospi_0_8_16_24, &step2[9], &step2[14]); highbd_idct_cospi_8_24_neg_q(step1[13], step1[10], cospi_0_8_16_24, &step2[13], &step2[10]); step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; // stage 5 step1[0] = step2[0]; step1[1] = step2[1]; step1[2] = step2[1]; step1[3] = step2[0]; step1[4] = step2[4]; highbd_idct_cospi_16_16_q(step2[5], step2[6], cospi_0_8_16_24, &step1[5], &step1[6]); step1[7] = step2[7]; step1[8] = highbd_idct_add_dual(step2[8], step2[11]); step1[9] = highbd_idct_add_dual(step2[9], step2[10]); step1[10] = highbd_idct_sub_dual(step2[9], step2[10]); step1[11] = highbd_idct_sub_dual(step2[8], step2[11]); step1[12] = highbd_idct_sub_dual(step2[15], step2[12]); step1[13] = highbd_idct_sub_dual(step2[14], step2[13]); step1[14] = highbd_idct_add_dual(step2[14], step2[13]); step1[15] = highbd_idct_add_dual(step2[15], step2[12]); // stage 6 step2[0] = highbd_idct_add_dual(step1[0], step1[7]); step2[1] = highbd_idct_add_dual(step1[1], step1[6]); step2[2] = highbd_idct_add_dual(step1[2], step1[5]); step2[3] = highbd_idct_add_dual(step1[3], step1[4]); step2[4] = highbd_idct_sub_dual(step1[3], step1[4]); step2[5] = highbd_idct_sub_dual(step1[2], step1[5]); step2[6] = highbd_idct_sub_dual(step1[1], step1[6]); step2[7] = highbd_idct_sub_dual(step1[0], step1[7]); highbd_idct_cospi_16_16_q(step1[10], step1[13], cospi_0_8_16_24, &step2[10], &step2[13]); highbd_idct_cospi_16_16_q(step1[11], step1[12], cospi_0_8_16_24, &step2[11], &step2[12]); step2[8] = step1[8]; step2[9] = step1[9]; step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 highbd_idct16x16_add_stage7_dual(step2, out); if (output) { highbd_idct16x16_store_pass1(out, output); } else { highbd_idct16x16_add_store(out, dest, stride, bd); } } void vpx_highbd_idct16x16_256_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { if (bd == 8) { int16_t row_idct_output[16 * 16]; // pass 1 // Parallel idct on the upper 8 rows vpx_idct16x16_256_add_half1d(input, row_idct_output, dest, stride, 1); // Parallel idct on the lower 8 rows vpx_idct16x16_256_add_half1d(input + 8 * 16, row_idct_output + 8, dest, stride, 1); // pass 2 // Parallel idct to get the left 8 columns vpx_idct16x16_256_add_half1d(row_idct_output, NULL, dest, stride, 1); // Parallel idct to get the right 8 columns vpx_idct16x16_256_add_half1d(row_idct_output + 8 * 16, NULL, dest + 8, stride, 1); } else { int32_t row_idct_output[16 * 16]; // pass 1 // Parallel idct on the upper 8 rows vpx_highbd_idct16x16_256_add_half1d(input, row_idct_output, dest, stride, bd); // Parallel idct on the lower 
8 rows vpx_highbd_idct16x16_256_add_half1d(input + 8 * 16, row_idct_output + 8, dest, stride, bd); // pass 2 // Parallel idct to get the left 8 columns vpx_highbd_idct16x16_256_add_half1d(row_idct_output, NULL, dest, stride, bd); // Parallel idct to get the right 8 columns vpx_highbd_idct16x16_256_add_half1d(row_idct_output + 8 * 16, NULL, dest + 8, stride, bd); } } void vpx_highbd_idct16x16_38_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { if (bd == 8) { int16_t row_idct_output[16 * 16]; // pass 1 // Parallel idct on the upper 8 rows vpx_idct16x16_38_add_half1d(input, row_idct_output, dest, stride, 1); // pass 2 // Parallel idct to get the left 8 columns vpx_idct16x16_38_add_half1d(row_idct_output, NULL, dest, stride, 1); // Parallel idct to get the right 8 columns vpx_idct16x16_38_add_half1d(row_idct_output + 16 * 8, NULL, dest + 8, stride, 1); } else { int32_t row_idct_output[16 * 16]; // pass 1 // Parallel idct on the upper 8 rows vpx_highbd_idct16x16_38_add_half1d(input, row_idct_output, dest, stride, bd); // pass 2 // Parallel idct to get the left 8 columns vpx_highbd_idct16x16_38_add_half1d(row_idct_output, NULL, dest, stride, bd); // Parallel idct to get the right 8 columns vpx_highbd_idct16x16_38_add_half1d(row_idct_output + 16 * 8, NULL, dest + 8, stride, bd); } } void vpx_highbd_idct16x16_10_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { if (bd == 8) { int16_t row_idct_output[4 * 16]; // pass 1 // Parallel idct on the upper 8 rows vpx_idct16x16_10_add_half1d_pass1(input, row_idct_output); // pass 2 // Parallel idct to get the left 8 columns vpx_idct16x16_10_add_half1d_pass2(row_idct_output, NULL, dest, stride, 1); // Parallel idct to get the right 8 columns vpx_idct16x16_10_add_half1d_pass2(row_idct_output + 4 * 8, NULL, dest + 8, stride, 1); } else { int32_t row_idct_output[4 * 16]; // pass 1 // Parallel idct on the upper 8 rows highbd_idct16x16_10_add_half1d_pass1(input, row_idct_output); // pass 2 // Parallel idct to get the left 8 columns highbd_idct16x16_10_add_half1d_pass2(row_idct_output, NULL, dest, stride, bd); // Parallel idct to get the right 8 columns highbd_idct16x16_10_add_half1d_pass2(row_idct_output + 4 * 8, NULL, dest + 8, stride, bd); } } static INLINE void highbd_idct16x16_1_add_pos_kernel(uint16_t **dest, const int stride, const int16x8_t res, const int16x8_t max) { const uint16x8_t a0 = vld1q_u16(*dest + 0); const uint16x8_t a1 = vld1q_u16(*dest + 8); const int16x8_t b0 = vaddq_s16(res, vreinterpretq_s16_u16(a0)); const int16x8_t b1 = vaddq_s16(res, vreinterpretq_s16_u16(a1)); const int16x8_t c0 = vminq_s16(b0, max); const int16x8_t c1 = vminq_s16(b1, max); vst1q_u16(*dest + 0, vreinterpretq_u16_s16(c0)); vst1q_u16(*dest + 8, vreinterpretq_u16_s16(c1)); *dest += stride; } static INLINE void highbd_idct16x16_1_add_neg_kernel(uint16_t **dest, const int stride, const int16x8_t res) { const uint16x8_t a0 = vld1q_u16(*dest + 0); const uint16x8_t a1 = vld1q_u16(*dest + 8); const int16x8_t b0 = vaddq_s16(res, vreinterpretq_s16_u16(a0)); const int16x8_t b1 = vaddq_s16(res, vreinterpretq_s16_u16(a1)); const uint16x8_t c0 = vqshluq_n_s16(b0, 0); const uint16x8_t c1 = vqshluq_n_s16(b1, 0); vst1q_u16(*dest + 0, c0); vst1q_u16(*dest + 8, c1); *dest += stride; } void vpx_highbd_idct16x16_1_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { const tran_low_t out0 = HIGHBD_WRAPLOW( dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); const tran_low_t out1 = HIGHBD_WRAPLOW( 
      dct_const_round_shift(out0 * (tran_high_t)cospi_16_64), bd);
  const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6);
  const int16x8_t dc = vdupq_n_s16(a1);
  int i;

  if (a1 >= 0) {
    const int16x8_t max = vdupq_n_s16((1 << bd) - 1);
    for (i = 0; i < 4; ++i) {
      highbd_idct16x16_1_add_pos_kernel(&dest, stride, dc, max);
      highbd_idct16x16_1_add_pos_kernel(&dest, stride, dc, max);
      highbd_idct16x16_1_add_pos_kernel(&dest, stride, dc, max);
      highbd_idct16x16_1_add_pos_kernel(&dest, stride, dc, max);
    }
  } else {
    for (i = 0; i < 4; ++i) {
      highbd_idct16x16_1_add_neg_kernel(&dest, stride, dc);
      highbd_idct16x16_1_add_neg_kernel(&dest, stride, dc);
      highbd_idct16x16_1_add_neg_kernel(&dest, stride, dc);
      highbd_idct16x16_1_add_neg_kernel(&dest, stride, dc);
    }
  }
}

libvpx-1.8.2/vpx_dsp/arm/highbd_idct32x32_1024_add_neon.c

/*
 * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/txfm_common.h"

static INLINE void load_from_transformed(const int32_t *const trans_buf,
                                         const int first, const int second,
                                         int32x4x2_t *const q0,
                                         int32x4x2_t *const q1) {
  q0->val[0] = vld1q_s32(trans_buf + first * 8);
  q0->val[1] = vld1q_s32(trans_buf + first * 8 + 4);
  q1->val[0] = vld1q_s32(trans_buf + second * 8);
  q1->val[1] = vld1q_s32(trans_buf + second * 8 + 4);
}

static INLINE void load_from_output(const int32_t *const out, const int first,
                                    const int second, int32x4x2_t *const q0,
                                    int32x4x2_t *const q1) {
  q0->val[0] = vld1q_s32(out + first * 32);
  q0->val[1] = vld1q_s32(out + first * 32 + 4);
  q1->val[0] = vld1q_s32(out + second * 32);
  q1->val[1] = vld1q_s32(out + second * 32 + 4);
}

static INLINE void store_in_output(int32_t *const out, const int first,
                                   const int second, const int32x4x2_t q0,
                                   const int32x4x2_t q1) {
  vst1q_s32(out + first * 32, q0.val[0]);
  vst1q_s32(out + first * 32 + 4, q0.val[1]);
  vst1q_s32(out + second * 32, q1.val[0]);
  vst1q_s32(out + second * 32 + 4, q1.val[1]);
}

static INLINE void highbd_store_combine_results(
    uint16_t *p1, uint16_t *p2, const int stride, const int32x4x2_t q0,
    const int32x4x2_t q1, const int32x4x2_t q2, const int32x4x2_t q3,
    const int16x8_t max) {
  int16x8_t o[4];
  uint16x8_t d[4];

  d[0] = vld1q_u16(p1);
  p1 += stride;
  d[1] = vld1q_u16(p1);
  d[3] = vld1q_u16(p2);
  p2 -= stride;
  d[2] = vld1q_u16(p2);

  o[0] = vcombine_s16(vrshrn_n_s32(q0.val[0], 6), vrshrn_n_s32(q0.val[1], 6));
  o[1] = vcombine_s16(vrshrn_n_s32(q1.val[0], 6), vrshrn_n_s32(q1.val[1], 6));
  o[2] = vcombine_s16(vrshrn_n_s32(q2.val[0], 6), vrshrn_n_s32(q2.val[1], 6));
  o[3] = vcombine_s16(vrshrn_n_s32(q3.val[0], 6), vrshrn_n_s32(q3.val[1], 6));
  o[0] = vqaddq_s16(o[0], vreinterpretq_s16_u16(d[0]));
  o[1] = vqaddq_s16(o[1], vreinterpretq_s16_u16(d[1]));
  o[2] = vqaddq_s16(o[2], vreinterpretq_s16_u16(d[2]));
  o[3] = vqaddq_s16(o[3], vreinterpretq_s16_u16(d[3]));
  o[0] = vminq_s16(o[0], max);
  o[1] = vminq_s16(o[1], max);
  o[2] = vminq_s16(o[2], max);
  o[3] = vminq_s16(o[3], max);
  d[0] = vqshluq_n_s16(o[0], 0);
  d[1] = vqshluq_n_s16(o[1], 0);
  d[2] = vqshluq_n_s16(o[2], 0);
  d[3] = vqshluq_n_s16(o[3], 0);
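  /* Illustrative note, not from the source: vminq_s16 plus
   * vqshluq_n_s16(x, 0) implement the high-bitdepth pixel clamp. The
   * saturating left shift by zero converts s16 to u16, pinning negative
   * sums at 0, while the preceding vminq_s16 against max = (1 << bd) - 1
   * bounds the top, i.e. per pixel:
   *
   *   out = clamp(residual + pred, 0, (1 << bd) - 1);
   */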
vst1q_u16(p1, d[1]); p1 -= stride; vst1q_u16(p1, d[0]); vst1q_u16(p2, d[2]); p2 += stride; vst1q_u16(p2, d[3]); } static INLINE void do_butterfly(const int32x4x2_t qIn0, const int32x4x2_t qIn1, const int32_t first_const, const int32_t second_const, int32x4x2_t *const qOut0, int32x4x2_t *const qOut1) { int64x2x2_t q[4]; int32x2_t d[6]; // Note: using v{mul, mla, mls}l_n_s32 here slows down 35% with gcc 4.9. d[4] = vdup_n_s32(first_const); d[5] = vdup_n_s32(second_const); q[0].val[0] = vmull_s32(vget_low_s32(qIn0.val[0]), d[4]); q[0].val[1] = vmull_s32(vget_high_s32(qIn0.val[0]), d[4]); q[1].val[0] = vmull_s32(vget_low_s32(qIn0.val[1]), d[4]); q[1].val[1] = vmull_s32(vget_high_s32(qIn0.val[1]), d[4]); q[0].val[0] = vmlsl_s32(q[0].val[0], vget_low_s32(qIn1.val[0]), d[5]); q[0].val[1] = vmlsl_s32(q[0].val[1], vget_high_s32(qIn1.val[0]), d[5]); q[1].val[0] = vmlsl_s32(q[1].val[0], vget_low_s32(qIn1.val[1]), d[5]); q[1].val[1] = vmlsl_s32(q[1].val[1], vget_high_s32(qIn1.val[1]), d[5]); q[2].val[0] = vmull_s32(vget_low_s32(qIn0.val[0]), d[5]); q[2].val[1] = vmull_s32(vget_high_s32(qIn0.val[0]), d[5]); q[3].val[0] = vmull_s32(vget_low_s32(qIn0.val[1]), d[5]); q[3].val[1] = vmull_s32(vget_high_s32(qIn0.val[1]), d[5]); q[2].val[0] = vmlal_s32(q[2].val[0], vget_low_s32(qIn1.val[0]), d[4]); q[2].val[1] = vmlal_s32(q[2].val[1], vget_high_s32(qIn1.val[0]), d[4]); q[3].val[0] = vmlal_s32(q[3].val[0], vget_low_s32(qIn1.val[1]), d[4]); q[3].val[1] = vmlal_s32(q[3].val[1], vget_high_s32(qIn1.val[1]), d[4]); qOut0->val[0] = vcombine_s32(vrshrn_n_s64(q[0].val[0], DCT_CONST_BITS), vrshrn_n_s64(q[0].val[1], DCT_CONST_BITS)); qOut0->val[1] = vcombine_s32(vrshrn_n_s64(q[1].val[0], DCT_CONST_BITS), vrshrn_n_s64(q[1].val[1], DCT_CONST_BITS)); qOut1->val[0] = vcombine_s32(vrshrn_n_s64(q[2].val[0], DCT_CONST_BITS), vrshrn_n_s64(q[2].val[1], DCT_CONST_BITS)); qOut1->val[1] = vcombine_s32(vrshrn_n_s64(q[3].val[0], DCT_CONST_BITS), vrshrn_n_s64(q[3].val[1], DCT_CONST_BITS)); } static INLINE void load_s32x4q_dual(const int32_t *in, int32x4x2_t *const s) { s[0].val[0] = vld1q_s32(in); s[0].val[1] = vld1q_s32(in + 4); in += 32; s[1].val[0] = vld1q_s32(in); s[1].val[1] = vld1q_s32(in + 4); in += 32; s[2].val[0] = vld1q_s32(in); s[2].val[1] = vld1q_s32(in + 4); in += 32; s[3].val[0] = vld1q_s32(in); s[3].val[1] = vld1q_s32(in + 4); in += 32; s[4].val[0] = vld1q_s32(in); s[4].val[1] = vld1q_s32(in + 4); in += 32; s[5].val[0] = vld1q_s32(in); s[5].val[1] = vld1q_s32(in + 4); in += 32; s[6].val[0] = vld1q_s32(in); s[6].val[1] = vld1q_s32(in + 4); in += 32; s[7].val[0] = vld1q_s32(in); s[7].val[1] = vld1q_s32(in + 4); } static INLINE void transpose_and_store_s32_8x8(int32x4x2_t *const a, int32_t **out) { transpose_s32_8x8(&a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); vst1q_s32(*out, a[0].val[0]); *out += 4; vst1q_s32(*out, a[0].val[1]); *out += 4; vst1q_s32(*out, a[1].val[0]); *out += 4; vst1q_s32(*out, a[1].val[1]); *out += 4; vst1q_s32(*out, a[2].val[0]); *out += 4; vst1q_s32(*out, a[2].val[1]); *out += 4; vst1q_s32(*out, a[3].val[0]); *out += 4; vst1q_s32(*out, a[3].val[1]); *out += 4; vst1q_s32(*out, a[4].val[0]); *out += 4; vst1q_s32(*out, a[4].val[1]); *out += 4; vst1q_s32(*out, a[5].val[0]); *out += 4; vst1q_s32(*out, a[5].val[1]); *out += 4; vst1q_s32(*out, a[6].val[0]); *out += 4; vst1q_s32(*out, a[6].val[1]); *out += 4; vst1q_s32(*out, a[7].val[0]); *out += 4; vst1q_s32(*out, a[7].val[1]); *out += 4; } static INLINE void idct32_transpose_pair(const int32_t *input, int32_t *t_buf) { int i; int32x4x2_t s[8]; 
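  /* Illustrative note, not from the source: each iteration below
   * transposes one 8x8 tile of the 32-wide row strip, so after four
   * tiles t_buf holds the 8x32 transpose laid out tile by tile:
   *
   *   t_buf[64 * tile + 8 * c + r] = input[32 * r + 8 * tile + c]
   *
   * for r, c in [0, 8) and tile in [0, 4). */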
for (i = 0; i < 4; i++, input += 8) { load_s32x4q_dual(input, s); transpose_and_store_s32_8x8(s, &t_buf); } } static INLINE void idct32_bands_end_1st_pass(int32_t *const out, int32x4x2_t *const q) { store_in_output(out, 16, 17, q[6], q[7]); store_in_output(out, 14, 15, q[8], q[9]); load_from_output(out, 30, 31, &q[0], &q[1]); q[4] = highbd_idct_add_dual(q[2], q[1]); q[5] = highbd_idct_add_dual(q[3], q[0]); q[6] = highbd_idct_sub_dual(q[3], q[0]); q[7] = highbd_idct_sub_dual(q[2], q[1]); store_in_output(out, 30, 31, q[6], q[7]); store_in_output(out, 0, 1, q[4], q[5]); load_from_output(out, 12, 13, &q[0], &q[1]); q[2] = highbd_idct_add_dual(q[10], q[1]); q[3] = highbd_idct_add_dual(q[11], q[0]); q[4] = highbd_idct_sub_dual(q[11], q[0]); q[5] = highbd_idct_sub_dual(q[10], q[1]); load_from_output(out, 18, 19, &q[0], &q[1]); q[8] = highbd_idct_add_dual(q[4], q[1]); q[9] = highbd_idct_add_dual(q[5], q[0]); q[6] = highbd_idct_sub_dual(q[5], q[0]); q[7] = highbd_idct_sub_dual(q[4], q[1]); store_in_output(out, 18, 19, q[6], q[7]); store_in_output(out, 12, 13, q[8], q[9]); load_from_output(out, 28, 29, &q[0], &q[1]); q[4] = highbd_idct_add_dual(q[2], q[1]); q[5] = highbd_idct_add_dual(q[3], q[0]); q[6] = highbd_idct_sub_dual(q[3], q[0]); q[7] = highbd_idct_sub_dual(q[2], q[1]); store_in_output(out, 28, 29, q[6], q[7]); store_in_output(out, 2, 3, q[4], q[5]); load_from_output(out, 10, 11, &q[0], &q[1]); q[2] = highbd_idct_add_dual(q[12], q[1]); q[3] = highbd_idct_add_dual(q[13], q[0]); q[4] = highbd_idct_sub_dual(q[13], q[0]); q[5] = highbd_idct_sub_dual(q[12], q[1]); load_from_output(out, 20, 21, &q[0], &q[1]); q[8] = highbd_idct_add_dual(q[4], q[1]); q[9] = highbd_idct_add_dual(q[5], q[0]); q[6] = highbd_idct_sub_dual(q[5], q[0]); q[7] = highbd_idct_sub_dual(q[4], q[1]); store_in_output(out, 20, 21, q[6], q[7]); store_in_output(out, 10, 11, q[8], q[9]); load_from_output(out, 26, 27, &q[0], &q[1]); q[4] = highbd_idct_add_dual(q[2], q[1]); q[5] = highbd_idct_add_dual(q[3], q[0]); q[6] = highbd_idct_sub_dual(q[3], q[0]); q[7] = highbd_idct_sub_dual(q[2], q[1]); store_in_output(out, 26, 27, q[6], q[7]); store_in_output(out, 4, 5, q[4], q[5]); load_from_output(out, 8, 9, &q[0], &q[1]); q[2] = highbd_idct_add_dual(q[14], q[1]); q[3] = highbd_idct_add_dual(q[15], q[0]); q[4] = highbd_idct_sub_dual(q[15], q[0]); q[5] = highbd_idct_sub_dual(q[14], q[1]); load_from_output(out, 22, 23, &q[0], &q[1]); q[8] = highbd_idct_add_dual(q[4], q[1]); q[9] = highbd_idct_add_dual(q[5], q[0]); q[6] = highbd_idct_sub_dual(q[5], q[0]); q[7] = highbd_idct_sub_dual(q[4], q[1]); store_in_output(out, 22, 23, q[6], q[7]); store_in_output(out, 8, 9, q[8], q[9]); load_from_output(out, 24, 25, &q[0], &q[1]); q[4] = highbd_idct_add_dual(q[2], q[1]); q[5] = highbd_idct_add_dual(q[3], q[0]); q[6] = highbd_idct_sub_dual(q[3], q[0]); q[7] = highbd_idct_sub_dual(q[2], q[1]); store_in_output(out, 24, 25, q[6], q[7]); store_in_output(out, 6, 7, q[4], q[5]); } static INLINE void idct32_bands_end_2nd_pass(const int32_t *const out, uint16_t *const dest, const int stride, const int16x8_t max, int32x4x2_t *const q) { uint16_t *dest0 = dest + 0 * stride; uint16_t *dest1 = dest + 31 * stride; uint16_t *dest2 = dest + 16 * stride; uint16_t *dest3 = dest + 15 * stride; const int str2 = stride << 1; highbd_store_combine_results(dest2, dest3, stride, q[6], q[7], q[8], q[9], max); dest2 += str2; dest3 -= str2; load_from_output(out, 30, 31, &q[0], &q[1]); q[4] = highbd_idct_add_dual(q[2], q[1]); q[5] = highbd_idct_add_dual(q[3], q[0]); q[6] = 
highbd_idct_sub_dual(q[3], q[0]); q[7] = highbd_idct_sub_dual(q[2], q[1]); highbd_store_combine_results(dest0, dest1, stride, q[4], q[5], q[6], q[7], max); dest0 += str2; dest1 -= str2; load_from_output(out, 12, 13, &q[0], &q[1]); q[2] = highbd_idct_add_dual(q[10], q[1]); q[3] = highbd_idct_add_dual(q[11], q[0]); q[4] = highbd_idct_sub_dual(q[11], q[0]); q[5] = highbd_idct_sub_dual(q[10], q[1]); load_from_output(out, 18, 19, &q[0], &q[1]); q[8] = highbd_idct_add_dual(q[4], q[1]); q[9] = highbd_idct_add_dual(q[5], q[0]); q[6] = highbd_idct_sub_dual(q[5], q[0]); q[7] = highbd_idct_sub_dual(q[4], q[1]); highbd_store_combine_results(dest2, dest3, stride, q[6], q[7], q[8], q[9], max); dest2 += str2; dest3 -= str2; load_from_output(out, 28, 29, &q[0], &q[1]); q[4] = highbd_idct_add_dual(q[2], q[1]); q[5] = highbd_idct_add_dual(q[3], q[0]); q[6] = highbd_idct_sub_dual(q[3], q[0]); q[7] = highbd_idct_sub_dual(q[2], q[1]); highbd_store_combine_results(dest0, dest1, stride, q[4], q[5], q[6], q[7], max); dest0 += str2; dest1 -= str2; load_from_output(out, 10, 11, &q[0], &q[1]); q[2] = highbd_idct_add_dual(q[12], q[1]); q[3] = highbd_idct_add_dual(q[13], q[0]); q[4] = highbd_idct_sub_dual(q[13], q[0]); q[5] = highbd_idct_sub_dual(q[12], q[1]); load_from_output(out, 20, 21, &q[0], &q[1]); q[8] = highbd_idct_add_dual(q[4], q[1]); q[9] = highbd_idct_add_dual(q[5], q[0]); q[6] = highbd_idct_sub_dual(q[5], q[0]); q[7] = highbd_idct_sub_dual(q[4], q[1]); highbd_store_combine_results(dest2, dest3, stride, q[6], q[7], q[8], q[9], max); dest2 += str2; dest3 -= str2; load_from_output(out, 26, 27, &q[0], &q[1]); q[4] = highbd_idct_add_dual(q[2], q[1]); q[5] = highbd_idct_add_dual(q[3], q[0]); q[6] = highbd_idct_sub_dual(q[3], q[0]); q[7] = highbd_idct_sub_dual(q[2], q[1]); highbd_store_combine_results(dest0, dest1, stride, q[4], q[5], q[6], q[7], max); dest0 += str2; dest1 -= str2; load_from_output(out, 8, 9, &q[0], &q[1]); q[2] = highbd_idct_add_dual(q[14], q[1]); q[3] = highbd_idct_add_dual(q[15], q[0]); q[4] = highbd_idct_sub_dual(q[15], q[0]); q[5] = highbd_idct_sub_dual(q[14], q[1]); load_from_output(out, 22, 23, &q[0], &q[1]); q[8] = highbd_idct_add_dual(q[4], q[1]); q[9] = highbd_idct_add_dual(q[5], q[0]); q[6] = highbd_idct_sub_dual(q[5], q[0]); q[7] = highbd_idct_sub_dual(q[4], q[1]); highbd_store_combine_results(dest2, dest3, stride, q[6], q[7], q[8], q[9], max); load_from_output(out, 24, 25, &q[0], &q[1]); q[4] = highbd_idct_add_dual(q[2], q[1]); q[5] = highbd_idct_add_dual(q[3], q[0]); q[6] = highbd_idct_sub_dual(q[3], q[0]); q[7] = highbd_idct_sub_dual(q[2], q[1]); highbd_store_combine_results(dest0, dest1, stride, q[4], q[5], q[6], q[7], max); } static INLINE void vpx_highbd_idct32_32_neon(const tran_low_t *input, uint16_t *dst, const int stride, const int bd) { int i, idct32_pass_loop; int32_t trans_buf[32 * 8]; int32_t pass1[32 * 32]; int32_t pass2[32 * 32]; int32_t *out; int32x4x2_t q[16]; for (idct32_pass_loop = 0, out = pass1; idct32_pass_loop < 2; idct32_pass_loop++, input = pass1, out = pass2) { for (i = 0; i < 4; i++, out += 8) { // idct32_bands_loop idct32_transpose_pair(input, trans_buf); input += 32 * 8; // ----------------------------------------- // BLOCK A: 16-19,28-31 // ----------------------------------------- // generate 16,17,30,31 // part of stage 1 load_from_transformed(trans_buf, 1, 31, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_31_64, cospi_1_64, &q[0], &q[2]); load_from_transformed(trans_buf, 17, 15, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_15_64, 
cospi_17_64, &q[1], &q[3]); // part of stage 2 q[4] = highbd_idct_add_dual(q[0], q[1]); q[13] = highbd_idct_sub_dual(q[0], q[1]); q[6] = highbd_idct_add_dual(q[2], q[3]); q[14] = highbd_idct_sub_dual(q[2], q[3]); // part of stage 3 do_butterfly(q[14], q[13], cospi_28_64, cospi_4_64, &q[5], &q[7]); // generate 18,19,28,29 // part of stage 1 load_from_transformed(trans_buf, 9, 23, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_23_64, cospi_9_64, &q[0], &q[2]); load_from_transformed(trans_buf, 25, 7, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_7_64, cospi_25_64, &q[1], &q[3]); // part of stage 2 q[13] = highbd_idct_sub_dual(q[3], q[2]); q[3] = highbd_idct_add_dual(q[3], q[2]); q[14] = highbd_idct_sub_dual(q[1], q[0]); q[2] = highbd_idct_add_dual(q[1], q[0]); // part of stage 3 do_butterfly(q[14], q[13], -cospi_4_64, -cospi_28_64, &q[1], &q[0]); // part of stage 4 q[8] = highbd_idct_add_dual(q[4], q[2]); q[9] = highbd_idct_add_dual(q[5], q[0]); q[10] = highbd_idct_add_dual(q[7], q[1]); q[15] = highbd_idct_add_dual(q[6], q[3]); q[13] = highbd_idct_sub_dual(q[5], q[0]); q[14] = highbd_idct_sub_dual(q[7], q[1]); store_in_output(out, 16, 31, q[8], q[15]); store_in_output(out, 17, 30, q[9], q[10]); // part of stage 5 do_butterfly(q[14], q[13], cospi_24_64, cospi_8_64, &q[0], &q[1]); store_in_output(out, 29, 18, q[1], q[0]); // part of stage 4 q[13] = highbd_idct_sub_dual(q[4], q[2]); q[14] = highbd_idct_sub_dual(q[6], q[3]); // part of stage 5 do_butterfly(q[14], q[13], cospi_24_64, cospi_8_64, &q[4], &q[6]); store_in_output(out, 19, 28, q[4], q[6]); // ----------------------------------------- // BLOCK B: 20-23,24-27 // ----------------------------------------- // generate 20,21,26,27 // part of stage 1 load_from_transformed(trans_buf, 5, 27, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_27_64, cospi_5_64, &q[0], &q[2]); load_from_transformed(trans_buf, 21, 11, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_11_64, cospi_21_64, &q[1], &q[3]); // part of stage 2 q[13] = highbd_idct_sub_dual(q[0], q[1]); q[0] = highbd_idct_add_dual(q[0], q[1]); q[14] = highbd_idct_sub_dual(q[2], q[3]); q[2] = highbd_idct_add_dual(q[2], q[3]); // part of stage 3 do_butterfly(q[14], q[13], cospi_12_64, cospi_20_64, &q[1], &q[3]); // generate 22,23,24,25 // part of stage 1 load_from_transformed(trans_buf, 13, 19, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_19_64, cospi_13_64, &q[5], &q[7]); load_from_transformed(trans_buf, 29, 3, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_3_64, cospi_29_64, &q[4], &q[6]); // part of stage 2 q[14] = highbd_idct_sub_dual(q[4], q[5]); q[5] = highbd_idct_add_dual(q[4], q[5]); q[13] = highbd_idct_sub_dual(q[6], q[7]); q[6] = highbd_idct_add_dual(q[6], q[7]); // part of stage 3 do_butterfly(q[14], q[13], -cospi_20_64, -cospi_12_64, &q[4], &q[7]); // part of stage 4 q[10] = highbd_idct_add_dual(q[7], q[1]); q[11] = highbd_idct_add_dual(q[5], q[0]); q[12] = highbd_idct_add_dual(q[6], q[2]); q[15] = highbd_idct_add_dual(q[4], q[3]); // part of stage 6 load_from_output(out, 16, 17, &q[14], &q[13]); q[8] = highbd_idct_add_dual(q[14], q[11]); q[9] = highbd_idct_add_dual(q[13], q[10]); q[13] = highbd_idct_sub_dual(q[13], q[10]); q[11] = highbd_idct_sub_dual(q[14], q[11]); store_in_output(out, 17, 16, q[9], q[8]); load_from_output(out, 30, 31, &q[14], &q[9]); q[8] = highbd_idct_sub_dual(q[9], q[12]); q[10] = highbd_idct_add_dual(q[14], q[15]); q[14] = highbd_idct_sub_dual(q[14], q[15]); q[12] = highbd_idct_add_dual(q[9], q[12]); store_in_output(out, 30, 31, q[10], q[12]); 
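  // NOTE: do_butterfly() is the workhorse of every stage in this loop. A
  // scalar sketch of what one call computes (an illustration of the shared
  // idct_neon.h helper, assuming the usual DCT_CONST_BITS == 14 fixed-point
  // convention used throughout vpx_dsp, not a verbatim copy of its NEON
  // body):
  //
  //   out0 = ROUND_POWER_OF_TWO(a * c0 - b * c1, 14);  // rotate (a, b)
  //   out1 = ROUND_POWER_OF_TWO(a * c1 + b * c0, 14);  // by atan2(c1, c0)
  //
  // When c0 == c1 == cospi_16_64 (round(cos(pi/4) * 2^14) == 11585), as in
  // the "stage 7" calls below, this reduces to a scaled sum/difference pair.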
// part of stage 7 do_butterfly(q[14], q[13], cospi_16_64, cospi_16_64, &q[13], &q[14]); store_in_output(out, 25, 22, q[14], q[13]); do_butterfly(q[8], q[11], cospi_16_64, cospi_16_64, &q[13], &q[14]); store_in_output(out, 24, 23, q[14], q[13]); // part of stage 4 q[14] = highbd_idct_sub_dual(q[5], q[0]); q[13] = highbd_idct_sub_dual(q[6], q[2]); do_butterfly(q[14], q[13], -cospi_8_64, -cospi_24_64, &q[5], &q[6]); q[14] = highbd_idct_sub_dual(q[7], q[1]); q[13] = highbd_idct_sub_dual(q[4], q[3]); do_butterfly(q[14], q[13], -cospi_8_64, -cospi_24_64, &q[0], &q[1]); // part of stage 6 load_from_output(out, 18, 19, &q[14], &q[13]); q[8] = highbd_idct_add_dual(q[14], q[1]); q[9] = highbd_idct_add_dual(q[13], q[6]); q[13] = highbd_idct_sub_dual(q[13], q[6]); q[1] = highbd_idct_sub_dual(q[14], q[1]); store_in_output(out, 18, 19, q[8], q[9]); load_from_output(out, 28, 29, &q[8], &q[9]); q[14] = highbd_idct_sub_dual(q[8], q[5]); q[10] = highbd_idct_add_dual(q[8], q[5]); q[11] = highbd_idct_add_dual(q[9], q[0]); q[0] = highbd_idct_sub_dual(q[9], q[0]); store_in_output(out, 28, 29, q[10], q[11]); // part of stage 7 do_butterfly(q[14], q[13], cospi_16_64, cospi_16_64, &q[13], &q[14]); store_in_output(out, 20, 27, q[13], q[14]); do_butterfly(q[0], q[1], cospi_16_64, cospi_16_64, &q[1], &q[0]); store_in_output(out, 21, 26, q[1], q[0]); // ----------------------------------------- // BLOCK C: 8-10,11-15 // ----------------------------------------- // generate 8,9,14,15 // part of stage 2 load_from_transformed(trans_buf, 2, 30, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_30_64, cospi_2_64, &q[0], &q[2]); load_from_transformed(trans_buf, 18, 14, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_14_64, cospi_18_64, &q[1], &q[3]); // part of stage 3 q[13] = highbd_idct_sub_dual(q[0], q[1]); q[0] = highbd_idct_add_dual(q[0], q[1]); q[14] = highbd_idct_sub_dual(q[2], q[3]); q[2] = highbd_idct_add_dual(q[2], q[3]); // part of stage 4 do_butterfly(q[14], q[13], cospi_24_64, cospi_8_64, &q[1], &q[3]); // generate 10,11,12,13 // part of stage 2 load_from_transformed(trans_buf, 10, 22, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_22_64, cospi_10_64, &q[5], &q[7]); load_from_transformed(trans_buf, 26, 6, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_6_64, cospi_26_64, &q[4], &q[6]); // part of stage 3 q[14] = highbd_idct_sub_dual(q[4], q[5]); q[5] = highbd_idct_add_dual(q[4], q[5]); q[13] = highbd_idct_sub_dual(q[6], q[7]); q[6] = highbd_idct_add_dual(q[6], q[7]); // part of stage 4 do_butterfly(q[14], q[13], -cospi_8_64, -cospi_24_64, &q[4], &q[7]); // part of stage 5 q[8] = highbd_idct_add_dual(q[0], q[5]); q[9] = highbd_idct_add_dual(q[1], q[7]); q[13] = highbd_idct_sub_dual(q[1], q[7]); q[14] = highbd_idct_sub_dual(q[3], q[4]); q[10] = highbd_idct_add_dual(q[3], q[4]); q[15] = highbd_idct_add_dual(q[2], q[6]); store_in_output(out, 8, 15, q[8], q[15]); store_in_output(out, 9, 14, q[9], q[10]); // part of stage 6 do_butterfly(q[14], q[13], cospi_16_64, cospi_16_64, &q[1], &q[3]); store_in_output(out, 13, 10, q[3], q[1]); q[13] = highbd_idct_sub_dual(q[0], q[5]); q[14] = highbd_idct_sub_dual(q[2], q[6]); do_butterfly(q[14], q[13], cospi_16_64, cospi_16_64, &q[1], &q[3]); store_in_output(out, 11, 12, q[1], q[3]); // ----------------------------------------- // BLOCK D: 0-3,4-7 // ----------------------------------------- // generate 4,5,6,7 // part of stage 3 load_from_transformed(trans_buf, 4, 28, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_28_64, cospi_4_64, &q[0], &q[2]); 
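  // load_from_transformed(trans_buf, a, b, &qa, &qb) pulls two already
  // transposed 8-lane rows out of the staging buffer that
  // idct32_transpose_pair() filled at the top of the loop. Conceptually (a
  // scalar model assuming the row-major 32x8 layout implied above, not a
  // quote of the helper):
  //
  //   qa = trans_buf[a * 8 .. a * 8 + 7];
  //   qb = trans_buf[b * 8 .. b * 8 + 7];
  //
  // so each band only ever touches the input rows it actually needs.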
load_from_transformed(trans_buf, 20, 12, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_12_64, cospi_20_64, &q[1], &q[3]); // part of stage 4 q[13] = highbd_idct_sub_dual(q[0], q[1]); q[0] = highbd_idct_add_dual(q[0], q[1]); q[14] = highbd_idct_sub_dual(q[2], q[3]); q[2] = highbd_idct_add_dual(q[2], q[3]); // part of stage 5 do_butterfly(q[14], q[13], cospi_16_64, cospi_16_64, &q[1], &q[3]); // generate 0,1,2,3 // part of stage 4 load_from_transformed(trans_buf, 0, 16, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_16_64, cospi_16_64, &q[5], &q[7]); load_from_transformed(trans_buf, 8, 24, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_24_64, cospi_8_64, &q[14], &q[6]); // part of stage 5 q[4] = highbd_idct_add_dual(q[7], q[6]); q[7] = highbd_idct_sub_dual(q[7], q[6]); q[6] = highbd_idct_sub_dual(q[5], q[14]); q[5] = highbd_idct_add_dual(q[5], q[14]); // part of stage 6 q[8] = highbd_idct_add_dual(q[4], q[2]); q[9] = highbd_idct_add_dual(q[5], q[3]); q[10] = highbd_idct_add_dual(q[6], q[1]); q[11] = highbd_idct_add_dual(q[7], q[0]); q[12] = highbd_idct_sub_dual(q[7], q[0]); q[13] = highbd_idct_sub_dual(q[6], q[1]); q[14] = highbd_idct_sub_dual(q[5], q[3]); q[15] = highbd_idct_sub_dual(q[4], q[2]); // part of stage 7 load_from_output(out, 14, 15, &q[0], &q[1]); q[2] = highbd_idct_add_dual(q[8], q[1]); q[3] = highbd_idct_add_dual(q[9], q[0]); q[4] = highbd_idct_sub_dual(q[9], q[0]); q[5] = highbd_idct_sub_dual(q[8], q[1]); load_from_output(out, 16, 17, &q[0], &q[1]); q[8] = highbd_idct_add_dual(q[4], q[1]); q[9] = highbd_idct_add_dual(q[5], q[0]); q[6] = highbd_idct_sub_dual(q[5], q[0]); q[7] = highbd_idct_sub_dual(q[4], q[1]); if (idct32_pass_loop == 0) { idct32_bands_end_1st_pass(out, q); } else { const int16x8_t max = vdupq_n_s16((1 << bd) - 1); idct32_bands_end_2nd_pass(out, dst, stride, max, q); dst += 8; } } } } void vpx_highbd_idct32x32_1024_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { if (bd == 8) { vpx_idct32_32_neon(input, CAST_TO_BYTEPTR(dest), stride, 1); } else { vpx_highbd_idct32_32_neon(input, dest, stride, bd); } } libvpx-1.8.2/vpx_dsp/arm/highbd_idct32x32_135_add_neon.c000066400000000000000000001002361357355204000226130ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/

#include <arm_neon.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/highbd_idct_neon.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/txfm_common.h"

static INLINE void load_8x8_s32_dual(
    const tran_low_t *input, int32x4x2_t *const in0, int32x4x2_t *const in1,
    int32x4x2_t *const in2, int32x4x2_t *const in3, int32x4x2_t *const in4,
    int32x4x2_t *const in5, int32x4x2_t *const in6, int32x4x2_t *const in7) {
  in0->val[0] = vld1q_s32(input);
  in0->val[1] = vld1q_s32(input + 4);
  input += 32;
  in1->val[0] = vld1q_s32(input);
  in1->val[1] = vld1q_s32(input + 4);
  input += 32;
  in2->val[0] = vld1q_s32(input);
  in2->val[1] = vld1q_s32(input + 4);
  input += 32;
  in3->val[0] = vld1q_s32(input);
  in3->val[1] = vld1q_s32(input + 4);
  input += 32;
  in4->val[0] = vld1q_s32(input);
  in4->val[1] = vld1q_s32(input + 4);
  input += 32;
  in5->val[0] = vld1q_s32(input);
  in5->val[1] = vld1q_s32(input + 4);
  input += 32;
  in6->val[0] = vld1q_s32(input);
  in6->val[1] = vld1q_s32(input + 4);
  input += 32;
  in7->val[0] = vld1q_s32(input);
  in7->val[1] = vld1q_s32(input + 4);
}

static INLINE void load_4x8_s32_dual(const tran_low_t *input,
                                     int32x4_t *const in0, int32x4_t *const in1,
                                     int32x4_t *const in2, int32x4_t *const in3,
                                     int32x4_t *const in4, int32x4_t *const in5,
                                     int32x4_t *const in6,
                                     int32x4_t *const in7) {
  *in0 = vld1q_s32(input);
  input += 32;
  *in1 = vld1q_s32(input);
  input += 32;
  *in2 = vld1q_s32(input);
  input += 32;
  *in3 = vld1q_s32(input);
  input += 32;
  *in4 = vld1q_s32(input);
  input += 32;
  *in5 = vld1q_s32(input);
  input += 32;
  *in6 = vld1q_s32(input);
  input += 32;
  *in7 = vld1q_s32(input);
}

// Only for the first pass of the _135_ variant. Since it only uses values from
// the top left 16x16 it can safely assume all the remaining values are 0 and
// skip an awful lot of calculations. In fact, only the first 12 columns make
// the cut. None of the elements in the 13th, 14th, 15th or 16th columns are
// used so it skips any calls to input[12|13|14|15] too.
// In C this does a single row of 32 for each call. Here it transposes the top
// left 12x8 to allow using SIMD.
// vp9/common/vp9_scan.c:vp9_default_iscan_32x32 arranges the first 135 non-zero // coefficients as follows: // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 // 0 0 2 5 10 17 25 38 47 62 83 101 121 // 1 1 4 8 15 22 30 45 58 74 92 112 133 // 2 3 7 12 18 28 36 52 64 82 102 118 // 3 6 11 16 23 31 43 60 73 90 109 126 // 4 9 14 19 29 37 50 65 78 98 116 134 // 5 13 20 26 35 44 54 72 85 105 123 // 6 21 27 33 42 53 63 80 94 113 132 // 7 24 32 39 48 57 71 88 104 120 // 8 34 40 46 56 68 81 96 111 130 // 9 41 49 55 67 77 91 107 124 // 10 51 59 66 76 89 99 119 131 // 11 61 69 75 87 100 114 129 // 12 70 79 86 97 108 122 // 13 84 93 103 110 125 // 14 98 106 115 127 // 15 117 128 static void vpx_highbd_idct32_12_neon(const tran_low_t *const input, int32_t *output) { int32x4x2_t in[12], s1[32], s2[32], s3[32], s4[32], s5[32], s6[32], s7[32], s8[32]; load_8x8_s32_dual(input, &in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); transpose_s32_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); load_4x8_s32_dual(input + 8, &in[8].val[0], &in[8].val[1], &in[9].val[0], &in[9].val[1], &in[10].val[0], &in[10].val[1], &in[11].val[0], &in[11].val[1]); transpose_s32_4x8(&in[8].val[0], &in[8].val[1], &in[9].val[0], &in[9].val[1], &in[10].val[0], &in[10].val[1], &in[11].val[0], &in[11].val[1]); // stage 1 s1[16] = multiply_shift_and_narrow_s32_dual(in[1], cospi_31_64); s1[31] = multiply_shift_and_narrow_s32_dual(in[1], cospi_1_64); s1[18] = multiply_shift_and_narrow_s32_dual(in[9], cospi_23_64); s1[29] = multiply_shift_and_narrow_s32_dual(in[9], cospi_9_64); s1[19] = multiply_shift_and_narrow_s32_dual(in[7], -cospi_25_64); s1[28] = multiply_shift_and_narrow_s32_dual(in[7], cospi_7_64); s1[20] = multiply_shift_and_narrow_s32_dual(in[5], cospi_27_64); s1[27] = multiply_shift_and_narrow_s32_dual(in[5], cospi_5_64); s1[21] = multiply_shift_and_narrow_s32_dual(in[11], -cospi_21_64); s1[26] = multiply_shift_and_narrow_s32_dual(in[11], cospi_11_64); s1[23] = multiply_shift_and_narrow_s32_dual(in[3], -cospi_29_64); s1[24] = multiply_shift_and_narrow_s32_dual(in[3], cospi_3_64); // stage 2 s2[8] = multiply_shift_and_narrow_s32_dual(in[2], cospi_30_64); s2[15] = multiply_shift_and_narrow_s32_dual(in[2], cospi_2_64); s2[10] = multiply_shift_and_narrow_s32_dual(in[10], cospi_22_64); s2[13] = multiply_shift_and_narrow_s32_dual(in[10], cospi_10_64); s2[11] = multiply_shift_and_narrow_s32_dual(in[6], -cospi_26_64); s2[12] = multiply_shift_and_narrow_s32_dual(in[6], cospi_6_64); s2[18] = highbd_idct_sub_dual(s1[19], s1[18]); s2[19] = highbd_idct_add_dual(s1[18], s1[19]); s2[20] = highbd_idct_add_dual(s1[20], s1[21]); s2[21] = highbd_idct_sub_dual(s1[20], s1[21]); s2[26] = highbd_idct_sub_dual(s1[27], s1[26]); s2[27] = highbd_idct_add_dual(s1[26], s1[27]); s2[28] = highbd_idct_add_dual(s1[28], s1[29]); s2[29] = highbd_idct_sub_dual(s1[28], s1[29]); // stage 3 s3[4] = multiply_shift_and_narrow_s32_dual(in[4], cospi_28_64); s3[7] = multiply_shift_and_narrow_s32_dual(in[4], cospi_4_64); s3[10] = highbd_idct_sub_dual(s2[11], s2[10]); s3[11] = highbd_idct_add_dual(s2[10], s2[11]); s3[12] = highbd_idct_add_dual(s2[12], s2[13]); s3[13] = highbd_idct_sub_dual(s2[12], s2[13]); s3[17] = multiply_accumulate_shift_and_narrow_s32_dual(s1[16], -cospi_4_64, s1[31], cospi_28_64); s3[30] = multiply_accumulate_shift_and_narrow_s32_dual(s1[16], cospi_28_64, s1[31], cospi_4_64); s3[18] = multiply_accumulate_shift_and_narrow_s32_dual(s2[18], -cospi_28_64, s2[29], -cospi_4_64); s3[29] = 
multiply_accumulate_shift_and_narrow_s32_dual(s2[18], -cospi_4_64, s2[29], cospi_28_64); s3[21] = multiply_accumulate_shift_and_narrow_s32_dual(s2[21], -cospi_20_64, s2[26], cospi_12_64); s3[26] = multiply_accumulate_shift_and_narrow_s32_dual(s2[21], cospi_12_64, s2[26], cospi_20_64); s3[22] = multiply_accumulate_shift_and_narrow_s32_dual(s1[23], -cospi_12_64, s1[24], -cospi_20_64); s3[25] = multiply_accumulate_shift_and_narrow_s32_dual(s1[23], -cospi_20_64, s1[24], cospi_12_64); // stage 4 s4[0] = multiply_shift_and_narrow_s32_dual(in[0], cospi_16_64); s4[2] = multiply_shift_and_narrow_s32_dual(in[8], cospi_24_64); s4[3] = multiply_shift_and_narrow_s32_dual(in[8], cospi_8_64); s4[9] = multiply_accumulate_shift_and_narrow_s32_dual(s2[8], -cospi_8_64, s2[15], cospi_24_64); s4[14] = multiply_accumulate_shift_and_narrow_s32_dual(s2[8], cospi_24_64, s2[15], cospi_8_64); s4[10] = multiply_accumulate_shift_and_narrow_s32_dual(s3[10], -cospi_24_64, s3[13], -cospi_8_64); s4[13] = multiply_accumulate_shift_and_narrow_s32_dual(s3[10], -cospi_8_64, s3[13], cospi_24_64); s4[16] = highbd_idct_add_dual(s1[16], s2[19]); s4[17] = highbd_idct_add_dual(s3[17], s3[18]); s4[18] = highbd_idct_sub_dual(s3[17], s3[18]); s4[19] = highbd_idct_sub_dual(s1[16], s2[19]); s4[20] = highbd_idct_sub_dual(s1[23], s2[20]); s4[21] = highbd_idct_sub_dual(s3[22], s3[21]); s4[22] = highbd_idct_add_dual(s3[21], s3[22]); s4[23] = highbd_idct_add_dual(s2[20], s1[23]); s4[24] = highbd_idct_add_dual(s1[24], s2[27]); s4[25] = highbd_idct_add_dual(s3[25], s3[26]); s4[26] = highbd_idct_sub_dual(s3[25], s3[26]); s4[27] = highbd_idct_sub_dual(s1[24], s2[27]); s4[28] = highbd_idct_sub_dual(s1[31], s2[28]); s4[29] = highbd_idct_sub_dual(s3[30], s3[29]); s4[30] = highbd_idct_add_dual(s3[29], s3[30]); s4[31] = highbd_idct_add_dual(s2[28], s1[31]); // stage 5 s5[0] = highbd_idct_add_dual(s4[0], s4[3]); s5[1] = highbd_idct_add_dual(s4[0], s4[2]); s5[2] = highbd_idct_sub_dual(s4[0], s4[2]); s5[3] = highbd_idct_sub_dual(s4[0], s4[3]); s5[5] = sub_multiply_shift_and_narrow_s32_dual(s3[7], s3[4], cospi_16_64); s5[6] = add_multiply_shift_and_narrow_s32_dual(s3[4], s3[7], cospi_16_64); s5[8] = highbd_idct_add_dual(s2[8], s3[11]); s5[9] = highbd_idct_add_dual(s4[9], s4[10]); s5[10] = highbd_idct_sub_dual(s4[9], s4[10]); s5[11] = highbd_idct_sub_dual(s2[8], s3[11]); s5[12] = highbd_idct_sub_dual(s2[15], s3[12]); s5[13] = highbd_idct_sub_dual(s4[14], s4[13]); s5[14] = highbd_idct_add_dual(s4[13], s4[14]); s5[15] = highbd_idct_add_dual(s2[15], s3[12]); s5[18] = multiply_accumulate_shift_and_narrow_s32_dual(s4[18], -cospi_8_64, s4[29], cospi_24_64); s5[29] = multiply_accumulate_shift_and_narrow_s32_dual(s4[18], cospi_24_64, s4[29], cospi_8_64); s5[19] = multiply_accumulate_shift_and_narrow_s32_dual(s4[19], -cospi_8_64, s4[28], cospi_24_64); s5[28] = multiply_accumulate_shift_and_narrow_s32_dual(s4[19], cospi_24_64, s4[28], cospi_8_64); s5[20] = multiply_accumulate_shift_and_narrow_s32_dual(s4[20], -cospi_24_64, s4[27], -cospi_8_64); s5[27] = multiply_accumulate_shift_and_narrow_s32_dual(s4[20], -cospi_8_64, s4[27], cospi_24_64); s5[21] = multiply_accumulate_shift_and_narrow_s32_dual(s4[21], -cospi_24_64, s4[26], -cospi_8_64); s5[26] = multiply_accumulate_shift_and_narrow_s32_dual(s4[21], -cospi_8_64, s4[26], cospi_24_64); // stage 6 s6[0] = highbd_idct_add_dual(s5[0], s3[7]); s6[1] = highbd_idct_add_dual(s5[1], s5[6]); s6[2] = highbd_idct_add_dual(s5[2], s5[5]); s6[3] = highbd_idct_add_dual(s5[3], s3[4]); s6[4] = highbd_idct_sub_dual(s5[3], s3[4]); 
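  // The *_multiply_shift_and_narrow_s32_dual() helpers fuse the sqrt(1/2)
  // rotation into a single step. A scalar sketch (assuming the shared
  // DCT_CONST_BITS == 14 convention of the C reference in
  // vpx_dsp/inv_txfm.c):
  //
  //   sub_multiply_shift_and_narrow(a, b, c) ~ ROUND_POWER_OF_TWO((a - b) * c, 14)
  //   add_multiply_shift_and_narrow(a, b, c) ~ ROUND_POWER_OF_TWO((a + b) * c, 14)
  //
  // which, with c == cospi_16_64, is exactly the s5[5]/s5[6] pair above and
  // the s6[10..13] pairs below.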
s6[5] = highbd_idct_sub_dual(s5[2], s5[5]); s6[6] = highbd_idct_sub_dual(s5[1], s5[6]); s6[7] = highbd_idct_sub_dual(s5[0], s3[7]); s6[10] = sub_multiply_shift_and_narrow_s32_dual(s5[13], s5[10], cospi_16_64); s6[13] = add_multiply_shift_and_narrow_s32_dual(s5[10], s5[13], cospi_16_64); s6[11] = sub_multiply_shift_and_narrow_s32_dual(s5[12], s5[11], cospi_16_64); s6[12] = add_multiply_shift_and_narrow_s32_dual(s5[11], s5[12], cospi_16_64); s6[16] = highbd_idct_add_dual(s4[16], s4[23]); s6[17] = highbd_idct_add_dual(s4[17], s4[22]); s6[18] = highbd_idct_add_dual(s5[18], s5[21]); s6[19] = highbd_idct_add_dual(s5[19], s5[20]); s6[20] = highbd_idct_sub_dual(s5[19], s5[20]); s6[21] = highbd_idct_sub_dual(s5[18], s5[21]); s6[22] = highbd_idct_sub_dual(s4[17], s4[22]); s6[23] = highbd_idct_sub_dual(s4[16], s4[23]); s6[24] = highbd_idct_sub_dual(s4[31], s4[24]); s6[25] = highbd_idct_sub_dual(s4[30], s4[25]); s6[26] = highbd_idct_sub_dual(s5[29], s5[26]); s6[27] = highbd_idct_sub_dual(s5[28], s5[27]); s6[28] = highbd_idct_add_dual(s5[27], s5[28]); s6[29] = highbd_idct_add_dual(s5[26], s5[29]); s6[30] = highbd_idct_add_dual(s4[25], s4[30]); s6[31] = highbd_idct_add_dual(s4[24], s4[31]); // stage 7 s7[0] = highbd_idct_add_dual(s6[0], s5[15]); s7[1] = highbd_idct_add_dual(s6[1], s5[14]); s7[2] = highbd_idct_add_dual(s6[2], s6[13]); s7[3] = highbd_idct_add_dual(s6[3], s6[12]); s7[4] = highbd_idct_add_dual(s6[4], s6[11]); s7[5] = highbd_idct_add_dual(s6[5], s6[10]); s7[6] = highbd_idct_add_dual(s6[6], s5[9]); s7[7] = highbd_idct_add_dual(s6[7], s5[8]); s7[8] = highbd_idct_sub_dual(s6[7], s5[8]); s7[9] = highbd_idct_sub_dual(s6[6], s5[9]); s7[10] = highbd_idct_sub_dual(s6[5], s6[10]); s7[11] = highbd_idct_sub_dual(s6[4], s6[11]); s7[12] = highbd_idct_sub_dual(s6[3], s6[12]); s7[13] = highbd_idct_sub_dual(s6[2], s6[13]); s7[14] = highbd_idct_sub_dual(s6[1], s5[14]); s7[15] = highbd_idct_sub_dual(s6[0], s5[15]); s7[20] = sub_multiply_shift_and_narrow_s32_dual(s6[27], s6[20], cospi_16_64); s7[27] = add_multiply_shift_and_narrow_s32_dual(s6[20], s6[27], cospi_16_64); s7[21] = sub_multiply_shift_and_narrow_s32_dual(s6[26], s6[21], cospi_16_64); s7[26] = add_multiply_shift_and_narrow_s32_dual(s6[21], s6[26], cospi_16_64); s7[22] = sub_multiply_shift_and_narrow_s32_dual(s6[25], s6[22], cospi_16_64); s7[25] = add_multiply_shift_and_narrow_s32_dual(s6[22], s6[25], cospi_16_64); s7[23] = sub_multiply_shift_and_narrow_s32_dual(s6[24], s6[23], cospi_16_64); s7[24] = add_multiply_shift_and_narrow_s32_dual(s6[23], s6[24], cospi_16_64); // final stage s8[0] = highbd_idct_add_dual(s7[0], s6[31]); s8[1] = highbd_idct_add_dual(s7[1], s6[30]); s8[2] = highbd_idct_add_dual(s7[2], s6[29]); s8[3] = highbd_idct_add_dual(s7[3], s6[28]); s8[4] = highbd_idct_add_dual(s7[4], s7[27]); s8[5] = highbd_idct_add_dual(s7[5], s7[26]); s8[6] = highbd_idct_add_dual(s7[6], s7[25]); s8[7] = highbd_idct_add_dual(s7[7], s7[24]); s8[8] = highbd_idct_add_dual(s7[8], s7[23]); s8[9] = highbd_idct_add_dual(s7[9], s7[22]); s8[10] = highbd_idct_add_dual(s7[10], s7[21]); s8[11] = highbd_idct_add_dual(s7[11], s7[20]); s8[12] = highbd_idct_add_dual(s7[12], s6[19]); s8[13] = highbd_idct_add_dual(s7[13], s6[18]); s8[14] = highbd_idct_add_dual(s7[14], s6[17]); s8[15] = highbd_idct_add_dual(s7[15], s6[16]); s8[16] = highbd_idct_sub_dual(s7[15], s6[16]); s8[17] = highbd_idct_sub_dual(s7[14], s6[17]); s8[18] = highbd_idct_sub_dual(s7[13], s6[18]); s8[19] = highbd_idct_sub_dual(s7[12], s6[19]); s8[20] = highbd_idct_sub_dual(s7[11], s7[20]); s8[21] = 
highbd_idct_sub_dual(s7[10], s7[21]); s8[22] = highbd_idct_sub_dual(s7[9], s7[22]); s8[23] = highbd_idct_sub_dual(s7[8], s7[23]); s8[24] = highbd_idct_sub_dual(s7[7], s7[24]); s8[25] = highbd_idct_sub_dual(s7[6], s7[25]); s8[26] = highbd_idct_sub_dual(s7[5], s7[26]); s8[27] = highbd_idct_sub_dual(s7[4], s7[27]); s8[28] = highbd_idct_sub_dual(s7[3], s6[28]); s8[29] = highbd_idct_sub_dual(s7[2], s6[29]); s8[30] = highbd_idct_sub_dual(s7[1], s6[30]); s8[31] = highbd_idct_sub_dual(s7[0], s6[31]); vst1q_s32(output + 0, s8[0].val[0]); vst1q_s32(output + 4, s8[0].val[1]); output += 16; vst1q_s32(output + 0, s8[1].val[0]); vst1q_s32(output + 4, s8[1].val[1]); output += 16; vst1q_s32(output + 0, s8[2].val[0]); vst1q_s32(output + 4, s8[2].val[1]); output += 16; vst1q_s32(output + 0, s8[3].val[0]); vst1q_s32(output + 4, s8[3].val[1]); output += 16; vst1q_s32(output + 0, s8[4].val[0]); vst1q_s32(output + 4, s8[4].val[1]); output += 16; vst1q_s32(output + 0, s8[5].val[0]); vst1q_s32(output + 4, s8[5].val[1]); output += 16; vst1q_s32(output + 0, s8[6].val[0]); vst1q_s32(output + 4, s8[6].val[1]); output += 16; vst1q_s32(output + 0, s8[7].val[0]); vst1q_s32(output + 4, s8[7].val[1]); output += 16; vst1q_s32(output + 0, s8[8].val[0]); vst1q_s32(output + 4, s8[8].val[1]); output += 16; vst1q_s32(output + 0, s8[9].val[0]); vst1q_s32(output + 4, s8[9].val[1]); output += 16; vst1q_s32(output + 0, s8[10].val[0]); vst1q_s32(output + 4, s8[10].val[1]); output += 16; vst1q_s32(output + 0, s8[11].val[0]); vst1q_s32(output + 4, s8[11].val[1]); output += 16; vst1q_s32(output + 0, s8[12].val[0]); vst1q_s32(output + 4, s8[12].val[1]); output += 16; vst1q_s32(output + 0, s8[13].val[0]); vst1q_s32(output + 4, s8[13].val[1]); output += 16; vst1q_s32(output + 0, s8[14].val[0]); vst1q_s32(output + 4, s8[14].val[1]); output += 16; vst1q_s32(output + 0, s8[15].val[0]); vst1q_s32(output + 4, s8[15].val[1]); output += 16; vst1q_s32(output + 0, s8[16].val[0]); vst1q_s32(output + 4, s8[16].val[1]); output += 16; vst1q_s32(output + 0, s8[17].val[0]); vst1q_s32(output + 4, s8[17].val[1]); output += 16; vst1q_s32(output + 0, s8[18].val[0]); vst1q_s32(output + 4, s8[18].val[1]); output += 16; vst1q_s32(output + 0, s8[19].val[0]); vst1q_s32(output + 4, s8[19].val[1]); output += 16; vst1q_s32(output + 0, s8[20].val[0]); vst1q_s32(output + 4, s8[20].val[1]); output += 16; vst1q_s32(output + 0, s8[21].val[0]); vst1q_s32(output + 4, s8[21].val[1]); output += 16; vst1q_s32(output + 0, s8[22].val[0]); vst1q_s32(output + 4, s8[22].val[1]); output += 16; vst1q_s32(output + 0, s8[23].val[0]); vst1q_s32(output + 4, s8[23].val[1]); output += 16; vst1q_s32(output + 0, s8[24].val[0]); vst1q_s32(output + 4, s8[24].val[1]); output += 16; vst1q_s32(output + 0, s8[25].val[0]); vst1q_s32(output + 4, s8[25].val[1]); output += 16; vst1q_s32(output + 0, s8[26].val[0]); vst1q_s32(output + 4, s8[26].val[1]); output += 16; vst1q_s32(output + 0, s8[27].val[0]); vst1q_s32(output + 4, s8[27].val[1]); output += 16; vst1q_s32(output + 0, s8[28].val[0]); vst1q_s32(output + 4, s8[28].val[1]); output += 16; vst1q_s32(output + 0, s8[29].val[0]); vst1q_s32(output + 4, s8[29].val[1]); output += 16; vst1q_s32(output + 0, s8[30].val[0]); vst1q_s32(output + 4, s8[30].val[1]); output += 16; vst1q_s32(output + 0, s8[31].val[0]); vst1q_s32(output + 4, s8[31].val[1]); } static void vpx_highbd_idct32_16_neon(const int32_t *const input, uint16_t *const output, const int stride, const int bd) { int32x4x2_t in[16], s1[32], s2[32], s3[32], s4[32], s5[32], s6[32], s7[32], out[32]; 
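  // Second pass of the _135_ variant: 'input' is an 8-column slice of the
  // 32x16 intermediate written by vpx_highbd_idct32_12_neon() (row stride 16,
  // hence the '16' passed to the loaders below). Each call reconstructs one
  // 8-pixel-wide band of the final 32x32 block and clamps it to
  // [0, 2^bd - 1] on the way out via highbd_idct16x16_add_store().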
load_and_transpose_s32_8x8(input, 16, &in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); load_and_transpose_s32_8x8(input + 8, 16, &in[8], &in[9], &in[10], &in[11], &in[12], &in[13], &in[14], &in[15]); // stage 1 s1[16] = multiply_shift_and_narrow_s32_dual(in[1], cospi_31_64); s1[31] = multiply_shift_and_narrow_s32_dual(in[1], cospi_1_64); s1[17] = multiply_shift_and_narrow_s32_dual(in[15], -cospi_17_64); s1[30] = multiply_shift_and_narrow_s32_dual(in[15], cospi_15_64); s1[18] = multiply_shift_and_narrow_s32_dual(in[9], cospi_23_64); s1[29] = multiply_shift_and_narrow_s32_dual(in[9], cospi_9_64); s1[19] = multiply_shift_and_narrow_s32_dual(in[7], -cospi_25_64); s1[28] = multiply_shift_and_narrow_s32_dual(in[7], cospi_7_64); s1[20] = multiply_shift_and_narrow_s32_dual(in[5], cospi_27_64); s1[27] = multiply_shift_and_narrow_s32_dual(in[5], cospi_5_64); s1[21] = multiply_shift_and_narrow_s32_dual(in[11], -cospi_21_64); s1[26] = multiply_shift_and_narrow_s32_dual(in[11], cospi_11_64); s1[22] = multiply_shift_and_narrow_s32_dual(in[13], cospi_19_64); s1[25] = multiply_shift_and_narrow_s32_dual(in[13], cospi_13_64); s1[23] = multiply_shift_and_narrow_s32_dual(in[3], -cospi_29_64); s1[24] = multiply_shift_and_narrow_s32_dual(in[3], cospi_3_64); // stage 2 s2[8] = multiply_shift_and_narrow_s32_dual(in[2], cospi_30_64); s2[15] = multiply_shift_and_narrow_s32_dual(in[2], cospi_2_64); s2[9] = multiply_shift_and_narrow_s32_dual(in[14], -cospi_18_64); s2[14] = multiply_shift_and_narrow_s32_dual(in[14], cospi_14_64); s2[10] = multiply_shift_and_narrow_s32_dual(in[10], cospi_22_64); s2[13] = multiply_shift_and_narrow_s32_dual(in[10], cospi_10_64); s2[11] = multiply_shift_and_narrow_s32_dual(in[6], -cospi_26_64); s2[12] = multiply_shift_and_narrow_s32_dual(in[6], cospi_6_64); s2[16] = highbd_idct_add_dual(s1[16], s1[17]); s2[17] = highbd_idct_sub_dual(s1[16], s1[17]); s2[18] = highbd_idct_sub_dual(s1[19], s1[18]); s2[19] = highbd_idct_add_dual(s1[18], s1[19]); s2[20] = highbd_idct_add_dual(s1[20], s1[21]); s2[21] = highbd_idct_sub_dual(s1[20], s1[21]); s2[22] = highbd_idct_sub_dual(s1[23], s1[22]); s2[23] = highbd_idct_add_dual(s1[22], s1[23]); s2[24] = highbd_idct_add_dual(s1[24], s1[25]); s2[25] = highbd_idct_sub_dual(s1[24], s1[25]); s2[26] = highbd_idct_sub_dual(s1[27], s1[26]); s2[27] = highbd_idct_add_dual(s1[26], s1[27]); s2[28] = highbd_idct_add_dual(s1[28], s1[29]); s2[29] = highbd_idct_sub_dual(s1[28], s1[29]); s2[30] = highbd_idct_sub_dual(s1[31], s1[30]); s2[31] = highbd_idct_add_dual(s1[30], s1[31]); // stage 3 s3[4] = multiply_shift_and_narrow_s32_dual(in[4], cospi_28_64); s3[7] = multiply_shift_and_narrow_s32_dual(in[4], cospi_4_64); s3[5] = multiply_shift_and_narrow_s32_dual(in[12], -cospi_20_64); s3[6] = multiply_shift_and_narrow_s32_dual(in[12], cospi_12_64); s3[8] = highbd_idct_add_dual(s2[8], s2[9]); s3[9] = highbd_idct_sub_dual(s2[8], s2[9]); s3[10] = highbd_idct_sub_dual(s2[11], s2[10]); s3[11] = highbd_idct_add_dual(s2[10], s2[11]); s3[12] = highbd_idct_add_dual(s2[12], s2[13]); s3[13] = highbd_idct_sub_dual(s2[12], s2[13]); s3[14] = highbd_idct_sub_dual(s2[15], s2[14]); s3[15] = highbd_idct_add_dual(s2[14], s2[15]); s3[17] = multiply_accumulate_shift_and_narrow_s32_dual(s2[17], -cospi_4_64, s2[30], cospi_28_64); s3[30] = multiply_accumulate_shift_and_narrow_s32_dual(s2[17], cospi_28_64, s2[30], cospi_4_64); s3[18] = multiply_accumulate_shift_and_narrow_s32_dual(s2[18], -cospi_28_64, s2[29], -cospi_4_64); s3[29] = multiply_accumulate_shift_and_narrow_s32_dual(s2[18], 
-cospi_4_64, s2[29], cospi_28_64); s3[21] = multiply_accumulate_shift_and_narrow_s32_dual(s2[21], -cospi_20_64, s2[26], cospi_12_64); s3[26] = multiply_accumulate_shift_and_narrow_s32_dual(s2[21], cospi_12_64, s2[26], cospi_20_64); s3[22] = multiply_accumulate_shift_and_narrow_s32_dual(s2[22], -cospi_12_64, s2[25], -cospi_20_64); s3[25] = multiply_accumulate_shift_and_narrow_s32_dual(s2[22], -cospi_20_64, s2[25], cospi_12_64); // stage 4 s4[0] = multiply_shift_and_narrow_s32_dual(in[0], cospi_16_64); s4[2] = multiply_shift_and_narrow_s32_dual(in[8], cospi_24_64); s4[3] = multiply_shift_and_narrow_s32_dual(in[8], cospi_8_64); s4[4] = highbd_idct_add_dual(s3[4], s3[5]); s4[5] = highbd_idct_sub_dual(s3[4], s3[5]); s4[6] = highbd_idct_sub_dual(s3[7], s3[6]); s4[7] = highbd_idct_add_dual(s3[6], s3[7]); s4[9] = multiply_accumulate_shift_and_narrow_s32_dual(s3[9], -cospi_8_64, s3[14], cospi_24_64); s4[14] = multiply_accumulate_shift_and_narrow_s32_dual(s3[9], cospi_24_64, s3[14], cospi_8_64); s4[10] = multiply_accumulate_shift_and_narrow_s32_dual(s3[10], -cospi_24_64, s3[13], -cospi_8_64); s4[13] = multiply_accumulate_shift_and_narrow_s32_dual(s3[10], -cospi_8_64, s3[13], cospi_24_64); s4[16] = highbd_idct_add_dual(s2[16], s2[19]); s4[17] = highbd_idct_add_dual(s3[17], s3[18]); s4[18] = highbd_idct_sub_dual(s3[17], s3[18]); s4[19] = highbd_idct_sub_dual(s2[16], s2[19]); s4[20] = highbd_idct_sub_dual(s2[23], s2[20]); s4[21] = highbd_idct_sub_dual(s3[22], s3[21]); s4[22] = highbd_idct_add_dual(s3[21], s3[22]); s4[23] = highbd_idct_add_dual(s2[20], s2[23]); s4[24] = highbd_idct_add_dual(s2[24], s2[27]); s4[25] = highbd_idct_add_dual(s3[25], s3[26]); s4[26] = highbd_idct_sub_dual(s3[25], s3[26]); s4[27] = highbd_idct_sub_dual(s2[24], s2[27]); s4[28] = highbd_idct_sub_dual(s2[31], s2[28]); s4[29] = highbd_idct_sub_dual(s3[30], s3[29]); s4[30] = highbd_idct_add_dual(s3[29], s3[30]); s4[31] = highbd_idct_add_dual(s2[28], s2[31]); // stage 5 s5[0] = highbd_idct_add_dual(s4[0], s4[3]); s5[1] = highbd_idct_add_dual(s4[0], s4[2]); s5[2] = highbd_idct_sub_dual(s4[0], s4[2]); s5[3] = highbd_idct_sub_dual(s4[0], s4[3]); s5[5] = sub_multiply_shift_and_narrow_s32_dual(s4[6], s4[5], cospi_16_64); s5[6] = add_multiply_shift_and_narrow_s32_dual(s4[5], s4[6], cospi_16_64); s5[8] = highbd_idct_add_dual(s3[8], s3[11]); s5[9] = highbd_idct_add_dual(s4[9], s4[10]); s5[10] = highbd_idct_sub_dual(s4[9], s4[10]); s5[11] = highbd_idct_sub_dual(s3[8], s3[11]); s5[12] = highbd_idct_sub_dual(s3[15], s3[12]); s5[13] = highbd_idct_sub_dual(s4[14], s4[13]); s5[14] = highbd_idct_add_dual(s4[13], s4[14]); s5[15] = highbd_idct_add_dual(s3[15], s3[12]); s5[18] = multiply_accumulate_shift_and_narrow_s32_dual(s4[18], -cospi_8_64, s4[29], cospi_24_64); s5[29] = multiply_accumulate_shift_and_narrow_s32_dual(s4[18], cospi_24_64, s4[29], cospi_8_64); s5[19] = multiply_accumulate_shift_and_narrow_s32_dual(s4[19], -cospi_8_64, s4[28], cospi_24_64); s5[28] = multiply_accumulate_shift_and_narrow_s32_dual(s4[19], cospi_24_64, s4[28], cospi_8_64); s5[20] = multiply_accumulate_shift_and_narrow_s32_dual(s4[20], -cospi_24_64, s4[27], -cospi_8_64); s5[27] = multiply_accumulate_shift_and_narrow_s32_dual(s4[20], -cospi_8_64, s4[27], cospi_24_64); s5[21] = multiply_accumulate_shift_and_narrow_s32_dual(s4[21], -cospi_24_64, s4[26], -cospi_8_64); s5[26] = multiply_accumulate_shift_and_narrow_s32_dual(s4[21], -cospi_8_64, s4[26], cospi_24_64); // stage 6 s6[0] = highbd_idct_add_dual(s5[0], s4[7]); s6[1] = highbd_idct_add_dual(s5[1], s5[6]); s6[2] = 
highbd_idct_add_dual(s5[2], s5[5]); s6[3] = highbd_idct_add_dual(s5[3], s4[4]); s6[4] = highbd_idct_sub_dual(s5[3], s4[4]); s6[5] = highbd_idct_sub_dual(s5[2], s5[5]); s6[6] = highbd_idct_sub_dual(s5[1], s5[6]); s6[7] = highbd_idct_sub_dual(s5[0], s4[7]); s6[10] = sub_multiply_shift_and_narrow_s32_dual(s5[13], s5[10], cospi_16_64); s6[13] = add_multiply_shift_and_narrow_s32_dual(s5[10], s5[13], cospi_16_64); s6[11] = sub_multiply_shift_and_narrow_s32_dual(s5[12], s5[11], cospi_16_64); s6[12] = add_multiply_shift_and_narrow_s32_dual(s5[11], s5[12], cospi_16_64); s6[16] = highbd_idct_add_dual(s4[16], s4[23]); s6[17] = highbd_idct_add_dual(s4[17], s4[22]); s6[18] = highbd_idct_add_dual(s5[18], s5[21]); s6[19] = highbd_idct_add_dual(s5[19], s5[20]); s6[20] = highbd_idct_sub_dual(s5[19], s5[20]); s6[21] = highbd_idct_sub_dual(s5[18], s5[21]); s6[22] = highbd_idct_sub_dual(s4[17], s4[22]); s6[23] = highbd_idct_sub_dual(s4[16], s4[23]); s6[24] = highbd_idct_sub_dual(s4[31], s4[24]); s6[25] = highbd_idct_sub_dual(s4[30], s4[25]); s6[26] = highbd_idct_sub_dual(s5[29], s5[26]); s6[27] = highbd_idct_sub_dual(s5[28], s5[27]); s6[28] = highbd_idct_add_dual(s5[27], s5[28]); s6[29] = highbd_idct_add_dual(s5[26], s5[29]); s6[30] = highbd_idct_add_dual(s4[25], s4[30]); s6[31] = highbd_idct_add_dual(s4[24], s4[31]); // stage 7 s7[0] = highbd_idct_add_dual(s6[0], s5[15]); s7[1] = highbd_idct_add_dual(s6[1], s5[14]); s7[2] = highbd_idct_add_dual(s6[2], s6[13]); s7[3] = highbd_idct_add_dual(s6[3], s6[12]); s7[4] = highbd_idct_add_dual(s6[4], s6[11]); s7[5] = highbd_idct_add_dual(s6[5], s6[10]); s7[6] = highbd_idct_add_dual(s6[6], s5[9]); s7[7] = highbd_idct_add_dual(s6[7], s5[8]); s7[8] = highbd_idct_sub_dual(s6[7], s5[8]); s7[9] = highbd_idct_sub_dual(s6[6], s5[9]); s7[10] = highbd_idct_sub_dual(s6[5], s6[10]); s7[11] = highbd_idct_sub_dual(s6[4], s6[11]); s7[12] = highbd_idct_sub_dual(s6[3], s6[12]); s7[13] = highbd_idct_sub_dual(s6[2], s6[13]); s7[14] = highbd_idct_sub_dual(s6[1], s5[14]); s7[15] = highbd_idct_sub_dual(s6[0], s5[15]); s7[20] = sub_multiply_shift_and_narrow_s32_dual(s6[27], s6[20], cospi_16_64); s7[27] = add_multiply_shift_and_narrow_s32_dual(s6[20], s6[27], cospi_16_64); s7[21] = sub_multiply_shift_and_narrow_s32_dual(s6[26], s6[21], cospi_16_64); s7[26] = add_multiply_shift_and_narrow_s32_dual(s6[21], s6[26], cospi_16_64); s7[22] = sub_multiply_shift_and_narrow_s32_dual(s6[25], s6[22], cospi_16_64); s7[25] = add_multiply_shift_and_narrow_s32_dual(s6[22], s6[25], cospi_16_64); s7[23] = sub_multiply_shift_and_narrow_s32_dual(s6[24], s6[23], cospi_16_64); s7[24] = add_multiply_shift_and_narrow_s32_dual(s6[23], s6[24], cospi_16_64); // final stage out[0] = highbd_idct_add_dual(s7[0], s6[31]); out[1] = highbd_idct_add_dual(s7[1], s6[30]); out[2] = highbd_idct_add_dual(s7[2], s6[29]); out[3] = highbd_idct_add_dual(s7[3], s6[28]); out[4] = highbd_idct_add_dual(s7[4], s7[27]); out[5] = highbd_idct_add_dual(s7[5], s7[26]); out[6] = highbd_idct_add_dual(s7[6], s7[25]); out[7] = highbd_idct_add_dual(s7[7], s7[24]); out[8] = highbd_idct_add_dual(s7[8], s7[23]); out[9] = highbd_idct_add_dual(s7[9], s7[22]); out[10] = highbd_idct_add_dual(s7[10], s7[21]); out[11] = highbd_idct_add_dual(s7[11], s7[20]); out[12] = highbd_idct_add_dual(s7[12], s6[19]); out[13] = highbd_idct_add_dual(s7[13], s6[18]); out[14] = highbd_idct_add_dual(s7[14], s6[17]); out[15] = highbd_idct_add_dual(s7[15], s6[16]); out[16] = highbd_idct_sub_dual(s7[15], s6[16]); out[17] = highbd_idct_sub_dual(s7[14], s6[17]); out[18] = 
    highbd_idct_sub_dual(s7[13], s6[18]);
  out[19] = highbd_idct_sub_dual(s7[12], s6[19]);
  out[20] = highbd_idct_sub_dual(s7[11], s7[20]);
  out[21] = highbd_idct_sub_dual(s7[10], s7[21]);
  out[22] = highbd_idct_sub_dual(s7[9], s7[22]);
  out[23] = highbd_idct_sub_dual(s7[8], s7[23]);
  out[24] = highbd_idct_sub_dual(s7[7], s7[24]);
  out[25] = highbd_idct_sub_dual(s7[6], s7[25]);
  out[26] = highbd_idct_sub_dual(s7[5], s7[26]);
  out[27] = highbd_idct_sub_dual(s7[4], s7[27]);
  out[28] = highbd_idct_sub_dual(s7[3], s6[28]);
  out[29] = highbd_idct_sub_dual(s7[2], s6[29]);
  out[30] = highbd_idct_sub_dual(s7[1], s6[30]);
  out[31] = highbd_idct_sub_dual(s7[0], s6[31]);

  highbd_idct16x16_add_store(out, output, stride, bd);
  highbd_idct16x16_add_store(out + 16, output + 16 * stride, stride, bd);
}

void vpx_highbd_idct32x32_135_add_neon(const tran_low_t *input, uint16_t *dest,
                                       int stride, int bd) {
  int i;

  if (bd == 8) {
    int16_t temp[32 * 16];
    int16_t *t = temp;
    vpx_idct32_12_neon(input, temp);
    vpx_idct32_12_neon(input + 32 * 8, temp + 8);

    for (i = 0; i < 32; i += 8) {
      vpx_idct32_16_neon(t, dest, stride, 1);
      t += (16 * 8);
      dest += 8;
    }
  } else {
    int32_t temp[32 * 16];
    int32_t *t = temp;
    vpx_highbd_idct32_12_neon(input, temp);
    vpx_highbd_idct32_12_neon(input + 32 * 8, temp + 8);

    for (i = 0; i < 32; i += 8) {
      vpx_highbd_idct32_16_neon(t, dest, stride, bd);
      t += (16 * 8);
      dest += 8;
    }
  }
}
libvpx-1.8.2/vpx_dsp/arm/highbd_idct32x32_34_add_neon.c000066400000000000000000000611221357355204000225310ustar00rootroot00000000000000/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <arm_neon.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/highbd_idct_neon.h"
#include "vpx_dsp/arm/idct_neon.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_dsp/txfm_common.h"

// Only for the first pass of the _34_ variant. Since it only uses values from
// the top left 8x8 it can safely assume all the remaining values are 0 and
// skip an awful lot of calculations. In fact, only the first 6 columns make
// the cut. None of the elements in the 7th or 8th column are used so it skips
// any calls to input[6|7] too.
// In C this does a single row of 32 for each call. Here it transposes the top
// left 8x8 to allow using SIMD.
// vp9/common/vp9_scan.c:vp9_default_iscan_32x32 arranges the first 34 non-zero // coefficients as follows: // 0 1 2 3 4 5 6 7 // 0 0 2 5 10 17 25 // 1 1 4 8 15 22 30 // 2 3 7 12 18 28 // 3 6 11 16 23 31 // 4 9 14 19 29 // 5 13 20 26 // 6 21 27 33 // 7 24 32 static void vpx_highbd_idct32_6_neon(const tran_low_t *input, int32_t *output) { int32x4x2_t in[8], s1[32], s2[32], s3[32]; in[0].val[0] = vld1q_s32(input); in[0].val[1] = vld1q_s32(input + 4); input += 32; in[1].val[0] = vld1q_s32(input); in[1].val[1] = vld1q_s32(input + 4); input += 32; in[2].val[0] = vld1q_s32(input); in[2].val[1] = vld1q_s32(input + 4); input += 32; in[3].val[0] = vld1q_s32(input); in[3].val[1] = vld1q_s32(input + 4); input += 32; in[4].val[0] = vld1q_s32(input); in[4].val[1] = vld1q_s32(input + 4); input += 32; in[5].val[0] = vld1q_s32(input); in[5].val[1] = vld1q_s32(input + 4); input += 32; in[6].val[0] = vld1q_s32(input); in[6].val[1] = vld1q_s32(input + 4); input += 32; in[7].val[0] = vld1q_s32(input); in[7].val[1] = vld1q_s32(input + 4); transpose_s32_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); // stage 1 // input[1] * cospi_31_64 - input[31] * cospi_1_64 (but input[31] == 0) s1[16] = multiply_shift_and_narrow_s32_dual(in[1], cospi_31_64); // input[1] * cospi_1_64 + input[31] * cospi_31_64 (but input[31] == 0) s1[31] = multiply_shift_and_narrow_s32_dual(in[1], cospi_1_64); s1[20] = multiply_shift_and_narrow_s32_dual(in[5], cospi_27_64); s1[27] = multiply_shift_and_narrow_s32_dual(in[5], cospi_5_64); s1[23] = multiply_shift_and_narrow_s32_dual(in[3], -cospi_29_64); s1[24] = multiply_shift_and_narrow_s32_dual(in[3], cospi_3_64); // stage 2 s2[8] = multiply_shift_and_narrow_s32_dual(in[2], cospi_30_64); s2[15] = multiply_shift_and_narrow_s32_dual(in[2], cospi_2_64); // stage 3 s1[4] = multiply_shift_and_narrow_s32_dual(in[4], cospi_28_64); s1[7] = multiply_shift_and_narrow_s32_dual(in[4], cospi_4_64); s1[17] = multiply_accumulate_shift_and_narrow_s32_dual(s1[16], -cospi_4_64, s1[31], cospi_28_64); s1[30] = multiply_accumulate_shift_and_narrow_s32_dual(s1[16], cospi_28_64, s1[31], cospi_4_64); s1[21] = multiply_accumulate_shift_and_narrow_s32_dual(s1[20], -cospi_20_64, s1[27], cospi_12_64); s1[26] = multiply_accumulate_shift_and_narrow_s32_dual(s1[20], cospi_12_64, s1[27], cospi_20_64); s1[22] = multiply_accumulate_shift_and_narrow_s32_dual(s1[23], -cospi_12_64, s1[24], -cospi_20_64); s1[25] = multiply_accumulate_shift_and_narrow_s32_dual(s1[23], -cospi_20_64, s1[24], cospi_12_64); // stage 4 s1[0] = multiply_shift_and_narrow_s32_dual(in[0], cospi_16_64); s2[9] = multiply_accumulate_shift_and_narrow_s32_dual(s2[8], -cospi_8_64, s2[15], cospi_24_64); s2[14] = multiply_accumulate_shift_and_narrow_s32_dual(s2[8], cospi_24_64, s2[15], cospi_8_64); s2[20] = highbd_idct_sub_dual(s1[23], s1[20]); s2[21] = highbd_idct_sub_dual(s1[22], s1[21]); s2[22] = highbd_idct_add_dual(s1[21], s1[22]); s2[23] = highbd_idct_add_dual(s1[20], s1[23]); s2[24] = highbd_idct_add_dual(s1[24], s1[27]); s2[25] = highbd_idct_add_dual(s1[25], s1[26]); s2[26] = highbd_idct_sub_dual(s1[25], s1[26]); s2[27] = highbd_idct_sub_dual(s1[24], s1[27]); // stage 5 s1[5] = sub_multiply_shift_and_narrow_s32_dual(s1[7], s1[4], cospi_16_64); s1[6] = add_multiply_shift_and_narrow_s32_dual(s1[4], s1[7], cospi_16_64); s1[18] = multiply_accumulate_shift_and_narrow_s32_dual(s1[17], -cospi_8_64, s1[30], cospi_24_64); s1[29] = multiply_accumulate_shift_and_narrow_s32_dual(s1[17], cospi_24_64, s1[30], cospi_8_64); s1[19] = 
multiply_accumulate_shift_and_narrow_s32_dual(s1[16], -cospi_8_64, s1[31], cospi_24_64); s1[28] = multiply_accumulate_shift_and_narrow_s32_dual(s1[16], cospi_24_64, s1[31], cospi_8_64); s1[20] = multiply_accumulate_shift_and_narrow_s32_dual(s2[20], -cospi_24_64, s2[27], -cospi_8_64); s1[27] = multiply_accumulate_shift_and_narrow_s32_dual(s2[20], -cospi_8_64, s2[27], cospi_24_64); s1[21] = multiply_accumulate_shift_and_narrow_s32_dual(s2[21], -cospi_24_64, s2[26], -cospi_8_64); s1[26] = multiply_accumulate_shift_and_narrow_s32_dual(s2[21], -cospi_8_64, s2[26], cospi_24_64); // stage 6 s2[0] = highbd_idct_add_dual(s1[0], s1[7]); s2[1] = highbd_idct_add_dual(s1[0], s1[6]); s2[2] = highbd_idct_add_dual(s1[0], s1[5]); s2[3] = highbd_idct_add_dual(s1[0], s1[4]); s2[4] = highbd_idct_sub_dual(s1[0], s1[4]); s2[5] = highbd_idct_sub_dual(s1[0], s1[5]); s2[6] = highbd_idct_sub_dual(s1[0], s1[6]); s2[7] = highbd_idct_sub_dual(s1[0], s1[7]); s2[10] = sub_multiply_shift_and_narrow_s32_dual(s2[14], s2[9], cospi_16_64); s2[13] = add_multiply_shift_and_narrow_s32_dual(s2[9], s2[14], cospi_16_64); s2[11] = sub_multiply_shift_and_narrow_s32_dual(s2[15], s2[8], cospi_16_64); s2[12] = add_multiply_shift_and_narrow_s32_dual(s2[8], s2[15], cospi_16_64); s2[16] = highbd_idct_add_dual(s1[16], s2[23]); s2[17] = highbd_idct_add_dual(s1[17], s2[22]); s2[18] = highbd_idct_add_dual(s1[18], s1[21]); s2[19] = highbd_idct_add_dual(s1[19], s1[20]); s2[20] = highbd_idct_sub_dual(s1[19], s1[20]); s2[21] = highbd_idct_sub_dual(s1[18], s1[21]); s2[22] = highbd_idct_sub_dual(s1[17], s2[22]); s2[23] = highbd_idct_sub_dual(s1[16], s2[23]); s3[24] = highbd_idct_sub_dual(s1[31], s2[24]); s3[25] = highbd_idct_sub_dual(s1[30], s2[25]); s3[26] = highbd_idct_sub_dual(s1[29], s1[26]); s3[27] = highbd_idct_sub_dual(s1[28], s1[27]); s2[28] = highbd_idct_add_dual(s1[27], s1[28]); s2[29] = highbd_idct_add_dual(s1[26], s1[29]); s2[30] = highbd_idct_add_dual(s2[25], s1[30]); s2[31] = highbd_idct_add_dual(s2[24], s1[31]); // stage 7 s1[0] = highbd_idct_add_dual(s2[0], s2[15]); s1[1] = highbd_idct_add_dual(s2[1], s2[14]); s1[2] = highbd_idct_add_dual(s2[2], s2[13]); s1[3] = highbd_idct_add_dual(s2[3], s2[12]); s1[4] = highbd_idct_add_dual(s2[4], s2[11]); s1[5] = highbd_idct_add_dual(s2[5], s2[10]); s1[6] = highbd_idct_add_dual(s2[6], s2[9]); s1[7] = highbd_idct_add_dual(s2[7], s2[8]); s1[8] = highbd_idct_sub_dual(s2[7], s2[8]); s1[9] = highbd_idct_sub_dual(s2[6], s2[9]); s1[10] = highbd_idct_sub_dual(s2[5], s2[10]); s1[11] = highbd_idct_sub_dual(s2[4], s2[11]); s1[12] = highbd_idct_sub_dual(s2[3], s2[12]); s1[13] = highbd_idct_sub_dual(s2[2], s2[13]); s1[14] = highbd_idct_sub_dual(s2[1], s2[14]); s1[15] = highbd_idct_sub_dual(s2[0], s2[15]); s1[20] = sub_multiply_shift_and_narrow_s32_dual(s3[27], s2[20], cospi_16_64); s1[27] = add_multiply_shift_and_narrow_s32_dual(s2[20], s3[27], cospi_16_64); s1[21] = sub_multiply_shift_and_narrow_s32_dual(s3[26], s2[21], cospi_16_64); s1[26] = add_multiply_shift_and_narrow_s32_dual(s2[21], s3[26], cospi_16_64); s1[22] = sub_multiply_shift_and_narrow_s32_dual(s3[25], s2[22], cospi_16_64); s1[25] = add_multiply_shift_and_narrow_s32_dual(s2[22], s3[25], cospi_16_64); s1[23] = sub_multiply_shift_and_narrow_s32_dual(s3[24], s2[23], cospi_16_64); s1[24] = add_multiply_shift_and_narrow_s32_dual(s2[23], s3[24], cospi_16_64); // final stage s3[0] = highbd_idct_add_dual(s1[0], s2[31]); s3[1] = highbd_idct_add_dual(s1[1], s2[30]); s3[2] = highbd_idct_add_dual(s1[2], s2[29]); s3[3] = highbd_idct_add_dual(s1[3], s2[28]); 
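  // The "final stage" of every idct32 flavour is the same mirror butterfly.
  // Conceptually (a scalar model of this block of 16 add/sub pairs):
  //
  //   for (i = 0; i < 16; i++) {
  //     out[i]      = stage7[i] + stage7[31 - i];
  //     out[31 - i] = stage7[i] - stage7[31 - i];
  //   }
  //
  // The mix of s1/s2/s3 names on the right-hand sides here is only register
  // reuse left over from the zero-coefficient shortcuts, not a different
  // dataflow.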
s3[4] = highbd_idct_add_dual(s1[4], s1[27]); s3[5] = highbd_idct_add_dual(s1[5], s1[26]); s3[6] = highbd_idct_add_dual(s1[6], s1[25]); s3[7] = highbd_idct_add_dual(s1[7], s1[24]); s3[8] = highbd_idct_add_dual(s1[8], s1[23]); s3[9] = highbd_idct_add_dual(s1[9], s1[22]); s3[10] = highbd_idct_add_dual(s1[10], s1[21]); s3[11] = highbd_idct_add_dual(s1[11], s1[20]); s3[12] = highbd_idct_add_dual(s1[12], s2[19]); s3[13] = highbd_idct_add_dual(s1[13], s2[18]); s3[14] = highbd_idct_add_dual(s1[14], s2[17]); s3[15] = highbd_idct_add_dual(s1[15], s2[16]); s3[16] = highbd_idct_sub_dual(s1[15], s2[16]); s3[17] = highbd_idct_sub_dual(s1[14], s2[17]); s3[18] = highbd_idct_sub_dual(s1[13], s2[18]); s3[19] = highbd_idct_sub_dual(s1[12], s2[19]); s3[20] = highbd_idct_sub_dual(s1[11], s1[20]); s3[21] = highbd_idct_sub_dual(s1[10], s1[21]); s3[22] = highbd_idct_sub_dual(s1[9], s1[22]); s3[23] = highbd_idct_sub_dual(s1[8], s1[23]); s3[24] = highbd_idct_sub_dual(s1[7], s1[24]); s3[25] = highbd_idct_sub_dual(s1[6], s1[25]); s3[26] = highbd_idct_sub_dual(s1[5], s1[26]); s3[27] = highbd_idct_sub_dual(s1[4], s1[27]); s3[28] = highbd_idct_sub_dual(s1[3], s2[28]); s3[29] = highbd_idct_sub_dual(s1[2], s2[29]); s3[30] = highbd_idct_sub_dual(s1[1], s2[30]); s3[31] = highbd_idct_sub_dual(s1[0], s2[31]); vst1q_s32(output, s3[0].val[0]); output += 4; vst1q_s32(output, s3[0].val[1]); output += 4; vst1q_s32(output, s3[1].val[0]); output += 4; vst1q_s32(output, s3[1].val[1]); output += 4; vst1q_s32(output, s3[2].val[0]); output += 4; vst1q_s32(output, s3[2].val[1]); output += 4; vst1q_s32(output, s3[3].val[0]); output += 4; vst1q_s32(output, s3[3].val[1]); output += 4; vst1q_s32(output, s3[4].val[0]); output += 4; vst1q_s32(output, s3[4].val[1]); output += 4; vst1q_s32(output, s3[5].val[0]); output += 4; vst1q_s32(output, s3[5].val[1]); output += 4; vst1q_s32(output, s3[6].val[0]); output += 4; vst1q_s32(output, s3[6].val[1]); output += 4; vst1q_s32(output, s3[7].val[0]); output += 4; vst1q_s32(output, s3[7].val[1]); output += 4; vst1q_s32(output, s3[8].val[0]); output += 4; vst1q_s32(output, s3[8].val[1]); output += 4; vst1q_s32(output, s3[9].val[0]); output += 4; vst1q_s32(output, s3[9].val[1]); output += 4; vst1q_s32(output, s3[10].val[0]); output += 4; vst1q_s32(output, s3[10].val[1]); output += 4; vst1q_s32(output, s3[11].val[0]); output += 4; vst1q_s32(output, s3[11].val[1]); output += 4; vst1q_s32(output, s3[12].val[0]); output += 4; vst1q_s32(output, s3[12].val[1]); output += 4; vst1q_s32(output, s3[13].val[0]); output += 4; vst1q_s32(output, s3[13].val[1]); output += 4; vst1q_s32(output, s3[14].val[0]); output += 4; vst1q_s32(output, s3[14].val[1]); output += 4; vst1q_s32(output, s3[15].val[0]); output += 4; vst1q_s32(output, s3[15].val[1]); output += 4; vst1q_s32(output, s3[16].val[0]); output += 4; vst1q_s32(output, s3[16].val[1]); output += 4; vst1q_s32(output, s3[17].val[0]); output += 4; vst1q_s32(output, s3[17].val[1]); output += 4; vst1q_s32(output, s3[18].val[0]); output += 4; vst1q_s32(output, s3[18].val[1]); output += 4; vst1q_s32(output, s3[19].val[0]); output += 4; vst1q_s32(output, s3[19].val[1]); output += 4; vst1q_s32(output, s3[20].val[0]); output += 4; vst1q_s32(output, s3[20].val[1]); output += 4; vst1q_s32(output, s3[21].val[0]); output += 4; vst1q_s32(output, s3[21].val[1]); output += 4; vst1q_s32(output, s3[22].val[0]); output += 4; vst1q_s32(output, s3[22].val[1]); output += 4; vst1q_s32(output, s3[23].val[0]); output += 4; vst1q_s32(output, s3[23].val[1]); output += 4; vst1q_s32(output, 
s3[24].val[0]); output += 4; vst1q_s32(output, s3[24].val[1]); output += 4; vst1q_s32(output, s3[25].val[0]); output += 4; vst1q_s32(output, s3[25].val[1]); output += 4; vst1q_s32(output, s3[26].val[0]); output += 4; vst1q_s32(output, s3[26].val[1]); output += 4; vst1q_s32(output, s3[27].val[0]); output += 4; vst1q_s32(output, s3[27].val[1]); output += 4; vst1q_s32(output, s3[28].val[0]); output += 4; vst1q_s32(output, s3[28].val[1]); output += 4; vst1q_s32(output, s3[29].val[0]); output += 4; vst1q_s32(output, s3[29].val[1]); output += 4; vst1q_s32(output, s3[30].val[0]); output += 4; vst1q_s32(output, s3[30].val[1]); output += 4; vst1q_s32(output, s3[31].val[0]); output += 4; vst1q_s32(output, s3[31].val[1]); } static void vpx_highbd_idct32_8_neon(const int32_t *input, uint16_t *output, int stride, const int bd) { int32x4x2_t in[8], s1[32], s2[32], s3[32], out[32]; load_and_transpose_s32_8x8(input, 8, &in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); // stage 1 s1[16] = multiply_shift_and_narrow_s32_dual(in[1], cospi_31_64); s1[31] = multiply_shift_and_narrow_s32_dual(in[1], cospi_1_64); // Different for _8_ s1[19] = multiply_shift_and_narrow_s32_dual(in[7], -cospi_25_64); s1[28] = multiply_shift_and_narrow_s32_dual(in[7], cospi_7_64); s1[20] = multiply_shift_and_narrow_s32_dual(in[5], cospi_27_64); s1[27] = multiply_shift_and_narrow_s32_dual(in[5], cospi_5_64); s1[23] = multiply_shift_and_narrow_s32_dual(in[3], -cospi_29_64); s1[24] = multiply_shift_and_narrow_s32_dual(in[3], cospi_3_64); // stage 2 s2[8] = multiply_shift_and_narrow_s32_dual(in[2], cospi_30_64); s2[15] = multiply_shift_and_narrow_s32_dual(in[2], cospi_2_64); s2[11] = multiply_shift_and_narrow_s32_dual(in[6], -cospi_26_64); s2[12] = multiply_shift_and_narrow_s32_dual(in[6], cospi_6_64); // stage 3 s1[4] = multiply_shift_and_narrow_s32_dual(in[4], cospi_28_64); s1[7] = multiply_shift_and_narrow_s32_dual(in[4], cospi_4_64); s1[17] = multiply_accumulate_shift_and_narrow_s32_dual(s1[16], -cospi_4_64, s1[31], cospi_28_64); s1[30] = multiply_accumulate_shift_and_narrow_s32_dual(s1[16], cospi_28_64, s1[31], cospi_4_64); // Different for _8_ s1[18] = multiply_accumulate_shift_and_narrow_s32_dual(s1[19], -cospi_28_64, s1[28], -cospi_4_64); s1[29] = multiply_accumulate_shift_and_narrow_s32_dual(s1[19], -cospi_4_64, s1[28], cospi_28_64); s1[21] = multiply_accumulate_shift_and_narrow_s32_dual(s1[20], -cospi_20_64, s1[27], cospi_12_64); s1[26] = multiply_accumulate_shift_and_narrow_s32_dual(s1[20], cospi_12_64, s1[27], cospi_20_64); s1[22] = multiply_accumulate_shift_and_narrow_s32_dual(s1[23], -cospi_12_64, s1[24], -cospi_20_64); s1[25] = multiply_accumulate_shift_and_narrow_s32_dual(s1[23], -cospi_20_64, s1[24], cospi_12_64); // stage 4 s1[0] = multiply_shift_and_narrow_s32_dual(in[0], cospi_16_64); s2[9] = multiply_accumulate_shift_and_narrow_s32_dual(s2[8], -cospi_8_64, s2[15], cospi_24_64); s2[14] = multiply_accumulate_shift_and_narrow_s32_dual(s2[8], cospi_24_64, s2[15], cospi_8_64); s2[10] = multiply_accumulate_shift_and_narrow_s32_dual(s2[11], -cospi_24_64, s2[12], -cospi_8_64); s2[13] = multiply_accumulate_shift_and_narrow_s32_dual(s2[11], -cospi_8_64, s2[12], cospi_24_64); s2[16] = highbd_idct_add_dual(s1[16], s1[19]); s2[17] = highbd_idct_add_dual(s1[17], s1[18]); s2[18] = highbd_idct_sub_dual(s1[17], s1[18]); s2[19] = highbd_idct_sub_dual(s1[16], s1[19]); s2[20] = highbd_idct_sub_dual(s1[23], s1[20]); s2[21] = highbd_idct_sub_dual(s1[22], s1[21]); s2[22] = highbd_idct_add_dual(s1[21], s1[22]); s2[23] = 
highbd_idct_add_dual(s1[20], s1[23]); s2[24] = highbd_idct_add_dual(s1[24], s1[27]); s2[25] = highbd_idct_add_dual(s1[25], s1[26]); s2[26] = highbd_idct_sub_dual(s1[25], s1[26]); s2[27] = highbd_idct_sub_dual(s1[24], s1[27]); s2[28] = highbd_idct_sub_dual(s1[31], s1[28]); s2[29] = highbd_idct_sub_dual(s1[30], s1[29]); s2[30] = highbd_idct_add_dual(s1[29], s1[30]); s2[31] = highbd_idct_add_dual(s1[28], s1[31]); // stage 5 s1[5] = sub_multiply_shift_and_narrow_s32_dual(s1[7], s1[4], cospi_16_64); s1[6] = add_multiply_shift_and_narrow_s32_dual(s1[4], s1[7], cospi_16_64); s1[8] = highbd_idct_add_dual(s2[8], s2[11]); s1[9] = highbd_idct_add_dual(s2[9], s2[10]); s1[10] = highbd_idct_sub_dual(s2[9], s2[10]); s1[11] = highbd_idct_sub_dual(s2[8], s2[11]); s1[12] = highbd_idct_sub_dual(s2[15], s2[12]); s1[13] = highbd_idct_sub_dual(s2[14], s2[13]); s1[14] = highbd_idct_add_dual(s2[13], s2[14]); s1[15] = highbd_idct_add_dual(s2[12], s2[15]); s1[18] = multiply_accumulate_shift_and_narrow_s32_dual(s2[18], -cospi_8_64, s2[29], cospi_24_64); s1[29] = multiply_accumulate_shift_and_narrow_s32_dual(s2[18], cospi_24_64, s2[29], cospi_8_64); s1[19] = multiply_accumulate_shift_and_narrow_s32_dual(s2[19], -cospi_8_64, s2[28], cospi_24_64); s1[28] = multiply_accumulate_shift_and_narrow_s32_dual(s2[19], cospi_24_64, s2[28], cospi_8_64); s1[20] = multiply_accumulate_shift_and_narrow_s32_dual(s2[20], -cospi_24_64, s2[27], -cospi_8_64); s1[27] = multiply_accumulate_shift_and_narrow_s32_dual(s2[20], -cospi_8_64, s2[27], cospi_24_64); s1[21] = multiply_accumulate_shift_and_narrow_s32_dual(s2[21], -cospi_24_64, s2[26], -cospi_8_64); s1[26] = multiply_accumulate_shift_and_narrow_s32_dual(s2[21], -cospi_8_64, s2[26], cospi_24_64); // stage 6 s2[0] = highbd_idct_add_dual(s1[0], s1[7]); s2[1] = highbd_idct_add_dual(s1[0], s1[6]); s2[2] = highbd_idct_add_dual(s1[0], s1[5]); s2[3] = highbd_idct_add_dual(s1[0], s1[4]); s2[4] = highbd_idct_sub_dual(s1[0], s1[4]); s2[5] = highbd_idct_sub_dual(s1[0], s1[5]); s2[6] = highbd_idct_sub_dual(s1[0], s1[6]); s2[7] = highbd_idct_sub_dual(s1[0], s1[7]); s2[10] = sub_multiply_shift_and_narrow_s32_dual(s1[13], s1[10], cospi_16_64); s2[13] = add_multiply_shift_and_narrow_s32_dual(s1[10], s1[13], cospi_16_64); s2[11] = sub_multiply_shift_and_narrow_s32_dual(s1[12], s1[11], cospi_16_64); s2[12] = add_multiply_shift_and_narrow_s32_dual(s1[11], s1[12], cospi_16_64); s1[16] = highbd_idct_add_dual(s2[16], s2[23]); s1[17] = highbd_idct_add_dual(s2[17], s2[22]); s2[18] = highbd_idct_add_dual(s1[18], s1[21]); s2[19] = highbd_idct_add_dual(s1[19], s1[20]); s2[20] = highbd_idct_sub_dual(s1[19], s1[20]); s2[21] = highbd_idct_sub_dual(s1[18], s1[21]); s1[22] = highbd_idct_sub_dual(s2[17], s2[22]); s1[23] = highbd_idct_sub_dual(s2[16], s2[23]); s3[24] = highbd_idct_sub_dual(s2[31], s2[24]); s3[25] = highbd_idct_sub_dual(s2[30], s2[25]); s3[26] = highbd_idct_sub_dual(s1[29], s1[26]); s3[27] = highbd_idct_sub_dual(s1[28], s1[27]); s2[28] = highbd_idct_add_dual(s1[27], s1[28]); s2[29] = highbd_idct_add_dual(s1[26], s1[29]); s2[30] = highbd_idct_add_dual(s2[25], s2[30]); s2[31] = highbd_idct_add_dual(s2[24], s2[31]); // stage 7 s1[0] = highbd_idct_add_dual(s2[0], s1[15]); s1[1] = highbd_idct_add_dual(s2[1], s1[14]); s1[2] = highbd_idct_add_dual(s2[2], s2[13]); s1[3] = highbd_idct_add_dual(s2[3], s2[12]); s1[4] = highbd_idct_add_dual(s2[4], s2[11]); s1[5] = highbd_idct_add_dual(s2[5], s2[10]); s1[6] = highbd_idct_add_dual(s2[6], s1[9]); s1[7] = highbd_idct_add_dual(s2[7], s1[8]); s1[8] = 
highbd_idct_sub_dual(s2[7], s1[8]); s1[9] = highbd_idct_sub_dual(s2[6], s1[9]); s1[10] = highbd_idct_sub_dual(s2[5], s2[10]); s1[11] = highbd_idct_sub_dual(s2[4], s2[11]); s1[12] = highbd_idct_sub_dual(s2[3], s2[12]); s1[13] = highbd_idct_sub_dual(s2[2], s2[13]); s1[14] = highbd_idct_sub_dual(s2[1], s1[14]); s1[15] = highbd_idct_sub_dual(s2[0], s1[15]); s1[20] = sub_multiply_shift_and_narrow_s32_dual(s3[27], s2[20], cospi_16_64); s1[27] = add_multiply_shift_and_narrow_s32_dual(s2[20], s3[27], cospi_16_64); s1[21] = sub_multiply_shift_and_narrow_s32_dual(s3[26], s2[21], cospi_16_64); s1[26] = add_multiply_shift_and_narrow_s32_dual(s2[21], s3[26], cospi_16_64); s2[22] = sub_multiply_shift_and_narrow_s32_dual(s3[25], s1[22], cospi_16_64); s1[25] = add_multiply_shift_and_narrow_s32_dual(s1[22], s3[25], cospi_16_64); s2[23] = sub_multiply_shift_and_narrow_s32_dual(s3[24], s1[23], cospi_16_64); s1[24] = add_multiply_shift_and_narrow_s32_dual(s1[23], s3[24], cospi_16_64); // final stage out[0] = highbd_idct_add_dual(s1[0], s2[31]); out[1] = highbd_idct_add_dual(s1[1], s2[30]); out[2] = highbd_idct_add_dual(s1[2], s2[29]); out[3] = highbd_idct_add_dual(s1[3], s2[28]); out[4] = highbd_idct_add_dual(s1[4], s1[27]); out[5] = highbd_idct_add_dual(s1[5], s1[26]); out[6] = highbd_idct_add_dual(s1[6], s1[25]); out[7] = highbd_idct_add_dual(s1[7], s1[24]); out[8] = highbd_idct_add_dual(s1[8], s2[23]); out[9] = highbd_idct_add_dual(s1[9], s2[22]); out[10] = highbd_idct_add_dual(s1[10], s1[21]); out[11] = highbd_idct_add_dual(s1[11], s1[20]); out[12] = highbd_idct_add_dual(s1[12], s2[19]); out[13] = highbd_idct_add_dual(s1[13], s2[18]); out[14] = highbd_idct_add_dual(s1[14], s1[17]); out[15] = highbd_idct_add_dual(s1[15], s1[16]); out[16] = highbd_idct_sub_dual(s1[15], s1[16]); out[17] = highbd_idct_sub_dual(s1[14], s1[17]); out[18] = highbd_idct_sub_dual(s1[13], s2[18]); out[19] = highbd_idct_sub_dual(s1[12], s2[19]); out[20] = highbd_idct_sub_dual(s1[11], s1[20]); out[21] = highbd_idct_sub_dual(s1[10], s1[21]); out[22] = highbd_idct_sub_dual(s1[9], s2[22]); out[23] = highbd_idct_sub_dual(s1[8], s2[23]); out[24] = highbd_idct_sub_dual(s1[7], s1[24]); out[25] = highbd_idct_sub_dual(s1[6], s1[25]); out[26] = highbd_idct_sub_dual(s1[5], s1[26]); out[27] = highbd_idct_sub_dual(s1[4], s1[27]); out[28] = highbd_idct_sub_dual(s1[3], s2[28]); out[29] = highbd_idct_sub_dual(s1[2], s2[29]); out[30] = highbd_idct_sub_dual(s1[1], s2[30]); out[31] = highbd_idct_sub_dual(s1[0], s2[31]); highbd_idct16x16_add_store(out, output, stride, bd); highbd_idct16x16_add_store(out + 16, output + 16 * stride, stride, bd); } void vpx_highbd_idct32x32_34_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i; if (bd == 8) { int16_t temp[32 * 8]; int16_t *t = temp; vpx_idct32_6_neon(input, t); for (i = 0; i < 32; i += 8) { vpx_idct32_8_neon(t, dest, stride, 1); t += (8 * 8); dest += 8; } } else { int32_t temp[32 * 8]; int32_t *t = temp; vpx_highbd_idct32_6_neon(input, t); for (i = 0; i < 32; i += 8) { vpx_highbd_idct32_8_neon(t, dest, stride, bd); t += (8 * 8); dest += 8; } } } libvpx-1.8.2/vpx_dsp/arm/highbd_idct32x32_add_neon.c000066400000000000000000000072601357355204000222260ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/inv_txfm.h" static INLINE void highbd_idct32x32_1_add_pos_kernel(uint16_t **dest, const int stride, const int16x8_t res, const int16x8_t max) { const uint16x8_t a0 = vld1q_u16(*dest); const uint16x8_t a1 = vld1q_u16(*dest + 8); const uint16x8_t a2 = vld1q_u16(*dest + 16); const uint16x8_t a3 = vld1q_u16(*dest + 24); const int16x8_t b0 = vaddq_s16(res, vreinterpretq_s16_u16(a0)); const int16x8_t b1 = vaddq_s16(res, vreinterpretq_s16_u16(a1)); const int16x8_t b2 = vaddq_s16(res, vreinterpretq_s16_u16(a2)); const int16x8_t b3 = vaddq_s16(res, vreinterpretq_s16_u16(a3)); const int16x8_t c0 = vminq_s16(b0, max); const int16x8_t c1 = vminq_s16(b1, max); const int16x8_t c2 = vminq_s16(b2, max); const int16x8_t c3 = vminq_s16(b3, max); vst1q_u16(*dest, vreinterpretq_u16_s16(c0)); vst1q_u16(*dest + 8, vreinterpretq_u16_s16(c1)); vst1q_u16(*dest + 16, vreinterpretq_u16_s16(c2)); vst1q_u16(*dest + 24, vreinterpretq_u16_s16(c3)); *dest += stride; } static INLINE void highbd_idct32x32_1_add_neg_kernel(uint16_t **dest, const int stride, const int16x8_t res) { const uint16x8_t a0 = vld1q_u16(*dest); const uint16x8_t a1 = vld1q_u16(*dest + 8); const uint16x8_t a2 = vld1q_u16(*dest + 16); const uint16x8_t a3 = vld1q_u16(*dest + 24); const int16x8_t b0 = vaddq_s16(res, vreinterpretq_s16_u16(a0)); const int16x8_t b1 = vaddq_s16(res, vreinterpretq_s16_u16(a1)); const int16x8_t b2 = vaddq_s16(res, vreinterpretq_s16_u16(a2)); const int16x8_t b3 = vaddq_s16(res, vreinterpretq_s16_u16(a3)); const uint16x8_t c0 = vqshluq_n_s16(b0, 0); const uint16x8_t c1 = vqshluq_n_s16(b1, 0); const uint16x8_t c2 = vqshluq_n_s16(b2, 0); const uint16x8_t c3 = vqshluq_n_s16(b3, 0); vst1q_u16(*dest, c0); vst1q_u16(*dest + 8, c1); vst1q_u16(*dest + 16, c2); vst1q_u16(*dest + 24, c3); *dest += stride; } void vpx_highbd_idct32x32_1_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { const tran_low_t out0 = HIGHBD_WRAPLOW( dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); const tran_low_t out1 = HIGHBD_WRAPLOW( dct_const_round_shift(out0 * (tran_high_t)cospi_16_64), bd); const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6); const int16x8_t dc = vdupq_n_s16(a1); int i; if (a1 >= 0) { const int16x8_t max = vdupq_n_s16((1 << bd) - 1); for (i = 0; i < 8; ++i) { highbd_idct32x32_1_add_pos_kernel(&dest, stride, dc, max); highbd_idct32x32_1_add_pos_kernel(&dest, stride, dc, max); highbd_idct32x32_1_add_pos_kernel(&dest, stride, dc, max); highbd_idct32x32_1_add_pos_kernel(&dest, stride, dc, max); } } else { for (i = 0; i < 8; ++i) { highbd_idct32x32_1_add_neg_kernel(&dest, stride, dc); highbd_idct32x32_1_add_neg_kernel(&dest, stride, dc); highbd_idct32x32_1_add_neg_kernel(&dest, stride, dc); highbd_idct32x32_1_add_neg_kernel(&dest, stride, dc); } } } libvpx-1.8.2/vpx_dsp/arm/highbd_idct4x4_add_neon.c000066400000000000000000000062331357355204000220630ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/highbd_idct_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/inv_txfm.h" // res is in reverse row order static INLINE void highbd_idct4x4_1_add_kernel2(uint16_t **dest, const int stride, const int16x8_t res, const int16x8_t max) { const uint16x4_t a0 = vld1_u16(*dest); const uint16x4_t a1 = vld1_u16(*dest + stride); const int16x8_t a = vreinterpretq_s16_u16(vcombine_u16(a1, a0)); // Note: In some profile tests, res is quite close to +/-32767. // We use saturating addition. const int16x8_t b = vqaddq_s16(res, a); const int16x8_t c = vminq_s16(b, max); const uint16x8_t d = vqshluq_n_s16(c, 0); vst1_u16(*dest, vget_high_u16(d)); *dest += stride; vst1_u16(*dest, vget_low_u16(d)); *dest += stride; } void vpx_highbd_idct4x4_1_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { const int16x8_t max = vdupq_n_s16((1 << bd) - 1); const tran_low_t out0 = HIGHBD_WRAPLOW( dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); const tran_low_t out1 = HIGHBD_WRAPLOW( dct_const_round_shift(out0 * (tran_high_t)cospi_16_64), bd); const int16_t a1 = ROUND_POWER_OF_TWO(out1, 4); const int16x8_t dc = vdupq_n_s16(a1); highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max); highbd_idct4x4_1_add_kernel1(&dest, stride, dc, max); } void vpx_highbd_idct4x4_16_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { const int16x8_t max = vdupq_n_s16((1 << bd) - 1); int16x8_t a[2]; int32x4_t c[4]; c[0] = vld1q_s32(input); c[1] = vld1q_s32(input + 4); c[2] = vld1q_s32(input + 8); c[3] = vld1q_s32(input + 12); if (bd == 8) { // Rows a[0] = vcombine_s16(vmovn_s32(c[0]), vmovn_s32(c[1])); a[1] = vcombine_s16(vmovn_s32(c[2]), vmovn_s32(c[3])); transpose_idct4x4_16_bd8(a); // Columns a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); transpose_idct4x4_16_bd8(a); a[0] = vrshrq_n_s16(a[0], 4); a[1] = vrshrq_n_s16(a[1], 4); } else { const int32x4_t cospis = vld1q_s32(kCospi32); if (bd == 10) { idct4x4_16_kernel_bd10(cospis, c); idct4x4_16_kernel_bd10(cospis, c); } else { idct4x4_16_kernel_bd12(cospis, c); idct4x4_16_kernel_bd12(cospis, c); } a[0] = vcombine_s16(vqrshrn_n_s32(c[0], 4), vqrshrn_n_s32(c[1], 4)); a[1] = vcombine_s16(vqrshrn_n_s32(c[3], 4), vqrshrn_n_s32(c[2], 4)); } highbd_idct4x4_1_add_kernel1(&dest, stride, a[0], max); highbd_idct4x4_1_add_kernel2(&dest, stride, a[1], max); } libvpx-1.8.2/vpx_dsp/arm/highbd_idct8x8_add_neon.c000066400000000000000000000370461357355204000221010ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/highbd_idct_neon.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/inv_txfm.h" static INLINE void highbd_idct8x8_1_add_pos_kernel(uint16_t **dest, const int stride, const int16x8_t res, const int16x8_t max) { const uint16x8_t a = vld1q_u16(*dest); const int16x8_t b = vaddq_s16(res, vreinterpretq_s16_u16(a)); const int16x8_t c = vminq_s16(b, max); vst1q_u16(*dest, vreinterpretq_u16_s16(c)); *dest += stride; } static INLINE void highbd_idct8x8_1_add_neg_kernel(uint16_t **dest, const int stride, const int16x8_t res) { const uint16x8_t a = vld1q_u16(*dest); const int16x8_t b = vaddq_s16(res, vreinterpretq_s16_u16(a)); const uint16x8_t c = vqshluq_n_s16(b, 0); vst1q_u16(*dest, c); *dest += stride; } void vpx_highbd_idct8x8_1_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { const tran_low_t out0 = HIGHBD_WRAPLOW( dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); const tran_low_t out1 = HIGHBD_WRAPLOW( dct_const_round_shift(out0 * (tran_high_t)cospi_16_64), bd); const int16_t a1 = ROUND_POWER_OF_TWO(out1, 5); const int16x8_t dc = vdupq_n_s16(a1); if (a1 >= 0) { const int16x8_t max = vdupq_n_s16((1 << bd) - 1); highbd_idct8x8_1_add_pos_kernel(&dest, stride, dc, max); highbd_idct8x8_1_add_pos_kernel(&dest, stride, dc, max); highbd_idct8x8_1_add_pos_kernel(&dest, stride, dc, max); highbd_idct8x8_1_add_pos_kernel(&dest, stride, dc, max); highbd_idct8x8_1_add_pos_kernel(&dest, stride, dc, max); highbd_idct8x8_1_add_pos_kernel(&dest, stride, dc, max); highbd_idct8x8_1_add_pos_kernel(&dest, stride, dc, max); highbd_idct8x8_1_add_pos_kernel(&dest, stride, dc, max); } else { highbd_idct8x8_1_add_neg_kernel(&dest, stride, dc); highbd_idct8x8_1_add_neg_kernel(&dest, stride, dc); highbd_idct8x8_1_add_neg_kernel(&dest, stride, dc); highbd_idct8x8_1_add_neg_kernel(&dest, stride, dc); highbd_idct8x8_1_add_neg_kernel(&dest, stride, dc); highbd_idct8x8_1_add_neg_kernel(&dest, stride, dc); highbd_idct8x8_1_add_neg_kernel(&dest, stride, dc); highbd_idct8x8_1_add_neg_kernel(&dest, stride, dc); } } static INLINE void idct8x8_12_half1d_bd10( const int32x4_t cospis0, const int32x4_t cospis1, int32x4_t *const io0, int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3, int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6, int32x4_t *const io7) { int32x4_t step1[8], step2[8]; transpose_s32_4x4(io0, io1, io2, io3); // stage 1 step1[4] = vmulq_lane_s32(*io1, vget_high_s32(cospis1), 1); step1[5] = vmulq_lane_s32(*io3, vget_high_s32(cospis1), 0); step1[6] = vmulq_lane_s32(*io3, vget_low_s32(cospis1), 1); step1[7] = vmulq_lane_s32(*io1, vget_low_s32(cospis1), 0); step1[4] = vrshrq_n_s32(step1[4], DCT_CONST_BITS); step1[5] = vrshrq_n_s32(step1[5], DCT_CONST_BITS); step1[6] = vrshrq_n_s32(step1[6], DCT_CONST_BITS); step1[7] = vrshrq_n_s32(step1[7], DCT_CONST_BITS); // stage 2 step2[1] = vmulq_lane_s32(*io0, vget_high_s32(cospis0), 0); step2[2] = vmulq_lane_s32(*io2, vget_high_s32(cospis0), 1); step2[3] = vmulq_lane_s32(*io2, vget_low_s32(cospis0), 1); step2[1] = vrshrq_n_s32(step2[1], DCT_CONST_BITS); step2[2] = vrshrq_n_s32(step2[2], DCT_CONST_BITS); step2[3] = vrshrq_n_s32(step2[3], DCT_CONST_BITS); step2[4] = vaddq_s32(step1[4], step1[5]); step2[5] = vsubq_s32(step1[4], step1[5]); step2[6] = vsubq_s32(step1[7], step1[6]); step2[7] = vaddq_s32(step1[7], step1[6]); // stage 3 step1[0] = vaddq_s32(step2[1], step2[3]); step1[1] = vaddq_s32(step2[1], step2[2]);
step1[2] = vsubq_s32(step2[1], step2[2]); step1[3] = vsubq_s32(step2[1], step2[3]); step1[6] = vmulq_lane_s32(step2[6], vget_high_s32(cospis0), 0); step1[5] = vmlsq_lane_s32(step1[6], step2[5], vget_high_s32(cospis0), 0); step1[6] = vmlaq_lane_s32(step1[6], step2[5], vget_high_s32(cospis0), 0); step1[5] = vrshrq_n_s32(step1[5], DCT_CONST_BITS); step1[6] = vrshrq_n_s32(step1[6], DCT_CONST_BITS); // stage 4 *io0 = vaddq_s32(step1[0], step2[7]); *io1 = vaddq_s32(step1[1], step1[6]); *io2 = vaddq_s32(step1[2], step1[5]); *io3 = vaddq_s32(step1[3], step2[4]); *io4 = vsubq_s32(step1[3], step2[4]); *io5 = vsubq_s32(step1[2], step1[5]); *io6 = vsubq_s32(step1[1], step1[6]); *io7 = vsubq_s32(step1[0], step2[7]); } static INLINE void idct8x8_12_half1d_bd12( const int32x4_t cospis0, const int32x4_t cospis1, int32x4_t *const io0, int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3, int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6, int32x4_t *const io7) { int32x2_t input1l, input1h, input3l, input3h; int32x2_t step1l[2], step1h[2]; int32x4_t step1[8], step2[8]; int64x2_t t64[8]; int32x2_t t32[8]; transpose_s32_4x4(io0, io1, io2, io3); // stage 1 input1l = vget_low_s32(*io1); input1h = vget_high_s32(*io1); input3l = vget_low_s32(*io3); input3h = vget_high_s32(*io3); step1l[0] = vget_low_s32(*io0); step1h[0] = vget_high_s32(*io0); step1l[1] = vget_low_s32(*io2); step1h[1] = vget_high_s32(*io2); t64[0] = vmull_lane_s32(input1l, vget_high_s32(cospis1), 1); t64[1] = vmull_lane_s32(input1h, vget_high_s32(cospis1), 1); t64[2] = vmull_lane_s32(input3l, vget_high_s32(cospis1), 0); t64[3] = vmull_lane_s32(input3h, vget_high_s32(cospis1), 0); t64[4] = vmull_lane_s32(input3l, vget_low_s32(cospis1), 1); t64[5] = vmull_lane_s32(input3h, vget_low_s32(cospis1), 1); t64[6] = vmull_lane_s32(input1l, vget_low_s32(cospis1), 0); t64[7] = vmull_lane_s32(input1h, vget_low_s32(cospis1), 0); t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS); t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS); t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); t32[3] = vrshrn_n_s64(t64[3], DCT_CONST_BITS); t32[4] = vrshrn_n_s64(t64[4], DCT_CONST_BITS); t32[5] = vrshrn_n_s64(t64[5], DCT_CONST_BITS); t32[6] = vrshrn_n_s64(t64[6], DCT_CONST_BITS); t32[7] = vrshrn_n_s64(t64[7], DCT_CONST_BITS); step1[4] = vcombine_s32(t32[0], t32[1]); step1[5] = vcombine_s32(t32[2], t32[3]); step1[6] = vcombine_s32(t32[4], t32[5]); step1[7] = vcombine_s32(t32[6], t32[7]); // stage 2 t64[2] = vmull_lane_s32(step1l[0], vget_high_s32(cospis0), 0); t64[3] = vmull_lane_s32(step1h[0], vget_high_s32(cospis0), 0); t64[4] = vmull_lane_s32(step1l[1], vget_high_s32(cospis0), 1); t64[5] = vmull_lane_s32(step1h[1], vget_high_s32(cospis0), 1); t64[6] = vmull_lane_s32(step1l[1], vget_low_s32(cospis0), 1); t64[7] = vmull_lane_s32(step1h[1], vget_low_s32(cospis0), 1); t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); t32[3] = vrshrn_n_s64(t64[3], DCT_CONST_BITS); t32[4] = vrshrn_n_s64(t64[4], DCT_CONST_BITS); t32[5] = vrshrn_n_s64(t64[5], DCT_CONST_BITS); t32[6] = vrshrn_n_s64(t64[6], DCT_CONST_BITS); t32[7] = vrshrn_n_s64(t64[7], DCT_CONST_BITS); step2[1] = vcombine_s32(t32[2], t32[3]); step2[2] = vcombine_s32(t32[4], t32[5]); step2[3] = vcombine_s32(t32[6], t32[7]); step2[4] = vaddq_s32(step1[4], step1[5]); step2[5] = vsubq_s32(step1[4], step1[5]); step2[6] = vsubq_s32(step1[7], step1[6]); step2[7] = vaddq_s32(step1[7], step1[6]); // stage 3 step1[0] = vaddq_s32(step2[1], step2[3]); step1[1] = vaddq_s32(step2[1], step2[2]); step1[2] = vsubq_s32(step2[1], step2[2]); 
step1[3] = vsubq_s32(step2[1], step2[3]); t64[2] = vmull_lane_s32(vget_low_s32(step2[6]), vget_high_s32(cospis0), 0); t64[3] = vmull_lane_s32(vget_high_s32(step2[6]), vget_high_s32(cospis0), 0); t64[0] = vmlsl_lane_s32(t64[2], vget_low_s32(step2[5]), vget_high_s32(cospis0), 0); t64[1] = vmlsl_lane_s32(t64[3], vget_high_s32(step2[5]), vget_high_s32(cospis0), 0); t64[2] = vmlal_lane_s32(t64[2], vget_low_s32(step2[5]), vget_high_s32(cospis0), 0); t64[3] = vmlal_lane_s32(t64[3], vget_high_s32(step2[5]), vget_high_s32(cospis0), 0); t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS); t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS); t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); t32[3] = vrshrn_n_s64(t64[3], DCT_CONST_BITS); step1[5] = vcombine_s32(t32[0], t32[1]); step1[6] = vcombine_s32(t32[2], t32[3]); // stage 4 *io0 = vaddq_s32(step1[0], step2[7]); *io1 = vaddq_s32(step1[1], step1[6]); *io2 = vaddq_s32(step1[2], step1[5]); *io3 = vaddq_s32(step1[3], step2[4]); *io4 = vsubq_s32(step1[3], step2[4]); *io5 = vsubq_s32(step1[2], step1[5]); *io6 = vsubq_s32(step1[1], step1[6]); *io7 = vsubq_s32(step1[0], step2[7]); } void vpx_highbd_idct8x8_12_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int32x4_t a[16]; int16x8_t c[8]; a[0] = vld1q_s32(input); a[1] = vld1q_s32(input + 8); a[2] = vld1q_s32(input + 16); a[3] = vld1q_s32(input + 24); if (bd == 8) { const int16x8_t cospis = vld1q_s16(kCospi); const int16x8_t cospisd = vaddq_s16(cospis, cospis); const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospisd0 = vget_low_s16(cospisd); // doubled 0, 8, 16, 24 const int16x4_t cospisd1 = vget_high_s16(cospisd); // doubled 4, 12, 20, 28 int16x4_t b[8]; b[0] = vmovn_s32(a[0]); b[1] = vmovn_s32(a[1]); b[2] = vmovn_s32(a[2]); b[3] = vmovn_s32(a[3]); idct8x8_12_pass1_bd8(cospis0, cospisd0, cospisd1, b); idct8x8_12_pass2_bd8(cospis0, cospisd0, cospisd1, b, c); c[0] = vrshrq_n_s16(c[0], 5); c[1] = vrshrq_n_s16(c[1], 5); c[2] = vrshrq_n_s16(c[2], 5); c[3] = vrshrq_n_s16(c[3], 5); c[4] = vrshrq_n_s16(c[4], 5); c[5] = vrshrq_n_s16(c[5], 5); c[6] = vrshrq_n_s16(c[6], 5); c[7] = vrshrq_n_s16(c[7], 5); } else { const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24 const int32x4_t cospis1 = vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28 if (bd == 10) { idct8x8_12_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); idct8x8_12_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], &a[8], &a[9], &a[10], &a[11]); idct8x8_12_half1d_bd10(cospis0, cospis1, &a[4], &a[5], &a[6], &a[7], &a[12], &a[13], &a[14], &a[15]); } else { idct8x8_12_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); idct8x8_12_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], &a[8], &a[9], &a[10], &a[11]); idct8x8_12_half1d_bd12(cospis0, cospis1, &a[4], &a[5], &a[6], &a[7], &a[12], &a[13], &a[14], &a[15]); } c[0] = vcombine_s16(vrshrn_n_s32(a[0], 5), vrshrn_n_s32(a[4], 5)); c[1] = vcombine_s16(vrshrn_n_s32(a[1], 5), vrshrn_n_s32(a[5], 5)); c[2] = vcombine_s16(vrshrn_n_s32(a[2], 5), vrshrn_n_s32(a[6], 5)); c[3] = vcombine_s16(vrshrn_n_s32(a[3], 5), vrshrn_n_s32(a[7], 5)); c[4] = vcombine_s16(vrshrn_n_s32(a[8], 5), vrshrn_n_s32(a[12], 5)); c[5] = vcombine_s16(vrshrn_n_s32(a[9], 5), vrshrn_n_s32(a[13], 5)); c[6] = vcombine_s16(vrshrn_n_s32(a[10], 5), vrshrn_n_s32(a[14], 5)); c[7] = vcombine_s16(vrshrn_n_s32(a[11], 5), vrshrn_n_s32(a[15], 5)); } highbd_add8x8(c, dest, stride, bd); } void 
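/* Full 8x8 inverse transform (all 64 coefficients) follows: the bd == 8 path narrows to 16-bit lanes and runs two 16-bit 1-D passes, while the 10- and 12-bit paths stay in 32-bit lanes (the 12-bit path widening to 64-bit multiply intermediates), before rounding by 5 bits and clamping to (1 << bd) - 1 in highbd_add8x8(). */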
vpx_highbd_idct8x8_64_add_neon(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int32x4_t a[16]; int16x8_t c[8]; a[0] = vld1q_s32(input); a[1] = vld1q_s32(input + 4); a[2] = vld1q_s32(input + 8); a[3] = vld1q_s32(input + 12); a[4] = vld1q_s32(input + 16); a[5] = vld1q_s32(input + 20); a[6] = vld1q_s32(input + 24); a[7] = vld1q_s32(input + 28); a[8] = vld1q_s32(input + 32); a[9] = vld1q_s32(input + 36); a[10] = vld1q_s32(input + 40); a[11] = vld1q_s32(input + 44); a[12] = vld1q_s32(input + 48); a[13] = vld1q_s32(input + 52); a[14] = vld1q_s32(input + 56); a[15] = vld1q_s32(input + 60); if (bd == 8) { const int16x8_t cospis = vld1q_s16(kCospi); const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 int16x8_t b[8]; b[0] = vcombine_s16(vmovn_s32(a[0]), vmovn_s32(a[1])); b[1] = vcombine_s16(vmovn_s32(a[2]), vmovn_s32(a[3])); b[2] = vcombine_s16(vmovn_s32(a[4]), vmovn_s32(a[5])); b[3] = vcombine_s16(vmovn_s32(a[6]), vmovn_s32(a[7])); b[4] = vcombine_s16(vmovn_s32(a[8]), vmovn_s32(a[9])); b[5] = vcombine_s16(vmovn_s32(a[10]), vmovn_s32(a[11])); b[6] = vcombine_s16(vmovn_s32(a[12]), vmovn_s32(a[13])); b[7] = vcombine_s16(vmovn_s32(a[14]), vmovn_s32(a[15])); idct8x8_64_1d_bd8(cospis0, cospis1, b); idct8x8_64_1d_bd8(cospis0, cospis1, b); c[0] = vrshrq_n_s16(b[0], 5); c[1] = vrshrq_n_s16(b[1], 5); c[2] = vrshrq_n_s16(b[2], 5); c[3] = vrshrq_n_s16(b[3], 5); c[4] = vrshrq_n_s16(b[4], 5); c[5] = vrshrq_n_s16(b[5], 5); c[6] = vrshrq_n_s16(b[6], 5); c[7] = vrshrq_n_s16(b[7], 5); } else { const int32x4_t cospis0 = vld1q_s32(kCospi32); // cospi 0, 8, 16, 24 const int32x4_t cospis1 = vld1q_s32(kCospi32 + 4); // cospi 4, 12, 20, 28 if (bd == 10) { idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); idct8x8_64_half1d_bd10(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], &a[15]); idct8x8_64_half1d_bd10(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]); idct8x8_64_half1d_bd10(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], &a[15]); } else { idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[1], &a[2], &a[3], &a[4], &a[5], &a[6], &a[7]); idct8x8_64_half1d_bd12(cospis0, cospis1, &a[8], &a[9], &a[10], &a[11], &a[12], &a[13], &a[14], &a[15]); idct8x8_64_half1d_bd12(cospis0, cospis1, &a[0], &a[8], &a[1], &a[9], &a[2], &a[10], &a[3], &a[11]); idct8x8_64_half1d_bd12(cospis0, cospis1, &a[4], &a[12], &a[5], &a[13], &a[6], &a[14], &a[7], &a[15]); } c[0] = vcombine_s16(vrshrn_n_s32(a[0], 5), vrshrn_n_s32(a[4], 5)); c[1] = vcombine_s16(vrshrn_n_s32(a[8], 5), vrshrn_n_s32(a[12], 5)); c[2] = vcombine_s16(vrshrn_n_s32(a[1], 5), vrshrn_n_s32(a[5], 5)); c[3] = vcombine_s16(vrshrn_n_s32(a[9], 5), vrshrn_n_s32(a[13], 5)); c[4] = vcombine_s16(vrshrn_n_s32(a[2], 5), vrshrn_n_s32(a[6], 5)); c[5] = vcombine_s16(vrshrn_n_s32(a[10], 5), vrshrn_n_s32(a[14], 5)); c[6] = vcombine_s16(vrshrn_n_s32(a[3], 5), vrshrn_n_s32(a[7], 5)); c[7] = vcombine_s16(vrshrn_n_s32(a[11], 5), vrshrn_n_s32(a[15], 5)); } highbd_add8x8(c, dest, stride, bd); } libvpx-1.8.2/vpx_dsp/arm/highbd_idct_neon.h000066400000000000000000000465301357355204000207240ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_ARM_HIGHBD_IDCT_NEON_H_ #define VPX_VPX_DSP_ARM_HIGHBD_IDCT_NEON_H_ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/inv_txfm.h" static INLINE void highbd_idct4x4_1_add_kernel1(uint16_t **dest, const int stride, const int16x8_t res, const int16x8_t max) { const uint16x4_t a0 = vld1_u16(*dest); const uint16x4_t a1 = vld1_u16(*dest + stride); const int16x8_t a = vreinterpretq_s16_u16(vcombine_u16(a0, a1)); // Note: In some profile tests, res is quite close to +/-32767. // We use saturating addition. const int16x8_t b = vqaddq_s16(res, a); const int16x8_t c = vminq_s16(b, max); const uint16x8_t d = vqshluq_n_s16(c, 0); vst1_u16(*dest, vget_low_u16(d)); *dest += stride; vst1_u16(*dest, vget_high_u16(d)); *dest += stride; } static INLINE void idct4x4_16_kernel_bd10(const int32x4_t cospis, int32x4_t *const a) { int32x4_t b0, b1, b2, b3; transpose_s32_4x4(&a[0], &a[1], &a[2], &a[3]); b0 = vaddq_s32(a[0], a[2]); b1 = vsubq_s32(a[0], a[2]); b0 = vmulq_lane_s32(b0, vget_high_s32(cospis), 0); b1 = vmulq_lane_s32(b1, vget_high_s32(cospis), 0); b2 = vmulq_lane_s32(a[1], vget_high_s32(cospis), 1); b3 = vmulq_lane_s32(a[1], vget_low_s32(cospis), 1); b2 = vmlsq_lane_s32(b2, a[3], vget_low_s32(cospis), 1); b3 = vmlaq_lane_s32(b3, a[3], vget_high_s32(cospis), 1); b0 = vrshrq_n_s32(b0, DCT_CONST_BITS); b1 = vrshrq_n_s32(b1, DCT_CONST_BITS); b2 = vrshrq_n_s32(b2, DCT_CONST_BITS); b3 = vrshrq_n_s32(b3, DCT_CONST_BITS); a[0] = vaddq_s32(b0, b3); a[1] = vaddq_s32(b1, b2); a[2] = vsubq_s32(b1, b2); a[3] = vsubq_s32(b0, b3); } static INLINE void idct4x4_16_kernel_bd12(const int32x4_t cospis, int32x4_t *const a) { int32x4_t b0, b1, b2, b3; int64x2_t c[12]; transpose_s32_4x4(&a[0], &a[1], &a[2], &a[3]); b0 = vaddq_s32(a[0], a[2]); b1 = vsubq_s32(a[0], a[2]); c[0] = vmull_lane_s32(vget_low_s32(b0), vget_high_s32(cospis), 0); c[1] = vmull_lane_s32(vget_high_s32(b0), vget_high_s32(cospis), 0); c[2] = vmull_lane_s32(vget_low_s32(b1), vget_high_s32(cospis), 0); c[3] = vmull_lane_s32(vget_high_s32(b1), vget_high_s32(cospis), 0); c[4] = vmull_lane_s32(vget_low_s32(a[1]), vget_high_s32(cospis), 1); c[5] = vmull_lane_s32(vget_high_s32(a[1]), vget_high_s32(cospis), 1); c[6] = vmull_lane_s32(vget_low_s32(a[1]), vget_low_s32(cospis), 1); c[7] = vmull_lane_s32(vget_high_s32(a[1]), vget_low_s32(cospis), 1); c[8] = vmull_lane_s32(vget_low_s32(a[3]), vget_low_s32(cospis), 1); c[9] = vmull_lane_s32(vget_high_s32(a[3]), vget_low_s32(cospis), 1); c[10] = vmull_lane_s32(vget_low_s32(a[3]), vget_high_s32(cospis), 1); c[11] = vmull_lane_s32(vget_high_s32(a[3]), vget_high_s32(cospis), 1); c[4] = vsubq_s64(c[4], c[8]); c[5] = vsubq_s64(c[5], c[9]); c[6] = vaddq_s64(c[6], c[10]); c[7] = vaddq_s64(c[7], c[11]); b0 = vcombine_s32(vrshrn_n_s64(c[0], DCT_CONST_BITS), vrshrn_n_s64(c[1], DCT_CONST_BITS)); b1 = vcombine_s32(vrshrn_n_s64(c[2], DCT_CONST_BITS), vrshrn_n_s64(c[3], DCT_CONST_BITS)); b2 = vcombine_s32(vrshrn_n_s64(c[4], DCT_CONST_BITS), vrshrn_n_s64(c[5], DCT_CONST_BITS)); b3 = vcombine_s32(vrshrn_n_s64(c[6], DCT_CONST_BITS), vrshrn_n_s64(c[7], DCT_CONST_BITS)); a[0] = vaddq_s32(b0, b3); a[1] = vaddq_s32(b1, b2); a[2] = vsubq_s32(b1, b2); a[3] = vsubq_s32(b0, b3); } static INLINE void highbd_add8x8(int16x8_t *const a, uint16_t *dest, const int stride, const int bd) { const int16x8_t max =
vdupq_n_s16((1 << bd) - 1); const uint16_t *dst = dest; uint16x8_t d0, d1, d2, d3, d4, d5, d6, d7; uint16x8_t d0_u16, d1_u16, d2_u16, d3_u16, d4_u16, d5_u16, d6_u16, d7_u16; int16x8_t d0_s16, d1_s16, d2_s16, d3_s16, d4_s16, d5_s16, d6_s16, d7_s16; d0 = vld1q_u16(dst); dst += stride; d1 = vld1q_u16(dst); dst += stride; d2 = vld1q_u16(dst); dst += stride; d3 = vld1q_u16(dst); dst += stride; d4 = vld1q_u16(dst); dst += stride; d5 = vld1q_u16(dst); dst += stride; d6 = vld1q_u16(dst); dst += stride; d7 = vld1q_u16(dst); d0_s16 = vqaddq_s16(a[0], vreinterpretq_s16_u16(d0)); d1_s16 = vqaddq_s16(a[1], vreinterpretq_s16_u16(d1)); d2_s16 = vqaddq_s16(a[2], vreinterpretq_s16_u16(d2)); d3_s16 = vqaddq_s16(a[3], vreinterpretq_s16_u16(d3)); d4_s16 = vqaddq_s16(a[4], vreinterpretq_s16_u16(d4)); d5_s16 = vqaddq_s16(a[5], vreinterpretq_s16_u16(d5)); d6_s16 = vqaddq_s16(a[6], vreinterpretq_s16_u16(d6)); d7_s16 = vqaddq_s16(a[7], vreinterpretq_s16_u16(d7)); d0_s16 = vminq_s16(d0_s16, max); d1_s16 = vminq_s16(d1_s16, max); d2_s16 = vminq_s16(d2_s16, max); d3_s16 = vminq_s16(d3_s16, max); d4_s16 = vminq_s16(d4_s16, max); d5_s16 = vminq_s16(d5_s16, max); d6_s16 = vminq_s16(d6_s16, max); d7_s16 = vminq_s16(d7_s16, max); d0_u16 = vqshluq_n_s16(d0_s16, 0); d1_u16 = vqshluq_n_s16(d1_s16, 0); d2_u16 = vqshluq_n_s16(d2_s16, 0); d3_u16 = vqshluq_n_s16(d3_s16, 0); d4_u16 = vqshluq_n_s16(d4_s16, 0); d5_u16 = vqshluq_n_s16(d5_s16, 0); d6_u16 = vqshluq_n_s16(d6_s16, 0); d7_u16 = vqshluq_n_s16(d7_s16, 0); vst1q_u16(dest, d0_u16); dest += stride; vst1q_u16(dest, d1_u16); dest += stride; vst1q_u16(dest, d2_u16); dest += stride; vst1q_u16(dest, d3_u16); dest += stride; vst1q_u16(dest, d4_u16); dest += stride; vst1q_u16(dest, d5_u16); dest += stride; vst1q_u16(dest, d6_u16); dest += stride; vst1q_u16(dest, d7_u16); } static INLINE void idct8x8_64_half1d_bd10( const int32x4_t cospis0, const int32x4_t cospis1, int32x4_t *const io0, int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3, int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6, int32x4_t *const io7) { int32x4_t step1[8], step2[8]; transpose_s32_8x4(io0, io1, io2, io3, io4, io5, io6, io7); // stage 1 step1[4] = vmulq_lane_s32(*io1, vget_high_s32(cospis1), 1); step1[5] = vmulq_lane_s32(*io3, vget_high_s32(cospis1), 0); step1[6] = vmulq_lane_s32(*io3, vget_low_s32(cospis1), 1); step1[7] = vmulq_lane_s32(*io1, vget_low_s32(cospis1), 0); step1[4] = vmlsq_lane_s32(step1[4], *io7, vget_low_s32(cospis1), 0); step1[5] = vmlaq_lane_s32(step1[5], *io5, vget_low_s32(cospis1), 1); step1[6] = vmlsq_lane_s32(step1[6], *io5, vget_high_s32(cospis1), 0); step1[7] = vmlaq_lane_s32(step1[7], *io7, vget_high_s32(cospis1), 1); step1[4] = vrshrq_n_s32(step1[4], DCT_CONST_BITS); step1[5] = vrshrq_n_s32(step1[5], DCT_CONST_BITS); step1[6] = vrshrq_n_s32(step1[6], DCT_CONST_BITS); step1[7] = vrshrq_n_s32(step1[7], DCT_CONST_BITS); // stage 2 step2[1] = vmulq_lane_s32(*io0, vget_high_s32(cospis0), 0); step2[2] = vmulq_lane_s32(*io2, vget_high_s32(cospis0), 1); step2[3] = vmulq_lane_s32(*io2, vget_low_s32(cospis0), 1); step2[0] = vmlaq_lane_s32(step2[1], *io4, vget_high_s32(cospis0), 0); step2[1] = vmlsq_lane_s32(step2[1], *io4, vget_high_s32(cospis0), 0); step2[2] = vmlsq_lane_s32(step2[2], *io6, vget_low_s32(cospis0), 1); step2[3] = vmlaq_lane_s32(step2[3], *io6, vget_high_s32(cospis0), 1); step2[0] = vrshrq_n_s32(step2[0], DCT_CONST_BITS); step2[1] = vrshrq_n_s32(step2[1], DCT_CONST_BITS); step2[2] = vrshrq_n_s32(step2[2], DCT_CONST_BITS); step2[3] = 
vrshrq_n_s32(step2[3], DCT_CONST_BITS); step2[4] = vaddq_s32(step1[4], step1[5]); step2[5] = vsubq_s32(step1[4], step1[5]); step2[6] = vsubq_s32(step1[7], step1[6]); step2[7] = vaddq_s32(step1[7], step1[6]); // stage 3 step1[0] = vaddq_s32(step2[0], step2[3]); step1[1] = vaddq_s32(step2[1], step2[2]); step1[2] = vsubq_s32(step2[1], step2[2]); step1[3] = vsubq_s32(step2[0], step2[3]); step1[6] = vmulq_lane_s32(step2[6], vget_high_s32(cospis0), 0); step1[5] = vmlsq_lane_s32(step1[6], step2[5], vget_high_s32(cospis0), 0); step1[6] = vmlaq_lane_s32(step1[6], step2[5], vget_high_s32(cospis0), 0); step1[5] = vrshrq_n_s32(step1[5], DCT_CONST_BITS); step1[6] = vrshrq_n_s32(step1[6], DCT_CONST_BITS); // stage 4 *io0 = vaddq_s32(step1[0], step2[7]); *io1 = vaddq_s32(step1[1], step1[6]); *io2 = vaddq_s32(step1[2], step1[5]); *io3 = vaddq_s32(step1[3], step2[4]); *io4 = vsubq_s32(step1[3], step2[4]); *io5 = vsubq_s32(step1[2], step1[5]); *io6 = vsubq_s32(step1[1], step1[6]); *io7 = vsubq_s32(step1[0], step2[7]); } static INLINE void idct8x8_64_half1d_bd12( const int32x4_t cospis0, const int32x4_t cospis1, int32x4_t *const io0, int32x4_t *const io1, int32x4_t *const io2, int32x4_t *const io3, int32x4_t *const io4, int32x4_t *const io5, int32x4_t *const io6, int32x4_t *const io7) { int32x2_t input1l, input1h, input3l, input3h, input5l, input5h, input7l, input7h; int32x2_t step1l[4], step1h[4]; int32x4_t step1[8], step2[8]; int64x2_t t64[8]; int32x2_t t32[8]; transpose_s32_8x4(io0, io1, io2, io3, io4, io5, io6, io7); // stage 1 input1l = vget_low_s32(*io1); input1h = vget_high_s32(*io1); input3l = vget_low_s32(*io3); input3h = vget_high_s32(*io3); input5l = vget_low_s32(*io5); input5h = vget_high_s32(*io5); input7l = vget_low_s32(*io7); input7h = vget_high_s32(*io7); step1l[0] = vget_low_s32(*io0); step1h[0] = vget_high_s32(*io0); step1l[1] = vget_low_s32(*io2); step1h[1] = vget_high_s32(*io2); step1l[2] = vget_low_s32(*io4); step1h[2] = vget_high_s32(*io4); step1l[3] = vget_low_s32(*io6); step1h[3] = vget_high_s32(*io6); t64[0] = vmull_lane_s32(input1l, vget_high_s32(cospis1), 1); t64[1] = vmull_lane_s32(input1h, vget_high_s32(cospis1), 1); t64[2] = vmull_lane_s32(input3l, vget_high_s32(cospis1), 0); t64[3] = vmull_lane_s32(input3h, vget_high_s32(cospis1), 0); t64[4] = vmull_lane_s32(input3l, vget_low_s32(cospis1), 1); t64[5] = vmull_lane_s32(input3h, vget_low_s32(cospis1), 1); t64[6] = vmull_lane_s32(input1l, vget_low_s32(cospis1), 0); t64[7] = vmull_lane_s32(input1h, vget_low_s32(cospis1), 0); t64[0] = vmlsl_lane_s32(t64[0], input7l, vget_low_s32(cospis1), 0); t64[1] = vmlsl_lane_s32(t64[1], input7h, vget_low_s32(cospis1), 0); t64[2] = vmlal_lane_s32(t64[2], input5l, vget_low_s32(cospis1), 1); t64[3] = vmlal_lane_s32(t64[3], input5h, vget_low_s32(cospis1), 1); t64[4] = vmlsl_lane_s32(t64[4], input5l, vget_high_s32(cospis1), 0); t64[5] = vmlsl_lane_s32(t64[5], input5h, vget_high_s32(cospis1), 0); t64[6] = vmlal_lane_s32(t64[6], input7l, vget_high_s32(cospis1), 1); t64[7] = vmlal_lane_s32(t64[7], input7h, vget_high_s32(cospis1), 1); t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS); t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS); t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); t32[3] = vrshrn_n_s64(t64[3], DCT_CONST_BITS); t32[4] = vrshrn_n_s64(t64[4], DCT_CONST_BITS); t32[5] = vrshrn_n_s64(t64[5], DCT_CONST_BITS); t32[6] = vrshrn_n_s64(t64[6], DCT_CONST_BITS); t32[7] = vrshrn_n_s64(t64[7], DCT_CONST_BITS); step1[4] = vcombine_s32(t32[0], t32[1]); step1[5] = vcombine_s32(t32[2], t32[3]); step1[6] = 
vcombine_s32(t32[4], t32[5]); step1[7] = vcombine_s32(t32[6], t32[7]); // stage 2 t64[2] = vmull_lane_s32(step1l[0], vget_high_s32(cospis0), 0); t64[3] = vmull_lane_s32(step1h[0], vget_high_s32(cospis0), 0); t64[4] = vmull_lane_s32(step1l[1], vget_high_s32(cospis0), 1); t64[5] = vmull_lane_s32(step1h[1], vget_high_s32(cospis0), 1); t64[6] = vmull_lane_s32(step1l[1], vget_low_s32(cospis0), 1); t64[7] = vmull_lane_s32(step1h[1], vget_low_s32(cospis0), 1); t64[0] = vmlal_lane_s32(t64[2], step1l[2], vget_high_s32(cospis0), 0); t64[1] = vmlal_lane_s32(t64[3], step1h[2], vget_high_s32(cospis0), 0); t64[2] = vmlsl_lane_s32(t64[2], step1l[2], vget_high_s32(cospis0), 0); t64[3] = vmlsl_lane_s32(t64[3], step1h[2], vget_high_s32(cospis0), 0); t64[4] = vmlsl_lane_s32(t64[4], step1l[3], vget_low_s32(cospis0), 1); t64[5] = vmlsl_lane_s32(t64[5], step1h[3], vget_low_s32(cospis0), 1); t64[6] = vmlal_lane_s32(t64[6], step1l[3], vget_high_s32(cospis0), 1); t64[7] = vmlal_lane_s32(t64[7], step1h[3], vget_high_s32(cospis0), 1); t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS); t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS); t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); t32[3] = vrshrn_n_s64(t64[3], DCT_CONST_BITS); t32[4] = vrshrn_n_s64(t64[4], DCT_CONST_BITS); t32[5] = vrshrn_n_s64(t64[5], DCT_CONST_BITS); t32[6] = vrshrn_n_s64(t64[6], DCT_CONST_BITS); t32[7] = vrshrn_n_s64(t64[7], DCT_CONST_BITS); step2[0] = vcombine_s32(t32[0], t32[1]); step2[1] = vcombine_s32(t32[2], t32[3]); step2[2] = vcombine_s32(t32[4], t32[5]); step2[3] = vcombine_s32(t32[6], t32[7]); step2[4] = vaddq_s32(step1[4], step1[5]); step2[5] = vsubq_s32(step1[4], step1[5]); step2[6] = vsubq_s32(step1[7], step1[6]); step2[7] = vaddq_s32(step1[7], step1[6]); // stage 3 step1[0] = vaddq_s32(step2[0], step2[3]); step1[1] = vaddq_s32(step2[1], step2[2]); step1[2] = vsubq_s32(step2[1], step2[2]); step1[3] = vsubq_s32(step2[0], step2[3]); t64[2] = vmull_lane_s32(vget_low_s32(step2[6]), vget_high_s32(cospis0), 0); t64[3] = vmull_lane_s32(vget_high_s32(step2[6]), vget_high_s32(cospis0), 0); t64[0] = vmlsl_lane_s32(t64[2], vget_low_s32(step2[5]), vget_high_s32(cospis0), 0); t64[1] = vmlsl_lane_s32(t64[3], vget_high_s32(step2[5]), vget_high_s32(cospis0), 0); t64[2] = vmlal_lane_s32(t64[2], vget_low_s32(step2[5]), vget_high_s32(cospis0), 0); t64[3] = vmlal_lane_s32(t64[3], vget_high_s32(step2[5]), vget_high_s32(cospis0), 0); t32[0] = vrshrn_n_s64(t64[0], DCT_CONST_BITS); t32[1] = vrshrn_n_s64(t64[1], DCT_CONST_BITS); t32[2] = vrshrn_n_s64(t64[2], DCT_CONST_BITS); t32[3] = vrshrn_n_s64(t64[3], DCT_CONST_BITS); step1[5] = vcombine_s32(t32[0], t32[1]); step1[6] = vcombine_s32(t32[2], t32[3]); // stage 4 *io0 = vaddq_s32(step1[0], step2[7]); *io1 = vaddq_s32(step1[1], step1[6]); *io2 = vaddq_s32(step1[2], step1[5]); *io3 = vaddq_s32(step1[3], step2[4]); *io4 = vsubq_s32(step1[3], step2[4]); *io5 = vsubq_s32(step1[2], step1[5]); *io6 = vsubq_s32(step1[1], step1[6]); *io7 = vsubq_s32(step1[0], step2[7]); } static INLINE void highbd_idct16x16_store_pass1(const int32x4x2_t *const out, int32_t *output) { // Save the result into output vst1q_s32(output + 0, out[0].val[0]); vst1q_s32(output + 4, out[0].val[1]); output += 16; vst1q_s32(output + 0, out[1].val[0]); vst1q_s32(output + 4, out[1].val[1]); output += 16; vst1q_s32(output + 0, out[2].val[0]); vst1q_s32(output + 4, out[2].val[1]); output += 16; vst1q_s32(output + 0, out[3].val[0]); vst1q_s32(output + 4, out[3].val[1]); output += 16; vst1q_s32(output + 0, out[4].val[0]); vst1q_s32(output + 4, out[4].val[1]); 
output += 16; vst1q_s32(output + 0, out[5].val[0]); vst1q_s32(output + 4, out[5].val[1]); output += 16; vst1q_s32(output + 0, out[6].val[0]); vst1q_s32(output + 4, out[6].val[1]); output += 16; vst1q_s32(output + 0, out[7].val[0]); vst1q_s32(output + 4, out[7].val[1]); output += 16; vst1q_s32(output + 0, out[8].val[0]); vst1q_s32(output + 4, out[8].val[1]); output += 16; vst1q_s32(output + 0, out[9].val[0]); vst1q_s32(output + 4, out[9].val[1]); output += 16; vst1q_s32(output + 0, out[10].val[0]); vst1q_s32(output + 4, out[10].val[1]); output += 16; vst1q_s32(output + 0, out[11].val[0]); vst1q_s32(output + 4, out[11].val[1]); output += 16; vst1q_s32(output + 0, out[12].val[0]); vst1q_s32(output + 4, out[12].val[1]); output += 16; vst1q_s32(output + 0, out[13].val[0]); vst1q_s32(output + 4, out[13].val[1]); output += 16; vst1q_s32(output + 0, out[14].val[0]); vst1q_s32(output + 4, out[14].val[1]); output += 16; vst1q_s32(output + 0, out[15].val[0]); vst1q_s32(output + 4, out[15].val[1]); } static INLINE void highbd_idct16x16_add_store(const int32x4x2_t *const out, uint16_t *dest, const int stride, const int bd) { // Add the result to dest const int16x8_t max = vdupq_n_s16((1 << bd) - 1); int16x8_t o[16]; o[0] = vcombine_s16(vrshrn_n_s32(out[0].val[0], 6), vrshrn_n_s32(out[0].val[1], 6)); o[1] = vcombine_s16(vrshrn_n_s32(out[1].val[0], 6), vrshrn_n_s32(out[1].val[1], 6)); o[2] = vcombine_s16(vrshrn_n_s32(out[2].val[0], 6), vrshrn_n_s32(out[2].val[1], 6)); o[3] = vcombine_s16(vrshrn_n_s32(out[3].val[0], 6), vrshrn_n_s32(out[3].val[1], 6)); o[4] = vcombine_s16(vrshrn_n_s32(out[4].val[0], 6), vrshrn_n_s32(out[4].val[1], 6)); o[5] = vcombine_s16(vrshrn_n_s32(out[5].val[0], 6), vrshrn_n_s32(out[5].val[1], 6)); o[6] = vcombine_s16(vrshrn_n_s32(out[6].val[0], 6), vrshrn_n_s32(out[6].val[1], 6)); o[7] = vcombine_s16(vrshrn_n_s32(out[7].val[0], 6), vrshrn_n_s32(out[7].val[1], 6)); o[8] = vcombine_s16(vrshrn_n_s32(out[8].val[0], 6), vrshrn_n_s32(out[8].val[1], 6)); o[9] = vcombine_s16(vrshrn_n_s32(out[9].val[0], 6), vrshrn_n_s32(out[9].val[1], 6)); o[10] = vcombine_s16(vrshrn_n_s32(out[10].val[0], 6), vrshrn_n_s32(out[10].val[1], 6)); o[11] = vcombine_s16(vrshrn_n_s32(out[11].val[0], 6), vrshrn_n_s32(out[11].val[1], 6)); o[12] = vcombine_s16(vrshrn_n_s32(out[12].val[0], 6), vrshrn_n_s32(out[12].val[1], 6)); o[13] = vcombine_s16(vrshrn_n_s32(out[13].val[0], 6), vrshrn_n_s32(out[13].val[1], 6)); o[14] = vcombine_s16(vrshrn_n_s32(out[14].val[0], 6), vrshrn_n_s32(out[14].val[1], 6)); o[15] = vcombine_s16(vrshrn_n_s32(out[15].val[0], 6), vrshrn_n_s32(out[15].val[1], 6)); highbd_idct16x16_add8x1(o[0], max, &dest, stride); highbd_idct16x16_add8x1(o[1], max, &dest, stride); highbd_idct16x16_add8x1(o[2], max, &dest, stride); highbd_idct16x16_add8x1(o[3], max, &dest, stride); highbd_idct16x16_add8x1(o[4], max, &dest, stride); highbd_idct16x16_add8x1(o[5], max, &dest, stride); highbd_idct16x16_add8x1(o[6], max, &dest, stride); highbd_idct16x16_add8x1(o[7], max, &dest, stride); highbd_idct16x16_add8x1(o[8], max, &dest, stride); highbd_idct16x16_add8x1(o[9], max, &dest, stride); highbd_idct16x16_add8x1(o[10], max, &dest, stride); highbd_idct16x16_add8x1(o[11], max, &dest, stride); highbd_idct16x16_add8x1(o[12], max, &dest, stride); highbd_idct16x16_add8x1(o[13], max, &dest, stride); highbd_idct16x16_add8x1(o[14], max, &dest, stride); highbd_idct16x16_add8x1(o[15], max, &dest, stride); } void vpx_highbd_idct16x16_256_add_half1d(const int32_t *input, int32_t *output, uint16_t *dest, const int stride, const int bd); 
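/* vpx_highbd_idct16x16_256_add_half1d() is defined in highbd_idct16x16_add_neon.c; declaring it in this header lets other translation units (e.g. the VP9 high-bitdepth hybrid transform code) reuse the 16x16 half-1D pass. */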
#endif // VPX_VPX_DSP_ARM_HIGHBD_IDCT_NEON_H_ libvpx-1.8.2/vpx_dsp/arm/highbd_intrapred_neon.c000066400000000000000000001163351357355204000217640ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" //------------------------------------------------------------------------------ // DC 4x4 static INLINE uint16x4_t dc_sum_4(const uint16_t *ref) { const uint16x4_t ref_u16 = vld1_u16(ref); const uint16x4_t p0 = vpadd_u16(ref_u16, ref_u16); return vpadd_u16(p0, p0); } static INLINE void dc_store_4x4(uint16_t *dst, ptrdiff_t stride, const uint16x4_t dc) { const uint16x4_t dc_dup = vdup_lane_u16(dc, 0); int i; for (i = 0; i < 4; ++i, dst += stride) { vst1_u16(dst, dc_dup); } } void vpx_highbd_dc_predictor_4x4_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t a = vld1_u16(above); const uint16x4_t l = vld1_u16(left); uint16x4_t sum; uint16x4_t dc; (void)bd; sum = vadd_u16(a, l); sum = vpadd_u16(sum, sum); sum = vpadd_u16(sum, sum); dc = vrshr_n_u16(sum, 3); dc_store_4x4(dst, stride, dc); } void vpx_highbd_dc_left_predictor_4x4_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t sum = dc_sum_4(left); const uint16x4_t dc = vrshr_n_u16(sum, 2); (void)above; (void)bd; dc_store_4x4(dst, stride, dc); } void vpx_highbd_dc_top_predictor_4x4_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t sum = dc_sum_4(above); const uint16x4_t dc = vrshr_n_u16(sum, 2); (void)left; (void)bd; dc_store_4x4(dst, stride, dc); } void vpx_highbd_dc_128_predictor_4x4_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t dc = vdup_n_u16(1 << (bd - 1)); (void)above; (void)left; dc_store_4x4(dst, stride, dc); } //------------------------------------------------------------------------------ // DC 8x8 static INLINE uint16x4_t dc_sum_8(const uint16_t *ref) { const uint16x8_t ref_u16 = vld1q_u16(ref); uint16x4_t sum = vadd_u16(vget_low_u16(ref_u16), vget_high_u16(ref_u16)); sum = vpadd_u16(sum, sum); return vpadd_u16(sum, sum); } static INLINE void dc_store_8x8(uint16_t *dst, ptrdiff_t stride, const uint16x4_t dc) { const uint16x8_t dc_dup = vdupq_lane_u16(dc, 0); int i; for (i = 0; i < 8; ++i, dst += stride) { vst1q_u16(dst, dc_dup); } } void vpx_highbd_dc_predictor_8x8_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8_t above_u16 = vld1q_u16(above); const uint16x8_t left_u16 = vld1q_u16(left); const uint16x8_t p0 = vaddq_u16(above_u16, left_u16); uint16x4_t sum = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); uint16x4_t dc; (void)bd; sum = vpadd_u16(sum, sum); sum = vpadd_u16(sum, sum); dc = vrshr_n_u16(sum, 4); dc_store_8x8(dst, stride, dc); } void vpx_highbd_dc_left_predictor_8x8_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t sum = dc_sum_8(left); const uint16x4_t dc = vrshr_n_u16(sum, 3); (void)above; (void)bd;
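/* DC from the left edge only: dc is the rounding average of the eight left pixels, (sum + 4) >> 3, computed with vrshr_n_u16. */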
dc_store_8x8(dst, stride, dc); } void vpx_highbd_dc_top_predictor_8x8_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t sum = dc_sum_8(above); const uint16x4_t dc = vrshr_n_u16(sum, 3); (void)left; (void)bd; dc_store_8x8(dst, stride, dc); } void vpx_highbd_dc_128_predictor_8x8_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t dc = vdup_n_u16(1 << (bd - 1)); (void)above; (void)left; dc_store_8x8(dst, stride, dc); } //------------------------------------------------------------------------------ // DC 16x16 static INLINE uint16x4_t dc_sum_16(const uint16_t *ref) { const uint16x8x2_t ref_u16 = vld2q_u16(ref); const uint16x8_t p0 = vaddq_u16(ref_u16.val[0], ref_u16.val[1]); uint16x4_t sum = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); sum = vpadd_u16(sum, sum); return vpadd_u16(sum, sum); } static INLINE void dc_store_16x16(uint16_t *dst, ptrdiff_t stride, const uint16x4_t dc) { uint16x8x2_t dc_dup; int i; dc_dup.val[0] = dc_dup.val[1] = vdupq_lane_u16(dc, 0); for (i = 0; i < 16; ++i, dst += stride) { vst2q_u16(dst, dc_dup); } } void vpx_highbd_dc_predictor_16x16_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8x2_t a = vld2q_u16(above); const uint16x8x2_t l = vld2q_u16(left); const uint16x8_t pa = vaddq_u16(a.val[0], a.val[1]); const uint16x8_t pl = vaddq_u16(l.val[0], l.val[1]); const uint16x8_t pal0 = vaddq_u16(pa, pl); uint16x4_t pal1 = vadd_u16(vget_low_u16(pal0), vget_high_u16(pal0)); uint32x2_t sum; uint16x4_t dc; (void)bd; pal1 = vpadd_u16(pal1, pal1); sum = vpaddl_u16(pal1); dc = vreinterpret_u16_u32(vrshr_n_u32(sum, 5)); dc_store_16x16(dst, stride, dc); } void vpx_highbd_dc_left_predictor_16x16_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t sum = dc_sum_16(left); const uint16x4_t dc = vrshr_n_u16(sum, 4); (void)above; (void)bd; dc_store_16x16(dst, stride, dc); } void vpx_highbd_dc_top_predictor_16x16_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t sum = dc_sum_16(above); const uint16x4_t dc = vrshr_n_u16(sum, 4); (void)left; (void)bd; dc_store_16x16(dst, stride, dc); } void vpx_highbd_dc_128_predictor_16x16_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t dc = vdup_n_u16(1 << (bd - 1)); (void)above; (void)left; dc_store_16x16(dst, stride, dc); } //------------------------------------------------------------------------------ // DC 32x32 static INLINE uint32x2_t dc_sum_32(const uint16_t *ref) { const uint16x8x4_t r = vld4q_u16(ref); const uint16x8_t p0 = vaddq_u16(r.val[0], r.val[1]); const uint16x8_t p1 = vaddq_u16(r.val[2], r.val[3]); const uint16x8_t p2 = vaddq_u16(p0, p1); uint16x4_t sum = vadd_u16(vget_low_u16(p2), vget_high_u16(p2)); sum = vpadd_u16(sum, sum); return vpaddl_u16(sum); } static INLINE void dc_store_32x32(uint16_t *dst, ptrdiff_t stride, const uint16x4_t dc) { uint16x8x2_t dc_dup; int i; dc_dup.val[0] = dc_dup.val[1] = vdupq_lane_u16(dc, 0); for (i = 0; i < 32; ++i) { vst2q_u16(dst, dc_dup); dst += 16; vst2q_u16(dst, dc_dup); dst += stride - 16; } } void vpx_highbd_dc_predictor_32x32_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8x4_t a = vld4q_u16(above); const uint16x8x4_t l = vld4q_u16(left); const uint16x8_t pa0 = vaddq_u16(a.val[0], 
a.val[1]); const uint16x8_t pa1 = vaddq_u16(a.val[2], a.val[3]); const uint16x8_t pl0 = vaddq_u16(l.val[0], l.val[1]); const uint16x8_t pl1 = vaddq_u16(l.val[2], l.val[3]); const uint16x8_t pa = vaddq_u16(pa0, pa1); const uint16x8_t pl = vaddq_u16(pl0, pl1); const uint16x8_t pal0 = vaddq_u16(pa, pl); const uint16x4_t pal1 = vadd_u16(vget_low_u16(pal0), vget_high_u16(pal0)); uint32x2_t sum = vpaddl_u16(pal1); uint16x4_t dc; (void)bd; sum = vpadd_u32(sum, sum); dc = vreinterpret_u16_u32(vrshr_n_u32(sum, 6)); dc_store_32x32(dst, stride, dc); } void vpx_highbd_dc_left_predictor_32x32_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint32x2_t sum = dc_sum_32(left); const uint16x4_t dc = vreinterpret_u16_u32(vrshr_n_u32(sum, 5)); (void)above; (void)bd; dc_store_32x32(dst, stride, dc); } void vpx_highbd_dc_top_predictor_32x32_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint32x2_t sum = dc_sum_32(above); const uint16x4_t dc = vreinterpret_u16_u32(vrshr_n_u32(sum, 5)); (void)left; (void)bd; dc_store_32x32(dst, stride, dc); } void vpx_highbd_dc_128_predictor_32x32_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t dc = vdup_n_u16(1 << (bd - 1)); (void)above; (void)left; dc_store_32x32(dst, stride, dc); } // ----------------------------------------------------------------------------- void vpx_highbd_d45_predictor_4x4_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8_t ABCDEFGH = vld1q_u16(above); const uint16x8_t BCDEFGH0 = vld1q_u16(above + 1); const uint16x8_t CDEFGH00 = vld1q_u16(above + 2); const uint16x8_t avg1 = vhaddq_u16(ABCDEFGH, CDEFGH00); const uint16x8_t avg2 = vrhaddq_u16(avg1, BCDEFGH0); const uint16x4_t avg2_low = vget_low_u16(avg2); const uint16x4_t avg2_high = vget_high_u16(avg2); const uint16x4_t r1 = vext_u16(avg2_low, avg2_high, 1); const uint16x4_t r2 = vext_u16(avg2_low, avg2_high, 2); const uint16x4_t r3 = vext_u16(avg2_low, avg2_high, 3); (void)left; (void)bd; vst1_u16(dst, avg2_low); dst += stride; vst1_u16(dst, r1); dst += stride; vst1_u16(dst, r2); dst += stride; vst1_u16(dst, r3); vst1q_lane_u16(dst + 3, ABCDEFGH, 7); } static INLINE void d45_store_8(uint16_t **dst, const ptrdiff_t stride, const uint16x8_t above_right, uint16x8_t *row) { *row = vextq_u16(*row, above_right, 1); vst1q_u16(*dst, *row); *dst += stride; } void vpx_highbd_d45_predictor_8x8_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8_t A0 = vld1q_u16(above); const uint16x8_t above_right = vdupq_lane_u16(vget_high_u16(A0), 3); const uint16x8_t A1 = vld1q_u16(above + 1); const uint16x8_t A2 = vld1q_u16(above + 2); const uint16x8_t avg1 = vhaddq_u16(A0, A2); uint16x8_t row = vrhaddq_u16(avg1, A1); (void)left; (void)bd; vst1q_u16(dst, row); dst += stride; d45_store_8(&dst, stride, above_right, &row); d45_store_8(&dst, stride, above_right, &row); d45_store_8(&dst, stride, above_right, &row); d45_store_8(&dst, stride, above_right, &row); d45_store_8(&dst, stride, above_right, &row); d45_store_8(&dst, stride, above_right, &row); vst1q_u16(dst, above_right); } static INLINE void d45_store_16(uint16_t **dst, const ptrdiff_t stride, const uint16x8_t above_right, uint16x8_t *row_0, uint16x8_t *row_1) { *row_0 = vextq_u16(*row_0, *row_1, 1); *row_1 = vextq_u16(*row_1, above_right, 1); vst1q_u16(*dst, *row_0); *dst += 8; vst1q_u16(*dst, 
*row_1); *dst += stride - 8; } void vpx_highbd_d45_predictor_16x16_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8_t A0_0 = vld1q_u16(above); const uint16x8_t A0_1 = vld1q_u16(above + 8); const uint16x8_t above_right = vdupq_lane_u16(vget_high_u16(A0_1), 3); const uint16x8_t A1_0 = vld1q_u16(above + 1); const uint16x8_t A1_1 = vld1q_u16(above + 9); const uint16x8_t A2_0 = vld1q_u16(above + 2); const uint16x8_t A2_1 = vld1q_u16(above + 10); const uint16x8_t avg_0 = vhaddq_u16(A0_0, A2_0); const uint16x8_t avg_1 = vhaddq_u16(A0_1, A2_1); uint16x8_t row_0 = vrhaddq_u16(avg_0, A1_0); uint16x8_t row_1 = vrhaddq_u16(avg_1, A1_1); (void)left; (void)bd; vst1q_u16(dst, row_0); vst1q_u16(dst + 8, row_1); dst += stride; d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); d45_store_16(&dst, stride, above_right, &row_0, &row_1); vst1q_u16(dst, above_right); vst1q_u16(dst + 8, above_right); } void vpx_highbd_d45_predictor_32x32_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8_t A0_0 = vld1q_u16(above); const uint16x8_t A0_1 = vld1q_u16(above + 8); const uint16x8_t A0_2 = vld1q_u16(above + 16); const uint16x8_t A0_3 = vld1q_u16(above + 24); const uint16x8_t above_right = vdupq_lane_u16(vget_high_u16(A0_3), 3); const uint16x8_t A1_0 = vld1q_u16(above + 1); const uint16x8_t A1_1 = vld1q_u16(above + 9); const uint16x8_t A1_2 = vld1q_u16(above + 17); const uint16x8_t A1_3 = vld1q_u16(above + 25); const uint16x8_t A2_0 = vld1q_u16(above + 2); const uint16x8_t A2_1 = vld1q_u16(above + 10); const uint16x8_t A2_2 = vld1q_u16(above + 18); const uint16x8_t A2_3 = vld1q_u16(above + 26); const uint16x8_t avg_0 = vhaddq_u16(A0_0, A2_0); const uint16x8_t avg_1 = vhaddq_u16(A0_1, A2_1); const uint16x8_t avg_2 = vhaddq_u16(A0_2, A2_2); const uint16x8_t avg_3 = vhaddq_u16(A0_3, A2_3); uint16x8_t row_0 = vrhaddq_u16(avg_0, A1_0); uint16x8_t row_1 = vrhaddq_u16(avg_1, A1_1); uint16x8_t row_2 = vrhaddq_u16(avg_2, A1_2); uint16x8_t row_3 = vrhaddq_u16(avg_3, A1_3); int i; (void)left; (void)bd; vst1q_u16(dst, row_0); dst += 8; vst1q_u16(dst, row_1); dst += 8; vst1q_u16(dst, row_2); dst += 8; vst1q_u16(dst, row_3); dst += stride - 24; for (i = 0; i < 30; ++i) { row_0 = vextq_u16(row_0, row_1, 1); row_1 = vextq_u16(row_1, row_2, 1); row_2 = vextq_u16(row_2, row_3, 1); row_3 = vextq_u16(row_3, above_right, 1); vst1q_u16(dst, row_0); dst += 8; vst1q_u16(dst, row_1); dst += 8; vst1q_u16(dst, row_2); dst += 8; vst1q_u16(dst, row_3); dst += stride - 24; } vst1q_u16(dst, above_right); dst += 8; vst1q_u16(dst, above_right); dst += 8; vst1q_u16(dst, above_right); dst += 8; vst1q_u16(dst, above_right); } // ----------------------------------------------------------------------------- void vpx_highbd_d135_predictor_4x4_neon(uint16_t *dst, 
ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8_t XA0123___ = vld1q_u16(above - 1); const uint16x4_t L0123 = vld1_u16(left); const uint16x4_t L3210 = vrev64_u16(L0123); const uint16x8_t L____3210 = vcombine_u16(L0123, L3210); const uint16x8_t L3210XA012 = vcombine_u16(L3210, vget_low_u16(XA0123___)); const uint16x8_t L210XA0123 = vextq_u16(L____3210, XA0123___, 5); const uint16x8_t L10XA0123_ = vextq_u16(L____3210, XA0123___, 6); const uint16x8_t avg1 = vhaddq_u16(L3210XA012, L10XA0123_); const uint16x8_t avg2 = vrhaddq_u16(avg1, L210XA0123); const uint16x4_t row_0 = vget_low_u16(avg2); const uint16x4_t row_1 = vget_high_u16(avg2); const uint16x4_t r0 = vext_u16(row_0, row_1, 3); const uint16x4_t r1 = vext_u16(row_0, row_1, 2); const uint16x4_t r2 = vext_u16(row_0, row_1, 1); (void)bd; vst1_u16(dst, r0); dst += stride; vst1_u16(dst, r1); dst += stride; vst1_u16(dst, r2); dst += stride; vst1_u16(dst, row_0); } void vpx_highbd_d135_predictor_8x8_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8_t XA0123456 = vld1q_u16(above - 1); const uint16x8_t A01234567 = vld1q_u16(above); const uint16x8_t A1234567_ = vld1q_u16(above + 1); const uint16x8_t L01234567 = vld1q_u16(left); const uint16x4_t L3210 = vrev64_u16(vget_low_u16(L01234567)); const uint16x4_t L7654 = vrev64_u16(vget_high_u16(L01234567)); const uint16x8_t L76543210 = vcombine_u16(L7654, L3210); const uint16x8_t L6543210X = vextq_u16(L76543210, XA0123456, 1); const uint16x8_t L543210XA0 = vextq_u16(L76543210, XA0123456, 2); const uint16x8_t avg_0 = vhaddq_u16(L76543210, L543210XA0); const uint16x8_t avg_1 = vhaddq_u16(XA0123456, A1234567_); const uint16x8_t row_0 = vrhaddq_u16(avg_0, L6543210X); const uint16x8_t row_1 = vrhaddq_u16(avg_1, A01234567); const uint16x8_t r0 = vextq_u16(row_0, row_1, 7); const uint16x8_t r1 = vextq_u16(row_0, row_1, 6); const uint16x8_t r2 = vextq_u16(row_0, row_1, 5); const uint16x8_t r3 = vextq_u16(row_0, row_1, 4); const uint16x8_t r4 = vextq_u16(row_0, row_1, 3); const uint16x8_t r5 = vextq_u16(row_0, row_1, 2); const uint16x8_t r6 = vextq_u16(row_0, row_1, 1); (void)bd; vst1q_u16(dst, r0); dst += stride; vst1q_u16(dst, r1); dst += stride; vst1q_u16(dst, r2); dst += stride; vst1q_u16(dst, r3); dst += stride; vst1q_u16(dst, r4); dst += stride; vst1q_u16(dst, r5); dst += stride; vst1q_u16(dst, r6); dst += stride; vst1q_u16(dst, row_0); } static INLINE void d135_store_16(uint16_t **dst, const ptrdiff_t stride, const uint16x8_t row_0, const uint16x8_t row_1) { vst1q_u16(*dst, row_0); *dst += 8; vst1q_u16(*dst, row_1); *dst += stride - 8; } void vpx_highbd_d135_predictor_16x16_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8_t L01234567 = vld1q_u16(left); const uint16x8_t L89abcdef = vld1q_u16(left + 8); const uint16x4_t L3210 = vrev64_u16(vget_low_u16(L01234567)); const uint16x4_t L7654 = vrev64_u16(vget_high_u16(L01234567)); const uint16x4_t Lba98 = vrev64_u16(vget_low_u16(L89abcdef)); const uint16x4_t Lfedc = vrev64_u16(vget_high_u16(L89abcdef)); const uint16x8_t L76543210 = vcombine_u16(L7654, L3210); const uint16x8_t Lfedcba98 = vcombine_u16(Lfedc, Lba98); const uint16x8_t Ledcba987 = vextq_u16(Lfedcba98, L76543210, 1); const uint16x8_t Ldcba9876 = vextq_u16(Lfedcba98, L76543210, 2); const uint16x8_t avg_0 = vhaddq_u16(Lfedcba98, Ldcba9876); const uint16x8_t row_0 = vrhaddq_u16(avg_0, Ledcba987); const uint16x8_t XA0123456 = vld1q_u16(above - 1); 
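/* As elsewhere in this file, each smoothed sample approximates the 3-tap filter (a + 2*b + c + 2) >> 2: vhaddq_u16 takes the halving add of the two outer taps (dropping the low bit of a + c) and vrhaddq_u16 then takes the rounding average with the middle tap. */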
const uint16x8_t L6543210X = vextq_u16(L76543210, XA0123456, 1); const uint16x8_t L543210XA0 = vextq_u16(L76543210, XA0123456, 2); const uint16x8_t avg_1 = vhaddq_u16(L76543210, L543210XA0); const uint16x8_t row_1 = vrhaddq_u16(avg_1, L6543210X); const uint16x8_t A01234567 = vld1q_u16(above); const uint16x8_t A12345678 = vld1q_u16(above + 1); const uint16x8_t avg_2 = vhaddq_u16(XA0123456, A12345678); const uint16x8_t row_2 = vrhaddq_u16(avg_2, A01234567); const uint16x8_t A789abcde = vld1q_u16(above + 7); const uint16x8_t A89abcdef = vld1q_u16(above + 8); const uint16x8_t A9abcdef_ = vld1q_u16(above + 9); const uint16x8_t avg_3 = vhaddq_u16(A789abcde, A9abcdef_); const uint16x8_t row_3 = vrhaddq_u16(avg_3, A89abcdef); const uint16x8_t r0_0 = vextq_u16(row_1, row_2, 7); const uint16x8_t r0_1 = vextq_u16(row_2, row_3, 7); const uint16x8_t r1_0 = vextq_u16(row_1, row_2, 6); const uint16x8_t r1_1 = vextq_u16(row_2, row_3, 6); const uint16x8_t r2_0 = vextq_u16(row_1, row_2, 5); const uint16x8_t r2_1 = vextq_u16(row_2, row_3, 5); const uint16x8_t r3_0 = vextq_u16(row_1, row_2, 4); const uint16x8_t r3_1 = vextq_u16(row_2, row_3, 4); const uint16x8_t r4_0 = vextq_u16(row_1, row_2, 3); const uint16x8_t r4_1 = vextq_u16(row_2, row_3, 3); const uint16x8_t r5_0 = vextq_u16(row_1, row_2, 2); const uint16x8_t r5_1 = vextq_u16(row_2, row_3, 2); const uint16x8_t r6_0 = vextq_u16(row_1, row_2, 1); const uint16x8_t r6_1 = vextq_u16(row_2, row_3, 1); const uint16x8_t r8_0 = vextq_u16(row_0, row_1, 7); const uint16x8_t r9_0 = vextq_u16(row_0, row_1, 6); const uint16x8_t ra_0 = vextq_u16(row_0, row_1, 5); const uint16x8_t rb_0 = vextq_u16(row_0, row_1, 4); const uint16x8_t rc_0 = vextq_u16(row_0, row_1, 3); const uint16x8_t rd_0 = vextq_u16(row_0, row_1, 2); const uint16x8_t re_0 = vextq_u16(row_0, row_1, 1); (void)bd; d135_store_16(&dst, stride, r0_0, r0_1); d135_store_16(&dst, stride, r1_0, r1_1); d135_store_16(&dst, stride, r2_0, r2_1); d135_store_16(&dst, stride, r3_0, r3_1); d135_store_16(&dst, stride, r4_0, r4_1); d135_store_16(&dst, stride, r5_0, r5_1); d135_store_16(&dst, stride, r6_0, r6_1); d135_store_16(&dst, stride, row_1, row_2); d135_store_16(&dst, stride, r8_0, r0_0); d135_store_16(&dst, stride, r9_0, r1_0); d135_store_16(&dst, stride, ra_0, r2_0); d135_store_16(&dst, stride, rb_0, r3_0); d135_store_16(&dst, stride, rc_0, r4_0); d135_store_16(&dst, stride, rd_0, r5_0); d135_store_16(&dst, stride, re_0, r6_0); vst1q_u16(dst, row_0); dst += 8; vst1q_u16(dst, row_1); } void vpx_highbd_d135_predictor_32x32_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8_t LL01234567 = vld1q_u16(left + 16); const uint16x8_t LL89abcdef = vld1q_u16(left + 24); const uint16x4_t LL3210 = vrev64_u16(vget_low_u16(LL01234567)); const uint16x4_t LL7654 = vrev64_u16(vget_high_u16(LL01234567)); const uint16x4_t LLba98 = vrev64_u16(vget_low_u16(LL89abcdef)); const uint16x4_t LLfedc = vrev64_u16(vget_high_u16(LL89abcdef)); const uint16x8_t LL76543210 = vcombine_u16(LL7654, LL3210); const uint16x8_t LLfedcba98 = vcombine_u16(LLfedc, LLba98); const uint16x8_t LLedcba987 = vextq_u16(LLfedcba98, LL76543210, 1); const uint16x8_t LLdcba9876 = vextq_u16(LLfedcba98, LL76543210, 2); const uint16x8_t avg_0 = vhaddq_u16(LLfedcba98, LLdcba9876); uint16x8_t row_0 = vrhaddq_u16(avg_0, LLedcba987); const uint16x8_t LU01234567 = vld1q_u16(left); const uint16x8_t LU89abcdef = vld1q_u16(left + 8); const uint16x4_t LU3210 = vrev64_u16(vget_low_u16(LU01234567)); const uint16x4_t LU7654 = 
vrev64_u16(vget_high_u16(LU01234567)); const uint16x4_t LUba98 = vrev64_u16(vget_low_u16(LU89abcdef)); const uint16x4_t LUfedc = vrev64_u16(vget_high_u16(LU89abcdef)); const uint16x8_t LU76543210 = vcombine_u16(LU7654, LU3210); const uint16x8_t LUfedcba98 = vcombine_u16(LUfedc, LUba98); const uint16x8_t LL6543210Uf = vextq_u16(LL76543210, LUfedcba98, 1); const uint16x8_t LL543210Ufe = vextq_u16(LL76543210, LUfedcba98, 2); const uint16x8_t avg_1 = vhaddq_u16(LL76543210, LL543210Ufe); uint16x8_t row_1 = vrhaddq_u16(avg_1, LL6543210Uf); const uint16x8_t LUedcba987 = vextq_u16(LUfedcba98, LU76543210, 1); const uint16x8_t LUdcba9876 = vextq_u16(LUfedcba98, LU76543210, 2); const uint16x8_t avg_2 = vhaddq_u16(LUfedcba98, LUdcba9876); uint16x8_t row_2 = vrhaddq_u16(avg_2, LUedcba987); const uint16x8_t XAL0123456 = vld1q_u16(above - 1); const uint16x8_t LU6543210X = vextq_u16(LU76543210, XAL0123456, 1); const uint16x8_t LU543210XA0 = vextq_u16(LU76543210, XAL0123456, 2); const uint16x8_t avg_3 = vhaddq_u16(LU76543210, LU543210XA0); uint16x8_t row_3 = vrhaddq_u16(avg_3, LU6543210X); const uint16x8_t AL01234567 = vld1q_u16(above); const uint16x8_t AL12345678 = vld1q_u16(above + 1); const uint16x8_t avg_4 = vhaddq_u16(XAL0123456, AL12345678); uint16x8_t row_4 = vrhaddq_u16(avg_4, AL01234567); const uint16x8_t AL789abcde = vld1q_u16(above + 7); const uint16x8_t AL89abcdef = vld1q_u16(above + 8); const uint16x8_t AL9abcdefg = vld1q_u16(above + 9); const uint16x8_t avg_5 = vhaddq_u16(AL789abcde, AL9abcdefg); uint16x8_t row_5 = vrhaddq_u16(avg_5, AL89abcdef); const uint16x8_t ALfR0123456 = vld1q_u16(above + 15); const uint16x8_t AR01234567 = vld1q_u16(above + 16); const uint16x8_t AR12345678 = vld1q_u16(above + 17); const uint16x8_t avg_6 = vhaddq_u16(ALfR0123456, AR12345678); uint16x8_t row_6 = vrhaddq_u16(avg_6, AR01234567); const uint16x8_t AR789abcde = vld1q_u16(above + 23); const uint16x8_t AR89abcdef = vld1q_u16(above + 24); const uint16x8_t AR9abcdef_ = vld1q_u16(above + 25); const uint16x8_t avg_7 = vhaddq_u16(AR789abcde, AR9abcdef_); uint16x8_t row_7 = vrhaddq_u16(avg_7, AR89abcdef); int i, j; (void)bd; dst += 31 * stride; for (i = 0; i < 4; ++i) { for (j = 0; j < 8; ++j) { vst1q_u16(dst, row_0); dst += 8; vst1q_u16(dst, row_1); dst += 8; vst1q_u16(dst, row_2); dst += 8; vst1q_u16(dst, row_3); dst -= stride + 24; row_0 = vextq_u16(row_0, row_1, 1); row_1 = vextq_u16(row_1, row_2, 1); row_2 = vextq_u16(row_2, row_3, 1); row_3 = vextq_u16(row_3, row_4, 1); row_4 = vextq_u16(row_4, row_4, 1); } row_4 = row_5; row_5 = row_6; row_6 = row_7; } } //------------------------------------------------------------------------------ void vpx_highbd_v_predictor_4x4_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t row = vld1_u16(above); int i; (void)left; (void)bd; for (i = 0; i < 4; i++, dst += stride) { vst1_u16(dst, row); } } void vpx_highbd_v_predictor_8x8_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8_t row = vld1q_u16(above); int i; (void)left; (void)bd; for (i = 0; i < 8; i++, dst += stride) { vst1q_u16(dst, row); } } void vpx_highbd_v_predictor_16x16_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8x2_t row = vld2q_u16(above); int i; (void)left; (void)bd; for (i = 0; i < 16; i++, dst += stride) { vst2q_u16(dst, row); } } void vpx_highbd_v_predictor_32x32_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const 
uint16_t *left, int bd) { const uint16x8x2_t row0 = vld2q_u16(above); const uint16x8x2_t row1 = vld2q_u16(above + 16); int i; (void)left; (void)bd; for (i = 0; i < 32; i++) { vst2q_u16(dst, row0); dst += 16; vst2q_u16(dst, row1); dst += stride - 16; } } // ----------------------------------------------------------------------------- void vpx_highbd_h_predictor_4x4_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x4_t left_u16 = vld1_u16(left); uint16x4_t row; (void)above; (void)bd; row = vdup_lane_u16(left_u16, 0); vst1_u16(dst, row); dst += stride; row = vdup_lane_u16(left_u16, 1); vst1_u16(dst, row); dst += stride; row = vdup_lane_u16(left_u16, 2); vst1_u16(dst, row); dst += stride; row = vdup_lane_u16(left_u16, 3); vst1_u16(dst, row); } void vpx_highbd_h_predictor_8x8_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const uint16x8_t left_u16 = vld1q_u16(left); const uint16x4_t left_low = vget_low_u16(left_u16); const uint16x4_t left_high = vget_high_u16(left_u16); uint16x8_t row; (void)above; (void)bd; row = vdupq_lane_u16(left_low, 0); vst1q_u16(dst, row); dst += stride; row = vdupq_lane_u16(left_low, 1); vst1q_u16(dst, row); dst += stride; row = vdupq_lane_u16(left_low, 2); vst1q_u16(dst, row); dst += stride; row = vdupq_lane_u16(left_low, 3); vst1q_u16(dst, row); dst += stride; row = vdupq_lane_u16(left_high, 0); vst1q_u16(dst, row); dst += stride; row = vdupq_lane_u16(left_high, 1); vst1q_u16(dst, row); dst += stride; row = vdupq_lane_u16(left_high, 2); vst1q_u16(dst, row); dst += stride; row = vdupq_lane_u16(left_high, 3); vst1q_u16(dst, row); } static INLINE void h_store_16(uint16_t **dst, const ptrdiff_t stride, const uint16x8_t row) { // Note: vst1q is faster than vst2q vst1q_u16(*dst, row); *dst += 8; vst1q_u16(*dst, row); *dst += stride - 8; } void vpx_highbd_h_predictor_16x16_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { int i; (void)above; (void)bd; for (i = 0; i < 2; i++, left += 8) { const uint16x8_t left_u16q = vld1q_u16(left); const uint16x4_t left_low = vget_low_u16(left_u16q); const uint16x4_t left_high = vget_high_u16(left_u16q); uint16x8_t row; row = vdupq_lane_u16(left_low, 0); h_store_16(&dst, stride, row); row = vdupq_lane_u16(left_low, 1); h_store_16(&dst, stride, row); row = vdupq_lane_u16(left_low, 2); h_store_16(&dst, stride, row); row = vdupq_lane_u16(left_low, 3); h_store_16(&dst, stride, row); row = vdupq_lane_u16(left_high, 0); h_store_16(&dst, stride, row); row = vdupq_lane_u16(left_high, 1); h_store_16(&dst, stride, row); row = vdupq_lane_u16(left_high, 2); h_store_16(&dst, stride, row); row = vdupq_lane_u16(left_high, 3); h_store_16(&dst, stride, row); } } static INLINE void h_store_32(uint16_t **dst, const ptrdiff_t stride, const uint16x8_t row) { // Note: vst1q is faster than vst2q vst1q_u16(*dst, row); *dst += 8; vst1q_u16(*dst, row); *dst += 8; vst1q_u16(*dst, row); *dst += 8; vst1q_u16(*dst, row); *dst += stride - 24; } void vpx_highbd_h_predictor_32x32_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { int i; (void)above; (void)bd; for (i = 0; i < 4; i++, left += 8) { const uint16x8_t left_u16q = vld1q_u16(left); const uint16x4_t left_low = vget_low_u16(left_u16q); const uint16x4_t left_high = vget_high_u16(left_u16q); uint16x8_t row; row = vdupq_lane_u16(left_low, 0); h_store_32(&dst, stride, row); row = vdupq_lane_u16(left_low, 1); h_store_32(&dst, stride, 
row); row = vdupq_lane_u16(left_low, 2); h_store_32(&dst, stride, row); row = vdupq_lane_u16(left_low, 3); h_store_32(&dst, stride, row); row = vdupq_lane_u16(left_high, 0); h_store_32(&dst, stride, row); row = vdupq_lane_u16(left_high, 1); h_store_32(&dst, stride, row); row = vdupq_lane_u16(left_high, 2); h_store_32(&dst, stride, row); row = vdupq_lane_u16(left_high, 3); h_store_32(&dst, stride, row); } } // ----------------------------------------------------------------------------- void vpx_highbd_tm_predictor_4x4_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int16x8_t max = vmovq_n_s16((1 << bd) - 1); const int16x8_t top_left = vld1q_dup_s16((const int16_t *)(above - 1)); const int16x4_t above_s16d = vld1_s16((const int16_t *)above); const int16x8_t above_s16 = vcombine_s16(above_s16d, above_s16d); const int16x4_t left_s16 = vld1_s16((const int16_t *)left); const int16x8_t sub = vsubq_s16(above_s16, top_left); int16x8_t sum; uint16x8_t row; sum = vcombine_s16(vdup_lane_s16(left_s16, 0), vdup_lane_s16(left_s16, 1)); sum = vaddq_s16(sum, sub); sum = vminq_s16(sum, max); row = vqshluq_n_s16(sum, 0); vst1_u16(dst, vget_low_u16(row)); dst += stride; vst1_u16(dst, vget_high_u16(row)); dst += stride; sum = vcombine_s16(vdup_lane_s16(left_s16, 2), vdup_lane_s16(left_s16, 3)); sum = vaddq_s16(sum, sub); sum = vminq_s16(sum, max); row = vqshluq_n_s16(sum, 0); vst1_u16(dst, vget_low_u16(row)); dst += stride; vst1_u16(dst, vget_high_u16(row)); } static INLINE void tm_8_kernel(uint16_t **dst, const ptrdiff_t stride, const int16x8_t left_dup, const int16x8_t sub, const int16x8_t max) { uint16x8_t row; int16x8_t sum = vaddq_s16(left_dup, sub); sum = vminq_s16(sum, max); row = vqshluq_n_s16(sum, 0); vst1q_u16(*dst, row); *dst += stride; } void vpx_highbd_tm_predictor_8x8_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int16x8_t max = vmovq_n_s16((1 << bd) - 1); const int16x8_t top_left = vld1q_dup_s16((const int16_t *)(above - 1)); const int16x8_t above_s16 = vld1q_s16((const int16_t *)above); const int16x8_t left_s16 = vld1q_s16((const int16_t *)left); const int16x8_t sub = vsubq_s16(above_s16, top_left); int16x4_t left_s16d; int16x8_t left_dup; int i; left_s16d = vget_low_s16(left_s16); for (i = 0; i < 2; i++, left_s16d = vget_high_s16(left_s16)) { left_dup = vdupq_lane_s16(left_s16d, 0); tm_8_kernel(&dst, stride, left_dup, sub, max); left_dup = vdupq_lane_s16(left_s16d, 1); tm_8_kernel(&dst, stride, left_dup, sub, max); left_dup = vdupq_lane_s16(left_s16d, 2); tm_8_kernel(&dst, stride, left_dup, sub, max); left_dup = vdupq_lane_s16(left_s16d, 3); tm_8_kernel(&dst, stride, left_dup, sub, max); } } static INLINE void tm_16_kernel(uint16_t **dst, const ptrdiff_t stride, const int16x8_t left_dup, const int16x8_t sub0, const int16x8_t sub1, const int16x8_t max) { uint16x8_t row0, row1; int16x8_t sum0 = vaddq_s16(left_dup, sub0); int16x8_t sum1 = vaddq_s16(left_dup, sub1); sum0 = vminq_s16(sum0, max); sum1 = vminq_s16(sum1, max); row0 = vqshluq_n_s16(sum0, 0); row1 = vqshluq_n_s16(sum1, 0); vst1q_u16(*dst, row0); *dst += 8; vst1q_u16(*dst, row1); *dst += stride - 8; } void vpx_highbd_tm_predictor_16x16_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int16x8_t max = vmovq_n_s16((1 << bd) - 1); const int16x8_t top_left = vld1q_dup_s16((const int16_t *)(above - 1)); const int16x8_t above0 = vld1q_s16((const int16_t *)above); const int16x8_t above1 = 
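// The TM (true motion) predictors above compute, for each output pixel,
// left[r] + above[c] - above[-1], clamped to [0, (1 << bd) - 1]: "sub"
// caches above[c] - above[-1] once per block, each row adds its left pixel,
// vminq_s16 caps the top of the range, and vqshluq_n_s16(sum, 0) saturates
// negative sums to zero while converting back to unsigned. A scalar sketch
// of the same rule (tm_predictor_scalar is a hypothetical reference helper):
#if 0
static void tm_predictor_scalar(uint16_t *dst, ptrdiff_t stride,
                                const uint16_t *above, const uint16_t *left,
                                int size, int bd) {
  const int max = (1 << bd) - 1;
  int r, c;
  for (r = 0; r < size; ++r) {
    for (c = 0; c < size; ++c) {
      const int v = left[r] + above[c] - above[-1];
      dst[r * stride + c] = (uint16_t)(v < 0 ? 0 : (v > max ? max : v));
    }
  }
}
#endif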
vld1q_s16((const int16_t *)(above + 8)); const int16x8_t sub0 = vsubq_s16(above0, top_left); const int16x8_t sub1 = vsubq_s16(above1, top_left); int16x8_t left_dup; int i, j; for (j = 0; j < 2; j++, left += 8) { const int16x8_t left_s16q = vld1q_s16((const int16_t *)left); int16x4_t left_s16d = vget_low_s16(left_s16q); for (i = 0; i < 2; i++, left_s16d = vget_high_s16(left_s16q)) { left_dup = vdupq_lane_s16(left_s16d, 0); tm_16_kernel(&dst, stride, left_dup, sub0, sub1, max); left_dup = vdupq_lane_s16(left_s16d, 1); tm_16_kernel(&dst, stride, left_dup, sub0, sub1, max); left_dup = vdupq_lane_s16(left_s16d, 2); tm_16_kernel(&dst, stride, left_dup, sub0, sub1, max); left_dup = vdupq_lane_s16(left_s16d, 3); tm_16_kernel(&dst, stride, left_dup, sub0, sub1, max); } } } static INLINE void tm_32_kernel(uint16_t **dst, const ptrdiff_t stride, const int16x8_t left_dup, const int16x8_t sub0, const int16x8_t sub1, const int16x8_t sub2, const int16x8_t sub3, const int16x8_t max) { uint16x8_t row0, row1, row2, row3; int16x8_t sum0 = vaddq_s16(left_dup, sub0); int16x8_t sum1 = vaddq_s16(left_dup, sub1); int16x8_t sum2 = vaddq_s16(left_dup, sub2); int16x8_t sum3 = vaddq_s16(left_dup, sub3); sum0 = vminq_s16(sum0, max); sum1 = vminq_s16(sum1, max); sum2 = vminq_s16(sum2, max); sum3 = vminq_s16(sum3, max); row0 = vqshluq_n_s16(sum0, 0); row1 = vqshluq_n_s16(sum1, 0); row2 = vqshluq_n_s16(sum2, 0); row3 = vqshluq_n_s16(sum3, 0); vst1q_u16(*dst, row0); *dst += 8; vst1q_u16(*dst, row1); *dst += 8; vst1q_u16(*dst, row2); *dst += 8; vst1q_u16(*dst, row3); *dst += stride - 24; } void vpx_highbd_tm_predictor_32x32_neon(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int16x8_t max = vmovq_n_s16((1 << bd) - 1); const int16x8_t top_left = vld1q_dup_s16((const int16_t *)(above - 1)); const int16x8_t above0 = vld1q_s16((const int16_t *)above); const int16x8_t above1 = vld1q_s16((const int16_t *)(above + 8)); const int16x8_t above2 = vld1q_s16((const int16_t *)(above + 16)); const int16x8_t above3 = vld1q_s16((const int16_t *)(above + 24)); const int16x8_t sub0 = vsubq_s16(above0, top_left); const int16x8_t sub1 = vsubq_s16(above1, top_left); const int16x8_t sub2 = vsubq_s16(above2, top_left); const int16x8_t sub3 = vsubq_s16(above3, top_left); int16x8_t left_dup; int i, j; for (i = 0; i < 4; i++, left += 8) { const int16x8_t left_s16q = vld1q_s16((const int16_t *)left); int16x4_t left_s16d = vget_low_s16(left_s16q); for (j = 0; j < 2; j++, left_s16d = vget_high_s16(left_s16q)) { left_dup = vdupq_lane_s16(left_s16d, 0); tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3, max); left_dup = vdupq_lane_s16(left_s16d, 1); tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3, max); left_dup = vdupq_lane_s16(left_s16d, 2); tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3, max); left_dup = vdupq_lane_s16(left_s16d, 3); tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3, max); } } } libvpx-1.8.2/vpx_dsp/arm/highbd_loopfilter_neon.c000066400000000000000000000754461357355204000221630ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/

#include <arm_neon.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/arm/transpose_neon.h"

static INLINE void load_thresh(const uint8_t *blimit, const uint8_t *limit,
                               const uint8_t *thresh, uint16x8_t *blimit_vec,
                               uint16x8_t *limit_vec, uint16x8_t *thresh_vec,
                               const int bd) {
  const int16x8_t shift = vdupq_n_s16(bd - 8);
  *blimit_vec = vmovl_u8(vld1_dup_u8(blimit));
  *limit_vec = vmovl_u8(vld1_dup_u8(limit));
  *thresh_vec = vmovl_u8(vld1_dup_u8(thresh));
  *blimit_vec = vshlq_u16(*blimit_vec, shift);
  *limit_vec = vshlq_u16(*limit_vec, shift);
  *thresh_vec = vshlq_u16(*thresh_vec, shift);
}

// Here flat is 128-bit long, with each 16-bit chunk being a mask of
// a pixel. When used to control filter branches, we only detect whether it is
// all 0s or all 1s. We pairwise add flat to a 32-bit long number flat_status.
// flat equals 0 if and only if flat_status equals 0.
// flat equals -1 (all 1s) if and only if flat_status equals -4. (This is true
// because each mask occupies more than 1 bit.)
static INLINE uint32_t calc_flat_status(const uint16x8_t flat) {
  const uint64x1_t t0 = vadd_u64(vreinterpret_u64_u16(vget_low_u16(flat)),
                                 vreinterpret_u64_u16(vget_high_u16(flat)));
  const uint64x1_t t1 = vpaddl_u32(vreinterpret_u32_u64(t0));
  return vget_lane_u32(vreinterpret_u32_u64(t1), 0);
}

static INLINE uint16x8_t filter_hev_mask4(
    const uint16x8_t limit, const uint16x8_t blimit, const uint16x8_t thresh,
    const uint16x8_t p3, const uint16x8_t p2, const uint16x8_t p1,
    const uint16x8_t p0, const uint16x8_t q0, const uint16x8_t q1,
    const uint16x8_t q2, const uint16x8_t q3, uint16x8_t *hev,
    uint16x8_t *mask) {
  uint16x8_t max, t0, t1;
  max = vabdq_u16(p1, p0);
  max = vmaxq_u16(max, vabdq_u16(q1, q0));
  *hev = vcgtq_u16(max, thresh);
  *mask = vmaxq_u16(max, vabdq_u16(p3, p2));
  *mask = vmaxq_u16(*mask, vabdq_u16(p2, p1));
  *mask = vmaxq_u16(*mask, vabdq_u16(q2, q1));
  *mask = vmaxq_u16(*mask, vabdq_u16(q3, q2));
  t0 = vabdq_u16(p0, q0);
  t1 = vabdq_u16(p1, q1);
  t0 = vaddq_u16(t0, t0);
  t1 = vshrq_n_u16(t1, 1);
  t0 = vaddq_u16(t0, t1);
  *mask = vcleq_u16(*mask, limit);
  t0 = vcleq_u16(t0, blimit);
  *mask = vandq_u16(*mask, t0);
  return max;
}

static INLINE uint16x8_t filter_flat_hev_mask(
    const uint16x8_t limit, const uint16x8_t blimit, const uint16x8_t thresh,
    const uint16x8_t p3, const uint16x8_t p2, const uint16x8_t p1,
    const uint16x8_t p0, const uint16x8_t q0, const uint16x8_t q1,
    const uint16x8_t q2, const uint16x8_t q3, uint16x8_t *flat,
    uint32_t *flat_status, uint16x8_t *hev, const int bd) {
  uint16x8_t mask;
  const uint16x8_t max = filter_hev_mask4(limit, blimit, thresh, p3, p2, p1,
                                          p0, q0, q1, q2, q3, hev, &mask);
  *flat = vmaxq_u16(max, vabdq_u16(p2, p0));
  *flat = vmaxq_u16(*flat, vabdq_u16(q2, q0));
  *flat = vmaxq_u16(*flat, vabdq_u16(p3, p0));
  *flat = vmaxq_u16(*flat, vabdq_u16(q3, q0));
  *flat = vcleq_u16(*flat, vdupq_n_u16(1 << (bd - 8))); /* flat_mask4() */
  *flat = vandq_u16(*flat, mask);
  *flat_status = calc_flat_status(*flat);
  return mask;
}

static INLINE uint16x8_t flat_mask5(const uint16x8_t p4, const uint16x8_t p3,
                                    const uint16x8_t p2, const uint16x8_t p1,
                                    const uint16x8_t p0, const uint16x8_t q0,
                                    const uint16x8_t q1, const uint16x8_t q2,
                                    const uint16x8_t q3, const uint16x8_t q4,
                                    const uint16x8_t flat,
                                    uint32_t *flat2_status, const int bd) {
  uint16x8_t flat2 = vabdq_u16(p4, p0);
  flat2 = vmaxq_u16(flat2, vabdq_u16(p3, p0));
  flat2 = vmaxq_u16(flat2, vabdq_u16(p2, p0));
  flat2 = vmaxq_u16(flat2, vabdq_u16(p1, p0));
  flat2 = vmaxq_u16(flat2, vabdq_u16(q1, q0));
  flat2 = vmaxq_u16(flat2, vabdq_u16(q2, q0));
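// filter_hev_mask4() above vectorizes the standard VP9 edge tests: "hev"
// (high edge variance) fires where |p1 - p0| or |q1 - q0| exceeds thresh,
// and the filter mask requires every neighboring-step difference to stay
// within limit and 2 * |p0 - q0| + |p1 - q1| / 2 to stay within blimit.
// A per-pixel scalar sketch (edge_mask_scalar is a hypothetical reference
// helper; it assumes <stdlib.h> abs() and takes pixels as int so the
// differences are signed):
#if 0
static int edge_mask_scalar(int p3, int p2, int p1, int p0, int q0, int q1,
                            int q2, int q3, int limit, int blimit) {
  int m = abs(p1 - p0);
  if (abs(q1 - q0) > m) m = abs(q1 - q0);
  if (abs(p3 - p2) > m) m = abs(p3 - p2);
  if (abs(p2 - p1) > m) m = abs(p2 - p1);
  if (abs(q2 - q1) > m) m = abs(q2 - q1);
  if (abs(q3 - q2) > m) m = abs(q3 - q2);
  return m <= limit && 2 * abs(p0 - q0) + abs(p1 - q1) / 2 <= blimit;
}
#endif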
flat2 = vmaxq_u16(flat2, vabdq_u16(q3, q0)); flat2 = vmaxq_u16(flat2, vabdq_u16(q4, q0)); flat2 = vcleq_u16(flat2, vdupq_n_u16(1 << (bd - 8))); flat2 = vandq_u16(flat2, flat); *flat2_status = calc_flat_status(flat2); return flat2; } static INLINE int16x8_t flip_sign(const uint16x8_t v, const int bd) { const uint16x8_t offset = vdupq_n_u16(0x80 << (bd - 8)); return vreinterpretq_s16_u16(vsubq_u16(v, offset)); } static INLINE uint16x8_t flip_sign_back(const int16x8_t v, const int bd) { const int16x8_t offset = vdupq_n_s16(0x80 << (bd - 8)); return vreinterpretq_u16_s16(vaddq_s16(v, offset)); } static INLINE void filter_update(const uint16x8_t sub0, const uint16x8_t sub1, const uint16x8_t add0, const uint16x8_t add1, uint16x8_t *sum) { *sum = vsubq_u16(*sum, sub0); *sum = vsubq_u16(*sum, sub1); *sum = vaddq_u16(*sum, add0); *sum = vaddq_u16(*sum, add1); } static INLINE uint16x8_t calc_7_tap_filter_kernel(const uint16x8_t sub0, const uint16x8_t sub1, const uint16x8_t add0, const uint16x8_t add1, uint16x8_t *sum) { filter_update(sub0, sub1, add0, add1, sum); return vrshrq_n_u16(*sum, 3); } static INLINE uint16x8_t apply_15_tap_filter_kernel( const uint16x8_t flat, const uint16x8_t sub0, const uint16x8_t sub1, const uint16x8_t add0, const uint16x8_t add1, const uint16x8_t in, uint16x8_t *sum) { filter_update(sub0, sub1, add0, add1, sum); return vbslq_u16(flat, vrshrq_n_u16(*sum, 4), in); } // 7-tap filter [1, 1, 1, 2, 1, 1, 1] static INLINE void calc_7_tap_filter(const uint16x8_t p3, const uint16x8_t p2, const uint16x8_t p1, const uint16x8_t p0, const uint16x8_t q0, const uint16x8_t q1, const uint16x8_t q2, const uint16x8_t q3, uint16x8_t *op2, uint16x8_t *op1, uint16x8_t *op0, uint16x8_t *oq0, uint16x8_t *oq1, uint16x8_t *oq2) { uint16x8_t sum; sum = vaddq_u16(p3, p3); // 2*p3 sum = vaddq_u16(sum, p3); // 3*p3 sum = vaddq_u16(sum, p2); // 3*p3+p2 sum = vaddq_u16(sum, p2); // 3*p3+2*p2 sum = vaddq_u16(sum, p1); // 3*p3+2*p2+p1 sum = vaddq_u16(sum, p0); // 3*p3+2*p2+p1+p0 sum = vaddq_u16(sum, q0); // 3*p3+2*p2+p1+p0+q0 *op2 = vrshrq_n_u16(sum, 3); *op1 = calc_7_tap_filter_kernel(p3, p2, p1, q1, &sum); *op0 = calc_7_tap_filter_kernel(p3, p1, p0, q2, &sum); *oq0 = calc_7_tap_filter_kernel(p3, p0, q0, q3, &sum); *oq1 = calc_7_tap_filter_kernel(p2, q0, q1, q3, &sum); *oq2 = calc_7_tap_filter_kernel(p1, q1, q2, q3, &sum); } static INLINE void apply_7_tap_filter(const uint16x8_t flat, const uint16x8_t p3, const uint16x8_t p2, const uint16x8_t p1, const uint16x8_t p0, const uint16x8_t q0, const uint16x8_t q1, const uint16x8_t q2, const uint16x8_t q3, uint16x8_t *op2, uint16x8_t *op1, uint16x8_t *op0, uint16x8_t *oq0, uint16x8_t *oq1, uint16x8_t *oq2) { uint16x8_t tp1, tp0, tq0, tq1; calc_7_tap_filter(p3, p2, p1, p0, q0, q1, q2, q3, op2, &tp1, &tp0, &tq0, &tq1, oq2); *op2 = vbslq_u16(flat, *op2, p2); *op1 = vbslq_u16(flat, tp1, *op1); *op0 = vbslq_u16(flat, tp0, *op0); *oq0 = vbslq_u16(flat, tq0, *oq0); *oq1 = vbslq_u16(flat, tq1, *oq1); *oq2 = vbslq_u16(flat, *oq2, q2); } // 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] static INLINE void apply_15_tap_filter( const uint16x8_t flat2, const uint16x8_t p7, const uint16x8_t p6, const uint16x8_t p5, const uint16x8_t p4, const uint16x8_t p3, const uint16x8_t p2, const uint16x8_t p1, const uint16x8_t p0, const uint16x8_t q0, const uint16x8_t q1, const uint16x8_t q2, const uint16x8_t q3, const uint16x8_t q4, const uint16x8_t q5, const uint16x8_t q6, const uint16x8_t q7, uint16x8_t *op6, uint16x8_t *op5, uint16x8_t *op4, uint16x8_t *op3, uint16x8_t 
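// calc_7_tap_filter() above evaluates the 7-tap window [1, 1, 1, 2, 1, 1, 1]
// with a single running sum: each successive output subtracts the two taps
// leaving the window and adds the two entering it (filter_update()), then
// rounds with vrshrq_n_u16(sum, 3), i.e. (sum + 4) >> 3. Scalar sketch of
// the first two outputs (reference only):
#if 0
{
  unsigned sum = 3 * p3 + 2 * p2 + p1 + p0 + q0; /* initial window for op2 */
  unsigned op2, op1;
  op2 = (sum + 4) >> 3;
  sum = sum - p3 - p2 + p1 + q1; /* slide the window one pixel toward q */
  op1 = (sum + 4) >> 3;
}
#endif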
*op2, uint16x8_t *op1, uint16x8_t *op0, uint16x8_t *oq0, uint16x8_t *oq1, uint16x8_t *oq2, uint16x8_t *oq3, uint16x8_t *oq4, uint16x8_t *oq5, uint16x8_t *oq6) { uint16x8_t sum; sum = vshlq_n_u16(p7, 3); // 8*p7 sum = vsubq_u16(sum, p7); // 7*p7 sum = vaddq_u16(sum, p6); // 7*p7+p6 sum = vaddq_u16(sum, p6); // 7*p7+2*p6 sum = vaddq_u16(sum, p5); // 7*p7+2*p6+p5 sum = vaddq_u16(sum, p4); // 7*p7+2*p6+p5+p4 sum = vaddq_u16(sum, p3); // 7*p7+2*p6+p5+p4+p3 sum = vaddq_u16(sum, p2); // 7*p7+2*p6+p5+p4+p3+p2 sum = vaddq_u16(sum, p1); // 7*p7+2*p6+p5+p4+p3+p2+p1 sum = vaddq_u16(sum, p0); // 7*p7+2*p6+p5+p4+p3+p2+p1+p0 sum = vaddq_u16(sum, q0); // 7*p7+2*p6+p5+p4+p3+p2+p1+p0+q0 *op6 = vbslq_u16(flat2, vrshrq_n_u16(sum, 4), p6); *op5 = apply_15_tap_filter_kernel(flat2, p7, p6, p5, q1, p5, &sum); *op4 = apply_15_tap_filter_kernel(flat2, p7, p5, p4, q2, p4, &sum); *op3 = apply_15_tap_filter_kernel(flat2, p7, p4, p3, q3, p3, &sum); *op2 = apply_15_tap_filter_kernel(flat2, p7, p3, p2, q4, *op2, &sum); *op1 = apply_15_tap_filter_kernel(flat2, p7, p2, p1, q5, *op1, &sum); *op0 = apply_15_tap_filter_kernel(flat2, p7, p1, p0, q6, *op0, &sum); *oq0 = apply_15_tap_filter_kernel(flat2, p7, p0, q0, q7, *oq0, &sum); *oq1 = apply_15_tap_filter_kernel(flat2, p6, q0, q1, q7, *oq1, &sum); *oq2 = apply_15_tap_filter_kernel(flat2, p5, q1, q2, q7, *oq2, &sum); *oq3 = apply_15_tap_filter_kernel(flat2, p4, q2, q3, q7, q3, &sum); *oq4 = apply_15_tap_filter_kernel(flat2, p3, q3, q4, q7, q4, &sum); *oq5 = apply_15_tap_filter_kernel(flat2, p2, q4, q5, q7, q5, &sum); *oq6 = apply_15_tap_filter_kernel(flat2, p1, q5, q6, q7, q6, &sum); } static INLINE void filter4(const uint16x8_t mask, const uint16x8_t hev, const uint16x8_t p1, const uint16x8_t p0, const uint16x8_t q0, const uint16x8_t q1, uint16x8_t *op1, uint16x8_t *op0, uint16x8_t *oq0, uint16x8_t *oq1, const int bd) { const int16x8_t max = vdupq_n_s16((1 << (bd - 1)) - 1); const int16x8_t min = vdupq_n_s16((int16_t)(((uint32_t)-1) << (bd - 1))); int16x8_t filter, filter1, filter2, t; int16x8_t ps1 = flip_sign(p1, bd); int16x8_t ps0 = flip_sign(p0, bd); int16x8_t qs0 = flip_sign(q0, bd); int16x8_t qs1 = flip_sign(q1, bd); /* add outer taps if we have high edge variance */ filter = vsubq_s16(ps1, qs1); filter = vmaxq_s16(filter, min); filter = vminq_s16(filter, max); filter = vandq_s16(filter, vreinterpretq_s16_u16(hev)); t = vsubq_s16(qs0, ps0); /* inner taps */ filter = vaddq_s16(filter, t); filter = vaddq_s16(filter, t); filter = vaddq_s16(filter, t); filter = vmaxq_s16(filter, min); filter = vminq_s16(filter, max); filter = vandq_s16(filter, vreinterpretq_s16_u16(mask)); /* save bottom 3 bits so that we round one side +4 and the other +3 */ /* if it equals 4 we'll set it to adjust by -1 to account for the fact */ /* we'd round it by 3 the other way */ t = vaddq_s16(filter, vdupq_n_s16(4)); t = vminq_s16(t, max); filter1 = vshrq_n_s16(t, 3); t = vaddq_s16(filter, vdupq_n_s16(3)); t = vminq_s16(t, max); filter2 = vshrq_n_s16(t, 3); qs0 = vsubq_s16(qs0, filter1); qs0 = vmaxq_s16(qs0, min); qs0 = vminq_s16(qs0, max); ps0 = vaddq_s16(ps0, filter2); ps0 = vmaxq_s16(ps0, min); ps0 = vminq_s16(ps0, max); *oq0 = flip_sign_back(qs0, bd); *op0 = flip_sign_back(ps0, bd); /* outer tap adjustments */ filter = vrshrq_n_s16(filter1, 1); filter = vbicq_s16(filter, vreinterpretq_s16_u16(hev)); qs1 = vsubq_s16(qs1, filter); qs1 = vmaxq_s16(qs1, min); qs1 = vminq_s16(qs1, max); ps1 = vaddq_s16(ps1, filter); ps1 = vmaxq_s16(ps1, min); ps1 = vminq_s16(ps1, max); *oq1 = flip_sign_back(qs1, 
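// filter4() above is the narrow filter evaluated in signed arithmetic:
// flip_sign() recenters bd-bit pixels around zero so clamping is symmetric,
// and the +4/+3 pair rounds filter/8 with the asymmetry the bitstream
// requires before adjusting q0 and p0. A scalar sketch of the core steps
// (signed_clamp() is a hypothetical helper limiting to the signed bd-bit
// range, matching the vmaxq_s16/vminq_s16 pairs above):
#if 0
filter = signed_clamp(ps1 - qs1) & hev;            /* outer tap, hev only */
filter = signed_clamp(filter + 3 * (qs0 - ps0)) & mask;
filter1 = signed_clamp(filter + 4) >> 3;           /* subtracted from q0 */
filter2 = signed_clamp(filter + 3) >> 3;           /* added to p0 */
#endif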
bd); *op1 = flip_sign_back(ps1, bd); } static INLINE void filter8(const uint16x8_t mask, const uint16x8_t flat, const uint32_t flat_status, const uint16x8_t hev, const uint16x8_t p3, const uint16x8_t p2, const uint16x8_t p1, const uint16x8_t p0, const uint16x8_t q0, const uint16x8_t q1, const uint16x8_t q2, const uint16x8_t q3, uint16x8_t *op2, uint16x8_t *op1, uint16x8_t *op0, uint16x8_t *oq0, uint16x8_t *oq1, uint16x8_t *oq2, const int bd) { if (flat_status != (uint32_t)-4) { filter4(mask, hev, p1, p0, q0, q1, op1, op0, oq0, oq1, bd); *op2 = p2; *oq2 = q2; if (flat_status) { apply_7_tap_filter(flat, p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0, oq0, oq1, oq2); } } else { calc_7_tap_filter(p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0, oq0, oq1, oq2); } } static INLINE void filter16( const uint16x8_t mask, const uint16x8_t flat, const uint32_t flat_status, const uint16x8_t flat2, const uint32_t flat2_status, const uint16x8_t hev, const uint16x8_t p7, const uint16x8_t p6, const uint16x8_t p5, const uint16x8_t p4, const uint16x8_t p3, const uint16x8_t p2, const uint16x8_t p1, const uint16x8_t p0, const uint16x8_t q0, const uint16x8_t q1, const uint16x8_t q2, const uint16x8_t q3, const uint16x8_t q4, const uint16x8_t q5, const uint16x8_t q6, const uint16x8_t q7, uint16x8_t *op6, uint16x8_t *op5, uint16x8_t *op4, uint16x8_t *op3, uint16x8_t *op2, uint16x8_t *op1, uint16x8_t *op0, uint16x8_t *oq0, uint16x8_t *oq1, uint16x8_t *oq2, uint16x8_t *oq3, uint16x8_t *oq4, uint16x8_t *oq5, uint16x8_t *oq6, const int bd) { if (flat_status != (uint32_t)-4) { filter4(mask, hev, p1, p0, q0, q1, op1, op0, oq0, oq1, bd); } if (flat_status) { *op2 = p2; *oq2 = q2; if (flat2_status != (uint32_t)-4) { apply_7_tap_filter(flat, p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0, oq0, oq1, oq2); } if (flat2_status) { apply_15_tap_filter(flat2, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6); } } } static INLINE void load_8x8(const uint16_t *s, const int p, uint16x8_t *p3, uint16x8_t *p2, uint16x8_t *p1, uint16x8_t *p0, uint16x8_t *q0, uint16x8_t *q1, uint16x8_t *q2, uint16x8_t *q3) { *p3 = vld1q_u16(s); s += p; *p2 = vld1q_u16(s); s += p; *p1 = vld1q_u16(s); s += p; *p0 = vld1q_u16(s); s += p; *q0 = vld1q_u16(s); s += p; *q1 = vld1q_u16(s); s += p; *q2 = vld1q_u16(s); s += p; *q3 = vld1q_u16(s); } static INLINE void load_8x16(const uint16_t *s, const int p, uint16x8_t *s0, uint16x8_t *s1, uint16x8_t *s2, uint16x8_t *s3, uint16x8_t *s4, uint16x8_t *s5, uint16x8_t *s6, uint16x8_t *s7, uint16x8_t *s8, uint16x8_t *s9, uint16x8_t *s10, uint16x8_t *s11, uint16x8_t *s12, uint16x8_t *s13, uint16x8_t *s14, uint16x8_t *s15) { *s0 = vld1q_u16(s); s += p; *s1 = vld1q_u16(s); s += p; *s2 = vld1q_u16(s); s += p; *s3 = vld1q_u16(s); s += p; *s4 = vld1q_u16(s); s += p; *s5 = vld1q_u16(s); s += p; *s6 = vld1q_u16(s); s += p; *s7 = vld1q_u16(s); s += p; *s8 = vld1q_u16(s); s += p; *s9 = vld1q_u16(s); s += p; *s10 = vld1q_u16(s); s += p; *s11 = vld1q_u16(s); s += p; *s12 = vld1q_u16(s); s += p; *s13 = vld1q_u16(s); s += p; *s14 = vld1q_u16(s); s += p; *s15 = vld1q_u16(s); } static INLINE void store_8x4(uint16_t *s, const int p, const uint16x8_t s0, const uint16x8_t s1, const uint16x8_t s2, const uint16x8_t s3) { vst1q_u16(s, s0); s += p; vst1q_u16(s, s1); s += p; vst1q_u16(s, s2); s += p; vst1q_u16(s, s3); } static INLINE void store_8x6(uint16_t *s, const int p, const uint16x8_t s0, const uint16x8_t s1, const uint16x8_t s2, const uint16x8_t s3, const 
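// filter8() and filter16() above branch on the aggregated lane masks rather
// than per pixel: flat_status == 0 means no flat lane (narrow filter4 only),
// flat_status == (uint32_t)-4 is the all-lanes-flat sentinel described at
// calc_flat_status() (wide filter only, filter4 skipped), and any other
// value computes both paths and blends per lane with vbslq_u16. Shape of
// the dispatch (sketch only):
#if 0
if (flat_status == 0) {
  /* narrow filter only */
} else if (flat_status == (uint32_t)-4) {
  /* wide filter only, no blending needed */
} else {
  /* run both, select per lane using the flat mask */
}
#endif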
uint16x8_t s4, const uint16x8_t s5) { vst1q_u16(s, s0); s += p; vst1q_u16(s, s1); s += p; vst1q_u16(s, s2); s += p; vst1q_u16(s, s3); s += p; vst1q_u16(s, s4); s += p; vst1q_u16(s, s5); } static INLINE void store_4x8(uint16_t *s, const int p, const uint16x8_t p1, const uint16x8_t p0, const uint16x8_t q0, const uint16x8_t q1) { uint16x8x4_t o; o.val[0] = p1; o.val[1] = p0; o.val[2] = q0; o.val[3] = q1; vst4q_lane_u16(s, o, 0); s += p; vst4q_lane_u16(s, o, 1); s += p; vst4q_lane_u16(s, o, 2); s += p; vst4q_lane_u16(s, o, 3); s += p; vst4q_lane_u16(s, o, 4); s += p; vst4q_lane_u16(s, o, 5); s += p; vst4q_lane_u16(s, o, 6); s += p; vst4q_lane_u16(s, o, 7); } static INLINE void store_6x8(uint16_t *s, const int p, const uint16x8_t s0, const uint16x8_t s1, const uint16x8_t s2, const uint16x8_t s3, const uint16x8_t s4, const uint16x8_t s5) { uint16x8x3_t o0, o1; o0.val[0] = s0; o0.val[1] = s1; o0.val[2] = s2; o1.val[0] = s3; o1.val[1] = s4; o1.val[2] = s5; vst3q_lane_u16(s - 3, o0, 0); vst3q_lane_u16(s + 0, o1, 0); s += p; vst3q_lane_u16(s - 3, o0, 1); vst3q_lane_u16(s + 0, o1, 1); s += p; vst3q_lane_u16(s - 3, o0, 2); vst3q_lane_u16(s + 0, o1, 2); s += p; vst3q_lane_u16(s - 3, o0, 3); vst3q_lane_u16(s + 0, o1, 3); s += p; vst3q_lane_u16(s - 3, o0, 4); vst3q_lane_u16(s + 0, o1, 4); s += p; vst3q_lane_u16(s - 3, o0, 5); vst3q_lane_u16(s + 0, o1, 5); s += p; vst3q_lane_u16(s - 3, o0, 6); vst3q_lane_u16(s + 0, o1, 6); s += p; vst3q_lane_u16(s - 3, o0, 7); vst3q_lane_u16(s + 0, o1, 7); } static INLINE void store_7x8(uint16_t *s, const int p, const uint16x8_t s0, const uint16x8_t s1, const uint16x8_t s2, const uint16x8_t s3, const uint16x8_t s4, const uint16x8_t s5, const uint16x8_t s6) { uint16x8x4_t o0; uint16x8x3_t o1; o0.val[0] = s0; o0.val[1] = s1; o0.val[2] = s2; o0.val[3] = s3; o1.val[0] = s4; o1.val[1] = s5; o1.val[2] = s6; vst4q_lane_u16(s - 4, o0, 0); vst3q_lane_u16(s + 0, o1, 0); s += p; vst4q_lane_u16(s - 4, o0, 1); vst3q_lane_u16(s + 0, o1, 1); s += p; vst4q_lane_u16(s - 4, o0, 2); vst3q_lane_u16(s + 0, o1, 2); s += p; vst4q_lane_u16(s - 4, o0, 3); vst3q_lane_u16(s + 0, o1, 3); s += p; vst4q_lane_u16(s - 4, o0, 4); vst3q_lane_u16(s + 0, o1, 4); s += p; vst4q_lane_u16(s - 4, o0, 5); vst3q_lane_u16(s + 0, o1, 5); s += p; vst4q_lane_u16(s - 4, o0, 6); vst3q_lane_u16(s + 0, o1, 6); s += p; vst4q_lane_u16(s - 4, o0, 7); vst3q_lane_u16(s + 0, o1, 7); } static INLINE void store_8x14(uint16_t *s, const int p, const uint16x8_t p6, const uint16x8_t p5, const uint16x8_t p4, const uint16x8_t p3, const uint16x8_t p2, const uint16x8_t p1, const uint16x8_t p0, const uint16x8_t q0, const uint16x8_t q1, const uint16x8_t q2, const uint16x8_t q3, const uint16x8_t q4, const uint16x8_t q5, const uint16x8_t q6, const uint32_t flat_status, const uint32_t flat2_status) { if (flat_status) { if (flat2_status) { vst1q_u16(s - 7 * p, p6); vst1q_u16(s - 6 * p, p5); vst1q_u16(s - 5 * p, p4); vst1q_u16(s - 4 * p, p3); vst1q_u16(s + 3 * p, q3); vst1q_u16(s + 4 * p, q4); vst1q_u16(s + 5 * p, q5); vst1q_u16(s + 6 * p, q6); } vst1q_u16(s - 3 * p, p2); vst1q_u16(s + 2 * p, q2); } vst1q_u16(s - 2 * p, p1); vst1q_u16(s - 1 * p, p0); vst1q_u16(s + 0 * p, q0); vst1q_u16(s + 1 * p, q1); } void vpx_highbd_lpf_horizontal_4_neon(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { uint16x8_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, mask, hev; load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd); load_8x8(s - 4 * p, p, &p3, &p2, &p1, &p0, 
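// store_4x8(), store_6x8() and store_7x8() above write vertical-filter
// results back in column order: lane i of each packed register holds row i,
// so one vst3q_lane_u16/vst4q_lane_u16 per row scatters three or four
// neighboring columns at once and no second transpose is needed. Scalar
// picture of store_4x8 (sketch only; the real intrinsics require constant
// lane indices, so lane() below is illustrative shorthand):
#if 0
for (i = 0; i < 8; ++i) {
  s[i * p + 0] = lane(p1, i); /* column -2 */
  s[i * p + 1] = lane(p0, i); /* column -1 */
  s[i * p + 2] = lane(q0, i); /* column  0 */
  s[i * p + 3] = lane(q1, i); /* column +1 */
}
#endif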
&q0, &q1, &q2, &q3); filter_hev_mask4(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, &hev, &mask); filter4(mask, hev, p1, p0, q0, q1, &p1, &p0, &q0, &q1, bd); store_8x4(s - 2 * p, p, p1, p0, q0, q1); } void vpx_highbd_lpf_horizontal_4_dual_neon( uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { vpx_highbd_lpf_horizontal_4_neon(s, p, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_horizontal_4_neon(s + 8, p, blimit1, limit1, thresh1, bd); } void vpx_highbd_lpf_vertical_4_neon(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { uint16x8_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, mask, hev; load_8x8(s - 4, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); transpose_s16_8x8((int16x8_t *)&p3, (int16x8_t *)&p2, (int16x8_t *)&p1, (int16x8_t *)&p0, (int16x8_t *)&q0, (int16x8_t *)&q1, (int16x8_t *)&q2, (int16x8_t *)&q3); load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd); filter_hev_mask4(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, &hev, &mask); filter4(mask, hev, p1, p0, q0, q1, &p1, &p0, &q0, &q1, bd); store_4x8(s - 2, p, p1, p0, q0, q1); } void vpx_highbd_lpf_vertical_4_dual_neon( uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { vpx_highbd_lpf_vertical_4_neon(s, p, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_vertical_4_neon(s + 8 * p, p, blimit1, limit1, thresh1, bd); } void vpx_highbd_lpf_horizontal_8_neon(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { uint16x8_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0, oq0, oq1, oq2, mask, flat, hev; uint32_t flat_status; load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd); load_8x8(s - 4 * p, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); mask = filter_flat_hev_mask(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, &flat, &flat_status, &hev, bd); filter8(mask, flat, flat_status, hev, p3, p2, p1, p0, q0, q1, q2, q3, &op2, &op1, &op0, &oq0, &oq1, &oq2, bd); store_8x6(s - 3 * p, p, op2, op1, op0, oq0, oq1, oq2); } void vpx_highbd_lpf_horizontal_8_dual_neon( uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { vpx_highbd_lpf_horizontal_8_neon(s, p, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_horizontal_8_neon(s + 8, p, blimit1, limit1, thresh1, bd); } void vpx_highbd_lpf_vertical_8_neon(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { uint16x8_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0, oq0, oq1, oq2, mask, flat, hev; uint32_t flat_status; load_8x8(s - 4, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); transpose_s16_8x8((int16x8_t *)&p3, (int16x8_t *)&p2, (int16x8_t *)&p1, (int16x8_t *)&p0, (int16x8_t *)&q0, (int16x8_t *)&q1, (int16x8_t *)&q2, (int16x8_t *)&q3); load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd); mask = filter_flat_hev_mask(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, &flat, &flat_status, &hev, bd); filter8(mask, flat, flat_status, hev, p3, p2, p1, p0, q0, q1, q2, q3, &op2, &op1, &op0, &oq0, 
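// Each *_dual_neon entry point in this file filters two adjacent 8-pixel
// edges with two kernel calls: horizontal variants advance 8 pixels along
// the row, vertical variants advance 8 rows. Pattern (sketch only):
#if 0
kernel(s, p, ...);         /* first half */
kernel(s + 8, p, ...);     /* horizontal dual: next 8 columns */
kernel(s + 8 * p, p, ...); /* vertical dual: next 8 rows */
#endif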
&oq1, &oq2, bd); // Note: store_6x8() is faster than transpose + store_8x8(). store_6x8(s, p, op2, op1, op0, oq0, oq1, oq2); } void vpx_highbd_lpf_vertical_8_dual_neon( uint16_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { vpx_highbd_lpf_vertical_8_neon(s, p, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_vertical_8_neon(s + 8 * p, p, blimit1, limit1, thresh1, bd); } static void lpf_horizontal_16_kernel(uint16_t *s, int p, const uint16x8_t blimit_vec, const uint16x8_t limit_vec, const uint16x8_t thresh_vec, const int bd) { uint16x8_t mask, flat, flat2, hev, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6; uint32_t flat_status, flat2_status; load_8x16(s - 8 * p, p, &p7, &p6, &p5, &p4, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7); mask = filter_flat_hev_mask(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, &flat, &flat_status, &hev, bd); flat2 = flat_mask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, &flat2_status, bd); filter16(mask, flat, flat_status, flat2, flat2_status, hev, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, &op6, &op5, &op4, &op3, &op2, &op1, &op0, &oq0, &oq1, &oq2, &oq3, &oq4, &oq5, &oq6, bd); store_8x14(s, p, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6, flat_status, flat2_status); } static void lpf_vertical_16_kernel(uint16_t *s, int p, const uint16x8_t blimit_vec, const uint16x8_t limit_vec, const uint16x8_t thresh_vec, const int bd) { uint16x8_t mask, flat, flat2, hev, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6; uint32_t flat_status, flat2_status; load_8x8(s - 8, p, &p7, &p6, &p5, &p4, &p3, &p2, &p1, &p0); transpose_s16_8x8((int16x8_t *)&p7, (int16x8_t *)&p6, (int16x8_t *)&p5, (int16x8_t *)&p4, (int16x8_t *)&p3, (int16x8_t *)&p2, (int16x8_t *)&p1, (int16x8_t *)&p0); load_8x8(s, p, &q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7); transpose_s16_8x8((int16x8_t *)&q0, (int16x8_t *)&q1, (int16x8_t *)&q2, (int16x8_t *)&q3, (int16x8_t *)&q4, (int16x8_t *)&q5, (int16x8_t *)&q6, (int16x8_t *)&q7); mask = filter_flat_hev_mask(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, &flat, &flat_status, &hev, bd); flat2 = flat_mask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, &flat2_status, bd); filter16(mask, flat, flat_status, flat2, flat2_status, hev, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, &op6, &op5, &op4, &op3, &op2, &op1, &op0, &oq0, &oq1, &oq2, &oq3, &oq4, &oq5, &oq6, bd); if (flat_status) { if (flat2_status) { store_7x8(s - 3, p, op6, op5, op4, op3, op2, op1, op0); store_7x8(s + 4, p, oq0, oq1, oq2, oq3, oq4, oq5, oq6); } else { // Note: store_6x8() is faster than transpose + store_8x8(). 
store_6x8(s, p, op2, op1, op0, oq0, oq1, oq2); } } else { store_4x8(s - 2, p, op1, op0, oq0, oq1); } } void vpx_highbd_lpf_horizontal_16_neon(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { uint16x8_t blimit_vec, limit_vec, thresh_vec; load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd); lpf_horizontal_16_kernel(s, p, blimit_vec, limit_vec, thresh_vec, bd); } void vpx_highbd_lpf_horizontal_16_dual_neon(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { uint16x8_t blimit_vec, limit_vec, thresh_vec; load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd); lpf_horizontal_16_kernel(s, p, blimit_vec, limit_vec, thresh_vec, bd); lpf_horizontal_16_kernel(s + 8, p, blimit_vec, limit_vec, thresh_vec, bd); } void vpx_highbd_lpf_vertical_16_neon(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { uint16x8_t blimit_vec, limit_vec, thresh_vec; load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd); lpf_vertical_16_kernel(s, p, blimit_vec, limit_vec, thresh_vec, bd); } void vpx_highbd_lpf_vertical_16_dual_neon(uint16_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { uint16x8_t blimit_vec, limit_vec, thresh_vec; load_thresh(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec, bd); lpf_vertical_16_kernel(s, p, blimit_vec, limit_vec, thresh_vec, bd); lpf_vertical_16_kernel(s + 8 * p, p, blimit_vec, limit_vec, thresh_vec, bd); } libvpx-1.8.2/vpx_dsp/arm/highbd_vpx_convolve8_neon.c000066400000000000000000001046551357355204000226170ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/

#include <arm_neon.h>
#include <assert.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/arm/transpose_neon.h"
#include "vpx_ports/mem.h"

static INLINE void load_4x4(const int16_t *s, const ptrdiff_t p,
                            int16x4_t *const s0, int16x4_t *const s1,
                            int16x4_t *const s2, int16x4_t *const s3) {
  *s0 = vld1_s16(s);
  s += p;
  *s1 = vld1_s16(s);
  s += p;
  *s2 = vld1_s16(s);
  s += p;
  *s3 = vld1_s16(s);
}

static INLINE void load_8x4(const uint16_t *s, const ptrdiff_t p,
                            uint16x8_t *const s0, uint16x8_t *const s1,
                            uint16x8_t *const s2, uint16x8_t *const s3) {
  *s0 = vld1q_u16(s);
  s += p;
  *s1 = vld1q_u16(s);
  s += p;
  *s2 = vld1q_u16(s);
  s += p;
  *s3 = vld1q_u16(s);
}

static INLINE void load_8x8(const int16_t *s, const ptrdiff_t p,
                            int16x8_t *const s0, int16x8_t *const s1,
                            int16x8_t *const s2, int16x8_t *const s3,
                            int16x8_t *const s4, int16x8_t *const s5,
                            int16x8_t *const s6, int16x8_t *const s7) {
  *s0 = vld1q_s16(s);
  s += p;
  *s1 = vld1q_s16(s);
  s += p;
  *s2 = vld1q_s16(s);
  s += p;
  *s3 = vld1q_s16(s);
  s += p;
  *s4 = vld1q_s16(s);
  s += p;
  *s5 = vld1q_s16(s);
  s += p;
  *s6 = vld1q_s16(s);
  s += p;
  *s7 = vld1q_s16(s);
}

static INLINE void store_8x8(uint16_t *s, const ptrdiff_t p,
                             const uint16x8_t s0, const uint16x8_t s1,
                             const uint16x8_t s2, const uint16x8_t s3,
                             const uint16x8_t s4, const uint16x8_t s5,
                             const uint16x8_t s6, const uint16x8_t s7) {
  vst1q_u16(s, s0);
  s += p;
  vst1q_u16(s, s1);
  s += p;
  vst1q_u16(s, s2);
  s += p;
  vst1q_u16(s, s3);
  s += p;
  vst1q_u16(s, s4);
  s += p;
  vst1q_u16(s, s5);
  s += p;
  vst1q_u16(s, s6);
  s += p;
  vst1q_u16(s, s7);
}

static INLINE int32x4_t highbd_convolve8_4(
    const int16x4_t s0, const int16x4_t s1, const int16x4_t s2,
    const int16x4_t s3, const int16x4_t s4, const int16x4_t s5,
    const int16x4_t s6, const int16x4_t s7, const int16x8_t filters) {
  const int16x4_t filters_lo = vget_low_s16(filters);
  const int16x4_t filters_hi = vget_high_s16(filters);
  int32x4_t sum;
  sum = vmull_lane_s16(s0, filters_lo, 0);
  sum = vmlal_lane_s16(sum, s1, filters_lo, 1);
  sum = vmlal_lane_s16(sum, s2, filters_lo, 2);
  sum = vmlal_lane_s16(sum, s3, filters_lo, 3);
  sum = vmlal_lane_s16(sum, s4, filters_hi, 0);
  sum = vmlal_lane_s16(sum, s5, filters_hi, 1);
  sum = vmlal_lane_s16(sum, s6, filters_hi, 2);
  sum = vmlal_lane_s16(sum, s7, filters_hi, 3);
  return sum;
}

static INLINE uint16x8_t highbd_convolve8_8(
    const int16x8_t s0, const int16x8_t s1, const int16x8_t s2,
    const int16x8_t s3, const int16x8_t s4, const int16x8_t s5,
    const int16x8_t s6, const int16x8_t s7, const int16x8_t filters,
    const uint16x8_t max) {
  const int16x4_t filters_lo = vget_low_s16(filters);
  const int16x4_t filters_hi = vget_high_s16(filters);
  int32x4_t sum0, sum1;
  uint16x8_t d;
  sum0 = vmull_lane_s16(vget_low_s16(s0), filters_lo, 0);
  sum0 = vmlal_lane_s16(sum0, vget_low_s16(s1), filters_lo, 1);
  sum0 = vmlal_lane_s16(sum0, vget_low_s16(s2), filters_lo, 2);
  sum0 = vmlal_lane_s16(sum0, vget_low_s16(s3), filters_lo, 3);
  sum0 = vmlal_lane_s16(sum0, vget_low_s16(s4), filters_hi, 0);
  sum0 = vmlal_lane_s16(sum0, vget_low_s16(s5), filters_hi, 1);
  sum0 = vmlal_lane_s16(sum0, vget_low_s16(s6), filters_hi, 2);
  sum0 = vmlal_lane_s16(sum0, vget_low_s16(s7), filters_hi, 3);
  sum1 = vmull_lane_s16(vget_high_s16(s0), filters_lo, 0);
  sum1 = vmlal_lane_s16(sum1, vget_high_s16(s1), filters_lo, 1);
  sum1 = vmlal_lane_s16(sum1, vget_high_s16(s2), filters_lo, 2);
  sum1 = vmlal_lane_s16(sum1, vget_high_s16(s3), filters_lo, 3);
  sum1 = vmlal_lane_s16(sum1, vget_high_s16(s4), filters_hi, 0);
  sum1 = vmlal_lane_s16(sum1, vget_high_s16(s5), filters_hi,
1); sum1 = vmlal_lane_s16(sum1, vget_high_s16(s6), filters_hi, 2); sum1 = vmlal_lane_s16(sum1, vget_high_s16(s7), filters_hi, 3); d = vcombine_u16(vqrshrun_n_s32(sum0, 7), vqrshrun_n_s32(sum1, 7)); d = vminq_u16(d, max); return d; } void vpx_highbd_convolve8_horiz_neon(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { if (x_step_q4 != 16) { vpx_highbd_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); } else { const int16x8_t filters = vld1q_s16(filter[x0_q4]); const uint16x8_t max = vdupq_n_u16((1 << bd) - 1); uint16x8_t t0, t1, t2, t3; assert(!((intptr_t)dst & 3)); assert(!(dst_stride & 3)); src -= 3; if (h == 4) { int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; int32x4_t d0, d1, d2, d3; uint16x8_t d01, d23; __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); load_8x4(src, src_stride, &t0, &t1, &t2, &t3); transpose_u16_8x4(&t0, &t1, &t2, &t3); s0 = vreinterpret_s16_u16(vget_low_u16(t0)); s1 = vreinterpret_s16_u16(vget_low_u16(t1)); s2 = vreinterpret_s16_u16(vget_low_u16(t2)); s3 = vreinterpret_s16_u16(vget_low_u16(t3)); s4 = vreinterpret_s16_u16(vget_high_u16(t0)); s5 = vreinterpret_s16_u16(vget_high_u16(t1)); s6 = vreinterpret_s16_u16(vget_high_u16(t2)); __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); src += 7; do { load_4x4((const int16_t *)src, src_stride, &s7, &s8, &s9, &s10); transpose_s16_4x4d(&s7, &s8, &s9, &s10); d0 = highbd_convolve8_4(s0, s1, s2, s3, s4, s5, s6, s7, filters); d1 = highbd_convolve8_4(s1, s2, s3, s4, s5, s6, s7, s8, filters); d2 = highbd_convolve8_4(s2, s3, s4, s5, s6, s7, s8, s9, filters); d3 = highbd_convolve8_4(s3, s4, s5, s6, s7, s8, s9, s10, filters); d01 = vcombine_u16(vqrshrun_n_s32(d0, 7), vqrshrun_n_s32(d1, 7)); d23 = vcombine_u16(vqrshrun_n_s32(d2, 7), vqrshrun_n_s32(d3, 7)); d01 = vminq_u16(d01, max); d23 = vminq_u16(d23, max); transpose_u16_4x4q(&d01, &d23); vst1_u16(dst + 0 * dst_stride, vget_low_u16(d01)); vst1_u16(dst + 1 * dst_stride, vget_low_u16(d23)); vst1_u16(dst + 2 * dst_stride, vget_high_u16(d01)); vst1_u16(dst + 3 * dst_stride, vget_high_u16(d23)); s0 = s4; s1 = s5; s2 = s6; s3 = s7; s4 = s8; s5 = s9; s6 = s10; src += 4; dst += 4; w -= 4; } while (w > 0); } else { int16x8_t t4, t5, t6, t7; int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; uint16x8_t d0, d1, d2, d3; if (w == 4) { do { load_8x8((const int16_t *)src, src_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); transpose_s16_8x8(&s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); load_8x8((const int16_t *)(src + 7), src_stride, &s7, &s8, &s9, &s10, &t4, &t5, &t6, &t7); src += 8 * src_stride; __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); __builtin_prefetch(dst + 4 * dst_stride); __builtin_prefetch(dst + 5 * dst_stride); __builtin_prefetch(dst + 6 * dst_stride); __builtin_prefetch(dst + 7 * dst_stride); transpose_s16_8x8(&s7, &s8, &s9, &s10, &t4, &t5, &t6, &t7); __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); 
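// highbd_convolve8_4() and highbd_convolve8_8() above evaluate the 8-tap
// sub-pixel FIR in 32-bit lanes: vmull/vmlal_lane_s16 accumulate
// filters[k] * src[k], then vqrshrun_n_s32(sum, 7) divides by 128 with
// rounding and unsigned saturation (FILTER_BITS is 7 in this library), and
// vminq_u16 clamps to (1 << bd) - 1. One output pixel in scalar form
// (sketch only):
#if 0
{
  int32_t sum = 0;
  int k;
  for (k = 0; k < 8; ++k) sum += (int32_t)filters[k] * src[k];
  sum = (sum + 64) >> 7;    /* round, FILTER_BITS == 7 */
  if (sum < 0) sum = 0;     /* vqrshrun saturates negatives to 0 */
  if (sum > max) sum = max; /* clamp to the bit depth */
  dst[x] = (uint16_t)sum;
}
#endif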
__builtin_prefetch(src + 4 * src_stride); __builtin_prefetch(src + 5 * src_stride); __builtin_prefetch(src + 6 * src_stride); __builtin_prefetch(src + 7 * src_stride); d0 = highbd_convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, max); d1 = highbd_convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, max); d2 = highbd_convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, max); d3 = highbd_convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, max); transpose_u16_8x4(&d0, &d1, &d2, &d3); vst1_u16(dst, vget_low_u16(d0)); dst += dst_stride; vst1_u16(dst, vget_low_u16(d1)); dst += dst_stride; vst1_u16(dst, vget_low_u16(d2)); dst += dst_stride; vst1_u16(dst, vget_low_u16(d3)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d0)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d1)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d2)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d3)); dst += dst_stride; h -= 8; } while (h > 0); } else { int width; const uint16_t *s; uint16_t *d; int16x8_t s11, s12, s13, s14; uint16x8_t d4, d5, d6, d7; do { __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); __builtin_prefetch(src + 4 * src_stride); __builtin_prefetch(src + 5 * src_stride); __builtin_prefetch(src + 6 * src_stride); __builtin_prefetch(src + 7 * src_stride); load_8x8((const int16_t *)src, src_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); transpose_s16_8x8(&s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); width = w; s = src + 7; d = dst; __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); __builtin_prefetch(dst + 4 * dst_stride); __builtin_prefetch(dst + 5 * dst_stride); __builtin_prefetch(dst + 6 * dst_stride); __builtin_prefetch(dst + 7 * dst_stride); do { load_8x8((const int16_t *)s, src_stride, &s7, &s8, &s9, &s10, &s11, &s12, &s13, &s14); transpose_s16_8x8(&s7, &s8, &s9, &s10, &s11, &s12, &s13, &s14); d0 = highbd_convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, max); d1 = highbd_convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, max); d2 = highbd_convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, max); d3 = highbd_convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, max); d4 = highbd_convolve8_8(s4, s5, s6, s7, s8, s9, s10, s11, filters, max); d5 = highbd_convolve8_8(s5, s6, s7, s8, s9, s10, s11, s12, filters, max); d6 = highbd_convolve8_8(s6, s7, s8, s9, s10, s11, s12, s13, filters, max); d7 = highbd_convolve8_8(s7, s8, s9, s10, s11, s12, s13, s14, filters, max); transpose_u16_8x8(&d0, &d1, &d2, &d3, &d4, &d5, &d6, &d7); store_8x8(d, dst_stride, d0, d1, d2, d3, d4, d5, d6, d7); s0 = s8; s1 = s9; s2 = s10; s3 = s11; s4 = s12; s5 = s13; s6 = s14; s += 8; d += 8; width -= 8; } while (width > 0); src += 8 * src_stride; dst += 8 * dst_stride; h -= 8; } while (h > 0); } } } } void vpx_highbd_convolve8_avg_horiz_neon(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { if (x_step_q4 != 16) { vpx_highbd_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); } else { const int16x8_t filters = vld1q_s16(filter[x0_q4]); const uint16x8_t max = vdupq_n_u16((1 << bd) - 1); uint16x8_t t0, t1, t2, t3; assert(!((intptr_t)dst & 3)); assert(!(dst_stride & 3)); src -= 3; if (h == 4) { int16x4_t s0, 
s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; int32x4_t d0, d1, d2, d3; uint16x8_t d01, d23, t01, t23; __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); load_8x4(src, src_stride, &t0, &t1, &t2, &t3); transpose_u16_8x4(&t0, &t1, &t2, &t3); s0 = vreinterpret_s16_u16(vget_low_u16(t0)); s1 = vreinterpret_s16_u16(vget_low_u16(t1)); s2 = vreinterpret_s16_u16(vget_low_u16(t2)); s3 = vreinterpret_s16_u16(vget_low_u16(t3)); s4 = vreinterpret_s16_u16(vget_high_u16(t0)); s5 = vreinterpret_s16_u16(vget_high_u16(t1)); s6 = vreinterpret_s16_u16(vget_high_u16(t2)); __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); src += 7; do { load_4x4((const int16_t *)src, src_stride, &s7, &s8, &s9, &s10); transpose_s16_4x4d(&s7, &s8, &s9, &s10); d0 = highbd_convolve8_4(s0, s1, s2, s3, s4, s5, s6, s7, filters); d1 = highbd_convolve8_4(s1, s2, s3, s4, s5, s6, s7, s8, filters); d2 = highbd_convolve8_4(s2, s3, s4, s5, s6, s7, s8, s9, filters); d3 = highbd_convolve8_4(s3, s4, s5, s6, s7, s8, s9, s10, filters); t01 = vcombine_u16(vqrshrun_n_s32(d0, 7), vqrshrun_n_s32(d1, 7)); t23 = vcombine_u16(vqrshrun_n_s32(d2, 7), vqrshrun_n_s32(d3, 7)); t01 = vminq_u16(t01, max); t23 = vminq_u16(t23, max); transpose_u16_4x4q(&t01, &t23); d01 = vcombine_u16(vld1_u16(dst + 0 * dst_stride), vld1_u16(dst + 2 * dst_stride)); d23 = vcombine_u16(vld1_u16(dst + 1 * dst_stride), vld1_u16(dst + 3 * dst_stride)); d01 = vrhaddq_u16(d01, t01); d23 = vrhaddq_u16(d23, t23); vst1_u16(dst + 0 * dst_stride, vget_low_u16(d01)); vst1_u16(dst + 1 * dst_stride, vget_low_u16(d23)); vst1_u16(dst + 2 * dst_stride, vget_high_u16(d01)); vst1_u16(dst + 3 * dst_stride, vget_high_u16(d23)); s0 = s4; s1 = s5; s2 = s6; s3 = s7; s4 = s8; s5 = s9; s6 = s10; src += 4; dst += 4; w -= 4; } while (w > 0); } else { int16x8_t t4, t5, t6, t7; int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; uint16x8_t d0, d1, d2, d3, t0, t1, t2, t3; if (w == 4) { do { load_8x8((const int16_t *)src, src_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); transpose_s16_8x8(&s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); load_8x8((const int16_t *)(src + 7), src_stride, &s7, &s8, &s9, &s10, &t4, &t5, &t6, &t7); src += 8 * src_stride; __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); __builtin_prefetch(dst + 4 * dst_stride); __builtin_prefetch(dst + 5 * dst_stride); __builtin_prefetch(dst + 6 * dst_stride); __builtin_prefetch(dst + 7 * dst_stride); transpose_s16_8x8(&s7, &s8, &s9, &s10, &t4, &t5, &t6, &t7); __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); __builtin_prefetch(src + 4 * src_stride); __builtin_prefetch(src + 5 * src_stride); __builtin_prefetch(src + 6 * src_stride); __builtin_prefetch(src + 7 * src_stride); t0 = highbd_convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, max); t1 = highbd_convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, max); t2 = highbd_convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, max); t3 = highbd_convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, max); transpose_u16_8x4(&t0, &t1, &t2, &t3); d0 = vcombine_u16(vld1_u16(dst + 0 * dst_stride), vld1_u16(dst + 4 * dst_stride)); d1 = 
vcombine_u16(vld1_u16(dst + 1 * dst_stride), vld1_u16(dst + 5 * dst_stride)); d2 = vcombine_u16(vld1_u16(dst + 2 * dst_stride), vld1_u16(dst + 6 * dst_stride)); d3 = vcombine_u16(vld1_u16(dst + 3 * dst_stride), vld1_u16(dst + 7 * dst_stride)); d0 = vrhaddq_u16(d0, t0); d1 = vrhaddq_u16(d1, t1); d2 = vrhaddq_u16(d2, t2); d3 = vrhaddq_u16(d3, t3); vst1_u16(dst, vget_low_u16(d0)); dst += dst_stride; vst1_u16(dst, vget_low_u16(d1)); dst += dst_stride; vst1_u16(dst, vget_low_u16(d2)); dst += dst_stride; vst1_u16(dst, vget_low_u16(d3)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d0)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d1)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d2)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d3)); dst += dst_stride; h -= 8; } while (h > 0); } else { int width; const uint16_t *s; uint16_t *d; int16x8_t s11, s12, s13, s14; uint16x8_t d4, d5, d6, d7; do { __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); __builtin_prefetch(src + 4 * src_stride); __builtin_prefetch(src + 5 * src_stride); __builtin_prefetch(src + 6 * src_stride); __builtin_prefetch(src + 7 * src_stride); load_8x8((const int16_t *)src, src_stride, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); transpose_s16_8x8(&s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); width = w; s = src + 7; d = dst; __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); __builtin_prefetch(dst + 4 * dst_stride); __builtin_prefetch(dst + 5 * dst_stride); __builtin_prefetch(dst + 6 * dst_stride); __builtin_prefetch(dst + 7 * dst_stride); do { load_8x8((const int16_t *)s, src_stride, &s7, &s8, &s9, &s10, &s11, &s12, &s13, &s14); transpose_s16_8x8(&s7, &s8, &s9, &s10, &s11, &s12, &s13, &s14); d0 = highbd_convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, max); d1 = highbd_convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, max); d2 = highbd_convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, max); d3 = highbd_convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, max); d4 = highbd_convolve8_8(s4, s5, s6, s7, s8, s9, s10, s11, filters, max); d5 = highbd_convolve8_8(s5, s6, s7, s8, s9, s10, s11, s12, filters, max); d6 = highbd_convolve8_8(s6, s7, s8, s9, s10, s11, s12, s13, filters, max); d7 = highbd_convolve8_8(s7, s8, s9, s10, s11, s12, s13, s14, filters, max); transpose_u16_8x8(&d0, &d1, &d2, &d3, &d4, &d5, &d6, &d7); d0 = vrhaddq_u16(d0, vld1q_u16(d + 0 * dst_stride)); d1 = vrhaddq_u16(d1, vld1q_u16(d + 1 * dst_stride)); d2 = vrhaddq_u16(d2, vld1q_u16(d + 2 * dst_stride)); d3 = vrhaddq_u16(d3, vld1q_u16(d + 3 * dst_stride)); d4 = vrhaddq_u16(d4, vld1q_u16(d + 4 * dst_stride)); d5 = vrhaddq_u16(d5, vld1q_u16(d + 5 * dst_stride)); d6 = vrhaddq_u16(d6, vld1q_u16(d + 6 * dst_stride)); d7 = vrhaddq_u16(d7, vld1q_u16(d + 7 * dst_stride)); store_8x8(d, dst_stride, d0, d1, d2, d3, d4, d5, d6, d7); s0 = s8; s1 = s9; s2 = s10; s3 = s11; s4 = s12; s5 = s13; s6 = s14; s += 8; d += 8; width -= 8; } while (width > 0); src += 8 * src_stride; dst += 8 * dst_stride; h -= 8; } while (h > 0); } } } } void vpx_highbd_convolve8_vert_neon(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { if (y_step_q4 != 16) { vpx_highbd_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4, 
x_step_q4, y0_q4, y_step_q4, w, h, bd); } else { const int16x8_t filters = vld1q_s16(filter[y0_q4]); const uint16x8_t max = vdupq_n_u16((1 << bd) - 1); assert(!((intptr_t)dst & 3)); assert(!(dst_stride & 3)); src -= 3 * src_stride; if (w == 4) { int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; int32x4_t d0, d1, d2, d3; uint16x8_t d01, d23; s0 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s1 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s2 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s3 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s4 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s5 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s6 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; do { s7 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s8 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s9 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s10 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); d0 = highbd_convolve8_4(s0, s1, s2, s3, s4, s5, s6, s7, filters); d1 = highbd_convolve8_4(s1, s2, s3, s4, s5, s6, s7, s8, filters); d2 = highbd_convolve8_4(s2, s3, s4, s5, s6, s7, s8, s9, filters); d3 = highbd_convolve8_4(s3, s4, s5, s6, s7, s8, s9, s10, filters); d01 = vcombine_u16(vqrshrun_n_s32(d0, 7), vqrshrun_n_s32(d1, 7)); d23 = vcombine_u16(vqrshrun_n_s32(d2, 7), vqrshrun_n_s32(d3, 7)); d01 = vminq_u16(d01, max); d23 = vminq_u16(d23, max); vst1_u16(dst, vget_low_u16(d01)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d01)); dst += dst_stride; vst1_u16(dst, vget_low_u16(d23)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d23)); dst += dst_stride; s0 = s4; s1 = s5; s2 = s6; s3 = s7; s4 = s8; s5 = s9; s6 = s10; h -= 4; } while (h > 0); } else { int height; const uint16_t *s; uint16_t *d; int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; uint16x8_t d0, d1, d2, d3; do { __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); __builtin_prefetch(src + 4 * src_stride); __builtin_prefetch(src + 5 * src_stride); __builtin_prefetch(src + 6 * src_stride); s = src; s0 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s1 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s2 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s3 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s4 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s5 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s6 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; d = dst; height = h; do { s7 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s8 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s9 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s10 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; __builtin_prefetch(d + 0 * dst_stride); __builtin_prefetch(d + 1 * dst_stride); __builtin_prefetch(d + 2 * dst_stride); __builtin_prefetch(d + 3 * dst_stride); __builtin_prefetch(s + 0 * src_stride); __builtin_prefetch(s + 1 * src_stride); __builtin_prefetch(s + 2 * src_stride); __builtin_prefetch(s + 3 * src_stride); 
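// Each iteration of the loop below slides an 11-row window (s0..s10) down the
// column: four new rows (s7..s10) are loaded and four output rows are
// computed, so adjacent outputs reuse seven of their eight 8-tap inputs.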
d0 = highbd_convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, max); d1 = highbd_convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, max); d2 = highbd_convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, max); d3 = highbd_convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, max); vst1q_u16(d, d0); d += dst_stride; vst1q_u16(d, d1); d += dst_stride; vst1q_u16(d, d2); d += dst_stride; vst1q_u16(d, d3); d += dst_stride; s0 = s4; s1 = s5; s2 = s6; s3 = s7; s4 = s8; s5 = s9; s6 = s10; height -= 4; } while (height > 0); src += 8; dst += 8; w -= 8; } while (w > 0); } } } void vpx_highbd_convolve8_avg_vert_neon(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { if (y_step_q4 != 16) { vpx_highbd_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); } else { const int16x8_t filters = vld1q_s16(filter[y0_q4]); const uint16x8_t max = vdupq_n_u16((1 << bd) - 1); assert(!((intptr_t)dst & 3)); assert(!(dst_stride & 3)); src -= 3 * src_stride; if (w == 4) { int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; int32x4_t d0, d1, d2, d3; uint16x8_t d01, d23, t01, t23; s0 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s1 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s2 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s3 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s4 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s5 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s6 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; do { s7 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s8 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s9 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; s10 = vreinterpret_s16_u16(vld1_u16(src)); src += src_stride; __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); d0 = highbd_convolve8_4(s0, s1, s2, s3, s4, s5, s6, s7, filters); d1 = highbd_convolve8_4(s1, s2, s3, s4, s5, s6, s7, s8, filters); d2 = highbd_convolve8_4(s2, s3, s4, s5, s6, s7, s8, s9, filters); d3 = highbd_convolve8_4(s3, s4, s5, s6, s7, s8, s9, s10, filters); t01 = vcombine_u16(vqrshrun_n_s32(d0, 7), vqrshrun_n_s32(d1, 7)); t23 = vcombine_u16(vqrshrun_n_s32(d2, 7), vqrshrun_n_s32(d3, 7)); t01 = vminq_u16(t01, max); t23 = vminq_u16(t23, max); d01 = vcombine_u16(vld1_u16(dst + 0 * dst_stride), vld1_u16(dst + 1 * dst_stride)); d23 = vcombine_u16(vld1_u16(dst + 2 * dst_stride), vld1_u16(dst + 3 * dst_stride)); d01 = vrhaddq_u16(d01, t01); d23 = vrhaddq_u16(d23, t23); vst1_u16(dst, vget_low_u16(d01)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d01)); dst += dst_stride; vst1_u16(dst, vget_low_u16(d23)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d23)); dst += dst_stride; s0 = s4; s1 = s5; s2 = s6; s3 = s7; s4 = s8; s5 = s9; s6 = s10; h -= 4; } while (h > 0); } else { int height; const uint16_t *s; uint16_t *d; int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; uint16x8_t d0, d1, d2, d3, t0, t1, t2, t3; do { __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); 
__builtin_prefetch(src + 3 * src_stride); __builtin_prefetch(src + 4 * src_stride); __builtin_prefetch(src + 5 * src_stride); __builtin_prefetch(src + 6 * src_stride); s = src; s0 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s1 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s2 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s3 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s4 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s5 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s6 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; d = dst; height = h; do { s7 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s8 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s9 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; s10 = vreinterpretq_s16_u16(vld1q_u16(s)); s += src_stride; __builtin_prefetch(d + 0 * dst_stride); __builtin_prefetch(d + 1 * dst_stride); __builtin_prefetch(d + 2 * dst_stride); __builtin_prefetch(d + 3 * dst_stride); __builtin_prefetch(s + 0 * src_stride); __builtin_prefetch(s + 1 * src_stride); __builtin_prefetch(s + 2 * src_stride); __builtin_prefetch(s + 3 * src_stride); t0 = highbd_convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, max); t1 = highbd_convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, max); t2 = highbd_convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, max); t3 = highbd_convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, max); d0 = vld1q_u16(d + 0 * dst_stride); d1 = vld1q_u16(d + 1 * dst_stride); d2 = vld1q_u16(d + 2 * dst_stride); d3 = vld1q_u16(d + 3 * dst_stride); d0 = vrhaddq_u16(d0, t0); d1 = vrhaddq_u16(d1, t1); d2 = vrhaddq_u16(d2, t2); d3 = vrhaddq_u16(d3, t3); vst1q_u16(d, d0); d += dst_stride; vst1q_u16(d, d1); d += dst_stride; vst1q_u16(d, d2); d += dst_stride; vst1q_u16(d, d3); d += dst_stride; s0 = s4; s1 = s5; s2 = s6; s3 = s7; s4 = s8; s5 = s9; s6 = s10; height -= 4; } while (height > 0); src += 8; dst += 8; w -= 8; } while (w > 0); } } } libvpx-1.8.2/vpx_dsp/arm/highbd_vpx_convolve_avg_neon.c000066400000000000000000000116541357355204000233600ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" void vpx_highbd_convolve_avg_neon(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; (void)bd; if (w < 8) { // avg4 uint16x4_t s0, s1, d0, d1; uint16x8_t s01, d01; do { s0 = vld1_u16(src); d0 = vld1_u16(dst); src += src_stride; s1 = vld1_u16(src); d1 = vld1_u16(dst + dst_stride); src += src_stride; s01 = vcombine_u16(s0, s1); d01 = vcombine_u16(d0, d1); d01 = vrhaddq_u16(s01, d01); vst1_u16(dst, vget_low_u16(d01)); dst += dst_stride; vst1_u16(dst, vget_high_u16(d01)); dst += dst_stride; h -= 2; } while (h > 0); } else if (w == 8) { // avg8 uint16x8_t s0, s1, d0, d1; do { s0 = vld1q_u16(src); d0 = vld1q_u16(dst); src += src_stride; s1 = vld1q_u16(src); d1 = vld1q_u16(dst + dst_stride); src += src_stride; d0 = vrhaddq_u16(s0, d0); d1 = vrhaddq_u16(s1, d1); vst1q_u16(dst, d0); dst += dst_stride; vst1q_u16(dst, d1); dst += dst_stride; h -= 2; } while (h > 0); } else if (w < 32) { // avg16 uint16x8_t s0l, s0h, s1l, s1h, d0l, d0h, d1l, d1h; do { s0l = vld1q_u16(src); s0h = vld1q_u16(src + 8); d0l = vld1q_u16(dst); d0h = vld1q_u16(dst + 8); src += src_stride; s1l = vld1q_u16(src); s1h = vld1q_u16(src + 8); d1l = vld1q_u16(dst + dst_stride); d1h = vld1q_u16(dst + dst_stride + 8); src += src_stride; d0l = vrhaddq_u16(s0l, d0l); d0h = vrhaddq_u16(s0h, d0h); d1l = vrhaddq_u16(s1l, d1l); d1h = vrhaddq_u16(s1h, d1h); vst1q_u16(dst, d0l); vst1q_u16(dst + 8, d0h); dst += dst_stride; vst1q_u16(dst, d1l); vst1q_u16(dst + 8, d1h); dst += dst_stride; h -= 2; } while (h > 0); } else if (w == 32) { // avg32 uint16x8_t s0, s1, s2, s3, d0, d1, d2, d3; do { s0 = vld1q_u16(src); s1 = vld1q_u16(src + 8); s2 = vld1q_u16(src + 16); s3 = vld1q_u16(src + 24); d0 = vld1q_u16(dst); d1 = vld1q_u16(dst + 8); d2 = vld1q_u16(dst + 16); d3 = vld1q_u16(dst + 24); src += src_stride; d0 = vrhaddq_u16(s0, d0); d1 = vrhaddq_u16(s1, d1); d2 = vrhaddq_u16(s2, d2); d3 = vrhaddq_u16(s3, d3); vst1q_u16(dst, d0); vst1q_u16(dst + 8, d1); vst1q_u16(dst + 16, d2); vst1q_u16(dst + 24, d3); dst += dst_stride; s0 = vld1q_u16(src); s1 = vld1q_u16(src + 8); s2 = vld1q_u16(src + 16); s3 = vld1q_u16(src + 24); d0 = vld1q_u16(dst); d1 = vld1q_u16(dst + 8); d2 = vld1q_u16(dst + 16); d3 = vld1q_u16(dst + 24); src += src_stride; d0 = vrhaddq_u16(s0, d0); d1 = vrhaddq_u16(s1, d1); d2 = vrhaddq_u16(s2, d2); d3 = vrhaddq_u16(s3, d3); vst1q_u16(dst, d0); vst1q_u16(dst + 8, d1); vst1q_u16(dst + 16, d2); vst1q_u16(dst + 24, d3); dst += dst_stride; h -= 2; } while (h > 0); } else { // avg64 uint16x8_t s0, s1, s2, s3, d0, d1, d2, d3; do { s0 = vld1q_u16(src); s1 = vld1q_u16(src + 8); s2 = vld1q_u16(src + 16); s3 = vld1q_u16(src + 24); d0 = vld1q_u16(dst); d1 = vld1q_u16(dst + 8); d2 = vld1q_u16(dst + 16); d3 = vld1q_u16(dst + 24); d0 = vrhaddq_u16(s0, d0); d1 = vrhaddq_u16(s1, d1); d2 = vrhaddq_u16(s2, d2); d3 = vrhaddq_u16(s3, d3); vst1q_u16(dst, d0); vst1q_u16(dst + 8, d1); vst1q_u16(dst + 16, d2); vst1q_u16(dst + 24, d3); s0 = vld1q_u16(src + 32); s1 = vld1q_u16(src + 40); s2 = vld1q_u16(src + 48); s3 = vld1q_u16(src + 56); d0 = vld1q_u16(dst + 32); d1 = vld1q_u16(dst + 40); d2 = vld1q_u16(dst + 48); d3 = vld1q_u16(dst + 56); d0 = vrhaddq_u16(s0, d0); d1 = vrhaddq_u16(s1, d1); d2 = vrhaddq_u16(s2, d2); d3 = vrhaddq_u16(s3, d3); vst1q_u16(dst + 32, d0); vst1q_u16(dst + 40, d1);
vst1q_u16(dst + 48, d2); vst1q_u16(dst + 56, d3); src += src_stride; dst += dst_stride; } while (--h); } } libvpx-1.8.2/vpx_dsp/arm/highbd_vpx_convolve_copy_neon.c000066400000000000000000000055501357355204000235530ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" void vpx_highbd_convolve_copy_neon(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; (void)bd; if (w < 8) { // copy4 do { vst1_u16(dst, vld1_u16(src)); src += src_stride; dst += dst_stride; vst1_u16(dst, vld1_u16(src)); src += src_stride; dst += dst_stride; h -= 2; } while (h > 0); } else if (w == 8) { // copy8 do { vst1q_u16(dst, vld1q_u16(src)); src += src_stride; dst += dst_stride; vst1q_u16(dst, vld1q_u16(src)); src += src_stride; dst += dst_stride; h -= 2; } while (h > 0); } else if (w < 32) { // copy16 do { vst2q_u16(dst, vld2q_u16(src)); src += src_stride; dst += dst_stride; vst2q_u16(dst, vld2q_u16(src)); src += src_stride; dst += dst_stride; vst2q_u16(dst, vld2q_u16(src)); src += src_stride; dst += dst_stride; vst2q_u16(dst, vld2q_u16(src)); src += src_stride; dst += dst_stride; h -= 4; } while (h > 0); } else if (w == 32) { // copy32 do { vst4q_u16(dst, vld4q_u16(src)); src += src_stride; dst += dst_stride; vst4q_u16(dst, vld4q_u16(src)); src += src_stride; dst += dst_stride; vst4q_u16(dst, vld4q_u16(src)); src += src_stride; dst += dst_stride; vst4q_u16(dst, vld4q_u16(src)); src += src_stride; dst += dst_stride; h -= 4; } while (h > 0); } else { // copy64 do { vst4q_u16(dst, vld4q_u16(src)); vst4q_u16(dst + 32, vld4q_u16(src + 32)); src += src_stride; dst += dst_stride; vst4q_u16(dst, vld4q_u16(src)); vst4q_u16(dst + 32, vld4q_u16(src + 32)); src += src_stride; dst += dst_stride; vst4q_u16(dst, vld4q_u16(src)); vst4q_u16(dst + 32, vld4q_u16(src + 32)); src += src_stride; dst += dst_stride; vst4q_u16(dst, vld4q_u16(src)); vst4q_u16(dst + 32, vld4q_u16(src + 32)); src += src_stride; dst += dst_stride; h -= 4; } while (h > 0); } } libvpx-1.8.2/vpx_dsp/arm/highbd_vpx_convolve_neon.c000066400000000000000000000054361357355204000225240ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" void vpx_highbd_convolve8_neon(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { // + 1 to make it divisible by 4 uint16_t temp[64 * 136]; const int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; /* Filter starting 3 lines back. The neon implementation will ignore the given * height and filter a multiple of 4 lines. Since this goes in to the temp * buffer which has lots of extra room and is subsequently discarded this is * safe if somewhat less than ideal. */ vpx_highbd_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, intermediate_height, bd); /* Step into the temp buffer 3 lines to get the actual frame data */ vpx_highbd_convolve8_vert_neon(temp + w * 3, w, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); } void vpx_highbd_convolve8_avg_neon(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { // + 1 to make it divisible by 4 uint16_t temp[64 * 136]; const int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; /* This implementation has the same issues as above. In addition, we only want * to average the values after both passes. */ vpx_highbd_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, intermediate_height, bd); vpx_highbd_convolve8_avg_vert_neon(temp + w * 3, w, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); } libvpx-1.8.2/vpx_dsp/arm/idct16x16_1_add_neon.c000066400000000000000000000061361357355204000211460ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/inv_txfm.h" static INLINE void idct16x16_1_add_pos_kernel(uint8_t **dest, const int stride, const uint8x16_t res) { const uint8x16_t a = vld1q_u8(*dest); const uint8x16_t b = vqaddq_u8(a, res); vst1q_u8(*dest, b); *dest += stride; } static INLINE void idct16x16_1_add_neg_kernel(uint8_t **dest, const int stride, const uint8x16_t res) { const uint8x16_t a = vld1q_u8(*dest); const uint8x16_t b = vqsubq_u8(a, res); vst1q_u8(*dest, b); *dest += stride; } void vpx_idct16x16_1_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { const int16_t out0 = WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64)); const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6); if (a1 >= 0) { const uint8x16_t dc = create_dcq(a1); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); idct16x16_1_add_pos_kernel(&dest, stride, dc); } else { const uint8x16_t dc = create_dcq(-a1); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); idct16x16_1_add_neg_kernel(&dest, stride, dc); } } libvpx-1.8.2/vpx_dsp/arm/idct16x16_add_neon.c000066400000000000000000000626151357355204000207300ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/txfm_common.h" static INLINE void wrap_low_4x2(const int32x4_t *const t32, int16x4_t *const d0, int16x4_t *const d1) { *d0 = vrshrn_n_s32(t32[0], DCT_CONST_BITS); *d1 = vrshrn_n_s32(t32[1], DCT_CONST_BITS); } static INLINE void idct_cospi_8_24_d_kernel(const int16x4_t s0, const int16x4_t s1, const int16x4_t cospi_0_8_16_24, int32x4_t *const t32) { t32[0] = vmull_lane_s16(s0, cospi_0_8_16_24, 3); t32[1] = vmull_lane_s16(s1, cospi_0_8_16_24, 3); t32[0] = vmlsl_lane_s16(t32[0], s1, cospi_0_8_16_24, 1); t32[1] = vmlal_lane_s16(t32[1], s0, cospi_0_8_16_24, 1); } static INLINE void idct_cospi_8_24_d(const int16x4_t s0, const int16x4_t s1, const int16x4_t cospi_0_8_16_24, int16x4_t *const d0, int16x4_t *const d1) { int32x4_t t32[2]; idct_cospi_8_24_d_kernel(s0, s1, cospi_0_8_16_24, t32); wrap_low_4x2(t32, d0, d1); } static INLINE void idct_cospi_8_24_neg_d(const int16x4_t s0, const int16x4_t s1, const int16x4_t cospi_0_8_16_24, int16x4_t *const d0, int16x4_t *const d1) { int32x4_t t32[2]; idct_cospi_8_24_d_kernel(s0, s1, cospi_0_8_16_24, t32); t32[1] = vnegq_s32(t32[1]); wrap_low_4x2(t32, d0, d1); } static INLINE void idct_cospi_16_16_d(const int16x4_t s0, const int16x4_t s1, const int16x4_t cospi_0_8_16_24, int16x4_t *const d0, int16x4_t *const d1) { int32x4_t t32[3]; t32[2] = vmull_lane_s16(s1, cospi_0_8_16_24, 2); t32[0] = vmlsl_lane_s16(t32[2], s0, cospi_0_8_16_24, 2); t32[1] = vmlal_lane_s16(t32[2], s0, cospi_0_8_16_24, 2); wrap_low_4x2(t32, d0, d1); } void vpx_idct16x16_256_add_half1d(const void *const input, int16_t *output, void *const dest, const int stride, const int highbd_flag) { const int16x8_t cospis0 = vld1q_s16(kCospi); const int16x8_t cospis1 = vld1q_s16(kCospi + 8); const int16x4_t cospi_0_8_16_24 = vget_low_s16(cospis0); const int16x4_t cospi_4_12_20N_28 = vget_high_s16(cospis0); const int16x4_t cospi_2_30_10_22 = vget_low_s16(cospis1); const int16x4_t cospi_6_26N_14_18N = vget_high_s16(cospis1); int16x8_t in[16], step1[16], step2[16], out[16]; // Load input (16x8) if (output) { const tran_low_t *inputT = (const tran_low_t *)input; in[0] = load_tran_low_to_s16q(inputT); inputT += 8; in[8] = load_tran_low_to_s16q(inputT); inputT += 8; in[1] = load_tran_low_to_s16q(inputT); inputT += 8; in[9] = load_tran_low_to_s16q(inputT); inputT += 8; in[2] = load_tran_low_to_s16q(inputT); inputT += 8; in[10] = load_tran_low_to_s16q(inputT); inputT += 8; in[3] = load_tran_low_to_s16q(inputT); inputT += 8; in[11] = load_tran_low_to_s16q(inputT); inputT += 8; in[4] = load_tran_low_to_s16q(inputT); inputT += 8; in[12] = load_tran_low_to_s16q(inputT); inputT += 8; in[5] = load_tran_low_to_s16q(inputT); inputT += 8; in[13] = load_tran_low_to_s16q(inputT); inputT += 8; in[6] = load_tran_low_to_s16q(inputT); inputT += 8; in[14] = load_tran_low_to_s16q(inputT); inputT += 8; in[7] = load_tran_low_to_s16q(inputT); inputT += 8; in[15] = load_tran_low_to_s16q(inputT); } else { const int16_t *inputT = (const int16_t *)input; in[0] = vld1q_s16(inputT); inputT += 8; in[8] = vld1q_s16(inputT); inputT += 8; in[1] = vld1q_s16(inputT); inputT += 8; in[9] = vld1q_s16(inputT); inputT += 8; in[2] = vld1q_s16(inputT); inputT += 8; in[10] = vld1q_s16(inputT); inputT += 8; in[3] = vld1q_s16(inputT); inputT += 8; in[11] = vld1q_s16(inputT); inputT += 8; in[4] = vld1q_s16(inputT); inputT += 8; in[12] = vld1q_s16(inputT); inputT += 8; in[5] = vld1q_s16(inputT); inputT += 8; in[13] =
vld1q_s16(inputT); inputT += 8; in[6] = vld1q_s16(inputT); inputT += 8; in[14] = vld1q_s16(inputT); inputT += 8; in[7] = vld1q_s16(inputT); inputT += 8; in[15] = vld1q_s16(inputT); } // Transpose transpose_s16_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); transpose_s16_8x8(&in[8], &in[9], &in[10], &in[11], &in[12], &in[13], &in[14], &in[15]); // stage 1 step1[0] = in[0 / 2]; step1[1] = in[16 / 2]; step1[2] = in[8 / 2]; step1[3] = in[24 / 2]; step1[4] = in[4 / 2]; step1[5] = in[20 / 2]; step1[6] = in[12 / 2]; step1[7] = in[28 / 2]; step1[8] = in[2 / 2]; step1[9] = in[18 / 2]; step1[10] = in[10 / 2]; step1[11] = in[26 / 2]; step1[12] = in[6 / 2]; step1[13] = in[22 / 2]; step1[14] = in[14 / 2]; step1[15] = in[30 / 2]; // stage 2 step2[0] = step1[0]; step2[1] = step1[1]; step2[2] = step1[2]; step2[3] = step1[3]; step2[4] = step1[4]; step2[5] = step1[5]; step2[6] = step1[6]; step2[7] = step1[7]; idct_cospi_2_30(step1[8], step1[15], cospi_2_30_10_22, &step2[8], &step2[15]); idct_cospi_14_18(step1[9], step1[14], cospi_6_26N_14_18N, &step2[9], &step2[14]); idct_cospi_10_22(step1[10], step1[13], cospi_2_30_10_22, &step2[10], &step2[13]); idct_cospi_6_26(step1[11], step1[12], cospi_6_26N_14_18N, &step2[11], &step2[12]); // stage 3 step1[0] = step2[0]; step1[1] = step2[1]; step1[2] = step2[2]; step1[3] = step2[3]; idct_cospi_4_28(step2[4], step2[7], cospi_4_12_20N_28, &step1[4], &step1[7]); idct_cospi_12_20(step2[5], step2[6], cospi_4_12_20N_28, &step1[5], &step1[6]); step1[8] = vaddq_s16(step2[8], step2[9]); step1[9] = vsubq_s16(step2[8], step2[9]); step1[10] = vsubq_s16(step2[11], step2[10]); step1[11] = vaddq_s16(step2[11], step2[10]); step1[12] = vaddq_s16(step2[12], step2[13]); step1[13] = vsubq_s16(step2[12], step2[13]); step1[14] = vsubq_s16(step2[15], step2[14]); step1[15] = vaddq_s16(step2[15], step2[14]); // stage 4 idct_cospi_16_16_q(step1[1], step1[0], cospi_0_8_16_24, &step2[1], &step2[0]); idct_cospi_8_24_q(step1[2], step1[3], cospi_0_8_16_24, &step2[2], &step2[3]); step2[4] = vaddq_s16(step1[4], step1[5]); step2[5] = vsubq_s16(step1[4], step1[5]); step2[6] = vsubq_s16(step1[7], step1[6]); step2[7] = vaddq_s16(step1[7], step1[6]); step2[8] = step1[8]; idct_cospi_8_24_q(step1[14], step1[9], cospi_0_8_16_24, &step2[9], &step2[14]); idct_cospi_8_24_neg_q(step1[13], step1[10], cospi_0_8_16_24, &step2[13], &step2[10]); step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; // stage 5 step1[0] = vaddq_s16(step2[0], step2[3]); step1[1] = vaddq_s16(step2[1], step2[2]); step1[2] = vsubq_s16(step2[1], step2[2]); step1[3] = vsubq_s16(step2[0], step2[3]); step1[4] = step2[4]; idct_cospi_16_16_q(step2[5], step2[6], cospi_0_8_16_24, &step1[5], &step1[6]); step1[7] = step2[7]; step1[8] = vaddq_s16(step2[8], step2[11]); step1[9] = vaddq_s16(step2[9], step2[10]); step1[10] = vsubq_s16(step2[9], step2[10]); step1[11] = vsubq_s16(step2[8], step2[11]); step1[12] = vsubq_s16(step2[15], step2[12]); step1[13] = vsubq_s16(step2[14], step2[13]); step1[14] = vaddq_s16(step2[14], step2[13]); step1[15] = vaddq_s16(step2[15], step2[12]); // stage 6 step2[0] = vaddq_s16(step1[0], step1[7]); step2[1] = vaddq_s16(step1[1], step1[6]); step2[2] = vaddq_s16(step1[2], step1[5]); step2[3] = vaddq_s16(step1[3], step1[4]); step2[4] = vsubq_s16(step1[3], step1[4]); step2[5] = vsubq_s16(step1[2], step1[5]); step2[6] = vsubq_s16(step1[1], step1[6]); step2[7] = vsubq_s16(step1[0], step1[7]); idct_cospi_16_16_q(step1[10], step1[13], cospi_0_8_16_24, &step2[10], &step2[13]); 
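// The matching butterfly below applies the same +/-cospi_16_64 rotation to
// rows 11 and 12; together the two calls are the only true multiplications
// in stage 6, the rest being adds and subtracts.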
idct_cospi_16_16_q(step1[11], step1[12], cospi_0_8_16_24, &step2[11], &step2[12]); step2[8] = step1[8]; step2[9] = step1[9]; step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 idct16x16_add_stage7(step2, out); if (output) { idct16x16_store_pass1(out, output); } else { if (highbd_flag) { idct16x16_add_store_bd8(out, dest, stride); } else { idct16x16_add_store(out, dest, stride); } } } void vpx_idct16x16_38_add_half1d(const void *const input, int16_t *const output, void *const dest, const int stride, const int highbd_flag) { const int16x8_t cospis0 = vld1q_s16(kCospi); const int16x8_t cospis1 = vld1q_s16(kCospi + 8); const int16x8_t cospisd0 = vaddq_s16(cospis0, cospis0); const int16x8_t cospisd1 = vaddq_s16(cospis1, cospis1); const int16x4_t cospi_0_8_16_24 = vget_low_s16(cospis0); const int16x4_t cospid_0_8_16_24 = vget_low_s16(cospisd0); const int16x4_t cospid_4_12_20N_28 = vget_high_s16(cospisd0); const int16x4_t cospid_2_30_10_22 = vget_low_s16(cospisd1); const int16x4_t cospid_6_26_14_18N = vget_high_s16(cospisd1); int16x8_t in[8], step1[16], step2[16], out[16]; // Load input (8x8) if (output) { const tran_low_t *inputT = (const tran_low_t *)input; in[0] = load_tran_low_to_s16q(inputT); inputT += 16; in[1] = load_tran_low_to_s16q(inputT); inputT += 16; in[2] = load_tran_low_to_s16q(inputT); inputT += 16; in[3] = load_tran_low_to_s16q(inputT); inputT += 16; in[4] = load_tran_low_to_s16q(inputT); inputT += 16; in[5] = load_tran_low_to_s16q(inputT); inputT += 16; in[6] = load_tran_low_to_s16q(inputT); inputT += 16; in[7] = load_tran_low_to_s16q(inputT); } else { const int16_t *inputT = (const int16_t *)input; in[0] = vld1q_s16(inputT); inputT += 16; in[1] = vld1q_s16(inputT); inputT += 16; in[2] = vld1q_s16(inputT); inputT += 16; in[3] = vld1q_s16(inputT); inputT += 16; in[4] = vld1q_s16(inputT); inputT += 16; in[5] = vld1q_s16(inputT); inputT += 16; in[6] = vld1q_s16(inputT); inputT += 16; in[7] = vld1q_s16(inputT); } // Transpose transpose_s16_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); // stage 1 step1[0] = in[0 / 2]; step1[2] = in[8 / 2]; step1[4] = in[4 / 2]; step1[6] = in[12 / 2]; step1[8] = in[2 / 2]; step1[10] = in[10 / 2]; step1[12] = in[6 / 2]; step1[14] = in[14 / 2]; // 0 in pass 1 // stage 2 step2[0] = step1[0]; step2[2] = step1[2]; step2[4] = step1[4]; step2[6] = step1[6]; step2[8] = vqrdmulhq_lane_s16(step1[8], cospid_2_30_10_22, 1); step2[9] = vqrdmulhq_lane_s16(step1[14], cospid_6_26_14_18N, 3); step2[10] = vqrdmulhq_lane_s16(step1[10], cospid_2_30_10_22, 3); step2[11] = vqrdmulhq_lane_s16(step1[12], cospid_6_26_14_18N, 1); step2[12] = vqrdmulhq_lane_s16(step1[12], cospid_6_26_14_18N, 0); step2[13] = vqrdmulhq_lane_s16(step1[10], cospid_2_30_10_22, 2); step2[14] = vqrdmulhq_lane_s16(step1[14], cospid_6_26_14_18N, 2); step2[15] = vqrdmulhq_lane_s16(step1[8], cospid_2_30_10_22, 0); // stage 3 step1[0] = step2[0]; step1[2] = step2[2]; step1[4] = vqrdmulhq_lane_s16(step2[4], cospid_4_12_20N_28, 3); step1[5] = vqrdmulhq_lane_s16(step2[6], cospid_4_12_20N_28, 2); step1[6] = vqrdmulhq_lane_s16(step2[6], cospid_4_12_20N_28, 1); step1[7] = vqrdmulhq_lane_s16(step2[4], cospid_4_12_20N_28, 0); step1[8] = vaddq_s16(step2[8], step2[9]); step1[9] = vsubq_s16(step2[8], step2[9]); step1[10] = vsubq_s16(step2[11], step2[10]); step1[11] = vaddq_s16(step2[11], step2[10]); step1[12] = vaddq_s16(step2[12], step2[13]); step1[13] = vsubq_s16(step2[12], step2[13]); step1[14] = vsubq_s16(step2[15], step2[14]); step1[15] = vaddq_s16(step2[15], step2[14]); // stage 4 
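// In this reduced-coefficient path the odd-indexed partners of these
// butterflies are zero, so each collapses to a single multiply.
// vqrdmulhq_lane_s16 computes (2 * a * b + (1 << 15)) >> 16, so multiplying
// by the doubled cosine tables (cospisd0/1 = cospis0/1 + cospis0/1) equals
// dct_const_round_shift(a * cospi) = (a * cospi + (1 << 13)) >> 14 exactly.
// Rows with two nonzero inputs (9/14 and 10/13) still take the full
// idct_cospi_8_24 rotations.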
step2[0] = step2[1] = vqrdmulhq_lane_s16(step1[0], cospid_0_8_16_24, 2); step2[2] = vqrdmulhq_lane_s16(step1[2], cospid_0_8_16_24, 3); step2[3] = vqrdmulhq_lane_s16(step1[2], cospid_0_8_16_24, 1); step2[4] = vaddq_s16(step1[4], step1[5]); step2[5] = vsubq_s16(step1[4], step1[5]); step2[6] = vsubq_s16(step1[7], step1[6]); step2[7] = vaddq_s16(step1[7], step1[6]); step2[8] = step1[8]; idct_cospi_8_24_q(step1[14], step1[9], cospi_0_8_16_24, &step2[9], &step2[14]); idct_cospi_8_24_neg_q(step1[13], step1[10], cospi_0_8_16_24, &step2[13], &step2[10]); step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; // stage 5 step1[0] = vaddq_s16(step2[0], step2[3]); step1[1] = vaddq_s16(step2[1], step2[2]); step1[2] = vsubq_s16(step2[1], step2[2]); step1[3] = vsubq_s16(step2[0], step2[3]); step1[4] = step2[4]; idct_cospi_16_16_q(step2[5], step2[6], cospi_0_8_16_24, &step1[5], &step1[6]); step1[7] = step2[7]; step1[8] = vaddq_s16(step2[8], step2[11]); step1[9] = vaddq_s16(step2[9], step2[10]); step1[10] = vsubq_s16(step2[9], step2[10]); step1[11] = vsubq_s16(step2[8], step2[11]); step1[12] = vsubq_s16(step2[15], step2[12]); step1[13] = vsubq_s16(step2[14], step2[13]); step1[14] = vaddq_s16(step2[14], step2[13]); step1[15] = vaddq_s16(step2[15], step2[12]); // stage 6 step2[0] = vaddq_s16(step1[0], step1[7]); step2[1] = vaddq_s16(step1[1], step1[6]); step2[2] = vaddq_s16(step1[2], step1[5]); step2[3] = vaddq_s16(step1[3], step1[4]); step2[4] = vsubq_s16(step1[3], step1[4]); step2[5] = vsubq_s16(step1[2], step1[5]); step2[6] = vsubq_s16(step1[1], step1[6]); step2[7] = vsubq_s16(step1[0], step1[7]); idct_cospi_16_16_q(step1[10], step1[13], cospi_0_8_16_24, &step2[10], &step2[13]); idct_cospi_16_16_q(step1[11], step1[12], cospi_0_8_16_24, &step2[11], &step2[12]); step2[8] = step1[8]; step2[9] = step1[9]; step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 idct16x16_add_stage7(step2, out); if (output) { idct16x16_store_pass1(out, output); } else { if (highbd_flag) { idct16x16_add_store_bd8(out, dest, stride); } else { idct16x16_add_store(out, dest, stride); } } } void vpx_idct16x16_10_add_half1d_pass1(const tran_low_t *input, int16_t *output) { const int16x8_t cospis0 = vld1q_s16(kCospi); const int16x8_t cospis1 = vld1q_s16(kCospi + 8); const int16x8_t cospisd0 = vaddq_s16(cospis0, cospis0); const int16x8_t cospisd1 = vaddq_s16(cospis1, cospis1); const int16x4_t cospi_0_8_16_24 = vget_low_s16(cospis0); const int16x4_t cospid_0_8_16_24 = vget_low_s16(cospisd0); const int16x4_t cospid_4_12_20N_28 = vget_high_s16(cospisd0); const int16x4_t cospid_2_30_10_22 = vget_low_s16(cospisd1); const int16x4_t cospid_6_26_14_18N = vget_high_s16(cospisd1); int16x4_t in[4], step1[16], step2[16], out[16]; // Load input (4x4) in[0] = load_tran_low_to_s16d(input); input += 16; in[1] = load_tran_low_to_s16d(input); input += 16; in[2] = load_tran_low_to_s16d(input); input += 16; in[3] = load_tran_low_to_s16d(input); // Transpose transpose_s16_4x4d(&in[0], &in[1], &in[2], &in[3]); // stage 1 step1[0] = in[0 / 2]; step1[4] = in[4 / 2]; step1[8] = in[2 / 2]; step1[12] = in[6 / 2]; // stage 2 step2[0] = step1[0]; step2[4] = step1[4]; step2[8] = vqrdmulh_lane_s16(step1[8], cospid_2_30_10_22, 1); step2[11] = vqrdmulh_lane_s16(step1[12], cospid_6_26_14_18N, 1); step2[12] = vqrdmulh_lane_s16(step1[12], cospid_6_26_14_18N, 0); step2[15] = vqrdmulh_lane_s16(step1[8], cospid_2_30_10_22, 0); // stage 3 step1[0] = step2[0]; step1[4] = vqrdmulh_lane_s16(step2[4], cospid_4_12_20N_28, 3); step1[7] = vqrdmulh_lane_s16(step2[4], 
cospid_4_12_20N_28, 0); step1[8] = step2[8]; step1[9] = step2[8]; step1[10] = step2[11]; step1[11] = step2[11]; step1[12] = step2[12]; step1[13] = step2[12]; step1[14] = step2[15]; step1[15] = step2[15]; // stage 4 step2[0] = step2[1] = vqrdmulh_lane_s16(step1[0], cospid_0_8_16_24, 2); step2[4] = step1[4]; step2[5] = step1[4]; step2[6] = step1[7]; step2[7] = step1[7]; step2[8] = step1[8]; idct_cospi_8_24_d(step1[14], step1[9], cospi_0_8_16_24, &step2[9], &step2[14]); idct_cospi_8_24_neg_d(step1[13], step1[10], cospi_0_8_16_24, &step2[13], &step2[10]); step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; // stage 5 step1[0] = step2[0]; step1[1] = step2[1]; step1[2] = step2[1]; step1[3] = step2[0]; step1[4] = step2[4]; idct_cospi_16_16_d(step2[5], step2[6], cospi_0_8_16_24, &step1[5], &step1[6]); step1[7] = step2[7]; step1[8] = vadd_s16(step2[8], step2[11]); step1[9] = vadd_s16(step2[9], step2[10]); step1[10] = vsub_s16(step2[9], step2[10]); step1[11] = vsub_s16(step2[8], step2[11]); step1[12] = vsub_s16(step2[15], step2[12]); step1[13] = vsub_s16(step2[14], step2[13]); step1[14] = vadd_s16(step2[14], step2[13]); step1[15] = vadd_s16(step2[15], step2[12]); // stage 6 step2[0] = vadd_s16(step1[0], step1[7]); step2[1] = vadd_s16(step1[1], step1[6]); step2[2] = vadd_s16(step1[2], step1[5]); step2[3] = vadd_s16(step1[3], step1[4]); step2[4] = vsub_s16(step1[3], step1[4]); step2[5] = vsub_s16(step1[2], step1[5]); step2[6] = vsub_s16(step1[1], step1[6]); step2[7] = vsub_s16(step1[0], step1[7]); idct_cospi_16_16_d(step1[10], step1[13], cospi_0_8_16_24, &step2[10], &step2[13]); idct_cospi_16_16_d(step1[11], step1[12], cospi_0_8_16_24, &step2[11], &step2[12]); step2[8] = step1[8]; step2[9] = step1[9]; step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 out[0] = vadd_s16(step2[0], step2[15]); out[1] = vadd_s16(step2[1], step2[14]); out[2] = vadd_s16(step2[2], step2[13]); out[3] = vadd_s16(step2[3], step2[12]); out[4] = vadd_s16(step2[4], step2[11]); out[5] = vadd_s16(step2[5], step2[10]); out[6] = vadd_s16(step2[6], step2[9]); out[7] = vadd_s16(step2[7], step2[8]); out[8] = vsub_s16(step2[7], step2[8]); out[9] = vsub_s16(step2[6], step2[9]); out[10] = vsub_s16(step2[5], step2[10]); out[11] = vsub_s16(step2[4], step2[11]); out[12] = vsub_s16(step2[3], step2[12]); out[13] = vsub_s16(step2[2], step2[13]); out[14] = vsub_s16(step2[1], step2[14]); out[15] = vsub_s16(step2[0], step2[15]); // pass 1: save the result into output vst1_s16(output, out[0]); output += 4; vst1_s16(output, out[1]); output += 4; vst1_s16(output, out[2]); output += 4; vst1_s16(output, out[3]); output += 4; vst1_s16(output, out[4]); output += 4; vst1_s16(output, out[5]); output += 4; vst1_s16(output, out[6]); output += 4; vst1_s16(output, out[7]); output += 4; vst1_s16(output, out[8]); output += 4; vst1_s16(output, out[9]); output += 4; vst1_s16(output, out[10]); output += 4; vst1_s16(output, out[11]); output += 4; vst1_s16(output, out[12]); output += 4; vst1_s16(output, out[13]); output += 4; vst1_s16(output, out[14]); output += 4; vst1_s16(output, out[15]); } void vpx_idct16x16_10_add_half1d_pass2(const int16_t *input, int16_t *const output, void *const dest, const int stride, const int highbd_flag) { const int16x8_t cospis0 = vld1q_s16(kCospi); const int16x8_t cospis1 = vld1q_s16(kCospi + 8); const int16x8_t cospisd0 = vaddq_s16(cospis0, cospis0); const int16x8_t cospisd1 = vaddq_s16(cospis1, cospis1); const int16x4_t cospi_0_8_16_24 = vget_low_s16(cospis0); const int16x4_t cospid_0_8_16_24 = 
vget_low_s16(cospisd0); const int16x4_t cospid_4_12_20N_28 = vget_high_s16(cospisd0); const int16x4_t cospid_2_30_10_22 = vget_low_s16(cospisd1); const int16x4_t cospid_6_26_14_18N = vget_high_s16(cospisd1); int16x4_t ind[8]; int16x8_t in[4], step1[16], step2[16], out[16]; // Load input (4x8) ind[0] = vld1_s16(input); input += 4; ind[1] = vld1_s16(input); input += 4; ind[2] = vld1_s16(input); input += 4; ind[3] = vld1_s16(input); input += 4; ind[4] = vld1_s16(input); input += 4; ind[5] = vld1_s16(input); input += 4; ind[6] = vld1_s16(input); input += 4; ind[7] = vld1_s16(input); // Transpose transpose_s16_4x8(ind[0], ind[1], ind[2], ind[3], ind[4], ind[5], ind[6], ind[7], &in[0], &in[1], &in[2], &in[3]); // stage 1 step1[0] = in[0 / 2]; step1[4] = in[4 / 2]; step1[8] = in[2 / 2]; step1[12] = in[6 / 2]; // stage 2 step2[0] = step1[0]; step2[4] = step1[4]; step2[8] = vqrdmulhq_lane_s16(step1[8], cospid_2_30_10_22, 1); step2[11] = vqrdmulhq_lane_s16(step1[12], cospid_6_26_14_18N, 1); step2[12] = vqrdmulhq_lane_s16(step1[12], cospid_6_26_14_18N, 0); step2[15] = vqrdmulhq_lane_s16(step1[8], cospid_2_30_10_22, 0); // stage 3 step1[0] = step2[0]; step1[4] = vqrdmulhq_lane_s16(step2[4], cospid_4_12_20N_28, 3); step1[7] = vqrdmulhq_lane_s16(step2[4], cospid_4_12_20N_28, 0); step1[8] = step2[8]; step1[9] = step2[8]; step1[10] = step2[11]; step1[11] = step2[11]; step1[12] = step2[12]; step1[13] = step2[12]; step1[14] = step2[15]; step1[15] = step2[15]; // stage 4 step2[0] = step2[1] = vqrdmulhq_lane_s16(step1[0], cospid_0_8_16_24, 2); step2[4] = step1[4]; step2[5] = step1[4]; step2[6] = step1[7]; step2[7] = step1[7]; step2[8] = step1[8]; idct_cospi_8_24_q(step1[14], step1[9], cospi_0_8_16_24, &step2[9], &step2[14]); idct_cospi_8_24_neg_q(step1[13], step1[10], cospi_0_8_16_24, &step2[13], &step2[10]); step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; // stage 5 step1[0] = step2[0]; step1[1] = step2[1]; step1[2] = step2[1]; step1[3] = step2[0]; step1[4] = step2[4]; idct_cospi_16_16_q(step2[5], step2[6], cospi_0_8_16_24, &step1[5], &step1[6]); step1[7] = step2[7]; step1[8] = vaddq_s16(step2[8], step2[11]); step1[9] = vaddq_s16(step2[9], step2[10]); step1[10] = vsubq_s16(step2[9], step2[10]); step1[11] = vsubq_s16(step2[8], step2[11]); step1[12] = vsubq_s16(step2[15], step2[12]); step1[13] = vsubq_s16(step2[14], step2[13]); step1[14] = vaddq_s16(step2[14], step2[13]); step1[15] = vaddq_s16(step2[15], step2[12]); // stage 6 step2[0] = vaddq_s16(step1[0], step1[7]); step2[1] = vaddq_s16(step1[1], step1[6]); step2[2] = vaddq_s16(step1[2], step1[5]); step2[3] = vaddq_s16(step1[3], step1[4]); step2[4] = vsubq_s16(step1[3], step1[4]); step2[5] = vsubq_s16(step1[2], step1[5]); step2[6] = vsubq_s16(step1[1], step1[6]); step2[7] = vsubq_s16(step1[0], step1[7]); idct_cospi_16_16_q(step1[10], step1[13], cospi_0_8_16_24, &step2[10], &step2[13]); idct_cospi_16_16_q(step1[11], step1[12], cospi_0_8_16_24, &step2[11], &step2[12]); step2[8] = step1[8]; step2[9] = step1[9]; step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 idct16x16_add_stage7(step2, out); if (output) { idct16x16_store_pass1(out, output); } else { if (highbd_flag) { idct16x16_add_store_bd8(out, dest, stride); } else { idct16x16_add_store(out, dest, stride); } } } void vpx_idct16x16_256_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { int16_t row_idct_output[16 * 16]; // pass 1 // Parallel idct on the upper 8 rows vpx_idct16x16_256_add_half1d(input, row_idct_output, dest, stride, 0); // Parallel idct on the lower 8 
rows vpx_idct16x16_256_add_half1d(input + 8 * 16, row_idct_output + 8, dest, stride, 0); // pass 2 // Parallel idct to get the left 8 columns vpx_idct16x16_256_add_half1d(row_idct_output, NULL, dest, stride, 0); // Parallel idct to get the right 8 columns vpx_idct16x16_256_add_half1d(row_idct_output + 16 * 8, NULL, dest + 8, stride, 0); } void vpx_idct16x16_38_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { int16_t row_idct_output[16 * 16]; // pass 1 // Parallel idct on the upper 8 rows vpx_idct16x16_38_add_half1d(input, row_idct_output, dest, stride, 0); // pass 2 // Parallel idct to get the left 8 columns vpx_idct16x16_38_add_half1d(row_idct_output, NULL, dest, stride, 0); // Parallel idct to get the right 8 columns vpx_idct16x16_38_add_half1d(row_idct_output + 16 * 8, NULL, dest + 8, stride, 0); } void vpx_idct16x16_10_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { int16_t row_idct_output[4 * 16]; // pass 1 // Parallel idct on the upper 8 rows vpx_idct16x16_10_add_half1d_pass1(input, row_idct_output); // pass 2 // Parallel idct to get the left 8 columns vpx_idct16x16_10_add_half1d_pass2(row_idct_output, NULL, dest, stride, 0); // Parallel idct to get the right 8 columns vpx_idct16x16_10_add_half1d_pass2(row_idct_output + 4 * 8, NULL, dest + 8, stride, 0); } libvpx-1.8.2/vpx_dsp/arm/idct32x32_135_add_neon.c000066400000000000000000000642321357355204000213130ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/txfm_common.h" static INLINE void load_8x8_s16(const tran_low_t *input, int16x8_t *const in0, int16x8_t *const in1, int16x8_t *const in2, int16x8_t *const in3, int16x8_t *const in4, int16x8_t *const in5, int16x8_t *const in6, int16x8_t *const in7) { *in0 = load_tran_low_to_s16q(input); input += 32; *in1 = load_tran_low_to_s16q(input); input += 32; *in2 = load_tran_low_to_s16q(input); input += 32; *in3 = load_tran_low_to_s16q(input); input += 32; *in4 = load_tran_low_to_s16q(input); input += 32; *in5 = load_tran_low_to_s16q(input); input += 32; *in6 = load_tran_low_to_s16q(input); input += 32; *in7 = load_tran_low_to_s16q(input); } static INLINE void load_4x8_s16(const tran_low_t *input, int16x4_t *const in0, int16x4_t *const in1, int16x4_t *const in2, int16x4_t *const in3, int16x4_t *const in4, int16x4_t *const in5, int16x4_t *const in6, int16x4_t *const in7) { *in0 = load_tran_low_to_s16d(input); input += 32; *in1 = load_tran_low_to_s16d(input); input += 32; *in2 = load_tran_low_to_s16d(input); input += 32; *in3 = load_tran_low_to_s16d(input); input += 32; *in4 = load_tran_low_to_s16d(input); input += 32; *in5 = load_tran_low_to_s16d(input); input += 32; *in6 = load_tran_low_to_s16d(input); input += 32; *in7 = load_tran_low_to_s16d(input); } // Only for the first pass of the _135_ variant. Since it only uses values from // the top left 16x16 it can safely assume all the remaining values are 0 and // skip an awful lot of calculations. In fact, only the first 12 columns make // the cut.
None of the elements in the 13th, 14th, 15th or 16th columns are // used so it skips any calls to input[12|13|14|15] too. // In C this does a single row of 32 for each call. Here it transposes the top // left 12x8 to allow using SIMD. // vp9/common/vp9_scan.c:vp9_default_iscan_32x32 arranges the first 135 non-zero // coefficients as follows: // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 // 0 0 2 5 10 17 25 38 47 62 83 101 121 // 1 1 4 8 15 22 30 45 58 74 92 112 133 // 2 3 7 12 18 28 36 52 64 82 102 118 // 3 6 11 16 23 31 43 60 73 90 109 126 // 4 9 14 19 29 37 50 65 78 98 116 134 // 5 13 20 26 35 44 54 72 85 105 123 // 6 21 27 33 42 53 63 80 94 113 132 // 7 24 32 39 48 57 71 88 104 120 // 8 34 40 46 56 68 81 96 111 130 // 9 41 49 55 67 77 91 107 124 // 10 51 59 66 76 89 99 119 131 // 11 61 69 75 87 100 114 129 // 12 70 79 86 97 108 122 // 13 84 93 103 110 125 // 14 95 106 115 127 // 15 117 128 void vpx_idct32_12_neon(const tran_low_t *const input, int16_t *output) { int16x4_t tmp[8]; int16x8_t in[12], s1[32], s2[32], s3[32], s4[32], s5[32], s6[32], s7[32]; load_8x8_s16(input, &in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); transpose_s16_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); load_4x8_s16(input + 8, &tmp[0], &tmp[1], &tmp[2], &tmp[3], &tmp[4], &tmp[5], &tmp[6], &tmp[7]); transpose_s16_4x8(tmp[0], tmp[1], tmp[2], tmp[3], tmp[4], tmp[5], tmp[6], tmp[7], &in[8], &in[9], &in[10], &in[11]); // stage 1 s1[16] = multiply_shift_and_narrow_s16(in[1], cospi_31_64); s1[31] = multiply_shift_and_narrow_s16(in[1], cospi_1_64); s1[18] = multiply_shift_and_narrow_s16(in[9], cospi_23_64); s1[29] = multiply_shift_and_narrow_s16(in[9], cospi_9_64); s1[19] = multiply_shift_and_narrow_s16(in[7], -cospi_25_64); s1[28] = multiply_shift_and_narrow_s16(in[7], cospi_7_64); s1[20] = multiply_shift_and_narrow_s16(in[5], cospi_27_64); s1[27] = multiply_shift_and_narrow_s16(in[5], cospi_5_64); s1[21] = multiply_shift_and_narrow_s16(in[11], -cospi_21_64); s1[26] = multiply_shift_and_narrow_s16(in[11], cospi_11_64); s1[23] = multiply_shift_and_narrow_s16(in[3], -cospi_29_64); s1[24] = multiply_shift_and_narrow_s16(in[3], cospi_3_64); // stage 2 s2[8] = multiply_shift_and_narrow_s16(in[2], cospi_30_64); s2[15] = multiply_shift_and_narrow_s16(in[2], cospi_2_64); s2[10] = multiply_shift_and_narrow_s16(in[10], cospi_22_64); s2[13] = multiply_shift_and_narrow_s16(in[10], cospi_10_64); s2[11] = multiply_shift_and_narrow_s16(in[6], -cospi_26_64); s2[12] = multiply_shift_and_narrow_s16(in[6], cospi_6_64); s2[18] = vsubq_s16(s1[19], s1[18]); s2[19] = vaddq_s16(s1[18], s1[19]); s2[20] = vaddq_s16(s1[20], s1[21]); s2[21] = vsubq_s16(s1[20], s1[21]); s2[26] = vsubq_s16(s1[27], s1[26]); s2[27] = vaddq_s16(s1[26], s1[27]); s2[28] = vaddq_s16(s1[28], s1[29]); s2[29] = vsubq_s16(s1[28], s1[29]); // stage 3 s3[4] = multiply_shift_and_narrow_s16(in[4], cospi_28_64); s3[7] = multiply_shift_and_narrow_s16(in[4], cospi_4_64); s3[10] = vsubq_s16(s2[11], s2[10]); s3[11] = vaddq_s16(s2[10], s2[11]); s3[12] = vaddq_s16(s2[12], s2[13]); s3[13] = vsubq_s16(s2[12], s2[13]); s3[17] = multiply_accumulate_shift_and_narrow_s16(s1[16], -cospi_4_64, s1[31], cospi_28_64); s3[30] = multiply_accumulate_shift_and_narrow_s16(s1[16], cospi_28_64, s1[31], cospi_4_64); s3[18] = multiply_accumulate_shift_and_narrow_s16(s2[18], -cospi_28_64, s2[29], -cospi_4_64); s3[29] = multiply_accumulate_shift_and_narrow_s16(s2[18], -cospi_4_64, s2[29], cospi_28_64); s3[21] = multiply_accumulate_shift_and_narrow_s16(s2[21], -cospi_20_64,
s2[26], cospi_12_64); s3[26] = multiply_accumulate_shift_and_narrow_s16(s2[21], cospi_12_64, s2[26], cospi_20_64); s3[22] = multiply_accumulate_shift_and_narrow_s16(s1[23], -cospi_12_64, s1[24], -cospi_20_64); s3[25] = multiply_accumulate_shift_and_narrow_s16(s1[23], -cospi_20_64, s1[24], cospi_12_64); // stage 4 s4[0] = multiply_shift_and_narrow_s16(in[0], cospi_16_64); s4[2] = multiply_shift_and_narrow_s16(in[8], cospi_24_64); s4[3] = multiply_shift_and_narrow_s16(in[8], cospi_8_64); s4[9] = multiply_accumulate_shift_and_narrow_s16(s2[8], -cospi_8_64, s2[15], cospi_24_64); s4[14] = multiply_accumulate_shift_and_narrow_s16(s2[8], cospi_24_64, s2[15], cospi_8_64); s4[10] = multiply_accumulate_shift_and_narrow_s16(s3[10], -cospi_24_64, s3[13], -cospi_8_64); s4[13] = multiply_accumulate_shift_and_narrow_s16(s3[10], -cospi_8_64, s3[13], cospi_24_64); s4[16] = vaddq_s16(s1[16], s2[19]); s4[17] = vaddq_s16(s3[17], s3[18]); s4[18] = vsubq_s16(s3[17], s3[18]); s4[19] = vsubq_s16(s1[16], s2[19]); s4[20] = vsubq_s16(s1[23], s2[20]); s4[21] = vsubq_s16(s3[22], s3[21]); s4[22] = vaddq_s16(s3[21], s3[22]); s4[23] = vaddq_s16(s2[20], s1[23]); s4[24] = vaddq_s16(s1[24], s2[27]); s4[25] = vaddq_s16(s3[25], s3[26]); s4[26] = vsubq_s16(s3[25], s3[26]); s4[27] = vsubq_s16(s1[24], s2[27]); s4[28] = vsubq_s16(s1[31], s2[28]); s4[29] = vsubq_s16(s3[30], s3[29]); s4[30] = vaddq_s16(s3[29], s3[30]); s4[31] = vaddq_s16(s2[28], s1[31]); // stage 5 s5[0] = vaddq_s16(s4[0], s4[3]); s5[1] = vaddq_s16(s4[0], s4[2]); s5[2] = vsubq_s16(s4[0], s4[2]); s5[3] = vsubq_s16(s4[0], s4[3]); s5[5] = sub_multiply_shift_and_narrow_s16(s3[7], s3[4], cospi_16_64); s5[6] = add_multiply_shift_and_narrow_s16(s3[4], s3[7], cospi_16_64); s5[8] = vaddq_s16(s2[8], s3[11]); s5[9] = vaddq_s16(s4[9], s4[10]); s5[10] = vsubq_s16(s4[9], s4[10]); s5[11] = vsubq_s16(s2[8], s3[11]); s5[12] = vsubq_s16(s2[15], s3[12]); s5[13] = vsubq_s16(s4[14], s4[13]); s5[14] = vaddq_s16(s4[13], s4[14]); s5[15] = vaddq_s16(s2[15], s3[12]); s5[18] = multiply_accumulate_shift_and_narrow_s16(s4[18], -cospi_8_64, s4[29], cospi_24_64); s5[29] = multiply_accumulate_shift_and_narrow_s16(s4[18], cospi_24_64, s4[29], cospi_8_64); s5[19] = multiply_accumulate_shift_and_narrow_s16(s4[19], -cospi_8_64, s4[28], cospi_24_64); s5[28] = multiply_accumulate_shift_and_narrow_s16(s4[19], cospi_24_64, s4[28], cospi_8_64); s5[20] = multiply_accumulate_shift_and_narrow_s16(s4[20], -cospi_24_64, s4[27], -cospi_8_64); s5[27] = multiply_accumulate_shift_and_narrow_s16(s4[20], -cospi_8_64, s4[27], cospi_24_64); s5[21] = multiply_accumulate_shift_and_narrow_s16(s4[21], -cospi_24_64, s4[26], -cospi_8_64); s5[26] = multiply_accumulate_shift_and_narrow_s16(s4[21], -cospi_8_64, s4[26], cospi_24_64); // stage 6 s6[0] = vaddq_s16(s5[0], s3[7]); s6[1] = vaddq_s16(s5[1], s5[6]); s6[2] = vaddq_s16(s5[2], s5[5]); s6[3] = vaddq_s16(s5[3], s3[4]); s6[4] = vsubq_s16(s5[3], s3[4]); s6[5] = vsubq_s16(s5[2], s5[5]); s6[6] = vsubq_s16(s5[1], s5[6]); s6[7] = vsubq_s16(s5[0], s3[7]); s6[10] = sub_multiply_shift_and_narrow_s16(s5[13], s5[10], cospi_16_64); s6[13] = add_multiply_shift_and_narrow_s16(s5[10], s5[13], cospi_16_64); s6[11] = sub_multiply_shift_and_narrow_s16(s5[12], s5[11], cospi_16_64); s6[12] = add_multiply_shift_and_narrow_s16(s5[11], s5[12], cospi_16_64); s6[16] = vaddq_s16(s4[16], s4[23]); s6[17] = vaddq_s16(s4[17], s4[22]); s6[18] = vaddq_s16(s5[18], s5[21]); s6[19] = vaddq_s16(s5[19], s5[20]); s6[20] = vsubq_s16(s5[19], s5[20]); s6[21] = vsubq_s16(s5[18], s5[21]); s6[22] = vsubq_s16(s4[17], 
s4[22]); s6[23] = vsubq_s16(s4[16], s4[23]); s6[24] = vsubq_s16(s4[31], s4[24]); s6[25] = vsubq_s16(s4[30], s4[25]); s6[26] = vsubq_s16(s5[29], s5[26]); s6[27] = vsubq_s16(s5[28], s5[27]); s6[28] = vaddq_s16(s5[27], s5[28]); s6[29] = vaddq_s16(s5[26], s5[29]); s6[30] = vaddq_s16(s4[25], s4[30]); s6[31] = vaddq_s16(s4[24], s4[31]); // stage 7 s7[0] = vaddq_s16(s6[0], s5[15]); s7[1] = vaddq_s16(s6[1], s5[14]); s7[2] = vaddq_s16(s6[2], s6[13]); s7[3] = vaddq_s16(s6[3], s6[12]); s7[4] = vaddq_s16(s6[4], s6[11]); s7[5] = vaddq_s16(s6[5], s6[10]); s7[6] = vaddq_s16(s6[6], s5[9]); s7[7] = vaddq_s16(s6[7], s5[8]); s7[8] = vsubq_s16(s6[7], s5[8]); s7[9] = vsubq_s16(s6[6], s5[9]); s7[10] = vsubq_s16(s6[5], s6[10]); s7[11] = vsubq_s16(s6[4], s6[11]); s7[12] = vsubq_s16(s6[3], s6[12]); s7[13] = vsubq_s16(s6[2], s6[13]); s7[14] = vsubq_s16(s6[1], s5[14]); s7[15] = vsubq_s16(s6[0], s5[15]); s7[20] = sub_multiply_shift_and_narrow_s16(s6[27], s6[20], cospi_16_64); s7[27] = add_multiply_shift_and_narrow_s16(s6[20], s6[27], cospi_16_64); s7[21] = sub_multiply_shift_and_narrow_s16(s6[26], s6[21], cospi_16_64); s7[26] = add_multiply_shift_and_narrow_s16(s6[21], s6[26], cospi_16_64); s7[22] = sub_multiply_shift_and_narrow_s16(s6[25], s6[22], cospi_16_64); s7[25] = add_multiply_shift_and_narrow_s16(s6[22], s6[25], cospi_16_64); s7[23] = sub_multiply_shift_and_narrow_s16(s6[24], s6[23], cospi_16_64); s7[24] = add_multiply_shift_and_narrow_s16(s6[23], s6[24], cospi_16_64); // final stage vst1q_s16(output, vaddq_s16(s7[0], s6[31])); output += 16; vst1q_s16(output, vaddq_s16(s7[1], s6[30])); output += 16; vst1q_s16(output, vaddq_s16(s7[2], s6[29])); output += 16; vst1q_s16(output, vaddq_s16(s7[3], s6[28])); output += 16; vst1q_s16(output, vaddq_s16(s7[4], s7[27])); output += 16; vst1q_s16(output, vaddq_s16(s7[5], s7[26])); output += 16; vst1q_s16(output, vaddq_s16(s7[6], s7[25])); output += 16; vst1q_s16(output, vaddq_s16(s7[7], s7[24])); output += 16; vst1q_s16(output, vaddq_s16(s7[8], s7[23])); output += 16; vst1q_s16(output, vaddq_s16(s7[9], s7[22])); output += 16; vst1q_s16(output, vaddq_s16(s7[10], s7[21])); output += 16; vst1q_s16(output, vaddq_s16(s7[11], s7[20])); output += 16; vst1q_s16(output, vaddq_s16(s7[12], s6[19])); output += 16; vst1q_s16(output, vaddq_s16(s7[13], s6[18])); output += 16; vst1q_s16(output, vaddq_s16(s7[14], s6[17])); output += 16; vst1q_s16(output, vaddq_s16(s7[15], s6[16])); output += 16; vst1q_s16(output, vsubq_s16(s7[15], s6[16])); output += 16; vst1q_s16(output, vsubq_s16(s7[14], s6[17])); output += 16; vst1q_s16(output, vsubq_s16(s7[13], s6[18])); output += 16; vst1q_s16(output, vsubq_s16(s7[12], s6[19])); output += 16; vst1q_s16(output, vsubq_s16(s7[11], s7[20])); output += 16; vst1q_s16(output, vsubq_s16(s7[10], s7[21])); output += 16; vst1q_s16(output, vsubq_s16(s7[9], s7[22])); output += 16; vst1q_s16(output, vsubq_s16(s7[8], s7[23])); output += 16; vst1q_s16(output, vsubq_s16(s7[7], s7[24])); output += 16; vst1q_s16(output, vsubq_s16(s7[6], s7[25])); output += 16; vst1q_s16(output, vsubq_s16(s7[5], s7[26])); output += 16; vst1q_s16(output, vsubq_s16(s7[4], s7[27])); output += 16; vst1q_s16(output, vsubq_s16(s7[3], s6[28])); output += 16; vst1q_s16(output, vsubq_s16(s7[2], s6[29])); output += 16; vst1q_s16(output, vsubq_s16(s7[1], s6[30])); output += 16; vst1q_s16(output, vsubq_s16(s7[0], s6[31])); } void vpx_idct32_16_neon(const int16_t *const input, void *const output, const int stride, const int highbd_flag) { int16x8_t in[16], s1[32], s2[32], s3[32], s4[32], s5[32], 
s6[32], s7[32], out[32]; load_and_transpose_s16_8x8(input, 16, &in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); load_and_transpose_s16_8x8(input + 8, 16, &in[8], &in[9], &in[10], &in[11], &in[12], &in[13], &in[14], &in[15]); // stage 1 s1[16] = multiply_shift_and_narrow_s16(in[1], cospi_31_64); s1[31] = multiply_shift_and_narrow_s16(in[1], cospi_1_64); s1[17] = multiply_shift_and_narrow_s16(in[15], -cospi_17_64); s1[30] = multiply_shift_and_narrow_s16(in[15], cospi_15_64); s1[18] = multiply_shift_and_narrow_s16(in[9], cospi_23_64); s1[29] = multiply_shift_and_narrow_s16(in[9], cospi_9_64); s1[19] = multiply_shift_and_narrow_s16(in[7], -cospi_25_64); s1[28] = multiply_shift_and_narrow_s16(in[7], cospi_7_64); s1[20] = multiply_shift_and_narrow_s16(in[5], cospi_27_64); s1[27] = multiply_shift_and_narrow_s16(in[5], cospi_5_64); s1[21] = multiply_shift_and_narrow_s16(in[11], -cospi_21_64); s1[26] = multiply_shift_and_narrow_s16(in[11], cospi_11_64); s1[22] = multiply_shift_and_narrow_s16(in[13], cospi_19_64); s1[25] = multiply_shift_and_narrow_s16(in[13], cospi_13_64); s1[23] = multiply_shift_and_narrow_s16(in[3], -cospi_29_64); s1[24] = multiply_shift_and_narrow_s16(in[3], cospi_3_64); // stage 2 s2[8] = multiply_shift_and_narrow_s16(in[2], cospi_30_64); s2[15] = multiply_shift_and_narrow_s16(in[2], cospi_2_64); s2[9] = multiply_shift_and_narrow_s16(in[14], -cospi_18_64); s2[14] = multiply_shift_and_narrow_s16(in[14], cospi_14_64); s2[10] = multiply_shift_and_narrow_s16(in[10], cospi_22_64); s2[13] = multiply_shift_and_narrow_s16(in[10], cospi_10_64); s2[11] = multiply_shift_and_narrow_s16(in[6], -cospi_26_64); s2[12] = multiply_shift_and_narrow_s16(in[6], cospi_6_64); s2[16] = vaddq_s16(s1[16], s1[17]); s2[17] = vsubq_s16(s1[16], s1[17]); s2[18] = vsubq_s16(s1[19], s1[18]); s2[19] = vaddq_s16(s1[18], s1[19]); s2[20] = vaddq_s16(s1[20], s1[21]); s2[21] = vsubq_s16(s1[20], s1[21]); s2[22] = vsubq_s16(s1[23], s1[22]); s2[23] = vaddq_s16(s1[22], s1[23]); s2[24] = vaddq_s16(s1[24], s1[25]); s2[25] = vsubq_s16(s1[24], s1[25]); s2[26] = vsubq_s16(s1[27], s1[26]); s2[27] = vaddq_s16(s1[26], s1[27]); s2[28] = vaddq_s16(s1[28], s1[29]); s2[29] = vsubq_s16(s1[28], s1[29]); s2[30] = vsubq_s16(s1[31], s1[30]); s2[31] = vaddq_s16(s1[30], s1[31]); // stage 3 s3[4] = multiply_shift_and_narrow_s16(in[4], cospi_28_64); s3[7] = multiply_shift_and_narrow_s16(in[4], cospi_4_64); s3[5] = multiply_shift_and_narrow_s16(in[12], -cospi_20_64); s3[6] = multiply_shift_and_narrow_s16(in[12], cospi_12_64); s3[8] = vaddq_s16(s2[8], s2[9]); s3[9] = vsubq_s16(s2[8], s2[9]); s3[10] = vsubq_s16(s2[11], s2[10]); s3[11] = vaddq_s16(s2[10], s2[11]); s3[12] = vaddq_s16(s2[12], s2[13]); s3[13] = vsubq_s16(s2[12], s2[13]); s3[14] = vsubq_s16(s2[15], s2[14]); s3[15] = vaddq_s16(s2[14], s2[15]); s3[17] = multiply_accumulate_shift_and_narrow_s16(s2[17], -cospi_4_64, s2[30], cospi_28_64); s3[30] = multiply_accumulate_shift_and_narrow_s16(s2[17], cospi_28_64, s2[30], cospi_4_64); s3[18] = multiply_accumulate_shift_and_narrow_s16(s2[18], -cospi_28_64, s2[29], -cospi_4_64); s3[29] = multiply_accumulate_shift_and_narrow_s16(s2[18], -cospi_4_64, s2[29], cospi_28_64); s3[21] = multiply_accumulate_shift_and_narrow_s16(s2[21], -cospi_20_64, s2[26], cospi_12_64); s3[26] = multiply_accumulate_shift_and_narrow_s16(s2[21], cospi_12_64, s2[26], cospi_20_64); s3[22] = multiply_accumulate_shift_and_narrow_s16(s2[22], -cospi_12_64, s2[25], -cospi_20_64); s3[25] = multiply_accumulate_shift_and_narrow_s16(s2[22], -cospi_20_64, s2[25], 
cospi_12_64); // stage 4 s4[0] = multiply_shift_and_narrow_s16(in[0], cospi_16_64); s4[2] = multiply_shift_and_narrow_s16(in[8], cospi_24_64); s4[3] = multiply_shift_and_narrow_s16(in[8], cospi_8_64); s4[4] = vaddq_s16(s3[4], s3[5]); s4[5] = vsubq_s16(s3[4], s3[5]); s4[6] = vsubq_s16(s3[7], s3[6]); s4[7] = vaddq_s16(s3[6], s3[7]); s4[9] = multiply_accumulate_shift_and_narrow_s16(s3[9], -cospi_8_64, s3[14], cospi_24_64); s4[14] = multiply_accumulate_shift_and_narrow_s16(s3[9], cospi_24_64, s3[14], cospi_8_64); s4[10] = multiply_accumulate_shift_and_narrow_s16(s3[10], -cospi_24_64, s3[13], -cospi_8_64); s4[13] = multiply_accumulate_shift_and_narrow_s16(s3[10], -cospi_8_64, s3[13], cospi_24_64); s4[16] = vaddq_s16(s2[16], s2[19]); s4[17] = vaddq_s16(s3[17], s3[18]); s4[18] = vsubq_s16(s3[17], s3[18]); s4[19] = vsubq_s16(s2[16], s2[19]); s4[20] = vsubq_s16(s2[23], s2[20]); s4[21] = vsubq_s16(s3[22], s3[21]); s4[22] = vaddq_s16(s3[21], s3[22]); s4[23] = vaddq_s16(s2[20], s2[23]); s4[24] = vaddq_s16(s2[24], s2[27]); s4[25] = vaddq_s16(s3[25], s3[26]); s4[26] = vsubq_s16(s3[25], s3[26]); s4[27] = vsubq_s16(s2[24], s2[27]); s4[28] = vsubq_s16(s2[31], s2[28]); s4[29] = vsubq_s16(s3[30], s3[29]); s4[30] = vaddq_s16(s3[29], s3[30]); s4[31] = vaddq_s16(s2[28], s2[31]); // stage 5 s5[0] = vaddq_s16(s4[0], s4[3]); s5[1] = vaddq_s16(s4[0], s4[2]); s5[2] = vsubq_s16(s4[0], s4[2]); s5[3] = vsubq_s16(s4[0], s4[3]); s5[5] = sub_multiply_shift_and_narrow_s16(s4[6], s4[5], cospi_16_64); s5[6] = add_multiply_shift_and_narrow_s16(s4[5], s4[6], cospi_16_64); s5[8] = vaddq_s16(s3[8], s3[11]); s5[9] = vaddq_s16(s4[9], s4[10]); s5[10] = vsubq_s16(s4[9], s4[10]); s5[11] = vsubq_s16(s3[8], s3[11]); s5[12] = vsubq_s16(s3[15], s3[12]); s5[13] = vsubq_s16(s4[14], s4[13]); s5[14] = vaddq_s16(s4[13], s4[14]); s5[15] = vaddq_s16(s3[15], s3[12]); s5[18] = multiply_accumulate_shift_and_narrow_s16(s4[18], -cospi_8_64, s4[29], cospi_24_64); s5[29] = multiply_accumulate_shift_and_narrow_s16(s4[18], cospi_24_64, s4[29], cospi_8_64); s5[19] = multiply_accumulate_shift_and_narrow_s16(s4[19], -cospi_8_64, s4[28], cospi_24_64); s5[28] = multiply_accumulate_shift_and_narrow_s16(s4[19], cospi_24_64, s4[28], cospi_8_64); s5[20] = multiply_accumulate_shift_and_narrow_s16(s4[20], -cospi_24_64, s4[27], -cospi_8_64); s5[27] = multiply_accumulate_shift_and_narrow_s16(s4[20], -cospi_8_64, s4[27], cospi_24_64); s5[21] = multiply_accumulate_shift_and_narrow_s16(s4[21], -cospi_24_64, s4[26], -cospi_8_64); s5[26] = multiply_accumulate_shift_and_narrow_s16(s4[21], -cospi_8_64, s4[26], cospi_24_64); // stage 6 s6[0] = vaddq_s16(s5[0], s4[7]); s6[1] = vaddq_s16(s5[1], s5[6]); s6[2] = vaddq_s16(s5[2], s5[5]); s6[3] = vaddq_s16(s5[3], s4[4]); s6[4] = vsubq_s16(s5[3], s4[4]); s6[5] = vsubq_s16(s5[2], s5[5]); s6[6] = vsubq_s16(s5[1], s5[6]); s6[7] = vsubq_s16(s5[0], s4[7]); s6[10] = sub_multiply_shift_and_narrow_s16(s5[13], s5[10], cospi_16_64); s6[13] = add_multiply_shift_and_narrow_s16(s5[10], s5[13], cospi_16_64); s6[11] = sub_multiply_shift_and_narrow_s16(s5[12], s5[11], cospi_16_64); s6[12] = add_multiply_shift_and_narrow_s16(s5[11], s5[12], cospi_16_64); s6[16] = vaddq_s16(s4[16], s4[23]); s6[17] = vaddq_s16(s4[17], s4[22]); s6[18] = vaddq_s16(s5[18], s5[21]); s6[19] = vaddq_s16(s5[19], s5[20]); s6[20] = vsubq_s16(s5[19], s5[20]); s6[21] = vsubq_s16(s5[18], s5[21]); s6[22] = vsubq_s16(s4[17], s4[22]); s6[23] = vsubq_s16(s4[16], s4[23]); s6[24] = vsubq_s16(s4[31], s4[24]); s6[25] = vsubq_s16(s4[30], s4[25]); s6[26] = vsubq_s16(s5[29], s5[26]); 
s6[27] = vsubq_s16(s5[28], s5[27]); s6[28] = vaddq_s16(s5[27], s5[28]); s6[29] = vaddq_s16(s5[26], s5[29]); s6[30] = vaddq_s16(s4[25], s4[30]); s6[31] = vaddq_s16(s4[24], s4[31]); // stage 7 s7[0] = vaddq_s16(s6[0], s5[15]); s7[1] = vaddq_s16(s6[1], s5[14]); s7[2] = vaddq_s16(s6[2], s6[13]); s7[3] = vaddq_s16(s6[3], s6[12]); s7[4] = vaddq_s16(s6[4], s6[11]); s7[5] = vaddq_s16(s6[5], s6[10]); s7[6] = vaddq_s16(s6[6], s5[9]); s7[7] = vaddq_s16(s6[7], s5[8]); s7[8] = vsubq_s16(s6[7], s5[8]); s7[9] = vsubq_s16(s6[6], s5[9]); s7[10] = vsubq_s16(s6[5], s6[10]); s7[11] = vsubq_s16(s6[4], s6[11]); s7[12] = vsubq_s16(s6[3], s6[12]); s7[13] = vsubq_s16(s6[2], s6[13]); s7[14] = vsubq_s16(s6[1], s5[14]); s7[15] = vsubq_s16(s6[0], s5[15]); s7[20] = sub_multiply_shift_and_narrow_s16(s6[27], s6[20], cospi_16_64); s7[27] = add_multiply_shift_and_narrow_s16(s6[20], s6[27], cospi_16_64); s7[21] = sub_multiply_shift_and_narrow_s16(s6[26], s6[21], cospi_16_64); s7[26] = add_multiply_shift_and_narrow_s16(s6[21], s6[26], cospi_16_64); s7[22] = sub_multiply_shift_and_narrow_s16(s6[25], s6[22], cospi_16_64); s7[25] = add_multiply_shift_and_narrow_s16(s6[22], s6[25], cospi_16_64); s7[23] = sub_multiply_shift_and_narrow_s16(s6[24], s6[23], cospi_16_64); s7[24] = add_multiply_shift_and_narrow_s16(s6[23], s6[24], cospi_16_64); // final stage out[0] = final_add(s7[0], s6[31]); out[1] = final_add(s7[1], s6[30]); out[2] = final_add(s7[2], s6[29]); out[3] = final_add(s7[3], s6[28]); out[4] = final_add(s7[4], s7[27]); out[5] = final_add(s7[5], s7[26]); out[6] = final_add(s7[6], s7[25]); out[7] = final_add(s7[7], s7[24]); out[8] = final_add(s7[8], s7[23]); out[9] = final_add(s7[9], s7[22]); out[10] = final_add(s7[10], s7[21]); out[11] = final_add(s7[11], s7[20]); out[12] = final_add(s7[12], s6[19]); out[13] = final_add(s7[13], s6[18]); out[14] = final_add(s7[14], s6[17]); out[15] = final_add(s7[15], s6[16]); out[16] = final_sub(s7[15], s6[16]); out[17] = final_sub(s7[14], s6[17]); out[18] = final_sub(s7[13], s6[18]); out[19] = final_sub(s7[12], s6[19]); out[20] = final_sub(s7[11], s7[20]); out[21] = final_sub(s7[10], s7[21]); out[22] = final_sub(s7[9], s7[22]); out[23] = final_sub(s7[8], s7[23]); out[24] = final_sub(s7[7], s7[24]); out[25] = final_sub(s7[6], s7[25]); out[26] = final_sub(s7[5], s7[26]); out[27] = final_sub(s7[4], s7[27]); out[28] = final_sub(s7[3], s6[28]); out[29] = final_sub(s7[2], s6[29]); out[30] = final_sub(s7[1], s6[30]); out[31] = final_sub(s7[0], s6[31]); if (highbd_flag) { highbd_add_and_store_bd8(out, output, stride); } else { uint8_t *const outputT = (uint8_t *)output; add_and_store_u8_s16(out + 0, outputT, stride); add_and_store_u8_s16(out + 8, outputT + (8 * stride), stride); add_and_store_u8_s16(out + 16, outputT + (16 * stride), stride); add_and_store_u8_s16(out + 24, outputT + (24 * stride), stride); } } void vpx_idct32x32_135_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { int i; int16_t temp[32 * 16]; int16_t *t = temp; vpx_idct32_12_neon(input, temp); vpx_idct32_12_neon(input + 32 * 8, temp + 8); for (i = 0; i < 32; i += 8) { vpx_idct32_16_neon(t, dest, stride, 0); t += (16 * 8); dest += 8; } } libvpx-1.8.2/vpx_dsp/arm/idct32x32_1_add_neon.c000066400000000000000000000036571357355204000211470ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/inv_txfm.h" static INLINE void idct32x32_1_add_pos_kernel(uint8_t **dest, const int stride, const uint8x16_t res) { const uint8x16_t a0 = vld1q_u8(*dest); const uint8x16_t a1 = vld1q_u8(*dest + 16); const uint8x16_t b0 = vqaddq_u8(a0, res); const uint8x16_t b1 = vqaddq_u8(a1, res); vst1q_u8(*dest, b0); vst1q_u8(*dest + 16, b1); *dest += stride; } static INLINE void idct32x32_1_add_neg_kernel(uint8_t **dest, const int stride, const uint8x16_t res) { const uint8x16_t a0 = vld1q_u8(*dest); const uint8x16_t a1 = vld1q_u8(*dest + 16); const uint8x16_t b0 = vqsubq_u8(a0, res); const uint8x16_t b1 = vqsubq_u8(a1, res); vst1q_u8(*dest, b0); vst1q_u8(*dest + 16, b1); *dest += stride; } void vpx_idct32x32_1_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { int i; const int16_t out0 = WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64)); const int16_t a1 = ROUND_POWER_OF_TWO(out1, 6); if (a1 >= 0) { const uint8x16_t dc = create_dcq(a1); for (i = 0; i < 32; i++) { idct32x32_1_add_pos_kernel(&dest, stride, dc); } } else { const uint8x16_t dc = create_dcq(-a1); for (i = 0; i < 32; i++) { idct32x32_1_add_neg_kernel(&dest, stride, dc); } } } libvpx-1.8.2/vpx_dsp/arm/idct32x32_34_add_neon.c000066400000000000000000000465601357355204000212330ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/txfm_common.h" // Only for the first pass of the _34_ variant. Since it only uses values from // the top left 8x8 it can safely assume all the remaining values are 0 and skip // an awful lot of calculations. In fact, only the first 6 columns make the cut. // None of the elements in the 7th or 8th column are used, so it skips any calls // to input[6] or input[7] too. // In C this does a single row of 32 for each call. Here it transposes the top // left 8x8 to allow using SIMD.
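// The helper functions used throughout these kernels
// (multiply_shift_and_narrow_s16() and friends, defined in
// vpx_dsp/arm/idct_neon.h) vectorize the scalar rounding multiply
// dct_const_round_shift(a * c) = ROUND_POWER_OF_TWO(a * c, DCT_CONST_BITS).
// A minimal scalar sketch of the assumed per-lane behavior, with
// DCT_CONST_BITS == 14 (mul_shift_narrow is an illustrative name, not a
// library function):
//
//   static int16_t mul_shift_narrow(int16_t a, int16_t c) {
//     const int32_t product = (int32_t)a * c;         // widen to 32 bits
//     return (int16_t)((product + (1 << 13)) >> 14);  // round, then narrow
//   }
//
// The NEON version is believed to reach the same result with
// vqrdmulhq_n_s16(a, c * 2): that instruction doubles the product and keeps
// the rounded high half (an effective shift by 15), so pre-doubling the
// constant turns it into the required shift by 14.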
// vp9/common/vp9_scan.c:vp9_default_iscan_32x32 arranges the first 34 non-zero // coefficients as follows: // 0 1 2 3 4 5 6 7 // 0 0 2 5 10 17 25 // 1 1 4 8 15 22 30 // 2 3 7 12 18 28 // 3 6 11 16 23 31 // 4 9 14 19 29 // 5 13 20 26 // 6 21 27 33 // 7 24 32 void vpx_idct32_6_neon(const tran_low_t *input, int16_t *output) { int16x8_t in[8], s1[32], s2[32], s3[32]; in[0] = load_tran_low_to_s16q(input); input += 32; in[1] = load_tran_low_to_s16q(input); input += 32; in[2] = load_tran_low_to_s16q(input); input += 32; in[3] = load_tran_low_to_s16q(input); input += 32; in[4] = load_tran_low_to_s16q(input); input += 32; in[5] = load_tran_low_to_s16q(input); input += 32; in[6] = load_tran_low_to_s16q(input); input += 32; in[7] = load_tran_low_to_s16q(input); transpose_s16_8x8(&in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); // stage 1 // input[1] * cospi_31_64 - input[31] * cospi_1_64 (but input[31] == 0) s1[16] = multiply_shift_and_narrow_s16(in[1], cospi_31_64); // input[1] * cospi_1_64 + input[31] * cospi_31_64 (but input[31] == 0) s1[31] = multiply_shift_and_narrow_s16(in[1], cospi_1_64); s1[20] = multiply_shift_and_narrow_s16(in[5], cospi_27_64); s1[27] = multiply_shift_and_narrow_s16(in[5], cospi_5_64); s1[23] = multiply_shift_and_narrow_s16(in[3], -cospi_29_64); s1[24] = multiply_shift_and_narrow_s16(in[3], cospi_3_64); // stage 2 s2[8] = multiply_shift_and_narrow_s16(in[2], cospi_30_64); s2[15] = multiply_shift_and_narrow_s16(in[2], cospi_2_64); // stage 3 s1[4] = multiply_shift_and_narrow_s16(in[4], cospi_28_64); s1[7] = multiply_shift_and_narrow_s16(in[4], cospi_4_64); s1[17] = multiply_accumulate_shift_and_narrow_s16(s1[16], -cospi_4_64, s1[31], cospi_28_64); s1[30] = multiply_accumulate_shift_and_narrow_s16(s1[16], cospi_28_64, s1[31], cospi_4_64); s1[21] = multiply_accumulate_shift_and_narrow_s16(s1[20], -cospi_20_64, s1[27], cospi_12_64); s1[26] = multiply_accumulate_shift_and_narrow_s16(s1[20], cospi_12_64, s1[27], cospi_20_64); s1[22] = multiply_accumulate_shift_and_narrow_s16(s1[23], -cospi_12_64, s1[24], -cospi_20_64); s1[25] = multiply_accumulate_shift_and_narrow_s16(s1[23], -cospi_20_64, s1[24], cospi_12_64); // stage 4 s1[0] = multiply_shift_and_narrow_s16(in[0], cospi_16_64); s2[9] = multiply_accumulate_shift_and_narrow_s16(s2[8], -cospi_8_64, s2[15], cospi_24_64); s2[14] = multiply_accumulate_shift_and_narrow_s16(s2[8], cospi_24_64, s2[15], cospi_8_64); s2[20] = vsubq_s16(s1[23], s1[20]); s2[21] = vsubq_s16(s1[22], s1[21]); s2[22] = vaddq_s16(s1[21], s1[22]); s2[23] = vaddq_s16(s1[20], s1[23]); s2[24] = vaddq_s16(s1[24], s1[27]); s2[25] = vaddq_s16(s1[25], s1[26]); s2[26] = vsubq_s16(s1[25], s1[26]); s2[27] = vsubq_s16(s1[24], s1[27]); // stage 5 s1[5] = sub_multiply_shift_and_narrow_s16(s1[7], s1[4], cospi_16_64); s1[6] = add_multiply_shift_and_narrow_s16(s1[4], s1[7], cospi_16_64); s1[18] = multiply_accumulate_shift_and_narrow_s16(s1[17], -cospi_8_64, s1[30], cospi_24_64); s1[29] = multiply_accumulate_shift_and_narrow_s16(s1[17], cospi_24_64, s1[30], cospi_8_64); s1[19] = multiply_accumulate_shift_and_narrow_s16(s1[16], -cospi_8_64, s1[31], cospi_24_64); s1[28] = multiply_accumulate_shift_and_narrow_s16(s1[16], cospi_24_64, s1[31], cospi_8_64); s1[20] = multiply_accumulate_shift_and_narrow_s16(s2[20], -cospi_24_64, s2[27], -cospi_8_64); s1[27] = multiply_accumulate_shift_and_narrow_s16(s2[20], -cospi_8_64, s2[27], cospi_24_64); s1[21] = multiply_accumulate_shift_and_narrow_s16(s2[21], -cospi_24_64, s2[26], -cospi_8_64); s1[26] = 
multiply_accumulate_shift_and_narrow_s16(s2[21], -cospi_8_64, s2[26], cospi_24_64); // stage 6 s2[0] = vaddq_s16(s1[0], s1[7]); s2[1] = vaddq_s16(s1[0], s1[6]); s2[2] = vaddq_s16(s1[0], s1[5]); s2[3] = vaddq_s16(s1[0], s1[4]); s2[4] = vsubq_s16(s1[0], s1[4]); s2[5] = vsubq_s16(s1[0], s1[5]); s2[6] = vsubq_s16(s1[0], s1[6]); s2[7] = vsubq_s16(s1[0], s1[7]); s2[10] = sub_multiply_shift_and_narrow_s16(s2[14], s2[9], cospi_16_64); s2[13] = add_multiply_shift_and_narrow_s16(s2[9], s2[14], cospi_16_64); s2[11] = sub_multiply_shift_and_narrow_s16(s2[15], s2[8], cospi_16_64); s2[12] = add_multiply_shift_and_narrow_s16(s2[8], s2[15], cospi_16_64); s2[16] = vaddq_s16(s1[16], s2[23]); s2[17] = vaddq_s16(s1[17], s2[22]); s2[18] = vaddq_s16(s1[18], s1[21]); s2[19] = vaddq_s16(s1[19], s1[20]); s2[20] = vsubq_s16(s1[19], s1[20]); s2[21] = vsubq_s16(s1[18], s1[21]); s2[22] = vsubq_s16(s1[17], s2[22]); s2[23] = vsubq_s16(s1[16], s2[23]); s3[24] = vsubq_s16(s1[31], s2[24]); s3[25] = vsubq_s16(s1[30], s2[25]); s3[26] = vsubq_s16(s1[29], s1[26]); s3[27] = vsubq_s16(s1[28], s1[27]); s2[28] = vaddq_s16(s1[27], s1[28]); s2[29] = vaddq_s16(s1[26], s1[29]); s2[30] = vaddq_s16(s2[25], s1[30]); s2[31] = vaddq_s16(s2[24], s1[31]); // stage 7 s1[0] = vaddq_s16(s2[0], s2[15]); s1[1] = vaddq_s16(s2[1], s2[14]); s1[2] = vaddq_s16(s2[2], s2[13]); s1[3] = vaddq_s16(s2[3], s2[12]); s1[4] = vaddq_s16(s2[4], s2[11]); s1[5] = vaddq_s16(s2[5], s2[10]); s1[6] = vaddq_s16(s2[6], s2[9]); s1[7] = vaddq_s16(s2[7], s2[8]); s1[8] = vsubq_s16(s2[7], s2[8]); s1[9] = vsubq_s16(s2[6], s2[9]); s1[10] = vsubq_s16(s2[5], s2[10]); s1[11] = vsubq_s16(s2[4], s2[11]); s1[12] = vsubq_s16(s2[3], s2[12]); s1[13] = vsubq_s16(s2[2], s2[13]); s1[14] = vsubq_s16(s2[1], s2[14]); s1[15] = vsubq_s16(s2[0], s2[15]); s1[20] = sub_multiply_shift_and_narrow_s16(s3[27], s2[20], cospi_16_64); s1[27] = add_multiply_shift_and_narrow_s16(s2[20], s3[27], cospi_16_64); s1[21] = sub_multiply_shift_and_narrow_s16(s3[26], s2[21], cospi_16_64); s1[26] = add_multiply_shift_and_narrow_s16(s2[21], s3[26], cospi_16_64); s1[22] = sub_multiply_shift_and_narrow_s16(s3[25], s2[22], cospi_16_64); s1[25] = add_multiply_shift_and_narrow_s16(s2[22], s3[25], cospi_16_64); s1[23] = sub_multiply_shift_and_narrow_s16(s3[24], s2[23], cospi_16_64); s1[24] = add_multiply_shift_and_narrow_s16(s2[23], s3[24], cospi_16_64); // final stage vst1q_s16(output, vaddq_s16(s1[0], s2[31])); output += 8; vst1q_s16(output, vaddq_s16(s1[1], s2[30])); output += 8; vst1q_s16(output, vaddq_s16(s1[2], s2[29])); output += 8; vst1q_s16(output, vaddq_s16(s1[3], s2[28])); output += 8; vst1q_s16(output, vaddq_s16(s1[4], s1[27])); output += 8; vst1q_s16(output, vaddq_s16(s1[5], s1[26])); output += 8; vst1q_s16(output, vaddq_s16(s1[6], s1[25])); output += 8; vst1q_s16(output, vaddq_s16(s1[7], s1[24])); output += 8; vst1q_s16(output, vaddq_s16(s1[8], s1[23])); output += 8; vst1q_s16(output, vaddq_s16(s1[9], s1[22])); output += 8; vst1q_s16(output, vaddq_s16(s1[10], s1[21])); output += 8; vst1q_s16(output, vaddq_s16(s1[11], s1[20])); output += 8; vst1q_s16(output, vaddq_s16(s1[12], s2[19])); output += 8; vst1q_s16(output, vaddq_s16(s1[13], s2[18])); output += 8; vst1q_s16(output, vaddq_s16(s1[14], s2[17])); output += 8; vst1q_s16(output, vaddq_s16(s1[15], s2[16])); output += 8; vst1q_s16(output, vsubq_s16(s1[15], s2[16])); output += 8; vst1q_s16(output, vsubq_s16(s1[14], s2[17])); output += 8; vst1q_s16(output, vsubq_s16(s1[13], s2[18])); output += 8; vst1q_s16(output, vsubq_s16(s1[12], s2[19])); output += 8; 
vst1q_s16(output, vsubq_s16(s1[11], s1[20])); output += 8; vst1q_s16(output, vsubq_s16(s1[10], s1[21])); output += 8; vst1q_s16(output, vsubq_s16(s1[9], s1[22])); output += 8; vst1q_s16(output, vsubq_s16(s1[8], s1[23])); output += 8; vst1q_s16(output, vsubq_s16(s1[7], s1[24])); output += 8; vst1q_s16(output, vsubq_s16(s1[6], s1[25])); output += 8; vst1q_s16(output, vsubq_s16(s1[5], s1[26])); output += 8; vst1q_s16(output, vsubq_s16(s1[4], s1[27])); output += 8; vst1q_s16(output, vsubq_s16(s1[3], s2[28])); output += 8; vst1q_s16(output, vsubq_s16(s1[2], s2[29])); output += 8; vst1q_s16(output, vsubq_s16(s1[1], s2[30])); output += 8; vst1q_s16(output, vsubq_s16(s1[0], s2[31])); } void vpx_idct32_8_neon(const int16_t *input, void *const output, int stride, const int highbd_flag) { int16x8_t in[8], s1[32], s2[32], s3[32], out[32]; load_and_transpose_s16_8x8(input, 8, &in[0], &in[1], &in[2], &in[3], &in[4], &in[5], &in[6], &in[7]); // stage 1 s1[16] = multiply_shift_and_narrow_s16(in[1], cospi_31_64); s1[31] = multiply_shift_and_narrow_s16(in[1], cospi_1_64); // Different for _8_ s1[19] = multiply_shift_and_narrow_s16(in[7], -cospi_25_64); s1[28] = multiply_shift_and_narrow_s16(in[7], cospi_7_64); s1[20] = multiply_shift_and_narrow_s16(in[5], cospi_27_64); s1[27] = multiply_shift_and_narrow_s16(in[5], cospi_5_64); s1[23] = multiply_shift_and_narrow_s16(in[3], -cospi_29_64); s1[24] = multiply_shift_and_narrow_s16(in[3], cospi_3_64); // stage 2 s2[8] = multiply_shift_and_narrow_s16(in[2], cospi_30_64); s2[15] = multiply_shift_and_narrow_s16(in[2], cospi_2_64); s2[11] = multiply_shift_and_narrow_s16(in[6], -cospi_26_64); s2[12] = multiply_shift_and_narrow_s16(in[6], cospi_6_64); // stage 3 s1[4] = multiply_shift_and_narrow_s16(in[4], cospi_28_64); s1[7] = multiply_shift_and_narrow_s16(in[4], cospi_4_64); s1[17] = multiply_accumulate_shift_and_narrow_s16(s1[16], -cospi_4_64, s1[31], cospi_28_64); s1[30] = multiply_accumulate_shift_and_narrow_s16(s1[16], cospi_28_64, s1[31], cospi_4_64); // Different for _8_ s1[18] = multiply_accumulate_shift_and_narrow_s16(s1[19], -cospi_28_64, s1[28], -cospi_4_64); s1[29] = multiply_accumulate_shift_and_narrow_s16(s1[19], -cospi_4_64, s1[28], cospi_28_64); s1[21] = multiply_accumulate_shift_and_narrow_s16(s1[20], -cospi_20_64, s1[27], cospi_12_64); s1[26] = multiply_accumulate_shift_and_narrow_s16(s1[20], cospi_12_64, s1[27], cospi_20_64); s1[22] = multiply_accumulate_shift_and_narrow_s16(s1[23], -cospi_12_64, s1[24], -cospi_20_64); s1[25] = multiply_accumulate_shift_and_narrow_s16(s1[23], -cospi_20_64, s1[24], cospi_12_64); // stage 4 s1[0] = multiply_shift_and_narrow_s16(in[0], cospi_16_64); s2[9] = multiply_accumulate_shift_and_narrow_s16(s2[8], -cospi_8_64, s2[15], cospi_24_64); s2[14] = multiply_accumulate_shift_and_narrow_s16(s2[8], cospi_24_64, s2[15], cospi_8_64); s2[10] = multiply_accumulate_shift_and_narrow_s16(s2[11], -cospi_24_64, s2[12], -cospi_8_64); s2[13] = multiply_accumulate_shift_and_narrow_s16(s2[11], -cospi_8_64, s2[12], cospi_24_64); s2[16] = vaddq_s16(s1[16], s1[19]); s2[17] = vaddq_s16(s1[17], s1[18]); s2[18] = vsubq_s16(s1[17], s1[18]); s2[19] = vsubq_s16(s1[16], s1[19]); s2[20] = vsubq_s16(s1[23], s1[20]); s2[21] = vsubq_s16(s1[22], s1[21]); s2[22] = vaddq_s16(s1[21], s1[22]); s2[23] = vaddq_s16(s1[20], s1[23]); s2[24] = vaddq_s16(s1[24], s1[27]); s2[25] = vaddq_s16(s1[25], s1[26]); s2[26] = vsubq_s16(s1[25], s1[26]); s2[27] = vsubq_s16(s1[24], s1[27]); s2[28] = vsubq_s16(s1[31], s1[28]); s2[29] = vsubq_s16(s1[30], s1[29]); s2[30] = 
vaddq_s16(s1[29], s1[30]); s2[31] = vaddq_s16(s1[28], s1[31]); // stage 5 s1[5] = sub_multiply_shift_and_narrow_s16(s1[7], s1[4], cospi_16_64); s1[6] = add_multiply_shift_and_narrow_s16(s1[4], s1[7], cospi_16_64); s1[8] = vaddq_s16(s2[8], s2[11]); s1[9] = vaddq_s16(s2[9], s2[10]); s1[10] = vsubq_s16(s2[9], s2[10]); s1[11] = vsubq_s16(s2[8], s2[11]); s1[12] = vsubq_s16(s2[15], s2[12]); s1[13] = vsubq_s16(s2[14], s2[13]); s1[14] = vaddq_s16(s2[13], s2[14]); s1[15] = vaddq_s16(s2[12], s2[15]); s1[18] = multiply_accumulate_shift_and_narrow_s16(s2[18], -cospi_8_64, s2[29], cospi_24_64); s1[29] = multiply_accumulate_shift_and_narrow_s16(s2[18], cospi_24_64, s2[29], cospi_8_64); s1[19] = multiply_accumulate_shift_and_narrow_s16(s2[19], -cospi_8_64, s2[28], cospi_24_64); s1[28] = multiply_accumulate_shift_and_narrow_s16(s2[19], cospi_24_64, s2[28], cospi_8_64); s1[20] = multiply_accumulate_shift_and_narrow_s16(s2[20], -cospi_24_64, s2[27], -cospi_8_64); s1[27] = multiply_accumulate_shift_and_narrow_s16(s2[20], -cospi_8_64, s2[27], cospi_24_64); s1[21] = multiply_accumulate_shift_and_narrow_s16(s2[21], -cospi_24_64, s2[26], -cospi_8_64); s1[26] = multiply_accumulate_shift_and_narrow_s16(s2[21], -cospi_8_64, s2[26], cospi_24_64); // stage 6 s2[0] = vaddq_s16(s1[0], s1[7]); s2[1] = vaddq_s16(s1[0], s1[6]); s2[2] = vaddq_s16(s1[0], s1[5]); s2[3] = vaddq_s16(s1[0], s1[4]); s2[4] = vsubq_s16(s1[0], s1[4]); s2[5] = vsubq_s16(s1[0], s1[5]); s2[6] = vsubq_s16(s1[0], s1[6]); s2[7] = vsubq_s16(s1[0], s1[7]); s2[10] = sub_multiply_shift_and_narrow_s16(s1[13], s1[10], cospi_16_64); s2[13] = add_multiply_shift_and_narrow_s16(s1[10], s1[13], cospi_16_64); s2[11] = sub_multiply_shift_and_narrow_s16(s1[12], s1[11], cospi_16_64); s2[12] = add_multiply_shift_and_narrow_s16(s1[11], s1[12], cospi_16_64); s1[16] = vaddq_s16(s2[16], s2[23]); s1[17] = vaddq_s16(s2[17], s2[22]); s2[18] = vaddq_s16(s1[18], s1[21]); s2[19] = vaddq_s16(s1[19], s1[20]); s2[20] = vsubq_s16(s1[19], s1[20]); s2[21] = vsubq_s16(s1[18], s1[21]); s1[22] = vsubq_s16(s2[17], s2[22]); s1[23] = vsubq_s16(s2[16], s2[23]); s3[24] = vsubq_s16(s2[31], s2[24]); s3[25] = vsubq_s16(s2[30], s2[25]); s3[26] = vsubq_s16(s1[29], s1[26]); s3[27] = vsubq_s16(s1[28], s1[27]); s2[28] = vaddq_s16(s1[27], s1[28]); s2[29] = vaddq_s16(s1[26], s1[29]); s2[30] = vaddq_s16(s2[25], s2[30]); s2[31] = vaddq_s16(s2[24], s2[31]); // stage 7 s1[0] = vaddq_s16(s2[0], s1[15]); s1[1] = vaddq_s16(s2[1], s1[14]); s1[2] = vaddq_s16(s2[2], s2[13]); s1[3] = vaddq_s16(s2[3], s2[12]); s1[4] = vaddq_s16(s2[4], s2[11]); s1[5] = vaddq_s16(s2[5], s2[10]); s1[6] = vaddq_s16(s2[6], s1[9]); s1[7] = vaddq_s16(s2[7], s1[8]); s1[8] = vsubq_s16(s2[7], s1[8]); s1[9] = vsubq_s16(s2[6], s1[9]); s1[10] = vsubq_s16(s2[5], s2[10]); s1[11] = vsubq_s16(s2[4], s2[11]); s1[12] = vsubq_s16(s2[3], s2[12]); s1[13] = vsubq_s16(s2[2], s2[13]); s1[14] = vsubq_s16(s2[1], s1[14]); s1[15] = vsubq_s16(s2[0], s1[15]); s1[20] = sub_multiply_shift_and_narrow_s16(s3[27], s2[20], cospi_16_64); s1[27] = add_multiply_shift_and_narrow_s16(s2[20], s3[27], cospi_16_64); s1[21] = sub_multiply_shift_and_narrow_s16(s3[26], s2[21], cospi_16_64); s1[26] = add_multiply_shift_and_narrow_s16(s2[21], s3[26], cospi_16_64); s2[22] = sub_multiply_shift_and_narrow_s16(s3[25], s1[22], cospi_16_64); s1[25] = add_multiply_shift_and_narrow_s16(s1[22], s3[25], cospi_16_64); s2[23] = sub_multiply_shift_and_narrow_s16(s3[24], s1[23], cospi_16_64); s1[24] = add_multiply_shift_and_narrow_s16(s1[23], s3[24], cospi_16_64); // final stage out[0] = 
final_add(s1[0], s2[31]); out[1] = final_add(s1[1], s2[30]); out[2] = final_add(s1[2], s2[29]); out[3] = final_add(s1[3], s2[28]); out[4] = final_add(s1[4], s1[27]); out[5] = final_add(s1[5], s1[26]); out[6] = final_add(s1[6], s1[25]); out[7] = final_add(s1[7], s1[24]); out[8] = final_add(s1[8], s2[23]); out[9] = final_add(s1[9], s2[22]); out[10] = final_add(s1[10], s1[21]); out[11] = final_add(s1[11], s1[20]); out[12] = final_add(s1[12], s2[19]); out[13] = final_add(s1[13], s2[18]); out[14] = final_add(s1[14], s1[17]); out[15] = final_add(s1[15], s1[16]); out[16] = final_sub(s1[15], s1[16]); out[17] = final_sub(s1[14], s1[17]); out[18] = final_sub(s1[13], s2[18]); out[19] = final_sub(s1[12], s2[19]); out[20] = final_sub(s1[11], s1[20]); out[21] = final_sub(s1[10], s1[21]); out[22] = final_sub(s1[9], s2[22]); out[23] = final_sub(s1[8], s2[23]); out[24] = final_sub(s1[7], s1[24]); out[25] = final_sub(s1[6], s1[25]); out[26] = final_sub(s1[5], s1[26]); out[27] = final_sub(s1[4], s1[27]); out[28] = final_sub(s1[3], s2[28]); out[29] = final_sub(s1[2], s2[29]); out[30] = final_sub(s1[1], s2[30]); out[31] = final_sub(s1[0], s2[31]); if (highbd_flag) { highbd_add_and_store_bd8(out, output, stride); } else { uint8_t *const outputT = (uint8_t *)output; add_and_store_u8_s16(out + 0, outputT, stride); add_and_store_u8_s16(out + 8, outputT + (8 * stride), stride); add_and_store_u8_s16(out + 16, outputT + (16 * stride), stride); add_and_store_u8_s16(out + 24, outputT + (24 * stride), stride); } } void vpx_idct32x32_34_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { int i; int16_t temp[32 * 8]; int16_t *t = temp; vpx_idct32_6_neon(input, t); for (i = 0; i < 32; i += 8) { vpx_idct32_8_neon(t, dest, stride, 0); t += (8 * 8); dest += 8; } } libvpx-1.8.2/vpx_dsp/arm/idct32x32_add_neon.c000066400000000000000000000652101357355204000207200ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <arm_neon.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/txfm_common.h" static INLINE void load_from_transformed(const int16_t *const trans_buf, const int first, const int second, int16x8_t *const q0, int16x8_t *const q1) { *q0 = vld1q_s16(trans_buf + first * 8); *q1 = vld1q_s16(trans_buf + second * 8); } static INLINE void load_from_output(const int16_t *const out, const int first, const int second, int16x8_t *const q0, int16x8_t *const q1) { *q0 = vld1q_s16(out + first * 32); *q1 = vld1q_s16(out + second * 32); } static INLINE void store_in_output(int16_t *const out, const int first, const int second, const int16x8_t q0, const int16x8_t q1) { vst1q_s16(out + first * 32, q0); vst1q_s16(out + second * 32, q1); } static INLINE void store_combine_results(uint8_t *p1, uint8_t *p2, const int stride, int16x8_t q0, int16x8_t q1, int16x8_t q2, int16x8_t q3) { uint8x8_t d[4]; d[0] = vld1_u8(p1); p1 += stride; d[1] = vld1_u8(p1); d[3] = vld1_u8(p2); p2 -= stride; d[2] = vld1_u8(p2); q0 = vrshrq_n_s16(q0, 6); q1 = vrshrq_n_s16(q1, 6); q2 = vrshrq_n_s16(q2, 6); q3 = vrshrq_n_s16(q3, 6); q0 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q0), d[0])); q1 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q1), d[1])); q2 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q2), d[2])); q3 = vreinterpretq_s16_u16(vaddw_u8(vreinterpretq_u16_s16(q3), d[3])); d[0] = vqmovun_s16(q0); d[1] = vqmovun_s16(q1); d[2] = vqmovun_s16(q2); d[3] = vqmovun_s16(q3); vst1_u8(p1, d[1]); p1 -= stride; vst1_u8(p1, d[0]); vst1_u8(p2, d[2]); p2 += stride; vst1_u8(p2, d[3]); } static INLINE void highbd_store_combine_results_bd8(uint16_t *p1, uint16_t *p2, const int stride, int16x8_t q0, int16x8_t q1, int16x8_t q2, int16x8_t q3) { uint16x8_t d[4]; d[0] = vld1q_u16(p1); p1 += stride; d[1] = vld1q_u16(p1); d[3] = vld1q_u16(p2); p2 -= stride; d[2] = vld1q_u16(p2); q0 = vrshrq_n_s16(q0, 6); q1 = vrshrq_n_s16(q1, 6); q2 = vrshrq_n_s16(q2, 6); q3 = vrshrq_n_s16(q3, 6); q0 = vaddq_s16(q0, vreinterpretq_s16_u16(d[0])); q1 = vaddq_s16(q1, vreinterpretq_s16_u16(d[1])); q2 = vaddq_s16(q2, vreinterpretq_s16_u16(d[2])); q3 = vaddq_s16(q3, vreinterpretq_s16_u16(d[3])); d[0] = vmovl_u8(vqmovun_s16(q0)); d[1] = vmovl_u8(vqmovun_s16(q1)); d[2] = vmovl_u8(vqmovun_s16(q2)); d[3] = vmovl_u8(vqmovun_s16(q3)); vst1q_u16(p1, d[1]); p1 -= stride; vst1q_u16(p1, d[0]); vst1q_u16(p2, d[2]); p2 += stride; vst1q_u16(p2, d[3]); } static INLINE void do_butterfly(const int16x8_t qIn0, const int16x8_t qIn1, const int16_t first_const, const int16_t second_const, int16x8_t *const qOut0, int16x8_t *const qOut1) { int32x4_t q[4]; int16x4_t d[6]; d[0] = vget_low_s16(qIn0); d[1] = vget_high_s16(qIn0); d[2] = vget_low_s16(qIn1); d[3] = vget_high_s16(qIn1); // Note: using v{mul, mla, mls}l_n_s16 here slows down 35% with gcc 4.9.
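// For reference, each do_butterfly() call computes, per lane, the rotation
//   *qOut0 = dct_const_round_shift(qIn0 * first_const - qIn1 * second_const)
//   *qOut1 = dct_const_round_shift(qIn0 * second_const + qIn1 * first_const)
// using widening 16x16->32 multiply-accumulates followed by a rounding
// narrow by DCT_CONST_BITS, mirroring the scalar butterflies in
// vpx_dsp/inv_txfm.c.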
d[4] = vdup_n_s16(first_const); d[5] = vdup_n_s16(second_const); q[0] = vmull_s16(d[0], d[4]); q[1] = vmull_s16(d[1], d[4]); q[0] = vmlsl_s16(q[0], d[2], d[5]); q[1] = vmlsl_s16(q[1], d[3], d[5]); q[2] = vmull_s16(d[0], d[5]); q[3] = vmull_s16(d[1], d[5]); q[2] = vmlal_s16(q[2], d[2], d[4]); q[3] = vmlal_s16(q[3], d[3], d[4]); *qOut0 = vcombine_s16(vrshrn_n_s32(q[0], DCT_CONST_BITS), vrshrn_n_s32(q[1], DCT_CONST_BITS)); *qOut1 = vcombine_s16(vrshrn_n_s32(q[2], DCT_CONST_BITS), vrshrn_n_s32(q[3], DCT_CONST_BITS)); } static INLINE void load_s16x8q(const int16_t *in, int16x8_t *const s0, int16x8_t *const s1, int16x8_t *const s2, int16x8_t *const s3, int16x8_t *const s4, int16x8_t *const s5, int16x8_t *const s6, int16x8_t *const s7) { *s0 = vld1q_s16(in); in += 32; *s1 = vld1q_s16(in); in += 32; *s2 = vld1q_s16(in); in += 32; *s3 = vld1q_s16(in); in += 32; *s4 = vld1q_s16(in); in += 32; *s5 = vld1q_s16(in); in += 32; *s6 = vld1q_s16(in); in += 32; *s7 = vld1q_s16(in); } static INLINE void transpose_and_store_s16_8x8(int16x8_t a0, int16x8_t a1, int16x8_t a2, int16x8_t a3, int16x8_t a4, int16x8_t a5, int16x8_t a6, int16x8_t a7, int16_t **out) { transpose_s16_8x8(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); vst1q_s16(*out, a0); *out += 8; vst1q_s16(*out, a1); *out += 8; vst1q_s16(*out, a2); *out += 8; vst1q_s16(*out, a3); *out += 8; vst1q_s16(*out, a4); *out += 8; vst1q_s16(*out, a5); *out += 8; vst1q_s16(*out, a6); *out += 8; vst1q_s16(*out, a7); *out += 8; } static INLINE void idct32_transpose_pair(const int16_t *input, int16_t *t_buf) { int i; int16x8_t s0, s1, s2, s3, s4, s5, s6, s7; for (i = 0; i < 4; i++, input += 8) { load_s16x8q(input, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); transpose_and_store_s16_8x8(s0, s1, s2, s3, s4, s5, s6, s7, &t_buf); } } #if CONFIG_VP9_HIGHBITDEPTH static INLINE void load_s16x8q_tran_low( const tran_low_t *in, int16x8_t *const s0, int16x8_t *const s1, int16x8_t *const s2, int16x8_t *const s3, int16x8_t *const s4, int16x8_t *const s5, int16x8_t *const s6, int16x8_t *const s7) { *s0 = load_tran_low_to_s16q(in); in += 32; *s1 = load_tran_low_to_s16q(in); in += 32; *s2 = load_tran_low_to_s16q(in); in += 32; *s3 = load_tran_low_to_s16q(in); in += 32; *s4 = load_tran_low_to_s16q(in); in += 32; *s5 = load_tran_low_to_s16q(in); in += 32; *s6 = load_tran_low_to_s16q(in); in += 32; *s7 = load_tran_low_to_s16q(in); } static INLINE void idct32_transpose_pair_tran_low(const tran_low_t *input, int16_t *t_buf) { int i; int16x8_t s0, s1, s2, s3, s4, s5, s6, s7; for (i = 0; i < 4; i++, input += 8) { load_s16x8q_tran_low(input, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); transpose_and_store_s16_8x8(s0, s1, s2, s3, s4, s5, s6, s7, &t_buf); } } #else // !CONFIG_VP9_HIGHBITDEPTH #define idct32_transpose_pair_tran_low idct32_transpose_pair #endif // CONFIG_VP9_HIGHBITDEPTH static INLINE void idct32_bands_end_1st_pass(int16_t *const out, int16x8_t *const q) { store_in_output(out, 16, 17, q[6], q[7]); store_in_output(out, 14, 15, q[8], q[9]); load_from_output(out, 30, 31, &q[0], &q[1]); q[4] = vaddq_s16(q[2], q[1]); q[5] = vaddq_s16(q[3], q[0]); q[6] = vsubq_s16(q[3], q[0]); q[7] = vsubq_s16(q[2], q[1]); store_in_output(out, 30, 31, q[6], q[7]); store_in_output(out, 0, 1, q[4], q[5]); load_from_output(out, 12, 13, &q[0], &q[1]); q[2] = vaddq_s16(q[10], q[1]); q[3] = vaddq_s16(q[11], q[0]); q[4] = vsubq_s16(q[11], q[0]); q[5] = vsubq_s16(q[10], q[1]); load_from_output(out, 18, 19, &q[0], &q[1]); q[8] = vaddq_s16(q[4], q[1]); q[9] = vaddq_s16(q[5], q[0]); q[6] = vsubq_s16(q[5], q[0]); q[7] = 
vsubq_s16(q[4], q[1]); store_in_output(out, 18, 19, q[6], q[7]); store_in_output(out, 12, 13, q[8], q[9]); load_from_output(out, 28, 29, &q[0], &q[1]); q[4] = vaddq_s16(q[2], q[1]); q[5] = vaddq_s16(q[3], q[0]); q[6] = vsubq_s16(q[3], q[0]); q[7] = vsubq_s16(q[2], q[1]); store_in_output(out, 28, 29, q[6], q[7]); store_in_output(out, 2, 3, q[4], q[5]); load_from_output(out, 10, 11, &q[0], &q[1]); q[2] = vaddq_s16(q[12], q[1]); q[3] = vaddq_s16(q[13], q[0]); q[4] = vsubq_s16(q[13], q[0]); q[5] = vsubq_s16(q[12], q[1]); load_from_output(out, 20, 21, &q[0], &q[1]); q[8] = vaddq_s16(q[4], q[1]); q[9] = vaddq_s16(q[5], q[0]); q[6] = vsubq_s16(q[5], q[0]); q[7] = vsubq_s16(q[4], q[1]); store_in_output(out, 20, 21, q[6], q[7]); store_in_output(out, 10, 11, q[8], q[9]); load_from_output(out, 26, 27, &q[0], &q[1]); q[4] = vaddq_s16(q[2], q[1]); q[5] = vaddq_s16(q[3], q[0]); q[6] = vsubq_s16(q[3], q[0]); q[7] = vsubq_s16(q[2], q[1]); store_in_output(out, 26, 27, q[6], q[7]); store_in_output(out, 4, 5, q[4], q[5]); load_from_output(out, 8, 9, &q[0], &q[1]); q[2] = vaddq_s16(q[14], q[1]); q[3] = vaddq_s16(q[15], q[0]); q[4] = vsubq_s16(q[15], q[0]); q[5] = vsubq_s16(q[14], q[1]); load_from_output(out, 22, 23, &q[0], &q[1]); q[8] = vaddq_s16(q[4], q[1]); q[9] = vaddq_s16(q[5], q[0]); q[6] = vsubq_s16(q[5], q[0]); q[7] = vsubq_s16(q[4], q[1]); store_in_output(out, 22, 23, q[6], q[7]); store_in_output(out, 8, 9, q[8], q[9]); load_from_output(out, 24, 25, &q[0], &q[1]); q[4] = vaddq_s16(q[2], q[1]); q[5] = vaddq_s16(q[3], q[0]); q[6] = vsubq_s16(q[3], q[0]); q[7] = vsubq_s16(q[2], q[1]); store_in_output(out, 24, 25, q[6], q[7]); store_in_output(out, 6, 7, q[4], q[5]); } static INLINE void idct32_bands_end_2nd_pass(const int16_t *const out, uint8_t *const dest, const int stride, int16x8_t *const q) { uint8_t *dest0 = dest + 0 * stride; uint8_t *dest1 = dest + 31 * stride; uint8_t *dest2 = dest + 16 * stride; uint8_t *dest3 = dest + 15 * stride; const int str2 = stride << 1; store_combine_results(dest2, dest3, stride, q[6], q[7], q[8], q[9]); dest2 += str2; dest3 -= str2; load_from_output(out, 30, 31, &q[0], &q[1]); q[4] = final_add(q[2], q[1]); q[5] = final_add(q[3], q[0]); q[6] = final_sub(q[3], q[0]); q[7] = final_sub(q[2], q[1]); store_combine_results(dest0, dest1, stride, q[4], q[5], q[6], q[7]); dest0 += str2; dest1 -= str2; load_from_output(out, 12, 13, &q[0], &q[1]); q[2] = vaddq_s16(q[10], q[1]); q[3] = vaddq_s16(q[11], q[0]); q[4] = vsubq_s16(q[11], q[0]); q[5] = vsubq_s16(q[10], q[1]); load_from_output(out, 18, 19, &q[0], &q[1]); q[8] = final_add(q[4], q[1]); q[9] = final_add(q[5], q[0]); q[6] = final_sub(q[5], q[0]); q[7] = final_sub(q[4], q[1]); store_combine_results(dest2, dest3, stride, q[6], q[7], q[8], q[9]); dest2 += str2; dest3 -= str2; load_from_output(out, 28, 29, &q[0], &q[1]); q[4] = final_add(q[2], q[1]); q[5] = final_add(q[3], q[0]); q[6] = final_sub(q[3], q[0]); q[7] = final_sub(q[2], q[1]); store_combine_results(dest0, dest1, stride, q[4], q[5], q[6], q[7]); dest0 += str2; dest1 -= str2; load_from_output(out, 10, 11, &q[0], &q[1]); q[2] = vaddq_s16(q[12], q[1]); q[3] = vaddq_s16(q[13], q[0]); q[4] = vsubq_s16(q[13], q[0]); q[5] = vsubq_s16(q[12], q[1]); load_from_output(out, 20, 21, &q[0], &q[1]); q[8] = final_add(q[4], q[1]); q[9] = final_add(q[5], q[0]); q[6] = final_sub(q[5], q[0]); q[7] = final_sub(q[4], q[1]); store_combine_results(dest2, dest3, stride, q[6], q[7], q[8], q[9]); dest2 += str2; dest3 -= str2; load_from_output(out, 26, 27, &q[0], &q[1]); q[4] = final_add(q[2], 
q[1]); q[5] = final_add(q[3], q[0]); q[6] = final_sub(q[3], q[0]); q[7] = final_sub(q[2], q[1]); store_combine_results(dest0, dest1, stride, q[4], q[5], q[6], q[7]); dest0 += str2; dest1 -= str2; load_from_output(out, 8, 9, &q[0], &q[1]); q[2] = vaddq_s16(q[14], q[1]); q[3] = vaddq_s16(q[15], q[0]); q[4] = vsubq_s16(q[15], q[0]); q[5] = vsubq_s16(q[14], q[1]); load_from_output(out, 22, 23, &q[0], &q[1]); q[8] = final_add(q[4], q[1]); q[9] = final_add(q[5], q[0]); q[6] = final_sub(q[5], q[0]); q[7] = final_sub(q[4], q[1]); store_combine_results(dest2, dest3, stride, q[6], q[7], q[8], q[9]); load_from_output(out, 24, 25, &q[0], &q[1]); q[4] = final_add(q[2], q[1]); q[5] = final_add(q[3], q[0]); q[6] = final_sub(q[3], q[0]); q[7] = final_sub(q[2], q[1]); store_combine_results(dest0, dest1, stride, q[4], q[5], q[6], q[7]); } static INLINE void highbd_idct32_bands_end_2nd_pass_bd8( const int16_t *const out, uint16_t *const dest, const int stride, int16x8_t *const q) { uint16_t *dest0 = dest + 0 * stride; uint16_t *dest1 = dest + 31 * stride; uint16_t *dest2 = dest + 16 * stride; uint16_t *dest3 = dest + 15 * stride; const int str2 = stride << 1; highbd_store_combine_results_bd8(dest2, dest3, stride, q[6], q[7], q[8], q[9]); dest2 += str2; dest3 -= str2; load_from_output(out, 30, 31, &q[0], &q[1]); q[4] = final_add(q[2], q[1]); q[5] = final_add(q[3], q[0]); q[6] = final_sub(q[3], q[0]); q[7] = final_sub(q[2], q[1]); highbd_store_combine_results_bd8(dest0, dest1, stride, q[4], q[5], q[6], q[7]); dest0 += str2; dest1 -= str2; load_from_output(out, 12, 13, &q[0], &q[1]); q[2] = vaddq_s16(q[10], q[1]); q[3] = vaddq_s16(q[11], q[0]); q[4] = vsubq_s16(q[11], q[0]); q[5] = vsubq_s16(q[10], q[1]); load_from_output(out, 18, 19, &q[0], &q[1]); q[8] = final_add(q[4], q[1]); q[9] = final_add(q[5], q[0]); q[6] = final_sub(q[5], q[0]); q[7] = final_sub(q[4], q[1]); highbd_store_combine_results_bd8(dest2, dest3, stride, q[6], q[7], q[8], q[9]); dest2 += str2; dest3 -= str2; load_from_output(out, 28, 29, &q[0], &q[1]); q[4] = final_add(q[2], q[1]); q[5] = final_add(q[3], q[0]); q[6] = final_sub(q[3], q[0]); q[7] = final_sub(q[2], q[1]); highbd_store_combine_results_bd8(dest0, dest1, stride, q[4], q[5], q[6], q[7]); dest0 += str2; dest1 -= str2; load_from_output(out, 10, 11, &q[0], &q[1]); q[2] = vaddq_s16(q[12], q[1]); q[3] = vaddq_s16(q[13], q[0]); q[4] = vsubq_s16(q[13], q[0]); q[5] = vsubq_s16(q[12], q[1]); load_from_output(out, 20, 21, &q[0], &q[1]); q[8] = final_add(q[4], q[1]); q[9] = final_add(q[5], q[0]); q[6] = final_sub(q[5], q[0]); q[7] = final_sub(q[4], q[1]); highbd_store_combine_results_bd8(dest2, dest3, stride, q[6], q[7], q[8], q[9]); dest2 += str2; dest3 -= str2; load_from_output(out, 26, 27, &q[0], &q[1]); q[4] = final_add(q[2], q[1]); q[5] = final_add(q[3], q[0]); q[6] = final_sub(q[3], q[0]); q[7] = final_sub(q[2], q[1]); highbd_store_combine_results_bd8(dest0, dest1, stride, q[4], q[5], q[6], q[7]); dest0 += str2; dest1 -= str2; load_from_output(out, 8, 9, &q[0], &q[1]); q[2] = vaddq_s16(q[14], q[1]); q[3] = vaddq_s16(q[15], q[0]); q[4] = vsubq_s16(q[15], q[0]); q[5] = vsubq_s16(q[14], q[1]); load_from_output(out, 22, 23, &q[0], &q[1]); q[8] = final_add(q[4], q[1]); q[9] = final_add(q[5], q[0]); q[6] = final_sub(q[5], q[0]); q[7] = final_sub(q[4], q[1]); highbd_store_combine_results_bd8(dest2, dest3, stride, q[6], q[7], q[8], q[9]); load_from_output(out, 24, 25, &q[0], &q[1]); q[4] = final_add(q[2], q[1]); q[5] = final_add(q[3], q[0]); q[6] = final_sub(q[3], q[0]); q[7] = final_sub(q[2], 
q[1]); highbd_store_combine_results_bd8(dest0, dest1, stride, q[4], q[5], q[6], q[7]); } void vpx_idct32_32_neon(const tran_low_t *input, uint8_t *dest, const int stride, const int highbd_flag) { int i, idct32_pass_loop; int16_t trans_buf[32 * 8]; int16_t pass1[32 * 32]; int16_t pass2[32 * 32]; const int16_t *input_pass2 = pass1; // input of pass2 is the result of pass1 int16_t *out; int16x8_t q[16]; uint16_t *dst = CAST_TO_SHORTPTR(dest); for (idct32_pass_loop = 0, out = pass1; idct32_pass_loop < 2; idct32_pass_loop++, out = pass2) { for (i = 0; i < 4; i++, out += 8) { // idct32_bands_loop if (idct32_pass_loop == 0) { idct32_transpose_pair_tran_low(input, trans_buf); input += 32 * 8; } else { idct32_transpose_pair(input_pass2, trans_buf); input_pass2 += 32 * 8; } // ----------------------------------------- // BLOCK A: 16-19,28-31 // ----------------------------------------- // generate 16,17,30,31 // part of stage 1 load_from_transformed(trans_buf, 1, 31, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_31_64, cospi_1_64, &q[0], &q[2]); load_from_transformed(trans_buf, 17, 15, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_15_64, cospi_17_64, &q[1], &q[3]); // part of stage 2 q[4] = vaddq_s16(q[0], q[1]); q[13] = vsubq_s16(q[0], q[1]); q[6] = vaddq_s16(q[2], q[3]); q[14] = vsubq_s16(q[2], q[3]); // part of stage 3 do_butterfly(q[14], q[13], cospi_28_64, cospi_4_64, &q[5], &q[7]); // generate 18,19,28,29 // part of stage 1 load_from_transformed(trans_buf, 9, 23, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_23_64, cospi_9_64, &q[0], &q[2]); load_from_transformed(trans_buf, 25, 7, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_7_64, cospi_25_64, &q[1], &q[3]); // part of stage 2 q[13] = vsubq_s16(q[3], q[2]); q[3] = vaddq_s16(q[3], q[2]); q[14] = vsubq_s16(q[1], q[0]); q[2] = vaddq_s16(q[1], q[0]); // part of stage 3 do_butterfly(q[14], q[13], -cospi_4_64, -cospi_28_64, &q[1], &q[0]); // part of stage 4 q[8] = vaddq_s16(q[4], q[2]); q[9] = vaddq_s16(q[5], q[0]); q[10] = vaddq_s16(q[7], q[1]); q[15] = vaddq_s16(q[6], q[3]); q[13] = vsubq_s16(q[5], q[0]); q[14] = vsubq_s16(q[7], q[1]); store_in_output(out, 16, 31, q[8], q[15]); store_in_output(out, 17, 30, q[9], q[10]); // part of stage 5 do_butterfly(q[14], q[13], cospi_24_64, cospi_8_64, &q[0], &q[1]); store_in_output(out, 29, 18, q[1], q[0]); // part of stage 4 q[13] = vsubq_s16(q[4], q[2]); q[14] = vsubq_s16(q[6], q[3]); // part of stage 5 do_butterfly(q[14], q[13], cospi_24_64, cospi_8_64, &q[4], &q[6]); store_in_output(out, 19, 28, q[4], q[6]); // ----------------------------------------- // BLOCK B: 20-23,24-27 // ----------------------------------------- // generate 20,21,26,27 // part of stage 1 load_from_transformed(trans_buf, 5, 27, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_27_64, cospi_5_64, &q[0], &q[2]); load_from_transformed(trans_buf, 21, 11, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_11_64, cospi_21_64, &q[1], &q[3]); // part of stage 2 q[13] = vsubq_s16(q[0], q[1]); q[0] = vaddq_s16(q[0], q[1]); q[14] = vsubq_s16(q[2], q[3]); q[2] = vaddq_s16(q[2], q[3]); // part of stage 3 do_butterfly(q[14], q[13], cospi_12_64, cospi_20_64, &q[1], &q[3]); // generate 22,23,24,25 // part of stage 1 load_from_transformed(trans_buf, 13, 19, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_19_64, cospi_13_64, &q[5], &q[7]); load_from_transformed(trans_buf, 29, 3, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_3_64, cospi_29_64, &q[4], &q[6]); // part of stage 2 q[14] = vsubq_s16(q[4], q[5]); q[5] = 
vaddq_s16(q[4], q[5]); q[13] = vsubq_s16(q[6], q[7]); q[6] = vaddq_s16(q[6], q[7]); // part of stage 3 do_butterfly(q[14], q[13], -cospi_20_64, -cospi_12_64, &q[4], &q[7]); // part of stage 4 q[10] = vaddq_s16(q[7], q[1]); q[11] = vaddq_s16(q[5], q[0]); q[12] = vaddq_s16(q[6], q[2]); q[15] = vaddq_s16(q[4], q[3]); // part of stage 6 load_from_output(out, 16, 17, &q[14], &q[13]); q[8] = vaddq_s16(q[14], q[11]); q[9] = vaddq_s16(q[13], q[10]); q[13] = vsubq_s16(q[13], q[10]); q[11] = vsubq_s16(q[14], q[11]); store_in_output(out, 17, 16, q[9], q[8]); load_from_output(out, 30, 31, &q[14], &q[9]); q[8] = vsubq_s16(q[9], q[12]); q[10] = vaddq_s16(q[14], q[15]); q[14] = vsubq_s16(q[14], q[15]); q[12] = vaddq_s16(q[9], q[12]); store_in_output(out, 30, 31, q[10], q[12]); // part of stage 7 do_butterfly(q[14], q[13], cospi_16_64, cospi_16_64, &q[13], &q[14]); store_in_output(out, 25, 22, q[14], q[13]); do_butterfly(q[8], q[11], cospi_16_64, cospi_16_64, &q[13], &q[14]); store_in_output(out, 24, 23, q[14], q[13]); // part of stage 4 q[14] = vsubq_s16(q[5], q[0]); q[13] = vsubq_s16(q[6], q[2]); do_butterfly(q[14], q[13], -cospi_8_64, -cospi_24_64, &q[5], &q[6]); q[14] = vsubq_s16(q[7], q[1]); q[13] = vsubq_s16(q[4], q[3]); do_butterfly(q[14], q[13], -cospi_8_64, -cospi_24_64, &q[0], &q[1]); // part of stage 6 load_from_output(out, 18, 19, &q[14], &q[13]); q[8] = vaddq_s16(q[14], q[1]); q[9] = vaddq_s16(q[13], q[6]); q[13] = vsubq_s16(q[13], q[6]); q[1] = vsubq_s16(q[14], q[1]); store_in_output(out, 18, 19, q[8], q[9]); load_from_output(out, 28, 29, &q[8], &q[9]); q[14] = vsubq_s16(q[8], q[5]); q[10] = vaddq_s16(q[8], q[5]); q[11] = vaddq_s16(q[9], q[0]); q[0] = vsubq_s16(q[9], q[0]); store_in_output(out, 28, 29, q[10], q[11]); // part of stage 7 do_butterfly(q[14], q[13], cospi_16_64, cospi_16_64, &q[13], &q[14]); store_in_output(out, 20, 27, q[13], q[14]); do_butterfly(q[0], q[1], cospi_16_64, cospi_16_64, &q[1], &q[0]); store_in_output(out, 21, 26, q[1], q[0]); // ----------------------------------------- // BLOCK C: 8-10,11-15 // ----------------------------------------- // generate 8,9,14,15 // part of stage 2 load_from_transformed(trans_buf, 2, 30, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_30_64, cospi_2_64, &q[0], &q[2]); load_from_transformed(trans_buf, 18, 14, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_14_64, cospi_18_64, &q[1], &q[3]); // part of stage 3 q[13] = vsubq_s16(q[0], q[1]); q[0] = vaddq_s16(q[0], q[1]); q[14] = vsubq_s16(q[2], q[3]); q[2] = vaddq_s16(q[2], q[3]); // part of stage 4 do_butterfly(q[14], q[13], cospi_24_64, cospi_8_64, &q[1], &q[3]); // generate 10,11,12,13 // part of stage 2 load_from_transformed(trans_buf, 10, 22, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_22_64, cospi_10_64, &q[5], &q[7]); load_from_transformed(trans_buf, 26, 6, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_6_64, cospi_26_64, &q[4], &q[6]); // part of stage 3 q[14] = vsubq_s16(q[4], q[5]); q[5] = vaddq_s16(q[4], q[5]); q[13] = vsubq_s16(q[6], q[7]); q[6] = vaddq_s16(q[6], q[7]); // part of stage 4 do_butterfly(q[14], q[13], -cospi_8_64, -cospi_24_64, &q[4], &q[7]); // part of stage 5 q[8] = vaddq_s16(q[0], q[5]); q[9] = vaddq_s16(q[1], q[7]); q[13] = vsubq_s16(q[1], q[7]); q[14] = vsubq_s16(q[3], q[4]); q[10] = vaddq_s16(q[3], q[4]); q[15] = vaddq_s16(q[2], q[6]); store_in_output(out, 8, 15, q[8], q[15]); store_in_output(out, 9, 14, q[9], q[10]); // part of stage 6 do_butterfly(q[14], q[13], cospi_16_64, cospi_16_64, &q[1], &q[3]); store_in_output(out, 13, 10, q[3], q[1]); 
q[13] = vsubq_s16(q[0], q[5]); q[14] = vsubq_s16(q[2], q[6]); do_butterfly(q[14], q[13], cospi_16_64, cospi_16_64, &q[1], &q[3]); store_in_output(out, 11, 12, q[1], q[3]); // ----------------------------------------- // BLOCK D: 0-3,4-7 // ----------------------------------------- // generate 4,5,6,7 // part of stage 3 load_from_transformed(trans_buf, 4, 28, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_28_64, cospi_4_64, &q[0], &q[2]); load_from_transformed(trans_buf, 20, 12, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_12_64, cospi_20_64, &q[1], &q[3]); // part of stage 4 q[13] = vsubq_s16(q[0], q[1]); q[0] = vaddq_s16(q[0], q[1]); q[14] = vsubq_s16(q[2], q[3]); q[2] = vaddq_s16(q[2], q[3]); // part of stage 5 do_butterfly(q[14], q[13], cospi_16_64, cospi_16_64, &q[1], &q[3]); // generate 0,1,2,3 // part of stage 4 load_from_transformed(trans_buf, 0, 16, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_16_64, cospi_16_64, &q[5], &q[7]); load_from_transformed(trans_buf, 8, 24, &q[14], &q[13]); do_butterfly(q[14], q[13], cospi_24_64, cospi_8_64, &q[14], &q[6]); // part of stage 5 q[4] = vaddq_s16(q[7], q[6]); q[7] = vsubq_s16(q[7], q[6]); q[6] = vsubq_s16(q[5], q[14]); q[5] = vaddq_s16(q[5], q[14]); // part of stage 6 q[8] = vaddq_s16(q[4], q[2]); q[9] = vaddq_s16(q[5], q[3]); q[10] = vaddq_s16(q[6], q[1]); q[11] = vaddq_s16(q[7], q[0]); q[12] = vsubq_s16(q[7], q[0]); q[13] = vsubq_s16(q[6], q[1]); q[14] = vsubq_s16(q[5], q[3]); q[15] = vsubq_s16(q[4], q[2]); // part of stage 7 load_from_output(out, 14, 15, &q[0], &q[1]); q[2] = vaddq_s16(q[8], q[1]); q[3] = vaddq_s16(q[9], q[0]); q[4] = vsubq_s16(q[9], q[0]); q[5] = vsubq_s16(q[8], q[1]); load_from_output(out, 16, 17, &q[0], &q[1]); q[8] = final_add(q[4], q[1]); q[9] = final_add(q[5], q[0]); q[6] = final_sub(q[5], q[0]); q[7] = final_sub(q[4], q[1]); if (idct32_pass_loop == 0) { idct32_bands_end_1st_pass(out, q); } else { if (highbd_flag) { highbd_idct32_bands_end_2nd_pass_bd8(out, dst, stride, q); dst += 8; } else { idct32_bands_end_2nd_pass(out, dest, stride, q); dest += 8; } } } } } void vpx_idct32x32_1024_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { vpx_idct32_32_neon(input, dest, stride, 0); } libvpx-1.8.2/vpx_dsp/arm/idct4x4_1_add_neon.asm000066400000000000000000000040751357355204000213360ustar00rootroot00000000000000; ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license and patent ; grant that can be found in the LICENSE file in the root of the source ; tree. All contributing project authors may be found in the AUTHORS ; file in the root of the source tree. 
; EXPORT |vpx_idct4x4_1_add_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 ;void vpx_idct4x4_1_add_neon(int16_t *input, uint8_t *dest, int stride) ; ; r0 int16_t input ; r1 uint8_t *dest ; r2 int stride) |vpx_idct4x4_1_add_neon| PROC ldrsh r0, [r0] ; cospi_16_64 = 11585 movw r12, #0x2d41 ; out = dct_const_round_shift(input[0] * cospi_16_64) mul r0, r0, r12 ; input[0] * cospi_16_64 add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) asr r0, r0, #14 ; >> DCT_CONST_BITS ; out = dct_const_round_shift(out * cospi_16_64) mul r0, r0, r12 ; out * cospi_16_64 mov r12, r1 ; save dest add r0, r0, #0x2000 ; +(1 << ((DCT_CONST_BITS) - 1)) asr r0, r0, #14 ; >> DCT_CONST_BITS ; a1 = ROUND_POWER_OF_TWO(out, 4) add r0, r0, #8 ; + (1 <<((4) - 1)) asr r0, r0, #4 ; >> 4 vdup.s16 q0, r0 ; duplicate a1 vld1.32 {d2[0]}, [r1], r2 vld1.32 {d2[1]}, [r1], r2 vld1.32 {d4[0]}, [r1], r2 vld1.32 {d4[1]}, [r1] vaddw.u8 q8, q0, d2 ; dest[x] + a1 vaddw.u8 q9, q0, d4 vqmovun.s16 d6, q8 ; clip_pixel vqmovun.s16 d7, q9 vst1.32 {d6[0]}, [r12], r2 vst1.32 {d6[1]}, [r12], r2 vst1.32 {d7[0]}, [r12], r2 vst1.32 {d7[1]}, [r12] bx lr ENDP ; |vpx_idct4x4_1_add_neon| END libvpx-1.8.2/vpx_dsp/arm/idct4x4_1_add_neon.c000066400000000000000000000033441357355204000207760ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include <assert.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/inv_txfm.h" static INLINE void idct4x4_1_add_kernel(uint8_t **dest, const int stride, const int16x8_t res, uint32x2_t *const d) { uint16x8_t a; uint8x8_t b; *d = vld1_lane_u32((const uint32_t *)*dest, *d, 0); *d = vld1_lane_u32((const uint32_t *)(*dest + stride), *d, 1); a = vaddw_u8(vreinterpretq_u16_s16(res), vreinterpret_u8_u32(*d)); b = vqmovun_s16(vreinterpretq_s16_u16(a)); vst1_lane_u32((uint32_t *)*dest, vreinterpret_u32_u8(b), 0); *dest += stride; vst1_lane_u32((uint32_t *)*dest, vreinterpret_u32_u8(b), 1); *dest += stride; } void vpx_idct4x4_1_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { const int16_t out0 = WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64)); const int16_t a1 = ROUND_POWER_OF_TWO(out1, 4); const int16x8_t dc = vdupq_n_s16(a1); uint32x2_t d = vdup_n_u32(0); assert(!((intptr_t)dest % sizeof(uint32_t))); assert(!(stride % sizeof(uint32_t))); idct4x4_1_add_kernel(&dest, stride, dc, &d); idct4x4_1_add_kernel(&dest, stride, dc, &d); } libvpx-1.8.2/vpx_dsp/arm/idct4x4_add_neon.asm000066400000000000000000000126051357355204000211140ustar00rootroot00000000000000; ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree.
;

    EXPORT  |vpx_idct4x4_16_add_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA ||.text||, CODE, READONLY, ALIGN=2

    INCLUDE vpx_dsp/arm/idct_neon.asm.S

    AREA     Block, CODE, READONLY ; name this block of code
;void vpx_idct4x4_16_add_neon(int16_t *input, uint8_t *dest, int stride)
;
; r0  int16_t input
; r1  uint8_t *dest
; r2  int stride

|vpx_idct4x4_16_add_neon| PROC

    ; The 2D transform is done with two passes which are actually pretty
    ; similar. We first transform the rows. This is done by transposing
    ; the inputs, doing a SIMD column transform (the columns are the
    ; transposed rows) and then transposing the results (so that they go
    ; back to normal/row positions). Then, we transform the columns by
    ; doing another SIMD column transform.
    ; So, two passes of a transpose followed by a column transform.

    ; load the inputs into q8-q9, d16-d19
    LOAD_TRAN_LOW_TO_S16 d16, d17, d18, d19, r0

    ; generate scalar constants
    ; cospi_8_64 = 15137
    movw r0, #0x3b21
    ; cospi_16_64 = 11585
    movw r3, #0x2d41
    ; cospi_24_64 = 6270
    movw r12, #0x187e

    ; transpose the input data
    ; 00 01 02 03   d16
    ; 10 11 12 13   d17
    ; 20 21 22 23   d18
    ; 30 31 32 33   d19
    vtrn.16 d16, d17
    vtrn.16 d18, d19

    ; generate constant vectors
    vdup.16 d20, r0  ; replicate cospi_8_64
    vdup.16 d21, r3  ; replicate cospi_16_64

    ; 00 10 02 12   d16
    ; 01 11 03 13   d17
    ; 20 30 22 32   d18
    ; 21 31 23 33   d19
    vtrn.32 q8, q9

    ; 00 10 20 30   d16
    ; 01 11 21 31   d17
    ; 02 12 22 32   d18
    ; 03 13 23 33   d19
    vdup.16 d22, r12 ; replicate cospi_24_64

    ; do the transform on transposed rows

    ; stage 1
    vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64
    vmull.s16 q1, d17, d20  ; input[1] * cospi_8_64

    ; (input[0] + input[2]) * cospi_16_64;
    ; (input[0] - input[2]) * cospi_16_64;
    vmull.s16 q8, d16, d21
    vmull.s16 q14, d18, d21
    vadd.s32 q13, q8, q14
    vsub.s32 q14, q8, q14

    ; input[1] * cospi_24_64 - input[3] * cospi_8_64;
    ; input[1] * cospi_8_64  + input[3] * cospi_24_64;
    vmlsl.s16 q15, d19, d20
    vmlal.s16 q1, d19, d22

    ; dct_const_round_shift
    vrshrn.s32 d26, q13, #14
    vrshrn.s32 d27, q14, #14
    vrshrn.s32 d29, q15, #14
    vrshrn.s32 d28, q1, #14

    ; stage 2
    ; output[0] = step[0] + step[3];
    ; output[1] = step[1] + step[2];
    ; output[3] = step[0] - step[3];
    ; output[2] = step[1] - step[2];
    vadd.s16 q8, q13, q14
    vsub.s16 q9, q13, q14
    vswp d18, d19

    ; transpose the results
    ; 00 01 02 03   d16
    ; 10 11 12 13   d17
    ; 20 21 22 23   d18
    ; 30 31 32 33   d19
    vtrn.16 d16, d17
    vtrn.16 d18, d19

    ; 00 10 02 12   d16
    ; 01 11 03 13   d17
    ; 20 30 22 32   d18
    ; 21 31 23 33   d19
    vtrn.32 q8, q9

    ; 00 10 20 30   d16
    ; 01 11 21 31   d17
    ; 02 12 22 32   d18
    ; 03 13 23 33   d19

    ; do the transform on columns

    ; stage 1
    vadd.s16 d23, d16, d18   ; (input[0] + input[2])
    vsub.s16 d24, d16, d18   ; (input[0] - input[2])

    vmull.s16 q15, d17, d22 ; input[1] * cospi_24_64
    vmull.s16 q1, d17, d20  ; input[1] * cospi_8_64

    ; (input[0] + input[2]) * cospi_16_64;
    ; (input[0] - input[2]) * cospi_16_64;
    vmull.s16 q13, d23, d21
    vmull.s16 q14, d24, d21

    ; input[1] * cospi_24_64 - input[3] * cospi_8_64;
    ; input[1] * cospi_8_64  + input[3] * cospi_24_64;
    vmlsl.s16 q15, d19, d20
    vmlal.s16 q1, d19, d22

    ; dct_const_round_shift
    vrshrn.s32 d26, q13, #14
    vrshrn.s32 d27, q14, #14
    vrshrn.s32 d29, q15, #14
    vrshrn.s32 d28, q1, #14

    ; stage 2
    ; output[0] = step[0] + step[3];
    ; output[1] = step[1] + step[2];
    ; output[3] = step[0] - step[3];
    ; output[2] = step[1] - step[2];
    vadd.s16 q8, q13, q14
    vsub.s16 q9, q13, q14

    ; The results are in two registers, one of them being swapped. This will
    ; be taken care of by loading the 'dest' value in a swapped fashion and
    ; also storing them in the same swapped fashion.
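    ;
    ; Concretely (illustrative register map, matching the comments below):
    ;   q8 = { temp_out[0], temp_out[1] }   rows 0 and 1, in order
    ;   q9 = { temp_out[3], temp_out[2] }   rows 3 and 2, exchanged
    ; so rows 2 and 3 are loaded into, and stored from, d27 with their
    ; lanes swapped, and no extra vswp is needed in this pass.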
; temp_out[0, 1] = d16, d17 = q8 ; temp_out[2, 3] = d19, d18 = q9 swapped ; ROUND_POWER_OF_TWO(temp_out[j], 4) vrshr.s16 q8, q8, #4 vrshr.s16 q9, q9, #4 vld1.32 {d26[0]}, [r1], r2 vld1.32 {d26[1]}, [r1], r2 vld1.32 {d27[1]}, [r1], r2 vld1.32 {d27[0]}, [r1] ; no post-increment ; ROUND_POWER_OF_TWO(temp_out[j], 4) + dest[j * stride + i] vaddw.u8 q8, q8, d26 vaddw.u8 q9, q9, d27 ; clip_pixel vqmovun.s16 d26, q8 vqmovun.s16 d27, q9 ; do the stores in reverse order with negative post-increment, by changing ; the sign of the stride rsb r2, r2, #0 vst1.32 {d27[0]}, [r1], r2 vst1.32 {d27[1]}, [r1], r2 vst1.32 {d26[1]}, [r1], r2 vst1.32 {d26[0]}, [r1] ; no post-increment bx lr ENDP ; |vpx_idct4x4_16_add_neon| END libvpx-1.8.2/vpx_dsp/arm/idct4x4_add_neon.c000066400000000000000000000035751357355204000205640ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/txfm_common.h" void vpx_idct4x4_16_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { const uint8_t *dst = dest; uint32x2_t s32 = vdup_n_u32(0); int16x8_t a[2]; uint8x8_t s, d[2]; uint16x8_t sum[2]; assert(!((intptr_t)dest % sizeof(uint32_t))); assert(!(stride % sizeof(uint32_t))); // Rows a[0] = load_tran_low_to_s16q(input); a[1] = load_tran_low_to_s16q(input + 8); transpose_idct4x4_16_bd8(a); // Columns a[1] = vcombine_s16(vget_high_s16(a[1]), vget_low_s16(a[1])); transpose_idct4x4_16_bd8(a); a[0] = vrshrq_n_s16(a[0], 4); a[1] = vrshrq_n_s16(a[1], 4); s = load_u8(dst, stride); dst += 2 * stride; // The elements are loaded in reverse order. s32 = vld1_lane_u32((const uint32_t *)dst, s32, 1); dst += stride; s32 = vld1_lane_u32((const uint32_t *)dst, s32, 0); sum[0] = vaddw_u8(vreinterpretq_u16_s16(a[0]), s); sum[1] = vaddw_u8(vreinterpretq_u16_s16(a[1]), vreinterpret_u8_u32(s32)); d[0] = vqmovun_s16(vreinterpretq_s16_u16(sum[0])); d[1] = vqmovun_s16(vreinterpretq_s16_u16(sum[1])); store_u8(dest, stride, d[0]); dest += 2 * stride; // The elements are stored in reverse order. vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d[1]), 1); dest += stride; vst1_lane_u32((uint32_t *)dest, vreinterpret_u32_u8(d[1]), 0); } libvpx-1.8.2/vpx_dsp/arm/idct8x8_1_add_neon.c000066400000000000000000000045051357355204000210060ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/inv_txfm.h" static INLINE uint8x8_t create_dcd(const int16_t dc) { int16x8_t t = vdupq_n_s16(dc); return vqmovun_s16(t); } static INLINE void idct8x8_1_add_pos_kernel(uint8_t **dest, const int stride, const uint8x8_t res) { const uint8x8_t a = vld1_u8(*dest); const uint8x8_t b = vqadd_u8(a, res); vst1_u8(*dest, b); *dest += stride; } static INLINE void idct8x8_1_add_neg_kernel(uint8_t **dest, const int stride, const uint8x8_t res) { const uint8x8_t a = vld1_u8(*dest); const uint8x8_t b = vqsub_u8(a, res); vst1_u8(*dest, b); *dest += stride; } void vpx_idct8x8_1_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { const int16_t out0 = WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); const int16_t out1 = WRAPLOW(dct_const_round_shift(out0 * cospi_16_64)); const int16_t a1 = ROUND_POWER_OF_TWO(out1, 5); if (a1 >= 0) { const uint8x8_t dc = create_dcd(a1); idct8x8_1_add_pos_kernel(&dest, stride, dc); idct8x8_1_add_pos_kernel(&dest, stride, dc); idct8x8_1_add_pos_kernel(&dest, stride, dc); idct8x8_1_add_pos_kernel(&dest, stride, dc); idct8x8_1_add_pos_kernel(&dest, stride, dc); idct8x8_1_add_pos_kernel(&dest, stride, dc); idct8x8_1_add_pos_kernel(&dest, stride, dc); idct8x8_1_add_pos_kernel(&dest, stride, dc); } else { const uint8x8_t dc = create_dcd(-a1); idct8x8_1_add_neg_kernel(&dest, stride, dc); idct8x8_1_add_neg_kernel(&dest, stride, dc); idct8x8_1_add_neg_kernel(&dest, stride, dc); idct8x8_1_add_neg_kernel(&dest, stride, dc); idct8x8_1_add_neg_kernel(&dest, stride, dc); idct8x8_1_add_neg_kernel(&dest, stride, dc); idct8x8_1_add_neg_kernel(&dest, stride, dc); idct8x8_1_add_neg_kernel(&dest, stride, dc); } } libvpx-1.8.2/vpx_dsp/arm/idct8x8_add_neon.c000066400000000000000000000043031357355204000205620ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/idct_neon.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/txfm_common.h" void vpx_idct8x8_64_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { const int16x8_t cospis = vld1q_s16(kCospi); const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospis1 = vget_high_s16(cospis); // cospi 4, 12, 20, 28 int16x8_t a[8]; a[0] = load_tran_low_to_s16q(input); a[1] = load_tran_low_to_s16q(input + 8); a[2] = load_tran_low_to_s16q(input + 16); a[3] = load_tran_low_to_s16q(input + 24); a[4] = load_tran_low_to_s16q(input + 32); a[5] = load_tran_low_to_s16q(input + 40); a[6] = load_tran_low_to_s16q(input + 48); a[7] = load_tran_low_to_s16q(input + 56); idct8x8_64_1d_bd8(cospis0, cospis1, a); idct8x8_64_1d_bd8(cospis0, cospis1, a); idct8x8_add8x8_neon(a, dest, stride); } void vpx_idct8x8_12_add_neon(const tran_low_t *input, uint8_t *dest, int stride) { const int16x8_t cospis = vld1q_s16(kCospi); const int16x8_t cospisd = vaddq_s16(cospis, cospis); const int16x4_t cospis0 = vget_low_s16(cospis); // cospi 0, 8, 16, 24 const int16x4_t cospisd0 = vget_low_s16(cospisd); // doubled 0, 8, 16, 24 const int16x4_t cospisd1 = vget_high_s16(cospisd); // doubled 4, 12, 20, 28 int16x4_t a[8]; int16x8_t b[8]; a[0] = load_tran_low_to_s16d(input); a[1] = load_tran_low_to_s16d(input + 8); a[2] = load_tran_low_to_s16d(input + 16); a[3] = load_tran_low_to_s16d(input + 24); idct8x8_12_pass1_bd8(cospis0, cospisd0, cospisd1, a); idct8x8_12_pass2_bd8(cospis0, cospisd0, cospisd1, a, b); idct8x8_add8x8_neon(b, dest, stride); } libvpx-1.8.2/vpx_dsp/arm/idct_neon.asm000066400000000000000000000027571357355204000177530ustar00rootroot00000000000000; ; Copyright (c) 2016 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; INCLUDE ./vpx_config.asm ; Helper functions used to load tran_low_t into int16, narrowing if ; necessary. ; $dst0..3 are d registers with the pairs assumed to be contiguous in ; non-high-bitdepth builds. q0-q3 are used as temporaries in high-bitdepth. MACRO LOAD_TRAN_LOW_TO_S16 $dst0, $dst1, $dst2, $dst3, $src IF CONFIG_VP9_HIGHBITDEPTH vld1.s32 {q0,q1}, [$src]! vld1.s32 {q2,q3}, [$src]! vmovn.i32 $dst0, q0 vmovn.i32 $dst1, q1 vmovn.i32 $dst2, q2 vmovn.i32 $dst3, q3 ELSE vld1.s16 {$dst0-$dst1,$dst2-$dst3}, [$src]! ENDIF MEND ; $dst0..3 are d registers. q0-q3 are used as temporaries in high-bitdepth. MACRO LOAD_TRAN_LOW_TO_S16X2 $dst0, $dst1, $dst2, $dst3, $src IF CONFIG_VP9_HIGHBITDEPTH vld2.s32 {q0,q1}, [$src]! vld2.s32 {q2,q3}, [$src]! vmovn.i32 $dst0, q0 vmovn.i32 $dst1, q2 vmovn.i32 $dst2, q1 vmovn.i32 $dst3, q3 ELSE vld2.s16 {$dst0,$dst1,$dst2,$dst3}, [$src]! ENDIF MEND END libvpx-1.8.2/vpx_dsp/arm/idct_neon.h000066400000000000000000001123531357355204000174140ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_ARM_IDCT_NEON_H_ #define VPX_VPX_DSP_ARM_IDCT_NEON_H_ #include #include "./vpx_config.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/txfm_common.h" #include "vpx_dsp/vpx_dsp_common.h" static const int16_t kCospi[16] = { 16384 /* cospi_0_64 */, 15137 /* cospi_8_64 */, 11585 /* cospi_16_64 */, 6270 /* cospi_24_64 */, 16069 /* cospi_4_64 */, 13623 /* cospi_12_64 */, -9102 /* -cospi_20_64 */, 3196 /* cospi_28_64 */, 16305 /* cospi_2_64 */, 1606 /* cospi_30_64 */, 14449 /* cospi_10_64 */, 7723 /* cospi_22_64 */, 15679 /* cospi_6_64 */, -4756 /* -cospi_26_64 */, 12665 /* cospi_14_64 */, -10394 /* -cospi_18_64 */ }; static const int32_t kCospi32[16] = { 16384 /* cospi_0_64 */, 15137 /* cospi_8_64 */, 11585 /* cospi_16_64 */, 6270 /* cospi_24_64 */, 16069 /* cospi_4_64 */, 13623 /* cospi_12_64 */, -9102 /* -cospi_20_64 */, 3196 /* cospi_28_64 */, 16305 /* cospi_2_64 */, 1606 /* cospi_30_64 */, 14449 /* cospi_10_64 */, 7723 /* cospi_22_64 */, 15679 /* cospi_6_64 */, -4756 /* -cospi_26_64 */, 12665 /* cospi_14_64 */, -10394 /* -cospi_18_64 */ }; //------------------------------------------------------------------------------ // Use saturating add/sub to avoid overflow in 2nd pass in high bit-depth static INLINE int16x8_t final_add(const int16x8_t a, const int16x8_t b) { #if CONFIG_VP9_HIGHBITDEPTH return vqaddq_s16(a, b); #else return vaddq_s16(a, b); #endif } static INLINE int16x8_t final_sub(const int16x8_t a, const int16x8_t b) { #if CONFIG_VP9_HIGHBITDEPTH return vqsubq_s16(a, b); #else return vsubq_s16(a, b); #endif } //------------------------------------------------------------------------------ static INLINE int32x4x2_t highbd_idct_add_dual(const int32x4x2_t s0, const int32x4x2_t s1) { int32x4x2_t t; t.val[0] = vaddq_s32(s0.val[0], s1.val[0]); t.val[1] = vaddq_s32(s0.val[1], s1.val[1]); return t; } static INLINE int32x4x2_t highbd_idct_sub_dual(const int32x4x2_t s0, const int32x4x2_t s1) { int32x4x2_t t; t.val[0] = vsubq_s32(s0.val[0], s1.val[0]); t.val[1] = vsubq_s32(s0.val[1], s1.val[1]); return t; } //------------------------------------------------------------------------------ static INLINE int16x8_t dct_const_round_shift_low_8(const int32x4_t *const in) { return vcombine_s16(vrshrn_n_s32(in[0], DCT_CONST_BITS), vrshrn_n_s32(in[1], DCT_CONST_BITS)); } static INLINE void dct_const_round_shift_low_8_dual(const int32x4_t *const t32, int16x8_t *const d0, int16x8_t *const d1) { *d0 = dct_const_round_shift_low_8(t32 + 0); *d1 = dct_const_round_shift_low_8(t32 + 2); } static INLINE int32x4x2_t dct_const_round_shift_high_4x2(const int64x2_t *const in) { int32x4x2_t out; out.val[0] = vcombine_s32(vrshrn_n_s64(in[0], DCT_CONST_BITS), vrshrn_n_s64(in[1], DCT_CONST_BITS)); out.val[1] = vcombine_s32(vrshrn_n_s64(in[2], DCT_CONST_BITS), vrshrn_n_s64(in[3], DCT_CONST_BITS)); return out; } // Multiply a by a_const. Saturate, shift and narrow by DCT_CONST_BITS. static INLINE int16x8_t multiply_shift_and_narrow_s16(const int16x8_t a, const int16_t a_const) { // Shift by DCT_CONST_BITS + rounding will be within 16 bits for well formed // streams. See WRAPLOW and dct_const_round_shift for details. // This instruction doubles the result and returns the high half, essentially // resulting in a right shift by 15. By multiplying the constant first that // becomes a right shift by DCT_CONST_BITS. 
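  // As a worked example (constant value assumed from vpx_dsp/txfm_common.h):
  // with a_const = cospi_16_64 = 11585, vqrdmulhq_n_s16(a, 2 * 11585)
  // computes (2 * a * 23170 + (1 << 15)) >> 16, which simplifies to
  // (a * 11585 + (1 << 13)) >> 14 -- exactly
  // dct_const_round_shift(a * cospi_16_64) for in-range inputs.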
  // The largest possible value used here is
  // vpx_dsp/txfm_common.h:cospi_1_64 = 16364 (* 2 = 32728), which falls *just*
  // within the range of int16_t (+32767 / -32768) even when negated.
  return vqrdmulhq_n_s16(a, a_const * 2);
}

// Add a and b, then multiply by ab_const. Shift and narrow by DCT_CONST_BITS.
static INLINE int16x8_t add_multiply_shift_and_narrow_s16(
    const int16x8_t a, const int16x8_t b, const int16_t ab_const) {
  // In both add_ and its pair, sub_, the input for well-formed streams will be
  // well within 16 bits (input to the idct is the difference between two
  // frames and will be within -255 to 255, or 9 bits).
  // However, for inputs over about 25,000 (valid for int16_t, but not for idct
  // input) this function cannot use vaddq_s16.
  // In order to match existing behavior and intentionally out of range tests,
  // expand the addition up to 32 bits to prevent truncation.
  int32x4_t t[2];
  t[0] = vaddl_s16(vget_low_s16(a), vget_low_s16(b));
  t[1] = vaddl_s16(vget_high_s16(a), vget_high_s16(b));
  t[0] = vmulq_n_s32(t[0], ab_const);
  t[1] = vmulq_n_s32(t[1], ab_const);
  return dct_const_round_shift_low_8(t);
}

// Subtract b from a, then multiply by ab_const. Shift and narrow by
// DCT_CONST_BITS.
static INLINE int16x8_t sub_multiply_shift_and_narrow_s16(
    const int16x8_t a, const int16x8_t b, const int16_t ab_const) {
  int32x4_t t[2];
  t[0] = vsubl_s16(vget_low_s16(a), vget_low_s16(b));
  t[1] = vsubl_s16(vget_high_s16(a), vget_high_s16(b));
  t[0] = vmulq_n_s32(t[0], ab_const);
  t[1] = vmulq_n_s32(t[1], ab_const);
  return dct_const_round_shift_low_8(t);
}

// Multiply a by a_const and b by b_const, then accumulate. Shift and narrow by
// DCT_CONST_BITS.
static INLINE int16x8_t multiply_accumulate_shift_and_narrow_s16(
    const int16x8_t a, const int16_t a_const, const int16x8_t b,
    const int16_t b_const) {
  int32x4_t t[2];
  t[0] = vmull_n_s16(vget_low_s16(a), a_const);
  t[1] = vmull_n_s16(vget_high_s16(a), a_const);
  t[0] = vmlal_n_s16(t[0], vget_low_s16(b), b_const);
  t[1] = vmlal_n_s16(t[1], vget_high_s16(b), b_const);
  return dct_const_round_shift_low_8(t);
}

//------------------------------------------------------------------------------

// Note: The following 4 functions could use 32-bit operations for bit-depth
// 10. However, although it's 20% faster with gcc, it's 20% slower with clang.
// Use 64-bit operations for now.

// Multiply a by a_const. Saturate, shift and narrow by DCT_CONST_BITS.
static INLINE int32x4x2_t
multiply_shift_and_narrow_s32_dual(const int32x4x2_t a, const int32_t a_const) {
  int64x2_t b[4];

  b[0] = vmull_n_s32(vget_low_s32(a.val[0]), a_const);
  b[1] = vmull_n_s32(vget_high_s32(a.val[0]), a_const);
  b[2] = vmull_n_s32(vget_low_s32(a.val[1]), a_const);
  b[3] = vmull_n_s32(vget_high_s32(a.val[1]), a_const);
  return dct_const_round_shift_high_4x2(b);
}

// Add a and b, then multiply by ab_const. Shift and narrow by DCT_CONST_BITS.
static INLINE int32x4x2_t add_multiply_shift_and_narrow_s32_dual(
    const int32x4x2_t a, const int32x4x2_t b, const int32_t ab_const) {
  int32x4_t t[2];
  int64x2_t c[4];

  t[0] = vaddq_s32(a.val[0], b.val[0]);
  t[1] = vaddq_s32(a.val[1], b.val[1]);
  c[0] = vmull_n_s32(vget_low_s32(t[0]), ab_const);
  c[1] = vmull_n_s32(vget_high_s32(t[0]), ab_const);
  c[2] = vmull_n_s32(vget_low_s32(t[1]), ab_const);
  c[3] = vmull_n_s32(vget_high_s32(t[1]), ab_const);
  return dct_const_round_shift_high_4x2(c);
}

// Subtract b from a, then multiply by ab_const. Shift and narrow by
// DCT_CONST_BITS.
static INLINE int32x4x2_t sub_multiply_shift_and_narrow_s32_dual( const int32x4x2_t a, const int32x4x2_t b, const int32_t ab_const) { int32x4_t t[2]; int64x2_t c[4]; t[0] = vsubq_s32(a.val[0], b.val[0]); t[1] = vsubq_s32(a.val[1], b.val[1]); c[0] = vmull_n_s32(vget_low_s32(t[0]), ab_const); c[1] = vmull_n_s32(vget_high_s32(t[0]), ab_const); c[2] = vmull_n_s32(vget_low_s32(t[1]), ab_const); c[3] = vmull_n_s32(vget_high_s32(t[1]), ab_const); return dct_const_round_shift_high_4x2(c); } // Multiply a by a_const and b by b_const, then accumulate. Shift and narrow by // DCT_CONST_BITS. static INLINE int32x4x2_t multiply_accumulate_shift_and_narrow_s32_dual( const int32x4x2_t a, const int32_t a_const, const int32x4x2_t b, const int32_t b_const) { int64x2_t c[4]; c[0] = vmull_n_s32(vget_low_s32(a.val[0]), a_const); c[1] = vmull_n_s32(vget_high_s32(a.val[0]), a_const); c[2] = vmull_n_s32(vget_low_s32(a.val[1]), a_const); c[3] = vmull_n_s32(vget_high_s32(a.val[1]), a_const); c[0] = vmlal_n_s32(c[0], vget_low_s32(b.val[0]), b_const); c[1] = vmlal_n_s32(c[1], vget_high_s32(b.val[0]), b_const); c[2] = vmlal_n_s32(c[2], vget_low_s32(b.val[1]), b_const); c[3] = vmlal_n_s32(c[3], vget_high_s32(b.val[1]), b_const); return dct_const_round_shift_high_4x2(c); } // Shift the output down by 6 and add it to the destination buffer. static INLINE void add_and_store_u8_s16(const int16x8_t *const a, uint8_t *d, const int stride) { uint8x8_t b[8]; int16x8_t c[8]; b[0] = vld1_u8(d); d += stride; b[1] = vld1_u8(d); d += stride; b[2] = vld1_u8(d); d += stride; b[3] = vld1_u8(d); d += stride; b[4] = vld1_u8(d); d += stride; b[5] = vld1_u8(d); d += stride; b[6] = vld1_u8(d); d += stride; b[7] = vld1_u8(d); d -= (7 * stride); // c = b + (a >> 6) c[0] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[0])), a[0], 6); c[1] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[1])), a[1], 6); c[2] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[2])), a[2], 6); c[3] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[3])), a[3], 6); c[4] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[4])), a[4], 6); c[5] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[5])), a[5], 6); c[6] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[6])), a[6], 6); c[7] = vrsraq_n_s16(vreinterpretq_s16_u16(vmovl_u8(b[7])), a[7], 6); b[0] = vqmovun_s16(c[0]); b[1] = vqmovun_s16(c[1]); b[2] = vqmovun_s16(c[2]); b[3] = vqmovun_s16(c[3]); b[4] = vqmovun_s16(c[4]); b[5] = vqmovun_s16(c[5]); b[6] = vqmovun_s16(c[6]); b[7] = vqmovun_s16(c[7]); vst1_u8(d, b[0]); d += stride; vst1_u8(d, b[1]); d += stride; vst1_u8(d, b[2]); d += stride; vst1_u8(d, b[3]); d += stride; vst1_u8(d, b[4]); d += stride; vst1_u8(d, b[5]); d += stride; vst1_u8(d, b[6]); d += stride; vst1_u8(d, b[7]); } static INLINE uint8x16_t create_dcq(const int16_t dc) { // Clip both sides and gcc may compile to assembly 'usat'. const int16_t t = (dc < 0) ? 0 : ((dc > 255) ? 
255 : dc); return vdupq_n_u8((uint8_t)t); } static INLINE void idct4x4_16_kernel_bd8(int16x8_t *const a) { const int16x4_t cospis = vld1_s16(kCospi); int16x4_t b[4]; int32x4_t c[4]; int16x8_t d[2]; b[0] = vget_low_s16(a[0]); b[1] = vget_high_s16(a[0]); b[2] = vget_low_s16(a[1]); b[3] = vget_high_s16(a[1]); c[0] = vmull_lane_s16(b[0], cospis, 2); c[2] = vmull_lane_s16(b[1], cospis, 2); c[1] = vsubq_s32(c[0], c[2]); c[0] = vaddq_s32(c[0], c[2]); c[3] = vmull_lane_s16(b[2], cospis, 3); c[2] = vmull_lane_s16(b[2], cospis, 1); c[3] = vmlsl_lane_s16(c[3], b[3], cospis, 1); c[2] = vmlal_lane_s16(c[2], b[3], cospis, 3); dct_const_round_shift_low_8_dual(c, &d[0], &d[1]); a[0] = vaddq_s16(d[0], d[1]); a[1] = vsubq_s16(d[0], d[1]); } static INLINE void transpose_idct4x4_16_bd8(int16x8_t *const a) { transpose_s16_4x4q(&a[0], &a[1]); idct4x4_16_kernel_bd8(a); } static INLINE void idct8x8_12_pass1_bd8(const int16x4_t cospis0, const int16x4_t cospisd0, const int16x4_t cospisd1, int16x4_t *const io) { int16x4_t step1[8], step2[8]; int32x4_t t32[2]; transpose_s16_4x4d(&io[0], &io[1], &io[2], &io[3]); // stage 1 step1[4] = vqrdmulh_lane_s16(io[1], cospisd1, 3); step1[5] = vqrdmulh_lane_s16(io[3], cospisd1, 2); step1[6] = vqrdmulh_lane_s16(io[3], cospisd1, 1); step1[7] = vqrdmulh_lane_s16(io[1], cospisd1, 0); // stage 2 step2[1] = vqrdmulh_lane_s16(io[0], cospisd0, 2); step2[2] = vqrdmulh_lane_s16(io[2], cospisd0, 3); step2[3] = vqrdmulh_lane_s16(io[2], cospisd0, 1); step2[4] = vadd_s16(step1[4], step1[5]); step2[5] = vsub_s16(step1[4], step1[5]); step2[6] = vsub_s16(step1[7], step1[6]); step2[7] = vadd_s16(step1[7], step1[6]); // stage 3 step1[0] = vadd_s16(step2[1], step2[3]); step1[1] = vadd_s16(step2[1], step2[2]); step1[2] = vsub_s16(step2[1], step2[2]); step1[3] = vsub_s16(step2[1], step2[3]); t32[1] = vmull_lane_s16(step2[6], cospis0, 2); t32[0] = vmlsl_lane_s16(t32[1], step2[5], cospis0, 2); t32[1] = vmlal_lane_s16(t32[1], step2[5], cospis0, 2); step1[5] = vrshrn_n_s32(t32[0], DCT_CONST_BITS); step1[6] = vrshrn_n_s32(t32[1], DCT_CONST_BITS); // stage 4 io[0] = vadd_s16(step1[0], step2[7]); io[1] = vadd_s16(step1[1], step1[6]); io[2] = vadd_s16(step1[2], step1[5]); io[3] = vadd_s16(step1[3], step2[4]); io[4] = vsub_s16(step1[3], step2[4]); io[5] = vsub_s16(step1[2], step1[5]); io[6] = vsub_s16(step1[1], step1[6]); io[7] = vsub_s16(step1[0], step2[7]); } static INLINE void idct8x8_12_pass2_bd8(const int16x4_t cospis0, const int16x4_t cospisd0, const int16x4_t cospisd1, const int16x4_t *const input, int16x8_t *const output) { int16x8_t in[4]; int16x8_t step1[8], step2[8]; int32x4_t t32[8]; transpose_s16_4x8(input[0], input[1], input[2], input[3], input[4], input[5], input[6], input[7], &in[0], &in[1], &in[2], &in[3]); // stage 1 step1[4] = vqrdmulhq_lane_s16(in[1], cospisd1, 3); step1[5] = vqrdmulhq_lane_s16(in[3], cospisd1, 2); step1[6] = vqrdmulhq_lane_s16(in[3], cospisd1, 1); step1[7] = vqrdmulhq_lane_s16(in[1], cospisd1, 0); // stage 2 step2[1] = vqrdmulhq_lane_s16(in[0], cospisd0, 2); step2[2] = vqrdmulhq_lane_s16(in[2], cospisd0, 3); step2[3] = vqrdmulhq_lane_s16(in[2], cospisd0, 1); step2[4] = vaddq_s16(step1[4], step1[5]); step2[5] = vsubq_s16(step1[4], step1[5]); step2[6] = vsubq_s16(step1[7], step1[6]); step2[7] = vaddq_s16(step1[7], step1[6]); // stage 3 step1[0] = vaddq_s16(step2[1], step2[3]); step1[1] = vaddq_s16(step2[1], step2[2]); step1[2] = vsubq_s16(step2[1], step2[2]); step1[3] = vsubq_s16(step2[1], step2[3]); t32[2] = vmull_lane_s16(vget_low_s16(step2[6]), cospis0, 2); t32[3] = 
vmull_lane_s16(vget_high_s16(step2[6]), cospis0, 2); t32[0] = vmlsl_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); t32[1] = vmlsl_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2); t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2); dct_const_round_shift_low_8_dual(t32, &step1[5], &step1[6]); // stage 4 output[0] = vaddq_s16(step1[0], step2[7]); output[1] = vaddq_s16(step1[1], step1[6]); output[2] = vaddq_s16(step1[2], step1[5]); output[3] = vaddq_s16(step1[3], step2[4]); output[4] = vsubq_s16(step1[3], step2[4]); output[5] = vsubq_s16(step1[2], step1[5]); output[6] = vsubq_s16(step1[1], step1[6]); output[7] = vsubq_s16(step1[0], step2[7]); } static INLINE void idct8x8_64_1d_bd8_kernel(const int16x4_t cospis0, const int16x4_t cospis1, int16x8_t *const io) { int16x4_t input1l, input1h, input3l, input3h, input5l, input5h, input7l, input7h; int16x4_t step1l[4], step1h[4]; int16x8_t step1[8], step2[8]; int32x4_t t32[8]; // stage 1 input1l = vget_low_s16(io[1]); input1h = vget_high_s16(io[1]); input3l = vget_low_s16(io[3]); input3h = vget_high_s16(io[3]); input5l = vget_low_s16(io[5]); input5h = vget_high_s16(io[5]); input7l = vget_low_s16(io[7]); input7h = vget_high_s16(io[7]); step1l[0] = vget_low_s16(io[0]); step1h[0] = vget_high_s16(io[0]); step1l[1] = vget_low_s16(io[2]); step1h[1] = vget_high_s16(io[2]); step1l[2] = vget_low_s16(io[4]); step1h[2] = vget_high_s16(io[4]); step1l[3] = vget_low_s16(io[6]); step1h[3] = vget_high_s16(io[6]); t32[0] = vmull_lane_s16(input1l, cospis1, 3); t32[1] = vmull_lane_s16(input1h, cospis1, 3); t32[2] = vmull_lane_s16(input3l, cospis1, 2); t32[3] = vmull_lane_s16(input3h, cospis1, 2); t32[4] = vmull_lane_s16(input3l, cospis1, 1); t32[5] = vmull_lane_s16(input3h, cospis1, 1); t32[6] = vmull_lane_s16(input1l, cospis1, 0); t32[7] = vmull_lane_s16(input1h, cospis1, 0); t32[0] = vmlsl_lane_s16(t32[0], input7l, cospis1, 0); t32[1] = vmlsl_lane_s16(t32[1], input7h, cospis1, 0); t32[2] = vmlal_lane_s16(t32[2], input5l, cospis1, 1); t32[3] = vmlal_lane_s16(t32[3], input5h, cospis1, 1); t32[4] = vmlsl_lane_s16(t32[4], input5l, cospis1, 2); t32[5] = vmlsl_lane_s16(t32[5], input5h, cospis1, 2); t32[6] = vmlal_lane_s16(t32[6], input7l, cospis1, 3); t32[7] = vmlal_lane_s16(t32[7], input7h, cospis1, 3); dct_const_round_shift_low_8_dual(&t32[0], &step1[4], &step1[5]); dct_const_round_shift_low_8_dual(&t32[4], &step1[6], &step1[7]); // stage 2 t32[2] = vmull_lane_s16(step1l[0], cospis0, 2); t32[3] = vmull_lane_s16(step1h[0], cospis0, 2); t32[4] = vmull_lane_s16(step1l[1], cospis0, 3); t32[5] = vmull_lane_s16(step1h[1], cospis0, 3); t32[6] = vmull_lane_s16(step1l[1], cospis0, 1); t32[7] = vmull_lane_s16(step1h[1], cospis0, 1); t32[0] = vmlal_lane_s16(t32[2], step1l[2], cospis0, 2); t32[1] = vmlal_lane_s16(t32[3], step1h[2], cospis0, 2); t32[2] = vmlsl_lane_s16(t32[2], step1l[2], cospis0, 2); t32[3] = vmlsl_lane_s16(t32[3], step1h[2], cospis0, 2); t32[4] = vmlsl_lane_s16(t32[4], step1l[3], cospis0, 1); t32[5] = vmlsl_lane_s16(t32[5], step1h[3], cospis0, 1); t32[6] = vmlal_lane_s16(t32[6], step1l[3], cospis0, 3); t32[7] = vmlal_lane_s16(t32[7], step1h[3], cospis0, 3); dct_const_round_shift_low_8_dual(&t32[0], &step2[0], &step2[1]); dct_const_round_shift_low_8_dual(&t32[4], &step2[2], &step2[3]); step2[4] = vaddq_s16(step1[4], step1[5]); step2[5] = vsubq_s16(step1[4], step1[5]); step2[6] = vsubq_s16(step1[7], step1[6]); step2[7] = vaddq_s16(step1[7], step1[6]); // stage 3 step1[0] 
= vaddq_s16(step2[0], step2[3]); step1[1] = vaddq_s16(step2[1], step2[2]); step1[2] = vsubq_s16(step2[1], step2[2]); step1[3] = vsubq_s16(step2[0], step2[3]); t32[2] = vmull_lane_s16(vget_low_s16(step2[6]), cospis0, 2); t32[3] = vmull_lane_s16(vget_high_s16(step2[6]), cospis0, 2); t32[0] = vmlsl_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); t32[1] = vmlsl_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2); t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(step2[5]), cospis0, 2); t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(step2[5]), cospis0, 2); dct_const_round_shift_low_8_dual(t32, &step1[5], &step1[6]); // stage 4 io[0] = vaddq_s16(step1[0], step2[7]); io[1] = vaddq_s16(step1[1], step1[6]); io[2] = vaddq_s16(step1[2], step1[5]); io[3] = vaddq_s16(step1[3], step2[4]); io[4] = vsubq_s16(step1[3], step2[4]); io[5] = vsubq_s16(step1[2], step1[5]); io[6] = vsubq_s16(step1[1], step1[6]); io[7] = vsubq_s16(step1[0], step2[7]); } static INLINE void idct8x8_64_1d_bd8(const int16x4_t cospis0, const int16x4_t cospis1, int16x8_t *const io) { transpose_s16_8x8(&io[0], &io[1], &io[2], &io[3], &io[4], &io[5], &io[6], &io[7]); idct8x8_64_1d_bd8_kernel(cospis0, cospis1, io); } static INLINE void idct_cospi_8_24_q_kernel(const int16x8_t s0, const int16x8_t s1, const int16x4_t cospi_0_8_16_24, int32x4_t *const t32) { t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_0_8_16_24, 3); t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_0_8_16_24, 3); t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_0_8_16_24, 3); t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_0_8_16_24, 3); t32[0] = vmlsl_lane_s16(t32[0], vget_low_s16(s1), cospi_0_8_16_24, 1); t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_0_8_16_24, 1); t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_0_8_16_24, 1); t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_0_8_16_24, 1); } static INLINE void idct_cospi_8_24_q(const int16x8_t s0, const int16x8_t s1, const int16x4_t cospi_0_8_16_24, int16x8_t *const d0, int16x8_t *const d1) { int32x4_t t32[4]; idct_cospi_8_24_q_kernel(s0, s1, cospi_0_8_16_24, t32); dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_8_24_neg_q(const int16x8_t s0, const int16x8_t s1, const int16x4_t cospi_0_8_16_24, int16x8_t *const d0, int16x8_t *const d1) { int32x4_t t32[4]; idct_cospi_8_24_q_kernel(s0, s1, cospi_0_8_16_24, t32); t32[2] = vnegq_s32(t32[2]); t32[3] = vnegq_s32(t32[3]); dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_16_16_q(const int16x8_t s0, const int16x8_t s1, const int16x4_t cospi_0_8_16_24, int16x8_t *const d0, int16x8_t *const d1) { int32x4_t t32[6]; t32[4] = vmull_lane_s16(vget_low_s16(s1), cospi_0_8_16_24, 2); t32[5] = vmull_lane_s16(vget_high_s16(s1), cospi_0_8_16_24, 2); t32[0] = vmlsl_lane_s16(t32[4], vget_low_s16(s0), cospi_0_8_16_24, 2); t32[1] = vmlsl_lane_s16(t32[5], vget_high_s16(s0), cospi_0_8_16_24, 2); t32[2] = vmlal_lane_s16(t32[4], vget_low_s16(s0), cospi_0_8_16_24, 2); t32[3] = vmlal_lane_s16(t32[5], vget_high_s16(s0), cospi_0_8_16_24, 2); dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_2_30(const int16x8_t s0, const int16x8_t s1, const int16x4_t cospi_2_30_10_22, int16x8_t *const d0, int16x8_t *const d1) { int32x4_t t32[4]; t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_2_30_10_22, 1); t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_2_30_10_22, 1); t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_2_30_10_22, 1); t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_2_30_10_22, 1); 
t32[0] = vmlsl_lane_s16(t32[0], vget_low_s16(s1), cospi_2_30_10_22, 0); t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_2_30_10_22, 0); t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_2_30_10_22, 0); t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_2_30_10_22, 0); dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_4_28(const int16x8_t s0, const int16x8_t s1, const int16x4_t cospi_4_12_20N_28, int16x8_t *const d0, int16x8_t *const d1) { int32x4_t t32[4]; t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_4_12_20N_28, 3); t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_4_12_20N_28, 3); t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_4_12_20N_28, 3); t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_4_12_20N_28, 3); t32[0] = vmlsl_lane_s16(t32[0], vget_low_s16(s1), cospi_4_12_20N_28, 0); t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_4_12_20N_28, 0); t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_4_12_20N_28, 0); t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_4_12_20N_28, 0); dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_6_26(const int16x8_t s0, const int16x8_t s1, const int16x4_t cospi_6_26N_14_18N, int16x8_t *const d0, int16x8_t *const d1) { int32x4_t t32[4]; t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_6_26N_14_18N, 0); t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_6_26N_14_18N, 0); t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_6_26N_14_18N, 0); t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_6_26N_14_18N, 0); t32[0] = vmlal_lane_s16(t32[0], vget_low_s16(s1), cospi_6_26N_14_18N, 1); t32[1] = vmlal_lane_s16(t32[1], vget_high_s16(s1), cospi_6_26N_14_18N, 1); t32[2] = vmlsl_lane_s16(t32[2], vget_low_s16(s0), cospi_6_26N_14_18N, 1); t32[3] = vmlsl_lane_s16(t32[3], vget_high_s16(s0), cospi_6_26N_14_18N, 1); dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_10_22(const int16x8_t s0, const int16x8_t s1, const int16x4_t cospi_2_30_10_22, int16x8_t *const d0, int16x8_t *const d1) { int32x4_t t32[4]; t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_2_30_10_22, 3); t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_2_30_10_22, 3); t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_2_30_10_22, 3); t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_2_30_10_22, 3); t32[0] = vmlsl_lane_s16(t32[0], vget_low_s16(s1), cospi_2_30_10_22, 2); t32[1] = vmlsl_lane_s16(t32[1], vget_high_s16(s1), cospi_2_30_10_22, 2); t32[2] = vmlal_lane_s16(t32[2], vget_low_s16(s0), cospi_2_30_10_22, 2); t32[3] = vmlal_lane_s16(t32[3], vget_high_s16(s0), cospi_2_30_10_22, 2); dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_12_20(const int16x8_t s0, const int16x8_t s1, const int16x4_t cospi_4_12_20N_28, int16x8_t *const d0, int16x8_t *const d1) { int32x4_t t32[4]; t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_4_12_20N_28, 1); t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_4_12_20N_28, 1); t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_4_12_20N_28, 1); t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_4_12_20N_28, 1); t32[0] = vmlal_lane_s16(t32[0], vget_low_s16(s1), cospi_4_12_20N_28, 2); t32[1] = vmlal_lane_s16(t32[1], vget_high_s16(s1), cospi_4_12_20N_28, 2); t32[2] = vmlsl_lane_s16(t32[2], vget_low_s16(s0), cospi_4_12_20N_28, 2); t32[3] = vmlsl_lane_s16(t32[3], vget_high_s16(s0), cospi_4_12_20N_28, 2); dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct_cospi_14_18(const int16x8_t s0, const int16x8_t s1, const int16x4_t 
cospi_6_26N_14_18N, int16x8_t *const d0, int16x8_t *const d1) { int32x4_t t32[4]; t32[0] = vmull_lane_s16(vget_low_s16(s0), cospi_6_26N_14_18N, 2); t32[1] = vmull_lane_s16(vget_high_s16(s0), cospi_6_26N_14_18N, 2); t32[2] = vmull_lane_s16(vget_low_s16(s1), cospi_6_26N_14_18N, 2); t32[3] = vmull_lane_s16(vget_high_s16(s1), cospi_6_26N_14_18N, 2); t32[0] = vmlal_lane_s16(t32[0], vget_low_s16(s1), cospi_6_26N_14_18N, 3); t32[1] = vmlal_lane_s16(t32[1], vget_high_s16(s1), cospi_6_26N_14_18N, 3); t32[2] = vmlsl_lane_s16(t32[2], vget_low_s16(s0), cospi_6_26N_14_18N, 3); t32[3] = vmlsl_lane_s16(t32[3], vget_high_s16(s0), cospi_6_26N_14_18N, 3); dct_const_round_shift_low_8_dual(t32, d0, d1); } static INLINE void idct16x16_add_stage7(const int16x8_t *const step2, int16x8_t *const out) { #if CONFIG_VP9_HIGHBITDEPTH // Use saturating add/sub to avoid overflow in 2nd pass out[0] = vqaddq_s16(step2[0], step2[15]); out[1] = vqaddq_s16(step2[1], step2[14]); out[2] = vqaddq_s16(step2[2], step2[13]); out[3] = vqaddq_s16(step2[3], step2[12]); out[4] = vqaddq_s16(step2[4], step2[11]); out[5] = vqaddq_s16(step2[5], step2[10]); out[6] = vqaddq_s16(step2[6], step2[9]); out[7] = vqaddq_s16(step2[7], step2[8]); out[8] = vqsubq_s16(step2[7], step2[8]); out[9] = vqsubq_s16(step2[6], step2[9]); out[10] = vqsubq_s16(step2[5], step2[10]); out[11] = vqsubq_s16(step2[4], step2[11]); out[12] = vqsubq_s16(step2[3], step2[12]); out[13] = vqsubq_s16(step2[2], step2[13]); out[14] = vqsubq_s16(step2[1], step2[14]); out[15] = vqsubq_s16(step2[0], step2[15]); #else out[0] = vaddq_s16(step2[0], step2[15]); out[1] = vaddq_s16(step2[1], step2[14]); out[2] = vaddq_s16(step2[2], step2[13]); out[3] = vaddq_s16(step2[3], step2[12]); out[4] = vaddq_s16(step2[4], step2[11]); out[5] = vaddq_s16(step2[5], step2[10]); out[6] = vaddq_s16(step2[6], step2[9]); out[7] = vaddq_s16(step2[7], step2[8]); out[8] = vsubq_s16(step2[7], step2[8]); out[9] = vsubq_s16(step2[6], step2[9]); out[10] = vsubq_s16(step2[5], step2[10]); out[11] = vsubq_s16(step2[4], step2[11]); out[12] = vsubq_s16(step2[3], step2[12]); out[13] = vsubq_s16(step2[2], step2[13]); out[14] = vsubq_s16(step2[1], step2[14]); out[15] = vsubq_s16(step2[0], step2[15]); #endif } static INLINE void idct16x16_store_pass1(const int16x8_t *const out, int16_t *output) { // Save the result into output vst1q_s16(output, out[0]); output += 16; vst1q_s16(output, out[1]); output += 16; vst1q_s16(output, out[2]); output += 16; vst1q_s16(output, out[3]); output += 16; vst1q_s16(output, out[4]); output += 16; vst1q_s16(output, out[5]); output += 16; vst1q_s16(output, out[6]); output += 16; vst1q_s16(output, out[7]); output += 16; vst1q_s16(output, out[8]); output += 16; vst1q_s16(output, out[9]); output += 16; vst1q_s16(output, out[10]); output += 16; vst1q_s16(output, out[11]); output += 16; vst1q_s16(output, out[12]); output += 16; vst1q_s16(output, out[13]); output += 16; vst1q_s16(output, out[14]); output += 16; vst1q_s16(output, out[15]); } static INLINE void idct8x8_add8x1(const int16x8_t a, uint8_t **const dest, const int stride) { const uint8x8_t s = vld1_u8(*dest); const int16x8_t res = vrshrq_n_s16(a, 5); const uint16x8_t q = vaddw_u8(vreinterpretq_u16_s16(res), s); const uint8x8_t d = vqmovun_s16(vreinterpretq_s16_u16(q)); vst1_u8(*dest, d); *dest += stride; } static INLINE void idct8x8_add8x8_neon(int16x8_t *const out, uint8_t *dest, const int stride) { idct8x8_add8x1(out[0], &dest, stride); idct8x8_add8x1(out[1], &dest, stride); idct8x8_add8x1(out[2], &dest, stride); 
idct8x8_add8x1(out[3], &dest, stride); idct8x8_add8x1(out[4], &dest, stride); idct8x8_add8x1(out[5], &dest, stride); idct8x8_add8x1(out[6], &dest, stride); idct8x8_add8x1(out[7], &dest, stride); } static INLINE void idct16x16_add8x1(const int16x8_t a, uint8_t **const dest, const int stride) { const uint8x8_t s = vld1_u8(*dest); const int16x8_t res = vrshrq_n_s16(a, 6); const uint16x8_t q = vaddw_u8(vreinterpretq_u16_s16(res), s); const uint8x8_t d = vqmovun_s16(vreinterpretq_s16_u16(q)); vst1_u8(*dest, d); *dest += stride; } static INLINE void idct16x16_add_store(const int16x8_t *const out, uint8_t *dest, const int stride) { // Add the result to dest idct16x16_add8x1(out[0], &dest, stride); idct16x16_add8x1(out[1], &dest, stride); idct16x16_add8x1(out[2], &dest, stride); idct16x16_add8x1(out[3], &dest, stride); idct16x16_add8x1(out[4], &dest, stride); idct16x16_add8x1(out[5], &dest, stride); idct16x16_add8x1(out[6], &dest, stride); idct16x16_add8x1(out[7], &dest, stride); idct16x16_add8x1(out[8], &dest, stride); idct16x16_add8x1(out[9], &dest, stride); idct16x16_add8x1(out[10], &dest, stride); idct16x16_add8x1(out[11], &dest, stride); idct16x16_add8x1(out[12], &dest, stride); idct16x16_add8x1(out[13], &dest, stride); idct16x16_add8x1(out[14], &dest, stride); idct16x16_add8x1(out[15], &dest, stride); } static INLINE void highbd_idct16x16_add8x1(const int16x8_t a, const int16x8_t max, uint16_t **const dest, const int stride) { const uint16x8_t s = vld1q_u16(*dest); const int16x8_t res0 = vqaddq_s16(a, vreinterpretq_s16_u16(s)); const int16x8_t res1 = vminq_s16(res0, max); const uint16x8_t d = vqshluq_n_s16(res1, 0); vst1q_u16(*dest, d); *dest += stride; } static INLINE void idct16x16_add_store_bd8(int16x8_t *const out, uint16_t *dest, const int stride) { // Add the result to dest const int16x8_t max = vdupq_n_s16((1 << 8) - 1); out[0] = vrshrq_n_s16(out[0], 6); out[1] = vrshrq_n_s16(out[1], 6); out[2] = vrshrq_n_s16(out[2], 6); out[3] = vrshrq_n_s16(out[3], 6); out[4] = vrshrq_n_s16(out[4], 6); out[5] = vrshrq_n_s16(out[5], 6); out[6] = vrshrq_n_s16(out[6], 6); out[7] = vrshrq_n_s16(out[7], 6); out[8] = vrshrq_n_s16(out[8], 6); out[9] = vrshrq_n_s16(out[9], 6); out[10] = vrshrq_n_s16(out[10], 6); out[11] = vrshrq_n_s16(out[11], 6); out[12] = vrshrq_n_s16(out[12], 6); out[13] = vrshrq_n_s16(out[13], 6); out[14] = vrshrq_n_s16(out[14], 6); out[15] = vrshrq_n_s16(out[15], 6); highbd_idct16x16_add8x1(out[0], max, &dest, stride); highbd_idct16x16_add8x1(out[1], max, &dest, stride); highbd_idct16x16_add8x1(out[2], max, &dest, stride); highbd_idct16x16_add8x1(out[3], max, &dest, stride); highbd_idct16x16_add8x1(out[4], max, &dest, stride); highbd_idct16x16_add8x1(out[5], max, &dest, stride); highbd_idct16x16_add8x1(out[6], max, &dest, stride); highbd_idct16x16_add8x1(out[7], max, &dest, stride); highbd_idct16x16_add8x1(out[8], max, &dest, stride); highbd_idct16x16_add8x1(out[9], max, &dest, stride); highbd_idct16x16_add8x1(out[10], max, &dest, stride); highbd_idct16x16_add8x1(out[11], max, &dest, stride); highbd_idct16x16_add8x1(out[12], max, &dest, stride); highbd_idct16x16_add8x1(out[13], max, &dest, stride); highbd_idct16x16_add8x1(out[14], max, &dest, stride); highbd_idct16x16_add8x1(out[15], max, &dest, stride); } static INLINE void highbd_idct16x16_add8x1_bd8(const int16x8_t a, uint16_t **const dest, const int stride) { const uint16x8_t s = vld1q_u16(*dest); const int16x8_t res = vrsraq_n_s16(vreinterpretq_s16_u16(s), a, 6); const uint16x8_t d = vmovl_u8(vqmovun_s16(res)); vst1q_u16(*dest, 
d); *dest += stride; } static INLINE void highbd_add_and_store_bd8(const int16x8_t *const a, uint16_t *out, const int stride) { highbd_idct16x16_add8x1_bd8(a[0], &out, stride); highbd_idct16x16_add8x1_bd8(a[1], &out, stride); highbd_idct16x16_add8x1_bd8(a[2], &out, stride); highbd_idct16x16_add8x1_bd8(a[3], &out, stride); highbd_idct16x16_add8x1_bd8(a[4], &out, stride); highbd_idct16x16_add8x1_bd8(a[5], &out, stride); highbd_idct16x16_add8x1_bd8(a[6], &out, stride); highbd_idct16x16_add8x1_bd8(a[7], &out, stride); highbd_idct16x16_add8x1_bd8(a[8], &out, stride); highbd_idct16x16_add8x1_bd8(a[9], &out, stride); highbd_idct16x16_add8x1_bd8(a[10], &out, stride); highbd_idct16x16_add8x1_bd8(a[11], &out, stride); highbd_idct16x16_add8x1_bd8(a[12], &out, stride); highbd_idct16x16_add8x1_bd8(a[13], &out, stride); highbd_idct16x16_add8x1_bd8(a[14], &out, stride); highbd_idct16x16_add8x1_bd8(a[15], &out, stride); highbd_idct16x16_add8x1_bd8(a[16], &out, stride); highbd_idct16x16_add8x1_bd8(a[17], &out, stride); highbd_idct16x16_add8x1_bd8(a[18], &out, stride); highbd_idct16x16_add8x1_bd8(a[19], &out, stride); highbd_idct16x16_add8x1_bd8(a[20], &out, stride); highbd_idct16x16_add8x1_bd8(a[21], &out, stride); highbd_idct16x16_add8x1_bd8(a[22], &out, stride); highbd_idct16x16_add8x1_bd8(a[23], &out, stride); highbd_idct16x16_add8x1_bd8(a[24], &out, stride); highbd_idct16x16_add8x1_bd8(a[25], &out, stride); highbd_idct16x16_add8x1_bd8(a[26], &out, stride); highbd_idct16x16_add8x1_bd8(a[27], &out, stride); highbd_idct16x16_add8x1_bd8(a[28], &out, stride); highbd_idct16x16_add8x1_bd8(a[29], &out, stride); highbd_idct16x16_add8x1_bd8(a[30], &out, stride); highbd_idct16x16_add8x1_bd8(a[31], &out, stride); } void vpx_idct16x16_256_add_half1d(const void *const input, int16_t *output, void *const dest, const int stride, const int highbd_flag); void vpx_idct16x16_38_add_half1d(const void *const input, int16_t *const output, void *const dest, const int stride, const int highbd_flag); void vpx_idct16x16_10_add_half1d_pass1(const tran_low_t *input, int16_t *output); void vpx_idct16x16_10_add_half1d_pass2(const int16_t *input, int16_t *const output, void *const dest, const int stride, const int highbd_flag); void vpx_idct32_32_neon(const tran_low_t *input, uint8_t *dest, const int stride, const int highbd_flag); void vpx_idct32_12_neon(const tran_low_t *const input, int16_t *output); void vpx_idct32_16_neon(const int16_t *const input, void *const output, const int stride, const int highbd_flag); void vpx_idct32_6_neon(const tran_low_t *input, int16_t *output); void vpx_idct32_8_neon(const int16_t *input, void *const output, int stride, const int highbd_flag); #endif // VPX_VPX_DSP_ARM_IDCT_NEON_H_ libvpx-1.8.2/vpx_dsp/arm/intrapred_neon.c000066400000000000000000001122271357355204000204540ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" //------------------------------------------------------------------------------ // DC 4x4 static INLINE uint16x4_t dc_sum_4(const uint8_t *ref) { const uint8x8_t ref_u8 = vld1_u8(ref); const uint16x4_t p0 = vpaddl_u8(ref_u8); return vpadd_u16(p0, p0); } static INLINE void dc_store_4x4(uint8_t *dst, ptrdiff_t stride, const uint8x8_t dc) { const uint8x8_t dc_dup = vdup_lane_u8(dc, 0); int i; for (i = 0; i < 4; ++i, dst += stride) { vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(dc_dup), 0); } } void vpx_dc_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t a = vld1_u8(above); const uint8x8_t l = vld1_u8(left); const uint16x8_t al = vaddl_u8(a, l); uint16x4_t sum; uint8x8_t dc; sum = vpadd_u16(vget_low_u16(al), vget_low_u16(al)); sum = vpadd_u16(sum, sum); dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 3)); dc_store_4x4(dst, stride, dc); } void vpx_dc_left_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint16x4_t sum = dc_sum_4(left); const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 2)); (void)above; dc_store_4x4(dst, stride, dc); } void vpx_dc_top_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint16x4_t sum = dc_sum_4(above); const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 2)); (void)left; dc_store_4x4(dst, stride, dc); } void vpx_dc_128_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t dc = vdup_n_u8(0x80); (void)above; (void)left; dc_store_4x4(dst, stride, dc); } //------------------------------------------------------------------------------ // DC 8x8 static INLINE uint16x4_t dc_sum_8(const uint8_t *ref) { const uint8x8_t ref_u8 = vld1_u8(ref); uint16x4_t sum = vpaddl_u8(ref_u8); sum = vpadd_u16(sum, sum); return vpadd_u16(sum, sum); } static INLINE void dc_store_8x8(uint8_t *dst, ptrdiff_t stride, const uint8x8_t dc) { const uint8x8_t dc_dup = vdup_lane_u8(dc, 0); int i; for (i = 0; i < 8; ++i, dst += stride) { vst1_u8(dst, dc_dup); } } void vpx_dc_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t above_u8 = vld1_u8(above); const uint8x8_t left_u8 = vld1_u8(left); const uint8x16_t above_and_left = vcombine_u8(above_u8, left_u8); const uint16x8_t p0 = vpaddlq_u8(above_and_left); uint16x4_t sum = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); uint8x8_t dc; sum = vpadd_u16(sum, sum); sum = vpadd_u16(sum, sum); dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 4)); dc_store_8x8(dst, stride, dc); } void vpx_dc_left_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint16x4_t sum = dc_sum_8(left); const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 3)); (void)above; dc_store_8x8(dst, stride, dc); } void vpx_dc_top_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint16x4_t sum = dc_sum_8(above); const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 3)); (void)left; dc_store_8x8(dst, stride, dc); } void vpx_dc_128_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t dc = vdup_n_u8(0x80); (void)above; (void)left; dc_store_8x8(dst, stride, dc); } //------------------------------------------------------------------------------ // DC 
16x16 static INLINE uint16x4_t dc_sum_16(const uint8_t *ref) { const uint8x16_t ref_u8 = vld1q_u8(ref); const uint16x8_t p0 = vpaddlq_u8(ref_u8); uint16x4_t sum = vadd_u16(vget_low_u16(p0), vget_high_u16(p0)); sum = vpadd_u16(sum, sum); return vpadd_u16(sum, sum); } static INLINE void dc_store_16x16(uint8_t *dst, ptrdiff_t stride, const uint8x8_t dc) { const uint8x16_t dc_dup = vdupq_lane_u8(dc, 0); int i; for (i = 0; i < 16; ++i, dst += stride) { vst1q_u8(dst, dc_dup); } } void vpx_dc_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t ref0 = vld1q_u8(above); const uint8x16_t ref1 = vld1q_u8(left); const uint16x8_t p0 = vpaddlq_u8(ref0); const uint16x8_t p1 = vpaddlq_u8(ref1); const uint16x8_t p2 = vaddq_u16(p0, p1); uint16x4_t sum = vadd_u16(vget_low_u16(p2), vget_high_u16(p2)); uint8x8_t dc; sum = vpadd_u16(sum, sum); sum = vpadd_u16(sum, sum); dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 5)); dc_store_16x16(dst, stride, dc); } void vpx_dc_left_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint16x4_t sum = dc_sum_16(left); const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 4)); (void)above; dc_store_16x16(dst, stride, dc); } void vpx_dc_top_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint16x4_t sum = dc_sum_16(above); const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 4)); (void)left; dc_store_16x16(dst, stride, dc); } void vpx_dc_128_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t dc = vdup_n_u8(0x80); (void)above; (void)left; dc_store_16x16(dst, stride, dc); } //------------------------------------------------------------------------------ // DC 32x32 static INLINE uint16x4_t dc_sum_32(const uint8_t *ref) { const uint8x16x2_t r = vld2q_u8(ref); const uint16x8_t p0 = vpaddlq_u8(r.val[0]); const uint16x8_t p1 = vpaddlq_u8(r.val[1]); const uint16x8_t p2 = vaddq_u16(p0, p1); uint16x4_t sum = vadd_u16(vget_low_u16(p2), vget_high_u16(p2)); sum = vpadd_u16(sum, sum); return vpadd_u16(sum, sum); } static INLINE void dc_store_32x32(uint8_t *dst, ptrdiff_t stride, const uint8x8_t dc) { uint8x16x2_t dc_dup; int i; dc_dup.val[0] = dc_dup.val[1] = vdupq_lane_u8(dc, 0); for (i = 0; i < 32; ++i, dst += stride) { vst2q_u8(dst, dc_dup); } } void vpx_dc_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16x2_t a = vld2q_u8(above); const uint8x16x2_t l = vld2q_u8(left); const uint16x8_t pa0 = vpaddlq_u8(a.val[0]); const uint16x8_t pl0 = vpaddlq_u8(l.val[0]); const uint16x8_t pa1 = vpaddlq_u8(a.val[1]); const uint16x8_t pl1 = vpaddlq_u8(l.val[1]); const uint16x8_t pa = vaddq_u16(pa0, pa1); const uint16x8_t pl = vaddq_u16(pl0, pl1); const uint16x8_t pal = vaddq_u16(pa, pl); uint16x4_t sum = vadd_u16(vget_low_u16(pal), vget_high_u16(pal)); uint8x8_t dc; sum = vpadd_u16(sum, sum); sum = vpadd_u16(sum, sum); dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 6)); dc_store_32x32(dst, stride, dc); } void vpx_dc_left_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint16x4_t sum = dc_sum_32(left); const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 5)); (void)above; dc_store_32x32(dst, stride, dc); } void vpx_dc_top_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint16x4_t sum = 
dc_sum_32(above); const uint8x8_t dc = vreinterpret_u8_u16(vrshr_n_u16(sum, 5)); (void)left; dc_store_32x32(dst, stride, dc); } void vpx_dc_128_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t dc = vdup_n_u8(0x80); (void)above; (void)left; dc_store_32x32(dst, stride, dc); } // ----------------------------------------------------------------------------- void vpx_d45_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t ABCDEFGH = vld1_u8(above); const uint64x1_t A1 = vshr_n_u64(vreinterpret_u64_u8(ABCDEFGH), 8); const uint64x1_t A2 = vshr_n_u64(vreinterpret_u64_u8(ABCDEFGH), 16); const uint8x8_t BCDEFGH0 = vreinterpret_u8_u64(A1); const uint8x8_t CDEFGH00 = vreinterpret_u8_u64(A2); const uint8x8_t avg1 = vhadd_u8(ABCDEFGH, CDEFGH00); const uint8x8_t avg2 = vrhadd_u8(avg1, BCDEFGH0); const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2); const uint32x2_t r0 = vreinterpret_u32_u8(avg2); const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8)); const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16)); const uint32x2_t r3 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24)); (void)left; vst1_lane_u32((uint32_t *)(dst + 0 * stride), r0, 0); vst1_lane_u32((uint32_t *)(dst + 1 * stride), r1, 0); vst1_lane_u32((uint32_t *)(dst + 2 * stride), r2, 0); vst1_lane_u32((uint32_t *)(dst + 3 * stride), r3, 0); vst1_lane_u8(dst + 3 * stride + 3, ABCDEFGH, 7); } static INLINE void d45_store_8(uint8_t **dst, const ptrdiff_t stride, const uint8x8_t above_right, uint8x8_t *row) { *row = vext_u8(*row, above_right, 1); vst1_u8(*dst, *row); *dst += stride; } void vpx_d45_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t A0 = vld1_u8(above); const uint8x8_t above_right = vdup_lane_u8(A0, 7); const uint8x8_t A1 = vext_u8(A0, above_right, 1); const uint8x8_t A2 = vext_u8(A0, above_right, 2); const uint8x8_t avg1 = vhadd_u8(A0, A2); uint8x8_t row = vrhadd_u8(avg1, A1); (void)left; vst1_u8(dst, row); dst += stride; d45_store_8(&dst, stride, above_right, &row); d45_store_8(&dst, stride, above_right, &row); d45_store_8(&dst, stride, above_right, &row); d45_store_8(&dst, stride, above_right, &row); d45_store_8(&dst, stride, above_right, &row); d45_store_8(&dst, stride, above_right, &row); vst1_u8(dst, above_right); } static INLINE void d45_store_16(uint8_t **dst, const ptrdiff_t stride, const uint8x16_t above_right, uint8x16_t *row) { *row = vextq_u8(*row, above_right, 1); vst1q_u8(*dst, *row); *dst += stride; } void vpx_d45_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t A0 = vld1q_u8(above); const uint8x16_t above_right = vdupq_lane_u8(vget_high_u8(A0), 7); const uint8x16_t A1 = vextq_u8(A0, above_right, 1); const uint8x16_t A2 = vextq_u8(A0, above_right, 2); const uint8x16_t avg1 = vhaddq_u8(A0, A2); uint8x16_t row = vrhaddq_u8(avg1, A1); (void)left; vst1q_u8(dst, row); dst += stride; d45_store_16(&dst, stride, above_right, &row); d45_store_16(&dst, stride, above_right, &row); d45_store_16(&dst, stride, above_right, &row); d45_store_16(&dst, stride, above_right, &row); d45_store_16(&dst, stride, above_right, &row); d45_store_16(&dst, stride, above_right, &row); d45_store_16(&dst, stride, above_right, &row); d45_store_16(&dst, stride, above_right, &row); d45_store_16(&dst, stride, above_right, &row); d45_store_16(&dst, stride, above_right, &row); 
d45_store_16(&dst, stride, above_right, &row); d45_store_16(&dst, stride, above_right, &row); d45_store_16(&dst, stride, above_right, &row); d45_store_16(&dst, stride, above_right, &row); vst1q_u8(dst, above_right); } void vpx_d45_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t A0_0 = vld1q_u8(above); const uint8x16_t A0_1 = vld1q_u8(above + 16); const uint8x16_t above_right = vdupq_lane_u8(vget_high_u8(A0_1), 7); const uint8x16_t A1_0 = vld1q_u8(above + 1); const uint8x16_t A1_1 = vld1q_u8(above + 17); const uint8x16_t A2_0 = vld1q_u8(above + 2); const uint8x16_t A2_1 = vld1q_u8(above + 18); const uint8x16_t avg_0 = vhaddq_u8(A0_0, A2_0); const uint8x16_t avg_1 = vhaddq_u8(A0_1, A2_1); uint8x16_t row_0 = vrhaddq_u8(avg_0, A1_0); uint8x16_t row_1 = vrhaddq_u8(avg_1, A1_1); int i; (void)left; vst1q_u8(dst, row_0); dst += 16; vst1q_u8(dst, row_1); dst += stride - 16; for (i = 0; i < 30; ++i) { row_0 = vextq_u8(row_0, row_1, 1); row_1 = vextq_u8(row_1, above_right, 1); vst1q_u8(dst, row_0); dst += 16; vst1q_u8(dst, row_1); dst += stride - 16; } vst1q_u8(dst, above_right); dst += 16; vst1q_u8(dst, row_1); } // ----------------------------------------------------------------------------- void vpx_d135_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t XA0123 = vld1_u8(above - 1); const uint8x8_t L0123 = vld1_u8(left); const uint8x8_t L3210 = vrev64_u8(L0123); const uint8x8_t L3210XA012 = vext_u8(L3210, XA0123, 4); const uint8x8_t L210XA0123 = vext_u8(L3210, XA0123, 5); const uint8x8_t L10XA0123_ = vreinterpret_u8_u64(vshr_n_u64(vreinterpret_u64_u8(L210XA0123), 8)); const uint8x8_t avg1 = vhadd_u8(L10XA0123_, L3210XA012); const uint8x8_t avg2 = vrhadd_u8(avg1, L210XA0123); const uint64x1_t avg2_u64 = vreinterpret_u64_u8(avg2); const uint32x2_t r3 = vreinterpret_u32_u8(avg2); const uint32x2_t r2 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 8)); const uint32x2_t r1 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 16)); const uint32x2_t r0 = vreinterpret_u32_u64(vshr_n_u64(avg2_u64, 24)); vst1_lane_u32((uint32_t *)dst, r0, 0); dst += stride; vst1_lane_u32((uint32_t *)dst, r1, 0); dst += stride; vst1_lane_u32((uint32_t *)dst, r2, 0); dst += stride; vst1_lane_u32((uint32_t *)dst, r3, 0); } void vpx_d135_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t XA0123456 = vld1_u8(above - 1); const uint8x8_t A01234567 = vld1_u8(above); const uint8x8_t A1234567_ = vld1_u8(above + 1); const uint8x8_t L01234567 = vld1_u8(left); const uint8x8_t L76543210 = vrev64_u8(L01234567); const uint8x8_t L6543210X = vext_u8(L76543210, XA0123456, 1); const uint8x8_t L543210XA0 = vext_u8(L76543210, XA0123456, 2); const uint8x16_t L76543210XA0123456 = vcombine_u8(L76543210, XA0123456); const uint8x16_t L6543210XA01234567 = vcombine_u8(L6543210X, A01234567); const uint8x16_t L543210XA01234567_ = vcombine_u8(L543210XA0, A1234567_); const uint8x16_t avg = vhaddq_u8(L76543210XA0123456, L543210XA01234567_); const uint8x16_t row = vrhaddq_u8(avg, L6543210XA01234567); const uint8x8_t row_0 = vget_low_u8(row); const uint8x8_t row_1 = vget_high_u8(row); const uint8x8_t r0 = vext_u8(row_0, row_1, 7); const uint8x8_t r1 = vext_u8(row_0, row_1, 6); const uint8x8_t r2 = vext_u8(row_0, row_1, 5); const uint8x8_t r3 = vext_u8(row_0, row_1, 4); const uint8x8_t r4 = vext_u8(row_0, row_1, 3); const uint8x8_t r5 = vext_u8(row_0, row_1, 2); const uint8x8_t r6 = 
vext_u8(row_0, row_1, 1); vst1_u8(dst, r0); dst += stride; vst1_u8(dst, r1); dst += stride; vst1_u8(dst, r2); dst += stride; vst1_u8(dst, r3); dst += stride; vst1_u8(dst, r4); dst += stride; vst1_u8(dst, r5); dst += stride; vst1_u8(dst, r6); dst += stride; vst1_u8(dst, row_0); } static INLINE void d135_store_16x8( uint8_t **dst, const ptrdiff_t stride, const uint8x16_t row_0, const uint8x16_t row_1, const uint8x16_t row_2, const uint8x16_t row_3, const uint8x16_t row_4, const uint8x16_t row_5, const uint8x16_t row_6, const uint8x16_t row_7) { vst1q_u8(*dst, row_0); *dst += stride; vst1q_u8(*dst, row_1); *dst += stride; vst1q_u8(*dst, row_2); *dst += stride; vst1q_u8(*dst, row_3); *dst += stride; vst1q_u8(*dst, row_4); *dst += stride; vst1q_u8(*dst, row_5); *dst += stride; vst1q_u8(*dst, row_6); *dst += stride; vst1q_u8(*dst, row_7); *dst += stride; } void vpx_d135_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t XA0123456789abcde = vld1q_u8(above - 1); const uint8x16_t A0123456789abcdef = vld1q_u8(above); const uint8x16_t A123456789abcdef_ = vld1q_u8(above + 1); const uint8x16_t L0123456789abcdef = vld1q_u8(left); const uint8x8_t L76543210 = vrev64_u8(vget_low_u8(L0123456789abcdef)); const uint8x8_t Lfedcba98 = vrev64_u8(vget_high_u8(L0123456789abcdef)); const uint8x16_t Lfedcba9876543210 = vcombine_u8(Lfedcba98, L76543210); const uint8x16_t Ledcba9876543210X = vextq_u8(Lfedcba9876543210, XA0123456789abcde, 1); const uint8x16_t Ldcba9876543210XA0 = vextq_u8(Lfedcba9876543210, XA0123456789abcde, 2); const uint8x16_t avg_0 = vhaddq_u8(Lfedcba9876543210, Ldcba9876543210XA0); const uint8x16_t avg_1 = vhaddq_u8(XA0123456789abcde, A123456789abcdef_); const uint8x16_t row_0 = vrhaddq_u8(avg_0, Ledcba9876543210X); const uint8x16_t row_1 = vrhaddq_u8(avg_1, A0123456789abcdef); const uint8x16_t r_0 = vextq_u8(row_0, row_1, 15); const uint8x16_t r_1 = vextq_u8(row_0, row_1, 14); const uint8x16_t r_2 = vextq_u8(row_0, row_1, 13); const uint8x16_t r_3 = vextq_u8(row_0, row_1, 12); const uint8x16_t r_4 = vextq_u8(row_0, row_1, 11); const uint8x16_t r_5 = vextq_u8(row_0, row_1, 10); const uint8x16_t r_6 = vextq_u8(row_0, row_1, 9); const uint8x16_t r_7 = vcombine_u8(vget_high_u8(row_0), vget_low_u8(row_1)); const uint8x16_t r_8 = vextq_u8(row_0, row_1, 7); const uint8x16_t r_9 = vextq_u8(row_0, row_1, 6); const uint8x16_t r_a = vextq_u8(row_0, row_1, 5); const uint8x16_t r_b = vextq_u8(row_0, row_1, 4); const uint8x16_t r_c = vextq_u8(row_0, row_1, 3); const uint8x16_t r_d = vextq_u8(row_0, row_1, 2); const uint8x16_t r_e = vextq_u8(row_0, row_1, 1); d135_store_16x8(&dst, stride, r_0, r_1, r_2, r_3, r_4, r_5, r_6, r_7); d135_store_16x8(&dst, stride, r_8, r_9, r_a, r_b, r_c, r_d, r_e, row_0); } static INLINE void d135_store_32x2(uint8_t **dst, const ptrdiff_t stride, const uint8x16_t row_0, const uint8x16_t row_1, const uint8x16_t row_2) { uint8_t *dst2 = *dst; vst1q_u8(dst2, row_1); dst2 += 16; vst1q_u8(dst2, row_2); dst2 += 16 * stride - 16; vst1q_u8(dst2, row_0); dst2 += 16; vst1q_u8(dst2, row_1); *dst += stride; } void vpx_d135_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t LL0123456789abcdef = vld1q_u8(left + 16); const uint8x16_t LU0123456789abcdef = vld1q_u8(left); const uint8x8_t LL76543210 = vrev64_u8(vget_low_u8(LL0123456789abcdef)); const uint8x8_t LU76543210 = vrev64_u8(vget_low_u8(LU0123456789abcdef)); const uint8x8_t LLfedcba98 = 
vrev64_u8(vget_high_u8(LL0123456789abcdef)); const uint8x8_t LUfedcba98 = vrev64_u8(vget_high_u8(LU0123456789abcdef)); const uint8x16_t LLfedcba9876543210 = vcombine_u8(LLfedcba98, LL76543210); const uint8x16_t LUfedcba9876543210 = vcombine_u8(LUfedcba98, LU76543210); const uint8x16_t LLedcba9876543210Uf = vextq_u8(LLfedcba9876543210, LUfedcba9876543210, 1); const uint8x16_t LLdcba9876543210Ufe = vextq_u8(LLfedcba9876543210, LUfedcba9876543210, 2); const uint8x16_t avg_0 = vhaddq_u8(LLfedcba9876543210, LLdcba9876543210Ufe); const uint8x16_t row_0 = vrhaddq_u8(avg_0, LLedcba9876543210Uf); const uint8x16_t XAL0123456789abcde = vld1q_u8(above - 1); const uint8x16_t LUedcba9876543210X = vextq_u8(LUfedcba9876543210, XAL0123456789abcde, 1); const uint8x16_t LUdcba9876543210XA0 = vextq_u8(LUfedcba9876543210, XAL0123456789abcde, 2); const uint8x16_t avg_1 = vhaddq_u8(LUfedcba9876543210, LUdcba9876543210XA0); const uint8x16_t row_1 = vrhaddq_u8(avg_1, LUedcba9876543210X); const uint8x16_t AL0123456789abcdef = vld1q_u8(above); const uint8x16_t AL123456789abcdefg = vld1q_u8(above + 1); const uint8x16_t ALfR0123456789abcde = vld1q_u8(above + 15); const uint8x16_t AR0123456789abcdef = vld1q_u8(above + 16); const uint8x16_t AR123456789abcdef_ = vld1q_u8(above + 17); const uint8x16_t avg_2 = vhaddq_u8(XAL0123456789abcde, AL123456789abcdefg); const uint8x16_t row_2 = vrhaddq_u8(avg_2, AL0123456789abcdef); const uint8x16_t avg_3 = vhaddq_u8(ALfR0123456789abcde, AR123456789abcdef_); const uint8x16_t row_3 = vrhaddq_u8(avg_3, AR0123456789abcdef); { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 15); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 15); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 15); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 14); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 14); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 14); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 13); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 13); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 13); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 12); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 12); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 12); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 11); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 11); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 11); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 10); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 10); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 10); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 9); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 9); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 9); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 8); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 8); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 8); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 7); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 7); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 7); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 6); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 6); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 6); d135_store_32x2(&dst, stride, 
r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 5); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 5); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 5); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 4); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 4); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 4); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 3); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 3); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 3); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 2); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 2); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 2); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } { const uint8x16_t r_0 = vextq_u8(row_0, row_1, 1); const uint8x16_t r_1 = vextq_u8(row_1, row_2, 1); const uint8x16_t r_2 = vextq_u8(row_2, row_3, 1); d135_store_32x2(&dst, stride, r_0, r_1, r_2); } d135_store_32x2(&dst, stride, row_0, row_1, row_2); } #if !HAVE_NEON_ASM void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint32_t d = *(const uint32_t *)above; int i; (void)left; for (i = 0; i < 4; i++, dst += stride) { *(uint32_t *)dst = d; } } void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t d = vld1_u8(above); int i; (void)left; for (i = 0; i < 8; i++, dst += stride) { vst1_u8(dst, d); } } void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t d = vld1q_u8(above); int i; (void)left; for (i = 0; i < 16; i++, dst += stride) { vst1q_u8(dst, d); } } void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t d0 = vld1q_u8(above); const uint8x16_t d1 = vld1q_u8(above + 16); int i; (void)left; for (i = 0; i < 32; i++) { // Note: performance was worse using vst2q_u8 under gcc-4.9 & clang-3.8. // clang-3.8 unrolled the loop fully with no filler so the cause is likely // the latency of the instruction. 
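// Each iteration rewrites the same 32 "above" bytes as one row of the
// destination, split into two 16-byte stores.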
vst1q_u8(dst, d0); dst += 16; vst1q_u8(dst, d1); dst += stride - 16; } } // ----------------------------------------------------------------------------- void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint32x2_t zero = vdup_n_u32(0); const uint8x8_t left_u8 = vreinterpret_u8_u32(vld1_lane_u32((const uint32_t *)left, zero, 0)); uint8x8_t d; (void)above; d = vdup_lane_u8(left_u8, 0); vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d), 0); dst += stride; d = vdup_lane_u8(left_u8, 1); vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d), 0); dst += stride; d = vdup_lane_u8(left_u8, 2); vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d), 0); dst += stride; d = vdup_lane_u8(left_u8, 3); vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d), 0); } void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t left_u8 = vld1_u8(left); uint8x8_t d; (void)above; d = vdup_lane_u8(left_u8, 0); vst1_u8(dst, d); dst += stride; d = vdup_lane_u8(left_u8, 1); vst1_u8(dst, d); dst += stride; d = vdup_lane_u8(left_u8, 2); vst1_u8(dst, d); dst += stride; d = vdup_lane_u8(left_u8, 3); vst1_u8(dst, d); dst += stride; d = vdup_lane_u8(left_u8, 4); vst1_u8(dst, d); dst += stride; d = vdup_lane_u8(left_u8, 5); vst1_u8(dst, d); dst += stride; d = vdup_lane_u8(left_u8, 6); vst1_u8(dst, d); dst += stride; d = vdup_lane_u8(left_u8, 7); vst1_u8(dst, d); } static INLINE void h_store_16x8(uint8_t **dst, const ptrdiff_t stride, const uint8x8_t left) { const uint8x16_t row_0 = vdupq_lane_u8(left, 0); const uint8x16_t row_1 = vdupq_lane_u8(left, 1); const uint8x16_t row_2 = vdupq_lane_u8(left, 2); const uint8x16_t row_3 = vdupq_lane_u8(left, 3); const uint8x16_t row_4 = vdupq_lane_u8(left, 4); const uint8x16_t row_5 = vdupq_lane_u8(left, 5); const uint8x16_t row_6 = vdupq_lane_u8(left, 6); const uint8x16_t row_7 = vdupq_lane_u8(left, 7); vst1q_u8(*dst, row_0); *dst += stride; vst1q_u8(*dst, row_1); *dst += stride; vst1q_u8(*dst, row_2); *dst += stride; vst1q_u8(*dst, row_3); *dst += stride; vst1q_u8(*dst, row_4); *dst += stride; vst1q_u8(*dst, row_5); *dst += stride; vst1q_u8(*dst, row_6); *dst += stride; vst1q_u8(*dst, row_7); *dst += stride; } void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t left_u8q = vld1q_u8(left); (void)above; h_store_16x8(&dst, stride, vget_low_u8(left_u8q)); h_store_16x8(&dst, stride, vget_high_u8(left_u8q)); } static INLINE void h_store_32x8(uint8_t **dst, const ptrdiff_t stride, const uint8x8_t left) { const uint8x16_t row_0 = vdupq_lane_u8(left, 0); const uint8x16_t row_1 = vdupq_lane_u8(left, 1); const uint8x16_t row_2 = vdupq_lane_u8(left, 2); const uint8x16_t row_3 = vdupq_lane_u8(left, 3); const uint8x16_t row_4 = vdupq_lane_u8(left, 4); const uint8x16_t row_5 = vdupq_lane_u8(left, 5); const uint8x16_t row_6 = vdupq_lane_u8(left, 6); const uint8x16_t row_7 = vdupq_lane_u8(left, 7); vst1q_u8(*dst, row_0); // Note clang-3.8 produced poor code w/vst2q_u8 *dst += 16; vst1q_u8(*dst, row_0); *dst += stride - 16; vst1q_u8(*dst, row_1); *dst += 16; vst1q_u8(*dst, row_1); *dst += stride - 16; vst1q_u8(*dst, row_2); *dst += 16; vst1q_u8(*dst, row_2); *dst += stride - 16; vst1q_u8(*dst, row_3); *dst += 16; vst1q_u8(*dst, row_3); *dst += stride - 16; vst1q_u8(*dst, row_4); *dst += 16; vst1q_u8(*dst, row_4); *dst += stride - 16; vst1q_u8(*dst, row_5); *dst += 16; vst1q_u8(*dst, row_5); *dst += 
stride - 16; vst1q_u8(*dst, row_6); *dst += 16; vst1q_u8(*dst, row_6); *dst += stride - 16; vst1q_u8(*dst, row_7); *dst += 16; vst1q_u8(*dst, row_7); *dst += stride - 16; } void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { int i; (void)above; for (i = 0; i < 2; i++, left += 16) { const uint8x16_t left_u8 = vld1q_u8(left); h_store_32x8(&dst, stride, vget_low_u8(left_u8)); h_store_32x8(&dst, stride, vget_high_u8(left_u8)); } } // ----------------------------------------------------------------------------- static INLINE int16x8_t convert_u8_to_s16(uint8x8_t v) { return vreinterpretq_s16_u16(vmovl_u8(v)); } void vpx_tm_predictor_4x4_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t top_left = vld1_dup_u8(above - 1); const uint8x8_t left_u8 = vld1_u8(left); const uint8x8_t above_u8 = vld1_u8(above); const int16x4_t left_s16 = vget_low_s16(convert_u8_to_s16(left_u8)); int16x8_t sub, sum; uint32x2_t d; sub = vreinterpretq_s16_u16(vsubl_u8(above_u8, top_left)); // Avoid vcombine_s16() which generates lots of redundant code with clang-3.8. sub = vreinterpretq_s16_s64( vdupq_lane_s64(vreinterpret_s64_s16(vget_low_s16(sub)), 0)); sum = vcombine_s16(vdup_lane_s16(left_s16, 0), vdup_lane_s16(left_s16, 1)); sum = vaddq_s16(sum, sub); d = vreinterpret_u32_u8(vqmovun_s16(sum)); vst1_lane_u32((uint32_t *)dst, d, 0); dst += stride; vst1_lane_u32((uint32_t *)dst, d, 1); dst += stride; sum = vcombine_s16(vdup_lane_s16(left_s16, 2), vdup_lane_s16(left_s16, 3)); sum = vaddq_s16(sum, sub); d = vreinterpret_u32_u8(vqmovun_s16(sum)); vst1_lane_u32((uint32_t *)dst, d, 0); dst += stride; vst1_lane_u32((uint32_t *)dst, d, 1); } static INLINE void tm_8_kernel(uint8_t **dst, const ptrdiff_t stride, const int16x8_t left_dup, const int16x8_t sub) { const int16x8_t sum = vaddq_s16(left_dup, sub); const uint8x8_t d = vqmovun_s16(sum); vst1_u8(*dst, d); *dst += stride; } void vpx_tm_predictor_8x8_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x8_t top_left = vld1_dup_u8(above - 1); const uint8x8_t above_u8 = vld1_u8(above); const uint8x8_t left_u8 = vld1_u8(left); const int16x8_t left_s16q = convert_u8_to_s16(left_u8); const int16x8_t sub = vreinterpretq_s16_u16(vsubl_u8(above_u8, top_left)); int16x4_t left_s16d = vget_low_s16(left_s16q); int i; for (i = 0; i < 2; i++, left_s16d = vget_high_s16(left_s16q)) { int16x8_t left_dup; left_dup = vdupq_lane_s16(left_s16d, 0); tm_8_kernel(&dst, stride, left_dup, sub); left_dup = vdupq_lane_s16(left_s16d, 1); tm_8_kernel(&dst, stride, left_dup, sub); left_dup = vdupq_lane_s16(left_s16d, 2); tm_8_kernel(&dst, stride, left_dup, sub); left_dup = vdupq_lane_s16(left_s16d, 3); tm_8_kernel(&dst, stride, left_dup, sub); } } static INLINE void tm_16_kernel(uint8_t **dst, const ptrdiff_t stride, const int16x8_t left_dup, const int16x8_t sub0, const int16x8_t sub1) { const int16x8_t sum0 = vaddq_s16(left_dup, sub0); const int16x8_t sum1 = vaddq_s16(left_dup, sub1); const uint8x8_t d0 = vqmovun_s16(sum0); const uint8x8_t d1 = vqmovun_s16(sum1); vst1_u8(*dst, d0); *dst += 8; vst1_u8(*dst, d1); *dst += stride - 8; } void vpx_tm_predictor_16x16_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t top_left = vld1q_dup_u8(above - 1); const uint8x16_t above_u8 = vld1q_u8(above); const int16x8_t sub0 = vreinterpretq_s16_u16( vsubl_u8(vget_low_u8(above_u8), vget_low_u8(top_left))); const int16x8_t sub1 = 
vreinterpretq_s16_u16( vsubl_u8(vget_high_u8(above_u8), vget_high_u8(top_left))); int16x8_t left_dup; int i; for (i = 0; i < 2; i++, left += 8) { const uint8x8_t left_u8 = vld1_u8(left); const int16x8_t left_s16q = convert_u8_to_s16(left_u8); const int16x4_t left_low = vget_low_s16(left_s16q); const int16x4_t left_high = vget_high_s16(left_s16q); left_dup = vdupq_lane_s16(left_low, 0); tm_16_kernel(&dst, stride, left_dup, sub0, sub1); left_dup = vdupq_lane_s16(left_low, 1); tm_16_kernel(&dst, stride, left_dup, sub0, sub1); left_dup = vdupq_lane_s16(left_low, 2); tm_16_kernel(&dst, stride, left_dup, sub0, sub1); left_dup = vdupq_lane_s16(left_low, 3); tm_16_kernel(&dst, stride, left_dup, sub0, sub1); left_dup = vdupq_lane_s16(left_high, 0); tm_16_kernel(&dst, stride, left_dup, sub0, sub1); left_dup = vdupq_lane_s16(left_high, 1); tm_16_kernel(&dst, stride, left_dup, sub0, sub1); left_dup = vdupq_lane_s16(left_high, 2); tm_16_kernel(&dst, stride, left_dup, sub0, sub1); left_dup = vdupq_lane_s16(left_high, 3); tm_16_kernel(&dst, stride, left_dup, sub0, sub1); } } static INLINE void tm_32_kernel(uint8_t **dst, const ptrdiff_t stride, const int16x8_t left_dup, const int16x8_t sub0, const int16x8_t sub1, const int16x8_t sub2, const int16x8_t sub3) { const int16x8_t sum0 = vaddq_s16(left_dup, sub0); const int16x8_t sum1 = vaddq_s16(left_dup, sub1); const int16x8_t sum2 = vaddq_s16(left_dup, sub2); const int16x8_t sum3 = vaddq_s16(left_dup, sub3); const uint8x8_t d0 = vqmovun_s16(sum0); const uint8x8_t d1 = vqmovun_s16(sum1); const uint8x8_t d2 = vqmovun_s16(sum2); const uint8x8_t d3 = vqmovun_s16(sum3); vst1q_u8(*dst, vcombine_u8(d0, d1)); *dst += 16; vst1q_u8(*dst, vcombine_u8(d2, d3)); *dst += stride - 16; } void vpx_tm_predictor_32x32_neon(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t top_left = vld1q_dup_u8(above - 1); const uint8x16_t above_low = vld1q_u8(above); const uint8x16_t above_high = vld1q_u8(above + 16); const int16x8_t sub0 = vreinterpretq_s16_u16( vsubl_u8(vget_low_u8(above_low), vget_low_u8(top_left))); const int16x8_t sub1 = vreinterpretq_s16_u16( vsubl_u8(vget_high_u8(above_low), vget_high_u8(top_left))); const int16x8_t sub2 = vreinterpretq_s16_u16( vsubl_u8(vget_low_u8(above_high), vget_low_u8(top_left))); const int16x8_t sub3 = vreinterpretq_s16_u16( vsubl_u8(vget_high_u8(above_high), vget_high_u8(top_left))); int16x8_t left_dup; int i, j; for (j = 0; j < 4; j++, left += 8) { const uint8x8_t left_u8 = vld1_u8(left); const int16x8_t left_s16q = convert_u8_to_s16(left_u8); int16x4_t left_s16d = vget_low_s16(left_s16q); for (i = 0; i < 2; i++, left_s16d = vget_high_s16(left_s16q)) { left_dup = vdupq_lane_s16(left_s16d, 0); tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3); left_dup = vdupq_lane_s16(left_s16d, 1); tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3); left_dup = vdupq_lane_s16(left_s16d, 2); tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3); left_dup = vdupq_lane_s16(left_s16d, 3); tm_32_kernel(&dst, stride, left_dup, sub0, sub1, sub2, sub3); } } } #endif // !HAVE_NEON_ASM libvpx-1.8.2/vpx_dsp/arm/intrapred_neon_asm.asm ; ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS.
All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; EXPORT |vpx_v_predictor_4x4_neon| EXPORT |vpx_v_predictor_8x8_neon| EXPORT |vpx_v_predictor_16x16_neon| EXPORT |vpx_v_predictor_32x32_neon| EXPORT |vpx_h_predictor_4x4_neon| EXPORT |vpx_h_predictor_8x8_neon| EXPORT |vpx_h_predictor_16x16_neon| EXPORT |vpx_h_predictor_32x32_neon| EXPORT |vpx_tm_predictor_4x4_neon| EXPORT |vpx_tm_predictor_8x8_neon| EXPORT |vpx_tm_predictor_16x16_neon| EXPORT |vpx_tm_predictor_32x32_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 ;void vpx_v_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, ; const uint8_t *left) ; r0 uint8_t *dst ; r1 ptrdiff_t y_stride ; r2 const uint8_t *above ; r3 const uint8_t *left |vpx_v_predictor_4x4_neon| PROC vld1.32 {d0[0]}, [r2] vst1.32 {d0[0]}, [r0], r1 vst1.32 {d0[0]}, [r0], r1 vst1.32 {d0[0]}, [r0], r1 vst1.32 {d0[0]}, [r0], r1 bx lr ENDP ; |vpx_v_predictor_4x4_neon| ;void vpx_v_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, ; const uint8_t *left) ; r0 uint8_t *dst ; r1 ptrdiff_t y_stride ; r2 const uint8_t *above ; r3 const uint8_t *left |vpx_v_predictor_8x8_neon| PROC vld1.8 {d0}, [r2] vst1.8 {d0}, [r0], r1 vst1.8 {d0}, [r0], r1 vst1.8 {d0}, [r0], r1 vst1.8 {d0}, [r0], r1 vst1.8 {d0}, [r0], r1 vst1.8 {d0}, [r0], r1 vst1.8 {d0}, [r0], r1 vst1.8 {d0}, [r0], r1 bx lr ENDP ; |vpx_v_predictor_8x8_neon| ;void vpx_v_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, ; const uint8_t *left) ; r0 uint8_t *dst ; r1 ptrdiff_t y_stride ; r2 const uint8_t *above ; r3 const uint8_t *left |vpx_v_predictor_16x16_neon| PROC vld1.8 {q0}, [r2] vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 vst1.8 {q0}, [r0], r1 bx lr ENDP ; |vpx_v_predictor_16x16_neon| ;void vpx_v_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, ; const uint8_t *left) ; r0 uint8_t *dst ; r1 ptrdiff_t y_stride ; r2 const uint8_t *above ; r3 const uint8_t *left |vpx_v_predictor_32x32_neon| PROC vld1.8 {q0, q1}, [r2] mov r2, #2 loop_v vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 vst1.8 {q0, q1}, [r0], r1 subs r2, r2, #1 bgt loop_v bx lr ENDP ; |vpx_v_predictor_32x32_neon| ;void vpx_h_predictor_4x4_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, ; const uint8_t *left) ; r0 uint8_t *dst ; r1 ptrdiff_t y_stride ; r2 const uint8_t *above ; r3 const uint8_t *left |vpx_h_predictor_4x4_neon| PROC vld1.32 {d1[0]}, [r3] vdup.8 d0, d1[0] vst1.32 {d0[0]}, [r0], r1 vdup.8 d0, d1[1] vst1.32 {d0[0]}, [r0], r1 vdup.8 d0, d1[2] vst1.32 {d0[0]}, [r0], r1 vdup.8 d0, d1[3] vst1.32 {d0[0]}, [r0], r1 bx lr ENDP ; |vpx_h_predictor_4x4_neon| ;void vpx_h_predictor_8x8_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, ; const uint8_t *left) ; r0 uint8_t *dst ; r1 ptrdiff_t y_stride ; r2 const uint8_t *above ; r3 const uint8_t 
*left |vpx_h_predictor_8x8_neon| PROC vld1.64 {d1}, [r3] vdup.8 d0, d1[0] vst1.64 {d0}, [r0], r1 vdup.8 d0, d1[1] vst1.64 {d0}, [r0], r1 vdup.8 d0, d1[2] vst1.64 {d0}, [r0], r1 vdup.8 d0, d1[3] vst1.64 {d0}, [r0], r1 vdup.8 d0, d1[4] vst1.64 {d0}, [r0], r1 vdup.8 d0, d1[5] vst1.64 {d0}, [r0], r1 vdup.8 d0, d1[6] vst1.64 {d0}, [r0], r1 vdup.8 d0, d1[7] vst1.64 {d0}, [r0], r1 bx lr ENDP ; |vpx_h_predictor_8x8_neon| ;void vpx_h_predictor_16x16_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, ; const uint8_t *left) ; r0 uint8_t *dst ; r1 ptrdiff_t y_stride ; r2 const uint8_t *above ; r3 const uint8_t *left |vpx_h_predictor_16x16_neon| PROC vld1.8 {q1}, [r3] vdup.8 q0, d2[0] vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[1] vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[2] vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[3] vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[4] vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[5] vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[6] vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[7] vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[0] vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[1] vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[2] vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[3] vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[4] vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[5] vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[6] vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[7] vst1.8 {q0}, [r0], r1 bx lr ENDP ; |vpx_h_predictor_16x16_neon| ;void vpx_h_predictor_32x32_neon(uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, ; const uint8_t *left) ; r0 uint8_t *dst ; r1 ptrdiff_t y_stride ; r2 const uint8_t *above ; r3 const uint8_t *left |vpx_h_predictor_32x32_neon| PROC sub r1, r1, #16 mov r2, #2 loop_h vld1.8 {q1}, [r3]! vdup.8 q0, d2[0] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[1] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[2] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[3] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[4] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[5] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[6] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d2[7] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[0] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[1] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[2] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[3] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[4] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[5] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[6] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 vdup.8 q0, d3[7] vst1.8 {q0}, [r0]! vst1.8 {q0}, [r0], r1 subs r2, r2, #1 bgt loop_h bx lr ENDP ; |vpx_h_predictor_32x32_neon| ;void vpx_tm_predictor_4x4_neon (uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, ; const uint8_t *left) ; r0 uint8_t *dst ; r1 ptrdiff_t y_stride ; r2 const uint8_t *above ; r3 const uint8_t *left |vpx_tm_predictor_4x4_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 vld1.u8 {d0[]}, [r12] ; Load above 4 pixels vld1.32 {d2[0]}, [r2] ; Compute above - ytop_left vsubl.u8 q3, d2, d0 ; Load left row by row and compute left + (above - ytop_left) ; 1st row and 2nd row vld1.u8 {d2[]}, [r3]! vld1.u8 {d4[]}, [r3]! vmovl.u8 q1, d2 vmovl.u8 q2, d4 vadd.s16 q1, q1, q3 vadd.s16 q2, q2, q3 vqmovun.s16 d0, q1 vqmovun.s16 d1, q2 vst1.32 {d0[0]}, [r0], r1 vst1.32 {d1[0]}, [r0], r1 ; 3rd row and 4th row vld1.u8 {d2[]}, [r3]! 
vld1.u8 {d4[]}, [r3] vmovl.u8 q1, d2 vmovl.u8 q2, d4 vadd.s16 q1, q1, q3 vadd.s16 q2, q2, q3 vqmovun.s16 d0, q1 vqmovun.s16 d1, q2 vst1.32 {d0[0]}, [r0], r1 vst1.32 {d1[0]}, [r0], r1 bx lr ENDP ; |vpx_tm_predictor_4x4_neon| ;void vpx_tm_predictor_8x8_neon (uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, ; const uint8_t *left) ; r0 uint8_t *dst ; r1 ptrdiff_t y_stride ; r2 const uint8_t *above ; r3 const uint8_t *left |vpx_tm_predictor_8x8_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 vld1.8 {d0[]}, [r12] ; preload 8 left vld1.8 {d30}, [r3] ; Load above 8 pixels vld1.64 {d2}, [r2] vmovl.u8 q10, d30 ; Compute above - ytop_left vsubl.u8 q3, d2, d0 ; Load left row by row and compute left + (above - ytop_left) ; 1st row and 2nd row vdup.16 q0, d20[0] vdup.16 q1, d20[1] vadd.s16 q0, q3, q0 vadd.s16 q1, q3, q1 ; 3rd row and 4th row vdup.16 q8, d20[2] vdup.16 q9, d20[3] vadd.s16 q8, q3, q8 vadd.s16 q9, q3, q9 vqmovun.s16 d0, q0 vqmovun.s16 d1, q1 vqmovun.s16 d2, q8 vqmovun.s16 d3, q9 vst1.64 {d0}, [r0], r1 vst1.64 {d1}, [r0], r1 vst1.64 {d2}, [r0], r1 vst1.64 {d3}, [r0], r1 ; 5th row and 6th row vdup.16 q0, d21[0] vdup.16 q1, d21[1] vadd.s16 q0, q3, q0 vadd.s16 q1, q3, q1 ; 7th row and 8th row vdup.16 q8, d21[2] vdup.16 q9, d21[3] vadd.s16 q8, q3, q8 vadd.s16 q9, q3, q9 vqmovun.s16 d0, q0 vqmovun.s16 d1, q1 vqmovun.s16 d2, q8 vqmovun.s16 d3, q9 vst1.64 {d0}, [r0], r1 vst1.64 {d1}, [r0], r1 vst1.64 {d2}, [r0], r1 vst1.64 {d3}, [r0], r1 bx lr ENDP ; |vpx_tm_predictor_8x8_neon| ;void vpx_tm_predictor_16x16_neon (uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, ; const uint8_t *left) ; r0 uint8_t *dst ; r1 ptrdiff_t y_stride ; r2 const uint8_t *above ; r3 const uint8_t *left |vpx_tm_predictor_16x16_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 vld1.8 {d0[]}, [r12] ; Load above 8 pixels vld1.8 {q1}, [r2] ; preload 8 left into r12 vld1.8 {d18}, [r3]! ; Compute above - ytop_left vsubl.u8 q2, d2, d0 vsubl.u8 q3, d3, d0 vmovl.u8 q10, d18 ; Load left row by row and compute left + (above - ytop_left) ; Process 8 rows in each single loop and loop 2 times to process 16 rows. mov r2, #2 loop_16x16_neon ; Process two rows. vdup.16 q0, d20[0] vdup.16 q8, d20[1] vadd.s16 q1, q0, q2 vadd.s16 q0, q0, q3 vadd.s16 q11, q8, q2 vadd.s16 q8, q8, q3 vqmovun.s16 d2, q1 vqmovun.s16 d3, q0 vqmovun.s16 d22, q11 vqmovun.s16 d23, q8 vdup.16 q0, d20[2] ; preload next 2 rows data vdup.16 q8, d20[3] vst1.64 {d2,d3}, [r0], r1 vst1.64 {d22,d23}, [r0], r1 ; Process two rows. vadd.s16 q1, q0, q2 vadd.s16 q0, q0, q3 vadd.s16 q11, q8, q2 vadd.s16 q8, q8, q3 vqmovun.s16 d2, q1 vqmovun.s16 d3, q0 vqmovun.s16 d22, q11 vqmovun.s16 d23, q8 vdup.16 q0, d21[0] ; preload next 2 rows data vdup.16 q8, d21[1] vst1.64 {d2,d3}, [r0], r1 vst1.64 {d22,d23}, [r0], r1 vadd.s16 q1, q0, q2 vadd.s16 q0, q0, q3 vadd.s16 q11, q8, q2 vadd.s16 q8, q8, q3 vqmovun.s16 d2, q1 vqmovun.s16 d3, q0 vqmovun.s16 d22, q11 vqmovun.s16 d23, q8 vdup.16 q0, d21[2] ; preload next 2 rows data vdup.16 q8, d21[3] vst1.64 {d2,d3}, [r0], r1 vst1.64 {d22,d23}, [r0], r1 vadd.s16 q1, q0, q2 vadd.s16 q0, q0, q3 vadd.s16 q11, q8, q2 vadd.s16 q8, q8, q3 vqmovun.s16 d2, q1 vqmovun.s16 d3, q0 vqmovun.s16 d22, q11 vqmovun.s16 d23, q8 vld1.8 {d18}, [r3]!
; preload 8 left into r12 vmovl.u8 q10, d18 vst1.64 {d2,d3}, [r0], r1 vst1.64 {d22,d23}, [r0], r1 subs r2, r2, #1 bgt loop_16x16_neon bx lr ENDP ; |vpx_tm_predictor_16x16_neon| ;void vpx_tm_predictor_32x32_neon (uint8_t *dst, ptrdiff_t y_stride, ; const uint8_t *above, ; const uint8_t *left) ; r0 uint8_t *dst ; r1 ptrdiff_t y_stride ; r2 const uint8_t *above ; r3 const uint8_t *left |vpx_tm_predictor_32x32_neon| PROC ; Load ytop_left = above[-1]; sub r12, r2, #1 vld1.8 {d0[]}, [r12] ; Load above 32 pixels vld1.8 {q1}, [r2]! vld1.8 {q2}, [r2] ; preload 8 left pixels vld1.8 {d26}, [r3]! ; Compute above - ytop_left vsubl.u8 q8, d2, d0 vsubl.u8 q9, d3, d0 vsubl.u8 q10, d4, d0 vsubl.u8 q11, d5, d0 vmovl.u8 q3, d26 ; Load left row by row and compute left + (above - ytop_left) ; Process 8 rows in each single loop and loop 4 times to process 32 rows. mov r2, #4 loop_32x32_neon ; Process two rows. vdup.16 q0, d6[0] vdup.16 q2, d6[1] vadd.s16 q12, q0, q8 vadd.s16 q13, q0, q9 vadd.s16 q14, q0, q10 vadd.s16 q15, q0, q11 vqmovun.s16 d0, q12 vqmovun.s16 d1, q13 vadd.s16 q12, q2, q8 vadd.s16 q13, q2, q9 vqmovun.s16 d2, q14 vqmovun.s16 d3, q15 vadd.s16 q14, q2, q10 vadd.s16 q15, q2, q11 vst1.64 {d0-d3}, [r0], r1 vqmovun.s16 d24, q12 vqmovun.s16 d25, q13 vqmovun.s16 d26, q14 vqmovun.s16 d27, q15 vdup.16 q1, d6[2] vdup.16 q2, d6[3] vst1.64 {d24-d27}, [r0], r1 ; Process two rows. vadd.s16 q12, q1, q8 vadd.s16 q13, q1, q9 vadd.s16 q14, q1, q10 vadd.s16 q15, q1, q11 vqmovun.s16 d0, q12 vqmovun.s16 d1, q13 vadd.s16 q12, q2, q8 vadd.s16 q13, q2, q9 vqmovun.s16 d2, q14 vqmovun.s16 d3, q15 vadd.s16 q14, q2, q10 vadd.s16 q15, q2, q11 vst1.64 {d0-d3}, [r0], r1 vqmovun.s16 d24, q12 vqmovun.s16 d25, q13 vqmovun.s16 d26, q14 vqmovun.s16 d27, q15 vdup.16 q0, d7[0] vdup.16 q2, d7[1] vst1.64 {d24-d27}, [r0], r1 ; Process two rows. vadd.s16 q12, q0, q8 vadd.s16 q13, q0, q9 vadd.s16 q14, q0, q10 vadd.s16 q15, q0, q11 vqmovun.s16 d0, q12 vqmovun.s16 d1, q13 vadd.s16 q12, q2, q8 vadd.s16 q13, q2, q9 vqmovun.s16 d2, q14 vqmovun.s16 d3, q15 vadd.s16 q14, q2, q10 vadd.s16 q15, q2, q11 vst1.64 {d0-d3}, [r0], r1 vqmovun.s16 d24, q12 vqmovun.s16 d25, q13 vqmovun.s16 d26, q14 vqmovun.s16 d27, q15 vdup.16 q0, d7[2] vdup.16 q2, d7[3] vst1.64 {d24-d27}, [r0], r1 ; Process two rows. vadd.s16 q12, q0, q8 vadd.s16 q13, q0, q9 vadd.s16 q14, q0, q10 vadd.s16 q15, q0, q11 vqmovun.s16 d0, q12 vqmovun.s16 d1, q13 vadd.s16 q12, q2, q8 vadd.s16 q13, q2, q9 vqmovun.s16 d2, q14 vqmovun.s16 d3, q15 vadd.s16 q14, q2, q10 vadd.s16 q15, q2, q11 vst1.64 {d0-d3}, [r0], r1 vqmovun.s16 d24, q12 vqmovun.s16 d25, q13 vld1.8 {d0}, [r3]! ; preload 8 left pixels vqmovun.s16 d26, q14 vqmovun.s16 d27, q15 vmovl.u8 q3, d0 vst1.64 {d24-d27}, [r0], r1 subs r2, r2, #1 bgt loop_32x32_neon bx lr ENDP ; |vpx_tm_predictor_32x32_neon| END libvpx-1.8.2/vpx_dsp/arm/loopfilter_16_neon.asm ; ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree.
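; Reader's note (not part of the original source): the threshold logic that
; vpx_wide_mbfilter_neon computes below can be summarized by this scalar C
; sketch, reconstructed from the instruction comments; the names are
; illustrative only.
;
;   /* filter_mask: may this edge be filtered at all? */
;   int mask = abs(p3 - p2) <= limit && abs(p2 - p1) <= limit &&
;              abs(p1 - p0) <= limit && abs(q1 - q0) <= limit &&
;              abs(q2 - q1) <= limit && abs(q3 - q2) <= limit &&
;              abs(p0 - q0) * 2 + abs(p1 - q1) / 2 <= blimit;
;   /* hev: "high edge variance" lanes get only the sharp 4-tap filter. */
;   int hev = abs(p1 - p0) > thresh || abs(q1 - q0) > thresh;
;   /* flat enables the stronger filter that rewrites p2..q2; flat2 the
;      widest one that rewrites p6..q6. */
;   int flat = abs(p1 - p0) <= 1 && abs(q1 - q0) <= 1 &&
;              abs(p0 - p2) <= 1 && abs(q0 - q2) <= 1 &&
;              abs(p3 - p0) <= 1 && abs(q3 - q0) <= 1;
;   int flat2 = abs(p4 - p0) <= 1 && abs(q4 - q0) <= 1 &&
;               abs(p5 - p0) <= 1 && abs(q5 - q0) <= 1 &&
;               abs(p6 - p0) <= 1 && abs(q6 - q0) <= 1 &&
;               abs(p7 - p0) <= 1 && abs(q7 - q0) <= 1;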
; EXPORT |vpx_lpf_horizontal_16_neon| EXPORT |vpx_lpf_horizontal_16_dual_neon| EXPORT |vpx_lpf_vertical_16_neon| EXPORT |vpx_lpf_vertical_16_dual_neon| ARM AREA ||.text||, CODE, READONLY, ALIGN=2 ; void mb_lpf_horizontal_edge(uint8_t *s, int p, ; const uint8_t *blimit, ; const uint8_t *limit, ; const uint8_t *thresh, ; int count) ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, ; r12 int count |mb_lpf_horizontal_edge| PROC push {r4-r8, lr} vpush {d8-d15} ldr r4, [sp, #88] ; load thresh h_count vld1.8 {d16[]}, [r2] ; load *blimit vld1.8 {d17[]}, [r3] ; load *limit vld1.8 {d18[]}, [r4] ; load *thresh sub r8, r0, r1, lsl #3 ; move src pointer down by 8 lines vld1.u8 {d0}, [r8@64], r1 ; p7 vld1.u8 {d1}, [r8@64], r1 ; p6 vld1.u8 {d2}, [r8@64], r1 ; p5 vld1.u8 {d3}, [r8@64], r1 ; p4 vld1.u8 {d4}, [r8@64], r1 ; p3 vld1.u8 {d5}, [r8@64], r1 ; p2 vld1.u8 {d6}, [r8@64], r1 ; p1 vld1.u8 {d7}, [r8@64], r1 ; p0 vld1.u8 {d8}, [r8@64], r1 ; q0 vld1.u8 {d9}, [r8@64], r1 ; q1 vld1.u8 {d10}, [r8@64], r1 ; q2 vld1.u8 {d11}, [r8@64], r1 ; q3 vld1.u8 {d12}, [r8@64], r1 ; q4 vld1.u8 {d13}, [r8@64], r1 ; q5 vld1.u8 {d14}, [r8@64], r1 ; q6 vld1.u8 {d15}, [r8@64], r1 ; q7 bl vpx_wide_mbfilter_neon tst r7, #1 beq h_mbfilter ; flat && mask were not set for any of the channels. Just store the values ; from filter. sub r8, r0, r1, lsl #1 vst1.u8 {d25}, [r8@64], r1 ; store op1 vst1.u8 {d24}, [r8@64], r1 ; store op0 vst1.u8 {d23}, [r8@64], r1 ; store oq0 vst1.u8 {d26}, [r8@64], r1 ; store oq1 b h_next h_mbfilter tst r7, #2 beq h_wide_mbfilter ; flat2 was not set for any of the channels. Just store the values from ; mbfilter. sub r8, r0, r1, lsl #1 sub r8, r8, r1 vst1.u8 {d18}, [r8@64], r1 ; store op2 vst1.u8 {d19}, [r8@64], r1 ; store op1 vst1.u8 {d20}, [r8@64], r1 ; store op0 vst1.u8 {d21}, [r8@64], r1 ; store oq0 vst1.u8 {d22}, [r8@64], r1 ; store oq1 vst1.u8 {d23}, [r8@64], r1 ; store oq2 b h_next h_wide_mbfilter sub r8, r0, r1, lsl #3 add r8, r8, r1 vst1.u8 {d16}, [r8@64], r1 ; store op6 vst1.u8 {d24}, [r8@64], r1 ; store op5 vst1.u8 {d25}, [r8@64], r1 ; store op4 vst1.u8 {d26}, [r8@64], r1 ; store op3 vst1.u8 {d27}, [r8@64], r1 ; store op2 vst1.u8 {d18}, [r8@64], r1 ; store op1 vst1.u8 {d19}, [r8@64], r1 ; store op0 vst1.u8 {d20}, [r8@64], r1 ; store oq0 vst1.u8 {d21}, [r8@64], r1 ; store oq1 vst1.u8 {d22}, [r8@64], r1 ; store oq2 vst1.u8 {d23}, [r8@64], r1 ; store oq3 vst1.u8 {d1}, [r8@64], r1 ; store oq4 vst1.u8 {d2}, [r8@64], r1 ; store oq5 vst1.u8 {d3}, [r8@64], r1 ; store oq6 h_next add r0, r0, #8 subs r12, r12, #1 bne h_count vpop {d8-d15} pop {r4-r8, pc} ENDP ; |mb_lpf_horizontal_edge| ; void vpx_lpf_horizontal_16_neon(uint8_t *s, int pitch, ; const uint8_t *blimit, ; const uint8_t *limit, ; const uint8_t *thresh) ; r0 uint8_t *s, ; r1 int pitch, ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh |vpx_lpf_horizontal_16_neon| PROC mov r12, #1 b mb_lpf_horizontal_edge ENDP ; |vpx_lpf_horizontal_16_neon| ; void vpx_lpf_horizontal_16_dual_neon(uint8_t *s, int pitch, ; const uint8_t *blimit, ; const uint8_t *limit, ; const uint8_t *thresh) ; r0 uint8_t *s, ; r1 int pitch, ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh |vpx_lpf_horizontal_16_dual_neon| PROC mov r12, #2 b mb_lpf_horizontal_edge ENDP ; |vpx_lpf_horizontal_16_dual_neon| ; void mb_lpf_vertical_edge_w(uint8_t *s, int p, const uint8_t *blimit, ; const uint8_t *limit, const uint8_t *thresh, ; int count) { ; r0 
uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, ; r12 int count |mb_lpf_vertical_edge_w| PROC push {r4-r8, lr} vpush {d8-d15} ldr r4, [sp, #88] ; load thresh v_count vld1.8 {d16[]}, [r2] ; load *blimit vld1.8 {d17[]}, [r3] ; load *limit vld1.8 {d18[]}, [r4] ; load *thresh sub r8, r0, #8 vld1.8 {d0}, [r8@64], r1 vld1.8 {d8}, [r0@64], r1 vld1.8 {d1}, [r8@64], r1 vld1.8 {d9}, [r0@64], r1 vld1.8 {d2}, [r8@64], r1 vld1.8 {d10}, [r0@64], r1 vld1.8 {d3}, [r8@64], r1 vld1.8 {d11}, [r0@64], r1 vld1.8 {d4}, [r8@64], r1 vld1.8 {d12}, [r0@64], r1 vld1.8 {d5}, [r8@64], r1 vld1.8 {d13}, [r0@64], r1 vld1.8 {d6}, [r8@64], r1 vld1.8 {d14}, [r0@64], r1 vld1.8 {d7}, [r8@64], r1 vld1.8 {d15}, [r0@64], r1 sub r0, r0, r1, lsl #3 vtrn.32 q0, q2 vtrn.32 q1, q3 vtrn.32 q4, q6 vtrn.32 q5, q7 vtrn.16 q0, q1 vtrn.16 q2, q3 vtrn.16 q4, q5 vtrn.16 q6, q7 vtrn.8 d0, d1 vtrn.8 d2, d3 vtrn.8 d4, d5 vtrn.8 d6, d7 vtrn.8 d8, d9 vtrn.8 d10, d11 vtrn.8 d12, d13 vtrn.8 d14, d15 bl vpx_wide_mbfilter_neon tst r7, #1 beq v_mbfilter ; flat && mask were not set for any of the channels. Just store the values ; from filter. sub r0, #2 vswp d23, d25 vst4.8 {d23[0], d24[0], d25[0], d26[0]}, [r0], r1 vst4.8 {d23[1], d24[1], d25[1], d26[1]}, [r0], r1 vst4.8 {d23[2], d24[2], d25[2], d26[2]}, [r0], r1 vst4.8 {d23[3], d24[3], d25[3], d26[3]}, [r0], r1 vst4.8 {d23[4], d24[4], d25[4], d26[4]}, [r0], r1 vst4.8 {d23[5], d24[5], d25[5], d26[5]}, [r0], r1 vst4.8 {d23[6], d24[6], d25[6], d26[6]}, [r0], r1 vst4.8 {d23[7], d24[7], d25[7], d26[7]}, [r0], r1 add r0, #2 b v_next v_mbfilter tst r7, #2 beq v_wide_mbfilter ; flat2 was not set for any of the channels. Just store the values from ; mbfilter. sub r8, r0, #3 vst3.8 {d18[0], d19[0], d20[0]}, [r8], r1 vst3.8 {d21[0], d22[0], d23[0]}, [r0], r1 vst3.8 {d18[1], d19[1], d20[1]}, [r8], r1 vst3.8 {d21[1], d22[1], d23[1]}, [r0], r1 vst3.8 {d18[2], d19[2], d20[2]}, [r8], r1 vst3.8 {d21[2], d22[2], d23[2]}, [r0], r1 vst3.8 {d18[3], d19[3], d20[3]}, [r8], r1 vst3.8 {d21[3], d22[3], d23[3]}, [r0], r1 vst3.8 {d18[4], d19[4], d20[4]}, [r8], r1 vst3.8 {d21[4], d22[4], d23[4]}, [r0], r1 vst3.8 {d18[5], d19[5], d20[5]}, [r8], r1 vst3.8 {d21[5], d22[5], d23[5]}, [r0], r1 vst3.8 {d18[6], d19[6], d20[6]}, [r8], r1 vst3.8 {d21[6], d22[6], d23[6]}, [r0], r1 vst3.8 {d18[7], d19[7], d20[7]}, [r8], r1 vst3.8 {d21[7], d22[7], d23[7]}, [r0], r1 b v_next v_wide_mbfilter sub r8, r0, #8 vtrn.32 d0, d26 vtrn.32 d16, d27 vtrn.32 d24, d18 vtrn.32 d25, d19 vtrn.16 d0, d24 vtrn.16 d16, d25 vtrn.16 d26, d18 vtrn.16 d27, d19 vtrn.8 d0, d16 vtrn.8 d24, d25 vtrn.8 d26, d27 vtrn.8 d18, d19 vtrn.32 d20, d1 vtrn.32 d21, d2 vtrn.32 d22, d3 vtrn.32 d23, d15 vtrn.16 d20, d22 vtrn.16 d21, d23 vtrn.16 d1, d3 vtrn.16 d2, d15 vtrn.8 d20, d21 vtrn.8 d22, d23 vtrn.8 d1, d2 vtrn.8 d3, d15 vst1.8 {d0}, [r8@64], r1 vst1.8 {d20}, [r0@64], r1 vst1.8 {d16}, [r8@64], r1 vst1.8 {d21}, [r0@64], r1 vst1.8 {d24}, [r8@64], r1 vst1.8 {d22}, [r0@64], r1 vst1.8 {d25}, [r8@64], r1 vst1.8 {d23}, [r0@64], r1 vst1.8 {d26}, [r8@64], r1 vst1.8 {d1}, [r0@64], r1 vst1.8 {d27}, [r8@64], r1 vst1.8 {d2}, [r0@64], r1 vst1.8 {d18}, [r8@64], r1 vst1.8 {d3}, [r0@64], r1 vst1.8 {d19}, [r8@64], r1 vst1.8 {d15}, [r0@64], r1 v_next subs r12, #1 bne v_count vpop {d8-d15} pop {r4-r8, pc} ENDP ; |mb_lpf_vertical_edge_w| ; void vpx_lpf_vertical_16_neon(uint8_t *s, int p, const uint8_t *blimit, ; const uint8_t *limit, const uint8_t *thresh) ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 
const uint8_t *limit, ; sp const uint8_t *thresh |vpx_lpf_vertical_16_neon| PROC mov r12, #1 b mb_lpf_vertical_edge_w ENDP ; |vpx_lpf_vertical_16_neon| ; void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit, ; const uint8_t *limit, ; const uint8_t *thresh) ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh |vpx_lpf_vertical_16_dual_neon| PROC mov r12, #2 b mb_lpf_vertical_edge_w ENDP ; |vpx_lpf_vertical_16_dual_neon| ; void vpx_wide_mbfilter_neon(); ; This is a helper function for the loopfilters. The individual functions do the ; necessary load, transpose (if necessary) and store. ; ; r0-r3 PRESERVE ; d16 blimit ; d17 limit ; d18 thresh ; d0 p7 ; d1 p6 ; d2 p5 ; d3 p4 ; d4 p3 ; d5 p2 ; d6 p1 ; d7 p0 ; d8 q0 ; d9 q1 ; d10 q2 ; d11 q3 ; d12 q4 ; d13 q5 ; d14 q6 ; d15 q7 |vpx_wide_mbfilter_neon| PROC mov r7, #0 ; filter_mask vabd.u8 d19, d4, d5 ; abs(p3 - p2) vabd.u8 d20, d5, d6 ; abs(p2 - p1) vabd.u8 d21, d6, d7 ; abs(p1 - p0) vabd.u8 d22, d9, d8 ; abs(q1 - q0) vabd.u8 d23, d10, d9 ; abs(q2 - q1) vabd.u8 d24, d11, d10 ; abs(q3 - q2) ; only compare the largest value to limit vmax.u8 d19, d19, d20 ; max(abs(p3 - p2), abs(p2 - p1)) vmax.u8 d20, d21, d22 ; max(abs(p1 - p0), abs(q1 - q0)) vmax.u8 d23, d23, d24 ; max(abs(q2 - q1), abs(q3 - q2)) vmax.u8 d19, d19, d20 vabd.u8 d24, d7, d8 ; abs(p0 - q0) vmax.u8 d19, d19, d23 vabd.u8 d23, d6, d9 ; a = abs(p1 - q1) vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2 ; abs () > limit vcge.u8 d19, d17, d19 ; flatmask4 vabd.u8 d25, d7, d5 ; abs(p0 - p2) vabd.u8 d26, d8, d10 ; abs(q0 - q2) vabd.u8 d27, d4, d7 ; abs(p3 - p0) vabd.u8 d28, d11, d8 ; abs(q3 - q0) ; only compare the largest value to thresh vmax.u8 d25, d25, d26 ; max(abs(p0 - p2), abs(q0 - q2)) vmax.u8 d26, d27, d28 ; max(abs(p3 - p0), abs(q3 - q0)) vmax.u8 d25, d25, d26 vmax.u8 d20, d20, d25 vshr.u8 d23, d23, #1 ; a = a / 2 vqadd.u8 d24, d24, d23 ; a = b + a vmov.u8 d30, #1 vcge.u8 d24, d16, d24 ; (a > blimit * 2 + limit) * -1 vcge.u8 d20, d30, d20 ; flat vand d19, d19, d24 ; mask ; hevmask vcgt.u8 d21, d21, d18 ; (abs(p1 - p0) > thresh)*-1 vcgt.u8 d22, d22, d18 ; (abs(q1 - q0) > thresh)*-1 vorr d21, d21, d22 ; hev vand d16, d20, d19 ; flat && mask vmov r5, r6, d16 ; flatmask5(1, p7, p6, p5, p4, p0, q0, q4, q5, q6, q7) vabd.u8 d22, d3, d7 ; abs(p4 - p0) vabd.u8 d23, d12, d8 ; abs(q4 - q0) vabd.u8 d24, d7, d2 ; abs(p0 - p5) vabd.u8 d25, d8, d13 ; abs(q0 - q5) vabd.u8 d26, d1, d7 ; abs(p6 - p0) vabd.u8 d27, d14, d8 ; abs(q6 - q0) vabd.u8 d28, d0, d7 ; abs(p7 - p0) vabd.u8 d29, d15, d8 ; abs(q7 - q0) ; only compare the largest value to thresh vmax.u8 d22, d22, d23 ; max(abs(p4 - p0), abs(q4 - q0)) vmax.u8 d23, d24, d25 ; max(abs(p0 - p5), abs(q0 - q5)) vmax.u8 d24, d26, d27 ; max(abs(p6 - p0), abs(q6 - q0)) vmax.u8 d25, d28, d29 ; max(abs(p7 - p0), abs(q7 - q0)) vmax.u8 d26, d22, d23 vmax.u8 d27, d24, d25 vmax.u8 d23, d26, d27 vcge.u8 d18, d30, d23 ; flat2 vmov.u8 d22, #0x80 orrs r5, r5, r6 ; Check for 0 orreq r7, r7, #1 ; Only do filter branch vand d17, d18, d16 ; flat2 && flat && mask vmov r5, r6, d17 ; mbfilter() function ; filter() function ; convert to signed veor d23, d8, d22 ; qs0 veor d24, d7, d22 ; ps0 veor d25, d6, d22 ; ps1 veor d26, d9, d22 ; qs1 vmov.u8 d27, #3 vsub.s8 d28, d23, d24 ; ( qs0 - ps0) vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1) vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0) vand d29, d29, d21 ; filter &= hev vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0) vmov.u8 d29, #4 ; filter =
clamp(filter + 3 * ( qs0 - ps0)) vqmovn.s16 d28, q15 vand d28, d28, d19 ; filter &= mask vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3) vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4) vshr.s8 d30, d30, #3 ; filter2 >>= 3 vshr.s8 d29, d29, #3 ; filter1 >>= 3 vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2) vqsub.s8 d23, d23, d29 ; oq0 = clamp(qs0 - filter1) ; outer tap adjustments: ++filter1 >> 1 vrshr.s8 d29, d29, #1 vbic d29, d29, d21 ; filter &= ~hev vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter) vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter) veor d24, d24, d22 ; *f_op0 = u^0x80 veor d23, d23, d22 ; *f_oq0 = u^0x80 veor d25, d25, d22 ; *f_op1 = u^0x80 veor d26, d26, d22 ; *f_oq1 = u^0x80 tst r7, #1 bxne lr orrs r5, r5, r6 ; Check for 0 orreq r7, r7, #2 ; Only do mbfilter branch ; mbfilter flat && mask branch ; TODO(fgalligan): Can I decrease the cycles shifting to consecutive d's ; and using vbit on the q's? vmov.u8 d29, #2 vaddl.u8 q15, d7, d8 ; op2 = p0 + q0 vmlal.u8 q15, d4, d27 ; op2 = p0 + q0 + p3 * 3 vmlal.u8 q15, d5, d29 ; op2 = p0 + q0 + p3 * 3 + p2 * 2 vaddl.u8 q10, d4, d5 vaddw.u8 q15, d6 ; op2 = p1 + p0 + q0 + p3 * 3 + p2 * 2 vaddl.u8 q14, d6, d9 vqrshrn.u16 d18, q15, #3 ; r_op2 vsub.i16 q15, q10 vaddl.u8 q10, d4, d6 vadd.i16 q15, q14 vaddl.u8 q14, d7, d10 vqrshrn.u16 d19, q15, #3 ; r_op1 vsub.i16 q15, q10 vadd.i16 q15, q14 vaddl.u8 q14, d8, d11 vqrshrn.u16 d20, q15, #3 ; r_op0 vsubw.u8 q15, d4 ; oq0 = op0 - p3 vsubw.u8 q15, d7 ; oq0 -= p0 vadd.i16 q15, q14 vaddl.u8 q14, d9, d11 vqrshrn.u16 d21, q15, #3 ; r_oq0 vsubw.u8 q15, d5 ; oq1 = oq0 - p2 vsubw.u8 q15, d8 ; oq1 -= q0 vadd.i16 q15, q14 vaddl.u8 q14, d10, d11 vqrshrn.u16 d22, q15, #3 ; r_oq1 vsubw.u8 q15, d6 ; oq2 = oq0 - p1 vsubw.u8 q15, d9 ; oq2 -= q1 vadd.i16 q15, q14 vqrshrn.u16 d27, q15, #3 ; r_oq2 ; Filter does not set op2 or oq2, so use p2 and q2.
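; Reader's note: vbif inserts the source value into lanes where the mask
; operand (d16 = flat && mask) is zero, and vbit does the opposite, so the
; block below is a per-lane select: lanes with flat && mask set keep the
; smoothed r_op*/r_oq* results, the rest fall back to the 4-tap filter()
; outputs or to the untouched p2/q2.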
vbif d18, d5, d16 ; t_op2 |= p2 & ~(flat & mask) vbif d19, d25, d16 ; t_op1 |= f_op1 & ~(flat & mask) vbif d20, d24, d16 ; t_op0 |= f_op0 & ~(flat & mask) vbif d21, d23, d16 ; t_oq0 |= f_oq0 & ~(flat & mask) vbif d22, d26, d16 ; t_oq1 |= f_oq1 & ~(flat & mask) vbit d23, d27, d16 ; t_oq2 |= r_oq2 & (flat & mask) vbif d23, d10, d16 ; t_oq2 |= q2 & ~(flat & mask) tst r7, #2 bxne lr ; wide_mbfilter flat2 && flat && mask branch vmov.u8 d16, #7 vaddl.u8 q15, d7, d8 ; op6 = p0 + q0 vaddl.u8 q12, d2, d3 vaddl.u8 q13, d4, d5 vaddl.u8 q14, d1, d6 vmlal.u8 q15, d0, d16 ; op6 += p7 * 3 vadd.i16 q12, q13 vadd.i16 q15, q14 vaddl.u8 q14, d2, d9 vadd.i16 q15, q12 vaddl.u8 q12, d0, d1 vaddw.u8 q15, d1 vaddl.u8 q13, d0, d2 vadd.i16 q14, q15, q14 vqrshrn.u16 d16, q15, #4 ; w_op6 vsub.i16 q15, q14, q12 vaddl.u8 q14, d3, d10 vqrshrn.u16 d24, q15, #4 ; w_op5 vsub.i16 q15, q13 vaddl.u8 q13, d0, d3 vadd.i16 q15, q14 vaddl.u8 q14, d4, d11 vqrshrn.u16 d25, q15, #4 ; w_op4 vadd.i16 q15, q14 vaddl.u8 q14, d0, d4 vsub.i16 q15, q13 vsub.i16 q14, q15, q14 vqrshrn.u16 d26, q15, #4 ; w_op3 vaddw.u8 q15, q14, d5 ; op2 += p2 vaddl.u8 q14, d0, d5 vaddw.u8 q15, d12 ; op2 += q4 vbif d26, d4, d17 ; op3 |= p3 & ~(f2 & f & m) vqrshrn.u16 d27, q15, #4 ; w_op2 vsub.i16 q15, q14 vaddl.u8 q14, d0, d6 vaddw.u8 q15, d6 ; op1 += p1 vaddw.u8 q15, d13 ; op1 += q5 vbif d27, d18, d17 ; op2 |= t_op2 & ~(f2 & f & m) vqrshrn.u16 d18, q15, #4 ; w_op1 vsub.i16 q15, q14 vaddl.u8 q14, d0, d7 vaddw.u8 q15, d7 ; op0 += p0 vaddw.u8 q15, d14 ; op0 += q6 vbif d18, d19, d17 ; op1 |= t_op1 & ~(f2 & f & m) vqrshrn.u16 d19, q15, #4 ; w_op0 vsub.i16 q15, q14 vaddl.u8 q14, d1, d8 vaddw.u8 q15, d8 ; oq0 += q0 vaddw.u8 q15, d15 ; oq0 += q7 vbif d19, d20, d17 ; op0 |= t_op0 & ~(f2 & f & m) vqrshrn.u16 d20, q15, #4 ; w_oq0 vsub.i16 q15, q14 vaddl.u8 q14, d2, d9 vaddw.u8 q15, d9 ; oq1 += q1 vaddl.u8 q4, d10, d15 vaddw.u8 q15, d15 ; oq1 += q7 vbif d20, d21, d17 ; oq0 |= t_oq0 & ~(f2 & f & m) vqrshrn.u16 d21, q15, #4 ; w_oq1 vsub.i16 q15, q14 vaddl.u8 q14, d3, d10 vadd.i16 q15, q4 vaddl.u8 q4, d11, d15 vbif d21, d22, d17 ; oq1 |= t_oq1 & ~(f2 & f & m) vqrshrn.u16 d22, q15, #4 ; w_oq2 vsub.i16 q15, q14 vaddl.u8 q14, d4, d11 vadd.i16 q15, q4 vaddl.u8 q4, d12, d15 vbif d22, d23, d17 ; oq2 |= t_oq2 & ~(f2 & f & m) vqrshrn.u16 d23, q15, #4 ; w_oq3 vsub.i16 q15, q14 vaddl.u8 q14, d5, d12 vadd.i16 q15, q4 vaddl.u8 q4, d13, d15 vbif d16, d1, d17 ; op6 |= p6 & ~(f2 & f & m) vqrshrn.u16 d1, q15, #4 ; w_oq4 vsub.i16 q15, q14 vaddl.u8 q14, d6, d13 vadd.i16 q15, q4 vaddl.u8 q4, d14, d15 vbif d24, d2, d17 ; op5 |= p5 & ~(f2 & f & m) vqrshrn.u16 d2, q15, #4 ; w_oq5 vsub.i16 q15, q14 vbif d25, d3, d17 ; op4 |= p4 & ~(f2 & f & m) vadd.i16 q15, q4 vbif d23, d11, d17 ; oq3 |= q3 & ~(f2 & f & m) vqrshrn.u16 d3, q15, #4 ; w_oq6 vbif d1, d12, d17 ; oq4 |= q4 & ~(f2 & f & m) vbif d2, d13, d17 ; oq5 |= q5 & ~(f2 & f & m) vbif d3, d14, d17 ; oq6 |= q6 & ~(f2 & f & m) bx lr ENDP ; |vpx_wide_mbfilter_neon| END libvpx-1.8.2/vpx_dsp/arm/loopfilter_4_neon.asm ; ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree.
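; Reader's note (not part of the original source): the 4-tap update done by
; filter4_8 below, as a scalar C sketch reconstructed from the instruction
; comments. ps*/qs* are the pixels XORed with 0x80 (the signed domain) and
; clamp8() stands for saturation to [-128, 127]; the names are illustrative.
;
;   int f = hev ? clamp8(ps1 - qs1) : 0;
;   f = mask ? clamp8(f + 3 * (qs0 - ps0)) : 0;
;   int f1 = clamp8(f + 4) >> 3;           /* arithmetic shift */
;   int f2 = clamp8(f + 3) >> 3;
;   op0 = clamp8(ps0 + f2);
;   oq0 = clamp8(qs0 - f1);
;   int outer = hev ? 0 : (f1 + 1) >> 1;   /* outer tap adjustment */
;   op1 = clamp8(ps1 + outer);
;   oq1 = clamp8(qs1 - outer);
;
; The four outputs are XORed with 0x80 again before the caller stores them.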
; EXPORT |vpx_lpf_horizontal_4_neon| EXPORT |vpx_lpf_vertical_4_neon| EXPORT |vpx_lpf_horizontal_4_dual_neon| EXPORT |vpx_lpf_vertical_4_dual_neon| ARM AREA ||.text||, CODE, READONLY, ALIGN=2 ; Currently vpx only works on 8 iterations at a time. The vp8 loop filter ; works on 16 iterations at a time. ; ; void vpx_lpf_horizontal_4_neon(uint8_t *s, ; int p /* pitch */, ; const uint8_t *blimit, ; const uint8_t *limit, ; const uint8_t *thresh) ; ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, |vpx_lpf_horizontal_4_neon| PROC push {lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit ldr r2, [sp, #4] ; load thresh add r1, r1, r1 ; double pitch vld1.8 {d1[]}, [r3] ; duplicate *limit vld1.8 {d2[]}, [r2] ; duplicate *thresh sub r2, r0, r1, lsl #1 ; move src pointer down by 4 lines add r3, r2, r1, lsr #1 ; set to 3 lines down vld1.u8 {d3}, [r2@64], r1 ; p3 vld1.u8 {d4}, [r3@64], r1 ; p2 vld1.u8 {d5}, [r2@64], r1 ; p1 vld1.u8 {d6}, [r3@64], r1 ; p0 vld1.u8 {d7}, [r2@64], r1 ; q0 vld1.u8 {d16}, [r3@64], r1 ; q1 vld1.u8 {d17}, [r2@64] ; q2 vld1.u8 {d18}, [r3@64] ; q3 sub r2, r2, r1, lsl #1 sub r3, r3, r1, lsl #1 bl filter4_8 vst1.u8 {d4}, [r2@64], r1 ; store op1 vst1.u8 {d5}, [r3@64], r1 ; store op0 vst1.u8 {d6}, [r2@64], r1 ; store oq0 vst1.u8 {d7}, [r3@64], r1 ; store oq1 pop {pc} ENDP ; |vpx_lpf_horizontal_4_neon| ; Currently vpx only works on 8 iterations at a time. The vp8 loop filter ; works on 16 iterations at a time. ; ; void vpx_lpf_vertical_4_neon(uint8_t *s, ; int p /* pitch */, ; const uint8_t *blimit, ; const uint8_t *limit, ; const uint8_t *thresh) ; ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, |vpx_lpf_vertical_4_neon| PROC push {lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit vld1.8 {d1[]}, [r3] ; duplicate *limit ldr r3, [sp, #4] ; load thresh sub r2, r0, #4 ; move s pointer down by 4 columns vld1.8 {d2[]}, [r3] ; duplicate *thresh vld1.u8 {d3}, [r2], r1 ; load s data vld1.u8 {d4}, [r2], r1 vld1.u8 {d5}, [r2], r1 vld1.u8 {d6}, [r2], r1 vld1.u8 {d7}, [r2], r1 vld1.u8 {d16}, [r2], r1 vld1.u8 {d17}, [r2], r1 vld1.u8 {d18}, [r2] ; transpose to 8x16 matrix vtrn.32 d3, d7 vtrn.32 d4, d16 vtrn.32 d5, d17 vtrn.32 d6, d18 vtrn.16 d3, d5 vtrn.16 d4, d6 vtrn.16 d7, d17 vtrn.16 d16, d18 vtrn.8 d3, d4 vtrn.8 d5, d6 vtrn.8 d7, d16 vtrn.8 d17, d18 bl filter4_8 sub r0, r0, #2 ; store op1, op0, oq0, oq1 vst4.8 {d4[0], d5[0], d6[0], d7[0]}, [r0], r1 vst4.8 {d4[1], d5[1], d6[1], d7[1]}, [r0], r1 vst4.8 {d4[2], d5[2], d6[2], d7[2]}, [r0], r1 vst4.8 {d4[3], d5[3], d6[3], d7[3]}, [r0], r1 vst4.8 {d4[4], d5[4], d6[4], d7[4]}, [r0], r1 vst4.8 {d4[5], d5[5], d6[5], d7[5]}, [r0], r1 vst4.8 {d4[6], d5[6], d6[6], d7[6]}, [r0], r1 vst4.8 {d4[7], d5[7], d6[7], d7[7]}, [r0] pop {pc} ENDP ; |vpx_lpf_vertical_4_neon| ; void filter4_8(); ; This is a helper function for the loopfilters. The individual functions do the ; necessary load, transpose (if necessary) and store. The function does not use ; registers d8-d15.
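; The _dual variants further down reuse the same math through filter4_16,
; which filters sixteen pixels per call as two independently thresholded
; groups of eight (blimit0/limit0/thresh0 in the low half of each q
; register, blimit1/limit1/thresh1 in the high half).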
; ; Inputs: ; r0-r3, r12 PRESERVE ; d0 blimit ; d1 limit ; d2 thresh ; d3 p3 ; d4 p2 ; d5 p1 ; d6 p0 ; d7 q0 ; d16 q1 ; d17 q2 ; d18 q3 ; ; Outputs: ; d4 op1 ; d5 op0 ; d6 oq0 ; d7 oq1 |filter4_8| PROC ; filter_mask vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2) vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1) vabd.u8 d21, d5, d6 ; m3 = abs(p1 - p0) vabd.u8 d22, d16, d7 ; m4 = abs(q1 - q0) vabd.u8 d3, d17, d16 ; m5 = abs(q2 - q1) vabd.u8 d4, d18, d17 ; m6 = abs(q3 - q2) ; only compare the largest value to limit vmax.u8 d19, d19, d20 ; m1 = max(m1, m2) vmax.u8 d20, d21, d22 ; m2 = max(m3, m4) vabd.u8 d17, d6, d7 ; abs(p0 - q0) vmax.u8 d3, d3, d4 ; m3 = max(m5, m6) vmov.u8 d18, #0x80 vmax.u8 d23, d19, d20 ; m1 = max(m1, m2) ; hevmask vcgt.u8 d21, d21, d2 ; (abs(p1 - p0) > thresh)*-1 vcgt.u8 d22, d22, d2 ; (abs(q1 - q0) > thresh)*-1 vmax.u8 d23, d23, d3 ; m1 = max(m1, m3) vabd.u8 d28, d5, d16 ; a = abs(p1 - q1) vqadd.u8 d17, d17, d17 ; b = abs(p0 - q0) * 2 veor d7, d7, d18 ; qs0 vcge.u8 d23, d1, d23 ; abs(m1) > limit ; filter() function ; convert to signed vshr.u8 d28, d28, #1 ; a = a / 2 veor d6, d6, d18 ; ps0 veor d5, d5, d18 ; ps1 vqadd.u8 d17, d17, d28 ; a = b + a veor d16, d16, d18 ; qs1 vmov.u8 d19, #3 vsub.s8 d28, d7, d6 ; ( qs0 - ps0) vcge.u8 d17, d0, d17 ; a > blimit vqsub.s8 d27, d5, d16 ; filter = clamp(ps1-qs1) vorr d22, d21, d22 ; hevmask vmull.s8 q12, d28, d19 ; 3 * ( qs0 - ps0) vand d27, d27, d22 ; filter &= hev vand d23, d23, d17 ; filter_mask vaddw.s8 q12, q12, d27 ; filter + 3 * (qs0 - ps0) vmov.u8 d17, #4 ; filter = clamp(filter + 3 * ( qs0 - ps0)) vqmovn.s16 d27, q12 vand d27, d27, d23 ; filter &= mask vqadd.s8 d28, d27, d19 ; filter2 = clamp(filter+3) vqadd.s8 d27, d27, d17 ; filter1 = clamp(filter+4) vshr.s8 d28, d28, #3 ; filter2 >>= 3 vshr.s8 d27, d27, #3 ; filter1 >>= 3 vqadd.s8 d19, d6, d28 ; u = clamp(ps0 + filter2) vqsub.s8 d26, d7, d27 ; u = clamp(qs0 - filter1) ; outer tap adjustments vrshr.s8 d27, d27, #1 ; filter = ++filter1 >> 1 veor d6, d26, d18 ; *oq0 = u^0x80 vbic d27, d27, d22 ; filter &= ~hev vqadd.s8 d21, d5, d27 ; u = clamp(ps1 + filter) vqsub.s8 d20, d16, d27 ; u = clamp(qs1 - filter) veor d5, d19, d18 ; *op0 = u^0x80 veor d4, d21, d18 ; *op1 = u^0x80 veor d7, d20, d18 ; *oq1 = u^0x80 bx lr ENDP ; |filter4_8| ;void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p, ; const uint8_t *blimit0, ; const uint8_t *limit0, ; const uint8_t *thresh0, ; const uint8_t *blimit1, ; const uint8_t *limit1, ; const uint8_t *thresh1) ; r0 uint8_t *s, ; r1 int p, ; r2 const uint8_t *blimit0, ; r3 const uint8_t *limit0, ; sp const uint8_t *thresh0, ; sp+4 const uint8_t *blimit1, ; sp+8 const uint8_t *limit1, ; sp+12 const uint8_t *thresh1, |vpx_lpf_horizontal_4_dual_neon| PROC push {lr} ldr r12, [sp, #4] ; load thresh0 vld1.8 {d0}, [r2] ; load blimit0 to first half q vld1.8 {d2}, [r3] ; load limit0 to first half q add r1, r1, r1 ; double pitch ldr r2, [sp, #8] ; load blimit1 vld1.8 {d4}, [r12] ; load thresh0 to first half q ldr r3, [sp, #12] ; load limit1 ldr r12, [sp, #16] ; load thresh1 vld1.8 {d1}, [r2] ; load blimit1 to 2nd half q sub r2, r0, r1, lsl #1 ; s[-4 * p] vld1.8 {d3}, [r3] ; load limit1 to 2nd half q vld1.8 {d5}, [r12] ; load thresh1 to 2nd half q vpush {d8-d15} ; save neon registers add r3, r2, r1, lsr #1 ; s[-3 * p] vld1.u8 {q3}, [r2@64], r1 ; p3 vld1.u8 {q4}, [r3@64], r1 ; p2 vld1.u8 {q5}, [r2@64], r1 ; p1 vld1.u8 {q6}, [r3@64], r1 ; p0 vld1.u8 {q7}, [r2@64], r1 ; q0 vld1.u8 {q8}, [r3@64], r1 ; q1 vld1.u8 {q9}, [r2@64] ; q2 vld1.u8 {q10}, [r3@64] ; q3 sub r2, r2, r1, lsl 
#1 sub r3, r3, r1, lsl #1 bl filter4_16 vst1.u8 {q5}, [r2@64], r1 ; store op1 vst1.u8 {q6}, [r3@64], r1 ; store op0 vst1.u8 {q7}, [r2@64], r1 ; store oq0 vst1.u8 {q8}, [r3@64], r1 ; store oq1 vpop {d8-d15} ; restore neon registers pop {pc} ENDP ; |vpx_lpf_horizontal_4_dual_neon| ;void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p, ; const uint8_t *blimit0, ; const uint8_t *limit0, ; const uint8_t *thresh0, ; const uint8_t *blimit1, ; const uint8_t *limit1, ; const uint8_t *thresh1) ; r0 uint8_t *s, ; r1 int p, ; r2 const uint8_t *blimit0, ; r3 const uint8_t *limit0, ; sp const uint8_t *thresh0, ; sp+4 const uint8_t *blimit1, ; sp+8 const uint8_t *limit1, ; sp+12 const uint8_t *thresh1, |vpx_lpf_vertical_4_dual_neon| PROC push {lr} ldr r12, [sp, #4] ; load thresh0 vld1.8 {d0}, [r2] ; load blimit0 to first half q vld1.8 {d2}, [r3] ; load limit0 to first half q ldr r2, [sp, #8] ; load blimit1 vld1.8 {d4}, [r12] ; load thresh0 to first half q ldr r3, [sp, #12] ; load limit1 ldr r12, [sp, #16] ; load thresh1 vld1.8 {d1}, [r2] ; load blimit1 to 2nd half q sub r2, r0, #4 ; s[-4] vld1.8 {d3}, [r3] ; load limit1 to 2nd half q vld1.8 {d5}, [r12] ; load thresh1 to 2nd half q vpush {d8-d15} ; save neon registers vld1.u8 {d6}, [r2], r1 ; 00 01 02 03 04 05 06 07 vld1.u8 {d8}, [r2], r1 ; 10 11 12 13 14 15 16 17 vld1.u8 {d10}, [r2], r1 ; 20 21 22 23 24 25 26 27 vld1.u8 {d12}, [r2], r1 ; 30 31 32 33 34 35 36 37 vld1.u8 {d14}, [r2], r1 ; 40 41 42 43 44 45 46 47 vld1.u8 {d16}, [r2], r1 ; 50 51 52 53 54 55 56 57 vld1.u8 {d18}, [r2], r1 ; 60 61 62 63 64 65 66 67 vld1.u8 {d20}, [r2], r1 ; 70 71 72 73 74 75 76 77 vld1.u8 {d7}, [r2], r1 ; 80 81 82 83 84 85 86 87 vld1.u8 {d9}, [r2], r1 ; 90 91 92 93 94 95 96 97 vld1.u8 {d11}, [r2], r1 ; A0 A1 A2 A3 A4 A5 A6 A7 vld1.u8 {d13}, [r2], r1 ; B0 B1 B2 B3 B4 B5 B6 B7 vld1.u8 {d15}, [r2], r1 ; C0 C1 C2 C3 C4 C5 C6 C7 vld1.u8 {d17}, [r2], r1 ; D0 D1 D2 D3 D4 D5 D6 D7 vld1.u8 {d19}, [r2], r1 ; E0 E1 E2 E3 E4 E5 E6 E7 vld1.u8 {d21}, [r2] ; F0 F1 F2 F3 F4 F5 F6 F7 vtrn.8 q3, q4 ; q3 : 00 10 02 12 04 14 06 16 80 90 82 92 84 94 86 96 ; q4 : 01 11 03 13 05 15 07 17 81 91 83 93 85 95 87 97 vtrn.8 q5, q6 ; q5 : 20 30 22 32 24 34 26 36 A0 B0 A2 B2 A4 B4 A6 B6 ; q6 : 21 31 23 33 25 35 27 37 A1 B1 A3 B3 A5 B5 A7 B7 vtrn.8 q7, q8 ; q7 : 40 50 42 52 44 54 46 56 C0 D0 C2 D2 C4 D4 C6 D6 ; q8 : 41 51 43 53 45 55 47 57 C1 D1 C3 D3 C5 D5 C7 D7 vtrn.8 q9, q10 ; q9 : 60 70 62 72 64 74 66 76 E0 F0 E2 F2 E4 F4 E6 F6 ; q10: 61 71 63 73 65 75 67 77 E1 F1 E3 F3 E5 F5 E7 F7 vtrn.16 q3, q5 ; q3 : 00 10 20 30 04 14 24 34 80 90 A0 B0 84 94 A4 B4 ; q5 : 02 12 22 32 06 16 26 36 82 92 A2 B2 86 96 A6 B6 vtrn.16 q4, q6 ; q4 : 01 11 21 31 05 15 25 35 81 91 A1 B1 85 95 A5 B5 ; q6 : 03 13 23 33 07 17 27 37 83 93 A3 B3 87 97 A7 B7 vtrn.16 q7, q9 ; q7 : 40 50 60 70 44 54 64 74 C0 D0 E0 F0 C4 D4 E4 F4 ; q9 : 42 52 62 72 46 56 66 76 C2 D2 E2 F2 C6 D6 E6 F6 vtrn.16 q8, q10 ; q8 : 41 51 61 71 45 55 65 75 C1 D1 E1 F1 C5 D5 E5 F5 ; q10: 43 53 63 73 47 57 67 77 C3 D3 E3 F3 C7 D7 E7 F7 vtrn.32 q3, q7 ; q3 : 00 10 20 30 40 50 60 70 80 90 A0 B0 C0 D0 E0 F0 ; q7 : 04 14 24 34 44 54 64 74 84 94 A4 B4 C4 D4 E4 F4 vtrn.32 q5, q9 ; q5 : 02 12 22 32 42 52 62 72 82 92 A2 B2 C2 D2 E2 F2 ; q9 : 06 16 26 36 46 56 66 76 86 96 A6 B6 C6 D6 E6 F6 vtrn.32 q4, q8 ; q4 : 01 11 21 31 41 51 61 71 81 91 A1 B1 C1 D1 E1 F1 ; q8 : 05 15 25 35 45 55 65 75 85 95 A5 B5 C5 D5 E5 F5 vtrn.32 q6, q10 ; q6 : 03 13 23 33 43 53 63 73 83 93 A3 B3 C3 D3 E3 F3 ; q10: 07 17 27 37 47 57 67 77 87 97 A7 B7 C7 D7 E7 F7 bl filter4_16 sub r0, #2 vmov d0, d11 vmov 
d1, d13 vmov d2, d15 vmov d3, d17 vmov d11, d12 vmov d12, d14 vmov d13, d16 vst4.8 {d10[0], d11[0], d12[0], d13[0]}, [r0], r1 vst4.8 {d10[1], d11[1], d12[1], d13[1]}, [r0], r1 vst4.8 {d10[2], d11[2], d12[2], d13[2]}, [r0], r1 vst4.8 {d10[3], d11[3], d12[3], d13[3]}, [r0], r1 vst4.8 {d10[4], d11[4], d12[4], d13[4]}, [r0], r1 vst4.8 {d10[5], d11[5], d12[5], d13[5]}, [r0], r1 vst4.8 {d10[6], d11[6], d12[6], d13[6]}, [r0], r1 vst4.8 {d10[7], d11[7], d12[7], d13[7]}, [r0], r1 vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r0], r1 vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r0], r1 vst4.8 {d0[2], d1[2], d2[2], d3[2]}, [r0], r1 vst4.8 {d0[3], d1[3], d2[3], d3[3]}, [r0], r1 vst4.8 {d0[4], d1[4], d2[4], d3[4]}, [r0], r1 vst4.8 {d0[5], d1[5], d2[5], d3[5]}, [r0], r1 vst4.8 {d0[6], d1[6], d2[6], d3[6]}, [r0], r1 vst4.8 {d0[7], d1[7], d2[7], d3[7]}, [r0] vpop {d8-d15} ; restore neon registers pop {pc} ENDP ; |vpx_lpf_vertical_4_dual_neon| ; void filter4_16(); ; This is a helper function for the loopfilters. The individual functions do the ; necessary load, transpose (if necessary) and store. This function uses ; registers d8-d15, so the calling function must save those registers. ; ; r0-r3, r12 PRESERVE ; q0 blimit ; q1 limit ; q2 thresh ; q3 p3 ; q4 p2 ; q5 p1 ; q6 p0 ; q7 q0 ; q8 q1 ; q9 q2 ; q10 q3 ; ; Outputs: ; q5 op1 ; q6 op0 ; q7 oq0 ; q8 oq1 |filter4_16| PROC ; filter_mask vabd.u8 q11, q3, q4 ; m1 = abs(p3 - p2) vabd.u8 q12, q4, q5 ; m2 = abs(p2 - p1) vabd.u8 q13, q5, q6 ; m3 = abs(p1 - p0) vabd.u8 q14, q8, q7 ; m4 = abs(q1 - q0) vabd.u8 q3, q9, q8 ; m5 = abs(q2 - q1) vabd.u8 q4, q10, q9 ; m6 = abs(q3 - q2) ; only compare the largest value to limit vmax.u8 q11, q11, q12 ; m7 = max(m1, m2) vmax.u8 q12, q13, q14 ; m8 = max(m3, m4) vabd.u8 q9, q6, q7 ; abs(p0 - q0) vmax.u8 q3, q3, q4 ; m9 = max(m5, m6) vmov.u8 q10, #0x80 vmax.u8 q15, q11, q12 ; m10 = max(m7, m8) vcgt.u8 q13, q13, q2 ; (abs(p1 - p0) > thresh)*-1 vcgt.u8 q14, q14, q2 ; (abs(q1 - q0) > thresh)*-1 vmax.u8 q15, q15, q3 ; m11 = max(m10, m9) vabd.u8 q2, q5, q8 ; a = abs(p1 - q1) vqadd.u8 q9, q9, q9 ; b = abs(p0 - q0) * 2 veor q7, q7, q10 ; qs0 vcge.u8 q15, q1, q15 ; abs(m11) > limit vshr.u8 q2, q2, #1 ; a = a / 2 veor q6, q6, q10 ; ps0 veor q5, q5, q10 ; ps1 vqadd.u8 q9, q9, q2 ; a = b + a veor q8, q8, q10 ; qs1 vmov.u16 q4, #3 vsubl.s8 q2, d14, d12 ; ( qs0 - ps0) vsubl.s8 q11, d15, d13 vcge.u8 q9, q0, q9 ; a > blimit vqsub.s8 q1, q5, q8 ; filter = clamp(ps1-qs1) vorr q14, q13, q14 ; hev vmul.i16 q2, q2, q4 ; 3 * ( qs0 - ps0) vmul.i16 q11, q11, q4 vand q1, q1, q14 ; filter &= hev vand q15, q15, q9 ; mask vmov.u8 q4, #3 vaddw.s8 q2, q2, d2 ; filter + 3 * (qs0 - ps0) vaddw.s8 q11, q11, d3 vmov.u8 q9, #4 ; filter = clamp(filter + 3 * ( qs0 - ps0)) vqmovn.s16 d2, q2 vqmovn.s16 d3, q11 vand q1, q1, q15 ; filter &= mask vqadd.s8 q2, q1, q4 ; filter2 = clamp(filter+3) vqadd.s8 q1, q1, q9 ; filter1 = clamp(filter+4) vshr.s8 q2, q2, #3 ; filter2 >>= 3 vshr.s8 q1, q1, #3 ; filter1 >>= 3 vqadd.s8 q11, q6, q2 ; u = clamp(ps0 + filter2) vqsub.s8 q0, q7, q1 ; u = clamp(qs0 - filter1) ; outer tap adjustments vrshr.s8 q1, q1, #1 ; filter = ++filter1 >> 1 veor q7, q0, q10 ; *oq0 = u^0x80 vbic q1, q1, q14 ; filter &= ~hev vqadd.s8 q13, q5, q1 ; u = clamp(ps1 + filter) vqsub.s8 q12, q8, q1 ; u = clamp(qs1 - filter) veor q6, q11, q10 ; *op0 = u^0x80 veor q5, q13, q10 ; *op1 = u^0x80 veor q8, q12, q10 ; *oq1 = u^0x80 bx lr ENDP ; |filter4_16| END libvpx-1.8.2/vpx_dsp/arm/loopfilter_8_neon.asm000066400000000000000000000422731357355204000214330ustar00rootroot00000000000000; ;
Copyright (c) 2013 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; EXPORT |vpx_lpf_horizontal_8_neon| EXPORT |vpx_lpf_horizontal_8_dual_neon| EXPORT |vpx_lpf_vertical_8_neon| EXPORT |vpx_lpf_vertical_8_dual_neon| ARM AREA ||.text||, CODE, READONLY, ALIGN=2 ; Currently vpx only works on 8 iterations at a time. The vp8 loop filter ; works on 16 iterations at a time. ; ; void vpx_lpf_horizontal_8_neon(uint8_t *s, int p, ; const uint8_t *blimit, ; const uint8_t *limit, ; const uint8_t *thresh) ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, |vpx_lpf_horizontal_8_neon| PROC push {r4-r5, lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit ldr r2, [sp, #12] ; load thresh add r1, r1, r1 ; double pitch vld1.8 {d1[]}, [r3] ; duplicate *limit vld1.8 {d2[]}, [r2] ; duplicate *thresh sub r3, r0, r1, lsl #1 ; move src pointer down by 4 lines add r2, r3, r1, lsr #1 ; set to 3 lines down vld1.u8 {d3}, [r3@64], r1 ; p3 vld1.u8 {d4}, [r2@64], r1 ; p2 vld1.u8 {d5}, [r3@64], r1 ; p1 vld1.u8 {d6}, [r2@64], r1 ; p0 vld1.u8 {d7}, [r3@64], r1 ; q0 vld1.u8 {d16}, [r2@64], r1 ; q1 vld1.u8 {d17}, [r3@64] ; q2 vld1.u8 {d18}, [r2@64], r1 ; q3 sub r3, r3, r1, lsl #1 sub r2, r2, r1, lsl #2 bl vpx_mbloop_filter_neon vst1.u8 {d0}, [r2@64], r1 ; store op2 vst1.u8 {d1}, [r3@64], r1 ; store op1 vst1.u8 {d2}, [r2@64], r1 ; store op0 vst1.u8 {d3}, [r3@64], r1 ; store oq0 vst1.u8 {d4}, [r2@64], r1 ; store oq1 vst1.u8 {d5}, [r3@64], r1 ; store oq2 pop {r4-r5, pc} ENDP ; |vpx_lpf_horizontal_8_neon| ;void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, ; int p, ; const uint8_t *blimit0, ; const uint8_t *limit0, ; const uint8_t *thresh0, ; const uint8_t *blimit1, ; const uint8_t *limit1, ; const uint8_t *thresh1) ; r0 uint8_t *s, ; r1 int p, /* pitch */ ; r2 const uint8_t *blimit0, ; r3 const uint8_t *limit0, ; sp const uint8_t *thresh0, ; sp + 4 const uint8_t *blimit1, ; sp + 8 const uint8_t *limit1, ; sp + 12 const uint8_t *thresh1, |vpx_lpf_horizontal_8_dual_neon| PROC push {r0-r1, lr} ldr lr, [sp, #12] push {lr} ; thresh0 bl vpx_lpf_horizontal_8_neon ldr r2, [sp, #20] ; blimit1 ldr r3, [sp, #24] ; limit1 ldr lr, [sp, #28] str lr, [sp, #16] ; thresh1 add sp, #4 pop {r0-r1, lr} add r0, #8 ; s + 8 b vpx_lpf_horizontal_8_neon ENDP ; |vpx_lpf_horizontal_8_dual_neon| ; void vpx_lpf_vertical_8_neon(uint8_t *s, ; int pitch, ; const uint8_t *blimit, ; const uint8_t *limit, ; const uint8_t *thresh) ; ; r0 uint8_t *s, ; r1 int pitch, ; r2 const uint8_t *blimit, ; r3 const uint8_t *limit, ; sp const uint8_t *thresh, |vpx_lpf_vertical_8_neon| PROC push {r4-r5, lr} vld1.8 {d0[]}, [r2] ; duplicate *blimit vld1.8 {d1[]}, [r3] ; duplicate *limit ldr r3, [sp, #12] ; load thresh sub r2, r0, #4 ; move s pointer down by 4 columns vld1.8 {d2[]}, [r3] ; duplicate *thresh vld1.u8 {d3}, [r2], r1 ; load s data vld1.u8 {d4}, [r2], r1 vld1.u8 {d5}, [r2], r1 vld1.u8 {d6}, [r2], r1 vld1.u8 {d7}, [r2], r1 vld1.u8 {d16}, [r2], r1 vld1.u8 {d17}, [r2], r1 vld1.u8 {d18}, [r2] ;transpose to 8x8 matrix vtrn.32 d3, d7 vtrn.32 d4, d16 vtrn.32 d5, d17 vtrn.32 d6, d18 vtrn.16 d3, d5 vtrn.16 d4, d6 vtrn.16 d7, d17 vtrn.16 d16, d18 vtrn.8 d3, d4 vtrn.8 d5, d6 vtrn.8 d7, d16 vtrn.8 d17, d18
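    ; The vtrn.32/vtrn.16/vtrn.8 cascade above is the usual NEON in-register
    ; 8x8 byte transpose: each pass swaps progressively smaller sub-blocks, so
    ; row r of the loaded block ends up as column r. A rough scalar equivalent
    ; (illustrative sketch only, not part of this file's build):
    ;   for (r = 0; r < 8; ++r)
    ;     for (c = 0; c < 8; ++c) out[c][r] = in[r][c];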
sub r2, r0, #3 add r3, r0, #1 bl vpx_mbloop_filter_neon ;store op2, op1, op0, oq0 vst4.8 {d0[0], d1[0], d2[0], d3[0]}, [r2], r1 vst4.8 {d0[1], d1[1], d2[1], d3[1]}, [r2], r1 vst4.8 {d0[2], d1[2], d2[2], d3[2]}, [r2], r1 vst4.8 {d0[3], d1[3], d2[3], d3[3]}, [r2], r1 vst4.8 {d0[4], d1[4], d2[4], d3[4]}, [r2], r1 vst4.8 {d0[5], d1[5], d2[5], d3[5]}, [r2], r1 vst4.8 {d0[6], d1[6], d2[6], d3[6]}, [r2], r1 vst4.8 {d0[7], d1[7], d2[7], d3[7]}, [r2] ;store oq1, oq2 vst2.8 {d4[0], d5[0]}, [r3], r1 vst2.8 {d4[1], d5[1]}, [r3], r1 vst2.8 {d4[2], d5[2]}, [r3], r1 vst2.8 {d4[3], d5[3]}, [r3], r1 vst2.8 {d4[4], d5[4]}, [r3], r1 vst2.8 {d4[5], d5[5]}, [r3], r1 vst2.8 {d4[6], d5[6]}, [r3], r1 vst2.8 {d4[7], d5[7]}, [r3] pop {r4-r5, pc} ENDP ; |vpx_lpf_vertical_8_neon| ;void vpx_lpf_vertical_8_dual_neon(uint8_t *s, ; int pitch, ; const uint8_t *blimit0, ; const uint8_t *limit0, ; const uint8_t *thresh0, ; const uint8_t *blimit1, ; const uint8_t *limit1, ; const uint8_t *thresh1) ; r0 uint8_t *s, ; r1 int pitch ; r2 const uint8_t *blimit0, ; r3 const uint8_t *limit0, ; sp const uint8_t *thresh0, ; sp + 4 const uint8_t *blimit1, ; sp + 8 const uint8_t *limit1, ; sp + 12 const uint8_t *thresh1, |vpx_lpf_vertical_8_dual_neon| PROC push {r0-r1, lr} ldr lr, [sp, #12] push {lr} ; thresh0 bl vpx_lpf_vertical_8_neon ldr r2, [sp, #20] ; blimit1 ldr r3, [sp, #24] ; limit1 ldr lr, [sp, #28] str lr, [sp, #16] ; thresh1 add sp, #4 pop {r0-r1, lr} add r0, r0, r1, lsl #3 ; s + 8 * pitch b vpx_lpf_vertical_8_neon ENDP ; |vpx_lpf_vertical_8_dual_neon| ; void vpx_mbloop_filter_neon(); ; This is a helper function for the loopfilters. The individual functions do the ; necessary load, transpose (if necessary) and store. The function does not use ; registers d8-d15. ; ; Inputs: ; r0-r3, r12 PRESERVE ; d0 blimit ; d1 limit ; d2 thresh ; d3 p3 ; d4 p2 ; d5 p1 ; d6 p0 ; d7 q0 ; d16 q1 ; d17 q2 ; d18 q3 ; ; Outputs: ; d0 op2 ; d1 op1 ; d2 op0 ; d3 oq0 ; d4 oq1 ; d5 oq2 |vpx_mbloop_filter_neon| PROC ; filter_mask vabd.u8 d19, d3, d4 ; m1 = abs(p3 - p2) vabd.u8 d20, d4, d5 ; m2 = abs(p2 - p1) vabd.u8 d21, d5, d6 ; m3 = abs(p1 - p0) vabd.u8 d22, d16, d7 ; m4 = abs(q1 - q0) vabd.u8 d23, d17, d16 ; m5 = abs(q2 - q1) vabd.u8 d24, d18, d17 ; m6 = abs(q3 - q2) ; only compare the largest value to limit vmax.u8 d19, d19, d20 ; m1 = max(m1, m2) vmax.u8 d20, d21, d22 ; m2 = max(m3, m4) vabd.u8 d25, d6, d4 ; m7 = abs(p0 - p2) vmax.u8 d23, d23, d24 ; m3 = max(m5, m6) vabd.u8 d26, d7, d17 ; m8 = abs(q0 - q2) vmax.u8 d19, d19, d20 vabd.u8 d24, d6, d7 ; m9 = abs(p0 - q0) vabd.u8 d27, d3, d6 ; m10 = abs(p3 - p0) vabd.u8 d28, d18, d7 ; m11 = abs(q3 - q0) vmax.u8 d19, d19, d23 vabd.u8 d23, d5, d16 ; a = abs(p1 - q1) vqadd.u8 d24, d24, d24 ; b = abs(p0 - q0) * 2 ; abs () > limit vcge.u8 d19, d1, d19 ; only compare the largest value to thresh vmax.u8 d25, d25, d26 ; m4 = max(m7, m8) vmax.u8 d26, d27, d28 ; m5 = max(m10, m11) vshr.u8 d23, d23, #1 ; a = a / 2 vmax.u8 d25, d25, d26 ; m4 = max(m4, m5) vqadd.u8 d24, d24, d23 ; a = b + a vmax.u8 d20, d20, d25 ; m2 = max(m2, m4) vmov.u8 d23, #1 vcge.u8 d24, d0, d24 ; a > blimit vcgt.u8 d21, d21, d2 ; (abs(p1 - p0) > thresh)*-1 vcge.u8 d20, d23, d20 ; flat vand d19, d19, d24 ; mask vcgt.u8 d23, d22, d2 ; (abs(q1 - q0) > thresh)*-1 vand d20, d20, d19 ; flat & mask vmov.u8 d22, #0x80 vorr d23, d21, d23 ; hev ; This instruction will truncate the "flat & mask" masks down to 4 bits ; each to fit into one 32 bit arm register. The values are stored in ; q10.64[0].
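    ; Worked example (illustrative values): a 16-bit lane holding the mask
    ; pair 0xFF:0xFF narrows to 0xFF after the shift, 0xFF:0x00 to 0xF0, and
    ; 0x00:0xFF to 0x0F, so the 32-bit value read out below is 0xFFFFFFFF
    ; exactly when every mask is set and 0 exactly when none is; the
    ; "adds ... #1" that follows sets the Z flag only in the all-ones case.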
vshrn.u16 d30, q10, #4 vmov.u32 r4, d30[0] ; flat & mask 4bits adds r5, r4, #1 ; Check for all 1's ; If mask and flat are 1's for all vectors, then we only need to execute ; the power branch for all vectors. beq power_branch_only cmp r4, #0 ; Check for 0, set flag for later ; mbfilter() function ; filter() function ; convert to signed veor d21, d7, d22 ; qs0 veor d24, d6, d22 ; ps0 veor d25, d5, d22 ; ps1 veor d26, d16, d22 ; qs1 vmov.u8 d27, #3 vsub.s8 d28, d21, d24 ; ( qs0 - ps0) vqsub.s8 d29, d25, d26 ; filter = clamp(ps1-qs1) vmull.s8 q15, d28, d27 ; 3 * ( qs0 - ps0) vand d29, d29, d23 ; filter &= hev vaddw.s8 q15, q15, d29 ; filter + 3 * (qs0 - ps0) vmov.u8 d29, #4 ; filter = clamp(filter + 3 * ( qs0 - ps0)) vqmovn.s16 d28, q15 vand d28, d28, d19 ; filter &= mask vqadd.s8 d30, d28, d27 ; filter2 = clamp(filter+3) vqadd.s8 d29, d28, d29 ; filter1 = clamp(filter+4) vshr.s8 d30, d30, #3 ; filter2 >>= 3 vshr.s8 d29, d29, #3 ; filter1 >>= 3 vqadd.s8 d24, d24, d30 ; op0 = clamp(ps0 + filter2) vqsub.s8 d21, d21, d29 ; oq0 = clamp(qs0 - filter1) ; outer tap adjustments: ++filter1 >> 1 vrshr.s8 d29, d29, #1 vbic d29, d29, d23 ; filter &= ~hev vqadd.s8 d25, d25, d29 ; op1 = clamp(ps1 + filter) vqsub.s8 d26, d26, d29 ; oq1 = clamp(qs1 - filter) ; If mask and flat are 0's for all vectors, then we only need to execute ; the filter branch for all vectors. beq filter_branch_only ; If mask and flat are mixed then we must perform both branches and ; combine the data. veor d24, d24, d22 ; *f_op0 = u^0x80 veor d21, d21, d22 ; *f_oq0 = u^0x80 veor d25, d25, d22 ; *f_op1 = u^0x80 veor d26, d26, d22 ; *f_oq1 = u^0x80 ; At this point we have already executed the filter branch. The filter ; branch does not set op2 or oq2, so use p2 and q2. Execute the power ; branch and combine the data. 
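    ; vbif dst, src, mask copies src into dst where the mask bits are clear,
    ; and vbit copies src where they are set, so each output lane below ends
    ; up as (illustrative summary only):
    ;   out = (power_result & flat_mask) | (filter_result & ~flat_mask)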
vmov.u8 d23, #2 vaddl.u8 q14, d6, d7 ; r_op2 = p0 + q0 vmlal.u8 q14, d3, d27 ; r_op2 += p3 * 3 vmlal.u8 q14, d4, d23 ; r_op2 += p2 * 2 vbif d0, d4, d20 ; op2 |= p2 & ~(flat & mask) vaddw.u8 q14, d5 ; r_op2 += p1 vbif d1, d25, d20 ; op1 |= f_op1 & ~(flat & mask) vqrshrn.u16 d30, q14, #3 ; r_op2 vsubw.u8 q14, d3 ; r_op1 = r_op2 - p3 vsubw.u8 q14, d4 ; r_op1 -= p2 vaddw.u8 q14, d5 ; r_op1 += p1 vaddw.u8 q14, d16 ; r_op1 += q1 vbif d2, d24, d20 ; op0 |= f_op0 & ~(flat & mask) vqrshrn.u16 d31, q14, #3 ; r_op1 vsubw.u8 q14, d3 ; r_op0 = r_op1 - p3 vsubw.u8 q14, d5 ; r_op0 -= p1 vaddw.u8 q14, d6 ; r_op0 += p0 vaddw.u8 q14, d17 ; r_op0 += q2 vbit d0, d30, d20 ; op2 |= r_op2 & (flat & mask) vqrshrn.u16 d23, q14, #3 ; r_op0 vsubw.u8 q14, d3 ; r_oq0 = r_op0 - p3 vsubw.u8 q14, d6 ; r_oq0 -= p0 vaddw.u8 q14, d7 ; r_oq0 += q0 vbit d1, d31, d20 ; op1 |= r_op1 & (flat & mask) vaddw.u8 q14, d18 ; r_oq0 += q3 vbit d2, d23, d20 ; op0 |= r_op0 & (flat & mask) vqrshrn.u16 d22, q14, #3 ; r_oq0 vsubw.u8 q14, d4 ; r_oq1 = r_oq0 - p2 vsubw.u8 q14, d7 ; r_oq1 -= q0 vaddw.u8 q14, d16 ; r_oq1 += q1 vbif d3, d21, d20 ; oq0 |= f_oq0 & ~(flat & mask) vaddw.u8 q14, d18 ; r_oq1 += q3 vbif d4, d26, d20 ; oq1 |= f_oq1 & ~(flat & mask) vqrshrn.u16 d6, q14, #3 ; r_oq1 vsubw.u8 q14, d5 ; r_oq2 = r_oq1 - p1 vsubw.u8 q14, d16 ; r_oq2 -= q1 vaddw.u8 q14, d17 ; r_oq2 += q2 vaddw.u8 q14, d18 ; r_oq2 += q3 vbif d5, d17, d20 ; oq2 |= q2 & ~(flat & mask) vqrshrn.u16 d7, q14, #3 ; r_oq2 vbit d3, d22, d20 ; oq0 |= r_oq0 & (flat & mask) vbit d4, d6, d20 ; oq1 |= r_oq1 & (flat & mask) vbit d5, d7, d20 ; oq2 |= r_oq2 & (flat & mask) bx lr power_branch_only vmov.u8 d27, #3 vmov.u8 d21, #2 vaddl.u8 q14, d6, d7 ; op2 = p0 + q0 vmlal.u8 q14, d3, d27 ; op2 += p3 * 3 vmlal.u8 q14, d4, d21 ; op2 += p2 * 2 vaddw.u8 q14, d5 ; op2 += p1 vqrshrn.u16 d0, q14, #3 ; op2 vsubw.u8 q14, d3 ; op1 = op2 - p3 vsubw.u8 q14, d4 ; op1 -= p2 vaddw.u8 q14, d5 ; op1 += p1 vaddw.u8 q14, d16 ; op1 += q1 vqrshrn.u16 d1, q14, #3 ; op1 vsubw.u8 q14, d3 ; op0 = op1 - p3 vsubw.u8 q14, d5 ; op0 -= p1 vaddw.u8 q14, d6 ; op0 += p0 vaddw.u8 q14, d17 ; op0 += q2 vqrshrn.u16 d2, q14, #3 ; op0 vsubw.u8 q14, d3 ; oq0 = op0 - p3 vsubw.u8 q14, d6 ; oq0 -= p0 vaddw.u8 q14, d7 ; oq0 += q0 vaddw.u8 q14, d18 ; oq0 += q3 vqrshrn.u16 d3, q14, #3 ; oq0 vsubw.u8 q14, d4 ; oq1 = oq0 - p2 vsubw.u8 q14, d7 ; oq1 -= q0 vaddw.u8 q14, d16 ; oq1 += q1 vaddw.u8 q14, d18 ; oq1 += q3 vqrshrn.u16 d4, q14, #3 ; oq1 vsubw.u8 q14, d5 ; oq2 = oq1 - p1 vsubw.u8 q14, d16 ; oq2 -= q1 vaddw.u8 q14, d17 ; oq2 += q2 vaddw.u8 q14, d18 ; oq2 += q3 vqrshrn.u16 d5, q14, #3 ; oq2 bx lr filter_branch_only ; TODO(fgalligan): See if we can rearrange registers so we do not need to ; do the 2 vswp. vswp d0, d4 ; op2 vswp d5, d17 ; oq2 veor d2, d24, d22 ; *op0 = u^0x80 veor d3, d21, d22 ; *oq0 = u^0x80 veor d1, d25, d22 ; *op1 = u^0x80 veor d4, d26, d22 ; *oq1 = u^0x80 bx lr ENDP ; |vpx_mbloop_filter_neon| END libvpx-1.8.2/vpx_dsp/arm/loopfilter_neon.c000066400000000000000000001660521357355204000206470ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <arm_neon.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/transpose_neon.h" // For all the static inline functions, the functions ending with '_8' process // 8 samples in a bunch, and the functions ending with '_16' process 16 samples // in a bunch. #define FUN_LOAD_THRESH(w, r) \ static INLINE void load_thresh_##w( \ const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, \ uint8x##w##_t *blimit_vec, uint8x##w##_t *limit_vec, \ uint8x##w##_t *thresh_vec) { \ *blimit_vec = vld1##r##dup_u8(blimit); \ *limit_vec = vld1##r##dup_u8(limit); \ *thresh_vec = vld1##r##dup_u8(thresh); \ } FUN_LOAD_THRESH(8, _) // load_thresh_8 FUN_LOAD_THRESH(16, q_) // load_thresh_16 #undef FUN_LOAD_THRESH static INLINE void load_thresh_8_dual( const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, uint8x16_t *blimit_vec, uint8x16_t *limit_vec, uint8x16_t *thresh_vec) { *blimit_vec = vcombine_u8(vld1_dup_u8(blimit0), vld1_dup_u8(blimit1)); *limit_vec = vcombine_u8(vld1_dup_u8(limit0), vld1_dup_u8(limit1)); *thresh_vec = vcombine_u8(vld1_dup_u8(thresh0), vld1_dup_u8(thresh1)); } // Here flat is 64-bit long, with each 8-bit (or 4-bit) chunk being a mask of a // pixel. When used to control filter branches, we only detect whether it is all // 0s or all 1s. We pairwise add flat to a 32-bit long number flat_status. // flat equals 0 if and only if flat_status equals 0. // flat equals -1 (all 1s) if and only if flat_status equals -2. (This is true // because each mask occupies more than 1 bit.) static INLINE uint32_t calc_flat_status_8(uint8x8_t flat) { return vget_lane_u32( vreinterpret_u32_u64(vpaddl_u32(vreinterpret_u32_u8(flat))), 0); } // Here flat is 128-bit long, with each 8-bit chunk being a mask of a pixel. // When used to control filter branches, we only detect whether it is all 0s or // all 1s. We apply an arithmetic narrowing shift right by 4 to each 16-bit // chunk, so we get a 64-bit long number, with each 4-bit chunk being a mask of // a pixel. Then we pairwise add flat to a 32-bit long number flat_status. // flat equals 0 if and only if flat_status equals 0. // flat equals -1 (all 1s) if and only if flat_status equals -2. (This is true // because each mask occupies more than 1 bit.)
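// Worked example (illustrative): once narrowed, an all-ones mask gives the
// 64-bit value 0xFFFFFFFFFFFFFFFF; vpaddl sums its two 0xFFFFFFFF halves to
// 0x00000001FFFFFFFE, whose low 32 bits are 0xFFFFFFFE, i.e. (uint32_t)-2.
// With no mask set the result is 0, and any mixed pattern yields some other
// nonzero value.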
static INLINE uint32_t calc_flat_status_16(uint8x16_t flat) { const uint8x8_t flat_4bit = vreinterpret_u8_s8(vshrn_n_s16(vreinterpretq_s16_u8(flat), 4)); return calc_flat_status_8(flat_4bit); } #define FUN_FILTER_HEV_MASK4(w, r) \ static INLINE uint8x##w##_t filter_hev_mask4_##w( \ const uint8x##w##_t limit, const uint8x##w##_t blimit, \ const uint8x##w##_t thresh, const uint8x##w##_t p3, \ const uint8x##w##_t p2, const uint8x##w##_t p1, const uint8x##w##_t p0, \ const uint8x##w##_t q0, const uint8x##w##_t q1, const uint8x##w##_t q2, \ const uint8x##w##_t q3, uint8x##w##_t *hev, uint8x##w##_t *mask) { \ uint8x##w##_t max, t0, t1; \ \ max = vabd##r##u8(p1, p0); \ max = vmax##r##u8(max, vabd##r##u8(q1, q0)); \ *hev = vcgt##r##u8(max, thresh); \ *mask = vmax##r##u8(max, vabd##r##u8(p3, p2)); \ *mask = vmax##r##u8(*mask, vabd##r##u8(p2, p1)); \ *mask = vmax##r##u8(*mask, vabd##r##u8(q2, q1)); \ *mask = vmax##r##u8(*mask, vabd##r##u8(q3, q2)); \ t0 = vabd##r##u8(p0, q0); \ t1 = vabd##r##u8(p1, q1); \ t0 = vqadd##r##u8(t0, t0); \ t1 = vshr##r##n_u8(t1, 1); \ t0 = vqadd##r##u8(t0, t1); \ *mask = vcle##r##u8(*mask, limit); \ t0 = vcle##r##u8(t0, blimit); \ *mask = vand##r##u8(*mask, t0); \ \ return max; \ } FUN_FILTER_HEV_MASK4(8, _) // filter_hev_mask4_8 FUN_FILTER_HEV_MASK4(16, q_) // filter_hev_mask4_16 #undef FUN_FILTER_HEV_MASK4 #define FUN_FILTER_FLAT_HEV_MASK(w, r) \ static INLINE uint8x##w##_t filter_flat_hev_mask_##w( \ const uint8x##w##_t limit, const uint8x##w##_t blimit, \ const uint8x##w##_t thresh, const uint8x##w##_t p3, \ const uint8x##w##_t p2, const uint8x##w##_t p1, const uint8x##w##_t p0, \ const uint8x##w##_t q0, const uint8x##w##_t q1, const uint8x##w##_t q2, \ const uint8x##w##_t q3, uint8x##w##_t *flat, uint32_t *flat_status, \ uint8x##w##_t *hev) { \ uint8x##w##_t max, mask; \ \ max = filter_hev_mask4_##w(limit, blimit, thresh, p3, p2, p1, p0, q0, q1, \ q2, q3, hev, &mask); \ *flat = vmax##r##u8(max, vabd##r##u8(p2, p0)); \ *flat = vmax##r##u8(*flat, vabd##r##u8(q2, q0)); \ *flat = vmax##r##u8(*flat, vabd##r##u8(p3, p0)); \ *flat = vmax##r##u8(*flat, vabd##r##u8(q3, q0)); \ *flat = vcle##r##u8(*flat, vdup##r##n_u8(1)); /* flat_mask4() */ \ *flat = vand##r##u8(*flat, mask); \ *flat_status = calc_flat_status_##w(*flat); \ \ return mask; \ } FUN_FILTER_FLAT_HEV_MASK(8, _) // filter_flat_hev_mask_8 FUN_FILTER_FLAT_HEV_MASK(16, q_) // filter_flat_hev_mask_16 #undef FUN_FILTER_FLAT_HEV_MASK #define FUN_FLAT_MASK5(w, r) \ static INLINE uint8x##w##_t flat_mask5_##w( \ const uint8x##w##_t p4, const uint8x##w##_t p3, const uint8x##w##_t p2, \ const uint8x##w##_t p1, const uint8x##w##_t p0, const uint8x##w##_t q0, \ const uint8x##w##_t q1, const uint8x##w##_t q2, const uint8x##w##_t q3, \ const uint8x##w##_t q4, const uint8x##w##_t flat, \ uint32_t *flat2_status) { \ uint8x##w##_t flat2 = vabd##r##u8(p4, p0); \ flat2 = vmax##r##u8(flat2, vabd##r##u8(p3, p0)); \ flat2 = vmax##r##u8(flat2, vabd##r##u8(p2, p0)); \ flat2 = vmax##r##u8(flat2, vabd##r##u8(p1, p0)); \ flat2 = vmax##r##u8(flat2, vabd##r##u8(q1, q0)); \ flat2 = vmax##r##u8(flat2, vabd##r##u8(q2, q0)); \ flat2 = vmax##r##u8(flat2, vabd##r##u8(q3, q0)); \ flat2 = vmax##r##u8(flat2, vabd##r##u8(q4, q0)); \ flat2 = vcle##r##u8(flat2, vdup##r##n_u8(1)); \ flat2 = vand##r##u8(flat2, flat); \ *flat2_status = calc_flat_status_##w(flat2); \ \ return flat2; \ } FUN_FLAT_MASK5(8, _) // flat_mask5_8 FUN_FLAT_MASK5(16, q_) // flat_mask5_16 #undef FUN_FLAT_MASK5 #define FUN_FLIP_SIGN(w, r) \ static INLINE int8x##w##_t flip_sign_##w(const 
uint8x##w##_t v) { \ const uint8x##w##_t sign_bit = vdup##r##n_u8(0x80); \ return vreinterpret##r##s8_u8(veor##r##u8(v, sign_bit)); \ } FUN_FLIP_SIGN(8, _) // flip_sign_8 FUN_FLIP_SIGN(16, q_) // flip_sign_16 #undef FUN_FLIP_SIGN #define FUN_FLIP_SIGN_BACK(w, r) \ static INLINE uint8x##w##_t flip_sign_back_##w(const int8x##w##_t v) { \ const int8x##w##_t sign_bit = vdup##r##n_s8(0x80); \ return vreinterpret##r##u8_s8(veor##r##s8(v, sign_bit)); \ } FUN_FLIP_SIGN_BACK(8, _) // flip_sign_back_8 FUN_FLIP_SIGN_BACK(16, q_) // flip_sign_back_16 #undef FUN_FLIP_SIGN_BACK static INLINE void filter_update_8(const uint8x8_t sub0, const uint8x8_t sub1, const uint8x8_t add0, const uint8x8_t add1, uint16x8_t *sum) { *sum = vsubw_u8(*sum, sub0); *sum = vsubw_u8(*sum, sub1); *sum = vaddw_u8(*sum, add0); *sum = vaddw_u8(*sum, add1); } static INLINE void filter_update_16(const uint8x16_t sub0, const uint8x16_t sub1, const uint8x16_t add0, const uint8x16_t add1, uint16x8_t *sum0, uint16x8_t *sum1) { *sum0 = vsubw_u8(*sum0, vget_low_u8(sub0)); *sum1 = vsubw_u8(*sum1, vget_high_u8(sub0)); *sum0 = vsubw_u8(*sum0, vget_low_u8(sub1)); *sum1 = vsubw_u8(*sum1, vget_high_u8(sub1)); *sum0 = vaddw_u8(*sum0, vget_low_u8(add0)); *sum1 = vaddw_u8(*sum1, vget_high_u8(add0)); *sum0 = vaddw_u8(*sum0, vget_low_u8(add1)); *sum1 = vaddw_u8(*sum1, vget_high_u8(add1)); } static INLINE uint8x8_t calc_7_tap_filter_8_kernel(const uint8x8_t sub0, const uint8x8_t sub1, const uint8x8_t add0, const uint8x8_t add1, uint16x8_t *sum) { filter_update_8(sub0, sub1, add0, add1, sum); return vrshrn_n_u16(*sum, 3); } static INLINE uint8x16_t calc_7_tap_filter_16_kernel( const uint8x16_t sub0, const uint8x16_t sub1, const uint8x16_t add0, const uint8x16_t add1, uint16x8_t *sum0, uint16x8_t *sum1) { filter_update_16(sub0, sub1, add0, add1, sum0, sum1); return vcombine_u8(vrshrn_n_u16(*sum0, 3), vrshrn_n_u16(*sum1, 3)); } static INLINE uint8x8_t apply_15_tap_filter_8_kernel( const uint8x8_t flat, const uint8x8_t sub0, const uint8x8_t sub1, const uint8x8_t add0, const uint8x8_t add1, const uint8x8_t in, uint16x8_t *sum) { filter_update_8(sub0, sub1, add0, add1, sum); return vbsl_u8(flat, vrshrn_n_u16(*sum, 4), in); } static INLINE uint8x16_t apply_15_tap_filter_16_kernel( const uint8x16_t flat, const uint8x16_t sub0, const uint8x16_t sub1, const uint8x16_t add0, const uint8x16_t add1, const uint8x16_t in, uint16x8_t *sum0, uint16x8_t *sum1) { uint8x16_t t; filter_update_16(sub0, sub1, add0, add1, sum0, sum1); t = vcombine_u8(vrshrn_n_u16(*sum0, 4), vrshrn_n_u16(*sum1, 4)); return vbslq_u8(flat, t, in); } // 7-tap filter [1, 1, 1, 2, 1, 1, 1] static INLINE void calc_7_tap_filter_8(const uint8x8_t p3, const uint8x8_t p2, const uint8x8_t p1, const uint8x8_t p0, const uint8x8_t q0, const uint8x8_t q1, const uint8x8_t q2, const uint8x8_t q3, uint8x8_t *op2, uint8x8_t *op1, uint8x8_t *op0, uint8x8_t *oq0, uint8x8_t *oq1, uint8x8_t *oq2) { uint16x8_t sum; sum = vaddl_u8(p3, p3); // 2*p3 sum = vaddw_u8(sum, p3); // 3*p3 sum = vaddw_u8(sum, p2); // 3*p3+p2 sum = vaddw_u8(sum, p2); // 3*p3+2*p2 sum = vaddw_u8(sum, p1); // 3*p3+2*p2+p1 sum = vaddw_u8(sum, p0); // 3*p3+2*p2+p1+p0 sum = vaddw_u8(sum, q0); // 3*p3+2*p2+p1+p0+q0 *op2 = vrshrn_n_u16(sum, 3); *op1 = calc_7_tap_filter_8_kernel(p3, p2, p1, q1, &sum); *op0 = calc_7_tap_filter_8_kernel(p3, p1, p0, q2, &sum); *oq0 = calc_7_tap_filter_8_kernel(p3, p0, q0, q3, &sum); *oq1 = calc_7_tap_filter_8_kernel(p2, q0, q1, q3, &sum); *oq2 = calc_7_tap_filter_8_kernel(p1, q1, q2, q3, &sum); } static INLINE void 
calc_7_tap_filter_16( const uint8x16_t p3, const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0, const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2, const uint8x16_t q3, uint8x16_t *op2, uint8x16_t *op1, uint8x16_t *op0, uint8x16_t *oq0, uint8x16_t *oq1, uint8x16_t *oq2) { uint16x8_t sum0, sum1; sum0 = vaddl_u8(vget_low_u8(p3), vget_low_u8(p3)); // 2*p3 sum1 = vaddl_u8(vget_high_u8(p3), vget_high_u8(p3)); // 2*p3 sum0 = vaddw_u8(sum0, vget_low_u8(p3)); // 3*p3 sum1 = vaddw_u8(sum1, vget_high_u8(p3)); // 3*p3 sum0 = vaddw_u8(sum0, vget_low_u8(p2)); // 3*p3+p2 sum1 = vaddw_u8(sum1, vget_high_u8(p2)); // 3*p3+p2 sum0 = vaddw_u8(sum0, vget_low_u8(p2)); // 3*p3+2*p2 sum1 = vaddw_u8(sum1, vget_high_u8(p2)); // 3*p3+2*p2 sum0 = vaddw_u8(sum0, vget_low_u8(p1)); // 3*p3+2*p2+p1 sum1 = vaddw_u8(sum1, vget_high_u8(p1)); // 3*p3+2*p2+p1 sum0 = vaddw_u8(sum0, vget_low_u8(p0)); // 3*p3+2*p2+p1+p0 sum1 = vaddw_u8(sum1, vget_high_u8(p0)); // 3*p3+2*p2+p1+p0 sum0 = vaddw_u8(sum0, vget_low_u8(q0)); // 3*p3+2*p2+p1+p0+q0 sum1 = vaddw_u8(sum1, vget_high_u8(q0)); // 3*p3+2*p2+p1+p0+q0 *op2 = vcombine_u8(vrshrn_n_u16(sum0, 3), vrshrn_n_u16(sum1, 3)); *op1 = calc_7_tap_filter_16_kernel(p3, p2, p1, q1, &sum0, &sum1); *op0 = calc_7_tap_filter_16_kernel(p3, p1, p0, q2, &sum0, &sum1); *oq0 = calc_7_tap_filter_16_kernel(p3, p0, q0, q3, &sum0, &sum1); *oq1 = calc_7_tap_filter_16_kernel(p2, q0, q1, q3, &sum0, &sum1); *oq2 = calc_7_tap_filter_16_kernel(p1, q1, q2, q3, &sum0, &sum1); } #define FUN_APPLY_7_TAP_FILTER(w, r) \ static INLINE void apply_7_tap_filter_##w( \ const uint8x##w##_t flat, const uint8x##w##_t p3, \ const uint8x##w##_t p2, const uint8x##w##_t p1, const uint8x##w##_t p0, \ const uint8x##w##_t q0, const uint8x##w##_t q1, const uint8x##w##_t q2, \ const uint8x##w##_t q3, uint8x##w##_t *op2, uint8x##w##_t *op1, \ uint8x##w##_t *op0, uint8x##w##_t *oq0, uint8x##w##_t *oq1, \ uint8x##w##_t *oq2) { \ uint8x##w##_t tp1, tp0, tq0, tq1; \ calc_7_tap_filter_##w(p3, p2, p1, p0, q0, q1, q2, q3, op2, &tp1, &tp0, \ &tq0, &tq1, oq2); \ *op2 = vbsl##r##u8(flat, *op2, p2); \ *op1 = vbsl##r##u8(flat, tp1, *op1); \ *op0 = vbsl##r##u8(flat, tp0, *op0); \ *oq0 = vbsl##r##u8(flat, tq0, *oq0); \ *oq1 = vbsl##r##u8(flat, tq1, *oq1); \ *oq2 = vbsl##r##u8(flat, *oq2, q2); \ } FUN_APPLY_7_TAP_FILTER(8, _) // apply_7_tap_filter_8 FUN_APPLY_7_TAP_FILTER(16, q_) // apply_7_tap_filter_16 #undef FUN_APPLY_7_TAP_FILTER // 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] static INLINE void apply_15_tap_filter_8( const uint8x8_t flat2, const uint8x8_t p7, const uint8x8_t p6, const uint8x8_t p5, const uint8x8_t p4, const uint8x8_t p3, const uint8x8_t p2, const uint8x8_t p1, const uint8x8_t p0, const uint8x8_t q0, const uint8x8_t q1, const uint8x8_t q2, const uint8x8_t q3, const uint8x8_t q4, const uint8x8_t q5, const uint8x8_t q6, const uint8x8_t q7, uint8x8_t *op6, uint8x8_t *op5, uint8x8_t *op4, uint8x8_t *op3, uint8x8_t *op2, uint8x8_t *op1, uint8x8_t *op0, uint8x8_t *oq0, uint8x8_t *oq1, uint8x8_t *oq2, uint8x8_t *oq3, uint8x8_t *oq4, uint8x8_t *oq5, uint8x8_t *oq6) { uint16x8_t sum; sum = vshll_n_u8(p7, 3); // 8*p7 sum = vsubw_u8(sum, p7); // 7*p7 sum = vaddw_u8(sum, p6); // 7*p7+p6 sum = vaddw_u8(sum, p6); // 7*p7+2*p6 sum = vaddw_u8(sum, p5); // 7*p7+2*p6+p5 sum = vaddw_u8(sum, p4); // 7*p7+2*p6+p5+p4 sum = vaddw_u8(sum, p3); // 7*p7+2*p6+p5+p4+p3 sum = vaddw_u8(sum, p2); // 7*p7+2*p6+p5+p4+p3+p2 sum = vaddw_u8(sum, p1); // 7*p7+2*p6+p5+p4+p3+p2+p1 sum = vaddw_u8(sum, p0); // 
7*p7+2*p6+p5+p4+p3+p2+p1+p0 sum = vaddw_u8(sum, q0); // 7*p7+2*p6+p5+p4+p3+p2+p1+p0+q0 *op6 = vbsl_u8(flat2, vrshrn_n_u16(sum, 4), p6); *op5 = apply_15_tap_filter_8_kernel(flat2, p7, p6, p5, q1, p5, &sum); *op4 = apply_15_tap_filter_8_kernel(flat2, p7, p5, p4, q2, p4, &sum); *op3 = apply_15_tap_filter_8_kernel(flat2, p7, p4, p3, q3, p3, &sum); *op2 = apply_15_tap_filter_8_kernel(flat2, p7, p3, p2, q4, *op2, &sum); *op1 = apply_15_tap_filter_8_kernel(flat2, p7, p2, p1, q5, *op1, &sum); *op0 = apply_15_tap_filter_8_kernel(flat2, p7, p1, p0, q6, *op0, &sum); *oq0 = apply_15_tap_filter_8_kernel(flat2, p7, p0, q0, q7, *oq0, &sum); *oq1 = apply_15_tap_filter_8_kernel(flat2, p6, q0, q1, q7, *oq1, &sum); *oq2 = apply_15_tap_filter_8_kernel(flat2, p5, q1, q2, q7, *oq2, &sum); *oq3 = apply_15_tap_filter_8_kernel(flat2, p4, q2, q3, q7, q3, &sum); *oq4 = apply_15_tap_filter_8_kernel(flat2, p3, q3, q4, q7, q4, &sum); *oq5 = apply_15_tap_filter_8_kernel(flat2, p2, q4, q5, q7, q5, &sum); *oq6 = apply_15_tap_filter_8_kernel(flat2, p1, q5, q6, q7, q6, &sum); } static INLINE void apply_15_tap_filter_16( const uint8x16_t flat2, const uint8x16_t p7, const uint8x16_t p6, const uint8x16_t p5, const uint8x16_t p4, const uint8x16_t p3, const uint8x16_t p2, const uint8x16_t p1, const uint8x16_t p0, const uint8x16_t q0, const uint8x16_t q1, const uint8x16_t q2, const uint8x16_t q3, const uint8x16_t q4, const uint8x16_t q5, const uint8x16_t q6, const uint8x16_t q7, uint8x16_t *op6, uint8x16_t *op5, uint8x16_t *op4, uint8x16_t *op3, uint8x16_t *op2, uint8x16_t *op1, uint8x16_t *op0, uint8x16_t *oq0, uint8x16_t *oq1, uint8x16_t *oq2, uint8x16_t *oq3, uint8x16_t *oq4, uint8x16_t *oq5, uint8x16_t *oq6) { uint16x8_t sum0, sum1; uint8x16_t t; sum0 = vshll_n_u8(vget_low_u8(p7), 3); // 8*p7 sum1 = vshll_n_u8(vget_high_u8(p7), 3); // 8*p7 sum0 = vsubw_u8(sum0, vget_low_u8(p7)); // 7*p7 sum1 = vsubw_u8(sum1, vget_high_u8(p7)); // 7*p7 sum0 = vaddw_u8(sum0, vget_low_u8(p6)); // 7*p7+p6 sum1 = vaddw_u8(sum1, vget_high_u8(p6)); // 7*p7+p6 sum0 = vaddw_u8(sum0, vget_low_u8(p6)); // 7*p7+2*p6 sum1 = vaddw_u8(sum1, vget_high_u8(p6)); // 7*p7+2*p6 sum0 = vaddw_u8(sum0, vget_low_u8(p5)); // 7*p7+2*p6+p5 sum1 = vaddw_u8(sum1, vget_high_u8(p5)); // 7*p7+2*p6+p5 sum0 = vaddw_u8(sum0, vget_low_u8(p4)); // 7*p7+2*p6+p5+p4 sum1 = vaddw_u8(sum1, vget_high_u8(p4)); // 7*p7+2*p6+p5+p4 sum0 = vaddw_u8(sum0, vget_low_u8(p3)); // 7*p7+2*p6+p5+p4+p3 sum1 = vaddw_u8(sum1, vget_high_u8(p3)); // 7*p7+2*p6+p5+p4+p3 sum0 = vaddw_u8(sum0, vget_low_u8(p2)); // 7*p7+2*p6+p5+p4+p3+p2 sum1 = vaddw_u8(sum1, vget_high_u8(p2)); // 7*p7+2*p6+p5+p4+p3+p2 sum0 = vaddw_u8(sum0, vget_low_u8(p1)); // 7*p7+2*p6+p5+p4+p3+p2+p1 sum1 = vaddw_u8(sum1, vget_high_u8(p1)); // 7*p7+2*p6+p5+p4+p3+p2+p1 sum0 = vaddw_u8(sum0, vget_low_u8(p0)); // 7*p7+2*p6+p5+p4+p3+p2+p1+p0 sum1 = vaddw_u8(sum1, vget_high_u8(p0)); // 7*p7+2*p6+p5+p4+p3+p2+p1+p0 sum0 = vaddw_u8(sum0, vget_low_u8(q0)); // 7*p7+2*p6+p5+p4+p3+p2+p1+p0+q0 sum1 = vaddw_u8(sum1, vget_high_u8(q0)); // 7*p7+2*p6+p5+p4+p3+p2+p1+p0+q0 t = vcombine_u8(vrshrn_n_u16(sum0, 4), vrshrn_n_u16(sum1, 4)); *op6 = vbslq_u8(flat2, t, p6); *op5 = apply_15_tap_filter_16_kernel(flat2, p7, p6, p5, q1, p5, &sum0, &sum1); *op4 = apply_15_tap_filter_16_kernel(flat2, p7, p5, p4, q2, p4, &sum0, &sum1); *op3 = apply_15_tap_filter_16_kernel(flat2, p7, p4, p3, q3, p3, &sum0, &sum1); *op2 = apply_15_tap_filter_16_kernel(flat2, p7, p3, p2, q4, *op2, &sum0, &sum1); *op1 = apply_15_tap_filter_16_kernel(flat2, p7, p2, p1, q5, *op1, &sum0, &sum1); 
*op0 = apply_15_tap_filter_16_kernel(flat2, p7, p1, p0, q6, *op0, &sum0, &sum1); *oq0 = apply_15_tap_filter_16_kernel(flat2, p7, p0, q0, q7, *oq0, &sum0, &sum1); *oq1 = apply_15_tap_filter_16_kernel(flat2, p6, q0, q1, q7, *oq1, &sum0, &sum1); *oq2 = apply_15_tap_filter_16_kernel(flat2, p5, q1, q2, q7, *oq2, &sum0, &sum1); *oq3 = apply_15_tap_filter_16_kernel(flat2, p4, q2, q3, q7, q3, &sum0, &sum1); *oq4 = apply_15_tap_filter_16_kernel(flat2, p3, q3, q4, q7, q4, &sum0, &sum1); *oq5 = apply_15_tap_filter_16_kernel(flat2, p2, q4, q5, q7, q5, &sum0, &sum1); *oq6 = apply_15_tap_filter_16_kernel(flat2, p1, q5, q6, q7, q6, &sum0, &sum1); } #define FUN_FILTER4(w, r) \ static INLINE void filter4_##w( \ const uint8x##w##_t mask, const uint8x##w##_t hev, \ const uint8x##w##_t p1, const uint8x##w##_t p0, const uint8x##w##_t q0, \ const uint8x##w##_t q1, uint8x##w##_t *op1, uint8x##w##_t *op0, \ uint8x##w##_t *oq0, uint8x##w##_t *oq1) { \ int8x##w##_t filter, filter1, filter2, t; \ int8x##w##_t ps1 = flip_sign_##w(p1); \ int8x##w##_t ps0 = flip_sign_##w(p0); \ int8x##w##_t qs0 = flip_sign_##w(q0); \ int8x##w##_t qs1 = flip_sign_##w(q1); \ \ /* add outer taps if we have high edge variance */ \ filter = vqsub##r##s8(ps1, qs1); \ filter = vand##r##s8(filter, vreinterpret##r##s8_u8(hev)); \ t = vqsub##r##s8(qs0, ps0); \ \ /* inner taps */ \ filter = vqadd##r##s8(filter, t); \ filter = vqadd##r##s8(filter, t); \ filter = vqadd##r##s8(filter, t); \ filter = vand##r##s8(filter, vreinterpret##r##s8_u8(mask)); \ \ /* save bottom 3 bits so that we round one side +4 and the other +3 */ \ /* if it equals 4 we'll set it to adjust by -1 to account for the fact */ \ /* we'd round it by 3 the other way */ \ filter1 = vshr##r##n_s8(vqadd##r##s8(filter, vdup##r##n_s8(4)), 3); \ filter2 = vshr##r##n_s8(vqadd##r##s8(filter, vdup##r##n_s8(3)), 3); \ \ qs0 = vqsub##r##s8(qs0, filter1); \ ps0 = vqadd##r##s8(ps0, filter2); \ *oq0 = flip_sign_back_##w(qs0); \ *op0 = flip_sign_back_##w(ps0); \ \ /* outer tap adjustments */ \ filter = vrshr##r##n_s8(filter1, 1); \ filter = vbic##r##s8(filter, vreinterpret##r##s8_u8(hev)); \ \ qs1 = vqsub##r##s8(qs1, filter); \ ps1 = vqadd##r##s8(ps1, filter); \ *oq1 = flip_sign_back_##w(qs1); \ *op1 = flip_sign_back_##w(ps1); \ } FUN_FILTER4(8, _) // filter4_8 FUN_FILTER4(16, q_) // filter4_16 #undef FUN_FILTER4 #define FUN_FILTER8(w) \ static INLINE void filter8_##w( \ const uint8x##w##_t mask, const uint8x##w##_t flat, \ const uint32_t flat_status, const uint8x##w##_t hev, \ const uint8x##w##_t p3, const uint8x##w##_t p2, const uint8x##w##_t p1, \ const uint8x##w##_t p0, const uint8x##w##_t q0, const uint8x##w##_t q1, \ const uint8x##w##_t q2, const uint8x##w##_t q3, uint8x##w##_t *op2, \ uint8x##w##_t *op1, uint8x##w##_t *op0, uint8x##w##_t *oq0, \ uint8x##w##_t *oq1, uint8x##w##_t *oq2) { \ if (flat_status != (uint32_t)-2) { \ filter4_##w(mask, hev, p1, p0, q0, q1, op1, op0, oq0, oq1); \ *op2 = p2; \ *oq2 = q2; \ if (flat_status) { \ apply_7_tap_filter_##w(flat, p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, \ op0, oq0, oq1, oq2); \ } \ } else { \ calc_7_tap_filter_##w(p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0, \ oq0, oq1, oq2); \ } \ } FUN_FILTER8(8) // filter8_8 FUN_FILTER8(16) // filter8_16 #undef FUN_FILTER8 #define FUN_FILTER16(w) \ static INLINE void filter16_##w( \ const uint8x##w##_t mask, const uint8x##w##_t flat, \ const uint32_t flat_status, const uint8x##w##_t flat2, \ const uint32_t flat2_status, const uint8x##w##_t hev, \ const uint8x##w##_t p7, const uint8x##w##_t p6, const 
uint8x##w##_t p5, \ const uint8x##w##_t p4, const uint8x##w##_t p3, const uint8x##w##_t p2, \ const uint8x##w##_t p1, const uint8x##w##_t p0, const uint8x##w##_t q0, \ const uint8x##w##_t q1, const uint8x##w##_t q2, const uint8x##w##_t q3, \ const uint8x##w##_t q4, const uint8x##w##_t q5, const uint8x##w##_t q6, \ const uint8x##w##_t q7, uint8x##w##_t *op6, uint8x##w##_t *op5, \ uint8x##w##_t *op4, uint8x##w##_t *op3, uint8x##w##_t *op2, \ uint8x##w##_t *op1, uint8x##w##_t *op0, uint8x##w##_t *oq0, \ uint8x##w##_t *oq1, uint8x##w##_t *oq2, uint8x##w##_t *oq3, \ uint8x##w##_t *oq4, uint8x##w##_t *oq5, uint8x##w##_t *oq6) { \ if (flat_status != (uint32_t)-2) { \ filter4_##w(mask, hev, p1, p0, q0, q1, op1, op0, oq0, oq1); \ } \ \ if (flat_status) { \ *op2 = p2; \ *oq2 = q2; \ if (flat2_status != (uint32_t)-2) { \ apply_7_tap_filter_##w(flat, p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, \ op0, oq0, oq1, oq2); \ } \ if (flat2_status) { \ apply_15_tap_filter_##w(flat2, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, \ q2, q3, q4, q5, q6, q7, op6, op5, op4, op3, \ op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, \ oq6); \ } \ } \ } FUN_FILTER16(8) // filter16_8 FUN_FILTER16(16) // filter16_16 #undef FUN_FILTER16 #define FUN_LOAD8(w, r) \ static INLINE void load_##w##x8( \ const uint8_t *s, const int p, uint8x##w##_t *p3, uint8x##w##_t *p2, \ uint8x##w##_t *p1, uint8x##w##_t *p0, uint8x##w##_t *q0, \ uint8x##w##_t *q1, uint8x##w##_t *q2, uint8x##w##_t *q3) { \ *p3 = vld1##r##u8(s); \ s += p; \ *p2 = vld1##r##u8(s); \ s += p; \ *p1 = vld1##r##u8(s); \ s += p; \ *p0 = vld1##r##u8(s); \ s += p; \ *q0 = vld1##r##u8(s); \ s += p; \ *q1 = vld1##r##u8(s); \ s += p; \ *q2 = vld1##r##u8(s); \ s += p; \ *q3 = vld1##r##u8(s); \ } FUN_LOAD8(8, _) // load_8x8 FUN_LOAD8(16, q_) // load_16x8 #undef FUN_LOAD8 #define FUN_LOAD16(w, r) \ static INLINE void load_##w##x16( \ const uint8_t *s, const int p, uint8x##w##_t *s0, uint8x##w##_t *s1, \ uint8x##w##_t *s2, uint8x##w##_t *s3, uint8x##w##_t *s4, \ uint8x##w##_t *s5, uint8x##w##_t *s6, uint8x##w##_t *s7, \ uint8x##w##_t *s8, uint8x##w##_t *s9, uint8x##w##_t *s10, \ uint8x##w##_t *s11, uint8x##w##_t *s12, uint8x##w##_t *s13, \ uint8x##w##_t *s14, uint8x##w##_t *s15) { \ *s0 = vld1##r##u8(s); \ s += p; \ *s1 = vld1##r##u8(s); \ s += p; \ *s2 = vld1##r##u8(s); \ s += p; \ *s3 = vld1##r##u8(s); \ s += p; \ *s4 = vld1##r##u8(s); \ s += p; \ *s5 = vld1##r##u8(s); \ s += p; \ *s6 = vld1##r##u8(s); \ s += p; \ *s7 = vld1##r##u8(s); \ s += p; \ *s8 = vld1##r##u8(s); \ s += p; \ *s9 = vld1##r##u8(s); \ s += p; \ *s10 = vld1##r##u8(s); \ s += p; \ *s11 = vld1##r##u8(s); \ s += p; \ *s12 = vld1##r##u8(s); \ s += p; \ *s13 = vld1##r##u8(s); \ s += p; \ *s14 = vld1##r##u8(s); \ s += p; \ *s15 = vld1##r##u8(s); \ } FUN_LOAD16(8, _) // load_8x16 FUN_LOAD16(16, q_) // load_16x16 #undef FUN_LOAD16 #define FUN_STORE4(w, r) \ static INLINE void store_##w##x4( \ uint8_t *s, const int p, const uint8x##w##_t s0, const uint8x##w##_t s1, \ const uint8x##w##_t s2, const uint8x##w##_t s3) { \ vst1##r##u8(s, s0); \ s += p; \ vst1##r##u8(s, s1); \ s += p; \ vst1##r##u8(s, s2); \ s += p; \ vst1##r##u8(s, s3); \ } FUN_STORE4(8, _) // store_8x4 FUN_STORE4(16, q_) // store_16x4 #undef FUN_STORE4 #define FUN_STORE6(w, r) \ static INLINE void store_##w##x6( \ uint8_t *s, const int p, const uint8x##w##_t s0, const uint8x##w##_t s1, \ const uint8x##w##_t s2, const uint8x##w##_t s3, const uint8x##w##_t s4, \ const uint8x##w##_t s5) { \ vst1##r##u8(s, s0); \ s += p; \ vst1##r##u8(s, s1); \ s += p; \ vst1##r##u8(s, 
s2); \ s += p; \ vst1##r##u8(s, s3); \ s += p; \ vst1##r##u8(s, s4); \ s += p; \ vst1##r##u8(s, s5); \ } FUN_STORE6(8, _) // store_8x6 FUN_STORE6(16, q_) // store_16x6 #undef FUN_STORE6 static INLINE void store_4x8(uint8_t *s, const int p, const uint8x8_t p1, const uint8x8_t p0, const uint8x8_t q0, const uint8x8_t q1) { uint8x8x4_t o; o.val[0] = p1; o.val[1] = p0; o.val[2] = q0; o.val[3] = q1; vst4_lane_u8(s, o, 0); s += p; vst4_lane_u8(s, o, 1); s += p; vst4_lane_u8(s, o, 2); s += p; vst4_lane_u8(s, o, 3); s += p; vst4_lane_u8(s, o, 4); s += p; vst4_lane_u8(s, o, 5); s += p; vst4_lane_u8(s, o, 6); s += p; vst4_lane_u8(s, o, 7); } static INLINE void store_6x8(uint8_t *s, const int p, const uint8x8_t s0, const uint8x8_t s1, const uint8x8_t s2, const uint8x8_t s3, const uint8x8_t s4, const uint8x8_t s5) { uint8x8x3_t o0, o1; o0.val[0] = s0; o0.val[1] = s1; o0.val[2] = s2; o1.val[0] = s3; o1.val[1] = s4; o1.val[2] = s5; vst3_lane_u8(s - 3, o0, 0); vst3_lane_u8(s + 0, o1, 0); s += p; vst3_lane_u8(s - 3, o0, 1); vst3_lane_u8(s + 0, o1, 1); s += p; vst3_lane_u8(s - 3, o0, 2); vst3_lane_u8(s + 0, o1, 2); s += p; vst3_lane_u8(s - 3, o0, 3); vst3_lane_u8(s + 0, o1, 3); s += p; vst3_lane_u8(s - 3, o0, 4); vst3_lane_u8(s + 0, o1, 4); s += p; vst3_lane_u8(s - 3, o0, 5); vst3_lane_u8(s + 0, o1, 5); s += p; vst3_lane_u8(s - 3, o0, 6); vst3_lane_u8(s + 0, o1, 6); s += p; vst3_lane_u8(s - 3, o0, 7); vst3_lane_u8(s + 0, o1, 7); } #define FUN_STORE8(w, r) \ static INLINE void store_##w##x8( \ uint8_t *s, const int p, const uint8x##w##_t s0, const uint8x##w##_t s1, \ const uint8x##w##_t s2, const uint8x##w##_t s3, const uint8x##w##_t s4, \ const uint8x##w##_t s5, const uint8x##w##_t s6, \ const uint8x##w##_t s7) { \ vst1##r##u8(s, s0); \ s += p; \ vst1##r##u8(s, s1); \ s += p; \ vst1##r##u8(s, s2); \ s += p; \ vst1##r##u8(s, s3); \ s += p; \ vst1##r##u8(s, s4); \ s += p; \ vst1##r##u8(s, s5); \ s += p; \ vst1##r##u8(s, s6); \ s += p; \ vst1##r##u8(s, s7); \ } FUN_STORE8(8, _) // store_8x8 FUN_STORE8(16, q_) // store_16x8 #undef FUN_STORE8 #define FUN_STORE14(w, r) \ static INLINE void store_##w##x14( \ uint8_t *s, const int p, const uint8x##w##_t p6, const uint8x##w##_t p5, \ const uint8x##w##_t p4, const uint8x##w##_t p3, const uint8x##w##_t p2, \ const uint8x##w##_t p1, const uint8x##w##_t p0, const uint8x##w##_t q0, \ const uint8x##w##_t q1, const uint8x##w##_t q2, const uint8x##w##_t q3, \ const uint8x##w##_t q4, const uint8x##w##_t q5, const uint8x##w##_t q6, \ const uint32_t flat_status, const uint32_t flat2_status) { \ if (flat_status) { \ if (flat2_status) { \ vst1##r##u8(s - 7 * p, p6); \ vst1##r##u8(s - 6 * p, p5); \ vst1##r##u8(s - 5 * p, p4); \ vst1##r##u8(s - 4 * p, p3); \ vst1##r##u8(s + 3 * p, q3); \ vst1##r##u8(s + 4 * p, q4); \ vst1##r##u8(s + 5 * p, q5); \ vst1##r##u8(s + 6 * p, q6); \ } \ vst1##r##u8(s - 3 * p, p2); \ vst1##r##u8(s + 2 * p, q2); \ } \ vst1##r##u8(s - 2 * p, p1); \ vst1##r##u8(s - 1 * p, p0); \ vst1##r##u8(s + 0 * p, q0); \ vst1##r##u8(s + 1 * p, q1); \ } FUN_STORE14(8, _) // store_8x14 FUN_STORE14(16, q_) // store_16x14 #undef FUN_STORE14 static INLINE void store_16x16(uint8_t *s, const int p, const uint8x16_t s0, const uint8x16_t s1, const uint8x16_t s2, const uint8x16_t s3, const uint8x16_t s4, const uint8x16_t s5, const uint8x16_t s6, const uint8x16_t s7, const uint8x16_t s8, const uint8x16_t s9, const uint8x16_t s10, const uint8x16_t s11, const uint8x16_t s12, const uint8x16_t s13, const uint8x16_t s14, const uint8x16_t s15) { vst1q_u8(s, s0); s += p; vst1q_u8(s, s1); 
s += p; vst1q_u8(s, s2); s += p; vst1q_u8(s, s3); s += p; vst1q_u8(s, s4); s += p; vst1q_u8(s, s5); s += p; vst1q_u8(s, s6); s += p; vst1q_u8(s, s7); s += p; vst1q_u8(s, s8); s += p; vst1q_u8(s, s9); s += p; vst1q_u8(s, s10); s += p; vst1q_u8(s, s11); s += p; vst1q_u8(s, s12); s += p; vst1q_u8(s, s13); s += p; vst1q_u8(s, s14); s += p; vst1q_u8(s, s15); } #define FUN_HOR_4_KERNEL(name, w) \ static INLINE void lpf_horizontal_4##name##kernel( \ uint8_t *s, const int p, const uint8x##w##_t blimit, \ const uint8x##w##_t limit, const uint8x##w##_t thresh) { \ uint8x##w##_t p3, p2, p1, p0, q0, q1, q2, q3, mask, hev; \ \ load_##w##x8(s - 4 * p, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); \ filter_hev_mask4_##w(limit, blimit, thresh, p3, p2, p1, p0, q0, q1, q2, \ q3, &hev, &mask); \ filter4_##w(mask, hev, p1, p0, q0, q1, &p1, &p0, &q0, &q1); \ store_##w##x4(s - 2 * p, p, p1, p0, q0, q1); \ } FUN_HOR_4_KERNEL(_, 8) // lpf_horizontal_4_kernel FUN_HOR_4_KERNEL(_dual_, 16) // lpf_horizontal_4_dual_kernel #undef FUN_HOR_4_KERNEL void vpx_lpf_horizontal_4_neon(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { uint8x8_t blimit_vec, limit_vec, thresh_vec; load_thresh_8(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec); lpf_horizontal_4_kernel(s, p, blimit_vec, limit_vec, thresh_vec); } void vpx_lpf_horizontal_4_dual_neon(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { uint8x16_t blimit_vec, limit_vec, thresh_vec; load_thresh_8_dual(blimit0, limit0, thresh0, blimit1, limit1, thresh1, &blimit_vec, &limit_vec, &thresh_vec); lpf_horizontal_4_dual_kernel(s, p, blimit_vec, limit_vec, thresh_vec); } void vpx_lpf_vertical_4_neon(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { uint8x8_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, mask, hev; load_thresh_8(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec); load_8x8(s - 4, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); transpose_u8_8x8(&p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); filter_hev_mask4_8(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, &hev, &mask); filter4_8(mask, hev, p1, p0, q0, q1, &p1, &p0, &q0, &q1); store_4x8(s - 2, p, p1, p0, q0, q1); } void vpx_lpf_vertical_4_dual_neon(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { uint8x16_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, mask, hev; uint8x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; load_thresh_8_dual(blimit0, limit0, thresh0, blimit1, limit1, thresh1, &blimit_vec, &limit_vec, &thresh_vec); load_8x16(s - 4, p, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7, &s8, &s9, &s10, &s11, &s12, &s13, &s14, &s15); transpose_u8_8x16(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); filter_hev_mask4_16(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, &hev, &mask); filter4_16(mask, hev, p1, p0, q0, q1, &p1, &p0, &q0, &q1); s -= 2; store_4x8(s, p, vget_low_u8(p1), vget_low_u8(p0), vget_low_u8(q0), vget_low_u8(q1)); store_4x8(s + 8 * p, p, vget_high_u8(p1), vget_high_u8(p0), vget_high_u8(q0), vget_high_u8(q1)); } void vpx_lpf_horizontal_8_neon(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { 
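  // Flow of this function (summary of the calls below): load the three
  // thresholds and the eight rows around the edge, build the filter/flat/hev
  // masks once, let filter8_8() choose between the 4-tap and 7-tap paths
  // based on the flat status, then store the six rows that may change
  // (op2..oq2).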
uint8x8_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0, oq0, oq1, oq2, mask, flat, hev; uint32_t flat_status; load_thresh_8(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec); load_8x8(s - 4 * p, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); mask = filter_flat_hev_mask_8(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, &flat, &flat_status, &hev); filter8_8(mask, flat, flat_status, hev, p3, p2, p1, p0, q0, q1, q2, q3, &op2, &op1, &op0, &oq0, &oq1, &oq2); store_8x6(s - 3 * p, p, op2, op1, op0, oq0, oq1, oq2); } void vpx_lpf_horizontal_8_dual_neon(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { uint8x16_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0, oq0, oq1, oq2, mask, flat, hev; uint32_t flat_status; load_thresh_8_dual(blimit0, limit0, thresh0, blimit1, limit1, thresh1, &blimit_vec, &limit_vec, &thresh_vec); load_16x8(s - 4 * p, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); mask = filter_flat_hev_mask_16(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, &flat, &flat_status, &hev); filter8_16(mask, flat, flat_status, hev, p3, p2, p1, p0, q0, q1, q2, q3, &op2, &op1, &op0, &oq0, &oq1, &oq2); store_16x6(s - 3 * p, p, op2, op1, op0, oq0, oq1, oq2); } void vpx_lpf_vertical_8_neon(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { uint8x8_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0, oq0, oq1, oq2, mask, flat, hev; uint32_t flat_status; load_thresh_8(blimit, limit, thresh, &blimit_vec, &limit_vec, &thresh_vec); load_8x8(s - 4, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); transpose_u8_8x8(&p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); mask = filter_flat_hev_mask_8(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, &flat, &flat_status, &hev); filter8_8(mask, flat, flat_status, hev, p3, p2, p1, p0, q0, q1, q2, q3, &op2, &op1, &op0, &oq0, &oq1, &oq2); // Note: transpose + store_8x8() is faster than store_6x8(). transpose_u8_8x8(&p3, &op2, &op1, &op0, &oq0, &oq1, &oq2, &q3); store_8x8(s - 4, p, p3, op2, op1, op0, oq0, oq1, oq2, q3); } void vpx_lpf_vertical_8_dual_neon(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { uint8x16_t blimit_vec, limit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, op2, op1, op0, oq0, oq1, oq2, mask, flat, hev; uint8x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; uint32_t flat_status; load_thresh_8_dual(blimit0, limit0, thresh0, blimit1, limit1, thresh1, &blimit_vec, &limit_vec, &thresh_vec); load_8x16(s - 4, p, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7, &s8, &s9, &s10, &s11, &s12, &s13, &s14, &s15); transpose_u8_8x16(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); mask = filter_flat_hev_mask_16(limit_vec, blimit_vec, thresh_vec, p3, p2, p1, p0, q0, q1, q2, q3, &flat, &flat_status, &hev); filter8_16(mask, flat, flat_status, hev, p3, p2, p1, p0, q0, q1, q2, q3, &op2, &op1, &op0, &oq0, &oq1, &oq2); // Note: store_6x8() twice is faster than transpose + store_8x16(). 
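  // (One plausible reason: each store_6x8() touches only the six modified
  // columns per row via two vst3_lane stores, while the transpose route would
  // first rearrange all sixteen rows in registers before full-width stores.)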
store_6x8(s, p, vget_low_u8(op2), vget_low_u8(op1), vget_low_u8(op0), vget_low_u8(oq0), vget_low_u8(oq1), vget_low_u8(oq2)); store_6x8(s + 8 * p, p, vget_high_u8(op2), vget_high_u8(op1), vget_high_u8(op0), vget_high_u8(oq0), vget_high_u8(oq1), vget_high_u8(oq2)); } #define FUN_LPF_16_KERNEL(name, w) \ static INLINE void lpf_16##name##kernel( \ const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, \ const uint8x##w##_t p7, const uint8x##w##_t p6, const uint8x##w##_t p5, \ const uint8x##w##_t p4, const uint8x##w##_t p3, const uint8x##w##_t p2, \ const uint8x##w##_t p1, const uint8x##w##_t p0, const uint8x##w##_t q0, \ const uint8x##w##_t q1, const uint8x##w##_t q2, const uint8x##w##_t q3, \ const uint8x##w##_t q4, const uint8x##w##_t q5, const uint8x##w##_t q6, \ const uint8x##w##_t q7, uint8x##w##_t *op6, uint8x##w##_t *op5, \ uint8x##w##_t *op4, uint8x##w##_t *op3, uint8x##w##_t *op2, \ uint8x##w##_t *op1, uint8x##w##_t *op0, uint8x##w##_t *oq0, \ uint8x##w##_t *oq1, uint8x##w##_t *oq2, uint8x##w##_t *oq3, \ uint8x##w##_t *oq4, uint8x##w##_t *oq5, uint8x##w##_t *oq6, \ uint32_t *flat_status, uint32_t *flat2_status) { \ uint8x##w##_t blimit_vec, limit_vec, thresh_vec, mask, flat, flat2, hev; \ \ load_thresh_##w(blimit, limit, thresh, &blimit_vec, &limit_vec, \ &thresh_vec); \ mask = filter_flat_hev_mask_##w(limit_vec, blimit_vec, thresh_vec, p3, p2, \ p1, p0, q0, q1, q2, q3, &flat, \ flat_status, &hev); \ flat2 = flat_mask5_##w(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, \ flat2_status); \ filter16_##w(mask, flat, *flat_status, flat2, *flat2_status, hev, p7, p6, \ p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6, \ op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, \ oq6); \ } FUN_LPF_16_KERNEL(_, 8) // lpf_16_kernel FUN_LPF_16_KERNEL(_dual_, 16) // lpf_16_dual_kernel #undef FUN_LPF_16_KERNEL void vpx_lpf_horizontal_16_neon(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { uint8x8_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6; uint32_t flat_status, flat2_status; load_8x16(s - 8 * p, p, &p7, &p6, &p5, &p4, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7); lpf_16_kernel(blimit, limit, thresh, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, &op6, &op5, &op4, &op3, &op2, &op1, &op0, &oq0, &oq1, &oq2, &oq3, &oq4, &oq5, &oq6, &flat_status, &flat2_status); store_8x14(s, p, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6, flat_status, flat2_status); } void vpx_lpf_horizontal_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { uint8x16_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6; uint32_t flat_status, flat2_status; load_16x8(s - 4 * p, p, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3); p7 = vld1q_u8(s - 8 * p); p6 = vld1q_u8(s - 7 * p); p5 = vld1q_u8(s - 6 * p); p4 = vld1q_u8(s - 5 * p); q4 = vld1q_u8(s + 4 * p); q5 = vld1q_u8(s + 5 * p); q6 = vld1q_u8(s + 6 * p); q7 = vld1q_u8(s + 7 * p); lpf_16_dual_kernel(blimit, limit, thresh, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, &op6, &op5, &op4, &op3, &op2, &op1, &op0, &oq0, &oq1, &oq2, &oq3, &oq4, &oq5, &oq6, &flat_status, &flat2_status); store_16x14(s, p, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6, flat_status, flat2_status); } void vpx_lpf_vertical_16_neon(uint8_t 
*s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { uint8x8_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6; uint8x16_t s0, s1, s2, s3, s4, s5, s6, s7; uint32_t flat_status, flat2_status; s -= 8; load_16x8(s, p, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); transpose_u8_16x8(s0, s1, s2, s3, s4, s5, s6, s7, &p7, &p6, &p5, &p4, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7); lpf_16_kernel(blimit, limit, thresh, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, &op6, &op5, &op4, &op3, &op2, &op1, &op0, &oq0, &oq1, &oq2, &oq3, &oq4, &oq5, &oq6, &flat_status, &flat2_status); if (flat_status) { if (flat2_status) { transpose_u8_8x16(p7, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6, q7, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7); store_16x8(s, p, s0, s1, s2, s3, s4, s5, s6, s7); } else { // Note: transpose + store_8x8() is faster than store_6x8(). transpose_u8_8x8(&p3, &op2, &op1, &op0, &oq0, &oq1, &oq2, &q3); store_8x8(s + 4, p, p3, op2, op1, op0, oq0, oq1, oq2, q3); } } else { store_4x8(s + 6, p, op1, op0, oq0, oq1); } } void vpx_lpf_vertical_16_dual_neon(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { uint8x16_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6; uint8x16_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; uint32_t flat_status, flat2_status; s -= 8; load_16x16(s, p, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7, &s8, &s9, &s10, &s11, &s12, &s13, &s14, &s15); transpose_u8_16x16(s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15, &p7, &p6, &p5, &p4, &p3, &p2, &p1, &p0, &q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7); lpf_16_dual_kernel(blimit, limit, thresh, p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, &op6, &op5, &op4, &op3, &op2, &op1, &op0, &oq0, &oq1, &oq2, &oq3, &oq4, &oq5, &oq6, &flat_status, &flat2_status); if (flat_status) { if (flat2_status) { transpose_u8_16x16(p7, op6, op5, op4, op3, op2, op1, op0, oq0, oq1, oq2, oq3, oq4, oq5, oq6, q7, &s0, &s1, &s2, &s3, &s4, &s5, &s6, &s7, &s8, &s9, &s10, &s11, &s12, &s13, &s14, &s15); store_16x16(s, p, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15); } else { // Note: store_6x8() twice is faster than transpose + store_8x16(). s += 8; store_6x8(s, p, vget_low_u8(op2), vget_low_u8(op1), vget_low_u8(op0), vget_low_u8(oq0), vget_low_u8(oq1), vget_low_u8(oq2)); store_6x8(s + 8 * p, p, vget_high_u8(op2), vget_high_u8(op1), vget_high_u8(op0), vget_high_u8(oq0), vget_high_u8(oq1), vget_high_u8(oq2)); } } else { s += 6; store_4x8(s, p, vget_low_u8(op1), vget_low_u8(op0), vget_low_u8(oq0), vget_low_u8(oq1)); store_4x8(s + 8 * p, p, vget_high_u8(op1), vget_high_u8(op0), vget_high_u8(oq0), vget_high_u8(oq1)); } } libvpx-1.8.2/vpx_dsp/arm/mem_neon.h000066400000000000000000000134231357355204000172450ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/
#ifndef VPX_VPX_DSP_ARM_MEM_NEON_H_ #define VPX_VPX_DSP_ARM_MEM_NEON_H_
#include <arm_neon.h> #include <assert.h> #include <string.h>
#include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_dsp_common.h"
static INLINE int16x4_t create_s16x4_neon(const int16_t c0, const int16_t c1, const int16_t c2, const int16_t c3) { return vcreate_s16((uint16_t)c0 | ((uint32_t)c1 << 16) | ((int64_t)(uint16_t)c2 << 32) | ((int64_t)c3 << 48)); }
static INLINE int32x2_t create_s32x2_neon(const int32_t c0, const int32_t c1) { return vcreate_s32((uint32_t)c0 | ((int64_t)(uint32_t)c1 << 32)); }
static INLINE int32x4_t create_s32x4_neon(const int32_t c0, const int32_t c1, const int32_t c2, const int32_t c3) { return vcombine_s32(create_s32x2_neon(c0, c1), create_s32x2_neon(c2, c3)); }
// Helper functions used to load tran_low_t into int16, narrowing if necessary.
static INLINE int16x8x2_t load_tran_low_to_s16x2q(const tran_low_t *buf) { #if CONFIG_VP9_HIGHBITDEPTH const int32x4x2_t v0 = vld2q_s32(buf); const int32x4x2_t v1 = vld2q_s32(buf + 8); const int16x4_t s0 = vmovn_s32(v0.val[0]); const int16x4_t s1 = vmovn_s32(v0.val[1]); const int16x4_t s2 = vmovn_s32(v1.val[0]); const int16x4_t s3 = vmovn_s32(v1.val[1]); int16x8x2_t res; res.val[0] = vcombine_s16(s0, s2); res.val[1] = vcombine_s16(s1, s3); return res; #else return vld2q_s16(buf); #endif }
static INLINE int16x8_t load_tran_low_to_s16q(const tran_low_t *buf) { #if CONFIG_VP9_HIGHBITDEPTH const int32x4_t v0 = vld1q_s32(buf); const int32x4_t v1 = vld1q_s32(buf + 4); const int16x4_t s0 = vmovn_s32(v0); const int16x4_t s1 = vmovn_s32(v1); return vcombine_s16(s0, s1); #else return vld1q_s16(buf); #endif }
static INLINE int16x4_t load_tran_low_to_s16d(const tran_low_t *buf) { #if CONFIG_VP9_HIGHBITDEPTH const int32x4_t v0 = vld1q_s32(buf); return vmovn_s32(v0); #else return vld1_s16(buf); #endif }
static INLINE void store_s16q_to_tran_low(tran_low_t *buf, const int16x8_t a) { #if CONFIG_VP9_HIGHBITDEPTH const int32x4_t v0 = vmovl_s16(vget_low_s16(a)); const int32x4_t v1 = vmovl_s16(vget_high_s16(a)); vst1q_s32(buf, v0); vst1q_s32(buf + 4, v1); #else vst1q_s16(buf, a); #endif }
// Propagate type information to the compiler. Without this the compiler may // assume the required alignment of uint32_t (4 bytes) and add alignment hints // to the memory access. // // This is used for functions operating on uint8_t which wish to load or store 4 // values at a time but which may not be on 4 byte boundaries.
static INLINE void uint32_to_mem(uint8_t *buf, uint32_t a) { memcpy(buf, &a, 4); }
// Load 2 sets of 4 bytes when alignment is not guaranteed.
static INLINE uint8x8_t load_unaligned_u8(const uint8_t *buf, int stride) { uint32_t a; uint32x2_t a_u32 = vdup_n_u32(0); if (stride == 4) return vld1_u8(buf); memcpy(&a, buf, 4); buf += stride; a_u32 = vset_lane_u32(a, a_u32, 0); memcpy(&a, buf, 4); a_u32 = vset_lane_u32(a, a_u32, 1); return vreinterpret_u8_u32(a_u32); }
// Store 2 sets of 4 bytes when alignment is not guaranteed.
static INLINE void store_unaligned_u8(uint8_t *buf, int stride, const uint8x8_t a) { const uint32x2_t a_u32 = vreinterpret_u32_u8(a); if (stride == 4) { vst1_u8(buf, a); return; } uint32_to_mem(buf, vget_lane_u32(a_u32, 0)); buf += stride; uint32_to_mem(buf, vget_lane_u32(a_u32, 1)); }
// Load 4 sets of 4 bytes when alignment is not guaranteed.
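// (As with load_unaligned_u8() above, each row is fetched with a 4-byte memcpy so the compiler cannot assume word alignment; the memcpy is expected to fold into a single unaligned load at normal optimization levels, though that is compiler-dependent.)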
static INLINE uint8x16_t load_unaligned_u8q(const uint8_t *buf, int stride) { uint32_t a; uint32x4_t a_u32 = vdupq_n_u32(0); if (stride == 4) return vld1q_u8(buf); memcpy(&a, buf, 4); buf += stride; a_u32 = vsetq_lane_u32(a, a_u32, 0); memcpy(&a, buf, 4); buf += stride; a_u32 = vsetq_lane_u32(a, a_u32, 1); memcpy(&a, buf, 4); buf += stride; a_u32 = vsetq_lane_u32(a, a_u32, 2); memcpy(&a, buf, 4); buf += stride; a_u32 = vsetq_lane_u32(a, a_u32, 3); return vreinterpretq_u8_u32(a_u32); }
// Store 4 sets of 4 bytes when alignment is not guaranteed.
static INLINE void store_unaligned_u8q(uint8_t *buf, int stride, const uint8x16_t a) { const uint32x4_t a_u32 = vreinterpretq_u32_u8(a); if (stride == 4) { vst1q_u8(buf, a); return; } uint32_to_mem(buf, vgetq_lane_u32(a_u32, 0)); buf += stride; uint32_to_mem(buf, vgetq_lane_u32(a_u32, 1)); buf += stride; uint32_to_mem(buf, vgetq_lane_u32(a_u32, 2)); buf += stride; uint32_to_mem(buf, vgetq_lane_u32(a_u32, 3)); }
// Load 2 sets of 4 bytes when alignment is guaranteed.
static INLINE uint8x8_t load_u8(const uint8_t *buf, int stride) { uint32x2_t a = vdup_n_u32(0); assert(!((intptr_t)buf % sizeof(uint32_t))); assert(!(stride % sizeof(uint32_t))); a = vld1_lane_u32((const uint32_t *)buf, a, 0); buf += stride; a = vld1_lane_u32((const uint32_t *)buf, a, 1); return vreinterpret_u8_u32(a); }
// Store 2 sets of 4 bytes when alignment is guaranteed.
static INLINE void store_u8(uint8_t *buf, int stride, const uint8x8_t a) { uint32x2_t a_u32 = vreinterpret_u32_u8(a); assert(!((intptr_t)buf % sizeof(uint32_t))); assert(!(stride % sizeof(uint32_t))); vst1_lane_u32((uint32_t *)buf, a_u32, 0); buf += stride; vst1_lane_u32((uint32_t *)buf, a_u32, 1); }
#endif // VPX_VPX_DSP_ARM_MEM_NEON_H_
libvpx-1.8.2/vpx_dsp/arm/quantize_neon.c000066400000000000000000000261471357355204000203310ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */
#include <arm_neon.h> #include <assert.h>
#include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/arm/mem_neon.h"
static INLINE void calculate_dqcoeff_and_store(const int16x8_t qcoeff, const int16x8_t dequant, tran_low_t *dqcoeff) { const int32x4_t dqcoeff_0 = vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant)); const int32x4_t dqcoeff_1 = vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant)); #if CONFIG_VP9_HIGHBITDEPTH vst1q_s32(dqcoeff, dqcoeff_0); vst1q_s32(dqcoeff + 4, dqcoeff_1); #else vst1q_s16(dqcoeff, vcombine_s16(vmovn_s32(dqcoeff_0), vmovn_s32(dqcoeff_1))); #endif // CONFIG_VP9_HIGHBITDEPTH }
void vpx_quantize_b_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { const int16x8_t one = vdupq_n_s16(1); const int16x8_t neg_one = vdupq_n_s16(-1); uint16x8_t eob_max; (void)scan; (void)skip_block; assert(!skip_block); // Process first 8 values which include a dc component. { // Only the first element of each vector is DC.
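// (The quantizer tables are laid out with the DC constant in element 0 and the AC constant replicated in the remaining elements, so this plain 8-lane load quantizes the DC coefficient plus the first seven AC coefficients in one pass; the loop below re-broadcasts element 1 for the pure-AC blocks.)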
const int16x8_t zbin = vld1q_s16(zbin_ptr); const int16x8_t round = vld1q_s16(round_ptr); const int16x8_t quant = vld1q_s16(quant_ptr); const int16x8_t quant_shift = vld1q_s16(quant_shift_ptr); const int16x8_t dequant = vld1q_s16(dequant_ptr); // Add one because the eob does not index from 0. const uint16x8_t v_iscan = vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); const int16x8_t coeff_abs = vabsq_s16(coeff); const int16x8_t zbin_mask = vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, zbin)); const int16x8_t rounded = vqaddq_s16(coeff_abs, round); // (round * quant * 2) >> 16 >> 1 == (round * quant) >> 16 int16x8_t qcoeff = vshrq_n_s16(vqdmulhq_s16(rounded, quant), 1); qcoeff = vaddq_s16(qcoeff, rounded); // (qcoeff * quant_shift * 2) >> 16 >> 1 == (qcoeff * quant_shift) >> 16 qcoeff = vshrq_n_s16(vqdmulhq_s16(qcoeff, quant_shift), 1); // Restore the sign bit. qcoeff = veorq_s16(qcoeff, coeff_sign); qcoeff = vsubq_s16(qcoeff, coeff_sign); qcoeff = vandq_s16(qcoeff, zbin_mask); // Set non-zero elements to -1 and use that to extract values for eob. eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan); coeff_ptr += 8; iscan += 8; store_s16q_to_tran_low(qcoeff_ptr, qcoeff); qcoeff_ptr += 8; calculate_dqcoeff_and_store(qcoeff, dequant, dqcoeff_ptr); dqcoeff_ptr += 8; } n_coeffs -= 8; { const int16x8_t zbin = vdupq_n_s16(zbin_ptr[1]); const int16x8_t round = vdupq_n_s16(round_ptr[1]); const int16x8_t quant = vdupq_n_s16(quant_ptr[1]); const int16x8_t quant_shift = vdupq_n_s16(quant_shift_ptr[1]); const int16x8_t dequant = vdupq_n_s16(dequant_ptr[1]); do { // Add one because the eob is not its index. const uint16x8_t v_iscan = vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); const int16x8_t coeff_abs = vabsq_s16(coeff); const int16x8_t zbin_mask = vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, zbin)); const int16x8_t rounded = vqaddq_s16(coeff_abs, round); // (round * quant * 2) >> 16 >> 1 == (round * quant) >> 16 int16x8_t qcoeff = vshrq_n_s16(vqdmulhq_s16(rounded, quant), 1); qcoeff = vaddq_s16(qcoeff, rounded); // (qcoeff * quant_shift * 2) >> 16 >> 1 == (qcoeff * quant_shift) >> 16 qcoeff = vshrq_n_s16(vqdmulhq_s16(qcoeff, quant_shift), 1); // Restore the sign bit. qcoeff = veorq_s16(qcoeff, coeff_sign); qcoeff = vsubq_s16(qcoeff, coeff_sign); qcoeff = vandq_s16(qcoeff, zbin_mask); // Set non-zero elements to -1 and use that to extract values for eob. 
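// (vtstq_s16() sets a lane to all ones exactly where qcoeff is non-zero; masking with iscan + 1 and keeping the running maximum therefore leaves the 1-based scan position of the last non-zero coefficient, which is the eob written out below.)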
eob_max = vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan)); coeff_ptr += 8; iscan += 8; store_s16q_to_tran_low(qcoeff_ptr, qcoeff); qcoeff_ptr += 8; calculate_dqcoeff_and_store(qcoeff, dequant, dqcoeff_ptr); dqcoeff_ptr += 8; n_coeffs -= 8; } while (n_coeffs > 0); } #ifdef __aarch64__ *eob_ptr = vmaxvq_u16(eob_max); #else { const uint16x4_t eob_max_0 = vmax_u16(vget_low_u16(eob_max), vget_high_u16(eob_max)); const uint16x4_t eob_max_1 = vpmax_u16(eob_max_0, eob_max_0); const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1); vst1_lane_u16(eob_ptr, eob_max_2, 0); } #endif // __aarch64__ } static INLINE int32x4_t extract_sign_bit(int32x4_t a) { return vreinterpretq_s32_u32(vshrq_n_u32(vreinterpretq_u32_s32(a), 31)); } static INLINE void calculate_dqcoeff_and_store_32x32(const int16x8_t qcoeff, const int16x8_t dequant, tran_low_t *dqcoeff) { int32x4_t dqcoeff_0 = vmull_s16(vget_low_s16(qcoeff), vget_low_s16(dequant)); int32x4_t dqcoeff_1 = vmull_s16(vget_high_s16(qcoeff), vget_high_s16(dequant)); // Add 1 if negative to round towards zero because the C uses division. dqcoeff_0 = vaddq_s32(dqcoeff_0, extract_sign_bit(dqcoeff_0)); dqcoeff_1 = vaddq_s32(dqcoeff_1, extract_sign_bit(dqcoeff_1)); #if CONFIG_VP9_HIGHBITDEPTH dqcoeff_0 = vshrq_n_s32(dqcoeff_0, 1); dqcoeff_1 = vshrq_n_s32(dqcoeff_1, 1); vst1q_s32(dqcoeff, dqcoeff_0); vst1q_s32(dqcoeff + 4, dqcoeff_1); #else vst1q_s16(dqcoeff, vcombine_s16(vshrn_n_s32(dqcoeff_0, 1), vshrn_n_s32(dqcoeff_1, 1))); #endif // CONFIG_VP9_HIGHBITDEPTH } // Main difference is that zbin values are halved before comparison and dqcoeff // values are divided by 2. zbin is rounded but dqcoeff is not. void vpx_quantize_b_32x32_neon(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { const int16x8_t one = vdupq_n_s16(1); const int16x8_t neg_one = vdupq_n_s16(-1); uint16x8_t eob_max; int i; (void)scan; (void)n_coeffs; // Because we will always calculate 32*32. (void)skip_block; assert(!skip_block); // Process first 8 values which include a dc component. { // Only the first element of each vector is DC. const int16x8_t zbin = vrshrq_n_s16(vld1q_s16(zbin_ptr), 1); const int16x8_t round = vrshrq_n_s16(vld1q_s16(round_ptr), 1); const int16x8_t quant = vld1q_s16(quant_ptr); const int16x8_t quant_shift = vld1q_s16(quant_shift_ptr); const int16x8_t dequant = vld1q_s16(dequant_ptr); // Add one because the eob does not index from 0. const uint16x8_t v_iscan = vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); const int16x8_t coeff_abs = vabsq_s16(coeff); const int16x8_t zbin_mask = vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, zbin)); const int16x8_t rounded = vqaddq_s16(coeff_abs, round); // (round * quant * 2) >> 16 >> 1 == (round * quant) >> 16 int16x8_t qcoeff = vshrq_n_s16(vqdmulhq_s16(rounded, quant), 1); qcoeff = vaddq_s16(qcoeff, rounded); // (qcoeff * quant_shift * 2) >> 16 == (qcoeff * quant_shift) >> 15 qcoeff = vqdmulhq_s16(qcoeff, quant_shift); // Restore the sign bit. qcoeff = veorq_s16(qcoeff, coeff_sign); qcoeff = vsubq_s16(qcoeff, coeff_sign); qcoeff = vandq_s16(qcoeff, zbin_mask); // Set non-zero elements to -1 and use that to extract values for eob. 
eob_max = vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan); coeff_ptr += 8; iscan += 8; store_s16q_to_tran_low(qcoeff_ptr, qcoeff); qcoeff_ptr += 8; calculate_dqcoeff_and_store_32x32(qcoeff, dequant, dqcoeff_ptr); dqcoeff_ptr += 8; } { const int16x8_t zbin = vrshrq_n_s16(vdupq_n_s16(zbin_ptr[1]), 1); const int16x8_t round = vrshrq_n_s16(vdupq_n_s16(round_ptr[1]), 1); const int16x8_t quant = vdupq_n_s16(quant_ptr[1]); const int16x8_t quant_shift = vdupq_n_s16(quant_shift_ptr[1]); const int16x8_t dequant = vdupq_n_s16(dequant_ptr[1]); for (i = 1; i < 32 * 32 / 8; ++i) { // Add one because the eob is not its index. const uint16x8_t v_iscan = vreinterpretq_u16_s16(vaddq_s16(vld1q_s16(iscan), one)); const int16x8_t coeff = load_tran_low_to_s16q(coeff_ptr); const int16x8_t coeff_sign = vshrq_n_s16(coeff, 15); const int16x8_t coeff_abs = vabsq_s16(coeff); const int16x8_t zbin_mask = vreinterpretq_s16_u16(vcgeq_s16(coeff_abs, zbin)); const int16x8_t rounded = vqaddq_s16(coeff_abs, round); // (round * quant * 2) >> 16 >> 1 == (round * quant) >> 16 int16x8_t qcoeff = vshrq_n_s16(vqdmulhq_s16(rounded, quant), 1); qcoeff = vaddq_s16(qcoeff, rounded); // (qcoeff * quant_shift * 2) >> 16 == (qcoeff * quant_shift) >> 15 qcoeff = vqdmulhq_s16(qcoeff, quant_shift); // Restore the sign bit. qcoeff = veorq_s16(qcoeff, coeff_sign); qcoeff = vsubq_s16(qcoeff, coeff_sign); qcoeff = vandq_s16(qcoeff, zbin_mask); // Set non-zero elements to -1 and use that to extract values for eob. eob_max = vmaxq_u16(eob_max, vandq_u16(vtstq_s16(qcoeff, neg_one), v_iscan)); coeff_ptr += 8; iscan += 8; store_s16q_to_tran_low(qcoeff_ptr, qcoeff); qcoeff_ptr += 8; calculate_dqcoeff_and_store_32x32(qcoeff, dequant, dqcoeff_ptr); dqcoeff_ptr += 8; } } #ifdef __aarch64__ *eob_ptr = vmaxvq_u16(eob_max); #else { const uint16x4_t eob_max_0 = vmax_u16(vget_low_u16(eob_max), vget_high_u16(eob_max)); const uint16x4_t eob_max_1 = vpmax_u16(eob_max_0, eob_max_0); const uint16x4_t eob_max_2 = vpmax_u16(eob_max_1, eob_max_1); vst1_lane_u16(eob_ptr, eob_max_2, 0); } #endif // __aarch64__ } libvpx-1.8.2/vpx_dsp/arm/sad4d_neon.c000066400000000000000000000352531357355204000174660ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/
#include <arm_neon.h> #include <assert.h>
#include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/sum_neon.h"
static INLINE uint8x8_t load_unaligned_2_buffers(const void *const buf0, const void *const buf1) { uint32_t a; uint32x2_t aa = vdup_n_u32(0); memcpy(&a, buf0, 4); aa = vset_lane_u32(a, aa, 0); memcpy(&a, buf1, 4); aa = vset_lane_u32(a, aa, 1); return vreinterpret_u8_u32(aa); }
static INLINE void sad4x_4d(const uint8_t *const src_ptr, const int src_stride, const uint8_t *const ref_array[4], const int ref_stride, const int height, uint32_t *const res) { int i; uint16x8_t abs[2] = { vdupq_n_u16(0), vdupq_n_u16(0) }; uint16x4_t a[2]; uint32x4_t r; assert(!((intptr_t)src_ptr % sizeof(uint32_t))); assert(!(src_stride % sizeof(uint32_t))); for (i = 0; i < height; ++i) { const uint8x8_t s = vreinterpret_u8_u32( vld1_dup_u32((const uint32_t *)(src_ptr + i * src_stride))); const uint8x8_t ref01 = load_unaligned_2_buffers( ref_array[0] + i * ref_stride, ref_array[1] + i * ref_stride); const uint8x8_t ref23 = load_unaligned_2_buffers( ref_array[2] + i * ref_stride, ref_array[3] + i * ref_stride); abs[0] = vabal_u8(abs[0], s, ref01); abs[1] = vabal_u8(abs[1], s, ref23); } a[0] = vpadd_u16(vget_low_u16(abs[0]), vget_high_u16(abs[0])); a[1] = vpadd_u16(vget_low_u16(abs[1]), vget_high_u16(abs[1])); r = vpaddlq_u16(vcombine_u16(a[0], a[1])); vst1q_u32(res, r); }
void vpx_sad4x4x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 4, res); }
void vpx_sad4x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { sad4x_4d(src_ptr, src_stride, ref_array, ref_stride, 8, res); }
////////////////////////////////////////////////////////////////////////////////
// Can handle 512 pixels' sad sum (such as 16x32 or 32x16)
static INLINE void sad_512_pel_final_neon(const uint16x8_t *sum /*[4]*/, uint32_t *const res) { const uint16x4_t a0 = vadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0])); const uint16x4_t a1 = vadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1])); const uint16x4_t a2 = vadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2])); const uint16x4_t a3 = vadd_u16(vget_low_u16(sum[3]), vget_high_u16(sum[3])); const uint16x4_t b0 = vpadd_u16(a0, a1); const uint16x4_t b1 = vpadd_u16(a2, a3); const uint32x4_t r = vpaddlq_u16(vcombine_u16(b0, b1)); vst1q_u32(res, r); }
// Can handle 1024 pixels' sad sum (such as 32x32)
static INLINE void sad_1024_pel_final_neon(const uint16x8_t *sum /*[4]*/, uint32_t *const res) { const uint16x4_t a0 = vpadd_u16(vget_low_u16(sum[0]), vget_high_u16(sum[0])); const uint16x4_t a1 = vpadd_u16(vget_low_u16(sum[1]), vget_high_u16(sum[1])); const uint16x4_t a2 = vpadd_u16(vget_low_u16(sum[2]), vget_high_u16(sum[2])); const uint16x4_t a3 = vpadd_u16(vget_low_u16(sum[3]), vget_high_u16(sum[3])); const uint32x4_t b0 = vpaddlq_u16(vcombine_u16(a0, a1)); const uint32x4_t b1 = vpaddlq_u16(vcombine_u16(a2, a3)); const uint32x2_t c0 = vpadd_u32(vget_low_u32(b0), vget_high_u32(b0)); const uint32x2_t c1 = vpadd_u32(vget_low_u32(b1), vget_high_u32(b1)); vst1q_u32(res, vcombine_u32(c0, c1)); }
// Can handle 2048 pixels' sad sum (such as 32x64 or 64x32)
static INLINE void sad_2048_pel_final_neon(const uint16x8_t *sum /*[4]*/, uint32_t *const res) { const uint32x4_t a0 = vpaddlq_u16(sum[0]); const uint32x4_t a1 = vpaddlq_u16(sum[1]); const uint32x4_t a2 =
vpaddlq_u16(sum[2]); const uint32x4_t a3 = vpaddlq_u16(sum[3]); const uint32x2_t b0 = vadd_u32(vget_low_u32(a0), vget_high_u32(a0)); const uint32x2_t b1 = vadd_u32(vget_low_u32(a1), vget_high_u32(a1)); const uint32x2_t b2 = vadd_u32(vget_low_u32(a2), vget_high_u32(a2)); const uint32x2_t b3 = vadd_u32(vget_low_u32(a3), vget_high_u32(a3)); const uint32x2_t c0 = vpadd_u32(b0, b1); const uint32x2_t c1 = vpadd_u32(b2, b3); vst1q_u32(res, vcombine_u32(c0, c1)); } // Can handle 4096 pixels' sad sum (such as 64x64) static INLINE void sad_4096_pel_final_neon(const uint16x8_t *sum /*[8]*/, uint32_t *const res) { const uint32x4_t a0 = vpaddlq_u16(sum[0]); const uint32x4_t a1 = vpaddlq_u16(sum[1]); const uint32x4_t a2 = vpaddlq_u16(sum[2]); const uint32x4_t a3 = vpaddlq_u16(sum[3]); const uint32x4_t a4 = vpaddlq_u16(sum[4]); const uint32x4_t a5 = vpaddlq_u16(sum[5]); const uint32x4_t a6 = vpaddlq_u16(sum[6]); const uint32x4_t a7 = vpaddlq_u16(sum[7]); const uint32x4_t b0 = vaddq_u32(a0, a1); const uint32x4_t b1 = vaddq_u32(a2, a3); const uint32x4_t b2 = vaddq_u32(a4, a5); const uint32x4_t b3 = vaddq_u32(a6, a7); const uint32x2_t c0 = vadd_u32(vget_low_u32(b0), vget_high_u32(b0)); const uint32x2_t c1 = vadd_u32(vget_low_u32(b1), vget_high_u32(b1)); const uint32x2_t c2 = vadd_u32(vget_low_u32(b2), vget_high_u32(b2)); const uint32x2_t c3 = vadd_u32(vget_low_u32(b3), vget_high_u32(b3)); const uint32x2_t d0 = vpadd_u32(c0, c1); const uint32x2_t d1 = vpadd_u32(c2, c3); vst1q_u32(res, vcombine_u32(d0, d1)); } static INLINE void sad8x_4d(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res, const int height) { int i, j; const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], ref_array[3] }; uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0) }; for (i = 0; i < height; ++i) { const uint8x8_t s = vld1_u8(src_ptr); src_ptr += src_stride; for (j = 0; j < 4; ++j) { const uint8x8_t b_u8 = vld1_u8(ref_loop[j]); ref_loop[j] += ref_stride; sum[j] = vabal_u8(sum[j], s, b_u8); } } sad_512_pel_final_neon(sum, res); } void vpx_sad8x4x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 4); } void vpx_sad8x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 8); } void vpx_sad8x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { sad8x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16); } //////////////////////////////////////////////////////////////////////////////// static INLINE void sad16_neon(const uint8_t *ref_ptr, const uint8x16_t src_ptr, uint16x8_t *const sum) { const uint8x16_t r = vld1q_u8(ref_ptr); *sum = vabal_u8(*sum, vget_low_u8(src_ptr), vget_low_u8(r)); *sum = vabal_u8(*sum, vget_high_u8(src_ptr), vget_high_u8(r)); } static INLINE void sad16x_4d(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res, const int height) { int i, j; const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], ref_array[3] }; uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0) }; for (i = 0; i < height; ++i) { const uint8x16_t s = vld1q_u8(src_ptr); src_ptr += src_stride; for (j = 0; j < 4; ++j) { sad16_neon(ref_loop[j], s, 
&sum[j]); ref_loop[j] += ref_stride; } } sad_512_pel_final_neon(sum, res); } void vpx_sad16x8x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 8); } void vpx_sad16x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 16); } void vpx_sad16x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { sad16x_4d(src_ptr, src_stride, ref_array, ref_stride, res, 32); } //////////////////////////////////////////////////////////////////////////////// static INLINE void sad32x_4d(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, const int height, uint16x8_t *const sum) { int i; const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], ref_array[3] }; sum[0] = sum[1] = sum[2] = sum[3] = vdupq_n_u16(0); for (i = 0; i < height; ++i) { uint8x16_t s; s = vld1q_u8(src_ptr + 0 * 16); sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]); sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]); sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]); s = vld1q_u8(src_ptr + 1 * 16); sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]); sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]); sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]); src_ptr += src_stride; ref_loop[0] += ref_stride; ref_loop[1] += ref_stride; ref_loop[2] += ref_stride; ref_loop[3] += ref_stride; } } void vpx_sad32x16x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { uint16x8_t sum[4]; sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 16, sum); sad_512_pel_final_neon(sum, res); } void vpx_sad32x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { uint16x8_t sum[4]; sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 32, sum); sad_1024_pel_final_neon(sum, res); } void vpx_sad32x64x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { uint16x8_t sum[4]; sad32x_4d(src_ptr, src_stride, ref_array, ref_stride, 64, sum); sad_2048_pel_final_neon(sum, res); } //////////////////////////////////////////////////////////////////////////////// void vpx_sad64x32x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { int i; const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], ref_array[3] }; uint16x8_t sum[4] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0) }; for (i = 0; i < 32; ++i) { uint8x16_t s; s = vld1q_u8(src_ptr + 0 * 16); sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 0 * 16, s, &sum[1]); sad16_neon(ref_loop[2] + 0 * 16, s, &sum[2]); sad16_neon(ref_loop[3] + 0 * 16, s, &sum[3]); s = vld1q_u8(src_ptr + 1 * 16); sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 1 * 16, s, &sum[1]); sad16_neon(ref_loop[2] + 1 * 16, s, &sum[2]); sad16_neon(ref_loop[3] + 1 * 16, s, &sum[3]); s = vld1q_u8(src_ptr + 2 * 16); sad16_neon(ref_loop[0] + 2 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 2 * 16, s, &sum[1]); sad16_neon(ref_loop[2] + 2 * 16, s, &sum[2]); sad16_neon(ref_loop[3] + 2 * 16, s, &sum[3]); s = vld1q_u8(src_ptr + 3 * 
16); sad16_neon(ref_loop[0] + 3 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 3 * 16, s, &sum[1]); sad16_neon(ref_loop[2] + 3 * 16, s, &sum[2]); sad16_neon(ref_loop[3] + 3 * 16, s, &sum[3]); src_ptr += src_stride; ref_loop[0] += ref_stride; ref_loop[1] += ref_stride; ref_loop[2] += ref_stride; ref_loop[3] += ref_stride; } sad_2048_pel_final_neon(sum, res); } void vpx_sad64x64x4d_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t *res) { int i; const uint8_t *ref_loop[4] = { ref_array[0], ref_array[1], ref_array[2], ref_array[3] }; uint16x8_t sum[8] = { vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0), vdupq_n_u16(0) }; for (i = 0; i < 64; ++i) { uint8x16_t s; s = vld1q_u8(src_ptr + 0 * 16); sad16_neon(ref_loop[0] + 0 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 0 * 16, s, &sum[2]); sad16_neon(ref_loop[2] + 0 * 16, s, &sum[4]); sad16_neon(ref_loop[3] + 0 * 16, s, &sum[6]); s = vld1q_u8(src_ptr + 1 * 16); sad16_neon(ref_loop[0] + 1 * 16, s, &sum[0]); sad16_neon(ref_loop[1] + 1 * 16, s, &sum[2]); sad16_neon(ref_loop[2] + 1 * 16, s, &sum[4]); sad16_neon(ref_loop[3] + 1 * 16, s, &sum[6]); s = vld1q_u8(src_ptr + 2 * 16); sad16_neon(ref_loop[0] + 2 * 16, s, &sum[1]); sad16_neon(ref_loop[1] + 2 * 16, s, &sum[3]); sad16_neon(ref_loop[2] + 2 * 16, s, &sum[5]); sad16_neon(ref_loop[3] + 2 * 16, s, &sum[7]); s = vld1q_u8(src_ptr + 3 * 16); sad16_neon(ref_loop[0] + 3 * 16, s, &sum[1]); sad16_neon(ref_loop[1] + 3 * 16, s, &sum[3]); sad16_neon(ref_loop[2] + 3 * 16, s, &sum[5]); sad16_neon(ref_loop[3] + 3 * 16, s, &sum[7]); src_ptr += src_stride; ref_loop[0] += ref_stride; ref_loop[1] += ref_stride; ref_loop[2] += ref_stride; ref_loop[3] += ref_stride; } sad_4096_pel_final_neon(sum, res); } libvpx-1.8.2/vpx_dsp/arm/sad_neon.c000066400000000000000000000361111357355204000172300ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/
#include <arm_neon.h>
#include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/sum_neon.h"
uint32_t vpx_sad4x4_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride) { const uint8x16_t src_u8 = load_unaligned_u8q(src_ptr, src_stride); const uint8x16_t ref_u8 = load_unaligned_u8q(ref_ptr, ref_stride); uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(ref_u8)); abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(ref_u8)); return vget_lane_u32(horizontal_add_uint16x8(abs), 0); }
uint32_t vpx_sad4x4_avg_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred) { const uint8x16_t src_u8 = load_unaligned_u8q(src_ptr, src_stride); const uint8x16_t ref_u8 = load_unaligned_u8q(ref_ptr, ref_stride); const uint8x16_t second_pred_u8 = vld1q_u8(second_pred); const uint8x16_t avg = vrhaddq_u8(ref_u8, second_pred_u8); uint16x8_t abs = vabdl_u8(vget_low_u8(src_u8), vget_low_u8(avg)); abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(avg)); return vget_lane_u32(horizontal_add_uint16x8(abs), 0); }
uint32_t vpx_sad4x8_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < 8; i += 4) { const uint8x16_t src_u8 = load_unaligned_u8q(src_ptr, src_stride); const uint8x16_t ref_u8 = load_unaligned_u8q(ref_ptr, ref_stride); src_ptr += 4 * src_stride; ref_ptr += 4 * ref_stride; abs = vabal_u8(abs, vget_low_u8(src_u8), vget_low_u8(ref_u8)); abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(ref_u8)); } return vget_lane_u32(horizontal_add_uint16x8(abs), 0); }
uint32_t vpx_sad4x8_avg_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < 8; i += 4) { const uint8x16_t src_u8 = load_unaligned_u8q(src_ptr, src_stride); const uint8x16_t ref_u8 = load_unaligned_u8q(ref_ptr, ref_stride); const uint8x16_t second_pred_u8 = vld1q_u8(second_pred); const uint8x16_t avg = vrhaddq_u8(ref_u8, second_pred_u8); src_ptr += 4 * src_stride; ref_ptr += 4 * ref_stride; second_pred += 16; abs = vabal_u8(abs, vget_low_u8(src_u8), vget_low_u8(avg)); abs = vabal_u8(abs, vget_high_u8(src_u8), vget_high_u8(avg)); } return vget_lane_u32(horizontal_add_uint16x8(abs), 0); }
static INLINE uint16x8_t sad8x(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const int height) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < height; ++i) { const uint8x8_t a_u8 = vld1_u8(src_ptr); const uint8x8_t b_u8 = vld1_u8(ref_ptr); src_ptr += src_stride; ref_ptr += ref_stride; abs = vabal_u8(abs, a_u8, b_u8); } return abs; }
static INLINE uint16x8_t sad8x_avg(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const int height) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < height; ++i) { const uint8x8_t a_u8 = vld1_u8(src_ptr); const uint8x8_t b_u8 = vld1_u8(ref_ptr); const uint8x8_t c_u8 = vld1_u8(second_pred); const uint8x8_t avg = vrhadd_u8(b_u8, c_u8); src_ptr += src_stride; ref_ptr += ref_stride; second_pred += 8; abs = vabal_u8(abs, a_u8, avg); } return abs; }
#define sad8xN(n) \ uint32_t vpx_sad8x##n##_neon(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride) { \ const uint16x8_t abs = sad8x(src_ptr, src_stride, ref_ptr, ref_stride, n); \ return
vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ } \ \ uint32_t vpx_sad8x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ const uint8_t *second_pred) { \ const uint16x8_t abs = \ sad8x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \ return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ } sad8xN(4); sad8xN(8); sad8xN(16); static INLINE uint16x8_t sad16x(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const int height) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < height; ++i) { const uint8x16_t a_u8 = vld1q_u8(src_ptr); const uint8x16_t b_u8 = vld1q_u8(ref_ptr); src_ptr += src_stride; ref_ptr += ref_stride; abs = vabal_u8(abs, vget_low_u8(a_u8), vget_low_u8(b_u8)); abs = vabal_u8(abs, vget_high_u8(a_u8), vget_high_u8(b_u8)); } return abs; } static INLINE uint16x8_t sad16x_avg(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const int height) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < height; ++i) { const uint8x16_t a_u8 = vld1q_u8(src_ptr); const uint8x16_t b_u8 = vld1q_u8(ref_ptr); const uint8x16_t c_u8 = vld1q_u8(second_pred); const uint8x16_t avg = vrhaddq_u8(b_u8, c_u8); src_ptr += src_stride; ref_ptr += ref_stride; second_pred += 16; abs = vabal_u8(abs, vget_low_u8(a_u8), vget_low_u8(avg)); abs = vabal_u8(abs, vget_high_u8(a_u8), vget_high_u8(avg)); } return abs; } #define sad16xN(n) \ uint32_t vpx_sad16x##n##_neon(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride) { \ const uint16x8_t abs = \ sad16x(src_ptr, src_stride, ref_ptr, ref_stride, n); \ return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ } \ \ uint32_t vpx_sad16x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ const uint8_t *second_pred) { \ const uint16x8_t abs = \ sad16x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \ return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ } sad16xN(8); sad16xN(16); sad16xN(32); static INLINE uint16x8_t sad32x(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const int height) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < height; ++i) { const uint8x16_t a_lo = vld1q_u8(src_ptr); const uint8x16_t a_hi = vld1q_u8(src_ptr + 16); const uint8x16_t b_lo = vld1q_u8(ref_ptr); const uint8x16_t b_hi = vld1q_u8(ref_ptr + 16); src_ptr += src_stride; ref_ptr += ref_stride; abs = vabal_u8(abs, vget_low_u8(a_lo), vget_low_u8(b_lo)); abs = vabal_u8(abs, vget_high_u8(a_lo), vget_high_u8(b_lo)); abs = vabal_u8(abs, vget_low_u8(a_hi), vget_low_u8(b_hi)); abs = vabal_u8(abs, vget_high_u8(a_hi), vget_high_u8(b_hi)); } return abs; } static INLINE uint16x8_t sad32x_avg(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const int height) { int i; uint16x8_t abs = vdupq_n_u16(0); for (i = 0; i < height; ++i) { const uint8x16_t a_lo = vld1q_u8(src_ptr); const uint8x16_t a_hi = vld1q_u8(src_ptr + 16); const uint8x16_t b_lo = vld1q_u8(ref_ptr); const uint8x16_t b_hi = vld1q_u8(ref_ptr + 16); const uint8x16_t c_lo = vld1q_u8(second_pred); const uint8x16_t c_hi = vld1q_u8(second_pred + 16); const uint8x16_t avg_lo = vrhaddq_u8(b_lo, c_lo); const uint8x16_t avg_hi = vrhaddq_u8(b_hi, c_hi); src_ptr += src_stride; ref_ptr += ref_stride; second_pred += 32; abs = vabal_u8(abs, vget_low_u8(a_lo), vget_low_u8(avg_lo)); abs = vabal_u8(abs, 
vget_high_u8(a_lo), vget_high_u8(avg_lo)); abs = vabal_u8(abs, vget_low_u8(a_hi), vget_low_u8(avg_hi)); abs = vabal_u8(abs, vget_high_u8(a_hi), vget_high_u8(avg_hi)); } return abs; } #define sad32xN(n) \ uint32_t vpx_sad32x##n##_neon(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride) { \ const uint16x8_t abs = \ sad32x(src_ptr, src_stride, ref_ptr, ref_stride, n); \ return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ } \ \ uint32_t vpx_sad32x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ const uint8_t *second_pred) { \ const uint16x8_t abs = \ sad32x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \ return vget_lane_u32(horizontal_add_uint16x8(abs), 0); \ } sad32xN(16); sad32xN(32); sad32xN(64); static INLINE uint32x4_t sad64x(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const int height) { int i; uint16x8_t abs_0 = vdupq_n_u16(0); uint16x8_t abs_1 = vdupq_n_u16(0); for (i = 0; i < height; ++i) { const uint8x16_t a_0 = vld1q_u8(src_ptr); const uint8x16_t a_1 = vld1q_u8(src_ptr + 16); const uint8x16_t a_2 = vld1q_u8(src_ptr + 32); const uint8x16_t a_3 = vld1q_u8(src_ptr + 48); const uint8x16_t b_0 = vld1q_u8(ref_ptr); const uint8x16_t b_1 = vld1q_u8(ref_ptr + 16); const uint8x16_t b_2 = vld1q_u8(ref_ptr + 32); const uint8x16_t b_3 = vld1q_u8(ref_ptr + 48); src_ptr += src_stride; ref_ptr += ref_stride; abs_0 = vabal_u8(abs_0, vget_low_u8(a_0), vget_low_u8(b_0)); abs_0 = vabal_u8(abs_0, vget_high_u8(a_0), vget_high_u8(b_0)); abs_0 = vabal_u8(abs_0, vget_low_u8(a_1), vget_low_u8(b_1)); abs_0 = vabal_u8(abs_0, vget_high_u8(a_1), vget_high_u8(b_1)); abs_1 = vabal_u8(abs_1, vget_low_u8(a_2), vget_low_u8(b_2)); abs_1 = vabal_u8(abs_1, vget_high_u8(a_2), vget_high_u8(b_2)); abs_1 = vabal_u8(abs_1, vget_low_u8(a_3), vget_low_u8(b_3)); abs_1 = vabal_u8(abs_1, vget_high_u8(a_3), vget_high_u8(b_3)); } { const uint32x4_t sum = vpaddlq_u16(abs_0); return vpadalq_u16(sum, abs_1); } } static INLINE uint32x4_t sad64x_avg(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred, const int height) { int i; uint16x8_t abs_0 = vdupq_n_u16(0); uint16x8_t abs_1 = vdupq_n_u16(0); for (i = 0; i < height; ++i) { const uint8x16_t a_0 = vld1q_u8(src_ptr); const uint8x16_t a_1 = vld1q_u8(src_ptr + 16); const uint8x16_t a_2 = vld1q_u8(src_ptr + 32); const uint8x16_t a_3 = vld1q_u8(src_ptr + 48); const uint8x16_t b_0 = vld1q_u8(ref_ptr); const uint8x16_t b_1 = vld1q_u8(ref_ptr + 16); const uint8x16_t b_2 = vld1q_u8(ref_ptr + 32); const uint8x16_t b_3 = vld1q_u8(ref_ptr + 48); const uint8x16_t c_0 = vld1q_u8(second_pred); const uint8x16_t c_1 = vld1q_u8(second_pred + 16); const uint8x16_t c_2 = vld1q_u8(second_pred + 32); const uint8x16_t c_3 = vld1q_u8(second_pred + 48); const uint8x16_t avg_0 = vrhaddq_u8(b_0, c_0); const uint8x16_t avg_1 = vrhaddq_u8(b_1, c_1); const uint8x16_t avg_2 = vrhaddq_u8(b_2, c_2); const uint8x16_t avg_3 = vrhaddq_u8(b_3, c_3); src_ptr += src_stride; ref_ptr += ref_stride; second_pred += 64; abs_0 = vabal_u8(abs_0, vget_low_u8(a_0), vget_low_u8(avg_0)); abs_0 = vabal_u8(abs_0, vget_high_u8(a_0), vget_high_u8(avg_0)); abs_0 = vabal_u8(abs_0, vget_low_u8(a_1), vget_low_u8(avg_1)); abs_0 = vabal_u8(abs_0, vget_high_u8(a_1), vget_high_u8(avg_1)); abs_1 = vabal_u8(abs_1, vget_low_u8(a_2), vget_low_u8(avg_2)); abs_1 = vabal_u8(abs_1, vget_high_u8(a_2), vget_high_u8(avg_2)); abs_1 = vabal_u8(abs_1, vget_low_u8(a_3), 
vget_low_u8(avg_3)); abs_1 = vabal_u8(abs_1, vget_high_u8(a_3), vget_high_u8(avg_3)); } { const uint32x4_t sum = vpaddlq_u16(abs_0); return vpadalq_u16(sum, abs_1); } }
#define sad64xN(n) \ uint32_t vpx_sad64x##n##_neon(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride) { \ const uint32x4_t abs = \ sad64x(src_ptr, src_stride, ref_ptr, ref_stride, n); \ return vget_lane_u32(horizontal_add_uint32x4(abs), 0); \ } \ \ uint32_t vpx_sad64x##n##_avg_neon(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ const uint8_t *second_pred) { \ const uint32x4_t abs = \ sad64x_avg(src_ptr, src_stride, ref_ptr, ref_stride, second_pred, n); \ return vget_lane_u32(horizontal_add_uint32x4(abs), 0); \ }
sad64xN(32); sad64xN(64);
libvpx-1.8.2/vpx_dsp/arm/save_reg_neon.asm000066400000000000000000000013261357355204000206120ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; EXPORT |vpx_push_neon| EXPORT |vpx_pop_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 |vpx_push_neon| PROC vstm r0!, {d8-d15} bx lr ENDP |vpx_pop_neon| PROC vldm r0!, {d8-d15} bx lr ENDP END
libvpx-1.8.2/vpx_dsp/arm/subpel_variance_neon.c000066400000000000000000000216171357355204000216300ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */
#include <arm_neon.h>
#include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/variance.h" #include "vpx_dsp/arm/mem_neon.h"
static const uint8_t bilinear_filters[8][2] = { { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 }, { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 }, };
// Process a block exactly 4 wide and a multiple of 2 high.
static void var_filter_block2d_bil_w4(const uint8_t *src_ptr, uint8_t *output_ptr, unsigned int src_pixels_per_line, int pixel_step, unsigned int output_height, const uint8_t *filter) { const uint8x8_t f0 = vdup_n_u8(filter[0]); const uint8x8_t f1 = vdup_n_u8(filter[1]); unsigned int i; for (i = 0; i < output_height; i += 2) { const uint8x8_t src_0 = load_unaligned_u8(src_ptr, src_pixels_per_line); const uint8x8_t src_1 = load_unaligned_u8(src_ptr + pixel_step, src_pixels_per_line); const uint16x8_t a = vmull_u8(src_0, f0); const uint16x8_t b = vmlal_u8(a, src_1, f1); const uint8x8_t out = vrshrn_n_u16(b, FILTER_BITS); vst1_u8(output_ptr, out); src_ptr += 2 * src_pixels_per_line; output_ptr += 8; } }
// Process a block exactly 8 wide and any height.
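// (Each output pixel is the rounded two-tap blend out = (src_0 * filter[0] + src_1 * filter[1] + (1 << (FILTER_BITS - 1))) >> FILTER_BITS, computed with vmull_u8()/vmlal_u8() and vrshrn_n_u16(); the taps in bilinear_filters sum to 128, i.e. 1 << FILTER_BITS assuming the usual FILTER_BITS == 7.)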
static void var_filter_block2d_bil_w8(const uint8_t *src_ptr, uint8_t *output_ptr, unsigned int src_pixels_per_line, int pixel_step, unsigned int output_height, const uint8_t *filter) { const uint8x8_t f0 = vdup_n_u8(filter[0]); const uint8x8_t f1 = vdup_n_u8(filter[1]); unsigned int i; for (i = 0; i < output_height; ++i) { const uint8x8_t src_0 = vld1_u8(&src_ptr[0]); const uint8x8_t src_1 = vld1_u8(&src_ptr[pixel_step]); const uint16x8_t a = vmull_u8(src_0, f0); const uint16x8_t b = vmlal_u8(a, src_1, f1); const uint8x8_t out = vrshrn_n_u16(b, FILTER_BITS); vst1_u8(output_ptr, out); src_ptr += src_pixels_per_line; output_ptr += 8; } }
// Process a block which is a multiple of 16 wide and any height.
static void var_filter_block2d_bil_w16(const uint8_t *src_ptr, uint8_t *output_ptr, unsigned int src_pixels_per_line, int pixel_step, unsigned int output_height, unsigned int output_width, const uint8_t *filter) { const uint8x8_t f0 = vdup_n_u8(filter[0]); const uint8x8_t f1 = vdup_n_u8(filter[1]); unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; j += 16) { const uint8x16_t src_0 = vld1q_u8(&src_ptr[j]); const uint8x16_t src_1 = vld1q_u8(&src_ptr[j + pixel_step]); const uint16x8_t a = vmull_u8(vget_low_u8(src_0), f0); const uint16x8_t b = vmlal_u8(a, vget_low_u8(src_1), f1); const uint8x8_t out_lo = vrshrn_n_u16(b, FILTER_BITS); const uint16x8_t c = vmull_u8(vget_high_u8(src_0), f0); const uint16x8_t d = vmlal_u8(c, vget_high_u8(src_1), f1); const uint8x8_t out_hi = vrshrn_n_u16(d, FILTER_BITS); vst1q_u8(output_ptr + j, vcombine_u8(out_lo, out_hi)); } src_ptr += src_pixels_per_line; output_ptr += output_width; } }
// 4xM filter writes an extra row to fdata because it processes two rows at a // time.
#define sub_pixel_varianceNxM(n, m) \ uint32_t vpx_sub_pixel_variance##n##x##m##_neon( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ uint8_t temp0[n * (m + (n == 4 ? 2 : 1))]; \ uint8_t temp1[n * m]; \ \ if (n == 4) { \ var_filter_block2d_bil_w4(src_ptr, temp0, src_stride, 1, (m + 2), \ bilinear_filters[x_offset]); \ var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \ bilinear_filters[y_offset]); \ } else if (n == 8) { \ var_filter_block2d_bil_w8(src_ptr, temp0, src_stride, 1, (m + 1), \ bilinear_filters[x_offset]); \ var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \ bilinear_filters[y_offset]); \ } else { \ var_filter_block2d_bil_w16(src_ptr, temp0, src_stride, 1, (m + 1), n, \ bilinear_filters[x_offset]); \ var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \ bilinear_filters[y_offset]); \ } \ return vpx_variance##n##x##m(temp1, n, ref_ptr, ref_stride, sse); \ }
sub_pixel_varianceNxM(4, 4); sub_pixel_varianceNxM(4, 8); sub_pixel_varianceNxM(8, 4); sub_pixel_varianceNxM(8, 8); sub_pixel_varianceNxM(8, 16); sub_pixel_varianceNxM(16, 8); sub_pixel_varianceNxM(16, 16); sub_pixel_varianceNxM(16, 32); sub_pixel_varianceNxM(32, 16); sub_pixel_varianceNxM(32, 32); sub_pixel_varianceNxM(32, 64); sub_pixel_varianceNxM(64, 32); sub_pixel_varianceNxM(64, 64);
// 4xM filter writes an extra row to fdata because it processes two rows at a // time.
#define sub_pixel_avg_varianceNxM(n, m) \ uint32_t vpx_sub_pixel_avg_variance##n##x##m##_neon( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ const uint8_t *second_pred) { \ uint8_t temp0[n * (m + (n == 4 ?
2 : 1))]; \ uint8_t temp1[n * m]; \ \ if (n == 4) { \ var_filter_block2d_bil_w4(src_ptr, temp0, src_stride, 1, (m + 2), \ bilinear_filters[x_offset]); \ var_filter_block2d_bil_w4(temp0, temp1, n, n, m, \ bilinear_filters[y_offset]); \ } else if (n == 8) { \ var_filter_block2d_bil_w8(src_ptr, temp0, src_stride, 1, (m + 1), \ bilinear_filters[x_offset]); \ var_filter_block2d_bil_w8(temp0, temp1, n, n, m, \ bilinear_filters[y_offset]); \ } else { \ var_filter_block2d_bil_w16(src_ptr, temp0, src_stride, 1, (m + 1), n, \ bilinear_filters[x_offset]); \ var_filter_block2d_bil_w16(temp0, temp1, n, n, m, n, \ bilinear_filters[y_offset]); \ } \ \ vpx_comp_avg_pred(temp0, second_pred, n, m, temp1, n); \ \ return vpx_variance##n##x##m(temp0, n, ref_ptr, ref_stride, sse); \ }
sub_pixel_avg_varianceNxM(4, 4); sub_pixel_avg_varianceNxM(4, 8); sub_pixel_avg_varianceNxM(8, 4); sub_pixel_avg_varianceNxM(8, 8); sub_pixel_avg_varianceNxM(8, 16); sub_pixel_avg_varianceNxM(16, 8); sub_pixel_avg_varianceNxM(16, 16); sub_pixel_avg_varianceNxM(16, 32); sub_pixel_avg_varianceNxM(32, 16); sub_pixel_avg_varianceNxM(32, 32); sub_pixel_avg_varianceNxM(32, 64); sub_pixel_avg_varianceNxM(64, 32); sub_pixel_avg_varianceNxM(64, 64);
libvpx-1.8.2/vpx_dsp/arm/subtract_neon.c000066400000000000000000000060311357355204000203060ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */
#include <arm_neon.h> #include <assert.h>
#include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/arm/mem_neon.h"
void vpx_subtract_block_neon(int rows, int cols, int16_t *diff, ptrdiff_t diff_stride, const uint8_t *src, ptrdiff_t src_stride, const uint8_t *pred, ptrdiff_t pred_stride) { int r = rows, c; if (cols > 16) { do { for (c = 0; c < cols; c += 32) { const uint8x16_t s0 = vld1q_u8(&src[c + 0]); const uint8x16_t s1 = vld1q_u8(&src[c + 16]); const uint8x16_t p0 = vld1q_u8(&pred[c + 0]); const uint8x16_t p1 = vld1q_u8(&pred[c + 16]); const uint16x8_t d0 = vsubl_u8(vget_low_u8(s0), vget_low_u8(p0)); const uint16x8_t d1 = vsubl_u8(vget_high_u8(s0), vget_high_u8(p0)); const uint16x8_t d2 = vsubl_u8(vget_low_u8(s1), vget_low_u8(p1)); const uint16x8_t d3 = vsubl_u8(vget_high_u8(s1), vget_high_u8(p1)); vst1q_s16(&diff[c + 0], vreinterpretq_s16_u16(d0)); vst1q_s16(&diff[c + 8], vreinterpretq_s16_u16(d1)); vst1q_s16(&diff[c + 16], vreinterpretq_s16_u16(d2)); vst1q_s16(&diff[c + 24], vreinterpretq_s16_u16(d3)); } diff += diff_stride; pred += pred_stride; src += src_stride; } while (--r); } else if (cols > 8) { do { const uint8x16_t s = vld1q_u8(&src[0]); const uint8x16_t p = vld1q_u8(&pred[0]); const uint16x8_t d0 = vsubl_u8(vget_low_u8(s), vget_low_u8(p)); const uint16x8_t d1 = vsubl_u8(vget_high_u8(s), vget_high_u8(p)); vst1q_s16(&diff[0], vreinterpretq_s16_u16(d0)); vst1q_s16(&diff[8], vreinterpretq_s16_u16(d1)); diff += diff_stride; pred += pred_stride; src += src_stride; } while (--r); } else if (cols > 4) { do { const uint8x8_t s = vld1_u8(&src[0]); const uint8x8_t p = vld1_u8(&pred[0]); const uint16x8_t v_diff = vsubl_u8(s, p); vst1q_s16(&diff[0], vreinterpretq_s16_u16(v_diff)); diff += diff_stride; pred += pred_stride; src += src_stride; } while (--r); } else {
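// (cols == 4: process two rows per iteration with unaligned 4-byte loads; the r -= 2 loop below assumes an even number of rows.)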
assert(cols == 4); do { const uint8x8_t s = load_unaligned_u8(src, (int)src_stride); const uint8x8_t p = load_unaligned_u8(pred, (int)pred_stride); const uint16x8_t d = vsubl_u8(s, p); vst1_s16(diff + 0 * diff_stride, vreinterpret_s16_u16(vget_low_u16(d))); vst1_s16(diff + 1 * diff_stride, vreinterpret_s16_u16(vget_high_u16(d))); diff += 2 * diff_stride; pred += 2 * pred_stride; src += 2 * src_stride; r -= 2; } while (r); } }
libvpx-1.8.2/vpx_dsp/arm/sum_neon.h000066400000000000000000000025221357355204000172710ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */
#ifndef VPX_VPX_DSP_ARM_SUM_NEON_H_ #define VPX_VPX_DSP_ARM_SUM_NEON_H_
#include <arm_neon.h>
#include "./vpx_config.h" #include "vpx/vpx_integer.h"
static INLINE int32x2_t horizontal_add_int16x8(const int16x8_t a) { const int32x4_t b = vpaddlq_s16(a); const int64x2_t c = vpaddlq_s32(b); return vadd_s32(vreinterpret_s32_s64(vget_low_s64(c)), vreinterpret_s32_s64(vget_high_s64(c))); }
static INLINE uint32x2_t horizontal_add_uint16x8(const uint16x8_t a) { const uint32x4_t b = vpaddlq_u16(a); const uint64x2_t c = vpaddlq_u32(b); return vadd_u32(vreinterpret_u32_u64(vget_low_u64(c)), vreinterpret_u32_u64(vget_high_u64(c))); }
static INLINE uint32x2_t horizontal_add_uint32x4(const uint32x4_t a) { const uint64x2_t b = vpaddlq_u32(a); return vadd_u32(vreinterpret_u32_u64(vget_low_u64(b)), vreinterpret_u32_u64(vget_high_u64(b))); }
#endif // VPX_VPX_DSP_ARM_SUM_NEON_H_
libvpx-1.8.2/vpx_dsp/arm/sum_squares_neon.c000066400000000000000000000060601357355204000210300ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/
#include <arm_neon.h> #include <assert.h>
#include "./vpx_dsp_rtcd.h"
uint64_t vpx_sum_squares_2d_i16_neon(const int16_t *src, int stride, int size) { uint64x1_t s2; if (size == 4) { int16x4_t s[4]; int32x4_t s0; uint32x2_t s1; s[0] = vld1_s16(src + 0 * stride); s[1] = vld1_s16(src + 1 * stride); s[2] = vld1_s16(src + 2 * stride); s[3] = vld1_s16(src + 3 * stride); s0 = vmull_s16(s[0], s[0]); s0 = vmlal_s16(s0, s[1], s[1]); s0 = vmlal_s16(s0, s[2], s[2]); s0 = vmlal_s16(s0, s[3], s[3]); s1 = vpadd_u32(vget_low_u32(vreinterpretq_u32_s32(s0)), vget_high_u32(vreinterpretq_u32_s32(s0))); s2 = vpaddl_u32(s1); } else { int r = size; uint64x2_t s1 = vdupq_n_u64(0); do { int c = size; int32x4_t s0 = vdupq_n_s32(0); const int16_t *src_t = src; do { int16x8_t s[8]; s[0] = vld1q_s16(src_t + 0 * stride); s[1] = vld1q_s16(src_t + 1 * stride); s[2] = vld1q_s16(src_t + 2 * stride); s[3] = vld1q_s16(src_t + 3 * stride); s[4] = vld1q_s16(src_t + 4 * stride); s[5] = vld1q_s16(src_t + 5 * stride); s[6] = vld1q_s16(src_t + 6 * stride); s[7] = vld1q_s16(src_t + 7 * stride); s0 = vmlal_s16(s0, vget_low_s16(s[0]), vget_low_s16(s[0])); s0 = vmlal_s16(s0, vget_low_s16(s[1]), vget_low_s16(s[1])); s0 = vmlal_s16(s0, vget_low_s16(s[2]), vget_low_s16(s[2])); s0 = vmlal_s16(s0, vget_low_s16(s[3]), vget_low_s16(s[3])); s0 = vmlal_s16(s0, vget_low_s16(s[4]), vget_low_s16(s[4])); s0 = vmlal_s16(s0, vget_low_s16(s[5]), vget_low_s16(s[5])); s0 = vmlal_s16(s0, vget_low_s16(s[6]), vget_low_s16(s[6])); s0 = vmlal_s16(s0, vget_low_s16(s[7]), vget_low_s16(s[7])); s0 = vmlal_s16(s0, vget_high_s16(s[0]), vget_high_s16(s[0])); s0 = vmlal_s16(s0, vget_high_s16(s[1]), vget_high_s16(s[1])); s0 = vmlal_s16(s0, vget_high_s16(s[2]), vget_high_s16(s[2])); s0 = vmlal_s16(s0, vget_high_s16(s[3]), vget_high_s16(s[3])); s0 = vmlal_s16(s0, vget_high_s16(s[4]), vget_high_s16(s[4])); s0 = vmlal_s16(s0, vget_high_s16(s[5]), vget_high_s16(s[5])); s0 = vmlal_s16(s0, vget_high_s16(s[6]), vget_high_s16(s[6])); s0 = vmlal_s16(s0, vget_high_s16(s[7]), vget_high_s16(s[7])); src_t += 8; c -= 8; } while (c); s1 = vaddw_u32(s1, vget_low_u32(vreinterpretq_u32_s32(s0))); s1 = vaddw_u32(s1, vget_high_u32(vreinterpretq_u32_s32(s0))); src += 8 * stride; r -= 8; } while (r); s2 = vadd_u64(vget_low_u64(s1), vget_high_u64(s1)); } return vget_lane_u64(s2, 0); }
libvpx-1.8.2/vpx_dsp/arm/transpose_neon.h000066400000000000000000001521001357355204000205010ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
libvpx-1.8.2/vpx_dsp/arm/transpose_neon.h000066400000000000000000001521001357355204000205010ustar00rootroot00000000000000
/*
 *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VPX_DSP_ARM_TRANSPOSE_NEON_H_
#define VPX_VPX_DSP_ARM_TRANSPOSE_NEON_H_

#include <arm_neon.h>

#include "./vpx_config.h"

// Transpose 64 bit elements as follows:
// a0: 00 01 02 03 04 05 06 07
// a1: 16 17 18 19 20 21 22 23
//
// b0.val[0]: 00 01 02 03 16 17 18 19
// b0.val[1]: 04 05 06 07 20 21 22 23
static INLINE int16x8x2_t vpx_vtrnq_s64_to_s16(int32x4_t a0, int32x4_t a1) {
  int16x8x2_t b0;
  b0.val[0] = vcombine_s16(vreinterpret_s16_s32(vget_low_s32(a0)),
                           vreinterpret_s16_s32(vget_low_s32(a1)));
  b0.val[1] = vcombine_s16(vreinterpret_s16_s32(vget_high_s32(a0)),
                           vreinterpret_s16_s32(vget_high_s32(a1)));
  return b0;
}

static INLINE int32x4x2_t vpx_vtrnq_s64_to_s32(int32x4_t a0, int32x4_t a1) {
  int32x4x2_t b0;
  b0.val[0] = vcombine_s32(vget_low_s32(a0), vget_low_s32(a1));
  b0.val[1] = vcombine_s32(vget_high_s32(a0), vget_high_s32(a1));
  return b0;
}

static INLINE int64x2x2_t vpx_vtrnq_s64(int32x4_t a0, int32x4_t a1) {
  int64x2x2_t b0;
  b0.val[0] = vcombine_s64(vreinterpret_s64_s32(vget_low_s32(a0)),
                           vreinterpret_s64_s32(vget_low_s32(a1)));
  b0.val[1] = vcombine_s64(vreinterpret_s64_s32(vget_high_s32(a0)),
                           vreinterpret_s64_s32(vget_high_s32(a1)));
  return b0;
}

static INLINE uint8x16x2_t vpx_vtrnq_u64_to_u8(uint32x4_t a0, uint32x4_t a1) {
  uint8x16x2_t b0;
  b0.val[0] = vcombine_u8(vreinterpret_u8_u32(vget_low_u32(a0)),
                          vreinterpret_u8_u32(vget_low_u32(a1)));
  b0.val[1] = vcombine_u8(vreinterpret_u8_u32(vget_high_u32(a0)),
                          vreinterpret_u8_u32(vget_high_u32(a1)));
  return b0;
}

static INLINE uint16x8x2_t vpx_vtrnq_u64_to_u16(uint32x4_t a0, uint32x4_t a1) {
  uint16x8x2_t b0;
  b0.val[0] = vcombine_u16(vreinterpret_u16_u32(vget_low_u32(a0)),
                           vreinterpret_u16_u32(vget_low_u32(a1)));
  b0.val[1] = vcombine_u16(vreinterpret_u16_u32(vget_high_u32(a0)),
                           vreinterpret_u16_u32(vget_high_u32(a1)));
  return b0;
}

static INLINE void transpose_u8_4x4(uint8x8_t *a0, uint8x8_t *a1) {
  // Swap 16 bit elements. Goes from:
  // a0: 00 01 02 03 10 11 12 13
  // a1: 20 21 22 23 30 31 32 33
  // to:
  // b0.val[0]: 00 01 20 21 10 11 30 31
  // b0.val[1]: 02 03 22 23 12 13 32 33
  const uint16x4x2_t b0 =
      vtrn_u16(vreinterpret_u16_u8(*a0), vreinterpret_u16_u8(*a1));

  // Swap 32 bit elements resulting in:
  // c0.val[0]: 00 01 20 21 02 03 22 23
  // c0.val[1]: 10 11 30 31 12 13 32 33
  const uint32x2x2_t c0 = vtrn_u32(vreinterpret_u32_u16(b0.val[0]),
                                   vreinterpret_u32_u16(b0.val[1]));

  // Swap 8 bit elements resulting in:
  // d0.val[0]: 00 10 20 30 02 12 22 32
  // d0.val[1]: 01 11 21 31 03 13 23 33
  const uint8x8x2_t d0 =
      vtrn_u8(vreinterpret_u8_u32(c0.val[0]), vreinterpret_u8_u32(c0.val[1]));

  *a0 = d0.val[0];
  *a1 = d0.val[1];
}
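// Illustrative scalar reference (not part of the upstream header): the
// plain-C operation transpose_u8_4x4 performs on the 4x4 byte matrix held in
// the two d-registers (rows 0-1 in *a0, rows 2-3 in *a1): element (i, j)
// swaps with element (j, i). The name is hypothetical.
static INLINE void transpose_4x4_c_ref(uint8_t m[4][4]) {
  int i, j;
  for (i = 0; i < 4; ++i) {
    for (j = i + 1; j < 4; ++j) {
      const uint8_t t = m[i][j];
      m[i][j] = m[j][i];
      m[j][i] = t;
    }
  }
}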
static INLINE void transpose_s16_4x4d(int16x4_t *a0, int16x4_t *a1,
                                      int16x4_t *a2, int16x4_t *a3) {
  // Swap 16 bit elements. Goes from:
  // a0: 00 01 02 03
  // a1: 10 11 12 13
  // a2: 20 21 22 23
  // a3: 30 31 32 33
  // to:
  // b0.val[0]: 00 10 02 12
  // b0.val[1]: 01 11 03 13
  // b1.val[0]: 20 30 22 32
  // b1.val[1]: 21 31 23 33
  const int16x4x2_t b0 = vtrn_s16(*a0, *a1);
  const int16x4x2_t b1 = vtrn_s16(*a2, *a3);

  // Swap 32 bit elements resulting in:
  // c0.val[0]: 00 10 20 30
  // c0.val[1]: 02 12 22 32
  // c1.val[0]: 01 11 21 31
  // c1.val[1]: 03 13 23 33
  const int32x2x2_t c0 = vtrn_s32(vreinterpret_s32_s16(b0.val[0]),
                                  vreinterpret_s32_s16(b1.val[0]));
  const int32x2x2_t c1 = vtrn_s32(vreinterpret_s32_s16(b0.val[1]),
                                  vreinterpret_s32_s16(b1.val[1]));

  *a0 = vreinterpret_s16_s32(c0.val[0]);
  *a1 = vreinterpret_s16_s32(c1.val[0]);
  *a2 = vreinterpret_s16_s32(c0.val[1]);
  *a3 = vreinterpret_s16_s32(c1.val[1]);
}

static INLINE void transpose_s16_4x4q(int16x8_t *a0, int16x8_t *a1) {
  // Swap 32 bit elements. Goes from:
  // a0: 00 01 02 03 10 11 12 13
  // a1: 20 21 22 23 30 31 32 33
  // to:
  // b0.val[0]: 00 01 20 21 10 11 30 31
  // b0.val[1]: 02 03 22 23 12 13 32 33
  const int32x4x2_t b0 =
      vtrnq_s32(vreinterpretq_s32_s16(*a0), vreinterpretq_s32_s16(*a1));

  // Swap 64 bit elements resulting in:
  // c0: 00 01 20 21 02 03 22 23
  // c1: 10 11 30 31 12 13 32 33
  const int32x4_t c0 =
      vcombine_s32(vget_low_s32(b0.val[0]), vget_low_s32(b0.val[1]));
  const int32x4_t c1 =
      vcombine_s32(vget_high_s32(b0.val[0]), vget_high_s32(b0.val[1]));

  // Swap 16 bit elements resulting in:
  // d0.val[0]: 00 10 20 30 02 12 22 32
  // d0.val[1]: 01 11 21 31 03 13 23 33
  const int16x8x2_t d0 =
      vtrnq_s16(vreinterpretq_s16_s32(c0), vreinterpretq_s16_s32(c1));

  *a0 = d0.val[0];
  *a1 = d0.val[1];
}

static INLINE void transpose_u16_4x4q(uint16x8_t *a0, uint16x8_t *a1) {
  // Swap 32 bit elements. Goes from:
  // a0: 00 01 02 03 10 11 12 13
  // a1: 20 21 22 23 30 31 32 33
  // to:
  // b0.val[0]: 00 01 20 21 10 11 30 31
  // b0.val[1]: 02 03 22 23 12 13 32 33
  const uint32x4x2_t b0 =
      vtrnq_u32(vreinterpretq_u32_u16(*a0), vreinterpretq_u32_u16(*a1));

  // Swap 64 bit elements resulting in:
  // c0: 00 01 20 21 02 03 22 23
  // c1: 10 11 30 31 12 13 32 33
  const uint32x4_t c0 =
      vcombine_u32(vget_low_u32(b0.val[0]), vget_low_u32(b0.val[1]));
  const uint32x4_t c1 =
      vcombine_u32(vget_high_u32(b0.val[0]), vget_high_u32(b0.val[1]));

  // Swap 16 bit elements resulting in:
  // d0.val[0]: 00 10 20 30 02 12 22 32
  // d0.val[1]: 01 11 21 31 03 13 23 33
  const uint16x8x2_t d0 =
      vtrnq_u16(vreinterpretq_u16_u32(c0), vreinterpretq_u16_u32(c1));

  *a0 = d0.val[0];
  *a1 = d0.val[1];
}
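// Illustrative usage sketch (not part of the upstream header): a full
// load/transpose/store of a 4x4 int16 block, the same pattern the
// load_and_transpose_* helpers near the end of this header wrap for the 8x8
// cases. The function name is hypothetical.
static INLINE void transpose_block_s16_4x4(const int16_t *in, int in_stride,
                                           int16_t *out, int out_stride) {
  int16x4_t r0 = vld1_s16(in + 0 * in_stride);
  int16x4_t r1 = vld1_s16(in + 1 * in_stride);
  int16x4_t r2 = vld1_s16(in + 2 * in_stride);
  int16x4_t r3 = vld1_s16(in + 3 * in_stride);
  transpose_s16_4x4d(&r0, &r1, &r2, &r3);
  vst1_s16(out + 0 * out_stride, r0);
  vst1_s16(out + 1 * out_stride, r1);
  vst1_s16(out + 2 * out_stride, r2);
  vst1_s16(out + 3 * out_stride, r3);
}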
static INLINE void transpose_u8_4x8(uint8x8_t *a0, uint8x8_t *a1,
                                    uint8x8_t *a2, uint8x8_t *a3,
                                    const uint8x8_t a4, const uint8x8_t a5,
                                    const uint8x8_t a6, const uint8x8_t a7) {
  // Swap 32 bit elements. Goes from:
  // a0: 00 01 02 03 XX XX XX XX
  // a1: 10 11 12 13 XX XX XX XX
  // a2: 20 21 22 23 XX XX XX XX
  // a3: 30 31 32 33 XX XX XX XX
  // a4: 40 41 42 43 XX XX XX XX
  // a5: 50 51 52 53 XX XX XX XX
  // a6: 60 61 62 63 XX XX XX XX
  // a7: 70 71 72 73 XX XX XX XX
  // to:
  // b0.val[0]: 00 01 02 03 40 41 42 43
  // b1.val[0]: 10 11 12 13 50 51 52 53
  // b2.val[0]: 20 21 22 23 60 61 62 63
  // b3.val[0]: 30 31 32 33 70 71 72 73
  const uint32x2x2_t b0 =
      vtrn_u32(vreinterpret_u32_u8(*a0), vreinterpret_u32_u8(a4));
  const uint32x2x2_t b1 =
      vtrn_u32(vreinterpret_u32_u8(*a1), vreinterpret_u32_u8(a5));
  const uint32x2x2_t b2 =
      vtrn_u32(vreinterpret_u32_u8(*a2), vreinterpret_u32_u8(a6));
  const uint32x2x2_t b3 =
      vtrn_u32(vreinterpret_u32_u8(*a3), vreinterpret_u32_u8(a7));

  // Swap 16 bit elements resulting in:
  // c0.val[0]: 00 01 20 21 40 41 60 61
  // c0.val[1]: 02 03 22 23 42 43 62 63
  // c1.val[0]: 10 11 30 31 50 51 70 71
  // c1.val[1]: 12 13 32 33 52 53 72 73
  const uint16x4x2_t c0 = vtrn_u16(vreinterpret_u16_u32(b0.val[0]),
                                   vreinterpret_u16_u32(b2.val[0]));
  const uint16x4x2_t c1 = vtrn_u16(vreinterpret_u16_u32(b1.val[0]),
                                   vreinterpret_u16_u32(b3.val[0]));

  // Swap 8 bit elements resulting in:
  // d0.val[0]: 00 10 20 30 40 50 60 70
  // d0.val[1]: 01 11 21 31 41 51 61 71
  // d1.val[0]: 02 12 22 32 42 52 62 72
  // d1.val[1]: 03 13 23 33 43 53 63 73
  const uint8x8x2_t d0 =
      vtrn_u8(vreinterpret_u8_u16(c0.val[0]), vreinterpret_u8_u16(c1.val[0]));
  const uint8x8x2_t d1 =
      vtrn_u8(vreinterpret_u8_u16(c0.val[1]), vreinterpret_u8_u16(c1.val[1]));

  *a0 = d0.val[0];
  *a1 = d0.val[1];
  *a2 = d1.val[0];
  *a3 = d1.val[1];
}

static INLINE void transpose_s32_4x4(int32x4_t *a0, int32x4_t *a1,
                                     int32x4_t *a2, int32x4_t *a3) {
  // Swap 32 bit elements. Goes from:
  // a0: 00 01 02 03
  // a1: 10 11 12 13
  // a2: 20 21 22 23
  // a3: 30 31 32 33
  // to:
  // b0.val[0]: 00 10 02 12
  // b0.val[1]: 01 11 03 13
  // b1.val[0]: 20 30 22 32
  // b1.val[1]: 21 31 23 33
  const int32x4x2_t b0 = vtrnq_s32(*a0, *a1);
  const int32x4x2_t b1 = vtrnq_s32(*a2, *a3);

  // Swap 64 bit elements resulting in:
  // c0.val[0]: 00 10 20 30
  // c0.val[1]: 02 12 22 32
  // c1.val[0]: 01 11 21 31
  // c1.val[1]: 03 13 23 33
  const int32x4x2_t c0 = vpx_vtrnq_s64_to_s32(b0.val[0], b1.val[0]);
  const int32x4x2_t c1 = vpx_vtrnq_s64_to_s32(b0.val[1], b1.val[1]);

  *a0 = c0.val[0];
  *a1 = c1.val[0];
  *a2 = c0.val[1];
  *a3 = c1.val[1];
}
static INLINE void transpose_s16_4x8(const int16x4_t a0, const int16x4_t a1,
                                     const int16x4_t a2, const int16x4_t a3,
                                     const int16x4_t a4, const int16x4_t a5,
                                     const int16x4_t a6, const int16x4_t a7,
                                     int16x8_t *const o0, int16x8_t *const o1,
                                     int16x8_t *const o2,
                                     int16x8_t *const o3) {
  // Swap 16 bit elements. Goes from:
  // a0: 00 01 02 03
  // a1: 10 11 12 13
  // a2: 20 21 22 23
  // a3: 30 31 32 33
  // a4: 40 41 42 43
  // a5: 50 51 52 53
  // a6: 60 61 62 63
  // a7: 70 71 72 73
  // to:
  // b0.val[0]: 00 10 02 12
  // b0.val[1]: 01 11 03 13
  // b1.val[0]: 20 30 22 32
  // b1.val[1]: 21 31 23 33
  // b2.val[0]: 40 50 42 52
  // b2.val[1]: 41 51 43 53
  // b3.val[0]: 60 70 62 72
  // b3.val[1]: 61 71 63 73
  const int16x4x2_t b0 = vtrn_s16(a0, a1);
  const int16x4x2_t b1 = vtrn_s16(a2, a3);
  const int16x4x2_t b2 = vtrn_s16(a4, a5);
  const int16x4x2_t b3 = vtrn_s16(a6, a7);

  // Swap 32 bit elements resulting in:
  // c0.val[0]: 00 10 20 30
  // c0.val[1]: 02 12 22 32
  // c1.val[0]: 01 11 21 31
  // c1.val[1]: 03 13 23 33
  // c2.val[0]: 40 50 60 70
  // c2.val[1]: 42 52 62 72
  // c3.val[0]: 41 51 61 71
  // c3.val[1]: 43 53 63 73
  const int32x2x2_t c0 = vtrn_s32(vreinterpret_s32_s16(b0.val[0]),
                                  vreinterpret_s32_s16(b1.val[0]));
  const int32x2x2_t c1 = vtrn_s32(vreinterpret_s32_s16(b0.val[1]),
                                  vreinterpret_s32_s16(b1.val[1]));
  const int32x2x2_t c2 = vtrn_s32(vreinterpret_s32_s16(b2.val[0]),
                                  vreinterpret_s32_s16(b3.val[0]));
  const int32x2x2_t c3 = vtrn_s32(vreinterpret_s32_s16(b2.val[1]),
                                  vreinterpret_s32_s16(b3.val[1]));

  // Swap 64 bit elements resulting in:
  // o0: 00 10 20 30 40 50 60 70
  // o1: 01 11 21 31 41 51 61 71
  // o2: 02 12 22 32 42 52 62 72
  // o3: 03 13 23 33 43 53 63 73
  *o0 = vcombine_s16(vreinterpret_s16_s32(c0.val[0]),
                     vreinterpret_s16_s32(c2.val[0]));
  *o1 = vcombine_s16(vreinterpret_s16_s32(c1.val[0]),
                     vreinterpret_s16_s32(c3.val[0]));
  *o2 = vcombine_s16(vreinterpret_s16_s32(c0.val[1]),
                     vreinterpret_s16_s32(c2.val[1]));
  *o3 = vcombine_s16(vreinterpret_s16_s32(c1.val[1]),
                     vreinterpret_s16_s32(c3.val[1]));
}

static INLINE void transpose_s32_4x8(int32x4_t *const a0, int32x4_t *const a1,
                                     int32x4_t *const a2, int32x4_t *const a3,
                                     int32x4_t *const a4, int32x4_t *const a5,
                                     int32x4_t *const a6,
                                     int32x4_t *const a7) {
  // Swap 32 bit elements. Goes from:
  // a0: 00 01 02 03
  // a1: 10 11 12 13
  // a2: 20 21 22 23
  // a3: 30 31 32 33
  // a4: 40 41 42 43
  // a5: 50 51 52 53
  // a6: 60 61 62 63
  // a7: 70 71 72 73
  // to:
  // b0.val[0]: 00 10 02 12
  // b0.val[1]: 01 11 03 13
  // b1.val[0]: 20 30 22 32
  // b1.val[1]: 21 31 23 33
  // b2.val[0]: 40 50 42 52
  // b2.val[1]: 41 51 43 53
  // b3.val[0]: 60 70 62 72
  // b3.val[1]: 61 71 63 73
  const int32x4x2_t b0 = vtrnq_s32(*a0, *a1);
  const int32x4x2_t b1 = vtrnq_s32(*a2, *a3);
  const int32x4x2_t b2 = vtrnq_s32(*a4, *a5);
  const int32x4x2_t b3 = vtrnq_s32(*a6, *a7);

  // Swap 64 bit elements resulting in:
  // c0.val[0]: 00 10 20 30
  // c0.val[1]: 02 12 22 32
  // c1.val[0]: 01 11 21 31
  // c1.val[1]: 03 13 23 33
  // c2.val[0]: 40 50 60 70
  // c2.val[1]: 42 52 62 72
  // c3.val[0]: 41 51 61 71
  // c3.val[1]: 43 53 63 73
  const int64x2x2_t c0 = vpx_vtrnq_s64(b0.val[0], b1.val[0]);
  const int64x2x2_t c1 = vpx_vtrnq_s64(b0.val[1], b1.val[1]);
  const int64x2x2_t c2 = vpx_vtrnq_s64(b2.val[0], b3.val[0]);
  const int64x2x2_t c3 = vpx_vtrnq_s64(b2.val[1], b3.val[1]);

  *a0 = vreinterpretq_s32_s64(c0.val[0]);
  *a1 = vreinterpretq_s32_s64(c2.val[0]);
  *a2 = vreinterpretq_s32_s64(c1.val[0]);
  *a3 = vreinterpretq_s32_s64(c3.val[0]);
  *a4 = vreinterpretq_s32_s64(c0.val[1]);
  *a5 = vreinterpretq_s32_s64(c2.val[1]);
  *a6 = vreinterpretq_s32_s64(c1.val[1]);
  *a7 = vreinterpretq_s32_s64(c3.val[1]);
}
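// Illustrative usage sketch (not part of the upstream header): the
// rectangular 4x8 transpose above gathers eight 4-wide rows into four 8-lane
// vectors, e.g. between the row and column passes of a separable 4-point
// transform run on eight columns at once. The function name is hypothetical.
static INLINE void gather_8_rows_of_4_s16(const int16_t *in, int in_stride,
                                          int16x8_t out[4]) {
  const int16x4_t r0 = vld1_s16(in + 0 * in_stride);
  const int16x4_t r1 = vld1_s16(in + 1 * in_stride);
  const int16x4_t r2 = vld1_s16(in + 2 * in_stride);
  const int16x4_t r3 = vld1_s16(in + 3 * in_stride);
  const int16x4_t r4 = vld1_s16(in + 4 * in_stride);
  const int16x4_t r5 = vld1_s16(in + 5 * in_stride);
  const int16x4_t r6 = vld1_s16(in + 6 * in_stride);
  const int16x4_t r7 = vld1_s16(in + 7 * in_stride);
  // After the call, out[i] holds column i of the original 8x4 block.
  transpose_s16_4x8(r0, r1, r2, r3, r4, r5, r6, r7, &out[0], &out[1], &out[2],
                    &out[3]);
}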
static INLINE void transpose_u8_8x4(uint8x8_t *a0, uint8x8_t *a1,
                                    uint8x8_t *a2, uint8x8_t *a3) {
  // Swap 8 bit elements. Goes from:
  // a0: 00 01 02 03 04 05 06 07
  // a1: 10 11 12 13 14 15 16 17
  // a2: 20 21 22 23 24 25 26 27
  // a3: 30 31 32 33 34 35 36 37
  // to:
  // b0.val[0]: 00 10 02 12 04 14 06 16
  // b0.val[1]: 01 11 03 13 05 15 07 17
  // b1.val[0]: 20 30 22 32 24 34 26 36
  // b1.val[1]: 21 31 23 33 25 35 27 37
  const uint8x8x2_t b0 = vtrn_u8(*a0, *a1);
  const uint8x8x2_t b1 = vtrn_u8(*a2, *a3);

  // Swap 16 bit elements resulting in:
  // c0.val[0]: 00 10 20 30 04 14 24 34
  // c0.val[1]: 02 12 22 32 06 16 26 36
  // c1.val[0]: 01 11 21 31 05 15 25 35
  // c1.val[1]: 03 13 23 33 07 17 27 37
  const uint16x4x2_t c0 = vtrn_u16(vreinterpret_u16_u8(b0.val[0]),
                                   vreinterpret_u16_u8(b1.val[0]));
  const uint16x4x2_t c1 = vtrn_u16(vreinterpret_u16_u8(b0.val[1]),
                                   vreinterpret_u16_u8(b1.val[1]));

  *a0 = vreinterpret_u8_u16(c0.val[0]);
  *a1 = vreinterpret_u8_u16(c1.val[0]);
  *a2 = vreinterpret_u8_u16(c0.val[1]);
  *a3 = vreinterpret_u8_u16(c1.val[1]);
}

static INLINE void transpose_u16_8x4(uint16x8_t *a0, uint16x8_t *a1,
                                     uint16x8_t *a2, uint16x8_t *a3) {
  // Swap 16 bit elements. Goes from:
  // a0: 00 01 02 03 04 05 06 07
  // a1: 10 11 12 13 14 15 16 17
  // a2: 20 21 22 23 24 25 26 27
  // a3: 30 31 32 33 34 35 36 37
  // to:
  // b0.val[0]: 00 10 02 12 04 14 06 16
  // b0.val[1]: 01 11 03 13 05 15 07 17
  // b1.val[0]: 20 30 22 32 24 34 26 36
  // b1.val[1]: 21 31 23 33 25 35 27 37
  const uint16x8x2_t b0 = vtrnq_u16(*a0, *a1);
  const uint16x8x2_t b1 = vtrnq_u16(*a2, *a3);

  // Swap 32 bit elements resulting in:
  // c0.val[0]: 00 10 20 30 04 14 24 34
  // c0.val[1]: 02 12 22 32 06 16 26 36
  // c1.val[0]: 01 11 21 31 05 15 25 35
  // c1.val[1]: 03 13 23 33 07 17 27 37
  const uint32x4x2_t c0 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[0]),
                                    vreinterpretq_u32_u16(b1.val[0]));
  const uint32x4x2_t c1 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[1]),
                                    vreinterpretq_u32_u16(b1.val[1]));

  *a0 = vreinterpretq_u16_u32(c0.val[0]);
  *a1 = vreinterpretq_u16_u32(c1.val[0]);
  *a2 = vreinterpretq_u16_u32(c0.val[1]);
  *a3 = vreinterpretq_u16_u32(c1.val[1]);
}

static INLINE void transpose_s32_8x4(int32x4_t *const a0, int32x4_t *const a1,
                                     int32x4_t *const a2, int32x4_t *const a3,
                                     int32x4_t *const a4, int32x4_t *const a5,
                                     int32x4_t *const a6,
                                     int32x4_t *const a7) {
  // Swap 32 bit elements.
Goes from: // a0: 00 01 02 03 // a1: 04 05 06 07 // a2: 10 11 12 13 // a3: 14 15 16 17 // a4: 20 21 22 23 // a5: 24 25 26 27 // a6: 30 31 32 33 // a7: 34 35 36 37 // to: // b0.val[0]: 00 10 02 12 // b0.val[1]: 01 11 03 13 // b1.val[0]: 04 14 06 16 // b1.val[1]: 05 15 07 17 // b2.val[0]: 20 30 22 32 // b2.val[1]: 21 31 23 33 // b3.val[0]: 24 34 26 36 // b3.val[1]: 25 35 27 37 const int32x4x2_t b0 = vtrnq_s32(*a0, *a2); const int32x4x2_t b1 = vtrnq_s32(*a1, *a3); const int32x4x2_t b2 = vtrnq_s32(*a4, *a6); const int32x4x2_t b3 = vtrnq_s32(*a5, *a7); // Swap 64 bit elements resulting in: // c0.val[0]: 00 10 20 30 // c0.val[1]: 02 12 22 32 // c1.val[0]: 01 11 21 31 // c1.val[1]: 03 13 23 33 // c2.val[0]: 04 14 24 34 // c2.val[1]: 06 16 26 36 // c3.val[0]: 05 15 25 35 // c3.val[1]: 07 17 27 37 const int64x2x2_t c0 = vpx_vtrnq_s64(b0.val[0], b2.val[0]); const int64x2x2_t c1 = vpx_vtrnq_s64(b0.val[1], b2.val[1]); const int64x2x2_t c2 = vpx_vtrnq_s64(b1.val[0], b3.val[0]); const int64x2x2_t c3 = vpx_vtrnq_s64(b1.val[1], b3.val[1]); *a0 = vreinterpretq_s32_s64(c0.val[0]); *a1 = vreinterpretq_s32_s64(c1.val[0]); *a2 = vreinterpretq_s32_s64(c0.val[1]); *a3 = vreinterpretq_s32_s64(c1.val[1]); *a4 = vreinterpretq_s32_s64(c2.val[0]); *a5 = vreinterpretq_s32_s64(c3.val[0]); *a6 = vreinterpretq_s32_s64(c2.val[1]); *a7 = vreinterpretq_s32_s64(c3.val[1]); } // Note: Using 'd' registers or 'q' registers has almost identical speed. We use // 'q' registers here to save some instructions. static INLINE void transpose_u8_8x8(uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2, uint8x8_t *a3, uint8x8_t *a4, uint8x8_t *a5, uint8x8_t *a6, uint8x8_t *a7) { // Swap 8 bit elements. Goes from: // a0: 00 01 02 03 04 05 06 07 // a1: 10 11 12 13 14 15 16 17 // a2: 20 21 22 23 24 25 26 27 // a3: 30 31 32 33 34 35 36 37 // a4: 40 41 42 43 44 45 46 47 // a5: 50 51 52 53 54 55 56 57 // a6: 60 61 62 63 64 65 66 67 // a7: 70 71 72 73 74 75 76 77 // to: // b0.val[0]: 00 10 02 12 04 14 06 16 40 50 42 52 44 54 46 56 // b0.val[1]: 01 11 03 13 05 15 07 17 41 51 43 53 45 55 47 57 // b1.val[0]: 20 30 22 32 24 34 26 36 60 70 62 72 64 74 66 76 // b1.val[1]: 21 31 23 33 25 35 27 37 61 71 63 73 65 75 67 77 const uint8x16x2_t b0 = vtrnq_u8(vcombine_u8(*a0, *a4), vcombine_u8(*a1, *a5)); const uint8x16x2_t b1 = vtrnq_u8(vcombine_u8(*a2, *a6), vcombine_u8(*a3, *a7)); // Swap 16 bit elements resulting in: // c0.val[0]: 00 10 20 30 04 14 24 34 40 50 60 70 44 54 64 74 // c0.val[1]: 02 12 22 32 06 16 26 36 42 52 62 72 46 56 66 76 // c1.val[0]: 01 11 21 31 05 15 25 35 41 51 61 71 45 55 65 75 // c1.val[1]: 03 13 23 33 07 17 27 37 43 53 63 73 47 57 67 77 const uint16x8x2_t c0 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[0]), vreinterpretq_u16_u8(b1.val[0])); const uint16x8x2_t c1 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[1]), vreinterpretq_u16_u8(b1.val[1])); // Unzip 32 bit elements resulting in: // d0.val[0]: 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 // d0.val[1]: 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 // d1.val[0]: 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 // d1.val[1]: 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 const uint32x4x2_t d0 = vuzpq_u32(vreinterpretq_u32_u16(c0.val[0]), vreinterpretq_u32_u16(c1.val[0])); const uint32x4x2_t d1 = vuzpq_u32(vreinterpretq_u32_u16(c0.val[1]), vreinterpretq_u32_u16(c1.val[1])); *a0 = vreinterpret_u8_u32(vget_low_u32(d0.val[0])); *a1 = vreinterpret_u8_u32(vget_high_u32(d0.val[0])); *a2 = vreinterpret_u8_u32(vget_low_u32(d1.val[0])); *a3 = vreinterpret_u8_u32(vget_high_u32(d1.val[0])); *a4 = 
vreinterpret_u8_u32(vget_low_u32(d0.val[1])); *a5 = vreinterpret_u8_u32(vget_high_u32(d0.val[1])); *a6 = vreinterpret_u8_u32(vget_low_u32(d1.val[1])); *a7 = vreinterpret_u8_u32(vget_high_u32(d1.val[1])); } static INLINE void transpose_s16_8x8(int16x8_t *a0, int16x8_t *a1, int16x8_t *a2, int16x8_t *a3, int16x8_t *a4, int16x8_t *a5, int16x8_t *a6, int16x8_t *a7) { // Swap 16 bit elements. Goes from: // a0: 00 01 02 03 04 05 06 07 // a1: 10 11 12 13 14 15 16 17 // a2: 20 21 22 23 24 25 26 27 // a3: 30 31 32 33 34 35 36 37 // a4: 40 41 42 43 44 45 46 47 // a5: 50 51 52 53 54 55 56 57 // a6: 60 61 62 63 64 65 66 67 // a7: 70 71 72 73 74 75 76 77 // to: // b0.val[0]: 00 10 02 12 04 14 06 16 // b0.val[1]: 01 11 03 13 05 15 07 17 // b1.val[0]: 20 30 22 32 24 34 26 36 // b1.val[1]: 21 31 23 33 25 35 27 37 // b2.val[0]: 40 50 42 52 44 54 46 56 // b2.val[1]: 41 51 43 53 45 55 47 57 // b3.val[0]: 60 70 62 72 64 74 66 76 // b3.val[1]: 61 71 63 73 65 75 67 77 const int16x8x2_t b0 = vtrnq_s16(*a0, *a1); const int16x8x2_t b1 = vtrnq_s16(*a2, *a3); const int16x8x2_t b2 = vtrnq_s16(*a4, *a5); const int16x8x2_t b3 = vtrnq_s16(*a6, *a7); // Swap 32 bit elements resulting in: // c0.val[0]: 00 10 20 30 04 14 24 34 // c0.val[1]: 02 12 22 32 06 16 26 36 // c1.val[0]: 01 11 21 31 05 15 25 35 // c1.val[1]: 03 13 23 33 07 17 27 37 // c2.val[0]: 40 50 60 70 44 54 64 74 // c2.val[1]: 42 52 62 72 46 56 66 76 // c3.val[0]: 41 51 61 71 45 55 65 75 // c3.val[1]: 43 53 63 73 47 57 67 77 const int32x4x2_t c0 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[0]), vreinterpretq_s32_s16(b1.val[0])); const int32x4x2_t c1 = vtrnq_s32(vreinterpretq_s32_s16(b0.val[1]), vreinterpretq_s32_s16(b1.val[1])); const int32x4x2_t c2 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[0]), vreinterpretq_s32_s16(b3.val[0])); const int32x4x2_t c3 = vtrnq_s32(vreinterpretq_s32_s16(b2.val[1]), vreinterpretq_s32_s16(b3.val[1])); // Swap 64 bit elements resulting in: // d0.val[0]: 00 10 20 30 40 50 60 70 // d0.val[1]: 04 14 24 34 44 54 64 74 // d1.val[0]: 01 11 21 31 41 51 61 71 // d1.val[1]: 05 15 25 35 45 55 65 75 // d2.val[0]: 02 12 22 32 42 52 62 72 // d2.val[1]: 06 16 26 36 46 56 66 76 // d3.val[0]: 03 13 23 33 43 53 63 73 // d3.val[1]: 07 17 27 37 47 57 67 77 const int16x8x2_t d0 = vpx_vtrnq_s64_to_s16(c0.val[0], c2.val[0]); const int16x8x2_t d1 = vpx_vtrnq_s64_to_s16(c1.val[0], c3.val[0]); const int16x8x2_t d2 = vpx_vtrnq_s64_to_s16(c0.val[1], c2.val[1]); const int16x8x2_t d3 = vpx_vtrnq_s64_to_s16(c1.val[1], c3.val[1]); *a0 = d0.val[0]; *a1 = d1.val[0]; *a2 = d2.val[0]; *a3 = d3.val[0]; *a4 = d0.val[1]; *a5 = d1.val[1]; *a6 = d2.val[1]; *a7 = d3.val[1]; } static INLINE void transpose_u16_8x8(uint16x8_t *a0, uint16x8_t *a1, uint16x8_t *a2, uint16x8_t *a3, uint16x8_t *a4, uint16x8_t *a5, uint16x8_t *a6, uint16x8_t *a7) { // Swap 16 bit elements. 
Goes from: // a0: 00 01 02 03 04 05 06 07 // a1: 10 11 12 13 14 15 16 17 // a2: 20 21 22 23 24 25 26 27 // a3: 30 31 32 33 34 35 36 37 // a4: 40 41 42 43 44 45 46 47 // a5: 50 51 52 53 54 55 56 57 // a6: 60 61 62 63 64 65 66 67 // a7: 70 71 72 73 74 75 76 77 // to: // b0.val[0]: 00 10 02 12 04 14 06 16 // b0.val[1]: 01 11 03 13 05 15 07 17 // b1.val[0]: 20 30 22 32 24 34 26 36 // b1.val[1]: 21 31 23 33 25 35 27 37 // b2.val[0]: 40 50 42 52 44 54 46 56 // b2.val[1]: 41 51 43 53 45 55 47 57 // b3.val[0]: 60 70 62 72 64 74 66 76 // b3.val[1]: 61 71 63 73 65 75 67 77 const uint16x8x2_t b0 = vtrnq_u16(*a0, *a1); const uint16x8x2_t b1 = vtrnq_u16(*a2, *a3); const uint16x8x2_t b2 = vtrnq_u16(*a4, *a5); const uint16x8x2_t b3 = vtrnq_u16(*a6, *a7); // Swap 32 bit elements resulting in: // c0.val[0]: 00 10 20 30 04 14 24 34 // c0.val[1]: 02 12 22 32 06 16 26 36 // c1.val[0]: 01 11 21 31 05 15 25 35 // c1.val[1]: 03 13 23 33 07 17 27 37 // c2.val[0]: 40 50 60 70 44 54 64 74 // c2.val[1]: 42 52 62 72 46 56 66 76 // c3.val[0]: 41 51 61 71 45 55 65 75 // c3.val[1]: 43 53 63 73 47 57 67 77 const uint32x4x2_t c0 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[0]), vreinterpretq_u32_u16(b1.val[0])); const uint32x4x2_t c1 = vtrnq_u32(vreinterpretq_u32_u16(b0.val[1]), vreinterpretq_u32_u16(b1.val[1])); const uint32x4x2_t c2 = vtrnq_u32(vreinterpretq_u32_u16(b2.val[0]), vreinterpretq_u32_u16(b3.val[0])); const uint32x4x2_t c3 = vtrnq_u32(vreinterpretq_u32_u16(b2.val[1]), vreinterpretq_u32_u16(b3.val[1])); // Swap 64 bit elements resulting in: // d0.val[0]: 00 10 20 30 40 50 60 70 // d0.val[1]: 04 14 24 34 44 54 64 74 // d1.val[0]: 01 11 21 31 41 51 61 71 // d1.val[1]: 05 15 25 35 45 55 65 75 // d2.val[0]: 02 12 22 32 42 52 62 72 // d2.val[1]: 06 16 26 36 46 56 66 76 // d3.val[0]: 03 13 23 33 43 53 63 73 // d3.val[1]: 07 17 27 37 47 57 67 77 const uint16x8x2_t d0 = vpx_vtrnq_u64_to_u16(c0.val[0], c2.val[0]); const uint16x8x2_t d1 = vpx_vtrnq_u64_to_u16(c1.val[0], c3.val[0]); const uint16x8x2_t d2 = vpx_vtrnq_u64_to_u16(c0.val[1], c2.val[1]); const uint16x8x2_t d3 = vpx_vtrnq_u64_to_u16(c1.val[1], c3.val[1]); *a0 = d0.val[0]; *a1 = d1.val[0]; *a2 = d2.val[0]; *a3 = d3.val[0]; *a4 = d0.val[1]; *a5 = d1.val[1]; *a6 = d2.val[1]; *a7 = d3.val[1]; } static INLINE void transpose_s32_8x8(int32x4x2_t *a0, int32x4x2_t *a1, int32x4x2_t *a2, int32x4x2_t *a3, int32x4x2_t *a4, int32x4x2_t *a5, int32x4x2_t *a6, int32x4x2_t *a7) { // Swap 32 bit elements. 
Goes from: // a0: 00 01 02 03 04 05 06 07 // a1: 10 11 12 13 14 15 16 17 // a2: 20 21 22 23 24 25 26 27 // a3: 30 31 32 33 34 35 36 37 // a4: 40 41 42 43 44 45 46 47 // a5: 50 51 52 53 54 55 56 57 // a6: 60 61 62 63 64 65 66 67 // a7: 70 71 72 73 74 75 76 77 // to: // b0: 00 10 02 12 01 11 03 13 // b1: 20 30 22 32 21 31 23 33 // b2: 40 50 42 52 41 51 43 53 // b3: 60 70 62 72 61 71 63 73 // b4: 04 14 06 16 05 15 07 17 // b5: 24 34 26 36 25 35 27 37 // b6: 44 54 46 56 45 55 47 57 // b7: 64 74 66 76 65 75 67 77 const int32x4x2_t b0 = vtrnq_s32(a0->val[0], a1->val[0]); const int32x4x2_t b1 = vtrnq_s32(a2->val[0], a3->val[0]); const int32x4x2_t b2 = vtrnq_s32(a4->val[0], a5->val[0]); const int32x4x2_t b3 = vtrnq_s32(a6->val[0], a7->val[0]); const int32x4x2_t b4 = vtrnq_s32(a0->val[1], a1->val[1]); const int32x4x2_t b5 = vtrnq_s32(a2->val[1], a3->val[1]); const int32x4x2_t b6 = vtrnq_s32(a4->val[1], a5->val[1]); const int32x4x2_t b7 = vtrnq_s32(a6->val[1], a7->val[1]); // Swap 64 bit elements resulting in: // c0: 00 10 20 30 02 12 22 32 // c1: 01 11 21 31 03 13 23 33 // c2: 40 50 60 70 42 52 62 72 // c3: 41 51 61 71 43 53 63 73 // c4: 04 14 24 34 06 16 26 36 // c5: 05 15 25 35 07 17 27 37 // c6: 44 54 64 74 46 56 66 76 // c7: 45 55 65 75 47 57 67 77 const int32x4x2_t c0 = vpx_vtrnq_s64_to_s32(b0.val[0], b1.val[0]); const int32x4x2_t c1 = vpx_vtrnq_s64_to_s32(b0.val[1], b1.val[1]); const int32x4x2_t c2 = vpx_vtrnq_s64_to_s32(b2.val[0], b3.val[0]); const int32x4x2_t c3 = vpx_vtrnq_s64_to_s32(b2.val[1], b3.val[1]); const int32x4x2_t c4 = vpx_vtrnq_s64_to_s32(b4.val[0], b5.val[0]); const int32x4x2_t c5 = vpx_vtrnq_s64_to_s32(b4.val[1], b5.val[1]); const int32x4x2_t c6 = vpx_vtrnq_s64_to_s32(b6.val[0], b7.val[0]); const int32x4x2_t c7 = vpx_vtrnq_s64_to_s32(b6.val[1], b7.val[1]); // Swap 128 bit elements resulting in: // a0: 00 10 20 30 40 50 60 70 // a1: 01 11 21 31 41 51 61 71 // a2: 02 12 22 32 42 52 62 72 // a3: 03 13 23 33 43 53 63 73 // a4: 04 14 24 34 44 54 64 74 // a5: 05 15 25 35 45 55 65 75 // a6: 06 16 26 36 46 56 66 76 // a7: 07 17 27 37 47 57 67 77 a0->val[0] = c0.val[0]; a0->val[1] = c2.val[0]; a1->val[0] = c1.val[0]; a1->val[1] = c3.val[0]; a2->val[0] = c0.val[1]; a2->val[1] = c2.val[1]; a3->val[0] = c1.val[1]; a3->val[1] = c3.val[1]; a4->val[0] = c4.val[0]; a4->val[1] = c6.val[0]; a5->val[0] = c5.val[0]; a5->val[1] = c7.val[0]; a6->val[0] = c4.val[1]; a6->val[1] = c6.val[1]; a7->val[0] = c5.val[1]; a7->val[1] = c7.val[1]; } static INLINE void transpose_u8_16x8( const uint8x16_t i0, const uint8x16_t i1, const uint8x16_t i2, const uint8x16_t i3, const uint8x16_t i4, const uint8x16_t i5, const uint8x16_t i6, const uint8x16_t i7, uint8x8_t *o0, uint8x8_t *o1, uint8x8_t *o2, uint8x8_t *o3, uint8x8_t *o4, uint8x8_t *o5, uint8x8_t *o6, uint8x8_t *o7, uint8x8_t *o8, uint8x8_t *o9, uint8x8_t *o10, uint8x8_t *o11, uint8x8_t *o12, uint8x8_t *o13, uint8x8_t *o14, uint8x8_t *o15) { // Swap 8 bit elements. 
Goes from: // i0: 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F // i1: 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F // i2: 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F // i3: 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F // i4: 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F // i5: 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F // i6: 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F // i7: 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F // to: // b0.val[0]: 00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E // b0.val[1]: 01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F // b1.val[0]: 20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E // b1.val[1]: 21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F // b2.val[0]: 40 50 42 52 44 54 46 56 48 58 4A 5A 4C 5C 4E 5E // b2.val[1]: 41 51 43 53 45 55 47 57 49 59 4B 5B 4D 5D 4F 5F // b3.val[0]: 60 70 62 72 64 74 66 76 68 78 6A 7A 6C 7C 6E 7E // b3.val[1]: 61 71 63 73 65 75 67 77 69 79 6B 7B 6D 7D 6F 7F const uint8x16x2_t b0 = vtrnq_u8(i0, i1); const uint8x16x2_t b1 = vtrnq_u8(i2, i3); const uint8x16x2_t b2 = vtrnq_u8(i4, i5); const uint8x16x2_t b3 = vtrnq_u8(i6, i7); // Swap 16 bit elements resulting in: // c0.val[0]: 00 10 20 30 04 14 24 34 08 18 28 38 0C 1C 2C 3C // c0.val[1]: 02 12 22 32 06 16 26 36 0A 1A 2A 3A 0E 1E 2E 3E // c1.val[0]: 01 11 21 31 05 15 25 35 09 19 29 39 0D 1D 2D 3D // c1.val[1]: 03 13 23 33 07 17 27 37 0B 1B 2B 3B 0F 1F 2F 3F // c2.val[0]: 40 50 60 70 44 54 64 74 48 58 68 78 4C 5C 6C 7C // c2.val[1]: 42 52 62 72 46 56 66 76 4A 5A 6A 7A 4E 5E 6E 7E // c3.val[0]: 41 51 61 71 45 55 65 75 49 59 69 79 4D 5D 6D 7D // c3.val[1]: 43 53 63 73 47 57 67 77 4B 5B 6B 7B 4F 5F 6F 7F const uint16x8x2_t c0 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[0]), vreinterpretq_u16_u8(b1.val[0])); const uint16x8x2_t c1 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[1]), vreinterpretq_u16_u8(b1.val[1])); const uint16x8x2_t c2 = vtrnq_u16(vreinterpretq_u16_u8(b2.val[0]), vreinterpretq_u16_u8(b3.val[0])); const uint16x8x2_t c3 = vtrnq_u16(vreinterpretq_u16_u8(b2.val[1]), vreinterpretq_u16_u8(b3.val[1])); // Swap 32 bit elements resulting in: // d0.val[0]: 00 10 20 30 40 50 60 70 08 18 28 38 48 58 68 78 // d0.val[1]: 04 14 24 34 44 54 64 74 0C 1C 2C 3C 4C 5C 6C 7C // d1.val[0]: 02 12 22 32 42 52 62 72 0A 1A 2A 3A 4A 5A 6A 7A // d1.val[1]: 06 16 26 36 46 56 66 76 0E 1E 2E 3E 4E 5E 6E 7E // d2.val[0]: 01 11 21 31 41 51 61 71 09 19 29 39 49 59 69 79 // d2.val[1]: 05 15 25 35 45 55 65 75 0D 1D 2D 3D 4D 5D 6D 7D // d3.val[0]: 03 13 23 33 43 53 63 73 0B 1B 2B 3B 4B 5B 6B 7B // d3.val[1]: 07 17 27 37 47 57 67 77 0F 1F 2F 3F 4F 5F 6F 7F const uint32x4x2_t d0 = vtrnq_u32(vreinterpretq_u32_u16(c0.val[0]), vreinterpretq_u32_u16(c2.val[0])); const uint32x4x2_t d1 = vtrnq_u32(vreinterpretq_u32_u16(c0.val[1]), vreinterpretq_u32_u16(c2.val[1])); const uint32x4x2_t d2 = vtrnq_u32(vreinterpretq_u32_u16(c1.val[0]), vreinterpretq_u32_u16(c3.val[0])); const uint32x4x2_t d3 = vtrnq_u32(vreinterpretq_u32_u16(c1.val[1]), vreinterpretq_u32_u16(c3.val[1])); // Output: // o0 : 00 10 20 30 40 50 60 70 // o1 : 01 11 21 31 41 51 61 71 // o2 : 02 12 22 32 42 52 62 72 // o3 : 03 13 23 33 43 53 63 73 // o4 : 04 14 24 34 44 54 64 74 // o5 : 05 15 25 35 45 55 65 75 // o6 : 06 16 26 36 46 56 66 76 // o7 : 07 17 27 37 47 57 67 77 // o8 : 08 18 28 38 48 58 68 78 // o9 : 09 19 29 39 49 59 69 79 // o10: 0A 1A 2A 3A 4A 5A 6A 7A // o11: 0B 1B 2B 3B 4B 5B 6B 7B // o12: 0C 1C 2C 3C 4C 5C 6C 7C // o13: 0D 1D 2D 3D 4D 5D 6D 7D // o14: 0E 1E 2E 3E 4E 5E 6E 7E // o15: 0F 1F 2F 3F 4F 5F 6F 7F *o0 = 
vget_low_u8(vreinterpretq_u8_u32(d0.val[0])); *o1 = vget_low_u8(vreinterpretq_u8_u32(d2.val[0])); *o2 = vget_low_u8(vreinterpretq_u8_u32(d1.val[0])); *o3 = vget_low_u8(vreinterpretq_u8_u32(d3.val[0])); *o4 = vget_low_u8(vreinterpretq_u8_u32(d0.val[1])); *o5 = vget_low_u8(vreinterpretq_u8_u32(d2.val[1])); *o6 = vget_low_u8(vreinterpretq_u8_u32(d1.val[1])); *o7 = vget_low_u8(vreinterpretq_u8_u32(d3.val[1])); *o8 = vget_high_u8(vreinterpretq_u8_u32(d0.val[0])); *o9 = vget_high_u8(vreinterpretq_u8_u32(d2.val[0])); *o10 = vget_high_u8(vreinterpretq_u8_u32(d1.val[0])); *o11 = vget_high_u8(vreinterpretq_u8_u32(d3.val[0])); *o12 = vget_high_u8(vreinterpretq_u8_u32(d0.val[1])); *o13 = vget_high_u8(vreinterpretq_u8_u32(d2.val[1])); *o14 = vget_high_u8(vreinterpretq_u8_u32(d1.val[1])); *o15 = vget_high_u8(vreinterpretq_u8_u32(d3.val[1])); } static INLINE void transpose_u8_8x16( const uint8x8_t i0, const uint8x8_t i1, const uint8x8_t i2, const uint8x8_t i3, const uint8x8_t i4, const uint8x8_t i5, const uint8x8_t i6, const uint8x8_t i7, const uint8x8_t i8, const uint8x8_t i9, const uint8x8_t i10, const uint8x8_t i11, const uint8x8_t i12, const uint8x8_t i13, const uint8x8_t i14, const uint8x8_t i15, uint8x16_t *o0, uint8x16_t *o1, uint8x16_t *o2, uint8x16_t *o3, uint8x16_t *o4, uint8x16_t *o5, uint8x16_t *o6, uint8x16_t *o7) { // Combine 8 bit elements. Goes from: // i0 : 00 01 02 03 04 05 06 07 // i1 : 10 11 12 13 14 15 16 17 // i2 : 20 21 22 23 24 25 26 27 // i3 : 30 31 32 33 34 35 36 37 // i4 : 40 41 42 43 44 45 46 47 // i5 : 50 51 52 53 54 55 56 57 // i6 : 60 61 62 63 64 65 66 67 // i7 : 70 71 72 73 74 75 76 77 // i8 : 80 81 82 83 84 85 86 87 // i9 : 90 91 92 93 94 95 96 97 // i10: A0 A1 A2 A3 A4 A5 A6 A7 // i11: B0 B1 B2 B3 B4 B5 B6 B7 // i12: C0 C1 C2 C3 C4 C5 C6 C7 // i13: D0 D1 D2 D3 D4 D5 D6 D7 // i14: E0 E1 E2 E3 E4 E5 E6 E7 // i15: F0 F1 F2 F3 F4 F5 F6 F7 // to: // a0: 00 01 02 03 04 05 06 07 80 81 82 83 84 85 86 87 // a1: 10 11 12 13 14 15 16 17 90 91 92 93 94 95 96 97 // a2: 20 21 22 23 24 25 26 27 A0 A1 A2 A3 A4 A5 A6 A7 // a3: 30 31 32 33 34 35 36 37 B0 B1 B2 B3 B4 B5 B6 B7 // a4: 40 41 42 43 44 45 46 47 C0 C1 C2 C3 C4 C5 C6 C7 // a5: 50 51 52 53 54 55 56 57 D0 D1 D2 D3 D4 D5 D6 D7 // a6: 60 61 62 63 64 65 66 67 E0 E1 E2 E3 E4 E5 E6 E7 // a7: 70 71 72 73 74 75 76 77 F0 F1 F2 F3 F4 F5 F6 F7 const uint8x16_t a0 = vcombine_u8(i0, i8); const uint8x16_t a1 = vcombine_u8(i1, i9); const uint8x16_t a2 = vcombine_u8(i2, i10); const uint8x16_t a3 = vcombine_u8(i3, i11); const uint8x16_t a4 = vcombine_u8(i4, i12); const uint8x16_t a5 = vcombine_u8(i5, i13); const uint8x16_t a6 = vcombine_u8(i6, i14); const uint8x16_t a7 = vcombine_u8(i7, i15); // Swap 8 bit elements resulting in: // b0.val[0]: 00 10 02 12 04 14 06 16 80 90 82 92 84 94 86 96 // b0.val[1]: 01 11 03 13 05 15 07 17 81 91 83 93 85 95 87 97 // b1.val[0]: 20 30 22 32 24 34 26 36 A0 B0 A2 B2 A4 B4 A6 B6 // b1.val[1]: 21 31 23 33 25 35 27 37 A1 B1 A3 B3 A5 B5 A7 B7 // b2.val[0]: 40 50 42 52 44 54 46 56 C0 D0 C2 D2 C4 D4 C6 D6 // b2.val[1]: 41 51 43 53 45 55 47 57 C1 D1 C3 D3 C5 D5 C7 D7 // b3.val[0]: 60 70 62 72 64 74 66 76 E0 F0 E2 F2 E4 F4 E6 F6 // b3.val[1]: 61 71 63 73 65 75 67 77 E1 F1 E3 F3 E5 F5 E7 F7 const uint8x16x2_t b0 = vtrnq_u8(a0, a1); const uint8x16x2_t b1 = vtrnq_u8(a2, a3); const uint8x16x2_t b2 = vtrnq_u8(a4, a5); const uint8x16x2_t b3 = vtrnq_u8(a6, a7); // Swap 16 bit elements resulting in: // c0.val[0]: 00 10 20 30 04 14 24 34 80 90 A0 B0 84 94 A4 B4 // c0.val[1]: 02 12 22 32 06 16 26 36 82 92 A2 B2 86 96 A6 B6 // 
c1.val[0]: 01 11 21 31 05 15 25 35 81 91 A1 B1 85 95 A5 B5 // c1.val[1]: 03 13 23 33 07 17 27 37 83 93 A3 B3 87 97 A7 B7 // c2.val[0]: 40 50 60 70 44 54 64 74 C0 D0 E0 F0 C4 D4 E4 F4 // c2.val[1]: 42 52 62 72 46 56 66 76 C2 D2 E2 F2 C6 D6 E6 F6 // c3.val[0]: 41 51 61 71 45 55 65 75 C1 D1 E1 F1 C5 D5 E5 F5 // c3.val[1]: 43 53 63 73 47 57 67 77 C3 D3 E3 F3 C7 D7 E7 F7 const uint16x8x2_t c0 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[0]), vreinterpretq_u16_u8(b1.val[0])); const uint16x8x2_t c1 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[1]), vreinterpretq_u16_u8(b1.val[1])); const uint16x8x2_t c2 = vtrnq_u16(vreinterpretq_u16_u8(b2.val[0]), vreinterpretq_u16_u8(b3.val[0])); const uint16x8x2_t c3 = vtrnq_u16(vreinterpretq_u16_u8(b2.val[1]), vreinterpretq_u16_u8(b3.val[1])); // Swap 32 bit elements resulting in: // d0.val[0]: 00 10 20 30 40 50 60 70 80 90 A0 B0 C0 D0 E0 F0 // d0.val[1]: 04 14 24 34 44 54 64 74 84 94 A4 B4 C4 D4 E4 F4 // d1.val[0]: 02 12 22 32 42 52 62 72 82 92 A2 B2 C2 D2 E2 F2 // d1.val[1]: 06 16 26 36 46 56 66 76 86 96 A6 B6 C6 D6 E6 F6 // d2.val[0]: 01 11 21 31 41 51 61 71 81 91 A1 B1 C1 D1 E1 F1 // d2.val[1]: 05 15 25 35 45 55 65 75 85 95 A5 B5 C5 D5 E5 F5 // d3.val[0]: 03 13 23 33 43 53 63 73 83 93 A3 B3 C3 D3 E3 F3 // d3.val[1]: 07 17 27 37 47 57 67 77 87 97 A7 B7 C7 D7 E7 F7 const uint32x4x2_t d0 = vtrnq_u32(vreinterpretq_u32_u16(c0.val[0]), vreinterpretq_u32_u16(c2.val[0])); const uint32x4x2_t d1 = vtrnq_u32(vreinterpretq_u32_u16(c0.val[1]), vreinterpretq_u32_u16(c2.val[1])); const uint32x4x2_t d2 = vtrnq_u32(vreinterpretq_u32_u16(c1.val[0]), vreinterpretq_u32_u16(c3.val[0])); const uint32x4x2_t d3 = vtrnq_u32(vreinterpretq_u32_u16(c1.val[1]), vreinterpretq_u32_u16(c3.val[1])); // Output: // o0: 00 10 20 30 40 50 60 70 80 90 A0 B0 C0 D0 E0 F0 // o1: 01 11 21 31 41 51 61 71 81 91 A1 B1 C1 D1 E1 F1 // o2: 02 12 22 32 42 52 62 72 82 92 A2 B2 C2 D2 E2 F2 // o3: 03 13 23 33 43 53 63 73 83 93 A3 B3 C3 D3 E3 F3 // o4: 04 14 24 34 44 54 64 74 84 94 A4 B4 C4 D4 E4 F4 // o5: 05 15 25 35 45 55 65 75 85 95 A5 B5 C5 D5 E5 F5 // o6: 06 16 26 36 46 56 66 76 86 96 A6 B6 C6 D6 E6 F6 // o7: 07 17 27 37 47 57 67 77 87 97 A7 B7 C7 D7 E7 F7 *o0 = vreinterpretq_u8_u32(d0.val[0]); *o1 = vreinterpretq_u8_u32(d2.val[0]); *o2 = vreinterpretq_u8_u32(d1.val[0]); *o3 = vreinterpretq_u8_u32(d3.val[0]); *o4 = vreinterpretq_u8_u32(d0.val[1]); *o5 = vreinterpretq_u8_u32(d2.val[1]); *o6 = vreinterpretq_u8_u32(d1.val[1]); *o7 = vreinterpretq_u8_u32(d3.val[1]); } static INLINE void transpose_u8_16x16( const uint8x16_t i0, const uint8x16_t i1, const uint8x16_t i2, const uint8x16_t i3, const uint8x16_t i4, const uint8x16_t i5, const uint8x16_t i6, const uint8x16_t i7, const uint8x16_t i8, const uint8x16_t i9, const uint8x16_t i10, const uint8x16_t i11, const uint8x16_t i12, const uint8x16_t i13, const uint8x16_t i14, const uint8x16_t i15, uint8x16_t *o0, uint8x16_t *o1, uint8x16_t *o2, uint8x16_t *o3, uint8x16_t *o4, uint8x16_t *o5, uint8x16_t *o6, uint8x16_t *o7, uint8x16_t *o8, uint8x16_t *o9, uint8x16_t *o10, uint8x16_t *o11, uint8x16_t *o12, uint8x16_t *o13, uint8x16_t *o14, uint8x16_t *o15) { // Swap 8 bit elements. 
Goes from: // i0: 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F // i1: 10 11 12 13 14 15 16 17 18 19 1A 1B 1C 1D 1E 1F // i2: 20 21 22 23 24 25 26 27 28 29 2A 2B 2C 2D 2E 2F // i3: 30 31 32 33 34 35 36 37 38 39 3A 3B 3C 3D 3E 3F // i4: 40 41 42 43 44 45 46 47 48 49 4A 4B 4C 4D 4E 4F // i5: 50 51 52 53 54 55 56 57 58 59 5A 5B 5C 5D 5E 5F // i6: 60 61 62 63 64 65 66 67 68 69 6A 6B 6C 6D 6E 6F // i7: 70 71 72 73 74 75 76 77 78 79 7A 7B 7C 7D 7E 7F // i8: 80 81 82 83 84 85 86 87 88 89 8A 8B 8C 8D 8E 8F // i9: 90 91 92 93 94 95 96 97 98 99 9A 9B 9C 9D 9E 9F // i10: A0 A1 A2 A3 A4 A5 A6 A7 A8 A9 AA AB AC AD AE AF // i11: B0 B1 B2 B3 B4 B5 B6 B7 B8 B9 BA BB BC BD BE BF // i12: C0 C1 C2 C3 C4 C5 C6 C7 C8 C9 CA CB CC CD CE CF // i13: D0 D1 D2 D3 D4 D5 D6 D7 D8 D9 DA DB DC DD DE DF // i14: E0 E1 E2 E3 E4 E5 E6 E7 E8 E9 EA EB EC ED EE EF // i15: F0 F1 F2 F3 F4 F5 F6 F7 F8 F9 FA FB FC FD FE FF // to: // b0.val[0]: 00 10 02 12 04 14 06 16 08 18 0A 1A 0C 1C 0E 1E // b0.val[1]: 01 11 03 13 05 15 07 17 09 19 0B 1B 0D 1D 0F 1F // b1.val[0]: 20 30 22 32 24 34 26 36 28 38 2A 3A 2C 3C 2E 3E // b1.val[1]: 21 31 23 33 25 35 27 37 29 39 2B 3B 2D 3D 2F 3F // b2.val[0]: 40 50 42 52 44 54 46 56 48 58 4A 5A 4C 5C 4E 5E // b2.val[1]: 41 51 43 53 45 55 47 57 49 59 4B 5B 4D 5D 4F 5F // b3.val[0]: 60 70 62 72 64 74 66 76 68 78 6A 7A 6C 7C 6E 7E // b3.val[1]: 61 71 63 73 65 75 67 77 69 79 6B 7B 6D 7D 6F 7F // b4.val[0]: 80 90 82 92 84 94 86 96 88 98 8A 9A 8C 9C 8E 9E // b4.val[1]: 81 91 83 93 85 95 87 97 89 99 8B 9B 8D 9D 8F 9F // b5.val[0]: A0 B0 A2 B2 A4 B4 A6 B6 A8 B8 AA BA AC BC AE BE // b5.val[1]: A1 B1 A3 B3 A5 B5 A7 B7 A9 B9 AB BB AD BD AF BF // b6.val[0]: C0 D0 C2 D2 C4 D4 C6 D6 C8 D8 CA DA CC DC CE DE // b6.val[1]: C1 D1 C3 D3 C5 D5 C7 D7 C9 D9 CB DB CD DD CF DF // b7.val[0]: E0 F0 E2 F2 E4 F4 E6 F6 E8 F8 EA FA EC FC EE FE // b7.val[1]: E1 F1 E3 F3 E5 F5 E7 F7 E9 F9 EB FB ED FD EF FF const uint8x16x2_t b0 = vtrnq_u8(i0, i1); const uint8x16x2_t b1 = vtrnq_u8(i2, i3); const uint8x16x2_t b2 = vtrnq_u8(i4, i5); const uint8x16x2_t b3 = vtrnq_u8(i6, i7); const uint8x16x2_t b4 = vtrnq_u8(i8, i9); const uint8x16x2_t b5 = vtrnq_u8(i10, i11); const uint8x16x2_t b6 = vtrnq_u8(i12, i13); const uint8x16x2_t b7 = vtrnq_u8(i14, i15); // Swap 16 bit elements resulting in: // c0.val[0]: 00 10 20 30 04 14 24 34 08 18 28 38 0C 1C 2C 3C // c0.val[1]: 02 12 22 32 06 16 26 36 0A 1A 2A 3A 0E 1E 2E 3E // c1.val[0]: 01 11 21 31 05 15 25 35 09 19 29 39 0D 1D 2D 3D // c1.val[1]: 03 13 23 33 07 17 27 37 0B 1B 2B 3B 0F 1F 2F 3F // c2.val[0]: 40 50 60 70 44 54 64 74 48 58 68 78 4C 5C 6C 7C // c2.val[1]: 42 52 62 72 46 56 66 76 4A 5A 6A 7A 4E 5E 6E 7E // c3.val[0]: 41 51 61 71 45 55 65 75 49 59 69 79 4D 5D 6D 7D // c3.val[1]: 43 53 63 73 47 57 67 77 4B 5B 6B 7B 4F 5F 6F 7F // c4.val[0]: 80 90 A0 B0 84 94 A4 B4 88 98 A8 B8 8C 9C AC BC // c4.val[1]: 82 92 A2 B2 86 96 A6 B6 8A 9A AA BA 8E 9E AE BE // c5.val[0]: 81 91 A1 B1 85 95 A5 B5 89 99 A9 B9 8D 9D AD BD // c5.val[1]: 83 93 A3 B3 87 97 A7 B7 8B 9B AB BB 8F 9F AF BF // c6.val[0]: C0 D0 E0 F0 C4 D4 E4 F4 C8 D8 E8 F8 CC DC EC FC // c6.val[1]: C2 D2 E2 F2 C6 D6 E6 F6 CA DA EA FA CE DE EE FE // c7.val[0]: C1 D1 E1 F1 C5 D5 E5 F5 C9 D9 E9 F9 CD DD ED FD // c7.val[1]: C3 D3 E3 F3 C7 D7 E7 F7 CB DB EB FB CF DF EF FF const uint16x8x2_t c0 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[0]), vreinterpretq_u16_u8(b1.val[0])); const uint16x8x2_t c1 = vtrnq_u16(vreinterpretq_u16_u8(b0.val[1]), vreinterpretq_u16_u8(b1.val[1])); const uint16x8x2_t c2 = vtrnq_u16(vreinterpretq_u16_u8(b2.val[0]), 
vreinterpretq_u16_u8(b3.val[0])); const uint16x8x2_t c3 = vtrnq_u16(vreinterpretq_u16_u8(b2.val[1]), vreinterpretq_u16_u8(b3.val[1])); const uint16x8x2_t c4 = vtrnq_u16(vreinterpretq_u16_u8(b4.val[0]), vreinterpretq_u16_u8(b5.val[0])); const uint16x8x2_t c5 = vtrnq_u16(vreinterpretq_u16_u8(b4.val[1]), vreinterpretq_u16_u8(b5.val[1])); const uint16x8x2_t c6 = vtrnq_u16(vreinterpretq_u16_u8(b6.val[0]), vreinterpretq_u16_u8(b7.val[0])); const uint16x8x2_t c7 = vtrnq_u16(vreinterpretq_u16_u8(b6.val[1]), vreinterpretq_u16_u8(b7.val[1])); // Swap 32 bit elements resulting in: // d0.val[0]: 00 10 20 30 40 50 60 70 08 18 28 38 48 58 68 78 // d0.val[1]: 04 14 24 34 44 54 64 74 0C 1C 2C 3C 4C 5C 6C 7C // d1.val[0]: 02 12 22 32 42 52 62 72 0A 1A 2A 3A 4A 5A 6A 7A // d1.val[1]: 06 16 26 36 46 56 66 76 0E 1E 2E 3E 4E 5E 6E 7E // d2.val[0]: 01 11 21 31 41 51 61 71 09 19 29 39 49 59 69 79 // d2.val[1]: 05 15 25 35 45 55 65 75 0D 1D 2D 3D 4D 5D 6D 7D // d3.val[0]: 03 13 23 33 43 53 63 73 0B 1B 2B 3B 4B 5B 6B 7B // d3.val[1]: 07 17 27 37 47 57 67 77 0F 1F 2F 3F 4F 5F 6F 7F // d4.val[0]: 80 90 A0 B0 C0 D0 E0 F0 88 98 A8 B8 C8 D8 E8 F8 // d4.val[1]: 84 94 A4 B4 C4 D4 E4 F4 8C 9C AC BC CC DC EC FC // d5.val[0]: 82 92 A2 B2 C2 D2 E2 F2 8A 9A AA BA CA DA EA FA // d5.val[1]: 86 96 A6 B6 C6 D6 E6 F6 8E 9E AE BE CE DE EE FE // d6.val[0]: 81 91 A1 B1 C1 D1 E1 F1 89 99 A9 B9 C9 D9 E9 F9 // d6.val[1]: 85 95 A5 B5 C5 D5 E5 F5 8D 9D AD BD CD DD ED FD // d7.val[0]: 83 93 A3 B3 C3 D3 E3 F3 8B 9B AB BB CB DB EB FB // d7.val[1]: 87 97 A7 B7 C7 D7 E7 F7 8F 9F AF BF CF DF EF FF const uint32x4x2_t d0 = vtrnq_u32(vreinterpretq_u32_u16(c0.val[0]), vreinterpretq_u32_u16(c2.val[0])); const uint32x4x2_t d1 = vtrnq_u32(vreinterpretq_u32_u16(c0.val[1]), vreinterpretq_u32_u16(c2.val[1])); const uint32x4x2_t d2 = vtrnq_u32(vreinterpretq_u32_u16(c1.val[0]), vreinterpretq_u32_u16(c3.val[0])); const uint32x4x2_t d3 = vtrnq_u32(vreinterpretq_u32_u16(c1.val[1]), vreinterpretq_u32_u16(c3.val[1])); const uint32x4x2_t d4 = vtrnq_u32(vreinterpretq_u32_u16(c4.val[0]), vreinterpretq_u32_u16(c6.val[0])); const uint32x4x2_t d5 = vtrnq_u32(vreinterpretq_u32_u16(c4.val[1]), vreinterpretq_u32_u16(c6.val[1])); const uint32x4x2_t d6 = vtrnq_u32(vreinterpretq_u32_u16(c5.val[0]), vreinterpretq_u32_u16(c7.val[0])); const uint32x4x2_t d7 = vtrnq_u32(vreinterpretq_u32_u16(c5.val[1]), vreinterpretq_u32_u16(c7.val[1])); // Swap 64 bit elements resulting in: // e0.val[0]: 00 10 20 30 40 50 60 70 80 90 A0 B0 C0 D0 E0 F0 // e0.val[1]: 08 18 28 38 48 58 68 78 88 98 A8 B8 C8 D8 E8 F8 // e1.val[0]: 01 11 21 31 41 51 61 71 84 94 A4 B4 C4 D4 E4 F4 // e1.val[1]: 09 19 29 39 49 59 69 79 89 99 A9 B9 C9 D9 E9 F9 // e2.val[0]: 02 12 22 32 42 52 62 72 82 92 A2 B2 C2 D2 E2 F2 // e2.val[1]: 0A 1A 2A 3A 4A 5A 6A 7A 8A 9A AA BA CA DA EA FA // e3.val[0]: 03 13 23 33 43 53 63 73 86 96 A6 B6 C6 D6 E6 F6 // e3.val[1]: 0B 1B 2B 3B 4B 5B 6B 7B 8B 9B AB BB CB DB EB FB // e4.val[0]: 04 14 24 34 44 54 64 74 81 91 A1 B1 C1 D1 E1 F1 // e4.val[1]: 0C 1C 2C 3C 4C 5C 6C 7C 8C 9C AC BC CC DC EC FC // e5.val[0]: 05 15 25 35 45 55 65 75 85 95 A5 B5 C5 D5 E5 F5 // e5.val[1]: 0D 1D 2D 3D 4D 5D 6D 7D 8D 9D AD BD CD DD ED FD // e6.val[0]: 06 16 26 36 46 56 66 76 83 93 A3 B3 C3 D3 E3 F3 // e6.val[1]: 0E 1E 2E 3E 4E 5E 6E 7E 8E 9E AE BE CE DE EE FE // e7.val[0]: 07 17 27 37 47 57 67 77 87 97 A7 B7 C7 D7 E7 F7 // e7.val[1]: 0F 1F 2F 3F 4F 5F 6F 7F 8F 9F AF BF CF DF EF FF const uint8x16x2_t e0 = vpx_vtrnq_u64_to_u8(d0.val[0], d4.val[0]); const uint8x16x2_t e1 = vpx_vtrnq_u64_to_u8(d2.val[0], 
d6.val[0]); const uint8x16x2_t e2 = vpx_vtrnq_u64_to_u8(d1.val[0], d5.val[0]); const uint8x16x2_t e3 = vpx_vtrnq_u64_to_u8(d3.val[0], d7.val[0]); const uint8x16x2_t e4 = vpx_vtrnq_u64_to_u8(d0.val[1], d4.val[1]); const uint8x16x2_t e5 = vpx_vtrnq_u64_to_u8(d2.val[1], d6.val[1]); const uint8x16x2_t e6 = vpx_vtrnq_u64_to_u8(d1.val[1], d5.val[1]); const uint8x16x2_t e7 = vpx_vtrnq_u64_to_u8(d3.val[1], d7.val[1]); // Output: // o0 : 00 10 20 30 40 50 60 70 80 90 A0 B0 C0 D0 E0 F0 // o1 : 01 11 21 31 41 51 61 71 84 94 A4 B4 C4 D4 E4 F4 // o2 : 02 12 22 32 42 52 62 72 82 92 A2 B2 C2 D2 E2 F2 // o3 : 03 13 23 33 43 53 63 73 86 96 A6 B6 C6 D6 E6 F6 // o4 : 04 14 24 34 44 54 64 74 81 91 A1 B1 C1 D1 E1 F1 // o5 : 05 15 25 35 45 55 65 75 85 95 A5 B5 C5 D5 E5 F5 // o6 : 06 16 26 36 46 56 66 76 83 93 A3 B3 C3 D3 E3 F3 // o7 : 07 17 27 37 47 57 67 77 87 97 A7 B7 C7 D7 E7 F7 // o8 : 08 18 28 38 48 58 68 78 88 98 A8 B8 C8 D8 E8 F8 // o9 : 09 19 29 39 49 59 69 79 89 99 A9 B9 C9 D9 E9 F9 // o10: 0A 1A 2A 3A 4A 5A 6A 7A 8A 9A AA BA CA DA EA FA // o11: 0B 1B 2B 3B 4B 5B 6B 7B 8B 9B AB BB CB DB EB FB // o12: 0C 1C 2C 3C 4C 5C 6C 7C 8C 9C AC BC CC DC EC FC // o13: 0D 1D 2D 3D 4D 5D 6D 7D 8D 9D AD BD CD DD ED FD // o14: 0E 1E 2E 3E 4E 5E 6E 7E 8E 9E AE BE CE DE EE FE // o15: 0F 1F 2F 3F 4F 5F 6F 7F 8F 9F AF BF CF DF EF FF *o0 = e0.val[0]; *o1 = e1.val[0]; *o2 = e2.val[0]; *o3 = e3.val[0]; *o4 = e4.val[0]; *o5 = e5.val[0]; *o6 = e6.val[0]; *o7 = e7.val[0]; *o8 = e0.val[1]; *o9 = e1.val[1]; *o10 = e2.val[1]; *o11 = e3.val[1]; *o12 = e4.val[1]; *o13 = e5.val[1]; *o14 = e6.val[1]; *o15 = e7.val[1]; } static INLINE void load_and_transpose_u8_4x8(const uint8_t *a, const int a_stride, uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2, uint8x8_t *a3) { uint8x8_t a4, a5, a6, a7; *a0 = vld1_u8(a); a += a_stride; *a1 = vld1_u8(a); a += a_stride; *a2 = vld1_u8(a); a += a_stride; *a3 = vld1_u8(a); a += a_stride; a4 = vld1_u8(a); a += a_stride; a5 = vld1_u8(a); a += a_stride; a6 = vld1_u8(a); a += a_stride; a7 = vld1_u8(a); transpose_u8_4x8(a0, a1, a2, a3, a4, a5, a6, a7); } static INLINE void load_and_transpose_u8_8x8(const uint8_t *a, const int a_stride, uint8x8_t *a0, uint8x8_t *a1, uint8x8_t *a2, uint8x8_t *a3, uint8x8_t *a4, uint8x8_t *a5, uint8x8_t *a6, uint8x8_t *a7) { *a0 = vld1_u8(a); a += a_stride; *a1 = vld1_u8(a); a += a_stride; *a2 = vld1_u8(a); a += a_stride; *a3 = vld1_u8(a); a += a_stride; *a4 = vld1_u8(a); a += a_stride; *a5 = vld1_u8(a); a += a_stride; *a6 = vld1_u8(a); a += a_stride; *a7 = vld1_u8(a); transpose_u8_8x8(a0, a1, a2, a3, a4, a5, a6, a7); } static INLINE void transpose_and_store_u8_8x8(uint8_t *a, const int a_stride, uint8x8_t a0, uint8x8_t a1, uint8x8_t a2, uint8x8_t a3, uint8x8_t a4, uint8x8_t a5, uint8x8_t a6, uint8x8_t a7) { transpose_u8_8x8(&a0, &a1, &a2, &a3, &a4, &a5, &a6, &a7); vst1_u8(a, a0); a += a_stride; vst1_u8(a, a1); a += a_stride; vst1_u8(a, a2); a += a_stride; vst1_u8(a, a3); a += a_stride; vst1_u8(a, a4); a += a_stride; vst1_u8(a, a5); a += a_stride; vst1_u8(a, a6); a += a_stride; vst1_u8(a, a7); } static INLINE void load_and_transpose_s16_8x8(const int16_t *a, const int a_stride, int16x8_t *a0, int16x8_t *a1, int16x8_t *a2, int16x8_t *a3, int16x8_t *a4, int16x8_t *a5, int16x8_t *a6, int16x8_t *a7) { *a0 = vld1q_s16(a); a += a_stride; *a1 = vld1q_s16(a); a += a_stride; *a2 = vld1q_s16(a); a += a_stride; *a3 = vld1q_s16(a); a += a_stride; *a4 = vld1q_s16(a); a += a_stride; *a5 = vld1q_s16(a); a += a_stride; *a6 = vld1q_s16(a); a += a_stride; *a7 = vld1q_s16(a); transpose_s16_8x8(a0, 
a1, a2, a3, a4, a5, a6, a7); } static INLINE void load_and_transpose_s32_8x8( const int32_t *a, const int a_stride, int32x4x2_t *const a0, int32x4x2_t *const a1, int32x4x2_t *const a2, int32x4x2_t *const a3, int32x4x2_t *const a4, int32x4x2_t *const a5, int32x4x2_t *const a6, int32x4x2_t *const a7) { a0->val[0] = vld1q_s32(a); a0->val[1] = vld1q_s32(a + 4); a += a_stride; a1->val[0] = vld1q_s32(a); a1->val[1] = vld1q_s32(a + 4); a += a_stride; a2->val[0] = vld1q_s32(a); a2->val[1] = vld1q_s32(a + 4); a += a_stride; a3->val[0] = vld1q_s32(a); a3->val[1] = vld1q_s32(a + 4); a += a_stride; a4->val[0] = vld1q_s32(a); a4->val[1] = vld1q_s32(a + 4); a += a_stride; a5->val[0] = vld1q_s32(a); a5->val[1] = vld1q_s32(a + 4); a += a_stride; a6->val[0] = vld1q_s32(a); a6->val[1] = vld1q_s32(a + 4); a += a_stride; a7->val[0] = vld1q_s32(a); a7->val[1] = vld1q_s32(a + 4); transpose_s32_8x8(a0, a1, a2, a3, a4, a5, a6, a7); } #endif // VPX_VPX_DSP_ARM_TRANSPOSE_NEON_H_ libvpx-1.8.2/vpx_dsp/arm/variance_neon.c000066400000000000000000000352701357355204000202560ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_dsp_rtcd.h" #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/arm/mem_neon.h" #include "vpx_dsp/arm/sum_neon.h" #include "vpx_ports/mem.h" // The variance helper functions use int16_t for sum. 8 values are accumulated // and then added (at which point they expand up to int32_t). To avoid overflow, // there can be no more than 32767 / 255 ~= 128 values accumulated in each // column. For a 32x32 buffer, this results in 32 / 8 = 4 values per row * 32 // rows = 128. Asserts have been added to each function to warn against reaching // this limit. // Process a block of width 4 four rows at a time. static void variance_neon_w4x4(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int h, uint32_t *sse, int *sum) { int i; int16x8_t sum_s16 = vdupq_n_s16(0); int32x4_t sse_lo_s32 = vdupq_n_s32(0); int32x4_t sse_hi_s32 = vdupq_n_s32(0); // Since width is only 4, sum_s16 only loads a half row per loop. 
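  // Worked numbers for the bound (illustrative): each int16 lane tolerates at
  // most 32767 / 255 ~= 128 accumulations of byte-range differences. Every
  // 4-row iteration contributes 16 differences, i.e. two additions per lane,
  // so a lane sees h / 2 additions in total and h == 256 is the largest safe
  // height, hence the assert below.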
assert(h <= 256); for (i = 0; i < h; i += 4) { const uint8x16_t a_u8 = load_unaligned_u8q(src_ptr, src_stride); const uint8x16_t b_u8 = load_unaligned_u8q(ref_ptr, ref_stride); const uint16x8_t diff_lo_u16 = vsubl_u8(vget_low_u8(a_u8), vget_low_u8(b_u8)); const uint16x8_t diff_hi_u16 = vsubl_u8(vget_high_u8(a_u8), vget_high_u8(b_u8)); const int16x8_t diff_lo_s16 = vreinterpretq_s16_u16(diff_lo_u16); const int16x8_t diff_hi_s16 = vreinterpretq_s16_u16(diff_hi_u16); sum_s16 = vaddq_s16(sum_s16, diff_lo_s16); sum_s16 = vaddq_s16(sum_s16, diff_hi_s16); sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_lo_s16), vget_low_s16(diff_lo_s16)); sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_high_s16(diff_lo_s16), vget_high_s16(diff_lo_s16)); sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_low_s16(diff_hi_s16), vget_low_s16(diff_hi_s16)); sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_hi_s16), vget_high_s16(diff_hi_s16)); src_ptr += 4 * src_stride; ref_ptr += 4 * ref_stride; } *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0); *sse = vget_lane_u32(horizontal_add_uint32x4(vreinterpretq_u32_s32( vaddq_s32(sse_lo_s32, sse_hi_s32))), 0); } // Process a block of any size where the width is divisible by 16. static void variance_neon_w16(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int w, int h, uint32_t *sse, int *sum) { int i, j; int16x8_t sum_s16 = vdupq_n_s16(0); int32x4_t sse_lo_s32 = vdupq_n_s32(0); int32x4_t sse_hi_s32 = vdupq_n_s32(0); // The loop loads 16 values at a time but doubles them up when accumulating // into sum_s16. assert(w / 8 * h <= 128); for (i = 0; i < h; ++i) { for (j = 0; j < w; j += 16) { const uint8x16_t a_u8 = vld1q_u8(src_ptr + j); const uint8x16_t b_u8 = vld1q_u8(ref_ptr + j); const uint16x8_t diff_lo_u16 = vsubl_u8(vget_low_u8(a_u8), vget_low_u8(b_u8)); const uint16x8_t diff_hi_u16 = vsubl_u8(vget_high_u8(a_u8), vget_high_u8(b_u8)); const int16x8_t diff_lo_s16 = vreinterpretq_s16_u16(diff_lo_u16); const int16x8_t diff_hi_s16 = vreinterpretq_s16_u16(diff_hi_u16); sum_s16 = vaddq_s16(sum_s16, diff_lo_s16); sum_s16 = vaddq_s16(sum_s16, diff_hi_s16); sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_lo_s16), vget_low_s16(diff_lo_s16)); sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_high_s16(diff_lo_s16), vget_high_s16(diff_lo_s16)); sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_low_s16(diff_hi_s16), vget_low_s16(diff_hi_s16)); sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_hi_s16), vget_high_s16(diff_hi_s16)); } src_ptr += src_stride; ref_ptr += ref_stride; } *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0); *sse = vget_lane_u32(horizontal_add_uint32x4(vreinterpretq_u32_s32( vaddq_s32(sse_lo_s32, sse_hi_s32))), 0); } // Process a block of width 8 two rows at a time. static void variance_neon_w8x2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int h, uint32_t *sse, int *sum) { int i = 0; int16x8_t sum_s16 = vdupq_n_s16(0); int32x4_t sse_lo_s32 = vdupq_n_s32(0); int32x4_t sse_hi_s32 = vdupq_n_s32(0); // Each column has it's own accumulator entry in sum_s16. 
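  // Worked numbers for the bound (illustrative): here every row adds one
  // byte-range difference to each of the 8 lanes, so a lane sees h additions
  // in total and h == 128 exhausts the 32767 / 255 ~= 128 budget, hence the
  // assert below.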
assert(h <= 128); do { const uint8x8_t a_0_u8 = vld1_u8(src_ptr); const uint8x8_t a_1_u8 = vld1_u8(src_ptr + src_stride); const uint8x8_t b_0_u8 = vld1_u8(ref_ptr); const uint8x8_t b_1_u8 = vld1_u8(ref_ptr + ref_stride); const uint16x8_t diff_0_u16 = vsubl_u8(a_0_u8, b_0_u8); const uint16x8_t diff_1_u16 = vsubl_u8(a_1_u8, b_1_u8); const int16x8_t diff_0_s16 = vreinterpretq_s16_u16(diff_0_u16); const int16x8_t diff_1_s16 = vreinterpretq_s16_u16(diff_1_u16); sum_s16 = vaddq_s16(sum_s16, diff_0_s16); sum_s16 = vaddq_s16(sum_s16, diff_1_s16); sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_0_s16), vget_low_s16(diff_0_s16)); sse_lo_s32 = vmlal_s16(sse_lo_s32, vget_low_s16(diff_1_s16), vget_low_s16(diff_1_s16)); sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_0_s16), vget_high_s16(diff_0_s16)); sse_hi_s32 = vmlal_s16(sse_hi_s32, vget_high_s16(diff_1_s16), vget_high_s16(diff_1_s16)); src_ptr += src_stride + src_stride; ref_ptr += ref_stride + ref_stride; i += 2; } while (i < h); *sum = vget_lane_s32(horizontal_add_int16x8(sum_s16), 0); *sse = vget_lane_u32(horizontal_add_uint32x4(vreinterpretq_u32_s32( vaddq_s32(sse_lo_s32, sse_hi_s32))), 0); } void vpx_get8x8var_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum) { variance_neon_w8x2(src_ptr, src_stride, ref_ptr, ref_stride, 8, sse, sum); } void vpx_get16x16var_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum) { variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 16, 16, sse, sum); } #define varianceNxM(n, m, shift) \ unsigned int vpx_variance##n##x##m##_neon( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, unsigned int *sse) { \ int sum; \ if (n == 4) \ variance_neon_w4x4(src_ptr, src_stride, ref_ptr, ref_stride, m, sse, \ &sum); \ else if (n == 8) \ variance_neon_w8x2(src_ptr, src_stride, ref_ptr, ref_stride, m, sse, \ &sum); \ else \ variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, n, m, sse, \ &sum); \ if (n * m < 16 * 16) \ return *sse - ((sum * sum) >> shift); \ else \ return *sse - (uint32_t)(((int64_t)sum * sum) >> shift); \ } varianceNxM(4, 4, 4); varianceNxM(4, 8, 5); varianceNxM(8, 4, 5); varianceNxM(8, 8, 6); varianceNxM(8, 16, 7); varianceNxM(16, 8, 7); varianceNxM(16, 16, 8); varianceNxM(16, 32, 9); varianceNxM(32, 16, 9); varianceNxM(32, 32, 10); unsigned int vpx_variance32x64_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum1, sum2; uint32_t sse1, sse2; variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 32, 32, &sse1, &sum1); variance_neon_w16(src_ptr + (32 * src_stride), src_stride, ref_ptr + (32 * ref_stride), ref_stride, 32, 32, &sse2, &sum2); *sse = sse1 + sse2; sum1 += sum2; return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 11); } unsigned int vpx_variance64x32_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum1, sum2; uint32_t sse1, sse2; variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 64, 16, &sse1, &sum1); variance_neon_w16(src_ptr + (16 * src_stride), src_stride, ref_ptr + (16 * ref_stride), ref_stride, 64, 16, &sse2, &sum2); *sse = sse1 + sse2; sum1 += sum2; return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 11); } unsigned int vpx_variance64x64_neon(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum1, sum2; uint32_t 
sse1, sse2; variance_neon_w16(src_ptr, src_stride, ref_ptr, ref_stride, 64, 16, &sse1, &sum1); variance_neon_w16(src_ptr + (16 * src_stride), src_stride, ref_ptr + (16 * ref_stride), ref_stride, 64, 16, &sse2, &sum2); sse1 += sse2; sum1 += sum2; variance_neon_w16(src_ptr + (16 * 2 * src_stride), src_stride, ref_ptr + (16 * 2 * ref_stride), ref_stride, 64, 16, &sse2, &sum2); sse1 += sse2; sum1 += sum2; variance_neon_w16(src_ptr + (16 * 3 * src_stride), src_stride, ref_ptr + (16 * 3 * ref_stride), ref_stride, 64, 16, &sse2, &sum2); *sse = sse1 + sse2; sum1 += sum2; return *sse - (unsigned int)(((int64_t)sum1 * sum1) >> 12); } unsigned int vpx_mse16x16_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride, unsigned int *sse) { int i; int16x4_t d22s16, d23s16, d24s16, d25s16, d26s16, d27s16, d28s16, d29s16; int64x1_t d0s64; uint8x16_t q0u8, q1u8, q2u8, q3u8; int32x4_t q7s32, q8s32, q9s32, q10s32; uint16x8_t q11u16, q12u16, q13u16, q14u16; int64x2_t q1s64; q7s32 = vdupq_n_s32(0); q8s32 = vdupq_n_s32(0); q9s32 = vdupq_n_s32(0); q10s32 = vdupq_n_s32(0); for (i = 0; i < 8; i++) { // mse16x16_neon_loop q0u8 = vld1q_u8(src_ptr); src_ptr += src_stride; q1u8 = vld1q_u8(src_ptr); src_ptr += src_stride; q2u8 = vld1q_u8(ref_ptr); ref_ptr += ref_stride; q3u8 = vld1q_u8(ref_ptr); ref_ptr += ref_stride; q11u16 = vsubl_u8(vget_low_u8(q0u8), vget_low_u8(q2u8)); q12u16 = vsubl_u8(vget_high_u8(q0u8), vget_high_u8(q2u8)); q13u16 = vsubl_u8(vget_low_u8(q1u8), vget_low_u8(q3u8)); q14u16 = vsubl_u8(vget_high_u8(q1u8), vget_high_u8(q3u8)); d22s16 = vreinterpret_s16_u16(vget_low_u16(q11u16)); d23s16 = vreinterpret_s16_u16(vget_high_u16(q11u16)); q7s32 = vmlal_s16(q7s32, d22s16, d22s16); q8s32 = vmlal_s16(q8s32, d23s16, d23s16); d24s16 = vreinterpret_s16_u16(vget_low_u16(q12u16)); d25s16 = vreinterpret_s16_u16(vget_high_u16(q12u16)); q9s32 = vmlal_s16(q9s32, d24s16, d24s16); q10s32 = vmlal_s16(q10s32, d25s16, d25s16); d26s16 = vreinterpret_s16_u16(vget_low_u16(q13u16)); d27s16 = vreinterpret_s16_u16(vget_high_u16(q13u16)); q7s32 = vmlal_s16(q7s32, d26s16, d26s16); q8s32 = vmlal_s16(q8s32, d27s16, d27s16); d28s16 = vreinterpret_s16_u16(vget_low_u16(q14u16)); d29s16 = vreinterpret_s16_u16(vget_high_u16(q14u16)); q9s32 = vmlal_s16(q9s32, d28s16, d28s16); q10s32 = vmlal_s16(q10s32, d29s16, d29s16); } q7s32 = vaddq_s32(q7s32, q8s32); q9s32 = vaddq_s32(q9s32, q10s32); q10s32 = vaddq_s32(q7s32, q9s32); q1s64 = vpaddlq_s32(q10s32); d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64)); vst1_lane_u32((uint32_t *)sse, vreinterpret_u32_s64(d0s64), 0); return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0); } unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr, int src_stride, const unsigned char *ref_ptr, int ref_stride) { int16x4_t d22s16, d24s16, d26s16, d28s16; int64x1_t d0s64; uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8; int32x4_t q7s32, q8s32, q9s32, q10s32; uint16x8_t q11u16, q12u16, q13u16, q14u16; int64x2_t q1s64; d0u8 = vld1_u8(src_ptr); src_ptr += src_stride; d4u8 = vld1_u8(ref_ptr); ref_ptr += ref_stride; d1u8 = vld1_u8(src_ptr); src_ptr += src_stride; d5u8 = vld1_u8(ref_ptr); ref_ptr += ref_stride; d2u8 = vld1_u8(src_ptr); src_ptr += src_stride; d6u8 = vld1_u8(ref_ptr); ref_ptr += ref_stride; d3u8 = vld1_u8(src_ptr); src_ptr += src_stride; d7u8 = vld1_u8(ref_ptr); ref_ptr += ref_stride; q11u16 = vsubl_u8(d0u8, d4u8); q12u16 = vsubl_u8(d1u8, d5u8); q13u16 = vsubl_u8(d2u8, d6u8); q14u16 = vsubl_u8(d3u8, d7u8); d22s16 = 
unsigned int vpx_get4x4sse_cs_neon(const unsigned char *src_ptr,
                                   int src_stride,
                                   const unsigned char *ref_ptr,
                                   int ref_stride) {
  int16x4_t d22s16, d24s16, d26s16, d28s16;
  int64x1_t d0s64;
  uint8x8_t d0u8, d1u8, d2u8, d3u8, d4u8, d5u8, d6u8, d7u8;
  int32x4_t q7s32, q8s32, q9s32, q10s32;
  uint16x8_t q11u16, q12u16, q13u16, q14u16;
  int64x2_t q1s64;

  d0u8 = vld1_u8(src_ptr);
  src_ptr += src_stride;
  d4u8 = vld1_u8(ref_ptr);
  ref_ptr += ref_stride;
  d1u8 = vld1_u8(src_ptr);
  src_ptr += src_stride;
  d5u8 = vld1_u8(ref_ptr);
  ref_ptr += ref_stride;
  d2u8 = vld1_u8(src_ptr);
  src_ptr += src_stride;
  d6u8 = vld1_u8(ref_ptr);
  ref_ptr += ref_stride;
  d3u8 = vld1_u8(src_ptr);
  src_ptr += src_stride;
  d7u8 = vld1_u8(ref_ptr);
  ref_ptr += ref_stride;

  q11u16 = vsubl_u8(d0u8, d4u8);
  q12u16 = vsubl_u8(d1u8, d5u8);
  q13u16 = vsubl_u8(d2u8, d6u8);
  q14u16 = vsubl_u8(d3u8, d7u8);

  d22s16 = vget_low_s16(vreinterpretq_s16_u16(q11u16));
  d24s16 = vget_low_s16(vreinterpretq_s16_u16(q12u16));
  d26s16 = vget_low_s16(vreinterpretq_s16_u16(q13u16));
  d28s16 = vget_low_s16(vreinterpretq_s16_u16(q14u16));

  q7s32 = vmull_s16(d22s16, d22s16);
  q8s32 = vmull_s16(d24s16, d24s16);
  q9s32 = vmull_s16(d26s16, d26s16);
  q10s32 = vmull_s16(d28s16, d28s16);

  q7s32 = vaddq_s32(q7s32, q8s32);
  q9s32 = vaddq_s32(q9s32, q10s32);
  q9s32 = vaddq_s32(q7s32, q9s32);

  q1s64 = vpaddlq_s32(q9s32);
  d0s64 = vadd_s64(vget_low_s64(q1s64), vget_high_s64(q1s64));

  return vget_lane_u32(vreinterpret_u32_s64(d0s64), 0);
}

libvpx-1.8.2/vpx_dsp/arm/vpx_convolve8_avg_horiz_filter_type1_neon.asm
;
;  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
;
;  Use of this source code is governed by a BSD-style license
;  that can be found in the LICENSE file in the root of the source
;  tree. An additional intellectual property rights grant can be found
;  in the file PATENTS.  All contributing project authors may
;  be found in the AUTHORS file in the root of the source tree.
;
;**************Variables Vs Registers*****************************************
;   r0 => src
;   r1 => dst
;   r2 => src_stride
;   r3 => dst_stride
;   r4 => filter_x0
;   r8 => ht
;   r10 => wd

    EXPORT          |vpx_convolve8_avg_horiz_filter_type1_neon|
    ARM
    REQUIRE8
    PRESERVE8

    AREA            ||.text||, CODE, READONLY, ALIGN=2

|vpx_convolve8_avg_horiz_filter_type1_neon| PROC

    stmfd           sp!, {r4 - r12, r14}    ;stack stores the values of
                                            ; the arguments
    vpush           {d8 - d15}              ; stack offset by 64
    mov             r4, r1
    mov             r1, r2
    mov             r2, r4

start_loop_count
    ldr             r4, [sp, #104]          ;loads pi1_coeff
    ldr             r8, [sp, #108]          ;loads x0_q4
    add             r4, r4, r8, lsl #4      ;r4 = filter[x0_q4]
    ldr             r8, [sp, #128]          ;loads ht
    ldr             r10, [sp, #124]         ;loads wd
    vld2.8          {d0, d1}, [r4]          ;coeff = vld1_s8(pi1_coeff)
    mov             r11, #1
    subs            r14, r8, #0             ;checks for ht == 0
    vabs.s8         d2, d0                  ;vabs_s8(coeff)
    vdup.8          d24, d2[0]              ;coeffabs_0 = vdup_lane_u8(coeffabs, 0)
    sub             r12, r0, #3             ;pu1_src - 3
    vdup.8          d25, d2[1]              ;coeffabs_1 = vdup_lane_u8(coeffabs, 1)
    add             r4, r12, r2             ;pu1_src_tmp2_8 = pu1_src + src_strd
    vdup.8          d26, d2[2]              ;coeffabs_2 = vdup_lane_u8(coeffabs, 2)
    rsb             r9, r10, r2, lsl #1     ;2*src_strd - wd
    vdup.8          d27, d2[3]              ;coeffabs_3 = vdup_lane_u8(coeffabs, 3)
    rsb             r8, r10, r3, lsl #1     ;2*dst_strd - wd
    vdup.8          d28, d2[4]              ;coeffabs_4 = vdup_lane_u8(coeffabs, 4)
    vdup.8          d29, d2[5]              ;coeffabs_5 = vdup_lane_u8(coeffabs, 5)
    vdup.8          d30, d2[6]              ;coeffabs_6 = vdup_lane_u8(coeffabs, 6)
    vdup.8          d31, d2[7]              ;coeffabs_7 = vdup_lane_u8(coeffabs, 7)
    mov             r7, r1

    cmp             r10, #4
    ble             outer_loop_4

    cmp             r10, #24
    moveq           r10, #16
    addeq           r8, #8
    addeq           r9, #8

    cmp             r10, #16
    bge             outer_loop_16

    cmp             r10, #12
    addeq           r8, #4
    addeq           r9, #4
    b               outer_loop_8

outer_loop8_residual
    sub             r12, r0, #3             ;pu1_src - 3
    mov             r1, r7
    mov             r14, #32
    add             r1, #16
    add             r12, #16
    mov             r10, #8
    add             r8, #8
    add             r9, #8

outer_loop_8
    add             r6, r1, r3              ;pu1_dst + dst_strd
    add             r4, r12, r2             ;pu1_src + src_strd
    subs            r5, r10, #0             ;checks wd
    ble             end_inner_loop_8

inner_loop_8
    mov             r7, #0xc000
    vld1.u32        {d0}, [r12], r11        ;vector load pu1_src
    vdup.16         q4, r7
    vld1.u32        {d1}, [r12], r11
    vdup.16         q5, r7
    vld1.u32        {d2}, [r12], r11
    vld1.u32        {d3}, [r12], r11
    mov             r7, #0x4000
    vld1.u32        {d4}, [r12], r11
    vmlsl.u8        q4, d1, d25             ;mul_res = vmlsl_u8(src[0_1],
                                            ; coeffabs_1);
    vld1.u32        {d5}, [r12], r11
    vmlal.u8        q4, d3, d27             ;mul_res = vmlal_u8(src[0_3],
                                            ; coeffabs_3);
    vld1.u32        {d6}, [r12], r11
    vmlsl.u8        q4, d0, d24             ;mul_res = vmlsl_u8(src[0_0],
                                            ; coeffabs_0);
    vld1.u32        {d7}, [r12], r11
    vmlal.u8        q4, d2, d26             ;mul_res = vmlal_u8(src[0_2],
                                            ; coeffabs_2);
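    ; Annotation (added for clarity; not in the original source): the
    ; constant pair used in this loop implements the convolve rounding in
    ; 16-bit accumulators.  Each accumulator is seeded with 0xc000 (-16384
    ; as s16) so the 8-tap sum of products stays within s16 range;
    ; vhadd.s16 with 0x4000 then cancels the bias while halving the sum,
    ; and vqrshrun.s16 #6 applies the remaining rounded shift, giving the
    ; usual (sum + 64) >> 7 convolution rounding with unsigned saturation.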
    vld1.u32        {d12}, [r4], r11        ;vector load pu1_src + src_strd
    vmlal.u8        q4, d4, d28             ;mul_res = vmlal_u8(src[0_4],
                                            ; coeffabs_4);
    vld1.u32        {d13}, [r4], r11
    vmlal.u8        q4, d5, d29             ;mul_res = vmlal_u8(src[0_5],
                                            ; coeffabs_5);
    vld1.u32        {d14}, [r4], r11
    vmlsl.u8        q4, d6, d30             ;mul_res = vmlsl_u8(src[0_6],
                                            ; coeffabs_6);
    vld1.u32        {d15}, [r4], r11
    vmlsl.u8        q4, d7, d31             ;mul_res = vmlsl_u8(src[0_7],
                                            ; coeffabs_7);
    vld1.u32        {d16}, [r4], r11        ;vector load pu1_src + src_strd
    vdup.16         q11, r7
    vmlal.u8        q5, d15, d27            ;mul_res = vmlal_u8(src[0_3],
                                            ; coeffabs_3);
    vld1.u32        {d17}, [r4], r11
    vmlal.u8        q5, d14, d26            ;mul_res = vmlal_u8(src[0_2],
                                            ; coeffabs_2);
    vhadd.s16       q4, q4, q11
    vld1.u32        {d18}, [r4], r11
    vmlal.u8        q5, d16, d28            ;mul_res = vmlal_u8(src[0_4],
                                            ; coeffabs_4);
    vld1.u32        {d19}, [r4], r11        ;vector load pu1_src + src_strd
    vmlal.u8        q5, d17, d29            ;mul_res = vmlal_u8(src[0_5],
                                            ; coeffabs_5);
    vld1.u8         {d6}, [r1]
    vqrshrun.s16    d20, q4, #6             ;right shift and saturating narrow
                                            ; result 1
    vmlsl.u8        q5, d18, d30            ;mul_res = vmlsl_u8(src[0_6],
                                            ; coeffabs_6);
    vmlsl.u8        q5, d19, d31            ;mul_res = vmlsl_u8(src[0_7],
                                            ; coeffabs_7);
    vld1.u8         {d7}, [r6]
    vrhadd.u8       d20, d20, d6
    vmlsl.u8        q5, d12, d24            ;mul_res = vmlsl_u8(src[0_0],
                                            ; coeffabs_0);
    vmlsl.u8        q5, d13, d25            ;mul_res = vmlsl_u8(src[0_1],
                                            ; coeffabs_1);
    vst1.8          {d20}, [r1]!            ;store the result pu1_dst
    vhadd.s16       q5, q5, q11
    subs            r5, r5, #8              ;decrement the wd loop
    vqrshrun.s16    d8, q5, #6              ;right shift and saturating narrow
                                            ; result 2
    vrhadd.u8       d8, d8, d7
    vst1.8          {d8}, [r6]!             ;store the result pu1_dst
    cmp             r5, #4
    bgt             inner_loop_8

end_inner_loop_8
    subs            r14, r14, #2            ;decrement the ht loop
    add             r12, r12, r9            ;increment the src pointer by
                                            ; 2*src_strd-wd
    add             r1, r1, r8              ;increment the dst pointer by
                                            ; 2*dst_strd-wd
    bgt             outer_loop_8

    ldr             r10, [sp, #120]         ;loads wd
    cmp             r10, #12
    beq             outer_loop4_residual

end_loops
    b               end_func

outer_loop_16
    str             r0, [sp, #-4]!
    str             r7, [sp, #-4]!
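    ; Annotation (added): the 16-wide path below is software-pipelined;
    ; the loads and pld prefetches for the next iteration are interleaved
    ; with the multiply-accumulates of the current one, and the q4/q10 and
    ; q5/q11 accumulator pairs cover two output rows per pass.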
add r6, r1, r3 ;pu1_dst + dst_strd add r4, r12, r2 ;pu1_src + src_strd and r0, r12, #31 mov r7, #0xc000 sub r5, r10, #0 ;checks wd pld [r4, r2, lsl #1] pld [r12, r2, lsl #1] vld1.u32 {q0}, [r12], r11 ;vector load pu1_src vdup.16 q4, r7 vld1.u32 {q1}, [r12], r11 vld1.u32 {q2}, [r12], r11 vld1.u32 {q3}, [r12], r11 vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vld1.u32 {q6}, [r12], r11 vmlsl.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {q7}, [r12], r11 vmlal.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vld1.u32 {q8}, [r12], r11 vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); vld1.u32 {q9}, [r12], r11 vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vmlal.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vdup.16 q10, r7 vmlsl.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); inner_loop_16 vmlsl.u8 q10, d1, d24 vdup.16 q5, r7 vmlsl.u8 q10, d3, d25 mov r7, #0x4000 vdup.16 q11, r7 vmlal.u8 q10, d5, d26 vld1.u32 {q0}, [r4], r11 ;vector load pu1_src vhadd.s16 q4, q4, q11 vld1.u32 {q1}, [r4], r11 vmlal.u8 q10, d7, d27 add r12, #8 subs r5, r5, #16 vmlal.u8 q10, d13, d28 vld1.u32 {q2}, [r4], r11 vmlal.u8 q10, d15, d29 vld1.u32 {q3}, [r4], r11 vqrshrun.s16 d8, q4, #6 ;right shift and saturating narrow ; result 1 vmlsl.u8 q10, d17, d30 vld1.u32 {q6}, [r4], r11 vmlsl.u8 q10, d19, d31 vld1.u32 {q7}, [r4], r11 add r7, r1, #8 vmlsl.u8 q5, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vmlsl.u8 q5, d2, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {q8}, [r4], r11 vhadd.s16 q10, q10, q11 vld1.u32 {q9}, [r4], r11 vld1.u8 {d0}, [r1] vmlal.u8 q5, d4, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vld1.u8 {d2}, [r7] vmlal.u8 q5, d6, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); add r4, #8 mov r7, #0xc000 vmlal.u8 q5, d12, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vmlal.u8 q5, d14, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vqrshrun.s16 d9, q10, #6 vdup.16 q11, r7 vmlsl.u8 q5, d16, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q5, d18, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); mov r7, #0x4000 vrhadd.u8 d8, d8, d0 vrhadd.u8 d9, d9, d2 vmlsl.u8 q11, d1, d24 vmlsl.u8 q11, d3, d25 vdup.16 q10, r7 vmlal.u8 q11, d5, d26 pld [r12, r2, lsl #2] pld [r4, r2, lsl #2] addeq r12, r12, r9 ;increment the src pointer by ; 2*src_strd-wd addeq r4, r12, r2 ;pu1_src + src_strd vmlal.u8 q11, d7, d27 vmlal.u8 q11, d13, d28 vst1.8 {q4}, [r1]! 
;store the result pu1_dst subeq r14, r14, #2 vhadd.s16 q5, q5, q10 vmlal.u8 q11, d15, d29 addeq r1, r1, r8 vmlsl.u8 q11, d17, d30 cmp r14, #0 vmlsl.u8 q11, d19, d31 vqrshrun.s16 d10, q5, #6 ;right shift and saturating narrow ; result 2 beq epilog_16 vld1.u32 {q0}, [r12], r11 ;vector load pu1_src mov r7, #0xc000 cmp r5, #0 vld1.u32 {q1}, [r12], r11 vhadd.s16 q11, q11, q10 vld1.u32 {q2}, [r12], r11 vdup.16 q4, r7 vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vdup.16 q10, r7 vld1.u32 {q3}, [r12], r11 add r7, r6, #8 moveq r5, r10 vld1.u8 {d0}, [r6] vmlsl.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u8 {d2}, [r7] vqrshrun.s16 d11, q11, #6 vmlal.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vld1.u32 {q6}, [r12], r11 vrhadd.u8 d10, d10, d0 vld1.u32 {q7}, [r12], r11 vrhadd.u8 d11, d11, d2 vld1.u32 {q8}, [r12], r11 vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); vld1.u32 {q9}, [r12], r11 vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vmlal.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); mov r7, #0xc000 vmlsl.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vst1.8 {q5}, [r6]! ;store the result pu1_dst vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); addeq r6, r1, r3 ;pu1_dst + dst_strd b inner_loop_16 epilog_16 mov r7, #0x4000 ldr r0, [sp], #4 ldr r10, [sp, #120] vdup.16 q10, r7 vhadd.s16 q11, q11, q10 vqrshrun.s16 d11, q11, #6 add r7, r6, #8 vld1.u8 {d20}, [r6] vld1.u8 {d21}, [r7] vrhadd.u8 d10, d10, d20 vrhadd.u8 d11, d11, d21 vst1.8 {q5}, [r6]! ;store the result pu1_dst ldr r7, [sp], #4 cmp r10, #24 beq outer_loop8_residual end_loops1 b end_func outer_loop4_residual sub r12, r0, #3 ;pu1_src - 3 mov r1, r7 add r1, #8 mov r10, #4 add r12, #8 mov r14, #16 add r8, #4 add r9, #4 outer_loop_4 add r6, r1, r3 ;pu1_dst + dst_strd add r4, r12, r2 ;pu1_src + src_strd subs r5, r10, #0 ;checks wd ble end_inner_loop_4 inner_loop_4 vld1.u32 {d0}, [r12], r11 ;vector load pu1_src vld1.u32 {d1}, [r12], r11 vld1.u32 {d2}, [r12], r11 vld1.u32 {d3}, [r12], r11 vld1.u32 {d4}, [r12], r11 vld1.u32 {d5}, [r12], r11 vld1.u32 {d6}, [r12], r11 vld1.u32 {d7}, [r12], r11 sub r12, r12, #4 vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd vld1.u32 {d13}, [r4], r11 vzip.32 d0, d12 ;vector zip the i iteration and ii ; interation in single register vld1.u32 {d14}, [r4], r11 vzip.32 d1, d13 vld1.u32 {d15}, [r4], r11 vzip.32 d2, d14 vld1.u32 {d16}, [r4], r11 vzip.32 d3, d15 vld1.u32 {d17}, [r4], r11 vzip.32 d4, d16 vld1.u32 {d18}, [r4], r11 vzip.32 d5, d17 vld1.u32 {d19}, [r4], r11 mov r7, #0xc000 vdup.16 q4, r7 sub r4, r4, #4 vzip.32 d6, d18 vzip.32 d7, d19 vmlsl.u8 q4, d1, d25 ;arithmetic operations for ii ; iteration in the same time vmlsl.u8 q4, d0, d24 vmlal.u8 q4, d2, d26 vmlal.u8 q4, d3, d27 vmlal.u8 q4, d4, d28 vmlal.u8 q4, d5, d29 vmlsl.u8 q4, d6, d30 vmlsl.u8 q4, d7, d31 mov r7, #0x4000 vdup.16 q10, r7 vhadd.s16 q4, q4, q10 vqrshrun.s16 d8, q4, #6 vld1.u32 {d10[0]}, [r1] vld1.u32 {d10[1]}, [r6] vrhadd.u8 d8, d8, d10 vst1.32 {d8[0]},[r1]! ;store the i iteration result which ; is in upper part of the register vst1.32 {d8[1]},[r6]! 
;store the ii iteration result which ; is in lower part of the register subs r5, r5, #4 ;decrement the wd by 4 bgt inner_loop_4 end_inner_loop_4 subs r14, r14, #2 ;decrement the ht by 4 add r12, r12, r9 ;increment the input pointer ; 2*src_strd-wd add r1, r1, r8 ;increment the output pointer ; 2*dst_strd-wd bgt outer_loop_4 end_func vpop {d8 - d15} ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp ENDP END libvpx-1.8.2/vpx_dsp/arm/vpx_convolve8_avg_horiz_filter_type2_neon.asm000066400000000000000000000465501357355204000264070ustar00rootroot00000000000000; ; Copyright (c) 2018 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; ;**************Variables Vs Registers*********************************** ; r0 => src ; r1 => dst ; r2 => src_stride ; r3 => dst_stride ; r4 => filter_x0 ; r8 => ht ; r10 => wd EXPORT |vpx_convolve8_avg_horiz_filter_type2_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 |vpx_convolve8_avg_horiz_filter_type2_neon| PROC stmfd sp!, {r4 - r12, r14} ;stack stores the values of ; the arguments vpush {d8 - d15} ; stack offset by 64 mov r4, r1 mov r1, r2 mov r2, r4 start_loop_count ldr r4, [sp, #104] ;loads pi1_coeff ldr r8, [sp, #108] ;loads x0_q4 add r4, r4, r8, lsl #4 ;r4 = filter[x0_q4] ldr r8, [sp, #128] ;loads ht ldr r10, [sp, #124] ;loads wd vld2.8 {d0, d1}, [r4] ;coeff = vld1_s8(pi1_coeff) mov r11, #1 subs r14, r8, #0 ;checks for ht == 0 vabs.s8 d2, d0 ;vabs_s8(coeff) vdup.8 d24, d2[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, ; 0) sub r12, r0, #3 ;pu1_src - 3 vdup.8 d25, d2[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, ; 1) add r4, r12, r2 ;pu1_src_tmp2_8 = pu1_src + src_strd vdup.8 d26, d2[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, ; 2) rsb r9, r10, r2, lsl #1 ;2*src_strd - wd vdup.8 d27, d2[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, ; 3) rsb r8, r10, r3, lsl #1 ;2*dst_strd - wd vdup.8 d28, d2[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, ; 4) vdup.8 d29, d2[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, ; 5) vdup.8 d30, d2[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, ; 6) vdup.8 d31, d2[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, ; 7) mov r7, r1 cmp r10, #4 ble outer_loop_4 cmp r10, #24 moveq r10, #16 addeq r8, #8 addeq r9, #8 cmp r10, #16 bge outer_loop_16 cmp r10, #12 addeq r8, #4 addeq r9, #4 b outer_loop_8 outer_loop8_residual sub r12, r0, #3 ;pu1_src - 3 mov r1, r7 mov r14, #32 add r1, #16 add r12, #16 mov r10, #8 add r8, #8 add r9, #8 outer_loop_8 add r6, r1, r3 ;pu1_dst + dst_strd add r4, r12, r2 ;pu1_src + src_strd subs r5, r10, #0 ;checks wd ble end_inner_loop_8 inner_loop_8 mov r7, #0xc000 vld1.u32 {d0}, [r12], r11 ;vector load pu1_src vdup.16 q4, r7 vld1.u32 {d1}, [r12], r11 vdup.16 q5, r7 vld1.u32 {d2}, [r12], r11 vld1.u32 {d3}, [r12], r11 mov r7, #0x4000 vld1.u32 {d4}, [r12], r11 vmlal.u8 q4, d1, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {d5}, [r12], r11 vmlal.u8 q4, d3, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); vld1.u32 {d6}, [r12], r11 vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vld1.u32 {d7}, [r12], r11 vmlsl.u8 q4, d2, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd vmlal.u8 q4, d4, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vld1.u32 
{d13}, [r4], r11 vmlsl.u8 q4, d5, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vld1.u32 {d14}, [r4], r11 vmlal.u8 q4, d6, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vld1.u32 {d15}, [r4], r11 vmlsl.u8 q4, d7, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); vld1.u32 {d16}, [r4], r11 ;vector load pu1_src + src_strd vdup.16 q11, r7 vmlal.u8 q5, d15, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); vld1.u32 {d17}, [r4], r11 vmlsl.u8 q5, d14, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vhadd.s16 q4, q4, q11 vld1.u32 {d18}, [r4], r11 vmlal.u8 q5, d16, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vld1.u32 {d19}, [r4], r11 ;vector load pu1_src + src_strd vmlsl.u8 q5, d17, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vld1.u8 {d6}, [r1] vqrshrun.s16 d20, q4, #6 ;right shift and saturating narrow ; result 1 vmlal.u8 q5, d18, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q5, d19, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); vld1.u8 {d7}, [r6] vrhadd.u8 d20, d20, d6 vmlsl.u8 q5, d12, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vmlal.u8 q5, d13, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vst1.8 {d20}, [r1]! ;store the result pu1_dst vhadd.s16 q5, q5, q11 subs r5, r5, #8 ;decrement the wd loop vqrshrun.s16 d8, q5, #6 ;right shift and saturating narrow ; result 2 vrhadd.u8 d8, d8, d7 vst1.8 {d8}, [r6]! ;store the result pu1_dst cmp r5, #4 bgt inner_loop_8 end_inner_loop_8 subs r14, r14, #2 ;decrement the ht loop add r12, r12, r9 ;increment the src pointer by ; 2*src_strd-wd add r1, r1, r8 ;increment the dst pointer by ; 2*dst_strd-wd bgt outer_loop_8 ldr r10, [sp, #120] ;loads wd cmp r10, #12 beq outer_loop4_residual end_loops b end_func outer_loop_16 str r0, [sp, #-4]! str r7, [sp, #-4]! add r6, r1, r3 ;pu1_dst + dst_strd add r4, r12, r2 ;pu1_src + src_strd and r0, r12, #31 mov r7, #0xc000 sub r5, r10, #0 ;checks wd pld [r4, r2, lsl #1] pld [r12, r2, lsl #1] vld1.u32 {q0}, [r12], r11 ;vector load pu1_src vdup.16 q4, r7 vld1.u32 {q1}, [r12], r11 vld1.u32 {q2}, [r12], r11 vld1.u32 {q3}, [r12], r11 vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vld1.u32 {q6}, [r12], r11 vmlal.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {q7}, [r12], r11 vmlsl.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vld1.u32 {q8}, [r12], r11 vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); vld1.u32 {q9}, [r12], r11 vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vmlsl.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vdup.16 q10, r7 vmlal.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); inner_loop_16 vmlsl.u8 q10, d1, d24 vdup.16 q5, r7 vmlal.u8 q10, d3, d25 mov r7, #0x4000 vdup.16 q11, r7 vmlsl.u8 q10, d5, d26 vld1.u32 {q0}, [r4], r11 ;vector load pu1_src vhadd.s16 q4, q4, q11 vld1.u32 {q1}, [r4], r11 vmlal.u8 q10, d7, d27 add r12, #8 subs r5, r5, #16 vmlal.u8 q10, d13, d28 vld1.u32 {q2}, [r4], r11 vmlsl.u8 q10, d15, d29 vld1.u32 {q3}, [r4], r11 vqrshrun.s16 d8, q4, #6 ;right shift and saturating narrow ; result 1 vmlal.u8 q10, d17, d30 vld1.u32 {q6}, [r4], r11 vmlsl.u8 q10, d19, d31 vld1.u32 {q7}, [r4], r11 add r7, r1, #8 vmlsl.u8 q5, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vmlal.u8 q5, d2, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {q8}, [r4], r11 vhadd.s16 q10, q10, q11 vld1.u32 {q9}, [r4], r11 vld1.u8 {d0}, [r1] vmlsl.u8 q5, d4, d26 ;mul_res = vmlsl_u8(src[0_2], ; 
coeffabs_2); vld1.u8 {d2}, [r7] vmlal.u8 q5, d6, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); add r4, #8 mov r7, #0xc000 vmlal.u8 q5, d12, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vmlsl.u8 q5, d14, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vqrshrun.s16 d9, q10, #6 vdup.16 q11, r7 vmlal.u8 q5, d16, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q5, d18, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); mov r7, #0x4000 vrhadd.u8 d8, d8, d0 vrhadd.u8 d9, d9, d2 vmlsl.u8 q11, d1, d24 vmlal.u8 q11, d3, d25 vdup.16 q10, r7 vmlsl.u8 q11, d5, d26 pld [r12, r2, lsl #2] pld [r4, r2, lsl #2] addeq r12, r12, r9 ;increment the src pointer by ; 2*src_strd-wd addeq r4, r12, r2 ;pu1_src + src_strd vmlal.u8 q11, d7, d27 vmlal.u8 q11, d13, d28 vst1.8 {q4}, [r1]! ;store the result pu1_dst subeq r14, r14, #2 vhadd.s16 q5, q5, q10 vmlsl.u8 q11, d15, d29 addeq r1, r1, r8 vmlal.u8 q11, d17, d30 cmp r14, #0 vmlsl.u8 q11, d19, d31 vqrshrun.s16 d10, q5, #6 ;right shift and saturating narrow ; result 2 beq epilog_16 vld1.u32 {q0}, [r12], r11 ;vector load pu1_src mov r7, #0xc000 cmp r5, #0 vld1.u32 {q1}, [r12], r11 vhadd.s16 q11, q11, q10 vld1.u32 {q2}, [r12], r11 vdup.16 q4, r7 vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vdup.16 q10, r7 vld1.u32 {q3}, [r12], r11 add r7, r6, #8 moveq r5, r10 vld1.u8 {d0}, [r6] vmlal.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u8 {d2}, [r7] vqrshrun.s16 d11, q11, #6 vmlsl.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vld1.u32 {q6}, [r12], r11 vrhadd.u8 d10, d10, d0 vld1.u32 {q7}, [r12], r11 vrhadd.u8 d11, d11, d2 vld1.u32 {q8}, [r12], r11 vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); vld1.u32 {q9}, [r12], r11 vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vmlsl.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); mov r7, #0xc000 vmlal.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vst1.8 {q5}, [r6]! ;store the result pu1_dst vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); addeq r6, r1, r3 ;pu1_dst + dst_strd b inner_loop_16 epilog_16 mov r7, #0x4000 ldr r0, [sp], #4 ldr r10, [sp, #120] vdup.16 q10, r7 vhadd.s16 q11, q11, q10 vqrshrun.s16 d11, q11, #6 add r7, r6, #8 vld1.u8 {d20}, [r6] vld1.u8 {d21}, [r7] vrhadd.u8 d10, d10, d20 vrhadd.u8 d11, d11, d21 vst1.8 {q5}, [r6]! 
;store the result pu1_dst ldr r7, [sp], #4 cmp r10, #24 beq outer_loop8_residual end_loops1 b end_func outer_loop4_residual sub r12, r0, #3 ;pu1_src - 3 mov r1, r7 add r1, #8 mov r10, #4 add r12, #8 mov r14, #16 add r8, #4 add r9, #4 outer_loop_4 add r6, r1, r3 ;pu1_dst + dst_strd add r4, r12, r2 ;pu1_src + src_strd subs r5, r10, #0 ;checks wd ble end_inner_loop_4 inner_loop_4 vld1.u32 {d0}, [r12], r11 ;vector load pu1_src vld1.u32 {d1}, [r12], r11 vld1.u32 {d2}, [r12], r11 vld1.u32 {d3}, [r12], r11 vld1.u32 {d4}, [r12], r11 vld1.u32 {d5}, [r12], r11 vld1.u32 {d6}, [r12], r11 vld1.u32 {d7}, [r12], r11 sub r12, r12, #4 vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd vld1.u32 {d13}, [r4], r11 vzip.32 d0, d12 ;vector zip the i iteration and ii ; interation in single register vld1.u32 {d14}, [r4], r11 vzip.32 d1, d13 vld1.u32 {d15}, [r4], r11 vzip.32 d2, d14 vld1.u32 {d16}, [r4], r11 vzip.32 d3, d15 vld1.u32 {d17}, [r4], r11 vzip.32 d4, d16 vld1.u32 {d18}, [r4], r11 vzip.32 d5, d17 vld1.u32 {d19}, [r4], r11 mov r7, #0xc000 vdup.16 q4, r7 sub r4, r4, #4 vzip.32 d6, d18 vzip.32 d7, d19 vmlal.u8 q4, d1, d25 ;arithmetic operations for ii ; iteration in the same time vmlsl.u8 q4, d0, d24 vmlsl.u8 q4, d2, d26 vmlal.u8 q4, d3, d27 vmlal.u8 q4, d4, d28 vmlsl.u8 q4, d5, d29 vmlal.u8 q4, d6, d30 vmlsl.u8 q4, d7, d31 mov r7, #0x4000 vdup.16 q10, r7 vhadd.s16 q4, q4, q10 vqrshrun.s16 d8, q4, #6 vld1.u32 {d10[0]}, [r1] vld1.u32 {d10[1]}, [r6] vrhadd.u8 d8, d8, d10 vst1.32 {d8[0]},[r1]! ;store the i iteration result which ; is in upper part of the register vst1.32 {d8[1]},[r6]! ;store the ii iteration result which ; is in lower part of the register subs r5, r5, #4 ;decrement the wd by 4 bgt inner_loop_4 end_inner_loop_4 subs r14, r14, #2 ;decrement the ht by 4 add r12, r12, r9 ;increment the input pointer ; 2*src_strd-wd add r1, r1, r8 ;increment the output pointer ; 2*dst_strd-wd bgt outer_loop_4 end_func vpop {d8 - d15} ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp ENDP END libvpx-1.8.2/vpx_dsp/arm/vpx_convolve8_avg_vert_filter_type1_neon.asm000066400000000000000000000625321357355204000262310ustar00rootroot00000000000000; ; Copyright (c) 2018 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
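; Annotation (added; not part of the original file): this kernel applies
; the 8-tap filter vertically by sliding a window of eight source rows down
; each column, then averages the filtered result with the existing dst
; pixels via vrhadd.u8, as the _avg_ convolve variants require.  The same
; bias-and-vhadd rounding used by the horizontal kernels appears here as
; q15 = #0x4000 and r11 = #0xc000.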
; ;**************Variables Vs Registers*********************************** ; r0 => src ; r1 => dst ; r2 => src_stride ; r6 => dst_stride ; r12 => filter_y0 ; r5 => ht ; r3 => wd EXPORT |vpx_convolve8_avg_vert_filter_type1_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 |vpx_convolve8_avg_vert_filter_type1_neon| PROC stmfd sp!, {r4 - r12, r14} ;stack stores the values of ; the arguments vpush {d8 - d15} ; stack offset by 64 mov r4, r1 mov r1, r2 mov r2, r4 vmov.i16 q15, #0x4000 mov r11, #0xc000 ldr r12, [sp, #104] ;load filter ldr r6, [sp, #116] ;load y0_q4 add r12, r12, r6, lsl #4 ;r12 = filter[y0_q4] mov r6, r3 ldr r5, [sp, #124] ;load wd vld2.8 {d0, d1}, [r12] ;coeff = vld1_s8(pi1_coeff) sub r12, r2, r2, lsl #2 ;src_ctrd & pi1_coeff vabs.s8 d0, d0 ;vabs_s8(coeff) add r0, r0, r12 ;r0->pu1_src r12->pi1_coeff ldr r3, [sp, #128] ;load ht subs r7, r3, #0 ;r3->ht vdup.u8 d22, d0[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, ; 0); cmp r5, #8 vdup.u8 d23, d0[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, ; 1); vdup.u8 d24, d0[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, ; 2); vdup.u8 d25, d0[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, ; 3); vdup.u8 d26, d0[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, ; 4); vdup.u8 d27, d0[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, ; 5); vdup.u8 d28, d0[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, ; 6); vdup.u8 d29, d0[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, ; 7); blt core_loop_wd_4 ;core loop wd 4 jump str r0, [sp, #-4]! str r1, [sp, #-4]! bic r4, r5, #7 ;r5 ->wd rsb r9, r4, r6, lsl #2 ;r6->dst_strd r5 ->wd rsb r8, r4, r2, lsl #2 ;r2->src_strd mov r3, r5, lsr #3 ;divide by 8 mul r7, r3 ;multiply height by width sub r7, #4 ;subtract by one for epilog prolog and r10, r0, #31 add r3, r0, r2 ;pu1_src_tmp += src_strd; vdup.16 q4, r11 vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vld1.u8 {d0}, [r0]! 
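    ; Annotation (added): the prolog below preloads the first eleven source
    ; rows (d0-d7, d16-d18) so that the 8-tap vertical window can produce
    ; four filtered output rows before main_loop_8 is entered.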
;src_tmp1 = vld1_u8(pu1_src_tmp); subs r4, r4, #8 vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, ; coeffabs_1); vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp1, coeffabs_0); vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp3, coeffabs_2); vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp4, coeffabs_3); vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp1, coeffabs_4); vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp2, coeffabs_5); vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp3, coeffabs_6); vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp4, coeffabs_7); vdup.16 q5, r11 vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, ; coeffabs_1); addle r0, r0, r8 vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp2, coeffabs_0); bicle r4, r5, #7 ;r5 ->wd vmlal.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp4, coeffabs_2); pld [r3] vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp1, coeffabs_3); vhadd.s16 q4, q4, q15 vdup.16 q6, r11 pld [r3, r2] pld [r3, r2, lsl #1] vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp2, coeffabs_4); add r3, r3, r2 vmlal.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp3, coeffabs_5); pld [r3, r2, lsl #1] vmlsl.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp4, coeffabs_6); add r3, r0, r2 ;pu1_src_tmp += src_strd; vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp1, coeffabs_7); vld1.u8 {d20}, [r1] vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u8 {d1}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q6, d3, d23 vld1.u8 {d0}, [r0]! ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q6, d2, d22 vrhadd.u8 d8, d8, d20 vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q6, d4, d24 vhadd.s16 q5, q5, q15 vdup.16 q7, r11 vmlal.u8 q6, d5, d25 vmlal.u8 q6, d6, d26 add r14, r1, r6 vmlal.u8 q6, d7, d27 vmlsl.u8 q6, d16, d28 vst1.8 {d8}, [r1]! 
;vst1_u8(pu1_dst,sto_res); vmlsl.u8 q6, d17, d29 vld1.u8 {d20}, [r14] vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); addle r1, r1, r9 vmlsl.u8 q7, d4, d23 subs r7, r7, #4 vmlsl.u8 q7, d3, d22 vmlal.u8 q7, d5, d24 vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vmlal.u8 q7, d6, d25 vrhadd.u8 d10, d10, d20 vhadd.s16 q6, q6, q15 vdup.16 q4, r11 vmlal.u8 q7, d7, d26 vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q7, d16, d27 vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d17, d28 vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d18, d29 vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); vqrshrun.s16 d12, q6, #6 blt epilog_end ;jumps to epilog_end beq epilog ;jumps to epilog main_loop_8 subs r4, r4, #8 vmlsl.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, ; coeffabs_1); vld1.u8 {d20}, [r14] vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp1, coeffabs_0); addle r0, r0, r8 bicle r4, r5, #7 ;r5 ->wd vmlal.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp3, coeffabs_2); vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp4, coeffabs_3); vrhadd.u8 d12, d12, d20 vhadd.s16 q7, q7, q15 vdup.16 q5, r11 vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp1, coeffabs_4); vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp2, coeffabs_5); vmlsl.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp3, coeffabs_6); vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp4, coeffabs_7); vst1.8 {d12}, [r14], r6 vld1.u8 {d20}, [r14] vqrshrun.s16 d14, q7, #6 add r3, r0, r2 ;pu1_src_tmp += src_strd; vmlsl.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, ; coeffabs_1); vld1.u8 {d0}, [r0]! 
;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp2, coeffabs_0); vrhadd.u8 d14, d14, d20 vmlal.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp4, coeffabs_2); vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp1, coeffabs_3); vhadd.s16 q4, q4, q15 vdup.16 q6, r11 vst1.8 {d14}, [r14], r6 vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp2, coeffabs_4); add r14, r1, #0 vmlal.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp3, coeffabs_5); add r1, r1, #8 vmlsl.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp4, coeffabs_6); addle r1, r1, r9 vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp1, coeffabs_7); vld1.u8 {d20}, [r14] vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vmlsl.u8 q6, d3, d23 add r10, r3, r2, lsl #3 ; 10*strd - 8+2 vmlsl.u8 q6, d2, d22 vrhadd.u8 d8, d8, d20 add r10, r10, r2 ; 11*strd vmlal.u8 q6, d4, d24 vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q6, d5, d25 vhadd.s16 q5, q5, q15 vdup.16 q7, r11 vmlal.u8 q6, d6, d26 vst1.8 {d8}, [r14], r6 ;vst1_u8(pu1_dst,sto_res); pld [r10] ;11+ 0 vmlal.u8 q6, d7, d27 pld [r10, r2] ;11+ 1*strd pld [r10, r2, lsl #1] ;11+ 2*strd vmlsl.u8 q6, d16, d28 add r10, r10, r2 ;12*strd vmlsl.u8 q6, d17, d29 vld1.u8 {d20}, [r14] vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); pld [r10, r2, lsl #1] ;11+ 3*strd vmlsl.u8 q7, d4, d23 vmlsl.u8 q7, d3, d22 vrhadd.u8 d10, d10, d20 subs r7, r7, #4 vmlal.u8 q7, d5, d24 vmlal.u8 q7, d6, d25 vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vhadd.s16 q6, q6, q15 vdup.16 q4, r11 vmlal.u8 q7, d7, d26 vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q7, d16, d27 vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d17, d28 vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d18, d29 vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vqrshrun.s16 d12, q6, #6 vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); bgt main_loop_8 ;jumps to main_loop_8 epilog vld1.u8 {d20}, [r14] vmlsl.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, ; coeffabs_1); vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp1, coeffabs_0); vmlal.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp3, coeffabs_2); vrhadd.u8 d12, d12, d20 vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp4, coeffabs_3); vhadd.s16 q7, q7, q15 vdup.16 q5, r11 vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp1, coeffabs_4); vmlal.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp2, coeffabs_5); vmlsl.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp3, coeffabs_6); vst1.8 {d12}, [r14], r6 vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp4, coeffabs_7); vld1.u8 {d20}, [r14] vqrshrun.s16 d14, q7, #6 vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, ; coeffabs_1); vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp2, coeffabs_0); vrhadd.u8 d14, d14, d20 vmlal.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp4, coeffabs_2); vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp1, coeffabs_3); vhadd.s16 q4, q4, q15 vdup.16 q6, r11 vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp2, coeffabs_4); vmlal.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp3, coeffabs_5); vmlsl.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp4, coeffabs_6); vst1.8 {d14}, 
[r14], r6 vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp1, coeffabs_7); vld1.u8 {d20}, [r1] vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlsl.u8 q6, d3, d23 vmlsl.u8 q6, d2, d22 vrhadd.u8 d8, d8, d20 vmlal.u8 q6, d4, d24 vmlal.u8 q6, d5, d25 vhadd.s16 q5, q5, q15 vdup.16 q7, r11 vmlal.u8 q6, d6, d26 vmlal.u8 q6, d7, d27 add r14, r1, r6 vmlsl.u8 q6, d16, d28 vst1.8 {d8}, [r1]! ;vst1_u8(pu1_dst,sto_res); vmlsl.u8 q6, d17, d29 vld1.u8 {d20}, [r14] vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d4, d23 vmlsl.u8 q7, d3, d22 vrhadd.u8 d10, d10, d20 vmlal.u8 q7, d5, d24 vmlal.u8 q7, d6, d25 vhadd.s16 q6, q6, q15 vmlal.u8 q7, d7, d26 vmlal.u8 q7, d16, d27 vmlsl.u8 q7, d17, d28 vmlsl.u8 q7, d18, d29 vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); vqrshrun.s16 d12, q6, #6 epilog_end vld1.u8 {d20}, [r14] vrhadd.u8 d12, d12, d20 vst1.8 {d12}, [r14], r6 vhadd.s16 q7, q7, q15 vqrshrun.s16 d14, q7, #6 vld1.u8 {d20}, [r14] vrhadd.u8 d14, d14, d20 vst1.8 {d14}, [r14], r6 end_loops tst r5, #7 ldr r1, [sp], #4 ldr r0, [sp], #4 vpopeq {d8 - d15} ldmfdeq sp!, {r4 - r12, r15} ;reload the registers from sp mov r5, #4 add r0, r0, #8 add r1, r1, #8 mov r7, #16 core_loop_wd_4 rsb r9, r5, r6, lsl #2 ;r6->dst_strd r5 ->wd rsb r8, r5, r2, lsl #2 ;r2->src_strd vmov.i8 d4, #0 outer_loop_wd_4 subs r12, r5, #0 ble end_inner_loop_wd_4 ;outer loop jump inner_loop_wd_4 add r3, r0, r2 vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp1, 1); subs r12, r12, #4 vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, ; 1); vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp2, 1); vld1.u32 {d4[0]},[r0] ;src_tmp1 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp1, 0); vdup.16 q0, r11 vmlsl.u8 q0, d5, d23 ;mul_res1 = ; vmull_u8(vreinterpret_u8_u32(src_tmp2), coeffabs_1); vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, ; 1); add r0, r0, #4 vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp3, 1); vmlsl.u8 q0, d4, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp1), coeffabs_0); vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, ; 1); vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp4, 1); vmlal.u8 q0, d6, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp3), coeffabs_2); vdup.16 q4, r11 vmlsl.u8 q4, d7, d23 vdup.u32 d4, d7[1] ;src_tmp1 = vdup_lane_u32(src_tmp4, ; 1); vmull.u8 q1, d7, d25 ;mul_res2 = ; vmull_u8(vreinterpret_u8_u32(src_tmp4), coeffabs_3); vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp1, 1); vmlsl.u8 q4, d6, d22 vmlal.u8 q0, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp1), coeffabs_4); vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, ; 1); vmlal.u8 q4, d4, d24 vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp2, 1); vmlal.u8 q1, d5, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; vreinterpret_u8_u32(src_tmp2), coeffabs_5); vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, ; 1); vmlal.u8 q4, d5, d25 vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp3, 1); vmlsl.u8 q0, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp3), coeffabs_6); vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, ; 1); vmlal.u8 q4, d6, 
d26 vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp4, 1); vmlsl.u8 q1, d7, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; vreinterpret_u8_u32(src_tmp4), coeffabs_7); vdup.u32 d4, d7[1] vadd.i16 q0, q0, q1 ;mul_res1 = vaddq_u16(mul_res1, ; mul_res2); vmlal.u8 q4, d7, d27 vld1.u32 {d4[1]},[r3], r2 vmlsl.u8 q4, d4, d28 vdup.u32 d5, d4[1] vhadd.s16 q0, q0, q15 vqrshrun.s16 d0, q0, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u32 {d5[1]},[r3] add r3, r1, r6 vld1.u32 {d20[0]}, [r1] vld1.u32 {d20[1]}, [r3] vrhadd.u8 d0, d0, d20 vst1.32 {d0[0]},[r1] ;vst1_lane_u32((uint32_t *)pu1_dst, ; vreinterpret_u32_u8(sto_res), 0); vmlsl.u8 q4, d5, d29 vst1.32 {d0[1]},[r3], r6 ;vst1_lane_u32((uint32_t ; *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 1); vhadd.s16 q4, q4, q15 vqrshrun.s16 d8, q4, #6 mov r4, r3 vld1.u32 {d20[0]}, [r4], r6 vld1.u32 {d20[1]}, [r4] vrhadd.u8 d8, d8, d20 vst1.32 {d8[0]},[r3], r6 add r1, r1, #4 vst1.32 {d8[1]},[r3] bgt inner_loop_wd_4 end_inner_loop_wd_4 subs r7, r7, #4 add r1, r1, r9 add r0, r0, r8 bgt outer_loop_wd_4 vpop {d8 - d15} ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp ENDP END libvpx-1.8.2/vpx_dsp/arm/vpx_convolve8_avg_vert_filter_type2_neon.asm000066400000000000000000000625331357355204000262330ustar00rootroot00000000000000; ; Copyright (c) 2018 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; ;**************Variables Vs Registers*********************************** ; r0 => src ; r1 => dst ; r2 => src_stride ; r6 => dst_stride ; r12 => filter_y0 ; r5 => ht ; r3 => wd EXPORT |vpx_convolve8_avg_vert_filter_type2_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 |vpx_convolve8_avg_vert_filter_type2_neon| PROC stmfd sp!, {r4 - r12, r14} ;stack stores the values of ; the arguments vpush {d8 - d15} ; stack offset by 64 mov r4, r1 mov r1, r2 mov r2, r4 vmov.i16 q15, #0x4000 mov r11, #0xc000 ldr r12, [sp, #104] ;load filter ldr r6, [sp, #116] ;load y0_q4 add r12, r12, r6, lsl #4 ;r12 = filter[y0_q4] mov r6, r3 ldr r5, [sp, #124] ;load wd vld2.8 {d0, d1}, [r12] ;coeff = vld1_s8(pi1_coeff) sub r12, r2, r2, lsl #2 ;src_ctrd & pi1_coeff vabs.s8 d0, d0 ;vabs_s8(coeff) add r0, r0, r12 ;r0->pu1_src r12->pi1_coeff ldr r3, [sp, #128] ;load ht subs r7, r3, #0 ;r3->ht vdup.u8 d22, d0[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, ; 0); cmp r5, #8 vdup.u8 d23, d0[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, ; 1); vdup.u8 d24, d0[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, ; 2); vdup.u8 d25, d0[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, ; 3); vdup.u8 d26, d0[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, ; 4); vdup.u8 d27, d0[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, ; 5); vdup.u8 d28, d0[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, ; 6); vdup.u8 d29, d0[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, ; 7); blt core_loop_wd_4 ;core loop wd 4 jump str r0, [sp, #-4]! str r1, [sp, #-4]! bic r4, r5, #7 ;r5 ->wd rsb r9, r4, r6, lsl #2 ;r6->dst_strd r5 ->wd rsb r8, r4, r2, lsl #2 ;r2->src_strd mov r3, r5, lsr #3 ;divide by 8 mul r7, r3 ;multiply height by width sub r7, #4 ;subtract by one for epilog prolog and r10, r0, #31 add r3, r0, r2 ;pu1_src_tmp += src_strd; vdup.16 q4, r11 vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vld1.u8 {d0}, [r0]! 
;src_tmp1 = vld1_u8(pu1_src_tmp); subs r4, r4, #8 vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, ; coeffabs_1); vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp1, coeffabs_0); vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp3, coeffabs_2); vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp4, coeffabs_3); vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp1, coeffabs_4); vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp2, coeffabs_5); vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp3, coeffabs_6); vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp4, coeffabs_7); vdup.16 q5, r11 vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, ; coeffabs_1); addle r0, r0, r8 vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp2, coeffabs_0); bicle r4, r5, #7 ;r5 ->wd vmlsl.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp4, coeffabs_2); pld [r3] vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp1, coeffabs_3); vhadd.s16 q4, q4, q15 vdup.16 q6, r11 pld [r3, r2] pld [r3, r2, lsl #1] vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp2, coeffabs_4); add r3, r3, r2 vmlsl.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp3, coeffabs_5); pld [r3, r2, lsl #1] vmlal.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp4, coeffabs_6); add r3, r0, r2 ;pu1_src_tmp += src_strd; vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp1, coeffabs_7); vld1.u8 {d20}, [r1] vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u8 {d1}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q6, d3, d23 vld1.u8 {d0}, [r0]! ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q6, d2, d22 vrhadd.u8 d8, d8, d20 vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q6, d4, d24 vhadd.s16 q5, q5, q15 vdup.16 q7, r11 vmlal.u8 q6, d5, d25 vmlal.u8 q6, d6, d26 add r14, r1, r6 vmlsl.u8 q6, d7, d27 vmlal.u8 q6, d16, d28 vst1.8 {d8}, [r1]! 
;vst1_u8(pu1_dst,sto_res); vmlsl.u8 q6, d17, d29 vld1.u8 {d20}, [r14] vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); addle r1, r1, r9 vmlal.u8 q7, d4, d23 subs r7, r7, #4 vmlsl.u8 q7, d3, d22 vmlsl.u8 q7, d5, d24 vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vmlal.u8 q7, d6, d25 vrhadd.u8 d10, d10, d20 vhadd.s16 q6, q6, q15 vdup.16 q4, r11 vmlal.u8 q7, d7, d26 vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d16, d27 vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q7, d17, d28 vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d18, d29 vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); vqrshrun.s16 d12, q6, #6 blt epilog_end ;jumps to epilog_end beq epilog ;jumps to epilog main_loop_8 subs r4, r4, #8 vmlal.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, ; coeffabs_1); vld1.u8 {d20}, [r14] vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp1, coeffabs_0); addle r0, r0, r8 bicle r4, r5, #7 ;r5 ->wd vmlsl.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp3, coeffabs_2); vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp4, coeffabs_3); vrhadd.u8 d12, d12, d20 vhadd.s16 q7, q7, q15 vdup.16 q5, r11 vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp1, coeffabs_4); vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp2, coeffabs_5); vmlal.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp3, coeffabs_6); vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp4, coeffabs_7); vst1.8 {d12}, [r14], r6 vld1.u8 {d20}, [r14] vqrshrun.s16 d14, q7, #6 add r3, r0, r2 ;pu1_src_tmp += src_strd; vmlal.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, ; coeffabs_1); vld1.u8 {d0}, [r0]! 
;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp2, coeffabs_0); vrhadd.u8 d14, d14, d20 vmlsl.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp4, coeffabs_2); vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp1, coeffabs_3); vhadd.s16 q4, q4, q15 vdup.16 q6, r11 vst1.8 {d14}, [r14], r6 vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp2, coeffabs_4); add r14, r1, #0 vmlsl.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp3, coeffabs_5); add r1, r1, #8 vmlal.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp4, coeffabs_6); addle r1, r1, r9 vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp1, coeffabs_7); vld1.u8 {d20}, [r14] vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vmlal.u8 q6, d3, d23 add r10, r3, r2, lsl #3 ; 10*strd - 8+2 vmlsl.u8 q6, d2, d22 vrhadd.u8 d8, d8, d20 add r10, r10, r2 ; 11*strd vmlsl.u8 q6, d4, d24 vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q6, d5, d25 vhadd.s16 q5, q5, q15 vdup.16 q7, r11 vmlal.u8 q6, d6, d26 vst1.8 {d8}, [r14], r6 ;vst1_u8(pu1_dst,sto_res); pld [r10] ;11+ 0 vmlsl.u8 q6, d7, d27 pld [r10, r2] ;11+ 1*strd pld [r10, r2, lsl #1] ;11+ 2*strd vmlal.u8 q6, d16, d28 add r10, r10, r2 ;12*strd vmlsl.u8 q6, d17, d29 vld1.u8 {d20}, [r14] vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); pld [r10, r2, lsl #1] ;11+ 3*strd vmlal.u8 q7, d4, d23 vmlsl.u8 q7, d3, d22 vrhadd.u8 d10, d10, d20 subs r7, r7, #4 vmlsl.u8 q7, d5, d24 vmlal.u8 q7, d6, d25 vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vhadd.s16 q6, q6, q15 vdup.16 q4, r11 vmlal.u8 q7, d7, d26 vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d16, d27 vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q7, d17, d28 vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d18, d29 vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vqrshrun.s16 d12, q6, #6 vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); bgt main_loop_8 ;jumps to main_loop_8 epilog vld1.u8 {d20}, [r14] vmlal.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, ; coeffabs_1); vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp1, coeffabs_0); vmlsl.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp3, coeffabs_2); vrhadd.u8 d12, d12, d20 vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp4, coeffabs_3); vhadd.s16 q7, q7, q15 vdup.16 q5, r11 vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp1, coeffabs_4); vmlsl.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp2, coeffabs_5); vmlal.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp3, coeffabs_6); vst1.8 {d12}, [r14], r6 vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp4, coeffabs_7); vld1.u8 {d20}, [r14] vqrshrun.s16 d14, q7, #6 vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, ; coeffabs_1); vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp2, coeffabs_0); vrhadd.u8 d14, d14, d20 vmlsl.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp4, coeffabs_2); vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp1, coeffabs_3); vhadd.s16 q4, q4, q15 vdup.16 q6, r11 vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp2, coeffabs_4); vmlsl.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp3, coeffabs_5); vmlal.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp4, coeffabs_6); vst1.8 {d14}, 
[r14], r6 vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp1, coeffabs_7); vld1.u8 {d20}, [r1] vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q6, d3, d23 vmlsl.u8 q6, d2, d22 vrhadd.u8 d8, d8, d20 vmlsl.u8 q6, d4, d24 vmlal.u8 q6, d5, d25 vhadd.s16 q5, q5, q15 vdup.16 q7, r11 vmlal.u8 q6, d6, d26 vmlsl.u8 q6, d7, d27 add r14, r1, r6 vmlal.u8 q6, d16, d28 vst1.8 {d8}, [r1]! ;vst1_u8(pu1_dst,sto_res); vmlsl.u8 q6, d17, d29 vld1.u8 {d20}, [r14] vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q7, d4, d23 vmlsl.u8 q7, d3, d22 vrhadd.u8 d10, d10, d20 vmlsl.u8 q7, d5, d24 vmlal.u8 q7, d6, d25 vhadd.s16 q6, q6, q15 vmlal.u8 q7, d7, d26 vmlsl.u8 q7, d16, d27 vmlal.u8 q7, d17, d28 vmlsl.u8 q7, d18, d29 vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); vqrshrun.s16 d12, q6, #6 epilog_end vld1.u8 {d20}, [r14] vrhadd.u8 d12, d12, d20 vst1.8 {d12}, [r14], r6 vhadd.s16 q7, q7, q15 vqrshrun.s16 d14, q7, #6 vld1.u8 {d20}, [r14] vrhadd.u8 d14, d14, d20 vst1.8 {d14}, [r14], r6 end_loops tst r5, #7 ldr r1, [sp], #4 ldr r0, [sp], #4 vpopeq {d8 - d15} ldmfdeq sp!, {r4 - r12, r15} ;reload the registers from sp mov r5, #4 add r0, r0, #8 add r1, r1, #8 mov r7, #16 core_loop_wd_4 rsb r9, r5, r6, lsl #2 ;r6->dst_strd r5 ->wd rsb r8, r5, r2, lsl #2 ;r2->src_strd vmov.i8 d4, #0 outer_loop_wd_4 subs r12, r5, #0 ble end_inner_loop_wd_4 ;outer loop jump inner_loop_wd_4 add r3, r0, r2 vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp1, 1); subs r12, r12, #4 vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, ; 1); vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp2, 1); vld1.u32 {d4[0]},[r0] ;src_tmp1 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp1, 0); vdup.16 q0, r11 vmlal.u8 q0, d5, d23 ;mul_res1 = ; vmull_u8(vreinterpret_u8_u32(src_tmp2), coeffabs_1); vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, ; 1); add r0, r0, #4 vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp3, 1); vmlsl.u8 q0, d4, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp1), coeffabs_0); vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, ; 1); vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp4, 1); vmlsl.u8 q0, d6, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp3), coeffabs_2); vdup.16 q4, r11 vmlal.u8 q4, d7, d23 vdup.u32 d4, d7[1] ;src_tmp1 = vdup_lane_u32(src_tmp4, ; 1); vmull.u8 q1, d7, d25 ;mul_res2 = ; vmull_u8(vreinterpret_u8_u32(src_tmp4), coeffabs_3); vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp1, 1); vmlsl.u8 q4, d6, d22 vmlal.u8 q0, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp1), coeffabs_4); vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, ; 1); vmlsl.u8 q4, d4, d24 vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp2, 1); vmlsl.u8 q1, d5, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; vreinterpret_u8_u32(src_tmp2), coeffabs_5); vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, ; 1); vmlal.u8 q4, d5, d25 vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp3, 1); vmlal.u8 q0, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp3), coeffabs_6); vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, ; 1); vmlal.u8 q4, d6, 
d26 vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp4, 1); vmlsl.u8 q1, d7, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; vreinterpret_u8_u32(src_tmp4), coeffabs_7); vdup.u32 d4, d7[1] vadd.i16 q0, q0, q1 ;mul_res1 = vaddq_u16(mul_res1, ; mul_res2); vmlsl.u8 q4, d7, d27 vld1.u32 {d4[1]},[r3], r2 vmlal.u8 q4, d4, d28 vdup.u32 d5, d4[1] vhadd.s16 q0, q0, q15 vqrshrun.s16 d0, q0, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u32 {d5[1]},[r3] add r3, r1, r6 vld1.u32 {d20[0]}, [r1] vld1.u32 {d20[1]}, [r3] vrhadd.u8 d0, d0, d20 vst1.32 {d0[0]},[r1] ;vst1_lane_u32((uint32_t *)pu1_dst, ; vreinterpret_u32_u8(sto_res), 0); vmlsl.u8 q4, d5, d29 vst1.32 {d0[1]},[r3], r6 ;vst1_lane_u32((uint32_t ; *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 1); vhadd.s16 q4, q4, q15 vqrshrun.s16 d8, q4, #6 mov r4, r3 vld1.u32 {d20[0]}, [r4], r6 vld1.u32 {d20[1]}, [r4] vrhadd.u8 d8, d8, d20 vst1.32 {d8[0]},[r3], r6 add r1, r1, #4 vst1.32 {d8[1]},[r3] bgt inner_loop_wd_4 end_inner_loop_wd_4 subs r7, r7, #4 add r1, r1, r9 add r0, r0, r8 bgt outer_loop_wd_4 vpop {d8 - d15} ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp ENDP END libvpx-1.8.2/vpx_dsp/arm/vpx_convolve8_horiz_filter_type1_neon.asm000066400000000000000000000450121357355204000255410ustar00rootroot00000000000000; ; Copyright (c) 2018 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; ;**************Variables Vs Registers*********************************** ; r0 => src ; r1 => dst ; r2 => src_stride ; r3 => dst_stride ; r4 => filter_x0 ; r8 => ht ; r10 => wd EXPORT |vpx_convolve8_horiz_filter_type1_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 |vpx_convolve8_horiz_filter_type1_neon| PROC stmfd sp!, {r4 - r12, r14} ;stack stores the values of ; the arguments vpush {d8 - d15} ; stack offset by 64 mov r4, r1 mov r1, r2 mov r2, r4 start_loop_count ldr r4, [sp, #104] ;loads pi1_coeff ldr r8, [sp, #108] ;loads x0_q4 add r4, r4, r8, lsl #4 ;r4 = filter[x0_q4] ldr r8, [sp, #128] ;loads ht ldr r10, [sp, #124] ;loads wd vld2.8 {d0, d1}, [r4] ;coeff = vld1_s8(pi1_coeff) mov r11, #1 subs r14, r8, #0 ;checks for ht == 0 vabs.s8 d2, d0 ;vabs_s8(coeff) vdup.8 d24, d2[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, ; 0) sub r12, r0, #3 ;pu1_src - 3 vdup.8 d25, d2[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, ; 1) add r4, r12, r2 ;pu1_src_tmp2_8 = pu1_src + src_strd vdup.8 d26, d2[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, ; 2) rsb r9, r10, r2, lsl #1 ;2*src_strd - wd vdup.8 d27, d2[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, ; 3) rsb r8, r10, r3, lsl #1 ;2*dst_strd - wd vdup.8 d28, d2[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, ; 4) vdup.8 d29, d2[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, ; 5) vdup.8 d30, d2[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, ; 6) vdup.8 d31, d2[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, ; 7) mov r7, r1 cmp r10, #4 ble outer_loop_4 cmp r10, #24 moveq r10, #16 addeq r8, #8 addeq r9, #8 cmp r10, #16 bge outer_loop_16 cmp r10, #12 addeq r8, #4 addeq r9, #4 b outer_loop_8 outer_loop8_residual sub r12, r0, #3 ;pu1_src - 3 mov r1, r7 mov r14, #32 add r1, #16 add r12, #16 mov r10, #8 add r8, #8 add r9, #8 outer_loop_8 add r6, r1, r3 ;pu1_dst + dst_strd add r4, r12, r2 ;pu1_src + src_strd subs r5, r10, #0 ;checks wd ble 
end_inner_loop_8 inner_loop_8 mov r7, #0xc000 vld1.u32 {d0}, [r12], r11 ;vector load pu1_src vdup.16 q4, r7 vld1.u32 {d1}, [r12], r11 vdup.16 q5, r7 vld1.u32 {d2}, [r12], r11 vld1.u32 {d3}, [r12], r11 mov r7, #0x4000 vld1.u32 {d4}, [r12], r11 vmlsl.u8 q4, d1, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {d5}, [r12], r11 vmlal.u8 q4, d3, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); vld1.u32 {d6}, [r12], r11 vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vld1.u32 {d7}, [r12], r11 vmlal.u8 q4, d2, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd vmlal.u8 q4, d4, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vld1.u32 {d13}, [r4], r11 vmlal.u8 q4, d5, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vld1.u32 {d14}, [r4], r11 vmlsl.u8 q4, d6, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vld1.u32 {d15}, [r4], r11 vmlsl.u8 q4, d7, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); vld1.u32 {d16}, [r4], r11 ;vector load pu1_src + src_strd vdup.16 q11, r7 vmlal.u8 q5, d15, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); vld1.u32 {d17}, [r4], r11 vmlal.u8 q5, d14, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vhadd.s16 q4, q4, q11 vld1.u32 {d18}, [r4], r11 vmlal.u8 q5, d16, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vld1.u32 {d19}, [r4], r11 ;vector load pu1_src + src_strd vmlal.u8 q5, d17, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vmlsl.u8 q5, d18, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q5, d19, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); vqrshrun.s16 d20, q4, #6 ;right shift and saturating narrow ; result 1 vmlsl.u8 q5, d12, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vmlsl.u8 q5, d13, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vst1.8 {d20}, [r1]! ;store the result pu1_dst vhadd.s16 q5, q5, q11 subs r5, r5, #8 ;decrement the wd loop vqrshrun.s16 d8, q5, #6 ;right shift and saturating narrow ; result 2 vst1.8 {d8}, [r6]! ;store the result pu1_dst cmp r5, #4 bgt inner_loop_8 end_inner_loop_8 subs r14, r14, #2 ;decrement the ht loop add r12, r12, r9 ;increment the src pointer by ; 2*src_strd-wd add r1, r1, r8 ;increment the dst pointer by ; 2*dst_strd-wd bgt outer_loop_8 ldr r10, [sp, #120] ;loads wd cmp r10, #12 beq outer_loop4_residual end_loops b end_func outer_loop_16 str r0, [sp, #-4]! str r7, [sp, #-4]! 
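    ; Annotation (added): this non-averaging variant mirrors
    ; vpx_convolve8_avg_horiz_filter_type1_neon above, but stores results
    ; directly with vst1 and has no dst load or vrhadd.u8 averaging step;
    ; the type1 and type2 kernels appear to differ only in which taps are
    ; accumulated with vmlal versus vmlsl.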
add r6, r1, r3 ;pu1_dst + dst_strd add r4, r12, r2 ;pu1_src + src_strd and r0, r12, #31 mov r7, #0xc000 sub r5, r10, #0 ;checks wd pld [r4, r2, lsl #1] pld [r12, r2, lsl #1] vld1.u32 {q0}, [r12], r11 ;vector load pu1_src vdup.16 q4, r7 vld1.u32 {q1}, [r12], r11 vld1.u32 {q2}, [r12], r11 vld1.u32 {q3}, [r12], r11 vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vld1.u32 {q6}, [r12], r11 vmlsl.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {q7}, [r12], r11 vmlal.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vld1.u32 {q8}, [r12], r11 vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); vld1.u32 {q9}, [r12], r11 vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vmlal.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vdup.16 q10, r7 vmlsl.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); inner_loop_16 vmlsl.u8 q10, d1, d24 vdup.16 q5, r7 vmlsl.u8 q10, d3, d25 mov r7, #0x4000 vdup.16 q11, r7 vmlal.u8 q10, d5, d26 vld1.u32 {q0}, [r4], r11 ;vector load pu1_src vhadd.s16 q4, q4, q11 vld1.u32 {q1}, [r4], r11 vmlal.u8 q10, d7, d27 add r12, #8 subs r5, r5, #16 vmlal.u8 q10, d13, d28 vld1.u32 {q2}, [r4], r11 vmlal.u8 q10, d15, d29 vld1.u32 {q3}, [r4], r11 vqrshrun.s16 d8, q4, #6 ;right shift and saturating narrow ; result 1 vmlsl.u8 q10, d17, d30 vld1.u32 {q6}, [r4], r11 vmlsl.u8 q10, d19, d31 vld1.u32 {q7}, [r4], r11 vmlsl.u8 q5, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vmlsl.u8 q5, d2, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {q8}, [r4], r11 vhadd.s16 q10, q10, q11 vld1.u32 {q9}, [r4], r11 vmlal.u8 q5, d4, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vmlal.u8 q5, d6, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); add r4, #8 mov r7, #0xc000 vmlal.u8 q5, d12, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vmlal.u8 q5, d14, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vqrshrun.s16 d9, q10, #6 vdup.16 q11, r7 vmlsl.u8 q5, d16, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q5, d18, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); mov r7, #0x4000 vmlsl.u8 q11, d1, d24 vst1.8 {q4}, [r1]! ;store the result pu1_dst vmlsl.u8 q11, d3, d25 vdup.16 q10, r7 vmlal.u8 q11, d5, d26 pld [r12, r2, lsl #2] pld [r4, r2, lsl #2] addeq r12, r12, r9 ;increment the src pointer by ; 2*src_strd-wd addeq r4, r12, r2 ;pu1_src + src_strd vmlal.u8 q11, d7, d27 addeq r1, r1, r8 subeq r14, r14, #2 vmlal.u8 q11, d13, d28 vhadd.s16 q5, q5, q10 vmlal.u8 q11, d15, d29 vmlsl.u8 q11, d17, d30 cmp r14, #0 vmlsl.u8 q11, d19, d31 vqrshrun.s16 d10, q5, #6 ;right shift and saturating narrow ; result 2 beq epilog_16 vld1.u32 {q0}, [r12], r11 ;vector load pu1_src mov r7, #0xc000 cmp r5, #0 vld1.u32 {q1}, [r12], r11 vhadd.s16 q11, q11, q10 vld1.u32 {q2}, [r12], r11 vdup.16 q4, r7 vld1.u32 {q3}, [r12], r11 vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vld1.u32 {q6}, [r12], r11 vld1.u32 {q7}, [r12], r11 vmlsl.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {q8}, [r12], r11 vmlal.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vld1.u32 {q9}, [r12], r11 vqrshrun.s16 d11, q11, #6 vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); moveq r5, r10 vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vdup.16 q10, r7 vmlal.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vst1.8 {q5}, [r6]! 
;store the result pu1_dst vmlsl.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); addeq r6, r1, r3 ;pu1_dst + dst_strd b inner_loop_16 epilog_16 mov r7, #0x4000 ldr r0, [sp], #4 ldr r10, [sp, #120] vdup.16 q10, r7 vhadd.s16 q11, q11, q10 vqrshrun.s16 d11, q11, #6 vst1.8 {q5}, [r6]! ;store the result pu1_dst ldr r7, [sp], #4 cmp r10, #24 beq outer_loop8_residual end_loops1 b end_func outer_loop4_residual sub r12, r0, #3 ;pu1_src - 3 mov r1, r7 add r1, #8 mov r10, #4 add r12, #8 mov r14, #16 add r8, #4 add r9, #4 outer_loop_4 add r6, r1, r3 ;pu1_dst + dst_strd add r4, r12, r2 ;pu1_src + src_strd subs r5, r10, #0 ;checks wd ble end_inner_loop_4 inner_loop_4 vld1.u32 {d0}, [r12], r11 ;vector load pu1_src vld1.u32 {d1}, [r12], r11 vld1.u32 {d2}, [r12], r11 vld1.u32 {d3}, [r12], r11 vld1.u32 {d4}, [r12], r11 vld1.u32 {d5}, [r12], r11 vld1.u32 {d6}, [r12], r11 vld1.u32 {d7}, [r12], r11 sub r12, r12, #4 vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd vld1.u32 {d13}, [r4], r11 vzip.32 d0, d12 ;vector zip the i iteration and ii ; interation in single register vld1.u32 {d14}, [r4], r11 vzip.32 d1, d13 vld1.u32 {d15}, [r4], r11 vzip.32 d2, d14 vld1.u32 {d16}, [r4], r11 vzip.32 d3, d15 vld1.u32 {d17}, [r4], r11 vzip.32 d4, d16 vld1.u32 {d18}, [r4], r11 vzip.32 d5, d17 vld1.u32 {d19}, [r4], r11 mov r7, #0xc000 vdup.16 q4, r7 sub r4, r4, #4 vzip.32 d6, d18 vzip.32 d7, d19 vmlsl.u8 q4, d1, d25 ;arithmetic operations for ii ; iteration in the same time vmlsl.u8 q4, d0, d24 vmlal.u8 q4, d2, d26 vmlal.u8 q4, d3, d27 vmlal.u8 q4, d4, d28 vmlal.u8 q4, d5, d29 vmlsl.u8 q4, d6, d30 vmlsl.u8 q4, d7, d31 mov r7, #0x4000 vdup.16 q10, r7 vhadd.s16 q4, q4, q10 vqrshrun.s16 d8, q4, #6 vst1.32 {d8[0]},[r1]! ;store the i iteration result which ; is in upper part of the register vst1.32 {d8[1]},[r6]! ;store the ii iteration result which ; is in lower part of the register subs r5, r5, #4 ;decrement the wd by 4 bgt inner_loop_4 end_inner_loop_4 subs r14, r14, #2 ;decrement the ht by 4 add r12, r12, r9 ;increment the input pointer ; 2*src_strd-wd add r1, r1, r8 ;increment the output pointer ; 2*dst_strd-wd bgt outer_loop_4 end_func vpop {d8 - d15} ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp ENDP END libvpx-1.8.2/vpx_dsp/arm/vpx_convolve8_horiz_filter_type2_neon.asm000066400000000000000000000450121357355204000255420ustar00rootroot00000000000000; ; Copyright (c) 2018 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
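; This file implements the "type2" variant of the horizontal 8-tap kernel:
; the negative taps are 0, 2, 5 and 7 (applied with vmlsl below), while the
; type1 variant negates only the first two and the last two taps. The
; dispatch between the two variants lives in
; vpx_dsp/arm/vpx_convolve8_neon_asm.c.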
; ;**************Variables Vs Registers*********************************** ; r0 => src ; r1 => dst ; r2 => src_stride ; r3 => dst_stride ; r4 => filter_x0 ; r8 => ht ; r10 => wd EXPORT |vpx_convolve8_horiz_filter_type2_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 |vpx_convolve8_horiz_filter_type2_neon| PROC stmfd sp!, {r4 - r12, r14} ;stack stores the values of ; the arguments vpush {d8 - d15} ; stack offset by 64 mov r4, r1 mov r1, r2 mov r2, r4 start_loop_count ldr r4, [sp, #104] ;loads pi1_coeff ldr r8, [sp, #108] ;loads x0_q4 add r4, r4, r8, lsl #4 ;r4 = filter[x0_q4] ldr r8, [sp, #128] ;loads ht ldr r10, [sp, #124] ;loads wd vld2.8 {d0, d1}, [r4] ;coeff = vld1_s8(pi1_coeff) mov r11, #1 subs r14, r8, #0 ;checks for ht == 0 vabs.s8 d2, d0 ;vabs_s8(coeff) vdup.8 d24, d2[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, ; 0) sub r12, r0, #3 ;pu1_src - 3 vdup.8 d25, d2[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, ; 1) add r4, r12, r2 ;pu1_src_tmp2_8 = pu1_src + src_strd vdup.8 d26, d2[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, ; 2) rsb r9, r10, r2, lsl #1 ;2*src_strd - wd vdup.8 d27, d2[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, ; 3) rsb r8, r10, r3, lsl #1 ;2*dst_strd - wd vdup.8 d28, d2[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, ; 4) vdup.8 d29, d2[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, ; 5) vdup.8 d30, d2[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, ; 6) vdup.8 d31, d2[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, ; 7) mov r7, r1 cmp r10, #4 ble outer_loop_4 cmp r10, #24 moveq r10, #16 addeq r8, #8 addeq r9, #8 cmp r10, #16 bge outer_loop_16 cmp r10, #12 addeq r8, #4 addeq r9, #4 b outer_loop_8 outer_loop8_residual sub r12, r0, #3 ;pu1_src - 3 mov r1, r7 mov r14, #32 add r1, #16 add r12, #16 mov r10, #8 add r8, #8 add r9, #8 outer_loop_8 add r6, r1, r3 ;pu1_dst + dst_strd add r4, r12, r2 ;pu1_src + src_strd subs r5, r10, #0 ;checks wd ble end_inner_loop_8 inner_loop_8 mov r7, #0xc000 vld1.u32 {d0}, [r12], r11 ;vector load pu1_src vdup.16 q4, r7 vld1.u32 {d1}, [r12], r11 vdup.16 q5, r7 vld1.u32 {d2}, [r12], r11 vld1.u32 {d3}, [r12], r11 mov r7, #0x4000 vld1.u32 {d4}, [r12], r11 vmlal.u8 q4, d1, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {d5}, [r12], r11 vmlal.u8 q4, d3, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); vld1.u32 {d6}, [r12], r11 vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vld1.u32 {d7}, [r12], r11 vmlsl.u8 q4, d2, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd vmlal.u8 q4, d4, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vld1.u32 {d13}, [r4], r11 vmlsl.u8 q4, d5, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vld1.u32 {d14}, [r4], r11 vmlal.u8 q4, d6, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vld1.u32 {d15}, [r4], r11 vmlsl.u8 q4, d7, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); vld1.u32 {d16}, [r4], r11 ;vector load pu1_src + src_strd vdup.16 q11, r7 vmlal.u8 q5, d15, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); vld1.u32 {d17}, [r4], r11 vmlsl.u8 q5, d14, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vhadd.s16 q4, q4, q11 vld1.u32 {d18}, [r4], r11 vmlal.u8 q5, d16, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vld1.u32 {d19}, [r4], r11 ;vector load pu1_src + src_strd vmlsl.u8 q5, d17, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vmlal.u8 q5, d18, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q5, d19, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); vqrshrun.s16 d20, q4, #6 ;right shift and saturating narrow ; result 1 vmlsl.u8 q5, d12, d24 
;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vmlal.u8 q5, d13, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vst1.8 {d20}, [r1]! ;store the result pu1_dst vhadd.s16 q5, q5, q11 subs r5, r5, #8 ;decrement the wd loop vqrshrun.s16 d8, q5, #6 ;right shift and saturating narrow ; result 2 vst1.8 {d8}, [r6]! ;store the result pu1_dst cmp r5, #4 bgt inner_loop_8 end_inner_loop_8 subs r14, r14, #2 ;decrement the ht loop add r12, r12, r9 ;increment the src pointer by ; 2*src_strd-wd add r1, r1, r8 ;increment the dst pointer by ; 2*dst_strd-wd bgt outer_loop_8 ldr r10, [sp, #120] ;loads wd cmp r10, #12 beq outer_loop4_residual end_loops b end_func outer_loop_16 str r0, [sp, #-4]! str r7, [sp, #-4]! add r6, r1, r3 ;pu1_dst + dst_strd add r4, r12, r2 ;pu1_src + src_strd and r0, r12, #31 mov r7, #0xc000 sub r5, r10, #0 ;checks wd pld [r4, r2, lsl #1] pld [r12, r2, lsl #1] vld1.u32 {q0}, [r12], r11 ;vector load pu1_src vdup.16 q4, r7 vld1.u32 {q1}, [r12], r11 vld1.u32 {q2}, [r12], r11 vld1.u32 {q3}, [r12], r11 vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vld1.u32 {q6}, [r12], r11 vmlal.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {q7}, [r12], r11 vmlsl.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vld1.u32 {q8}, [r12], r11 vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); vld1.u32 {q9}, [r12], r11 vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vmlsl.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vdup.16 q10, r7 vmlal.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); inner_loop_16 vmlsl.u8 q10, d1, d24 vdup.16 q5, r7 vmlal.u8 q10, d3, d25 mov r7, #0x4000 vdup.16 q11, r7 vmlsl.u8 q10, d5, d26 vld1.u32 {q0}, [r4], r11 ;vector load pu1_src vhadd.s16 q4, q4, q11 vld1.u32 {q1}, [r4], r11 vmlal.u8 q10, d7, d27 add r12, #8 subs r5, r5, #16 vmlal.u8 q10, d13, d28 vld1.u32 {q2}, [r4], r11 vmlsl.u8 q10, d15, d29 vld1.u32 {q3}, [r4], r11 vqrshrun.s16 d8, q4, #6 ;right shift and saturating narrow ; result 1 vmlal.u8 q10, d17, d30 vld1.u32 {q6}, [r4], r11 vmlsl.u8 q10, d19, d31 vld1.u32 {q7}, [r4], r11 vmlsl.u8 q5, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vmlal.u8 q5, d2, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {q8}, [r4], r11 vhadd.s16 q10, q10, q11 vld1.u32 {q9}, [r4], r11 vmlsl.u8 q5, d4, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vmlal.u8 q5, d6, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); add r4, #8 mov r7, #0xc000 vmlal.u8 q5, d12, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vmlsl.u8 q5, d14, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vqrshrun.s16 d9, q10, #6 vdup.16 q11, r7 vmlal.u8 q5, d16, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q5, d18, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); mov r7, #0x4000 vmlsl.u8 q11, d1, d24 vst1.8 {q4}, [r1]! 
;store the result pu1_dst vmlal.u8 q11, d3, d25 vdup.16 q10, r7 vmlsl.u8 q11, d5, d26 pld [r12, r2, lsl #2] pld [r4, r2, lsl #2] addeq r12, r12, r9 ;increment the src pointer by ; 2*src_strd-wd addeq r4, r12, r2 ;pu1_src + src_strd vmlal.u8 q11, d7, d27 addeq r1, r1, r8 subeq r14, r14, #2 vmlal.u8 q11, d13, d28 vhadd.s16 q5, q5, q10 vmlsl.u8 q11, d15, d29 vmlal.u8 q11, d17, d30 cmp r14, #0 vmlsl.u8 q11, d19, d31 vqrshrun.s16 d10, q5, #6 ;right shift and saturating narrow ; result 2 beq epilog_16 vld1.u32 {q0}, [r12], r11 ;vector load pu1_src mov r7, #0xc000 cmp r5, #0 vld1.u32 {q1}, [r12], r11 vhadd.s16 q11, q11, q10 vld1.u32 {q2}, [r12], r11 vdup.16 q4, r7 vld1.u32 {q3}, [r12], r11 vmlsl.u8 q4, d0, d24 ;mul_res = vmlsl_u8(src[0_0], ; coeffabs_0); vld1.u32 {q6}, [r12], r11 vld1.u32 {q7}, [r12], r11 vmlal.u8 q4, d2, d25 ;mul_res = vmlal_u8(src[0_1], ; coeffabs_1); vld1.u32 {q8}, [r12], r11 vmlsl.u8 q4, d4, d26 ;mul_res = vmlsl_u8(src[0_2], ; coeffabs_2); vld1.u32 {q9}, [r12], r11 vqrshrun.s16 d11, q11, #6 vmlal.u8 q4, d6, d27 ;mul_res = vmull_u8(src[0_3], ; coeffabs_3); moveq r5, r10 vmlal.u8 q4, d12, d28 ;mul_res = vmlal_u8(src[0_4], ; coeffabs_4); vdup.16 q10, r7 vmlsl.u8 q4, d14, d29 ;mul_res = vmlsl_u8(src[0_5], ; coeffabs_5); vst1.8 {q5}, [r6]! ;store the result pu1_dst vmlal.u8 q4, d16, d30 ;mul_res = vmlal_u8(src[0_6], ; coeffabs_6); vmlsl.u8 q4, d18, d31 ;mul_res = vmlsl_u8(src[0_7], ; coeffabs_7); addeq r6, r1, r3 ;pu1_dst + dst_strd b inner_loop_16 epilog_16 mov r7, #0x4000 ldr r0, [sp], #4 ldr r10, [sp, #120] vdup.16 q10, r7 vhadd.s16 q11, q11, q10 vqrshrun.s16 d11, q11, #6 vst1.8 {q5}, [r6]! ;store the result pu1_dst ldr r7, [sp], #4 cmp r10, #24 beq outer_loop8_residual end_loops1 b end_func outer_loop4_residual sub r12, r0, #3 ;pu1_src - 3 mov r1, r7 add r1, #8 mov r10, #4 add r12, #8 mov r14, #16 add r8, #4 add r9, #4 outer_loop_4 add r6, r1, r3 ;pu1_dst + dst_strd add r4, r12, r2 ;pu1_src + src_strd subs r5, r10, #0 ;checks wd ble end_inner_loop_4 inner_loop_4 vld1.u32 {d0}, [r12], r11 ;vector load pu1_src vld1.u32 {d1}, [r12], r11 vld1.u32 {d2}, [r12], r11 vld1.u32 {d3}, [r12], r11 vld1.u32 {d4}, [r12], r11 vld1.u32 {d5}, [r12], r11 vld1.u32 {d6}, [r12], r11 vld1.u32 {d7}, [r12], r11 sub r12, r12, #4 vld1.u32 {d12}, [r4], r11 ;vector load pu1_src + src_strd vld1.u32 {d13}, [r4], r11 vzip.32 d0, d12 ;vector zip the i iteration and ii ; interation in single register vld1.u32 {d14}, [r4], r11 vzip.32 d1, d13 vld1.u32 {d15}, [r4], r11 vzip.32 d2, d14 vld1.u32 {d16}, [r4], r11 vzip.32 d3, d15 vld1.u32 {d17}, [r4], r11 vzip.32 d4, d16 vld1.u32 {d18}, [r4], r11 vzip.32 d5, d17 vld1.u32 {d19}, [r4], r11 mov r7, #0xc000 vdup.16 q4, r7 sub r4, r4, #4 vzip.32 d6, d18 vzip.32 d7, d19 vmlal.u8 q4, d1, d25 ;arithmetic operations for ii ; iteration in the same time vmlsl.u8 q4, d0, d24 vmlsl.u8 q4, d2, d26 vmlal.u8 q4, d3, d27 vmlal.u8 q4, d4, d28 vmlsl.u8 q4, d5, d29 vmlal.u8 q4, d6, d30 vmlsl.u8 q4, d7, d31 mov r7, #0x4000 vdup.16 q10, r7 vhadd.s16 q4, q4, q10 vqrshrun.s16 d8, q4, #6 vst1.32 {d8[0]},[r1]! ;store the i iteration result which ; is in upper part of the register vst1.32 {d8[1]},[r6]! 
;store the ii iteration result which ; is in lower part of the register subs r5, r5, #4 ;decrement the wd by 4 bgt inner_loop_4 end_inner_loop_4 subs r14, r14, #2 ;decrement the ht by 4 add r12, r12, r9 ;increment the input pointer ; 2*src_strd-wd add r1, r1, r8 ;increment the output pointer ; 2*dst_strd-wd bgt outer_loop_4 end_func vpop {d8 - d15} ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp ENDP END libvpx-1.8.2/vpx_dsp/arm/vpx_convolve8_neon.c000066400000000000000000001103311357355204000212760ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <arm_neon.h> #include <assert.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/arm/vpx_convolve8_neon.h" #include "vpx_ports/mem.h" // Note: // 1. src is not always 32-bit aligned, so don't call vld1_lane_u32(src). // 2. After refactoring the shared code in kernel loops with inline functions, // the decoder speed dropped a lot when using gcc compiler. Therefore there is // no refactoring for those parts by now. // 3. For horizontal convolve, there is an alternative optimization that // convolves a single row in each loop. For each row, 8 sample banks with 4 or 8 // samples in each are read from memory: src, (src+1), (src+2), (src+3), // (src+4), (src+5), (src+6), (src+7), or prepared by vector extract // instructions. This optimization is much faster in speed unit test, but slowed // down the whole decoder by 5%.
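// A minimal sketch of the safe pattern implied by note 1 above; the helper
// name is illustrative only and is not used elsewhere in this file.
// vld1_lane_u32() requires a 4-byte-aligned pointer, while memcpy (or a
// plain vld1_u8) carries no alignment requirement.
#include <string.h>  // for memcpy; assumed only by the sketch below

static INLINE uint8x8_t load_unaligned_u32_sketch(const uint8_t *src) {
  uint32_t a;
  memcpy(&a, src, 4);  // byte-wise copy is legal for any src alignment
  return vreinterpret_u8_u32(vdup_n_u32(a));  // place the word in both lanes
}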
static INLINE void store_u8_8x8(uint8_t *s, const ptrdiff_t p, const uint8x8_t s0, const uint8x8_t s1, const uint8x8_t s2, const uint8x8_t s3, const uint8x8_t s4, const uint8x8_t s5, const uint8x8_t s6, const uint8x8_t s7) { vst1_u8(s, s0); s += p; vst1_u8(s, s1); s += p; vst1_u8(s, s2); s += p; vst1_u8(s, s3); s += p; vst1_u8(s, s4); s += p; vst1_u8(s, s5); s += p; vst1_u8(s, s6); s += p; vst1_u8(s, s7); } void vpx_convolve8_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16x8_t filters = vld1q_s16(filter[x0_q4]); uint8x8_t t0, t1, t2, t3; assert(!((intptr_t)dst & 3)); assert(!(dst_stride & 3)); assert(x_step_q4 == 16); (void)x_step_q4; (void)y0_q4; (void)y_step_q4; src -= 3; if (h == 4) { uint8x8_t d01, d23; int16x4_t filter3, filter4, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3; int16x8_t tt0, tt1, tt2, tt3; __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); filter3 = vdup_lane_s16(vget_low_s16(filters), 3); filter4 = vdup_lane_s16(vget_high_s16(filters), 0); load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3); transpose_u8_8x4(&t0, &t1, &t2, &t3); tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); s0 = vget_low_s16(tt0); s1 = vget_low_s16(tt1); s2 = vget_low_s16(tt2); s3 = vget_low_s16(tt3); s4 = vget_high_s16(tt0); s5 = vget_high_s16(tt1); s6 = vget_high_s16(tt2); __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); src += 7; do { load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3); transpose_u8_8x4(&t0, &t1, &t2, &t3); tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); s7 = vget_low_s16(tt0); s8 = vget_low_s16(tt1); s9 = vget_low_s16(tt2); s10 = vget_low_s16(tt3); d0 = convolve8_4(s0, s1, s2, s3, s4, s5, s6, s7, filters, filter3, filter4); d1 = convolve8_4(s1, s2, s3, s4, s5, s6, s7, s8, filters, filter3, filter4); d2 = convolve8_4(s2, s3, s4, s5, s6, s7, s8, s9, filters, filter3, filter4); d3 = convolve8_4(s3, s4, s5, s6, s7, s8, s9, s10, filters, filter3, filter4); d01 = vqrshrun_n_s16(vcombine_s16(d0, d1), 7); d23 = vqrshrun_n_s16(vcombine_s16(d2, d3), 7); transpose_u8_4x4(&d01, &d23); vst1_lane_u32((uint32_t *)(dst + 0 * dst_stride), vreinterpret_u32_u8(d01), 0); vst1_lane_u32((uint32_t *)(dst + 1 * dst_stride), vreinterpret_u32_u8(d23), 0); vst1_lane_u32((uint32_t *)(dst + 2 * dst_stride), vreinterpret_u32_u8(d01), 1); vst1_lane_u32((uint32_t *)(dst + 3 * dst_stride), vreinterpret_u32_u8(d23), 1); s0 = s4; s1 = s5; s2 = s6; s3 = s7; s4 = s8; s5 = s9; s6 = s10; src += 4; dst += 4; w -= 4; } while (w > 0); } else { const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); int width; const uint8_t *s; uint8x8_t t4, t5, t6, t7; int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; if (w == 4) { do { load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); s1 = 
vreinterpretq_s16_u16(vmovl_u8(t1)); s2 = vreinterpretq_s16_u16(vmovl_u8(t2)); s3 = vreinterpretq_s16_u16(vmovl_u8(t3)); s4 = vreinterpretq_s16_u16(vmovl_u8(t4)); s5 = vreinterpretq_s16_u16(vmovl_u8(t5)); s6 = vreinterpretq_s16_u16(vmovl_u8(t6)); load_u8_8x8(src + 7, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); src += 8 * src_stride; __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); __builtin_prefetch(dst + 4 * dst_stride); __builtin_prefetch(dst + 5 * dst_stride); __builtin_prefetch(dst + 6 * dst_stride); __builtin_prefetch(dst + 7 * dst_stride); transpose_u8_4x8(&t0, &t1, &t2, &t3, t4, t5, t6, t7); s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); s8 = vreinterpretq_s16_u16(vmovl_u8(t1)); s9 = vreinterpretq_s16_u16(vmovl_u8(t2)); s10 = vreinterpretq_s16_u16(vmovl_u8(t3)); __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); __builtin_prefetch(src + 4 * src_stride); __builtin_prefetch(src + 5 * src_stride); __builtin_prefetch(src + 6 * src_stride); __builtin_prefetch(src + 7 * src_stride); t0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, filter3, filter4); t1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, filter3, filter4); t2 = convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, filter3, filter4); t3 = convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, filter3, filter4); transpose_u8_8x4(&t0, &t1, &t2, &t3); vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0), 0); dst += dst_stride; vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t1), 0); dst += dst_stride; vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t2), 0); dst += dst_stride; vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t3), 0); dst += dst_stride; vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t0), 1); dst += dst_stride; vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t1), 1); dst += dst_stride; vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t2), 1); dst += dst_stride; vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(t3), 1); dst += dst_stride; h -= 8; } while (h > 0); } else { uint8_t *d; int16x8_t s11, s12, s13, s14; do { __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); __builtin_prefetch(src + 4 * src_stride); __builtin_prefetch(src + 5 * src_stride); __builtin_prefetch(src + 6 * src_stride); __builtin_prefetch(src + 7 * src_stride); load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); s1 = vreinterpretq_s16_u16(vmovl_u8(t1)); s2 = vreinterpretq_s16_u16(vmovl_u8(t2)); s3 = vreinterpretq_s16_u16(vmovl_u8(t3)); s4 = vreinterpretq_s16_u16(vmovl_u8(t4)); s5 = vreinterpretq_s16_u16(vmovl_u8(t5)); s6 = vreinterpretq_s16_u16(vmovl_u8(t6)); width = w; s = src + 7; d = dst; __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); __builtin_prefetch(dst + 4 * dst_stride); __builtin_prefetch(dst + 5 * dst_stride); __builtin_prefetch(dst + 6 * dst_stride); __builtin_prefetch(dst + 7 * dst_stride); do { load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, 
&t7); s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); s8 = vreinterpretq_s16_u16(vmovl_u8(t1)); s9 = vreinterpretq_s16_u16(vmovl_u8(t2)); s10 = vreinterpretq_s16_u16(vmovl_u8(t3)); s11 = vreinterpretq_s16_u16(vmovl_u8(t4)); s12 = vreinterpretq_s16_u16(vmovl_u8(t5)); s13 = vreinterpretq_s16_u16(vmovl_u8(t6)); s14 = vreinterpretq_s16_u16(vmovl_u8(t7)); t0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, filter3, filter4); t1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, filter3, filter4); t2 = convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, filter3, filter4); t3 = convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, filter3, filter4); t4 = convolve8_8(s4, s5, s6, s7, s8, s9, s10, s11, filters, filter3, filter4); t5 = convolve8_8(s5, s6, s7, s8, s9, s10, s11, s12, filters, filter3, filter4); t6 = convolve8_8(s6, s7, s8, s9, s10, s11, s12, s13, filters, filter3, filter4); t7 = convolve8_8(s7, s8, s9, s10, s11, s12, s13, s14, filters, filter3, filter4); transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); store_u8_8x8(d, dst_stride, t0, t1, t2, t3, t4, t5, t6, t7); s0 = s8; s1 = s9; s2 = s10; s3 = s11; s4 = s12; s5 = s13; s6 = s14; s += 8; d += 8; width -= 8; } while (width > 0); src += 8 * src_stride; dst += 8 * dst_stride; h -= 8; } while (h > 0); } } } void vpx_convolve8_avg_horiz_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16x8_t filters = vld1q_s16(filter[x0_q4]); uint8x8_t t0, t1, t2, t3; assert(!((intptr_t)dst & 3)); assert(!(dst_stride & 3)); assert(x_step_q4 == 16); (void)x_step_q4; (void)y0_q4; (void)y_step_q4; src -= 3; if (h == 4) { uint8x8_t d01, d23; int16x4_t filter3, filter4, s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3; int16x8_t tt0, tt1, tt2, tt3; uint32x4_t d0123 = vdupq_n_u32(0); __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); filter3 = vdup_lane_s16(vget_low_s16(filters), 3); filter4 = vdup_lane_s16(vget_high_s16(filters), 0); load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3); transpose_u8_8x4(&t0, &t1, &t2, &t3); tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); s0 = vget_low_s16(tt0); s1 = vget_low_s16(tt1); s2 = vget_low_s16(tt2); s3 = vget_low_s16(tt3); s4 = vget_high_s16(tt0); s5 = vget_high_s16(tt1); s6 = vget_high_s16(tt2); __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); src += 7; do { load_u8_8x4(src, src_stride, &t0, &t1, &t2, &t3); transpose_u8_8x4(&t0, &t1, &t2, &t3); tt0 = vreinterpretq_s16_u16(vmovl_u8(t0)); tt1 = vreinterpretq_s16_u16(vmovl_u8(t1)); tt2 = vreinterpretq_s16_u16(vmovl_u8(t2)); tt3 = vreinterpretq_s16_u16(vmovl_u8(t3)); s7 = vget_low_s16(tt0); s8 = vget_low_s16(tt1); s9 = vget_low_s16(tt2); s10 = vget_low_s16(tt3); d0 = convolve8_4(s0, s1, s2, s3, s4, s5, s6, s7, filters, filter3, filter4); d1 = convolve8_4(s1, s2, s3, s4, s5, s6, s7, s8, filters, filter3, filter4); d2 = convolve8_4(s2, s3, s4, s5, s6, s7, s8, s9, filters, filter3, filter4); d3 = convolve8_4(s3, s4, s5, s6, s7, s8, s9, s10, filters, filter3, filter4); d01 = vqrshrun_n_s16(vcombine_s16(d0, d1), 7); d23 = vqrshrun_n_s16(vcombine_s16(d2, d3), 7); 
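// After the 4x4 transpose below, d01 holds output rows 0 and 2 and d23
// holds rows 1 and 3, so the dst lanes are gathered in 0, 2, 1, 3 order
// before the rounding average; the (uint32_t *) casts are safe because
// dst is asserted 4-byte aligned at the top of this function.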
transpose_u8_4x4(&d01, &d23); d0123 = vld1q_lane_u32((uint32_t *)(dst + 0 * dst_stride), d0123, 0); d0123 = vld1q_lane_u32((uint32_t *)(dst + 1 * dst_stride), d0123, 2); d0123 = vld1q_lane_u32((uint32_t *)(dst + 2 * dst_stride), d0123, 1); d0123 = vld1q_lane_u32((uint32_t *)(dst + 3 * dst_stride), d0123, 3); d0123 = vreinterpretq_u32_u8( vrhaddq_u8(vreinterpretq_u8_u32(d0123), vcombine_u8(d01, d23))); vst1q_lane_u32((uint32_t *)(dst + 0 * dst_stride), d0123, 0); vst1q_lane_u32((uint32_t *)(dst + 1 * dst_stride), d0123, 2); vst1q_lane_u32((uint32_t *)(dst + 2 * dst_stride), d0123, 1); vst1q_lane_u32((uint32_t *)(dst + 3 * dst_stride), d0123, 3); s0 = s4; s1 = s5; s2 = s6; s3 = s7; s4 = s8; s5 = s9; s6 = s10; src += 4; dst += 4; w -= 4; } while (w > 0); } else { const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); int width; const uint8_t *s; uint8x8_t t4, t5, t6, t7; int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; if (w == 4) { uint32x4_t d0415 = vdupq_n_u32(0); uint32x4_t d2637 = vdupq_n_u32(0); do { load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); s1 = vreinterpretq_s16_u16(vmovl_u8(t1)); s2 = vreinterpretq_s16_u16(vmovl_u8(t2)); s3 = vreinterpretq_s16_u16(vmovl_u8(t3)); s4 = vreinterpretq_s16_u16(vmovl_u8(t4)); s5 = vreinterpretq_s16_u16(vmovl_u8(t5)); s6 = vreinterpretq_s16_u16(vmovl_u8(t6)); load_u8_8x8(src + 7, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); src += 8 * src_stride; __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); __builtin_prefetch(dst + 4 * dst_stride); __builtin_prefetch(dst + 5 * dst_stride); __builtin_prefetch(dst + 6 * dst_stride); __builtin_prefetch(dst + 7 * dst_stride); transpose_u8_4x8(&t0, &t1, &t2, &t3, t4, t5, t6, t7); s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); s8 = vreinterpretq_s16_u16(vmovl_u8(t1)); s9 = vreinterpretq_s16_u16(vmovl_u8(t2)); s10 = vreinterpretq_s16_u16(vmovl_u8(t3)); __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); __builtin_prefetch(src + 4 * src_stride); __builtin_prefetch(src + 5 * src_stride); __builtin_prefetch(src + 6 * src_stride); __builtin_prefetch(src + 7 * src_stride); t0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, filter3, filter4); t1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, filter3, filter4); t2 = convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, filter3, filter4); t3 = convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, filter3, filter4); transpose_u8_8x4(&t0, &t1, &t2, &t3); d0415 = vld1q_lane_u32((uint32_t *)(dst + 0 * dst_stride), d0415, 0); d0415 = vld1q_lane_u32((uint32_t *)(dst + 1 * dst_stride), d0415, 2); d2637 = vld1q_lane_u32((uint32_t *)(dst + 2 * dst_stride), d2637, 0); d2637 = vld1q_lane_u32((uint32_t *)(dst + 3 * dst_stride), d2637, 2); d0415 = vld1q_lane_u32((uint32_t *)(dst + 4 * dst_stride), d0415, 1); d0415 = vld1q_lane_u32((uint32_t *)(dst + 5 * dst_stride), d0415, 3); d2637 = vld1q_lane_u32((uint32_t *)(dst + 6 * dst_stride), d2637, 1); d2637 = vld1q_lane_u32((uint32_t *)(dst + 7 * dst_stride), d2637, 3); d0415 = vreinterpretq_u32_u8( vrhaddq_u8(vreinterpretq_u8_u32(d0415), vcombine_u8(t0, t1))); d2637 = vreinterpretq_u32_u8( 
vrhaddq_u8(vreinterpretq_u8_u32(d2637), vcombine_u8(t2, t3))); vst1q_lane_u32((uint32_t *)dst, d0415, 0); dst += dst_stride; vst1q_lane_u32((uint32_t *)dst, d0415, 2); dst += dst_stride; vst1q_lane_u32((uint32_t *)dst, d2637, 0); dst += dst_stride; vst1q_lane_u32((uint32_t *)dst, d2637, 2); dst += dst_stride; vst1q_lane_u32((uint32_t *)dst, d0415, 1); dst += dst_stride; vst1q_lane_u32((uint32_t *)dst, d0415, 3); dst += dst_stride; vst1q_lane_u32((uint32_t *)dst, d2637, 1); dst += dst_stride; vst1q_lane_u32((uint32_t *)dst, d2637, 3); dst += dst_stride; h -= 8; } while (h > 0); } else { uint8_t *d; int16x8_t s11, s12, s13, s14; uint8x16_t d01, d23, d45, d67; do { __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); __builtin_prefetch(src + 4 * src_stride); __builtin_prefetch(src + 5 * src_stride); __builtin_prefetch(src + 6 * src_stride); __builtin_prefetch(src + 7 * src_stride); load_u8_8x8(src, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); s0 = vreinterpretq_s16_u16(vmovl_u8(t0)); s1 = vreinterpretq_s16_u16(vmovl_u8(t1)); s2 = vreinterpretq_s16_u16(vmovl_u8(t2)); s3 = vreinterpretq_s16_u16(vmovl_u8(t3)); s4 = vreinterpretq_s16_u16(vmovl_u8(t4)); s5 = vreinterpretq_s16_u16(vmovl_u8(t5)); s6 = vreinterpretq_s16_u16(vmovl_u8(t6)); width = w; s = src + 7; d = dst; __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); __builtin_prefetch(dst + 4 * dst_stride); __builtin_prefetch(dst + 5 * dst_stride); __builtin_prefetch(dst + 6 * dst_stride); __builtin_prefetch(dst + 7 * dst_stride); do { load_u8_8x8(s, src_stride, &t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); s7 = vreinterpretq_s16_u16(vmovl_u8(t0)); s8 = vreinterpretq_s16_u16(vmovl_u8(t1)); s9 = vreinterpretq_s16_u16(vmovl_u8(t2)); s10 = vreinterpretq_s16_u16(vmovl_u8(t3)); s11 = vreinterpretq_s16_u16(vmovl_u8(t4)); s12 = vreinterpretq_s16_u16(vmovl_u8(t5)); s13 = vreinterpretq_s16_u16(vmovl_u8(t6)); s14 = vreinterpretq_s16_u16(vmovl_u8(t7)); t0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, filter3, filter4); t1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, filter3, filter4); t2 = convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, filter3, filter4); t3 = convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, filter3, filter4); t4 = convolve8_8(s4, s5, s6, s7, s8, s9, s10, s11, filters, filter3, filter4); t5 = convolve8_8(s5, s6, s7, s8, s9, s10, s11, s12, filters, filter3, filter4); t6 = convolve8_8(s6, s7, s8, s9, s10, s11, s12, s13, filters, filter3, filter4); t7 = convolve8_8(s7, s8, s9, s10, s11, s12, s13, s14, filters, filter3, filter4); transpose_u8_8x8(&t0, &t1, &t2, &t3, &t4, &t5, &t6, &t7); d01 = vcombine_u8(vld1_u8(d + 0 * dst_stride), vld1_u8(d + 1 * dst_stride)); d23 = vcombine_u8(vld1_u8(d + 2 * dst_stride), vld1_u8(d + 3 * dst_stride)); d45 = vcombine_u8(vld1_u8(d + 4 * dst_stride), vld1_u8(d + 5 * dst_stride)); d67 = vcombine_u8(vld1_u8(d + 6 * dst_stride), vld1_u8(d + 7 * dst_stride)); d01 = vrhaddq_u8(d01, vcombine_u8(t0, t1)); d23 = vrhaddq_u8(d23, vcombine_u8(t2, t3)); d45 = vrhaddq_u8(d45, vcombine_u8(t4, t5)); d67 = vrhaddq_u8(d67, vcombine_u8(t6, t7)); store_u8_8x8(d, dst_stride, vget_low_u8(d01), vget_high_u8(d01), vget_low_u8(d23), vget_high_u8(d23), 
vget_low_u8(d45), vget_high_u8(d45), vget_low_u8(d67), vget_high_u8(d67)); s0 = s8; s1 = s9; s2 = s10; s3 = s11; s4 = s12; s5 = s13; s6 = s14; s += 8; d += 8; width -= 8; } while (width > 0); src += 8 * src_stride; dst += 8 * dst_stride; h -= 8; } while (h > 0); } } } void vpx_convolve8_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16x8_t filters = vld1q_s16(filter[y0_q4]); assert(!((intptr_t)dst & 3)); assert(!(dst_stride & 3)); assert(y_step_q4 == 16); (void)x0_q4; (void)x_step_q4; (void)y_step_q4; src -= 3 * src_stride; if (w == 4) { const int16x4_t filter3 = vdup_lane_s16(vget_low_s16(filters), 3); const int16x4_t filter4 = vdup_lane_s16(vget_high_s16(filters), 0); uint8x8_t d01, d23; int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3; s0 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s1 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s2 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s3 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s4 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s5 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s6 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; do { s7 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s8 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s9 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s10 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); d0 = convolve8_4(s0, s1, s2, s3, s4, s5, s6, s7, filters, filter3, filter4); d1 = convolve8_4(s1, s2, s3, s4, s5, s6, s7, s8, filters, filter3, filter4); d2 = convolve8_4(s2, s3, s4, s5, s6, s7, s8, s9, filters, filter3, filter4); d3 = convolve8_4(s3, s4, s5, s6, s7, s8, s9, s10, filters, filter3, filter4); d01 = vqrshrun_n_s16(vcombine_s16(d0, d1), 7); d23 = vqrshrun_n_s16(vcombine_s16(d2, d3), 7); vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 0); dst += dst_stride; vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d01), 1); dst += dst_stride; vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 0); dst += dst_stride; vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d23), 1); dst += dst_stride; s0 = s4; s1 = s5; s2 = s6; s3 = s7; s4 = s8; s5 = s9; s6 = s10; h -= 4; } while (h > 0); } else { const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); int height; const uint8_t *s; uint8_t *d; uint8x8_t t0, t1, t2, t3; int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; do { __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); __builtin_prefetch(src + 4 * src_stride); __builtin_prefetch(src + 5 * src_stride); __builtin_prefetch(src + 6 * 
src_stride); s = src; s0 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s1 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s2 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s3 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s4 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s5 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s6 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; d = dst; height = h; do { s7 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s8 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s9 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s10 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; __builtin_prefetch(d + 0 * dst_stride); __builtin_prefetch(d + 1 * dst_stride); __builtin_prefetch(d + 2 * dst_stride); __builtin_prefetch(d + 3 * dst_stride); __builtin_prefetch(s + 0 * src_stride); __builtin_prefetch(s + 1 * src_stride); __builtin_prefetch(s + 2 * src_stride); __builtin_prefetch(s + 3 * src_stride); t0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, filter3, filter4); t1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, filter3, filter4); t2 = convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, filter3, filter4); t3 = convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, filter3, filter4); vst1_u8(d, t0); d += dst_stride; vst1_u8(d, t1); d += dst_stride; vst1_u8(d, t2); d += dst_stride; vst1_u8(d, t3); d += dst_stride; s0 = s4; s1 = s5; s2 = s6; s3 = s7; s4 = s8; s5 = s9; s6 = s10; height -= 4; } while (height > 0); src += 8; dst += 8; w -= 8; } while (w > 0); } } void vpx_convolve8_avg_vert_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16x8_t filters = vld1q_s16(filter[y0_q4]); assert(!((intptr_t)dst & 3)); assert(!(dst_stride & 3)); assert(y_step_q4 == 16); (void)x0_q4; (void)x_step_q4; (void)y_step_q4; src -= 3 * src_stride; if (w == 4) { const int16x4_t filter3 = vdup_lane_s16(vget_low_s16(filters), 3); const int16x4_t filter4 = vdup_lane_s16(vget_high_s16(filters), 0); uint8x8_t d01, d23; int16x4_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, d0, d1, d2, d3; uint32x4_t d0123 = vdupq_n_u32(0); s0 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s1 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s2 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s3 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s4 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s5 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s6 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; do { s7 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s8 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s9 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; s10 = vreinterpret_s16_u16(vget_low_u16(vmovl_u8(vld1_u8(src)))); src += src_stride; __builtin_prefetch(dst + 0 * dst_stride); __builtin_prefetch(dst + 1 * dst_stride); __builtin_prefetch(dst + 2 * dst_stride); __builtin_prefetch(dst + 3 * dst_stride); __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * 
src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); d0 = convolve8_4(s0, s1, s2, s3, s4, s5, s6, s7, filters, filter3, filter4); d1 = convolve8_4(s1, s2, s3, s4, s5, s6, s7, s8, filters, filter3, filter4); d2 = convolve8_4(s2, s3, s4, s5, s6, s7, s8, s9, filters, filter3, filter4); d3 = convolve8_4(s3, s4, s5, s6, s7, s8, s9, s10, filters, filter3, filter4); d01 = vqrshrun_n_s16(vcombine_s16(d0, d1), 7); d23 = vqrshrun_n_s16(vcombine_s16(d2, d3), 7); d0123 = vld1q_lane_u32((uint32_t *)(dst + 0 * dst_stride), d0123, 0); d0123 = vld1q_lane_u32((uint32_t *)(dst + 1 * dst_stride), d0123, 1); d0123 = vld1q_lane_u32((uint32_t *)(dst + 2 * dst_stride), d0123, 2); d0123 = vld1q_lane_u32((uint32_t *)(dst + 3 * dst_stride), d0123, 3); d0123 = vreinterpretq_u32_u8( vrhaddq_u8(vreinterpretq_u8_u32(d0123), vcombine_u8(d01, d23))); vst1q_lane_u32((uint32_t *)dst, d0123, 0); dst += dst_stride; vst1q_lane_u32((uint32_t *)dst, d0123, 1); dst += dst_stride; vst1q_lane_u32((uint32_t *)dst, d0123, 2); dst += dst_stride; vst1q_lane_u32((uint32_t *)dst, d0123, 3); dst += dst_stride; s0 = s4; s1 = s5; s2 = s6; s3 = s7; s4 = s8; s5 = s9; s6 = s10; h -= 4; } while (h > 0); } else { const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); int height; const uint8_t *s; uint8_t *d; uint8x8_t t0, t1, t2, t3; uint8x16_t d01, d23, dd01, dd23; int16x8_t s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10; do { __builtin_prefetch(src + 0 * src_stride); __builtin_prefetch(src + 1 * src_stride); __builtin_prefetch(src + 2 * src_stride); __builtin_prefetch(src + 3 * src_stride); __builtin_prefetch(src + 4 * src_stride); __builtin_prefetch(src + 5 * src_stride); __builtin_prefetch(src + 6 * src_stride); s = src; s0 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s1 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s2 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s3 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s4 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s5 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s6 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; d = dst; height = h; do { s7 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s8 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s9 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; s10 = vreinterpretq_s16_u16(vmovl_u8(vld1_u8(s))); s += src_stride; __builtin_prefetch(d + 0 * dst_stride); __builtin_prefetch(d + 1 * dst_stride); __builtin_prefetch(d + 2 * dst_stride); __builtin_prefetch(d + 3 * dst_stride); __builtin_prefetch(s + 0 * src_stride); __builtin_prefetch(s + 1 * src_stride); __builtin_prefetch(s + 2 * src_stride); __builtin_prefetch(s + 3 * src_stride); t0 = convolve8_8(s0, s1, s2, s3, s4, s5, s6, s7, filters, filter3, filter4); t1 = convolve8_8(s1, s2, s3, s4, s5, s6, s7, s8, filters, filter3, filter4); t2 = convolve8_8(s2, s3, s4, s5, s6, s7, s8, s9, filters, filter3, filter4); t3 = convolve8_8(s3, s4, s5, s6, s7, s8, s9, s10, filters, filter3, filter4); d01 = vcombine_u8(t0, t1); d23 = vcombine_u8(t2, t3); dd01 = vcombine_u8(vld1_u8(d + 0 * dst_stride), vld1_u8(d + 1 * dst_stride)); dd23 = vcombine_u8(vld1_u8(d + 2 * dst_stride), vld1_u8(d + 3 * dst_stride)); dd01 = vrhaddq_u8(dd01, d01); dd23 = vrhaddq_u8(dd23, d23); vst1_u8(d, vget_low_u8(dd01)); d += dst_stride; vst1_u8(d, 
vget_high_u8(dd01)); d += dst_stride; vst1_u8(d, vget_low_u8(dd23)); d += dst_stride; vst1_u8(d, vget_high_u8(dd23)); d += dst_stride; s0 = s4; s1 = s5; s2 = s6; s3 = s7; s4 = s8; s5 = s9; s6 = s10; height -= 4; } while (height > 0); src += 8; dst += 8; w -= 8; } while (w > 0); } } libvpx-1.8.2/vpx_dsp/arm/vpx_convolve8_neon.h000066400000000000000000000116371357355204000213140ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_ARM_VPX_CONVOLVE8_NEON_H_ #define VPX_VPX_DSP_ARM_VPX_CONVOLVE8_NEON_H_ #include <arm_neon.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" static INLINE void load_u8_8x4(const uint8_t *s, const ptrdiff_t p, uint8x8_t *const s0, uint8x8_t *const s1, uint8x8_t *const s2, uint8x8_t *const s3) { *s0 = vld1_u8(s); s += p; *s1 = vld1_u8(s); s += p; *s2 = vld1_u8(s); s += p; *s3 = vld1_u8(s); } static INLINE void load_u8_8x8(const uint8_t *s, const ptrdiff_t p, uint8x8_t *const s0, uint8x8_t *const s1, uint8x8_t *const s2, uint8x8_t *const s3, uint8x8_t *const s4, uint8x8_t *const s5, uint8x8_t *const s6, uint8x8_t *const s7) { *s0 = vld1_u8(s); s += p; *s1 = vld1_u8(s); s += p; *s2 = vld1_u8(s); s += p; *s3 = vld1_u8(s); s += p; *s4 = vld1_u8(s); s += p; *s5 = vld1_u8(s); s += p; *s6 = vld1_u8(s); s += p; *s7 = vld1_u8(s); } static INLINE void load_u8_16x8(const uint8_t *s, const ptrdiff_t p, uint8x16_t *const s0, uint8x16_t *const s1, uint8x16_t *const s2, uint8x16_t *const s3, uint8x16_t *const s4, uint8x16_t *const s5, uint8x16_t *const s6, uint8x16_t *const s7) { *s0 = vld1q_u8(s); s += p; *s1 = vld1q_u8(s); s += p; *s2 = vld1q_u8(s); s += p; *s3 = vld1q_u8(s); s += p; *s4 = vld1q_u8(s); s += p; *s5 = vld1q_u8(s); s += p; *s6 = vld1q_u8(s); s += p; *s7 = vld1q_u8(s); } static INLINE int16x4_t convolve8_4(const int16x4_t s0, const int16x4_t s1, const int16x4_t s2, const int16x4_t s3, const int16x4_t s4, const int16x4_t s5, const int16x4_t s6, const int16x4_t s7, const int16x8_t filters, const int16x4_t filter3, const int16x4_t filter4) { const int16x4_t filters_lo = vget_low_s16(filters); const int16x4_t filters_hi = vget_high_s16(filters); int16x4_t sum; sum = vmul_lane_s16(s0, filters_lo, 0); sum = vmla_lane_s16(sum, s1, filters_lo, 1); sum = vmla_lane_s16(sum, s2, filters_lo, 2); sum = vmla_lane_s16(sum, s5, filters_hi, 1); sum = vmla_lane_s16(sum, s6, filters_hi, 2); sum = vmla_lane_s16(sum, s7, filters_hi, 3); sum = vqadd_s16(sum, vmul_s16(s3, filter3)); sum = vqadd_s16(sum, vmul_s16(s4, filter4)); return sum; } static INLINE uint8x8_t convolve8_8(const int16x8_t s0, const int16x8_t s1, const int16x8_t s2, const int16x8_t s3, const int16x8_t s4, const int16x8_t s5, const int16x8_t s6, const int16x8_t s7, const int16x8_t filters, const int16x8_t filter3, const int16x8_t filter4) { const int16x4_t filters_lo = vget_low_s16(filters); const int16x4_t filters_hi = vget_high_s16(filters); int16x8_t sum; sum = vmulq_lane_s16(s0, filters_lo, 0); sum = vmlaq_lane_s16(sum, s1, filters_lo, 1); sum = vmlaq_lane_s16(sum, s2, filters_lo, 2); sum = vmlaq_lane_s16(sum, s5, filters_hi, 1); sum = vmlaq_lane_s16(sum, s6, filters_hi, 2); sum = vmlaq_lane_s16(sum, s7, filters_hi, 3); sum =
vqaddq_s16(sum, vmulq_s16(s3, filter3)); sum = vqaddq_s16(sum, vmulq_s16(s4, filter4)); return vqrshrun_n_s16(sum, 7); } static INLINE uint8x8_t scale_filter_8(const uint8x8_t *const s, const int16x8_t filters) { const int16x8_t filter3 = vdupq_lane_s16(vget_low_s16(filters), 3); const int16x8_t filter4 = vdupq_lane_s16(vget_high_s16(filters), 0); int16x8_t ss[8]; ss[0] = vreinterpretq_s16_u16(vmovl_u8(s[0])); ss[1] = vreinterpretq_s16_u16(vmovl_u8(s[1])); ss[2] = vreinterpretq_s16_u16(vmovl_u8(s[2])); ss[3] = vreinterpretq_s16_u16(vmovl_u8(s[3])); ss[4] = vreinterpretq_s16_u16(vmovl_u8(s[4])); ss[5] = vreinterpretq_s16_u16(vmovl_u8(s[5])); ss[6] = vreinterpretq_s16_u16(vmovl_u8(s[6])); ss[7] = vreinterpretq_s16_u16(vmovl_u8(s[7])); return convolve8_8(ss[0], ss[1], ss[2], ss[3], ss[4], ss[5], ss[6], ss[7], filters, filter3, filter4); } #endif // VPX_VPX_DSP_ARM_VPX_CONVOLVE8_NEON_H_ libvpx-1.8.2/vpx_dsp/arm/vpx_convolve8_neon_asm.c000066400000000000000000000040031357355204000221340ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_dsp_rtcd.h" #include "vp9/common/vp9_filter.h" #include "vpx_dsp/arm/vpx_convolve8_neon_asm.h" /* Type1 and Type2 functions are called depending on the position of the * negative and positive coefficients in the filter. In type1, the filter kernel * used is sub_pel_filters_8lp, in which only the first two and the last two * coefficients are negative. In type2, the negative coefficients are 0, 2, 5 & * 7. */ #define DEFINE_FILTER(dir) \ void vpx_convolve8_##dir##_neon( \ const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \ if (filter == vp9_filter_kernels[1]) { \ vpx_convolve8_##dir##_filter_type1_neon( \ src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, \ y_step_q4, w, h); \ } else { \ vpx_convolve8_##dir##_filter_type2_neon( \ src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, \ y_step_q4, w, h); \ } \ } DEFINE_FILTER(horiz); DEFINE_FILTER(avg_horiz); DEFINE_FILTER(vert); DEFINE_FILTER(avg_vert); libvpx-1.8.2/vpx_dsp/arm/vpx_convolve8_neon_asm.h000066400000000000000000000022061357355204000221440ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_DSP_ARM_VPX_CONVOLVE8_NEON_ASM_H_ #define VPX_VPX_DSP_ARM_VPX_CONVOLVE8_NEON_ASM_H_ #define DECLARE_FILTER(dir, type) \ void vpx_convolve8_##dir##_filter_##type##_neon( \ const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ int x_step_q4, int y0_q4, int y_step_q4, int w, int h); DECLARE_FILTER(horiz, type1); DECLARE_FILTER(avg_horiz, type1); DECLARE_FILTER(horiz, type2); DECLARE_FILTER(avg_horiz, type2); DECLARE_FILTER(vert, type1); DECLARE_FILTER(avg_vert, type1); DECLARE_FILTER(vert, type2); DECLARE_FILTER(avg_vert, type2); #endif // VPX_VPX_DSP_ARM_VPX_CONVOLVE8_NEON_ASM_H_ libvpx-1.8.2/vpx_dsp/arm/vpx_convolve8_vert_filter_type1_neon.asm000066400000000000000000000603621357355204000253730ustar00rootroot00000000000000; ; Copyright (c) 2018 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; ;**************Variables Vs Registers*********************************** ; r0 => src ; r1 => dst ; r2 => src_stride ; r6 => dst_stride ; r12 => filter_y0 ; r5 => ht ; r3 => wd EXPORT |vpx_convolve8_vert_filter_type1_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 |vpx_convolve8_vert_filter_type1_neon| PROC stmfd sp!, {r4 - r12, r14} ;stack stores the values of ; the arguments vpush {d8 - d15} ; stack offset by 64 mov r4, r1 mov r1, r2 mov r2, r4 vmov.i16 q15, #0x4000 mov r11, #0xc000 ldr r12, [sp, #104] ;load filter ldr r6, [sp, #116] ;load y0_q4 add r12, r12, r6, lsl #4 ;r12 = filter[y0_q4] mov r6, r3 ldr r5, [sp, #124] ;load wd vld2.8 {d0, d1}, [r12] ;coeff = vld1_s8(pi1_coeff) sub r12, r2, r2, lsl #2 ;src_ctrd & pi1_coeff vabs.s8 d0, d0 ;vabs_s8(coeff) add r0, r0, r12 ;r0->pu1_src r12->pi1_coeff ldr r3, [sp, #128] ;load ht subs r7, r3, #0 ;r3->ht vdup.u8 d22, d0[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, ; 0); cmp r5, #8 vdup.u8 d23, d0[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, ; 1); vdup.u8 d24, d0[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, ; 2); vdup.u8 d25, d0[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, ; 3); vdup.u8 d26, d0[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, ; 4); vdup.u8 d27, d0[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, ; 5); vdup.u8 d28, d0[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, ; 6); vdup.u8 d29, d0[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, ; 7); blt core_loop_wd_4 ;core loop wd 4 jump str r0, [sp, #-4]! str r1, [sp, #-4]! bic r4, r5, #7 ;r5 ->wd rsb r9, r4, r6, lsl #2 ;r6->dst_strd r5 ->wd rsb r8, r4, r2, lsl #2 ;r2->src_strd mov r3, r5, lsr #3 ;divide by 8 mul r7, r3 ;multiply height by width sub r7, #4 ;subtract by one for epilog prolog and r10, r0, #31 add r3, r0, r2 ;pu1_src_tmp += src_strd; vdup.16 q4, r11 vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vld1.u8 {d0}, [r0]! 
;src_tmp1 = vld1_u8(pu1_src_tmp); subs r4, r4, #8 vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, ; coeffabs_1); vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp1, coeffabs_0); vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp3, coeffabs_2); vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp4, coeffabs_3); vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp1, coeffabs_4); vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp2, coeffabs_5); vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp3, coeffabs_6); vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp4, coeffabs_7); vdup.16 q5, r11 vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, ; coeffabs_1); addle r0, r0, r8 vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp2, coeffabs_0); bicle r4, r5, #7 ;r5 ->wd vmlal.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp4, coeffabs_2); pld [r3] vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp1, coeffabs_3); vhadd.s16 q4, q4, q15 vdup.16 q6, r11 pld [r3, r2] vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp2, coeffabs_4); pld [r3, r2, lsl #1] vmlal.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp3, coeffabs_5); add r3, r3, r2 vmlsl.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp4, coeffabs_6); pld [r3, r2, lsl #1] vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp1, coeffabs_7); add r3, r0, r2 ;pu1_src_tmp += src_strd; vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u8 {d1}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q6, d3, d23 vld1.u8 {d0}, [r0]! ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q6, d2, d22 vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q6, d4, d24 vhadd.s16 q5, q5, q15 vdup.16 q7, r11 vmlal.u8 q6, d5, d25 vmlal.u8 q6, d6, d26 vmlal.u8 q6, d7, d27 vmlsl.u8 q6, d16, d28 vmlsl.u8 q6, d17, d29 add r14, r1, r6 vst1.8 {d8}, [r1]! 
;vst1_u8(pu1_dst,sto_res); vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); addle r1, r1, r9 vmlsl.u8 q7, d4, d23 subs r7, r7, #4 vmlsl.u8 q7, d3, d22 vmlal.u8 q7, d5, d24 vmlal.u8 q7, d6, d25 vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vhadd.s16 q6, q6, q15 vdup.16 q4, r11 vmlal.u8 q7, d7, d26 vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q7, d16, d27 vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d17, d28 vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d18, d29 vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); vqrshrun.s16 d12, q6, #6 blt epilog_end ;jumps to epilog_end beq epilog ;jumps to epilog main_loop_8 subs r4, r4, #8 vmlsl.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, ; coeffabs_1); addle r0, r0, r8 vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp1, coeffabs_0); bicle r4, r5, #7 ;r5 ->wd vmlal.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp3, coeffabs_2); vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp4, coeffabs_3); vhadd.s16 q7, q7, q15 vdup.16 q5, r11 vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp1, coeffabs_4); vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp2, coeffabs_5); vmlsl.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp3, coeffabs_6); vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp4, coeffabs_7); vst1.8 {d12}, [r14], r6 vqrshrun.s16 d14, q7, #6 add r3, r0, r2 ;pu1_src_tmp += src_strd; vmlsl.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, ; coeffabs_1); vld1.u8 {d0}, [r0]! 
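                ;
                ; main_loop_8 emits a 4-row by 8-column tile per iteration.
                ; When the width counter r4 underflows, the conditional
                ; addle/bicle pair steps r0 to the next 8-wide strip
                ; (r8 = 4 * src_stride - width) and reloads the width; the pld
                ; sequence further down prefetches source rows roughly eleven
                ; strides ahead of the load cursor to hide memory latency.
                ;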
;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp2, coeffabs_0); vmlal.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp4, coeffabs_2); vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp1, coeffabs_3); vhadd.s16 q4, q4, q15 vdup.16 q6, r11 vst1.8 {d14}, [r14], r6 vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp2, coeffabs_4); add r14, r1, #0 vmlal.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp3, coeffabs_5); add r1, r1, #8 vmlsl.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp4, coeffabs_6); vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp1, coeffabs_7); addle r1, r1, r9 vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vmlsl.u8 q6, d3, d23 add r10, r3, r2, lsl #3 ; 10*strd - 8+2 vmlsl.u8 q6, d2, d22 add r10, r10, r2 ; 11*strd vmlal.u8 q6, d4, d24 vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q6, d5, d25 vhadd.s16 q5, q5, q15 vdup.16 q7, r11 vmlal.u8 q6, d6, d26 vst1.8 {d8}, [r14], r6 ;vst1_u8(pu1_dst,sto_res); pld [r10] ;11+ 0 vmlal.u8 q6, d7, d27 pld [r10, r2] ;11+ 1*strd vmlsl.u8 q6, d16, d28 pld [r10, r2, lsl #1] ;11+ 2*strd vmlsl.u8 q6, d17, d29 add r10, r10, r2 ;12*strd vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); pld [r10, r2, lsl #1] ;11+ 3*strd vmlsl.u8 q7, d4, d23 vmlsl.u8 q7, d3, d22 subs r7, r7, #4 vmlal.u8 q7, d5, d24 vmlal.u8 q7, d6, d25 vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vhadd.s16 q6, q6, q15 vdup.16 q4, r11 vmlal.u8 q7, d7, d26 vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q7, d16, d27 vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d17, d28 vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d18, d29 vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vqrshrun.s16 d12, q6, #6 vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); bgt main_loop_8 ;jumps to main_loop_8 epilog vmlsl.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, ; coeffabs_1); vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp1, coeffabs_0); vmlal.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp3, coeffabs_2); vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp4, coeffabs_3); vhadd.s16 q7, q7, q15 vdup.16 q5, r11 vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp1, coeffabs_4); vmlal.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp2, coeffabs_5); vmlsl.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp3, coeffabs_6); vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp4, coeffabs_7); vst1.8 {d12}, [r14], r6 vqrshrun.s16 d14, q7, #6 vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, ; coeffabs_1); vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp2, coeffabs_0); vmlal.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp4, coeffabs_2); vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp1, coeffabs_3); vhadd.s16 q4, q4, q15 vdup.16 q6, r11 vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp2, coeffabs_4); vmlal.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp3, coeffabs_5); vmlsl.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp4, coeffabs_6); vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp1, coeffabs_7); vst1.8 {d14}, [r14], r6 vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u8 {d17}, [r3], r2 ;src_tmp2 = 
vld1_u8(pu1_src_tmp); vmlsl.u8 q6, d3, d23 vmlsl.u8 q6, d2, d22 vmlal.u8 q6, d4, d24 vmlal.u8 q6, d5, d25 vhadd.s16 q5, q5, q15 vdup.16 q7, r11 vmlal.u8 q6, d6, d26 vmlal.u8 q6, d7, d27 vmlsl.u8 q6, d16, d28 vmlsl.u8 q6, d17, d29 add r14, r1, r6 vst1.8 {d8}, [r1]! ;vst1_u8(pu1_dst,sto_res); vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d4, d23 vmlsl.u8 q7, d3, d22 vmlal.u8 q7, d5, d24 vmlal.u8 q7, d6, d25 vhadd.s16 q6, q6, q15 vmlal.u8 q7, d7, d26 vmlal.u8 q7, d16, d27 vmlsl.u8 q7, d17, d28 vmlsl.u8 q7, d18, d29 vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); vqrshrun.s16 d12, q6, #6 epilog_end vst1.8 {d12}, [r14], r6 vhadd.s16 q7, q7, q15 vqrshrun.s16 d14, q7, #6 vst1.8 {d14}, [r14], r6 end_loops tst r5, #7 ldr r1, [sp], #4 ldr r0, [sp], #4 vpopeq {d8 - d15} ldmfdeq sp!, {r4 - r12, r15} ;reload the registers from ; sp mov r5, #4 add r0, r0, #8 add r1, r1, #8 mov r7, #16 core_loop_wd_4 rsb r9, r5, r6, lsl #2 ;r6->dst_strd r5 ->wd rsb r8, r5, r2, lsl #2 ;r2->src_strd vmov.i8 d4, #0 outer_loop_wd_4 subs r12, r5, #0 ble end_inner_loop_wd_4 ;outer loop jump inner_loop_wd_4 add r3, r0, r2 vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp1, 1); subs r12, r12, #4 vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, ; 1); vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp2, 1); vld1.u32 {d4[0]},[r0] ;src_tmp1 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp1, 0); vdup.16 q0, r11 vmlsl.u8 q0, d5, d23 ;mul_res1 = ; vmull_u8(vreinterpret_u8_u32(src_tmp2), coeffabs_1); vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, ; 1); add r0, r0, #4 vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp3, 1); vmlsl.u8 q0, d4, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp1), coeffabs_0); vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, ; 1); vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp4, 1); vmlal.u8 q0, d6, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp3), coeffabs_2); vdup.16 q4, r11 vmlsl.u8 q4, d7, d23 vdup.u32 d4, d7[1] ;src_tmp1 = vdup_lane_u32(src_tmp4, ; 1); vmull.u8 q1, d7, d25 ;mul_res2 = ; vmull_u8(vreinterpret_u8_u32(src_tmp4), coeffabs_3); vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp1, 1); vmlsl.u8 q4, d6, d22 vmlal.u8 q0, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp1), coeffabs_4); vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, ; 1); vmlal.u8 q4, d4, d24 vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp2, 1); vmlal.u8 q1, d5, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; vreinterpret_u8_u32(src_tmp2), coeffabs_5); vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, ; 1); vmlal.u8 q4, d5, d25 vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp3, 1); vmlsl.u8 q0, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp3), coeffabs_6); vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, ; 1); vmlal.u8 q4, d6, d26 vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp4, 1); vmlsl.u8 q1, d7, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; vreinterpret_u8_u32(src_tmp4), coeffabs_7); vdup.u32 d4, d7[1] vadd.i16 q0, q0, q1 ;mul_res1 = vaddq_u16(mul_res1, ; mul_res2); vmlal.u8 q4, d7, d27 vld1.u32 {d4[1]},[r3], r2 vmlsl.u8 q4, d4, d28 vdup.u32 d5, 
d4[1] vhadd.s16 q0, q0, q15 vqrshrun.s16 d0, q0, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u32 {d5[1]},[r3] add r3, r1, r6 vst1.32 {d0[0]},[r1] ;vst1_lane_u32((uint32_t *)pu1_dst, ; vreinterpret_u32_u8(sto_res), 0); vmlsl.u8 q4, d5, d29 vst1.32 {d0[1]},[r3], r6 ;vst1_lane_u32((uint32_t ; *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 1); vhadd.s16 q4, q4, q15 vqrshrun.s16 d8, q4, #6 vst1.32 {d8[0]},[r3], r6 add r1, r1, #4 vst1.32 {d8[1]},[r3] bgt inner_loop_wd_4 end_inner_loop_wd_4 subs r7, r7, #4 add r1, r1, r9 add r0, r0, r8 bgt outer_loop_wd_4 vpop {d8 - d15} ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp ENDP END libvpx-1.8.2/vpx_dsp/arm/vpx_convolve8_vert_filter_type2_neon.asm000066400000000000000000000602111357355204000253650ustar00rootroot00000000000000; ; Copyright (c) 2018 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; ;**************Variables Vs Registers*********************************** ; r0 => src ; r1 => dst ; r2 => src_stride ; r6 => dst_stride ; r12 => filter_y0 ; r5 => ht ; r3 => wd EXPORT |vpx_convolve8_vert_filter_type2_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 |vpx_convolve8_vert_filter_type2_neon| PROC stmfd sp!, {r4 - r12, r14} ;stack stores the values of ; the arguments vpush {d8 - d15} ; stack offset by 64 mov r4, r1 mov r1, r2 mov r2, r4 vmov.i16 q15, #0x4000 mov r11, #0xc000 ldr r12, [sp, #104] ;load filter ldr r6, [sp, #116] ;load y0_q4 add r12, r12, r6, lsl #4 ;r12 = filter[y0_q4] mov r6, r3 ldr r5, [sp, #124] ;load wd vld2.8 {d0, d1}, [r12] ;coeff = vld1_s8(pi1_coeff) sub r12, r2, r2, lsl #2 ;src_ctrd & pi1_coeff vabs.s8 d0, d0 ;vabs_s8(coeff) add r0, r0, r12 ;r0->pu1_src r12->pi1_coeff ldr r3, [sp, #128] ;load ht subs r7, r3, #0 ;r3->ht vdup.u8 d22, d0[0] ;coeffabs_0 = vdup_lane_u8(coeffabs, ; 0); cmp r5, #8 vdup.u8 d23, d0[1] ;coeffabs_1 = vdup_lane_u8(coeffabs, ; 1); vdup.u8 d24, d0[2] ;coeffabs_2 = vdup_lane_u8(coeffabs, ; 2); vdup.u8 d25, d0[3] ;coeffabs_3 = vdup_lane_u8(coeffabs, ; 3); vdup.u8 d26, d0[4] ;coeffabs_4 = vdup_lane_u8(coeffabs, ; 4); vdup.u8 d27, d0[5] ;coeffabs_5 = vdup_lane_u8(coeffabs, ; 5); vdup.u8 d28, d0[6] ;coeffabs_6 = vdup_lane_u8(coeffabs, ; 6); vdup.u8 d29, d0[7] ;coeffabs_7 = vdup_lane_u8(coeffabs, ; 7); blt core_loop_wd_4 ;core loop wd 4 jump str r0, [sp, #-4]! str r1, [sp, #-4]! bic r4, r5, #7 ;r5 ->wd rsb r9, r4, r6, lsl #2 ;r6->dst_strd r5 ->wd rsb r8, r4, r2, lsl #2 ;r2->src_strd mov r3, r5, lsr #3 ;divide by 8 mul r7, r3 ;multiply height by width sub r7, #4 ;subtract by one for epilog prolog and r10, r0, #31 add r3, r0, r2 ;pu1_src_tmp += src_strd; vdup.16 q4, r11 vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vld1.u8 {d0}, [r0]! 
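                ;
                ; This type2 variant shares its register allocation, software
                ; pipelining and rounding scheme with the type1 file above;
                ; the functional difference is the tap-sign pattern baked into
                ; the vmlsl/vmlal choice, here (-, +, -, +, +, -, +, -) versus
                ; type1's (-, -, +, +, +, +, -, -).  As before, the 0xc000
                ; seed plus vhadd.s16 with q15 (0x4000) and vqrshrun.s16 #6
                ; realise clip_pixel((sum + 64) >> 7).
                ;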
;src_tmp1 = vld1_u8(pu1_src_tmp); subs r4, r4, #8 vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, ; coeffabs_1); vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp1, coeffabs_0); vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp3, coeffabs_2); vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp4, coeffabs_3); vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp1, coeffabs_4); vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp2, coeffabs_5); vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp3, coeffabs_6); vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp4, coeffabs_7); vdup.16 q5, r11 vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, ; coeffabs_1); addle r0, r0, r8 vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp2, coeffabs_0); bicle r4, r5, #7 ;r5 ->wd vmlsl.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp4, coeffabs_2); pld [r3] vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp1, coeffabs_3); vhadd.s16 q4, q4, q15 vdup.16 q6, r11 pld [r3, r2] vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp2, coeffabs_4); pld [r3, r2, lsl #1] vmlsl.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp3, coeffabs_5); add r3, r3, r2 vmlal.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp4, coeffabs_6); pld [r3, r2, lsl #1] vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp1, coeffabs_7); add r3, r0, r2 ;pu1_src_tmp += src_strd; vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u8 {d1}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q6, d3, d23 vld1.u8 {d0}, [r0]! ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q6, d2, d22 vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q6, d4, d24 vhadd.s16 q5, q5, q15 vdup.16 q7, r11 vmlal.u8 q6, d5, d25 vmlal.u8 q6, d6, d26 vmlsl.u8 q6, d7, d27 vmlal.u8 q6, d16, d28 vmlsl.u8 q6, d17, d29 add r14, r1, r6 vst1.8 {d8}, [r1]! 
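                ;
                ; Note how each vld1.u8 of the next row group is interleaved
                ; between multiply-accumulate instructions; on in-order cores
                ; this hides NEON load latency, which is why the schedule
                ; reads so unlike the straight-line C reference.
                ;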
;vst1_u8(pu1_dst,sto_res); vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); addle r1, r1, r9 vmlal.u8 q7, d4, d23 subs r7, r7, #4 vmlsl.u8 q7, d3, d22 vmlsl.u8 q7, d5, d24 vmlal.u8 q7, d6, d25 vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vhadd.s16 q6, q6, q15 vdup.16 q4, r11 vmlal.u8 q7, d7, d26 vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d16, d27 vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q7, d17, d28 vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d18, d29 vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); vqrshrun.s16 d12, q6, #6 blt epilog_end ;jumps to epilog_end beq epilog ;jumps to epilog main_loop_8 subs r4, r4, #8 vmlal.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, ; coeffabs_1); addle r0, r0, r8 vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp1, coeffabs_0); bicle r4, r5, #7 ;r5 ->wd vmlsl.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp3, coeffabs_2); vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp4, coeffabs_3); vhadd.s16 q7, q7, q15 vdup.16 q5, r11 vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp1, coeffabs_4); vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp2, coeffabs_5); vmlal.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp3, coeffabs_6); vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp4, coeffabs_7); vst1.8 {d12}, [r14], r6 vqrshrun.s16 d14, q7, #6 add r3, r0, r2 ;pu1_src_tmp += src_strd; vmlal.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, ; coeffabs_1); vld1.u8 {d0}, [r0]! 
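                ;
                ; Destination addressing in the loop: r14 walks down the
                ; current 8-wide column of output rows with stride r6, while
                ; r1 tracks the top of the next column (advanced by 8 per tile
                ; and by r9 = 4 * dst_stride - width when a row of tiles
                ; completes).
                ;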
;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp2, coeffabs_0); vmlsl.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp4, coeffabs_2); vld1.u8 {d1}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp1, coeffabs_3); vhadd.s16 q4, q4, q15 vdup.16 q6, r11 vst1.8 {d14}, [r14], r6 vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp2, coeffabs_4); add r14, r1, #0 vmlsl.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp3, coeffabs_5); add r1, r1, #8 vmlal.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp4, coeffabs_6); vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp1, coeffabs_7); addle r1, r1, r9 vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vmlal.u8 q6, d3, d23 add r10, r3, r2, lsl #3 ; 10*strd - 8+2 vmlsl.u8 q6, d2, d22 add r10, r10, r2 ; 11*strd vmlsl.u8 q6, d4, d24 vld1.u8 {d2}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q6, d5, d25 vhadd.s16 q5, q5, q15 vdup.16 q7, r11 vmlal.u8 q6, d6, d26 vst1.8 {d8}, [r14], r6 ;vst1_u8(pu1_dst,sto_res); pld [r10] ;11+ 0 vmlsl.u8 q6, d7, d27 pld [r10, r2] ;11+ 1*strd vmlal.u8 q6, d16, d28 pld [r10, r2, lsl #1] ;11+ 2*strd vmlsl.u8 q6, d17, d29 add r10, r10, r2 ;12*strd vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); pld [r10, r2, lsl #1] ;11+ 3*strd vmlal.u8 q7, d4, d23 vmlsl.u8 q7, d3, d22 subs r7, r7, #4 vmlsl.u8 q7, d5, d24 vmlal.u8 q7, d6, d25 vld1.u8 {d3}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vhadd.s16 q6, q6, q15 vdup.16 q4, r11 vmlal.u8 q7, d7, d26 vld1.u8 {d4}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d16, d27 vld1.u8 {d5}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q7, d17, d28 vld1.u8 {d6}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlsl.u8 q7, d18, d29 vld1.u8 {d7}, [r3], r2 ;src_tmp4 = vld1_u8(pu1_src_tmp); vqrshrun.s16 d12, q6, #6 vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); bgt main_loop_8 ;jumps to main_loop_8 epilog vmlal.u8 q4, d1, d23 ;mul_res1 = vmull_u8(src_tmp2, vmlsl.u8 q4, d0, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp1, coeffabs_0); vmlsl.u8 q4, d2, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp3, coeffabs_2); vmlal.u8 q4, d3, d25 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp4, coeffabs_3); vhadd.s16 q7, q7, q15 vdup.16 q5, r11 vmlal.u8 q4, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp1, coeffabs_4); vmlsl.u8 q4, d5, d27 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp2, coeffabs_5); vmlal.u8 q4, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; src_tmp3, coeffabs_6); vmlsl.u8 q4, d7, d29 ;mul_res1 = vmlsl_u8(mul_res1, ; src_tmp4, coeffabs_7); vst1.8 {d12}, [r14], r6 vqrshrun.s16 d14, q7, #6 vld1.u8 {d16}, [r3], r2 ;src_tmp1 = vld1_u8(pu1_src_tmp); vmlal.u8 q5, d2, d23 ;mul_res2 = vmull_u8(src_tmp3, ; coeffabs_1); vmlsl.u8 q5, d1, d22 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp2, coeffabs_0); vmlsl.u8 q5, d3, d24 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp4, coeffabs_2); vmlal.u8 q5, d4, d25 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp1, coeffabs_3); vhadd.s16 q4, q4, q15 vdup.16 q6, r11 vmlal.u8 q5, d5, d26 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp2, coeffabs_4); vmlsl.u8 q5, d6, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp3, coeffabs_5); vmlal.u8 q5, d7, d28 ;mul_res2 = vmlal_u8(mul_res2, ; src_tmp4, coeffabs_6); vmlsl.u8 q5, d16, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; src_tmp1, coeffabs_7); vst1.8 {d14}, [r14], r6 vqrshrun.s16 d8, q4, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u8 {d17}, [r3], r2 ;src_tmp2 = vld1_u8(pu1_src_tmp); vmlal.u8 q6, 
d3, d23 vmlsl.u8 q6, d2, d22 vmlsl.u8 q6, d4, d24 vmlal.u8 q6, d5, d25 vhadd.s16 q5, q5, q15 vdup.16 q7, r11 vmlal.u8 q6, d6, d26 vmlsl.u8 q6, d7, d27 vmlal.u8 q6, d16, d28 vmlsl.u8 q6, d17, d29 add r14, r1, r6 vst1.8 {d8}, [r1]! ;vst1_u8(pu1_dst,sto_res); vqrshrun.s16 d10, q5, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u8 {d18}, [r3], r2 ;src_tmp3 = vld1_u8(pu1_src_tmp); vmlal.u8 q7, d4, d23 vmlsl.u8 q7, d3, d22 vmlsl.u8 q7, d5, d24 vmlal.u8 q7, d6, d25 vhadd.s16 q6, q6, q15 vmlal.u8 q7, d7, d26 vmlsl.u8 q7, d16, d27 vmlal.u8 q7, d17, d28 vmlsl.u8 q7, d18, d29 vst1.8 {d10}, [r14], r6 ;vst1_u8(pu1_dst_tmp,sto_res); vqrshrun.s16 d12, q6, #6 epilog_end vst1.8 {d12}, [r14], r6 vhadd.s16 q7, q7, q15 vqrshrun.s16 d14, q7, #6 vst1.8 {d14}, [r14], r6 end_loops tst r5, #7 ldr r1, [sp], #4 ldr r0, [sp], #4 vpopeq {d8 - d15} ldmfdeq sp!, {r4 - r12, r15} ;reload the registers from sp mov r5, #4 add r0, r0, #8 add r1, r1, #8 mov r7, #16 core_loop_wd_4 rsb r9, r5, r6, lsl #2 ;r6->dst_strd r5 ->wd rsb r8, r5, r2, lsl #2 ;r2->src_strd vmov.i8 d4, #0 outer_loop_wd_4 subs r12, r5, #0 ble end_inner_loop_wd_4 ;outer loop jump inner_loop_wd_4 add r3, r0, r2 vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp1, 1); subs r12, r12, #4 vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, ; 1); vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp2, 1); vld1.u32 {d4[0]},[r0] ;src_tmp1 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp1, 0); vdup.16 q0, r11 vmlal.u8 q0, d5, d23 ;mul_res1 = ; vmull_u8(vreinterpret_u8_u32(src_tmp2), coeffabs_1); vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, ; 1); add r0, r0, #4 vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp3, 1); vmlsl.u8 q0, d4, d22 ;mul_res1 = vmlsl_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp1), coeffabs_0); vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, ; 1); vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp4, 1); vmlsl.u8 q0, d6, d24 ;mul_res1 = vmlsl_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp3), coeffabs_2); vdup.16 q4, r11 vmlal.u8 q4, d7, d23 vdup.u32 d4, d7[1] ;src_tmp1 = vdup_lane_u32(src_tmp4, ; 1); vmull.u8 q1, d7, d25 ;mul_res2 = ; vmull_u8(vreinterpret_u8_u32(src_tmp4), coeffabs_3); vld1.u32 {d4[1]},[r3], r2 ;src_tmp1 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp1, 1); vmlsl.u8 q4, d6, d22 vmlal.u8 q0, d4, d26 ;mul_res1 = vmlal_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp1), coeffabs_4); vdup.u32 d5, d4[1] ;src_tmp2 = vdup_lane_u32(src_tmp1, ; 1); vmlsl.u8 q4, d4, d24 vld1.u32 {d5[1]},[r3], r2 ;src_tmp2 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp2, 1); vmlsl.u8 q1, d5, d27 ;mul_res2 = vmlsl_u8(mul_res2, ; vreinterpret_u8_u32(src_tmp2), coeffabs_5); vdup.u32 d6, d5[1] ;src_tmp3 = vdup_lane_u32(src_tmp2, ; 1); vmlal.u8 q4, d5, d25 vld1.u32 {d6[1]},[r3], r2 ;src_tmp3 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp3, 1); vmlal.u8 q0, d6, d28 ;mul_res1 = vmlal_u8(mul_res1, ; vreinterpret_u8_u32(src_tmp3), coeffabs_6); vdup.u32 d7, d6[1] ;src_tmp4 = vdup_lane_u32(src_tmp3, ; 1); vmlal.u8 q4, d6, d26 vld1.u32 {d7[1]},[r3], r2 ;src_tmp4 = vld1_lane_u32((uint32_t ; *)pu1_src_tmp, src_tmp4, 1); vmlsl.u8 q1, d7, d29 ;mul_res2 = vmlsl_u8(mul_res2, ; vreinterpret_u8_u32(src_tmp4), coeffabs_7); vdup.u32 d4, d7[1] vadd.i16 q0, q0, q1 ;mul_res1 = vaddq_u16(mul_res1, ; mul_res2); vmlsl.u8 q4, d7, d27 vld1.u32 {d4[1]},[r3], r2 vmlal.u8 q4, d4, d28 vdup.u32 d5, d4[1] vhadd.s16 q0, q0, q15 
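                ;
                ; The wd_4 path packs two 4-pixel rows into each d-register
                ; with 32-bit lane loads (vld1.u32 {dN[1]}) plus vdup.u32 to
                ; slide the previous lane down, so one vmull/vmlal still works
                ; on eight bytes.  Each inner-loop pass therefore emits four
                ; 4-wide rows: d0[0]/d0[1] from q0 and d8[0]/d8[1] from q4,
                ; all rounded through the same vhadd-with-0x4000 /
                ; vqrshrun #6 sequence.
                ;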
vqrshrun.s16 d0, q0, #6 ;sto_res = vqmovun_s16(sto_res_tmp); vld1.u32 {d5[1]},[r3] add r3, r1, r6 vst1.32 {d0[0]},[r1] ;vst1_lane_u32((uint32_t *)pu1_dst, ; vreinterpret_u32_u8(sto_res), 0); vmlsl.u8 q4, d5, d29 vst1.32 {d0[1]},[r3], r6 ;vst1_lane_u32((uint32_t ; *)pu1_dst_tmp, vreinterpret_u32_u8(sto_res), 1); vhadd.s16 q4, q4, q15 vqrshrun.s16 d8, q4, #6 vst1.32 {d8[0]},[r3], r6 add r1, r1, #4 vst1.32 {d8[1]},[r3] bgt inner_loop_wd_4 end_inner_loop_wd_4 subs r7, r7, #4 add r1, r1, r9 add r0, r0, r8 bgt outer_loop_wd_4 vpop {d8 - d15} ldmfd sp!, {r4 - r12, r15} ;reload the registers from sp ENDP END libvpx-1.8.2/vpx_dsp/arm/vpx_convolve_avg_neon.c000066400000000000000000000076001357355204000220470ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" void vpx_convolve_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; if (w < 8) { // avg4 uint8x8_t s0, s1; uint8x8_t dd0 = vdup_n_u8(0); uint32x2x2_t s01; do { s0 = vld1_u8(src); src += src_stride; s1 = vld1_u8(src); src += src_stride; s01 = vzip_u32(vreinterpret_u32_u8(s0), vreinterpret_u32_u8(s1)); dd0 = vreinterpret_u8_u32( vld1_lane_u32((const uint32_t *)dst, vreinterpret_u32_u8(dd0), 0)); dd0 = vreinterpret_u8_u32(vld1_lane_u32( (const uint32_t *)(dst + dst_stride), vreinterpret_u32_u8(dd0), 1)); dd0 = vrhadd_u8(vreinterpret_u8_u32(s01.val[0]), dd0); vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(dd0), 0); dst += dst_stride; vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(dd0), 1); dst += dst_stride; h -= 2; } while (h > 0); } else if (w == 8) { // avg8 uint8x8_t s0, s1, d0, d1; uint8x16_t s01, d01; do { s0 = vld1_u8(src); src += src_stride; s1 = vld1_u8(src); src += src_stride; d0 = vld1_u8(dst); d1 = vld1_u8(dst + dst_stride); s01 = vcombine_u8(s0, s1); d01 = vcombine_u8(d0, d1); d01 = vrhaddq_u8(s01, d01); vst1_u8(dst, vget_low_u8(d01)); dst += dst_stride; vst1_u8(dst, vget_high_u8(d01)); dst += dst_stride; h -= 2; } while (h > 0); } else if (w < 32) { // avg16 uint8x16_t s0, s1, d0, d1; do { s0 = vld1q_u8(src); src += src_stride; s1 = vld1q_u8(src); src += src_stride; d0 = vld1q_u8(dst); d1 = vld1q_u8(dst + dst_stride); d0 = vrhaddq_u8(s0, d0); d1 = vrhaddq_u8(s1, d1); vst1q_u8(dst, d0); dst += dst_stride; vst1q_u8(dst, d1); dst += dst_stride; h -= 2; } while (h > 0); } else if (w == 32) { // avg32 uint8x16_t s0, s1, s2, s3, d0, d1, d2, d3; do { s0 = vld1q_u8(src); s1 = vld1q_u8(src + 16); src += src_stride; s2 = vld1q_u8(src); s3 = vld1q_u8(src + 16); src += src_stride; d0 = vld1q_u8(dst); d1 = vld1q_u8(dst + 16); d2 = vld1q_u8(dst + dst_stride); d3 = vld1q_u8(dst + dst_stride + 16); d0 = vrhaddq_u8(s0, d0); d1 = vrhaddq_u8(s1, d1); d2 = vrhaddq_u8(s2, d2); d3 = vrhaddq_u8(s3, d3); vst1q_u8(dst, d0); vst1q_u8(dst + 16, d1); dst += dst_stride; vst1q_u8(dst, d2); vst1q_u8(dst + 16, d3); dst += dst_stride; h -= 2; } while (h > 0); } else { // avg64 uint8x16_t s0, s1, s2, s3, d0, d1, d2, d3; do { s0 
= vld1q_u8(src); s1 = vld1q_u8(src + 16); s2 = vld1q_u8(src + 32); s3 = vld1q_u8(src + 48); src += src_stride; d0 = vld1q_u8(dst); d1 = vld1q_u8(dst + 16); d2 = vld1q_u8(dst + 32); d3 = vld1q_u8(dst + 48); d0 = vrhaddq_u8(s0, d0); d1 = vrhaddq_u8(s1, d1); d2 = vrhaddq_u8(s2, d2); d3 = vrhaddq_u8(s3, d3); vst1q_u8(dst, d0); vst1q_u8(dst + 16, d1); vst1q_u8(dst + 32, d2); vst1q_u8(dst + 48, d3); dst += dst_stride; } while (--h); } } libvpx-1.8.2/vpx_dsp/arm/vpx_convolve_avg_neon_asm.asm000066400000000000000000000071261357355204000232500ustar00rootroot00000000000000; ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; EXPORT |vpx_convolve_avg_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 |vpx_convolve_avg_neon| PROC push {r4-r6, lr} ldrd r4, r5, [sp, #36] mov r6, r2 cmp r4, #32 bgt avg64 beq avg32 cmp r4, #8 bgt avg16 beq avg8 b avg4 avg64 sub lr, r1, #32 sub r4, r3, #32 avg64_h pld [r0, r1, lsl #1] vld1.8 {q0-q1}, [r0]! vld1.8 {q2-q3}, [r0], lr pld [r2, r3] vld1.8 {q8-q9}, [r6@128]! vld1.8 {q10-q11}, [r6@128], r4 vrhadd.u8 q0, q0, q8 vrhadd.u8 q1, q1, q9 vrhadd.u8 q2, q2, q10 vrhadd.u8 q3, q3, q11 vst1.8 {q0-q1}, [r2@128]! vst1.8 {q2-q3}, [r2@128], r4 subs r5, r5, #1 bgt avg64_h pop {r4-r6, pc} avg32 vld1.8 {q0-q1}, [r0], r1 vld1.8 {q2-q3}, [r0], r1 vld1.8 {q8-q9}, [r6@128], r3 vld1.8 {q10-q11}, [r6@128], r3 pld [r0] vrhadd.u8 q0, q0, q8 pld [r0, r1] vrhadd.u8 q1, q1, q9 pld [r6] vrhadd.u8 q2, q2, q10 pld [r6, r3] vrhadd.u8 q3, q3, q11 vst1.8 {q0-q1}, [r2@128], r3 vst1.8 {q2-q3}, [r2@128], r3 subs r5, r5, #2 bgt avg32 pop {r4-r6, pc} avg16 vld1.8 {q0}, [r0], r1 vld1.8 {q1}, [r0], r1 vld1.8 {q2}, [r6@128], r3 vld1.8 {q3}, [r6@128], r3 pld [r0] pld [r0, r1] vrhadd.u8 q0, q0, q2 pld [r6] pld [r6, r3] vrhadd.u8 q1, q1, q3 vst1.8 {q0}, [r2@128], r3 vst1.8 {q1}, [r2@128], r3 subs r5, r5, #2 bgt avg16 pop {r4-r6, pc} avg8 vld1.8 {d0}, [r0], r1 vld1.8 {d1}, [r0], r1 vld1.8 {d2}, [r6@64], r3 vld1.8 {d3}, [r6@64], r3 pld [r0] pld [r0, r1] vrhadd.u8 q0, q0, q1 pld [r6] pld [r6, r3] vst1.8 {d0}, [r2@64], r3 vst1.8 {d1}, [r2@64], r3 subs r5, r5, #2 bgt avg8 pop {r4-r6, pc} avg4 vld1.32 {d0[0]}, [r0], r1 vld1.32 {d0[1]}, [r0], r1 vld1.32 {d2[0]}, [r6@32], r3 vld1.32 {d2[1]}, [r6@32], r3 vrhadd.u8 d0, d0, d2 vst1.32 {d0[0]}, [r2@32], r3 vst1.32 {d0[1]}, [r2@32], r3 subs r5, r5, #2 bgt avg4 pop {r4-r6, pc} ENDP END libvpx-1.8.2/vpx_dsp/arm/vpx_convolve_copy_neon.c000066400000000000000000000050101357355204000222350ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" void vpx_convolve_copy_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; if (w < 8) { // copy4 do { *(uint32_t *)dst = *(const uint32_t *)src; src += src_stride; dst += dst_stride; *(uint32_t *)dst = *(const uint32_t *)src; src += src_stride; dst += dst_stride; h -= 2; } while (h > 0); } else if (w == 8) { // copy8 uint8x8_t s0, s1; do { s0 = vld1_u8(src); src += src_stride; s1 = vld1_u8(src); src += src_stride; vst1_u8(dst, s0); dst += dst_stride; vst1_u8(dst, s1); dst += dst_stride; h -= 2; } while (h > 0); } else if (w < 32) { // copy16 uint8x16_t s0, s1; do { s0 = vld1q_u8(src); src += src_stride; s1 = vld1q_u8(src); src += src_stride; vst1q_u8(dst, s0); dst += dst_stride; vst1q_u8(dst, s1); dst += dst_stride; h -= 2; } while (h > 0); } else if (w == 32) { // copy32 uint8x16_t s0, s1, s2, s3; do { s0 = vld1q_u8(src); s1 = vld1q_u8(src + 16); src += src_stride; s2 = vld1q_u8(src); s3 = vld1q_u8(src + 16); src += src_stride; vst1q_u8(dst, s0); vst1q_u8(dst + 16, s1); dst += dst_stride; vst1q_u8(dst, s2); vst1q_u8(dst + 16, s3); dst += dst_stride; h -= 2; } while (h > 0); } else { // copy64 uint8x16_t s0, s1, s2, s3; do { s0 = vld1q_u8(src); s1 = vld1q_u8(src + 16); s2 = vld1q_u8(src + 32); s3 = vld1q_u8(src + 48); src += src_stride; vst1q_u8(dst, s0); vst1q_u8(dst + 16, s1); vst1q_u8(dst + 32, s2); vst1q_u8(dst + 48, s3); dst += dst_stride; } while (--h); } } libvpx-1.8.2/vpx_dsp/arm/vpx_convolve_copy_neon_asm.asm000066400000000000000000000047371357355204000234520ustar00rootroot00000000000000; ; Copyright (c) 2013 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; EXPORT |vpx_convolve_copy_neon| ARM REQUIRE8 PRESERVE8 AREA ||.text||, CODE, READONLY, ALIGN=2 |vpx_convolve_copy_neon| PROC push {r4-r5, lr} ldrd r4, r5, [sp, #32] cmp r4, #32 bgt copy64 beq copy32 cmp r4, #8 bgt copy16 beq copy8 b copy4 copy64 sub lr, r1, #32 sub r3, r3, #32 copy64_h pld [r0, r1, lsl #1] vld1.8 {q0-q1}, [r0]! vld1.8 {q2-q3}, [r0], lr vst1.8 {q0-q1}, [r2@128]! vst1.8 {q2-q3}, [r2@128], r3 subs r5, r5, #1 bgt copy64_h pop {r4-r5, pc} copy32 pld [r0, r1, lsl #1] vld1.8 {q0-q1}, [r0], r1 pld [r0, r1, lsl #1] vld1.8 {q2-q3}, [r0], r1 vst1.8 {q0-q1}, [r2@128], r3 vst1.8 {q2-q3}, [r2@128], r3 subs r5, r5, #2 bgt copy32 pop {r4-r5, pc} copy16 pld [r0, r1, lsl #1] vld1.8 {q0}, [r0], r1 pld [r0, r1, lsl #1] vld1.8 {q1}, [r0], r1 vst1.8 {q0}, [r2@128], r3 vst1.8 {q1}, [r2@128], r3 subs r5, r5, #2 bgt copy16 pop {r4-r5, pc} copy8 pld [r0, r1, lsl #1] vld1.8 {d0}, [r0], r1 pld [r0, r1, lsl #1] vld1.8 {d2}, [r0], r1 vst1.8 {d0}, [r2@64], r3 vst1.8 {d2}, [r2@64], r3 subs r5, r5, #2 bgt copy8 pop {r4-r5, pc} copy4 ldr r12, [r0], r1 str r12, [r2], r3 subs r5, r5, #1 bgt copy4 pop {r4-r5, pc} ENDP END libvpx-1.8.2/vpx_dsp/arm/vpx_convolve_neon.c000066400000000000000000000053331357355204000212130ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" void vpx_convolve8_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { /* Given our constraints: w <= 64, h <= 64, taps == 8 we can reduce the * maximum buffer size to 64 * 64 + 7 (+ 1 to make it divisible by 4). */ uint8_t temp[64 * 72]; // Account for the vertical phase needing 3 lines prior and 4 lines post // (+ 1 to make it divisible by 4). const int intermediate_height = h + 8; assert(y_step_q4 == 16); assert(x_step_q4 == 16); /* Filter starting 3 lines back. The neon implementation will ignore the given * height and filter a multiple of 4 lines. Since this goes in to the temp * buffer which has lots of extra room and is subsequently discarded this is * safe if somewhat less than ideal. */ vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, intermediate_height); /* Step into the temp buffer 3 lines to get the actual frame data */ vpx_convolve8_vert_neon(temp + w * 3, w, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } void vpx_convolve8_avg_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { uint8_t temp[64 * 72]; const int intermediate_height = h + 8; assert(y_step_q4 == 16); assert(x_step_q4 == 16); /* This implementation has the same issues as above. In addition, we only want * to average the values after both passes. */ vpx_convolve8_horiz_neon(src - src_stride * 3, src_stride, temp, w, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, intermediate_height); vpx_convolve8_avg_vert_neon(temp + w * 3, w, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } libvpx-1.8.2/vpx_dsp/arm/vpx_scaled_convolve8_neon.c000066400000000000000000000275241357355204000226240ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/arm/transpose_neon.h" #include "vpx_dsp/arm/vpx_convolve8_neon.h" #include "vpx_ports/mem.h" static INLINE void scaledconvolve_horiz_w4( const uint8_t *src, const ptrdiff_t src_stride, uint8_t *dst, const ptrdiff_t dst_stride, const InterpKernel *const x_filters, const int x0_q4, const int x_step_q4, const int w, const int h) { DECLARE_ALIGNED(16, uint8_t, temp[4 * 4]); int x, y, z; src -= SUBPEL_TAPS / 2 - 1; y = h; do { int x_q4 = x0_q4; x = 0; do { // process 4 src_x steps for (z = 0; z < 4; ++z) { const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; if (x_q4 & SUBPEL_MASK) { const int16x8_t filters = vld1q_s16(x_filters[x_q4 & SUBPEL_MASK]); const int16x4_t filter3 = vdup_lane_s16(vget_low_s16(filters), 3); const int16x4_t filter4 = vdup_lane_s16(vget_high_s16(filters), 0); uint8x8_t s[8], d; int16x8_t ss[4]; int16x4_t t[8], tt; load_u8_8x4(src_x, src_stride, &s[0], &s[1], &s[2], &s[3]); transpose_u8_8x4(&s[0], &s[1], &s[2], &s[3]); ss[0] = vreinterpretq_s16_u16(vmovl_u8(s[0])); ss[1] = vreinterpretq_s16_u16(vmovl_u8(s[1])); ss[2] = vreinterpretq_s16_u16(vmovl_u8(s[2])); ss[3] = vreinterpretq_s16_u16(vmovl_u8(s[3])); t[0] = vget_low_s16(ss[0]); t[1] = vget_low_s16(ss[1]); t[2] = vget_low_s16(ss[2]); t[3] = vget_low_s16(ss[3]); t[4] = vget_high_s16(ss[0]); t[5] = vget_high_s16(ss[1]); t[6] = vget_high_s16(ss[2]); t[7] = vget_high_s16(ss[3]); tt = convolve8_4(t[0], t[1], t[2], t[3], t[4], t[5], t[6], t[7], filters, filter3, filter4); d = vqrshrun_n_s16(vcombine_s16(tt, tt), 7); vst1_lane_u32((uint32_t *)&temp[4 * z], vreinterpret_u32_u8(d), 0); } else { int i; for (i = 0; i < 4; ++i) { temp[z * 4 + i] = src_x[i * src_stride + 3]; } } x_q4 += x_step_q4; } // transpose the 4x4 filters values back to dst { const uint8x8x4_t d4 = vld4_u8(temp); vst1_lane_u32((uint32_t *)&dst[x + 0 * dst_stride], vreinterpret_u32_u8(d4.val[0]), 0); vst1_lane_u32((uint32_t *)&dst[x + 1 * dst_stride], vreinterpret_u32_u8(d4.val[1]), 0); vst1_lane_u32((uint32_t *)&dst[x + 2 * dst_stride], vreinterpret_u32_u8(d4.val[2]), 0); vst1_lane_u32((uint32_t *)&dst[x + 3 * dst_stride], vreinterpret_u32_u8(d4.val[3]), 0); } x += 4; } while (x < w); src += src_stride * 4; dst += dst_stride * 4; y -= 4; } while (y > 0); } static INLINE void scaledconvolve_horiz_w8( const uint8_t *src, const ptrdiff_t src_stride, uint8_t *dst, const ptrdiff_t dst_stride, const InterpKernel *const x_filters, const int x0_q4, const int x_step_q4, const int w, const int h) { DECLARE_ALIGNED(16, uint8_t, temp[8 * 8]); int x, y, z; src -= SUBPEL_TAPS / 2 - 1; // This function processes 8x8 areas. The intermediate height is not always // a multiple of 8, so force it to be a multiple of 8 here. 
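  // For example, h == 13 gives y = (13 + 7) & ~7 == 16, so two full 8-row
  // tiles are produced; the surplus rows land in the caller's oversized temp
  // buffer and are never consumed by the vertical pass.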
y = (h + 7) & ~7; do { int x_q4 = x0_q4; x = 0; do { uint8x8_t d[8]; // process 8 src_x steps for (z = 0; z < 8; ++z) { const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; if (x_q4 & SUBPEL_MASK) { const int16x8_t filters = vld1q_s16(x_filters[x_q4 & SUBPEL_MASK]); uint8x8_t s[8]; load_u8_8x8(src_x, src_stride, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); transpose_u8_8x8(&s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); d[0] = scale_filter_8(s, filters); vst1_u8(&temp[8 * z], d[0]); } else { int i; for (i = 0; i < 8; ++i) { temp[z * 8 + i] = src_x[i * src_stride + 3]; } } x_q4 += x_step_q4; } // transpose the 8x8 filters values back to dst load_u8_8x8(temp, 8, &d[0], &d[1], &d[2], &d[3], &d[4], &d[5], &d[6], &d[7]); transpose_u8_8x8(&d[0], &d[1], &d[2], &d[3], &d[4], &d[5], &d[6], &d[7]); vst1_u8(&dst[x + 0 * dst_stride], d[0]); vst1_u8(&dst[x + 1 * dst_stride], d[1]); vst1_u8(&dst[x + 2 * dst_stride], d[2]); vst1_u8(&dst[x + 3 * dst_stride], d[3]); vst1_u8(&dst[x + 4 * dst_stride], d[4]); vst1_u8(&dst[x + 5 * dst_stride], d[5]); vst1_u8(&dst[x + 6 * dst_stride], d[6]); vst1_u8(&dst[x + 7 * dst_stride], d[7]); x += 8; } while (x < w); src += src_stride * 8; dst += dst_stride * 8; } while (y -= 8); } static INLINE void scaledconvolve_vert_w4( const uint8_t *src, const ptrdiff_t src_stride, uint8_t *dst, const ptrdiff_t dst_stride, const InterpKernel *const y_filters, const int y0_q4, const int y_step_q4, const int w, const int h) { int y; int y_q4 = y0_q4; src -= src_stride * (SUBPEL_TAPS / 2 - 1); y = h; do { const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; if (y_q4 & SUBPEL_MASK) { const int16x8_t filters = vld1q_s16(y_filters[y_q4 & SUBPEL_MASK]); const int16x4_t filter3 = vdup_lane_s16(vget_low_s16(filters), 3); const int16x4_t filter4 = vdup_lane_s16(vget_high_s16(filters), 0); uint8x8_t s[8], d; int16x4_t t[8], tt; load_u8_8x8(src_y, src_stride, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); t[0] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[0]))); t[1] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[1]))); t[2] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[2]))); t[3] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[3]))); t[4] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[4]))); t[5] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[5]))); t[6] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[6]))); t[7] = vget_low_s16(vreinterpretq_s16_u16(vmovl_u8(s[7]))); tt = convolve8_4(t[0], t[1], t[2], t[3], t[4], t[5], t[6], t[7], filters, filter3, filter4); d = vqrshrun_n_s16(vcombine_s16(tt, tt), 7); vst1_lane_u32((uint32_t *)dst, vreinterpret_u32_u8(d), 0); } else { memcpy(dst, &src_y[3 * src_stride], w); } dst += dst_stride; y_q4 += y_step_q4; } while (--y); } static INLINE void scaledconvolve_vert_w8( const uint8_t *src, const ptrdiff_t src_stride, uint8_t *dst, const ptrdiff_t dst_stride, const InterpKernel *const y_filters, const int y0_q4, const int y_step_q4, const int w, const int h) { int y; int y_q4 = y0_q4; src -= src_stride * (SUBPEL_TAPS / 2 - 1); y = h; do { const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; if (y_q4 & SUBPEL_MASK) { const int16x8_t filters = vld1q_s16(y_filters[y_q4 & SUBPEL_MASK]); uint8x8_t s[8], d; load_u8_8x8(src_y, src_stride, &s[0], &s[1], &s[2], &s[3], &s[4], &s[5], &s[6], &s[7]); d = scale_filter_8(s, filters); vst1_u8(dst, d); } else { memcpy(dst, &src_y[3 * src_stride], w); } dst += dst_stride; y_q4 += y_step_q4; } while (--y); } static INLINE void 
scaledconvolve_vert_w16( const uint8_t *src, const ptrdiff_t src_stride, uint8_t *dst, const ptrdiff_t dst_stride, const InterpKernel *const y_filters, const int y0_q4, const int y_step_q4, const int w, const int h) { int x, y; int y_q4 = y0_q4; src -= src_stride * (SUBPEL_TAPS / 2 - 1); y = h; do { const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; if (y_q4 & SUBPEL_MASK) { x = 0; do { const int16x8_t filters = vld1q_s16(y_filters[y_q4 & SUBPEL_MASK]); uint8x16_t ss[8]; uint8x8_t s[8], d[2]; load_u8_16x8(src_y, src_stride, &ss[0], &ss[1], &ss[2], &ss[3], &ss[4], &ss[5], &ss[6], &ss[7]); s[0] = vget_low_u8(ss[0]); s[1] = vget_low_u8(ss[1]); s[2] = vget_low_u8(ss[2]); s[3] = vget_low_u8(ss[3]); s[4] = vget_low_u8(ss[4]); s[5] = vget_low_u8(ss[5]); s[6] = vget_low_u8(ss[6]); s[7] = vget_low_u8(ss[7]); d[0] = scale_filter_8(s, filters); s[0] = vget_high_u8(ss[0]); s[1] = vget_high_u8(ss[1]); s[2] = vget_high_u8(ss[2]); s[3] = vget_high_u8(ss[3]); s[4] = vget_high_u8(ss[4]); s[5] = vget_high_u8(ss[5]); s[6] = vget_high_u8(ss[6]); s[7] = vget_high_u8(ss[7]); d[1] = scale_filter_8(s, filters); vst1q_u8(&dst[x], vcombine_u8(d[0], d[1])); src_y += 16; x += 16; } while (x < w); } else { memcpy(dst, &src_y[3 * src_stride], w); } dst += dst_stride; y_q4 += y_step_q4; } while (--y); } void vpx_scaled_2d_neon(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { // Note: Fixed size intermediate buffer, temp, places limits on parameters. // 2d filtering proceeds in 2 steps: // (1) Interpolate horizontally into an intermediate buffer, temp. // (2) Interpolate temp vertically to derive the sub-pixel result. // Deriving the maximum number of rows in the temp buffer (135): // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). // --Largest block size is 64x64 pixels. // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the // original frame (in 1/16th pixel units). // --Must round-up because block may be located at sub-pixel position. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. // --Require an additional 8 rows for the horiz_w8 transpose tail. // When calling in frame scaling function, the smallest scaling factor is x1/4 // ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer is still // big enough. 
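  // Worked instance of the bound above, taking the extreme case h == 64,
  // y_step_q4 == 32, y0_q4 == 15: intermediate_height evaluates to
  // ((63 * 32 + 15) >> 4) + 8 == 126 + 8 == 134, inside the 135-row budget
  // even before the extra 8 transpose-tail rows.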
DECLARE_ALIGNED(16, uint8_t, temp[(135 + 8) * 64]); const int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; assert(w <= 64); assert(h <= 64); assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32)); assert(x_step_q4 <= 64); if (w >= 8) { scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, filter, x0_q4, x_step_q4, w, intermediate_height); } else { scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, filter, x0_q4, x_step_q4, w, intermediate_height); } if (w >= 16) { scaledconvolve_vert_w16(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } else if (w == 8) { scaledconvolve_vert_w8(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } else { scaledconvolve_vert_w4(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } } libvpx-1.8.2/vpx_dsp/avg.c000066400000000000000000000321301357355204000154350ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" unsigned int vpx_avg_8x8_c(const uint8_t *s, int p) { int i, j; int sum = 0; for (i = 0; i < 8; ++i, s += p) for (j = 0; j < 8; sum += s[j], ++j) { } return (sum + 32) >> 6; } unsigned int vpx_avg_4x4_c(const uint8_t *s, int p) { int i, j; int sum = 0; for (i = 0; i < 4; ++i, s += p) for (j = 0; j < 4; sum += s[j], ++j) { } return (sum + 8) >> 4; } #if CONFIG_VP9_HIGHBITDEPTH // src_diff: 13 bit, dynamic range [-4095, 4095] // coeff: 16 bit static void hadamard_highbd_col8_first_pass(const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff) { int16_t b0 = src_diff[0 * src_stride] + src_diff[1 * src_stride]; int16_t b1 = src_diff[0 * src_stride] - src_diff[1 * src_stride]; int16_t b2 = src_diff[2 * src_stride] + src_diff[3 * src_stride]; int16_t b3 = src_diff[2 * src_stride] - src_diff[3 * src_stride]; int16_t b4 = src_diff[4 * src_stride] + src_diff[5 * src_stride]; int16_t b5 = src_diff[4 * src_stride] - src_diff[5 * src_stride]; int16_t b6 = src_diff[6 * src_stride] + src_diff[7 * src_stride]; int16_t b7 = src_diff[6 * src_stride] - src_diff[7 * src_stride]; int16_t c0 = b0 + b2; int16_t c1 = b1 + b3; int16_t c2 = b0 - b2; int16_t c3 = b1 - b3; int16_t c4 = b4 + b6; int16_t c5 = b5 + b7; int16_t c6 = b4 - b6; int16_t c7 = b5 - b7; coeff[0] = c0 + c4; coeff[7] = c1 + c5; coeff[3] = c2 + c6; coeff[4] = c3 + c7; coeff[2] = c0 - c4; coeff[6] = c1 - c5; coeff[1] = c2 - c6; coeff[5] = c3 - c7; } // src_diff: 16 bit, dynamic range [-32760, 32760] // coeff: 19 bit static void hadamard_highbd_col8_second_pass(const int16_t *src_diff, ptrdiff_t src_stride, int32_t *coeff) { int32_t b0 = src_diff[0 * src_stride] + src_diff[1 * src_stride]; int32_t b1 = src_diff[0 * src_stride] - src_diff[1 * src_stride]; int32_t b2 = src_diff[2 * src_stride] + src_diff[3 * src_stride]; int32_t b3 = src_diff[2 * src_stride] - src_diff[3 * src_stride]; int32_t b4 = src_diff[4 * src_stride] + src_diff[5 * src_stride]; int32_t b5 = src_diff[4 * src_stride] - src_diff[5 * src_stride]; int32_t b6 = src_diff[6 * src_stride] + 
src_diff[7 * src_stride]; int32_t b7 = src_diff[6 * src_stride] - src_diff[7 * src_stride]; int32_t c0 = b0 + b2; int32_t c1 = b1 + b3; int32_t c2 = b0 - b2; int32_t c3 = b1 - b3; int32_t c4 = b4 + b6; int32_t c5 = b5 + b7; int32_t c6 = b4 - b6; int32_t c7 = b5 - b7; coeff[0] = c0 + c4; coeff[7] = c1 + c5; coeff[3] = c2 + c6; coeff[4] = c3 + c7; coeff[2] = c0 - c4; coeff[6] = c1 - c5; coeff[1] = c2 - c6; coeff[5] = c3 - c7; } // The order of the output coeff of the hadamard is not important. For // optimization purposes the final transpose may be skipped. void vpx_highbd_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int idx; int16_t buffer[64]; int32_t buffer2[64]; int16_t *tmp_buf = &buffer[0]; for (idx = 0; idx < 8; ++idx) { // src_diff: 13 bit // buffer: 16 bit, dynamic range [-32760, 32760] hadamard_highbd_col8_first_pass(src_diff, src_stride, tmp_buf); tmp_buf += 8; ++src_diff; } tmp_buf = &buffer[0]; for (idx = 0; idx < 8; ++idx) { // buffer: 16 bit // buffer2: 19 bit, dynamic range [-262080, 262080] hadamard_highbd_col8_second_pass(tmp_buf, 8, buffer2 + 8 * idx); ++tmp_buf; } for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx]; } // In place 16x16 2D Hadamard transform void vpx_highbd_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int idx; for (idx = 0; idx < 4; ++idx) { // src_diff: 13 bit, dynamic range [-4095, 4095] const int16_t *src_ptr = src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; vpx_highbd_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64); } // coeff: 19 bit, dynamic range [-262080, 262080] for (idx = 0; idx < 64; ++idx) { tran_low_t a0 = coeff[0]; tran_low_t a1 = coeff[64]; tran_low_t a2 = coeff[128]; tran_low_t a3 = coeff[192]; tran_low_t b0 = (a0 + a1) >> 1; tran_low_t b1 = (a0 - a1) >> 1; tran_low_t b2 = (a2 + a3) >> 1; tran_low_t b3 = (a2 - a3) >> 1; // new coeff dynamic range: 20 bit coeff[0] = b0 + b2; coeff[64] = b1 + b3; coeff[128] = b0 - b2; coeff[192] = b1 - b3; ++coeff; } } void vpx_highbd_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int idx; for (idx = 0; idx < 4; ++idx) { // src_diff: 13 bit, dynamic range [-4095, 4095] const int16_t *src_ptr = src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; vpx_highbd_hadamard_16x16_c(src_ptr, src_stride, coeff + idx * 256); } // coeff: 20 bit for (idx = 0; idx < 256; ++idx) { tran_low_t a0 = coeff[0]; tran_low_t a1 = coeff[256]; tran_low_t a2 = coeff[512]; tran_low_t a3 = coeff[768]; tran_low_t b0 = (a0 + a1) >> 2; tran_low_t b1 = (a0 - a1) >> 2; tran_low_t b2 = (a2 + a3) >> 2; tran_low_t b3 = (a2 - a3) >> 2; // new coeff dynamic range: 20 bit coeff[0] = b0 + b2; coeff[256] = b1 + b3; coeff[512] = b0 - b2; coeff[768] = b1 - b3; ++coeff; } } #endif // CONFIG_VP9_HIGHBITDEPTH // src_diff: first pass, 9 bit, dynamic range [-255, 255] // second pass, 12 bit, dynamic range [-2040, 2040] static void hadamard_col8(const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff) { int16_t b0 = src_diff[0 * src_stride] + src_diff[1 * src_stride]; int16_t b1 = src_diff[0 * src_stride] - src_diff[1 * src_stride]; int16_t b2 = src_diff[2 * src_stride] + src_diff[3 * src_stride]; int16_t b3 = src_diff[2 * src_stride] - src_diff[3 * src_stride]; int16_t b4 = src_diff[4 * src_stride] + src_diff[5 * src_stride]; int16_t b5 = src_diff[4 * src_stride] - src_diff[5 * src_stride]; int16_t b6 = src_diff[6 * src_stride] + src_diff[7 * src_stride]; int16_t b7 = src_diff[6 * src_stride] - 
src_diff[7 * src_stride]; int16_t c0 = b0 + b2; int16_t c1 = b1 + b3; int16_t c2 = b0 - b2; int16_t c3 = b1 - b3; int16_t c4 = b4 + b6; int16_t c5 = b5 + b7; int16_t c6 = b4 - b6; int16_t c7 = b5 - b7; coeff[0] = c0 + c4; coeff[7] = c1 + c5; coeff[3] = c2 + c6; coeff[4] = c3 + c7; coeff[2] = c0 - c4; coeff[6] = c1 - c5; coeff[1] = c2 - c6; coeff[5] = c3 - c7; } // The order of the output coeff of the hadamard is not important. For // optimization purposes the final transpose may be skipped. void vpx_hadamard_8x8_c(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int idx; int16_t buffer[64]; int16_t buffer2[64]; int16_t *tmp_buf = &buffer[0]; for (idx = 0; idx < 8; ++idx) { hadamard_col8(src_diff, src_stride, tmp_buf); // src_diff: 9 bit // dynamic range [-255, 255] tmp_buf += 8; ++src_diff; } tmp_buf = &buffer[0]; for (idx = 0; idx < 8; ++idx) { hadamard_col8(tmp_buf, 8, buffer2 + 8 * idx); // tmp_buf: 12 bit // dynamic range [-2040, 2040] // buffer2: 15 bit // dynamic range [-16320, 16320] ++tmp_buf; } for (idx = 0; idx < 64; ++idx) coeff[idx] = (tran_low_t)buffer2[idx]; } // In place 16x16 2D Hadamard transform void vpx_hadamard_16x16_c(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int idx; for (idx = 0; idx < 4; ++idx) { // src_diff: 9 bit, dynamic range [-255, 255] const int16_t *src_ptr = src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; vpx_hadamard_8x8_c(src_ptr, src_stride, coeff + idx * 64); } // coeff: 15 bit, dynamic range [-16320, 16320] for (idx = 0; idx < 64; ++idx) { tran_low_t a0 = coeff[0]; tran_low_t a1 = coeff[64]; tran_low_t a2 = coeff[128]; tran_low_t a3 = coeff[192]; tran_low_t b0 = (a0 + a1) >> 1; // (a0 + a1): 16 bit, [-32640, 32640] tran_low_t b1 = (a0 - a1) >> 1; // b0-b3: 15 bit, dynamic range tran_low_t b2 = (a2 + a3) >> 1; // [-16320, 16320] tran_low_t b3 = (a2 - a3) >> 1; coeff[0] = b0 + b2; // 16 bit, [-32640, 32640] coeff[64] = b1 + b3; coeff[128] = b0 - b2; coeff[192] = b1 - b3; ++coeff; } } void vpx_hadamard_32x32_c(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int idx; for (idx = 0; idx < 4; ++idx) { // src_diff: 9 bit, dynamic range [-255, 255] const int16_t *src_ptr = src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; vpx_hadamard_16x16_c(src_ptr, src_stride, coeff + idx * 256); } // coeff: 15 bit, dynamic range [-16320, 16320] for (idx = 0; idx < 256; ++idx) { tran_low_t a0 = coeff[0]; tran_low_t a1 = coeff[256]; tran_low_t a2 = coeff[512]; tran_low_t a3 = coeff[768]; tran_low_t b0 = (a0 + a1) >> 2; // (a0 + a1): 16 bit, [-32640, 32640] tran_low_t b1 = (a0 - a1) >> 2; // b0-b3: 15 bit, dynamic range tran_low_t b2 = (a2 + a3) >> 2; // [-16320, 16320] tran_low_t b3 = (a2 - a3) >> 2; coeff[0] = b0 + b2; // 16 bit, [-32640, 32640] coeff[256] = b1 + b3; coeff[512] = b0 - b2; coeff[768] = b1 - b3; ++coeff; } } #if CONFIG_VP9_HIGHBITDEPTH // coeff: dynamic range 20 bit. // length: value range {16, 64, 256, 1024}. int vpx_highbd_satd_c(const tran_low_t *coeff, int length) { int i; int satd = 0; for (i = 0; i < length; ++i) satd += abs(coeff[i]); // satd: 30 bits return satd; } #endif // CONFIG_VP9_HIGHBITDEPTH // coeff: 16 bits, dynamic range [-32640, 32640]. // length: value range {16, 64, 256, 1024}. int vpx_satd_c(const tran_low_t *coeff, int length) { int i; int satd = 0; for (i = 0; i < length; ++i) satd += abs(coeff[i]); // satd: 26 bits, dynamic range [-32640 * 1024, 32640 * 1024] return satd; } // Integer projection onto row vectors. // height: value range {16, 32, 64}. 
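// Example bound for height == 64: each column sum is at most 64 * 255 ==
// 16320 (14 bits) and norm_factor == 32, so every hbuf entry lands in
// [0, 510], matching the 9-bit range noted in the loop body.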
void vpx_int_pro_row_c(int16_t hbuf[16], const uint8_t *ref, const int ref_stride, const int height) { int idx; const int norm_factor = height >> 1; for (idx = 0; idx < 16; ++idx) { int i; hbuf[idx] = 0; // hbuf[idx]: 14 bit, dynamic range [0, 16320]. for (i = 0; i < height; ++i) hbuf[idx] += ref[i * ref_stride]; // hbuf[idx]: 9 bit, dynamic range [0, 510]. hbuf[idx] /= norm_factor; ++ref; } } // width: value range {16, 32, 64}. int16_t vpx_int_pro_col_c(const uint8_t *ref, const int width) { int idx; int16_t sum = 0; // sum: 14 bit, dynamic range [0, 16320] for (idx = 0; idx < width; ++idx) sum += ref[idx]; return sum; } // ref: [0 - 510] // src: [0 - 510] // bwl: {2, 3, 4} int vpx_vector_var_c(const int16_t *ref, const int16_t *src, const int bwl) { int i; int width = 4 << bwl; int sse = 0, mean = 0, var; for (i = 0; i < width; ++i) { int diff = ref[i] - src[i]; // diff: dynamic range [-510, 510], 10 bits. mean += diff; // mean: dynamic range 16 bits. sse += diff * diff; // sse: dynamic range 26 bits. } // (mean * mean): dynamic range 31 bits. var = sse - ((mean * mean) >> (bwl + 2)); return var; } void vpx_minmax_8x8_c(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max) { int i, j; *min = 255; *max = 0; for (i = 0; i < 8; ++i, s += p, d += dp) { for (j = 0; j < 8; ++j) { int diff = abs(s[j] - d[j]); *min = diff < *min ? diff : *min; *max = diff > *max ? diff : *max; } } } #if CONFIG_VP9_HIGHBITDEPTH unsigned int vpx_highbd_avg_8x8_c(const uint8_t *s8, int p) { int i, j; int sum = 0; const uint16_t *s = CONVERT_TO_SHORTPTR(s8); for (i = 0; i < 8; ++i, s += p) for (j = 0; j < 8; sum += s[j], ++j) { } return (sum + 32) >> 6; } unsigned int vpx_highbd_avg_4x4_c(const uint8_t *s8, int p) { int i, j; int sum = 0; const uint16_t *s = CONVERT_TO_SHORTPTR(s8); for (i = 0; i < 4; ++i, s += p) for (j = 0; j < 4; sum += s[j], ++j) { } return (sum + 8) >> 4; } void vpx_highbd_minmax_8x8_c(const uint8_t *s8, int p, const uint8_t *d8, int dp, int *min, int *max) { int i, j; const uint16_t *s = CONVERT_TO_SHORTPTR(s8); const uint16_t *d = CONVERT_TO_SHORTPTR(d8); *min = 255; *max = 0; for (i = 0; i < 8; ++i, s += p, d += dp) { for (j = 0; j < 8; ++j) { int diff = abs(s[j] - d[j]); *min = diff < *min ? diff : *min; *max = diff > *max ? diff : *max; } } } #endif // CONFIG_VP9_HIGHBITDEPTH libvpx-1.8.2/vpx_dsp/bitreader.c000066400000000000000000000060021357355204000166200ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_config.h" #include "vpx_dsp/bitreader.h" #include "vpx_dsp/prob.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #include "vpx_mem/vpx_mem.h" #include "vpx_util/endian_inl.h" int vpx_reader_init(vpx_reader *r, const uint8_t *buffer, size_t size, vpx_decrypt_cb decrypt_cb, void *decrypt_state) { if (size && !buffer) { return 1; } else { r->buffer_end = buffer + size; r->buffer = buffer; r->value = 0; r->count = -8; r->range = 255; r->decrypt_cb = decrypt_cb; r->decrypt_state = decrypt_state; vpx_reader_fill(r); return vpx_read_bit(r) != 0; // marker bit } } void vpx_reader_fill(vpx_reader *r) { const uint8_t *const buffer_end = r->buffer_end; const uint8_t *buffer = r->buffer; const uint8_t *buffer_start = buffer; BD_VALUE value = r->value; int count = r->count; const size_t bytes_left = buffer_end - buffer; const size_t bits_left = bytes_left * CHAR_BIT; int shift = BD_VALUE_SIZE - CHAR_BIT - (count + CHAR_BIT); if (r->decrypt_cb) { size_t n = VPXMIN(sizeof(r->clear_buffer), bytes_left); r->decrypt_cb(r->decrypt_state, buffer, r->clear_buffer, (int)n); buffer = r->clear_buffer; buffer_start = r->clear_buffer; } if (bits_left > BD_VALUE_SIZE) { const int bits = (shift & 0xfffffff8) + CHAR_BIT; BD_VALUE nv; BD_VALUE big_endian_values; memcpy(&big_endian_values, buffer, sizeof(BD_VALUE)); #if SIZE_MAX == 0xffffffffffffffffULL big_endian_values = HToBE64(big_endian_values); #else big_endian_values = HToBE32(big_endian_values); #endif nv = big_endian_values >> (BD_VALUE_SIZE - bits); count += bits; buffer += (bits >> 3); value = r->value | (nv << (shift & 0x7)); } else { const int bits_over = (int)(shift + CHAR_BIT - (int)bits_left); int loop_end = 0; if (bits_over >= 0) { count += LOTS_OF_BITS; loop_end = bits_over; } if (bits_over < 0 || bits_left) { while (shift >= loop_end) { count += CHAR_BIT; value |= (BD_VALUE)*buffer++ << shift; shift -= CHAR_BIT; } } } // NOTE: Variable 'buffer' may not relate to 'r->buffer' after decryption, // so we increase 'r->buffer' by the amount that 'buffer' moved, rather than // assign 'buffer' to 'r->buffer'. r->buffer += buffer - buffer_start; r->value = value; r->count = count; } const uint8_t *vpx_reader_find_end(vpx_reader *r) { // Find the end of the coded buffer while (r->count > CHAR_BIT && r->count < BD_VALUE_SIZE) { r->count -= CHAR_BIT; r->buffer--; } return r->buffer; } libvpx-1.8.2/vpx_dsp/bitreader.h000066400000000000000000000107031357355204000166300ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_BITREADER_H_ #define VPX_VPX_DSP_BITREADER_H_ #include #include #include #include "./vpx_config.h" #include "vpx_ports/mem.h" #include "vpx/vp8dx.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/prob.h" #if CONFIG_BITSTREAM_DEBUG #include "vpx_util/vpx_debug_util.h" #endif // CONFIG_BITSTREAM_DEBUG #ifdef __cplusplus extern "C" { #endif typedef size_t BD_VALUE; #define BD_VALUE_SIZE ((int)sizeof(BD_VALUE) * CHAR_BIT) // This is meant to be a large, positive constant that can still be efficiently // loaded as an immediate (on platforms like ARM, for example). // Even relatively modest values like 100 would work fine. 
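// 0x40000000 (1 << 30) satisfies both constraints: it dwarfs any legitimate
// bit count, and it fits in a single rotated 8-bit ARM immediate, so adding
// it to r->count stays a one-instruction operation. vpx_reader_has_error()
// below relies on this sentinel to detect reads past the end of the stream.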
#define LOTS_OF_BITS 0x40000000 typedef struct { // Be careful when reordering this struct, it may impact the cache negatively. BD_VALUE value; unsigned int range; int count; const uint8_t *buffer_end; const uint8_t *buffer; vpx_decrypt_cb decrypt_cb; void *decrypt_state; uint8_t clear_buffer[sizeof(BD_VALUE) + 1]; } vpx_reader; int vpx_reader_init(vpx_reader *r, const uint8_t *buffer, size_t size, vpx_decrypt_cb decrypt_cb, void *decrypt_state); void vpx_reader_fill(vpx_reader *r); const uint8_t *vpx_reader_find_end(vpx_reader *r); static INLINE int vpx_reader_has_error(vpx_reader *r) { // Check if we have reached the end of the buffer. // // Variable 'count' stores the number of bits in the 'value' buffer, minus // 8. The top byte is part of the algorithm, and the remainder is buffered // to be shifted into it. So if count == 8, the top 16 bits of 'value' are // occupied, 8 for the algorithm and 8 in the buffer. // // When reading a byte from the user's buffer, count is filled with 8 and // one byte is filled into the value buffer. When we reach the end of the // data, count is additionally filled with LOTS_OF_BITS. So when // count == LOTS_OF_BITS - 1, the user's data has been exhausted. // // 1 if we have tried to decode bits after the end of stream was encountered. // 0 No error. return r->count > BD_VALUE_SIZE && r->count < LOTS_OF_BITS; } static INLINE int vpx_read(vpx_reader *r, int prob) { unsigned int bit = 0; BD_VALUE value; BD_VALUE bigsplit; int count; unsigned int range; unsigned int split = (r->range * prob + (256 - prob)) >> CHAR_BIT; if (r->count < 0) vpx_reader_fill(r); value = r->value; count = r->count; bigsplit = (BD_VALUE)split << (BD_VALUE_SIZE - CHAR_BIT); range = split; if (value >= bigsplit) { range = r->range - split; value = value - bigsplit; bit = 1; } { const unsigned char shift = vpx_norm[(unsigned char)range]; range <<= shift; value <<= shift; count -= shift; } r->value = value; r->count = count; r->range = range; #if CONFIG_BITSTREAM_DEBUG { const int queue_r = bitstream_queue_get_read(); const int frame_idx = bitstream_queue_get_frame_read(); int ref_result, ref_prob; bitstream_queue_pop(&ref_result, &ref_prob); if ((int)bit != ref_result) { fprintf(stderr, "\n *** [bit] result error, frame_idx_r %d bit %d ref_result %d " "queue_r %d\n", frame_idx, bit, ref_result, queue_r); assert(0); } if (prob != ref_prob) { fprintf(stderr, "\n *** [bit] prob error, frame_idx_r %d prob %d ref_prob %d " "queue_r %d\n", frame_idx, prob, ref_prob, queue_r); assert(0); } } #endif return bit; } static INLINE int vpx_read_bit(vpx_reader *r) { return vpx_read(r, 128); // vpx_prob_half } static INLINE int vpx_read_literal(vpx_reader *r, int bits) { int literal = 0, bit; for (bit = bits - 1; bit >= 0; bit--) literal |= vpx_read_bit(r) << bit; return literal; } static INLINE int vpx_read_tree(vpx_reader *r, const vpx_tree_index *tree, const vpx_prob *probs) { vpx_tree_index i = 0; while ((i = tree[i + vpx_read(r, probs[i >> 1])]) > 0) continue; return -i; } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_BITREADER_H_ libvpx-1.8.2/vpx_dsp/bitreader_buffer.c000066400000000000000000000026771357355204000201670ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "./bitreader_buffer.h" size_t vpx_rb_bytes_read(struct vpx_read_bit_buffer *rb) { return (rb->bit_offset + 7) >> 3; } int vpx_rb_read_bit(struct vpx_read_bit_buffer *rb) { const size_t off = rb->bit_offset; const size_t p = off >> 3; const int q = 7 - (int)(off & 0x7); if (rb->bit_buffer + p < rb->bit_buffer_end) { const int bit = (rb->bit_buffer[p] >> q) & 1; rb->bit_offset = off + 1; return bit; } else { if (rb->error_handler != NULL) rb->error_handler(rb->error_handler_data); return 0; } } int vpx_rb_read_literal(struct vpx_read_bit_buffer *rb, int bits) { int value = 0, bit; for (bit = bits - 1; bit >= 0; bit--) value |= vpx_rb_read_bit(rb) << bit; return value; } int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb, int bits) { const int value = vpx_rb_read_literal(rb, bits); return vpx_rb_read_bit(rb) ? -value : value; } int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, int bits) { return vpx_rb_read_signed_literal(rb, bits); } libvpx-1.8.2/vpx_dsp/bitreader_buffer.h000066400000000000000000000023301357355204000201560ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_BITREADER_BUFFER_H_ #define VPX_VPX_DSP_BITREADER_BUFFER_H_ #include #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif typedef void (*vpx_rb_error_handler)(void *data); struct vpx_read_bit_buffer { const uint8_t *bit_buffer; const uint8_t *bit_buffer_end; size_t bit_offset; void *error_handler_data; vpx_rb_error_handler error_handler; }; size_t vpx_rb_bytes_read(struct vpx_read_bit_buffer *rb); int vpx_rb_read_bit(struct vpx_read_bit_buffer *rb); int vpx_rb_read_literal(struct vpx_read_bit_buffer *rb, int bits); int vpx_rb_read_signed_literal(struct vpx_read_bit_buffer *rb, int bits); int vpx_rb_read_inv_signed_literal(struct vpx_read_bit_buffer *rb, int bits); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_BITREADER_BUFFER_H_ libvpx-1.8.2/vpx_dsp/bitwriter.c000066400000000000000000000021141357355204000166720ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./bitwriter.h" #if CONFIG_BITSTREAM_DEBUG #include "vpx_util/vpx_debug_util.h" #endif void vpx_start_encode(vpx_writer *br, uint8_t *source) { br->lowvalue = 0; br->range = 255; br->count = -24; br->buffer = source; br->pos = 0; vpx_write_bit(br, 0); } void vpx_stop_encode(vpx_writer *br) { int i; #if CONFIG_BITSTREAM_DEBUG bitstream_queue_set_skip_write(1); #endif for (i = 0; i < 32; i++) vpx_write_bit(br, 0); // Ensure there's no ambigous collision with any index marker bytes if ((br->buffer[br->pos - 1] & 0xe0) == 0xc0) br->buffer[br->pos++] = 0; #if CONFIG_BITSTREAM_DEBUG bitstream_queue_set_skip_write(0); #endif } libvpx-1.8.2/vpx_dsp/bitwriter.h000066400000000000000000000051761357355204000167120ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_BITWRITER_H_ #define VPX_VPX_DSP_BITWRITER_H_ #include #include "vpx_ports/mem.h" #include "vpx_dsp/prob.h" #if CONFIG_BITSTREAM_DEBUG #include "vpx_util/vpx_debug_util.h" #endif // CONFIG_BITSTREAM_DEBUG #ifdef __cplusplus extern "C" { #endif typedef struct vpx_writer { unsigned int lowvalue; unsigned int range; int count; unsigned int pos; uint8_t *buffer; } vpx_writer; void vpx_start_encode(vpx_writer *br, uint8_t *source); void vpx_stop_encode(vpx_writer *br); static INLINE void vpx_write(vpx_writer *br, int bit, int probability) { unsigned int split; int count = br->count; unsigned int range = br->range; unsigned int lowvalue = br->lowvalue; int shift; #if CONFIG_BITSTREAM_DEBUG /* int queue_r = 0; int frame_idx_r = 0; int queue_w = bitstream_queue_get_write(); int frame_idx_w = bitstream_queue_get_frame_write(); if (frame_idx_w == frame_idx_r && queue_w == queue_r) { fprintf(stderr, "\n *** bitstream queue at frame_idx_w %d queue_w %d\n", frame_idx_w, queue_w); assert(0); } */ bitstream_queue_push(bit, probability); #endif split = 1 + (((range - 1) * probability) >> 8); range = split; if (bit) { lowvalue += split; range = br->range - split; } shift = vpx_norm[range]; range <<= shift; count += shift; if (count >= 0) { int offset = shift - count; if ((lowvalue << (offset - 1)) & 0x80000000) { int x = br->pos - 1; while (x >= 0 && br->buffer[x] == 0xff) { br->buffer[x] = 0; x--; } br->buffer[x] += 1; } br->buffer[br->pos++] = (lowvalue >> (24 - offset)) & 0xff; lowvalue <<= offset; shift = count; lowvalue &= 0xffffff; count -= 8; } lowvalue <<= shift; br->count = count; br->lowvalue = lowvalue; br->range = range; } static INLINE void vpx_write_bit(vpx_writer *w, int bit) { vpx_write(w, bit, 128); // vpx_prob_half } static INLINE void vpx_write_literal(vpx_writer *w, int data, int bits) { int bit; for (bit = bits - 1; bit >= 0; bit--) vpx_write_bit(w, 1 & (data >> bit)); } #define vpx_write_prob(w, v) vpx_write_literal((w), (v), 8) #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_BITWRITER_H_ libvpx-1.8.2/vpx_dsp/bitwriter_buffer.c000066400000000000000000000025721357355204000202330ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_config.h" #include "./bitwriter_buffer.h" size_t vpx_wb_bytes_written(const struct vpx_write_bit_buffer *wb) { return wb->bit_offset / CHAR_BIT + (wb->bit_offset % CHAR_BIT > 0); } void vpx_wb_write_bit(struct vpx_write_bit_buffer *wb, int bit) { const int off = (int)wb->bit_offset; const int p = off / CHAR_BIT; const int q = CHAR_BIT - 1 - off % CHAR_BIT; if (q == CHAR_BIT - 1) { wb->bit_buffer[p] = bit << q; } else { wb->bit_buffer[p] &= ~(1 << q); wb->bit_buffer[p] |= bit << q; } wb->bit_offset = off + 1; } void vpx_wb_write_literal(struct vpx_write_bit_buffer *wb, int data, int bits) { int bit; for (bit = bits - 1; bit >= 0; bit--) vpx_wb_write_bit(wb, (data >> bit) & 1); } void vpx_wb_write_inv_signed_literal(struct vpx_write_bit_buffer *wb, int data, int bits) { vpx_wb_write_literal(wb, abs(data), bits); vpx_wb_write_bit(wb, data < 0); } libvpx-1.8.2/vpx_dsp/bitwriter_buffer.h000066400000000000000000000020611357355204000202310ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_BITWRITER_BUFFER_H_ #define VPX_VPX_DSP_BITWRITER_BUFFER_H_ #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif struct vpx_write_bit_buffer { uint8_t *bit_buffer; size_t bit_offset; }; size_t vpx_wb_bytes_written(const struct vpx_write_bit_buffer *wb); void vpx_wb_write_bit(struct vpx_write_bit_buffer *wb, int bit); void vpx_wb_write_literal(struct vpx_write_bit_buffer *wb, int data, int bits); void vpx_wb_write_inv_signed_literal(struct vpx_write_bit_buffer *wb, int data, int bits); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_BITWRITER_BUFFER_H_ libvpx-1.8.2/vpx_dsp/deblock.c000066400000000000000000000144561357355204000162760ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" const int16_t vpx_rv[] = { 8, 5, 2, 2, 8, 12, 4, 9, 8, 3, 0, 3, 9, 0, 0, 0, 8, 3, 14, 4, 10, 1, 11, 14, 1, 14, 9, 6, 12, 11, 8, 6, 10, 0, 0, 8, 9, 0, 3, 14, 8, 11, 13, 4, 2, 9, 0, 3, 9, 6, 1, 2, 3, 14, 13, 1, 8, 2, 9, 7, 3, 3, 1, 13, 13, 6, 6, 5, 2, 7, 11, 9, 11, 8, 7, 3, 2, 0, 13, 13, 14, 4, 12, 5, 12, 10, 8, 10, 13, 10, 4, 14, 4, 10, 0, 8, 11, 1, 13, 7, 7, 14, 6, 14, 13, 2, 13, 5, 4, 4, 0, 10, 0, 5, 13, 2, 12, 7, 11, 13, 8, 0, 4, 10, 7, 2, 7, 2, 2, 5, 3, 4, 7, 3, 3, 14, 14, 5, 9, 13, 3, 14, 3, 6, 3, 0, 11, 8, 13, 1, 13, 1, 12, 0, 10, 9, 7, 6, 2, 8, 5, 2, 13, 7, 1, 13, 14, 7, 6, 7, 9, 6, 10, 11, 7, 8, 7, 5, 14, 8, 4, 4, 0, 8, 7, 10, 0, 8, 14, 11, 3, 12, 5, 7, 14, 3, 14, 5, 2, 6, 11, 12, 12, 8, 0, 11, 13, 1, 2, 0, 5, 10, 14, 7, 8, 0, 4, 11, 0, 8, 0, 3, 10, 5, 8, 0, 11, 6, 7, 8, 10, 7, 13, 9, 2, 5, 1, 5, 10, 2, 4, 3, 5, 6, 10, 8, 9, 4, 11, 14, 0, 10, 0, 5, 13, 2, 12, 7, 11, 13, 8, 0, 4, 10, 7, 2, 7, 2, 2, 5, 3, 4, 7, 3, 3, 14, 14, 5, 9, 13, 3, 14, 3, 6, 3, 0, 11, 8, 13, 1, 13, 1, 12, 0, 10, 9, 7, 6, 2, 8, 5, 2, 13, 7, 1, 13, 14, 7, 6, 7, 9, 6, 10, 11, 7, 8, 7, 5, 14, 8, 4, 4, 0, 8, 7, 10, 0, 8, 14, 11, 3, 12, 5, 7, 14, 3, 14, 5, 2, 6, 11, 12, 12, 8, 0, 11, 13, 1, 2, 0, 5, 10, 14, 7, 8, 0, 4, 11, 0, 8, 0, 3, 10, 5, 8, 0, 11, 6, 7, 8, 10, 7, 13, 9, 2, 5, 1, 5, 10, 2, 4, 3, 5, 6, 10, 8, 9, 4, 11, 14, 3, 8, 3, 7, 8, 5, 11, 4, 12, 3, 11, 9, 14, 8, 14, 13, 4, 3, 1, 2, 14, 6, 5, 4, 4, 11, 4, 6, 2, 1, 5, 8, 8, 12, 13, 5, 14, 10, 12, 13, 0, 9, 5, 5, 11, 10, 13, 9, 10, 13, }; void vpx_post_proc_down_and_across_mb_row_c(unsigned char *src, unsigned char *dst, int src_pitch, int dst_pitch, int cols, unsigned char *flimits, int size) { unsigned char *p_src, *p_dst; int row; int col; unsigned char v; unsigned char d[4]; assert(size >= 8); assert(cols >= 8); for (row = 0; row < size; row++) { /* post_proc_down for one row */ p_src = src; p_dst = dst; for (col = 0; col < cols; col++) { unsigned char p_above2 = p_src[col - 2 * src_pitch]; unsigned char p_above1 = p_src[col - src_pitch]; unsigned char p_below1 = p_src[col + src_pitch]; unsigned char p_below2 = p_src[col + 2 * src_pitch]; v = p_src[col]; if ((abs(v - p_above2) < flimits[col]) && (abs(v - p_above1) < flimits[col]) && (abs(v - p_below1) < flimits[col]) && (abs(v - p_below2) < flimits[col])) { unsigned char k1, k2, k3; k1 = (p_above2 + p_above1 + 1) >> 1; k2 = (p_below2 + p_below1 + 1) >> 1; k3 = (k1 + k2 + 1) >> 1; v = (k3 + v + 1) >> 1; } p_dst[col] = v; } /* now post_proc_across */ p_src = dst; p_dst = dst; p_src[-2] = p_src[-1] = p_src[0]; p_src[cols] = p_src[cols + 1] = p_src[cols - 1]; for (col = 0; col < cols; col++) { v = p_src[col]; if ((abs(v - p_src[col - 2]) < flimits[col]) && (abs(v - p_src[col - 1]) < flimits[col]) && (abs(v - p_src[col + 1]) < flimits[col]) && (abs(v - p_src[col + 2]) < flimits[col])) { unsigned char k1, k2, k3; k1 = (p_src[col - 2] + p_src[col - 1] + 1) >> 1; k2 = (p_src[col + 2] + p_src[col + 1] + 1) >> 1; k3 = (k1 + k2 + 1) >> 1; v = (k3 + v + 1) >> 1; } d[col & 3] = v; if (col >= 2) p_dst[col - 2] = d[(col - 2) & 3]; } /* handle the last two pixels */ p_dst[col - 2] = d[(col - 2) & 3]; p_dst[col - 1] = d[(col - 1) & 3]; /* next row */ src += src_pitch; dst += dst_pitch; } } void vpx_mbpost_proc_across_ip_c(unsigned char *src, int pitch, int rows, int cols, int flimit) { int r, c, i; unsigned char *s = src; unsigned char d[16]; for (r = 0; r < rows; r++) { int sumsq = 16; int sum = 0; for (i = -8; i < 0; i++) s[i] = s[0]; /* 17 avoids 
valgrind warning - we buffer values in c in d * and only write them when we've read 8 ahead... */ for (i = 0; i < 17; i++) s[i + cols] = s[cols - 1]; for (i = -8; i <= 6; i++) { sumsq += s[i] * s[i]; sum += s[i]; d[i + 8] = 0; } for (c = 0; c < cols + 8; c++) { int x = s[c + 7] - s[c - 8]; int y = s[c + 7] + s[c - 8]; sum += x; sumsq += x * y; d[c & 15] = s[c]; if (sumsq * 15 - sum * sum < flimit) { d[c & 15] = (8 + sum + s[c]) >> 4; } s[c - 8] = d[(c - 8) & 15]; } s += pitch; } } void vpx_mbpost_proc_down_c(unsigned char *dst, int pitch, int rows, int cols, int flimit) { int r, c, i; for (c = 0; c < cols; c++) { unsigned char *s = &dst[c]; int sumsq = 0; int sum = 0; unsigned char d[16]; for (i = -8; i < 0; i++) s[i * pitch] = s[0]; /* 17 avoids valgrind warning - we buffer values in c in d * and only write them when we've read 8 ahead... */ for (i = 0; i < 17; i++) s[(i + rows) * pitch] = s[(rows - 1) * pitch]; for (i = -8; i <= 6; i++) { sumsq += s[i * pitch] * s[i * pitch]; sum += s[i * pitch]; } for (r = 0; r < rows + 8; r++) { sumsq += s[7 * pitch] * s[7 * pitch] - s[-8 * pitch] * s[-8 * pitch]; sum += s[7 * pitch] - s[-8 * pitch]; d[r & 15] = s[0]; if (sumsq * 15 - sum * sum < flimit) { d[r & 15] = (vpx_rv[(r & 127) + (c & 7)] + sum + s[0]) >> 4; } if (r >= 8) s[-8 * pitch] = d[(r - 8) & 15]; s += pitch; } } } libvpx-1.8.2/vpx_dsp/fastssim.c000066400000000000000000000402011357355204000165070ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. * * This code was originally written by: Nathan E. Egge, at the Daala * project. */ #include #include #include #include #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/ssim.h" #include "vpx_ports/system_state.h" typedef struct fs_level fs_level; typedef struct fs_ctx fs_ctx; #define SSIM_C1 (255 * 255 * 0.01 * 0.01) #define SSIM_C2 (255 * 255 * 0.03 * 0.03) #if CONFIG_VP9_HIGHBITDEPTH #define SSIM_C1_10 (1023 * 1023 * 0.01 * 0.01) #define SSIM_C1_12 (4095 * 4095 * 0.01 * 0.01) #define SSIM_C2_10 (1023 * 1023 * 0.03 * 0.03) #define SSIM_C2_12 (4095 * 4095 * 0.03 * 0.03) #endif #define FS_MINI(_a, _b) ((_a) < (_b) ? (_a) : (_b)) #define FS_MAXI(_a, _b) ((_a) > (_b) ? 
(_a) : (_b)) struct fs_level { uint32_t *im1; uint32_t *im2; double *ssim; int w; int h; }; struct fs_ctx { fs_level *level; int nlevels; unsigned *col_buf; }; static void fs_ctx_init(fs_ctx *_ctx, int _w, int _h, int _nlevels) { unsigned char *data; size_t data_size; int lw; int lh; int l; lw = (_w + 1) >> 1; lh = (_h + 1) >> 1; data_size = _nlevels * sizeof(fs_level) + 2 * (lw + 8) * 8 * sizeof(*_ctx->col_buf); for (l = 0; l < _nlevels; l++) { size_t im_size; size_t level_size; im_size = lw * (size_t)lh; level_size = 2 * im_size * sizeof(*_ctx->level[l].im1); level_size += sizeof(*_ctx->level[l].ssim) - 1; level_size /= sizeof(*_ctx->level[l].ssim); level_size += im_size; level_size *= sizeof(*_ctx->level[l].ssim); data_size += level_size; lw = (lw + 1) >> 1; lh = (lh + 1) >> 1; } data = (unsigned char *)malloc(data_size); _ctx->level = (fs_level *)data; _ctx->nlevels = _nlevels; data += _nlevels * sizeof(*_ctx->level); lw = (_w + 1) >> 1; lh = (_h + 1) >> 1; for (l = 0; l < _nlevels; l++) { size_t im_size; size_t level_size; _ctx->level[l].w = lw; _ctx->level[l].h = lh; im_size = lw * (size_t)lh; level_size = 2 * im_size * sizeof(*_ctx->level[l].im1); level_size += sizeof(*_ctx->level[l].ssim) - 1; level_size /= sizeof(*_ctx->level[l].ssim); level_size *= sizeof(*_ctx->level[l].ssim); _ctx->level[l].im1 = (uint32_t *)data; _ctx->level[l].im2 = _ctx->level[l].im1 + im_size; data += level_size; _ctx->level[l].ssim = (double *)data; data += im_size * sizeof(*_ctx->level[l].ssim); lw = (lw + 1) >> 1; lh = (lh + 1) >> 1; } _ctx->col_buf = (unsigned *)data; } static void fs_ctx_clear(fs_ctx *_ctx) { free(_ctx->level); } static void fs_downsample_level(fs_ctx *_ctx, int _l) { const uint32_t *src1; const uint32_t *src2; uint32_t *dst1; uint32_t *dst2; int w2; int h2; int w; int h; int i; int j; w = _ctx->level[_l].w; h = _ctx->level[_l].h; dst1 = _ctx->level[_l].im1; dst2 = _ctx->level[_l].im2; w2 = _ctx->level[_l - 1].w; h2 = _ctx->level[_l - 1].h; src1 = _ctx->level[_l - 1].im1; src2 = _ctx->level[_l - 1].im2; for (j = 0; j < h; j++) { int j0offs; int j1offs; j0offs = 2 * j * w2; j1offs = FS_MINI(2 * j + 1, h2) * w2; for (i = 0; i < w; i++) { int i0; int i1; i0 = 2 * i; i1 = FS_MINI(i0 + 1, w2); dst1[j * w + i] = (uint32_t)((int64_t)src1[j0offs + i0] + src1[j0offs + i1] + src1[j1offs + i0] + src1[j1offs + i1]); dst2[j * w + i] = (uint32_t)((int64_t)src2[j0offs + i0] + src2[j0offs + i1] + src2[j1offs + i0] + src2[j1offs + i1]); } } } static void fs_downsample_level0(fs_ctx *_ctx, const uint8_t *_src1, int _s1ystride, const uint8_t *_src2, int _s2ystride, int _w, int _h, uint32_t bd, uint32_t shift) { uint32_t *dst1; uint32_t *dst2; int w; int h; int i; int j; w = _ctx->level[0].w; h = _ctx->level[0].h; dst1 = _ctx->level[0].im1; dst2 = _ctx->level[0].im2; for (j = 0; j < h; j++) { int j0; int j1; j0 = 2 * j; j1 = FS_MINI(j0 + 1, _h); for (i = 0; i < w; i++) { int i0; int i1; i0 = 2 * i; i1 = FS_MINI(i0 + 1, _w); if (bd == 8 && shift == 0) { dst1[j * w + i] = _src1[j0 * _s1ystride + i0] + _src1[j0 * _s1ystride + i1] + _src1[j1 * _s1ystride + i0] + _src1[j1 * _s1ystride + i1]; dst2[j * w + i] = _src2[j0 * _s2ystride + i0] + _src2[j0 * _s2ystride + i1] + _src2[j1 * _s2ystride + i0] + _src2[j1 * _s2ystride + i1]; } else { uint16_t *src1s = CONVERT_TO_SHORTPTR(_src1); uint16_t *src2s = CONVERT_TO_SHORTPTR(_src2); dst1[j * w + i] = (src1s[j0 * _s1ystride + i0] >> shift) + (src1s[j0 * _s1ystride + i1] >> shift) + (src1s[j1 * _s1ystride + i0] >> shift) + (src1s[j1 * _s1ystride + i1] >> shift); dst2[j 
* w + i] = (src2s[j0 * _s2ystride + i0] >> shift) + (src2s[j0 * _s2ystride + i1] >> shift) + (src2s[j1 * _s2ystride + i0] >> shift) + (src2s[j1 * _s2ystride + i1] >> shift); } } } } static void fs_apply_luminance(fs_ctx *_ctx, int _l, int bit_depth) { unsigned *col_sums_x; unsigned *col_sums_y; uint32_t *im1; uint32_t *im2; double *ssim; double c1; int w; int h; int j0offs; int j1offs; int i; int j; double ssim_c1 = SSIM_C1; #if CONFIG_VP9_HIGHBITDEPTH if (bit_depth == 10) ssim_c1 = SSIM_C1_10; if (bit_depth == 12) ssim_c1 = SSIM_C1_12; #else assert(bit_depth == 8); (void)bit_depth; #endif w = _ctx->level[_l].w; h = _ctx->level[_l].h; col_sums_x = _ctx->col_buf; col_sums_y = col_sums_x + w; im1 = _ctx->level[_l].im1; im2 = _ctx->level[_l].im2; for (i = 0; i < w; i++) col_sums_x[i] = 5 * im1[i]; for (i = 0; i < w; i++) col_sums_y[i] = 5 * im2[i]; for (j = 1; j < 4; j++) { j1offs = FS_MINI(j, h - 1) * w; for (i = 0; i < w; i++) col_sums_x[i] += im1[j1offs + i]; for (i = 0; i < w; i++) col_sums_y[i] += im2[j1offs + i]; } ssim = _ctx->level[_l].ssim; c1 = (double)(ssim_c1 * 4096 * (1 << 4 * _l)); for (j = 0; j < h; j++) { int64_t mux; int64_t muy; int i0; int i1; mux = (int64_t)5 * col_sums_x[0]; muy = (int64_t)5 * col_sums_y[0]; for (i = 1; i < 4; i++) { i1 = FS_MINI(i, w - 1); mux += col_sums_x[i1]; muy += col_sums_y[i1]; } for (i = 0; i < w; i++) { ssim[j * w + i] *= (2 * mux * (double)muy + c1) / (mux * (double)mux + muy * (double)muy + c1); if (i + 1 < w) { i0 = FS_MAXI(0, i - 4); i1 = FS_MINI(i + 4, w - 1); mux += (int)col_sums_x[i1] - (int)col_sums_x[i0]; muy += (int)col_sums_x[i1] - (int)col_sums_x[i0]; } } if (j + 1 < h) { j0offs = FS_MAXI(0, j - 4) * w; for (i = 0; i < w; i++) col_sums_x[i] -= im1[j0offs + i]; for (i = 0; i < w; i++) col_sums_y[i] -= im2[j0offs + i]; j1offs = FS_MINI(j + 4, h - 1) * w; for (i = 0; i < w; i++) col_sums_x[i] = (uint32_t)((int64_t)col_sums_x[i] + im1[j1offs + i]); for (i = 0; i < w; i++) col_sums_y[i] = (uint32_t)((int64_t)col_sums_y[i] + im2[j1offs + i]); } } } #define FS_COL_SET(_col, _joffs, _ioffs) \ do { \ unsigned gx; \ unsigned gy; \ gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ col_sums_gx2[(_col)] = gx * (double)gx; \ col_sums_gy2[(_col)] = gy * (double)gy; \ col_sums_gxgy[(_col)] = gx * (double)gy; \ } while (0) #define FS_COL_ADD(_col, _joffs, _ioffs) \ do { \ unsigned gx; \ unsigned gy; \ gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ col_sums_gx2[(_col)] += gx * (double)gx; \ col_sums_gy2[(_col)] += gy * (double)gy; \ col_sums_gxgy[(_col)] += gx * (double)gy; \ } while (0) #define FS_COL_SUB(_col, _joffs, _ioffs) \ do { \ unsigned gx; \ unsigned gy; \ gx = gx_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ gy = gy_buf[((j + (_joffs)) & 7) * stride + i + (_ioffs)]; \ col_sums_gx2[(_col)] -= gx * (double)gx; \ col_sums_gy2[(_col)] -= gy * (double)gy; \ col_sums_gxgy[(_col)] -= gx * (double)gy; \ } while (0) #define FS_COL_COPY(_col1, _col2) \ do { \ col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)]; \ col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)]; \ col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)]; \ } while (0) #define FS_COL_HALVE(_col1, _col2) \ do { \ col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * 0.5; \ col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * 0.5; \ col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * 0.5; \ } while (0) #define FS_COL_DOUBLE(_col1, _col2) \ do { \ 
col_sums_gx2[(_col1)] = col_sums_gx2[(_col2)] * 2; \ col_sums_gy2[(_col1)] = col_sums_gy2[(_col2)] * 2; \ col_sums_gxgy[(_col1)] = col_sums_gxgy[(_col2)] * 2; \ } while (0) static void fs_calc_structure(fs_ctx *_ctx, int _l, int bit_depth) { uint32_t *im1; uint32_t *im2; unsigned *gx_buf; unsigned *gy_buf; double *ssim; double col_sums_gx2[8]; double col_sums_gy2[8]; double col_sums_gxgy[8]; double c2; int stride; int w; int h; int i; int j; double ssim_c2 = SSIM_C2; #if CONFIG_VP9_HIGHBITDEPTH if (bit_depth == 10) ssim_c2 = SSIM_C2_10; if (bit_depth == 12) ssim_c2 = SSIM_C2_12; #else assert(bit_depth == 8); (void)bit_depth; #endif w = _ctx->level[_l].w; h = _ctx->level[_l].h; im1 = _ctx->level[_l].im1; im2 = _ctx->level[_l].im2; ssim = _ctx->level[_l].ssim; gx_buf = _ctx->col_buf; stride = w + 8; gy_buf = gx_buf + 8 * stride; memset(gx_buf, 0, 2 * 8 * stride * sizeof(*gx_buf)); c2 = ssim_c2 * (1 << 4 * _l) * 16 * 104; for (j = 0; j < h + 4; j++) { if (j < h - 1) { for (i = 0; i < w - 1; i++) { int64_t g1; int64_t g2; int64_t gx; int64_t gy; g1 = labs((int64_t)im1[(j + 1) * w + i + 1] - (int64_t)im1[j * w + i]); g2 = labs((int64_t)im1[(j + 1) * w + i] - (int64_t)im1[j * w + i + 1]); gx = 4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2); g1 = labs((int64_t)im2[(j + 1) * w + i + 1] - (int64_t)im2[j * w + i]); g2 = labs((int64_t)im2[(j + 1) * w + i] - (int64_t)im2[j * w + i + 1]); gy = ((int64_t)4 * FS_MAXI(g1, g2) + FS_MINI(g1, g2)); gx_buf[(j & 7) * stride + i + 4] = (uint32_t)gx; gy_buf[(j & 7) * stride + i + 4] = (uint32_t)gy; } } else { memset(gx_buf + (j & 7) * stride, 0, stride * sizeof(*gx_buf)); memset(gy_buf + (j & 7) * stride, 0, stride * sizeof(*gy_buf)); } if (j >= 4) { int k; col_sums_gx2[3] = col_sums_gx2[2] = col_sums_gx2[1] = col_sums_gx2[0] = 0; col_sums_gy2[3] = col_sums_gy2[2] = col_sums_gy2[1] = col_sums_gy2[0] = 0; col_sums_gxgy[3] = col_sums_gxgy[2] = col_sums_gxgy[1] = col_sums_gxgy[0] = 0; for (i = 4; i < 8; i++) { FS_COL_SET(i, -1, 0); FS_COL_ADD(i, 0, 0); for (k = 1; k < 8 - i; k++) { FS_COL_DOUBLE(i, i); FS_COL_ADD(i, -k - 1, 0); FS_COL_ADD(i, k, 0); } } for (i = 0; i < w; i++) { double mugx2; double mugy2; double mugxgy; mugx2 = col_sums_gx2[0]; for (k = 1; k < 8; k++) mugx2 += col_sums_gx2[k]; mugy2 = col_sums_gy2[0]; for (k = 1; k < 8; k++) mugy2 += col_sums_gy2[k]; mugxgy = col_sums_gxgy[0]; for (k = 1; k < 8; k++) mugxgy += col_sums_gxgy[k]; ssim[(j - 4) * w + i] = (2 * mugxgy + c2) / (mugx2 + mugy2 + c2); if (i + 1 < w) { FS_COL_SET(0, -1, 1); FS_COL_ADD(0, 0, 1); FS_COL_SUB(2, -3, 2); FS_COL_SUB(2, 2, 2); FS_COL_HALVE(1, 2); FS_COL_SUB(3, -4, 3); FS_COL_SUB(3, 3, 3); FS_COL_HALVE(2, 3); FS_COL_COPY(3, 4); FS_COL_DOUBLE(4, 5); FS_COL_ADD(4, -4, 5); FS_COL_ADD(4, 3, 5); FS_COL_DOUBLE(5, 6); FS_COL_ADD(5, -3, 6); FS_COL_ADD(5, 2, 6); FS_COL_DOUBLE(6, 7); FS_COL_ADD(6, -2, 7); FS_COL_ADD(6, 1, 7); FS_COL_SET(7, -1, 8); FS_COL_ADD(7, 0, 8); } } } } } #define FS_NLEVELS (4) /*These weights were derived from the default weights found in Wang's original Matlab implementation: {0.0448, 0.2856, 0.2363, 0.1333}. 
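(The original five-scale set also contains 0.3001 for the second scale;
after the finest-scale weight 0.0448 is dropped, the remaining
{0.2856, 0.3001, 0.2363, 0.1333} sum to 0.9553, and e.g.
0.2856 / 0.9553 = 0.298965..., matching FS_WEIGHTS[0] below.)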
We drop the finest scale and renormalize the rest to sum to 1.*/ static const double FS_WEIGHTS[FS_NLEVELS] = { 0.2989654541015625, 0.3141326904296875, 0.2473602294921875, 0.1395416259765625 }; static double fs_average(fs_ctx *_ctx, int _l) { double *ssim; double ret; int w; int h; int i; int j; w = _ctx->level[_l].w; h = _ctx->level[_l].h; ssim = _ctx->level[_l].ssim; ret = 0; for (j = 0; j < h; j++) for (i = 0; i < w; i++) ret += ssim[j * w + i]; return pow(ret / (w * h), FS_WEIGHTS[_l]); } static double convert_ssim_db(double _ssim, double _weight) { assert(_weight >= _ssim); if ((_weight - _ssim) < 1e-10) return MAX_SSIM_DB; return 10 * (log10(_weight) - log10(_weight - _ssim)); } static double calc_ssim(const uint8_t *_src, int _systride, const uint8_t *_dst, int _dystride, int _w, int _h, uint32_t _bd, uint32_t _shift) { fs_ctx ctx; double ret; int l; ret = 1; fs_ctx_init(&ctx, _w, _h, FS_NLEVELS); fs_downsample_level0(&ctx, _src, _systride, _dst, _dystride, _w, _h, _bd, _shift); for (l = 0; l < FS_NLEVELS - 1; l++) { fs_calc_structure(&ctx, l, _bd); ret *= fs_average(&ctx, l); fs_downsample_level(&ctx, l + 1); } fs_calc_structure(&ctx, l, _bd); fs_apply_luminance(&ctx, l, _bd); ret *= fs_average(&ctx, l); fs_ctx_clear(&ctx); return ret; } double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, double *ssim_y, double *ssim_u, double *ssim_v, uint32_t bd, uint32_t in_bd) { double ssimv; uint32_t bd_shift = 0; vpx_clear_system_state(); assert(bd >= in_bd); bd_shift = bd - in_bd; *ssim_y = calc_ssim(source->y_buffer, source->y_stride, dest->y_buffer, dest->y_stride, source->y_crop_width, source->y_crop_height, in_bd, bd_shift); *ssim_u = calc_ssim(source->u_buffer, source->uv_stride, dest->u_buffer, dest->uv_stride, source->uv_crop_width, source->uv_crop_height, in_bd, bd_shift); *ssim_v = calc_ssim(source->v_buffer, source->uv_stride, dest->v_buffer, dest->uv_stride, source->uv_crop_width, source->uv_crop_height, in_bd, bd_shift); ssimv = (*ssim_y) * .8 + .1 * ((*ssim_u) + (*ssim_v)); return convert_ssim_db(ssimv, 1.0); } libvpx-1.8.2/vpx_dsp/fwd_txfm.c000066400000000000000000000752671357355204000165200ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/fwd_txfm.h" void vpx_fdct4x4_c(const int16_t *input, tran_low_t *output, int stride) { // The 2D transform is done with two passes which are actually pretty // similar. In the first one, we transform the columns and transpose // the results. In the second one, we transform the rows. To achieve that, // as the first pass results are transposed, we transpose the columns (that // is the transposed rows) and transpose the results (so that it goes back // in normal/row positions). int pass; // We need an intermediate buffer between passes. tran_low_t intermediate[4 * 4]; const tran_low_t *in_low = NULL; tran_low_t *out = intermediate; // Do the two transform/transpose passes for (pass = 0; pass < 2; ++pass) { tran_high_t in_high[4]; // canbe16 tran_high_t step[4]; // canbe16 tran_high_t temp1, temp2; // needs32 int i; for (i = 0; i < 4; ++i) { // Load inputs. 
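// The first pass works on residuals scaled up by 16, giving the
// intermediate stages extra precision headroom; the conditional increment
// of in_high[0] below touches only the very first sample of the block (the
// DC path), a small rounding bias ahead of the final (x + 1) >> 2 scaling
// at the end of the function.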
if (pass == 0) { in_high[0] = input[0 * stride] * 16; in_high[1] = input[1 * stride] * 16; in_high[2] = input[2 * stride] * 16; in_high[3] = input[3 * stride] * 16; if (i == 0 && in_high[0]) { ++in_high[0]; } } else { assert(in_low != NULL); in_high[0] = in_low[0 * 4]; in_high[1] = in_low[1 * 4]; in_high[2] = in_low[2 * 4]; in_high[3] = in_low[3 * 4]; ++in_low; } // Transform. step[0] = in_high[0] + in_high[3]; step[1] = in_high[1] + in_high[2]; step[2] = in_high[1] - in_high[2]; step[3] = in_high[0] - in_high[3]; temp1 = (step[0] + step[1]) * cospi_16_64; temp2 = (step[0] - step[1]) * cospi_16_64; out[0] = (tran_low_t)fdct_round_shift(temp1); out[2] = (tran_low_t)fdct_round_shift(temp2); temp1 = step[2] * cospi_24_64 + step[3] * cospi_8_64; temp2 = -step[2] * cospi_8_64 + step[3] * cospi_24_64; out[1] = (tran_low_t)fdct_round_shift(temp1); out[3] = (tran_low_t)fdct_round_shift(temp2); // Do next column (which is a transposed row in second/horizontal pass) ++input; out += 4; } // Setup in/out for next pass. in_low = intermediate; out = output; } { int i, j; for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) output[j + i * 4] = (output[j + i * 4] + 1) >> 2; } } } void vpx_fdct4x4_1_c(const int16_t *input, tran_low_t *output, int stride) { int r, c; tran_low_t sum = 0; for (r = 0; r < 4; ++r) for (c = 0; c < 4; ++c) sum += input[r * stride + c]; output[0] = sum * 2; } void vpx_fdct8x8_c(const int16_t *input, tran_low_t *output, int stride) { int i, j; tran_low_t intermediate[64]; int pass; tran_low_t *out = intermediate; const tran_low_t *in = NULL; // Transform columns for (pass = 0; pass < 2; ++pass) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 tran_high_t t0, t1, t2, t3; // needs32 tran_high_t x0, x1, x2, x3; // canbe16 for (i = 0; i < 8; i++) { // stage 1 if (pass == 0) { s0 = (input[0 * stride] + input[7 * stride]) * 4; s1 = (input[1 * stride] + input[6 * stride]) * 4; s2 = (input[2 * stride] + input[5 * stride]) * 4; s3 = (input[3 * stride] + input[4 * stride]) * 4; s4 = (input[3 * stride] - input[4 * stride]) * 4; s5 = (input[2 * stride] - input[5 * stride]) * 4; s6 = (input[1 * stride] - input[6 * stride]) * 4; s7 = (input[0 * stride] - input[7 * stride]) * 4; ++input; } else { s0 = in[0 * 8] + in[7 * 8]; s1 = in[1 * 8] + in[6 * 8]; s2 = in[2 * 8] + in[5 * 8]; s3 = in[3 * 8] + in[4 * 8]; s4 = in[3 * 8] - in[4 * 8]; s5 = in[2 * 8] - in[5 * 8]; s6 = in[1 * 8] - in[6 * 8]; s7 = in[0 * 8] - in[7 * 8]; ++in; } // fdct4(step, step); x0 = s0 + s3; x1 = s1 + s2; x2 = s1 - s2; x3 = s0 - s3; t0 = (x0 + x1) * cospi_16_64; t1 = (x0 - x1) * cospi_16_64; t2 = x2 * cospi_24_64 + x3 * cospi_8_64; t3 = -x2 * cospi_8_64 + x3 * cospi_24_64; out[0] = (tran_low_t)fdct_round_shift(t0); out[2] = (tran_low_t)fdct_round_shift(t2); out[4] = (tran_low_t)fdct_round_shift(t1); out[6] = (tran_low_t)fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; t1 = (s6 + s5) * cospi_16_64; t2 = fdct_round_shift(t0); t3 = fdct_round_shift(t1); // Stage 3 x0 = s4 + t2; x1 = s4 - t2; x2 = s7 - t3; x3 = s7 + t3; // Stage 4 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; out[1] = (tran_low_t)fdct_round_shift(t0); out[3] = (tran_low_t)fdct_round_shift(t2); out[5] = (tran_low_t)fdct_round_shift(t1); out[7] = (tran_low_t)fdct_round_shift(t3); out += 8; } in = intermediate; out = output; } // Rows for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) output[j + i * 8] /= 2; } } void 
vpx_fdct8x8_1_c(const int16_t *input, tran_low_t *output, int stride) { int r, c; tran_low_t sum = 0; for (r = 0; r < 8; ++r) for (c = 0; c < 8; ++c) sum += input[r * stride + c]; output[0] = sum; } void vpx_fdct16x16_c(const int16_t *input, tran_low_t *output, int stride) { // The 2D transform is done with two passes which are actually pretty // similar. In the first one, we transform the columns and transpose // the results. In the second one, we transform the rows. To achieve that, // as the first pass results are transposed, we transpose the columns (that // is the transposed rows) and transpose the results (so that it goes back // in normal/row positions). int pass; // We need an intermediate buffer between passes. tran_low_t intermediate[256]; const tran_low_t *in_low = NULL; tran_low_t *out = intermediate; // Do the two transform/transpose passes for (pass = 0; pass < 2; ++pass) { tran_high_t step1[8]; // canbe16 tran_high_t step2[8]; // canbe16 tran_high_t step3[8]; // canbe16 tran_high_t in_high[8]; // canbe16 tran_high_t temp1, temp2; // needs32 int i; for (i = 0; i < 16; i++) { if (0 == pass) { // Calculate input for the first 8 results. in_high[0] = (input[0 * stride] + input[15 * stride]) * 4; in_high[1] = (input[1 * stride] + input[14 * stride]) * 4; in_high[2] = (input[2 * stride] + input[13 * stride]) * 4; in_high[3] = (input[3 * stride] + input[12 * stride]) * 4; in_high[4] = (input[4 * stride] + input[11 * stride]) * 4; in_high[5] = (input[5 * stride] + input[10 * stride]) * 4; in_high[6] = (input[6 * stride] + input[9 * stride]) * 4; in_high[7] = (input[7 * stride] + input[8 * stride]) * 4; // Calculate input for the next 8 results. step1[0] = (input[7 * stride] - input[8 * stride]) * 4; step1[1] = (input[6 * stride] - input[9 * stride]) * 4; step1[2] = (input[5 * stride] - input[10 * stride]) * 4; step1[3] = (input[4 * stride] - input[11 * stride]) * 4; step1[4] = (input[3 * stride] - input[12 * stride]) * 4; step1[5] = (input[2 * stride] - input[13 * stride]) * 4; step1[6] = (input[1 * stride] - input[14 * stride]) * 4; step1[7] = (input[0 * stride] - input[15 * stride]) * 4; } else { // Calculate input for the first 8 results. assert(in_low != NULL); in_high[0] = ((in_low[0 * 16] + 1) >> 2) + ((in_low[15 * 16] + 1) >> 2); in_high[1] = ((in_low[1 * 16] + 1) >> 2) + ((in_low[14 * 16] + 1) >> 2); in_high[2] = ((in_low[2 * 16] + 1) >> 2) + ((in_low[13 * 16] + 1) >> 2); in_high[3] = ((in_low[3 * 16] + 1) >> 2) + ((in_low[12 * 16] + 1) >> 2); in_high[4] = ((in_low[4 * 16] + 1) >> 2) + ((in_low[11 * 16] + 1) >> 2); in_high[5] = ((in_low[5 * 16] + 1) >> 2) + ((in_low[10 * 16] + 1) >> 2); in_high[6] = ((in_low[6 * 16] + 1) >> 2) + ((in_low[9 * 16] + 1) >> 2); in_high[7] = ((in_low[7 * 16] + 1) >> 2) + ((in_low[8 * 16] + 1) >> 2); // Calculate input for the next 8 results. 
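// Note the rounded (x + 1) >> 2 terms below: in this second pass each
// pass-1 output is scaled back down before the butterflies are formed,
// undoing the * 4 headroom applied when the columns were loaded in pass 1.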
step1[0] = ((in_low[7 * 16] + 1) >> 2) - ((in_low[8 * 16] + 1) >> 2); step1[1] = ((in_low[6 * 16] + 1) >> 2) - ((in_low[9 * 16] + 1) >> 2); step1[2] = ((in_low[5 * 16] + 1) >> 2) - ((in_low[10 * 16] + 1) >> 2); step1[3] = ((in_low[4 * 16] + 1) >> 2) - ((in_low[11 * 16] + 1) >> 2); step1[4] = ((in_low[3 * 16] + 1) >> 2) - ((in_low[12 * 16] + 1) >> 2); step1[5] = ((in_low[2 * 16] + 1) >> 2) - ((in_low[13 * 16] + 1) >> 2); step1[6] = ((in_low[1 * 16] + 1) >> 2) - ((in_low[14 * 16] + 1) >> 2); step1[7] = ((in_low[0 * 16] + 1) >> 2) - ((in_low[15 * 16] + 1) >> 2); in_low++; } // Work on the first eight values; fdct8(input, even_results); { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; // canbe16 tran_high_t t0, t1, t2, t3; // needs32 tran_high_t x0, x1, x2, x3; // canbe16 // stage 1 s0 = in_high[0] + in_high[7]; s1 = in_high[1] + in_high[6]; s2 = in_high[2] + in_high[5]; s3 = in_high[3] + in_high[4]; s4 = in_high[3] - in_high[4]; s5 = in_high[2] - in_high[5]; s6 = in_high[1] - in_high[6]; s7 = in_high[0] - in_high[7]; // fdct4(step, step); x0 = s0 + s3; x1 = s1 + s2; x2 = s1 - s2; x3 = s0 - s3; t0 = (x0 + x1) * cospi_16_64; t1 = (x0 - x1) * cospi_16_64; t2 = x3 * cospi_8_64 + x2 * cospi_24_64; t3 = x3 * cospi_24_64 - x2 * cospi_8_64; out[0] = (tran_low_t)fdct_round_shift(t0); out[4] = (tran_low_t)fdct_round_shift(t2); out[8] = (tran_low_t)fdct_round_shift(t1); out[12] = (tran_low_t)fdct_round_shift(t3); // Stage 2 t0 = (s6 - s5) * cospi_16_64; t1 = (s6 + s5) * cospi_16_64; t2 = fdct_round_shift(t0); t3 = fdct_round_shift(t1); // Stage 3 x0 = s4 + t2; x1 = s4 - t2; x2 = s7 - t3; x3 = s7 + t3; // Stage 4 t0 = x0 * cospi_28_64 + x3 * cospi_4_64; t1 = x1 * cospi_12_64 + x2 * cospi_20_64; t2 = x2 * cospi_12_64 + x1 * -cospi_20_64; t3 = x3 * cospi_28_64 + x0 * -cospi_4_64; out[2] = (tran_low_t)fdct_round_shift(t0); out[6] = (tran_low_t)fdct_round_shift(t2); out[10] = (tran_low_t)fdct_round_shift(t1); out[14] = (tran_low_t)fdct_round_shift(t3); } // Work on the next eight values; step1 -> odd_results { // step 2 temp1 = (step1[5] - step1[2]) * cospi_16_64; temp2 = (step1[4] - step1[3]) * cospi_16_64; step2[2] = fdct_round_shift(temp1); step2[3] = fdct_round_shift(temp2); temp1 = (step1[4] + step1[3]) * cospi_16_64; temp2 = (step1[5] + step1[2]) * cospi_16_64; step2[4] = fdct_round_shift(temp1); step2[5] = fdct_round_shift(temp2); // step 3 step3[0] = step1[0] + step2[3]; step3[1] = step1[1] + step2[2]; step3[2] = step1[1] - step2[2]; step3[3] = step1[0] - step2[3]; step3[4] = step1[7] - step2[4]; step3[5] = step1[6] - step2[5]; step3[6] = step1[6] + step2[5]; step3[7] = step1[7] + step2[4]; // step 4 temp1 = step3[1] * -cospi_8_64 + step3[6] * cospi_24_64; temp2 = step3[2] * cospi_24_64 + step3[5] * cospi_8_64; step2[1] = fdct_round_shift(temp1); step2[2] = fdct_round_shift(temp2); temp1 = step3[2] * cospi_8_64 - step3[5] * cospi_24_64; temp2 = step3[1] * cospi_24_64 + step3[6] * cospi_8_64; step2[5] = fdct_round_shift(temp1); step2[6] = fdct_round_shift(temp2); // step 5 step1[0] = step3[0] + step2[1]; step1[1] = step3[0] - step2[1]; step1[2] = step3[3] + step2[2]; step1[3] = step3[3] - step2[2]; step1[4] = step3[4] - step2[5]; step1[5] = step3[4] + step2[5]; step1[6] = step3[7] - step2[6]; step1[7] = step3[7] + step2[6]; // step 6 temp1 = step1[0] * cospi_30_64 + step1[7] * cospi_2_64; temp2 = step1[1] * cospi_14_64 + step1[6] * cospi_18_64; out[1] = (tran_low_t)fdct_round_shift(temp1); out[9] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[2] * cospi_22_64 + step1[5] * cospi_10_64; temp2 = 
step1[3] * cospi_6_64 + step1[4] * cospi_26_64; out[5] = (tran_low_t)fdct_round_shift(temp1); out[13] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[3] * -cospi_26_64 + step1[4] * cospi_6_64; temp2 = step1[2] * -cospi_10_64 + step1[5] * cospi_22_64; out[3] = (tran_low_t)fdct_round_shift(temp1); out[11] = (tran_low_t)fdct_round_shift(temp2); temp1 = step1[1] * -cospi_18_64 + step1[6] * cospi_14_64; temp2 = step1[0] * -cospi_2_64 + step1[7] * cospi_30_64; out[7] = (tran_low_t)fdct_round_shift(temp1); out[15] = (tran_low_t)fdct_round_shift(temp2); } // Do next column (which is a transposed row in second/horizontal pass) input++; out += 16; } // Setup in/out for next pass. in_low = intermediate; out = output; } } void vpx_fdct16x16_1_c(const int16_t *input, tran_low_t *output, int stride) { int r, c; int sum = 0; for (r = 0; r < 16; ++r) for (c = 0; c < 16; ++c) sum += input[r * stride + c]; output[0] = (tran_low_t)(sum >> 1); } static INLINE tran_high_t dct_32_round(tran_high_t input) { tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS); // TODO(debargha, peter.derivaz): Find new bounds for this assert, // and make the bounds consts. // assert(-131072 <= rv && rv <= 131071); return rv; } static INLINE tran_high_t half_round_shift(tran_high_t input) { tran_high_t rv = (input + 1 + (input < 0)) >> 2; return rv; } void vpx_fdct32(const tran_high_t *input, tran_high_t *output, int round) { tran_high_t step[32]; // Stage 1 step[0] = input[0] + input[(32 - 1)]; step[1] = input[1] + input[(32 - 2)]; step[2] = input[2] + input[(32 - 3)]; step[3] = input[3] + input[(32 - 4)]; step[4] = input[4] + input[(32 - 5)]; step[5] = input[5] + input[(32 - 6)]; step[6] = input[6] + input[(32 - 7)]; step[7] = input[7] + input[(32 - 8)]; step[8] = input[8] + input[(32 - 9)]; step[9] = input[9] + input[(32 - 10)]; step[10] = input[10] + input[(32 - 11)]; step[11] = input[11] + input[(32 - 12)]; step[12] = input[12] + input[(32 - 13)]; step[13] = input[13] + input[(32 - 14)]; step[14] = input[14] + input[(32 - 15)]; step[15] = input[15] + input[(32 - 16)]; step[16] = -input[16] + input[(32 - 17)]; step[17] = -input[17] + input[(32 - 18)]; step[18] = -input[18] + input[(32 - 19)]; step[19] = -input[19] + input[(32 - 20)]; step[20] = -input[20] + input[(32 - 21)]; step[21] = -input[21] + input[(32 - 22)]; step[22] = -input[22] + input[(32 - 23)]; step[23] = -input[23] + input[(32 - 24)]; step[24] = -input[24] + input[(32 - 25)]; step[25] = -input[25] + input[(32 - 26)]; step[26] = -input[26] + input[(32 - 27)]; step[27] = -input[27] + input[(32 - 28)]; step[28] = -input[28] + input[(32 - 29)]; step[29] = -input[29] + input[(32 - 30)]; step[30] = -input[30] + input[(32 - 31)]; step[31] = -input[31] + input[(32 - 32)]; // Stage 2 output[0] = step[0] + step[16 - 1]; output[1] = step[1] + step[16 - 2]; output[2] = step[2] + step[16 - 3]; output[3] = step[3] + step[16 - 4]; output[4] = step[4] + step[16 - 5]; output[5] = step[5] + step[16 - 6]; output[6] = step[6] + step[16 - 7]; output[7] = step[7] + step[16 - 8]; output[8] = -step[8] + step[16 - 9]; output[9] = -step[9] + step[16 - 10]; output[10] = -step[10] + step[16 - 11]; output[11] = -step[11] + step[16 - 12]; output[12] = -step[12] + step[16 - 13]; output[13] = -step[13] + step[16 - 14]; output[14] = -step[14] + step[16 - 15]; output[15] = -step[15] + step[16 - 16]; output[16] = step[16]; output[17] = step[17]; output[18] = step[18]; output[19] = step[19]; output[20] = dct_32_round((-step[20] + step[27]) * cospi_16_64); output[21] = 
dct_32_round((-step[21] + step[26]) * cospi_16_64); output[22] = dct_32_round((-step[22] + step[25]) * cospi_16_64); output[23] = dct_32_round((-step[23] + step[24]) * cospi_16_64); output[24] = dct_32_round((step[24] + step[23]) * cospi_16_64); output[25] = dct_32_round((step[25] + step[22]) * cospi_16_64); output[26] = dct_32_round((step[26] + step[21]) * cospi_16_64); output[27] = dct_32_round((step[27] + step[20]) * cospi_16_64); output[28] = step[28]; output[29] = step[29]; output[30] = step[30]; output[31] = step[31]; // dump the magnitude by 4, hence the intermediate values are within // the range of 16 bits. if (round) { output[0] = half_round_shift(output[0]); output[1] = half_round_shift(output[1]); output[2] = half_round_shift(output[2]); output[3] = half_round_shift(output[3]); output[4] = half_round_shift(output[4]); output[5] = half_round_shift(output[5]); output[6] = half_round_shift(output[6]); output[7] = half_round_shift(output[7]); output[8] = half_round_shift(output[8]); output[9] = half_round_shift(output[9]); output[10] = half_round_shift(output[10]); output[11] = half_round_shift(output[11]); output[12] = half_round_shift(output[12]); output[13] = half_round_shift(output[13]); output[14] = half_round_shift(output[14]); output[15] = half_round_shift(output[15]); output[16] = half_round_shift(output[16]); output[17] = half_round_shift(output[17]); output[18] = half_round_shift(output[18]); output[19] = half_round_shift(output[19]); output[20] = half_round_shift(output[20]); output[21] = half_round_shift(output[21]); output[22] = half_round_shift(output[22]); output[23] = half_round_shift(output[23]); output[24] = half_round_shift(output[24]); output[25] = half_round_shift(output[25]); output[26] = half_round_shift(output[26]); output[27] = half_round_shift(output[27]); output[28] = half_round_shift(output[28]); output[29] = half_round_shift(output[29]); output[30] = half_round_shift(output[30]); output[31] = half_round_shift(output[31]); } // Stage 3 step[0] = output[0] + output[(8 - 1)]; step[1] = output[1] + output[(8 - 2)]; step[2] = output[2] + output[(8 - 3)]; step[3] = output[3] + output[(8 - 4)]; step[4] = -output[4] + output[(8 - 5)]; step[5] = -output[5] + output[(8 - 6)]; step[6] = -output[6] + output[(8 - 7)]; step[7] = -output[7] + output[(8 - 8)]; step[8] = output[8]; step[9] = output[9]; step[10] = dct_32_round((-output[10] + output[13]) * cospi_16_64); step[11] = dct_32_round((-output[11] + output[12]) * cospi_16_64); step[12] = dct_32_round((output[12] + output[11]) * cospi_16_64); step[13] = dct_32_round((output[13] + output[10]) * cospi_16_64); step[14] = output[14]; step[15] = output[15]; step[16] = output[16] + output[23]; step[17] = output[17] + output[22]; step[18] = output[18] + output[21]; step[19] = output[19] + output[20]; step[20] = -output[20] + output[19]; step[21] = -output[21] + output[18]; step[22] = -output[22] + output[17]; step[23] = -output[23] + output[16]; step[24] = -output[24] + output[31]; step[25] = -output[25] + output[30]; step[26] = -output[26] + output[29]; step[27] = -output[27] + output[28]; step[28] = output[28] + output[27]; step[29] = output[29] + output[26]; step[30] = output[30] + output[25]; step[31] = output[31] + output[24]; // Stage 4 output[0] = step[0] + step[3]; output[1] = step[1] + step[2]; output[2] = -step[2] + step[1]; output[3] = -step[3] + step[0]; output[4] = step[4]; output[5] = dct_32_round((-step[5] + step[6]) * cospi_16_64); output[6] = dct_32_round((step[6] + step[5]) * cospi_16_64); output[7] = 
step[7]; output[8] = step[8] + step[11]; output[9] = step[9] + step[10]; output[10] = -step[10] + step[9]; output[11] = -step[11] + step[8]; output[12] = -step[12] + step[15]; output[13] = -step[13] + step[14]; output[14] = step[14] + step[13]; output[15] = step[15] + step[12]; output[16] = step[16]; output[17] = step[17]; output[18] = dct_32_round(step[18] * -cospi_8_64 + step[29] * cospi_24_64); output[19] = dct_32_round(step[19] * -cospi_8_64 + step[28] * cospi_24_64); output[20] = dct_32_round(step[20] * -cospi_24_64 + step[27] * -cospi_8_64); output[21] = dct_32_round(step[21] * -cospi_24_64 + step[26] * -cospi_8_64); output[22] = step[22]; output[23] = step[23]; output[24] = step[24]; output[25] = step[25]; output[26] = dct_32_round(step[26] * cospi_24_64 + step[21] * -cospi_8_64); output[27] = dct_32_round(step[27] * cospi_24_64 + step[20] * -cospi_8_64); output[28] = dct_32_round(step[28] * cospi_8_64 + step[19] * cospi_24_64); output[29] = dct_32_round(step[29] * cospi_8_64 + step[18] * cospi_24_64); output[30] = step[30]; output[31] = step[31]; // Stage 5 step[0] = dct_32_round((output[0] + output[1]) * cospi_16_64); step[1] = dct_32_round((-output[1] + output[0]) * cospi_16_64); step[2] = dct_32_round(output[2] * cospi_24_64 + output[3] * cospi_8_64); step[3] = dct_32_round(output[3] * cospi_24_64 - output[2] * cospi_8_64); step[4] = output[4] + output[5]; step[5] = -output[5] + output[4]; step[6] = -output[6] + output[7]; step[7] = output[7] + output[6]; step[8] = output[8]; step[9] = dct_32_round(output[9] * -cospi_8_64 + output[14] * cospi_24_64); step[10] = dct_32_round(output[10] * -cospi_24_64 + output[13] * -cospi_8_64); step[11] = output[11]; step[12] = output[12]; step[13] = dct_32_round(output[13] * cospi_24_64 + output[10] * -cospi_8_64); step[14] = dct_32_round(output[14] * cospi_8_64 + output[9] * cospi_24_64); step[15] = output[15]; step[16] = output[16] + output[19]; step[17] = output[17] + output[18]; step[18] = -output[18] + output[17]; step[19] = -output[19] + output[16]; step[20] = -output[20] + output[23]; step[21] = -output[21] + output[22]; step[22] = output[22] + output[21]; step[23] = output[23] + output[20]; step[24] = output[24] + output[27]; step[25] = output[25] + output[26]; step[26] = -output[26] + output[25]; step[27] = -output[27] + output[24]; step[28] = -output[28] + output[31]; step[29] = -output[29] + output[30]; step[30] = output[30] + output[29]; step[31] = output[31] + output[28]; // Stage 6 output[0] = step[0]; output[1] = step[1]; output[2] = step[2]; output[3] = step[3]; output[4] = dct_32_round(step[4] * cospi_28_64 + step[7] * cospi_4_64); output[5] = dct_32_round(step[5] * cospi_12_64 + step[6] * cospi_20_64); output[6] = dct_32_round(step[6] * cospi_12_64 + step[5] * -cospi_20_64); output[7] = dct_32_round(step[7] * cospi_28_64 + step[4] * -cospi_4_64); output[8] = step[8] + step[9]; output[9] = -step[9] + step[8]; output[10] = -step[10] + step[11]; output[11] = step[11] + step[10]; output[12] = step[12] + step[13]; output[13] = -step[13] + step[12]; output[14] = -step[14] + step[15]; output[15] = step[15] + step[14]; output[16] = step[16]; output[17] = dct_32_round(step[17] * -cospi_4_64 + step[30] * cospi_28_64); output[18] = dct_32_round(step[18] * -cospi_28_64 + step[29] * -cospi_4_64); output[19] = step[19]; output[20] = step[20]; output[21] = dct_32_round(step[21] * -cospi_20_64 + step[26] * cospi_12_64); output[22] = dct_32_round(step[22] * -cospi_12_64 + step[25] * -cospi_20_64); output[23] = step[23]; output[24] = step[24]; 
output[25] = dct_32_round(step[25] * cospi_12_64 + step[22] * -cospi_20_64); output[26] = dct_32_round(step[26] * cospi_20_64 + step[21] * cospi_12_64); output[27] = step[27]; output[28] = step[28]; output[29] = dct_32_round(step[29] * cospi_28_64 + step[18] * -cospi_4_64); output[30] = dct_32_round(step[30] * cospi_4_64 + step[17] * cospi_28_64); output[31] = step[31]; // Stage 7 step[0] = output[0]; step[1] = output[1]; step[2] = output[2]; step[3] = output[3]; step[4] = output[4]; step[5] = output[5]; step[6] = output[6]; step[7] = output[7]; step[8] = dct_32_round(output[8] * cospi_30_64 + output[15] * cospi_2_64); step[9] = dct_32_round(output[9] * cospi_14_64 + output[14] * cospi_18_64); step[10] = dct_32_round(output[10] * cospi_22_64 + output[13] * cospi_10_64); step[11] = dct_32_round(output[11] * cospi_6_64 + output[12] * cospi_26_64); step[12] = dct_32_round(output[12] * cospi_6_64 + output[11] * -cospi_26_64); step[13] = dct_32_round(output[13] * cospi_22_64 + output[10] * -cospi_10_64); step[14] = dct_32_round(output[14] * cospi_14_64 + output[9] * -cospi_18_64); step[15] = dct_32_round(output[15] * cospi_30_64 + output[8] * -cospi_2_64); step[16] = output[16] + output[17]; step[17] = -output[17] + output[16]; step[18] = -output[18] + output[19]; step[19] = output[19] + output[18]; step[20] = output[20] + output[21]; step[21] = -output[21] + output[20]; step[22] = -output[22] + output[23]; step[23] = output[23] + output[22]; step[24] = output[24] + output[25]; step[25] = -output[25] + output[24]; step[26] = -output[26] + output[27]; step[27] = output[27] + output[26]; step[28] = output[28] + output[29]; step[29] = -output[29] + output[28]; step[30] = -output[30] + output[31]; step[31] = output[31] + output[30]; // Final stage --- outputs indices are bit-reversed. 
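// That is, step[k] lands at output[bitrev5(k)], where bitrev5 (notation
// only, not a function in this file) reverses a 5-bit index: step[1]
// (00001b) maps to output[16] (10000b), step[3] (00011b) to output[24]
// (11000b), and so on.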
output[0] = step[0]; output[16] = step[1]; output[8] = step[2]; output[24] = step[3]; output[4] = step[4]; output[20] = step[5]; output[12] = step[6]; output[28] = step[7]; output[2] = step[8]; output[18] = step[9]; output[10] = step[10]; output[26] = step[11]; output[6] = step[12]; output[22] = step[13]; output[14] = step[14]; output[30] = step[15]; output[1] = dct_32_round(step[16] * cospi_31_64 + step[31] * cospi_1_64); output[17] = dct_32_round(step[17] * cospi_15_64 + step[30] * cospi_17_64); output[9] = dct_32_round(step[18] * cospi_23_64 + step[29] * cospi_9_64); output[25] = dct_32_round(step[19] * cospi_7_64 + step[28] * cospi_25_64); output[5] = dct_32_round(step[20] * cospi_27_64 + step[27] * cospi_5_64); output[21] = dct_32_round(step[21] * cospi_11_64 + step[26] * cospi_21_64); output[13] = dct_32_round(step[22] * cospi_19_64 + step[25] * cospi_13_64); output[29] = dct_32_round(step[23] * cospi_3_64 + step[24] * cospi_29_64); output[3] = dct_32_round(step[24] * cospi_3_64 + step[23] * -cospi_29_64); output[19] = dct_32_round(step[25] * cospi_19_64 + step[22] * -cospi_13_64); output[11] = dct_32_round(step[26] * cospi_11_64 + step[21] * -cospi_21_64); output[27] = dct_32_round(step[27] * cospi_27_64 + step[20] * -cospi_5_64); output[7] = dct_32_round(step[28] * cospi_7_64 + step[19] * -cospi_25_64); output[23] = dct_32_round(step[29] * cospi_23_64 + step[18] * -cospi_9_64); output[15] = dct_32_round(step[30] * cospi_15_64 + step[17] * -cospi_17_64); output[31] = dct_32_round(step[31] * cospi_31_64 + step[16] * -cospi_1_64); } void vpx_fdct32x32_c(const int16_t *input, tran_low_t *output, int stride) { int i, j; tran_high_t out[32 * 32]; // Columns for (i = 0; i < 32; ++i) { tran_high_t temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4; vpx_fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) out[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2; } // Rows for (i = 0; i < 32; ++i) { tran_high_t temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = out[j + i * 32]; vpx_fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) output[j + i * 32] = (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2); } } // Note that although we use dct_32_round in dct32 computation flow, // this 2d fdct32x32 for rate-distortion optimization loop is operating // within 16 bits precision. 
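// Worked example of the (x + 1 + (x > 0)) >> 2 rounding used in the column
// pass below: x = 5 gives (5 + 2) >> 2 = 1, and x = -7 gives
// (-7 + 1) >> 2 = -2, i.e. round-to-nearest with ties away from zero.
// Unlike vpx_fdct32x32_c above, the _rd variant passes round = 1 to
// vpx_fdct32 for its row pass and stores the row outputs unscaled, which
// is how it stays within the 16-bit budget described in the note above.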
void vpx_fdct32x32_rd_c(const int16_t *input, tran_low_t *output, int stride) {
  int i, j;
  tran_high_t out[32 * 32];

  // Columns
  for (i = 0; i < 32; ++i) {
    tran_high_t temp_in[32], temp_out[32];
    for (j = 0; j < 32; ++j) temp_in[j] = input[j * stride + i] * 4;
    vpx_fdct32(temp_in, temp_out, 0);
    for (j = 0; j < 32; ++j)
      // TODO(cd): see quality impact of only doing
      //           output[j * 32 + i] = (temp_out[j] + 1) >> 2;
      //           PS: also change code in vpx_dsp/x86/vpx_dct_sse2.c
      out[j * 32 + i] = (temp_out[j] + 1 + (temp_out[j] > 0)) >> 2;
  }

  // Rows
  for (i = 0; i < 32; ++i) {
    tran_high_t temp_in[32], temp_out[32];
    for (j = 0; j < 32; ++j) temp_in[j] = out[j + i * 32];
    vpx_fdct32(temp_in, temp_out, 1);
    for (j = 0; j < 32; ++j) output[j + i * 32] = (tran_low_t)temp_out[j];
  }
}

void vpx_fdct32x32_1_c(const int16_t *input, tran_low_t *output, int stride) {
  int r, c;
  int sum = 0;
  for (r = 0; r < 32; ++r)
    for (c = 0; c < 32; ++c) sum += input[r * stride + c];

  output[0] = (tran_low_t)(sum >> 3);
}

#if CONFIG_VP9_HIGHBITDEPTH
void vpx_highbd_fdct4x4_c(const int16_t *input, tran_low_t *output,
                          int stride) {
  vpx_fdct4x4_c(input, output, stride);
}

void vpx_highbd_fdct8x8_c(const int16_t *input, tran_low_t *output,
                          int stride) {
  vpx_fdct8x8_c(input, output, stride);
}

void vpx_highbd_fdct8x8_1_c(const int16_t *input, tran_low_t *output,
                            int stride) {
  vpx_fdct8x8_1_c(input, output, stride);
}

void vpx_highbd_fdct16x16_c(const int16_t *input, tran_low_t *output,
                            int stride) {
  vpx_fdct16x16_c(input, output, stride);
}

void vpx_highbd_fdct16x16_1_c(const int16_t *input, tran_low_t *output,
                              int stride) {
  vpx_fdct16x16_1_c(input, output, stride);
}

void vpx_highbd_fdct32x32_c(const int16_t *input, tran_low_t *output,
                            int stride) {
  vpx_fdct32x32_c(input, output, stride);
}

void vpx_highbd_fdct32x32_rd_c(const int16_t *input, tran_low_t *output,
                               int stride) {
  vpx_fdct32x32_rd_c(input, output, stride);
}

void vpx_highbd_fdct32x32_1_c(const int16_t *input, tran_low_t *output,
                              int stride) {
  vpx_fdct32x32_1_c(input, output, stride);
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

libvpx-1.8.2/vpx_dsp/fwd_txfm.h

/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VPX_DSP_FWD_TXFM_H_
#define VPX_VPX_DSP_FWD_TXFM_H_

#include "vpx_dsp/txfm_common.h"

static INLINE tran_high_t fdct_round_shift(tran_high_t input) {
  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  // TODO(debargha, peter.derivaz): Find new bounds for this assert
  // and make the bounds consts.
  // assert(INT16_MIN <= rv && rv <= INT16_MAX);
  return rv;
}

void vpx_fdct32(const tran_high_t *input, tran_high_t *output, int round);

#endif  // VPX_VPX_DSP_FWD_TXFM_H_

libvpx-1.8.2/vpx_dsp/intrapred.c

/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" #define DST(x, y) dst[(x) + (y)*stride] #define AVG3(a, b, c) (((a) + 2 * (b) + (c) + 2) >> 2) #define AVG2(a, b) (((a) + (b) + 1) >> 1) static INLINE void d207_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; (void)above; // first column for (r = 0; r < bs - 1; ++r) dst[r * stride] = AVG2(left[r], left[r + 1]); dst[(bs - 1) * stride] = left[bs - 1]; dst++; // second column for (r = 0; r < bs - 2; ++r) dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); dst[(bs - 1) * stride] = left[bs - 1]; dst++; // rest of last row for (c = 0; c < bs - 2; ++c) dst[(bs - 1) * stride + c] = left[bs - 1]; for (r = bs - 2; r >= 0; --r) for (c = 0; c < bs - 2; ++c) dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; } static INLINE void d63_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; int size; (void)left; for (c = 0; c < bs; ++c) { dst[c] = AVG2(above[c], above[c + 1]); dst[stride + c] = AVG3(above[c], above[c + 1], above[c + 2]); } for (r = 2, size = bs - 2; r < bs; r += 2, --size) { memcpy(dst + (r + 0) * stride, dst + (r >> 1), size); memset(dst + (r + 0) * stride + size, above[bs - 1], bs - size); memcpy(dst + (r + 1) * stride, dst + stride + (r >> 1), size); memset(dst + (r + 1) * stride + size, above[bs - 1], bs - size); } } static INLINE void d45_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { const uint8_t above_right = above[bs - 1]; const uint8_t *const dst_row0 = dst; int x, size; (void)left; for (x = 0; x < bs - 1; ++x) { dst[x] = AVG3(above[x], above[x + 1], above[x + 2]); } dst[bs - 1] = above_right; dst += stride; for (x = 1, size = bs - 2; x < bs; ++x, --size) { memcpy(dst, dst_row0 + x, size); memset(dst + size, above_right, x + 1); dst += stride; } } static INLINE void d117_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; // first row for (c = 0; c < bs; c++) dst[c] = AVG2(above[c - 1], above[c]); dst += stride; // second row dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); dst += stride; // the rest of first col dst[0] = AVG3(above[-1], left[0], left[1]); for (r = 3; r < bs; ++r) dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); // the rest of the block for (r = 2; r < bs; ++r) { for (c = 1; c < bs; c++) dst[c] = dst[-2 * stride + c - 1]; dst += stride; } } static INLINE void d135_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int i; #if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7 // silence a spurious -Warray-bounds warning, possibly related to: // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273 uint8_t border[69]; #else uint8_t border[32 + 32 - 1]; // outer border from bottom-left to top-right #endif // dst(bs, bs - 2)[0], i.e., border starting at bottom-left for (i = 0; i < bs - 2; ++i) { border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]); } border[bs - 2] = AVG3(above[-1], left[0], left[1]); border[bs - 1] = AVG3(left[0], above[-1], above[0]); border[bs - 0] = AVG3(above[-1], above[0], above[1]); // 
dst[0][2, size), i.e., remaining top border ascending for (i = 0; i < bs - 2; ++i) { border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]); } for (i = 0; i < bs; ++i) { memcpy(dst + i * stride, border + bs - 1 - i, bs); } } static INLINE void d153_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; dst[0] = AVG2(above[-1], left[0]); for (r = 1; r < bs; r++) dst[r * stride] = AVG2(left[r - 1], left[r]); dst++; dst[0] = AVG3(left[0], above[-1], above[0]); dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; r++) dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); dst++; for (c = 0; c < bs - 2; c++) dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); dst += stride; for (r = 1; r < bs; ++r) { for (c = 0; c < bs - 2; c++) dst[c] = dst[-stride + c - 2]; dst += stride; } } static INLINE void v_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r; (void)left; for (r = 0; r < bs; r++) { memcpy(dst, above, bs); dst += stride; } } static INLINE void h_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r; (void)above; for (r = 0; r < bs; r++) { memset(dst, left[r], bs); dst += stride; } } static INLINE void tm_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r, c; int ytop_left = above[-1]; for (r = 0; r < bs; r++) { for (c = 0; c < bs; c++) dst[c] = clip_pixel(left[r] + above[c] - ytop_left); dst += stride; } } static INLINE void dc_128_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int r; (void)above; (void)left; for (r = 0; r < bs; r++) { memset(dst, 128, bs); dst += stride; } } static INLINE void dc_left_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int i, r, expected_dc, sum = 0; (void)above; for (i = 0; i < bs; i++) sum += left[i]; expected_dc = (sum + (bs >> 1)) / bs; for (r = 0; r < bs; r++) { memset(dst, expected_dc, bs); dst += stride; } } static INLINE void dc_top_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int i, r, expected_dc, sum = 0; (void)left; for (i = 0; i < bs; i++) sum += above[i]; expected_dc = (sum + (bs >> 1)) / bs; for (r = 0; r < bs; r++) { memset(dst, expected_dc, bs); dst += stride; } } static INLINE void dc_predictor(uint8_t *dst, ptrdiff_t stride, int bs, const uint8_t *above, const uint8_t *left) { int i, r, expected_dc, sum = 0; const int count = 2 * bs; for (i = 0; i < bs; i++) { sum += above[i]; sum += left[i]; } expected_dc = (sum + (count >> 1)) / count; for (r = 0; r < bs; r++) { memset(dst, expected_dc, bs); dst += stride; } } void vpx_he_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int H = above[-1]; const int I = left[0]; const int J = left[1]; const int K = left[2]; const int L = left[3]; memset(dst + stride * 0, AVG3(H, I, J), 4); memset(dst + stride * 1, AVG3(I, J, K), 4); memset(dst + stride * 2, AVG3(J, K, L), 4); memset(dst + stride * 3, AVG3(K, L, L), 4); } void vpx_ve_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int H = above[-1]; const int I = above[0]; const int J = above[1]; const int K = above[2]; const int L = above[3]; const int M = above[4]; (void)left; dst[0] = AVG3(H, I, J); dst[1] = AVG3(I, J, K); dst[2] = AVG3(J, K, L); dst[3] = AVG3(K, L, M); memcpy(dst + stride 
* 1, dst, 4); memcpy(dst + stride * 2, dst, 4); memcpy(dst + stride * 3, dst, 4); } void vpx_d207_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int I = left[0]; const int J = left[1]; const int K = left[2]; const int L = left[3]; (void)above; DST(0, 0) = AVG2(I, J); DST(2, 0) = DST(0, 1) = AVG2(J, K); DST(2, 1) = DST(0, 2) = AVG2(K, L); DST(1, 0) = AVG3(I, J, K); DST(3, 0) = DST(1, 1) = AVG3(J, K, L); DST(3, 1) = DST(1, 2) = AVG3(K, L, L); DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; } void vpx_d63_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int A = above[0]; const int B = above[1]; const int C = above[2]; const int D = above[3]; const int E = above[4]; const int F = above[5]; const int G = above[6]; (void)left; DST(0, 0) = AVG2(A, B); DST(1, 0) = DST(0, 2) = AVG2(B, C); DST(2, 0) = DST(1, 2) = AVG2(C, D); DST(3, 0) = DST(2, 2) = AVG2(D, E); DST(3, 2) = AVG2(E, F); // differs from vp8 DST(0, 1) = AVG3(A, B, C); DST(1, 1) = DST(0, 3) = AVG3(B, C, D); DST(2, 1) = DST(1, 3) = AVG3(C, D, E); DST(3, 1) = DST(2, 3) = AVG3(D, E, F); DST(3, 3) = AVG3(E, F, G); // differs from vp8 } void vpx_d63e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int A = above[0]; const int B = above[1]; const int C = above[2]; const int D = above[3]; const int E = above[4]; const int F = above[5]; const int G = above[6]; const int H = above[7]; (void)left; DST(0, 0) = AVG2(A, B); DST(1, 0) = DST(0, 2) = AVG2(B, C); DST(2, 0) = DST(1, 2) = AVG2(C, D); DST(3, 0) = DST(2, 2) = AVG2(D, E); DST(3, 2) = AVG3(E, F, G); DST(0, 1) = AVG3(A, B, C); DST(1, 1) = DST(0, 3) = AVG3(B, C, D); DST(2, 1) = DST(1, 3) = AVG3(C, D, E); DST(3, 1) = DST(2, 3) = AVG3(D, E, F); DST(3, 3) = AVG3(F, G, H); } void vpx_d45_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int A = above[0]; const int B = above[1]; const int C = above[2]; const int D = above[3]; const int E = above[4]; const int F = above[5]; const int G = above[6]; const int H = above[7]; (void)stride; (void)left; DST(0, 0) = AVG3(A, B, C); DST(1, 0) = DST(0, 1) = AVG3(B, C, D); DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); DST(3, 2) = DST(2, 3) = AVG3(F, G, H); DST(3, 3) = H; // differs from vp8 } void vpx_d45e_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int A = above[0]; const int B = above[1]; const int C = above[2]; const int D = above[3]; const int E = above[4]; const int F = above[5]; const int G = above[6]; const int H = above[7]; (void)stride; (void)left; DST(0, 0) = AVG3(A, B, C); DST(1, 0) = DST(0, 1) = AVG3(B, C, D); DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); DST(3, 2) = DST(2, 3) = AVG3(F, G, H); DST(3, 3) = AVG3(G, H, H); } void vpx_d117_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int I = left[0]; const int J = left[1]; const int K = left[2]; const int X = above[-1]; const int A = above[0]; const int B = above[1]; const int C = above[2]; const int D = above[3]; DST(0, 0) = DST(1, 2) = AVG2(X, A); DST(1, 0) = DST(2, 2) = AVG2(A, B); DST(2, 0) = DST(3, 2) = AVG2(B, C); DST(3, 0) = AVG2(C, 
D); DST(0, 3) = AVG3(K, J, I); DST(0, 2) = AVG3(J, I, X); DST(0, 1) = DST(1, 3) = AVG3(I, X, A); DST(1, 1) = DST(2, 3) = AVG3(X, A, B); DST(2, 1) = DST(3, 3) = AVG3(A, B, C); DST(3, 1) = AVG3(B, C, D); } void vpx_d135_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int I = left[0]; const int J = left[1]; const int K = left[2]; const int L = left[3]; const int X = above[-1]; const int A = above[0]; const int B = above[1]; const int C = above[2]; const int D = above[3]; (void)stride; DST(0, 3) = AVG3(J, K, L); DST(1, 3) = DST(0, 2) = AVG3(I, J, K); DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); DST(3, 1) = DST(2, 0) = AVG3(C, B, A); DST(3, 0) = AVG3(D, C, B); } void vpx_d153_predictor_4x4_c(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int I = left[0]; const int J = left[1]; const int K = left[2]; const int L = left[3]; const int X = above[-1]; const int A = above[0]; const int B = above[1]; const int C = above[2]; DST(0, 0) = DST(2, 1) = AVG2(I, X); DST(0, 1) = DST(2, 2) = AVG2(J, I); DST(0, 2) = DST(2, 3) = AVG2(K, J); DST(0, 3) = AVG2(L, K); DST(3, 0) = AVG3(A, B, C); DST(2, 0) = AVG3(X, A, B); DST(1, 0) = DST(3, 1) = AVG3(I, X, A); DST(1, 1) = DST(3, 2) = AVG3(J, I, X); DST(1, 2) = DST(3, 3) = AVG3(K, J, I); DST(1, 3) = AVG3(L, K, J); } #if CONFIG_VP9_HIGHBITDEPTH static INLINE void highbd_d207_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { int r, c; (void)above; (void)bd; // First column. for (r = 0; r < bs - 1; ++r) { dst[r * stride] = AVG2(left[r], left[r + 1]); } dst[(bs - 1) * stride] = left[bs - 1]; dst++; // Second column. for (r = 0; r < bs - 2; ++r) { dst[r * stride] = AVG3(left[r], left[r + 1], left[r + 2]); } dst[(bs - 2) * stride] = AVG3(left[bs - 2], left[bs - 1], left[bs - 1]); dst[(bs - 1) * stride] = left[bs - 1]; dst++; // Rest of last row. 
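// (The first loop below floods the last row with the bottom-left sample;
// the second loop then fills the remaining columns bottom-up, each row
// taking the row beneath it shifted two pixels to the right, which
// propagates the down-left diagonal without recomputing any averages.)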
for (c = 0; c < bs - 2; ++c) dst[(bs - 1) * stride + c] = left[bs - 1]; for (r = bs - 2; r >= 0; --r) { for (c = 0; c < bs - 2; ++c) dst[r * stride + c] = dst[(r + 1) * stride + c - 2]; } } static INLINE void highbd_d63_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { int r, c; int size; (void)left; (void)bd; for (c = 0; c < bs; ++c) { dst[c] = AVG2(above[c], above[c + 1]); dst[stride + c] = AVG3(above[c], above[c + 1], above[c + 2]); } for (r = 2, size = bs - 2; r < bs; r += 2, --size) { memcpy(dst + (r + 0) * stride, dst + (r >> 1), size * sizeof(*dst)); vpx_memset16(dst + (r + 0) * stride + size, above[bs - 1], bs - size); memcpy(dst + (r + 1) * stride, dst + stride + (r >> 1), size * sizeof(*dst)); vpx_memset16(dst + (r + 1) * stride + size, above[bs - 1], bs - size); } } static INLINE void highbd_d45_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { const uint16_t above_right = above[bs - 1]; const uint16_t *const dst_row0 = dst; int x, size; (void)left; (void)bd; for (x = 0; x < bs - 1; ++x) { dst[x] = AVG3(above[x], above[x + 1], above[x + 2]); } dst[bs - 1] = above_right; dst += stride; for (x = 1, size = bs - 2; x < bs; ++x, --size) { memcpy(dst, dst_row0 + x, size * sizeof(*dst)); vpx_memset16(dst + size, above_right, x + 1); dst += stride; } } static INLINE void highbd_d117_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { int r, c; (void)bd; // first row for (c = 0; c < bs; c++) dst[c] = AVG2(above[c - 1], above[c]); dst += stride; // second row dst[0] = AVG3(left[0], above[-1], above[0]); for (c = 1; c < bs; c++) dst[c] = AVG3(above[c - 2], above[c - 1], above[c]); dst += stride; // the rest of first col dst[0] = AVG3(above[-1], left[0], left[1]); for (r = 3; r < bs; ++r) dst[(r - 2) * stride] = AVG3(left[r - 3], left[r - 2], left[r - 1]); // the rest of the block for (r = 2; r < bs; ++r) { for (c = 1; c < bs; c++) dst[c] = dst[-2 * stride + c - 1]; dst += stride; } } static INLINE void highbd_d135_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { int i; #if defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ > 7 // silence a spurious -Warray-bounds warning, possibly related to: // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=56273 uint16_t border[69]; #else uint16_t border[32 + 32 - 1]; // outer border from bottom-left to top-right #endif (void)bd; // dst(bs, bs - 2)[0], i.e., border starting at bottom-left for (i = 0; i < bs - 2; ++i) { border[i] = AVG3(left[bs - 3 - i], left[bs - 2 - i], left[bs - 1 - i]); } border[bs - 2] = AVG3(above[-1], left[0], left[1]); border[bs - 1] = AVG3(left[0], above[-1], above[0]); border[bs - 0] = AVG3(above[-1], above[0], above[1]); // dst[0][2, size), i.e., remaining top border ascending for (i = 0; i < bs - 2; ++i) { border[bs + 1 + i] = AVG3(above[i], above[i + 1], above[i + 2]); } for (i = 0; i < bs; ++i) { memcpy(dst + i * stride, border + bs - 1 - i, bs * sizeof(dst[0])); } } static INLINE void highbd_d153_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { int r, c; (void)bd; dst[0] = AVG2(above[-1], left[0]); for (r = 1; r < bs; r++) dst[r * stride] = AVG2(left[r - 1], left[r]); dst++; dst[0] = AVG3(left[0], above[-1], above[0]); dst[stride] = AVG3(above[-1], left[0], left[1]); for (r = 2; r < bs; r++) dst[r * stride] = AVG3(left[r - 2], left[r - 1], left[r]); 
dst++; for (c = 0; c < bs - 2; c++) dst[c] = AVG3(above[c - 1], above[c], above[c + 1]); dst += stride; for (r = 1; r < bs; ++r) { for (c = 0; c < bs - 2; c++) dst[c] = dst[-stride + c - 2]; dst += stride; } } static INLINE void highbd_v_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { int r; (void)left; (void)bd; for (r = 0; r < bs; r++) { memcpy(dst, above, bs * sizeof(uint16_t)); dst += stride; } } static INLINE void highbd_h_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { int r; (void)above; (void)bd; for (r = 0; r < bs; r++) { vpx_memset16(dst, left[r], bs); dst += stride; } } static INLINE void highbd_tm_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { int r, c; int ytop_left = above[-1]; (void)bd; for (r = 0; r < bs; r++) { for (c = 0; c < bs; c++) dst[c] = clip_pixel_highbd(left[r] + above[c] - ytop_left, bd); dst += stride; } } static INLINE void highbd_dc_128_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { int r; (void)above; (void)left; for (r = 0; r < bs; r++) { vpx_memset16(dst, 128 << (bd - 8), bs); dst += stride; } } static INLINE void highbd_dc_left_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { int i, r, expected_dc, sum = 0; (void)above; (void)bd; for (i = 0; i < bs; i++) sum += left[i]; expected_dc = (sum + (bs >> 1)) / bs; for (r = 0; r < bs; r++) { vpx_memset16(dst, expected_dc, bs); dst += stride; } } static INLINE void highbd_dc_top_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { int i, r, expected_dc, sum = 0; (void)left; (void)bd; for (i = 0; i < bs; i++) sum += above[i]; expected_dc = (sum + (bs >> 1)) / bs; for (r = 0; r < bs; r++) { vpx_memset16(dst, expected_dc, bs); dst += stride; } } static INLINE void highbd_dc_predictor(uint16_t *dst, ptrdiff_t stride, int bs, const uint16_t *above, const uint16_t *left, int bd) { int i, r, expected_dc, sum = 0; const int count = 2 * bs; (void)bd; for (i = 0; i < bs; i++) { sum += above[i]; sum += left[i]; } expected_dc = (sum + (count >> 1)) / count; for (r = 0; r < bs; r++) { vpx_memset16(dst, expected_dc, bs); dst += stride; } } void vpx_highbd_d207_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int I = left[0]; const int J = left[1]; const int K = left[2]; const int L = left[3]; (void)above; (void)bd; DST(0, 0) = AVG2(I, J); DST(2, 0) = DST(0, 1) = AVG2(J, K); DST(2, 1) = DST(0, 2) = AVG2(K, L); DST(1, 0) = AVG3(I, J, K); DST(3, 0) = DST(1, 1) = AVG3(J, K, L); DST(3, 1) = DST(1, 2) = AVG3(K, L, L); DST(3, 2) = DST(2, 2) = DST(0, 3) = DST(1, 3) = DST(2, 3) = DST(3, 3) = L; } void vpx_highbd_d63_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int A = above[0]; const int B = above[1]; const int C = above[2]; const int D = above[3]; const int E = above[4]; const int F = above[5]; const int G = above[6]; (void)left; (void)bd; DST(0, 0) = AVG2(A, B); DST(1, 0) = DST(0, 2) = AVG2(B, C); DST(2, 0) = DST(1, 2) = AVG2(C, D); DST(3, 0) = DST(2, 2) = AVG2(D, E); DST(3, 2) = AVG2(E, F); // differs from vp8 DST(0, 1) = AVG3(A, B, C); DST(1, 1) = DST(0, 3) = AVG3(B, C, D); DST(2, 1) = DST(1, 3) = AVG3(C, D, E); DST(3, 1) = DST(2, 3) = AVG3(D, E, F); DST(3, 3) = AVG3(E, F, 
G); // differs from vp8 } void vpx_highbd_d45_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int A = above[0]; const int B = above[1]; const int C = above[2]; const int D = above[3]; const int E = above[4]; const int F = above[5]; const int G = above[6]; const int H = above[7]; (void)left; (void)bd; DST(0, 0) = AVG3(A, B, C); DST(1, 0) = DST(0, 1) = AVG3(B, C, D); DST(2, 0) = DST(1, 1) = DST(0, 2) = AVG3(C, D, E); DST(3, 0) = DST(2, 1) = DST(1, 2) = DST(0, 3) = AVG3(D, E, F); DST(3, 1) = DST(2, 2) = DST(1, 3) = AVG3(E, F, G); DST(3, 2) = DST(2, 3) = AVG3(F, G, H); DST(3, 3) = H; // differs from vp8 } void vpx_highbd_d117_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int I = left[0]; const int J = left[1]; const int K = left[2]; const int X = above[-1]; const int A = above[0]; const int B = above[1]; const int C = above[2]; const int D = above[3]; (void)bd; DST(0, 0) = DST(1, 2) = AVG2(X, A); DST(1, 0) = DST(2, 2) = AVG2(A, B); DST(2, 0) = DST(3, 2) = AVG2(B, C); DST(3, 0) = AVG2(C, D); DST(0, 3) = AVG3(K, J, I); DST(0, 2) = AVG3(J, I, X); DST(0, 1) = DST(1, 3) = AVG3(I, X, A); DST(1, 1) = DST(2, 3) = AVG3(X, A, B); DST(2, 1) = DST(3, 3) = AVG3(A, B, C); DST(3, 1) = AVG3(B, C, D); } void vpx_highbd_d135_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int I = left[0]; const int J = left[1]; const int K = left[2]; const int L = left[3]; const int X = above[-1]; const int A = above[0]; const int B = above[1]; const int C = above[2]; const int D = above[3]; (void)bd; DST(0, 3) = AVG3(J, K, L); DST(1, 3) = DST(0, 2) = AVG3(I, J, K); DST(2, 3) = DST(1, 2) = DST(0, 1) = AVG3(X, I, J); DST(3, 3) = DST(2, 2) = DST(1, 1) = DST(0, 0) = AVG3(A, X, I); DST(3, 2) = DST(2, 1) = DST(1, 0) = AVG3(B, A, X); DST(3, 1) = DST(2, 0) = AVG3(C, B, A); DST(3, 0) = AVG3(D, C, B); } void vpx_highbd_d153_predictor_4x4_c(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int I = left[0]; const int J = left[1]; const int K = left[2]; const int L = left[3]; const int X = above[-1]; const int A = above[0]; const int B = above[1]; const int C = above[2]; (void)bd; DST(0, 0) = DST(2, 1) = AVG2(I, X); DST(0, 1) = DST(2, 2) = AVG2(J, I); DST(0, 2) = DST(2, 3) = AVG2(K, J); DST(0, 3) = AVG2(L, K); DST(3, 0) = AVG3(A, B, C); DST(2, 0) = AVG3(X, A, B); DST(1, 0) = DST(3, 1) = AVG3(I, X, A); DST(1, 1) = DST(3, 2) = AVG3(J, I, X); DST(1, 2) = DST(3, 3) = AVG3(K, J, I); DST(1, 3) = AVG3(L, K, J); } #endif // CONFIG_VP9_HIGHBITDEPTH // This serves as a wrapper function, so that all the prediction functions // can be unified and accessed as a pointer array. Note that the boundary // above and left are not necessarily used all the time. 
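// For example, intra_pred_sized(dc, 8) below expands to:
//
//   void vpx_dc_predictor_8x8_c(uint8_t *dst, ptrdiff_t stride,
//                               const uint8_t *above, const uint8_t *left) {
//     dc_predictor(dst, stride, 8, above, left);
//   }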
#define intra_pred_sized(type, size)                        \
  void vpx_##type##_predictor_##size##x##size##_c(          \
      uint8_t *dst, ptrdiff_t stride, const uint8_t *above, \
      const uint8_t *left) {                                \
    type##_predictor(dst, stride, size, above, left);       \
  }

#if CONFIG_VP9_HIGHBITDEPTH
#define intra_pred_highbd_sized(type, size)                        \
  void vpx_highbd_##type##_predictor_##size##x##size##_c(          \
      uint16_t *dst, ptrdiff_t stride, const uint16_t *above,      \
      const uint16_t *left, int bd) {                              \
    highbd_##type##_predictor(dst, stride, size, above, left, bd); \
  }

/* clang-format off */
#define intra_pred_allsizes(type) \
  intra_pred_sized(type, 4) \
  intra_pred_sized(type, 8) \
  intra_pred_sized(type, 16) \
  intra_pred_sized(type, 32) \
  intra_pred_highbd_sized(type, 4) \
  intra_pred_highbd_sized(type, 8) \
  intra_pred_highbd_sized(type, 16) \
  intra_pred_highbd_sized(type, 32)

#define intra_pred_no_4x4(type) \
  intra_pred_sized(type, 8) \
  intra_pred_sized(type, 16) \
  intra_pred_sized(type, 32) \
  intra_pred_highbd_sized(type, 8) \
  intra_pred_highbd_sized(type, 16) \
  intra_pred_highbd_sized(type, 32)
#else
#define intra_pred_allsizes(type) \
  intra_pred_sized(type, 4) \
  intra_pred_sized(type, 8) \
  intra_pred_sized(type, 16) \
  intra_pred_sized(type, 32)

#define intra_pred_no_4x4(type) \
  intra_pred_sized(type, 8) \
  intra_pred_sized(type, 16) \
  intra_pred_sized(type, 32)
#endif  // CONFIG_VP9_HIGHBITDEPTH

intra_pred_no_4x4(d207)
intra_pred_no_4x4(d63)
intra_pred_no_4x4(d45)
intra_pred_no_4x4(d117)
intra_pred_no_4x4(d135)
intra_pred_no_4x4(d153)
intra_pred_allsizes(v)
intra_pred_allsizes(h)
intra_pred_allsizes(tm)
intra_pred_allsizes(dc_128)
intra_pred_allsizes(dc_left)
intra_pred_allsizes(dc_top)
intra_pred_allsizes(dc)
/* clang-format on */
#undef intra_pred_allsizes

libvpx-1.8.2/vpx_dsp/inv_txfm.c

/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/inv_txfm.h"

void vpx_iwht4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) {
  /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds,
     0.5 shifts per pixel.
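     The lifting step e1 = (a1 - d1) >> 1 below makes each butterfly
     exactly invertible in integer arithmetic, which is what lets VP9's
     lossless mode round-trip pixels bit-exactly through this transform.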
*/ int i; tran_low_t output[16]; tran_high_t a1, b1, c1, d1, e1; const tran_low_t *ip = input; tran_low_t *op = output; for (i = 0; i < 4; i++) { a1 = ip[0] >> UNIT_QUANT_SHIFT; c1 = ip[1] >> UNIT_QUANT_SHIFT; d1 = ip[2] >> UNIT_QUANT_SHIFT; b1 = ip[3] >> UNIT_QUANT_SHIFT; a1 += c1; d1 -= b1; e1 = (a1 - d1) >> 1; b1 = e1 - b1; c1 = e1 - c1; a1 -= b1; d1 += c1; op[0] = WRAPLOW(a1); op[1] = WRAPLOW(b1); op[2] = WRAPLOW(c1); op[3] = WRAPLOW(d1); ip += 4; op += 4; } ip = output; for (i = 0; i < 4; i++) { a1 = ip[4 * 0]; c1 = ip[4 * 1]; d1 = ip[4 * 2]; b1 = ip[4 * 3]; a1 += c1; d1 -= b1; e1 = (a1 - d1) >> 1; b1 = e1 - b1; c1 = e1 - c1; a1 -= b1; d1 += c1; dest[stride * 0] = clip_pixel_add(dest[stride * 0], WRAPLOW(a1)); dest[stride * 1] = clip_pixel_add(dest[stride * 1], WRAPLOW(b1)); dest[stride * 2] = clip_pixel_add(dest[stride * 2], WRAPLOW(c1)); dest[stride * 3] = clip_pixel_add(dest[stride * 3], WRAPLOW(d1)); ip++; dest++; } } void vpx_iwht4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i; tran_high_t a1, e1; tran_low_t tmp[4]; const tran_low_t *ip = input; tran_low_t *op = tmp; a1 = ip[0] >> UNIT_QUANT_SHIFT; e1 = a1 >> 1; a1 -= e1; op[0] = WRAPLOW(a1); op[1] = op[2] = op[3] = WRAPLOW(e1); ip = tmp; for (i = 0; i < 4; i++) { e1 = ip[0] >> 1; a1 = ip[0] - e1; dest[stride * 0] = clip_pixel_add(dest[stride * 0], a1); dest[stride * 1] = clip_pixel_add(dest[stride * 1], e1); dest[stride * 2] = clip_pixel_add(dest[stride * 2], e1); dest[stride * 3] = clip_pixel_add(dest[stride * 3], e1); ip++; dest++; } } void iadst4_c(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; tran_low_t x0 = input[0]; tran_low_t x1 = input[1]; tran_low_t x2 = input[2]; tran_low_t x3 = input[3]; if (!(x0 | x1 | x2 | x3)) { memset(output, 0, 4 * sizeof(*output)); return; } // 32-bit result is enough for the following multiplications. s0 = sinpi_1_9 * x0; s1 = sinpi_2_9 * x0; s2 = sinpi_3_9 * x1; s3 = sinpi_4_9 * x2; s4 = sinpi_1_9 * x2; s5 = sinpi_2_9 * x3; s6 = sinpi_4_9 * x3; s7 = WRAPLOW(x0 - x2 + x3); s0 = s0 + s3 + s5; s1 = s1 - s4 - s6; s3 = s2; s2 = sinpi_3_9 * s7; // 1-D transform scaling factor is sqrt(2). // The overall dynamic range is 14b (input) + 14b (multiplication scaling) // + 1b (addition) = 29b. // Hence the output bit depth is 15b. 
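// dct_const_round_shift() divides by 2^DCT_CONST_BITS (2^14) with rounding,
// taking the 29-bit sums above back down to the 15-bit output range.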
output[0] = WRAPLOW(dct_const_round_shift(s0 + s3)); output[1] = WRAPLOW(dct_const_round_shift(s1 + s3)); output[2] = WRAPLOW(dct_const_round_shift(s2)); output[3] = WRAPLOW(dct_const_round_shift(s0 + s1 - s3)); } void idct4_c(const tran_low_t *input, tran_low_t *output) { int16_t step[4]; tran_high_t temp1, temp2; // stage 1 temp1 = ((int16_t)input[0] + (int16_t)input[2]) * cospi_16_64; temp2 = ((int16_t)input[0] - (int16_t)input[2]) * cospi_16_64; step[0] = WRAPLOW(dct_const_round_shift(temp1)); step[1] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (int16_t)input[1] * cospi_24_64 - (int16_t)input[3] * cospi_8_64; temp2 = (int16_t)input[1] * cospi_8_64 + (int16_t)input[3] * cospi_24_64; step[2] = WRAPLOW(dct_const_round_shift(temp1)); step[3] = WRAPLOW(dct_const_round_shift(temp2)); // stage 2 output[0] = WRAPLOW(step[0] + step[3]); output[1] = WRAPLOW(step[1] + step[2]); output[2] = WRAPLOW(step[1] - step[2]); output[3] = WRAPLOW(step[0] - step[3]); } void vpx_idct4x4_16_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_low_t out[4 * 4]; tran_low_t *outptr = out; tran_low_t temp_in[4], temp_out[4]; // Rows for (i = 0; i < 4; ++i) { idct4_c(input, outptr); input += 4; outptr += 4; } // Columns for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; idct4_c(temp_in, temp_out); for (j = 0; j < 4; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4)); } } } void vpx_idct4x4_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i; tran_high_t a1; tran_low_t out = WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 4); for (i = 0; i < 4; i++) { dest[0] = clip_pixel_add(dest[0], a1); dest[1] = clip_pixel_add(dest[1], a1); dest[2] = clip_pixel_add(dest[2], a1); dest[3] = clip_pixel_add(dest[3], a1); dest += stride; } } void iadst8_c(const tran_low_t *input, tran_low_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7; tran_high_t x0 = input[7]; tran_high_t x1 = input[0]; tran_high_t x2 = input[5]; tran_high_t x3 = input[2]; tran_high_t x4 = input[3]; tran_high_t x5 = input[4]; tran_high_t x6 = input[1]; tran_high_t x7 = input[6]; if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { memset(output, 0, 8 * sizeof(*output)); return; } // stage 1 s0 = (int)(cospi_2_64 * x0 + cospi_30_64 * x1); s1 = (int)(cospi_30_64 * x0 - cospi_2_64 * x1); s2 = (int)(cospi_10_64 * x2 + cospi_22_64 * x3); s3 = (int)(cospi_22_64 * x2 - cospi_10_64 * x3); s4 = (int)(cospi_18_64 * x4 + cospi_14_64 * x5); s5 = (int)(cospi_14_64 * x4 - cospi_18_64 * x5); s6 = (int)(cospi_26_64 * x6 + cospi_6_64 * x7); s7 = (int)(cospi_6_64 * x6 - cospi_26_64 * x7); x0 = WRAPLOW(dct_const_round_shift(s0 + s4)); x1 = WRAPLOW(dct_const_round_shift(s1 + s5)); x2 = WRAPLOW(dct_const_round_shift(s2 + s6)); x3 = WRAPLOW(dct_const_round_shift(s3 + s7)); x4 = WRAPLOW(dct_const_round_shift(s0 - s4)); x5 = WRAPLOW(dct_const_round_shift(s1 - s5)); x6 = WRAPLOW(dct_const_round_shift(s2 - s6)); x7 = WRAPLOW(dct_const_round_shift(s3 - s7)); // stage 2 s0 = (int)x0; s1 = (int)x1; s2 = (int)x2; s3 = (int)x3; s4 = (int)(cospi_8_64 * x4 + cospi_24_64 * x5); s5 = (int)(cospi_24_64 * x4 - cospi_8_64 * x5); s6 = (int)(-cospi_24_64 * x6 + cospi_8_64 * x7); s7 = (int)(cospi_8_64 * x6 + cospi_24_64 * x7); x0 = WRAPLOW(s0 + s2); x1 = WRAPLOW(s1 + s3); x2 = WRAPLOW(s0 - s2); x3 = WRAPLOW(s1 - s3); x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); x5 = 
WRAPLOW(dct_const_round_shift(s5 + s7)); x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); // stage 3 s2 = (int)(cospi_16_64 * (x2 + x3)); s3 = (int)(cospi_16_64 * (x2 - x3)); s6 = (int)(cospi_16_64 * (x6 + x7)); s7 = (int)(cospi_16_64 * (x6 - x7)); x2 = WRAPLOW(dct_const_round_shift(s2)); x3 = WRAPLOW(dct_const_round_shift(s3)); x6 = WRAPLOW(dct_const_round_shift(s6)); x7 = WRAPLOW(dct_const_round_shift(s7)); output[0] = WRAPLOW(x0); output[1] = WRAPLOW(-x4); output[2] = WRAPLOW(x6); output[3] = WRAPLOW(-x2); output[4] = WRAPLOW(x3); output[5] = WRAPLOW(-x7); output[6] = WRAPLOW(x5); output[7] = WRAPLOW(-x1); } void idct8_c(const tran_low_t *input, tran_low_t *output) { int16_t step1[8], step2[8]; tran_high_t temp1, temp2; // stage 1 step1[0] = (int16_t)input[0]; step1[2] = (int16_t)input[4]; step1[1] = (int16_t)input[2]; step1[3] = (int16_t)input[6]; temp1 = (int16_t)input[1] * cospi_28_64 - (int16_t)input[7] * cospi_4_64; temp2 = (int16_t)input[1] * cospi_4_64 + (int16_t)input[7] * cospi_28_64; step1[4] = WRAPLOW(dct_const_round_shift(temp1)); step1[7] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (int16_t)input[5] * cospi_12_64 - (int16_t)input[3] * cospi_20_64; temp2 = (int16_t)input[5] * cospi_20_64 + (int16_t)input[3] * cospi_12_64; step1[5] = WRAPLOW(dct_const_round_shift(temp1)); step1[6] = WRAPLOW(dct_const_round_shift(temp2)); // stage 2 temp1 = (step1[0] + step1[2]) * cospi_16_64; temp2 = (step1[0] - step1[2]) * cospi_16_64; step2[0] = WRAPLOW(dct_const_round_shift(temp1)); step2[1] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[1] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[1] * cospi_8_64 + step1[3] * cospi_24_64; step2[2] = WRAPLOW(dct_const_round_shift(temp1)); step2[3] = WRAPLOW(dct_const_round_shift(temp2)); step2[4] = WRAPLOW(step1[4] + step1[5]); step2[5] = WRAPLOW(step1[4] - step1[5]); step2[6] = WRAPLOW(-step1[6] + step1[7]); step2[7] = WRAPLOW(step1[6] + step1[7]); // stage 3 step1[0] = WRAPLOW(step2[0] + step2[3]); step1[1] = WRAPLOW(step2[1] + step2[2]); step1[2] = WRAPLOW(step2[1] - step2[2]); step1[3] = WRAPLOW(step2[0] - step2[3]); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; step1[5] = WRAPLOW(dct_const_round_shift(temp1)); step1[6] = WRAPLOW(dct_const_round_shift(temp2)); step1[7] = step2[7]; // stage 4 output[0] = WRAPLOW(step1[0] + step1[7]); output[1] = WRAPLOW(step1[1] + step1[6]); output[2] = WRAPLOW(step1[2] + step1[5]); output[3] = WRAPLOW(step1[3] + step1[4]); output[4] = WRAPLOW(step1[3] - step1[4]); output[5] = WRAPLOW(step1[2] - step1[5]); output[6] = WRAPLOW(step1[1] - step1[6]); output[7] = WRAPLOW(step1[0] - step1[7]); } void vpx_idct8x8_64_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_low_t out[8 * 8]; tran_low_t *outptr = out; tran_low_t temp_in[8], temp_out[8]; // First transform rows for (i = 0; i < 8; ++i) { idct8_c(input, outptr); input += 8; outptr += 8; } // Then transform columns for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; idct8_c(temp_in, temp_out); for (j = 0; j < 8; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5)); } } } void vpx_idct8x8_12_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_low_t out[8 * 8] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[8], temp_out[8]; // First transform rows // Only first 4 row has non-zero coefs for (i = 0; i < 4; ++i) { 
idct8_c(input, outptr); input += 8; outptr += 8; } // Then transform columns for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; idct8_c(temp_in, temp_out); for (j = 0; j < 8; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5)); } } } void vpx_idct8x8_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; tran_low_t out = WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 5); for (j = 0; j < 8; ++j) { for (i = 0; i < 8; ++i) dest[i] = clip_pixel_add(dest[i], a1); dest += stride; } } void iadst16_c(const tran_low_t *input, tran_low_t *output) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; tran_high_t s9, s10, s11, s12, s13, s14, s15; tran_high_t x0 = input[15]; tran_high_t x1 = input[0]; tran_high_t x2 = input[13]; tran_high_t x3 = input[2]; tran_high_t x4 = input[11]; tran_high_t x5 = input[4]; tran_high_t x6 = input[9]; tran_high_t x7 = input[6]; tran_high_t x8 = input[7]; tran_high_t x9 = input[8]; tran_high_t x10 = input[5]; tran_high_t x11 = input[10]; tran_high_t x12 = input[3]; tran_high_t x13 = input[12]; tran_high_t x14 = input[1]; tran_high_t x15 = input[14]; if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { memset(output, 0, 16 * sizeof(*output)); return; } // stage 1 s0 = x0 * cospi_1_64 + x1 * cospi_31_64; s1 = x0 * cospi_31_64 - x1 * cospi_1_64; s2 = x2 * cospi_5_64 + x3 * cospi_27_64; s3 = x2 * cospi_27_64 - x3 * cospi_5_64; s4 = x4 * cospi_9_64 + x5 * cospi_23_64; s5 = x4 * cospi_23_64 - x5 * cospi_9_64; s6 = x6 * cospi_13_64 + x7 * cospi_19_64; s7 = x6 * cospi_19_64 - x7 * cospi_13_64; s8 = x8 * cospi_17_64 + x9 * cospi_15_64; s9 = x8 * cospi_15_64 - x9 * cospi_17_64; s10 = x10 * cospi_21_64 + x11 * cospi_11_64; s11 = x10 * cospi_11_64 - x11 * cospi_21_64; s12 = x12 * cospi_25_64 + x13 * cospi_7_64; s13 = x12 * cospi_7_64 - x13 * cospi_25_64; s14 = x14 * cospi_29_64 + x15 * cospi_3_64; s15 = x14 * cospi_3_64 - x15 * cospi_29_64; x0 = WRAPLOW(dct_const_round_shift(s0 + s8)); x1 = WRAPLOW(dct_const_round_shift(s1 + s9)); x2 = WRAPLOW(dct_const_round_shift(s2 + s10)); x3 = WRAPLOW(dct_const_round_shift(s3 + s11)); x4 = WRAPLOW(dct_const_round_shift(s4 + s12)); x5 = WRAPLOW(dct_const_round_shift(s5 + s13)); x6 = WRAPLOW(dct_const_round_shift(s6 + s14)); x7 = WRAPLOW(dct_const_round_shift(s7 + s15)); x8 = WRAPLOW(dct_const_round_shift(s0 - s8)); x9 = WRAPLOW(dct_const_round_shift(s1 - s9)); x10 = WRAPLOW(dct_const_round_shift(s2 - s10)); x11 = WRAPLOW(dct_const_round_shift(s3 - s11)); x12 = WRAPLOW(dct_const_round_shift(s4 - s12)); x13 = WRAPLOW(dct_const_round_shift(s5 - s13)); x14 = WRAPLOW(dct_const_round_shift(s6 - s14)); x15 = WRAPLOW(dct_const_round_shift(s7 - s15)); // stage 2 s0 = x0; s1 = x1; s2 = x2; s3 = x3; s4 = x4; s5 = x5; s6 = x6; s7 = x7; s8 = x8 * cospi_4_64 + x9 * cospi_28_64; s9 = x8 * cospi_28_64 - x9 * cospi_4_64; s10 = x10 * cospi_20_64 + x11 * cospi_12_64; s11 = x10 * cospi_12_64 - x11 * cospi_20_64; s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; s13 = x12 * cospi_4_64 + x13 * cospi_28_64; s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; s15 = x14 * cospi_20_64 + x15 * cospi_12_64; x0 = WRAPLOW(s0 + s4); x1 = WRAPLOW(s1 + s5); x2 = WRAPLOW(s2 + s6); x3 = WRAPLOW(s3 + s7); x4 = WRAPLOW(s0 - s4); x5 = WRAPLOW(s1 - s5); x6 = WRAPLOW(s2 - s6); x7 = WRAPLOW(s3 - s7); x8 = WRAPLOW(dct_const_round_shift(s8 + s12)); 
x9 = WRAPLOW(dct_const_round_shift(s9 + s13)); x10 = WRAPLOW(dct_const_round_shift(s10 + s14)); x11 = WRAPLOW(dct_const_round_shift(s11 + s15)); x12 = WRAPLOW(dct_const_round_shift(s8 - s12)); x13 = WRAPLOW(dct_const_round_shift(s9 - s13)); x14 = WRAPLOW(dct_const_round_shift(s10 - s14)); x15 = WRAPLOW(dct_const_round_shift(s11 - s15)); // stage 3 s0 = x0; s1 = x1; s2 = x2; s3 = x3; s4 = x4 * cospi_8_64 + x5 * cospi_24_64; s5 = x4 * cospi_24_64 - x5 * cospi_8_64; s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; s7 = x6 * cospi_8_64 + x7 * cospi_24_64; s8 = x8; s9 = x9; s10 = x10; s11 = x11; s12 = x12 * cospi_8_64 + x13 * cospi_24_64; s13 = x12 * cospi_24_64 - x13 * cospi_8_64; s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; s15 = x14 * cospi_8_64 + x15 * cospi_24_64; x0 = WRAPLOW(s0 + s2); x1 = WRAPLOW(s1 + s3); x2 = WRAPLOW(s0 - s2); x3 = WRAPLOW(s1 - s3); x4 = WRAPLOW(dct_const_round_shift(s4 + s6)); x5 = WRAPLOW(dct_const_round_shift(s5 + s7)); x6 = WRAPLOW(dct_const_round_shift(s4 - s6)); x7 = WRAPLOW(dct_const_round_shift(s5 - s7)); x8 = WRAPLOW(s8 + s10); x9 = WRAPLOW(s9 + s11); x10 = WRAPLOW(s8 - s10); x11 = WRAPLOW(s9 - s11); x12 = WRAPLOW(dct_const_round_shift(s12 + s14)); x13 = WRAPLOW(dct_const_round_shift(s13 + s15)); x14 = WRAPLOW(dct_const_round_shift(s12 - s14)); x15 = WRAPLOW(dct_const_round_shift(s13 - s15)); // stage 4 s2 = (-cospi_16_64) * (x2 + x3); s3 = cospi_16_64 * (x2 - x3); s6 = cospi_16_64 * (x6 + x7); s7 = cospi_16_64 * (-x6 + x7); s10 = cospi_16_64 * (x10 + x11); s11 = cospi_16_64 * (-x10 + x11); s14 = (-cospi_16_64) * (x14 + x15); s15 = cospi_16_64 * (x14 - x15); x2 = WRAPLOW(dct_const_round_shift(s2)); x3 = WRAPLOW(dct_const_round_shift(s3)); x6 = WRAPLOW(dct_const_round_shift(s6)); x7 = WRAPLOW(dct_const_round_shift(s7)); x10 = WRAPLOW(dct_const_round_shift(s10)); x11 = WRAPLOW(dct_const_round_shift(s11)); x14 = WRAPLOW(dct_const_round_shift(s14)); x15 = WRAPLOW(dct_const_round_shift(s15)); output[0] = WRAPLOW(x0); output[1] = WRAPLOW(-x8); output[2] = WRAPLOW(x12); output[3] = WRAPLOW(-x4); output[4] = WRAPLOW(x6); output[5] = WRAPLOW(x14); output[6] = WRAPLOW(x10); output[7] = WRAPLOW(x2); output[8] = WRAPLOW(x3); output[9] = WRAPLOW(x11); output[10] = WRAPLOW(x15); output[11] = WRAPLOW(x7); output[12] = WRAPLOW(x5); output[13] = WRAPLOW(-x13); output[14] = WRAPLOW(x9); output[15] = WRAPLOW(-x1); } void idct16_c(const tran_low_t *input, tran_low_t *output) { int16_t step1[16], step2[16]; tran_high_t temp1, temp2; // stage 1 step1[0] = (int16_t)input[0 / 2]; step1[1] = (int16_t)input[16 / 2]; step1[2] = (int16_t)input[8 / 2]; step1[3] = (int16_t)input[24 / 2]; step1[4] = (int16_t)input[4 / 2]; step1[5] = (int16_t)input[20 / 2]; step1[6] = (int16_t)input[12 / 2]; step1[7] = (int16_t)input[28 / 2]; step1[8] = (int16_t)input[2 / 2]; step1[9] = (int16_t)input[18 / 2]; step1[10] = (int16_t)input[10 / 2]; step1[11] = (int16_t)input[26 / 2]; step1[12] = (int16_t)input[6 / 2]; step1[13] = (int16_t)input[22 / 2]; step1[14] = (int16_t)input[14 / 2]; step1[15] = (int16_t)input[30 / 2]; // stage 2 step2[0] = step1[0]; step2[1] = step1[1]; step2[2] = step1[2]; step2[3] = step1[3]; step2[4] = step1[4]; step2[5] = step1[5]; step2[6] = step1[6]; step2[7] = step1[7]; temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; step2[8] = WRAPLOW(dct_const_round_shift(temp1)); step2[15] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; temp2 = step1[9] * cospi_18_64 + step1[14] * 
cospi_14_64; step2[9] = WRAPLOW(dct_const_round_shift(temp1)); step2[14] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; step2[10] = WRAPLOW(dct_const_round_shift(temp1)); step2[13] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; step2[11] = WRAPLOW(dct_const_round_shift(temp1)); step2[12] = WRAPLOW(dct_const_round_shift(temp2)); // stage 3 step1[0] = step2[0]; step1[1] = step2[1]; step1[2] = step2[2]; step1[3] = step2[3]; temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; step1[4] = WRAPLOW(dct_const_round_shift(temp1)); step1[7] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; step1[5] = WRAPLOW(dct_const_round_shift(temp1)); step1[6] = WRAPLOW(dct_const_round_shift(temp2)); step1[8] = WRAPLOW(step2[8] + step2[9]); step1[9] = WRAPLOW(step2[8] - step2[9]); step1[10] = WRAPLOW(-step2[10] + step2[11]); step1[11] = WRAPLOW(step2[10] + step2[11]); step1[12] = WRAPLOW(step2[12] + step2[13]); step1[13] = WRAPLOW(step2[12] - step2[13]); step1[14] = WRAPLOW(-step2[14] + step2[15]); step1[15] = WRAPLOW(step2[14] + step2[15]); // stage 4 temp1 = (step1[0] + step1[1]) * cospi_16_64; temp2 = (step1[0] - step1[1]) * cospi_16_64; step2[0] = WRAPLOW(dct_const_round_shift(temp1)); step2[1] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; step2[2] = WRAPLOW(dct_const_round_shift(temp1)); step2[3] = WRAPLOW(dct_const_round_shift(temp2)); step2[4] = WRAPLOW(step1[4] + step1[5]); step2[5] = WRAPLOW(step1[4] - step1[5]); step2[6] = WRAPLOW(-step1[6] + step1[7]); step2[7] = WRAPLOW(step1[6] + step1[7]); step2[8] = step1[8]; step2[15] = step1[15]; temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; step2[9] = WRAPLOW(dct_const_round_shift(temp1)); step2[14] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; step2[10] = WRAPLOW(dct_const_round_shift(temp1)); step2[13] = WRAPLOW(dct_const_round_shift(temp2)); step2[11] = step1[11]; step2[12] = step1[12]; // stage 5 step1[0] = WRAPLOW(step2[0] + step2[3]); step1[1] = WRAPLOW(step2[1] + step2[2]); step1[2] = WRAPLOW(step2[1] - step2[2]); step1[3] = WRAPLOW(step2[0] - step2[3]); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; step1[5] = WRAPLOW(dct_const_round_shift(temp1)); step1[6] = WRAPLOW(dct_const_round_shift(temp2)); step1[7] = step2[7]; step1[8] = WRAPLOW(step2[8] + step2[11]); step1[9] = WRAPLOW(step2[9] + step2[10]); step1[10] = WRAPLOW(step2[9] - step2[10]); step1[11] = WRAPLOW(step2[8] - step2[11]); step1[12] = WRAPLOW(-step2[12] + step2[15]); step1[13] = WRAPLOW(-step2[13] + step2[14]); step1[14] = WRAPLOW(step2[13] + step2[14]); step1[15] = WRAPLOW(step2[12] + step2[15]); // stage 6 step2[0] = WRAPLOW(step1[0] + step1[7]); step2[1] = WRAPLOW(step1[1] + step1[6]); step2[2] = WRAPLOW(step1[2] + step1[5]); step2[3] = WRAPLOW(step1[3] + step1[4]); step2[4] = WRAPLOW(step1[3] - step1[4]); step2[5] = WRAPLOW(step1[2] - step1[5]); step2[6] = 
WRAPLOW(step1[1] - step1[6]); step2[7] = WRAPLOW(step1[0] - step1[7]); step2[8] = step1[8]; step2[9] = step1[9]; temp1 = (-step1[10] + step1[13]) * cospi_16_64; temp2 = (step1[10] + step1[13]) * cospi_16_64; step2[10] = WRAPLOW(dct_const_round_shift(temp1)); step2[13] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step1[11] + step1[12]) * cospi_16_64; temp2 = (step1[11] + step1[12]) * cospi_16_64; step2[11] = WRAPLOW(dct_const_round_shift(temp1)); step2[12] = WRAPLOW(dct_const_round_shift(temp2)); step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 output[0] = (tran_low_t)WRAPLOW(step2[0] + step2[15]); output[1] = (tran_low_t)WRAPLOW(step2[1] + step2[14]); output[2] = (tran_low_t)WRAPLOW(step2[2] + step2[13]); output[3] = (tran_low_t)WRAPLOW(step2[3] + step2[12]); output[4] = (tran_low_t)WRAPLOW(step2[4] + step2[11]); output[5] = (tran_low_t)WRAPLOW(step2[5] + step2[10]); output[6] = (tran_low_t)WRAPLOW(step2[6] + step2[9]); output[7] = (tran_low_t)WRAPLOW(step2[7] + step2[8]); output[8] = (tran_low_t)WRAPLOW(step2[7] - step2[8]); output[9] = (tran_low_t)WRAPLOW(step2[6] - step2[9]); output[10] = (tran_low_t)WRAPLOW(step2[5] - step2[10]); output[11] = (tran_low_t)WRAPLOW(step2[4] - step2[11]); output[12] = (tran_low_t)WRAPLOW(step2[3] - step2[12]); output[13] = (tran_low_t)WRAPLOW(step2[2] - step2[13]); output[14] = (tran_low_t)WRAPLOW(step2[1] - step2[14]); output[15] = (tran_low_t)WRAPLOW(step2[0] - step2[15]); } void vpx_idct16x16_256_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_low_t out[16 * 16]; tran_low_t *outptr = out; tran_low_t temp_in[16], temp_out[16]; // First transform rows for (i = 0; i < 16; ++i) { idct16_c(input, outptr); input += 16; outptr += 16; } // Then transform columns for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; idct16_c(temp_in, temp_out); for (j = 0; j < 16; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6)); } } } void vpx_idct16x16_38_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_low_t out[16 * 16] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[16], temp_out[16]; // First transform rows. Since all non-zero dct coefficients are in // upper-left 8x8 area, we only need to calculate first 8 rows here. for (i = 0; i < 8; ++i) { idct16_c(input, outptr); input += 16; outptr += 16; } // Then transform columns for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; idct16_c(temp_in, temp_out); for (j = 0; j < 16; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6)); } } } void vpx_idct16x16_10_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_low_t out[16 * 16] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[16], temp_out[16]; // First transform rows. Since all non-zero dct coefficients are in // upper-left 4x4 area, we only need to calculate first 4 rows here. 
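// (The _256/_38/_10/_1 suffixes give the largest number of non-zero
// coefficients each variant handles; callers such as the VP9 decoder pick
// a variant from the block's end-of-block position, so sparse blocks skip
// most of the row transforms.)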
for (i = 0; i < 4; ++i) { idct16_c(input, outptr); input += 16; outptr += 16; } // Then transform columns for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; idct16_c(temp_in, temp_out); for (j = 0; j < 16; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6)); } } } void vpx_idct16x16_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; tran_low_t out = WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 16; ++j) { for (i = 0; i < 16; ++i) dest[i] = clip_pixel_add(dest[i], a1); dest += stride; } } void idct32_c(const tran_low_t *input, tran_low_t *output) { int16_t step1[32], step2[32]; tran_high_t temp1, temp2; // stage 1 step1[0] = (int16_t)input[0]; step1[1] = (int16_t)input[16]; step1[2] = (int16_t)input[8]; step1[3] = (int16_t)input[24]; step1[4] = (int16_t)input[4]; step1[5] = (int16_t)input[20]; step1[6] = (int16_t)input[12]; step1[7] = (int16_t)input[28]; step1[8] = (int16_t)input[2]; step1[9] = (int16_t)input[18]; step1[10] = (int16_t)input[10]; step1[11] = (int16_t)input[26]; step1[12] = (int16_t)input[6]; step1[13] = (int16_t)input[22]; step1[14] = (int16_t)input[14]; step1[15] = (int16_t)input[30]; temp1 = (int16_t)input[1] * cospi_31_64 - (int16_t)input[31] * cospi_1_64; temp2 = (int16_t)input[1] * cospi_1_64 + (int16_t)input[31] * cospi_31_64; step1[16] = WRAPLOW(dct_const_round_shift(temp1)); step1[31] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (int16_t)input[17] * cospi_15_64 - (int16_t)input[15] * cospi_17_64; temp2 = (int16_t)input[17] * cospi_17_64 + (int16_t)input[15] * cospi_15_64; step1[17] = WRAPLOW(dct_const_round_shift(temp1)); step1[30] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (int16_t)input[9] * cospi_23_64 - (int16_t)input[23] * cospi_9_64; temp2 = (int16_t)input[9] * cospi_9_64 + (int16_t)input[23] * cospi_23_64; step1[18] = WRAPLOW(dct_const_round_shift(temp1)); step1[29] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (int16_t)input[25] * cospi_7_64 - (int16_t)input[7] * cospi_25_64; temp2 = (int16_t)input[25] * cospi_25_64 + (int16_t)input[7] * cospi_7_64; step1[19] = WRAPLOW(dct_const_round_shift(temp1)); step1[28] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (int16_t)input[5] * cospi_27_64 - (int16_t)input[27] * cospi_5_64; temp2 = (int16_t)input[5] * cospi_5_64 + (int16_t)input[27] * cospi_27_64; step1[20] = WRAPLOW(dct_const_round_shift(temp1)); step1[27] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (int16_t)input[21] * cospi_11_64 - (int16_t)input[11] * cospi_21_64; temp2 = (int16_t)input[21] * cospi_21_64 + (int16_t)input[11] * cospi_11_64; step1[21] = WRAPLOW(dct_const_round_shift(temp1)); step1[26] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (int16_t)input[13] * cospi_19_64 - (int16_t)input[19] * cospi_13_64; temp2 = (int16_t)input[13] * cospi_13_64 + (int16_t)input[19] * cospi_19_64; step1[22] = WRAPLOW(dct_const_round_shift(temp1)); step1[25] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (int16_t)input[29] * cospi_3_64 - (int16_t)input[3] * cospi_29_64; temp2 = (int16_t)input[29] * cospi_29_64 + (int16_t)input[3] * cospi_3_64; step1[23] = WRAPLOW(dct_const_round_shift(temp1)); step1[24] = WRAPLOW(dct_const_round_shift(temp2)); // stage 2 step2[0] = step1[0]; step2[1] = step1[1]; step2[2] = step1[2]; step2[3] = step1[3]; step2[4] = step1[4]; step2[5] = step1[5]; step2[6] = step1[6]; 
step2[7] = step1[7]; temp1 = step1[8] * cospi_30_64 - step1[15] * cospi_2_64; temp2 = step1[8] * cospi_2_64 + step1[15] * cospi_30_64; step2[8] = WRAPLOW(dct_const_round_shift(temp1)); step2[15] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[9] * cospi_14_64 - step1[14] * cospi_18_64; temp2 = step1[9] * cospi_18_64 + step1[14] * cospi_14_64; step2[9] = WRAPLOW(dct_const_round_shift(temp1)); step2[14] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[10] * cospi_22_64 - step1[13] * cospi_10_64; temp2 = step1[10] * cospi_10_64 + step1[13] * cospi_22_64; step2[10] = WRAPLOW(dct_const_round_shift(temp1)); step2[13] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[11] * cospi_6_64 - step1[12] * cospi_26_64; temp2 = step1[11] * cospi_26_64 + step1[12] * cospi_6_64; step2[11] = WRAPLOW(dct_const_round_shift(temp1)); step2[12] = WRAPLOW(dct_const_round_shift(temp2)); step2[16] = WRAPLOW(step1[16] + step1[17]); step2[17] = WRAPLOW(step1[16] - step1[17]); step2[18] = WRAPLOW(-step1[18] + step1[19]); step2[19] = WRAPLOW(step1[18] + step1[19]); step2[20] = WRAPLOW(step1[20] + step1[21]); step2[21] = WRAPLOW(step1[20] - step1[21]); step2[22] = WRAPLOW(-step1[22] + step1[23]); step2[23] = WRAPLOW(step1[22] + step1[23]); step2[24] = WRAPLOW(step1[24] + step1[25]); step2[25] = WRAPLOW(step1[24] - step1[25]); step2[26] = WRAPLOW(-step1[26] + step1[27]); step2[27] = WRAPLOW(step1[26] + step1[27]); step2[28] = WRAPLOW(step1[28] + step1[29]); step2[29] = WRAPLOW(step1[28] - step1[29]); step2[30] = WRAPLOW(-step1[30] + step1[31]); step2[31] = WRAPLOW(step1[30] + step1[31]); // stage 3 step1[0] = step2[0]; step1[1] = step2[1]; step1[2] = step2[2]; step1[3] = step2[3]; temp1 = step2[4] * cospi_28_64 - step2[7] * cospi_4_64; temp2 = step2[4] * cospi_4_64 + step2[7] * cospi_28_64; step1[4] = WRAPLOW(dct_const_round_shift(temp1)); step1[7] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step2[5] * cospi_12_64 - step2[6] * cospi_20_64; temp2 = step2[5] * cospi_20_64 + step2[6] * cospi_12_64; step1[5] = WRAPLOW(dct_const_round_shift(temp1)); step1[6] = WRAPLOW(dct_const_round_shift(temp2)); step1[8] = WRAPLOW(step2[8] + step2[9]); step1[9] = WRAPLOW(step2[8] - step2[9]); step1[10] = WRAPLOW(-step2[10] + step2[11]); step1[11] = WRAPLOW(step2[10] + step2[11]); step1[12] = WRAPLOW(step2[12] + step2[13]); step1[13] = WRAPLOW(step2[12] - step2[13]); step1[14] = WRAPLOW(-step2[14] + step2[15]); step1[15] = WRAPLOW(step2[14] + step2[15]); step1[16] = step2[16]; step1[31] = step2[31]; temp1 = -step2[17] * cospi_4_64 + step2[30] * cospi_28_64; temp2 = step2[17] * cospi_28_64 + step2[30] * cospi_4_64; step1[17] = WRAPLOW(dct_const_round_shift(temp1)); step1[30] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[18] * cospi_28_64 - step2[29] * cospi_4_64; temp2 = -step2[18] * cospi_4_64 + step2[29] * cospi_28_64; step1[18] = WRAPLOW(dct_const_round_shift(temp1)); step1[29] = WRAPLOW(dct_const_round_shift(temp2)); step1[19] = step2[19]; step1[20] = step2[20]; temp1 = -step2[21] * cospi_20_64 + step2[26] * cospi_12_64; temp2 = step2[21] * cospi_12_64 + step2[26] * cospi_20_64; step1[21] = WRAPLOW(dct_const_round_shift(temp1)); step1[26] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[22] * cospi_12_64 - step2[25] * cospi_20_64; temp2 = -step2[22] * cospi_20_64 + step2[25] * cospi_12_64; step1[22] = WRAPLOW(dct_const_round_shift(temp1)); step1[25] = WRAPLOW(dct_const_round_shift(temp2)); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; step1[28] = step2[28]; // stage 4 temp1 = 
(step1[0] + step1[1]) * cospi_16_64; temp2 = (step1[0] - step1[1]) * cospi_16_64; step2[0] = WRAPLOW(dct_const_round_shift(temp1)); step2[1] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = step1[2] * cospi_24_64 - step1[3] * cospi_8_64; temp2 = step1[2] * cospi_8_64 + step1[3] * cospi_24_64; step2[2] = WRAPLOW(dct_const_round_shift(temp1)); step2[3] = WRAPLOW(dct_const_round_shift(temp2)); step2[4] = WRAPLOW(step1[4] + step1[5]); step2[5] = WRAPLOW(step1[4] - step1[5]); step2[6] = WRAPLOW(-step1[6] + step1[7]); step2[7] = WRAPLOW(step1[6] + step1[7]); step2[8] = step1[8]; step2[15] = step1[15]; temp1 = -step1[9] * cospi_8_64 + step1[14] * cospi_24_64; temp2 = step1[9] * cospi_24_64 + step1[14] * cospi_8_64; step2[9] = WRAPLOW(dct_const_round_shift(temp1)); step2[14] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step1[10] * cospi_24_64 - step1[13] * cospi_8_64; temp2 = -step1[10] * cospi_8_64 + step1[13] * cospi_24_64; step2[10] = WRAPLOW(dct_const_round_shift(temp1)); step2[13] = WRAPLOW(dct_const_round_shift(temp2)); step2[11] = step1[11]; step2[12] = step1[12]; step2[16] = WRAPLOW(step1[16] + step1[19]); step2[17] = WRAPLOW(step1[17] + step1[18]); step2[18] = WRAPLOW(step1[17] - step1[18]); step2[19] = WRAPLOW(step1[16] - step1[19]); step2[20] = WRAPLOW(-step1[20] + step1[23]); step2[21] = WRAPLOW(-step1[21] + step1[22]); step2[22] = WRAPLOW(step1[21] + step1[22]); step2[23] = WRAPLOW(step1[20] + step1[23]); step2[24] = WRAPLOW(step1[24] + step1[27]); step2[25] = WRAPLOW(step1[25] + step1[26]); step2[26] = WRAPLOW(step1[25] - step1[26]); step2[27] = WRAPLOW(step1[24] - step1[27]); step2[28] = WRAPLOW(-step1[28] + step1[31]); step2[29] = WRAPLOW(-step1[29] + step1[30]); step2[30] = WRAPLOW(step1[29] + step1[30]); step2[31] = WRAPLOW(step1[28] + step1[31]); // stage 5 step1[0] = WRAPLOW(step2[0] + step2[3]); step1[1] = WRAPLOW(step2[1] + step2[2]); step1[2] = WRAPLOW(step2[1] - step2[2]); step1[3] = WRAPLOW(step2[0] - step2[3]); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * cospi_16_64; temp2 = (step2[5] + step2[6]) * cospi_16_64; step1[5] = WRAPLOW(dct_const_round_shift(temp1)); step1[6] = WRAPLOW(dct_const_round_shift(temp2)); step1[7] = step2[7]; step1[8] = WRAPLOW(step2[8] + step2[11]); step1[9] = WRAPLOW(step2[9] + step2[10]); step1[10] = WRAPLOW(step2[9] - step2[10]); step1[11] = WRAPLOW(step2[8] - step2[11]); step1[12] = WRAPLOW(-step2[12] + step2[15]); step1[13] = WRAPLOW(-step2[13] + step2[14]); step1[14] = WRAPLOW(step2[13] + step2[14]); step1[15] = WRAPLOW(step2[12] + step2[15]); step1[16] = step2[16]; step1[17] = step2[17]; temp1 = -step2[18] * cospi_8_64 + step2[29] * cospi_24_64; temp2 = step2[18] * cospi_24_64 + step2[29] * cospi_8_64; step1[18] = WRAPLOW(dct_const_round_shift(temp1)); step1[29] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[19] * cospi_8_64 + step2[28] * cospi_24_64; temp2 = step2[19] * cospi_24_64 + step2[28] * cospi_8_64; step1[19] = WRAPLOW(dct_const_round_shift(temp1)); step1[28] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[20] * cospi_24_64 - step2[27] * cospi_8_64; temp2 = -step2[20] * cospi_8_64 + step2[27] * cospi_24_64; step1[20] = WRAPLOW(dct_const_round_shift(temp1)); step1[27] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = -step2[21] * cospi_24_64 - step2[26] * cospi_8_64; temp2 = -step2[21] * cospi_8_64 + step2[26] * cospi_24_64; step1[21] = WRAPLOW(dct_const_round_shift(temp1)); step1[26] = WRAPLOW(dct_const_round_shift(temp2)); step1[22] = step2[22]; step1[23] = step2[23]; step1[24] = step2[24]; step1[25] = 
step2[25]; step1[30] = step2[30]; step1[31] = step2[31]; // stage 6 step2[0] = WRAPLOW(step1[0] + step1[7]); step2[1] = WRAPLOW(step1[1] + step1[6]); step2[2] = WRAPLOW(step1[2] + step1[5]); step2[3] = WRAPLOW(step1[3] + step1[4]); step2[4] = WRAPLOW(step1[3] - step1[4]); step2[5] = WRAPLOW(step1[2] - step1[5]); step2[6] = WRAPLOW(step1[1] - step1[6]); step2[7] = WRAPLOW(step1[0] - step1[7]); step2[8] = step1[8]; step2[9] = step1[9]; temp1 = (-step1[10] + step1[13]) * cospi_16_64; temp2 = (step1[10] + step1[13]) * cospi_16_64; step2[10] = WRAPLOW(dct_const_round_shift(temp1)); step2[13] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step1[11] + step1[12]) * cospi_16_64; temp2 = (step1[11] + step1[12]) * cospi_16_64; step2[11] = WRAPLOW(dct_const_round_shift(temp1)); step2[12] = WRAPLOW(dct_const_round_shift(temp2)); step2[14] = step1[14]; step2[15] = step1[15]; step2[16] = WRAPLOW(step1[16] + step1[23]); step2[17] = WRAPLOW(step1[17] + step1[22]); step2[18] = WRAPLOW(step1[18] + step1[21]); step2[19] = WRAPLOW(step1[19] + step1[20]); step2[20] = WRAPLOW(step1[19] - step1[20]); step2[21] = WRAPLOW(step1[18] - step1[21]); step2[22] = WRAPLOW(step1[17] - step1[22]); step2[23] = WRAPLOW(step1[16] - step1[23]); step2[24] = WRAPLOW(-step1[24] + step1[31]); step2[25] = WRAPLOW(-step1[25] + step1[30]); step2[26] = WRAPLOW(-step1[26] + step1[29]); step2[27] = WRAPLOW(-step1[27] + step1[28]); step2[28] = WRAPLOW(step1[27] + step1[28]); step2[29] = WRAPLOW(step1[26] + step1[29]); step2[30] = WRAPLOW(step1[25] + step1[30]); step2[31] = WRAPLOW(step1[24] + step1[31]); // stage 7 step1[0] = WRAPLOW(step2[0] + step2[15]); step1[1] = WRAPLOW(step2[1] + step2[14]); step1[2] = WRAPLOW(step2[2] + step2[13]); step1[3] = WRAPLOW(step2[3] + step2[12]); step1[4] = WRAPLOW(step2[4] + step2[11]); step1[5] = WRAPLOW(step2[5] + step2[10]); step1[6] = WRAPLOW(step2[6] + step2[9]); step1[7] = WRAPLOW(step2[7] + step2[8]); step1[8] = WRAPLOW(step2[7] - step2[8]); step1[9] = WRAPLOW(step2[6] - step2[9]); step1[10] = WRAPLOW(step2[5] - step2[10]); step1[11] = WRAPLOW(step2[4] - step2[11]); step1[12] = WRAPLOW(step2[3] - step2[12]); step1[13] = WRAPLOW(step2[2] - step2[13]); step1[14] = WRAPLOW(step2[1] - step2[14]); step1[15] = WRAPLOW(step2[0] - step2[15]); step1[16] = step2[16]; step1[17] = step2[17]; step1[18] = step2[18]; step1[19] = step2[19]; temp1 = (-step2[20] + step2[27]) * cospi_16_64; temp2 = (step2[20] + step2[27]) * cospi_16_64; step1[20] = WRAPLOW(dct_const_round_shift(temp1)); step1[27] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step2[21] + step2[26]) * cospi_16_64; temp2 = (step2[21] + step2[26]) * cospi_16_64; step1[21] = WRAPLOW(dct_const_round_shift(temp1)); step1[26] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step2[22] + step2[25]) * cospi_16_64; temp2 = (step2[22] + step2[25]) * cospi_16_64; step1[22] = WRAPLOW(dct_const_round_shift(temp1)); step1[25] = WRAPLOW(dct_const_round_shift(temp2)); temp1 = (-step2[23] + step2[24]) * cospi_16_64; temp2 = (step2[23] + step2[24]) * cospi_16_64; step1[23] = WRAPLOW(dct_const_round_shift(temp1)); step1[24] = WRAPLOW(dct_const_round_shift(temp2)); step1[28] = step2[28]; step1[29] = step2[29]; step1[30] = step2[30]; step1[31] = step2[31]; // final stage output[0] = WRAPLOW(step1[0] + step1[31]); output[1] = WRAPLOW(step1[1] + step1[30]); output[2] = WRAPLOW(step1[2] + step1[29]); output[3] = WRAPLOW(step1[3] + step1[28]); output[4] = WRAPLOW(step1[4] + step1[27]); output[5] = WRAPLOW(step1[5] + step1[26]); output[6] = WRAPLOW(step1[6] + 
step1[25]); output[7] = WRAPLOW(step1[7] + step1[24]); output[8] = WRAPLOW(step1[8] + step1[23]); output[9] = WRAPLOW(step1[9] + step1[22]); output[10] = WRAPLOW(step1[10] + step1[21]); output[11] = WRAPLOW(step1[11] + step1[20]); output[12] = WRAPLOW(step1[12] + step1[19]); output[13] = WRAPLOW(step1[13] + step1[18]); output[14] = WRAPLOW(step1[14] + step1[17]); output[15] = WRAPLOW(step1[15] + step1[16]); output[16] = WRAPLOW(step1[15] - step1[16]); output[17] = WRAPLOW(step1[14] - step1[17]); output[18] = WRAPLOW(step1[13] - step1[18]); output[19] = WRAPLOW(step1[12] - step1[19]); output[20] = WRAPLOW(step1[11] - step1[20]); output[21] = WRAPLOW(step1[10] - step1[21]); output[22] = WRAPLOW(step1[9] - step1[22]); output[23] = WRAPLOW(step1[8] - step1[23]); output[24] = WRAPLOW(step1[7] - step1[24]); output[25] = WRAPLOW(step1[6] - step1[25]); output[26] = WRAPLOW(step1[5] - step1[26]); output[27] = WRAPLOW(step1[4] - step1[27]); output[28] = WRAPLOW(step1[3] - step1[28]); output[29] = WRAPLOW(step1[2] - step1[29]); output[30] = WRAPLOW(step1[1] - step1[30]); output[31] = WRAPLOW(step1[0] - step1[31]); } void vpx_idct32x32_1024_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_low_t out[32 * 32]; tran_low_t *outptr = out; tran_low_t temp_in[32], temp_out[32]; // Rows for (i = 0; i < 32; ++i) { int16_t zero_coeff = 0; for (j = 0; j < 32; ++j) zero_coeff |= input[j]; if (zero_coeff) idct32_c(input, outptr); else memset(outptr, 0, sizeof(tran_low_t) * 32); input += 32; outptr += 32; } // Columns for (i = 0; i < 32; ++i) { for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; idct32_c(temp_in, temp_out); for (j = 0; j < 32; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6)); } } } void vpx_idct32x32_135_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_low_t out[32 * 32] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[32], temp_out[32]; // Rows // Only upper-left 16x16 has non-zero coeff for (i = 0; i < 16; ++i) { idct32_c(input, outptr); input += 32; outptr += 32; } // Columns for (i = 0; i < 32; ++i) { for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; idct32_c(temp_in, temp_out); for (j = 0; j < 32; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6)); } } } void vpx_idct32x32_34_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_low_t out[32 * 32] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[32], temp_out[32]; // Rows // Only upper-left 8x8 has non-zero coeff for (i = 0; i < 8; ++i) { idct32_c(input, outptr); input += 32; outptr += 32; } // Columns for (i = 0; i < 32; ++i) { for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; idct32_c(temp_in, temp_out); for (j = 0; j < 32; ++j) { dest[j * stride + i] = clip_pixel_add(dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6)); } } } void vpx_idct32x32_1_add_c(const tran_low_t *input, uint8_t *dest, int stride) { int i, j; tran_high_t a1; tran_low_t out = WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 32; ++j) { for (i = 0; i < 32; ++i) dest[i] = clip_pixel_add(dest[i], a1); dest += stride; } } #if CONFIG_VP9_HIGHBITDEPTH // 12 signal input bits + 7 2D forward transform amplify bits + 5 1D inverse // transform amplify bits + 1 bit for contingency in rounding and quantizing #define HIGHBD_VALID_TXFM_MAGNITUDE_RANGE (1 << 25) 
static INLINE int detect_invalid_highbd_input(const tran_low_t *input, int size) { int i; for (i = 0; i < size; ++i) if (abs(input[i]) >= HIGHBD_VALID_TXFM_MAGNITUDE_RANGE) return 1; return 0; } void vpx_highbd_iwht4x4_16_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { /* 4-point reversible, orthonormal inverse Walsh-Hadamard in 3.5 adds, 0.5 shifts per pixel. */ int i; tran_low_t output[16]; tran_high_t a1, b1, c1, d1, e1; const tran_low_t *ip = input; tran_low_t *op = output; for (i = 0; i < 4; i++) { a1 = ip[0] >> UNIT_QUANT_SHIFT; c1 = ip[1] >> UNIT_QUANT_SHIFT; d1 = ip[2] >> UNIT_QUANT_SHIFT; b1 = ip[3] >> UNIT_QUANT_SHIFT; a1 += c1; d1 -= b1; e1 = (a1 - d1) >> 1; b1 = e1 - b1; c1 = e1 - c1; a1 -= b1; d1 += c1; op[0] = HIGHBD_WRAPLOW(a1, bd); op[1] = HIGHBD_WRAPLOW(b1, bd); op[2] = HIGHBD_WRAPLOW(c1, bd); op[3] = HIGHBD_WRAPLOW(d1, bd); ip += 4; op += 4; } ip = output; for (i = 0; i < 4; i++) { a1 = ip[4 * 0]; c1 = ip[4 * 1]; d1 = ip[4 * 2]; b1 = ip[4 * 3]; a1 += c1; d1 -= b1; e1 = (a1 - d1) >> 1; b1 = e1 - b1; c1 = e1 - c1; a1 -= b1; d1 += c1; dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], HIGHBD_WRAPLOW(a1, bd), bd); dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], HIGHBD_WRAPLOW(b1, bd), bd); dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], HIGHBD_WRAPLOW(c1, bd), bd); dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], HIGHBD_WRAPLOW(d1, bd), bd); ip++; dest++; } } void vpx_highbd_iwht4x4_1_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i; tran_high_t a1, e1; tran_low_t tmp[4]; const tran_low_t *ip = input; tran_low_t *op = tmp; (void)bd; a1 = ip[0] >> UNIT_QUANT_SHIFT; e1 = a1 >> 1; a1 -= e1; op[0] = HIGHBD_WRAPLOW(a1, bd); op[1] = op[2] = op[3] = HIGHBD_WRAPLOW(e1, bd); ip = tmp; for (i = 0; i < 4; i++) { e1 = ip[0] >> 1; a1 = ip[0] - e1; dest[stride * 0] = highbd_clip_pixel_add(dest[stride * 0], a1, bd); dest[stride * 1] = highbd_clip_pixel_add(dest[stride * 1], e1, bd); dest[stride * 2] = highbd_clip_pixel_add(dest[stride * 2], e1, bd); dest[stride * 3] = highbd_clip_pixel_add(dest[stride * 3], e1, bd); ip++; dest++; } } void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; tran_low_t x0 = input[0]; tran_low_t x1 = input[1]; tran_low_t x2 = input[2]; tran_low_t x3 = input[3]; (void)bd; if (detect_invalid_highbd_input(input, 4)) { #if CONFIG_COEFFICIENT_RANGE_CHECKING assert(0 && "invalid highbd txfm input"); #endif // CONFIG_COEFFICIENT_RANGE_CHECKING memset(output, 0, sizeof(*output) * 4); return; } if (!(x0 | x1 | x2 | x3)) { memset(output, 0, 4 * sizeof(*output)); return; } s0 = (tran_high_t)sinpi_1_9 * x0; s1 = (tran_high_t)sinpi_2_9 * x0; s2 = (tran_high_t)sinpi_3_9 * x1; s3 = (tran_high_t)sinpi_4_9 * x2; s4 = (tran_high_t)sinpi_1_9 * x2; s5 = (tran_high_t)sinpi_2_9 * x3; s6 = (tran_high_t)sinpi_4_9 * x3; s7 = (tran_high_t)HIGHBD_WRAPLOW(x0 - x2 + x3, bd); s0 = s0 + s3 + s5; s1 = s1 - s4 - s6; s3 = s2; s2 = sinpi_3_9 * s7; // 1-D transform scaling factor is sqrt(2). // The overall dynamic range is 14b (input) + 14b (multiplication scaling) // + 1b (addition) = 29b. // Hence the output bit depth is 15b. 
output[0] = HIGHBD_WRAPLOW(dct_const_round_shift(s0 + s3), bd); output[1] = HIGHBD_WRAPLOW(dct_const_round_shift(s1 + s3), bd); output[2] = HIGHBD_WRAPLOW(dct_const_round_shift(s2), bd); output[3] = HIGHBD_WRAPLOW(dct_const_round_shift(s0 + s1 - s3), bd); } void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step[4]; tran_high_t temp1, temp2; (void)bd; if (detect_invalid_highbd_input(input, 4)) { #if CONFIG_COEFFICIENT_RANGE_CHECKING assert(0 && "invalid highbd txfm input"); #endif // CONFIG_COEFFICIENT_RANGE_CHECKING memset(output, 0, sizeof(*output) * 4); return; } // stage 1 temp1 = (input[0] + input[2]) * (tran_high_t)cospi_16_64; temp2 = (input[0] - input[2]) * (tran_high_t)cospi_16_64; step[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = input[1] * (tran_high_t)cospi_24_64 - input[3] * (tran_high_t)cospi_8_64; temp2 = input[1] * (tran_high_t)cospi_8_64 + input[3] * (tran_high_t)cospi_24_64; step[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); // stage 2 output[0] = HIGHBD_WRAPLOW(step[0] + step[3], bd); output[1] = HIGHBD_WRAPLOW(step[1] + step[2], bd); output[2] = HIGHBD_WRAPLOW(step[1] - step[2], bd); output[3] = HIGHBD_WRAPLOW(step[0] - step[3], bd); } void vpx_highbd_idct4x4_16_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; tran_low_t out[4 * 4]; tran_low_t *outptr = out; tran_low_t temp_in[4], temp_out[4]; // Rows for (i = 0; i < 4; ++i) { vpx_highbd_idct4_c(input, outptr, bd); input += 4; outptr += 4; } // Columns for (i = 0; i < 4; ++i) { for (j = 0; j < 4; ++j) temp_in[j] = out[j * 4 + i]; vpx_highbd_idct4_c(temp_in, temp_out, bd); for (j = 0; j < 4; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 4), bd); } } } void vpx_highbd_idct4x4_1_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i; tran_high_t a1; tran_low_t out = HIGHBD_WRAPLOW( dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); out = HIGHBD_WRAPLOW(dct_const_round_shift(out * (tran_high_t)cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 4); for (i = 0; i < 4; i++) { dest[0] = highbd_clip_pixel_add(dest[0], a1, bd); dest[1] = highbd_clip_pixel_add(dest[1], a1, bd); dest[2] = highbd_clip_pixel_add(dest[2], a1, bd); dest[3] = highbd_clip_pixel_add(dest[3], a1, bd); dest += stride; } } void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7; tran_low_t x0 = input[7]; tran_low_t x1 = input[0]; tran_low_t x2 = input[5]; tran_low_t x3 = input[2]; tran_low_t x4 = input[3]; tran_low_t x5 = input[4]; tran_low_t x6 = input[1]; tran_low_t x7 = input[6]; (void)bd; if (detect_invalid_highbd_input(input, 8)) { #if CONFIG_COEFFICIENT_RANGE_CHECKING assert(0 && "invalid highbd txfm input"); #endif // CONFIG_COEFFICIENT_RANGE_CHECKING memset(output, 0, sizeof(*output) * 8); return; } if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { memset(output, 0, 8 * sizeof(*output)); return; } // stage 1 s0 = (tran_high_t)cospi_2_64 * x0 + (tran_high_t)cospi_30_64 * x1; s1 = (tran_high_t)cospi_30_64 * x0 - (tran_high_t)cospi_2_64 * x1; s2 = (tran_high_t)cospi_10_64 * x2 + (tran_high_t)cospi_22_64 * x3; s3 = (tran_high_t)cospi_22_64 * x2 - (tran_high_t)cospi_10_64 * x3; s4 = (tran_high_t)cospi_18_64 * x4 + (tran_high_t)cospi_14_64 * x5; s5 = (tran_high_t)cospi_14_64 * x4 - 
(tran_high_t)cospi_18_64 * x5; s6 = (tran_high_t)cospi_26_64 * x6 + (tran_high_t)cospi_6_64 * x7; s7 = (tran_high_t)cospi_6_64 * x6 - (tran_high_t)cospi_26_64 * x7; x0 = HIGHBD_WRAPLOW(dct_const_round_shift(s0 + s4), bd); x1 = HIGHBD_WRAPLOW(dct_const_round_shift(s1 + s5), bd); x2 = HIGHBD_WRAPLOW(dct_const_round_shift(s2 + s6), bd); x3 = HIGHBD_WRAPLOW(dct_const_round_shift(s3 + s7), bd); x4 = HIGHBD_WRAPLOW(dct_const_round_shift(s0 - s4), bd); x5 = HIGHBD_WRAPLOW(dct_const_round_shift(s1 - s5), bd); x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s2 - s6), bd); x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s3 - s7), bd); // stage 2 s0 = x0; s1 = x1; s2 = x2; s3 = x3; s4 = (tran_high_t)cospi_8_64 * x4 + (tran_high_t)cospi_24_64 * x5; s5 = (tran_high_t)cospi_24_64 * x4 - (tran_high_t)cospi_8_64 * x5; s6 = (tran_high_t)(-cospi_24_64) * x6 + (tran_high_t)cospi_8_64 * x7; s7 = (tran_high_t)cospi_8_64 * x6 + (tran_high_t)cospi_24_64 * x7; x0 = HIGHBD_WRAPLOW(s0 + s2, bd); x1 = HIGHBD_WRAPLOW(s1 + s3, bd); x2 = HIGHBD_WRAPLOW(s0 - s2, bd); x3 = HIGHBD_WRAPLOW(s1 - s3, bd); x4 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 + s6), bd); x5 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 + s7), bd); x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 - s6), bd); x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 - s7), bd); // stage 3 s2 = (tran_high_t)cospi_16_64 * (x2 + x3); s3 = (tran_high_t)cospi_16_64 * (x2 - x3); s6 = (tran_high_t)cospi_16_64 * (x6 + x7); s7 = (tran_high_t)cospi_16_64 * (x6 - x7); x2 = HIGHBD_WRAPLOW(dct_const_round_shift(s2), bd); x3 = HIGHBD_WRAPLOW(dct_const_round_shift(s3), bd); x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s6), bd); x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s7), bd); output[0] = HIGHBD_WRAPLOW(x0, bd); output[1] = HIGHBD_WRAPLOW(-x4, bd); output[2] = HIGHBD_WRAPLOW(x6, bd); output[3] = HIGHBD_WRAPLOW(-x2, bd); output[4] = HIGHBD_WRAPLOW(x3, bd); output[5] = HIGHBD_WRAPLOW(-x7, bd); output[6] = HIGHBD_WRAPLOW(x5, bd); output[7] = HIGHBD_WRAPLOW(-x1, bd); } void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step1[8], step2[8]; tran_high_t temp1, temp2; if (detect_invalid_highbd_input(input, 8)) { #if CONFIG_COEFFICIENT_RANGE_CHECKING assert(0 && "invalid highbd txfm input"); #endif // CONFIG_COEFFICIENT_RANGE_CHECKING memset(output, 0, sizeof(*output) * 8); return; } // stage 1 step1[0] = input[0]; step1[2] = input[4]; step1[1] = input[2]; step1[3] = input[6]; temp1 = input[1] * (tran_high_t)cospi_28_64 - input[7] * (tran_high_t)cospi_4_64; temp2 = input[1] * (tran_high_t)cospi_4_64 + input[7] * (tran_high_t)cospi_28_64; step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = input[5] * (tran_high_t)cospi_12_64 - input[3] * (tran_high_t)cospi_20_64; temp2 = input[5] * (tran_high_t)cospi_20_64 + input[3] * (tran_high_t)cospi_12_64; step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); // stage 2 & stage 3 - even half vpx_highbd_idct4_c(step1, step1, bd); // stage 2 - odd half step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); // stage 3 - odd half step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * (tran_high_t)cospi_16_64; temp2 = (step2[5] + step2[6]) * (tran_high_t)cospi_16_64; step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[6] = 
HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step1[7] = step2[7]; // stage 4 output[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); output[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); output[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); output[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); output[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); output[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); output[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); output[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); } void vpx_highbd_idct8x8_64_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; tran_low_t out[8 * 8]; tran_low_t *outptr = out; tran_low_t temp_in[8], temp_out[8]; // First transform rows for (i = 0; i < 8; ++i) { vpx_highbd_idct8_c(input, outptr, bd); input += 8; outptr += 8; } // Then transform columns for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; vpx_highbd_idct8_c(temp_in, temp_out, bd); for (j = 0; j < 8; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); } } } void vpx_highbd_idct8x8_12_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; tran_low_t out[8 * 8] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[8], temp_out[8]; // First transform rows // Only the first 4 rows have non-zero coefficients for (i = 0; i < 4; ++i) { vpx_highbd_idct8_c(input, outptr, bd); input += 8; outptr += 8; } // Then transform columns for (i = 0; i < 8; ++i) { for (j = 0; j < 8; ++j) temp_in[j] = out[j * 8 + i]; vpx_highbd_idct8_c(temp_in, temp_out, bd); for (j = 0; j < 8; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 5), bd); } } } void vpx_highbd_idct8x8_1_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; tran_high_t a1; tran_low_t out = HIGHBD_WRAPLOW( dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); out = HIGHBD_WRAPLOW(dct_const_round_shift(out * (tran_high_t)cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 5); for (j = 0; j < 8; ++j) { for (i = 0; i < 8; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); dest += stride; } } void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_high_t s0, s1, s2, s3, s4, s5, s6, s7, s8; tran_high_t s9, s10, s11, s12, s13, s14, s15; tran_low_t x0 = input[15]; tran_low_t x1 = input[0]; tran_low_t x2 = input[13]; tran_low_t x3 = input[2]; tran_low_t x4 = input[11]; tran_low_t x5 = input[4]; tran_low_t x6 = input[9]; tran_low_t x7 = input[6]; tran_low_t x8 = input[7]; tran_low_t x9 = input[8]; tran_low_t x10 = input[5]; tran_low_t x11 = input[10]; tran_low_t x12 = input[3]; tran_low_t x13 = input[12]; tran_low_t x14 = input[1]; tran_low_t x15 = input[14]; (void)bd; if (detect_invalid_highbd_input(input, 16)) { #if CONFIG_COEFFICIENT_RANGE_CHECKING assert(0 && "invalid highbd txfm input"); #endif // CONFIG_COEFFICIENT_RANGE_CHECKING memset(output, 0, sizeof(*output) * 16); return; } if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { memset(output, 0, 16 * sizeof(*output)); return; } // stage 1 s0 = x0 * (tran_high_t)cospi_1_64 + x1 * (tran_high_t)cospi_31_64; s1 = x0 * (tran_high_t)cospi_31_64 - x1 * (tran_high_t)cospi_1_64; s2 = x2 * (tran_high_t)cospi_5_64 + x3 * (tran_high_t)cospi_27_64; s3 = x2 * (tran_high_t)cospi_27_64 - x3 * (tran_high_t)cospi_5_64; s4 = x4 * (tran_high_t)cospi_9_64 + x5 * (tran_high_t)cospi_23_64; s5 = x4 *
(tran_high_t)cospi_23_64 - x5 * (tran_high_t)cospi_9_64; s6 = x6 * (tran_high_t)cospi_13_64 + x7 * (tran_high_t)cospi_19_64; s7 = x6 * (tran_high_t)cospi_19_64 - x7 * (tran_high_t)cospi_13_64; s8 = x8 * (tran_high_t)cospi_17_64 + x9 * (tran_high_t)cospi_15_64; s9 = x8 * (tran_high_t)cospi_15_64 - x9 * (tran_high_t)cospi_17_64; s10 = x10 * (tran_high_t)cospi_21_64 + x11 * (tran_high_t)cospi_11_64; s11 = x10 * (tran_high_t)cospi_11_64 - x11 * (tran_high_t)cospi_21_64; s12 = x12 * (tran_high_t)cospi_25_64 + x13 * (tran_high_t)cospi_7_64; s13 = x12 * (tran_high_t)cospi_7_64 - x13 * (tran_high_t)cospi_25_64; s14 = x14 * (tran_high_t)cospi_29_64 + x15 * (tran_high_t)cospi_3_64; s15 = x14 * (tran_high_t)cospi_3_64 - x15 * (tran_high_t)cospi_29_64; x0 = HIGHBD_WRAPLOW(dct_const_round_shift(s0 + s8), bd); x1 = HIGHBD_WRAPLOW(dct_const_round_shift(s1 + s9), bd); x2 = HIGHBD_WRAPLOW(dct_const_round_shift(s2 + s10), bd); x3 = HIGHBD_WRAPLOW(dct_const_round_shift(s3 + s11), bd); x4 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 + s12), bd); x5 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 + s13), bd); x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s6 + s14), bd); x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s7 + s15), bd); x8 = HIGHBD_WRAPLOW(dct_const_round_shift(s0 - s8), bd); x9 = HIGHBD_WRAPLOW(dct_const_round_shift(s1 - s9), bd); x10 = HIGHBD_WRAPLOW(dct_const_round_shift(s2 - s10), bd); x11 = HIGHBD_WRAPLOW(dct_const_round_shift(s3 - s11), bd); x12 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 - s12), bd); x13 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 - s13), bd); x14 = HIGHBD_WRAPLOW(dct_const_round_shift(s6 - s14), bd); x15 = HIGHBD_WRAPLOW(dct_const_round_shift(s7 - s15), bd); // stage 2 s0 = x0; s1 = x1; s2 = x2; s3 = x3; s4 = x4; s5 = x5; s6 = x6; s7 = x7; s8 = x8 * (tran_high_t)cospi_4_64 + x9 * (tran_high_t)cospi_28_64; s9 = x8 * (tran_high_t)cospi_28_64 - x9 * (tran_high_t)cospi_4_64; s10 = x10 * (tran_high_t)cospi_20_64 + x11 * (tran_high_t)cospi_12_64; s11 = x10 * (tran_high_t)cospi_12_64 - x11 * (tran_high_t)cospi_20_64; s12 = -x12 * (tran_high_t)cospi_28_64 + x13 * (tran_high_t)cospi_4_64; s13 = x12 * (tran_high_t)cospi_4_64 + x13 * (tran_high_t)cospi_28_64; s14 = -x14 * (tran_high_t)cospi_12_64 + x15 * (tran_high_t)cospi_20_64; s15 = x14 * (tran_high_t)cospi_20_64 + x15 * (tran_high_t)cospi_12_64; x0 = HIGHBD_WRAPLOW(s0 + s4, bd); x1 = HIGHBD_WRAPLOW(s1 + s5, bd); x2 = HIGHBD_WRAPLOW(s2 + s6, bd); x3 = HIGHBD_WRAPLOW(s3 + s7, bd); x4 = HIGHBD_WRAPLOW(s0 - s4, bd); x5 = HIGHBD_WRAPLOW(s1 - s5, bd); x6 = HIGHBD_WRAPLOW(s2 - s6, bd); x7 = HIGHBD_WRAPLOW(s3 - s7, bd); x8 = HIGHBD_WRAPLOW(dct_const_round_shift(s8 + s12), bd); x9 = HIGHBD_WRAPLOW(dct_const_round_shift(s9 + s13), bd); x10 = HIGHBD_WRAPLOW(dct_const_round_shift(s10 + s14), bd); x11 = HIGHBD_WRAPLOW(dct_const_round_shift(s11 + s15), bd); x12 = HIGHBD_WRAPLOW(dct_const_round_shift(s8 - s12), bd); x13 = HIGHBD_WRAPLOW(dct_const_round_shift(s9 - s13), bd); x14 = HIGHBD_WRAPLOW(dct_const_round_shift(s10 - s14), bd); x15 = HIGHBD_WRAPLOW(dct_const_round_shift(s11 - s15), bd); // stage 3 s0 = x0; s1 = x1; s2 = x2; s3 = x3; s4 = x4 * (tran_high_t)cospi_8_64 + x5 * (tran_high_t)cospi_24_64; s5 = x4 * (tran_high_t)cospi_24_64 - x5 * (tran_high_t)cospi_8_64; s6 = -x6 * (tran_high_t)cospi_24_64 + x7 * (tran_high_t)cospi_8_64; s7 = x6 * (tran_high_t)cospi_8_64 + x7 * (tran_high_t)cospi_24_64; s8 = x8; s9 = x9; s10 = x10; s11 = x11; s12 = x12 * (tran_high_t)cospi_8_64 + x13 * (tran_high_t)cospi_24_64; s13 = x12 * (tran_high_t)cospi_24_64 - x13 * 
(tran_high_t)cospi_8_64; s14 = -x14 * (tran_high_t)cospi_24_64 + x15 * (tran_high_t)cospi_8_64; s15 = x14 * (tran_high_t)cospi_8_64 + x15 * (tran_high_t)cospi_24_64; x0 = HIGHBD_WRAPLOW(s0 + s2, bd); x1 = HIGHBD_WRAPLOW(s1 + s3, bd); x2 = HIGHBD_WRAPLOW(s0 - s2, bd); x3 = HIGHBD_WRAPLOW(s1 - s3, bd); x4 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 + s6), bd); x5 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 + s7), bd); x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s4 - s6), bd); x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s5 - s7), bd); x8 = HIGHBD_WRAPLOW(s8 + s10, bd); x9 = HIGHBD_WRAPLOW(s9 + s11, bd); x10 = HIGHBD_WRAPLOW(s8 - s10, bd); x11 = HIGHBD_WRAPLOW(s9 - s11, bd); x12 = HIGHBD_WRAPLOW(dct_const_round_shift(s12 + s14), bd); x13 = HIGHBD_WRAPLOW(dct_const_round_shift(s13 + s15), bd); x14 = HIGHBD_WRAPLOW(dct_const_round_shift(s12 - s14), bd); x15 = HIGHBD_WRAPLOW(dct_const_round_shift(s13 - s15), bd); // stage 4 s2 = (tran_high_t)(-cospi_16_64) * (x2 + x3); s3 = (tran_high_t)cospi_16_64 * (x2 - x3); s6 = (tran_high_t)cospi_16_64 * (x6 + x7); s7 = (tran_high_t)cospi_16_64 * (-x6 + x7); s10 = (tran_high_t)cospi_16_64 * (x10 + x11); s11 = (tran_high_t)cospi_16_64 * (-x10 + x11); s14 = (tran_high_t)(-cospi_16_64) * (x14 + x15); s15 = (tran_high_t)cospi_16_64 * (x14 - x15); x2 = HIGHBD_WRAPLOW(dct_const_round_shift(s2), bd); x3 = HIGHBD_WRAPLOW(dct_const_round_shift(s3), bd); x6 = HIGHBD_WRAPLOW(dct_const_round_shift(s6), bd); x7 = HIGHBD_WRAPLOW(dct_const_round_shift(s7), bd); x10 = HIGHBD_WRAPLOW(dct_const_round_shift(s10), bd); x11 = HIGHBD_WRAPLOW(dct_const_round_shift(s11), bd); x14 = HIGHBD_WRAPLOW(dct_const_round_shift(s14), bd); x15 = HIGHBD_WRAPLOW(dct_const_round_shift(s15), bd); output[0] = HIGHBD_WRAPLOW(x0, bd); output[1] = HIGHBD_WRAPLOW(-x8, bd); output[2] = HIGHBD_WRAPLOW(x12, bd); output[3] = HIGHBD_WRAPLOW(-x4, bd); output[4] = HIGHBD_WRAPLOW(x6, bd); output[5] = HIGHBD_WRAPLOW(x14, bd); output[6] = HIGHBD_WRAPLOW(x10, bd); output[7] = HIGHBD_WRAPLOW(x2, bd); output[8] = HIGHBD_WRAPLOW(x3, bd); output[9] = HIGHBD_WRAPLOW(x11, bd); output[10] = HIGHBD_WRAPLOW(x15, bd); output[11] = HIGHBD_WRAPLOW(x7, bd); output[12] = HIGHBD_WRAPLOW(x5, bd); output[13] = HIGHBD_WRAPLOW(-x13, bd); output[14] = HIGHBD_WRAPLOW(x9, bd); output[15] = HIGHBD_WRAPLOW(-x1, bd); } void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step1[16], step2[16]; tran_high_t temp1, temp2; (void)bd; if (detect_invalid_highbd_input(input, 16)) { #if CONFIG_COEFFICIENT_RANGE_CHECKING assert(0 && "invalid highbd txfm input"); #endif // CONFIG_COEFFICIENT_RANGE_CHECKING memset(output, 0, sizeof(*output) * 16); return; } // stage 1 step1[0] = input[0 / 2]; step1[1] = input[16 / 2]; step1[2] = input[8 / 2]; step1[3] = input[24 / 2]; step1[4] = input[4 / 2]; step1[5] = input[20 / 2]; step1[6] = input[12 / 2]; step1[7] = input[28 / 2]; step1[8] = input[2 / 2]; step1[9] = input[18 / 2]; step1[10] = input[10 / 2]; step1[11] = input[26 / 2]; step1[12] = input[6 / 2]; step1[13] = input[22 / 2]; step1[14] = input[14 / 2]; step1[15] = input[30 / 2]; // stage 2 step2[0] = step1[0]; step2[1] = step1[1]; step2[2] = step1[2]; step2[3] = step1[3]; step2[4] = step1[4]; step2[5] = step1[5]; step2[6] = step1[6]; step2[7] = step1[7]; temp1 = step1[8] * (tran_high_t)cospi_30_64 - step1[15] * (tran_high_t)cospi_2_64; temp2 = step1[8] * (tran_high_t)cospi_2_64 + step1[15] * (tran_high_t)cospi_30_64; step2[8] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[15] = 
HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = step1[9] * (tran_high_t)cospi_14_64 - step1[14] * (tran_high_t)cospi_18_64; temp2 = step1[9] * (tran_high_t)cospi_18_64 + step1[14] * (tran_high_t)cospi_14_64; step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = step1[10] * (tran_high_t)cospi_22_64 - step1[13] * (tran_high_t)cospi_10_64; temp2 = step1[10] * (tran_high_t)cospi_10_64 + step1[13] * (tran_high_t)cospi_22_64; step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = step1[11] * (tran_high_t)cospi_6_64 - step1[12] * (tran_high_t)cospi_26_64; temp2 = step1[11] * (tran_high_t)cospi_26_64 + step1[12] * (tran_high_t)cospi_6_64; step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); // stage 3 step1[0] = step2[0]; step1[1] = step2[1]; step1[2] = step2[2]; step1[3] = step2[3]; temp1 = step2[4] * (tran_high_t)cospi_28_64 - step2[7] * (tran_high_t)cospi_4_64; temp2 = step2[4] * (tran_high_t)cospi_4_64 + step2[7] * (tran_high_t)cospi_28_64; step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = step2[5] * (tran_high_t)cospi_12_64 - step2[6] * (tran_high_t)cospi_20_64; temp2 = step2[5] * (tran_high_t)cospi_20_64 + step2[6] * (tran_high_t)cospi_12_64; step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); // stage 4 temp1 = (step1[0] + step1[1]) * (tran_high_t)cospi_16_64; temp2 = (step1[0] - step1[1]) * (tran_high_t)cospi_16_64; step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = step1[2] * (tran_high_t)cospi_24_64 - step1[3] * (tran_high_t)cospi_8_64; temp2 = step1[2] * (tran_high_t)cospi_8_64 + step1[3] * (tran_high_t)cospi_24_64; step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); step2[8] = step1[8]; step2[15] = step1[15]; temp1 = -step1[9] * (tran_high_t)cospi_8_64 + step1[14] * (tran_high_t)cospi_24_64; temp2 = step1[9] * (tran_high_t)cospi_24_64 + step1[14] * (tran_high_t)cospi_8_64; step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = -step1[10] * (tran_high_t)cospi_24_64 - step1[13] * (tran_high_t)cospi_8_64; temp2 = -step1[10] * (tran_high_t)cospi_8_64 + step1[13] * (tran_high_t)cospi_24_64; step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step2[11] = step1[11]; step2[12] = step1[12]; // stage 5 step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); step1[1] = HIGHBD_WRAPLOW(step2[1] + 
step2[2], bd); step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * (tran_high_t)cospi_16_64; temp2 = (step2[5] + step2[6]) * (tran_high_t)cospi_16_64; step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step1[7] = step2[7]; step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); // stage 6 step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); step2[2] = HIGHBD_WRAPLOW(step1[2] + step1[5], bd); step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); step2[8] = step1[8]; step2[9] = step1[9]; temp1 = (-step1[10] + step1[13]) * (tran_high_t)cospi_16_64; temp2 = (step1[10] + step1[13]) * (tran_high_t)cospi_16_64; step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = (-step1[11] + step1[12]) * (tran_high_t)cospi_16_64; temp2 = (step1[11] + step1[12]) * (tran_high_t)cospi_16_64; step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step2[14] = step1[14]; step2[15] = step1[15]; // stage 7 output[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); output[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); output[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); output[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); output[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); output[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); output[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); output[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); output[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); output[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); output[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); output[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); output[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); output[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); output[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); output[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); } void vpx_highbd_idct16x16_256_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; tran_low_t out[16 * 16]; tran_low_t *outptr = out; tran_low_t temp_in[16], temp_out[16]; // First transform rows for (i = 0; i < 16; ++i) { vpx_highbd_idct16_c(input, outptr, bd); input += 16; outptr += 16; } // Then transform columns for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; vpx_highbd_idct16_c(temp_in, temp_out, bd); for (j = 0; j < 16; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); } } } void vpx_highbd_idct16x16_38_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; tran_low_t out[16 * 16] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[16], temp_out[16]; // First transform 
rows. Since all non-zero dct coefficients are in // upper-left 8x8 area, we only need to calculate first 8 rows here. for (i = 0; i < 8; ++i) { vpx_highbd_idct16_c(input, outptr, bd); input += 16; outptr += 16; } // Then transform columns for (i = 0; i < 16; ++i) { uint16_t *destT = dest; for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; vpx_highbd_idct16_c(temp_in, temp_out, bd); for (j = 0; j < 16; ++j) { destT[i] = highbd_clip_pixel_add(destT[i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); destT += stride; } } } void vpx_highbd_idct16x16_10_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; tran_low_t out[16 * 16] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[16], temp_out[16]; // First transform rows. Since all non-zero dct coefficients are in // upper-left 4x4 area, we only need to calculate first 4 rows here. for (i = 0; i < 4; ++i) { vpx_highbd_idct16_c(input, outptr, bd); input += 16; outptr += 16; } // Then transform columns for (i = 0; i < 16; ++i) { for (j = 0; j < 16; ++j) temp_in[j] = out[j * 16 + i]; vpx_highbd_idct16_c(temp_in, temp_out, bd); for (j = 0; j < 16; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); } } } void vpx_highbd_idct16x16_1_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; tran_high_t a1; tran_low_t out = HIGHBD_WRAPLOW( dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); out = HIGHBD_WRAPLOW(dct_const_round_shift(out * (tran_high_t)cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 6); for (j = 0; j < 16; ++j) { for (i = 0; i < 16; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd); dest += stride; } } static void highbd_idct32_c(const tran_low_t *input, tran_low_t *output, int bd) { tran_low_t step1[32], step2[32]; tran_high_t temp1, temp2; (void)bd; if (detect_invalid_highbd_input(input, 32)) { #if CONFIG_COEFFICIENT_RANGE_CHECKING assert(0 && "invalid highbd txfm input"); #endif // CONFIG_COEFFICIENT_RANGE_CHECKING memset(output, 0, sizeof(*output) * 32); return; } // stage 1 step1[0] = input[0]; step1[1] = input[16]; step1[2] = input[8]; step1[3] = input[24]; step1[4] = input[4]; step1[5] = input[20]; step1[6] = input[12]; step1[7] = input[28]; step1[8] = input[2]; step1[9] = input[18]; step1[10] = input[10]; step1[11] = input[26]; step1[12] = input[6]; step1[13] = input[22]; step1[14] = input[14]; step1[15] = input[30]; temp1 = input[1] * (tran_high_t)cospi_31_64 - input[31] * (tran_high_t)cospi_1_64; temp2 = input[1] * (tran_high_t)cospi_1_64 + input[31] * (tran_high_t)cospi_31_64; step1[16] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[31] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = input[17] * (tran_high_t)cospi_15_64 - input[15] * (tran_high_t)cospi_17_64; temp2 = input[17] * (tran_high_t)cospi_17_64 + input[15] * (tran_high_t)cospi_15_64; step1[17] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[30] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = input[9] * (tran_high_t)cospi_23_64 - input[23] * (tran_high_t)cospi_9_64; temp2 = input[9] * (tran_high_t)cospi_9_64 + input[23] * (tran_high_t)cospi_23_64; step1[18] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[29] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = input[25] * (tran_high_t)cospi_7_64 - input[7] * (tran_high_t)cospi_25_64; temp2 = input[25] * (tran_high_t)cospi_25_64 + input[7] * (tran_high_t)cospi_7_64; step1[19] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); 
step1[28] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = input[5] * (tran_high_t)cospi_27_64 - input[27] * (tran_high_t)cospi_5_64; temp2 = input[5] * (tran_high_t)cospi_5_64 + input[27] * (tran_high_t)cospi_27_64; step1[20] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[27] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = input[21] * (tran_high_t)cospi_11_64 - input[11] * (tran_high_t)cospi_21_64; temp2 = input[21] * (tran_high_t)cospi_21_64 + input[11] * (tran_high_t)cospi_11_64; step1[21] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[26] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = input[13] * (tran_high_t)cospi_19_64 - input[19] * (tran_high_t)cospi_13_64; temp2 = input[13] * (tran_high_t)cospi_13_64 + input[19] * (tran_high_t)cospi_19_64; step1[22] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[25] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = input[29] * (tran_high_t)cospi_3_64 - input[3] * (tran_high_t)cospi_29_64; temp2 = input[29] * (tran_high_t)cospi_29_64 + input[3] * (tran_high_t)cospi_3_64; step1[23] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[24] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); // stage 2 step2[0] = step1[0]; step2[1] = step1[1]; step2[2] = step1[2]; step2[3] = step1[3]; step2[4] = step1[4]; step2[5] = step1[5]; step2[6] = step1[6]; step2[7] = step1[7]; temp1 = step1[8] * (tran_high_t)cospi_30_64 - step1[15] * (tran_high_t)cospi_2_64; temp2 = step1[8] * (tran_high_t)cospi_2_64 + step1[15] * (tran_high_t)cospi_30_64; step2[8] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[15] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = step1[9] * (tran_high_t)cospi_14_64 - step1[14] * (tran_high_t)cospi_18_64; temp2 = step1[9] * (tran_high_t)cospi_18_64 + step1[14] * (tran_high_t)cospi_14_64; step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = step1[10] * (tran_high_t)cospi_22_64 - step1[13] * (tran_high_t)cospi_10_64; temp2 = step1[10] * (tran_high_t)cospi_10_64 + step1[13] * (tran_high_t)cospi_22_64; step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = step1[11] * (tran_high_t)cospi_6_64 - step1[12] * (tran_high_t)cospi_26_64; temp2 = step1[11] * (tran_high_t)cospi_26_64 + step1[12] * (tran_high_t)cospi_6_64; step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[17], bd); step2[17] = HIGHBD_WRAPLOW(step1[16] - step1[17], bd); step2[18] = HIGHBD_WRAPLOW(-step1[18] + step1[19], bd); step2[19] = HIGHBD_WRAPLOW(step1[18] + step1[19], bd); step2[20] = HIGHBD_WRAPLOW(step1[20] + step1[21], bd); step2[21] = HIGHBD_WRAPLOW(step1[20] - step1[21], bd); step2[22] = HIGHBD_WRAPLOW(-step1[22] + step1[23], bd); step2[23] = HIGHBD_WRAPLOW(step1[22] + step1[23], bd); step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[25], bd); step2[25] = HIGHBD_WRAPLOW(step1[24] - step1[25], bd); step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[27], bd); step2[27] = HIGHBD_WRAPLOW(step1[26] + step1[27], bd); step2[28] = HIGHBD_WRAPLOW(step1[28] + step1[29], bd); step2[29] = HIGHBD_WRAPLOW(step1[28] - step1[29], bd); step2[30] = HIGHBD_WRAPLOW(-step1[30] + step1[31], bd); step2[31] = HIGHBD_WRAPLOW(step1[30] + step1[31], bd); // stage 3 step1[0] = step2[0]; step1[1] = step2[1]; step1[2] = step2[2]; step1[3] = step2[3]; 
temp1 = step2[4] * (tran_high_t)cospi_28_64 - step2[7] * (tran_high_t)cospi_4_64; temp2 = step2[4] * (tran_high_t)cospi_4_64 + step2[7] * (tran_high_t)cospi_28_64; step1[4] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[7] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = step2[5] * (tran_high_t)cospi_12_64 - step2[6] * (tran_high_t)cospi_20_64; temp2 = step2[5] * (tran_high_t)cospi_20_64 + step2[6] * (tran_high_t)cospi_12_64; step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[9], bd); step1[9] = HIGHBD_WRAPLOW(step2[8] - step2[9], bd); step1[10] = HIGHBD_WRAPLOW(-step2[10] + step2[11], bd); step1[11] = HIGHBD_WRAPLOW(step2[10] + step2[11], bd); step1[12] = HIGHBD_WRAPLOW(step2[12] + step2[13], bd); step1[13] = HIGHBD_WRAPLOW(step2[12] - step2[13], bd); step1[14] = HIGHBD_WRAPLOW(-step2[14] + step2[15], bd); step1[15] = HIGHBD_WRAPLOW(step2[14] + step2[15], bd); step1[16] = step2[16]; step1[31] = step2[31]; temp1 = -step2[17] * (tran_high_t)cospi_4_64 + step2[30] * (tran_high_t)cospi_28_64; temp2 = step2[17] * (tran_high_t)cospi_28_64 + step2[30] * (tran_high_t)cospi_4_64; step1[17] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[30] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = -step2[18] * (tran_high_t)cospi_28_64 - step2[29] * (tran_high_t)cospi_4_64; temp2 = -step2[18] * (tran_high_t)cospi_4_64 + step2[29] * (tran_high_t)cospi_28_64; step1[18] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[29] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step1[19] = step2[19]; step1[20] = step2[20]; temp1 = -step2[21] * (tran_high_t)cospi_20_64 + step2[26] * (tran_high_t)cospi_12_64; temp2 = step2[21] * (tran_high_t)cospi_12_64 + step2[26] * (tran_high_t)cospi_20_64; step1[21] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[26] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = -step2[22] * (tran_high_t)cospi_12_64 - step2[25] * (tran_high_t)cospi_20_64; temp2 = -step2[22] * (tran_high_t)cospi_20_64 + step2[25] * (tran_high_t)cospi_12_64; step1[22] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[25] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; step1[28] = step2[28]; // stage 4 temp1 = (step1[0] + step1[1]) * (tran_high_t)cospi_16_64; temp2 = (step1[0] - step1[1]) * (tran_high_t)cospi_16_64; step2[0] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[1] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = step1[2] * (tran_high_t)cospi_24_64 - step1[3] * (tran_high_t)cospi_8_64; temp2 = step1[2] * (tran_high_t)cospi_8_64 + step1[3] * (tran_high_t)cospi_24_64; step2[2] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[3] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step2[4] = HIGHBD_WRAPLOW(step1[4] + step1[5], bd); step2[5] = HIGHBD_WRAPLOW(step1[4] - step1[5], bd); step2[6] = HIGHBD_WRAPLOW(-step1[6] + step1[7], bd); step2[7] = HIGHBD_WRAPLOW(step1[6] + step1[7], bd); step2[8] = step1[8]; step2[15] = step1[15]; temp1 = -step1[9] * (tran_high_t)cospi_8_64 + step1[14] * (tran_high_t)cospi_24_64; temp2 = step1[9] * (tran_high_t)cospi_24_64 + step1[14] * (tran_high_t)cospi_8_64; step2[9] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[14] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = -step1[10] * (tran_high_t)cospi_24_64 - step1[13] * (tran_high_t)cospi_8_64; temp2 = -step1[10] 
* (tran_high_t)cospi_8_64 + step1[13] * (tran_high_t)cospi_24_64; step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step2[11] = step1[11]; step2[12] = step1[12]; step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[19], bd); step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[18], bd); step2[18] = HIGHBD_WRAPLOW(step1[17] - step1[18], bd); step2[19] = HIGHBD_WRAPLOW(step1[16] - step1[19], bd); step2[20] = HIGHBD_WRAPLOW(-step1[20] + step1[23], bd); step2[21] = HIGHBD_WRAPLOW(-step1[21] + step1[22], bd); step2[22] = HIGHBD_WRAPLOW(step1[21] + step1[22], bd); step2[23] = HIGHBD_WRAPLOW(step1[20] + step1[23], bd); step2[24] = HIGHBD_WRAPLOW(step1[24] + step1[27], bd); step2[25] = HIGHBD_WRAPLOW(step1[25] + step1[26], bd); step2[26] = HIGHBD_WRAPLOW(step1[25] - step1[26], bd); step2[27] = HIGHBD_WRAPLOW(step1[24] - step1[27], bd); step2[28] = HIGHBD_WRAPLOW(-step1[28] + step1[31], bd); step2[29] = HIGHBD_WRAPLOW(-step1[29] + step1[30], bd); step2[30] = HIGHBD_WRAPLOW(step1[29] + step1[30], bd); step2[31] = HIGHBD_WRAPLOW(step1[28] + step1[31], bd); // stage 5 step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[3], bd); step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[2], bd); step1[2] = HIGHBD_WRAPLOW(step2[1] - step2[2], bd); step1[3] = HIGHBD_WRAPLOW(step2[0] - step2[3], bd); step1[4] = step2[4]; temp1 = (step2[6] - step2[5]) * (tran_high_t)cospi_16_64; temp2 = (step2[5] + step2[6]) * (tran_high_t)cospi_16_64; step1[5] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[6] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step1[7] = step2[7]; step1[8] = HIGHBD_WRAPLOW(step2[8] + step2[11], bd); step1[9] = HIGHBD_WRAPLOW(step2[9] + step2[10], bd); step1[10] = HIGHBD_WRAPLOW(step2[9] - step2[10], bd); step1[11] = HIGHBD_WRAPLOW(step2[8] - step2[11], bd); step1[12] = HIGHBD_WRAPLOW(-step2[12] + step2[15], bd); step1[13] = HIGHBD_WRAPLOW(-step2[13] + step2[14], bd); step1[14] = HIGHBD_WRAPLOW(step2[13] + step2[14], bd); step1[15] = HIGHBD_WRAPLOW(step2[12] + step2[15], bd); step1[16] = step2[16]; step1[17] = step2[17]; temp1 = -step2[18] * (tran_high_t)cospi_8_64 + step2[29] * (tran_high_t)cospi_24_64; temp2 = step2[18] * (tran_high_t)cospi_24_64 + step2[29] * (tran_high_t)cospi_8_64; step1[18] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[29] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = -step2[19] * (tran_high_t)cospi_8_64 + step2[28] * (tran_high_t)cospi_24_64; temp2 = step2[19] * (tran_high_t)cospi_24_64 + step2[28] * (tran_high_t)cospi_8_64; step1[19] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[28] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = -step2[20] * (tran_high_t)cospi_24_64 - step2[27] * (tran_high_t)cospi_8_64; temp2 = -step2[20] * (tran_high_t)cospi_8_64 + step2[27] * (tran_high_t)cospi_24_64; step1[20] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[27] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = -step2[21] * (tran_high_t)cospi_24_64 - step2[26] * (tran_high_t)cospi_8_64; temp2 = -step2[21] * (tran_high_t)cospi_8_64 + step2[26] * (tran_high_t)cospi_24_64; step1[21] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[26] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step1[22] = step2[22]; step1[23] = step2[23]; step1[24] = step2[24]; step1[25] = step2[25]; step1[30] = step2[30]; step1[31] = step2[31]; // stage 6 step2[0] = HIGHBD_WRAPLOW(step1[0] + step1[7], bd); step2[1] = HIGHBD_WRAPLOW(step1[1] + step1[6], bd); step2[2] = 
HIGHBD_WRAPLOW(step1[2] + step1[5], bd); step2[3] = HIGHBD_WRAPLOW(step1[3] + step1[4], bd); step2[4] = HIGHBD_WRAPLOW(step1[3] - step1[4], bd); step2[5] = HIGHBD_WRAPLOW(step1[2] - step1[5], bd); step2[6] = HIGHBD_WRAPLOW(step1[1] - step1[6], bd); step2[7] = HIGHBD_WRAPLOW(step1[0] - step1[7], bd); step2[8] = step1[8]; step2[9] = step1[9]; temp1 = (-step1[10] + step1[13]) * (tran_high_t)cospi_16_64; temp2 = (step1[10] + step1[13]) * (tran_high_t)cospi_16_64; step2[10] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[13] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = (-step1[11] + step1[12]) * (tran_high_t)cospi_16_64; temp2 = (step1[11] + step1[12]) * (tran_high_t)cospi_16_64; step2[11] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step2[12] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step2[14] = step1[14]; step2[15] = step1[15]; step2[16] = HIGHBD_WRAPLOW(step1[16] + step1[23], bd); step2[17] = HIGHBD_WRAPLOW(step1[17] + step1[22], bd); step2[18] = HIGHBD_WRAPLOW(step1[18] + step1[21], bd); step2[19] = HIGHBD_WRAPLOW(step1[19] + step1[20], bd); step2[20] = HIGHBD_WRAPLOW(step1[19] - step1[20], bd); step2[21] = HIGHBD_WRAPLOW(step1[18] - step1[21], bd); step2[22] = HIGHBD_WRAPLOW(step1[17] - step1[22], bd); step2[23] = HIGHBD_WRAPLOW(step1[16] - step1[23], bd); step2[24] = HIGHBD_WRAPLOW(-step1[24] + step1[31], bd); step2[25] = HIGHBD_WRAPLOW(-step1[25] + step1[30], bd); step2[26] = HIGHBD_WRAPLOW(-step1[26] + step1[29], bd); step2[27] = HIGHBD_WRAPLOW(-step1[27] + step1[28], bd); step2[28] = HIGHBD_WRAPLOW(step1[27] + step1[28], bd); step2[29] = HIGHBD_WRAPLOW(step1[26] + step1[29], bd); step2[30] = HIGHBD_WRAPLOW(step1[25] + step1[30], bd); step2[31] = HIGHBD_WRAPLOW(step1[24] + step1[31], bd); // stage 7 step1[0] = HIGHBD_WRAPLOW(step2[0] + step2[15], bd); step1[1] = HIGHBD_WRAPLOW(step2[1] + step2[14], bd); step1[2] = HIGHBD_WRAPLOW(step2[2] + step2[13], bd); step1[3] = HIGHBD_WRAPLOW(step2[3] + step2[12], bd); step1[4] = HIGHBD_WRAPLOW(step2[4] + step2[11], bd); step1[5] = HIGHBD_WRAPLOW(step2[5] + step2[10], bd); step1[6] = HIGHBD_WRAPLOW(step2[6] + step2[9], bd); step1[7] = HIGHBD_WRAPLOW(step2[7] + step2[8], bd); step1[8] = HIGHBD_WRAPLOW(step2[7] - step2[8], bd); step1[9] = HIGHBD_WRAPLOW(step2[6] - step2[9], bd); step1[10] = HIGHBD_WRAPLOW(step2[5] - step2[10], bd); step1[11] = HIGHBD_WRAPLOW(step2[4] - step2[11], bd); step1[12] = HIGHBD_WRAPLOW(step2[3] - step2[12], bd); step1[13] = HIGHBD_WRAPLOW(step2[2] - step2[13], bd); step1[14] = HIGHBD_WRAPLOW(step2[1] - step2[14], bd); step1[15] = HIGHBD_WRAPLOW(step2[0] - step2[15], bd); step1[16] = step2[16]; step1[17] = step2[17]; step1[18] = step2[18]; step1[19] = step2[19]; temp1 = (-step2[20] + step2[27]) * (tran_high_t)cospi_16_64; temp2 = (step2[20] + step2[27]) * (tran_high_t)cospi_16_64; step1[20] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[27] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = (-step2[21] + step2[26]) * (tran_high_t)cospi_16_64; temp2 = (step2[21] + step2[26]) * (tran_high_t)cospi_16_64; step1[21] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[26] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = (-step2[22] + step2[25]) * (tran_high_t)cospi_16_64; temp2 = (step2[22] + step2[25]) * (tran_high_t)cospi_16_64; step1[22] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[25] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); temp1 = (-step2[23] + step2[24]) * (tran_high_t)cospi_16_64; temp2 = (step2[23] + step2[24]) * 
(tran_high_t)cospi_16_64; step1[23] = HIGHBD_WRAPLOW(dct_const_round_shift(temp1), bd); step1[24] = HIGHBD_WRAPLOW(dct_const_round_shift(temp2), bd); step1[28] = step2[28]; step1[29] = step2[29]; step1[30] = step2[30]; step1[31] = step2[31]; // final stage output[0] = HIGHBD_WRAPLOW(step1[0] + step1[31], bd); output[1] = HIGHBD_WRAPLOW(step1[1] + step1[30], bd); output[2] = HIGHBD_WRAPLOW(step1[2] + step1[29], bd); output[3] = HIGHBD_WRAPLOW(step1[3] + step1[28], bd); output[4] = HIGHBD_WRAPLOW(step1[4] + step1[27], bd); output[5] = HIGHBD_WRAPLOW(step1[5] + step1[26], bd); output[6] = HIGHBD_WRAPLOW(step1[6] + step1[25], bd); output[7] = HIGHBD_WRAPLOW(step1[7] + step1[24], bd); output[8] = HIGHBD_WRAPLOW(step1[8] + step1[23], bd); output[9] = HIGHBD_WRAPLOW(step1[9] + step1[22], bd); output[10] = HIGHBD_WRAPLOW(step1[10] + step1[21], bd); output[11] = HIGHBD_WRAPLOW(step1[11] + step1[20], bd); output[12] = HIGHBD_WRAPLOW(step1[12] + step1[19], bd); output[13] = HIGHBD_WRAPLOW(step1[13] + step1[18], bd); output[14] = HIGHBD_WRAPLOW(step1[14] + step1[17], bd); output[15] = HIGHBD_WRAPLOW(step1[15] + step1[16], bd); output[16] = HIGHBD_WRAPLOW(step1[15] - step1[16], bd); output[17] = HIGHBD_WRAPLOW(step1[14] - step1[17], bd); output[18] = HIGHBD_WRAPLOW(step1[13] - step1[18], bd); output[19] = HIGHBD_WRAPLOW(step1[12] - step1[19], bd); output[20] = HIGHBD_WRAPLOW(step1[11] - step1[20], bd); output[21] = HIGHBD_WRAPLOW(step1[10] - step1[21], bd); output[22] = HIGHBD_WRAPLOW(step1[9] - step1[22], bd); output[23] = HIGHBD_WRAPLOW(step1[8] - step1[23], bd); output[24] = HIGHBD_WRAPLOW(step1[7] - step1[24], bd); output[25] = HIGHBD_WRAPLOW(step1[6] - step1[25], bd); output[26] = HIGHBD_WRAPLOW(step1[5] - step1[26], bd); output[27] = HIGHBD_WRAPLOW(step1[4] - step1[27], bd); output[28] = HIGHBD_WRAPLOW(step1[3] - step1[28], bd); output[29] = HIGHBD_WRAPLOW(step1[2] - step1[29], bd); output[30] = HIGHBD_WRAPLOW(step1[1] - step1[30], bd); output[31] = HIGHBD_WRAPLOW(step1[0] - step1[31], bd); } void vpx_highbd_idct32x32_1024_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; tran_low_t out[32 * 32]; tran_low_t *outptr = out; tran_low_t temp_in[32], temp_out[32]; // Rows for (i = 0; i < 32; ++i) { tran_low_t zero_coeff = 0; for (j = 0; j < 32; ++j) zero_coeff |= input[j]; if (zero_coeff) highbd_idct32_c(input, outptr, bd); else memset(outptr, 0, sizeof(tran_low_t) * 32); input += 32; outptr += 32; } // Columns for (i = 0; i < 32; ++i) { for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; highbd_idct32_c(temp_in, temp_out, bd); for (j = 0; j < 32; ++j) { dest[j * stride + i] = highbd_clip_pixel_add( dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); } } } void vpx_highbd_idct32x32_135_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; tran_low_t out[32 * 32] = { 0 }; tran_low_t *outptr = out; tran_low_t temp_in[32], temp_out[32]; // Rows // Only upper-left 16x16 has non-zero coeff for (i = 0; i < 16; ++i) { highbd_idct32_c(input, outptr, bd); input += 32; outptr += 32; } // Columns for (i = 0; i < 32; ++i) { uint16_t *destT = dest; for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i]; highbd_idct32_c(temp_in, temp_out, bd); for (j = 0; j < 32; ++j) { destT[i] = highbd_clip_pixel_add(destT[i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd); destT += stride; } } } void vpx_highbd_idct32x32_34_add_c(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; tran_low_t out[32 * 32] = { 0 }; tran_low_t *outptr = out; tran_low_t 
temp_in[32], temp_out[32];

  // Rows
  // Only upper-left 8x8 has non-zero coeff
  for (i = 0; i < 8; ++i) {
    highbd_idct32_c(input, outptr, bd);
    input += 32;
    outptr += 32;
  }

  // Columns
  for (i = 0; i < 32; ++i) {
    for (j = 0; j < 32; ++j) temp_in[j] = out[j * 32 + i];
    highbd_idct32_c(temp_in, temp_out, bd);
    for (j = 0; j < 32; ++j) {
      dest[j * stride + i] = highbd_clip_pixel_add(
          dest[j * stride + i], ROUND_POWER_OF_TWO(temp_out[j], 6), bd);
    }
  }
}

void vpx_highbd_idct32x32_1_add_c(const tran_low_t *input, uint16_t *dest,
                                  int stride, int bd) {
  int i, j;
  int a1;
  tran_low_t out = HIGHBD_WRAPLOW(
      dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd);

  out = HIGHBD_WRAPLOW(dct_const_round_shift(out * (tran_high_t)cospi_16_64),
                       bd);
  a1 = ROUND_POWER_OF_TWO(out, 6);

  for (j = 0; j < 32; ++j) {
    for (i = 0; i < 32; ++i) dest[i] = highbd_clip_pixel_add(dest[i], a1, bd);
    dest += stride;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
libvpx-1.8.2/vpx_dsp/inv_txfm.h000066400000000000000000000114231357355204000165210ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VPX_DSP_INV_TXFM_H_
#define VPX_VPX_DSP_INV_TXFM_H_

#include <assert.h>

#include "./vpx_config.h"
#include "vpx_dsp/txfm_common.h"
#include "vpx_ports/mem.h"

#ifdef __cplusplus
extern "C" {
#endif

static INLINE tran_high_t check_range(tran_high_t input) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  // For valid VP9 input streams, intermediate stage coefficients should always
  // stay within the range of a signed 16 bit integer. Coefficients can go out
  // of this range for invalid/corrupt VP9 streams. However, strictly checking
  // this range for every intermediate coefficient can be burdensome for a
  // decoder, therefore the following assertion is only enabled when configured
  // with --enable-coefficient-range-checking.
  assert(INT16_MIN <= input);
  assert(input <= INT16_MAX);
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
  return input;
}

static INLINE tran_high_t dct_const_round_shift(tran_high_t input) {
  tran_high_t rv = ROUND_POWER_OF_TWO(input, DCT_CONST_BITS);
  return (tran_high_t)rv;
}

#if CONFIG_VP9_HIGHBITDEPTH
static INLINE tran_high_t highbd_check_range(tran_high_t input, int bd) {
#if CONFIG_COEFFICIENT_RANGE_CHECKING
  // For valid highbitdepth VP9 streams, intermediate stage coefficients will
  // stay within the ranges:
  // - 8 bit: signed 16 bit integer
  // - 10 bit: signed 18 bit integer
  // - 12 bit: signed 20 bit integer
  const int32_t int_max = (1 << (7 + bd)) - 1;
  const int32_t int_min = -int_max - 1;
  assert(int_min <= input);
  assert(input <= int_max);
  (void)int_min;
#endif  // CONFIG_COEFFICIENT_RANGE_CHECKING
  (void)bd;
  return input;
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

#if CONFIG_EMULATE_HARDWARE
// When CONFIG_EMULATE_HARDWARE is 1 the transform performs a
// non-normative method to handle overflows. A stream that causes
// overflows in the inverse transform is considered invalid in VP9,
// and a hardware implementer is free to choose any reasonable
// method to handle overflows.
However to aid in hardware // verification they can use a specific implementation of the // WRAPLOW() macro below that is identical to their intended // hardware implementation (and also use configure options to trigger // the C-implementation of the transform). // // The particular WRAPLOW implementation below performs strict // overflow wrapping to match common hardware implementations. // bd of 8 uses trans_low with 16bits, need to remove 16bits // bd of 10 uses trans_low with 18bits, need to remove 14bits // bd of 12 uses trans_low with 20bits, need to remove 12bits // bd of x uses trans_low with 8+x bits, need to remove 24-x bits #define WRAPLOW(x) ((((int32_t)check_range(x)) << 16) >> 16) #if CONFIG_VP9_HIGHBITDEPTH #define HIGHBD_WRAPLOW(x, bd) \ ((((int32_t)highbd_check_range((x), bd)) << (24 - bd)) >> (24 - bd)) #endif // CONFIG_VP9_HIGHBITDEPTH #else // CONFIG_EMULATE_HARDWARE #define WRAPLOW(x) ((int32_t)check_range(x)) #if CONFIG_VP9_HIGHBITDEPTH #define HIGHBD_WRAPLOW(x, bd) ((int32_t)highbd_check_range((x), bd)) #endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_EMULATE_HARDWARE void idct4_c(const tran_low_t *input, tran_low_t *output); void idct8_c(const tran_low_t *input, tran_low_t *output); void idct16_c(const tran_low_t *input, tran_low_t *output); void idct32_c(const tran_low_t *input, tran_low_t *output); void iadst4_c(const tran_low_t *input, tran_low_t *output); void iadst8_c(const tran_low_t *input, tran_low_t *output); void iadst16_c(const tran_low_t *input, tran_low_t *output); #if CONFIG_VP9_HIGHBITDEPTH void vpx_highbd_idct4_c(const tran_low_t *input, tran_low_t *output, int bd); void vpx_highbd_idct8_c(const tran_low_t *input, tran_low_t *output, int bd); void vpx_highbd_idct16_c(const tran_low_t *input, tran_low_t *output, int bd); void vpx_highbd_iadst4_c(const tran_low_t *input, tran_low_t *output, int bd); void vpx_highbd_iadst8_c(const tran_low_t *input, tran_low_t *output, int bd); void vpx_highbd_iadst16_c(const tran_low_t *input, tran_low_t *output, int bd); static INLINE uint16_t highbd_clip_pixel_add(uint16_t dest, tran_high_t trans, int bd) { trans = HIGHBD_WRAPLOW(trans, bd); return clip_pixel_highbd(dest + (int)trans, bd); } #endif static INLINE uint8_t clip_pixel_add(uint8_t dest, tran_high_t trans) { trans = WRAPLOW(trans); return clip_pixel(dest + (int)trans); } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_INV_TXFM_H_ libvpx-1.8.2/vpx_dsp/loopfilter.c000066400000000000000000000764601357355204000170550ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" static INLINE int8_t signed_char_clamp(int t) { return (int8_t)clamp(t, -128, 127); } #if CONFIG_VP9_HIGHBITDEPTH static INLINE int16_t signed_char_clamp_high(int t, int bd) { switch (bd) { case 10: return (int16_t)clamp(t, -128 * 4, 128 * 4 - 1); case 12: return (int16_t)clamp(t, -128 * 16, 128 * 16 - 1); case 8: default: return (int16_t)clamp(t, -128, 128 - 1); } } #endif // Should we apply any filter at all: 11111111 yes, 00000000 no static INLINE int8_t filter_mask(uint8_t limit, uint8_t blimit, uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3) { int8_t mask = 0; mask |= (abs(p3 - p2) > limit) * -1; mask |= (abs(p2 - p1) > limit) * -1; mask |= (abs(p1 - p0) > limit) * -1; mask |= (abs(q1 - q0) > limit) * -1; mask |= (abs(q2 - q1) > limit) * -1; mask |= (abs(q3 - q2) > limit) * -1; mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; return ~mask; } static INLINE int8_t flat_mask4(uint8_t thresh, uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3) { int8_t mask = 0; mask |= (abs(p1 - p0) > thresh) * -1; mask |= (abs(q1 - q0) > thresh) * -1; mask |= (abs(p2 - p0) > thresh) * -1; mask |= (abs(q2 - q0) > thresh) * -1; mask |= (abs(p3 - p0) > thresh) * -1; mask |= (abs(q3 - q0) > thresh) * -1; return ~mask; } static INLINE int8_t flat_mask5(uint8_t thresh, uint8_t p4, uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3, uint8_t q4) { int8_t mask = ~flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3); mask |= (abs(p4 - p0) > thresh) * -1; mask |= (abs(q4 - q0) > thresh) * -1; return ~mask; } // Is there high edge variance internal edge: 11111111 yes, 00000000 no static INLINE int8_t hev_mask(uint8_t thresh, uint8_t p1, uint8_t p0, uint8_t q0, uint8_t q1) { int8_t hev = 0; hev |= (abs(p1 - p0) > thresh) * -1; hev |= (abs(q1 - q0) > thresh) * -1; return hev; } static INLINE void filter4(int8_t mask, uint8_t thresh, uint8_t *op1, uint8_t *op0, uint8_t *oq0, uint8_t *oq1) { int8_t filter1, filter2; const int8_t ps1 = (int8_t)(*op1 ^ 0x80); const int8_t ps0 = (int8_t)(*op0 ^ 0x80); const int8_t qs0 = (int8_t)(*oq0 ^ 0x80); const int8_t qs1 = (int8_t)(*oq1 ^ 0x80); const int8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); // add outer taps if we have high edge variance int8_t filter = signed_char_clamp(ps1 - qs1) & hev; // inner taps filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; // save bottom 3 bits so that we round one side +4 and the other +3 // if it equals 4 we'll set it to adjust by -1 to account for the fact // we'd round it by 3 the other way filter1 = signed_char_clamp(filter + 4) >> 3; filter2 = signed_char_clamp(filter + 3) >> 3; *oq0 = (uint8_t)(signed_char_clamp(qs0 - filter1) ^ 0x80); *op0 = (uint8_t)(signed_char_clamp(ps0 + filter2) ^ 0x80); // outer tap adjustments filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; *oq1 = (uint8_t)(signed_char_clamp(qs1 - filter) ^ 0x80); *op1 = (uint8_t)(signed_char_clamp(ps1 + filter) ^ 0x80); } void vpx_lpf_horizontal_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
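  // Each iteration below filters one column of the 8-pixel-wide horizontal
  // edge: p3..p0 are the four pixels above the edge and q0..q3 the four
  // below, gathered with pitch-strided loads before filter4() is applied.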
for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4 * pitch], p2 = s[-3 * pitch], p1 = s[-2 * pitch], p0 = s[-pitch]; const uint8_t q0 = s[0 * pitch], q1 = s[1 * pitch], q2 = s[2 * pitch], q3 = s[3 * pitch]; const int8_t mask = filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3); filter4(mask, *thresh, s - 2 * pitch, s - 1 * pitch, s, s + 1 * pitch); ++s; } } void vpx_lpf_horizontal_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { vpx_lpf_horizontal_4_c(s, pitch, blimit0, limit0, thresh0); vpx_lpf_horizontal_4_c(s + 8, pitch, blimit1, limit1, thresh1); } void vpx_lpf_vertical_4_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3); filter4(mask, *thresh, s - 2, s - 1, s, s + 1); s += pitch; } } void vpx_lpf_vertical_4_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { vpx_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0); vpx_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); } static INLINE void filter8(int8_t mask, uint8_t thresh, uint8_t flat, uint8_t *op3, uint8_t *op2, uint8_t *op1, uint8_t *op0, uint8_t *oq0, uint8_t *oq1, uint8_t *oq2, uint8_t *oq3) { if (flat && mask) { const uint8_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; // 7-tap filter [1, 1, 1, 2, 1, 1, 1] *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3); *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3); *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3); *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3); *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3); *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3); } else { filter4(mask, thresh, op1, op0, oq0, oq1); } } void vpx_lpf_horizontal_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
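  // In addition to the filter_mask() on/off decision, each column evaluates
  // flat_mask4(); filter8() then applies the wide 7-tap smoothing to flat
  // columns and falls back to the narrow filter4() otherwise.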
for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4 * pitch], p2 = s[-3 * pitch], p1 = s[-2 * pitch], p0 = s[-pitch]; const uint8_t q0 = s[0 * pitch], q1 = s[1 * pitch], q2 = s[2 * pitch], q3 = s[3 * pitch]; const int8_t mask = filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3); const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); filter8(mask, *thresh, flat, s - 4 * pitch, s - 3 * pitch, s - 2 * pitch, s - 1 * pitch, s, s + 1 * pitch, s + 2 * pitch, s + 3 * pitch); ++s; } } void vpx_lpf_horizontal_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { vpx_lpf_horizontal_8_c(s, pitch, blimit0, limit0, thresh0); vpx_lpf_horizontal_8_c(s + 8, pitch, blimit1, limit1, thresh1); } void vpx_lpf_vertical_8_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { int i; for (i = 0; i < 8; ++i) { const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3); const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); filter8(mask, *thresh, flat, s - 4, s - 3, s - 2, s - 1, s, s + 1, s + 2, s + 3); s += pitch; } } void vpx_lpf_vertical_8_dual_c(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { vpx_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0); vpx_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1); } static INLINE void filter16(int8_t mask, uint8_t thresh, uint8_t flat, uint8_t flat2, uint8_t *op7, uint8_t *op6, uint8_t *op5, uint8_t *op4, uint8_t *op3, uint8_t *op2, uint8_t *op1, uint8_t *op0, uint8_t *oq0, uint8_t *oq1, uint8_t *oq2, uint8_t *oq3, uint8_t *oq4, uint8_t *oq5, uint8_t *oq6, uint8_t *oq7) { if (flat2 && flat && mask) { const uint8_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4, p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; const uint8_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3, q4 = *oq4, q5 = *oq5, q6 = *oq6, q7 = *oq7; // 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] *op6 = ROUND_POWER_OF_TWO( p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 + q0, 4); *op5 = ROUND_POWER_OF_TWO( p7 * 6 + p6 + p5 * 2 + p4 + p3 + p2 + p1 + p0 + q0 + q1, 4); *op4 = ROUND_POWER_OF_TWO( p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 + p1 + p0 + q0 + q1 + q2, 4); *op3 = ROUND_POWER_OF_TWO( p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 + p1 + p0 + q0 + q1 + q2 + q3, 4); *op2 = ROUND_POWER_OF_TWO( p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 + p0 + q0 + q1 + q2 + q3 + q4, 4); *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 + q0 + q1 + q2 + q3 + q4 + q5, 4); *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 + q1 + q2 + q3 + q4 + q5 + q6, 4); *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 + q3 + q4 + q5 + q6 + q7 * 2, 4); *oq2 = ROUND_POWER_OF_TWO( p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 + q4 + q5 + q6 + q7 * 3, 4); *oq3 = ROUND_POWER_OF_TWO( p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 + q5 + q6 + q7 * 4, 4); *oq4 = ROUND_POWER_OF_TWO( p2 + p1 + p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 + q6 + q7 * 5, 4); *oq5 = ROUND_POWER_OF_TWO( p1 + p0 + q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 
6, 4); *oq6 = ROUND_POWER_OF_TWO( p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4); } else { filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3); } } static void mb_lpf_horizontal_edge_w(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. for (i = 0; i < 8 * count; ++i) { const uint8_t p3 = s[-4 * pitch], p2 = s[-3 * pitch], p1 = s[-2 * pitch], p0 = s[-pitch]; const uint8_t q0 = s[0 * pitch], q1 = s[1 * pitch], q2 = s[2 * pitch], q3 = s[3 * pitch]; const int8_t mask = filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3); const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); const int8_t flat2 = flat_mask5( 1, s[-8 * pitch], s[-7 * pitch], s[-6 * pitch], s[-5 * pitch], p0, q0, s[4 * pitch], s[5 * pitch], s[6 * pitch], s[7 * pitch]); filter16(mask, *thresh, flat, flat2, s - 8 * pitch, s - 7 * pitch, s - 6 * pitch, s - 5 * pitch, s - 4 * pitch, s - 3 * pitch, s - 2 * pitch, s - 1 * pitch, s, s + 1 * pitch, s + 2 * pitch, s + 3 * pitch, s + 4 * pitch, s + 5 * pitch, s + 6 * pitch, s + 7 * pitch); ++s; } } void vpx_lpf_horizontal_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { mb_lpf_horizontal_edge_w(s, pitch, blimit, limit, thresh, 1); } void vpx_lpf_horizontal_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { mb_lpf_horizontal_edge_w(s, pitch, blimit, limit, thresh, 2); } static void mb_lpf_vertical_edge_w(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { int i; for (i = 0; i < count; ++i) { const uint8_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint8_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3); const int8_t flat = flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3); const int8_t flat2 = flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0, q0, s[4], s[5], s[6], s[7]); filter16(mask, *thresh, flat, flat2, s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1, s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7); s += pitch; } } void vpx_lpf_vertical_16_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { mb_lpf_vertical_edge_w(s, pitch, blimit, limit, thresh, 8); } void vpx_lpf_vertical_16_dual_c(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { mb_lpf_vertical_edge_w(s, pitch, blimit, limit, thresh, 16); } #if CONFIG_VP9_HIGHBITDEPTH // Should we apply any filter at all: 11111111 yes, 00000000 no ? 
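// The high-bitdepth masks below mirror the 8-bit versions above; the 8-bit
// blimit/limit/thresh values are scaled up by (bd - 8) bits so the same
// frame-level thresholds apply at 10- and 12-bit depths.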
static INLINE int8_t highbd_filter_mask(uint8_t limit, uint8_t blimit, uint16_t p3, uint16_t p2, uint16_t p1, uint16_t p0, uint16_t q0, uint16_t q1, uint16_t q2, uint16_t q3, int bd) { int8_t mask = 0; int16_t limit16 = (uint16_t)limit << (bd - 8); int16_t blimit16 = (uint16_t)blimit << (bd - 8); mask |= (abs(p3 - p2) > limit16) * -1; mask |= (abs(p2 - p1) > limit16) * -1; mask |= (abs(p1 - p0) > limit16) * -1; mask |= (abs(q1 - q0) > limit16) * -1; mask |= (abs(q2 - q1) > limit16) * -1; mask |= (abs(q3 - q2) > limit16) * -1; mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit16) * -1; return ~mask; } static INLINE int8_t highbd_flat_mask4(uint8_t thresh, uint16_t p3, uint16_t p2, uint16_t p1, uint16_t p0, uint16_t q0, uint16_t q1, uint16_t q2, uint16_t q3, int bd) { int8_t mask = 0; int16_t thresh16 = (uint16_t)thresh << (bd - 8); mask |= (abs(p1 - p0) > thresh16) * -1; mask |= (abs(q1 - q0) > thresh16) * -1; mask |= (abs(p2 - p0) > thresh16) * -1; mask |= (abs(q2 - q0) > thresh16) * -1; mask |= (abs(p3 - p0) > thresh16) * -1; mask |= (abs(q3 - q0) > thresh16) * -1; return ~mask; } static INLINE int8_t highbd_flat_mask5(uint8_t thresh, uint16_t p4, uint16_t p3, uint16_t p2, uint16_t p1, uint16_t p0, uint16_t q0, uint16_t q1, uint16_t q2, uint16_t q3, uint16_t q4, int bd) { int8_t mask = ~highbd_flat_mask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3, bd); int16_t thresh16 = (uint16_t)thresh << (bd - 8); mask |= (abs(p4 - p0) > thresh16) * -1; mask |= (abs(q4 - q0) > thresh16) * -1; return ~mask; } // Is there high edge variance internal edge: // 11111111_11111111 yes, 00000000_00000000 no ? static INLINE int16_t highbd_hev_mask(uint8_t thresh, uint16_t p1, uint16_t p0, uint16_t q0, uint16_t q1, int bd) { int16_t hev = 0; int16_t thresh16 = (uint16_t)thresh << (bd - 8); hev |= (abs(p1 - p0) > thresh16) * -1; hev |= (abs(q1 - q0) > thresh16) * -1; return hev; } static INLINE void highbd_filter4(int8_t mask, uint8_t thresh, uint16_t *op1, uint16_t *op0, uint16_t *oq0, uint16_t *oq1, int bd) { int16_t filter1, filter2; // ^0x80 equivalent to subtracting 0x80 from the values to turn them // into -128 to +127 instead of 0 to 255. int shift = bd - 8; const int16_t ps1 = (int16_t)*op1 - (0x80 << shift); const int16_t ps0 = (int16_t)*op0 - (0x80 << shift); const int16_t qs0 = (int16_t)*oq0 - (0x80 << shift); const int16_t qs1 = (int16_t)*oq1 - (0x80 << shift); const int16_t hev = highbd_hev_mask(thresh, *op1, *op0, *oq0, *oq1, bd); // Add outer taps if we have high edge variance. int16_t filter = signed_char_clamp_high(ps1 - qs1, bd) & hev; // Inner taps. filter = signed_char_clamp_high(filter + 3 * (qs0 - ps0), bd) & mask; // Save bottom 3 bits so that we round one side +4 and the other +3 // if it equals 4 we'll set it to adjust by -1 to account for the fact // we'd round it by 3 the other way. filter1 = signed_char_clamp_high(filter + 4, bd) >> 3; filter2 = signed_char_clamp_high(filter + 3, bd) >> 3; *oq0 = signed_char_clamp_high(qs0 - filter1, bd) + (0x80 << shift); *op0 = signed_char_clamp_high(ps0 + filter2, bd) + (0x80 << shift); // Outer tap adjustments. filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; *oq1 = signed_char_clamp_high(qs1 - filter, bd) + (0x80 << shift); *op1 = signed_char_clamp_high(ps1 + filter, bd) + (0x80 << shift); } void vpx_highbd_lpf_horizontal_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
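  // Illustrative example of the mask decision (bd = 8, so no scaling): with
  // *blimit = 60 and edge pixels p1 = 80, p0 = 70, q0 = 40, q1 = 50,
  // abs(p0 - q0) * 2 + abs(p1 - q1) / 2 = 60 + 15 = 75 > 60, so the mask is
  // all zeros and highbd_filter4() leaves the column untouched.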
for (i = 0; i < 8; ++i) { const uint16_t p3 = s[-4 * pitch]; const uint16_t p2 = s[-3 * pitch]; const uint16_t p1 = s[-2 * pitch]; const uint16_t p0 = s[-pitch]; const uint16_t q0 = s[0 * pitch]; const uint16_t q1 = s[1 * pitch]; const uint16_t q2 = s[2 * pitch]; const uint16_t q3 = s[3 * pitch]; const int8_t mask = highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd); highbd_filter4(mask, *thresh, s - 2 * pitch, s - 1 * pitch, s, s + 1 * pitch, bd); ++s; } } void vpx_highbd_lpf_horizontal_4_dual_c( uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { vpx_highbd_lpf_horizontal_4_c(s, pitch, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_horizontal_4_c(s + 8, pitch, blimit1, limit1, thresh1, bd); } void vpx_highbd_lpf_vertical_4_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. for (i = 0; i < 8; ++i) { const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd); highbd_filter4(mask, *thresh, s - 2, s - 1, s, s + 1, bd); s += pitch; } } void vpx_highbd_lpf_vertical_4_dual_c( uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { vpx_highbd_lpf_vertical_4_c(s, pitch, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_vertical_4_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1, bd); } static INLINE void highbd_filter8(int8_t mask, uint8_t thresh, uint8_t flat, uint16_t *op3, uint16_t *op2, uint16_t *op1, uint16_t *op0, uint16_t *oq0, uint16_t *oq1, uint16_t *oq2, uint16_t *oq3, int bd) { if (flat && mask) { const uint16_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; const uint16_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; // 7-tap filter [1, 1, 1, 2, 1, 1, 1] *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + 2 * p2 + p1 + p0 + q0, 3); *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + 2 * p1 + p0 + q0 + q1, 3); *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + 2 * p0 + q0 + q1 + q2, 3); *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + 2 * q0 + q1 + q2 + q3, 3); *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + 2 * q1 + q2 + q3 + q3, 3); *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + 2 * q2 + q3 + q3 + q3, 3); } else { highbd_filter4(mask, thresh, op1, op0, oq0, oq1, bd); } } void vpx_highbd_lpf_horizontal_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. 
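  // highbd_filter8() uses the same 7-tap [1, 1, 1, 2, 1, 1, 1] taps as the
  // 8-bit filter8() above; only the pixel width (uint16_t) and the clamp
  // range inside highbd_filter4() differ.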
for (i = 0; i < 8; ++i) { const uint16_t p3 = s[-4 * pitch], p2 = s[-3 * pitch], p1 = s[-2 * pitch], p0 = s[-pitch]; const uint16_t q0 = s[0 * pitch], q1 = s[1 * pitch], q2 = s[2 * pitch], q3 = s[3 * pitch]; const int8_t mask = highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd); const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd); highbd_filter8(mask, *thresh, flat, s - 4 * pitch, s - 3 * pitch, s - 2 * pitch, s - 1 * pitch, s, s + 1 * pitch, s + 2 * pitch, s + 3 * pitch, bd); ++s; } } void vpx_highbd_lpf_horizontal_8_dual_c( uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { vpx_highbd_lpf_horizontal_8_c(s, pitch, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_horizontal_8_c(s + 8, pitch, blimit1, limit1, thresh1, bd); } void vpx_highbd_lpf_vertical_8_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { int i; for (i = 0; i < 8; ++i) { const uint16_t p3 = s[-4], p2 = s[-3], p1 = s[-2], p0 = s[-1]; const uint16_t q0 = s[0], q1 = s[1], q2 = s[2], q3 = s[3]; const int8_t mask = highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd); const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd); highbd_filter8(mask, *thresh, flat, s - 4, s - 3, s - 2, s - 1, s, s + 1, s + 2, s + 3, bd); s += pitch; } } void vpx_highbd_lpf_vertical_8_dual_c( uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { vpx_highbd_lpf_vertical_8_c(s, pitch, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_vertical_8_c(s + 8 * pitch, pitch, blimit1, limit1, thresh1, bd); } static INLINE void highbd_filter16(int8_t mask, uint8_t thresh, uint8_t flat, uint8_t flat2, uint16_t *op7, uint16_t *op6, uint16_t *op5, uint16_t *op4, uint16_t *op3, uint16_t *op2, uint16_t *op1, uint16_t *op0, uint16_t *oq0, uint16_t *oq1, uint16_t *oq2, uint16_t *oq3, uint16_t *oq4, uint16_t *oq5, uint16_t *oq6, uint16_t *oq7, int bd) { if (flat2 && flat && mask) { const uint16_t p7 = *op7; const uint16_t p6 = *op6; const uint16_t p5 = *op5; const uint16_t p4 = *op4; const uint16_t p3 = *op3; const uint16_t p2 = *op2; const uint16_t p1 = *op1; const uint16_t p0 = *op0; const uint16_t q0 = *oq0; const uint16_t q1 = *oq1; const uint16_t q2 = *oq2; const uint16_t q3 = *oq3; const uint16_t q4 = *oq4; const uint16_t q5 = *oq5; const uint16_t q6 = *oq6; const uint16_t q7 = *oq7; // 15-tap filter [1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1] *op6 = ROUND_POWER_OF_TWO( p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 + q0, 4); *op5 = ROUND_POWER_OF_TWO( p7 * 6 + p6 + p5 * 2 + p4 + p3 + p2 + p1 + p0 + q0 + q1, 4); *op4 = ROUND_POWER_OF_TWO( p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 + p1 + p0 + q0 + q1 + q2, 4); *op3 = ROUND_POWER_OF_TWO( p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 + p1 + p0 + q0 + q1 + q2 + q3, 4); *op2 = ROUND_POWER_OF_TWO( p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 + p0 + q0 + q1 + q2 + q3 + q4, 4); *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 + q0 + q1 + q2 + q3 + q4 + q5, 4); *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 + q1 + q2 + q3 + q4 + q5 + q6, 4); *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 + 
q3 + q4 + q5 + q6 + q7 * 2, 4); *oq2 = ROUND_POWER_OF_TWO( p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 + q4 + q5 + q6 + q7 * 3, 4); *oq3 = ROUND_POWER_OF_TWO( p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 + q5 + q6 + q7 * 4, 4); *oq4 = ROUND_POWER_OF_TWO( p2 + p1 + p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 + q6 + q7 * 5, 4); *oq5 = ROUND_POWER_OF_TWO( p1 + p0 + q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 6, 4); *oq6 = ROUND_POWER_OF_TWO( p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4); } else { highbd_filter8(mask, thresh, flat, op3, op2, op1, op0, oq0, oq1, oq2, oq3, bd); } } static void highbd_mb_lpf_horizontal_edge_w(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { int i; // loop filter designed to work using chars so that we can make maximum use // of 8 bit simd instructions. for (i = 0; i < 8 * count; ++i) { const uint16_t p3 = s[-4 * pitch]; const uint16_t p2 = s[-3 * pitch]; const uint16_t p1 = s[-2 * pitch]; const uint16_t p0 = s[-pitch]; const uint16_t q0 = s[0 * pitch]; const uint16_t q1 = s[1 * pitch]; const uint16_t q2 = s[2 * pitch]; const uint16_t q3 = s[3 * pitch]; const int8_t mask = highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd); const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd); const int8_t flat2 = highbd_flat_mask5( 1, s[-8 * pitch], s[-7 * pitch], s[-6 * pitch], s[-5 * pitch], p0, q0, s[4 * pitch], s[5 * pitch], s[6 * pitch], s[7 * pitch], bd); highbd_filter16(mask, *thresh, flat, flat2, s - 8 * pitch, s - 7 * pitch, s - 6 * pitch, s - 5 * pitch, s - 4 * pitch, s - 3 * pitch, s - 2 * pitch, s - 1 * pitch, s, s + 1 * pitch, s + 2 * pitch, s + 3 * pitch, s + 4 * pitch, s + 5 * pitch, s + 6 * pitch, s + 7 * pitch, bd); ++s; } } void vpx_highbd_lpf_horizontal_16_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { highbd_mb_lpf_horizontal_edge_w(s, pitch, blimit, limit, thresh, 1, bd); } void vpx_highbd_lpf_horizontal_16_dual_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { highbd_mb_lpf_horizontal_edge_w(s, pitch, blimit, limit, thresh, 2, bd); } static void highbd_mb_lpf_vertical_edge_w(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count, int bd) { int i; for (i = 0; i < count; ++i) { const uint16_t p3 = s[-4]; const uint16_t p2 = s[-3]; const uint16_t p1 = s[-2]; const uint16_t p0 = s[-1]; const uint16_t q0 = s[0]; const uint16_t q1 = s[1]; const uint16_t q2 = s[2]; const uint16_t q3 = s[3]; const int8_t mask = highbd_filter_mask(*limit, *blimit, p3, p2, p1, p0, q0, q1, q2, q3, bd); const int8_t flat = highbd_flat_mask4(1, p3, p2, p1, p0, q0, q1, q2, q3, bd); const int8_t flat2 = highbd_flat_mask5(1, s[-8], s[-7], s[-6], s[-5], p0, q0, s[4], s[5], s[6], s[7], bd); highbd_filter16(mask, *thresh, flat, flat2, s - 8, s - 7, s - 6, s - 5, s - 4, s - 3, s - 2, s - 1, s, s + 1, s + 2, s + 3, s + 4, s + 5, s + 6, s + 7, bd); s += pitch; } } void vpx_highbd_lpf_vertical_16_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { highbd_mb_lpf_vertical_edge_w(s, pitch, blimit, limit, thresh, 8, bd); } void vpx_highbd_lpf_vertical_16_dual_c(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { highbd_mb_lpf_vertical_edge_w(s, pitch, blimit, limit, thresh, 16, bd); } #endif // CONFIG_VP9_HIGHBITDEPTH 
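/* Illustrative usage sketch (not part of the library): the kernels above are
 * normally driven by the VP9 loop-filter control code, but can be exercised
 * directly. The buffer and threshold values below are hypothetical.
 *
 *   uint8_t plane[64 * 64];  // one 64x64 plane of decoded pixels, pitch 64
 *   const uint8_t blimit = 60, limit = 10, thresh = 4;
 *   // Filter the horizontal edge between rows 7 and 8, columns 0..7:
 *   vpx_lpf_horizontal_4_c(plane + 8 * 64, 64, &blimit, &limit, &thresh);
 *   // Filter the vertical edge between columns 7 and 8, rows 0..7:
 *   vpx_lpf_vertical_4_c(plane + 8, 64, &blimit, &limit, &thresh);
 */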
libvpx-1.8.2/vpx_dsp/mips/000077500000000000000000000000001357355204000154655ustar00rootroot00000000000000libvpx-1.8.2/vpx_dsp/mips/add_noise_msa.c000066400000000000000000000034711357355204000204230ustar00rootroot00000000000000/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdlib.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/mips/macros_msa.h"

void vpx_plane_add_noise_msa(uint8_t *start_ptr, const int8_t *noise,
                             int blackclamp, int whiteclamp, int width,
                             int height, int32_t pitch) {
  int i, j;
  v16u8 pos0, pos1, ref0, ref1;
  v16i8 black_clamp, white_clamp, both_clamp;

  black_clamp = __msa_fill_b(blackclamp);
  white_clamp = __msa_fill_b(whiteclamp);
  both_clamp = black_clamp + white_clamp;
  both_clamp = -both_clamp;

  // Two rows are processed per iteration, each with its own random offset
  // into the noise table.
  for (i = 0; i < height / 2; ++i) {
    uint8_t *pos0_ptr = start_ptr + (2 * i) * pitch;
    const int8_t *ref0_ptr = noise + (rand() & 0xff);
    uint8_t *pos1_ptr = start_ptr + (2 * i + 1) * pitch;
    const int8_t *ref1_ptr = noise + (rand() & 0xff);
    for (j = width / 16; j--;) {
      pos0 = LD_UB(pos0_ptr);
      ref0 = LD_UB(ref0_ptr);
      pos1 = LD_UB(pos1_ptr);
      ref1 = LD_UB(ref1_ptr);
      // The three saturating subtracts clamp each pixel into
      // [blackclamp, 255 - whiteclamp] so that adding the signed noise
      // below cannot wrap around.
      pos0 = __msa_subsus_u_b(pos0, black_clamp);
      pos1 = __msa_subsus_u_b(pos1, black_clamp);
      pos0 = __msa_subsus_u_b(pos0, both_clamp);
      pos1 = __msa_subsus_u_b(pos1, both_clamp);
      pos0 = __msa_subsus_u_b(pos0, white_clamp);
      pos1 = __msa_subsus_u_b(pos1, white_clamp);
      pos0 += ref0;
      ST_UB(pos0, pos0_ptr);
      pos1 += ref1;
      ST_UB(pos1, pos1_ptr);
      pos0_ptr += 16;
      pos1_ptr += 16;
      ref0_ptr += 16;
      ref1_ptr += 16;
    }
  }
}
libvpx-1.8.2/vpx_dsp/mips/avg_msa.c000066400000000000000000000705011357355204000172510ustar00rootroot00000000000000/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
*/ #include #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" uint32_t vpx_avg_8x8_msa(const uint8_t *src, int32_t src_stride) { uint32_t sum_out; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v8u16 sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7; v4u32 sum = { 0 }; LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); HADD_UB4_UH(src0, src1, src2, src3, sum0, sum1, sum2, sum3); HADD_UB4_UH(src4, src5, src6, src7, sum4, sum5, sum6, sum7); ADD4(sum0, sum1, sum2, sum3, sum4, sum5, sum6, sum7, sum0, sum2, sum4, sum6); ADD2(sum0, sum2, sum4, sum6, sum0, sum4); sum0 += sum4; sum = __msa_hadd_u_w(sum0, sum0); sum0 = (v8u16)__msa_pckev_h((v8i16)sum, (v8i16)sum); sum = __msa_hadd_u_w(sum0, sum0); sum = (v4u32)__msa_srari_w((v4i32)sum, 6); sum_out = __msa_copy_u_w((v4i32)sum, 0); return sum_out; } uint32_t vpx_avg_4x4_msa(const uint8_t *src, int32_t src_stride) { uint32_t sum_out; uint32_t src0, src1, src2, src3; v16u8 vec = { 0 }; v8u16 sum0; v4u32 sum1; v2u64 sum2; LW4(src, src_stride, src0, src1, src2, src3); INSERT_W4_UB(src0, src1, src2, src3, vec); sum0 = __msa_hadd_u_h(vec, vec); sum1 = __msa_hadd_u_w(sum0, sum0); sum0 = (v8u16)__msa_pckev_h((v8i16)sum1, (v8i16)sum1); sum1 = __msa_hadd_u_w(sum0, sum0); sum2 = __msa_hadd_u_d(sum1, sum1); sum1 = (v4u32)__msa_srari_w((v4i32)sum2, 4); sum_out = __msa_copy_u_w((v4i32)sum1, 0); return sum_out; } #if !CONFIG_VP9_HIGHBITDEPTH void vpx_hadamard_8x8_msa(const int16_t *src, ptrdiff_t src_stride, int16_t *dst) { v8i16 src0, src1, src2, src3, src4, src5, src6, src7; v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; LD_SH8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4, tmp6, tmp7, tmp5, tmp3, tmp1); BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4, src5, src7, src6, src3, src2); BUTTERFLY_8(src0, src1, src2, src3, src7, src6, src5, src4, tmp0, tmp7, tmp3, tmp4, tmp5, tmp1, tmp6, tmp2); TRANSPOSE8x8_SH_SH(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1, src2, src3, src4, src5, src6, src7); BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4, tmp6, tmp7, tmp5, tmp3, tmp1); BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4, src5, src7, src6, src3, src2); BUTTERFLY_8(src0, src1, src2, src3, src7, src6, src5, src4, tmp0, tmp7, tmp3, tmp4, tmp5, tmp1, tmp6, tmp2); TRANSPOSE8x8_SH_SH(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1, src2, src3, src4, src5, src6, src7); ST_SH8(src0, src1, src2, src3, src4, src5, src6, src7, dst, 8); } void vpx_hadamard_16x16_msa(const int16_t *src, ptrdiff_t src_stride, int16_t *dst) { v8i16 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v8i16 src11, src12, src13, src14, src15, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; v8i16 tmp6, tmp7, tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15; v8i16 res0, res1, res2, res3, res4, res5, res6, res7; LD_SH2(src, 8, src0, src8); src += src_stride; LD_SH2(src, 8, src1, src9); src += src_stride; LD_SH2(src, 8, src2, src10); src += src_stride; LD_SH2(src, 8, src3, src11); src += src_stride; LD_SH2(src, 8, src4, src12); src += src_stride; LD_SH2(src, 8, src5, src13); src += src_stride; LD_SH2(src, 8, src6, src14); src += src_stride; LD_SH2(src, 8, src7, src15); src += src_stride; BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4, tmp6, tmp7, tmp5, tmp3, tmp1); BUTTERFLY_8(src8, src10, 
src12, src14, src15, src13, src11, src9, tmp8, tmp10, tmp12, tmp14, tmp15, tmp13, tmp11, tmp9); BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4, src5, src7, src6, src3, src2); BUTTERFLY_8(src0, src1, src2, src3, src7, src6, src5, src4, tmp0, tmp7, tmp3, tmp4, tmp5, tmp1, tmp6, tmp2); TRANSPOSE8x8_SH_SH(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1, src2, src3, src4, src5, src6, src7); BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4, tmp6, tmp7, tmp5, tmp3, tmp1); BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4, src5, src7, src6, src3, src2); BUTTERFLY_8(src0, src1, src2, src3, src7, src6, src5, src4, tmp0, tmp7, tmp3, tmp4, tmp5, tmp1, tmp6, tmp2); TRANSPOSE8x8_SH_SH(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1, src2, src11, src4, src5, src6, src7); ST_SH8(src0, src1, src2, src11, src4, src5, src6, src7, dst, 8); BUTTERFLY_8(tmp8, tmp9, tmp12, tmp13, tmp15, tmp14, tmp11, tmp10, src8, src9, src12, src13, src15, src14, src11, src10); BUTTERFLY_8(src8, src9, src10, src11, src15, src14, src13, src12, tmp8, tmp15, tmp11, tmp12, tmp13, tmp9, tmp14, tmp10); TRANSPOSE8x8_SH_SH(tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, src8, src9, src10, src11, src12, src13, src14, src15); BUTTERFLY_8(src8, src10, src12, src14, src15, src13, src11, src9, tmp8, tmp10, tmp12, tmp14, tmp15, tmp13, tmp11, tmp9); BUTTERFLY_8(tmp8, tmp9, tmp12, tmp13, tmp15, tmp14, tmp11, tmp10, src8, src9, src12, src13, src15, src14, src11, src10); BUTTERFLY_8(src8, src9, src10, src11, src15, src14, src13, src12, tmp8, tmp15, tmp11, tmp12, tmp13, tmp9, tmp14, tmp10); TRANSPOSE8x8_SH_SH(tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, res0, res1, res2, res3, res4, res5, res6, res7); LD_SH2(src, 8, src0, src8); src += src_stride; LD_SH2(src, 8, src1, src9); src += src_stride; LD_SH2(src, 8, src2, src10); src += src_stride; LD_SH2(src, 8, src3, src11); src += src_stride; ST_SH8(res0, res1, res2, res3, res4, res5, res6, res7, dst + 64, 8); LD_SH2(src, 8, src4, src12); src += src_stride; LD_SH2(src, 8, src5, src13); src += src_stride; LD_SH2(src, 8, src6, src14); src += src_stride; LD_SH2(src, 8, src7, src15); src += src_stride; BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4, tmp6, tmp7, tmp5, tmp3, tmp1); BUTTERFLY_8(src8, src10, src12, src14, src15, src13, src11, src9, tmp8, tmp10, tmp12, tmp14, tmp15, tmp13, tmp11, tmp9); BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4, src5, src7, src6, src3, src2); BUTTERFLY_8(src0, src1, src2, src3, src7, src6, src5, src4, tmp0, tmp7, tmp3, tmp4, tmp5, tmp1, tmp6, tmp2); TRANSPOSE8x8_SH_SH(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1, src2, src3, src4, src5, src6, src7); BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4, tmp6, tmp7, tmp5, tmp3, tmp1); BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4, src5, src7, src6, src3, src2); BUTTERFLY_8(src0, src1, src2, src3, src7, src6, src5, src4, tmp0, tmp7, tmp3, tmp4, tmp5, tmp1, tmp6, tmp2); TRANSPOSE8x8_SH_SH(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, src0, src1, src2, src3, src4, src5, src6, src7); ST_SH8(src0, src1, src2, src3, src4, src5, src6, src7, dst + 2 * 64, 8); BUTTERFLY_8(tmp8, tmp9, tmp12, tmp13, tmp15, tmp14, tmp11, tmp10, src8, src9, src12, src13, src15, src14, src11, src10); BUTTERFLY_8(src8, src9, src10, src11, src15, src14, src13, src12, tmp8, tmp15, tmp11, tmp12, tmp13, tmp9, 
tmp14, tmp10); TRANSPOSE8x8_SH_SH(tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, src8, src9, src10, src11, src12, src13, src14, src15); BUTTERFLY_8(src8, src10, src12, src14, src15, src13, src11, src9, tmp8, tmp10, tmp12, tmp14, tmp15, tmp13, tmp11, tmp9); BUTTERFLY_8(tmp8, tmp9, tmp12, tmp13, tmp15, tmp14, tmp11, tmp10, src8, src9, src12, src13, src15, src14, src11, src10); BUTTERFLY_8(src8, src9, src10, src11, src15, src14, src13, src12, tmp8, tmp15, tmp11, tmp12, tmp13, tmp9, tmp14, tmp10); TRANSPOSE8x8_SH_SH(tmp8, tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, res0, res1, res2, res3, res4, res5, res6, res7); ST_SH8(res0, res1, res2, res3, res4, res5, res6, res7, dst + 3 * 64, 8); LD_SH4(dst, 64, src0, src1, src2, src3); LD_SH4(dst + 8, 64, src4, src5, src6, src7); BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4, tmp6, tmp7, tmp5, tmp3, tmp1); SRA_4V(tmp0, tmp1, tmp2, tmp3, 1); SRA_4V(tmp4, tmp5, tmp6, tmp7, 1); BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4, src5, src7, src6, src3, src2); ST_SH4(src0, src1, src2, src3, dst, 64); ST_SH4(src4, src5, src6, src7, dst + 8, 64); dst += 16; LD_SH4(dst, 64, src0, src1, src2, src3); LD_SH4(dst + 8, 64, src4, src5, src6, src7); BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4, tmp6, tmp7, tmp5, tmp3, tmp1); SRA_4V(tmp0, tmp1, tmp2, tmp3, 1); SRA_4V(tmp4, tmp5, tmp6, tmp7, 1); BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4, src5, src7, src6, src3, src2); ST_SH4(src0, src1, src2, src3, dst, 64); ST_SH4(src4, src5, src6, src7, dst + 8, 64); dst += 16; LD_SH4(dst, 64, src0, src1, src2, src3); LD_SH4(dst + 8, 64, src4, src5, src6, src7); BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4, tmp6, tmp7, tmp5, tmp3, tmp1); SRA_4V(tmp0, tmp1, tmp2, tmp3, 1); SRA_4V(tmp4, tmp5, tmp6, tmp7, 1); BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4, src5, src7, src6, src3, src2); ST_SH4(src0, src1, src2, src3, dst, 64); ST_SH4(src4, src5, src6, src7, dst + 8, 64); dst += 16; LD_SH4(dst, 64, src0, src1, src2, src3); LD_SH4(dst + 8, 64, src4, src5, src6, src7); BUTTERFLY_8(src0, src2, src4, src6, src7, src5, src3, src1, tmp0, tmp2, tmp4, tmp6, tmp7, tmp5, tmp3, tmp1); SRA_4V(tmp0, tmp1, tmp2, tmp3, 1); SRA_4V(tmp4, tmp5, tmp6, tmp7, 1); BUTTERFLY_8(tmp0, tmp1, tmp4, tmp5, tmp7, tmp6, tmp3, tmp2, src0, src1, src4, src5, src7, src6, src3, src2); ST_SH4(src0, src1, src2, src3, dst, 64); ST_SH4(src4, src5, src6, src7, dst + 8, 64); } int vpx_satd_msa(const int16_t *data, int length) { int i, satd; v8i16 src0, src1, src2, src3, src4, src5, src6, src7; v8i16 src8, src9, src10, src11, src12, src13, src14, src15; v8i16 zero = { 0 }; v8u16 tmp0_h, tmp1_h, tmp2_h, tmp3_h, tmp4_h, tmp5_h, tmp6_h, tmp7_h; v4u32 tmp0_w = { 0 }; if (16 == length) { LD_SH2(data, 8, src0, src1); tmp0_h = (v8u16)__msa_asub_s_h(src0, zero); tmp1_h = (v8u16)__msa_asub_s_h(src1, zero); tmp0_w = __msa_hadd_u_w(tmp0_h, tmp0_h); tmp0_w += __msa_hadd_u_w(tmp1_h, tmp1_h); satd = HADD_UW_U32(tmp0_w); } else if (64 == length) { LD_SH8(data, 8, src0, src1, src2, src3, src4, src5, src6, src7); tmp0_h = (v8u16)__msa_asub_s_h(src0, zero); tmp1_h = (v8u16)__msa_asub_s_h(src1, zero); tmp2_h = (v8u16)__msa_asub_s_h(src2, zero); tmp3_h = (v8u16)__msa_asub_s_h(src3, zero); tmp4_h = (v8u16)__msa_asub_s_h(src4, zero); tmp5_h = (v8u16)__msa_asub_s_h(src5, zero); tmp6_h = (v8u16)__msa_asub_s_h(src6, zero); tmp7_h = (v8u16)__msa_asub_s_h(src7, 
zero); tmp0_w = __msa_hadd_u_w(tmp0_h, tmp0_h); tmp0_w += __msa_hadd_u_w(tmp1_h, tmp1_h); tmp0_w += __msa_hadd_u_w(tmp2_h, tmp2_h); tmp0_w += __msa_hadd_u_w(tmp3_h, tmp3_h); tmp0_w += __msa_hadd_u_w(tmp4_h, tmp4_h); tmp0_w += __msa_hadd_u_w(tmp5_h, tmp5_h); tmp0_w += __msa_hadd_u_w(tmp6_h, tmp6_h); tmp0_w += __msa_hadd_u_w(tmp7_h, tmp7_h); satd = HADD_UW_U32(tmp0_w); } else if (256 == length) { for (i = 0; i < 2; ++i) { LD_SH8(data, 8, src0, src1, src2, src3, src4, src5, src6, src7); data += 8 * 8; LD_SH8(data, 8, src8, src9, src10, src11, src12, src13, src14, src15); data += 8 * 8; tmp0_h = (v8u16)__msa_asub_s_h(src0, zero); tmp1_h = (v8u16)__msa_asub_s_h(src1, zero); tmp2_h = (v8u16)__msa_asub_s_h(src2, zero); tmp3_h = (v8u16)__msa_asub_s_h(src3, zero); tmp4_h = (v8u16)__msa_asub_s_h(src4, zero); tmp5_h = (v8u16)__msa_asub_s_h(src5, zero); tmp6_h = (v8u16)__msa_asub_s_h(src6, zero); tmp7_h = (v8u16)__msa_asub_s_h(src7, zero); tmp0_w += __msa_hadd_u_w(tmp0_h, tmp0_h); tmp0_w += __msa_hadd_u_w(tmp1_h, tmp1_h); tmp0_w += __msa_hadd_u_w(tmp2_h, tmp2_h); tmp0_w += __msa_hadd_u_w(tmp3_h, tmp3_h); tmp0_w += __msa_hadd_u_w(tmp4_h, tmp4_h); tmp0_w += __msa_hadd_u_w(tmp5_h, tmp5_h); tmp0_w += __msa_hadd_u_w(tmp6_h, tmp6_h); tmp0_w += __msa_hadd_u_w(tmp7_h, tmp7_h); tmp0_h = (v8u16)__msa_asub_s_h(src8, zero); tmp1_h = (v8u16)__msa_asub_s_h(src9, zero); tmp2_h = (v8u16)__msa_asub_s_h(src10, zero); tmp3_h = (v8u16)__msa_asub_s_h(src11, zero); tmp4_h = (v8u16)__msa_asub_s_h(src12, zero); tmp5_h = (v8u16)__msa_asub_s_h(src13, zero); tmp6_h = (v8u16)__msa_asub_s_h(src14, zero); tmp7_h = (v8u16)__msa_asub_s_h(src15, zero); tmp0_w += __msa_hadd_u_w(tmp0_h, tmp0_h); tmp0_w += __msa_hadd_u_w(tmp1_h, tmp1_h); tmp0_w += __msa_hadd_u_w(tmp2_h, tmp2_h); tmp0_w += __msa_hadd_u_w(tmp3_h, tmp3_h); tmp0_w += __msa_hadd_u_w(tmp4_h, tmp4_h); tmp0_w += __msa_hadd_u_w(tmp5_h, tmp5_h); tmp0_w += __msa_hadd_u_w(tmp6_h, tmp6_h); tmp0_w += __msa_hadd_u_w(tmp7_h, tmp7_h); } satd = HADD_UW_U32(tmp0_w); } else if (1024 == length) { for (i = 0; i < 8; ++i) { LD_SH8(data, 8, src0, src1, src2, src3, src4, src5, src6, src7); data += 8 * 8; LD_SH8(data, 8, src8, src9, src10, src11, src12, src13, src14, src15); data += 8 * 8; tmp0_h = (v8u16)__msa_asub_s_h(src0, zero); tmp1_h = (v8u16)__msa_asub_s_h(src1, zero); tmp2_h = (v8u16)__msa_asub_s_h(src2, zero); tmp3_h = (v8u16)__msa_asub_s_h(src3, zero); tmp4_h = (v8u16)__msa_asub_s_h(src4, zero); tmp5_h = (v8u16)__msa_asub_s_h(src5, zero); tmp6_h = (v8u16)__msa_asub_s_h(src6, zero); tmp7_h = (v8u16)__msa_asub_s_h(src7, zero); tmp0_w += __msa_hadd_u_w(tmp0_h, tmp0_h); tmp0_w += __msa_hadd_u_w(tmp1_h, tmp1_h); tmp0_w += __msa_hadd_u_w(tmp2_h, tmp2_h); tmp0_w += __msa_hadd_u_w(tmp3_h, tmp3_h); tmp0_w += __msa_hadd_u_w(tmp4_h, tmp4_h); tmp0_w += __msa_hadd_u_w(tmp5_h, tmp5_h); tmp0_w += __msa_hadd_u_w(tmp6_h, tmp6_h); tmp0_w += __msa_hadd_u_w(tmp7_h, tmp7_h); tmp0_h = (v8u16)__msa_asub_s_h(src8, zero); tmp1_h = (v8u16)__msa_asub_s_h(src9, zero); tmp2_h = (v8u16)__msa_asub_s_h(src10, zero); tmp3_h = (v8u16)__msa_asub_s_h(src11, zero); tmp4_h = (v8u16)__msa_asub_s_h(src12, zero); tmp5_h = (v8u16)__msa_asub_s_h(src13, zero); tmp6_h = (v8u16)__msa_asub_s_h(src14, zero); tmp7_h = (v8u16)__msa_asub_s_h(src15, zero); tmp0_w += __msa_hadd_u_w(tmp0_h, tmp0_h); tmp0_w += __msa_hadd_u_w(tmp1_h, tmp1_h); tmp0_w += __msa_hadd_u_w(tmp2_h, tmp2_h); tmp0_w += __msa_hadd_u_w(tmp3_h, tmp3_h); tmp0_w += __msa_hadd_u_w(tmp4_h, tmp4_h); tmp0_w += __msa_hadd_u_w(tmp5_h, tmp5_h); tmp0_w += __msa_hadd_u_w(tmp6_h, 
tmp6_h); tmp0_w += __msa_hadd_u_w(tmp7_h, tmp7_h); } satd = HADD_UW_U32(tmp0_w); } else { satd = 0; for (i = 0; i < length; ++i) { satd += abs(data[i]); } } return satd; } #endif // !CONFIG_VP9_HIGHBITDEPTH void vpx_int_pro_row_msa(int16_t hbuf[16], const uint8_t *ref, const int ref_stride, const int height) { int i; v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7; v8i16 hbuf_r = { 0 }; v8i16 hbuf_l = { 0 }; v8i16 ref0_r, ref0_l, ref1_r, ref1_l, ref2_r, ref2_l, ref3_r, ref3_l; v8i16 ref4_r, ref4_l, ref5_r, ref5_l, ref6_r, ref6_l, ref7_r, ref7_l; if (16 == height) { for (i = 2; i--;) { LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7); ref += 8 * ref_stride; UNPCK_UB_SH(ref0, ref0_r, ref0_l); UNPCK_UB_SH(ref1, ref1_r, ref1_l); UNPCK_UB_SH(ref2, ref2_r, ref2_l); UNPCK_UB_SH(ref3, ref3_r, ref3_l); UNPCK_UB_SH(ref4, ref4_r, ref4_l); UNPCK_UB_SH(ref5, ref5_r, ref5_l); UNPCK_UB_SH(ref6, ref6_r, ref6_l); UNPCK_UB_SH(ref7, ref7_r, ref7_l); ADD4(hbuf_r, ref0_r, hbuf_l, ref0_l, hbuf_r, ref1_r, hbuf_l, ref1_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref2_r, hbuf_l, ref2_l, hbuf_r, ref3_r, hbuf_l, ref3_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref4_r, hbuf_l, ref4_l, hbuf_r, ref5_r, hbuf_l, ref5_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref6_r, hbuf_l, ref6_l, hbuf_r, ref7_r, hbuf_l, ref7_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); } SRA_2V(hbuf_r, hbuf_l, 3); ST_SH2(hbuf_r, hbuf_l, hbuf, 8); } else if (32 == height) { for (i = 2; i--;) { LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7); ref += 8 * ref_stride; UNPCK_UB_SH(ref0, ref0_r, ref0_l); UNPCK_UB_SH(ref1, ref1_r, ref1_l); UNPCK_UB_SH(ref2, ref2_r, ref2_l); UNPCK_UB_SH(ref3, ref3_r, ref3_l); UNPCK_UB_SH(ref4, ref4_r, ref4_l); UNPCK_UB_SH(ref5, ref5_r, ref5_l); UNPCK_UB_SH(ref6, ref6_r, ref6_l); UNPCK_UB_SH(ref7, ref7_r, ref7_l); ADD4(hbuf_r, ref0_r, hbuf_l, ref0_l, hbuf_r, ref1_r, hbuf_l, ref1_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref2_r, hbuf_l, ref2_l, hbuf_r, ref3_r, hbuf_l, ref3_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref4_r, hbuf_l, ref4_l, hbuf_r, ref5_r, hbuf_l, ref5_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref6_r, hbuf_l, ref6_l, hbuf_r, ref7_r, hbuf_l, ref7_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7); ref += 8 * ref_stride; UNPCK_UB_SH(ref0, ref0_r, ref0_l); UNPCK_UB_SH(ref1, ref1_r, ref1_l); UNPCK_UB_SH(ref2, ref2_r, ref2_l); UNPCK_UB_SH(ref3, ref3_r, ref3_l); UNPCK_UB_SH(ref4, ref4_r, ref4_l); UNPCK_UB_SH(ref5, ref5_r, ref5_l); UNPCK_UB_SH(ref6, ref6_r, ref6_l); UNPCK_UB_SH(ref7, ref7_r, ref7_l); ADD4(hbuf_r, ref0_r, hbuf_l, ref0_l, hbuf_r, ref1_r, hbuf_l, ref1_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref2_r, hbuf_l, ref2_l, hbuf_r, ref3_r, hbuf_l, ref3_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref4_r, hbuf_l, ref4_l, hbuf_r, ref5_r, hbuf_l, ref5_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref6_r, hbuf_l, ref6_l, hbuf_r, ref7_r, hbuf_l, ref7_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); } SRA_2V(hbuf_r, hbuf_l, 4); ST_SH2(hbuf_r, hbuf_l, hbuf, 8); } else if (64 == height) { for (i = 4; i--;) { LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7); ref += 8 * ref_stride; UNPCK_UB_SH(ref0, ref0_r, ref0_l); UNPCK_UB_SH(ref1, ref1_r, ref1_l); UNPCK_UB_SH(ref2, ref2_r, ref2_l); UNPCK_UB_SH(ref3, ref3_r, ref3_l); UNPCK_UB_SH(ref4, ref4_r, ref4_l); UNPCK_UB_SH(ref5, ref5_r, ref5_l); UNPCK_UB_SH(ref6, ref6_r, ref6_l); UNPCK_UB_SH(ref7, ref7_r, ref7_l); 
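      // Accumulate this batch of eight rows into the two running 8-lane
      // column sums (hbuf_r covers one half of the 16 columns, hbuf_l the
      // other); the SRA_2V(..., 5) at the end of this branch divides the
      // totals by height / 2, matching the scalar fallback below.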
ADD4(hbuf_r, ref0_r, hbuf_l, ref0_l, hbuf_r, ref1_r, hbuf_l, ref1_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref2_r, hbuf_l, ref2_l, hbuf_r, ref3_r, hbuf_l, ref3_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref4_r, hbuf_l, ref4_l, hbuf_r, ref5_r, hbuf_l, ref5_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref6_r, hbuf_l, ref6_l, hbuf_r, ref7_r, hbuf_l, ref7_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); LD_UB8(ref, ref_stride, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7); ref += 8 * ref_stride; UNPCK_UB_SH(ref0, ref0_r, ref0_l); UNPCK_UB_SH(ref1, ref1_r, ref1_l); UNPCK_UB_SH(ref2, ref2_r, ref2_l); UNPCK_UB_SH(ref3, ref3_r, ref3_l); UNPCK_UB_SH(ref4, ref4_r, ref4_l); UNPCK_UB_SH(ref5, ref5_r, ref5_l); UNPCK_UB_SH(ref6, ref6_r, ref6_l); UNPCK_UB_SH(ref7, ref7_r, ref7_l); ADD4(hbuf_r, ref0_r, hbuf_l, ref0_l, hbuf_r, ref1_r, hbuf_l, ref1_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref2_r, hbuf_l, ref2_l, hbuf_r, ref3_r, hbuf_l, ref3_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref4_r, hbuf_l, ref4_l, hbuf_r, ref5_r, hbuf_l, ref5_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); ADD4(hbuf_r, ref6_r, hbuf_l, ref6_l, hbuf_r, ref7_r, hbuf_l, ref7_l, hbuf_r, hbuf_l, hbuf_r, hbuf_l); } SRA_2V(hbuf_r, hbuf_l, 5); ST_SH2(hbuf_r, hbuf_l, hbuf, 8); } else { const int norm_factor = height >> 1; int cnt; for (cnt = 0; cnt < 16; cnt++) { hbuf[cnt] = 0; } for (i = 0; i < height; ++i) { for (cnt = 0; cnt < 16; cnt++) { hbuf[cnt] += ref[cnt]; } ref += ref_stride; } for (cnt = 0; cnt < 16; cnt++) { hbuf[cnt] /= norm_factor; } } } int16_t vpx_int_pro_col_msa(const uint8_t *ref, const int width) { int16_t sum; v16u8 ref0, ref1, ref2, ref3; v8u16 ref0_h; if (16 == width) { ref0 = LD_UB(ref); ref0_h = __msa_hadd_u_h(ref0, ref0); sum = HADD_UH_U32(ref0_h); } else if (32 == width) { LD_UB2(ref, 16, ref0, ref1); ref0_h = __msa_hadd_u_h(ref0, ref0); ref0_h += __msa_hadd_u_h(ref1, ref1); sum = HADD_UH_U32(ref0_h); } else if (64 == width) { LD_UB4(ref, 16, ref0, ref1, ref2, ref3); ref0_h = __msa_hadd_u_h(ref0, ref0); ref0_h += __msa_hadd_u_h(ref1, ref1); ref0_h += __msa_hadd_u_h(ref2, ref2); ref0_h += __msa_hadd_u_h(ref3, ref3); sum = HADD_UH_U32(ref0_h); } else { int idx; sum = 0; for (idx = 0; idx < width; ++idx) { sum += ref[idx]; } } return sum; } int vpx_vector_var_msa(const int16_t *ref, const int16_t *src, const int bwl) { int sse, mean, var; v8i16 src0, src1, src2, src3, src4, src5, src6, src7, ref0, ref1, ref2; v8i16 ref3, ref4, ref5, ref6, ref7, src_l0_m, src_l1_m, src_l2_m, src_l3_m; v8i16 src_l4_m, src_l5_m, src_l6_m, src_l7_m; v4i32 res_l0_m, res_l1_m, res_l2_m, res_l3_m, res_l4_m, res_l5_m, res_l6_m; v4i32 res_l7_m, mean_v; v2i64 sse_v; if (2 == bwl) { LD_SH2(src, 8, src0, src1); LD_SH2(ref, 8, ref0, ref1); ILVRL_H2_SH(src0, ref0, src_l0_m, src_l1_m); ILVRL_H2_SH(src1, ref1, src_l2_m, src_l3_m); HSUB_UH2_SW(src_l0_m, src_l1_m, res_l0_m, res_l1_m); HSUB_UH2_SW(src_l2_m, src_l3_m, res_l2_m, res_l3_m); sse_v = __msa_dotp_s_d(res_l0_m, res_l0_m); sse_v = __msa_dpadd_s_d(sse_v, res_l1_m, res_l1_m); DPADD_SD2_SD(res_l2_m, res_l3_m, sse_v, sse_v); mean_v = res_l0_m + res_l1_m; mean_v += res_l2_m + res_l3_m; sse_v += __msa_splati_d(sse_v, 1); sse = __msa_copy_s_w((v4i32)sse_v, 0); mean = HADD_SW_S32(mean_v); } else if (3 == bwl) { LD_SH4(src, 8, src0, src1, src2, src3); LD_SH4(ref, 8, ref0, ref1, ref2, ref3); ILVRL_H2_SH(src0, ref0, src_l0_m, src_l1_m); ILVRL_H2_SH(src1, ref1, src_l2_m, src_l3_m); ILVRL_H2_SH(src2, ref2, src_l4_m, src_l5_m); ILVRL_H2_SH(src3, ref3, src_l6_m, src_l7_m); HSUB_UH2_SW(src_l0_m, 
src_l1_m, res_l0_m, res_l1_m); HSUB_UH2_SW(src_l2_m, src_l3_m, res_l2_m, res_l3_m); HSUB_UH2_SW(src_l4_m, src_l5_m, res_l4_m, res_l5_m); HSUB_UH2_SW(src_l6_m, src_l7_m, res_l6_m, res_l7_m); sse_v = __msa_dotp_s_d(res_l0_m, res_l0_m); sse_v = __msa_dpadd_s_d(sse_v, res_l1_m, res_l1_m); DPADD_SD2_SD(res_l2_m, res_l3_m, sse_v, sse_v); DPADD_SD2_SD(res_l4_m, res_l5_m, sse_v, sse_v); DPADD_SD2_SD(res_l6_m, res_l7_m, sse_v, sse_v); mean_v = res_l0_m + res_l1_m; mean_v += res_l2_m + res_l3_m; mean_v += res_l4_m + res_l5_m; mean_v += res_l6_m + res_l7_m; sse_v += __msa_splati_d(sse_v, 1); sse = __msa_copy_s_w((v4i32)sse_v, 0); mean = HADD_SW_S32(mean_v); } else if (4 == bwl) { LD_SH8(src, 8, src0, src1, src2, src3, src4, src5, src6, src7); LD_SH8(ref, 8, ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7); ILVRL_H2_SH(src0, ref0, src_l0_m, src_l1_m); ILVRL_H2_SH(src1, ref1, src_l2_m, src_l3_m); ILVRL_H2_SH(src2, ref2, src_l4_m, src_l5_m); ILVRL_H2_SH(src3, ref3, src_l6_m, src_l7_m); HSUB_UH2_SW(src_l0_m, src_l1_m, res_l0_m, res_l1_m); HSUB_UH2_SW(src_l2_m, src_l3_m, res_l2_m, res_l3_m); HSUB_UH2_SW(src_l4_m, src_l5_m, res_l4_m, res_l5_m); HSUB_UH2_SW(src_l6_m, src_l7_m, res_l6_m, res_l7_m); sse_v = __msa_dotp_s_d(res_l0_m, res_l0_m); sse_v = __msa_dpadd_s_d(sse_v, res_l1_m, res_l1_m); DPADD_SD2_SD(res_l2_m, res_l3_m, sse_v, sse_v); DPADD_SD2_SD(res_l4_m, res_l5_m, sse_v, sse_v); DPADD_SD2_SD(res_l6_m, res_l7_m, sse_v, sse_v); mean_v = res_l0_m + res_l1_m; mean_v += res_l2_m + res_l3_m; mean_v += res_l4_m + res_l5_m; mean_v += res_l6_m + res_l7_m; ILVRL_H2_SH(src4, ref4, src_l0_m, src_l1_m); ILVRL_H2_SH(src5, ref5, src_l2_m, src_l3_m); ILVRL_H2_SH(src6, ref6, src_l4_m, src_l5_m); ILVRL_H2_SH(src7, ref7, src_l6_m, src_l7_m); HSUB_UH2_SW(src_l0_m, src_l1_m, res_l0_m, res_l1_m); HSUB_UH2_SW(src_l2_m, src_l3_m, res_l2_m, res_l3_m); HSUB_UH2_SW(src_l4_m, src_l5_m, res_l4_m, res_l5_m); HSUB_UH2_SW(src_l6_m, src_l7_m, res_l6_m, res_l7_m); DPADD_SD2_SD(res_l0_m, res_l1_m, sse_v, sse_v); DPADD_SD2_SD(res_l2_m, res_l3_m, sse_v, sse_v); DPADD_SD2_SD(res_l4_m, res_l5_m, sse_v, sse_v); DPADD_SD2_SD(res_l6_m, res_l7_m, sse_v, sse_v); mean_v += res_l0_m + res_l1_m; mean_v += res_l2_m + res_l3_m; mean_v += res_l4_m + res_l5_m; mean_v += res_l6_m + res_l7_m; sse_v += __msa_splati_d(sse_v, 1); sse = __msa_copy_s_w((v4i32)sse_v, 0); mean = HADD_SW_S32(mean_v); } else { int i; const int width = 4 << bwl; sse = 0; mean = 0; for (i = 0; i < width; ++i) { const int diff = ref[i] - src[i]; mean += diff; sse += diff * diff; } } var = sse - ((mean * mean) >> (bwl + 2)); return var; } void vpx_minmax_8x8_msa(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max) { v16u8 s0, s1, s2, s3, s4, s5, s6, s7, d0, d1, d2, d3, d4, d5, d6, d7; v16u8 diff0, diff1, diff2, diff3, min0, min1, max0, max1; LD_UB8(s, p, s0, s1, s2, s3, s4, s5, s6, s7); LD_UB8(d, dp, d0, d1, d2, d3, d4, d5, d6, d7); PCKEV_D4_UB(s1, s0, s3, s2, s5, s4, s7, s6, s0, s1, s2, s3); PCKEV_D4_UB(d1, d0, d3, d2, d5, d4, d7, d6, d0, d1, d2, d3); diff0 = __msa_asub_u_b(s0, d0); diff1 = __msa_asub_u_b(s1, d1); diff2 = __msa_asub_u_b(s2, d2); diff3 = __msa_asub_u_b(s3, d3); min0 = __msa_min_u_b(diff0, diff1); min1 = __msa_min_u_b(diff2, diff3); min0 = __msa_min_u_b(min0, min1); max0 = __msa_max_u_b(diff0, diff1); max1 = __msa_max_u_b(diff2, diff3); max0 = __msa_max_u_b(max0, max1); min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 8); min0 = __msa_min_u_b(min0, min1); max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 8); max0 = __msa_max_u_b(max0, max1); 
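  /* Tree reduction: __msa_sldi_b shifts the candidate vector down by 8,
   * then 4, 2 and 1 bytes, so each min/max pair halves the number of live
   * lanes; after the final fold, lane 0 of min0/max0 holds the minimum and
   * maximum absolute difference over the whole 8x8 block. */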
  min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 4);
  min0 = __msa_min_u_b(min0, min1);
  max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 4);
  max0 = __msa_max_u_b(max0, max1);
  min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 2);
  min0 = __msa_min_u_b(min0, min1);
  max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 2);
  max0 = __msa_max_u_b(max0, max1);
  min1 = (v16u8)__msa_sldi_b((v16i8)min1, (v16i8)min0, 1);
  min0 = __msa_min_u_b(min0, min1);
  max1 = (v16u8)__msa_sldi_b((v16i8)max1, (v16i8)max0, 1);
  max0 = __msa_max_u_b(max0, max1);

  *min = min0[0];
  *max = max0[0];
}

libvpx-1.8.2/vpx_dsp/mips/common_dspr2.c

/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_dsp/mips/common_dspr2.h"

#if HAVE_DSPR2
uint8_t vpx_ff_cropTbl_a[256 + 2 * CROP_WIDTH];
uint8_t *vpx_ff_cropTbl;

void vpx_dsputil_static_init(void) {
  int i;

  for (i = 0; i < 256; i++) vpx_ff_cropTbl_a[i + CROP_WIDTH] = i;

  for (i = 0; i < CROP_WIDTH; i++) {
    vpx_ff_cropTbl_a[i] = 0;
    vpx_ff_cropTbl_a[i + CROP_WIDTH + 256] = 255;
  }

  vpx_ff_cropTbl = &vpx_ff_cropTbl_a[CROP_WIDTH];
}
#endif

libvpx-1.8.2/vpx_dsp/mips/common_dspr2.h

/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VPX_DSP_MIPS_COMMON_DSPR2_H_
#define VPX_VPX_DSP_MIPS_COMMON_DSPR2_H_

#include <assert.h>  /* angle-bracket include was stripped in extraction; assert.h assumed */

#include "./vpx_config.h"
#include "vpx/vpx_integer.h"

#ifdef __cplusplus
extern "C" {
#endif

#if HAVE_DSPR2
#define CROP_WIDTH 512

extern uint8_t *vpx_ff_cropTbl;

// From "vpx_dsp/mips/intrapred4_dspr2.c"
static INLINE void prefetch_load(const unsigned char *src) {
  __asm__ __volatile__("pref   0,  0(%[src])   \n\t" : : [src] "r"(src));
}

/* prefetch data for store */
static INLINE void prefetch_store(unsigned char *dst) {
  __asm__ __volatile__("pref   1,  0(%[dst])   \n\t" : : [dst] "r"(dst));
}

static INLINE void prefetch_load_streamed(const unsigned char *src) {
  __asm__ __volatile__("pref   4,  0(%[src])   \n\t" : : [src] "r"(src));
}

/* prefetch data for store */
static INLINE void prefetch_store_streamed(unsigned char *dst) {
  __asm__ __volatile__("pref   5,  0(%[dst])   \n\t" : : [dst] "r"(dst));
}
#endif  // #if HAVE_DSPR2

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VPX_DSP_MIPS_COMMON_DSPR2_H_

libvpx-1.8.2/vpx_dsp/mips/convolve2_avg_dspr2.c

/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
*/ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_convolve.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 static void convolve_bi_avg_vert_4_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h) { int32_t x, y; const uint8_t *src_ptr; uint8_t *dst_ptr; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector4a = 64; uint32_t load1, load2; uint32_t p1, p2; uint32_t scratch1, scratch2; uint32_t store1, store2; int32_t Temp1, Temp2; const int16_t *filter = &filter_y[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_store(dst + dst_stride); for (x = 0; x < w; x += 4) { src_ptr = src + x; dst_ptr = dst + x; __asm__ __volatile__( "ulw %[load1], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load2], 0(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "mtlo %[vector4a], $ac1 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac0 \n\t" "mthi $zero, $ac1 \n\t" "mthi $zero, $ac2 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[scratch1], %[load1] \n\t" "preceu.ph.qbr %[p1], %[load2] \n\t" "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac0, %[p1], %[filter45] \n\t" "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" "preceu.ph.qbl %[scratch1], %[load1] \n\t" "preceu.ph.qbl %[p1], %[load2] \n\t" "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" "extp %[Temp1], $ac0, 31 \n\t" "extp %[Temp2], $ac1, 31 \n\t" "lbu %[scratch1], 0(%[dst_ptr]) \n\t" "lbu %[scratch2], 1(%[dst_ptr]) \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 1 */ "extp %[Temp1], $ac2, 31 \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 2 */ "extp %[Temp2], $ac3, 31 \n\t" "lbu %[scratch1], 2(%[dst_ptr]) \n\t" "sb %[store1], 0(%[dst_ptr]) \n\t" "sb %[store2], 1(%[dst_ptr]) \n\t" "lbu %[scratch2], 3(%[dst_ptr]) \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 3 */ "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 4 */ "sb %[store1], 2(%[dst_ptr]) \n\t" "sb %[store2], 3(%[dst_ptr]) \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [p1] "=&r"(p1), [p2] "=&r"(p2), [scratch1] "=&r"(scratch1), [scratch2] "=&r"(scratch2), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [store1] "=&r"(store1), [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr) : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [src_stride] "r"(src_stride), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr)); } /* Next row... 
*/ src += src_stride; dst += dst_stride; } } static void convolve_bi_avg_vert_64_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t h) { int32_t x, y; const uint8_t *src_ptr; uint8_t *dst_ptr; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector4a = 64; uint32_t load1, load2; uint32_t p1, p2; uint32_t scratch1, scratch2; uint32_t store1, store2; int32_t Temp1, Temp2; const int16_t *filter = &filter_y[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_store(dst + dst_stride); prefetch_store(dst + dst_stride + 32); for (x = 0; x < 64; x += 4) { src_ptr = src + x; dst_ptr = dst + x; __asm__ __volatile__( "ulw %[load1], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load2], 0(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "mtlo %[vector4a], $ac1 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac0 \n\t" "mthi $zero, $ac1 \n\t" "mthi $zero, $ac2 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[scratch1], %[load1] \n\t" "preceu.ph.qbr %[p1], %[load2] \n\t" "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac0, %[p1], %[filter45] \n\t" "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" "preceu.ph.qbl %[scratch1], %[load1] \n\t" "preceu.ph.qbl %[p1], %[load2] \n\t" "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" "extp %[Temp1], $ac0, 31 \n\t" "extp %[Temp2], $ac1, 31 \n\t" "lbu %[scratch1], 0(%[dst_ptr]) \n\t" "lbu %[scratch2], 1(%[dst_ptr]) \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 1 */ "extp %[Temp1], $ac2, 31 \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 2 */ "extp %[Temp2], $ac3, 31 \n\t" "lbu %[scratch1], 2(%[dst_ptr]) \n\t" "sb %[store1], 0(%[dst_ptr]) \n\t" "sb %[store2], 1(%[dst_ptr]) \n\t" "lbu %[scratch2], 3(%[dst_ptr]) \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 3 */ "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 4 */ "sb %[store1], 2(%[dst_ptr]) \n\t" "sb %[store2], 3(%[dst_ptr]) \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [p1] "=&r"(p1), [p2] "=&r"(p2), [scratch1] "=&r"(scratch1), [scratch2] "=&r"(scratch2), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [store1] "=&r"(store1), [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr) : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [src_stride] "r"(src_stride), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr)); } /* Next row... 
*/ src += src_stride; dst += dst_stride; } } void vpx_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16_t *const filter_y = filter[y0_q4]; uint32_t pos = 38; assert(y_step_q4 == 16); /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); prefetch_store(dst); switch (w) { case 4: case 8: case 16: case 32: convolve_bi_avg_vert_4_dspr2(src, src_stride, dst, dst_stride, filter_y, w, h); break; case 64: prefetch_store(dst + 32); convolve_bi_avg_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h); break; default: vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } #endif libvpx-1.8.2/vpx_dsp/mips/convolve2_avg_horiz_dspr2.c000066400000000000000000001323641357355204000227410ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_convolve.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 static void convolve_bi_avg_horiz_4_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y; uint8_t *cm = vpx_ff_cropTbl; int32_t Temp1, Temp2, Temp3, Temp4; uint32_t vector4a = 64; uint32_t tp1, tp2; uint32_t p1, p2, p3; uint32_t tn1, tn2; const int16_t *filter = &filter_x0[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" "extp %[Temp1], $ac3, 31 \n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "balign %[tp2], %[tp1], 3 \n\t" "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" "extp %[Temp3], $ac2, 31 \n\t" "lbu %[p2], 3(%[dst]) \n\t" /* load odd 2 */ /* odd 1. pixel */ "lbux %[tp1], %[Temp1](%[cm]) \n\t" /* even 1 */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "lbu %[Temp1], 1(%[dst]) \n\t" /* load odd 1 */ "preceu.ph.qbr %[p1], %[tp2] \n\t" "preceu.ph.qbl %[p3], %[tp2] \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" "extp %[Temp2], $ac3, 31 \n\t" "lbu %[tn2], 0(%[dst]) \n\t" /* load even 1 */ /* odd 2. 
pixel */ "lbux %[tp2], %[Temp3](%[cm]) \n\t" /* even 2 */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "lbux %[tn1], %[Temp2](%[cm]) \n\t" /* odd 1 */ "addqh_r.w %[tn2], %[tn2], %[tp1] \n\t" /* average even 1 */ "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" "extp %[Temp4], $ac2, 31 \n\t" "lbu %[tp1], 2(%[dst]) \n\t" /* load even 2 */ "sb %[tn2], 0(%[dst]) \n\t" /* store even 1 */ /* clamp */ "addqh_r.w %[Temp1], %[Temp1], %[tn1] \n\t" /* average odd 1 */ "lbux %[p3], %[Temp4](%[cm]) \n\t" /* odd 2 */ "sb %[Temp1], 1(%[dst]) \n\t" /* store odd 1 */ "addqh_r.w %[tp1], %[tp1], %[tp2] \n\t" /* average even 2 */ "sb %[tp1], 2(%[dst]) \n\t" /* store even 2 */ "addqh_r.w %[p2], %[p2], %[p3] \n\t" /* average odd 2 */ "sb %[p2], 3(%[dst]) \n\t" /* store odd 2 */ : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4) : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); /* Next row... */ src += src_stride; dst += dst_stride; } } static void convolve_bi_avg_horiz_8_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector4a = 64; int32_t Temp1, Temp2, Temp3; uint32_t tp1, tp2, tp3, tp4; uint32_t p1, p2, p3, p4, n1; uint32_t st0, st1; const int16_t *filter = &filter_x0[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "preceu.ph.qbl %[p4], %[tp2] \n\t" "ulw %[tp3], 8(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" "extp %[Temp1], $ac3, 31 \n\t" "lbu %[Temp2], 0(%[dst]) \n\t" "lbu %[tp4], 2(%[dst]) \n\t" /* even 2. pixel */ "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" "extp %[Temp3], $ac2, 31 \n\t" /* even 3. pixel */ "lbux %[st0], %[Temp1](%[cm]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "lbux %[st1], %[Temp3](%[cm]) \n\t" "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" "extp %[Temp1], $ac1, 31 \n\t" "addqh_r.w %[Temp2], %[Temp2], %[st0] \n\t" "addqh_r.w %[tp4], %[tp4], %[st1] \n\t" "sb %[Temp2], 0(%[dst]) \n\t" "sb %[tp4], 2(%[dst]) \n\t" /* even 4. pixel */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "balign %[tp3], %[tp2], 3 \n\t" "balign %[tp2], %[tp1], 3 \n\t" "lbux %[st0], %[Temp1](%[cm]) \n\t" "lbu %[Temp2], 4(%[dst]) \n\t" "addqh_r.w %[Temp2], %[Temp2], %[st0] \n\t" "dpa.w.ph $ac2, %[p4], %[filter45] \n\t" "extp %[Temp3], $ac2, 31 \n\t" /* odd 1. pixel */ "mtlo %[vector4a], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sb %[Temp2], 4(%[dst]) \n\t" "preceu.ph.qbr %[p1], %[tp2] \n\t" "preceu.ph.qbl %[p2], %[tp2] \n\t" "preceu.ph.qbr %[p3], %[tp3] \n\t" "preceu.ph.qbl %[p4], %[tp3] \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" "extp %[Temp2], $ac3, 31 \n\t" "lbu %[tp1], 6(%[dst]) \n\t" /* odd 2. 
pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "lbux %[st0], %[Temp3](%[cm]) \n\t" "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" "extp %[Temp3], $ac1, 31 \n\t" "lbu %[tp2], 1(%[dst]) \n\t" "lbu %[tp3], 3(%[dst]) \n\t" "addqh_r.w %[tp1], %[tp1], %[st0] \n\t" /* odd 3. pixel */ "lbux %[st1], %[Temp2](%[cm]) \n\t" "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" "addqh_r.w %[tp2], %[tp2], %[st1] \n\t" "extp %[Temp2], $ac3, 31 \n\t" "lbu %[tp4], 5(%[dst]) \n\t" /* odd 4. pixel */ "sb %[tp2], 1(%[dst]) \n\t" "sb %[tp1], 6(%[dst]) \n\t" "dpa.w.ph $ac2, %[p4], %[filter45] \n\t" "extp %[Temp1], $ac2, 31 \n\t" "lbu %[tp1], 7(%[dst]) \n\t" /* clamp */ "lbux %[p4], %[Temp3](%[cm]) \n\t" "addqh_r.w %[tp3], %[tp3], %[p4] \n\t" "lbux %[p2], %[Temp2](%[cm]) \n\t" "addqh_r.w %[tp4], %[tp4], %[p2] \n\t" "lbux %[p1], %[Temp1](%[cm]) \n\t" "addqh_r.w %[tp1], %[tp1], %[p1] \n\t" /* store bytes */ "sb %[tp3], 3(%[dst]) \n\t" "sb %[tp4], 5(%[dst]) \n\t" "sb %[tp1], 7(%[dst]) \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [tp4] "=&r"(tp4), [st0] "=&r"(st0), [st1] "=&r"(st1), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [n1] "=&r"(n1), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); /* Next row... */ src += src_stride; dst += dst_stride; } } static void convolve_bi_avg_horiz_16_dspr2(const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, const int16_t *filter_x0, int32_t h, int32_t count) { int32_t y, c; const uint8_t *src; uint8_t *dst; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector_64 = 64; int32_t Temp1, Temp2, Temp3; uint32_t qload1, qload2, qload3; uint32_t p1, p2, p3, p4, p5; uint32_t st1, st2, st3; const int16_t *filter = &filter_x0[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { src = src_ptr; dst = dst_ptr; /* prefetch data to cache memory */ prefetch_load(src_ptr + src_stride); prefetch_load(src_ptr + src_stride + 32); prefetch_store(dst_ptr + dst_stride); for (c = 0; c < count; c++) { __asm__ __volatile__( "ulw %[qload1], 0(%[src]) \n\t" "ulw %[qload2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ "mthi $zero, $ac1 \n\t" "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "ulw %[qload3], 8(%[src]) \n\t" "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* even 1 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ "lbu %[st2], 0(%[dst]) \n\t" /* load even 1 from dst */ /* even 2. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "ulw %[qload1], 12(%[src]) \n\t" "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" /* even 1 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ "lbu %[qload3], 2(%[dst]) \n\t" /* load even 2 from dst */ /* even 3. 
pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 1 */ "preceu.ph.qbr %[p2], %[qload1] \n\t" "sb %[st2], 0(%[dst]) \n\t" /* store even 1 to dst */ "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" /* even 3 */ "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ /* even 4. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload3], %[qload3], %[st2] \n\t" /* average even 2 */ "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[qload3], 2(%[dst]) \n\t" /* store even 2 to dst */ "lbu %[qload3], 4(%[dst]) \n\t" /* load even 3 from dst */ "lbu %[qload1], 6(%[dst]) \n\t" /* load even 4 from dst */ "dpa.w.ph $ac1, %[p4], %[filter45] \n\t" /* even 4 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ /* even 5. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 3 */ "sb %[qload3], 4(%[dst]) \n\t" /* store even 3 to dst */ "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" /* even 5 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ /* even 6. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average even 4 */ "sb %[qload1], 6(%[dst]) \n\t" /* store even 4 to dst */ "dpa.w.ph $ac3, %[p5], %[filter45] \n\t" /* even 6 */ "lbu %[qload2], 8(%[dst]) \n\t" /* load even 5 from dst */ "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ /* even 7. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 5 */ "sb %[qload2], 8(%[dst]) \n\t" /* store even 5 to dst */ "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* even 7 */ "lbu %[qload3], 10(%[dst]) \n\t" /* load even 6 from dst */ "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ "lbu %[st2], 12(%[dst]) \n\t" /* load even 7 from dst */ /* even 8. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 6 */ "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* even 8 */ "sb %[qload3], 10(%[dst]) \n\t" /* store even 6 to dst */ "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ /* ODD pixels */ "ulw %[qload1], 1(%[src]) \n\t" "ulw %[qload2], 5(%[src]) \n\t" "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 7 */ /* odd 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "sb %[st2], 12(%[dst]) \n\t" /* store even 7 to dst */ "ulw %[qload3], 9(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" /* odd 1 */ "lbu %[qload2], 14(%[dst]) \n\t" /* load even 8 from dst */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ "lbu %[st1], 1(%[dst]) \n\t" /* load odd 1 from dst */ /* odd 2. 
pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 8 */ "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "sb %[qload2], 14(%[dst]) \n\t" /* store even 8 to dst */ "ulw %[qload1], 13(%[src]) \n\t" "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* odd 2 */ "lbu %[qload3], 3(%[dst]) \n\t" /* load odd 2 from dst */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ /* odd 3. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[st3], %[st3], %[st1] \n\t" /* average odd 1 */ "preceu.ph.qbr %[p2], %[qload1] \n\t" "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* odd 3 */ "sb %[st3], 1(%[dst]) \n\t" /* store odd 1 to dst */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ /* odd 4. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[qload3], %[qload3], %[st1] \n\t" /* average odd 2 */ "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[qload3], 3(%[dst]) \n\t" /* store odd 2 to dst */ "lbu %[qload1], 5(%[dst]) \n\t" /* load odd 3 from dst */ "dpa.w.ph $ac3, %[p4], %[filter45] \n\t" /* odd 4 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ "lbu %[st1], 7(%[dst]) \n\t" /* load odd 4 from dst */ /* odd 5. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload1], %[qload1], %[st2] \n\t" /* average odd 3 */ "sb %[qload1], 5(%[dst]) \n\t" /* store odd 3 to dst */ "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* odd 5 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ "lbu %[qload1], 9(%[dst]) \n\t" /* load odd 5 from dst */ /* odd 6. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[st1], %[st1], %[st3] \n\t" /* average odd 4 */ "sb %[st1], 7(%[dst]) \n\t" /* store odd 4 to dst */ "dpa.w.ph $ac2, %[p5], %[filter45] \n\t" /* odd 6 */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ /* odd 7. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 5 */ "sb %[qload1], 9(%[dst]) \n\t" /* store odd 5 to dst */ "lbu %[qload2], 11(%[dst]) \n\t" /* load odd 6 from dst */ "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" /* odd 7 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ "lbu %[qload3], 13(%[dst]) \n\t" /* load odd 7 from dst */ /* odd 8. 
pixel */ "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" /* odd 8 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ "lbu %[qload1], 15(%[dst]) \n\t" /* load odd 8 from dst */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average odd 6 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average odd 7 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 8 */ "sb %[qload2], 11(%[dst]) \n\t" /* store odd 6 to dst */ "sb %[qload3], 13(%[dst]) \n\t" /* store odd 7 to dst */ "sb %[qload1], 15(%[dst]) \n\t" /* store odd 8 to dst */ : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [qload3] "=&r"(qload3), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); src += 16; dst += 16; } /* Next row... */ src_ptr += src_stride; dst_ptr += dst_stride; } } static void convolve_bi_avg_horiz_64_dspr2(const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y, c; const uint8_t *src; uint8_t *dst; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector_64 = 64; int32_t Temp1, Temp2, Temp3; uint32_t qload1, qload2, qload3; uint32_t p1, p2, p3, p4, p5; uint32_t st1, st2, st3; const int16_t *filter = &filter_x0[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { src = src_ptr; dst = dst_ptr; /* prefetch data to cache memory */ prefetch_load(src_ptr + src_stride); prefetch_load(src_ptr + src_stride + 32); prefetch_load(src_ptr + src_stride + 64); prefetch_store(dst_ptr + dst_stride); prefetch_store(dst_ptr + dst_stride + 32); for (c = 0; c < 4; c++) { __asm__ __volatile__( "ulw %[qload1], 0(%[src]) \n\t" "ulw %[qload2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ "mthi $zero, $ac1 \n\t" "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "ulw %[qload3], 8(%[src]) \n\t" "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* even 1 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ "lbu %[st2], 0(%[dst]) \n\t" /* load even 1 from dst */ /* even 2. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "ulw %[qload1], 12(%[src]) \n\t" "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" /* even 1 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ "lbu %[qload3], 2(%[dst]) \n\t" /* load even 2 from dst */ /* even 3. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 1 */ "preceu.ph.qbr %[p2], %[qload1] \n\t" "sb %[st2], 0(%[dst]) \n\t" /* store even 1 to dst */ "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" /* even 3 */ "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ /* even 4. 
pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload3], %[qload3], %[st2] \n\t" /* average even 2 */ "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[qload3], 2(%[dst]) \n\t" /* store even 2 to dst */ "lbu %[qload3], 4(%[dst]) \n\t" /* load even 3 from dst */ "lbu %[qload1], 6(%[dst]) \n\t" /* load even 4 from dst */ "dpa.w.ph $ac1, %[p4], %[filter45] \n\t" /* even 4 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ /* even 5. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 3 */ "sb %[qload3], 4(%[dst]) \n\t" /* store even 3 to dst */ "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" /* even 5 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ /* even 6. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average even 4 */ "sb %[qload1], 6(%[dst]) \n\t" /* store even 4 to dst */ "dpa.w.ph $ac3, %[p5], %[filter45] \n\t" /* even 6 */ "lbu %[qload2], 8(%[dst]) \n\t" /* load even 5 from dst */ "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ /* even 7. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 5 */ "sb %[qload2], 8(%[dst]) \n\t" /* store even 5 to dst */ "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* even 7 */ "lbu %[qload3], 10(%[dst]) \n\t" /* load even 6 from dst */ "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ "lbu %[st2], 12(%[dst]) \n\t" /* load even 7 from dst */ /* even 8. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 6 */ "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* even 8 */ "sb %[qload3], 10(%[dst]) \n\t" /* store even 6 to dst */ "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ /* ODD pixels */ "ulw %[qload1], 1(%[src]) \n\t" "ulw %[qload2], 5(%[src]) \n\t" "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 7 */ /* odd 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "sb %[st2], 12(%[dst]) \n\t" /* store even 7 to dst */ "ulw %[qload3], 9(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" /* odd 1 */ "lbu %[qload2], 14(%[dst]) \n\t" /* load even 8 from dst */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ "lbu %[st1], 1(%[dst]) \n\t" /* load odd 1 from dst */ /* odd 2. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 8 */ "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "sb %[qload2], 14(%[dst]) \n\t" /* store even 8 to dst */ "ulw %[qload1], 13(%[src]) \n\t" "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* odd 2 */ "lbu %[qload3], 3(%[dst]) \n\t" /* load odd 2 from dst */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ /* odd 3. 
pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[st3], %[st3], %[st1] \n\t" /* average odd 1 */ "preceu.ph.qbr %[p2], %[qload1] \n\t" "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* odd 3 */ "sb %[st3], 1(%[dst]) \n\t" /* store odd 1 to dst */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ /* odd 4. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[qload3], %[qload3], %[st1] \n\t" /* average odd 2 */ "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[qload3], 3(%[dst]) \n\t" /* store odd 2 to dst */ "lbu %[qload1], 5(%[dst]) \n\t" /* load odd 3 from dst */ "dpa.w.ph $ac3, %[p4], %[filter45] \n\t" /* odd 4 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ "lbu %[st1], 7(%[dst]) \n\t" /* load odd 4 from dst */ /* odd 5. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload1], %[qload1], %[st2] \n\t" /* average odd 3 */ "sb %[qload1], 5(%[dst]) \n\t" /* store odd 3 to dst */ "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* odd 5 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ "lbu %[qload1], 9(%[dst]) \n\t" /* load odd 5 from dst */ /* odd 6. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[st1], %[st1], %[st3] \n\t" /* average odd 4 */ "sb %[st1], 7(%[dst]) \n\t" /* store odd 4 to dst */ "dpa.w.ph $ac2, %[p5], %[filter45] \n\t" /* odd 6 */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ /* odd 7. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 5 */ "sb %[qload1], 9(%[dst]) \n\t" /* store odd 5 to dst */ "lbu %[qload2], 11(%[dst]) \n\t" /* load odd 6 from dst */ "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" /* odd 7 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ "lbu %[qload3], 13(%[dst]) \n\t" /* load odd 7 from dst */ /* odd 8. pixel */ "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" /* odd 8 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ "lbu %[qload1], 15(%[dst]) \n\t" /* load odd 8 from dst */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average odd 6 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average odd 7 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 8 */ "sb %[qload2], 11(%[dst]) \n\t" /* store odd 6 to dst */ "sb %[qload3], 13(%[dst]) \n\t" /* store odd 7 to dst */ "sb %[qload1], 15(%[dst]) \n\t" /* store odd 8 to dst */ : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [qload3] "=&r"(qload3), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); src += 16; dst += 16; } /* Next row... 
*/ src_ptr += src_stride; dst_ptr += dst_stride; } } void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16_t *const filter_x = filter[x0_q4]; uint32_t pos = 38; assert(x_step_q4 == 16); /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); /* prefetch data to cache memory */ prefetch_load(src); prefetch_load(src + 32); prefetch_store(dst); switch (w) { case 4: convolve_bi_avg_horiz_4_dspr2(src, src_stride, dst, dst_stride, filter_x, h); break; case 8: convolve_bi_avg_horiz_8_dspr2(src, src_stride, dst, dst_stride, filter_x, h); break; case 16: convolve_bi_avg_horiz_16_dspr2(src, src_stride, dst, dst_stride, filter_x, h, 1); break; case 32: convolve_bi_avg_horiz_16_dspr2(src, src_stride, dst, dst_stride, filter_x, h, 2); break; case 64: prefetch_load(src + 64); prefetch_store(dst + 32); convolve_bi_avg_horiz_64_dspr2(src, src_stride, dst, dst_stride, filter_x, h); break; default: vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } #endif libvpx-1.8.2/vpx_dsp/mips/convolve2_dspr2.c000066400000000000000000001323631357355204000206700ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 static void convolve_bi_horiz_4_transposed_dspr2( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y; uint8_t *cm = vpx_ff_cropTbl; uint8_t *dst_ptr; int32_t Temp1, Temp2; uint32_t vector4a = 64; uint32_t tp1, tp2; uint32_t p1, p2; const int16_t *filter = &filter_x0[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { dst_ptr = dst; /* prefetch data to cache memory */ prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" "extp %[Temp1], $ac3, 31 \n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "balign %[tp2], %[tp1], 3 \n\t" "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" "extp %[Temp2], $ac2, 31 \n\t" /* odd 1. pixel */ "lbux %[tp1], %[Temp1](%[cm]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp2] \n\t" "preceu.ph.qbl %[p2], %[tp2] \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" "extp %[Temp1], $ac3, 31 \n\t" /* odd 2. 
pixel */ "lbux %[tp2], %[Temp2](%[cm]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" "extp %[Temp2], $ac2, 31 \n\t" /* clamp */ "lbux %[p1], %[Temp1](%[cm]) \n\t" "lbux %[p2], %[Temp2](%[cm]) \n\t" /* store bytes */ "sb %[tp1], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" "sb %[p1], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" "sb %[tp2], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" "sb %[p2], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [p1] "=&r"(p1), [p2] "=&r"(p2), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [dst_ptr] "+r"(dst_ptr) : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm), [src] "r"(src), [dst_stride] "r"(dst_stride)); /* Next row... */ src += src_stride; dst += 1; } } static void convolve_bi_horiz_8_transposed_dspr2( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y; uint8_t *cm = vpx_ff_cropTbl; uint8_t *dst_ptr; uint32_t vector4a = 64; int32_t Temp1, Temp2, Temp3; uint32_t tp1, tp2, tp3; uint32_t p1, p2, p3, p4; uint8_t *odd_dst; uint32_t dst_pitch_2 = (dst_stride << 1); const int16_t *filter = &filter_x0[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); dst_ptr = dst; odd_dst = (dst_ptr + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "preceu.ph.qbl %[p4], %[tp2] \n\t" "ulw %[tp3], 8(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" "extp %[Temp1], $ac3, 31 \n\t" /* even 2. pixel */ "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" "extp %[Temp3], $ac2, 31 \n\t" /* even 3. pixel */ "lbux %[Temp2], %[Temp1](%[cm]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "balign %[tp3], %[tp2], 3 \n\t" "balign %[tp2], %[tp1], 3 \n\t" "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" "lbux %[tp1], %[Temp3](%[cm]) \n\t" "extp %[p3], $ac1, 31 \n\t" /* even 4. pixel */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sb %[Temp2], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t" "sb %[tp1], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t" "dpa.w.ph $ac2, %[p4], %[filter45] \n\t" "extp %[Temp3], $ac2, 31 \n\t" "lbux %[Temp1], %[p3](%[cm]) " "\n\t" /* odd 1. pixel */ "mtlo %[vector4a], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p1], %[tp2] \n\t" "preceu.ph.qbl %[p2], %[tp2] \n\t" "preceu.ph.qbr %[p3], %[tp3] \n\t" "preceu.ph.qbl %[p4], %[tp3] \n\t" "sb %[Temp1], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" "extp %[Temp2], $ac3, 31 \n\t" /* odd 2. pixel */ "lbux %[tp1], %[Temp3](%[cm]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" "sb %[tp1], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t" "extp %[Temp3], $ac1, 31 \n\t" /* odd 3. 
pixel */ "lbux %[tp3], %[Temp2](%[cm]) \n\t" "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" "extp %[Temp2], $ac3, 31 \n\t" /* odd 4. pixel */ "sb %[tp3], 0(%[odd_dst]) \n\t" "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" "dpa.w.ph $ac2, %[p4], %[filter45] \n\t" "extp %[Temp1], $ac2, 31 \n\t" /* clamp */ "lbux %[p4], %[Temp3](%[cm]) \n\t" "lbux %[p2], %[Temp2](%[cm]) \n\t" "lbux %[p1], %[Temp1](%[cm]) \n\t" /* store bytes */ "sb %[p4], 0(%[odd_dst]) \n\t" "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" "sb %[p2], 0(%[odd_dst]) \n\t" "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" "sb %[p1], 0(%[odd_dst]) \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [dst_ptr] "+r"(dst_ptr), [odd_dst] "+r"(odd_dst) : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm), [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2)); /* Next row... */ src += src_stride; dst += 1; } } static void convolve_bi_horiz_16_transposed_dspr2( const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, const int16_t *filter_x0, int32_t h, int32_t count) { int32_t c, y; const uint8_t *src; uint8_t *dst; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector_64 = 64; int32_t Temp1, Temp2, Temp3; uint32_t qload1, qload2; uint32_t p1, p2, p3, p4, p5; uint32_t st1, st2, st3; uint32_t dst_pitch_2 = (dst_stride << 1); uint8_t *odd_dst; const int16_t *filter = &filter_x0[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src_ptr + src_stride); prefetch_load(src_ptr + src_stride + 32); src = src_ptr; dst = dst_ptr; odd_dst = (dst + dst_stride); for (c = 0; c < count; c++) { __asm__ __volatile__( "ulw %[qload1], 0(%[src]) " "\n\t" "ulw %[qload2], 4(%[src]) " "\n\t" /* even 1. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* even 1 */ "mthi $zero, $ac1 " "\n\t" "mtlo %[vector_64], $ac2 " "\n\t" /* even 2 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p1], %[qload1] " "\n\t" "preceu.ph.qbl %[p2], %[qload1] " "\n\t" "preceu.ph.qbr %[p3], %[qload2] " "\n\t" "preceu.ph.qbl %[p4], %[qload2] " "\n\t" "ulw %[qload1], 8(%[src]) " "\n\t" "dpa.w.ph $ac1, %[p1], %[filter45] " "\n\t" /* even 1 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* even 1 */ /* even 2. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* even 3 */ "mthi $zero, $ac3 " "\n\t" "preceu.ph.qbr %[p1], %[qload1] " "\n\t" "preceu.ph.qbl %[p5], %[qload1] " "\n\t" "ulw %[qload2], 12(%[src]) " "\n\t" "dpa.w.ph $ac2, %[p2], %[filter45] " "\n\t" /* even 1 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* even 1 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* even 1 */ /* even 3. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* even 4 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbr %[p2], %[qload2] " "\n\t" "sb %[st1], 0(%[dst]) " "\n\t" /* even 1 */ "addu %[dst], %[dst], %[dst_pitch_2] " " \n\t" "dpa.w.ph $ac3, %[p3], %[filter45] " "\n\t" /* even 3 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* even 3 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* even 1 */ /* even 4. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* even 5 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbl %[p3], %[qload2] " "\n\t" "sb %[st2], 0(%[dst]) " "\n\t" /* even 2 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac1, %[p4], %[filter45] " "\n\t" /* even 4 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* even 4 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* even 3 */ /* even 5. 
pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* even 6 */ "mthi $zero, $ac3 " "\n\t" "sb %[st3], 0(%[dst]) " "\n\t" /* even 3 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac2, %[p1], %[filter45] " "\n\t" /* even 5 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* even 5 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* even 4 */ /* even 6. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* even 7 */ "mthi $zero, $ac1 " "\n\t" "sb %[st1], 0(%[dst]) " "\n\t" /* even 4 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "ulw %[qload1], 20(%[src]) " "\n\t" "dpa.w.ph $ac3, %[p5], %[filter45] " "\n\t" /* even 6 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* even 6 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* even 5 */ /* even 7. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* even 8 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p5], %[qload1] " "\n\t" "sb %[st2], 0(%[dst]) " "\n\t" /* even 5 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac1, %[p2], %[filter45] " "\n\t" /* even 7 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* even 7 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* even 6 */ /* even 8. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* odd 1 */ "mthi $zero, $ac3 " "\n\t" "dpa.w.ph $ac2, %[p3], %[filter45] " "\n\t" /* even 8 */ "sb %[st3], 0(%[dst]) " "\n\t" /* even 6 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "extp %[Temp2], $ac2, 31 " "\n\t" /* even 8 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* even 7 */ /* ODD pixels */ "ulw %[qload1], 1(%[src]) " "\n\t" "ulw %[qload2], 5(%[src]) " "\n\t" /* odd 1. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* odd 2 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbr %[p1], %[qload1] " "\n\t" "preceu.ph.qbl %[p2], %[qload1] " "\n\t" "preceu.ph.qbr %[p3], %[qload2] " "\n\t" "preceu.ph.qbl %[p4], %[qload2] " "\n\t" "sb %[st1], 0(%[dst]) " "\n\t" /* even 7 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "ulw %[qload2], 9(%[src]) " "\n\t" "dpa.w.ph $ac3, %[p1], %[filter45] " "\n\t" /* odd 1 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* odd 1 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* even 8 */ /* odd 2. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* odd 3 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p1], %[qload2] " "\n\t" "preceu.ph.qbl %[p5], %[qload2] " "\n\t" "sb %[st2], 0(%[dst]) " "\n\t" /* even 8 */ "ulw %[qload1], 13(%[src]) " "\n\t" "dpa.w.ph $ac1, %[p2], %[filter45] " "\n\t" /* odd 2 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* odd 2 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* odd 1 */ /* odd 3. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* odd 4 */ "mthi $zero, $ac3 " "\n\t" "preceu.ph.qbr %[p2], %[qload1] " "\n\t" "sb %[st3], 0(%[odd_dst]) " "\n\t" /* odd 1 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac2, %[p3], %[filter45] " "\n\t" /* odd 3 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* odd 3 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* odd 2 */ /* odd 4. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* odd 5 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbl %[p3], %[qload1] " "\n\t" "sb %[st1], 0(%[odd_dst]) " "\n\t" /* odd 2 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac3, %[p4], %[filter45] " "\n\t" /* odd 4 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* odd 4 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* odd 3 */ /* odd 5. 
pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* odd 6 */ "mthi $zero, $ac2 " "\n\t" "sb %[st2], 0(%[odd_dst]) " "\n\t" /* odd 3 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac1, %[p1], %[filter45] " "\n\t" /* odd 5 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* odd 5 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* odd 4 */ /* odd 6. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* odd 7 */ "mthi $zero, $ac3 " "\n\t" "sb %[st3], 0(%[odd_dst]) " "\n\t" /* odd 4 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "ulw %[qload1], 21(%[src]) " "\n\t" "dpa.w.ph $ac2, %[p5], %[filter45] " "\n\t" /* odd 6 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* odd 6 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* odd 5 */ /* odd 7. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* odd 8 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbr %[p5], %[qload1] " "\n\t" "sb %[st1], 0(%[odd_dst]) " "\n\t" /* odd 5 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac3, %[p2], %[filter45] " "\n\t" /* odd 7 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* odd 7 */ /* odd 8. pixel */ "dpa.w.ph $ac1, %[p3], %[filter45] " "\n\t" /* odd 8 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* odd 8 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* odd 6 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* odd 7 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* odd 8 */ "sb %[st2], 0(%[odd_dst]) " "\n\t" /* odd 6 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "sb %[st3], 0(%[odd_dst]) " "\n\t" /* odd 7 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "sb %[st1], 0(%[odd_dst]) " "\n\t" /* odd 8 */ : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [p5] "=&r"(p5), [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [dst] "+r"(dst), [odd_dst] "+r"(odd_dst) : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm), [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2)); src += 16; dst = (dst_ptr + ((c + 1) * 16 * dst_stride)); odd_dst = (dst + dst_stride); } /* Next row... */ src_ptr += src_stride; dst_ptr += 1; } } static void convolve_bi_horiz_64_transposed_dspr2( const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t c, y; const uint8_t *src; uint8_t *dst; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector_64 = 64; int32_t Temp1, Temp2, Temp3; uint32_t qload1, qload2; uint32_t p1, p2, p3, p4, p5; uint32_t st1, st2, st3; uint32_t dst_pitch_2 = (dst_stride << 1); uint8_t *odd_dst; const int16_t *filter = &filter_x0[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src_ptr + src_stride); prefetch_load(src_ptr + src_stride + 32); prefetch_load(src_ptr + src_stride + 64); src = src_ptr; dst = dst_ptr; odd_dst = (dst + dst_stride); for (c = 0; c < 4; c++) { __asm__ __volatile__( "ulw %[qload1], 0(%[src]) " "\n\t" "ulw %[qload2], 4(%[src]) " "\n\t" /* even 1. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* even 1 */ "mthi $zero, $ac1 " "\n\t" "mtlo %[vector_64], $ac2 " "\n\t" /* even 2 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p1], %[qload1] " "\n\t" "preceu.ph.qbl %[p2], %[qload1] " "\n\t" "preceu.ph.qbr %[p3], %[qload2] " "\n\t" "preceu.ph.qbl %[p4], %[qload2] " "\n\t" "ulw %[qload1], 8(%[src]) " "\n\t" "dpa.w.ph $ac1, %[p1], %[filter45] " "\n\t" /* even 1 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* even 1 */ /* even 2. 
pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* even 3 */ "mthi $zero, $ac3 " "\n\t" "preceu.ph.qbr %[p1], %[qload1] " "\n\t" "preceu.ph.qbl %[p5], %[qload1] " "\n\t" "ulw %[qload2], 12(%[src]) " "\n\t" "dpa.w.ph $ac2, %[p2], %[filter45] " "\n\t" /* even 1 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* even 1 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* even 1 */ /* even 3. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* even 4 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbr %[p2], %[qload2] " "\n\t" "sb %[st1], 0(%[dst]) " "\n\t" /* even 1 */ "addu %[dst], %[dst], %[dst_pitch_2] " " \n\t" "dpa.w.ph $ac3, %[p3], %[filter45] " "\n\t" /* even 3 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* even 3 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* even 1 */ /* even 4. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* even 5 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbl %[p3], %[qload2] " "\n\t" "sb %[st2], 0(%[dst]) " "\n\t" /* even 2 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac1, %[p4], %[filter45] " "\n\t" /* even 4 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* even 4 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* even 3 */ /* even 5. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* even 6 */ "mthi $zero, $ac3 " "\n\t" "sb %[st3], 0(%[dst]) " "\n\t" /* even 3 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac2, %[p1], %[filter45] " "\n\t" /* even 5 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* even 5 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* even 4 */ /* even 6. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* even 7 */ "mthi $zero, $ac1 " "\n\t" "sb %[st1], 0(%[dst]) " "\n\t" /* even 4 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "ulw %[qload1], 20(%[src]) " "\n\t" "dpa.w.ph $ac3, %[p5], %[filter45] " "\n\t" /* even 6 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* even 6 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* even 5 */ /* even 7. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* even 8 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p5], %[qload1] " "\n\t" "sb %[st2], 0(%[dst]) " "\n\t" /* even 5 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac1, %[p2], %[filter45] " "\n\t" /* even 7 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* even 7 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* even 6 */ /* even 8. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* odd 1 */ "mthi $zero, $ac3 " "\n\t" "dpa.w.ph $ac2, %[p3], %[filter45] " "\n\t" /* even 8 */ "sb %[st3], 0(%[dst]) " "\n\t" /* even 6 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "extp %[Temp2], $ac2, 31 " "\n\t" /* even 8 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* even 7 */ /* ODD pixels */ "ulw %[qload1], 1(%[src]) " "\n\t" "ulw %[qload2], 5(%[src]) " "\n\t" /* odd 1. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* odd 2 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbr %[p1], %[qload1] " "\n\t" "preceu.ph.qbl %[p2], %[qload1] " "\n\t" "preceu.ph.qbr %[p3], %[qload2] " "\n\t" "preceu.ph.qbl %[p4], %[qload2] " "\n\t" "sb %[st1], 0(%[dst]) " "\n\t" /* even 7 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "ulw %[qload2], 9(%[src]) " "\n\t" "dpa.w.ph $ac3, %[p1], %[filter45] " "\n\t" /* odd 1 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* odd 1 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* even 8 */ /* odd 2. 
pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* odd 3 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p1], %[qload2] " "\n\t" "preceu.ph.qbl %[p5], %[qload2] " "\n\t" "sb %[st2], 0(%[dst]) " "\n\t" /* even 8 */ "ulw %[qload1], 13(%[src]) " "\n\t" "dpa.w.ph $ac1, %[p2], %[filter45] " "\n\t" /* odd 2 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* odd 2 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* odd 1 */ /* odd 3. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* odd 4 */ "mthi $zero, $ac3 " "\n\t" "preceu.ph.qbr %[p2], %[qload1] " "\n\t" "sb %[st3], 0(%[odd_dst]) " "\n\t" /* odd 1 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac2, %[p3], %[filter45] " "\n\t" /* odd 3 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* odd 3 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* odd 2 */ /* odd 4. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* odd 5 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbl %[p3], %[qload1] " "\n\t" "sb %[st1], 0(%[odd_dst]) " "\n\t" /* odd 2 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac3, %[p4], %[filter45] " "\n\t" /* odd 4 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* odd 4 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* odd 3 */ /* odd 5. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* odd 6 */ "mthi $zero, $ac2 " "\n\t" "sb %[st2], 0(%[odd_dst]) " "\n\t" /* odd 3 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac1, %[p1], %[filter45] " "\n\t" /* odd 5 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* odd 5 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* odd 4 */ /* odd 6. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* odd 7 */ "mthi $zero, $ac3 " "\n\t" "sb %[st3], 0(%[odd_dst]) " "\n\t" /* odd 4 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "ulw %[qload1], 21(%[src]) " "\n\t" "dpa.w.ph $ac2, %[p5], %[filter45] " "\n\t" /* odd 6 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* odd 6 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* odd 5 */ /* odd 7. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* odd 8 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbr %[p5], %[qload1] " "\n\t" "sb %[st1], 0(%[odd_dst]) " "\n\t" /* odd 5 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac3, %[p2], %[filter45] " "\n\t" /* odd 7 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* odd 7 */ /* odd 8. pixel */ "dpa.w.ph $ac1, %[p3], %[filter45] " "\n\t" /* odd 8 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* odd 8 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* odd 6 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* odd 7 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* odd 8 */ "sb %[st2], 0(%[odd_dst]) " "\n\t" /* odd 6 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "sb %[st3], 0(%[odd_dst]) " "\n\t" /* odd 7 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "sb %[st1], 0(%[odd_dst]) " "\n\t" /* odd 8 */ : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [p5] "=&r"(p5), [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [dst] "+r"(dst), [odd_dst] "+r"(odd_dst) : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm), [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2)); src += 16; dst = (dst_ptr + ((c + 1) * 16 * dst_stride)); odd_dst = (dst + dst_stride); } /* Next row... 
*/ src_ptr += src_stride; dst_ptr += 1; } } void convolve_bi_horiz_transposed(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter, int w, int h) { int x, y; for (y = 0; y < h; ++y) { for (x = 0; x < w; ++x) { int sum = 0; sum += src[x] * filter[3]; sum += src[x + 1] * filter[4]; dst[x * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); } src += src_stride; dst += 1; } } void vpx_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const int16_t *filter, int w, int h) { uint32_t pos = 38; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); /* prefetch data to cache memory */ prefetch_load(src); prefetch_load(src + 32); switch (w) { case 4: convolve_bi_horiz_4_transposed_dspr2(src, src_stride, dst, dst_stride, filter, h); break; case 8: convolve_bi_horiz_8_transposed_dspr2(src, src_stride, dst, dst_stride, filter, h); break; case 16: case 32: convolve_bi_horiz_16_transposed_dspr2(src, src_stride, dst, dst_stride, filter, h, (w / 16)); break; case 64: prefetch_load(src + 32); convolve_bi_horiz_64_transposed_dspr2(src, src_stride, dst, dst_stride, filter, h); break; default: convolve_bi_horiz_transposed(src, src_stride, dst, dst_stride, filter, w, h); break; } } #endif libvpx-1.8.2/vpx_dsp/mips/convolve2_horiz_dspr2.c000066400000000000000000001077241357355204000221060ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_convolve.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 static void convolve_bi_horiz_4_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y; uint8_t *cm = vpx_ff_cropTbl; int32_t Temp1, Temp2, Temp3, Temp4; uint32_t vector4a = 64; uint32_t tp1, tp2; uint32_t p1, p2; const int16_t *filter = &filter_x0[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" "extp %[Temp1], $ac3, 31 \n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "balign %[tp2], %[tp1], 3 \n\t" "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" "extp %[Temp3], $ac2, 31 \n\t" /* odd 1. pixel */ "lbux %[tp1], %[Temp1](%[cm]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp2] \n\t" "preceu.ph.qbl %[p2], %[tp2] \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" "extp %[Temp2], $ac3, 31 \n\t" /* odd 2. 
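pixel */

/* Illustrative note for this file's kernels: the two nonzero taps of a
 * bilinear kernel are reloaded as one packed word, exactly as the code
 * above does,
 *
 *   const int16_t *filter = &filter_x0[3];
 *   uint32_t filter45 = ((const int32_t *)filter)[0];
 *
 * so filter45 carries filter_x0[3] and filter_x0[4] in its two halfword
 * lanes and a single dpa.w.ph applies both taps to a packed pixel pair.
 * "ulw" performs the unaligned 32-bit source loads,
 * "preceu.ph.qbr"/"preceu.ph.qbl" zero-extend the low/high byte pairs of
 * a loaded word to halfwords, and "balign ..., 3" synthesizes the word
 * at src + 1 from words already in registers, so the odd output phase
 * needs no extra memory traffic.
 */

/* odd 2.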
pixel */ "lbux %[tp2], %[Temp3](%[cm]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" "extp %[Temp4], $ac2, 31 \n\t" /* clamp */ "lbux %[p1], %[Temp2](%[cm]) \n\t" "lbux %[p2], %[Temp4](%[cm]) \n\t" /* store bytes */ "sb %[tp1], 0(%[dst]) \n\t" "sb %[p1], 1(%[dst]) \n\t" "sb %[tp2], 2(%[dst]) \n\t" "sb %[p2], 3(%[dst]) \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [p1] "=&r"(p1), [p2] "=&r"(p2), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4) : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); /* Next row... */ src += src_stride; dst += dst_stride; } } static void convolve_bi_horiz_8_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector4a = 64; int32_t Temp1, Temp2, Temp3; uint32_t tp1, tp2, tp3; uint32_t p1, p2, p3, p4; uint32_t st0, st1; const int16_t *filter = &filter_x0[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "preceu.ph.qbl %[p4], %[tp2] \n\t" "ulw %[tp3], 8(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" "extp %[Temp1], $ac3, 31 \n\t" /* even 2. pixel */ "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" "extp %[Temp3], $ac2, 31 \n\t" /* even 3. pixel */ "lbux %[st0], %[Temp1](%[cm]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" "extp %[Temp1], $ac1, 31 \n\t" /* even 4. pixel */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sb %[st0], 0(%[dst]) \n\t" "lbux %[st1], %[Temp3](%[cm]) \n\t" "balign %[tp3], %[tp2], 3 \n\t" "balign %[tp2], %[tp1], 3 \n\t" "dpa.w.ph $ac2, %[p4], %[filter45] \n\t" "extp %[Temp3], $ac2, 31 \n\t" "lbux %[st0], %[Temp1](%[cm]) \n\t" /* odd 1. pixel */ "mtlo %[vector4a], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sb %[st1], 2(%[dst]) \n\t" "preceu.ph.qbr %[p1], %[tp2] \n\t" "preceu.ph.qbl %[p2], %[tp2] \n\t" "preceu.ph.qbr %[p3], %[tp3] \n\t" "preceu.ph.qbl %[p4], %[tp3] \n\t" "sb %[st0], 4(%[dst]) \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" "extp %[Temp2], $ac3, 31 \n\t" /* odd 2. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "lbux %[st0], %[Temp3](%[cm]) \n\t" "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" "extp %[Temp3], $ac1, 31 \n\t" /* odd 3. pixel */ "lbux %[st1], %[Temp2](%[cm]) \n\t" "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" "extp %[Temp2], $ac3, 31 \n\t" /* odd 4. 
pixel */ "sb %[st1], 1(%[dst]) \n\t" "sb %[st0], 6(%[dst]) \n\t" "dpa.w.ph $ac2, %[p4], %[filter45] \n\t" "extp %[Temp1], $ac2, 31 \n\t" /* clamp */ "lbux %[p4], %[Temp3](%[cm]) \n\t" "lbux %[p2], %[Temp2](%[cm]) \n\t" "lbux %[p1], %[Temp1](%[cm]) \n\t" /* store bytes */ "sb %[p4], 3(%[dst]) \n\t" "sb %[p2], 5(%[dst]) \n\t" "sb %[p1], 7(%[dst]) \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [st0] "=&r"(st0), [st1] "=&r"(st1), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); /* Next row... */ src += src_stride; dst += dst_stride; } } static void convolve_bi_horiz_16_dspr2(const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, const int16_t *filter_x0, int32_t h, int32_t count) { int32_t y, c; const uint8_t *src; uint8_t *dst; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector_64 = 64; int32_t Temp1, Temp2, Temp3; uint32_t qload1, qload2, qload3; uint32_t p1, p2, p3, p4, p5; uint32_t st1, st2, st3; const int16_t *filter = &filter_x0[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { src = src_ptr; dst = dst_ptr; /* prefetch data to cache memory */ prefetch_load(src_ptr + src_stride); prefetch_load(src_ptr + src_stride + 32); prefetch_store(dst_ptr + dst_stride); for (c = 0; c < count; c++) { __asm__ __volatile__( "ulw %[qload1], 0(%[src]) \n\t" "ulw %[qload2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ "mthi $zero, $ac1 \n\t" "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "ulw %[qload3], 8(%[src]) \n\t" "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* even 1 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ /* even 2. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "ulw %[qload1], 12(%[src]) \n\t" "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" /* even 1 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ /* even 3. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p2], %[qload1] \n\t" "sb %[st1], 0(%[dst]) \n\t" /* even 1 */ "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" /* even 3 */ "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ /* even 4. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[st2], 2(%[dst]) \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p4], %[filter45] \n\t" /* even 4 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ /* even 5. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ "mthi $zero, $ac3 \n\t" "sb %[st3], 4(%[dst]) \n\t" /* even 3 */ "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" /* even 5 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ /* even 6. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ "mthi $zero, $ac1 \n\t" "sb %[st1], 6(%[dst]) \n\t" /* even 4 */ "dpa.w.ph $ac3, %[p5], %[filter45] \n\t" /* even 6 */ "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ /* even 7. 
pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ "mthi $zero, $ac2 \n\t" "sb %[st2], 8(%[dst]) \n\t" /* even 5 */ "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* even 7 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ /* even 8. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ "mthi $zero, $ac3 \n\t" "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* even 8 */ "sb %[st3], 10(%[dst]) \n\t" /* even 6 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ /* ODD pixels */ "ulw %[qload1], 1(%[src]) \n\t" "ulw %[qload2], 5(%[src]) \n\t" /* odd 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "sb %[st1], 12(%[dst]) \n\t" /* even 7 */ "ulw %[qload3], 9(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" /* odd 1 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ /* odd 2. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "sb %[st2], 14(%[dst]) \n\t" /* even 8 */ "ulw %[qload1], 13(%[src]) \n\t" "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* odd 2 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ /* odd 3. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p2], %[qload1] \n\t" "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */ "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* odd 3 */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ /* odd 4. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */ "dpa.w.ph $ac3, %[p4], %[filter45] \n\t" /* odd 4 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ /* odd 5. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ "mthi $zero, $ac2 \n\t" "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */ "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* odd 5 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ /* odd 6. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ "mthi $zero, $ac3 \n\t" "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */ "dpa.w.ph $ac2, %[p5], %[filter45] \n\t" /* odd 6 */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ /* odd 7. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ "mthi $zero, $ac1 \n\t" "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */ "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" /* odd 7 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ /* odd 8. 
pixel */ "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" /* odd 8 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */ "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */ "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */ : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); src += 16; dst += 16; } /* Next row... */ src_ptr += src_stride; dst_ptr += dst_stride; } } static void convolve_bi_horiz_64_dspr2(const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y, c; const uint8_t *src; uint8_t *dst; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector_64 = 64; int32_t Temp1, Temp2, Temp3; uint32_t qload1, qload2, qload3; uint32_t p1, p2, p3, p4, p5; uint32_t st1, st2, st3; const int16_t *filter = &filter_x0[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { src = src_ptr; dst = dst_ptr; /* prefetch data to cache memory */ prefetch_load(src_ptr + src_stride); prefetch_load(src_ptr + src_stride + 32); prefetch_load(src_ptr + src_stride + 64); prefetch_store(dst_ptr + dst_stride); prefetch_store(dst_ptr + dst_stride + 32); for (c = 0; c < 4; c++) { __asm__ __volatile__( "ulw %[qload1], 0(%[src]) \n\t" "ulw %[qload2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ "mthi $zero, $ac1 \n\t" "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "ulw %[qload3], 8(%[src]) \n\t" "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* even 1 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ /* even 2. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "ulw %[qload1], 12(%[src]) \n\t" "dpa.w.ph $ac2, %[p2], %[filter45] \n\t" /* even 1 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ /* even 3. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p2], %[qload1] \n\t" "sb %[st1], 0(%[dst]) \n\t" /* even 1 */ "dpa.w.ph $ac3, %[p3], %[filter45] \n\t" /* even 3 */ "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ /* even 4. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[st2], 2(%[dst]) \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p4], %[filter45] \n\t" /* even 4 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ /* even 5. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ "mthi $zero, $ac3 \n\t" "sb %[st3], 4(%[dst]) \n\t" /* even 3 */ "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" /* even 5 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ /* even 6. 
pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ "mthi $zero, $ac1 \n\t" "sb %[st1], 6(%[dst]) \n\t" /* even 4 */ "dpa.w.ph $ac3, %[p5], %[filter45] \n\t" /* even 6 */ "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ /* even 7. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ "mthi $zero, $ac2 \n\t" "sb %[st2], 8(%[dst]) \n\t" /* even 5 */ "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* even 7 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ /* even 8. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ "mthi $zero, $ac3 \n\t" "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* even 8 */ "sb %[st3], 10(%[dst]) \n\t" /* even 6 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ /* ODD pixels */ "ulw %[qload1], 1(%[src]) \n\t" "ulw %[qload2], 5(%[src]) \n\t" /* odd 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "sb %[st1], 12(%[dst]) \n\t" /* even 7 */ "ulw %[qload3], 9(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[filter45] \n\t" /* odd 1 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ /* odd 2. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "sb %[st2], 14(%[dst]) \n\t" /* even 8 */ "ulw %[qload1], 13(%[src]) \n\t" "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" /* odd 2 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ /* odd 3. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p2], %[qload1] \n\t" "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */ "dpa.w.ph $ac2, %[p3], %[filter45] \n\t" /* odd 3 */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ /* odd 4. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */ "dpa.w.ph $ac3, %[p4], %[filter45] \n\t" /* odd 4 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ /* odd 5. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ "mthi $zero, $ac2 \n\t" "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */ "dpa.w.ph $ac1, %[p1], %[filter45] \n\t" /* odd 5 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ /* odd 6. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ "mthi $zero, $ac3 \n\t" "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */ "dpa.w.ph $ac2, %[p5], %[filter45] \n\t" /* odd 6 */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ /* odd 7. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ "mthi $zero, $ac1 \n\t" "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */ "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" /* odd 7 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ /* odd 8. 
pixel */ "dpa.w.ph $ac1, %[p3], %[filter45] \n\t" /* odd 8 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */ "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */ "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */ : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) : [filter45] "r"(filter45), [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); src += 16; dst += 16; } /* Next row... */ src_ptr += src_stride; dst_ptr += dst_stride; } } void vpx_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16_t *const filter_x = filter[x0_q4]; uint32_t pos = 38; assert(x_step_q4 == 16); prefetch_load((const uint8_t *)filter_x); /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); /* prefetch data to cache memory */ prefetch_load(src); prefetch_load(src + 32); prefetch_store(dst); switch (w) { case 4: convolve_bi_horiz_4_dspr2(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filter_x, (int32_t)h); break; case 8: convolve_bi_horiz_8_dspr2(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filter_x, (int32_t)h); break; case 16: convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filter_x, (int32_t)h, 1); break; case 32: convolve_bi_horiz_16_dspr2(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filter_x, (int32_t)h, 2); break; case 64: prefetch_load(src + 64); prefetch_store(dst + 32); convolve_bi_horiz_64_dspr2(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filter_x, (int32_t)h); break; default: vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } #endif libvpx-1.8.2/vpx_dsp/mips/convolve2_vert_dspr2.c000066400000000000000000000235771357355204000217360ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_convolve.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 static void convolve_bi_vert_4_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h) { int32_t x, y; const uint8_t *src_ptr; uint8_t *dst_ptr; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector4a = 64; uint32_t load1, load2; uint32_t p1, p2; uint32_t scratch1; uint32_t store1, store2; int32_t Temp1, Temp2; const int16_t *filter = &filter_y[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_store(dst + dst_stride); for (x = 0; x < w; x += 4) { src_ptr = src + x; dst_ptr = dst + x; __asm__ __volatile__( "ulw %[load1], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load2], 0(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "mtlo %[vector4a], $ac1 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac0 \n\t" "mthi $zero, $ac1 \n\t" "mthi $zero, $ac2 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[scratch1], %[load1] \n\t" "preceu.ph.qbr %[p1], %[load2] \n\t" "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac0, %[p1], %[filter45] \n\t" "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" "preceu.ph.qbl %[scratch1], %[load1] \n\t" "preceu.ph.qbl %[p1], %[load2] \n\t" "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" "extp %[Temp1], $ac0, 31 \n\t" "extp %[Temp2], $ac1, 31 \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "extp %[Temp1], $ac2, 31 \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "extp %[Temp2], $ac3, 31 \n\t" "sb %[store1], 0(%[dst_ptr]) \n\t" "sb %[store2], 1(%[dst_ptr]) \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "sb %[store1], 2(%[dst_ptr]) \n\t" "sb %[store2], 3(%[dst_ptr]) \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [p1] "=&r"(p1), [p2] "=&r"(p2), [scratch1] "=&r"(scratch1), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [store1] "=&r"(store1), [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr) : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [src_stride] "r"(src_stride), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr)); } /* Next row... 
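*/

/* Illustrative note: the vertical bilinear kernel loads the same four
 * bytes from two consecutive rows and repacks them with
 * "precrq.ph.w"/"append" so that each halfword pair holds one pixel from
 * row y next to the same pixel from row y + 1; a single dpa.w.ph then
 * applies both vertical taps at once. A scalar sketch of one output
 * pixel (the helper name is hypothetical):
 *
 *   static uint8_t bi_v_px_sketch(const uint8_t *src, int src_stride,
 *                                 const int16_t *f) {
 *     // f points at taps [3] and [4] of the kernel, as in the code above
 *     int sum = src[0] * f[0] + src[src_stride] * f[1];
 *     sum = (sum + 64) >> 7;
 *     return (uint8_t)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
 *   }
 */

/* Next row...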
*/ src += src_stride; dst += dst_stride; } } static void convolve_bi_vert_64_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t h) { int32_t x, y; const uint8_t *src_ptr; uint8_t *dst_ptr; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector4a = 64; uint32_t load1, load2; uint32_t p1, p2; uint32_t scratch1; uint32_t store1, store2; int32_t Temp1, Temp2; const int16_t *filter = &filter_y[3]; uint32_t filter45; filter45 = ((const int32_t *)filter)[0]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_store(dst + dst_stride); for (x = 0; x < 64; x += 4) { src_ptr = src + x; dst_ptr = dst + x; __asm__ __volatile__( "ulw %[load1], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load2], 0(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "mtlo %[vector4a], $ac1 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac0 \n\t" "mthi $zero, $ac1 \n\t" "mthi $zero, $ac2 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[scratch1], %[load1] \n\t" "preceu.ph.qbr %[p1], %[load2] \n\t" "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac0, %[p1], %[filter45] \n\t" "dpa.w.ph $ac1, %[p2], %[filter45] \n\t" "preceu.ph.qbl %[scratch1], %[load1] \n\t" "preceu.ph.qbl %[p1], %[load2] \n\t" "precrq.ph.w %[p2], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac2, %[p1], %[filter45] \n\t" "dpa.w.ph $ac3, %[p2], %[filter45] \n\t" "extp %[Temp1], $ac0, 31 \n\t" "extp %[Temp2], $ac1, 31 \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "extp %[Temp1], $ac2, 31 \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "extp %[Temp2], $ac3, 31 \n\t" "sb %[store1], 0(%[dst_ptr]) \n\t" "sb %[store2], 1(%[dst_ptr]) \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "sb %[store1], 2(%[dst_ptr]) \n\t" "sb %[store2], 3(%[dst_ptr]) \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [p1] "=&r"(p1), [p2] "=&r"(p2), [scratch1] "=&r"(scratch1), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [store1] "=&r"(store1), [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr) : [filter45] "r"(filter45), [vector4a] "r"(vector4a), [src_stride] "r"(src_stride), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr)); } /* Next row... */ src += src_stride; dst += dst_stride; } } void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16_t *const filter_y = filter[y0_q4]; uint32_t pos = 38; assert(y_step_q4 == 16); /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); prefetch_store(dst); switch (w) { case 4: case 8: case 16: case 32: convolve_bi_vert_4_dspr2(src, src_stride, dst, dst_stride, filter_y, w, h); break; case 64: prefetch_store(dst + 32); convolve_bi_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h); break; default: vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } #endif libvpx-1.8.2/vpx_dsp/mips/convolve8_avg_dspr2.c000066400000000000000000001026651357355204000215350ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_convolve.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 static void convolve_avg_vert_4_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h) { int32_t x, y; const uint8_t *src_ptr; uint8_t *dst_ptr; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector4a = 64; uint32_t load1, load2, load3, load4; uint32_t p1, p2; uint32_t n1, n2; uint32_t scratch1, scratch2; uint32_t store1, store2; int32_t vector1b, vector2b, vector3b, vector4b; int32_t Temp1, Temp2; vector1b = ((const int32_t *)filter_y)[0]; vector2b = ((const int32_t *)filter_y)[1]; vector3b = ((const int32_t *)filter_y)[2]; vector4b = ((const int32_t *)filter_y)[3]; src -= 3 * src_stride; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_store(dst + dst_stride); for (x = 0; x < w; x += 4) { src_ptr = src + x; dst_ptr = dst + x; __asm__ __volatile__( "ulw %[load1], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load2], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load3], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load4], 0(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "mtlo %[vector4a], $ac1 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac0 \n\t" "mthi $zero, $ac1 \n\t" "mthi $zero, $ac2 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[scratch1], %[load1] \n\t" "preceu.ph.qbr %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbr %[scratch2], %[load3] \n\t" "preceu.ph.qbr %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t" "preceu.ph.qbl %[scratch1], %[load1] \n\t" "preceu.ph.qbl %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbl %[scratch2], %[load3] \n\t" "preceu.ph.qbl %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load1], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load2], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load3], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load4], 0(%[src_ptr]) \n\t" "preceu.ph.qbr %[scratch1], %[load1] \n\t" "preceu.ph.qbr %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbr %[scratch2], %[load3] \n\t" "preceu.ph.qbr %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 
\n\t" /* pixel 1 */ "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t" "extp %[Temp1], $ac0, 31 \n\t" "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t" "extp %[Temp2], $ac1, 31 \n\t" "preceu.ph.qbl %[scratch1], %[load1] \n\t" "preceu.ph.qbl %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "lbu %[scratch1], 0(%[dst_ptr]) \n\t" "preceu.ph.qbl %[scratch2], %[load3] \n\t" "preceu.ph.qbl %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "lbu %[scratch2], 1(%[dst_ptr]) \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 1 */ "extp %[Temp1], $ac2, 31 \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t" "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 2 */ "extp %[Temp2], $ac3, 31 \n\t" "lbu %[scratch1], 2(%[dst_ptr]) \n\t" "sb %[store1], 0(%[dst_ptr]) \n\t" "sb %[store2], 1(%[dst_ptr]) \n\t" "lbu %[scratch2], 3(%[dst_ptr]) \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 3 */ "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 4 */ "sb %[store1], 2(%[dst_ptr]) \n\t" "sb %[store2], 3(%[dst_ptr]) \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [p1] "=&r"(p1), [p2] "=&r"(p2), [n1] "=&r"(n1), [n2] "=&r"(n2), [scratch1] "=&r"(scratch1), [scratch2] "=&r"(scratch2), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [store1] "=&r"(store1), [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), [vector4a] "r"(vector4a), [src_stride] "r"(src_stride), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr)); } /* Next row... 
*/ src += src_stride; dst += dst_stride; } } static void convolve_avg_vert_64_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t h) { int32_t x, y; const uint8_t *src_ptr; uint8_t *dst_ptr; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector4a = 64; uint32_t load1, load2, load3, load4; uint32_t p1, p2; uint32_t n1, n2; uint32_t scratch1, scratch2; uint32_t store1, store2; int32_t vector1b, vector2b, vector3b, vector4b; int32_t Temp1, Temp2; vector1b = ((const int32_t *)filter_y)[0]; vector2b = ((const int32_t *)filter_y)[1]; vector3b = ((const int32_t *)filter_y)[2]; vector4b = ((const int32_t *)filter_y)[3]; src -= 3 * src_stride; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_store(dst + dst_stride); prefetch_store(dst + dst_stride + 32); for (x = 0; x < 64; x += 4) { src_ptr = src + x; dst_ptr = dst + x; __asm__ __volatile__( "ulw %[load1], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load2], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load3], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load4], 0(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "mtlo %[vector4a], $ac1 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac0 \n\t" "mthi $zero, $ac1 \n\t" "mthi $zero, $ac2 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[scratch1], %[load1] \n\t" "preceu.ph.qbr %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbr %[scratch2], %[load3] \n\t" "preceu.ph.qbr %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t" "preceu.ph.qbl %[scratch1], %[load1] \n\t" "preceu.ph.qbl %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbl %[scratch2], %[load3] \n\t" "preceu.ph.qbl %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load1], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load2], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load3], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load4], 0(%[src_ptr]) \n\t" "preceu.ph.qbr %[scratch1], %[load1] \n\t" "preceu.ph.qbr %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbr %[scratch2], %[load3] \n\t" "preceu.ph.qbr %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t" "extp %[Temp1], $ac0, 31 \n\t" "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t" "extp %[Temp2], $ac1, 31 \n\t" "preceu.ph.qbl %[scratch1], %[load1] \n\t" "preceu.ph.qbl %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], 
%[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "lbu %[scratch1], 0(%[dst_ptr]) \n\t" "preceu.ph.qbl %[scratch2], %[load3] \n\t" "preceu.ph.qbl %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "lbu %[scratch2], 1(%[dst_ptr]) \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 1 */ "extp %[Temp1], $ac2, 31 \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t" "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 2 */ "extp %[Temp2], $ac3, 31 \n\t" "lbu %[scratch1], 2(%[dst_ptr]) \n\t" "sb %[store1], 0(%[dst_ptr]) \n\t" "sb %[store2], 1(%[dst_ptr]) \n\t" "lbu %[scratch2], 3(%[dst_ptr]) \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "addqh_r.w %[store1], %[store1], %[scratch1] \n\t" /* pixel 3 */ "addqh_r.w %[store2], %[store2], %[scratch2] \n\t" /* pixel 4 */ "sb %[store1], 2(%[dst_ptr]) \n\t" "sb %[store2], 3(%[dst_ptr]) \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [p1] "=&r"(p1), [p2] "=&r"(p2), [n1] "=&r"(n1), [n2] "=&r"(n2), [scratch1] "=&r"(scratch1), [scratch2] "=&r"(scratch2), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [store1] "=&r"(store1), [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), [vector4a] "r"(vector4a), [src_stride] "r"(src_stride), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr)); } /* Next row... */ src += src_stride; dst += dst_stride; } } void vpx_convolve8_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16_t *const filter_y = filter[y0_q4]; assert(y_step_q4 == 16); assert(((const int32_t *)filter_y)[1] != 0x800000); if (vpx_get_filter_taps(filter_y) == 2) { vpx_convolve2_avg_vert_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { uint32_t pos = 38; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); prefetch_store(dst); switch (w) { case 4: case 8: case 16: case 32: convolve_avg_vert_4_dspr2(src, src_stride, dst, dst_stride, filter_y, w, h); break; case 64: prefetch_store(dst + 32); convolve_avg_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h); break; default: vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } } void vpx_convolve8_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4, int w, int h) { /* Fixed size intermediate buffer places limits on parameters. 
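*/

/* Illustrative note: vpx_convolve8_avg_dspr2 below is the classic
 * two-pass separable filter. The horizontal pass starts three rows above
 * the block (src - src_stride * 3) and writes into a 64-wide temp
 * buffer, because the 8-tap vertical pass needs rows of context above
 * and below; the averaging vertical pass then reads from temp + 64 * 3.
 * With y_step_q4 asserted to be 16, the intermediate height works out to
 *
 *   intermediate_height = ((h * y_step_q4) >> 4) + 7  // == h + 7 <= 71
 *
 * while the temp buffer is sized 64 * 135, which appears to match the
 * library-wide worst case for the scaled paths rather than this
 * fixed-step one.
 */

/* Fixed size intermediate buffer places limits on parameters.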
*/ DECLARE_ALIGNED(32, uint8_t, temp[64 * 135]); int32_t intermediate_height = ((h * y_step_q4) >> 4) + 7; assert(w <= 64); assert(h <= 64); assert(x_step_q4 == 16); assert(y_step_q4 == 16); if (intermediate_height < h) intermediate_height = h; vpx_convolve8_horiz(src - (src_stride * 3), src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, intermediate_height); vpx_convolve8_avg_vert(temp + 64 * 3, 64, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } void vpx_convolve_avg_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4, int w, int h) { int x, y; uint32_t tp1, tp2, tn1, tp3, tp4, tn2; (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; /* prefetch data to cache memory */ prefetch_load(src); prefetch_load(src + 32); prefetch_store(dst); switch (w) { case 4: /* 1 word storage */ for (y = h; y--;) { prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 0(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "sw %[tn1], 0(%[dst]) \n\t" /* store */ : [tn1] "=&r"(tn1), [tp1] "=&r"(tp1), [tp2] "=&r"(tp2) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } break; case 8: /* 2 word storage */ for (y = h; y--;) { prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 0(%[dst]) \n\t" "ulw %[tp3], 4(%[src]) \n\t" "ulw %[tp4], 4(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "sw %[tn1], 0(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 4(%[dst]) \n\t" /* store */ : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [tp4] "=&r"(tp4), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } break; case 16: /* 4 word storage */ for (y = h; y--;) { prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 0(%[dst]) \n\t" "ulw %[tp3], 4(%[src]) \n\t" "ulw %[tp4], 4(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "ulw %[tp1], 8(%[src]) \n\t" "ulw %[tp2], 8(%[dst]) \n\t" "sw %[tn1], 0(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 4(%[dst]) \n\t" /* store */ "ulw %[tp3], 12(%[src]) \n\t" "ulw %[tp4], 12(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "sw %[tn1], 8(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 12(%[dst]) \n\t" /* store */ : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [tp4] "=&r"(tp4), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } break; case 32: /* 8 word storage */ for (y = h; y--;) { prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 0(%[dst]) \n\t" "ulw %[tp3], 4(%[src]) \n\t" "ulw %[tp4], 4(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "ulw %[tp1], 8(%[src]) \n\t" "ulw %[tp2], 8(%[dst]) \n\t" "sw %[tn1], 0(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* 
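average */

/* Illustrative note: vpx_convolve_avg_dspr2 does no filtering at all,
 * only the compound average of an existing prediction with a new one.
 * "adduh_r.qb" is a SIMD rounded halving add over the four byte lanes of
 * a word, so each ulw/ulw/adduh_r.qb/sw group averages four pixels; the
 * scalar default case at the end of this function uses the same formula,
 * dst[x] = (dst[x] + src[x] + 1) >> 1. A per-word reference sketch
 * (hypothetical name):
 *
 *   static uint32_t adduh_r_qb_sketch(uint32_t a, uint32_t b) {
 *     uint32_t r = 0;
 *     int i;
 *     for (i = 0; i < 4; ++i) {
 *       uint32_t x = (a >> (8 * i)) & 0xff;  // byte lane i of a
 *       uint32_t y = (b >> (8 * i)) & 0xff;  // byte lane i of b
 *       r |= ((x + y + 1) >> 1) << (8 * i);  // rounded halving add
 *     }
 *     return r;
 *   }
 */

/*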
average */ "sw %[tn2], 4(%[dst]) \n\t" /* store */ "ulw %[tp3], 12(%[src]) \n\t" "ulw %[tp4], 12(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "ulw %[tp1], 16(%[src]) \n\t" "ulw %[tp2], 16(%[dst]) \n\t" "sw %[tn1], 8(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 12(%[dst]) \n\t" /* store */ "ulw %[tp3], 20(%[src]) \n\t" "ulw %[tp4], 20(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "ulw %[tp1], 24(%[src]) \n\t" "ulw %[tp2], 24(%[dst]) \n\t" "sw %[tn1], 16(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 20(%[dst]) \n\t" /* store */ "ulw %[tp3], 28(%[src]) \n\t" "ulw %[tp4], 28(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "sw %[tn1], 24(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 28(%[dst]) \n\t" /* store */ : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [tp4] "=&r"(tp4), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } break; case 64: prefetch_load(src + 64); prefetch_store(dst + 32); /* 16 word storage */ for (y = h; y--;) { prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_load(src + src_stride + 64); prefetch_store(dst + dst_stride); prefetch_store(dst + dst_stride + 32); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 0(%[dst]) \n\t" "ulw %[tp3], 4(%[src]) \n\t" "ulw %[tp4], 4(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "ulw %[tp1], 8(%[src]) \n\t" "ulw %[tp2], 8(%[dst]) \n\t" "sw %[tn1], 0(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 4(%[dst]) \n\t" /* store */ "ulw %[tp3], 12(%[src]) \n\t" "ulw %[tp4], 12(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "ulw %[tp1], 16(%[src]) \n\t" "ulw %[tp2], 16(%[dst]) \n\t" "sw %[tn1], 8(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 12(%[dst]) \n\t" /* store */ "ulw %[tp3], 20(%[src]) \n\t" "ulw %[tp4], 20(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "ulw %[tp1], 24(%[src]) \n\t" "ulw %[tp2], 24(%[dst]) \n\t" "sw %[tn1], 16(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 20(%[dst]) \n\t" /* store */ "ulw %[tp3], 28(%[src]) \n\t" "ulw %[tp4], 28(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "ulw %[tp1], 32(%[src]) \n\t" "ulw %[tp2], 32(%[dst]) \n\t" "sw %[tn1], 24(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 28(%[dst]) \n\t" /* store */ "ulw %[tp3], 36(%[src]) \n\t" "ulw %[tp4], 36(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "ulw %[tp1], 40(%[src]) \n\t" "ulw %[tp2], 40(%[dst]) \n\t" "sw %[tn1], 32(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 36(%[dst]) \n\t" /* store */ "ulw %[tp3], 44(%[src]) \n\t" "ulw %[tp4], 44(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "ulw %[tp1], 48(%[src]) \n\t" "ulw %[tp2], 48(%[dst]) \n\t" "sw %[tn1], 40(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 44(%[dst]) \n\t" /* store */ "ulw %[tp3], 52(%[src]) \n\t" "ulw %[tp4], 52(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "ulw %[tp1], 56(%[src]) \n\t" "ulw %[tp2], 56(%[dst]) \n\t" "sw %[tn1], 48(%[dst]) \n\t" /* 
store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 52(%[dst]) \n\t" /* store */ "ulw %[tp3], 60(%[src]) \n\t" "ulw %[tp4], 60(%[dst]) \n\t" "adduh_r.qb %[tn1], %[tp2], %[tp1] \n\t" /* average */ "sw %[tn1], 56(%[dst]) \n\t" /* store */ "adduh_r.qb %[tn2], %[tp3], %[tp4] \n\t" /* average */ "sw %[tn2], 60(%[dst]) \n\t" /* store */ : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [tp4] "=&r"(tp4), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } break; default: for (y = h; y > 0; --y) { for (x = 0; x < w; ++x) { dst[x] = (dst[x] + src[x] + 1) >> 1; } src += src_stride; dst += dst_stride; } break; } } #endif libvpx-1.8.2/vpx_dsp/mips/convolve8_avg_horiz_dspr2.c000066400000000000000000001710501357355204000227420ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_convolve.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 static void convolve_avg_horiz_4_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y; uint8_t *cm = vpx_ff_cropTbl; int32_t vector1b, vector2b, vector3b, vector4b; int32_t Temp1, Temp2, Temp3, Temp4; uint32_t vector4a = 64; uint32_t tp1, tp2; uint32_t p1, p2, p3, p4; uint32_t n1, n2, n3, n4; uint32_t tn1, tn2; vector1b = ((const int32_t *)filter_x0)[0]; vector2b = ((const int32_t *)filter_x0)[1]; vector3b = ((const int32_t *)filter_x0)[2]; vector4b = ((const int32_t *)filter_x0)[3]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "preceu.ph.qbl %[p4], %[tp2] \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" "ulw %[tn2], 8(%[src]) \n\t" "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" "extp %[Temp1], $ac3, 31 \n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tn2] \n\t" "balign %[tn1], %[tn2], 3 \n\t" "balign %[tn2], %[tp2], 3 \n\t" "balign %[tp2], %[tp1], 3 \n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" "extp %[Temp3], $ac2, 31 \n\t" "lbu %[p2], 3(%[dst]) \n\t" /* load odd 2 */ /* odd 1. 
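pixel */

/* Illustrative note: the averaging horizontal kernels in this file keep
 * all eight taps, packed two per word into vector1b..vector4b, and
 * interleave the "lbu ... (%[dst])" loads of the existing prediction
 * between the MACs so the destination bytes are already in registers
 * when "addqh_r.w" folds them in. One averaged output, in scalar form
 * (hypothetical name; src points at the first tap position):
 *
 *   static uint8_t avg_horiz_px_sketch(const uint8_t *src,
 *                                      const int16_t *f, uint8_t old) {
 *     int k, sum = 0, v;
 *     for (k = 0; k < 8; ++k) sum += src[k] * f[k];
 *     v = (sum + 64) >> 7;
 *     v = v < 0 ? 0 : (v > 255 ? 255 : v);
 *     return (uint8_t)((v + old + 1) >> 1);
 *   }
 */

/* odd 1.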
pixel */ "lbux %[tp1], %[Temp1](%[cm]) \n\t" /* even 1 */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "lbu %[Temp1], 1(%[dst]) \n\t" /* load odd 1 */ "preceu.ph.qbr %[n1], %[tp2] \n\t" "preceu.ph.qbl %[n2], %[tp2] \n\t" "preceu.ph.qbr %[n3], %[tn2] \n\t" "preceu.ph.qbl %[n4], %[tn2] \n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" "dpa.w.ph $ac3, %[n4], %[vector4b] \n\t" "extp %[Temp2], $ac3, 31 \n\t" "lbu %[tn2], 0(%[dst]) \n\t" /* load even 1 */ /* odd 2. pixel */ "lbux %[tp2], %[Temp3](%[cm]) \n\t" /* even 2 */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[n1], %[tn1] \n\t" "lbux %[tn1], %[Temp2](%[cm]) \n\t" /* odd 1 */ "addqh_r.w %[tn2], %[tn2], %[tp1] \n\t" /* average even 1 */ "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" "dpa.w.ph $ac2, %[n1], %[vector4b] \n\t" "extp %[Temp4], $ac2, 31 \n\t" "lbu %[tp1], 2(%[dst]) \n\t" /* load even 2 */ "sb %[tn2], 0(%[dst]) \n\t" /* store even 1 */ /* clamp */ "addqh_r.w %[Temp1], %[Temp1], %[tn1] \n\t" /* average odd 1 */ "lbux %[n2], %[Temp4](%[cm]) \n\t" /* odd 2 */ "sb %[Temp1], 1(%[dst]) \n\t" /* store odd 1 */ "addqh_r.w %[tp1], %[tp1], %[tp2] \n\t" /* average even 2 */ "sb %[tp1], 2(%[dst]) \n\t" /* store even 2 */ "addqh_r.w %[p2], %[p2], %[n2] \n\t" /* average odd 2 */ "sb %[p2], 3(%[dst]) \n\t" /* store odd 2 */ : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [n1] "=&r"(n1), [n2] "=&r"(n2), [n3] "=&r"(n3), [n4] "=&r"(n4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); /* Next row... */ src += src_stride; dst += dst_stride; } } static void convolve_avg_horiz_8_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector4a = 64; int32_t vector1b, vector2b, vector3b, vector4b; int32_t Temp1, Temp2, Temp3; uint32_t tp1, tp2; uint32_t p1, p2, p3, p4, n1; uint32_t tn1, tn2, tn3; uint32_t st0, st1; vector1b = ((const int32_t *)filter_x0)[0]; vector2b = ((const int32_t *)filter_x0)[1]; vector3b = ((const int32_t *)filter_x0)[2]; vector4b = ((const int32_t *)filter_x0)[3]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "preceu.ph.qbl %[p4], %[tp2] \n\t" "ulw %[tn2], 8(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" "extp %[Temp1], $ac3, 31 \n\t" "lbu %[Temp2], 0(%[dst]) \n\t" "lbu %[tn3], 2(%[dst]) \n\t" /* even 2. 
pixel */ "preceu.ph.qbr %[p1], %[tn2] \n\t" "preceu.ph.qbl %[n1], %[tn2] \n\t" "ulw %[tn1], 12(%[src]) \n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" "extp %[Temp3], $ac2, 31 \n\t" /* even 3. pixel */ "lbux %[st0], %[Temp1](%[cm]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p2], %[tn1] \n\t" "lbux %[st1], %[Temp3](%[cm]) \n\t" "dpa.w.ph $ac1, %[p3], %[vector1b] \n\t" "dpa.w.ph $ac1, %[p4], %[vector2b] \n\t" "dpa.w.ph $ac1, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac1, %[n1], %[vector4b] \n\t" "extp %[Temp1], $ac1, 31 \n\t" "addqh_r.w %[Temp2], %[Temp2], %[st0] \n\t" "addqh_r.w %[tn3], %[tn3], %[st1] \n\t" "sb %[Temp2], 0(%[dst]) \n\t" "sb %[tn3], 2(%[dst]) \n\t" /* even 4. pixel */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "balign %[tn3], %[tn1], 3 \n\t" "balign %[tn1], %[tn2], 3 \n\t" "balign %[tn2], %[tp2], 3 \n\t" "balign %[tp2], %[tp1], 3 \n\t" "lbux %[st0], %[Temp1](%[cm]) \n\t" "lbu %[Temp2], 4(%[dst]) \n\t" "addqh_r.w %[Temp2], %[Temp2], %[st0] \n\t" "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" "extp %[Temp3], $ac2, 31 \n\t" /* odd 1. pixel */ "mtlo %[vector4a], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sb %[Temp2], 4(%[dst]) \n\t" "preceu.ph.qbr %[p1], %[tp2] \n\t" "preceu.ph.qbl %[p2], %[tp2] \n\t" "preceu.ph.qbr %[p3], %[tn2] \n\t" "preceu.ph.qbl %[p4], %[tn2] \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" "extp %[Temp2], $ac3, 31 \n\t" "lbu %[tp1], 6(%[dst]) \n\t" /* odd 2. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tn1] \n\t" "preceu.ph.qbl %[n1], %[tn1] \n\t" "lbux %[st0], %[Temp3](%[cm]) \n\t" "dpa.w.ph $ac1, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac1, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac1, %[p4], %[vector3b] \n\t" "dpa.w.ph $ac1, %[p1], %[vector4b] \n\t" "extp %[Temp3], $ac1, 31 \n\t" "lbu %[tp2], 1(%[dst]) \n\t" "lbu %[tn2], 3(%[dst]) \n\t" "addqh_r.w %[tp1], %[tp1], %[st0] \n\t" /* odd 3. pixel */ "lbux %[st1], %[Temp2](%[cm]) \n\t" "preceu.ph.qbr %[p2], %[tn3] \n\t" "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac3, %[n1], %[vector4b] \n\t" "addqh_r.w %[tp2], %[tp2], %[st1] \n\t" "extp %[Temp2], $ac3, 31 \n\t" "lbu %[tn3], 5(%[dst]) \n\t" /* odd 4. 
pixel */ "sb %[tp2], 1(%[dst]) \n\t" "sb %[tp1], 6(%[dst]) \n\t" "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" "extp %[Temp1], $ac2, 31 \n\t" "lbu %[tn1], 7(%[dst]) \n\t" /* clamp */ "lbux %[p4], %[Temp3](%[cm]) \n\t" "addqh_r.w %[tn2], %[tn2], %[p4] \n\t" "lbux %[p2], %[Temp2](%[cm]) \n\t" "addqh_r.w %[tn3], %[tn3], %[p2] \n\t" "lbux %[n1], %[Temp1](%[cm]) \n\t" "addqh_r.w %[tn1], %[tn1], %[n1] \n\t" /* store bytes */ "sb %[tn2], 3(%[dst]) \n\t" "sb %[tn3], 5(%[dst]) \n\t" "sb %[tn1], 7(%[dst]) \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2), [tn3] "=&r"(tn3), [st0] "=&r"(st0), [st1] "=&r"(st1), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [n1] "=&r"(n1), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); /* Next row... */ src += src_stride; dst += dst_stride; } } static void convolve_avg_horiz_16_dspr2(const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, const int16_t *filter_x0, int32_t h, int32_t count) { int32_t y, c; const uint8_t *src; uint8_t *dst; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector_64 = 64; int32_t filter12, filter34, filter56, filter78; int32_t Temp1, Temp2, Temp3; uint32_t qload1, qload2, qload3; uint32_t p1, p2, p3, p4, p5; uint32_t st1, st2, st3; filter12 = ((const int32_t *)filter_x0)[0]; filter34 = ((const int32_t *)filter_x0)[1]; filter56 = ((const int32_t *)filter_x0)[2]; filter78 = ((const int32_t *)filter_x0)[3]; for (y = h; y--;) { src = src_ptr; dst = dst_ptr; /* prefetch data to cache memory */ prefetch_load(src_ptr + src_stride); prefetch_load(src_ptr + src_stride + 32); prefetch_store(dst_ptr + dst_stride); for (c = 0; c < count; c++) { __asm__ __volatile__( "ulw %[qload1], 0(%[src]) \n\t" "ulw %[qload2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ "mthi $zero, $ac1 \n\t" "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "ulw %[qload3], 8(%[src]) \n\t" "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ "lbu %[st2], 0(%[dst]) \n\t" /* load even 1 from dst */ /* even 2. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "ulw %[qload1], 12(%[src]) \n\t" "dpa.w.ph $ac2, %[p2], %[filter12] \n\t" /* even 1 */ "dpa.w.ph $ac2, %[p3], %[filter34] \n\t" /* even 1 */ "dpa.w.ph $ac2, %[p4], %[filter56] \n\t" /* even 1 */ "dpa.w.ph $ac2, %[p1], %[filter78] \n\t" /* even 1 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ "lbu %[qload3], 2(%[dst]) \n\t" /* load even 2 from dst */ /* even 3. 
pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 1 */ "preceu.ph.qbr %[p2], %[qload1] \n\t" "sb %[st2], 0(%[dst]) \n\t" /* store even 1 to dst */ "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */ "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */ "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */ "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */ "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ /* even 4. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload3], %[qload3], %[st2] \n\t" /* average even 2 */ "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[qload3], 2(%[dst]) \n\t" /* store even 2 to dst */ "ulw %[qload2], 16(%[src]) \n\t" "lbu %[qload3], 4(%[dst]) \n\t" /* load even 3 from dst */ "lbu %[qload1], 6(%[dst]) \n\t" /* load even 4 from dst */ "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */ "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */ "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */ "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ /* even 5. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 3 */ "preceu.ph.qbr %[p4], %[qload2] \n\t" "sb %[qload3], 4(%[dst]) \n\t" /* store even 3 to dst */ "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */ "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */ "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */ "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ /* even 6. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average even 4 */ "preceu.ph.qbl %[p1], %[qload2] \n\t" "sb %[qload1], 6(%[dst]) \n\t" /* store even 4 to dst */ "ulw %[qload3], 20(%[src]) \n\t" "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */ "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */ "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */ "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */ "lbu %[qload2], 8(%[dst]) \n\t" /* load even 5 from dst */ "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ /* even 7. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 5 */ "preceu.ph.qbr %[p5], %[qload3] \n\t" "sb %[qload2], 8(%[dst]) \n\t" /* store even 5 to dst */ "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */ "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */ "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */ "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */ "lbu %[qload3], 10(%[dst]) \n\t" /* load even 6 from dst */ "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ "lbu %[st2], 12(%[dst]) \n\t" /* load even 7 from dst */ /* even 8. 
pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 6 */ "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */ "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */ "sb %[qload3], 10(%[dst]) \n\t" /* store even 6 to dst */ "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */ "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ /* ODD pixels */ "ulw %[qload1], 1(%[src]) \n\t" "ulw %[qload2], 5(%[src]) \n\t" "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 7 */ /* odd 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "sb %[st2], 12(%[dst]) \n\t" /* store even 7 to dst */ "ulw %[qload3], 9(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */ "lbu %[qload2], 14(%[dst]) \n\t" /* load even 8 from dst */ "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ "lbu %[st1], 1(%[dst]) \n\t" /* load odd 1 from dst */ /* odd 2. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 8 */ "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "sb %[qload2], 14(%[dst]) \n\t" /* store even 8 to dst */ "ulw %[qload1], 13(%[src]) \n\t" "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */ "lbu %[qload3], 3(%[dst]) \n\t" /* load odd 2 from dst */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ /* odd 3. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[st3], %[st3], %[st1] \n\t" /* average odd 1 */ "preceu.ph.qbr %[p2], %[qload1] \n\t" "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */ "sb %[st3], 1(%[dst]) \n\t" /* store odd 1 to dst */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ /* odd 4. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[qload3], %[qload3], %[st1] \n\t" /* average odd 2 */ "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[qload3], 3(%[dst]) \n\t" /* store odd 2 to dst */ "lbu %[qload1], 5(%[dst]) \n\t" /* load odd 3 from dst */ "ulw %[qload2], 17(%[src]) \n\t" "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ "lbu %[st1], 7(%[dst]) \n\t" /* load odd 4 from dst */ /* odd 5. 
pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload1], %[qload1], %[st2] \n\t" /* average odd 3 */ "preceu.ph.qbr %[p4], %[qload2] \n\t" "sb %[qload1], 5(%[dst]) \n\t" /* store odd 3 to dst */ "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ "lbu %[qload1], 9(%[dst]) \n\t" /* load odd 5 from dst */ /* odd 6. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[st1], %[st1], %[st3] \n\t" /* average odd 4 */ "preceu.ph.qbl %[p1], %[qload2] \n\t" "sb %[st1], 7(%[dst]) \n\t" /* store odd 4 to dst */ "ulw %[qload3], 21(%[src]) \n\t" "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ /* odd 7. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 5 */ "preceu.ph.qbr %[p5], %[qload3] \n\t" "sb %[qload1], 9(%[dst]) \n\t" /* store odd 5 to dst */ "lbu %[qload2], 11(%[dst]) \n\t" /* load odd 6 from dst */ "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ "lbu %[qload3], 13(%[dst]) \n\t" /* load odd 7 from dst */ /* odd 8. pixel */ "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ "lbu %[qload1], 15(%[dst]) \n\t" /* load odd 8 from dst */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average odd 6 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average odd 7 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 8 */ "sb %[qload2], 11(%[dst]) \n\t" /* store odd 6 to dst */ "sb %[qload3], 13(%[dst]) \n\t" /* store odd 7 to dst */ "sb %[qload1], 15(%[dst]) \n\t" /* store odd 8 to dst */ : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [qload3] "=&r"(qload3), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) : [filter12] "r"(filter12), [filter34] "r"(filter34), [filter56] "r"(filter56), [filter78] "r"(filter78), [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); src += 16; dst += 16; } /* Next row... 
*/ src_ptr += src_stride; dst_ptr += dst_stride; } } static void convolve_avg_horiz_64_dspr2(const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y, c; const uint8_t *src; uint8_t *dst; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector_64 = 64; int32_t filter12, filter34, filter56, filter78; int32_t Temp1, Temp2, Temp3; uint32_t qload1, qload2, qload3; uint32_t p1, p2, p3, p4, p5; uint32_t st1, st2, st3; filter12 = ((const int32_t *)filter_x0)[0]; filter34 = ((const int32_t *)filter_x0)[1]; filter56 = ((const int32_t *)filter_x0)[2]; filter78 = ((const int32_t *)filter_x0)[3]; for (y = h; y--;) { src = src_ptr; dst = dst_ptr; /* prefetch data to cache memory */ prefetch_load(src_ptr + src_stride); prefetch_load(src_ptr + src_stride + 32); prefetch_load(src_ptr + src_stride + 64); prefetch_store(dst_ptr + dst_stride); prefetch_store(dst_ptr + dst_stride + 32); for (c = 0; c < 4; c++) { __asm__ __volatile__( "ulw %[qload1], 0(%[src]) \n\t" "ulw %[qload2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ "mthi $zero, $ac1 \n\t" "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "ulw %[qload3], 8(%[src]) \n\t" "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ "lbu %[st2], 0(%[dst]) \n\t" /* load even 1 from dst */ /* even 2. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "ulw %[qload1], 12(%[src]) \n\t" "dpa.w.ph $ac2, %[p2], %[filter12] \n\t" /* even 1 */ "dpa.w.ph $ac2, %[p3], %[filter34] \n\t" /* even 1 */ "dpa.w.ph $ac2, %[p4], %[filter56] \n\t" /* even 1 */ "dpa.w.ph $ac2, %[p1], %[filter78] \n\t" /* even 1 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ "lbu %[qload3], 2(%[dst]) \n\t" /* load even 2 from dst */ /* even 3. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 1 */ "preceu.ph.qbr %[p2], %[qload1] \n\t" "sb %[st2], 0(%[dst]) \n\t" /* store even 1 to dst */ "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */ "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */ "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */ "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */ "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ /* even 4. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload3], %[qload3], %[st2] \n\t" /* average even 2 */ "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[qload3], 2(%[dst]) \n\t" /* store even 2 to dst */ "ulw %[qload2], 16(%[src]) \n\t" "lbu %[qload3], 4(%[dst]) \n\t" /* load even 3 from dst */ "lbu %[qload1], 6(%[dst]) \n\t" /* load even 4 from dst */ "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */ "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */ "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */ "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ /* even 5. 
pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 3 */ "preceu.ph.qbr %[p4], %[qload2] \n\t" "sb %[qload3], 4(%[dst]) \n\t" /* store even 3 to dst */ "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */ "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */ "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */ "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ /* even 6. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average even 4 */ "preceu.ph.qbl %[p1], %[qload2] \n\t" "sb %[qload1], 6(%[dst]) \n\t" /* store even 4 to dst */ "ulw %[qload3], 20(%[src]) \n\t" "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */ "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */ "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */ "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */ "lbu %[qload2], 8(%[dst]) \n\t" /* load even 5 from dst */ "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ /* even 7. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 5 */ "preceu.ph.qbr %[p5], %[qload3] \n\t" "sb %[qload2], 8(%[dst]) \n\t" /* store even 5 to dst */ "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */ "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */ "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */ "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */ "lbu %[qload3], 10(%[dst]) \n\t" /* load even 6 from dst */ "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ "lbu %[st2], 12(%[dst]) \n\t" /* load even 7 from dst */ /* even 8. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average even 6 */ "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */ "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */ "sb %[qload3], 10(%[dst]) \n\t" /* store even 6 to dst */ "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */ "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ /* ODD pixels */ "ulw %[qload1], 1(%[src]) \n\t" "ulw %[qload2], 5(%[src]) \n\t" "addqh_r.w %[st2], %[st2], %[st1] \n\t" /* average even 7 */ /* odd 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "sb %[st2], 12(%[dst]) \n\t" /* store even 7 to dst */ "ulw %[qload3], 9(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */ "lbu %[qload2], 14(%[dst]) \n\t" /* load even 8 from dst */ "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ "lbu %[st1], 1(%[dst]) \n\t" /* load odd 1 from dst */ /* odd 2. 
pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average even 8 */ "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "sb %[qload2], 14(%[dst]) \n\t" /* store even 8 to dst */ "ulw %[qload1], 13(%[src]) \n\t" "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */ "lbu %[qload3], 3(%[dst]) \n\t" /* load odd 2 from dst */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ /* odd 3. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[st3], %[st3], %[st1] \n\t" /* average odd 1 */ "preceu.ph.qbr %[p2], %[qload1] \n\t" "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */ "sb %[st3], 1(%[dst]) \n\t" /* store odd 1 to dst */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ /* odd 4. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[qload3], %[qload3], %[st1] \n\t" /* average odd 2 */ "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[qload3], 3(%[dst]) \n\t" /* store odd 2 to dst */ "lbu %[qload1], 5(%[dst]) \n\t" /* load odd 3 from dst */ "ulw %[qload2], 17(%[src]) \n\t" "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ "lbu %[st1], 7(%[dst]) \n\t" /* load odd 4 from dst */ /* odd 5. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ "mthi $zero, $ac2 \n\t" "addqh_r.w %[qload1], %[qload1], %[st2] \n\t" /* average odd 3 */ "preceu.ph.qbr %[p4], %[qload2] \n\t" "sb %[qload1], 5(%[dst]) \n\t" /* store odd 3 to dst */ "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ "lbu %[qload1], 9(%[dst]) \n\t" /* load odd 5 from dst */ /* odd 6. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ "mthi $zero, $ac3 \n\t" "addqh_r.w %[st1], %[st1], %[st3] \n\t" /* average odd 4 */ "preceu.ph.qbl %[p1], %[qload2] \n\t" "sb %[st1], 7(%[dst]) \n\t" /* store odd 4 to dst */ "ulw %[qload3], 21(%[src]) \n\t" "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ /* odd 7. 
pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ "mthi $zero, $ac1 \n\t" "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 5 */ "preceu.ph.qbr %[p5], %[qload3] \n\t" "sb %[qload1], 9(%[dst]) \n\t" /* store odd 5 to dst */ "lbu %[qload2], 11(%[dst]) \n\t" /* load odd 6 from dst */ "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ "lbu %[qload3], 13(%[dst]) \n\t" /* load odd 7 from dst */ /* odd 8. pixel */ "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ "lbu %[qload1], 15(%[dst]) \n\t" /* load odd 8 from dst */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ "addqh_r.w %[qload2], %[qload2], %[st2] \n\t" /* average odd 6 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ "addqh_r.w %[qload3], %[qload3], %[st3] \n\t" /* average odd 7 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ "addqh_r.w %[qload1], %[qload1], %[st1] \n\t" /* average odd 8 */ "sb %[qload2], 11(%[dst]) \n\t" /* store odd 6 to dst */ "sb %[qload3], 13(%[dst]) \n\t" /* store odd 7 to dst */ "sb %[qload1], 15(%[dst]) \n\t" /* store odd 8 to dst */ : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [qload3] "=&r"(qload3), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) : [filter12] "r"(filter12), [filter34] "r"(filter34), [filter56] "r"(filter56), [filter78] "r"(filter78), [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); src += 16; dst += 16; } /* Next row... 
*/ src_ptr += src_stride; dst_ptr += dst_stride; } } void vpx_convolve8_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16_t *const filter_x = filter[x0_q4]; assert(x_step_q4 == 16); assert(((const int32_t *)filter_x)[1] != 0x800000); if (vpx_get_filter_taps(filter_x) == 2) { vpx_convolve2_avg_horiz_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { uint32_t pos = 38; src -= 3; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); /* prefetch data to cache memory */ prefetch_load(src); prefetch_load(src + 32); prefetch_store(dst); switch (w) { case 4: convolve_avg_horiz_4_dspr2(src, src_stride, dst, dst_stride, filter_x, h); break; case 8: convolve_avg_horiz_8_dspr2(src, src_stride, dst, dst_stride, filter_x, h); break; case 16: convolve_avg_horiz_16_dspr2(src, src_stride, dst, dst_stride, filter_x, h, 1); break; case 32: convolve_avg_horiz_16_dspr2(src, src_stride, dst, dst_stride, filter_x, h, 2); break; case 64: prefetch_load(src + 64); prefetch_store(dst + 32); convolve_avg_horiz_64_dspr2(src, src_stride, dst, dst_stride, filter_x, h); break; default: vpx_convolve8_avg_horiz_c(src + 3, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } } #endif libvpx-1.8.2/vpx_dsp/mips/convolve8_dspr2.c000066400000000000000000002214571357355204000207010ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 static void convolve_horiz_4_transposed_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y; uint8_t *cm = vpx_ff_cropTbl; uint8_t *dst_ptr; int32_t vector1b, vector2b, vector3b, vector4b; int32_t Temp1, Temp2, Temp3, Temp4; uint32_t vector4a = 64; uint32_t tp1, tp2; uint32_t p1, p2, p3, p4; uint32_t tn1, tn2; vector1b = ((const int32_t *)filter_x0)[0]; vector2b = ((const int32_t *)filter_x0)[1]; vector3b = ((const int32_t *)filter_x0)[2]; vector4b = ((const int32_t *)filter_x0)[3]; for (y = h; y--;) { dst_ptr = dst; /* prefetch data to cache memory */ prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "preceu.ph.qbl %[p4], %[tp2] \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" "ulw %[tn2], 8(%[src]) \n\t" "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" "extp %[Temp1], $ac3, 31 \n\t" /* even 2. 
pixel */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tn2] \n\t" "balign %[tn1], %[tn2], 3 \n\t" "balign %[tn2], %[tp2], 3 \n\t" "balign %[tp2], %[tp1], 3 \n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" "extp %[Temp3], $ac2, 31 \n\t" /* odd 1. pixel */ "lbux %[tp1], %[Temp1](%[cm]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp2] \n\t" "preceu.ph.qbl %[p2], %[tp2] \n\t" "preceu.ph.qbr %[p3], %[tn2] \n\t" "preceu.ph.qbl %[p4], %[tn2] \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" "extp %[Temp2], $ac3, 31 \n\t" /* odd 2. pixel */ "lbux %[tp2], %[Temp3](%[cm]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tn1] \n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" "extp %[Temp4], $ac2, 31 \n\t" /* clamp */ "lbux %[tn1], %[Temp2](%[cm]) \n\t" "lbux %[p2], %[Temp4](%[cm]) \n\t" /* store bytes */ "sb %[tp1], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" "sb %[tn1], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" "sb %[tp2], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" "sb %[p2], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_stride] \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4), [dst_ptr] "+r"(dst_ptr) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), [vector4a] "r"(vector4a), [cm] "r"(cm), [src] "r"(src), [dst_stride] "r"(dst_stride)); /* Next row... */ src += src_stride; dst += 1; } } static void convolve_horiz_8_transposed_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y; uint8_t *cm = vpx_ff_cropTbl; uint8_t *dst_ptr; uint32_t vector4a = 64; int32_t vector1b, vector2b, vector3b, vector4b; int32_t Temp1, Temp2, Temp3; uint32_t tp1, tp2, tp3; uint32_t p1, p2, p3, p4, n1; uint8_t *odd_dst; uint32_t dst_pitch_2 = (dst_stride << 1); vector1b = ((const int32_t *)filter_x0)[0]; vector2b = ((const int32_t *)filter_x0)[1]; vector3b = ((const int32_t *)filter_x0)[2]; vector4b = ((const int32_t *)filter_x0)[3]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); dst_ptr = dst; odd_dst = (dst_ptr + dst_stride); __asm__ __volatile__( "ulw %[tp2], 0(%[src]) \n\t" "ulw %[tp1], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tp2] \n\t" "preceu.ph.qbl %[p2], %[tp2] \n\t" "preceu.ph.qbr %[p3], %[tp1] \n\t" "preceu.ph.qbl %[p4], %[tp1] \n\t" "ulw %[tp3], 8(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" "extp %[Temp1], $ac3, 31 \n\t" /* even 2. 
pixel */ "preceu.ph.qbr %[p1], %[tp3] \n\t" "preceu.ph.qbl %[n1], %[tp3] \n\t" "ulw %[tp2], 12(%[src]) \n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" "extp %[Temp3], $ac2, 31 \n\t" /* even 3. pixel */ "lbux %[Temp2], %[Temp1](%[cm]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p2], %[tp2] \n\t" "dpa.w.ph $ac1, %[p3], %[vector1b] \n\t" "dpa.w.ph $ac1, %[p4], %[vector2b] \n\t" "dpa.w.ph $ac1, %[p1], %[vector3b] \n\t" "lbux %[tp3], %[Temp3](%[cm]) \n\t" "dpa.w.ph $ac1, %[n1], %[vector4b] \n\t" "extp %[p3], $ac1, 31 \n\t" /* even 4. pixel */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sb %[Temp2], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t" "sb %[tp3], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t" "ulw %[tp1], 1(%[src]) \n\t" "ulw %[tp3], 5(%[src]) \n\t" "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" "extp %[Temp3], $ac2, 31 \n\t" "lbux %[tp2], %[p3](%[cm]) \n\t" /* odd 1. pixel */ "mtlo %[vector4a], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp3] \n\t" "preceu.ph.qbl %[p4], %[tp3] \n\t" "sb %[tp2], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t" "ulw %[tp2], 9(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" "extp %[Temp2], $ac3, 31 \n\t" /* odd 2. pixel */ "lbux %[tp1], %[Temp3](%[cm]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tp2] \n\t" "preceu.ph.qbl %[n1], %[tp2] \n\t" "ulw %[Temp1], 13(%[src]) \n\t" "dpa.w.ph $ac1, %[p2], %[vector1b] \n\t" "sb %[tp1], 0(%[dst_ptr]) \n\t" "addu %[dst_ptr], %[dst_ptr], %[dst_pitch_2] \n\t" "dpa.w.ph $ac1, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac1, %[p4], %[vector3b] \n\t" "dpa.w.ph $ac1, %[p1], %[vector4b] \n\t" "extp %[Temp3], $ac1, 31 \n\t" /* odd 3. pixel */ "lbux %[tp3], %[Temp2](%[cm]) \n\t" "preceu.ph.qbr %[p2], %[Temp1] \n\t" "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac3, %[n1], %[vector4b] \n\t" "extp %[Temp2], $ac3, 31 \n\t" /* odd 4. 
pixel */ "sb %[tp3], 0(%[odd_dst]) \n\t" "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" "extp %[Temp1], $ac2, 31 \n\t" /* clamp */ "lbux %[p4], %[Temp3](%[cm]) \n\t" "lbux %[p2], %[Temp2](%[cm]) \n\t" "lbux %[n1], %[Temp1](%[cm]) \n\t" /* store bytes */ "sb %[p4], 0(%[odd_dst]) \n\t" "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" "sb %[p2], 0(%[odd_dst]) \n\t" "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] \n\t" "sb %[n1], 0(%[odd_dst]) \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [n1] "=&r"(n1), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [dst_ptr] "+r"(dst_ptr), [odd_dst] "+r"(odd_dst) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), [vector4a] "r"(vector4a), [cm] "r"(cm), [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2)); /* Next row... */ src += src_stride; dst += 1; } } static void convolve_horiz_16_transposed_dspr2( const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, const int16_t *filter_x0, int32_t h, int32_t count) { int32_t c, y; const uint8_t *src; uint8_t *dst; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector_64 = 64; int32_t filter12, filter34, filter56, filter78; int32_t Temp1, Temp2, Temp3; uint32_t qload1, qload2; uint32_t p1, p2, p3, p4, p5; uint32_t st1, st2, st3; uint32_t dst_pitch_2 = (dst_stride << 1); uint8_t *odd_dst; filter12 = ((const int32_t *)filter_x0)[0]; filter34 = ((const int32_t *)filter_x0)[1]; filter56 = ((const int32_t *)filter_x0)[2]; filter78 = ((const int32_t *)filter_x0)[3]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src_ptr + src_stride); prefetch_load(src_ptr + src_stride + 32); src = src_ptr; dst = dst_ptr; odd_dst = (dst + dst_stride); for (c = 0; c < count; c++) { __asm__ __volatile__( "ulw %[qload1], 0(%[src]) " "\n\t" "ulw %[qload2], 4(%[src]) " "\n\t" /* even 1. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* even 1 */ "mthi $zero, $ac1 " "\n\t" "mtlo %[vector_64], $ac2 " "\n\t" /* even 2 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p3], %[qload2] " "\n\t" "preceu.ph.qbl %[p4], %[qload2] " "\n\t" "preceu.ph.qbr %[p1], %[qload1] " "\n\t" "preceu.ph.qbl %[p2], %[qload1] " "\n\t" "ulw %[qload2], 8(%[src]) " "\n\t" "dpa.w.ph $ac1, %[p1], %[filter12] " "\n\t" /* even 1 */ "dpa.w.ph $ac1, %[p2], %[filter34] " "\n\t" /* even 1 */ "dpa.w.ph $ac1, %[p3], %[filter56] " "\n\t" /* even 1 */ "dpa.w.ph $ac1, %[p4], %[filter78] " "\n\t" /* even 1 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* even 1 */ /* even 2. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* even 3 */ "mthi $zero, $ac3 " "\n\t" "preceu.ph.qbr %[p1], %[qload2] " "\n\t" "preceu.ph.qbl %[p5], %[qload2] " "\n\t" "ulw %[qload1], 12(%[src]) " "\n\t" "dpa.w.ph $ac2, %[p2], %[filter12] " "\n\t" /* even 1 */ "dpa.w.ph $ac2, %[p3], %[filter34] " "\n\t" /* even 1 */ "dpa.w.ph $ac2, %[p4], %[filter56] " "\n\t" /* even 1 */ "dpa.w.ph $ac2, %[p1], %[filter78] " "\n\t" /* even 1 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* even 1 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* even 1 */ /* even 3. 
pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* even 4 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbr %[p2], %[qload1] " "\n\t" "sb %[st1], 0(%[dst]) " "\n\t" /* even 1 */ "addu %[dst], %[dst], %[dst_pitch_2] " " \n\t" "dpa.w.ph $ac3, %[p3], %[filter12] " "\n\t" /* even 3 */ "dpa.w.ph $ac3, %[p4], %[filter34] " "\n\t" /* even 3 */ "dpa.w.ph $ac3, %[p1], %[filter56] " "\n\t" /* even 3 */ "dpa.w.ph $ac3, %[p5], %[filter78] " "\n\t" /* even 3 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* even 3 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* even 1 */ /* even 4. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* even 5 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbl %[p3], %[qload1] " "\n\t" "sb %[st2], 0(%[dst]) " "\n\t" /* even 2 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "ulw %[qload2], 16(%[src]) " "\n\t" "dpa.w.ph $ac1, %[p4], %[filter12] " "\n\t" /* even 4 */ "dpa.w.ph $ac1, %[p1], %[filter34] " "\n\t" /* even 4 */ "dpa.w.ph $ac1, %[p5], %[filter56] " "\n\t" /* even 4 */ "dpa.w.ph $ac1, %[p2], %[filter78] " "\n\t" /* even 4 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* even 4 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* even 3 */ /* even 5. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* even 6 */ "mthi $zero, $ac3 " "\n\t" "preceu.ph.qbr %[p4], %[qload2] " "\n\t" "sb %[st3], 0(%[dst]) " "\n\t" /* even 3 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac2, %[p1], %[filter12] " "\n\t" /* even 5 */ "dpa.w.ph $ac2, %[p5], %[filter34] " "\n\t" /* even 5 */ "dpa.w.ph $ac2, %[p2], %[filter56] " "\n\t" /* even 5 */ "dpa.w.ph $ac2, %[p3], %[filter78] " "\n\t" /* even 5 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* even 5 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* even 4 */ /* even 6. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* even 7 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbl %[p1], %[qload2] " "\n\t" "sb %[st1], 0(%[dst]) " "\n\t" /* even 4 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "ulw %[qload1], 20(%[src]) " "\n\t" "dpa.w.ph $ac3, %[p5], %[filter12] " "\n\t" /* even 6 */ "dpa.w.ph $ac3, %[p2], %[filter34] " "\n\t" /* even 6 */ "dpa.w.ph $ac3, %[p3], %[filter56] " "\n\t" /* even 6 */ "dpa.w.ph $ac3, %[p4], %[filter78] " "\n\t" /* even 6 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* even 6 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* even 5 */ /* even 7. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* even 8 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p5], %[qload1] " "\n\t" "sb %[st2], 0(%[dst]) " "\n\t" /* even 5 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac1, %[p2], %[filter12] " "\n\t" /* even 7 */ "dpa.w.ph $ac1, %[p3], %[filter34] " "\n\t" /* even 7 */ "dpa.w.ph $ac1, %[p4], %[filter56] " "\n\t" /* even 7 */ "dpa.w.ph $ac1, %[p1], %[filter78] " "\n\t" /* even 7 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* even 7 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* even 6 */ /* even 8. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* odd 1 */ "mthi $zero, $ac3 " "\n\t" "dpa.w.ph $ac2, %[p3], %[filter12] " "\n\t" /* even 8 */ "dpa.w.ph $ac2, %[p4], %[filter34] " "\n\t" /* even 8 */ "sb %[st3], 0(%[dst]) " "\n\t" /* even 6 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac2, %[p1], %[filter56] " "\n\t" /* even 8 */ "dpa.w.ph $ac2, %[p5], %[filter78] " "\n\t" /* even 8 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* even 8 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* even 7 */ /* ODD pixels */ "ulw %[qload1], 1(%[src]) " "\n\t" "ulw %[qload2], 5(%[src]) " "\n\t" /* odd 1. 
pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* odd 2 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbr %[p1], %[qload1] " "\n\t" "preceu.ph.qbl %[p2], %[qload1] " "\n\t" "preceu.ph.qbr %[p3], %[qload2] " "\n\t" "preceu.ph.qbl %[p4], %[qload2] " "\n\t" "sb %[st1], 0(%[dst]) " "\n\t" /* even 7 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "ulw %[qload2], 9(%[src]) " "\n\t" "dpa.w.ph $ac3, %[p1], %[filter12] " "\n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p2], %[filter34] " "\n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p3], %[filter56] " "\n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p4], %[filter78] " "\n\t" /* odd 1 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* odd 1 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* even 8 */ /* odd 2. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* odd 3 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p1], %[qload2] " "\n\t" "preceu.ph.qbl %[p5], %[qload2] " "\n\t" "sb %[st2], 0(%[dst]) " "\n\t" /* even 8 */ "ulw %[qload1], 13(%[src]) " "\n\t" "dpa.w.ph $ac1, %[p2], %[filter12] " "\n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p3], %[filter34] " "\n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p4], %[filter56] " "\n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p1], %[filter78] " "\n\t" /* odd 2 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* odd 2 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* odd 1 */ /* odd 3. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* odd 4 */ "mthi $zero, $ac3 " "\n\t" "preceu.ph.qbr %[p2], %[qload1] " "\n\t" "sb %[st3], 0(%[odd_dst]) " "\n\t" /* odd 1 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac2, %[p3], %[filter12] " "\n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p4], %[filter34] " "\n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p1], %[filter56] " "\n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p5], %[filter78] " "\n\t" /* odd 3 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* odd 3 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* odd 2 */ /* odd 4. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* odd 5 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbl %[p3], %[qload1] " "\n\t" "sb %[st1], 0(%[odd_dst]) " "\n\t" /* odd 2 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "ulw %[qload2], 17(%[src]) " "\n\t" "dpa.w.ph $ac3, %[p4], %[filter12] " "\n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p1], %[filter34] " "\n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p5], %[filter56] " "\n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p2], %[filter78] " "\n\t" /* odd 4 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* odd 4 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* odd 3 */ /* odd 5. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* odd 6 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p4], %[qload2] " "\n\t" "sb %[st2], 0(%[odd_dst]) " "\n\t" /* odd 3 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac1, %[p1], %[filter12] " "\n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p5], %[filter34] " "\n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p2], %[filter56] " "\n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p3], %[filter78] " "\n\t" /* odd 5 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* odd 5 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* odd 4 */ /* odd 6. 
pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* odd 7 */ "mthi $zero, $ac3 " "\n\t" "preceu.ph.qbl %[p1], %[qload2] " "\n\t" "sb %[st3], 0(%[odd_dst]) " "\n\t" /* odd 4 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "ulw %[qload1], 21(%[src]) " "\n\t" "dpa.w.ph $ac2, %[p5], %[filter12] " "\n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p2], %[filter34] " "\n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p3], %[filter56] " "\n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p4], %[filter78] " "\n\t" /* odd 6 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* odd 6 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* odd 5 */ /* odd 7. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* odd 8 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbr %[p5], %[qload1] " "\n\t" "sb %[st1], 0(%[odd_dst]) " "\n\t" /* odd 5 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac3, %[p2], %[filter12] " "\n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p3], %[filter34] " "\n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p4], %[filter56] " "\n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p1], %[filter78] " "\n\t" /* odd 7 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* odd 7 */ /* odd 8. pixel */ "dpa.w.ph $ac1, %[p3], %[filter12] " "\n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p4], %[filter34] " "\n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p1], %[filter56] " "\n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p5], %[filter78] " "\n\t" /* odd 8 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* odd 8 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* odd 6 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* odd 7 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* odd 8 */ "sb %[st2], 0(%[odd_dst]) " "\n\t" /* odd 6 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "sb %[st3], 0(%[odd_dst]) " "\n\t" /* odd 7 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "sb %[st1], 0(%[odd_dst]) " "\n\t" /* odd 8 */ : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [p5] "=&r"(p5), [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [dst] "+r"(dst), [odd_dst] "+r"(odd_dst) : [filter12] "r"(filter12), [filter34] "r"(filter34), [filter56] "r"(filter56), [filter78] "r"(filter78), [vector_64] "r"(vector_64), [cm] "r"(cm), [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2)); src += 16; dst = (dst_ptr + ((c + 1) * 16 * dst_stride)); odd_dst = (dst + dst_stride); } /* Next row... */ src_ptr += src_stride; dst_ptr += 1; } } static void convolve_horiz_64_transposed_dspr2( const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t c, y; const uint8_t *src; uint8_t *dst; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector_64 = 64; int32_t filter12, filter34, filter56, filter78; int32_t Temp1, Temp2, Temp3; uint32_t qload1, qload2; uint32_t p1, p2, p3, p4, p5; uint32_t st1, st2, st3; uint32_t dst_pitch_2 = (dst_stride << 1); uint8_t *odd_dst; filter12 = ((const int32_t *)filter_x0)[0]; filter34 = ((const int32_t *)filter_x0)[1]; filter56 = ((const int32_t *)filter_x0)[2]; filter78 = ((const int32_t *)filter_x0)[3]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src_ptr + src_stride); prefetch_load(src_ptr + src_stride + 32); prefetch_load(src_ptr + src_stride + 64); src = src_ptr; dst = dst_ptr; odd_dst = (dst + dst_stride); for (c = 0; c < 4; c++) { __asm__ __volatile__( "ulw %[qload1], 0(%[src]) " "\n\t" "ulw %[qload2], 4(%[src]) " "\n\t" /* even 1. 
pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* even 1 */ "mthi $zero, $ac1 " "\n\t" "mtlo %[vector_64], $ac2 " "\n\t" /* even 2 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p3], %[qload2] " "\n\t" "preceu.ph.qbl %[p4], %[qload2] " "\n\t" "preceu.ph.qbr %[p1], %[qload1] " "\n\t" "preceu.ph.qbl %[p2], %[qload1] " "\n\t" "ulw %[qload2], 8(%[src]) " "\n\t" "dpa.w.ph $ac1, %[p1], %[filter12] " "\n\t" /* even 1 */ "dpa.w.ph $ac1, %[p2], %[filter34] " "\n\t" /* even 1 */ "dpa.w.ph $ac1, %[p3], %[filter56] " "\n\t" /* even 1 */ "dpa.w.ph $ac1, %[p4], %[filter78] " "\n\t" /* even 1 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* even 1 */ /* even 2. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* even 3 */ "mthi $zero, $ac3 " "\n\t" "preceu.ph.qbr %[p1], %[qload2] " "\n\t" "preceu.ph.qbl %[p5], %[qload2] " "\n\t" "ulw %[qload1], 12(%[src]) " "\n\t" "dpa.w.ph $ac2, %[p2], %[filter12] " "\n\t" /* even 1 */ "dpa.w.ph $ac2, %[p3], %[filter34] " "\n\t" /* even 1 */ "dpa.w.ph $ac2, %[p4], %[filter56] " "\n\t" /* even 1 */ "dpa.w.ph $ac2, %[p1], %[filter78] " "\n\t" /* even 1 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* even 1 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* even 1 */ /* even 3. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* even 4 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbr %[p2], %[qload1] " "\n\t" "sb %[st1], 0(%[dst]) " "\n\t" /* even 1 */ "addu %[dst], %[dst], %[dst_pitch_2] " " \n\t" "dpa.w.ph $ac3, %[p3], %[filter12] " "\n\t" /* even 3 */ "dpa.w.ph $ac3, %[p4], %[filter34] " "\n\t" /* even 3 */ "dpa.w.ph $ac3, %[p1], %[filter56] " "\n\t" /* even 3 */ "dpa.w.ph $ac3, %[p5], %[filter78] " "\n\t" /* even 3 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* even 3 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* even 1 */ /* even 4. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* even 5 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbl %[p3], %[qload1] " "\n\t" "sb %[st2], 0(%[dst]) " "\n\t" /* even 2 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "ulw %[qload2], 16(%[src]) " "\n\t" "dpa.w.ph $ac1, %[p4], %[filter12] " "\n\t" /* even 4 */ "dpa.w.ph $ac1, %[p1], %[filter34] " "\n\t" /* even 4 */ "dpa.w.ph $ac1, %[p5], %[filter56] " "\n\t" /* even 4 */ "dpa.w.ph $ac1, %[p2], %[filter78] " "\n\t" /* even 4 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* even 4 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* even 3 */ /* even 5. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* even 6 */ "mthi $zero, $ac3 " "\n\t" "preceu.ph.qbr %[p4], %[qload2] " "\n\t" "sb %[st3], 0(%[dst]) " "\n\t" /* even 3 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac2, %[p1], %[filter12] " "\n\t" /* even 5 */ "dpa.w.ph $ac2, %[p5], %[filter34] " "\n\t" /* even 5 */ "dpa.w.ph $ac2, %[p2], %[filter56] " "\n\t" /* even 5 */ "dpa.w.ph $ac2, %[p3], %[filter78] " "\n\t" /* even 5 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* even 5 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* even 4 */ /* even 6. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* even 7 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbl %[p1], %[qload2] " "\n\t" "sb %[st1], 0(%[dst]) " "\n\t" /* even 4 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "ulw %[qload1], 20(%[src]) " "\n\t" "dpa.w.ph $ac3, %[p5], %[filter12] " "\n\t" /* even 6 */ "dpa.w.ph $ac3, %[p2], %[filter34] " "\n\t" /* even 6 */ "dpa.w.ph $ac3, %[p3], %[filter56] " "\n\t" /* even 6 */ "dpa.w.ph $ac3, %[p4], %[filter78] " "\n\t" /* even 6 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* even 6 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* even 5 */ /* even 7. 
pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* even 8 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p5], %[qload1] " "\n\t" "sb %[st2], 0(%[dst]) " "\n\t" /* even 5 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac1, %[p2], %[filter12] " "\n\t" /* even 7 */ "dpa.w.ph $ac1, %[p3], %[filter34] " "\n\t" /* even 7 */ "dpa.w.ph $ac1, %[p4], %[filter56] " "\n\t" /* even 7 */ "dpa.w.ph $ac1, %[p1], %[filter78] " "\n\t" /* even 7 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* even 7 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* even 6 */ /* even 8. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* odd 1 */ "mthi $zero, $ac3 " "\n\t" "dpa.w.ph $ac2, %[p3], %[filter12] " "\n\t" /* even 8 */ "dpa.w.ph $ac2, %[p4], %[filter34] " "\n\t" /* even 8 */ "sb %[st3], 0(%[dst]) " "\n\t" /* even 6 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac2, %[p1], %[filter56] " "\n\t" /* even 8 */ "dpa.w.ph $ac2, %[p5], %[filter78] " "\n\t" /* even 8 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* even 8 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* even 7 */ /* ODD pixels */ "ulw %[qload1], 1(%[src]) " "\n\t" "ulw %[qload2], 5(%[src]) " "\n\t" /* odd 1. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* odd 2 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbr %[p1], %[qload1] " "\n\t" "preceu.ph.qbl %[p2], %[qload1] " "\n\t" "preceu.ph.qbr %[p3], %[qload2] " "\n\t" "preceu.ph.qbl %[p4], %[qload2] " "\n\t" "sb %[st1], 0(%[dst]) " "\n\t" /* even 7 */ "addu %[dst], %[dst], %[dst_pitch_2] " "\n\t" "ulw %[qload2], 9(%[src]) " "\n\t" "dpa.w.ph $ac3, %[p1], %[filter12] " "\n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p2], %[filter34] " "\n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p3], %[filter56] " "\n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p4], %[filter78] " "\n\t" /* odd 1 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* odd 1 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* even 8 */ /* odd 2. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* odd 3 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p1], %[qload2] " "\n\t" "preceu.ph.qbl %[p5], %[qload2] " "\n\t" "sb %[st2], 0(%[dst]) " "\n\t" /* even 8 */ "ulw %[qload1], 13(%[src]) " "\n\t" "dpa.w.ph $ac1, %[p2], %[filter12] " "\n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p3], %[filter34] " "\n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p4], %[filter56] " "\n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p1], %[filter78] " "\n\t" /* odd 2 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* odd 2 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* odd 1 */ /* odd 3. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* odd 4 */ "mthi $zero, $ac3 " "\n\t" "preceu.ph.qbr %[p2], %[qload1] " "\n\t" "sb %[st3], 0(%[odd_dst]) " "\n\t" /* odd 1 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac2, %[p3], %[filter12] " "\n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p4], %[filter34] " "\n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p1], %[filter56] " "\n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p5], %[filter78] " "\n\t" /* odd 3 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* odd 3 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* odd 2 */ /* odd 4. 
pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* odd 5 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbl %[p3], %[qload1] " "\n\t" "sb %[st1], 0(%[odd_dst]) " "\n\t" /* odd 2 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "ulw %[qload2], 17(%[src]) " "\n\t" "dpa.w.ph $ac3, %[p4], %[filter12] " "\n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p1], %[filter34] " "\n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p5], %[filter56] " "\n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p2], %[filter78] " "\n\t" /* odd 4 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* odd 4 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* odd 3 */ /* odd 5. pixel */ "mtlo %[vector_64], $ac2 " "\n\t" /* odd 6 */ "mthi $zero, $ac2 " "\n\t" "preceu.ph.qbr %[p4], %[qload2] " "\n\t" "sb %[st2], 0(%[odd_dst]) " "\n\t" /* odd 3 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac1, %[p1], %[filter12] " "\n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p5], %[filter34] " "\n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p2], %[filter56] " "\n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p3], %[filter78] " "\n\t" /* odd 5 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* odd 5 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* odd 4 */ /* odd 6. pixel */ "mtlo %[vector_64], $ac3 " "\n\t" /* odd 7 */ "mthi $zero, $ac3 " "\n\t" "preceu.ph.qbl %[p1], %[qload2] " "\n\t" "sb %[st3], 0(%[odd_dst]) " "\n\t" /* odd 4 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "ulw %[qload1], 21(%[src]) " "\n\t" "dpa.w.ph $ac2, %[p5], %[filter12] " "\n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p2], %[filter34] " "\n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p3], %[filter56] " "\n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p4], %[filter78] " "\n\t" /* odd 6 */ "extp %[Temp2], $ac2, 31 " "\n\t" /* odd 6 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* odd 5 */ /* odd 7. pixel */ "mtlo %[vector_64], $ac1 " "\n\t" /* odd 8 */ "mthi $zero, $ac1 " "\n\t" "preceu.ph.qbr %[p5], %[qload1] " "\n\t" "sb %[st1], 0(%[odd_dst]) " "\n\t" /* odd 5 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "dpa.w.ph $ac3, %[p2], %[filter12] " "\n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p3], %[filter34] " "\n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p4], %[filter56] " "\n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p1], %[filter78] " "\n\t" /* odd 7 */ "extp %[Temp3], $ac3, 31 " "\n\t" /* odd 7 */ /* odd 8. pixel */ "dpa.w.ph $ac1, %[p3], %[filter12] " "\n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p4], %[filter34] " "\n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p1], %[filter56] " "\n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p5], %[filter78] " "\n\t" /* odd 8 */ "extp %[Temp1], $ac1, 31 " "\n\t" /* odd 8 */ "lbux %[st2], %[Temp2](%[cm]) " "\n\t" /* odd 6 */ "lbux %[st3], %[Temp3](%[cm]) " "\n\t" /* odd 7 */ "lbux %[st1], %[Temp1](%[cm]) " "\n\t" /* odd 8 */ "sb %[st2], 0(%[odd_dst]) " "\n\t" /* odd 6 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "sb %[st3], 0(%[odd_dst]) " "\n\t" /* odd 7 */ "addu %[odd_dst], %[odd_dst], %[dst_pitch_2] " "\n\t" "sb %[st1], 0(%[odd_dst]) " "\n\t" /* odd 8 */ : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [p5] "=&r"(p5), [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [dst] "+r"(dst), [odd_dst] "+r"(odd_dst) : [filter12] "r"(filter12), [filter34] "r"(filter34), [filter56] "r"(filter56), [filter78] "r"(filter78), [vector_64] "r"(vector_64), [cm] "r"(cm), [src] "r"(src), [dst_pitch_2] "r"(dst_pitch_2)); src += 16; dst = (dst_ptr + ((c + 1) * 16 * dst_stride)); odd_dst = (dst + dst_stride); } /* Next row... 
*/
    src_ptr += src_stride;
    dst_ptr += 1;
  }
}

void convolve_horiz_transposed(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const int16_t *filter, int w, int h) {
  int x, y, k;

  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x) {
      int sum = 0;

      for (k = 0; k < 8; ++k) sum += src[x + k] * filter[k];

      dst[x * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
    }

    src += src_stride;
    dst += 1;
  }
}

void copy_horiz_transposed(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride, int w, int h) {
  int x, y;

  for (y = 0; y < h; ++y) {
    for (x = 0; x < w; ++x) {
      dst[x * dst_stride] = src[x];
    }

    src += src_stride;
    dst += 1;
  }
}

/* 2-D 8-tap convolve: the first pass filters rows of src into temp, stored
 * transposed (note the dst_stride of intermediate_height and the dst += 1
 * row step above). The second pass runs the same transposed-horizontal
 * kernels over temp, which applies the vertical taps and transposes the
 * data back into dst. */
void vpx_convolve8_dspr2(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst,
                         ptrdiff_t dst_stride, const InterpKernel *filter,
                         int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4,
                         int w, int h) {
  const int16_t *const filter_x = filter[x0_q4];
  const int16_t *const filter_y = filter[y0_q4];
  DECLARE_ALIGNED(32, uint8_t, temp[64 * 135]);
  int32_t intermediate_height = ((h * y_step_q4) >> 4) + 7;
  uint32_t pos = 38;

  assert(x_step_q4 == 16);
  assert(y_step_q4 == 16);
  assert(((const int32_t *)filter_x)[1] != 0x800000);
  assert(((const int32_t *)filter_y)[1] != 0x800000);

  (void)x_step_q4;

  /* bit position for extract from acc */
  __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos));

  if (intermediate_height < h) intermediate_height = h;

  /* copy the src to dst */
  if (filter_x[3] == 0x80) {
    copy_horiz_transposed(src - src_stride * 3, src_stride, temp,
                          intermediate_height, w, intermediate_height);
  } else if (vpx_get_filter_taps(filter_x) == 2) {
    vpx_convolve2_dspr2(src - src_stride * 3, src_stride, temp,
                        intermediate_height, filter_x, w, intermediate_height);
  } else {
    src -= (src_stride * 3 + 3);

    /* prefetch data to cache memory */
    prefetch_load(src);
    prefetch_load(src + 32);

    switch (w) {
      case 4:
        convolve_horiz_4_transposed_dspr2(src, src_stride, temp,
                                          intermediate_height, filter_x,
                                          intermediate_height);
        break;
      case 8:
        convolve_horiz_8_transposed_dspr2(src, src_stride, temp,
                                          intermediate_height, filter_x,
                                          intermediate_height);
        break;
      case 16:
      case 32:
        convolve_horiz_16_transposed_dspr2(src, src_stride, temp,
                                           intermediate_height, filter_x,
                                           intermediate_height, (w / 16));
        break;
      case 64:
        prefetch_load(src + 32);
        convolve_horiz_64_transposed_dspr2(src, src_stride, temp,
                                           intermediate_height, filter_x,
                                           intermediate_height);
        break;
      default:
        convolve_horiz_transposed(src, src_stride, temp, intermediate_height,
                                  filter_x, w, intermediate_height);
        break;
    }
  }

  /* copy the src to dst */
  if (filter_y[3] == 0x80) {
    copy_horiz_transposed(temp + 3, intermediate_height, dst, dst_stride, h,
                          w);
  } else if (vpx_get_filter_taps(filter_y) == 2) {
    vpx_convolve2_dspr2(temp + 3, intermediate_height, dst, dst_stride,
                        filter_y, h, w);
  } else {
    switch (h) {
      case 4:
        convolve_horiz_4_transposed_dspr2(temp, intermediate_height, dst,
                                          dst_stride, filter_y, w);
        break;
      case 8:
        convolve_horiz_8_transposed_dspr2(temp, intermediate_height, dst,
                                          dst_stride, filter_y, w);
        break;
      case 16:
      case 32:
        convolve_horiz_16_transposed_dspr2(temp, intermediate_height, dst,
                                           dst_stride, filter_y, w, (h / 16));
        break;
      case 64:
        convolve_horiz_64_transposed_dspr2(temp, intermediate_height, dst,
                                           dst_stride, filter_y, w);
        break;
      default:
        convolve_horiz_transposed(temp, intermediate_height, dst, dst_stride,
                                  filter_y, h, w);
        break;
    }
  }
}

void vpx_convolve_copy_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                             uint8_t *dst, ptrdiff_t dst_stride,
                             const InterpKernel *filter, int x0_q4,
int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { int x, y; (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; /* prefetch data to cache memory */ prefetch_load(src); prefetch_load(src + 32); prefetch_store(dst); switch (w) { case 4: { uint32_t tp1; /* 1 word storage */ for (y = h; y--;) { prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], (%[src]) \n\t" "sw %[tp1], (%[dst]) \n\t" /* store */ : [tp1] "=&r"(tp1) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } break; } case 8: { uint32_t tp1, tp2; /* 2 word storage */ for (y = h; y--;) { prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" "sw %[tp1], 0(%[dst]) \n\t" /* store */ "sw %[tp2], 4(%[dst]) \n\t" /* store */ : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } break; } case 16: { uint32_t tp1, tp2, tp3, tp4; /* 4 word storage */ for (y = h; y--;) { prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" "ulw %[tp3], 8(%[src]) \n\t" "ulw %[tp4], 12(%[src]) \n\t" "sw %[tp1], 0(%[dst]) \n\t" /* store */ "sw %[tp2], 4(%[dst]) \n\t" /* store */ "sw %[tp3], 8(%[dst]) \n\t" /* store */ "sw %[tp4], 12(%[dst]) \n\t" /* store */ : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [tp4] "=&r"(tp4) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } break; } case 32: { uint32_t tp1, tp2, tp3, tp4; uint32_t tp5, tp6, tp7, tp8; /* 8 word storage */ for (y = h; y--;) { prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" "ulw %[tp3], 8(%[src]) \n\t" "ulw %[tp4], 12(%[src]) \n\t" "ulw %[tp5], 16(%[src]) \n\t" "ulw %[tp6], 20(%[src]) \n\t" "ulw %[tp7], 24(%[src]) \n\t" "ulw %[tp8], 28(%[src]) \n\t" "sw %[tp1], 0(%[dst]) \n\t" /* store */ "sw %[tp2], 4(%[dst]) \n\t" /* store */ "sw %[tp3], 8(%[dst]) \n\t" /* store */ "sw %[tp4], 12(%[dst]) \n\t" /* store */ "sw %[tp5], 16(%[dst]) \n\t" /* store */ "sw %[tp6], 20(%[dst]) \n\t" /* store */ "sw %[tp7], 24(%[dst]) \n\t" /* store */ "sw %[tp8], 28(%[dst]) \n\t" /* store */ : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [tp4] "=&r"(tp4), [tp5] "=&r"(tp5), [tp6] "=&r"(tp6), [tp7] "=&r"(tp7), [tp8] "=&r"(tp8) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } break; } case 64: { uint32_t tp1, tp2, tp3, tp4; uint32_t tp5, tp6, tp7, tp8; prefetch_load(src + 64); prefetch_store(dst + 32); /* 16 word storage */ for (y = h; y--;) { prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_load(src + src_stride + 64); prefetch_store(dst + dst_stride); prefetch_store(dst + dst_stride + 32); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" "ulw %[tp3], 8(%[src]) \n\t" "ulw %[tp4], 12(%[src]) \n\t" "ulw %[tp5], 16(%[src]) \n\t" "ulw %[tp6], 20(%[src]) \n\t" "ulw %[tp7], 24(%[src]) \n\t" "ulw %[tp8], 28(%[src]) \n\t" "sw %[tp1], 0(%[dst]) \n\t" /* store */ "sw %[tp2], 4(%[dst]) \n\t" /* store */ "sw %[tp3], 8(%[dst]) \n\t" /* store */ "sw %[tp4], 12(%[dst]) \n\t" /* store */ "sw %[tp5], 16(%[dst]) \n\t" /* store */ "sw 
%[tp6], 20(%[dst]) \n\t" /* store */ "sw %[tp7], 24(%[dst]) \n\t" /* store */ "sw %[tp8], 28(%[dst]) \n\t" /* store */ "ulw %[tp1], 32(%[src]) \n\t" "ulw %[tp2], 36(%[src]) \n\t" "ulw %[tp3], 40(%[src]) \n\t" "ulw %[tp4], 44(%[src]) \n\t" "ulw %[tp5], 48(%[src]) \n\t" "ulw %[tp6], 52(%[src]) \n\t" "ulw %[tp7], 56(%[src]) \n\t" "ulw %[tp8], 60(%[src]) \n\t" "sw %[tp1], 32(%[dst]) \n\t" /* store */ "sw %[tp2], 36(%[dst]) \n\t" /* store */ "sw %[tp3], 40(%[dst]) \n\t" /* store */ "sw %[tp4], 44(%[dst]) \n\t" /* store */ "sw %[tp5], 48(%[dst]) \n\t" /* store */ "sw %[tp6], 52(%[dst]) \n\t" /* store */ "sw %[tp7], 56(%[dst]) \n\t" /* store */ "sw %[tp8], 60(%[dst]) \n\t" /* store */ : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tp3] "=&r"(tp3), [tp4] "=&r"(tp4), [tp5] "=&r"(tp5), [tp6] "=&r"(tp6), [tp7] "=&r"(tp7), [tp8] "=&r"(tp8) : [src] "r"(src), [dst] "r"(dst)); src += src_stride; dst += dst_stride; } break; } default: for (y = h; y--;) { for (x = 0; x < w; ++x) { dst[x] = src[x]; } src += src_stride; dst += dst_stride; } break; } } #endif libvpx-1.8.2/vpx_dsp/mips/convolve8_horiz_dspr2.c000066400000000000000000001463621357355204000221150ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 static void convolve_horiz_4_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y; uint8_t *cm = vpx_ff_cropTbl; int32_t vector1b, vector2b, vector3b, vector4b; int32_t Temp1, Temp2, Temp3, Temp4; uint32_t vector4a = 64; uint32_t tp1, tp2; uint32_t p1, p2, p3, p4; uint32_t n1, n2, n3, n4; uint32_t tn1, tn2; vector1b = ((const int32_t *)filter_x0)[0]; vector2b = ((const int32_t *)filter_x0)[1]; vector3b = ((const int32_t *)filter_x0)[2]; vector4b = ((const int32_t *)filter_x0)[3]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "preceu.ph.qbl %[p4], %[tp2] \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" "ulw %[tn2], 8(%[src]) \n\t" "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" "extp %[Temp1], $ac3, 31 \n\t" /* even 2. pixel */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tn2] \n\t" "balign %[tn1], %[tn2], 3 \n\t" "balign %[tn2], %[tp2], 3 \n\t" "balign %[tp2], %[tp1], 3 \n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" "extp %[Temp3], $ac2, 31 \n\t" /* odd 1. 
pixel */ "lbux %[tp1], %[Temp1](%[cm]) \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[n1], %[tp2] \n\t" "preceu.ph.qbl %[n2], %[tp2] \n\t" "preceu.ph.qbr %[n3], %[tn2] \n\t" "preceu.ph.qbl %[n4], %[tn2] \n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n3], %[vector3b] \n\t" "dpa.w.ph $ac3, %[n4], %[vector4b] \n\t" "extp %[Temp2], $ac3, 31 \n\t" /* odd 2. pixel */ "lbux %[tp2], %[Temp3](%[cm]) \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[n1], %[tn1] \n\t" "dpa.w.ph $ac2, %[n2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[n3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n4], %[vector3b] \n\t" "dpa.w.ph $ac2, %[n1], %[vector4b] \n\t" "extp %[Temp4], $ac2, 31 \n\t" /* clamp */ "lbux %[tn1], %[Temp2](%[cm]) \n\t" "lbux %[n2], %[Temp4](%[cm]) \n\t" /* store bytes */ "sb %[tp1], 0(%[dst]) \n\t" "sb %[tn1], 1(%[dst]) \n\t" "sb %[tp2], 2(%[dst]) \n\t" "sb %[n2], 3(%[dst]) \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [n1] "=&r"(n1), [n2] "=&r"(n2), [n3] "=&r"(n3), [n4] "=&r"(n4), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); /* Next row... */ src += src_stride; dst += dst_stride; } } static void convolve_horiz_8_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector4a = 64; int32_t vector1b, vector2b, vector3b, vector4b; int32_t Temp1, Temp2, Temp3; uint32_t tp1, tp2; uint32_t p1, p2, p3, p4, n1; uint32_t tn1, tn2, tn3; uint32_t st0, st1; vector1b = ((const int32_t *)filter_x0)[0]; vector2b = ((const int32_t *)filter_x0)[1]; vector3b = ((const int32_t *)filter_x0)[2]; vector4b = ((const int32_t *)filter_x0)[3]; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_load(src + src_stride); prefetch_load(src + src_stride + 32); prefetch_store(dst + dst_stride); __asm__ __volatile__( "ulw %[tp1], 0(%[src]) \n\t" "ulw %[tp2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tp1] \n\t" "preceu.ph.qbl %[p2], %[tp1] \n\t" "preceu.ph.qbr %[p3], %[tp2] \n\t" "preceu.ph.qbl %[p4], %[tp2] \n\t" "ulw %[tn2], 8(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" "extp %[Temp1], $ac3, 31 \n\t" /* even 2. pixel */ "preceu.ph.qbr %[p1], %[tn2] \n\t" "preceu.ph.qbl %[n1], %[tn2] \n\t" "ulw %[tn1], 12(%[src]) \n\t" "dpa.w.ph $ac2, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac2, %[p4], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector4b] \n\t" "extp %[Temp3], $ac2, 31 \n\t" /* even 3. pixel */ "lbux %[st0], %[Temp1](%[cm]) \n\t" "mtlo %[vector4a], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p2], %[tn1] \n\t" "dpa.w.ph $ac1, %[p3], %[vector1b] \n\t" "dpa.w.ph $ac1, %[p4], %[vector2b] \n\t" "dpa.w.ph $ac1, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac1, %[n1], %[vector4b] \n\t" "extp %[Temp1], $ac1, 31 \n\t" /* even 4. 
pixel */ "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sb %[st0], 0(%[dst]) \n\t" "lbux %[st1], %[Temp3](%[cm]) \n\t" "balign %[tn3], %[tn1], 3 \n\t" "balign %[tn1], %[tn2], 3 \n\t" "balign %[tn2], %[tp2], 3 \n\t" "balign %[tp2], %[tp1], 3 \n\t" "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" "extp %[Temp3], $ac2, 31 \n\t" "lbux %[st0], %[Temp1](%[cm]) \n\t" /* odd 1. pixel */ "mtlo %[vector4a], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sb %[st1], 2(%[dst]) \n\t" "preceu.ph.qbr %[p1], %[tp2] \n\t" "preceu.ph.qbl %[p2], %[tp2] \n\t" "preceu.ph.qbr %[p3], %[tn2] \n\t" "preceu.ph.qbl %[p4], %[tn2] \n\t" "sb %[st0], 4(%[dst]) \n\t" "dpa.w.ph $ac3, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p3], %[vector3b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector4b] \n\t" "extp %[Temp2], $ac3, 31 \n\t" /* odd 2. pixel */ "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[tn1] \n\t" "preceu.ph.qbl %[n1], %[tn1] \n\t" "lbux %[st0], %[Temp3](%[cm]) \n\t" "dpa.w.ph $ac1, %[p2], %[vector1b] \n\t" "dpa.w.ph $ac1, %[p3], %[vector2b] \n\t" "dpa.w.ph $ac1, %[p4], %[vector3b] \n\t" "dpa.w.ph $ac1, %[p1], %[vector4b] \n\t" "extp %[Temp3], $ac1, 31 \n\t" /* odd 3. pixel */ "lbux %[st1], %[Temp2](%[cm]) \n\t" "preceu.ph.qbr %[p2], %[tn3] \n\t" "dpa.w.ph $ac3, %[p3], %[vector1b] \n\t" "dpa.w.ph $ac3, %[p4], %[vector2b] \n\t" "dpa.w.ph $ac3, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac3, %[n1], %[vector4b] \n\t" "extp %[Temp2], $ac3, 31 \n\t" /* odd 4. pixel */ "sb %[st1], 1(%[dst]) \n\t" "sb %[st0], 6(%[dst]) \n\t" "dpa.w.ph $ac2, %[p4], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p1], %[vector2b] \n\t" "dpa.w.ph $ac2, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" "extp %[Temp1], $ac2, 31 \n\t" /* clamp */ "lbux %[p4], %[Temp3](%[cm]) \n\t" "lbux %[p2], %[Temp2](%[cm]) \n\t" "lbux %[n1], %[Temp1](%[cm]) \n\t" /* store bytes */ "sb %[p4], 3(%[dst]) \n\t" "sb %[p2], 5(%[dst]) \n\t" "sb %[n1], 7(%[dst]) \n\t" : [tp1] "=&r"(tp1), [tp2] "=&r"(tp2), [tn1] "=&r"(tn1), [tn2] "=&r"(tn2), [tn3] "=&r"(tn3), [st0] "=&r"(st0), [st1] "=&r"(st1), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [n1] "=&r"(n1), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), [vector4a] "r"(vector4a), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); /* Next row... 
*/ src += src_stride; dst += dst_stride; } } static void convolve_horiz_16_dspr2(const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, const int16_t *filter_x0, int32_t h, int32_t count) { int32_t y, c; const uint8_t *src; uint8_t *dst; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector_64 = 64; int32_t filter12, filter34, filter56, filter78; int32_t Temp1, Temp2, Temp3; uint32_t qload1, qload2, qload3; uint32_t p1, p2, p3, p4, p5; uint32_t st1, st2, st3; filter12 = ((const int32_t *)filter_x0)[0]; filter34 = ((const int32_t *)filter_x0)[1]; filter56 = ((const int32_t *)filter_x0)[2]; filter78 = ((const int32_t *)filter_x0)[3]; for (y = h; y--;) { src = src_ptr; dst = dst_ptr; /* prefetch data to cache memory */ prefetch_load(src_ptr + src_stride); prefetch_load(src_ptr + src_stride + 32); prefetch_store(dst_ptr + dst_stride); for (c = 0; c < count; c++) { __asm__ __volatile__( "ulw %[qload1], 0(%[src]) \n\t" "ulw %[qload2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ "mthi $zero, $ac1 \n\t" "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "ulw %[qload3], 8(%[src]) \n\t" "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ /* even 2. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "ulw %[qload1], 12(%[src]) \n\t" "dpa.w.ph $ac2, %[p2], %[filter12] \n\t" /* even 1 */ "dpa.w.ph $ac2, %[p3], %[filter34] \n\t" /* even 1 */ "dpa.w.ph $ac2, %[p4], %[filter56] \n\t" /* even 1 */ "dpa.w.ph $ac2, %[p1], %[filter78] \n\t" /* even 1 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ /* even 3. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p2], %[qload1] \n\t" "sb %[st1], 0(%[dst]) \n\t" /* even 1 */ "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */ "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */ "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */ "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */ "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ /* even 4. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[st2], 2(%[dst]) \n\t" /* even 1 */ "ulw %[qload2], 16(%[src]) \n\t" "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */ "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */ "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */ "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ /* even 5. 
pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p4], %[qload2] \n\t" "sb %[st3], 4(%[dst]) \n\t" /* even 3 */ "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */ "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */ "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */ "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ /* even 6. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbl %[p1], %[qload2] \n\t" "sb %[st1], 6(%[dst]) \n\t" /* even 4 */ "ulw %[qload3], 20(%[src]) \n\t" "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */ "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */ "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */ "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */ "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ /* even 7. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p5], %[qload3] \n\t" "sb %[st2], 8(%[dst]) \n\t" /* even 5 */ "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */ "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */ "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */ "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ /* even 8. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ "mthi $zero, $ac3 \n\t" "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */ "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */ "sb %[st3], 10(%[dst]) \n\t" /* even 6 */ "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */ "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ /* ODD pixels */ "ulw %[qload1], 1(%[src]) \n\t" "ulw %[qload2], 5(%[src]) \n\t" /* odd 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "sb %[st1], 12(%[dst]) \n\t" /* even 7 */ "ulw %[qload3], 9(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ /* odd 2. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "sb %[st2], 14(%[dst]) \n\t" /* even 8 */ "ulw %[qload1], 13(%[src]) \n\t" "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ /* odd 3. 
pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p2], %[qload1] \n\t" "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */ "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ /* odd 4. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */ "ulw %[qload2], 17(%[src]) \n\t" "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ /* odd 5. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p4], %[qload2] \n\t" "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */ "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ /* odd 6. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbl %[p1], %[qload2] \n\t" "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */ "ulw %[qload3], 21(%[src]) \n\t" "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ /* odd 7. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p5], %[qload3] \n\t" "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */ "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ /* odd 8. pixel */ "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */ "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */ "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */ : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) : [filter12] "r"(filter12), [filter34] "r"(filter34), [filter56] "r"(filter56), [filter78] "r"(filter78), [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); src += 16; dst += 16; } /* Next row... 
*/ src_ptr += src_stride; dst_ptr += dst_stride; } } static void convolve_horiz_64_dspr2(const uint8_t *src_ptr, int32_t src_stride, uint8_t *dst_ptr, int32_t dst_stride, const int16_t *filter_x0, int32_t h) { int32_t y, c; const uint8_t *src; uint8_t *dst; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector_64 = 64; int32_t filter12, filter34, filter56, filter78; int32_t Temp1, Temp2, Temp3; uint32_t qload1, qload2, qload3; uint32_t p1, p2, p3, p4, p5; uint32_t st1, st2, st3; filter12 = ((const int32_t *)filter_x0)[0]; filter34 = ((const int32_t *)filter_x0)[1]; filter56 = ((const int32_t *)filter_x0)[2]; filter78 = ((const int32_t *)filter_x0)[3]; for (y = h; y--;) { src = src_ptr; dst = dst_ptr; /* prefetch data to cache memory */ prefetch_load(src_ptr + src_stride); prefetch_load(src_ptr + src_stride + 32); prefetch_load(src_ptr + src_stride + 64); prefetch_store(dst_ptr + dst_stride); prefetch_store(dst_ptr + dst_stride + 32); for (c = 0; c < 4; c++) { __asm__ __volatile__( "ulw %[qload1], 0(%[src]) \n\t" "ulw %[qload2], 4(%[src]) \n\t" /* even 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 1 */ "mthi $zero, $ac1 \n\t" "mtlo %[vector_64], $ac2 \n\t" /* even 2 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "ulw %[qload3], 8(%[src]) \n\t" "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p2], %[filter34] \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p3], %[filter56] \n\t" /* even 1 */ "dpa.w.ph $ac1, %[p4], %[filter78] \n\t" /* even 1 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 1 */ /* even 2. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 3 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "ulw %[qload1], 12(%[src]) \n\t" "dpa.w.ph $ac2, %[p2], %[filter12] \n\t" /* even 1 */ "dpa.w.ph $ac2, %[p3], %[filter34] \n\t" /* even 1 */ "dpa.w.ph $ac2, %[p4], %[filter56] \n\t" /* even 1 */ "dpa.w.ph $ac2, %[p1], %[filter78] \n\t" /* even 1 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 1 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 1 */ /* even 3. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 4 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p2], %[qload1] \n\t" "sb %[st1], 0(%[dst]) \n\t" /* even 1 */ "dpa.w.ph $ac3, %[p3], %[filter12] \n\t" /* even 3 */ "dpa.w.ph $ac3, %[p4], %[filter34] \n\t" /* even 3 */ "dpa.w.ph $ac3, %[p1], %[filter56] \n\t" /* even 3 */ "dpa.w.ph $ac3, %[p5], %[filter78] \n\t" /* even 3 */ "extp %[Temp3], $ac3, 31 \n\t" /* even 3 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 1 */ /* even 4. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 5 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[st2], 2(%[dst]) \n\t" /* even 1 */ "ulw %[qload2], 16(%[src]) \n\t" "dpa.w.ph $ac1, %[p4], %[filter12] \n\t" /* even 4 */ "dpa.w.ph $ac1, %[p1], %[filter34] \n\t" /* even 4 */ "dpa.w.ph $ac1, %[p5], %[filter56] \n\t" /* even 4 */ "dpa.w.ph $ac1, %[p2], %[filter78] \n\t" /* even 4 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 4 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 3 */ /* even 5. 
pixel */ "mtlo %[vector_64], $ac3 \n\t" /* even 6 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p4], %[qload2] \n\t" "sb %[st3], 4(%[dst]) \n\t" /* even 3 */ "dpa.w.ph $ac2, %[p1], %[filter12] \n\t" /* even 5 */ "dpa.w.ph $ac2, %[p5], %[filter34] \n\t" /* even 5 */ "dpa.w.ph $ac2, %[p2], %[filter56] \n\t" /* even 5 */ "dpa.w.ph $ac2, %[p3], %[filter78] \n\t" /* even 5 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 5 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 4 */ /* even 6. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* even 7 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbl %[p1], %[qload2] \n\t" "sb %[st1], 6(%[dst]) \n\t" /* even 4 */ "ulw %[qload3], 20(%[src]) \n\t" "dpa.w.ph $ac3, %[p5], %[filter12] \n\t" /* even 6 */ "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* even 6 */ "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* even 6 */ "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* even 6 */ "extp %[Temp3], $ac3, 31 \n\t" /* even 6 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 5 */ /* even 7. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* even 8 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p5], %[qload3] \n\t" "sb %[st2], 8(%[dst]) \n\t" /* even 5 */ "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* even 7 */ "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* even 7 */ "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* even 7 */ "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* even 7 */ "extp %[Temp1], $ac1, 31 \n\t" /* even 7 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* even 6 */ /* even 8. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 1 */ "mthi $zero, $ac3 \n\t" "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* even 8 */ "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* even 8 */ "sb %[st3], 10(%[dst]) \n\t" /* even 6 */ "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* even 8 */ "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* even 8 */ "extp %[Temp2], $ac2, 31 \n\t" /* even 8 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* even 7 */ /* ODD pixels */ "ulw %[qload1], 1(%[src]) \n\t" "ulw %[qload2], 5(%[src]) \n\t" /* odd 1. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 2 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p1], %[qload1] \n\t" "preceu.ph.qbl %[p2], %[qload1] \n\t" "preceu.ph.qbr %[p3], %[qload2] \n\t" "preceu.ph.qbl %[p4], %[qload2] \n\t" "sb %[st1], 12(%[dst]) \n\t" /* even 7 */ "ulw %[qload3], 9(%[src]) \n\t" "dpa.w.ph $ac3, %[p1], %[filter12] \n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p2], %[filter34] \n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p3], %[filter56] \n\t" /* odd 1 */ "dpa.w.ph $ac3, %[p4], %[filter78] \n\t" /* odd 1 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 1 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* even 8 */ /* odd 2. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 3 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p1], %[qload3] \n\t" "preceu.ph.qbl %[p5], %[qload3] \n\t" "sb %[st2], 14(%[dst]) \n\t" /* even 8 */ "ulw %[qload1], 13(%[src]) \n\t" "dpa.w.ph $ac1, %[p2], %[filter12] \n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p3], %[filter34] \n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p4], %[filter56] \n\t" /* odd 2 */ "dpa.w.ph $ac1, %[p1], %[filter78] \n\t" /* odd 2 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 2 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 1 */ /* odd 3. 
pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 4 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[p2], %[qload1] \n\t" "sb %[st3], 1(%[dst]) \n\t" /* odd 1 */ "dpa.w.ph $ac2, %[p3], %[filter12] \n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p4], %[filter34] \n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p1], %[filter56] \n\t" /* odd 3 */ "dpa.w.ph $ac2, %[p5], %[filter78] \n\t" /* odd 3 */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 3 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 2 */ /* odd 4. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 5 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbl %[p3], %[qload1] \n\t" "sb %[st1], 3(%[dst]) \n\t" /* odd 2 */ "ulw %[qload2], 17(%[src]) \n\t" "dpa.w.ph $ac3, %[p4], %[filter12] \n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p1], %[filter34] \n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p5], %[filter56] \n\t" /* odd 4 */ "dpa.w.ph $ac3, %[p2], %[filter78] \n\t" /* odd 4 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 4 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 3 */ /* odd 5. pixel */ "mtlo %[vector_64], $ac2 \n\t" /* odd 6 */ "mthi $zero, $ac2 \n\t" "preceu.ph.qbr %[p4], %[qload2] \n\t" "sb %[st2], 5(%[dst]) \n\t" /* odd 3 */ "dpa.w.ph $ac1, %[p1], %[filter12] \n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p5], %[filter34] \n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p2], %[filter56] \n\t" /* odd 5 */ "dpa.w.ph $ac1, %[p3], %[filter78] \n\t" /* odd 5 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 5 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 4 */ /* odd 6. pixel */ "mtlo %[vector_64], $ac3 \n\t" /* odd 7 */ "mthi $zero, $ac3 \n\t" "preceu.ph.qbl %[p1], %[qload2] \n\t" "sb %[st3], 7(%[dst]) \n\t" /* odd 4 */ "ulw %[qload3], 21(%[src]) \n\t" "dpa.w.ph $ac2, %[p5], %[filter12] \n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p2], %[filter34] \n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p3], %[filter56] \n\t" /* odd 6 */ "dpa.w.ph $ac2, %[p4], %[filter78] \n\t" /* odd 6 */ "extp %[Temp2], $ac2, 31 \n\t" /* odd 6 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 5 */ /* odd 7. pixel */ "mtlo %[vector_64], $ac1 \n\t" /* odd 8 */ "mthi $zero, $ac1 \n\t" "preceu.ph.qbr %[p5], %[qload3] \n\t" "sb %[st1], 9(%[dst]) \n\t" /* odd 5 */ "dpa.w.ph $ac3, %[p2], %[filter12] \n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p3], %[filter34] \n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p4], %[filter56] \n\t" /* odd 7 */ "dpa.w.ph $ac3, %[p1], %[filter78] \n\t" /* odd 7 */ "extp %[Temp3], $ac3, 31 \n\t" /* odd 7 */ /* odd 8. pixel */ "dpa.w.ph $ac1, %[p3], %[filter12] \n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p4], %[filter34] \n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p1], %[filter56] \n\t" /* odd 8 */ "dpa.w.ph $ac1, %[p5], %[filter78] \n\t" /* odd 8 */ "extp %[Temp1], $ac1, 31 \n\t" /* odd 8 */ "lbux %[st2], %[Temp2](%[cm]) \n\t" /* odd 6 */ "lbux %[st3], %[Temp3](%[cm]) \n\t" /* odd 7 */ "lbux %[st1], %[Temp1](%[cm]) \n\t" /* odd 8 */ "sb %[st2], 11(%[dst]) \n\t" /* odd 6 */ "sb %[st3], 13(%[dst]) \n\t" /* odd 7 */ "sb %[st1], 15(%[dst]) \n\t" /* odd 8 */ : [qload1] "=&r"(qload1), [qload2] "=&r"(qload2), [qload3] "=&r"(qload3), [st1] "=&r"(st1), [st2] "=&r"(st2), [st3] "=&r"(st3), [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4), [p5] "=&r"(p5), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3) : [filter12] "r"(filter12), [filter34] "r"(filter34), [filter56] "r"(filter56), [filter78] "r"(filter78), [vector_64] "r"(vector_64), [cm] "r"(cm), [dst] "r"(dst), [src] "r"(src)); src += 16; dst += 16; } /* Next row... 
*/
    src_ptr += src_stride;
    dst_ptr += dst_stride;
  }
}

void vpx_convolve8_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const InterpKernel *filter, int x0_q4,
                               int x_step_q4, int y0_q4, int y_step_q4, int w,
                               int h) {
  const int16_t *const filter_x = filter[x0_q4];

  assert(x_step_q4 == 16);
  assert(((const int32_t *)filter_x)[1] != 0x800000);

  if (vpx_get_filter_taps(filter_x) == 2) {
    vpx_convolve2_horiz_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4,
                              x_step_q4, y0_q4, y_step_q4, w, h);
  } else {
    uint32_t pos = 38;

    prefetch_load((const uint8_t *)filter_x);
    src -= 3;

    /* bit position for extract from acc */
    __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos));

    /* prefetch data to cache memory */
    prefetch_load(src);
    prefetch_load(src + 32);
    prefetch_store(dst);

    switch (w) {
      case 4:
        convolve_horiz_4_dspr2(src, (int32_t)src_stride, dst,
                               (int32_t)dst_stride, filter_x, (int32_t)h);
        break;
      case 8:
        convolve_horiz_8_dspr2(src, (int32_t)src_stride, dst,
                               (int32_t)dst_stride, filter_x, (int32_t)h);
        break;
      case 16:
        convolve_horiz_16_dspr2(src, (int32_t)src_stride, dst,
                                (int32_t)dst_stride, filter_x, (int32_t)h, 1);
        break;
      case 32:
        convolve_horiz_16_dspr2(src, (int32_t)src_stride, dst,
                                (int32_t)dst_stride, filter_x, (int32_t)h, 2);
        break;
      case 64:
        prefetch_load(src + 64);
        prefetch_store(dst + 32);
        convolve_horiz_64_dspr2(src, (int32_t)src_stride, dst,
                                (int32_t)dst_stride, filter_x, (int32_t)h);
        break;
      default:
        vpx_convolve8_horiz_c(src + 3, src_stride, dst, dst_stride, filter,
                              x0_q4, x_step_q4, y0_q4, y_step_q4, w, h);
        break;
    }
  }
}
#endif

libvpx-1.8.2/vpx_dsp/mips/convolve8_vert_dspr2.c

/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
*/ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/convolve_common_dspr2.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_filter.h" #include "vpx_ports/mem.h" #if HAVE_DSPR2 static void convolve_vert_4_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t w, int32_t h) { int32_t x, y; const uint8_t *src_ptr; uint8_t *dst_ptr; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector4a = 64; uint32_t load1, load2, load3, load4; uint32_t p1, p2; uint32_t n1, n2; uint32_t scratch1, scratch2; uint32_t store1, store2; int32_t vector1b, vector2b, vector3b, vector4b; int32_t Temp1, Temp2; vector1b = ((const int32_t *)filter_y)[0]; vector2b = ((const int32_t *)filter_y)[1]; vector3b = ((const int32_t *)filter_y)[2]; vector4b = ((const int32_t *)filter_y)[3]; src -= 3 * src_stride; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_store(dst + dst_stride); for (x = 0; x < w; x += 4) { src_ptr = src + x; dst_ptr = dst + x; __asm__ __volatile__( "ulw %[load1], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load2], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load3], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load4], 0(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "mtlo %[vector4a], $ac1 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac0 \n\t" "mthi $zero, $ac1 \n\t" "mthi $zero, $ac2 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[scratch1], %[load1] \n\t" "preceu.ph.qbr %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbr %[scratch2], %[load3] \n\t" "preceu.ph.qbr %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t" "preceu.ph.qbl %[scratch1], %[load1] \n\t" "preceu.ph.qbl %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbl %[scratch2], %[load3] \n\t" "preceu.ph.qbl %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load1], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load2], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load3], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load4], 0(%[src_ptr]) \n\t" "preceu.ph.qbr %[scratch1], %[load1] \n\t" "preceu.ph.qbr %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbr %[scratch2], %[load3] \n\t" "preceu.ph.qbr %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t" "extp %[Temp1], $ac0, 31 \n\t" "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t" "extp 
%[Temp2], $ac1, 31 \n\t" "preceu.ph.qbl %[scratch1], %[load1] \n\t" "preceu.ph.qbl %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbl %[scratch2], %[load3] \n\t" "preceu.ph.qbl %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "lbux %[store1], %[Temp1](%[cm]) \n\t" "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" "extp %[Temp1], $ac2, 31 \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t" "extp %[Temp2], $ac3, 31 \n\t" "sb %[store1], 0(%[dst_ptr]) \n\t" "sb %[store2], 1(%[dst_ptr]) \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "sb %[store1], 2(%[dst_ptr]) \n\t" "sb %[store2], 3(%[dst_ptr]) \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [p1] "=&r"(p1), [p2] "=&r"(p2), [n1] "=&r"(n1), [n2] "=&r"(n2), [scratch1] "=&r"(scratch1), [scratch2] "=&r"(scratch2), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [store1] "=&r"(store1), [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), [vector4a] "r"(vector4a), [src_stride] "r"(src_stride), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr)); } /* Next row... */ src += src_stride; dst += dst_stride; } } static void convolve_vert_64_dspr2(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, const int16_t *filter_y, int32_t h) { int32_t x, y; const uint8_t *src_ptr; uint8_t *dst_ptr; uint8_t *cm = vpx_ff_cropTbl; uint32_t vector4a = 64; uint32_t load1, load2, load3, load4; uint32_t p1, p2; uint32_t n1, n2; uint32_t scratch1, scratch2; uint32_t store1, store2; int32_t vector1b, vector2b, vector3b, vector4b; int32_t Temp1, Temp2; vector1b = ((const int32_t *)filter_y)[0]; vector2b = ((const int32_t *)filter_y)[1]; vector3b = ((const int32_t *)filter_y)[2]; vector4b = ((const int32_t *)filter_y)[3]; src -= 3 * src_stride; for (y = h; y--;) { /* prefetch data to cache memory */ prefetch_store(dst + dst_stride); prefetch_store(dst + dst_stride + 32); for (x = 0; x < 64; x += 4) { src_ptr = src + x; dst_ptr = dst + x; __asm__ __volatile__( "ulw %[load1], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load2], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load3], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load4], 0(%[src_ptr]) \n\t" "mtlo %[vector4a], $ac0 \n\t" "mtlo %[vector4a], $ac1 \n\t" "mtlo %[vector4a], $ac2 \n\t" "mtlo %[vector4a], $ac3 \n\t" "mthi $zero, $ac0 \n\t" "mthi $zero, $ac1 \n\t" "mthi $zero, $ac2 \n\t" "mthi $zero, $ac3 \n\t" "preceu.ph.qbr %[scratch1], %[load1] \n\t" "preceu.ph.qbr %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbr %[scratch2], %[load3] \n\t" "preceu.ph.qbr %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac0, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac0, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac1, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac1, %[n2], %[vector2b] \n\t" "preceu.ph.qbl %[scratch1], %[load1] \n\t" "preceu.ph.qbl %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" 
/* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbl %[scratch2], %[load3] \n\t" "preceu.ph.qbl %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac2, %[p1], %[vector1b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector2b] \n\t" "dpa.w.ph $ac3, %[n1], %[vector1b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector2b] \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load1], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load2], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load3], 0(%[src_ptr]) \n\t" "add %[src_ptr], %[src_ptr], %[src_stride] \n\t" "ulw %[load4], 0(%[src_ptr]) \n\t" "preceu.ph.qbr %[scratch1], %[load1] \n\t" "preceu.ph.qbr %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbr %[scratch2], %[load3] \n\t" "preceu.ph.qbr %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "dpa.w.ph $ac0, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac0, %[p2], %[vector4b] \n\t" "extp %[Temp1], $ac0, 31 \n\t" "dpa.w.ph $ac1, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac1, %[n2], %[vector4b] \n\t" "extp %[Temp2], $ac1, 31 \n\t" "preceu.ph.qbl %[scratch1], %[load1] \n\t" "preceu.ph.qbl %[p1], %[load2] \n\t" "precrq.ph.w %[n1], %[p1], %[scratch1] \n\t" /* pixel 2 */ "append %[p1], %[scratch1], 16 \n\t" /* pixel 1 */ "preceu.ph.qbl %[scratch2], %[load3] \n\t" "preceu.ph.qbl %[p2], %[load4] \n\t" "precrq.ph.w %[n2], %[p2], %[scratch2] \n\t" /* pixel 2 */ "append %[p2], %[scratch2], 16 \n\t" /* pixel 1 */ "lbux %[store1], %[Temp1](%[cm]) \n\t" "dpa.w.ph $ac2, %[p1], %[vector3b] \n\t" "dpa.w.ph $ac2, %[p2], %[vector4b] \n\t" "extp %[Temp1], $ac2, 31 \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "dpa.w.ph $ac3, %[n1], %[vector3b] \n\t" "dpa.w.ph $ac3, %[n2], %[vector4b] \n\t" "extp %[Temp2], $ac3, 31 \n\t" "sb %[store1], 0(%[dst_ptr]) \n\t" "sb %[store2], 1(%[dst_ptr]) \n\t" "lbux %[store1], %[Temp1](%[cm]) \n\t" "lbux %[store2], %[Temp2](%[cm]) \n\t" "sb %[store1], 2(%[dst_ptr]) \n\t" "sb %[store2], 3(%[dst_ptr]) \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [p1] "=&r"(p1), [p2] "=&r"(p2), [n1] "=&r"(n1), [n2] "=&r"(n2), [scratch1] "=&r"(scratch1), [scratch2] "=&r"(scratch2), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [store1] "=&r"(store1), [store2] "=&r"(store2), [src_ptr] "+r"(src_ptr) : [vector1b] "r"(vector1b), [vector2b] "r"(vector2b), [vector3b] "r"(vector3b), [vector4b] "r"(vector4b), [vector4a] "r"(vector4a), [src_stride] "r"(src_stride), [cm] "r"(cm), [dst_ptr] "r"(dst_ptr)); } /* Next row... 
*/
    src += src_stride;
    dst += dst_stride;
  }
}

void vpx_convolve8_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const InterpKernel *filter, int x0_q4,
                              int x_step_q4, int y0_q4, int y_step_q4, int w,
                              int h) {
  const int16_t *const filter_y = filter[y0_q4];

  assert(y_step_q4 == 16);
  assert(((const int32_t *)filter_y)[1] != 0x800000);

  if (vpx_get_filter_taps(filter_y) == 2) {
    vpx_convolve2_vert_dspr2(src, src_stride, dst, dst_stride, filter, x0_q4,
                             x_step_q4, y0_q4, y_step_q4, w, h);
  } else {
    uint32_t pos = 38;

    /* bit position for extract from acc */
    __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos));

    prefetch_store(dst);

    switch (w) {
      case 4:
      case 8:
      case 16:
      case 32:
        convolve_vert_4_dspr2(src, src_stride, dst, dst_stride, filter_y, w,
                              h);
        break;
      case 64:
        prefetch_store(dst + 32);
        convolve_vert_64_dspr2(src, src_stride, dst, dst_stride, filter_y, h);
        break;
      default:
        vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4,
                             x_step_q4, y0_q4, y_step_q4, w, h);
        break;
    }
  }
}
#endif

libvpx-1.8.2/vpx_dsp/mips/convolve_common_dspr2.h

/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VPX_DSP_MIPS_CONVOLVE_COMMON_DSPR2_H_
#define VPX_VPX_DSP_MIPS_CONVOLVE_COMMON_DSPR2_H_

#include <assert.h>

#include "./vpx_config.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/mips/common_dspr2.h"

#ifdef __cplusplus
extern "C" {
#endif

#if HAVE_DSPR2
void vpx_convolve2_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const InterpKernel *filter, int x0_q4,
                               int32_t x_step_q4, int y0_q4, int y_step_q4,
                               int w, int h);

void vpx_convolve2_avg_horiz_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                   uint8_t *dst, ptrdiff_t dst_stride,
                                   const InterpKernel *filter, int x0_q4,
                                   int32_t x_step_q4, int y0_q4, int y_step_q4,
                                   int w, int h);

void vpx_convolve2_avg_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                                  uint8_t *dst, ptrdiff_t dst_stride,
                                  const InterpKernel *filter, int x0_q4,
                                  int32_t x_step_q4, int y0_q4, int y_step_q4,
                                  int w, int h);

void vpx_convolve2_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                         uint8_t *dst, ptrdiff_t dst_stride,
                         const int16_t *filter, int w, int h);

void vpx_convolve2_vert_dspr2(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const InterpKernel *filter, int x0_q4,
                              int32_t x_step_q4, int y0_q4, int y_step_q4,
                              int w, int h);
#endif  // #if HAVE_DSPR2

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VPX_DSP_MIPS_CONVOLVE_COMMON_DSPR2_H_

libvpx-1.8.2/vpx_dsp/mips/deblock_msa.c

/*
 *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
*/ #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" extern const int16_t vpx_rv[]; #define VPX_TRANSPOSE8x16_UB_UB( \ in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3, out4, \ out5, out6, out7, out8, out9, out10, out11, out12, out13, out14, out15) \ { \ v8i16 temp0, temp1, temp2, temp3, temp4; \ v8i16 temp5, temp6, temp7, temp8, temp9; \ \ ILVR_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2, \ temp3); \ ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \ ILVRL_W2_SH(temp5, temp4, temp6, temp7); \ ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \ ILVRL_W2_SH(temp5, temp4, temp8, temp9); \ ILVL_B4_SH(in1, in0, in3, in2, in5, in4, in7, in6, temp0, temp1, temp2, \ temp3); \ ILVR_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \ ILVRL_W2_UB(temp5, temp4, out8, out10); \ ILVL_H2_SH(temp1, temp0, temp3, temp2, temp4, temp5); \ ILVRL_W2_UB(temp5, temp4, out12, out14); \ out0 = (v16u8)temp6; \ out2 = (v16u8)temp7; \ out4 = (v16u8)temp8; \ out6 = (v16u8)temp9; \ out9 = (v16u8)__msa_ilvl_d((v2i64)out8, (v2i64)out8); \ out11 = (v16u8)__msa_ilvl_d((v2i64)out10, (v2i64)out10); \ out13 = (v16u8)__msa_ilvl_d((v2i64)out12, (v2i64)out12); \ out15 = (v16u8)__msa_ilvl_d((v2i64)out14, (v2i64)out14); \ out1 = (v16u8)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \ out3 = (v16u8)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \ out5 = (v16u8)__msa_ilvl_d((v2i64)out4, (v2i64)out4); \ out7 = (v16u8)__msa_ilvl_d((v2i64)out6, (v2i64)out6); \ } #define VPX_AVER_IF_RETAIN(above2_in, above1_in, src_in, below1_in, below2_in, \ ref, out) \ { \ v16u8 temp0, temp1; \ \ temp1 = __msa_aver_u_b(above2_in, above1_in); \ temp0 = __msa_aver_u_b(below2_in, below1_in); \ temp1 = __msa_aver_u_b(temp1, temp0); \ out = __msa_aver_u_b(src_in, temp1); \ temp0 = __msa_asub_u_b(src_in, above2_in); \ temp1 = __msa_asub_u_b(src_in, above1_in); \ temp0 = (temp0 < ref); \ temp1 = (temp1 < ref); \ temp0 = temp0 & temp1; \ temp1 = __msa_asub_u_b(src_in, below1_in); \ temp1 = (temp1 < ref); \ temp0 = temp0 & temp1; \ temp1 = __msa_asub_u_b(src_in, below2_in); \ temp1 = (temp1 < ref); \ temp0 = temp0 & temp1; \ out = __msa_bmz_v(out, src_in, temp0); \ } #define TRANSPOSE12x16_B(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, \ in10, in11, in12, in13, in14, in15) \ { \ v8i16 temp0, temp1, temp2, temp3, temp4; \ v8i16 temp5, temp6, temp7, temp8, temp9; \ \ ILVR_B2_SH(in1, in0, in3, in2, temp0, temp1); \ ILVRL_H2_SH(temp1, temp0, temp2, temp3); \ ILVR_B2_SH(in5, in4, in7, in6, temp0, temp1); \ ILVRL_H2_SH(temp1, temp0, temp4, temp5); \ ILVRL_W2_SH(temp4, temp2, temp0, temp1); \ ILVRL_W2_SH(temp5, temp3, temp2, temp3); \ ILVR_B2_SH(in9, in8, in11, in10, temp4, temp5); \ ILVR_B2_SH(in9, in8, in11, in10, temp4, temp5); \ ILVRL_H2_SH(temp5, temp4, temp6, temp7); \ ILVR_B2_SH(in13, in12, in15, in14, temp4, temp5); \ ILVRL_H2_SH(temp5, temp4, temp8, temp9); \ ILVRL_W2_SH(temp8, temp6, temp4, temp5); \ ILVRL_W2_SH(temp9, temp7, temp6, temp7); \ ILVL_B2_SH(in1, in0, in3, in2, temp8, temp9); \ ILVR_D2_UB(temp4, temp0, temp5, temp1, in0, in2); \ in1 = (v16u8)__msa_ilvl_d((v2i64)temp4, (v2i64)temp0); \ in3 = (v16u8)__msa_ilvl_d((v2i64)temp5, (v2i64)temp1); \ ILVL_B2_SH(in5, in4, in7, in6, temp0, temp1); \ ILVR_D2_UB(temp6, temp2, temp7, temp3, in4, in6); \ in5 = (v16u8)__msa_ilvl_d((v2i64)temp6, (v2i64)temp2); \ in7 = (v16u8)__msa_ilvl_d((v2i64)temp7, (v2i64)temp3); \ ILVL_B4_SH(in9, in8, in11, in10, in13, in12, in15, in14, temp2, temp3, \ temp4, temp5); \ ILVR_H4_SH(temp9, temp8, temp1, temp0, temp3, 
temp2, temp5, temp4, temp6, \ temp7, temp8, temp9); \ ILVR_W2_SH(temp7, temp6, temp9, temp8, temp0, temp1); \ in8 = (v16u8)__msa_ilvr_d((v2i64)temp1, (v2i64)temp0); \ in9 = (v16u8)__msa_ilvl_d((v2i64)temp1, (v2i64)temp0); \ ILVL_W2_SH(temp7, temp6, temp9, temp8, temp2, temp3); \ in10 = (v16u8)__msa_ilvr_d((v2i64)temp3, (v2i64)temp2); \ in11 = (v16u8)__msa_ilvl_d((v2i64)temp3, (v2i64)temp2); \ } #define VPX_TRANSPOSE12x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, in8, \ in9, in10, in11) \ { \ v8i16 temp0, temp1, temp2, temp3; \ v8i16 temp4, temp5, temp6, temp7; \ \ ILVR_B2_SH(in1, in0, in3, in2, temp0, temp1); \ ILVRL_H2_SH(temp1, temp0, temp2, temp3); \ ILVR_B2_SH(in5, in4, in7, in6, temp0, temp1); \ ILVRL_H2_SH(temp1, temp0, temp4, temp5); \ ILVRL_W2_SH(temp4, temp2, temp0, temp1); \ ILVRL_W2_SH(temp5, temp3, temp2, temp3); \ ILVL_B2_SH(in1, in0, in3, in2, temp4, temp5); \ temp4 = __msa_ilvr_h(temp5, temp4); \ ILVL_B2_SH(in5, in4, in7, in6, temp6, temp7); \ temp5 = __msa_ilvr_h(temp7, temp6); \ ILVRL_W2_SH(temp5, temp4, temp6, temp7); \ in0 = (v16u8)temp0; \ in2 = (v16u8)temp1; \ in4 = (v16u8)temp2; \ in6 = (v16u8)temp3; \ in8 = (v16u8)temp6; \ in10 = (v16u8)temp7; \ in1 = (v16u8)__msa_ilvl_d((v2i64)temp0, (v2i64)temp0); \ in3 = (v16u8)__msa_ilvl_d((v2i64)temp1, (v2i64)temp1); \ in5 = (v16u8)__msa_ilvl_d((v2i64)temp2, (v2i64)temp2); \ in7 = (v16u8)__msa_ilvl_d((v2i64)temp3, (v2i64)temp3); \ in9 = (v16u8)__msa_ilvl_d((v2i64)temp6, (v2i64)temp6); \ in11 = (v16u8)__msa_ilvl_d((v2i64)temp7, (v2i64)temp7); \ } static void postproc_down_across_chroma_msa(uint8_t *src_ptr, uint8_t *dst_ptr, int32_t src_stride, int32_t dst_stride, int32_t cols, uint8_t *f) { uint8_t *p_src = src_ptr; uint8_t *p_dst = dst_ptr; uint8_t *f_orig = f; uint8_t *p_dst_st = dst_ptr; uint16_t col; uint64_t out0, out1, out2, out3; v16u8 above2, above1, below2, below1, src, ref, ref_temp; v16u8 inter0, inter1, inter2, inter3, inter4, inter5; v16u8 inter6, inter7, inter8, inter9, inter10, inter11; for (col = (cols / 16); col--;) { ref = LD_UB(f); LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1); src = LD_UB(p_src); LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0); above2 = LD_UB(p_src + 3 * src_stride); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1); above1 = LD_UB(p_src + 4 * src_stride); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2); src = LD_UB(p_src + 5 * src_stride); VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3); below1 = LD_UB(p_src + 6 * src_stride); VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4); below2 = LD_UB(p_src + 7 * src_stride); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5); above2 = LD_UB(p_src + 8 * src_stride); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6); above1 = LD_UB(p_src + 9 * src_stride); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7); ST_UB8(inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7, p_dst, dst_stride); p_dst += 16; p_src += 16; f += 16; } if (0 != (cols / 16)) { ref = LD_UB(f); LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1); src = LD_UB(p_src); LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0); above2 = LD_UB(p_src + 3 * src_stride); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1); above1 = LD_UB(p_src + 4 * src_stride); 
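    /* VPX_AVER_IF_RETAIN (defined above) implements the following per byte,
       given as a scalar reference sketch, where avg() is the rounding
       average (a + b + 1) >> 1 that __msa_aver_u_b performs:

         out = avg(src, avg(avg(above2, above1), avg(below2, below1)));
         if (!(|src - above2| < ref && |src - above1| < ref &&
               |src - below1| < ref && |src - below2| < ref))
           out = src;   (bmz_v restores the original pixel)

       The five vectors act as a sliding five-row window: each step loads one
       new row and rotates the roles of above2/above1/src/below1/below2,
       which is why the argument order shifts from one macro call to the
       next. */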
VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2); src = LD_UB(p_src + 5 * src_stride); VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3); below1 = LD_UB(p_src + 6 * src_stride); VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4); below2 = LD_UB(p_src + 7 * src_stride); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5); above2 = LD_UB(p_src + 8 * src_stride); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6); above1 = LD_UB(p_src + 9 * src_stride); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7); out0 = __msa_copy_u_d((v2i64)inter0, 0); out1 = __msa_copy_u_d((v2i64)inter1, 0); out2 = __msa_copy_u_d((v2i64)inter2, 0); out3 = __msa_copy_u_d((v2i64)inter3, 0); SD4(out0, out1, out2, out3, p_dst, dst_stride); out0 = __msa_copy_u_d((v2i64)inter4, 0); out1 = __msa_copy_u_d((v2i64)inter5, 0); out2 = __msa_copy_u_d((v2i64)inter6, 0); out3 = __msa_copy_u_d((v2i64)inter7, 0); SD4(out0, out1, out2, out3, p_dst + 4 * dst_stride, dst_stride); } f = f_orig; p_dst = dst_ptr - 2; LD_UB8(p_dst, dst_stride, inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7); for (col = 0; col < (cols / 8); ++col) { ref = LD_UB(f); f += 8; VPX_TRANSPOSE12x8_UB_UB(inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7, inter8, inter9, inter10, inter11); if (0 == col) { above2 = inter2; above1 = inter2; } else { above2 = inter0; above1 = inter1; } src = inter2; below1 = inter3; below2 = inter4; ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 0); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref_temp, inter2); above2 = inter5; ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 1); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref_temp, inter3); above1 = inter6; ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 2); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref_temp, inter4); src = inter7; ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 3); VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref_temp, inter5); below1 = inter8; ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 4); VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref_temp, inter6); below2 = inter9; ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 5); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref_temp, inter7); if (col == (cols / 8 - 1)) { above2 = inter9; } else { above2 = inter10; } ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 6); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref_temp, inter8); if (col == (cols / 8 - 1)) { above1 = inter9; } else { above1 = inter11; } ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 7); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref_temp, inter9); TRANSPOSE8x8_UB_UB(inter2, inter3, inter4, inter5, inter6, inter7, inter8, inter9, inter2, inter3, inter4, inter5, inter6, inter7, inter8, inter9); p_dst += 8; LD_UB2(p_dst, dst_stride, inter0, inter1); ST8x1_UB(inter2, p_dst_st); ST8x1_UB(inter3, (p_dst_st + 1 * dst_stride)); LD_UB2(p_dst + 2 * dst_stride, dst_stride, inter2, inter3); ST8x1_UB(inter4, (p_dst_st + 2 * dst_stride)); ST8x1_UB(inter5, (p_dst_st + 3 * dst_stride)); LD_UB2(p_dst + 4 * dst_stride, dst_stride, inter4, inter5); ST8x1_UB(inter6, (p_dst_st + 4 * dst_stride)); ST8x1_UB(inter7, (p_dst_st + 5 * dst_stride)); LD_UB2(p_dst + 6 * dst_stride, dst_stride, inter6, inter7); ST8x1_UB(inter8, (p_dst_st + 6 * dst_stride)); ST8x1_UB(inter9, (p_dst_st + 7 * dst_stride)); p_dst_st += 8; } } static void postproc_down_across_luma_msa(uint8_t 
*src_ptr, uint8_t *dst_ptr, int32_t src_stride, int32_t dst_stride, int32_t cols, uint8_t *f) { uint8_t *p_src = src_ptr; uint8_t *p_dst = dst_ptr; uint8_t *p_dst_st = dst_ptr; uint8_t *f_orig = f; uint16_t col; uint64_t out0, out1, out2, out3; v16u8 above2, above1, below2, below1; v16u8 src, ref, ref_temp; v16u8 inter0, inter1, inter2, inter3, inter4, inter5, inter6; v16u8 inter7, inter8, inter9, inter10, inter11; v16u8 inter12, inter13, inter14, inter15; for (col = (cols / 16); col--;) { ref = LD_UB(f); LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1); src = LD_UB(p_src); LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0); above2 = LD_UB(p_src + 3 * src_stride); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1); above1 = LD_UB(p_src + 4 * src_stride); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2); src = LD_UB(p_src + 5 * src_stride); VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3); below1 = LD_UB(p_src + 6 * src_stride); VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4); below2 = LD_UB(p_src + 7 * src_stride); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5); above2 = LD_UB(p_src + 8 * src_stride); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6); above1 = LD_UB(p_src + 9 * src_stride); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7); src = LD_UB(p_src + 10 * src_stride); VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter8); below1 = LD_UB(p_src + 11 * src_stride); VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter9); below2 = LD_UB(p_src + 12 * src_stride); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter10); above2 = LD_UB(p_src + 13 * src_stride); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter11); above1 = LD_UB(p_src + 14 * src_stride); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter12); src = LD_UB(p_src + 15 * src_stride); VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter13); below1 = LD_UB(p_src + 16 * src_stride); VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter14); below2 = LD_UB(p_src + 17 * src_stride); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter15); ST_UB8(inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7, p_dst, dst_stride); ST_UB8(inter8, inter9, inter10, inter11, inter12, inter13, inter14, inter15, p_dst + 8 * dst_stride, dst_stride); p_src += 16; p_dst += 16; f += 16; } if (0 != (cols / 16)) { ref = LD_UB(f); LD_UB2(p_src - 2 * src_stride, src_stride, above2, above1); src = LD_UB(p_src); LD_UB2(p_src + 1 * src_stride, src_stride, below1, below2); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter0); above2 = LD_UB(p_src + 3 * src_stride); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter1); above1 = LD_UB(p_src + 4 * src_stride); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter2); src = LD_UB(p_src + 5 * src_stride); VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter3); below1 = LD_UB(p_src + 6 * src_stride); VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter4); below2 = LD_UB(p_src + 7 * src_stride); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter5); above2 = LD_UB(p_src + 8 * src_stride); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter6); above1 = LD_UB(p_src + 9 * 
src_stride); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter7); src = LD_UB(p_src + 10 * src_stride); VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter8); below1 = LD_UB(p_src + 11 * src_stride); VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter9); below2 = LD_UB(p_src + 12 * src_stride); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter10); above2 = LD_UB(p_src + 13 * src_stride); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref, inter11); above1 = LD_UB(p_src + 14 * src_stride); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref, inter12); src = LD_UB(p_src + 15 * src_stride); VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref, inter13); below1 = LD_UB(p_src + 16 * src_stride); VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref, inter14); below2 = LD_UB(p_src + 17 * src_stride); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref, inter15); out0 = __msa_copy_u_d((v2i64)inter0, 0); out1 = __msa_copy_u_d((v2i64)inter1, 0); out2 = __msa_copy_u_d((v2i64)inter2, 0); out3 = __msa_copy_u_d((v2i64)inter3, 0); SD4(out0, out1, out2, out3, p_dst, dst_stride); out0 = __msa_copy_u_d((v2i64)inter4, 0); out1 = __msa_copy_u_d((v2i64)inter5, 0); out2 = __msa_copy_u_d((v2i64)inter6, 0); out3 = __msa_copy_u_d((v2i64)inter7, 0); SD4(out0, out1, out2, out3, p_dst + 4 * dst_stride, dst_stride); out0 = __msa_copy_u_d((v2i64)inter8, 0); out1 = __msa_copy_u_d((v2i64)inter9, 0); out2 = __msa_copy_u_d((v2i64)inter10, 0); out3 = __msa_copy_u_d((v2i64)inter11, 0); SD4(out0, out1, out2, out3, p_dst + 8 * dst_stride, dst_stride); out0 = __msa_copy_u_d((v2i64)inter12, 0); out1 = __msa_copy_u_d((v2i64)inter13, 0); out2 = __msa_copy_u_d((v2i64)inter14, 0); out3 = __msa_copy_u_d((v2i64)inter15, 0); SD4(out0, out1, out2, out3, p_dst + 12 * dst_stride, dst_stride); } f = f_orig; p_dst = dst_ptr - 2; LD_UB8(p_dst, dst_stride, inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7); LD_UB8(p_dst + 8 * dst_stride, dst_stride, inter8, inter9, inter10, inter11, inter12, inter13, inter14, inter15); for (col = 0; col < cols / 8; ++col) { ref = LD_UB(f); f += 8; TRANSPOSE12x16_B(inter0, inter1, inter2, inter3, inter4, inter5, inter6, inter7, inter8, inter9, inter10, inter11, inter12, inter13, inter14, inter15); if (0 == col) { above2 = inter2; above1 = inter2; } else { above2 = inter0; above1 = inter1; } src = inter2; below1 = inter3; below2 = inter4; ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 0); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref_temp, inter2); above2 = inter5; ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 1); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref_temp, inter3); above1 = inter6; ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 2); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref_temp, inter4); src = inter7; ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 3); VPX_AVER_IF_RETAIN(below1, below2, above2, above1, src, ref_temp, inter5); below1 = inter8; ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 4); VPX_AVER_IF_RETAIN(below2, above2, above1, src, below1, ref_temp, inter6); below2 = inter9; ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 5); VPX_AVER_IF_RETAIN(above2, above1, src, below1, below2, ref_temp, inter7); if (col == (cols / 8 - 1)) { above2 = inter9; } else { above2 = inter10; } ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 6); VPX_AVER_IF_RETAIN(above1, src, below1, below2, above2, ref_temp, inter8); if (col == (cols / 8 - 1)) { above1 = inter9; } else { 
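/* Not the last 8-column group: the true right-hand neighbour columns
   from the transpose (inter10/inter11) are available. */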
above1 = inter11; } ref_temp = (v16u8)__msa_splati_b((v16i8)ref, 7); VPX_AVER_IF_RETAIN(src, below1, below2, above2, above1, ref_temp, inter9); VPX_TRANSPOSE8x16_UB_UB(inter2, inter3, inter4, inter5, inter6, inter7, inter8, inter9, inter2, inter3, inter4, inter5, inter6, inter7, inter8, inter9, inter10, inter11, inter12, inter13, inter14, inter15, above2, above1); p_dst += 8; LD_UB2(p_dst, dst_stride, inter0, inter1); ST8x1_UB(inter2, p_dst_st); ST8x1_UB(inter3, (p_dst_st + 1 * dst_stride)); LD_UB2(p_dst + 2 * dst_stride, dst_stride, inter2, inter3); ST8x1_UB(inter4, (p_dst_st + 2 * dst_stride)); ST8x1_UB(inter5, (p_dst_st + 3 * dst_stride)); LD_UB2(p_dst + 4 * dst_stride, dst_stride, inter4, inter5); ST8x1_UB(inter6, (p_dst_st + 4 * dst_stride)); ST8x1_UB(inter7, (p_dst_st + 5 * dst_stride)); LD_UB2(p_dst + 6 * dst_stride, dst_stride, inter6, inter7); ST8x1_UB(inter8, (p_dst_st + 6 * dst_stride)); ST8x1_UB(inter9, (p_dst_st + 7 * dst_stride)); LD_UB2(p_dst + 8 * dst_stride, dst_stride, inter8, inter9); ST8x1_UB(inter10, (p_dst_st + 8 * dst_stride)); ST8x1_UB(inter11, (p_dst_st + 9 * dst_stride)); LD_UB2(p_dst + 10 * dst_stride, dst_stride, inter10, inter11); ST8x1_UB(inter12, (p_dst_st + 10 * dst_stride)); ST8x1_UB(inter13, (p_dst_st + 11 * dst_stride)); LD_UB2(p_dst + 12 * dst_stride, dst_stride, inter12, inter13); ST8x1_UB(inter14, (p_dst_st + 12 * dst_stride)); ST8x1_UB(inter15, (p_dst_st + 13 * dst_stride)); LD_UB2(p_dst + 14 * dst_stride, dst_stride, inter14, inter15); ST8x1_UB(above2, (p_dst_st + 14 * dst_stride)); ST8x1_UB(above1, (p_dst_st + 15 * dst_stride)); p_dst_st += 8; } } void vpx_post_proc_down_and_across_mb_row_msa(uint8_t *src, uint8_t *dst, int32_t src_stride, int32_t dst_stride, int32_t cols, uint8_t *f, int32_t size) { if (8 == size) { postproc_down_across_chroma_msa(src, dst, src_stride, dst_stride, cols, f); } else if (16 == size) { postproc_down_across_luma_msa(src, dst, src_stride, dst_stride, cols, f); } } void vpx_mbpost_proc_across_ip_msa(uint8_t *src, int32_t pitch, int32_t rows, int32_t cols, int32_t flimit) { int32_t row, col, cnt; uint8_t *src_dup = src; v16u8 src0, src1, tmp_orig; v16u8 tmp = { 0 }; v16i8 zero = { 0 }; v8u16 sum_h, src_r_h, src_l_h; v4u32 src_r_w; v4i32 flimit_vec; flimit_vec = __msa_fill_w(flimit); for (row = rows; row--;) { int32_t sum_sq; int32_t sum = 0; src0 = (v16u8)__msa_fill_b(src_dup[0]); ST8x1_UB(src0, (src_dup - 8)); src0 = (v16u8)__msa_fill_b(src_dup[cols - 1]); ST_UB(src0, src_dup + cols); src_dup[cols + 16] = src_dup[cols - 1]; tmp_orig = (v16u8)__msa_ldi_b(0); tmp_orig[15] = tmp[15]; src1 = LD_UB(src_dup - 8); src1[15] = 0; ILVRL_B2_UH(zero, src1, src_r_h, src_l_h); src_r_w = __msa_dotp_u_w(src_r_h, src_r_h); src_r_w += __msa_dotp_u_w(src_l_h, src_l_h); sum_sq = HADD_SW_S32(src_r_w) + 16; sum_h = __msa_hadd_u_h(src1, src1); sum = HADD_UH_U32(sum_h); { v16u8 src7, src8, src_r, src_l; v16i8 mask; v8u16 add_r, add_l; v8i16 sub_r, sub_l, sum_r, sum_l, mask0, mask1; v4i32 sum_sq0, sum_sq1, sum_sq2, sum_sq3; v4i32 sub0, sub1, sub2, sub3; v4i32 sum0_w, sum1_w, sum2_w, sum3_w; v4i32 mul0, mul1, mul2, mul3; v4i32 total0, total1, total2, total3; v8i16 const8 = __msa_fill_h(8); src7 = LD_UB(src_dup + 7); src8 = LD_UB(src_dup - 8); for (col = 0; col < (cols >> 4); ++col) { ILVRL_B2_UB(src7, src8, src_r, src_l); HSUB_UB2_SH(src_r, src_l, sub_r, sub_l); sum_r[0] = sum + sub_r[0]; for (cnt = 0; cnt < 7; ++cnt) { sum_r[cnt + 1] = sum_r[cnt] + sub_r[cnt + 1]; } sum_l[0] = sum_r[7] + sub_l[0]; for (cnt = 0; cnt < 7; ++cnt) { sum_l[cnt + 1] = 
sum_l[cnt] + sub_l[cnt + 1]; } sum = sum_l[7]; src1 = LD_UB(src_dup + 16 * col); ILVRL_B2_UH(zero, src1, src_r_h, src_l_h); src7 = (v16u8)((const8 + sum_r + (v8i16)src_r_h) >> 4); src8 = (v16u8)((const8 + sum_l + (v8i16)src_l_h) >> 4); tmp = (v16u8)__msa_pckev_b((v16i8)src8, (v16i8)src7); HADD_UB2_UH(src_r, src_l, add_r, add_l); UNPCK_SH_SW(sub_r, sub0, sub1); UNPCK_SH_SW(sub_l, sub2, sub3); ILVR_H2_SW(zero, add_r, zero, add_l, sum0_w, sum2_w); ILVL_H2_SW(zero, add_r, zero, add_l, sum1_w, sum3_w); MUL4(sum0_w, sub0, sum1_w, sub1, sum2_w, sub2, sum3_w, sub3, mul0, mul1, mul2, mul3); sum_sq0[0] = sum_sq + mul0[0]; for (cnt = 0; cnt < 3; ++cnt) { sum_sq0[cnt + 1] = sum_sq0[cnt] + mul0[cnt + 1]; } sum_sq1[0] = sum_sq0[3] + mul1[0]; for (cnt = 0; cnt < 3; ++cnt) { sum_sq1[cnt + 1] = sum_sq1[cnt] + mul1[cnt + 1]; } sum_sq2[0] = sum_sq1[3] + mul2[0]; for (cnt = 0; cnt < 3; ++cnt) { sum_sq2[cnt + 1] = sum_sq2[cnt] + mul2[cnt + 1]; } sum_sq3[0] = sum_sq2[3] + mul3[0]; for (cnt = 0; cnt < 3; ++cnt) { sum_sq3[cnt + 1] = sum_sq3[cnt] + mul3[cnt + 1]; } sum_sq = sum_sq3[3]; UNPCK_SH_SW(sum_r, sum0_w, sum1_w); UNPCK_SH_SW(sum_l, sum2_w, sum3_w); total0 = sum_sq0 * __msa_ldi_w(15); total0 -= sum0_w * sum0_w; total1 = sum_sq1 * __msa_ldi_w(15); total1 -= sum1_w * sum1_w; total2 = sum_sq2 * __msa_ldi_w(15); total2 -= sum2_w * sum2_w; total3 = sum_sq3 * __msa_ldi_w(15); total3 -= sum3_w * sum3_w; total0 = (total0 < flimit_vec); total1 = (total1 < flimit_vec); total2 = (total2 < flimit_vec); total3 = (total3 < flimit_vec); PCKEV_H2_SH(total1, total0, total3, total2, mask0, mask1); mask = __msa_pckev_b((v16i8)mask1, (v16i8)mask0); tmp = __msa_bmz_v(tmp, src1, (v16u8)mask); if (col == 0) { uint64_t src_d; src_d = __msa_copy_u_d((v2i64)tmp_orig, 1); SD(src_d, (src_dup - 8)); } src7 = LD_UB(src_dup + 16 * (col + 1) + 7); src8 = LD_UB(src_dup + 16 * (col + 1) - 8); ST_UB(tmp, (src_dup + (16 * col))); } src_dup += pitch; } } } void vpx_mbpost_proc_down_msa(uint8_t *dst_ptr, int32_t pitch, int32_t rows, int32_t cols, int32_t flimit) { int32_t row, col, cnt, i; v4i32 flimit_vec; v16u8 dst7, dst8, dst_r_b, dst_l_b; v16i8 mask; v8u16 add_r, add_l; v8i16 dst_r_h, dst_l_h, sub_r, sub_l, mask0, mask1; v4i32 sub0, sub1, sub2, sub3, total0, total1, total2, total3; flimit_vec = __msa_fill_w(flimit); for (col = 0; col < (cols >> 4); ++col) { uint8_t *dst_tmp = &dst_ptr[col << 4]; v16u8 dst; v16i8 zero = { 0 }; v16u8 tmp[16]; v8i16 mult0, mult1, rv2_0, rv2_1; v8i16 sum0_h = { 0 }; v8i16 sum1_h = { 0 }; v4i32 mul0 = { 0 }; v4i32 mul1 = { 0 }; v4i32 mul2 = { 0 }; v4i32 mul3 = { 0 }; v4i32 sum0_w, sum1_w, sum2_w, sum3_w; v4i32 add0, add1, add2, add3; const int16_t *rv2[16]; dst = LD_UB(dst_tmp); for (cnt = (col << 4), i = 0; i < 16; ++cnt) { rv2[i] = vpx_rv + (i & 7); ++i; } for (cnt = -8; cnt < 0; ++cnt) { ST_UB(dst, dst_tmp + cnt * pitch); } dst = LD_UB((dst_tmp + (rows - 1) * pitch)); for (cnt = rows; cnt < rows + 17; ++cnt) { ST_UB(dst, dst_tmp + cnt * pitch); } for (cnt = -8; cnt <= 6; ++cnt) { dst = LD_UB(dst_tmp + (cnt * pitch)); UNPCK_UB_SH(dst, dst_r_h, dst_l_h); MUL2(dst_r_h, dst_r_h, dst_l_h, dst_l_h, mult0, mult1); mul0 += (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)mult0); mul1 += (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)mult0); mul2 += (v4i32)__msa_ilvr_h((v8i16)zero, (v8i16)mult1); mul3 += (v4i32)__msa_ilvl_h((v8i16)zero, (v8i16)mult1); ADD2(sum0_h, dst_r_h, sum1_h, dst_l_h, sum0_h, sum1_h); } for (row = 0; row < (rows + 8); ++row) { for (i = 0; i < 8; ++i) { rv2_0[i] = *(rv2[i] + (row & 127)); rv2_1[i] = *(rv2[i + 8] 
+ (row & 127)); } dst7 = LD_UB(dst_tmp + (7 * pitch)); dst8 = LD_UB(dst_tmp - (8 * pitch)); ILVRL_B2_UB(dst7, dst8, dst_r_b, dst_l_b); HSUB_UB2_SH(dst_r_b, dst_l_b, sub_r, sub_l); UNPCK_SH_SW(sub_r, sub0, sub1); UNPCK_SH_SW(sub_l, sub2, sub3); sum0_h += sub_r; sum1_h += sub_l; HADD_UB2_UH(dst_r_b, dst_l_b, add_r, add_l); ILVRL_H2_SW(zero, add_r, add0, add1); ILVRL_H2_SW(zero, add_l, add2, add3); mul0 += add0 * sub0; mul1 += add1 * sub1; mul2 += add2 * sub2; mul3 += add3 * sub3; dst = LD_UB(dst_tmp); ILVRL_B2_SH(zero, dst, dst_r_h, dst_l_h); dst7 = (v16u8)((rv2_0 + sum0_h + dst_r_h) >> 4); dst8 = (v16u8)((rv2_1 + sum1_h + dst_l_h) >> 4); tmp[row & 15] = (v16u8)__msa_pckev_b((v16i8)dst8, (v16i8)dst7); UNPCK_SH_SW(sum0_h, sum0_w, sum1_w); UNPCK_SH_SW(sum1_h, sum2_w, sum3_w); total0 = mul0 * __msa_ldi_w(15); total0 -= sum0_w * sum0_w; total1 = mul1 * __msa_ldi_w(15); total1 -= sum1_w * sum1_w; total2 = mul2 * __msa_ldi_w(15); total2 -= sum2_w * sum2_w; total3 = mul3 * __msa_ldi_w(15); total3 -= sum3_w * sum3_w; total0 = (total0 < flimit_vec); total1 = (total1 < flimit_vec); total2 = (total2 < flimit_vec); total3 = (total3 < flimit_vec); PCKEV_H2_SH(total1, total0, total3, total2, mask0, mask1); mask = __msa_pckev_b((v16i8)mask1, (v16i8)mask0); tmp[row & 15] = __msa_bmz_v(tmp[row & 15], dst, (v16u8)mask); if (row >= 8) { ST_UB(tmp[(row - 8) & 15], (dst_tmp - 8 * pitch)); } dst_tmp += pitch; } } } libvpx-1.8.2/vpx_dsp/mips/fwd_dct32x32_msa.c /* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
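 *
 * Overview: vpx_fdct32x32_msa() runs four 8-column 1-D column
 * transforms into a 32x32 temporary buffer, then four 8-row 1-D row
 * transforms (the first through the wider-precision _4x path) with a
 * final transpose-and-store.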
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/fwd_txfm_msa.h" static void fdct8x32_1d_column_load_butterfly(const int16_t *input, int32_t src_stride, int16_t *temp_buff) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v8i16 step0, step1, step2, step3; v8i16 in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1; v8i16 step0_1, step1_1, step2_1, step3_1; /* 1st and 2nd set */ LD_SH4(input, src_stride, in0, in1, in2, in3); LD_SH4(input + (28 * src_stride), src_stride, in4, in5, in6, in7); LD_SH4(input + (4 * src_stride), src_stride, in0_1, in1_1, in2_1, in3_1); LD_SH4(input + (24 * src_stride), src_stride, in4_1, in5_1, in6_1, in7_1); SLLI_4V(in0, in1, in2, in3, 2); SLLI_4V(in4, in5, in6, in7, 2); SLLI_4V(in0_1, in1_1, in2_1, in3_1, 2); SLLI_4V(in4_1, in5_1, in6_1, in7_1, 2); BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, step0, step1, step2, step3, in4, in5, in6, in7); BUTTERFLY_8(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1, step0_1, step1_1, step2_1, step3_1, in4_1, in5_1, in6_1, in7_1); ST_SH4(step0, step1, step2, step3, temp_buff, 8); ST_SH4(in4, in5, in6, in7, temp_buff + (28 * 8), 8); ST_SH4(step0_1, step1_1, step2_1, step3_1, temp_buff + (4 * 8), 8); ST_SH4(in4_1, in5_1, in6_1, in7_1, temp_buff + (24 * 8), 8); /* 3rd and 4th set */ LD_SH4(input + (8 * src_stride), src_stride, in0, in1, in2, in3); LD_SH4(input + (20 * src_stride), src_stride, in4, in5, in6, in7); LD_SH4(input + (12 * src_stride), src_stride, in0_1, in1_1, in2_1, in3_1); LD_SH4(input + (16 * src_stride), src_stride, in4_1, in5_1, in6_1, in7_1); SLLI_4V(in0, in1, in2, in3, 2); SLLI_4V(in4, in5, in6, in7, 2); SLLI_4V(in0_1, in1_1, in2_1, in3_1, 2); SLLI_4V(in4_1, in5_1, in6_1, in7_1, 2); BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, step0, step1, step2, step3, in4, in5, in6, in7); BUTTERFLY_8(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1, step0_1, step1_1, step2_1, step3_1, in4_1, in5_1, in6_1, in7_1); ST_SH4(step0, step1, step2, step3, temp_buff + (8 * 8), 8); ST_SH4(in4, in5, in6, in7, temp_buff + (20 * 8), 8); ST_SH4(step0_1, step1_1, step2_1, step3_1, temp_buff + (12 * 8), 8); ST_SH4(in4_1, in5_1, in6_1, in7_1, temp_buff + (16 * 8), 8); } static void fdct8x32_1d_column_even_store(int16_t *input, int16_t *temp) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v8i16 in8, in9, in10, in11, in12, in13, in14, in15; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8i16 temp0, temp1; /* fdct even */ LD_SH4(input, 8, in0, in1, in2, in3); LD_SH4(input + 96, 8, in12, in13, in14, in15); BUTTERFLY_8(in0, in1, in2, in3, in12, in13, in14, in15, vec0, vec1, vec2, vec3, in12, in13, in14, in15); LD_SH4(input + 32, 8, in4, in5, in6, in7); LD_SH4(input + 64, 8, in8, in9, in10, in11); BUTTERFLY_8(in4, in5, in6, in7, in8, in9, in10, in11, vec4, vec5, vec6, vec7, in8, in9, in10, in11); /* Stage 3 */ ADD4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, in0, in1, in2, in3); BUTTERFLY_4(in0, in1, in2, in3, temp0, in4, in1, in0); DOTP_CONST_PAIR(temp0, in4, cospi_16_64, cospi_16_64, temp1, temp0); FDCT32_POSTPROC_2V_POS_H(temp0, temp1); ST_SH(temp0, temp); ST_SH(temp1, temp + 512); DOTP_CONST_PAIR(in0, in1, cospi_24_64, cospi_8_64, temp1, temp0); FDCT32_POSTPROC_2V_POS_H(temp0, temp1); ST_SH(temp0, temp + 256); ST_SH(temp1, temp + 768); SUB4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, vec7, vec6, vec5, vec4); DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6); ADD2(vec4, vec5, vec7, vec6, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, temp1, temp0);
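/* Each ST_SH offset in this function is 32 * <coefficient row>; for
   example, the two stores below, temp + 128 and temp + 896, hold DCT
   rows 4 and 28 of the even half. */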
FDCT32_POSTPROC_2V_POS_H(temp0, temp1); ST_SH(temp0, temp + 128); ST_SH(temp1, temp + 896); SUB2(vec4, vec5, vec7, vec6, vec4, vec7); DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, temp1, temp0); FDCT32_POSTPROC_2V_POS_H(temp0, temp1); ST_SH(temp0, temp + 640); ST_SH(temp1, temp + 384); DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5); DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4); ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2); DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3); ADD2(in0, in1, in2, in3, vec0, vec7); DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, temp1, temp0); FDCT32_POSTPROC_2V_POS_H(temp0, temp1); ST_SH(temp0, temp + 64); ST_SH(temp1, temp + 960); SUB2(in0, in1, in2, in3, in0, in2); DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, temp1, temp0); FDCT32_POSTPROC_2V_POS_H(temp0, temp1); ST_SH(temp0, temp + 576); ST_SH(temp1, temp + 448); SUB2(in9, vec2, in14, vec5, vec2, vec5); DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1); SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5); DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, temp1, temp0); FDCT32_POSTPROC_2V_POS_H(temp0, temp1); ST_SH(temp0, temp + 320); ST_SH(temp1, temp + 704); ADD2(in3, in2, in0, in1, vec3, vec4); DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, temp0, temp1); FDCT32_POSTPROC_2V_POS_H(temp0, temp1); ST_SH(temp0, temp + 192); ST_SH(temp1, temp + 832); } static void fdct8x32_1d_column_odd_store(int16_t *input, int16_t *temp_ptr) { v8i16 in16, in17, in18, in19, in20, in21, in22, in23; v8i16 in24, in25, in26, in27, in28, in29, in30, in31, vec4, vec5; in20 = LD_SH(input + 32); in21 = LD_SH(input + 40); in26 = LD_SH(input + 80); in27 = LD_SH(input + 88); DOTP_CONST_PAIR(in27, in20, cospi_16_64, cospi_16_64, in20, in27); DOTP_CONST_PAIR(in26, in21, cospi_16_64, cospi_16_64, in21, in26); in18 = LD_SH(input + 16); in19 = LD_SH(input + 24); in28 = LD_SH(input + 96); in29 = LD_SH(input + 104); vec4 = in19 - in20; ST_SH(vec4, input + 32); vec4 = in18 - in21; ST_SH(vec4, input + 40); vec4 = in29 - in26; ST_SH(vec4, input + 80); vec4 = in28 - in27; ST_SH(vec4, input + 88); in21 = in18 + in21; in20 = in19 + in20; in27 = in28 + in27; in26 = in29 + in26; LD_SH4(input + 48, 8, in22, in23, in24, in25); DOTP_CONST_PAIR(in25, in22, cospi_16_64, cospi_16_64, in22, in25); DOTP_CONST_PAIR(in24, in23, cospi_16_64, cospi_16_64, in23, in24); in16 = LD_SH(input); in17 = LD_SH(input + 8); in30 = LD_SH(input + 112); in31 = LD_SH(input + 120); vec4 = in17 - in22; ST_SH(vec4, input + 16); vec4 = in16 - in23; ST_SH(vec4, input + 24); vec4 = in31 - in24; ST_SH(vec4, input + 96); vec4 = in30 - in25; ST_SH(vec4, input + 104); ADD4(in16, in23, in17, in22, in30, in25, in31, in24, in16, in17, in30, in31); DOTP_CONST_PAIR(in26, in21, cospi_24_64, cospi_8_64, in18, in29); DOTP_CONST_PAIR(in27, in20, cospi_24_64, cospi_8_64, in19, in28); ADD4(in16, in19, in17, in18, in30, in29, in31, in28, in27, in22, in21, in25); DOTP_CONST_PAIR(in21, in22, cospi_28_64, cospi_4_64, in26, in24); ADD2(in27, in26, in25, in24, in23, in20); DOTP_CONST_PAIR(in20, in23, cospi_31_64, cospi_1_64, vec4, vec5); FDCT32_POSTPROC_2V_POS_H(vec5, vec4); ST_SH(vec5, temp_ptr); ST_SH(vec4, temp_ptr + 960); SUB2(in27, in26, in25, in24, in22, in21); DOTP_CONST_PAIR(in21, in22, cospi_15_64, cospi_17_64, vec5, vec4); FDCT32_POSTPROC_2V_POS_H(vec5, vec4); ST_SH(vec5, temp_ptr + 448); ST_SH(vec4, temp_ptr + 512); SUB4(in17, in18, in16, in19, in31, 
in28, in30, in29, in23, in26, in24, in20); DOTP_CONST_PAIR((-in23), in20, cospi_28_64, cospi_4_64, in27, in25); SUB2(in26, in27, in24, in25, in23, in20); DOTP_CONST_PAIR(in20, in23, cospi_23_64, cospi_9_64, vec4, vec5); FDCT32_POSTPROC_2V_POS_H(vec5, vec4); ST_SH(vec4, temp_ptr + 704); ST_SH(vec5, temp_ptr + 256); ADD2(in26, in27, in24, in25, in22, in21); DOTP_CONST_PAIR(in21, in22, cospi_7_64, cospi_25_64, vec4, vec5); FDCT32_POSTPROC_2V_POS_H(vec5, vec4); ST_SH(vec4, temp_ptr + 192); ST_SH(vec5, temp_ptr + 768); LD_SH4(input + 16, 8, in22, in23, in20, in21); LD_SH4(input + 80, 8, in26, in27, in24, in25); in16 = in20; in17 = in21; DOTP_CONST_PAIR(-in16, in27, cospi_24_64, cospi_8_64, in20, in27); DOTP_CONST_PAIR(-in17, in26, cospi_24_64, cospi_8_64, in21, in26); SUB4(in23, in20, in22, in21, in25, in26, in24, in27, in28, in17, in18, in31); DOTP_CONST_PAIR(in18, in17, cospi_12_64, cospi_20_64, in29, in30); ADD2(in28, in29, in31, in30, in16, in19); DOTP_CONST_PAIR(in19, in16, cospi_27_64, cospi_5_64, vec5, vec4); FDCT32_POSTPROC_2V_POS_H(vec5, vec4); ST_SH(vec5, temp_ptr + 832); ST_SH(vec4, temp_ptr + 128); SUB2(in28, in29, in31, in30, in17, in18); DOTP_CONST_PAIR(in18, in17, cospi_11_64, cospi_21_64, vec5, vec4); FDCT32_POSTPROC_2V_POS_H(vec5, vec4); ST_SH(vec5, temp_ptr + 320); ST_SH(vec4, temp_ptr + 640); ADD4(in22, in21, in23, in20, in24, in27, in25, in26, in16, in29, in30, in19); DOTP_CONST_PAIR(-in16, in19, cospi_12_64, cospi_20_64, in28, in31); SUB2(in29, in28, in30, in31, in16, in19); DOTP_CONST_PAIR(in19, in16, cospi_19_64, cospi_13_64, vec5, vec4); FDCT32_POSTPROC_2V_POS_H(vec5, vec4); ST_SH(vec5, temp_ptr + 576); ST_SH(vec4, temp_ptr + 384); ADD2(in29, in28, in30, in31, in17, in18); DOTP_CONST_PAIR(in18, in17, cospi_3_64, cospi_29_64, vec5, vec4); FDCT32_POSTPROC_2V_POS_H(vec5, vec4); ST_SH(vec5, temp_ptr + 64); ST_SH(vec4, temp_ptr + 896); } static void fdct8x32_1d_column(const int16_t *input, int32_t src_stride, int16_t *tmp_buf, int16_t *tmp_buf_big) { fdct8x32_1d_column_load_butterfly(input, src_stride, tmp_buf); fdct8x32_1d_column_even_store(tmp_buf, tmp_buf_big); fdct8x32_1d_column_odd_store(tmp_buf + 128, (tmp_buf_big + 32)); } static void fdct8x32_1d_row_load_butterfly(int16_t *temp_buff, int16_t *output) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v8i16 in8, in9, in10, in11, in12, in13, in14, in15; v8i16 step0, step1, step2, step3, step4, step5, step6, step7; LD_SH8(temp_buff, 32, in0, in1, in2, in3, in4, in5, in6, in7); LD_SH8(temp_buff + 24, 32, in8, in9, in10, in11, in12, in13, in14, in15); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9, in10, in11, in12, in13, in14, in15); BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, in13, in14, in15, step0, step1, step2, step3, step4, step5, step6, step7, in8, in9, in10, in11, in12, in13, in14, in15); ST_SH8(step0, step1, step2, step3, step4, step5, step6, step7, output, 8); ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, (output + 24 * 8), 8); /* 2nd set */ LD_SH8(temp_buff + 8, 32, in0, in1, in2, in3, in4, in5, in6, in7); LD_SH8(temp_buff + 16, 32, in8, in9, in10, in11, in12, in13, in14, in15); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9, in10, in11, in12, in13, in14, in15); BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, 
in9, in10, in11, in12, in13, in14, in15, step0, step1, step2, step3, step4, step5, step6, step7, in8, in9, in10, in11, in12, in13, in14, in15); ST_SH8(step0, step1, step2, step3, step4, step5, step6, step7, (output + 8 * 8), 8); ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, (output + 16 * 8), 8); } static void fdct8x32_1d_row_even_4x(int16_t *input, int16_t *interm_ptr, int16_t *out) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v8i16 in8, in9, in10, in11, in12, in13, in14, in15; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v4i32 vec0_l, vec1_l, vec2_l, vec3_l, vec4_l, vec5_l, vec6_l, vec7_l; v4i32 vec0_r, vec1_r, vec2_r, vec3_r, vec4_r, vec5_r, vec6_r, vec7_r; v4i32 tmp0_w, tmp1_w, tmp2_w, tmp3_w; /* fdct32 even */ /* stage 2 */ LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); LD_SH8(input + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15); BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, in13, in14, in15, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, in8, in9, in10, in11, in12, in13, in14, in15); ST_SH8(vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, interm_ptr, 8); ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, interm_ptr + 64, 8); /* Stage 3 */ UNPCK_SH_SW(vec0, vec0_l, vec0_r); UNPCK_SH_SW(vec1, vec1_l, vec1_r); UNPCK_SH_SW(vec2, vec2_l, vec2_r); UNPCK_SH_SW(vec3, vec3_l, vec3_r); UNPCK_SH_SW(vec4, vec4_l, vec4_r); UNPCK_SH_SW(vec5, vec5_l, vec5_r); UNPCK_SH_SW(vec6, vec6_l, vec6_r); UNPCK_SH_SW(vec7, vec7_l, vec7_r); ADD4(vec0_r, vec7_r, vec1_r, vec6_r, vec2_r, vec5_r, vec3_r, vec4_r, tmp0_w, tmp1_w, tmp2_w, tmp3_w); BUTTERFLY_4(tmp0_w, tmp1_w, tmp2_w, tmp3_w, vec4_r, vec6_r, vec7_r, vec5_r); ADD4(vec0_l, vec7_l, vec1_l, vec6_l, vec2_l, vec5_l, vec3_l, vec4_l, vec0_r, vec1_r, vec2_r, vec3_r); tmp3_w = vec0_r + vec3_r; vec0_r = vec0_r - vec3_r; vec3_r = vec1_r + vec2_r; vec1_r = vec1_r - vec2_r; DOTP_CONST_PAIR_W(vec4_r, vec6_r, tmp3_w, vec3_r, cospi_16_64, cospi_16_64, vec4_r, tmp3_w, vec6_r, vec3_r); FDCT32_POSTPROC_NEG_W(vec4_r); FDCT32_POSTPROC_NEG_W(tmp3_w); FDCT32_POSTPROC_NEG_W(vec6_r); FDCT32_POSTPROC_NEG_W(vec3_r); PCKEV_H2_SH(vec4_r, tmp3_w, vec6_r, vec3_r, vec4, vec5); ST_SH2(vec5, vec4, out, 8); DOTP_CONST_PAIR_W(vec5_r, vec7_r, vec0_r, vec1_r, cospi_24_64, cospi_8_64, vec4_r, tmp3_w, vec6_r, vec3_r); FDCT32_POSTPROC_NEG_W(vec4_r); FDCT32_POSTPROC_NEG_W(tmp3_w); FDCT32_POSTPROC_NEG_W(vec6_r); FDCT32_POSTPROC_NEG_W(vec3_r); PCKEV_H2_SH(vec4_r, tmp3_w, vec6_r, vec3_r, vec4, vec5); ST_SH2(vec5, vec4, out + 16, 8); LD_SH8(interm_ptr, 8, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7); SUB4(vec3, vec4, vec2, vec5, vec1, vec6, vec0, vec7, vec4, vec5, vec6, vec7); DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6); ADD2(vec4, vec5, vec7, vec6, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, in5, in4); FDCT_POSTPROC_2V_NEG_H(in4, in5); ST_SH(in4, out + 32); ST_SH(in5, out + 56); SUB2(vec4, vec5, vec7, vec6, vec4, vec7); DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, in5, in4); FDCT_POSTPROC_2V_NEG_H(in4, in5); ST_SH(in4, out + 40); ST_SH(in5, out + 48); LD_SH8(interm_ptr + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15); DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5); DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4); ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2); DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3); ADD2(in0, in1, in2, in3, vec0, vec7); DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, 
cospi_2_64, in5, in4); FDCT_POSTPROC_2V_NEG_H(in4, in5); ST_SH(in4, out + 64); ST_SH(in5, out + 120); SUB2(in0, in1, in2, in3, in0, in2); DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, in5, in4); FDCT_POSTPROC_2V_NEG_H(in4, in5); ST_SH(in4, out + 72); ST_SH(in5, out + 112); SUB2(in9, vec2, in14, vec5, vec2, vec5); DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1); SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5); DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, in5, in4); FDCT_POSTPROC_2V_NEG_H(in4, in5); ST_SH(in4, out + 80); ST_SH(in5, out + 104); ADD2(in3, in2, in0, in1, vec3, vec4); DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, in4, in5); FDCT_POSTPROC_2V_NEG_H(in4, in5); ST_SH(in4, out + 96); ST_SH(in5, out + 88); } static void fdct8x32_1d_row_even(int16_t *temp, int16_t *out) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v8i16 in8, in9, in10, in11, in12, in13, in14, in15; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, temp0, temp1; /* fdct32 even */ /* stage 2 */ LD_SH8(temp, 8, in0, in1, in2, in3, in4, in5, in6, in7); LD_SH8(temp + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15); BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, in13, in14, in15, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, in8, in9, in10, in11, in12, in13, in14, in15); /* Stage 3 */ ADD4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, in0, in1, in2, in3); BUTTERFLY_4(in0, in1, in2, in3, temp0, in4, in1, in0); DOTP_CONST_PAIR(temp0, in4, cospi_16_64, cospi_16_64, temp1, temp0); FDCT_POSTPROC_2V_NEG_H(temp0, temp1); ST_SH(temp0, out); ST_SH(temp1, out + 8); DOTP_CONST_PAIR(in0, in1, cospi_24_64, cospi_8_64, temp1, temp0); FDCT_POSTPROC_2V_NEG_H(temp0, temp1); ST_SH(temp0, out + 16); ST_SH(temp1, out + 24); SUB4(vec3, vec4, vec2, vec5, vec1, vec6, vec0, vec7, vec4, vec5, vec6, vec7); DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6); ADD2(vec4, vec5, vec7, vec6, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, temp1, temp0); FDCT_POSTPROC_2V_NEG_H(temp0, temp1); ST_SH(temp0, out + 32); ST_SH(temp1, out + 56); SUB2(vec4, vec5, vec7, vec6, vec4, vec7); DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, temp1, temp0); FDCT_POSTPROC_2V_NEG_H(temp0, temp1); ST_SH(temp0, out + 40); ST_SH(temp1, out + 48); DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5); DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4); ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2); DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3); ADD2(in0, in1, in2, in3, vec0, vec7); DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, temp1, temp0); FDCT_POSTPROC_2V_NEG_H(temp0, temp1); ST_SH(temp0, out + 64); ST_SH(temp1, out + 120); SUB2(in0, in1, in2, in3, in0, in2); DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, temp1, temp0); FDCT_POSTPROC_2V_NEG_H(temp0, temp1); ST_SH(temp0, out + 72); ST_SH(temp1, out + 112); SUB2(in9, vec2, in14, vec5, vec2, vec5); DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1); SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5) DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, temp1, temp0); FDCT_POSTPROC_2V_NEG_H(temp0, temp1); ST_SH(temp0, out + 80); ST_SH(temp1, out + 104); ADD2(in3, in2, in0, in1, vec3, vec4); DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, temp0, temp1); FDCT_POSTPROC_2V_NEG_H(temp0, temp1); ST_SH(temp0, out + 96); ST_SH(temp1, out + 88); } static void 
fdct8x32_1d_row_odd(int16_t *temp, int16_t *interm_ptr, int16_t *out) { v8i16 in16, in17, in18, in19, in20, in21, in22, in23; v8i16 in24, in25, in26, in27, in28, in29, in30, in31, vec4, vec5; in20 = LD_SH(temp + 32); in21 = LD_SH(temp + 40); in26 = LD_SH(temp + 80); in27 = LD_SH(temp + 88); DOTP_CONST_PAIR(in27, in20, cospi_16_64, cospi_16_64, in20, in27); DOTP_CONST_PAIR(in26, in21, cospi_16_64, cospi_16_64, in21, in26); in18 = LD_SH(temp + 16); in19 = LD_SH(temp + 24); in28 = LD_SH(temp + 96); in29 = LD_SH(temp + 104); vec4 = in19 - in20; ST_SH(vec4, interm_ptr + 32); vec4 = in18 - in21; ST_SH(vec4, interm_ptr + 88); vec4 = in28 - in27; ST_SH(vec4, interm_ptr + 56); vec4 = in29 - in26; ST_SH(vec4, interm_ptr + 64); ADD4(in18, in21, in19, in20, in28, in27, in29, in26, in21, in20, in27, in26); in22 = LD_SH(temp + 48); in23 = LD_SH(temp + 56); in24 = LD_SH(temp + 64); in25 = LD_SH(temp + 72); DOTP_CONST_PAIR(in25, in22, cospi_16_64, cospi_16_64, in22, in25); DOTP_CONST_PAIR(in24, in23, cospi_16_64, cospi_16_64, in23, in24); in16 = LD_SH(temp); in17 = LD_SH(temp + 8); in30 = LD_SH(temp + 112); in31 = LD_SH(temp + 120); vec4 = in17 - in22; ST_SH(vec4, interm_ptr + 40); vec4 = in30 - in25; ST_SH(vec4, interm_ptr + 48); vec4 = in31 - in24; ST_SH(vec4, interm_ptr + 72); vec4 = in16 - in23; ST_SH(vec4, interm_ptr + 80); ADD4(in16, in23, in17, in22, in30, in25, in31, in24, in16, in17, in30, in31); DOTP_CONST_PAIR(in26, in21, cospi_24_64, cospi_8_64, in18, in29); DOTP_CONST_PAIR(in27, in20, cospi_24_64, cospi_8_64, in19, in28); ADD4(in16, in19, in17, in18, in30, in29, in31, in28, in27, in22, in21, in25); DOTP_CONST_PAIR(in21, in22, cospi_28_64, cospi_4_64, in26, in24); ADD2(in27, in26, in25, in24, in23, in20); DOTP_CONST_PAIR(in20, in23, cospi_31_64, cospi_1_64, vec4, vec5); FDCT_POSTPROC_2V_NEG_H(vec5, vec4); ST_SH(vec5, out); ST_SH(vec4, out + 120); SUB2(in27, in26, in25, in24, in22, in21); DOTP_CONST_PAIR(in21, in22, cospi_15_64, cospi_17_64, vec5, vec4); FDCT_POSTPROC_2V_NEG_H(vec5, vec4); ST_SH(vec5, out + 112); ST_SH(vec4, out + 8); SUB4(in17, in18, in16, in19, in31, in28, in30, in29, in23, in26, in24, in20); DOTP_CONST_PAIR((-in23), in20, cospi_28_64, cospi_4_64, in27, in25); SUB2(in26, in27, in24, in25, in23, in20); DOTP_CONST_PAIR(in20, in23, cospi_23_64, cospi_9_64, vec4, vec5); FDCT_POSTPROC_2V_NEG_H(vec5, vec4); ST_SH(vec4, out + 16); ST_SH(vec5, out + 104); ADD2(in26, in27, in24, in25, in22, in21); DOTP_CONST_PAIR(in21, in22, cospi_7_64, cospi_25_64, vec4, vec5); FDCT_POSTPROC_2V_NEG_H(vec5, vec4); ST_SH(vec4, out + 24); ST_SH(vec5, out + 96); in20 = LD_SH(interm_ptr + 32); in21 = LD_SH(interm_ptr + 88); in27 = LD_SH(interm_ptr + 56); in26 = LD_SH(interm_ptr + 64); in16 = in20; in17 = in21; DOTP_CONST_PAIR(-in16, in27, cospi_24_64, cospi_8_64, in20, in27); DOTP_CONST_PAIR(-in17, in26, cospi_24_64, cospi_8_64, in21, in26); in22 = LD_SH(interm_ptr + 40); in25 = LD_SH(interm_ptr + 48); in24 = LD_SH(interm_ptr + 72); in23 = LD_SH(interm_ptr + 80); SUB4(in23, in20, in22, in21, in25, in26, in24, in27, in28, in17, in18, in31); DOTP_CONST_PAIR(in18, in17, cospi_12_64, cospi_20_64, in29, in30); ADD2(in28, in29, in31, in30, in16, in19); DOTP_CONST_PAIR(in19, in16, cospi_27_64, cospi_5_64, vec5, vec4); FDCT_POSTPROC_2V_NEG_H(vec5, vec4); ST_SH(vec5, out + 32); ST_SH(vec4, out + 88); SUB2(in28, in29, in31, in30, in17, in18); DOTP_CONST_PAIR(in18, in17, cospi_11_64, cospi_21_64, vec5, vec4); FDCT_POSTPROC_2V_NEG_H(vec5, vec4); ST_SH(vec5, out + 40); ST_SH(vec4, out + 80); ADD4(in22, in21, in23, 
in20, in24, in27, in25, in26, in16, in29, in30, in19); DOTP_CONST_PAIR(-in16, in19, cospi_12_64, cospi_20_64, in28, in31); SUB2(in29, in28, in30, in31, in16, in19); DOTP_CONST_PAIR(in19, in16, cospi_19_64, cospi_13_64, vec5, vec4); FDCT_POSTPROC_2V_NEG_H(vec5, vec4); ST_SH(vec5, out + 72); ST_SH(vec4, out + 48); ADD2(in29, in28, in30, in31, in17, in18); DOTP_CONST_PAIR(in18, in17, cospi_3_64, cospi_29_64, vec5, vec4); FDCT_POSTPROC_2V_NEG_H(vec5, vec4); ST_SH(vec4, out + 56); ST_SH(vec5, out + 64); } static void fdct8x32_1d_row_transpose_store(int16_t *temp, int16_t *output) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v8i16 in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1; /* 1st set */ in0 = LD_SH(temp); in4 = LD_SH(temp + 32); in2 = LD_SH(temp + 64); in6 = LD_SH(temp + 96); in1 = LD_SH(temp + 128); in7 = LD_SH(temp + 152); in3 = LD_SH(temp + 192); in5 = LD_SH(temp + 216); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); /* 2nd set */ in0_1 = LD_SH(temp + 16); in1_1 = LD_SH(temp + 232); in2_1 = LD_SH(temp + 80); in3_1 = LD_SH(temp + 168); in4_1 = LD_SH(temp + 48); in5_1 = LD_SH(temp + 176); in6_1 = LD_SH(temp + 112); in7_1 = LD_SH(temp + 240); ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 32); TRANSPOSE8x8_SH_SH(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1, in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1); /* 3rd set */ in0 = LD_SH(temp + 8); in1 = LD_SH(temp + 136); in2 = LD_SH(temp + 72); in3 = LD_SH(temp + 200); in4 = LD_SH(temp + 40); in5 = LD_SH(temp + 208); in6 = LD_SH(temp + 104); in7 = LD_SH(temp + 144); ST_SH8(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1, output + 8, 32); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output + 16, 32); /* 4th set */ in0_1 = LD_SH(temp + 24); in1_1 = LD_SH(temp + 224); in2_1 = LD_SH(temp + 88); in3_1 = LD_SH(temp + 160); in4_1 = LD_SH(temp + 56); in5_1 = LD_SH(temp + 184); in6_1 = LD_SH(temp + 120); in7_1 = LD_SH(temp + 248); TRANSPOSE8x8_SH_SH(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1, in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1); ST_SH8(in0_1, in1_1, in2_1, in3_1, in4_1, in5_1, in6_1, in7_1, output + 24, 32); } static void fdct32x8_1d_row(int16_t *temp, int16_t *temp_buf, int16_t *output) { fdct8x32_1d_row_load_butterfly(temp, temp_buf); fdct8x32_1d_row_even(temp_buf, temp_buf); fdct8x32_1d_row_odd(temp_buf + 128, temp, temp_buf + 128); fdct8x32_1d_row_transpose_store(temp_buf, output); } static void fdct32x8_1d_row_4x(int16_t *tmp_buf_big, int16_t *tmp_buf, int16_t *output) { fdct8x32_1d_row_load_butterfly(tmp_buf_big, tmp_buf); fdct8x32_1d_row_even_4x(tmp_buf, tmp_buf_big, tmp_buf); fdct8x32_1d_row_odd(tmp_buf + 128, tmp_buf_big, tmp_buf + 128); fdct8x32_1d_row_transpose_store(tmp_buf, output); } void vpx_fdct32x32_msa(const int16_t *input, int16_t *output, int32_t src_stride) { int32_t i; DECLARE_ALIGNED(32, int16_t, tmp_buf_big[1024]); DECLARE_ALIGNED(32, int16_t, tmp_buf[256]); /* column transform */ for (i = 0; i < 4; ++i) { fdct8x32_1d_column(input + (8 * i), src_stride, tmp_buf, tmp_buf_big + (8 * i)); } /* row transform */ fdct32x8_1d_row_4x(tmp_buf_big, tmp_buf, output); /* row transform */ for (i = 1; i < 4; ++i) { fdct32x8_1d_row(tmp_buf_big + (i * 256), tmp_buf, output + (i * 256)); } } static void fdct8x32_1d_row_even_rd(int16_t *temp, int16_t *out) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v8i16 in8, in9, 
in10, in11, in12, in13, in14, in15; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, temp0, temp1; /* fdct32 even */ /* stage 2 */ LD_SH8(temp, 8, in0, in1, in2, in3, in4, in5, in6, in7); LD_SH8(temp + 64, 8, in8, in9, in10, in11, in12, in13, in14, in15); BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, in13, in14, in15, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, in8, in9, in10, in11, in12, in13, in14, in15); FDCT_POSTPROC_2V_NEG_H(vec0, vec1); FDCT_POSTPROC_2V_NEG_H(vec2, vec3); FDCT_POSTPROC_2V_NEG_H(vec4, vec5); FDCT_POSTPROC_2V_NEG_H(vec6, vec7); FDCT_POSTPROC_2V_NEG_H(in8, in9); FDCT_POSTPROC_2V_NEG_H(in10, in11); FDCT_POSTPROC_2V_NEG_H(in12, in13); FDCT_POSTPROC_2V_NEG_H(in14, in15); /* Stage 3 */ ADD4(vec0, vec7, vec1, vec6, vec2, vec5, vec3, vec4, in0, in1, in2, in3); temp0 = in0 + in3; in0 = in0 - in3; in3 = in1 + in2; in1 = in1 - in2; DOTP_CONST_PAIR(temp0, in3, cospi_16_64, cospi_16_64, temp1, temp0); ST_SH(temp0, out); ST_SH(temp1, out + 8); DOTP_CONST_PAIR(in0, in1, cospi_24_64, cospi_8_64, temp1, temp0); ST_SH(temp0, out + 16); ST_SH(temp1, out + 24); SUB4(vec3, vec4, vec2, vec5, vec1, vec6, vec0, vec7, vec4, vec5, vec6, vec7); DOTP_CONST_PAIR(vec6, vec5, cospi_16_64, cospi_16_64, vec5, vec6); ADD2(vec4, vec5, vec7, vec6, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_28_64, cospi_4_64, temp1, temp0); ST_SH(temp0, out + 32); ST_SH(temp1, out + 56); SUB2(vec4, vec5, vec7, vec6, vec4, vec7); DOTP_CONST_PAIR(vec7, vec4, cospi_12_64, cospi_20_64, temp1, temp0); ST_SH(temp0, out + 40); ST_SH(temp1, out + 48); DOTP_CONST_PAIR(in13, in10, cospi_16_64, cospi_16_64, vec2, vec5); DOTP_CONST_PAIR(in12, in11, cospi_16_64, cospi_16_64, vec3, vec4); ADD4(in8, vec3, in9, vec2, in14, vec5, in15, vec4, in0, vec1, vec6, in2); DOTP_CONST_PAIR(vec6, vec1, cospi_24_64, cospi_8_64, in1, in3); ADD2(in0, in1, in2, in3, vec0, vec7); DOTP_CONST_PAIR(vec7, vec0, cospi_30_64, cospi_2_64, temp1, temp0); ST_SH(temp0, out + 64); ST_SH(temp1, out + 120); SUB2(in0, in1, in2, in3, in0, in2); DOTP_CONST_PAIR(in2, in0, cospi_14_64, cospi_18_64, temp1, temp0); ST_SH(temp0, out + 72); ST_SH(temp1, out + 112); SUB2(in9, vec2, in14, vec5, vec2, vec5); DOTP_CONST_PAIR((-vec2), vec5, cospi_24_64, cospi_8_64, in2, in1); SUB4(in8, vec3, in15, vec4, in3, in2, in0, in1, in3, in0, vec2, vec5); DOTP_CONST_PAIR(vec5, vec2, cospi_22_64, cospi_10_64, temp1, temp0); ST_SH(temp0, out + 80); ST_SH(temp1, out + 104); ADD2(in3, in2, in0, in1, vec3, vec4); DOTP_CONST_PAIR(vec4, vec3, cospi_6_64, cospi_26_64, temp0, temp1); ST_SH(temp0, out + 96); ST_SH(temp1, out + 88); } static void fdct8x32_1d_row_odd_rd(int16_t *temp, int16_t *interm_ptr, int16_t *out) { v8i16 in16, in17, in18, in19, in20, in21, in22, in23; v8i16 in24, in25, in26, in27, in28, in29, in30, in31; v8i16 vec4, vec5; in20 = LD_SH(temp + 32); in21 = LD_SH(temp + 40); in26 = LD_SH(temp + 80); in27 = LD_SH(temp + 88); DOTP_CONST_PAIR(in27, in20, cospi_16_64, cospi_16_64, in20, in27); DOTP_CONST_PAIR(in26, in21, cospi_16_64, cospi_16_64, in21, in26); FDCT_POSTPROC_2V_NEG_H(in20, in21); FDCT_POSTPROC_2V_NEG_H(in26, in27); in18 = LD_SH(temp + 16); in19 = LD_SH(temp + 24); in28 = LD_SH(temp + 96); in29 = LD_SH(temp + 104); FDCT_POSTPROC_2V_NEG_H(in18, in19); FDCT_POSTPROC_2V_NEG_H(in28, in29); vec4 = in19 - in20; ST_SH(vec4, interm_ptr + 32); vec4 = in18 - in21; ST_SH(vec4, interm_ptr + 88); vec4 = in29 - in26; ST_SH(vec4, interm_ptr + 64); vec4 = in28 - in27; ST_SH(vec4, interm_ptr + 56); ADD4(in18, in21, in19, in20, in28, in27, 
in29, in26, in21, in20, in27, in26); in22 = LD_SH(temp + 48); in23 = LD_SH(temp + 56); in24 = LD_SH(temp + 64); in25 = LD_SH(temp + 72); DOTP_CONST_PAIR(in25, in22, cospi_16_64, cospi_16_64, in22, in25); DOTP_CONST_PAIR(in24, in23, cospi_16_64, cospi_16_64, in23, in24); FDCT_POSTPROC_2V_NEG_H(in22, in23); FDCT_POSTPROC_2V_NEG_H(in24, in25); in16 = LD_SH(temp); in17 = LD_SH(temp + 8); in30 = LD_SH(temp + 112); in31 = LD_SH(temp + 120); FDCT_POSTPROC_2V_NEG_H(in16, in17); FDCT_POSTPROC_2V_NEG_H(in30, in31); vec4 = in17 - in22; ST_SH(vec4, interm_ptr + 40); vec4 = in30 - in25; ST_SH(vec4, interm_ptr + 48); vec4 = in31 - in24; ST_SH(vec4, interm_ptr + 72); vec4 = in16 - in23; ST_SH(vec4, interm_ptr + 80); ADD4(in16, in23, in17, in22, in30, in25, in31, in24, in16, in17, in30, in31); DOTP_CONST_PAIR(in26, in21, cospi_24_64, cospi_8_64, in18, in29); DOTP_CONST_PAIR(in27, in20, cospi_24_64, cospi_8_64, in19, in28); ADD4(in16, in19, in17, in18, in30, in29, in31, in28, in27, in22, in21, in25); DOTP_CONST_PAIR(in21, in22, cospi_28_64, cospi_4_64, in26, in24); ADD2(in27, in26, in25, in24, in23, in20); DOTP_CONST_PAIR(in20, in23, cospi_31_64, cospi_1_64, vec4, vec5); ST_SH(vec5, out); ST_SH(vec4, out + 120); SUB2(in27, in26, in25, in24, in22, in21); DOTP_CONST_PAIR(in21, in22, cospi_15_64, cospi_17_64, vec5, vec4); ST_SH(vec5, out + 112); ST_SH(vec4, out + 8); SUB4(in17, in18, in16, in19, in31, in28, in30, in29, in23, in26, in24, in20); DOTP_CONST_PAIR((-in23), in20, cospi_28_64, cospi_4_64, in27, in25); SUB2(in26, in27, in24, in25, in23, in20); DOTP_CONST_PAIR(in20, in23, cospi_23_64, cospi_9_64, vec4, vec5); ST_SH(vec4, out + 16); ST_SH(vec5, out + 104); ADD2(in26, in27, in24, in25, in22, in21); DOTP_CONST_PAIR(in21, in22, cospi_7_64, cospi_25_64, vec4, vec5); ST_SH(vec4, out + 24); ST_SH(vec5, out + 96); in20 = LD_SH(interm_ptr + 32); in21 = LD_SH(interm_ptr + 88); in27 = LD_SH(interm_ptr + 56); in26 = LD_SH(interm_ptr + 64); in16 = in20; in17 = in21; DOTP_CONST_PAIR(-in16, in27, cospi_24_64, cospi_8_64, in20, in27); DOTP_CONST_PAIR(-in17, in26, cospi_24_64, cospi_8_64, in21, in26); in22 = LD_SH(interm_ptr + 40); in25 = LD_SH(interm_ptr + 48); in24 = LD_SH(interm_ptr + 72); in23 = LD_SH(interm_ptr + 80); SUB4(in23, in20, in22, in21, in25, in26, in24, in27, in28, in17, in18, in31); DOTP_CONST_PAIR(in18, in17, cospi_12_64, cospi_20_64, in29, in30); in16 = in28 + in29; in19 = in31 + in30; DOTP_CONST_PAIR(in19, in16, cospi_27_64, cospi_5_64, vec5, vec4); ST_SH(vec5, out + 32); ST_SH(vec4, out + 88); SUB2(in28, in29, in31, in30, in17, in18); DOTP_CONST_PAIR(in18, in17, cospi_11_64, cospi_21_64, vec5, vec4); ST_SH(vec5, out + 40); ST_SH(vec4, out + 80); ADD4(in22, in21, in23, in20, in24, in27, in25, in26, in16, in29, in30, in19); DOTP_CONST_PAIR(-in16, in19, cospi_12_64, cospi_20_64, in28, in31); SUB2(in29, in28, in30, in31, in16, in19); DOTP_CONST_PAIR(in19, in16, cospi_19_64, cospi_13_64, vec5, vec4); ST_SH(vec5, out + 72); ST_SH(vec4, out + 48); ADD2(in29, in28, in30, in31, in17, in18); DOTP_CONST_PAIR(in18, in17, cospi_3_64, cospi_29_64, vec5, vec4); ST_SH(vec4, out + 56); ST_SH(vec5, out + 64); } static void fdct32x8_1d_row_rd(int16_t *tmp_buf_big, int16_t *tmp_buf, int16_t *output) { fdct8x32_1d_row_load_butterfly(tmp_buf_big, tmp_buf); fdct8x32_1d_row_even_rd(tmp_buf, tmp_buf); fdct8x32_1d_row_odd_rd((tmp_buf + 128), tmp_buf_big, (tmp_buf + 128)); fdct8x32_1d_row_transpose_store(tmp_buf, output); } void vpx_fdct32x32_rd_msa(const int16_t *input, int16_t *out, int32_t src_stride) { int32_t i; 
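/* The _rd variant mirrors vpx_fdct32x32_rd_c(): the *_rd row helpers
   apply an extra intermediate rounding (FDCT_POSTPROC_2V_NEG_H) right
   after the stage-2 butterflies so all arithmetic stays within 16 bits,
   instead of the wider 32-bit first-pass path used by
   vpx_fdct32x32_msa(). */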
DECLARE_ALIGNED(32, int16_t, tmp_buf_big[1024]); DECLARE_ALIGNED(32, int16_t, tmp_buf[256]); /* column transform */ for (i = 0; i < 4; ++i) { fdct8x32_1d_column(input + (8 * i), src_stride, &tmp_buf[0], &tmp_buf_big[0] + (8 * i)); } /* row transform */ for (i = 0; i < 4; ++i) { fdct32x8_1d_row_rd(&tmp_buf_big[0] + (8 * i * 32), &tmp_buf[0], out + (8 * i * 32)); } } void vpx_fdct32x32_1_msa(const int16_t *input, int16_t *out, int32_t stride) { int sum, i; v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v4i32 vec_w = { 0 }; for (i = 0; i < 16; ++i) { LD_SH4(input, 8, in0, in1, in2, in3); input += stride; LD_SH4(input, 8, in4, in5, in6, in7); input += stride; ADD4(in0, in1, in2, in3, in4, in5, in6, in7, in0, in2, in4, in6); ADD2(in0, in2, in4, in6, in0, in4); vec_w += __msa_hadd_s_w(in0, in0); vec_w += __msa_hadd_s_w(in4, in4); } sum = HADD_SW_S32(vec_w); out[0] = (int16_t)(sum >> 3); } libvpx-1.8.2/vpx_dsp/mips/fwd_txfm_msa.c /* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/fwd_txfm_msa.h" void vpx_fdct8x8_1_msa(const int16_t *input, tran_low_t *out, int32_t stride) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v4i32 vec_w; LD_SH8(input, stride, in0, in1, in2, in3, in4, in5, in6, in7); ADD4(in0, in1, in2, in3, in4, in5, in6, in7, in0, in2, in4, in6); ADD2(in0, in2, in4, in6, in0, in4); vec_w = __msa_hadd_s_w(in0, in0); vec_w += __msa_hadd_s_w(in4, in4); out[0] = HADD_SW_S32(vec_w); out[1] = 0; } #if !CONFIG_VP9_HIGHBITDEPTH void fdct8x16_1d_column(const int16_t *input, int16_t *tmp_ptr, int32_t src_stride) { v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v8i16 in8, in9, in10, in11, in12, in13, in14, in15; v8i16 stp21, stp22, stp23, stp24, stp25, stp26, stp30; v8i16 stp31, stp32, stp33, stp34, stp35, stp36, stp37; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, cnst0, cnst1, cnst4, cnst5; v8i16 coeff = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, -cospi_8_64, -cospi_24_64, cospi_12_64, cospi_20_64 }; v8i16 coeff1 = { cospi_2_64, cospi_30_64, cospi_14_64, cospi_18_64, cospi_10_64, cospi_22_64, cospi_6_64, cospi_26_64 }; v8i16 coeff2 = { -cospi_2_64, -cospi_10_64, -cospi_18_64, -cospi_26_64, 0, 0, 0, 0 }; LD_SH16(input, src_stride, in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, in13, in14, in15); SLLI_4V(in0, in1, in2, in3, 2); SLLI_4V(in4, in5, in6, in7, 2); SLLI_4V(in8, in9, in10, in11, 2); SLLI_4V(in12, in13, in14, in15, 2); ADD4(in0, in15, in1, in14, in2, in13, in3, in12, tmp0, tmp1, tmp2, tmp3); ADD4(in4, in11, in5, in10, in6, in9, in7, in8, tmp4, tmp5, tmp6, tmp7); FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); ST_SH8(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp_ptr, 32); SUB4(in0, in15, in1, in14, in2, in13, in3, in12, in15, in14, in13, in12); SUB4(in4, in11, in5, in10, in6, in9, in7, in8, in11, in10, in9, in8); tmp_ptr += 16; /* stp 1 */ ILVL_H2_SH(in10, in13, in11, in12, vec2, vec4); ILVR_H2_SH(in10, in13, in11, in12, vec3, vec5); cnst4 = __msa_splati_h(coeff, 0); stp25 =
DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst4); cnst5 = __msa_splati_h(coeff, 1); cnst5 = __msa_ilvev_h(cnst5, cnst4); stp22 = DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst5); stp24 = DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst4); stp23 = DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst5); /* stp2 */ BUTTERFLY_4(in8, in9, stp22, stp23, stp30, stp31, stp32, stp33); BUTTERFLY_4(in15, in14, stp25, stp24, stp37, stp36, stp35, stp34); ILVL_H2_SH(stp36, stp31, stp35, stp32, vec2, vec4); ILVR_H2_SH(stp36, stp31, stp35, stp32, vec3, vec5); SPLATI_H2_SH(coeff, 2, 3, cnst0, cnst1); cnst0 = __msa_ilvev_h(cnst0, cnst1); stp26 = DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst0); cnst0 = __msa_splati_h(coeff, 4); cnst1 = __msa_ilvev_h(cnst1, cnst0); stp21 = DOT_SHIFT_RIGHT_PCK_H(vec2, vec3, cnst1); BUTTERFLY_4(stp30, stp37, stp26, stp21, in8, in15, in14, in9); ILVRL_H2_SH(in15, in8, vec1, vec0); SPLATI_H2_SH(coeff1, 0, 1, cnst0, cnst1); cnst0 = __msa_ilvev_h(cnst0, cnst1); in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0); ST_SH(in8, tmp_ptr); cnst0 = __msa_splati_h(coeff2, 0); cnst0 = __msa_ilvev_h(cnst1, cnst0); in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0); ST_SH(in8, tmp_ptr + 224); ILVRL_H2_SH(in14, in9, vec1, vec0); SPLATI_H2_SH(coeff1, 2, 3, cnst0, cnst1); cnst1 = __msa_ilvev_h(cnst1, cnst0); in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst1); ST_SH(in8, tmp_ptr + 128); cnst1 = __msa_splati_h(coeff2, 2); cnst0 = __msa_ilvev_h(cnst0, cnst1); in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0); ST_SH(in8, tmp_ptr + 96); SPLATI_H2_SH(coeff, 2, 5, cnst0, cnst1); cnst1 = __msa_ilvev_h(cnst1, cnst0); stp25 = DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst1); cnst1 = __msa_splati_h(coeff, 3); cnst1 = __msa_ilvev_h(cnst0, cnst1); stp22 = DOT_SHIFT_RIGHT_PCK_H(vec4, vec5, cnst1); /* stp4 */ ADD2(stp34, stp25, stp33, stp22, in13, in10); ILVRL_H2_SH(in13, in10, vec1, vec0); SPLATI_H2_SH(coeff1, 4, 5, cnst0, cnst1); cnst0 = __msa_ilvev_h(cnst0, cnst1); in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0); ST_SH(in8, tmp_ptr + 64); cnst0 = __msa_splati_h(coeff2, 1); cnst0 = __msa_ilvev_h(cnst1, cnst0); in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0); ST_SH(in8, tmp_ptr + 160); SUB2(stp34, stp25, stp33, stp22, in12, in11); ILVRL_H2_SH(in12, in11, vec1, vec0); SPLATI_H2_SH(coeff1, 6, 7, cnst0, cnst1); cnst1 = __msa_ilvev_h(cnst1, cnst0); in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst1); ST_SH(in8, tmp_ptr + 192); cnst1 = __msa_splati_h(coeff2, 3); cnst0 = __msa_ilvev_h(cnst0, cnst1); in8 = DOT_SHIFT_RIGHT_PCK_H(vec0, vec1, cnst0); ST_SH(in8, tmp_ptr + 32); } void fdct16x8_1d_row(int16_t *input, int16_t *output) { v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v8i16 in8, in9, in10, in11, in12, in13, in14, in15; LD_SH8(input, 16, in0, in1, in2, in3, in4, in5, in6, in7); LD_SH8((input + 8), 16, in8, in9, in10, in11, in12, in13, in14, in15); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(in8, in9, in10, in11, in12, in13, in14, in15, in8, in9, in10, in11, in12, in13, in14, in15); ADD4(in0, 1, in1, 1, in2, 1, in3, 1, in0, in1, in2, in3); ADD4(in4, 1, in5, 1, in6, 1, in7, 1, in4, in5, in6, in7); ADD4(in8, 1, in9, 1, in10, 1, in11, 1, in8, in9, in10, in11); ADD4(in12, 1, in13, 1, in14, 1, in15, 1, in12, in13, in14, in15); SRA_4V(in0, in1, in2, in3, 2); SRA_4V(in4, in5, in6, in7, 2); SRA_4V(in8, in9, in10, in11, 2); SRA_4V(in12, in13, in14, in15, 2); BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, in13, in14, in15, tmp0, tmp1, 
tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, in8, in9, in10, in11, in12, in13, in14, in15); ST_SH8(in8, in9, in10, in11, in12, in13, in14, in15, input, 16); FDCT8x16_EVEN(tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7); LD_SH8(input, 16, in8, in9, in10, in11, in12, in13, in14, in15); FDCT8x16_ODD(in8, in9, in10, in11, in12, in13, in14, in15, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3); ST_SH8(tmp0, in0, tmp1, in1, tmp2, in2, tmp3, in3, output, 16); TRANSPOSE8x8_SH_SH(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7); ST_SH8(tmp4, in4, tmp5, in5, tmp6, in6, tmp7, in7, output + 8, 16); } void vpx_fdct4x4_msa(const int16_t *input, int16_t *output, int32_t src_stride) { v8i16 in0, in1, in2, in3; LD_SH4(input, src_stride, in0, in1, in2, in3); /* fdct4 pre-process */ { v8i16 vec, mask; v16i8 zero = { 0 }; v16i8 one = __msa_ldi_b(1); mask = (v8i16)__msa_sldi_b(zero, one, 15); SLLI_4V(in0, in1, in2, in3, 4); vec = __msa_ceqi_h(in0, 0); vec = vec ^ 255; vec = mask & vec; in0 += vec; } VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3); TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VP9_FDCT4(in0, in1, in2, in3, in0, in1, in2, in3); TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); ADD4(in0, 1, in1, 1, in2, 1, in3, 1, in0, in1, in2, in3); SRA_4V(in0, in1, in2, in3, 2); PCKEV_D2_SH(in1, in0, in3, in2, in0, in2); ST_SH2(in0, in2, output, 8); } void vpx_fdct8x8_msa(const int16_t *input, int16_t *output, int32_t src_stride) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; LD_SH8(input, src_stride, in0, in1, in2, in3, in4, in5, in6, in7); SLLI_4V(in0, in1, in2, in3, 2); SLLI_4V(in4, in5, in6, in7, 2); VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7); ST_SH8(in0, in1, in2, in3, in4, in5, in6, in7, output, 8); } void vpx_fdct16x16_msa(const int16_t *input, int16_t *output, int32_t src_stride) { int32_t i; DECLARE_ALIGNED(32, int16_t, tmp_buf[16 * 16]); /* column transform */ for (i = 0; i < 2; ++i) { fdct8x16_1d_column((input + 8 * i), (&tmp_buf[0] + 8 * i), src_stride); } /* row transform */ for (i = 0; i < 2; ++i) { fdct16x8_1d_row((&tmp_buf[0] + (128 * i)), (output + (128 * i))); } } void vpx_fdct16x16_1_msa(const int16_t *input, int16_t *out, int32_t stride) { int sum, i; v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v4i32 vec_w = { 0 }; for (i = 0; i < 4; ++i) { LD_SH2(input, 8, in0, in1); input += stride; LD_SH2(input, 8, in2, in3); input += stride; LD_SH2(input, 8, in4, in5); input += stride; LD_SH2(input, 8, in6, in7); input += stride; ADD4(in0, in1, in2, in3, in4, in5, in6, in7, in0, in2, in4, in6); ADD2(in0, in2, in4, in6, in0, in4); vec_w += __msa_hadd_s_w(in0, in0); vec_w += __msa_hadd_s_w(in4, in4); } sum = HADD_SW_S32(vec_w); out[0] = (int16_t)(sum >> 1); } #endif // !CONFIG_VP9_HIGHBITDEPTH libvpx-1.8.2/vpx_dsp/mips/fwd_txfm_msa.h000066400000000000000000000613671357355204000203310ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_MIPS_FWD_TXFM_MSA_H_ #define VPX_VPX_DSP_MIPS_FWD_TXFM_MSA_H_ #include "vpx_dsp/mips/txfm_macros_msa.h" #include "vpx_dsp/txfm_common.h" #define VP9_FDCT4(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m; \ v8i16 vec0_m, vec1_m, vec2_m, vec3_m; \ v4i32 vec4_m, vec5_m, vec6_m, vec7_m; \ v8i16 coeff_m = { \ cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, -cospi_8_64, 0, 0, 0 \ }; \ \ BUTTERFLY_4(in0, in1, in2, in3, vec0_m, vec1_m, vec2_m, vec3_m); \ ILVR_H2_SH(vec1_m, vec0_m, vec3_m, vec2_m, vec0_m, vec2_m); \ SPLATI_H2_SH(coeff_m, 0, 1, cnst0_m, cnst1_m); \ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ vec5_m = __msa_dotp_s_w(vec0_m, cnst1_m); \ \ SPLATI_H2_SH(coeff_m, 4, 3, cnst2_m, cnst3_m); \ cnst2_m = __msa_ilvev_h(cnst3_m, cnst2_m); \ vec7_m = __msa_dotp_s_w(vec2_m, cnst2_m); \ \ vec4_m = __msa_dotp_s_w(vec0_m, cnst0_m); \ cnst2_m = __msa_splati_h(coeff_m, 2); \ cnst2_m = __msa_ilvev_h(cnst2_m, cnst3_m); \ vec6_m = __msa_dotp_s_w(vec2_m, cnst2_m); \ \ SRARI_W4_SW(vec4_m, vec5_m, vec6_m, vec7_m, DCT_CONST_BITS); \ PCKEV_H4_SH(vec4_m, vec4_m, vec5_m, vec5_m, vec6_m, vec6_m, vec7_m, \ vec7_m, out0, out2, out1, out3); \ } #define SRLI_AVE_S_4V_H(in0, in1, in2, in3, in4, in5, in6, in7) \ { \ v8i16 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ \ SRLI_H4_SH(in0, in1, in2, in3, vec0_m, vec1_m, vec2_m, vec3_m, 15); \ SRLI_H4_SH(in4, in5, in6, in7, vec4_m, vec5_m, vec6_m, vec7_m, 15); \ AVE_SH4_SH(vec0_m, in0, vec1_m, in1, vec2_m, in2, vec3_m, in3, in0, in1, \ in2, in3); \ AVE_SH4_SH(vec4_m, in4, vec5_m, in5, vec6_m, in6, vec7_m, in7, in4, in5, \ in6, in7); \ } #define VP9_FDCT8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \ out3, out4, out5, out6, out7) \ { \ v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m; \ v8i16 s7_m, x0_m, x1_m, x2_m, x3_m; \ v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, \ cospi_4_64, cospi_28_64, cospi_12_64, cospi_20_64 }; \ \ /* FDCT stage1 */ \ BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, s0_m, s1_m, s2_m, \ s3_m, s4_m, s5_m, s6_m, s7_m); \ BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m); \ ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m); \ ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m); \ SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m); \ x1_m = __msa_ilvev_h(x1_m, x0_m); \ out4 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ \ SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m); \ x2_m = -x2_m; \ x2_m = __msa_ilvev_h(x3_m, x2_m); \ out6 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ \ out0 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ x2_m = __msa_splati_h(coeff_m, 2); \ x2_m = __msa_ilvev_h(x2_m, x3_m); \ out2 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ \ /* stage2 */ \ ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m); \ \ s6_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ s5_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ \ /* stage3 */ \ BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m); \ \ /* stage4 */ \ ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m); \ ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m); \ \ SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m); \ x1_m = __msa_ilvev_h(x0_m, x1_m); \ out1 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m); \ \ 
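    /* In effect, each SPLATI_H2_SH + __msa_ilvev_h pair above packs two     \
       cosine constants as { c0, c1, c0, c1, ... } so the dot product        \
       inside DOT_SHIFT_RIGHT_PCK_H computes a * c0 + b * c1 per output      \
       lane and rounds by DCT_CONST_BITS, one vector op per butterfly        \
       rotation. (Descriptive note; see txfm_macros_msa.h for the            \
       DOT_SHIFT_RIGHT_PCK_H helper.) */                                     \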
SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m); \ x2_m = __msa_ilvev_h(x3_m, x2_m); \ out5 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ \ x1_m = __msa_splati_h(coeff_m, 5); \ x0_m = -x0_m; \ x0_m = __msa_ilvev_h(x1_m, x0_m); \ out7 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m); \ \ x2_m = __msa_splati_h(coeff_m, 6); \ x3_m = -x3_m; \ x2_m = __msa_ilvev_h(x2_m, x3_m); \ out3 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ } #define FDCT8x16_EVEN(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3, out4, out5, out6, out7) \ { \ v8i16 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m; \ v8i16 x0_m, x1_m, x2_m, x3_m; \ v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, \ cospi_4_64, cospi_28_64, cospi_12_64, cospi_20_64 }; \ \ /* FDCT stage1 */ \ BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, s0_m, s1_m, s2_m, \ s3_m, s4_m, s5_m, s6_m, s7_m); \ BUTTERFLY_4(s0_m, s1_m, s2_m, s3_m, x0_m, x1_m, x2_m, x3_m); \ ILVL_H2_SH(x1_m, x0_m, x3_m, x2_m, s0_m, s2_m); \ ILVR_H2_SH(x1_m, x0_m, x3_m, x2_m, s1_m, s3_m); \ SPLATI_H2_SH(coeff_m, 0, 1, x0_m, x1_m); \ x1_m = __msa_ilvev_h(x1_m, x0_m); \ out4 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ \ SPLATI_H2_SH(coeff_m, 2, 3, x2_m, x3_m); \ x2_m = -x2_m; \ x2_m = __msa_ilvev_h(x3_m, x2_m); \ out6 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ \ out0 = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ x2_m = __msa_splati_h(coeff_m, 2); \ x2_m = __msa_ilvev_h(x2_m, x3_m); \ out2 = DOT_SHIFT_RIGHT_PCK_H(s2_m, s3_m, x2_m); \ \ /* stage2 */ \ ILVRL_H2_SH(s5_m, s6_m, s1_m, s0_m); \ \ s6_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x0_m); \ s5_m = DOT_SHIFT_RIGHT_PCK_H(s0_m, s1_m, x1_m); \ \ /* stage3 */ \ BUTTERFLY_4(s4_m, s7_m, s6_m, s5_m, x0_m, x3_m, x2_m, x1_m); \ \ /* stage4 */ \ ILVL_H2_SH(x3_m, x0_m, x2_m, x1_m, s4_m, s6_m); \ ILVR_H2_SH(x3_m, x0_m, x2_m, x1_m, s5_m, s7_m); \ \ SPLATI_H2_SH(coeff_m, 4, 5, x0_m, x1_m); \ x1_m = __msa_ilvev_h(x0_m, x1_m); \ out1 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x1_m); \ \ SPLATI_H2_SH(coeff_m, 6, 7, x2_m, x3_m); \ x2_m = __msa_ilvev_h(x3_m, x2_m); \ out5 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ \ x1_m = __msa_splati_h(coeff_m, 5); \ x0_m = -x0_m; \ x0_m = __msa_ilvev_h(x1_m, x0_m); \ out7 = DOT_SHIFT_RIGHT_PCK_H(s4_m, s5_m, x0_m); \ \ x2_m = __msa_splati_h(coeff_m, 6); \ x3_m = -x3_m; \ x2_m = __msa_ilvev_h(x2_m, x3_m); \ out3 = DOT_SHIFT_RIGHT_PCK_H(s6_m, s7_m, x2_m); \ } #define FDCT8x16_ODD(input0, input1, input2, input3, input4, input5, input6, \ input7, out1, out3, out5, out7, out9, out11, out13, \ out15) \ { \ v8i16 stp21_m, stp22_m, stp23_m, stp24_m, stp25_m, stp26_m; \ v8i16 stp30_m, stp31_m, stp32_m, stp33_m, stp34_m, stp35_m; \ v8i16 stp36_m, stp37_m, vec0_m, vec1_m; \ v8i16 vec2_m, vec3_m, vec4_m, vec5_m, vec6_m; \ v8i16 cnst0_m, cnst1_m, cnst4_m, cnst5_m; \ v8i16 coeff_m = { cospi_16_64, -cospi_16_64, cospi_8_64, cospi_24_64, \ -cospi_8_64, -cospi_24_64, cospi_12_64, cospi_20_64 }; \ v8i16 coeff1_m = { cospi_2_64, cospi_30_64, cospi_14_64, cospi_18_64, \ cospi_10_64, cospi_22_64, cospi_6_64, cospi_26_64 }; \ v8i16 coeff2_m = { \ -cospi_2_64, -cospi_10_64, -cospi_18_64, -cospi_26_64, 0, 0, 0, 0 \ }; \ \ /* stp 1 */ \ ILVL_H2_SH(input2, input5, input3, input4, vec2_m, vec4_m); \ ILVR_H2_SH(input2, input5, input3, input4, vec3_m, vec5_m); \ \ cnst4_m = __msa_splati_h(coeff_m, 0); \ stp25_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst4_m); \ \ cnst5_m = __msa_splati_h(coeff_m, 1); \ cnst5_m = __msa_ilvev_h(cnst5_m, cnst4_m); \ stp22_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst5_m); \ stp24_m = 
DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst4_m); \ stp23_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst5_m); \ \ /* stp2 */ \ BUTTERFLY_4(input0, input1, stp22_m, stp23_m, stp30_m, stp31_m, stp32_m, \ stp33_m); \ BUTTERFLY_4(input7, input6, stp25_m, stp24_m, stp37_m, stp36_m, stp35_m, \ stp34_m); \ \ ILVL_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec2_m, vec4_m); \ ILVR_H2_SH(stp36_m, stp31_m, stp35_m, stp32_m, vec3_m, vec5_m); \ \ SPLATI_H2_SH(coeff_m, 2, 3, cnst0_m, cnst1_m); \ cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ stp26_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \ \ cnst0_m = __msa_splati_h(coeff_m, 4); \ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ stp21_m = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \ \ SPLATI_H2_SH(coeff_m, 5, 2, cnst0_m, cnst1_m); \ cnst1_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ stp25_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m); \ \ cnst0_m = __msa_splati_h(coeff_m, 3); \ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ stp22_m = DOT_SHIFT_RIGHT_PCK_H(vec4_m, vec5_m, cnst1_m); \ \ /* stp4 */ \ BUTTERFLY_4(stp30_m, stp37_m, stp26_m, stp21_m, vec6_m, vec2_m, vec4_m, \ vec5_m); \ BUTTERFLY_4(stp33_m, stp34_m, stp25_m, stp22_m, stp21_m, stp23_m, stp24_m, \ stp31_m); \ \ ILVRL_H2_SH(vec2_m, vec6_m, vec1_m, vec0_m); \ SPLATI_H2_SH(coeff1_m, 0, 1, cnst0_m, cnst1_m); \ cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ \ out1 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ \ cnst0_m = __msa_splati_h(coeff2_m, 0); \ cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ out15 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ \ ILVRL_H2_SH(vec4_m, vec5_m, vec1_m, vec0_m); \ SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ \ out9 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ \ cnst1_m = __msa_splati_h(coeff2_m, 2); \ cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ out7 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ \ ILVRL_H2_SH(stp23_m, stp21_m, vec1_m, vec0_m); \ SPLATI_H2_SH(coeff1_m, 4, 5, cnst0_m, cnst1_m); \ cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ out5 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ \ cnst0_m = __msa_splati_h(coeff2_m, 1); \ cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ out11 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ \ ILVRL_H2_SH(stp24_m, stp31_m, vec1_m, vec0_m); \ SPLATI_H2_SH(coeff1_m, 6, 7, cnst0_m, cnst1_m); \ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ \ out13 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ \ cnst1_m = __msa_splati_h(coeff2_m, 3); \ cnst0_m = __msa_ilvev_h(cnst0_m, cnst1_m); \ out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ } #define FDCT_POSTPROC_2V_NEG_H(vec0, vec1) \ { \ v8i16 tp0_m, tp1_m; \ v8i16 one_m = __msa_ldi_h(1); \ \ tp0_m = __msa_clti_s_h(vec0, 0); \ tp1_m = __msa_clti_s_h(vec1, 0); \ vec0 += 1; \ vec1 += 1; \ tp0_m = one_m & tp0_m; \ tp1_m = one_m & tp1_m; \ vec0 += tp0_m; \ vec1 += tp1_m; \ vec0 >>= 2; \ vec1 >>= 2; \ } #define FDCT32_POSTPROC_NEG_W(vec) \ { \ v4i32 temp_m; \ v4i32 one_m = __msa_ldi_w(1); \ \ temp_m = __msa_clti_s_w(vec, 0); \ vec += 1; \ temp_m = one_m & temp_m; \ vec += temp_m; \ vec >>= 2; \ } #define FDCT32_POSTPROC_2V_POS_H(vec0, vec1) \ { \ v8i16 tp0_m, tp1_m; \ v8i16 one = __msa_ldi_h(1); \ \ tp0_m = __msa_clei_s_h(vec0, 0); \ tp1_m = __msa_clei_s_h(vec1, 0); \ tp0_m = (v8i16)__msa_xori_b((v16u8)tp0_m, 255); \ tp1_m = (v8i16)__msa_xori_b((v16u8)tp1_m, 255); \ vec0 += 1; \ vec1 += 1; \ tp0_m = one & tp0_m; \ tp1_m = one & tp1_m; \ vec0 += tp0_m; \ vec1 += tp1_m; \ vec0 >>= 2; \ vec1 >>= 2; \ } 
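/* Scalar model of the three FDCT*_POSTPROC* macros above. This is an
 * illustrative sketch only; the macros apply the same operation across
 * whole v8i16 / v4i32 vectors:
 *
 *   v = (v + 1 + (v < 0)) >> 2;   // FDCT_POSTPROC_2V_NEG_H,
 *                                 // FDCT32_POSTPROC_NEG_W
 *   v = (v + 1 + (v > 0)) >> 2;   // FDCT32_POSTPROC_2V_POS_H
 *
 * i.e. add 1, add one more when the sign test fires, then arithmetic
 * shift right by 2. This mirrors the per-element rounding used by the C
 * reference forward transforms. */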
#define DOTP_CONST_PAIR_W(reg0_left, reg1_left, reg0_right, reg1_right, \ const0, const1, out0, out1, out2, out3) \ { \ v4i32 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m, s6_m, s7_m; \ v2i64 tp0_m, tp1_m, tp2_m, tp3_m; \ v4i32 k0_m = __msa_fill_w((int32_t)const0); \ \ s0_m = __msa_fill_w((int32_t)const1); \ k0_m = __msa_ilvev_w(s0_m, k0_m); \ \ ILVRL_W2_SW(-reg1_left, reg0_left, s1_m, s0_m); \ ILVRL_W2_SW(reg0_left, reg1_left, s3_m, s2_m); \ ILVRL_W2_SW(-reg1_right, reg0_right, s5_m, s4_m); \ ILVRL_W2_SW(reg0_right, reg1_right, s7_m, s6_m); \ \ DOTP_SW2_SD(s0_m, s1_m, k0_m, k0_m, tp0_m, tp1_m); \ DOTP_SW2_SD(s4_m, s5_m, k0_m, k0_m, tp2_m, tp3_m); \ tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS); \ tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS); \ tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS); \ tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS); \ out0 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m); \ out1 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m); \ \ DOTP_SW2_SD(s2_m, s3_m, k0_m, k0_m, tp0_m, tp1_m); \ DOTP_SW2_SD(s6_m, s7_m, k0_m, k0_m, tp2_m, tp3_m); \ tp0_m = __msa_srari_d(tp0_m, DCT_CONST_BITS); \ tp1_m = __msa_srari_d(tp1_m, DCT_CONST_BITS); \ tp2_m = __msa_srari_d(tp2_m, DCT_CONST_BITS); \ tp3_m = __msa_srari_d(tp3_m, DCT_CONST_BITS); \ out2 = __msa_pckev_w((v4i32)tp0_m, (v4i32)tp1_m); \ out3 = __msa_pckev_w((v4i32)tp2_m, (v4i32)tp3_m); \ } void fdct8x16_1d_column(const int16_t *input, int16_t *tmp_ptr, int32_t src_stride); void fdct16x8_1d_row(int16_t *input, int16_t *output); #endif // VPX_VPX_DSP_MIPS_FWD_TXFM_MSA_H_ libvpx-1.8.2/vpx_dsp/mips/idct16x16_msa.c000066400000000000000000000423071357355204000201300ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/inv_txfm_msa.h" void vpx_idct16_1d_rows_msa(const int16_t *input, int16_t *output) { v8i16 loc0, loc1, loc2, loc3; v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14; v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15; v8i16 tmp5, tmp6, tmp7; LD_SH8(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); input += 8; LD_SH8(input, 16, reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); TRANSPOSE8x8_SH_SH(reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); TRANSPOSE8x8_SH_SH(reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15, reg8, reg9, reg10, reg11, reg12, reg13, reg14, reg15); DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14); DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6); BUTTERFLY_4(reg2, reg14, reg6, reg10, loc0, loc1, reg14, reg2); DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3); DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8); DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12); BUTTERFLY_4(reg8, reg0, reg4, reg12, reg2, reg6, reg10, reg14); SUB4(reg2, loc1, reg14, loc0, reg6, loc3, reg10, loc2, reg0, reg12, reg4, reg8); ADD4(reg2, loc1, reg14, loc0, reg6, loc3, reg10, loc2, reg2, reg14, reg6, reg10); /* stage 2 */ DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15); DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3); reg9 = reg1 - loc2; reg1 = reg1 + loc2; reg7 = reg15 - loc3; reg15 = reg15 + loc3; DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11); DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1); BUTTERFLY_4(loc0, loc1, reg11, reg5, reg13, reg3, reg11, reg5); loc1 = reg15 + reg3; reg3 = reg15 - reg3; loc2 = reg2 + loc1; reg15 = reg2 - loc1; loc1 = reg1 + reg13; reg13 = reg1 - reg13; loc0 = reg0 + loc1; loc1 = reg0 - loc1; tmp6 = loc0; tmp7 = loc1; reg0 = loc2; DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9); DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11); loc0 = reg9 + reg5; reg5 = reg9 - reg5; reg2 = reg6 + loc0; reg1 = reg6 - loc0; loc0 = reg7 + reg11; reg11 = reg7 - reg11; loc1 = reg4 + loc0; loc2 = reg4 - loc0; tmp5 = loc1; DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11); BUTTERFLY_4(reg8, reg10, reg11, reg5, loc0, reg4, reg9, loc1); reg10 = loc0; reg11 = loc1; DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13); BUTTERFLY_4(reg12, reg14, reg13, reg3, reg8, reg6, reg7, reg5); reg13 = loc2; /* Transpose and store the output */ reg12 = tmp5; reg14 = tmp6; reg3 = tmp7; /* transpose block */ TRANSPOSE8x8_SH_SH(reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14, reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14); ST_SH8(reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14, output, 16); /* transpose block */ TRANSPOSE8x8_SH_SH(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15, reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15); ST_SH8(reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15, (output + 8), 16); } void vpx_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, int32_t dst_stride) { v8i16 loc0, loc1, loc2, loc3; v8i16 reg0, reg2, reg4, reg6, reg8, reg10, reg12, reg14; v8i16 reg3, reg13, reg11, reg5, reg7, reg9, reg1, reg15; v8i16 tmp5, tmp6, tmp7; /* load up 8x8 */ LD_SH8(input, 16, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); input += 8 * 16; /* load bottom 8x8 */ LD_SH8(input, 16, reg8, reg9, reg10, reg11, reg12, reg13, 
reg14, reg15); DOTP_CONST_PAIR(reg2, reg14, cospi_28_64, cospi_4_64, reg2, reg14); DOTP_CONST_PAIR(reg10, reg6, cospi_12_64, cospi_20_64, reg10, reg6); BUTTERFLY_4(reg2, reg14, reg6, reg10, loc0, loc1, reg14, reg2); DOTP_CONST_PAIR(reg14, reg2, cospi_16_64, cospi_16_64, loc2, loc3); DOTP_CONST_PAIR(reg0, reg8, cospi_16_64, cospi_16_64, reg0, reg8); DOTP_CONST_PAIR(reg4, reg12, cospi_24_64, cospi_8_64, reg4, reg12); BUTTERFLY_4(reg8, reg0, reg4, reg12, reg2, reg6, reg10, reg14); reg0 = reg2 - loc1; reg2 = reg2 + loc1; reg12 = reg14 - loc0; reg14 = reg14 + loc0; reg4 = reg6 - loc3; reg6 = reg6 + loc3; reg8 = reg10 - loc2; reg10 = reg10 + loc2; /* stage 2 */ DOTP_CONST_PAIR(reg1, reg15, cospi_30_64, cospi_2_64, reg1, reg15); DOTP_CONST_PAIR(reg9, reg7, cospi_14_64, cospi_18_64, loc2, loc3); reg9 = reg1 - loc2; reg1 = reg1 + loc2; reg7 = reg15 - loc3; reg15 = reg15 + loc3; DOTP_CONST_PAIR(reg5, reg11, cospi_22_64, cospi_10_64, reg5, reg11); DOTP_CONST_PAIR(reg13, reg3, cospi_6_64, cospi_26_64, loc0, loc1); BUTTERFLY_4(loc0, loc1, reg11, reg5, reg13, reg3, reg11, reg5); loc1 = reg15 + reg3; reg3 = reg15 - reg3; loc2 = reg2 + loc1; reg15 = reg2 - loc1; loc1 = reg1 + reg13; reg13 = reg1 - reg13; loc0 = reg0 + loc1; loc1 = reg0 - loc1; tmp6 = loc0; tmp7 = loc1; reg0 = loc2; DOTP_CONST_PAIR(reg7, reg9, cospi_24_64, cospi_8_64, reg7, reg9); DOTP_CONST_PAIR((-reg5), (-reg11), cospi_8_64, cospi_24_64, reg5, reg11); loc0 = reg9 + reg5; reg5 = reg9 - reg5; reg2 = reg6 + loc0; reg1 = reg6 - loc0; loc0 = reg7 + reg11; reg11 = reg7 - reg11; loc1 = reg4 + loc0; loc2 = reg4 - loc0; tmp5 = loc1; DOTP_CONST_PAIR(reg5, reg11, cospi_16_64, cospi_16_64, reg5, reg11); BUTTERFLY_4(reg8, reg10, reg11, reg5, loc0, reg4, reg9, loc1); reg10 = loc0; reg11 = loc1; DOTP_CONST_PAIR(reg3, reg13, cospi_16_64, cospi_16_64, reg3, reg13); BUTTERFLY_4(reg12, reg14, reg13, reg3, reg8, reg6, reg7, reg5); reg13 = loc2; /* Transpose and store the output */ reg12 = tmp5; reg14 = tmp6; reg3 = tmp7; SRARI_H4_SH(reg0, reg2, reg4, reg6, 6); VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg0, reg2, reg4, reg6); dst += (4 * dst_stride); SRARI_H4_SH(reg8, reg10, reg12, reg14, 6); VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg8, reg10, reg12, reg14); dst += (4 * dst_stride); SRARI_H4_SH(reg3, reg13, reg11, reg5, 6); VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg3, reg13, reg11, reg5); dst += (4 * dst_stride); SRARI_H4_SH(reg7, reg9, reg1, reg15, 6); VP9_ADDBLK_ST8x4_UB(dst, dst_stride, reg7, reg9, reg1, reg15); } void vpx_idct16x16_256_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int32_t i; DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]); int16_t *out = out_arr; /* transform rows */ for (i = 0; i < 2; ++i) { /* process 16 * 8 block */ vpx_idct16_1d_rows_msa((input + (i << 7)), (out + (i << 7))); } /* transform columns */ for (i = 0; i < 2; ++i) { /* process 8 * 16 block */ vpx_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)), dst_stride); } } void vpx_idct16x16_10_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { uint8_t i; DECLARE_ALIGNED(32, int16_t, out_arr[16 * 16]); int16_t *out = out_arr; /* process 16 * 8 block */ vpx_idct16_1d_rows_msa(input, out); /* short case just considers top 4 rows as valid output */ out += 4 * 16; for (i = 12; i--;) { __asm__ __volatile__( "sw $zero, 0(%[out]) \n\t" "sw $zero, 4(%[out]) \n\t" "sw $zero, 8(%[out]) \n\t" "sw $zero, 12(%[out]) \n\t" "sw $zero, 16(%[out]) \n\t" "sw $zero, 20(%[out]) \n\t" "sw $zero, 24(%[out]) \n\t" "sw $zero, 28(%[out]) \n\t" : : [out] "r"(out)); out 
+= 16; } out = out_arr; /* transform columns */ for (i = 0; i < 2; ++i) { /* process 8 * 16 block */ vpx_idct16_1d_columns_addblk_msa((out + (i << 3)), (dst + (i << 3)), dst_stride); } } void vpx_idct16x16_1_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { uint8_t i; int16_t out; v8i16 vec, res0, res1, res2, res3, res4, res5, res6, res7; v16u8 dst0, dst1, dst2, dst3, tmp0, tmp1, tmp2, tmp3; out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS); out = ROUND_POWER_OF_TWO(out, 6); vec = __msa_fill_h(out); for (i = 4; i--;) { LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); UNPCK_UB_SH(dst0, res0, res4); UNPCK_UB_SH(dst1, res1, res5); UNPCK_UB_SH(dst2, res2, res6); UNPCK_UB_SH(dst3, res3, res7); ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3); ADD4(res4, vec, res5, vec, res6, vec, res7, vec, res4, res5, res6, res7); CLIP_SH4_0_255(res0, res1, res2, res3); CLIP_SH4_0_255(res4, res5, res6, res7); PCKEV_B4_UB(res4, res0, res5, res1, res6, res2, res7, res3, tmp0, tmp1, tmp2, tmp3); ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); dst += (4 * dst_stride); } } void vpx_iadst16_1d_rows_msa(const int16_t *input, int16_t *output) { v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; v8i16 l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15; /* load input data */ LD_SH16(input, 8, l0, l8, l1, l9, l2, l10, l3, l11, l4, l12, l5, l13, l6, l14, l7, l15); TRANSPOSE8x8_SH_SH(l0, l1, l2, l3, l4, l5, l6, l7, l0, l1, l2, l3, l4, l5, l6, l7); TRANSPOSE8x8_SH_SH(l8, l9, l10, l11, l12, l13, l14, l15, l8, l9, l10, l11, l12, l13, l14, l15); /* ADST in horizontal */ VP9_IADST8x16_1D(l0, l1, l2, l3, l4, l5, l6, l7, l8, l9, l10, l11, l12, l13, l14, l15, r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15); l1 = -r8; l3 = -r4; l13 = -r13; l15 = -r1; TRANSPOSE8x8_SH_SH(r0, l1, r12, l3, r6, r14, r10, r2, l0, l1, l2, l3, l4, l5, l6, l7); ST_SH8(l0, l1, l2, l3, l4, l5, l6, l7, output, 16); TRANSPOSE8x8_SH_SH(r3, r11, r15, r7, r5, l13, r9, l15, l8, l9, l10, l11, l12, l13, l14, l15); ST_SH8(l8, l9, l10, l11, l12, l13, l14, l15, (output + 8), 16); } void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, int32_t dst_stride) { v8i16 v0, v2, v4, v6, k0, k1, k2, k3; v8i16 r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, r12, r13, r14, r15; v8i16 out0, out1, out2, out3, out4, out5, out6, out7; v8i16 out8, out9, out10, out11, out12, out13, out14, out15; v8i16 g0, g1, g2, g3, g4, g5, g6, g7, g8, g9, g10, g11, g12, g13, g14, g15; v8i16 h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11; v8i16 res0, res1, res2, res3, res4, res5, res6, res7; v8i16 res8, res9, res10, res11, res12, res13, res14, res15; v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15; v16i8 zero = { 0 }; r0 = LD_SH(input + 0 * 16); r3 = LD_SH(input + 3 * 16); r4 = LD_SH(input + 4 * 16); r7 = LD_SH(input + 7 * 16); r8 = LD_SH(input + 8 * 16); r11 = LD_SH(input + 11 * 16); r12 = LD_SH(input + 12 * 16); r15 = LD_SH(input + 15 * 16); /* stage 1 */ k0 = VP9_SET_COSPI_PAIR(cospi_1_64, cospi_31_64); k1 = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64); k2 = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64); k3 = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64); MADD_BF(r15, r0, r7, r8, k0, k1, k2, k3, g0, g1, g2, g3); k0 = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64); k1 = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64); k2 = 
VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64); k3 = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64); MADD_BF(r11, r4, r3, r12, k0, k1, k2, k3, g8, g9, g10, g11); BUTTERFLY_4(g0, g2, g10, g8, h8, h9, v2, v0); k0 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64); k1 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); k2 = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64); MADD_BF(g1, g3, g9, g11, k0, k1, k2, k0, h0, h1, h2, h3); r1 = LD_SH(input + 1 * 16); r2 = LD_SH(input + 2 * 16); r5 = LD_SH(input + 5 * 16); r6 = LD_SH(input + 6 * 16); r9 = LD_SH(input + 9 * 16); r10 = LD_SH(input + 10 * 16); r13 = LD_SH(input + 13 * 16); r14 = LD_SH(input + 14 * 16); k0 = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64); k1 = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64); k2 = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64); k3 = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64); MADD_BF(r13, r2, r5, r10, k0, k1, k2, k3, g4, g5, g6, g7); k0 = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64); k1 = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64); k2 = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64); k3 = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64); MADD_BF(r9, r6, r1, r14, k0, k1, k2, k3, g12, g13, g14, g15); BUTTERFLY_4(g4, g6, g14, g12, h10, h11, v6, v4); BUTTERFLY_4(h8, h9, h11, h10, out0, out1, h11, h10); out1 = -out1; SRARI_H2_SH(out0, out1, 6); dst0 = LD_UB(dst + 0 * dst_stride); dst1 = LD_UB(dst + 15 * dst_stride); ILVR_B2_SH(zero, dst0, zero, dst1, res0, res1); ADD2(res0, out0, res1, out1, res0, res1); CLIP_SH2_0_255(res0, res1); PCKEV_B2_SH(res0, res0, res1, res1, res0, res1); ST8x1_UB(res0, dst); ST8x1_UB(res1, dst + 15 * dst_stride); k0 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64); k1 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); k2 = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64); MADD_BF(g7, g5, g15, g13, k0, k1, k2, k0, h4, h5, h6, h7); BUTTERFLY_4(h0, h2, h6, h4, out8, out9, out11, out10); out8 = -out8; SRARI_H2_SH(out8, out9, 6); dst8 = LD_UB(dst + 1 * dst_stride); dst9 = LD_UB(dst + 14 * dst_stride); ILVR_B2_SH(zero, dst8, zero, dst9, res8, res9); ADD2(res8, out8, res9, out9, res8, res9); CLIP_SH2_0_255(res8, res9); PCKEV_B2_SH(res8, res8, res9, res9, res8, res9); ST8x1_UB(res8, dst + dst_stride); ST8x1_UB(res9, dst + 14 * dst_stride); k0 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); k1 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); k2 = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64); MADD_BF(v0, v2, v4, v6, k0, k1, k2, k0, out4, out6, out5, out7); out4 = -out4; SRARI_H2_SH(out4, out5, 6); dst4 = LD_UB(dst + 3 * dst_stride); dst5 = LD_UB(dst + 12 * dst_stride); ILVR_B2_SH(zero, dst4, zero, dst5, res4, res5); ADD2(res4, out4, res5, out5, res4, res5); CLIP_SH2_0_255(res4, res5); PCKEV_B2_SH(res4, res4, res5, res5, res4, res5); ST8x1_UB(res4, dst + 3 * dst_stride); ST8x1_UB(res5, dst + 12 * dst_stride); MADD_BF(h1, h3, h5, h7, k0, k1, k2, k0, out12, out14, out13, out15); out13 = -out13; SRARI_H2_SH(out12, out13, 6); dst12 = LD_UB(dst + 2 * dst_stride); dst13 = LD_UB(dst + 13 * dst_stride); ILVR_B2_SH(zero, dst12, zero, dst13, res12, res13); ADD2(res12, out12, res13, out13, res12, res13); CLIP_SH2_0_255(res12, res13); PCKEV_B2_SH(res12, res12, res13, res13, res12, res13); ST8x1_UB(res12, dst + 2 * dst_stride); ST8x1_UB(res13, dst + 13 * dst_stride); k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); k3 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); MADD_SHORT(out6, out7, k0, k3, out6, out7); SRARI_H2_SH(out6, out7, 6); dst6 = LD_UB(dst + 4 * dst_stride); dst7 = LD_UB(dst + 11 * dst_stride); ILVR_B2_SH(zero, dst6, zero, dst7, 
res6, res7); ADD2(res6, out6, res7, out7, res6, res7); CLIP_SH2_0_255(res6, res7); PCKEV_B2_SH(res6, res6, res7, res7, res6, res7); ST8x1_UB(res6, dst + 4 * dst_stride); ST8x1_UB(res7, dst + 11 * dst_stride); MADD_SHORT(out10, out11, k0, k3, out10, out11); SRARI_H2_SH(out10, out11, 6); dst10 = LD_UB(dst + 6 * dst_stride); dst11 = LD_UB(dst + 9 * dst_stride); ILVR_B2_SH(zero, dst10, zero, dst11, res10, res11); ADD2(res10, out10, res11, out11, res10, res11); CLIP_SH2_0_255(res10, res11); PCKEV_B2_SH(res10, res10, res11, res11, res10, res11); ST8x1_UB(res10, dst + 6 * dst_stride); ST8x1_UB(res11, dst + 9 * dst_stride); k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64); k2 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); MADD_SHORT(h10, h11, k1, k2, out2, out3); SRARI_H2_SH(out2, out3, 6); dst2 = LD_UB(dst + 7 * dst_stride); dst3 = LD_UB(dst + 8 * dst_stride); ILVR_B2_SH(zero, dst2, zero, dst3, res2, res3); ADD2(res2, out2, res3, out3, res2, res3); CLIP_SH2_0_255(res2, res3); PCKEV_B2_SH(res2, res2, res3, res3, res2, res3); ST8x1_UB(res2, dst + 7 * dst_stride); ST8x1_UB(res3, dst + 8 * dst_stride); MADD_SHORT(out14, out15, k1, k2, out14, out15); SRARI_H2_SH(out14, out15, 6); dst14 = LD_UB(dst + 5 * dst_stride); dst15 = LD_UB(dst + 10 * dst_stride); ILVR_B2_SH(zero, dst14, zero, dst15, res14, res15); ADD2(res14, out14, res15, out15, res14, res15); CLIP_SH2_0_255(res14, res15); PCKEV_B2_SH(res14, res14, res15, res15, res14, res15); ST8x1_UB(res14, dst + 5 * dst_stride); ST8x1_UB(res15, dst + 10 * dst_stride); } libvpx-1.8.2/vpx_dsp/mips/idct32x32_msa.c000066400000000000000000000651241357355204000201260ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/inv_txfm_msa.h" static void idct32x8_row_transpose_store(const int16_t *input, int16_t *tmp_buf) { v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7; /* 1st & 2nd 8x8 */ LD_SH8(input, 32, m0, n0, m1, n1, m2, n2, m3, n3); LD_SH8((input + 8), 32, m4, n4, m5, n5, m6, n6, m7, n7); TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3, m0, n0, m1, n1, m2, n2, m3, n3); TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7, m4, n4, m5, n5, m6, n6, m7, n7); ST_SH8(m0, n0, m1, n1, m2, n2, m3, n3, (tmp_buf), 8); ST_SH4(m4, n4, m5, n5, (tmp_buf + 8 * 8), 8); ST_SH4(m6, n6, m7, n7, (tmp_buf + 12 * 8), 8); /* 3rd & 4th 8x8 */ LD_SH8((input + 16), 32, m0, n0, m1, n1, m2, n2, m3, n3); LD_SH8((input + 24), 32, m4, n4, m5, n5, m6, n6, m7, n7); TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3, m0, n0, m1, n1, m2, n2, m3, n3); TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7, m4, n4, m5, n5, m6, n6, m7, n7); ST_SH4(m0, n0, m1, n1, (tmp_buf + 16 * 8), 8); ST_SH4(m2, n2, m3, n3, (tmp_buf + 20 * 8), 8); ST_SH4(m4, n4, m5, n5, (tmp_buf + 24 * 8), 8); ST_SH4(m6, n6, m7, n7, (tmp_buf + 28 * 8), 8); } static void idct32x8_row_even_process_store(int16_t *tmp_buf, int16_t *tmp_eve_buf) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7; /* Even stage 1 */ LD_SH8(tmp_buf, 32, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7); DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3); BUTTERFLY_4(reg1, reg7, reg3, reg5, vec1, vec3, vec2, vec0); DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3); loc1 = vec3; loc0 = vec1; DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4); DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6); BUTTERFLY_4(reg4, reg0, reg2, reg6, vec1, vec3, vec2, vec0); BUTTERFLY_4(vec0, vec1, loc1, loc0, stp3, stp0, stp7, stp4); BUTTERFLY_4(vec2, vec3, loc3, loc2, stp2, stp1, stp6, stp5); /* Even stage 2 */ LD_SH8((tmp_buf + 16), 32, reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7); DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3); DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5); DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1); vec0 = reg0 + reg4; reg0 = reg0 - reg4; reg4 = reg6 + reg2; reg6 = reg6 - reg2; reg2 = reg1 + reg5; reg1 = reg1 - reg5; reg5 = reg7 + reg3; reg7 = reg7 - reg3; reg3 = vec0; vec1 = reg2; reg2 = reg3 + reg4; reg3 = reg3 - reg4; reg4 = reg5 - vec1; reg5 = reg5 + vec1; DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7); DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1); vec0 = reg0 - reg6; reg0 = reg0 + reg6; vec1 = reg7 - reg1; reg7 = reg7 + reg1; DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1); DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4); /* Even stage 3 : Dependency on Even stage 1 & Even stage 2 */ BUTTERFLY_4(stp0, stp1, reg7, reg5, loc1, loc3, loc2, loc0); ST_SH(loc0, (tmp_eve_buf + 15 * 8)); ST_SH(loc1, (tmp_eve_buf)); ST_SH(loc2, (tmp_eve_buf + 14 * 8)); ST_SH(loc3, (tmp_eve_buf + 8)); BUTTERFLY_4(stp2, stp3, reg4, reg1, loc1, loc3, loc2, loc0); ST_SH(loc0, (tmp_eve_buf + 13 * 8)); ST_SH(loc1, (tmp_eve_buf + 2 * 8)); ST_SH(loc2, (tmp_eve_buf + 12 * 8)); ST_SH(loc3, (tmp_eve_buf + 3 * 8)); /* Store 8 */ 
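  /* Each BUTTERFLY_4 in this stage pairs an even-stage-1 term (stp*) with
     an even-stage-2 term (reg*): the sums are spilled to tmp_eve_buf slots
     0..7 and the differences to the mirrored slots 15..8, ready for the
     final even/odd recombination in idct_butterfly_transpose_store(). */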
BUTTERFLY_4(stp4, stp5, reg6, reg3, loc1, loc3, loc2, loc0); ST_SH(loc0, (tmp_eve_buf + 11 * 8)); ST_SH(loc1, (tmp_eve_buf + 4 * 8)); ST_SH(loc2, (tmp_eve_buf + 10 * 8)); ST_SH(loc3, (tmp_eve_buf + 5 * 8)); BUTTERFLY_4(stp6, stp7, reg2, reg0, loc1, loc3, loc2, loc0); ST_SH(loc0, (tmp_eve_buf + 9 * 8)); ST_SH(loc1, (tmp_eve_buf + 6 * 8)); ST_SH(loc2, (tmp_eve_buf + 8 * 8)); ST_SH(loc3, (tmp_eve_buf + 7 * 8)); } static void idct32x8_row_odd_process_store(int16_t *tmp_buf, int16_t *tmp_odd_buf) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; /* Odd stage 1 */ reg0 = LD_SH(tmp_buf + 8); reg1 = LD_SH(tmp_buf + 7 * 8); reg2 = LD_SH(tmp_buf + 9 * 8); reg3 = LD_SH(tmp_buf + 15 * 8); reg4 = LD_SH(tmp_buf + 17 * 8); reg5 = LD_SH(tmp_buf + 23 * 8); reg6 = LD_SH(tmp_buf + 25 * 8); reg7 = LD_SH(tmp_buf + 31 * 8); DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7); DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4); DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5); DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6); vec0 = reg0 + reg3; reg0 = reg0 - reg3; reg3 = reg7 + reg4; reg7 = reg7 - reg4; reg4 = reg1 + reg2; reg1 = reg1 - reg2; reg2 = reg6 + reg5; reg6 = reg6 - reg5; reg5 = vec0; /* 4 Stores */ ADD2(reg5, reg4, reg3, reg2, vec0, vec1); ST_SH2(vec0, vec1, (tmp_odd_buf + 4 * 8), 8); SUB2(reg5, reg4, reg3, reg2, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1); ST_SH2(vec0, vec1, (tmp_odd_buf), 8); /* 4 Stores */ DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7); DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6); BUTTERFLY_4(reg0, reg7, reg6, reg1, vec0, vec1, vec2, vec3); ST_SH2(vec0, vec1, (tmp_odd_buf + 6 * 8), 8); DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3); ST_SH2(vec2, vec3, (tmp_odd_buf + 2 * 8), 8); /* Odd stage 2 */ /* 8 loads */ reg0 = LD_SH(tmp_buf + 3 * 8); reg1 = LD_SH(tmp_buf + 5 * 8); reg2 = LD_SH(tmp_buf + 11 * 8); reg3 = LD_SH(tmp_buf + 13 * 8); reg4 = LD_SH(tmp_buf + 19 * 8); reg5 = LD_SH(tmp_buf + 21 * 8); reg6 = LD_SH(tmp_buf + 27 * 8); reg7 = LD_SH(tmp_buf + 29 * 8); DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6); DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5); DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4); DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7); /* 4 Stores */ SUB4(reg1, reg2, reg6, reg5, reg0, reg3, reg7, reg4, vec0, vec1, vec2, vec3); DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1); DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3); BUTTERFLY_4(loc3, loc2, loc0, loc1, vec1, vec0, vec2, vec3); ST_SH2(vec0, vec1, (tmp_odd_buf + 12 * 8), 3 * 8); DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1); ST_SH2(vec0, vec1, (tmp_odd_buf + 10 * 8), 8); /* 4 Stores */ ADD4(reg1, reg2, reg6, reg5, reg0, reg3, reg7, reg4, vec1, vec2, vec0, vec3); BUTTERFLY_4(vec0, vec3, vec2, vec1, reg0, reg1, reg3, reg2); ST_SH(reg0, (tmp_odd_buf + 13 * 8)); ST_SH(reg1, (tmp_odd_buf + 14 * 8)); DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1); ST_SH2(reg0, reg1, (tmp_odd_buf + 8 * 8), 8); /* Odd stage 3 : Dependency on Odd stage 1 & Odd stage 2 */ /* Load 8 & Store 8 */ LD_SH4(tmp_odd_buf, 8, reg0, reg1, reg2, reg3); LD_SH4((tmp_odd_buf + 8 * 8), 8, reg4, reg5, reg6, reg7); ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7, loc0, loc1, loc2, loc3); ST_SH4(loc0, 
loc1, loc2, loc3, tmp_odd_buf, 8); SUB2(reg0, reg4, reg1, reg5, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); SUB2(reg2, reg6, reg3, reg7, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 8 * 8), 8); /* Load 8 & Store 8 */ LD_SH4((tmp_odd_buf + 4 * 8), 8, reg1, reg2, reg0, reg3); LD_SH4((tmp_odd_buf + 12 * 8), 8, reg4, reg5, reg6, reg7); ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7, loc0, loc1, loc2, loc3); ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 4 * 8), 8); SUB2(reg0, reg4, reg3, reg7, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); SUB2(reg1, reg5, reg2, reg6, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8); } static void idct_butterfly_transpose_store(int16_t *tmp_buf, int16_t *tmp_eve_buf, int16_t *tmp_odd_buf, int16_t *dst) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7; /* FINAL BUTTERFLY : Dependency on Even & Odd */ vec0 = LD_SH(tmp_odd_buf); vec1 = LD_SH(tmp_odd_buf + 9 * 8); vec2 = LD_SH(tmp_odd_buf + 14 * 8); vec3 = LD_SH(tmp_odd_buf + 6 * 8); loc0 = LD_SH(tmp_eve_buf); loc1 = LD_SH(tmp_eve_buf + 8 * 8); loc2 = LD_SH(tmp_eve_buf + 4 * 8); loc3 = LD_SH(tmp_eve_buf + 12 * 8); ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m0, m4, m2, m6); ST_SH((loc0 - vec3), (tmp_buf + 31 * 8)); ST_SH((loc1 - vec2), (tmp_buf + 23 * 8)); ST_SH((loc2 - vec1), (tmp_buf + 27 * 8)); ST_SH((loc3 - vec0), (tmp_buf + 19 * 8)); /* Load 8 & Store 8 */ vec0 = LD_SH(tmp_odd_buf + 4 * 8); vec1 = LD_SH(tmp_odd_buf + 13 * 8); vec2 = LD_SH(tmp_odd_buf + 10 * 8); vec3 = LD_SH(tmp_odd_buf + 3 * 8); loc0 = LD_SH(tmp_eve_buf + 2 * 8); loc1 = LD_SH(tmp_eve_buf + 10 * 8); loc2 = LD_SH(tmp_eve_buf + 6 * 8); loc3 = LD_SH(tmp_eve_buf + 14 * 8); ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m1, m5, m3, m7); ST_SH((loc0 - vec3), (tmp_buf + 29 * 8)); ST_SH((loc1 - vec2), (tmp_buf + 21 * 8)); ST_SH((loc2 - vec1), (tmp_buf + 25 * 8)); ST_SH((loc3 - vec0), (tmp_buf + 17 * 8)); /* Load 8 & Store 8 */ vec0 = LD_SH(tmp_odd_buf + 2 * 8); vec1 = LD_SH(tmp_odd_buf + 11 * 8); vec2 = LD_SH(tmp_odd_buf + 12 * 8); vec3 = LD_SH(tmp_odd_buf + 7 * 8); loc0 = LD_SH(tmp_eve_buf + 1 * 8); loc1 = LD_SH(tmp_eve_buf + 9 * 8); loc2 = LD_SH(tmp_eve_buf + 5 * 8); loc3 = LD_SH(tmp_eve_buf + 13 * 8); ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n0, n4, n2, n6); ST_SH((loc0 - vec3), (tmp_buf + 30 * 8)); ST_SH((loc1 - vec2), (tmp_buf + 22 * 8)); ST_SH((loc2 - vec1), (tmp_buf + 26 * 8)); ST_SH((loc3 - vec0), (tmp_buf + 18 * 8)); /* Load 8 & Store 8 */ vec0 = LD_SH(tmp_odd_buf + 5 * 8); vec1 = LD_SH(tmp_odd_buf + 15 * 8); vec2 = LD_SH(tmp_odd_buf + 8 * 8); vec3 = LD_SH(tmp_odd_buf + 1 * 8); loc0 = LD_SH(tmp_eve_buf + 3 * 8); loc1 = LD_SH(tmp_eve_buf + 11 * 8); loc2 = LD_SH(tmp_eve_buf + 7 * 8); loc3 = LD_SH(tmp_eve_buf + 15 * 8); ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n1, n5, n3, n7); ST_SH((loc0 - vec3), (tmp_buf + 28 * 8)); ST_SH((loc1 - vec2), (tmp_buf + 20 * 8)); ST_SH((loc2 - vec1), (tmp_buf + 24 * 8)); ST_SH((loc3 - vec0), (tmp_buf + 16 * 8)); /* Transpose : 16 vectors */ /* 1st & 2nd 8x8 */ TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3, m0, n0, m1, n1, m2, n2, m3, n3); ST_SH4(m0, n0, m1, n1, (dst + 0), 32); ST_SH4(m2, n2, m3, n3, (dst + 4 * 32), 32); TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, 
n6, m7, n7, m4, n4, m5, n5, m6, n6, m7, n7); ST_SH4(m4, n4, m5, n5, (dst + 8), 32); ST_SH4(m6, n6, m7, n7, (dst + 8 + 4 * 32), 32); /* 3rd & 4th 8x8 */ LD_SH8((tmp_buf + 8 * 16), 8, m0, n0, m1, n1, m2, n2, m3, n3); LD_SH8((tmp_buf + 12 * 16), 8, m4, n4, m5, n5, m6, n6, m7, n7); TRANSPOSE8x8_SH_SH(m0, n0, m1, n1, m2, n2, m3, n3, m0, n0, m1, n1, m2, n2, m3, n3); ST_SH4(m0, n0, m1, n1, (dst + 16), 32); ST_SH4(m2, n2, m3, n3, (dst + 16 + 4 * 32), 32); TRANSPOSE8x8_SH_SH(m4, n4, m5, n5, m6, n6, m7, n7, m4, n4, m5, n5, m6, n6, m7, n7); ST_SH4(m4, n4, m5, n5, (dst + 24), 32); ST_SH4(m6, n6, m7, n7, (dst + 24 + 4 * 32), 32); } static void idct32x8_1d_rows_msa(const int16_t *input, int16_t *output) { DECLARE_ALIGNED(32, int16_t, tmp_buf[8 * 32]); DECLARE_ALIGNED(32, int16_t, tmp_odd_buf[16 * 8]); DECLARE_ALIGNED(32, int16_t, tmp_eve_buf[16 * 8]); idct32x8_row_transpose_store(input, &tmp_buf[0]); idct32x8_row_even_process_store(&tmp_buf[0], &tmp_eve_buf[0]); idct32x8_row_odd_process_store(&tmp_buf[0], &tmp_odd_buf[0]); idct_butterfly_transpose_store(&tmp_buf[0], &tmp_eve_buf[0], &tmp_odd_buf[0], output); } static void idct8x32_column_even_process_store(int16_t *tmp_buf, int16_t *tmp_eve_buf) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; v8i16 stp0, stp1, stp2, stp3, stp4, stp5, stp6, stp7; /* Even stage 1 */ LD_SH8(tmp_buf, (4 * 32), reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); tmp_buf += (2 * 32); DOTP_CONST_PAIR(reg1, reg7, cospi_28_64, cospi_4_64, reg1, reg7); DOTP_CONST_PAIR(reg5, reg3, cospi_12_64, cospi_20_64, reg5, reg3); BUTTERFLY_4(reg1, reg7, reg3, reg5, vec1, vec3, vec2, vec0); DOTP_CONST_PAIR(vec2, vec0, cospi_16_64, cospi_16_64, loc2, loc3); loc1 = vec3; loc0 = vec1; DOTP_CONST_PAIR(reg0, reg4, cospi_16_64, cospi_16_64, reg0, reg4); DOTP_CONST_PAIR(reg2, reg6, cospi_24_64, cospi_8_64, reg2, reg6); BUTTERFLY_4(reg4, reg0, reg2, reg6, vec1, vec3, vec2, vec0); BUTTERFLY_4(vec0, vec1, loc1, loc0, stp3, stp0, stp7, stp4); BUTTERFLY_4(vec2, vec3, loc3, loc2, stp2, stp1, stp6, stp5); /* Even stage 2 */ /* Load 8 */ LD_SH8(tmp_buf, (4 * 32), reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7); DOTP_CONST_PAIR(reg0, reg7, cospi_30_64, cospi_2_64, reg0, reg7); DOTP_CONST_PAIR(reg4, reg3, cospi_14_64, cospi_18_64, reg4, reg3); DOTP_CONST_PAIR(reg2, reg5, cospi_22_64, cospi_10_64, reg2, reg5); DOTP_CONST_PAIR(reg6, reg1, cospi_6_64, cospi_26_64, reg6, reg1); vec0 = reg0 + reg4; reg0 = reg0 - reg4; reg4 = reg6 + reg2; reg6 = reg6 - reg2; reg2 = reg1 + reg5; reg1 = reg1 - reg5; reg5 = reg7 + reg3; reg7 = reg7 - reg3; reg3 = vec0; vec1 = reg2; reg2 = reg3 + reg4; reg3 = reg3 - reg4; reg4 = reg5 - vec1; reg5 = reg5 + vec1; DOTP_CONST_PAIR(reg7, reg0, cospi_24_64, cospi_8_64, reg0, reg7); DOTP_CONST_PAIR((-reg6), reg1, cospi_24_64, cospi_8_64, reg6, reg1); vec0 = reg0 - reg6; reg0 = reg0 + reg6; vec1 = reg7 - reg1; reg7 = reg7 + reg1; DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, reg6, reg1); DOTP_CONST_PAIR(reg4, reg3, cospi_16_64, cospi_16_64, reg3, reg4); /* Even stage 3 : Dependency on Even stage 1 & Even stage 2 */ /* Store 8 */ BUTTERFLY_4(stp0, stp1, reg7, reg5, loc1, loc3, loc2, loc0); ST_SH2(loc1, loc3, tmp_eve_buf, 8); ST_SH2(loc2, loc0, (tmp_eve_buf + 14 * 8), 8); BUTTERFLY_4(stp2, stp3, reg4, reg1, loc1, loc3, loc2, loc0); ST_SH2(loc1, loc3, (tmp_eve_buf + 2 * 8), 8); ST_SH2(loc2, loc0, (tmp_eve_buf + 12 * 8), 8); /* Store 8 */ BUTTERFLY_4(stp4, stp5, reg6, reg3, loc1, loc3, loc2, loc0); ST_SH2(loc1, loc3, (tmp_eve_buf + 
4 * 8), 8); ST_SH2(loc2, loc0, (tmp_eve_buf + 10 * 8), 8); BUTTERFLY_4(stp6, stp7, reg2, reg0, loc1, loc3, loc2, loc0); ST_SH2(loc1, loc3, (tmp_eve_buf + 6 * 8), 8); ST_SH2(loc2, loc0, (tmp_eve_buf + 8 * 8), 8); } static void idct8x32_column_odd_process_store(int16_t *tmp_buf, int16_t *tmp_odd_buf) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; v8i16 reg0, reg1, reg2, reg3, reg4, reg5, reg6, reg7; /* Odd stage 1 */ reg0 = LD_SH(tmp_buf + 32); reg1 = LD_SH(tmp_buf + 7 * 32); reg2 = LD_SH(tmp_buf + 9 * 32); reg3 = LD_SH(tmp_buf + 15 * 32); reg4 = LD_SH(tmp_buf + 17 * 32); reg5 = LD_SH(tmp_buf + 23 * 32); reg6 = LD_SH(tmp_buf + 25 * 32); reg7 = LD_SH(tmp_buf + 31 * 32); DOTP_CONST_PAIR(reg0, reg7, cospi_31_64, cospi_1_64, reg0, reg7); DOTP_CONST_PAIR(reg4, reg3, cospi_15_64, cospi_17_64, reg3, reg4); DOTP_CONST_PAIR(reg2, reg5, cospi_23_64, cospi_9_64, reg2, reg5); DOTP_CONST_PAIR(reg6, reg1, cospi_7_64, cospi_25_64, reg1, reg6); vec0 = reg0 + reg3; reg0 = reg0 - reg3; reg3 = reg7 + reg4; reg7 = reg7 - reg4; reg4 = reg1 + reg2; reg1 = reg1 - reg2; reg2 = reg6 + reg5; reg6 = reg6 - reg5; reg5 = vec0; /* 4 Stores */ ADD2(reg5, reg4, reg3, reg2, vec0, vec1); ST_SH2(vec0, vec1, (tmp_odd_buf + 4 * 8), 8); SUB2(reg5, reg4, reg3, reg2, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_24_64, cospi_8_64, vec0, vec1); ST_SH2(vec0, vec1, tmp_odd_buf, 8); /* 4 Stores */ DOTP_CONST_PAIR(reg7, reg0, cospi_28_64, cospi_4_64, reg0, reg7); DOTP_CONST_PAIR(reg6, reg1, -cospi_4_64, cospi_28_64, reg1, reg6); BUTTERFLY_4(reg0, reg7, reg6, reg1, vec0, vec1, vec2, vec3); ST_SH2(vec0, vec1, (tmp_odd_buf + 6 * 8), 8); DOTP_CONST_PAIR(vec2, vec3, cospi_24_64, cospi_8_64, vec2, vec3); ST_SH2(vec2, vec3, (tmp_odd_buf + 2 * 8), 8); /* Odd stage 2 */ /* 8 loads */ reg0 = LD_SH(tmp_buf + 3 * 32); reg1 = LD_SH(tmp_buf + 5 * 32); reg2 = LD_SH(tmp_buf + 11 * 32); reg3 = LD_SH(tmp_buf + 13 * 32); reg4 = LD_SH(tmp_buf + 19 * 32); reg5 = LD_SH(tmp_buf + 21 * 32); reg6 = LD_SH(tmp_buf + 27 * 32); reg7 = LD_SH(tmp_buf + 29 * 32); DOTP_CONST_PAIR(reg1, reg6, cospi_27_64, cospi_5_64, reg1, reg6); DOTP_CONST_PAIR(reg5, reg2, cospi_11_64, cospi_21_64, reg2, reg5); DOTP_CONST_PAIR(reg3, reg4, cospi_19_64, cospi_13_64, reg3, reg4); DOTP_CONST_PAIR(reg7, reg0, cospi_3_64, cospi_29_64, reg0, reg7); /* 4 Stores */ SUB4(reg1, reg2, reg6, reg5, reg0, reg3, reg7, reg4, vec0, vec1, vec2, vec3); DOTP_CONST_PAIR(vec1, vec0, cospi_12_64, cospi_20_64, loc0, loc1); DOTP_CONST_PAIR(vec3, vec2, -cospi_20_64, cospi_12_64, loc2, loc3); BUTTERFLY_4(loc2, loc3, loc1, loc0, vec0, vec1, vec3, vec2); ST_SH2(vec0, vec1, (tmp_odd_buf + 12 * 8), 3 * 8); DOTP_CONST_PAIR(vec3, vec2, -cospi_8_64, cospi_24_64, vec0, vec1); ST_SH2(vec0, vec1, (tmp_odd_buf + 10 * 8), 8); /* 4 Stores */ ADD4(reg0, reg3, reg1, reg2, reg5, reg6, reg4, reg7, vec0, vec1, vec2, vec3); BUTTERFLY_4(vec0, vec3, vec2, vec1, reg0, reg1, reg3, reg2); ST_SH2(reg0, reg1, (tmp_odd_buf + 13 * 8), 8); DOTP_CONST_PAIR(reg3, reg2, -cospi_8_64, cospi_24_64, reg0, reg1); ST_SH2(reg0, reg1, (tmp_odd_buf + 8 * 8), 8); /* Odd stage 3 : Dependency on Odd stage 1 & Odd stage 2 */ /* Load 8 & Store 8 */ LD_SH4(tmp_odd_buf, 8, reg0, reg1, reg2, reg3); LD_SH4((tmp_odd_buf + 8 * 8), 8, reg4, reg5, reg6, reg7); ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7, loc0, loc1, loc2, loc3); ST_SH4(loc0, loc1, loc2, loc3, tmp_odd_buf, 8); SUB2(reg0, reg4, reg1, reg5, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); SUB2(reg2, reg6, reg3, reg7, vec0, vec1); DOTP_CONST_PAIR(vec1, 
vec0, cospi_16_64, cospi_16_64, loc2, loc3); ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 8 * 8), 8); /* Load 8 & Store 8 */ LD_SH4((tmp_odd_buf + 4 * 8), 8, reg1, reg2, reg0, reg3); LD_SH4((tmp_odd_buf + 12 * 8), 8, reg4, reg5, reg6, reg7); ADD4(reg0, reg4, reg1, reg5, reg2, reg6, reg3, reg7, loc0, loc1, loc2, loc3); ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 4 * 8), 8); SUB2(reg0, reg4, reg3, reg7, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc0, loc1); SUB2(reg1, reg5, reg2, reg6, vec0, vec1); DOTP_CONST_PAIR(vec1, vec0, cospi_16_64, cospi_16_64, loc2, loc3); ST_SH4(loc0, loc1, loc2, loc3, (tmp_odd_buf + 12 * 8), 8); } static void idct8x32_column_butterfly_addblk(int16_t *tmp_eve_buf, int16_t *tmp_odd_buf, uint8_t *dst, int32_t dst_stride) { v8i16 vec0, vec1, vec2, vec3, loc0, loc1, loc2, loc3; v8i16 m0, m1, m2, m3, m4, m5, m6, m7, n0, n1, n2, n3, n4, n5, n6, n7; /* FINAL BUTTERFLY : Dependency on Even & Odd */ vec0 = LD_SH(tmp_odd_buf); vec1 = LD_SH(tmp_odd_buf + 9 * 8); vec2 = LD_SH(tmp_odd_buf + 14 * 8); vec3 = LD_SH(tmp_odd_buf + 6 * 8); loc0 = LD_SH(tmp_eve_buf); loc1 = LD_SH(tmp_eve_buf + 8 * 8); loc2 = LD_SH(tmp_eve_buf + 4 * 8); loc3 = LD_SH(tmp_eve_buf + 12 * 8); ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m0, m4, m2, m6); SRARI_H4_SH(m0, m2, m4, m6, 6); VP9_ADDBLK_ST8x4_UB(dst, (4 * dst_stride), m0, m2, m4, m6); SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m6, m2, m4, m0); SRARI_H4_SH(m0, m2, m4, m6, 6); VP9_ADDBLK_ST8x4_UB((dst + 19 * dst_stride), (4 * dst_stride), m0, m2, m4, m6); /* Load 8 & Store 8 */ vec0 = LD_SH(tmp_odd_buf + 4 * 8); vec1 = LD_SH(tmp_odd_buf + 13 * 8); vec2 = LD_SH(tmp_odd_buf + 10 * 8); vec3 = LD_SH(tmp_odd_buf + 3 * 8); loc0 = LD_SH(tmp_eve_buf + 2 * 8); loc1 = LD_SH(tmp_eve_buf + 10 * 8); loc2 = LD_SH(tmp_eve_buf + 6 * 8); loc3 = LD_SH(tmp_eve_buf + 14 * 8); ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m1, m5, m3, m7); SRARI_H4_SH(m1, m3, m5, m7, 6); VP9_ADDBLK_ST8x4_UB((dst + 2 * dst_stride), (4 * dst_stride), m1, m3, m5, m7); SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, m7, m3, m5, m1); SRARI_H4_SH(m1, m3, m5, m7, 6); VP9_ADDBLK_ST8x4_UB((dst + 17 * dst_stride), (4 * dst_stride), m1, m3, m5, m7); /* Load 8 & Store 8 */ vec0 = LD_SH(tmp_odd_buf + 2 * 8); vec1 = LD_SH(tmp_odd_buf + 11 * 8); vec2 = LD_SH(tmp_odd_buf + 12 * 8); vec3 = LD_SH(tmp_odd_buf + 7 * 8); loc0 = LD_SH(tmp_eve_buf + 1 * 8); loc1 = LD_SH(tmp_eve_buf + 9 * 8); loc2 = LD_SH(tmp_eve_buf + 5 * 8); loc3 = LD_SH(tmp_eve_buf + 13 * 8); ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n0, n4, n2, n6); SRARI_H4_SH(n0, n2, n4, n6, 6); VP9_ADDBLK_ST8x4_UB((dst + 1 * dst_stride), (4 * dst_stride), n0, n2, n4, n6); SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n6, n2, n4, n0); SRARI_H4_SH(n0, n2, n4, n6, 6); VP9_ADDBLK_ST8x4_UB((dst + 18 * dst_stride), (4 * dst_stride), n0, n2, n4, n6); /* Load 8 & Store 8 */ vec0 = LD_SH(tmp_odd_buf + 5 * 8); vec1 = LD_SH(tmp_odd_buf + 15 * 8); vec2 = LD_SH(tmp_odd_buf + 8 * 8); vec3 = LD_SH(tmp_odd_buf + 1 * 8); loc0 = LD_SH(tmp_eve_buf + 3 * 8); loc1 = LD_SH(tmp_eve_buf + 11 * 8); loc2 = LD_SH(tmp_eve_buf + 7 * 8); loc3 = LD_SH(tmp_eve_buf + 15 * 8); ADD4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n1, n5, n3, n7); SRARI_H4_SH(n1, n3, n5, n7, 6); VP9_ADDBLK_ST8x4_UB((dst + 3 * dst_stride), (4 * dst_stride), n1, n3, n5, n7); SUB4(loc0, vec3, loc1, vec2, loc2, vec1, loc3, vec0, n7, n3, n5, n1); SRARI_H4_SH(n1, n3, n5, n7, 6); VP9_ADDBLK_ST8x4_UB((dst + 16 * dst_stride), (4 * 
dst_stride), n1, n3, n5, n7); } static void idct8x32_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, int32_t dst_stride) { DECLARE_ALIGNED(32, int16_t, tmp_odd_buf[16 * 8]); DECLARE_ALIGNED(32, int16_t, tmp_eve_buf[16 * 8]); idct8x32_column_even_process_store(input, &tmp_eve_buf[0]); idct8x32_column_odd_process_store(input, &tmp_odd_buf[0]); idct8x32_column_butterfly_addblk(&tmp_eve_buf[0], &tmp_odd_buf[0], dst, dst_stride); } void vpx_idct32x32_1024_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int32_t i; DECLARE_ALIGNED(32, int16_t, out_arr[32 * 32]); int16_t *out_ptr = out_arr; /* transform rows */ for (i = 0; i < 4; ++i) { /* process 32 * 8 block */ idct32x8_1d_rows_msa((input + (i << 8)), (out_ptr + (i << 8))); } /* transform columns */ for (i = 0; i < 4; ++i) { /* process 8 * 32 block */ idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), dst_stride); } } void vpx_idct32x32_34_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int32_t i; DECLARE_ALIGNED(32, int16_t, out_arr[32 * 32]); int16_t *out_ptr = out_arr; for (i = 32; i--;) { __asm__ __volatile__( "sw $zero, 0(%[out_ptr]) \n\t" "sw $zero, 4(%[out_ptr]) \n\t" "sw $zero, 8(%[out_ptr]) \n\t" "sw $zero, 12(%[out_ptr]) \n\t" "sw $zero, 16(%[out_ptr]) \n\t" "sw $zero, 20(%[out_ptr]) \n\t" "sw $zero, 24(%[out_ptr]) \n\t" "sw $zero, 28(%[out_ptr]) \n\t" "sw $zero, 32(%[out_ptr]) \n\t" "sw $zero, 36(%[out_ptr]) \n\t" "sw $zero, 40(%[out_ptr]) \n\t" "sw $zero, 44(%[out_ptr]) \n\t" "sw $zero, 48(%[out_ptr]) \n\t" "sw $zero, 52(%[out_ptr]) \n\t" "sw $zero, 56(%[out_ptr]) \n\t" "sw $zero, 60(%[out_ptr]) \n\t" : : [out_ptr] "r"(out_ptr)); out_ptr += 32; } out_ptr = out_arr; /* rows: only upper-left 8x8 has non-zero coeff */ idct32x8_1d_rows_msa(input, out_ptr); /* transform columns */ for (i = 0; i < 4; ++i) { /* process 8 * 32 block */ idct8x32_1d_columns_addblk_msa((out_ptr + (i << 3)), (dst + (i << 3)), dst_stride); } } void vpx_idct32x32_1_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int32_t i; int16_t out; v16u8 dst0, dst1, dst2, dst3, tmp0, tmp1, tmp2, tmp3; v8i16 res0, res1, res2, res3, res4, res5, res6, res7, vec; out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS); out = ROUND_POWER_OF_TWO(out, 6); vec = __msa_fill_h(out); for (i = 16; i--;) { LD_UB2(dst, 16, dst0, dst1); LD_UB2(dst + dst_stride, 16, dst2, dst3); UNPCK_UB_SH(dst0, res0, res4); UNPCK_UB_SH(dst1, res1, res5); UNPCK_UB_SH(dst2, res2, res6); UNPCK_UB_SH(dst3, res3, res7); ADD4(res0, vec, res1, vec, res2, vec, res3, vec, res0, res1, res2, res3); ADD4(res4, vec, res5, vec, res6, vec, res7, vec, res4, res5, res6, res7); CLIP_SH4_0_255(res0, res1, res2, res3); CLIP_SH4_0_255(res4, res5, res6, res7); PCKEV_B4_UB(res4, res0, res5, res1, res6, res2, res7, res3, tmp0, tmp1, tmp2, tmp3); ST_UB2(tmp0, tmp1, dst, 16); dst += dst_stride; ST_UB2(tmp2, tmp3, dst, 16); dst += dst_stride; } } libvpx-1.8.2/vpx_dsp/mips/idct4x4_msa.c000066400000000000000000000057131357355204000177620ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/inv_txfm_msa.h" void vpx_iwht4x4_16_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { v8i16 in0, in1, in2, in3; v4i32 in0_r, in1_r, in2_r, in3_r, in4_r; /* load vector elements of 4x4 block */ LD4x4_SH(input, in0, in2, in3, in1); TRANSPOSE4x4_SH_SH(in0, in2, in3, in1, in0, in2, in3, in1); UNPCK_R_SH_SW(in0, in0_r); UNPCK_R_SH_SW(in2, in2_r); UNPCK_R_SH_SW(in3, in3_r); UNPCK_R_SH_SW(in1, in1_r); SRA_4V(in0_r, in1_r, in2_r, in3_r, UNIT_QUANT_SHIFT); in0_r += in2_r; in3_r -= in1_r; in4_r = (in0_r - in3_r) >> 1; in1_r = in4_r - in1_r; in2_r = in4_r - in2_r; in0_r -= in1_r; in3_r += in2_r; TRANSPOSE4x4_SW_SW(in0_r, in1_r, in2_r, in3_r, in0_r, in1_r, in2_r, in3_r); in0_r += in1_r; in2_r -= in3_r; in4_r = (in0_r - in2_r) >> 1; in3_r = in4_r - in3_r; in1_r = in4_r - in1_r; in0_r -= in3_r; in2_r += in1_r; PCKEV_H4_SH(in0_r, in0_r, in1_r, in1_r, in2_r, in2_r, in3_r, in3_r, in0, in1, in2, in3); ADDBLK_ST4x4_UB(in0, in3, in1, in2, dst, dst_stride); } void vpx_iwht4x4_1_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int16_t a1, e1; v8i16 in1, in0 = { 0 }; a1 = input[0] >> UNIT_QUANT_SHIFT; e1 = a1 >> 1; a1 -= e1; in0 = __msa_insert_h(in0, 0, a1); in0 = __msa_insert_h(in0, 1, e1); in0 = __msa_insert_h(in0, 2, e1); in0 = __msa_insert_h(in0, 3, e1); in1 = in0 >> 1; in0 -= in1; ADDBLK_ST4x4_UB(in0, in1, in1, in1, dst, dst_stride); } void vpx_idct4x4_16_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { v8i16 in0, in1, in2, in3; /* load vector elements of 4x4 block */ LD4x4_SH(input, in0, in1, in2, in3); /* rows */ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); /* columns */ TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); VP9_IDCT4x4(in0, in1, in2, in3, in0, in1, in2, in3); /* rounding (add 2^3, divide by 2^4) */ SRARI_H4_SH(in0, in1, in2, in3, 4); ADDBLK_ST4x4_UB(in0, in1, in2, in3, dst, dst_stride); } void vpx_idct4x4_1_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int16_t out; v8i16 vec; out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); out = ROUND_POWER_OF_TWO((out * cospi_16_64), DCT_CONST_BITS); out = ROUND_POWER_OF_TWO(out, 4); vec = __msa_fill_h(out); ADDBLK_ST4x4_UB(vec, vec, vec, vec, dst, dst_stride); } libvpx-1.8.2/vpx_dsp/mips/idct8x8_msa.c000066400000000000000000000107221357355204000177660ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/inv_txfm_msa.h" void vpx_idct8x8_64_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; /* load vector elements of 8x8 block */ LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); /* rows transform */ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); /* 1D idct8x8 */ VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); /* columns transform */ TRANSPOSE8x8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); /* 1D idct8x8 */ VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); /* final rounding (add 2^4, divide by 2^5) and shift */ SRARI_H4_SH(in0, in1, in2, in3, 5); SRARI_H4_SH(in4, in5, in6, in7, 5); /* add block and store 8x8 */ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3); dst += (4 * dst_stride); VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7); } void vpx_idct8x8_12_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { v8i16 in0, in1, in2, in3, in4, in5, in6, in7; v8i16 s0, s1, s2, s3, s4, s5, s6, s7, k0, k1, k2, k3, m0, m1, m2, m3; v4i32 tmp0, tmp1, tmp2, tmp3; v8i16 zero = { 0 }; /* load vector elements of 8x8 block */ LD_SH8(input, 8, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, in0, in1, in2, in3); /* stage1 */ ILVL_H2_SH(in3, in0, in2, in1, s0, s1); k0 = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); k1 = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64); k2 = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); k3 = VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64); DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3); SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS); PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1); PCKEV_H2_SH(zero, tmp2, zero, tmp3, s2, s3); BUTTERFLY_4(s0, s1, s3, s2, s4, s7, s6, s5); /* stage2 */ ILVR_H2_SH(in3, in1, in2, in0, s1, s0); k0 = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); k1 = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); k2 = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); k3 = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); DOTP_SH4_SW(s0, s0, s1, s1, k0, k1, k2, k3, tmp0, tmp1, tmp2, tmp3); SRARI_W4_SW(tmp0, tmp1, tmp2, tmp3, DCT_CONST_BITS); PCKEV_H2_SH(zero, tmp0, zero, tmp1, s0, s1); PCKEV_H2_SH(zero, tmp2, zero, tmp3, s2, s3); BUTTERFLY_4(s0, s1, s2, s3, m0, m1, m2, m3); /* stage3 */ s0 = __msa_ilvr_h(s6, s5); k1 = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); DOTP_SH2_SW(s0, s0, k1, k0, tmp0, tmp1); SRARI_W2_SW(tmp0, tmp1, DCT_CONST_BITS); PCKEV_H2_SH(zero, tmp0, zero, tmp1, s2, s3); /* stage4 */ BUTTERFLY_8(m0, m1, m2, m3, s4, s2, s3, s7, in0, in1, in2, in3, in4, in5, in6, in7); TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); VP9_IDCT8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, in0, in1, in2, in3, in4, in5, in6, in7); /* final rounding (add 2^4, divide by 2^5) and shift */ SRARI_H4_SH(in0, in1, in2, in3, 5); SRARI_H4_SH(in4, in5, in6, in7, 5); /* add block and store 8x8 */ VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3); dst += (4 * dst_stride); VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in4, in5, in6, in7); } void vpx_idct8x8_1_add_msa(const int16_t *input, uint8_t *dst, int32_t dst_stride) { int16_t out; int32_t val; v8i16 vec; out = ROUND_POWER_OF_TWO((input[0] * cospi_16_64), DCT_CONST_BITS); out = ROUND_POWER_OF_TWO((out * 
cospi_16_64), DCT_CONST_BITS); val = ROUND_POWER_OF_TWO(out, 5); vec = __msa_fill_h(val); VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec); dst += (4 * dst_stride); VP9_ADDBLK_ST8x4_UB(dst, dst_stride, vec, vec, vec, vec); } libvpx-1.8.2/vpx_dsp/mips/intrapred16_dspr2.c000066400000000000000000000455271357355204000211170ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_dsp/mips/common_dspr2.h" #if HAVE_DSPR2 void vpx_h_predictor_16x16_dspr2(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; int32_t tmp9, tmp10, tmp11, tmp12, tmp13, tmp14, tmp15, tmp16; (void)above; __asm__ __volatile__( "lb %[tmp1], (%[left]) \n\t" "lb %[tmp2], 1(%[left]) \n\t" "lb %[tmp3], 2(%[left]) \n\t" "lb %[tmp4], 3(%[left]) \n\t" "lb %[tmp5], 4(%[left]) \n\t" "lb %[tmp6], 5(%[left]) \n\t" "lb %[tmp7], 6(%[left]) \n\t" "lb %[tmp8], 7(%[left]) \n\t" "lb %[tmp9], 8(%[left]) \n\t" "lb %[tmp10], 9(%[left]) \n\t" "lb %[tmp11], 10(%[left]) \n\t" "lb %[tmp12], 11(%[left]) \n\t" "lb %[tmp13], 12(%[left]) \n\t" "lb %[tmp14], 13(%[left]) \n\t" "lb %[tmp15], 14(%[left]) \n\t" "lb %[tmp16], 15(%[left]) \n\t" "replv.qb %[tmp1], %[tmp1] \n\t" "replv.qb %[tmp2], %[tmp2] \n\t" "replv.qb %[tmp3], %[tmp3] \n\t" "replv.qb %[tmp4], %[tmp4] \n\t" "replv.qb %[tmp5], %[tmp5] \n\t" "replv.qb %[tmp6], %[tmp6] \n\t" "replv.qb %[tmp7], %[tmp7] \n\t" "replv.qb %[tmp8], %[tmp8] \n\t" "replv.qb %[tmp9], %[tmp9] \n\t" "replv.qb %[tmp10], %[tmp10] \n\t" "replv.qb %[tmp11], %[tmp11] \n\t" "replv.qb %[tmp12], %[tmp12] \n\t" "replv.qb %[tmp13], %[tmp13] \n\t" "replv.qb %[tmp14], %[tmp14] \n\t" "replv.qb %[tmp15], %[tmp15] \n\t" "replv.qb %[tmp16], %[tmp16] \n\t" "sw %[tmp1], (%[dst]) \n\t" "sw %[tmp1], 4(%[dst]) \n\t" "sw %[tmp1], 8(%[dst]) \n\t" "sw %[tmp1], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp2], (%[dst]) \n\t" "sw %[tmp2], 4(%[dst]) \n\t" "sw %[tmp2], 8(%[dst]) \n\t" "sw %[tmp2], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp3], (%[dst]) \n\t" "sw %[tmp3], 4(%[dst]) \n\t" "sw %[tmp3], 8(%[dst]) \n\t" "sw %[tmp3], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp4], (%[dst]) \n\t" "sw %[tmp4], 4(%[dst]) \n\t" "sw %[tmp4], 8(%[dst]) \n\t" "sw %[tmp4], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp5], (%[dst]) \n\t" "sw %[tmp5], 4(%[dst]) \n\t" "sw %[tmp5], 8(%[dst]) \n\t" "sw %[tmp5], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp6], (%[dst]) \n\t" "sw %[tmp6], 4(%[dst]) \n\t" "sw %[tmp6], 8(%[dst]) \n\t" "sw %[tmp6], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp7], (%[dst]) \n\t" "sw %[tmp7], 4(%[dst]) \n\t" "sw %[tmp7], 8(%[dst]) \n\t" "sw %[tmp7], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp8], (%[dst]) \n\t" "sw %[tmp8], 4(%[dst]) \n\t" "sw %[tmp8], 8(%[dst]) \n\t" "sw %[tmp8], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp9], (%[dst]) \n\t" "sw %[tmp9], 4(%[dst]) \n\t" "sw %[tmp9], 8(%[dst]) \n\t" "sw %[tmp9], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp10], (%[dst]) \n\t" "sw %[tmp10], 4(%[dst]) \n\t" "sw 
%[tmp10], 8(%[dst]) \n\t" "sw %[tmp10], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp11], (%[dst]) \n\t" "sw %[tmp11], 4(%[dst]) \n\t" "sw %[tmp11], 8(%[dst]) \n\t" "sw %[tmp11], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp12], (%[dst]) \n\t" "sw %[tmp12], 4(%[dst]) \n\t" "sw %[tmp12], 8(%[dst]) \n\t" "sw %[tmp12], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp13], (%[dst]) \n\t" "sw %[tmp13], 4(%[dst]) \n\t" "sw %[tmp13], 8(%[dst]) \n\t" "sw %[tmp13], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp14], (%[dst]) \n\t" "sw %[tmp14], 4(%[dst]) \n\t" "sw %[tmp14], 8(%[dst]) \n\t" "sw %[tmp14], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp15], (%[dst]) \n\t" "sw %[tmp15], 4(%[dst]) \n\t" "sw %[tmp15], 8(%[dst]) \n\t" "sw %[tmp15], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp16], (%[dst]) \n\t" "sw %[tmp16], 4(%[dst]) \n\t" "sw %[tmp16], 8(%[dst]) \n\t" "sw %[tmp16], 12(%[dst]) \n\t" : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3), [tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp7] "=&r"(tmp7), [tmp6] "=&r"(tmp6), [tmp8] "=&r"(tmp8), [tmp9] "=&r"(tmp9), [tmp10] "=&r"(tmp10), [tmp11] "=&r"(tmp11), [tmp12] "=&r"(tmp12), [tmp13] "=&r"(tmp13), [tmp14] "=&r"(tmp14), [tmp15] "=&r"(tmp15), [tmp16] "=&r"(tmp16) : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride)); } void vpx_dc_predictor_16x16_dspr2(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { int32_t expected_dc; int32_t average; int32_t tmp, above1, above_l1, above_r1, left1, left_r1, left_l1; int32_t above2, left2; __asm__ __volatile__( "lw %[above1], (%[above]) \n\t" "lw %[above2], 4(%[above]) \n\t" "lw %[left1], (%[left]) \n\t" "lw %[left2], 4(%[left]) \n\t" "preceu.ph.qbl %[above_l1], %[above1] \n\t" "preceu.ph.qbr %[above_r1], %[above1] \n\t" "preceu.ph.qbl %[left_l1], %[left1] \n\t" "preceu.ph.qbr %[left_r1], %[left1] \n\t" "addu.ph %[average], %[above_r1], %[above_l1] \n\t" "addu.ph %[average], %[average], %[left_l1] \n\t" "addu.ph %[average], %[average], %[left_r1] \n\t" "preceu.ph.qbl %[above_l1], %[above2] \n\t" "preceu.ph.qbr %[above_r1], %[above2] \n\t" "preceu.ph.qbl %[left_l1], %[left2] \n\t" "preceu.ph.qbr %[left_r1], %[left2] \n\t" "addu.ph %[average], %[average], %[above_l1] \n\t" "addu.ph %[average], %[average], %[above_r1] \n\t" "addu.ph %[average], %[average], %[left_l1] \n\t" "addu.ph %[average], %[average], %[left_r1] \n\t" "lw %[above1], 8(%[above]) \n\t" "lw %[above2], 12(%[above]) \n\t" "lw %[left1], 8(%[left]) \n\t" "lw %[left2], 12(%[left]) \n\t" "preceu.ph.qbl %[above_l1], %[above1] \n\t" "preceu.ph.qbr %[above_r1], %[above1] \n\t" "preceu.ph.qbl %[left_l1], %[left1] \n\t" "preceu.ph.qbr %[left_r1], %[left1] \n\t" "addu.ph %[average], %[average], %[above_l1] \n\t" "addu.ph %[average], %[average], %[above_r1] \n\t" "addu.ph %[average], %[average], %[left_l1] \n\t" "addu.ph %[average], %[average], %[left_r1] \n\t" "preceu.ph.qbl %[above_l1], %[above2] \n\t" "preceu.ph.qbr %[above_r1], %[above2] \n\t" "preceu.ph.qbl %[left_l1], %[left2] \n\t" "preceu.ph.qbr %[left_r1], %[left2] \n\t" "addu.ph %[average], %[average], %[above_l1] \n\t" "addu.ph %[average], %[average], %[above_r1] \n\t" "addu.ph %[average], %[average], %[left_l1] \n\t" "addu.ph %[average], %[average], %[left_r1] \n\t" "addiu %[average], %[average], 16 \n\t" "srl %[tmp], %[average], 16 \n\t" "addu.ph %[average], %[tmp], %[average] \n\t" "srl %[expected_dc], %[average], 5 \n\t" "replv.qb %[expected_dc], 
%[expected_dc] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "sw %[expected_dc], 8(%[dst]) \n\t" "sw %[expected_dc], 12(%[dst]) \n\t" : [left1] "=&r"(left1), [above1] "=&r"(above1), [left_l1] "=&r"(left_l1), [above_l1] "=&r"(above_l1), [left_r1] "=&r"(left_r1), [above_r1] "=&r"(above_r1), [above2] "=&r"(above2), [left2] "=&r"(left2), [average] "=&r"(average), [tmp] "=&r"(tmp), [expected_dc] "=&r"(expected_dc) : [above] "r"(above), [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride)); } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vpx_dsp/mips/intrapred4_dspr2.c000066400000000000000000000271541357355204000210300ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_dsp/mips/common_dspr2.h" #if HAVE_DSPR2 void vpx_h_predictor_4x4_dspr2(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { int32_t tmp1, tmp2, tmp3, tmp4; (void)above; __asm__ __volatile__( "lb %[tmp1], (%[left]) \n\t" "lb %[tmp2], 1(%[left]) \n\t" "lb %[tmp3], 2(%[left]) \n\t" "lb %[tmp4], 3(%[left]) \n\t" "replv.qb %[tmp1], %[tmp1] \n\t" "replv.qb %[tmp2], %[tmp2] \n\t" "replv.qb %[tmp3], %[tmp3] \n\t" "replv.qb %[tmp4], %[tmp4] \n\t" "sw %[tmp1], (%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp2], (%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp3], (%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp4], (%[dst]) \n\t" : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3), [tmp4] "=&r"(tmp4) : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride)); } void vpx_dc_predictor_4x4_dspr2(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { int32_t expected_dc; int32_t average; int32_t tmp, above_c, above_l, above_r, left_c, left_r, left_l; __asm__ __volatile__( "lw %[above_c], (%[above]) \n\t" "lw %[left_c], (%[left]) \n\t" "preceu.ph.qbl %[above_l], %[above_c] \n\t" "preceu.ph.qbr %[above_r], %[above_c] \n\t" "preceu.ph.qbl %[left_l], %[left_c] \n\t" "preceu.ph.qbr %[left_r], %[left_c] \n\t" "addu.ph %[average], %[above_r], %[above_l] \n\t" "addu.ph %[average], %[average], %[left_l] \n\t" "addu.ph %[average], %[average], %[left_r] \n\t" "addiu %[average], %[average], 4 \n\t" "srl %[tmp], %[average], 16 \n\t" "addu.ph %[average], %[tmp], %[average] \n\t" "srl %[expected_dc], %[average], 3 \n\t" "replv.qb %[expected_dc], %[expected_dc] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" : [above_c] "=&r"(above_c), [above_l] "=&r"(above_l), [above_r] "=&r"(above_r), [left_c] "=&r"(left_c), [left_l] "=&r"(left_l), [left_r] "=&r"(left_r), [average] "=&r"(average), [tmp] "=&r"(tmp), [expected_dc] "=&r"(expected_dc) : [above] "r"(above), [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride)); } void vpx_tm_predictor_4x4_dspr2(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { int32_t abovel, abover; int32_t left0, left1, left2, left3; int32_t res0, res1; int32_t resl; int32_t resr; int32_t top_left; uint8_t *cm = vpx_ff_cropTbl; __asm__ __volatile__( "ulw %[resl], (%[above]) \n\t" "lbu %[left0], (%[left]) \n\t" "lbu %[left1], 1(%[left]) \n\t" "lbu %[left2], 2(%[left]) \n\t" "lbu %[left3], 3(%[left]) \n\t" "lbu %[top_left], -1(%[above]) \n\t" "preceu.ph.qbl %[abovel], %[resl] \n\t" "preceu.ph.qbr %[abover], %[resl] \n\t" "replv.ph %[left0], %[left0] \n\t" "replv.ph %[left1], %[left1] \n\t" "replv.ph %[left2], %[left2] \n\t" "replv.ph %[left3], %[left3] \n\t" "replv.ph %[top_left], %[top_left] \n\t" "addu.ph %[resl], %[abovel], %[left0] \n\t" "subu.ph %[resl], %[resl], %[top_left] \n\t" "addu.ph %[resr], %[abover], %[left0] \n\t" "subu.ph %[resr], %[resr], %[top_left] \n\t" "sll %[res0], %[resr], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "lbux %[res0], 
%[res0](%[cm]) \n\t" "sra %[res1], %[resr], 16 \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "sb %[res0], (%[dst]) \n\t" "sll %[res0], %[resl], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "sb %[res1], 1(%[dst]) \n\t" "sra %[res1], %[resl], 16 \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "addu.ph %[resl], %[abovel], %[left1] \n\t" "subu.ph %[resl], %[resl], %[top_left] \n\t" "addu.ph %[resr], %[abover], %[left1] \n\t" "subu.ph %[resr], %[resr], %[top_left] \n\t" "sb %[res0], 2(%[dst]) \n\t" "sb %[res1], 3(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sll %[res0], %[resr], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "sra %[res1], %[resr], 16 \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "sb %[res0], (%[dst]) \n\t" "sll %[res0], %[resl], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "sb %[res1], 1(%[dst]) \n\t" "sra %[res1], %[resl], 16 \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "addu.ph %[resl], %[abovel], %[left2] \n\t" "subu.ph %[resl], %[resl], %[top_left] \n\t" "addu.ph %[resr], %[abover], %[left2] \n\t" "subu.ph %[resr], %[resr], %[top_left] \n\t" "sb %[res0], 2(%[dst]) \n\t" "sb %[res1], 3(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sll %[res0], %[resr], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "sra %[res1], %[resr], 16 \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "sb %[res0], (%[dst]) \n\t" "sll %[res0], %[resl], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "sb %[res1], 1(%[dst]) \n\t" "sra %[res1], %[resl], 16 \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "addu.ph %[resl], %[abovel], %[left3] \n\t" "subu.ph %[resl], %[resl], %[top_left] \n\t" "addu.ph %[resr], %[abover], %[left3] \n\t" "subu.ph %[resr], %[resr], %[top_left] \n\t" "sb %[res0], 2(%[dst]) \n\t" "sb %[res1], 3(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sll %[res0], %[resr], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "sra %[res1], %[resr], 16 \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "sb %[res0], (%[dst]) \n\t" "sll %[res0], %[resl], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "sb %[res1], 1(%[dst]) \n\t" "sra %[res1], %[resl], 16 \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "sb %[res0], 2(%[dst]) \n\t" "sb %[res1], 3(%[dst]) \n\t" : [abovel] "=&r"(abovel), [abover] "=&r"(abover), [left0] "=&r"(left0), [left1] "=&r"(left1), [left2] "=&r"(left2), [res0] "=&r"(res0), [res1] "=&r"(res1), [left3] "=&r"(left3), [resl] "=&r"(resl), [resr] "=&r"(resr), [top_left] "=&r"(top_left) : [above] "r"(above), [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride), [cm] "r"(cm)); } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vpx_dsp/mips/intrapred8_dspr2.c000066400000000000000000001034461357355204000210330ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "vpx_dsp/mips/common_dspr2.h" #if HAVE_DSPR2 void vpx_h_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { int32_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; (void)above; __asm__ __volatile__( "lb %[tmp1], (%[left]) \n\t" "lb %[tmp2], 1(%[left]) \n\t" "lb %[tmp3], 2(%[left]) \n\t" "lb %[tmp4], 3(%[left]) \n\t" "lb %[tmp5], 4(%[left]) \n\t" "lb %[tmp6], 5(%[left]) \n\t" "lb %[tmp7], 6(%[left]) \n\t" "lb %[tmp8], 7(%[left]) \n\t" "replv.qb %[tmp1], %[tmp1] \n\t" "replv.qb %[tmp2], %[tmp2] \n\t" "replv.qb %[tmp3], %[tmp3] \n\t" "replv.qb %[tmp4], %[tmp4] \n\t" "replv.qb %[tmp5], %[tmp5] \n\t" "replv.qb %[tmp6], %[tmp6] \n\t" "replv.qb %[tmp7], %[tmp7] \n\t" "replv.qb %[tmp8], %[tmp8] \n\t" "sw %[tmp1], (%[dst]) \n\t" "sw %[tmp1], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp2], (%[dst]) \n\t" "sw %[tmp2], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp3], (%[dst]) \n\t" "sw %[tmp3], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp4], (%[dst]) \n\t" "sw %[tmp4], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp5], (%[dst]) \n\t" "sw %[tmp5], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp6], (%[dst]) \n\t" "sw %[tmp6], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp7], (%[dst]) \n\t" "sw %[tmp7], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[tmp8], (%[dst]) \n\t" "sw %[tmp8], 4(%[dst]) \n\t" : [tmp1] "=&r"(tmp1), [tmp2] "=&r"(tmp2), [tmp3] "=&r"(tmp3), [tmp4] "=&r"(tmp4), [tmp5] "=&r"(tmp5), [tmp7] "=&r"(tmp7), [tmp6] "=&r"(tmp6), [tmp8] "=&r"(tmp8) : [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride)); } void vpx_dc_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { int32_t expected_dc; int32_t average; int32_t tmp, above1, above_l1, above_r1, left1, left_r1, left_l1; int32_t above2, above_l2, above_r2, left2, left_r2, left_l2; __asm__ __volatile__( "lw %[above1], (%[above]) \n\t" "lw %[above2], 4(%[above]) \n\t" "lw %[left1], (%[left]) \n\t" "lw %[left2], 4(%[left]) \n\t" "preceu.ph.qbl %[above_l1], %[above1] \n\t" "preceu.ph.qbr %[above_r1], %[above1] \n\t" "preceu.ph.qbl %[left_l1], %[left1] \n\t" "preceu.ph.qbr %[left_r1], %[left1] \n\t" "preceu.ph.qbl %[above_l2], %[above2] \n\t" "preceu.ph.qbr %[above_r2], %[above2] \n\t" "preceu.ph.qbl %[left_l2], %[left2] \n\t" "preceu.ph.qbr %[left_r2], %[left2] \n\t" "addu.ph %[average], %[above_r1], %[above_l1] \n\t" "addu.ph %[average], %[average], %[left_l1] \n\t" "addu.ph %[average], %[average], %[left_r1] \n\t" "addu.ph %[average], %[average], %[above_l2] \n\t" "addu.ph %[average], %[average], %[above_r2] \n\t" "addu.ph %[average], %[average], %[left_l2] \n\t" "addu.ph %[average], %[average], %[left_r2] \n\t" "addiu %[average], %[average], 8 \n\t" "srl %[tmp], %[average], 16 \n\t" "addu.ph %[average], %[tmp], %[average] \n\t" "srl %[expected_dc], %[average], 4 \n\t" "replv.qb %[expected_dc], %[expected_dc] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "add %[dst], %[dst], 
%[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" "add %[dst], %[dst], %[stride] \n\t" "sw %[expected_dc], (%[dst]) \n\t" "sw %[expected_dc], 4(%[dst]) \n\t" : [above1] "=&r"(above1), [above_l1] "=&r"(above_l1), [above_r1] "=&r"(above_r1), [left1] "=&r"(left1), [left_l1] "=&r"(left_l1), [left_r1] "=&r"(left_r1), [above2] "=&r"(above2), [above_l2] "=&r"(above_l2), [above_r2] "=&r"(above_r2), [left2] "=&r"(left2), [left_l2] "=&r"(left_l2), [left_r2] "=&r"(left_r2), [average] "=&r"(average), [tmp] "=&r"(tmp), [expected_dc] "=&r"(expected_dc) : [above] "r"(above), [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride)); } void vpx_tm_predictor_8x8_dspr2(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { int32_t abovel, abover; int32_t abovel_1, abover_1; int32_t left0; int32_t res0, res1, res2, res3; int32_t reshw; int32_t top_left; uint8_t *cm = vpx_ff_cropTbl; __asm__ __volatile__( "ulw %[reshw], (%[above]) \n\t" "ulw %[top_left], 4(%[above]) \n\t" "lbu %[left0], (%[left]) \n\t" "preceu.ph.qbl %[abovel], %[reshw] \n\t" "preceu.ph.qbr %[abover], %[reshw] \n\t" "preceu.ph.qbl %[abovel_1], %[top_left] \n\t" "preceu.ph.qbr %[abover_1], %[top_left] \n\t" "lbu %[top_left], -1(%[above]) \n\t" "replv.ph %[left0], %[left0] \n\t" "replv.ph %[top_left], %[top_left] \n\t" "addu.ph %[reshw], %[abovel], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], (%[dst]) \n\t" "sb %[res1], 1(%[dst]) \n\t" "sb %[res2], 2(%[dst]) \n\t" "sb %[res3], 3(%[dst]) \n\t" "addu.ph %[reshw], %[abovel_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbu %[left0], 1(%[left]) \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], 4(%[dst]) \n\t" "sb %[res1], 5(%[dst]) \n\t" "sb %[res2], 6(%[dst]) \n\t" "sb %[res3], 7(%[dst]) \n\t" "replv.ph %[left0], %[left0] \n\t" "add %[dst], %[dst], %[stride] \n\t" "addu.ph %[reshw], %[abovel], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], (%[dst]) \n\t" "sb %[res1], 1(%[dst]) \n\t" "sb %[res2], 2(%[dst]) \n\t" "sb %[res3], 3(%[dst]) \n\t" "addu.ph %[reshw], %[abovel_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], 
%[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbu %[left0], 2(%[left]) \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], 4(%[dst]) \n\t" "sb %[res1], 5(%[dst]) \n\t" "sb %[res2], 6(%[dst]) \n\t" "sb %[res3], 7(%[dst]) \n\t" "replv.ph %[left0], %[left0] \n\t" "add %[dst], %[dst], %[stride] \n\t" "addu.ph %[reshw], %[abovel], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], (%[dst]) \n\t" "sb %[res1], 1(%[dst]) \n\t" "sb %[res2], 2(%[dst]) \n\t" "sb %[res3], 3(%[dst]) \n\t" "addu.ph %[reshw], %[abovel_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbu %[left0], 3(%[left]) \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], 4(%[dst]) \n\t" "sb %[res1], 5(%[dst]) \n\t" "sb %[res2], 6(%[dst]) \n\t" "sb %[res3], 7(%[dst]) \n\t" "replv.ph %[left0], %[left0] \n\t" "add %[dst], %[dst], %[stride] \n\t" "addu.ph %[reshw], %[abovel], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], (%[dst]) \n\t" "sb %[res1], 1(%[dst]) \n\t" "sb %[res2], 2(%[dst]) \n\t" "sb %[res3], 3(%[dst]) \n\t" "addu.ph %[reshw], %[abovel_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbu %[left0], 4(%[left]) \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], 4(%[dst]) \n\t" "sb %[res1], 5(%[dst]) \n\t" "sb %[res2], 6(%[dst]) \n\t" "sb %[res3], 7(%[dst]) \n\t" "replv.ph %[left0], %[left0] \n\t" "add %[dst], %[dst], %[stride] \n\t" "addu.ph %[reshw], %[abovel], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], 
%[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], (%[dst]) \n\t" "sb %[res1], 1(%[dst]) \n\t" "sb %[res2], 2(%[dst]) \n\t" "sb %[res3], 3(%[dst]) \n\t" "addu.ph %[reshw], %[abovel_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbu %[left0], 5(%[left]) \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], 4(%[dst]) \n\t" "sb %[res1], 5(%[dst]) \n\t" "sb %[res2], 6(%[dst]) \n\t" "sb %[res3], 7(%[dst]) \n\t" "replv.ph %[left0], %[left0] \n\t" "add %[dst], %[dst], %[stride] \n\t" "addu.ph %[reshw], %[abovel], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], (%[dst]) \n\t" "sb %[res1], 1(%[dst]) \n\t" "sb %[res2], 2(%[dst]) \n\t" "sb %[res3], 3(%[dst]) \n\t" "addu.ph %[reshw], %[abovel_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbu %[left0], 6(%[left]) \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], 4(%[dst]) \n\t" "sb %[res1], 5(%[dst]) \n\t" "sb %[res2], 6(%[dst]) \n\t" "sb %[res3], 7(%[dst]) \n\t" "replv.ph %[left0], %[left0] \n\t" "add %[dst], %[dst], %[stride] \n\t" "addu.ph %[reshw], %[abovel], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], (%[dst]) \n\t" "sb %[res1], 1(%[dst]) \n\t" "sb %[res2], 2(%[dst]) \n\t" "sb %[res3], 3(%[dst]) \n\t" "addu.ph %[reshw], %[abovel_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover_1], %[left0] \n\t" "subu.ph 
%[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbu %[left0], 7(%[left]) \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], 4(%[dst]) \n\t" "sb %[res1], 5(%[dst]) \n\t" "sb %[res2], 6(%[dst]) \n\t" "sb %[res3], 7(%[dst]) \n\t" "replv.ph %[left0], %[left0] \n\t" "add %[dst], %[dst], %[stride] \n\t" "addu.ph %[reshw], %[abovel], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], (%[dst]) \n\t" "sb %[res1], 1(%[dst]) \n\t" "sb %[res2], 2(%[dst]) \n\t" "sb %[res3], 3(%[dst]) \n\t" "addu.ph %[reshw], %[abovel_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res2], %[reshw], 16 \n\t" "sra %[res2], %[res2], 16 \n\t" "sra %[res3], %[reshw], 16 \n\t" "addu.ph %[reshw], %[abover_1], %[left0] \n\t" "subu.ph %[reshw], %[reshw], %[top_left] \n\t" "sll %[res0], %[reshw], 16 \n\t" "sra %[res0], %[res0], 16 \n\t" "sra %[res1], %[reshw], 16 \n\t" "lbux %[res0], %[res0](%[cm]) \n\t" "lbux %[res1], %[res1](%[cm]) \n\t" "lbux %[res2], %[res2](%[cm]) \n\t" "lbux %[res3], %[res3](%[cm]) \n\t" "sb %[res0], 4(%[dst]) \n\t" "sb %[res1], 5(%[dst]) \n\t" "sb %[res2], 6(%[dst]) \n\t" "sb %[res3], 7(%[dst]) \n\t" : [abovel] "=&r"(abovel), [abover] "=&r"(abover), [abovel_1] "=&r"(abovel_1), [abover_1] "=&r"(abover_1), [left0] "=&r"(left0), [res2] "=&r"(res2), [res3] "=&r"(res3), [res0] "=&r"(res0), [res1] "=&r"(res1), [reshw] "=&r"(reshw), [top_left] "=&r"(top_left) : [above] "r"(above), [left] "r"(left), [dst] "r"(dst), [stride] "r"(stride), [cm] "r"(cm)); } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vpx_dsp/mips/intrapred_msa.c000066400000000000000000000575201357355204000204720ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" #define IPRED_SUBS_UH2_UH(in0, in1, out0, out1) \ { \ out0 = __msa_subs_u_h(out0, in0); \ out1 = __msa_subs_u_h(out1, in1); \ } static void intra_predict_vert_4x4_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { uint32_t src_data; src_data = LW(src); SW4(src_data, src_data, src_data, src_data, dst, dst_stride); } static void intra_predict_vert_8x8_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { uint32_t row; uint32_t src_data1, src_data2; src_data1 = LW(src); src_data2 = LW(src + 4); for (row = 8; row--;) { SW(src_data1, dst); SW(src_data2, (dst + 4)); dst += dst_stride; } } static void intra_predict_vert_16x16_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { uint32_t row; v16u8 src0; src0 = LD_UB(src); for (row = 16; row--;) { ST_UB(src0, dst); dst += dst_stride; } } static void intra_predict_vert_32x32_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { uint32_t row; v16u8 src1, src2; src1 = LD_UB(src); src2 = LD_UB(src + 16); for (row = 32; row--;) { ST_UB2(src1, src2, dst, 16); dst += dst_stride; } } static void intra_predict_horiz_4x4_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { uint32_t out0, out1, out2, out3; out0 = src[0] * 0x01010101; out1 = src[1] * 0x01010101; out2 = src[2] * 0x01010101; out3 = src[3] * 0x01010101; SW4(out0, out1, out2, out3, dst, dst_stride); } static void intra_predict_horiz_8x8_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { uint64_t out0, out1, out2, out3, out4, out5, out6, out7; out0 = src[0] * 0x0101010101010101ull; out1 = src[1] * 0x0101010101010101ull; out2 = src[2] * 0x0101010101010101ull; out3 = src[3] * 0x0101010101010101ull; out4 = src[4] * 0x0101010101010101ull; out5 = src[5] * 0x0101010101010101ull; out6 = src[6] * 0x0101010101010101ull; out7 = src[7] * 0x0101010101010101ull; SD4(out0, out1, out2, out3, dst, dst_stride); dst += (4 * dst_stride); SD4(out4, out5, out6, out7, dst, dst_stride); } static void intra_predict_horiz_16x16_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { uint32_t row; uint8_t inp0, inp1, inp2, inp3; v16u8 src0, src1, src2, src3; for (row = 4; row--;) { inp0 = src[0]; inp1 = src[1]; inp2 = src[2]; inp3 = src[3]; src += 4; src0 = (v16u8)__msa_fill_b(inp0); src1 = (v16u8)__msa_fill_b(inp1); src2 = (v16u8)__msa_fill_b(inp2); src3 = (v16u8)__msa_fill_b(inp3); ST_UB4(src0, src1, src2, src3, dst, dst_stride); dst += (4 * dst_stride); } } static void intra_predict_horiz_32x32_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { uint32_t row; uint8_t inp0, inp1, inp2, inp3; v16u8 src0, src1, src2, src3; for (row = 8; row--;) { inp0 = src[0]; inp1 = src[1]; inp2 = src[2]; inp3 = src[3]; src += 4; src0 = (v16u8)__msa_fill_b(inp0); src1 = (v16u8)__msa_fill_b(inp1); src2 = (v16u8)__msa_fill_b(inp2); src3 = (v16u8)__msa_fill_b(inp3); ST_UB2(src0, src0, dst, 16); dst += dst_stride; ST_UB2(src1, src1, dst, 16); dst += dst_stride; ST_UB2(src2, src2, dst, 16); dst += dst_stride; ST_UB2(src3, src3, dst, 16); dst += dst_stride; } } static void intra_predict_dc_4x4_msa(const uint8_t *src_top, const uint8_t *src_left, uint8_t *dst, int32_t dst_stride) { uint32_t val0, val1; v16i8 store, src = { 0 }; v8u16 sum_h; v4u32 sum_w; v2u64 sum_d; val0 = LW(src_top); val1 = LW(src_left); INSERT_W2_SB(val0, val1, src); sum_h = __msa_hadd_u_h((v16u8)src, (v16u8)src); sum_w = __msa_hadd_u_w(sum_h, sum_h); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3); store = 
__msa_splati_b((v16i8)sum_w, 0); val0 = __msa_copy_u_w((v4i32)store, 0); SW4(val0, val0, val0, val0, dst, dst_stride); } static void intra_predict_dc_tl_4x4_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { uint32_t val0; v16i8 store, data = { 0 }; v8u16 sum_h; v4u32 sum_w; val0 = LW(src); data = (v16i8)__msa_insert_w((v4i32)data, 0, val0); sum_h = __msa_hadd_u_h((v16u8)data, (v16u8)data); sum_w = __msa_hadd_u_w(sum_h, sum_h); sum_w = (v4u32)__msa_srari_w((v4i32)sum_w, 2); store = __msa_splati_b((v16i8)sum_w, 0); val0 = __msa_copy_u_w((v4i32)store, 0); SW4(val0, val0, val0, val0, dst, dst_stride); } static void intra_predict_128dc_4x4_msa(uint8_t *dst, int32_t dst_stride) { uint32_t out; const v16i8 store = __msa_ldi_b(128); out = __msa_copy_u_w((v4i32)store, 0); SW4(out, out, out, out, dst, dst_stride); } static void intra_predict_dc_8x8_msa(const uint8_t *src_top, const uint8_t *src_left, uint8_t *dst, int32_t dst_stride) { uint64_t val0, val1; v16i8 store; v16u8 src = { 0 }; v8u16 sum_h; v4u32 sum_w; v2u64 sum_d; val0 = LD(src_top); val1 = LD(src_left); INSERT_D2_UB(val0, val1, src); sum_h = __msa_hadd_u_h(src, src); sum_w = __msa_hadd_u_w(sum_h, sum_h); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4); store = __msa_splati_b((v16i8)sum_w, 0); val0 = __msa_copy_u_d((v2i64)store, 0); SD4(val0, val0, val0, val0, dst, dst_stride); dst += (4 * dst_stride); SD4(val0, val0, val0, val0, dst, dst_stride); } static void intra_predict_dc_tl_8x8_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { uint64_t val0; v16i8 store; v16u8 data = { 0 }; v8u16 sum_h; v4u32 sum_w; v2u64 sum_d; val0 = LD(src); data = (v16u8)__msa_insert_d((v2i64)data, 0, val0); sum_h = __msa_hadd_u_h(data, data); sum_w = __msa_hadd_u_w(sum_h, sum_h); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 3); store = __msa_splati_b((v16i8)sum_w, 0); val0 = __msa_copy_u_d((v2i64)store, 0); SD4(val0, val0, val0, val0, dst, dst_stride); dst += (4 * dst_stride); SD4(val0, val0, val0, val0, dst, dst_stride); } static void intra_predict_128dc_8x8_msa(uint8_t *dst, int32_t dst_stride) { uint64_t out; const v16i8 store = __msa_ldi_b(128); out = __msa_copy_u_d((v2i64)store, 0); SD4(out, out, out, out, dst, dst_stride); dst += (4 * dst_stride); SD4(out, out, out, out, dst, dst_stride); } static void intra_predict_dc_16x16_msa(const uint8_t *src_top, const uint8_t *src_left, uint8_t *dst, int32_t dst_stride) { v16u8 top, left, out; v8u16 sum_h, sum_top, sum_left; v4u32 sum_w; v2u64 sum_d; top = LD_UB(src_top); left = LD_UB(src_left); HADD_UB2_UH(top, left, sum_top, sum_left); sum_h = sum_top + sum_left; sum_w = __msa_hadd_u_w(sum_h, sum_h); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5); out = (v16u8)__msa_splati_b((v16i8)sum_w, 0); ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); dst += (8 * dst_stride); ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); } static void intra_predict_dc_tl_16x16_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { v16u8 data, out; v8u16 sum_h; v4u32 sum_w; v2u64 sum_d; data = LD_UB(src); sum_h = __msa_hadd_u_h(data, data); sum_w = __msa_hadd_u_w(sum_h, sum_h); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d); sum_d = 
__msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 4); out = (v16u8)__msa_splati_b((v16i8)sum_w, 0); ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); dst += (8 * dst_stride); ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); } static void intra_predict_128dc_16x16_msa(uint8_t *dst, int32_t dst_stride) { const v16u8 out = (v16u8)__msa_ldi_b(128); ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); dst += (8 * dst_stride); ST_UB8(out, out, out, out, out, out, out, out, dst, dst_stride); } static void intra_predict_dc_32x32_msa(const uint8_t *src_top, const uint8_t *src_left, uint8_t *dst, int32_t dst_stride) { uint32_t row; v16u8 top0, top1, left0, left1, out; v8u16 sum_h, sum_top0, sum_top1, sum_left0, sum_left1; v4u32 sum_w; v2u64 sum_d; LD_UB2(src_top, 16, top0, top1); LD_UB2(src_left, 16, left0, left1); HADD_UB2_UH(top0, top1, sum_top0, sum_top1); HADD_UB2_UH(left0, left1, sum_left0, sum_left1); sum_h = sum_top0 + sum_top1; sum_h += sum_left0 + sum_left1; sum_w = __msa_hadd_u_w(sum_h, sum_h); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 6); out = (v16u8)__msa_splati_b((v16i8)sum_w, 0); for (row = 16; row--;) { ST_UB2(out, out, dst, 16); dst += dst_stride; ST_UB2(out, out, dst, 16); dst += dst_stride; } } static void intra_predict_dc_tl_32x32_msa(const uint8_t *src, uint8_t *dst, int32_t dst_stride) { uint32_t row; v16u8 data0, data1, out; v8u16 sum_h, sum_data0, sum_data1; v4u32 sum_w; v2u64 sum_d; LD_UB2(src, 16, data0, data1); HADD_UB2_UH(data0, data1, sum_data0, sum_data1); sum_h = sum_data0 + sum_data1; sum_w = __msa_hadd_u_w(sum_h, sum_h); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_pckev_w((v4i32)sum_d, (v4i32)sum_d); sum_d = __msa_hadd_u_d(sum_w, sum_w); sum_w = (v4u32)__msa_srari_w((v4i32)sum_d, 5); out = (v16u8)__msa_splati_b((v16i8)sum_w, 0); for (row = 16; row--;) { ST_UB2(out, out, dst, 16); dst += dst_stride; ST_UB2(out, out, dst, 16); dst += dst_stride; } } static void intra_predict_128dc_32x32_msa(uint8_t *dst, int32_t dst_stride) { uint32_t row; const v16u8 out = (v16u8)__msa_ldi_b(128); for (row = 16; row--;) { ST_UB2(out, out, dst, 16); dst += dst_stride; ST_UB2(out, out, dst, 16); dst += dst_stride; } } static void intra_predict_tm_4x4_msa(const uint8_t *src_top_ptr, const uint8_t *src_left, uint8_t *dst, int32_t dst_stride) { uint32_t val; uint8_t top_left = src_top_ptr[-1]; v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 }; v16u8 src0, src1, src2, src3; v8u16 src_top_left, vec0, vec1, vec2, vec3; src_top_left = (v8u16)__msa_fill_h(top_left); val = LW(src_top_ptr); src_top = (v16i8)__msa_insert_w((v4i32)src_top, 0, val); src_left0 = __msa_fill_b(src_left[0]); src_left1 = __msa_fill_b(src_left[1]); src_left2 = __msa_fill_b(src_left[2]); src_left3 = __msa_fill_b(src_left[3]); ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top, src_left3, src_top, src0, src1, src2, src3); HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3); SAT_UH4_UH(vec0, vec1, vec2, vec3, 7); PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1); ST4x4_UB(tmp0, tmp1, 0, 2, 0, 2, dst, dst_stride); } static void intra_predict_tm_8x8_msa(const uint8_t *src_top_ptr, const uint8_t *src_left, uint8_t *dst, int32_t dst_stride) { 
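/* TrueMotion (TM) prediction: each output pixel is
 * clip_pixel(left[row] + above[col] - above[-1]). A scalar sketch of what
 * the vector code below computes (illustrative only, assuming the standard
 * clip_pixel() helper; not part of this file):
 *   for (r = 0; r < 8; ++r)
 *     for (c = 0; c < 8; ++c)
 *       dst[r * dst_stride + c] =
 *           clip_pixel(src_left[r] + src_top_ptr[c] - src_top_ptr[-1]);
 * The MSA version widens to 16 bits (ILVR_B4_UB, HADD_UB4_UH), floors at 0
 * via the unsigned saturating subtract of the top-left pixel
 * (IPRED_SUBS_UH2_UH), caps at 255 (SAT_UH4_UH with 7) and packs back to
 * bytes (PCKEV_B2_SB). */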
uint64_t val; uint8_t top_left = src_top_ptr[-1]; uint32_t loop_cnt; v16i8 src_left0, src_left1, src_left2, src_left3, tmp0, tmp1, src_top = { 0 }; v8u16 src_top_left, vec0, vec1, vec2, vec3; v16u8 src0, src1, src2, src3; val = LD(src_top_ptr); src_top = (v16i8)__msa_insert_d((v2i64)src_top, 0, val); src_top_left = (v8u16)__msa_fill_h(top_left); for (loop_cnt = 2; loop_cnt--;) { src_left0 = __msa_fill_b(src_left[0]); src_left1 = __msa_fill_b(src_left[1]); src_left2 = __msa_fill_b(src_left[2]); src_left3 = __msa_fill_b(src_left[3]); src_left += 4; ILVR_B4_UB(src_left0, src_top, src_left1, src_top, src_left2, src_top, src_left3, src_top, src0, src1, src2, src3); HADD_UB4_UH(src0, src1, src2, src3, vec0, vec1, vec2, vec3); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec0, vec1); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, vec2, vec3); SAT_UH4_UH(vec0, vec1, vec2, vec3, 7); PCKEV_B2_SB(vec1, vec0, vec3, vec2, tmp0, tmp1); ST8x4_UB(tmp0, tmp1, dst, dst_stride); dst += (4 * dst_stride); } } static void intra_predict_tm_16x16_msa(const uint8_t *src_top_ptr, const uint8_t *src_left, uint8_t *dst, int32_t dst_stride) { uint8_t top_left = src_top_ptr[-1]; uint32_t loop_cnt; v16i8 src_top, src_left0, src_left1, src_left2, src_left3; v8u16 src_top_left, res_r, res_l; src_top = LD_SB(src_top_ptr); src_top_left = (v8u16)__msa_fill_h(top_left); for (loop_cnt = 4; loop_cnt--;) { src_left0 = __msa_fill_b(src_left[0]); src_left1 = __msa_fill_b(src_left[1]); src_left2 = __msa_fill_b(src_left[2]); src_left3 = __msa_fill_b(src_left[3]); src_left += 4; ILVRL_B2_UH(src_left0, src_top, res_r, res_l); HADD_UB2_UH(res_r, res_l, res_r, res_l); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l); SAT_UH2_UH(res_r, res_l, 7); PCKEV_ST_SB(res_r, res_l, dst); dst += dst_stride; ILVRL_B2_UH(src_left1, src_top, res_r, res_l); HADD_UB2_UH(res_r, res_l, res_r, res_l); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l); SAT_UH2_UH(res_r, res_l, 7); PCKEV_ST_SB(res_r, res_l, dst); dst += dst_stride; ILVRL_B2_UH(src_left2, src_top, res_r, res_l); HADD_UB2_UH(res_r, res_l, res_r, res_l); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l); SAT_UH2_UH(res_r, res_l, 7); PCKEV_ST_SB(res_r, res_l, dst); dst += dst_stride; ILVRL_B2_UH(src_left3, src_top, res_r, res_l); HADD_UB2_UH(res_r, res_l, res_r, res_l); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r, res_l); SAT_UH2_UH(res_r, res_l, 7); PCKEV_ST_SB(res_r, res_l, dst); dst += dst_stride; } } static void intra_predict_tm_32x32_msa(const uint8_t *src_top, const uint8_t *src_left, uint8_t *dst, int32_t dst_stride) { uint8_t top_left = src_top[-1]; uint32_t loop_cnt; v16i8 src_top0, src_top1, src_left0, src_left1, src_left2, src_left3; v8u16 src_top_left, res_r0, res_r1, res_l0, res_l1; LD_SB2(src_top, 16, src_top0, src_top1); src_top_left = (v8u16)__msa_fill_h(top_left); for (loop_cnt = 8; loop_cnt--;) { src_left0 = __msa_fill_b(src_left[0]); src_left1 = __msa_fill_b(src_left[1]); src_left2 = __msa_fill_b(src_left[2]); src_left3 = __msa_fill_b(src_left[3]); src_left += 4; ILVR_B2_UH(src_left0, src_top0, src_left0, src_top1, res_r0, res_r1); ILVL_B2_UH(src_left0, src_top0, src_left0, src_top1, res_l0, res_l1); HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1); SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7); PCKEV_ST_SB(res_r0, res_l0, dst); PCKEV_ST_SB(res_r1, res_l1, dst + 16); dst += dst_stride; 
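/* Rows 2-4 of this four-row group repeat the widen/add/floor/cap/pack
 * sequence above, substituting src_left1, src_left2 and src_left3 for
 * src_left0. */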
ILVR_B2_UH(src_left1, src_top0, src_left1, src_top1, res_r0, res_r1); ILVL_B2_UH(src_left1, src_top0, src_left1, src_top1, res_l0, res_l1); HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1); SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7); PCKEV_ST_SB(res_r0, res_l0, dst); PCKEV_ST_SB(res_r1, res_l1, dst + 16); dst += dst_stride; ILVR_B2_UH(src_left2, src_top0, src_left2, src_top1, res_r0, res_r1); ILVL_B2_UH(src_left2, src_top0, src_left2, src_top1, res_l0, res_l1); HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1); SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7); PCKEV_ST_SB(res_r0, res_l0, dst); PCKEV_ST_SB(res_r1, res_l1, dst + 16); dst += dst_stride; ILVR_B2_UH(src_left3, src_top0, src_left3, src_top1, res_r0, res_r1); ILVL_B2_UH(src_left3, src_top0, src_left3, src_top1, res_l0, res_l1); HADD_UB4_UH(res_r0, res_l0, res_r1, res_l1, res_r0, res_l0, res_r1, res_l1); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r0, res_l0); IPRED_SUBS_UH2_UH(src_top_left, src_top_left, res_r1, res_l1); SAT_UH4_UH(res_r0, res_l0, res_r1, res_l1, 7); PCKEV_ST_SB(res_r0, res_l0, dst); PCKEV_ST_SB(res_r1, res_l1, dst + 16); dst += dst_stride; } } void vpx_v_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)left; intra_predict_vert_4x4_msa(above, dst, y_stride); } void vpx_v_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)left; intra_predict_vert_8x8_msa(above, dst, y_stride); } void vpx_v_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)left; intra_predict_vert_16x16_msa(above, dst, y_stride); } void vpx_v_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)left; intra_predict_vert_32x32_msa(above, dst, y_stride); } void vpx_h_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)above; intra_predict_horiz_4x4_msa(left, dst, y_stride); } void vpx_h_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)above; intra_predict_horiz_8x8_msa(left, dst, y_stride); } void vpx_h_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)above; intra_predict_horiz_16x16_msa(left, dst, y_stride); } void vpx_h_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)above; intra_predict_horiz_32x32_msa(left, dst, y_stride); } void vpx_dc_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { intra_predict_dc_4x4_msa(above, left, dst, y_stride); } void vpx_dc_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { intra_predict_dc_8x8_msa(above, left, dst, y_stride); } void vpx_dc_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { intra_predict_dc_16x16_msa(above, left, dst, y_stride); } void vpx_dc_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { intra_predict_dc_32x32_msa(above, left, dst, y_stride); } void 
vpx_dc_top_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)left; intra_predict_dc_tl_4x4_msa(above, dst, y_stride); } void vpx_dc_top_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)left; intra_predict_dc_tl_8x8_msa(above, dst, y_stride); } void vpx_dc_top_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)left; intra_predict_dc_tl_16x16_msa(above, dst, y_stride); } void vpx_dc_top_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)left; intra_predict_dc_tl_32x32_msa(above, dst, y_stride); } void vpx_dc_left_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)above; intra_predict_dc_tl_4x4_msa(left, dst, y_stride); } void vpx_dc_left_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)above; intra_predict_dc_tl_8x8_msa(left, dst, y_stride); } void vpx_dc_left_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)above; intra_predict_dc_tl_16x16_msa(left, dst, y_stride); } void vpx_dc_left_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)above; intra_predict_dc_tl_32x32_msa(left, dst, y_stride); } void vpx_dc_128_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)above; (void)left; intra_predict_128dc_4x4_msa(dst, y_stride); } void vpx_dc_128_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)above; (void)left; intra_predict_128dc_8x8_msa(dst, y_stride); } void vpx_dc_128_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)above; (void)left; intra_predict_128dc_16x16_msa(dst, y_stride); } void vpx_dc_128_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { (void)above; (void)left; intra_predict_128dc_32x32_msa(dst, y_stride); } void vpx_tm_predictor_4x4_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { intra_predict_tm_4x4_msa(above, left, dst, y_stride); } void vpx_tm_predictor_8x8_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { intra_predict_tm_8x8_msa(above, left, dst, y_stride); } void vpx_tm_predictor_16x16_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { intra_predict_tm_16x16_msa(above, left, dst, y_stride); } void vpx_tm_predictor_32x32_msa(uint8_t *dst, ptrdiff_t y_stride, const uint8_t *above, const uint8_t *left) { intra_predict_tm_32x32_msa(above, left, dst, y_stride); } libvpx-1.8.2/vpx_dsp/mips/inv_txfm_dspr2.h000066400000000000000000000077201357355204000206100ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_DSP_MIPS_INV_TXFM_DSPR2_H_ #define VPX_VPX_DSP_MIPS_INV_TXFM_DSPR2_H_ #include #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/inv_txfm.h" #include "vpx_dsp/mips/common_dspr2.h" #ifdef __cplusplus extern "C" { #endif #if HAVE_DSPR2 #define DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input) \ ({ \ int32_t tmp, out; \ int dct_cost_rounding = DCT_CONST_ROUNDING; \ int in = input; \ \ __asm__ __volatile__(/* out = dct_const_round_shift(dc * cospi_16_64); */ \ "mtlo %[dct_cost_rounding], $ac1 " \ " \n\t" \ "mthi $zero, $ac1 " \ " \n\t" \ "madd $ac1, %[in], " \ "%[cospi_16_64] \n\t" \ "extp %[tmp], $ac1, " \ "31 \n\t" \ \ /* out = dct_const_round_shift(out * cospi_16_64); */ \ "mtlo %[dct_cost_rounding], $ac2 " \ " \n\t" \ "mthi $zero, $ac2 " \ " \n\t" \ "madd $ac2, %[tmp], " \ "%[cospi_16_64] \n\t" \ "extp %[out], $ac2, " \ "31 \n\t" \ \ : [tmp] "=&r"(tmp), [out] "=r"(out) \ : [in] "r"(in), \ [dct_cost_rounding] "r"(dct_cost_rounding), \ [cospi_16_64] "r"(cospi_16_64)); \ out; \ }) void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride); void vpx_idct4_rows_dspr2(const int16_t *input, int16_t *output); void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride); void iadst4_dspr2(const int16_t *input, int16_t *output); void idct8_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows); void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride); void iadst8_dspr2(const int16_t *input, int16_t *output); void idct16_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows); void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride); void iadst16_dspr2(const int16_t *input, int16_t *output); #endif // #if HAVE_DSPR2 #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_MIPS_INV_TXFM_DSPR2_H_ libvpx-1.8.2/vpx_dsp/mips/inv_txfm_msa.h000066400000000000000000000714221357355204000203360ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_DSP_MIPS_INV_TXFM_MSA_H_ #define VPX_VPX_DSP_MIPS_INV_TXFM_MSA_H_ #include "vpx_dsp/mips/macros_msa.h" #include "vpx_dsp/mips/txfm_macros_msa.h" #include "vpx_dsp/txfm_common.h" #define VP9_ADST8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \ out3, out4, out5, out6, out7) \ { \ v8i16 cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst4_m; \ v8i16 vec0_m, vec1_m, vec2_m, vec3_m, s0_m, s1_m; \ v8i16 coeff0_m = { cospi_2_64, cospi_6_64, cospi_10_64, cospi_14_64, \ cospi_18_64, cospi_22_64, cospi_26_64, cospi_30_64 }; \ v8i16 coeff1_m = { cospi_8_64, -cospi_8_64, cospi_16_64, -cospi_16_64, \ cospi_24_64, -cospi_24_64, 0, 0 }; \ \ SPLATI_H2_SH(coeff0_m, 0, 7, cnst0_m, cnst1_m); \ cnst2_m = -cnst0_m; \ ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \ SPLATI_H2_SH(coeff0_m, 4, 3, cnst2_m, cnst3_m); \ cnst4_m = -cnst2_m; \ ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \ \ ILVRL_H2_SH(in0, in7, vec1_m, vec0_m); \ ILVRL_H2_SH(in4, in3, vec3_m, vec2_m); \ DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \ cnst2_m, cnst3_m, in7, in0, in4, in3); \ \ SPLATI_H2_SH(coeff0_m, 2, 5, cnst0_m, cnst1_m); \ cnst2_m = -cnst0_m; \ ILVEV_H2_SH(cnst0_m, cnst1_m, cnst1_m, cnst2_m, cnst0_m, cnst1_m); \ SPLATI_H2_SH(coeff0_m, 6, 1, cnst2_m, cnst3_m); \ cnst4_m = -cnst2_m; \ ILVEV_H2_SH(cnst2_m, cnst3_m, cnst3_m, cnst4_m, cnst2_m, cnst3_m); \ \ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ \ DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst1_m, \ cnst2_m, cnst3_m, in5, in2, in6, in1); \ BUTTERFLY_4(in7, in0, in2, in5, s1_m, s0_m, in2, in5); \ out7 = -s0_m; \ out0 = s1_m; \ \ SPLATI_H4_SH(coeff1_m, 0, 4, 1, 5, cnst0_m, cnst1_m, cnst2_m, cnst3_m); \ \ ILVEV_H2_SH(cnst3_m, cnst0_m, cnst1_m, cnst2_m, cnst3_m, cnst2_m); \ cnst0_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ cnst1_m = cnst0_m; \ \ ILVRL_H2_SH(in4, in3, vec1_m, vec0_m); \ ILVRL_H2_SH(in6, in1, vec3_m, vec2_m); \ DOT_ADD_SUB_SRARI_PCK(vec0_m, vec1_m, vec2_m, vec3_m, cnst0_m, cnst2_m, \ cnst3_m, cnst1_m, out1, out6, s0_m, s1_m); \ \ SPLATI_H2_SH(coeff1_m, 2, 3, cnst0_m, cnst1_m); \ cnst1_m = __msa_ilvev_h(cnst1_m, cnst0_m); \ \ ILVRL_H2_SH(in2, in5, vec1_m, vec0_m); \ ILVRL_H2_SH(s0_m, s1_m, vec3_m, vec2_m); \ out3 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst0_m); \ out4 = DOT_SHIFT_RIGHT_PCK_H(vec0_m, vec1_m, cnst1_m); \ out2 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst0_m); \ out5 = DOT_SHIFT_RIGHT_PCK_H(vec2_m, vec3_m, cnst1_m); \ \ out1 = -out1; \ out3 = -out3; \ out5 = -out5; \ } #define VP9_SET_COSPI_PAIR(c0_h, c1_h) \ ({ \ v8i16 out0_m, r0_m, r1_m; \ \ r0_m = __msa_fill_h(c0_h); \ r1_m = __msa_fill_h(c1_h); \ out0_m = __msa_ilvev_h(r1_m, r0_m); \ \ out0_m; \ }) #define VP9_ADDBLK_ST8x4_UB(dst, dst_stride, in0, in1, in2, in3) \ { \ uint8_t *dst_m = (uint8_t *)(dst); \ v16u8 dst0_m, dst1_m, dst2_m, dst3_m; \ v16i8 tmp0_m, tmp1_m; \ v16i8 zero_m = { 0 }; \ v8i16 res0_m, res1_m, res2_m, res3_m; \ \ LD_UB4(dst_m, dst_stride, dst0_m, dst1_m, dst2_m, dst3_m); \ ILVR_B4_SH(zero_m, dst0_m, zero_m, dst1_m, zero_m, dst2_m, zero_m, dst3_m, \ res0_m, res1_m, res2_m, res3_m); \ ADD4(res0_m, in0, res1_m, in1, res2_m, in2, res3_m, in3, res0_m, res1_m, \ res2_m, res3_m); \ CLIP_SH4_0_255(res0_m, res1_m, res2_m, res3_m); \ PCKEV_B2_SB(res1_m, res0_m, res3_m, res2_m, tmp0_m, tmp1_m); \ ST8x4_UB(tmp0_m, tmp1_m, dst_m, dst_stride); \ } #define VP9_IDCT4x4(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v8i16 c0_m, c1_m, c2_m, c3_m; \ v8i16 
step0_m, step1_m; \ v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ \ c0_m = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); \ c1_m = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); \ step0_m = __msa_ilvr_h(in2, in0); \ DOTP_SH2_SW(step0_m, step0_m, c0_m, c1_m, tmp0_m, tmp1_m); \ \ c2_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \ c3_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \ step1_m = __msa_ilvr_h(in3, in1); \ DOTP_SH2_SW(step1_m, step1_m, c2_m, c3_m, tmp2_m, tmp3_m); \ SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ \ PCKEV_H2_SW(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tmp0_m, tmp2_m); \ SLDI_B2_0_SW(tmp0_m, tmp2_m, tmp1_m, tmp3_m, 8); \ BUTTERFLY_4((v8i16)tmp0_m, (v8i16)tmp1_m, (v8i16)tmp2_m, (v8i16)tmp3_m, \ out0, out1, out2, out3); \ } #define VP9_IADST4x4(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v8i16 res0_m, res1_m, c0_m, c1_m; \ v8i16 k1_m, k2_m, k3_m, k4_m; \ v8i16 zero_m = { 0 }; \ v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ v4i32 int0_m, int1_m, int2_m, int3_m; \ v8i16 mask_m = { sinpi_1_9, sinpi_2_9, sinpi_3_9, sinpi_4_9, \ -sinpi_1_9, -sinpi_2_9, -sinpi_3_9, -sinpi_4_9 }; \ \ SPLATI_H4_SH(mask_m, 3, 0, 1, 2, c0_m, c1_m, k1_m, k2_m); \ ILVEV_H2_SH(c0_m, c1_m, k1_m, k2_m, c0_m, c1_m); \ ILVR_H2_SH(in0, in2, in1, in3, res0_m, res1_m); \ DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp2_m, tmp1_m); \ int0_m = tmp2_m + tmp1_m; \ \ SPLATI_H2_SH(mask_m, 4, 7, k4_m, k3_m); \ ILVEV_H2_SH(k4_m, k1_m, k3_m, k2_m, c0_m, c1_m); \ DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp0_m, tmp1_m); \ int1_m = tmp0_m + tmp1_m; \ \ c0_m = __msa_splati_h(mask_m, 6); \ ILVL_H2_SH(k2_m, c0_m, zero_m, k2_m, c0_m, c1_m); \ ILVR_H2_SH(in0, in2, in1, in3, res0_m, res1_m); \ DOTP_SH2_SW(res0_m, res1_m, c0_m, c1_m, tmp0_m, tmp1_m); \ int2_m = tmp0_m + tmp1_m; \ \ c0_m = __msa_splati_h(mask_m, 6); \ c0_m = __msa_ilvev_h(c0_m, k1_m); \ \ res0_m = __msa_ilvr_h((in1), (in3)); \ tmp0_m = __msa_dotp_s_w(res0_m, c0_m); \ int3_m = tmp2_m + tmp0_m; \ \ res0_m = __msa_ilvr_h((in2), (in3)); \ c1_m = __msa_ilvev_h(k4_m, k3_m); \ \ tmp2_m = __msa_dotp_s_w(res0_m, c1_m); \ res1_m = __msa_ilvr_h((in0), (in2)); \ c1_m = __msa_ilvev_h(k1_m, zero_m); \ \ tmp3_m = __msa_dotp_s_w(res1_m, c1_m); \ int3_m += tmp2_m; \ int3_m += tmp3_m; \ \ SRARI_W4_SW(int0_m, int1_m, int2_m, int3_m, DCT_CONST_BITS); \ PCKEV_H2_SH(int0_m, int0_m, int1_m, int1_m, out0, out1); \ PCKEV_H2_SH(int2_m, int2_m, int3_m, int3_m, out2, out3); \ } #define VP9_SET_CONST_PAIR(mask_h, idx1_h, idx2_h) \ ({ \ v8i16 c0_m, c1_m; \ \ SPLATI_H2_SH(mask_h, idx1_h, idx2_h, c0_m, c1_m); \ c0_m = __msa_ilvev_h(c1_m, c0_m); \ \ c0_m; \ }) /* multiply and add macro */ #define VP9_MADD(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, out0, out1, \ out2, out3) \ { \ v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \ v4i32 tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd; \ \ ILVRL_H2_SH(inp1, inp0, madd_s1_m, madd_s0_m); \ ILVRL_H2_SH(inp3, inp2, madd_s3_m, madd_s2_m); \ DOTP_SH4_SW(madd_s1_m, madd_s0_m, madd_s1_m, madd_s0_m, cst0, cst0, cst1, \ cst1, tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd); \ SRARI_W4_SW(tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd, DCT_CONST_BITS); \ PCKEV_H2_SH(tmp1_madd, tmp0_madd, tmp3_madd, tmp2_madd, out0, out1); \ DOTP_SH4_SW(madd_s3_m, madd_s2_m, madd_s3_m, madd_s2_m, cst2, cst2, cst3, \ cst3, tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd); \ SRARI_W4_SW(tmp0_madd, tmp1_madd, tmp2_madd, tmp3_madd, DCT_CONST_BITS); \ PCKEV_H2_SH(tmp1_madd, tmp0_madd, tmp3_madd, tmp2_madd, out2, out3); \ } /* idct 8x8 macro */ #define VP9_IDCT8x8_1D(in0, in1, in2, in3, 
in4, in5, in6, in7, out0, out1, \ out2, out3, out4, out5, out6, out7) \ { \ v8i16 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m; \ v8i16 k0_m, k1_m, k2_m, k3_m, res0_m, res1_m, res2_m, res3_m; \ v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ v8i16 mask_m = { cospi_28_64, cospi_4_64, cospi_20_64, cospi_12_64, \ cospi_16_64, -cospi_4_64, -cospi_20_64, -cospi_16_64 }; \ \ k0_m = VP9_SET_CONST_PAIR(mask_m, 0, 5); \ k1_m = VP9_SET_CONST_PAIR(mask_m, 1, 0); \ k2_m = VP9_SET_CONST_PAIR(mask_m, 6, 3); \ k3_m = VP9_SET_CONST_PAIR(mask_m, 3, 2); \ VP9_MADD(in1, in7, in3, in5, k0_m, k1_m, k2_m, k3_m, in1, in7, in3, in5); \ SUB2(in1, in3, in7, in5, res0_m, res1_m); \ k0_m = VP9_SET_CONST_PAIR(mask_m, 4, 7); \ k1_m = __msa_splati_h(mask_m, 4); \ \ ILVRL_H2_SH(res0_m, res1_m, res2_m, res3_m); \ DOTP_SH4_SW(res2_m, res3_m, res2_m, res3_m, k0_m, k0_m, k1_m, k1_m, \ tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ SRARI_W4_SW(tmp0_m, tmp1_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ tp4_m = in1 + in3; \ PCKEV_H2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, tp5_m, tp6_m); \ tp7_m = in7 + in5; \ k2_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \ k3_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \ VP9_MADD(in0, in4, in2, in6, k1_m, k0_m, k2_m, k3_m, in0, in4, in2, in6); \ BUTTERFLY_4(in0, in4, in2, in6, tp0_m, tp1_m, tp2_m, tp3_m); \ BUTTERFLY_8(tp0_m, tp1_m, tp2_m, tp3_m, tp4_m, tp5_m, tp6_m, tp7_m, out0, \ out1, out2, out3, out4, out5, out6, out7); \ } #define VP9_IADST8x8_1D(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3, out4, out5, out6, out7) \ { \ v4i32 r0_m, r1_m, r2_m, r3_m, r4_m, r5_m, r6_m, r7_m; \ v4i32 m0_m, m1_m, m2_m, m3_m, t0_m, t1_m; \ v8i16 res0_m, res1_m, res2_m, res3_m, k0_m, k1_m, in_s0, in_s1; \ v8i16 mask1_m = { cospi_2_64, cospi_30_64, -cospi_2_64, cospi_10_64, \ cospi_22_64, -cospi_10_64, cospi_18_64, cospi_14_64 }; \ v8i16 mask2_m = { cospi_14_64, -cospi_18_64, cospi_26_64, cospi_6_64, \ -cospi_26_64, cospi_8_64, cospi_24_64, -cospi_8_64 }; \ v8i16 mask3_m = { \ -cospi_24_64, cospi_8_64, cospi_16_64, -cospi_16_64, 0, 0, 0, 0 \ }; \ \ k0_m = VP9_SET_CONST_PAIR(mask1_m, 0, 1); \ k1_m = VP9_SET_CONST_PAIR(mask1_m, 1, 2); \ ILVRL_H2_SH(in1, in0, in_s1, in_s0); \ DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r0_m, \ r1_m, r2_m, r3_m); \ k0_m = VP9_SET_CONST_PAIR(mask1_m, 6, 7); \ k1_m = VP9_SET_CONST_PAIR(mask2_m, 0, 1); \ ILVRL_H2_SH(in5, in4, in_s1, in_s0); \ DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r4_m, \ r5_m, r6_m, r7_m); \ ADD4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, m0_m, m1_m, m2_m, \ m3_m); \ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, res0_m, res1_m); \ SUB4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, m0_m, m1_m, m2_m, \ m3_m); \ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ PCKEV_H2_SW(m1_m, m0_m, m3_m, m2_m, t0_m, t1_m); \ k0_m = VP9_SET_CONST_PAIR(mask1_m, 3, 4); \ k1_m = VP9_SET_CONST_PAIR(mask1_m, 4, 5); \ ILVRL_H2_SH(in3, in2, in_s1, in_s0); \ DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r0_m, \ r1_m, r2_m, r3_m); \ k0_m = VP9_SET_CONST_PAIR(mask2_m, 2, 3); \ k1_m = VP9_SET_CONST_PAIR(mask2_m, 3, 4); \ ILVRL_H2_SH(in7, in6, in_s1, in_s0); \ DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r4_m, \ r5_m, r6_m, r7_m); \ ADD4(r0_m, r4_m, r1_m, r5_m, r2_m, r6_m, r3_m, r7_m, m0_m, m1_m, m2_m, \ m3_m); \ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, res2_m, res3_m); \ SUB4(r0_m, r4_m, r1_m, r5_m, 
r2_m, r6_m, r3_m, r7_m, m0_m, m1_m, m2_m, \ m3_m); \ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ PCKEV_H2_SW(m1_m, m0_m, m3_m, m2_m, r2_m, r3_m); \ ILVRL_H2_SW(r3_m, r2_m, m2_m, m3_m); \ BUTTERFLY_4(res0_m, res1_m, res3_m, res2_m, out0, in7, in4, in3); \ k0_m = VP9_SET_CONST_PAIR(mask2_m, 5, 6); \ k1_m = VP9_SET_CONST_PAIR(mask2_m, 6, 7); \ ILVRL_H2_SH(t1_m, t0_m, in_s1, in_s0); \ DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, r0_m, \ r1_m, r2_m, r3_m); \ k1_m = VP9_SET_CONST_PAIR(mask3_m, 0, 1); \ DOTP_SH4_SW(m2_m, m3_m, m2_m, m3_m, k0_m, k0_m, k1_m, k1_m, r4_m, r5_m, \ r6_m, r7_m); \ ADD4(r0_m, r6_m, r1_m, r7_m, r2_m, r4_m, r3_m, r5_m, m0_m, m1_m, m2_m, \ m3_m); \ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in1, out6); \ SUB4(r0_m, r6_m, r1_m, r7_m, r2_m, r4_m, r3_m, r5_m, m0_m, m1_m, m2_m, \ m3_m); \ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in2, in5); \ k0_m = VP9_SET_CONST_PAIR(mask3_m, 2, 2); \ k1_m = VP9_SET_CONST_PAIR(mask3_m, 2, 3); \ ILVRL_H2_SH(in4, in3, in_s1, in_s0); \ DOTP_SH4_SW(in_s1, in_s0, in_s1, in_s0, k0_m, k0_m, k1_m, k1_m, m0_m, \ m1_m, m2_m, m3_m); \ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, in3, out4); \ ILVRL_H2_SW(in5, in2, m2_m, m3_m); \ DOTP_SH4_SW(m2_m, m3_m, m2_m, m3_m, k0_m, k0_m, k1_m, k1_m, m0_m, m1_m, \ m2_m, m3_m); \ SRARI_W4_SW(m0_m, m1_m, m2_m, m3_m, DCT_CONST_BITS); \ PCKEV_H2_SH(m1_m, m0_m, m3_m, m2_m, out2, in5); \ \ out1 = -in1; \ out3 = -in3; \ out5 = -in5; \ out7 = -in7; \ } #define VP9_IADST8x16_1D(r0, r1, r2, r3, r4, r5, r6, r7, r8, r9, r10, r11, \ r12, r13, r14, r15, out0, out1, out2, out3, out4, \ out5, out6, out7, out8, out9, out10, out11, out12, \ out13, out14, out15) \ { \ v8i16 g0_m, g1_m, g2_m, g3_m, g4_m, g5_m, g6_m, g7_m; \ v8i16 g8_m, g9_m, g10_m, g11_m, g12_m, g13_m, g14_m, g15_m; \ v8i16 h0_m, h1_m, h2_m, h3_m, h4_m, h5_m, h6_m, h7_m; \ v8i16 h8_m, h9_m, h10_m, h11_m; \ v8i16 k0_m, k1_m, k2_m, k3_m; \ \ /* stage 1 */ \ k0_m = VP9_SET_COSPI_PAIR(cospi_1_64, cospi_31_64); \ k1_m = VP9_SET_COSPI_PAIR(cospi_31_64, -cospi_1_64); \ k2_m = VP9_SET_COSPI_PAIR(cospi_17_64, cospi_15_64); \ k3_m = VP9_SET_COSPI_PAIR(cospi_15_64, -cospi_17_64); \ MADD_BF(r15, r0, r7, r8, k0_m, k1_m, k2_m, k3_m, g0_m, g1_m, g2_m, g3_m); \ k0_m = VP9_SET_COSPI_PAIR(cospi_5_64, cospi_27_64); \ k1_m = VP9_SET_COSPI_PAIR(cospi_27_64, -cospi_5_64); \ k2_m = VP9_SET_COSPI_PAIR(cospi_21_64, cospi_11_64); \ k3_m = VP9_SET_COSPI_PAIR(cospi_11_64, -cospi_21_64); \ MADD_BF(r13, r2, r5, r10, k0_m, k1_m, k2_m, k3_m, g4_m, g5_m, g6_m, g7_m); \ k0_m = VP9_SET_COSPI_PAIR(cospi_9_64, cospi_23_64); \ k1_m = VP9_SET_COSPI_PAIR(cospi_23_64, -cospi_9_64); \ k2_m = VP9_SET_COSPI_PAIR(cospi_25_64, cospi_7_64); \ k3_m = VP9_SET_COSPI_PAIR(cospi_7_64, -cospi_25_64); \ MADD_BF(r11, r4, r3, r12, k0_m, k1_m, k2_m, k3_m, g8_m, g9_m, g10_m, \ g11_m); \ k0_m = VP9_SET_COSPI_PAIR(cospi_13_64, cospi_19_64); \ k1_m = VP9_SET_COSPI_PAIR(cospi_19_64, -cospi_13_64); \ k2_m = VP9_SET_COSPI_PAIR(cospi_29_64, cospi_3_64); \ k3_m = VP9_SET_COSPI_PAIR(cospi_3_64, -cospi_29_64); \ MADD_BF(r9, r6, r1, r14, k0_m, k1_m, k2_m, k3_m, g12_m, g13_m, g14_m, \ g15_m); \ \ /* stage 2 */ \ k0_m = VP9_SET_COSPI_PAIR(cospi_4_64, cospi_28_64); \ k1_m = VP9_SET_COSPI_PAIR(cospi_28_64, -cospi_4_64); \ k2_m = VP9_SET_COSPI_PAIR(-cospi_28_64, cospi_4_64); \ MADD_BF(g1_m, g3_m, g9_m, g11_m, k0_m, k1_m, k2_m, k0_m, h0_m, h1_m, h2_m, \ h3_m); \ k0_m = 
VP9_SET_COSPI_PAIR(cospi_12_64, cospi_20_64); \ k1_m = VP9_SET_COSPI_PAIR(-cospi_20_64, cospi_12_64); \ k2_m = VP9_SET_COSPI_PAIR(cospi_20_64, -cospi_12_64); \ MADD_BF(g7_m, g5_m, g15_m, g13_m, k0_m, k1_m, k2_m, k0_m, h4_m, h5_m, \ h6_m, h7_m); \ BUTTERFLY_4(h0_m, h2_m, h6_m, h4_m, out8, out9, out11, out10); \ BUTTERFLY_8(g0_m, g2_m, g4_m, g6_m, g14_m, g12_m, g10_m, g8_m, h8_m, h9_m, \ h10_m, h11_m, h6_m, h4_m, h2_m, h0_m); \ \ /* stage 3 */ \ BUTTERFLY_4(h8_m, h9_m, h11_m, h10_m, out0, out1, h11_m, h10_m); \ k0_m = VP9_SET_COSPI_PAIR(cospi_8_64, cospi_24_64); \ k1_m = VP9_SET_COSPI_PAIR(cospi_24_64, -cospi_8_64); \ k2_m = VP9_SET_COSPI_PAIR(-cospi_24_64, cospi_8_64); \ MADD_BF(h0_m, h2_m, h4_m, h6_m, k0_m, k1_m, k2_m, k0_m, out4, out6, out5, \ out7); \ MADD_BF(h1_m, h3_m, h5_m, h7_m, k0_m, k1_m, k2_m, k0_m, out12, out14, \ out13, out15); \ \ /* stage 4 */ \ k0_m = VP9_SET_COSPI_PAIR(cospi_16_64, cospi_16_64); \ k1_m = VP9_SET_COSPI_PAIR(-cospi_16_64, -cospi_16_64); \ k2_m = VP9_SET_COSPI_PAIR(cospi_16_64, -cospi_16_64); \ k3_m = VP9_SET_COSPI_PAIR(-cospi_16_64, cospi_16_64); \ MADD_SHORT(h10_m, h11_m, k1_m, k2_m, out2, out3); \ MADD_SHORT(out6, out7, k0_m, k3_m, out6, out7); \ MADD_SHORT(out10, out11, k0_m, k3_m, out10, out11); \ MADD_SHORT(out14, out15, k1_m, k2_m, out14, out15); \ } void vpx_idct16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, int32_t dst_stride); void vpx_idct16_1d_rows_msa(const int16_t *input, int16_t *output); void vpx_iadst16_1d_columns_addblk_msa(int16_t *input, uint8_t *dst, int32_t dst_stride); void vpx_iadst16_1d_rows_msa(const int16_t *input, int16_t *output); #endif // VPX_VPX_DSP_MIPS_INV_TXFM_MSA_H_ libvpx-1.8.2/vpx_dsp/mips/itrans16_dspr2.c000066400000000000000000002003541357355204000204160ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/inv_txfm_dspr2.h" #include "vpx_dsp/txfm_common.h" #if HAVE_DSPR2 void idct16_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows) { int i; int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; int step1_10, step1_11, step1_12, step1_13; int step2_0, step2_1, step2_2, step2_3; int step2_8, step2_9, step2_10, step2_11; int step2_12, step2_13, step2_14, step2_15; int load1, load2, load3, load4, load5, load6, load7, load8; int result1, result2, result3, result4; const int const_2_power_13 = 8192; for (i = no_rows; i--;) { /* prefetch row */ prefetch_load((const uint8_t *)(input + 16)); __asm__ __volatile__( "lh %[load1], 0(%[input]) \n\t" "lh %[load2], 16(%[input]) \n\t" "lh %[load3], 8(%[input]) \n\t" "lh %[load4], 24(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "add %[result1], %[load1], %[load2] \n\t" "sub %[result2], %[load1], %[load2] \n\t" "madd $ac1, %[result1], %[cospi_16_64] \n\t" "madd $ac2, %[result2], %[cospi_16_64] \n\t" "extp %[step2_0], $ac1, 31 \n\t" "extp %[step2_1], $ac2, 31 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac3, %[load3], %[cospi_24_64] \n\t" "msub $ac3, %[load4], %[cospi_8_64] \n\t" "extp %[step2_2], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "madd $ac1, %[load3], %[cospi_8_64] \n\t" "madd $ac1, %[load4], %[cospi_24_64] \n\t" "extp %[step2_3], $ac1, 31 \n\t" "add %[step1_0], %[step2_0], %[step2_3] \n\t" "add %[step1_1], %[step2_1], %[step2_2] \n\t" "sub %[step1_2], %[step2_1], %[step2_2] \n\t" "sub %[step1_3], %[step2_0], %[step2_3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [result1] "=&r"(result1), [result2] "=&r"(result2), [step2_0] "=&r"(step2_0), [step2_1] "=&r"(step2_1), [step2_2] "=&r"(step2_2), [step2_3] "=&r"(step2_3), [step1_0] "=r"(step1_0), [step1_1] "=r"(step1_1), [step1_2] "=r"(step1_2), [step1_3] "=r"(step1_3) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "lh %[load5], 2(%[input]) \n\t" "lh %[load6], 30(%[input]) \n\t" "lh %[load7], 18(%[input]) \n\t" "lh %[load8], 14(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load5], %[cospi_30_64] \n\t" "msub $ac1, %[load6], %[cospi_2_64] \n\t" "extp %[result1], $ac1, 31 \n\t" "madd $ac3, %[load7], %[cospi_14_64] \n\t" "msub $ac3, %[load8], %[cospi_18_64] \n\t" "extp %[result2], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac1, %[load7], %[cospi_18_64] \n\t" "madd $ac1, %[load8], %[cospi_14_64] \n\t" "extp %[result3], $ac1, 31 \n\t" "madd $ac2, %[load5], %[cospi_2_64] \n\t" "madd $ac2, %[load6], %[cospi_30_64] \n\t" "extp %[result4], $ac2, 31 \n\t" "sub %[load5], %[result1], %[result2] \n\t" "sub %[load6], %[result4], %[result3] \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load6], %[cospi_24_64] \n\t" "msub $ac1, %[load5], %[cospi_8_64] \n\t" "madd $ac3, %[load5], %[cospi_24_64] \n\t" "madd $ac3, %[load6], %[cospi_8_64] \n\t" "extp %[step2_9], $ac1, 31 
\n\t" "extp %[step2_14], $ac3, 31 \n\t" "add %[step2_8], %[result1], %[result2] \n\t" "add %[step2_15], %[result4], %[result3] \n\t" : [load5] "=&r"(load5), [load6] "=&r"(load6), [load7] "=&r"(load7), [load8] "=&r"(load8), [result1] "=&r"(result1), [result2] "=&r"(result2), [result3] "=&r"(result3), [result4] "=&r"(result4), [step2_8] "=r"(step2_8), [step2_15] "=r"(step2_15), [step2_9] "=r"(step2_9), [step2_14] "=r"(step2_14) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_30_64] "r"(cospi_30_64), [cospi_2_64] "r"(cospi_2_64), [cospi_14_64] "r"(cospi_14_64), [cospi_18_64] "r"(cospi_18_64), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64)); __asm__ __volatile__( "lh %[load1], 10(%[input]) \n\t" "lh %[load2], 22(%[input]) \n\t" "lh %[load3], 26(%[input]) \n\t" "lh %[load4], 6(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_22_64] \n\t" "msub $ac1, %[load2], %[cospi_10_64] \n\t" "extp %[result1], $ac1, 31 \n\t" "madd $ac3, %[load3], %[cospi_6_64] \n\t" "msub $ac3, %[load4], %[cospi_26_64] \n\t" "extp %[result2], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac1, %[load1], %[cospi_10_64] \n\t" "madd $ac1, %[load2], %[cospi_22_64] \n\t" "extp %[result3], $ac1, 31 \n\t" "madd $ac2, %[load3], %[cospi_26_64] \n\t" "madd $ac2, %[load4], %[cospi_6_64] \n\t" "extp %[result4], $ac2, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[result2], %[result1] \n\t" "sub %[load2], %[result4], %[result3] \n\t" "msub $ac1, %[load1], %[cospi_24_64] \n\t" "msub $ac1, %[load2], %[cospi_8_64] \n\t" "madd $ac3, %[load2], %[cospi_24_64] \n\t" "msub $ac3, %[load1], %[cospi_8_64] \n\t" "extp %[step2_10], $ac1, 31 \n\t" "extp %[step2_13], $ac3, 31 \n\t" "add %[step2_11], %[result1], %[result2] \n\t" "add %[step2_12], %[result4], %[result3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [result1] "=&r"(result1), [result2] "=&r"(result2), [result3] "=&r"(result3), [result4] "=&r"(result4), [step2_10] "=r"(step2_10), [step2_11] "=r"(step2_11), [step2_12] "=r"(step2_12), [step2_13] "=r"(step2_13) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_22_64] "r"(cospi_22_64), [cospi_10_64] "r"(cospi_10_64), [cospi_6_64] "r"(cospi_6_64), [cospi_26_64] "r"(cospi_26_64), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64)); __asm__ __volatile__( "lh %[load5], 4(%[input]) \n\t" "lh %[load6], 28(%[input]) \n\t" "lh %[load7], 20(%[input]) \n\t" "lh %[load8], 12(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load5], %[cospi_28_64] \n\t" "msub $ac1, %[load6], %[cospi_4_64] \n\t" "extp %[result1], $ac1, 31 \n\t" "madd $ac3, %[load7], %[cospi_12_64] \n\t" "msub $ac3, %[load8], %[cospi_20_64] \n\t" "extp %[result2], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac1, %[load7], %[cospi_20_64] \n\t" "madd $ac1, %[load8], %[cospi_12_64] \n\t" "extp %[result3], $ac1, 31 \n\t" "madd $ac2, %[load5], %[cospi_4_64] \n\t" "madd $ac2, %[load6], %[cospi_28_64] \n\t" "extp %[result4], $ac2, 31 \n\t" "mtlo 
%[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load5], %[result4], %[result3] \n\t" "sub %[load5], %[load5], %[result1] \n\t" "add %[load5], %[load5], %[result2] \n\t" "sub %[load6], %[result1], %[result2] \n\t" "sub %[load6], %[load6], %[result3] \n\t" "add %[load6], %[load6], %[result4] \n\t" "madd $ac1, %[load5], %[cospi_16_64] \n\t" "madd $ac3, %[load6], %[cospi_16_64] \n\t" "extp %[step1_5], $ac1, 31 \n\t" "extp %[step1_6], $ac3, 31 \n\t" "add %[step1_4], %[result1], %[result2] \n\t" "add %[step1_7], %[result4], %[result3] \n\t" : [load5] "=&r"(load5), [load6] "=&r"(load6), [load7] "=&r"(load7), [load8] "=&r"(load8), [result1] "=&r"(result1), [result2] "=&r"(result2), [result3] "=&r"(result3), [result4] "=&r"(result4), [step1_4] "=r"(step1_4), [step1_5] "=r"(step1_5), [step1_6] "=r"(step1_6), [step1_7] "=r"(step1_7) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_20_64] "r"(cospi_20_64), [cospi_12_64] "r"(cospi_12_64), [cospi_4_64] "r"(cospi_4_64), [cospi_28_64] "r"(cospi_28_64), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sub %[load5], %[step2_14], %[step2_13] \n\t" "sub %[load5], %[load5], %[step2_9] \n\t" "add %[load5], %[load5], %[step2_10] \n\t" "madd $ac0, %[load5], %[cospi_16_64] \n\t" "sub %[load6], %[step2_14], %[step2_13] \n\t" "sub %[load6], %[load6], %[step2_10] \n\t" "add %[load6], %[load6], %[step2_9] \n\t" "madd $ac1, %[load6], %[cospi_16_64] \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load5], %[step2_15], %[step2_12] \n\t" "sub %[load5], %[load5], %[step2_8] \n\t" "add %[load5], %[load5], %[step2_11] \n\t" "madd $ac2, %[load5], %[cospi_16_64] \n\t" "sub %[load6], %[step2_15], %[step2_12] \n\t" "sub %[load6], %[load6], %[step2_11] \n\t" "add %[load6], %[load6], %[step2_8] \n\t" "madd $ac3, %[load6], %[cospi_16_64] \n\t" "extp %[step1_10], $ac0, 31 \n\t" "extp %[step1_13], $ac1, 31 \n\t" "extp %[step1_11], $ac2, 31 \n\t" "extp %[step1_12], $ac3, 31 \n\t" : [load5] "=&r"(load5), [load6] "=&r"(load6), [step1_10] "=r"(step1_10), [step1_11] "=r"(step1_11), [step1_12] "=r"(step1_12), [step1_13] "=r"(step1_13) : [const_2_power_13] "r"(const_2_power_13), [step2_14] "r"(step2_14), [step2_13] "r"(step2_13), [step2_9] "r"(step2_9), [step2_10] "r"(step2_10), [step2_15] "r"(step2_15), [step2_12] "r"(step2_12), [step2_8] "r"(step2_8), [step2_11] "r"(step2_11), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "add %[load5], %[step1_0], %[step1_7] \n\t" "add %[load5], %[load5], %[step2_12] \n\t" "add %[load5], %[load5], %[step2_15] \n\t" "add %[load6], %[step1_1], %[step1_6] \n\t" "add %[load6], %[load6], %[step2_13] \n\t" "add %[load6], %[load6], %[step2_14] \n\t" "sh %[load5], 0(%[output]) \n\t" "sh %[load6], 32(%[output]) \n\t" "sub %[load5], %[step1_1], %[step1_6] \n\t" "add %[load5], %[load5], %[step2_9] \n\t" "add %[load5], %[load5], %[step2_10] \n\t" "sub %[load6], %[step1_0], %[step1_7] \n\t" "add %[load6], %[load6], %[step2_8] \n\t" "add %[load6], %[load6], %[step2_11] \n\t" "sh %[load5], 192(%[output]) \n\t" "sh %[load6], 224(%[output]) \n\t" "sub %[load5], %[step1_0], %[step1_7] \n\t" "sub %[load5], %[load5], %[step2_8] \n\t" "sub %[load5], %[load5], %[step2_11] \n\t" "sub %[load6], %[step1_1], %[step1_6] \n\t" "sub %[load6], %[load6], 
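/* Added note (not part of the original source): this block, like the
 * ones above, is the DSP-ASE form of dct_const_round_shift(): mtlo/mthi
 * preload the accumulator with const_2_power_13 (8192 == 1 << 13, the
 * DCT rounding bias), madd accumulates x * cospi_N_64, and extp -- with
 * the extract position 45 programmed by the caller's wrdsp -- returns
 * accumulator bits 45..14, effectively computing
 *
 *   (x * cospi_N_64 + (1 << 13)) >> 14    // DCT_CONST_BITS == 14
 *
 * Here the idiom forms step1_10..step1_13, the cospi_16_64 rotations of
 * the odd-half butterfly terms. */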
%[step2_9] \n\t" "sub %[load6], %[load6], %[step2_10] \n\t" "sh %[load5], 256(%[output]) \n\t" "sh %[load6], 288(%[output]) \n\t" "add %[load5], %[step1_1], %[step1_6] \n\t" "sub %[load5], %[load5], %[step2_13] \n\t" "sub %[load5], %[load5], %[step2_14] \n\t" "add %[load6], %[step1_0], %[step1_7] \n\t" "sub %[load6], %[load6], %[step2_12] \n\t" "sub %[load6], %[load6], %[step2_15] \n\t" "sh %[load5], 448(%[output]) \n\t" "sh %[load6], 480(%[output]) \n\t" : [load5] "=&r"(load5), [load6] "=&r"(load6) : [output] "r"(output), [step1_0] "r"(step1_0), [step1_1] "r"(step1_1), [step1_6] "r"(step1_6), [step1_7] "r"(step1_7), [step2_8] "r"(step2_8), [step2_9] "r"(step2_9), [step2_10] "r"(step2_10), [step2_11] "r"(step2_11), [step2_12] "r"(step2_12), [step2_13] "r"(step2_13), [step2_14] "r"(step2_14), [step2_15] "r"(step2_15)); __asm__ __volatile__( "add %[load5], %[step1_2], %[step1_5] \n\t" "add %[load5], %[load5], %[step1_13] \n\t" "add %[load6], %[step1_3], %[step1_4] \n\t" "add %[load6], %[load6], %[step1_12] \n\t" "sh %[load5], 64(%[output]) \n\t" "sh %[load6], 96(%[output]) \n\t" "sub %[load5], %[step1_3], %[step1_4] \n\t" "add %[load5], %[load5], %[step1_11] \n\t" "sub %[load6], %[step1_2], %[step1_5] \n\t" "add %[load6], %[load6], %[step1_10] \n\t" "sh %[load5], 128(%[output]) \n\t" "sh %[load6], 160(%[output]) \n\t" "sub %[load5], %[step1_2], %[step1_5] \n\t" "sub %[load5], %[load5], %[step1_10] \n\t" "sub %[load6], %[step1_3], %[step1_4] \n\t" "sub %[load6], %[load6], %[step1_11] \n\t" "sh %[load5], 320(%[output]) \n\t" "sh %[load6], 352(%[output]) \n\t" "add %[load5], %[step1_3], %[step1_4] \n\t" "sub %[load5], %[load5], %[step1_12] \n\t" "add %[load6], %[step1_2], %[step1_5] \n\t" "sub %[load6], %[load6], %[step1_13] \n\t" "sh %[load5], 384(%[output]) \n\t" "sh %[load6], 416(%[output]) \n\t" : [load5] "=&r"(load5), [load6] "=&r"(load6) : [output] "r"(output), [step1_2] "r"(step1_2), [step1_3] "r"(step1_3), [step1_4] "r"(step1_4), [step1_5] "r"(step1_5), [step1_10] "r"(step1_10), [step1_11] "r"(step1_11), [step1_12] "r"(step1_12), [step1_13] "r"(step1_13)); input += 16; output += 1; } } void idct16_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride) { int i; int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; int step1_8, step1_9, step1_10, step1_11; int step1_12, step1_13, step1_14, step1_15; int step2_0, step2_1, step2_2, step2_3; int step2_8, step2_9, step2_10, step2_11; int step2_12, step2_13, step2_14, step2_15; int load1, load2, load3, load4, load5, load6, load7, load8; int result1, result2, result3, result4; const int const_2_power_13 = 8192; uint8_t *dest_pix; uint8_t *cm = vpx_ff_cropTbl; /* prefetch vpx_ff_cropTbl */ prefetch_load(vpx_ff_cropTbl); prefetch_load(vpx_ff_cropTbl + 32); prefetch_load(vpx_ff_cropTbl + 64); prefetch_load(vpx_ff_cropTbl + 96); prefetch_load(vpx_ff_cropTbl + 128); prefetch_load(vpx_ff_cropTbl + 160); prefetch_load(vpx_ff_cropTbl + 192); prefetch_load(vpx_ff_cropTbl + 224); for (i = 0; i < 16; ++i) { dest_pix = (dest + i); __asm__ __volatile__( "lh %[load1], 0(%[input]) \n\t" "lh %[load2], 16(%[input]) \n\t" "lh %[load3], 8(%[input]) \n\t" "lh %[load4], 24(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "add %[result1], %[load1], %[load2] \n\t" "sub %[result2], %[load1], %[load2] \n\t" "madd $ac1, %[result1], %[cospi_16_64] \n\t" "madd $ac2, %[result2], %[cospi_16_64] \n\t" "extp %[step2_0], $ac1, 31 \n\t" "extp %[step2_1], $ac2, 
31 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac3, %[load3], %[cospi_24_64] \n\t" "msub $ac3, %[load4], %[cospi_8_64] \n\t" "extp %[step2_2], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "madd $ac1, %[load3], %[cospi_8_64] \n\t" "madd $ac1, %[load4], %[cospi_24_64] \n\t" "extp %[step2_3], $ac1, 31 \n\t" "add %[step1_0], %[step2_0], %[step2_3] \n\t" "add %[step1_1], %[step2_1], %[step2_2] \n\t" "sub %[step1_2], %[step2_1], %[step2_2] \n\t" "sub %[step1_3], %[step2_0], %[step2_3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [result1] "=&r"(result1), [result2] "=&r"(result2), [step2_0] "=&r"(step2_0), [step2_1] "=&r"(step2_1), [step2_2] "=&r"(step2_2), [step2_3] "=&r"(step2_3), [step1_0] "=r"(step1_0), [step1_1] "=r"(step1_1), [step1_2] "=r"(step1_2), [step1_3] "=r"(step1_3) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "lh %[load5], 2(%[input]) \n\t" "lh %[load6], 30(%[input]) \n\t" "lh %[load7], 18(%[input]) \n\t" "lh %[load8], 14(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load5], %[cospi_30_64] \n\t" "msub $ac1, %[load6], %[cospi_2_64] \n\t" "extp %[result1], $ac1, 31 \n\t" "madd $ac3, %[load7], %[cospi_14_64] \n\t" "msub $ac3, %[load8], %[cospi_18_64] \n\t" "extp %[result2], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac1, %[load7], %[cospi_18_64] \n\t" "madd $ac1, %[load8], %[cospi_14_64] \n\t" "extp %[result3], $ac1, 31 \n\t" "madd $ac2, %[load5], %[cospi_2_64] \n\t" "madd $ac2, %[load6], %[cospi_30_64] \n\t" "extp %[result4], $ac2, 31 \n\t" "sub %[load5], %[result1], %[result2] \n\t" "sub %[load6], %[result4], %[result3] \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load6], %[cospi_24_64] \n\t" "msub $ac1, %[load5], %[cospi_8_64] \n\t" "madd $ac3, %[load5], %[cospi_24_64] \n\t" "madd $ac3, %[load6], %[cospi_8_64] \n\t" "extp %[step2_9], $ac1, 31 \n\t" "extp %[step2_14], $ac3, 31 \n\t" "add %[step2_8], %[result1], %[result2] \n\t" "add %[step2_15], %[result4], %[result3] \n\t" : [load5] "=&r"(load5), [load6] "=&r"(load6), [load7] "=&r"(load7), [load8] "=&r"(load8), [result1] "=&r"(result1), [result2] "=&r"(result2), [result3] "=&r"(result3), [result4] "=&r"(result4), [step2_8] "=r"(step2_8), [step2_15] "=r"(step2_15), [step2_9] "=r"(step2_9), [step2_14] "=r"(step2_14) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_30_64] "r"(cospi_30_64), [cospi_2_64] "r"(cospi_2_64), [cospi_14_64] "r"(cospi_14_64), [cospi_18_64] "r"(cospi_18_64), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64)); __asm__ __volatile__( "lh %[load1], 10(%[input]) \n\t" "lh %[load2], 22(%[input]) \n\t" "lh %[load3], 26(%[input]) \n\t" "lh %[load4], 6(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_22_64] \n\t" "msub $ac1, %[load2], %[cospi_10_64] \n\t" "extp %[result1], $ac1, 31 \n\t" "madd $ac3, %[load3], %[cospi_6_64] \n\t" "msub $ac3, %[load4], %[cospi_26_64] \n\t" "extp %[result2], $ac3, 31 \n\t" "mtlo 
%[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac1, %[load1], %[cospi_10_64] \n\t" "madd $ac1, %[load2], %[cospi_22_64] \n\t" "extp %[result3], $ac1, 31 \n\t" "madd $ac2, %[load3], %[cospi_26_64] \n\t" "madd $ac2, %[load4], %[cospi_6_64] \n\t" "extp %[result4], $ac2, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[result2], %[result1] \n\t" "sub %[load2], %[result4], %[result3] \n\t" "msub $ac1, %[load1], %[cospi_24_64] \n\t" "msub $ac1, %[load2], %[cospi_8_64] \n\t" "madd $ac3, %[load2], %[cospi_24_64] \n\t" "msub $ac3, %[load1], %[cospi_8_64] \n\t" "extp %[step2_10], $ac1, 31 \n\t" "extp %[step2_13], $ac3, 31 \n\t" "add %[step2_11], %[result1], %[result2] \n\t" "add %[step2_12], %[result4], %[result3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [result1] "=&r"(result1), [result2] "=&r"(result2), [result3] "=&r"(result3), [result4] "=&r"(result4), [step2_10] "=r"(step2_10), [step2_11] "=r"(step2_11), [step2_12] "=r"(step2_12), [step2_13] "=r"(step2_13) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_22_64] "r"(cospi_22_64), [cospi_10_64] "r"(cospi_10_64), [cospi_6_64] "r"(cospi_6_64), [cospi_26_64] "r"(cospi_26_64), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64)); __asm__ __volatile__( "lh %[load5], 4(%[input]) \n\t" "lh %[load6], 28(%[input]) \n\t" "lh %[load7], 20(%[input]) \n\t" "lh %[load8], 12(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load5], %[cospi_28_64] \n\t" "msub $ac1, %[load6], %[cospi_4_64] \n\t" "extp %[result1], $ac1, 31 \n\t" "madd $ac3, %[load7], %[cospi_12_64] \n\t" "msub $ac3, %[load8], %[cospi_20_64] \n\t" "extp %[result2], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac1, %[load7], %[cospi_20_64] \n\t" "madd $ac1, %[load8], %[cospi_12_64] \n\t" "extp %[result3], $ac1, 31 \n\t" "madd $ac2, %[load5], %[cospi_4_64] \n\t" "madd $ac2, %[load6], %[cospi_28_64] \n\t" "extp %[result4], $ac2, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load5], %[result4], %[result3] \n\t" "sub %[load5], %[load5], %[result1] \n\t" "add %[load5], %[load5], %[result2] \n\t" "sub %[load6], %[result1], %[result2] \n\t" "sub %[load6], %[load6], %[result3] \n\t" "add %[load6], %[load6], %[result4] \n\t" "madd $ac1, %[load5], %[cospi_16_64] \n\t" "madd $ac3, %[load6], %[cospi_16_64] \n\t" "extp %[step1_5], $ac1, 31 \n\t" "extp %[step1_6], $ac3, 31 \n\t" "add %[step1_4], %[result1], %[result2] \n\t" "add %[step1_7], %[result4], %[result3] \n\t" : [load5] "=&r"(load5), [load6] "=&r"(load6), [load7] "=&r"(load7), [load8] "=&r"(load8), [result1] "=&r"(result1), [result2] "=&r"(result2), [result3] "=&r"(result3), [result4] "=&r"(result4), [step1_4] "=r"(step1_4), [step1_5] "=r"(step1_5), [step1_6] "=r"(step1_6), [step1_7] "=r"(step1_7) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_20_64] "r"(cospi_20_64), [cospi_12_64] "r"(cospi_12_64), [cospi_4_64] "r"(cospi_4_64), [cospi_28_64] "r"(cospi_28_64), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" 
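/* Added note (not part of the original source): same recombination as in
 * the row pass -- the four accumulators produce step1_10..step1_13 from
 * cospi_16_64 rotations. The row pass stored its results transposed
 * (output advances by 1, each row written at 16-element strides), so
 * this column pass again walks "input" linearly; its final stage adds
 * ROUND_POWER_OF_TWO(residual, 6), i.e. (x + 32) >> 6, to the predicted
 * pixels in dest and clips to [0, 255] through the vpx_ff_cropTbl
 * lookup (lbux). */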
"mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sub %[load5], %[step2_14], %[step2_13] \n\t" "sub %[load5], %[load5], %[step2_9] \n\t" "add %[load5], %[load5], %[step2_10] \n\t" "madd $ac0, %[load5], %[cospi_16_64] \n\t" "sub %[load6], %[step2_14], %[step2_13] \n\t" "sub %[load6], %[load6], %[step2_10] \n\t" "add %[load6], %[load6], %[step2_9] \n\t" "madd $ac1, %[load6], %[cospi_16_64] \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load5], %[step2_15], %[step2_12] \n\t" "sub %[load5], %[load5], %[step2_8] \n\t" "add %[load5], %[load5], %[step2_11] \n\t" "madd $ac2, %[load5], %[cospi_16_64] \n\t" "sub %[load6], %[step2_15], %[step2_12] \n\t" "sub %[load6], %[load6], %[step2_11] \n\t" "add %[load6], %[load6], %[step2_8] \n\t" "madd $ac3, %[load6], %[cospi_16_64] \n\t" "extp %[step1_10], $ac0, 31 \n\t" "extp %[step1_13], $ac1, 31 \n\t" "extp %[step1_11], $ac2, 31 \n\t" "extp %[step1_12], $ac3, 31 \n\t" : [load5] "=&r"(load5), [load6] "=&r"(load6), [step1_10] "=r"(step1_10), [step1_11] "=r"(step1_11), [step1_12] "=r"(step1_12), [step1_13] "=r"(step1_13) : [const_2_power_13] "r"(const_2_power_13), [step2_14] "r"(step2_14), [step2_13] "r"(step2_13), [step2_9] "r"(step2_9), [step2_10] "r"(step2_10), [step2_15] "r"(step2_15), [step2_12] "r"(step2_12), [step2_8] "r"(step2_8), [step2_11] "r"(step2_11), [cospi_16_64] "r"(cospi_16_64)); step1_8 = step2_8 + step2_11; step1_9 = step2_9 + step2_10; step1_14 = step2_13 + step2_14; step1_15 = step2_12 + step2_15; __asm__ __volatile__( "lbu %[load7], 0(%[dest_pix]) \n\t" "add %[load5], %[step1_0], %[step1_7] \n\t" "add %[load5], %[load5], %[step1_15] \n\t" "addi %[load5], %[load5], 32 \n\t" "sra %[load5], %[load5], 6 \n\t" "add %[load7], %[load7], %[load5] \n\t" "lbux %[load5], %[load7](%[cm]) \n\t" "add %[load6], %[step1_1], %[step1_6] \n\t" "add %[load6], %[load6], %[step1_14] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "add %[load5], %[step1_2], %[step1_5] \n\t" "add %[load5], %[load5], %[step1_13] \n\t" "addi %[load5], %[load5], 32 \n\t" "sra %[load5], %[load5], 6 \n\t" "add %[load7], %[load7], %[load5] \n\t" "lbux %[load5], %[load7](%[cm]) \n\t" "add %[load6], %[step1_3], %[step1_4] \n\t" "add %[load6], %[load6], %[step1_12] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "sub %[load5], %[step1_3], %[step1_4] \n\t" "add %[load5], %[load5], %[step1_11] \n\t" "addi %[load5], %[load5], 32 \n\t" "sra %[load5], %[load5], 6 \n\t" "add %[load7], %[load7], %[load5] \n\t" "lbux %[load5], %[load7](%[cm]) \n\t" "sub %[load6], %[step1_2], %[step1_5] \n\t" "add %[load6], %[load6], %[step1_10] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add 
%[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "sub %[load5], %[step1_1], %[step1_6] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "add %[load5], %[load5], %[step1_9] \n\t" "addi %[load5], %[load5], 32 \n\t" "sra %[load5], %[load5], 6 \n\t" "add %[load7], %[load7], %[load5] \n\t" "lbux %[load5], %[load7](%[cm]) \n\t" "sub %[load6], %[step1_0], %[step1_7] \n\t" "add %[load6], %[load6], %[step1_8] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "sub %[load5], %[step1_0], %[step1_7] \n\t" "sub %[load5], %[load5], %[step1_8] \n\t" "addi %[load5], %[load5], 32 \n\t" "sra %[load5], %[load5], 6 \n\t" "add %[load7], %[load7], %[load5] \n\t" "lbux %[load5], %[load7](%[cm]) \n\t" "sub %[load6], %[step1_1], %[step1_6] \n\t" "sub %[load6], %[load6], %[step1_9] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "sub %[load5], %[step1_2], %[step1_5] \n\t" "sub %[load5], %[load5], %[step1_10] \n\t" "addi %[load5], %[load5], 32 \n\t" "sra %[load5], %[load5], 6 \n\t" "add %[load7], %[load7], %[load5] \n\t" "lbux %[load5], %[load7](%[cm]) \n\t" "sub %[load6], %[step1_3], %[step1_4] \n\t" "sub %[load6], %[load6], %[step1_11] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "add %[load5], %[step1_3], %[step1_4] \n\t" "sub %[load5], %[load5], %[step1_12] \n\t" "addi %[load5], %[load5], 32 \n\t" "sra %[load5], %[load5], 6 \n\t" "add %[load7], %[load7], %[load5] \n\t" "lbux %[load5], %[load7](%[cm]) \n\t" "add %[load6], %[step1_2], %[step1_5] \n\t" "sub %[load6], %[load6], %[step1_13] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" "lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load7], 0(%[dest_pix]) \n\t" "add %[load5], %[step1_1], %[step1_6] \n\t" "sub %[load5], %[load5], %[step1_14] \n\t" "addi %[load5], %[load5], 32 \n\t" "sra %[load5], %[load5], 6 \n\t" "add %[load7], %[load7], %[load5] \n\t" "lbux %[load5], %[load7](%[cm]) \n\t" "add %[load6], %[step1_0], %[step1_7] \n\t" "sub %[load6], %[load6], %[step1_15] \n\t" "sb %[load5], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[load8], 0(%[dest_pix]) \n\t" "addi %[load6], %[load6], 32 \n\t" "sra %[load6], %[load6], 6 \n\t" "add %[load8], %[load8], %[load6] \n\t" 
"lbux %[load6], %[load8](%[cm]) \n\t" "sb %[load6], 0(%[dest_pix]) \n\t" : [load5] "=&r"(load5), [load6] "=&r"(load6), [load7] "=&r"(load7), [load8] "=&r"(load8), [dest_pix] "+r"(dest_pix) : [cm] "r"(cm), [stride] "r"(stride), [step1_0] "r"(step1_0), [step1_1] "r"(step1_1), [step1_2] "r"(step1_2), [step1_3] "r"(step1_3), [step1_4] "r"(step1_4), [step1_5] "r"(step1_5), [step1_6] "r"(step1_6), [step1_7] "r"(step1_7), [step1_8] "r"(step1_8), [step1_9] "r"(step1_9), [step1_10] "r"(step1_10), [step1_11] "r"(step1_11), [step1_12] "r"(step1_12), [step1_13] "r"(step1_13), [step1_14] "r"(step1_14), [step1_15] "r"(step1_15)); input += 16; } } void vpx_idct16x16_256_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { DECLARE_ALIGNED(32, int16_t, out[16 * 16]); uint32_t pos = 45; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); // First transform rows idct16_rows_dspr2(input, out, 16); // Then transform columns and add to dest idct16_cols_add_blk_dspr2(out, dest, stride); } void vpx_idct16x16_10_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { DECLARE_ALIGNED(32, int16_t, out[16 * 16]); int16_t *outptr = out; uint32_t i; uint32_t pos = 45; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); // First transform rows. Since all non-zero dct coefficients are in // upper-left 4x4 area, we only need to calculate first 4 rows here. idct16_rows_dspr2(input, outptr, 4); outptr += 4; for (i = 0; i < 6; ++i) { __asm__ __volatile__( "sw $zero, 0(%[outptr]) \n\t" "sw $zero, 32(%[outptr]) \n\t" "sw $zero, 64(%[outptr]) \n\t" "sw $zero, 96(%[outptr]) \n\t" "sw $zero, 128(%[outptr]) \n\t" "sw $zero, 160(%[outptr]) \n\t" "sw $zero, 192(%[outptr]) \n\t" "sw $zero, 224(%[outptr]) \n\t" "sw $zero, 256(%[outptr]) \n\t" "sw $zero, 288(%[outptr]) \n\t" "sw $zero, 320(%[outptr]) \n\t" "sw $zero, 352(%[outptr]) \n\t" "sw $zero, 384(%[outptr]) \n\t" "sw $zero, 416(%[outptr]) \n\t" "sw $zero, 448(%[outptr]) \n\t" "sw $zero, 480(%[outptr]) \n\t" : : [outptr] "r"(outptr)); outptr += 2; } // Then transform columns idct16_cols_add_blk_dspr2(out, dest, stride); } void vpx_idct16x16_1_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { uint32_t pos = 45; int32_t out; int32_t r; int32_t a1, absa1; int32_t vector_a1; int32_t t1, t2, t3, t4; int32_t vector_1, vector_2, vector_3, vector_4; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); out = DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input[0]); __asm__ __volatile__( "addi %[out], %[out], 32 \n\t" "sra %[a1], %[out], 6 \n\t" : [out] "+r"(out), [a1] "=r"(a1) :); if (a1 < 0) { /* use quad-byte * input and output memory are four byte aligned */ __asm__ __volatile__( "abs %[absa1], %[a1] \n\t" "replv.qb %[vector_a1], %[absa1] \n\t" : [absa1] "=r"(absa1), [vector_a1] "=r"(vector_a1) : [a1] "r"(a1)); for (r = 16; r--;) { __asm__ __volatile__( "lw %[t1], 0(%[dest]) \n\t" "lw %[t2], 4(%[dest]) \n\t" "lw %[t3], 8(%[dest]) \n\t" "lw %[t4], 12(%[dest]) \n\t" "subu_s.qb %[vector_1], %[t1], %[vector_a1] \n\t" "subu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t" "subu_s.qb %[vector_3], %[t3], %[vector_a1] \n\t" "subu_s.qb %[vector_4], %[t4], %[vector_a1] \n\t" "sw %[vector_1], 0(%[dest]) \n\t" "sw %[vector_2], 4(%[dest]) \n\t" "sw %[vector_3], 8(%[dest]) \n\t" "sw %[vector_4], 12(%[dest]) \n\t" "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [t3] "=&r"(t3), [t4] "=&r"(t4), [vector_1] 
"=&r"(vector_1), [vector_2] "=&r"(vector_2), [vector_3] "=&r"(vector_3), [vector_4] "=&r"(vector_4), [dest] "+&r"(dest) : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } else if (a1 > 255) { int32_t a11, a12, vector_a11, vector_a12; /* use quad-byte * input and output memory are four byte aligned */ a11 = a1 >> 1; a12 = a1 - a11; __asm__ __volatile__( "replv.qb %[vector_a11], %[a11] \n\t" "replv.qb %[vector_a12], %[a12] \n\t" : [vector_a11] "=&r"(vector_a11), [vector_a12] "=&r"(vector_a12) : [a11] "r"(a11), [a12] "r"(a12)); for (r = 16; r--;) { __asm__ __volatile__( "lw %[t1], 0(%[dest]) \n\t" "lw %[t2], 4(%[dest]) \n\t" "lw %[t3], 8(%[dest]) \n\t" "lw %[t4], 12(%[dest]) \n\t" "addu_s.qb %[vector_1], %[t1], %[vector_a11] \n\t" "addu_s.qb %[vector_2], %[t2], %[vector_a11] \n\t" "addu_s.qb %[vector_3], %[t3], %[vector_a11] \n\t" "addu_s.qb %[vector_4], %[t4], %[vector_a11] \n\t" "addu_s.qb %[vector_1], %[vector_1], %[vector_a12] \n\t" "addu_s.qb %[vector_2], %[vector_2], %[vector_a12] \n\t" "addu_s.qb %[vector_3], %[vector_3], %[vector_a12] \n\t" "addu_s.qb %[vector_4], %[vector_4], %[vector_a12] \n\t" "sw %[vector_1], 0(%[dest]) \n\t" "sw %[vector_2], 4(%[dest]) \n\t" "sw %[vector_3], 8(%[dest]) \n\t" "sw %[vector_4], 12(%[dest]) \n\t" "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [t3] "=&r"(t3), [t4] "=&r"(t4), [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2), [vector_3] "=&r"(vector_3), [vector_4] "=&r"(vector_4), [dest] "+&r"(dest) : [stride] "r"(stride), [vector_a11] "r"(vector_a11), [vector_a12] "r"(vector_a12)); } } else { /* use quad-byte * input and output memory are four byte aligned */ __asm__ __volatile__("replv.qb %[vector_a1], %[a1] \n\t" : [vector_a1] "=r"(vector_a1) : [a1] "r"(a1)); for (r = 16; r--;) { __asm__ __volatile__( "lw %[t1], 0(%[dest]) \n\t" "lw %[t2], 4(%[dest]) \n\t" "lw %[t3], 8(%[dest]) \n\t" "lw %[t4], 12(%[dest]) \n\t" "addu_s.qb %[vector_1], %[t1], %[vector_a1] \n\t" "addu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t" "addu_s.qb %[vector_3], %[t3], %[vector_a1] \n\t" "addu_s.qb %[vector_4], %[t4], %[vector_a1] \n\t" "sw %[vector_1], 0(%[dest]) \n\t" "sw %[vector_2], 4(%[dest]) \n\t" "sw %[vector_3], 8(%[dest]) \n\t" "sw %[vector_4], 12(%[dest]) \n\t" "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [t3] "=&r"(t3), [t4] "=&r"(t4), [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2), [vector_3] "=&r"(vector_3), [vector_4] "=&r"(vector_4), [dest] "+&r"(dest) : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } } void iadst16_dspr2(const int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, s10, s11, s12, s13, s14, s15; int x0 = input[15]; int x1 = input[0]; int x2 = input[13]; int x3 = input[2]; int x4 = input[11]; int x5 = input[4]; int x6 = input[9]; int x7 = input[6]; int x8 = input[7]; int x9 = input[8]; int x10 = input[5]; int x11 = input[10]; int x12 = input[3]; int x13 = input[12]; int x14 = input[1]; int x15 = input[14]; if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7 | x8 | x9 | x10 | x11 | x12 | x13 | x14 | x15)) { output[0] = output[1] = output[2] = output[3] = output[4] = output[5] = output[6] = output[7] = output[8] = output[9] = output[10] = output[11] = output[12] = output[13] = output[14] = output[15] = 0; return; } // stage 1 s0 = x0 * cospi_1_64 + x1 * cospi_31_64; s1 = x0 * cospi_31_64 - x1 * cospi_1_64; s2 = x2 * cospi_5_64 + x3 * cospi_27_64; s3 = x2 * cospi_27_64 - x3 * cospi_5_64; s4 = x4 * cospi_9_64 + x5 * cospi_23_64; s5 = x4 * cospi_23_64 - x5 * 
cospi_9_64; s6 = x6 * cospi_13_64 + x7 * cospi_19_64; s7 = x6 * cospi_19_64 - x7 * cospi_13_64; s8 = x8 * cospi_17_64 + x9 * cospi_15_64; s9 = x8 * cospi_15_64 - x9 * cospi_17_64; s10 = x10 * cospi_21_64 + x11 * cospi_11_64; s11 = x10 * cospi_11_64 - x11 * cospi_21_64; s12 = x12 * cospi_25_64 + x13 * cospi_7_64; s13 = x12 * cospi_7_64 - x13 * cospi_25_64; s14 = x14 * cospi_29_64 + x15 * cospi_3_64; s15 = x14 * cospi_3_64 - x15 * cospi_29_64; x0 = dct_const_round_shift(s0 + s8); x1 = dct_const_round_shift(s1 + s9); x2 = dct_const_round_shift(s2 + s10); x3 = dct_const_round_shift(s3 + s11); x4 = dct_const_round_shift(s4 + s12); x5 = dct_const_round_shift(s5 + s13); x6 = dct_const_round_shift(s6 + s14); x7 = dct_const_round_shift(s7 + s15); x8 = dct_const_round_shift(s0 - s8); x9 = dct_const_round_shift(s1 - s9); x10 = dct_const_round_shift(s2 - s10); x11 = dct_const_round_shift(s3 - s11); x12 = dct_const_round_shift(s4 - s12); x13 = dct_const_round_shift(s5 - s13); x14 = dct_const_round_shift(s6 - s14); x15 = dct_const_round_shift(s7 - s15); // stage 2 s0 = x0; s1 = x1; s2 = x2; s3 = x3; s4 = x4; s5 = x5; s6 = x6; s7 = x7; s8 = x8 * cospi_4_64 + x9 * cospi_28_64; s9 = x8 * cospi_28_64 - x9 * cospi_4_64; s10 = x10 * cospi_20_64 + x11 * cospi_12_64; s11 = x10 * cospi_12_64 - x11 * cospi_20_64; s12 = -x12 * cospi_28_64 + x13 * cospi_4_64; s13 = x12 * cospi_4_64 + x13 * cospi_28_64; s14 = -x14 * cospi_12_64 + x15 * cospi_20_64; s15 = x14 * cospi_20_64 + x15 * cospi_12_64; x0 = s0 + s4; x1 = s1 + s5; x2 = s2 + s6; x3 = s3 + s7; x4 = s0 - s4; x5 = s1 - s5; x6 = s2 - s6; x7 = s3 - s7; x8 = dct_const_round_shift(s8 + s12); x9 = dct_const_round_shift(s9 + s13); x10 = dct_const_round_shift(s10 + s14); x11 = dct_const_round_shift(s11 + s15); x12 = dct_const_round_shift(s8 - s12); x13 = dct_const_round_shift(s9 - s13); x14 = dct_const_round_shift(s10 - s14); x15 = dct_const_round_shift(s11 - s15); // stage 3 s0 = x0; s1 = x1; s2 = x2; s3 = x3; s4 = x4 * cospi_8_64 + x5 * cospi_24_64; s5 = x4 * cospi_24_64 - x5 * cospi_8_64; s6 = -x6 * cospi_24_64 + x7 * cospi_8_64; s7 = x6 * cospi_8_64 + x7 * cospi_24_64; s8 = x8; s9 = x9; s10 = x10; s11 = x11; s12 = x12 * cospi_8_64 + x13 * cospi_24_64; s13 = x12 * cospi_24_64 - x13 * cospi_8_64; s14 = -x14 * cospi_24_64 + x15 * cospi_8_64; s15 = x14 * cospi_8_64 + x15 * cospi_24_64; x0 = s0 + s2; x1 = s1 + s3; x2 = s0 - s2; x3 = s1 - s3; x4 = dct_const_round_shift(s4 + s6); x5 = dct_const_round_shift(s5 + s7); x6 = dct_const_round_shift(s4 - s6); x7 = dct_const_round_shift(s5 - s7); x8 = s8 + s10; x9 = s9 + s11; x10 = s8 - s10; x11 = s9 - s11; x12 = dct_const_round_shift(s12 + s14); x13 = dct_const_round_shift(s13 + s15); x14 = dct_const_round_shift(s12 - s14); x15 = dct_const_round_shift(s13 - s15); // stage 4 s2 = (-cospi_16_64) * (x2 + x3); s3 = cospi_16_64 * (x2 - x3); s6 = cospi_16_64 * (x6 + x7); s7 = cospi_16_64 * (-x6 + x7); s10 = cospi_16_64 * (x10 + x11); s11 = cospi_16_64 * (-x10 + x11); s14 = (-cospi_16_64) * (x14 + x15); s15 = cospi_16_64 * (x14 - x15); x2 = dct_const_round_shift(s2); x3 = dct_const_round_shift(s3); x6 = dct_const_round_shift(s6); x7 = dct_const_round_shift(s7); x10 = dct_const_round_shift(s10); x11 = dct_const_round_shift(s11); x14 = dct_const_round_shift(s14); x15 = dct_const_round_shift(s15); output[0] = x0; output[1] = -x8; output[2] = x12; output[3] = -x4; output[4] = x6; output[5] = x14; output[6] = x10; output[7] = x2; output[8] = x3; output[9] = x11; output[10] = x15; output[11] = x7; output[12] = x5; output[13] = -x13; output[14] 
= x9; output[15] = -x1; } #endif // HAVE_DSPR2 libvpx-1.8.2/vpx_dsp/mips/itrans32_cols_dspr2.c000066400000000000000000002104171357355204000214350ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "vpx_dsp/mips/inv_txfm_dspr2.h" #include "vpx_dsp/txfm_common.h" #if HAVE_DSPR2 void vpx_idct32_cols_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride) { int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6; int step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13; int step1_14, step1_15, step1_16, step1_17, step1_18, step1_19, step1_20; int step1_21, step1_22, step1_23, step1_24, step1_25, step1_26, step1_27; int step1_28, step1_29, step1_30, step1_31; int step2_0, step2_1, step2_2, step2_3, step2_4, step2_5, step2_6; int step2_7, step2_8, step2_9, step2_10, step2_11, step2_12, step2_13; int step2_14, step2_15, step2_16, step2_17, step2_18, step2_19, step2_20; int step2_21, step2_22, step2_23, step2_24, step2_25, step2_26, step2_27; int step2_28, step2_29, step2_30, step2_31; int step3_8, step3_9, step3_10, step3_11, step3_12, step3_13, step3_14; int step3_15, step3_16, step3_17, step3_18, step3_19, step3_20, step3_21; int step3_22, step3_23, step3_24, step3_25, step3_26, step3_27, step3_28; int step3_29, step3_30, step3_31; int temp0, temp1, temp2, temp3; int load1, load2, load3, load4; int result1, result2; int i; uint8_t *dest_pix, *dest_pix1; const int const_2_power_13 = 8192; uint8_t *cm = vpx_ff_cropTbl; /* prefetch vpx_ff_cropTbl */ prefetch_load(vpx_ff_cropTbl); prefetch_load(vpx_ff_cropTbl + 32); prefetch_load(vpx_ff_cropTbl + 64); prefetch_load(vpx_ff_cropTbl + 96); prefetch_load(vpx_ff_cropTbl + 128); prefetch_load(vpx_ff_cropTbl + 160); prefetch_load(vpx_ff_cropTbl + 192); prefetch_load(vpx_ff_cropTbl + 224); for (i = 0; i < 32; ++i) { dest_pix = dest + i; dest_pix1 = dest + i + 31 * stride; __asm__ __volatile__( "lh %[load1], 2(%[input]) \n\t" "lh %[load2], 62(%[input]) \n\t" "lh %[load3], 34(%[input]) \n\t" "lh %[load4], 30(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_31_64] \n\t" "msub $ac1, %[load2], %[cospi_1_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_1_64] \n\t" "madd $ac3, %[load2], %[cospi_31_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_15_64] \n\t" "msub $ac2, %[load4], %[cospi_17_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_17_64] \n\t" "madd $ac1, %[load4], %[cospi_15_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp3], %[temp2] \n\t" "sub %[load2], %[temp0], %[temp1] \n\t" "madd $ac1, %[load1], %[cospi_28_64] \n\t" "msub $ac1, %[load2], %[cospi_4_64] \n\t" "madd $ac3, %[load1], %[cospi_4_64] \n\t" "madd $ac3, %[load2], %[cospi_28_64] \n\t" "extp %[step1_17], $ac1, 31 \n\t" "extp 
%[step1_30], $ac3, 31 \n\t" "add %[step1_16], %[temp0], %[temp1] \n\t" "add %[step1_31], %[temp2], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_16] "=&r"(step1_16), [step1_17] "=&r"(step1_17), [step1_30] "=&r"(step1_30), [step1_31] "=&r"(step1_31) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_31_64] "r"(cospi_31_64), [cospi_1_64] "r"(cospi_1_64), [cospi_4_64] "r"(cospi_4_64), [cospi_17_64] "r"(cospi_17_64), [cospi_15_64] "r"(cospi_15_64), [cospi_28_64] "r"(cospi_28_64)); __asm__ __volatile__( "lh %[load1], 18(%[input]) \n\t" "lh %[load2], 46(%[input]) \n\t" "lh %[load3], 50(%[input]) \n\t" "lh %[load4], 14(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_23_64] \n\t" "msub $ac1, %[load2], %[cospi_9_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_9_64] \n\t" "madd $ac3, %[load2], %[cospi_23_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_7_64] \n\t" "msub $ac2, %[load4], %[cospi_25_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_25_64] \n\t" "madd $ac1, %[load4], %[cospi_7_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp1], %[temp0] \n\t" "sub %[load2], %[temp2], %[temp3] \n\t" "msub $ac1, %[load1], %[cospi_28_64] \n\t" "msub $ac1, %[load2], %[cospi_4_64] \n\t" "msub $ac3, %[load1], %[cospi_4_64] \n\t" "madd $ac3, %[load2], %[cospi_28_64] \n\t" "extp %[step1_18], $ac1, 31 \n\t" "extp %[step1_29], $ac3, 31 \n\t" "add %[step1_19], %[temp0], %[temp1] \n\t" "add %[step1_28], %[temp2], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_18] "=&r"(step1_18), [step1_19] "=&r"(step1_19), [step1_28] "=&r"(step1_28), [step1_29] "=&r"(step1_29) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_23_64] "r"(cospi_23_64), [cospi_9_64] "r"(cospi_9_64), [cospi_4_64] "r"(cospi_4_64), [cospi_7_64] "r"(cospi_7_64), [cospi_25_64] "r"(cospi_25_64), [cospi_28_64] "r"(cospi_28_64)); __asm__ __volatile__( "lh %[load1], 10(%[input]) \n\t" "lh %[load2], 54(%[input]) \n\t" "lh %[load3], 42(%[input]) \n\t" "lh %[load4], 22(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_27_64] \n\t" "msub $ac1, %[load2], %[cospi_5_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_5_64] \n\t" "madd $ac3, %[load2], %[cospi_27_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_11_64] \n\t" "msub $ac2, %[load4], %[cospi_21_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_21_64] \n\t" "madd $ac1, %[load4], %[cospi_11_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 
\n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp0], %[temp1] \n\t" "sub %[load2], %[temp3], %[temp2] \n\t" "madd $ac1, %[load2], %[cospi_12_64] \n\t" "msub $ac1, %[load1], %[cospi_20_64] \n\t" "madd $ac3, %[load1], %[cospi_12_64] \n\t" "madd $ac3, %[load2], %[cospi_20_64] \n\t" "extp %[step1_21], $ac1, 31 \n\t" "extp %[step1_26], $ac3, 31 \n\t" "add %[step1_20], %[temp0], %[temp1] \n\t" "add %[step1_27], %[temp2], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_20] "=&r"(step1_20), [step1_21] "=&r"(step1_21), [step1_26] "=&r"(step1_26), [step1_27] "=&r"(step1_27) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_27_64] "r"(cospi_27_64), [cospi_5_64] "r"(cospi_5_64), [cospi_11_64] "r"(cospi_11_64), [cospi_21_64] "r"(cospi_21_64), [cospi_12_64] "r"(cospi_12_64), [cospi_20_64] "r"(cospi_20_64)); __asm__ __volatile__( "lh %[load1], 26(%[input]) \n\t" "lh %[load2], 38(%[input]) \n\t" "lh %[load3], 58(%[input]) \n\t" "lh %[load4], 6(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_19_64] \n\t" "msub $ac1, %[load2], %[cospi_13_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_13_64] \n\t" "madd $ac3, %[load2], %[cospi_19_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_3_64] \n\t" "msub $ac2, %[load4], %[cospi_29_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_29_64] \n\t" "madd $ac1, %[load4], %[cospi_3_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp1], %[temp0] \n\t" "sub %[load2], %[temp2], %[temp3] \n\t" "msub $ac1, %[load1], %[cospi_12_64] \n\t" "msub $ac1, %[load2], %[cospi_20_64] \n\t" "msub $ac3, %[load1], %[cospi_20_64] \n\t" "madd $ac3, %[load2], %[cospi_12_64] \n\t" "extp %[step1_22], $ac1, 31 \n\t" "extp %[step1_25], $ac3, 31 \n\t" "add %[step1_23], %[temp0], %[temp1] \n\t" "add %[step1_24], %[temp2], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_22] "=&r"(step1_22), [step1_23] "=&r"(step1_23), [step1_24] "=&r"(step1_24), [step1_25] "=&r"(step1_25) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_19_64] "r"(cospi_19_64), [cospi_13_64] "r"(cospi_13_64), [cospi_3_64] "r"(cospi_3_64), [cospi_29_64] "r"(cospi_29_64), [cospi_12_64] "r"(cospi_12_64), [cospi_20_64] "r"(cospi_20_64)); __asm__ __volatile__( "lh %[load1], 4(%[input]) \n\t" "lh %[load2], 60(%[input]) \n\t" "lh %[load3], 36(%[input]) \n\t" "lh %[load4], 28(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_30_64] \n\t" "msub $ac1, %[load2], %[cospi_2_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_2_64] \n\t" "madd $ac3, %[load2], %[cospi_30_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" 
"madd $ac2, %[load3], %[cospi_14_64] \n\t" "msub $ac2, %[load4], %[cospi_18_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_18_64] \n\t" "madd $ac1, %[load4], %[cospi_14_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp0], %[temp1] \n\t" "sub %[load2], %[temp3], %[temp2] \n\t" "msub $ac1, %[load1], %[cospi_8_64] \n\t" "madd $ac1, %[load2], %[cospi_24_64] \n\t" "madd $ac3, %[load1], %[cospi_24_64] \n\t" "madd $ac3, %[load2], %[cospi_8_64] \n\t" "extp %[step2_9], $ac1, 31 \n\t" "extp %[step2_14], $ac3, 31 \n\t" "add %[step2_8], %[temp0], %[temp1] \n\t" "add %[step2_15], %[temp2], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step2_8] "=&r"(step2_8), [step2_9] "=&r"(step2_9), [step2_14] "=&r"(step2_14), [step2_15] "=&r"(step2_15) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_30_64] "r"(cospi_30_64), [cospi_2_64] "r"(cospi_2_64), [cospi_14_64] "r"(cospi_14_64), [cospi_18_64] "r"(cospi_18_64), [cospi_8_64] "r"(cospi_8_64), [cospi_24_64] "r"(cospi_24_64)); __asm__ __volatile__( "lh %[load1], 20(%[input]) \n\t" "lh %[load2], 44(%[input]) \n\t" "lh %[load3], 52(%[input]) \n\t" "lh %[load4], 12(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_22_64] \n\t" "msub $ac1, %[load2], %[cospi_10_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_10_64] \n\t" "madd $ac3, %[load2], %[cospi_22_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_6_64] \n\t" "msub $ac2, %[load4], %[cospi_26_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_26_64] \n\t" "madd $ac1, %[load4], %[cospi_6_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp1], %[temp0] \n\t" "sub %[load2], %[temp2], %[temp3] \n\t" "msub $ac1, %[load1], %[cospi_24_64] \n\t" "msub $ac1, %[load2], %[cospi_8_64] \n\t" "madd $ac3, %[load2], %[cospi_24_64] \n\t" "msub $ac3, %[load1], %[cospi_8_64] \n\t" "extp %[step2_10], $ac1, 31 \n\t" "extp %[step2_13], $ac3, 31 \n\t" "add %[step2_11], %[temp0], %[temp1] \n\t" "add %[step2_12], %[temp2], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step2_10] "=&r"(step2_10), [step2_11] "=&r"(step2_11), [step2_12] "=&r"(step2_12), [step2_13] "=&r"(step2_13) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_22_64] "r"(cospi_22_64), [cospi_10_64] "r"(cospi_10_64), [cospi_6_64] "r"(cospi_6_64), [cospi_26_64] "r"(cospi_26_64), [cospi_8_64] "r"(cospi_8_64), [cospi_24_64] "r"(cospi_24_64)); __asm__ __volatile__( "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "sub %[temp0], %[step2_14], %[step2_13] \n\t" "sub %[temp0], %[temp0], %[step2_9] \n\t" "add %[temp0], %[temp0], %[step2_10] \n\t" "madd $ac0, %[temp0], %[cospi_16_64] \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 
\n\t" "sub %[temp1], %[step2_14], %[step2_13] \n\t" "add %[temp1], %[temp1], %[step2_9] \n\t" "sub %[temp1], %[temp1], %[step2_10] \n\t" "madd $ac1, %[temp1], %[cospi_16_64] \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "sub %[temp0], %[step2_15], %[step2_12] \n\t" "sub %[temp0], %[temp0], %[step2_8] \n\t" "add %[temp0], %[temp0], %[step2_11] \n\t" "madd $ac2, %[temp0], %[cospi_16_64] \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[temp1], %[step2_15], %[step2_12] \n\t" "add %[temp1], %[temp1], %[step2_8] \n\t" "sub %[temp1], %[temp1], %[step2_11] \n\t" "madd $ac3, %[temp1], %[cospi_16_64] \n\t" "add %[step3_8], %[step2_8], %[step2_11] \n\t" "add %[step3_9], %[step2_9], %[step2_10] \n\t" "add %[step3_14], %[step2_13], %[step2_14] \n\t" "add %[step3_15], %[step2_12], %[step2_15] \n\t" "extp %[step3_10], $ac0, 31 \n\t" "extp %[step3_13], $ac1, 31 \n\t" "extp %[step3_11], $ac2, 31 \n\t" "extp %[step3_12], $ac3, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [step3_8] "=&r"(step3_8), [step3_9] "=&r"(step3_9), [step3_10] "=&r"(step3_10), [step3_11] "=&r"(step3_11), [step3_12] "=&r"(step3_12), [step3_13] "=&r"(step3_13), [step3_14] "=&r"(step3_14), [step3_15] "=&r"(step3_15) : [const_2_power_13] "r"(const_2_power_13), [step2_8] "r"(step2_8), [step2_9] "r"(step2_9), [step2_10] "r"(step2_10), [step2_11] "r"(step2_11), [step2_12] "r"(step2_12), [step2_13] "r"(step2_13), [step2_14] "r"(step2_14), [step2_15] "r"(step2_15), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sub %[temp0], %[step1_17], %[step1_18] \n\t" "sub %[temp1], %[step1_30], %[step1_29] \n\t" "add %[step3_17], %[step1_17], %[step1_18] \n\t" "add %[step3_30], %[step1_30], %[step1_29] \n\t" "msub $ac0, %[temp0], %[cospi_8_64] \n\t" "madd $ac0, %[temp1], %[cospi_24_64] \n\t" "extp %[step3_18], $ac0, 31 \n\t" "madd $ac1, %[temp0], %[cospi_24_64] \n\t" "madd $ac1, %[temp1], %[cospi_8_64] \n\t" "extp %[step3_29], $ac1, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [step3_18] "=&r"(step3_18), [step3_29] "=&r"(step3_29), [step3_17] "=&r"(step3_17), [step3_30] "=&r"(step3_30) : [const_2_power_13] "r"(const_2_power_13), [step1_17] "r"(step1_17), [step1_18] "r"(step1_18), [step1_30] "r"(step1_30), [step1_29] "r"(step1_29), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64)); __asm__ __volatile__( "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sub %[temp0], %[step1_16], %[step1_19] \n\t" "sub %[temp1], %[step1_31], %[step1_28] \n\t" "add %[step3_16], %[step1_16], %[step1_19] \n\t" "add %[step3_31], %[step1_31], %[step1_28] \n\t" "msub $ac0, %[temp0], %[cospi_8_64] \n\t" "madd $ac0, %[temp1], %[cospi_24_64] \n\t" "extp %[step3_19], $ac0, 31 \n\t" "madd $ac1, %[temp0], %[cospi_24_64] \n\t" "madd $ac1, %[temp1], %[cospi_8_64] \n\t" "extp %[step3_28], $ac1, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [step3_16] "=&r"(step3_16), [step3_31] "=&r"(step3_31), [step3_19] "=&r"(step3_19), [step3_28] "=&r"(step3_28) : [const_2_power_13] "r"(const_2_power_13), [step1_16] "r"(step1_16), [step1_19] "r"(step1_19), [step1_31] "r"(step1_31), [step1_28] "r"(step1_28), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64)); __asm__ __volatile__( "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 
\n\t" "sub %[temp0], %[step1_23], %[step1_20] \n\t" "sub %[temp1], %[step1_24], %[step1_27] \n\t" "add %[step3_23], %[step1_23], %[step1_20] \n\t" "add %[step3_24], %[step1_24], %[step1_27] \n\t" "msub $ac0, %[temp0], %[cospi_8_64] \n\t" "madd $ac0, %[temp1], %[cospi_24_64] \n\t" "extp %[step3_27], $ac0, 31 \n\t" "msub $ac1, %[temp0], %[cospi_24_64] \n\t" "msub $ac1, %[temp1], %[cospi_8_64] \n\t" "extp %[step3_20], $ac1, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [step3_23] "=&r"(step3_23), [step3_24] "=&r"(step3_24), [step3_20] "=&r"(step3_20), [step3_27] "=&r"(step3_27) : [const_2_power_13] "r"(const_2_power_13), [step1_23] "r"(step1_23), [step1_20] "r"(step1_20), [step1_24] "r"(step1_24), [step1_27] "r"(step1_27), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64)); __asm__ __volatile__( "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sub %[temp0], %[step1_22], %[step1_21] \n\t" "sub %[temp1], %[step1_25], %[step1_26] \n\t" "add %[step3_22], %[step1_22], %[step1_21] \n\t" "add %[step3_25], %[step1_25], %[step1_26] \n\t" "msub $ac0, %[temp0], %[cospi_24_64] \n\t" "msub $ac0, %[temp1], %[cospi_8_64] \n\t" "extp %[step3_21], $ac0, 31 \n\t" "msub $ac1, %[temp0], %[cospi_8_64] \n\t" "madd $ac1, %[temp1], %[cospi_24_64] \n\t" "extp %[step3_26], $ac1, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [step3_22] "=&r"(step3_22), [step3_25] "=&r"(step3_25), [step3_21] "=&r"(step3_21), [step3_26] "=&r"(step3_26) : [const_2_power_13] "r"(const_2_power_13), [step1_22] "r"(step1_22), [step1_21] "r"(step1_21), [step1_25] "r"(step1_25), [step1_26] "r"(step1_26), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64)); __asm__ __volatile__( "add %[step2_16], %[step3_16], %[step3_23] \n\t" "add %[step2_17], %[step3_17], %[step3_22] \n\t" "add %[step2_18], %[step3_18], %[step3_21] \n\t" "add %[step2_19], %[step3_19], %[step3_20] \n\t" "sub %[step2_20], %[step3_19], %[step3_20] \n\t" "sub %[step2_21], %[step3_18], %[step3_21] \n\t" "sub %[step2_22], %[step3_17], %[step3_22] \n\t" "sub %[step2_23], %[step3_16], %[step3_23] \n\t" : [step2_16] "=&r"(step2_16), [step2_17] "=&r"(step2_17), [step2_18] "=&r"(step2_18), [step2_19] "=&r"(step2_19), [step2_20] "=&r"(step2_20), [step2_21] "=&r"(step2_21), [step2_22] "=&r"(step2_22), [step2_23] "=&r"(step2_23) : [step3_16] "r"(step3_16), [step3_23] "r"(step3_23), [step3_17] "r"(step3_17), [step3_22] "r"(step3_22), [step3_18] "r"(step3_18), [step3_21] "r"(step3_21), [step3_19] "r"(step3_19), [step3_20] "r"(step3_20)); __asm__ __volatile__( "sub %[step2_24], %[step3_31], %[step3_24] \n\t" "sub %[step2_25], %[step3_30], %[step3_25] \n\t" "sub %[step2_26], %[step3_29], %[step3_26] \n\t" "sub %[step2_27], %[step3_28], %[step3_27] \n\t" "add %[step2_28], %[step3_28], %[step3_27] \n\t" "add %[step2_29], %[step3_29], %[step3_26] \n\t" "add %[step2_30], %[step3_30], %[step3_25] \n\t" "add %[step2_31], %[step3_31], %[step3_24] \n\t" : [step2_24] "=&r"(step2_24), [step2_28] "=&r"(step2_28), [step2_25] "=&r"(step2_25), [step2_29] "=&r"(step2_29), [step2_26] "=&r"(step2_26), [step2_30] "=&r"(step2_30), [step2_27] "=&r"(step2_27), [step2_31] "=&r"(step2_31) : [step3_31] "r"(step3_31), [step3_24] "r"(step3_24), [step3_30] "r"(step3_30), [step3_25] "r"(step3_25), [step3_29] "r"(step3_29), [step3_26] "r"(step3_26), [step3_28] "r"(step3_28), [step3_27] "r"(step3_27)); __asm__ __volatile__( "lh %[load1], 0(%[input]) \n\t" "lh %[load2], 32(%[input]) \n\t" "lh %[load3], 
16(%[input]) \n\t" "lh %[load4], 48(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "add %[result1], %[load1], %[load2] \n\t" "sub %[result2], %[load1], %[load2] \n\t" "madd $ac1, %[result1], %[cospi_16_64] \n\t" "madd $ac2, %[result2], %[cospi_16_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "extp %[temp1], $ac2, 31 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac3, %[load3], %[cospi_24_64] \n\t" "msub $ac3, %[load4], %[cospi_8_64] \n\t" "extp %[temp2], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "madd $ac1, %[load3], %[cospi_8_64] \n\t" "madd $ac1, %[load4], %[cospi_24_64] \n\t" "extp %[temp3], $ac1, 31 \n\t" "add %[step1_0], %[temp0], %[temp3] \n\t" "add %[step1_1], %[temp1], %[temp2] \n\t" "sub %[step1_2], %[temp1], %[temp2] \n\t" "sub %[step1_3], %[temp0], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [result1] "=&r"(result1), [result2] "=&r"(result2), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_0] "=&r"(step1_0), [step1_1] "=&r"(step1_1), [step1_2] "=&r"(step1_2), [step1_3] "=&r"(step1_3) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "lh %[load1], 8(%[input]) \n\t" "lh %[load2], 56(%[input]) \n\t" "lh %[load3], 40(%[input]) \n\t" "lh %[load4], 24(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_28_64] \n\t" "msub $ac1, %[load2], %[cospi_4_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_4_64] \n\t" "madd $ac3, %[load2], %[cospi_28_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_12_64] \n\t" "msub $ac2, %[load4], %[cospi_20_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_20_64] \n\t" "madd $ac1, %[load4], %[cospi_12_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp3], %[temp2] \n\t" "sub %[load1], %[load1], %[temp0] \n\t" "add %[load1], %[load1], %[temp1] \n\t" "sub %[load2], %[temp0], %[temp1] \n\t" "sub %[load2], %[load2], %[temp2] \n\t" "add %[load2], %[load2], %[temp3] \n\t" "madd $ac1, %[load1], %[cospi_16_64] \n\t" "madd $ac3, %[load2], %[cospi_16_64] \n\t" "extp %[step1_5], $ac1, 31 \n\t" "extp %[step1_6], $ac3, 31 \n\t" "add %[step1_4], %[temp0], %[temp1] \n\t" "add %[step1_7], %[temp3], %[temp2] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_4] "=&r"(step1_4), [step1_5] "=&r"(step1_5), [step1_6] "=&r"(step1_6), [step1_7] "=&r"(step1_7) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_20_64] "r"(cospi_20_64), [cospi_12_64] "r"(cospi_12_64), [cospi_4_64] "r"(cospi_4_64), [cospi_28_64] "r"(cospi_28_64), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "add %[step2_0], %[step1_0], %[step1_7] \n\t" "add %[step2_1], %[step1_1], %[step1_6] \n\t" "add %[step2_2], %[step1_2], %[step1_5] 
\n\t" "add %[step2_3], %[step1_3], %[step1_4] \n\t" "sub %[step2_4], %[step1_3], %[step1_4] \n\t" "sub %[step2_5], %[step1_2], %[step1_5] \n\t" "sub %[step2_6], %[step1_1], %[step1_6] \n\t" "sub %[step2_7], %[step1_0], %[step1_7] \n\t" : [step2_0] "=&r"(step2_0), [step2_4] "=&r"(step2_4), [step2_1] "=&r"(step2_1), [step2_5] "=&r"(step2_5), [step2_2] "=&r"(step2_2), [step2_6] "=&r"(step2_6), [step2_3] "=&r"(step2_3), [step2_7] "=&r"(step2_7) : [step1_0] "r"(step1_0), [step1_7] "r"(step1_7), [step1_1] "r"(step1_1), [step1_6] "r"(step1_6), [step1_2] "r"(step1_2), [step1_5] "r"(step1_5), [step1_3] "r"(step1_3), [step1_4] "r"(step1_4)); // stage 7 __asm__ __volatile__( "add %[step1_0], %[step2_0], %[step3_15] \n\t" "add %[step1_1], %[step2_1], %[step3_14] \n\t" "add %[step1_2], %[step2_2], %[step3_13] \n\t" "add %[step1_3], %[step2_3], %[step3_12] \n\t" "sub %[step1_12], %[step2_3], %[step3_12] \n\t" "sub %[step1_13], %[step2_2], %[step3_13] \n\t" "sub %[step1_14], %[step2_1], %[step3_14] \n\t" "sub %[step1_15], %[step2_0], %[step3_15] \n\t" : [step1_0] "=&r"(step1_0), [step1_12] "=&r"(step1_12), [step1_1] "=&r"(step1_1), [step1_13] "=&r"(step1_13), [step1_2] "=&r"(step1_2), [step1_14] "=&r"(step1_14), [step1_3] "=&r"(step1_3), [step1_15] "=&r"(step1_15) : [step2_0] "r"(step2_0), [step3_15] "r"(step3_15), [step2_1] "r"(step2_1), [step3_14] "r"(step3_14), [step2_2] "r"(step2_2), [step3_13] "r"(step3_13), [step2_3] "r"(step2_3), [step3_12] "r"(step3_12)); __asm__ __volatile__( "add %[step1_4], %[step2_4], %[step3_11] \n\t" "add %[step1_5], %[step2_5], %[step3_10] \n\t" "add %[step1_6], %[step2_6], %[step3_9] \n\t" "add %[step1_7], %[step2_7], %[step3_8] \n\t" "sub %[step1_8], %[step2_7], %[step3_8] \n\t" "sub %[step1_9], %[step2_6], %[step3_9] \n\t" "sub %[step1_10], %[step2_5], %[step3_10] \n\t" "sub %[step1_11], %[step2_4], %[step3_11] \n\t" : [step1_4] "=&r"(step1_4), [step1_8] "=&r"(step1_8), [step1_5] "=&r"(step1_5), [step1_9] "=&r"(step1_9), [step1_6] "=&r"(step1_6), [step1_10] "=&r"(step1_10), [step1_7] "=&r"(step1_7), [step1_11] "=&r"(step1_11) : [step2_4] "r"(step2_4), [step3_11] "r"(step3_11), [step2_5] "r"(step2_5), [step3_10] "r"(step3_10), [step2_6] "r"(step2_6), [step3_9] "r"(step3_9), [step2_7] "r"(step2_7), [step3_8] "r"(step3_8)); __asm__ __volatile__( "sub %[temp0], %[step2_27], %[step2_20] \n\t" "add %[temp1], %[step2_27], %[step2_20] \n\t" "sub %[temp2], %[step2_26], %[step2_21] \n\t" "add %[temp3], %[step2_26], %[step2_21] \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac0, %[temp0], %[cospi_16_64] \n\t" "madd $ac1, %[temp1], %[cospi_16_64] \n\t" "madd $ac2, %[temp2], %[cospi_16_64] \n\t" "madd $ac3, %[temp3], %[cospi_16_64] \n\t" "extp %[step1_20], $ac0, 31 \n\t" "extp %[step1_27], $ac1, 31 \n\t" "extp %[step1_21], $ac2, 31 \n\t" "extp %[step1_26], $ac3, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_20] "=&r"(step1_20), [step1_27] "=&r"(step1_27), [step1_21] "=&r"(step1_21), [step1_26] "=&r"(step1_26) : [const_2_power_13] "r"(const_2_power_13), [step2_20] "r"(step2_20), [step2_27] "r"(step2_27), [step2_21] "r"(step2_21), [step2_26] "r"(step2_26), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "sub %[temp0], %[step2_25], %[step2_22] \n\t" "add %[temp1], %[step2_25], %[step2_22] \n\t" "sub %[temp2], 
%[step2_24], %[step2_23] \n\t" "add %[temp3], %[step2_24], %[step2_23] \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac0, %[temp0], %[cospi_16_64] \n\t" "madd $ac1, %[temp1], %[cospi_16_64] \n\t" "madd $ac2, %[temp2], %[cospi_16_64] \n\t" "madd $ac3, %[temp3], %[cospi_16_64] \n\t" "extp %[step1_22], $ac0, 31 \n\t" "extp %[step1_25], $ac1, 31 \n\t" "extp %[step1_23], $ac2, 31 \n\t" "extp %[step1_24], $ac3, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_22] "=&r"(step1_22), [step1_25] "=&r"(step1_25), [step1_23] "=&r"(step1_23), [step1_24] "=&r"(step1_24) : [const_2_power_13] "r"(const_2_power_13), [step2_22] "r"(step2_22), [step2_25] "r"(step2_25), [step2_23] "r"(step2_23), [step2_24] "r"(step2_24), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "lbu %[temp2], 0(%[dest_pix]) \n\t" "add %[temp0], %[step1_0], %[step2_31] \n\t" "addi %[temp0], %[temp0], 32 \n\t" "sra %[temp0], %[temp0], 6 \n\t" "add %[temp2], %[temp2], %[temp0] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_1], %[step2_30] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix]) \n\t" "add %[temp0], %[step1_2], %[step2_29] \n\t" "addi %[temp0], %[temp0], 32 \n\t" "sra %[temp0], %[temp0], 6 \n\t" "add %[temp2], %[temp2], %[temp0] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_3], %[step2_28] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix] "+r"(dest_pix) : [cm] "r"(cm), [stride] "r"(stride), [step1_0] "r"(step1_0), [step1_1] "r"(step1_1), [step1_2] "r"(step1_2), [step1_3] "r"(step1_3), [step2_28] "r"(step2_28), [step2_29] "r"(step2_29), [step2_30] "r"(step2_30), [step2_31] "r"(step2_31)); step3_12 = ROUND_POWER_OF_TWO((step1_3 - step2_28), 6); step3_13 = ROUND_POWER_OF_TWO((step1_2 - step2_29), 6); step3_14 = ROUND_POWER_OF_TWO((step1_1 - step2_30), 6); step3_15 = ROUND_POWER_OF_TWO((step1_0 - step2_31), 6); __asm__ __volatile__( "lbu %[temp2], 0(%[dest_pix1]) \n\t" "add %[temp2], %[temp2], %[step3_15] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_14] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix1]) \n\t" "add %[temp2], %[temp2], %[step3_13] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], 
%[step3_12] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix1] "+r"(dest_pix1) : [cm] "r"(cm), [stride] "r"(stride), [step3_12] "r"(step3_12), [step3_13] "r"(step3_13), [step3_14] "r"(step3_14), [step3_15] "r"(step3_15)); __asm__ __volatile__( "lbu %[temp2], 0(%[dest_pix]) \n\t" "add %[temp0], %[step1_4], %[step1_27] \n\t" "addi %[temp0], %[temp0], 32 \n\t" "sra %[temp0], %[temp0], 6 \n\t" "add %[temp2], %[temp2], %[temp0] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_5], %[step1_26] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix]) \n\t" "add %[temp0], %[step1_6], %[step1_25] \n\t" "addi %[temp0], %[temp0], 32 \n\t" "sra %[temp0], %[temp0], 6 \n\t" "add %[temp2], %[temp2], %[temp0] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_7], %[step1_24] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix] "+r"(dest_pix) : [cm] "r"(cm), [stride] "r"(stride), [step1_4] "r"(step1_4), [step1_5] "r"(step1_5), [step1_6] "r"(step1_6), [step1_7] "r"(step1_7), [step1_24] "r"(step1_24), [step1_25] "r"(step1_25), [step1_26] "r"(step1_26), [step1_27] "r"(step1_27)); step3_12 = ROUND_POWER_OF_TWO((step1_7 - step1_24), 6); step3_13 = ROUND_POWER_OF_TWO((step1_6 - step1_25), 6); step3_14 = ROUND_POWER_OF_TWO((step1_5 - step1_26), 6); step3_15 = ROUND_POWER_OF_TWO((step1_4 - step1_27), 6); __asm__ __volatile__( "lbu %[temp2], 0(%[dest_pix1]) \n\t" "add %[temp2], %[temp2], %[step3_15] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_14] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix1]) \n\t" "add %[temp2], %[temp2], %[step3_13] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_12] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix1] "+r"(dest_pix1) : [cm] "r"(cm), [stride] "r"(stride), [step3_12] "r"(step3_12), [step3_13] "r"(step3_13), [step3_14] "r"(step3_14), [step3_15] "r"(step3_15)); __asm__ __volatile__( "lbu %[temp2], 0(%[dest_pix]) \n\t" "add %[temp0], %[step1_8], %[step1_23] \n\t" "addi %[temp0], %[temp0], 32 \n\t" "sra %[temp0], %[temp0], 6 \n\t" "add %[temp2], %[temp2], %[temp0] \n\t" 
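/* At this point temp2 holds the current destination pixel plus ROUND_POWER_OF_TWO(step1_8 + step1_23, 6); the lbux below clamps that sum to [0, 255] by indexing vpx_ff_cropTbl (cm), and sb then writes the reconstructed byte back. */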
"lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_9], %[step1_22] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix]) \n\t" "add %[temp0], %[step1_10], %[step1_21] \n\t" "addi %[temp0], %[temp0], 32 \n\t" "sra %[temp0], %[temp0], 6 \n\t" "add %[temp2], %[temp2], %[temp0] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_11], %[step1_20] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix] "+r"(dest_pix) : [cm] "r"(cm), [stride] "r"(stride), [step1_8] "r"(step1_8), [step1_9] "r"(step1_9), [step1_10] "r"(step1_10), [step1_11] "r"(step1_11), [step1_20] "r"(step1_20), [step1_21] "r"(step1_21), [step1_22] "r"(step1_22), [step1_23] "r"(step1_23)); step3_12 = ROUND_POWER_OF_TWO((step1_11 - step1_20), 6); step3_13 = ROUND_POWER_OF_TWO((step1_10 - step1_21), 6); step3_14 = ROUND_POWER_OF_TWO((step1_9 - step1_22), 6); step3_15 = ROUND_POWER_OF_TWO((step1_8 - step1_23), 6); __asm__ __volatile__( "lbu %[temp2], 0(%[dest_pix1]) \n\t" "add %[temp2], %[temp2], %[step3_15] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_14] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix1]) \n\t" "add %[temp2], %[temp2], %[step3_13] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_12] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix1] "+r"(dest_pix1) : [cm] "r"(cm), [stride] "r"(stride), [step3_12] "r"(step3_12), [step3_13] "r"(step3_13), [step3_14] "r"(step3_14), [step3_15] "r"(step3_15)); __asm__ __volatile__( "lbu %[temp2], 0(%[dest_pix]) \n\t" "add %[temp0], %[step1_12], %[step2_19] \n\t" "addi %[temp0], %[temp0], 32 \n\t" "sra %[temp0], %[temp0], 6 \n\t" "add %[temp2], %[temp2], %[temp0] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_13], %[step2_18] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix]) \n\t" "add %[temp0], %[step1_14], %[step2_17] \n\t" "addi %[temp0], %[temp0], 32 \n\t" "sra %[temp0], %[temp0], 6 \n\t" "add %[temp2], 
%[temp2], %[temp0] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "add %[temp1], %[step1_15], %[step2_16] \n\t" "sb %[temp0], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix]) \n\t" "addi %[temp1], %[temp1], 32 \n\t" "sra %[temp1], %[temp1], 6 \n\t" "add %[temp3], %[temp3], %[temp1] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix]) \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix] "+r"(dest_pix) : [cm] "r"(cm), [stride] "r"(stride), [step1_12] "r"(step1_12), [step1_13] "r"(step1_13), [step1_14] "r"(step1_14), [step1_15] "r"(step1_15), [step2_16] "r"(step2_16), [step2_17] "r"(step2_17), [step2_18] "r"(step2_18), [step2_19] "r"(step2_19)); step3_12 = ROUND_POWER_OF_TWO((step1_15 - step2_16), 6); step3_13 = ROUND_POWER_OF_TWO((step1_14 - step2_17), 6); step3_14 = ROUND_POWER_OF_TWO((step1_13 - step2_18), 6); step3_15 = ROUND_POWER_OF_TWO((step1_12 - step2_19), 6); __asm__ __volatile__( "lbu %[temp2], 0(%[dest_pix1]) \n\t" "add %[temp2], %[temp2], %[step3_15] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_14] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp2], 0(%[dest_pix1]) \n\t" "add %[temp2], %[temp2], %[step3_13] \n\t" "lbux %[temp0], %[temp2](%[cm]) \n\t" "sb %[temp0], 0(%[dest_pix1]) \n\t" "subu %[dest_pix1], %[dest_pix1], %[stride] \n\t" "lbu %[temp3], 0(%[dest_pix1]) \n\t" "add %[temp3], %[temp3], %[step3_12] \n\t" "lbux %[temp1], %[temp3](%[cm]) \n\t" "sb %[temp1], 0(%[dest_pix1]) \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [dest_pix1] "+r"(dest_pix1) : [cm] "r"(cm), [stride] "r"(stride), [step3_12] "r"(step3_12), [step3_13] "r"(step3_13), [step3_14] "r"(step3_14), [step3_15] "r"(step3_15)); input += 32; } } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vpx_dsp/mips/itrans32_dspr2.c000066400000000000000000002115401357355204000204130ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <assert.h> #include <stdio.h> #include "./vpx_config.h" #include "vpx_dsp/mips/inv_txfm_dspr2.h" #include "vpx_dsp/txfm_common.h" #if HAVE_DSPR2 static void idct32_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows) { int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6; int step1_7, step1_8, step1_9, step1_10, step1_11, step1_12, step1_13; int step1_14, step1_15, step1_16, step1_17, step1_18, step1_19, step1_20; int step1_21, step1_22, step1_23, step1_24, step1_25, step1_26, step1_27; int step1_28, step1_29, step1_30, step1_31; int step2_0, step2_1, step2_2, step2_3, step2_4, step2_5, step2_6; int step2_7, step2_8, step2_9, step2_10, step2_11, step2_12, step2_13; int step2_14, step2_15, step2_16, step2_17, step2_18, step2_19, step2_20; int step2_21, step2_22, step2_23, step2_24, step2_25, step2_26, step2_27; int step2_28, step2_29, step2_30, step2_31; int step3_8, step3_9, step3_10, step3_11, step3_12, step3_13, step3_14; int step3_15, step3_16, step3_17, step3_18, step3_19, step3_20, step3_21; int step3_22, step3_23, step3_24, step3_25, step3_26, step3_27, step3_28; int step3_29, step3_30, step3_31; int temp0, temp1, temp2, temp3; int load1, load2, load3, load4; int result1, result2; int i; const int const_2_power_13 = 8192; const int32_t *input_int; for (i = no_rows; i--;) { input_int = (const int32_t *)input; if (!(input_int[0] | input_int[1] | input_int[2] | input_int[3] | input_int[4] | input_int[5] | input_int[6] | input_int[7] | input_int[8] | input_int[9] | input_int[10] | input_int[11] | input_int[12] | input_int[13] | input_int[14] | input_int[15])) { input += 32; __asm__ __volatile__( "sh $zero, 0(%[output]) \n\t" "sh $zero, 64(%[output]) \n\t" "sh $zero, 128(%[output]) \n\t" "sh $zero, 192(%[output]) \n\t" "sh $zero, 256(%[output]) \n\t" "sh $zero, 320(%[output]) \n\t" "sh $zero, 384(%[output]) \n\t" "sh $zero, 448(%[output]) \n\t" "sh $zero, 512(%[output]) \n\t" "sh $zero, 576(%[output]) \n\t" "sh $zero, 640(%[output]) \n\t" "sh $zero, 704(%[output]) \n\t" "sh $zero, 768(%[output]) \n\t" "sh $zero, 832(%[output]) \n\t" "sh $zero, 896(%[output]) \n\t" "sh $zero, 960(%[output]) \n\t" "sh $zero, 1024(%[output]) \n\t" "sh $zero, 1088(%[output]) \n\t" "sh $zero, 1152(%[output]) \n\t" "sh $zero, 1216(%[output]) \n\t" "sh $zero, 1280(%[output]) \n\t" "sh $zero, 1344(%[output]) \n\t" "sh $zero, 1408(%[output]) \n\t" "sh $zero, 1472(%[output]) \n\t" "sh $zero, 1536(%[output]) \n\t" "sh $zero, 1600(%[output]) \n\t" "sh $zero, 1664(%[output]) \n\t" "sh $zero, 1728(%[output]) \n\t" "sh $zero, 1792(%[output]) \n\t" "sh $zero, 1856(%[output]) \n\t" "sh $zero, 1920(%[output]) \n\t" "sh $zero, 1984(%[output]) \n\t" : : [output] "r"(output)); output += 1; continue; } /* prefetch row */ prefetch_load((const uint8_t *)(input + 32)); prefetch_load((const uint8_t *)(input + 48)); __asm__ __volatile__( "lh %[load1], 2(%[input]) \n\t" "lh %[load2], 62(%[input]) \n\t" "lh %[load3], 34(%[input]) \n\t" "lh %[load4], 30(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_31_64] \n\t" "msub $ac1, %[load2], %[cospi_1_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_1_64] \n\t" "madd $ac3, %[load2], %[cospi_31_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_15_64] \n\t" "msub $ac2, %[load4],
%[cospi_17_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_17_64] \n\t" "madd $ac1, %[load4], %[cospi_15_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp3], %[temp2] \n\t" "sub %[load2], %[temp0], %[temp1] \n\t" "madd $ac1, %[load1], %[cospi_28_64] \n\t" "msub $ac1, %[load2], %[cospi_4_64] \n\t" "madd $ac3, %[load1], %[cospi_4_64] \n\t" "madd $ac3, %[load2], %[cospi_28_64] \n\t" "extp %[step1_17], $ac1, 31 \n\t" "extp %[step1_30], $ac3, 31 \n\t" "add %[step1_16], %[temp0], %[temp1] \n\t" "add %[step1_31], %[temp2], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_16] "=&r"(step1_16), [step1_17] "=&r"(step1_17), [step1_30] "=&r"(step1_30), [step1_31] "=&r"(step1_31) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_31_64] "r"(cospi_31_64), [cospi_1_64] "r"(cospi_1_64), [cospi_4_64] "r"(cospi_4_64), [cospi_17_64] "r"(cospi_17_64), [cospi_15_64] "r"(cospi_15_64), [cospi_28_64] "r"(cospi_28_64)); __asm__ __volatile__( "lh %[load1], 18(%[input]) \n\t" "lh %[load2], 46(%[input]) \n\t" "lh %[load3], 50(%[input]) \n\t" "lh %[load4], 14(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_23_64] \n\t" "msub $ac1, %[load2], %[cospi_9_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_9_64] \n\t" "madd $ac3, %[load2], %[cospi_23_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_7_64] \n\t" "msub $ac2, %[load4], %[cospi_25_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_25_64] \n\t" "madd $ac1, %[load4], %[cospi_7_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp1], %[temp0] \n\t" "sub %[load2], %[temp2], %[temp3] \n\t" "msub $ac1, %[load1], %[cospi_28_64] \n\t" "msub $ac1, %[load2], %[cospi_4_64] \n\t" "msub $ac3, %[load1], %[cospi_4_64] \n\t" "madd $ac3, %[load2], %[cospi_28_64] \n\t" "extp %[step1_18], $ac1, 31 \n\t" "extp %[step1_29], $ac3, 31 \n\t" "add %[step1_19], %[temp0], %[temp1] \n\t" "add %[step1_28], %[temp2], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_18] "=&r"(step1_18), [step1_19] "=&r"(step1_19), [step1_28] "=&r"(step1_28), [step1_29] "=&r"(step1_29) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_23_64] "r"(cospi_23_64), [cospi_9_64] "r"(cospi_9_64), [cospi_4_64] "r"(cospi_4_64), [cospi_7_64] "r"(cospi_7_64), [cospi_25_64] "r"(cospi_25_64), [cospi_28_64] "r"(cospi_28_64)); __asm__ __volatile__( "lh %[load1], 10(%[input]) \n\t" "lh %[load2], 54(%[input]) \n\t" "lh %[load3], 42(%[input]) \n\t" "lh %[load4], 22(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_27_64] \n\t" "msub $ac1, %[load2], %[cospi_5_64] \n\t" "extp %[temp0], 
$ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_5_64] \n\t" "madd $ac3, %[load2], %[cospi_27_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_11_64] \n\t" "msub $ac2, %[load4], %[cospi_21_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_21_64] \n\t" "madd $ac1, %[load4], %[cospi_11_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp0], %[temp1] \n\t" "sub %[load2], %[temp3], %[temp2] \n\t" "madd $ac1, %[load2], %[cospi_12_64] \n\t" "msub $ac1, %[load1], %[cospi_20_64] \n\t" "madd $ac3, %[load1], %[cospi_12_64] \n\t" "madd $ac3, %[load2], %[cospi_20_64] \n\t" "extp %[step1_21], $ac1, 31 \n\t" "extp %[step1_26], $ac3, 31 \n\t" "add %[step1_20], %[temp0], %[temp1] \n\t" "add %[step1_27], %[temp2], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_20] "=&r"(step1_20), [step1_21] "=&r"(step1_21), [step1_26] "=&r"(step1_26), [step1_27] "=&r"(step1_27) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_27_64] "r"(cospi_27_64), [cospi_5_64] "r"(cospi_5_64), [cospi_11_64] "r"(cospi_11_64), [cospi_21_64] "r"(cospi_21_64), [cospi_12_64] "r"(cospi_12_64), [cospi_20_64] "r"(cospi_20_64)); __asm__ __volatile__( "lh %[load1], 26(%[input]) \n\t" "lh %[load2], 38(%[input]) \n\t" "lh %[load3], 58(%[input]) \n\t" "lh %[load4], 6(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_19_64] \n\t" "msub $ac1, %[load2], %[cospi_13_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_13_64] \n\t" "madd $ac3, %[load2], %[cospi_19_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_3_64] \n\t" "msub $ac2, %[load4], %[cospi_29_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_29_64] \n\t" "madd $ac1, %[load4], %[cospi_3_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp1], %[temp0] \n\t" "sub %[load2], %[temp2], %[temp3] \n\t" "msub $ac1, %[load1], %[cospi_12_64] \n\t" "msub $ac1, %[load2], %[cospi_20_64] \n\t" "msub $ac3, %[load1], %[cospi_20_64] \n\t" "madd $ac3, %[load2], %[cospi_12_64] \n\t" "extp %[step1_22], $ac1, 31 \n\t" "extp %[step1_25], $ac3, 31 \n\t" "add %[step1_23], %[temp0], %[temp1] \n\t" "add %[step1_24], %[temp2], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_22] "=&r"(step1_22), [step1_23] "=&r"(step1_23), [step1_24] "=&r"(step1_24), [step1_25] "=&r"(step1_25) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_19_64] "r"(cospi_19_64), [cospi_13_64] "r"(cospi_13_64), [cospi_3_64] "r"(cospi_3_64), [cospi_29_64] "r"(cospi_29_64), [cospi_12_64] "r"(cospi_12_64), [cospi_20_64] "r"(cospi_20_64)); __asm__ __volatile__( "lh %[load1], 4(%[input]) 
\n\t" "lh %[load2], 60(%[input]) \n\t" "lh %[load3], 36(%[input]) \n\t" "lh %[load4], 28(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_30_64] \n\t" "msub $ac1, %[load2], %[cospi_2_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_2_64] \n\t" "madd $ac3, %[load2], %[cospi_30_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_14_64] \n\t" "msub $ac2, %[load4], %[cospi_18_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_18_64] \n\t" "madd $ac1, %[load4], %[cospi_14_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp0], %[temp1] \n\t" "sub %[load2], %[temp3], %[temp2] \n\t" "msub $ac1, %[load1], %[cospi_8_64] \n\t" "madd $ac1, %[load2], %[cospi_24_64] \n\t" "madd $ac3, %[load1], %[cospi_24_64] \n\t" "madd $ac3, %[load2], %[cospi_8_64] \n\t" "extp %[step2_9], $ac1, 31 \n\t" "extp %[step2_14], $ac3, 31 \n\t" "add %[step2_8], %[temp0], %[temp1] \n\t" "add %[step2_15], %[temp2], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step2_8] "=&r"(step2_8), [step2_9] "=&r"(step2_9), [step2_14] "=&r"(step2_14), [step2_15] "=&r"(step2_15) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_30_64] "r"(cospi_30_64), [cospi_2_64] "r"(cospi_2_64), [cospi_14_64] "r"(cospi_14_64), [cospi_18_64] "r"(cospi_18_64), [cospi_8_64] "r"(cospi_8_64), [cospi_24_64] "r"(cospi_24_64)); __asm__ __volatile__( "lh %[load1], 20(%[input]) \n\t" "lh %[load2], 44(%[input]) \n\t" "lh %[load3], 52(%[input]) \n\t" "lh %[load4], 12(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_22_64] \n\t" "msub $ac1, %[load2], %[cospi_10_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_10_64] \n\t" "madd $ac3, %[load2], %[cospi_22_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_6_64] \n\t" "msub $ac2, %[load4], %[cospi_26_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_26_64] \n\t" "madd $ac1, %[load4], %[cospi_6_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp1], %[temp0] \n\t" "sub %[load2], %[temp2], %[temp3] \n\t" "msub $ac1, %[load1], %[cospi_24_64] \n\t" "msub $ac1, %[load2], %[cospi_8_64] \n\t" "madd $ac3, %[load2], %[cospi_24_64] \n\t" "msub $ac3, %[load1], %[cospi_8_64] \n\t" "extp %[step2_10], $ac1, 31 \n\t" "extp %[step2_13], $ac3, 31 \n\t" "add %[step2_11], %[temp0], %[temp1] \n\t" "add %[step2_12], %[temp2], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step2_10] "=&r"(step2_10), [step2_11] "=&r"(step2_11), [step2_12] "=&r"(step2_12), [step2_13] 
"=&r"(step2_13) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_22_64] "r"(cospi_22_64), [cospi_10_64] "r"(cospi_10_64), [cospi_6_64] "r"(cospi_6_64), [cospi_26_64] "r"(cospi_26_64), [cospi_8_64] "r"(cospi_8_64), [cospi_24_64] "r"(cospi_24_64)); __asm__ __volatile__( "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "sub %[temp0], %[step2_14], %[step2_13] \n\t" "sub %[temp0], %[temp0], %[step2_9] \n\t" "add %[temp0], %[temp0], %[step2_10] \n\t" "madd $ac0, %[temp0], %[cospi_16_64] \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sub %[temp1], %[step2_14], %[step2_13] \n\t" "add %[temp1], %[temp1], %[step2_9] \n\t" "sub %[temp1], %[temp1], %[step2_10] \n\t" "madd $ac1, %[temp1], %[cospi_16_64] \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "sub %[temp0], %[step2_15], %[step2_12] \n\t" "sub %[temp0], %[temp0], %[step2_8] \n\t" "add %[temp0], %[temp0], %[step2_11] \n\t" "madd $ac2, %[temp0], %[cospi_16_64] \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[temp1], %[step2_15], %[step2_12] \n\t" "add %[temp1], %[temp1], %[step2_8] \n\t" "sub %[temp1], %[temp1], %[step2_11] \n\t" "madd $ac3, %[temp1], %[cospi_16_64] \n\t" "add %[step3_8], %[step2_8], %[step2_11] \n\t" "add %[step3_9], %[step2_9], %[step2_10] \n\t" "add %[step3_14], %[step2_13], %[step2_14] \n\t" "add %[step3_15], %[step2_12], %[step2_15] \n\t" "extp %[step3_10], $ac0, 31 \n\t" "extp %[step3_13], $ac1, 31 \n\t" "extp %[step3_11], $ac2, 31 \n\t" "extp %[step3_12], $ac3, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [step3_8] "=&r"(step3_8), [step3_9] "=&r"(step3_9), [step3_10] "=&r"(step3_10), [step3_11] "=&r"(step3_11), [step3_12] "=&r"(step3_12), [step3_13] "=&r"(step3_13), [step3_14] "=&r"(step3_14), [step3_15] "=&r"(step3_15) : [const_2_power_13] "r"(const_2_power_13), [step2_8] "r"(step2_8), [step2_9] "r"(step2_9), [step2_10] "r"(step2_10), [step2_11] "r"(step2_11), [step2_12] "r"(step2_12), [step2_13] "r"(step2_13), [step2_14] "r"(step2_14), [step2_15] "r"(step2_15), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sub %[temp0], %[step1_17], %[step1_18] \n\t" "sub %[temp1], %[step1_30], %[step1_29] \n\t" "add %[step3_17], %[step1_17], %[step1_18] \n\t" "add %[step3_30], %[step1_30], %[step1_29] \n\t" "msub $ac0, %[temp0], %[cospi_8_64] \n\t" "madd $ac0, %[temp1], %[cospi_24_64] \n\t" "extp %[step3_18], $ac0, 31 \n\t" "madd $ac1, %[temp0], %[cospi_24_64] \n\t" "madd $ac1, %[temp1], %[cospi_8_64] \n\t" "extp %[step3_29], $ac1, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [step3_18] "=&r"(step3_18), [step3_29] "=&r"(step3_29), [step3_17] "=&r"(step3_17), [step3_30] "=&r"(step3_30) : [const_2_power_13] "r"(const_2_power_13), [step1_17] "r"(step1_17), [step1_18] "r"(step1_18), [step1_30] "r"(step1_30), [step1_29] "r"(step1_29), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64)); __asm__ __volatile__( "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sub %[temp0], %[step1_16], %[step1_19] \n\t" "sub %[temp1], %[step1_31], %[step1_28] \n\t" "add %[step3_16], %[step1_16], %[step1_19] \n\t" "add %[step3_31], %[step1_31], %[step1_28] \n\t" "msub $ac0, %[temp0], %[cospi_8_64] \n\t" "madd $ac0, %[temp1], %[cospi_24_64] \n\t" "extp %[step3_19], $ac0, 31 \n\t" "madd $ac1, %[temp0], %[cospi_24_64] \n\t" "madd 
$ac1, %[temp1], %[cospi_8_64] \n\t" "extp %[step3_28], $ac1, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [step3_16] "=&r"(step3_16), [step3_31] "=&r"(step3_31), [step3_19] "=&r"(step3_19), [step3_28] "=&r"(step3_28) : [const_2_power_13] "r"(const_2_power_13), [step1_16] "r"(step1_16), [step1_19] "r"(step1_19), [step1_31] "r"(step1_31), [step1_28] "r"(step1_28), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64)); __asm__ __volatile__( "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sub %[temp0], %[step1_23], %[step1_20] \n\t" "sub %[temp1], %[step1_24], %[step1_27] \n\t" "add %[step3_23], %[step1_23], %[step1_20] \n\t" "add %[step3_24], %[step1_24], %[step1_27] \n\t" "msub $ac0, %[temp0], %[cospi_8_64] \n\t" "madd $ac0, %[temp1], %[cospi_24_64] \n\t" "extp %[step3_27], $ac0, 31 \n\t" "msub $ac1, %[temp0], %[cospi_24_64] \n\t" "msub $ac1, %[temp1], %[cospi_8_64] \n\t" "extp %[step3_20], $ac1, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [step3_23] "=&r"(step3_23), [step3_24] "=&r"(step3_24), [step3_20] "=&r"(step3_20), [step3_27] "=&r"(step3_27) : [const_2_power_13] "r"(const_2_power_13), [step1_23] "r"(step1_23), [step1_20] "r"(step1_20), [step1_24] "r"(step1_24), [step1_27] "r"(step1_27), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64)); __asm__ __volatile__( "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "sub %[temp0], %[step1_22], %[step1_21] \n\t" "sub %[temp1], %[step1_25], %[step1_26] \n\t" "add %[step3_22], %[step1_22], %[step1_21] \n\t" "add %[step3_25], %[step1_25], %[step1_26] \n\t" "msub $ac0, %[temp0], %[cospi_24_64] \n\t" "msub $ac0, %[temp1], %[cospi_8_64] \n\t" "extp %[step3_21], $ac0, 31 \n\t" "msub $ac1, %[temp0], %[cospi_8_64] \n\t" "madd $ac1, %[temp1], %[cospi_24_64] \n\t" "extp %[step3_26], $ac1, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [step3_22] "=&r"(step3_22), [step3_25] "=&r"(step3_25), [step3_21] "=&r"(step3_21), [step3_26] "=&r"(step3_26) : [const_2_power_13] "r"(const_2_power_13), [step1_22] "r"(step1_22), [step1_21] "r"(step1_21), [step1_25] "r"(step1_25), [step1_26] "r"(step1_26), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64)); __asm__ __volatile__( "add %[step2_16], %[step3_16], %[step3_23] \n\t" "add %[step2_17], %[step3_17], %[step3_22] \n\t" "add %[step2_18], %[step3_18], %[step3_21] \n\t" "add %[step2_19], %[step3_19], %[step3_20] \n\t" "sub %[step2_20], %[step3_19], %[step3_20] \n\t" "sub %[step2_21], %[step3_18], %[step3_21] \n\t" "sub %[step2_22], %[step3_17], %[step3_22] \n\t" "sub %[step2_23], %[step3_16], %[step3_23] \n\t" : [step2_16] "=&r"(step2_16), [step2_17] "=&r"(step2_17), [step2_18] "=&r"(step2_18), [step2_19] "=&r"(step2_19), [step2_20] "=&r"(step2_20), [step2_21] "=&r"(step2_21), [step2_22] "=&r"(step2_22), [step2_23] "=&r"(step2_23) : [step3_16] "r"(step3_16), [step3_23] "r"(step3_23), [step3_17] "r"(step3_17), [step3_22] "r"(step3_22), [step3_18] "r"(step3_18), [step3_21] "r"(step3_21), [step3_19] "r"(step3_19), [step3_20] "r"(step3_20)); __asm__ __volatile__( "sub %[step2_24], %[step3_31], %[step3_24] \n\t" "sub %[step2_25], %[step3_30], %[step3_25] \n\t" "sub %[step2_26], %[step3_29], %[step3_26] \n\t" "sub %[step2_27], %[step3_28], %[step3_27] \n\t" "add %[step2_28], %[step3_28], %[step3_27] \n\t" "add %[step2_29], %[step3_29], %[step3_26] \n\t" "add %[step2_30], %[step3_30], %[step3_25] \n\t" "add %[step2_31], 
%[step3_31], %[step3_24] \n\t" : [step2_24] "=&r"(step2_24), [step2_28] "=&r"(step2_28), [step2_25] "=&r"(step2_25), [step2_29] "=&r"(step2_29), [step2_26] "=&r"(step2_26), [step2_30] "=&r"(step2_30), [step2_27] "=&r"(step2_27), [step2_31] "=&r"(step2_31) : [step3_31] "r"(step3_31), [step3_24] "r"(step3_24), [step3_30] "r"(step3_30), [step3_25] "r"(step3_25), [step3_29] "r"(step3_29), [step3_26] "r"(step3_26), [step3_28] "r"(step3_28), [step3_27] "r"(step3_27)); __asm__ __volatile__( "lh %[load1], 0(%[input]) \n\t" "lh %[load2], 32(%[input]) \n\t" "lh %[load3], 16(%[input]) \n\t" "lh %[load4], 48(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "add %[result1], %[load1], %[load2] \n\t" "sub %[result2], %[load1], %[load2] \n\t" "madd $ac1, %[result1], %[cospi_16_64] \n\t" "madd $ac2, %[result2], %[cospi_16_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "extp %[temp1], $ac2, 31 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac3, %[load3], %[cospi_24_64] \n\t" "msub $ac3, %[load4], %[cospi_8_64] \n\t" "extp %[temp2], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "madd $ac1, %[load3], %[cospi_8_64] \n\t" "madd $ac1, %[load4], %[cospi_24_64] \n\t" "extp %[temp3], $ac1, 31 \n\t" "add %[step1_0], %[temp0], %[temp3] \n\t" "add %[step1_1], %[temp1], %[temp2] \n\t" "sub %[step1_2], %[temp1], %[temp2] \n\t" "sub %[step1_3], %[temp0], %[temp3] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), [result1] "=&r"(result1), [result2] "=&r"(result2), [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_0] "=&r"(step1_0), [step1_1] "=&r"(step1_1), [step1_2] "=&r"(step1_2), [step1_3] "=&r"(step1_3) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_24_64] "r"(cospi_24_64), [cospi_8_64] "r"(cospi_8_64), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "lh %[load1], 8(%[input]) \n\t" "lh %[load2], 56(%[input]) \n\t" "lh %[load3], 40(%[input]) \n\t" "lh %[load4], 24(%[input]) \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac1, %[load1], %[cospi_28_64] \n\t" "msub $ac1, %[load2], %[cospi_4_64] \n\t" "extp %[temp0], $ac1, 31 \n\t" "madd $ac3, %[load1], %[cospi_4_64] \n\t" "madd $ac3, %[load2], %[cospi_28_64] \n\t" "extp %[temp3], $ac3, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "madd $ac2, %[load3], %[cospi_12_64] \n\t" "msub $ac2, %[load4], %[cospi_20_64] \n\t" "extp %[temp1], $ac2, 31 \n\t" "madd $ac1, %[load3], %[cospi_20_64] \n\t" "madd $ac1, %[load4], %[cospi_12_64] \n\t" "extp %[temp2], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "sub %[load1], %[temp3], %[temp2] \n\t" "sub %[load1], %[load1], %[temp0] \n\t" "add %[load1], %[load1], %[temp1] \n\t" "sub %[load2], %[temp0], %[temp1] \n\t" "sub %[load2], %[load2], %[temp2] \n\t" "add %[load2], %[load2], %[temp3] \n\t" "madd $ac1, %[load1], %[cospi_16_64] \n\t" "madd $ac3, %[load2], %[cospi_16_64] \n\t" "extp %[step1_5], $ac1, 31 \n\t" "extp %[step1_6], $ac3, 31 \n\t" "add %[step1_4], %[temp0], %[temp1] \n\t" "add %[step1_7], %[temp3], %[temp2] \n\t" : [load1] "=&r"(load1), [load2] "=&r"(load2), [load3] "=&r"(load3), [load4] "=&r"(load4), 
[temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_4] "=&r"(step1_4), [step1_5] "=&r"(step1_5), [step1_6] "=&r"(step1_6), [step1_7] "=&r"(step1_7) : [const_2_power_13] "r"(const_2_power_13), [input] "r"(input), [cospi_20_64] "r"(cospi_20_64), [cospi_12_64] "r"(cospi_12_64), [cospi_4_64] "r"(cospi_4_64), [cospi_28_64] "r"(cospi_28_64), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "add %[step2_0], %[step1_0], %[step1_7] \n\t" "add %[step2_1], %[step1_1], %[step1_6] \n\t" "add %[step2_2], %[step1_2], %[step1_5] \n\t" "add %[step2_3], %[step1_3], %[step1_4] \n\t" "sub %[step2_4], %[step1_3], %[step1_4] \n\t" "sub %[step2_5], %[step1_2], %[step1_5] \n\t" "sub %[step2_6], %[step1_1], %[step1_6] \n\t" "sub %[step2_7], %[step1_0], %[step1_7] \n\t" : [step2_0] "=&r"(step2_0), [step2_4] "=&r"(step2_4), [step2_1] "=&r"(step2_1), [step2_5] "=&r"(step2_5), [step2_2] "=&r"(step2_2), [step2_6] "=&r"(step2_6), [step2_3] "=&r"(step2_3), [step2_7] "=&r"(step2_7) : [step1_0] "r"(step1_0), [step1_7] "r"(step1_7), [step1_1] "r"(step1_1), [step1_6] "r"(step1_6), [step1_2] "r"(step1_2), [step1_5] "r"(step1_5), [step1_3] "r"(step1_3), [step1_4] "r"(step1_4)); // stage 7 __asm__ __volatile__( "add %[step1_0], %[step2_0], %[step3_15] \n\t" "add %[step1_1], %[step2_1], %[step3_14] \n\t" "add %[step1_2], %[step2_2], %[step3_13] \n\t" "add %[step1_3], %[step2_3], %[step3_12] \n\t" "sub %[step1_12], %[step2_3], %[step3_12] \n\t" "sub %[step1_13], %[step2_2], %[step3_13] \n\t" "sub %[step1_14], %[step2_1], %[step3_14] \n\t" "sub %[step1_15], %[step2_0], %[step3_15] \n\t" : [step1_0] "=&r"(step1_0), [step1_12] "=&r"(step1_12), [step1_1] "=&r"(step1_1), [step1_13] "=&r"(step1_13), [step1_2] "=&r"(step1_2), [step1_14] "=&r"(step1_14), [step1_3] "=&r"(step1_3), [step1_15] "=&r"(step1_15) : [step2_0] "r"(step2_0), [step3_15] "r"(step3_15), [step2_1] "r"(step2_1), [step3_14] "r"(step3_14), [step2_2] "r"(step2_2), [step3_13] "r"(step3_13), [step2_3] "r"(step2_3), [step3_12] "r"(step3_12)); __asm__ __volatile__( "add %[step1_4], %[step2_4], %[step3_11] \n\t" "add %[step1_5], %[step2_5], %[step3_10] \n\t" "add %[step1_6], %[step2_6], %[step3_9] \n\t" "add %[step1_7], %[step2_7], %[step3_8] \n\t" "sub %[step1_8], %[step2_7], %[step3_8] \n\t" "sub %[step1_9], %[step2_6], %[step3_9] \n\t" "sub %[step1_10], %[step2_5], %[step3_10] \n\t" "sub %[step1_11], %[step2_4], %[step3_11] \n\t" : [step1_4] "=&r"(step1_4), [step1_8] "=&r"(step1_8), [step1_5] "=&r"(step1_5), [step1_9] "=&r"(step1_9), [step1_6] "=&r"(step1_6), [step1_10] "=&r"(step1_10), [step1_7] "=&r"(step1_7), [step1_11] "=&r"(step1_11) : [step2_4] "r"(step2_4), [step3_11] "r"(step3_11), [step2_5] "r"(step2_5), [step3_10] "r"(step3_10), [step2_6] "r"(step2_6), [step3_9] "r"(step3_9), [step2_7] "r"(step2_7), [step3_8] "r"(step3_8)); __asm__ __volatile__( "sub %[temp0], %[step2_27], %[step2_20] \n\t" "add %[temp1], %[step2_27], %[step2_20] \n\t" "sub %[temp2], %[step2_26], %[step2_21] \n\t" "add %[temp3], %[step2_26], %[step2_21] \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac0, %[temp0], %[cospi_16_64] \n\t" "madd $ac1, %[temp1], %[cospi_16_64] \n\t" "madd $ac2, %[temp2], %[cospi_16_64] \n\t" "madd $ac3, %[temp3], %[cospi_16_64] \n\t" "extp %[step1_20], $ac0, 31 \n\t" "extp %[step1_27], $ac1, 31 \n\t" "extp 
%[step1_21], $ac2, 31 \n\t" "extp %[step1_26], $ac3, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_20] "=&r"(step1_20), [step1_27] "=&r"(step1_27), [step1_21] "=&r"(step1_21), [step1_26] "=&r"(step1_26) : [const_2_power_13] "r"(const_2_power_13), [step2_20] "r"(step2_20), [step2_27] "r"(step2_27), [step2_21] "r"(step2_21), [step2_26] "r"(step2_26), [cospi_16_64] "r"(cospi_16_64)); __asm__ __volatile__( "sub %[temp0], %[step2_25], %[step2_22] \n\t" "add %[temp1], %[step2_25], %[step2_22] \n\t" "sub %[temp2], %[step2_24], %[step2_23] \n\t" "add %[temp3], %[step2_24], %[step2_23] \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "mtlo %[const_2_power_13], $ac2 \n\t" "mthi $zero, $ac2 \n\t" "mtlo %[const_2_power_13], $ac3 \n\t" "mthi $zero, $ac3 \n\t" "madd $ac0, %[temp0], %[cospi_16_64] \n\t" "madd $ac1, %[temp1], %[cospi_16_64] \n\t" "madd $ac2, %[temp2], %[cospi_16_64] \n\t" "madd $ac3, %[temp3], %[cospi_16_64] \n\t" "extp %[step1_22], $ac0, 31 \n\t" "extp %[step1_25], $ac1, 31 \n\t" "extp %[step1_23], $ac2, 31 \n\t" "extp %[step1_24], $ac3, 31 \n\t" : [temp0] "=&r"(temp0), [temp1] "=&r"(temp1), [temp2] "=&r"(temp2), [temp3] "=&r"(temp3), [step1_22] "=&r"(step1_22), [step1_25] "=&r"(step1_25), [step1_23] "=&r"(step1_23), [step1_24] "=&r"(step1_24) : [const_2_power_13] "r"(const_2_power_13), [step2_22] "r"(step2_22), [step2_25] "r"(step2_25), [step2_23] "r"(step2_23), [step2_24] "r"(step2_24), [cospi_16_64] "r"(cospi_16_64)); // final stage __asm__ __volatile__( "add %[temp0], %[step1_0], %[step2_31] \n\t" "add %[temp1], %[step1_1], %[step2_30] \n\t" "add %[temp2], %[step1_2], %[step2_29] \n\t" "add %[temp3], %[step1_3], %[step2_28] \n\t" "sub %[load1], %[step1_3], %[step2_28] \n\t" "sub %[load2], %[step1_2], %[step2_29] \n\t" "sub %[load3], %[step1_1], %[step2_30] \n\t" "sub %[load4], %[step1_0], %[step2_31] \n\t" "sh %[temp0], 0(%[output]) \n\t" "sh %[temp1], 64(%[output]) \n\t" "sh %[temp2], 128(%[output]) \n\t" "sh %[temp3], 192(%[output]) \n\t" "sh %[load1], 1792(%[output]) \n\t" "sh %[load2], 1856(%[output]) \n\t" "sh %[load3], 1920(%[output]) \n\t" "sh %[load4], 1984(%[output]) \n\t" : [temp0] "=&r"(temp0), [load1] "=&r"(load1), [temp1] "=&r"(temp1), [load2] "=&r"(load2), [temp2] "=&r"(temp2), [load3] "=&r"(load3), [temp3] "=&r"(temp3), [load4] "=&r"(load4) : [step1_0] "r"(step1_0), [step2_31] "r"(step2_31), [step1_1] "r"(step1_1), [step2_30] "r"(step2_30), [step1_2] "r"(step1_2), [step2_29] "r"(step2_29), [step1_3] "r"(step1_3), [step2_28] "r"(step2_28), [output] "r"(output)); __asm__ __volatile__( "add %[temp0], %[step1_4], %[step1_27] \n\t" "add %[temp1], %[step1_5], %[step1_26] \n\t" "add %[temp2], %[step1_6], %[step1_25] \n\t" "add %[temp3], %[step1_7], %[step1_24] \n\t" "sub %[load1], %[step1_7], %[step1_24] \n\t" "sub %[load2], %[step1_6], %[step1_25] \n\t" "sub %[load3], %[step1_5], %[step1_26] \n\t" "sub %[load4], %[step1_4], %[step1_27] \n\t" "sh %[temp0], 256(%[output]) \n\t" "sh %[temp1], 320(%[output]) \n\t" "sh %[temp2], 384(%[output]) \n\t" "sh %[temp3], 448(%[output]) \n\t" "sh %[load1], 1536(%[output]) \n\t" "sh %[load2], 1600(%[output]) \n\t" "sh %[load3], 1664(%[output]) \n\t" "sh %[load4], 1728(%[output]) \n\t" : [temp0] "=&r"(temp0), [load1] "=&r"(load1), [temp1] "=&r"(temp1), [load2] "=&r"(load2), [temp2] "=&r"(temp2), [load3] "=&r"(load3), [temp3] "=&r"(temp3), [load4] "=&r"(load4) : [step1_4] "r"(step1_4), [step1_27] 
"r"(step1_27), [step1_5] "r"(step1_5), [step1_26] "r"(step1_26), [step1_6] "r"(step1_6), [step1_25] "r"(step1_25), [step1_7] "r"(step1_7), [step1_24] "r"(step1_24), [output] "r"(output)); __asm__ __volatile__( "add %[temp0], %[step1_8], %[step1_23] \n\t" "add %[temp1], %[step1_9], %[step1_22] \n\t" "add %[temp2], %[step1_10], %[step1_21] \n\t" "add %[temp3], %[step1_11], %[step1_20] \n\t" "sub %[load1], %[step1_11], %[step1_20] \n\t" "sub %[load2], %[step1_10], %[step1_21] \n\t" "sub %[load3], %[step1_9], %[step1_22] \n\t" "sub %[load4], %[step1_8], %[step1_23] \n\t" "sh %[temp0], 512(%[output]) \n\t" "sh %[temp1], 576(%[output]) \n\t" "sh %[temp2], 640(%[output]) \n\t" "sh %[temp3], 704(%[output]) \n\t" "sh %[load1], 1280(%[output]) \n\t" "sh %[load2], 1344(%[output]) \n\t" "sh %[load3], 1408(%[output]) \n\t" "sh %[load4], 1472(%[output]) \n\t" : [temp0] "=&r"(temp0), [load1] "=&r"(load1), [temp1] "=&r"(temp1), [load2] "=&r"(load2), [temp2] "=&r"(temp2), [load3] "=&r"(load3), [temp3] "=&r"(temp3), [load4] "=&r"(load4) : [step1_8] "r"(step1_8), [step1_23] "r"(step1_23), [step1_9] "r"(step1_9), [step1_22] "r"(step1_22), [step1_10] "r"(step1_10), [step1_21] "r"(step1_21), [step1_11] "r"(step1_11), [step1_20] "r"(step1_20), [output] "r"(output)); __asm__ __volatile__( "add %[temp0], %[step1_12], %[step2_19] \n\t" "add %[temp1], %[step1_13], %[step2_18] \n\t" "add %[temp2], %[step1_14], %[step2_17] \n\t" "add %[temp3], %[step1_15], %[step2_16] \n\t" "sub %[load1], %[step1_15], %[step2_16] \n\t" "sub %[load2], %[step1_14], %[step2_17] \n\t" "sub %[load3], %[step1_13], %[step2_18] \n\t" "sub %[load4], %[step1_12], %[step2_19] \n\t" "sh %[temp0], 768(%[output]) \n\t" "sh %[temp1], 832(%[output]) \n\t" "sh %[temp2], 896(%[output]) \n\t" "sh %[temp3], 960(%[output]) \n\t" "sh %[load1], 1024(%[output]) \n\t" "sh %[load2], 1088(%[output]) \n\t" "sh %[load3], 1152(%[output]) \n\t" "sh %[load4], 1216(%[output]) \n\t" : [temp0] "=&r"(temp0), [load1] "=&r"(load1), [temp1] "=&r"(temp1), [load2] "=&r"(load2), [temp2] "=&r"(temp2), [load3] "=&r"(load3), [temp3] "=&r"(temp3), [load4] "=&r"(load4) : [step1_12] "r"(step1_12), [step2_19] "r"(step2_19), [step1_13] "r"(step1_13), [step2_18] "r"(step2_18), [step1_14] "r"(step1_14), [step2_17] "r"(step2_17), [step1_15] "r"(step1_15), [step2_16] "r"(step2_16), [output] "r"(output)); input += 32; output += 1; } } void vpx_idct32x32_1024_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { DECLARE_ALIGNED(32, int16_t, out[32 * 32]); int16_t *outptr = out; uint32_t pos = 45; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); // Rows idct32_rows_dspr2(input, outptr, 32); // Columns vpx_idct32_cols_add_blk_dspr2(out, dest, stride); } void vpx_idct32x32_34_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { DECLARE_ALIGNED(32, int16_t, out[32 * 32]); int16_t *outptr = out; uint32_t i; uint32_t pos = 45; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); // Rows idct32_rows_dspr2(input, outptr, 8); outptr += 8; __asm__ __volatile__( "sw $zero, 0(%[outptr]) \n\t" "sw $zero, 4(%[outptr]) \n\t" "sw $zero, 8(%[outptr]) \n\t" "sw $zero, 12(%[outptr]) \n\t" "sw $zero, 16(%[outptr]) \n\t" "sw $zero, 20(%[outptr]) \n\t" "sw $zero, 24(%[outptr]) \n\t" "sw $zero, 28(%[outptr]) \n\t" "sw $zero, 32(%[outptr]) \n\t" "sw $zero, 36(%[outptr]) \n\t" "sw $zero, 40(%[outptr]) \n\t" "sw $zero, 44(%[outptr]) \n\t" : : [outptr] "r"(outptr)); for (i = 0; i < 31; ++i) { 
outptr += 32; __asm__ __volatile__( "sw $zero, 0(%[outptr]) \n\t" "sw $zero, 4(%[outptr]) \n\t" "sw $zero, 8(%[outptr]) \n\t" "sw $zero, 12(%[outptr]) \n\t" "sw $zero, 16(%[outptr]) \n\t" "sw $zero, 20(%[outptr]) \n\t" "sw $zero, 24(%[outptr]) \n\t" "sw $zero, 28(%[outptr]) \n\t" "sw $zero, 32(%[outptr]) \n\t" "sw $zero, 36(%[outptr]) \n\t" "sw $zero, 40(%[outptr]) \n\t" "sw $zero, 44(%[outptr]) \n\t" : : [outptr] "r"(outptr)); } // Columns vpx_idct32_cols_add_blk_dspr2(out, dest, stride); } void vpx_idct32x32_1_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { int r, out; int32_t a1, absa1; int32_t vector_a1; int32_t t1, t2, t3, t4; int32_t vector_1, vector_2, vector_3, vector_4; uint32_t pos = 45; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); out = DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input[0]); __asm__ __volatile__( "addi %[out], %[out], 32 \n\t" "sra %[a1], %[out], 6 \n\t" : [out] "+r"(out), [a1] "=r"(a1) :); if (a1 < 0) { /* use quad-byte * input and output memory are four byte aligned */ __asm__ __volatile__( "abs %[absa1], %[a1] \n\t" "replv.qb %[vector_a1], %[absa1] \n\t" : [absa1] "=&r"(absa1), [vector_a1] "=&r"(vector_a1) : [a1] "r"(a1)); for (r = 32; r--;) { __asm__ __volatile__( "lw %[t1], 0(%[dest]) \n\t" "lw %[t2], 4(%[dest]) \n\t" "lw %[t3], 8(%[dest]) \n\t" "lw %[t4], 12(%[dest]) \n\t" "subu_s.qb %[vector_1], %[t1], %[vector_a1] \n\t" "subu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t" "subu_s.qb %[vector_3], %[t3], %[vector_a1] \n\t" "subu_s.qb %[vector_4], %[t4], %[vector_a1] \n\t" "sw %[vector_1], 0(%[dest]) \n\t" "sw %[vector_2], 4(%[dest]) \n\t" "sw %[vector_3], 8(%[dest]) \n\t" "sw %[vector_4], 12(%[dest]) \n\t" "lw %[t1], 16(%[dest]) \n\t" "lw %[t2], 20(%[dest]) \n\t" "lw %[t3], 24(%[dest]) \n\t" "lw %[t4], 28(%[dest]) \n\t" "subu_s.qb %[vector_1], %[t1], %[vector_a1] \n\t" "subu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t" "subu_s.qb %[vector_3], %[t3], %[vector_a1] \n\t" "subu_s.qb %[vector_4], %[t4], %[vector_a1] \n\t" "sw %[vector_1], 16(%[dest]) \n\t" "sw %[vector_2], 20(%[dest]) \n\t" "sw %[vector_3], 24(%[dest]) \n\t" "sw %[vector_4], 28(%[dest]) \n\t" "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [t3] "=&r"(t3), [t4] "=&r"(t4), [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2), [vector_3] "=&r"(vector_3), [vector_4] "=&r"(vector_4), [dest] "+&r"(dest) : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } else if (a1 > 255) { int32_t a11, a12, vector_a11, vector_a12; /* use quad-byte * input and output memory are four byte aligned */ a11 = a1 >> 1; a12 = a1 - a11; __asm__ __volatile__( "replv.qb %[vector_a11], %[a11] \n\t" "replv.qb %[vector_a12], %[a12] \n\t" : [vector_a11] "=&r"(vector_a11), [vector_a12] "=&r"(vector_a12) : [a11] "r"(a11), [a12] "r"(a12)); for (r = 32; r--;) { __asm__ __volatile__( "lw %[t1], 0(%[dest]) \n\t" "lw %[t2], 4(%[dest]) \n\t" "lw %[t3], 8(%[dest]) \n\t" "lw %[t4], 12(%[dest]) \n\t" "addu_s.qb %[vector_1], %[t1], %[vector_a11] \n\t" "addu_s.qb %[vector_2], %[t2], %[vector_a11] \n\t" "addu_s.qb %[vector_3], %[t3], %[vector_a11] \n\t" "addu_s.qb %[vector_4], %[t4], %[vector_a11] \n\t" "addu_s.qb %[vector_1], %[vector_1], %[vector_a12] \n\t" "addu_s.qb %[vector_2], %[vector_2], %[vector_a12] \n\t" "addu_s.qb %[vector_3], %[vector_3], %[vector_a12] \n\t" "addu_s.qb %[vector_4], %[vector_4], %[vector_a12] \n\t" "sw %[vector_1], 0(%[dest]) \n\t" "sw %[vector_2], 4(%[dest]) \n\t" "sw %[vector_3], 8(%[dest]) \n\t" "sw %[vector_4], 
12(%[dest]) \n\t" "lw %[t1], 16(%[dest]) \n\t" "lw %[t2], 20(%[dest]) \n\t" "lw %[t3], 24(%[dest]) \n\t" "lw %[t4], 28(%[dest]) \n\t" "addu_s.qb %[vector_1], %[t1], %[vector_a11] \n\t" "addu_s.qb %[vector_2], %[t2], %[vector_a11] \n\t" "addu_s.qb %[vector_3], %[t3], %[vector_a11] \n\t" "addu_s.qb %[vector_4], %[t4], %[vector_a11] \n\t" "addu_s.qb %[vector_1], %[vector_1], %[vector_a12] \n\t" "addu_s.qb %[vector_2], %[vector_2], %[vector_a12] \n\t" "addu_s.qb %[vector_3], %[vector_3], %[vector_a12] \n\t" "addu_s.qb %[vector_4], %[vector_4], %[vector_a12] \n\t" "sw %[vector_1], 16(%[dest]) \n\t" "sw %[vector_2], 20(%[dest]) \n\t" "sw %[vector_3], 24(%[dest]) \n\t" "sw %[vector_4], 28(%[dest]) \n\t" "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [t3] "=&r"(t3), [t4] "=&r"(t4), [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2), [vector_3] "=&r"(vector_3), [vector_4] "=&r"(vector_4), [dest] "+&r"(dest) : [stride] "r"(stride), [vector_a11] "r"(vector_a11), [vector_a12] "r"(vector_a12)); } } else { /* use quad-byte * input and output memory are four byte aligned */ __asm__ __volatile__("replv.qb %[vector_a1], %[a1] \n\t" : [vector_a1] "=&r"(vector_a1) : [a1] "r"(a1)); for (r = 32; r--;) { __asm__ __volatile__( "lw %[t1], 0(%[dest]) \n\t" "lw %[t2], 4(%[dest]) \n\t" "lw %[t3], 8(%[dest]) \n\t" "lw %[t4], 12(%[dest]) \n\t" "addu_s.qb %[vector_1], %[t1], %[vector_a1] \n\t" "addu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t" "addu_s.qb %[vector_3], %[t3], %[vector_a1] \n\t" "addu_s.qb %[vector_4], %[t4], %[vector_a1] \n\t" "sw %[vector_1], 0(%[dest]) \n\t" "sw %[vector_2], 4(%[dest]) \n\t" "sw %[vector_3], 8(%[dest]) \n\t" "sw %[vector_4], 12(%[dest]) \n\t" "lw %[t1], 16(%[dest]) \n\t" "lw %[t2], 20(%[dest]) \n\t" "lw %[t3], 24(%[dest]) \n\t" "lw %[t4], 28(%[dest]) \n\t" "addu_s.qb %[vector_1], %[t1], %[vector_a1] \n\t" "addu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t" "addu_s.qb %[vector_3], %[t3], %[vector_a1] \n\t" "addu_s.qb %[vector_4], %[t4], %[vector_a1] \n\t" "sw %[vector_1], 16(%[dest]) \n\t" "sw %[vector_2], 20(%[dest]) \n\t" "sw %[vector_3], 24(%[dest]) \n\t" "sw %[vector_4], 28(%[dest]) \n\t" "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [t3] "=&r"(t3), [t4] "=&r"(t4), [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2), [vector_3] "=&r"(vector_3), [vector_4] "=&r"(vector_4), [dest] "+&r"(dest) : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vpx_dsp/mips/itrans4_dspr2.c000066400000000000000000000403351357355204000203340ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/inv_txfm_dspr2.h" #include "vpx_dsp/txfm_common.h" #if HAVE_DSPR2 void vpx_idct4_rows_dspr2(const int16_t *input, int16_t *output) { int step_0, step_1, step_2, step_3; int Temp0, Temp1, Temp2, Temp3; const int const_2_power_13 = 8192; int i; for (i = 4; i--;) { __asm__ __volatile__( /* temp_1 = (input[0] + input[2]) * cospi_16_64; step_0 = dct_const_round_shift(temp_1); temp_2 = (input[0] - input[2]) * cospi_16_64; step_1 = dct_const_round_shift(temp_2); */ "lh %[Temp0], 0(%[input]) \n\t" "lh %[Temp1], 4(%[input]) \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "add %[Temp2], %[Temp0], %[Temp1] \n\t" "sub %[Temp3], %[Temp0], %[Temp1] \n\t" "madd $ac0, %[Temp2], %[cospi_16_64] \n\t" "lh %[Temp0], 2(%[input]) \n\t" "lh %[Temp1], 6(%[input]) \n\t" "extp %[step_0], $ac0, 31 \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "madd $ac1, %[Temp3], %[cospi_16_64] \n\t" "extp %[step_1], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" /* temp1 = input[1] * cospi_24_64 - input[3] * cospi_8_64; step_2 = dct_const_round_shift(temp1); */ "madd $ac0, %[Temp0], %[cospi_24_64] \n\t" "msub $ac0, %[Temp1], %[cospi_8_64] \n\t" "extp %[step_2], $ac0, 31 \n\t" /* temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; step_3 = dct_const_round_shift(temp2); */ "madd $ac1, %[Temp0], %[cospi_8_64] \n\t" "madd $ac1, %[Temp1], %[cospi_24_64] \n\t" "extp %[step_3], $ac1, 31 \n\t" /* output[0] = step_0 + step_3; output[4] = step_1 + step_2; output[8] = step_1 - step_2; output[12] = step_0 - step_3; */ "add %[Temp0], %[step_0], %[step_3] \n\t" "sh %[Temp0], 0(%[output]) \n\t" "add %[Temp1], %[step_1], %[step_2] \n\t" "sh %[Temp1], 8(%[output]) \n\t" "sub %[Temp2], %[step_1], %[step_2] \n\t" "sh %[Temp2], 16(%[output]) \n\t" "sub %[Temp3], %[step_0], %[step_3] \n\t" "sh %[Temp3], 24(%[output]) \n\t" : [Temp0] "=&r"(Temp0), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [step_0] "=&r"(step_0), [step_1] "=&r"(step_1), [step_2] "=&r"(step_2), [step_3] "=&r"(step_3), [output] "+r"(output) : [const_2_power_13] "r"(const_2_power_13), [cospi_8_64] "r"(cospi_8_64), [cospi_16_64] "r"(cospi_16_64), [cospi_24_64] "r"(cospi_24_64), [input] "r"(input)); input += 4; output += 1; } } void vpx_idct4_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride) { int step_0, step_1, step_2, step_3; int Temp0, Temp1, Temp2, Temp3; const int const_2_power_13 = 8192; const int const_255 = 255; int i; uint8_t *dest_pix; for (i = 0; i < 4; ++i) { dest_pix = (dest + i); __asm__ __volatile__( /* temp_1 = (input[0] + input[2]) * cospi_16_64; step_0 = dct_const_round_shift(temp_1); temp_2 = (input[0] - input[2]) * cospi_16_64; step_1 = dct_const_round_shift(temp_2); */ "lh %[Temp0], 0(%[input]) \n\t" "lh %[Temp1], 4(%[input]) \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "add %[Temp2], %[Temp0], %[Temp1] \n\t" "sub %[Temp3], %[Temp0], %[Temp1] \n\t" "madd $ac0, %[Temp2], %[cospi_16_64] \n\t" "lh %[Temp0], 2(%[input]) \n\t" "lh %[Temp1], 6(%[input]) \n\t" "extp %[step_0], $ac0, 31 \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "madd $ac1, %[Temp3], %[cospi_16_64] \n\t" "extp %[step_1], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" /* temp1 = input[1] * cospi_24_64 - input[3] * 
cospi_8_64; step_2 = dct_const_round_shift(temp1); */ "madd $ac0, %[Temp0], %[cospi_24_64] \n\t" "msub $ac0, %[Temp1], %[cospi_8_64] \n\t" "extp %[step_2], $ac0, 31 \n\t" /* temp2 = input[1] * cospi_8_64 + input[3] * cospi_24_64; step_3 = dct_const_round_shift(temp2); */ "madd $ac1, %[Temp0], %[cospi_8_64] \n\t" "madd $ac1, %[Temp1], %[cospi_24_64] \n\t" "extp %[step_3], $ac1, 31 \n\t" /* output[0] = step_0 + step_3; output[4] = step_1 + step_2; output[8] = step_1 - step_2; output[12] = step_0 - step_3; */ "add %[Temp0], %[step_0], %[step_3] \n\t" "addi %[Temp0], %[Temp0], 8 \n\t" "sra %[Temp0], %[Temp0], 4 \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "add %[Temp1], %[Temp1], %[Temp0] \n\t" "slt %[Temp2], %[Temp1], %[const_255] \n\t" "slt %[Temp3], $zero, %[Temp1] \n\t" "movz %[Temp1], %[const_255], %[Temp2] \n\t" "movz %[Temp1], $zero, %[Temp3] \n\t" "sb %[Temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "add %[Temp0], %[step_1], %[step_2] \n\t" "addi %[Temp0], %[Temp0], 8 \n\t" "sra %[Temp0], %[Temp0], 4 \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "add %[Temp1], %[Temp1], %[Temp0] \n\t" "slt %[Temp2], %[Temp1], %[const_255] \n\t" "slt %[Temp3], $zero, %[Temp1] \n\t" "movz %[Temp1], %[const_255], %[Temp2] \n\t" "movz %[Temp1], $zero, %[Temp3] \n\t" "sb %[Temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "sub %[Temp0], %[step_1], %[step_2] \n\t" "addi %[Temp0], %[Temp0], 8 \n\t" "sra %[Temp0], %[Temp0], 4 \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "add %[Temp1], %[Temp1], %[Temp0] \n\t" "slt %[Temp2], %[Temp1], %[const_255] \n\t" "slt %[Temp3], $zero, %[Temp1] \n\t" "movz %[Temp1], %[const_255], %[Temp2] \n\t" "movz %[Temp1], $zero, %[Temp3] \n\t" "sb %[Temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "sub %[Temp0], %[step_0], %[step_3] \n\t" "addi %[Temp0], %[Temp0], 8 \n\t" "sra %[Temp0], %[Temp0], 4 \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "add %[Temp1], %[Temp1], %[Temp0] \n\t" "slt %[Temp2], %[Temp1], %[const_255] \n\t" "slt %[Temp3], $zero, %[Temp1] \n\t" "movz %[Temp1], %[const_255], %[Temp2] \n\t" "movz %[Temp1], $zero, %[Temp3] \n\t" "sb %[Temp1], 0(%[dest_pix]) \n\t" : [Temp0] "=&r"(Temp0), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [step_0] "=&r"(step_0), [step_1] "=&r"(step_1), [step_2] "=&r"(step_2), [step_3] "=&r"(step_3), [dest_pix] "+r"(dest_pix) : [const_2_power_13] "r"(const_2_power_13), [const_255] "r"(const_255), [cospi_8_64] "r"(cospi_8_64), [cospi_16_64] "r"(cospi_16_64), [cospi_24_64] "r"(cospi_24_64), [input] "r"(input), [stride] "r"(stride)); input += 4; } } void vpx_idct4x4_16_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { DECLARE_ALIGNED(32, int16_t, out[4 * 4]); int16_t *outptr = out; uint32_t pos = 45; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); // Rows vpx_idct4_rows_dspr2(input, outptr); // Columns vpx_idct4_columns_add_blk_dspr2(&out[0], dest, stride); } void vpx_idct4x4_1_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { int a1, absa1; int r; int32_t out; int t2, vector_a1, vector_a; uint32_t pos = 45; int16_t input_dc = input[0]; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); out = DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input_dc); __asm__ __volatile__( "addi %[out], %[out], 8 \n\t" "sra %[a1], %[out], 4 \n\t" : [out] "+r"(out), [a1] "=r"(a1) :); if (a1 < 0) { /* use quad-byte * input and output memory are four byte 
aligned */ __asm__ __volatile__( "abs %[absa1], %[a1] \n\t" "replv.qb %[vector_a1], %[absa1] \n\t" : [absa1] "=r"(absa1), [vector_a1] "=r"(vector_a1) : [a1] "r"(a1)); for (r = 4; r--;) { __asm__ __volatile__( "lw %[t2], 0(%[dest]) \n\t" "subu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t" "sw %[vector_a], 0(%[dest]) \n\t" "add %[dest], %[dest], %[stride] \n\t" : [t2] "=&r"(t2), [vector_a] "=&r"(vector_a), [dest] "+&r"(dest) : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } else if (a1 > 255) { int32_t a11, a12, vector_a11, vector_a12; /* use quad-byte * input and output memory are four byte aligned */ a11 = a1 >> 3; a12 = a1 - (a11 * 7); __asm__ __volatile__( "replv.qb %[vector_a11], %[a11] \n\t" "replv.qb %[vector_a12], %[a12] \n\t" : [vector_a11] "=&r"(vector_a11), [vector_a12] "=&r"(vector_a12) : [a11] "r"(a11), [a12] "r"(a12)); for (r = 4; r--;) { __asm__ __volatile__( "lw %[t2], 4(%[dest]) \n\t" "addu_s.qb %[vector_a], %[t2], %[vector_a11] \n\t" "addu_s.qb %[vector_a], %[vector_a], %[vector_a11] \n\t" "addu_s.qb %[vector_a], %[vector_a], %[vector_a11] \n\t" "addu_s.qb %[vector_a], %[vector_a], %[vector_a11] \n\t" "addu_s.qb %[vector_a], %[vector_a], %[vector_a11] \n\t" "addu_s.qb %[vector_a], %[vector_a], %[vector_a11] \n\t" "addu_s.qb %[vector_a], %[vector_a], %[vector_a11] \n\t" "addu_s.qb %[vector_a], %[vector_a], %[vector_a12] \n\t" "sw %[vector_a], 0(%[dest]) \n\t" "add %[dest], %[dest], %[stride] \n\t" : [t2] "=&r"(t2), [vector_a] "=&r"(vector_a), [dest] "+&r"(dest) : [stride] "r"(stride), [vector_a11] "r"(vector_a11), [vector_a12] "r"(vector_a12)); } } else { /* use quad-byte * input and output memory are four byte aligned */ __asm__ __volatile__("replv.qb %[vector_a1], %[a1] \n\t" : [vector_a1] "=r"(vector_a1) : [a1] "r"(a1)); for (r = 4; r--;) { __asm__ __volatile__( "lw %[t2], 0(%[dest]) \n\t" "addu_s.qb %[vector_a], %[t2], %[vector_a1] \n\t" "sw %[vector_a], 0(%[dest]) \n\t" "add %[dest], %[dest], %[stride] \n\t" : [t2] "=&r"(t2), [vector_a] "=&r"(vector_a), [dest] "+&r"(dest) : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } } void iadst4_dspr2(const int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7; int x0, x1, x2, x3; x0 = input[0]; x1 = input[1]; x2 = input[2]; x3 = input[3]; if (!(x0 | x1 | x2 | x3)) { output[0] = output[1] = output[2] = output[3] = 0; return; } // 32-bit result is enough for the following multiplications. s0 = sinpi_1_9 * x0; s1 = sinpi_2_9 * x0; s2 = sinpi_3_9 * x1; s3 = sinpi_4_9 * x2; s4 = sinpi_1_9 * x2; s5 = sinpi_2_9 * x3; s6 = sinpi_4_9 * x3; s7 = x0 - x2 + x3; x0 = s0 + s3 + s5; x1 = s1 - s4 - s6; x2 = sinpi_3_9 * s7; x3 = s2; s0 = x0 + x3; s1 = x1 + x3; s2 = x2; s3 = x0 + x1 - x3; // 1-D transform scaling factor is sqrt(2). // The overall dynamic range is 14b (input) + 14b (multiplication scaling) // + 1b (addition) = 29b. // Hence the output bit depth is 15b. output[0] = dct_const_round_shift(s0); output[1] = dct_const_round_shift(s1); output[2] = dct_const_round_shift(s2); output[3] = dct_const_round_shift(s3); } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vpx_dsp/mips/itrans8_dspr2.c000066400000000000000000001014271357355204000203400ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/inv_txfm_dspr2.h" #include "vpx_dsp/txfm_common.h" #if HAVE_DSPR2 void idct8_rows_dspr2(const int16_t *input, int16_t *output, uint32_t no_rows) { int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; const int const_2_power_13 = 8192; int Temp0, Temp1, Temp2, Temp3, Temp4; int i; for (i = no_rows; i--;) { __asm__ __volatile__( /* temp_1 = (input[0] + input[4]) * cospi_16_64; step2_0 = dct_const_round_shift(temp_1); temp_2 = (input[0] - input[4]) * cospi_16_64; step2_1 = dct_const_round_shift(temp_2); */ "lh %[Temp0], 0(%[input]) \n\t" "lh %[Temp1], 8(%[input]) \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "add %[Temp2], %[Temp0], %[Temp1] \n\t" "madd $ac0, %[Temp2], %[cospi_16_64] \n\t" "extp %[Temp4], $ac0, 31 \n\t" "sub %[Temp3], %[Temp0], %[Temp1] \n\t" "madd $ac1, %[Temp3], %[cospi_16_64] \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "extp %[Temp2], $ac1, 31 \n\t" /* temp_1 = input[2] * cospi_24_64 - input[6] * cospi_8_64; step2_2 = dct_const_round_shift(temp_1); */ "lh %[Temp0], 4(%[input]) \n\t" "lh %[Temp1], 12(%[input]) \n\t" "madd $ac0, %[Temp0], %[cospi_24_64] \n\t" "msub $ac0, %[Temp1], %[cospi_8_64] \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "extp %[Temp3], $ac0, 31 \n\t" /* step1_1 = step2_1 + step2_2; step1_2 = step2_1 - step2_2; */ "add %[step1_1], %[Temp2], %[Temp3] \n\t" "sub %[step1_2], %[Temp2], %[Temp3] \n\t" /* temp_2 = input[2] * cospi_8_64 + input[6] * cospi_24_64; step2_3 = dct_const_round_shift(temp_2); */ "madd $ac1, %[Temp0], %[cospi_8_64] \n\t" "madd $ac1, %[Temp1], %[cospi_24_64] \n\t" "extp %[Temp1], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" /* step1_0 = step2_0 + step2_3; step1_3 = step2_0 - step2_3; */ "add %[step1_0], %[Temp4], %[Temp1] \n\t" "sub %[step1_3], %[Temp4], %[Temp1] \n\t" /* temp_1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; step1_4 = dct_const_round_shift(temp_1); */ "lh %[Temp0], 2(%[input]) \n\t" "madd $ac0, %[Temp0], %[cospi_28_64] \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "lh %[Temp1], 14(%[input]) \n\t" "lh %[Temp0], 2(%[input]) \n\t" "msub $ac0, %[Temp1], %[cospi_4_64] \n\t" "extp %[step1_4], $ac0, 31 \n\t" /* temp_2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; step1_7 = dct_const_round_shift(temp_2); */ "madd $ac1, %[Temp0], %[cospi_4_64] \n\t" "madd $ac1, %[Temp1], %[cospi_28_64] \n\t" "extp %[step1_7], $ac1, 31 \n\t" /* temp_1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; step1_5 = dct_const_round_shift(temp_1); */ "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "lh %[Temp0], 10(%[input]) \n\t" "madd $ac0, %[Temp0], %[cospi_12_64] \n\t" "lh %[Temp1], 6(%[input]) \n\t" "msub $ac0, %[Temp1], %[cospi_20_64] \n\t" "extp %[step1_5], $ac0, 31 \n\t" /* temp_2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; step1_6 = dct_const_round_shift(temp_2); */ "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "lh %[Temp0], 10(%[input]) \n\t" "madd $ac1, %[Temp0], %[cospi_20_64] \n\t" "lh %[Temp1], 6(%[input]) \n\t" "madd $ac1, %[Temp1], %[cospi_12_64] \n\t" "extp %[step1_6], $ac1, 31 \n\t" /* temp_1 = (step1_7 - step1_6 - step1_4 + step1_5) * cospi_16_64; temp_2 = (step1_4 - step1_5 - step1_6 + step1_7) * cospi_16_64; */ 
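/* Note on the accumulator idiom used throughout this file: the
   sequence "mtlo const_2_power_13 / madd / extp ..., 31" implements
   dct_const_round_shift().  const_2_power_13 = 8192 is the rounding
   bias 1 << (DCT_CONST_BITS - 1), and extp extracts the 32 bits of
   the accumulator whose most significant bit is at position 45 (as
   programmed by "wrdsp" in the callers), i.e. the accumulator
   shifted right by DCT_CONST_BITS == 14 (see txfm_common.h); in C:
   step = ROUND_POWER_OF_TWO(sum_of_products, DCT_CONST_BITS); */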
"sub %[Temp0], %[step1_7], %[step1_6] \n\t" "sub %[Temp0], %[Temp0], %[step1_4] \n\t" "add %[Temp0], %[Temp0], %[step1_5] \n\t" "sub %[Temp1], %[step1_4], %[step1_5] \n\t" "sub %[Temp1], %[Temp1], %[step1_6] \n\t" "add %[Temp1], %[Temp1], %[step1_7] \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "madd $ac0, %[Temp0], %[cospi_16_64] \n\t" "madd $ac1, %[Temp1], %[cospi_16_64] \n\t" /* step1_4 = step1_4 + step1_5; step1_7 = step1_6 + step1_7; */ "add %[step1_4], %[step1_4], %[step1_5] \n\t" "add %[step1_7], %[step1_7], %[step1_6] \n\t" "extp %[step1_5], $ac0, 31 \n\t" "extp %[step1_6], $ac1, 31 \n\t" "add %[Temp0], %[step1_0], %[step1_7] \n\t" "sh %[Temp0], 0(%[output]) \n\t" "add %[Temp1], %[step1_1], %[step1_6] \n\t" "sh %[Temp1], 16(%[output]) \n\t" "add %[Temp0], %[step1_2], %[step1_5] \n\t" "sh %[Temp0], 32(%[output]) \n\t" "add %[Temp1], %[step1_3], %[step1_4] \n\t" "sh %[Temp1], 48(%[output]) \n\t" "sub %[Temp0], %[step1_3], %[step1_4] \n\t" "sh %[Temp0], 64(%[output]) \n\t" "sub %[Temp1], %[step1_2], %[step1_5] \n\t" "sh %[Temp1], 80(%[output]) \n\t" "sub %[Temp0], %[step1_1], %[step1_6] \n\t" "sh %[Temp0], 96(%[output]) \n\t" "sub %[Temp1], %[step1_0], %[step1_7] \n\t" "sh %[Temp1], 112(%[output]) \n\t" : [step1_0] "=&r"(step1_0), [step1_1] "=&r"(step1_1), [step1_2] "=&r"(step1_2), [step1_3] "=&r"(step1_3), [step1_4] "=&r"(step1_4), [step1_5] "=&r"(step1_5), [step1_6] "=&r"(step1_6), [step1_7] "=&r"(step1_7), [Temp0] "=&r"(Temp0), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [Temp4] "=&r"(Temp4) : [const_2_power_13] "r"(const_2_power_13), [cospi_16_64] "r"(cospi_16_64), [cospi_28_64] "r"(cospi_28_64), [cospi_4_64] "r"(cospi_4_64), [cospi_12_64] "r"(cospi_12_64), [cospi_20_64] "r"(cospi_20_64), [cospi_8_64] "r"(cospi_8_64), [cospi_24_64] "r"(cospi_24_64), [output] "r"(output), [input] "r"(input)); input += 8; output += 1; } } void idct8_columns_add_blk_dspr2(int16_t *input, uint8_t *dest, int stride) { int step1_0, step1_1, step1_2, step1_3, step1_4, step1_5, step1_6, step1_7; int Temp0, Temp1, Temp2, Temp3; int i; const int const_2_power_13 = 8192; const int const_255 = 255; uint8_t *dest_pix; for (i = 0; i < 8; ++i) { dest_pix = (dest + i); __asm__ __volatile__( /* temp_1 = (input[0] + input[4]) * cospi_16_64; step2_0 = dct_const_round_shift(temp_1); temp_2 = (input[0] - input[4]) * cospi_16_64; step2_1 = dct_const_round_shift(temp_2); */ "lh %[Temp0], 0(%[input]) \n\t" "lh %[Temp1], 8(%[input]) \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "add %[Temp2], %[Temp0], %[Temp1] \n\t" "madd $ac0, %[Temp2], %[cospi_16_64] \n\t" "extp %[step1_6], $ac0, 31 \n\t" "sub %[Temp3], %[Temp0], %[Temp1] \n\t" "madd $ac1, %[Temp3], %[cospi_16_64] \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "extp %[Temp2], $ac1, 31 \n\t" /* temp_1 = input[2] * cospi_24_64 - input[6] * cospi_8_64; step2_2 = dct_const_round_shift(temp_1); */ "lh %[Temp0], 4(%[input]) \n\t" "lh %[Temp1], 12(%[input]) \n\t" "madd $ac0, %[Temp0], %[cospi_24_64] \n\t" "msub $ac0, %[Temp1], %[cospi_8_64] \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "extp %[Temp3], $ac0, 31 \n\t" /* step1_1 = step2_1 + step2_2; step1_2 = step2_1 - step2_2; */ "add %[step1_1], %[Temp2], %[Temp3] \n\t" "sub %[step1_2], %[Temp2], %[Temp3] \n\t" /* temp_2 = input[2] * cospi_8_64 + input[6] * cospi_24_64; step2_3 = 
dct_const_round_shift(temp_2); */ "madd $ac1, %[Temp0], %[cospi_8_64] \n\t" "madd $ac1, %[Temp1], %[cospi_24_64] \n\t" "extp %[Temp1], $ac1, 31 \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" /* step1_0 = step2_0 + step2_3; step1_3 = step2_0 - step2_3; */ "add %[step1_0], %[step1_6], %[Temp1] \n\t" "sub %[step1_3], %[step1_6], %[Temp1] \n\t" /* temp_1 = input[1] * cospi_28_64 - input[7] * cospi_4_64; step1_4 = dct_const_round_shift(temp_1); */ "lh %[Temp0], 2(%[input]) \n\t" "madd $ac0, %[Temp0], %[cospi_28_64] \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "lh %[Temp1], 14(%[input]) \n\t" "lh %[Temp0], 2(%[input]) \n\t" "msub $ac0, %[Temp1], %[cospi_4_64] \n\t" "extp %[step1_4], $ac0, 31 \n\t" /* temp_2 = input[1] * cospi_4_64 + input[7] * cospi_28_64; step1_7 = dct_const_round_shift(temp_2); */ "madd $ac1, %[Temp0], %[cospi_4_64] \n\t" "madd $ac1, %[Temp1], %[cospi_28_64] \n\t" "extp %[step1_7], $ac1, 31 \n\t" /* temp_1 = input[5] * cospi_12_64 - input[3] * cospi_20_64; step1_5 = dct_const_round_shift(temp_1); */ "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "lh %[Temp0], 10(%[input]) \n\t" "madd $ac0, %[Temp0], %[cospi_12_64] \n\t" "lh %[Temp1], 6(%[input]) \n\t" "msub $ac0, %[Temp1], %[cospi_20_64] \n\t" "extp %[step1_5], $ac0, 31 \n\t" /* temp_2 = input[5] * cospi_20_64 + input[3] * cospi_12_64; step1_6 = dct_const_round_shift(temp_2); */ "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "lh %[Temp0], 10(%[input]) \n\t" "madd $ac1, %[Temp0], %[cospi_20_64] \n\t" "lh %[Temp1], 6(%[input]) \n\t" "madd $ac1, %[Temp1], %[cospi_12_64] \n\t" "extp %[step1_6], $ac1, 31 \n\t" /* temp_1 = (step1_7 - step1_6 - step1_4 + step1_5) * cospi_16_64; temp_2 = (step1_4 - step1_5 - step1_6 + step1_7) * cospi_16_64; */ "sub %[Temp0], %[step1_7], %[step1_6] \n\t" "sub %[Temp0], %[Temp0], %[step1_4] \n\t" "add %[Temp0], %[Temp0], %[step1_5] \n\t" "sub %[Temp1], %[step1_4], %[step1_5] \n\t" "sub %[Temp1], %[Temp1], %[step1_6] \n\t" "add %[Temp1], %[Temp1], %[step1_7] \n\t" "mtlo %[const_2_power_13], $ac0 \n\t" "mthi $zero, $ac0 \n\t" "mtlo %[const_2_power_13], $ac1 \n\t" "mthi $zero, $ac1 \n\t" "madd $ac0, %[Temp0], %[cospi_16_64] \n\t" "madd $ac1, %[Temp1], %[cospi_16_64] \n\t" /* step1_4 = step1_4 + step1_5; step1_7 = step1_6 + step1_7; */ "add %[step1_4], %[step1_4], %[step1_5] \n\t" "add %[step1_7], %[step1_7], %[step1_6] \n\t" "extp %[step1_5], $ac0, 31 \n\t" "extp %[step1_6], $ac1, 31 \n\t" /* add block */ "lbu %[Temp1], 0(%[dest_pix]) \n\t" "add %[Temp0], %[step1_0], %[step1_7] \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" "sra %[Temp0], %[Temp0], 5 \n\t" "add %[Temp1], %[Temp1], %[Temp0] \n\t" "add %[Temp0], %[step1_1], %[step1_6] \n\t" "slt %[Temp2], %[Temp1], %[const_255] \n\t" "slt %[Temp3], $zero, %[Temp1] \n\t" "movz %[Temp1], %[const_255], %[Temp2] \n\t" "movz %[Temp1], $zero, %[Temp3] \n\t" "sb %[Temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" "sra %[Temp0], %[Temp0], 5 \n\t" "add %[Temp1], %[Temp1], %[Temp0] \n\t" "add %[Temp0], %[step1_2], %[step1_5] \n\t" "slt %[Temp2], %[Temp1], %[const_255] \n\t" "slt %[Temp3], $zero, %[Temp1] \n\t" "movz %[Temp1], %[const_255], %[Temp2] \n\t" "movz %[Temp1], $zero, %[Temp3] \n\t" "sb %[Temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" "sra %[Temp0], %[Temp0], 5 \n\t" "add %[Temp1], %[Temp1], 
%[Temp0] \n\t" "add %[Temp0], %[step1_3], %[step1_4] \n\t" "slt %[Temp2], %[Temp1], %[const_255] \n\t" "slt %[Temp3], $zero, %[Temp1] \n\t" "movz %[Temp1], %[const_255], %[Temp2] \n\t" "movz %[Temp1], $zero, %[Temp3] \n\t" "sb %[Temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" "sra %[Temp0], %[Temp0], 5 \n\t" "add %[Temp1], %[Temp1], %[Temp0] \n\t" "sub %[Temp0], %[step1_3], %[step1_4] \n\t" "slt %[Temp2], %[Temp1], %[const_255] \n\t" "slt %[Temp3], $zero, %[Temp1] \n\t" "movz %[Temp1], %[const_255], %[Temp2] \n\t" "movz %[Temp1], $zero, %[Temp3] \n\t" "sb %[Temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" "sra %[Temp0], %[Temp0], 5 \n\t" "add %[Temp1], %[Temp1], %[Temp0] \n\t" "sub %[Temp0], %[step1_2], %[step1_5] \n\t" "slt %[Temp2], %[Temp1], %[const_255] \n\t" "slt %[Temp3], $zero, %[Temp1] \n\t" "movz %[Temp1], %[const_255], %[Temp2] \n\t" "movz %[Temp1], $zero, %[Temp3] \n\t" "sb %[Temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" "sra %[Temp0], %[Temp0], 5 \n\t" "add %[Temp1], %[Temp1], %[Temp0] \n\t" "sub %[Temp0], %[step1_1], %[step1_6] \n\t" "slt %[Temp2], %[Temp1], %[const_255] \n\t" "slt %[Temp3], $zero, %[Temp1] \n\t" "movz %[Temp1], %[const_255], %[Temp2] \n\t" "movz %[Temp1], $zero, %[Temp3] \n\t" "sb %[Temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" "sra %[Temp0], %[Temp0], 5 \n\t" "add %[Temp1], %[Temp1], %[Temp0] \n\t" "sub %[Temp0], %[step1_0], %[step1_7] \n\t" "slt %[Temp2], %[Temp1], %[const_255] \n\t" "slt %[Temp3], $zero, %[Temp1] \n\t" "movz %[Temp1], %[const_255], %[Temp2] \n\t" "movz %[Temp1], $zero, %[Temp3] \n\t" "sb %[Temp1], 0(%[dest_pix]) \n\t" "addu %[dest_pix], %[dest_pix], %[stride] \n\t" "lbu %[Temp1], 0(%[dest_pix]) \n\t" "addi %[Temp0], %[Temp0], 16 \n\t" "sra %[Temp0], %[Temp0], 5 \n\t" "add %[Temp1], %[Temp1], %[Temp0] \n\t" "slt %[Temp2], %[Temp1], %[const_255] \n\t" "slt %[Temp3], $zero, %[Temp1] \n\t" "movz %[Temp1], %[const_255], %[Temp2] \n\t" "movz %[Temp1], $zero, %[Temp3] \n\t" "sb %[Temp1], 0(%[dest_pix]) \n\t" : [step1_0] "=&r"(step1_0), [step1_1] "=&r"(step1_1), [step1_2] "=&r"(step1_2), [step1_3] "=&r"(step1_3), [step1_4] "=&r"(step1_4), [step1_5] "=&r"(step1_5), [step1_6] "=&r"(step1_6), [step1_7] "=&r"(step1_7), [Temp0] "=&r"(Temp0), [Temp1] "=&r"(Temp1), [Temp2] "=&r"(Temp2), [Temp3] "=&r"(Temp3), [dest_pix] "+r"(dest_pix) : [const_2_power_13] "r"(const_2_power_13), [const_255] "r"(const_255), [cospi_16_64] "r"(cospi_16_64), [cospi_28_64] "r"(cospi_28_64), [cospi_4_64] "r"(cospi_4_64), [cospi_12_64] "r"(cospi_12_64), [cospi_20_64] "r"(cospi_20_64), [cospi_8_64] "r"(cospi_8_64), [cospi_24_64] "r"(cospi_24_64), [input] "r"(input), [stride] "r"(stride)); input += 8; } } void vpx_idct8x8_64_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { DECLARE_ALIGNED(32, int16_t, out[8 * 8]); int16_t *outptr = out; uint32_t pos = 45; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); // First transform rows idct8_rows_dspr2(input, outptr, 8); // Then transform columns and add to dest idct8_columns_add_blk_dspr2(&out[0], dest, stride); } void vpx_idct8x8_12_add_dspr2(const int16_t *input, uint8_t 
*dest, int stride) { DECLARE_ALIGNED(32, int16_t, out[8 * 8]); int16_t *outptr = out; uint32_t pos = 45; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); // First transform rows idct8_rows_dspr2(input, outptr, 4); outptr += 4; __asm__ __volatile__( "sw $zero, 0(%[outptr]) \n\t" "sw $zero, 4(%[outptr]) \n\t" "sw $zero, 16(%[outptr]) \n\t" "sw $zero, 20(%[outptr]) \n\t" "sw $zero, 32(%[outptr]) \n\t" "sw $zero, 36(%[outptr]) \n\t" "sw $zero, 48(%[outptr]) \n\t" "sw $zero, 52(%[outptr]) \n\t" "sw $zero, 64(%[outptr]) \n\t" "sw $zero, 68(%[outptr]) \n\t" "sw $zero, 80(%[outptr]) \n\t" "sw $zero, 84(%[outptr]) \n\t" "sw $zero, 96(%[outptr]) \n\t" "sw $zero, 100(%[outptr]) \n\t" "sw $zero, 112(%[outptr]) \n\t" "sw $zero, 116(%[outptr]) \n\t" : : [outptr] "r"(outptr)); // Then transform columns and add to dest idct8_columns_add_blk_dspr2(&out[0], dest, stride); } void vpx_idct8x8_1_add_dspr2(const int16_t *input, uint8_t *dest, int stride) { uint32_t pos = 45; int32_t out; int32_t r; int32_t a1, absa1; int32_t t1, t2, vector_a1, vector_1, vector_2; /* bit positon for extract from acc */ __asm__ __volatile__("wrdsp %[pos], 1 \n\t" : : [pos] "r"(pos)); out = DCT_CONST_ROUND_SHIFT_TWICE_COSPI_16_64(input[0]); __asm__ __volatile__( "addi %[out], %[out], 16 \n\t" "sra %[a1], %[out], 5 \n\t" : [out] "+r"(out), [a1] "=r"(a1) :); if (a1 < 0) { /* use quad-byte * input and output memory are four byte aligned */ __asm__ __volatile__( "abs %[absa1], %[a1] \n\t" "replv.qb %[vector_a1], %[absa1] \n\t" : [absa1] "=r"(absa1), [vector_a1] "=r"(vector_a1) : [a1] "r"(a1)); for (r = 8; r--;) { __asm__ __volatile__( "lw %[t1], 0(%[dest]) \n\t" "lw %[t2], 4(%[dest]) \n\t" "subu_s.qb %[vector_1], %[t1], %[vector_a1] \n\t" "subu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t" "sw %[vector_1], 0(%[dest]) \n\t" "sw %[vector_2], 4(%[dest]) \n\t" "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2), [dest] "+&r"(dest) : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } else if (a1 > 255) { int32_t a11, a12, vector_a11, vector_a12; /* use quad-byte * input and output memory are four byte aligned */ a11 = a1 >> 2; a12 = a1 - (a11 * 3); __asm__ __volatile__( "replv.qb %[vector_a11], %[a11] \n\t" "replv.qb %[vector_a12], %[a12] \n\t" : [vector_a11] "=&r"(vector_a11), [vector_a12] "=&r"(vector_a12) : [a11] "r"(a11), [a12] "r"(a12)); for (r = 8; r--;) { __asm__ __volatile__( "lw %[t1], 0(%[dest]) \n\t" "lw %[t2], 4(%[dest]) \n\t" "addu_s.qb %[vector_1], %[t1], %[vector_a11] \n\t" "addu_s.qb %[vector_2], %[t2], %[vector_a11] \n\t" "addu_s.qb %[vector_1], %[vector_1], %[vector_a11] \n\t" "addu_s.qb %[vector_2], %[vector_2], %[vector_a11] \n\t" "addu_s.qb %[vector_1], %[vector_1], %[vector_a11] \n\t" "addu_s.qb %[vector_2], %[vector_2], %[vector_a11] \n\t" "addu_s.qb %[vector_1], %[vector_1], %[vector_a12] \n\t" "addu_s.qb %[vector_2], %[vector_2], %[vector_a12] \n\t" "sw %[vector_1], 0(%[dest]) \n\t" "sw %[vector_2], 4(%[dest]) \n\t" "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2), [dest] "+r"(dest) : [stride] "r"(stride), [vector_a11] "r"(vector_a11), [vector_a12] "r"(vector_a12)); } } else { /* use quad-byte * input and output memory are four byte aligned */ __asm__ __volatile__("replv.qb %[vector_a1], %[a1] \n\t" : [vector_a1] "=r"(vector_a1) : [a1] "r"(a1)); for (r = 8; r--;) { __asm__ __volatile__( "lw %[t1], 
0(%[dest]) \n\t" "lw %[t2], 4(%[dest]) \n\t" "addu_s.qb %[vector_1], %[t1], %[vector_a1] \n\t" "addu_s.qb %[vector_2], %[t2], %[vector_a1] \n\t" "sw %[vector_1], 0(%[dest]) \n\t" "sw %[vector_2], 4(%[dest]) \n\t" "add %[dest], %[dest], %[stride] \n\t" : [t1] "=&r"(t1), [t2] "=&r"(t2), [vector_1] "=&r"(vector_1), [vector_2] "=&r"(vector_2), [dest] "+r"(dest) : [stride] "r"(stride), [vector_a1] "r"(vector_a1)); } } } void iadst8_dspr2(const int16_t *input, int16_t *output) { int s0, s1, s2, s3, s4, s5, s6, s7; int x0, x1, x2, x3, x4, x5, x6, x7; x0 = input[7]; x1 = input[0]; x2 = input[5]; x3 = input[2]; x4 = input[3]; x5 = input[4]; x6 = input[1]; x7 = input[6]; if (!(x0 | x1 | x2 | x3 | x4 | x5 | x6 | x7)) { output[0] = output[1] = output[2] = output[3] = output[4] = output[5] = output[6] = output[7] = 0; return; } // stage 1 s0 = cospi_2_64 * x0 + cospi_30_64 * x1; s1 = cospi_30_64 * x0 - cospi_2_64 * x1; s2 = cospi_10_64 * x2 + cospi_22_64 * x3; s3 = cospi_22_64 * x2 - cospi_10_64 * x3; s4 = cospi_18_64 * x4 + cospi_14_64 * x5; s5 = cospi_14_64 * x4 - cospi_18_64 * x5; s6 = cospi_26_64 * x6 + cospi_6_64 * x7; s7 = cospi_6_64 * x6 - cospi_26_64 * x7; x0 = ROUND_POWER_OF_TWO((s0 + s4), DCT_CONST_BITS); x1 = ROUND_POWER_OF_TWO((s1 + s5), DCT_CONST_BITS); x2 = ROUND_POWER_OF_TWO((s2 + s6), DCT_CONST_BITS); x3 = ROUND_POWER_OF_TWO((s3 + s7), DCT_CONST_BITS); x4 = ROUND_POWER_OF_TWO((s0 - s4), DCT_CONST_BITS); x5 = ROUND_POWER_OF_TWO((s1 - s5), DCT_CONST_BITS); x6 = ROUND_POWER_OF_TWO((s2 - s6), DCT_CONST_BITS); x7 = ROUND_POWER_OF_TWO((s3 - s7), DCT_CONST_BITS); // stage 2 s0 = x0; s1 = x1; s2 = x2; s3 = x3; s4 = cospi_8_64 * x4 + cospi_24_64 * x5; s5 = cospi_24_64 * x4 - cospi_8_64 * x5; s6 = -cospi_24_64 * x6 + cospi_8_64 * x7; s7 = cospi_8_64 * x6 + cospi_24_64 * x7; x0 = s0 + s2; x1 = s1 + s3; x2 = s0 - s2; x3 = s1 - s3; x4 = ROUND_POWER_OF_TWO((s4 + s6), DCT_CONST_BITS); x5 = ROUND_POWER_OF_TWO((s5 + s7), DCT_CONST_BITS); x6 = ROUND_POWER_OF_TWO((s4 - s6), DCT_CONST_BITS); x7 = ROUND_POWER_OF_TWO((s5 - s7), DCT_CONST_BITS); // stage 3 s2 = cospi_16_64 * (x2 + x3); s3 = cospi_16_64 * (x2 - x3); s6 = cospi_16_64 * (x6 + x7); s7 = cospi_16_64 * (x6 - x7); x2 = ROUND_POWER_OF_TWO((s2), DCT_CONST_BITS); x3 = ROUND_POWER_OF_TWO((s3), DCT_CONST_BITS); x6 = ROUND_POWER_OF_TWO((s6), DCT_CONST_BITS); x7 = ROUND_POWER_OF_TWO((s7), DCT_CONST_BITS); output[0] = x0; output[1] = -x4; output[2] = x6; output[3] = -x2; output[4] = x3; output[5] = -x7; output[6] = x5; output[7] = -x1; } #endif // HAVE_DSPR2 libvpx-1.8.2/vpx_dsp/mips/loopfilter_16_msa.c000066400000000000000000001444401357355204000211650ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/loopfilter_msa.h" #include "vpx_ports/mem.h" static int32_t hz_lpf_t4_and_t8_16w(uint8_t *src, int32_t pitch, uint8_t *filter48, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out; v16u8 flat, mask, hev, thresh, b_limit, limit; v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r; v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l; v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r; v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l; v16u8 zero = { 0 }; /* load vector elements */ LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); thresh = (v16u8)__msa_fill_b(*thresh_ptr); b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); limit = (v16u8)__msa_fill_b(*limit_ptr); /* mask and hev */ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); if (__msa_test_bz_v(flat)) { ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch); return 1; } else { ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r); VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r); ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l); ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l); VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l); /* convert 16 bit output data into 8 bit */ PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l, p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r); PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r, q2_filt8_r); /* store pixel values */ p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat); p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat); p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat); q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat); q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat); q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat); ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16); filter48 += (4 * 16); ST_UB2(q1_out, q2_out, filter48, 16); filter48 += (2 * 16); ST_UB(flat, filter48); return 0; } } static void hz_lpf_t16_16w(uint8_t *src, int32_t pitch, uint8_t *filter48) { v16u8 flat, flat2, filter8; v16i8 zero = { 0 }; v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in; v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in; v8u16 p7_l_in, p6_l_in, p5_l_in, p4_l_in, p3_l_in, p2_l_in, p1_l_in, p0_l_in; v8u16 q7_l_in, q6_l_in, q5_l_in, q4_l_in, q3_l_in, q2_l_in, q1_l_in, q0_l_in; v8u16 tmp0_r, tmp1_r, tmp0_l, tmp1_l; v8i16 l_out, r_out; flat = LD_UB(filter48 + 96); LD_UB8((src - 8 * pitch), pitch, p7, p6, p5, p4, p3, p2, p1, p0); LD_UB8(src, pitch, q0, q1, q2, q3, q4, q5, q6, q7); VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2); if (__msa_test_bz_v(flat2)) { LD_UB4(filter48, 16, p2, p1, p0, q0); LD_UB2(filter48 + 4 * 16, 16, q1, q2); src -= 3 * pitch; ST_UB4(p2, p1, p0, q0, src, 
pitch); src += (4 * pitch); ST_UB2(q1, q2, src, pitch); } else { src -= 7 * pitch; ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2, zero, p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in); q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0); tmp0_r = p7_r_in << 3; tmp0_r -= p7_r_in; tmp0_r += p6_r_in; tmp0_r += q0_r_in; tmp1_r = p6_r_in + p5_r_in; tmp1_r += p4_r_in; tmp1_r += p3_r_in; tmp1_r += p2_r_in; tmp1_r += p1_r_in; tmp1_r += p0_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); ILVL_B4_UH(zero, p7, zero, p6, zero, p5, zero, p4, p7_l_in, p6_l_in, p5_l_in, p4_l_in); ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l_in, p2_l_in, p1_l_in, p0_l_in); q0_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q0); tmp0_l = p7_l_in << 3; tmp0_l -= p7_l_in; tmp0_l += p6_l_in; tmp0_l += q0_l_in; tmp1_l = p6_l_in + p5_l_in; tmp1_l += p4_l_in; tmp1_l += p3_l_in; tmp1_l += p2_l_in; tmp1_l += p1_l_in; tmp1_l += p0_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2); ST_UB(p6, src); src += pitch; /* p5 */ q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1); tmp0_r = p5_r_in - p6_r_in; tmp0_r += q1_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q1_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q1); tmp0_l = p5_l_in - p6_l_in; tmp0_l += q1_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2); ST_UB(p5, src); src += pitch; /* p4 */ q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2); tmp0_r = p4_r_in - p5_r_in; tmp0_r += q2_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = (v8i16)__msa_srari_h((v8i16)tmp1_r, 4); q2_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q2); tmp0_l = p4_l_in - p5_l_in; tmp0_l += q2_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2); ST_UB(p4, src); src += pitch; /* p3 */ q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3); tmp0_r = p3_r_in - p4_r_in; tmp0_r += q3_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q3_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q3); tmp0_l = p3_l_in - p4_l_in; tmp0_l += q3_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2); ST_UB(p3, src); src += pitch; /* p2 */ q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4); filter8 = LD_UB(filter48); tmp0_r = p2_r_in - p3_r_in; tmp0_r += q4_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q4_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q4); tmp0_l = p2_l_in - p3_l_in; tmp0_l += q4_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST_UB(filter8, src); src += pitch; /* p1 */ q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5); filter8 = LD_UB(filter48 + 16); tmp0_r = p1_r_in - p2_r_in; tmp0_r += q5_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q5_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q5); tmp0_l = p1_l_in - p2_l_in; tmp0_l += q5_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 
4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST_UB(filter8, src); src += pitch; /* p0 */ q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6); filter8 = LD_UB(filter48 + 32); tmp0_r = p0_r_in - p1_r_in; tmp0_r += q6_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q6_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q6); tmp0_l = p0_l_in - p1_l_in; tmp0_l += q6_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST_UB(filter8, src); src += pitch; /* q0 */ q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7); filter8 = LD_UB(filter48 + 48); tmp0_r = q7_r_in - p0_r_in; tmp0_r += q0_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q7_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q7); tmp0_l = q7_l_in - p0_l_in; tmp0_l += q0_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST_UB(filter8, src); src += pitch; /* q1 */ filter8 = LD_UB(filter48 + 64); tmp0_r = q7_r_in - q0_r_in; tmp0_r += q1_r_in; tmp0_r -= p6_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); tmp0_l = q7_l_in - q0_l_in; tmp0_l += q1_l_in; tmp0_l -= p6_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST_UB(filter8, src); src += pitch; /* q2 */ filter8 = LD_UB(filter48 + 80); tmp0_r = q7_r_in - q1_r_in; tmp0_r += q2_r_in; tmp0_r -= p5_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); tmp0_l = q7_l_in - q1_l_in; tmp0_l += q2_l_in; tmp0_l -= p5_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST_UB(filter8, src); src += pitch; /* q3 */ tmp0_r = q7_r_in - q2_r_in; tmp0_r += q3_r_in; tmp0_r -= p4_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); tmp0_l = q7_l_in - q2_l_in; tmp0_l += q3_l_in; tmp0_l -= p4_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2); ST_UB(q3, src); src += pitch; /* q4 */ tmp0_r = q7_r_in - q3_r_in; tmp0_r += q4_r_in; tmp0_r -= p3_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); tmp0_l = q7_l_in - q3_l_in; tmp0_l += q4_l_in; tmp0_l -= p3_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2); ST_UB(q4, src); src += pitch; /* q5 */ tmp0_r = q7_r_in - q4_r_in; tmp0_r += q5_r_in; tmp0_r -= p2_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); tmp0_l = q7_l_in - q4_l_in; tmp0_l += q5_l_in; tmp0_l -= p2_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2); ST_UB(q5, src); src += pitch; /* q6 */ tmp0_r = q7_r_in - q5_r_in; tmp0_r += q6_r_in; tmp0_r -= p1_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); tmp0_l = q7_l_in - q5_l_in; tmp0_l += q6_l_in; tmp0_l -= p1_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = 
(v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2); ST_UB(q6, src); } } static void mb_lpf_horizontal_edge_dual(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr, int32_t count) { DECLARE_ALIGNED(32, uint8_t, filter48[16 * 8]); uint8_t early_exit = 0; (void)count; early_exit = hz_lpf_t4_and_t8_16w(src, pitch, &filter48[0], b_limit_ptr, limit_ptr, thresh_ptr); if (0 == early_exit) { hz_lpf_t16_16w(src, pitch, filter48); } } static void mb_lpf_horizontal_edge(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr, int32_t count) { if (1 == count) { uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d; uint64_t dword0, dword1; v16u8 flat2, mask, hev, flat, thresh, b_limit, limit; v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p7, p6, p5, p4, q4, q5, q6, q7; v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out; v16u8 p0_filter16, p1_filter16; v8i16 p2_filter8, p1_filter8, p0_filter8; v8i16 q0_filter8, q1_filter8, q2_filter8; v8u16 p7_r, p6_r, p5_r, p4_r, q7_r, q6_r, q5_r, q4_r; v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r; v16i8 zero = { 0 }; v8u16 tmp0, tmp1, tmp2; /* load vector elements */ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); thresh = (v16u8)__msa_fill_b(*thresh_ptr); b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); limit = (v16u8)__msa_fill_b(*limit_ptr); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat); if (__msa_test_bz_v(flat)) { p1_d = __msa_copy_u_d((v2i64)p1_out, 0); p0_d = __msa_copy_u_d((v2i64)p0_out, 0); q0_d = __msa_copy_u_d((v2i64)q0_out, 0); q1_d = __msa_copy_u_d((v2i64)q1_out, 0); SD4(p1_d, p0_d, q0_d, q1_d, src - 2 * pitch, pitch); } else { /* convert 8 bit input data into 16 bit */ ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r); VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8, p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8); /* convert 16 bit output data into 8 bit */ PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8, zero, q0_filter8, p2_filter8, p1_filter8, p0_filter8, q0_filter8); PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8, q2_filter8); /* store pixel values */ p2_out = __msa_bmnz_v(p2, (v16u8)p2_filter8, flat); p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filter8, flat); p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filter8, flat); q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filter8, flat); q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filter8, flat); q2_out = __msa_bmnz_v(q2, (v16u8)q2_filter8, flat); /* load 16 vector elements */ LD_UB4((src - 8 * pitch), pitch, p7, p6, p5, p4); LD_UB4(src + (4 * pitch), pitch, q4, q5, q6, q7); VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2); if (__msa_test_bz_v(flat2)) { p2_d = __msa_copy_u_d((v2i64)p2_out, 0); p1_d = __msa_copy_u_d((v2i64)p1_out, 0); p0_d = __msa_copy_u_d((v2i64)p0_out, 0); q0_d = __msa_copy_u_d((v2i64)q0_out, 0); q1_d = __msa_copy_u_d((v2i64)q1_out, 0); q2_d = __msa_copy_u_d((v2i64)q2_out, 0); SD4(p2_d, p1_d, p0_d, q0_d, src - 3 * pitch, pitch); SD(q1_d, src + pitch); SD(q2_d, src + 2 * pitch); } else { /* LSB(right) 8 pixel operation */ ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, 
q4, zero, q5, zero, q6, zero, q7, p7_r, p6_r, p5_r, p4_r, q4_r, q5_r, q6_r, q7_r); tmp0 = p7_r << 3; tmp0 -= p7_r; tmp0 += p6_r; tmp0 += q0_r; src -= 7 * pitch; /* calculation of p6 and p5 */ tmp1 = p6_r + p5_r + p4_r + p3_r; tmp1 += (p2_r + p1_r + p0_r); tmp1 += tmp0; p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); tmp0 = p5_r - p6_r + q1_r - p7_r; tmp1 += tmp0; p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, p1_filter16); p0_filter16 = __msa_bmnz_v(p6, p0_filter16, flat2); p1_filter16 = __msa_bmnz_v(p5, p1_filter16, flat2); dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); SD(dword0, src); src += pitch; SD(dword1, src); src += pitch; /* calculation of p4 and p3 */ tmp0 = p4_r - p5_r + q2_r - p7_r; tmp2 = p3_r - p4_r + q3_r - p7_r; tmp1 += tmp0; p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); tmp1 += tmp2; p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, p1_filter16); p0_filter16 = __msa_bmnz_v(p4, p0_filter16, flat2); p1_filter16 = __msa_bmnz_v(p3, p1_filter16, flat2); dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); SD(dword0, src); src += pitch; SD(dword1, src); src += pitch; /* calculation of p2 and p1 */ tmp0 = p2_r - p3_r + q4_r - p7_r; tmp2 = p1_r - p2_r + q5_r - p7_r; tmp1 += tmp0; p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); tmp1 += tmp2; p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, p1_filter16); p0_filter16 = __msa_bmnz_v(p2_out, p0_filter16, flat2); p1_filter16 = __msa_bmnz_v(p1_out, p1_filter16, flat2); dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); SD(dword0, src); src += pitch; SD(dword1, src); src += pitch; /* calculation of p0 and q0 */ tmp0 = (p0_r - p1_r) + (q6_r - p7_r); tmp2 = (q7_r - p0_r) + (q0_r - p7_r); tmp1 += tmp0; p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); tmp1 += tmp2; p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, p1_filter16); p0_filter16 = __msa_bmnz_v(p0_out, p0_filter16, flat2); p1_filter16 = __msa_bmnz_v(q0_out, p1_filter16, flat2); dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); SD(dword0, src); src += pitch; SD(dword1, src); src += pitch; /* calculation of q1 and q2 */ tmp0 = q7_r - q0_r + q1_r - p6_r; tmp2 = q7_r - q1_r + q2_r - p5_r; tmp1 += tmp0; p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); tmp1 += tmp2; p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, p1_filter16); p0_filter16 = __msa_bmnz_v(q1_out, p0_filter16, flat2); p1_filter16 = __msa_bmnz_v(q2_out, p1_filter16, flat2); dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); SD(dword0, src); src += pitch; SD(dword1, src); src += pitch; /* calculation of q3 and q4 */ tmp0 = (q7_r - q2_r) + (q3_r - p4_r); tmp2 = (q7_r - q3_r) + (q4_r - p3_r); tmp1 += tmp0; p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); tmp1 += tmp2; p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, p1_filter16); p0_filter16 = __msa_bmnz_v(q3, p0_filter16, flat2); p1_filter16 = __msa_bmnz_v(q4, p1_filter16, flat2); dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); dword1 = 
__msa_copy_u_d((v2i64)p1_filter16, 0); SD(dword0, src); src += pitch; SD(dword1, src); src += pitch; /* calculation of q5 and q6 */ tmp0 = (q7_r - q4_r) + (q5_r - p2_r); tmp2 = (q7_r - q5_r) + (q6_r - p1_r); tmp1 += tmp0; p0_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); tmp1 += tmp2; p1_filter16 = (v16u8)__msa_srari_h((v8i16)tmp1, 4); PCKEV_B2_UB(zero, p0_filter16, zero, p1_filter16, p0_filter16, p1_filter16); p0_filter16 = __msa_bmnz_v(q5, p0_filter16, flat2); p1_filter16 = __msa_bmnz_v(q6, p1_filter16, flat2); dword0 = __msa_copy_u_d((v2i64)p0_filter16, 0); dword1 = __msa_copy_u_d((v2i64)p1_filter16, 0); SD(dword0, src); src += pitch; SD(dword1, src); } } } else { mb_lpf_horizontal_edge_dual(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, count); } } void vpx_lpf_horizontal_16_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 1); } void vpx_lpf_horizontal_16_dual_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { mb_lpf_horizontal_edge(src, pitch, b_limit_ptr, limit_ptr, thresh_ptr, 2); } static void transpose_16x8_to_8x16(uint8_t *input, int32_t in_pitch, uint8_t *output, int32_t out_pitch) { v16u8 p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org; v16i8 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; LD_UB8(input, in_pitch, p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org); /* 8x8 transpose */ TRANSPOSE8x8_UB_UB(p7_org, p6_org, p5_org, p4_org, p3_org, p2_org, p1_org, p0_org, p7, p6, p5, p4, p3, p2, p1, p0); /* 8x8 transpose */ ILVL_B4_SB(p5_org, p7_org, p4_org, p6_org, p1_org, p3_org, p0_org, p2_org, tmp0, tmp1, tmp2, tmp3); ILVR_B2_SB(tmp1, tmp0, tmp3, tmp2, tmp4, tmp6); ILVL_B2_SB(tmp1, tmp0, tmp3, tmp2, tmp5, tmp7); ILVR_W2_UB(tmp6, tmp4, tmp7, tmp5, q0, q4); ILVL_W2_UB(tmp6, tmp4, tmp7, tmp5, q2, q6); SLDI_B4_0_UB(q0, q2, q4, q6, q1, q3, q5, q7, 8); ST_UB8(p7, p6, p5, p4, p3, p2, p1, p0, output, out_pitch); output += (8 * out_pitch); ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch); } static void transpose_8x16_to_16x8(uint8_t *input, int32_t in_pitch, uint8_t *output, int32_t out_pitch) { v16u8 p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o; v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; LD_UB8(input, in_pitch, p7, p6, p5, p4, p3, p2, p1, p0); LD_UB8(input + (8 * in_pitch), in_pitch, q0, q1, q2, q3, q4, q5, q6, q7); TRANSPOSE16x8_UB_UB(p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7, p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o); ST_UB8(p7_o, p6_o, p5_o, p4_o, p3_o, p2_o, p1_o, p0_o, output, out_pitch); } static void transpose_16x16(uint8_t *input, int32_t in_pitch, uint8_t *output, int32_t out_pitch) { v16u8 row0, row1, row2, row3, row4, row5, row6, row7; v16u8 row8, row9, row10, row11, row12, row13, row14, row15; v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; v8i16 tmp0, tmp1, tmp4, tmp5, tmp6, tmp7; v4i32 tmp2, tmp3; LD_UB8(input, in_pitch, row0, row1, row2, row3, row4, row5, row6, row7); input += (8 * in_pitch); LD_UB8(input, in_pitch, row8, row9, row10, row11, row12, row13, row14, row15); TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8, row9, row10, row11, row12, row13, row14, row15, p7, p6, p5, p4, p3, p2, p1, p0); /* transpose 16x8 matrix into 8x16 */ /* total 8 intermediate 
register and 32 instructions */ q7 = (v16u8)__msa_ilvod_d((v2i64)row8, (v2i64)row0); q6 = (v16u8)__msa_ilvod_d((v2i64)row9, (v2i64)row1); q5 = (v16u8)__msa_ilvod_d((v2i64)row10, (v2i64)row2); q4 = (v16u8)__msa_ilvod_d((v2i64)row11, (v2i64)row3); q3 = (v16u8)__msa_ilvod_d((v2i64)row12, (v2i64)row4); q2 = (v16u8)__msa_ilvod_d((v2i64)row13, (v2i64)row5); q1 = (v16u8)__msa_ilvod_d((v2i64)row14, (v2i64)row6); q0 = (v16u8)__msa_ilvod_d((v2i64)row15, (v2i64)row7); ILVEV_B2_SH(q7, q6, q5, q4, tmp0, tmp1); tmp4 = (v8i16)__msa_ilvod_b((v16i8)q6, (v16i8)q7); tmp5 = (v8i16)__msa_ilvod_b((v16i8)q4, (v16i8)q5); ILVEV_B2_UB(q3, q2, q1, q0, q5, q7); tmp6 = (v8i16)__msa_ilvod_b((v16i8)q2, (v16i8)q3); tmp7 = (v8i16)__msa_ilvod_b((v16i8)q0, (v16i8)q1); ILVEV_H2_SW(tmp0, tmp1, q5, q7, tmp2, tmp3); q0 = (v16u8)__msa_ilvev_w(tmp3, tmp2); q4 = (v16u8)__msa_ilvod_w(tmp3, tmp2); tmp2 = (v4i32)__msa_ilvod_h(tmp1, tmp0); tmp3 = (v4i32)__msa_ilvod_h((v8i16)q7, (v8i16)q5); q2 = (v16u8)__msa_ilvev_w(tmp3, tmp2); q6 = (v16u8)__msa_ilvod_w(tmp3, tmp2); ILVEV_H2_SW(tmp4, tmp5, tmp6, tmp7, tmp2, tmp3); q1 = (v16u8)__msa_ilvev_w(tmp3, tmp2); q5 = (v16u8)__msa_ilvod_w(tmp3, tmp2); tmp2 = (v4i32)__msa_ilvod_h(tmp5, tmp4); tmp3 = (v4i32)__msa_ilvod_h(tmp7, tmp6); q3 = (v16u8)__msa_ilvev_w(tmp3, tmp2); q7 = (v16u8)__msa_ilvod_w(tmp3, tmp2); ST_UB8(p7, p6, p5, p4, p3, p2, p1, p0, output, out_pitch); output += (8 * out_pitch); ST_UB8(q0, q1, q2, q3, q4, q5, q6, q7, output, out_pitch); } static int32_t vt_lpf_t4_and_t8_8w(uint8_t *src, uint8_t *filter48, uint8_t *src_org, int32_t pitch_org, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out; v16u8 flat, mask, hev, thresh, b_limit, limit; v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r; v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r; v16i8 zero = { 0 }; v8i16 vec0, vec1, vec2, vec3; /* load vector elements */ LD_UB8(src - (4 * 16), 16, p3, p2, p1, p0, q0, q1, q2, q3); thresh = (v16u8)__msa_fill_b(*thresh_ptr); b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); limit = (v16u8)__msa_fill_b(*limit_ptr); /* mask and hev */ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); /* flat4 */ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); /* filter4 */ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat); if (__msa_test_bz_v(flat)) { ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1); ILVRL_H2_SH(vec1, vec0, vec2, vec3); ST4x8_UB(vec2, vec3, (src_org - 2), pitch_org); return 1; } else { ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r); VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r); /* convert 16 bit output data into 8 bit */ p2_r = (v8u16)__msa_pckev_b((v16i8)p2_filt8_r, (v16i8)p2_filt8_r); p1_r = (v8u16)__msa_pckev_b((v16i8)p1_filt8_r, (v16i8)p1_filt8_r); p0_r = (v8u16)__msa_pckev_b((v16i8)p0_filt8_r, (v16i8)p0_filt8_r); q0_r = (v8u16)__msa_pckev_b((v16i8)q0_filt8_r, (v16i8)q0_filt8_r); q1_r = (v8u16)__msa_pckev_b((v16i8)q1_filt8_r, (v16i8)q1_filt8_r); q2_r = (v8u16)__msa_pckev_b((v16i8)q2_filt8_r, (v16i8)q2_filt8_r); /* store pixel values */ p2_out = __msa_bmnz_v(p2, (v16u8)p2_r, flat); p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_r, flat); p0_out = 
__msa_bmnz_v(p0_out, (v16u8)p0_r, flat); q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_r, flat); q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_r, flat); q2_out = __msa_bmnz_v(q2, (v16u8)q2_r, flat); ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16); filter48 += (4 * 16); ST_UB2(q1_out, q2_out, filter48, 16); filter48 += (2 * 16); ST_UB(flat, filter48); return 0; } } static int32_t vt_lpf_t16_8w(uint8_t *src, uint8_t *src_org, int32_t pitch, uint8_t *filter48) { v16i8 zero = { 0 }; v16u8 filter8, flat, flat2; v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in; v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in; v8u16 tmp0_r, tmp1_r; v8i16 r_out; flat = LD_UB(filter48 + 6 * 16); LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0); LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7); VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2); if (__msa_test_bz_v(flat2)) { v8i16 vec0, vec1, vec2, vec3, vec4; LD_UB4(filter48, 16, p2, p1, p0, q0); LD_UB2(filter48 + 4 * 16, 16, q1, q2); ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1); ILVRL_H2_SH(vec1, vec0, vec3, vec4); vec2 = (v8i16)__msa_ilvr_b((v16i8)q2, (v16i8)q1); src_org -= 3; ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src_org, pitch); ST2x4_UB(vec2, 0, (src_org + 4), pitch); src_org += (4 * pitch); ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src_org, pitch); ST2x4_UB(vec2, 4, (src_org + 4), pitch); return 1; } else { src -= 7 * 16; ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2, zero, p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in); q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0); tmp0_r = p7_r_in << 3; tmp0_r -= p7_r_in; tmp0_r += p6_r_in; tmp0_r += q0_r_in; tmp1_r = p6_r_in + p5_r_in; tmp1_r += p4_r_in; tmp1_r += p3_r_in; tmp1_r += p2_r_in; tmp1_r += p1_r_in; tmp1_r += p0_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2); ST8x1_UB(p6, src); src += 16; /* p5 */ q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1); tmp0_r = p5_r_in - p6_r_in; tmp0_r += q1_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2); ST8x1_UB(p5, src); src += 16; /* p4 */ q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2); tmp0_r = p4_r_in - p5_r_in; tmp0_r += q2_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2); ST8x1_UB(p4, src); src += 16; /* p3 */ q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3); tmp0_r = p3_r_in - p4_r_in; tmp0_r += q3_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2); ST8x1_UB(p3, src); src += 16; /* p2 */ q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4); filter8 = LD_UB(filter48); tmp0_r = p2_r_in - p3_r_in; tmp0_r += q4_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST8x1_UB(filter8, src); src += 16; /* p1 */ q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5); filter8 = LD_UB(filter48 + 16); tmp0_r = p1_r_in - p2_r_in; tmp0_r += q5_r_in; tmp0_r -= p7_r_in; 
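/* Illustration (added): the flat2/filter16 rows computed above and below
 * maintain one running 15-tap sum per column. Each step adds the sample
 * entering the window plus an extra copy of the new centre tap, and
 * subtracts the old centre tap plus the sample leaving on the p side,
 * then applies a rounded shift by 4 (what __msa_srari_h does per lane).
 * A minimal scalar sketch of that sliding window for a single column
 * follows; filter16_column_sketch is a hypothetical name, not part of
 * libvpx, and the block is compiled out. */
#if 0
static void filter16_column_sketch(const uint8_t pix[16] /* p7..p0,q0..q7 */,
                                   uint8_t out[14] /* filtered p6..q6 */) {
  int k, sum;

  sum = 7 * pix[0] + 2 * pix[1];          /* 7*p7 + 2*p6 */
  for (k = 2; k <= 8; k++) sum += pix[k]; /* p5..p0 and q0 */
  out[0] = (uint8_t)((sum + 8) >> 4);     /* rounded shift, as srari_h(., 4) */

  for (k = 1; k < 14; k++) {
    sum += pix[k + 1]                     /* new centre tap gains a copy */
           + pix[k + 8 > 15 ? 15 : k + 8] /* sample entering on the q side */
           - pix[k]                       /* old centre tap loses a copy */
           - pix[k <= 7 ? 0 : k - 7];     /* sample leaving on the p side */
    out[k] = (uint8_t)((sum + 8) >> 4);
  }
}
#endif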
tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST8x1_UB(filter8, src); src += 16; /* p0 */ q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6); filter8 = LD_UB(filter48 + 32); tmp0_r = p0_r_in - p1_r_in; tmp0_r += q6_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST8x1_UB(filter8, src); src += 16; /* q0 */ q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7); filter8 = LD_UB(filter48 + 48); tmp0_r = q7_r_in - p0_r_in; tmp0_r += q0_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST8x1_UB(filter8, src); src += 16; /* q1 */ filter8 = LD_UB(filter48 + 64); tmp0_r = q7_r_in - q0_r_in; tmp0_r += q1_r_in; tmp0_r -= p6_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST8x1_UB(filter8, src); src += 16; /* q2 */ filter8 = LD_UB(filter48 + 80); tmp0_r = q7_r_in - q1_r_in; tmp0_r += q2_r_in; tmp0_r -= p5_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST8x1_UB(filter8, src); src += 16; /* q3 */ tmp0_r = q7_r_in - q2_r_in; tmp0_r += q3_r_in; tmp0_r -= p4_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2); ST8x1_UB(q3, src); src += 16; /* q4 */ tmp0_r = q7_r_in - q3_r_in; tmp0_r += q4_r_in; tmp0_r -= p3_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2); ST8x1_UB(q4, src); src += 16; /* q5 */ tmp0_r = q7_r_in - q4_r_in; tmp0_r += q5_r_in; tmp0_r -= p2_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2); ST8x1_UB(q5, src); src += 16; /* q6 */ tmp0_r = q7_r_in - q5_r_in; tmp0_r += q6_r_in; tmp0_r -= p1_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); r_out = (v8i16)__msa_pckev_b((v16i8)r_out, (v16i8)r_out); q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2); ST8x1_UB(q6, src); return 0; } } void vpx_lpf_vertical_16_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { uint8_t early_exit = 0; DECLARE_ALIGNED(32, uint8_t, transposed_input[16 * 24]); uint8_t *filter48 = &transposed_input[16 * 16]; transpose_16x8_to_8x16(src - 8, pitch, transposed_input, 16); early_exit = vt_lpf_t4_and_t8_8w((transposed_input + 16 * 8), &filter48[0], src, pitch, b_limit_ptr, limit_ptr, thresh_ptr); if (0 == early_exit) { early_exit = vt_lpf_t16_8w((transposed_input + 16 * 8), src, pitch, &filter48[0]); if (0 == early_exit) { transpose_8x16_to_16x8(transposed_input, 16, src - 8, pitch); } } } static int32_t vt_lpf_t4_and_t8_16w(uint8_t *src, uint8_t *filter48, uint8_t *src_org, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out; 
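/* Illustration (added): vpx_lpf_vertical_16_msa above chains the three
 * filter strengths with early exits: filter4 always runs, the `flat` mask
 * promotes pixels to the 7-tap filter8 result kept in the filter48 scratch
 * buffer, and `flat2` promotes them again to the 15-tap filter16. Since
 * flat/flat2 are per-byte 0x00/0xFF masks, each vector blend
 * (__msa_bmnz_v) reduces per pixel to the branch-free selection sketched
 * below; select_sketch is a hypothetical name and the block is compiled
 * out. */
#if 0
static uint8_t select_sketch(uint8_t flat, uint8_t flat2, uint8_t f4,
                             uint8_t f8, uint8_t f16) {
  /* bmnz: take bits from the second operand where the mask bit is set */
  uint8_t out = (uint8_t)((f4 & ~flat) | (f8 & flat));
  return (uint8_t)((out & ~flat2) | (f16 & flat2));
}
#endif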
v16u8 flat, mask, hev, thresh, b_limit, limit; v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r; v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l; v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r; v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l; v16i8 zero = { 0 }; v8i16 vec0, vec1, vec2, vec3, vec4, vec5; /* load vector elements */ LD_UB8(src - (4 * 16), 16, p3, p2, p1, p0, q0, q1, q2, q3); thresh = (v16u8)__msa_fill_b(*thresh_ptr); b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); limit = (v16u8)__msa_fill_b(*limit_ptr); /* mask and hev */ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); /* flat4 */ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); /* filter4 */ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); if (__msa_test_bz_v(flat)) { ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1); ILVRL_H2_SH(vec1, vec0, vec2, vec3); ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1); ILVRL_H2_SH(vec1, vec0, vec4, vec5); src_org -= 2; ST4x8_UB(vec2, vec3, src_org, pitch); src_org += 8 * pitch; ST4x8_UB(vec4, vec5, src_org, pitch); return 1; } else { ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r); VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r); ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l); ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l); VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l); /* convert 16 bit output data into 8 bit */ PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l, p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r); PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r, q2_filt8_r); /* store pixel values */ p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat); p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat); p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat); q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat); q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat); q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat); ST_UB4(p2_out, p1_out, p0_out, q0_out, filter48, 16); filter48 += (4 * 16); ST_UB2(q1_out, q2_out, filter48, 16); filter48 += (2 * 16); ST_UB(flat, filter48); return 0; } } static int32_t vt_lpf_t16_16w(uint8_t *src, uint8_t *src_org, int32_t pitch, uint8_t *filter48) { v16u8 flat, flat2, filter8; v16i8 zero = { 0 }; v16u8 p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; v8u16 p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in; v8u16 q7_r_in, q6_r_in, q5_r_in, q4_r_in, q3_r_in, q2_r_in, q1_r_in, q0_r_in; v8u16 p7_l_in, p6_l_in, p5_l_in, p4_l_in, p3_l_in, p2_l_in, p1_l_in, p0_l_in; v8u16 q7_l_in, q6_l_in, q5_l_in, q4_l_in, q3_l_in, q2_l_in, q1_l_in, q0_l_in; v8u16 tmp0_r, tmp1_r, tmp0_l, tmp1_l; v8i16 l_out, r_out; flat = LD_UB(filter48 + 6 * 16); LD_UB8((src - 8 * 16), 16, p7, p6, p5, p4, p3, p2, p1, p0); LD_UB8(src, 16, q0, q1, q2, q3, q4, q5, q6, q7); VP9_FLAT5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, flat, flat2); if (__msa_test_bz_v(flat2)) { v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; LD_UB4(filter48, 16, p2, p1, p0, q0); LD_UB2(filter48 + 4 * 16, 16, q1, q2); ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1); 
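/* Illustration (added): this early-exit branch reloads the six filter8
 * outputs from the filter48 scratch area and interleaves them so that each
 * row's pixels become contiguous bytes across the vertical edge:
 * ST4x4_UB writes {p2,p1,p0,q0} as one 4-byte group and ST2x4_UB writes
 * the {q1,q2} pair two bytes further right. The per-row byte layout the
 * interleaves produce is sketched below; the helper name is hypothetical
 * and the block is compiled out. */
#if 0
static void pack_vert8_row_sketch(uint8_t row[6], uint8_t p2, uint8_t p1,
                                  uint8_t p0, uint8_t q0, uint8_t q1,
                                  uint8_t q2) {
  /* six consecutive bytes straddling the edge (between row[2] and row[3]) */
  row[0] = p2;
  row[1] = p1;
  row[2] = p0;
  row[3] = q0;
  row[4] = q1; /* stored by ST2x4_UB at offset +4 */
  row[5] = q2;
}
#endif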
ILVRL_H2_SH(vec1, vec0, vec3, vec4); ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1); ILVRL_H2_SH(vec1, vec0, vec6, vec7); ILVRL_B2_SH(q2, q1, vec2, vec5); src_org -= 3; ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src_org, pitch); ST2x4_UB(vec2, 0, (src_org + 4), pitch); src_org += (4 * pitch); ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src_org, pitch); ST2x4_UB(vec2, 4, (src_org + 4), pitch); src_org += (4 * pitch); ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src_org, pitch); ST2x4_UB(vec5, 0, (src_org + 4), pitch); src_org += (4 * pitch); ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src_org, pitch); ST2x4_UB(vec5, 4, (src_org + 4), pitch); return 1; } else { src -= 7 * 16; ILVR_B8_UH(zero, p7, zero, p6, zero, p5, zero, p4, zero, p3, zero, p2, zero, p1, zero, p0, p7_r_in, p6_r_in, p5_r_in, p4_r_in, p3_r_in, p2_r_in, p1_r_in, p0_r_in); q0_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q0); tmp0_r = p7_r_in << 3; tmp0_r -= p7_r_in; tmp0_r += p6_r_in; tmp0_r += q0_r_in; tmp1_r = p6_r_in + p5_r_in; tmp1_r += p4_r_in; tmp1_r += p3_r_in; tmp1_r += p2_r_in; tmp1_r += p1_r_in; tmp1_r += p0_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); ILVL_B4_UH(zero, p7, zero, p6, zero, p5, zero, p4, p7_l_in, p6_l_in, p5_l_in, p4_l_in); ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l_in, p2_l_in, p1_l_in, p0_l_in); q0_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q0); tmp0_l = p7_l_in << 3; tmp0_l -= p7_l_in; tmp0_l += p6_l_in; tmp0_l += q0_l_in; tmp1_l = p6_l_in + p5_l_in; tmp1_l += p4_l_in; tmp1_l += p3_l_in; tmp1_l += p2_l_in; tmp1_l += p1_l_in; tmp1_l += p0_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); p6 = __msa_bmnz_v(p6, (v16u8)r_out, flat2); ST_UB(p6, src); src += 16; /* p5 */ q1_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q1); tmp0_r = p5_r_in - p6_r_in; tmp0_r += q1_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q1_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q1); tmp0_l = p5_l_in - p6_l_in; tmp0_l += q1_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); p5 = __msa_bmnz_v(p5, (v16u8)r_out, flat2); ST_UB(p5, src); src += 16; /* p4 */ q2_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q2); tmp0_r = p4_r_in - p5_r_in; tmp0_r += q2_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q2_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q2); tmp0_l = p4_l_in - p5_l_in; tmp0_l += q2_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); p4 = __msa_bmnz_v(p4, (v16u8)r_out, flat2); ST_UB(p4, src); src += 16; /* p3 */ q3_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q3); tmp0_r = p3_r_in - p4_r_in; tmp0_r += q3_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q3_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q3); tmp0_l = p3_l_in - p4_l_in; tmp0_l += q3_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); p3 = __msa_bmnz_v(p3, (v16u8)r_out, flat2); ST_UB(p3, src); src += 16; /* p2 */ q4_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q4); filter8 = LD_UB(filter48); tmp0_r = p2_r_in - p3_r_in; tmp0_r += q4_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q4_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q4); tmp0_l = p2_l_in - p3_l_in; tmp0_l += q4_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); 
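/* Illustration (added): two idioms recur throughout these MSA kernels.
 * Each 16-byte register is widened into a "right" and a "left" 8x16-bit
 * half (ilvr_b/ilvl_b against a zero vector) so the wide tap sums cannot
 * overflow, and __msa_srari_h(v, 4) performs a rounded arithmetic shift,
 * i.e. the vector form of ROUND_POWER_OF_TWO. Scalar sketches with
 * hypothetical helper names, compiled out: */
#if 0
static int16_t srari_h_sketch(int16_t v, int shift) {
  /* add half of the divisor, then shift: (v + 8) >> 4 for shift == 4 */
  return (int16_t)((v + (1 << (shift - 1))) >> shift);
}

static void widen_sketch(const uint8_t in[16], uint16_t r[8], uint16_t l[8]) {
  int i;
  for (i = 0; i < 8; i++) r[i] = in[i];     /* ilvr_b(zero, in) */
  for (i = 0; i < 8; i++) l[i] = in[8 + i]; /* ilvl_b(zero, in) */
}
#endif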
r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST_UB(filter8, src); src += 16; /* p1 */ q5_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q5); filter8 = LD_UB(filter48 + 16); tmp0_r = p1_r_in - p2_r_in; tmp0_r += q5_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q5_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q5); tmp0_l = p1_l_in - p2_l_in; tmp0_l += q5_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)(tmp1_l), 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST_UB(filter8, src); src += 16; /* p0 */ q6_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q6); filter8 = LD_UB(filter48 + 32); tmp0_r = p0_r_in - p1_r_in; tmp0_r += q6_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q6_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q6); tmp0_l = p0_l_in - p1_l_in; tmp0_l += q6_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST_UB(filter8, src); src += 16; /* q0 */ q7_r_in = (v8u16)__msa_ilvr_b(zero, (v16i8)q7); filter8 = LD_UB(filter48 + 48); tmp0_r = q7_r_in - p0_r_in; tmp0_r += q0_r_in; tmp0_r -= p7_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); q7_l_in = (v8u16)__msa_ilvl_b(zero, (v16i8)q7); tmp0_l = q7_l_in - p0_l_in; tmp0_l += q0_l_in; tmp0_l -= p7_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST_UB(filter8, src); src += 16; /* q1 */ filter8 = LD_UB(filter48 + 64); tmp0_r = q7_r_in - q0_r_in; tmp0_r += q1_r_in; tmp0_r -= p6_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); tmp0_l = q7_l_in - q0_l_in; tmp0_l += q1_l_in; tmp0_l -= p6_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST_UB(filter8, src); src += 16; /* q2 */ filter8 = LD_UB(filter48 + 80); tmp0_r = q7_r_in - q1_r_in; tmp0_r += q2_r_in; tmp0_r -= p5_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); tmp0_l = q7_l_in - q1_l_in; tmp0_l += q2_l_in; tmp0_l -= p5_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); filter8 = __msa_bmnz_v(filter8, (v16u8)r_out, flat2); ST_UB(filter8, src); src += 16; /* q3 */ tmp0_r = q7_r_in - q2_r_in; tmp0_r += q3_r_in; tmp0_r -= p4_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); tmp0_l = q7_l_in - q2_l_in; tmp0_l += q3_l_in; tmp0_l -= p4_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); q3 = __msa_bmnz_v(q3, (v16u8)r_out, flat2); ST_UB(q3, src); src += 16; /* q4 */ tmp0_r = q7_r_in - q3_r_in; tmp0_r += q4_r_in; tmp0_r -= p3_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); tmp0_l = q7_l_in - q3_l_in; tmp0_l += q4_l_in; tmp0_l -= p3_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); q4 = __msa_bmnz_v(q4, (v16u8)r_out, flat2); ST_UB(q4, src); src += 16; /* q5 */ tmp0_r = q7_r_in - q4_r_in; tmp0_r += q5_r_in; tmp0_r -= p2_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); tmp0_l = q7_l_in - q4_l_in; tmp0_l 
+= q5_l_in; tmp0_l -= p2_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); q5 = __msa_bmnz_v(q5, (v16u8)r_out, flat2); ST_UB(q5, src); src += 16; /* q6 */ tmp0_r = q7_r_in - q5_r_in; tmp0_r += q6_r_in; tmp0_r -= p1_r_in; tmp1_r += tmp0_r; r_out = __msa_srari_h((v8i16)tmp1_r, 4); tmp0_l = q7_l_in - q5_l_in; tmp0_l += q6_l_in; tmp0_l -= p1_l_in; tmp1_l += tmp0_l; l_out = __msa_srari_h((v8i16)tmp1_l, 4); r_out = (v8i16)__msa_pckev_b((v16i8)l_out, (v16i8)r_out); q6 = __msa_bmnz_v(q6, (v16u8)r_out, flat2); ST_UB(q6, src); return 0; } } void vpx_lpf_vertical_16_dual_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { uint8_t early_exit = 0; DECLARE_ALIGNED(32, uint8_t, transposed_input[16 * 24]); uint8_t *filter48 = &transposed_input[16 * 16]; transpose_16x16((src - 8), pitch, &transposed_input[0], 16); early_exit = vt_lpf_t4_and_t8_16w((transposed_input + 16 * 8), &filter48[0], src, pitch, b_limit_ptr, limit_ptr, thresh_ptr); if (0 == early_exit) { early_exit = vt_lpf_t16_16w((transposed_input + 16 * 8), src, pitch, &filter48[0]); if (0 == early_exit) { transpose_16x16(transposed_input, 16, (src - 8), pitch); } } } libvpx-1.8.2/vpx_dsp/mips/loopfilter_4_msa.c000066400000000000000000000134751357355204000211050ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/loopfilter_msa.h" void vpx_lpf_horizontal_4_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { uint64_t p1_d, p0_d, q0_d, q1_d; v16u8 mask, hev, flat, thresh, b_limit, limit; v16u8 p3, p2, p1, p0, q3, q2, q1, q0, p1_out, p0_out, q0_out, q1_out; /* load vector elements */ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); thresh = (v16u8)__msa_fill_b(*thresh_ptr); b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); limit = (v16u8)__msa_fill_b(*limit_ptr); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); p1_d = __msa_copy_u_d((v2i64)p1_out, 0); p0_d = __msa_copy_u_d((v2i64)p0_out, 0); q0_d = __msa_copy_u_d((v2i64)q0_out, 0); q1_d = __msa_copy_u_d((v2i64)q1_out, 0); SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch); } void vpx_lpf_horizontal_4_dual_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit0_ptr, const uint8_t *limit0_ptr, const uint8_t *thresh0_ptr, const uint8_t *b_limit1_ptr, const uint8_t *limit1_ptr, const uint8_t *thresh1_ptr) { v16u8 mask, hev, flat, thresh0, b_limit0, limit0, thresh1, b_limit1, limit1; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; /* load vector elements */ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr); thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr); thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0); b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr); b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr); b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0); limit0 = (v16u8)__msa_fill_b(*limit0_ptr); limit1 = (v16u8)__msa_fill_b(*limit1_ptr); limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev, mask, flat); VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1); ST_UB4(p1, p0, q0, q1, (src - 2 * pitch), pitch); } void vpx_lpf_vertical_4_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { v16u8 mask, hev, flat, limit, thresh, b_limit; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v8i16 vec0, vec1, vec2, vec3; LD_UB8((src - 4), pitch, p3, p2, p1, p0, q0, q1, q2, q3); thresh = (v16u8)__msa_fill_b(*thresh_ptr); b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); limit = (v16u8)__msa_fill_b(*limit_ptr); TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3, p3, p2, p1, p0, q0, q1, q2, q3); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1); ILVR_B2_SH(p0, p1, q1, q0, vec0, vec1); ILVRL_H2_SH(vec1, vec0, vec2, vec3); src -= 2; ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch); src += 4 * pitch; ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch); } void vpx_lpf_vertical_4_dual_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit0_ptr, const uint8_t *limit0_ptr, const uint8_t *thresh0_ptr, const uint8_t *b_limit1_ptr, const uint8_t *limit1_ptr, const uint8_t *thresh1_ptr) { v16u8 mask, hev, flat; v16u8 thresh0, b_limit0, limit0, thresh1, b_limit1, limit1; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 row0, row1, row2, row3, row4, row5, row6, row7; v16u8 row8, row9, row10, row11, row12, row13, row14, row15; v8i16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5; LD_UB8(src - 4, pitch, row0, row1, row2, row3, row4, row5, row6, row7); LD_UB8(src - 4 + (8 
* pitch), pitch, row8, row9, row10, row11, row12, row13, row14, row15); TRANSPOSE16x8_UB_UB(row0, row1, row2, row3, row4, row5, row6, row7, row8, row9, row10, row11, row12, row13, row14, row15, p3, p2, p1, p0, q0, q1, q2, q3); thresh0 = (v16u8)__msa_fill_b(*thresh0_ptr); thresh1 = (v16u8)__msa_fill_b(*thresh1_ptr); thresh0 = (v16u8)__msa_ilvr_d((v2i64)thresh1, (v2i64)thresh0); b_limit0 = (v16u8)__msa_fill_b(*b_limit0_ptr); b_limit1 = (v16u8)__msa_fill_b(*b_limit1_ptr); b_limit0 = (v16u8)__msa_ilvr_d((v2i64)b_limit1, (v2i64)b_limit0); limit0 = (v16u8)__msa_fill_b(*limit0_ptr); limit1 = (v16u8)__msa_fill_b(*limit1_ptr); limit0 = (v16u8)__msa_ilvr_d((v2i64)limit1, (v2i64)limit0); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit0, b_limit0, thresh0, hev, mask, flat); VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1, p0, q0, q1); ILVR_B2_SH(p0, p1, q1, q0, tmp0, tmp1); ILVRL_H2_SH(tmp1, tmp0, tmp2, tmp3); ILVL_B2_SH(p0, p1, q1, q0, tmp0, tmp1); ILVRL_H2_SH(tmp1, tmp0, tmp4, tmp5); src -= 2; ST4x8_UB(tmp2, tmp3, src, pitch); src += (8 * pitch); ST4x8_UB(tmp4, tmp5, src, pitch); } libvpx-1.8.2/vpx_dsp/mips/loopfilter_8_msa.c000066400000000000000000000325551357355204000211110ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/loopfilter_msa.h" void vpx_lpf_horizontal_8_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { uint64_t p2_d, p1_d, p0_d, q0_d, q1_d, q2_d; v16u8 mask, hev, flat, thresh, b_limit, limit; v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out; v8i16 p2_filter8, p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8; v8u16 p3_r, p2_r, p1_r, p0_r, q3_r, q2_r, q1_r, q0_r; v16i8 zero = { 0 }; /* load vector elements */ LD_UB8((src - 4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); thresh = (v16u8)__msa_fill_b(*thresh_ptr); b_limit = (v16u8)__msa_fill_b(*b_limit_ptr); limit = (v16u8)__msa_fill_b(*limit_ptr); LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat); if (__msa_test_bz_v(flat)) { p1_d = __msa_copy_u_d((v2i64)p1_out, 0); p0_d = __msa_copy_u_d((v2i64)p0_out, 0); q0_d = __msa_copy_u_d((v2i64)q0_out, 0); q1_d = __msa_copy_u_d((v2i64)q1_out, 0); SD4(p1_d, p0_d, q0_d, q1_d, (src - 2 * pitch), pitch); } else { ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r); VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filter8, p1_filter8, p0_filter8, q0_filter8, q1_filter8, q2_filter8); /* convert 16 bit output data into 8 bit */ PCKEV_B4_SH(zero, p2_filter8, zero, p1_filter8, zero, p0_filter8, zero, q0_filter8, p2_filter8, p1_filter8, p0_filter8, q0_filter8); PCKEV_B2_SH(zero, q1_filter8, zero, q2_filter8, q1_filter8, q2_filter8); /* store pixel values */ p2_out = __msa_bmnz_v(p2, (v16u8)p2_filter8, flat); p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filter8, 
flat); p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filter8, flat); q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filter8, flat); q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filter8, flat); q2_out = __msa_bmnz_v(q2, (v16u8)q2_filter8, flat); p2_d = __msa_copy_u_d((v2i64)p2_out, 0); p1_d = __msa_copy_u_d((v2i64)p1_out, 0); p0_d = __msa_copy_u_d((v2i64)p0_out, 0); q0_d = __msa_copy_u_d((v2i64)q0_out, 0); q1_d = __msa_copy_u_d((v2i64)q1_out, 0); q2_d = __msa_copy_u_d((v2i64)q2_out, 0); src -= 3 * pitch; SD4(p2_d, p1_d, p0_d, q0_d, src, pitch); src += (4 * pitch); SD(q1_d, src); src += pitch; SD(q2_d, src); } } void vpx_lpf_horizontal_8_dual_msa( uint8_t *src, int32_t pitch, const uint8_t *b_limit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *b_limit1, const uint8_t *limit1, const uint8_t *thresh1) { v16u8 p3, p2, p1, p0, q3, q2, q1, q0; v16u8 p2_out, p1_out, p0_out, q0_out, q1_out, q2_out; v16u8 flat, mask, hev, tmp, thresh, b_limit, limit; v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r; v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l; v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r; v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l; v16u8 zero = { 0 }; /* load vector elements */ LD_UB8(src - (4 * pitch), pitch, p3, p2, p1, p0, q0, q1, q2, q3); thresh = (v16u8)__msa_fill_b(*thresh0); tmp = (v16u8)__msa_fill_b(*thresh1); thresh = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)thresh); b_limit = (v16u8)__msa_fill_b(*b_limit0); tmp = (v16u8)__msa_fill_b(*b_limit1); b_limit = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)b_limit); limit = (v16u8)__msa_fill_b(*limit0); tmp = (v16u8)__msa_fill_b(*limit1); limit = (v16u8)__msa_ilvr_d((v2i64)tmp, (v2i64)limit); /* mask and hev */ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); if (__msa_test_bz_v(flat)) { ST_UB4(p1_out, p0_out, q0_out, q1_out, (src - 2 * pitch), pitch); } else { ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r); VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r); ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l); ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l); VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l); /* convert 16 bit output data into 8 bit */ PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l, p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r); PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r, q2_filt8_r); /* store pixel values */ p2_out = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat); p1_out = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat); p0_out = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat); q0_out = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat); q1_out = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat); q2_out = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat); src -= 3 * pitch; ST_UB4(p2_out, p1_out, p0_out, q0_out, src, pitch); src += (4 * pitch); ST_UB2(q1_out, q2_out, src, pitch); src += (2 * pitch); } } void vpx_lpf_vertical_8_msa(uint8_t *src, int32_t pitch, const uint8_t *b_limit_ptr, const uint8_t *limit_ptr, const uint8_t *thresh_ptr) { 
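/* Illustration (added): the *_dual_msa variants above handle two adjacent
 * 8-pixel segments in one pass by splatting each segment's thresh,
 * b_limit and limit byte across one half of a vector and joining the
 * halves with an interleave of doublewords (__msa_ilvr_d), so a single
 * 16-byte comparison applies both parameter sets at once. A scalar sketch
 * of the combined vector (hypothetical helper name, compiled out): */
#if 0
static void build_dual_param_sketch(uint8_t out[16], uint8_t param0,
                                    uint8_t param1) {
  int i;
  for (i = 0; i < 8; i++) out[i] = param0;  /* low half: first segment */
  for (i = 8; i < 16; i++) out[i] = param1; /* high half: second segment */
}
#endif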
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
  v16u8 p1_out, p0_out, q0_out, q1_out;
  v16u8 flat, mask, hev, thresh, b_limit, limit;
  v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
  v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
  v16u8 zero = { 0 };
  v8i16 vec0, vec1, vec2, vec3, vec4;

  /* load vector elements */
  LD_UB8(src - 4, pitch, p3, p2, p1, p0, q0, q1, q2, q3);

  TRANSPOSE8x8_UB_UB(p3, p2, p1, p0, q0, q1, q2, q3, p3, p2, p1, p0, q0, q1,
                     q2, q3);

  thresh = (v16u8)__msa_fill_b(*thresh_ptr);
  b_limit = (v16u8)__msa_fill_b(*b_limit_ptr);
  limit = (v16u8)__msa_fill_b(*limit_ptr);

  /* mask and hev */
  LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev,
               mask, flat);
  /* flat4 */
  VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat);
  /* filter4 */
  VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out,
                     q1_out);

  flat = (v16u8)__msa_ilvr_d((v2i64)zero, (v2i64)flat);

  if (__msa_test_bz_v(flat)) {
    /* Store 4 pixels p1 to q1 */
    ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1);
    ILVRL_H2_SH(vec1, vec0, vec2, vec3);

    src -= 2;
    ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
    src += 4 * pitch;
    ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
  } else {
    ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1,
               zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r,
               q3_r);
    VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r,
                p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r);

    /* convert 16 bit output data into 8 bit */
    PCKEV_B4_SH(p2_filt8_r, p2_filt8_r, p1_filt8_r, p1_filt8_r, p0_filt8_r,
                p0_filt8_r, q0_filt8_r, q0_filt8_r, p2_filt8_r, p1_filt8_r,
                p0_filt8_r, q0_filt8_r);
    PCKEV_B2_SH(q1_filt8_r, q1_filt8_r, q2_filt8_r, q2_filt8_r, q1_filt8_r,
                q2_filt8_r);

    /* store pixel values */
    p2 = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat);
    p1 = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat);
    p0 = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat);
    q0 = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat);
    q1 = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat);
    q2 = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat);

    /* Store 6 pixels p2 to q2 */
    ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1);
    ILVRL_H2_SH(vec1, vec0, vec2, vec3);
    vec4 = (v8i16)__msa_ilvr_b((v16i8)q2, (v16i8)q1);

    src -= 3;
    ST4x4_UB(vec2, vec2, 0, 1, 2, 3, src, pitch);
    ST2x4_UB(vec4, 0, src + 4, pitch);
    src += (4 * pitch);
    ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch);
    ST2x4_UB(vec4, 4, src + 4, pitch);
  }
}

void vpx_lpf_vertical_8_dual_msa(uint8_t *src, int32_t pitch,
                                 const uint8_t *b_limit0,
                                 const uint8_t *limit0, const uint8_t *thresh0,
                                 const uint8_t *b_limit1,
                                 const uint8_t *limit1,
                                 const uint8_t *thresh1) {
  uint8_t *temp_src;
  v16u8 p3, p2, p1, p0, q3, q2, q1, q0;
  v16u8 p1_out, p0_out, q0_out, q1_out;
  v16u8 flat, mask, hev, thresh, b_limit, limit;
  v16u8 row4, row5, row6, row7, row12, row13, row14, row15;
  v8u16 p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r;
  v8u16 p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l;
  v8i16 p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r;
  v8i16 p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l;
  v16u8 zero = { 0 };
  v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7;

  temp_src = src - 4;

  LD_UB8(temp_src, pitch, p0, p1, p2, p3, row4, row5, row6, row7);
  temp_src += (8 * pitch);
  LD_UB8(temp_src, pitch, q3, q2, q1, q0, row12, row13, row14, row15);

  /* transpose 16x8 matrix into 8x16 */
  TRANSPOSE16x8_UB_UB(p0, p1, p2, p3, row4, row5, row6, row7, q3, q2, q1, q0,
                      row12, row13, row14, row15, p3, p2, p1, p0, q0, q1, q2,
                      q3);

  thresh =
(v16u8)__msa_fill_b(*thresh0); vec0 = (v8i16)__msa_fill_b(*thresh1); thresh = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)thresh); b_limit = (v16u8)__msa_fill_b(*b_limit0); vec0 = (v8i16)__msa_fill_b(*b_limit1); b_limit = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)b_limit); limit = (v16u8)__msa_fill_b(*limit0); vec0 = (v8i16)__msa_fill_b(*limit1); limit = (v16u8)__msa_ilvr_d((v2i64)vec0, (v2i64)limit); /* mask and hev */ LPF_MASK_HEV(p3, p2, p1, p0, q0, q1, q2, q3, limit, b_limit, thresh, hev, mask, flat); /* flat4 */ VP9_FLAT4(p3, p2, p0, q0, q2, q3, flat); /* filter4 */ VP9_LPF_FILTER4_4W(p1, p0, q0, q1, mask, hev, p1_out, p0_out, q0_out, q1_out); if (__msa_test_bz_v(flat)) { ILVR_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1); ILVRL_H2_SH(vec1, vec0, vec2, vec3); ILVL_B2_SH(p0_out, p1_out, q1_out, q0_out, vec0, vec1); ILVRL_H2_SH(vec1, vec0, vec4, vec5); src -= 2; ST4x8_UB(vec2, vec3, src, pitch); src += 8 * pitch; ST4x8_UB(vec4, vec5, src, pitch); } else { ILVR_B8_UH(zero, p3, zero, p2, zero, p1, zero, p0, zero, q0, zero, q1, zero, q2, zero, q3, p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r); VP9_FILTER8(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r, q1_filt8_r, q2_filt8_r); ILVL_B4_UH(zero, p3, zero, p2, zero, p1, zero, p0, p3_l, p2_l, p1_l, p0_l); ILVL_B4_UH(zero, q0, zero, q1, zero, q2, zero, q3, q0_l, q1_l, q2_l, q3_l); /* filter8 */ VP9_FILTER8(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, p2_filt8_l, p1_filt8_l, p0_filt8_l, q0_filt8_l, q1_filt8_l, q2_filt8_l); /* convert 16 bit output data into 8 bit */ PCKEV_B4_SH(p2_filt8_l, p2_filt8_r, p1_filt8_l, p1_filt8_r, p0_filt8_l, p0_filt8_r, q0_filt8_l, q0_filt8_r, p2_filt8_r, p1_filt8_r, p0_filt8_r, q0_filt8_r); PCKEV_B2_SH(q1_filt8_l, q1_filt8_r, q2_filt8_l, q2_filt8_r, q1_filt8_r, q2_filt8_r); /* store pixel values */ p2 = __msa_bmnz_v(p2, (v16u8)p2_filt8_r, flat); p1 = __msa_bmnz_v(p1_out, (v16u8)p1_filt8_r, flat); p0 = __msa_bmnz_v(p0_out, (v16u8)p0_filt8_r, flat); q0 = __msa_bmnz_v(q0_out, (v16u8)q0_filt8_r, flat); q1 = __msa_bmnz_v(q1_out, (v16u8)q1_filt8_r, flat); q2 = __msa_bmnz_v(q2, (v16u8)q2_filt8_r, flat); ILVR_B2_SH(p1, p2, q0, p0, vec0, vec1); ILVRL_H2_SH(vec1, vec0, vec3, vec4); ILVL_B2_SH(p1, p2, q0, p0, vec0, vec1); ILVRL_H2_SH(vec1, vec0, vec6, vec7); ILVRL_B2_SH(q2, q1, vec2, vec5); src -= 3; ST4x4_UB(vec3, vec3, 0, 1, 2, 3, src, pitch); ST2x4_UB(vec2, 0, src + 4, pitch); src += (4 * pitch); ST4x4_UB(vec4, vec4, 0, 1, 2, 3, src, pitch); ST2x4_UB(vec2, 4, src + 4, pitch); src += (4 * pitch); ST4x4_UB(vec6, vec6, 0, 1, 2, 3, src, pitch); ST2x4_UB(vec5, 0, src + 4, pitch); src += (4 * pitch); ST4x4_UB(vec7, vec7, 0, 1, 2, 3, src, pitch); ST2x4_UB(vec5, 4, src + 4, pitch); } } libvpx-1.8.2/vpx_dsp/mips/loopfilter_filters_dspr2.c000066400000000000000000000274221357355204000226610ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/mips/common_dspr2.h" #include "vpx_dsp/mips/loopfilter_filters_dspr2.h" #include "vpx_dsp/mips/loopfilter_macros_dspr2.h" #include "vpx_dsp/mips/loopfilter_masks_dspr2.h" #include "vpx_mem/vpx_mem.h" #if HAVE_DSPR2 void vpx_lpf_horizontal_4_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { uint8_t i; uint32_t mask; uint32_t hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; uint8_t *sm1, *s0, *s1, *s2, *s3, *s4, *s5, *s6; uint32_t thresh_vec, flimit_vec, limit_vec; uint32_t uflimit, ulimit, uthresh; uflimit = *blimit; ulimit = *limit; uthresh = *thresh; /* create quad-byte */ __asm__ __volatile__( "replv.qb %[thresh_vec], %[uthresh] \n\t" "replv.qb %[flimit_vec], %[uflimit] \n\t" "replv.qb %[limit_vec], %[ulimit] \n\t" : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec), [limit_vec] "=r"(limit_vec) : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit)); /* prefetch data for store */ prefetch_store(s); /* loop filter designed to work using chars so that we can make maximum use of 8 bit simd instructions. */ for (i = 0; i < 2; i++) { sm1 = s - (pitch << 2); s0 = sm1 + pitch; s1 = s0 + pitch; s2 = s - pitch; s3 = s; s4 = s + pitch; s5 = s4 + pitch; s6 = s5 + pitch; __asm__ __volatile__( "lw %[p1], (%[s1]) \n\t" "lw %[p2], (%[s2]) \n\t" "lw %[p3], (%[s3]) \n\t" "lw %[p4], (%[s4]) \n\t" : [p1] "=&r"(p1), [p2] "=&r"(p2), [p3] "=&r"(p3), [p4] "=&r"(p4) : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4)); /* if (p1 - p4 == 0) and (p2 - p3 == 0) mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { __asm__ __volatile__( "lw %[pm1], (%[sm1]) \n\t" "lw %[p0], (%[s0]) \n\t" "lw %[p5], (%[s5]) \n\t" "lw %[p6], (%[s6]) \n\t" : [pm1] "=&r"(pm1), [p0] "=&r"(p0), [p5] "=&r"(p5), [p6] "=&r"(p6) : [sm1] "r"(sm1), [s0] "r"(s0), [s5] "r"(s5), [s6] "r"(s6)); filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, p0, p3, p4, p5, p6, thresh_vec, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ filter_dspr2(mask, hev, &p1, &p2, &p3, &p4); __asm__ __volatile__( "sw %[p1], (%[s1]) \n\t" "sw %[p2], (%[s2]) \n\t" "sw %[p3], (%[s3]) \n\t" "sw %[p4], (%[s4]) \n\t" : : [p1] "r"(p1), [p2] "r"(p2), [p3] "r"(p3), [p4] "r"(p4), [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4)); } } s = s + 4; } } void vpx_lpf_vertical_4_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { uint8_t i; uint32_t mask, hev; uint32_t pm1, p0, p1, p2, p3, p4, p5, p6; uint8_t *s1, *s2, *s3, *s4; uint32_t prim1, prim2, sec3, sec4, prim3, prim4; uint32_t thresh_vec, flimit_vec, limit_vec; uint32_t uflimit, ulimit, uthresh; uflimit = *blimit; ulimit = *limit; uthresh = *thresh; /* create quad-byte */ __asm__ __volatile__( "replv.qb %[thresh_vec], %[uthresh] \n\t" "replv.qb %[flimit_vec], %[uflimit] \n\t" "replv.qb %[limit_vec], %[ulimit] \n\t" : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec), [limit_vec] "=r"(limit_vec) : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit)); /* prefetch data for store */ prefetch_store(s + pitch); for (i = 0; i < 2; i++) { s1 = s; s2 = s + pitch; s3 = s2 + pitch; s4 = s3 + pitch; s = s4 + pitch; /* load quad-byte vectors * memory is 4 byte aligned */ p2 = *((uint32_t *)(s1 - 4)); p6 = *((uint32_t *)(s1)); p1 = *((uint32_t *)(s2 - 4)); p5 = *((uint32_t *)(s2)); p0 = *((uint32_t 
*)(s3 - 4)); p4 = *((uint32_t *)(s3)); pm1 = *((uint32_t *)(s4 - 4)); p3 = *((uint32_t *)(s4)); /* transpose pm1, p0, p1, p2 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p2], %[p1] \n\t" "precr.qb.ph %[prim2], %[p2], %[p1] \n\t" "precrq.qb.ph %[prim3], %[p0], %[pm1] \n\t" "precr.qb.ph %[prim4], %[p0], %[pm1] \n\t" "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" "precr.qb.ph %[pm1], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p2], %[p1], %[sec3] \n\t" "precrq.ph.w %[p0], %[pm1], %[sec4] \n\t" "append %[p1], %[sec3], 16 \n\t" "append %[pm1], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), [pm1] "+r"(pm1), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* transpose p3, p4, p5, p6 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p6], %[p5] \n\t" "precr.qb.ph %[prim2], %[p6], %[p5] \n\t" "precrq.qb.ph %[prim3], %[p4], %[p3] \n\t" "precr.qb.ph %[prim4], %[p4], %[p3] \n\t" "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p6], %[p5], %[sec3] \n\t" "precrq.ph.w %[p4], %[p3], %[sec4] \n\t" "append %[p5], %[sec3], 16 \n\t" "append %[p3], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p6] "+r"(p6), [p5] "+r"(p5), [p4] "+r"(p4), [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* if (p1 - p4 == 0) and (p2 - p3 == 0) * mask will be zero and filtering is not needed */ if (!(((p1 - p4) == 0) && ((p2 - p3) == 0))) { filter_hev_mask_dspr2(limit_vec, flimit_vec, p1, p2, pm1, p0, p3, p4, p5, p6, thresh_vec, &hev, &mask); /* if mask == 0 do filtering is not needed */ if (mask) { /* filtering */ filter_dspr2(mask, hev, &p1, &p2, &p3, &p4); /* unpack processed 4x4 neighborhood * don't use transpose on output data * because memory isn't aligned */ __asm__ __volatile__( "sb %[p4], 1(%[s4]) \n\t" "sb %[p3], 0(%[s4]) \n\t" "sb %[p2], -1(%[s4]) \n\t" "sb %[p1], -2(%[s4]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [s4] "r"(s4)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s3]) \n\t" "sb %[p3], 0(%[s3]) \n\t" "sb %[p2], -1(%[s3]) \n\t" "sb %[p1], -2(%[s3]) \n\t" : [p1] "+r"(p1) : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [s3] "r"(s3)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s2]) \n\t" "sb %[p3], 0(%[s2]) \n\t" "sb %[p2], -1(%[s2]) \n\t" "sb %[p1], -2(%[s2]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [s2] "r"(s2)); __asm__ __volatile__( "srl %[p4], %[p4], 8 \n\t" "srl %[p3], %[p3], 8 \n\t" "srl %[p2], %[p2], 8 \n\t" "srl %[p1], %[p1], 8 \n\t" : [p4] "+r"(p4), [p3] "+r"(p3), [p2] "+r"(p2), [p1] "+r"(p1) :); __asm__ __volatile__( "sb %[p4], 1(%[s1]) \n\t" "sb %[p3], 0(%[s1]) \n\t" "sb %[p2], -1(%[s1]) \n\t" "sb %[p1], -2(%[s1]) \n\t" : : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [s1] "r"(s1)); } } } } void vpx_lpf_horizontal_4_dual_dspr2( uint8_t *s, int p /* pitch */, const 
uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { vpx_lpf_horizontal_4_dspr2(s, p, blimit0, limit0, thresh0); vpx_lpf_horizontal_4_dspr2(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_horizontal_8_dual_dspr2( uint8_t *s, int p /* pitch */, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { vpx_lpf_horizontal_8_dspr2(s, p, blimit0, limit0, thresh0); vpx_lpf_horizontal_8_dspr2(s + 8, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_4_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { vpx_lpf_vertical_4_dspr2(s, p, blimit0, limit0, thresh0); vpx_lpf_vertical_4_dspr2(s + 8 * p, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_8_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { vpx_lpf_vertical_8_dspr2(s, p, blimit0, limit0, thresh0); vpx_lpf_vertical_8_dspr2(s + 8 * p, p, blimit1, limit1, thresh1); } void vpx_lpf_vertical_16_dual_dspr2(uint8_t *s, int p, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { vpx_lpf_vertical_16_dspr2(s, p, blimit, limit, thresh); vpx_lpf_vertical_16_dspr2(s + 8 * p, p, blimit, limit, thresh); } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vpx_dsp/mips/loopfilter_filters_dspr2.h000066400000000000000000001112761357355204000226670ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_DSP_MIPS_LOOPFILTER_FILTERS_DSPR2_H_ #define VPX_VPX_DSP_MIPS_LOOPFILTER_FILTERS_DSPR2_H_ #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #ifdef __cplusplus extern "C" { #endif #if HAVE_DSPR2 /* inputs & outputs are quad-byte vectors */ static INLINE void filter_dspr2(uint32_t mask, uint32_t hev, uint32_t *ps1, uint32_t *ps0, uint32_t *qs0, uint32_t *qs1) { int32_t vpx_filter_l, vpx_filter_r; int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r; int32_t subr_r, subr_l; uint32_t t1, t2, HWM, t3; uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r; int32_t vps1, vps0, vqs0, vqs1; int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r; uint32_t N128; N128 = 0x80808080; t1 = 0x03000300; t2 = 0x04000400; t3 = 0x01000100; HWM = 0xFF00FF00; vps0 = (*ps0) ^ N128; vps1 = (*ps1) ^ N128; vqs0 = (*qs0) ^ N128; vqs1 = (*qs1) ^ N128; /* use halfword pairs instead quad-bytes because of accuracy */ vps0_l = vps0 & HWM; vps0_r = vps0 << 8; vps0_r = vps0_r & HWM; vps1_l = vps1 & HWM; vps1_r = vps1 << 8; vps1_r = vps1_r & HWM; vqs0_l = vqs0 & HWM; vqs0_r = vqs0 << 8; vqs0_r = vqs0_r & HWM; vqs1_l = vqs1 & HWM; vqs1_r = vqs1 << 8; vqs1_r = vqs1_r & HWM; mask_l = mask & HWM; mask_r = mask << 8; mask_r = mask_r & HWM; hev_l = hev & HWM; hev_r = hev << 8; hev_r = hev_r & HWM; __asm__ __volatile__( /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */ "subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t" "subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t" /* qs0 - ps0 */ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" /* vpx_filter &= hev; */ "and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t" "and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t" /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t" "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t" "xor %[invhev_l], %[hev_l], %[HWM] \n\t" "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t" "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t" "xor %[invhev_r], %[hev_r], %[HWM] \n\t" "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t" "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t" /* vpx_filter &= mask; */ "and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t" "and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t" : [vpx_filter_l] "=&r"(vpx_filter_l), [vpx_filter_r] "=&r"(vpx_filter_r), [subr_l] "=&r"(subr_l), [subr_r] "=&r"(subr_r), [invhev_l] "=&r"(invhev_l), [invhev_r] "=&r"(invhev_r) : [vps0_l] "r"(vps0_l), [vps0_r] "r"(vps0_r), [vps1_l] "r"(vps1_l), [vps1_r] "r"(vps1_r), [vqs0_l] "r"(vqs0_l), [vqs0_r] "r"(vqs0_r), [vqs1_l] "r"(vqs1_l), [vqs1_r] "r"(vqs1_r), [mask_l] "r"(mask_l), [mask_r] "r"(mask_r), [hev_l] "r"(hev_l), [hev_r] "r"(hev_r), [HWM] "r"(HWM)); /* save bottom 3 bits so that we round one side +4 and the other +3 */ __asm__ __volatile__( /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3; */ "addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t" "addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t" /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3; */ "addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t" "addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t" "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t" "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t" "and 
%[Filter1_l], %[Filter1_l], %[HWM] \n\t" "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t" /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */ "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t" "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t" /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */ "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t" "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t" : [Filter1_l] "=&r"(Filter1_l), [Filter1_r] "=&r"(Filter1_r), [Filter2_l] "=&r"(Filter2_l), [Filter2_r] "=&r"(Filter2_r), [vps0_l] "+r"(vps0_l), [vps0_r] "+r"(vps0_r), [vqs0_l] "+r"(vqs0_l), [vqs0_r] "+r"(vqs0_r) : [t1] "r"(t1), [t2] "r"(t2), [HWM] "r"(HWM), [vpx_filter_l] "r"(vpx_filter_l), [vpx_filter_r] "r"(vpx_filter_r)); __asm__ __volatile__( /* (vpx_filter += 1) >>= 1 */ "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t" "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t" /* vpx_filter &= ~hev; */ "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t" "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t" /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */ "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t" "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t" /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */ "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t" "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t" : [Filter1_l] "+r"(Filter1_l), [Filter1_r] "+r"(Filter1_r), [vps1_l] "+r"(vps1_l), [vps1_r] "+r"(vps1_r), [vqs1_l] "+r"(vqs1_l), [vqs1_r] "+r"(vqs1_r) : [t3] "r"(t3), [invhev_l] "r"(invhev_l), [invhev_r] "r"(invhev_r)); /* Create quad-bytes from halfword pairs */ vqs0_l = vqs0_l & HWM; vqs1_l = vqs1_l & HWM; vps0_l = vps0_l & HWM; vps1_l = vps1_l & HWM; __asm__ __volatile__( "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t" "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t" "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t" "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t" : [vps1_r] "+r"(vps1_r), [vqs1_r] "+r"(vqs1_r), [vps0_r] "+r"(vps0_r), [vqs0_r] "+r"(vqs0_r) :); vqs0 = vqs0_l | vqs0_r; vqs1 = vqs1_l | vqs1_r; vps0 = vps0_l | vps0_r; vps1 = vps1_l | vps1_r; *ps0 = vps0 ^ N128; *ps1 = vps1 ^ N128; *qs0 = vqs0 ^ N128; *qs1 = vqs1 ^ N128; } static INLINE void filter1_dspr2(uint32_t mask, uint32_t hev, uint32_t ps1, uint32_t ps0, uint32_t qs0, uint32_t qs1, uint32_t *p1_f0, uint32_t *p0_f0, uint32_t *q0_f0, uint32_t *q1_f0) { int32_t vpx_filter_l, vpx_filter_r; int32_t Filter1_l, Filter1_r, Filter2_l, Filter2_r; int32_t subr_r, subr_l; uint32_t t1, t2, HWM, t3; uint32_t hev_l, hev_r, mask_l, mask_r, invhev_l, invhev_r; int32_t vps1, vps0, vqs0, vqs1; int32_t vps1_l, vps1_r, vps0_l, vps0_r, vqs0_l, vqs0_r, vqs1_l, vqs1_r; uint32_t N128; N128 = 0x80808080; t1 = 0x03000300; t2 = 0x04000400; t3 = 0x01000100; HWM = 0xFF00FF00; vps0 = (ps0) ^ N128; vps1 = (ps1) ^ N128; vqs0 = (qs0) ^ N128; vqs1 = (qs1) ^ N128; /* use halfword pairs instead quad-bytes because of accuracy */ vps0_l = vps0 & HWM; vps0_r = vps0 << 8; vps0_r = vps0_r & HWM; vps1_l = vps1 & HWM; vps1_r = vps1 << 8; vps1_r = vps1_r & HWM; vqs0_l = vqs0 & HWM; vqs0_r = vqs0 << 8; vqs0_r = vqs0_r & HWM; vqs1_l = vqs1 & HWM; vqs1_r = vqs1 << 8; vqs1_r = vqs1_r & HWM; mask_l = mask & HWM; mask_r = mask << 8; mask_r = mask_r & HWM; hev_l = hev & HWM; hev_r = hev << 8; hev_r = hev_r & HWM; __asm__ __volatile__( /* vpx_filter = vp8_signed_char_clamp(ps1 - qs1); */ "subq_s.ph %[vpx_filter_l], %[vps1_l], %[vqs1_l] \n\t" "subq_s.ph %[vpx_filter_r], %[vps1_r], %[vqs1_r] \n\t" /* qs0 - ps0 */ "subq_s.ph %[subr_l], %[vqs0_l], %[vps0_l] \n\t" "subq_s.ph %[subr_r], %[vqs0_r], %[vps0_r] \n\t" /* 
vpx_filter &= hev; */ "and %[vpx_filter_l], %[vpx_filter_l], %[hev_l] \n\t" "and %[vpx_filter_r], %[vpx_filter_r], %[hev_r] \n\t" /* vpx_filter = vp8_signed_char_clamp(vpx_filter + 3 * (qs0 - ps0)); */ "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t" "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t" "xor %[invhev_l], %[hev_l], %[HWM] \n\t" "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t" "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t" "xor %[invhev_r], %[hev_r], %[HWM] \n\t" "addq_s.ph %[vpx_filter_l], %[vpx_filter_l], %[subr_l] \n\t" "addq_s.ph %[vpx_filter_r], %[vpx_filter_r], %[subr_r] \n\t" /* vpx_filter &= mask; */ "and %[vpx_filter_l], %[vpx_filter_l], %[mask_l] \n\t" "and %[vpx_filter_r], %[vpx_filter_r], %[mask_r] \n\t" : [vpx_filter_l] "=&r"(vpx_filter_l), [vpx_filter_r] "=&r"(vpx_filter_r), [subr_l] "=&r"(subr_l), [subr_r] "=&r"(subr_r), [invhev_l] "=&r"(invhev_l), [invhev_r] "=&r"(invhev_r) : [vps0_l] "r"(vps0_l), [vps0_r] "r"(vps0_r), [vps1_l] "r"(vps1_l), [vps1_r] "r"(vps1_r), [vqs0_l] "r"(vqs0_l), [vqs0_r] "r"(vqs0_r), [vqs1_l] "r"(vqs1_l), [vqs1_r] "r"(vqs1_r), [mask_l] "r"(mask_l), [mask_r] "r"(mask_r), [hev_l] "r"(hev_l), [hev_r] "r"(hev_r), [HWM] "r"(HWM)); /* save bottom 3 bits so that we round one side +4 and the other +3 */ __asm__ __volatile__( /* Filter2 = vp8_signed_char_clamp(vpx_filter + 3) >>= 3; */ "addq_s.ph %[Filter1_l], %[vpx_filter_l], %[t2] \n\t" "addq_s.ph %[Filter1_r], %[vpx_filter_r], %[t2] \n\t" /* Filter1 = vp8_signed_char_clamp(vpx_filter + 4) >>= 3; */ "addq_s.ph %[Filter2_l], %[vpx_filter_l], %[t1] \n\t" "addq_s.ph %[Filter2_r], %[vpx_filter_r], %[t1] \n\t" "shra.ph %[Filter1_r], %[Filter1_r], 3 \n\t" "shra.ph %[Filter1_l], %[Filter1_l], 3 \n\t" "shra.ph %[Filter2_l], %[Filter2_l], 3 \n\t" "shra.ph %[Filter2_r], %[Filter2_r], 3 \n\t" "and %[Filter1_l], %[Filter1_l], %[HWM] \n\t" "and %[Filter1_r], %[Filter1_r], %[HWM] \n\t" /* vps0 = vp8_signed_char_clamp(ps0 + Filter2); */ "addq_s.ph %[vps0_l], %[vps0_l], %[Filter2_l] \n\t" "addq_s.ph %[vps0_r], %[vps0_r], %[Filter2_r] \n\t" /* vqs0 = vp8_signed_char_clamp(qs0 - Filter1); */ "subq_s.ph %[vqs0_l], %[vqs0_l], %[Filter1_l] \n\t" "subq_s.ph %[vqs0_r], %[vqs0_r], %[Filter1_r] \n\t" : [Filter1_l] "=&r"(Filter1_l), [Filter1_r] "=&r"(Filter1_r), [Filter2_l] "=&r"(Filter2_l), [Filter2_r] "=&r"(Filter2_r), [vps0_l] "+r"(vps0_l), [vps0_r] "+r"(vps0_r), [vqs0_l] "+r"(vqs0_l), [vqs0_r] "+r"(vqs0_r) : [t1] "r"(t1), [t2] "r"(t2), [HWM] "r"(HWM), [vpx_filter_l] "r"(vpx_filter_l), [vpx_filter_r] "r"(vpx_filter_r)); __asm__ __volatile__( /* (vpx_filter += 1) >>= 1 */ "addqh.ph %[Filter1_l], %[Filter1_l], %[t3] \n\t" "addqh.ph %[Filter1_r], %[Filter1_r], %[t3] \n\t" /* vpx_filter &= ~hev; */ "and %[Filter1_l], %[Filter1_l], %[invhev_l] \n\t" "and %[Filter1_r], %[Filter1_r], %[invhev_r] \n\t" /* vps1 = vp8_signed_char_clamp(ps1 + vpx_filter); */ "addq_s.ph %[vps1_l], %[vps1_l], %[Filter1_l] \n\t" "addq_s.ph %[vps1_r], %[vps1_r], %[Filter1_r] \n\t" /* vqs1 = vp8_signed_char_clamp(qs1 - vpx_filter); */ "subq_s.ph %[vqs1_l], %[vqs1_l], %[Filter1_l] \n\t" "subq_s.ph %[vqs1_r], %[vqs1_r], %[Filter1_r] \n\t" : [Filter1_l] "+r"(Filter1_l), [Filter1_r] "+r"(Filter1_r), [vps1_l] "+r"(vps1_l), [vps1_r] "+r"(vps1_r), [vqs1_l] "+r"(vqs1_l), [vqs1_r] "+r"(vqs1_r) : [t3] "r"(t3), [invhev_l] "r"(invhev_l), [invhev_r] "r"(invhev_r)); /* Create quad-bytes from halfword pairs */ vqs0_l = vqs0_l & HWM; vqs1_l = vqs1_l & HWM; vps0_l = vps0_l & HWM; vps1_l = vps1_l & HWM; __asm__ 
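  /* the right-half results still sit in the upper byte of each halfword;
     shift them down 8 bits so they can be OR-ed with the HWM-masked left
     halves to rebuild the quad-byte outputs */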
__volatile__( "shrl.ph %[vqs0_r], %[vqs0_r], 8 \n\t" "shrl.ph %[vps0_r], %[vps0_r], 8 \n\t" "shrl.ph %[vqs1_r], %[vqs1_r], 8 \n\t" "shrl.ph %[vps1_r], %[vps1_r], 8 \n\t" : [vps1_r] "+r"(vps1_r), [vqs1_r] "+r"(vqs1_r), [vps0_r] "+r"(vps0_r), [vqs0_r] "+r"(vqs0_r) :); vqs0 = vqs0_l | vqs0_r; vqs1 = vqs1_l | vqs1_r; vps0 = vps0_l | vps0_r; vps1 = vps1_l | vps1_r; *p0_f0 = vps0 ^ N128; *p1_f0 = vps1 ^ N128; *q0_f0 = vqs0 ^ N128; *q1_f0 = vqs1 ^ N128; } static INLINE void mbfilter_dspr2(uint32_t *op3, uint32_t *op2, uint32_t *op1, uint32_t *op0, uint32_t *oq0, uint32_t *oq1, uint32_t *oq2, uint32_t *oq3) { /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */ const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; uint32_t res_op2, res_op1, res_op0; uint32_t res_oq0, res_oq1, res_oq2; uint32_t tmp; uint32_t add_p210_q012; uint32_t u32Four = 0x00040004; /* *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3) 1 */ /* *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3) 2 */ /* *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3) 3 */ /* *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3) 4 */ /* *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3) 5 */ /* *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3) 6 */ __asm__ __volatile__( "addu.ph %[add_p210_q012], %[p2], %[p1] \n\t" "addu.ph %[add_p210_q012], %[add_p210_q012], %[p0] \n\t" "addu.ph %[add_p210_q012], %[add_p210_q012], %[q0] \n\t" "addu.ph %[add_p210_q012], %[add_p210_q012], %[q1] \n\t" "addu.ph %[add_p210_q012], %[add_p210_q012], %[q2] \n\t" "addu.ph %[add_p210_q012], %[add_p210_q012], %[u32Four] \n\t" "shll.ph %[tmp], %[p3], 1 \n\t" "addu.ph %[res_op2], %[tmp], %[p3] \n\t" "addu.ph %[res_op1], %[p3], %[p3] \n\t" "addu.ph %[res_op2], %[res_op2], %[p2] \n\t" "addu.ph %[res_op1], %[res_op1], %[p1] \n\t" "addu.ph %[res_op2], %[res_op2], %[add_p210_q012] \n\t" "addu.ph %[res_op1], %[res_op1], %[add_p210_q012] \n\t" "subu.ph %[res_op2], %[res_op2], %[q1] \n\t" "subu.ph %[res_op1], %[res_op1], %[q2] \n\t" "subu.ph %[res_op2], %[res_op2], %[q2] \n\t" "shrl.ph %[res_op1], %[res_op1], 3 \n\t" "shrl.ph %[res_op2], %[res_op2], 3 \n\t" "addu.ph %[res_op0], %[p3], %[p0] \n\t" "addu.ph %[res_oq0], %[q0], %[q3] \n\t" "addu.ph %[res_op0], %[res_op0], %[add_p210_q012] \n\t" "addu.ph %[res_oq0], %[res_oq0], %[add_p210_q012] \n\t" "addu.ph %[res_oq1], %[q3], %[q3] \n\t" "shll.ph %[tmp], %[q3], 1 \n\t" "addu.ph %[res_oq1], %[res_oq1], %[q1] \n\t" "addu.ph %[res_oq2], %[tmp], %[q3] \n\t" "addu.ph %[res_oq1], %[res_oq1], %[add_p210_q012] \n\t" "addu.ph %[res_oq2], %[res_oq2], %[add_p210_q012] \n\t" "subu.ph %[res_oq1], %[res_oq1], %[p2] \n\t" "addu.ph %[res_oq2], %[res_oq2], %[q2] \n\t" "shrl.ph %[res_oq1], %[res_oq1], 3 \n\t" "subu.ph %[res_oq2], %[res_oq2], %[p2] \n\t" "shrl.ph %[res_oq0], %[res_oq0], 3 \n\t" "subu.ph %[res_oq2], %[res_oq2], %[p1] \n\t" "shrl.ph %[res_op0], %[res_op0], 3 \n\t" "shrl.ph %[res_oq2], %[res_oq2], 3 \n\t" : [add_p210_q012] "=&r"(add_p210_q012), [tmp] "=&r"(tmp), [res_op2] "=&r"(res_op2), [res_op1] "=&r"(res_op1), [res_op0] "=&r"(res_op0), [res_oq0] "=&r"(res_oq0), [res_oq1] "=&r"(res_oq1), [res_oq2] "=&r"(res_oq2) : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [q1] "r"(q1), [p2] "r"(p2), [q2] "r"(q2), [p3] "r"(p3), [q3] "r"(q3), [u32Four] "r"(u32Four)); *op2 = res_op2; *op1 = res_op1; *op0 = res_op0; *oq0 = res_oq0; *oq1 = res_oq1; *oq2 = res_oq2; } static INLINE void 
mbfilter1_dspr2(uint32_t p3, uint32_t p2, uint32_t p1, uint32_t p0, uint32_t q0, uint32_t q1, uint32_t q2, uint32_t q3, uint32_t *op2_f1, uint32_t *op1_f1, uint32_t *op0_f1, uint32_t *oq0_f1, uint32_t *oq1_f1, uint32_t *oq2_f1) { /* use a 7 tap filter [1, 1, 1, 2, 1, 1, 1] for flat line */ uint32_t res_op2, res_op1, res_op0; uint32_t res_oq0, res_oq1, res_oq2; uint32_t tmp; uint32_t add_p210_q012; uint32_t u32Four = 0x00040004; /* *op2 = ROUND_POWER_OF_TWO(p3 + p3 + p3 + p2 + p2 + p1 + p0 + q0, 3) 1 */ /* *op1 = ROUND_POWER_OF_TWO(p3 + p3 + p2 + p1 + p1 + p0 + q0 + q1, 3) 2 */ /* *op0 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + p0 + q0 + q1 + q2, 3) 3 */ /* *oq0 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q0 + q1 + q2 + q3, 3) 4 */ /* *oq1 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q1 + q2 + q3 + q3, 3) 5 */ /* *oq2 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q2 + q3 + q3 + q3, 3) 6 */ __asm__ __volatile__( "addu.ph %[add_p210_q012], %[p2], %[p1] \n\t" "addu.ph %[add_p210_q012], %[add_p210_q012], %[p0] \n\t" "addu.ph %[add_p210_q012], %[add_p210_q012], %[q0] \n\t" "addu.ph %[add_p210_q012], %[add_p210_q012], %[q1] \n\t" "addu.ph %[add_p210_q012], %[add_p210_q012], %[q2] \n\t" "addu.ph %[add_p210_q012], %[add_p210_q012], %[u32Four] \n\t" "shll.ph %[tmp], %[p3], 1 \n\t" "addu.ph %[res_op2], %[tmp], %[p3] \n\t" "addu.ph %[res_op1], %[p3], %[p3] \n\t" "addu.ph %[res_op2], %[res_op2], %[p2] \n\t" "addu.ph %[res_op1], %[res_op1], %[p1] \n\t" "addu.ph %[res_op2], %[res_op2], %[add_p210_q012] \n\t" "addu.ph %[res_op1], %[res_op1], %[add_p210_q012] \n\t" "subu.ph %[res_op2], %[res_op2], %[q1] \n\t" "subu.ph %[res_op1], %[res_op1], %[q2] \n\t" "subu.ph %[res_op2], %[res_op2], %[q2] \n\t" "shrl.ph %[res_op1], %[res_op1], 3 \n\t" "shrl.ph %[res_op2], %[res_op2], 3 \n\t" "addu.ph %[res_op0], %[p3], %[p0] \n\t" "addu.ph %[res_oq0], %[q0], %[q3] \n\t" "addu.ph %[res_op0], %[res_op0], %[add_p210_q012] \n\t" "addu.ph %[res_oq0], %[res_oq0], %[add_p210_q012] \n\t" "addu.ph %[res_oq1], %[q3], %[q3] \n\t" "shll.ph %[tmp], %[q3], 1 \n\t" "addu.ph %[res_oq1], %[res_oq1], %[q1] \n\t" "addu.ph %[res_oq2], %[tmp], %[q3] \n\t" "addu.ph %[res_oq1], %[res_oq1], %[add_p210_q012] \n\t" "addu.ph %[res_oq2], %[res_oq2], %[add_p210_q012] \n\t" "subu.ph %[res_oq1], %[res_oq1], %[p2] \n\t" "addu.ph %[res_oq2], %[res_oq2], %[q2] \n\t" "shrl.ph %[res_oq1], %[res_oq1], 3 \n\t" "subu.ph %[res_oq2], %[res_oq2], %[p2] \n\t" "shrl.ph %[res_oq0], %[res_oq0], 3 \n\t" "subu.ph %[res_oq2], %[res_oq2], %[p1] \n\t" "shrl.ph %[res_op0], %[res_op0], 3 \n\t" "shrl.ph %[res_oq2], %[res_oq2], 3 \n\t" : [add_p210_q012] "=&r"(add_p210_q012), [tmp] "=&r"(tmp), [res_op2] "=&r"(res_op2), [res_op1] "=&r"(res_op1), [res_op0] "=&r"(res_op0), [res_oq0] "=&r"(res_oq0), [res_oq1] "=&r"(res_oq1), [res_oq2] "=&r"(res_oq2) : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [q1] "r"(q1), [p2] "r"(p2), [q2] "r"(q2), [p3] "r"(p3), [q3] "r"(q3), [u32Four] "r"(u32Four)); *op2_f1 = res_op2; *op1_f1 = res_op1; *op0_f1 = res_op0; *oq0_f1 = res_oq0; *oq1_f1 = res_oq1; *oq2_f1 = res_oq2; } static INLINE void wide_mbfilter_dspr2( uint32_t *op7, uint32_t *op6, uint32_t *op5, uint32_t *op4, uint32_t *op3, uint32_t *op2, uint32_t *op1, uint32_t *op0, uint32_t *oq0, uint32_t *oq1, uint32_t *oq2, uint32_t *oq3, uint32_t *oq4, uint32_t *oq5, uint32_t *oq6, uint32_t *oq7) { const uint32_t p7 = *op7, p6 = *op6, p5 = *op5, p4 = *op4; const uint32_t p3 = *op3, p2 = *op2, p1 = *op1, p0 = *op0; const uint32_t q0 = *oq0, q1 = *oq1, q2 = *oq2, q3 = *oq3; const uint32_t q4 = *oq4, q5 = *oq5, q6 = 
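      /* 15-tap filter strategy: the shared sum p6 + ... + q6 (plus 8 for
         rounding) is built once in add_p6toq6; each output tap then adds its
         own weighted end pixel and subtracts the few terms its formula does
         not use */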
*oq6, q7 = *oq7; uint32_t res_op6, res_op5, res_op4, res_op3, res_op2, res_op1, res_op0; uint32_t res_oq0, res_oq1, res_oq2, res_oq3, res_oq4, res_oq5, res_oq6; uint32_t tmp; uint32_t add_p6toq6; uint32_t u32Eight = 0x00080008; __asm__ __volatile__( /* addition of p6,p5,p4,p3,p2,p1,p0,q0,q1,q2,q3,q4,q5,q6 which is used most of the time */ "addu.ph %[add_p6toq6], %[p6], %[p5] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[p4] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[p3] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[p2] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[p1] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[p0] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[q0] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[q1] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[q2] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[q3] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[q4] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[q5] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[q6] \n\t" "addu.ph %[add_p6toq6], %[add_p6toq6], %[u32Eight] \n\t" : [add_p6toq6] "=&r"(add_p6toq6) : [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3), [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [u32Eight] "r"(u32Eight)); __asm__ __volatile__( /* *op6 = ROUND_POWER_OF_TWO(p7 * 7 + p6 * 2 + p5 + p4 + p3 + p2 + p1 + p0 + q0, 4) */ "shll.ph %[tmp], %[p7], 3 \n\t" "subu.ph %[res_op6], %[tmp], %[p7] \n\t" "addu.ph %[res_op6], %[res_op6], %[p6] \n\t" "addu.ph %[res_op6], %[res_op6], %[add_p6toq6] \n\t" "subu.ph %[res_op6], %[res_op6], %[q1] \n\t" "subu.ph %[res_op6], %[res_op6], %[q2] \n\t" "subu.ph %[res_op6], %[res_op6], %[q3] \n\t" "subu.ph %[res_op6], %[res_op6], %[q4] \n\t" "subu.ph %[res_op6], %[res_op6], %[q5] \n\t" "subu.ph %[res_op6], %[res_op6], %[q6] \n\t" "shrl.ph %[res_op6], %[res_op6], 4 \n\t" /* *op5 = ROUND_POWER_OF_TWO(p7 * 6 + p6 + p5 * 2 + p4 + p3 + p2 + p1 + p0 + q0 + q1, 4) */ "shll.ph %[tmp], %[p7], 2 \n\t" "addu.ph %[res_op5], %[tmp], %[p7] \n\t" "addu.ph %[res_op5], %[res_op5], %[p7] \n\t" "addu.ph %[res_op5], %[res_op5], %[p5] \n\t" "addu.ph %[res_op5], %[res_op5], %[add_p6toq6] \n\t" "subu.ph %[res_op5], %[res_op5], %[q2] \n\t" "subu.ph %[res_op5], %[res_op5], %[q3] \n\t" "subu.ph %[res_op5], %[res_op5], %[q4] \n\t" "subu.ph %[res_op5], %[res_op5], %[q5] \n\t" "subu.ph %[res_op5], %[res_op5], %[q6] \n\t" "shrl.ph %[res_op5], %[res_op5], 4 \n\t" /* *op4 = ROUND_POWER_OF_TWO(p7 * 5 + p6 + p5 + p4 * 2 + p3 + p2 + p1 + p0 + q0 + q1 + q2, 4) */ "shll.ph %[tmp], %[p7], 2 \n\t" "addu.ph %[res_op4], %[tmp], %[p7] \n\t" "addu.ph %[res_op4], %[res_op4], %[p4] \n\t" "addu.ph %[res_op4], %[res_op4], %[add_p6toq6] \n\t" "subu.ph %[res_op4], %[res_op4], %[q3] \n\t" "subu.ph %[res_op4], %[res_op4], %[q4] \n\t" "subu.ph %[res_op4], %[res_op4], %[q5] \n\t" "subu.ph %[res_op4], %[res_op4], %[q6] \n\t" "shrl.ph %[res_op4], %[res_op4], 4 \n\t" /* *op3 = ROUND_POWER_OF_TWO(p7 * 4 + p6 + p5 + p4 + p3 * 2 + p2 + p1 + p0 + q0 + q1 + q2 + q3, 4) */ "shll.ph %[tmp], %[p7], 2 \n\t" "addu.ph %[res_op3], %[tmp], %[p3] \n\t" "addu.ph %[res_op3], %[res_op3], %[add_p6toq6] \n\t" "subu.ph %[res_op3], %[res_op3], %[q4] \n\t" "subu.ph %[res_op3], %[res_op3], %[q5] \n\t" "subu.ph %[res_op3], %[res_op3], %[q6] \n\t" "shrl.ph %[res_op3], %[res_op3], 4 \n\t" /* *op2 = ROUND_POWER_OF_TWO(p7 * 3 + p6 + p5 + p4 + p3 + p2 * 2 + p1 + p0 + q0 + q1 + q2 + q3 + q4, 4) */ "shll.ph %[tmp], %[p7], 1 \n\t" "addu.ph %[res_op2], %[tmp], %[p7] \n\t" 
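      /* res_op2 = p7 * 3 so far; p2 and add_p6toq6 are added next, then
         q5 and q6 are subtracted to leave the op2 tap sum before >> 4 */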
"addu.ph %[res_op2], %[res_op2], %[p2] \n\t" "addu.ph %[res_op2], %[res_op2], %[add_p6toq6] \n\t" "subu.ph %[res_op2], %[res_op2], %[q5] \n\t" "subu.ph %[res_op2], %[res_op2], %[q6] \n\t" "shrl.ph %[res_op2], %[res_op2], 4 \n\t" /* *op1 = ROUND_POWER_OF_TWO(p7 * 2 + p6 + p5 + p4 + p3 + p2 + p1 * 2 + p0 + q0 + q1 + q2 + q3 + q4 + q5, 4); */ "shll.ph %[tmp], %[p7], 1 \n\t" "addu.ph %[res_op1], %[tmp], %[p1] \n\t" "addu.ph %[res_op1], %[res_op1], %[add_p6toq6] \n\t" "subu.ph %[res_op1], %[res_op1], %[q6] \n\t" "shrl.ph %[res_op1], %[res_op1], 4 \n\t" /* *op0 = ROUND_POWER_OF_TWO(p7 + p6 + p5 + p4 + p3 + p2 + p1 + p0 * 2 + q0 + q1 + q2 + q3 + q4 + q5 + q6, 4) */ "addu.ph %[res_op0], %[p7], %[p0] \n\t" "addu.ph %[res_op0], %[res_op0], %[add_p6toq6] \n\t" "shrl.ph %[res_op0], %[res_op0], 4 \n\t" : [res_op6] "=&r"(res_op6), [res_op5] "=&r"(res_op5), [res_op4] "=&r"(res_op4), [res_op3] "=&r"(res_op3), [res_op2] "=&r"(res_op2), [res_op1] "=&r"(res_op1), [res_op0] "=&r"(res_op0), [tmp] "=&r"(tmp) : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q2] "r"(q2), [q1] "r"(q1), [q3] "r"(q3), [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [add_p6toq6] "r"(add_p6toq6)); *op6 = res_op6; *op5 = res_op5; *op4 = res_op4; *op3 = res_op3; *op2 = res_op2; *op1 = res_op1; *op0 = res_op0; __asm__ __volatile__( /* *oq0 = ROUND_POWER_OF_TWO(p6 + p5 + p4 + p3 + p2 + p1 + p0 + q0 * 2 + q1 + q2 + q3 + q4 + q5 + q6 + q7, 4); */ "addu.ph %[res_oq0], %[q7], %[q0] \n\t" "addu.ph %[res_oq0], %[res_oq0], %[add_p6toq6] \n\t" "shrl.ph %[res_oq0], %[res_oq0], 4 \n\t" /* *oq1 = ROUND_POWER_OF_TWO(p5 + p4 + p3 + p2 + p1 + p0 + q0 + q1 * 2 + q2 + q3 + q4 + q5 + q6 + q7 * 2, 4) */ "shll.ph %[tmp], %[q7], 1 \n\t" "addu.ph %[res_oq1], %[tmp], %[q1] \n\t" "addu.ph %[res_oq1], %[res_oq1], %[add_p6toq6] \n\t" "subu.ph %[res_oq1], %[res_oq1], %[p6] \n\t" "shrl.ph %[res_oq1], %[res_oq1], 4 \n\t" /* *oq2 = ROUND_POWER_OF_TWO(p4 + p3 + p2 + p1 + p0 + q0 + q1 + q2 * 2 + q3 + q4 + q5 + q6 + q7 * 3, 4) */ "shll.ph %[tmp], %[q7], 1 \n\t" "addu.ph %[res_oq2], %[tmp], %[q7] \n\t" "addu.ph %[res_oq2], %[res_oq2], %[q2] \n\t" "addu.ph %[res_oq2], %[res_oq2], %[add_p6toq6] \n\t" "subu.ph %[res_oq2], %[res_oq2], %[p5] \n\t" "subu.ph %[res_oq2], %[res_oq2], %[p6] \n\t" "shrl.ph %[res_oq2], %[res_oq2], 4 \n\t" /* *oq3 = ROUND_POWER_OF_TWO(p3 + p2 + p1 + p0 + q0 + q1 + q2 + q3 * 2 + q4 + q5 + q6 + q7 * 4, 4) */ "shll.ph %[tmp], %[q7], 2 \n\t" "addu.ph %[res_oq3], %[tmp], %[q3] \n\t" "addu.ph %[res_oq3], %[res_oq3], %[add_p6toq6] \n\t" "subu.ph %[res_oq3], %[res_oq3], %[p4] \n\t" "subu.ph %[res_oq3], %[res_oq3], %[p5] \n\t" "subu.ph %[res_oq3], %[res_oq3], %[p6] \n\t" "shrl.ph %[res_oq3], %[res_oq3], 4 \n\t" /* *oq4 = ROUND_POWER_OF_TWO(p2 + p1 + p0 + q0 + q1 + q2 + q3 + q4 * 2 + q5 + q6 + q7 * 5, 4) */ "shll.ph %[tmp], %[q7], 2 \n\t" "addu.ph %[res_oq4], %[tmp], %[q7] \n\t" "addu.ph %[res_oq4], %[res_oq4], %[q4] \n\t" "addu.ph %[res_oq4], %[res_oq4], %[add_p6toq6] \n\t" "subu.ph %[res_oq4], %[res_oq4], %[p3] \n\t" "subu.ph %[res_oq4], %[res_oq4], %[p4] \n\t" "subu.ph %[res_oq4], %[res_oq4], %[p5] \n\t" "subu.ph %[res_oq4], %[res_oq4], %[p6] \n\t" "shrl.ph %[res_oq4], %[res_oq4], 4 \n\t" /* *oq5 = ROUND_POWER_OF_TWO(p1 + p0 + q0 + q1 + q2 + q3 + q4 + q5 * 2 + q6 + q7 * 6, 4) */ "shll.ph %[tmp], %[q7], 2 \n\t" "addu.ph %[res_oq5], %[tmp], %[q7] \n\t" "addu.ph %[res_oq5], %[res_oq5], %[q7] \n\t" "addu.ph %[res_oq5], %[res_oq5], %[q5] \n\t" "addu.ph %[res_oq5], %[res_oq5], %[add_p6toq6] \n\t" 
"subu.ph %[res_oq5], %[res_oq5], %[p2] \n\t" "subu.ph %[res_oq5], %[res_oq5], %[p3] \n\t" "subu.ph %[res_oq5], %[res_oq5], %[p4] \n\t" "subu.ph %[res_oq5], %[res_oq5], %[p5] \n\t" "subu.ph %[res_oq5], %[res_oq5], %[p6] \n\t" "shrl.ph %[res_oq5], %[res_oq5], 4 \n\t" /* *oq6 = ROUND_POWER_OF_TWO(p0 + q0 + q1 + q2 + q3 + q4 + q5 + q6 * 2 + q7 * 7, 4) */ "shll.ph %[tmp], %[q7], 3 \n\t" "subu.ph %[res_oq6], %[tmp], %[q7] \n\t" "addu.ph %[res_oq6], %[res_oq6], %[q6] \n\t" "addu.ph %[res_oq6], %[res_oq6], %[add_p6toq6] \n\t" "subu.ph %[res_oq6], %[res_oq6], %[p1] \n\t" "subu.ph %[res_oq6], %[res_oq6], %[p2] \n\t" "subu.ph %[res_oq6], %[res_oq6], %[p3] \n\t" "subu.ph %[res_oq6], %[res_oq6], %[p4] \n\t" "subu.ph %[res_oq6], %[res_oq6], %[p5] \n\t" "subu.ph %[res_oq6], %[res_oq6], %[p6] \n\t" "shrl.ph %[res_oq6], %[res_oq6], 4 \n\t" : [res_oq6] "=&r"(res_oq6), [res_oq5] "=&r"(res_oq5), [res_oq4] "=&r"(res_oq4), [res_oq3] "=&r"(res_oq3), [res_oq2] "=&r"(res_oq2), [res_oq1] "=&r"(res_oq1), [res_oq0] "=&r"(res_oq0), [tmp] "=&r"(tmp) : [q7] "r"(q7), [q6] "r"(q6), [q5] "r"(q5), [q4] "r"(q4), [q3] "r"(q3), [q2] "r"(q2), [q1] "r"(q1), [q0] "r"(q0), [p1] "r"(p1), [p2] "r"(p2), [p3] "r"(p3), [p4] "r"(p4), [p5] "r"(p5), [p6] "r"(p6), [add_p6toq6] "r"(add_p6toq6)); *oq0 = res_oq0; *oq1 = res_oq1; *oq2 = res_oq2; *oq3 = res_oq3; *oq4 = res_oq4; *oq5 = res_oq5; *oq6 = res_oq6; } #endif // #if HAVE_DSPR2 #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_MIPS_LOOPFILTER_FILTERS_DSPR2_H_ libvpx-1.8.2/vpx_dsp/mips/loopfilter_macros_dspr2.h000066400000000000000000000711771357355204000225100ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_ #define VPX_VPX_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_ #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" #ifdef __cplusplus extern "C" { #endif #if HAVE_DSPR2 #define STORE_F0() \ { \ __asm__ __volatile__( \ "sb %[q1_f0], 1(%[s4]) \n\t" \ "sb %[q0_f0], 0(%[s4]) \n\t" \ "sb %[p0_f0], -1(%[s4]) \n\t" \ "sb %[p1_f0], -2(%[s4]) \n\t" \ \ : \ : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \ [p1_f0] "r"(p1_f0), [s4] "r"(s4)); \ \ __asm__ __volatile__( \ "srl %[q1_f0], %[q1_f0], 8 \n\t" \ "srl %[q0_f0], %[q0_f0], 8 \n\t" \ "srl %[p0_f0], %[p0_f0], 8 \n\t" \ "srl %[p1_f0], %[p1_f0], 8 \n\t" \ \ : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \ [p1_f0] "+r"(p1_f0) \ :); \ \ __asm__ __volatile__( \ "sb %[q1_f0], 1(%[s3]) \n\t" \ "sb %[q0_f0], 0(%[s3]) \n\t" \ "sb %[p0_f0], -1(%[s3]) \n\t" \ "sb %[p1_f0], -2(%[s3]) \n\t" \ \ : [p1_f0] "+r"(p1_f0) \ : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [s3] "r"(s3), \ [p0_f0] "r"(p0_f0)); \ \ __asm__ __volatile__( \ "srl %[q1_f0], %[q1_f0], 8 \n\t" \ "srl %[q0_f0], %[q0_f0], 8 \n\t" \ "srl %[p0_f0], %[p0_f0], 8 \n\t" \ "srl %[p1_f0], %[p1_f0], 8 \n\t" \ \ : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \ [p1_f0] "+r"(p1_f0) \ :); \ \ __asm__ __volatile__( \ "sb %[q1_f0], 1(%[s2]) \n\t" \ "sb %[q0_f0], 0(%[s2]) \n\t" \ "sb %[p0_f0], -1(%[s2]) \n\t" \ "sb %[p1_f0], -2(%[s2]) \n\t" \ \ : \ : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \ [p1_f0] "r"(p1_f0), [s2] "r"(s2)); \ \ __asm__ __volatile__( \ "srl %[q1_f0], %[q1_f0], 8 \n\t" \ "srl %[q0_f0], %[q0_f0], 8 \n\t" \ "srl %[p0_f0], %[p0_f0], 8 \n\t" \ "srl %[p1_f0], %[p1_f0], 8 \n\t" \ \ : [q1_f0] "+r"(q1_f0), [q0_f0] "+r"(q0_f0), [p0_f0] "+r"(p0_f0), \ [p1_f0] "+r"(p1_f0) \ :); \ \ __asm__ __volatile__( \ "sb %[q1_f0], 1(%[s1]) \n\t" \ "sb %[q0_f0], 0(%[s1]) \n\t" \ "sb %[p0_f0], -1(%[s1]) \n\t" \ "sb %[p1_f0], -2(%[s1]) \n\t" \ \ : \ : [q1_f0] "r"(q1_f0), [q0_f0] "r"(q0_f0), [p0_f0] "r"(p0_f0), \ [p1_f0] "r"(p1_f0), [s1] "r"(s1)); \ } #define STORE_F1() \ { \ __asm__ __volatile__( \ "sb %[q2_r], 2(%[s4]) \n\t" \ "sb %[q1_r], 1(%[s4]) \n\t" \ "sb %[q0_r], 0(%[s4]) \n\t" \ "sb %[p0_r], -1(%[s4]) \n\t" \ "sb %[p1_r], -2(%[s4]) \n\t" \ "sb %[p2_r], -3(%[s4]) \n\t" \ \ : \ : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r), \ [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s4] "r"(s4)); \ \ __asm__ __volatile__( \ "srl %[q2_r], %[q2_r], 16 \n\t" \ "srl %[q1_r], %[q1_r], 16 \n\t" \ "srl %[q0_r], %[q0_r], 16 \n\t" \ "srl %[p0_r], %[p0_r], 16 \n\t" \ "srl %[p1_r], %[p1_r], 16 \n\t" \ "srl %[p2_r], %[p2_r], 16 \n\t" \ \ : [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), [q0_r] "+r"(q0_r), \ [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), [p2_r] "+r"(p2_r) \ :); \ \ __asm__ __volatile__( \ "sb %[q2_r], 2(%[s3]) \n\t" \ "sb %[q1_r], 1(%[s3]) \n\t" \ "sb %[q0_r], 0(%[s3]) \n\t" \ "sb %[p0_r], -1(%[s3]) \n\t" \ "sb %[p1_r], -2(%[s3]) \n\t" \ "sb %[p2_r], -3(%[s3]) \n\t" \ \ : \ : [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), [q0_r] "r"(q0_r), \ [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), [p2_r] "r"(p2_r), [s3] "r"(s3)); \ \ __asm__ __volatile__( \ "sb %[q2_l], 2(%[s2]) \n\t" \ "sb %[q1_l], 1(%[s2]) \n\t" \ "sb %[q0_l], 0(%[s2]) \n\t" \ "sb %[p0_l], -1(%[s2]) \n\t" \ "sb %[p1_l], -2(%[s2]) \n\t" \ "sb %[p2_l], -3(%[s2]) \n\t" \ \ : \ : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l), \ [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s2] "r"(s2)); \ \ __asm__ __volatile__( \ "srl 
%[q2_l], %[q2_l], 16 \n\t" \ "srl %[q1_l], %[q1_l], 16 \n\t" \ "srl %[q0_l], %[q0_l], 16 \n\t" \ "srl %[p0_l], %[p0_l], 16 \n\t" \ "srl %[p1_l], %[p1_l], 16 \n\t" \ "srl %[p2_l], %[p2_l], 16 \n\t" \ \ : [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), [q0_l] "+r"(q0_l), \ [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), [p2_l] "+r"(p2_l) \ :); \ \ __asm__ __volatile__( \ "sb %[q2_l], 2(%[s1]) \n\t" \ "sb %[q1_l], 1(%[s1]) \n\t" \ "sb %[q0_l], 0(%[s1]) \n\t" \ "sb %[p0_l], -1(%[s1]) \n\t" \ "sb %[p1_l], -2(%[s1]) \n\t" \ "sb %[p2_l], -3(%[s1]) \n\t" \ \ : \ : [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), [q0_l] "r"(q0_l), \ [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), [p2_l] "r"(p2_l), [s1] "r"(s1)); \ } #define STORE_F2() \ { \ __asm__ __volatile__( \ "sb %[q6_r], 6(%[s4]) \n\t" \ "sb %[q5_r], 5(%[s4]) \n\t" \ "sb %[q4_r], 4(%[s4]) \n\t" \ "sb %[q3_r], 3(%[s4]) \n\t" \ "sb %[q2_r], 2(%[s4]) \n\t" \ "sb %[q1_r], 1(%[s4]) \n\t" \ "sb %[q0_r], 0(%[s4]) \n\t" \ "sb %[p0_r], -1(%[s4]) \n\t" \ "sb %[p1_r], -2(%[s4]) \n\t" \ "sb %[p2_r], -3(%[s4]) \n\t" \ "sb %[p3_r], -4(%[s4]) \n\t" \ "sb %[p4_r], -5(%[s4]) \n\t" \ "sb %[p5_r], -6(%[s4]) \n\t" \ "sb %[p6_r], -7(%[s4]) \n\t" \ \ : \ : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r), \ [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), \ [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), \ [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r), \ [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s4] "r"(s4)); \ \ __asm__ __volatile__( \ "srl %[q6_r], %[q6_r], 16 \n\t" \ "srl %[q5_r], %[q5_r], 16 \n\t" \ "srl %[q4_r], %[q4_r], 16 \n\t" \ "srl %[q3_r], %[q3_r], 16 \n\t" \ "srl %[q2_r], %[q2_r], 16 \n\t" \ "srl %[q1_r], %[q1_r], 16 \n\t" \ "srl %[q0_r], %[q0_r], 16 \n\t" \ "srl %[p0_r], %[p0_r], 16 \n\t" \ "srl %[p1_r], %[p1_r], 16 \n\t" \ "srl %[p2_r], %[p2_r], 16 \n\t" \ "srl %[p3_r], %[p3_r], 16 \n\t" \ "srl %[p4_r], %[p4_r], 16 \n\t" \ "srl %[p5_r], %[p5_r], 16 \n\t" \ "srl %[p6_r], %[p6_r], 16 \n\t" \ \ : [q6_r] "+r"(q6_r), [q5_r] "+r"(q5_r), [q4_r] "+r"(q4_r), \ [q3_r] "+r"(q3_r), [q2_r] "+r"(q2_r), [q1_r] "+r"(q1_r), \ [q0_r] "+r"(q0_r), [p0_r] "+r"(p0_r), [p1_r] "+r"(p1_r), \ [p2_r] "+r"(p2_r), [p3_r] "+r"(p3_r), [p4_r] "+r"(p4_r), \ [p5_r] "+r"(p5_r), [p6_r] "+r"(p6_r) \ :); \ \ __asm__ __volatile__( \ "sb %[q6_r], 6(%[s3]) \n\t" \ "sb %[q5_r], 5(%[s3]) \n\t" \ "sb %[q4_r], 4(%[s3]) \n\t" \ "sb %[q3_r], 3(%[s3]) \n\t" \ "sb %[q2_r], 2(%[s3]) \n\t" \ "sb %[q1_r], 1(%[s3]) \n\t" \ "sb %[q0_r], 0(%[s3]) \n\t" \ "sb %[p0_r], -1(%[s3]) \n\t" \ "sb %[p1_r], -2(%[s3]) \n\t" \ "sb %[p2_r], -3(%[s3]) \n\t" \ "sb %[p3_r], -4(%[s3]) \n\t" \ "sb %[p4_r], -5(%[s3]) \n\t" \ "sb %[p5_r], -6(%[s3]) \n\t" \ "sb %[p6_r], -7(%[s3]) \n\t" \ \ : \ : [q6_r] "r"(q6_r), [q5_r] "r"(q5_r), [q4_r] "r"(q4_r), \ [q3_r] "r"(q3_r), [q2_r] "r"(q2_r), [q1_r] "r"(q1_r), \ [q0_r] "r"(q0_r), [p0_r] "r"(p0_r), [p1_r] "r"(p1_r), \ [p2_r] "r"(p2_r), [p3_r] "r"(p3_r), [p4_r] "r"(p4_r), \ [p5_r] "r"(p5_r), [p6_r] "r"(p6_r), [s3] "r"(s3)); \ \ __asm__ __volatile__( \ "sb %[q6_l], 6(%[s2]) \n\t" \ "sb %[q5_l], 5(%[s2]) \n\t" \ "sb %[q4_l], 4(%[s2]) \n\t" \ "sb %[q3_l], 3(%[s2]) \n\t" \ "sb %[q2_l], 2(%[s2]) \n\t" \ "sb %[q1_l], 1(%[s2]) \n\t" \ "sb %[q0_l], 0(%[s2]) \n\t" \ "sb %[p0_l], -1(%[s2]) \n\t" \ "sb %[p1_l], -2(%[s2]) \n\t" \ "sb %[p2_l], -3(%[s2]) \n\t" \ "sb %[p3_l], -4(%[s2]) \n\t" \ "sb %[p4_l], -5(%[s2]) \n\t" \ "sb %[p5_l], -6(%[s2]) \n\t" \ "sb %[p6_l], -7(%[s2]) \n\t" \ \ : \ : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l), \ [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), \ [q0_l] "r"(q0_l), 
[p0_l] "r"(p0_l), [p1_l] "r"(p1_l), \ [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l), \ [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s2] "r"(s2)); \ \ __asm__ __volatile__( \ "srl %[q6_l], %[q6_l], 16 \n\t" \ "srl %[q5_l], %[q5_l], 16 \n\t" \ "srl %[q4_l], %[q4_l], 16 \n\t" \ "srl %[q3_l], %[q3_l], 16 \n\t" \ "srl %[q2_l], %[q2_l], 16 \n\t" \ "srl %[q1_l], %[q1_l], 16 \n\t" \ "srl %[q0_l], %[q0_l], 16 \n\t" \ "srl %[p0_l], %[p0_l], 16 \n\t" \ "srl %[p1_l], %[p1_l], 16 \n\t" \ "srl %[p2_l], %[p2_l], 16 \n\t" \ "srl %[p3_l], %[p3_l], 16 \n\t" \ "srl %[p4_l], %[p4_l], 16 \n\t" \ "srl %[p5_l], %[p5_l], 16 \n\t" \ "srl %[p6_l], %[p6_l], 16 \n\t" \ \ : [q6_l] "+r"(q6_l), [q5_l] "+r"(q5_l), [q4_l] "+r"(q4_l), \ [q3_l] "+r"(q3_l), [q2_l] "+r"(q2_l), [q1_l] "+r"(q1_l), \ [q0_l] "+r"(q0_l), [p0_l] "+r"(p0_l), [p1_l] "+r"(p1_l), \ [p2_l] "+r"(p2_l), [p3_l] "+r"(p3_l), [p4_l] "+r"(p4_l), \ [p5_l] "+r"(p5_l), [p6_l] "+r"(p6_l) \ :); \ \ __asm__ __volatile__( \ "sb %[q6_l], 6(%[s1]) \n\t" \ "sb %[q5_l], 5(%[s1]) \n\t" \ "sb %[q4_l], 4(%[s1]) \n\t" \ "sb %[q3_l], 3(%[s1]) \n\t" \ "sb %[q2_l], 2(%[s1]) \n\t" \ "sb %[q1_l], 1(%[s1]) \n\t" \ "sb %[q0_l], 0(%[s1]) \n\t" \ "sb %[p0_l], -1(%[s1]) \n\t" \ "sb %[p1_l], -2(%[s1]) \n\t" \ "sb %[p2_l], -3(%[s1]) \n\t" \ "sb %[p3_l], -4(%[s1]) \n\t" \ "sb %[p4_l], -5(%[s1]) \n\t" \ "sb %[p5_l], -6(%[s1]) \n\t" \ "sb %[p6_l], -7(%[s1]) \n\t" \ \ : \ : [q6_l] "r"(q6_l), [q5_l] "r"(q5_l), [q4_l] "r"(q4_l), \ [q3_l] "r"(q3_l), [q2_l] "r"(q2_l), [q1_l] "r"(q1_l), \ [q0_l] "r"(q0_l), [p0_l] "r"(p0_l), [p1_l] "r"(p1_l), \ [p2_l] "r"(p2_l), [p3_l] "r"(p3_l), [p4_l] "r"(p4_l), \ [p5_l] "r"(p5_l), [p6_l] "r"(p6_l), [s1] "r"(s1)); \ } #define PACK_LEFT_0TO3() \ { \ __asm__ __volatile__( \ "preceu.ph.qbl %[p3_l], %[p3] \n\t" \ "preceu.ph.qbl %[p2_l], %[p2] \n\t" \ "preceu.ph.qbl %[p1_l], %[p1] \n\t" \ "preceu.ph.qbl %[p0_l], %[p0] \n\t" \ "preceu.ph.qbl %[q0_l], %[q0] \n\t" \ "preceu.ph.qbl %[q1_l], %[q1] \n\t" \ "preceu.ph.qbl %[q2_l], %[q2] \n\t" \ "preceu.ph.qbl %[q3_l], %[q3] \n\t" \ \ : [p3_l] "=&r"(p3_l), [p2_l] "=&r"(p2_l), [p1_l] "=&r"(p1_l), \ [p0_l] "=&r"(p0_l), [q0_l] "=&r"(q0_l), [q1_l] "=&r"(q1_l), \ [q2_l] "=&r"(q2_l), [q3_l] "=&r"(q3_l) \ : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), \ [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3)); \ } #define PACK_LEFT_4TO7() \ { \ __asm__ __volatile__( \ "preceu.ph.qbl %[p7_l], %[p7] \n\t" \ "preceu.ph.qbl %[p6_l], %[p6] \n\t" \ "preceu.ph.qbl %[p5_l], %[p5] \n\t" \ "preceu.ph.qbl %[p4_l], %[p4] \n\t" \ "preceu.ph.qbl %[q4_l], %[q4] \n\t" \ "preceu.ph.qbl %[q5_l], %[q5] \n\t" \ "preceu.ph.qbl %[q6_l], %[q6] \n\t" \ "preceu.ph.qbl %[q7_l], %[q7] \n\t" \ \ : [p7_l] "=&r"(p7_l), [p6_l] "=&r"(p6_l), [p5_l] "=&r"(p5_l), \ [p4_l] "=&r"(p4_l), [q4_l] "=&r"(q4_l), [q5_l] "=&r"(q5_l), \ [q6_l] "=&r"(q6_l), [q7_l] "=&r"(q7_l) \ : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), \ [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7)); \ } #define PACK_RIGHT_0TO3() \ { \ __asm__ __volatile__( \ "preceu.ph.qbr %[p3_r], %[p3] \n\t" \ "preceu.ph.qbr %[p2_r], %[p2] \n\t" \ "preceu.ph.qbr %[p1_r], %[p1] \n\t" \ "preceu.ph.qbr %[p0_r], %[p0] \n\t" \ "preceu.ph.qbr %[q0_r], %[q0] \n\t" \ "preceu.ph.qbr %[q1_r], %[q1] \n\t" \ "preceu.ph.qbr %[q2_r], %[q2] \n\t" \ "preceu.ph.qbr %[q3_r], %[q3] \n\t" \ \ : [p3_r] "=&r"(p3_r), [p2_r] "=&r"(p2_r), [p1_r] "=&r"(p1_r), \ [p0_r] "=&r"(p0_r), [q0_r] "=&r"(q0_r), [q1_r] "=&r"(q1_r), \ [q2_r] "=&r"(q2_r), [q3_r] "=&r"(q3_r) \ : [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] 
"r"(p0), \ [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3)); \ } #define PACK_RIGHT_4TO7() \ { \ __asm__ __volatile__( \ "preceu.ph.qbr %[p7_r], %[p7] \n\t" \ "preceu.ph.qbr %[p6_r], %[p6] \n\t" \ "preceu.ph.qbr %[p5_r], %[p5] \n\t" \ "preceu.ph.qbr %[p4_r], %[p4] \n\t" \ "preceu.ph.qbr %[q4_r], %[q4] \n\t" \ "preceu.ph.qbr %[q5_r], %[q5] \n\t" \ "preceu.ph.qbr %[q6_r], %[q6] \n\t" \ "preceu.ph.qbr %[q7_r], %[q7] \n\t" \ \ : [p7_r] "=&r"(p7_r), [p6_r] "=&r"(p6_r), [p5_r] "=&r"(p5_r), \ [p4_r] "=&r"(p4_r), [q4_r] "=&r"(q4_r), [q5_r] "=&r"(q5_r), \ [q6_r] "=&r"(q6_r), [q7_r] "=&r"(q7_r) \ : [p7] "r"(p7), [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), \ [q4] "r"(q4), [q5] "r"(q5), [q6] "r"(q6), [q7] "r"(q7)); \ } #define COMBINE_LEFT_RIGHT_0TO2() \ { \ __asm__ __volatile__( \ "precr.qb.ph %[p2], %[p2_l], %[p2_r] \n\t" \ "precr.qb.ph %[p1], %[p1_l], %[p1_r] \n\t" \ "precr.qb.ph %[p0], %[p0_l], %[p0_r] \n\t" \ "precr.qb.ph %[q0], %[q0_l], %[q0_r] \n\t" \ "precr.qb.ph %[q1], %[q1_l], %[q1_r] \n\t" \ "precr.qb.ph %[q2], %[q2_l], %[q2_r] \n\t" \ \ : [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0), [q0] "=&r"(q0), \ [q1] "=&r"(q1), [q2] "=&r"(q2) \ : [p2_l] "r"(p2_l), [p2_r] "r"(p2_r), [p1_l] "r"(p1_l), \ [p1_r] "r"(p1_r), [p0_l] "r"(p0_l), [p0_r] "r"(p0_r), \ [q0_l] "r"(q0_l), [q0_r] "r"(q0_r), [q1_l] "r"(q1_l), \ [q1_r] "r"(q1_r), [q2_l] "r"(q2_l), [q2_r] "r"(q2_r)); \ } #define COMBINE_LEFT_RIGHT_3TO6() \ { \ __asm__ __volatile__( \ "precr.qb.ph %[p6], %[p6_l], %[p6_r] \n\t" \ "precr.qb.ph %[p5], %[p5_l], %[p5_r] \n\t" \ "precr.qb.ph %[p4], %[p4_l], %[p4_r] \n\t" \ "precr.qb.ph %[p3], %[p3_l], %[p3_r] \n\t" \ "precr.qb.ph %[q3], %[q3_l], %[q3_r] \n\t" \ "precr.qb.ph %[q4], %[q4_l], %[q4_r] \n\t" \ "precr.qb.ph %[q5], %[q5_l], %[q5_r] \n\t" \ "precr.qb.ph %[q6], %[q6_l], %[q6_r] \n\t" \ \ : [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4), [p3] "=&r"(p3), \ [q3] "=&r"(q3), [q4] "=&r"(q4), [q5] "=&r"(q5), [q6] "=&r"(q6) \ : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l), \ [p3_l] "r"(p3_l), [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), \ [p4_r] "r"(p4_r), [p3_r] "r"(p3_r), [q3_l] "r"(q3_l), \ [q4_l] "r"(q4_l), [q5_l] "r"(q5_l), [q6_l] "r"(q6_l), \ [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r), \ [q6_r] "r"(q6_r)); \ } #endif // #if HAVE_DSPR2 #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_MIPS_LOOPFILTER_MACROS_DSPR2_H_ libvpx-1.8.2/vpx_dsp/mips/loopfilter_masks_dspr2.h000066400000000000000000000412561357355204000223350ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_DSP_MIPS_LOOPFILTER_MASKS_DSPR2_H_ #define VPX_VPX_DSP_MIPS_LOOPFILTER_MASKS_DSPR2_H_ #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" #ifdef __cplusplus extern "C" { #endif #if HAVE_DSPR2 /* processing 4 pixels at the same time * compute hev and mask in the same function */ static INLINE void filter_hev_mask_dspr2(uint32_t limit, uint32_t flimit, uint32_t p1, uint32_t p0, uint32_t p3, uint32_t p2, uint32_t q0, uint32_t q1, uint32_t q2, uint32_t q3, uint32_t thresh, uint32_t *hev, uint32_t *mask) { uint32_t c, r, r3, r_k; uint32_t s1, s2, s3; uint32_t ones = 0xFFFFFFFF; uint32_t hev1; __asm__ __volatile__( /* mask |= (abs(p3 - p2) > limit) */ "subu_s.qb %[c], %[p3], %[p2] \n\t" "subu_s.qb %[r_k], %[p2], %[p3] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], $0, %[c] \n\t" /* mask |= (abs(p2 - p1) > limit) */ "subu_s.qb %[c], %[p2], %[p1] \n\t" "subu_s.qb %[r_k], %[p1], %[p2] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" /* mask |= (abs(p1 - p0) > limit) * hev |= (abs(p1 - p0) > thresh) */ "subu_s.qb %[c], %[p1], %[p0] \n\t" "subu_s.qb %[r_k], %[p0], %[p1] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" "or %[r3], $0, %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" /* mask |= (abs(q1 - q0) > limit) * hev |= (abs(q1 - q0) > thresh) */ "subu_s.qb %[c], %[q1], %[q0] \n\t" "subu_s.qb %[r_k], %[q0], %[q1] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" "or %[r3], %[r3], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" /* mask |= (abs(q2 - q1) > limit) */ "subu_s.qb %[c], %[q2], %[q1] \n\t" "subu_s.qb %[r_k], %[q1], %[q2] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" "sll %[r3], %[r3], 24 \n\t" /* mask |= (abs(q3 - q2) > limit) */ "subu_s.qb %[c], %[q3], %[q2] \n\t" "subu_s.qb %[r_k], %[q2], %[q3] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r3] "=&r"(r3) : [limit] "r"(limit), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q1] "r"(q1), [q0] "r"(q0), [q2] "r"(q2), [q3] "r"(q3), [thresh] "r"(thresh)); __asm__ __volatile__( /* abs(p0 - q0) */ "subu_s.qb %[c], %[p0], %[q0] \n\t" "subu_s.qb %[r_k], %[q0], %[p0] \n\t" "wrdsp %[r3] \n\t" "or %[s1], %[r_k], %[c] \n\t" /* abs(p1 - q1) */ "subu_s.qb %[c], %[p1], %[q1] \n\t" "addu_s.qb %[s3], %[s1], %[s1] \n\t" "pick.qb %[hev1], %[ones], $0 \n\t" "subu_s.qb %[r_k], %[q1], %[p1] \n\t" "or %[s2], %[r_k], %[c] \n\t" /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ "shrl.qb %[s2], %[s2], 1 \n\t" "addu_s.qb %[s1], %[s2], %[s3] \n\t" "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" "or %[r], %[r], %[c] \n\t" "sll %[r], %[r], 24 \n\t" "wrdsp %[r] \n\t" "pick.qb %[s2], $0, %[ones] \n\t" : [c] "=&r"(c), [r_k] "=&r"(r_k), [s1] "=&r"(s1), [hev1] "=&r"(hev1), [s2] "=&r"(s2), [r] "+r"(r), [s3] "=&r"(s3) : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [r3] "r"(r3), [q1] "r"(q1), [ones] "r"(ones), [flimit] "r"(flimit)); *hev = hev1; *mask = s2; } static INLINE void filter_hev_mask_flatmask4_dspr2( uint32_t limit, uint32_t flimit, uint32_t thresh, uint32_t p1, uint32_t p0, uint32_t p3, uint32_t p2, uint32_t q0, uint32_t q1, uint32_t q2, uint32_t q3, uint32_t *hev, uint32_t *mask, uint32_t *flat) { uint32_t 
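  /* computes mask, hev and the flat flag in a single pass: the per-byte
     cmpgu.lt.qb compare results are moved into the DSP condition-code
     field via sll/wrdsp, and pick.qb expands them to 0x00/0xFF lanes */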
c, r, r3, r_k, r_flat; uint32_t s1, s2, s3; uint32_t ones = 0xFFFFFFFF; uint32_t flat_thresh = 0x01010101; uint32_t hev1; uint32_t flat1; __asm__ __volatile__( /* mask |= (abs(p3 - p2) > limit) */ "subu_s.qb %[c], %[p3], %[p2] \n\t" "subu_s.qb %[r_k], %[p2], %[p3] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], $0, %[c] \n\t" /* mask |= (abs(p2 - p1) > limit) */ "subu_s.qb %[c], %[p2], %[p1] \n\t" "subu_s.qb %[r_k], %[p1], %[p2] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" /* mask |= (abs(p1 - p0) > limit) * hev |= (abs(p1 - p0) > thresh) * flat |= (abs(p1 - p0) > thresh) */ "subu_s.qb %[c], %[p1], %[p0] \n\t" "subu_s.qb %[r_k], %[p0], %[p1] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" "or %[r3], $0, %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r_flat], $0, %[c] \n\t" /* mask |= (abs(q1 - q0) > limit) * hev |= (abs(q1 - q0) > thresh) * flat |= (abs(q1 - q0) > thresh) */ "subu_s.qb %[c], %[q1], %[q0] \n\t" "subu_s.qb %[r_k], %[q0], %[q1] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[thresh], %[r_k] \n\t" "or %[r3], %[r3], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r_flat], %[r_flat], %[c] \n\t" /* flat |= (abs(p0 - p2) > thresh) */ "subu_s.qb %[c], %[p0], %[p2] \n\t" "subu_s.qb %[r_k], %[p2], %[p0] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r_flat], %[r_flat], %[c] \n\t" /* flat |= (abs(q0 - q2) > thresh) */ "subu_s.qb %[c], %[q0], %[q2] \n\t" "subu_s.qb %[r_k], %[q2], %[q0] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r_flat], %[r_flat], %[c] \n\t" /* flat |= (abs(p3 - p0) > thresh) */ "subu_s.qb %[c], %[p3], %[p0] \n\t" "subu_s.qb %[r_k], %[p0], %[p3] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r_flat], %[r_flat], %[c] \n\t" /* flat |= (abs(q3 - q0) > thresh) */ "subu_s.qb %[c], %[q3], %[q0] \n\t" "subu_s.qb %[r_k], %[q0], %[q3] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r_flat], %[r_flat], %[c] \n\t" "sll %[r_flat], %[r_flat], 24 \n\t" /* look at stall here */ "wrdsp %[r_flat] \n\t" "pick.qb %[flat1], $0, %[ones] \n\t" /* mask |= (abs(q2 - q1) > limit) */ "subu_s.qb %[c], %[q2], %[q1] \n\t" "subu_s.qb %[r_k], %[q1], %[q2] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" "sll %[r3], %[r3], 24 \n\t" /* mask |= (abs(q3 - q2) > limit) */ "subu_s.qb %[c], %[q3], %[q2] \n\t" "subu_s.qb %[r_k], %[q2], %[q3] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[limit], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r3] "=&r"(r3), [r_flat] "=&r"(r_flat), [flat1] "=&r"(flat1) : [limit] "r"(limit), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q1] "r"(q1), [q0] "r"(q0), [q2] "r"(q2), [q3] "r"(q3), [thresh] "r"(thresh), [flat_thresh] "r"(flat_thresh), [ones] "r"(ones)); __asm__ __volatile__( /* abs(p0 - q0) */ "subu_s.qb %[c], %[p0], %[q0] \n\t" "subu_s.qb %[r_k], %[q0], %[p0] \n\t" "wrdsp %[r3] \n\t" "or %[s1], %[r_k], %[c] \n\t" /* abs(p1 - q1) */ "subu_s.qb %[c], %[p1], %[q1] \n\t" "addu_s.qb %[s3], %[s1], %[s1] \n\t" "pick.qb %[hev1], %[ones], $0 \n\t" "subu_s.qb %[r_k], 
%[q1], %[p1] \n\t" "or %[s2], %[r_k], %[c] \n\t" /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > flimit * 2 + limit */ "shrl.qb %[s2], %[s2], 1 \n\t" "addu_s.qb %[s1], %[s2], %[s3] \n\t" "cmpgu.lt.qb %[c], %[flimit], %[s1] \n\t" "or %[r], %[r], %[c] \n\t" "sll %[r], %[r], 24 \n\t" "wrdsp %[r] \n\t" "pick.qb %[s2], $0, %[ones] \n\t" : [c] "=&r"(c), [r_k] "=&r"(r_k), [s1] "=&r"(s1), [hev1] "=&r"(hev1), [s2] "=&r"(s2), [r] "+r"(r), [s3] "=&r"(s3) : [p0] "r"(p0), [q0] "r"(q0), [p1] "r"(p1), [r3] "r"(r3), [q1] "r"(q1), [ones] "r"(ones), [flimit] "r"(flimit)); *hev = hev1; *mask = s2; *flat = flat1; } static INLINE void flatmask5(uint32_t p4, uint32_t p3, uint32_t p2, uint32_t p1, uint32_t p0, uint32_t q0, uint32_t q1, uint32_t q2, uint32_t q3, uint32_t q4, uint32_t *flat2) { uint32_t c, r, r_k, r_flat; uint32_t ones = 0xFFFFFFFF; uint32_t flat_thresh = 0x01010101; uint32_t flat1, flat3; __asm__ __volatile__( /* flat |= (abs(p4 - p0) > thresh) */ "subu_s.qb %[c], %[p4], %[p0] \n\t" "subu_s.qb %[r_k], %[p0], %[p4] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r], $0, %[c] \n\t" /* flat |= (abs(q4 - q0) > thresh) */ "subu_s.qb %[c], %[q4], %[q0] \n\t" "subu_s.qb %[r_k], %[q0], %[q4] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r], %[r], %[c] \n\t" "sll %[r], %[r], 24 \n\t" "wrdsp %[r] \n\t" "pick.qb %[flat3], $0, %[ones] \n\t" /* flat |= (abs(p1 - p0) > thresh) */ "subu_s.qb %[c], %[p1], %[p0] \n\t" "subu_s.qb %[r_k], %[p0], %[p1] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r_flat], $0, %[c] \n\t" /* flat |= (abs(q1 - q0) > thresh) */ "subu_s.qb %[c], %[q1], %[q0] \n\t" "subu_s.qb %[r_k], %[q0], %[q1] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r_flat], %[r_flat], %[c] \n\t" /* flat |= (abs(p0 - p2) > thresh) */ "subu_s.qb %[c], %[p0], %[p2] \n\t" "subu_s.qb %[r_k], %[p2], %[p0] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r_flat], %[r_flat], %[c] \n\t" /* flat |= (abs(q0 - q2) > thresh) */ "subu_s.qb %[c], %[q0], %[q2] \n\t" "subu_s.qb %[r_k], %[q2], %[q0] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r_flat], %[r_flat], %[c] \n\t" /* flat |= (abs(p3 - p0) > thresh) */ "subu_s.qb %[c], %[p3], %[p0] \n\t" "subu_s.qb %[r_k], %[p0], %[p3] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r_flat], %[r_flat], %[c] \n\t" /* flat |= (abs(q3 - q0) > thresh) */ "subu_s.qb %[c], %[q3], %[q0] \n\t" "subu_s.qb %[r_k], %[q0], %[q3] \n\t" "or %[r_k], %[r_k], %[c] \n\t" "cmpgu.lt.qb %[c], %[flat_thresh], %[r_k] \n\t" "or %[r_flat], %[r_flat], %[c] \n\t" "sll %[r_flat], %[r_flat], 24 \n\t" "wrdsp %[r_flat] \n\t" "pick.qb %[flat1], $0, %[ones] \n\t" /* flat & flatmask4(thresh, p3, p2, p1, p0, q0, q1, q2, q3) */ "and %[flat1], %[flat3], %[flat1] \n\t" : [c] "=&r"(c), [r_k] "=&r"(r_k), [r] "=&r"(r), [r_flat] "=&r"(r_flat), [flat1] "=&r"(flat1), [flat3] "=&r"(flat3) : [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [q3] "r"(q3), [q4] "r"(q4), [flat_thresh] "r"(flat_thresh), [ones] "r"(ones)); *flat2 = flat1; } #endif // #if HAVE_DSPR2 #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_MIPS_LOOPFILTER_MASKS_DSPR2_H_ 
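/*
 * Illustrative sketch (editor's addition, not part of libvpx): the DSPr2
 * routines above pack four 8-bit pixels into one 32-bit register and
 * evaluate every loop-filter test with cmpgu.lt.qb / pick.qb. A minimal
 * scalar C equivalent of the same per-column tests is given below so the
 * packed logic is easier to follow. The names scalar_filter_mask,
 * scalar_hev and scalar_flat_mask4 are hypothetical helpers introduced
 * here for illustration only; they do not exist in libvpx.
 */
#include <stdint.h>
#include <stdlib.h> /* abs() */

/* Returns 1 when the column may be filtered; the asm produces the packed
 * equivalent (0xFF per byte) via pick.qb after OR-ing all the tests. */
static int scalar_filter_mask(uint8_t limit, uint8_t blimit, uint8_t p3,
                              uint8_t p2, uint8_t p1, uint8_t p0, uint8_t q0,
                              uint8_t q1, uint8_t q2, uint8_t q3) {
  int over = 0; /* set when any smoothness test is exceeded */
  over |= abs(p3 - p2) > limit;
  over |= abs(p2 - p1) > limit;
  over |= abs(p1 - p0) > limit;
  over |= abs(q1 - q0) > limit;
  over |= abs(q2 - q1) > limit;
  over |= abs(q3 - q2) > limit;
  /* edge-strength test against the replicated *blimit (flimit_vec above) */
  over |= abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit;
  return !over;
}

/* High edge variance: where set, only the sharp 4-tap filter is applied. */
static int scalar_hev(uint8_t thresh, uint8_t p1, uint8_t p0, uint8_t q0,
                      uint8_t q1) {
  return (abs(p1 - p0) > thresh) || (abs(q1 - q0) > thresh);
}

/* Scalar twin of the flat computation in filter_hev_mask_flatmask4_dspr2;
 * flat_thresh = 0x01010101 in the asm is a per-byte threshold of 1. */
static int scalar_flat_mask4(uint8_t p3, uint8_t p2, uint8_t p1, uint8_t p0,
                             uint8_t q0, uint8_t q1, uint8_t q2, uint8_t q3) {
  const int t = 1;
  int over = 0;
  over |= abs(p1 - p0) > t;
  over |= abs(q1 - q0) > t;
  over |= abs(p0 - p2) > t;
  over |= abs(q0 - q2) > t;
  over |= abs(p3 - p0) > t;
  over |= abs(q3 - q0) > t;
  return !over; /* 1 -> segment flat enough for the wide (mb) filter */
}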
libvpx-1.8.2/vpx_dsp/mips/loopfilter_mb_dspr2.c000066400000000000000000000563741357355204000216170ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/mips/common_dspr2.h" #include "vpx_dsp/mips/loopfilter_filters_dspr2.h" #include "vpx_dsp/mips/loopfilter_macros_dspr2.h" #include "vpx_dsp/mips/loopfilter_masks_dspr2.h" #include "vpx_mem/vpx_mem.h" #if HAVE_DSPR2 void vpx_lpf_horizontal_8_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { uint32_t mask; uint32_t hev, flat; uint8_t i; uint8_t *sp3, *sp2, *sp1, *sp0, *sq0, *sq1, *sq2, *sq3; uint32_t thresh_vec, flimit_vec, limit_vec; uint32_t uflimit, ulimit, uthresh; uint32_t p1_f0, p0_f0, q0_f0, q1_f0; uint32_t p3, p2, p1, p0, q0, q1, q2, q3; uint32_t p0_l, p1_l, p2_l, p3_l, q0_l, q1_l, q2_l, q3_l; uint32_t p0_r, p1_r, p2_r, p3_r, q0_r, q1_r, q2_r, q3_r; uflimit = *blimit; ulimit = *limit; uthresh = *thresh; /* create quad-byte */ __asm__ __volatile__( "replv.qb %[thresh_vec], %[uthresh] \n\t" "replv.qb %[flimit_vec], %[uflimit] \n\t" "replv.qb %[limit_vec], %[ulimit] \n\t" : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec), [limit_vec] "=r"(limit_vec) : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit)); /* prefetch data for store */ prefetch_store(s); for (i = 0; i < 2; i++) { sp3 = s - (pitch << 2); sp2 = sp3 + pitch; sp1 = sp2 + pitch; sp0 = sp1 + pitch; sq0 = s; sq1 = s + pitch; sq2 = sq1 + pitch; sq3 = sq2 + pitch; __asm__ __volatile__( "lw %[p3], (%[sp3]) \n\t" "lw %[p2], (%[sp2]) \n\t" "lw %[p1], (%[sp1]) \n\t" "lw %[p0], (%[sp0]) \n\t" "lw %[q0], (%[sq0]) \n\t" "lw %[q1], (%[sq1]) \n\t" "lw %[q2], (%[sq2]) \n\t" "lw %[q3], (%[sq3]) \n\t" : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0), [q3] "=&r"(q3), [q2] "=&r"(q2), [q1] "=&r"(q1), [q0] "=&r"(q0) : [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq3] "r"(sq3), [sq2] "r"(sq2), [sq1] "r"(sq1), [sq0] "r"(sq0)); filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0, p3, p2, q0, q1, q2, q3, &hev, &mask, &flat); if ((flat == 0) && (mask != 0)) { filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0); __asm__ __volatile__( "sw %[p1_f0], (%[sp1]) \n\t" "sw %[p0_f0], (%[sp0]) \n\t" "sw %[q0_f0], (%[sq0]) \n\t" "sw %[q1_f0], (%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } else if ((mask & flat) == 0xFFFFFFFF) { /* left 2 element operation */ PACK_LEFT_0TO3() mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r); COMBINE_LEFT_RIGHT_0TO2() __asm__ __volatile__( "sw %[p2], (%[sp2]) \n\t" "sw %[p1], (%[sp1]) \n\t" "sw %[p0], (%[sp0]) \n\t" "sw %[q0], (%[sq0]) \n\t" "sw %[q1], (%[sq1]) \n\t" "sw %[q2], (%[sq2]) \n\t" : : [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [sp2] "r"(sp2), [sp1] "r"(sp1), 
[sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if ((flat != 0) && (mask != 0)) { /* filtering */ filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0); /* left 2 element operation */ PACK_LEFT_0TO3() mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r); if (mask & flat & 0x000000FF) { __asm__ __volatile__( "sb %[p2_r], (%[sp2]) \n\t" "sb %[p1_r], (%[sp1]) \n\t" "sb %[p0_r], (%[sp0]) \n\t" "sb %[q0_r], (%[sq0]) \n\t" "sb %[q1_r], (%[sq1]) \n\t" "sb %[q2_r], (%[sq2]) \n\t" : : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r), [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if (mask & 0x000000FF) { __asm__ __volatile__( "sb %[p1_f0], (%[sp1]) \n\t" "sb %[p0_f0], (%[sp0]) \n\t" "sb %[q0_f0], (%[sq0]) \n\t" "sb %[q1_f0], (%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } __asm__ __volatile__( "srl %[p2_r], %[p2_r], 16 \n\t" "srl %[p1_r], %[p1_r], 16 \n\t" "srl %[p0_r], %[p0_r], 16 \n\t" "srl %[q0_r], %[q0_r], 16 \n\t" "srl %[q1_r], %[q1_r], 16 \n\t" "srl %[q2_r], %[q2_r], 16 \n\t" "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r), [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & 0x0000FF00) { __asm__ __volatile__( "sb %[p2_r], +1(%[sp2]) \n\t" "sb %[p1_r], +1(%[sp1]) \n\t" "sb %[p0_r], +1(%[sp0]) \n\t" "sb %[q0_r], +1(%[sq0]) \n\t" "sb %[q1_r], +1(%[sq1]) \n\t" "sb %[q2_r], +1(%[sq2]) \n\t" : : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r), [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if (mask & 0x0000FF00) { __asm__ __volatile__( "sb %[p1_f0], +1(%[sp1]) \n\t" "sb %[p0_f0], +1(%[sp0]) \n\t" "sb %[q0_f0], +1(%[sq0]) \n\t" "sb %[q1_f0], +1(%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } __asm__ __volatile__( "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), [q0] "+r"(q0), [q1] "+r"(q1), [q2] "+r"(q2), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & 0x00FF0000) { __asm__ __volatile__( "sb %[p2_l], +2(%[sp2]) \n\t" "sb %[p1_l], +2(%[sp1]) \n\t" "sb %[p0_l], +2(%[sp0]) \n\t" "sb %[q0_l], +2(%[sq0]) \n\t" "sb %[q1_l], +2(%[sq1]) \n\t" "sb %[q2_l], +2(%[sq2]) \n\t" : : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l), [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if (mask & 0x00FF0000) { __asm__ __volatile__( "sb %[p1_f0], +2(%[sp1]) \n\t" "sb %[p0_f0], +2(%[sp0]) \n\t" "sb %[q0_f0], +2(%[sq0]) \n\t" "sb %[q1_f0], +2(%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] 
"r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } __asm__ __volatile__( "srl %[p2_l], %[p2_l], 16 \n\t" "srl %[p1_l], %[p1_l], 16 \n\t" "srl %[p0_l], %[p0_l], 16 \n\t" "srl %[q0_l], %[q0_l], 16 \n\t" "srl %[q1_l], %[q1_l], 16 \n\t" "srl %[q2_l], %[q2_l], 16 \n\t" "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l), [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & 0xFF000000) { __asm__ __volatile__( "sb %[p2_l], +3(%[sp2]) \n\t" "sb %[p1_l], +3(%[sp1]) \n\t" "sb %[p0_l], +3(%[sp0]) \n\t" "sb %[q0_l], +3(%[sq0]) \n\t" "sb %[q1_l], +3(%[sq1]) \n\t" "sb %[q2_l], +3(%[sq2]) \n\t" : : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l), [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if (mask & 0xFF000000) { __asm__ __volatile__( "sb %[p1_f0], +3(%[sp1]) \n\t" "sb %[p0_f0], +3(%[sp0]) \n\t" "sb %[q0_f0], +3(%[sq0]) \n\t" "sb %[q1_f0], +3(%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } } s = s + 4; } } void vpx_lpf_vertical_8_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { uint8_t i; uint32_t mask, hev, flat; uint8_t *s1, *s2, *s3, *s4; uint32_t prim1, prim2, sec3, sec4, prim3, prim4; uint32_t thresh_vec, flimit_vec, limit_vec; uint32_t uflimit, ulimit, uthresh; uint32_t p3, p2, p1, p0, q3, q2, q1, q0; uint32_t p1_f0, p0_f0, q0_f0, q1_f0; uint32_t p0_l, p1_l, p2_l, p3_l, q0_l, q1_l, q2_l, q3_l; uint32_t p0_r, p1_r, p2_r, p3_r, q0_r, q1_r, q2_r, q3_r; uflimit = *blimit; ulimit = *limit; uthresh = *thresh; /* create quad-byte */ __asm__ __volatile__( "replv.qb %[thresh_vec], %[uthresh] \n\t" "replv.qb %[flimit_vec], %[uflimit] \n\t" "replv.qb %[limit_vec], %[ulimit] \n\t" : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec), [limit_vec] "=r"(limit_vec) : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit)); prefetch_store(s + pitch); for (i = 0; i < 2; i++) { s1 = s; s2 = s + pitch; s3 = s2 + pitch; s4 = s3 + pitch; s = s4 + pitch; __asm__ __volatile__( "lw %[p0], -4(%[s1]) \n\t" "lw %[p1], -4(%[s2]) \n\t" "lw %[p2], -4(%[s3]) \n\t" "lw %[p3], -4(%[s4]) \n\t" "lw %[q3], (%[s1]) \n\t" "lw %[q2], (%[s2]) \n\t" "lw %[q1], (%[s3]) \n\t" "lw %[q0], (%[s4]) \n\t" : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0), [q0] "=&r"(q0), [q1] "=&r"(q1), [q2] "=&r"(q2), [q3] "=&r"(q3) : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4)); /* transpose p3, p2, p1, p0 original (when loaded from memory) register -4 -3 -2 -1 p0 p0_0 p0_1 p0_2 p0_3 p1 p1_0 p1_1 p1_2 p1_3 p2 p2_0 p2_1 p2_2 p2_3 p3 p3_0 p3_1 p3_2 p3_3 after transpose register p0 p3_3 p2_3 p1_3 p0_3 p1 p3_2 p2_2 p1_2 p0_2 p2 p3_1 p2_1 p1_1 p0_1 p3 p3_0 p2_0 p1_0 p0_0 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p0], %[p1] \n\t" "precr.qb.ph %[prim2], %[p0], %[p1] \n\t" "precrq.qb.ph %[prim3], %[p2], %[p3] \n\t" "precr.qb.ph %[prim4], %[p2], %[p3] \n\t" "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p0], %[p1], 
%[sec3] \n\t" "precrq.ph.w %[p2], %[p3], %[sec4] \n\t" "append %[p1], %[sec3], 16 \n\t" "append %[p3], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p0] "+r"(p0), [p1] "+r"(p1), [p2] "+r"(p2), [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* transpose q0, q1, q2, q3 original (when loaded from memory) register +1 +2 +3 +4 q3 q3_0 q3_1 q3_2 q3_3 q2 q2_0 q2_1 q2_2 q2_3 q1 q1_0 q1_1 q1_2 q1_3 q0 q0_0 q0_1 q0_2 q0_3 after transpose register q3 q0_3 q1_3 q2_3 q3_3 q2 q0_2 q1_2 q2_2 q3_2 q1 q0_1 q1_1 q2_1 q3_1 q0 q0_0 q1_0 q2_0 q3_0 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[q3], %[q2] \n\t" "precr.qb.ph %[prim2], %[q3], %[q2] \n\t" "precrq.qb.ph %[prim3], %[q1], %[q0] \n\t" "precr.qb.ph %[prim4], %[q1], %[q0] \n\t" "precrq.qb.ph %[q2], %[prim1], %[prim2] \n\t" "precr.qb.ph %[q0], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[q3], %[q2], %[sec3] \n\t" "precrq.ph.w %[q1], %[q0], %[sec4] \n\t" "append %[q2], %[sec3], 16 \n\t" "append %[q0], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [q3] "+r"(q3), [q2] "+r"(q2), [q1] "+r"(q1), [q0] "+r"(q0), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0, p3, p2, q0, q1, q2, q3, &hev, &mask, &flat); if ((flat == 0) && (mask != 0)) { filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0); STORE_F0() } else if ((mask & flat) == 0xFFFFFFFF) { /* left 2 element operation */ PACK_LEFT_0TO3() mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r); STORE_F1() } else if ((flat != 0) && (mask != 0)) { filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0); /* left 2 element operation */ PACK_LEFT_0TO3() mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r); if (mask & flat & 0x000000FF) { __asm__ __volatile__( "sb %[p2_r], -3(%[s4]) \n\t" "sb %[p1_r], -2(%[s4]) \n\t" "sb %[p0_r], -1(%[s4]) \n\t" "sb %[q0_r], (%[s4]) \n\t" "sb %[q1_r], +1(%[s4]) \n\t" "sb %[q2_r], +2(%[s4]) \n\t" : : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r), [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r), [s4] "r"(s4)); } else if (mask & 0x000000FF) { __asm__ __volatile__( "sb %[p1_f0], -2(%[s4]) \n\t" "sb %[p0_f0], -1(%[s4]) \n\t" "sb %[q0_f0], (%[s4]) \n\t" "sb %[q1_f0], +1(%[s4]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [s4] "r"(s4)); } __asm__ __volatile__( "srl %[p2_r], %[p2_r], 16 \n\t" "srl %[p1_r], %[p1_r], 16 \n\t" "srl %[p0_r], %[p0_r], 16 \n\t" "srl %[q0_r], %[q0_r], 16 \n\t" "srl %[q1_r], %[q1_r], 16 \n\t" "srl %[q2_r], %[q2_r], 16 \n\t" "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r), [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & 0x0000FF00) { __asm__ __volatile__( "sb %[p2_r], -3(%[s3]) \n\t" "sb %[p1_r], -2(%[s3]) \n\t" "sb %[p0_r], -1(%[s3]) \n\t" "sb %[q0_r], 
(%[s3]) \n\t" "sb %[q1_r], +1(%[s3]) \n\t" "sb %[q2_r], +2(%[s3]) \n\t" : : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r), [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r), [s3] "r"(s3)); } else if (mask & 0x0000FF00) { __asm__ __volatile__( "sb %[p1_f0], -2(%[s3]) \n\t" "sb %[p0_f0], -1(%[s3]) \n\t" "sb %[q0_f0], (%[s3]) \n\t" "sb %[q1_f0], +1(%[s3]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [s3] "r"(s3)); } __asm__ __volatile__( "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2] "+r"(p2), [p1] "+r"(p1), [p0] "+r"(p0), [q0] "+r"(q0), [q1] "+r"(q1), [q2] "+r"(q2), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & 0x00FF0000) { __asm__ __volatile__( "sb %[p2_l], -3(%[s2]) \n\t" "sb %[p1_l], -2(%[s2]) \n\t" "sb %[p0_l], -1(%[s2]) \n\t" "sb %[q0_l], (%[s2]) \n\t" "sb %[q1_l], +1(%[s2]) \n\t" "sb %[q2_l], +2(%[s2]) \n\t" : : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l), [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l), [s2] "r"(s2)); } else if (mask & 0x00FF0000) { __asm__ __volatile__( "sb %[p1_f0], -2(%[s2]) \n\t" "sb %[p0_f0], -1(%[s2]) \n\t" "sb %[q0_f0], (%[s2]) \n\t" "sb %[q1_f0], +1(%[s2]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [s2] "r"(s2)); } __asm__ __volatile__( "srl %[p2_l], %[p2_l], 16 \n\t" "srl %[p1_l], %[p1_l], 16 \n\t" "srl %[p0_l], %[p0_l], 16 \n\t" "srl %[q0_l], %[q0_l], 16 \n\t" "srl %[q1_l], %[q1_l], 16 \n\t" "srl %[q2_l], %[q2_l], 16 \n\t" "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l), [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & 0xFF000000) { __asm__ __volatile__( "sb %[p2_l], -3(%[s1]) \n\t" "sb %[p1_l], -2(%[s1]) \n\t" "sb %[p0_l], -1(%[s1]) \n\t" "sb %[q0_l], (%[s1]) \n\t" "sb %[q1_l], +1(%[s1]) \n\t" "sb %[q2_l], +2(%[s1]) \n\t" : : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l), [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l), [s1] "r"(s1)); } else if (mask & 0xFF000000) { __asm__ __volatile__( "sb %[p1_f0], -2(%[s1]) \n\t" "sb %[p0_f0], -1(%[s1]) \n\t" "sb %[q0_f0], (%[s1]) \n\t" "sb %[q1_f0], +1(%[s1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [s1] "r"(s1)); } } } } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vpx_dsp/mips/loopfilter_mb_horiz_dspr2.c000066400000000000000000000765361357355204000230340ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/mips/common_dspr2.h" #include "vpx_dsp/mips/loopfilter_filters_dspr2.h" #include "vpx_dsp/mips/loopfilter_macros_dspr2.h" #include "vpx_dsp/mips/loopfilter_masks_dspr2.h" #include "vpx_mem/vpx_mem.h" #if HAVE_DSPR2 static void mb_lpf_horizontal_edge(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int count) { uint32_t mask; uint32_t hev, flat, flat2; uint8_t i; uint8_t *sp7, *sp6, *sp5, *sp4, *sp3, *sp2, *sp1, *sp0; uint8_t *sq0, *sq1, *sq2, *sq3, *sq4, *sq5, *sq6, *sq7; uint32_t thresh_vec, flimit_vec, limit_vec; uint32_t uflimit, ulimit, uthresh; uint32_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; uint32_t p1_f0, p0_f0, q0_f0, q1_f0; uint32_t p7_l, p6_l, p5_l, p4_l, p3_l, p2_l, p1_l, p0_l; uint32_t q0_l, q1_l, q2_l, q3_l, q4_l, q5_l, q6_l, q7_l; uint32_t p7_r, p6_r, p5_r, p4_r, p3_r, p2_r, p1_r, p0_r; uint32_t q0_r, q1_r, q2_r, q3_r, q4_r, q5_r, q6_r, q7_r; uint32_t p2_l_f1, p1_l_f1, p0_l_f1, p2_r_f1, p1_r_f1, p0_r_f1; uint32_t q0_l_f1, q1_l_f1, q2_l_f1, q0_r_f1, q1_r_f1, q2_r_f1; uflimit = *blimit; ulimit = *limit; uthresh = *thresh; /* create quad-byte */ __asm__ __volatile__( "replv.qb %[thresh_vec], %[uthresh] \n\t" "replv.qb %[flimit_vec], %[uflimit] \n\t" "replv.qb %[limit_vec], %[ulimit] \n\t" : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec), [limit_vec] "=r"(limit_vec) : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit)); /* prefetch data for store */ prefetch_store(s); for (i = 0; i < (2 * count); i++) { sp7 = s - (pitch << 3); sp6 = sp7 + pitch; sp5 = sp6 + pitch; sp4 = sp5 + pitch; sp3 = sp4 + pitch; sp2 = sp3 + pitch; sp1 = sp2 + pitch; sp0 = sp1 + pitch; sq0 = s; sq1 = s + pitch; sq2 = sq1 + pitch; sq3 = sq2 + pitch; sq4 = sq3 + pitch; sq5 = sq4 + pitch; sq6 = sq5 + pitch; sq7 = sq6 + pitch; __asm__ __volatile__( "lw %[p7], (%[sp7]) \n\t" "lw %[p6], (%[sp6]) \n\t" "lw %[p5], (%[sp5]) \n\t" "lw %[p4], (%[sp4]) \n\t" "lw %[p3], (%[sp3]) \n\t" "lw %[p2], (%[sp2]) \n\t" "lw %[p1], (%[sp1]) \n\t" "lw %[p0], (%[sp0]) \n\t" : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0), [p7] "=&r"(p7), [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4) : [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sp4] "r"(sp4), [sp5] "r"(sp5), [sp6] "r"(sp6), [sp7] "r"(sp7)); __asm__ __volatile__( "lw %[q0], (%[sq0]) \n\t" "lw %[q1], (%[sq1]) \n\t" "lw %[q2], (%[sq2]) \n\t" "lw %[q3], (%[sq3]) \n\t" "lw %[q4], (%[sq4]) \n\t" "lw %[q5], (%[sq5]) \n\t" "lw %[q6], (%[sq6]) \n\t" "lw %[q7], (%[sq7]) \n\t" : [q3] "=&r"(q3), [q2] "=&r"(q2), [q1] "=&r"(q1), [q0] "=&r"(q0), [q7] "=&r"(q7), [q6] "=&r"(q6), [q5] "=&r"(q5), [q4] "=&r"(q4) : [sq3] "r"(sq3), [sq2] "r"(sq2), [sq1] "r"(sq1), [sq0] "r"(sq0), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6), [sq7] "r"(sq7)); filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0, p3, p2, q0, q1, q2, q3, &hev, &mask, &flat); flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2); /* f0 */ if (((flat2 == 0) && (flat == 0) && (mask != 0)) || ((flat2 != 0) && (flat == 0) && (mask != 0))) { filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0); __asm__ __volatile__( "sw %[p1_f0], (%[sp1]) \n\t" "sw %[p0_f0], (%[sp0]) \n\t" "sw %[q0_f0], (%[sq0]) \n\t" "sw %[q1_f0], (%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] 
"r"(sq1)); } else if ((flat2 == 0XFFFFFFFF) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) { /* f2 */ PACK_LEFT_0TO3() PACK_LEFT_4TO7() wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l, &q6_l, &q7_l); PACK_RIGHT_0TO3() PACK_RIGHT_4TO7() wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r, &q6_r, &q7_r); COMBINE_LEFT_RIGHT_0TO2() COMBINE_LEFT_RIGHT_3TO6() __asm__ __volatile__( "sw %[p6], (%[sp6]) \n\t" "sw %[p5], (%[sp5]) \n\t" "sw %[p4], (%[sp4]) \n\t" "sw %[p3], (%[sp3]) \n\t" "sw %[p2], (%[sp2]) \n\t" "sw %[p1], (%[sp1]) \n\t" "sw %[p0], (%[sp0]) \n\t" : : [p6] "r"(p6), [p5] "r"(p5), [p4] "r"(p4), [p3] "r"(p3), [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4), [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0)); __asm__ __volatile__( "sw %[q6], (%[sq6]) \n\t" "sw %[q5], (%[sq5]) \n\t" "sw %[q4], (%[sq4]) \n\t" "sw %[q3], (%[sq3]) \n\t" "sw %[q2], (%[sq2]) \n\t" "sw %[q1], (%[sq1]) \n\t" "sw %[q0], (%[sq0]) \n\t" : : [q6] "r"(q6), [q5] "r"(q5), [q4] "r"(q4), [q3] "r"(q3), [q2] "r"(q2), [q1] "r"(q1), [q0] "r"(q0), [sq6] "r"(sq6), [sq5] "r"(sq5), [sq4] "r"(sq4), [sq3] "r"(sq3), [sq2] "r"(sq2), [sq1] "r"(sq1), [sq0] "r"(sq0)); } else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) { /* f1 */ /* left 2 element operation */ PACK_LEFT_0TO3() mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r); COMBINE_LEFT_RIGHT_0TO2() __asm__ __volatile__( "sw %[p2], (%[sp2]) \n\t" "sw %[p1], (%[sp1]) \n\t" "sw %[p0], (%[sp0]) \n\t" "sw %[q0], (%[sq0]) \n\t" "sw %[q1], (%[sq1]) \n\t" "sw %[q2], (%[sq2]) \n\t" : : [p2] "r"(p2), [p1] "r"(p1), [p0] "r"(p0), [q0] "r"(q0), [q1] "r"(q1), [q2] "r"(q2), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if ((flat2 == 0) && (flat != 0) && (mask != 0)) { /* f0+f1 */ filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0); /* left 2 element operation */ PACK_LEFT_0TO3() mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r); if (mask & flat & 0x000000FF) { __asm__ __volatile__( "sb %[p2_r], (%[sp2]) \n\t" "sb %[p1_r], (%[sp1]) \n\t" "sb %[p0_r], (%[sp0]) \n\t" "sb %[q0_r], (%[sq0]) \n\t" "sb %[q1_r], (%[sq1]) \n\t" "sb %[q2_r], (%[sq2]) \n\t" : : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r), [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if (mask & 0x000000FF) { __asm__ __volatile__( "sb %[p1_f0], (%[sp1]) \n\t" "sb %[p0_f0], (%[sp0]) \n\t" "sb %[q0_f0], (%[sq0]) \n\t" "sb %[q1_f0], (%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } __asm__ __volatile__( "srl %[p2_r], %[p2_r], 16 \n\t" "srl %[p1_r], %[p1_r], 16 \n\t" "srl %[p0_r], %[p0_r], 16 \n\t" "srl %[q0_r], %[q0_r], 16 \n\t" "srl %[q1_r], %[q1_r], 16 \n\t" "srl %[q2_r], %[q2_r], 16 \n\t" "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2_r] "+r"(p2_r), 
[p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r), [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & 0x0000FF00) { __asm__ __volatile__( "sb %[p2_r], +1(%[sp2]) \n\t" "sb %[p1_r], +1(%[sp1]) \n\t" "sb %[p0_r], +1(%[sp0]) \n\t" "sb %[q0_r], +1(%[sq0]) \n\t" "sb %[q1_r], +1(%[sq1]) \n\t" "sb %[q2_r], +1(%[sq2]) \n\t" : : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r), [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if (mask & 0x0000FF00) { __asm__ __volatile__( "sb %[p1_f0], +1(%[sp1]) \n\t" "sb %[p0_f0], +1(%[sp0]) \n\t" "sb %[q0_f0], +1(%[sq0]) \n\t" "sb %[q1_f0], +1(%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } __asm__ __volatile__( "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & 0x00FF0000) { __asm__ __volatile__( "sb %[p2_l], +2(%[sp2]) \n\t" "sb %[p1_l], +2(%[sp1]) \n\t" "sb %[p0_l], +2(%[sp0]) \n\t" "sb %[q0_l], +2(%[sq0]) \n\t" "sb %[q1_l], +2(%[sq1]) \n\t" "sb %[q2_l], +2(%[sq2]) \n\t" : : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l), [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if (mask & 0x00FF0000) { __asm__ __volatile__( "sb %[p1_f0], +2(%[sp1]) \n\t" "sb %[p0_f0], +2(%[sp0]) \n\t" "sb %[q0_f0], +2(%[sq0]) \n\t" "sb %[q1_f0], +2(%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } __asm__ __volatile__( "srl %[p2_l], %[p2_l], 16 \n\t" "srl %[p1_l], %[p1_l], 16 \n\t" "srl %[p0_l], %[p0_l], 16 \n\t" "srl %[q0_l], %[q0_l], 16 \n\t" "srl %[q1_l], %[q1_l], 16 \n\t" "srl %[q2_l], %[q2_l], 16 \n\t" "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l), [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & 0xFF000000) { __asm__ __volatile__( "sb %[p2_l], +3(%[sp2]) \n\t" "sb %[p1_l], +3(%[sp1]) \n\t" "sb %[p0_l], +3(%[sp0]) \n\t" "sb %[q0_l], +3(%[sq0]) \n\t" "sb %[q1_l], +3(%[sq1]) \n\t" "sb %[q2_l], +3(%[sq2]) \n\t" : : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l), [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if (mask & 0xFF000000) { __asm__ __volatile__( "sb %[p1_f0], +3(%[sp1]) \n\t" "sb %[p0_f0], +3(%[sp0]) \n\t" "sb %[q0_f0], +3(%[sq0]) \n\t" "sb %[q1_f0], +3(%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } } else if ((flat2 != 0) && (flat != 0) && (mask != 0)) { /* f0 + f1 + f2 */ /* f0 function */ filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0); /* f1 function */ /* left 2 element operation */ PACK_LEFT_0TO3() mbfilter1_dspr2(p3_l, p2_l, 
p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, &p2_l_f1, &p1_l_f1, &p0_l_f1, &q0_l_f1, &q1_l_f1, &q2_l_f1); /* right 2 element operation */ PACK_RIGHT_0TO3() mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, &p2_r_f1, &p1_r_f1, &p0_r_f1, &q0_r_f1, &q1_r_f1, &q2_r_f1); /* f2 function */ PACK_LEFT_4TO7() wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l, &q6_l, &q7_l); PACK_RIGHT_4TO7() wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r, &q6_r, &q7_r); if (mask & flat & flat2 & 0x000000FF) { __asm__ __volatile__( "sb %[p6_r], (%[sp6]) \n\t" "sb %[p5_r], (%[sp5]) \n\t" "sb %[p4_r], (%[sp4]) \n\t" "sb %[p3_r], (%[sp3]) \n\t" "sb %[p2_r], (%[sp2]) \n\t" "sb %[p1_r], (%[sp1]) \n\t" "sb %[p0_r], (%[sp0]) \n\t" : : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r), [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4), [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [p0_r] "r"(p0_r), [sp0] "r"(sp0)); __asm__ __volatile__( "sb %[q0_r], (%[sq0]) \n\t" "sb %[q1_r], (%[sq1]) \n\t" "sb %[q2_r], (%[sq2]) \n\t" "sb %[q3_r], (%[sq3]) \n\t" "sb %[q4_r], (%[sq4]) \n\t" "sb %[q5_r], (%[sq5]) \n\t" "sb %[q6_r], (%[sq6]) \n\t" : : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r), [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r), [q6_r] "r"(q6_r), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2), [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6)); } else if (mask & flat & 0x000000FF) { __asm__ __volatile__( "sb %[p2_r_f1], (%[sp2]) \n\t" "sb %[p1_r_f1], (%[sp1]) \n\t" "sb %[p0_r_f1], (%[sp0]) \n\t" "sb %[q0_r_f1], (%[sq0]) \n\t" "sb %[q1_r_f1], (%[sq1]) \n\t" "sb %[q2_r_f1], (%[sq2]) \n\t" : : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1), [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1), [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if (mask & 0x000000FF) { __asm__ __volatile__( "sb %[p1_f0], (%[sp1]) \n\t" "sb %[p0_f0], (%[sp0]) \n\t" "sb %[q0_f0], (%[sq0]) \n\t" "sb %[q1_f0], (%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } __asm__ __volatile__( "srl %[p6_r], %[p6_r], 16 \n\t" "srl %[p5_r], %[p5_r], 16 \n\t" "srl %[p4_r], %[p4_r], 16 \n\t" "srl %[p3_r], %[p3_r], 16 \n\t" "srl %[p2_r], %[p2_r], 16 \n\t" "srl %[p1_r], %[p1_r], 16 \n\t" "srl %[p0_r], %[p0_r], 16 \n\t" "srl %[q0_r], %[q0_r], 16 \n\t" "srl %[q1_r], %[q1_r], 16 \n\t" "srl %[q2_r], %[q2_r], 16 \n\t" "srl %[q3_r], %[q3_r], 16 \n\t" "srl %[q4_r], %[q4_r], 16 \n\t" "srl %[q5_r], %[q5_r], 16 \n\t" "srl %[q6_r], %[q6_r], 16 \n\t" : [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r), [q3_r] "+r"(q3_r), [q4_r] "+r"(q4_r), [q5_r] "+r"(q5_r), [p6_r] "+r"(p6_r), [p5_r] "+r"(p5_r), [p4_r] "+r"(p4_r), [p3_r] "+r"(p3_r), [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [q6_r] "+r"(q6_r), [p0_r] "+r"(p0_r) :); __asm__ __volatile__( "srl %[p2_r_f1], %[p2_r_f1], 16 \n\t" "srl %[p1_r_f1], %[p1_r_f1], 16 \n\t" "srl %[p0_r_f1], %[p0_r_f1], 16 \n\t" "srl %[q0_r_f1], %[q0_r_f1], 16 \n\t" "srl %[q1_r_f1], %[q1_r_f1], 16 \n\t" "srl %[q2_r_f1], %[q2_r_f1], 16 \n\t" "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2_r_f1] "+r"(p2_r_f1), [p1_r_f1] "+r"(p1_r_f1), [p0_r_f1] 
"+r"(p0_r_f1), [q0_r_f1] "+r"(q0_r_f1), [q1_r_f1] "+r"(q1_r_f1), [q2_r_f1] "+r"(q2_r_f1), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & flat2 & 0x0000FF00) { __asm__ __volatile__( "sb %[p6_r], +1(%[sp6]) \n\t" "sb %[p5_r], +1(%[sp5]) \n\t" "sb %[p4_r], +1(%[sp4]) \n\t" "sb %[p3_r], +1(%[sp3]) \n\t" "sb %[p2_r], +1(%[sp2]) \n\t" "sb %[p1_r], +1(%[sp1]) \n\t" "sb %[p0_r], +1(%[sp0]) \n\t" : : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r), [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4), [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0)); __asm__ __volatile__( "sb %[q0_r], +1(%[sq0]) \n\t" "sb %[q1_r], +1(%[sq1]) \n\t" "sb %[q2_r], +1(%[sq2]) \n\t" "sb %[q3_r], +1(%[sq3]) \n\t" "sb %[q4_r], +1(%[sq4]) \n\t" "sb %[q5_r], +1(%[sq5]) \n\t" "sb %[q6_r], +1(%[sq6]) \n\t" : : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r), [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r), [q6_r] "r"(q6_r), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2), [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6)); } else if (mask & flat & 0x0000FF00) { __asm__ __volatile__( "sb %[p2_r_f1], +1(%[sp2]) \n\t" "sb %[p1_r_f1], +1(%[sp1]) \n\t" "sb %[p0_r_f1], +1(%[sp0]) \n\t" "sb %[q0_r_f1], +1(%[sq0]) \n\t" "sb %[q1_r_f1], +1(%[sq1]) \n\t" "sb %[q2_r_f1], +1(%[sq2]) \n\t" : : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1), [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1), [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if (mask & 0x0000FF00) { __asm__ __volatile__( "sb %[p1_f0], +1(%[sp1]) \n\t" "sb %[p0_f0], +1(%[sp0]) \n\t" "sb %[q0_f0], +1(%[sq0]) \n\t" "sb %[q1_f0], +1(%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } __asm__ __volatile__( "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & flat2 & 0x00FF0000) { __asm__ __volatile__( "sb %[p6_l], +2(%[sp6]) \n\t" "sb %[p5_l], +2(%[sp5]) \n\t" "sb %[p4_l], +2(%[sp4]) \n\t" "sb %[p3_l], +2(%[sp3]) \n\t" "sb %[p2_l], +2(%[sp2]) \n\t" "sb %[p1_l], +2(%[sp1]) \n\t" "sb %[p0_l], +2(%[sp0]) \n\t" : : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l), [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4), [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0)); __asm__ __volatile__( "sb %[q0_l], +2(%[sq0]) \n\t" "sb %[q1_l], +2(%[sq1]) \n\t" "sb %[q2_l], +2(%[sq2]) \n\t" "sb %[q3_l], +2(%[sq3]) \n\t" "sb %[q4_l], +2(%[sq4]) \n\t" "sb %[q5_l], +2(%[sq5]) \n\t" "sb %[q6_l], +2(%[sq6]) \n\t" : : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l), [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l), [q6_l] "r"(q6_l), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2), [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [sq6] "r"(sq6)); } else if (mask & flat & 0x00FF0000) { __asm__ __volatile__( "sb %[p2_l_f1], +2(%[sp2]) \n\t" "sb %[p1_l_f1], +2(%[sp1]) \n\t" "sb %[p0_l_f1], +2(%[sp0]) \n\t" "sb %[q0_l_f1], +2(%[sq0]) \n\t" "sb %[q1_l_f1], +2(%[sq1]) \n\t" "sb %[q2_l_f1], +2(%[sq2]) \n\t" : : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1), [p0_l_f1] 
"r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1), [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if (mask & 0x00FF0000) { __asm__ __volatile__( "sb %[p1_f0], +2(%[sp1]) \n\t" "sb %[p0_f0], +2(%[sp0]) \n\t" "sb %[q0_f0], +2(%[sq0]) \n\t" "sb %[q1_f0], +2(%[sq1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } __asm__ __volatile__( "srl %[p6_l], %[p6_l], 16 \n\t" "srl %[p5_l], %[p5_l], 16 \n\t" "srl %[p4_l], %[p4_l], 16 \n\t" "srl %[p3_l], %[p3_l], 16 \n\t" "srl %[p2_l], %[p2_l], 16 \n\t" "srl %[p1_l], %[p1_l], 16 \n\t" "srl %[p0_l], %[p0_l], 16 \n\t" "srl %[q0_l], %[q0_l], 16 \n\t" "srl %[q1_l], %[q1_l], 16 \n\t" "srl %[q2_l], %[q2_l], 16 \n\t" "srl %[q3_l], %[q3_l], 16 \n\t" "srl %[q4_l], %[q4_l], 16 \n\t" "srl %[q5_l], %[q5_l], 16 \n\t" "srl %[q6_l], %[q6_l], 16 \n\t" : [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l), [q3_l] "+r"(q3_l), [q4_l] "+r"(q4_l), [q5_l] "+r"(q5_l), [q6_l] "+r"(q6_l), [p6_l] "+r"(p6_l), [p5_l] "+r"(p5_l), [p4_l] "+r"(p4_l), [p3_l] "+r"(p3_l), [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l) :); __asm__ __volatile__( "srl %[p2_l_f1], %[p2_l_f1], 16 \n\t" "srl %[p1_l_f1], %[p1_l_f1], 16 \n\t" "srl %[p0_l_f1], %[p0_l_f1], 16 \n\t" "srl %[q0_l_f1], %[q0_l_f1], 16 \n\t" "srl %[q1_l_f1], %[q1_l_f1], 16 \n\t" "srl %[q2_l_f1], %[q2_l_f1], 16 \n\t" "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2_l_f1] "+r"(p2_l_f1), [p1_l_f1] "+r"(p1_l_f1), [p0_l_f1] "+r"(p0_l_f1), [q0_l_f1] "+r"(q0_l_f1), [q1_l_f1] "+r"(q1_l_f1), [q2_l_f1] "+r"(q2_l_f1), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & flat2 & 0xFF000000) { __asm__ __volatile__( "sb %[p6_l], +3(%[sp6]) \n\t" "sb %[p5_l], +3(%[sp5]) \n\t" "sb %[p4_l], +3(%[sp4]) \n\t" "sb %[p3_l], +3(%[sp3]) \n\t" "sb %[p2_l], +3(%[sp2]) \n\t" "sb %[p1_l], +3(%[sp1]) \n\t" "sb %[p0_l], +3(%[sp0]) \n\t" : : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l), [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l), [sp6] "r"(sp6), [sp5] "r"(sp5), [sp4] "r"(sp4), [sp3] "r"(sp3), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0)); __asm__ __volatile__( "sb %[q0_l], +3(%[sq0]) \n\t" "sb %[q1_l], +3(%[sq1]) \n\t" "sb %[q2_l], +3(%[sq2]) \n\t" "sb %[q3_l], +3(%[sq3]) \n\t" "sb %[q4_l], +3(%[sq4]) \n\t" "sb %[q5_l], +3(%[sq5]) \n\t" "sb %[q6_l], +3(%[sq6]) \n\t" : : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l), [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2), [sq3] "r"(sq3), [sq4] "r"(sq4), [sq5] "r"(sq5), [q6_l] "r"(q6_l), [sq6] "r"(sq6)); } else if (mask & flat & 0xFF000000) { __asm__ __volatile__( "sb %[p2_l_f1], +3(%[sp2]) \n\t" "sb %[p1_l_f1], +3(%[sp1]) \n\t" "sb %[p0_l_f1], +3(%[sp0]) \n\t" "sb %[q0_l_f1], +3(%[sq0]) \n\t" "sb %[q1_l_f1], +3(%[sq1]) \n\t" "sb %[q2_l_f1], +3(%[sq2]) \n\t" : : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1), [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1), [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [sp2] "r"(sp2), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1), [sq2] "r"(sq2)); } else if (mask & 0xFF000000) { __asm__ __volatile__( "sb %[p1_f0], +3(%[sp1]) \n\t" "sb %[p0_f0], +3(%[sp0]) \n\t" "sb %[q0_f0], +3(%[sq0]) \n\t" "sb %[q1_f0], +3(%[sq1]) \n\t" : : 
[p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [sp1] "r"(sp1), [sp0] "r"(sp0), [sq0] "r"(sq0), [sq1] "r"(sq1)); } } s = s + 4; } } void vpx_lpf_horizontal_16_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 1); } void vpx_lpf_horizontal_16_dual_dspr2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { mb_lpf_horizontal_edge(s, pitch, blimit, limit, thresh, 2); } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vpx_dsp/mips/loopfilter_mb_vert_dspr2.c000066400000000000000000000772001357355204000226460ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/mips/common_dspr2.h" #include "vpx_dsp/mips/loopfilter_filters_dspr2.h" #include "vpx_dsp/mips/loopfilter_macros_dspr2.h" #include "vpx_dsp/mips/loopfilter_masks_dspr2.h" #include "vpx_mem/vpx_mem.h" #if HAVE_DSPR2 void vpx_lpf_vertical_16_dspr2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { uint8_t i; uint32_t mask, hev, flat, flat2; uint8_t *s1, *s2, *s3, *s4; uint32_t prim1, prim2, sec3, sec4, prim3, prim4; uint32_t thresh_vec, flimit_vec, limit_vec; uint32_t uflimit, ulimit, uthresh; uint32_t p7, p6, p5, p4, p3, p2, p1, p0, q0, q1, q2, q3, q4, q5, q6, q7; uint32_t p1_f0, p0_f0, q0_f0, q1_f0; uint32_t p7_l, p6_l, p5_l, p4_l, p3_l, p2_l, p1_l, p0_l; uint32_t q0_l, q1_l, q2_l, q3_l, q4_l, q5_l, q6_l, q7_l; uint32_t p7_r, p6_r, p5_r, p4_r, p3_r, p2_r, p1_r, p0_r; uint32_t q0_r, q1_r, q2_r, q3_r, q4_r, q5_r, q6_r, q7_r; uint32_t p2_l_f1, p1_l_f1, p0_l_f1, p2_r_f1, p1_r_f1, p0_r_f1; uint32_t q0_l_f1, q1_l_f1, q2_l_f1, q0_r_f1, q1_r_f1, q2_r_f1; uflimit = *blimit; ulimit = *limit; uthresh = *thresh; /* create quad-byte */ __asm__ __volatile__( "replv.qb %[thresh_vec], %[uthresh] \n\t" "replv.qb %[flimit_vec], %[uflimit] \n\t" "replv.qb %[limit_vec], %[ulimit] \n\t" : [thresh_vec] "=&r"(thresh_vec), [flimit_vec] "=&r"(flimit_vec), [limit_vec] "=r"(limit_vec) : [uthresh] "r"(uthresh), [uflimit] "r"(uflimit), [ulimit] "r"(ulimit)); prefetch_store(s + pitch); for (i = 0; i < 2; i++) { s1 = s; s2 = s + pitch; s3 = s2 + pitch; s4 = s3 + pitch; s = s4 + pitch; __asm__ __volatile__( "lw %[p0], -4(%[s1]) \n\t" "lw %[p1], -4(%[s2]) \n\t" "lw %[p2], -4(%[s3]) \n\t" "lw %[p3], -4(%[s4]) \n\t" "lw %[p4], -8(%[s1]) \n\t" "lw %[p5], -8(%[s2]) \n\t" "lw %[p6], -8(%[s3]) \n\t" "lw %[p7], -8(%[s4]) \n\t" : [p3] "=&r"(p3), [p2] "=&r"(p2), [p1] "=&r"(p1), [p0] "=&r"(p0), [p7] "=&r"(p7), [p6] "=&r"(p6), [p5] "=&r"(p5), [p4] "=&r"(p4) : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] "r"(s4)); __asm__ __volatile__( "lw %[q3], (%[s1]) \n\t" "lw %[q2], (%[s2]) \n\t" "lw %[q1], (%[s3]) \n\t" "lw %[q0], (%[s4]) \n\t" "lw %[q7], +4(%[s1]) \n\t" "lw %[q6], +4(%[s2]) \n\t" "lw %[q5], +4(%[s3]) \n\t" "lw %[q4], +4(%[s4]) \n\t" : [q3] "=&r"(q3), [q2] "=&r"(q2), [q1] "=&r"(q1), [q0] "=&r"(q0), [q7] "=&r"(q7), [q6] "=&r"(q6), [q5] "=&r"(q5), [q4] "=&r"(q4) : [s1] "r"(s1), [s2] "r"(s2), [s3] "r"(s3), [s4] 
"r"(s4)); /* transpose p3, p2, p1, p0 original (when loaded from memory) register -4 -3 -2 -1 p0 p0_0 p0_1 p0_2 p0_3 p1 p1_0 p1_1 p1_2 p1_3 p2 p2_0 p2_1 p2_2 p2_3 p3 p3_0 p3_1 p3_2 p3_3 after transpose register p0 p3_3 p2_3 p1_3 p0_3 p1 p3_2 p2_2 p1_2 p0_2 p2 p3_1 p2_1 p1_1 p0_1 p3 p3_0 p2_0 p1_0 p0_0 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p0], %[p1] \n\t" "precr.qb.ph %[prim2], %[p0], %[p1] \n\t" "precrq.qb.ph %[prim3], %[p2], %[p3] \n\t" "precr.qb.ph %[prim4], %[p2], %[p3] \n\t" "precrq.qb.ph %[p1], %[prim1], %[prim2] \n\t" "precr.qb.ph %[p3], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p0], %[p1], %[sec3] \n\t" "precrq.ph.w %[p2], %[p3], %[sec4] \n\t" "append %[p1], %[sec3], 16 \n\t" "append %[p3], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p0] "+r"(p0), [p1] "+r"(p1), [p2] "+r"(p2), [p3] "+r"(p3), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* transpose q0, q1, q2, q3 original (when loaded from memory) register +1 +2 +3 +4 q3 q3_0 q3_1 q3_2 q3_3 q2 q2_0 q2_1 q2_2 q2_3 q1 q1_0 q1_1 q1_2 q1_3 q0 q0_0 q0_1 q0_2 q0_3 after transpose register q3 q0_3 q1_3 q2_3 q3_3 q2 q0_2 q1_2 q2_2 q3_2 q1 q0_1 q1_1 q2_1 q3_1 q0 q0_0 q1_0 q2_0 q3_0 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[q3], %[q2] \n\t" "precr.qb.ph %[prim2], %[q3], %[q2] \n\t" "precrq.qb.ph %[prim3], %[q1], %[q0] \n\t" "precr.qb.ph %[prim4], %[q1], %[q0] \n\t" "precrq.qb.ph %[q2], %[prim1], %[prim2] \n\t" "precr.qb.ph %[q0], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[q3], %[q2], %[sec3] \n\t" "precrq.ph.w %[q1], %[q0], %[sec4] \n\t" "append %[q2], %[sec3], 16 \n\t" "append %[q0], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [q3] "+r"(q3), [q2] "+r"(q2), [q1] "+r"(q1), [q0] "+r"(q0), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* transpose p7, p6, p5, p4 original (when loaded from memory) register -8 -7 -6 -5 p4 p4_0 p4_1 p4_2 p4_3 p5 p5_0 p5_1 p5_2 p5_3 p6 p6_0 p6_1 p6_2 p6_3 p7 p7_0 p7_1 p7_2 p7_3 after transpose register p4 p7_3 p6_3 p5_3 p4_3 p5 p7_2 p6_2 p5_2 p4_2 p6 p7_1 p6_1 p5_1 p4_1 p7 p7_0 p6_0 p5_0 p4_0 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[p4], %[p5] \n\t" "precr.qb.ph %[prim2], %[p4], %[p5] \n\t" "precrq.qb.ph %[prim3], %[p6], %[p7] \n\t" "precr.qb.ph %[prim4], %[p6], %[p7] \n\t" "precrq.qb.ph %[p5], %[prim1], %[prim2] \n\t" "precr.qb.ph %[p7], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[p4], %[p5], %[sec3] \n\t" "precrq.ph.w %[p6], %[p7], %[sec4] \n\t" "append %[p5], %[sec3], 16 \n\t" "append %[p7], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [p4] "+r"(p4), [p5] "+r"(p5), [p6] "+r"(p6), [p7] "+r"(p7), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); /* transpose q4, q5, q6, q7 original (when loaded from memory) register +5 +6 +7 +8 q7 q7_0 q7_1 q7_2 q7_3 q6 q6_0 q6_1 q6_2 q6_3 q5 q5_0 q5_1 q5_2 q5_3 q4 q4_0 q4_1 q4_2 q4_3 after transpose register q7 q4_3 q5_3 q26_3 q7_3 q6 q4_2 q5_2 q26_2 q7_2 q5 q4_1 q5_1 q26_1 q7_1 q4 q4_0 q5_0 q26_0 q7_0 */ __asm__ __volatile__( "precrq.qb.ph %[prim1], %[q7], %[q6] \n\t" "precr.qb.ph %[prim2], %[q7], %[q6] \n\t" "precrq.qb.ph %[prim3], %[q5], %[q4] \n\t" "precr.qb.ph %[prim4], %[q5], %[q4] \n\t" 
"precrq.qb.ph %[q6], %[prim1], %[prim2] \n\t" "precr.qb.ph %[q4], %[prim1], %[prim2] \n\t" "precrq.qb.ph %[sec3], %[prim3], %[prim4] \n\t" "precr.qb.ph %[sec4], %[prim3], %[prim4] \n\t" "precrq.ph.w %[q7], %[q6], %[sec3] \n\t" "precrq.ph.w %[q5], %[q4], %[sec4] \n\t" "append %[q6], %[sec3], 16 \n\t" "append %[q4], %[sec4], 16 \n\t" : [prim1] "=&r"(prim1), [prim2] "=&r"(prim2), [prim3] "=&r"(prim3), [prim4] "=&r"(prim4), [q7] "+r"(q7), [q6] "+r"(q6), [q5] "+r"(q5), [q4] "+r"(q4), [sec3] "=&r"(sec3), [sec4] "=&r"(sec4) :); filter_hev_mask_flatmask4_dspr2(limit_vec, flimit_vec, thresh_vec, p1, p0, p3, p2, q0, q1, q2, q3, &hev, &mask, &flat); flatmask5(p7, p6, p5, p4, p0, q0, q4, q5, q6, q7, &flat2); /* f0 */ if (((flat2 == 0) && (flat == 0) && (mask != 0)) || ((flat2 != 0) && (flat == 0) && (mask != 0))) { filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0); STORE_F0() } else if ((flat2 == 0XFFFFFFFF) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) { /* f2 */ PACK_LEFT_0TO3() PACK_LEFT_4TO7() wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l, &q6_l, &q7_l); PACK_RIGHT_0TO3() PACK_RIGHT_4TO7() wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r, &q6_r, &q7_r); STORE_F2() } else if ((flat2 == 0) && (flat == 0xFFFFFFFF) && (mask == 0xFFFFFFFF)) { /* f1 */ PACK_LEFT_0TO3() mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l); PACK_RIGHT_0TO3() mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r); STORE_F1() } else if ((flat2 == 0) && (flat != 0) && (mask != 0)) { /* f0 + f1 */ filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0); /* left 2 element operation */ PACK_LEFT_0TO3() mbfilter_dspr2(&p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l); /* right 2 element operation */ PACK_RIGHT_0TO3() mbfilter_dspr2(&p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r); if (mask & flat & 0x000000FF) { __asm__ __volatile__( "sb %[p2_r], -3(%[s4]) \n\t" "sb %[p1_r], -2(%[s4]) \n\t" "sb %[p0_r], -1(%[s4]) \n\t" "sb %[q0_r], (%[s4]) \n\t" "sb %[q1_r], +1(%[s4]) \n\t" "sb %[q2_r], +2(%[s4]) \n\t" : : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r), [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r), [s4] "r"(s4)); } else if (mask & 0x000000FF) { __asm__ __volatile__( "sb %[p1_f0], -2(%[s4]) \n\t" "sb %[p0_f0], -1(%[s4]) \n\t" "sb %[q0_f0], (%[s4]) \n\t" "sb %[q1_f0], +1(%[s4]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [s4] "r"(s4)); } __asm__ __volatile__( "srl %[p2_r], %[p2_r], 16 \n\t" "srl %[p1_r], %[p1_r], 16 \n\t" "srl %[p0_r], %[p0_r], 16 \n\t" "srl %[q0_r], %[q0_r], 16 \n\t" "srl %[q1_r], %[q1_r], 16 \n\t" "srl %[q2_r], %[q2_r], 16 \n\t" "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r), [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & 0x0000FF00) { __asm__ __volatile__( "sb %[p2_r], -3(%[s3]) \n\t" "sb %[p1_r], -2(%[s3]) \n\t" "sb %[p0_r], -1(%[s3]) \n\t" "sb %[q0_r], (%[s3]) \n\t" "sb %[q1_r], +1(%[s3]) \n\t" "sb %[q2_r], +2(%[s3]) \n\t" : : [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r), [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r), [s3] "r"(s3)); } else if (mask & 
0x0000FF00) { __asm__ __volatile__( "sb %[p1_f0], -2(%[s3]) \n\t" "sb %[p0_f0], -1(%[s3]) \n\t" "sb %[q0_f0], (%[s3]) \n\t" "sb %[q1_f0], +1(%[s3]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [s3] "r"(s3)); } __asm__ __volatile__( "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & 0x00FF0000) { __asm__ __volatile__( "sb %[p2_l], -3(%[s2]) \n\t" "sb %[p1_l], -2(%[s2]) \n\t" "sb %[p0_l], -1(%[s2]) \n\t" "sb %[q0_l], (%[s2]) \n\t" "sb %[q1_l], +1(%[s2]) \n\t" "sb %[q2_l], +2(%[s2]) \n\t" : : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l), [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l), [s2] "r"(s2)); } else if (mask & 0x00FF0000) { __asm__ __volatile__( "sb %[p1_f0], -2(%[s2]) \n\t" "sb %[p0_f0], -1(%[s2]) \n\t" "sb %[q0_f0], (%[s2]) \n\t" "sb %[q1_f0], +1(%[s2]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [s2] "r"(s2)); } __asm__ __volatile__( "srl %[p2_l], %[p2_l], 16 \n\t" "srl %[p1_l], %[p1_l], 16 \n\t" "srl %[p0_l], %[p0_l], 16 \n\t" "srl %[q0_l], %[q0_l], 16 \n\t" "srl %[q1_l], %[q1_l], 16 \n\t" "srl %[q2_l], %[q2_l], 16 \n\t" "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l), [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & 0xFF000000) { __asm__ __volatile__( "sb %[p2_l], -3(%[s1]) \n\t" "sb %[p1_l], -2(%[s1]) \n\t" "sb %[p0_l], -1(%[s1]) \n\t" "sb %[q0_l], (%[s1]) \n\t" "sb %[q1_l], +1(%[s1]) \n\t" "sb %[q2_l], +2(%[s1]) \n\t" : : [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l), [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l), [s1] "r"(s1)); } else if (mask & 0xFF000000) { __asm__ __volatile__( "sb %[p1_f0], -2(%[s1]) \n\t" "sb %[p0_f0], -1(%[s1]) \n\t" "sb %[q0_f0], (%[s1]) \n\t" "sb %[q1_f0], +1(%[s1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [s1] "r"(s1)); } } else if ((flat2 != 0) && (flat != 0) && (mask != 0)) { /* f0+f1+f2 */ filter1_dspr2(mask, hev, p1, p0, q0, q1, &p1_f0, &p0_f0, &q0_f0, &q1_f0); PACK_LEFT_0TO3() mbfilter1_dspr2(p3_l, p2_l, p1_l, p0_l, q0_l, q1_l, q2_l, q3_l, &p2_l_f1, &p1_l_f1, &p0_l_f1, &q0_l_f1, &q1_l_f1, &q2_l_f1); PACK_RIGHT_0TO3() mbfilter1_dspr2(p3_r, p2_r, p1_r, p0_r, q0_r, q1_r, q2_r, q3_r, &p2_r_f1, &p1_r_f1, &p0_r_f1, &q0_r_f1, &q1_r_f1, &q2_r_f1); PACK_LEFT_4TO7() wide_mbfilter_dspr2(&p7_l, &p6_l, &p5_l, &p4_l, &p3_l, &p2_l, &p1_l, &p0_l, &q0_l, &q1_l, &q2_l, &q3_l, &q4_l, &q5_l, &q6_l, &q7_l); PACK_RIGHT_4TO7() wide_mbfilter_dspr2(&p7_r, &p6_r, &p5_r, &p4_r, &p3_r, &p2_r, &p1_r, &p0_r, &q0_r, &q1_r, &q2_r, &q3_r, &q4_r, &q5_r, &q6_r, &q7_r); if (mask & flat & flat2 & 0x000000FF) { __asm__ __volatile__( "sb %[p6_r], -7(%[s4]) \n\t" "sb %[p5_r], -6(%[s4]) \n\t" "sb %[p4_r], -5(%[s4]) \n\t" "sb %[p3_r], -4(%[s4]) \n\t" "sb %[p2_r], -3(%[s4]) \n\t" "sb %[p1_r], -2(%[s4]) \n\t" "sb %[p0_r], -1(%[s4]) \n\t" : : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r), [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r), [s4] "r"(s4)); __asm__ __volatile__( "sb %[q0_r], (%[s4]) \n\t" "sb %[q1_r], +1(%[s4]) \n\t" "sb %[q2_r], +2(%[s4]) \n\t" "sb 
%[q3_r], +3(%[s4]) \n\t" "sb %[q4_r], +4(%[s4]) \n\t" "sb %[q5_r], +5(%[s4]) \n\t" "sb %[q6_r], +6(%[s4]) \n\t" : : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r), [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r), [q6_r] "r"(q6_r), [s4] "r"(s4)); } else if (mask & flat & 0x000000FF) { __asm__ __volatile__( "sb %[p2_r_f1], -3(%[s4]) \n\t" "sb %[p1_r_f1], -2(%[s4]) \n\t" "sb %[p0_r_f1], -1(%[s4]) \n\t" "sb %[q0_r_f1], (%[s4]) \n\t" "sb %[q1_r_f1], +1(%[s4]) \n\t" "sb %[q2_r_f1], +2(%[s4]) \n\t" : : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1), [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1), [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [s4] "r"(s4)); } else if (mask & 0x000000FF) { __asm__ __volatile__( "sb %[p1_f0], -2(%[s4]) \n\t" "sb %[p0_f0], -1(%[s4]) \n\t" "sb %[q0_f0], (%[s4]) \n\t" "sb %[q1_f0], +1(%[s4]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [s4] "r"(s4)); } __asm__ __volatile__( "srl %[p6_r], %[p6_r], 16 \n\t" "srl %[p5_r], %[p5_r], 16 \n\t" "srl %[p4_r], %[p4_r], 16 \n\t" "srl %[p3_r], %[p3_r], 16 \n\t" "srl %[p2_r], %[p2_r], 16 \n\t" "srl %[p1_r], %[p1_r], 16 \n\t" "srl %[p0_r], %[p0_r], 16 \n\t" "srl %[q0_r], %[q0_r], 16 \n\t" "srl %[q1_r], %[q1_r], 16 \n\t" "srl %[q2_r], %[q2_r], 16 \n\t" "srl %[q3_r], %[q3_r], 16 \n\t" "srl %[q4_r], %[q4_r], 16 \n\t" "srl %[q5_r], %[q5_r], 16 \n\t" "srl %[q6_r], %[q6_r], 16 \n\t" : [q0_r] "+r"(q0_r), [q1_r] "+r"(q1_r), [q2_r] "+r"(q2_r), [q3_r] "+r"(q3_r), [q4_r] "+r"(q4_r), [q5_r] "+r"(q5_r), [q6_r] "+r"(q6_r), [p6_r] "+r"(p6_r), [p5_r] "+r"(p5_r), [p4_r] "+r"(p4_r), [p3_r] "+r"(p3_r), [p2_r] "+r"(p2_r), [p1_r] "+r"(p1_r), [p0_r] "+r"(p0_r) :); __asm__ __volatile__( "srl %[p2_r_f1], %[p2_r_f1], 16 \n\t" "srl %[p1_r_f1], %[p1_r_f1], 16 \n\t" "srl %[p0_r_f1], %[p0_r_f1], 16 \n\t" "srl %[q0_r_f1], %[q0_r_f1], 16 \n\t" "srl %[q1_r_f1], %[q1_r_f1], 16 \n\t" "srl %[q2_r_f1], %[q2_r_f1], 16 \n\t" "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2_r_f1] "+r"(p2_r_f1), [p1_r_f1] "+r"(p1_r_f1), [p0_r_f1] "+r"(p0_r_f1), [q0_r_f1] "+r"(q0_r_f1), [q1_r_f1] "+r"(q1_r_f1), [q2_r_f1] "+r"(q2_r_f1), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & flat2 & 0x0000FF00) { __asm__ __volatile__( "sb %[p6_r], -7(%[s3]) \n\t" "sb %[p5_r], -6(%[s3]) \n\t" "sb %[p4_r], -5(%[s3]) \n\t" "sb %[p3_r], -4(%[s3]) \n\t" "sb %[p2_r], -3(%[s3]) \n\t" "sb %[p1_r], -2(%[s3]) \n\t" "sb %[p0_r], -1(%[s3]) \n\t" : : [p6_r] "r"(p6_r), [p5_r] "r"(p5_r), [p4_r] "r"(p4_r), [p3_r] "r"(p3_r), [p2_r] "r"(p2_r), [p1_r] "r"(p1_r), [p0_r] "r"(p0_r), [s3] "r"(s3)); __asm__ __volatile__( "sb %[q0_r], (%[s3]) \n\t" "sb %[q1_r], +1(%[s3]) \n\t" "sb %[q2_r], +2(%[s3]) \n\t" "sb %[q3_r], +3(%[s3]) \n\t" "sb %[q4_r], +4(%[s3]) \n\t" "sb %[q5_r], +5(%[s3]) \n\t" "sb %[q6_r], +6(%[s3]) \n\t" : : [q0_r] "r"(q0_r), [q1_r] "r"(q1_r), [q2_r] "r"(q2_r), [q3_r] "r"(q3_r), [q4_r] "r"(q4_r), [q5_r] "r"(q5_r), [q6_r] "r"(q6_r), [s3] "r"(s3)); } else if (mask & flat & 0x0000FF00) { __asm__ __volatile__( "sb %[p2_r_f1], -3(%[s3]) \n\t" "sb %[p1_r_f1], -2(%[s3]) \n\t" "sb %[p0_r_f1], -1(%[s3]) \n\t" "sb %[q0_r_f1], (%[s3]) \n\t" "sb %[q1_r_f1], +1(%[s3]) \n\t" "sb %[q2_r_f1], +2(%[s3]) \n\t" : : [p2_r_f1] "r"(p2_r_f1), [p1_r_f1] "r"(p1_r_f1), [p0_r_f1] "r"(p0_r_f1), [q0_r_f1] "r"(q0_r_f1), [q1_r_f1] "r"(q1_r_f1), [q2_r_f1] "r"(q2_r_f1), [s3] "r"(s3)); } else if (mask & 0x0000FF00) { __asm__ __volatile__( 
"sb %[p1_f0], -2(%[s3]) \n\t" "sb %[p0_f0], -1(%[s3]) \n\t" "sb %[q0_f0], (%[s3]) \n\t" "sb %[q1_f0], +1(%[s3]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [s3] "r"(s3)); } __asm__ __volatile__( "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & flat2 & 0x00FF0000) { __asm__ __volatile__( "sb %[p6_l], -7(%[s2]) \n\t" "sb %[p5_l], -6(%[s2]) \n\t" "sb %[p4_l], -5(%[s2]) \n\t" "sb %[p3_l], -4(%[s2]) \n\t" "sb %[p2_l], -3(%[s2]) \n\t" "sb %[p1_l], -2(%[s2]) \n\t" "sb %[p0_l], -1(%[s2]) \n\t" : : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l), [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l), [s2] "r"(s2)); __asm__ __volatile__( "sb %[q0_l], (%[s2]) \n\t" "sb %[q1_l], +1(%[s2]) \n\t" "sb %[q2_l], +2(%[s2]) \n\t" "sb %[q3_l], +3(%[s2]) \n\t" "sb %[q4_l], +4(%[s2]) \n\t" "sb %[q5_l], +5(%[s2]) \n\t" "sb %[q6_l], +6(%[s2]) \n\t" : : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l), [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l), [q6_l] "r"(q6_l), [s2] "r"(s2)); } else if (mask & flat & 0x00FF0000) { __asm__ __volatile__( "sb %[p2_l_f1], -3(%[s2]) \n\t" "sb %[p1_l_f1], -2(%[s2]) \n\t" "sb %[p0_l_f1], -1(%[s2]) \n\t" "sb %[q0_l_f1], (%[s2]) \n\t" "sb %[q1_l_f1], +1(%[s2]) \n\t" "sb %[q2_l_f1], +2(%[s2]) \n\t" : : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1), [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1), [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [s2] "r"(s2)); } else if (mask & 0x00FF0000) { __asm__ __volatile__( "sb %[p1_f0], -2(%[s2]) \n\t" "sb %[p0_f0], -1(%[s2]) \n\t" "sb %[q0_f0], (%[s2]) \n\t" "sb %[q1_f0], +1(%[s2]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [s2] "r"(s2)); } __asm__ __volatile__( "srl %[p6_l], %[p6_l], 16 \n\t" "srl %[p5_l], %[p5_l], 16 \n\t" "srl %[p4_l], %[p4_l], 16 \n\t" "srl %[p3_l], %[p3_l], 16 \n\t" "srl %[p2_l], %[p2_l], 16 \n\t" "srl %[p1_l], %[p1_l], 16 \n\t" "srl %[p0_l], %[p0_l], 16 \n\t" "srl %[q0_l], %[q0_l], 16 \n\t" "srl %[q1_l], %[q1_l], 16 \n\t" "srl %[q2_l], %[q2_l], 16 \n\t" "srl %[q3_l], %[q3_l], 16 \n\t" "srl %[q4_l], %[q4_l], 16 \n\t" "srl %[q5_l], %[q5_l], 16 \n\t" "srl %[q6_l], %[q6_l], 16 \n\t" : [q0_l] "+r"(q0_l), [q1_l] "+r"(q1_l), [q2_l] "+r"(q2_l), [q3_l] "+r"(q3_l), [q4_l] "+r"(q4_l), [q5_l] "+r"(q5_l), [q6_l] "+r"(q6_l), [p6_l] "+r"(p6_l), [p5_l] "+r"(p5_l), [p4_l] "+r"(p4_l), [p3_l] "+r"(p3_l), [p2_l] "+r"(p2_l), [p1_l] "+r"(p1_l), [p0_l] "+r"(p0_l) :); __asm__ __volatile__( "srl %[p2_l_f1], %[p2_l_f1], 16 \n\t" "srl %[p1_l_f1], %[p1_l_f1], 16 \n\t" "srl %[p0_l_f1], %[p0_l_f1], 16 \n\t" "srl %[q0_l_f1], %[q0_l_f1], 16 \n\t" "srl %[q1_l_f1], %[q1_l_f1], 16 \n\t" "srl %[q2_l_f1], %[q2_l_f1], 16 \n\t" "srl %[p1_f0], %[p1_f0], 8 \n\t" "srl %[p0_f0], %[p0_f0], 8 \n\t" "srl %[q0_f0], %[q0_f0], 8 \n\t" "srl %[q1_f0], %[q1_f0], 8 \n\t" : [p2_l_f1] "+r"(p2_l_f1), [p1_l_f1] "+r"(p1_l_f1), [p0_l_f1] "+r"(p0_l_f1), [q0_l_f1] "+r"(q0_l_f1), [q1_l_f1] "+r"(q1_l_f1), [q2_l_f1] "+r"(q2_l_f1), [p1_f0] "+r"(p1_f0), [p0_f0] "+r"(p0_f0), [q0_f0] "+r"(q0_f0), [q1_f0] "+r"(q1_f0) :); if (mask & flat & flat2 & 0xFF000000) { __asm__ __volatile__( "sb %[p6_l], -7(%[s1]) \n\t" "sb %[p5_l], -6(%[s1]) \n\t" "sb %[p4_l], -5(%[s1]) \n\t" "sb %[p3_l], -4(%[s1]) \n\t" "sb %[p2_l], -3(%[s1]) \n\t" "sb %[p1_l], -2(%[s1]) \n\t" "sb %[p0_l], -1(%[s1]) \n\t" 
: : [p6_l] "r"(p6_l), [p5_l] "r"(p5_l), [p4_l] "r"(p4_l), [p3_l] "r"(p3_l), [p2_l] "r"(p2_l), [p1_l] "r"(p1_l), [p0_l] "r"(p0_l), [s1] "r"(s1)); __asm__ __volatile__( "sb %[q0_l], (%[s1]) \n\t" "sb %[q1_l], 1(%[s1]) \n\t" "sb %[q2_l], 2(%[s1]) \n\t" "sb %[q3_l], 3(%[s1]) \n\t" "sb %[q4_l], 4(%[s1]) \n\t" "sb %[q5_l], 5(%[s1]) \n\t" "sb %[q6_l], 6(%[s1]) \n\t" : : [q0_l] "r"(q0_l), [q1_l] "r"(q1_l), [q2_l] "r"(q2_l), [q3_l] "r"(q3_l), [q4_l] "r"(q4_l), [q5_l] "r"(q5_l), [q6_l] "r"(q6_l), [s1] "r"(s1)); } else if (mask & flat & 0xFF000000) { __asm__ __volatile__( "sb %[p2_l_f1], -3(%[s1]) \n\t" "sb %[p1_l_f1], -2(%[s1]) \n\t" "sb %[p0_l_f1], -1(%[s1]) \n\t" "sb %[q0_l_f1], (%[s1]) \n\t" "sb %[q1_l_f1], +1(%[s1]) \n\t" "sb %[q2_l_f1], +2(%[s1]) \n\t" : : [p2_l_f1] "r"(p2_l_f1), [p1_l_f1] "r"(p1_l_f1), [p0_l_f1] "r"(p0_l_f1), [q0_l_f1] "r"(q0_l_f1), [q1_l_f1] "r"(q1_l_f1), [q2_l_f1] "r"(q2_l_f1), [s1] "r"(s1)); } else if (mask & 0xFF000000) { __asm__ __volatile__( "sb %[p1_f0], -2(%[s1]) \n\t" "sb %[p0_f0], -1(%[s1]) \n\t" "sb %[q0_f0], (%[s1]) \n\t" "sb %[q1_f0], +1(%[s1]) \n\t" : : [p1_f0] "r"(p1_f0), [p0_f0] "r"(p0_f0), [q0_f0] "r"(q0_f0), [q1_f0] "r"(q1_f0), [s1] "r"(s1)); } } } } #endif // #if HAVE_DSPR2 libvpx-1.8.2/vpx_dsp/mips/loopfilter_msa.h000066400000000000000000000300351357355204000206560ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_MIPS_LOOPFILTER_MSA_H_ #define VPX_VPX_DSP_MIPS_LOOPFILTER_MSA_H_ #include "vpx_dsp/mips/macros_msa.h" #define VP9_LPF_FILTER4_4W(p1_in, p0_in, q0_in, q1_in, mask, hev, p1_out, \ p0_out, q0_out, q1_out) \ { \ v16i8 p1_m, p0_m, q0_m, q1_m, filt, q0_sub_p0, t1, t2; \ const v16i8 cnst4b = __msa_ldi_b(4); \ const v16i8 cnst3b = __msa_ldi_b(3); \ \ p1_m = (v16i8)__msa_xori_b(p1_in, 0x80); \ p0_m = (v16i8)__msa_xori_b(p0_in, 0x80); \ q0_m = (v16i8)__msa_xori_b(q0_in, 0x80); \ q1_m = (v16i8)__msa_xori_b(q1_in, 0x80); \ \ filt = __msa_subs_s_b(p1_m, q1_m); \ filt &= hev; \ q0_sub_p0 = __msa_subs_s_b(q0_m, p0_m); \ filt = __msa_adds_s_b(filt, q0_sub_p0); \ filt = __msa_adds_s_b(filt, q0_sub_p0); \ filt = __msa_adds_s_b(filt, q0_sub_p0); \ filt &= mask; \ t1 = __msa_adds_s_b(filt, cnst4b); \ t1 >>= cnst3b; \ t2 = __msa_adds_s_b(filt, cnst3b); \ t2 >>= cnst3b; \ q0_m = __msa_subs_s_b(q0_m, t1); \ q0_out = __msa_xori_b((v16u8)q0_m, 0x80); \ p0_m = __msa_adds_s_b(p0_m, t2); \ p0_out = __msa_xori_b((v16u8)p0_m, 0x80); \ filt = __msa_srari_b(t1, 1); \ hev = __msa_xori_b(hev, 0xff); \ filt &= hev; \ q1_m = __msa_subs_s_b(q1_m, filt); \ q1_out = __msa_xori_b((v16u8)q1_m, 0x80); \ p1_m = __msa_adds_s_b(p1_m, filt); \ p1_out = __msa_xori_b((v16u8)p1_m, 0x80); \ } #define VP9_FLAT4(p3_in, p2_in, p0_in, q0_in, q2_in, q3_in, flat_out) \ { \ v16u8 tmp_flat4, p2_a_sub_p0, q2_a_sub_q0, p3_a_sub_p0, q3_a_sub_q0; \ v16u8 zero_in = { 0 }; \ \ tmp_flat4 = __msa_ori_b(zero_in, 1); \ p2_a_sub_p0 = __msa_asub_u_b(p2_in, p0_in); \ q2_a_sub_q0 = __msa_asub_u_b(q2_in, q0_in); \ p3_a_sub_p0 = __msa_asub_u_b(p3_in, p0_in); \ q3_a_sub_q0 = __msa_asub_u_b(q3_in, q0_in); \ \ p2_a_sub_p0 = __msa_max_u_b(p2_a_sub_p0, q2_a_sub_q0); \ flat_out = __msa_max_u_b(p2_a_sub_p0, flat_out); \ p3_a_sub_p0 = 
__msa_max_u_b(p3_a_sub_p0, q3_a_sub_q0); \ flat_out = __msa_max_u_b(p3_a_sub_p0, flat_out); \ \ flat_out = (tmp_flat4 < (v16u8)flat_out); \ flat_out = __msa_xori_b(flat_out, 0xff); \ flat_out = flat_out & (mask); \ } #define VP9_FLAT5(p7_in, p6_in, p5_in, p4_in, p0_in, q0_in, q4_in, q5_in, \ q6_in, q7_in, flat_in, flat2_out) \ { \ v16u8 tmp_flat5, zero_in = { 0 }; \ v16u8 p4_a_sub_p0, q4_a_sub_q0, p5_a_sub_p0, q5_a_sub_q0; \ v16u8 p6_a_sub_p0, q6_a_sub_q0, p7_a_sub_p0, q7_a_sub_q0; \ \ tmp_flat5 = __msa_ori_b(zero_in, 1); \ p4_a_sub_p0 = __msa_asub_u_b(p4_in, p0_in); \ q4_a_sub_q0 = __msa_asub_u_b(q4_in, q0_in); \ p5_a_sub_p0 = __msa_asub_u_b(p5_in, p0_in); \ q5_a_sub_q0 = __msa_asub_u_b(q5_in, q0_in); \ p6_a_sub_p0 = __msa_asub_u_b(p6_in, p0_in); \ q6_a_sub_q0 = __msa_asub_u_b(q6_in, q0_in); \ p7_a_sub_p0 = __msa_asub_u_b(p7_in, p0_in); \ q7_a_sub_q0 = __msa_asub_u_b(q7_in, q0_in); \ \ p4_a_sub_p0 = __msa_max_u_b(p4_a_sub_p0, q4_a_sub_q0); \ flat2_out = __msa_max_u_b(p5_a_sub_p0, q5_a_sub_q0); \ flat2_out = __msa_max_u_b(p4_a_sub_p0, flat2_out); \ p6_a_sub_p0 = __msa_max_u_b(p6_a_sub_p0, q6_a_sub_q0); \ flat2_out = __msa_max_u_b(p6_a_sub_p0, flat2_out); \ p7_a_sub_p0 = __msa_max_u_b(p7_a_sub_p0, q7_a_sub_q0); \ flat2_out = __msa_max_u_b(p7_a_sub_p0, flat2_out); \ \ flat2_out = (tmp_flat5 < (v16u8)flat2_out); \ flat2_out = __msa_xori_b(flat2_out, 0xff); \ flat2_out = flat2_out & flat_in; \ } #define VP9_FILTER8(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \ p2_filt8_out, p1_filt8_out, p0_filt8_out, q0_filt8_out, \ q1_filt8_out, q2_filt8_out) \ { \ v8u16 tmp_filt8_0, tmp_filt8_1, tmp_filt8_2; \ \ tmp_filt8_2 = p2_in + p1_in + p0_in; \ tmp_filt8_0 = p3_in << 1; \ \ tmp_filt8_0 = tmp_filt8_0 + tmp_filt8_2 + q0_in; \ tmp_filt8_1 = tmp_filt8_0 + p3_in + p2_in; \ p2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \ \ tmp_filt8_1 = tmp_filt8_0 + p1_in + q1_in; \ p1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \ \ tmp_filt8_1 = q2_in + q1_in + q0_in; \ tmp_filt8_2 = tmp_filt8_2 + tmp_filt8_1; \ tmp_filt8_0 = tmp_filt8_2 + (p0_in); \ tmp_filt8_0 = tmp_filt8_0 + (p3_in); \ p0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_0, 3); \ \ tmp_filt8_0 = q2_in + q3_in; \ tmp_filt8_0 = p0_in + tmp_filt8_1 + tmp_filt8_0; \ tmp_filt8_1 = q3_in + q3_in; \ tmp_filt8_1 = tmp_filt8_1 + tmp_filt8_0; \ q2_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \ \ tmp_filt8_0 = tmp_filt8_2 + q3_in; \ tmp_filt8_1 = tmp_filt8_0 + q0_in; \ q0_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \ \ tmp_filt8_1 = tmp_filt8_0 - p2_in; \ tmp_filt8_0 = q1_in + q3_in; \ tmp_filt8_1 = tmp_filt8_0 + tmp_filt8_1; \ q1_filt8_out = (v8i16)__msa_srari_h((v8i16)tmp_filt8_1, 3); \ } #define LPF_MASK_HEV(p3_in, p2_in, p1_in, p0_in, q0_in, q1_in, q2_in, q3_in, \ limit_in, b_limit_in, thresh_in, hev_out, mask_out, \ flat_out) \ { \ v16u8 p3_asub_p2_m, p2_asub_p1_m, p1_asub_p0_m, q1_asub_q0_m; \ v16u8 p1_asub_q1_m, p0_asub_q0_m, q3_asub_q2_m, q2_asub_q1_m; \ \ /* absolute subtraction of pixel values */ \ p3_asub_p2_m = __msa_asub_u_b(p3_in, p2_in); \ p2_asub_p1_m = __msa_asub_u_b(p2_in, p1_in); \ p1_asub_p0_m = __msa_asub_u_b(p1_in, p0_in); \ q1_asub_q0_m = __msa_asub_u_b(q1_in, q0_in); \ q2_asub_q1_m = __msa_asub_u_b(q2_in, q1_in); \ q3_asub_q2_m = __msa_asub_u_b(q3_in, q2_in); \ p0_asub_q0_m = __msa_asub_u_b(p0_in, q0_in); \ p1_asub_q1_m = __msa_asub_u_b(p1_in, q1_in); \ \ /* calculation of hev */ \ flat_out = __msa_max_u_b(p1_asub_p0_m, q1_asub_q0_m); \ hev_out = thresh_in < (v16u8)flat_out; \ 
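    /* Per-lane scalar sketch of the hev/mask computation (illustrative;   \
     * assumes limit < 0xff, so an intermediate 0xff lane always trips the \
     * final limit compare):                                               \
     *                                                                     \
     *   d    = max(|p1 - p0|, |q1 - q0|);                                 \
     *   hev  = (d > thresh) ? 0xff : 0x00;                                \
     *   mask = (2 * |p0 - q0| + |p1 - q1| / 2 <= b_limit &&               \
     *           max(d, |p3-p2|, |p2-p1|, |q2-q1|, |q3-q2|) <= limit)      \
     *            ? 0xff : 0x00;                                           \
     */                                                                    \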
    \
    /* calculation of mask */ \
    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p0_asub_q0_m); \
    p1_asub_q1_m >>= 1; \
    p0_asub_q0_m = __msa_adds_u_b(p0_asub_q0_m, p1_asub_q1_m); \
    \
    mask_out = b_limit_in < p0_asub_q0_m; \
    mask_out = __msa_max_u_b(flat_out, mask_out); \
    p3_asub_p2_m = __msa_max_u_b(p3_asub_p2_m, p2_asub_p1_m); \
    mask_out = __msa_max_u_b(p3_asub_p2_m, mask_out); \
    q2_asub_q1_m = __msa_max_u_b(q2_asub_q1_m, q3_asub_q2_m); \
    mask_out = __msa_max_u_b(q2_asub_q1_m, mask_out); \
    \
    mask_out = limit_in < (v16u8)mask_out; \
    mask_out = __msa_xori_b(mask_out, 0xff); \
  }

#endif  // VPX_VPX_DSP_MIPS_LOOPFILTER_MSA_H_
libvpx-1.8.2/vpx_dsp/mips/macros_msa.h000066400000000000000000003005141357355204000177650ustar00rootroot00000000000000
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VPX_DSP_MIPS_MACROS_MSA_H_
#define VPX_VPX_DSP_MIPS_MACROS_MSA_H_

#include <msa.h>

#include "./vpx_config.h"
#include "vpx/vpx_integer.h"

#define LD_V(RTYPE, psrc) *((const RTYPE *)(psrc))
#define LD_UB(...) LD_V(v16u8, __VA_ARGS__)
#define LD_SB(...) LD_V(v16i8, __VA_ARGS__)
#define LD_UH(...) LD_V(v8u16, __VA_ARGS__)
#define LD_SH(...) LD_V(v8i16, __VA_ARGS__)
#define LD_SW(...) LD_V(v4i32, __VA_ARGS__)

#define ST_V(RTYPE, in, pdst) *((RTYPE *)(pdst)) = (in)
#define ST_UB(...) ST_V(v16u8, __VA_ARGS__)
#define ST_SB(...) ST_V(v16i8, __VA_ARGS__)
#define ST_SH(...) ST_V(v8i16, __VA_ARGS__)
#define ST_SW(...) ST_V(v4i32, __VA_ARGS__)

#if (__mips_isa_rev >= 6)
#define LH(psrc) \
  ({ \
    uint16_t val_lh_m = *(const uint16_t *)(psrc); \
    val_lh_m; \
  })

#define LW(psrc) \
  ({ \
    uint32_t val_lw_m = *(const uint32_t *)(psrc); \
    val_lw_m; \
  })

#if (__mips == 64)
#define LD(psrc) \
  ({ \
    uint64_t val_ld_m = *(const uint64_t *)(psrc); \
    val_ld_m; \
  })
#else  // !(__mips == 64)
#define LD(psrc) \
  ({ \
    const uint8_t *psrc_ld_m = (const uint8_t *)(psrc); \
    uint32_t val0_ld_m, val1_ld_m; \
    uint64_t val_ld_m = 0; \
    \
    val0_ld_m = LW(psrc_ld_m); \
    val1_ld_m = LW(psrc_ld_m + 4); \
    \
    val_ld_m = (uint64_t)(val1_ld_m); \
    val_ld_m = (uint64_t)((val_ld_m << 32) & 0xFFFFFFFF00000000); \
    val_ld_m = (uint64_t)(val_ld_m | (uint64_t)val0_ld_m); \
    \
    val_ld_m; \
  })
#endif  // (__mips == 64)

#define SH(val, pdst) *(uint16_t *)(pdst) = (val);
#define SW(val, pdst) *(uint32_t *)(pdst) = (val);
#define SD(val, pdst) *(uint64_t *)(pdst) = (val);

#else  // !(__mips_isa_rev >= 6)
#define LH(psrc) \
  ({ \
    const uint8_t *psrc_lh_m = (const uint8_t *)(psrc); \
    uint16_t val_lh_m; \
    \
    __asm__ __volatile__("ulh %[val_lh_m], %[psrc_lh_m] \n\t" \
                         \
                         : [val_lh_m] "=r"(val_lh_m) \
                         : [psrc_lh_m] "m"(*psrc_lh_m)); \
    \
    val_lh_m; \
  })

#define LW(psrc) \
  ({ \
    const uint8_t *psrc_lw_m = (const uint8_t *)(psrc); \
    uint32_t val_lw_m; \
    \
    __asm__ __volatile__("ulw %[val_lw_m], %[psrc_lw_m] \n\t" \
                         \
                         : [val_lw_m] "=r"(val_lw_m) \
                         : [psrc_lw_m] "m"(*psrc_lw_m)); \
    \
    val_lw_m; \
  })

#if (__mips == 64)
#define LD(psrc) \
  ({ \
    const uint8_t *psrc_ld_m = (const uint8_t *)(psrc); \
    uint64_t val_ld_m = 0; \
    \
    __asm__ __volatile__("uld %[val_ld_m], %[psrc_ld_m] \n\t" \
                         \
                         : [val_ld_m] "=r"(val_ld_m) \
                         : [psrc_ld_m] "m"(*psrc_ld_m)); \
    \
    val_ld_m; \
  })
#else  // !(__mips == 64)
#define LD(psrc) \
  ({ \
    const uint8_t *psrc_ld_m = (const uint8_t
*)(psrc); \ uint32_t val0_ld_m, val1_ld_m; \ uint64_t val_ld_m = 0; \ \ val0_ld_m = LW(psrc_ld_m); \ val1_ld_m = LW(psrc_ld_m + 4); \ \ val_ld_m = (uint64_t)(val1_ld_m); \ val_ld_m = (uint64_t)((val_ld_m << 32) & 0xFFFFFFFF00000000); \ val_ld_m = (uint64_t)(val_ld_m | (uint64_t)val0_ld_m); \ \ val_ld_m; \ }) #endif // (__mips == 64) #define SH(val, pdst) \ { \ uint8_t *pdst_sh_m = (uint8_t *)(pdst); \ const uint16_t val_sh_m = (val); \ \ __asm__ __volatile__("ush %[val_sh_m], %[pdst_sh_m] \n\t" \ \ : [pdst_sh_m] "=m"(*pdst_sh_m) \ : [val_sh_m] "r"(val_sh_m)); \ } #define SW(val, pdst) \ { \ uint8_t *pdst_sw_m = (uint8_t *)(pdst); \ const uint32_t val_sw_m = (val); \ \ __asm__ __volatile__("usw %[val_sw_m], %[pdst_sw_m] \n\t" \ \ : [pdst_sw_m] "=m"(*pdst_sw_m) \ : [val_sw_m] "r"(val_sw_m)); \ } #define SD(val, pdst) \ { \ uint8_t *pdst_sd_m = (uint8_t *)(pdst); \ uint32_t val0_sd_m, val1_sd_m; \ \ val0_sd_m = (uint32_t)((val)&0x00000000FFFFFFFF); \ val1_sd_m = (uint32_t)(((val) >> 32) & 0x00000000FFFFFFFF); \ \ SW(val0_sd_m, pdst_sd_m); \ SW(val1_sd_m, pdst_sd_m + 4); \ } #endif // (__mips_isa_rev >= 6) /* Description : Load 4 words with stride Arguments : Inputs - psrc, stride Outputs - out0, out1, out2, out3 Details : Load word in 'out0' from (psrc) Load word in 'out1' from (psrc + stride) Load word in 'out2' from (psrc + 2 * stride) Load word in 'out3' from (psrc + 3 * stride) */ #define LW4(psrc, stride, out0, out1, out2, out3) \ { \ out0 = LW((psrc)); \ out1 = LW((psrc) + stride); \ out2 = LW((psrc) + 2 * stride); \ out3 = LW((psrc) + 3 * stride); \ } /* Description : Load double words with stride Arguments : Inputs - psrc, stride Outputs - out0, out1 Details : Load double word in 'out0' from (psrc) Load double word in 'out1' from (psrc + stride) */ #define LD2(psrc, stride, out0, out1) \ { \ out0 = LD((psrc)); \ out1 = LD((psrc) + stride); \ } #define LD4(psrc, stride, out0, out1, out2, out3) \ { \ LD2((psrc), stride, out0, out1); \ LD2((psrc) + 2 * stride, stride, out2, out3); \ } /* Description : Store 4 words with stride Arguments : Inputs - in0, in1, in2, in3, pdst, stride Details : Store word from 'in0' to (pdst) Store word from 'in1' to (pdst + stride) Store word from 'in2' to (pdst + 2 * stride) Store word from 'in3' to (pdst + 3 * stride) */ #define SW4(in0, in1, in2, in3, pdst, stride) \ { \ SW(in0, (pdst)) \ SW(in1, (pdst) + stride); \ SW(in2, (pdst) + 2 * stride); \ SW(in3, (pdst) + 3 * stride); \ } /* Description : Store 4 double words with stride Arguments : Inputs - in0, in1, in2, in3, pdst, stride Details : Store double word from 'in0' to (pdst) Store double word from 'in1' to (pdst + stride) Store double word from 'in2' to (pdst + 2 * stride) Store double word from 'in3' to (pdst + 3 * stride) */ #define SD4(in0, in1, in2, in3, pdst, stride) \ { \ SD(in0, (pdst)) \ SD(in1, (pdst) + stride); \ SD(in2, (pdst) + 2 * stride); \ SD(in3, (pdst) + 3 * stride); \ } /* Description : Load vector elements with stride Arguments : Inputs - psrc, stride Outputs - out0, out1 Return Type - as per RTYPE Details : Load 16 byte elements in 'out0' from (psrc) Load 16 byte elements in 'out1' from (psrc + stride) */ #define LD_V2(RTYPE, psrc, stride, out0, out1) \ { \ out0 = LD_V(RTYPE, (psrc)); \ out1 = LD_V(RTYPE, (psrc) + stride); \ } #define LD_UB2(...) LD_V2(v16u8, __VA_ARGS__) #define LD_SB2(...) LD_V2(v16i8, __VA_ARGS__) #define LD_SH2(...) LD_V2(v8i16, __VA_ARGS__) #define LD_SW2(...) 
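/* Usage sketch (illustrative; 'src', 'dst' and the strides are hypothetical
 * caller-side names): copy a 4x4 byte block through GP registers with the
 * word helpers above.
 *
 *   uint32_t w0, w1, w2, w3;
 *   LW4(src, src_stride, w0, w1, w2, w3);
 *   SW4(w0, w1, w2, w3, dst, dst_stride);
 */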
LD_V2(v4i32, __VA_ARGS__) #define LD_V3(RTYPE, psrc, stride, out0, out1, out2) \ { \ LD_V2(RTYPE, (psrc), stride, out0, out1); \ out2 = LD_V(RTYPE, (psrc) + 2 * stride); \ } #define LD_UB3(...) LD_V3(v16u8, __VA_ARGS__) #define LD_V4(RTYPE, psrc, stride, out0, out1, out2, out3) \ { \ LD_V2(RTYPE, (psrc), stride, out0, out1); \ LD_V2(RTYPE, (psrc) + 2 * stride, stride, out2, out3); \ } #define LD_UB4(...) LD_V4(v16u8, __VA_ARGS__) #define LD_SB4(...) LD_V4(v16i8, __VA_ARGS__) #define LD_SH4(...) LD_V4(v8i16, __VA_ARGS__) #define LD_V5(RTYPE, psrc, stride, out0, out1, out2, out3, out4) \ { \ LD_V4(RTYPE, (psrc), stride, out0, out1, out2, out3); \ out4 = LD_V(RTYPE, (psrc) + 4 * stride); \ } #define LD_UB5(...) LD_V5(v16u8, __VA_ARGS__) #define LD_SB5(...) LD_V5(v16i8, __VA_ARGS__) #define LD_V7(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6) \ { \ LD_V5(RTYPE, (psrc), stride, out0, out1, out2, out3, out4); \ LD_V2(RTYPE, (psrc) + 5 * stride, stride, out5, out6); \ } #define LD_SB7(...) LD_V7(v16i8, __VA_ARGS__) #define LD_V8(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, \ out7) \ { \ LD_V4(RTYPE, (psrc), stride, out0, out1, out2, out3); \ LD_V4(RTYPE, (psrc) + 4 * stride, stride, out4, out5, out6, out7); \ } #define LD_UB8(...) LD_V8(v16u8, __VA_ARGS__) #define LD_SB8(...) LD_V8(v16i8, __VA_ARGS__) #define LD_SH8(...) LD_V8(v8i16, __VA_ARGS__) #define LD_V16(RTYPE, psrc, stride, out0, out1, out2, out3, out4, out5, out6, \ out7, out8, out9, out10, out11, out12, out13, out14, out15) \ { \ LD_V8(RTYPE, (psrc), stride, out0, out1, out2, out3, out4, out5, out6, \ out7); \ LD_V8(RTYPE, (psrc) + 8 * stride, stride, out8, out9, out10, out11, out12, \ out13, out14, out15); \ } #define LD_SH16(...) LD_V16(v8i16, __VA_ARGS__) /* Description : Load 4x4 block of signed halfword elements from 1D source data into 4 vectors (Each vector with 4 signed halfwords) Arguments : Input - psrc Outputs - out0, out1, out2, out3 */ #define LD4x4_SH(psrc, out0, out1, out2, out3) \ { \ out0 = LD_SH(psrc); \ out2 = LD_SH(psrc + 8); \ out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \ out3 = (v8i16)__msa_ilvl_d((v2i64)out2, (v2i64)out2); \ } /* Description : Store vectors with stride Arguments : Inputs - in0, in1, pdst, stride Details : Store 16 byte elements from 'in0' to (pdst) Store 16 byte elements from 'in1' to (pdst + stride) */ #define ST_V2(RTYPE, in0, in1, pdst, stride) \ { \ ST_V(RTYPE, in0, (pdst)); \ ST_V(RTYPE, in1, (pdst) + stride); \ } #define ST_UB2(...) ST_V2(v16u8, __VA_ARGS__) #define ST_SH2(...) ST_V2(v8i16, __VA_ARGS__) #define ST_SW2(...) ST_V2(v4i32, __VA_ARGS__) #define ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride) \ { \ ST_V2(RTYPE, in0, in1, (pdst), stride); \ ST_V2(RTYPE, in2, in3, (pdst) + 2 * stride, stride); \ } #define ST_UB4(...) ST_V4(v16u8, __VA_ARGS__) #define ST_SH4(...) ST_V4(v8i16, __VA_ARGS__) #define ST_V8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, pdst, stride) \ { \ ST_V4(RTYPE, in0, in1, in2, in3, pdst, stride); \ ST_V4(RTYPE, in4, in5, in6, in7, (pdst) + 4 * stride, stride); \ } #define ST_UB8(...) ST_V8(v16u8, __VA_ARGS__) #define ST_SH8(...) 
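/* Usage sketch (illustrative; 'src'/'dst' are hypothetical): move a 16x8
 * pixel block through MSA registers with the multi-row helpers above.
 *
 *   v16u8 r0, r1, r2, r3, r4, r5, r6, r7;
 *   LD_UB8(src, stride, r0, r1, r2, r3, r4, r5, r6, r7);
 *   ST_UB8(r0, r1, r2, r3, r4, r5, r6, r7, dst, stride);
 */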
ST_V8(v8i16, __VA_ARGS__) /* Description : Store 2x4 byte block to destination memory from input vector Arguments : Inputs - in, stidx, pdst, stride Details : Index 'stidx' halfword element from 'in' vector is copied to the GP register and stored to (pdst) Index 'stidx+1' halfword element from 'in' vector is copied to the GP register and stored to (pdst + stride) Index 'stidx+2' halfword element from 'in' vector is copied to the GP register and stored to (pdst + 2 * stride) Index 'stidx+3' halfword element from 'in' vector is copied to the GP register and stored to (pdst + 3 * stride) */ #define ST2x4_UB(in, stidx, pdst, stride) \ { \ uint16_t out0_m, out1_m, out2_m, out3_m; \ uint8_t *pblk_2x4_m = (uint8_t *)(pdst); \ \ out0_m = __msa_copy_u_h((v8i16)in, (stidx)); \ out1_m = __msa_copy_u_h((v8i16)in, (stidx + 1)); \ out2_m = __msa_copy_u_h((v8i16)in, (stidx + 2)); \ out3_m = __msa_copy_u_h((v8i16)in, (stidx + 3)); \ \ SH(out0_m, pblk_2x4_m); \ SH(out1_m, pblk_2x4_m + stride); \ SH(out2_m, pblk_2x4_m + 2 * stride); \ SH(out3_m, pblk_2x4_m + 3 * stride); \ } /* Description : Store 4x2 byte block to destination memory from input vector Arguments : Inputs - in, pdst, stride Details : Index 0 word element from 'in' vector is copied to the GP register and stored to (pdst) Index 1 word element from 'in' vector is copied to the GP register and stored to (pdst + stride) */ #define ST4x2_UB(in, pdst, stride) \ { \ uint32_t out0_m, out1_m; \ uint8_t *pblk_4x2_m = (uint8_t *)(pdst); \ \ out0_m = __msa_copy_u_w((v4i32)in, 0); \ out1_m = __msa_copy_u_w((v4i32)in, 1); \ \ SW(out0_m, pblk_4x2_m); \ SW(out1_m, pblk_4x2_m + stride); \ } /* Description : Store 4x4 byte block to destination memory from input vector Arguments : Inputs - in0, in1, pdst, stride Details : 'Idx0' word element from input vector 'in0' is copied to the GP register and stored to (pdst) 'Idx1' word element from input vector 'in0' is copied to the GP register and stored to (pdst + stride) 'Idx2' word element from input vector 'in0' is copied to the GP register and stored to (pdst + 2 * stride) 'Idx3' word element from input vector 'in0' is copied to the GP register and stored to (pdst + 3 * stride) */ #define ST4x4_UB(in0, in1, idx0, idx1, idx2, idx3, pdst, stride) \ { \ uint32_t out0_m, out1_m, out2_m, out3_m; \ uint8_t *pblk_4x4_m = (uint8_t *)(pdst); \ \ out0_m = __msa_copy_u_w((v4i32)in0, idx0); \ out1_m = __msa_copy_u_w((v4i32)in0, idx1); \ out2_m = __msa_copy_u_w((v4i32)in1, idx2); \ out3_m = __msa_copy_u_w((v4i32)in1, idx3); \ \ SW4(out0_m, out1_m, out2_m, out3_m, pblk_4x4_m, stride); \ } #define ST4x8_UB(in0, in1, pdst, stride) \ { \ uint8_t *pblk_4x8 = (uint8_t *)(pdst); \ \ ST4x4_UB(in0, in0, 0, 1, 2, 3, pblk_4x8, stride); \ ST4x4_UB(in1, in1, 0, 1, 2, 3, pblk_4x8 + 4 * stride, stride); \ } /* Description : Store 8x1 byte block to destination memory from input vector Arguments : Inputs - in, pdst Details : Index 0 double word element from 'in' vector is copied to the GP register and stored to (pdst) */ #define ST8x1_UB(in, pdst) \ { \ uint64_t out0_m; \ \ out0_m = __msa_copy_u_d((v2i64)in, 0); \ SD(out0_m, pdst); \ } /* Description : Store 8x2 byte block to destination memory from input vector Arguments : Inputs - in, pdst, stride Details : Index 0 double word element from 'in' vector is copied to the GP register and stored to (pdst) Index 1 double word element from 'in' vector is copied to the GP register and stored to (pdst + stride) */ #define ST8x2_UB(in, pdst, stride) \ { \ uint64_t out0_m, out1_m; \ uint8_t *pblk_8x2_m 
= (uint8_t *)(pdst); \ \ out0_m = __msa_copy_u_d((v2i64)in, 0); \ out1_m = __msa_copy_u_d((v2i64)in, 1); \ \ SD(out0_m, pblk_8x2_m); \ SD(out1_m, pblk_8x2_m + stride); \ } /* Description : Store 8x4 byte block to destination memory from input vectors Arguments : Inputs - in0, in1, pdst, stride Details : Index 0 double word element from 'in0' vector is copied to the GP register and stored to (pdst) Index 1 double word element from 'in0' vector is copied to the GP register and stored to (pdst + stride) Index 0 double word element from 'in1' vector is copied to the GP register and stored to (pdst + 2 * stride) Index 1 double word element from 'in1' vector is copied to the GP register and stored to (pdst + 3 * stride) */ #define ST8x4_UB(in0, in1, pdst, stride) \ { \ uint64_t out0_m, out1_m, out2_m, out3_m; \ uint8_t *pblk_8x4_m = (uint8_t *)(pdst); \ \ out0_m = __msa_copy_u_d((v2i64)in0, 0); \ out1_m = __msa_copy_u_d((v2i64)in0, 1); \ out2_m = __msa_copy_u_d((v2i64)in1, 0); \ out3_m = __msa_copy_u_d((v2i64)in1, 1); \ \ SD4(out0_m, out1_m, out2_m, out3_m, pblk_8x4_m, stride); \ } /* Description : average with rounding (in0 + in1 + 1) / 2. Arguments : Inputs - in0, in1, in2, in3, Outputs - out0, out1 Return Type - as per RTYPE Details : Each unsigned byte element from 'in0' vector is added with each unsigned byte element from 'in1' vector. Then the average with rounding is calculated and written to 'out0' */ #define AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_aver_u_b((v16u8)in0, (v16u8)in1); \ out1 = (RTYPE)__msa_aver_u_b((v16u8)in2, (v16u8)in3); \ } #define AVER_UB2_UB(...) AVER_UB2(v16u8, __VA_ARGS__) #define AVER_UB4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ AVER_UB2(RTYPE, in0, in1, in2, in3, out0, out1) \ AVER_UB2(RTYPE, in4, in5, in6, in7, out2, out3) \ } #define AVER_UB4_UB(...) AVER_UB4(v16u8, __VA_ARGS__) /* Description : Immediate number of elements to slide with zero Arguments : Inputs - in0, in1, slide_val Outputs - out0, out1 Return Type - as per RTYPE Details : Byte elements from 'zero_m' vector are slid into 'in0' by value specified in the 'slide_val' */ #define SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val) \ { \ v16i8 zero_m = { 0 }; \ out0 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in0, slide_val); \ out1 = (RTYPE)__msa_sldi_b((v16i8)zero_m, (v16i8)in1, slide_val); \ } #define SLDI_B2_0_SW(...) SLDI_B2_0(v4i32, __VA_ARGS__) #define SLDI_B4_0(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3, \ slide_val) \ { \ SLDI_B2_0(RTYPE, in0, in1, out0, out1, slide_val); \ SLDI_B2_0(RTYPE, in2, in3, out2, out3, slide_val); \ } #define SLDI_B4_0_UB(...) SLDI_B4_0(v16u8, __VA_ARGS__) /* Description : Immediate number of elements to slide Arguments : Inputs - in0_0, in0_1, in1_0, in1_1, slide_val Outputs - out0, out1 Return Type - as per RTYPE Details : Byte elements from 'in0_0' vector are slid into 'in1_0' by value specified in the 'slide_val' */ #define SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \ { \ out0 = (RTYPE)__msa_sldi_b((v16i8)in0_0, (v16i8)in1_0, slide_val); \ out1 = (RTYPE)__msa_sldi_b((v16i8)in0_1, (v16i8)in1_1, slide_val); \ } #define SLDI_B2_UB(...) SLDI_B2(v16u8, __VA_ARGS__) #define SLDI_B2_SH(...) 
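/* Usage sketch (illustrative; 'pred'/'ref' are hypothetical): average two
 * pairs of rows with AVER_UB2_UB, i.e. per byte (a + b + 1) >> 1, the
 * rounding used for bilinear/compound averaging.
 *
 *   v16u8 a0, a1, b0, b1, avg0, avg1;
 *   LD_UB2(pred, 16, a0, a1);
 *   LD_UB2(ref, 16, b0, b1);
 *   AVER_UB2_UB(a0, b0, a1, b1, avg0, avg1);  // avg0 = aver(a0, b0)
 */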
SLDI_B2(v8i16, __VA_ARGS__) #define SLDI_B3(RTYPE, in0_0, in0_1, in0_2, in1_0, in1_1, in1_2, out0, out1, \ out2, slide_val) \ { \ SLDI_B2(RTYPE, in0_0, in0_1, in1_0, in1_1, out0, out1, slide_val) \ out2 = (RTYPE)__msa_sldi_b((v16i8)in0_2, (v16i8)in1_2, slide_val); \ } #define SLDI_B3_SB(...) SLDI_B3(v16i8, __VA_ARGS__) #define SLDI_B3_UH(...) SLDI_B3(v8u16, __VA_ARGS__) /* Description : Shuffle byte vector elements as per mask vector Arguments : Inputs - in0, in1, in2, in3, mask0, mask1 Outputs - out0, out1 Return Type - as per RTYPE Details : Byte elements from 'in0' & 'in1' are copied selectively to 'out0' as per control vector 'mask0' */ #define VSHF_B2(RTYPE, in0, in1, in2, in3, mask0, mask1, out0, out1) \ { \ out0 = (RTYPE)__msa_vshf_b((v16i8)mask0, (v16i8)in1, (v16i8)in0); \ out1 = (RTYPE)__msa_vshf_b((v16i8)mask1, (v16i8)in3, (v16i8)in2); \ } #define VSHF_B2_UB(...) VSHF_B2(v16u8, __VA_ARGS__) #define VSHF_B2_SB(...) VSHF_B2(v16i8, __VA_ARGS__) #define VSHF_B2_UH(...) VSHF_B2(v8u16, __VA_ARGS__) #define VSHF_B2_SH(...) VSHF_B2(v8i16, __VA_ARGS__) #define VSHF_B4(RTYPE, in0, in1, mask0, mask1, mask2, mask3, out0, out1, out2, \ out3) \ { \ VSHF_B2(RTYPE, in0, in1, in0, in1, mask0, mask1, out0, out1); \ VSHF_B2(RTYPE, in0, in1, in0, in1, mask2, mask3, out2, out3); \ } #define VSHF_B4_SB(...) VSHF_B4(v16i8, __VA_ARGS__) #define VSHF_B4_SH(...) VSHF_B4(v8i16, __VA_ARGS__) /* Description : Dot product of byte vector elements Arguments : Inputs - mult0, mult1, cnst0, cnst1 Outputs - out0, out1 Return Type - as per RTYPE Details : Unsigned byte elements from 'mult0' are multiplied with unsigned byte elements from 'cnst0' producing a result twice the size of input i.e. unsigned halfword. The multiplication result of adjacent odd-even elements are added together and written to the 'out0' vector */ #define DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ { \ out0 = (RTYPE)__msa_dotp_u_h((v16u8)mult0, (v16u8)cnst0); \ out1 = (RTYPE)__msa_dotp_u_h((v16u8)mult1, (v16u8)cnst1); \ } #define DOTP_UB2_UH(...) DOTP_UB2(v8u16, __VA_ARGS__) #define DOTP_UB4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \ cnst3, out0, out1, out2, out3) \ { \ DOTP_UB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ DOTP_UB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ } #define DOTP_UB4_UH(...) DOTP_UB4(v8u16, __VA_ARGS__) /* Description : Dot product of byte vector elements Arguments : Inputs - mult0, mult1, cnst0, cnst1 Outputs - out0, out1 Return Type - as per RTYPE Details : Signed byte elements from 'mult0' are multiplied with signed byte elements from 'cnst0' producing a result twice the size of input i.e. signed halfword. The multiplication result of adjacent odd-even elements are added together and written to the 'out0' vector */ #define DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ { \ out0 = (RTYPE)__msa_dotp_s_h((v16i8)mult0, (v16i8)cnst0); \ out1 = (RTYPE)__msa_dotp_s_h((v16i8)mult1, (v16i8)cnst1); \ } #define DOTP_SB2_SH(...) DOTP_SB2(v8i16, __VA_ARGS__) #define DOTP_SB4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \ cnst3, out0, out1, out2, out3) \ { \ DOTP_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ DOTP_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ } #define DOTP_SB4_SH(...) 
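/* Numeric sketch (illustrative) of the dot-product semantics: for DOTP_UB2
 * each halfword lane of out0 is
 *
 *   out0[i] = mult0[2*i] * cnst0[2*i] + mult0[2*i + 1] * cnst0[2*i + 1]
 *
 * e.g. source bytes {1, 2} against taps {3, 4} produce 1*3 + 2*4 = 11.
 */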
DOTP_SB4(v8i16, __VA_ARGS__) /* Description : Dot product of halfword vector elements Arguments : Inputs - mult0, mult1, cnst0, cnst1 Outputs - out0, out1 Return Type - as per RTYPE Details : Signed halfword elements from 'mult0' are multiplied with signed halfword elements from 'cnst0' producing a result twice the size of input i.e. signed word. The multiplication result of adjacent odd-even elements are added together and written to the 'out0' vector */ #define DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ { \ out0 = (RTYPE)__msa_dotp_s_w((v8i16)mult0, (v8i16)cnst0); \ out1 = (RTYPE)__msa_dotp_s_w((v8i16)mult1, (v8i16)cnst1); \ } #define DOTP_SH2_SW(...) DOTP_SH2(v4i32, __VA_ARGS__) #define DOTP_SH4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \ cnst3, out0, out1, out2, out3) \ { \ DOTP_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ DOTP_SH2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ } #define DOTP_SH4_SW(...) DOTP_SH4(v4i32, __VA_ARGS__) /* Description : Dot product of word vector elements Arguments : Inputs - mult0, mult1, cnst0, cnst1 Outputs - out0, out1 Return Type - as per RTYPE Details : Signed word elements from 'mult0' are multiplied with signed word elements from 'cnst0' producing a result twice the size of input i.e. signed double word. The multiplication result of adjacent odd-even elements are added together and written to the 'out0' vector */ #define DOTP_SW2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ { \ out0 = (RTYPE)__msa_dotp_s_d((v4i32)mult0, (v4i32)cnst0); \ out1 = (RTYPE)__msa_dotp_s_d((v4i32)mult1, (v4i32)cnst1); \ } #define DOTP_SW2_SD(...) DOTP_SW2(v2i64, __VA_ARGS__) /* Description : Dot product & addition of byte vector elements Arguments : Inputs - mult0, mult1, cnst0, cnst1 Outputs - out0, out1 Return Type - as per RTYPE Details : Signed byte elements from 'mult0' are multiplied with signed byte elements from 'cnst0' producing a result twice the size of input i.e. signed halfword. The multiplication result of adjacent odd-even elements are added to the 'out0' vector */ #define DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ { \ out0 = (RTYPE)__msa_dpadd_s_h((v8i16)out0, (v16i8)mult0, (v16i8)cnst0); \ out1 = (RTYPE)__msa_dpadd_s_h((v8i16)out1, (v16i8)mult1, (v16i8)cnst1); \ } #define DPADD_SB2_SH(...) DPADD_SB2(v8i16, __VA_ARGS__) #define DPADD_SB4(RTYPE, mult0, mult1, mult2, mult3, cnst0, cnst1, cnst2, \ cnst3, out0, out1, out2, out3) \ { \ DPADD_SB2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1); \ DPADD_SB2(RTYPE, mult2, mult3, cnst2, cnst3, out2, out3); \ } #define DPADD_SB4_SH(...) DPADD_SB4(v8i16, __VA_ARGS__) /* Description : Dot product & addition of halfword vector elements Arguments : Inputs - mult0, mult1, cnst0, cnst1 Outputs - out0, out1 Return Type - as per RTYPE Details : Signed halfword elements from 'mult0' are multiplied with signed halfword elements from 'cnst0' producing a result twice the size of input i.e. signed word. The multiplication result of adjacent odd-even elements are added to the 'out0' vector */ #define DPADD_SH2(RTYPE, mult0, mult1, cnst0, cnst1, out0, out1) \ { \ out0 = (RTYPE)__msa_dpadd_s_w((v4i32)out0, (v8i16)mult0, (v8i16)cnst0); \ out1 = (RTYPE)__msa_dpadd_s_w((v4i32)out1, (v8i16)mult1, (v8i16)cnst1); \ } #define DPADD_SH2_SW(...) 
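/* Usage sketch (illustrative; 'filt0'/'filt1' are hypothetical tap vectors):
 * DPADD_SB2_SH accumulates byte multiply-adds into running halfword sums,
 * the usual pattern for convolutions split across tap pairs.
 *
 *   v8i16 sum0, sum1;  // partial sums so far
 *   DPADD_SB2_SH(src0, src1, filt0, filt1, sum0, sum1);
 *   // sum0 += dotp(src0, filt0); sum1 += dotp(src1, filt1);
 */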
DPADD_SH2(v4i32, __VA_ARGS__) /* Description : Dot product & addition of double word vector elements Arguments : Inputs - mult0, mult1 Outputs - out0, out1 Return Type - as per RTYPE Details : Each signed word element from 'mult0' is multiplied with itself producing an intermediate result twice the size of input i.e. signed double word The multiplication result of adjacent odd-even elements are added to the 'out0' vector */ #define DPADD_SD2(RTYPE, mult0, mult1, out0, out1) \ { \ out0 = (RTYPE)__msa_dpadd_s_d((v2i64)out0, (v4i32)mult0, (v4i32)mult0); \ out1 = (RTYPE)__msa_dpadd_s_d((v2i64)out1, (v4i32)mult1, (v4i32)mult1); \ } #define DPADD_SD2_SD(...) DPADD_SD2(v2i64, __VA_ARGS__) /* Description : Minimum values between unsigned elements of either vector are copied to the output vector Arguments : Inputs - in0, in1, min_vec Outputs - in place operation Return Type - as per RTYPE Details : Minimum of unsigned halfword element values from 'in0' and 'min_vec' are written to output vector 'in0' */ #define MIN_UH2(RTYPE, in0, in1, min_vec) \ { \ in0 = (RTYPE)__msa_min_u_h((v8u16)in0, min_vec); \ in1 = (RTYPE)__msa_min_u_h((v8u16)in1, min_vec); \ } #define MIN_UH2_UH(...) MIN_UH2(v8u16, __VA_ARGS__) #define MIN_UH4(RTYPE, in0, in1, in2, in3, min_vec) \ { \ MIN_UH2(RTYPE, in0, in1, min_vec); \ MIN_UH2(RTYPE, in2, in3, min_vec); \ } #define MIN_UH4_UH(...) MIN_UH4(v8u16, __VA_ARGS__) /* Description : Clips all signed halfword elements of input vector between 0 & 255 Arguments : Input - in Output - out_m Return Type - signed halfword */ #define CLIP_SH_0_255(in) \ ({ \ v8i16 max_m = __msa_ldi_h(255); \ v8i16 out_m; \ \ out_m = __msa_maxi_s_h((v8i16)in, 0); \ out_m = __msa_min_s_h((v8i16)max_m, (v8i16)out_m); \ out_m; \ }) #define CLIP_SH2_0_255(in0, in1) \ { \ in0 = CLIP_SH_0_255(in0); \ in1 = CLIP_SH_0_255(in1); \ } #define CLIP_SH4_0_255(in0, in1, in2, in3) \ { \ CLIP_SH2_0_255(in0, in1); \ CLIP_SH2_0_255(in2, in3); \ } /* Description : Horizontal addition of 4 signed word elements of input vector Arguments : Input - in (signed word vector) Output - sum_m (i32 sum) Return Type - signed word (GP) Details : 4 signed word elements of 'in' vector are added together and the resulting integer sum is returned */ #define HADD_SW_S32(in) \ ({ \ v2i64 res0_m, res1_m; \ int32_t sum_m; \ \ res0_m = __msa_hadd_s_d((v4i32)in, (v4i32)in); \ res1_m = __msa_splati_d(res0_m, 1); \ res0_m = res0_m + res1_m; \ sum_m = __msa_copy_s_w((v4i32)res0_m, 0); \ sum_m; \ }) /* Description : Horizontal addition of 4 unsigned word elements Arguments : Input - in (unsigned word vector) Output - sum_m (u32 sum) Return Type - unsigned word (GP) Details : 4 unsigned word elements of 'in' vector are added together and the resulting integer sum is returned */ #define HADD_UW_U32(in) \ ({ \ v2u64 res0_m, res1_m; \ uint32_t sum_m; \ \ res0_m = __msa_hadd_u_d((v4u32)in, (v4u32)in); \ res1_m = (v2u64)__msa_splati_d((v2i64)res0_m, 1); \ res0_m += res1_m; \ sum_m = __msa_copy_u_w((v4i32)res0_m, 0); \ sum_m; \ }) /* Description : Horizontal addition of 8 unsigned halfword elements Arguments : Input - in (unsigned halfword vector) Output - sum_m (u32 sum) Return Type - unsigned word Details : 8 unsigned halfword elements of 'in' vector are added together and the resulting integer sum is returned */ #define HADD_UH_U32(in) \ ({ \ v4u32 res_m; \ uint32_t sum_m; \ \ res_m = __msa_hadd_u_w((v8u16)in, (v8u16)in); \ sum_m = HADD_UW_U32(res_m); \ sum_m; \ }) /* Description : Horizontal addition of unsigned byte vector elements Arguments : Inputs 
- in0, in1 Outputs - out0, out1 Return Type - as per RTYPE Details : Each unsigned odd byte element from 'in0' is added to even unsigned byte element from 'in0' (pairwise) and the halfword result is written to 'out0' */ #define HADD_UB2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_hadd_u_h((v16u8)in0, (v16u8)in0); \ out1 = (RTYPE)__msa_hadd_u_h((v16u8)in1, (v16u8)in1); \ } #define HADD_UB2_UH(...) HADD_UB2(v8u16, __VA_ARGS__) #define HADD_UB4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3) \ { \ HADD_UB2(RTYPE, in0, in1, out0, out1); \ HADD_UB2(RTYPE, in2, in3, out2, out3); \ } #define HADD_UB4_UH(...) HADD_UB4(v8u16, __VA_ARGS__) /* Description : Horizontal subtraction of unsigned byte vector elements Arguments : Inputs - in0, in1 Outputs - out0, out1 Return Type - as per RTYPE Details : Each unsigned odd byte element from 'in0' is subtracted from even unsigned byte element from 'in0' (pairwise) and the halfword result is written to 'out0' */ #define HSUB_UB2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_hsub_u_h((v16u8)in0, (v16u8)in0); \ out1 = (RTYPE)__msa_hsub_u_h((v16u8)in1, (v16u8)in1); \ } #define HSUB_UB2_SH(...) HSUB_UB2(v8i16, __VA_ARGS__) /* Description : SAD (Sum of Absolute Difference) Arguments : Inputs - in0, in1, ref0, ref1 Outputs - sad_m (halfword vector) Return Type - unsigned halfword Details : Absolute difference of all the byte elements from 'in0' with 'ref0' is calculated and preserved in 'diff0'. Then even-odd pairs are added together to generate 8 halfword results. */ #define SAD_UB2_UH(in0, in1, ref0, ref1) \ ({ \ v16u8 diff0_m, diff1_m; \ v8u16 sad_m = { 0 }; \ \ diff0_m = __msa_asub_u_b((v16u8)in0, (v16u8)ref0); \ diff1_m = __msa_asub_u_b((v16u8)in1, (v16u8)ref1); \ \ sad_m += __msa_hadd_u_h((v16u8)diff0_m, (v16u8)diff0_m); \ sad_m += __msa_hadd_u_h((v16u8)diff1_m, (v16u8)diff1_m); \ \ sad_m; \ }) /* Description : Horizontal subtraction of signed halfword vector elements Arguments : Inputs - in0, in1 Outputs - out0, out1 Return Type - as per RTYPE Details : Each signed odd halfword element from 'in0' is subtracted from even signed halfword element from 'in0' (pairwise) and the word result is written to 'out0' */ #define HSUB_UH2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_hsub_s_w((v8i16)in0, (v8i16)in0); \ out1 = (RTYPE)__msa_hsub_s_w((v8i16)in1, (v8i16)in1); \ } #define HSUB_UH2_SW(...) HSUB_UH2(v4i32, __VA_ARGS__) /* Description : Set element n input vector to GPR value Arguments : Inputs - in0, in1, in2, in3 Output - out Return Type - as per RTYPE Details : Set element 0 in vector 'out' to value specified in 'in0' */ #define INSERT_W2(RTYPE, in0, in1, out) \ { \ out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \ out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \ } #define INSERT_W2_SB(...) INSERT_W2(v16i8, __VA_ARGS__) #define INSERT_W4(RTYPE, in0, in1, in2, in3, out) \ { \ out = (RTYPE)__msa_insert_w((v4i32)out, 0, in0); \ out = (RTYPE)__msa_insert_w((v4i32)out, 1, in1); \ out = (RTYPE)__msa_insert_w((v4i32)out, 2, in2); \ out = (RTYPE)__msa_insert_w((v4i32)out, 3, in3); \ } #define INSERT_W4_UB(...) INSERT_W4(v16u8, __VA_ARGS__) #define INSERT_W4_SB(...) INSERT_W4(v16i8, __VA_ARGS__) #define INSERT_D2(RTYPE, in0, in1, out) \ { \ out = (RTYPE)__msa_insert_d((v2i64)out, 0, in0); \ out = (RTYPE)__msa_insert_d((v2i64)out, 1, in1); \ } #define INSERT_D2_UB(...) INSERT_D2(v16u8, __VA_ARGS__) #define INSERT_D2_SB(...) INSERT_D2(v16i8, __VA_ARGS__) #define INSERT_D2_SH(...) 
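/* Usage sketch (illustrative; 'src'/'ref' are hypothetical): a 16x2 SAD
 * reduced to a scalar with the helpers above.
 *
 *   v16u8 s0, s1, r0, r1;
 *   v8u16 sad_v;
 *   uint32_t sad;
 *   LD_UB2(src, stride, s0, s1);
 *   LD_UB2(ref, stride, r0, r1);
 *   sad_v = SAD_UB2_UH(s0, s1, r0, r1);
 *   sad = HADD_UH_U32(sad_v);
 */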
INSERT_D2(v8i16, __VA_ARGS__) /* Description : Interleave even byte elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even byte elements of 'in0' and 'in1' are interleaved and written to 'out0' */ #define ILVEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvev_b((v16i8)in1, (v16i8)in0); \ out1 = (RTYPE)__msa_ilvev_b((v16i8)in3, (v16i8)in2); \ } #define ILVEV_B2_UB(...) ILVEV_B2(v16u8, __VA_ARGS__) #define ILVEV_B2_SH(...) ILVEV_B2(v8i16, __VA_ARGS__) /* Description : Interleave even halfword elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even halfword elements of 'in0' and 'in1' are interleaved and written to 'out0' */ #define ILVEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvev_h((v8i16)in1, (v8i16)in0); \ out1 = (RTYPE)__msa_ilvev_h((v8i16)in3, (v8i16)in2); \ } #define ILVEV_H2_UB(...) ILVEV_H2(v16u8, __VA_ARGS__) #define ILVEV_H2_SH(...) ILVEV_H2(v8i16, __VA_ARGS__) #define ILVEV_H2_SW(...) ILVEV_H2(v4i32, __VA_ARGS__) /* Description : Interleave even word elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even word elements of 'in0' and 'in1' are interleaved and written to 'out0' */ #define ILVEV_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvev_w((v4i32)in1, (v4i32)in0); \ out1 = (RTYPE)__msa_ilvev_w((v4i32)in3, (v4i32)in2); \ } #define ILVEV_W2_SB(...) ILVEV_W2(v16i8, __VA_ARGS__) /* Description : Interleave even double word elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even double word elements of 'in0' and 'in1' are interleaved and written to 'out0' */ #define ILVEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvev_d((v2i64)in1, (v2i64)in0); \ out1 = (RTYPE)__msa_ilvev_d((v2i64)in3, (v2i64)in2); \ } #define ILVEV_D2_UB(...) ILVEV_D2(v16u8, __VA_ARGS__) /* Description : Interleave left half of byte elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Left half of byte elements of 'in0' and 'in1' are interleaved and written to 'out0'. */ #define ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ out1 = (RTYPE)__msa_ilvl_b((v16i8)in2, (v16i8)in3); \ } #define ILVL_B2_UB(...) ILVL_B2(v16u8, __VA_ARGS__) #define ILVL_B2_SB(...) ILVL_B2(v16i8, __VA_ARGS__) #define ILVL_B2_UH(...) ILVL_B2(v8u16, __VA_ARGS__) #define ILVL_B2_SH(...) ILVL_B2(v8i16, __VA_ARGS__) #define ILVL_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ ILVL_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ ILVL_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define ILVL_B4_SB(...) ILVL_B4(v16i8, __VA_ARGS__) #define ILVL_B4_SH(...) ILVL_B4(v8i16, __VA_ARGS__) #define ILVL_B4_UH(...) ILVL_B4(v8u16, __VA_ARGS__) /* Description : Interleave left half of halfword elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Left half of halfword elements of 'in0' and 'in1' are interleaved and written to 'out0'. */ #define ILVL_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \ out1 = (RTYPE)__msa_ilvl_h((v8i16)in2, (v8i16)in3); \ } #define ILVL_H2_SH(...) 
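/* Lane sketch (illustrative) for the interleave family with byte inputs
 * A = {a0..a15} and B = {b0..b15}:
 *   - ILVR_* pairs the low (right) halves:  a0,b0,a1,b1,...,a7,b7
 *   - ILVL_* pairs the high (left) halves:  a8,b8,...,a15,b15
 *   - ILVEV_* pairs the even lanes:         a0,b0,a2,b2,...
 * (exact lane ordering follows the underlying __msa_ilv* intrinsics).
 */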
ILVL_H2(v8i16, __VA_ARGS__) #define ILVL_H2_SW(...) ILVL_H2(v4i32, __VA_ARGS__) /* Description : Interleave left half of word elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Left half of word elements of 'in0' and 'in1' are interleaved and written to 'out0'. */ #define ILVL_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \ out1 = (RTYPE)__msa_ilvl_w((v4i32)in2, (v4i32)in3); \ } #define ILVL_W2_UB(...) ILVL_W2(v16u8, __VA_ARGS__) #define ILVL_W2_SH(...) ILVL_W2(v8i16, __VA_ARGS__) /* Description : Interleave right half of byte elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Right half of byte elements of 'in0' and 'in1' are interleaved and written to out0. */ #define ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ out1 = (RTYPE)__msa_ilvr_b((v16i8)in2, (v16i8)in3); \ } #define ILVR_B2_UB(...) ILVR_B2(v16u8, __VA_ARGS__) #define ILVR_B2_SB(...) ILVR_B2(v16i8, __VA_ARGS__) #define ILVR_B2_UH(...) ILVR_B2(v8u16, __VA_ARGS__) #define ILVR_B2_SH(...) ILVR_B2(v8i16, __VA_ARGS__) #define ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ ILVR_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ ILVR_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define ILVR_B4_UB(...) ILVR_B4(v16u8, __VA_ARGS__) #define ILVR_B4_SB(...) ILVR_B4(v16i8, __VA_ARGS__) #define ILVR_B4_UH(...) ILVR_B4(v8u16, __VA_ARGS__) #define ILVR_B4_SH(...) ILVR_B4(v8i16, __VA_ARGS__) #define ILVR_B8(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, \ in11, in12, in13, in14, in15, out0, out1, out2, out3, out4, \ out5, out6, out7) \ { \ ILVR_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \ out3); \ ILVR_B4(RTYPE, in8, in9, in10, in11, in12, in13, in14, in15, out4, out5, \ out6, out7); \ } #define ILVR_B8_UH(...) ILVR_B8(v8u16, __VA_ARGS__) /* Description : Interleave right half of halfword elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Right half of halfword elements of 'in0' and 'in1' are interleaved and written to 'out0'. */ #define ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \ out1 = (RTYPE)__msa_ilvr_h((v8i16)in2, (v8i16)in3); \ } #define ILVR_H2_SH(...) ILVR_H2(v8i16, __VA_ARGS__) #define ILVR_H2_SW(...) ILVR_H2(v4i32, __VA_ARGS__) #define ILVR_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ ILVR_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ ILVR_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define ILVR_H4_SH(...) ILVR_H4(v8i16, __VA_ARGS__) #define ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \ out1 = (RTYPE)__msa_ilvr_w((v4i32)in2, (v4i32)in3); \ } #define ILVR_W2_UB(...) ILVR_W2(v16u8, __VA_ARGS__) #define ILVR_W2_SH(...) ILVR_W2(v8i16, __VA_ARGS__) #define ILVR_W4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ ILVR_W2(RTYPE, in0, in1, in2, in3, out0, out1); \ ILVR_W2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define ILVR_W4_UB(...) 
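/* Usage sketch (illustrative; 'row0'/'row1' are hypothetical): a very common
 * idiom is interleaving pixel bytes with a zero vector to widen them to
 * halfwords before arithmetic.
 *
 *   v16i8 zero = { 0 };
 *   v8i16 r0_h, r1_h;
 *   ILVR_B2_SH(zero, row0, zero, row1, r0_h, r1_h);
 *   // low 8 bytes of each row, zero-extended to 16 bits
 */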
ILVR_W4(v16u8, __VA_ARGS__) /* Description : Interleave right half of double word elements from vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Right half of double word elements of 'in0' and 'in1' are interleaved and written to 'out0'. */ #define ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_d((v2i64)(in0), (v2i64)(in1)); \ out1 = (RTYPE)__msa_ilvr_d((v2i64)(in2), (v2i64)(in3)); \ } #define ILVR_D2_UB(...) ILVR_D2(v16u8, __VA_ARGS__) #define ILVR_D2_SB(...) ILVR_D2(v16i8, __VA_ARGS__) #define ILVR_D2_SH(...) ILVR_D2(v8i16, __VA_ARGS__) #define ILVR_D3(RTYPE, in0, in1, in2, in3, in4, in5, out0, out1, out2) \ { \ ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ out2 = (RTYPE)__msa_ilvr_d((v2i64)(in4), (v2i64)(in5)); \ } #define ILVR_D3_SB(...) ILVR_D3(v16i8, __VA_ARGS__) #define ILVR_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ ILVR_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ ILVR_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define ILVR_D4_SB(...) ILVR_D4(v16i8, __VA_ARGS__) #define ILVR_D4_UB(...) ILVR_D4(v16u8, __VA_ARGS__) /* Description : Interleave both left and right half of input vectors Arguments : Inputs - in0, in1 Outputs - out0, out1 Return Type - as per RTYPE Details : Right half of byte elements from 'in0' and 'in1' are interleaved and written to 'out0' */ #define ILVRL_B2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_b((v16i8)in0, (v16i8)in1); \ out1 = (RTYPE)__msa_ilvl_b((v16i8)in0, (v16i8)in1); \ } #define ILVRL_B2_UB(...) ILVRL_B2(v16u8, __VA_ARGS__) #define ILVRL_B2_SB(...) ILVRL_B2(v16i8, __VA_ARGS__) #define ILVRL_B2_UH(...) ILVRL_B2(v8u16, __VA_ARGS__) #define ILVRL_B2_SH(...) ILVRL_B2(v8i16, __VA_ARGS__) #define ILVRL_H2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_h((v8i16)in0, (v8i16)in1); \ out1 = (RTYPE)__msa_ilvl_h((v8i16)in0, (v8i16)in1); \ } #define ILVRL_H2_SH(...) ILVRL_H2(v8i16, __VA_ARGS__) #define ILVRL_H2_SW(...) ILVRL_H2(v4i32, __VA_ARGS__) #define ILVRL_W2(RTYPE, in0, in1, out0, out1) \ { \ out0 = (RTYPE)__msa_ilvr_w((v4i32)in0, (v4i32)in1); \ out1 = (RTYPE)__msa_ilvl_w((v4i32)in0, (v4i32)in1); \ } #define ILVRL_W2_UB(...) ILVRL_W2(v16u8, __VA_ARGS__) #define ILVRL_W2_SB(...) ILVRL_W2(v16i8, __VA_ARGS__) #define ILVRL_W2_SH(...) ILVRL_W2(v8i16, __VA_ARGS__) #define ILVRL_W2_SW(...) ILVRL_W2(v4i32, __VA_ARGS__) /* Description : Saturate the halfword element values to the max unsigned value of (sat_val + 1) bits The element data width remains unchanged Arguments : Inputs - in0, in1, sat_val Outputs - in place operation Return Type - as per RTYPE Details : Each unsigned halfword element from 'in0' is saturated to the value generated with (sat_val + 1) bit range. The results are written in place */ #define SAT_UH2(RTYPE, in0, in1, sat_val) \ { \ in0 = (RTYPE)__msa_sat_u_h((v8u16)in0, sat_val); \ in1 = (RTYPE)__msa_sat_u_h((v8u16)in1, sat_val); \ } #define SAT_UH2_UH(...) SAT_UH2(v8u16, __VA_ARGS__) #define SAT_UH4(RTYPE, in0, in1, in2, in3, sat_val) \ { \ SAT_UH2(RTYPE, in0, in1, sat_val); \ SAT_UH2(RTYPE, in2, in3, sat_val) \ } #define SAT_UH4_UH(...) 
SAT_UH4(v8u16, __VA_ARGS__) /* Description : Saturate the halfword element values to the max unsigned value of (sat_val + 1) bits The element data width remains unchanged Arguments : Inputs - in0, in1, sat_val Outputs - in place operation Return Type - as per RTYPE Details : Each unsigned halfword element from 'in0' is saturated to the value generated with (sat_val + 1) bit range The results are written in place */ #define SAT_SH2(RTYPE, in0, in1, sat_val) \ { \ in0 = (RTYPE)__msa_sat_s_h((v8i16)in0, sat_val); \ in1 = (RTYPE)__msa_sat_s_h((v8i16)in1, sat_val); \ } #define SAT_SH2_SH(...) SAT_SH2(v8i16, __VA_ARGS__) #define SAT_SH4(RTYPE, in0, in1, in2, in3, sat_val) \ { \ SAT_SH2(RTYPE, in0, in1, sat_val); \ SAT_SH2(RTYPE, in2, in3, sat_val); \ } #define SAT_SH4_SH(...) SAT_SH4(v8i16, __VA_ARGS__) /* Description : Indexed halfword element values are replicated to all elements in output vector Arguments : Inputs - in, idx0, idx1 Outputs - out0, out1 Return Type - as per RTYPE Details : 'idx0' element value from 'in' vector is replicated to all elements in 'out0' vector Valid index range for halfword operation is 0-7 */ #define SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1) \ { \ out0 = (RTYPE)__msa_splati_h((v8i16)in, idx0); \ out1 = (RTYPE)__msa_splati_h((v8i16)in, idx1); \ } #define SPLATI_H2_SH(...) SPLATI_H2(v8i16, __VA_ARGS__) #define SPLATI_H4(RTYPE, in, idx0, idx1, idx2, idx3, out0, out1, out2, out3) \ { \ SPLATI_H2(RTYPE, in, idx0, idx1, out0, out1); \ SPLATI_H2(RTYPE, in, idx2, idx3, out2, out3); \ } #define SPLATI_H4_SB(...) SPLATI_H4(v16i8, __VA_ARGS__) #define SPLATI_H4_SH(...) SPLATI_H4(v8i16, __VA_ARGS__) /* Description : Pack even byte elements of vector pairs Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even byte elements of 'in0' are copied to the left half of 'out0' & even byte elements of 'in1' are copied to the right half of 'out0'. */ #define PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_pckev_b((v16i8)in0, (v16i8)in1); \ out1 = (RTYPE)__msa_pckev_b((v16i8)in2, (v16i8)in3); \ } #define PCKEV_B2_SB(...) PCKEV_B2(v16i8, __VA_ARGS__) #define PCKEV_B2_UB(...) PCKEV_B2(v16u8, __VA_ARGS__) #define PCKEV_B2_SH(...) PCKEV_B2(v8i16, __VA_ARGS__) #define PCKEV_B4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ PCKEV_B2(RTYPE, in0, in1, in2, in3, out0, out1); \ PCKEV_B2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define PCKEV_B4_SB(...) PCKEV_B4(v16i8, __VA_ARGS__) #define PCKEV_B4_UB(...) PCKEV_B4(v16u8, __VA_ARGS__) #define PCKEV_B4_SH(...) PCKEV_B4(v8i16, __VA_ARGS__) /* Description : Pack even halfword elements of vector pairs Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even halfword elements of 'in0' are copied to the left half of 'out0' & even halfword elements of 'in1' are copied to the right half of 'out0'. */ #define PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_pckev_h((v8i16)in0, (v8i16)in1); \ out1 = (RTYPE)__msa_pckev_h((v8i16)in2, (v8i16)in3); \ } #define PCKEV_H2_SH(...) PCKEV_H2(v8i16, __VA_ARGS__) #define PCKEV_H2_SW(...) PCKEV_H2(v4i32, __VA_ARGS__) #define PCKEV_H4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ PCKEV_H2(RTYPE, in0, in1, in2, in3, out0, out1); \ PCKEV_H2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define PCKEV_H4_SH(...) 
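/* Usage sketch (illustrative): PCKEV_B* is the usual way to narrow filtered
 * halfwords back to bytes; once values are clamped to 0..255 the even (low)
 * byte of each halfword is the whole value.
 *
 *   v8i16 h0, h1;  // filtered rows, already in 0..255
 *   v16u8 bytes = (v16u8)__msa_pckev_b((v16i8)h1, (v16i8)h0);
 *   // right half of 'bytes' comes from h0, left half from h1
 */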
PCKEV_H4(v8i16, __VA_ARGS__) /* Description : Pack even double word elements of vector pairs Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Even double elements of 'in0' are copied to the left half of 'out0' & even double elements of 'in1' are copied to the right half of 'out0'. */ #define PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_pckev_d((v2i64)in0, (v2i64)in1); \ out1 = (RTYPE)__msa_pckev_d((v2i64)in2, (v2i64)in3); \ } #define PCKEV_D2_UB(...) PCKEV_D2(v16u8, __VA_ARGS__) #define PCKEV_D2_SH(...) PCKEV_D2(v8i16, __VA_ARGS__) #define PCKEV_D4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ PCKEV_D2(RTYPE, in0, in1, in2, in3, out0, out1); \ PCKEV_D2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define PCKEV_D4_UB(...) PCKEV_D4(v16u8, __VA_ARGS__) /* Description : Each byte element is logically xor'ed with immediate 128 Arguments : Inputs - in0, in1 Outputs - in place operation Return Type - as per RTYPE Details : Each unsigned byte element from input vector 'in0' is logically xor'ed with 128 and the result is stored in-place. */ #define XORI_B2_128(RTYPE, in0, in1) \ { \ in0 = (RTYPE)__msa_xori_b((v16u8)in0, 128); \ in1 = (RTYPE)__msa_xori_b((v16u8)in1, 128); \ } #define XORI_B2_128_UB(...) XORI_B2_128(v16u8, __VA_ARGS__) #define XORI_B2_128_SB(...) XORI_B2_128(v16i8, __VA_ARGS__) #define XORI_B3_128(RTYPE, in0, in1, in2) \ { \ XORI_B2_128(RTYPE, in0, in1); \ in2 = (RTYPE)__msa_xori_b((v16u8)in2, 128); \ } #define XORI_B3_128_SB(...) XORI_B3_128(v16i8, __VA_ARGS__) #define XORI_B4_128(RTYPE, in0, in1, in2, in3) \ { \ XORI_B2_128(RTYPE, in0, in1); \ XORI_B2_128(RTYPE, in2, in3); \ } #define XORI_B4_128_UB(...) XORI_B4_128(v16u8, __VA_ARGS__) #define XORI_B4_128_SB(...) XORI_B4_128(v16i8, __VA_ARGS__) #define XORI_B7_128(RTYPE, in0, in1, in2, in3, in4, in5, in6) \ { \ XORI_B4_128(RTYPE, in0, in1, in2, in3); \ XORI_B3_128(RTYPE, in4, in5, in6); \ } #define XORI_B7_128_SB(...) XORI_B7_128(v16i8, __VA_ARGS__) /* Description : Average of signed halfword elements -> (a + b) / 2 Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 Outputs - out0, out1, out2, out3 Return Type - as per RTYPE Details : Each signed halfword element from 'in0' is added to each signed halfword element of 'in1' with full precision resulting in one extra bit in the result. The result is then divided by 2 and written to 'out0' */ #define AVE_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ out0 = (RTYPE)__msa_ave_s_h((v8i16)in0, (v8i16)in1); \ out1 = (RTYPE)__msa_ave_s_h((v8i16)in2, (v8i16)in3); \ out2 = (RTYPE)__msa_ave_s_h((v8i16)in4, (v8i16)in5); \ out3 = (RTYPE)__msa_ave_s_h((v8i16)in6, (v8i16)in7); \ } #define AVE_SH4_SH(...) AVE_SH4(v8i16, __VA_ARGS__) /* Description : Addition of signed halfword elements and signed saturation Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Return Type - as per RTYPE Details : Signed halfword elements from 'in0' are added to signed halfword elements of 'in1'. The result is then signed saturated between halfword data type range */ #define ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1) \ { \ out0 = (RTYPE)__msa_adds_s_h((v8i16)in0, (v8i16)in1); \ out1 = (RTYPE)__msa_adds_s_h((v8i16)in2, (v8i16)in3); \ } #define ADDS_SH2_SH(...) 
ADDS_SH2(v8i16, __VA_ARGS__) #define ADDS_SH4(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3) \ { \ ADDS_SH2(RTYPE, in0, in1, in2, in3, out0, out1); \ ADDS_SH2(RTYPE, in4, in5, in6, in7, out2, out3); \ } #define ADDS_SH4_SH(...) ADDS_SH4(v8i16, __VA_ARGS__) /* Description : Shift left all elements of vector (generic for all data types) Arguments : Inputs - in0, in1, in2, in3, shift Outputs - in place operation Return Type - as per input vector RTYPE Details : Each element of vector 'in0' is left shifted by 'shift' and the result is written in-place. */ #define SLLI_4V(in0, in1, in2, in3, shift) \ { \ in0 = in0 << shift; \ in1 = in1 << shift; \ in2 = in2 << shift; \ in3 = in3 << shift; \ } /* Description : Arithmetic shift right all elements of vector (generic for all data types) Arguments : Inputs - in0, in1, in2, in3, shift Outputs - in place operation Return Type - as per input vector RTYPE Details : Each element of vector 'in0' is right shifted by 'shift' and the result is written in-place. 'shift' is a GP variable. */ #define SRA_2V(in0, in1, shift) \ { \ in0 = in0 >> shift; \ in1 = in1 >> shift; \ } #define SRA_4V(in0, in1, in2, in3, shift) \ { \ in0 = in0 >> shift; \ in1 = in1 >> shift; \ in2 = in2 >> shift; \ in3 = in3 >> shift; \ } /* Description : Shift right arithmetic rounded words Arguments : Inputs - in0, in1, shift Outputs - in place operation Return Type - as per RTYPE Details : Each element of vector 'in0' is shifted right arithmetically by the number of bits in the corresponding element in the vector 'shift'. The last discarded bit is added to shifted value for rounding and the result is written in-place. 'shift' is a vector. */ #define SRAR_W2(RTYPE, in0, in1, shift) \ { \ in0 = (RTYPE)__msa_srar_w((v4i32)in0, (v4i32)shift); \ in1 = (RTYPE)__msa_srar_w((v4i32)in1, (v4i32)shift); \ } #define SRAR_W4(RTYPE, in0, in1, in2, in3, shift) \ { \ SRAR_W2(RTYPE, in0, in1, shift) \ SRAR_W2(RTYPE, in2, in3, shift) \ } #define SRAR_W4_SW(...) SRAR_W4(v4i32, __VA_ARGS__) /* Description : Shift right arithmetic rounded (immediate) Arguments : Inputs - in0, in1, shift Outputs - in place operation Return Type - as per RTYPE Details : Each element of vector 'in0' is shifted right arithmetically by the value in 'shift'. The last discarded bit is added to the shifted value for rounding and the result is written in-place. 'shift' is an immediate value. */ #define SRARI_H2(RTYPE, in0, in1, shift) \ { \ in0 = (RTYPE)__msa_srari_h((v8i16)in0, shift); \ in1 = (RTYPE)__msa_srari_h((v8i16)in1, shift); \ } #define SRARI_H2_UH(...) SRARI_H2(v8u16, __VA_ARGS__) #define SRARI_H2_SH(...) SRARI_H2(v8i16, __VA_ARGS__) #define SRARI_H4(RTYPE, in0, in1, in2, in3, shift) \ { \ SRARI_H2(RTYPE, in0, in1, shift); \ SRARI_H2(RTYPE, in2, in3, shift); \ } #define SRARI_H4_UH(...) SRARI_H4(v8u16, __VA_ARGS__) #define SRARI_H4_SH(...) SRARI_H4(v8i16, __VA_ARGS__) #define SRARI_W2(RTYPE, in0, in1, shift) \ { \ in0 = (RTYPE)__msa_srari_w((v4i32)in0, shift); \ in1 = (RTYPE)__msa_srari_w((v4i32)in1, shift); \ } #define SRARI_W2_SW(...) SRARI_W2(v4i32, __VA_ARGS__) #define SRARI_W4(RTYPE, in0, in1, in2, in3, shift) \ { \ SRARI_W2(RTYPE, in0, in1, shift); \ SRARI_W2(RTYPE, in2, in3, shift); \ } #define SRARI_W4_SW(...) 
SRARI_W4(v4i32, __VA_ARGS__) /* Description : Logical shift right all elements of vector (immediate) Arguments : Inputs - in0, in1, in2, in3, shift Outputs - out0, out1, out2, out3 Return Type - as per RTYPE Details : Each element of vector 'in0' is right shifted by 'shift' and the result is written in-place. 'shift' is an immediate value. */ #define SRLI_H4(RTYPE, in0, in1, in2, in3, out0, out1, out2, out3, shift) \ { \ out0 = (RTYPE)__msa_srli_h((v8i16)in0, shift); \ out1 = (RTYPE)__msa_srli_h((v8i16)in1, shift); \ out2 = (RTYPE)__msa_srli_h((v8i16)in2, shift); \ out3 = (RTYPE)__msa_srli_h((v8i16)in3, shift); \ } #define SRLI_H4_SH(...) SRLI_H4(v8i16, __VA_ARGS__) /* Description : Multiplication of pairs of vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Details : Each element from 'in0' is multiplied with elements from 'in1' and the result is written to 'out0' */ #define MUL2(in0, in1, in2, in3, out0, out1) \ { \ out0 = in0 * in1; \ out1 = in2 * in3; \ } #define MUL4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ { \ MUL2(in0, in1, in2, in3, out0, out1); \ MUL2(in4, in5, in6, in7, out2, out3); \ } /* Description : Addition of 2 pairs of vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Details : Each element in 'in0' is added to 'in1' and result is written to 'out0'. */ #define ADD2(in0, in1, in2, in3, out0, out1) \ { \ out0 = in0 + in1; \ out1 = in2 + in3; \ } #define ADD4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ { \ ADD2(in0, in1, in2, in3, out0, out1); \ ADD2(in4, in5, in6, in7, out2, out3); \ } /* Description : Subtraction of 2 pairs of vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1 Details : Each element in 'in1' is subtracted from 'in0' and result is written to 'out0'. 
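Example     : SUB2(a0, b0, a1, b1, d0, d1) is the vector analogue of the
scalar per-element loop d0[i] = a0[i] - b0[i]; d1[i] = a1[i] - b1[i];
(the vector names here are illustrative only)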
*/ #define SUB2(in0, in1, in2, in3, out0, out1) \ { \ out0 = in0 - in1; \ out1 = in2 - in3; \ } #define SUB4(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, out3) \ { \ out0 = in0 - in1; \ out1 = in2 - in3; \ out2 = in4 - in5; \ out3 = in6 - in7; \ } /* Description : Sign extend halfword elements from right half of the vector Arguments : Input - in (halfword vector) Output - out (sign extended word vector) Return Type - signed word Details : Sign bit of halfword elements from input vector 'in' is extracted and interleaved with same vector 'in0' to generate 4 word elements keeping sign intact */ #define UNPCK_R_SH_SW(in, out) \ { \ v8i16 sign_m; \ \ sign_m = __msa_clti_s_h((v8i16)in, 0); \ out = (v4i32)__msa_ilvr_h(sign_m, (v8i16)in); \ } /* Description : Sign extend byte elements from input vector and return halfword results in pair of vectors Arguments : Input - in (byte vector) Outputs - out0, out1 (sign extended halfword vectors) Return Type - signed halfword Details : Sign bit of byte elements from input vector 'in' is extracted and interleaved right with same vector 'in0' to generate 8 signed halfword elements in 'out0' Then interleaved left with same vector 'in0' to generate 8 signed halfword elements in 'out1' */ #define UNPCK_SB_SH(in, out0, out1) \ { \ v16i8 tmp_m; \ \ tmp_m = __msa_clti_s_b((v16i8)in, 0); \ ILVRL_B2_SH(tmp_m, in, out0, out1); \ } /* Description : Zero extend unsigned byte elements to halfword elements Arguments : Input - in (unsigned byte vector) Outputs - out0, out1 (unsigned halfword vectors) Return Type - signed halfword Details : Zero extended right half of vector is returned in 'out0' Zero extended left half of vector is returned in 'out1' */ #define UNPCK_UB_SH(in, out0, out1) \ { \ v16i8 zero_m = { 0 }; \ \ ILVRL_B2_SH(zero_m, in, out0, out1); \ } /* Description : Sign extend halfword elements from input vector and return the result in pair of vectors Arguments : Input - in (halfword vector) Outputs - out0, out1 (sign extended word vectors) Return Type - signed word Details : Sign bit of halfword elements from input vector 'in' is extracted and interleaved right with same vector 'in0' to generate 4 signed word elements in 'out0' Then interleaved left with same vector 'in0' to generate 4 signed word elements in 'out1' */ #define UNPCK_SH_SW(in, out0, out1) \ { \ v8i16 tmp_m; \ \ tmp_m = __msa_clti_s_h((v8i16)in, 0); \ ILVRL_H2_SW(tmp_m, in, out0, out1); \ } /* Description : Butterfly of 4 input vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1, out2, out3 Details : Butterfly operation */ #define BUTTERFLY_4(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ out0 = in0 + in3; \ out1 = in1 + in2; \ \ out2 = in1 - in2; \ out3 = in0 - in3; \ } /* Description : Butterfly of 8 input vectors Arguments : Inputs - in0 ... in7 Outputs - out0 .. out7 Details : Butterfly operation */ #define BUTTERFLY_8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \ out3, out4, out5, out6, out7) \ { \ out0 = in0 + in7; \ out1 = in1 + in6; \ out2 = in2 + in5; \ out3 = in3 + in4; \ \ out4 = in3 - in4; \ out5 = in2 - in5; \ out6 = in1 - in6; \ out7 = in0 - in7; \ } /* Description : Butterfly of 16 input vectors Arguments : Inputs - in0 ... in15 Outputs - out0 .. 
out15 Details : Butterfly operation */ #define BUTTERFLY_16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, \ in11, in12, in13, in14, in15, out0, out1, out2, out3, \ out4, out5, out6, out7, out8, out9, out10, out11, out12, \ out13, out14, out15) \ { \ out0 = in0 + in15; \ out1 = in1 + in14; \ out2 = in2 + in13; \ out3 = in3 + in12; \ out4 = in4 + in11; \ out5 = in5 + in10; \ out6 = in6 + in9; \ out7 = in7 + in8; \ \ out8 = in7 - in8; \ out9 = in6 - in9; \ out10 = in5 - in10; \ out11 = in4 - in11; \ out12 = in3 - in12; \ out13 = in2 - in13; \ out14 = in1 - in14; \ out15 = in0 - in15; \ } /* Description : Transpose input 8x8 byte block Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 Outputs - out0, out1, out2, out3, out4, out5, out6, out7 Return Type - as per RTYPE */ #define TRANSPOSE8x8_UB(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, \ out1, out2, out3, out4, out5, out6, out7) \ { \ v16i8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ v16i8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \ \ ILVR_B4_SB(in2, in0, in3, in1, in6, in4, in7, in5, tmp0_m, tmp1_m, tmp2_m, \ tmp3_m); \ ILVRL_B2_SB(tmp1_m, tmp0_m, tmp4_m, tmp5_m); \ ILVRL_B2_SB(tmp3_m, tmp2_m, tmp6_m, tmp7_m); \ ILVRL_W2(RTYPE, tmp6_m, tmp4_m, out0, out2); \ ILVRL_W2(RTYPE, tmp7_m, tmp5_m, out4, out6); \ SLDI_B2_0(RTYPE, out0, out2, out1, out3, 8); \ SLDI_B2_0(RTYPE, out4, out6, out5, out7, 8); \ } #define TRANSPOSE8x8_UB_UB(...) TRANSPOSE8x8_UB(v16u8, __VA_ARGS__) /* Description : Transpose 16x8 block into 8x16 with byte elements in vectors Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, in13, in14, in15 Outputs - out0, out1, out2, out3, out4, out5, out6, out7 Return Type - unsigned byte */ #define TRANSPOSE16x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, \ in10, in11, in12, in13, in14, in15, out0, out1, \ out2, out3, out4, out5, out6, out7) \ { \ v16u8 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ v16u8 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \ \ ILVEV_D2_UB(in0, in8, in1, in9, out7, out6); \ ILVEV_D2_UB(in2, in10, in3, in11, out5, out4); \ ILVEV_D2_UB(in4, in12, in5, in13, out3, out2); \ ILVEV_D2_UB(in6, in14, in7, in15, out1, out0); \ \ tmp0_m = (v16u8)__msa_ilvev_b((v16i8)out6, (v16i8)out7); \ tmp4_m = (v16u8)__msa_ilvod_b((v16i8)out6, (v16i8)out7); \ tmp1_m = (v16u8)__msa_ilvev_b((v16i8)out4, (v16i8)out5); \ tmp5_m = (v16u8)__msa_ilvod_b((v16i8)out4, (v16i8)out5); \ out5 = (v16u8)__msa_ilvev_b((v16i8)out2, (v16i8)out3); \ tmp6_m = (v16u8)__msa_ilvod_b((v16i8)out2, (v16i8)out3); \ out7 = (v16u8)__msa_ilvev_b((v16i8)out0, (v16i8)out1); \ tmp7_m = (v16u8)__msa_ilvod_b((v16i8)out0, (v16i8)out1); \ \ ILVEV_H2_UB(tmp0_m, tmp1_m, out5, out7, tmp2_m, tmp3_m); \ out0 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ out4 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ \ tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp1_m, (v8i16)tmp0_m); \ tmp3_m = (v16u8)__msa_ilvod_h((v8i16)out7, (v8i16)out5); \ out2 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ out6 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ \ ILVEV_H2_UB(tmp4_m, tmp5_m, tmp6_m, tmp7_m, tmp2_m, tmp3_m); \ out1 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ out5 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ \ tmp2_m = (v16u8)__msa_ilvod_h((v8i16)tmp5_m, (v8i16)tmp4_m); \ tmp3_m = (v16u8)__msa_ilvod_h((v8i16)tmp7_m, (v8i16)tmp6_m); \ out3 = (v16u8)__msa_ilvev_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ out7 = (v16u8)__msa_ilvod_w((v4i32)tmp3_m, (v4i32)tmp2_m); \ } /* Description : Transpose 4x4 block with half 
word elements in vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1, out2, out3 Return Type - signed halfword */ #define TRANSPOSE4x4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v8i16 s0_m, s1_m; \ \ ILVR_H2_SH(in1, in0, in3, in2, s0_m, s1_m); \ ILVRL_W2_SH(s1_m, s0_m, out0, out2); \ out1 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out0); \ out3 = (v8i16)__msa_ilvl_d((v2i64)out0, (v2i64)out2); \ } /* Description : Transpose 4x8 block with half word elements in vectors Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 Outputs - out0, out1, out2, out3, out4, out5, out6, out7 Return Type - signed halfword */ #define TRANSPOSE4X8_SH_SH(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, \ out2, out3, out4, out5, out6, out7) \ { \ v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ v8i16 tmp0_n, tmp1_n, tmp2_n, tmp3_n; \ v8i16 zero_m = { 0 }; \ \ ILVR_H4_SH(in1, in0, in3, in2, in5, in4, in7, in6, tmp0_n, tmp1_n, tmp2_n, \ tmp3_n); \ ILVRL_W2_SH(tmp1_n, tmp0_n, tmp0_m, tmp2_m); \ ILVRL_W2_SH(tmp3_n, tmp2_n, tmp1_m, tmp3_m); \ \ out0 = (v8i16)__msa_ilvr_d((v2i64)tmp1_m, (v2i64)tmp0_m); \ out1 = (v8i16)__msa_ilvl_d((v2i64)tmp1_m, (v2i64)tmp0_m); \ out2 = (v8i16)__msa_ilvr_d((v2i64)tmp3_m, (v2i64)tmp2_m); \ out3 = (v8i16)__msa_ilvl_d((v2i64)tmp3_m, (v2i64)tmp2_m); \ \ out4 = zero_m; \ out5 = zero_m; \ out6 = zero_m; \ out7 = zero_m; \ } /* Description : Transpose 8x4 block with half word elements in vectors Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 Outputs - out0, out1, out2, out3, out4, out5, out6, out7 Return Type - signed halfword */ #define TRANSPOSE8X4_SH_SH(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ \ ILVR_H2_SH(in1, in0, in3, in2, tmp0_m, tmp1_m); \ ILVL_H2_SH(in1, in0, in3, in2, tmp2_m, tmp3_m); \ ILVR_W2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out0, out2); \ ILVL_W2_SH(tmp1_m, tmp0_m, tmp3_m, tmp2_m, out1, out3); \ } /* Description : Transpose 8x8 block with half word elements in vectors Arguments : Inputs - in0, in1, in2, in3, in4, in5, in6, in7 Outputs - out0, out1, out2, out3, out4, out5, out6, out7 Return Type - as per RTYPE */ #define TRANSPOSE8x8_H(RTYPE, in0, in1, in2, in3, in4, in5, in6, in7, out0, \ out1, out2, out3, out4, out5, out6, out7) \ { \ v8i16 s0_m, s1_m; \ v8i16 tmp0_m, tmp1_m, tmp2_m, tmp3_m; \ v8i16 tmp4_m, tmp5_m, tmp6_m, tmp7_m; \ \ ILVR_H2_SH(in6, in4, in7, in5, s0_m, s1_m); \ ILVRL_H2_SH(s1_m, s0_m, tmp0_m, tmp1_m); \ ILVL_H2_SH(in6, in4, in7, in5, s0_m, s1_m); \ ILVRL_H2_SH(s1_m, s0_m, tmp2_m, tmp3_m); \ ILVR_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \ ILVRL_H2_SH(s1_m, s0_m, tmp4_m, tmp5_m); \ ILVL_H2_SH(in2, in0, in3, in1, s0_m, s1_m); \ ILVRL_H2_SH(s1_m, s0_m, tmp6_m, tmp7_m); \ PCKEV_D4(RTYPE, tmp0_m, tmp4_m, tmp1_m, tmp5_m, tmp2_m, tmp6_m, tmp3_m, \ tmp7_m, out0, out2, out4, out6); \ out1 = (RTYPE)__msa_pckod_d((v2i64)tmp0_m, (v2i64)tmp4_m); \ out3 = (RTYPE)__msa_pckod_d((v2i64)tmp1_m, (v2i64)tmp5_m); \ out5 = (RTYPE)__msa_pckod_d((v2i64)tmp2_m, (v2i64)tmp6_m); \ out7 = (RTYPE)__msa_pckod_d((v2i64)tmp3_m, (v2i64)tmp7_m); \ } #define TRANSPOSE8x8_SH_SH(...) 
TRANSPOSE8x8_H(v8i16, __VA_ARGS__) /* Description : Transpose 4x4 block with word elements in vectors Arguments : Inputs - in0, in1, in2, in3 Outputs - out0, out1, out2, out3 Return Type - signed word */ #define TRANSPOSE4x4_SW_SW(in0, in1, in2, in3, out0, out1, out2, out3) \ { \ v4i32 s0_m, s1_m, s2_m, s3_m; \ \ ILVRL_W2_SW(in1, in0, s0_m, s1_m); \ ILVRL_W2_SW(in3, in2, s2_m, s3_m); \ \ out0 = (v4i32)__msa_ilvr_d((v2i64)s2_m, (v2i64)s0_m); \ out1 = (v4i32)__msa_ilvl_d((v2i64)s2_m, (v2i64)s0_m); \ out2 = (v4i32)__msa_ilvr_d((v2i64)s3_m, (v2i64)s1_m); \ out3 = (v4i32)__msa_ilvl_d((v2i64)s3_m, (v2i64)s1_m); \ } /* Description : Add block 4x4 Arguments : Inputs - in0, in1, in2, in3, pdst, stride Details : Least significant 4 bytes from each input vector are added to the destination bytes, clipped between 0-255 and stored. */ #define ADDBLK_ST4x4_UB(in0, in1, in2, in3, pdst, stride) \ { \ uint32_t src0_m, src1_m, src2_m, src3_m; \ v8i16 inp0_m, inp1_m, res0_m, res1_m; \ v16i8 dst0_m = { 0 }; \ v16i8 dst1_m = { 0 }; \ v16i8 zero_m = { 0 }; \ \ ILVR_D2_SH(in1, in0, in3, in2, inp0_m, inp1_m) \ LW4(pdst, stride, src0_m, src1_m, src2_m, src3_m); \ INSERT_W2_SB(src0_m, src1_m, dst0_m); \ INSERT_W2_SB(src2_m, src3_m, dst1_m); \ ILVR_B2_SH(zero_m, dst0_m, zero_m, dst1_m, res0_m, res1_m); \ ADD2(res0_m, inp0_m, res1_m, inp1_m, res0_m, res1_m); \ CLIP_SH2_0_255(res0_m, res1_m); \ PCKEV_B2_SB(res0_m, res0_m, res1_m, res1_m, dst0_m, dst1_m); \ ST4x4_UB(dst0_m, dst1_m, 0, 1, 0, 1, pdst, stride); \ } /* Description : Pack even elements of input vectors & xor with 128 Arguments : Inputs - in0, in1 Output - out_m Return Type - unsigned byte Details : Signed byte even elements from 'in0' and 'in1' are packed together in one vector and the resulting vector is xor'ed with 128 to shift the range from signed to unsigned byte */ #define PCKEV_XORI128_UB(in0, in1) \ ({ \ v16u8 out_m; \ \ out_m = (v16u8)__msa_pckev_b((v16i8)in1, (v16i8)in0); \ out_m = (v16u8)__msa_xori_b((v16u8)out_m, 128); \ out_m; \ }) /* Description : Converts inputs to unsigned bytes, interleave, average & store as 8x4 unsigned byte block Arguments : Inputs - in0, in1, in2, in3, dst0, dst1, pdst, stride */ #define CONVERT_UB_AVG_ST8x4_UB(in0, in1, in2, in3, dst0, dst1, pdst, stride) \ { \ v16u8 tmp0_m, tmp1_m; \ uint8_t *pdst_m = (uint8_t *)(pdst); \ \ tmp0_m = PCKEV_XORI128_UB(in0, in1); \ tmp1_m = PCKEV_XORI128_UB(in2, in3); \ AVER_UB2_UB(tmp0_m, dst0, tmp1_m, dst1, tmp0_m, tmp1_m); \ ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \ } /* Description : Pack even byte elements and store byte vector in destination memory Arguments : Inputs - in0, in1, pdst */ #define PCKEV_ST_SB(in0, in1, pdst) \ { \ v16i8 tmp_m; \ \ tmp_m = __msa_pckev_b((v16i8)in1, (v16i8)in0); \ ST_SB(tmp_m, (pdst)); \ } /* Description : Horizontal 2 tap filter kernel code Arguments : Inputs - in0, in1, mask, coeff, shift */ #define HORIZ_2TAP_FILT_UH(in0, in1, mask, coeff, shift) \ ({ \ v16i8 tmp0_m; \ v8u16 tmp1_m; \ \ tmp0_m = __msa_vshf_b((v16i8)mask, (v16i8)in1, (v16i8)in0); \ tmp1_m = __msa_dotp_u_h((v16u8)tmp0_m, (v16u8)coeff); \ tmp1_m = (v8u16)__msa_srari_h((v8i16)tmp1_m, shift); \ \ tmp1_m; \ }) #endif // VPX_VPX_DSP_MIPS_MACROS_MSA_H_ libvpx-1.8.2/vpx_dsp/mips/sad_mmi.c000066400000000000000000001216601357355204000172500ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_dsp_rtcd.h" #include "vpx_ports/asmdefs_mmi.h" #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" #define SAD_SRC_REF_ABS_SUB_64 \ "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \ "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \ "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ "gsldlc1 %[ftmp1], 0x27(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x20(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x2f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x28(%[src]) \n\t" \ "gsldlc1 %[ftmp3], 0x27(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x20(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x2f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x28(%[ref]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ "gsldlc1 %[ftmp1], 0x37(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x30(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x3f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x38(%[src]) \n\t" \ "gsldlc1 %[ftmp3], 0x37(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x30(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x3f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x38(%[ref]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" #define SAD_SRC_REF_ABS_SUB_32 \ "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \ "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \ "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \ "pasubub %[ftmp1], 
%[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" #define SAD_SRC_REF_ABS_SUB_16 \ "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \ "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" #define SAD_SRC_REF_ABS_SUB_8 \ "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x07(%[ref]) \n\t" \ "gsldrc1 %[ftmp2], 0x00(%[ref]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t" #if _MIPS_SIM == _ABIO32 #define SAD_SRC_REF_ABS_SUB_4 \ "ulw %[tmp0], 0x00(%[src]) \n\t" \ "mtc1 %[tmp0], %[ftmp1] \n\t" \ "ulw %[tmp0], 0x00(%[ref]) \n\t" \ "mtc1 %[tmp0], %[ftmp2] \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ "mthc1 $0, %[ftmp1] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t" #else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */ #define SAD_SRC_REF_ABS_SUB_4 \ "gslwlc1 %[ftmp1], 0x03(%[src]) \n\t" \ "gslwrc1 %[ftmp1], 0x00(%[src]) \n\t" \ "gslwlc1 %[ftmp2], 0x03(%[ref]) \n\t" \ "gslwrc1 %[ftmp2], 0x00(%[ref]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ "mthc1 $0, %[ftmp1] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t" #endif /* _MIPS_SIM == _ABIO32 */ #define SAD_SRC_AVGREF_ABS_SUB_64 \ "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp2], 0x0f(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp2], 0x08(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \ "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \ "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \ "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ "gsldlc1 %[ftmp1], 0x17(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp1], 0x10(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp2], 0x1f(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp2], 0x18(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \ "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \ "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \ "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" 
\ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ "gsldlc1 %[ftmp1], 0x27(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp1], 0x20(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp2], 0x2f(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp2], 0x28(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp3], 0x27(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x20(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x2f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x28(%[ref]) \n\t" \ "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \ "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \ "gsldlc1 %[ftmp1], 0x27(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x20(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x2f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x28(%[src]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ "gsldlc1 %[ftmp1], 0x37(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp1], 0x30(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp2], 0x3f(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp2], 0x38(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp3], 0x37(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x30(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x3f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x38(%[ref]) \n\t" \ "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \ "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \ "gsldlc1 %[ftmp1], 0x37(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x30(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x3f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x38(%[src]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" #define SAD_SRC_AVGREF_ABS_SUB_32 \ "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp2], 0x0f(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp2], 0x08(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \ "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \ "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \ "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" \ "gsldlc1 %[ftmp1], 0x17(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp1], 0x10(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp2], 0x1f(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp2], 0x18(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp3], 0x17(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x10(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x1f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x18(%[ref]) \n\t" \ "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \ "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \ "gsldlc1 %[ftmp1], 0x17(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x10(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x1f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x18(%[src]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" #define SAD_SRC_AVGREF_ABS_SUB_16 \ 
"gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp2], 0x0f(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp2], 0x08(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp3], 0x07(%[ref]) \n\t" \ "gsldrc1 %[ftmp3], 0x00(%[ref]) \n\t" \ "gsldlc1 %[ftmp4], 0x0f(%[ref]) \n\t" \ "gsldrc1 %[ftmp4], 0x08(%[ref]) \n\t" \ "pavgb %[ftmp3], %[ftmp1], %[ftmp3] \n\t" \ "pavgb %[ftmp4], %[ftmp2], %[ftmp4] \n\t" \ "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \ "gsldlc1 %[ftmp2], 0x0f(%[src]) \n\t" \ "gsldrc1 %[ftmp2], 0x08(%[src]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp3] \n\t" \ "pasubub %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "biadd %[ftmp2], %[ftmp2] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "paddw %[ftmp5], %[ftmp5], %[ftmp2] \n\t" #define SAD_SRC_AVGREF_ABS_SUB_8 \ "gsldlc1 %[ftmp1], 0x07(%[second_pred]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \ "gsldlc1 %[ftmp2], 0x07(%[ref]) \n\t" \ "gsldrc1 %[ftmp2], 0x00(%[ref]) \n\t" \ "pavgb %[ftmp2], %[ftmp1], %[ftmp2] \n\t" \ "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t" #if _MIPS_SIM == _ABIO32 #define SAD_SRC_AVGREF_ABS_SUB_4 \ "ulw %[tmp0], 0x00(%[second_pred]) \n\t" \ "mtc1 %[tmp0], %[ftmp1] \n\t" \ "ulw %[tmp0], 0x00(%[ref]) \n\t" \ "mtc1 %[tmp0], %[ftmp2] \n\t" \ "pavgb %[ftmp2], %[ftmp1], %[ftmp2] \n\t" \ "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ "mthc1 $0, %[ftmp1] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t" #else /* _MIPS_SIM == _ABI64 || _MIPS_SIM == _ABIN32 */ #define SAD_SRC_AVGREF_ABS_SUB_4 \ "gslwlc1 %[ftmp1], 0x03(%[second_pred]) \n\t" \ "gslwrc1 %[ftmp1], 0x00(%[second_pred]) \n\t" \ "gslwlc1 %[ftmp2], 0x03(%[ref]) \n\t" \ "gslwrc1 %[ftmp2], 0x00(%[ref]) \n\t" \ "pavgb %[ftmp2], %[ftmp1], %[ftmp2] \n\t" \ "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" \ "pasubub %[ftmp1], %[ftmp1], %[ftmp2] \n\t" \ "mthc1 $0, %[ftmp1] \n\t" \ "biadd %[ftmp1], %[ftmp1] \n\t" \ "paddw %[ftmp3], %[ftmp3], %[ftmp1] \n\t" #endif /* _MIPS_SIM == _ABIO32 */ // depending on call sites, pass **ref_array to avoid & in subsequent call and // de-dup with 4D below. #define sadMxNxK_mmi(m, n, k) \ void vpx_sad##m##x##n##x##k##_mmi(const uint8_t *src, int src_stride, \ const uint8_t *ref_array, int ref_stride, \ uint32_t *sad_array) { \ int i; \ for (i = 0; i < (k); ++i) \ sad_array[i] = \ vpx_sad##m##x##n##_mmi(src, src_stride, &ref_array[i], ref_stride); \ } // This appears to be equivalent to the above when k == 4 and refs is const #define sadMxNx4D_mmi(m, n) \ void vpx_sad##m##x##n##x4d_mmi(const uint8_t *src, int src_stride, \ const uint8_t *const ref_array[], \ int ref_stride, uint32_t *sad_array) { \ int i; \ for (i = 0; i < 4; ++i) \ sad_array[i] = \ vpx_sad##m##x##n##_mmi(src, src_stride, ref_array[i], ref_stride); \ } static inline unsigned int vpx_sad64x(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, int counter) { unsigned int sad; double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5; mips_reg l_counter = counter; __asm__ volatile ( "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "1: \n\t" // Include two loop body, to reduce loop time. 
SAD_SRC_REF_ABS_SUB_64 MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) SAD_SRC_REF_ABS_SUB_64 MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) MMI_ADDIU(%[counter], %[counter], -0x02) "bnez %[counter], 1b \n\t" "mfc1 %[sad], %[ftmp5] \n\t" : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride) ); return sad; } #define vpx_sad64xN(H) \ unsigned int vpx_sad64x##H##_mmi(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride) { \ return vpx_sad64x(src, src_stride, ref, ref_stride, H); \ } vpx_sad64xN(64); vpx_sad64xN(32); sadMxNx4D_mmi(64, 64); sadMxNx4D_mmi(64, 32); static inline unsigned int vpx_sad_avg64x(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, int counter) { unsigned int sad; double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5; mips_reg l_counter = counter; __asm__ volatile ( "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "1: \n\t" // Include two loop body, to reduce loop time. SAD_SRC_AVGREF_ABS_SUB_64 MMI_ADDIU(%[second_pred], %[second_pred], 0x40) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) SAD_SRC_AVGREF_ABS_SUB_64 MMI_ADDIU(%[second_pred], %[second_pred], 0x40) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) MMI_ADDIU(%[counter], %[counter], -0x02) "bnez %[counter], 1b \n\t" "mfc1 %[sad], %[ftmp5] \n\t" : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref), [second_pred]"+&r"((mips_reg)second_pred), [sad]"=&r"(sad) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride) ); return sad; } #define vpx_sad_avg64xN(H) \ unsigned int vpx_sad64x##H##_avg_mmi(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride, \ const uint8_t *second_pred) { \ return vpx_sad_avg64x(src, src_stride, ref, ref_stride, second_pred, H); \ } vpx_sad_avg64xN(64); vpx_sad_avg64xN(32); static inline unsigned int vpx_sad32x(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, int counter) { unsigned int sad; double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5; mips_reg l_counter = counter; __asm__ volatile ( "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "1: \n\t" // Include two loop body, to reduce loop time. 
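    // The gsldlc1/gsldrc1 pairs inside the macro load the left and right
    // parts of a possibly unaligned doubleword, so neither src nor ref is
    // required to be 8-byte aligned here.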
SAD_SRC_REF_ABS_SUB_32 MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) SAD_SRC_REF_ABS_SUB_32 MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) MMI_ADDIU(%[counter], %[counter], -0x02) "bnez %[counter], 1b \n\t" "mfc1 %[sad], %[ftmp5] \n\t" : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride) ); return sad; } #define vpx_sad32xN(H) \ unsigned int vpx_sad32x##H##_mmi(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride) { \ return vpx_sad32x(src, src_stride, ref, ref_stride, H); \ } vpx_sad32xN(64); vpx_sad32xN(32); vpx_sad32xN(16); sadMxNx4D_mmi(32, 64); sadMxNx4D_mmi(32, 32); sadMxNx4D_mmi(32, 16); static inline unsigned int vpx_sad_avg32x(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, int counter) { unsigned int sad; double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5; mips_reg l_counter = counter; __asm__ volatile ( "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "1: \n\t" // Include two loop body, to reduce loop time. SAD_SRC_AVGREF_ABS_SUB_32 MMI_ADDIU(%[second_pred], %[second_pred], 0x20) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) SAD_SRC_AVGREF_ABS_SUB_32 MMI_ADDIU(%[second_pred], %[second_pred], 0x20) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) MMI_ADDIU(%[counter], %[counter], -0x02) "bnez %[counter], 1b \n\t" "mfc1 %[sad], %[ftmp5] \n\t" : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref), [second_pred]"+&r"((mips_reg)second_pred), [sad]"=&r"(sad) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride) ); return sad; } #define vpx_sad_avg32xN(H) \ unsigned int vpx_sad32x##H##_avg_mmi(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride, \ const uint8_t *second_pred) { \ return vpx_sad_avg32x(src, src_stride, ref, ref_stride, second_pred, H); \ } vpx_sad_avg32xN(64); vpx_sad_avg32xN(32); vpx_sad_avg32xN(16); static inline unsigned int vpx_sad16x(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, int counter) { unsigned int sad; double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5; mips_reg l_counter = counter; __asm__ volatile ( "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "1: \n\t" // Include two loop body, to reduce loop time. 
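    // The running total stays well inside 32 bits for every supported block
    // size (at most 64 * 64 * 255), so the single mfc1 from the low word of
    // ftmp5 after the loop recovers the final SAD.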
SAD_SRC_REF_ABS_SUB_16 MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) SAD_SRC_REF_ABS_SUB_16 MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) MMI_ADDIU(%[counter], %[counter], -0x02) "bnez %[counter], 1b \n\t" "mfc1 %[sad], %[ftmp5] \n\t" : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride) ); return sad; } #define vpx_sad16xN(H) \ unsigned int vpx_sad16x##H##_mmi(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride) { \ return vpx_sad16x(src, src_stride, ref, ref_stride, H); \ } vpx_sad16xN(32); vpx_sad16xN(16); vpx_sad16xN(8); sadMxNxK_mmi(16, 16, 3); sadMxNxK_mmi(16, 16, 8); sadMxNxK_mmi(16, 8, 3); sadMxNxK_mmi(16, 8, 8); sadMxNx4D_mmi(16, 32); sadMxNx4D_mmi(16, 16); sadMxNx4D_mmi(16, 8); static inline unsigned int vpx_sad_avg16x(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, int counter) { unsigned int sad; double ftmp1, ftmp2, ftmp3, ftmp4, ftmp5; mips_reg l_counter = counter; __asm__ volatile ( "xor %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "1: \n\t" // Include two loop body, to reduce loop time. SAD_SRC_AVGREF_ABS_SUB_16 MMI_ADDIU(%[second_pred], %[second_pred], 0x10) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) SAD_SRC_AVGREF_ABS_SUB_16 MMI_ADDIU(%[second_pred], %[second_pred], 0x10) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) MMI_ADDIU(%[counter], %[counter], -0x02) "bnez %[counter], 1b \n\t" "mfc1 %[sad], %[ftmp5] \n\t" : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), [ftmp4]"=&f"(ftmp4), [ftmp5]"=&f"(ftmp5), [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref), [second_pred]"+&r"((mips_reg)second_pred), [sad]"=&r"(sad) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride) ); return sad; } #define vpx_sad_avg16xN(H) \ unsigned int vpx_sad16x##H##_avg_mmi(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride, \ const uint8_t *second_pred) { \ return vpx_sad_avg16x(src, src_stride, ref, ref_stride, second_pred, H); \ } vpx_sad_avg16xN(32); vpx_sad_avg16xN(16); vpx_sad_avg16xN(8); static inline unsigned int vpx_sad8x(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, int counter) { unsigned int sad; double ftmp1, ftmp2, ftmp3; mips_reg l_counter = counter; __asm__ volatile ( "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" "1: \n\t" // Include two loop body, to reduce loop time. 
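    // An 8-byte row fits one FP register, so this kernel needs only three
    // temporaries; the accumulator here is ftmp3 rather than ftmp5.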
SAD_SRC_REF_ABS_SUB_8 MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) SAD_SRC_REF_ABS_SUB_8 MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) MMI_ADDIU(%[counter], %[counter], -0x02) "bnez %[counter], 1b \n\t" "mfc1 %[sad], %[ftmp3] \n\t" : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride) ); return sad; } #define vpx_sad8xN(H) \ unsigned int vpx_sad8x##H##_mmi(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride) { \ return vpx_sad8x(src, src_stride, ref, ref_stride, H); \ } vpx_sad8xN(16); vpx_sad8xN(8); vpx_sad8xN(4); sadMxNxK_mmi(8, 16, 3); sadMxNxK_mmi(8, 16, 8); sadMxNxK_mmi(8, 8, 3); sadMxNxK_mmi(8, 8, 8); sadMxNx4D_mmi(8, 16); sadMxNx4D_mmi(8, 8); sadMxNx4D_mmi(8, 4); static inline unsigned int vpx_sad_avg8x(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, int counter) { unsigned int sad; double ftmp1, ftmp2, ftmp3; mips_reg l_counter = counter; __asm__ volatile ( "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" "1: \n\t" // Include two loop body, to reduce loop time. SAD_SRC_AVGREF_ABS_SUB_8 MMI_ADDIU(%[second_pred], %[second_pred], 0x08) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) SAD_SRC_AVGREF_ABS_SUB_8 MMI_ADDIU(%[second_pred], %[second_pred], 0x08) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) MMI_ADDIU(%[counter], %[counter], -0x02) "bnez %[counter], 1b \n\t" "mfc1 %[sad], %[ftmp3] \n\t" : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref), [second_pred]"+&r"((mips_reg)second_pred), [sad]"=&r"(sad) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride) ); return sad; } #define vpx_sad_avg8xN(H) \ unsigned int vpx_sad8x##H##_avg_mmi(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride, \ const uint8_t *second_pred) { \ return vpx_sad_avg8x(src, src_stride, ref, ref_stride, second_pred, H); \ } vpx_sad_avg8xN(16); vpx_sad_avg8xN(8); vpx_sad_avg8xN(4); static inline unsigned int vpx_sad4x(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, int counter) { unsigned int sad; double ftmp1, ftmp2, ftmp3; mips_reg l_counter = counter; __asm__ volatile ( "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" "1: \n\t" // Include two loop body, to reduce loop time. 
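    // Only one 32-bit word is loaded per row (ulw + mtc1 on the O32 ABI,
    // gslwlc1/gslwrc1 on the 64-bit ABIs); mthc1 $0 inside the macro then
    // clears the upper half of the register so that biadd sums just the
    // four valid byte differences.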
SAD_SRC_REF_ABS_SUB_4 MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) SAD_SRC_REF_ABS_SUB_4 MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) MMI_ADDIU(%[counter], %[counter], -0x02) "bnez %[counter], 1b \n\t" "mfc1 %[sad], %[ftmp3] \n\t" : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref), [sad]"=&r"(sad) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride) ); return sad; } #define vpx_sad4xN(H) \ unsigned int vpx_sad4x##H##_mmi(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride) { \ return vpx_sad4x(src, src_stride, ref, ref_stride, H); \ } vpx_sad4xN(8); vpx_sad4xN(4); sadMxNxK_mmi(4, 4, 3); sadMxNxK_mmi(4, 4, 8); sadMxNx4D_mmi(4, 8); sadMxNx4D_mmi(4, 4); static inline unsigned int vpx_sad_avg4x(const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, const uint8_t *second_pred, int counter) { unsigned int sad; double ftmp1, ftmp2, ftmp3; mips_reg l_counter = counter; __asm__ volatile ( "xor %[ftmp3], %[ftmp3], %[ftmp3] \n\t" "1: \n\t" // Include two loop body, to reduce loop time. SAD_SRC_AVGREF_ABS_SUB_4 MMI_ADDIU(%[second_pred], %[second_pred], 0x04) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) SAD_SRC_AVGREF_ABS_SUB_4 MMI_ADDIU(%[second_pred], %[second_pred], 0x04) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[ref], %[ref], %[ref_stride]) MMI_ADDIU(%[counter], %[counter], -0x02) "bnez %[counter], 1b \n\t" "mfc1 %[sad], %[ftmp3] \n\t" : [ftmp1]"=&f"(ftmp1), [ftmp2]"=&f"(ftmp2), [ftmp3]"=&f"(ftmp3), [counter]"+&r"(l_counter), [src]"+&r"(src), [ref]"+&r"(ref), [second_pred]"+&r"((mips_reg)second_pred), [sad]"=&r"(sad) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride) ); return sad; } #define vpx_sad_avg4xN(H) \ unsigned int vpx_sad4x##H##_avg_mmi(const uint8_t *src, int src_stride, \ const uint8_t *ref, int ref_stride, \ const uint8_t *second_pred) { \ return vpx_sad_avg4x(src, src_stride, ref, ref_stride, second_pred, H); \ } vpx_sad_avg4xN(8); vpx_sad_avg4xN(4); libvpx-1.8.2/vpx_dsp/mips/sad_msa.c000066400000000000000000001244661357355204000172550ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" #define SAD_INSVE_W4(RTYPE, in0, in1, in2, in3, out) \ { \ out = (RTYPE)__msa_insve_w((v4i32)out, 0, (v4i32)in0); \ out = (RTYPE)__msa_insve_w((v4i32)out, 1, (v4i32)in1); \ out = (RTYPE)__msa_insve_w((v4i32)out, 2, (v4i32)in2); \ out = (RTYPE)__msa_insve_w((v4i32)out, 3, (v4i32)in3); \ } #define SAD_INSVE_W4_UB(...) 
SAD_INSVE_W4(v16u8, __VA_ARGS__) static uint32_t sad_4width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height) { int32_t ht_cnt; uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3; v16u8 src = { 0 }; v16u8 ref = { 0 }; v16u8 diff; v8u16 sad = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LW4(src_ptr, src_stride, src0, src1, src2, src3); src_ptr += (4 * src_stride); LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); ref_ptr += (4 * ref_stride); INSERT_W4_UB(src0, src1, src2, src3, src); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); diff = __msa_asub_u_b(src, ref); sad += __msa_hadd_u_h(diff, diff); } return HADD_UH_U32(sad); } static uint32_t sad_8width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height) { int32_t ht_cnt; v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3; v8u16 sad = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3); ref += (4 * ref_stride); PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1, ref0, ref1); sad += SAD_UB2_UH(src0, src1, ref0, ref1); } return HADD_UH_U32(sad); } static uint32_t sad_16width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height) { int32_t ht_cnt; v16u8 src0, src1, ref0, ref1; v8u16 sad = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB2(src, src_stride, src0, src1); src += (2 * src_stride); LD_UB2(ref, ref_stride, ref0, ref1); ref += (2 * ref_stride); sad += SAD_UB2_UH(src0, src1, ref0, ref1); LD_UB2(src, src_stride, src0, src1); src += (2 * src_stride); LD_UB2(ref, ref_stride, ref0, ref1); ref += (2 * ref_stride); sad += SAD_UB2_UH(src0, src1, ref0, ref1); } return HADD_UH_U32(sad); } static uint32_t sad_32width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height) { int32_t ht_cnt; v16u8 src0, src1, ref0, ref1; v8u16 sad = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB2(src, 16, src0, src1); src += src_stride; LD_UB2(ref, 16, ref0, ref1); ref += ref_stride; sad += SAD_UB2_UH(src0, src1, ref0, ref1); LD_UB2(src, 16, src0, src1); src += src_stride; LD_UB2(ref, 16, ref0, ref1); ref += ref_stride; sad += SAD_UB2_UH(src0, src1, ref0, ref1); LD_UB2(src, 16, src0, src1); src += src_stride; LD_UB2(ref, 16, ref0, ref1); ref += ref_stride; sad += SAD_UB2_UH(src0, src1, ref0, ref1); LD_UB2(src, 16, src0, src1); src += src_stride; LD_UB2(ref, 16, ref0, ref1); ref += ref_stride; sad += SAD_UB2_UH(src0, src1, ref0, ref1); } return HADD_UH_U32(sad); } static uint32_t sad_64width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height) { int32_t ht_cnt; uint32_t sad = 0; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref2, ref3; v8u16 sad0 = { 0 }; v8u16 sad1 = { 0 }; for (ht_cnt = (height >> 1); ht_cnt--;) { LD_UB4(src, 16, src0, src1, src2, src3); src += src_stride; LD_UB4(ref, 16, ref0, ref1, ref2, ref3); ref += ref_stride; sad0 += SAD_UB2_UH(src0, src1, ref0, ref1); sad1 += SAD_UB2_UH(src2, src3, ref2, ref3); LD_UB4(src, 16, src0, src1, src2, src3); src += src_stride; LD_UB4(ref, 16, ref0, ref1, ref2, ref3); ref += ref_stride; sad0 += SAD_UB2_UH(src0, src1, ref0, ref1); sad1 += SAD_UB2_UH(src2, src3, ref2, ref3); } sad = HADD_UH_U32(sad0); sad += HADD_UH_U32(sad1); return sad; } static void sad_4width_x3_msa(const uint8_t *src_ptr, int32_t src_stride, const 
uint8_t *ref_ptr, int32_t ref_stride, int32_t height, uint32_t *sad_array) { int32_t ht_cnt; uint32_t src0, src1, src2, src3; v16u8 src = { 0 }; v16u8 ref = { 0 }; v16u8 ref0, ref1, ref2, ref3, diff; v8u16 sad0 = { 0 }; v8u16 sad1 = { 0 }; v8u16 sad2 = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LW4(src_ptr, src_stride, src0, src1, src2, src3); src_ptr += (4 * src_stride); INSERT_W4_UB(src0, src1, src2, src3, src); LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); ref_ptr += (4 * ref_stride); SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); diff = __msa_asub_u_b(src, ref); sad0 += __msa_hadd_u_h(diff, diff); SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); diff = __msa_asub_u_b(src, ref); sad1 += __msa_hadd_u_h(diff, diff); SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); diff = __msa_asub_u_b(src, ref); sad2 += __msa_hadd_u_h(diff, diff); } sad_array[0] = HADD_UH_U32(sad0); sad_array[1] = HADD_UH_U32(sad1); sad_array[2] = HADD_UH_U32(sad2); } static void sad_8width_x3_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height, uint32_t *sad_array) { int32_t ht_cnt; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref00, ref11, ref22, ref33; v8u16 sad0 = { 0 }; v8u16 sad1 = { 0 }; v8u16 sad2 = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); LD_UB4(ref, ref_stride, ref00, ref11, ref22, ref33); ref += (4 * ref_stride); PCKEV_D4_UB(src1, src0, src3, src2, ref11, ref00, ref33, ref22, src0, src1, ref0, ref1); sad0 += SAD_UB2_UH(src0, src1, ref0, ref1); SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); sad1 += SAD_UB2_UH(src0, src1, ref0, ref1); SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); sad2 += SAD_UB2_UH(src0, src1, ref0, ref1); } sad_array[0] = HADD_UH_U32(sad0); sad_array[1] = HADD_UH_U32(sad1); sad_array[2] = HADD_UH_U32(sad2); } static void sad_16width_x3_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height, uint32_t *sad_array) { int32_t ht_cnt; v16u8 src, ref, ref0, ref1, diff; v8u16 sad0 = { 0 }; v8u16 sad1 = { 0 }; v8u16 sad2 = { 0 }; for (ht_cnt = (height >> 1); ht_cnt--;) { src = LD_UB(src_ptr); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; diff = __msa_asub_u_b(src, ref0); sad0 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 1); diff = __msa_asub_u_b(src, ref); sad1 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 2); diff = __msa_asub_u_b(src, ref); sad2 += __msa_hadd_u_h(diff, diff); src = LD_UB(src_ptr); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; diff = __msa_asub_u_b(src, ref0); sad0 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 1); diff = __msa_asub_u_b(src, ref); sad1 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 2); diff = __msa_asub_u_b(src, ref); sad2 += __msa_hadd_u_h(diff, diff); } sad_array[0] = HADD_UH_U32(sad0); sad_array[1] = HADD_UH_U32(sad1); sad_array[2] = 
HADD_UH_U32(sad2); } static void sad_4width_x8_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height, uint32_t *sad_array) { int32_t ht_cnt; uint32_t src0, src1, src2, src3; v16u8 ref0, ref1, ref2, ref3, diff; v16u8 src = { 0 }; v16u8 ref = { 0 }; v8u16 sad0 = { 0 }; v8u16 sad1 = { 0 }; v8u16 sad2 = { 0 }; v8u16 sad3 = { 0 }; v8u16 sad4 = { 0 }; v8u16 sad5 = { 0 }; v8u16 sad6 = { 0 }; v8u16 sad7 = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LW4(src_ptr, src_stride, src0, src1, src2, src3); INSERT_W4_UB(src0, src1, src2, src3, src); src_ptr += (4 * src_stride); LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); ref_ptr += (4 * ref_stride); SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); diff = __msa_asub_u_b(src, ref); sad0 += __msa_hadd_u_h(diff, diff); SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); diff = __msa_asub_u_b(src, ref); sad1 += __msa_hadd_u_h(diff, diff); SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); diff = __msa_asub_u_b(src, ref); sad2 += __msa_hadd_u_h(diff, diff); SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); diff = __msa_asub_u_b(src, ref); sad3 += __msa_hadd_u_h(diff, diff); SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); diff = __msa_asub_u_b(src, ref); sad4 += __msa_hadd_u_h(diff, diff); SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); diff = __msa_asub_u_b(src, ref); sad5 += __msa_hadd_u_h(diff, diff); SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); diff = __msa_asub_u_b(src, ref); sad6 += __msa_hadd_u_h(diff, diff); SLDI_B2_UB(ref0, ref1, ref0, ref1, ref0, ref1, 1); SLDI_B2_UB(ref2, ref3, ref2, ref3, ref2, ref3, 1); SAD_INSVE_W4_UB(ref0, ref1, ref2, ref3, ref); diff = __msa_asub_u_b(src, ref); sad7 += __msa_hadd_u_h(diff, diff); } sad_array[0] = HADD_UH_U32(sad0); sad_array[1] = HADD_UH_U32(sad1); sad_array[2] = HADD_UH_U32(sad2); sad_array[3] = HADD_UH_U32(sad3); sad_array[4] = HADD_UH_U32(sad4); sad_array[5] = HADD_UH_U32(sad5); sad_array[6] = HADD_UH_U32(sad6); sad_array[7] = HADD_UH_U32(sad7); } static void sad_8width_x8_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height, uint32_t *sad_array) { int32_t ht_cnt; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref00, ref11, ref22, ref33; v8u16 sad0 = { 0 }; v8u16 sad1 = { 0 }; v8u16 sad2 = { 0 }; v8u16 sad3 = { 0 }; v8u16 sad4 = { 0 }; v8u16 sad5 = { 0 }; v8u16 sad6 = { 0 }; v8u16 sad7 = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); LD_UB4(ref, ref_stride, ref00, ref11, ref22, ref33); ref += (4 * ref_stride); PCKEV_D4_UB(src1, src0, src3, src2, ref11, ref00, ref33, ref22, src0, src1, ref0, ref1); sad0 += SAD_UB2_UH(src0, src1, ref0, ref1); SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); sad1 += SAD_UB2_UH(src0, src1, ref0, 
ref1); SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); sad2 += SAD_UB2_UH(src0, src1, ref0, ref1); SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); sad3 += SAD_UB2_UH(src0, src1, ref0, ref1); SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); sad4 += SAD_UB2_UH(src0, src1, ref0, ref1); SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); sad5 += SAD_UB2_UH(src0, src1, ref0, ref1); SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); sad6 += SAD_UB2_UH(src0, src1, ref0, ref1); SLDI_B2_UB(ref00, ref11, ref00, ref11, ref00, ref11, 1); SLDI_B2_UB(ref22, ref33, ref22, ref33, ref22, ref33, 1); PCKEV_D2_UB(ref11, ref00, ref33, ref22, ref0, ref1); sad7 += SAD_UB2_UH(src0, src1, ref0, ref1); } sad_array[0] = HADD_UH_U32(sad0); sad_array[1] = HADD_UH_U32(sad1); sad_array[2] = HADD_UH_U32(sad2); sad_array[3] = HADD_UH_U32(sad3); sad_array[4] = HADD_UH_U32(sad4); sad_array[5] = HADD_UH_U32(sad5); sad_array[6] = HADD_UH_U32(sad6); sad_array[7] = HADD_UH_U32(sad7); } static void sad_16width_x8_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height, uint32_t *sad_array) { int32_t ht_cnt; v16u8 src, ref0, ref1, ref; v16u8 diff; v8u16 sad0 = { 0 }; v8u16 sad1 = { 0 }; v8u16 sad2 = { 0 }; v8u16 sad3 = { 0 }; v8u16 sad4 = { 0 }; v8u16 sad5 = { 0 }; v8u16 sad6 = { 0 }; v8u16 sad7 = { 0 }; for (ht_cnt = (height >> 1); ht_cnt--;) { src = LD_UB(src_ptr); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; diff = __msa_asub_u_b(src, ref0); sad0 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 1); diff = __msa_asub_u_b(src, ref); sad1 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 2); diff = __msa_asub_u_b(src, ref); sad2 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 3); diff = __msa_asub_u_b(src, ref); sad3 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 4); diff = __msa_asub_u_b(src, ref); sad4 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 5); diff = __msa_asub_u_b(src, ref); sad5 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 6); diff = __msa_asub_u_b(src, ref); sad6 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 7); diff = __msa_asub_u_b(src, ref); sad7 += __msa_hadd_u_h(diff, diff); src = LD_UB(src_ptr); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; diff = __msa_asub_u_b(src, ref0); sad0 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 1); diff = __msa_asub_u_b(src, ref); sad1 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 2); diff = __msa_asub_u_b(src, ref); sad2 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 3); diff = __msa_asub_u_b(src, ref); sad3 += __msa_hadd_u_h(diff, 
diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 4); diff = __msa_asub_u_b(src, ref); sad4 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 5); diff = __msa_asub_u_b(src, ref); sad5 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 6); diff = __msa_asub_u_b(src, ref); sad6 += __msa_hadd_u_h(diff, diff); ref = (v16u8)__msa_sldi_b((v16i8)ref1, (v16i8)ref0, 7); diff = __msa_asub_u_b(src, ref); sad7 += __msa_hadd_u_h(diff, diff); } sad_array[0] = HADD_UH_U32(sad0); sad_array[1] = HADD_UH_U32(sad1); sad_array[2] = HADD_UH_U32(sad2); sad_array[3] = HADD_UH_U32(sad3); sad_array[4] = HADD_UH_U32(sad4); sad_array[5] = HADD_UH_U32(sad5); sad_array[6] = HADD_UH_U32(sad6); sad_array[7] = HADD_UH_U32(sad7); } static void sad_4width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *const aref_ptr[], int32_t ref_stride, int32_t height, uint32_t *sad_array) { const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr; int32_t ht_cnt; uint32_t src0, src1, src2, src3; uint32_t ref0, ref1, ref2, ref3; v16u8 src = { 0 }; v16u8 ref = { 0 }; v16u8 diff; v8u16 sad0 = { 0 }; v8u16 sad1 = { 0 }; v8u16 sad2 = { 0 }; v8u16 sad3 = { 0 }; ref0_ptr = aref_ptr[0]; ref1_ptr = aref_ptr[1]; ref2_ptr = aref_ptr[2]; ref3_ptr = aref_ptr[3]; for (ht_cnt = (height >> 2); ht_cnt--;) { LW4(src_ptr, src_stride, src0, src1, src2, src3); INSERT_W4_UB(src0, src1, src2, src3, src); src_ptr += (4 * src_stride); LW4(ref0_ptr, ref_stride, ref0, ref1, ref2, ref3); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); ref0_ptr += (4 * ref_stride); diff = __msa_asub_u_b(src, ref); sad0 += __msa_hadd_u_h(diff, diff); LW4(ref1_ptr, ref_stride, ref0, ref1, ref2, ref3); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); ref1_ptr += (4 * ref_stride); diff = __msa_asub_u_b(src, ref); sad1 += __msa_hadd_u_h(diff, diff); LW4(ref2_ptr, ref_stride, ref0, ref1, ref2, ref3); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); ref2_ptr += (4 * ref_stride); diff = __msa_asub_u_b(src, ref); sad2 += __msa_hadd_u_h(diff, diff); LW4(ref3_ptr, ref_stride, ref0, ref1, ref2, ref3); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); ref3_ptr += (4 * ref_stride); diff = __msa_asub_u_b(src, ref); sad3 += __msa_hadd_u_h(diff, diff); } sad_array[0] = HADD_UH_U32(sad0); sad_array[1] = HADD_UH_U32(sad1); sad_array[2] = HADD_UH_U32(sad2); sad_array[3] = HADD_UH_U32(sad3); } static void sad_8width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *const aref_ptr[], int32_t ref_stride, int32_t height, uint32_t *sad_array) { int32_t ht_cnt; const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7; v16u8 ref8, ref9, ref10, ref11, ref12, ref13, ref14, ref15; v8u16 sad0 = { 0 }; v8u16 sad1 = { 0 }; v8u16 sad2 = { 0 }; v8u16 sad3 = { 0 }; ref0_ptr = aref_ptr[0]; ref1_ptr = aref_ptr[1]; ref2_ptr = aref_ptr[2]; ref3_ptr = aref_ptr[3]; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB4(src_ptr, src_stride, src0, src1, src2, src3); src_ptr += (4 * src_stride); LD_UB4(ref0_ptr, ref_stride, ref0, ref1, ref2, ref3); ref0_ptr += (4 * ref_stride); LD_UB4(ref1_ptr, ref_stride, ref4, ref5, ref6, ref7); ref1_ptr += (4 * ref_stride); LD_UB4(ref2_ptr, ref_stride, ref8, ref9, ref10, ref11); ref2_ptr += (4 * ref_stride); LD_UB4(ref3_ptr, ref_stride, ref12, ref13, ref14, ref15); ref3_ptr += (4 * ref_stride); PCKEV_D2_UB(src1, src0, src3, src2, src0, src1); PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1); sad0 += SAD_UB2_UH(src0, src1, ref0, 
ref1); PCKEV_D2_UB(ref5, ref4, ref7, ref6, ref0, ref1); sad1 += SAD_UB2_UH(src0, src1, ref0, ref1); PCKEV_D2_UB(ref9, ref8, ref11, ref10, ref0, ref1); sad2 += SAD_UB2_UH(src0, src1, ref0, ref1); PCKEV_D2_UB(ref13, ref12, ref15, ref14, ref0, ref1); sad3 += SAD_UB2_UH(src0, src1, ref0, ref1); } sad_array[0] = HADD_UH_U32(sad0); sad_array[1] = HADD_UH_U32(sad1); sad_array[2] = HADD_UH_U32(sad2); sad_array[3] = HADD_UH_U32(sad3); } static void sad_16width_x4d_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *const aref_ptr[], int32_t ref_stride, int32_t height, uint32_t *sad_array) { int32_t ht_cnt; const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr; v16u8 src, ref0, ref1, ref2, ref3, diff; v8u16 sad0 = { 0 }; v8u16 sad1 = { 0 }; v8u16 sad2 = { 0 }; v8u16 sad3 = { 0 }; ref0_ptr = aref_ptr[0]; ref1_ptr = aref_ptr[1]; ref2_ptr = aref_ptr[2]; ref3_ptr = aref_ptr[3]; for (ht_cnt = (height >> 1); ht_cnt--;) { src = LD_UB(src_ptr); src_ptr += src_stride; ref0 = LD_UB(ref0_ptr); ref0_ptr += ref_stride; ref1 = LD_UB(ref1_ptr); ref1_ptr += ref_stride; ref2 = LD_UB(ref2_ptr); ref2_ptr += ref_stride; ref3 = LD_UB(ref3_ptr); ref3_ptr += ref_stride; diff = __msa_asub_u_b(src, ref0); sad0 += __msa_hadd_u_h(diff, diff); diff = __msa_asub_u_b(src, ref1); sad1 += __msa_hadd_u_h(diff, diff); diff = __msa_asub_u_b(src, ref2); sad2 += __msa_hadd_u_h(diff, diff); diff = __msa_asub_u_b(src, ref3); sad3 += __msa_hadd_u_h(diff, diff); src = LD_UB(src_ptr); src_ptr += src_stride; ref0 = LD_UB(ref0_ptr); ref0_ptr += ref_stride; ref1 = LD_UB(ref1_ptr); ref1_ptr += ref_stride; ref2 = LD_UB(ref2_ptr); ref2_ptr += ref_stride; ref3 = LD_UB(ref3_ptr); ref3_ptr += ref_stride; diff = __msa_asub_u_b(src, ref0); sad0 += __msa_hadd_u_h(diff, diff); diff = __msa_asub_u_b(src, ref1); sad1 += __msa_hadd_u_h(diff, diff); diff = __msa_asub_u_b(src, ref2); sad2 += __msa_hadd_u_h(diff, diff); diff = __msa_asub_u_b(src, ref3); sad3 += __msa_hadd_u_h(diff, diff); } sad_array[0] = HADD_UH_U32(sad0); sad_array[1] = HADD_UH_U32(sad1); sad_array[2] = HADD_UH_U32(sad2); sad_array[3] = HADD_UH_U32(sad3); } static void sad_32width_x4d_msa(const uint8_t *src, int32_t src_stride, const uint8_t *const aref_ptr[], int32_t ref_stride, int32_t height, uint32_t *sad_array) { const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr; int32_t ht_cnt; v16u8 src0, src1, ref0, ref1; v8u16 sad0 = { 0 }; v8u16 sad1 = { 0 }; v8u16 sad2 = { 0 }; v8u16 sad3 = { 0 }; ref0_ptr = aref_ptr[0]; ref1_ptr = aref_ptr[1]; ref2_ptr = aref_ptr[2]; ref3_ptr = aref_ptr[3]; for (ht_cnt = height; ht_cnt--;) { LD_UB2(src, 16, src0, src1); src += src_stride; LD_UB2(ref0_ptr, 16, ref0, ref1); ref0_ptr += ref_stride; sad0 += SAD_UB2_UH(src0, src1, ref0, ref1); LD_UB2(ref1_ptr, 16, ref0, ref1); ref1_ptr += ref_stride; sad1 += SAD_UB2_UH(src0, src1, ref0, ref1); LD_UB2(ref2_ptr, 16, ref0, ref1); ref2_ptr += ref_stride; sad2 += SAD_UB2_UH(src0, src1, ref0, ref1); LD_UB2(ref3_ptr, 16, ref0, ref1); ref3_ptr += ref_stride; sad3 += SAD_UB2_UH(src0, src1, ref0, ref1); } sad_array[0] = HADD_UH_U32(sad0); sad_array[1] = HADD_UH_U32(sad1); sad_array[2] = HADD_UH_U32(sad2); sad_array[3] = HADD_UH_U32(sad3); } static void sad_64width_x4d_msa(const uint8_t *src, int32_t src_stride, const uint8_t *const aref_ptr[], int32_t ref_stride, int32_t height, uint32_t *sad_array) { const uint8_t *ref0_ptr, *ref1_ptr, *ref2_ptr, *ref3_ptr; int32_t ht_cnt; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref2, ref3; v8u16 sad0_0 = { 0 }; v8u16 sad0_1 = { 0 }; v8u16 sad1_0 = { 0 }; v8u16 
sad1_1 = { 0 }; v8u16 sad2_0 = { 0 }; v8u16 sad2_1 = { 0 }; v8u16 sad3_0 = { 0 }; v8u16 sad3_1 = { 0 }; v4u32 sad; ref0_ptr = aref_ptr[0]; ref1_ptr = aref_ptr[1]; ref2_ptr = aref_ptr[2]; ref3_ptr = aref_ptr[3]; for (ht_cnt = height; ht_cnt--;) { LD_UB4(src, 16, src0, src1, src2, src3); src += src_stride; LD_UB4(ref0_ptr, 16, ref0, ref1, ref2, ref3); ref0_ptr += ref_stride; sad0_0 += SAD_UB2_UH(src0, src1, ref0, ref1); sad0_1 += SAD_UB2_UH(src2, src3, ref2, ref3); LD_UB4(ref1_ptr, 16, ref0, ref1, ref2, ref3); ref1_ptr += ref_stride; sad1_0 += SAD_UB2_UH(src0, src1, ref0, ref1); sad1_1 += SAD_UB2_UH(src2, src3, ref2, ref3); LD_UB4(ref2_ptr, 16, ref0, ref1, ref2, ref3); ref2_ptr += ref_stride; sad2_0 += SAD_UB2_UH(src0, src1, ref0, ref1); sad2_1 += SAD_UB2_UH(src2, src3, ref2, ref3); LD_UB4(ref3_ptr, 16, ref0, ref1, ref2, ref3); ref3_ptr += ref_stride; sad3_0 += SAD_UB2_UH(src0, src1, ref0, ref1); sad3_1 += SAD_UB2_UH(src2, src3, ref2, ref3); } sad = __msa_hadd_u_w(sad0_0, sad0_0); sad += __msa_hadd_u_w(sad0_1, sad0_1); sad_array[0] = HADD_UW_U32(sad); sad = __msa_hadd_u_w(sad1_0, sad1_0); sad += __msa_hadd_u_w(sad1_1, sad1_1); sad_array[1] = HADD_UW_U32(sad); sad = __msa_hadd_u_w(sad2_0, sad2_0); sad += __msa_hadd_u_w(sad2_1, sad2_1); sad_array[2] = HADD_UW_U32(sad); sad = __msa_hadd_u_w(sad3_0, sad3_0); sad += __msa_hadd_u_w(sad3_1, sad3_1); sad_array[3] = HADD_UW_U32(sad); } static uint32_t avgsad_4width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height, const uint8_t *sec_pred) { int32_t ht_cnt; uint32_t src0, src1, src2, src3, ref0, ref1, ref2, ref3; v16u8 src = { 0 }; v16u8 ref = { 0 }; v16u8 diff, pred, comp; v8u16 sad = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LW4(src_ptr, src_stride, src0, src1, src2, src3); src_ptr += (4 * src_stride); LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); ref_ptr += (4 * ref_stride); pred = LD_UB(sec_pred); sec_pred += 16; INSERT_W4_UB(src0, src1, src2, src3, src); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); comp = __msa_aver_u_b(pred, ref); diff = __msa_asub_u_b(src, comp); sad += __msa_hadd_u_h(diff, diff); } return HADD_UH_U32(sad); } static uint32_t avgsad_8width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height, const uint8_t *sec_pred) { int32_t ht_cnt; v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3; v16u8 diff0, diff1, pred0, pred1; v8u16 sad = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3); ref += (4 * ref_stride); LD_UB2(sec_pred, 16, pred0, pred1); sec_pred += 32; PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1, ref0, ref1); AVER_UB2_UB(pred0, ref0, pred1, ref1, diff0, diff1); sad += SAD_UB2_UH(src0, src1, diff0, diff1); } return HADD_UH_U32(sad); } static uint32_t avgsad_16width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height, const uint8_t *sec_pred) { int32_t ht_cnt; v16u8 src0, src1, src2, src3, ref0, ref1, ref2, ref3; v16u8 pred0, pred1, pred2, pred3, comp0, comp1; v8u16 sad = { 0 }; for (ht_cnt = (height >> 3); ht_cnt--;) { LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3); ref += (4 * ref_stride); LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3); sec_pred += (4 * 16); AVER_UB2_UB(pred0, ref0, pred1, ref1, comp0, comp1); sad += SAD_UB2_UH(src0, src1, 
comp0, comp1); AVER_UB2_UB(pred2, ref2, pred3, ref3, comp0, comp1); sad += SAD_UB2_UH(src2, src3, comp0, comp1); LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); LD_UB4(ref, ref_stride, ref0, ref1, ref2, ref3); ref += (4 * ref_stride); LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3); sec_pred += (4 * 16); AVER_UB2_UB(pred0, ref0, pred1, ref1, comp0, comp1); sad += SAD_UB2_UH(src0, src1, comp0, comp1); AVER_UB2_UB(pred2, ref2, pred3, ref3, comp0, comp1); sad += SAD_UB2_UH(src2, src3, comp0, comp1); } return HADD_UH_U32(sad); } static uint32_t avgsad_32width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height, const uint8_t *sec_pred) { int32_t ht_cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 ref0, ref1, ref2, ref3, ref4, ref5, ref6, ref7; v16u8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7; v16u8 comp0, comp1; v8u16 sad = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB4(src, src_stride, src0, src2, src4, src6); LD_UB4(src + 16, src_stride, src1, src3, src5, src7); src += (4 * src_stride); LD_UB4(ref, ref_stride, ref0, ref2, ref4, ref6); LD_UB4(ref + 16, ref_stride, ref1, ref3, ref5, ref7); ref += (4 * ref_stride); LD_UB4(sec_pred, 32, pred0, pred2, pred4, pred6); LD_UB4(sec_pred + 16, 32, pred1, pred3, pred5, pred7); sec_pred += (4 * 32); AVER_UB2_UB(pred0, ref0, pred1, ref1, comp0, comp1); sad += SAD_UB2_UH(src0, src1, comp0, comp1); AVER_UB2_UB(pred2, ref2, pred3, ref3, comp0, comp1); sad += SAD_UB2_UH(src2, src3, comp0, comp1); AVER_UB2_UB(pred4, ref4, pred5, ref5, comp0, comp1); sad += SAD_UB2_UH(src4, src5, comp0, comp1); AVER_UB2_UB(pred6, ref6, pred7, ref7, comp0, comp1); sad += SAD_UB2_UH(src6, src7, comp0, comp1); } return HADD_UH_U32(sad); } static uint32_t avgsad_64width_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, int32_t height, const uint8_t *sec_pred) { int32_t ht_cnt; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref2, ref3; v16u8 comp0, comp1, comp2, comp3; v16u8 pred0, pred1, pred2, pred3; v8u16 sad0 = { 0 }; v8u16 sad1 = { 0 }; v4u32 sad; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB4(src, 16, src0, src1, src2, src3); src += src_stride; LD_UB4(ref, 16, ref0, ref1, ref2, ref3); ref += ref_stride; LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3); sec_pred += 64; AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0, comp1, comp2, comp3); sad0 += SAD_UB2_UH(src0, src1, comp0, comp1); sad1 += SAD_UB2_UH(src2, src3, comp2, comp3); LD_UB4(src, 16, src0, src1, src2, src3); src += src_stride; LD_UB4(ref, 16, ref0, ref1, ref2, ref3); ref += ref_stride; LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3); sec_pred += 64; AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0, comp1, comp2, comp3); sad0 += SAD_UB2_UH(src0, src1, comp0, comp1); sad1 += SAD_UB2_UH(src2, src3, comp2, comp3); LD_UB4(src, 16, src0, src1, src2, src3); src += src_stride; LD_UB4(ref, 16, ref0, ref1, ref2, ref3); ref += ref_stride; LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3); sec_pred += 64; AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0, comp1, comp2, comp3); sad0 += SAD_UB2_UH(src0, src1, comp0, comp1); sad1 += SAD_UB2_UH(src2, src3, comp2, comp3); LD_UB4(src, 16, src0, src1, src2, src3); src += src_stride; LD_UB4(ref, 16, ref0, ref1, ref2, ref3); ref += ref_stride; LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3); sec_pred += 64; AVER_UB4_UB(pred0, ref0, pred1, ref1, pred2, ref2, pred3, ref3, comp0, 
                comp1, comp2, comp3);
    sad0 += SAD_UB2_UH(src0, src1, comp0, comp1);
    sad1 += SAD_UB2_UH(src2, src3, comp2, comp3);
  }

  sad = __msa_hadd_u_w(sad0, sad0);
  sad += __msa_hadd_u_w(sad1, sad1);

  return HADD_SW_S32(sad);
}

#define VPX_SAD_4xHEIGHT_MSA(height) \
  uint32_t vpx_sad4x##height##_msa(const uint8_t *src, int32_t src_stride, \
                                   const uint8_t *ref, int32_t ref_stride) { \
    return sad_4width_msa(src, src_stride, ref, ref_stride, height); \
  }

#define VPX_SAD_8xHEIGHT_MSA(height) \
  uint32_t vpx_sad8x##height##_msa(const uint8_t *src, int32_t src_stride, \
                                   const uint8_t *ref, int32_t ref_stride) { \
    return sad_8width_msa(src, src_stride, ref, ref_stride, height); \
  }

#define VPX_SAD_16xHEIGHT_MSA(height) \
  uint32_t vpx_sad16x##height##_msa(const uint8_t *src, int32_t src_stride, \
                                    const uint8_t *ref, int32_t ref_stride) { \
    return sad_16width_msa(src, src_stride, ref, ref_stride, height); \
  }

#define VPX_SAD_32xHEIGHT_MSA(height) \
  uint32_t vpx_sad32x##height##_msa(const uint8_t *src, int32_t src_stride, \
                                    const uint8_t *ref, int32_t ref_stride) { \
    return sad_32width_msa(src, src_stride, ref, ref_stride, height); \
  }

#define VPX_SAD_64xHEIGHT_MSA(height) \
  uint32_t vpx_sad64x##height##_msa(const uint8_t *src, int32_t src_stride, \
                                    const uint8_t *ref, int32_t ref_stride) { \
    return sad_64width_msa(src, src_stride, ref, ref_stride, height); \
  }

#define VPX_SAD_4xHEIGHTx3_MSA(height) \
  void vpx_sad4x##height##x3_msa(const uint8_t *src, int32_t src_stride, \
                                 const uint8_t *ref, int32_t ref_stride, \
                                 uint32_t *sads) { \
    sad_4width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \
  }

#define VPX_SAD_8xHEIGHTx3_MSA(height) \
  void vpx_sad8x##height##x3_msa(const uint8_t *src, int32_t src_stride, \
                                 const uint8_t *ref, int32_t ref_stride, \
                                 uint32_t *sads) { \
    sad_8width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \
  }

#define VPX_SAD_16xHEIGHTx3_MSA(height) \
  void vpx_sad16x##height##x3_msa(const uint8_t *src, int32_t src_stride, \
                                  const uint8_t *ref, int32_t ref_stride, \
                                  uint32_t *sads) { \
    sad_16width_x3_msa(src, src_stride, ref, ref_stride, height, sads); \
  }

#define VPX_SAD_4xHEIGHTx8_MSA(height) \
  void vpx_sad4x##height##x8_msa(const uint8_t *src, int32_t src_stride, \
                                 const uint8_t *ref, int32_t ref_stride, \
                                 uint32_t *sads) { \
    sad_4width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \
  }

#define VPX_SAD_8xHEIGHTx8_MSA(height) \
  void vpx_sad8x##height##x8_msa(const uint8_t *src, int32_t src_stride, \
                                 const uint8_t *ref, int32_t ref_stride, \
                                 uint32_t *sads) { \
    sad_8width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \
  }

#define VPX_SAD_16xHEIGHTx8_MSA(height) \
  void vpx_sad16x##height##x8_msa(const uint8_t *src, int32_t src_stride, \
                                  const uint8_t *ref, int32_t ref_stride, \
                                  uint32_t *sads) { \
    sad_16width_x8_msa(src, src_stride, ref, ref_stride, height, sads); \
  }

#define VPX_SAD_4xHEIGHTx4D_MSA(height) \
  void vpx_sad4x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
                                  const uint8_t *const refs[], \
                                  int32_t ref_stride, uint32_t *sads) { \
    sad_4width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
  }

#define VPX_SAD_8xHEIGHTx4D_MSA(height) \
  void vpx_sad8x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
                                  const uint8_t *const refs[], \
                                  int32_t ref_stride, uint32_t *sads) { \
    sad_8width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
  }

#define VPX_SAD_16xHEIGHTx4D_MSA(height) \
  void vpx_sad16x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
                                   const uint8_t *const refs[], \
                                   int32_t ref_stride, uint32_t *sads) { \
    sad_16width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
  }

#define VPX_SAD_32xHEIGHTx4D_MSA(height) \
  void vpx_sad32x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
                                   const uint8_t *const refs[], \
                                   int32_t ref_stride, uint32_t *sads) { \
    sad_32width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
  }

#define VPX_SAD_64xHEIGHTx4D_MSA(height) \
  void vpx_sad64x##height##x4d_msa(const uint8_t *src, int32_t src_stride, \
                                   const uint8_t *const refs[], \
                                   int32_t ref_stride, uint32_t *sads) { \
    sad_64width_x4d_msa(src, src_stride, refs, ref_stride, height, sads); \
  }

#define VPX_AVGSAD_4xHEIGHT_MSA(height) \
  uint32_t vpx_sad4x##height##_avg_msa(const uint8_t *src, int32_t src_stride, \
                                       const uint8_t *ref, int32_t ref_stride, \
                                       const uint8_t *second_pred) { \
    return avgsad_4width_msa(src, src_stride, ref, ref_stride, height, \
                             second_pred); \
  }

#define VPX_AVGSAD_8xHEIGHT_MSA(height) \
  uint32_t vpx_sad8x##height##_avg_msa(const uint8_t *src, int32_t src_stride, \
                                       const uint8_t *ref, int32_t ref_stride, \
                                       const uint8_t *second_pred) { \
    return avgsad_8width_msa(src, src_stride, ref, ref_stride, height, \
                             second_pred); \
  }

#define VPX_AVGSAD_16xHEIGHT_MSA(height) \
  uint32_t vpx_sad16x##height##_avg_msa( \
      const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
      int32_t ref_stride, const uint8_t *second_pred) { \
    return avgsad_16width_msa(src, src_stride, ref, ref_stride, height, \
                              second_pred); \
  }

#define VPX_AVGSAD_32xHEIGHT_MSA(height) \
  uint32_t vpx_sad32x##height##_avg_msa( \
      const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
      int32_t ref_stride, const uint8_t *second_pred) { \
    return avgsad_32width_msa(src, src_stride, ref, ref_stride, height, \
                              second_pred); \
  }

#define VPX_AVGSAD_64xHEIGHT_MSA(height) \
  uint32_t vpx_sad64x##height##_avg_msa( \
      const uint8_t *src, int32_t src_stride, const uint8_t *ref, \
      int32_t ref_stride, const uint8_t *second_pred) { \
    return avgsad_64width_msa(src, src_stride, ref, ref_stride, height, \
                              second_pred); \
  }

// 64x64
VPX_SAD_64xHEIGHT_MSA(64);
VPX_SAD_64xHEIGHTx4D_MSA(64);
VPX_AVGSAD_64xHEIGHT_MSA(64);

// 64x32
VPX_SAD_64xHEIGHT_MSA(32);
VPX_SAD_64xHEIGHTx4D_MSA(32);
VPX_AVGSAD_64xHEIGHT_MSA(32);

// 32x64
VPX_SAD_32xHEIGHT_MSA(64);
VPX_SAD_32xHEIGHTx4D_MSA(64);
VPX_AVGSAD_32xHEIGHT_MSA(64);

// 32x32
VPX_SAD_32xHEIGHT_MSA(32);
VPX_SAD_32xHEIGHTx4D_MSA(32);
VPX_AVGSAD_32xHEIGHT_MSA(32);

// 32x16
VPX_SAD_32xHEIGHT_MSA(16);
VPX_SAD_32xHEIGHTx4D_MSA(16);
VPX_AVGSAD_32xHEIGHT_MSA(16);

// 16x32
VPX_SAD_16xHEIGHT_MSA(32);
VPX_SAD_16xHEIGHTx4D_MSA(32);
VPX_AVGSAD_16xHEIGHT_MSA(32);

// 16x16
VPX_SAD_16xHEIGHT_MSA(16);
VPX_SAD_16xHEIGHTx3_MSA(16);
VPX_SAD_16xHEIGHTx8_MSA(16);
VPX_SAD_16xHEIGHTx4D_MSA(16);
VPX_AVGSAD_16xHEIGHT_MSA(16);

// 16x8
VPX_SAD_16xHEIGHT_MSA(8);
VPX_SAD_16xHEIGHTx3_MSA(8);
VPX_SAD_16xHEIGHTx8_MSA(8);
VPX_SAD_16xHEIGHTx4D_MSA(8);
VPX_AVGSAD_16xHEIGHT_MSA(8);

// 8x16
VPX_SAD_8xHEIGHT_MSA(16);
VPX_SAD_8xHEIGHTx3_MSA(16);
VPX_SAD_8xHEIGHTx8_MSA(16);
VPX_SAD_8xHEIGHTx4D_MSA(16);
VPX_AVGSAD_8xHEIGHT_MSA(16);

// 8x8
VPX_SAD_8xHEIGHT_MSA(8);
VPX_SAD_8xHEIGHTx3_MSA(8);
VPX_SAD_8xHEIGHTx8_MSA(8);
VPX_SAD_8xHEIGHTx4D_MSA(8);
VPX_AVGSAD_8xHEIGHT_MSA(8);

// 8x4
VPX_SAD_8xHEIGHT_MSA(4);
VPX_SAD_8xHEIGHTx4D_MSA(4);
VPX_AVGSAD_8xHEIGHT_MSA(4);

// 4x8
VPX_SAD_4xHEIGHT_MSA(8);
VPX_SAD_4xHEIGHTx4D_MSA(8);
VPX_AVGSAD_4xHEIGHT_MSA(8);

// 4x4
VPX_SAD_4xHEIGHT_MSA(4);
VPX_SAD_4xHEIGHTx3_MSA(4);
VPX_SAD_4xHEIGHTx8_MSA(4);
VPX_SAD_4xHEIGHTx4D_MSA(4);
VPX_AVGSAD_4xHEIGHT_MSA(4);
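/*
 * Illustrative sketch (not part of the original source): each instantiation
 * above stamps out one exported entry point by pasting the height into the
 * function name and forwarding to the width-specialized helper. For example,
 * VPX_SAD_16xHEIGHT_MSA(16) expands to:
 *
 *   uint32_t vpx_sad16x16_msa(const uint8_t *src, int32_t src_stride,
 *                             const uint8_t *ref, int32_t ref_stride) {
 *     return sad_16width_msa(src, src_stride, ref, ref_stride, 16);
 *   }
 *
 * so all block sizes of a given width share one SIMD loop, with the height
 * folded in as a compile-time constant.
 */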
libvpx-1.8.2/vpx_dsp/mips/sub_pixel_variance_msa.c
/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
#include "vpx_dsp/mips/macros_msa.h"
#include "vpx_dsp/variance.h"

static const uint8_t bilinear_filters_msa[8][2] = {
  { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 },
  { 64, 64 }, { 48, 80 },  { 32, 96 }, { 16, 112 },
};

#define CALC_MSE_AVG_B(src, ref, var, sub) \
  { \
    v16u8 src_l0_m, src_l1_m; \
    v8i16 res_l0_m, res_l1_m; \
    \
    ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \
    HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \
    DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \
    \
    (sub) += res_l0_m + res_l1_m; \
  }

#define VARIANCE_WxH(sse, diff, shift) \
  (sse) - (((uint32_t)(diff) * (diff)) >> (shift))

#define VARIANCE_LARGE_WxH(sse, diff, shift) \
  (sse) - (((int64_t)(diff) * (diff)) >> (shift))

static uint32_t avg_sse_diff_4width_msa(const uint8_t *src_ptr,
                                        int32_t src_stride,
                                        const uint8_t *ref_ptr,
                                        int32_t ref_stride,
                                        const uint8_t *sec_pred,
                                        int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  uint32_t src0, src1, src2, src3;
  uint32_t ref0, ref1, ref2, ref3;
  v16u8 pred, src = { 0 };
  v16u8 ref = { 0 };
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    pred = LD_UB(sec_pred);
    sec_pred += 16;
    LW4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);
    INSERT_W4_UB(src0, src1, src2, src3, src);
    INSERT_W4_UB(ref0, ref1, ref2, ref3, ref);
    src = __msa_aver_u_b(src, pred);
    CALC_MSE_AVG_B(src, ref, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

static uint32_t avg_sse_diff_8width_msa(const uint8_t *src_ptr,
                                        int32_t src_stride,
                                        const uint8_t *ref_ptr,
                                        int32_t ref_stride,
                                        const uint8_t *sec_pred,
                                        int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src0, src1, src2, src3;
  v16u8 ref0, ref1, ref2, ref3;
  v16u8 pred0, pred1;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    LD_UB2(sec_pred, 16, pred0, pred1);
    sec_pred += 32;
    LD_UB4(src_ptr, src_stride, src0, src1, src2, src3);
    src_ptr += (4 * src_stride);
    LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3);
    ref_ptr += (4 * ref_stride);
    PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1,
                ref0, ref1);
    AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1);
    CALC_MSE_AVG_B(src0, ref0, var, avg);
    CALC_MSE_AVG_B(src1, ref1, var, avg);
  }

  vec = __msa_hadd_s_w(avg, avg);
  *diff = HADD_SW_S32(vec);

  return HADD_SW_S32(var);
}

static uint32_t avg_sse_diff_16width_msa(const uint8_t *src_ptr,
                                         int32_t src_stride,
                                         const uint8_t *ref_ptr,
                                         int32_t ref_stride,
                                         const uint8_t *sec_pred,
                                         int32_t height, int32_t *diff) {
  int32_t ht_cnt;
  v16u8 src, ref, pred;
  v8i16 avg = { 0 };
  v4i32 vec, var = { 0 };

  for (ht_cnt = (height >> 2); ht_cnt--;) {
    pred = LD_UB(sec_pred);
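    /* Per row: src is averaged with the matching second-predictor row, then
       CALC_MSE_AVG_B accumulates the signed difference sum (avg) and the
       squared-difference sum (var) against ref. */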
sec_pred += 16; src = LD_UB(src_ptr); src_ptr += src_stride; ref = LD_UB(ref_ptr); ref_ptr += ref_stride; src = __msa_aver_u_b(src, pred); CALC_MSE_AVG_B(src, ref, var, avg); pred = LD_UB(sec_pred); sec_pred += 16; src = LD_UB(src_ptr); src_ptr += src_stride; ref = LD_UB(ref_ptr); ref_ptr += ref_stride; src = __msa_aver_u_b(src, pred); CALC_MSE_AVG_B(src, ref, var, avg); pred = LD_UB(sec_pred); sec_pred += 16; src = LD_UB(src_ptr); src_ptr += src_stride; ref = LD_UB(ref_ptr); ref_ptr += ref_stride; src = __msa_aver_u_b(src, pred); CALC_MSE_AVG_B(src, ref, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t avg_sse_diff_32width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, const uint8_t *sec_pred, int32_t height, int32_t *diff) { int32_t ht_cnt; v16u8 src0, src1, ref0, ref1, pred0, pred1; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB2(sec_pred, 16, pred0, pred1); sec_pred += 32; LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1); CALC_MSE_AVG_B(src0, ref0, var, avg); CALC_MSE_AVG_B(src1, ref1, var, avg); LD_UB2(sec_pred, 16, pred0, pred1); sec_pred += 32; LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1); CALC_MSE_AVG_B(src0, ref0, var, avg); CALC_MSE_AVG_B(src1, ref1, var, avg); LD_UB2(sec_pred, 16, pred0, pred1); sec_pred += 32; LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1); CALC_MSE_AVG_B(src0, ref0, var, avg); CALC_MSE_AVG_B(src1, ref1, var, avg); LD_UB2(sec_pred, 16, pred0, pred1); sec_pred += 32; LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1); CALC_MSE_AVG_B(src0, ref0, var, avg); CALC_MSE_AVG_B(src1, ref1, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t avg_sse_diff_32x64_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, const uint8_t *sec_pred, int32_t *diff) { int32_t ht_cnt; v16u8 src0, src1, ref0, ref1, pred0, pred1; v8i16 avg0 = { 0 }; v8i16 avg1 = { 0 }; v4i32 vec, var = { 0 }; for (ht_cnt = 16; ht_cnt--;) { LD_UB2(sec_pred, 16, pred0, pred1); sec_pred += 32; LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1); CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); LD_UB2(sec_pred, 16, pred0, pred1); sec_pred += 32; LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1); CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); LD_UB2(sec_pred, 16, pred0, pred1); sec_pred += 32; LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1); CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); LD_UB2(sec_pred, 16, pred0, pred1); sec_pred += 32; LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; 
LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1); CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); } vec = __msa_hadd_s_w(avg0, avg0); vec += __msa_hadd_s_w(avg1, avg1); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t avg_sse_diff_64x32_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, const uint8_t *sec_pred, int32_t *diff) { int32_t ht_cnt; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref2, ref3; v16u8 pred0, pred1, pred2, pred3; v8i16 avg0 = { 0 }; v8i16 avg1 = { 0 }; v4i32 vec, var = { 0 }; for (ht_cnt = 16; ht_cnt--;) { LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3); sec_pred += 64; LD_UB4(src_ptr, 16, src0, src1, src2, src3); src_ptr += src_stride; LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3); ref_ptr += ref_stride; AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3, src0, src1, src2, src3); CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src2, ref2, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); CALC_MSE_AVG_B(src3, ref3, var, avg1); LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3); sec_pred += 64; LD_UB4(src_ptr, 16, src0, src1, src2, src3); src_ptr += src_stride; LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3); ref_ptr += ref_stride; AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3, src0, src1, src2, src3); CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src2, ref2, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); CALC_MSE_AVG_B(src3, ref3, var, avg1); } vec = __msa_hadd_s_w(avg0, avg0); vec += __msa_hadd_s_w(avg1, avg1); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t avg_sse_diff_64x64_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, const uint8_t *sec_pred, int32_t *diff) { int32_t ht_cnt; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref2, ref3; v16u8 pred0, pred1, pred2, pred3; v8i16 avg0 = { 0 }; v8i16 avg1 = { 0 }; v8i16 avg2 = { 0 }; v8i16 avg3 = { 0 }; v4i32 vec, var = { 0 }; for (ht_cnt = 32; ht_cnt--;) { LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3); sec_pred += 64; LD_UB4(src_ptr, 16, src0, src1, src2, src3); src_ptr += src_stride; LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3); ref_ptr += ref_stride; AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3, src0, src1, src2, src3); CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); CALC_MSE_AVG_B(src2, ref2, var, avg2); CALC_MSE_AVG_B(src3, ref3, var, avg3); LD_UB4(sec_pred, 16, pred0, pred1, pred2, pred3); sec_pred += 64; LD_UB4(src_ptr, 16, src0, src1, src2, src3); src_ptr += src_stride; LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3); ref_ptr += ref_stride; AVER_UB4_UB(src0, pred0, src1, pred1, src2, pred2, src3, pred3, src0, src1, src2, src3); CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); CALC_MSE_AVG_B(src2, ref2, var, avg2); CALC_MSE_AVG_B(src3, ref3, var, avg3); } vec = __msa_hadd_s_w(avg0, avg0); vec += __msa_hadd_s_w(avg1, avg1); vec += __msa_hadd_s_w(avg2, avg2); vec += __msa_hadd_s_w(avg3, avg3); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_sse_diff_4width_h_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; uint32_t ref0, ref1, ref2, ref3; v16u8 filt0, ref = { 0 }; v16i8 src0, src1, src2, src3; v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 
5, 5, 6, 6, 7, 7, 8 }; v8u16 vec0, vec1, vec2, vec3; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter); filt0 = (v16u8)__msa_fill_h(filtval); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); LW4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, src0, src1, src2, src3); ILVEV_W2_SB(src0, src1, src2, src3, src0, src2); src0 = (v16i8)__msa_ilvev_d((v2i64)src2, (v2i64)src0); CALC_MSE_AVG_B(src0, ref, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_sse_diff_8width_h_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; v16u8 filt0, out, ref0, ref1, ref2, ref3; v16i8 src0, src1, src2, src3; v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; v8u16 vec0, vec1, vec2, vec3; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter); filt0 = (v16u8)__msa_fill_h(filtval); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, src0, src1, src2, src3); out = (v16u8)__msa_ilvev_d((v2i64)src1, (v2i64)src0); CALC_MSE_AVG_B(out, ref0, var, avg); out = (v16u8)__msa_ilvev_d((v2i64)src3, (v2i64)src2); CALC_MSE_AVG_B(out, ref1, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_sse_diff_16width_h_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7; v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; v16u8 dst0, dst1, dst2, dst3, filt0; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 out0, out1, out2, out3, out4, out5, out6, out7; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter); filt0 = (v16u8)__msa_fill_h(filtval); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); dst += (4 * dst_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); VSHF_B2_UH(src4, src4, src5, src5, mask, mask, vec4, vec5); VSHF_B2_UH(src6, src6, src7, src7, mask, mask, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, out2, out3); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, 
out6, out7); SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); PCKEV_B4_SB(out1, out0, out3, out2, out5, out4, out7, out6, src0, src1, src2, src3); CALC_MSE_AVG_B(src0, dst0, var, avg); CALC_MSE_AVG_B(src1, dst1, var, avg); CALC_MSE_AVG_B(src2, dst2, var, avg); CALC_MSE_AVG_B(src3, dst3, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_sse_diff_32width_h_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) { uint32_t loop_cnt, sse = 0; int32_t diff0[2]; for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) { sse += sub_pixel_sse_diff_16width_h_msa(src, src_stride, dst, dst_stride, filter, height, &diff0[loop_cnt]); src += 16; dst += 16; } *diff = diff0[0] + diff0[1]; return sse; } static uint32_t sub_pixel_sse_diff_64width_h_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) { uint32_t loop_cnt, sse = 0; int32_t diff0[4]; for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) { sse += sub_pixel_sse_diff_16width_h_msa(src, src_stride, dst, dst_stride, filter, height, &diff0[loop_cnt]); src += 16; dst += 16; } *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3]; return sse; } static uint32_t sub_pixel_sse_diff_4width_v_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; uint32_t ref0, ref1, ref2, ref3; v16u8 src0, src1, src2, src3, src4, out; v16u8 src10_r, src32_r, src21_r, src43_r; v16u8 ref = { 0 }; v16u8 src2110, src4332; v16u8 filt0; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; v8u16 tmp0, tmp1; filtval = LH(filter); filt0 = (v16u8)__msa_fill_h(filtval); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); LW4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, src32_r, src43_r); ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332); DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); CALC_MSE_AVG_B(out, ref, var, avg); src0 = src4; } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_sse_diff_8width_v_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4; v16u8 ref0, ref1, ref2, ref3; v8u16 vec0, vec1, vec2, vec3; v8u16 tmp0, tmp1, tmp2, tmp3; v16u8 filt0; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter); filt0 = (v16u8)__msa_fill_h(filtval); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1); ILVR_B4_UH(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, 
FILTER_BITS); PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1); CALC_MSE_AVG_B(src0, ref0, var, avg); CALC_MSE_AVG_B(src1, ref1, var, avg); src0 = src4; } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_sse_diff_16width_v_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; v16u8 ref0, ref1, ref2, ref3; v16u8 src0, src1, src2, src3, src4; v16u8 out0, out1, out2, out3; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 tmp0, tmp1, tmp2, tmp3; v16u8 filt0; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter); filt0 = (v16u8)__msa_fill_h(filtval); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); out1 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2); DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); out3 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2); src0 = src4; CALC_MSE_AVG_B(out0, ref0, var, avg); CALC_MSE_AVG_B(out1, ref1, var, avg); CALC_MSE_AVG_B(out2, ref2, var, avg); CALC_MSE_AVG_B(out3, ref3, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_sse_diff_32width_v_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) { uint32_t loop_cnt, sse = 0; int32_t diff0[2]; for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) { sse += sub_pixel_sse_diff_16width_v_msa(src, src_stride, dst, dst_stride, filter, height, &diff0[loop_cnt]); src += 16; dst += 16; } *diff = diff0[0] + diff0[1]; return sse; } static uint32_t sub_pixel_sse_diff_64width_v_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter, int32_t height, int32_t *diff) { uint32_t loop_cnt, sse = 0; int32_t diff0[4]; for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) { sse += sub_pixel_sse_diff_16width_v_msa(src, src_stride, dst, dst_stride, filter, height, &diff0[loop_cnt]); src += 16; dst += 16; } *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3]; return sse; } static uint32_t sub_pixel_sse_diff_4width_hv_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; uint32_t ref0, ref1, ref2, ref3; v16u8 src0, src1, src2, src3, src4; v16u8 out, ref = { 0 }; v16u8 filt_vt, filt_hz, vec0, vec1; v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 }; v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4; v8u16 tmp0, tmp1; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter_horiz); filt_hz = 
(v16u8)__msa_fill_h(filtval); filtval = LH(filter_vert); filt_vt = (v16u8)__msa_fill_h(filtval); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); LW4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS); hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8); hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); CALC_MSE_AVG_B(out, ref, var, avg); src0 = src4; } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_sse_diff_8width_hv_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; v16u8 ref0, ref1, ref2, ref3; v16u8 src0, src1, src2, src3, src4; v16u8 out0, out1; v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; v8u16 hz_out0, hz_out1; v8u16 tmp0, tmp1, tmp2, tmp3; v16u8 filt_vt, filt_hz, vec0; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter_horiz); filt_hz = (v16u8)__msa_fill_h(filtval); filtval = LH(filter_vert); filt_vt = (v16u8)__msa_fill_h(filtval); src0 = LD_UB(src); src += src_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1); hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp0 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp1 = __msa_dotp_u_h(vec0, filt_vt); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp2 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp3 = __msa_dotp_u_h(vec0, filt_vt); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1); CALC_MSE_AVG_B(out0, ref0, var, avg); CALC_MSE_AVG_B(out1, ref1, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_sse_diff_16width_hv_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 ref0, ref1, ref2, ref3; v16u8 filt_hz, filt_vt, vec0, vec1; v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; v8u16 hz_out0, hz_out1, hz_out2, hz_out3; v8u16 tmp0, tmp1; v8i16 avg = { 0 
}; v4i32 vec, var = { 0 }; filtval = LH(filter_horiz); filt_hz = (v16u8)__msa_fill_h(filtval); filtval = LH(filter_vert); filt_vt = (v16u8)__msa_fill_h(filtval); LD_UB2(src, 8, src0, src1); src += src_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src0, src2, src4, src6); LD_UB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); src0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); src1 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); src2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); src3 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); CALC_MSE_AVG_B(src0, ref0, var, avg); CALC_MSE_AVG_B(src1, ref1, var, avg); CALC_MSE_AVG_B(src2, ref2, var, avg); CALC_MSE_AVG_B(src3, ref3, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_sse_diff_32width_hv_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert, int32_t height, int32_t *diff) { uint32_t loop_cnt, sse = 0; int32_t diff0[2]; for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) { sse += sub_pixel_sse_diff_16width_hv_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height, &diff0[loop_cnt]); src += 16; dst += 16; } *diff = diff0[0] + diff0[1]; return sse; } static uint32_t sub_pixel_sse_diff_64width_hv_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *filter_horiz, const uint8_t *filter_vert, int32_t height, int32_t *diff) { uint32_t loop_cnt, sse = 0; int32_t diff0[4]; for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) { sse += sub_pixel_sse_diff_16width_hv_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height, &diff0[loop_cnt]); src += 16; dst += 16; } *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3]; return sse; } static uint32_t sub_pixel_avg_sse_diff_4width_h_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; uint32_t ref0, ref1, ref2, 
ref3; v16u8 out, pred, filt0, ref = { 0 }; v16i8 src0, src1, src2, src3; v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; v8u16 vec0, vec1, vec2, vec3; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter); filt0 = (v16u8)__msa_fill_h(filtval); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); pred = LD_UB(sec_pred); sec_pred += 16; LW4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, src0, src1, src2, src3); ILVEV_W2_SB(src0, src1, src2, src3, src0, src2); out = (v16u8)__msa_ilvev_d((v2i64)src2, (v2i64)src0); out = __msa_aver_u_b(out, pred); CALC_MSE_AVG_B(out, ref, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_avg_sse_diff_8width_h_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; v16u8 out, pred, filt0; v16u8 ref0, ref1, ref2, ref3; v16i8 src0, src1, src2, src3; v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; v8u16 vec0, vec1, vec2, vec3; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter); filt0 = (v16u8)__msa_fill_h(filtval); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); PCKEV_B4_SB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, src0, src1, src2, src3); out = (v16u8)__msa_ilvev_d((v2i64)src1, (v2i64)src0); pred = LD_UB(sec_pred); sec_pred += 16; out = __msa_aver_u_b(out, pred); CALC_MSE_AVG_B(out, ref0, var, avg); out = (v16u8)__msa_ilvev_d((v2i64)src3, (v2i64)src2); pred = LD_UB(sec_pred); sec_pred += 16; out = __msa_aver_u_b(out, pred); CALC_MSE_AVG_B(out, ref1, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t subpel_avg_ssediff_16w_h_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter, int32_t height, int32_t *diff, int32_t width) { int16_t filtval; uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7; v16i8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; v16u8 dst0, dst1, dst2, dst3; v16u8 tmp0, tmp1, tmp2, tmp3; v16u8 pred0, pred1, pred2, pred3, filt0; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 out0, out1, out2, out3, out4, out5, out6, out7; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter); filt0 = (v16u8)__msa_fill_h(filtval); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); 
LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); dst += (4 * dst_stride); LD_UB4(sec_pred, width, pred0, pred1, pred2, pred3); sec_pred += (4 * width); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); VSHF_B2_UH(src4, src4, src5, src5, mask, mask, vec4, vec5); VSHF_B2_UH(src6, src6, src7, src7, mask, mask, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, out2, out3); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, out6, out7); SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); PCKEV_B4_UB(out1, out0, out3, out2, out5, out4, out7, out6, tmp0, tmp1, tmp2, tmp3); AVER_UB4_UB(tmp0, pred0, tmp1, pred1, tmp2, pred2, tmp3, pred3, tmp0, tmp1, tmp2, tmp3); CALC_MSE_AVG_B(tmp0, dst0, var, avg); CALC_MSE_AVG_B(tmp1, dst1, var, avg); CALC_MSE_AVG_B(tmp2, dst2, var, avg); CALC_MSE_AVG_B(tmp3, dst3, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_avg_sse_diff_16width_h_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter, int32_t height, int32_t *diff) { return subpel_avg_ssediff_16w_h_msa(src, src_stride, dst, dst_stride, sec_pred, filter, height, diff, 16); } static uint32_t sub_pixel_avg_sse_diff_32width_h_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter, int32_t height, int32_t *diff) { uint32_t loop_cnt, sse = 0; int32_t diff0[2]; for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) { sse += subpel_avg_ssediff_16w_h_msa(src, src_stride, dst, dst_stride, sec_pred, filter, height, &diff0[loop_cnt], 32); src += 16; dst += 16; sec_pred += 16; } *diff = diff0[0] + diff0[1]; return sse; } static uint32_t sub_pixel_avg_sse_diff_64width_h_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter, int32_t height, int32_t *diff) { uint32_t loop_cnt, sse = 0; int32_t diff0[4]; for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) { sse += subpel_avg_ssediff_16w_h_msa(src, src_stride, dst, dst_stride, sec_pred, filter, height, &diff0[loop_cnt], 64); src += 16; dst += 16; sec_pred += 16; } *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3]; return sse; } static uint32_t sub_pixel_avg_sse_diff_4width_v_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; uint32_t ref0, ref1, ref2, ref3; v16u8 src0, src1, src2, src3, src4; v16u8 src10_r, src32_r, src21_r, src43_r; v16u8 out, pred, ref = { 0 }; v16u8 src2110, src4332, filt0; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; v8u16 tmp0, tmp1; filtval = LH(filter); filt0 = (v16u8)__msa_fill_h(filtval); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); pred = LD_UB(sec_pred); sec_pred += 16; LW4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, src32_r, src43_r); ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332); DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, 
FILTER_BITS); out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); out = __msa_aver_u_b(out, pred); CALC_MSE_AVG_B(out, ref, var, avg); src0 = src4; } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_avg_sse_diff_8width_v_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4; v16u8 ref0, ref1, ref2, ref3; v16u8 pred0, pred1, filt0; v8u16 vec0, vec1, vec2, vec3; v8u16 tmp0, tmp1, tmp2, tmp3; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter); filt0 = (v16u8)__msa_fill_h(filtval); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); LD_UB2(sec_pred, 16, pred0, pred1); sec_pred += 32; LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1); ILVR_B4_UH(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src0, src1); AVER_UB2_UB(src0, pred0, src1, pred1, src0, src1); CALC_MSE_AVG_B(src0, ref0, var, avg); CALC_MSE_AVG_B(src1, ref1, var, avg); src0 = src4; } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t subpel_avg_ssediff_16w_v_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter, int32_t height, int32_t *diff, int32_t width) { int16_t filtval; uint32_t loop_cnt; v16u8 ref0, ref1, ref2, ref3; v16u8 pred0, pred1, pred2, pred3; v16u8 src0, src1, src2, src3, src4; v16u8 out0, out1, out2, out3, filt0; v8u16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 tmp0, tmp1, tmp2, tmp3; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter); filt0 = (v16u8)__msa_fill_h(filtval); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); LD_UB4(sec_pred, width, pred0, pred1, pred2, pred3); sec_pred += (4 * width); ILVR_B2_UH(src1, src0, src2, src1, vec0, vec2); ILVL_B2_UH(src1, src0, src2, src1, vec1, vec3); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); ILVR_B2_UH(src3, src2, src4, src3, vec4, vec6); ILVL_B2_UH(src3, src2, src4, src3, vec5, vec7); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); out1 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2); DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); out3 = (v16u8)__msa_pckev_b((v16i8)tmp3, (v16i8)tmp2); src0 = src4; LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); AVER_UB4_UB(out0, pred0, out1, pred1, out2, pred2, out3, pred3, out0, out1, out2, out3); CALC_MSE_AVG_B(out0, ref0, var, avg); CALC_MSE_AVG_B(out1, ref1, var, avg); CALC_MSE_AVG_B(out2, ref2, var, avg); CALC_MSE_AVG_B(out3, ref3, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); 
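/* All of the sse_diff helpers in this file share one contract: the return
 * value is the sum of squared differences and *diff receives the signed
 * pixel-difference sum. The underlying VARIANCE_WxH / VARIANCE_LARGE_WxH
 * macros are defined earlier in this file; the per-size wrappers below
 * pass log2(width * height) as the shift, deriving, in effect:
 *
 *   variance = sse - (uint32_t)(((int64_t)sum * sum) >> log2_N);
 *
 * (sketch only; the small-block variant keeps the product in 32 bits).
 */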
return HADD_SW_S32(var); } static uint32_t sub_pixel_avg_sse_diff_16width_v_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter, int32_t height, int32_t *diff) { return subpel_avg_ssediff_16w_v_msa(src, src_stride, dst, dst_stride, sec_pred, filter, height, diff, 16); } static uint32_t sub_pixel_avg_sse_diff_32width_v_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter, int32_t height, int32_t *diff) { uint32_t loop_cnt, sse = 0; int32_t diff0[2]; for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) { sse += subpel_avg_ssediff_16w_v_msa(src, src_stride, dst, dst_stride, sec_pred, filter, height, &diff0[loop_cnt], 32); src += 16; dst += 16; sec_pred += 16; } *diff = diff0[0] + diff0[1]; return sse; } static uint32_t sub_pixel_avg_sse_diff_64width_v_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter, int32_t height, int32_t *diff) { uint32_t loop_cnt, sse = 0; int32_t diff0[4]; for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) { sse += subpel_avg_ssediff_16w_v_msa(src, src_stride, dst, dst_stride, sec_pred, filter, height, &diff0[loop_cnt], 64); src += 16; dst += 16; sec_pred += 16; } *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3]; return sse; } static uint32_t sub_pixel_avg_sse_diff_4width_hv_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz, const uint8_t *filter_vert, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; uint32_t ref0, ref1, ref2, ref3; v16u8 src0, src1, src2, src3, src4; v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20 }; v16u8 filt_hz, filt_vt, vec0, vec1; v16u8 out, pred, ref = { 0 }; v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, tmp0, tmp1; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter_horiz); filt_hz = (v16u8)__msa_fill_h(filtval); filtval = LH(filter_vert); filt_vt = (v16u8)__msa_fill_h(filtval); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); pred = LD_UB(sec_pred); sec_pred += 16; LW4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS); hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8); hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); out = __msa_aver_u_b(out, pred); CALC_MSE_AVG_B(out, ref, var, avg); src0 = src4; } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_avg_sse_diff_8width_hv_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz, const uint8_t *filter_vert, int32_t height, int32_t *diff) { int16_t filtval; uint32_t loop_cnt; v16u8 ref0, ref1, ref2, ref3; v16u8 src0, src1, src2, src3, src4; v16u8 pred0, pred1, out0, out1; v16u8 filt_hz, 
filt_vt, vec0; v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter_horiz); filt_hz = (v16u8)__msa_fill_h(filtval); filtval = LH(filter_vert); filt_vt = (v16u8)__msa_fill_h(filtval); src0 = LD_UB(src); src += src_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); LD_UB2(sec_pred, 16, pred0, pred1); sec_pred += 32; LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); PCKEV_D2_UB(ref1, ref0, ref3, ref2, ref0, ref1); hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp0 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp1 = __msa_dotp_u_h(vec0, filt_vt); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp2 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp3 = __msa_dotp_u_h(vec0, filt_vt); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, out0, out1); AVER_UB2_UB(out0, pred0, out1, pred1, out0, out1); CALC_MSE_AVG_B(out0, ref0, var, avg); CALC_MSE_AVG_B(out1, ref1, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t subpel_avg_ssediff_16w_hv_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz, const uint8_t *filter_vert, int32_t height, int32_t *diff, int32_t width) { int16_t filtval; uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 ref0, ref1, ref2, ref3; v16u8 pred0, pred1, pred2, pred3; v16u8 out0, out1, out2, out3; v16u8 filt_hz, filt_vt, vec0, vec1; v16u8 mask = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; v8u16 hz_out0, hz_out1, hz_out2, hz_out3, tmp0, tmp1; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; filtval = LH(filter_horiz); filt_hz = (v16u8)__msa_fill_h(filtval); filtval = LH(filter_vert); filt_vt = (v16u8)__msa_fill_h(filtval); LD_UB2(src, 8, src0, src1); src += src_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src0, src2, src4, src6); LD_UB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); LD_UB4(sec_pred, width, pred0, pred1, pred2, pred3); sec_pred += (4 * width); hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out0 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); 
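/* The hv path is a separable 2-D bilinear filter: HORIZ_2TAP_FILT_UH
 * produces 16-bit horizontally filtered rows (hz_out*), then ILVEV_B2_UB
 * interleaves two consecutive filtered rows so that a single DOTP against
 * filt_vt computes row[n] * filter_y[0] + row[n + 1] * filter_y[1] per
 * pixel. hz_out0/hz_out2 are carried across loop iterations so each
 * source row is horizontally filtered only once.
 */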
SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out1 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out2 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out3 = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); LD_UB4(dst, dst_stride, ref0, ref1, ref2, ref3); dst += (4 * dst_stride); AVER_UB4_UB(out0, pred0, out1, pred1, out2, pred2, out3, pred3, out0, out1, out2, out3); CALC_MSE_AVG_B(out0, ref0, var, avg); CALC_MSE_AVG_B(out1, ref1, var, avg); CALC_MSE_AVG_B(out2, ref2, var, avg); CALC_MSE_AVG_B(out3, ref3, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sub_pixel_avg_sse_diff_16width_hv_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz, const uint8_t *filter_vert, int32_t height, int32_t *diff) { return subpel_avg_ssediff_16w_hv_msa(src, src_stride, dst, dst_stride, sec_pred, filter_horiz, filter_vert, height, diff, 16); } static uint32_t sub_pixel_avg_sse_diff_32width_hv_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz, const uint8_t *filter_vert, int32_t height, int32_t *diff) { uint32_t loop_cnt, sse = 0; int32_t diff0[2]; for (loop_cnt = 0; loop_cnt < 2; ++loop_cnt) { sse += subpel_avg_ssediff_16w_hv_msa(src, src_stride, dst, dst_stride, sec_pred, filter_horiz, filter_vert, height, &diff0[loop_cnt], 32); src += 16; dst += 16; sec_pred += 16; } *diff = diff0[0] + diff0[1]; return sse; } static uint32_t sub_pixel_avg_sse_diff_64width_hv_msa( const uint8_t *src, int32_t src_stride, const uint8_t *dst, int32_t dst_stride, const uint8_t *sec_pred, const uint8_t *filter_horiz, const uint8_t *filter_vert, int32_t height, int32_t *diff) { uint32_t loop_cnt, sse = 0; int32_t diff0[4]; for (loop_cnt = 0; loop_cnt < 4; ++loop_cnt) { sse += subpel_avg_ssediff_16w_hv_msa(src, src_stride, dst, dst_stride, sec_pred, filter_horiz, filter_vert, height, &diff0[loop_cnt], 64); src += 16; dst += 16; sec_pred += 16; } *diff = diff0[0] + diff0[1] + diff0[2] + diff0[3]; return sse; } #define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4); #define VARIANCE_4Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 5); #define VARIANCE_8Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 5); #define VARIANCE_8Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 6); #define VARIANCE_8Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 7); #define VARIANCE_16Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 7); #define VARIANCE_16Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 8); #define VARIANCE_16Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9); #define VARIANCE_32Wx16H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9); #define VARIANCE_32Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 10); #define VARIANCE_32Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11); #define VARIANCE_64Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11); #define VARIANCE_64Wx64H(sse, diff) 
VARIANCE_LARGE_WxH(sse, diff, 12); #define VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(wd, ht) \ uint32_t vpx_sub_pixel_variance##wd##x##ht##_msa( \ const uint8_t *src, int32_t src_stride, int32_t x_offset, \ int32_t y_offset, const uint8_t *ref, int32_t ref_stride, \ uint32_t *sse) { \ int32_t diff; \ uint32_t var; \ const uint8_t *h_filter = bilinear_filters_msa[x_offset]; \ const uint8_t *v_filter = bilinear_filters_msa[y_offset]; \ \ if (y_offset) { \ if (x_offset) { \ *sse = sub_pixel_sse_diff_##wd##width_hv_msa( \ src, src_stride, ref, ref_stride, h_filter, v_filter, ht, &diff); \ } else { \ *sse = sub_pixel_sse_diff_##wd##width_v_msa( \ src, src_stride, ref, ref_stride, v_filter, ht, &diff); \ } \ \ var = VARIANCE_##wd##Wx##ht##H(*sse, diff); \ } else { \ if (x_offset) { \ *sse = sub_pixel_sse_diff_##wd##width_h_msa( \ src, src_stride, ref, ref_stride, h_filter, ht, &diff); \ \ var = VARIANCE_##wd##Wx##ht##H(*sse, diff); \ } else { \ var = vpx_variance##wd##x##ht##_msa(src, src_stride, ref, ref_stride, \ sse); \ } \ } \ \ return var; \ } VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(4, 4); VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(4, 8); VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(8, 4); VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(8, 8); VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(8, 16); VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(16, 8); VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(16, 16); VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(16, 32); VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(32, 16); VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(32, 32); VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(32, 64); VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(64, 32); VPX_SUB_PIXEL_VARIANCE_WDXHT_MSA(64, 64); #define VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(wd, ht) \ uint32_t vpx_sub_pixel_avg_variance##wd##x##ht##_msa( \ const uint8_t *src_ptr, int32_t src_stride, int32_t x_offset, \ int32_t y_offset, const uint8_t *ref_ptr, int32_t ref_stride, \ uint32_t *sse, const uint8_t *sec_pred) { \ int32_t diff; \ const uint8_t *h_filter = bilinear_filters_msa[x_offset]; \ const uint8_t *v_filter = bilinear_filters_msa[y_offset]; \ \ if (y_offset) { \ if (x_offset) { \ *sse = sub_pixel_avg_sse_diff_##wd##width_hv_msa( \ src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, \ v_filter, ht, &diff); \ } else { \ *sse = sub_pixel_avg_sse_diff_##wd##width_v_msa( \ src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, v_filter, ht, \ &diff); \ } \ } else { \ if (x_offset) { \ *sse = sub_pixel_avg_sse_diff_##wd##width_h_msa( \ src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, ht, \ &diff); \ } else { \ *sse = avg_sse_diff_##wd##width_msa(src_ptr, src_stride, ref_ptr, \ ref_stride, sec_pred, ht, &diff); \ } \ } \ \ return VARIANCE_##wd##Wx##ht##H(*sse, diff); \ } VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(4, 4); VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(4, 8); VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(8, 4); VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(8, 8); VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(8, 16); VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(16, 8); VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(16, 16); VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(16, 32); VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(32, 16); VPX_SUB_PIXEL_AVG_VARIANCE_WDXHT_MSA(32, 32); uint32_t vpx_sub_pixel_avg_variance32x64_msa(const uint8_t *src_ptr, int32_t src_stride, int32_t x_offset, int32_t y_offset, const uint8_t *ref_ptr, int32_t ref_stride, uint32_t *sse, const uint8_t *sec_pred) { int32_t diff; const uint8_t *h_filter = bilinear_filters_msa[x_offset]; const uint8_t *v_filter = bilinear_filters_msa[y_offset]; if (y_offset) { if (x_offset) { *sse = sub_pixel_avg_sse_diff_32width_hv_msa( src_ptr, src_stride, 
ref_ptr, ref_stride, sec_pred, h_filter, v_filter, 64, &diff); } else { *sse = sub_pixel_avg_sse_diff_32width_v_msa(src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, v_filter, 64, &diff); } } else { if (x_offset) { *sse = sub_pixel_avg_sse_diff_32width_h_msa(src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, 64, &diff); } else { *sse = avg_sse_diff_32x64_msa(src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, &diff); } } return VARIANCE_32Wx64H(*sse, diff); } #define VPX_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(ht) \ uint32_t vpx_sub_pixel_avg_variance64x##ht##_msa( \ const uint8_t *src_ptr, int32_t src_stride, int32_t x_offset, \ int32_t y_offset, const uint8_t *ref_ptr, int32_t ref_stride, \ uint32_t *sse, const uint8_t *sec_pred) { \ int32_t diff; \ const uint8_t *h_filter = bilinear_filters_msa[x_offset]; \ const uint8_t *v_filter = bilinear_filters_msa[y_offset]; \ \ if (y_offset) { \ if (x_offset) { \ *sse = sub_pixel_avg_sse_diff_64width_hv_msa( \ src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, \ v_filter, ht, &diff); \ } else { \ *sse = sub_pixel_avg_sse_diff_64width_v_msa( \ src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, v_filter, ht, \ &diff); \ } \ } else { \ if (x_offset) { \ *sse = sub_pixel_avg_sse_diff_64width_h_msa( \ src_ptr, src_stride, ref_ptr, ref_stride, sec_pred, h_filter, ht, \ &diff); \ } else { \ *sse = avg_sse_diff_64x##ht##_msa(src_ptr, src_stride, ref_ptr, \ ref_stride, sec_pred, &diff); \ } \ } \ \ return VARIANCE_64Wx##ht##H(*sse, diff); \ } VPX_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(32); VPX_SUB_PIXEL_AVG_VARIANCE64XHEIGHT_MSA(64); libvpx-1.8.2/vpx_dsp/mips/subtract_mmi.c000066400000000000000000000451261357355204000203320ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" #include "vpx_ports/asmdefs_mmi.h" void vpx_subtract_block_mmi(int rows, int cols, int16_t *diff, ptrdiff_t diff_stride, const uint8_t *src, ptrdiff_t src_stride, const uint8_t *pred, ptrdiff_t pred_stride) { double ftmp[13]; uint32_t tmp[1]; if (rows == cols) { switch (rows) { case 4: __asm__ volatile( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" #if _MIPS_SIM == _ABIO32 "ulw %[tmp0], 0x00(%[src]) \n\t" "mtc1 %[tmp0], %[ftmp1] \n\t" "ulw %[tmp0], 0x00(%[pred]) \n\t" "mtc1 %[tmp0], %[ftmp2] \n\t" #else "gslwlc1 %[ftmp1], 0x03(%[src]) \n\t" "gslwrc1 %[ftmp1], 0x00(%[src]) \n\t" "gslwlc1 %[ftmp2], 0x03(%[pred]) \n\t" "gslwrc1 %[ftmp2], 0x00(%[pred]) \n\t" #endif MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[pred], %[pred], %[pred_stride]) #if _MIPS_SIM == _ABIO32 "ulw %[tmp0], 0x00(%[src]) \n\t" "mtc1 %[tmp0], %[ftmp3] \n\t" "ulw %[tmp0], 0x00(%[pred]) \n\t" "mtc1 %[tmp0], %[ftmp4] \n\t" #else "gslwlc1 %[ftmp3], 0x03(%[src]) \n\t" "gslwrc1 %[ftmp3], 0x00(%[src]) \n\t" "gslwlc1 %[ftmp4], 0x03(%[pred]) \n\t" "gslwrc1 %[ftmp4], 0x00(%[pred]) \n\t" #endif MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[pred], %[pred], %[pred_stride]) #if _MIPS_SIM == _ABIO32 "ulw %[tmp0], 0x00(%[src]) \n\t" "mtc1 %[tmp0], %[ftmp5] \n\t" "ulw %[tmp0], 0x00(%[pred]) \n\t" "mtc1 %[tmp0], %[ftmp6] \n\t" #else "gslwlc1 %[ftmp5], 0x03(%[src]) \n\t" "gslwrc1 %[ftmp5], 0x00(%[src]) \n\t" "gslwlc1 %[ftmp6], 0x03(%[pred]) \n\t" "gslwrc1 %[ftmp6], 0x00(%[pred]) \n\t" #endif MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[pred], %[pred], %[pred_stride]) #if _MIPS_SIM == _ABIO32 "ulw %[tmp0], 0x00(%[src]) \n\t" "mtc1 %[tmp0], %[ftmp7] \n\t" "ulw %[tmp0], 0x00(%[pred]) \n\t" "mtc1 %[tmp0], %[ftmp8] \n\t" #else "gslwlc1 %[ftmp7], 0x03(%[src]) \n\t" "gslwrc1 %[ftmp7], 0x00(%[src]) \n\t" "gslwlc1 %[ftmp8], 0x03(%[pred]) \n\t" "gslwrc1 %[ftmp8], 0x00(%[pred]) \n\t" #endif "punpcklbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t" "punpcklbh %[ftmp10], %[ftmp2], %[ftmp0] \n\t" "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t" "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t" "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t" MMI_ADDU(%[diff], %[diff], %[diff_stride]) "punpcklbh %[ftmp9], %[ftmp3], %[ftmp0] \n\t" "punpcklbh %[ftmp10], %[ftmp4], %[ftmp0] \n\t" "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t" "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t" "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t" MMI_ADDU(%[diff], %[diff], %[diff_stride]) "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t" "punpcklbh %[ftmp10], %[ftmp6], %[ftmp0] \n\t" "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t" "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t" "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t" MMI_ADDU(%[diff], %[diff], %[diff_stride]) "punpcklbh %[ftmp9], %[ftmp7], %[ftmp0] \n\t" "punpcklbh %[ftmp10], %[ftmp8], %[ftmp0] \n\t" "psubh %[ftmp11], %[ftmp9], %[ftmp10] \n\t" "gssdlc1 %[ftmp11], 0x07(%[diff]) \n\t" "gssdrc1 %[ftmp11], 0x00(%[diff]) \n\t" : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]), #if _MIPS_SIM == _ABIO32 [tmp0] "=&r"(tmp[0]), #endif [src] "+&r"(src), [pred] "+&r"(pred), [diff] "+&r"(diff) : [src_stride] "r"((mips_reg)src_stride), [pred_stride] "r"((mips_reg)pred_stride), [diff_stride] "r"((mips_reg)(diff_stride * 2)) : "memory"); break; case 8: __asm__ volatile( "xor %[ftmp0], 
%[ftmp0], %[ftmp0] \n\t" "li %[tmp0], 0x02 \n\t" "1: \n\t" "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" "gsldlc1 %[ftmp2], 0x07(%[pred]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[pred]) \n\t" MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[pred], %[pred], %[pred_stride]) "gsldlc1 %[ftmp3], 0x07(%[src]) \n\t" "gsldrc1 %[ftmp3], 0x00(%[src]) \n\t" "gsldlc1 %[ftmp4], 0x07(%[pred]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[pred]) \n\t" MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[pred], %[pred], %[pred_stride]) "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t" "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t" "gsldlc1 %[ftmp6], 0x07(%[pred]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[pred]) \n\t" MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[pred], %[pred], %[pred_stride]) "gsldlc1 %[ftmp7], 0x07(%[src]) \n\t" "gsldrc1 %[ftmp7], 0x00(%[src]) \n\t" "gsldlc1 %[ftmp8], 0x07(%[pred]) \n\t" "gsldrc1 %[ftmp8], 0x00(%[pred]) \n\t" MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[pred], %[pred], %[pred_stride]) "punpcklbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t" "punpcklbh %[ftmp11], %[ftmp2], %[ftmp0] \n\t" "punpckhbh %[ftmp12], %[ftmp2], %[ftmp0] \n\t" "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t" "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t" "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t" "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t" MMI_ADDU(%[diff], %[diff], %[diff_stride]) "punpcklbh %[ftmp9], %[ftmp3], %[ftmp0] \n\t" "punpckhbh %[ftmp10], %[ftmp3], %[ftmp0] \n\t" "punpcklbh %[ftmp11], %[ftmp4], %[ftmp0] \n\t" "punpckhbh %[ftmp12], %[ftmp4], %[ftmp0] \n\t" "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t" "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t" "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t" "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t" MMI_ADDU(%[diff], %[diff], %[diff_stride]) "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t" "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t" "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t" "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t" "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t" "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t" "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t" "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t" MMI_ADDU(%[diff], %[diff], %[diff_stride]) "punpcklbh %[ftmp9], %[ftmp7], %[ftmp0] \n\t" "punpckhbh %[ftmp10], %[ftmp7], %[ftmp0] \n\t" "punpcklbh %[ftmp11], %[ftmp8], %[ftmp0] \n\t" "punpckhbh %[ftmp12], %[ftmp8], %[ftmp0] \n\t" "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t" "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t" "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t" "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t" MMI_ADDU(%[diff], %[diff], %[diff_stride]) "addiu %[tmp0], %[tmp0], -0x01 \n\t" "bnez %[tmp0], 1b \n\t" : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src), [pred] "+&r"(pred), [diff] "+&r"(diff) : [pred_stride] "r"((mips_reg)pred_stride), [src_stride] "r"((mips_reg)src_stride), [diff_stride] "r"((mips_reg)(diff_stride * 2)) : "memory"); break; case 16: __asm__ 
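/* The 16x16 path below follows the same pattern as the 8x8 case above,
 * widened to 16 columns: per row, 16 src and 16 pred bytes are loaded as
 * two 8-byte halves, zero-extended with punpcklbh/punpckhbh, subtracted
 * with psubsh, and stored as int16 diffs. Scalar equivalent of the whole
 * kernel (reference sketch):
 *
 *   for (r = 0; r < 16; ++r)
 *     for (c = 0; c < 16; ++c)
 *       diff[r * diff_stride + c] =
 *           src[r * src_stride + c] - pred[r * pred_stride + c];
 */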
volatile( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "li %[tmp0], 0x08 \n\t" "1: \n\t" "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" "gsldlc1 %[ftmp2], 0x07(%[pred]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[pred]) \n\t" "gsldlc1 %[ftmp3], 0x0f(%[src]) \n\t" "gsldrc1 %[ftmp3], 0x08(%[src]) \n\t" "gsldlc1 %[ftmp4], 0x0f(%[pred]) \n\t" "gsldrc1 %[ftmp4], 0x08(%[pred]) \n\t" MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[pred], %[pred], %[pred_stride]) "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t" "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t" "gsldlc1 %[ftmp6], 0x07(%[pred]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[pred]) \n\t" "gsldlc1 %[ftmp7], 0x0f(%[src]) \n\t" "gsldrc1 %[ftmp7], 0x08(%[src]) \n\t" "gsldlc1 %[ftmp8], 0x0f(%[pred]) \n\t" "gsldrc1 %[ftmp8], 0x08(%[pred]) \n\t" MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[pred], %[pred], %[pred_stride]) "punpcklbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t" "punpckhbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t" "punpcklbh %[ftmp11], %[ftmp2], %[ftmp0] \n\t" "punpckhbh %[ftmp12], %[ftmp2], %[ftmp0] \n\t" "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t" "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t" "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t" "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t" "punpcklbh %[ftmp9], %[ftmp3], %[ftmp0] \n\t" "punpckhbh %[ftmp10], %[ftmp3], %[ftmp0] \n\t" "punpcklbh %[ftmp11], %[ftmp4], %[ftmp0] \n\t" "punpckhbh %[ftmp12], %[ftmp4], %[ftmp0] \n\t" "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" "gssdlc1 %[ftmp9], 0x17(%[diff]) \n\t" "gssdrc1 %[ftmp9], 0x10(%[diff]) \n\t" "gssdlc1 %[ftmp10], 0x1f(%[diff]) \n\t" "gssdrc1 %[ftmp10], 0x18(%[diff]) \n\t" MMI_ADDU(%[diff], %[diff], %[diff_stride]) "punpcklbh %[ftmp9], %[ftmp5], %[ftmp0] \n\t" "punpckhbh %[ftmp10], %[ftmp5], %[ftmp0] \n\t" "punpcklbh %[ftmp11], %[ftmp6], %[ftmp0] \n\t" "punpckhbh %[ftmp12], %[ftmp6], %[ftmp0] \n\t" "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" "gssdlc1 %[ftmp9], 0x07(%[diff]) \n\t" "gssdrc1 %[ftmp9], 0x00(%[diff]) \n\t" "gssdlc1 %[ftmp10], 0x0f(%[diff]) \n\t" "gssdrc1 %[ftmp10], 0x08(%[diff]) \n\t" "punpcklbh %[ftmp9], %[ftmp7], %[ftmp0] \n\t" "punpckhbh %[ftmp10], %[ftmp7], %[ftmp0] \n\t" "punpcklbh %[ftmp11], %[ftmp8], %[ftmp0] \n\t" "punpckhbh %[ftmp12], %[ftmp8], %[ftmp0] \n\t" "psubsh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" "psubsh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" "gssdlc1 %[ftmp9], 0x17(%[diff]) \n\t" "gssdrc1 %[ftmp9], 0x10(%[diff]) \n\t" "gssdlc1 %[ftmp10], 0x1f(%[diff]) \n\t" "gssdrc1 %[ftmp10], 0x18(%[diff]) \n\t" MMI_ADDU(%[diff], %[diff], %[diff_stride]) "addiu %[tmp0], %[tmp0], -0x01 \n\t" "bnez %[tmp0], 1b \n\t" : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [tmp0] "=&r"(tmp[0]), [src] "+&r"(src), [pred] "+&r"(pred), [diff] "+&r"(diff) : [pred_stride] "r"((mips_reg)pred_stride), [src_stride] "r"((mips_reg)src_stride), [diff_stride] "r"((mips_reg)(diff_stride * 2)) : "memory"); break; case 32: vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride, pred, pred_stride); break; case 64: vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride, pred, pred_stride); break; default: vpx_subtract_block_c(rows, cols, 
diff, diff_stride, src, src_stride, pred, pred_stride); break; } } else { vpx_subtract_block_c(rows, cols, diff, diff_stride, src, src_stride, pred, pred_stride); } } libvpx-1.8.2/vpx_dsp/mips/subtract_msa.c000066400000000000000000000217501357355204000203250ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" static void sub_blk_4x4_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *pred_ptr, int32_t pred_stride, int16_t *diff_ptr, int32_t diff_stride) { uint32_t src0, src1, src2, src3; uint32_t pred0, pred1, pred2, pred3; v16i8 src = { 0 }; v16i8 pred = { 0 }; v16u8 src_l0, src_l1; v8i16 diff0, diff1; LW4(src_ptr, src_stride, src0, src1, src2, src3); LW4(pred_ptr, pred_stride, pred0, pred1, pred2, pred3); INSERT_W4_SB(src0, src1, src2, src3, src); INSERT_W4_SB(pred0, pred1, pred2, pred3, pred); ILVRL_B2_UB(src, pred, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST8x4_UB(diff0, diff1, diff_ptr, (2 * diff_stride)); } static void sub_blk_8x8_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *pred_ptr, int32_t pred_stride, int16_t *diff_ptr, int32_t diff_stride) { uint32_t loop_cnt; uint64_t src0, src1, pred0, pred1; v16i8 src = { 0 }; v16i8 pred = { 0 }; v16u8 src_l0, src_l1; v8i16 diff0, diff1; for (loop_cnt = 4; loop_cnt--;) { LD2(src_ptr, src_stride, src0, src1); src_ptr += (2 * src_stride); LD2(pred_ptr, pred_stride, pred0, pred1); pred_ptr += (2 * pred_stride); INSERT_D2_SB(src0, src1, src); INSERT_D2_SB(pred0, pred1, pred); ILVRL_B2_UB(src, pred, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff_ptr, diff_stride); diff_ptr += (2 * diff_stride); } } static void sub_blk_16x16_msa(const uint8_t *src, int32_t src_stride, const uint8_t *pred, int32_t pred_stride, int16_t *diff, int32_t diff_stride) { int8_t count; v16i8 src0, src1, src2, src3, src4, src5, src6, src7; v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7; v16u8 src_l0, src_l1; v8i16 diff0, diff1; for (count = 2; count--;) { LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); LD_SB8(pred, pred_stride, pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7); pred += (8 * pred_stride); ILVRL_B2_UB(src0, pred0, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); diff += diff_stride; ILVRL_B2_UB(src1, pred1, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); diff += diff_stride; ILVRL_B2_UB(src2, pred2, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); diff += diff_stride; ILVRL_B2_UB(src3, pred3, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); diff += diff_stride; ILVRL_B2_UB(src4, pred4, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); diff += diff_stride; ILVRL_B2_UB(src5, pred5, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); diff += diff_stride; ILVRL_B2_UB(src6, pred6, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, 
diff1); ST_SH2(diff0, diff1, diff, 8); diff += diff_stride; ILVRL_B2_UB(src7, pred7, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); diff += diff_stride; } } static void sub_blk_32x32_msa(const uint8_t *src, int32_t src_stride, const uint8_t *pred, int32_t pred_stride, int16_t *diff, int32_t diff_stride) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7; v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7; v16u8 src_l0, src_l1; v8i16 diff0, diff1; for (loop_cnt = 8; loop_cnt--;) { LD_SB2(src, 16, src0, src1); src += src_stride; LD_SB2(src, 16, src2, src3); src += src_stride; LD_SB2(src, 16, src4, src5); src += src_stride; LD_SB2(src, 16, src6, src7); src += src_stride; LD_SB2(pred, 16, pred0, pred1); pred += pred_stride; LD_SB2(pred, 16, pred2, pred3); pred += pred_stride; LD_SB2(pred, 16, pred4, pred5); pred += pred_stride; LD_SB2(pred, 16, pred6, pred7); pred += pred_stride; ILVRL_B2_UB(src0, pred0, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); ILVRL_B2_UB(src1, pred1, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff + 16, 8); diff += diff_stride; ILVRL_B2_UB(src2, pred2, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); ILVRL_B2_UB(src3, pred3, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff + 16, 8); diff += diff_stride; ILVRL_B2_UB(src4, pred4, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); ILVRL_B2_UB(src5, pred5, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff + 16, 8); diff += diff_stride; ILVRL_B2_UB(src6, pred6, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); ILVRL_B2_UB(src7, pred7, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff + 16, 8); diff += diff_stride; } } static void sub_blk_64x64_msa(const uint8_t *src, int32_t src_stride, const uint8_t *pred, int32_t pred_stride, int16_t *diff, int32_t diff_stride) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7; v16i8 pred0, pred1, pred2, pred3, pred4, pred5, pred6, pred7; v16u8 src_l0, src_l1; v8i16 diff0, diff1; for (loop_cnt = 32; loop_cnt--;) { LD_SB4(src, 16, src0, src1, src2, src3); src += src_stride; LD_SB4(src, 16, src4, src5, src6, src7); src += src_stride; LD_SB4(pred, 16, pred0, pred1, pred2, pred3); pred += pred_stride; LD_SB4(pred, 16, pred4, pred5, pred6, pred7); pred += pred_stride; ILVRL_B2_UB(src0, pred0, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); ILVRL_B2_UB(src1, pred1, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff + 16, 8); ILVRL_B2_UB(src2, pred2, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff + 32, 8); ILVRL_B2_UB(src3, pred3, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff + 48, 8); diff += diff_stride; ILVRL_B2_UB(src4, pred4, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff, 8); ILVRL_B2_UB(src5, pred5, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff + 16, 8); ILVRL_B2_UB(src6, pred6, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, diff1); ST_SH2(diff0, diff1, diff + 32, 8); ILVRL_B2_UB(src7, pred7, src_l0, src_l1); HSUB_UB2_SH(src_l0, src_l1, diff0, 
diff1); ST_SH2(diff0, diff1, diff + 48, 8); diff += diff_stride; } } void vpx_subtract_block_msa(int32_t rows, int32_t cols, int16_t *diff_ptr, ptrdiff_t diff_stride, const uint8_t *src_ptr, ptrdiff_t src_stride, const uint8_t *pred_ptr, ptrdiff_t pred_stride) { if (rows == cols) { switch (rows) { case 4: sub_blk_4x4_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr, diff_stride); break; case 8: sub_blk_8x8_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr, diff_stride); break; case 16: sub_blk_16x16_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr, diff_stride); break; case 32: sub_blk_32x32_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr, diff_stride); break; case 64: sub_blk_64x64_msa(src_ptr, src_stride, pred_ptr, pred_stride, diff_ptr, diff_stride); break; default: vpx_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr, src_stride, pred_ptr, pred_stride); break; } } else { vpx_subtract_block_c(rows, cols, diff_ptr, diff_stride, src_ptr, src_stride, pred_ptr, pred_stride); } } libvpx-1.8.2/vpx_dsp/mips/sum_squares_msa.c000066400000000000000000000117221357355204000210430ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_dsp_rtcd.h" #include "./macros_msa.h" uint64_t vpx_sum_squares_2d_i16_msa(const int16_t *src, int src_stride, int size) { int row, col; uint64_t ss_res = 0; v4i32 mul0, mul1; v2i64 res0 = { 0 }; if (4 == size) { uint64_t src0, src1, src2, src3; v8i16 diff0 = { 0 }; v8i16 diff1 = { 0 }; LD4(src, src_stride, src0, src1, src2, src3); INSERT_D2_SH(src0, src1, diff0); INSERT_D2_SH(src2, src3, diff1); DOTP_SH2_SW(diff0, diff1, diff0, diff1, mul0, mul1); mul0 += mul1; res0 = __msa_hadd_s_d(mul0, mul0); res0 += __msa_splati_d(res0, 1); ss_res = (uint64_t)__msa_copy_s_d(res0, 0); } else if (8 == size) { v8i16 src0, src1, src2, src3, src4, src5, src6, src7; LD_SH8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); DOTP_SH2_SW(src0, src1, src0, src1, mul0, mul1); DPADD_SH2_SW(src2, src3, src2, src3, mul0, mul1); DPADD_SH2_SW(src4, src5, src4, src5, mul0, mul1); DPADD_SH2_SW(src6, src7, src6, src7, mul0, mul1); mul0 += mul1; res0 = __msa_hadd_s_d(mul0, mul0); res0 += __msa_splati_d(res0, 1); ss_res = (uint64_t)__msa_copy_s_d(res0, 0); } else if (16 == size) { v8i16 src0, src1, src2, src3, src4, src5, src6, src7; LD_SH8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); DOTP_SH2_SW(src0, src1, src0, src1, mul0, mul1); DPADD_SH2_SW(src2, src3, src2, src3, mul0, mul1); DPADD_SH2_SW(src4, src5, src4, src5, mul0, mul1); DPADD_SH2_SW(src6, src7, src6, src7, mul0, mul1); LD_SH8(src + 8, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += 8 * src_stride; DPADD_SH2_SW(src0, src1, src0, src1, mul0, mul1); DPADD_SH2_SW(src2, src3, src2, src3, mul0, mul1); DPADD_SH2_SW(src4, src5, src4, src5, mul0, mul1); DPADD_SH2_SW(src6, src7, src6, src7, mul0, mul1); LD_SH8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); DPADD_SH2_SW(src0, src1, src0, src1, mul0, mul1); DPADD_SH2_SW(src2, src3, src2, src3, mul0, mul1); DPADD_SH2_SW(src4, src5, src4, src5, mul0, mul1); DPADD_SH2_SW(src6, src7, src6, src7, mul0, mul1); 
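/* DOTP_SH2_SW/DPADD_SH2_SW multiply each int16 element by itself and
 * accumulate pairwise into 32-bit lanes, i.e. this block computes a
 * running sum of squares. Scalar form of the whole function (reference
 * sketch, matching the generic fallback at the bottom):
 *
 *   uint64_t ss = 0;
 *   for (r = 0; r < size; ++r)
 *     for (c = 0; c < size; ++c)
 *       ss += (int64_t)src[r * src_stride + c] * src[r * src_stride + c];
 */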
LD_SH8(src + 8, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); DPADD_SH2_SW(src0, src1, src0, src1, mul0, mul1); DPADD_SH2_SW(src2, src3, src2, src3, mul0, mul1); DPADD_SH2_SW(src4, src5, src4, src5, mul0, mul1); DPADD_SH2_SW(src6, src7, src6, src7, mul0, mul1); mul0 += mul1; res0 += __msa_hadd_s_d(mul0, mul0); res0 += __msa_splati_d(res0, 1); ss_res = (uint64_t)__msa_copy_s_d(res0, 0); } else if (0 == (size % 16)) { v8i16 src0, src1, src2, src3, src4, src5, src6, src7; for (row = 0; row < (size >> 4); row++) { for (col = 0; col < size; col += 16) { const int16_t *src_ptr = src + col; LD_SH8(src_ptr, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); DOTP_SH2_SW(src0, src1, src0, src1, mul0, mul1); DPADD_SH2_SW(src2, src3, src2, src3, mul0, mul1); DPADD_SH2_SW(src4, src5, src4, src5, mul0, mul1); DPADD_SH2_SW(src6, src7, src6, src7, mul0, mul1); LD_SH8(src_ptr + 8, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src_ptr += 8 * src_stride; DPADD_SH2_SW(src0, src1, src0, src1, mul0, mul1); DPADD_SH2_SW(src2, src3, src2, src3, mul0, mul1); DPADD_SH2_SW(src4, src5, src4, src5, mul0, mul1); DPADD_SH2_SW(src6, src7, src6, src7, mul0, mul1); LD_SH8(src_ptr, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); DPADD_SH2_SW(src0, src1, src0, src1, mul0, mul1); DPADD_SH2_SW(src2, src3, src2, src3, mul0, mul1); DPADD_SH2_SW(src4, src5, src4, src5, mul0, mul1); DPADD_SH2_SW(src6, src7, src6, src7, mul0, mul1); LD_SH8(src_ptr + 8, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); DPADD_SH2_SW(src0, src1, src0, src1, mul0, mul1); DPADD_SH2_SW(src2, src3, src2, src3, mul0, mul1); DPADD_SH2_SW(src4, src5, src4, src5, mul0, mul1); DPADD_SH2_SW(src6, src7, src6, src7, mul0, mul1); mul0 += mul1; res0 += __msa_hadd_s_d(mul0, mul0); } src += 16 * src_stride; } res0 += __msa_splati_d(res0, 1); ss_res = (uint64_t)__msa_copy_s_d(res0, 0); } else { int16_t val; for (row = 0; row < size; row++) { for (col = 0; col < size; col++) { val = src[col]; ss_res += val * val; } src += src_stride; } } return ss_res; } libvpx-1.8.2/vpx_dsp/mips/txfm_macros_msa.h000066400000000000000000000137111357355204000210230ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_DSP_MIPS_TXFM_MACROS_MSA_H_ #define VPX_VPX_DSP_MIPS_TXFM_MACROS_MSA_H_ #include "vpx_dsp/mips/macros_msa.h" #define DOTP_CONST_PAIR(reg0, reg1, cnst0, cnst1, out0, out1) \ { \ v4i32 s0_m, s1_m, s2_m, s3_m, s4_m, s5_m; \ v8i16 k0_m, k1_m, k2_m, zero = { 0 }; \ \ k0_m = __msa_fill_h(cnst0); \ k1_m = __msa_fill_h(cnst1); \ k2_m = __msa_ilvev_h((v8i16)k1_m, k0_m); \ k0_m = __msa_ilvev_h((v8i16)zero, k0_m); \ k1_m = __msa_ilvev_h(k1_m, (v8i16)zero); \ \ ILVRL_H2_SW(reg1, reg0, s5_m, s4_m); \ ILVRL_H2_SW(reg0, reg1, s3_m, s2_m); \ DOTP_SH2_SW(s5_m, s4_m, k0_m, k0_m, s1_m, s0_m); \ s1_m = __msa_dpsub_s_w(s1_m, (v8i16)s5_m, k1_m); \ s0_m = __msa_dpsub_s_w(s0_m, (v8i16)s4_m, k1_m); \ SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \ out0 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \ \ DOTP_SH2_SW(s3_m, s2_m, k2_m, k2_m, s1_m, s0_m); \ SRARI_W2_SW(s1_m, s0_m, DCT_CONST_BITS); \ out1 = __msa_pckev_h((v8i16)s0_m, (v8i16)s1_m); \ } #define DOT_ADD_SUB_SRARI_PCK(in0, in1, in2, in3, in4, in5, in6, in7, dst0, \ dst1, dst2, dst3) \ { \ v4i32 tp0_m, tp1_m, tp2_m, tp3_m, tp4_m; \ v4i32 tp5_m, tp6_m, tp7_m, tp8_m, tp9_m; \ \ DOTP_SH4_SW(in0, in1, in0, in1, in4, in4, in5, in5, tp0_m, tp2_m, tp3_m, \ tp4_m); \ DOTP_SH4_SW(in2, in3, in2, in3, in6, in6, in7, in7, tp5_m, tp6_m, tp7_m, \ tp8_m); \ BUTTERFLY_4(tp0_m, tp3_m, tp7_m, tp5_m, tp1_m, tp9_m, tp7_m, tp5_m); \ BUTTERFLY_4(tp2_m, tp4_m, tp8_m, tp6_m, tp3_m, tp0_m, tp4_m, tp2_m); \ SRARI_W4_SW(tp1_m, tp9_m, tp7_m, tp5_m, DCT_CONST_BITS); \ SRARI_W4_SW(tp3_m, tp0_m, tp4_m, tp2_m, DCT_CONST_BITS); \ PCKEV_H4_SH(tp1_m, tp3_m, tp9_m, tp0_m, tp7_m, tp4_m, tp5_m, tp2_m, dst0, \ dst1, dst2, dst3); \ } #define DOT_SHIFT_RIGHT_PCK_H(in0, in1, in2) \ ({ \ v8i16 dst_m; \ v4i32 tp0_m, tp1_m; \ \ DOTP_SH2_SW(in0, in1, in2, in2, tp1_m, tp0_m); \ SRARI_W2_SW(tp1_m, tp0_m, DCT_CONST_BITS); \ dst_m = __msa_pckev_h((v8i16)tp1_m, (v8i16)tp0_m); \ \ dst_m; \ }) #define MADD_SHORT(m0, m1, c0, c1, res0, res1) \ { \ v4i32 madd0_m, madd1_m, madd2_m, madd3_m; \ v8i16 madd_s0_m, madd_s1_m; \ \ ILVRL_H2_SH(m1, m0, madd_s0_m, madd_s1_m); \ DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s0_m, madd_s1_m, c0, c0, c1, c1, \ madd0_m, madd1_m, madd2_m, madd3_m); \ SRARI_W4_SW(madd0_m, madd1_m, madd2_m, madd3_m, DCT_CONST_BITS); \ PCKEV_H2_SH(madd1_m, madd0_m, madd3_m, madd2_m, res0, res1); \ } #define MADD_BF(inp0, inp1, inp2, inp3, cst0, cst1, cst2, cst3, out0, out1, \ out2, out3) \ { \ v8i16 madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m; \ v4i32 tmp0_m, tmp1_m, tmp2_m, tmp3_m, m4_m, m5_m; \ \ ILVRL_H2_SH(inp1, inp0, madd_s0_m, madd_s1_m); \ ILVRL_H2_SH(inp3, inp2, madd_s2_m, madd_s3_m); \ DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, cst0, cst0, cst2, \ cst2, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, m4_m, m5_m, tmp3_m, tmp2_m); \ SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out0, out1); \ DOTP_SH4_SW(madd_s0_m, madd_s1_m, madd_s2_m, madd_s3_m, cst1, cst1, cst3, \ cst3, tmp0_m, tmp1_m, tmp2_m, tmp3_m); \ BUTTERFLY_4(tmp0_m, tmp1_m, tmp3_m, tmp2_m, m4_m, m5_m, tmp3_m, tmp2_m); \ SRARI_W4_SW(m4_m, m5_m, tmp2_m, tmp3_m, DCT_CONST_BITS); \ PCKEV_H2_SH(m5_m, m4_m, tmp3_m, tmp2_m, out2, out3); \ } #endif // VPX_VPX_DSP_MIPS_TXFM_MACROS_MSA_H_ libvpx-1.8.2/vpx_dsp/mips/variance_mmi.c000066400000000000000000002025441357355204000202720ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/variance.h" #include "vpx_ports/mem.h" #include "vpx/vpx_integer.h" #include "vpx_ports/asmdefs_mmi.h" static const uint8_t bilinear_filters[8][2] = { { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 }, { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 }, }; /* Use VARIANCE_SSE_SUM_8_FOR_W64 in vpx_variance64x64,vpx_variance64x32, vpx_variance32x64. VARIANCE_SSE_SUM_8 will lead to sum overflow. */ #define VARIANCE_SSE_SUM_8_FOR_W64 \ /* sse */ \ "pasubub %[ftmp3], %[ftmp1], %[ftmp2] \n\t" \ "punpcklbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" \ "punpckhbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" \ "pmaddhw %[ftmp6], %[ftmp4], %[ftmp4] \n\t" \ "pmaddhw %[ftmp7], %[ftmp5], %[ftmp5] \n\t" \ "paddw %[ftmp10], %[ftmp10], %[ftmp6] \n\t" \ "paddw %[ftmp10], %[ftmp10], %[ftmp7] \n\t" \ \ /* sum */ \ "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" \ "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" \ "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" \ "punpcklhw %[ftmp1], %[ftmp3], %[ftmp0] \n\t" \ "punpckhhw %[ftmp2], %[ftmp3], %[ftmp0] \n\t" \ "punpcklhw %[ftmp7], %[ftmp5], %[ftmp0] \n\t" \ "punpckhhw %[ftmp8], %[ftmp5], %[ftmp0] \n\t" \ "psubw %[ftmp3], %[ftmp1], %[ftmp7] \n\t" \ "psubw %[ftmp5], %[ftmp2], %[ftmp8] \n\t" \ "punpcklhw %[ftmp1], %[ftmp4], %[ftmp0] \n\t" \ "punpckhhw %[ftmp2], %[ftmp4], %[ftmp0] \n\t" \ "punpcklhw %[ftmp7], %[ftmp6], %[ftmp0] \n\t" \ "punpckhhw %[ftmp8], %[ftmp6], %[ftmp0] \n\t" \ "psubw %[ftmp4], %[ftmp1], %[ftmp7] \n\t" \ "psubw %[ftmp6], %[ftmp2], %[ftmp8] \n\t" \ "paddw %[ftmp9], %[ftmp9], %[ftmp3] \n\t" \ "paddw %[ftmp9], %[ftmp9], %[ftmp4] \n\t" \ "paddw %[ftmp9], %[ftmp9], %[ftmp5] \n\t" \ "paddw %[ftmp9], %[ftmp9], %[ftmp6] \n\t" #define VARIANCE_SSE_SUM_4 \ /* sse */ \ "pasubub %[ftmp3], %[ftmp1], %[ftmp2] \n\t" \ "punpcklbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" \ "pmaddhw %[ftmp5], %[ftmp4], %[ftmp4] \n\t" \ "paddw %[ftmp6], %[ftmp6], %[ftmp5] \n\t" \ \ /* sum */ \ "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ "punpcklbh %[ftmp4], %[ftmp2], %[ftmp0] \n\t" \ "paddh %[ftmp7], %[ftmp7], %[ftmp3] \n\t" \ "paddh %[ftmp8], %[ftmp8], %[ftmp4] \n\t" #define VARIANCE_SSE_SUM_8 \ /* sse */ \ "pasubub %[ftmp3], %[ftmp1], %[ftmp2] \n\t" \ "punpcklbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" \ "punpckhbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" \ "pmaddhw %[ftmp6], %[ftmp4], %[ftmp4] \n\t" \ "pmaddhw %[ftmp7], %[ftmp5], %[ftmp5] \n\t" \ "paddw %[ftmp8], %[ftmp8], %[ftmp6] \n\t" \ "paddw %[ftmp8], %[ftmp8], %[ftmp7] \n\t" \ \ /* sum */ \ "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" \ "punpcklbh %[ftmp5], %[ftmp2], %[ftmp0] \n\t" \ "punpckhbh %[ftmp6], %[ftmp2], %[ftmp0] \n\t" \ "paddh %[ftmp10], %[ftmp10], %[ftmp3] \n\t" \ "paddh %[ftmp10], %[ftmp10], %[ftmp4] \n\t" \ "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t" \ "paddh %[ftmp12], %[ftmp12], %[ftmp6] \n\t" #define VARIANCE_SSE_8 \ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \ "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" \ "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" \ "pasubub %[ftmp3], %[ftmp1], %[ftmp2] \n\t" \ "punpcklbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" \ 
"punpckhbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" \ "pmaddhw %[ftmp6], %[ftmp4], %[ftmp4] \n\t" \ "pmaddhw %[ftmp7], %[ftmp5], %[ftmp5] \n\t" \ "paddw %[ftmp8], %[ftmp8], %[ftmp6] \n\t" \ "paddw %[ftmp8], %[ftmp8], %[ftmp7] \n\t" #define VARIANCE_SSE_16 \ VARIANCE_SSE_8 \ "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t" \ "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t" \ "pasubub %[ftmp3], %[ftmp1], %[ftmp2] \n\t" \ "punpcklbh %[ftmp4], %[ftmp3], %[ftmp0] \n\t" \ "punpckhbh %[ftmp5], %[ftmp3], %[ftmp0] \n\t" \ "pmaddhw %[ftmp6], %[ftmp4], %[ftmp4] \n\t" \ "pmaddhw %[ftmp7], %[ftmp5], %[ftmp5] \n\t" \ "paddw %[ftmp8], %[ftmp8], %[ftmp6] \n\t" \ "paddw %[ftmp8], %[ftmp8], %[ftmp7] \n\t" #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A \ /* calculate fdata3[0]~fdata3[3], store at ftmp2*/ \ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ "gsldlc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x01(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ "pmullh %[ftmp2], %[ftmp2], %[filter_x0] \n\t" \ "paddh %[ftmp2], %[ftmp2], %[ff_ph_40] \n\t" \ "pmullh %[ftmp3], %[ftmp3], %[filter_x1] \n\t" \ "paddh %[ftmp2], %[ftmp2], %[ftmp3] \n\t" \ "psrlh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_B \ /* calculate fdata3[0]~fdata3[3], store at ftmp4*/ \ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" \ "gsldlc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x01(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ "pmullh %[ftmp4], %[ftmp4], %[filter_x0] \n\t" \ "paddh %[ftmp4], %[ftmp4], %[ff_ph_40] \n\t" \ "pmullh %[ftmp5], %[ftmp5], %[filter_x1] \n\t" \ "paddh %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ "psrlh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" #define VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_A \ /* calculate: temp2[0] ~ temp2[3] */ \ "pmullh %[ftmp2], %[ftmp2], %[filter_y0] \n\t" \ "paddh %[ftmp2], %[ftmp2], %[ff_ph_40] \n\t" \ "pmullh %[ftmp1], %[ftmp4], %[filter_y1] \n\t" \ "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" \ "psrlh %[ftmp2], %[ftmp2], %[ftmp6] \n\t" \ \ /* store: temp2[0] ~ temp2[3] */ \ "and %[ftmp2], %[ftmp2], %[mask] \n\t" \ "packushb %[ftmp2], %[ftmp2], %[ftmp0] \n\t" \ "gssdrc1 %[ftmp2], 0x00(%[temp2_ptr]) \n\t" #define VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_B \ /* calculate: temp2[0] ~ temp2[3] */ \ "pmullh %[ftmp4], %[ftmp4], %[filter_y0] \n\t" \ "paddh %[ftmp4], %[ftmp4], %[ff_ph_40] \n\t" \ "pmullh %[ftmp1], %[ftmp2], %[filter_y1] \n\t" \ "paddh %[ftmp4], %[ftmp4], %[ftmp1] \n\t" \ "psrlh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" \ \ /* store: temp2[0] ~ temp2[3] */ \ "and %[ftmp4], %[ftmp4], %[mask] \n\t" \ "packushb %[ftmp4], %[ftmp4], %[ftmp0] \n\t" \ "gssdrc1 %[ftmp4], 0x00(%[temp2_ptr]) \n\t" #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A \ /* calculate fdata3[0]~fdata3[7], store at ftmp2 and ftmp3*/ \ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp2], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp3], %[ftmp1], %[ftmp0] \n\t" \ "gsldlc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x01(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ "pmullh %[ftmp2], %[ftmp2], %[filter_x0] \n\t" \ "pmullh %[ftmp3], %[ftmp3], %[filter_x0] \n\t" \ "paddh %[ftmp2], 
%[ftmp2], %[ff_ph_40] \n\t" \ "paddh %[ftmp3], %[ftmp3], %[ff_ph_40] \n\t" \ "pmullh %[ftmp4], %[ftmp4], %[filter_x1] \n\t" \ "pmullh %[ftmp5], %[ftmp5], %[filter_x1] \n\t" \ "paddh %[ftmp2], %[ftmp2], %[ftmp4] \n\t" \ "paddh %[ftmp3], %[ftmp3], %[ftmp5] \n\t" \ "psrlh %[ftmp2], %[ftmp2], %[ftmp14] \n\t" \ "psrlh %[ftmp3], %[ftmp3], %[ftmp14] \n\t" #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B \ /* calculate fdata3[0]~fdata3[7], store at ftmp8 and ftmp9*/ \ "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp8], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp9], %[ftmp1], %[ftmp0] \n\t" \ "gsldlc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x01(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp11], %[ftmp1], %[ftmp0] \n\t" \ "pmullh %[ftmp8], %[ftmp8], %[filter_x0] \n\t" \ "pmullh %[ftmp9], %[ftmp9], %[filter_x0] \n\t" \ "paddh %[ftmp8], %[ftmp8], %[ff_ph_40] \n\t" \ "paddh %[ftmp9], %[ftmp9], %[ff_ph_40] \n\t" \ "pmullh %[ftmp10], %[ftmp10], %[filter_x1] \n\t" \ "pmullh %[ftmp11], %[ftmp11], %[filter_x1] \n\t" \ "paddh %[ftmp8], %[ftmp8], %[ftmp10] \n\t" \ "paddh %[ftmp9], %[ftmp9], %[ftmp11] \n\t" \ "psrlh %[ftmp8], %[ftmp8], %[ftmp14] \n\t" \ "psrlh %[ftmp9], %[ftmp9], %[ftmp14] \n\t" #define VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A \ /* calculate: temp2[0] ~ temp2[3] */ \ "pmullh %[ftmp2], %[ftmp2], %[filter_y0] \n\t" \ "paddh %[ftmp2], %[ftmp2], %[ff_ph_40] \n\t" \ "pmullh %[ftmp1], %[ftmp8], %[filter_y1] \n\t" \ "paddh %[ftmp2], %[ftmp2], %[ftmp1] \n\t" \ "psrlh %[ftmp2], %[ftmp2], %[ftmp14] \n\t" \ \ /* calculate: temp2[4] ~ temp2[7] */ \ "pmullh %[ftmp3], %[ftmp3], %[filter_y0] \n\t" \ "paddh %[ftmp3], %[ftmp3], %[ff_ph_40] \n\t" \ "pmullh %[ftmp1], %[ftmp9], %[filter_y1] \n\t" \ "paddh %[ftmp3], %[ftmp3], %[ftmp1] \n\t" \ "psrlh %[ftmp3], %[ftmp3], %[ftmp14] \n\t" \ \ /* store: temp2[0] ~ temp2[7] */ \ "and %[ftmp2], %[ftmp2], %[mask] \n\t" \ "and %[ftmp3], %[ftmp3], %[mask] \n\t" \ "packushb %[ftmp2], %[ftmp2], %[ftmp3] \n\t" \ "gssdlc1 %[ftmp2], 0x07(%[temp2_ptr]) \n\t" \ "gssdrc1 %[ftmp2], 0x00(%[temp2_ptr]) \n\t" #define VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B \ /* calculate: temp2[0] ~ temp2[3] */ \ "pmullh %[ftmp8], %[ftmp8], %[filter_y0] \n\t" \ "paddh %[ftmp8], %[ftmp8], %[ff_ph_40] \n\t" \ "pmullh %[ftmp1], %[ftmp2], %[filter_y1] \n\t" \ "paddh %[ftmp8], %[ftmp8], %[ftmp1] \n\t" \ "psrlh %[ftmp8], %[ftmp8], %[ftmp14] \n\t" \ \ /* calculate: temp2[4] ~ temp2[7] */ \ "pmullh %[ftmp9], %[ftmp9], %[filter_y0] \n\t" \ "paddh %[ftmp9], %[ftmp9], %[ff_ph_40] \n\t" \ "pmullh %[ftmp1], %[ftmp3], %[filter_y1] \n\t" \ "paddh %[ftmp9], %[ftmp9], %[ftmp1] \n\t" \ "psrlh %[ftmp9], %[ftmp9], %[ftmp14] \n\t" \ \ /* store: temp2[0] ~ temp2[7] */ \ "and %[ftmp8], %[ftmp8], %[mask] \n\t" \ "and %[ftmp9], %[ftmp9], %[mask] \n\t" \ "packushb %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ "gssdlc1 %[ftmp8], 0x07(%[temp2_ptr]) \n\t" \ "gssdrc1 %[ftmp8], 0x00(%[temp2_ptr]) \n\t" #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A \ /* calculate fdata3[0]~fdata3[7], store at ftmp2 and ftmp3*/ \ VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A \ \ /* calculate fdata3[8]~fdata3[15], store at ftmp4 and ftmp5*/ \ "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp4], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp5], %[ftmp1], %[ftmp0] \n\t" \ "gsldlc1 %[ftmp1], 0x10(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x09(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp6], %[ftmp1], %[ftmp0] \n\t" \ 
"punpckhbh %[ftmp7], %[ftmp1], %[ftmp0] \n\t" \ "pmullh %[ftmp4], %[ftmp4], %[filter_x0] \n\t" \ "pmullh %[ftmp5], %[ftmp5], %[filter_x0] \n\t" \ "paddh %[ftmp4], %[ftmp4], %[ff_ph_40] \n\t" \ "paddh %[ftmp5], %[ftmp5], %[ff_ph_40] \n\t" \ "pmullh %[ftmp6], %[ftmp6], %[filter_x1] \n\t" \ "pmullh %[ftmp7], %[ftmp7], %[filter_x1] \n\t" \ "paddh %[ftmp4], %[ftmp4], %[ftmp6] \n\t" \ "paddh %[ftmp5], %[ftmp5], %[ftmp7] \n\t" \ "psrlh %[ftmp4], %[ftmp4], %[ftmp14] \n\t" \ "psrlh %[ftmp5], %[ftmp5], %[ftmp14] \n\t" #define VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_B \ /* calculate fdata3[0]~fdata3[7], store at ftmp8 and ftmp9*/ \ VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B \ \ /* calculate fdata3[8]~fdata3[15], store at ftmp10 and ftmp11*/ \ "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp10], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp11], %[ftmp1], %[ftmp0] \n\t" \ "gsldlc1 %[ftmp1], 0x10(%[src_ptr]) \n\t" \ "gsldrc1 %[ftmp1], 0x09(%[src_ptr]) \n\t" \ "punpcklbh %[ftmp12], %[ftmp1], %[ftmp0] \n\t" \ "punpckhbh %[ftmp13], %[ftmp1], %[ftmp0] \n\t" \ "pmullh %[ftmp10], %[ftmp10], %[filter_x0] \n\t" \ "pmullh %[ftmp11], %[ftmp11], %[filter_x0] \n\t" \ "paddh %[ftmp10], %[ftmp10], %[ff_ph_40] \n\t" \ "paddh %[ftmp11], %[ftmp11], %[ff_ph_40] \n\t" \ "pmullh %[ftmp12], %[ftmp12], %[filter_x1] \n\t" \ "pmullh %[ftmp13], %[ftmp13], %[filter_x1] \n\t" \ "paddh %[ftmp10], %[ftmp10], %[ftmp12] \n\t" \ "paddh %[ftmp11], %[ftmp11], %[ftmp13] \n\t" \ "psrlh %[ftmp10], %[ftmp10], %[ftmp14] \n\t" \ "psrlh %[ftmp11], %[ftmp11], %[ftmp14] \n\t" #define VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_A \ VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A \ \ /* calculate: temp2[8] ~ temp2[11] */ \ "pmullh %[ftmp4], %[ftmp4], %[filter_y0] \n\t" \ "paddh %[ftmp4], %[ftmp4], %[ff_ph_40] \n\t" \ "pmullh %[ftmp1], %[ftmp10], %[filter_y1] \n\t" \ "paddh %[ftmp4], %[ftmp4], %[ftmp1] \n\t" \ "psrlh %[ftmp4], %[ftmp4], %[ftmp14] \n\t" \ \ /* calculate: temp2[12] ~ temp2[15] */ \ "pmullh %[ftmp5], %[ftmp5], %[filter_y0] \n\t" \ "paddh %[ftmp5], %[ftmp5], %[ff_ph_40] \n\t" \ "pmullh %[ftmp1], %[ftmp11], %[filter_y1] \n\t" \ "paddh %[ftmp5], %[ftmp5], %[ftmp1] \n\t" \ "psrlh %[ftmp5], %[ftmp5], %[ftmp14] \n\t" \ \ /* store: temp2[8] ~ temp2[15] */ \ "and %[ftmp4], %[ftmp4], %[mask] \n\t" \ "and %[ftmp5], %[ftmp5], %[mask] \n\t" \ "packushb %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ "gssdlc1 %[ftmp4], 0x0f(%[temp2_ptr]) \n\t" \ "gssdrc1 %[ftmp4], 0x08(%[temp2_ptr]) \n\t" #define VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_B \ VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B \ \ /* calculate: temp2[8] ~ temp2[11] */ \ "pmullh %[ftmp10], %[ftmp10], %[filter_y0] \n\t" \ "paddh %[ftmp10], %[ftmp10], %[ff_ph_40] \n\t" \ "pmullh %[ftmp1], %[ftmp4], %[filter_y1] \n\t" \ "paddh %[ftmp10], %[ftmp10], %[ftmp1] \n\t" \ "psrlh %[ftmp10], %[ftmp10], %[ftmp14] \n\t" \ \ /* calculate: temp2[12] ~ temp2[15] */ \ "pmullh %[ftmp11], %[ftmp11], %[filter_y0] \n\t" \ "paddh %[ftmp11], %[ftmp11], %[ff_ph_40] \n\t" \ "pmullh %[ftmp1], %[ftmp5], %[filter_y1] \n\t" \ "paddh %[ftmp11], %[ftmp11], %[ftmp1] \n\t" \ "psrlh %[ftmp11], %[ftmp11], %[ftmp14] \n\t" \ \ /* store: temp2[8] ~ temp2[15] */ \ "and %[ftmp10], %[ftmp10], %[mask] \n\t" \ "and %[ftmp11], %[ftmp11], %[mask] \n\t" \ "packushb %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \ "gssdlc1 %[ftmp10], 0x0f(%[temp2_ptr]) \n\t" \ "gssdrc1 %[ftmp10], 0x08(%[temp2_ptr]) \n\t" // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal // or vertical direction to produce the filtered 
output block. Used to implement // the first-pass of 2-D separable filter. // // Produces int16_t output to retain precision for the next pass. Two filter // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride). // It defines the offset required to move from one input to the next. static void var_filter_block2d_bil_first_pass( const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line, int pixel_step, unsigned int output_height, unsigned int output_width, const uint8_t *filter) { unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { ref_ptr[j] = ROUND_POWER_OF_TWO( (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], FILTER_BITS); ++src_ptr; } src_ptr += src_pixels_per_line - output_width; ref_ptr += output_width; } } // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal // or vertical direction to produce the filtered output block. Used to implement // the second-pass of 2-D separable filter. // // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the // filter is applied horizontally (pixel_step = 1) or vertically // (pixel_step = stride). It defines the offset required to move from one input // to the next. Output is 8-bit. static void var_filter_block2d_bil_second_pass( const uint16_t *src_ptr, uint8_t *ref_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, const uint8_t *filter) { unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { ref_ptr[j] = ROUND_POWER_OF_TWO( (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], FILTER_BITS); ++src_ptr; } src_ptr += src_pixels_per_line - output_width; ref_ptr += output_width; } } static inline uint32_t vpx_variance64x(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, int high) { int sum; double ftmp[12]; uint32_t tmp[3]; *sse = 0; __asm__ volatile ( "li %[tmp0], 0x20 \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t" MMI_L(%[tmp0], %[high], 0x00) "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t" "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "1: \n\t" "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "gsldlc1 %[ftmp1], 0x17(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x10(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x17(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x10(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "gsldlc1 %[ftmp1], 0x1f(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x18(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x1f(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x18(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "gsldlc1 %[ftmp1], 0x27(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x20(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x27(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x20(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "gsldlc1 %[ftmp1], 0x2f(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x28(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x2f(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x28(%[ref_ptr]) \n\t" 
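    /* Each gsldlc1/gsldrc1 pair is the Loongson analogue of ldl/ldr: the
       left and right halves together perform one unaligned 64-bit load into
       a floating-point register, so the src/ref rows need no 8-byte
       alignment. */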
VARIANCE_SSE_SUM_8_FOR_W64 "gsldlc1 %[ftmp1], 0x37(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x30(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x37(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x30(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "gsldlc1 %[ftmp1], 0x3f(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x38(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x3f(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x38(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "addiu %[tmp0], %[tmp0], -0x01 \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "mfc1 %[tmp1], %[ftmp9] \n\t" "mfhc1 %[tmp2], %[ftmp9] \n\t" "addu %[sum], %[tmp1], %[tmp2] \n\t" "dsrl %[ftmp1], %[ftmp10], %[ftmp11] \n\t" "paddw %[ftmp1], %[ftmp1], %[ftmp10] \n\t" "swc1 %[ftmp1], 0x00(%[sse]) \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), [tmp2]"=&r"(tmp[2]), [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr), [sum]"=&r"(sum) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse) : "memory" ); return *sse - (((int64_t)sum * sum) / (64 * high)); } #define VPX_VARIANCE64XN(n) \ uint32_t vpx_variance64x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ uint32_t *sse) { \ return vpx_variance64x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } VPX_VARIANCE64XN(64) VPX_VARIANCE64XN(32) uint32_t vpx_variance32x64_mmi(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { int sum; double ftmp[12]; uint32_t tmp[3]; *sse = 0; __asm__ volatile ( "li %[tmp0], 0x20 \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t" "li %[tmp0], 0x40 \n\t" "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "xor %[ftmp9], %[ftmp9], %[ftmp9] \n\t" "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "1: \n\t" "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "gsldlc1 %[ftmp1], 0x17(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x10(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x17(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x10(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "gsldlc1 %[ftmp1], 0x1f(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x18(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x1f(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x18(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8_FOR_W64 "addiu %[tmp0], %[tmp0], -0x01 \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "mfc1 %[tmp1], %[ftmp9] \n\t" "mfhc1 %[tmp2], %[ftmp9] \n\t" "addu %[sum], %[tmp1], %[tmp2] \n\t" "dsrl %[ftmp1], %[ftmp10], %[ftmp11] \n\t" "paddw %[ftmp1], %[ftmp1], %[ftmp10] \n\t" "swc1 %[ftmp1], 0x00(%[sse]) \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), 
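    /* Constraint notes: "=&f" marks a write-only, early-clobbered FP
       register and "+&r" (used for the pointers below) a read-write GPR,
       which keeps GCC from aliasing these temporaries with any input
       operand while the block executes. */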
[tmp1]"=&r"(tmp[1]), [tmp2]"=&r"(tmp[2]), [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr), [sum]"=&r"(sum) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride), [sse]"r"(sse) : "memory" ); return *sse - (((int64_t)sum * sum) / 2048); } static inline uint32_t vpx_variance32x(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, int high) { int sum; double ftmp[13]; uint32_t tmp[3]; *sse = 0; __asm__ volatile ( "li %[tmp0], 0x20 \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t" MMI_L(%[tmp0], %[high], 0x00) "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t" "1: \n\t" "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 "gsldlc1 %[ftmp1], 0x17(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x10(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x17(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x10(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 "gsldlc1 %[ftmp1], 0x1f(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x18(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x1f(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x18(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 "addiu %[tmp0], %[tmp0], -0x01 \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" "paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t" "swc1 %[ftmp9], 0x00(%[sse]) \n\t" "punpcklhw %[ftmp3], %[ftmp10], %[ftmp0] \n\t" "punpckhhw %[ftmp4], %[ftmp10], %[ftmp0] \n\t" "punpcklhw %[ftmp5], %[ftmp12], %[ftmp0] \n\t" "punpckhhw %[ftmp6], %[ftmp12], %[ftmp0] \n\t" "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp5] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp6] \n\t" "dsrl %[ftmp0], %[ftmp3], %[ftmp11] \n\t" "paddw %[ftmp0], %[ftmp0], %[ftmp3] \n\t" "swc1 %[ftmp0], 0x00(%[sum]) \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum) : "memory" ); return *sse - (((int64_t)sum * sum) / (32 * high)); } #define VPX_VARIANCE32XN(n) \ uint32_t vpx_variance32x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ uint32_t *sse) { \ return vpx_variance32x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } VPX_VARIANCE32XN(32) VPX_VARIANCE32XN(16) static inline uint32_t vpx_variance16x(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, int high) { int sum; double ftmp[13]; uint32_t tmp[3]; *sse = 0; __asm__ volatile ( "li %[tmp0], 0x20 \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t" MMI_L(%[tmp0], %[high], 0x00) "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t" "1: \n\t" "gsldlc1 
%[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 "gsldlc1 %[ftmp1], 0x0f(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x08(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x0f(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x08(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 "addiu %[tmp0], %[tmp0], -0x01 \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" "paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t" "swc1 %[ftmp9], 0x00(%[sse]) \n\t" "punpcklhw %[ftmp3], %[ftmp10], %[ftmp0] \n\t" "punpckhhw %[ftmp4], %[ftmp10], %[ftmp0] \n\t" "punpcklhw %[ftmp5], %[ftmp12], %[ftmp0] \n\t" "punpckhhw %[ftmp6], %[ftmp12], %[ftmp0] \n\t" "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp5] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp6] \n\t" "dsrl %[ftmp0], %[ftmp3], %[ftmp11] \n\t" "paddw %[ftmp0], %[ftmp0], %[ftmp3] \n\t" "swc1 %[ftmp0], 0x00(%[sum]) \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum) : "memory" ); return *sse - (((int64_t)sum * sum) / (16 * high)); } #define VPX_VARIANCE16XN(n) \ uint32_t vpx_variance16x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ uint32_t *sse) { \ return vpx_variance16x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } VPX_VARIANCE16XN(32) VPX_VARIANCE16XN(16) VPX_VARIANCE16XN(8) static inline uint32_t vpx_variance8x(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, int high) { int sum; double ftmp[13]; uint32_t tmp[3]; *sse = 0; __asm__ volatile ( "li %[tmp0], 0x20 \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t" MMI_L(%[tmp0], %[high], 0x00) "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "xor %[ftmp10], %[ftmp10], %[ftmp10] \n\t" "xor %[ftmp12], %[ftmp12], %[ftmp12] \n\t" "1: \n\t" "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_8 "addiu %[tmp0], %[tmp0], -0x01 \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" "paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t" "swc1 %[ftmp9], 0x00(%[sse]) \n\t" "punpcklhw %[ftmp3], %[ftmp10], %[ftmp0] \n\t" "punpckhhw %[ftmp4], %[ftmp10], %[ftmp0] \n\t" "punpcklhw %[ftmp5], %[ftmp12], %[ftmp0] \n\t" "punpckhhw %[ftmp6], %[ftmp12], %[ftmp0] \n\t" "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp5] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp6] \n\t" "dsrl %[ftmp0], %[ftmp3], %[ftmp11] \n\t" "paddw %[ftmp0], %[ftmp0], %[ftmp3] \n\t" "swc1 %[ftmp0], 0x00(%[sum]) \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), 
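    /* The dsrl/paddw sequences just before each swc1 fold the upper 32-bit
       half of the 64-bit accumulator onto the lower half (ftmp11 holds the
       shift amount 0x20), completing the horizontal reduction of both the
       sse and sum totals. */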
[ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [ftmp12]"=&f"(ftmp[12]), [tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum) : "memory" ); return *sse - (((int64_t)sum * sum) / (8 * high)); } #define VPX_VARIANCE8XN(n) \ uint32_t vpx_variance8x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ uint32_t *sse) { \ return vpx_variance8x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } VPX_VARIANCE8XN(16) VPX_VARIANCE8XN(8) VPX_VARIANCE8XN(4) static inline uint32_t vpx_variance4x(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, int high) { int sum; double ftmp[12]; uint32_t tmp[3]; *sse = 0; __asm__ volatile ( "li %[tmp0], 0x20 \n\t" "mtc1 %[tmp0], %[ftmp10] \n\t" MMI_L(%[tmp0], %[high], 0x00) "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "xor %[ftmp6], %[ftmp6], %[ftmp6] \n\t" "xor %[ftmp7], %[ftmp7], %[ftmp7] \n\t" "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "1: \n\t" "gsldlc1 %[ftmp1], 0x07(%[src_ptr]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src_ptr]) \n\t" "gsldlc1 %[ftmp2], 0x07(%[ref_ptr]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[ref_ptr]) \n\t" VARIANCE_SSE_SUM_4 "addiu %[tmp0], %[tmp0], -0x01 \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "dsrl %[ftmp9], %[ftmp6], %[ftmp10] \n\t" "paddw %[ftmp9], %[ftmp9], %[ftmp6] \n\t" "swc1 %[ftmp9], 0x00(%[sse]) \n\t" "punpcklhw %[ftmp3], %[ftmp7], %[ftmp0] \n\t" "punpckhhw %[ftmp4], %[ftmp7], %[ftmp0] \n\t" "punpcklhw %[ftmp5], %[ftmp8], %[ftmp0] \n\t" "punpckhhw %[ftmp6], %[ftmp8], %[ftmp0] \n\t" "paddw %[ftmp3], %[ftmp3], %[ftmp4] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp5] \n\t" "psubw %[ftmp3], %[ftmp3], %[ftmp6] \n\t" "dsrl %[ftmp0], %[ftmp3], %[ftmp10] \n\t" "paddw %[ftmp0], %[ftmp0], %[ftmp3] \n\t" "swc1 %[ftmp0], 0x00(%[sum]) \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse), [sum]"r"(&sum) : "memory" ); return *sse - (((int64_t)sum * sum) / (4 * high)); } #define VPX_VARIANCE4XN(n) \ uint32_t vpx_variance4x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ uint32_t *sse) { \ return vpx_variance4x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } VPX_VARIANCE4XN(8) VPX_VARIANCE4XN(4) static inline uint32_t vpx_mse16x(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, uint64_t high) { double ftmp[12]; uint32_t tmp[1]; *sse = 0; __asm__ volatile ( "li %[tmp0], 0x20 \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t" MMI_L(%[tmp0], %[high], 0x00) "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "1: \n\t" VARIANCE_SSE_16 "addiu %[tmp0], %[tmp0], -0x01 \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" "paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t" "swc1 %[ftmp9], 0x00(%[sse]) \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), 
[ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse) : "memory" ); return *sse; } #define vpx_mse16xN(n) \ uint32_t vpx_mse16x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ uint32_t *sse) { \ return vpx_mse16x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } vpx_mse16xN(16); vpx_mse16xN(8); static inline uint32_t vpx_mse8x(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, uint64_t high) { double ftmp[12]; uint32_t tmp[1]; *sse = 0; __asm__ volatile ( "li %[tmp0], 0x20 \n\t" "mtc1 %[tmp0], %[ftmp11] \n\t" MMI_L(%[tmp0], %[high], 0x00) "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "xor %[ftmp8], %[ftmp8], %[ftmp8] \n\t" "1: \n\t" VARIANCE_SSE_8 "addiu %[tmp0], %[tmp0], -0x01 \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) MMI_ADDU(%[ref_ptr], %[ref_ptr], %[ref_stride]) "bnez %[tmp0], 1b \n\t" "dsrl %[ftmp9], %[ftmp8], %[ftmp11] \n\t" "paddw %[ftmp9], %[ftmp9], %[ftmp8] \n\t" "swc1 %[ftmp9], 0x00(%[sse]) \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [ftmp4]"=&f"(ftmp[4]), [ftmp5]"=&f"(ftmp[5]), [ftmp6]"=&f"(ftmp[6]), [ftmp7]"=&f"(ftmp[7]), [ftmp8]"=&f"(ftmp[8]), [ftmp9]"=&f"(ftmp[9]), [ftmp10]"=&f"(ftmp[10]), [ftmp11]"=&f"(ftmp[11]), [tmp0]"=&r"(tmp[0]), [src_ptr]"+&r"(src_ptr), [ref_ptr]"+&r"(ref_ptr) : [src_stride]"r"((mips_reg)src_stride), [ref_stride]"r"((mips_reg)ref_stride), [high]"r"(&high), [sse]"r"(sse) : "memory" ); return *sse; } #define vpx_mse8xN(n) \ uint32_t vpx_mse8x##n##_mmi(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ uint32_t *sse) { \ return vpx_mse8x(src_ptr, src_stride, ref_ptr, ref_stride, sse, n); \ } vpx_mse8xN(16); vpx_mse8xN(8); #define SUBPIX_VAR(W, H) \ uint32_t vpx_sub_pixel_variance##W##x##H##_mmi( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ uint16_t fdata3[((H) + 1) * (W)]; \ uint8_t temp2[(H) * (W)]; \ \ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, (H) + 1, \ W, bilinear_filters[x_offset]); \ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ bilinear_filters[y_offset]); \ \ return vpx_variance##W##x##H##_mmi(temp2, W, ref_ptr, ref_stride, sse); \ } SUBPIX_VAR(64, 64) SUBPIX_VAR(64, 32) SUBPIX_VAR(32, 64) SUBPIX_VAR(32, 32) SUBPIX_VAR(32, 16) SUBPIX_VAR(16, 32) static inline void var_filter_block2d_bil_16x(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, uint8_t *temp2, int counter) { uint8_t *temp2_ptr = temp2; mips_reg l_counter = counter; double ftmp[15]; mips_reg tmp[2]; DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL }; const uint8_t *filter_x = bilinear_filters[x_offset]; const uint8_t *filter_y = bilinear_filters[y_offset]; __asm__ volatile ( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" MMI_LI(%[tmp0], 0x07) MMI_MTC1(%[tmp0], %[ftmp14]) "pshufh %[filter_x0], %[filter_x0], %[ftmp0] \n\t" "pshufh %[filter_x1], %[filter_x1], %[ftmp0] \n\t" "pshufh %[filter_y0], %[filter_y0], %[ftmp0] \n\t" 
"pshufh %[filter_y1], %[filter_y1], %[ftmp0] \n\t" // fdata3: fdata3[0] ~ fdata3[15] VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A // fdata3 +src_stride*1: fdata3[0] ~ fdata3[15] MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_B // temp2: temp2[0] ~ temp2[15] VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_A // fdata3 +src_stride*2: fdata3[0] ~ fdata3[15] MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A // temp2+16*1: temp2[0] ~ temp2[15] MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x10) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_B "1: \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_B MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x10) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_A MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_16_A MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x10) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_16_B "addiu %[counter], %[counter], -0x01 \n\t" "bnez %[counter], 1b \n\t" : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]), [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr), [counter]"+&r"(l_counter) : [filter_x0] "f"((uint64_t)filter_x[0]), [filter_x1] "f"((uint64_t)filter_x[1]), [filter_y0] "f"((uint64_t)filter_y[0]), [filter_y1] "f"((uint64_t)filter_y[1]), [src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40), [mask] "f"(mask) : "memory" ); } #define SUBPIX_VAR16XN(H) \ uint32_t vpx_sub_pixel_variance16x##H##_mmi( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ uint8_t temp2[16 * (H)]; \ var_filter_block2d_bil_16x(src_ptr, src_stride, x_offset, y_offset, temp2, \ ((H)-2) / 2); \ \ return vpx_variance16x##H##_mmi(temp2, 16, ref_ptr, ref_stride, sse); \ } SUBPIX_VAR16XN(16) SUBPIX_VAR16XN(8) static inline void var_filter_block2d_bil_8x(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, uint8_t *temp2, int counter) { uint8_t *temp2_ptr = temp2; mips_reg l_counter = counter; double ftmp[15]; mips_reg tmp[2]; DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL }; const uint8_t *filter_x = bilinear_filters[x_offset]; const uint8_t *filter_y = bilinear_filters[y_offset]; __asm__ volatile ( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" MMI_LI(%[tmp0], 0x07) MMI_MTC1(%[tmp0], %[ftmp14]) "pshufh %[filter_x0], %[filter_x0], %[ftmp0] \n\t" "pshufh %[filter_x1], %[filter_x1], %[ftmp0] \n\t" "pshufh %[filter_y0], %[filter_y0], %[ftmp0] \n\t" "pshufh %[filter_y1], %[filter_y1], %[ftmp0] \n\t" // fdata3: fdata3[0] ~ fdata3[7] VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A // fdata3 +src_stride*1: fdata3[0] ~ fdata3[7] MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B // temp2: temp2[0] ~ temp2[7] VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A // fdata3 +src_stride*2: fdata3[0] ~ fdata3[7] MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A // temp2+8*1: temp2[0] ~ temp2[7] MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x08) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B "1: \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], 
%[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_B MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x08) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_A MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_8_A MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x08) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_8_B "addiu %[counter], %[counter], -0x01 \n\t" "bnez %[counter], 1b \n\t" : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), [ftmp6] "=&f"(ftmp[6]), [ftmp7] "=&f"(ftmp[7]), [ftmp8] "=&f"(ftmp[8]), [ftmp9] "=&f"(ftmp[9]), [ftmp10] "=&f"(ftmp[10]), [ftmp11] "=&f"(ftmp[11]), [ftmp12] "=&f"(ftmp[12]), [ftmp13] "=&f"(ftmp[13]), [ftmp14] "=&f"(ftmp[14]), [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr), [counter]"+&r"(l_counter) : [filter_x0] "f"((uint64_t)filter_x[0]), [filter_x1] "f"((uint64_t)filter_x[1]), [filter_y0] "f"((uint64_t)filter_y[0]), [filter_y1] "f"((uint64_t)filter_y[1]), [src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40), [mask] "f"(mask) : "memory" ); } #define SUBPIX_VAR8XN(H) \ uint32_t vpx_sub_pixel_variance8x##H##_mmi( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ uint8_t temp2[8 * (H)]; \ var_filter_block2d_bil_8x(src_ptr, src_stride, x_offset, y_offset, temp2, \ ((H)-2) / 2); \ \ return vpx_variance8x##H##_mmi(temp2, 8, ref_ptr, ref_stride, sse); \ } SUBPIX_VAR8XN(16) SUBPIX_VAR8XN(8) SUBPIX_VAR8XN(4) static inline void var_filter_block2d_bil_4x(const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, uint8_t *temp2, int counter) { uint8_t *temp2_ptr = temp2; mips_reg l_counter = counter; double ftmp[7]; mips_reg tmp[2]; DECLARE_ALIGNED(8, const uint64_t, ff_ph_40) = { 0x0040004000400040ULL }; DECLARE_ALIGNED(8, const uint64_t, mask) = { 0x00ff00ff00ff00ffULL }; const uint8_t *filter_x = bilinear_filters[x_offset]; const uint8_t *filter_y = bilinear_filters[y_offset]; __asm__ volatile ( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" MMI_LI(%[tmp0], 0x07) MMI_MTC1(%[tmp0], %[ftmp6]) "pshufh %[filter_x0], %[filter_x0], %[ftmp0] \n\t" "pshufh %[filter_x1], %[filter_x1], %[ftmp0] \n\t" "pshufh %[filter_y0], %[filter_y0], %[ftmp0] \n\t" "pshufh %[filter_y1], %[filter_y1], %[ftmp0] \n\t" // fdata3: fdata3[0] ~ fdata3[3] VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A // fdata3 +src_stride*1: fdata3[0] ~ fdata3[3] MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_B // temp2: temp2[0] ~ temp2[7] VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_A // fdata3 +src_stride*2: fdata3[0] ~ fdata3[3] MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A // temp2+4*1: temp2[0] ~ temp2[7] MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x04) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_B "1: \n\t" MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_B MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x04) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_A MMI_ADDU(%[src_ptr], %[src_ptr], %[src_stride]) VAR_FILTER_BLOCK2D_BIL_FIRST_PASS_4_A MMI_ADDIU(%[temp2_ptr], %[temp2_ptr], 0x04) VAR_FILTER_BLOCK2D_BIL_SECOND_PASS_4_B "addiu %[counter], %[counter], -0x01 \n\t" "bnez %[counter], 1b \n\t" : [ftmp0] "=&f"(ftmp[0]), [ftmp1] "=&f"(ftmp[1]), [ftmp2] "=&f"(ftmp[2]), [ftmp3] "=&f"(ftmp[3]), [ftmp4] "=&f"(ftmp[4]), [ftmp5] "=&f"(ftmp[5]), [ftmp6] "=&f"(ftmp[6]), [tmp0] "=&r"(tmp[0]), [src_ptr] "+&r"(src_ptr), [temp2_ptr] "+&r"(temp2_ptr), 
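    /* The prologue before the "1:" label already produced two output rows
       and every loop iteration produces two more, which is why the callers
       pass counter = (H - 2) / 2 (see SUBPIX_VAR4XN below). */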
[counter]"+&r"(l_counter) : [filter_x0] "f"((uint64_t)filter_x[0]), [filter_x1] "f"((uint64_t)filter_x[1]), [filter_y0] "f"((uint64_t)filter_y[0]), [filter_y1] "f"((uint64_t)filter_y[1]), [src_stride] "r"((mips_reg)src_stride), [ff_ph_40] "f"(ff_ph_40), [mask] "f"(mask) : "memory" ); } #define SUBPIX_VAR4XN(H) \ uint32_t vpx_sub_pixel_variance4x##H##_mmi( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ uint8_t temp2[4 * (H)]; \ var_filter_block2d_bil_4x(src_ptr, src_stride, x_offset, y_offset, temp2, \ ((H)-2) / 2); \ \ return vpx_variance4x##H##_mmi(temp2, 4, ref_ptr, ref_stride, sse); \ } SUBPIX_VAR4XN(8) SUBPIX_VAR4XN(4) #define SUBPIX_AVG_VAR(W, H) \ uint32_t vpx_sub_pixel_avg_variance##W##x##H##_mmi( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ const uint8_t *second_pred) { \ uint16_t fdata3[((H) + 1) * (W)]; \ uint8_t temp2[(H) * (W)]; \ DECLARE_ALIGNED(16, uint8_t, temp3[(H) * (W)]); \ \ var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, (H) + 1, \ W, bilinear_filters[x_offset]); \ var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ bilinear_filters[y_offset]); \ \ vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W); \ \ return vpx_variance##W##x##H##_mmi(temp3, W, ref_ptr, ref_stride, sse); \ } SUBPIX_AVG_VAR(64, 64) SUBPIX_AVG_VAR(64, 32) SUBPIX_AVG_VAR(32, 64) SUBPIX_AVG_VAR(32, 32) SUBPIX_AVG_VAR(32, 16) SUBPIX_AVG_VAR(16, 32) SUBPIX_AVG_VAR(16, 16) SUBPIX_AVG_VAR(16, 8) SUBPIX_AVG_VAR(8, 16) SUBPIX_AVG_VAR(8, 8) SUBPIX_AVG_VAR(8, 4) SUBPIX_AVG_VAR(4, 8) SUBPIX_AVG_VAR(4, 4) libvpx-1.8.2/vpx_dsp/mips/variance_msa.c000066400000000000000000000474631357355204000202770ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" #define CALC_MSE_B(src, ref, var) \ { \ v16u8 src_l0_m, src_l1_m; \ v8i16 res_l0_m, res_l1_m; \ \ ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \ HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \ DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \ } #define CALC_MSE_AVG_B(src, ref, var, sub) \ { \ v16u8 src_l0_m, src_l1_m; \ v8i16 res_l0_m, res_l1_m; \ \ ILVRL_B2_UB(src, ref, src_l0_m, src_l1_m); \ HSUB_UB2_SH(src_l0_m, src_l1_m, res_l0_m, res_l1_m); \ DPADD_SH2_SW(res_l0_m, res_l1_m, res_l0_m, res_l1_m, var, var); \ \ sub += res_l0_m + res_l1_m; \ } #define VARIANCE_WxH(sse, diff, shift) \ (sse) - (((uint32_t)(diff) * (diff)) >> (shift)) #define VARIANCE_LARGE_WxH(sse, diff, shift) \ (sse) - (((int64_t)(diff) * (diff)) >> (shift)) static uint32_t sse_diff_4width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height, int32_t *diff) { uint32_t src0, src1, src2, src3; uint32_t ref0, ref1, ref2, ref3; int32_t ht_cnt; v16u8 src = { 0 }; v16u8 ref = { 0 }; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LW4(src_ptr, src_stride, src0, src1, src2, src3); src_ptr += (4 * src_stride); LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); ref_ptr += (4 * ref_stride); INSERT_W4_UB(src0, src1, src2, src3, src); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); CALC_MSE_AVG_B(src, ref, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sse_diff_8width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height, int32_t *diff) { int32_t ht_cnt; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref2, ref3; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB4(src_ptr, src_stride, src0, src1, src2, src3); src_ptr += (4 * src_stride); LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); ref_ptr += (4 * ref_stride); PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1, ref0, ref1); CALC_MSE_AVG_B(src0, ref0, var, avg); CALC_MSE_AVG_B(src1, ref1, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sse_diff_16width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height, int32_t *diff) { int32_t ht_cnt; v16u8 src, ref; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { src = LD_UB(src_ptr); src_ptr += src_stride; ref = LD_UB(ref_ptr); ref_ptr += ref_stride; CALC_MSE_AVG_B(src, ref, var, avg); src = LD_UB(src_ptr); src_ptr += src_stride; ref = LD_UB(ref_ptr); ref_ptr += ref_stride; CALC_MSE_AVG_B(src, ref, var, avg); src = LD_UB(src_ptr); src_ptr += src_stride; ref = LD_UB(ref_ptr); ref_ptr += ref_stride; CALC_MSE_AVG_B(src, ref, var, avg); src = LD_UB(src_ptr); src_ptr += src_stride; ref = LD_UB(ref_ptr); ref_ptr += ref_stride; CALC_MSE_AVG_B(src, ref, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sse_diff_32width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height, int32_t *diff) { int32_t ht_cnt; v16u8 src0, src1, ref0, ref1; v8i16 avg = { 0 }; v4i32 vec, var = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; 
CALC_MSE_AVG_B(src0, ref0, var, avg); CALC_MSE_AVG_B(src1, ref1, var, avg); LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; CALC_MSE_AVG_B(src0, ref0, var, avg); CALC_MSE_AVG_B(src1, ref1, var, avg); LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; CALC_MSE_AVG_B(src0, ref0, var, avg); CALC_MSE_AVG_B(src1, ref1, var, avg); LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; CALC_MSE_AVG_B(src0, ref0, var, avg); CALC_MSE_AVG_B(src1, ref1, var, avg); } vec = __msa_hadd_s_w(avg, avg); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sse_diff_32x64_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t *diff) { int32_t ht_cnt; v16u8 src0, src1, ref0, ref1; v8i16 avg0 = { 0 }; v8i16 avg1 = { 0 }; v4i32 vec, var = { 0 }; for (ht_cnt = 16; ht_cnt--;) { LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); } vec = __msa_hadd_s_w(avg0, avg0); vec += __msa_hadd_s_w(avg1, avg1); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sse_diff_64x32_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t *diff) { int32_t ht_cnt; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref2, ref3; v8i16 avg0 = { 0 }; v8i16 avg1 = { 0 }; v4i32 vec, var = { 0 }; for (ht_cnt = 16; ht_cnt--;) { LD_UB4(src_ptr, 16, src0, src1, src2, src3); src_ptr += src_stride; LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3); ref_ptr += ref_stride; CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src2, ref2, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); CALC_MSE_AVG_B(src3, ref3, var, avg1); LD_UB4(src_ptr, 16, src0, src1, src2, src3); src_ptr += src_stride; LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3); ref_ptr += ref_stride; CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src2, ref2, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); CALC_MSE_AVG_B(src3, ref3, var, avg1); } vec = __msa_hadd_s_w(avg0, avg0); vec += __msa_hadd_s_w(avg1, avg1); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t sse_diff_64x64_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t *diff) { int32_t ht_cnt; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref2, ref3; v8i16 avg0 = { 0 }; v8i16 avg1 = { 0 }; v8i16 avg2 = { 0 }; v8i16 avg3 = { 0 }; v4i32 vec, var = { 0 }; for (ht_cnt = 32; ht_cnt--;) { LD_UB4(src_ptr, 16, src0, src1, src2, src3); src_ptr += src_stride; LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3); ref_ptr += ref_stride; CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); CALC_MSE_AVG_B(src2, ref2, var, avg2); CALC_MSE_AVG_B(src3, ref3, var, avg3); 
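    /* Four separate v8i16 accumulators are used for the running sum: over
       the 32 iterations each 16-bit lane absorbs roughly 128 pixel
       differences (worst case 128 * 255 = 32640, just inside int16_t), so
       splitting avg0..avg3 is what keeps a 64x64 block from overflowing. */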
LD_UB4(src_ptr, 16, src0, src1, src2, src3); src_ptr += src_stride; LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3); ref_ptr += ref_stride; CALC_MSE_AVG_B(src0, ref0, var, avg0); CALC_MSE_AVG_B(src1, ref1, var, avg1); CALC_MSE_AVG_B(src2, ref2, var, avg2); CALC_MSE_AVG_B(src3, ref3, var, avg3); } vec = __msa_hadd_s_w(avg0, avg0); vec += __msa_hadd_s_w(avg1, avg1); vec += __msa_hadd_s_w(avg2, avg2); vec += __msa_hadd_s_w(avg3, avg3); *diff = HADD_SW_S32(vec); return HADD_SW_S32(var); } static uint32_t get_mb_ss_msa(const int16_t *src) { uint32_t sum, cnt; v8i16 src0, src1, src2, src3; v4i32 src0_l, src1_l, src2_l, src3_l; v4i32 src0_r, src1_r, src2_r, src3_r; v2i64 sq_src_l = { 0 }; v2i64 sq_src_r = { 0 }; for (cnt = 8; cnt--;) { LD_SH4(src, 8, src0, src1, src2, src3); src += 4 * 8; UNPCK_SH_SW(src0, src0_l, src0_r); UNPCK_SH_SW(src1, src1_l, src1_r); UNPCK_SH_SW(src2, src2_l, src2_r); UNPCK_SH_SW(src3, src3_l, src3_r); DPADD_SD2_SD(src0_l, src0_r, sq_src_l, sq_src_r); DPADD_SD2_SD(src1_l, src1_r, sq_src_l, sq_src_r); DPADD_SD2_SD(src2_l, src2_r, sq_src_l, sq_src_r); DPADD_SD2_SD(src3_l, src3_r, sq_src_l, sq_src_r); } sq_src_l += __msa_splati_d(sq_src_l, 1); sq_src_r += __msa_splati_d(sq_src_r, 1); sum = __msa_copy_s_d(sq_src_l, 0); sum += __msa_copy_s_d(sq_src_r, 0); return sum; } static uint32_t sse_4width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height) { int32_t ht_cnt; uint32_t src0, src1, src2, src3; uint32_t ref0, ref1, ref2, ref3; v16u8 src = { 0 }; v16u8 ref = { 0 }; v4i32 var = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LW4(src_ptr, src_stride, src0, src1, src2, src3); src_ptr += (4 * src_stride); LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); ref_ptr += (4 * ref_stride); INSERT_W4_UB(src0, src1, src2, src3, src); INSERT_W4_UB(ref0, ref1, ref2, ref3, ref); CALC_MSE_B(src, ref, var); } return HADD_SW_S32(var); } static uint32_t sse_8width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height) { int32_t ht_cnt; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref2, ref3; v4i32 var = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB4(src_ptr, src_stride, src0, src1, src2, src3); src_ptr += (4 * src_stride); LD_UB4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); ref_ptr += (4 * ref_stride); PCKEV_D4_UB(src1, src0, src3, src2, ref1, ref0, ref3, ref2, src0, src1, ref0, ref1); CALC_MSE_B(src0, ref0, var); CALC_MSE_B(src1, ref1, var); } return HADD_SW_S32(var); } static uint32_t sse_16width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height) { int32_t ht_cnt; v16u8 src, ref; v4i32 var = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { src = LD_UB(src_ptr); src_ptr += src_stride; ref = LD_UB(ref_ptr); ref_ptr += ref_stride; CALC_MSE_B(src, ref, var); src = LD_UB(src_ptr); src_ptr += src_stride; ref = LD_UB(ref_ptr); ref_ptr += ref_stride; CALC_MSE_B(src, ref, var); src = LD_UB(src_ptr); src_ptr += src_stride; ref = LD_UB(ref_ptr); ref_ptr += ref_stride; CALC_MSE_B(src, ref, var); src = LD_UB(src_ptr); src_ptr += src_stride; ref = LD_UB(ref_ptr); ref_ptr += ref_stride; CALC_MSE_B(src, ref, var); } return HADD_SW_S32(var); } static uint32_t sse_32width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height) { int32_t ht_cnt; v16u8 src0, src1, ref0, ref1; v4i32 var = { 0 }; for (ht_cnt = (height >> 2); ht_cnt--;) { LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; 
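    /* Each 32-pixel row is consumed as two 16-byte vector loads, and the
       loop is unrolled four rows deep (height >> 2 iterations), presumably
       to amortize the branch over more arithmetic. */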
LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; CALC_MSE_B(src0, ref0, var); CALC_MSE_B(src1, ref1, var); LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; CALC_MSE_B(src0, ref0, var); CALC_MSE_B(src1, ref1, var); LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; CALC_MSE_B(src0, ref0, var); CALC_MSE_B(src1, ref1, var); LD_UB2(src_ptr, 16, src0, src1); src_ptr += src_stride; LD_UB2(ref_ptr, 16, ref0, ref1); ref_ptr += ref_stride; CALC_MSE_B(src0, ref0, var); CALC_MSE_B(src1, ref1, var); } return HADD_SW_S32(var); } static uint32_t sse_64width_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride, int32_t height) { int32_t ht_cnt; v16u8 src0, src1, src2, src3; v16u8 ref0, ref1, ref2, ref3; v4i32 var = { 0 }; for (ht_cnt = height >> 1; ht_cnt--;) { LD_UB4(src_ptr, 16, src0, src1, src2, src3); src_ptr += src_stride; LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3); ref_ptr += ref_stride; CALC_MSE_B(src0, ref0, var); CALC_MSE_B(src2, ref2, var); CALC_MSE_B(src1, ref1, var); CALC_MSE_B(src3, ref3, var); LD_UB4(src_ptr, 16, src0, src1, src2, src3); src_ptr += src_stride; LD_UB4(ref_ptr, 16, ref0, ref1, ref2, ref3); ref_ptr += ref_stride; CALC_MSE_B(src0, ref0, var); CALC_MSE_B(src2, ref2, var); CALC_MSE_B(src1, ref1, var); CALC_MSE_B(src3, ref3, var); } return HADD_SW_S32(var); } uint32_t vpx_get4x4sse_cs_msa(const uint8_t *src_ptr, int32_t src_stride, const uint8_t *ref_ptr, int32_t ref_stride) { uint32_t src0, src1, src2, src3; uint32_t ref0, ref1, ref2, ref3; v16i8 src = { 0 }; v16i8 ref = { 0 }; v4i32 err0 = { 0 }; LW4(src_ptr, src_stride, src0, src1, src2, src3); LW4(ref_ptr, ref_stride, ref0, ref1, ref2, ref3); INSERT_W4_SB(src0, src1, src2, src3, src); INSERT_W4_SB(ref0, ref1, ref2, ref3, ref); CALC_MSE_B(src, ref, err0); return HADD_SW_S32(err0); } #define VARIANCE_4Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 4); #define VARIANCE_4Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 5); #define VARIANCE_8Wx4H(sse, diff) VARIANCE_WxH(sse, diff, 5); #define VARIANCE_8Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 6); #define VARIANCE_8Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 7); #define VARIANCE_16Wx8H(sse, diff) VARIANCE_WxH(sse, diff, 7); #define VARIANCE_16Wx16H(sse, diff) VARIANCE_WxH(sse, diff, 8); #define VARIANCE_16Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9); #define VARIANCE_32Wx16H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 9); #define VARIANCE_32Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 10); #define VARIANCE_32Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11); #define VARIANCE_64Wx32H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 11); #define VARIANCE_64Wx64H(sse, diff) VARIANCE_LARGE_WxH(sse, diff, 12); #define VPX_VARIANCE_WDXHT_MSA(wd, ht) \ uint32_t vpx_variance##wd##x##ht##_msa( \ const uint8_t *src, int32_t src_stride, const uint8_t *ref, \ int32_t ref_stride, uint32_t *sse) { \ int32_t diff; \ \ *sse = \ sse_diff_##wd##width_msa(src, src_stride, ref, ref_stride, ht, &diff); \ \ return VARIANCE_##wd##Wx##ht##H(*sse, diff); \ } VPX_VARIANCE_WDXHT_MSA(4, 4); VPX_VARIANCE_WDXHT_MSA(4, 8); VPX_VARIANCE_WDXHT_MSA(8, 4) VPX_VARIANCE_WDXHT_MSA(8, 8) VPX_VARIANCE_WDXHT_MSA(8, 16) VPX_VARIANCE_WDXHT_MSA(16, 8) VPX_VARIANCE_WDXHT_MSA(16, 16) VPX_VARIANCE_WDXHT_MSA(16, 32) VPX_VARIANCE_WDXHT_MSA(32, 16) VPX_VARIANCE_WDXHT_MSA(32, 32) uint32_t vpx_variance32x64_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, 
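    /* note: the return value below is VARIANCE_32Wx64H(*sse, diff), i.e.
       sse - ((int64_t)diff * diff >> 11); the 64-bit form is needed since
       diff * diff can exceed 32 bits for a 2048-pixel block */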
uint32_t *sse) { int32_t diff; *sse = sse_diff_32x64_msa(src, src_stride, ref, ref_stride, &diff); return VARIANCE_32Wx64H(*sse, diff); } uint32_t vpx_variance64x32_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, uint32_t *sse) { int32_t diff; *sse = sse_diff_64x32_msa(src, src_stride, ref, ref_stride, &diff); return VARIANCE_64Wx32H(*sse, diff); } uint32_t vpx_variance64x64_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, uint32_t *sse) { int32_t diff; *sse = sse_diff_64x64_msa(src, src_stride, ref, ref_stride, &diff); return VARIANCE_64Wx64H(*sse, diff); } uint32_t vpx_mse8x8_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, uint32_t *sse) { *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 8); return *sse; } uint32_t vpx_mse8x16_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, uint32_t *sse) { *sse = sse_8width_msa(src, src_stride, ref, ref_stride, 16); return *sse; } uint32_t vpx_mse16x8_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, uint32_t *sse) { *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 8); return *sse; } uint32_t vpx_mse16x16_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, uint32_t *sse) { *sse = sse_16width_msa(src, src_stride, ref, ref_stride, 16); return *sse; } void vpx_get8x8var_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, uint32_t *sse, int32_t *sum) { *sse = sse_diff_8width_msa(src, src_stride, ref, ref_stride, 8, sum); } void vpx_get16x16var_msa(const uint8_t *src, int32_t src_stride, const uint8_t *ref, int32_t ref_stride, uint32_t *sse, int32_t *sum) { *sse = sse_diff_16width_msa(src, src_stride, ref, ref_stride, 16, sum); } uint32_t vpx_get_mb_ss_msa(const int16_t *src) { return get_mb_ss_msa(src); } libvpx-1.8.2/vpx_dsp/mips/vpx_convolve8_avg_horiz_msa.c000066400000000000000000000704241357355204000233700ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/vpx_convolve_msa.h" static void common_hz_8t_and_aver_dst_4x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { uint32_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v16u8 dst0 = { 0 }, res; v16u8 mask0, mask1, mask2, mask3; v8i16 filt, res0, res1; mask0 = LD_UB(&mc_filt_mask_arr[16]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, res0, res1); LW4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); SRARI_H2_SH(res0, res1, FILTER_BITS); SAT_SH2_SH(res0, res1, 7); res = PCKEV_XORI128_UB(res0, res1); res = (v16u8)__msa_aver_u_b(res, dst0); ST4x4_UB(res, res, 0, 1, 2, 3, dst, dst_stride); } static void common_hz_8t_and_aver_dst_4x8_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { uint32_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v16u8 mask0, mask1, mask2, mask3, res0, res1, res2, res3; v16u8 dst0 = { 0 }, dst1 = { 0 }; v8i16 filt, vec0, vec1, vec2, vec3; mask0 = LD_UB(&mc_filt_mask_arr[16]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); src += (4 * src_stride); LW4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); LW4(dst + 4 * dst_stride, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst1); HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, vec0, vec1); LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, vec2, vec3); SRARI_H4_SH(vec0, vec1, vec2, vec3, FILTER_BITS); SAT_SH4_SH(vec0, vec1, vec2, vec3, 7); PCKEV_B4_UB(vec0, vec0, vec1, vec1, vec2, vec2, vec3, vec3, res0, res1, res2, res3); ILVR_D2_UB(res1, res0, res3, res2, res0, res2); XORI_B2_128_UB(res0, res2); AVER_UB2_UB(res0, dst0, res2, dst1, res0, res2); ST4x8_UB(res0, res2, dst, dst_stride); } static void common_hz_8t_and_aver_dst_4w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { if (4 == height) { common_hz_8t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, filter); } else if (8 == height) { common_hz_8t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, filter); } } static void common_hz_8t_and_aver_dst_8w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { int32_t loop_cnt; int64_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v16u8 mask0, mask1, mask2, mask3, dst0 = { 0 }, dst1 = { 0 }; v8i16 filt, out0, out1, out2, out3; mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; for (loop_cnt = 
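        /* four 8-wide rows per iteration: 8-tap filter, round and saturate,
           then average with the existing dst pixels before storing */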
(height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); src += (4 * src_stride); HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); LD4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst0); INSERT_D2_UB(tp2, tp3, dst1); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst, dst_stride); dst += (4 * dst_stride); } } static void common_hz_8t_and_aver_dst_16w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { int32_t loop_cnt; v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v16u8 mask0, mask1, mask2, mask3, dst0, dst1; v8i16 filt, out0, out1, out2, out3; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15; mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; for (loop_cnt = height >> 1; loop_cnt--;) { LD_SB2(src, src_stride, src0, src2); LD_SB2(src + 8, src_stride, src1, src3); src += (2 * src_stride); XORI_B4_128_SB(src0, src1, src2, src3); VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8, vec12); VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9, vec13); VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10, vec14); VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11, vec15); DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8, vec9, vec10, vec11); DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0, vec1, vec2, vec3); DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3, vec8, vec9, vec10, vec11); ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0, out1, out2, out3); LD_UB2(dst, dst_stride, dst0, dst1); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); PCKEV_XORI128_AVG_ST_UB(out1, out0, dst0, dst); dst += dst_stride; PCKEV_XORI128_AVG_ST_UB(out3, out2, dst1, dst); dst += dst_stride; } } static void common_hz_8t_and_aver_dst_32w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v16u8 dst1, dst2, mask0, mask1, mask2, mask3; v8i16 filt, out0, out1, out2, out3; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15; mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; for (loop_cnt = height; loop_cnt--;) { src0 = LD_SB(src); src2 = LD_SB(src + 16); src3 = LD_SB(src + 24); src1 = __msa_sldi_b(src2, src0, 8); src += src_stride; XORI_B4_128_SB(src0, src1, src2, src3); VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8, vec12); VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9, vec13); VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10, vec14); VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, 
vec7, vec11, vec15); DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8, vec9, vec10, vec11); DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0, vec1, vec2, vec3); DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3, vec8, vec9, vec10, vec11); ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0, out1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); LD_UB2(dst, 16, dst1, dst2); PCKEV_XORI128_AVG_ST_UB(out1, out0, dst1, dst); PCKEV_XORI128_AVG_ST_UB(out3, out2, dst2, dst + 16); dst += dst_stride; } } static void common_hz_8t_and_aver_dst_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt, cnt; v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v16u8 dst1, dst2, mask0, mask1, mask2, mask3; v8i16 filt, out0, out1, out2, out3; v8i16 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8i16 vec8, vec9, vec10, vec11, vec12, vec13, vec14, vec15; mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; for (loop_cnt = height; loop_cnt--;) { for (cnt = 0; cnt < 2; ++cnt) { src0 = LD_SB(&src[cnt << 5]); src2 = LD_SB(&src[16 + (cnt << 5)]); src3 = LD_SB(&src[24 + (cnt << 5)]); src1 = __msa_sldi_b(src2, src0, 8); XORI_B4_128_SB(src0, src1, src2, src3); VSHF_B4_SH(src0, src0, mask0, mask1, mask2, mask3, vec0, vec4, vec8, vec12); VSHF_B4_SH(src1, src1, mask0, mask1, mask2, mask3, vec1, vec5, vec9, vec13); VSHF_B4_SH(src2, src2, mask0, mask1, mask2, mask3, vec2, vec6, vec10, vec14); VSHF_B4_SH(src3, src3, mask0, mask1, mask2, mask3, vec3, vec7, vec11, vec15); DOTP_SB4_SH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); DOTP_SB4_SH(vec8, vec9, vec10, vec11, filt2, filt2, filt2, filt2, vec8, vec9, vec10, vec11); DPADD_SB4_SH(vec4, vec5, vec6, vec7, filt1, filt1, filt1, filt1, vec0, vec1, vec2, vec3); DPADD_SB4_SH(vec12, vec13, vec14, vec15, filt3, filt3, filt3, filt3, vec8, vec9, vec10, vec11); ADDS_SH4_SH(vec0, vec8, vec1, vec9, vec2, vec10, vec3, vec11, out0, out1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); LD_UB2(&dst[cnt << 5], 16, dst1, dst2); PCKEV_XORI128_AVG_ST_UB(out1, out0, dst1, &dst[cnt << 5]); PCKEV_XORI128_AVG_ST_UB(out3, out2, dst2, &dst[16 + (cnt << 5)]); } src += src_stride; dst += dst_stride; } } static void common_hz_2t_and_aver_dst_4x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { uint32_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, mask; v16u8 filt0, dst0 = { 0 }, vec0, vec1, res; v8u16 vec2, vec3, filt; mask = LD_SB(&mc_filt_mask_arr[16]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB4(src, src_stride, src0, src1, src2, src3); LW4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3); SRARI_H2_UH(vec2, vec3, FILTER_BITS); res = (v16u8)__msa_pckev_b((v16i8)vec3, (v16i8)vec2); res = (v16u8)__msa_aver_u_b(res, dst0); ST4x4_UB(res, res, 0, 1, 2, 3, dst, dst_stride); } static void common_hz_2t_and_aver_dst_4x8_msa(const 
uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { uint32_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; v16u8 filt0, vec0, vec1, vec2, vec3, res0, res1, res2, res3; v16u8 dst0 = { 0 }, dst1 = { 0 }; v8u16 vec4, vec5, vec6, vec7, filt; mask = LD_SB(&mc_filt_mask_arr[16]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); LW4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); LW4(dst + 4 * dst_stride, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst1); VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec4, vec5, vec6, vec7); SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS); PCKEV_B4_UB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, res2, res3); ILVR_D2_UB(res1, res0, res3, res2, res0, res2); AVER_UB2_UB(res0, dst0, res2, dst1, res0, res2); ST4x8_UB(res0, res2, dst, dst_stride); } static void common_hz_2t_and_aver_dst_4w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { if (4 == height) { common_hz_2t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, filter); } else if (8 == height) { common_hz_2t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, filter); } } static void common_hz_2t_and_aver_dst_8x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { int64_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, mask; v16u8 filt0, dst0 = { 0 }, dst1 = { 0 }; v8u16 vec0, vec1, vec2, vec3, filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB4(src, src_stride, src0, src1, src2, src3); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); LD4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst0); INSERT_D2_UB(tp2, tp3, dst1); PCKEV_AVG_ST8x4_UB(vec0, vec1, vec2, vec3, dst0, dst1, dst, dst_stride); } static void common_hz_2t_and_aver_dst_8x8mult_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { int64_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, mask; v16u8 filt0, dst0 = { 0 }, dst1 = { 0 }; v8u16 vec0, vec1, vec2, vec3, filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); LD4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst0); INSERT_D2_UB(tp2, tp3, dst1); LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); PCKEV_AVG_ST8x4_UB(vec0, vec1, vec2, vec3, dst0, dst1, dst, dst_stride); dst += (4 * dst_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, 
mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); LD4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst0); INSERT_D2_UB(tp2, tp3, dst1); PCKEV_AVG_ST8x4_UB(vec0, vec1, vec2, vec3, dst0, dst1, dst, dst_stride); dst += (4 * dst_stride); if (16 == height) { LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); LD4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst0); INSERT_D2_UB(tp2, tp3, dst1); LD_SB4(src, src_stride, src0, src1, src2, src3); PCKEV_AVG_ST8x4_UB(vec0, vec1, vec2, vec3, dst0, dst1, dst, dst_stride); dst += (4 * dst_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); LD4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst0); INSERT_D2_UB(tp2, tp3, dst1); PCKEV_AVG_ST8x4_UB(vec0, vec1, vec2, vec3, dst0, dst1, dst, dst_stride); } } static void common_hz_2t_and_aver_dst_8w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { if (4 == height) { common_hz_2t_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride, filter); } else { common_hz_2t_and_aver_dst_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height); } } static void common_hz_2t_and_aver_dst_16w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; v16u8 filt0, dst0, dst1, dst2, dst3; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 res0, res1, res2, res3, res4, res5, res6, res7, filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1, res2, res3); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5, res6, res7); SRARI_H4_UH(res0, res1, res2, res3, FILTER_BITS); SRARI_H4_UH(res4, res5, res6, res7, FILTER_BITS); LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); PCKEV_AVG_ST_UB(res1, res0, dst0, dst); dst += dst_stride; PCKEV_AVG_ST_UB(res3, res2, dst1, dst); dst += dst_stride; PCKEV_AVG_ST_UB(res5, res4, dst2, dst); dst += dst_stride; PCKEV_AVG_ST_UB(res7, res6, dst3, dst); dst += dst_stride; for (loop_cnt = (height >> 2) - 1; loop_cnt--;) { LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, 
vec5); VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1, res2, res3); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5, res6, res7); SRARI_H4_UH(res0, res1, res2, res3, FILTER_BITS); SRARI_H4_UH(res4, res5, res6, res7, FILTER_BITS); LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); PCKEV_AVG_ST_UB(res1, res0, dst0, dst); dst += dst_stride; PCKEV_AVG_ST_UB(res3, res2, dst1, dst); dst += dst_stride; PCKEV_AVG_ST_UB(res5, res4, dst2, dst); dst += dst_stride; PCKEV_AVG_ST_UB(res7, res6, dst3, dst); dst += dst_stride; } } static void common_hz_2t_and_aver_dst_32w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; v16u8 filt0, dst0, dst1, dst2, dst3; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 res0, res1, res2, res3, res4, res5, res6, res7, filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); for (loop_cnt = (height >> 1); loop_cnt--;) { src0 = LD_SB(src); src2 = LD_SB(src + 16); src3 = LD_SB(src + 24); src1 = __msa_sldi_b(src2, src0, 8); src += src_stride; src4 = LD_SB(src); src6 = LD_SB(src + 16); src7 = LD_SB(src + 24); src5 = __msa_sldi_b(src6, src4, 8); src += src_stride; VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, res0, res1, res2, res3); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, res4, res5, res6, res7); SRARI_H4_UH(res0, res1, res2, res3, FILTER_BITS); SRARI_H4_UH(res4, res5, res6, res7, FILTER_BITS); LD_UB2(dst, 16, dst0, dst1); PCKEV_AVG_ST_UB(res1, res0, dst0, dst); PCKEV_AVG_ST_UB(res3, res2, dst1, (dst + 16)); dst += dst_stride; LD_UB2(dst, 16, dst2, dst3); PCKEV_AVG_ST_UB(res5, res4, dst2, dst); PCKEV_AVG_ST_UB(res7, res6, dst3, (dst + 16)); dst += dst_stride; } } static void common_hz_2t_and_aver_dst_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; v16u8 filt0, dst0, dst1, dst2, dst3; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); for (loop_cnt = height; loop_cnt--;) { LD_SB4(src, 16, src0, src2, src4, src6); src7 = LD_SB(src + 56); SLDI_B3_SB(src2, src4, src6, src0, src2, src4, src1, src3, src5, 8); src += src_stride; VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, out2, out3); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, out6, out7); SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); LD_UB4(dst, 16, dst0, dst1, dst2, dst3); PCKEV_AVG_ST_UB(out1, out0, dst0, dst); 
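/* The remaining three 16-byte groups of this 64-pixel row are handled the same way: pack the rounded 2-tap sums to bytes, average with the dst vectors loaded above, and store. */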
PCKEV_AVG_ST_UB(out3, out2, dst1, dst + 16); PCKEV_AVG_ST_UB(out5, out4, dst2, dst + 32); PCKEV_AVG_ST_UB(out7, out6, dst3, dst + 48); dst += dst_stride; } } void vpx_convolve8_avg_horiz_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16_t *const filter_x = filter[x0_q4]; int8_t cnt, filt_hor[8]; assert(x_step_q4 == 16); assert(((const int32_t *)filter_x)[1] != 0x800000); for (cnt = 0; cnt < 8; ++cnt) { filt_hor[cnt] = filter_x[cnt]; } if (vpx_get_filter_taps(filter_x) == 2) { switch (w) { case 4: common_hz_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], h); break; case 8: common_hz_2t_and_aver_dst_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], h); break; case 16: common_hz_2t_and_aver_dst_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], h); break; case 32: common_hz_2t_and_aver_dst_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], h); break; case 64: common_hz_2t_and_aver_dst_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], h); break; default: vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } else { switch (w) { case 4: common_hz_8t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, h); break; case 8: common_hz_8t_and_aver_dst_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, h); break; case 16: common_hz_8t_and_aver_dst_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, h); break; case 32: common_hz_8t_and_aver_dst_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, h); break; case 64: common_hz_8t_and_aver_dst_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, h); break; default: vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } }
libvpx-1.8.2/vpx_dsp/mips/vpx_convolve8_avg_msa.c
/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <assert.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/vpx_convolve_msa.h" /* 8-tap horizontal then 8-tap vertical filtering for 4-wide columns; the rounded result is averaged with the bytes already in dst. */ static void common_hv_8ht_8vt_and_aver_dst_4w_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; uint32_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16u8 dst0 = { 0 }, mask0, mask1, mask2, mask3, res; v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3; v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; v8i16 hz_out7, hz_out8, hz_out9, res0, res1, vec0, vec1, vec2, vec3, vec4; v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3; mask0 = LD_UB(&mc_filt_mask_arr[16]); src -= (3 + 3 * src_stride); /* rearranging filter */ filt = LD_SH(filter_horiz); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); hz_out0 = HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out2 = HORIZ_8TAP_FILT(src2, src3, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out4 = HORIZ_8TAP_FILT(src4, src5, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out5 = HORIZ_8TAP_FILT(src5, src6, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); SLDI_B2_SH(hz_out2, hz_out4, hz_out0, hz_out2, hz_out1, hz_out3, 8); filt = LD_SH(filter_vert); SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3); ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); vec2 = (v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src7, src8, src9, src10); XORI_B4_128_SB(src7, src8, src9, src10); src += (4 * src_stride); LW4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); hz_out7 = HORIZ_8TAP_FILT(src7, src8, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out6 = (v8i16)__msa_sldi_b((v16i8)hz_out7, (v16i8)hz_out5, 8); vec3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6); res0 = FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt_vt0, filt_vt1, filt_vt2, filt_vt3); hz_out9 = HORIZ_8TAP_FILT(src9, src10, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out8 = (v8i16)__msa_sldi_b((v16i8)hz_out9, (v16i8)hz_out7, 8); vec4 = (v8i16)__msa_ilvev_b((v16i8)hz_out9, (v16i8)hz_out8); res1 = FILT_8TAP_DPADD_S_H(vec1, vec2, vec3, vec4, filt_vt0, filt_vt1, filt_vt2, filt_vt3); SRARI_H2_SH(res0, res1, FILTER_BITS); SAT_SH2_SH(res0, res1, 7); res = PCKEV_XORI128_UB(res0, res1); res = (v16u8)__msa_aver_u_b(res, dst0); ST4x4_UB(res, res, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); hz_out5 = hz_out9; vec0 = vec2; vec1 = vec3; vec2 = vec4; } } static void common_hv_8ht_8vt_and_aver_dst_8w_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; uint64_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3; v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3; v16u8 dst0 = { 0 }, dst1 = { 0 }, mask0, mask1, mask2, mask3; v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; v8i16 hz_out7, hz_out8, hz_out9, hz_out10,
tmp0, tmp1, tmp2, tmp3; v8i16 out0, out1, out2, out3, out4, out5, out6, out7, out8, out9; mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= (3 + 3 * src_stride); /* rearranging filter */ filt = LD_SH(filter_horiz); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); hz_out0 = HORIZ_8TAP_FILT(src0, src0, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out1 = HORIZ_8TAP_FILT(src1, src1, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out2 = HORIZ_8TAP_FILT(src2, src2, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out3 = HORIZ_8TAP_FILT(src3, src3, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out4 = HORIZ_8TAP_FILT(src4, src4, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out5 = HORIZ_8TAP_FILT(src5, src5, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out6 = HORIZ_8TAP_FILT(src6, src6, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); filt = LD_SH(filter_vert); SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3); ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1); ILVEV_B2_SH(hz_out4, hz_out5, hz_out1, hz_out2, out2, out4); ILVEV_B2_SH(hz_out3, hz_out4, hz_out5, hz_out6, out5, out6); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src7, src8, src9, src10); XORI_B4_128_SB(src7, src8, src9, src10); src += (4 * src_stride); LD4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst0); INSERT_D2_UB(tp2, tp3, dst1); hz_out7 = HORIZ_8TAP_FILT(src7, src7, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); out3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6); tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vt0, filt_vt1, filt_vt2, filt_vt3); hz_out8 = HORIZ_8TAP_FILT(src8, src8, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); out7 = (v8i16)__msa_ilvev_b((v16i8)hz_out8, (v16i8)hz_out7); tmp1 = FILT_8TAP_DPADD_S_H(out4, out5, out6, out7, filt_vt0, filt_vt1, filt_vt2, filt_vt3); hz_out9 = HORIZ_8TAP_FILT(src9, src9, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); out8 = (v8i16)__msa_ilvev_b((v16i8)hz_out9, (v16i8)hz_out8); tmp2 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out8, filt_vt0, filt_vt1, filt_vt2, filt_vt3); hz_out10 = HORIZ_8TAP_FILT(src10, src10, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); out9 = (v8i16)__msa_ilvev_b((v16i8)hz_out10, (v16i8)hz_out9); tmp3 = FILT_8TAP_DPADD_S_H(out5, out6, out7, out9, filt_vt0, filt_vt1, filt_vt2, filt_vt3); SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7); CONVERT_UB_AVG_ST8x4_UB(tmp0, tmp1, tmp2, tmp3, dst0, dst1, dst, dst_stride); dst += (4 * dst_stride); hz_out6 = hz_out10; out0 = out2; out1 = out3; out2 = out8; out4 = out6; out5 = out7; out6 = out9; } } static void common_hv_8ht_8vt_and_aver_dst_16w_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 2; multiple8_cnt--;) { common_hv_8ht_8vt_and_aver_dst_8w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 8; dst += 8; } } static void 
common_hv_8ht_8vt_and_aver_dst_32w_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 4; multiple8_cnt--;) { common_hv_8ht_8vt_and_aver_dst_8w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 8; dst += 8; } } static void common_hv_8ht_8vt_and_aver_dst_64w_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 8; multiple8_cnt--;) { common_hv_8ht_8vt_and_aver_dst_8w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 8; dst += 8; } } static void common_hv_2ht_2vt_and_aver_dst_4x4_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert) { uint32_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, src4, mask; v16u8 filt_hz, filt_vt, vec0, vec1; v16u8 dst0 = { 0 }, out; v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, tmp0, tmp1, filt; mask = LD_SB(&mc_filt_mask_arr[16]); /* rearranging filter */ filt = LD_UH(filter_horiz); filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0); filt = LD_UH(filter_vert); filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB5(src, src_stride, src0, src1, src2, src3, src4); hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS); hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8); hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); LW4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); out = __msa_aver_u_b(out, dst0); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); } static void common_hv_2ht_2vt_and_aver_dst_4x8_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert) { uint32_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask; v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3, res0, res1; v16u8 dst0 = { 0 }, dst1 = { 0 }; v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; v8u16 hz_out7, hz_out8, tmp0, tmp1, tmp2, tmp3; v8i16 filt; mask = LD_SB(&mc_filt_mask_arr[16]); /* rearranging filter */ filt = LD_SH(filter_horiz); filt_hz = (v16u8)__msa_splati_h(filt, 0); filt = LD_SH(filter_vert); filt_vt = (v16u8)__msa_splati_h(filt, 0); LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); src8 = LD_SB(src); hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS); hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, filt_hz, FILTER_BITS); hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, FILTER_BITS); hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, FILTER_BITS); SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1, hz_out3, hz_out5, 8); hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6); LW4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); LW4(dst + 4 * dst_stride, dst_stride, 
tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst1); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, res0, res1); AVER_UB2_UB(res0, dst0, res1, dst1, res0, res1); ST4x8_UB(res0, res1, dst, dst_stride); } static void common_hv_2ht_2vt_and_aver_dst_4w_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { if (4 == height) { common_hv_2ht_2vt_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert); } else if (8 == height) { common_hv_2ht_2vt_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert); } } static void common_hv_2ht_2vt_and_aver_dst_8x4_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert) { uint64_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, src4, mask; v16u8 filt_hz, filt_vt, dst0 = { 0 }, dst1 = { 0 }, vec0, vec1, vec2, vec3; v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3; v8i16 filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_SH(filter_horiz); filt_hz = (v16u8)__msa_splati_h(filt, 0); filt = LD_SH(filter_vert); filt_vt = (v16u8)__msa_splati_h(filt, 0); LD_SB5(src, src_stride, src0, src1, src2, src3, src4); src += (5 * src_stride); LD4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst0); INSERT_D2_UB(tp2, tp3, dst1); hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp0 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); vec1 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp1 = __msa_dotp_u_h(vec1, filt_vt); hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp2 = __msa_dotp_u_h(vec2, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp3 = __msa_dotp_u_h(vec3, filt_vt); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); PCKEV_AVG_ST8x4_UB(tmp0, tmp1, tmp2, tmp3, dst0, dst1, dst, dst_stride); } static void common_hv_2ht_2vt_and_aver_dst_8x8mult_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; uint64_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, src4, mask; v16u8 filt_hz, filt_vt, vec0, dst0 = { 0 }, dst1 = { 0 }; v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3; v8i16 filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_SH(filter_horiz); filt_hz = (v16u8)__msa_splati_h(filt, 0); filt = LD_SH(filter_vert); filt_vt = (v16u8)__msa_splati_h(filt, 0); src0 = LD_SB(src); src += src_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp0 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = 
HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp1 = __msa_dotp_u_h(vec0, filt_vt); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp2 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp3 = __msa_dotp_u_h(vec0, filt_vt); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); LD4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst0); INSERT_D2_UB(tp2, tp3, dst1); PCKEV_AVG_ST8x4_UB(tmp0, tmp1, tmp2, tmp3, dst0, dst1, dst, dst_stride); dst += (4 * dst_stride); } } static void common_hv_2ht_2vt_and_aver_dst_8w_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { if (4 == height) { common_hv_2ht_2vt_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert); } else { common_hv_2ht_2vt_and_aver_dst_8x8mult_msa( src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); } } static void common_hv_2ht_2vt_and_aver_dst_16w_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; v16u8 filt_hz, filt_vt, vec0, vec1, dst0, dst1, dst2, dst3; v8u16 hz_out0, hz_out1, hz_out2, hz_out3, tmp0, tmp1; v8i16 filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_SH(filter_horiz); filt_hz = (v16u8)__msa_splati_h(filt, 0); filt = LD_SH(filter_vert); filt_vt = (v16u8)__msa_splati_h(filt, 0); LD_SB2(src, 8, src0, src1); src += src_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst); dst += dst_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_AVG_ST_UB(tmp1, tmp0, dst1, dst); dst += dst_stride; hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst); dst += dst_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, 
FILTER_BITS); PCKEV_AVG_ST_UB(tmp1, tmp0, dst3, dst); dst += dst_stride; } } static void common_hv_2ht_2vt_and_aver_dst_32w_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 2; multiple8_cnt--;) { common_hv_2ht_2vt_and_aver_dst_16w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 16; dst += 16; } } static void common_hv_2ht_2vt_and_aver_dst_64w_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 4; multiple8_cnt--;) { common_hv_2ht_2vt_and_aver_dst_16w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 16; dst += 16; } } void vpx_convolve8_avg_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16_t *const filter_x = filter[x0_q4]; const int16_t *const filter_y = filter[y0_q4]; int8_t cnt, filt_hor[8], filt_ver[8]; assert(x_step_q4 == 16); assert(y_step_q4 == 16); assert(((const int32_t *)filter_x)[1] != 0x800000); assert(((const int32_t *)filter_y)[1] != 0x800000); for (cnt = 0; cnt < 8; ++cnt) { filt_hor[cnt] = filter_x[cnt]; filt_ver[cnt] = filter_y[cnt]; } if (vpx_get_filter_taps(filter_x) == 2 && vpx_get_filter_taps(filter_y) == 2) { switch (w) { case 4: common_hv_2ht_2vt_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], &filt_ver[3], h); break; case 8: common_hv_2ht_2vt_and_aver_dst_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], &filt_ver[3], h); break; case 16: common_hv_2ht_2vt_and_aver_dst_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], &filt_ver[3], h); break; case 32: common_hv_2ht_2vt_and_aver_dst_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], &filt_ver[3], h); break; case 64: common_hv_2ht_2vt_and_aver_dst_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], &filt_ver[3], h); break; default: vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } else if (vpx_get_filter_taps(filter_x) == 2 || vpx_get_filter_taps(filter_y) == 2) { vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { switch (w) { case 4: common_hv_8ht_8vt_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, filt_ver, h); break; case 8: common_hv_8ht_8vt_and_aver_dst_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, filt_ver, h); break; case 16: common_hv_8ht_8vt_and_aver_dst_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, filt_ver, h); break; case 32: common_hv_8ht_8vt_and_aver_dst_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, filt_ver, h); break; case 64: common_hv_8ht_8vt_and_aver_dst_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, filt_ver, h); break; default: vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } }
libvpx-1.8.2/vpx_dsp/mips/vpx_convolve8_avg_vert_msa.c
/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/vpx_convolve_msa.h" /* Vertical-only 8-tap filtering variants that average the rounded result with the bytes already in dst. */ static void common_vt_8t_and_aver_dst_4w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; uint32_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16u8 dst0 = { 0 }, out; v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776; v16i8 src10998, filt0, filt1, filt2, filt3; v8i16 filt, out10, out32; src -= (3 * src_stride); filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, src54_r, src21_r); ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); ILVR_D3_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src2110, src4332, src6554); XORI_B3_128_SB(src2110, src4332, src6554); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src7, src8, src9, src10); src += (4 * src_stride); LW4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, src87_r, src98_r, src109_r); ILVR_D2_SB(src87_r, src76_r, src109_r, src98_r, src8776, src10998); XORI_B2_128_SB(src8776, src10998); out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776, filt0, filt1, filt2, filt3); out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, filt0, filt1, filt2, filt3); SRARI_H2_SH(out10, out32, FILTER_BITS); SAT_SH2_SH(out10, out32, 7); out = PCKEV_XORI128_UB(out10, out32); out = __msa_aver_u_b(out, dst0); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); src2110 = src6554; src4332 = src8776; src6554 = src10998; src6 = src10; } } static void common_vt_8t_and_aver_dst_8w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; uint64_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16u8 dst0 = { 0 }, dst1 = { 0 }; v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; v16i8 src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3; v8i16 filt, out0, out1, out2, out3; src -= (3 * src_stride); filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, src54_r, src21_r); ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src7, src8, src9, src10); src += (4 * src_stride); LD4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst0); INSERT_D2_UB(tp2, tp3, dst1); XORI_B4_128_SB(src7, src8, src9, src10); ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, src87_r, src98_r, src109_r); out0 =
FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0, filt1, filt2, filt3); out1 = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0, filt1, filt2, filt3); out2 = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0, filt1, filt2, filt3); out3 = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); CONVERT_UB_AVG_ST8x4_UB(out0, out1, out2, out3, dst0, dst1, dst, dst_stride); dst += (4 * dst_stride); src10_r = src54_r; src32_r = src76_r; src54_r = src98_r; src21_r = src65_r; src43_r = src87_r; src65_r = src109_r; src6 = src10; } } static void common_vt_8t_and_aver_dst_16w_mult_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height, int32_t width) { const uint8_t *src_tmp; uint8_t *dst_tmp; uint32_t loop_cnt, cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l; v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l; v16i8 filt0, filt1, filt2, filt3; v16u8 dst0, dst1, dst2, dst3, tmp0, tmp1, tmp2, tmp3; v8i16 out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l, filt; src -= (3 * src_stride); filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); for (cnt = (width >> 4); cnt--;) { src_tmp = src; dst_tmp = dst; LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6); XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); src_tmp += (7 * src_stride); ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, src54_r, src21_r); ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l, src54_l, src21_l); ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src_tmp, src_stride, src7, src8, src9, src10); src_tmp += (4 * src_stride); LD_UB4(dst_tmp, dst_stride, dst0, dst1, dst2, dst3); XORI_B4_128_SB(src7, src8, src9, src10); ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, src87_r, src98_r, src109_r); ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l, src87_l, src98_l, src109_l); out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0, filt1, filt2, filt3); out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0, filt1, filt2, filt3); out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0, filt1, filt2, filt3); out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3); out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0, filt1, filt2, filt3); out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0, filt1, filt2, filt3); out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0, filt1, filt2, filt3); out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0, filt1, filt2, filt3); SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS); SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS); SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7); SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7); PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l, out3_r, tmp0, tmp1, tmp2, tmp3); XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3); AVER_UB4_UB(tmp0, dst0, tmp1, dst1, 
tmp2, dst2, tmp3, dst3, dst0, dst1, dst2, dst3); ST_UB4(dst0, dst1, dst2, dst3, dst_tmp, dst_stride); dst_tmp += (4 * dst_stride); src10_r = src54_r; src32_r = src76_r; src54_r = src98_r; src21_r = src65_r; src43_r = src87_r; src65_r = src109_r; src10_l = src54_l; src32_l = src76_l; src54_l = src98_l; src21_l = src65_l; src43_l = src87_l; src65_l = src109_l; src6 = src10; } src += 16; dst += 16; } } static void common_vt_8t_and_aver_dst_16w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { common_vt_8t_and_aver_dst_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height, 16); } static void common_vt_8t_and_aver_dst_32w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { common_vt_8t_and_aver_dst_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height, 32); } static void common_vt_8t_and_aver_dst_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { common_vt_8t_and_aver_dst_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height, 64); } static void common_vt_2t_and_aver_dst_4x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { uint32_t tp0, tp1, tp2, tp3; v16i8 src0, src1, src2, src3, src4; v16u8 dst0 = { 0 }, out, filt0, src2110, src4332; v16i8 src10_r, src32_r, src21_r, src43_r; v8i16 filt; v8u16 tmp0, tmp1; filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); src4 = LD_SB(src); src += src_stride; LW4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, src32_r, src43_r); ILVR_D2_UB(src21_r, src10_r, src43_r, src32_r, src2110, src4332); DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); out = (v16u8)__msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); out = __msa_aver_u_b(out, dst0); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); } static void common_vt_2t_and_aver_dst_4x8_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { uint32_t tp0, tp1, tp2, tp3; v16u8 dst0 = { 0 }, dst1 = { 0 }; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src87_r; v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r, src65_r; v16u8 src2110, src4332, src6554, src8776, filt0; v8u16 tmp0, tmp1, tmp2, tmp3; v8i16 filt; filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); src8 = LD_SB(src); LW4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst0); LW4(dst + 4 * dst_stride, dst_stride, tp0, tp1, tp2, tp3); INSERT_W4_UB(tp0, tp1, tp2, tp3, dst1); ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, src32_r, src43_r); ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r, src76_r, src87_r); ILVR_D4_UB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src87_r, src76_r, src2110, src4332, src6554, src8776); DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); PCKEV_B2_UB(tmp1, tmp0, tmp3, tmp2, src2110, src4332); AVER_UB2_UB(src2110, dst0, src4332, dst1, src2110, src4332); ST4x8_UB(src2110, src4332, dst, dst_stride); } static 
void common_vt_2t_and_aver_dst_4w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { if (4 == height) { common_vt_2t_and_aver_dst_4x4_msa(src, src_stride, dst, dst_stride, filter); } else if (8 == height) { common_vt_2t_and_aver_dst_4x8_msa(src, src_stride, dst, dst_stride, filter); } } static void common_vt_2t_and_aver_dst_8x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { int64_t tp0, tp1, tp2, tp3; v16u8 src0, src1, src2, src3, src4; v16u8 dst0 = { 0 }, dst1 = { 0 }, vec0, vec1, vec2, vec3, filt0; v8u16 tmp0, tmp1, tmp2, tmp3; v8i16 filt; /* rearranging filter_y */ filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); LD_UB5(src, src_stride, src0, src1, src2, src3, src4); LD4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst0); INSERT_D2_UB(tp2, tp3, dst1); ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1); ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); PCKEV_AVG_ST8x4_UB(tmp0, tmp1, tmp2, tmp3, dst0, dst1, dst, dst_stride); } static void common_vt_2t_and_aver_dst_8x8mult_msa( const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; int64_t tp0, tp1, tp2, tp3; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; v16u8 dst0 = { 0 }, dst1 = { 0 }, dst2 = { 0 }, dst3 = { 0 }; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; v8u16 tmp0, tmp1, tmp2, tmp3; v8i16 filt; /* rearranging filter_y */ filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 3); loop_cnt--;) { LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8); src += (8 * src_stride); LD4(dst, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst0); INSERT_D2_UB(tp2, tp3, dst1); LD4(dst + 4 * dst_stride, dst_stride, tp0, tp1, tp2, tp3); INSERT_D2_UB(tp0, tp1, dst2); INSERT_D2_UB(tp2, tp3, dst3); ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, vec2, vec3); ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, vec4, vec5, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); PCKEV_AVG_ST8x4_UB(tmp0, tmp1, tmp2, tmp3, dst0, dst1, dst, dst_stride); dst += (4 * dst_stride); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); PCKEV_AVG_ST8x4_UB(tmp0, tmp1, tmp2, tmp3, dst2, dst3, dst, dst_stride); dst += (4 * dst_stride); src0 = src8; } } static void common_vt_2t_and_aver_dst_8w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { if (4 == height) { common_vt_2t_and_aver_dst_8x4_msa(src, src_stride, dst, dst_stride, filter); } else { common_vt_2t_and_aver_dst_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height); } } static void common_vt_2t_and_aver_dst_16w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4, dst0, dst1, dst2, dst3, filt0; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 tmp0, tmp1, tmp2, tmp3, filt; /* rearranging filter_y */ filt = LD_UH(filter); filt0 = 
(v16u8)__msa_splati_h((v8i16)filt, 0); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst); dst += dst_stride; ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst); dst += dst_stride; DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst); dst += dst_stride; DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_AVG_ST_UB(tmp3, tmp2, dst3, dst); dst += dst_stride; src0 = src4; } } static void common_vt_2t_and_aver_dst_32w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9; v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; v8u16 tmp0, tmp1, tmp2, tmp3, filt; /* rearranging filter_y */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_UB2(src, 16, src0, src5); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); LD_UB4(src + 16, src_stride, src6, src7, src8, src9); LD_UB4(dst + 16, dst_stride, dst4, dst5, dst6, dst7); src += (4 * src_stride); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst + dst_stride); ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_AVG_ST_UB(tmp1, tmp0, dst2, dst + 2 * dst_stride); DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_AVG_ST_UB(tmp3, tmp2, dst3, dst + 3 * dst_stride); ILVR_B2_UB(src6, src5, src7, src6, vec0, vec2); ILVL_B2_UB(src6, src5, src7, src6, vec1, vec3); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_AVG_ST_UB(tmp1, tmp0, dst4, dst + 16); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_AVG_ST_UB(tmp3, tmp2, dst5, dst + 16 + dst_stride); ILVR_B2_UB(src8, src7, src9, src8, vec4, vec6); ILVL_B2_UB(src8, src7, src9, src8, vec5, vec7); DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_AVG_ST_UB(tmp1, tmp0, dst6, dst + 16 + 2 * dst_stride); DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_AVG_ST_UB(tmp3, tmp2, dst7, dst + 16 + 3 * dst_stride); dst += (4 * dst_stride); src0 = src4; src5 = src9; } } static void common_vt_2t_and_aver_dst_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t 
dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4, src5; v16u8 src6, src7, src8, src9, src10, src11, filt0; v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; v8u16 filt; /* rearranging filter_y */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_UB4(src, 16, src0, src3, src6, src9); src += src_stride; for (loop_cnt = (height >> 1); loop_cnt--;) { LD_UB2(src, src_stride, src1, src2); LD_UB2(dst, dst_stride, dst0, dst1); LD_UB2(src + 16, src_stride, src4, src5); LD_UB2(dst + 16, dst_stride, dst2, dst3); LD_UB2(src + 32, src_stride, src7, src8); LD_UB2(dst + 32, dst_stride, dst4, dst5); LD_UB2(src + 48, src_stride, src10, src11); LD_UB2(dst + 48, dst_stride, dst6, dst7); src += (2 * src_stride); ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_AVG_ST_UB(tmp1, tmp0, dst0, dst); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_AVG_ST_UB(tmp3, tmp2, dst1, dst + dst_stride); ILVR_B2_UB(src4, src3, src5, src4, vec4, vec6); ILVL_B2_UB(src4, src3, src5, src4, vec5, vec7); DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5); SRARI_H2_UH(tmp4, tmp5, FILTER_BITS); PCKEV_AVG_ST_UB(tmp5, tmp4, dst2, dst + 16); DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7); SRARI_H2_UH(tmp6, tmp7, FILTER_BITS); PCKEV_AVG_ST_UB(tmp7, tmp6, dst3, dst + 16 + dst_stride); ILVR_B2_UB(src7, src6, src8, src7, vec0, vec2); ILVL_B2_UB(src7, src6, src8, src7, vec1, vec3); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_AVG_ST_UB(tmp1, tmp0, dst4, dst + 32); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_AVG_ST_UB(tmp3, tmp2, dst5, dst + 32 + dst_stride); ILVR_B2_UB(src10, src9, src11, src10, vec4, vec6); ILVL_B2_UB(src10, src9, src11, src10, vec5, vec7); DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5); SRARI_H2_UH(tmp4, tmp5, FILTER_BITS); PCKEV_AVG_ST_UB(tmp5, tmp4, dst6, (dst + 48)); DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7); SRARI_H2_UH(tmp6, tmp7, FILTER_BITS); PCKEV_AVG_ST_UB(tmp7, tmp6, dst7, dst + 48 + dst_stride); dst += (2 * dst_stride); src0 = src2; src3 = src5; src6 = src8; src9 = src11; } } void vpx_convolve8_avg_vert_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16_t *const filter_y = filter[y0_q4]; int8_t cnt, filt_ver[8]; assert(y_step_q4 == 16); assert(((const int32_t *)filter_y)[1] != 0x800000); for (cnt = 0; cnt < 8; ++cnt) { filt_ver[cnt] = filter_y[cnt]; } if (vpx_get_filter_taps(filter_y) == 2) { switch (w) { case 4: common_vt_2t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_ver[3], h); break; case 8: common_vt_2t_and_aver_dst_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_ver[3], h); break; case 16: common_vt_2t_and_aver_dst_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_ver[3], h); break; case 32: common_vt_2t_and_aver_dst_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_ver[3], h); break; case 64: common_vt_2t_and_aver_dst_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_ver[3], h); break; default: 
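/* Widths without an MSA fast path fall back to the portable C implementation. */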
vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } else { switch (w) { case 4: common_vt_8t_and_aver_dst_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_ver, h); break; case 8: common_vt_8t_and_aver_dst_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_ver, h); break; case 16: common_vt_8t_and_aver_dst_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_ver, h); break; case 32: common_vt_8t_and_aver_dst_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_ver, h); break; case 64: common_vt_8t_and_aver_dst_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_ver, h); break; default: vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } }
libvpx-1.8.2/vpx_dsp/mips/vpx_convolve8_horiz_msa.c
/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/vpx_convolve_msa.h" static void common_hz_8t_4x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { v16u8 mask0, mask1, mask2, mask3, out; v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v8i16 filt, out0, out1; mask0 = LD_UB(&mc_filt_mask_arr[16]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1); SRARI_H2_SH(out0, out1, FILTER_BITS); SAT_SH2_SH(out0, out1, 7); out = PCKEV_XORI128_UB(out0, out1); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); } static void common_hz_8t_4x8_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { v16i8 filt0, filt1, filt2, filt3; v16i8 src0, src1, src2, src3; v16u8 mask0, mask1, mask2, mask3, out; v8i16 filt, out0, out1, out2, out3; mask0 = LD_UB(&mc_filt_mask_arr[16]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); src += (4 * src_stride); HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1); LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); out = PCKEV_XORI128_UB(out0, out1); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); out = PCKEV_XORI128_UB(out2, out3); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); } static void common_hz_8t_4w_msa(const uint8_t *src, int32_t src_stride,
uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { if (4 == height) { common_hz_8t_4x4_msa(src, src_stride, dst, dst_stride, filter); } else if (8 == height) { common_hz_8t_4x8_msa(src, src_stride, dst, dst_stride, filter); } } static void common_hz_8t_8x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1; v8i16 filt, out0, out1, out2, out3; mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); tmp0 = PCKEV_XORI128_UB(out0, out1); tmp1 = PCKEV_XORI128_UB(out2, out3); ST8x4_UB(tmp0, tmp1, dst, dst_stride); } static void common_hz_8t_8x8mult_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v16u8 mask0, mask1, mask2, mask3, tmp0, tmp1; v8i16 filt, out0, out1, out2, out3; mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src1, src2, src3); XORI_B4_128_SB(src0, src1, src2, src3); src += (4 * src_stride); HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); tmp0 = PCKEV_XORI128_UB(out0, out1); tmp1 = PCKEV_XORI128_UB(out2, out3); ST8x4_UB(tmp0, tmp1, dst, dst_stride); dst += (4 * dst_stride); } } static void common_hz_8t_8w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { if (4 == height) { common_hz_8t_8x4_msa(src, src_stride, dst, dst_stride, filter); } else { common_hz_8t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height); } } static void common_hz_8t_16w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v16u8 mask0, mask1, mask2, mask3, out; v8i16 filt, out0, out1, out2, out3; mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; for (loop_cnt = (height >> 1); loop_cnt--;) { LD_SB2(src, src_stride, src0, src2); LD_SB2(src + 8, src_stride, src1, src3); XORI_B4_128_SB(src0, src1, src2, src3); src += (2 * src_stride); HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); out = PCKEV_XORI128_UB(out0, out1); ST_UB(out, dst); dst += dst_stride; out = PCKEV_XORI128_UB(out2, out3); ST_UB(out, dst); dst += dst_stride; 
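/* Two 16-pixel rows are produced per iteration; PCKEV_XORI128_UB packs the saturated halfword results to bytes and flips the sign bit again to return from the signed domain set up by XORI_B4_128_SB. */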
} } static void common_hz_8t_32w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v16u8 mask0, mask1, mask2, mask3, out; v8i16 filt, out0, out1, out2, out3; mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; for (loop_cnt = (height >> 1); loop_cnt--;) { src0 = LD_SB(src); src2 = LD_SB(src + 16); src3 = LD_SB(src + 24); src1 = __msa_sldi_b(src2, src0, 8); src += src_stride; XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); src0 = LD_SB(src); src2 = LD_SB(src + 16); src3 = LD_SB(src + 24); src1 = __msa_sldi_b(src2, src0, 8); src += src_stride; out = PCKEV_XORI128_UB(out0, out1); ST_UB(out, dst); out = PCKEV_XORI128_UB(out2, out3); ST_UB(out, dst + 16); dst += dst_stride; XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); out = PCKEV_XORI128_UB(out0, out1); ST_UB(out, dst); out = PCKEV_XORI128_UB(out2, out3); ST_UB(out, dst + 16); dst += dst_stride; } } static void common_hz_8t_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { int32_t loop_cnt; v16i8 src0, src1, src2, src3, filt0, filt1, filt2, filt3; v16u8 mask0, mask1, mask2, mask3, out; v8i16 filt, out0, out1, out2, out3; mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= 3; /* rearranging filter */ filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; for (loop_cnt = height; loop_cnt--;) { src0 = LD_SB(src); src2 = LD_SB(src + 16); src3 = LD_SB(src + 24); src1 = __msa_sldi_b(src2, src0, 8); XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); out = PCKEV_XORI128_UB(out0, out1); ST_UB(out, dst); out = PCKEV_XORI128_UB(out2, out3); ST_UB(out, dst + 16); src0 = LD_SB(src + 32); src2 = LD_SB(src + 48); src3 = LD_SB(src + 56); src1 = __msa_sldi_b(src2, src0, 8); src += src_stride; XORI_B4_128_SB(src0, src1, src2, src3); HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, mask2, mask3, filt0, filt1, filt2, filt3, out0, out1, out2, out3); SRARI_H4_SH(out0, out1, out2, out3, FILTER_BITS); SAT_SH4_SH(out0, out1, out2, out3, 7); out = PCKEV_XORI128_UB(out0, out1); ST_UB(out, dst + 32); out = PCKEV_XORI128_UB(out2, out3); ST_UB(out, dst + 48); dst += dst_stride; } } static void common_hz_2t_4x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { v16i8 src0, src1, src2, src3, mask; v16u8 filt0, vec0, vec1, res0, res1; v8u16 vec2, vec3, filt; mask = LD_SB(&mc_filt_mask_arr[16]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB4(src, src_stride, src0, src1, src2, src3); VSHF_B2_UB(src0, src1, 
src2, src3, mask, mask, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt0, filt0, vec2, vec3); SRARI_H2_UH(vec2, vec3, FILTER_BITS); PCKEV_B2_UB(vec2, vec2, vec3, vec3, res0, res1); ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); } static void common_hz_2t_4x8_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { v16u8 vec0, vec1, vec2, vec3, filt0; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; v16i8 res0, res1, res2, res3; v8u16 vec4, vec5, vec6, vec7, filt; mask = LD_SB(&mc_filt_mask_arr[16]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); VSHF_B2_UB(src0, src1, src2, src3, mask, mask, vec0, vec1); VSHF_B2_UB(src4, src5, src6, src7, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec4, vec5, vec6, vec7); SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS); PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, res2, res3); ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); dst += (4 * dst_stride); ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); } static void common_hz_2t_4w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { if (4 == height) { common_hz_2t_4x4_msa(src, src_stride, dst, dst_stride, filter); } else if (8 == height) { common_hz_2t_4x8_msa(src, src_stride, dst, dst_stride, filter); } } static void common_hz_2t_8x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { v16u8 filt0; v16i8 src0, src1, src2, src3, mask; v8u16 vec0, vec1, vec2, vec3, filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB4(src, src_stride, src0, src1, src2, src3); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); PCKEV_B2_SB(vec1, vec0, vec3, vec2, src0, src1); ST8x4_UB(src0, src1, dst, dst_stride); } static void common_hz_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { v16u8 filt0; v16i8 src0, src1, src2, src3, mask, out0, out1; v8u16 vec0, vec1, vec2, vec3, filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); if (16 == height) { LD_SB4(src, 
src_stride, src0, src1, src2, src3); src += (4 * src_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); LD_SB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); VSHF_B2_UH(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UH(src2, src2, src3, src3, mask, mask, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, vec0, vec1, vec2, vec3); SRARI_H4_UH(vec0, vec1, vec2, vec3, FILTER_BITS); PCKEV_B2_SB(vec1, vec0, vec3, vec2, out0, out1); ST8x4_UB(out0, out1, dst + 4 * dst_stride, dst_stride); } } static void common_hz_2t_8w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { if (4 == height) { common_hz_2t_8x4_msa(src, src_stride, dst, dst_stride, filter); } else { common_hz_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height); } } static void common_hz_2t_16w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt; mask = LD_SB(&mc_filt_mask_arr[0]); loop_cnt = (height >> 2) - 1; /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, out2, out3); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, out6, out7); SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); PCKEV_ST_SB(out0, out1, dst); dst += dst_stride; PCKEV_ST_SB(out2, out3, dst); dst += dst_stride; PCKEV_ST_SB(out4, out5, dst); dst += dst_stride; PCKEV_ST_SB(out6, out7, dst); dst += dst_stride; for (; loop_cnt--;) { LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, out2, out3); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, out6, out7); SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); PCKEV_ST_SB(out0, out1, dst); dst += dst_stride; PCKEV_ST_SB(out2, out3, dst); dst += dst_stride; PCKEV_ST_SB(out4, out5, dst); dst += dst_stride; PCKEV_ST_SB(out6, out7, dst); dst += dst_stride; } } static void common_hz_2t_32w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, 
src2, src3, src4, src5, src6, src7, mask; v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); for (loop_cnt = height >> 1; loop_cnt--;) { src0 = LD_SB(src); src2 = LD_SB(src + 16); src3 = LD_SB(src + 24); src1 = __msa_sldi_b(src2, src0, 8); src += src_stride; src4 = LD_SB(src); src6 = LD_SB(src + 16); src7 = LD_SB(src + 24); src5 = __msa_sldi_b(src6, src4, 8); src += src_stride; VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, out2, out3); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, out6, out7); SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); PCKEV_ST_SB(out0, out1, dst); PCKEV_ST_SB(out2, out3, dst + 16); dst += dst_stride; PCKEV_ST_SB(out4, out5, dst); PCKEV_ST_SB(out6, out7, dst + 16); dst += dst_stride; } } static void common_hz_2t_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; v16u8 filt0, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7; v8u16 out0, out1, out2, out3, out4, out5, out6, out7, filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_UH(filter); filt0 = (v16u8)__msa_splati_h((v8i16)filt, 0); for (loop_cnt = height; loop_cnt--;) { src0 = LD_SB(src); src2 = LD_SB(src + 16); src4 = LD_SB(src + 32); src6 = LD_SB(src + 48); src7 = LD_SB(src + 56); SLDI_B3_SB(src2, src4, src6, src0, src2, src4, src1, src3, src5, 8); src += src_stride; VSHF_B2_UB(src0, src0, src1, src1, mask, mask, vec0, vec1); VSHF_B2_UB(src2, src2, src3, src3, mask, mask, vec2, vec3); VSHF_B2_UB(src4, src4, src5, src5, mask, mask, vec4, vec5); VSHF_B2_UB(src6, src6, src7, src7, mask, mask, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, out0, out1, out2, out3); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, filt0, out4, out5, out6, out7); SRARI_H4_UH(out0, out1, out2, out3, FILTER_BITS); SRARI_H4_UH(out4, out5, out6, out7, FILTER_BITS); PCKEV_ST_SB(out0, out1, dst); PCKEV_ST_SB(out2, out3, dst + 16); PCKEV_ST_SB(out4, out5, dst + 32); PCKEV_ST_SB(out6, out7, dst + 48); dst += dst_stride; } } void vpx_convolve8_horiz_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16_t *const filter_x = filter[x0_q4]; int8_t cnt, filt_hor[8]; assert(x_step_q4 == 16); assert(((const int32_t *)filter_x)[1] != 0x800000); for (cnt = 0; cnt < 8; ++cnt) { filt_hor[cnt] = filter_x[cnt]; } if (vpx_get_filter_taps(filter_x) == 2) { switch (w) { case 4: common_hz_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], h); break; case 8: common_hz_2t_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], h); break; case 16: common_hz_2t_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], h); break; case 32: common_hz_2t_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], h); 
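/* A bilinear kernel stores its two nonzero taps at positions 3 and 4 of the 8-tap array, so &filt_hor[3] passes exactly the active coefficient pair to the 2-tap helpers. */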
break; case 64: common_hz_2t_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], h); break; default: vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } else { switch (w) { case 4: common_hz_8t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, h); break; case 8: common_hz_8t_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, h); break; case 16: common_hz_8t_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, h); break; case 32: common_hz_8t_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, h); break; case 64: common_hz_8t_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, h); break; default: vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } }
libvpx-1.8.2/vpx_dsp/mips/vpx_convolve8_mmi.c
/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <string.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/vpx_filter.h" #include "vpx_ports/asmdefs_mmi.h" #include "vpx_ports/mem.h" #define GET_DATA_H_MMI \ "pmaddhw %[ftmp4], %[ftmp4], %[filter1] \n\t" \ "pmaddhw %[ftmp5], %[ftmp5], %[filter2] \n\t" \ "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ "punpckhwd %[ftmp5], %[ftmp4], %[ftmp0] \n\t" \ "paddw %[ftmp4], %[ftmp4], %[ftmp5] \n\t" \ "pmaddhw %[ftmp6], %[ftmp6], %[filter1] \n\t" \ "pmaddhw %[ftmp7], %[ftmp7], %[filter2] \n\t" \ "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t" \ "punpckhwd %[ftmp7], %[ftmp6], %[ftmp0] \n\t" \ "paddw %[ftmp6], %[ftmp6], %[ftmp7] \n\t" \ "punpcklwd %[srcl], %[ftmp4], %[ftmp6] \n\t" \ "pmaddhw %[ftmp8], %[ftmp8], %[filter1] \n\t" \ "pmaddhw %[ftmp9], %[ftmp9], %[filter2] \n\t" \ "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ "punpckhwd %[ftmp9], %[ftmp8], %[ftmp0] \n\t" \ "paddw %[ftmp8], %[ftmp8], %[ftmp9] \n\t" \ "pmaddhw %[ftmp10], %[ftmp10], %[filter1] \n\t" \ "pmaddhw %[ftmp11], %[ftmp11], %[filter2] \n\t" \ "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \ "punpckhwd %[ftmp11], %[ftmp10], %[ftmp0] \n\t" \ "paddw %[ftmp10], %[ftmp10], %[ftmp11] \n\t" \ "punpcklwd %[srch], %[ftmp8], %[ftmp10] \n\t" #define GET_DATA_V_MMI \ "punpcklhw %[srcl], %[ftmp4], %[ftmp5] \n\t" \ "pmaddhw %[srcl], %[srcl], %[filter10] \n\t" \ "punpcklhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t" \ "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t" \ "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \ "punpcklhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t" \ "pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t" \ "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \ "punpcklhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t" \ "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t" \ "paddw %[srcl], %[srcl], %[ftmp12] \n\t" \ "punpckhhw %[srch], %[ftmp4], %[ftmp5] \n\t" \ "pmaddhw %[srch], %[srch], %[filter10] \n\t" \ "punpckhhw %[ftmp12], %[ftmp6], %[ftmp7] \n\t" \ "pmaddhw %[ftmp12], %[ftmp12], %[filter32] \n\t" \ "paddw %[srch], %[srch], %[ftmp12] \n\t" \ "punpckhhw %[ftmp12], %[ftmp8], %[ftmp9] \n\t" \
"pmaddhw %[ftmp12], %[ftmp12], %[filter54] \n\t" \ "paddw %[srch], %[srch], %[ftmp12] \n\t" \ "punpckhhw %[ftmp12], %[ftmp10], %[ftmp11] \n\t" \ "pmaddhw %[ftmp12], %[ftmp12], %[filter76] \n\t" \ "paddw %[srch], %[srch], %[ftmp12] \n\t" /* clang-format off */ #define ROUND_POWER_OF_TWO_MMI \ /* Add para[0] */ \ "lw %[tmp0], 0x00(%[para]) \n\t" \ MMI_MTC1(%[tmp0], %[ftmp6]) \ "punpcklwd %[ftmp6], %[ftmp6], %[ftmp6] \n\t" \ "paddw %[srcl], %[srcl], %[ftmp6] \n\t" \ "paddw %[srch], %[srch], %[ftmp6] \n\t" \ /* Arithmetic right shift para[1] bits */ \ "lw %[tmp0], 0x04(%[para]) \n\t" \ MMI_MTC1(%[tmp0], %[ftmp5]) \ "psraw %[srcl], %[srcl], %[ftmp5] \n\t" \ "psraw %[srch], %[srch], %[ftmp5] \n\t" /* clang-format on */ #define CLIP_PIXEL_MMI \ /* Staturated operation */ \ "packsswh %[srcl], %[srcl], %[srch] \n\t" \ "packushb %[ftmp12], %[srcl], %[ftmp0] \n\t" static void convolve_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int32_t w, int32_t h) { const int16_t *filter_x = filter[x0_q4]; double ftmp[14]; uint32_t tmp[2]; uint32_t para[5]; para[0] = (1 << ((FILTER_BITS)-1)); para[1] = FILTER_BITS; src -= SUBPEL_TAPS / 2 - 1; src_stride -= w; dst_stride -= w; (void)x_step_q4; /* clang-format off */ __asm__ volatile( "move %[tmp1], %[width] \n\t" "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "gsldlc1 %[filter1], 0x03(%[filter]) \n\t" "gsldrc1 %[filter1], 0x00(%[filter]) \n\t" "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t" "gsldrc1 %[filter2], 0x08(%[filter]) \n\t" "1: \n\t" /* Get 8 data per row */ "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t" "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t" "gsldlc1 %[ftmp7], 0x08(%[src]) \n\t" "gsldrc1 %[ftmp7], 0x01(%[src]) \n\t" "gsldlc1 %[ftmp9], 0x09(%[src]) \n\t" "gsldrc1 %[ftmp9], 0x02(%[src]) \n\t" "gsldlc1 %[ftmp11], 0x0A(%[src]) \n\t" "gsldrc1 %[ftmp11], 0x03(%[src]) \n\t" "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t" "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t" "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t" "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t" "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" MMI_ADDIU(%[width], %[width], -0x04) /* Get raw data */ GET_DATA_H_MMI ROUND_POWER_OF_TWO_MMI CLIP_PIXEL_MMI "swc1 %[ftmp12], 0x00(%[dst]) \n\t" MMI_ADDIU(%[dst], %[dst], 0x04) MMI_ADDIU(%[src], %[src], 0x04) /* Loop count */ "bnez %[width], 1b \n\t" "move %[width], %[tmp1] \n\t" MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[dst], %[dst], %[dst_stride]) MMI_ADDIU(%[height], %[height], -0x01) "bnez %[height], 1b \n\t" : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]), [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]), [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]), [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]), [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]), [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), [src]"+&r"(src), [width]"+&r"(w), [dst]"+&r"(dst), [height]"+&r"(h) : [filter]"r"(filter_x), [para]"r"(para), [src_stride]"r"((mips_reg)src_stride), [dst_stride]"r"((mips_reg)dst_stride) : "memory" ); /* clang-format on */ } static void convolve_vert_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int y0_q4, int y_step_q4, int32_t w, int32_t h) { const int16_t *filter_y = filter[y0_q4]; double ftmp[16]; 
uint32_t tmp[1]; uint32_t para[2]; ptrdiff_t addr = src_stride; para[0] = (1 << ((FILTER_BITS)-1)); para[1] = FILTER_BITS; src -= src_stride * (SUBPEL_TAPS / 2 - 1); src_stride -= w; dst_stride -= w; (void)y_step_q4; __asm__ volatile( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t" "gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t" "gsldrc1 %[ftmp5], 0x08(%[filter]) \n\t" "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t" "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t" "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t" "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t" "1: \n\t" /* Get 8 data per column */ "gsldlc1 %[ftmp4], 0x07(%[src]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[src]) \n\t" MMI_ADDU(%[tmp0], %[src], %[addr]) "gsldlc1 %[ftmp5], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp5], 0x00(%[tmp0]) \n\t" MMI_ADDU(%[tmp0], %[tmp0], %[addr]) "gsldlc1 %[ftmp6], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[tmp0]) \n\t" MMI_ADDU(%[tmp0], %[tmp0], %[addr]) "gsldlc1 %[ftmp7], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp7], 0x00(%[tmp0]) \n\t" MMI_ADDU(%[tmp0], %[tmp0], %[addr]) "gsldlc1 %[ftmp8], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp8], 0x00(%[tmp0]) \n\t" MMI_ADDU(%[tmp0], %[tmp0], %[addr]) "gsldlc1 %[ftmp9], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp9], 0x00(%[tmp0]) \n\t" MMI_ADDU(%[tmp0], %[tmp0], %[addr]) "gsldlc1 %[ftmp10], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp10], 0x00(%[tmp0]) \n\t" MMI_ADDU(%[tmp0], %[tmp0], %[addr]) "gsldlc1 %[ftmp11], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp11], 0x00(%[tmp0]) \n\t" "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" MMI_ADDIU(%[width], %[width], -0x04) /* Get raw data */ GET_DATA_V_MMI ROUND_POWER_OF_TWO_MMI CLIP_PIXEL_MMI "swc1 %[ftmp12], 0x00(%[dst]) \n\t" MMI_ADDIU(%[dst], %[dst], 0x04) MMI_ADDIU(%[src], %[src], 0x04) /* Loop count */ "bnez %[width], 1b \n\t" MMI_SUBU(%[width], %[addr], %[src_stride]) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[dst], %[dst], %[dst_stride]) MMI_ADDIU(%[height], %[height], -0x01) "bnez %[height], 1b \n\t" : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]), [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]), [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]), [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]), [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]), [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]), [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]), [src]"+&r"(src), [dst]"+&r"(dst), [width]"+&r"(w), [height]"+&r"(h), [tmp0]"=&r"(tmp[0]) : [filter]"r"(filter_y), [para]"r"(para), [src_stride]"r"((mips_reg)src_stride), [dst_stride]"r"((mips_reg)dst_stride), [addr]"r"((mips_reg)addr) : "memory" ); } static void convolve_avg_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int32_t w, int32_t h) { const int16_t *filter_x = filter[x0_q4]; double ftmp[14]; uint32_t tmp[2]; uint32_t para[2]; para[0] = (1 << ((FILTER_BITS)-1)); para[1] = FILTER_BITS; src -= SUBPEL_TAPS / 2 - 1; src_stride -= w; dst_stride -= w; (void)x_step_q4; __asm__ volatile( "move %[tmp1], %[width] \n\t" "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "gsldlc1 %[filter1], 0x03(%[filter]) \n\t" 
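/* gsldlc1/gsldrc1 form a left/right pair that together perform one unaligned 64-bit load into a single FP register. */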
"gsldrc1 %[filter1], 0x00(%[filter]) \n\t" "gsldlc1 %[filter2], 0x0b(%[filter]) \n\t" "gsldrc1 %[filter2], 0x08(%[filter]) \n\t" "1: \n\t" /* Get 8 data per row */ "gsldlc1 %[ftmp5], 0x07(%[src]) \n\t" "gsldrc1 %[ftmp5], 0x00(%[src]) \n\t" "gsldlc1 %[ftmp7], 0x08(%[src]) \n\t" "gsldrc1 %[ftmp7], 0x01(%[src]) \n\t" "gsldlc1 %[ftmp9], 0x09(%[src]) \n\t" "gsldrc1 %[ftmp9], 0x02(%[src]) \n\t" "gsldlc1 %[ftmp11], 0x0A(%[src]) \n\t" "gsldrc1 %[ftmp11], 0x03(%[src]) \n\t" "punpcklbh %[ftmp4], %[ftmp5], %[ftmp0] \n\t" "punpckhbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" "punpcklbh %[ftmp6], %[ftmp7], %[ftmp0] \n\t" "punpckhbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" "punpcklbh %[ftmp8], %[ftmp9], %[ftmp0] \n\t" "punpckhbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" "punpcklbh %[ftmp10], %[ftmp11], %[ftmp0] \n\t" "punpckhbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" MMI_ADDIU(%[width], %[width], -0x04) /* Get raw data */ GET_DATA_H_MMI ROUND_POWER_OF_TWO_MMI CLIP_PIXEL_MMI "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t" "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t" "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t" "li %[tmp0], 0x10001 \n\t" MMI_MTC1(%[tmp0], %[ftmp5]) "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t" "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t" "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t" "swc1 %[ftmp12], 0x00(%[dst]) \n\t" MMI_ADDIU(%[dst], %[dst], 0x04) MMI_ADDIU(%[src], %[src], 0x04) /* Loop count */ "bnez %[width], 1b \n\t" "move %[width], %[tmp1] \n\t" MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[dst], %[dst], %[dst_stride]) MMI_ADDIU(%[height], %[height], -0x01) "bnez %[height], 1b \n\t" : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), [filter1]"=&f"(ftmp[2]), [filter2]"=&f"(ftmp[3]), [ftmp0]"=&f"(ftmp[4]), [ftmp4]"=&f"(ftmp[5]), [ftmp5]"=&f"(ftmp[6]), [ftmp6]"=&f"(ftmp[7]), [ftmp7]"=&f"(ftmp[8]), [ftmp8]"=&f"(ftmp[9]), [ftmp9]"=&f"(ftmp[10]), [ftmp10]"=&f"(ftmp[11]), [ftmp11]"=&f"(ftmp[12]), [ftmp12]"=&f"(ftmp[13]), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), [src]"+&r"(src), [width]"+&r"(w), [dst]"+&r"(dst), [height]"+&r"(h) : [filter]"r"(filter_x), [para]"r"(para), [src_stride]"r"((mips_reg)src_stride), [dst_stride]"r"((mips_reg)dst_stride) : "memory" ); } static void convolve_avg_vert_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int y0_q4, int y_step_q4, int32_t w, int32_t h) { const int16_t *filter_y = filter[y0_q4]; double ftmp[16]; uint32_t tmp[1]; uint32_t para[2]; ptrdiff_t addr = src_stride; para[0] = (1 << ((FILTER_BITS)-1)); para[1] = FILTER_BITS; src -= src_stride * (SUBPEL_TAPS / 2 - 1); src_stride -= w; dst_stride -= w; (void)y_step_q4; __asm__ volatile( "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "gsldlc1 %[ftmp4], 0x03(%[filter]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[filter]) \n\t" "gsldlc1 %[ftmp5], 0x0b(%[filter]) \n\t" "gsldrc1 %[ftmp5], 0x08(%[filter]) \n\t" "punpcklwd %[filter10], %[ftmp4], %[ftmp4] \n\t" "punpckhwd %[filter32], %[ftmp4], %[ftmp4] \n\t" "punpcklwd %[filter54], %[ftmp5], %[ftmp5] \n\t" "punpckhwd %[filter76], %[ftmp5], %[ftmp5] \n\t" "1: \n\t" /* Get 8 data per column */ "gsldlc1 %[ftmp4], 0x07(%[src]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[src]) \n\t" MMI_ADDU(%[tmp0], %[src], %[addr]) "gsldlc1 %[ftmp5], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp5], 0x00(%[tmp0]) \n\t" MMI_ADDU(%[tmp0], %[tmp0], %[addr]) "gsldlc1 %[ftmp6], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp6], 0x00(%[tmp0]) \n\t" MMI_ADDU(%[tmp0], %[tmp0], 
%[addr]) "gsldlc1 %[ftmp7], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp7], 0x00(%[tmp0]) \n\t" MMI_ADDU(%[tmp0], %[tmp0], %[addr]) "gsldlc1 %[ftmp8], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp8], 0x00(%[tmp0]) \n\t" MMI_ADDU(%[tmp0], %[tmp0], %[addr]) "gsldlc1 %[ftmp9], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp9], 0x00(%[tmp0]) \n\t" MMI_ADDU(%[tmp0], %[tmp0], %[addr]) "gsldlc1 %[ftmp10], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp10], 0x00(%[tmp0]) \n\t" MMI_ADDU(%[tmp0], %[tmp0], %[addr]) "gsldlc1 %[ftmp11], 0x07(%[tmp0]) \n\t" "gsldrc1 %[ftmp11], 0x00(%[tmp0]) \n\t" "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" "punpcklbh %[ftmp5], %[ftmp5], %[ftmp0] \n\t" "punpcklbh %[ftmp6], %[ftmp6], %[ftmp0] \n\t" "punpcklbh %[ftmp7], %[ftmp7], %[ftmp0] \n\t" "punpcklbh %[ftmp8], %[ftmp8], %[ftmp0] \n\t" "punpcklbh %[ftmp9], %[ftmp9], %[ftmp0] \n\t" "punpcklbh %[ftmp10], %[ftmp10], %[ftmp0] \n\t" "punpcklbh %[ftmp11], %[ftmp11], %[ftmp0] \n\t" MMI_ADDIU(%[width], %[width], -0x04) /* Get raw data */ GET_DATA_V_MMI ROUND_POWER_OF_TWO_MMI CLIP_PIXEL_MMI "punpcklbh %[ftmp12], %[ftmp12], %[ftmp0] \n\t" "gsldlc1 %[ftmp4], 0x07(%[dst]) \n\t" "gsldrc1 %[ftmp4], 0x00(%[dst]) \n\t" "punpcklbh %[ftmp4], %[ftmp4], %[ftmp0] \n\t" "paddh %[ftmp12], %[ftmp12], %[ftmp4] \n\t" "li %[tmp0], 0x10001 \n\t" MMI_MTC1(%[tmp0], %[ftmp5]) "punpcklhw %[ftmp5], %[ftmp5], %[ftmp5] \n\t" "paddh %[ftmp12], %[ftmp12], %[ftmp5] \n\t" "psrah %[ftmp12], %[ftmp12], %[ftmp5] \n\t" "packushb %[ftmp12], %[ftmp12], %[ftmp0] \n\t" "swc1 %[ftmp12], 0x00(%[dst]) \n\t" MMI_ADDIU(%[dst], %[dst], 0x04) MMI_ADDIU(%[src], %[src], 0x04) /* Loop count */ "bnez %[width], 1b \n\t" MMI_SUBU(%[width], %[addr], %[src_stride]) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDU(%[dst], %[dst], %[dst_stride]) MMI_ADDIU(%[height], %[height], -0x01) "bnez %[height], 1b \n\t" : [srcl]"=&f"(ftmp[0]), [srch]"=&f"(ftmp[1]), [filter10]"=&f"(ftmp[2]), [filter32]"=&f"(ftmp[3]), [filter54]"=&f"(ftmp[4]), [filter76]"=&f"(ftmp[5]), [ftmp0]"=&f"(ftmp[6]), [ftmp4]"=&f"(ftmp[7]), [ftmp5]"=&f"(ftmp[8]), [ftmp6]"=&f"(ftmp[9]), [ftmp7]"=&f"(ftmp[10]), [ftmp8]"=&f"(ftmp[11]), [ftmp9]"=&f"(ftmp[12]), [ftmp10]"=&f"(ftmp[13]), [ftmp11]"=&f"(ftmp[14]), [ftmp12]"=&f"(ftmp[15]), [src]"+&r"(src), [dst]"+&r"(dst), [width]"+&r"(w), [height]"+&r"(h), [tmp0]"=&r"(tmp[0]) : [filter]"r"(filter_y), [para]"r"(para), [src_stride]"r"((mips_reg)src_stride), [dst_stride]"r"((mips_reg)dst_stride), [addr]"r"((mips_reg)addr) : "memory" ); } void vpx_convolve_avg_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { int x, y; (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; if (w & 0x03) { for (y = 0; y < h; ++y) { for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); src += src_stride; dst += dst_stride; } } else { double ftmp[4]; uint32_t tmp[2]; src_stride -= w; dst_stride -= w; __asm__ volatile( "move %[tmp1], %[width] \n\t" "xor %[ftmp0], %[ftmp0], %[ftmp0] \n\t" "li %[tmp0], 0x10001 \n\t" MMI_MTC1(%[tmp0], %[ftmp3]) "punpcklhw %[ftmp3], %[ftmp3], %[ftmp3] \n\t" "1: \n\t" "gsldlc1 %[ftmp1], 0x07(%[src]) \n\t" "gsldrc1 %[ftmp1], 0x00(%[src]) \n\t" "gsldlc1 %[ftmp2], 0x07(%[dst]) \n\t" "gsldrc1 %[ftmp2], 0x00(%[dst]) \n\t" "punpcklbh %[ftmp1], %[ftmp1], %[ftmp0] \n\t" "punpcklbh %[ftmp2], %[ftmp2], %[ftmp0] \n\t" "paddh %[ftmp1], %[ftmp1], %[ftmp2] \n\t" "paddh %[ftmp1], %[ftmp1], %[ftmp3] \n\t" "psrah %[ftmp1], %[ftmp1], %[ftmp3] \n\t" "packushb %[ftmp1], 
%[ftmp1], %[ftmp0] \n\t" "swc1 %[ftmp1], 0x00(%[dst]) \n\t" MMI_ADDIU(%[width], %[width], -0x04) MMI_ADDIU(%[dst], %[dst], 0x04) MMI_ADDIU(%[src], %[src], 0x04) "bnez %[width], 1b \n\t" "move %[width], %[tmp1] \n\t" MMI_ADDU(%[dst], %[dst], %[dst_stride]) MMI_ADDU(%[src], %[src], %[src_stride]) MMI_ADDIU(%[height], %[height], -0x01) "bnez %[height], 1b \n\t" : [ftmp0]"=&f"(ftmp[0]), [ftmp1]"=&f"(ftmp[1]), [ftmp2]"=&f"(ftmp[2]), [ftmp3]"=&f"(ftmp[3]), [tmp0]"=&r"(tmp[0]), [tmp1]"=&r"(tmp[1]), [src]"+&r"(src), [dst]"+&r"(dst), [width]"+&r"(w), [height]"+&r"(h) : [src_stride]"r"((mips_reg)src_stride), [dst_stride]"r"((mips_reg)dst_stride) : "memory" ); } } static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4, int x_step_q4, int w, int h) { int x, y; src -= SUBPEL_TAPS / 2 - 1; for (y = 0; y < h; ++y) { int x_q4 = x0_q4; for (x = 0; x < w; ++x) { const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; int k, sum = 0; for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k]; dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); x_q4 += x_step_q4; } src += src_stride; dst += dst_stride; } } static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4, int y_step_q4, int w, int h) { int x, y; src -= src_stride * (SUBPEL_TAPS / 2 - 1); for (x = 0; x < w; ++x) { int y_q4 = y0_q4; for (y = 0; y < h; ++y) { const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; int k, sum = 0; for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_y[k * src_stride] * y_filter[k]; dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)); y_q4 += y_step_q4; } ++src; ++dst; } } static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4, int y_step_q4, int w, int h) { int x, y; src -= src_stride * (SUBPEL_TAPS / 2 - 1); for (x = 0; x < w; ++x) { int y_q4 = y0_q4; for (y = 0; y < h; ++y) { const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; int k, sum = 0; for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_y[k * src_stride] * y_filter[k]; dst[y * dst_stride] = ROUND_POWER_OF_TWO( dst[y * dst_stride] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); y_q4 += y_step_q4; } ++src; ++dst; } } static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4, int x_step_q4, int w, int h) { int x, y; src -= SUBPEL_TAPS / 2 - 1; for (y = 0; y < h; ++y) { int x_q4 = x0_q4; for (x = 0; x < w; ++x) { const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; int k, sum = 0; for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k]; dst[x] = ROUND_POWER_OF_TWO( dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1); x_q4 += x_step_q4; } src += src_stride; dst += dst_stride; } } void vpx_convolve8_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int32_t y_step_q4, int32_t w, int32_t h) { // Note: Fixed size intermediate buffer, temp, places limits on parameters. 
// 2d filtering proceeds in 2 steps: // (1) Interpolate horizontally into an intermediate buffer, temp. // (2) Interpolate temp vertically to derive the sub-pixel result. // Deriving the maximum number of rows in the temp buffer (135): // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). // --Largest block size is 64x64 pixels. // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the // original frame (in 1/16th pixel units). // --Must round-up because block may be located at sub-pixel position. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. // When called from the frame scaling function, the smallest scaling factor is // x1/4 ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer is // still big enough. uint8_t temp[64 * 135]; const int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; assert(w <= 64); assert(h <= 64); assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32)); assert(x_step_q4 <= 64); if (w & 0x03) { convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, filter, x0_q4, x_step_q4, w, intermediate_height); convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } else { convolve_horiz_mmi(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, filter, x0_q4, x_step_q4, w, intermediate_height); convolve_vert_mmi(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } } void vpx_convolve8_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int32_t y_step_q4, int32_t w, int32_t h) { (void)y0_q4; (void)y_step_q4; if (w & 0x03) convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w, h); else convolve_horiz_mmi(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w, h); } void vpx_convolve8_vert_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4, int w, int h) { (void)x0_q4; (void)x_step_q4; if (w & 0x03) convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); else convolve_vert_mmi(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } void vpx_convolve8_avg_horiz_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4, int w, int h) { (void)y0_q4; (void)y_step_q4; if (w & 0x03) convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w, h); else convolve_avg_horiz_mmi(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w, h); } void vpx_convolve8_avg_vert_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4, int w, int h) { (void)x0_q4; (void)x_step_q4; if (w & 0x03) convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); else convolve_avg_vert_mmi(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } void vpx_convolve8_avg_mmi(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int32_t y_step_q4, int32_t w, int32_t h) { // Fixed size intermediate buffer places limits on parameters.
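/* Unlike the 64 * 135 scratch area in vpx_convolve8_mmi, this buffer only holds the fully filtered w x h block before averaging, so 64 * 64 bytes suffice. */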
DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]); assert(w <= 64); assert(h <= 64); vpx_convolve8_mmi(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); vpx_convolve_avg_mmi(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h); }
libvpx-1.8.2/vpx_dsp/mips/vpx_convolve8_msa.c
/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/vpx_convolve_msa.h" const uint8_t mc_filt_mask_arr[16 * 3] = { /* 8 width cases */ 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, /* 4 width cases */ 0, 1, 1, 2, 2, 3, 3, 4, 16, 17, 17, 18, 18, 19, 19, 20, /* 4 width cases */ 8, 9, 9, 10, 10, 11, 11, 12, 24, 25, 25, 26, 26, 27, 27, 28 }; static void common_hv_8ht_8vt_4w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3; v16u8 mask0, mask1, mask2, mask3, out; v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; v8i16 hz_out7, hz_out8, hz_out9, tmp0, tmp1, out0, out1, out2, out3, out4; v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3; mask0 = LD_UB(&mc_filt_mask_arr[16]); src -= (3 + 3 * src_stride); /* rearranging filter */ filt = LD_SH(filter_horiz); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); hz_out0 = HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out2 = HORIZ_8TAP_FILT(src2, src3, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out4 = HORIZ_8TAP_FILT(src4, src5, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out5 = HORIZ_8TAP_FILT(src5, src6, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); SLDI_B2_SH(hz_out2, hz_out4, hz_out0, hz_out2, hz_out1, hz_out3, 8); filt = LD_SH(filter_vert); SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3); ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1); out2 = (v8i16)__msa_ilvev_b((v16i8)hz_out5, (v16i8)hz_out4); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src7, src8, src9, src10); XORI_B4_128_SB(src7, src8, src9, src10); src += (4 * src_stride); hz_out7 = HORIZ_8TAP_FILT(src7, src8, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out6 = (v8i16)__msa_sldi_b((v16i8)hz_out7, (v16i8)hz_out5, 8); out3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6); tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vt0, filt_vt1, filt_vt2, filt_vt3); hz_out9 = HORIZ_8TAP_FILT(src9, src10, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out8 = (v8i16)__msa_sldi_b((v16i8)hz_out9, (v16i8)hz_out7, 8); out4 = (v8i16)__msa_ilvev_b((v16i8)hz_out9, (v16i8)hz_out8); tmp1 =
FILT_8TAP_DPADD_S_H(out1, out2, out3, out4, filt_vt0, filt_vt1, filt_vt2, filt_vt3); SRARI_H2_SH(tmp0, tmp1, FILTER_BITS); SAT_SH2_SH(tmp0, tmp1, 7); out = PCKEV_XORI128_UB(tmp0, tmp1); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); hz_out5 = hz_out9; out0 = out2; out1 = out3; out2 = out4; } } static void common_hv_8ht_8vt_8w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16i8 filt_hz0, filt_hz1, filt_hz2, filt_hz3; v16u8 mask0, mask1, mask2, mask3, vec0, vec1; v8i16 filt, filt_vt0, filt_vt1, filt_vt2, filt_vt3; v8i16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; v8i16 hz_out7, hz_out8, hz_out9, hz_out10, tmp0, tmp1, tmp2, tmp3; v8i16 out0, out1, out2, out3, out4, out5, out6, out7, out8, out9; mask0 = LD_UB(&mc_filt_mask_arr[0]); src -= (3 + 3 * src_stride); /* rearranging filter */ filt = LD_SH(filter_horiz); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); mask1 = mask0 + 2; mask2 = mask0 + 4; mask3 = mask0 + 6; LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); hz_out0 = HORIZ_8TAP_FILT(src0, src0, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out1 = HORIZ_8TAP_FILT(src1, src1, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out2 = HORIZ_8TAP_FILT(src2, src2, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out3 = HORIZ_8TAP_FILT(src3, src3, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out4 = HORIZ_8TAP_FILT(src4, src4, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out5 = HORIZ_8TAP_FILT(src5, src5, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); hz_out6 = HORIZ_8TAP_FILT(src6, src6, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); filt = LD_SH(filter_vert); SPLATI_H4_SH(filt, 0, 1, 2, 3, filt_vt0, filt_vt1, filt_vt2, filt_vt3); ILVEV_B2_SH(hz_out0, hz_out1, hz_out2, hz_out3, out0, out1); ILVEV_B2_SH(hz_out4, hz_out5, hz_out1, hz_out2, out2, out4); ILVEV_B2_SH(hz_out3, hz_out4, hz_out5, hz_out6, out5, out6); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src7, src8, src9, src10); src += (4 * src_stride); XORI_B4_128_SB(src7, src8, src9, src10); hz_out7 = HORIZ_8TAP_FILT(src7, src7, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); out3 = (v8i16)__msa_ilvev_b((v16i8)hz_out7, (v16i8)hz_out6); tmp0 = FILT_8TAP_DPADD_S_H(out0, out1, out2, out3, filt_vt0, filt_vt1, filt_vt2, filt_vt3); hz_out8 = HORIZ_8TAP_FILT(src8, src8, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); out7 = (v8i16)__msa_ilvev_b((v16i8)hz_out8, (v16i8)hz_out7); tmp1 = FILT_8TAP_DPADD_S_H(out4, out5, out6, out7, filt_vt0, filt_vt1, filt_vt2, filt_vt3); hz_out9 = HORIZ_8TAP_FILT(src9, src9, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); out8 = (v8i16)__msa_ilvev_b((v16i8)hz_out9, (v16i8)hz_out8); tmp2 = FILT_8TAP_DPADD_S_H(out1, out2, out3, out8, filt_vt0, filt_vt1, filt_vt2, filt_vt3); hz_out10 = HORIZ_8TAP_FILT(src10, src10, mask0, mask1, mask2, mask3, filt_hz0, filt_hz1, filt_hz2, filt_hz3); out9 = (v8i16)__msa_ilvev_b((v16i8)hz_out10, (v16i8)hz_out9); tmp3 = FILT_8TAP_DPADD_S_H(out5, out6, out7, out9, filt_vt0, filt_vt1, 
filt_vt2, filt_vt3); SRARI_H4_SH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); SAT_SH4_SH(tmp0, tmp1, tmp2, tmp3, 7); vec0 = PCKEV_XORI128_UB(tmp0, tmp1); vec1 = PCKEV_XORI128_UB(tmp2, tmp3); ST8x4_UB(vec0, vec1, dst, dst_stride); dst += (4 * dst_stride); hz_out6 = hz_out10; out0 = out2; out1 = out3; out2 = out8; out4 = out6; out5 = out7; out6 = out9; } } static void common_hv_8ht_8vt_16w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 2; multiple8_cnt--;) { common_hv_8ht_8vt_8w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 8; dst += 8; } } static void common_hv_8ht_8vt_32w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 4; multiple8_cnt--;) { common_hv_8ht_8vt_8w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 8; dst += 8; } } static void common_hv_8ht_8vt_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 8; multiple8_cnt--;) { common_hv_8ht_8vt_8w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 8; dst += 8; } } static void common_hv_2ht_2vt_4x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert) { v16i8 src0, src1, src2, src3, src4, mask; v16u8 filt_vt, filt_hz, vec0, vec1, res0, res1; v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, filt, tmp0, tmp1; mask = LD_SB(&mc_filt_mask_arr[16]); /* rearranging filter */ filt = LD_UH(filter_horiz); filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0); filt = LD_UH(filter_vert); filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB5(src, src_stride, src0, src1, src2, src3, src4); hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS); hz_out4 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); hz_out1 = (v8u16)__msa_sldi_b((v16i8)hz_out2, (v16i8)hz_out0, 8); hz_out3 = (v8u16)__msa_pckod_d((v2i64)hz_out4, (v2i64)hz_out2); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_B2_UB(tmp0, tmp0, tmp1, tmp1, res0, res1); ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); } static void common_hv_2ht_2vt_4x8_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert) { v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, mask; v16i8 res0, res1, res2, res3; v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3; v8u16 hz_out0, hz_out1, hz_out2, hz_out3, hz_out4, hz_out5, hz_out6; v8u16 hz_out7, hz_out8, vec4, vec5, vec6, vec7, filt; mask = LD_SB(&mc_filt_mask_arr[16]); /* rearranging filter */ filt = LD_UH(filter_horiz); filt_hz = (v16u8)__msa_splati_h((v8i16)filt, 0); filt = LD_UH(filter_vert); filt_vt = (v16u8)__msa_splati_h((v8i16)filt, 0); LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); src8 = LD_SB(src); hz_out0 = HORIZ_2TAP_FILT_UH(src0, src1, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src2, src3, mask, filt_hz, FILTER_BITS); hz_out4 = HORIZ_2TAP_FILT_UH(src4, src5, mask, 
filt_hz, FILTER_BITS); hz_out6 = HORIZ_2TAP_FILT_UH(src6, src7, mask, filt_hz, FILTER_BITS); hz_out8 = HORIZ_2TAP_FILT_UH(src8, src8, mask, filt_hz, FILTER_BITS); SLDI_B3_UH(hz_out2, hz_out4, hz_out6, hz_out0, hz_out2, hz_out4, hz_out1, hz_out3, hz_out5, 8); hz_out7 = (v8u16)__msa_pckod_d((v2i64)hz_out8, (v2i64)hz_out6); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); ILVEV_B2_UB(hz_out4, hz_out5, hz_out6, hz_out7, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt_vt, filt_vt, filt_vt, filt_vt, vec4, vec5, vec6, vec7); SRARI_H4_UH(vec4, vec5, vec6, vec7, FILTER_BITS); PCKEV_B4_SB(vec4, vec4, vec5, vec5, vec6, vec6, vec7, vec7, res0, res1, res2, res3); ST4x4_UB(res0, res1, 0, 1, 0, 1, dst, dst_stride); dst += (4 * dst_stride); ST4x4_UB(res2, res3, 0, 1, 0, 1, dst, dst_stride); } static void common_hv_2ht_2vt_4w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { if (4 == height) { common_hv_2ht_2vt_4x4_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert); } else if (8 == height) { common_hv_2ht_2vt_4x8_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert); } } static void common_hv_2ht_2vt_8x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert) { v16i8 src0, src1, src2, src3, src4, mask, out0, out1; v16u8 filt_hz, filt_vt, vec0, vec1, vec2, vec3; v8u16 hz_out0, hz_out1, tmp0, tmp1, tmp2, tmp3; v8i16 filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_SH(filter_horiz); filt_hz = (v16u8)__msa_splati_h(filt, 0); filt = LD_SH(filter_vert); filt_vt = (v16u8)__msa_splati_h(filt, 0); LD_SB5(src, src_stride, src0, src1, src2, src3, src4); hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp0 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); vec1 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp1 = __msa_dotp_u_h(vec1, filt_vt); hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); vec2 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp2 = __msa_dotp_u_h(vec2, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); vec3 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp3 = __msa_dotp_u_h(vec3, filt_vt); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); } static void common_hv_2ht_2vt_8x8mult_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, mask, out0, out1; v16u8 filt_hz, filt_vt, vec0; v8u16 hz_out0, hz_out1, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; v8i16 filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_SH(filter_horiz); filt_hz = (v16u8)__msa_splati_h(filt, 0); filt = LD_SH(filter_vert); filt_vt = (v16u8)__msa_splati_h(filt, 0); src0 = LD_SB(src); src += src_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); for (loop_cnt = (height >> 3); loop_cnt--;) { LD_SB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); vec0 = 
(v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp1 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp2 = __msa_dotp_u_h(vec0, filt_vt); SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp3 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); LD_SB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp4 = __msa_dotp_u_h(vec0, filt_vt); SRARI_H2_UH(tmp3, tmp4, FILTER_BITS); PCKEV_B2_SB(tmp2, tmp1, tmp4, tmp3, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); hz_out1 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp5 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp6 = __msa_dotp_u_h(vec0, filt_vt); hz_out1 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out1, (v16i8)hz_out0); tmp7 = __msa_dotp_u_h(vec0, filt_vt); hz_out0 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); vec0 = (v16u8)__msa_ilvev_b((v16i8)hz_out0, (v16i8)hz_out1); tmp8 = __msa_dotp_u_h(vec0, filt_vt); SRARI_H4_UH(tmp5, tmp6, tmp7, tmp8, FILTER_BITS); PCKEV_B2_SB(tmp6, tmp5, tmp8, tmp7, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); } } static void common_hv_2ht_2vt_8w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { if (4 == height) { common_hv_2ht_2vt_8x4_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert); } else { common_hv_2ht_2vt_8x8mult_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); } } static void common_hv_2ht_2vt_16w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, mask; v16u8 filt_hz, filt_vt, vec0, vec1; v8u16 tmp1, tmp2, hz_out0, hz_out1, hz_out2, hz_out3; v8i16 filt; mask = LD_SB(&mc_filt_mask_arr[0]); /* rearranging filter */ filt = LD_SH(filter_horiz); filt_hz = (v16u8)__msa_splati_h(filt, 0); filt = LD_SH(filter_vert); filt_vt = (v16u8)__msa_splati_h(filt, 0); LD_SB2(src, 8, src0, src1); src += src_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src0, src2, src4, src6); LD_SB4(src + 8, src_stride, src1, src3, src5, src7); src += (4 * src_stride); hz_out1 = HORIZ_2TAP_FILT_UH(src0, src0, mask, filt_hz, FILTER_BITS); hz_out3 = HORIZ_2TAP_FILT_UH(src1, src1, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); PCKEV_ST_SB(tmp1, tmp2, dst); dst += dst_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src2, src2, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src3, src3, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); 
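/* Scalar sketch (illustration only, not part of the MSA build) of what this
 * 2-tap horizontal+vertical path computes per output pixel; fh0/fh1 and
 * fv0/fv1 stand for the two non-zero taps of the bilinear kernels:
 *   hz[y][x]  = ROUND_POWER_OF_TWO(src[y][x] * fh0 + src[y][x + 1] * fh1,
 *                                  FILTER_BITS);
 *   out[y][x] = ROUND_POWER_OF_TWO(hz[y][x] * fv0 + hz[y + 1][x] * fv1,
 *                                  FILTER_BITS);
 * HORIZ_2TAP_FILT_UH produces the rounded hz rows, ILVEV_B2_UB interleaves
 * two consecutive filtered rows byte-wise so that the following DOTP_UB2_UH
 * applies both vertical taps in a single dot product, and SRARI_H2_UH does
 * the final rounding shift before the results are packed and stored. */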
DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); PCKEV_ST_SB(tmp1, tmp2, dst); dst += dst_stride; hz_out1 = HORIZ_2TAP_FILT_UH(src4, src4, mask, filt_hz, FILTER_BITS); hz_out3 = HORIZ_2TAP_FILT_UH(src5, src5, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out0, hz_out1, hz_out2, hz_out3, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); PCKEV_ST_SB(tmp1, tmp2, dst); dst += dst_stride; hz_out0 = HORIZ_2TAP_FILT_UH(src6, src6, mask, filt_hz, FILTER_BITS); hz_out2 = HORIZ_2TAP_FILT_UH(src7, src7, mask, filt_hz, FILTER_BITS); ILVEV_B2_UB(hz_out1, hz_out0, hz_out3, hz_out2, vec0, vec1); DOTP_UB2_UH(vec0, vec1, filt_vt, filt_vt, tmp1, tmp2); SRARI_H2_UH(tmp1, tmp2, FILTER_BITS); PCKEV_ST_SB(tmp1, tmp2, dst); dst += dst_stride; } } static void common_hv_2ht_2vt_32w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 2; multiple8_cnt--;) { common_hv_2ht_2vt_16w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 16; dst += 16; } } static void common_hv_2ht_2vt_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter_horiz, int8_t *filter_vert, int32_t height) { int32_t multiple8_cnt; for (multiple8_cnt = 4; multiple8_cnt--;) { common_hv_2ht_2vt_16w_msa(src, src_stride, dst, dst_stride, filter_horiz, filter_vert, height); src += 16; dst += 16; } } void vpx_convolve8_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int32_t y_step_q4, int32_t w, int32_t h) { const int16_t *const filter_x = filter[x0_q4]; const int16_t *const filter_y = filter[y0_q4]; int8_t cnt, filt_hor[8], filt_ver[8]; assert(x_step_q4 == 16); assert(y_step_q4 == 16); assert(((const int32_t *)filter_x)[1] != 0x800000); assert(((const int32_t *)filter_y)[1] != 0x800000); for (cnt = 0; cnt < 8; ++cnt) { filt_hor[cnt] = filter_x[cnt]; filt_ver[cnt] = filter_y[cnt]; } if (vpx_get_filter_taps(filter_x) == 2 && vpx_get_filter_taps(filter_y) == 2) { switch (w) { case 4: common_hv_2ht_2vt_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], &filt_ver[3], (int32_t)h); break; case 8: common_hv_2ht_2vt_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], &filt_ver[3], (int32_t)h); break; case 16: common_hv_2ht_2vt_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], &filt_ver[3], (int32_t)h); break; case 32: common_hv_2ht_2vt_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], &filt_ver[3], (int32_t)h); break; case 64: common_hv_2ht_2vt_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_hor[3], &filt_ver[3], (int32_t)h); break; default: vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } else if (vpx_get_filter_taps(filter_x) == 2 || vpx_get_filter_taps(filter_y) == 2) { vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { switch (w) { case 4: common_hv_8ht_8vt_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, filt_ver, (int32_t)h); break; case 8: common_hv_8ht_8vt_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, filt_ver, (int32_t)h); break; case 16: common_hv_8ht_8vt_16w_msa(src, (int32_t)src_stride, 
dst, (int32_t)dst_stride, filt_hor, filt_ver, (int32_t)h); break; case 32: common_hv_8ht_8vt_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, filt_ver, (int32_t)h); break; case 64: common_hv_8ht_8vt_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_hor, filt_ver, (int32_t)h); break; default: vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } } static void filter_horiz_w4_msa(const uint8_t *src_x, ptrdiff_t src_pitch, uint8_t *dst, const int16_t *x_filter) { uint64_t srcd0, srcd1, srcd2, srcd3; uint32_t res; v16u8 src0 = { 0 }, src1 = { 0 }, dst0; v16i8 out0, out1; v16i8 shf1 = { 0, 8, 16, 24, 4, 12, 20, 28, 1, 9, 17, 25, 5, 13, 21, 29 }; v16i8 shf2 = shf1 + 2; v16i8 filt_shf0 = { 0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9 }; v16i8 filt_shf1 = filt_shf0 + 2; v16i8 filt_shf2 = filt_shf0 + 4; v16i8 filt_shf3 = filt_shf0 + 6; v8i16 filt, src0_h, src1_h, src2_h, src3_h, filt0, filt1, filt2, filt3; LD4(src_x, src_pitch, srcd0, srcd1, srcd2, srcd3); INSERT_D2_UB(srcd0, srcd1, src0); INSERT_D2_UB(srcd2, srcd3, src1); VSHF_B2_SB(src0, src1, src0, src1, shf1, shf2, out0, out1); XORI_B2_128_SB(out0, out1); UNPCK_SB_SH(out0, src0_h, src1_h); UNPCK_SB_SH(out1, src2_h, src3_h); filt = LD_SH(x_filter); VSHF_B2_SH(filt, filt, filt, filt, filt_shf0, filt_shf1, filt0, filt1); VSHF_B2_SH(filt, filt, filt, filt, filt_shf2, filt_shf3, filt2, filt3); src0_h *= filt0; src0_h += src1_h * filt1; src0_h += src2_h * filt2; src0_h += src3_h * filt3; src1_h = (v8i16)__msa_sldi_b((v16i8)src0_h, (v16i8)src0_h, 8); src0_h = __msa_adds_s_h(src0_h, src1_h); src0_h = __msa_srari_h(src0_h, FILTER_BITS); src0_h = __msa_sat_s_h(src0_h, 7); dst0 = PCKEV_XORI128_UB(src0_h, src0_h); res = __msa_copy_u_w((v4i32)dst0, 0); SW(res, dst); } static void filter_horiz_w8_msa(const uint8_t *src_x, ptrdiff_t src_pitch, uint8_t *dst, const int16_t *x_filter) { uint64_t srcd0, srcd1, srcd2, srcd3; v16u8 src0 = { 0 }, src1 = { 0 }, src2 = { 0 }, src3 = { 0 }; v16u8 tmp0, tmp1, tmp2, tmp3, dst0; v16i8 out0, out1, out2, out3; v16i8 shf1 = { 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 }; v16i8 shf2 = shf1 + 4; v8i16 filt, src0_h, src1_h, src2_h, src3_h, src4_h, src5_h, src6_h, src7_h; v8i16 filt0, filt1, filt2, filt3, filt4, filt5, filt6, filt7; LD4(src_x, src_pitch, srcd0, srcd1, srcd2, srcd3); INSERT_D2_UB(srcd0, srcd1, src0); INSERT_D2_UB(srcd2, srcd3, src1); LD4(src_x + 4 * src_pitch, src_pitch, srcd0, srcd1, srcd2, srcd3); INSERT_D2_UB(srcd0, srcd1, src2); INSERT_D2_UB(srcd2, srcd3, src3); filt = LD_SH(x_filter); SPLATI_H4_SH(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); SPLATI_H4_SH(filt, 4, 5, 6, 7, filt4, filt5, filt6, filt7); // transpose VSHF_B2_UB(src0, src1, src0, src1, shf1, shf2, tmp0, tmp1); VSHF_B2_UB(src2, src3, src2, src3, shf1, shf2, tmp2, tmp3); ILVRL_W2_SB(tmp2, tmp0, out0, out1); ILVRL_W2_SB(tmp3, tmp1, out2, out3); XORI_B4_128_SB(out0, out1, out2, out3); UNPCK_SB_SH(out0, src0_h, src1_h); UNPCK_SB_SH(out1, src2_h, src3_h); UNPCK_SB_SH(out2, src4_h, src5_h); UNPCK_SB_SH(out3, src6_h, src7_h); src0_h *= filt0; src4_h *= filt4; src0_h += src1_h * filt1; src4_h += src5_h * filt5; src0_h += src2_h * filt2; src4_h += src6_h * filt6; src0_h += src3_h * filt3; src4_h += src7_h * filt7; src0_h = __msa_adds_s_h(src0_h, src4_h); src0_h = __msa_srari_h(src0_h, FILTER_BITS); src0_h = __msa_sat_s_h(src0_h, 7); dst0 = PCKEV_XORI128_UB(src0_h, src0_h); ST8x1_UB(dst0, dst); } static void filter_horiz_w16_msa(const uint8_t 
*src_x, ptrdiff_t src_pitch, uint8_t *dst, const int16_t *x_filter) { uint64_t srcd0, srcd1, srcd2, srcd3; v16u8 src0 = { 0 }, src1 = { 0 }, src2 = { 0 }, src3 = { 0 }; v16u8 src4 = { 0 }, src5 = { 0 }, src6 = { 0 }, src7 = { 0 }; v16u8 tmp0, tmp1, tmp2, tmp3, dst0; v16i8 out0, out1, out2, out3, out4, out5, out6, out7; v16i8 shf1 = { 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 }; v16i8 shf2 = shf1 + 4; v8i16 filt, src0_h, src1_h, src2_h, src3_h, src4_h, src5_h, src6_h, src7_h; v8i16 filt0, filt1, filt2, filt3, filt4, filt5, filt6, filt7; v8i16 dst0_h, dst1_h, dst2_h, dst3_h; LD4(src_x, src_pitch, srcd0, srcd1, srcd2, srcd3); INSERT_D2_UB(srcd0, srcd1, src0); INSERT_D2_UB(srcd2, srcd3, src1); LD4(src_x + 4 * src_pitch, src_pitch, srcd0, srcd1, srcd2, srcd3); INSERT_D2_UB(srcd0, srcd1, src2); INSERT_D2_UB(srcd2, srcd3, src3); LD4(src_x + 8 * src_pitch, src_pitch, srcd0, srcd1, srcd2, srcd3); INSERT_D2_UB(srcd0, srcd1, src4); INSERT_D2_UB(srcd2, srcd3, src5); LD4(src_x + 12 * src_pitch, src_pitch, srcd0, srcd1, srcd2, srcd3); INSERT_D2_UB(srcd0, srcd1, src6); INSERT_D2_UB(srcd2, srcd3, src7); filt = LD_SH(x_filter); SPLATI_H4_SH(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); SPLATI_H4_SH(filt, 4, 5, 6, 7, filt4, filt5, filt6, filt7); // transpose VSHF_B2_UB(src0, src1, src0, src1, shf1, shf2, tmp0, tmp1); VSHF_B2_UB(src2, src3, src2, src3, shf1, shf2, tmp2, tmp3); ILVRL_W2_SB(tmp2, tmp0, out0, out1); ILVRL_W2_SB(tmp3, tmp1, out2, out3); XORI_B4_128_SB(out0, out1, out2, out3); UNPCK_SB_SH(out0, src0_h, src1_h); UNPCK_SB_SH(out1, src2_h, src3_h); UNPCK_SB_SH(out2, src4_h, src5_h); UNPCK_SB_SH(out3, src6_h, src7_h); VSHF_B2_UB(src4, src5, src4, src5, shf1, shf2, tmp0, tmp1); VSHF_B2_UB(src6, src7, src6, src7, shf1, shf2, tmp2, tmp3); ILVRL_W2_SB(tmp2, tmp0, out4, out5); ILVRL_W2_SB(tmp3, tmp1, out6, out7); XORI_B4_128_SB(out4, out5, out6, out7); dst0_h = src0_h * filt0; dst1_h = src4_h * filt4; dst0_h += src1_h * filt1; dst1_h += src5_h * filt5; dst0_h += src2_h * filt2; dst1_h += src6_h * filt6; dst0_h += src3_h * filt3; dst1_h += src7_h * filt7; UNPCK_SB_SH(out4, src0_h, src1_h); UNPCK_SB_SH(out5, src2_h, src3_h); UNPCK_SB_SH(out6, src4_h, src5_h); UNPCK_SB_SH(out7, src6_h, src7_h); dst2_h = src0_h * filt0; dst3_h = src4_h * filt4; dst2_h += src1_h * filt1; dst3_h += src5_h * filt5; dst2_h += src2_h * filt2; dst3_h += src6_h * filt6; dst2_h += src3_h * filt3; dst3_h += src7_h * filt7; ADDS_SH2_SH(dst0_h, dst1_h, dst2_h, dst3_h, dst0_h, dst2_h); SRARI_H2_SH(dst0_h, dst2_h, FILTER_BITS); SAT_SH2_SH(dst0_h, dst2_h, 7); dst0 = PCKEV_XORI128_UB(dst0_h, dst2_h); ST_UB(dst0, dst); } static void transpose4x4_to_dst(const uint8_t *src, uint8_t *dst, ptrdiff_t dst_stride) { v16u8 in0; v16i8 out0 = { 0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15 }; in0 = LD_UB(src); out0 = __msa_vshf_b(out0, (v16i8)in0, (v16i8)in0); ST4x4_UB(out0, out0, 0, 1, 2, 3, dst, dst_stride); } static void transpose8x8_to_dst(const uint8_t *src, uint8_t *dst, ptrdiff_t dst_stride) { v16u8 in0, in1, in2, in3, out0, out1, out2, out3, tmp0, tmp1, tmp2, tmp3; v16i8 shf1 = { 0, 8, 16, 24, 1, 9, 17, 25, 2, 10, 18, 26, 3, 11, 19, 27 }; v16i8 shf2 = shf1 + 4; LD_UB4(src, 16, in0, in1, in2, in3); VSHF_B2_UB(in0, in1, in0, in1, shf1, shf2, tmp0, tmp1); VSHF_B2_UB(in2, in3, in2, in3, shf1, shf2, tmp2, tmp3); ILVRL_W2_UB(tmp2, tmp0, out0, out1); ILVRL_W2_UB(tmp3, tmp1, out2, out3); ST8x4_UB(out0, out1, dst, dst_stride); ST8x4_UB(out2, out3, dst + 4 * dst_stride, dst_stride); } static void transpose16x16_to_dst(const 
uint8_t *src, uint8_t *dst, ptrdiff_t dst_stride) { v16u8 in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12; v16u8 in13, in14, in15, out0, out1, out2, out3, out4, out5, out6, out7, out8; v16u8 out9, out10, out11, out12, out13, out14, out15; LD_UB8(src, 16, in0, in1, in2, in3, in4, in5, in6, in7); LD_UB8(src + 16 * 8, 16, in8, in9, in10, in11, in12, in13, in14, in15); TRANSPOSE16x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, in13, in14, in15, out0, out1, out2, out3, out4, out5, out6, out7); ST_UB8(out0, out1, out2, out3, out4, out5, out6, out7, dst, dst_stride); dst += 8 * dst_stride; SLDI_B4_0_UB(in0, in1, in2, in3, in0, in1, in2, in3, 8); SLDI_B4_0_UB(in4, in5, in6, in7, in4, in5, in6, in7, 8); SLDI_B4_0_UB(in8, in9, in10, in11, in8, in9, in10, in11, 8); SLDI_B4_0_UB(in12, in13, in14, in15, in12, in13, in14, in15, 8); TRANSPOSE16x8_UB_UB(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, in10, in11, in12, in13, in14, in15, out8, out9, out10, out11, out12, out13, out14, out15); ST_UB8(out8, out9, out10, out11, out12, out13, out14, out15, dst, dst_stride); } static void scaledconvolve_horiz_w4(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4, int x_step_q4, int h) { DECLARE_ALIGNED(16, uint8_t, temp[4 * 4]); int y, z, i; src -= SUBPEL_TAPS / 2 - 1; for (y = 0; y < h; y += 4) { int x_q4 = x0_q4; for (z = 0; z < 4; ++z) { const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; if (x_q4 & SUBPEL_MASK) { filter_horiz_w4_msa(src_x, src_stride, temp + (z * 4), x_filter); } else { for (i = 0; i < 4; ++i) { temp[z * 4 + i] = src_x[i * src_stride + 3]; } } x_q4 += x_step_q4; } transpose4x4_to_dst(temp, dst, dst_stride); src += src_stride * 4; dst += dst_stride * 4; } } static void scaledconvolve_horiz_w8(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4, int x_step_q4, int h) { DECLARE_ALIGNED(16, uint8_t, temp[8 * 8]); int y, z, i; src -= SUBPEL_TAPS / 2 - 1; // This function processes 8x8 areas. The intermediate height is not always // a multiple of 8, so force it to be a multiple of 8 here. y = h + (8 - (h & 0x7)); do { int x_q4 = x0_q4; for (z = 0; z < 8; ++z) { const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; if (x_q4 & SUBPEL_MASK) { filter_horiz_w8_msa(src_x, src_stride, temp + (z * 8), x_filter); } else { for (i = 0; i < 8; ++i) { temp[z * 8 + i] = src_x[3 + i * src_stride]; } } x_q4 += x_step_q4; } transpose8x8_to_dst(temp, dst, dst_stride); src += src_stride * 8; dst += dst_stride * 8; } while (y -= 8); } static void scaledconvolve_horiz_mul16(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4, int x_step_q4, int w, int h) { DECLARE_ALIGNED(16, uint8_t, temp[16 * 16]); int x, y, z, i; src -= SUBPEL_TAPS / 2 - 1; // This function processes 16x16 areas. The intermediate height is not always // a multiple of 16, so force it to be a multiple of 8 here. 
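/* More precisely, the statement below rounds the row count up to the next
 * multiple of 16 (adding a full extra 16 when h is already aligned): e.g.
 * h = 135 gives y = 135 + (16 - 7) = 144, i.e. nine 16-row passes. This
 * lets the do/while body always fill and transpose complete 16x16 tiles
 * through the 16x16 temp buffer. */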
y = h + (16 - (h & 0xF)); do { int x_q4 = x0_q4; for (x = 0; x < w; x += 16) { for (z = 0; z < 16; ++z) { const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; if (x_q4 & SUBPEL_MASK) { filter_horiz_w16_msa(src_x, src_stride, temp + (z * 16), x_filter); } else { for (i = 0; i < 16; ++i) { temp[z * 16 + i] = src_x[3 + i * src_stride]; } } x_q4 += x_step_q4; } transpose16x16_to_dst(temp, dst + x, dst_stride); } src += src_stride * 16; dst += dst_stride * 16; } while (y -= 16); } static void filter_vert_w4_msa(const uint8_t *src_y, ptrdiff_t src_pitch, uint8_t *dst, const int16_t *y_filter) { uint32_t srcw0, srcw1, srcw2, srcw3, srcw4, srcw5, srcw6, srcw7; uint32_t res; v16u8 src0 = { 0 }, src1 = { 0 }, dst0; v16i8 out0, out1; v16i8 shf1 = { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 }; v16i8 shf2 = shf1 + 8; v16i8 filt_shf0 = { 0, 1, 0, 1, 0, 1, 0, 1, 8, 9, 8, 9, 8, 9, 8, 9 }; v16i8 filt_shf1 = filt_shf0 + 2; v16i8 filt_shf2 = filt_shf0 + 4; v16i8 filt_shf3 = filt_shf0 + 6; v8i16 filt, src0_h, src1_h, src2_h, src3_h; v8i16 filt0, filt1, filt2, filt3; LW4(src_y, src_pitch, srcw0, srcw1, srcw2, srcw3); LW4(src_y + 4 * src_pitch, src_pitch, srcw4, srcw5, srcw6, srcw7); INSERT_W4_UB(srcw0, srcw1, srcw2, srcw3, src0); INSERT_W4_UB(srcw4, srcw5, srcw6, srcw7, src1); VSHF_B2_SB(src0, src1, src0, src1, shf1, shf2, out0, out1); XORI_B2_128_SB(out0, out1); UNPCK_SB_SH(out0, src0_h, src1_h); UNPCK_SB_SH(out1, src2_h, src3_h); filt = LD_SH(y_filter); VSHF_B2_SH(filt, filt, filt, filt, filt_shf0, filt_shf1, filt0, filt1); VSHF_B2_SH(filt, filt, filt, filt, filt_shf2, filt_shf3, filt2, filt3); src0_h *= filt0; src0_h += src1_h * filt1; src0_h += src2_h * filt2; src0_h += src3_h * filt3; src1_h = (v8i16)__msa_sldi_b((v16i8)src0_h, (v16i8)src0_h, 8); src0_h = __msa_adds_s_h(src0_h, src1_h); src0_h = __msa_srari_h(src0_h, FILTER_BITS); src0_h = __msa_sat_s_h(src0_h, 7); dst0 = PCKEV_XORI128_UB(src0_h, src0_h); res = __msa_copy_u_w((v4i32)dst0, 0); SW(res, dst); } static void filter_vert_w8_msa(const uint8_t *src_y, ptrdiff_t src_pitch, uint8_t *dst, const int16_t *y_filter) { uint64_t srcd0, srcd1, srcd2, srcd3; v16u8 dst0; v16i8 src0 = { 0 }, src1 = { 0 }, src2 = { 0 }, src3 = { 0 }; v8i16 filt, src0_h, src1_h, src2_h, src3_h, src4_h, src5_h, src6_h, src7_h; v8i16 filt0, filt1, filt2, filt3, filt4, filt5, filt6, filt7; LD4(src_y, src_pitch, srcd0, srcd1, srcd2, srcd3); INSERT_D2_SB(srcd0, srcd1, src0); INSERT_D2_SB(srcd2, srcd3, src1); LD4(src_y + 4 * src_pitch, src_pitch, srcd0, srcd1, srcd2, srcd3); INSERT_D2_SB(srcd0, srcd1, src2); INSERT_D2_SB(srcd2, srcd3, src3); filt = LD_SH(y_filter); SPLATI_H4_SH(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); SPLATI_H4_SH(filt, 4, 5, 6, 7, filt4, filt5, filt6, filt7); XORI_B4_128_SB(src0, src1, src2, src3); UNPCK_SB_SH(src0, src0_h, src1_h); UNPCK_SB_SH(src1, src2_h, src3_h); UNPCK_SB_SH(src2, src4_h, src5_h); UNPCK_SB_SH(src3, src6_h, src7_h); src0_h *= filt0; src4_h *= filt4; src0_h += src1_h * filt1; src4_h += src5_h * filt5; src0_h += src2_h * filt2; src4_h += src6_h * filt6; src0_h += src3_h * filt3; src4_h += src7_h * filt7; src0_h = __msa_adds_s_h(src0_h, src4_h); src0_h = __msa_srari_h(src0_h, FILTER_BITS); src0_h = __msa_sat_s_h(src0_h, 7); dst0 = PCKEV_XORI128_UB(src0_h, src0_h); ST8x1_UB(dst0, dst); } static void filter_vert_mul_w16_msa(const uint8_t *src_y, ptrdiff_t src_pitch, uint8_t *dst, const int16_t *y_filter, int w) { int x; v16u8 dst0; v16i8 src0, src1, src2, src3, src4, 
src5, src6, src7; v8i16 filt, src0_h, src1_h, src2_h, src3_h, src4_h, src5_h, src6_h, src7_h; v8i16 src8_h, src9_h, src10_h, src11_h, src12_h, src13_h, src14_h, src15_h; v8i16 filt0, filt1, filt2, filt3, filt4, filt5, filt6, filt7; filt = LD_SH(y_filter); SPLATI_H4_SH(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); SPLATI_H4_SH(filt, 4, 5, 6, 7, filt4, filt5, filt6, filt7); for (x = 0; x < w; x += 16) { LD_SB8(src_y, src_pitch, src0, src1, src2, src3, src4, src5, src6, src7); src_y += 16; XORI_B4_128_SB(src0, src1, src2, src3); XORI_B4_128_SB(src4, src5, src6, src7); UNPCK_SB_SH(src0, src0_h, src1_h); UNPCK_SB_SH(src1, src2_h, src3_h); UNPCK_SB_SH(src2, src4_h, src5_h); UNPCK_SB_SH(src3, src6_h, src7_h); UNPCK_SB_SH(src4, src8_h, src9_h); UNPCK_SB_SH(src5, src10_h, src11_h); UNPCK_SB_SH(src6, src12_h, src13_h); UNPCK_SB_SH(src7, src14_h, src15_h); src0_h *= filt0; src1_h *= filt0; src8_h *= filt4; src9_h *= filt4; src0_h += src2_h * filt1; src1_h += src3_h * filt1; src8_h += src10_h * filt5; src9_h += src11_h * filt5; src0_h += src4_h * filt2; src1_h += src5_h * filt2; src8_h += src12_h * filt6; src9_h += src13_h * filt6; src0_h += src6_h * filt3; src1_h += src7_h * filt3; src8_h += src14_h * filt7; src9_h += src15_h * filt7; ADDS_SH2_SH(src0_h, src8_h, src1_h, src9_h, src0_h, src1_h); SRARI_H2_SH(src0_h, src1_h, FILTER_BITS); SAT_SH2_SH(src0_h, src1_h, 7); dst0 = PCKEV_XORI128_UB(src0_h, src1_h); ST_UB(dst0, dst); dst += 16; } } static void scaledconvolve_vert_w4(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4, int y_step_q4, int h) { int y; int y_q4 = y0_q4; src -= src_stride * (SUBPEL_TAPS / 2 - 1); for (y = 0; y < h; ++y) { const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; if (y_q4 & SUBPEL_MASK) { filter_vert_w4_msa(src_y, src_stride, &dst[y * dst_stride], y_filter); } else { uint32_t srcd = LW(src_y + 3 * src_stride); SW(srcd, dst + y * dst_stride); } y_q4 += y_step_q4; } } static void scaledconvolve_vert_w8(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4, int y_step_q4, int h) { int y; int y_q4 = y0_q4; src -= src_stride * (SUBPEL_TAPS / 2 - 1); for (y = 0; y < h; ++y) { const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; if (y_q4 & SUBPEL_MASK) { filter_vert_w8_msa(src_y, src_stride, &dst[y * dst_stride], y_filter); } else { uint64_t srcd = LD(src_y + 3 * src_stride); SD(srcd, dst + y * dst_stride); } y_q4 += y_step_q4; } } static void scaledconvolve_vert_mul16(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4, int y_step_q4, int w, int h) { int x, y; int y_q4 = y0_q4; src -= src_stride * (SUBPEL_TAPS / 2 - 1); for (y = 0; y < h; ++y) { const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; if (y_q4 & SUBPEL_MASK) { filter_vert_mul_w16_msa(src_y, src_stride, &dst[y * dst_stride], y_filter, w); } else { for (x = 0; x < w; ++x) { dst[x + y * dst_stride] = src_y[x + 3 * src_stride]; } } y_q4 += y_step_q4; } } void vpx_scaled_2d_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { // Note: Fixed size intermediate buffer, temp, places 
limits on parameters. // 2d filtering proceeds in 2 steps: // (1) Interpolate horizontally into an intermediate buffer, temp. // (2) Interpolate temp vertically to derive the sub-pixel result. // Deriving the maximum number of rows in the temp buffer (135): // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). // --Largest block size is 64x64 pixels. // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the // original frame (in 1/16th pixel units). // --Must round-up because block may be located at sub-pixel position. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. // --Require an additional 8 rows for the horiz_w8 transpose tail. DECLARE_ALIGNED(16, uint8_t, temp[(135 + 8) * 64]); const int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; assert(w <= 64); assert(h <= 64); assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32)); assert(x_step_q4 <= 64); if ((0 == x0_q4) && (16 == x_step_q4) && (0 == y0_q4) && (16 == y_step_q4)) { vpx_convolve_copy_msa(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } else { if (w >= 16) { scaledconvolve_horiz_mul16(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, filter, x0_q4, x_step_q4, w, intermediate_height); } else if (w == 8) { scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, filter, x0_q4, x_step_q4, intermediate_height); } else { scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, filter, x0_q4, x_step_q4, intermediate_height); } if (w >= 16) { scaledconvolve_vert_mul16(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } else if (w == 8) { scaledconvolve_vert_w8(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, h); } else { scaledconvolve_vert_w4(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, h); } } } libvpx-1.8.2/vpx_dsp/mips/vpx_convolve8_vert_msa.c000066400000000000000000000647221357355204000223640ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/vpx_convolve_msa.h" static void common_vt_8t_4w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; v16i8 src65_r, src87_r, src109_r, src2110, src4332, src6554, src8776; v16i8 src10998, filt0, filt1, filt2, filt3; v16u8 out; v8i16 filt, out10, out32; src -= (3 * src_stride); filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, src54_r, src21_r); ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); ILVR_D3_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src2110, src4332, src6554); XORI_B3_128_SB(src2110, src4332, src6554); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src7, src8, src9, src10); src += (4 * src_stride); ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, src87_r, src98_r, src109_r); ILVR_D2_SB(src87_r, src76_r, src109_r, src98_r, src8776, src10998); XORI_B2_128_SB(src8776, src10998); out10 = FILT_8TAP_DPADD_S_H(src2110, src4332, src6554, src8776, filt0, filt1, filt2, filt3); out32 = FILT_8TAP_DPADD_S_H(src4332, src6554, src8776, src10998, filt0, filt1, filt2, filt3); SRARI_H2_SH(out10, out32, FILTER_BITS); SAT_SH2_SH(out10, out32, 7); out = PCKEV_XORI128_UB(out10, out32); ST4x4_UB(out, out, 0, 1, 2, 3, dst, dst_stride); dst += (4 * dst_stride); src2110 = src6554; src4332 = src8776; src6554 = src10998; src6 = src10; } } static void common_vt_8t_8w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; v16i8 src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3; v16u8 tmp0, tmp1; v8i16 filt, out0_r, out1_r, out2_r, out3_r; src -= (3 * src_stride); filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, src54_r, src21_r); ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src7, src8, src9, src10); XORI_B4_128_SB(src7, src8, src9, src10); src += (4 * src_stride); ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, src87_r, src98_r, src109_r); out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0, filt1, filt2, filt3); out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0, filt1, filt2, filt3); out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0, filt1, filt2, filt3); out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3); SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS); SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7); tmp0 = PCKEV_XORI128_UB(out0_r, out1_r); tmp1 = PCKEV_XORI128_UB(out2_r, out3_r); ST8x4_UB(tmp0, tmp1, dst, dst_stride); dst += (4 * dst_stride); src10_r = src54_r; src32_r = src76_r; src54_r = 
src98_r; src21_r = src65_r; src43_r = src87_r; src65_r = src109_r; src6 = src10; } } static void common_vt_8t_16w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16i8 filt0, filt1, filt2, filt3; v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l; v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l; v16u8 tmp0, tmp1, tmp2, tmp3; v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l; src -= (3 * src_stride); filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); LD_SB7(src, src_stride, src0, src1, src2, src3, src4, src5, src6); XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); src += (7 * src_stride); ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, src54_r, src21_r); ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l, src54_l, src21_l); ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src, src_stride, src7, src8, src9, src10); XORI_B4_128_SB(src7, src8, src9, src10); src += (4 * src_stride); ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, src87_r, src98_r, src109_r); ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l, src87_l, src98_l, src109_l); out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0, filt1, filt2, filt3); out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0, filt1, filt2, filt3); out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0, filt1, filt2, filt3); out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3); out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0, filt1, filt2, filt3); out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0, filt1, filt2, filt3); out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0, filt1, filt2, filt3); out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0, filt1, filt2, filt3); SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS); SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS); SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7); SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7); PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l, out3_r, tmp0, tmp1, tmp2, tmp3); XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3); ST_UB4(tmp0, tmp1, tmp2, tmp3, dst, dst_stride); dst += (4 * dst_stride); src10_r = src54_r; src32_r = src76_r; src54_r = src98_r; src21_r = src65_r; src43_r = src87_r; src65_r = src109_r; src10_l = src54_l; src32_l = src76_l; src54_l = src98_l; src21_l = src65_l; src43_l = src87_l; src65_l = src109_l; src6 = src10; } } static void common_vt_8t_16w_mult_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height, int32_t width) { const uint8_t *src_tmp; uint8_t *dst_tmp; uint32_t loop_cnt, cnt; v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16i8 filt0, filt1, filt2, filt3; v16i8 src10_r, src32_r, src54_r, src76_r, src98_r, src21_r, src43_r; v16i8 src65_r, src87_r, src109_r, src10_l, src32_l, src54_l, src76_l; v16i8 src98_l, src21_l, src43_l, src65_l, src87_l, src109_l; v16u8 tmp0, tmp1, 
tmp2, tmp3; v8i16 filt, out0_r, out1_r, out2_r, out3_r, out0_l, out1_l, out2_l, out3_l; src -= (3 * src_stride); filt = LD_SH(filter); SPLATI_H4_SB(filt, 0, 1, 2, 3, filt0, filt1, filt2, filt3); for (cnt = (width >> 4); cnt--;) { src_tmp = src; dst_tmp = dst; LD_SB7(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6); XORI_B7_128_SB(src0, src1, src2, src3, src4, src5, src6); src_tmp += (7 * src_stride); ILVR_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_r, src32_r, src54_r, src21_r); ILVR_B2_SB(src4, src3, src6, src5, src43_r, src65_r); ILVL_B4_SB(src1, src0, src3, src2, src5, src4, src2, src1, src10_l, src32_l, src54_l, src21_l); ILVL_B2_SB(src4, src3, src6, src5, src43_l, src65_l); for (loop_cnt = (height >> 2); loop_cnt--;) { LD_SB4(src_tmp, src_stride, src7, src8, src9, src10); XORI_B4_128_SB(src7, src8, src9, src10); src_tmp += (4 * src_stride); ILVR_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_r, src87_r, src98_r, src109_r); ILVL_B4_SB(src7, src6, src8, src7, src9, src8, src10, src9, src76_l, src87_l, src98_l, src109_l); out0_r = FILT_8TAP_DPADD_S_H(src10_r, src32_r, src54_r, src76_r, filt0, filt1, filt2, filt3); out1_r = FILT_8TAP_DPADD_S_H(src21_r, src43_r, src65_r, src87_r, filt0, filt1, filt2, filt3); out2_r = FILT_8TAP_DPADD_S_H(src32_r, src54_r, src76_r, src98_r, filt0, filt1, filt2, filt3); out3_r = FILT_8TAP_DPADD_S_H(src43_r, src65_r, src87_r, src109_r, filt0, filt1, filt2, filt3); out0_l = FILT_8TAP_DPADD_S_H(src10_l, src32_l, src54_l, src76_l, filt0, filt1, filt2, filt3); out1_l = FILT_8TAP_DPADD_S_H(src21_l, src43_l, src65_l, src87_l, filt0, filt1, filt2, filt3); out2_l = FILT_8TAP_DPADD_S_H(src32_l, src54_l, src76_l, src98_l, filt0, filt1, filt2, filt3); out3_l = FILT_8TAP_DPADD_S_H(src43_l, src65_l, src87_l, src109_l, filt0, filt1, filt2, filt3); SRARI_H4_SH(out0_r, out1_r, out2_r, out3_r, FILTER_BITS); SRARI_H4_SH(out0_l, out1_l, out2_l, out3_l, FILTER_BITS); SAT_SH4_SH(out0_r, out1_r, out2_r, out3_r, 7); SAT_SH4_SH(out0_l, out1_l, out2_l, out3_l, 7); PCKEV_B4_UB(out0_l, out0_r, out1_l, out1_r, out2_l, out2_r, out3_l, out3_r, tmp0, tmp1, tmp2, tmp3); XORI_B4_128_UB(tmp0, tmp1, tmp2, tmp3); ST_UB4(tmp0, tmp1, tmp2, tmp3, dst_tmp, dst_stride); dst_tmp += (4 * dst_stride); src10_r = src54_r; src32_r = src76_r; src54_r = src98_r; src21_r = src65_r; src43_r = src87_r; src65_r = src109_r; src10_l = src54_l; src32_l = src76_l; src54_l = src98_l; src21_l = src65_l; src43_l = src87_l; src65_l = src109_l; src6 = src10; } src += 16; dst += 16; } } static void common_vt_8t_32w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height, 32); } static void common_vt_8t_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { common_vt_8t_16w_mult_msa(src, src_stride, dst, dst_stride, filter, height, 64); } static void common_vt_2t_4x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { v16i8 src0, src1, src2, src3, src4; v16i8 src10_r, src32_r, src21_r, src43_r, src2110, src4332; v16u8 filt0; v8i16 filt; v8u16 tmp0, tmp1; filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); LD_SB5(src, src_stride, src0, src1, src2, src3, src4); src += (5 * src_stride); ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, src32_r, src43_r); ILVR_D2_SB(src21_r, src10_r, src43_r, src32_r, src2110, 
src4332); DOTP_UB2_UH(src2110, src4332, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); src2110 = __msa_pckev_b((v16i8)tmp1, (v16i8)tmp0); ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride); } static void common_vt_2t_4x8_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { v16i8 src0, src1, src2, src3, src4, src5, src6, src7, src8; v16i8 src10_r, src32_r, src54_r, src76_r, src21_r, src43_r; v16i8 src65_r, src87_r, src2110, src4332, src6554, src8776; v8u16 tmp0, tmp1, tmp2, tmp3; v16u8 filt0; v8i16 filt; filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); LD_SB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); src8 = LD_SB(src); src += src_stride; ILVR_B4_SB(src1, src0, src2, src1, src3, src2, src4, src3, src10_r, src21_r, src32_r, src43_r); ILVR_B4_SB(src5, src4, src6, src5, src7, src6, src8, src7, src54_r, src65_r, src76_r, src87_r); ILVR_D4_SB(src21_r, src10_r, src43_r, src32_r, src65_r, src54_r, src87_r, src76_r, src2110, src4332, src6554, src8776); DOTP_UB4_UH(src2110, src4332, src6554, src8776, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, src2110, src4332); ST4x4_UB(src2110, src2110, 0, 1, 2, 3, dst, dst_stride); ST4x4_UB(src4332, src4332, 0, 1, 2, 3, dst + 4 * dst_stride, dst_stride); } static void common_vt_2t_4w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { if (4 == height) { common_vt_2t_4x4_msa(src, src_stride, dst, dst_stride, filter); } else if (8 == height) { common_vt_2t_4x8_msa(src, src_stride, dst, dst_stride, filter); } } static void common_vt_2t_8x4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter) { v16u8 src0, src1, src2, src3, src4, vec0, vec1, vec2, vec3, filt0; v16i8 out0, out1; v8u16 tmp0, tmp1, tmp2, tmp3; v8i16 filt; /* rearranging filter_y */ filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); LD_UB5(src, src_stride, src0, src1, src2, src3, src4); ILVR_B2_UB(src1, src0, src2, src1, vec0, vec1); ILVR_B2_UB(src3, src2, src4, src3, vec2, vec3); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); } static void common_vt_2t_8x8mult_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; v16i8 out0, out1; v8u16 tmp0, tmp1, tmp2, tmp3; v8i16 filt; /* rearranging filter_y */ filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 3); loop_cnt--;) { LD_UB8(src, src_stride, src1, src2, src3, src4, src5, src6, src7, src8); src += (8 * src_stride); ILVR_B4_UB(src1, src0, src2, src1, src3, src2, src4, src3, vec0, vec1, vec2, vec3); ILVR_B4_UB(src5, src4, src6, src5, src7, src6, src8, src7, vec4, vec5, vec6, vec7); DOTP_UB4_UH(vec0, vec1, vec2, vec3, filt0, filt0, filt0, filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); DOTP_UB4_UH(vec4, vec5, vec6, vec7, filt0, filt0, filt0, 
filt0, tmp0, tmp1, tmp2, tmp3); SRARI_H4_UH(tmp0, tmp1, tmp2, tmp3, FILTER_BITS); PCKEV_B2_SB(tmp1, tmp0, tmp3, tmp2, out0, out1); ST8x4_UB(out0, out1, dst, dst_stride); dst += (4 * dst_stride); src0 = src8; } } static void common_vt_2t_8w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { if (4 == height) { common_vt_2t_8x4_msa(src, src_stride, dst, dst_stride, filter); } else { common_vt_2t_8x8mult_msa(src, src_stride, dst, dst_stride, filter, height); } } static void common_vt_2t_16w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; v8u16 tmp0, tmp1, tmp2, tmp3; v8i16 filt; /* rearranging filter_y */ filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); src0 = LD_UB(src); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); src += (4 * src_stride); ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_ST_SB(tmp0, tmp1, dst); dst += dst_stride; ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_ST_SB(tmp2, tmp3, dst); dst += dst_stride; DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_ST_SB(tmp0, tmp1, dst); dst += dst_stride; DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_ST_SB(tmp2, tmp3, dst); dst += dst_stride; src0 = src4; } } static void common_vt_2t_32w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9; v16u8 vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; v8u16 tmp0, tmp1, tmp2, tmp3; v8i16 filt; /* rearranging filter_y */ filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); src0 = LD_UB(src); src5 = LD_UB(src + 16); src += src_stride; for (loop_cnt = (height >> 2); loop_cnt--;) { LD_UB4(src, src_stride, src1, src2, src3, src4); ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); LD_UB4(src + 16, src_stride, src6, src7, src8, src9); src += (4 * src_stride); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_ST_SB(tmp0, tmp1, dst); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_ST_SB(tmp2, tmp3, dst + dst_stride); ILVR_B2_UB(src3, src2, src4, src3, vec4, vec6); ILVL_B2_UB(src3, src2, src4, src3, vec5, vec7); DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_ST_SB(tmp0, tmp1, dst + 2 * dst_stride); DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_ST_SB(tmp2, tmp3, dst + 3 * dst_stride); ILVR_B2_UB(src6, src5, src7, src6, vec0, vec2); ILVL_B2_UB(src6, src5, src7, src6, vec1, vec3); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_ST_SB(tmp0, tmp1, dst + 16); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_ST_SB(tmp2, tmp3, dst + 16 + 
dst_stride); ILVR_B2_UB(src8, src7, src9, src8, vec4, vec6); ILVL_B2_UB(src8, src7, src9, src8, vec5, vec7); DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_ST_SB(tmp0, tmp1, dst + 16 + 2 * dst_stride); DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_ST_SB(tmp2, tmp3, dst + 16 + 3 * dst_stride); dst += (4 * dst_stride); src0 = src4; src5 = src9; } } static void common_vt_2t_64w_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int8_t *filter, int32_t height) { uint32_t loop_cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7, src8, src9, src10; v16u8 src11, vec0, vec1, vec2, vec3, vec4, vec5, vec6, vec7, filt0; v8u16 tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7; v8i16 filt; /* rearranging filter_y */ filt = LD_SH(filter); filt0 = (v16u8)__msa_splati_h(filt, 0); LD_UB4(src, 16, src0, src3, src6, src9); src += src_stride; for (loop_cnt = (height >> 1); loop_cnt--;) { LD_UB2(src, src_stride, src1, src2); LD_UB2(src + 16, src_stride, src4, src5); LD_UB2(src + 32, src_stride, src7, src8); LD_UB2(src + 48, src_stride, src10, src11); src += (2 * src_stride); ILVR_B2_UB(src1, src0, src2, src1, vec0, vec2); ILVL_B2_UB(src1, src0, src2, src1, vec1, vec3); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_ST_SB(tmp0, tmp1, dst); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_ST_SB(tmp2, tmp3, dst + dst_stride); ILVR_B2_UB(src4, src3, src5, src4, vec4, vec6); ILVL_B2_UB(src4, src3, src5, src4, vec5, vec7); DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5); SRARI_H2_UH(tmp4, tmp5, FILTER_BITS); PCKEV_ST_SB(tmp4, tmp5, dst + 16); DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7); SRARI_H2_UH(tmp6, tmp7, FILTER_BITS); PCKEV_ST_SB(tmp6, tmp7, dst + 16 + dst_stride); ILVR_B2_UB(src7, src6, src8, src7, vec0, vec2); ILVL_B2_UB(src7, src6, src8, src7, vec1, vec3); DOTP_UB2_UH(vec0, vec1, filt0, filt0, tmp0, tmp1); SRARI_H2_UH(tmp0, tmp1, FILTER_BITS); PCKEV_ST_SB(tmp0, tmp1, dst + 32); DOTP_UB2_UH(vec2, vec3, filt0, filt0, tmp2, tmp3); SRARI_H2_UH(tmp2, tmp3, FILTER_BITS); PCKEV_ST_SB(tmp2, tmp3, dst + 32 + dst_stride); ILVR_B2_UB(src10, src9, src11, src10, vec4, vec6); ILVL_B2_UB(src10, src9, src11, src10, vec5, vec7); DOTP_UB2_UH(vec4, vec5, filt0, filt0, tmp4, tmp5); SRARI_H2_UH(tmp4, tmp5, FILTER_BITS); PCKEV_ST_SB(tmp4, tmp5, dst + 48); DOTP_UB2_UH(vec6, vec7, filt0, filt0, tmp6, tmp7); SRARI_H2_UH(tmp6, tmp7, FILTER_BITS); PCKEV_ST_SB(tmp6, tmp7, dst + 48 + dst_stride); dst += (2 * dst_stride); src0 = src2; src3 = src5; src6 = src8; src9 = src11; } } void vpx_convolve8_vert_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int y_step_q4, int w, int h) { const int16_t *const filter_y = filter[y0_q4]; int8_t cnt, filt_ver[8]; assert(y_step_q4 == 16); assert(((const int32_t *)filter_y)[1] != 0x800000); for (cnt = 8; cnt--;) { filt_ver[cnt] = filter_y[cnt]; } if (vpx_get_filter_taps(filter_y) == 2) { switch (w) { case 4: common_vt_2t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_ver[3], h); break; case 8: common_vt_2t_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_ver[3], h); break; case 16: common_vt_2t_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_ver[3], h); break; case 32: common_vt_2t_32w_msa(src, (int32_t)src_stride, dst, 
(int32_t)dst_stride, &filt_ver[3], h); break; case 64: common_vt_2t_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, &filt_ver[3], h); break; default: vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } else { switch (w) { case 4: common_vt_8t_4w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_ver, h); break; case 8: common_vt_8t_8w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_ver, h); break; case 16: common_vt_8t_16w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_ver, h); break; case 32: common_vt_8t_32w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_ver, h); break; case 64: common_vt_8t_64w_msa(src, (int32_t)src_stride, dst, (int32_t)dst_stride, filt_ver, h); break; default: vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } } libvpx-1.8.2/vpx_dsp/mips/vpx_convolve_avg_msa.c000066400000000000000000000176431357355204000220710ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" static void avg_width4_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height) { int32_t cnt; uint32_t out0, out1, out2, out3; v16u8 src0, src1, src2, src3; v16u8 dst0, dst1, dst2, dst3; if (0 == (height % 4)) { for (cnt = (height / 4); cnt--;) { LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, dst0, dst1, dst2, dst3); out0 = __msa_copy_u_w((v4i32)dst0, 0); out1 = __msa_copy_u_w((v4i32)dst1, 0); out2 = __msa_copy_u_w((v4i32)dst2, 0); out3 = __msa_copy_u_w((v4i32)dst3, 0); SW4(out0, out1, out2, out3, dst, dst_stride); dst += (4 * dst_stride); } } else if (0 == (height % 2)) { for (cnt = (height / 2); cnt--;) { LD_UB2(src, src_stride, src0, src1); src += (2 * src_stride); LD_UB2(dst, dst_stride, dst0, dst1); AVER_UB2_UB(src0, dst0, src1, dst1, dst0, dst1); out0 = __msa_copy_u_w((v4i32)dst0, 0); out1 = __msa_copy_u_w((v4i32)dst1, 0); SW(out0, dst); dst += dst_stride; SW(out1, dst); dst += dst_stride; } } } static void avg_width8_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height) { int32_t cnt; uint64_t out0, out1, out2, out3; v16u8 src0, src1, src2, src3; v16u8 dst0, dst1, dst2, dst3; for (cnt = (height / 4); cnt--;) { LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); LD_UB4(dst, dst_stride, dst0, dst1, dst2, dst3); AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, dst0, dst1, dst2, dst3); out0 = __msa_copy_u_d((v2i64)dst0, 0); out1 = __msa_copy_u_d((v2i64)dst1, 0); out2 = __msa_copy_u_d((v2i64)dst2, 0); out3 = __msa_copy_u_d((v2i64)dst3, 0); SD4(out0, out1, out2, out3, dst, dst_stride); dst += (4 * dst_stride); } } static void avg_width16_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height) { int32_t cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, 
dst7; for (cnt = (height / 8); cnt--;) { LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); LD_UB8(dst, dst_stride, dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7); AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, dst0, dst1, dst2, dst3); AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, dst4, dst5, dst6, dst7); ST_UB8(dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7, dst, dst_stride); dst += (8 * dst_stride); } } static void avg_width32_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height) { int32_t cnt; uint8_t *dst_dup = dst; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 src8, src9, src10, src11, src12, src13, src14, src15; v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15; for (cnt = (height / 8); cnt--;) { LD_UB4(src, src_stride, src0, src2, src4, src6); LD_UB4(src + 16, src_stride, src1, src3, src5, src7); src += (4 * src_stride); LD_UB4(dst_dup, dst_stride, dst0, dst2, dst4, dst6); LD_UB4(dst_dup + 16, dst_stride, dst1, dst3, dst5, dst7); dst_dup += (4 * dst_stride); LD_UB4(src, src_stride, src8, src10, src12, src14); LD_UB4(src + 16, src_stride, src9, src11, src13, src15); src += (4 * src_stride); LD_UB4(dst_dup, dst_stride, dst8, dst10, dst12, dst14); LD_UB4(dst_dup + 16, dst_stride, dst9, dst11, dst13, dst15); dst_dup += (4 * dst_stride); AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, dst0, dst1, dst2, dst3); AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, dst4, dst5, dst6, dst7); AVER_UB4_UB(src8, dst8, src9, dst9, src10, dst10, src11, dst11, dst8, dst9, dst10, dst11); AVER_UB4_UB(src12, dst12, src13, dst13, src14, dst14, src15, dst15, dst12, dst13, dst14, dst15); ST_UB4(dst0, dst2, dst4, dst6, dst, dst_stride); ST_UB4(dst1, dst3, dst5, dst7, dst + 16, dst_stride); dst += (4 * dst_stride); ST_UB4(dst8, dst10, dst12, dst14, dst, dst_stride); ST_UB4(dst9, dst11, dst13, dst15, dst + 16, dst_stride); dst += (4 * dst_stride); } } static void avg_width64_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height) { int32_t cnt; uint8_t *dst_dup = dst; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; v16u8 src8, src9, src10, src11, src12, src13, src14, src15; v16u8 dst0, dst1, dst2, dst3, dst4, dst5, dst6, dst7; v16u8 dst8, dst9, dst10, dst11, dst12, dst13, dst14, dst15; for (cnt = (height / 4); cnt--;) { LD_UB4(src, 16, src0, src1, src2, src3); src += src_stride; LD_UB4(src, 16, src4, src5, src6, src7); src += src_stride; LD_UB4(src, 16, src8, src9, src10, src11); src += src_stride; LD_UB4(src, 16, src12, src13, src14, src15); src += src_stride; LD_UB4(dst_dup, 16, dst0, dst1, dst2, dst3); dst_dup += dst_stride; LD_UB4(dst_dup, 16, dst4, dst5, dst6, dst7); dst_dup += dst_stride; LD_UB4(dst_dup, 16, dst8, dst9, dst10, dst11); dst_dup += dst_stride; LD_UB4(dst_dup, 16, dst12, dst13, dst14, dst15); dst_dup += dst_stride; AVER_UB4_UB(src0, dst0, src1, dst1, src2, dst2, src3, dst3, dst0, dst1, dst2, dst3); AVER_UB4_UB(src4, dst4, src5, dst5, src6, dst6, src7, dst7, dst4, dst5, dst6, dst7); AVER_UB4_UB(src8, dst8, src9, dst9, src10, dst10, src11, dst11, dst8, dst9, dst10, dst11); AVER_UB4_UB(src12, dst12, src13, dst13, src14, dst14, src15, dst15, dst12, dst13, dst14, dst15); ST_UB4(dst0, dst1, dst2, dst3, dst, 16); dst += dst_stride; ST_UB4(dst4, dst5, dst6, dst7, dst, 16); dst += dst_stride; ST_UB4(dst8, dst9, dst10, dst11, dst, 16); dst += 
dst_stride; ST_UB4(dst12, dst13, dst14, dst15, dst, 16); dst += dst_stride; } } void vpx_convolve_avg_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int32_t y_step_q4, int32_t w, int32_t h) { (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; switch (w) { case 4: { avg_width4_msa(src, src_stride, dst, dst_stride, h); break; } case 8: { avg_width8_msa(src, src_stride, dst, dst_stride, h); break; } case 16: { avg_width16_msa(src, src_stride, dst, dst_stride, h); break; } case 32: { avg_width32_msa(src, src_stride, dst, dst_stride, h); break; } case 64: { avg_width64_msa(src, src_stride, dst, dst_stride, h); break; } default: { int32_t lp, cnt; for (cnt = h; cnt--;) { for (lp = 0; lp < w; ++lp) { dst[lp] = (((dst[lp] + src[lp]) + 1) >> 1); } src += src_stride; dst += dst_stride; } break; } } } libvpx-1.8.2/vpx_dsp/mips/vpx_convolve_copy_msa.c000066400000000000000000000200401357355204000222470ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/mips/macros_msa.h" static void copy_width8_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height) { int32_t cnt; uint64_t out0, out1, out2, out3, out4, out5, out6, out7; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; if (0 == height % 12) { for (cnt = (height / 12); cnt--;) { LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); out0 = __msa_copy_u_d((v2i64)src0, 0); out1 = __msa_copy_u_d((v2i64)src1, 0); out2 = __msa_copy_u_d((v2i64)src2, 0); out3 = __msa_copy_u_d((v2i64)src3, 0); out4 = __msa_copy_u_d((v2i64)src4, 0); out5 = __msa_copy_u_d((v2i64)src5, 0); out6 = __msa_copy_u_d((v2i64)src6, 0); out7 = __msa_copy_u_d((v2i64)src7, 0); SD4(out0, out1, out2, out3, dst, dst_stride); dst += (4 * dst_stride); SD4(out4, out5, out6, out7, dst, dst_stride); dst += (4 * dst_stride); LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); out0 = __msa_copy_u_d((v2i64)src0, 0); out1 = __msa_copy_u_d((v2i64)src1, 0); out2 = __msa_copy_u_d((v2i64)src2, 0); out3 = __msa_copy_u_d((v2i64)src3, 0); SD4(out0, out1, out2, out3, dst, dst_stride); dst += (4 * dst_stride); } } else if (0 == height % 8) { for (cnt = height >> 3; cnt--;) { LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); out0 = __msa_copy_u_d((v2i64)src0, 0); out1 = __msa_copy_u_d((v2i64)src1, 0); out2 = __msa_copy_u_d((v2i64)src2, 0); out3 = __msa_copy_u_d((v2i64)src3, 0); out4 = __msa_copy_u_d((v2i64)src4, 0); out5 = __msa_copy_u_d((v2i64)src5, 0); out6 = __msa_copy_u_d((v2i64)src6, 0); out7 = __msa_copy_u_d((v2i64)src7, 0); SD4(out0, out1, out2, out3, dst, dst_stride); dst += (4 * dst_stride); SD4(out4, out5, out6, out7, dst, dst_stride); dst += (4 * dst_stride); } } else if (0 == height % 4) { for (cnt = (height / 4); cnt--;) { LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); out0 = __msa_copy_u_d((v2i64)src0, 0); out1 = __msa_copy_u_d((v2i64)src1, 0); out2 = 
__msa_copy_u_d((v2i64)src2, 0); out3 = __msa_copy_u_d((v2i64)src3, 0); SD4(out0, out1, out2, out3, dst, dst_stride); dst += (4 * dst_stride); } } else if (0 == height % 2) { for (cnt = (height / 2); cnt--;) { LD_UB2(src, src_stride, src0, src1); src += (2 * src_stride); out0 = __msa_copy_u_d((v2i64)src0, 0); out1 = __msa_copy_u_d((v2i64)src1, 0); SD(out0, dst); dst += dst_stride; SD(out1, dst); dst += dst_stride; } } } static void copy_16multx8mult_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height, int32_t width) { int32_t cnt, loop_cnt; const uint8_t *src_tmp; uint8_t *dst_tmp; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; for (cnt = (width >> 4); cnt--;) { src_tmp = src; dst_tmp = dst; for (loop_cnt = (height >> 3); loop_cnt--;) { LD_UB8(src_tmp, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src_tmp += (8 * src_stride); ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst_tmp, dst_stride); dst_tmp += (8 * dst_stride); } src += 16; dst += 16; } } static void copy_width16_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height) { int32_t cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; if (0 == height % 12) { for (cnt = (height / 12); cnt--;) { LD_UB8(src, src_stride, src0, src1, src2, src3, src4, src5, src6, src7); src += (8 * src_stride); ST_UB8(src0, src1, src2, src3, src4, src5, src6, src7, dst, dst_stride); dst += (8 * dst_stride); LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); ST_UB4(src0, src1, src2, src3, dst, dst_stride); dst += (4 * dst_stride); } } else if (0 == height % 8) { copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 16); } else if (0 == height % 4) { for (cnt = (height >> 2); cnt--;) { LD_UB4(src, src_stride, src0, src1, src2, src3); src += (4 * src_stride); ST_UB4(src0, src1, src2, src3, dst, dst_stride); dst += (4 * dst_stride); } } } static void copy_width32_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height) { int32_t cnt; v16u8 src0, src1, src2, src3, src4, src5, src6, src7; if (0 == height % 12) { for (cnt = (height / 12); cnt--;) { LD_UB4(src, src_stride, src0, src1, src2, src3); LD_UB4(src + 16, src_stride, src4, src5, src6, src7); src += (4 * src_stride); ST_UB4(src0, src1, src2, src3, dst, dst_stride); ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); dst += (4 * dst_stride); LD_UB4(src, src_stride, src0, src1, src2, src3); LD_UB4(src + 16, src_stride, src4, src5, src6, src7); src += (4 * src_stride); ST_UB4(src0, src1, src2, src3, dst, dst_stride); ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); dst += (4 * dst_stride); LD_UB4(src, src_stride, src0, src1, src2, src3); LD_UB4(src + 16, src_stride, src4, src5, src6, src7); src += (4 * src_stride); ST_UB4(src0, src1, src2, src3, dst, dst_stride); ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); dst += (4 * dst_stride); } } else if (0 == height % 8) { copy_16multx8mult_msa(src, src_stride, dst, dst_stride, height, 32); } else if (0 == height % 4) { for (cnt = (height >> 2); cnt--;) { LD_UB4(src, src_stride, src0, src1, src2, src3); LD_UB4(src + 16, src_stride, src4, src5, src6, src7); src += (4 * src_stride); ST_UB4(src0, src1, src2, src3, dst, dst_stride); ST_UB4(src4, src5, src6, src7, dst + 16, dst_stride); dst += (4 * dst_stride); } } } static void copy_width64_msa(const uint8_t *src, int32_t src_stride, uint8_t *dst, int32_t dst_stride, int32_t height) { copy_16multx8mult_msa(src, src_stride, 
dst, dst_stride, height, 64); } void vpx_convolve_copy_msa(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int32_t x_step_q4, int y0_q4, int32_t y_step_q4, int32_t w, int32_t h) { (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; switch (w) { case 4: { uint32_t cnt, tmp; /* 1 word storage */ for (cnt = h; cnt--;) { tmp = LW(src); SW(tmp, dst); src += src_stride; dst += dst_stride; } break; } case 8: { copy_width8_msa(src, src_stride, dst, dst_stride, h); break; } case 16: { copy_width16_msa(src, src_stride, dst, dst_stride, h); break; } case 32: { copy_width32_msa(src, src_stride, dst, dst_stride, h); break; } case 64: { copy_width64_msa(src, src_stride, dst, dst_stride, h); break; } default: { uint32_t cnt; for (cnt = h; cnt--;) { memcpy(dst, src, w); src += src_stride; dst += dst_stride; } break; } } } libvpx-1.8.2/vpx_dsp/mips/vpx_convolve_msa.h000066400000000000000000000165001357355204000212300ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_ #define VPX_VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_ #include "vpx_dsp/mips/macros_msa.h" #include "vpx_dsp/vpx_filter.h" extern const uint8_t mc_filt_mask_arr[16 * 3]; #define FILT_8TAP_DPADD_S_H(vec0, vec1, vec2, vec3, filt0, filt1, filt2, \ filt3) \ ({ \ v8i16 tmp_dpadd_0, tmp_dpadd_1; \ \ tmp_dpadd_0 = __msa_dotp_s_h((v16i8)vec0, (v16i8)filt0); \ tmp_dpadd_0 = __msa_dpadd_s_h(tmp_dpadd_0, (v16i8)vec1, (v16i8)filt1); \ tmp_dpadd_1 = __msa_dotp_s_h((v16i8)vec2, (v16i8)filt2); \ tmp_dpadd_1 = __msa_dpadd_s_h(tmp_dpadd_1, (v16i8)vec3, (v16i8)filt3); \ tmp_dpadd_0 = __msa_adds_s_h(tmp_dpadd_0, tmp_dpadd_1); \ \ tmp_dpadd_0; \ }) #define HORIZ_8TAP_FILT(src0, src1, mask0, mask1, mask2, mask3, filt_h0, \ filt_h1, filt_h2, filt_h3) \ ({ \ v16i8 vec0_m, vec1_m, vec2_m, vec3_m; \ v8i16 hz_out_m; \ \ VSHF_B4_SB(src0, src1, mask0, mask1, mask2, mask3, vec0_m, vec1_m, vec2_m, \ vec3_m); \ hz_out_m = FILT_8TAP_DPADD_S_H(vec0_m, vec1_m, vec2_m, vec3_m, filt_h0, \ filt_h1, filt_h2, filt_h3); \ \ hz_out_m = __msa_srari_h(hz_out_m, FILTER_BITS); \ hz_out_m = __msa_sat_s_h(hz_out_m, 7); \ \ hz_out_m; \ }) #define HORIZ_8TAP_4WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ mask2, mask3, filt0, filt1, filt2, filt3, \ out0, out1) \ { \ v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ v8i16 res0_m, res1_m, res2_m, res3_m; \ \ VSHF_B2_SB(src0, src1, src2, src3, mask0, mask0, vec0_m, vec1_m); \ DOTP_SB2_SH(vec0_m, vec1_m, filt0, filt0, res0_m, res1_m); \ VSHF_B2_SB(src0, src1, src2, src3, mask1, mask1, vec2_m, vec3_m); \ DPADD_SB2_SH(vec2_m, vec3_m, filt1, filt1, res0_m, res1_m); \ VSHF_B2_SB(src0, src1, src2, src3, mask2, mask2, vec4_m, vec5_m); \ DOTP_SB2_SH(vec4_m, vec5_m, filt2, filt2, res2_m, res3_m); \ VSHF_B2_SB(src0, src1, src2, src3, mask3, mask3, vec6_m, vec7_m); \ DPADD_SB2_SH(vec6_m, vec7_m, filt3, filt3, res2_m, res3_m); \ ADDS_SH2_SH(res0_m, res2_m, res1_m, res3_m, out0, out1); \ } #define HORIZ_8TAP_8WID_4VECS_FILT(src0, src1, src2, src3, mask0, mask1, \ mask2, mask3, filt0, filt1, filt2, filt3, \ out0, out1, out2, out3) \ { \ 
v16i8 vec0_m, vec1_m, vec2_m, vec3_m, vec4_m, vec5_m, vec6_m, vec7_m; \ v8i16 res0_m, res1_m, res2_m, res3_m, res4_m, res5_m, res6_m, res7_m; \ \ VSHF_B2_SB(src0, src0, src1, src1, mask0, mask0, vec0_m, vec1_m); \ VSHF_B2_SB(src2, src2, src3, src3, mask0, mask0, vec2_m, vec3_m); \ DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt0, filt0, filt0, filt0, \ res0_m, res1_m, res2_m, res3_m); \ VSHF_B2_SB(src0, src0, src1, src1, mask2, mask2, vec0_m, vec1_m); \ VSHF_B2_SB(src2, src2, src3, src3, mask2, mask2, vec2_m, vec3_m); \ DOTP_SB4_SH(vec0_m, vec1_m, vec2_m, vec3_m, filt2, filt2, filt2, filt2, \ res4_m, res5_m, res6_m, res7_m); \ VSHF_B2_SB(src0, src0, src1, src1, mask1, mask1, vec4_m, vec5_m); \ VSHF_B2_SB(src2, src2, src3, src3, mask1, mask1, vec6_m, vec7_m); \ DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt1, filt1, filt1, filt1, \ res0_m, res1_m, res2_m, res3_m); \ VSHF_B2_SB(src0, src0, src1, src1, mask3, mask3, vec4_m, vec5_m); \ VSHF_B2_SB(src2, src2, src3, src3, mask3, mask3, vec6_m, vec7_m); \ DPADD_SB4_SH(vec4_m, vec5_m, vec6_m, vec7_m, filt3, filt3, filt3, filt3, \ res4_m, res5_m, res6_m, res7_m); \ ADDS_SH4_SH(res0_m, res4_m, res1_m, res5_m, res2_m, res6_m, res3_m, \ res7_m, out0, out1, out2, out3); \ } #define PCKEV_XORI128_AVG_ST_UB(in0, in1, dst, pdst) \ { \ v16u8 tmp_m; \ \ tmp_m = PCKEV_XORI128_UB(in1, in0); \ tmp_m = __msa_aver_u_b(tmp_m, (v16u8)dst); \ ST_UB(tmp_m, (pdst)); \ } #define PCKEV_AVG_ST_UB(in0, in1, dst, pdst) \ { \ v16u8 tmp_m; \ \ tmp_m = (v16u8)__msa_pckev_b((v16i8)in0, (v16i8)in1); \ tmp_m = __msa_aver_u_b(tmp_m, (v16u8)dst); \ ST_UB(tmp_m, (pdst)); \ } #define PCKEV_AVG_ST8x4_UB(in0, in1, in2, in3, dst0, dst1, pdst, stride) \ { \ v16u8 tmp0_m, tmp1_m; \ uint8_t *pdst_m = (uint8_t *)(pdst); \ \ PCKEV_B2_UB(in1, in0, in3, in2, tmp0_m, tmp1_m); \ AVER_UB2_UB(tmp0_m, dst0, tmp1_m, dst1, tmp0_m, tmp1_m); \ ST8x4_UB(tmp0_m, tmp1_m, pdst_m, stride); \ } #endif // VPX_VPX_DSP_MIPS_VPX_CONVOLVE_MSA_H_ libvpx-1.8.2/vpx_dsp/postproc.h000066400000000000000000000013141357355204000165360ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_POSTPROC_H_ #define VPX_VPX_DSP_POSTPROC_H_ #ifdef __cplusplus extern "C" { #endif // Fills a noise buffer with gaussian noise strength determined by sigma. int vpx_setup_noise(double sigma, int8_t *noise, int size); #ifdef __cplusplus } #endif #endif // VPX_VPX_DSP_POSTPROC_H_ libvpx-1.8.2/vpx_dsp/ppc/000077500000000000000000000000001357355204000152775ustar00rootroot00000000000000libvpx-1.8.2/vpx_dsp/ppc/bitdepth_conversion_vsx.h000066400000000000000000000030151357355204000224170ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_DSP_PPC_BITDEPTH_CONVERSION_VSX_H_ #define VPX_VPX_DSP_PPC_BITDEPTH_CONVERSION_VSX_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_dsp/ppc/types_vsx.h" // Load 8 16 bit values. If the source is 32 bits then pack down with // saturation. static INLINE int16x8_t load_tran_low(int32_t c, const tran_low_t *s) { #if CONFIG_VP9_HIGHBITDEPTH int32x4_t u = vec_vsx_ld(c, s); int32x4_t v = vec_vsx_ld(c, s + 4); return vec_packs(u, v); #else return vec_vsx_ld(c, s); #endif } // Store 8 16 bit values. If the destination is 32 bits then sign extend the // values by multiplying by 1. static INLINE void store_tran_low(int16x8_t v, int32_t c, tran_low_t *s) { #if CONFIG_VP9_HIGHBITDEPTH const int16x8_t one = vec_splat_s16(1); const int32x4_t even = vec_mule(v, one); const int32x4_t odd = vec_mulo(v, one); const int32x4_t high = vec_mergeh(even, odd); const int32x4_t low = vec_mergel(even, odd); vec_vsx_st(high, c, s); vec_vsx_st(low, c, s + 4); #else vec_vsx_st(v, c, s); #endif } #endif // VPX_VPX_DSP_PPC_BITDEPTH_CONVERSION_VSX_H_ libvpx-1.8.2/vpx_dsp/ppc/deblock_vsx.c000066400000000000000000000334471357355204000177610ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/ppc/types_vsx.h" extern const int16_t vpx_rv[]; static const uint8x16_t load_merge = { 0x00, 0x02, 0x04, 0x06, 0x08, 0x0A, 0x0C, 0x0E, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F }; static const uint8x16_t st8_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F }; static INLINE uint8x16_t apply_filter(uint8x16_t ctx[4], uint8x16_t v, uint8x16_t filter) { const uint8x16_t k1 = vec_avg(ctx[0], ctx[1]); const uint8x16_t k2 = vec_avg(ctx[3], ctx[2]); const uint8x16_t k3 = vec_avg(k1, k2); const uint8x16_t f_a = vec_max(vec_absd(v, ctx[0]), vec_absd(v, ctx[1])); const uint8x16_t f_b = vec_max(vec_absd(v, ctx[2]), vec_absd(v, ctx[3])); const bool8x16_t mask = vec_cmplt(vec_max(f_a, f_b), filter); return vec_sel(v, vec_avg(k3, v), mask); } static INLINE void vert_ctx(uint8x16_t ctx[4], int col, uint8_t *src, int stride) { ctx[0] = vec_vsx_ld(col - 2 * stride, src); ctx[1] = vec_vsx_ld(col - stride, src); ctx[2] = vec_vsx_ld(col + stride, src); ctx[3] = vec_vsx_ld(col + 2 * stride, src); } static INLINE void horz_ctx(uint8x16_t ctx[4], uint8x16_t left_ctx, uint8x16_t v, uint8x16_t right_ctx) { static const uint8x16_t l2_perm = { 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D }; static const uint8x16_t l1_perm = { 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E }; static const uint8x16_t r1_perm = { 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10 }; static const uint8x16_t r2_perm = { 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11 }; ctx[0] = vec_perm(left_ctx, v, l2_perm); ctx[1] = vec_perm(left_ctx, v, l1_perm); ctx[2] = vec_perm(v, right_ctx, r1_perm); ctx[3] = vec_perm(v, right_ctx, r2_perm); } void 
vpx_post_proc_down_and_across_mb_row_vsx(unsigned char *src_ptr, unsigned char *dst_ptr, int src_pixels_per_line, int dst_pixels_per_line, int cols, unsigned char *f, int size) { int row, col; uint8x16_t ctx[4], out, v, left_ctx; for (row = 0; row < size; row++) { for (col = 0; col < cols - 8; col += 16) { const uint8x16_t filter = vec_vsx_ld(col, f); v = vec_vsx_ld(col, src_ptr); vert_ctx(ctx, col, src_ptr, src_pixels_per_line); vec_vsx_st(apply_filter(ctx, v, filter), col, dst_ptr); } if (col != cols) { const uint8x16_t filter = vec_vsx_ld(col, f); v = vec_vsx_ld(col, src_ptr); vert_ctx(ctx, col, src_ptr, src_pixels_per_line); out = apply_filter(ctx, v, filter); vec_vsx_st(vec_perm(out, v, st8_perm), col, dst_ptr); } /* now post_proc_across */ left_ctx = vec_splats(dst_ptr[0]); v = vec_vsx_ld(0, dst_ptr); for (col = 0; col < cols - 8; col += 16) { const uint8x16_t filter = vec_vsx_ld(col, f); const uint8x16_t right_ctx = (col + 16 == cols) ? vec_splats(dst_ptr[cols - 1]) : vec_vsx_ld(col, dst_ptr + 16); horz_ctx(ctx, left_ctx, v, right_ctx); vec_vsx_st(apply_filter(ctx, v, filter), col, dst_ptr); left_ctx = v; v = right_ctx; } if (col != cols) { const uint8x16_t filter = vec_vsx_ld(col, f); const uint8x16_t right_ctx = vec_splats(dst_ptr[cols - 1]); horz_ctx(ctx, left_ctx, v, right_ctx); out = apply_filter(ctx, v, filter); vec_vsx_st(vec_perm(out, v, st8_perm), col, dst_ptr); } src_ptr += src_pixels_per_line; dst_ptr += dst_pixels_per_line; } } // C: s[c + 7] static INLINE int16x8_t next7l_s16(uint8x16_t c) { static const uint8x16_t next7_perm = { 0x07, 0x10, 0x08, 0x11, 0x09, 0x12, 0x0A, 0x13, 0x0B, 0x14, 0x0C, 0x15, 0x0D, 0x16, 0x0E, 0x17, }; return (int16x8_t)vec_perm(c, vec_zeros_u8, next7_perm); } // Slide across window and add. static INLINE int16x8_t slide_sum_s16(int16x8_t x) { // x = A B C D E F G H // // 0 A B C D E F G const int16x8_t sum1 = vec_add(x, vec_slo(x, vec_splats((int8_t)(2 << 3)))); // 0 0 A B C D E F const int16x8_t sum2 = vec_add(vec_slo(x, vec_splats((int8_t)(4 << 3))), // 0 0 0 A B C D E vec_slo(x, vec_splats((int8_t)(6 << 3)))); // 0 0 0 0 A B C D const int16x8_t sum3 = vec_add(vec_slo(x, vec_splats((int8_t)(8 << 3))), // 0 0 0 0 0 A B C vec_slo(x, vec_splats((int8_t)(10 << 3)))); // 0 0 0 0 0 0 A B const int16x8_t sum4 = vec_add(vec_slo(x, vec_splats((int8_t)(12 << 3))), // 0 0 0 0 0 0 0 A vec_slo(x, vec_splats((int8_t)(14 << 3)))); return vec_add(vec_add(sum1, sum2), vec_add(sum3, sum4)); } // Slide across window and add. 
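/* Illustrative scalar sketch (not part of libvpx): slide_sum_s16 above and
 * slide_sumsq_s32 below are inclusive prefix sums across the 8 lanes, turning
 * eight per-pixel window deltas into eight running window totals at once.
 * They vectorize the per-pixel C loop sketched here, matching the "C:"
 * comments in the function below; border replication and the reference
 * implementation's delayed write-back are elided, and "s" is a hypothetical
 * row pointer with valid context on both sides. */
static void mbpost_across_scalar_sketch(unsigned char *s, int cols, int sum,
                                        int sumsq, int flimit) {
  int c;
  for (c = 0; c < cols; c++) {
    /* Slide the 15-tap window [c - 7, c + 7]: drop s[c - 8], add s[c + 7]. */
    sum += s[c + 7] - s[c - 8];
    sumsq += s[c + 7] * s[c + 7] - s[c - 8] * s[c - 8];
    /* Low-variance test, then a rounded average of the window plus pixel. */
    if (sumsq * 15 - sum * sum < flimit) {
      s[c] = (unsigned char)((8 + sum + s[c]) >> 4);
    }
  }
}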
static INLINE int32x4_t slide_sumsq_s32(int32x4_t xsq_even, int32x4_t xsq_odd) { // 0 A C E // + 0 B D F int32x4_t sumsq_1 = vec_add(vec_slo(xsq_even, vec_splats((int8_t)(4 << 3))), vec_slo(xsq_odd, vec_splats((int8_t)(4 << 3)))); // 0 0 A C // + 0 0 B D int32x4_t sumsq_2 = vec_add(vec_slo(xsq_even, vec_splats((int8_t)(8 << 3))), vec_slo(xsq_odd, vec_splats((int8_t)(8 << 3)))); // 0 0 0 A // + 0 0 0 B int32x4_t sumsq_3 = vec_add(vec_slo(xsq_even, vec_splats((int8_t)(12 << 3))), vec_slo(xsq_odd, vec_splats((int8_t)(12 << 3)))); sumsq_1 = vec_add(sumsq_1, xsq_even); sumsq_2 = vec_add(sumsq_2, sumsq_3); return vec_add(sumsq_1, sumsq_2); } // C: (b + sum + val) >> 4 static INLINE int16x8_t filter_s16(int16x8_t b, int16x8_t sum, int16x8_t val) { return vec_sra(vec_add(vec_add(b, sum), val), vec_splats((uint16_t)4)); } // C: sumsq * 15 - sum * sum static INLINE bool16x8_t mask_s16(int32x4_t sumsq_even, int32x4_t sumsq_odd, int16x8_t sum, int32x4_t lim) { static const uint8x16_t mask_merge = { 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x08, 0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D }; const int32x4_t sumsq_odd_scaled = vec_mul(sumsq_odd, vec_splats((int32_t)15)); const int32x4_t sumsq_even_scaled = vec_mul(sumsq_even, vec_splats((int32_t)15)); const int32x4_t thres_odd = vec_sub(sumsq_odd_scaled, vec_mulo(sum, sum)); const int32x4_t thres_even = vec_sub(sumsq_even_scaled, vec_mule(sum, sum)); const bool32x4_t mask_odd = vec_cmplt(thres_odd, lim); const bool32x4_t mask_even = vec_cmplt(thres_even, lim); return vec_perm((bool16x8_t)mask_even, (bool16x8_t)mask_odd, mask_merge); } void vpx_mbpost_proc_across_ip_vsx(unsigned char *src, int pitch, int rows, int cols, int flimit) { int row, col; const int32x4_t lim = vec_splats(flimit); // 8 columns are processed at a time. assert(cols % 8 == 0); for (row = 0; row < rows; row++) { // The sum is signed and requires at most 13 bits. // (8 bits + sign) * 15 (4 bits) int16x8_t sum; // The sum of squares requires at most 20 bits. // (16 bits + sign) * 15 (4 bits) int32x4_t sumsq_even, sumsq_odd; // Fill left context with first col. int16x8_t left_ctx = vec_splats((int16_t)src[0]); int16_t s = src[0] * 9; int32_t ssq = src[0] * src[0] * 9 + 16; // Fill the next 6 columns of the sliding window with cols 2 to 7. for (col = 1; col <= 6; ++col) { s += src[col]; ssq += src[col] * src[col]; } // Set this sum to every element in the window. sum = vec_splats(s); sumsq_even = vec_splats(ssq); sumsq_odd = vec_splats(ssq); for (col = 0; col < cols; col += 8) { bool16x8_t mask; int16x8_t filtered, masked; uint8x16_t out; const uint8x16_t val = vec_vsx_ld(0, src + col); const int16x8_t val_high = unpack_to_s16_h(val); // C: s[c + 7] const int16x8_t right_ctx = (col + 8 == cols) ? 
vec_splats((int16_t)src[col + 7]) : next7l_s16(val); // C: x = s[c + 7] - s[c - 8]; const int16x8_t x = vec_sub(right_ctx, left_ctx); const int32x4_t xsq_even = vec_sub(vec_mule(right_ctx, right_ctx), vec_mule(left_ctx, left_ctx)); const int32x4_t xsq_odd = vec_sub(vec_mulo(right_ctx, right_ctx), vec_mulo(left_ctx, left_ctx)); const int32x4_t sumsq_tmp = slide_sumsq_s32(xsq_even, xsq_odd); // A C E G // 0 B D F // 0 A C E // 0 0 B D // 0 0 A C // 0 0 0 B // 0 0 0 A sumsq_even = vec_add(sumsq_even, sumsq_tmp); // B D F G // A C E G // 0 B D F // 0 A C E // 0 0 B D // 0 0 A C // 0 0 0 B // 0 0 0 A sumsq_odd = vec_add(sumsq_odd, vec_add(sumsq_tmp, xsq_odd)); sum = vec_add(sum, slide_sum_s16(x)); // C: (8 + sum + s[c]) >> 4 filtered = filter_s16(vec_splats((int16_t)8), sum, val_high); // C: sumsq * 15 - sum * sum mask = mask_s16(sumsq_even, sumsq_odd, sum, lim); masked = vec_sel(val_high, filtered, mask); out = vec_perm((uint8x16_t)masked, vec_vsx_ld(0, src + col), load_merge); vec_vsx_st(out, 0, src + col); // Update window sum and square sum sum = vec_splat(sum, 7); sumsq_even = vec_splat(sumsq_odd, 3); sumsq_odd = vec_splat(sumsq_odd, 3); // C: s[c - 8] (for next iteration) left_ctx = val_high; } src += pitch; } } void vpx_mbpost_proc_down_vsx(uint8_t *dst, int pitch, int rows, int cols, int flimit) { int col, row, i; int16x8_t window[16]; const int32x4_t lim = vec_splats(flimit); // 8 columns are processed at a time. assert(cols % 8 == 0); // If rows is less than 8 the bottom border extension fails. assert(rows >= 8); for (col = 0; col < cols; col += 8) { // The sum is signed and requires at most 13 bits. // (8 bits + sign) * 15 (4 bits) int16x8_t r1, sum; // The sum of squares requires at most 20 bits. // (16 bits + sign) * 15 (4 bits) int32x4_t sumsq_even, sumsq_odd; r1 = unpack_to_s16_h(vec_vsx_ld(0, dst)); // Fill sliding window with first row. for (i = 0; i <= 8; i++) { window[i] = r1; } // First 9 rows of the sliding window are the same. // sum = r1 * 9 sum = vec_mladd(r1, vec_splats((int16_t)9), vec_zeros_s16); // sumsq = r1 * r1 * 9 sumsq_even = vec_mule(sum, r1); sumsq_odd = vec_mulo(sum, r1); // Fill the next 6 rows of the sliding window with rows 2 to 7. for (i = 1; i <= 6; ++i) { const int16x8_t next_row = unpack_to_s16_h(vec_vsx_ld(i * pitch, dst)); window[i + 8] = next_row; sum = vec_add(sum, next_row); sumsq_odd = vec_add(sumsq_odd, vec_mulo(next_row, next_row)); sumsq_even = vec_add(sumsq_even, vec_mule(next_row, next_row)); } for (row = 0; row < rows; row++) { int32x4_t d15_even, d15_odd, d0_even, d0_odd; bool16x8_t mask; int16x8_t filtered, masked; uint8x16_t out; const int16x8_t rv = vec_vsx_ld(0, vpx_rv + (row & 127)); // Move the sliding window if (row + 7 < rows) { window[15] = unpack_to_s16_h(vec_vsx_ld((row + 7) * pitch, dst)); } else { window[15] = window[14]; } // C: sum += s[7 * pitch] - s[-8 * pitch]; sum = vec_add(sum, vec_sub(window[15], window[0])); // C: sumsq += s[7 * pitch] * s[7 * pitch] - s[-8 * pitch] * s[-8 * // pitch]; // Optimization Note: Caching a squared-window for odd and even is // slower than just repeating the multiplies. 
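/* vec_mule/vec_mulo below square the 16-bit rows into 32-bit products, with
 * the even-indexed lanes landing in one vector and the odd-indexed lanes in
 * the other, so the window update d15 - d0 is applied to the even and odd
 * accumulators independently. */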
d15_odd = vec_mulo(window[15], window[15]); d15_even = vec_mule(window[15], window[15]); d0_odd = vec_mulo(window[0], window[0]); d0_even = vec_mule(window[0], window[0]); sumsq_odd = vec_add(sumsq_odd, vec_sub(d15_odd, d0_odd)); sumsq_even = vec_add(sumsq_even, vec_sub(d15_even, d0_even)); // C: (vpx_rv[(r & 127) + (c & 7)] + sum + s[0]) >> 4 filtered = filter_s16(rv, sum, window[8]); // C: sumsq * 15 - sum * sum mask = mask_s16(sumsq_even, sumsq_odd, sum, lim); masked = vec_sel(window[8], filtered, mask); // TODO(ltrudeau) If cols % 16 == 0, we could just process 16 per // iteration out = vec_perm((uint8x16_t)masked, vec_vsx_ld(0, dst + row * pitch), load_merge); vec_vsx_st(out, 0, dst + row * pitch); // Optimization Note: Turns out that the following loop is faster than // using pointers to manage the sliding window. for (i = 1; i < 16; i++) { window[i - 1] = window[i]; } } dst += 8; } } libvpx-1.8.2/vpx_dsp/ppc/fdct32x32_vsx.c000066400000000000000000000505021357355204000177670ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/ppc/transpose_vsx.h" #include "vpx_dsp/ppc/txfm_common_vsx.h" #include "vpx_dsp/ppc/types_vsx.h" // Returns ((a +/- b) * cospi16 + (2 << 13)) >> 14. static INLINE void single_butterfly(int16x8_t a, int16x8_t b, int16x8_t *add, int16x8_t *sub) { // Since a + b can overflow 16 bits, the multiplication is distributed // (a * c +/- b * c). const int32x4_t ac_e = vec_mule(a, cospi16_v); const int32x4_t ac_o = vec_mulo(a, cospi16_v); const int32x4_t bc_e = vec_mule(b, cospi16_v); const int32x4_t bc_o = vec_mulo(b, cospi16_v); // Reuse the same multiplies for sum and difference. const int32x4_t sum_e = vec_add(ac_e, bc_e); const int32x4_t sum_o = vec_add(ac_o, bc_o); const int32x4_t diff_e = vec_sub(ac_e, bc_e); const int32x4_t diff_o = vec_sub(ac_o, bc_o); // Add rounding offset const int32x4_t rsum_o = vec_add(sum_o, vec_dct_const_rounding); const int32x4_t rsum_e = vec_add(sum_e, vec_dct_const_rounding); const int32x4_t rdiff_o = vec_add(diff_o, vec_dct_const_rounding); const int32x4_t rdiff_e = vec_add(diff_e, vec_dct_const_rounding); const int32x4_t ssum_o = vec_sra(rsum_o, vec_dct_const_bits); const int32x4_t ssum_e = vec_sra(rsum_e, vec_dct_const_bits); const int32x4_t sdiff_o = vec_sra(rdiff_o, vec_dct_const_bits); const int32x4_t sdiff_e = vec_sra(rdiff_e, vec_dct_const_bits); // There's no pack operation for even and odd, so we need to permute. 
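/* vec_mule/vec_mulo left the 32-bit results split by lane parity, so the
 * vec_perm below picks out and re-interleaves the narrowed halves into
 * normal 16-bit lane order.  Note that vec_dct_const_rounding holds
 * DCT_CONST_ROUNDING, i.e. 1 << 13, half of the 2^14 divisor implied by
 * the final >> 14 shift. */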
*add = (int16x8_t)vec_perm(ssum_e, ssum_o, vec_perm_odd_even_pack); *sub = (int16x8_t)vec_perm(sdiff_e, sdiff_o, vec_perm_odd_even_pack); } // Returns (a * c1 +/- b * c2 + (2 << 13)) >> 14 static INLINE void double_butterfly(int16x8_t a, int16x8_t c1, int16x8_t b, int16x8_t c2, int16x8_t *add, int16x8_t *sub) { const int32x4_t ac1_o = vec_mulo(a, c1); const int32x4_t ac1_e = vec_mule(a, c1); const int32x4_t ac2_o = vec_mulo(a, c2); const int32x4_t ac2_e = vec_mule(a, c2); const int32x4_t bc1_o = vec_mulo(b, c1); const int32x4_t bc1_e = vec_mule(b, c1); const int32x4_t bc2_o = vec_mulo(b, c2); const int32x4_t bc2_e = vec_mule(b, c2); const int32x4_t sum_o = vec_add(ac1_o, bc2_o); const int32x4_t sum_e = vec_add(ac1_e, bc2_e); const int32x4_t diff_o = vec_sub(ac2_o, bc1_o); const int32x4_t diff_e = vec_sub(ac2_e, bc1_e); // Add rounding offset const int32x4_t rsum_o = vec_add(sum_o, vec_dct_const_rounding); const int32x4_t rsum_e = vec_add(sum_e, vec_dct_const_rounding); const int32x4_t rdiff_o = vec_add(diff_o, vec_dct_const_rounding); const int32x4_t rdiff_e = vec_add(diff_e, vec_dct_const_rounding); const int32x4_t ssum_o = vec_sra(rsum_o, vec_dct_const_bits); const int32x4_t ssum_e = vec_sra(rsum_e, vec_dct_const_bits); const int32x4_t sdiff_o = vec_sra(rdiff_o, vec_dct_const_bits); const int32x4_t sdiff_e = vec_sra(rdiff_e, vec_dct_const_bits); // There's no pack operation for even and odd, so we need to permute. *add = (int16x8_t)vec_perm(ssum_e, ssum_o, vec_perm_odd_even_pack); *sub = (int16x8_t)vec_perm(sdiff_e, sdiff_o, vec_perm_odd_even_pack); } // While other architecture combine the load and the stage 1 operations, Power9 // benchmarking show no benefit in such an approach. static INLINE void load(const int16_t *a, int stride, int16x8_t *b) { // Tried out different combinations of load and shift instructions, this is // the fastest one. 
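/* Each vec_sl below shifts the loaded row left by vec_dct_scale_log2 (2
 * bits), i.e. multiplies the input by 4, matching the * 4 pre-scaling the C
 * reference applies before its first fdct32 pass; the extra precision is
 * shifted back out by the round-shift helpers after each pass. */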
{ const int16x8_t l0 = vec_vsx_ld(0, a); const int16x8_t l1 = vec_vsx_ld(0, a + stride); const int16x8_t l2 = vec_vsx_ld(0, a + 2 * stride); const int16x8_t l3 = vec_vsx_ld(0, a + 3 * stride); const int16x8_t l4 = vec_vsx_ld(0, a + 4 * stride); const int16x8_t l5 = vec_vsx_ld(0, a + 5 * stride); const int16x8_t l6 = vec_vsx_ld(0, a + 6 * stride); const int16x8_t l7 = vec_vsx_ld(0, a + 7 * stride); const int16x8_t l8 = vec_vsx_ld(0, a + 8 * stride); const int16x8_t l9 = vec_vsx_ld(0, a + 9 * stride); const int16x8_t l10 = vec_vsx_ld(0, a + 10 * stride); const int16x8_t l11 = vec_vsx_ld(0, a + 11 * stride); const int16x8_t l12 = vec_vsx_ld(0, a + 12 * stride); const int16x8_t l13 = vec_vsx_ld(0, a + 13 * stride); const int16x8_t l14 = vec_vsx_ld(0, a + 14 * stride); const int16x8_t l15 = vec_vsx_ld(0, a + 15 * stride); b[0] = vec_sl(l0, vec_dct_scale_log2); b[1] = vec_sl(l1, vec_dct_scale_log2); b[2] = vec_sl(l2, vec_dct_scale_log2); b[3] = vec_sl(l3, vec_dct_scale_log2); b[4] = vec_sl(l4, vec_dct_scale_log2); b[5] = vec_sl(l5, vec_dct_scale_log2); b[6] = vec_sl(l6, vec_dct_scale_log2); b[7] = vec_sl(l7, vec_dct_scale_log2); b[8] = vec_sl(l8, vec_dct_scale_log2); b[9] = vec_sl(l9, vec_dct_scale_log2); b[10] = vec_sl(l10, vec_dct_scale_log2); b[11] = vec_sl(l11, vec_dct_scale_log2); b[12] = vec_sl(l12, vec_dct_scale_log2); b[13] = vec_sl(l13, vec_dct_scale_log2); b[14] = vec_sl(l14, vec_dct_scale_log2); b[15] = vec_sl(l15, vec_dct_scale_log2); } { const int16x8_t l16 = vec_vsx_ld(0, a + 16 * stride); const int16x8_t l17 = vec_vsx_ld(0, a + 17 * stride); const int16x8_t l18 = vec_vsx_ld(0, a + 18 * stride); const int16x8_t l19 = vec_vsx_ld(0, a + 19 * stride); const int16x8_t l20 = vec_vsx_ld(0, a + 20 * stride); const int16x8_t l21 = vec_vsx_ld(0, a + 21 * stride); const int16x8_t l22 = vec_vsx_ld(0, a + 22 * stride); const int16x8_t l23 = vec_vsx_ld(0, a + 23 * stride); const int16x8_t l24 = vec_vsx_ld(0, a + 24 * stride); const int16x8_t l25 = vec_vsx_ld(0, a + 25 * stride); const int16x8_t l26 = vec_vsx_ld(0, a + 26 * stride); const int16x8_t l27 = vec_vsx_ld(0, a + 27 * stride); const int16x8_t l28 = vec_vsx_ld(0, a + 28 * stride); const int16x8_t l29 = vec_vsx_ld(0, a + 29 * stride); const int16x8_t l30 = vec_vsx_ld(0, a + 30 * stride); const int16x8_t l31 = vec_vsx_ld(0, a + 31 * stride); b[16] = vec_sl(l16, vec_dct_scale_log2); b[17] = vec_sl(l17, vec_dct_scale_log2); b[18] = vec_sl(l18, vec_dct_scale_log2); b[19] = vec_sl(l19, vec_dct_scale_log2); b[20] = vec_sl(l20, vec_dct_scale_log2); b[21] = vec_sl(l21, vec_dct_scale_log2); b[22] = vec_sl(l22, vec_dct_scale_log2); b[23] = vec_sl(l23, vec_dct_scale_log2); b[24] = vec_sl(l24, vec_dct_scale_log2); b[25] = vec_sl(l25, vec_dct_scale_log2); b[26] = vec_sl(l26, vec_dct_scale_log2); b[27] = vec_sl(l27, vec_dct_scale_log2); b[28] = vec_sl(l28, vec_dct_scale_log2); b[29] = vec_sl(l29, vec_dct_scale_log2); b[30] = vec_sl(l30, vec_dct_scale_log2); b[31] = vec_sl(l31, vec_dct_scale_log2); } } static INLINE void store(tran_low_t *a, const int16x8_t *b) { vec_vsx_st(b[0], 0, a); vec_vsx_st(b[8], 0, a + 8); vec_vsx_st(b[16], 0, a + 16); vec_vsx_st(b[24], 0, a + 24); vec_vsx_st(b[1], 0, a + 32); vec_vsx_st(b[9], 0, a + 40); vec_vsx_st(b[17], 0, a + 48); vec_vsx_st(b[25], 0, a + 56); vec_vsx_st(b[2], 0, a + 64); vec_vsx_st(b[10], 0, a + 72); vec_vsx_st(b[18], 0, a + 80); vec_vsx_st(b[26], 0, a + 88); vec_vsx_st(b[3], 0, a + 96); vec_vsx_st(b[11], 0, a + 104); vec_vsx_st(b[19], 0, a + 112); vec_vsx_st(b[27], 0, a + 120); vec_vsx_st(b[4], 0, a + 
128); vec_vsx_st(b[12], 0, a + 136); vec_vsx_st(b[20], 0, a + 144); vec_vsx_st(b[28], 0, a + 152); vec_vsx_st(b[5], 0, a + 160); vec_vsx_st(b[13], 0, a + 168); vec_vsx_st(b[21], 0, a + 176); vec_vsx_st(b[29], 0, a + 184); vec_vsx_st(b[6], 0, a + 192); vec_vsx_st(b[14], 0, a + 200); vec_vsx_st(b[22], 0, a + 208); vec_vsx_st(b[30], 0, a + 216); vec_vsx_st(b[7], 0, a + 224); vec_vsx_st(b[15], 0, a + 232); vec_vsx_st(b[23], 0, a + 240); vec_vsx_st(b[31], 0, a + 248); } // Returns 1 if negative 0 if positive static INLINE int16x8_t vec_sign_s16(int16x8_t a) { return vec_sr(a, vec_shift_sign_s16); } // Add 2 if positive, 1 if negative, and shift by 2. static INLINE int16x8_t sub_round_shift(const int16x8_t a) { const int16x8_t sign = vec_sign_s16(a); return vec_sra(vec_sub(vec_add(a, vec_twos_s16), sign), vec_dct_scale_log2); } // Add 1 if positive, 2 if negative, and shift by 2. // In practice, add 1, then add the sign bit, then shift without rounding. static INLINE int16x8_t add_round_shift_s16(const int16x8_t a) { const int16x8_t sign = vec_sign_s16(a); return vec_sra(vec_add(vec_add(a, vec_ones_s16), sign), vec_dct_scale_log2); } static void fdct32_vsx(const int16x8_t *in, int16x8_t *out, int pass) { int16x8_t temp0[32]; // Hold stages: 1, 4, 7 int16x8_t temp1[32]; // Hold stages: 2, 5 int16x8_t temp2[32]; // Hold stages: 3, 6 int i; // Stage 1 // Unrolling this loops actually slows down Power9 benchmarks for (i = 0; i < 16; i++) { temp0[i] = vec_add(in[i], in[31 - i]); // pass through to stage 3. temp1[i + 16] = vec_sub(in[15 - i], in[i + 16]); } // Stage 2 // Unrolling this loops actually slows down Power9 benchmarks for (i = 0; i < 8; i++) { temp1[i] = vec_add(temp0[i], temp0[15 - i]); temp1[i + 8] = vec_sub(temp0[7 - i], temp0[i + 8]); } // Apply butterflies (in place) on pass through to stage 3. single_butterfly(temp1[27], temp1[20], &temp1[27], &temp1[20]); single_butterfly(temp1[26], temp1[21], &temp1[26], &temp1[21]); single_butterfly(temp1[25], temp1[22], &temp1[25], &temp1[22]); single_butterfly(temp1[24], temp1[23], &temp1[24], &temp1[23]); // dump the magnitude by 4, hence the intermediate values are within // the range of 16 bits. 
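/* pass == 1 is the second (row) pass: the column pass has already grown the
 * magnitudes, so every lane is rounded and shifted right by 2 here via
 * add_round_shift_s16 (add 1, add the sign bit, shift) to keep the
 * intermediates inside signed 16 bits. */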
if (pass) { temp1[0] = add_round_shift_s16(temp1[0]); temp1[1] = add_round_shift_s16(temp1[1]); temp1[2] = add_round_shift_s16(temp1[2]); temp1[3] = add_round_shift_s16(temp1[3]); temp1[4] = add_round_shift_s16(temp1[4]); temp1[5] = add_round_shift_s16(temp1[5]); temp1[6] = add_round_shift_s16(temp1[6]); temp1[7] = add_round_shift_s16(temp1[7]); temp1[8] = add_round_shift_s16(temp1[8]); temp1[9] = add_round_shift_s16(temp1[9]); temp1[10] = add_round_shift_s16(temp1[10]); temp1[11] = add_round_shift_s16(temp1[11]); temp1[12] = add_round_shift_s16(temp1[12]); temp1[13] = add_round_shift_s16(temp1[13]); temp1[14] = add_round_shift_s16(temp1[14]); temp1[15] = add_round_shift_s16(temp1[15]); temp1[16] = add_round_shift_s16(temp1[16]); temp1[17] = add_round_shift_s16(temp1[17]); temp1[18] = add_round_shift_s16(temp1[18]); temp1[19] = add_round_shift_s16(temp1[19]); temp1[20] = add_round_shift_s16(temp1[20]); temp1[21] = add_round_shift_s16(temp1[21]); temp1[22] = add_round_shift_s16(temp1[22]); temp1[23] = add_round_shift_s16(temp1[23]); temp1[24] = add_round_shift_s16(temp1[24]); temp1[25] = add_round_shift_s16(temp1[25]); temp1[26] = add_round_shift_s16(temp1[26]); temp1[27] = add_round_shift_s16(temp1[27]); temp1[28] = add_round_shift_s16(temp1[28]); temp1[29] = add_round_shift_s16(temp1[29]); temp1[30] = add_round_shift_s16(temp1[30]); temp1[31] = add_round_shift_s16(temp1[31]); } // Stage 3 temp2[0] = vec_add(temp1[0], temp1[7]); temp2[1] = vec_add(temp1[1], temp1[6]); temp2[2] = vec_add(temp1[2], temp1[5]); temp2[3] = vec_add(temp1[3], temp1[4]); temp2[5] = vec_sub(temp1[2], temp1[5]); temp2[6] = vec_sub(temp1[1], temp1[6]); temp2[8] = temp1[8]; temp2[9] = temp1[9]; single_butterfly(temp1[13], temp1[10], &temp2[13], &temp2[10]); single_butterfly(temp1[12], temp1[11], &temp2[12], &temp2[11]); temp2[14] = temp1[14]; temp2[15] = temp1[15]; temp2[18] = vec_add(temp1[18], temp1[21]); temp2[19] = vec_add(temp1[19], temp1[20]); temp2[20] = vec_sub(temp1[19], temp1[20]); temp2[21] = vec_sub(temp1[18], temp1[21]); temp2[26] = vec_sub(temp1[29], temp1[26]); temp2[27] = vec_sub(temp1[28], temp1[27]); temp2[28] = vec_add(temp1[28], temp1[27]); temp2[29] = vec_add(temp1[29], temp1[26]); // Pass through Stage 4 temp0[7] = vec_sub(temp1[0], temp1[7]); temp0[4] = vec_sub(temp1[3], temp1[4]); temp0[16] = vec_add(temp1[16], temp1[23]); temp0[17] = vec_add(temp1[17], temp1[22]); temp0[22] = vec_sub(temp1[17], temp1[22]); temp0[23] = vec_sub(temp1[16], temp1[23]); temp0[24] = vec_sub(temp1[31], temp1[24]); temp0[25] = vec_sub(temp1[30], temp1[25]); temp0[30] = vec_add(temp1[30], temp1[25]); temp0[31] = vec_add(temp1[31], temp1[24]); // Stage 4 temp0[0] = vec_add(temp2[0], temp2[3]); temp0[1] = vec_add(temp2[1], temp2[2]); temp0[2] = vec_sub(temp2[1], temp2[2]); temp0[3] = vec_sub(temp2[0], temp2[3]); single_butterfly(temp2[6], temp2[5], &temp0[6], &temp0[5]); temp0[9] = vec_add(temp2[9], temp2[10]); temp0[10] = vec_sub(temp2[9], temp2[10]); temp0[13] = vec_sub(temp2[14], temp2[13]); temp0[14] = vec_add(temp2[14], temp2[13]); double_butterfly(temp2[29], cospi8_v, temp2[18], cospi24_v, &temp0[29], &temp0[18]); double_butterfly(temp2[28], cospi8_v, temp2[19], cospi24_v, &temp0[28], &temp0[19]); double_butterfly(temp2[27], cospi24_v, temp2[20], cospi8m_v, &temp0[27], &temp0[20]); double_butterfly(temp2[26], cospi24_v, temp2[21], cospi8m_v, &temp0[26], &temp0[21]); // Pass through Stage 5 temp1[8] = vec_add(temp2[8], temp2[11]); temp1[11] = vec_sub(temp2[8], temp2[11]); temp1[12] = vec_sub(temp2[15], temp2[12]); 
temp1[15] = vec_add(temp2[15], temp2[12]); // Stage 5 // 0 and 1 pass through to 0 and 16 at the end single_butterfly(temp0[0], temp0[1], &out[0], &out[16]); // 2 and 3 pass through to 8 and 24 at the end double_butterfly(temp0[3], cospi8_v, temp0[2], cospi24_v, &out[8], &out[24]); temp1[4] = vec_add(temp0[4], temp0[5]); temp1[5] = vec_sub(temp0[4], temp0[5]); temp1[6] = vec_sub(temp0[7], temp0[6]); temp1[7] = vec_add(temp0[7], temp0[6]); double_butterfly(temp0[14], cospi8_v, temp0[9], cospi24_v, &temp1[14], &temp1[9]); double_butterfly(temp0[13], cospi24_v, temp0[10], cospi8m_v, &temp1[13], &temp1[10]); temp1[17] = vec_add(temp0[17], temp0[18]); temp1[18] = vec_sub(temp0[17], temp0[18]); temp1[21] = vec_sub(temp0[22], temp0[21]); temp1[22] = vec_add(temp0[22], temp0[21]); temp1[25] = vec_add(temp0[25], temp0[26]); temp1[26] = vec_sub(temp0[25], temp0[26]); temp1[29] = vec_sub(temp0[30], temp0[29]); temp1[30] = vec_add(temp0[30], temp0[29]); // Pass through Stage 6 temp2[16] = vec_add(temp0[16], temp0[19]); temp2[19] = vec_sub(temp0[16], temp0[19]); temp2[20] = vec_sub(temp0[23], temp0[20]); temp2[23] = vec_add(temp0[23], temp0[20]); temp2[24] = vec_add(temp0[24], temp0[27]); temp2[27] = vec_sub(temp0[24], temp0[27]); temp2[28] = vec_sub(temp0[31], temp0[28]); temp2[31] = vec_add(temp0[31], temp0[28]); // Stage 6 // 4 and 7 pass through to 4 and 28 at the end double_butterfly(temp1[7], cospi4_v, temp1[4], cospi28_v, &out[4], &out[28]); // 5 and 6 pass through to 20 and 12 at the end double_butterfly(temp1[6], cospi20_v, temp1[5], cospi12_v, &out[20], &out[12]); temp2[8] = vec_add(temp1[8], temp1[9]); temp2[9] = vec_sub(temp1[8], temp1[9]); temp2[10] = vec_sub(temp1[11], temp1[10]); temp2[11] = vec_add(temp1[11], temp1[10]); temp2[12] = vec_add(temp1[12], temp1[13]); temp2[13] = vec_sub(temp1[12], temp1[13]); temp2[14] = vec_sub(temp1[15], temp1[14]); temp2[15] = vec_add(temp1[15], temp1[14]); double_butterfly(temp1[30], cospi4_v, temp1[17], cospi28_v, &temp2[30], &temp2[17]); double_butterfly(temp1[29], cospi28_v, temp1[18], cospi4m_v, &temp2[29], &temp2[18]); double_butterfly(temp1[26], cospi20_v, temp1[21], cospi12_v, &temp2[26], &temp2[21]); double_butterfly(temp1[25], cospi12_v, temp1[22], cospi20m_v, &temp2[25], &temp2[22]); // Stage 7 double_butterfly(temp2[15], cospi2_v, temp2[8], cospi30_v, &out[2], &out[30]); double_butterfly(temp2[14], cospi18_v, temp2[9], cospi14_v, &out[18], &out[14]); double_butterfly(temp2[13], cospi10_v, temp2[10], cospi22_v, &out[10], &out[22]); double_butterfly(temp2[12], cospi26_v, temp2[11], cospi6_v, &out[26], &out[6]); temp0[16] = vec_add(temp2[16], temp2[17]); temp0[17] = vec_sub(temp2[16], temp2[17]); temp0[18] = vec_sub(temp2[19], temp2[18]); temp0[19] = vec_add(temp2[19], temp2[18]); temp0[20] = vec_add(temp2[20], temp2[21]); temp0[21] = vec_sub(temp2[20], temp2[21]); temp0[22] = vec_sub(temp2[23], temp2[22]); temp0[23] = vec_add(temp2[23], temp2[22]); temp0[24] = vec_add(temp2[24], temp2[25]); temp0[25] = vec_sub(temp2[24], temp2[25]); temp0[26] = vec_sub(temp2[27], temp2[26]); temp0[27] = vec_add(temp2[27], temp2[26]); temp0[28] = vec_add(temp2[28], temp2[29]); temp0[29] = vec_sub(temp2[28], temp2[29]); temp0[30] = vec_sub(temp2[31], temp2[30]); temp0[31] = vec_add(temp2[31], temp2[30]); // Final stage --- outputs indices are bit-reversed. 
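/* Each of these butterflies writes one low- and one high-frequency result
 * straight into its final slot (out[1]/out[31], out[17]/out[15], and so on),
 * so no separate reordering pass is needed after the transform. */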
double_butterfly(temp0[31], cospi1_v, temp0[16], cospi31_v, &out[1], &out[31]); double_butterfly(temp0[30], cospi17_v, temp0[17], cospi15_v, &out[17], &out[15]); double_butterfly(temp0[29], cospi9_v, temp0[18], cospi23_v, &out[9], &out[23]); double_butterfly(temp0[28], cospi25_v, temp0[19], cospi7_v, &out[25], &out[7]); double_butterfly(temp0[27], cospi5_v, temp0[20], cospi27_v, &out[5], &out[27]); double_butterfly(temp0[26], cospi21_v, temp0[21], cospi11_v, &out[21], &out[11]); double_butterfly(temp0[25], cospi13_v, temp0[22], cospi19_v, &out[13], &out[19]); double_butterfly(temp0[24], cospi29_v, temp0[23], cospi3_v, &out[29], &out[3]); if (pass == 0) { for (i = 0; i < 32; i++) { out[i] = sub_round_shift(out[i]); } } } void vpx_fdct32x32_rd_vsx(const int16_t *input, tran_low_t *out, int stride) { int16x8_t temp0[32]; int16x8_t temp1[32]; int16x8_t temp2[32]; int16x8_t temp3[32]; int16x8_t temp4[32]; int16x8_t temp5[32]; int16x8_t temp6[32]; // Process in 8x32 columns. load(input, stride, temp0); fdct32_vsx(temp0, temp1, 0); load(input + 8, stride, temp0); fdct32_vsx(temp0, temp2, 0); load(input + 16, stride, temp0); fdct32_vsx(temp0, temp3, 0); load(input + 24, stride, temp0); fdct32_vsx(temp0, temp4, 0); // Generate the top row by munging the first set of 8 from each one // together. transpose_8x8(&temp1[0], &temp0[0]); transpose_8x8(&temp2[0], &temp0[8]); transpose_8x8(&temp3[0], &temp0[16]); transpose_8x8(&temp4[0], &temp0[24]); fdct32_vsx(temp0, temp5, 1); transpose_8x8(&temp5[0], &temp6[0]); transpose_8x8(&temp5[8], &temp6[8]); transpose_8x8(&temp5[16], &temp6[16]); transpose_8x8(&temp5[24], &temp6[24]); store(out, temp6); // Second row of 8x32. transpose_8x8(&temp1[8], &temp0[0]); transpose_8x8(&temp2[8], &temp0[8]); transpose_8x8(&temp3[8], &temp0[16]); transpose_8x8(&temp4[8], &temp0[24]); fdct32_vsx(temp0, temp5, 1); transpose_8x8(&temp5[0], &temp6[0]); transpose_8x8(&temp5[8], &temp6[8]); transpose_8x8(&temp5[16], &temp6[16]); transpose_8x8(&temp5[24], &temp6[24]); store(out + 8 * 32, temp6); // Third row of 8x32 transpose_8x8(&temp1[16], &temp0[0]); transpose_8x8(&temp2[16], &temp0[8]); transpose_8x8(&temp3[16], &temp0[16]); transpose_8x8(&temp4[16], &temp0[24]); fdct32_vsx(temp0, temp5, 1); transpose_8x8(&temp5[0], &temp6[0]); transpose_8x8(&temp5[8], &temp6[8]); transpose_8x8(&temp5[16], &temp6[16]); transpose_8x8(&temp5[24], &temp6[24]); store(out + 16 * 32, temp6); // Final row of 8x32. transpose_8x8(&temp1[24], &temp0[0]); transpose_8x8(&temp2[24], &temp0[8]); transpose_8x8(&temp3[24], &temp0[16]); transpose_8x8(&temp4[24], &temp0[24]); fdct32_vsx(temp0, temp5, 1); transpose_8x8(&temp5[0], &temp6[0]); transpose_8x8(&temp5[8], &temp6[8]); transpose_8x8(&temp5[16], &temp6[16]); transpose_8x8(&temp5[24], &temp6[24]); store(out + 24 * 32, temp6); } libvpx-1.8.2/vpx_dsp/ppc/hadamard_vsx.c000066400000000000000000000100061357355204000201010ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/ppc/types_vsx.h" #include "vpx_dsp/ppc/transpose_vsx.h" #include "vpx_dsp/ppc/bitdepth_conversion_vsx.h" static void vpx_hadamard_s16_8x8_one_pass(int16x8_t v[8]) { const int16x8_t b0 = vec_add(v[0], v[1]); const int16x8_t b1 = vec_sub(v[0], v[1]); const int16x8_t b2 = vec_add(v[2], v[3]); const int16x8_t b3 = vec_sub(v[2], v[3]); const int16x8_t b4 = vec_add(v[4], v[5]); const int16x8_t b5 = vec_sub(v[4], v[5]); const int16x8_t b6 = vec_add(v[6], v[7]); const int16x8_t b7 = vec_sub(v[6], v[7]); const int16x8_t c0 = vec_add(b0, b2); const int16x8_t c1 = vec_add(b1, b3); const int16x8_t c2 = vec_sub(b0, b2); const int16x8_t c3 = vec_sub(b1, b3); const int16x8_t c4 = vec_add(b4, b6); const int16x8_t c5 = vec_add(b5, b7); const int16x8_t c6 = vec_sub(b4, b6); const int16x8_t c7 = vec_sub(b5, b7); v[0] = vec_add(c0, c4); v[1] = vec_sub(c2, c6); v[2] = vec_sub(c0, c4); v[3] = vec_add(c2, c6); v[4] = vec_add(c3, c7); v[5] = vec_sub(c3, c7); v[6] = vec_sub(c1, c5); v[7] = vec_add(c1, c5); } void vpx_hadamard_8x8_vsx(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int16x8_t v[8]; v[0] = vec_vsx_ld(0, src_diff); v[1] = vec_vsx_ld(0, src_diff + src_stride); v[2] = vec_vsx_ld(0, src_diff + (2 * src_stride)); v[3] = vec_vsx_ld(0, src_diff + (3 * src_stride)); v[4] = vec_vsx_ld(0, src_diff + (4 * src_stride)); v[5] = vec_vsx_ld(0, src_diff + (5 * src_stride)); v[6] = vec_vsx_ld(0, src_diff + (6 * src_stride)); v[7] = vec_vsx_ld(0, src_diff + (7 * src_stride)); vpx_hadamard_s16_8x8_one_pass(v); vpx_transpose_s16_8x8(v); vpx_hadamard_s16_8x8_one_pass(v); store_tran_low(v[0], 0, coeff); store_tran_low(v[1], 0, coeff + 8); store_tran_low(v[2], 0, coeff + 16); store_tran_low(v[3], 0, coeff + 24); store_tran_low(v[4], 0, coeff + 32); store_tran_low(v[5], 0, coeff + 40); store_tran_low(v[6], 0, coeff + 48); store_tran_low(v[7], 0, coeff + 56); } void vpx_hadamard_16x16_vsx(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int i; const uint16x8_t ones = vec_splat_u16(1); /* Rearrange 16x16 to 8x32 and remove stride. * Top left first. */ vpx_hadamard_8x8_vsx(src_diff, src_stride, coeff); /* Top right. */ vpx_hadamard_8x8_vsx(src_diff + 8 + 0 * src_stride, src_stride, coeff + 64); /* Bottom left. */ vpx_hadamard_8x8_vsx(src_diff + 0 + 8 * src_stride, src_stride, coeff + 128); /* Bottom right. */ vpx_hadamard_8x8_vsx(src_diff + 8 + 8 * src_stride, src_stride, coeff + 192); /* Overlay the 8x8 blocks and combine. */ for (i = 0; i < 64; i += 8) { const int16x8_t a0 = load_tran_low(0, coeff); const int16x8_t a1 = load_tran_low(0, coeff + 64); const int16x8_t a2 = load_tran_low(0, coeff + 128); const int16x8_t a3 = load_tran_low(0, coeff + 192); /* Prevent the result from escaping int16_t. 
*/ const int16x8_t b0 = vec_sra(a0, ones); const int16x8_t b1 = vec_sra(a1, ones); const int16x8_t b2 = vec_sra(a2, ones); const int16x8_t b3 = vec_sra(a3, ones); const int16x8_t c0 = vec_add(b0, b1); const int16x8_t c2 = vec_add(b2, b3); const int16x8_t c1 = vec_sub(b0, b1); const int16x8_t c3 = vec_sub(b2, b3); const int16x8_t d0 = vec_add(c0, c2); const int16x8_t d1 = vec_add(c1, c3); const int16x8_t d2 = vec_sub(c0, c2); const int16x8_t d3 = vec_sub(c1, c3); store_tran_low(d0, 0, coeff); store_tran_low(d1, 0, coeff + 64); store_tran_low(d2, 0, coeff + 128); store_tran_low(d3, 0, coeff + 192); coeff += 8; } } libvpx-1.8.2/vpx_dsp/ppc/intrapred_vsx.c000066400000000000000000000615631357355204000203460ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/ppc/types_vsx.h" void vpx_v_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t d = vec_vsx_ld(0, above); int i; (void)left; for (i = 0; i < 16; i++, dst += stride) { vec_vsx_st(d, 0, dst); } } void vpx_v_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t d0 = vec_vsx_ld(0, above); const uint8x16_t d1 = vec_vsx_ld(16, above); int i; (void)left; for (i = 0; i < 32; i++, dst += stride) { vec_vsx_st(d0, 0, dst); vec_vsx_st(d1, 16, dst); } } // TODO(crbug.com/webm/1522): Fix test failures. 
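/* Illustrative scalar sketch (not part of libvpx): the disabled VSX paths
 * below implement the horizontal ("H") predictor, which replicates each
 * left-column pixel across its row.  Assumes <string.h>; bs is the block
 * size (4 or 8 for these paths). */
static void h_predictor_scalar_sketch(uint8_t *dst, ptrdiff_t stride, int bs,
                                      const uint8_t *left) {
  int r;
  for (r = 0; r < bs; r++) {
    memset(dst, left[r], bs); /* splat left[r] across one row */
    dst += stride;
  }
}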
#if 0 static const uint32x4_t mask4 = { 0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF }; void vpx_h_predictor_4x4_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t d = vec_vsx_ld(0, left); const uint8x16_t v0 = vec_splat(d, 0); const uint8x16_t v1 = vec_splat(d, 1); const uint8x16_t v2 = vec_splat(d, 2); const uint8x16_t v3 = vec_splat(d, 3); (void)above; vec_vsx_st(vec_sel(v0, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst); dst += stride; vec_vsx_st(vec_sel(v1, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst); dst += stride; vec_vsx_st(vec_sel(v2, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst); dst += stride; vec_vsx_st(vec_sel(v3, vec_vsx_ld(0, dst), (uint8x16_t)mask4), 0, dst); } void vpx_h_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t d = vec_vsx_ld(0, left); const uint8x16_t v0 = vec_splat(d, 0); const uint8x16_t v1 = vec_splat(d, 1); const uint8x16_t v2 = vec_splat(d, 2); const uint8x16_t v3 = vec_splat(d, 3); const uint8x16_t v4 = vec_splat(d, 4); const uint8x16_t v5 = vec_splat(d, 5); const uint8x16_t v6 = vec_splat(d, 6); const uint8x16_t v7 = vec_splat(d, 7); (void)above; vec_vsx_st(xxpermdi(v0, vec_vsx_ld(0, dst), 1), 0, dst); dst += stride; vec_vsx_st(xxpermdi(v1, vec_vsx_ld(0, dst), 1), 0, dst); dst += stride; vec_vsx_st(xxpermdi(v2, vec_vsx_ld(0, dst), 1), 0, dst); dst += stride; vec_vsx_st(xxpermdi(v3, vec_vsx_ld(0, dst), 1), 0, dst); dst += stride; vec_vsx_st(xxpermdi(v4, vec_vsx_ld(0, dst), 1), 0, dst); dst += stride; vec_vsx_st(xxpermdi(v5, vec_vsx_ld(0, dst), 1), 0, dst); dst += stride; vec_vsx_st(xxpermdi(v6, vec_vsx_ld(0, dst), 1), 0, dst); dst += stride; vec_vsx_st(xxpermdi(v7, vec_vsx_ld(0, dst), 1), 0, dst); } #endif void vpx_h_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t d = vec_vsx_ld(0, left); const uint8x16_t v0 = vec_splat(d, 0); const uint8x16_t v1 = vec_splat(d, 1); const uint8x16_t v2 = vec_splat(d, 2); const uint8x16_t v3 = vec_splat(d, 3); const uint8x16_t v4 = vec_splat(d, 4); const uint8x16_t v5 = vec_splat(d, 5); const uint8x16_t v6 = vec_splat(d, 6); const uint8x16_t v7 = vec_splat(d, 7); const uint8x16_t v8 = vec_splat(d, 8); const uint8x16_t v9 = vec_splat(d, 9); const uint8x16_t v10 = vec_splat(d, 10); const uint8x16_t v11 = vec_splat(d, 11); const uint8x16_t v12 = vec_splat(d, 12); const uint8x16_t v13 = vec_splat(d, 13); const uint8x16_t v14 = vec_splat(d, 14); const uint8x16_t v15 = vec_splat(d, 15); (void)above; vec_vsx_st(v0, 0, dst); dst += stride; vec_vsx_st(v1, 0, dst); dst += stride; vec_vsx_st(v2, 0, dst); dst += stride; vec_vsx_st(v3, 0, dst); dst += stride; vec_vsx_st(v4, 0, dst); dst += stride; vec_vsx_st(v5, 0, dst); dst += stride; vec_vsx_st(v6, 0, dst); dst += stride; vec_vsx_st(v7, 0, dst); dst += stride; vec_vsx_st(v8, 0, dst); dst += stride; vec_vsx_st(v9, 0, dst); dst += stride; vec_vsx_st(v10, 0, dst); dst += stride; vec_vsx_st(v11, 0, dst); dst += stride; vec_vsx_st(v12, 0, dst); dst += stride; vec_vsx_st(v13, 0, dst); dst += stride; vec_vsx_st(v14, 0, dst); dst += stride; vec_vsx_st(v15, 0, dst); } #define H_PREDICTOR_32(v) \ vec_vsx_st(v, 0, dst); \ vec_vsx_st(v, 16, dst); \ dst += stride void vpx_h_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t d0 = vec_vsx_ld(0, left); const uint8x16_t d1 = vec_vsx_ld(16, left); const uint8x16_t v0_0 = vec_splat(d0, 0); const uint8x16_t v1_0 = 
vec_splat(d0, 1); const uint8x16_t v2_0 = vec_splat(d0, 2); const uint8x16_t v3_0 = vec_splat(d0, 3); const uint8x16_t v4_0 = vec_splat(d0, 4); const uint8x16_t v5_0 = vec_splat(d0, 5); const uint8x16_t v6_0 = vec_splat(d0, 6); const uint8x16_t v7_0 = vec_splat(d0, 7); const uint8x16_t v8_0 = vec_splat(d0, 8); const uint8x16_t v9_0 = vec_splat(d0, 9); const uint8x16_t v10_0 = vec_splat(d0, 10); const uint8x16_t v11_0 = vec_splat(d0, 11); const uint8x16_t v12_0 = vec_splat(d0, 12); const uint8x16_t v13_0 = vec_splat(d0, 13); const uint8x16_t v14_0 = vec_splat(d0, 14); const uint8x16_t v15_0 = vec_splat(d0, 15); const uint8x16_t v0_1 = vec_splat(d1, 0); const uint8x16_t v1_1 = vec_splat(d1, 1); const uint8x16_t v2_1 = vec_splat(d1, 2); const uint8x16_t v3_1 = vec_splat(d1, 3); const uint8x16_t v4_1 = vec_splat(d1, 4); const uint8x16_t v5_1 = vec_splat(d1, 5); const uint8x16_t v6_1 = vec_splat(d1, 6); const uint8x16_t v7_1 = vec_splat(d1, 7); const uint8x16_t v8_1 = vec_splat(d1, 8); const uint8x16_t v9_1 = vec_splat(d1, 9); const uint8x16_t v10_1 = vec_splat(d1, 10); const uint8x16_t v11_1 = vec_splat(d1, 11); const uint8x16_t v12_1 = vec_splat(d1, 12); const uint8x16_t v13_1 = vec_splat(d1, 13); const uint8x16_t v14_1 = vec_splat(d1, 14); const uint8x16_t v15_1 = vec_splat(d1, 15); (void)above; H_PREDICTOR_32(v0_0); H_PREDICTOR_32(v1_0); H_PREDICTOR_32(v2_0); H_PREDICTOR_32(v3_0); H_PREDICTOR_32(v4_0); H_PREDICTOR_32(v5_0); H_PREDICTOR_32(v6_0); H_PREDICTOR_32(v7_0); H_PREDICTOR_32(v8_0); H_PREDICTOR_32(v9_0); H_PREDICTOR_32(v10_0); H_PREDICTOR_32(v11_0); H_PREDICTOR_32(v12_0); H_PREDICTOR_32(v13_0); H_PREDICTOR_32(v14_0); H_PREDICTOR_32(v15_0); H_PREDICTOR_32(v0_1); H_PREDICTOR_32(v1_1); H_PREDICTOR_32(v2_1); H_PREDICTOR_32(v3_1); H_PREDICTOR_32(v4_1); H_PREDICTOR_32(v5_1); H_PREDICTOR_32(v6_1); H_PREDICTOR_32(v7_1); H_PREDICTOR_32(v8_1); H_PREDICTOR_32(v9_1); H_PREDICTOR_32(v10_1); H_PREDICTOR_32(v11_1); H_PREDICTOR_32(v12_1); H_PREDICTOR_32(v13_1); H_PREDICTOR_32(v14_1); H_PREDICTOR_32(v15_1); } // TODO(crbug.com/webm/1522): Fix test failures. 
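/* Illustrative scalar sketch (not part of libvpx): the TrueMotion ("TM")
 * predictor computes clip8(left[r] + above[c] - above[-1]) for every pixel;
 * the VSX versions below do the same eight lanes at a time, with vec_packsu
 * supplying the saturation to [0, 255]. */
static void tm_predictor_scalar_sketch(uint8_t *dst, ptrdiff_t stride, int bs,
                                       const uint8_t *above,
                                       const uint8_t *left) {
  const int tl = above[-1]; /* top-left neighbour */
  int r, c;
  for (r = 0; r < bs; r++) {
    for (c = 0; c < bs; c++) {
      const int v = left[r] + above[c] - tl;
      dst[c] = (uint8_t)(v < 0 ? 0 : (v > 255 ? 255 : v));
    }
    dst += stride;
  }
}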
#if 0 void vpx_tm_predictor_4x4_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0)); const int16x8_t l = unpack_to_s16_h(vec_vsx_ld(0, left)); const int16x8_t a = unpack_to_s16_h(vec_vsx_ld(0, above)); int16x8_t tmp, val; uint8x16_t d; d = vec_vsx_ld(0, dst); tmp = unpack_to_s16_l(d); val = vec_sub(vec_add(vec_splat(l, 0), a), tl); vec_vsx_st(vec_sel(vec_packsu(val, tmp), d, (uint8x16_t)mask4), 0, dst); dst += stride; d = vec_vsx_ld(0, dst); tmp = unpack_to_s16_l(d); val = vec_sub(vec_add(vec_splat(l, 1), a), tl); vec_vsx_st(vec_sel(vec_packsu(val, tmp), d, (uint8x16_t)mask4), 0, dst); dst += stride; d = vec_vsx_ld(0, dst); tmp = unpack_to_s16_l(d); val = vec_sub(vec_add(vec_splat(l, 2), a), tl); vec_vsx_st(vec_sel(vec_packsu(val, tmp), d, (uint8x16_t)mask4), 0, dst); dst += stride; d = vec_vsx_ld(0, dst); tmp = unpack_to_s16_l(d); val = vec_sub(vec_add(vec_splat(l, 3), a), tl); vec_vsx_st(vec_sel(vec_packsu(val, tmp), d, (uint8x16_t)mask4), 0, dst); } void vpx_tm_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0)); const int16x8_t l = unpack_to_s16_h(vec_vsx_ld(0, left)); const int16x8_t a = unpack_to_s16_h(vec_vsx_ld(0, above)); int16x8_t tmp, val; tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); val = vec_sub(vec_add(vec_splat(l, 0), a), tl); vec_vsx_st(vec_packsu(val, tmp), 0, dst); dst += stride; tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); val = vec_sub(vec_add(vec_splat(l, 1), a), tl); vec_vsx_st(vec_packsu(val, tmp), 0, dst); dst += stride; tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); val = vec_sub(vec_add(vec_splat(l, 2), a), tl); vec_vsx_st(vec_packsu(val, tmp), 0, dst); dst += stride; tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); val = vec_sub(vec_add(vec_splat(l, 3), a), tl); vec_vsx_st(vec_packsu(val, tmp), 0, dst); dst += stride; tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); val = vec_sub(vec_add(vec_splat(l, 4), a), tl); vec_vsx_st(vec_packsu(val, tmp), 0, dst); dst += stride; tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); val = vec_sub(vec_add(vec_splat(l, 5), a), tl); vec_vsx_st(vec_packsu(val, tmp), 0, dst); dst += stride; tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); val = vec_sub(vec_add(vec_splat(l, 6), a), tl); vec_vsx_st(vec_packsu(val, tmp), 0, dst); dst += stride; tmp = unpack_to_s16_l(vec_vsx_ld(0, dst)); val = vec_sub(vec_add(vec_splat(l, 7), a), tl); vec_vsx_st(vec_packsu(val, tmp), 0, dst); } #endif static void tm_predictor_16x8(uint8_t *dst, const ptrdiff_t stride, int16x8_t l, int16x8_t ah, int16x8_t al, int16x8_t tl) { int16x8_t vh, vl, ls; ls = vec_splat(l, 0); vh = vec_sub(vec_add(ls, ah), tl); vl = vec_sub(vec_add(ls, al), tl); vec_vsx_st(vec_packsu(vh, vl), 0, dst); dst += stride; ls = vec_splat(l, 1); vh = vec_sub(vec_add(ls, ah), tl); vl = vec_sub(vec_add(ls, al), tl); vec_vsx_st(vec_packsu(vh, vl), 0, dst); dst += stride; ls = vec_splat(l, 2); vh = vec_sub(vec_add(ls, ah), tl); vl = vec_sub(vec_add(ls, al), tl); vec_vsx_st(vec_packsu(vh, vl), 0, dst); dst += stride; ls = vec_splat(l, 3); vh = vec_sub(vec_add(ls, ah), tl); vl = vec_sub(vec_add(ls, al), tl); vec_vsx_st(vec_packsu(vh, vl), 0, dst); dst += stride; ls = vec_splat(l, 4); vh = vec_sub(vec_add(ls, ah), tl); vl = vec_sub(vec_add(ls, al), tl); vec_vsx_st(vec_packsu(vh, vl), 0, dst); dst += stride; ls = vec_splat(l, 5); vh = vec_sub(vec_add(ls, ah), tl); vl = vec_sub(vec_add(ls, al), tl); 
vec_vsx_st(vec_packsu(vh, vl), 0, dst); dst += stride; ls = vec_splat(l, 6); vh = vec_sub(vec_add(ls, ah), tl); vl = vec_sub(vec_add(ls, al), tl); vec_vsx_st(vec_packsu(vh, vl), 0, dst); dst += stride; ls = vec_splat(l, 7); vh = vec_sub(vec_add(ls, ah), tl); vl = vec_sub(vec_add(ls, al), tl); vec_vsx_st(vec_packsu(vh, vl), 0, dst); } void vpx_tm_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0)); const uint8x16_t l = vec_vsx_ld(0, left); const int16x8_t lh = unpack_to_s16_h(l); const int16x8_t ll = unpack_to_s16_l(l); const uint8x16_t a = vec_vsx_ld(0, above); const int16x8_t ah = unpack_to_s16_h(a); const int16x8_t al = unpack_to_s16_l(a); tm_predictor_16x8(dst, stride, lh, ah, al, tl); dst += stride * 8; tm_predictor_16x8(dst, stride, ll, ah, al, tl); } static INLINE void tm_predictor_32x1(uint8_t *dst, const int16x8_t ls, const int16x8_t a0h, const int16x8_t a0l, const int16x8_t a1h, const int16x8_t a1l, const int16x8_t tl) { int16x8_t vh, vl; vh = vec_sub(vec_add(ls, a0h), tl); vl = vec_sub(vec_add(ls, a0l), tl); vec_vsx_st(vec_packsu(vh, vl), 0, dst); vh = vec_sub(vec_add(ls, a1h), tl); vl = vec_sub(vec_add(ls, a1l), tl); vec_vsx_st(vec_packsu(vh, vl), 16, dst); } static void tm_predictor_32x8(uint8_t *dst, const ptrdiff_t stride, const int16x8_t l, const uint8x16_t a0, const uint8x16_t a1, const int16x8_t tl) { const int16x8_t a0h = unpack_to_s16_h(a0); const int16x8_t a0l = unpack_to_s16_l(a0); const int16x8_t a1h = unpack_to_s16_h(a1); const int16x8_t a1l = unpack_to_s16_l(a1); tm_predictor_32x1(dst, vec_splat(l, 0), a0h, a0l, a1h, a1l, tl); dst += stride; tm_predictor_32x1(dst, vec_splat(l, 1), a0h, a0l, a1h, a1l, tl); dst += stride; tm_predictor_32x1(dst, vec_splat(l, 2), a0h, a0l, a1h, a1l, tl); dst += stride; tm_predictor_32x1(dst, vec_splat(l, 3), a0h, a0l, a1h, a1l, tl); dst += stride; tm_predictor_32x1(dst, vec_splat(l, 4), a0h, a0l, a1h, a1l, tl); dst += stride; tm_predictor_32x1(dst, vec_splat(l, 5), a0h, a0l, a1h, a1l, tl); dst += stride; tm_predictor_32x1(dst, vec_splat(l, 6), a0h, a0l, a1h, a1l, tl); dst += stride; tm_predictor_32x1(dst, vec_splat(l, 7), a0h, a0l, a1h, a1l, tl); } void vpx_tm_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const int16x8_t tl = unpack_to_s16_h(vec_splat(vec_vsx_ld(-1, above), 0)); const uint8x16_t l0 = vec_vsx_ld(0, left); const uint8x16_t l1 = vec_vsx_ld(16, left); const uint8x16_t a0 = vec_vsx_ld(0, above); const uint8x16_t a1 = vec_vsx_ld(16, above); tm_predictor_32x8(dst, stride, unpack_to_s16_h(l0), a0, a1, tl); dst += stride * 8; tm_predictor_32x8(dst, stride, unpack_to_s16_l(l0), a0, a1, tl); dst += stride * 8; tm_predictor_32x8(dst, stride, unpack_to_s16_h(l1), a0, a1, tl); dst += stride * 8; tm_predictor_32x8(dst, stride, unpack_to_s16_l(l1), a0, a1, tl); } static INLINE void dc_fill_predictor_8x8(uint8_t *dst, const ptrdiff_t stride, const uint8x16_t val) { int i; for (i = 0; i < 8; i++, dst += stride) { const uint8x16_t d = vec_vsx_ld(0, dst); vec_vsx_st(xxpermdi(val, d, 1), 0, dst); } } static INLINE void dc_fill_predictor_16x16(uint8_t *dst, const ptrdiff_t stride, const uint8x16_t val) { int i; for (i = 0; i < 16; i++, dst += stride) { vec_vsx_st(val, 0, dst); } } void vpx_dc_128_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t v128 = vec_sl(vec_splat_u8(1), vec_splat_u8(7)); (void)above; 
(void)left; dc_fill_predictor_16x16(dst, stride, v128); } static INLINE void dc_fill_predictor_32x32(uint8_t *dst, const ptrdiff_t stride, const uint8x16_t val) { int i; for (i = 0; i < 32; i++, dst += stride) { vec_vsx_st(val, 0, dst); vec_vsx_st(val, 16, dst); } } void vpx_dc_128_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t v128 = vec_sl(vec_splat_u8(1), vec_splat_u8(7)); (void)above; (void)left; dc_fill_predictor_32x32(dst, stride, v128); } static uint8x16_t avg16(const uint8_t *values) { const int32x4_t sum4s = (int32x4_t)vec_sum4s(vec_vsx_ld(0, values), vec_splat_u32(0)); const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, vec_splat_s32(8)); const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(4)); return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)), 3); } void vpx_dc_left_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { (void)above; dc_fill_predictor_16x16(dst, stride, avg16(left)); } void vpx_dc_top_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { (void)left; dc_fill_predictor_16x16(dst, stride, avg16(above)); } static uint8x16_t avg32(const uint8_t *values) { const uint8x16_t v0 = vec_vsx_ld(0, values); const uint8x16_t v1 = vec_vsx_ld(16, values); const int32x4_t v16 = vec_sl(vec_splat_s32(1), vec_splat_u32(4)); const int32x4_t sum4s = (int32x4_t)vec_sum4s(v0, vec_sum4s(v1, vec_splat_u32(0))); const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, v16); const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(5)); return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)), 3); } void vpx_dc_left_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { (void)above; dc_fill_predictor_32x32(dst, stride, avg32(left)); } void vpx_dc_top_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { (void)left; dc_fill_predictor_32x32(dst, stride, avg32(above)); } // TODO(crbug.com/webm/1522): Fix test failures. #if 0 static uint8x16_t dc_avg8(const uint8_t *above, const uint8_t *left) { const uint8x16_t a0 = vec_vsx_ld(0, above); const uint8x16_t l0 = vec_vsx_ld(0, left); const int32x4_t sum4s = (int32x4_t)vec_sum4s(l0, vec_sum4s(a0, vec_splat_u32(0))); const int32x4_t sum4s8 = xxpermdi(sum4s, vec_splat_s32(0), 1); const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s8, vec_splat_s32(8)); const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(4)); return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)), 3); } #endif static uint8x16_t dc_avg16(const uint8_t *above, const uint8_t *left) { const uint8x16_t a0 = vec_vsx_ld(0, above); const uint8x16_t l0 = vec_vsx_ld(0, left); const int32x4_t v16 = vec_sl(vec_splat_s32(1), vec_splat_u32(4)); const int32x4_t sum4s = (int32x4_t)vec_sum4s(l0, vec_sum4s(a0, vec_splat_u32(0))); const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, v16); const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(5)); return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)), 3); } // TODO(crbug.com/webm/1522): Fix test failures. 
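/* Illustrative sketch (added for clarity; not part of the original libvpx
 * source): avg16/avg32 and the dc_avg* helpers all reduce the same way:
 * vec_sum4s partially sums the bytes into four 32-bit lanes, vec_sums folds
 * those lanes into one total (its second operand seeds the rounding bias:
 * 8 for a /16, 16 for a /32, 32 for a /64), and the right shift performs the
 * division before the average is packed back to a byte and splatted. The
 * dc_128 variants build their 128 constant as
 * vec_sl(vec_splat_u8(1), vec_splat_u8(7)) because the splat immediate is
 * limited to -16..15. Scalar equivalent of avg16(): */
#if 0
static uint8_t avg16_ref(const uint8_t *values) {
  int i, sum = 8; /* +8 makes the final >> 4 round to nearest */
  for (i = 0; i < 16; ++i) sum += values[i];
  return (uint8_t)(sum >> 4);
}
#endif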
#if 0 void vpx_dc_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { dc_fill_predictor_8x8(dst, stride, dc_avg8(above, left)); } #endif void vpx_dc_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { dc_fill_predictor_16x16(dst, stride, dc_avg16(above, left)); } static uint8x16_t dc_avg32(const uint8_t *above, const uint8_t *left) { const uint8x16_t a0 = vec_vsx_ld(0, above); const uint8x16_t a1 = vec_vsx_ld(16, above); const uint8x16_t l0 = vec_vsx_ld(0, left); const uint8x16_t l1 = vec_vsx_ld(16, left); const int32x4_t v32 = vec_sl(vec_splat_s32(1), vec_splat_u32(5)); const uint32x4_t a_sum = vec_sum4s(a0, vec_sum4s(a1, vec_splat_u32(0))); const int32x4_t sum4s = (int32x4_t)vec_sum4s(l0, vec_sum4s(l1, a_sum)); const uint32x4_t sum = (uint32x4_t)vec_sums(sum4s, v32); const uint32x4_t avg = (uint32x4_t)vec_sr(sum, vec_splat_u32(6)); return vec_splat(vec_pack(vec_pack(avg, vec_splat_u32(0)), vec_splat_u16(0)), 3); } void vpx_dc_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { dc_fill_predictor_32x32(dst, stride, dc_avg32(above, left)); } static uint8x16_t avg3(const uint8x16_t a, const uint8x16_t b, const uint8x16_t c) { const uint8x16_t ac = vec_adds(vec_and(a, c), vec_sr(vec_xor(a, c), vec_splat_u8(1))); return vec_avg(ac, b); } // Workaround vec_sld/vec_xxsldi/vec_lsdoi being missing or broken. static const uint8x16_t sl1 = { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x10 }; // TODO(crbug.com/webm/1522): Fix test failures. #if 0 void vpx_d45_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t af = vec_vsx_ld(0, above); const uint8x16_t above_right = vec_splat(af, 7); const uint8x16_t a = xxpermdi(af, above_right, 1); const uint8x16_t b = vec_perm(a, above_right, sl1); const uint8x16_t c = vec_perm(b, above_right, sl1); uint8x16_t row = avg3(a, b, c); int i; (void)left; for (i = 0; i < 8; i++) { const uint8x16_t d = vec_vsx_ld(0, dst); vec_vsx_st(xxpermdi(row, d, 1), 0, dst); dst += stride; row = vec_perm(row, above_right, sl1); } } #endif void vpx_d45_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t a = vec_vsx_ld(0, above); const uint8x16_t above_right = vec_splat(a, 15); const uint8x16_t b = vec_perm(a, above_right, sl1); const uint8x16_t c = vec_perm(b, above_right, sl1); uint8x16_t row = avg3(a, b, c); int i; (void)left; for (i = 0; i < 16; i++) { vec_vsx_st(row, 0, dst); dst += stride; row = vec_perm(row, above_right, sl1); } } void vpx_d45_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t a0 = vec_vsx_ld(0, above); const uint8x16_t a1 = vec_vsx_ld(16, above); const uint8x16_t above_right = vec_splat(a1, 15); const uint8x16_t b0 = vec_perm(a0, a1, sl1); const uint8x16_t b1 = vec_perm(a1, above_right, sl1); const uint8x16_t c0 = vec_perm(b0, b1, sl1); const uint8x16_t c1 = vec_perm(b1, above_right, sl1); uint8x16_t row0 = avg3(a0, b0, c0); uint8x16_t row1 = avg3(a1, b1, c1); int i; (void)left; for (i = 0; i < 32; i++) { vec_vsx_st(row0, 0, dst); vec_vsx_st(row1, 16, dst); dst += stride; row0 = vec_perm(row0, row1, sl1); row1 = vec_perm(row1, above_right, sl1); } } // TODO(crbug.com/webm/1522): Fix test failures. 
#if 0 void vpx_d63_predictor_8x8_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t af = vec_vsx_ld(0, above); const uint8x16_t above_right = vec_splat(af, 9); const uint8x16_t a = xxpermdi(af, above_right, 1); const uint8x16_t b = vec_perm(a, above_right, sl1); const uint8x16_t c = vec_perm(b, above_right, sl1); uint8x16_t row0 = vec_avg(a, b); uint8x16_t row1 = avg3(a, b, c); int i; (void)left; for (i = 0; i < 4; i++) { const uint8x16_t d0 = vec_vsx_ld(0, dst); const uint8x16_t d1 = vec_vsx_ld(0, dst + stride); vec_vsx_st(xxpermdi(row0, d0, 1), 0, dst); vec_vsx_st(xxpermdi(row1, d1, 1), 0, dst + stride); dst += stride * 2; row0 = vec_perm(row0, above_right, sl1); row1 = vec_perm(row1, above_right, sl1); } } #endif void vpx_d63_predictor_16x16_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t a0 = vec_vsx_ld(0, above); const uint8x16_t a1 = vec_vsx_ld(16, above); const uint8x16_t above_right = vec_splat(a1, 0); const uint8x16_t b = vec_perm(a0, above_right, sl1); const uint8x16_t c = vec_perm(b, above_right, sl1); uint8x16_t row0 = vec_avg(a0, b); uint8x16_t row1 = avg3(a0, b, c); int i; (void)left; for (i = 0; i < 8; i++) { vec_vsx_st(row0, 0, dst); vec_vsx_st(row1, 0, dst + stride); dst += stride * 2; row0 = vec_perm(row0, above_right, sl1); row1 = vec_perm(row1, above_right, sl1); } } void vpx_d63_predictor_32x32_vsx(uint8_t *dst, ptrdiff_t stride, const uint8_t *above, const uint8_t *left) { const uint8x16_t a0 = vec_vsx_ld(0, above); const uint8x16_t a1 = vec_vsx_ld(16, above); const uint8x16_t a2 = vec_vsx_ld(32, above); const uint8x16_t above_right = vec_splat(a2, 0); const uint8x16_t b0 = vec_perm(a0, a1, sl1); const uint8x16_t b1 = vec_perm(a1, above_right, sl1); const uint8x16_t c0 = vec_perm(b0, b1, sl1); const uint8x16_t c1 = vec_perm(b1, above_right, sl1); uint8x16_t row0_0 = vec_avg(a0, b0); uint8x16_t row0_1 = vec_avg(a1, b1); uint8x16_t row1_0 = avg3(a0, b0, c0); uint8x16_t row1_1 = avg3(a1, b1, c1); int i; (void)left; for (i = 0; i < 16; i++) { vec_vsx_st(row0_0, 0, dst); vec_vsx_st(row0_1, 16, dst); vec_vsx_st(row1_0, 0, dst + stride); vec_vsx_st(row1_1, 16, dst + stride); dst += stride * 2; row0_0 = vec_perm(row0_0, row0_1, sl1); row0_1 = vec_perm(row0_1, above_right, sl1); row1_0 = vec_perm(row1_0, row1_1, sl1); row1_1 = vec_perm(row1_1, above_right, sl1); } } libvpx-1.8.2/vpx_dsp/ppc/inv_txfm_vsx.c000066400000000000000000002763441357355204000202150ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/

#include <assert.h>
#include <stdlib.h>
#include <string.h>
/* NOTE: the three angle-bracket headers above were lost to extraction (only
   bare "#include" tokens survived); this particular set is an assumption. */

#include "vpx_dsp/ppc/bitdepth_conversion_vsx.h"
#include "vpx_dsp/ppc/types_vsx.h"
#include "vpx_dsp/ppc/inv_txfm_vsx.h"

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/inv_txfm.h"

static const int16x8_t cospi1_v = { 16364, 16364, 16364, 16364, 16364, 16364, 16364, 16364 };
static const int16x8_t cospi1m_v = { -16364, -16364, -16364, -16364, -16364, -16364, -16364, -16364 };
static const int16x8_t cospi2_v = { 16305, 16305, 16305, 16305, 16305, 16305, 16305, 16305 };
static const int16x8_t cospi2m_v = { -16305, -16305, -16305, -16305, -16305, -16305, -16305, -16305 };
static const int16x8_t cospi3_v = { 16207, 16207, 16207, 16207, 16207, 16207, 16207, 16207 };
static const int16x8_t cospi4_v = { 16069, 16069, 16069, 16069, 16069, 16069, 16069, 16069 };
static const int16x8_t cospi4m_v = { -16069, -16069, -16069, -16069, -16069, -16069, -16069, -16069 };
static const int16x8_t cospi5_v = { 15893, 15893, 15893, 15893, 15893, 15893, 15893, 15893 };
static const int16x8_t cospi5m_v = { -15893, -15893, -15893, -15893, -15893, -15893, -15893, -15893 };
static const int16x8_t cospi6_v = { 15679, 15679, 15679, 15679, 15679, 15679, 15679, 15679 };
static const int16x8_t cospi7_v = { 15426, 15426, 15426, 15426, 15426, 15426, 15426, 15426 };
static const int16x8_t cospi8_v = { 15137, 15137, 15137, 15137, 15137, 15137, 15137, 15137 };
static const int16x8_t cospi8m_v = { -15137, -15137, -15137, -15137, -15137, -15137, -15137, -15137 };
static const int16x8_t cospi9_v = { 14811, 14811, 14811, 14811, 14811, 14811, 14811, 14811 };
static const int16x8_t cospi9m_v = { -14811, -14811, -14811, -14811, -14811, -14811, -14811, -14811 };
static const int16x8_t cospi10_v = { 14449, 14449, 14449, 14449, 14449, 14449, 14449, 14449 };
static const int16x8_t cospi10m_v = { -14449, -14449, -14449, -14449, -14449, -14449, -14449, -14449 };
static const int16x8_t cospi11_v = { 14053, 14053, 14053, 14053, 14053, 14053, 14053, 14053 };
static const int16x8_t cospi12_v = { 13623, 13623, 13623, 13623, 13623, 13623, 13623, 13623 };
static const int16x8_t cospi12m_v = { -13623, -13623, -13623, -13623, -13623, -13623, -13623, -13623 };
static const int16x8_t cospi13_v = { 13160, 13160, 13160, 13160, 13160, 13160, 13160, 13160 };
static const int16x8_t cospi13m_v = { -13160, -13160, -13160, -13160, -13160, -13160, -13160, -13160 };
static const int16x8_t cospi14_v = { 12665, 12665, 12665, 12665, 12665, 12665, 12665, 12665 };
static const int16x8_t cospi15_v = { 12140, 12140, 12140, 12140, 12140, 12140, 12140, 12140 };
static const int16x8_t cospi16_v = { 11585, 11585, 11585, 11585, 11585, 11585, 11585, 11585 };
static const int16x8_t cospi16m_v = { -11585, -11585, -11585, -11585, -11585, -11585, -11585, -11585 };
static const int16x8_t cospi17_v = { 11003, 11003, 11003, 11003, 11003, 11003, 11003, 11003 };
static const int16x8_t cospi17m_v = { -11003, -11003, -11003, -11003, -11003, -11003, -11003, -11003 };
static const int16x8_t cospi18_v = { 10394, 10394, 10394, 10394, 10394, 10394, 10394, 10394 };
static const int16x8_t cospi18m_v = { -10394, -10394, -10394, -10394, -10394, -10394, -10394, -10394 };
static const int16x8_t cospi19_v = { 9760, 9760, 9760, 9760, 9760, 9760, 9760, 9760 };
static const int16x8_t cospi20_v = { 9102, 9102, 9102, 9102, 9102, 9102, 9102, 9102 };
static const int16x8_t cospi20m_v = { -9102, -9102, -9102, -9102, -9102, -9102, -9102, -9102 };
static const int16x8_t cospi21_v = { 8423, 8423, 8423, 8423, 8423, 8423, 8423, 8423 };
static const int16x8_t cospi21m_v = { -8423,
-8423, -8423, -8423, -8423, -8423, -8423, -8423 }; static const int16x8_t cospi22_v = { 7723, 7723, 7723, 7723, 7723, 7723, 7723, 7723 }; static const int16x8_t cospi23_v = { 7005, 7005, 7005, 7005, 7005, 7005, 7005, 7005 }; static const int16x8_t cospi24_v = { 6270, 6270, 6270, 6270, 6270, 6270, 6270, 6270 }; static const int16x8_t cospi24m_v = { -6270, -6270, -6270, -6270, -6270, -6270, -6270, -6270 }; static const int16x8_t cospi25_v = { 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520 }; static const int16x8_t cospi25m_v = { -5520, -5520, -5520, -5520, -5520, -5520, -5520, -5520 }; static const int16x8_t cospi26_v = { 4756, 4756, 4756, 4756, 4756, 4756, 4756, 4756 }; static const int16x8_t cospi26m_v = { -4756, -4756, -4756, -4756, -4756, -4756, -4756, -4756 }; static const int16x8_t cospi27_v = { 3981, 3981, 3981, 3981, 3981, 3981, 3981, 3981 }; static const int16x8_t cospi28_v = { 3196, 3196, 3196, 3196, 3196, 3196, 3196, 3196 }; static const int16x8_t cospi28m_v = { -3196, -3196, -3196, -3196, -3196, -3196, -3196, -3196 }; static const int16x8_t cospi29_v = { 2404, 2404, 2404, 2404, 2404, 2404, 2404, 2404 }; static const int16x8_t cospi29m_v = { -2404, -2404, -2404, -2404, -2404, -2404, -2404, -2404 }; static const int16x8_t cospi30_v = { 1606, 1606, 1606, 1606, 1606, 1606, 1606, 1606 }; static const int16x8_t cospi31_v = { 804, 804, 804, 804, 804, 804, 804, 804 }; static const int16x8_t sinpi_1_9_v = { 5283, 5283, 5283, 5283, 5283, 5283, 5283, 5283 }; static const int16x8_t sinpi_2_9_v = { 9929, 9929, 9929, 9929, 9929, 9929, 9929, 9929 }; static const int16x8_t sinpi_3_9_v = { 13377, 13377, 13377, 13377, 13377, 13377, 13377, 13377 }; static const int16x8_t sinpi_4_9_v = { 15212, 15212, 15212, 15212, 15212, 15212, 15212, 15212 }; static uint8x16_t tr8_mask0 = { 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 }; static uint8x16_t tr8_mask1 = { 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F }; #define ROUND_SHIFT_INIT \ const int32x4_t shift = vec_sl(vec_splat_s32(1), vec_splat_u32(13)); \ const uint32x4_t shift14 = vec_splat_u32(14); #define DCT_CONST_ROUND_SHIFT(vec) vec = vec_sra(vec_add(vec, shift), shift14); #define PIXEL_ADD_INIT \ int16x8_t add8 = vec_splat_s16(8); \ uint16x8_t shift4 = vec_splat_u16(4); #define PIXEL_ADD4(out, in) out = vec_sra(vec_add(in, add8), shift4); #define IDCT4(in0, in1, out0, out1) \ t0 = vec_add(in0, in1); \ t1 = vec_sub(in0, in1); \ tmp16_0 = vec_mergeh(t0, t1); \ temp1 = vec_sra(vec_add(vec_mule(tmp16_0, cospi16_v), shift), shift14); \ temp2 = vec_sra(vec_add(vec_mulo(tmp16_0, cospi16_v), shift), shift14); \ \ tmp16_0 = vec_mergel(in0, in1); \ temp3 = vec_sub(vec_mule(tmp16_0, cospi24_v), vec_mulo(tmp16_0, cospi8_v)); \ DCT_CONST_ROUND_SHIFT(temp3); \ temp4 = vec_add(vec_mule(tmp16_0, cospi8_v), vec_mulo(tmp16_0, cospi24_v)); \ DCT_CONST_ROUND_SHIFT(temp4); \ \ step0 = vec_packs(temp1, temp2); \ step1 = vec_packs(temp4, temp3); \ out0 = vec_add(step0, step1); \ out1 = vec_sub(step0, step1); \ out1 = vec_perm(out1, out1, mask0); #define PACK_STORE(v0, v1) \ tmp16_0 = vec_add(vec_perm(d_u0, d_u1, tr8_mask0), v0); \ tmp16_1 = vec_add(vec_perm(d_u2, d_u3, tr8_mask0), v1); \ output_v = vec_packsu(tmp16_0, tmp16_1); \ \ vec_vsx_st(output_v, 0, tmp_dest); \ for (i = 0; i < 4; i++) \ for (j = 0; j < 4; j++) dest[j * stride + i] = tmp_dest[j * 4 + i]; void vpx_round_store4x4_vsx(int16x8_t *in, int16x8_t *out, uint8_t *dest, int stride) { int i, j; uint8x16_t dest0 = vec_vsx_ld(0, 
dest); uint8x16_t dest1 = vec_vsx_ld(stride, dest); uint8x16_t dest2 = vec_vsx_ld(2 * stride, dest); uint8x16_t dest3 = vec_vsx_ld(3 * stride, dest); uint8x16_t zerov = vec_splat_u8(0); int16x8_t d_u0 = (int16x8_t)vec_mergeh(dest0, zerov); int16x8_t d_u1 = (int16x8_t)vec_mergeh(dest1, zerov); int16x8_t d_u2 = (int16x8_t)vec_mergeh(dest2, zerov); int16x8_t d_u3 = (int16x8_t)vec_mergeh(dest3, zerov); int16x8_t tmp16_0, tmp16_1; uint8x16_t output_v; uint8_t tmp_dest[16]; PIXEL_ADD_INIT; PIXEL_ADD4(out[0], in[0]); PIXEL_ADD4(out[1], in[1]); PACK_STORE(out[0], out[1]); } void vpx_idct4_vsx(int16x8_t *in, int16x8_t *out) { int32x4_t temp1, temp2, temp3, temp4; int16x8_t step0, step1, tmp16_0; uint8x16_t mask0 = { 0x8, 0x9, 0xA, 0xB, 0xC, 0xD, 0xE, 0xF, 0x0, 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7 }; int16x8_t t0 = vec_mergeh(in[0], in[1]); int16x8_t t1 = vec_mergel(in[0], in[1]); ROUND_SHIFT_INIT in[0] = vec_mergeh(t0, t1); in[1] = vec_mergel(t0, t1); IDCT4(in[0], in[1], out[0], out[1]); } void vpx_idct4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int stride) { int16x8_t in[2], out[2]; in[0] = load_tran_low(0, input); in[1] = load_tran_low(8 * sizeof(*input), input); // Rows vpx_idct4_vsx(in, out); // Columns vpx_idct4_vsx(out, in); vpx_round_store4x4_vsx(in, out, dest, stride); } #define TRANSPOSE8x8(in0, in1, in2, in3, in4, in5, in6, in7, out0, out1, out2, \ out3, out4, out5, out6, out7) \ out0 = vec_mergeh(in0, in1); \ out1 = vec_mergel(in0, in1); \ out2 = vec_mergeh(in2, in3); \ out3 = vec_mergel(in2, in3); \ out4 = vec_mergeh(in4, in5); \ out5 = vec_mergel(in4, in5); \ out6 = vec_mergeh(in6, in7); \ out7 = vec_mergel(in6, in7); \ in0 = (int16x8_t)vec_mergeh((int32x4_t)out0, (int32x4_t)out2); \ in1 = (int16x8_t)vec_mergel((int32x4_t)out0, (int32x4_t)out2); \ in2 = (int16x8_t)vec_mergeh((int32x4_t)out1, (int32x4_t)out3); \ in3 = (int16x8_t)vec_mergel((int32x4_t)out1, (int32x4_t)out3); \ in4 = (int16x8_t)vec_mergeh((int32x4_t)out4, (int32x4_t)out6); \ in5 = (int16x8_t)vec_mergel((int32x4_t)out4, (int32x4_t)out6); \ in6 = (int16x8_t)vec_mergeh((int32x4_t)out5, (int32x4_t)out7); \ in7 = (int16x8_t)vec_mergel((int32x4_t)out5, (int32x4_t)out7); \ out0 = vec_perm(in0, in4, tr8_mask0); \ out1 = vec_perm(in0, in4, tr8_mask1); \ out2 = vec_perm(in1, in5, tr8_mask0); \ out3 = vec_perm(in1, in5, tr8_mask1); \ out4 = vec_perm(in2, in6, tr8_mask0); \ out5 = vec_perm(in2, in6, tr8_mask1); \ out6 = vec_perm(in3, in7, tr8_mask0); \ out7 = vec_perm(in3, in7, tr8_mask1); /* for the: temp1 = step[x] * cospi_q - step[y] * cospi_z * temp2 = step[x] * cospi_z + step[y] * cospi_q */ #define STEP8_0(inpt0, inpt1, outpt0, outpt1, cospi0, cospi1) \ tmp16_0 = vec_mergeh(inpt0, inpt1); \ tmp16_1 = vec_mergel(inpt0, inpt1); \ temp10 = vec_sub(vec_mule(tmp16_0, cospi0), vec_mulo(tmp16_0, cospi1)); \ temp11 = vec_sub(vec_mule(tmp16_1, cospi0), vec_mulo(tmp16_1, cospi1)); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ outpt0 = vec_packs(temp10, temp11); \ temp10 = vec_add(vec_mule(tmp16_0, cospi1), vec_mulo(tmp16_0, cospi0)); \ temp11 = vec_add(vec_mule(tmp16_1, cospi1), vec_mulo(tmp16_1, cospi0)); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ outpt1 = vec_packs(temp10, temp11); #define STEP8_1(inpt0, inpt1, outpt0, outpt1, cospi) \ tmp16_2 = vec_sub(inpt0, inpt1); \ tmp16_3 = vec_add(inpt0, inpt1); \ tmp16_0 = vec_mergeh(tmp16_2, tmp16_3); \ tmp16_1 = vec_mergel(tmp16_2, tmp16_3); \ temp10 = vec_mule(tmp16_0, cospi); \ temp11 = vec_mule(tmp16_1, cospi); \ 
DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ outpt0 = vec_packs(temp10, temp11); \ temp10 = vec_mulo(tmp16_0, cospi); \ temp11 = vec_mulo(tmp16_1, cospi); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ outpt1 = vec_packs(temp10, temp11); #define IDCT8(in0, in1, in2, in3, in4, in5, in6, in7) \ /* stage 1 */ \ step0 = in0; \ step2 = in4; \ step1 = in2; \ step3 = in6; \ \ STEP8_0(in1, in7, step4, step7, cospi28_v, cospi4_v); \ STEP8_0(in5, in3, step5, step6, cospi12_v, cospi20_v); \ \ /* stage 2 */ \ STEP8_1(step0, step2, in1, in0, cospi16_v); \ STEP8_0(step1, step3, in2, in3, cospi24_v, cospi8_v); \ in4 = vec_add(step4, step5); \ in5 = vec_sub(step4, step5); \ in6 = vec_sub(step7, step6); \ in7 = vec_add(step6, step7); \ \ /* stage 3 */ \ step0 = vec_add(in0, in3); \ step1 = vec_add(in1, in2); \ step2 = vec_sub(in1, in2); \ step3 = vec_sub(in0, in3); \ step4 = in4; \ STEP8_1(in6, in5, step5, step6, cospi16_v); \ step7 = in7; \ \ /* stage 4 */ \ in0 = vec_add(step0, step7); \ in1 = vec_add(step1, step6); \ in2 = vec_add(step2, step5); \ in3 = vec_add(step3, step4); \ in4 = vec_sub(step3, step4); \ in5 = vec_sub(step2, step5); \ in6 = vec_sub(step1, step6); \ in7 = vec_sub(step0, step7); #define PIXEL_ADD(in, out, add, shiftx) \ out = vec_add(vec_sra(vec_add(in, add), shiftx), out); void vpx_idct8_vsx(int16x8_t *in, int16x8_t *out) { int16x8_t step0, step1, step2, step3, step4, step5, step6, step7; int16x8_t tmp16_0, tmp16_1, tmp16_2, tmp16_3; int32x4_t temp10, temp11; ROUND_SHIFT_INIT; TRANSPOSE8x8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]); IDCT8(out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]); } void vpx_round_store8x8_vsx(int16x8_t *in, uint8_t *dest, int stride) { uint8x16_t zerov = vec_splat_u8(0); uint8x16_t dest0 = vec_vsx_ld(0, dest); uint8x16_t dest1 = vec_vsx_ld(stride, dest); uint8x16_t dest2 = vec_vsx_ld(2 * stride, dest); uint8x16_t dest3 = vec_vsx_ld(3 * stride, dest); uint8x16_t dest4 = vec_vsx_ld(4 * stride, dest); uint8x16_t dest5 = vec_vsx_ld(5 * stride, dest); uint8x16_t dest6 = vec_vsx_ld(6 * stride, dest); uint8x16_t dest7 = vec_vsx_ld(7 * stride, dest); int16x8_t d_u0 = (int16x8_t)vec_mergeh(dest0, zerov); int16x8_t d_u1 = (int16x8_t)vec_mergeh(dest1, zerov); int16x8_t d_u2 = (int16x8_t)vec_mergeh(dest2, zerov); int16x8_t d_u3 = (int16x8_t)vec_mergeh(dest3, zerov); int16x8_t d_u4 = (int16x8_t)vec_mergeh(dest4, zerov); int16x8_t d_u5 = (int16x8_t)vec_mergeh(dest5, zerov); int16x8_t d_u6 = (int16x8_t)vec_mergeh(dest6, zerov); int16x8_t d_u7 = (int16x8_t)vec_mergeh(dest7, zerov); int16x8_t add = vec_sl(vec_splat_s16(8), vec_splat_u16(1)); uint16x8_t shift5 = vec_splat_u16(5); uint8x16_t output0, output1, output2, output3; PIXEL_ADD(in[0], d_u0, add, shift5); PIXEL_ADD(in[1], d_u1, add, shift5); PIXEL_ADD(in[2], d_u2, add, shift5); PIXEL_ADD(in[3], d_u3, add, shift5); PIXEL_ADD(in[4], d_u4, add, shift5); PIXEL_ADD(in[5], d_u5, add, shift5); PIXEL_ADD(in[6], d_u6, add, shift5); PIXEL_ADD(in[7], d_u7, add, shift5); output0 = vec_packsu(d_u0, d_u1); output1 = vec_packsu(d_u2, d_u3); output2 = vec_packsu(d_u4, d_u5); output3 = vec_packsu(d_u6, d_u7); vec_vsx_st(xxpermdi(output0, dest0, 1), 0, dest); vec_vsx_st(xxpermdi(output0, dest1, 3), stride, dest); vec_vsx_st(xxpermdi(output1, dest2, 1), 2 * stride, dest); vec_vsx_st(xxpermdi(output1, dest3, 3), 3 * stride, dest); vec_vsx_st(xxpermdi(output2, dest4, 1), 4 * stride, dest); 
vec_vsx_st(xxpermdi(output2, dest5, 3), 5 * stride, dest); vec_vsx_st(xxpermdi(output3, dest6, 1), 6 * stride, dest); vec_vsx_st(xxpermdi(output3, dest7, 3), 7 * stride, dest); } void vpx_idct8x8_64_add_vsx(const tran_low_t *input, uint8_t *dest, int stride) { int16x8_t src[8], tmp[8]; src[0] = load_tran_low(0, input); src[1] = load_tran_low(8 * sizeof(*input), input); src[2] = load_tran_low(16 * sizeof(*input), input); src[3] = load_tran_low(24 * sizeof(*input), input); src[4] = load_tran_low(32 * sizeof(*input), input); src[5] = load_tran_low(40 * sizeof(*input), input); src[6] = load_tran_low(48 * sizeof(*input), input); src[7] = load_tran_low(56 * sizeof(*input), input); vpx_idct8_vsx(src, tmp); vpx_idct8_vsx(tmp, src); vpx_round_store8x8_vsx(src, dest, stride); } #define STEP16_1(inpt0, inpt1, outpt0, outpt1, cospi) \ tmp16_0 = vec_mergeh(inpt0, inpt1); \ tmp16_1 = vec_mergel(inpt0, inpt1); \ temp10 = vec_mule(tmp16_0, cospi); \ temp11 = vec_mule(tmp16_1, cospi); \ temp20 = vec_mulo(tmp16_0, cospi); \ temp21 = vec_mulo(tmp16_1, cospi); \ temp30 = vec_sub(temp10, temp20); \ temp10 = vec_add(temp10, temp20); \ temp20 = vec_sub(temp11, temp21); \ temp21 = vec_add(temp11, temp21); \ DCT_CONST_ROUND_SHIFT(temp30); \ DCT_CONST_ROUND_SHIFT(temp20); \ outpt0 = vec_packs(temp30, temp20); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp21); \ outpt1 = vec_packs(temp10, temp21); #define IDCT16(in0, in1, in2, in3, in4, in5, in6, in7, in8, in9, inA, inB, \ inC, inD, inE, inF, out0, out1, out2, out3, out4, out5, out6, \ out7, out8, out9, outA, outB, outC, outD, outE, outF) \ /* stage 1 */ \ /* out0 = in0; */ \ out1 = in8; \ out2 = in4; \ out3 = inC; \ out4 = in2; \ out5 = inA; \ out6 = in6; \ out7 = inE; \ out8 = in1; \ out9 = in9; \ outA = in5; \ outB = inD; \ outC = in3; \ outD = inB; \ outE = in7; \ outF = inF; \ \ /* stage 2 */ \ /* in0 = out0; */ \ in1 = out1; \ in2 = out2; \ in3 = out3; \ in4 = out4; \ in5 = out5; \ in6 = out6; \ in7 = out7; \ \ STEP8_0(out8, outF, in8, inF, cospi30_v, cospi2_v); \ STEP8_0(out9, outE, in9, inE, cospi14_v, cospi18_v); \ STEP8_0(outA, outD, inA, inD, cospi22_v, cospi10_v); \ STEP8_0(outB, outC, inB, inC, cospi6_v, cospi26_v); \ \ /* stage 3 */ \ out0 = in0; \ out1 = in1; \ out2 = in2; \ out3 = in3; \ \ STEP8_0(in4, in7, out4, out7, cospi28_v, cospi4_v); \ STEP8_0(in5, in6, out5, out6, cospi12_v, cospi20_v); \ \ out8 = vec_add(in8, in9); \ out9 = vec_sub(in8, in9); \ outA = vec_sub(inB, inA); \ outB = vec_add(inA, inB); \ outC = vec_add(inC, inD); \ outD = vec_sub(inC, inD); \ outE = vec_sub(inF, inE); \ outF = vec_add(inE, inF); \ \ /* stage 4 */ \ STEP16_1(out0, out1, in1, in0, cospi16_v); \ STEP8_0(out2, out3, in2, in3, cospi24_v, cospi8_v); \ in4 = vec_add(out4, out5); \ in5 = vec_sub(out4, out5); \ in6 = vec_sub(out7, out6); \ in7 = vec_add(out6, out7); \ \ in8 = out8; \ inF = outF; \ tmp16_0 = vec_mergeh(out9, outE); \ tmp16_1 = vec_mergel(out9, outE); \ temp10 = vec_sub(vec_mulo(tmp16_0, cospi24_v), vec_mule(tmp16_0, cospi8_v)); \ temp11 = vec_sub(vec_mulo(tmp16_1, cospi24_v), vec_mule(tmp16_1, cospi8_v)); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ in9 = vec_packs(temp10, temp11); \ temp10 = vec_add(vec_mule(tmp16_0, cospi24_v), vec_mulo(tmp16_0, cospi8_v)); \ temp11 = vec_add(vec_mule(tmp16_1, cospi24_v), vec_mulo(tmp16_1, cospi8_v)); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ inE = vec_packs(temp10, temp11); \ \ tmp16_0 = vec_mergeh(outA, outD); \ tmp16_1 = vec_mergel(outA, outD); \ temp10 = 
\ vec_sub(vec_mule(tmp16_0, cospi24m_v), vec_mulo(tmp16_0, cospi8_v)); \ temp11 = \ vec_sub(vec_mule(tmp16_1, cospi24m_v), vec_mulo(tmp16_1, cospi8_v)); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ inA = vec_packs(temp10, temp11); \ temp10 = vec_sub(vec_mulo(tmp16_0, cospi24_v), vec_mule(tmp16_0, cospi8_v)); \ temp11 = vec_sub(vec_mulo(tmp16_1, cospi24_v), vec_mule(tmp16_1, cospi8_v)); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ inD = vec_packs(temp10, temp11); \ \ inB = outB; \ inC = outC; \ \ /* stage 5 */ \ out0 = vec_add(in0, in3); \ out1 = vec_add(in1, in2); \ out2 = vec_sub(in1, in2); \ out3 = vec_sub(in0, in3); \ out4 = in4; \ STEP16_1(in6, in5, out5, out6, cospi16_v); \ out7 = in7; \ \ out8 = vec_add(in8, inB); \ out9 = vec_add(in9, inA); \ outA = vec_sub(in9, inA); \ outB = vec_sub(in8, inB); \ outC = vec_sub(inF, inC); \ outD = vec_sub(inE, inD); \ outE = vec_add(inD, inE); \ outF = vec_add(inC, inF); \ \ /* stage 6 */ \ in0 = vec_add(out0, out7); \ in1 = vec_add(out1, out6); \ in2 = vec_add(out2, out5); \ in3 = vec_add(out3, out4); \ in4 = vec_sub(out3, out4); \ in5 = vec_sub(out2, out5); \ in6 = vec_sub(out1, out6); \ in7 = vec_sub(out0, out7); \ in8 = out8; \ in9 = out9; \ STEP16_1(outD, outA, inA, inD, cospi16_v); \ STEP16_1(outC, outB, inB, inC, cospi16_v); \ inE = outE; \ inF = outF; \ \ /* stage 7 */ \ out0 = vec_add(in0, inF); \ out1 = vec_add(in1, inE); \ out2 = vec_add(in2, inD); \ out3 = vec_add(in3, inC); \ out4 = vec_add(in4, inB); \ out5 = vec_add(in5, inA); \ out6 = vec_add(in6, in9); \ out7 = vec_add(in7, in8); \ out8 = vec_sub(in7, in8); \ out9 = vec_sub(in6, in9); \ outA = vec_sub(in5, inA); \ outB = vec_sub(in4, inB); \ outC = vec_sub(in3, inC); \ outD = vec_sub(in2, inD); \ outE = vec_sub(in1, inE); \ outF = vec_sub(in0, inF); #define PIXEL_ADD_STORE16(in0, in1, dst, offset) \ d_uh = (int16x8_t)vec_mergeh(dst, zerov); \ d_ul = (int16x8_t)vec_mergel(dst, zerov); \ PIXEL_ADD(in0, d_uh, add, shift6); \ PIXEL_ADD(in1, d_ul, add, shift6); \ vec_vsx_st(vec_packsu(d_uh, d_ul), offset, dest); static void half_idct16x8_vsx(int16x8_t *src) { int16x8_t tmp0[8], tmp1[8]; int32x4_t temp10, temp11, temp20, temp21, temp30; int16x8_t tmp16_0, tmp16_1; ROUND_SHIFT_INIT; TRANSPOSE8x8(src[0], src[2], src[4], src[6], src[8], src[10], src[12], src[14], tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], tmp0[6], tmp0[7]); TRANSPOSE8x8(src[1], src[3], src[5], src[7], src[9], src[11], src[13], src[15], tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], tmp1[6], tmp1[7]); IDCT16(tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], tmp0[6], tmp0[7], tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], tmp1[6], tmp1[7], src[0], src[2], src[4], src[6], src[8], src[10], src[12], src[14], src[1], src[3], src[5], src[7], src[9], src[11], src[13], src[15]); } void vpx_idct16_vsx(int16x8_t *src0, int16x8_t *src1) { int16x8_t tmp0[8], tmp1[8], tmp2[8], tmp3[8]; int32x4_t temp10, temp11, temp20, temp21, temp30; int16x8_t tmp16_0, tmp16_1; ROUND_SHIFT_INIT; TRANSPOSE8x8(src0[0], src0[2], src0[4], src0[6], src0[8], src0[10], src0[12], src0[14], tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], tmp0[6], tmp0[7]); TRANSPOSE8x8(src0[1], src0[3], src0[5], src0[7], src0[9], src0[11], src0[13], src0[15], tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], tmp1[6], tmp1[7]); TRANSPOSE8x8(src1[0], src1[2], src1[4], src1[6], src1[8], src1[10], src1[12], src1[14], tmp2[0], tmp2[1], tmp2[2], tmp2[3], tmp2[4], tmp2[5], tmp2[6], tmp2[7]); 
TRANSPOSE8x8(src1[1], src1[3], src1[5], src1[7], src1[9], src1[11], src1[13], src1[15], tmp3[0], tmp3[1], tmp3[2], tmp3[3], tmp3[4], tmp3[5], tmp3[6], tmp3[7]); IDCT16(tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], tmp0[6], tmp0[7], tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], tmp1[6], tmp1[7], src0[0], src0[2], src0[4], src0[6], src0[8], src0[10], src0[12], src0[14], src1[0], src1[2], src1[4], src1[6], src1[8], src1[10], src1[12], src1[14]); IDCT16(tmp2[0], tmp2[1], tmp2[2], tmp2[3], tmp2[4], tmp2[5], tmp2[6], tmp2[7], tmp3[0], tmp3[1], tmp3[2], tmp3[3], tmp3[4], tmp3[5], tmp3[6], tmp3[7], src0[1], src0[3], src0[5], src0[7], src0[9], src0[11], src0[13], src0[15], src1[1], src1[3], src1[5], src1[7], src1[9], src1[11], src1[13], src1[15]); } void vpx_round_store16x16_vsx(int16x8_t *src0, int16x8_t *src1, uint8_t *dest, int stride) { uint8x16_t destv[16]; int16x8_t d_uh, d_ul; uint8x16_t zerov = vec_splat_u8(0); uint16x8_t shift6 = vec_splat_u16(6); int16x8_t add = vec_sl(vec_splat_s16(8), vec_splat_u16(2)); // load dest LOAD_INPUT16(vec_vsx_ld, dest, 0, stride, destv); PIXEL_ADD_STORE16(src0[0], src0[1], destv[0], 0); PIXEL_ADD_STORE16(src0[2], src0[3], destv[1], stride); PIXEL_ADD_STORE16(src0[4], src0[5], destv[2], 2 * stride); PIXEL_ADD_STORE16(src0[6], src0[7], destv[3], 3 * stride); PIXEL_ADD_STORE16(src0[8], src0[9], destv[4], 4 * stride); PIXEL_ADD_STORE16(src0[10], src0[11], destv[5], 5 * stride); PIXEL_ADD_STORE16(src0[12], src0[13], destv[6], 6 * stride); PIXEL_ADD_STORE16(src0[14], src0[15], destv[7], 7 * stride); PIXEL_ADD_STORE16(src1[0], src1[1], destv[8], 8 * stride); PIXEL_ADD_STORE16(src1[2], src1[3], destv[9], 9 * stride); PIXEL_ADD_STORE16(src1[4], src1[5], destv[10], 10 * stride); PIXEL_ADD_STORE16(src1[6], src1[7], destv[11], 11 * stride); PIXEL_ADD_STORE16(src1[8], src1[9], destv[12], 12 * stride); PIXEL_ADD_STORE16(src1[10], src1[11], destv[13], 13 * stride); PIXEL_ADD_STORE16(src1[12], src1[13], destv[14], 14 * stride); PIXEL_ADD_STORE16(src1[14], src1[15], destv[15], 15 * stride); } void vpx_idct16x16_256_add_vsx(const tran_low_t *input, uint8_t *dest, int stride) { int16x8_t src0[16], src1[16]; int16x8_t tmp0[8], tmp1[8], tmp2[8], tmp3[8]; int32x4_t temp10, temp11, temp20, temp21, temp30; int16x8_t tmp16_0, tmp16_1; ROUND_SHIFT_INIT; LOAD_INPUT16(load_tran_low, input, 0, 8 * sizeof(*input), src0); LOAD_INPUT16(load_tran_low, input, 8 * 8 * 2 * sizeof(*input), 8 * sizeof(*input), src1); // transform rows // transform the upper half of 16x16 matrix half_idct16x8_vsx(src0); TRANSPOSE8x8(src0[0], src0[2], src0[4], src0[6], src0[8], src0[10], src0[12], src0[14], tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], tmp0[6], tmp0[7]); TRANSPOSE8x8(src0[1], src0[3], src0[5], src0[7], src0[9], src0[11], src0[13], src0[15], tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], tmp1[6], tmp1[7]); // transform the lower half of 16x16 matrix half_idct16x8_vsx(src1); TRANSPOSE8x8(src1[0], src1[2], src1[4], src1[6], src1[8], src1[10], src1[12], src1[14], tmp2[0], tmp2[1], tmp2[2], tmp2[3], tmp2[4], tmp2[5], tmp2[6], tmp2[7]); TRANSPOSE8x8(src1[1], src1[3], src1[5], src1[7], src1[9], src1[11], src1[13], src1[15], tmp3[0], tmp3[1], tmp3[2], tmp3[3], tmp3[4], tmp3[5], tmp3[6], tmp3[7]); // transform columns // left half first IDCT16(tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], tmp0[6], tmp0[7], tmp2[0], tmp2[1], tmp2[2], tmp2[3], tmp2[4], tmp2[5], tmp2[6], tmp2[7], src0[0], src0[2], src0[4], src0[6], src0[8], src0[10], src0[12], src0[14], src1[0], src1[2], 
src1[4], src1[6], src1[8], src1[10], src1[12], src1[14]); // right half IDCT16(tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], tmp1[6], tmp1[7], tmp3[0], tmp3[1], tmp3[2], tmp3[3], tmp3[4], tmp3[5], tmp3[6], tmp3[7], src0[1], src0[3], src0[5], src0[7], src0[9], src0[11], src0[13], src0[15], src1[1], src1[3], src1[5], src1[7], src1[9], src1[11], src1[13], src1[15]); vpx_round_store16x16_vsx(src0, src1, dest, stride); } #define LOAD_8x32(load, in00, in01, in02, in03, in10, in11, in12, in13, in20, \ in21, in22, in23, in30, in31, in32, in33, in40, in41, in42, \ in43, in50, in51, in52, in53, in60, in61, in62, in63, in70, \ in71, in72, in73, offset) \ /* load the first row from the 8x32 block*/ \ in00 = load(offset, input); \ in01 = load(offset + 16, input); \ in02 = load(offset + 2 * 16, input); \ in03 = load(offset + 3 * 16, input); \ \ in10 = load(offset + 4 * 16, input); \ in11 = load(offset + 5 * 16, input); \ in12 = load(offset + 6 * 16, input); \ in13 = load(offset + 7 * 16, input); \ \ in20 = load(offset + 8 * 16, input); \ in21 = load(offset + 9 * 16, input); \ in22 = load(offset + 10 * 16, input); \ in23 = load(offset + 11 * 16, input); \ \ in30 = load(offset + 12 * 16, input); \ in31 = load(offset + 13 * 16, input); \ in32 = load(offset + 14 * 16, input); \ in33 = load(offset + 15 * 16, input); \ \ in40 = load(offset + 16 * 16, input); \ in41 = load(offset + 17 * 16, input); \ in42 = load(offset + 18 * 16, input); \ in43 = load(offset + 19 * 16, input); \ \ in50 = load(offset + 20 * 16, input); \ in51 = load(offset + 21 * 16, input); \ in52 = load(offset + 22 * 16, input); \ in53 = load(offset + 23 * 16, input); \ \ in60 = load(offset + 24 * 16, input); \ in61 = load(offset + 25 * 16, input); \ in62 = load(offset + 26 * 16, input); \ in63 = load(offset + 27 * 16, input); \ \ /* load the last row from the 8x32 block*/ \ in70 = load(offset + 28 * 16, input); \ in71 = load(offset + 29 * 16, input); \ in72 = load(offset + 30 * 16, input); \ in73 = load(offset + 31 * 16, input); /* for the: temp1 = -step[x] * cospi_q + step[y] * cospi_z * temp2 = step[x] * cospi_z + step[y] * cospi_q */ #define STEP32(inpt0, inpt1, outpt0, outpt1, cospi0, cospi1) \ tmp16_0 = vec_mergeh(inpt0, inpt1); \ tmp16_1 = vec_mergel(inpt0, inpt1); \ temp10 = vec_sub(vec_mulo(tmp16_0, cospi1), vec_mule(tmp16_0, cospi0)); \ temp11 = vec_sub(vec_mulo(tmp16_1, cospi1), vec_mule(tmp16_1, cospi0)); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ outpt0 = vec_packs(temp10, temp11); \ temp10 = vec_add(vec_mule(tmp16_0, cospi1), vec_mulo(tmp16_0, cospi0)); \ temp11 = vec_add(vec_mule(tmp16_1, cospi1), vec_mulo(tmp16_1, cospi0)); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ outpt1 = vec_packs(temp10, temp11); /* for the: temp1 = -step[x] * cospi_q - step[y] * cospi_z * temp2 = -step[x] * cospi_z + step[y] * cospi_q */ #define STEP32_1(inpt0, inpt1, outpt0, outpt1, cospi0, cospi1, cospi1m) \ tmp16_0 = vec_mergeh(inpt0, inpt1); \ tmp16_1 = vec_mergel(inpt0, inpt1); \ temp10 = vec_sub(vec_mulo(tmp16_0, cospi1m), vec_mule(tmp16_0, cospi0)); \ temp11 = vec_sub(vec_mulo(tmp16_1, cospi1m), vec_mule(tmp16_1, cospi0)); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ outpt0 = vec_packs(temp10, temp11); \ temp10 = vec_sub(vec_mulo(tmp16_0, cospi0), vec_mule(tmp16_0, cospi1)); \ temp11 = vec_sub(vec_mulo(tmp16_1, cospi0), vec_mule(tmp16_1, cospi1)); \ DCT_CONST_ROUND_SHIFT(temp10); \ DCT_CONST_ROUND_SHIFT(temp11); \ outpt1 = vec_packs(temp10, temp11); #define 
IDCT32(in0, in1, in2, in3, out) \ \ /* stage 1 */ \ /* out[0][0] = in[0][0]; */ \ out[0][1] = in2[0]; \ out[0][2] = in1[0]; \ out[0][3] = in3[0]; \ out[0][4] = in0[4]; \ out[0][5] = in2[4]; \ out[0][6] = in1[4]; \ out[0][7] = in3[4]; \ out[1][0] = in0[2]; \ out[1][1] = in2[2]; \ out[1][2] = in1[2]; \ out[1][3] = in3[2]; \ out[1][4] = in0[6]; \ out[1][5] = in2[6]; \ out[1][6] = in1[6]; \ out[1][7] = in3[6]; \ \ STEP8_0(in0[1], in3[7], out[2][0], out[3][7], cospi31_v, cospi1_v); \ STEP8_0(in2[1], in1[7], out[2][1], out[3][6], cospi15_v, cospi17_v); \ STEP8_0(in1[1], in2[7], out[2][2], out[3][5], cospi23_v, cospi9_v); \ STEP8_0(in3[1], in0[7], out[2][3], out[3][4], cospi7_v, cospi25_v); \ STEP8_0(in0[5], in3[3], out[2][4], out[3][3], cospi27_v, cospi5_v); \ STEP8_0(in2[5], in1[3], out[2][5], out[3][2], cospi11_v, cospi21_v); \ STEP8_0(in1[5], in2[3], out[2][6], out[3][1], cospi19_v, cospi13_v); \ STEP8_0(in3[5], in0[3], out[2][7], out[3][0], cospi3_v, cospi29_v); \ \ /* stage 2 */ \ /* in0[0] = out[0][0]; */ \ in0[1] = out[0][1]; \ in0[2] = out[0][2]; \ in0[3] = out[0][3]; \ in0[4] = out[0][4]; \ in0[5] = out[0][5]; \ in0[6] = out[0][6]; \ in0[7] = out[0][7]; \ \ STEP8_0(out[1][0], out[1][7], in1[0], in1[7], cospi30_v, cospi2_v); \ STEP8_0(out[1][1], out[1][6], in1[1], in1[6], cospi14_v, cospi18_v); \ STEP8_0(out[1][2], out[1][5], in1[2], in1[5], cospi22_v, cospi10_v); \ STEP8_0(out[1][3], out[1][4], in1[3], in1[4], cospi6_v, cospi26_v); \ \ in2[0] = vec_add(out[2][0], out[2][1]); \ in2[1] = vec_sub(out[2][0], out[2][1]); \ in2[2] = vec_sub(out[2][3], out[2][2]); \ in2[3] = vec_add(out[2][3], out[2][2]); \ in2[4] = vec_add(out[2][4], out[2][5]); \ in2[5] = vec_sub(out[2][4], out[2][5]); \ in2[6] = vec_sub(out[2][7], out[2][6]); \ in2[7] = vec_add(out[2][7], out[2][6]); \ in3[0] = vec_add(out[3][0], out[3][1]); \ in3[1] = vec_sub(out[3][0], out[3][1]); \ in3[2] = vec_sub(out[3][3], out[3][2]); \ in3[3] = vec_add(out[3][3], out[3][2]); \ in3[4] = vec_add(out[3][4], out[3][5]); \ in3[5] = vec_sub(out[3][4], out[3][5]); \ in3[6] = vec_sub(out[3][7], out[3][6]); \ in3[7] = vec_add(out[3][6], out[3][7]); \ \ /* stage 3 */ \ out[0][0] = in0[0]; \ out[0][1] = in0[1]; \ out[0][2] = in0[2]; \ out[0][3] = in0[3]; \ \ STEP8_0(in0[4], in0[7], out[0][4], out[0][7], cospi28_v, cospi4_v); \ STEP8_0(in0[5], in0[6], out[0][5], out[0][6], cospi12_v, cospi20_v); \ \ out[1][0] = vec_add(in1[0], in1[1]); \ out[1][1] = vec_sub(in1[0], in1[1]); \ out[1][2] = vec_sub(in1[3], in1[2]); \ out[1][3] = vec_add(in1[2], in1[3]); \ out[1][4] = vec_add(in1[4], in1[5]); \ out[1][5] = vec_sub(in1[4], in1[5]); \ out[1][6] = vec_sub(in1[7], in1[6]); \ out[1][7] = vec_add(in1[6], in1[7]); \ \ out[2][0] = in2[0]; \ out[3][7] = in3[7]; \ STEP32(in2[1], in3[6], out[2][1], out[3][6], cospi4_v, cospi28_v); \ STEP32_1(in2[2], in3[5], out[2][2], out[3][5], cospi28_v, cospi4_v, \ cospi4m_v); \ out[2][3] = in2[3]; \ out[2][4] = in2[4]; \ STEP32(in2[5], in3[2], out[2][5], out[3][2], cospi20_v, cospi12_v); \ STEP32_1(in2[6], in3[1], out[2][6], out[3][1], cospi12_v, cospi20_v, \ cospi20m_v); \ out[2][7] = in2[7]; \ out[3][0] = in3[0]; \ out[3][3] = in3[3]; \ out[3][4] = in3[4]; \ \ /* stage 4 */ \ STEP16_1(out[0][0], out[0][1], in0[1], in0[0], cospi16_v); \ STEP8_0(out[0][2], out[0][3], in0[2], in0[3], cospi24_v, cospi8_v); \ in0[4] = vec_add(out[0][4], out[0][5]); \ in0[5] = vec_sub(out[0][4], out[0][5]); \ in0[6] = vec_sub(out[0][7], out[0][6]); \ in0[7] = vec_add(out[0][7], out[0][6]); \ \ in1[0] = out[1][0]; \ in1[7] = out[1][7]; \ 
STEP32(out[1][1], out[1][6], in1[1], in1[6], cospi8_v, cospi24_v); \ STEP32_1(out[1][2], out[1][5], in1[2], in1[5], cospi24_v, cospi8_v, \ cospi8m_v); \ in1[3] = out[1][3]; \ in1[4] = out[1][4]; \ \ in2[0] = vec_add(out[2][0], out[2][3]); \ in2[1] = vec_add(out[2][1], out[2][2]); \ in2[2] = vec_sub(out[2][1], out[2][2]); \ in2[3] = vec_sub(out[2][0], out[2][3]); \ in2[4] = vec_sub(out[2][7], out[2][4]); \ in2[5] = vec_sub(out[2][6], out[2][5]); \ in2[6] = vec_add(out[2][5], out[2][6]); \ in2[7] = vec_add(out[2][4], out[2][7]); \ \ in3[0] = vec_add(out[3][0], out[3][3]); \ in3[1] = vec_add(out[3][1], out[3][2]); \ in3[2] = vec_sub(out[3][1], out[3][2]); \ in3[3] = vec_sub(out[3][0], out[3][3]); \ in3[4] = vec_sub(out[3][7], out[3][4]); \ in3[5] = vec_sub(out[3][6], out[3][5]); \ in3[6] = vec_add(out[3][5], out[3][6]); \ in3[7] = vec_add(out[3][4], out[3][7]); \ \ /* stage 5 */ \ out[0][0] = vec_add(in0[0], in0[3]); \ out[0][1] = vec_add(in0[1], in0[2]); \ out[0][2] = vec_sub(in0[1], in0[2]); \ out[0][3] = vec_sub(in0[0], in0[3]); \ out[0][4] = in0[4]; \ STEP16_1(in0[6], in0[5], out[0][5], out[0][6], cospi16_v); \ out[0][7] = in0[7]; \ \ out[1][0] = vec_add(in1[0], in1[3]); \ out[1][1] = vec_add(in1[1], in1[2]); \ out[1][2] = vec_sub(in1[1], in1[2]); \ out[1][3] = vec_sub(in1[0], in1[3]); \ out[1][4] = vec_sub(in1[7], in1[4]); \ out[1][5] = vec_sub(in1[6], in1[5]); \ out[1][6] = vec_add(in1[5], in1[6]); \ out[1][7] = vec_add(in1[4], in1[7]); \ \ out[2][0] = in2[0]; \ out[2][1] = in2[1]; \ STEP32(in2[2], in3[5], out[2][2], out[3][5], cospi8_v, cospi24_v); \ STEP32(in2[3], in3[4], out[2][3], out[3][4], cospi8_v, cospi24_v); \ STEP32_1(in2[4], in3[3], out[2][4], out[3][3], cospi24_v, cospi8_v, \ cospi8m_v); \ STEP32_1(in2[5], in3[2], out[2][5], out[3][2], cospi24_v, cospi8_v, \ cospi8m_v); \ out[2][6] = in2[6]; \ out[2][7] = in2[7]; \ out[3][0] = in3[0]; \ out[3][1] = in3[1]; \ out[3][6] = in3[6]; \ out[3][7] = in3[7]; \ \ /* stage 6 */ \ in0[0] = vec_add(out[0][0], out[0][7]); \ in0[1] = vec_add(out[0][1], out[0][6]); \ in0[2] = vec_add(out[0][2], out[0][5]); \ in0[3] = vec_add(out[0][3], out[0][4]); \ in0[4] = vec_sub(out[0][3], out[0][4]); \ in0[5] = vec_sub(out[0][2], out[0][5]); \ in0[6] = vec_sub(out[0][1], out[0][6]); \ in0[7] = vec_sub(out[0][0], out[0][7]); \ in1[0] = out[1][0]; \ in1[1] = out[1][1]; \ STEP16_1(out[1][5], out[1][2], in1[2], in1[5], cospi16_v); \ STEP16_1(out[1][4], out[1][3], in1[3], in1[4], cospi16_v); \ in1[6] = out[1][6]; \ in1[7] = out[1][7]; \ \ in2[0] = vec_add(out[2][0], out[2][7]); \ in2[1] = vec_add(out[2][1], out[2][6]); \ in2[2] = vec_add(out[2][2], out[2][5]); \ in2[3] = vec_add(out[2][3], out[2][4]); \ in2[4] = vec_sub(out[2][3], out[2][4]); \ in2[5] = vec_sub(out[2][2], out[2][5]); \ in2[6] = vec_sub(out[2][1], out[2][6]); \ in2[7] = vec_sub(out[2][0], out[2][7]); \ \ in3[0] = vec_sub(out[3][7], out[3][0]); \ in3[1] = vec_sub(out[3][6], out[3][1]); \ in3[2] = vec_sub(out[3][5], out[3][2]); \ in3[3] = vec_sub(out[3][4], out[3][3]); \ in3[4] = vec_add(out[3][4], out[3][3]); \ in3[5] = vec_add(out[3][5], out[3][2]); \ in3[6] = vec_add(out[3][6], out[3][1]); \ in3[7] = vec_add(out[3][7], out[3][0]); \ \ /* stage 7 */ \ out[0][0] = vec_add(in0[0], in1[7]); \ out[0][1] = vec_add(in0[1], in1[6]); \ out[0][2] = vec_add(in0[2], in1[5]); \ out[0][3] = vec_add(in0[3], in1[4]); \ out[0][4] = vec_add(in0[4], in1[3]); \ out[0][5] = vec_add(in0[5], in1[2]); \ out[0][6] = vec_add(in0[6], in1[1]); \ out[0][7] = vec_add(in0[7], in1[0]); \ out[1][0] = vec_sub(in0[7], 
in1[0]); \ out[1][1] = vec_sub(in0[6], in1[1]); \ out[1][2] = vec_sub(in0[5], in1[2]); \ out[1][3] = vec_sub(in0[4], in1[3]); \ out[1][4] = vec_sub(in0[3], in1[4]); \ out[1][5] = vec_sub(in0[2], in1[5]); \ out[1][6] = vec_sub(in0[1], in1[6]); \ out[1][7] = vec_sub(in0[0], in1[7]); \ \ out[2][0] = in2[0]; \ out[2][1] = in2[1]; \ out[2][2] = in2[2]; \ out[2][3] = in2[3]; \ STEP16_1(in3[3], in2[4], out[2][4], out[3][3], cospi16_v); \ STEP16_1(in3[2], in2[5], out[2][5], out[3][2], cospi16_v); \ STEP16_1(in3[1], in2[6], out[2][6], out[3][1], cospi16_v); \ STEP16_1(in3[0], in2[7], out[2][7], out[3][0], cospi16_v); \ out[3][4] = in3[4]; \ out[3][5] = in3[5]; \ out[3][6] = in3[6]; \ out[3][7] = in3[7]; \ \ /* final */ \ in0[0] = vec_add(out[0][0], out[3][7]); \ in0[1] = vec_add(out[0][1], out[3][6]); \ in0[2] = vec_add(out[0][2], out[3][5]); \ in0[3] = vec_add(out[0][3], out[3][4]); \ in0[4] = vec_add(out[0][4], out[3][3]); \ in0[5] = vec_add(out[0][5], out[3][2]); \ in0[6] = vec_add(out[0][6], out[3][1]); \ in0[7] = vec_add(out[0][7], out[3][0]); \ in1[0] = vec_add(out[1][0], out[2][7]); \ in1[1] = vec_add(out[1][1], out[2][6]); \ in1[2] = vec_add(out[1][2], out[2][5]); \ in1[3] = vec_add(out[1][3], out[2][4]); \ in1[4] = vec_add(out[1][4], out[2][3]); \ in1[5] = vec_add(out[1][5], out[2][2]); \ in1[6] = vec_add(out[1][6], out[2][1]); \ in1[7] = vec_add(out[1][7], out[2][0]); \ in2[0] = vec_sub(out[1][7], out[2][0]); \ in2[1] = vec_sub(out[1][6], out[2][1]); \ in2[2] = vec_sub(out[1][5], out[2][2]); \ in2[3] = vec_sub(out[1][4], out[2][3]); \ in2[4] = vec_sub(out[1][3], out[2][4]); \ in2[5] = vec_sub(out[1][2], out[2][5]); \ in2[6] = vec_sub(out[1][1], out[2][6]); \ in2[7] = vec_sub(out[1][0], out[2][7]); \ in3[0] = vec_sub(out[0][7], out[3][0]); \ in3[1] = vec_sub(out[0][6], out[3][1]); \ in3[2] = vec_sub(out[0][5], out[3][2]); \ in3[3] = vec_sub(out[0][4], out[3][3]); \ in3[4] = vec_sub(out[0][3], out[3][4]); \ in3[5] = vec_sub(out[0][2], out[3][5]); \ in3[6] = vec_sub(out[0][1], out[3][6]); \ in3[7] = vec_sub(out[0][0], out[3][7]); // NOT A FULL TRANSPOSE! 
Transposes just each 8x8 block in each row, // does not transpose rows #define TRANSPOSE_8x32(in, out) \ /* transpose 4 of 8x8 blocks */ \ TRANSPOSE8x8(in[0][0], in[0][1], in[0][2], in[0][3], in[0][4], in[0][5], \ in[0][6], in[0][7], out[0][0], out[0][1], out[0][2], out[0][3], \ out[0][4], out[0][5], out[0][6], out[0][7]); \ TRANSPOSE8x8(in[1][0], in[1][1], in[1][2], in[1][3], in[1][4], in[1][5], \ in[1][6], in[1][7], out[1][0], out[1][1], out[1][2], out[1][3], \ out[1][4], out[1][5], out[1][6], out[1][7]); \ TRANSPOSE8x8(in[2][0], in[2][1], in[2][2], in[2][3], in[2][4], in[2][5], \ in[2][6], in[2][7], out[2][0], out[2][1], out[2][2], out[2][3], \ out[2][4], out[2][5], out[2][6], out[2][7]); \ TRANSPOSE8x8(in[3][0], in[3][1], in[3][2], in[3][3], in[3][4], in[3][5], \ in[3][6], in[3][7], out[3][0], out[3][1], out[3][2], out[3][3], \ out[3][4], out[3][5], out[3][6], out[3][7]); #define PIXEL_ADD_STORE32(in0, in1, in2, in3, step) \ dst = vec_vsx_ld((step)*stride, dest); \ d_uh = (int16x8_t)vec_mergeh(dst, zerov); \ d_ul = (int16x8_t)vec_mergel(dst, zerov); \ PIXEL_ADD(in0, d_uh, add, shift6); \ PIXEL_ADD(in1, d_ul, add, shift6); \ vec_vsx_st(vec_packsu(d_uh, d_ul), (step)*stride, dest); \ dst = vec_vsx_ld((step)*stride + 16, dest); \ d_uh = (int16x8_t)vec_mergeh(dst, zerov); \ d_ul = (int16x8_t)vec_mergel(dst, zerov); \ PIXEL_ADD(in2, d_uh, add, shift6); \ PIXEL_ADD(in3, d_ul, add, shift6); \ vec_vsx_st(vec_packsu(d_uh, d_ul), (step)*stride + 16, dest); #define ADD_STORE_BLOCK(in, offset) \ PIXEL_ADD_STORE32(in[0][0], in[1][0], in[2][0], in[3][0], (offset) + 0); \ PIXEL_ADD_STORE32(in[0][1], in[1][1], in[2][1], in[3][1], (offset) + 1); \ PIXEL_ADD_STORE32(in[0][2], in[1][2], in[2][2], in[3][2], (offset) + 2); \ PIXEL_ADD_STORE32(in[0][3], in[1][3], in[2][3], in[3][3], (offset) + 3); \ PIXEL_ADD_STORE32(in[0][4], in[1][4], in[2][4], in[3][4], (offset) + 4); \ PIXEL_ADD_STORE32(in[0][5], in[1][5], in[2][5], in[3][5], (offset) + 5); \ PIXEL_ADD_STORE32(in[0][6], in[1][6], in[2][6], in[3][6], (offset) + 6); \ PIXEL_ADD_STORE32(in[0][7], in[1][7], in[2][7], in[3][7], (offset) + 7); void vpx_idct32x32_1024_add_vsx(const tran_low_t *input, uint8_t *dest, int stride) { int16x8_t src0[4][8], src1[4][8], src2[4][8], src3[4][8], tmp[4][8]; int16x8_t tmp16_0, tmp16_1; int32x4_t temp10, temp11, temp20, temp21, temp30; uint8x16_t dst; int16x8_t d_uh, d_ul; int16x8_t add = vec_sl(vec_splat_s16(8), vec_splat_u16(2)); uint16x8_t shift6 = vec_splat_u16(6); uint8x16_t zerov = vec_splat_u8(0); ROUND_SHIFT_INIT; LOAD_8x32(load_tran_low, src0[0][0], src0[1][0], src0[2][0], src0[3][0], src0[0][1], src0[1][1], src0[2][1], src0[3][1], src0[0][2], src0[1][2], src0[2][2], src0[3][2], src0[0][3], src0[1][3], src0[2][3], src0[3][3], src0[0][4], src0[1][4], src0[2][4], src0[3][4], src0[0][5], src0[1][5], src0[2][5], src0[3][5], src0[0][6], src0[1][6], src0[2][6], src0[3][6], src0[0][7], src0[1][7], src0[2][7], src0[3][7], 0); // Rows // transpose the first row of 8x8 blocks TRANSPOSE_8x32(src0, tmp); // transform the 32x8 column IDCT32(tmp[0], tmp[1], tmp[2], tmp[3], src0); TRANSPOSE_8x32(tmp, src0); LOAD_8x32(load_tran_low, src1[0][0], src1[1][0], src1[2][0], src1[3][0], src1[0][1], src1[1][1], src1[2][1], src1[3][1], src1[0][2], src1[1][2], src1[2][2], src1[3][2], src1[0][3], src1[1][3], src1[2][3], src1[3][3], src1[0][4], src1[1][4], src1[2][4], src1[3][4], src1[0][5], src1[1][5], src1[2][5], src1[3][5], src1[0][6], src1[1][6], src1[2][6], src1[3][6], src1[0][7], src1[1][7], src1[2][7], src1[3][7], 512); 
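/* Note (comment added for clarity; not part of the original libvpx source):
 * each LOAD_8x32 call pulls one 8-row band of the 32x32 coefficient block;
 * with 16-bit tran_low_t a band is 8 * 32 * 2 = 512 bytes, which is why the
 * four bands start at byte offsets 0, 512, 1024 and 1536. Every band is
 * transposed, run through IDCT32 as the row pass, and transposed back before
 * the four column-pass IDCT32 calls. */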
TRANSPOSE_8x32(src1, tmp); IDCT32(tmp[0], tmp[1], tmp[2], tmp[3], src1); TRANSPOSE_8x32(tmp, src1); LOAD_8x32(load_tran_low, src2[0][0], src2[1][0], src2[2][0], src2[3][0], src2[0][1], src2[1][1], src2[2][1], src2[3][1], src2[0][2], src2[1][2], src2[2][2], src2[3][2], src2[0][3], src2[1][3], src2[2][3], src2[3][3], src2[0][4], src2[1][4], src2[2][4], src2[3][4], src2[0][5], src2[1][5], src2[2][5], src2[3][5], src2[0][6], src2[1][6], src2[2][6], src2[3][6], src2[0][7], src2[1][7], src2[2][7], src2[3][7], 1024); TRANSPOSE_8x32(src2, tmp); IDCT32(tmp[0], tmp[1], tmp[2], tmp[3], src2); TRANSPOSE_8x32(tmp, src2); LOAD_8x32(load_tran_low, src3[0][0], src3[1][0], src3[2][0], src3[3][0], src3[0][1], src3[1][1], src3[2][1], src3[3][1], src3[0][2], src3[1][2], src3[2][2], src3[3][2], src3[0][3], src3[1][3], src3[2][3], src3[3][3], src3[0][4], src3[1][4], src3[2][4], src3[3][4], src3[0][5], src3[1][5], src3[2][5], src3[3][5], src3[0][6], src3[1][6], src3[2][6], src3[3][6], src3[0][7], src3[1][7], src3[2][7], src3[3][7], 1536); TRANSPOSE_8x32(src3, tmp); IDCT32(tmp[0], tmp[1], tmp[2], tmp[3], src3); TRANSPOSE_8x32(tmp, src3); // Columns IDCT32(src0[0], src1[0], src2[0], src3[0], tmp); IDCT32(src0[1], src1[1], src2[1], src3[1], tmp); IDCT32(src0[2], src1[2], src2[2], src3[2], tmp); IDCT32(src0[3], src1[3], src2[3], src3[3], tmp); ADD_STORE_BLOCK(src0, 0); ADD_STORE_BLOCK(src1, 8); ADD_STORE_BLOCK(src2, 16); ADD_STORE_BLOCK(src3, 24); } #define TRANSFORM_COLS \ v32_a = vec_add(v32_a, v32_c); \ v32_d = vec_sub(v32_d, v32_b); \ v32_e = vec_sub(v32_a, v32_d); \ v32_e = vec_sra(v32_e, one); \ v32_b = vec_sub(v32_e, v32_b); \ v32_c = vec_sub(v32_e, v32_c); \ v32_a = vec_sub(v32_a, v32_b); \ v32_d = vec_add(v32_d, v32_c); \ v_a = vec_packs(v32_a, v32_b); \ v_c = vec_packs(v32_c, v32_d); #define TRANSPOSE_WHT \ tmp_a = vec_mergeh(v_a, v_c); \ tmp_c = vec_mergel(v_a, v_c); \ v_a = vec_mergeh(tmp_a, tmp_c); \ v_c = vec_mergel(tmp_a, tmp_c); void vpx_iwht4x4_16_add_vsx(const tran_low_t *input, uint8_t *dest, int stride) { int16x8_t v_a = load_tran_low(0, input); int16x8_t v_c = load_tran_low(8 * sizeof(*input), input); int16x8_t tmp_a, tmp_c; uint16x8_t two = vec_splat_u16(2); uint32x4_t one = vec_splat_u32(1); int16x8_t tmp16_0, tmp16_1; int32x4_t v32_a, v32_c, v32_d, v32_b, v32_e; uint8x16_t dest0 = vec_vsx_ld(0, dest); uint8x16_t dest1 = vec_vsx_ld(stride, dest); uint8x16_t dest2 = vec_vsx_ld(2 * stride, dest); uint8x16_t dest3 = vec_vsx_ld(3 * stride, dest); int16x8_t d_u0 = (int16x8_t)unpack_to_u16_h(dest0); int16x8_t d_u1 = (int16x8_t)unpack_to_u16_h(dest1); int16x8_t d_u2 = (int16x8_t)unpack_to_u16_h(dest2); int16x8_t d_u3 = (int16x8_t)unpack_to_u16_h(dest3); uint8x16_t output_v; uint8_t tmp_dest[16]; int i, j; v_a = vec_sra(v_a, two); v_c = vec_sra(v_c, two); TRANSPOSE_WHT; v32_a = vec_unpackh(v_a); v32_c = vec_unpackl(v_a); v32_d = vec_unpackh(v_c); v32_b = vec_unpackl(v_c); TRANSFORM_COLS; TRANSPOSE_WHT; v32_a = vec_unpackh(v_a); v32_c = vec_unpackl(v_a); v32_d = vec_unpackh(v_c); v32_b = vec_unpackl(v_c); TRANSFORM_COLS; PACK_STORE(v_a, v_c); } void vp9_iadst4_vsx(int16x8_t *in, int16x8_t *out) { int16x8_t sinpi_1_3_v, sinpi_4_2_v, sinpi_2_3_v, sinpi_1_4_v, sinpi_12_n3_v; int32x4_t v_v[5], u_v[4]; int32x4_t zerov = vec_splat_s32(0); int16x8_t tmp0, tmp1; int16x8_t zero16v = vec_splat_s16(0); uint32x4_t shift16 = vec_sl(vec_splat_u32(8), vec_splat_u32(1)); ROUND_SHIFT_INIT; sinpi_1_3_v = vec_mergel(sinpi_1_9_v, sinpi_3_9_v); sinpi_4_2_v = vec_mergel(sinpi_4_9_v, sinpi_2_9_v); sinpi_2_3_v = 
vec_mergel(sinpi_2_9_v, sinpi_3_9_v); sinpi_1_4_v = vec_mergel(sinpi_1_9_v, sinpi_4_9_v); sinpi_12_n3_v = vec_mergel(vec_add(sinpi_1_9_v, sinpi_2_9_v), vec_sub(zero16v, sinpi_3_9_v)); tmp0 = (int16x8_t)vec_mergeh((int32x4_t)in[0], (int32x4_t)in[1]); tmp1 = (int16x8_t)vec_mergel((int32x4_t)in[0], (int32x4_t)in[1]); in[0] = (int16x8_t)vec_mergeh((int32x4_t)tmp0, (int32x4_t)tmp1); in[1] = (int16x8_t)vec_mergel((int32x4_t)tmp0, (int32x4_t)tmp1); v_v[0] = vec_msum(in[0], sinpi_1_3_v, zerov); v_v[1] = vec_msum(in[1], sinpi_4_2_v, zerov); v_v[2] = vec_msum(in[0], sinpi_2_3_v, zerov); v_v[3] = vec_msum(in[1], sinpi_1_4_v, zerov); v_v[4] = vec_msum(in[0], sinpi_12_n3_v, zerov); in[0] = vec_sub(in[0], in[1]); in[1] = (int16x8_t)vec_sra((int32x4_t)in[1], shift16); in[0] = vec_add(in[0], in[1]); in[0] = (int16x8_t)vec_sl((int32x4_t)in[0], shift16); u_v[0] = vec_add(v_v[0], v_v[1]); u_v[1] = vec_sub(v_v[2], v_v[3]); u_v[2] = vec_msum(in[0], sinpi_1_3_v, zerov); u_v[3] = vec_sub(v_v[1], v_v[3]); u_v[3] = vec_add(u_v[3], v_v[4]); DCT_CONST_ROUND_SHIFT(u_v[0]); DCT_CONST_ROUND_SHIFT(u_v[1]); DCT_CONST_ROUND_SHIFT(u_v[2]); DCT_CONST_ROUND_SHIFT(u_v[3]); out[0] = vec_packs(u_v[0], u_v[1]); out[1] = vec_packs(u_v[2], u_v[3]); } #define MSUM_ROUND_SHIFT(a, b, cospi) \ b = vec_msums(a, cospi, zerov); \ DCT_CONST_ROUND_SHIFT(b); #define IADST_WRAPLOW(in0, in1, tmp0, tmp1, out, cospi) \ MSUM_ROUND_SHIFT(in0, tmp0, cospi); \ MSUM_ROUND_SHIFT(in1, tmp1, cospi); \ out = vec_packs(tmp0, tmp1); void vp9_iadst8_vsx(int16x8_t *in, int16x8_t *out) { int32x4_t tmp0[16], tmp1[16]; int32x4_t zerov = vec_splat_s32(0); int16x8_t zero16v = vec_splat_s16(0); int16x8_t cospi_p02_p30_v = vec_mergel(cospi2_v, cospi30_v); int16x8_t cospi_p30_m02_v = vec_mergel(cospi30_v, cospi2m_v); int16x8_t cospi_p10_p22_v = vec_mergel(cospi10_v, cospi22_v); int16x8_t cospi_p22_m10_v = vec_mergel(cospi22_v, cospi10m_v); int16x8_t cospi_p18_p14_v = vec_mergel(cospi18_v, cospi14_v); int16x8_t cospi_p14_m18_v = vec_mergel(cospi14_v, cospi18m_v); int16x8_t cospi_p26_p06_v = vec_mergel(cospi26_v, cospi6_v); int16x8_t cospi_p06_m26_v = vec_mergel(cospi6_v, cospi26m_v); int16x8_t cospi_p08_p24_v = vec_mergel(cospi8_v, cospi24_v); int16x8_t cospi_p24_m08_v = vec_mergel(cospi24_v, cospi8m_v); int16x8_t cospi_m24_p08_v = vec_mergel(cospi24m_v, cospi8_v); int16x8_t cospi_p16_m16_v = vec_mergel(cospi16_v, cospi16m_v); ROUND_SHIFT_INIT; TRANSPOSE8x8(in[0], in[1], in[2], in[3], in[4], in[5], in[6], in[7], out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7]); // stage 1 // interleave and multiply/add into 32-bit integer in[0] = vec_mergeh(out[7], out[0]); in[1] = vec_mergel(out[7], out[0]); in[2] = vec_mergeh(out[5], out[2]); in[3] = vec_mergel(out[5], out[2]); in[4] = vec_mergeh(out[3], out[4]); in[5] = vec_mergel(out[3], out[4]); in[6] = vec_mergeh(out[1], out[6]); in[7] = vec_mergel(out[1], out[6]); tmp1[0] = vec_msum(in[0], cospi_p02_p30_v, zerov); tmp1[1] = vec_msum(in[1], cospi_p02_p30_v, zerov); tmp1[2] = vec_msum(in[0], cospi_p30_m02_v, zerov); tmp1[3] = vec_msum(in[1], cospi_p30_m02_v, zerov); tmp1[4] = vec_msum(in[2], cospi_p10_p22_v, zerov); tmp1[5] = vec_msum(in[3], cospi_p10_p22_v, zerov); tmp1[6] = vec_msum(in[2], cospi_p22_m10_v, zerov); tmp1[7] = vec_msum(in[3], cospi_p22_m10_v, zerov); tmp1[8] = vec_msum(in[4], cospi_p18_p14_v, zerov); tmp1[9] = vec_msum(in[5], cospi_p18_p14_v, zerov); tmp1[10] = vec_msum(in[4], cospi_p14_m18_v, zerov); tmp1[11] = vec_msum(in[5], cospi_p14_m18_v, zerov); tmp1[12] = vec_msum(in[6], cospi_p26_p06_v, 
zerov); tmp1[13] = vec_msum(in[7], cospi_p26_p06_v, zerov); tmp1[14] = vec_msum(in[6], cospi_p06_m26_v, zerov); tmp1[15] = vec_msum(in[7], cospi_p06_m26_v, zerov); tmp0[0] = vec_add(tmp1[0], tmp1[8]); tmp0[1] = vec_add(tmp1[1], tmp1[9]); tmp0[2] = vec_add(tmp1[2], tmp1[10]); tmp0[3] = vec_add(tmp1[3], tmp1[11]); tmp0[4] = vec_add(tmp1[4], tmp1[12]); tmp0[5] = vec_add(tmp1[5], tmp1[13]); tmp0[6] = vec_add(tmp1[6], tmp1[14]); tmp0[7] = vec_add(tmp1[7], tmp1[15]); tmp0[8] = vec_sub(tmp1[0], tmp1[8]); tmp0[9] = vec_sub(tmp1[1], tmp1[9]); tmp0[10] = vec_sub(tmp1[2], tmp1[10]); tmp0[11] = vec_sub(tmp1[3], tmp1[11]); tmp0[12] = vec_sub(tmp1[4], tmp1[12]); tmp0[13] = vec_sub(tmp1[5], tmp1[13]); tmp0[14] = vec_sub(tmp1[6], tmp1[14]); tmp0[15] = vec_sub(tmp1[7], tmp1[15]); // shift and rounding DCT_CONST_ROUND_SHIFT(tmp0[0]); DCT_CONST_ROUND_SHIFT(tmp0[1]); DCT_CONST_ROUND_SHIFT(tmp0[2]); DCT_CONST_ROUND_SHIFT(tmp0[3]); DCT_CONST_ROUND_SHIFT(tmp0[4]); DCT_CONST_ROUND_SHIFT(tmp0[5]); DCT_CONST_ROUND_SHIFT(tmp0[6]); DCT_CONST_ROUND_SHIFT(tmp0[7]); DCT_CONST_ROUND_SHIFT(tmp0[8]); DCT_CONST_ROUND_SHIFT(tmp0[9]); DCT_CONST_ROUND_SHIFT(tmp0[10]); DCT_CONST_ROUND_SHIFT(tmp0[11]); DCT_CONST_ROUND_SHIFT(tmp0[12]); DCT_CONST_ROUND_SHIFT(tmp0[13]); DCT_CONST_ROUND_SHIFT(tmp0[14]); DCT_CONST_ROUND_SHIFT(tmp0[15]); // back to 16-bit out[0] = vec_packs(tmp0[0], tmp0[1]); out[1] = vec_packs(tmp0[2], tmp0[3]); out[2] = vec_packs(tmp0[4], tmp0[5]); out[3] = vec_packs(tmp0[6], tmp0[7]); out[4] = vec_packs(tmp0[8], tmp0[9]); out[5] = vec_packs(tmp0[10], tmp0[11]); out[6] = vec_packs(tmp0[12], tmp0[13]); out[7] = vec_packs(tmp0[14], tmp0[15]); // stage 2 in[0] = vec_add(out[0], out[2]); in[1] = vec_add(out[1], out[3]); in[2] = vec_sub(out[0], out[2]); in[3] = vec_sub(out[1], out[3]); in[4] = vec_mergeh(out[4], out[5]); in[5] = vec_mergel(out[4], out[5]); in[6] = vec_mergeh(out[6], out[7]); in[7] = vec_mergel(out[6], out[7]); tmp1[0] = vec_msum(in[4], cospi_p08_p24_v, zerov); tmp1[1] = vec_msum(in[5], cospi_p08_p24_v, zerov); tmp1[2] = vec_msum(in[4], cospi_p24_m08_v, zerov); tmp1[3] = vec_msum(in[5], cospi_p24_m08_v, zerov); tmp1[4] = vec_msum(in[6], cospi_m24_p08_v, zerov); tmp1[5] = vec_msum(in[7], cospi_m24_p08_v, zerov); tmp1[6] = vec_msum(in[6], cospi_p08_p24_v, zerov); tmp1[7] = vec_msum(in[7], cospi_p08_p24_v, zerov); tmp0[0] = vec_add(tmp1[0], tmp1[4]); tmp0[1] = vec_add(tmp1[1], tmp1[5]); tmp0[2] = vec_add(tmp1[2], tmp1[6]); tmp0[3] = vec_add(tmp1[3], tmp1[7]); tmp0[4] = vec_sub(tmp1[0], tmp1[4]); tmp0[5] = vec_sub(tmp1[1], tmp1[5]); tmp0[6] = vec_sub(tmp1[2], tmp1[6]); tmp0[7] = vec_sub(tmp1[3], tmp1[7]); DCT_CONST_ROUND_SHIFT(tmp0[0]); DCT_CONST_ROUND_SHIFT(tmp0[1]); DCT_CONST_ROUND_SHIFT(tmp0[2]); DCT_CONST_ROUND_SHIFT(tmp0[3]); DCT_CONST_ROUND_SHIFT(tmp0[4]); DCT_CONST_ROUND_SHIFT(tmp0[5]); DCT_CONST_ROUND_SHIFT(tmp0[6]); DCT_CONST_ROUND_SHIFT(tmp0[7]); in[4] = vec_packs(tmp0[0], tmp0[1]); in[5] = vec_packs(tmp0[2], tmp0[3]); in[6] = vec_packs(tmp0[4], tmp0[5]); in[7] = vec_packs(tmp0[6], tmp0[7]); // stage 3 out[0] = vec_mergeh(in[2], in[3]); out[1] = vec_mergel(in[2], in[3]); out[2] = vec_mergeh(in[6], in[7]); out[3] = vec_mergel(in[6], in[7]); IADST_WRAPLOW(out[0], out[1], tmp0[0], tmp0[1], in[2], cospi16_v); IADST_WRAPLOW(out[0], out[1], tmp0[0], tmp0[1], in[3], cospi_p16_m16_v); IADST_WRAPLOW(out[2], out[3], tmp0[0], tmp0[1], in[6], cospi16_v); IADST_WRAPLOW(out[2], out[3], tmp0[0], tmp0[1], in[7], cospi_p16_m16_v); out[0] = in[0]; out[2] = in[6]; out[4] = in[3]; out[6] = in[5]; out[1] = 
vec_sub(zero16v, in[4]); out[3] = vec_sub(zero16v, in[2]); out[5] = vec_sub(zero16v, in[7]); out[7] = vec_sub(zero16v, in[1]); } static void iadst16x8_vsx(int16x8_t *in, int16x8_t *out) { int32x4_t tmp0[32], tmp1[32]; int16x8_t tmp16_0[8]; int16x8_t cospi_p01_p31 = vec_mergel(cospi1_v, cospi31_v); int16x8_t cospi_p31_m01 = vec_mergel(cospi31_v, cospi1m_v); int16x8_t cospi_p05_p27 = vec_mergel(cospi5_v, cospi27_v); int16x8_t cospi_p27_m05 = vec_mergel(cospi27_v, cospi5m_v); int16x8_t cospi_p09_p23 = vec_mergel(cospi9_v, cospi23_v); int16x8_t cospi_p23_m09 = vec_mergel(cospi23_v, cospi9m_v); int16x8_t cospi_p13_p19 = vec_mergel(cospi13_v, cospi19_v); int16x8_t cospi_p19_m13 = vec_mergel(cospi19_v, cospi13m_v); int16x8_t cospi_p17_p15 = vec_mergel(cospi17_v, cospi15_v); int16x8_t cospi_p15_m17 = vec_mergel(cospi15_v, cospi17m_v); int16x8_t cospi_p21_p11 = vec_mergel(cospi21_v, cospi11_v); int16x8_t cospi_p11_m21 = vec_mergel(cospi11_v, cospi21m_v); int16x8_t cospi_p25_p07 = vec_mergel(cospi25_v, cospi7_v); int16x8_t cospi_p07_m25 = vec_mergel(cospi7_v, cospi25m_v); int16x8_t cospi_p29_p03 = vec_mergel(cospi29_v, cospi3_v); int16x8_t cospi_p03_m29 = vec_mergel(cospi3_v, cospi29m_v); int16x8_t cospi_p04_p28 = vec_mergel(cospi4_v, cospi28_v); int16x8_t cospi_p28_m04 = vec_mergel(cospi28_v, cospi4m_v); int16x8_t cospi_p20_p12 = vec_mergel(cospi20_v, cospi12_v); int16x8_t cospi_p12_m20 = vec_mergel(cospi12_v, cospi20m_v); int16x8_t cospi_m28_p04 = vec_mergel(cospi28m_v, cospi4_v); int16x8_t cospi_m12_p20 = vec_mergel(cospi12m_v, cospi20_v); int16x8_t cospi_p08_p24 = vec_mergel(cospi8_v, cospi24_v); int16x8_t cospi_p24_m08 = vec_mergel(cospi24_v, cospi8m_v); int16x8_t cospi_m24_p08 = vec_mergel(cospi24m_v, cospi8_v); int32x4_t zerov = vec_splat_s32(0); ROUND_SHIFT_INIT; tmp16_0[0] = vec_mergeh(in[15], in[0]); tmp16_0[1] = vec_mergel(in[15], in[0]); tmp16_0[2] = vec_mergeh(in[13], in[2]); tmp16_0[3] = vec_mergel(in[13], in[2]); tmp16_0[4] = vec_mergeh(in[11], in[4]); tmp16_0[5] = vec_mergel(in[11], in[4]); tmp16_0[6] = vec_mergeh(in[9], in[6]); tmp16_0[7] = vec_mergel(in[9], in[6]); tmp16_0[8] = vec_mergeh(in[7], in[8]); tmp16_0[9] = vec_mergel(in[7], in[8]); tmp16_0[10] = vec_mergeh(in[5], in[10]); tmp16_0[11] = vec_mergel(in[5], in[10]); tmp16_0[12] = vec_mergeh(in[3], in[12]); tmp16_0[13] = vec_mergel(in[3], in[12]); tmp16_0[14] = vec_mergeh(in[1], in[14]); tmp16_0[15] = vec_mergel(in[1], in[14]); tmp0[0] = vec_msum(tmp16_0[0], cospi_p01_p31, zerov); tmp0[1] = vec_msum(tmp16_0[1], cospi_p01_p31, zerov); tmp0[2] = vec_msum(tmp16_0[0], cospi_p31_m01, zerov); tmp0[3] = vec_msum(tmp16_0[1], cospi_p31_m01, zerov); tmp0[4] = vec_msum(tmp16_0[2], cospi_p05_p27, zerov); tmp0[5] = vec_msum(tmp16_0[3], cospi_p05_p27, zerov); tmp0[6] = vec_msum(tmp16_0[2], cospi_p27_m05, zerov); tmp0[7] = vec_msum(tmp16_0[3], cospi_p27_m05, zerov); tmp0[8] = vec_msum(tmp16_0[4], cospi_p09_p23, zerov); tmp0[9] = vec_msum(tmp16_0[5], cospi_p09_p23, zerov); tmp0[10] = vec_msum(tmp16_0[4], cospi_p23_m09, zerov); tmp0[11] = vec_msum(tmp16_0[5], cospi_p23_m09, zerov); tmp0[12] = vec_msum(tmp16_0[6], cospi_p13_p19, zerov); tmp0[13] = vec_msum(tmp16_0[7], cospi_p13_p19, zerov); tmp0[14] = vec_msum(tmp16_0[6], cospi_p19_m13, zerov); tmp0[15] = vec_msum(tmp16_0[7], cospi_p19_m13, zerov); tmp0[16] = vec_msum(tmp16_0[8], cospi_p17_p15, zerov); tmp0[17] = vec_msum(tmp16_0[9], cospi_p17_p15, zerov); tmp0[18] = vec_msum(tmp16_0[8], cospi_p15_m17, zerov); tmp0[19] = vec_msum(tmp16_0[9], cospi_p15_m17, zerov); tmp0[20] = 
vec_msum(tmp16_0[10], cospi_p21_p11, zerov); tmp0[21] = vec_msum(tmp16_0[11], cospi_p21_p11, zerov); tmp0[22] = vec_msum(tmp16_0[10], cospi_p11_m21, zerov); tmp0[23] = vec_msum(tmp16_0[11], cospi_p11_m21, zerov); tmp0[24] = vec_msum(tmp16_0[12], cospi_p25_p07, zerov); tmp0[25] = vec_msum(tmp16_0[13], cospi_p25_p07, zerov); tmp0[26] = vec_msum(tmp16_0[12], cospi_p07_m25, zerov); tmp0[27] = vec_msum(tmp16_0[13], cospi_p07_m25, zerov); tmp0[28] = vec_msum(tmp16_0[14], cospi_p29_p03, zerov); tmp0[29] = vec_msum(tmp16_0[15], cospi_p29_p03, zerov); tmp0[30] = vec_msum(tmp16_0[14], cospi_p03_m29, zerov); tmp0[31] = vec_msum(tmp16_0[15], cospi_p03_m29, zerov); tmp1[0] = vec_add(tmp0[0], tmp0[16]); tmp1[1] = vec_add(tmp0[1], tmp0[17]); tmp1[2] = vec_add(tmp0[2], tmp0[18]); tmp1[3] = vec_add(tmp0[3], tmp0[19]); tmp1[4] = vec_add(tmp0[4], tmp0[20]); tmp1[5] = vec_add(tmp0[5], tmp0[21]); tmp1[6] = vec_add(tmp0[6], tmp0[22]); tmp1[7] = vec_add(tmp0[7], tmp0[23]); tmp1[8] = vec_add(tmp0[8], tmp0[24]); tmp1[9] = vec_add(tmp0[9], tmp0[25]); tmp1[10] = vec_add(tmp0[10], tmp0[26]); tmp1[11] = vec_add(tmp0[11], tmp0[27]); tmp1[12] = vec_add(tmp0[12], tmp0[28]); tmp1[13] = vec_add(tmp0[13], tmp0[29]); tmp1[14] = vec_add(tmp0[14], tmp0[30]); tmp1[15] = vec_add(tmp0[15], tmp0[31]); tmp1[16] = vec_sub(tmp0[0], tmp0[16]); tmp1[17] = vec_sub(tmp0[1], tmp0[17]); tmp1[18] = vec_sub(tmp0[2], tmp0[18]); tmp1[19] = vec_sub(tmp0[3], tmp0[19]); tmp1[20] = vec_sub(tmp0[4], tmp0[20]); tmp1[21] = vec_sub(tmp0[5], tmp0[21]); tmp1[22] = vec_sub(tmp0[6], tmp0[22]); tmp1[23] = vec_sub(tmp0[7], tmp0[23]); tmp1[24] = vec_sub(tmp0[8], tmp0[24]); tmp1[25] = vec_sub(tmp0[9], tmp0[25]); tmp1[26] = vec_sub(tmp0[10], tmp0[26]); tmp1[27] = vec_sub(tmp0[11], tmp0[27]); tmp1[28] = vec_sub(tmp0[12], tmp0[28]); tmp1[29] = vec_sub(tmp0[13], tmp0[29]); tmp1[30] = vec_sub(tmp0[14], tmp0[30]); tmp1[31] = vec_sub(tmp0[15], tmp0[31]); DCT_CONST_ROUND_SHIFT(tmp1[0]); DCT_CONST_ROUND_SHIFT(tmp1[1]); DCT_CONST_ROUND_SHIFT(tmp1[2]); DCT_CONST_ROUND_SHIFT(tmp1[3]); DCT_CONST_ROUND_SHIFT(tmp1[4]); DCT_CONST_ROUND_SHIFT(tmp1[5]); DCT_CONST_ROUND_SHIFT(tmp1[6]); DCT_CONST_ROUND_SHIFT(tmp1[7]); DCT_CONST_ROUND_SHIFT(tmp1[8]); DCT_CONST_ROUND_SHIFT(tmp1[9]); DCT_CONST_ROUND_SHIFT(tmp1[10]); DCT_CONST_ROUND_SHIFT(tmp1[11]); DCT_CONST_ROUND_SHIFT(tmp1[12]); DCT_CONST_ROUND_SHIFT(tmp1[13]); DCT_CONST_ROUND_SHIFT(tmp1[14]); DCT_CONST_ROUND_SHIFT(tmp1[15]); DCT_CONST_ROUND_SHIFT(tmp1[16]); DCT_CONST_ROUND_SHIFT(tmp1[17]); DCT_CONST_ROUND_SHIFT(tmp1[18]); DCT_CONST_ROUND_SHIFT(tmp1[19]); DCT_CONST_ROUND_SHIFT(tmp1[20]); DCT_CONST_ROUND_SHIFT(tmp1[21]); DCT_CONST_ROUND_SHIFT(tmp1[22]); DCT_CONST_ROUND_SHIFT(tmp1[23]); DCT_CONST_ROUND_SHIFT(tmp1[24]); DCT_CONST_ROUND_SHIFT(tmp1[25]); DCT_CONST_ROUND_SHIFT(tmp1[26]); DCT_CONST_ROUND_SHIFT(tmp1[27]); DCT_CONST_ROUND_SHIFT(tmp1[28]); DCT_CONST_ROUND_SHIFT(tmp1[29]); DCT_CONST_ROUND_SHIFT(tmp1[30]); DCT_CONST_ROUND_SHIFT(tmp1[31]); in[0] = vec_packs(tmp1[0], tmp1[1]); in[1] = vec_packs(tmp1[2], tmp1[3]); in[2] = vec_packs(tmp1[4], tmp1[5]); in[3] = vec_packs(tmp1[6], tmp1[7]); in[4] = vec_packs(tmp1[8], tmp1[9]); in[5] = vec_packs(tmp1[10], tmp1[11]); in[6] = vec_packs(tmp1[12], tmp1[13]); in[7] = vec_packs(tmp1[14], tmp1[15]); in[8] = vec_packs(tmp1[16], tmp1[17]); in[9] = vec_packs(tmp1[18], tmp1[19]); in[10] = vec_packs(tmp1[20], tmp1[21]); in[11] = vec_packs(tmp1[22], tmp1[23]); in[12] = vec_packs(tmp1[24], tmp1[25]); in[13] = vec_packs(tmp1[26], tmp1[27]); in[14] = vec_packs(tmp1[28], tmp1[29]); in[15] = 
vec_packs(tmp1[30], tmp1[31]); // stage 2 tmp16_0[0] = vec_mergeh(in[8], in[9]); tmp16_0[1] = vec_mergel(in[8], in[9]); tmp16_0[2] = vec_mergeh(in[10], in[11]); tmp16_0[3] = vec_mergel(in[10], in[11]); tmp16_0[4] = vec_mergeh(in[12], in[13]); tmp16_0[5] = vec_mergel(in[12], in[13]); tmp16_0[6] = vec_mergeh(in[14], in[15]); tmp16_0[7] = vec_mergel(in[14], in[15]); tmp0[0] = vec_msum(tmp16_0[0], cospi_p04_p28, zerov); tmp0[1] = vec_msum(tmp16_0[1], cospi_p04_p28, zerov); tmp0[2] = vec_msum(tmp16_0[0], cospi_p28_m04, zerov); tmp0[3] = vec_msum(tmp16_0[1], cospi_p28_m04, zerov); tmp0[4] = vec_msum(tmp16_0[2], cospi_p20_p12, zerov); tmp0[5] = vec_msum(tmp16_0[3], cospi_p20_p12, zerov); tmp0[6] = vec_msum(tmp16_0[2], cospi_p12_m20, zerov); tmp0[7] = vec_msum(tmp16_0[3], cospi_p12_m20, zerov); tmp0[8] = vec_msum(tmp16_0[4], cospi_m28_p04, zerov); tmp0[9] = vec_msum(tmp16_0[5], cospi_m28_p04, zerov); tmp0[10] = vec_msum(tmp16_0[4], cospi_p04_p28, zerov); tmp0[11] = vec_msum(tmp16_0[5], cospi_p04_p28, zerov); tmp0[12] = vec_msum(tmp16_0[6], cospi_m12_p20, zerov); tmp0[13] = vec_msum(tmp16_0[7], cospi_m12_p20, zerov); tmp0[14] = vec_msum(tmp16_0[6], cospi_p20_p12, zerov); tmp0[15] = vec_msum(tmp16_0[7], cospi_p20_p12, zerov); tmp1[0] = vec_add(tmp0[0], tmp0[8]); tmp1[1] = vec_add(tmp0[1], tmp0[9]); tmp1[2] = vec_add(tmp0[2], tmp0[10]); tmp1[3] = vec_add(tmp0[3], tmp0[11]); tmp1[4] = vec_add(tmp0[4], tmp0[12]); tmp1[5] = vec_add(tmp0[5], tmp0[13]); tmp1[6] = vec_add(tmp0[6], tmp0[14]); tmp1[7] = vec_add(tmp0[7], tmp0[15]); tmp1[8] = vec_sub(tmp0[0], tmp0[8]); tmp1[9] = vec_sub(tmp0[1], tmp0[9]); tmp1[10] = vec_sub(tmp0[2], tmp0[10]); tmp1[11] = vec_sub(tmp0[3], tmp0[11]); tmp1[12] = vec_sub(tmp0[4], tmp0[12]); tmp1[13] = vec_sub(tmp0[5], tmp0[13]); tmp1[14] = vec_sub(tmp0[6], tmp0[14]); tmp1[15] = vec_sub(tmp0[7], tmp0[15]); DCT_CONST_ROUND_SHIFT(tmp1[0]); DCT_CONST_ROUND_SHIFT(tmp1[1]); DCT_CONST_ROUND_SHIFT(tmp1[2]); DCT_CONST_ROUND_SHIFT(tmp1[3]); DCT_CONST_ROUND_SHIFT(tmp1[4]); DCT_CONST_ROUND_SHIFT(tmp1[5]); DCT_CONST_ROUND_SHIFT(tmp1[6]); DCT_CONST_ROUND_SHIFT(tmp1[7]); DCT_CONST_ROUND_SHIFT(tmp1[8]); DCT_CONST_ROUND_SHIFT(tmp1[9]); DCT_CONST_ROUND_SHIFT(tmp1[10]); DCT_CONST_ROUND_SHIFT(tmp1[11]); DCT_CONST_ROUND_SHIFT(tmp1[12]); DCT_CONST_ROUND_SHIFT(tmp1[13]); DCT_CONST_ROUND_SHIFT(tmp1[14]); DCT_CONST_ROUND_SHIFT(tmp1[15]); tmp16_0[0] = vec_add(in[0], in[4]); tmp16_0[1] = vec_add(in[1], in[5]); tmp16_0[2] = vec_add(in[2], in[6]); tmp16_0[3] = vec_add(in[3], in[7]); tmp16_0[4] = vec_sub(in[0], in[4]); tmp16_0[5] = vec_sub(in[1], in[5]); tmp16_0[6] = vec_sub(in[2], in[6]); tmp16_0[7] = vec_sub(in[3], in[7]); tmp16_0[8] = vec_packs(tmp1[0], tmp1[1]); tmp16_0[9] = vec_packs(tmp1[2], tmp1[3]); tmp16_0[10] = vec_packs(tmp1[4], tmp1[5]); tmp16_0[11] = vec_packs(tmp1[6], tmp1[7]); tmp16_0[12] = vec_packs(tmp1[8], tmp1[9]); tmp16_0[13] = vec_packs(tmp1[10], tmp1[11]); tmp16_0[14] = vec_packs(tmp1[12], tmp1[13]); tmp16_0[15] = vec_packs(tmp1[14], tmp1[15]); // stage 3 in[0] = vec_mergeh(tmp16_0[4], tmp16_0[5]); in[1] = vec_mergel(tmp16_0[4], tmp16_0[5]); in[2] = vec_mergeh(tmp16_0[6], tmp16_0[7]); in[3] = vec_mergel(tmp16_0[6], tmp16_0[7]); in[4] = vec_mergeh(tmp16_0[12], tmp16_0[13]); in[5] = vec_mergel(tmp16_0[12], tmp16_0[13]); in[6] = vec_mergeh(tmp16_0[14], tmp16_0[15]); in[7] = vec_mergel(tmp16_0[14], tmp16_0[15]); tmp0[0] = vec_msum(in[0], cospi_p08_p24, zerov); tmp0[1] = vec_msum(in[1], cospi_p08_p24, zerov); tmp0[2] = vec_msum(in[0], cospi_p24_m08, zerov); tmp0[3] = vec_msum(in[1], 
cospi_p24_m08, zerov); tmp0[4] = vec_msum(in[2], cospi_m24_p08, zerov); tmp0[5] = vec_msum(in[3], cospi_m24_p08, zerov); tmp0[6] = vec_msum(in[2], cospi_p08_p24, zerov); tmp0[7] = vec_msum(in[3], cospi_p08_p24, zerov); tmp0[8] = vec_msum(in[4], cospi_p08_p24, zerov); tmp0[9] = vec_msum(in[5], cospi_p08_p24, zerov); tmp0[10] = vec_msum(in[4], cospi_p24_m08, zerov); tmp0[11] = vec_msum(in[5], cospi_p24_m08, zerov); tmp0[12] = vec_msum(in[6], cospi_m24_p08, zerov); tmp0[13] = vec_msum(in[7], cospi_m24_p08, zerov); tmp0[14] = vec_msum(in[6], cospi_p08_p24, zerov); tmp0[15] = vec_msum(in[7], cospi_p08_p24, zerov); tmp1[0] = vec_add(tmp0[0], tmp0[4]); tmp1[1] = vec_add(tmp0[1], tmp0[5]); tmp1[2] = vec_add(tmp0[2], tmp0[6]); tmp1[3] = vec_add(tmp0[3], tmp0[7]); tmp1[4] = vec_sub(tmp0[0], tmp0[4]); tmp1[5] = vec_sub(tmp0[1], tmp0[5]); tmp1[6] = vec_sub(tmp0[2], tmp0[6]); tmp1[7] = vec_sub(tmp0[3], tmp0[7]); tmp1[8] = vec_add(tmp0[8], tmp0[12]); tmp1[9] = vec_add(tmp0[9], tmp0[13]); tmp1[10] = vec_add(tmp0[10], tmp0[14]); tmp1[11] = vec_add(tmp0[11], tmp0[15]); tmp1[12] = vec_sub(tmp0[8], tmp0[12]); tmp1[13] = vec_sub(tmp0[9], tmp0[13]); tmp1[14] = vec_sub(tmp0[10], tmp0[14]); tmp1[15] = vec_sub(tmp0[11], tmp0[15]); DCT_CONST_ROUND_SHIFT(tmp1[0]); DCT_CONST_ROUND_SHIFT(tmp1[1]); DCT_CONST_ROUND_SHIFT(tmp1[2]); DCT_CONST_ROUND_SHIFT(tmp1[3]); DCT_CONST_ROUND_SHIFT(tmp1[4]); DCT_CONST_ROUND_SHIFT(tmp1[5]); DCT_CONST_ROUND_SHIFT(tmp1[6]); DCT_CONST_ROUND_SHIFT(tmp1[7]); DCT_CONST_ROUND_SHIFT(tmp1[8]); DCT_CONST_ROUND_SHIFT(tmp1[9]); DCT_CONST_ROUND_SHIFT(tmp1[10]); DCT_CONST_ROUND_SHIFT(tmp1[11]); DCT_CONST_ROUND_SHIFT(tmp1[12]); DCT_CONST_ROUND_SHIFT(tmp1[13]); DCT_CONST_ROUND_SHIFT(tmp1[14]); DCT_CONST_ROUND_SHIFT(tmp1[15]); in[0] = vec_add(tmp16_0[0], tmp16_0[2]); in[1] = vec_add(tmp16_0[1], tmp16_0[3]); in[2] = vec_sub(tmp16_0[0], tmp16_0[2]); in[3] = vec_sub(tmp16_0[1], tmp16_0[3]); in[4] = vec_packs(tmp1[0], tmp1[1]); in[5] = vec_packs(tmp1[2], tmp1[3]); in[6] = vec_packs(tmp1[4], tmp1[5]); in[7] = vec_packs(tmp1[6], tmp1[7]); in[8] = vec_add(tmp16_0[8], tmp16_0[10]); in[9] = vec_add(tmp16_0[9], tmp16_0[11]); in[10] = vec_sub(tmp16_0[8], tmp16_0[10]); in[11] = vec_sub(tmp16_0[9], tmp16_0[11]); in[12] = vec_packs(tmp1[8], tmp1[9]); in[13] = vec_packs(tmp1[10], tmp1[11]); in[14] = vec_packs(tmp1[12], tmp1[13]); in[15] = vec_packs(tmp1[14], tmp1[15]); // stage 4 out[0] = vec_mergeh(in[2], in[3]); out[1] = vec_mergel(in[2], in[3]); out[2] = vec_mergeh(in[6], in[7]); out[3] = vec_mergel(in[6], in[7]); out[4] = vec_mergeh(in[10], in[11]); out[5] = vec_mergel(in[10], in[11]); out[6] = vec_mergeh(in[14], in[15]); out[7] = vec_mergel(in[14], in[15]); } void vpx_iadst16_vsx(int16x8_t *src0, int16x8_t *src1) { int16x8_t tmp0[16], tmp1[16], tmp2[8]; int32x4_t tmp3, tmp4; int16x8_t zero16v = vec_splat_s16(0); int32x4_t zerov = vec_splat_s32(0); int16x8_t cospi_p16_m16 = vec_mergel(cospi16_v, cospi16m_v); int16x8_t cospi_m16_p16 = vec_mergel(cospi16m_v, cospi16_v); ROUND_SHIFT_INIT; TRANSPOSE8x8(src0[0], src0[2], src0[4], src0[6], src0[8], src0[10], src0[12], src0[14], tmp0[0], tmp0[1], tmp0[2], tmp0[3], tmp0[4], tmp0[5], tmp0[6], tmp0[7]); TRANSPOSE8x8(src1[0], src1[2], src1[4], src1[6], src1[8], src1[10], src1[12], src1[14], tmp1[0], tmp1[1], tmp1[2], tmp1[3], tmp1[4], tmp1[5], tmp1[6], tmp1[7]); TRANSPOSE8x8(src0[1], src0[3], src0[5], src0[7], src0[9], src0[11], src0[13], src0[15], tmp0[8], tmp0[9], tmp0[10], tmp0[11], tmp0[12], tmp0[13], tmp0[14], tmp0[15]); TRANSPOSE8x8(src1[1], src1[3], src1[5], 
src1[7], src1[9], src1[11], src1[13], src1[15], tmp1[8], tmp1[9],
               tmp1[10], tmp1[11], tmp1[12], tmp1[13], tmp1[14], tmp1[15]);

  iadst16x8_vsx(tmp0, tmp2);
  IADST_WRAPLOW(tmp2[0], tmp2[1], tmp3, tmp4, src0[14], cospi16m_v);
  IADST_WRAPLOW(tmp2[0], tmp2[1], tmp3, tmp4, src1[0], cospi_p16_m16);
  IADST_WRAPLOW(tmp2[2], tmp2[3], tmp3, tmp4, src0[8], cospi16_v);
  IADST_WRAPLOW(tmp2[2], tmp2[3], tmp3, tmp4, src1[6], cospi_m16_p16);
  IADST_WRAPLOW(tmp2[4], tmp2[5], tmp3, tmp4, src0[12], cospi16_v);
  IADST_WRAPLOW(tmp2[4], tmp2[5], tmp3, tmp4, src1[2], cospi_m16_p16);
  IADST_WRAPLOW(tmp2[6], tmp2[7], tmp3, tmp4, src0[10], cospi16m_v);
  IADST_WRAPLOW(tmp2[6], tmp2[7], tmp3, tmp4, src1[4], cospi_p16_m16);

  src0[0] = tmp0[0];
  src0[2] = vec_sub(zero16v, tmp0[8]);
  src0[4] = tmp0[12];
  src0[6] = vec_sub(zero16v, tmp0[4]);
  src1[8] = tmp0[5];
  src1[10] = vec_sub(zero16v, tmp0[13]);
  src1[12] = tmp0[9];
  src1[14] = vec_sub(zero16v, tmp0[1]);

  iadst16x8_vsx(tmp1, tmp2);
  IADST_WRAPLOW(tmp2[0], tmp2[1], tmp3, tmp4, src0[15], cospi16m_v);
  IADST_WRAPLOW(tmp2[0], tmp2[1], tmp3, tmp4, src1[1], cospi_p16_m16);
  IADST_WRAPLOW(tmp2[2], tmp2[3], tmp3, tmp4, src0[9], cospi16_v);
  IADST_WRAPLOW(tmp2[2], tmp2[3], tmp3, tmp4, src1[7], cospi_m16_p16);
  IADST_WRAPLOW(tmp2[4], tmp2[5], tmp3, tmp4, src0[13], cospi16_v);
  IADST_WRAPLOW(tmp2[4], tmp2[5], tmp3, tmp4, src1[3], cospi_m16_p16);
  IADST_WRAPLOW(tmp2[6], tmp2[7], tmp3, tmp4, src0[11], cospi16m_v);
  IADST_WRAPLOW(tmp2[6], tmp2[7], tmp3, tmp4, src1[5], cospi_p16_m16);

  src0[1] = tmp1[0];
  src0[3] = vec_sub(zero16v, tmp1[8]);
  src0[5] = tmp1[12];
  src0[7] = vec_sub(zero16v, tmp1[4]);
  src1[9] = tmp1[5];
  src1[11] = vec_sub(zero16v, tmp1[13]);
  src1[13] = tmp1[9];
  src1[15] = vec_sub(zero16v, tmp1[1]);
}
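// Cross-reference note: IADST_WRAPLOW, used throughout the iadst kernels
// above, is the vector form of the scalar dct_const_round_shift() pattern.
// Each vec_msums() accumulates a pair of 16-bit products into every 32-bit
// lane, and DCT_CONST_ROUND_SHIFT then applies what in scalar code
// (vpx_dsp/txfm_common.h) would be written as
//
//   out = ROUND_POWER_OF_TWO(sum, 14);  // DCT_CONST_BITS == 14
//
// before vec_packs() saturates the result back to 16 bits.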
// File: vpx_dsp/ppc/inv_txfm_vsx.h

/*
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VPX_DSP_PPC_INV_TXFM_VSX_H_
#define VPX_VPX_DSP_PPC_INV_TXFM_VSX_H_

#include "vpx_dsp/ppc/types_vsx.h"

void vpx_round_store4x4_vsx(int16x8_t *in, int16x8_t *out, uint8_t *dest,
                            int stride);
void vpx_idct4_vsx(int16x8_t *in, int16x8_t *out);
void vp9_iadst4_vsx(int16x8_t *in, int16x8_t *out);

void vpx_round_store8x8_vsx(int16x8_t *in, uint8_t *dest, int stride);
void vpx_idct8_vsx(int16x8_t *in, int16x8_t *out);
void vp9_iadst8_vsx(int16x8_t *in, int16x8_t *out);

#define LOAD_INPUT16(load, source, offset, step, in) \
  in[0] = load(offset, source);                      \
  in[1] = load((step) + (offset), source);           \
  in[2] = load(2 * (step) + (offset), source);       \
  in[3] = load(3 * (step) + (offset), source);       \
  in[4] = load(4 * (step) + (offset), source);       \
  in[5] = load(5 * (step) + (offset), source);       \
  in[6] = load(6 * (step) + (offset), source);       \
  in[7] = load(7 * (step) + (offset), source);       \
  in[8] = load(8 * (step) + (offset), source);       \
  in[9] = load(9 * (step) + (offset), source);       \
  in[10] = load(10 * (step) + (offset), source);     \
  in[11] = load(11 * (step) + (offset), source);     \
  in[12] = load(12 * (step) + (offset), source);     \
  in[13] = load(13 * (step) + (offset), source);     \
  in[14] = load(14 * (step) + (offset), source);     \
  in[15] = load(15 * (step) + (offset), source);

void vpx_round_store16x16_vsx(int16x8_t *src0, int16x8_t *src1, uint8_t *dest,
                              int stride);
void vpx_idct16_vsx(int16x8_t *src0, int16x8_t *src1);
void vpx_iadst16_vsx(int16x8_t *src0, int16x8_t *src1);

#endif  // VPX_VPX_DSP_PPC_INV_TXFM_VSX_H_

// File: vpx_dsp/ppc/quantize_vsx.c

/*
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/ppc/types_vsx.h"

// Negate 16-bit integers in a when the corresponding signed 16-bit
// integer in b is negative.
static INLINE int16x8_t vec_sign(int16x8_t a, int16x8_t b) {
  const int16x8_t mask = vec_sra(b, vec_shift_sign_s16);
  return vec_xor(vec_add(a, mask), mask);
}

// Sets each 32-bit integer to 1 when the corresponding value in a is
// negative.
static INLINE int32x4_t vec_is_neg(int32x4_t a) {
  return vec_sr(a, vec_shift_sign_s32);
}

// Multiply the packed 16-bit integers in a and b, producing intermediate
// 32-bit integers, and return the high 16 bits of the intermediate integers.
// (a * b) >> 16
static INLINE int16x8_t vec_mulhi(int16x8_t a, int16x8_t b) {
  // madds does ((A * B) >> 15) + C; we need >> 16, so we perform an extra
  // right shift.
  return vec_sra(vec_madds(a, b, vec_zeros_s16), vec_ones_u16);
}

// Quantization function used for 4x4, 8x8 and 16x16 blocks.
static INLINE int16x8_t quantize_coeff(int16x8_t coeff, int16x8_t coeff_abs,
                                       int16x8_t round, int16x8_t quant,
                                       int16x8_t quant_shift, bool16x8_t mask) {
  const int16x8_t rounded = vec_vaddshs(coeff_abs, round);
  int16x8_t qcoeff = vec_mulhi(rounded, quant);
  qcoeff = vec_add(qcoeff, rounded);
  qcoeff = vec_mulhi(qcoeff, quant_shift);
  qcoeff = vec_sign(qcoeff, coeff);
  return vec_and(qcoeff, mask);
}
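// For reference, a scalar sketch of what quantize_coeff() computes per
// coefficient, mirroring the generic C quantizer; the variable names here
// are illustrative only:
//
//   tmp = saturate_s16(abs(coeff) + round);
//   tmp = ((((tmp * quant) >> 16) + tmp) * quant_shift) >> 16;
//   qcoeff = coeff < 0 ? -tmp : tmp;  // vec_sign() restores the sign
//   qcoeff = mask ? qcoeff : 0;       // mask carries the zbin test result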
// Quantization function used for 32x32 blocks.
static INLINE int16x8_t quantize_coeff_32(int16x8_t coeff,
                                          int16x8_t coeff_abs, int16x8_t round,
                                          int16x8_t quant,
                                          int16x8_t quant_shift,
                                          bool16x8_t mask) {
  const int16x8_t rounded = vec_vaddshs(coeff_abs, round);
  int16x8_t qcoeff = vec_mulhi(rounded, quant);
  qcoeff = vec_add(qcoeff, rounded);
  // 32x32 blocks require an extra multiplication by 2; this compensates for
  // the extra right shift added in vec_mulhi, so vec_madds can be used
  // directly instead of vec_mulhi:
  // (((a * b) >> 15) >> 1) << 1 == (a * b) >> 15
  qcoeff = vec_madds(qcoeff, quant_shift, vec_zeros_s16);
  qcoeff = vec_sign(qcoeff, coeff);
  return vec_and(qcoeff, mask);
}

// Dequantization function used for 32x32 blocks. Quantized coeffs of 32x32
// blocks are twice as big as for other block sizes. As such, using
// vec_mladd results in overflow.
static INLINE int16x8_t dequantize_coeff_32(int16x8_t qcoeff,
                                            int16x8_t dequant) {
  int32x4_t dqcoeffe = vec_mule(qcoeff, dequant);
  int32x4_t dqcoeffo = vec_mulo(qcoeff, dequant);
  // Add 1 if negative to round towards zero because the C uses division.
  dqcoeffe = vec_add(dqcoeffe, vec_is_neg(dqcoeffe));
  dqcoeffo = vec_add(dqcoeffo, vec_is_neg(dqcoeffo));
  dqcoeffe = vec_sra(dqcoeffe, vec_ones_u32);
  dqcoeffo = vec_sra(dqcoeffo, vec_ones_u32);
  return (int16x8_t)vec_perm(dqcoeffe, dqcoeffo, vec_perm_odd_even_pack);
}

static INLINE int16x8_t nonzero_scanindex(int16x8_t qcoeff, bool16x8_t mask,
                                          const int16_t *iscan_ptr,
                                          int index) {
  int16x8_t scan = vec_vsx_ld(index, iscan_ptr);
  bool16x8_t zero_coeff = vec_cmpeq(qcoeff, vec_zeros_s16);
  scan = vec_sub(scan, mask);
  return vec_andc(scan, zero_coeff);
}

// Compare packed 16-bit integers across a and return the maximum value in
// every element, i.e. a vector whose lanes all hold the largest lane of a.
static INLINE int16x8_t vec_max_across(int16x8_t a) { a = vec_max(a, vec_perm(a, a, vec_perm64)); a = vec_max(a, vec_perm(a, a, vec_perm32)); return vec_max(a, vec_perm(a, a, vec_perm16)); } void vpx_quantize_b_vsx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) { int16x8_t qcoeff0, qcoeff1, dqcoeff0, dqcoeff1, eob; bool16x8_t zero_mask0, zero_mask1; // First set of 8 coeff starts with DC + 7 AC int16x8_t zbin = vec_vsx_ld(0, zbin_ptr); int16x8_t round = vec_vsx_ld(0, round_ptr); int16x8_t quant = vec_vsx_ld(0, quant_ptr); int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); int16x8_t quant_shift = vec_vsx_ld(0, quant_shift_ptr); int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr); int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); int16x8_t coeff0_abs = vec_abs(coeff0); int16x8_t coeff1_abs = vec_abs(coeff1); zero_mask0 = vec_cmpge(coeff0_abs, zbin); zbin = vec_splat(zbin, 1); zero_mask1 = vec_cmpge(coeff1_abs, zbin); (void)scan_ptr; (void)skip_block; assert(!skip_block); qcoeff0 = quantize_coeff(coeff0, coeff0_abs, round, quant, quant_shift, zero_mask0); vec_vsx_st(qcoeff0, 0, qcoeff_ptr); round = vec_splat(round, 1); quant = vec_splat(quant, 1); quant_shift = vec_splat(quant_shift, 1); qcoeff1 = quantize_coeff(coeff1, coeff1_abs, round, quant, quant_shift, zero_mask1); vec_vsx_st(qcoeff1, 16, qcoeff_ptr); dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16); vec_vsx_st(dqcoeff0, 0, dqcoeff_ptr); dequant = vec_splat(dequant, 1); dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16); vec_vsx_st(dqcoeff1, 16, dqcoeff_ptr); eob = vec_max(nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, 0), nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, 16)); if (n_coeffs > 16) { int index = 16; int off0 = 32; int off1 = 48; int off2 = 64; do { int16x8_t coeff2, coeff2_abs, qcoeff2, dqcoeff2, eob2; bool16x8_t zero_mask2; coeff0 = vec_vsx_ld(off0, coeff_ptr); coeff1 = vec_vsx_ld(off1, coeff_ptr); coeff2 = vec_vsx_ld(off2, coeff_ptr); coeff0_abs = vec_abs(coeff0); coeff1_abs = vec_abs(coeff1); coeff2_abs = vec_abs(coeff2); zero_mask0 = vec_cmpge(coeff0_abs, zbin); zero_mask1 = vec_cmpge(coeff1_abs, zbin); zero_mask2 = vec_cmpge(coeff2_abs, zbin); qcoeff0 = quantize_coeff(coeff0, coeff0_abs, round, quant, quant_shift, zero_mask0); qcoeff1 = quantize_coeff(coeff1, coeff1_abs, round, quant, quant_shift, zero_mask1); qcoeff2 = quantize_coeff(coeff2, coeff2_abs, round, quant, quant_shift, zero_mask2); vec_vsx_st(qcoeff0, off0, qcoeff_ptr); vec_vsx_st(qcoeff1, off1, qcoeff_ptr); vec_vsx_st(qcoeff2, off2, qcoeff_ptr); dqcoeff0 = vec_mladd(qcoeff0, dequant, vec_zeros_s16); dqcoeff1 = vec_mladd(qcoeff1, dequant, vec_zeros_s16); dqcoeff2 = vec_mladd(qcoeff2, dequant, vec_zeros_s16); vec_vsx_st(dqcoeff0, off0, dqcoeff_ptr); vec_vsx_st(dqcoeff1, off1, dqcoeff_ptr); vec_vsx_st(dqcoeff2, off2, dqcoeff_ptr); eob = vec_max(eob, nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, off0)); eob2 = vec_max(nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, off1), nonzero_scanindex(qcoeff2, zero_mask2, iscan_ptr, off2)); eob = vec_max(eob, eob2); index += 24; off0 += 48; off1 += 48; off2 += 48; } while (index < n_coeffs); } eob = vec_max_across(eob); *eob_ptr = eob[0]; } void vpx_quantize_b_32x32_vsx( const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t 
*zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan_ptr, const int16_t *iscan_ptr) { // In stage 1, we quantize 16 coeffs (DC + 15 AC) // In stage 2, we loop 42 times and quantize 24 coeffs per iteration // (32 * 32 - 16) / 24 = 42 int num_itr = 42; // Offsets are in bytes, 16 coeffs = 32 bytes int off0 = 32; int off1 = 48; int off2 = 64; int16x8_t qcoeff0, qcoeff1, eob; bool16x8_t zero_mask0, zero_mask1; int16x8_t zbin = vec_vsx_ld(0, zbin_ptr); int16x8_t round = vec_vsx_ld(0, round_ptr); int16x8_t quant = vec_vsx_ld(0, quant_ptr); int16x8_t dequant = vec_vsx_ld(0, dequant_ptr); int16x8_t quant_shift = vec_vsx_ld(0, quant_shift_ptr); int16x8_t coeff0 = vec_vsx_ld(0, coeff_ptr); int16x8_t coeff1 = vec_vsx_ld(16, coeff_ptr); int16x8_t coeff0_abs = vec_abs(coeff0); int16x8_t coeff1_abs = vec_abs(coeff1); (void)scan_ptr; (void)skip_block; (void)n_coeffs; assert(!skip_block); // 32x32 quantization requires that zbin and round be divided by 2 zbin = vec_sra(vec_add(zbin, vec_ones_s16), vec_ones_u16); round = vec_sra(vec_add(round, vec_ones_s16), vec_ones_u16); zero_mask0 = vec_cmpge(coeff0_abs, zbin); zbin = vec_splat(zbin, 1); // remove DC from zbin zero_mask1 = vec_cmpge(coeff1_abs, zbin); qcoeff0 = quantize_coeff_32(coeff0, coeff0_abs, round, quant, quant_shift, zero_mask0); round = vec_splat(round, 1); // remove DC from round quant = vec_splat(quant, 1); // remove DC from quant quant_shift = vec_splat(quant_shift, 1); // remove DC from quant_shift qcoeff1 = quantize_coeff_32(coeff1, coeff1_abs, round, quant, quant_shift, zero_mask1); vec_vsx_st(qcoeff0, 0, qcoeff_ptr); vec_vsx_st(qcoeff1, 16, qcoeff_ptr); vec_vsx_st(dequantize_coeff_32(qcoeff0, dequant), 0, dqcoeff_ptr); dequant = vec_splat(dequant, 1); // remove DC from dequant vec_vsx_st(dequantize_coeff_32(qcoeff1, dequant), 16, dqcoeff_ptr); eob = vec_max(nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, 0), nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, 16)); do { int16x8_t coeff2, coeff2_abs, qcoeff2, eob2; bool16x8_t zero_mask2; coeff0 = vec_vsx_ld(off0, coeff_ptr); coeff1 = vec_vsx_ld(off1, coeff_ptr); coeff2 = vec_vsx_ld(off2, coeff_ptr); coeff0_abs = vec_abs(coeff0); coeff1_abs = vec_abs(coeff1); coeff2_abs = vec_abs(coeff2); zero_mask0 = vec_cmpge(coeff0_abs, zbin); zero_mask1 = vec_cmpge(coeff1_abs, zbin); zero_mask2 = vec_cmpge(coeff2_abs, zbin); qcoeff0 = quantize_coeff_32(coeff0, coeff0_abs, round, quant, quant_shift, zero_mask0); qcoeff1 = quantize_coeff_32(coeff1, coeff1_abs, round, quant, quant_shift, zero_mask1); qcoeff2 = quantize_coeff_32(coeff2, coeff2_abs, round, quant, quant_shift, zero_mask2); vec_vsx_st(qcoeff0, off0, qcoeff_ptr); vec_vsx_st(qcoeff1, off1, qcoeff_ptr); vec_vsx_st(qcoeff2, off2, qcoeff_ptr); vec_vsx_st(dequantize_coeff_32(qcoeff0, dequant), off0, dqcoeff_ptr); vec_vsx_st(dequantize_coeff_32(qcoeff1, dequant), off1, dqcoeff_ptr); vec_vsx_st(dequantize_coeff_32(qcoeff2, dequant), off2, dqcoeff_ptr); eob = vec_max(eob, nonzero_scanindex(qcoeff0, zero_mask0, iscan_ptr, off0)); eob2 = vec_max(nonzero_scanindex(qcoeff1, zero_mask1, iscan_ptr, off1), nonzero_scanindex(qcoeff2, zero_mask2, iscan_ptr, off2)); eob = vec_max(eob, eob2); // 24 int16_t is 48 bytes off0 += 48; off1 += 48; off2 += 48; num_itr--; } while (num_itr != 0); eob = vec_max_across(eob); *eob_ptr = eob[0]; } 
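// A hedged scalar sketch of the eob bookkeeping shared by both quantizers
// above (names illustrative): the `mask` lanes produced by the zbin compare
// are 0 or -1, so nonzero_scanindex()'s vec_sub(scan, mask) computes
// iscan + 1 for coefficients that passed the zbin test.
//
//   eob = 0;
//   for (each coefficient c)
//     if (qcoeff[c] != 0) eob = max(eob, iscan[c] + 1);
//   *eob_ptr = eob;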
// File: vpx_dsp/ppc/sad_vsx.c

/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdlib.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/ppc/types_vsx.h"

#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

#define PROCESS16(offset)      \
  v_a = vec_vsx_ld(offset, a); \
  v_b = vec_vsx_ld(offset, b); \
  v_abs = vec_absd(v_a, v_b);  \
  v_sad = vec_sum4s(v_abs, v_sad);

#define SAD8(height)                                                     \
  unsigned int vpx_sad8x##height##_vsx(const uint8_t *a, int a_stride,   \
                                       const uint8_t *b, int b_stride) { \
    int y = 0;                                                           \
    uint8x16_t v_a, v_b, v_abs;                                          \
    uint32x4_t v_sad = vec_zeros_u32;                                    \
                                                                         \
    do {                                                                 \
      PROCESS16(0)                                                       \
                                                                         \
      a += a_stride;                                                     \
      b += b_stride;                                                     \
      y++;                                                               \
    } while (y < height);                                                \
                                                                         \
    return v_sad[1] + v_sad[0];                                          \
  }

#define SAD16(height)                                                     \
  unsigned int vpx_sad16x##height##_vsx(const uint8_t *a, int a_stride,   \
                                        const uint8_t *b, int b_stride) { \
    int y = 0;                                                            \
    uint8x16_t v_a, v_b, v_abs;                                           \
    uint32x4_t v_sad = vec_zeros_u32;                                     \
                                                                          \
    do {                                                                  \
      PROCESS16(0);                                                       \
                                                                          \
      a += a_stride;                                                      \
      b += b_stride;                                                      \
      y++;                                                                \
    } while (y < height);                                                 \
                                                                          \
    return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0];                     \
  }

#define SAD32(height)                                                     \
  unsigned int vpx_sad32x##height##_vsx(const uint8_t *a, int a_stride,   \
                                        const uint8_t *b, int b_stride) { \
    int y = 0;                                                            \
    uint8x16_t v_a, v_b, v_abs;                                           \
    uint32x4_t v_sad = vec_zeros_u32;                                     \
                                                                          \
    do {                                                                  \
      PROCESS16(0);                                                       \
      PROCESS16(16);                                                      \
                                                                          \
      a += a_stride;                                                      \
      b += b_stride;                                                      \
      y++;                                                                \
    } while (y < height);                                                 \
                                                                          \
    return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0];                     \
  }

#define SAD64(height)                                                     \
  unsigned int vpx_sad64x##height##_vsx(const uint8_t *a, int a_stride,   \
                                        const uint8_t *b, int b_stride) { \
    int y = 0;                                                            \
    uint8x16_t v_a, v_b, v_abs;                                           \
    uint32x4_t v_sad = vec_zeros_u32;                                     \
                                                                          \
    do {                                                                  \
      PROCESS16(0);                                                       \
      PROCESS16(16);                                                      \
      PROCESS16(32);                                                      \
      PROCESS16(48);                                                      \
                                                                          \
      a += a_stride;                                                      \
      b += b_stride;                                                      \
      y++;                                                                \
    } while (y < height);                                                 \
                                                                          \
    return v_sad[3] + v_sad[2] + v_sad[1] + v_sad[0];                     \
  }

SAD8(4);
SAD8(8);
SAD8(16);
SAD16(8);
SAD16(16);
SAD16(32);
SAD32(16);
SAD32(32);
SAD32(64);
SAD64(32);
SAD64(64);

#define SAD16AVG(height)                                                      \
  unsigned int vpx_sad16x##height##_avg_vsx(                                  \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
      const uint8_t *second_pred) {                                           \
    DECLARE_ALIGNED(16, uint8_t, comp_pred[16 * (height)]);                   \
    vpx_comp_avg_pred_vsx(comp_pred, second_pred, 16, height, ref,            \
                          ref_stride);                                        \
                                                                              \
    return vpx_sad16x##height##_vsx(src, src_stride, comp_pred, 16);          \
  }

#define SAD32AVG(height)                                                      \
  unsigned int vpx_sad32x##height##_avg_vsx(                                  \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
      const uint8_t *second_pred) {                                           \
    DECLARE_ALIGNED(32, uint8_t, comp_pred[32 * (height)]);                   \
    vpx_comp_avg_pred_vsx(comp_pred, second_pred, 32, height, ref,            \
                          ref_stride);                                        \
                                                                              \
    return vpx_sad32x##height##_vsx(src, src_stride, comp_pred, 32);          \
  }

#define SAD64AVG(height)                                                      \
  unsigned int vpx_sad64x##height##_avg_vsx(                                  \
      const uint8_t *src, int src_stride, const uint8_t *ref, int ref_stride, \
      const uint8_t *second_pred) {                                           \
    DECLARE_ALIGNED(64, uint8_t, comp_pred[64 * (height)]);                   \
    vpx_comp_avg_pred_vsx(comp_pred, second_pred, 64, height, ref,            \
                          ref_stride);                                        \
    return vpx_sad64x##height##_vsx(src, src_stride, comp_pred, 64);          \
  }
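// The *_avg_vsx kernels defined above (and instantiated below) reuse the
// plain SAD kernels: vpx_comp_avg_pred_vsx() first builds the rounded
// average of pred and second_pred, and a regular SAD is then taken against
// it. A hedged scalar sketch (illustrative, not upstream code):
//
//   for (i = 0; i < w * h; ++i)
//     comp_pred[i] = (pred[i] + second_pred[i] + 1) >> 1;  // vec_avg
//   return vpx_sadWxH(src, src_stride, comp_pred, w);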
SAD16AVG(8);
SAD16AVG(16);
SAD16AVG(32);
SAD32AVG(16);
SAD32AVG(32);
SAD32AVG(64);
SAD64AVG(32);
SAD64AVG(64);

#define PROCESS16_4D(offset, ref, v_h, v_l) \
  v_b = vec_vsx_ld(offset, ref);            \
  v_bh = unpack_to_s16_h(v_b);              \
  v_bl = unpack_to_s16_l(v_b);              \
  v_subh = vec_sub(v_h, v_bh);              \
  v_subl = vec_sub(v_l, v_bl);              \
  v_absh = vec_abs(v_subh);                 \
  v_absl = vec_abs(v_subl);                 \
  v_sad = vec_sum4s(v_absh, v_sad);         \
  v_sad = vec_sum4s(v_absl, v_sad);

#define UNPACK_SRC(offset, srcv_h, srcv_l) \
  v_a = vec_vsx_ld(offset, src);           \
  srcv_h = unpack_to_s16_h(v_a);           \
  srcv_l = unpack_to_s16_l(v_a);

#define SAD16_4D(height)                                                  \
  void vpx_sad16x##height##x4d_vsx(const uint8_t *src, int src_stride,   \
                                   const uint8_t *const ref_array[],     \
                                   int ref_stride, uint32_t *sad_array) {\
    int i;                                                               \
    int y;                                                               \
    unsigned int sad[4];                                                 \
    uint8x16_t v_a, v_b;                                                 \
    int16x8_t v_ah, v_al, v_bh, v_bl, v_absh, v_absl, v_subh, v_subl;    \
                                                                         \
    for (i = 0; i < 4; i++) sad_array[i] = 0;                            \
                                                                         \
    for (y = 0; y < height; y++) {                                       \
      UNPACK_SRC(y *src_stride, v_ah, v_al);                             \
      for (i = 0; i < 4; i++) {                                          \
        int32x4_t v_sad = vec_splat_s32(0);                              \
        PROCESS16_4D(y *ref_stride, ref_array[i], v_ah, v_al);           \
                                                                         \
        vec_vsx_st((uint32x4_t)v_sad, 0, sad);                           \
        sad_array[i] += (sad[3] + sad[2] + sad[1] + sad[0]);             \
      }                                                                  \
    }                                                                    \
  }

#define SAD32_4D(height)                                                  \
  void vpx_sad32x##height##x4d_vsx(const uint8_t *src, int src_stride,   \
                                   const uint8_t *const ref_array[],     \
                                   int ref_stride, uint32_t *sad_array) {\
    int i;                                                               \
    int y;                                                               \
    unsigned int sad[4];                                                 \
    uint8x16_t v_a, v_b;                                                 \
    int16x8_t v_ah1, v_al1, v_ah2, v_al2, v_bh, v_bl;                    \
    int16x8_t v_absh, v_absl, v_subh, v_subl;                            \
                                                                         \
    for (i = 0; i < 4; i++) sad_array[i] = 0;                            \
                                                                         \
    for (y = 0; y < height; y++) {                                       \
      UNPACK_SRC(y *src_stride, v_ah1, v_al1);                           \
      UNPACK_SRC(y *src_stride + 16, v_ah2, v_al2);                      \
      for (i = 0; i < 4; i++) {                                          \
        int32x4_t v_sad = vec_splat_s32(0);                              \
        PROCESS16_4D(y *ref_stride, ref_array[i], v_ah1, v_al1);         \
        PROCESS16_4D(y *ref_stride + 16, ref_array[i], v_ah2, v_al2);    \
                                                                         \
        vec_vsx_st((uint32x4_t)v_sad, 0, sad);                           \
        sad_array[i] += (sad[3] + sad[2] + sad[1] + sad[0]);             \
      }                                                                  \
    }                                                                    \
  }

#define SAD64_4D(height)                                                  \
  void vpx_sad64x##height##x4d_vsx(const uint8_t *src, int src_stride,   \
                                   const uint8_t *const ref_array[],     \
                                   int ref_stride, uint32_t *sad_array) {\
    int i;                                                               \
    int y;                                                               \
    unsigned int sad[4];                                                 \
    uint8x16_t v_a, v_b;                                                 \
    int16x8_t v_ah1, v_al1, v_ah2, v_al2, v_bh, v_bl;                    \
    int16x8_t v_ah3, v_al3, v_ah4, v_al4;                                \
    int16x8_t v_absh, v_absl, v_subh, v_subl;                            \
                                                                         \
    for (i = 0; i < 4; i++) sad_array[i] = 0;                            \
                                                                         \
    for (y = 0; y < height; y++) {                                       \
      UNPACK_SRC(y *src_stride, v_ah1, v_al1);                           \
      UNPACK_SRC(y *src_stride + 16, v_ah2, v_al2);                      \
      UNPACK_SRC(y *src_stride + 32, v_ah3, v_al3);                      \
      UNPACK_SRC(y *src_stride + 48, v_ah4, v_al4);                      \
      for (i = 0; i < 4; i++) {                                          \
        int32x4_t v_sad = vec_splat_s32(0);                              \
        PROCESS16_4D(y *ref_stride, ref_array[i], v_ah1, v_al1);         \
        PROCESS16_4D(y *ref_stride + 16, ref_array[i], v_ah2, v_al2);    \
        PROCESS16_4D(y *ref_stride + 32, ref_array[i], v_ah3, v_al3);    \
        PROCESS16_4D(y *ref_stride + 48, ref_array[i], v_ah4, v_al4);    \
                                                                         \
        vec_vsx_st((uint32x4_t)v_sad, 0, sad);                           \
        sad_array[i] += (sad[3] + sad[2] + sad[1] + sad[0]);             \
      }                                                                  \
    }                                                                    \
  }

SAD16_4D(8);
SAD16_4D(16);
SAD16_4D(32);
SAD32_4D(16);
SAD32_4D(32);
SAD32_4D(64);
SAD64_4D(32);
SAD64_4D(64);
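// A short usage sketch of the x4d kernels above (the buffers and sizes here
// are hypothetical, for illustration only): each call evaluates one source
// block against four candidate references, unpacking every source row once
// and reusing it for all four SADs.
//
//   const uint8_t *refs[4] = { ref_a, ref_b, ref_c, ref_d };
//   uint32_t sads[4];
//   vpx_sad16x16x4d_vsx(src, src_stride, refs, ref_stride, sads);
//   // sads[i] now holds the SAD of src against refs[i].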
// File: vpx_dsp/ppc/subtract_vsx.c

/*
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx/vpx_integer.h"
#include "vpx_dsp/ppc/types_vsx.h"

static VPX_FORCE_INLINE void subtract_block4x4(
    int16_t *diff, ptrdiff_t diff_stride, const uint8_t *src,
    ptrdiff_t src_stride, const uint8_t *pred, ptrdiff_t pred_stride) {
  int16_t *diff1 = diff + 2 * diff_stride;
  const uint8_t *src1 = src + 2 * src_stride;
  const uint8_t *pred1 = pred + 2 * pred_stride;

  const int16x8_t d0 = vec_vsx_ld(0, diff);
  const int16x8_t d1 = vec_vsx_ld(0, diff + diff_stride);
  const int16x8_t d2 = vec_vsx_ld(0, diff1);
  const int16x8_t d3 = vec_vsx_ld(0, diff1 + diff_stride);

  const uint8x16_t s0 = read4x2(src, (int)src_stride);
  const uint8x16_t p0 = read4x2(pred, (int)pred_stride);
  const uint8x16_t s1 = read4x2(src1, (int)src_stride);
  const uint8x16_t p1 = read4x2(pred1, (int)pred_stride);

  const int16x8_t da = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
  const int16x8_t db = vec_sub(unpack_to_s16_h(s1), unpack_to_s16_h(p1));

  vec_vsx_st(xxpermdi(da, d0, 1), 0, diff);
  vec_vsx_st(xxpermdi(da, d1, 3), 0, diff + diff_stride);
  vec_vsx_st(xxpermdi(db, d2, 1), 0, diff1);
  vec_vsx_st(xxpermdi(db, d3, 3), 0, diff1 + diff_stride);
}

void vpx_subtract_block_vsx(int rows, int cols, int16_t *diff,
                            ptrdiff_t diff_stride, const uint8_t *src,
                            ptrdiff_t src_stride, const uint8_t *pred,
                            ptrdiff_t pred_stride) {
  int r = rows, c;

  switch (cols) {
    case 64:
    case 32:
      do {
        for (c = 0; c < cols; c += 32) {
          const uint8x16_t s0 = vec_vsx_ld(0, src + c);
          const uint8x16_t s1 = vec_vsx_ld(16, src + c);
          const uint8x16_t p0 = vec_vsx_ld(0, pred + c);
          const uint8x16_t p1 = vec_vsx_ld(16, pred + c);
          const int16x8_t d0l =
              vec_sub(unpack_to_s16_l(s0), unpack_to_s16_l(p0));
          const int16x8_t d0h =
              vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
          const int16x8_t d1l =
              vec_sub(unpack_to_s16_l(s1), unpack_to_s16_l(p1));
          const int16x8_t d1h =
              vec_sub(unpack_to_s16_h(s1), unpack_to_s16_h(p1));
          vec_vsx_st(d0h, 0, diff + c);
          vec_vsx_st(d0l, 16, diff + c);
          vec_vsx_st(d1h, 0, diff + c + 16);
          vec_vsx_st(d1l, 16, diff + c + 16);
        }
        diff += diff_stride;
        pred += pred_stride;
        src += src_stride;
      } while (--r);
      break;
    case 16:
      do {
        const uint8x16_t s0 = vec_vsx_ld(0, src);
        const uint8x16_t p0 = vec_vsx_ld(0, pred);
        const int16x8_t d0l = vec_sub(unpack_to_s16_l(s0), unpack_to_s16_l(p0));
        const int16x8_t d0h = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
        vec_vsx_st(d0h, 0, diff);
        vec_vsx_st(d0l, 16, diff);
        diff += diff_stride;
        pred += pred_stride;
        src += src_stride;
      } while (--r);
      break;
    case 8:
      do {
        const uint8x16_t s0 = vec_vsx_ld(0, src);
        const uint8x16_t p0 = vec_vsx_ld(0, pred);
        const int16x8_t d0h = vec_sub(unpack_to_s16_h(s0), unpack_to_s16_h(p0));
        vec_vsx_st(d0h, 0, diff);
        diff += diff_stride;
        pred += pred_stride;
        src += src_stride;
      } while (--r);
      break;
    case 4:
      subtract_block4x4(diff, diff_stride, src, src_stride, pred, pred_stride);
      if (r > 4) {
        diff += 4 * diff_stride;
        pred += 4 * pred_stride;
        src += 4 * src_stride;
        subtract_block4x4(diff, diff_stride, src, src_stride, pred,
                          pred_stride);
      }
      break;
    default: assert(0);  // unreachable
  }
}
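// A note on the xxpermdi stores in subtract_block4x4() above (offered as a
// reading aid, not upstream documentation): a 4-wide row of int16 diffs is
// only one 64-bit doubleword, so each freshly computed row pair (da, db) is
// recombined with the untouched half of the previously loaded diff vectors
// (d0..d3) before the 16-byte store, leaving the neighbouring stride-spaced
// memory intact.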
// File: vpx_dsp/ppc/transpose_vsx.h

/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VPX_DSP_PPC_TRANSPOSE_VSX_H_
#define VPX_VPX_DSP_PPC_TRANSPOSE_VSX_H_

#include "./vpx_config.h"
#include "vpx_dsp/ppc/types_vsx.h"

static INLINE void vpx_transpose_s16_8x8(int16x8_t v[8]) {
  // d = vec_mergeh(a,b):
  // The even elements of the result are obtained left-to-right,
  // from the high elements of a.
  // The odd elements of the result are obtained left-to-right,
  // from the high elements of b.
  //
  // d = vec_mergel(a,b):
  // The even elements of the result are obtained left-to-right,
  // from the low elements of a.
  // The odd elements of the result are obtained left-to-right,
  // from the low elements of b.

  // Example, starting with:
  // v[0]: 00 01 02 03 04 05 06 07
  // v[1]: 10 11 12 13 14 15 16 17
  // v[2]: 20 21 22 23 24 25 26 27
  // v[3]: 30 31 32 33 34 35 36 37
  // v[4]: 40 41 42 43 44 45 46 47
  // v[5]: 50 51 52 53 54 55 56 57
  // v[6]: 60 61 62 63 64 65 66 67
  // v[7]: 70 71 72 73 74 75 76 77

  int16x8_t b0, b1, b2, b3, b4, b5, b6, b7;
  int16x8_t c0, c1, c2, c3, c4, c5, c6, c7;

  b0 = vec_mergeh(v[0], v[4]);
  b1 = vec_mergel(v[0], v[4]);
  b2 = vec_mergeh(v[1], v[5]);
  b3 = vec_mergel(v[1], v[5]);
  b4 = vec_mergeh(v[2], v[6]);
  b5 = vec_mergel(v[2], v[6]);
  b6 = vec_mergeh(v[3], v[7]);
  b7 = vec_mergel(v[3], v[7]);

  // After first merge operation
  // b0: 00 40 01 41 02 42 03 43
  // b1: 04 44 05 45 06 46 07 47
  // b2: 10 50 11 51 12 52 13 53
  // b3: 14 54 15 55 16 56 17 57
  // b4: 20 60 21 61 22 62 23 63
  // b5: 24 64 25 65 26 66 27 67
  // b6: 30 70 31 71 32 72 33 73
  // b7: 34 74 35 75 36 76 37 77

  c0 = vec_mergeh(b0, b4);
  c1 = vec_mergel(b0, b4);
  c2 = vec_mergeh(b1, b5);
  c3 = vec_mergel(b1, b5);
  c4 = vec_mergeh(b2, b6);
  c5 = vec_mergel(b2, b6);
  c6 = vec_mergeh(b3, b7);
  c7 = vec_mergel(b3, b7);

  // After second merge operation
  // c0: 00 20 40 60 01 21 41 61
  // c1: 02 22 42 62 03 23 43 63
  // c2: 04 24 44 64 05 25 45 65
  // c3: 06 26 46 66 07 27 47 67
  // c4: 10 30 50 70 11 31 51 71
  // c5: 12 32 52 72 13 33 53 73
  // c6: 14 34 54 74 15 35 55 75
  // c7: 16 36 56 76 17 37 57 77

  v[0] = vec_mergeh(c0, c4);
  v[1] = vec_mergel(c0, c4);
  v[2] = vec_mergeh(c1, c5);
  v[3] = vec_mergel(c1, c5);
  v[4] = vec_mergeh(c2, c6);
  v[5] = vec_mergel(c2, c6);
  v[6] = vec_mergeh(c3, c7);
  v[7] = vec_mergel(c3, c7);

  // After last merge operation
  // v[0]: 00 10 20 30 40 50 60 70
  // v[1]: 01 11 21 31 41 51 61 71
  // v[2]: 02 12 22 32 42 52 62 72
  // v[3]: 03 13 23 33 43 53 63 73
  // v[4]: 04 14 24 34 44 54 64 74
  // v[5]: 05 15 25 35 45 55 65 75
  // v[6]: 06 16 26 36 46 56 66 76
  // v[7]: 07 17 27 37 47 57 67 77
}

static INLINE void transpose_8x8(const int16x8_t *a, int16x8_t *b) {
  // Stage 1
  const int16x8_t s1_0 = vec_mergeh(a[0], a[4]);
  const int16x8_t s1_1 = vec_mergel(a[0], a[4]);
  const int16x8_t s1_2 = vec_mergeh(a[1], a[5]);
  const int16x8_t s1_3 = vec_mergel(a[1], a[5]);
  const int16x8_t s1_4 = vec_mergeh(a[2], a[6]);
  const int16x8_t s1_5 = vec_mergel(a[2], a[6]);
  const int16x8_t s1_6 = vec_mergeh(a[3], a[7]);
  const int16x8_t s1_7 = vec_mergel(a[3], a[7]);

  // Stage 2
  const int16x8_t s2_0 = vec_mergeh(s1_0, s1_4);
  const int16x8_t s2_1 = vec_mergel(s1_0, s1_4);
  const int16x8_t s2_2 = vec_mergeh(s1_1, s1_5);
  const int16x8_t s2_3 = vec_mergel(s1_1, s1_5);
  const int16x8_t s2_4 = vec_mergeh(s1_2, s1_6);
  const int16x8_t s2_5 = vec_mergel(s1_2, s1_6);
  const int16x8_t s2_6 = vec_mergeh(s1_3, s1_7);
  const int16x8_t s2_7 = vec_mergel(s1_3, s1_7);

  // Stage 3
  b[0] = vec_mergeh(s2_0, s2_4);
  b[1] = vec_mergel(s2_0, s2_4);
  b[2] = vec_mergeh(s2_1, s2_5);
  b[3] = vec_mergel(s2_1, s2_5);
  b[4] = vec_mergeh(s2_2, s2_6);
  b[5] = vec_mergel(s2_2, s2_6);
  b[6] = vec_mergeh(s2_3, s2_7);
  b[7] = vec_mergel(s2_3, s2_7);
}

#endif  // VPX_VPX_DSP_PPC_TRANSPOSE_VSX_H_

// File: vpx_dsp/ppc/txfm_common_vsx.h

/*
 *  Copyright (c) 2018 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VPX_DSP_PPC_TXFM_COMMON_VSX_H_
#define VPX_VPX_DSP_PPC_TXFM_COMMON_VSX_H_

#include "vpx_dsp/ppc/types_vsx.h"

static const int32x4_t vec_dct_const_rounding = { 8192, 8192, 8192, 8192 };

static const uint32x4_t vec_dct_const_bits = { 14, 14, 14, 14 };

static const uint16x8_t vec_dct_scale_log2 = { 2, 2, 2, 2, 2, 2, 2, 2 };

static const int16x8_t cospi1_v = { 16364, 16364, 16364, 16364,
                                    16364, 16364, 16364, 16364 };
static const int16x8_t cospi2_v = { 16305, 16305, 16305, 16305,
                                    16305, 16305, 16305, 16305 };
static const int16x8_t cospi3_v = { 16207, 16207, 16207, 16207,
                                    16207, 16207, 16207, 16207 };
static const int16x8_t cospi4_v = { 16069, 16069, 16069, 16069,
                                    16069, 16069, 16069, 16069 };
static const int16x8_t cospi4m_v = { -16069, -16069, -16069, -16069,
                                     -16069, -16069, -16069, -16069 };
static const int16x8_t cospi5_v = { 15893, 15893, 15893, 15893,
                                    15893, 15893, 15893, 15893 };
static const int16x8_t cospi6_v = { 15679, 15679, 15679, 15679,
                                    15679, 15679, 15679, 15679 };
static const int16x8_t cospi7_v = { 15426, 15426, 15426, 15426,
                                    15426, 15426, 15426, 15426 };
static const int16x8_t cospi8_v = { 15137, 15137, 15137, 15137,
                                    15137, 15137, 15137, 15137 };
static const int16x8_t cospi8m_v = { -15137, -15137, -15137, -15137,
                                     -15137, -15137, -15137, -15137 };
static const int16x8_t cospi9_v = { 14811, 14811, 14811, 14811,
                                    14811, 14811, 14811, 14811 };
static const int16x8_t cospi10_v = { 14449, 14449, 14449, 14449,
                                     14449, 14449, 14449, 14449 };
static const int16x8_t cospi11_v = { 14053, 14053, 14053, 14053,
                                     14053, 14053, 14053, 14053 };
static const int16x8_t cospi12_v = { 13623, 13623, 13623, 13623,
                                     13623, 13623, 13623, 13623 };
static const int16x8_t cospi13_v = { 13160, 13160, 13160, 13160,
                                     13160, 13160, 13160, 13160 };
static const int16x8_t cospi14_v = { 12665, 12665, 12665, 12665,
                                     12665, 12665, 12665, 12665 };
static const int16x8_t cospi15_v = { 12140, 12140, 12140, 12140,
                                     12140, 12140, 12140, 12140 };
static const int16x8_t cospi16_v = { 11585, 11585, 11585, 11585,
                                     11585, 11585, 11585, 11585 };
static const int16x8_t cospi17_v = { 11003, 11003, 11003, 11003,
                                     11003, 11003, 11003, 11003 };
static const int16x8_t cospi18_v = { 10394, 10394, 10394, 10394,
                                     10394, 10394, 10394, 10394 };
static const int16x8_t cospi19_v = { 9760, 9760, 9760, 9760,
                                     9760, 9760, 9760, 9760 };
static const int16x8_t cospi20_v = { 9102, 9102, 9102, 9102,
                                     9102, 9102, 9102, 9102 };
static const int16x8_t cospi20m_v = { -9102, -9102, -9102, -9102,
                                      -9102, -9102, -9102, -9102 };
static const int16x8_t cospi21_v = { 8423, 8423, 8423, 8423,
                                     8423, 8423, 8423, 8423 };
int16x8_t cospi22_v = { 7723, 7723, 7723, 7723, 7723, 7723, 7723, 7723 }; static const int16x8_t cospi23_v = { 7005, 7005, 7005, 7005, 7005, 7005, 7005, 7005 }; static const int16x8_t cospi24_v = { 6270, 6270, 6270, 6270, 6270, 6270, 6270, 6270 }; static const int16x8_t cospi25_v = { 5520, 5520, 5520, 5520, 5520, 5520, 5520, 5520 }; static const int16x8_t cospi26_v = { 4756, 4756, 4756, 4756, 4756, 4756, 4756, 4756 }; static const int16x8_t cospi27_v = { 3981, 3981, 3981, 3981, 3981, 3981, 3981, 3981 }; static const int16x8_t cospi28_v = { 3196, 3196, 3196, 3196, 3196, 3196, 3196, 3196 }; static const int16x8_t cospi29_v = { 2404, 2404, 2404, 2404, 2404, 2404, 2404, 2404 }; static const int16x8_t cospi30_v = { 1606, 1606, 1606, 1606, 1606, 1606, 1606, 1606 }; static const int16x8_t cospi31_v = { 804, 804, 804, 804, 804, 804, 804, 804 }; #endif // VPX_VPX_DSP_PPC_TXFM_COMMON_VSX_H_ libvpx-1.8.2/vpx_dsp/ppc/types_vsx.h000066400000000000000000000115171357355204000175210ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_PPC_TYPES_VSX_H_ #define VPX_VPX_DSP_PPC_TYPES_VSX_H_ #include <altivec.h> typedef vector signed char int8x16_t; typedef vector unsigned char uint8x16_t; typedef vector signed short int16x8_t; typedef vector unsigned short uint16x8_t; typedef vector signed int int32x4_t; typedef vector unsigned int uint32x4_t; typedef vector bool char bool8x16_t; typedef vector bool short bool16x8_t; typedef vector bool int bool32x4_t; #if defined(__clang__) && __clang_major__ < 6 static const uint8x16_t xxpermdi0_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 }; static const uint8x16_t xxpermdi1_perm = { 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F }; static const uint8x16_t xxpermdi2_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 }; static const uint8x16_t xxpermdi3_perm = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F }; #define xxpermdi(a, b, c) vec_perm(a, b, xxpermdi##c##_perm) #elif defined(__GNUC__) && \ (__GNUC__ > 6 || (__GNUC__ == 6 && __GNUC_MINOR__ >= 3)) #define xxpermdi(a, b, c) vec_xxpermdi(a, b, c) #endif #ifdef WORDS_BIGENDIAN #define unpack_to_u16_h(v) \ (uint16x8_t) vec_mergeh(vec_splat_u8(0), (uint8x16_t)v) #define unpack_to_u16_l(v) \ (uint16x8_t) vec_mergel(vec_splat_u8(0), (uint8x16_t)v) #define unpack_to_s16_h(v) \ (int16x8_t) vec_mergeh(vec_splat_u8(0), (uint8x16_t)v) #define unpack_to_s16_l(v) \ (int16x8_t) vec_mergel(vec_splat_u8(0), (uint8x16_t)v) #ifndef xxpermdi #define xxpermdi(a, b, c) vec_xxpermdi(a, b, c) #endif #else #define unpack_to_u16_h(v) \ (uint16x8_t) vec_mergeh((uint8x16_t)v, vec_splat_u8(0)) #define unpack_to_u16_l(v) \ (uint16x8_t) vec_mergel((uint8x16_t)v, vec_splat_u8(0)) #define unpack_to_s16_h(v) \ (int16x8_t) vec_mergeh((uint8x16_t)v, vec_splat_u8(0)) #define unpack_to_s16_l(v) \ (int16x8_t) vec_mergel((uint8x16_t)v, vec_splat_u8(0)) #ifndef xxpermdi #define xxpermdi(a, b, c) vec_xxpermdi(b, a, (((c) >> 1) | ((c)&1) << 1) ^ 3) #endif #endif static INLINE
uint8x16_t read4x2(const uint8_t *a, int stride) { const uint32x4_t a0 = (uint32x4_t)vec_vsx_ld(0, a); const uint32x4_t a1 = (uint32x4_t)vec_vsx_ld(0, a + stride); return (uint8x16_t)vec_mergeh(a0, a1); } #ifndef __POWER9_VECTOR__ #define vec_absd(a, b) vec_sub(vec_max(a, b), vec_min(a, b)) #endif static const uint8x16_t vec_zeros_u8 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static const int16x8_t vec_zeros_s16 = { 0, 0, 0, 0, 0, 0, 0, 0 }; static const int16x8_t vec_ones_s16 = { 1, 1, 1, 1, 1, 1, 1, 1 }; static const int16x8_t vec_twos_s16 = { 2, 2, 2, 2, 2, 2, 2, 2 }; static const uint16x8_t vec_ones_u16 = { 1, 1, 1, 1, 1, 1, 1, 1 }; static const uint32x4_t vec_ones_u32 = { 1, 1, 1, 1 }; static const int32x4_t vec_zeros_s32 = { 0, 0, 0, 0 }; static const uint32x4_t vec_zeros_u32 = { 0, 0, 0, 0 }; static const uint16x8_t vec_shift_sign_s16 = { 15, 15, 15, 15, 15, 15, 15, 15 }; static const uint32x4_t vec_shift_sign_s32 = { 31, 31, 31, 31 }; static const uint8x16_t vec_perm64 = { 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 }; static const uint8x16_t vec_perm32 = { 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x01, 0x02, 0x03 }; static const uint8x16_t vec_perm16 = { 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x00, 0x01 }; static const uint8x16_t vec_perm_odd_even_pack = { 0x00, 0x01, 0x10, 0x11, 0x04, 0x05, 0x14, 0x15, 0x08, 0x09, 0x18, 0x19, 0x0C, 0x0D, 0x1C, 0x1D }; #endif // VPX_VPX_DSP_PPC_TYPES_VSX_H_ libvpx-1.8.2/vpx_dsp/ppc/variance_vsx.c000066400000000000000000000216611357355204000201410ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/ppc/types_vsx.h" uint32_t vpx_get4x4sse_cs_vsx(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride) { int distortion; const int16x8_t a0 = unpack_to_s16_h(read4x2(src_ptr, src_stride)); const int16x8_t a1 = unpack_to_s16_h(read4x2(src_ptr + src_stride * 2, src_stride)); const int16x8_t b0 = unpack_to_s16_h(read4x2(ref_ptr, ref_stride)); const int16x8_t b1 = unpack_to_s16_h(read4x2(ref_ptr + ref_stride * 2, ref_stride)); const int16x8_t d0 = vec_sub(a0, b0); const int16x8_t d1 = vec_sub(a1, b1); const int32x4_t ds = vec_msum(d1, d1, vec_msum(d0, d0, vec_splat_s32(0))); const int32x4_t d = vec_splat(vec_sums(ds, vec_splat_s32(0)), 3); vec_ste(d, 0, &distortion); return distortion; } // TODO(lu_zero): Unroll uint32_t vpx_get_mb_ss_vsx(const int16_t *src_ptr) { unsigned int i, sum = 0; int32x4_t s = vec_splat_s32(0); for (i = 0; i < 256; i += 8) { const int16x8_t v = vec_vsx_ld(0, src_ptr + i); s = vec_msum(v, v, s); } s = vec_splat(vec_sums(s, vec_splat_s32(0)), 3); vec_ste((uint32x4_t)s, 0, &sum); return sum; } void vpx_comp_avg_pred_vsx(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride) { int i, j; /* comp_pred and pred must be 16 byte aligned.
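For reference, vec_avg() below computes the rounded byte-wise mean (a + b + 1) >> 1, matching the C reference vpx_comp_avg_pred_c(); e.g. pred = 10, ref = 13 averages to (10 + 13 + 1) >> 1 = 12.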
*/ assert(((intptr_t)comp_pred & 0xf) == 0); assert(((intptr_t)pred & 0xf) == 0); if (width >= 16) { for (i = 0; i < height; ++i) { for (j = 0; j < width; j += 16) { const uint8x16_t v = vec_avg(vec_vsx_ld(j, pred), vec_vsx_ld(j, ref)); vec_vsx_st(v, j, comp_pred); } comp_pred += width; pred += width; ref += ref_stride; } } else if (width == 8) { // Process 2 lines at a time for (i = 0; i < height / 2; ++i) { const uint8x16_t r0 = vec_vsx_ld(0, ref); const uint8x16_t r1 = vec_vsx_ld(0, ref + ref_stride); const uint8x16_t r = xxpermdi(r0, r1, 0); const uint8x16_t v = vec_avg(vec_vsx_ld(0, pred), r); vec_vsx_st(v, 0, comp_pred); comp_pred += 16; // width * 2; pred += 16; // width * 2; ref += ref_stride * 2; } } else { assert(width == 4); // process 4 lines at a time for (i = 0; i < height / 4; ++i) { const uint32x4_t r0 = (uint32x4_t)vec_vsx_ld(0, ref); const uint32x4_t r1 = (uint32x4_t)vec_vsx_ld(0, ref + ref_stride); const uint32x4_t r2 = (uint32x4_t)vec_vsx_ld(0, ref + ref_stride * 2); const uint32x4_t r3 = (uint32x4_t)vec_vsx_ld(0, ref + ref_stride * 3); const uint8x16_t r = (uint8x16_t)xxpermdi(vec_mergeh(r0, r1), vec_mergeh(r2, r3), 0); const uint8x16_t v = vec_avg(vec_vsx_ld(0, pred), r); vec_vsx_st(v, 0, comp_pred); comp_pred += 16; // width * 4; pred += 16; // width * 4; ref += ref_stride * 4; } } } static INLINE void variance_inner_32(const uint8_t *src_ptr, const uint8_t *ref_ptr, int32x4_t *sum_squared, int32x4_t *sum) { int32x4_t s = *sum; int32x4_t ss = *sum_squared; const uint8x16_t va0 = vec_vsx_ld(0, src_ptr); const uint8x16_t vb0 = vec_vsx_ld(0, ref_ptr); const uint8x16_t va1 = vec_vsx_ld(16, src_ptr); const uint8x16_t vb1 = vec_vsx_ld(16, ref_ptr); const int16x8_t a0 = unpack_to_s16_h(va0); const int16x8_t b0 = unpack_to_s16_h(vb0); const int16x8_t a1 = unpack_to_s16_l(va0); const int16x8_t b1 = unpack_to_s16_l(vb0); const int16x8_t a2 = unpack_to_s16_h(va1); const int16x8_t b2 = unpack_to_s16_h(vb1); const int16x8_t a3 = unpack_to_s16_l(va1); const int16x8_t b3 = unpack_to_s16_l(vb1); const int16x8_t d0 = vec_sub(a0, b0); const int16x8_t d1 = vec_sub(a1, b1); const int16x8_t d2 = vec_sub(a2, b2); const int16x8_t d3 = vec_sub(a3, b3); s = vec_sum4s(d0, s); ss = vec_msum(d0, d0, ss); s = vec_sum4s(d1, s); ss = vec_msum(d1, d1, ss); s = vec_sum4s(d2, s); ss = vec_msum(d2, d2, ss); s = vec_sum4s(d3, s); ss = vec_msum(d3, d3, ss); *sum = s; *sum_squared = ss; } static INLINE void variance(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int w, int h, uint32_t *sse, int *sum) { int i; int32x4_t s = vec_splat_s32(0); int32x4_t ss = vec_splat_s32(0); switch (w) { case 4: for (i = 0; i < h / 2; ++i) { const int16x8_t a0 = unpack_to_s16_h(read4x2(src_ptr, src_stride)); const int16x8_t b0 = unpack_to_s16_h(read4x2(ref_ptr, ref_stride)); const int16x8_t d = vec_sub(a0, b0); s = vec_sum4s(d, s); ss = vec_msum(d, d, ss); src_ptr += src_stride * 2; ref_ptr += ref_stride * 2; } break; case 8: for (i = 0; i < h; ++i) { const int16x8_t a0 = unpack_to_s16_h(vec_vsx_ld(0, src_ptr)); const int16x8_t b0 = unpack_to_s16_h(vec_vsx_ld(0, ref_ptr)); const int16x8_t d = vec_sub(a0, b0); s = vec_sum4s(d, s); ss = vec_msum(d, d, ss); src_ptr += src_stride; ref_ptr += ref_stride; } break; case 16: for (i = 0; i < h; ++i) { const uint8x16_t va = vec_vsx_ld(0, src_ptr); const uint8x16_t vb = vec_vsx_ld(0, ref_ptr); const int16x8_t a0 = unpack_to_s16_h(va); const int16x8_t b0 = unpack_to_s16_h(vb); const int16x8_t a1 = unpack_to_s16_l(va); const int16x8_t b1 =
unpack_to_s16_l(vb); const int16x8_t d0 = vec_sub(a0, b0); const int16x8_t d1 = vec_sub(a1, b1); s = vec_sum4s(d0, s); ss = vec_msum(d0, d0, ss); s = vec_sum4s(d1, s); ss = vec_msum(d1, d1, ss); src_ptr += src_stride; ref_ptr += ref_stride; } break; case 32: for (i = 0; i < h; ++i) { variance_inner_32(src_ptr, ref_ptr, &ss, &s); src_ptr += src_stride; ref_ptr += ref_stride; } break; case 64: for (i = 0; i < h; ++i) { variance_inner_32(src_ptr, ref_ptr, &ss, &s); variance_inner_32(src_ptr + 32, ref_ptr + 32, &ss, &s); src_ptr += src_stride; ref_ptr += ref_stride; } break; } s = vec_splat(vec_sums(s, vec_splat_s32(0)), 3); vec_ste(s, 0, sum); ss = vec_splat(vec_sums(ss, vec_splat_s32(0)), 3); vec_ste((uint32x4_t)ss, 0, sse); } /* Identical to the variance call except it takes an additional parameter, sum, * and returns that value using pass-by-reference instead of returning * sse - sum^2 / w*h */ #define GET_VAR(W, H) \ void vpx_get##W##x##H##var_vsx(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ uint32_t *sse, int *sum) { \ variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \ } /* Identical to the variance call except it does not calculate the * sse - sum^2 / w*h and returns sse in addition to modifying the passed in * variable. */ #define MSE(W, H) \ uint32_t vpx_mse##W##x##H##_vsx(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ uint32_t *sse) { \ int sum; \ variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \ return *sse; \ } #define VAR(W, H) \ uint32_t vpx_variance##W##x##H##_vsx(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ uint32_t *sse) { \ int sum; \ variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \ return *sse - (uint32_t)(((int64_t)sum * sum) / ((W) * (H))); \ } #define VARIANCES(W, H) VAR(W, H) VARIANCES(64, 64) VARIANCES(64, 32) VARIANCES(32, 64) VARIANCES(32, 32) VARIANCES(32, 16) VARIANCES(16, 32) VARIANCES(16, 16) VARIANCES(16, 8) VARIANCES(8, 16) VARIANCES(8, 8) VARIANCES(8, 4) VARIANCES(4, 8) VARIANCES(4, 4) GET_VAR(16, 16) GET_VAR(8, 8) MSE(16, 16) MSE(16, 8) MSE(8, 16) MSE(8, 8) libvpx-1.8.2/vpx_dsp/ppc/vpx_convolve_vsx.c000066400000000000000000000345501357355204000211020ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include <assert.h> #include <string.h> #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/ppc/types_vsx.h" #include "vpx_dsp/vpx_filter.h" // TODO(lu_zero): unroll static VPX_FORCE_INLINE void copy_w16(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int32_t h) { int i; for (i = h; i--;) { vec_vsx_st(vec_vsx_ld(0, src), 0, dst); src += src_stride; dst += dst_stride; } } static VPX_FORCE_INLINE void copy_w32(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int32_t h) { int i; for (i = h; i--;) { vec_vsx_st(vec_vsx_ld(0, src), 0, dst); vec_vsx_st(vec_vsx_ld(16, src), 16, dst); src += src_stride; dst += dst_stride; } } static VPX_FORCE_INLINE void copy_w64(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int32_t h) { int i; for (i = h; i--;) { vec_vsx_st(vec_vsx_ld(0, src), 0, dst); vec_vsx_st(vec_vsx_ld(16, src), 16, dst); vec_vsx_st(vec_vsx_ld(32, src), 32, dst); vec_vsx_st(vec_vsx_ld(48, src), 48, dst); src += src_stride; dst += dst_stride; } } void vpx_convolve_copy_vsx(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int32_t y_step_q4, int32_t w, int32_t h) { (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; switch (w) { case 16: { copy_w16(src, src_stride, dst, dst_stride, h); break; } case 32: { copy_w32(src, src_stride, dst, dst_stride, h); break; } case 64: { copy_w64(src, src_stride, dst, dst_stride, h); break; } default: { int i; for (i = h; i--;) { memcpy(dst, src, w); src += src_stride; dst += dst_stride; } break; } } } static VPX_FORCE_INLINE void avg_w16(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int32_t h) { int i; for (i = h; i--;) { const uint8x16_t v = vec_avg(vec_vsx_ld(0, src), vec_vsx_ld(0, dst)); vec_vsx_st(v, 0, dst); src += src_stride; dst += dst_stride; } } static VPX_FORCE_INLINE void avg_w32(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int32_t h) { int i; for (i = h; i--;) { const uint8x16_t v0 = vec_avg(vec_vsx_ld(0, src), vec_vsx_ld(0, dst)); const uint8x16_t v1 = vec_avg(vec_vsx_ld(16, src), vec_vsx_ld(16, dst)); vec_vsx_st(v0, 0, dst); vec_vsx_st(v1, 16, dst); src += src_stride; dst += dst_stride; } } static VPX_FORCE_INLINE void avg_w64(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, int32_t h) { int i; for (i = h; i--;) { const uint8x16_t v0 = vec_avg(vec_vsx_ld(0, src), vec_vsx_ld(0, dst)); const uint8x16_t v1 = vec_avg(vec_vsx_ld(16, src), vec_vsx_ld(16, dst)); const uint8x16_t v2 = vec_avg(vec_vsx_ld(32, src), vec_vsx_ld(32, dst)); const uint8x16_t v3 = vec_avg(vec_vsx_ld(48, src), vec_vsx_ld(48, dst)); vec_vsx_st(v0, 0, dst); vec_vsx_st(v1, 16, dst); vec_vsx_st(v2, 32, dst); vec_vsx_st(v3, 48, dst); src += src_stride; dst += dst_stride; } } void vpx_convolve_avg_vsx(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int32_t y_step_q4, int32_t w, int32_t h) { switch (w) { case 16: { avg_w16(src, src_stride, dst, dst_stride, h); break; } case 32: { avg_w32(src, src_stride, dst, dst_stride, h); break; } case 64: { avg_w64(src, src_stride, dst, dst_stride, h); break; } default: { vpx_convolve_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); break; } } } static VPX_FORCE_INLINE void convolve_line(uint8_t *dst, const int16x8_t
s, const int16x8_t f) { const int32x4_t sum = vec_msum(s, f, vec_splat_s32(0)); const int32x4_t bias = vec_sl(vec_splat_s32(1), vec_splat_u32(FILTER_BITS - 1)); const int32x4_t avg = vec_sr(vec_sums(sum, bias), vec_splat_u32(FILTER_BITS)); const uint8x16_t v = vec_splat( vec_packsu(vec_pack(avg, vec_splat_s32(0)), vec_splat_s16(0)), 3); vec_ste(v, 0, dst); } static VPX_FORCE_INLINE void convolve_line_h(uint8_t *dst, const uint8_t *const src_x, const int16_t *const x_filter) { const int16x8_t s = unpack_to_s16_h(vec_vsx_ld(0, src_x)); const int16x8_t f = vec_vsx_ld(0, x_filter); convolve_line(dst, s, f); } // TODO(lu_zero): Implement 8x8 and bigger block special cases static VPX_FORCE_INLINE void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4, int x_step_q4, int w, int h) { int x, y; src -= SUBPEL_TAPS / 2 - 1; for (y = 0; y < h; ++y) { int x_q4 = x0_q4; for (x = 0; x < w; ++x) { convolve_line_h(dst + x, &src[x_q4 >> SUBPEL_BITS], x_filters[x_q4 & SUBPEL_MASK]); x_q4 += x_step_q4; } src += src_stride; dst += dst_stride; } } static VPX_FORCE_INLINE void convolve_avg_horiz( const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4, int x_step_q4, int w, int h) { int x, y; src -= SUBPEL_TAPS / 2 - 1; for (y = 0; y < h; ++y) { int x_q4 = x0_q4; for (x = 0; x < w; ++x) { uint8_t v; convolve_line_h(&v, &src[x_q4 >> SUBPEL_BITS], x_filters[x_q4 & SUBPEL_MASK]); dst[x] = ROUND_POWER_OF_TWO(dst[x] + v, 1); x_q4 += x_step_q4; } src += src_stride; dst += dst_stride; } } static uint8x16_t transpose_line_u8_8x8(uint8x16_t a, uint8x16_t b, uint8x16_t c, uint8x16_t d, uint8x16_t e, uint8x16_t f, uint8x16_t g, uint8x16_t h) { uint16x8_t ab = (uint16x8_t)vec_mergeh(a, b); uint16x8_t cd = (uint16x8_t)vec_mergeh(c, d); uint16x8_t ef = (uint16x8_t)vec_mergeh(e, f); uint16x8_t gh = (uint16x8_t)vec_mergeh(g, h); uint32x4_t abcd = (uint32x4_t)vec_mergeh(ab, cd); uint32x4_t efgh = (uint32x4_t)vec_mergeh(ef, gh); return (uint8x16_t)vec_mergeh(abcd, efgh); } static VPX_FORCE_INLINE void convolve_line_v(uint8_t *dst, const uint8_t *const src_y, ptrdiff_t src_stride, const int16_t *const y_filter) { uint8x16_t s0 = vec_vsx_ld(0, src_y + 0 * src_stride); uint8x16_t s1 = vec_vsx_ld(0, src_y + 1 * src_stride); uint8x16_t s2 = vec_vsx_ld(0, src_y + 2 * src_stride); uint8x16_t s3 = vec_vsx_ld(0, src_y + 3 * src_stride); uint8x16_t s4 = vec_vsx_ld(0, src_y + 4 * src_stride); uint8x16_t s5 = vec_vsx_ld(0, src_y + 5 * src_stride); uint8x16_t s6 = vec_vsx_ld(0, src_y + 6 * src_stride); uint8x16_t s7 = vec_vsx_ld(0, src_y + 7 * src_stride); const int16x8_t f = vec_vsx_ld(0, y_filter); uint8_t buf[16]; const uint8x16_t s = transpose_line_u8_8x8(s0, s1, s2, s3, s4, s5, s6, s7); vec_vsx_st(s, 0, buf); convolve_line(dst, unpack_to_s16_h(s), f); } static VPX_FORCE_INLINE void convolve_vert(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4, int y_step_q4, int w, int h) { int x, y; src -= src_stride * (SUBPEL_TAPS / 2 - 1); for (x = 0; x < w; ++x) { int y_q4 = y0_q4; for (y = 0; y < h; ++y) { convolve_line_v(dst + y * dst_stride, &src[(y_q4 >> SUBPEL_BITS) * src_stride], src_stride, y_filters[y_q4 & SUBPEL_MASK]); y_q4 += y_step_q4; } ++src; ++dst; } } static VPX_FORCE_INLINE void convolve_avg_vert( const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4, int 
y_step_q4, int w, int h) { int x, y; src -= src_stride * (SUBPEL_TAPS / 2 - 1); for (x = 0; x < w; ++x) { int y_q4 = y0_q4; for (y = 0; y < h; ++y) { uint8_t v; convolve_line_v(&v, &src[(y_q4 >> SUBPEL_BITS) * src_stride], src_stride, y_filters[y_q4 & SUBPEL_MASK]); dst[y * dst_stride] = ROUND_POWER_OF_TWO(dst[y * dst_stride] + v, 1); y_q4 += y_step_q4; } ++src; ++dst; } } static VPX_FORCE_INLINE void convolve(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *const filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { // Note: Fixed size intermediate buffer, temp, places limits on parameters. // 2d filtering proceeds in 2 steps: // (1) Interpolate horizontally into an intermediate buffer, temp. // (2) Interpolate temp vertically to derive the sub-pixel result. // Deriving the maximum number of rows in the temp buffer (135): // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). // --Largest block size is 64x64 pixels. // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the // original frame (in 1/16th pixel units). // --Must round-up because block may be located at sub-pixel position. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. DECLARE_ALIGNED(16, uint8_t, temp[64 * 135]); const int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; assert(w <= 64); assert(h <= 64); assert(y_step_q4 <= 32); assert(x_step_q4 <= 32); convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, filter, x0_q4, x_step_q4, w, intermediate_height); convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } void vpx_convolve8_horiz_vsx(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { (void)y0_q4; (void)y_step_q4; convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w, h); } void vpx_convolve8_avg_horiz_vsx(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { (void)y0_q4; (void)y_step_q4; convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w, h); } void vpx_convolve8_vert_vsx(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { (void)x0_q4; (void)x_step_q4; convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } void vpx_convolve8_avg_vert_vsx(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { (void)x0_q4; (void)x_step_q4; convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } void vpx_convolve8_vsx(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { convolve(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } void vpx_convolve8_avg_vsx(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { 
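/* Two-pass sketch: vpx_convolve8_vsx() below writes the fully filtered block into the temp[] scratch buffer, and vpx_convolve_avg_vsx() then blends it into dst with a rounded per-pixel average; the filter arguments of that averaging pass are unused, hence the NULL and 0 placeholders. */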
// Fixed size intermediate buffer places limits on parameters. DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]); assert(w <= 64); assert(h <= 64); vpx_convolve8_vsx(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); vpx_convolve_avg_vsx(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h); } libvpx-1.8.2/vpx_dsp/prob.c000066400000000000000000000043571357355204000156320ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./prob.h" const uint8_t vpx_norm[256] = { 0, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; static unsigned int tree_merge_probs_impl(unsigned int i, const vpx_tree_index *tree, const vpx_prob *pre_probs, const unsigned int *counts, vpx_prob *probs) { const int l = tree[i]; const unsigned int left_count = (l <= 0) ? counts[-l] : tree_merge_probs_impl(l, tree, pre_probs, counts, probs); const int r = tree[i + 1]; const unsigned int right_count = (r <= 0) ? counts[-r] : tree_merge_probs_impl(r, tree, pre_probs, counts, probs); const unsigned int ct[2] = { left_count, right_count }; probs[i >> 1] = mode_mv_merge_probs(pre_probs[i >> 1], ct); return left_count + right_count; } void vpx_tree_merge_probs(const vpx_tree_index *tree, const vpx_prob *pre_probs, const unsigned int *counts, vpx_prob *probs) { tree_merge_probs_impl(0, tree, pre_probs, counts, probs); } libvpx-1.8.2/vpx_dsp/prob.h000066400000000000000000000065001357355204000156310ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_PROB_H_ #define VPX_VPX_DSP_PROB_H_ #include <assert.h> #include "./vpx_config.h" #include "./vpx_dsp_common.h" #include "vpx_ports/mem.h" #ifdef __cplusplus extern "C" { #endif typedef uint8_t vpx_prob; #define MAX_PROB 255 #define vpx_prob_half ((vpx_prob)128) typedef int8_t vpx_tree_index; #define TREE_SIZE(leaf_count) (2 * (leaf_count)-2) #define vpx_complement(x) (255 - (x)) #define MODE_MV_COUNT_SAT 20 /* We build coding trees compactly in arrays. Each node of the tree is a pair of vpx_tree_indices. Array index often references a corresponding probability table.
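For example, a tree { -A, 2, -B, -C } (A, B, C being token values) codes a three-symbol alphabet: node 0 chooses between leaf A and node 2 using probs[0], and node 2 chooses between leaves B and C using probs[1] (index 2 >> 1).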
Index <= 0 means done encoding/decoding and value = -Index, Index > 0 means need another bit, specification at index. Nonnegative indices are always even; processing begins at node 0. */ typedef const vpx_tree_index vpx_tree[]; static INLINE vpx_prob get_prob(unsigned int num, unsigned int den) { assert(den != 0); { const int p = (int)(((uint64_t)num * 256 + (den >> 1)) / den); // (p > 255) ? 255 : (p < 1) ? 1 : p; const int clipped_prob = p | ((255 - p) >> 23) | (p == 0); return (vpx_prob)clipped_prob; } } static INLINE vpx_prob get_binary_prob(unsigned int n0, unsigned int n1) { const unsigned int den = n0 + n1; if (den == 0) return 128u; return get_prob(n0, den); } /* This function assumes prob1 and prob2 are already within [1,255] range. */ static INLINE vpx_prob weighted_prob(int prob1, int prob2, int factor) { return ROUND_POWER_OF_TWO(prob1 * (256 - factor) + prob2 * factor, 8); } static INLINE vpx_prob merge_probs(vpx_prob pre_prob, const unsigned int ct[2], unsigned int count_sat, unsigned int max_update_factor) { const vpx_prob prob = get_binary_prob(ct[0], ct[1]); const unsigned int count = VPXMIN(ct[0] + ct[1], count_sat); const unsigned int factor = max_update_factor * count / count_sat; return weighted_prob(pre_prob, prob, factor); } // MODE_MV_MAX_UPDATE_FACTOR (128) * count / MODE_MV_COUNT_SAT; static const int count_to_update_factor[MODE_MV_COUNT_SAT + 1] = { 0, 6, 12, 19, 25, 32, 38, 44, 51, 57, 64, 70, 76, 83, 89, 96, 102, 108, 115, 121, 128 }; static INLINE vpx_prob mode_mv_merge_probs(vpx_prob pre_prob, const unsigned int ct[2]) { const unsigned int den = ct[0] + ct[1]; if (den == 0) { return pre_prob; } else { const unsigned int count = VPXMIN(den, MODE_MV_COUNT_SAT); const unsigned int factor = count_to_update_factor[count]; const vpx_prob prob = get_prob(ct[0], den); return weighted_prob(pre_prob, prob, factor); } } void vpx_tree_merge_probs(const vpx_tree_index *tree, const vpx_prob *pre_probs, const unsigned int *counts, vpx_prob *probs); DECLARE_ALIGNED(16, extern const uint8_t, vpx_norm[256]); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_PROB_H_ libvpx-1.8.2/vpx_dsp/psnr.c000066400000000000000000000215301357355204000156440ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <math.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/psnr.h" #include "vpx_scale/yv12config.h" double vpx_sse_to_psnr(double samples, double peak, double sse) { if (sse > 0.0) { const double psnr = 10.0 * log10(samples * peak * peak / sse); return psnr > MAX_PSNR ? MAX_PSNR : psnr; } else { return MAX_PSNR; } } /* TODO(yaowu): The block_variance calls the unoptimized versions of variance() * and highbd_8_variance(). It should not.
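(The encoder_variance() helpers defined below are plain scalar loops; the SIMD kernels dispatched through ./vpx_dsp_rtcd.h would do the same work faster.)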
*/ static void encoder_variance(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int w, int h, unsigned int *sse, int *sum) { int i, j; *sum = 0; *sse = 0; for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { const int diff = a[j] - b[j]; *sum += diff; *sse += diff * diff; } a += a_stride; b += b_stride; } } #if CONFIG_VP9_HIGHBITDEPTH static void encoder_highbd_variance64(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, int w, int h, uint64_t *sse, int64_t *sum) { int i, j; uint16_t *a = CONVERT_TO_SHORTPTR(a8); uint16_t *b = CONVERT_TO_SHORTPTR(b8); *sum = 0; *sse = 0; for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { const int diff = a[j] - b[j]; *sum += diff; *sse += diff * diff; } a += a_stride; b += b_stride; } } static void encoder_highbd_8_variance(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, int w, int h, unsigned int *sse, int *sum) { uint64_t sse_long = 0; int64_t sum_long = 0; encoder_highbd_variance64(a8, a_stride, b8, b_stride, w, h, &sse_long, &sum_long); *sse = (unsigned int)sse_long; *sum = (int)sum_long; } #endif // CONFIG_VP9_HIGHBITDEPTH static int64_t get_sse(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height) { const int dw = width % 16; const int dh = height % 16; int64_t total_sse = 0; unsigned int sse = 0; int sum = 0; int x, y; if (dw > 0) { encoder_variance(&a[width - dw], a_stride, &b[width - dw], b_stride, dw, height, &sse, &sum); total_sse += sse; } if (dh > 0) { encoder_variance(&a[(height - dh) * a_stride], a_stride, &b[(height - dh) * b_stride], b_stride, width - dw, dh, &sse, &sum); total_sse += sse; } for (y = 0; y < height / 16; ++y) { const uint8_t *pa = a; const uint8_t *pb = b; for (x = 0; x < width / 16; ++x) { vpx_mse16x16(pa, a_stride, pb, b_stride, &sse); total_sse += sse; pa += 16; pb += 16; } a += 16 * a_stride; b += 16 * b_stride; } return total_sse; } #if CONFIG_VP9_HIGHBITDEPTH static int64_t highbd_get_sse_shift(const uint8_t *a8, int a_stride, const uint8_t *b8, int b_stride, int width, int height, unsigned int input_shift) { const uint16_t *a = CONVERT_TO_SHORTPTR(a8); const uint16_t *b = CONVERT_TO_SHORTPTR(b8); int64_t total_sse = 0; int x, y; for (y = 0; y < height; ++y) { for (x = 0; x < width; ++x) { int64_t diff; diff = (a[x] >> input_shift) - (b[x] >> input_shift); total_sse += diff * diff; } a += a_stride; b += b_stride; } return total_sse; } static int64_t highbd_get_sse(const uint8_t *a, int a_stride, const uint8_t *b, int b_stride, int width, int height) { int64_t total_sse = 0; int x, y; const int dw = width % 16; const int dh = height % 16; unsigned int sse = 0; int sum = 0; if (dw > 0) { encoder_highbd_8_variance(&a[width - dw], a_stride, &b[width - dw], b_stride, dw, height, &sse, &sum); total_sse += sse; } if (dh > 0) { encoder_highbd_8_variance(&a[(height - dh) * a_stride], a_stride, &b[(height - dh) * b_stride], b_stride, width - dw, dh, &sse, &sum); total_sse += sse; } for (y = 0; y < height / 16; ++y) { const uint8_t *pa = a; const uint8_t *pb = b; for (x = 0; x < width / 16; ++x) { vpx_highbd_8_mse16x16(pa, a_stride, pb, b_stride, &sse); total_sse += sse; pa += 16; pb += 16; } a += 16 * a_stride; b += 16 * b_stride; } return total_sse; } #endif // CONFIG_VP9_HIGHBITDEPTH int64_t vpx_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) { assert(a->y_crop_width == b->y_crop_width); assert(a->y_crop_height == b->y_crop_height); return get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, a->y_crop_width, 
a->y_crop_height); } #if CONFIG_VP9_HIGHBITDEPTH int64_t vpx_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b) { assert(a->y_crop_width == b->y_crop_width); assert(a->y_crop_height == b->y_crop_height); assert((a->flags & YV12_FLAG_HIGHBITDEPTH) != 0); assert((b->flags & YV12_FLAG_HIGHBITDEPTH) != 0); return highbd_get_sse(a->y_buffer, a->y_stride, b->y_buffer, b->y_stride, a->y_crop_width, a->y_crop_height); } #endif // CONFIG_VP9_HIGHBITDEPTH #if CONFIG_VP9_HIGHBITDEPTH void vpx_calc_highbd_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, PSNR_STATS *psnr, uint32_t bit_depth, uint32_t in_bit_depth) { const int widths[3] = { a->y_crop_width, a->uv_crop_width, a->uv_crop_width }; const int heights[3] = { a->y_crop_height, a->uv_crop_height, a->uv_crop_height }; const uint8_t *a_planes[3] = { a->y_buffer, a->u_buffer, a->v_buffer }; const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride }; const uint8_t *b_planes[3] = { b->y_buffer, b->u_buffer, b->v_buffer }; const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride }; int i; uint64_t total_sse = 0; uint32_t total_samples = 0; const double peak = (double)((1 << in_bit_depth) - 1); const unsigned int input_shift = bit_depth - in_bit_depth; for (i = 0; i < 3; ++i) { const int w = widths[i]; const int h = heights[i]; const uint32_t samples = w * h; uint64_t sse; if (a->flags & YV12_FLAG_HIGHBITDEPTH) { if (input_shift) { sse = highbd_get_sse_shift(a_planes[i], a_strides[i], b_planes[i], b_strides[i], w, h, input_shift); } else { sse = highbd_get_sse(a_planes[i], a_strides[i], b_planes[i], b_strides[i], w, h); } } else { sse = get_sse(a_planes[i], a_strides[i], b_planes[i], b_strides[i], w, h); } psnr->sse[1 + i] = sse; psnr->samples[1 + i] = samples; psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse); total_sse += sse; total_samples += samples; } psnr->sse[0] = total_sse; psnr->samples[0] = total_samples; psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak, (double)total_sse); } #endif // CONFIG_VP9_HIGHBITDEPTH void vpx_calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, PSNR_STATS *psnr) { static const double peak = 255.0; const int widths[3] = { a->y_crop_width, a->uv_crop_width, a->uv_crop_width }; const int heights[3] = { a->y_crop_height, a->uv_crop_height, a->uv_crop_height }; const uint8_t *a_planes[3] = { a->y_buffer, a->u_buffer, a->v_buffer }; const int a_strides[3] = { a->y_stride, a->uv_stride, a->uv_stride }; const uint8_t *b_planes[3] = { b->y_buffer, b->u_buffer, b->v_buffer }; const int b_strides[3] = { b->y_stride, b->uv_stride, b->uv_stride }; int i; uint64_t total_sse = 0; uint32_t total_samples = 0; for (i = 0; i < 3; ++i) { const int w = widths[i]; const int h = heights[i]; const uint32_t samples = w * h; const uint64_t sse = get_sse(a_planes[i], a_strides[i], b_planes[i], b_strides[i], w, h); psnr->sse[1 + i] = sse; psnr->samples[1 + i] = samples; psnr->psnr[1 + i] = vpx_sse_to_psnr(samples, peak, (double)sse); total_sse += sse; total_samples += samples; } psnr->sse[0] = total_sse; psnr->samples[0] = total_samples; psnr->psnr[0] = vpx_sse_to_psnr((double)total_samples, peak, (double)total_sse); } libvpx-1.8.2/vpx_dsp/psnr.h000066400000000000000000000035221357355204000156520ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree.
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_PSNR_H_ #define VPX_VPX_DSP_PSNR_H_ #include "vpx_scale/yv12config.h" #include "vpx/vpx_encoder.h" #define MAX_PSNR 100.0 #ifdef __cplusplus extern "C" { #endif typedef struct vpx_psnr_pkt PSNR_STATS; // TODO(dkovalev) change vpx_sse_to_psnr signature: double -> int64_t /*!\brief Converts SSE to PSNR * * Converts sum of squared errors (SSE) to peak signal-to-noise ratio (PSNR). * * \param[in] samples Number of samples * \param[in] peak Max sample value * \param[in] sse Sum of squared errors */ double vpx_sse_to_psnr(double samples, double peak, double sse); int64_t vpx_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); #if CONFIG_VP9_HIGHBITDEPTH int64_t vpx_highbd_get_y_sse(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b); void vpx_calc_highbd_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, PSNR_STATS *psnr, unsigned int bit_depth, unsigned int in_bit_depth); #endif void vpx_calc_psnr(const YV12_BUFFER_CONFIG *a, const YV12_BUFFER_CONFIG *b, PSNR_STATS *psnr); double vpx_psnrhvs(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, double *phvs_y, double *phvs_u, double *phvs_v, uint32_t bd, uint32_t in_bd); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_PSNR_H_ libvpx-1.8.2/vpx_dsp/psnrhvs.c000066400000000000000000000276611357355204000163740ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. * * This code was originally written by: Gregory Maxwell, at the Daala * project. */ #include <assert.h> #include <math.h> #include <stdio.h> #include <stdlib.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/ssim.h" #include "vpx_ports/system_state.h" #include "vpx_dsp/psnr.h" #if !defined(M_PI) #define M_PI (3.141592653589793238462643) #endif #include <string.h> static void od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x, int xstride) { int i, j; (void)xstride; vpx_fdct8x8(x, y, ystride); for (i = 0; i < 8; i++) for (j = 0; j < 8; j++) *(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3; } #if CONFIG_VP9_HIGHBITDEPTH static void hbd_od_bin_fdct8x8(tran_low_t *y, int ystride, const int16_t *x, int xstride) { int i, j; (void)xstride; vpx_highbd_fdct8x8(x, y, ystride); for (i = 0; i < 8; i++) for (j = 0; j < 8; j++) *(y + ystride * i + j) = (*(y + ystride * i + j) + 4) >> 3; } #endif /* Normalized inverse quantization matrix for 8x8 DCT at the point of * transparency.
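(Here CSF stands for contrast sensitivity function: each of the 64 entries weights one 8x8 DCT basis function by how visible error at that frequency is to the eye.)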
This is not the JPEG based matrix from the paper, this one gives a slightly higher MOS agreement.*/ static const double csf_y[8][8] = { { 1.6193873005, 2.2901594831, 2.08509755623, 1.48366094411, 1.00227514334, 0.678296995242, 0.466224900598, 0.3265091542 }, { 2.2901594831, 1.94321815382, 2.04793073064, 1.68731108984, 1.2305666963, 0.868920337363, 0.61280991668, 0.436405793551 }, { 2.08509755623, 2.04793073064, 1.34329019223, 1.09205635862, 0.875748795257, 0.670882927016, 0.501731932449, 0.372504254596 }, { 1.48366094411, 1.68731108984, 1.09205635862, 0.772819797575, 0.605636379554, 0.48309405692, 0.380429446972, 0.295774038565 }, { 1.00227514334, 1.2305666963, 0.875748795257, 0.605636379554, 0.448996256676, 0.352889268808, 0.283006984131, 0.226951348204 }, { 0.678296995242, 0.868920337363, 0.670882927016, 0.48309405692, 0.352889268808, 0.27032073436, 0.215017739696, 0.17408067321 }, { 0.466224900598, 0.61280991668, 0.501731932449, 0.380429446972, 0.283006984131, 0.215017739696, 0.168869545842, 0.136153931001 }, { 0.3265091542, 0.436405793551, 0.372504254596, 0.295774038565, 0.226951348204, 0.17408067321, 0.136153931001, 0.109083846276 } }; static const double csf_cb420[8][8] = { { 1.91113096927, 2.46074210438, 1.18284184739, 1.14982565193, 1.05017074788, 0.898018824055, 0.74725392039, 0.615105596242 }, { 2.46074210438, 1.58529308355, 1.21363250036, 1.38190029285, 1.33100189972, 1.17428548929, 0.996404342439, 0.830890433625 }, { 1.18284184739, 1.21363250036, 0.978712413627, 1.02624506078, 1.03145147362, 0.960060382087, 0.849823426169, 0.731221236837 }, { 1.14982565193, 1.38190029285, 1.02624506078, 0.861317501629, 0.801821139099, 0.751437590932, 0.685398513368, 0.608694761374 }, { 1.05017074788, 1.33100189972, 1.03145147362, 0.801821139099, 0.676555426187, 0.605503172737, 0.55002013668, 0.495804539034 }, { 0.898018824055, 1.17428548929, 0.960060382087, 0.751437590932, 0.605503172737, 0.514674450957, 0.454353482512, 0.407050308965 }, { 0.74725392039, 0.996404342439, 0.849823426169, 0.685398513368, 0.55002013668, 0.454353482512, 0.389234902883, 0.342353999733 }, { 0.615105596242, 0.830890433625, 0.731221236837, 0.608694761374, 0.495804539034, 0.407050308965, 0.342353999733, 0.295530605237 } }; static const double csf_cr420[8][8] = { { 2.03871978502, 2.62502345193, 1.26180942886, 1.11019789803, 1.01397751469, 0.867069376285, 0.721500455585, 0.593906509971 }, { 2.62502345193, 1.69112867013, 1.17180569821, 1.3342742857, 1.28513006198, 1.13381474809, 0.962064122248, 0.802254508198 }, { 1.26180942886, 1.17180569821, 0.944981930573, 0.990876405848, 0.995903384143, 0.926972725286, 0.820534991409, 0.706020324706 }, { 1.11019789803, 1.3342742857, 0.990876405848, 0.831632933426, 0.77418706195, 0.725539939514, 0.661776842059, 0.587716619023 }, { 1.01397751469, 1.28513006198, 0.995903384143, 0.77418706195, 0.653238524286, 0.584635025748, 0.531064164893, 0.478717061273 }, { 0.867069376285, 1.13381474809, 0.926972725286, 0.725539939514, 0.584635025748, 0.496936637883, 0.438694579826, 0.393021669543 }, { 0.721500455585, 0.962064122248, 0.820534991409, 0.661776842059, 0.531064164893, 0.438694579826, 0.375820256136, 0.330555063063 }, { 0.593906509971, 0.802254508198, 0.706020324706, 0.587716619023, 0.478717061273, 0.393021669543, 0.330555063063, 0.285345396658 } }; static double convert_score_db(double _score, double _weight, int bit_depth) { int16_t pix_max = 255; assert(_score * _weight >= 0.0); if (bit_depth == 10) pix_max = 1023; else if (bit_depth == 12) pix_max = 4095; if (_weight * _score < pix_max * 
pix_max * 1e-10) return MAX_PSNR; return 10 * (log10(pix_max * pix_max) - log10(_weight * _score)); } static double calc_psnrhvs(const unsigned char *src, int _systride, const unsigned char *dst, int _dystride, double _par, int _w, int _h, int _step, const double _csf[8][8], uint32_t bit_depth, uint32_t _shift) { double ret; const uint8_t *_src8 = src; const uint8_t *_dst8 = dst; const uint16_t *_src16 = CONVERT_TO_SHORTPTR(src); const uint16_t *_dst16 = CONVERT_TO_SHORTPTR(dst); DECLARE_ALIGNED(16, int16_t, dct_s[8 * 8]); DECLARE_ALIGNED(16, int16_t, dct_d[8 * 8]); DECLARE_ALIGNED(16, tran_low_t, dct_s_coef[8 * 8]); DECLARE_ALIGNED(16, tran_low_t, dct_d_coef[8 * 8]); double mask[8][8]; int pixels; int x; int y; (void)_par; ret = pixels = 0; /*In the PSNR-HVS-M paper[1] the authors describe the construction of their masking table as "we have used the quantization table for the color component Y of JPEG [6] that has been also obtained on the basis of CSF. Note that the values in quantization table JPEG have been normalized and then squared." Their CSF matrix (from PSNR-HVS) was also constructed from the JPEG matrices. I can not find any obvious scheme of normalizing to produce their table, but if I multiply their CSF by 0.3885746225901003 and square the result I get their masking table. I have no idea where this constant comes from, but deviating from it too greatly hurts MOS agreement. [1] Nikolay Ponomarenko, Flavia Silvestri, Karen Egiazarian, Marco Carli, Jaakko Astola, Vladimir Lukin, "On between-coefficient contrast masking of DCT basis functions", CD-ROM Proceedings of the Third International Workshop on Video Processing and Quality Metrics for Consumer Electronics VPQM-07, Scottsdale, Arizona, USA, 25-26 January, 2007, 4 p. Suggested in aomedia issue #2363: 0.3885746225901003 is a reciprocal of the maximum coefficient (2.573509) of the old JPEG based matrix from the paper. Since you are not using that, divide by actual maximum coefficient. 
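As a worked check: for csf_y the maximum entry is csf_y[1][0] = 2.2901594831, which is the value the loop below divides by, so e.g. the DC term of the mask becomes (1.6193873005 / 2.2901594831)^2, approximately 0.50.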
*/ for (x = 0; x < 8; x++) for (y = 0; y < 8; y++) mask[x][y] = (_csf[x][y] / _csf[1][0]) * (_csf[x][y] / _csf[1][0]); for (y = 0; y < _h - 7; y += _step) { for (x = 0; x < _w - 7; x += _step) { int i; int j; double s_means[4]; double d_means[4]; double s_vars[4]; double d_vars[4]; double s_gmean = 0; double d_gmean = 0; double s_gvar = 0; double d_gvar = 0; double s_mask = 0; double d_mask = 0; for (i = 0; i < 4; i++) s_means[i] = d_means[i] = s_vars[i] = d_vars[i] = 0; for (i = 0; i < 8; i++) { for (j = 0; j < 8; j++) { int sub = ((i & 12) >> 2) + ((j & 12) >> 1); if (bit_depth == 8 && _shift == 0) { dct_s[i * 8 + j] = _src8[(y + i) * _systride + (j + x)]; dct_d[i * 8 + j] = _dst8[(y + i) * _dystride + (j + x)]; } else if (bit_depth == 10 || bit_depth == 12) { dct_s[i * 8 + j] = _src16[(y + i) * _systride + (j + x)] >> _shift; dct_d[i * 8 + j] = _dst16[(y + i) * _dystride + (j + x)] >> _shift; } s_gmean += dct_s[i * 8 + j]; d_gmean += dct_d[i * 8 + j]; s_means[sub] += dct_s[i * 8 + j]; d_means[sub] += dct_d[i * 8 + j]; } } s_gmean /= 64.f; d_gmean /= 64.f; for (i = 0; i < 4; i++) s_means[i] /= 16.f; for (i = 0; i < 4; i++) d_means[i] /= 16.f; for (i = 0; i < 8; i++) { for (j = 0; j < 8; j++) { int sub = ((i & 12) >> 2) + ((j & 12) >> 1); s_gvar += (dct_s[i * 8 + j] - s_gmean) * (dct_s[i * 8 + j] - s_gmean); d_gvar += (dct_d[i * 8 + j] - d_gmean) * (dct_d[i * 8 + j] - d_gmean); s_vars[sub] += (dct_s[i * 8 + j] - s_means[sub]) * (dct_s[i * 8 + j] - s_means[sub]); d_vars[sub] += (dct_d[i * 8 + j] - d_means[sub]) * (dct_d[i * 8 + j] - d_means[sub]); } } s_gvar *= 1 / 63.f * 64; d_gvar *= 1 / 63.f * 64; for (i = 0; i < 4; i++) s_vars[i] *= 1 / 15.f * 16; for (i = 0; i < 4; i++) d_vars[i] *= 1 / 15.f * 16; if (s_gvar > 0) s_gvar = (s_vars[0] + s_vars[1] + s_vars[2] + s_vars[3]) / s_gvar; if (d_gvar > 0) d_gvar = (d_vars[0] + d_vars[1] + d_vars[2] + d_vars[3]) / d_gvar; #if CONFIG_VP9_HIGHBITDEPTH if (bit_depth == 10 || bit_depth == 12) { hbd_od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8); hbd_od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8); } #endif if (bit_depth == 8) { od_bin_fdct8x8(dct_s_coef, 8, dct_s, 8); od_bin_fdct8x8(dct_d_coef, 8, dct_d, 8); } for (i = 0; i < 8; i++) for (j = (i == 0); j < 8; j++) s_mask += dct_s_coef[i * 8 + j] * dct_s_coef[i * 8 + j] * mask[i][j]; for (i = 0; i < 8; i++) for (j = (i == 0); j < 8; j++) d_mask += dct_d_coef[i * 8 + j] * dct_d_coef[i * 8 + j] * mask[i][j]; s_mask = sqrt(s_mask * s_gvar) / 32.f; d_mask = sqrt(d_mask * d_gvar) / 32.f; if (d_mask > s_mask) s_mask = d_mask; for (i = 0; i < 8; i++) { for (j = 0; j < 8; j++) { double err; err = fabs((double)(dct_s_coef[i * 8 + j] - dct_d_coef[i * 8 + j])); if (i != 0 || j != 0) err = err < s_mask / mask[i][j] ? 
0 : err - s_mask / mask[i][j]; ret += (err * _csf[i][j]) * (err * _csf[i][j]); pixels++; } } } } if (pixels <= 0) return 0; ret /= pixels; return ret; } double vpx_psnrhvs(const YV12_BUFFER_CONFIG *src, const YV12_BUFFER_CONFIG *dest, double *y_psnrhvs, double *u_psnrhvs, double *v_psnrhvs, uint32_t bd, uint32_t in_bd) { double psnrhvs; const double par = 1.0; const int step = 7; uint32_t bd_shift = 0; vpx_clear_system_state(); assert(bd == 8 || bd == 10 || bd == 12); assert(bd >= in_bd); bd_shift = bd - in_bd; *y_psnrhvs = calc_psnrhvs(src->y_buffer, src->y_stride, dest->y_buffer, dest->y_stride, par, src->y_crop_width, src->y_crop_height, step, csf_y, bd, bd_shift); *u_psnrhvs = calc_psnrhvs(src->u_buffer, src->uv_stride, dest->u_buffer, dest->uv_stride, par, src->uv_crop_width, src->uv_crop_height, step, csf_cb420, bd, bd_shift); *v_psnrhvs = calc_psnrhvs(src->v_buffer, src->uv_stride, dest->v_buffer, dest->uv_stride, par, src->uv_crop_width, src->uv_crop_height, step, csf_cr420, bd, bd_shift); psnrhvs = (*y_psnrhvs) * .8 + .1 * ((*u_psnrhvs) + (*v_psnrhvs)); return convert_score_db(psnrhvs, 1.0, in_bd); } libvpx-1.8.2/vpx_dsp/quantize.c000066400000000000000000000300011357355204000165230ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/quantize.h" #include "vpx_dsp/vpx_dsp_common.h" #include "vpx_mem/vpx_mem.h" void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant, uint16_t *eob_ptr) { const int rc = 0; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int tmp, eob = -1; memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); tmp = (tmp * quant) >> 16; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant; if (tmp) eob = 0; } *eob_ptr = eob + 1; } #if CONFIG_VP9_HIGHBITDEPTH void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant, uint16_t *eob_ptr) { int eob = -1; memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { const int coeff = coeff_ptr[0]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; const int64_t tmp = abs_coeff + round_ptr[0]; const int abs_qcoeff = (int)((tmp * quant) >> 16); qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant; if (abs_qcoeff) eob = 0; } *eob_ptr = eob + 1; } #endif void vpx_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant, uint16_t *eob_ptr) { const int n_coeffs = 1024; const int
rc = 0; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; int tmp, eob = -1; memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { tmp = clamp(abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1), INT16_MIN, INT16_MAX); tmp = (tmp * quant) >> 15; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant / 2; if (tmp) eob = 0; } *eob_ptr = eob + 1; } #if CONFIG_VP9_HIGHBITDEPTH void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block, const int16_t *round_ptr, const int16_t quant, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t dequant, uint16_t *eob_ptr) { const int n_coeffs = 1024; int eob = -1; memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); if (!skip_block) { const int coeff = coeff_ptr[0]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; const int64_t tmp = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[0], 1); const int abs_qcoeff = (int)((tmp * quant) >> 15); qcoeff_ptr[0] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[0] = qcoeff_ptr[0] * dequant / 2; if (abs_qcoeff) eob = 0; } *eob_ptr = eob + 1; } #endif void vpx_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int i, non_zero_count = (int)n_coeffs, eob = -1; const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] }; const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; (void)iscan; (void)skip_block; assert(!skip_block); memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); // Pre-scan pass for (i = (int)n_coeffs - 1; i >= 0; i--) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0]) non_zero_count--; else break; } // Quantization pass: All coefficients with index >= zero_flag are // skippable. Note: zero_flag can be zero. 
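/* The pair of multiplies below is a fixed-point division by the quantizer step. Illustrative numbers, assuming the tables are built like the VP9 encoder's invert_quant(): for dequant = 32 the encoder stores quant = 1 and quant_shift = 2048, so abs_coeff + round = 100 gives ((((100 * 1) >> 16) + 100) * 2048) >> 16 = 3, i.e. floor(100 / 32). */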
for (i = 0; i < non_zero_count; i++) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; if (abs_coeff >= zbins[rc != 0]) { int tmp = clamp(abs_coeff + round_ptr[rc != 0], INT16_MIN, INT16_MAX); tmp = ((((tmp * quant_ptr[rc != 0]) >> 16) + tmp) * quant_shift_ptr[rc != 0]) >> 16; // quantization qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = (tran_low_t)(qcoeff_ptr[rc] * dequant_ptr[rc != 0]); if (tmp) eob = i; } } *eob_ptr = eob + 1; } #if CONFIG_VP9_HIGHBITDEPTH void vpx_highbd_quantize_b_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { int i, non_zero_count = (int)n_coeffs, eob = -1; const int zbins[2] = { zbin_ptr[0], zbin_ptr[1] }; const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; (void)iscan; (void)skip_block; assert(!skip_block); memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); // Pre-scan pass for (i = (int)n_coeffs - 1; i >= 0; i--) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; if (coeff < zbins[rc != 0] && coeff > nzbins[rc != 0]) non_zero_count--; else break; } // Quantization pass: All coefficients with index >= zero_flag are // skippable. Note: zero_flag can be zero. for (i = 0; i < non_zero_count; i++) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; if (abs_coeff >= zbins[rc != 0]) { const int64_t tmp1 = abs_coeff + round_ptr[rc != 0]; const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1; const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 16); qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0]; if (abs_qcoeff) eob = i; } } *eob_ptr = eob + 1; } #endif void vpx_quantize_b_32x32_c(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1), ROUND_POWER_OF_TWO(zbin_ptr[1], 1) }; const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; int idx = 0; int idx_arr[1024]; int i, eob = -1; (void)iscan; (void)skip_block; assert(!skip_block); memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); // Pre-scan pass for (i = 0; i < n_coeffs; i++) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; // If the coefficient is out of the base ZBIN range, keep it for // quantization. if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0]) idx_arr[idx++] = i; } // Quantization pass: only process the coefficients selected in // pre-scan pass. Note: idx can be zero. 
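/* Compared with vpx_quantize_b_c(), the 32x32 path accounts for the transform's extra bit of scale: the rounding terms are halved via ROUND_POWER_OF_TWO(..., 1), the final shift is 15 instead of 16, and dequantization uses dequant / 2. */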
for (i = 0; i < idx; i++) { const int rc = scan[idx_arr[i]]; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); int tmp; int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; abs_coeff += ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); abs_coeff = clamp(abs_coeff, INT16_MIN, INT16_MAX); tmp = ((((abs_coeff * quant_ptr[rc != 0]) >> 16) + abs_coeff) * quant_shift_ptr[rc != 0]) >> 15; qcoeff_ptr[rc] = (tmp ^ coeff_sign) - coeff_sign; #if (VPX_ARCH_X86 || VPX_ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH // When tran_low_t is only 16 bits dqcoeff can outrange it. Rather than // truncating with a cast, saturate the value. This is easier to implement // on x86 and preserves the sign of the value. dqcoeff_ptr[rc] = clamp(qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2, INT16_MIN, INT16_MAX); #else dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; #endif // (VPX_ARCH_X86 || VPX_ARCH_X86_64) && !CONFIG_VP9_HIGHBITDEPTH if (tmp) eob = idx_arr[i]; } *eob_ptr = eob + 1; } #if CONFIG_VP9_HIGHBITDEPTH void vpx_highbd_quantize_b_32x32_c( const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { const int zbins[2] = { ROUND_POWER_OF_TWO(zbin_ptr[0], 1), ROUND_POWER_OF_TWO(zbin_ptr[1], 1) }; const int nzbins[2] = { zbins[0] * -1, zbins[1] * -1 }; int idx = 0; int idx_arr[1024]; int i, eob = -1; (void)iscan; (void)skip_block; assert(!skip_block); memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); // Pre-scan pass for (i = 0; i < n_coeffs; i++) { const int rc = scan[i]; const int coeff = coeff_ptr[rc]; // If the coefficient is out of the base ZBIN range, keep it for // quantization. if (coeff >= zbins[rc != 0] || coeff <= nzbins[rc != 0]) idx_arr[idx++] = i; } // Quantization pass: only process the coefficients selected in // pre-scan pass. Note: idx can be zero. for (i = 0; i < idx; i++) { const int rc = scan[idx_arr[i]]; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; const int64_t tmp1 = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1; const int abs_qcoeff = (int)((tmp2 * quant_shift_ptr[rc != 0]) >> 15); qcoeff_ptr[rc] = (tran_low_t)((abs_qcoeff ^ coeff_sign) - coeff_sign); dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; if (abs_qcoeff) eob = idx_arr[i]; } *eob_ptr = eob + 1; } #endif libvpx-1.8.2/vpx_dsp/quantize.h000066400000000000000000000036521357355204000165330ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VPX_DSP_QUANTIZE_H_
#define VPX_VPX_DSP_QUANTIZE_H_

#include "./vpx_config.h"
#include "vpx_dsp/vpx_dsp_common.h"

#ifdef __cplusplus
extern "C" {
#endif

void vpx_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs, int skip_block,
                     const int16_t *round_ptr, const int16_t quant,
                     tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                     const int16_t dequant, uint16_t *eob_ptr);
void vpx_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                           const int16_t *round_ptr, const int16_t quant,
                           tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                           const int16_t dequant, uint16_t *eob_ptr);

#if CONFIG_VP9_HIGHBITDEPTH
void vpx_highbd_quantize_dc(const tran_low_t *coeff_ptr, int n_coeffs,
                            int skip_block, const int16_t *round_ptr,
                            const int16_t quant, tran_low_t *qcoeff_ptr,
                            tran_low_t *dqcoeff_ptr, const int16_t dequant,
                            uint16_t *eob_ptr);
void vpx_highbd_quantize_dc_32x32(const tran_low_t *coeff_ptr, int skip_block,
                                  const int16_t *round_ptr,
                                  const int16_t quant, tran_low_t *qcoeff_ptr,
                                  tran_low_t *dqcoeff_ptr,
                                  const int16_t dequant, uint16_t *eob_ptr);
#endif

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VPX_DSP_QUANTIZE_H_
libvpx-1.8.2/vpx_dsp/sad.c000066400000000000000000000171701357355204000154360ustar00rootroot00000000000000/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdlib.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

/* Sum the difference between every corresponding element of the buffers. */
static INLINE unsigned int sad(const uint8_t *src_ptr, int src_stride,
                               const uint8_t *ref_ptr, int ref_stride,
                               int width, int height) {
  int y, x;
  unsigned int sad = 0;

  for (y = 0; y < height; y++) {
    for (x = 0; x < width; x++) sad += abs(src_ptr[x] - ref_ptr[x]);

    src_ptr += src_stride;
    ref_ptr += ref_stride;
  }
  return sad;
}

#define sadMxN(m, n)                                                        \
  unsigned int vpx_sad##m##x##n##_c(const uint8_t *src_ptr, int src_stride, \
                                    const uint8_t *ref_ptr,                 \
                                    int ref_stride) {                       \
    return sad(src_ptr, src_stride, ref_ptr, ref_stride, m, n);             \
  }                                                                         \
  unsigned int vpx_sad##m##x##n##_avg_c(                                    \
      const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr,       \
      int ref_stride, const uint8_t *second_pred) {                         \
    DECLARE_ALIGNED(16, uint8_t, comp_pred[m * n]);                         \
    vpx_comp_avg_pred_c(comp_pred, second_pred, m, n, ref_ptr, ref_stride); \
    return sad(src_ptr, src_stride, comp_pred, m, m, n);                    \
  }

// depending on call sites, pass **ref_array to avoid & in subsequent call and
// de-dup with 4D below.
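// Usage sketch (illustrative only; src, refs and the strides below are
// hypothetical buffers, not part of this file). The x4d variant scores four
// candidate references in one call:
//
//   const uint8_t *const refs[4] = { ref_a, ref_b, ref_c, ref_d };
//   uint32_t sads[4];
//   vpx_sad16x16x4d_c(src, src_stride, refs, ref_stride, sads);
//
// This is equivalent to four vpx_sad16x16_c() calls, but gives the SIMD
// versions a chance to share the loads of the source block.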
#define sadMxNxK(m, n, k) \ void vpx_sad##m##x##n##x##k##_c(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride, \ uint32_t *sad_array) { \ int i; \ for (i = 0; i < k; ++i) \ sad_array[i] = \ vpx_sad##m##x##n##_c(src_ptr, src_stride, &ref_ptr[i], ref_stride); \ } // This appears to be equivalent to the above when k == 4 and refs is const #define sadMxNx4D(m, n) \ void vpx_sad##m##x##n##x4d_c(const uint8_t *src_ptr, int src_stride, \ const uint8_t *const ref_array[], \ int ref_stride, uint32_t *sad_array) { \ int i; \ for (i = 0; i < 4; ++i) \ sad_array[i] = \ vpx_sad##m##x##n##_c(src_ptr, src_stride, ref_array[i], ref_stride); \ } /* clang-format off */ // 64x64 sadMxN(64, 64) sadMxNx4D(64, 64) // 64x32 sadMxN(64, 32) sadMxNx4D(64, 32) // 32x64 sadMxN(32, 64) sadMxNx4D(32, 64) // 32x32 sadMxN(32, 32) sadMxNxK(32, 32, 8) sadMxNx4D(32, 32) // 32x16 sadMxN(32, 16) sadMxNx4D(32, 16) // 16x32 sadMxN(16, 32) sadMxNx4D(16, 32) // 16x16 sadMxN(16, 16) sadMxNxK(16, 16, 3) sadMxNxK(16, 16, 8) sadMxNx4D(16, 16) // 16x8 sadMxN(16, 8) sadMxNxK(16, 8, 3) sadMxNxK(16, 8, 8) sadMxNx4D(16, 8) // 8x16 sadMxN(8, 16) sadMxNxK(8, 16, 3) sadMxNxK(8, 16, 8) sadMxNx4D(8, 16) // 8x8 sadMxN(8, 8) sadMxNxK(8, 8, 3) sadMxNxK(8, 8, 8) sadMxNx4D(8, 8) // 8x4 sadMxN(8, 4) sadMxNx4D(8, 4) // 4x8 sadMxN(4, 8) sadMxNx4D(4, 8) // 4x4 sadMxN(4, 4) sadMxNxK(4, 4, 3) sadMxNxK(4, 4, 8) sadMxNx4D(4, 4) /* clang-format on */ #if CONFIG_VP9_HIGHBITDEPTH static INLINE unsigned int highbd_sad(const uint8_t *src8_ptr, int src_stride, const uint8_t *ref8_ptr, int ref_stride, int width, int height) { int y, x; unsigned int sad = 0; const uint16_t *src = CONVERT_TO_SHORTPTR(src8_ptr); const uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(ref8_ptr); for (y = 0; y < height; y++) { for (x = 0; x < width; x++) sad += abs(src[x] - ref_ptr[x]); src += src_stride; ref_ptr += ref_stride; } return sad; } static INLINE unsigned int highbd_sadb(const uint8_t *src8_ptr, int src_stride, const uint16_t *ref_ptr, int ref_stride, int width, int height) { int y, x; unsigned int sad = 0; const uint16_t *src = CONVERT_TO_SHORTPTR(src8_ptr); for (y = 0; y < height; y++) { for (x = 0; x < width; x++) sad += abs(src[x] - ref_ptr[x]); src += src_stride; ref_ptr += ref_stride; } return sad; } #define highbd_sadMxN(m, n) \ unsigned int vpx_highbd_sad##m##x##n##_c( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride) { \ return highbd_sad(src_ptr, src_stride, ref_ptr, ref_stride, m, n); \ } \ unsigned int vpx_highbd_sad##m##x##n##_avg_c( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, const uint8_t *second_pred) { \ DECLARE_ALIGNED(16, uint16_t, comp_pred[m * n]); \ vpx_highbd_comp_avg_pred_c(comp_pred, CONVERT_TO_SHORTPTR(second_pred), m, \ n, CONVERT_TO_SHORTPTR(ref_ptr), ref_stride); \ return highbd_sadb(src_ptr, src_stride, comp_pred, m, m, n); \ } #define highbd_sadMxNx4D(m, n) \ void vpx_highbd_sad##m##x##n##x4d_c(const uint8_t *src_ptr, int src_stride, \ const uint8_t *const ref_array[], \ int ref_stride, uint32_t *sad_array) { \ int i; \ for (i = 0; i < 4; ++i) { \ sad_array[i] = vpx_highbd_sad##m##x##n##_c(src_ptr, src_stride, \ ref_array[i], ref_stride); \ } \ } /* clang-format off */ // 64x64 highbd_sadMxN(64, 64) highbd_sadMxNx4D(64, 64) // 64x32 highbd_sadMxN(64, 32) highbd_sadMxNx4D(64, 32) // 32x64 highbd_sadMxN(32, 64) highbd_sadMxNx4D(32, 64) // 32x32 highbd_sadMxN(32, 32) highbd_sadMxNx4D(32, 32) // 32x16 highbd_sadMxN(32, 16) highbd_sadMxNx4D(32, 16) // 
16x32
highbd_sadMxN(16, 32)
highbd_sadMxNx4D(16, 32)

// 16x16
highbd_sadMxN(16, 16)
highbd_sadMxNx4D(16, 16)

// 16x8
highbd_sadMxN(16, 8)
highbd_sadMxNx4D(16, 8)

// 8x16
highbd_sadMxN(8, 16)
highbd_sadMxNx4D(8, 16)

// 8x8
highbd_sadMxN(8, 8)
highbd_sadMxNx4D(8, 8)

// 8x4
highbd_sadMxN(8, 4)
highbd_sadMxNx4D(8, 4)

// 4x8
highbd_sadMxN(4, 8)
highbd_sadMxNx4D(4, 8)

// 4x4
highbd_sadMxN(4, 4)
highbd_sadMxNx4D(4, 4)

/* clang-format on */

#endif  // CONFIG_VP9_HIGHBITDEPTH
libvpx-1.8.2/vpx_dsp/skin_detection.c000066400000000000000000000057451357355204000176740ustar00rootroot00000000000000/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "vpx_dsp/skin_detection.h"

#define MODEL_MODE 1

// Fixed-point skin color model parameters.
static const int skin_mean[5][2] = { { 7463, 9614 },
                                     { 6400, 10240 },
                                     { 7040, 10240 },
                                     { 8320, 9280 },
                                     { 6800, 9614 } };
static const int skin_inv_cov[4] = { 4107, 1663, 1663, 2157 };  // q16
static const int skin_threshold[6] = { 1570636, 1400000, 800000,
                                       800000,  800000,  800000 };  // q18

// Thresholds on luminance.
static const int y_low = 40;
static const int y_high = 220;

// Evaluates the Mahalanobis distance measure for the input CbCr values.
static int vpx_evaluate_skin_color_difference(const int cb, const int cr,
                                              const int idx) {
  const int cb_q6 = cb << 6;
  const int cr_q6 = cr << 6;
  const int cb_diff_q12 =
      (cb_q6 - skin_mean[idx][0]) * (cb_q6 - skin_mean[idx][0]);
  const int cbcr_diff_q12 =
      (cb_q6 - skin_mean[idx][0]) * (cr_q6 - skin_mean[idx][1]);
  const int cr_diff_q12 =
      (cr_q6 - skin_mean[idx][1]) * (cr_q6 - skin_mean[idx][1]);
  const int cb_diff_q2 = (cb_diff_q12 + (1 << 9)) >> 10;
  const int cbcr_diff_q2 = (cbcr_diff_q12 + (1 << 9)) >> 10;
  const int cr_diff_q2 = (cr_diff_q12 + (1 << 9)) >> 10;
  const int skin_diff =
      skin_inv_cov[0] * cb_diff_q2 + skin_inv_cov[1] * cbcr_diff_q2 +
      skin_inv_cov[2] * cbcr_diff_q2 + skin_inv_cov[3] * cr_diff_q2;
  return skin_diff;
}

// Checks if the input yCbCr values correspond to skin color.
int vpx_skin_pixel(const int y, const int cb, const int cr, int motion) {
  if (y < y_low || y > y_high) {
    return 0;
  } else if (MODEL_MODE == 0) {
    return (vpx_evaluate_skin_color_difference(cb, cr, 0) < skin_threshold[0]);
  } else {
    int i = 0;
    // Exit on grey.
    if (cb == 128 && cr == 128) return 0;
    // Exit on very strong cb.
    if (cb > 150 && cr < 110) return 0;
    for (; i < 5; ++i) {
      int skin_color_diff = vpx_evaluate_skin_color_difference(cb, cr, i);
      if (skin_color_diff < skin_threshold[i + 1]) {
        if (y < 60 && skin_color_diff > 3 * (skin_threshold[i + 1] >> 2)) {
          return 0;
        } else if (motion == 0 &&
                   skin_color_diff > (skin_threshold[i + 1] >> 1)) {
          return 0;
        } else {
          return 1;
        }
      }
      // Exit if the difference is much larger than the threshold.
      if (skin_color_diff > (skin_threshold[i + 1] << 3)) {
        return 0;
      }
    }
    return 0;
  }
}
libvpx-1.8.2/vpx_dsp/skin_detection.h000066400000000000000000000012601357355204000176700ustar00rootroot00000000000000/*
 *  Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#ifndef VPX_VPX_DSP_SKIN_DETECTION_H_
#define VPX_VPX_DSP_SKIN_DETECTION_H_

#ifdef __cplusplus
extern "C" {
#endif

int vpx_skin_pixel(const int y, const int cb, const int cr, int motion);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VPX_DSP_SKIN_DETECTION_H_
libvpx-1.8.2/vpx_dsp/ssim.c000066400000000000000000000414271357355204000156440ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <math.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/ssim.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/system_state.h"

void vpx_ssim_parms_16x16_c(const uint8_t *s, int sp, const uint8_t *r, int rp,
                            uint32_t *sum_s, uint32_t *sum_r,
                            uint32_t *sum_sq_s, uint32_t *sum_sq_r,
                            uint32_t *sum_sxr) {
  int i, j;

  for (i = 0; i < 16; i++, s += sp, r += rp) {
    for (j = 0; j < 16; j++) {
      *sum_s += s[j];
      *sum_r += r[j];
      *sum_sq_s += s[j] * s[j];
      *sum_sq_r += r[j] * r[j];
      *sum_sxr += s[j] * r[j];
    }
  }
}

void vpx_ssim_parms_8x8_c(const uint8_t *s, int sp, const uint8_t *r, int rp,
                          uint32_t *sum_s, uint32_t *sum_r, uint32_t *sum_sq_s,
                          uint32_t *sum_sq_r, uint32_t *sum_sxr) {
  int i, j;

  for (i = 0; i < 8; i++, s += sp, r += rp) {
    for (j = 0; j < 8; j++) {
      *sum_s += s[j];
      *sum_r += r[j];
      *sum_sq_s += s[j] * s[j];
      *sum_sq_r += r[j] * r[j];
      *sum_sxr += s[j] * r[j];
    }
  }
}

#if CONFIG_VP9_HIGHBITDEPTH
void vpx_highbd_ssim_parms_8x8_c(const uint16_t *s, int sp, const uint16_t *r,
                                 int rp, uint32_t *sum_s, uint32_t *sum_r,
                                 uint32_t *sum_sq_s, uint32_t *sum_sq_r,
                                 uint32_t *sum_sxr) {
  int i, j;

  for (i = 0; i < 8; i++, s += sp, r += rp) {
    for (j = 0; j < 8; j++) {
      *sum_s += s[j];
      *sum_r += r[j];
      *sum_sq_s += s[j] * s[j];
      *sum_sq_r += r[j] * r[j];
      *sum_sxr += s[j] * r[j];
    }
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH

static const int64_t cc1 = 26634;        // 64^2*(.01*255)^2
static const int64_t cc2 = 239708;       // 64^2*(.03*255)^2
static const int64_t cc1_10 = 428658;    // 64^2*(.01*1023)^2
static const int64_t cc2_10 = 3857925;   // 64^2*(.03*1023)^2
static const int64_t cc1_12 = 6868593;   // 64^2*(.01*4095)^2
static const int64_t cc2_12 = 61817334;  // 64^2*(.03*4095)^2

static double similarity(uint32_t sum_s, uint32_t sum_r, uint32_t sum_sq_s,
                         uint32_t sum_sq_r, uint32_t sum_sxr, int count,
                         uint32_t bd) {
  double ssim_n, ssim_d;
  int64_t c1, c2;
  if (bd == 8) {
    // scale the constants by number of pixels
    c1 = (cc1 * count * count) >> 12;
    c2 = (cc2 * count * count) >> 12;
  } else if (bd == 10) {
    c1 = (cc1_10 * count * count) >> 12;
    c2 = (cc2_10 * count * count) >> 12;
  } else if (bd == 12) {
    c1 = (cc1_12 * count * count) >> 12;
    c2 = (cc2_12 * count * count) >> 12;
  } else {
    c1 = c2 = 0;
    assert(0);
  }

  ssim_n = (2.0 * sum_s * sum_r + c1) *
           (2.0 * count * sum_sxr - 2.0 * sum_s * sum_r + c2);

  ssim_d = ((double)sum_s * sum_s + (double)sum_r * sum_r + c1) *
           ((double)count * sum_sq_s - (double)sum_s * sum_s +
            (double)count * sum_sq_r - (double)sum_r * sum_r + c2);

  return ssim_n / ssim_d;
}

static double ssim_8x8(const uint8_t *s, int sp, const uint8_t *r, int rp) {
  uint32_t sum_s = 0, sum_r
= 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; vpx_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); return similarity(sum_s, sum_r, sum_sq_s, sum_sq_r, sum_sxr, 64, 8); } #if CONFIG_VP9_HIGHBITDEPTH static double highbd_ssim_8x8(const uint16_t *s, int sp, const uint16_t *r, int rp, uint32_t bd, uint32_t shift) { uint32_t sum_s = 0, sum_r = 0, sum_sq_s = 0, sum_sq_r = 0, sum_sxr = 0; vpx_highbd_ssim_parms_8x8(s, sp, r, rp, &sum_s, &sum_r, &sum_sq_s, &sum_sq_r, &sum_sxr); return similarity(sum_s >> shift, sum_r >> shift, sum_sq_s >> (2 * shift), sum_sq_r >> (2 * shift), sum_sxr >> (2 * shift), 64, bd); } #endif // CONFIG_VP9_HIGHBITDEPTH // We are using a 8x8 moving window with starting location of each 8x8 window // on the 4x4 pixel grid. Such arrangement allows the windows to overlap // block boundaries to penalize blocking artifacts. static double vpx_ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1, int stride_img2, int width, int height) { int i, j; int samples = 0; double ssim_total = 0; // sample point start with each 4x4 location for (i = 0; i <= height - 8; i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) { for (j = 0; j <= width - 8; j += 4) { double v = ssim_8x8(img1 + j, stride_img1, img2 + j, stride_img2); ssim_total += v; samples++; } } ssim_total /= samples; return ssim_total; } #if CONFIG_VP9_HIGHBITDEPTH static double vpx_highbd_ssim2(const uint8_t *img1, const uint8_t *img2, int stride_img1, int stride_img2, int width, int height, uint32_t bd, uint32_t shift) { int i, j; int samples = 0; double ssim_total = 0; // sample point start with each 4x4 location for (i = 0; i <= height - 8; i += 4, img1 += stride_img1 * 4, img2 += stride_img2 * 4) { for (j = 0; j <= width - 8; j += 4) { double v = highbd_ssim_8x8(CONVERT_TO_SHORTPTR(img1 + j), stride_img1, CONVERT_TO_SHORTPTR(img2 + j), stride_img2, bd, shift); ssim_total += v; samples++; } } ssim_total /= samples; return ssim_total; } #endif // CONFIG_VP9_HIGHBITDEPTH double vpx_calc_ssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, double *weight) { double a, b, c; double ssimv; a = vpx_ssim2(source->y_buffer, dest->y_buffer, source->y_stride, dest->y_stride, source->y_crop_width, source->y_crop_height); b = vpx_ssim2(source->u_buffer, dest->u_buffer, source->uv_stride, dest->uv_stride, source->uv_crop_width, source->uv_crop_height); c = vpx_ssim2(source->v_buffer, dest->v_buffer, source->uv_stride, dest->uv_stride, source->uv_crop_width, source->uv_crop_height); ssimv = a * .8 + .1 * (b + c); *weight = 1; return ssimv; } // traditional ssim as per: http://en.wikipedia.org/wiki/Structural_similarity // // Re working out the math -> // // ssim(x,y) = (2*mean(x)*mean(y) + c1)*(2*cov(x,y)+c2) / // ((mean(x)^2+mean(y)^2+c1)*(var(x)+var(y)+c2)) // // mean(x) = sum(x) / n // // cov(x,y) = (n*sum(xi*yi)-sum(x)*sum(y))/(n*n) // // var(x) = (n*sum(xi*xi)-sum(xi)*sum(xi))/(n*n) // // ssim(x,y) = // (2*sum(x)*sum(y)/(n*n) + c1)*(2*(n*sum(xi*yi)-sum(x)*sum(y))/(n*n)+c2) / // (((sum(x)*sum(x)+sum(y)*sum(y))/(n*n) +c1) * // ((n*sum(xi*xi) - sum(xi)*sum(xi))/(n*n)+ // (n*sum(yi*yi) - sum(yi)*sum(yi))/(n*n)+c2))) // // factoring out n*n // // ssim(x,y) = // (2*sum(x)*sum(y) + n*n*c1)*(2*(n*sum(xi*yi)-sum(x)*sum(y))+n*n*c2) / // (((sum(x)*sum(x)+sum(y)*sum(y)) + n*n*c1) * // (n*sum(xi*xi)-sum(xi)*sum(xi)+n*sum(yi*yi)-sum(yi)*sum(yi)+n*n*c2)) // // Replace c1 with n*n * c1 for the final step that leads to this code: // The final step scales by 12 bits so we don't lose 
precision in the constants.
static double ssimv_similarity(const Ssimv *sv, int64_t n) {
  // Scale the constants by number of pixels.
  const int64_t c1 = (cc1 * n * n) >> 12;
  const int64_t c2 = (cc2 * n * n) >> 12;

  const double l = 1.0 * (2 * sv->sum_s * sv->sum_r + c1) /
                   (sv->sum_s * sv->sum_s + sv->sum_r * sv->sum_r + c1);

  // Since these variables are unsigned sums, convert to double so
  // math is done in double arithmetic.
  const double v = (2.0 * n * sv->sum_sxr - 2 * sv->sum_s * sv->sum_r + c2) /
                   (n * sv->sum_sq_s - sv->sum_s * sv->sum_s +
                    n * sv->sum_sq_r - sv->sum_r * sv->sum_r + c2);

  return l * v;
}

// The first term of the ssim metric is a luminance factor:
//
//   (2*mean(x)*mean(y) + c1) / (mean(x)^2 + mean(y)^2 + c1)
//
// This luminance factor is super sensitive on the dark side of the luminance
// range and completely insensitive on the white side. Check out the two pairs
// (1, 3) and (250, 252); the term gives
//   2*1*3 / (1 + 9)             = .60
//   2*250*252 / (250^2 + 252^2) => .99999997
// As a result, this tweaked version of the calculation takes the luminance
// as a percentage off from the peak possible value:
//
//   255 * 255 - (sum_s - sum_r) / count * (sum_s - sum_r) / count
//
static double ssimv_similarity2(const Ssimv *sv, int64_t n) {
  // Scale the constants by number of pixels.
  const int64_t c1 = (cc1 * n * n) >> 12;
  const int64_t c2 = (cc2 * n * n) >> 12;

  const double mean_diff = (1.0 * sv->sum_s - sv->sum_r) / n;
  const double l = (255 * 255 - mean_diff * mean_diff + c1) / (255 * 255 + c1);

  // Since these variables are unsigned sums, convert to double so
  // math is done in double arithmetic.
  const double v = (2.0 * n * sv->sum_sxr - 2 * sv->sum_s * sv->sum_r + c2) /
                   (n * sv->sum_sq_s - sv->sum_s * sv->sum_s +
                    n * sv->sum_sq_r - sv->sum_r * sv->sum_r + c2);

  return l * v;
}

static void ssimv_parms(uint8_t *img1, int img1_pitch, uint8_t *img2,
                        int img2_pitch, Ssimv *sv) {
  vpx_ssim_parms_8x8(img1, img1_pitch, img2, img2_pitch, &sv->sum_s,
                     &sv->sum_r, &sv->sum_sq_s, &sv->sum_sq_r, &sv->sum_sxr);
}

double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
                            int img2_pitch, int width, int height, Ssimv *sv2,
                            Metrics *m, int do_inconsistency) {
  double dssim_total = 0;
  double ssim_total = 0;
  double ssim2_total = 0;
  double inconsistency_total = 0;
  int i, j;
  int c = 0;
  double norm;
  double old_ssim_total = 0;
  vpx_clear_system_state();
  // We can sample points as frequently as we like; start with 1 per 4x4.
  for (i = 0; i < height;
       i += 4, img1 += img1_pitch * 4, img2 += img2_pitch * 4) {
    for (j = 0; j < width; j += 4, ++c) {
      Ssimv sv = { 0, 0, 0, 0, 0, 0 };
      double ssim;
      double ssim2;
      double dssim;
      uint32_t var_new;
      uint32_t var_old;
      uint32_t mean_new;
      uint32_t mean_old;
      double ssim_new;
      double ssim_old;

      // Not sure there's a great way to handle the edge pixels
      // in ssim when using a window. Seems biased against edge pixels
      // however you handle this. This uses only samples that are
      // fully in the frame.
      if (j + 8 <= width && i + 8 <= height) {
        ssimv_parms(img1 + j, img1_pitch, img2 + j, img2_pitch, &sv);
      }

      ssim = ssimv_similarity(&sv, 64);
      ssim2 = ssimv_similarity2(&sv, 64);

      sv.ssim = ssim2;

      // dssim is calculated to use as an actual error metric and
      // is scaled up to the same range as sum square error.
      // Since we are subsampling every 16th point maybe this should be
      // *16 ?
      dssim = 255 * 255 * (1 - ssim2) / 2;

      // Here I introduce a new error metric: consistency-weighted
      // SSIM-inconsistency. This metric isolates frames where the
      // SSIM 'suddenly' changes, e.g.
      // if one frame in every 8 is much
      // sharper or blurrier than the others. Higher values indicate a
      // temporally inconsistent SSIM. There are two ideas at work:
      //
      // 1) 'SSIM-inconsistency': the total inconsistency value
      // reflects how much SSIM values are changing between this
      // source / reference frame pair and the previous pair.
      //
      // 2) 'consistency-weighted': weights de-emphasize areas in the
      // frame where the scene content has changed. Changes in scene
      // content are detected via changes in local variance and local
      // mean.
      //
      // Thus the overall measure reflects how inconsistent the SSIM
      // values are, over consistent regions of the frame.
      //
      // The metric has three terms:
      //
      // term 1 -> uses change in scene Variance to weight error score
      //   2 * var(Fi)*var(Fi-1) / (var(Fi)^2+var(Fi-1)^2)
      //   larger changes from one frame to the next mean we care
      //   less about consistency.
      //
      // term 2 -> uses change in local scene luminance to weight error
      //   2 * avg(Fi)*avg(Fi-1) / (avg(Fi)^2+avg(Fi-1)^2)
      //   larger changes from one frame to the next mean we care
      //   less about consistency.
      //
      // term 3 -> measures inconsistency in ssim scores between frames
      //   1 - (2 * ssim(Fi)*ssim(Fi-1) / (ssim(Fi)^2 + ssim(Fi-1)^2)).
      //
      // This term compares the ssim score for the same location in 2
      // subsequent frames.
      var_new = sv.sum_sq_s - sv.sum_s * sv.sum_s / 64;
      var_old = sv2[c].sum_sq_s - sv2[c].sum_s * sv2[c].sum_s / 64;
      mean_new = sv.sum_s;
      mean_old = sv2[c].sum_s;
      ssim_new = sv.ssim;
      ssim_old = sv2[c].ssim;

      if (do_inconsistency) {
        // We do the metric once for every 4x4 block in the image. Since
        // we are scaling the error to SSE for use in a psnr calculation,
        // 1.0 = 4x4x255x255, the worst error we can possibly have.
        static const double kScaling = 4. * 4 * 255 * 255;

        // The constants have to be non-zero to avoid potential divide-by-0
        // issues; other than that, they act as a kind of weighting between
        // the terms. No testing of what the right values should be has
        // been done.
        static const double c1 = 1, c2 = 1, c3 = 1;

        // This measures how consistent the variance is between two
        // consecutive source frames. 1.0 means they have exactly the
        // same variance.
        const double variance_term =
            (2.0 * var_old * var_new + c1) /
            (1.0 * var_old * var_old + 1.0 * var_new * var_new + c1);

        // This measures how consistent the local means are between two
        // consecutive frames. 1.0 means they have exactly the same mean.
        const double mean_term =
            (2.0 * mean_old * mean_new + c2) /
            (1.0 * mean_old * mean_old + 1.0 * mean_new * mean_new + c2);

        // This measures how consistent the ssims of two
        // consecutive frames are. 1.0 means they are exactly the same.
        double ssim_term =
            pow((2.0 * ssim_old * ssim_new + c3) /
                    (ssim_old * ssim_old + ssim_new * ssim_new + c3),
                5);

        double this_inconsistency;

        // Floating point math sometimes makes this > 1 by a tiny bit.
        // We want the metric to scale between 0 and 1.0 so we can convert
        // it to an snr scaled value.
        if (ssim_term > 1) ssim_term = 1;

        // This converts the consistency metric to an inconsistency metric
        // (so we can scale it like psnr to something like sum square error).
        // The reason for the variance and mean terms is the assumption that
        // if there are big changes in the source we should penalize
        // inconsistency in ssim scores less, as it will be less visible
        // to the user.
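        // Hypothetical numbers for a feel of the scale: with
        // variance_term = 0.9, mean_term = 0.95 and ssim_term = 0.98, the
        // statement below yields (1 - 0.98) * 0.9 * 0.95 = 0.0171 before
        // kScaling is applied, i.e. a slightly inconsistent but stable
        // block contributes only a small error.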
this_inconsistency = (1 - ssim_term) * variance_term * mean_term; this_inconsistency *= kScaling; inconsistency_total += this_inconsistency; } sv2[c] = sv; ssim_total += ssim; ssim2_total += ssim2; dssim_total += dssim; old_ssim_total += ssim_old; } old_ssim_total += 0; } norm = 1. / (width / 4) / (height / 4); ssim_total *= norm; ssim2_total *= norm; m->ssim2 = ssim2_total; m->ssim = ssim_total; if (old_ssim_total == 0) inconsistency_total = 0; m->ssimc = inconsistency_total; m->dssim = dssim_total; return inconsistency_total; } #if CONFIG_VP9_HIGHBITDEPTH double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source, const YV12_BUFFER_CONFIG *dest, double *weight, uint32_t bd, uint32_t in_bd) { double a, b, c; double ssimv; uint32_t shift = 0; assert(bd >= in_bd); shift = bd - in_bd; a = vpx_highbd_ssim2(source->y_buffer, dest->y_buffer, source->y_stride, dest->y_stride, source->y_crop_width, source->y_crop_height, in_bd, shift); b = vpx_highbd_ssim2(source->u_buffer, dest->u_buffer, source->uv_stride, dest->uv_stride, source->uv_crop_width, source->uv_crop_height, in_bd, shift); c = vpx_highbd_ssim2(source->v_buffer, dest->v_buffer, source->uv_stride, dest->uv_stride, source->uv_crop_width, source->uv_crop_height, in_bd, shift); ssimv = a * .8 + .1 * (b + c); *weight = 1; return ssimv; } #endif // CONFIG_VP9_HIGHBITDEPTH libvpx-1.8.2/vpx_dsp/ssim.h000066400000000000000000000045541357355204000156510ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#ifndef VPX_VPX_DSP_SSIM_H_
#define VPX_VPX_DSP_SSIM_H_

#define MAX_SSIM_DB 100.0

#ifdef __cplusplus
extern "C" {
#endif

#include "./vpx_config.h"
#include "vpx_scale/yv12config.h"

// metrics used for calculating ssim, ssim2, dssim, and ssimc
typedef struct {
  // source sum ( over 8x8 region )
  uint32_t sum_s;

  // reference sum (over 8x8 region )
  uint32_t sum_r;

  // source sum squared ( over 8x8 region )
  uint32_t sum_sq_s;

  // reference sum squared (over 8x8 region )
  uint32_t sum_sq_r;

  // sum of source times reference (over 8x8 region)
  uint32_t sum_sxr;

  // calculated ssim score between source and reference
  double ssim;
} Ssimv;

// metrics collected on a frame basis
typedef struct {
  // ssim consistency error metric ( see code for explanation )
  double ssimc;

  // standard ssim
  double ssim;

  // revised ssim ( see code for explanation)
  double ssim2;

  // ssim restated as an error metric like sse
  double dssim;

  // dssim converted to decibels
  double dssimd;

  // ssimc converted to decibels
  double ssimcd;
} Metrics;

double vpx_get_ssim_metrics(uint8_t *img1, int img1_pitch, uint8_t *img2,
                            int img2_pitch, int width, int height, Ssimv *sv2,
                            Metrics *m, int do_inconsistency);

double vpx_calc_ssim(const YV12_BUFFER_CONFIG *source,
                     const YV12_BUFFER_CONFIG *dest, double *weight);

double vpx_calc_fastssim(const YV12_BUFFER_CONFIG *source,
                         const YV12_BUFFER_CONFIG *dest, double *ssim_y,
                         double *ssim_u, double *ssim_v, uint32_t bd,
                         uint32_t in_bd);

#if CONFIG_VP9_HIGHBITDEPTH
double vpx_highbd_calc_ssim(const YV12_BUFFER_CONFIG *source,
                            const YV12_BUFFER_CONFIG *dest, double *weight,
                            uint32_t bd, uint32_t in_bd);
#endif  // CONFIG_VP9_HIGHBITDEPTH

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VPX_DSP_SSIM_H_
libvpx-1.8.2/vpx_dsp/subtract.c000066400000000000000000000032371357355204000165150ustar00rootroot00000000000000/*
 *  Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <stdlib.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"

#include "vpx/vpx_integer.h"
#include "vpx_ports/mem.h"

void vpx_subtract_block_c(int rows, int cols, int16_t *diff_ptr,
                          ptrdiff_t diff_stride, const uint8_t *src_ptr,
                          ptrdiff_t src_stride, const uint8_t *pred_ptr,
                          ptrdiff_t pred_stride) {
  int r, c;

  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++) diff_ptr[c] = src_ptr[c] - pred_ptr[c];

    diff_ptr += diff_stride;
    pred_ptr += pred_stride;
    src_ptr += src_stride;
  }
}

#if CONFIG_VP9_HIGHBITDEPTH
void vpx_highbd_subtract_block_c(int rows, int cols, int16_t *diff_ptr,
                                 ptrdiff_t diff_stride,
                                 const uint8_t *src8_ptr, ptrdiff_t src_stride,
                                 const uint8_t *pred8_ptr,
                                 ptrdiff_t pred_stride, int bd) {
  int r, c;
  uint16_t *src = CONVERT_TO_SHORTPTR(src8_ptr);
  uint16_t *pred = CONVERT_TO_SHORTPTR(pred8_ptr);
  (void)bd;

  for (r = 0; r < rows; r++) {
    for (c = 0; c < cols; c++) {
      diff_ptr[c] = src[c] - pred[c];
    }

    diff_ptr += diff_stride;
    pred += pred_stride;
    src += src_stride;
  }
}
#endif  // CONFIG_VP9_HIGHBITDEPTH
libvpx-1.8.2/vpx_dsp/sum_squares.c000066400000000000000000000013071357355204000172310ustar00rootroot00000000000000/*
 *  Copyright (c) 2016 The WebM project authors. All Rights Reserved.
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_dsp_rtcd.h" uint64_t vpx_sum_squares_2d_i16_c(const int16_t *src, int stride, int size) { int r, c; uint64_t ss = 0; for (r = 0; r < size; r++) { for (c = 0; c < size; c++) { const int16_t v = src[c]; ss += v * v; } src += stride; } return ss; } libvpx-1.8.2/vpx_dsp/txfm_common.h000066400000000000000000000050301357355204000172120ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_TXFM_COMMON_H_ #define VPX_VPX_DSP_TXFM_COMMON_H_ #include "vpx_dsp/vpx_dsp_common.h" // Constants and Macros used by all idct/dct functions #define DCT_CONST_BITS 14 #define DCT_CONST_ROUNDING (1 << (DCT_CONST_BITS - 1)) #define UNIT_QUANT_SHIFT 2 #define UNIT_QUANT_FACTOR (1 << UNIT_QUANT_SHIFT) // Constants: // for (int i = 1; i< 32; ++i) // printf("static const int cospi_%d_64 = %.0f;\n", i, // round(16384 * cos(i*M_PI/64))); // Note: sin(k*Pi/64) = cos((32-k)*Pi/64) static const tran_coef_t cospi_1_64 = 16364; static const tran_coef_t cospi_2_64 = 16305; static const tran_coef_t cospi_3_64 = 16207; static const tran_coef_t cospi_4_64 = 16069; static const tran_coef_t cospi_5_64 = 15893; static const tran_coef_t cospi_6_64 = 15679; static const tran_coef_t cospi_7_64 = 15426; static const tran_coef_t cospi_8_64 = 15137; static const tran_coef_t cospi_9_64 = 14811; static const tran_coef_t cospi_10_64 = 14449; static const tran_coef_t cospi_11_64 = 14053; static const tran_coef_t cospi_12_64 = 13623; static const tran_coef_t cospi_13_64 = 13160; static const tran_coef_t cospi_14_64 = 12665; static const tran_coef_t cospi_15_64 = 12140; static const tran_coef_t cospi_16_64 = 11585; static const tran_coef_t cospi_17_64 = 11003; static const tran_coef_t cospi_18_64 = 10394; static const tran_coef_t cospi_19_64 = 9760; static const tran_coef_t cospi_20_64 = 9102; static const tran_coef_t cospi_21_64 = 8423; static const tran_coef_t cospi_22_64 = 7723; static const tran_coef_t cospi_23_64 = 7005; static const tran_coef_t cospi_24_64 = 6270; static const tran_coef_t cospi_25_64 = 5520; static const tran_coef_t cospi_26_64 = 4756; static const tran_coef_t cospi_27_64 = 3981; static const tran_coef_t cospi_28_64 = 3196; static const tran_coef_t cospi_29_64 = 2404; static const tran_coef_t cospi_30_64 = 1606; static const tran_coef_t cospi_31_64 = 804; // 16384 * sqrt(2) * sin(kPi/9) * 2 / 3 static const tran_coef_t sinpi_1_9 = 5283; static const tran_coef_t sinpi_2_9 = 9929; static const tran_coef_t sinpi_3_9 = 13377; static const tran_coef_t sinpi_4_9 = 15212; #endif // VPX_VPX_DSP_TXFM_COMMON_H_ libvpx-1.8.2/vpx_dsp/variance.c000066400000000000000000000660611357355204000164620ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/variance.h" static const uint8_t bilinear_filters[8][2] = { { 128, 0 }, { 112, 16 }, { 96, 32 }, { 80, 48 }, { 64, 64 }, { 48, 80 }, { 32, 96 }, { 16, 112 }, }; uint32_t vpx_get4x4sse_cs_c(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride) { int distortion = 0; int r, c; for (r = 0; r < 4; ++r) { for (c = 0; c < 4; ++c) { int diff = src_ptr[c] - ref_ptr[c]; distortion += diff * diff; } src_ptr += src_stride; ref_ptr += ref_stride; } return distortion; } uint32_t vpx_get_mb_ss_c(const int16_t *src_ptr) { unsigned int i, sum = 0; for (i = 0; i < 256; ++i) { sum += src_ptr[i] * src_ptr[i]; } return sum; } static void variance(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, int w, int h, uint32_t *sse, int *sum) { int i, j; *sum = 0; *sse = 0; for (i = 0; i < h; ++i) { for (j = 0; j < w; ++j) { const int diff = src_ptr[j] - ref_ptr[j]; *sum += diff; *sse += diff * diff; } src_ptr += src_stride; ref_ptr += ref_stride; } } // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal // or vertical direction to produce the filtered output block. Used to implement // the first-pass of 2-D separable filter. // // Produces int16_t output to retain precision for the next pass. Two filter // taps should sum to FILTER_WEIGHT. pixel_step defines whether the filter is // applied horizontally (pixel_step = 1) or vertically (pixel_step = stride). // It defines the offset required to move from one input to the next. static void var_filter_block2d_bil_first_pass( const uint8_t *src_ptr, uint16_t *ref_ptr, unsigned int src_pixels_per_line, int pixel_step, unsigned int output_height, unsigned int output_width, const uint8_t *filter) { unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { ref_ptr[j] = ROUND_POWER_OF_TWO( (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], FILTER_BITS); ++src_ptr; } src_ptr += src_pixels_per_line - output_width; ref_ptr += output_width; } } // Applies a 1-D 2-tap bilinear filter to the source block in either horizontal // or vertical direction to produce the filtered output block. Used to implement // the second-pass of 2-D separable filter. // // Requires 16-bit input as produced by filter_block2d_bil_first_pass. Two // filter taps should sum to FILTER_WEIGHT. pixel_step defines whether the // filter is applied horizontally (pixel_step = 1) or vertically // (pixel_step = stride). It defines the offset required to move from one input // to the next. Output is 8-bit. 
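// Worked example (hypothetical inputs, not from the library): for
// x_offset = 4 the kernel in bilinear_filters above is { 64, 64 }, a plain
// average of two neighboring samples. Filtering the pair 10 and 30 gives
//   ROUND_POWER_OF_TWO(10 * 64 + 30 * 64, FILTER_BITS)
//     = (640 + 1920 + 64) >> 7 = 20
// i.e. the half-pel value between the two pixels, rounded back to 8 bits.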
static void var_filter_block2d_bil_second_pass(
    const uint16_t *src_ptr, uint8_t *ref_ptr,
    unsigned int src_pixels_per_line, unsigned int pixel_step,
    unsigned int output_height, unsigned int output_width,
    const uint8_t *filter) {
  unsigned int i, j;

  for (i = 0; i < output_height; ++i) {
    for (j = 0; j < output_width; ++j) {
      ref_ptr[j] = ROUND_POWER_OF_TWO(
          (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1],
          FILTER_BITS);
      ++src_ptr;
    }

    src_ptr += src_pixels_per_line - output_width;
    ref_ptr += output_width;
  }
}

#define VAR(W, H)                                                            \
  uint32_t vpx_variance##W##x##H##_c(const uint8_t *src_ptr, int src_stride, \
                                     const uint8_t *ref_ptr, int ref_stride, \
                                     uint32_t *sse) {                        \
    int sum;                                                                 \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum);     \
    return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H));                \
  }

#define SUBPIX_VAR(W, H)                                                     \
  uint32_t vpx_sub_pixel_variance##W##x##H##_c(                              \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) {               \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint8_t temp2[H * W];                                                    \
                                                                             \
    var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
                                      W, bilinear_filters[x_offset]);        \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,            \
                                       bilinear_filters[y_offset]);          \
                                                                             \
    return vpx_variance##W##x##H##_c(temp2, W, ref_ptr, ref_stride, sse);    \
  }

#define SUBPIX_AVG_VAR(W, H)                                                 \
  uint32_t vpx_sub_pixel_avg_variance##W##x##H##_c(                          \
      const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset,    \
      const uint8_t *ref_ptr, int ref_stride, uint32_t *sse,                 \
      const uint8_t *second_pred) {                                          \
    uint16_t fdata3[(H + 1) * W];                                            \
    uint8_t temp2[H * W];                                                    \
    DECLARE_ALIGNED(16, uint8_t, temp3[H * W]);                              \
                                                                             \
    var_filter_block2d_bil_first_pass(src_ptr, fdata3, src_stride, 1, H + 1, \
                                      W, bilinear_filters[x_offset]);        \
    var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W,            \
                                       bilinear_filters[y_offset]);          \
                                                                             \
    vpx_comp_avg_pred_c(temp3, second_pred, W, H, temp2, W);                 \
                                                                             \
    return vpx_variance##W##x##H##_c(temp3, W, ref_ptr, ref_stride, sse);    \
  }

/* Identical to the variance call except it takes an additional parameter,
 * sum, and returns that value using pass-by-reference instead of returning
 * sse - sum^2 / w*h
 */
#define GET_VAR(W, H)                                                   \
  void vpx_get##W##x##H##var_c(const uint8_t *src_ptr, int src_stride,  \
                               const uint8_t *ref_ptr, int ref_stride,  \
                               uint32_t *sse, int *sum) {               \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, sum); \
  }

/* Identical to the variance call except it does not calculate the
 * sse - sum^2 / w*h and returns sse in addition to modifying the passed-in
 * variable.
 */
#define MSE(W, H)                                                        \
  uint32_t vpx_mse##W##x##H##_c(const uint8_t *src_ptr, int src_stride,  \
                                const uint8_t *ref_ptr, int ref_stride,  \
                                uint32_t *sse) {                         \
    int sum;                                                             \
    variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, &sum); \
    return *sse;                                                         \
  }

/* All three forms of the variance are available in the same sizes.
*/ #define VARIANCES(W, H) \ VAR(W, H) \ SUBPIX_VAR(W, H) \ SUBPIX_AVG_VAR(W, H) VARIANCES(64, 64) VARIANCES(64, 32) VARIANCES(32, 64) VARIANCES(32, 32) VARIANCES(32, 16) VARIANCES(16, 32) VARIANCES(16, 16) VARIANCES(16, 8) VARIANCES(8, 16) VARIANCES(8, 8) VARIANCES(8, 4) VARIANCES(4, 8) VARIANCES(4, 4) GET_VAR(16, 16) GET_VAR(8, 8) MSE(16, 16) MSE(16, 8) MSE(8, 16) MSE(8, 8) void vpx_comp_avg_pred_c(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride) { int i, j; for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { const int tmp = pred[j] + ref[j]; comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); } comp_pred += width; pred += width; ref += ref_stride; } } #if CONFIG_VP9_HIGHBITDEPTH static void highbd_variance64(const uint8_t *src8_ptr, int src_stride, const uint8_t *ref8_ptr, int ref_stride, int w, int h, uint64_t *sse, int64_t *sum) { int i, j; uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src8_ptr); uint16_t *ref_ptr = CONVERT_TO_SHORTPTR(ref8_ptr); *sum = 0; *sse = 0; for (i = 0; i < h; ++i) { for (j = 0; j < w; ++j) { const int diff = src_ptr[j] - ref_ptr[j]; *sum += diff; *sse += diff * diff; } src_ptr += src_stride; ref_ptr += ref_stride; } } static void highbd_8_variance(const uint8_t *src8_ptr, int src_stride, const uint8_t *ref8_ptr, int ref_stride, int w, int h, uint32_t *sse, int *sum) { uint64_t sse_long = 0; int64_t sum_long = 0; highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long, &sum_long); *sse = (uint32_t)sse_long; *sum = (int)sum_long; } static void highbd_10_variance(const uint8_t *src8_ptr, int src_stride, const uint8_t *ref8_ptr, int ref_stride, int w, int h, uint32_t *sse, int *sum) { uint64_t sse_long = 0; int64_t sum_long = 0; highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long, &sum_long); *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4); *sum = (int)ROUND_POWER_OF_TWO(sum_long, 2); } static void highbd_12_variance(const uint8_t *src8_ptr, int src_stride, const uint8_t *ref8_ptr, int ref_stride, int w, int h, uint32_t *sse, int *sum) { uint64_t sse_long = 0; int64_t sum_long = 0; highbd_variance64(src8_ptr, src_stride, ref8_ptr, ref_stride, w, h, &sse_long, &sum_long); *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8); *sum = (int)ROUND_POWER_OF_TWO(sum_long, 4); } #define HIGHBD_VAR(W, H) \ uint32_t vpx_highbd_8_variance##W##x##H##_c( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, uint32_t *sse) { \ int sum; \ highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \ &sum); \ return *sse - (uint32_t)(((int64_t)sum * sum) / (W * H)); \ } \ \ uint32_t vpx_highbd_10_variance##W##x##H##_c( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, uint32_t *sse) { \ int sum; \ int64_t var; \ highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \ &sum); \ var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \ return (var >= 0) ? (uint32_t)var : 0; \ } \ \ uint32_t vpx_highbd_12_variance##W##x##H##_c( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, uint32_t *sse) { \ int sum; \ int64_t var; \ highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \ &sum); \ var = (int64_t)(*sse) - (((int64_t)sum * sum) / (W * H)); \ return (var >= 0) ? 
(uint32_t)var : 0; \ } #define HIGHBD_GET_VAR(S) \ void vpx_highbd_8_get##S##x##S##var_c( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, uint32_t *sse, int *sum) { \ highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \ sum); \ } \ \ void vpx_highbd_10_get##S##x##S##var_c( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, uint32_t *sse, int *sum) { \ highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \ sum); \ } \ \ void vpx_highbd_12_get##S##x##S##var_c( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, uint32_t *sse, int *sum) { \ highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, S, S, sse, \ sum); \ } #define HIGHBD_MSE(W, H) \ uint32_t vpx_highbd_8_mse##W##x##H##_c( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, uint32_t *sse) { \ int sum; \ highbd_8_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \ &sum); \ return *sse; \ } \ \ uint32_t vpx_highbd_10_mse##W##x##H##_c( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, uint32_t *sse) { \ int sum; \ highbd_10_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \ &sum); \ return *sse; \ } \ \ uint32_t vpx_highbd_12_mse##W##x##H##_c( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, uint32_t *sse) { \ int sum; \ highbd_12_variance(src_ptr, src_stride, ref_ptr, ref_stride, W, H, sse, \ &sum); \ return *sse; \ } static void highbd_var_filter_block2d_bil_first_pass( const uint8_t *src_ptr8, uint16_t *output_ptr, unsigned int src_pixels_per_line, int pixel_step, unsigned int output_height, unsigned int output_width, const uint8_t *filter) { unsigned int i, j; uint16_t *src_ptr = CONVERT_TO_SHORTPTR(src_ptr8); for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { output_ptr[j] = ROUND_POWER_OF_TWO( (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], FILTER_BITS); ++src_ptr; } // Next row... 
src_ptr += src_pixels_per_line - output_width; output_ptr += output_width; } } static void highbd_var_filter_block2d_bil_second_pass( const uint16_t *src_ptr, uint16_t *output_ptr, unsigned int src_pixels_per_line, unsigned int pixel_step, unsigned int output_height, unsigned int output_width, const uint8_t *filter) { unsigned int i, j; for (i = 0; i < output_height; ++i) { for (j = 0; j < output_width; ++j) { output_ptr[j] = ROUND_POWER_OF_TWO( (int)src_ptr[0] * filter[0] + (int)src_ptr[pixel_step] * filter[1], FILTER_BITS); ++src_ptr; } src_ptr += src_pixels_per_line - output_width; output_ptr += output_width; } } #define HIGHBD_SUBPIX_VAR(W, H) \ uint32_t vpx_highbd_8_sub_pixel_variance##W##x##H##_c( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ highbd_var_filter_block2d_bil_first_pass( \ src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ bilinear_filters[y_offset]); \ \ return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \ ref_ptr, ref_stride, sse); \ } \ \ uint32_t vpx_highbd_10_sub_pixel_variance##W##x##H##_c( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ highbd_var_filter_block2d_bil_first_pass( \ src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ bilinear_filters[y_offset]); \ \ return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \ ref_ptr, ref_stride, sse); \ } \ \ uint32_t vpx_highbd_12_sub_pixel_variance##W##x##H##_c( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse) { \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ \ highbd_var_filter_block2d_bil_first_pass( \ src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ bilinear_filters[y_offset]); \ \ return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp2), W, \ ref_ptr, ref_stride, sse); \ } #define HIGHBD_SUBPIX_AVG_VAR(W, H) \ uint32_t vpx_highbd_8_sub_pixel_avg_variance##W##x##H##_c( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ const uint8_t *second_pred) { \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ highbd_var_filter_block2d_bil_first_pass( \ src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ bilinear_filters[y_offset]); \ \ vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \ temp2, W); \ \ return vpx_highbd_8_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ ref_ptr, ref_stride, sse); \ } \ \ uint32_t vpx_highbd_10_sub_pixel_avg_variance##W##x##H##_c( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ const uint8_t *second_pred) { \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ highbd_var_filter_block2d_bil_first_pass( \ src_ptr, 
fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ bilinear_filters[y_offset]); \ \ vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \ temp2, W); \ \ return vpx_highbd_10_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ ref_ptr, ref_stride, sse); \ } \ \ uint32_t vpx_highbd_12_sub_pixel_avg_variance##W##x##H##_c( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, uint32_t *sse, \ const uint8_t *second_pred) { \ uint16_t fdata3[(H + 1) * W]; \ uint16_t temp2[H * W]; \ DECLARE_ALIGNED(16, uint16_t, temp3[H * W]); \ \ highbd_var_filter_block2d_bil_first_pass( \ src_ptr, fdata3, src_stride, 1, H + 1, W, bilinear_filters[x_offset]); \ highbd_var_filter_block2d_bil_second_pass(fdata3, temp2, W, W, H, W, \ bilinear_filters[y_offset]); \ \ vpx_highbd_comp_avg_pred_c(temp3, CONVERT_TO_SHORTPTR(second_pred), W, H, \ temp2, W); \ \ return vpx_highbd_12_variance##W##x##H##_c(CONVERT_TO_BYTEPTR(temp3), W, \ ref_ptr, ref_stride, sse); \ } /* All three forms of the variance are available in the same sizes. */ #define HIGHBD_VARIANCES(W, H) \ HIGHBD_VAR(W, H) \ HIGHBD_SUBPIX_VAR(W, H) \ HIGHBD_SUBPIX_AVG_VAR(W, H) HIGHBD_VARIANCES(64, 64) HIGHBD_VARIANCES(64, 32) HIGHBD_VARIANCES(32, 64) HIGHBD_VARIANCES(32, 32) HIGHBD_VARIANCES(32, 16) HIGHBD_VARIANCES(16, 32) HIGHBD_VARIANCES(16, 16) HIGHBD_VARIANCES(16, 8) HIGHBD_VARIANCES(8, 16) HIGHBD_VARIANCES(8, 8) HIGHBD_VARIANCES(8, 4) HIGHBD_VARIANCES(4, 8) HIGHBD_VARIANCES(4, 4) HIGHBD_GET_VAR(8) HIGHBD_GET_VAR(16) HIGHBD_MSE(16, 16) HIGHBD_MSE(16, 8) HIGHBD_MSE(8, 16) HIGHBD_MSE(8, 8) void vpx_highbd_comp_avg_pred(uint16_t *comp_pred, const uint16_t *pred, int width, int height, const uint16_t *ref, int ref_stride) { int i, j; for (i = 0; i < height; ++i) { for (j = 0; j < width; ++j) { const int tmp = pred[j] + ref[j]; comp_pred[j] = ROUND_POWER_OF_TWO(tmp, 1); } comp_pred += width; pred += width; ref += ref_stride; } } #endif // CONFIG_VP9_HIGHBITDEPTH libvpx-1.8.2/vpx_dsp/variance.h000066400000000000000000000056441357355204000164670ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_DSP_VARIANCE_H_ #define VPX_VPX_DSP_VARIANCE_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif #define FILTER_BITS 7 #define FILTER_WEIGHT 128 typedef unsigned int (*vpx_sad_fn_t)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride); typedef unsigned int (*vpx_sad_avg_fn_t)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, const uint8_t *second_pred); typedef void (*vp8_copy32xn_fn_t)(const uint8_t *src_ptr, int src_stride, uint8_t *ref_ptr, int ref_stride, int n); typedef void (*vpx_sad_multi_fn_t)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sad_array); typedef void (*vpx_sad_multi_d_fn_t)(const uint8_t *src_ptr, int src_stride, const uint8_t *const b_array[], int ref_stride, unsigned int *sad_array); typedef unsigned int (*vpx_variance_fn_t)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); typedef unsigned int (*vpx_subpixvariance_fn_t)( const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse); typedef unsigned int (*vpx_subp_avg_variance_fn_t)( const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred); #if CONFIG_VP8 typedef struct variance_vtable { vpx_sad_fn_t sdf; vpx_variance_fn_t vf; vpx_subpixvariance_fn_t svf; vpx_sad_multi_fn_t sdx3f; vpx_sad_multi_fn_t sdx8f; vpx_sad_multi_d_fn_t sdx4df; #if VPX_ARCH_X86 || VPX_ARCH_X86_64 vp8_copy32xn_fn_t copymem; #endif } vp8_variance_fn_ptr_t; #endif // CONFIG_VP8 #if CONFIG_VP9 typedef struct vp9_variance_vtable { vpx_sad_fn_t sdf; vpx_sad_avg_fn_t sdaf; vpx_variance_fn_t vf; vpx_subpixvariance_fn_t svf; vpx_subp_avg_variance_fn_t svaf; vpx_sad_multi_d_fn_t sdx4df; vpx_sad_multi_fn_t sdx8f; } vp9_variance_fn_ptr_t; #endif // CONFIG_VP9 #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_VARIANCE_H_ libvpx-1.8.2/vpx_dsp/vpx_convolve.c000066400000000000000000000511301357355204000174110ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <assert.h>
#include <string.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx/vpx_integer.h"
#include "vpx_dsp/vpx_convolve.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/vpx_filter.h"
#include "vpx_ports/mem.h"

static void convolve_horiz(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const InterpKernel *x_filters, int x0_q4,
                           int x_step_q4, int w, int h) {
  int x, y;
  src -= SUBPEL_TAPS / 2 - 1;

  for (y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
      dst[x] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void convolve_avg_horiz(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const InterpKernel *x_filters, int x0_q4,
                               int x_step_q4, int w, int h) {
  int x, y;
  src -= SUBPEL_TAPS / 2 - 1;

  for (y = 0; y < h; ++y) {
    int x_q4 = x0_q4;
    for (x = 0; x < w; ++x) {
      const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS];
      const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k];
      dst[x] = ROUND_POWER_OF_TWO(
          dst[x] + clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)), 1);
      x_q4 += x_step_q4;
    }
    src += src_stride;
    dst += dst_stride;
  }
}

static void convolve_vert(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride,
                          const InterpKernel *y_filters, int y0_q4,
                          int y_step_q4, int w, int h) {
  int x, y;
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] = clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS));
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

static void convolve_avg_vert(const uint8_t *src, ptrdiff_t src_stride,
                              uint8_t *dst, ptrdiff_t dst_stride,
                              const InterpKernel *y_filters, int y0_q4,
                              int y_step_q4, int w, int h) {
  int x, y;
  src -= src_stride * (SUBPEL_TAPS / 2 - 1);

  for (x = 0; x < w; ++x) {
    int y_q4 = y0_q4;
    for (y = 0; y < h; ++y) {
      const uint8_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
      const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
      int k, sum = 0;
      for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_y[k * src_stride] * y_filter[k];
      dst[y * dst_stride] = ROUND_POWER_OF_TWO(
          dst[y * dst_stride] +
              clip_pixel(ROUND_POWER_OF_TWO(sum, FILTER_BITS)),
          1);
      y_q4 += y_step_q4;
    }
    ++src;
    ++dst;
  }
}

void vpx_convolve8_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                           uint8_t *dst, ptrdiff_t dst_stride,
                           const InterpKernel *filter, int x0_q4,
                           int x_step_q4, int y0_q4, int y_step_q4, int w,
                           int h) {
  (void)y0_q4;
  (void)y_step_q4;
  convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w,
                 h);
}

void vpx_convolve8_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride,
                               uint8_t *dst, ptrdiff_t dst_stride,
                               const InterpKernel *filter, int x0_q4,
                               int x_step_q4, int y0_q4, int y_step_q4, int w,
                               int h) {
  (void)y0_q4;
  (void)y_step_q4;
  convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4,
                     x_step_q4, w, h);
}

void vpx_convolve8_vert_c(const uint8_t *src, ptrdiff_t src_stride,
                          uint8_t *dst, ptrdiff_t dst_stride, const
InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { (void)x0_q4; (void)x_step_q4; convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } void vpx_convolve8_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { (void)x0_q4; (void)x_step_q4; convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } void vpx_convolve8_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { // Note: Fixed size intermediate buffer, temp, places limits on parameters. // 2d filtering proceeds in 2 steps: // (1) Interpolate horizontally into an intermediate buffer, temp. // (2) Interpolate temp vertically to derive the sub-pixel result. // Deriving the maximum number of rows in the temp buffer (135): // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). // --Largest block size is 64x64 pixels. // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the // original frame (in 1/16th pixel units). // --Must round-up because block may be located at sub-pixel position. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. // When calling in frame scaling function, the smallest scaling factor is x1/4 // ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer is still // big enough. uint8_t temp[64 * 135]; const int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; assert(w <= 64); assert(h <= 64); assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32)); assert(x_step_q4 <= 64); convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, filter, x0_q4, x_step_q4, w, intermediate_height); convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } void vpx_convolve8_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { // Fixed size intermediate buffer places limits on parameters. 
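  // Note: temp here only needs to hold the final w x h (at most 64x64)
  // result; vpx_convolve8_c() performs both filter passes internally, so no
  // extra rows for the 8-tap filter tails are required at this level.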
DECLARE_ALIGNED(16, uint8_t, temp[64 * 64]); assert(w <= 64); assert(h <= 64); vpx_convolve8_c(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); vpx_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h); } void vpx_convolve_copy_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { int r; (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; for (r = h; r > 0; --r) { memcpy(dst, src, w); src += src_stride; dst += dst_stride; } } void vpx_convolve_avg_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { int x, y; (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; for (y = 0; y < h; ++y) { for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); src += src_stride; dst += dst_stride; } } void vpx_scaled_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { vpx_convolve8_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } void vpx_scaled_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { vpx_convolve8_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } void vpx_scaled_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { vpx_convolve8_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } void vpx_scaled_avg_horiz_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { vpx_convolve8_avg_horiz_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } void vpx_scaled_avg_vert_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { vpx_convolve8_avg_vert_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } void vpx_scaled_avg_2d_c(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { vpx_convolve8_avg_c(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h); } #if CONFIG_VP9_HIGHBITDEPTH static void highbd_convolve_horiz(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4, int x_step_q4, int w, int h, int bd) { int x, y; src -= SUBPEL_TAPS / 2 - 1; for (y = 0; y < h; ++y) { int x_q4 = x0_q4; for (x = 0; x < w; ++x) { const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; int k, sum = 0; for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k]; dst[x] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); x_q4 += x_step_q4; } src += src_stride; dst += dst_stride; } } static 
void highbd_convolve_avg_horiz(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *x_filters, int x0_q4, int x_step_q4, int w, int h, int bd) { int x, y; src -= SUBPEL_TAPS / 2 - 1; for (y = 0; y < h; ++y) { int x_q4 = x0_q4; for (x = 0; x < w; ++x) { const uint16_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; int k, sum = 0; for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_x[k] * x_filter[k]; dst[x] = ROUND_POWER_OF_TWO( dst[x] + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); x_q4 += x_step_q4; } src += src_stride; dst += dst_stride; } } static void highbd_convolve_vert(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4, int y_step_q4, int w, int h, int bd) { int x, y; src -= src_stride * (SUBPEL_TAPS / 2 - 1); for (x = 0; x < w; ++x) { int y_q4 = y0_q4; for (y = 0; y < h; ++y) { const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; int k, sum = 0; for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_y[k * src_stride] * y_filter[k]; dst[y * dst_stride] = clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd); y_q4 += y_step_q4; } ++src; ++dst; } } static void highbd_convolve_avg_vert(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *y_filters, int y0_q4, int y_step_q4, int w, int h, int bd) { int x, y; src -= src_stride * (SUBPEL_TAPS / 2 - 1); for (x = 0; x < w; ++x) { int y_q4 = y0_q4; for (y = 0; y < h; ++y) { const uint16_t *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; int k, sum = 0; for (k = 0; k < SUBPEL_TAPS; ++k) sum += src_y[k * src_stride] * y_filter[k]; dst[y * dst_stride] = ROUND_POWER_OF_TWO( dst[y * dst_stride] + clip_pixel_highbd(ROUND_POWER_OF_TWO(sum, FILTER_BITS), bd), 1); y_q4 += y_step_q4; } ++src; ++dst; } } static void highbd_convolve(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { // Note: Fixed size intermediate buffer, temp, places limits on parameters. // 2d filtering proceeds in 2 steps: // (1) Interpolate horizontally into an intermediate buffer, temp. // (2) Interpolate temp vertically to derive the sub-pixel result. // Deriving the maximum number of rows in the temp buffer (135): // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). // --Largest block size is 64x64 pixels. // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the // original frame (in 1/16th pixel units). // --Must round-up because block may be located at sub-pixel position. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. 
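// Worked instance of the bound above: with h = 64, y_step_q4 = 32 and the
// largest sub-pixel offset y0_q4 = 15,
//   (((64 - 1) * 32 + 15) >> 4) + SUBPEL_TAPS = (2031 >> 4) + 8 = 126 + 8
//                                             = 134 rows,
// which fits within the 135-row buffer declared below.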
uint16_t temp[64 * 135]; const int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; assert(w <= 64); assert(h <= 64); assert(y_step_q4 <= 32); assert(x_step_q4 <= 32); highbd_convolve_horiz(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, filter, x0_q4, x_step_q4, w, intermediate_height, bd); highbd_convolve_vert(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, w, h, bd); } void vpx_highbd_convolve8_horiz_c(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { (void)y0_q4; (void)y_step_q4; highbd_convolve_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w, h, bd); } void vpx_highbd_convolve8_avg_horiz_c(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { (void)y0_q4; (void)y_step_q4; highbd_convolve_avg_horiz(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, w, h, bd); } void vpx_highbd_convolve8_vert_c(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { (void)x0_q4; (void)x_step_q4; highbd_convolve_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w, h, bd); } void vpx_highbd_convolve8_avg_vert_c(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { (void)x0_q4; (void)x_step_q4; highbd_convolve_avg_vert(src, src_stride, dst, dst_stride, filter, y0_q4, y_step_q4, w, h, bd); } void vpx_highbd_convolve8_c(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { highbd_convolve(src, src_stride, dst, dst_stride, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); } void vpx_highbd_convolve8_avg_c(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { // Fixed size intermediate buffer places limits on parameters. 
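// As in vpx_convolve8_avg_c(): run the full 2-pass convolution into temp,
// then average temp into dst.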
DECLARE_ALIGNED(16, uint16_t, temp[64 * 64]); assert(w <= 64); assert(h <= 64); vpx_highbd_convolve8_c(src, src_stride, temp, 64, filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); vpx_highbd_convolve_avg_c(temp, 64, dst, dst_stride, NULL, 0, 0, 0, 0, w, h, bd); } void vpx_highbd_convolve_copy_c(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { int r; (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; (void)bd; for (r = h; r > 0; --r) { memcpy(dst, src, w * sizeof(uint16_t)); src += src_stride; dst += dst_stride; } } void vpx_highbd_convolve_avg_c(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { int x, y; (void)filter; (void)x0_q4; (void)x_step_q4; (void)y0_q4; (void)y_step_q4; (void)bd; for (y = 0; y < h; ++y) { for (x = 0; x < w; ++x) dst[x] = ROUND_POWER_OF_TWO(dst[x] + src[x], 1); src += src_stride; dst += dst_stride; } } #endif libvpx-1.8.2/vpx_dsp/vpx_convolve.h000066400000000000000000000025571357355204000174270ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_VPX_CONVOLVE_H_ #define VPX_VPX_DSP_VPX_CONVOLVE_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif typedef void (*convolve_fn_t)(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h); #if CONFIG_VP9_HIGHBITDEPTH typedef void (*highbd_convolve_fn_t)(const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd); #endif #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_VPX_CONVOLVE_H_ libvpx-1.8.2/vpx_dsp/vpx_dsp.mk000066400000000000000000000371521357355204000165410ustar00rootroot00000000000000## ## Copyright (c) 2015 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## DSP_SRCS-yes += vpx_dsp.mk DSP_SRCS-yes += vpx_dsp_common.h DSP_SRCS-$(HAVE_MSA) += mips/macros_msa.h DSP_SRCS-$(HAVE_AVX2) += x86/bitdepth_conversion_avx2.h DSP_SRCS-$(HAVE_SSE2) += x86/bitdepth_conversion_sse2.h # This file is included in libs.mk. Including it here would cause it to be # compiled into an object. Even as an empty file, this would create an # executable section on the stack. 
#DSP_SRCS-$(HAVE_SSE2) += x86/bitdepth_conversion_sse2$(ASM) # bit reader DSP_SRCS-yes += prob.h DSP_SRCS-yes += prob.c ifeq ($(CONFIG_ENCODERS),yes) DSP_SRCS-yes += bitwriter.h DSP_SRCS-yes += bitwriter.c DSP_SRCS-yes += bitwriter_buffer.c DSP_SRCS-yes += bitwriter_buffer.h DSP_SRCS-yes += psnr.c DSP_SRCS-yes += psnr.h DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.c DSP_SRCS-$(CONFIG_INTERNAL_STATS) += ssim.h DSP_SRCS-$(CONFIG_INTERNAL_STATS) += psnrhvs.c DSP_SRCS-$(CONFIG_INTERNAL_STATS) += fastssim.c endif ifeq ($(CONFIG_DECODERS),yes) DSP_SRCS-yes += bitreader.h DSP_SRCS-yes += bitreader.c DSP_SRCS-yes += bitreader_buffer.c DSP_SRCS-yes += bitreader_buffer.h endif # intra predictions DSP_SRCS-yes += intrapred.c DSP_SRCS-$(HAVE_SSE2) += x86/intrapred_sse2.asm DSP_SRCS-$(HAVE_SSSE3) += x86/intrapred_ssse3.asm DSP_SRCS-$(HAVE_VSX) += ppc/intrapred_vsx.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/highbd_intrapred_intrin_sse2.c DSP_SRCS-$(HAVE_SSSE3) += x86/highbd_intrapred_intrin_ssse3.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_intrapred_neon.c endif # CONFIG_VP9_HIGHBITDEPTH ifneq ($(filter yes,$(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC)),) DSP_SRCS-yes += add_noise.c DSP_SRCS-yes += deblock.c DSP_SRCS-yes += postproc.h DSP_SRCS-$(HAVE_MSA) += mips/add_noise_msa.c DSP_SRCS-$(HAVE_MSA) += mips/deblock_msa.c DSP_SRCS-$(HAVE_NEON) += arm/deblock_neon.c DSP_SRCS-$(HAVE_SSE2) += x86/add_noise_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/deblock_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/post_proc_sse2.c DSP_SRCS-$(HAVE_VSX) += ppc/deblock_vsx.c endif # CONFIG_POSTPROC DSP_SRCS-$(HAVE_NEON_ASM) += arm/intrapred_neon_asm$(ASM) DSP_SRCS-$(HAVE_NEON) += arm/intrapred_neon.c DSP_SRCS-$(HAVE_MSA) += mips/intrapred_msa.c DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred4_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred8_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/intrapred16_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.h DSP_SRCS-$(HAVE_DSPR2) += mips/common_dspr2.c DSP_SRCS-yes += vpx_filter.h ifeq ($(CONFIG_VP9),yes) # interpolation filters DSP_SRCS-yes += vpx_convolve.c DSP_SRCS-yes += vpx_convolve.h DSP_SRCS-$(VPX_ARCH_X86)$(VPX_ARCH_X86_64) += x86/convolve.h DSP_SRCS-$(HAVE_SSE2) += x86/convolve_sse2.h DSP_SRCS-$(HAVE_SSSE3) += x86/convolve_ssse3.h DSP_SRCS-$(HAVE_AVX2) += x86/convolve_avx2.h DSP_SRCS-$(HAVE_SSE2) += x86/vpx_subpixel_8t_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/vpx_subpixel_4t_intrin_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/vpx_subpixel_bilinear_sse2.asm DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_8t_ssse3.asm DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_bilinear_ssse3.asm DSP_SRCS-$(HAVE_AVX2) += x86/vpx_subpixel_8t_intrin_avx2.c DSP_SRCS-$(HAVE_SSSE3) += x86/vpx_subpixel_8t_intrin_ssse3.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/vpx_high_subpixel_8t_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/vpx_high_subpixel_bilinear_sse2.asm DSP_SRCS-$(HAVE_AVX2) += x86/highbd_convolve_avx2.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_vpx_convolve_copy_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_vpx_convolve_avg_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_vpx_convolve8_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_vpx_convolve_neon.c endif DSP_SRCS-$(HAVE_SSE2) += x86/vpx_convolve_copy_sse2.asm DSP_SRCS-$(HAVE_NEON) += arm/vpx_scaled_convolve8_neon.c ifeq ($(HAVE_NEON_ASM),yes) DSP_SRCS-yes += arm/vpx_convolve_copy_neon_asm$(ASM) DSP_SRCS-yes += arm/vpx_convolve8_horiz_filter_type2_neon$(ASM) DSP_SRCS-yes += 
arm/vpx_convolve8_vert_filter_type2_neon$(ASM) DSP_SRCS-yes += arm/vpx_convolve8_horiz_filter_type1_neon$(ASM) DSP_SRCS-yes += arm/vpx_convolve8_vert_filter_type1_neon$(ASM) DSP_SRCS-yes += arm/vpx_convolve8_avg_horiz_filter_type2_neon$(ASM) DSP_SRCS-yes += arm/vpx_convolve8_avg_vert_filter_type2_neon$(ASM) DSP_SRCS-yes += arm/vpx_convolve8_avg_horiz_filter_type1_neon$(ASM) DSP_SRCS-yes += arm/vpx_convolve8_avg_vert_filter_type1_neon$(ASM) DSP_SRCS-yes += arm/vpx_convolve_avg_neon_asm$(ASM) DSP_SRCS-yes += arm/vpx_convolve8_neon_asm.c DSP_SRCS-yes += arm/vpx_convolve8_neon_asm.h DSP_SRCS-yes += arm/vpx_convolve_neon.c else ifeq ($(HAVE_NEON),yes) DSP_SRCS-yes += arm/vpx_convolve_copy_neon.c DSP_SRCS-yes += arm/vpx_convolve8_neon.c DSP_SRCS-yes += arm/vpx_convolve_avg_neon.c DSP_SRCS-yes += arm/vpx_convolve_neon.c endif # HAVE_NEON endif # HAVE_NEON_ASM # common (msa) DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_avg_horiz_msa.c DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_avg_msa.c DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_avg_vert_msa.c DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_horiz_msa.c DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_msa.c DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve8_vert_msa.c DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve_avg_msa.c DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve_copy_msa.c DSP_SRCS-$(HAVE_MSA) += mips/vpx_convolve_msa.h DSP_SRCS-$(HAVE_MMI) += mips/vpx_convolve8_mmi.c # common (dspr2) DSP_SRCS-$(HAVE_DSPR2) += mips/convolve_common_dspr2.h DSP_SRCS-$(HAVE_DSPR2) += mips/convolve2_avg_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/convolve2_avg_horiz_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/convolve2_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/convolve2_horiz_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/convolve2_vert_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/convolve8_avg_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/convolve8_avg_horiz_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/convolve8_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/convolve8_horiz_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/convolve8_vert_dspr2.c DSP_SRCS-$(HAVE_VSX) += ppc/vpx_convolve_vsx.c # loop filters DSP_SRCS-yes += loopfilter.c DSP_SRCS-$(HAVE_SSE2) += x86/loopfilter_sse2.c DSP_SRCS-$(HAVE_AVX2) += x86/loopfilter_avx2.c ifeq ($(HAVE_NEON_ASM),yes) DSP_SRCS-yes += arm/loopfilter_16_neon$(ASM) DSP_SRCS-yes += arm/loopfilter_8_neon$(ASM) DSP_SRCS-yes += arm/loopfilter_4_neon$(ASM) else DSP_SRCS-$(HAVE_NEON) += arm/loopfilter_neon.c endif # HAVE_NEON_ASM DSP_SRCS-$(HAVE_MSA) += mips/loopfilter_msa.h DSP_SRCS-$(HAVE_MSA) += mips/loopfilter_16_msa.c DSP_SRCS-$(HAVE_MSA) += mips/loopfilter_8_msa.c DSP_SRCS-$(HAVE_MSA) += mips/loopfilter_4_msa.c DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_filters_dspr2.h DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_filters_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_macros_dspr2.h DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_masks_dspr2.h DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_mb_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_mb_horiz_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/loopfilter_mb_vert_dspr2.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_NEON) += arm/highbd_loopfilter_neon.c DSP_SRCS-$(HAVE_SSE2) += x86/highbd_loopfilter_sse2.c endif # CONFIG_VP9_HIGHBITDEPTH endif # CONFIG_VP9 DSP_SRCS-yes += txfm_common.h DSP_SRCS-$(HAVE_SSE2) += x86/txfm_common_sse2.h DSP_SRCS-$(HAVE_MSA) += mips/txfm_macros_msa.h # forward transform ifeq ($(CONFIG_VP9_ENCODER),yes) DSP_SRCS-yes += fwd_txfm.c DSP_SRCS-yes += fwd_txfm.h DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.h DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_sse2.c 
DSP_SRCS-$(HAVE_SSE2) += x86/fwd_txfm_impl_sse2.h DSP_SRCS-$(HAVE_SSE2) += x86/fwd_dct32x32_impl_sse2.h ifeq ($(VPX_ARCH_X86_64),yes) DSP_SRCS-$(HAVE_SSSE3) += x86/fwd_txfm_ssse3_x86_64.asm endif DSP_SRCS-$(HAVE_AVX2) += x86/fwd_txfm_avx2.c DSP_SRCS-$(HAVE_AVX2) += x86/fwd_dct32x32_impl_avx2.h DSP_SRCS-$(HAVE_NEON) += arm/fdct_neon.c DSP_SRCS-$(HAVE_NEON) += arm/fdct16x16_neon.c DSP_SRCS-$(HAVE_NEON) += arm/fdct32x32_neon.c DSP_SRCS-$(HAVE_NEON) += arm/fdct_partial_neon.c DSP_SRCS-$(HAVE_NEON) += arm/fwd_txfm_neon.c DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.h DSP_SRCS-$(HAVE_MSA) += mips/fwd_txfm_msa.c ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_MSA) += mips/fwd_dct32x32_msa.c endif # !CONFIG_VP9_HIGHBITDEPTH DSP_SRCS-$(HAVE_VSX) += ppc/fdct32x32_vsx.c endif # CONFIG_VP9_ENCODER # inverse transform ifeq ($(CONFIG_VP9),yes) DSP_SRCS-yes += inv_txfm.h DSP_SRCS-yes += inv_txfm.c DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.h DSP_SRCS-$(HAVE_SSE2) += x86/inv_txfm_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/inv_wht_sse2.asm DSP_SRCS-$(HAVE_SSSE3) += x86/inv_txfm_ssse3.h DSP_SRCS-$(HAVE_SSSE3) += x86/inv_txfm_ssse3.c DSP_SRCS-$(HAVE_NEON_ASM) += arm/save_reg_neon$(ASM) DSP_SRCS-$(HAVE_VSX) += ppc/inv_txfm_vsx.c ifneq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_MSA) += mips/inv_txfm_msa.h DSP_SRCS-$(HAVE_MSA) += mips/idct4x4_msa.c DSP_SRCS-$(HAVE_MSA) += mips/idct8x8_msa.c DSP_SRCS-$(HAVE_MSA) += mips/idct16x16_msa.c DSP_SRCS-$(HAVE_MSA) += mips/idct32x32_msa.c DSP_SRCS-$(HAVE_DSPR2) += mips/inv_txfm_dspr2.h DSP_SRCS-$(HAVE_DSPR2) += mips/itrans4_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/itrans8_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/itrans16_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/itrans32_dspr2.c DSP_SRCS-$(HAVE_DSPR2) += mips/itrans32_cols_dspr2.c else # CONFIG_VP9_HIGHBITDEPTH DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct4x4_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct8x8_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct16x16_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct32x32_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct32x32_34_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct32x32_135_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct32x32_1024_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/highbd_idct_neon.h DSP_SRCS-$(HAVE_SSE2) += x86/highbd_inv_txfm_sse2.h DSP_SRCS-$(HAVE_SSE2) += x86/highbd_idct4x4_add_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/highbd_idct8x8_add_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/highbd_idct16x16_add_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/highbd_idct32x32_add_sse2.c DSP_SRCS-$(HAVE_SSE4_1) += x86/highbd_inv_txfm_sse4.h DSP_SRCS-$(HAVE_SSE4_1) += x86/highbd_idct4x4_add_sse4.c DSP_SRCS-$(HAVE_SSE4_1) += x86/highbd_idct8x8_add_sse4.c DSP_SRCS-$(HAVE_SSE4_1) += x86/highbd_idct16x16_add_sse4.c DSP_SRCS-$(HAVE_SSE4_1) += x86/highbd_idct32x32_add_sse4.c endif # !CONFIG_VP9_HIGHBITDEPTH ifeq ($(HAVE_NEON_ASM),yes) DSP_SRCS-yes += arm/idct_neon$(ASM) DSP_SRCS-yes += arm/idct4x4_1_add_neon$(ASM) DSP_SRCS-yes += arm/idct4x4_add_neon$(ASM) else DSP_SRCS-$(HAVE_NEON) += arm/idct4x4_1_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/idct4x4_add_neon.c endif # HAVE_NEON_ASM DSP_SRCS-$(HAVE_NEON) += arm/idct_neon.h DSP_SRCS-$(HAVE_NEON) += arm/idct8x8_1_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/idct8x8_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/idct16x16_1_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/idct16x16_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/idct32x32_1_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/idct32x32_34_add_neon.c DSP_SRCS-$(HAVE_NEON) += arm/idct32x32_135_add_neon.c 
DSP_SRCS-$(HAVE_NEON) += arm/idct32x32_add_neon.c endif # CONFIG_VP9 # quantization ifeq ($(CONFIG_VP9_ENCODER),yes) DSP_SRCS-yes += quantize.c DSP_SRCS-yes += quantize.h DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/quantize_sse2.h DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3.c DSP_SRCS-$(HAVE_SSSE3) += x86/quantize_ssse3.h DSP_SRCS-$(HAVE_AVX) += x86/quantize_avx.c DSP_SRCS-$(HAVE_NEON) += arm/quantize_neon.c DSP_SRCS-$(HAVE_VSX) += ppc/quantize_vsx.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_quantize_intrin_sse2.c endif # avg DSP_SRCS-yes += avg.c DSP_SRCS-$(HAVE_SSE2) += x86/avg_intrin_sse2.c DSP_SRCS-$(HAVE_AVX2) += x86/avg_intrin_avx2.c DSP_SRCS-$(HAVE_NEON) += arm/avg_neon.c DSP_SRCS-$(HAVE_NEON) += arm/hadamard_neon.c DSP_SRCS-$(HAVE_MSA) += mips/avg_msa.c ifeq ($(VPX_ARCH_X86_64),yes) DSP_SRCS-$(HAVE_SSSE3) += x86/avg_ssse3_x86_64.asm endif DSP_SRCS-$(HAVE_VSX) += ppc/hadamard_vsx.c endif # CONFIG_VP9_ENCODER # skin detection DSP_SRCS-yes += skin_detection.h DSP_SRCS-yes += skin_detection.c ifeq ($(CONFIG_ENCODERS),yes) DSP_SRCS-yes += sad.c DSP_SRCS-yes += subtract.c DSP_SRCS-yes += sum_squares.c DSP_SRCS-$(HAVE_NEON) += arm/sum_squares_neon.c DSP_SRCS-$(HAVE_SSE2) += x86/sum_squares_sse2.c DSP_SRCS-$(HAVE_MSA) += mips/sum_squares_msa.c DSP_SRCS-$(HAVE_NEON) += arm/sad4d_neon.c DSP_SRCS-$(HAVE_NEON) += arm/sad_neon.c DSP_SRCS-$(HAVE_NEON) += arm/subtract_neon.c DSP_SRCS-$(HAVE_MSA) += mips/sad_msa.c DSP_SRCS-$(HAVE_MSA) += mips/subtract_msa.c DSP_SRCS-$(HAVE_MMI) += mips/sad_mmi.c DSP_SRCS-$(HAVE_MMI) += mips/subtract_mmi.c DSP_SRCS-$(HAVE_SSE3) += x86/sad_sse3.asm DSP_SRCS-$(HAVE_SSSE3) += x86/sad_ssse3.asm DSP_SRCS-$(HAVE_SSE4_1) += x86/sad_sse4.asm DSP_SRCS-$(HAVE_AVX2) += x86/sad4d_avx2.c DSP_SRCS-$(HAVE_AVX2) += x86/sad_avx2.c DSP_SRCS-$(HAVE_AVX512) += x86/sad4d_avx512.c DSP_SRCS-$(HAVE_SSE2) += x86/sad4d_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/sad_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/subtract_sse2.asm DSP_SRCS-$(HAVE_VSX) += ppc/sad_vsx.c DSP_SRCS-$(HAVE_VSX) += ppc/subtract_vsx.c ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad4d_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/highbd_sad_sse2.asm endif # CONFIG_VP9_HIGHBITDEPTH endif # CONFIG_ENCODERS ifneq ($(filter yes,$(CONFIG_ENCODERS) $(CONFIG_POSTPROC) $(CONFIG_VP9_POSTPROC)),) DSP_SRCS-yes += variance.c DSP_SRCS-yes += variance.h DSP_SRCS-$(HAVE_NEON) += arm/avg_pred_neon.c DSP_SRCS-$(HAVE_NEON) += arm/subpel_variance_neon.c DSP_SRCS-$(HAVE_NEON) += arm/variance_neon.c DSP_SRCS-$(HAVE_MSA) += mips/variance_msa.c DSP_SRCS-$(HAVE_MSA) += mips/sub_pixel_variance_msa.c DSP_SRCS-$(HAVE_MMI) += mips/variance_mmi.c DSP_SRCS-$(HAVE_SSE2) += x86/avg_pred_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/variance_sse2.c # Contains SSE2 and SSSE3 DSP_SRCS-$(HAVE_AVX2) += x86/variance_avx2.c DSP_SRCS-$(HAVE_VSX) += ppc/variance_vsx.c ifeq ($(VPX_ARCH_X86_64),yes) DSP_SRCS-$(HAVE_SSE2) += x86/ssim_opt_x86_64.asm endif # VPX_ARCH_X86_64 DSP_SRCS-$(HAVE_SSE2) += x86/subpel_variance_sse2.asm # Contains SSE2 and SSSE3 ifeq ($(CONFIG_VP9_HIGHBITDEPTH),yes) DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_sse2.c DSP_SRCS-$(HAVE_SSE2) += x86/highbd_variance_impl_sse2.asm DSP_SRCS-$(HAVE_SSE2) += x86/highbd_subpel_variance_impl_sse2.asm endif # CONFIG_VP9_HIGHBITDEPTH endif # CONFIG_ENCODERS || CONFIG_POSTPROC || CONFIG_VP9_POSTPROC # Neon utilities DSP_SRCS-$(HAVE_NEON) += arm/mem_neon.h DSP_SRCS-$(HAVE_NEON) += arm/sum_neon.h DSP_SRCS-$(HAVE_NEON) += arm/transpose_neon.h 
DSP_SRCS-$(HAVE_NEON) += arm/vpx_convolve8_neon.h # PPC VSX utilities DSP_SRCS-$(HAVE_VSX) += ppc/types_vsx.h DSP_SRCS-$(HAVE_VSX) += ppc/txfm_common_vsx.h DSP_SRCS-$(HAVE_VSX) += ppc/transpose_vsx.h DSP_SRCS-$(HAVE_VSX) += ppc/bitdepth_conversion_vsx.h # X86 utilities DSP_SRCS-$(HAVE_SSE2) += x86/mem_sse2.h DSP_SRCS-$(HAVE_SSE2) += x86/transpose_sse2.h DSP_SRCS-no += $(DSP_SRCS_REMOVE-yes) DSP_SRCS-yes += vpx_dsp_rtcd.c DSP_SRCS-yes += vpx_dsp_rtcd_defs.pl $(eval $(call rtcd_h_template,vpx_dsp_rtcd,vpx_dsp/vpx_dsp_rtcd_defs.pl)) libvpx-1.8.2/vpx_dsp/vpx_dsp_common.h000066400000000000000000000042371357355204000177270ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_VPX_DSP_COMMON_H_ #define VPX_VPX_DSP_VPX_DSP_COMMON_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" #ifdef __cplusplus extern "C" { #endif #define VPXMIN(x, y) (((x) < (y)) ? (x) : (y)) #define VPXMAX(x, y) (((x) > (y)) ? (x) : (y)) #define VPX_SWAP(type, a, b) \ do { \ type c = (b); \ (b) = a; \ (a) = c; \ } while (0) #if CONFIG_VP9_HIGHBITDEPTH // Note: // tran_low_t is the datatype used for final transform coefficients. // tran_high_t is the datatype used for intermediate transform stages. typedef int64_t tran_high_t; typedef int32_t tran_low_t; #else // Note: // tran_low_t is the datatype used for final transform coefficients. // tran_high_t is the datatype used for intermediate transform stages. typedef int32_t tran_high_t; typedef int16_t tran_low_t; #endif // CONFIG_VP9_HIGHBITDEPTH typedef int16_t tran_coef_t; static INLINE uint8_t clip_pixel(int val) { return (val > 255) ? 255 : (val < 0) ? 0 : val; } static INLINE int clamp(int value, int low, int high) { return value < low ? low : (value > high ? high : value); } static INLINE double fclamp(double value, double low, double high) { return value < low ? low : (value > high ? high : value); } static INLINE int64_t lclamp(int64_t value, int64_t low, int64_t high) { return value < low ? low : (value > high ? high : value); } static INLINE uint16_t clip_pixel_highbd(int val, int bd) { switch (bd) { case 8: default: return (uint16_t)clamp(val, 0, 255); case 10: return (uint16_t)clamp(val, 0, 1023); case 12: return (uint16_t)clamp(val, 0, 4095); } } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_VPX_DSP_COMMON_H_ libvpx-1.8.2/vpx_dsp/vpx_dsp_rtcd.c000066400000000000000000000010641357355204000173610ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #define RTCD_C #include "./vpx_dsp_rtcd.h" #include "vpx_ports/vpx_once.h" void vpx_dsp_rtcd() { once(setup_rtcd_internal); } libvpx-1.8.2/vpx_dsp/vpx_dsp_rtcd_defs.pl000066400000000000000000003311461357355204000205620ustar00rootroot00000000000000## ## Copyright (c) 2017 The WebM project authors. 
All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## sub vpx_dsp_forward_decls() { print < #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif #define FILTER_BITS 7 #define SUBPEL_BITS 4 #define SUBPEL_MASK ((1 << SUBPEL_BITS) - 1) #define SUBPEL_SHIFTS (1 << SUBPEL_BITS) #define SUBPEL_TAPS 8 typedef int16_t InterpKernel[SUBPEL_TAPS]; static INLINE int vpx_get_filter_taps(const int16_t *const filter) { assert(filter[3] != 128); if (!filter[0] && !filter[1] && !filter[2]) return 2; else return 8; } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_VPX_FILTER_H_ libvpx-1.8.2/vpx_dsp/x86/000077500000000000000000000000001357355204000151425ustar00rootroot00000000000000libvpx-1.8.2/vpx_dsp/x86/add_noise_sse2.asm000066400000000000000000000046041357355204000205310ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" SECTION .text ;void vpx_plane_add_noise_sse2(uint8_t *start, const int8_t *noise, ; int blackclamp, int whiteclamp, ; int width, int height, int pitch) global sym(vpx_plane_add_noise_sse2) PRIVATE sym(vpx_plane_add_noise_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 GET_GOT rbx push rsi push rdi mov rdx, 0x01010101 mov rax, arg(2) mul rdx movq xmm3, rax pshufd xmm3, xmm3, 0 ; xmm3 is 16 copies of char in blackclamp mov rdx, 0x01010101 mov rax, arg(3) mul rdx movq xmm4, rax pshufd xmm4, xmm4, 0 ; xmm4 is 16 copies of char in whiteclamp movdqu xmm5, xmm3 ; both clamp = black clamp + white clamp paddusb xmm5, xmm4 .addnoise_loop: call sym(LIBVPX_RAND) WRT_PLT mov rcx, arg(1) ;noise and rax, 0xff add rcx, rax mov rdi, rcx movsxd rcx, dword arg(4) ;[Width] mov rsi, arg(0) ;Pos xor rax, rax .addnoise_nextset: movdqu xmm1,[rsi+rax] ; get the source psubusb xmm1, xmm3 ; subtract black clamp paddusb xmm1, xmm5 ; add both clamp psubusb xmm1, xmm4 ; subtract whiteclamp movdqu xmm2,[rdi+rax] ; get the noise for this line paddb xmm1,xmm2 ; add it in movdqu [rsi+rax],xmm1 ; store the result add rax,16 ; move to the next line cmp rax, rcx jl .addnoise_nextset movsxd rax, dword arg(6) ; Pitch add arg(0), rax ; Start += Pitch sub dword arg(5), 1 ; Height -= 1 jg .addnoise_loop ; begin epilog pop rdi pop rsi RESTORE_GOT UNSHADOW_ARGS pop rbp ret SECTION_RODATA align 16 rd42: times 8 dw 0x04 four8s: times 4 dd 8 libvpx-1.8.2/vpx_dsp/x86/avg_intrin_avx2.c000066400000000000000000000424011357355204000204070ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <immintrin.h> #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/x86/bitdepth_conversion_avx2.h" #include "vpx_ports/mem.h" #if CONFIG_VP9_HIGHBITDEPTH static void highbd_hadamard_col8_avx2(__m256i *in, int iter) { __m256i a0 = in[0]; __m256i a1 = in[1]; __m256i a2 = in[2]; __m256i a3 = in[3]; __m256i a4 = in[4]; __m256i a5 = in[5]; __m256i a6 = in[6]; __m256i a7 = in[7]; __m256i b0 = _mm256_add_epi32(a0, a1); __m256i b1 = _mm256_sub_epi32(a0, a1); __m256i b2 = _mm256_add_epi32(a2, a3); __m256i b3 = _mm256_sub_epi32(a2, a3); __m256i b4 = _mm256_add_epi32(a4, a5); __m256i b5 = _mm256_sub_epi32(a4, a5); __m256i b6 = _mm256_add_epi32(a6, a7); __m256i b7 = _mm256_sub_epi32(a6, a7); a0 = _mm256_add_epi32(b0, b2); a1 = _mm256_add_epi32(b1, b3); a2 = _mm256_sub_epi32(b0, b2); a3 = _mm256_sub_epi32(b1, b3); a4 = _mm256_add_epi32(b4, b6); a5 = _mm256_add_epi32(b5, b7); a6 = _mm256_sub_epi32(b4, b6); a7 = _mm256_sub_epi32(b5, b7); if (iter == 0) { b0 = _mm256_add_epi32(a0, a4); b7 = _mm256_add_epi32(a1, a5); b3 = _mm256_add_epi32(a2, a6); b4 = _mm256_add_epi32(a3, a7); b2 = _mm256_sub_epi32(a0, a4); b6 = _mm256_sub_epi32(a1, a5); b1 = _mm256_sub_epi32(a2, a6); b5 = _mm256_sub_epi32(a3, a7); a0 = _mm256_unpacklo_epi32(b0, b1); a1 = _mm256_unpacklo_epi32(b2, b3); a2 = _mm256_unpackhi_epi32(b0, b1); a3 = _mm256_unpackhi_epi32(b2, b3); a4 = _mm256_unpacklo_epi32(b4, b5); a5 = _mm256_unpacklo_epi32(b6, b7); a6 = _mm256_unpackhi_epi32(b4, b5); a7 = _mm256_unpackhi_epi32(b6, b7); b0 = _mm256_unpacklo_epi64(a0, a1); b1 = _mm256_unpacklo_epi64(a4, a5); b2 = _mm256_unpackhi_epi64(a0, a1); b3 = _mm256_unpackhi_epi64(a4, a5); b4 = _mm256_unpacklo_epi64(a2, a3); b5 = _mm256_unpacklo_epi64(a6, a7); b6 = _mm256_unpackhi_epi64(a2, a3); b7 = _mm256_unpackhi_epi64(a6, a7); in[0] = _mm256_permute2x128_si256(b0, b1, 0x20); in[1] = _mm256_permute2x128_si256(b0, b1, 0x31); in[2] = _mm256_permute2x128_si256(b2, b3, 0x20); in[3] = _mm256_permute2x128_si256(b2, b3, 0x31); in[4] = _mm256_permute2x128_si256(b4, b5, 0x20); in[5] = _mm256_permute2x128_si256(b4, b5, 0x31); in[6] = _mm256_permute2x128_si256(b6, b7, 0x20); in[7] = _mm256_permute2x128_si256(b6, b7, 0x31); } else { in[0] = _mm256_add_epi32(a0, a4); in[7] = _mm256_add_epi32(a1, a5); in[3] = _mm256_add_epi32(a2, a6); in[4] = _mm256_add_epi32(a3, a7); in[2] = _mm256_sub_epi32(a0, a4); in[6] = _mm256_sub_epi32(a1, a5); in[1] = _mm256_sub_epi32(a2, a6); in[5] = _mm256_sub_epi32(a3, a7); } } void vpx_highbd_hadamard_8x8_avx2(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { __m128i src16[8]; __m256i src32[8]; src16[0] = _mm_loadu_si128((const __m128i *)src_diff); src16[1] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); src16[2] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); src16[3] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); src16[4] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); src16[5] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); src16[6] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); src16[7] = _mm_loadu_si128((const __m128i *)(src_diff += src_stride)); src32[0] = _mm256_cvtepi16_epi32(src16[0]); src32[1] = _mm256_cvtepi16_epi32(src16[1]); src32[2] = _mm256_cvtepi16_epi32(src16[2]); src32[3] = _mm256_cvtepi16_epi32(src16[3]); src32[4] = _mm256_cvtepi16_epi32(src16[4]); src32[5] = _mm256_cvtepi16_epi32(src16[5]); src32[6] = _mm256_cvtepi16_epi32(src16[6]); src32[7] = _mm256_cvtepi16_epi32(src16[7]); highbd_hadamard_col8_avx2(src32, 0);
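// The call above (iter == 0) applied the butterfly stages and transposed the
// 8x8 block; the call below (iter == 1) applies the butterflies along the
// second dimension, completing the 2-D Hadamard transform.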
highbd_hadamard_col8_avx2(src32, 1); _mm256_storeu_si256((__m256i *)coeff, src32[0]); coeff += 8; _mm256_storeu_si256((__m256i *)coeff, src32[1]); coeff += 8; _mm256_storeu_si256((__m256i *)coeff, src32[2]); coeff += 8; _mm256_storeu_si256((__m256i *)coeff, src32[3]); coeff += 8; _mm256_storeu_si256((__m256i *)coeff, src32[4]); coeff += 8; _mm256_storeu_si256((__m256i *)coeff, src32[5]); coeff += 8; _mm256_storeu_si256((__m256i *)coeff, src32[6]); coeff += 8; _mm256_storeu_si256((__m256i *)coeff, src32[7]); } void vpx_highbd_hadamard_16x16_avx2(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int idx; tran_low_t *t_coeff = coeff; for (idx = 0; idx < 4; ++idx) { const int16_t *src_ptr = src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; vpx_highbd_hadamard_8x8_avx2(src_ptr, src_stride, t_coeff + idx * 64); } for (idx = 0; idx < 64; idx += 8) { __m256i coeff0 = _mm256_loadu_si256((const __m256i *)t_coeff); __m256i coeff1 = _mm256_loadu_si256((const __m256i *)(t_coeff + 64)); __m256i coeff2 = _mm256_loadu_si256((const __m256i *)(t_coeff + 128)); __m256i coeff3 = _mm256_loadu_si256((const __m256i *)(t_coeff + 192)); __m256i b0 = _mm256_add_epi32(coeff0, coeff1); __m256i b1 = _mm256_sub_epi32(coeff0, coeff1); __m256i b2 = _mm256_add_epi32(coeff2, coeff3); __m256i b3 = _mm256_sub_epi32(coeff2, coeff3); b0 = _mm256_srai_epi32(b0, 1); b1 = _mm256_srai_epi32(b1, 1); b2 = _mm256_srai_epi32(b2, 1); b3 = _mm256_srai_epi32(b3, 1); coeff0 = _mm256_add_epi32(b0, b2); coeff1 = _mm256_add_epi32(b1, b3); coeff2 = _mm256_sub_epi32(b0, b2); coeff3 = _mm256_sub_epi32(b1, b3); _mm256_storeu_si256((__m256i *)coeff, coeff0); _mm256_storeu_si256((__m256i *)(coeff + 64), coeff1); _mm256_storeu_si256((__m256i *)(coeff + 128), coeff2); _mm256_storeu_si256((__m256i *)(coeff + 192), coeff3); coeff += 8; t_coeff += 8; } } void vpx_highbd_hadamard_32x32_avx2(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { int idx; tran_low_t *t_coeff = coeff; for (idx = 0; idx < 4; ++idx) { const int16_t *src_ptr = src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; vpx_highbd_hadamard_16x16_avx2(src_ptr, src_stride, t_coeff + idx * 256); } for (idx = 0; idx < 256; idx += 8) { __m256i coeff0 = _mm256_loadu_si256((const __m256i *)t_coeff); __m256i coeff1 = _mm256_loadu_si256((const __m256i *)(t_coeff + 256)); __m256i coeff2 = _mm256_loadu_si256((const __m256i *)(t_coeff + 512)); __m256i coeff3 = _mm256_loadu_si256((const __m256i *)(t_coeff + 768)); __m256i b0 = _mm256_add_epi32(coeff0, coeff1); __m256i b1 = _mm256_sub_epi32(coeff0, coeff1); __m256i b2 = _mm256_add_epi32(coeff2, coeff3); __m256i b3 = _mm256_sub_epi32(coeff2, coeff3); b0 = _mm256_srai_epi32(b0, 2); b1 = _mm256_srai_epi32(b1, 2); b2 = _mm256_srai_epi32(b2, 2); b3 = _mm256_srai_epi32(b3, 2); coeff0 = _mm256_add_epi32(b0, b2); coeff1 = _mm256_add_epi32(b1, b3); coeff2 = _mm256_sub_epi32(b0, b2); coeff3 = _mm256_sub_epi32(b1, b3); _mm256_storeu_si256((__m256i *)coeff, coeff0); _mm256_storeu_si256((__m256i *)(coeff + 256), coeff1); _mm256_storeu_si256((__m256i *)(coeff + 512), coeff2); _mm256_storeu_si256((__m256i *)(coeff + 768), coeff3); coeff += 8; t_coeff += 8; } } #endif // CONFIG_VP9_HIGHBITDEPTH static void hadamard_col8x2_avx2(__m256i *in, int iter) { __m256i a0 = in[0]; __m256i a1 = in[1]; __m256i a2 = in[2]; __m256i a3 = in[3]; __m256i a4 = in[4]; __m256i a5 = in[5]; __m256i a6 = in[6]; __m256i a7 = in[7]; __m256i b0 = _mm256_add_epi16(a0, a1); __m256i b1 = _mm256_sub_epi16(a0, a1); __m256i b2 = 
_mm256_add_epi16(a2, a3); __m256i b3 = _mm256_sub_epi16(a2, a3); __m256i b4 = _mm256_add_epi16(a4, a5); __m256i b5 = _mm256_sub_epi16(a4, a5); __m256i b6 = _mm256_add_epi16(a6, a7); __m256i b7 = _mm256_sub_epi16(a6, a7); a0 = _mm256_add_epi16(b0, b2); a1 = _mm256_add_epi16(b1, b3); a2 = _mm256_sub_epi16(b0, b2); a3 = _mm256_sub_epi16(b1, b3); a4 = _mm256_add_epi16(b4, b6); a5 = _mm256_add_epi16(b5, b7); a6 = _mm256_sub_epi16(b4, b6); a7 = _mm256_sub_epi16(b5, b7); if (iter == 0) { b0 = _mm256_add_epi16(a0, a4); b7 = _mm256_add_epi16(a1, a5); b3 = _mm256_add_epi16(a2, a6); b4 = _mm256_add_epi16(a3, a7); b2 = _mm256_sub_epi16(a0, a4); b6 = _mm256_sub_epi16(a1, a5); b1 = _mm256_sub_epi16(a2, a6); b5 = _mm256_sub_epi16(a3, a7); a0 = _mm256_unpacklo_epi16(b0, b1); a1 = _mm256_unpacklo_epi16(b2, b3); a2 = _mm256_unpackhi_epi16(b0, b1); a3 = _mm256_unpackhi_epi16(b2, b3); a4 = _mm256_unpacklo_epi16(b4, b5); a5 = _mm256_unpacklo_epi16(b6, b7); a6 = _mm256_unpackhi_epi16(b4, b5); a7 = _mm256_unpackhi_epi16(b6, b7); b0 = _mm256_unpacklo_epi32(a0, a1); b1 = _mm256_unpacklo_epi32(a4, a5); b2 = _mm256_unpackhi_epi32(a0, a1); b3 = _mm256_unpackhi_epi32(a4, a5); b4 = _mm256_unpacklo_epi32(a2, a3); b5 = _mm256_unpacklo_epi32(a6, a7); b6 = _mm256_unpackhi_epi32(a2, a3); b7 = _mm256_unpackhi_epi32(a6, a7); in[0] = _mm256_unpacklo_epi64(b0, b1); in[1] = _mm256_unpackhi_epi64(b0, b1); in[2] = _mm256_unpacklo_epi64(b2, b3); in[3] = _mm256_unpackhi_epi64(b2, b3); in[4] = _mm256_unpacklo_epi64(b4, b5); in[5] = _mm256_unpackhi_epi64(b4, b5); in[6] = _mm256_unpacklo_epi64(b6, b7); in[7] = _mm256_unpackhi_epi64(b6, b7); } else { in[0] = _mm256_add_epi16(a0, a4); in[7] = _mm256_add_epi16(a1, a5); in[3] = _mm256_add_epi16(a2, a6); in[4] = _mm256_add_epi16(a3, a7); in[2] = _mm256_sub_epi16(a0, a4); in[6] = _mm256_sub_epi16(a1, a5); in[1] = _mm256_sub_epi16(a2, a6); in[5] = _mm256_sub_epi16(a3, a7); } } static void hadamard_8x8x2_avx2(const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff) { __m256i src[8]; src[0] = _mm256_loadu_si256((const __m256i *)src_diff); src[1] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride)); src[2] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride)); src[3] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride)); src[4] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride)); src[5] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride)); src[6] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride)); src[7] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride)); hadamard_col8x2_avx2(src, 0); hadamard_col8x2_avx2(src, 1); _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[0], src[1], 0x20)); coeff += 16; _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[2], src[3], 0x20)); coeff += 16; _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[4], src[5], 0x20)); coeff += 16; _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[6], src[7], 0x20)); coeff += 16; _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[0], src[1], 0x31)); coeff += 16; _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[2], src[3], 0x31)); coeff += 16; _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[4], src[5], 0x31)); coeff += 16; _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[6], src[7], 0x31)); } static INLINE void hadamard_16x16_avx2(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff, int 
is_final) { #if CONFIG_VP9_HIGHBITDEPTH DECLARE_ALIGNED(32, int16_t, temp_coeff[16 * 16]); int16_t *t_coeff = temp_coeff; #else int16_t *t_coeff = coeff; #endif int16_t *coeff16 = (int16_t *)coeff; int idx; for (idx = 0; idx < 2; ++idx) { const int16_t *src_ptr = src_diff + idx * 8 * src_stride; hadamard_8x8x2_avx2(src_ptr, src_stride, t_coeff + (idx * 64 * 2)); } for (idx = 0; idx < 64; idx += 16) { const __m256i coeff0 = _mm256_loadu_si256((const __m256i *)t_coeff); const __m256i coeff1 = _mm256_loadu_si256((const __m256i *)(t_coeff + 64)); const __m256i coeff2 = _mm256_loadu_si256((const __m256i *)(t_coeff + 128)); const __m256i coeff3 = _mm256_loadu_si256((const __m256i *)(t_coeff + 192)); __m256i b0 = _mm256_add_epi16(coeff0, coeff1); __m256i b1 = _mm256_sub_epi16(coeff0, coeff1); __m256i b2 = _mm256_add_epi16(coeff2, coeff3); __m256i b3 = _mm256_sub_epi16(coeff2, coeff3); b0 = _mm256_srai_epi16(b0, 1); b1 = _mm256_srai_epi16(b1, 1); b2 = _mm256_srai_epi16(b2, 1); b3 = _mm256_srai_epi16(b3, 1); if (is_final) { store_tran_low(_mm256_add_epi16(b0, b2), coeff); store_tran_low(_mm256_add_epi16(b1, b3), coeff + 64); store_tran_low(_mm256_sub_epi16(b0, b2), coeff + 128); store_tran_low(_mm256_sub_epi16(b1, b3), coeff + 192); coeff += 16; } else { _mm256_storeu_si256((__m256i *)coeff16, _mm256_add_epi16(b0, b2)); _mm256_storeu_si256((__m256i *)(coeff16 + 64), _mm256_add_epi16(b1, b3)); _mm256_storeu_si256((__m256i *)(coeff16 + 128), _mm256_sub_epi16(b0, b2)); _mm256_storeu_si256((__m256i *)(coeff16 + 192), _mm256_sub_epi16(b1, b3)); coeff16 += 16; } t_coeff += 16; } } void vpx_hadamard_16x16_avx2(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { hadamard_16x16_avx2(src_diff, src_stride, coeff, 1); } void vpx_hadamard_32x32_avx2(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { #if CONFIG_VP9_HIGHBITDEPTH // For high bitdepths, it is unnecessary to store_tran_low // (mult/unpack/store), then load_tran_low (load/pack) the same memory in the // next stage. Output to an intermediate buffer first, then store_tran_low() // in the final stage. 
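// (The 16x16 quadrants below are produced with is_final == 0, so t_coeff
// keeps 16-bit intermediates; the combining loop then rescales by >> 2 and
// widens once via store_tran_low().)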
DECLARE_ALIGNED(32, int16_t, temp_coeff[32 * 32]); int16_t *t_coeff = temp_coeff; #else int16_t *t_coeff = coeff; #endif int idx; for (idx = 0; idx < 4; ++idx) { // src_diff: 9 bit, dynamic range [-255, 255] const int16_t *src_ptr = src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; hadamard_16x16_avx2(src_ptr, src_stride, (tran_low_t *)(t_coeff + idx * 256), 0); } for (idx = 0; idx < 256; idx += 16) { const __m256i coeff0 = _mm256_loadu_si256((const __m256i *)t_coeff); const __m256i coeff1 = _mm256_loadu_si256((const __m256i *)(t_coeff + 256)); const __m256i coeff2 = _mm256_loadu_si256((const __m256i *)(t_coeff + 512)); const __m256i coeff3 = _mm256_loadu_si256((const __m256i *)(t_coeff + 768)); __m256i b0 = _mm256_add_epi16(coeff0, coeff1); __m256i b1 = _mm256_sub_epi16(coeff0, coeff1); __m256i b2 = _mm256_add_epi16(coeff2, coeff3); __m256i b3 = _mm256_sub_epi16(coeff2, coeff3); b0 = _mm256_srai_epi16(b0, 2); b1 = _mm256_srai_epi16(b1, 2); b2 = _mm256_srai_epi16(b2, 2); b3 = _mm256_srai_epi16(b3, 2); store_tran_low(_mm256_add_epi16(b0, b2), coeff); store_tran_low(_mm256_add_epi16(b1, b3), coeff + 256); store_tran_low(_mm256_sub_epi16(b0, b2), coeff + 512); store_tran_low(_mm256_sub_epi16(b1, b3), coeff + 768); coeff += 16; t_coeff += 16; } } int vpx_satd_avx2(const tran_low_t *coeff, int length) { const __m256i one = _mm256_set1_epi16(1); __m256i accum = _mm256_setzero_si256(); int i; for (i = 0; i < length; i += 16) { const __m256i src_line = load_tran_low(coeff); const __m256i abs = _mm256_abs_epi16(src_line); const __m256i sum = _mm256_madd_epi16(abs, one); accum = _mm256_add_epi32(accum, sum); coeff += 16; } { // 32 bit horizontal add const __m256i a = _mm256_srli_si256(accum, 8); const __m256i b = _mm256_add_epi32(accum, a); const __m256i c = _mm256_srli_epi64(b, 32); const __m256i d = _mm256_add_epi32(b, c); const __m128i accum_128 = _mm_add_epi32(_mm256_castsi256_si128(d), _mm256_extractf128_si256(d, 1)); return _mm_cvtsi128_si32(accum_128); } } #if CONFIG_VP9_HIGHBITDEPTH int vpx_highbd_satd_avx2(const tran_low_t *coeff, int length) { __m256i accum = _mm256_setzero_si256(); int i; for (i = 0; i < length; i += 8, coeff += 8) { const __m256i src_line = _mm256_loadu_si256((const __m256i *)coeff); const __m256i abs = _mm256_abs_epi32(src_line); accum = _mm256_add_epi32(accum, abs); } { // 32 bit horizontal add const __m256i a = _mm256_srli_si256(accum, 8); const __m256i b = _mm256_add_epi32(accum, a); const __m256i c = _mm256_srli_epi64(b, 32); const __m256i d = _mm256_add_epi32(b, c); const __m128i accum_128 = _mm_add_epi32(_mm256_castsi256_si128(d), _mm256_extractf128_si256(d, 1)); return _mm_cvtsi128_si32(accum_128); } } #endif // CONFIG_VP9_HIGHBITDEPTH libvpx-1.8.2/vpx_dsp/x86/avg_intrin_sse2.c000066400000000000000000000473021357355204000204100ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <emmintrin.h> #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/x86/bitdepth_conversion_sse2.h" #include "vpx_ports/mem.h" void vpx_minmax_8x8_sse2(const uint8_t *s, int p, const uint8_t *d, int dp, int *min, int *max) { __m128i u0, s0, d0, diff, maxabsdiff, minabsdiff, negdiff, absdiff0, absdiff; u0 = _mm_setzero_si128(); // Row 0 s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0); d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d)), u0); diff = _mm_subs_epi16(s0, d0); negdiff = _mm_subs_epi16(u0, diff); absdiff0 = _mm_max_epi16(diff, negdiff); // Row 1 s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0); d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + dp)), u0); diff = _mm_subs_epi16(s0, d0); negdiff = _mm_subs_epi16(u0, diff); absdiff = _mm_max_epi16(diff, negdiff); maxabsdiff = _mm_max_epi16(absdiff0, absdiff); minabsdiff = _mm_min_epi16(absdiff0, absdiff); // Row 2 s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0); d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 2 * dp)), u0); diff = _mm_subs_epi16(s0, d0); negdiff = _mm_subs_epi16(u0, diff); absdiff = _mm_max_epi16(diff, negdiff); maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); minabsdiff = _mm_min_epi16(minabsdiff, absdiff); // Row 3 s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0); d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 3 * dp)), u0); diff = _mm_subs_epi16(s0, d0); negdiff = _mm_subs_epi16(u0, diff); absdiff = _mm_max_epi16(diff, negdiff); maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); minabsdiff = _mm_min_epi16(minabsdiff, absdiff); // Row 4 s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0); d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 4 * dp)), u0); diff = _mm_subs_epi16(s0, d0); negdiff = _mm_subs_epi16(u0, diff); absdiff = _mm_max_epi16(diff, negdiff); maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); minabsdiff = _mm_min_epi16(minabsdiff, absdiff); // Row 5 s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0); d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 5 * dp)), u0); diff = _mm_subs_epi16(s0, d0); negdiff = _mm_subs_epi16(u0, diff); absdiff = _mm_max_epi16(diff, negdiff); maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); minabsdiff = _mm_min_epi16(minabsdiff, absdiff); // Row 6 s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0); d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 6 * dp)), u0); diff = _mm_subs_epi16(s0, d0); negdiff = _mm_subs_epi16(u0, diff); absdiff = _mm_max_epi16(diff, negdiff); maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); minabsdiff = _mm_min_epi16(minabsdiff, absdiff); // Row 7 s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0); d0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(d + 7 * dp)), u0); diff = _mm_subs_epi16(s0, d0); negdiff = _mm_subs_epi16(u0, diff); absdiff = _mm_max_epi16(diff, negdiff); maxabsdiff = _mm_max_epi16(maxabsdiff, absdiff); minabsdiff = _mm_min_epi16(minabsdiff, absdiff); maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_si128(maxabsdiff, 8)); maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_epi64(maxabsdiff, 32)); maxabsdiff = _mm_max_epi16(maxabsdiff, _mm_srli_epi64(maxabsdiff, 16)); *max = _mm_extract_epi16(maxabsdiff, 0); minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_si128(minabsdiff, 8)); minabsdiff = _mm_min_epi16(minabsdiff, _mm_srli_epi64(minabsdiff, 32)); minabsdiff =
_mm_min_epi16(minabsdiff, _mm_srli_epi64(minabsdiff, 16)); *min = _mm_extract_epi16(minabsdiff, 0); } unsigned int vpx_avg_8x8_sse2(const uint8_t *s, int p) { __m128i s0, s1, u0; unsigned int avg = 0; u0 = _mm_setzero_si128(); s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0); s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 4 * p)), u0); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 5 * p)), u0); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 6 * p)), u0); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 7 * p)), u0); s0 = _mm_adds_epu16(s0, s1); s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 8)); s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 32)); s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16)); avg = _mm_extract_epi16(s0, 0); return (avg + 32) >> 6; } unsigned int vpx_avg_4x4_sse2(const uint8_t *s, int p) { __m128i s0, s1, u0; unsigned int avg = 0; u0 = _mm_setzero_si128(); s0 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s)), u0); s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + p)), u0); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 2 * p)), u0); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)(s + 3 * p)), u0); s0 = _mm_adds_epu16(s0, s1); s0 = _mm_adds_epu16(s0, _mm_srli_si128(s0, 4)); s0 = _mm_adds_epu16(s0, _mm_srli_epi64(s0, 16)); avg = _mm_extract_epi16(s0, 0); return (avg + 8) >> 4; } #if CONFIG_VP9_HIGHBITDEPTH unsigned int vpx_highbd_avg_8x8_sse2(const uint8_t *s8, int p) { __m128i s0, s1; unsigned int avg; const uint16_t *s = CONVERT_TO_SHORTPTR(s8); const __m128i zero = _mm_setzero_si128(); s0 = _mm_loadu_si128((const __m128i *)(s)); s1 = _mm_loadu_si128((const __m128i *)(s + p)); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_loadu_si128((const __m128i *)(s + 2 * p)); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_loadu_si128((const __m128i *)(s + 3 * p)); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_loadu_si128((const __m128i *)(s + 4 * p)); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_loadu_si128((const __m128i *)(s + 5 * p)); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_loadu_si128((const __m128i *)(s + 6 * p)); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_loadu_si128((const __m128i *)(s + 7 * p)); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_unpackhi_epi16(s0, zero); s0 = _mm_unpacklo_epi16(s0, zero); s0 = _mm_add_epi32(s0, s1); s0 = _mm_add_epi32(s0, _mm_srli_si128(s0, 8)); s0 = _mm_add_epi32(s0, _mm_srli_si128(s0, 4)); avg = _mm_cvtsi128_si32(s0); return (avg + 32) >> 6; } unsigned int vpx_highbd_avg_4x4_sse2(const uint8_t *s8, int p) { __m128i s0, s1; unsigned int avg; const uint16_t *s = CONVERT_TO_SHORTPTR(s8); s0 = _mm_loadl_epi64((const __m128i *)(s)); s1 = _mm_loadl_epi64((const __m128i *)(s + p)); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_loadl_epi64((const __m128i *)(s + 2 * p)); s0 = _mm_adds_epu16(s0, s1); s1 = _mm_loadl_epi64((const __m128i *)(s + 3 * p)); s0 = _mm_adds_epu16(s0, s1); s0 = _mm_add_epi16(s0, _mm_srli_si128(s0, 4)); s0 = _mm_add_epi16(s0, _mm_srli_si128(s0, 2)); avg = _mm_extract_epi16(s0, 0); return (avg + 8) >> 4; } #endif // 
CONFIG_VP9_HIGHBITDEPTH static void hadamard_col8_sse2(__m128i *in, int iter) { __m128i a0 = in[0]; __m128i a1 = in[1]; __m128i a2 = in[2]; __m128i a3 = in[3]; __m128i a4 = in[4]; __m128i a5 = in[5]; __m128i a6 = in[6]; __m128i a7 = in[7]; __m128i b0 = _mm_add_epi16(a0, a1); __m128i b1 = _mm_sub_epi16(a0, a1); __m128i b2 = _mm_add_epi16(a2, a3); __m128i b3 = _mm_sub_epi16(a2, a3); __m128i b4 = _mm_add_epi16(a4, a5); __m128i b5 = _mm_sub_epi16(a4, a5); __m128i b6 = _mm_add_epi16(a6, a7); __m128i b7 = _mm_sub_epi16(a6, a7); a0 = _mm_add_epi16(b0, b2); a1 = _mm_add_epi16(b1, b3); a2 = _mm_sub_epi16(b0, b2); a3 = _mm_sub_epi16(b1, b3); a4 = _mm_add_epi16(b4, b6); a5 = _mm_add_epi16(b5, b7); a6 = _mm_sub_epi16(b4, b6); a7 = _mm_sub_epi16(b5, b7); if (iter == 0) { b0 = _mm_add_epi16(a0, a4); b7 = _mm_add_epi16(a1, a5); b3 = _mm_add_epi16(a2, a6); b4 = _mm_add_epi16(a3, a7); b2 = _mm_sub_epi16(a0, a4); b6 = _mm_sub_epi16(a1, a5); b1 = _mm_sub_epi16(a2, a6); b5 = _mm_sub_epi16(a3, a7); a0 = _mm_unpacklo_epi16(b0, b1); a1 = _mm_unpacklo_epi16(b2, b3); a2 = _mm_unpackhi_epi16(b0, b1); a3 = _mm_unpackhi_epi16(b2, b3); a4 = _mm_unpacklo_epi16(b4, b5); a5 = _mm_unpacklo_epi16(b6, b7); a6 = _mm_unpackhi_epi16(b4, b5); a7 = _mm_unpackhi_epi16(b6, b7); b0 = _mm_unpacklo_epi32(a0, a1); b1 = _mm_unpacklo_epi32(a4, a5); b2 = _mm_unpackhi_epi32(a0, a1); b3 = _mm_unpackhi_epi32(a4, a5); b4 = _mm_unpacklo_epi32(a2, a3); b5 = _mm_unpacklo_epi32(a6, a7); b6 = _mm_unpackhi_epi32(a2, a3); b7 = _mm_unpackhi_epi32(a6, a7); in[0] = _mm_unpacklo_epi64(b0, b1); in[1] = _mm_unpackhi_epi64(b0, b1); in[2] = _mm_unpacklo_epi64(b2, b3); in[3] = _mm_unpackhi_epi64(b2, b3); in[4] = _mm_unpacklo_epi64(b4, b5); in[5] = _mm_unpackhi_epi64(b4, b5); in[6] = _mm_unpacklo_epi64(b6, b7); in[7] = _mm_unpackhi_epi64(b6, b7); } else { in[0] = _mm_add_epi16(a0, a4); in[7] = _mm_add_epi16(a1, a5); in[3] = _mm_add_epi16(a2, a6); in[4] = _mm_add_epi16(a3, a7); in[2] = _mm_sub_epi16(a0, a4); in[6] = _mm_sub_epi16(a1, a5); in[1] = _mm_sub_epi16(a2, a6); in[5] = _mm_sub_epi16(a3, a7); } } static INLINE void hadamard_8x8_sse2(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff, int is_final) { __m128i src[8]; src[0] = _mm_load_si128((const __m128i *)src_diff); src[1] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); src[2] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); src[3] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); src[4] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); src[5] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); src[6] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); src[7] = _mm_load_si128((const __m128i *)(src_diff += src_stride)); hadamard_col8_sse2(src, 0); hadamard_col8_sse2(src, 1); if (is_final) { store_tran_low(src[0], coeff); coeff += 8; store_tran_low(src[1], coeff); coeff += 8; store_tran_low(src[2], coeff); coeff += 8; store_tran_low(src[3], coeff); coeff += 8; store_tran_low(src[4], coeff); coeff += 8; store_tran_low(src[5], coeff); coeff += 8; store_tran_low(src[6], coeff); coeff += 8; store_tran_low(src[7], coeff); } else { int16_t *coeff16 = (int16_t *)coeff; _mm_store_si128((__m128i *)coeff16, src[0]); coeff16 += 8; _mm_store_si128((__m128i *)coeff16, src[1]); coeff16 += 8; _mm_store_si128((__m128i *)coeff16, src[2]); coeff16 += 8; _mm_store_si128((__m128i *)coeff16, src[3]); coeff16 += 8; _mm_store_si128((__m128i *)coeff16, src[4]); coeff16 += 8; _mm_store_si128((__m128i *)coeff16, src[5]); coeff16 += 8; 
_mm_store_si128((__m128i *)coeff16, src[6]); coeff16 += 8; _mm_store_si128((__m128i *)coeff16, src[7]); } } void vpx_hadamard_8x8_sse2(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { hadamard_8x8_sse2(src_diff, src_stride, coeff, 1); } static INLINE void hadamard_16x16_sse2(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff, int is_final) { #if CONFIG_VP9_HIGHBITDEPTH // For high bitdepths, it is unnecessary to store_tran_low // (mult/unpack/store), then load_tran_low (load/pack) the same memory in the // next stage. Output to an intermediate buffer first, then store_tran_low() // in the final stage. DECLARE_ALIGNED(32, int16_t, temp_coeff[16 * 16]); int16_t *t_coeff = temp_coeff; #else int16_t *t_coeff = coeff; #endif int16_t *coeff16 = (int16_t *)coeff; int idx; for (idx = 0; idx < 4; ++idx) { const int16_t *src_ptr = src_diff + (idx >> 1) * 8 * src_stride + (idx & 0x01) * 8; hadamard_8x8_sse2(src_ptr, src_stride, (tran_low_t *)(t_coeff + idx * 64), 0); } for (idx = 0; idx < 64; idx += 8) { __m128i coeff0 = _mm_load_si128((const __m128i *)t_coeff); __m128i coeff1 = _mm_load_si128((const __m128i *)(t_coeff + 64)); __m128i coeff2 = _mm_load_si128((const __m128i *)(t_coeff + 128)); __m128i coeff3 = _mm_load_si128((const __m128i *)(t_coeff + 192)); __m128i b0 = _mm_add_epi16(coeff0, coeff1); __m128i b1 = _mm_sub_epi16(coeff0, coeff1); __m128i b2 = _mm_add_epi16(coeff2, coeff3); __m128i b3 = _mm_sub_epi16(coeff2, coeff3); b0 = _mm_srai_epi16(b0, 1); b1 = _mm_srai_epi16(b1, 1); b2 = _mm_srai_epi16(b2, 1); b3 = _mm_srai_epi16(b3, 1); coeff0 = _mm_add_epi16(b0, b2); coeff1 = _mm_add_epi16(b1, b3); coeff2 = _mm_sub_epi16(b0, b2); coeff3 = _mm_sub_epi16(b1, b3); if (is_final) { store_tran_low(coeff0, coeff); store_tran_low(coeff1, coeff + 64); store_tran_low(coeff2, coeff + 128); store_tran_low(coeff3, coeff + 192); coeff += 8; } else { _mm_store_si128((__m128i *)coeff16, coeff0); _mm_store_si128((__m128i *)(coeff16 + 64), coeff1); _mm_store_si128((__m128i *)(coeff16 + 128), coeff2); _mm_store_si128((__m128i *)(coeff16 + 192), coeff3); coeff16 += 8; } t_coeff += 8; } } void vpx_hadamard_16x16_sse2(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { hadamard_16x16_sse2(src_diff, src_stride, coeff, 1); } void vpx_hadamard_32x32_sse2(const int16_t *src_diff, ptrdiff_t src_stride, tran_low_t *coeff) { #if CONFIG_VP9_HIGHBITDEPTH // For high bitdepths, it is unnecessary to store_tran_low // (mult/unpack/store), then load_tran_low (load/pack) the same memory in the // next stage. Output to an intermediate buffer first, then store_tran_low() // in the final stage. 
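// A scalar sketch of the combining loop further below, where c0..c3 are
// corresponding coefficients of the four 16x16 Hadamard sub-results:
//   b0 = (c0 + c1) >> 2;  b1 = (c0 - c1) >> 2;
//   b2 = (c2 + c3) >> 2;  b3 = (c2 - c3) >> 2;
//   out0 = b0 + b2;  out1 = b1 + b3;  out2 = b0 - b2;  out3 = b1 - b3;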
DECLARE_ALIGNED(32, int16_t, temp_coeff[32 * 32]); int16_t *t_coeff = temp_coeff; #else int16_t *t_coeff = coeff; #endif int idx; for (idx = 0; idx < 4; ++idx) { const int16_t *src_ptr = src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16; hadamard_16x16_sse2(src_ptr, src_stride, (tran_low_t *)(t_coeff + idx * 256), 0); } for (idx = 0; idx < 256; idx += 8) { __m128i coeff0 = _mm_load_si128((const __m128i *)t_coeff); __m128i coeff1 = _mm_load_si128((const __m128i *)(t_coeff + 256)); __m128i coeff2 = _mm_load_si128((const __m128i *)(t_coeff + 512)); __m128i coeff3 = _mm_load_si128((const __m128i *)(t_coeff + 768)); __m128i b0 = _mm_add_epi16(coeff0, coeff1); __m128i b1 = _mm_sub_epi16(coeff0, coeff1); __m128i b2 = _mm_add_epi16(coeff2, coeff3); __m128i b3 = _mm_sub_epi16(coeff2, coeff3); b0 = _mm_srai_epi16(b0, 2); b1 = _mm_srai_epi16(b1, 2); b2 = _mm_srai_epi16(b2, 2); b3 = _mm_srai_epi16(b3, 2); coeff0 = _mm_add_epi16(b0, b2); coeff1 = _mm_add_epi16(b1, b3); store_tran_low(coeff0, coeff); store_tran_low(coeff1, coeff + 256); coeff2 = _mm_sub_epi16(b0, b2); coeff3 = _mm_sub_epi16(b1, b3); store_tran_low(coeff2, coeff + 512); store_tran_low(coeff3, coeff + 768); coeff += 8; t_coeff += 8; } } int vpx_satd_sse2(const tran_low_t *coeff, int length) { int i; const __m128i zero = _mm_setzero_si128(); __m128i accum = zero; for (i = 0; i < length; i += 8) { const __m128i src_line = load_tran_low(coeff); const __m128i inv = _mm_sub_epi16(zero, src_line); const __m128i abs = _mm_max_epi16(src_line, inv); // abs(src_line) const __m128i abs_lo = _mm_unpacklo_epi16(abs, zero); const __m128i abs_hi = _mm_unpackhi_epi16(abs, zero); const __m128i sum = _mm_add_epi32(abs_lo, abs_hi); accum = _mm_add_epi32(accum, sum); coeff += 8; } { // cascading summation of accum __m128i hi = _mm_srli_si128(accum, 8); accum = _mm_add_epi32(accum, hi); hi = _mm_srli_epi64(accum, 32); accum = _mm_add_epi32(accum, hi); } return _mm_cvtsi128_si32(accum); } void vpx_int_pro_row_sse2(int16_t *hbuf, const uint8_t *ref, const int ref_stride, const int height) { int idx; __m128i zero = _mm_setzero_si128(); __m128i src_line = _mm_loadu_si128((const __m128i *)ref); __m128i s0 = _mm_unpacklo_epi8(src_line, zero); __m128i s1 = _mm_unpackhi_epi8(src_line, zero); __m128i t0, t1; int height_1 = height - 1; ref += ref_stride; for (idx = 1; idx < height_1; idx += 2) { src_line = _mm_loadu_si128((const __m128i *)ref); t0 = _mm_unpacklo_epi8(src_line, zero); t1 = _mm_unpackhi_epi8(src_line, zero); s0 = _mm_adds_epu16(s0, t0); s1 = _mm_adds_epu16(s1, t1); ref += ref_stride; src_line = _mm_loadu_si128((const __m128i *)ref); t0 = _mm_unpacklo_epi8(src_line, zero); t1 = _mm_unpackhi_epi8(src_line, zero); s0 = _mm_adds_epu16(s0, t0); s1 = _mm_adds_epu16(s1, t1); ref += ref_stride; } src_line = _mm_loadu_si128((const __m128i *)ref); t0 = _mm_unpacklo_epi8(src_line, zero); t1 = _mm_unpackhi_epi8(src_line, zero); s0 = _mm_adds_epu16(s0, t0); s1 = _mm_adds_epu16(s1, t1); if (height == 64) { s0 = _mm_srai_epi16(s0, 5); s1 = _mm_srai_epi16(s1, 5); } else if (height == 32) { s0 = _mm_srai_epi16(s0, 4); s1 = _mm_srai_epi16(s1, 4); } else { s0 = _mm_srai_epi16(s0, 3); s1 = _mm_srai_epi16(s1, 3); } _mm_storeu_si128((__m128i *)hbuf, s0); hbuf += 8; _mm_storeu_si128((__m128i *)hbuf, s1); } int16_t vpx_int_pro_col_sse2(const uint8_t *ref, const int width) { __m128i zero = _mm_setzero_si128(); __m128i src_line = _mm_loadu_si128((const __m128i *)ref); __m128i s0 = _mm_sad_epu8(src_line, zero); __m128i s1; int i; for (i = 16; i < width; i += 16) { ref += 
16; src_line = _mm_loadu_si128((const __m128i *)ref); s1 = _mm_sad_epu8(src_line, zero); s0 = _mm_adds_epu16(s0, s1); } s1 = _mm_srli_si128(s0, 8); s0 = _mm_adds_epu16(s0, s1); return _mm_extract_epi16(s0, 0); } int vpx_vector_var_sse2(const int16_t *ref, const int16_t *src, const int bwl) { int idx; int width = 4 << bwl; int16_t mean; __m128i v0 = _mm_loadu_si128((const __m128i *)ref); __m128i v1 = _mm_load_si128((const __m128i *)src); __m128i diff = _mm_subs_epi16(v0, v1); __m128i sum = diff; __m128i sse = _mm_madd_epi16(diff, diff); ref += 8; src += 8; for (idx = 8; idx < width; idx += 8) { v0 = _mm_loadu_si128((const __m128i *)ref); v1 = _mm_load_si128((const __m128i *)src); diff = _mm_subs_epi16(v0, v1); sum = _mm_add_epi16(sum, diff); v0 = _mm_madd_epi16(diff, diff); sse = _mm_add_epi32(sse, v0); ref += 8; src += 8; } v0 = _mm_srli_si128(sum, 8); sum = _mm_add_epi16(sum, v0); v0 = _mm_srli_epi64(sum, 32); sum = _mm_add_epi16(sum, v0); v0 = _mm_srli_epi32(sum, 16); sum = _mm_add_epi16(sum, v0); v1 = _mm_srli_si128(sse, 8); sse = _mm_add_epi32(sse, v1); v1 = _mm_srli_epi64(sse, 32); sse = _mm_add_epi32(sse, v1); mean = (int16_t)_mm_extract_epi16(sum, 0); return _mm_cvtsi128_si32(sse) - ((mean * mean) >> (bwl + 2)); } libvpx-1.8.2/vpx_dsp/x86/avg_pred_sse2.c000066400000000000000000000047141357355204000200370ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <emmintrin.h> #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/x86/mem_sse2.h" void vpx_comp_avg_pred_sse2(uint8_t *comp_pred, const uint8_t *pred, int width, int height, const uint8_t *ref, int ref_stride) { /* comp_pred and pred must be 16 byte aligned. */ assert(((intptr_t)comp_pred & 0xf) == 0); assert(((intptr_t)pred & 0xf) == 0); if (width > 8) { int x, y; for (y = 0; y < height; ++y) { for (x = 0; x < width; x += 16) { const __m128i p = _mm_load_si128((const __m128i *)(pred + x)); const __m128i r = _mm_loadu_si128((const __m128i *)(ref + x)); const __m128i avg = _mm_avg_epu8(p, r); _mm_store_si128((__m128i *)(comp_pred + x), avg); } comp_pred += width; pred += width; ref += ref_stride; } } else { // width must be 4 or 8. int i; // Process 16 elements at a time. comp_pred and pred have width == stride // and therefore live in contiguous memory. 4*4, 4*8, 8*4, 8*8, and 8*16 are // all divisible by 16 so just ref needs to be massaged when loading.
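// Scalar equivalent of the averaging below (a reference sketch):
//   comp_pred[i] = (pred[i] + ref[i] + 1) >> 1;
// which is exactly what _mm_avg_epu8() computes for each byte lane.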
for (i = 0; i < width * height; i += 16) { const __m128i p = _mm_load_si128((const __m128i *)pred); __m128i r; __m128i avg; if (width == ref_stride) { r = _mm_loadu_si128((const __m128i *)ref); ref += 16; } else if (width == 4) { r = _mm_set_epi32(loadu_uint32(ref + 3 * ref_stride), loadu_uint32(ref + 2 * ref_stride), loadu_uint32(ref + ref_stride), loadu_uint32(ref)); ref += 4 * ref_stride; } else { const __m128i r_0 = _mm_loadl_epi64((const __m128i *)ref); assert(width == 8); r = _mm_castps_si128(_mm_loadh_pi(_mm_castsi128_ps(r_0), (const __m64 *)(ref + ref_stride))); ref += 2 * ref_stride; } avg = _mm_avg_epu8(p, r); _mm_store_si128((__m128i *)comp_pred, avg); pred += 16; comp_pred += 16; } } } libvpx-1.8.2/vpx_dsp/x86/avg_ssse3_x86_64.asm000066400000000000000000000064511357355204000205650ustar00rootroot00000000000000; ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "third_party/x86inc/x86inc.asm" %include "vpx_dsp/x86/bitdepth_conversion_sse2.asm" SECTION .text %if VPX_ARCH_X86_64 ; matrix transpose %macro TRANSPOSE8X8 10 ; stage 1 punpcklwd m%9, m%1, m%2 punpcklwd m%10, m%3, m%4 punpckhwd m%1, m%2 punpckhwd m%3, m%4 punpcklwd m%2, m%5, m%6 punpcklwd m%4, m%7, m%8 punpckhwd m%5, m%6 punpckhwd m%7, m%8 ; stage 2 punpckldq m%6, m%9, m%10 punpckldq m%8, m%1, m%3 punpckhdq m%9, m%10 punpckhdq m%1, m%3 punpckldq m%10, m%2, m%4 punpckldq m%3, m%5, m%7 punpckhdq m%2, m%4 punpckhdq m%5, m%7 ; stage 3 punpckhqdq m%4, m%9, m%2 ; out3 punpcklqdq m%9, m%2 ; out2 punpcklqdq m%7, m%1, m%5 ; out6 punpckhqdq m%1, m%5 ; out7 punpckhqdq m%2, m%6, m%10 ; out1 punpcklqdq m%6, m%10 ; out0 punpcklqdq m%5, m%8, m%3 ; out4 punpckhqdq m%8, m%3 ; out5 SWAP %6, %1 SWAP %3, %9 SWAP %8, %6 %endmacro %macro HMD8_1D 0 psubw m8, m0, m1 psubw m9, m2, m3 paddw m0, m1 paddw m2, m3 SWAP 1, 8 SWAP 3, 9 psubw m8, m4, m5 psubw m9, m6, m7 paddw m4, m5 paddw m6, m7 SWAP 5, 8 SWAP 7, 9 psubw m8, m0, m2 psubw m9, m1, m3 paddw m0, m2 paddw m1, m3 SWAP 2, 8 SWAP 3, 9 psubw m8, m4, m6 psubw m9, m5, m7 paddw m4, m6 paddw m5, m7 SWAP 6, 8 SWAP 7, 9 psubw m8, m0, m4 psubw m9, m1, m5 paddw m0, m4 paddw m1, m5 SWAP 4, 8 SWAP 5, 9 psubw m8, m2, m6 psubw m9, m3, m7 paddw m2, m6 paddw m3, m7 SWAP 6, 8 SWAP 7, 9 %endmacro INIT_XMM ssse3 cglobal hadamard_8x8, 3, 5, 11, input, stride, output lea r3, [2 * strideq] lea r4, [4 * strideq] mova m0, [inputq] mova m1, [inputq + r3] lea inputq, [inputq + r4] mova m2, [inputq] mova m3, [inputq + r3] lea inputq, [inputq + r4] mova m4, [inputq] mova m5, [inputq + r3] lea inputq, [inputq + r4] mova m6, [inputq] mova m7, [inputq + r3] HMD8_1D TRANSPOSE8X8 0, 1, 2, 3, 4, 5, 6, 7, 9, 10 HMD8_1D STORE_TRAN_LOW 0, outputq, 0, 8, 9 STORE_TRAN_LOW 1, outputq, 8, 8, 9 STORE_TRAN_LOW 2, outputq, 16, 8, 9 STORE_TRAN_LOW 3, outputq, 24, 8, 9 STORE_TRAN_LOW 4, outputq, 32, 8, 9 STORE_TRAN_LOW 5, outputq, 40, 8, 9 STORE_TRAN_LOW 6, outputq, 48, 8, 9 STORE_TRAN_LOW 7, outputq, 56, 8, 9 RET %endif libvpx-1.8.2/vpx_dsp/x86/bitdepth_conversion_avx2.h000066400000000000000000000030771357355204000223320ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_ #define VPX_VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_ #include #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_dsp_common.h" // Load 16 16 bit values. If the source is 32 bits then pack down with // saturation. static INLINE __m256i load_tran_low(const tran_low_t *a) { #if CONFIG_VP9_HIGHBITDEPTH const __m256i a_low = _mm256_loadu_si256((const __m256i *)a); const __m256i a_high = _mm256_loadu_si256((const __m256i *)(a + 8)); return _mm256_packs_epi32(a_low, a_high); #else return _mm256_loadu_si256((const __m256i *)a); #endif } static INLINE void store_tran_low(__m256i a, tran_low_t *b) { #if CONFIG_VP9_HIGHBITDEPTH const __m256i one = _mm256_set1_epi16(1); const __m256i a_hi = _mm256_mulhi_epi16(a, one); const __m256i a_lo = _mm256_mullo_epi16(a, one); const __m256i a_1 = _mm256_unpacklo_epi16(a_lo, a_hi); const __m256i a_2 = _mm256_unpackhi_epi16(a_lo, a_hi); _mm256_storeu_si256((__m256i *)b, a_1); _mm256_storeu_si256((__m256i *)(b + 8), a_2); #else _mm256_storeu_si256((__m256i *)b, a); #endif } #endif // VPX_VPX_DSP_X86_BITDEPTH_CONVERSION_AVX2_H_ libvpx-1.8.2/vpx_dsp/x86/bitdepth_conversion_sse2.asm000066400000000000000000000051671357355204000226610ustar00rootroot00000000000000; ; Copyright (c) 2017 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; ; TODO(johannkoenig): Add the necessary include guards to vpx_config.asm. ; vpx_config.asm is not guarded so can not be included twice. Because this will ; be used in conjunction with x86_abi_support.asm or x86inc.asm, it must be ; included after those files. ; Increment register by sizeof() tran_low_t * 8. %macro INCREMENT_TRAN_LOW 1 %if CONFIG_VP9_HIGHBITDEPTH add %1, 32 %else add %1, 16 %endif %endmacro ; Increment %1 by sizeof() tran_low_t * %2. %macro INCREMENT_ELEMENTS_TRAN_LOW 2 %if CONFIG_VP9_HIGHBITDEPTH lea %1, [%1 + %2 * 4] %else lea %1, [%1 + %2 * 2] %endif %endmacro ; Load %2 + %3 into m%1. ; %3 is the offset in elements, not bytes. ; If tran_low_t is 16 bits (low bit depth configuration) then load the value ; directly. If tran_low_t is 32 bits (high bit depth configuration) then pack ; the values down to 16 bits. %macro LOAD_TRAN_LOW 3 %if CONFIG_VP9_HIGHBITDEPTH mova m%1, [%2 + (%3) * 4] packssdw m%1, [%2 + (%3) * 4 + 16] %else mova m%1, [%2 + (%3) * 2] %endif %endmacro ; Store m%1 to %2 + %3. ; %3 is the offset in elements, not bytes. ; If 5 arguments are provided then m%1 is corrupted. ; If 6 arguments are provided then m%1 is preserved. ; If tran_low_t is 16 bits (low bit depth configuration) then store the value ; directly. If tran_low_t is 32 bits (high bit depth configuration) then sign ; extend the values first. ; Uses m%4-m%6 as scratch registers for high bit depth. 
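; For a real call site, see hadamard_8x8 in avg_ssse3_x86_64.asm above, e.g.
;   STORE_TRAN_LOW 0, outputq, 0, 8, 9
; (the 5-argument form, so m0 may be corrupted and m8/m9 act as scratch).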
%macro STORE_TRAN_LOW 5-6 %if CONFIG_VP9_HIGHBITDEPTH pxor m%4, m%4 mova m%5, m%1 %if %0 == 6 mova m%6, m%1 %endif pcmpgtw m%4, m%1 punpcklwd m%5, m%4 %if %0 == 5 punpckhwd m%1, m%4 %else punpckhwd m%6, m%4 %endif mova [%2 + (%3) * 4 + 0], m%5 %if %0 == 5 mova [%2 + (%3) * 4 + 16], m%1 %else mova [%2 + (%3) * 4 + 16], m%6 %endif %else mova [%2 + (%3) * 2], m%1 %endif %endmacro ; Store zeros (in m%1) to %2 + %3. ; %3 is the offset in elements, not bytes. %macro STORE_ZERO_TRAN_LOW 3 %if CONFIG_VP9_HIGHBITDEPTH mova [%2 + (%3) * 4 + 0], m%1 mova [%2 + (%3) * 4 + 16], m%1 %else mova [%2 + (%3) * 2], m%1 %endif %endmacro libvpx-1.8.2/vpx_dsp/x86/bitdepth_conversion_sse2.h000066400000000000000000000036221357355204000223220ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_BITDEPTH_CONVERSION_SSE2_H_ #define VPX_VPX_DSP_X86_BITDEPTH_CONVERSION_SSE2_H_ #include #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/vpx_dsp_common.h" // Load 8 16 bit values. If the source is 32 bits then pack down with // saturation. static INLINE __m128i load_tran_low(const tran_low_t *a) { #if CONFIG_VP9_HIGHBITDEPTH const __m128i a_low = _mm_load_si128((const __m128i *)a); return _mm_packs_epi32(a_low, *(const __m128i *)(a + 4)); #else return _mm_load_si128((const __m128i *)a); #endif } // Store 8 16 bit values. If the destination is 32 bits then sign extend the // values by multiplying by 1. static INLINE void store_tran_low(__m128i a, tran_low_t *b) { #if CONFIG_VP9_HIGHBITDEPTH const __m128i one = _mm_set1_epi16(1); const __m128i a_hi = _mm_mulhi_epi16(a, one); const __m128i a_lo = _mm_mullo_epi16(a, one); const __m128i a_1 = _mm_unpacklo_epi16(a_lo, a_hi); const __m128i a_2 = _mm_unpackhi_epi16(a_lo, a_hi); _mm_store_si128((__m128i *)(b), a_1); _mm_store_si128((__m128i *)(b + 4), a_2); #else _mm_store_si128((__m128i *)(b), a); #endif } // Zero fill 8 positions in the output buffer. static INLINE void store_zero_tran_low(tran_low_t *a) { const __m128i zero = _mm_setzero_si128(); #if CONFIG_VP9_HIGHBITDEPTH _mm_store_si128((__m128i *)(a), zero); _mm_store_si128((__m128i *)(a + 4), zero); #else _mm_store_si128((__m128i *)(a), zero); #endif } #endif // VPX_VPX_DSP_X86_BITDEPTH_CONVERSION_SSE2_H_ libvpx-1.8.2/vpx_dsp/x86/convolve.h000066400000000000000000000475061357355204000171620ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_CONVOLVE_H_ #define VPX_VPX_DSP_X86_CONVOLVE_H_ #include #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_ports/mem.h" // TODO(chiyotsai@google.com): Refactor the code here. Currently this is pretty // hacky and awful to read. 
Note that there is a filter_x[3] == 128 check in // HIGHBD_FUN_CONV_2D to avoid seg fault due to the fact that the c function // assumes the filter is always 8 tap. typedef void filter8_1dfunction(const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr, ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter); // TODO(chiyotsai@google.com): Remove the is_avg argument to the MACROS once we // have 4-tap vert avg filter. #define FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt, is_avg) \ void vpx_convolve8_##name##_##opt( \ const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \ const int16_t *filter_row = filter[offset]; \ (void)x0_q4; \ (void)x_step_q4; \ (void)y0_q4; \ (void)y_step_q4; \ assert(filter_row[3] != 128); \ assert(step_q4 == 16); \ if (filter_row[0] | filter_row[1] | filter_row[6] | filter_row[7]) { \ const int num_taps = 8; \ while (w >= 16) { \ vpx_filter_block1d16_##dir##8_##avg##opt(src_start, src_stride, dst, \ dst_stride, h, filter_row); \ src += 16; \ dst += 16; \ w -= 16; \ } \ if (w == 8) { \ vpx_filter_block1d8_##dir##8_##avg##opt(src_start, src_stride, dst, \ dst_stride, h, filter_row); \ } else if (w == 4) { \ vpx_filter_block1d4_##dir##8_##avg##opt(src_start, src_stride, dst, \ dst_stride, h, filter_row); \ } \ (void)num_taps; \ } else if (filter_row[2] | filter_row[5]) { \ const int num_taps = is_avg ? 8 : 4; \ while (w >= 16) { \ vpx_filter_block1d16_##dir##4_##avg##opt(src_start, src_stride, dst, \ dst_stride, h, filter_row); \ src += 16; \ dst += 16; \ w -= 16; \ } \ if (w == 8) { \ vpx_filter_block1d8_##dir##4_##avg##opt(src_start, src_stride, dst, \ dst_stride, h, filter_row); \ } else if (w == 4) { \ vpx_filter_block1d4_##dir##4_##avg##opt(src_start, src_stride, dst, \ dst_stride, h, filter_row); \ } \ (void)num_taps; \ } else { \ const int num_taps = 2; \ while (w >= 16) { \ vpx_filter_block1d16_##dir##2_##avg##opt(src_start, src_stride, dst, \ dst_stride, h, filter_row); \ src += 16; \ dst += 16; \ w -= 16; \ } \ if (w == 8) { \ vpx_filter_block1d8_##dir##2_##avg##opt(src_start, src_stride, dst, \ dst_stride, h, filter_row); \ } else if (w == 4) { \ vpx_filter_block1d4_##dir##2_##avg##opt(src_start, src_stride, dst, \ dst_stride, h, filter_row); \ } \ (void)num_taps; \ } \ } #define FUN_CONV_2D(avg, opt, is_avg) \ void vpx_convolve8_##avg##opt( \ const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, \ ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { \ const int16_t *filter_x = filter[x0_q4]; \ const int16_t *filter_y = filter[y0_q4]; \ (void)filter_y; \ assert(filter_x[3] != 128); \ assert(filter_y[3] != 128); \ assert(w <= 64); \ assert(h <= 64); \ assert(x_step_q4 == 16); \ assert(y_step_q4 == 16); \ if (filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]) { \ DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ vpx_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, fdata2, 64, \ filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, \ h + 7); \ vpx_convolve8_##avg##vert_##opt(fdata2 + 3 * 64, 64, dst, dst_stride, \ filter, x0_q4, x_step_q4, y0_q4, \ y_step_q4, w, h); \ } else if (filter_x[2] | filter_x[5]) { \ const int num_taps = is_avg ? 
8 : 4; \ DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ vpx_convolve8_horiz_##opt( \ src - (num_taps / 2 - 1) * src_stride, src_stride, fdata2, 64, \ filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h + num_taps - 1); \ vpx_convolve8_##avg##vert_##opt(fdata2 + 64 * (num_taps / 2 - 1), 64, \ dst, dst_stride, filter, x0_q4, \ x_step_q4, y0_q4, y_step_q4, w, h); \ } else { \ DECLARE_ALIGNED(16, uint8_t, fdata2[64 * 65] VPX_UNINITIALIZED); \ vpx_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, x0_q4, \ x_step_q4, y0_q4, y_step_q4, w, h + 1); \ vpx_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, filter, \ x0_q4, x_step_q4, y0_q4, y_step_q4, w, \ h); \ } \ } #if CONFIG_VP9_HIGHBITDEPTH typedef void highbd_filter8_1dfunction(const uint16_t *src_ptr, const ptrdiff_t src_pitch, uint16_t *output_ptr, ptrdiff_t out_pitch, unsigned int output_height, const int16_t *filter, int bd); #define HIGH_FUN_CONV_1D(name, offset, step_q4, dir, src_start, avg, opt, \ is_avg) \ void vpx_highbd_convolve8_##name##_##opt( \ const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \ ptrdiff_t dst_stride, const InterpKernel *filter_kernel, int x0_q4, \ int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \ const int16_t *filter_row = filter_kernel[offset]; \ if (step_q4 == 16 && filter_row[3] != 128) { \ if (filter_row[0] | filter_row[1] | filter_row[6] | filter_row[7]) { \ const int num_taps = 8; \ while (w >= 16) { \ vpx_highbd_filter_block1d16_##dir##8_##avg##opt( \ src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 16; \ dst += 16; \ w -= 16; \ } \ while (w >= 8) { \ vpx_highbd_filter_block1d8_##dir##8_##avg##opt( \ src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 8; \ dst += 8; \ w -= 8; \ } \ while (w >= 4) { \ vpx_highbd_filter_block1d4_##dir##8_##avg##opt( \ src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 4; \ dst += 4; \ w -= 4; \ } \ (void)num_taps; \ } else if (filter_row[2] | filter_row[5]) { \ const int num_taps = is_avg ? 
8 : 4; \ while (w >= 16) { \ vpx_highbd_filter_block1d16_##dir##4_##avg##opt( \ src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 16; \ dst += 16; \ w -= 16; \ } \ while (w >= 8) { \ vpx_highbd_filter_block1d8_##dir##4_##avg##opt( \ src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 8; \ dst += 8; \ w -= 8; \ } \ while (w >= 4) { \ vpx_highbd_filter_block1d4_##dir##4_##avg##opt( \ src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 4; \ dst += 4; \ w -= 4; \ } \ (void)num_taps; \ } else { \ const int num_taps = 2; \ while (w >= 16) { \ vpx_highbd_filter_block1d16_##dir##2_##avg##opt( \ src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 16; \ dst += 16; \ w -= 16; \ } \ while (w >= 8) { \ vpx_highbd_filter_block1d8_##dir##2_##avg##opt( \ src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 8; \ dst += 8; \ w -= 8; \ } \ while (w >= 4) { \ vpx_highbd_filter_block1d4_##dir##2_##avg##opt( \ src_start, src_stride, dst, dst_stride, h, filter_row, bd); \ src += 4; \ dst += 4; \ w -= 4; \ } \ (void)num_taps; \ } \ } \ if (w) { \ vpx_highbd_convolve8_##name##_c(src, src_stride, dst, dst_stride, \ filter_kernel, x0_q4, x_step_q4, y0_q4, \ y_step_q4, w, h, bd); \ } \ } #define HIGH_FUN_CONV_2D(avg, opt, is_avg) \ void vpx_highbd_convolve8_##avg##opt( \ const uint16_t *src, ptrdiff_t src_stride, uint16_t *dst, \ ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, \ int x_step_q4, int y0_q4, int y_step_q4, int w, int h, int bd) { \ const int16_t *filter_x = filter[x0_q4]; \ assert(w <= 64); \ assert(h <= 64); \ if (x_step_q4 == 16 && y_step_q4 == 16) { \ if ((filter_x[0] | filter_x[1] | filter_x[6] | filter_x[7]) || \ filter_x[3] == 128) { \ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ vpx_highbd_convolve8_horiz_##opt(src - 3 * src_stride, src_stride, \ fdata2, 64, filter, x0_q4, x_step_q4, \ y0_q4, y_step_q4, w, h + 7, bd); \ vpx_highbd_convolve8_##avg##vert_##opt( \ fdata2 + 192, 64, dst, dst_stride, filter, x0_q4, x_step_q4, \ y0_q4, y_step_q4, w, h, bd); \ } else if (filter_x[2] | filter_x[5]) { \ const int num_taps = is_avg ? 8 : 4; \ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 71] VPX_UNINITIALIZED); \ vpx_highbd_convolve8_horiz_##opt( \ src - (num_taps / 2 - 1) * src_stride, src_stride, fdata2, 64, \ filter, x0_q4, x_step_q4, y0_q4, y_step_q4, w, h + num_taps - 1, \ bd); \ vpx_highbd_convolve8_##avg##vert_##opt( \ fdata2 + 64 * (num_taps / 2 - 1), 64, dst, dst_stride, filter, \ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, bd); \ } else { \ DECLARE_ALIGNED(16, uint16_t, fdata2[64 * 65] VPX_UNINITIALIZED); \ vpx_highbd_convolve8_horiz_##opt(src, src_stride, fdata2, 64, filter, \ x0_q4, x_step_q4, y0_q4, y_step_q4, \ w, h + 1, bd); \ vpx_highbd_convolve8_##avg##vert_##opt(fdata2, 64, dst, dst_stride, \ filter, x0_q4, x_step_q4, \ y0_q4, y_step_q4, w, h, bd); \ } \ } else { \ vpx_highbd_convolve8_##avg##c(src, src_stride, dst, dst_stride, filter, \ x0_q4, x_step_q4, y0_q4, y_step_q4, w, h, \ bd); \ } \ } #endif // CONFIG_VP9_HIGHBITDEPTH #endif // VPX_VPX_DSP_X86_CONVOLVE_H_ libvpx-1.8.2/vpx_dsp/x86/convolve_avx2.h000066400000000000000000000156751357355204000201240ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_CONVOLVE_AVX2_H_ #define VPX_VPX_DSP_X86_CONVOLVE_AVX2_H_ #include // AVX2 #include "./vpx_config.h" #if defined(__clang__) #if (__clang_major__ > 0 && __clang_major__ < 3) || \ (__clang_major__ == 3 && __clang_minor__ <= 3) || \ (defined(__APPLE__) && defined(__apple_build_version__) && \ ((__clang_major__ == 4 && __clang_minor__ <= 2) || \ (__clang_major__ == 5 && __clang_minor__ == 0))) #define MM256_BROADCASTSI128_SI256(x) \ _mm_broadcastsi128_si256((__m128i const *)&(x)) #else // clang > 3.3, and not 5.0 on macosx. #define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) #endif // clang <= 3.3 #elif defined(__GNUC__) #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 6) #define MM256_BROADCASTSI128_SI256(x) \ _mm_broadcastsi128_si256((__m128i const *)&(x)) #elif __GNUC__ == 4 && __GNUC_MINOR__ == 7 #define MM256_BROADCASTSI128_SI256(x) _mm_broadcastsi128_si256(x) #else // gcc > 4.7 #define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) #endif // gcc <= 4.6 #else // !(gcc || clang) #define MM256_BROADCASTSI128_SI256(x) _mm256_broadcastsi128_si256(x) #endif // __clang__ static INLINE void shuffle_filter_avx2(const int16_t *const filter, __m256i *const f) { const __m256i f_values = MM256_BROADCASTSI128_SI256(_mm_load_si128((const __m128i *)filter)); // pack and duplicate the filter values f[0] = _mm256_shuffle_epi8(f_values, _mm256_set1_epi16(0x0200u)); f[1] = _mm256_shuffle_epi8(f_values, _mm256_set1_epi16(0x0604u)); f[2] = _mm256_shuffle_epi8(f_values, _mm256_set1_epi16(0x0a08u)); f[3] = _mm256_shuffle_epi8(f_values, _mm256_set1_epi16(0x0e0cu)); } static INLINE __m256i convolve8_16_avx2(const __m256i *const s, const __m256i *const f) { // multiply 2 adjacent elements with the filter and add the result const __m256i k_64 = _mm256_set1_epi16(1 << 6); const __m256i x0 = _mm256_maddubs_epi16(s[0], f[0]); const __m256i x1 = _mm256_maddubs_epi16(s[1], f[1]); const __m256i x2 = _mm256_maddubs_epi16(s[2], f[2]); const __m256i x3 = _mm256_maddubs_epi16(s[3], f[3]); __m256i sum1, sum2; // sum the results together, saturating only on the final step // adding x0 with x2 and x1 with x3 is the only order that prevents // outranges for all filters sum1 = _mm256_add_epi16(x0, x2); sum2 = _mm256_add_epi16(x1, x3); // add the rounding offset early to avoid another saturated add sum1 = _mm256_add_epi16(sum1, k_64); sum1 = _mm256_adds_epi16(sum1, sum2); // round and shift by 7 bit each 16 bit sum1 = _mm256_srai_epi16(sum1, 7); return sum1; } static INLINE __m128i convolve8_8_avx2(const __m256i *const s, const __m256i *const f) { // multiply 2 adjacent elements with the filter and add the result const __m128i k_64 = _mm_set1_epi16(1 << 6); const __m128i x0 = _mm_maddubs_epi16(_mm256_castsi256_si128(s[0]), _mm256_castsi256_si128(f[0])); const __m128i x1 = _mm_maddubs_epi16(_mm256_castsi256_si128(s[1]), _mm256_castsi256_si128(f[1])); const __m128i x2 = _mm_maddubs_epi16(_mm256_castsi256_si128(s[2]), _mm256_castsi256_si128(f[2])); const __m128i x3 = _mm_maddubs_epi16(_mm256_castsi256_si128(s[3]), _mm256_castsi256_si128(f[3])); __m128i sum1, sum2; // sum the results together, saturating only on the final step // adding x0 with x2 and x1 with x3 is the only order that prevents // outranges for all filters sum1 = _mm_add_epi16(x0, x2); sum2 = _mm_add_epi16(x1, x3); // add the rounding offset early to avoid another saturated add sum1 = _mm_add_epi16(sum1, 
k_64); sum1 = _mm_adds_epi16(sum1, sum2); // shift by 7 bit each 16 bit sum1 = _mm_srai_epi16(sum1, 7); return sum1; } static INLINE __m256i mm256_loadu2_si128(const void *lo, const void *hi) { const __m256i tmp = _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)lo)); return _mm256_inserti128_si256(tmp, _mm_loadu_si128((const __m128i *)hi), 1); } static INLINE __m256i mm256_loadu2_epi64(const void *lo, const void *hi) { const __m256i tmp = _mm256_castsi128_si256(_mm_loadl_epi64((const __m128i *)lo)); return _mm256_inserti128_si256(tmp, _mm_loadl_epi64((const __m128i *)hi), 1); } static INLINE void mm256_store2_si128(__m128i *const dst_ptr_1, __m128i *const dst_ptr_2, const __m256i *const src) { _mm_store_si128(dst_ptr_1, _mm256_castsi256_si128(*src)); _mm_store_si128(dst_ptr_2, _mm256_extractf128_si256(*src, 1)); } static INLINE void mm256_storeu2_epi64(__m128i *const dst_ptr_1, __m128i *const dst_ptr_2, const __m256i *const src) { _mm_storel_epi64(dst_ptr_1, _mm256_castsi256_si128(*src)); _mm_storel_epi64(dst_ptr_2, _mm256_extractf128_si256(*src, 1)); } static INLINE void mm256_storeu2_epi32(__m128i *const dst_ptr_1, __m128i *const dst_ptr_2, const __m256i *const src) { *((uint32_t *)(dst_ptr_1)) = _mm_cvtsi128_si32(_mm256_castsi256_si128(*src)); *((uint32_t *)(dst_ptr_2)) = _mm_cvtsi128_si32(_mm256_extractf128_si256(*src, 1)); } static INLINE __m256i mm256_round_epi32(const __m256i *const src, const __m256i *const half_depth, const int depth) { const __m256i nearest_src = _mm256_add_epi32(*src, *half_depth); return _mm256_srai_epi32(nearest_src, depth); } static INLINE __m256i mm256_round_epi16(const __m256i *const src, const __m256i *const half_depth, const int depth) { const __m256i nearest_src = _mm256_adds_epi16(*src, *half_depth); return _mm256_srai_epi16(nearest_src, depth); } static INLINE __m256i mm256_madd_add_epi32(const __m256i *const src_0, const __m256i *const src_1, const __m256i *const ker_0, const __m256i *const ker_1) { const __m256i tmp_0 = _mm256_madd_epi16(*src_0, *ker_0); const __m256i tmp_1 = _mm256_madd_epi16(*src_1, *ker_1); return _mm256_add_epi32(tmp_0, tmp_1); } #undef MM256_BROADCASTSI128_SI256 #endif // VPX_VPX_DSP_X86_CONVOLVE_AVX2_H_ libvpx-1.8.2/vpx_dsp/x86/convolve_sse2.h000066400000000000000000000076411357355204000201120ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_CONVOLVE_SSE2_H_ #define VPX_VPX_DSP_X86_CONVOLVE_SSE2_H_ #include // SSE2 #include "./vpx_config.h" // Interprets the input register as 16-bit words 7 6 5 4 3 2 1 0, then returns // values at index 2 and 3 to return 3 2 3 2 3 2 3 2 as 16-bit words static INLINE __m128i extract_quarter_2_epi16_sse2(const __m128i *const reg) { __m128i tmp = _mm_unpacklo_epi32(*reg, *reg); return _mm_unpackhi_epi64(tmp, tmp); } // Interprets the input register as 16-bit words 7 6 5 4 3 2 1 0, then returns // values at index 2 and 3 to return 5 4 5 4 5 4 5 4 as 16-bit words. 
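// (Note: the words this helper keeps are those at index 4 and 5, i.e. the
// register's third 32-bit quarter, broadcast as 5 4 5 4 5 4 5 4.)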
static INLINE __m128i extract_quarter_3_epi16_sse2(const __m128i *const reg) { __m128i tmp = _mm_unpackhi_epi32(*reg, *reg); return _mm_unpacklo_epi64(tmp, tmp); } // Interprets src as 8-bit words, zero extends to form 16-bit words, then // multiplies with ker and add the adjacent results to form 32-bit words. // Finally adds the result from 1 and 2 together. static INLINE __m128i mm_madd_add_epi8_sse2(const __m128i *const src_1, const __m128i *const src_2, const __m128i *const ker_1, const __m128i *const ker_2) { const __m128i src_1_half = _mm_unpacklo_epi8(*src_1, _mm_setzero_si128()); const __m128i src_2_half = _mm_unpacklo_epi8(*src_2, _mm_setzero_si128()); const __m128i madd_1 = _mm_madd_epi16(src_1_half, *ker_1); const __m128i madd_2 = _mm_madd_epi16(src_2_half, *ker_2); return _mm_add_epi32(madd_1, madd_2); } // Interprets src as 16-bit words, then multiplies with ker and add the // adjacent results to form 32-bit words. Finally adds the result from 1 and 2 // together. static INLINE __m128i mm_madd_add_epi16_sse2(const __m128i *const src_1, const __m128i *const src_2, const __m128i *const ker_1, const __m128i *const ker_2) { const __m128i madd_1 = _mm_madd_epi16(*src_1, *ker_1); const __m128i madd_2 = _mm_madd_epi16(*src_2, *ker_2); return _mm_add_epi32(madd_1, madd_2); } static INLINE __m128i mm_madd_packs_epi16_sse2(const __m128i *const src_0, const __m128i *const src_1, const __m128i *const ker) { const __m128i madd_1 = _mm_madd_epi16(*src_0, *ker); const __m128i madd_2 = _mm_madd_epi16(*src_1, *ker); return _mm_packs_epi32(madd_1, madd_2); } // Interleaves src_1 and src_2 static INLINE __m128i mm_zip_epi32_sse2(const __m128i *const src_1, const __m128i *const src_2) { const __m128i tmp_1 = _mm_unpacklo_epi32(*src_1, *src_2); const __m128i tmp_2 = _mm_unpackhi_epi32(*src_1, *src_2); return _mm_packs_epi32(tmp_1, tmp_2); } static INLINE __m128i mm_round_epi32_sse2(const __m128i *const src, const __m128i *const half_depth, const int depth) { const __m128i nearest_src = _mm_add_epi32(*src, *half_depth); return _mm_srai_epi32(nearest_src, depth); } static INLINE __m128i mm_round_epi16_sse2(const __m128i *const src, const __m128i *const half_depth, const int depth) { const __m128i nearest_src = _mm_adds_epi16(*src, *half_depth); return _mm_srai_epi16(nearest_src, depth); } #endif // VPX_VPX_DSP_X86_CONVOLVE_SSE2_H_ libvpx-1.8.2/vpx_dsp/x86/convolve_ssse3.h000066400000000000000000000111531357355204000202670ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_DSP_X86_CONVOLVE_SSSE3_H_ #define VPX_VPX_DSP_X86_CONVOLVE_SSSE3_H_ #include <assert.h> #include <tmmintrin.h> // SSSE3 #include "./vpx_config.h" static INLINE void shuffle_filter_ssse3(const int16_t *const filter, __m128i *const f) { const __m128i f_values = _mm_load_si128((const __m128i *)filter); // pack and duplicate the filter values f[0] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u)); f[1] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u)); f[2] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u)); f[3] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu)); } static INLINE void shuffle_filter_odd_ssse3(const int16_t *const filter, __m128i *const f) { const __m128i f_values = _mm_load_si128((const __m128i *)filter); // pack and duplicate the filter values // It utilizes the fact that the high byte of filter[3] is always 0 to clean // half of f[0] and f[4]. assert(filter[3] >= 0 && filter[3] < 256); f[0] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0007u)); f[1] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0402u)); f[2] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0806u)); f[3] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0c0au)); f[4] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x070eu)); } static INLINE __m128i convolve8_8_ssse3(const __m128i *const s, const __m128i *const f) { // multiply 2 adjacent elements with the filter and add the result const __m128i k_64 = _mm_set1_epi16(1 << 6); const __m128i x0 = _mm_maddubs_epi16(s[0], f[0]); const __m128i x1 = _mm_maddubs_epi16(s[1], f[1]); const __m128i x2 = _mm_maddubs_epi16(s[2], f[2]); const __m128i x3 = _mm_maddubs_epi16(s[3], f[3]); __m128i sum1, sum2; // sum the results together, saturating only on the final step // adding x0 with x2 and x1 with x3 is the only order that prevents // out-of-range intermediate sums for all filters sum1 = _mm_add_epi16(x0, x2); sum2 = _mm_add_epi16(x1, x3); // add the rounding offset early to avoid another saturated add sum1 = _mm_add_epi16(sum1, k_64); sum1 = _mm_adds_epi16(sum1, sum2); // shift by 7 bit each 16 bit sum1 = _mm_srai_epi16(sum1, 7); return sum1; } static INLINE __m128i convolve8_8_even_offset_ssse3(const __m128i *const s, const __m128i *const f) { // multiply 2 adjacent elements with the filter and add the result const __m128i k_64 = _mm_set1_epi16(1 << 6); const __m128i x0 = _mm_maddubs_epi16(s[0], f[0]); const __m128i x1 = _mm_maddubs_epi16(s[1], f[1]); const __m128i x2 = _mm_maddubs_epi16(s[2], f[2]); const __m128i x3 = _mm_maddubs_epi16(s[3], f[3]); // compensate the subtracted 64 in f[1]. x4 is always non-negative. const __m128i x4 = _mm_maddubs_epi16(s[1], _mm_set1_epi8(64)); // add and saturate the results together __m128i temp = _mm_adds_epi16(x0, x3); temp = _mm_adds_epi16(temp, x1); temp = _mm_adds_epi16(temp, x2); temp = _mm_adds_epi16(temp, x4); // round and shift by 7 bit each 16 bit temp = _mm_adds_epi16(temp, k_64); temp = _mm_srai_epi16(temp, 7); return temp; } static INLINE __m128i convolve8_8_odd_offset_ssse3(const __m128i *const s, const __m128i *const f) { // multiply 2 adjacent elements with the filter and add the result const __m128i k_64 = _mm_set1_epi16(1 << 6); const __m128i x0 = _mm_maddubs_epi16(s[0], f[0]); const __m128i x1 = _mm_maddubs_epi16(s[1], f[1]); const __m128i x2 = _mm_maddubs_epi16(s[2], f[2]); const __m128i x3 = _mm_maddubs_epi16(s[3], f[3]); const __m128i x4 = _mm_maddubs_epi16(s[4], f[4]); // compensate the subtracted 64 in f[2]. x5 is always non-negative.
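// The compensation uses the identity (tap - 64) * s + 64 * s == tap * s:
// storing f[2] as (tap - 64) keeps the middle tap inside the signed 8-bit
// range that _mm_maddubs_epi16() requires, and x5 restores the difference.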
const __m128i x5 = _mm_maddubs_epi16(s[2], _mm_set1_epi8(64)); __m128i temp; // add and saturate the results together temp = _mm_adds_epi16(x0, x1); temp = _mm_adds_epi16(temp, x2); temp = _mm_adds_epi16(temp, x3); temp = _mm_adds_epi16(temp, x4); temp = _mm_adds_epi16(temp, x5); // round and shift by 7 bit each 16 bit temp = _mm_adds_epi16(temp, k_64); temp = _mm_srai_epi16(temp, 7); return temp; } #endif // VPX_VPX_DSP_X86_CONVOLVE_SSSE3_H_ libvpx-1.8.2/vpx_dsp/x86/deblock_sse2.asm000066400000000000000000000303311357355204000202030ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" ;macro in deblock functions %macro FIRST_2_ROWS 0 movdqa xmm4, xmm0 movdqa xmm6, xmm0 movdqa xmm5, xmm1 pavgb xmm5, xmm3 ;calculate absolute value psubusb xmm4, xmm1 psubusb xmm1, xmm0 psubusb xmm6, xmm3 psubusb xmm3, xmm0 paddusb xmm4, xmm1 paddusb xmm6, xmm3 ;get threshold movdqa xmm2, flimit pxor xmm1, xmm1 movdqa xmm7, xmm2 ;get mask psubusb xmm2, xmm4 psubusb xmm7, xmm6 pcmpeqb xmm2, xmm1 pcmpeqb xmm7, xmm1 por xmm7, xmm2 %endmacro %macro SECOND_2_ROWS 0 movdqa xmm6, xmm0 movdqa xmm4, xmm0 movdqa xmm2, xmm1 pavgb xmm1, xmm3 ;calculate absolute value psubusb xmm6, xmm2 psubusb xmm2, xmm0 psubusb xmm4, xmm3 psubusb xmm3, xmm0 paddusb xmm6, xmm2 paddusb xmm4, xmm3 pavgb xmm5, xmm1 ;get threshold movdqa xmm2, flimit pxor xmm1, xmm1 movdqa xmm3, xmm2 ;get mask psubusb xmm2, xmm6 psubusb xmm3, xmm4 pcmpeqb xmm2, xmm1 pcmpeqb xmm3, xmm1 por xmm7, xmm2 por xmm7, xmm3 pavgb xmm5, xmm0 ;decide if or not to use filtered value pand xmm0, xmm7 pandn xmm7, xmm5 paddusb xmm0, xmm7 %endmacro %macro UPDATE_FLIMIT 0 movdqu xmm2, XMMWORD PTR [rbx] movdqu [rsp], xmm2 add rbx, 16 %endmacro SECTION .text ;void vpx_post_proc_down_and_across_mb_row_sse2 ;( ; unsigned char *src_ptr, ; unsigned char *dst_ptr, ; int src_pixels_per_line, ; int dst_pixels_per_line, ; int cols, ; int *flimits, ; int size ;) global sym(vpx_post_proc_down_and_across_mb_row_sse2) PRIVATE sym(vpx_post_proc_down_and_across_mb_row_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 ; put flimit on stack mov rbx, arg(5) ;flimits ptr UPDATE_FLIMIT %define flimit [rsp] mov rsi, arg(0) ;src_ptr mov rdi, arg(1) ;dst_ptr movsxd rax, DWORD PTR arg(2) ;src_pixels_per_line movsxd rcx, DWORD PTR arg(6) ;rows in a macroblock .nextrow: xor rdx, rdx ;col .nextcol: ;load current and next 2 rows movdqu xmm0, XMMWORD PTR [rsi] movdqu xmm1, XMMWORD PTR [rsi + rax] movdqu xmm3, XMMWORD PTR [rsi + 2*rax] FIRST_2_ROWS ;load above 2 rows neg rax movdqu xmm1, XMMWORD PTR [rsi + 2*rax] movdqu xmm3, XMMWORD PTR [rsi + rax] SECOND_2_ROWS movdqu XMMWORD PTR [rdi], xmm0 neg rax ; positive stride add rsi, 16 add rdi, 16 add rdx, 16 cmp edx, dword arg(4) ;cols jge .downdone UPDATE_FLIMIT jmp .nextcol .downdone: ; done with the all cols, start the across filtering in place sub rsi, rdx sub rdi, rdx mov rbx, arg(5) ; flimits UPDATE_FLIMIT ; dup the first byte into the left border 8 times movq mm1, [rdi] punpcklbw mm1, mm1 punpcklwd mm1, mm1 punpckldq mm1, mm1 mov rdx, -8 movq [rdi+rdx], mm1 ; dup the 
last byte into the right border movsxd rdx, dword arg(4) movq mm1, [rdi + rdx + -1] punpcklbw mm1, mm1 punpcklwd mm1, mm1 punpckldq mm1, mm1 movq [rdi+rdx], mm1 xor rdx, rdx movq mm0, QWORD PTR [rdi-16]; movq mm1, QWORD PTR [rdi-8]; .acrossnextcol: movdqu xmm0, XMMWORD PTR [rdi + rdx] movdqu xmm1, XMMWORD PTR [rdi + rdx -2] movdqu xmm3, XMMWORD PTR [rdi + rdx -1] FIRST_2_ROWS movdqu xmm1, XMMWORD PTR [rdi + rdx +1] movdqu xmm3, XMMWORD PTR [rdi + rdx +2] SECOND_2_ROWS movq QWORD PTR [rdi+rdx-16], mm0 ; store previous 8 bytes movq QWORD PTR [rdi+rdx-8], mm1 ; store previous 8 bytes movdq2q mm0, xmm0 psrldq xmm0, 8 movdq2q mm1, xmm0 add rdx, 16 cmp edx, dword arg(4) ;cols jge .acrossdone UPDATE_FLIMIT jmp .acrossnextcol .acrossdone: ; last 16 pixels movq QWORD PTR [rdi+rdx-16], mm0 cmp edx, dword arg(4) jne .throw_last_8 movq QWORD PTR [rdi+rdx-8], mm1 .throw_last_8: ; done with this rwo add rsi,rax ;next src line mov eax, dword arg(3) ;dst_pixels_per_line add rdi,rax ;next destination mov eax, dword arg(2) ;src_pixels_per_line mov rbx, arg(5) ;flimits UPDATE_FLIMIT dec rcx ;decrement count jnz .nextrow ;next row add rsp, 16 pop rsp ; begin epilog pop rdi pop rsi pop rbx RESTORE_XMM UNSHADOW_ARGS pop rbp ret %undef flimit ;void vpx_mbpost_proc_across_ip_sse2(unsigned char *src, ; int pitch, int rows, int cols,int flimit) global sym(vpx_mbpost_proc_across_ip_sse2) PRIVATE sym(vpx_mbpost_proc_across_ip_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 SAVE_XMM 7 GET_GOT rbx push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 ; create flimit4 at [rsp] mov eax, dword ptr arg(4) ;flimit mov [rsp], eax mov [rsp+4], eax mov [rsp+8], eax mov [rsp+12], eax %define flimit4 [rsp] ;for(r=0;r // AVX2 #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/txfm_common.h" #define pair256_set_epi16(a, b) \ _mm256_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a)) #define pair256_set_epi32(a, b) \ _mm256_set_epi32((int)(b), (int)(a), (int)(b), (int)(a), (int)(b), (int)(a), \ (int)(b), (int)(a)) #if FDCT32x32_HIGH_PRECISION static INLINE __m256i k_madd_epi32_avx2(__m256i a, __m256i b) { __m256i buf0, buf1; buf0 = _mm256_mul_epu32(a, b); a = _mm256_srli_epi64(a, 32); b = _mm256_srli_epi64(b, 32); buf1 = _mm256_mul_epu32(a, b); return _mm256_add_epi64(buf0, buf1); } static INLINE __m256i k_packs_epi64_avx2(__m256i a, __m256i b) { __m256i buf0 = _mm256_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 2, 0)); __m256i buf1 = _mm256_shuffle_epi32(b, _MM_SHUFFLE(0, 0, 2, 0)); return _mm256_unpacklo_epi64(buf0, buf1); } #endif void FDCT32x32_2D_AVX2(const int16_t *input, int16_t *output_org, int stride) { // Calculate pre-multiplied strides const int str1 = stride; const int str2 = 2 * stride; const int str3 = 2 * stride + str1; // We need an intermediate buffer between passes. DECLARE_ALIGNED(32, int16_t, intermediate[32 * 32]); // Constants // When we use them, in one case, they are all the same. In all others // it's a pair of them that we need to repeat four times. This is done // by constructing the 32 bit constant corresponding to that pair. 
const __m256i k__cospi_p16_p16 = _mm256_set1_epi16(cospi_16_64); const __m256i k__cospi_p16_m16 = pair256_set_epi16(+cospi_16_64, -cospi_16_64); const __m256i k__cospi_m08_p24 = pair256_set_epi16(-cospi_8_64, cospi_24_64); const __m256i k__cospi_m24_m08 = pair256_set_epi16(-cospi_24_64, -cospi_8_64); const __m256i k__cospi_p24_p08 = pair256_set_epi16(+cospi_24_64, cospi_8_64); const __m256i k__cospi_p12_p20 = pair256_set_epi16(+cospi_12_64, cospi_20_64); const __m256i k__cospi_m20_p12 = pair256_set_epi16(-cospi_20_64, cospi_12_64); const __m256i k__cospi_m04_p28 = pair256_set_epi16(-cospi_4_64, cospi_28_64); const __m256i k__cospi_p28_p04 = pair256_set_epi16(+cospi_28_64, cospi_4_64); const __m256i k__cospi_m28_m04 = pair256_set_epi16(-cospi_28_64, -cospi_4_64); const __m256i k__cospi_m12_m20 = pair256_set_epi16(-cospi_12_64, -cospi_20_64); const __m256i k__cospi_p30_p02 = pair256_set_epi16(+cospi_30_64, cospi_2_64); const __m256i k__cospi_p14_p18 = pair256_set_epi16(+cospi_14_64, cospi_18_64); const __m256i k__cospi_p22_p10 = pair256_set_epi16(+cospi_22_64, cospi_10_64); const __m256i k__cospi_p06_p26 = pair256_set_epi16(+cospi_6_64, cospi_26_64); const __m256i k__cospi_m26_p06 = pair256_set_epi16(-cospi_26_64, cospi_6_64); const __m256i k__cospi_m10_p22 = pair256_set_epi16(-cospi_10_64, cospi_22_64); const __m256i k__cospi_m18_p14 = pair256_set_epi16(-cospi_18_64, cospi_14_64); const __m256i k__cospi_m02_p30 = pair256_set_epi16(-cospi_2_64, cospi_30_64); const __m256i k__cospi_p31_p01 = pair256_set_epi16(+cospi_31_64, cospi_1_64); const __m256i k__cospi_p15_p17 = pair256_set_epi16(+cospi_15_64, cospi_17_64); const __m256i k__cospi_p23_p09 = pair256_set_epi16(+cospi_23_64, cospi_9_64); const __m256i k__cospi_p07_p25 = pair256_set_epi16(+cospi_7_64, cospi_25_64); const __m256i k__cospi_m25_p07 = pair256_set_epi16(-cospi_25_64, cospi_7_64); const __m256i k__cospi_m09_p23 = pair256_set_epi16(-cospi_9_64, cospi_23_64); const __m256i k__cospi_m17_p15 = pair256_set_epi16(-cospi_17_64, cospi_15_64); const __m256i k__cospi_m01_p31 = pair256_set_epi16(-cospi_1_64, cospi_31_64); const __m256i k__cospi_p27_p05 = pair256_set_epi16(+cospi_27_64, cospi_5_64); const __m256i k__cospi_p11_p21 = pair256_set_epi16(+cospi_11_64, cospi_21_64); const __m256i k__cospi_p19_p13 = pair256_set_epi16(+cospi_19_64, cospi_13_64); const __m256i k__cospi_p03_p29 = pair256_set_epi16(+cospi_3_64, cospi_29_64); const __m256i k__cospi_m29_p03 = pair256_set_epi16(-cospi_29_64, cospi_3_64); const __m256i k__cospi_m13_p19 = pair256_set_epi16(-cospi_13_64, cospi_19_64); const __m256i k__cospi_m21_p11 = pair256_set_epi16(-cospi_21_64, cospi_11_64); const __m256i k__cospi_m05_p27 = pair256_set_epi16(-cospi_5_64, cospi_27_64); const __m256i k__DCT_CONST_ROUNDING = _mm256_set1_epi32(DCT_CONST_ROUNDING); const __m256i kZero = _mm256_set1_epi16(0); const __m256i kOne = _mm256_set1_epi16(1); // Do the two transform/transpose passes int pass; for (pass = 0; pass < 2; ++pass) { // We process sixteen columns (transposed rows in second pass) at a time. int column_start; for (column_start = 0; column_start < 32; column_start += 16) { __m256i step1[32]; __m256i step2[32]; __m256i step3[32]; __m256i out[32]; // Stage 1 // Note: even though all the loads below are aligned, using the aligned // intrinsic make the code slightly slower. if (0 == pass) { const int16_t *in = &input[column_start]; // step1[i] = (in[ 0 * stride] + in[(32 - 1) * stride]) << 2; // Note: the next four blocks could be in a loop. 
That would help the // instruction cache but is actually slower. { const int16_t *ina = in + 0 * str1; const int16_t *inb = in + 31 * str1; __m256i *step1a = &step1[0]; __m256i *step1b = &step1[31]; const __m256i ina0 = _mm256_loadu_si256((const __m256i *)(ina)); const __m256i ina1 = _mm256_loadu_si256((const __m256i *)(ina + str1)); const __m256i ina2 = _mm256_loadu_si256((const __m256i *)(ina + str2)); const __m256i ina3 = _mm256_loadu_si256((const __m256i *)(ina + str3)); const __m256i inb3 = _mm256_loadu_si256((const __m256i *)(inb - str3)); const __m256i inb2 = _mm256_loadu_si256((const __m256i *)(inb - str2)); const __m256i inb1 = _mm256_loadu_si256((const __m256i *)(inb - str1)); const __m256i inb0 = _mm256_loadu_si256((const __m256i *)(inb)); step1a[0] = _mm256_add_epi16(ina0, inb0); step1a[1] = _mm256_add_epi16(ina1, inb1); step1a[2] = _mm256_add_epi16(ina2, inb2); step1a[3] = _mm256_add_epi16(ina3, inb3); step1b[-3] = _mm256_sub_epi16(ina3, inb3); step1b[-2] = _mm256_sub_epi16(ina2, inb2); step1b[-1] = _mm256_sub_epi16(ina1, inb1); step1b[-0] = _mm256_sub_epi16(ina0, inb0); step1a[0] = _mm256_slli_epi16(step1a[0], 2); step1a[1] = _mm256_slli_epi16(step1a[1], 2); step1a[2] = _mm256_slli_epi16(step1a[2], 2); step1a[3] = _mm256_slli_epi16(step1a[3], 2); step1b[-3] = _mm256_slli_epi16(step1b[-3], 2); step1b[-2] = _mm256_slli_epi16(step1b[-2], 2); step1b[-1] = _mm256_slli_epi16(step1b[-1], 2); step1b[-0] = _mm256_slli_epi16(step1b[-0], 2); } { const int16_t *ina = in + 4 * str1; const int16_t *inb = in + 27 * str1; __m256i *step1a = &step1[4]; __m256i *step1b = &step1[27]; const __m256i ina0 = _mm256_loadu_si256((const __m256i *)(ina)); const __m256i ina1 = _mm256_loadu_si256((const __m256i *)(ina + str1)); const __m256i ina2 = _mm256_loadu_si256((const __m256i *)(ina + str2)); const __m256i ina3 = _mm256_loadu_si256((const __m256i *)(ina + str3)); const __m256i inb3 = _mm256_loadu_si256((const __m256i *)(inb - str3)); const __m256i inb2 = _mm256_loadu_si256((const __m256i *)(inb - str2)); const __m256i inb1 = _mm256_loadu_si256((const __m256i *)(inb - str1)); const __m256i inb0 = _mm256_loadu_si256((const __m256i *)(inb)); step1a[0] = _mm256_add_epi16(ina0, inb0); step1a[1] = _mm256_add_epi16(ina1, inb1); step1a[2] = _mm256_add_epi16(ina2, inb2); step1a[3] = _mm256_add_epi16(ina3, inb3); step1b[-3] = _mm256_sub_epi16(ina3, inb3); step1b[-2] = _mm256_sub_epi16(ina2, inb2); step1b[-1] = _mm256_sub_epi16(ina1, inb1); step1b[-0] = _mm256_sub_epi16(ina0, inb0); step1a[0] = _mm256_slli_epi16(step1a[0], 2); step1a[1] = _mm256_slli_epi16(step1a[1], 2); step1a[2] = _mm256_slli_epi16(step1a[2], 2); step1a[3] = _mm256_slli_epi16(step1a[3], 2); step1b[-3] = _mm256_slli_epi16(step1b[-3], 2); step1b[-2] = _mm256_slli_epi16(step1b[-2], 2); step1b[-1] = _mm256_slli_epi16(step1b[-1], 2); step1b[-0] = _mm256_slli_epi16(step1b[-0], 2); } { const int16_t *ina = in + 8 * str1; const int16_t *inb = in + 23 * str1; __m256i *step1a = &step1[8]; __m256i *step1b = &step1[23]; const __m256i ina0 = _mm256_loadu_si256((const __m256i *)(ina)); const __m256i ina1 = _mm256_loadu_si256((const __m256i *)(ina + str1)); const __m256i ina2 = _mm256_loadu_si256((const __m256i *)(ina + str2)); const __m256i ina3 = _mm256_loadu_si256((const __m256i *)(ina + str3)); const __m256i inb3 = _mm256_loadu_si256((const __m256i *)(inb - str3)); const __m256i inb2 = _mm256_loadu_si256((const __m256i *)(inb - str2)); const __m256i inb1 = _mm256_loadu_si256((const __m256i *)(inb - str1)); const __m256i inb0 = _mm256_loadu_si256((const 
__m256i *)(inb)); step1a[0] = _mm256_add_epi16(ina0, inb0); step1a[1] = _mm256_add_epi16(ina1, inb1); step1a[2] = _mm256_add_epi16(ina2, inb2); step1a[3] = _mm256_add_epi16(ina3, inb3); step1b[-3] = _mm256_sub_epi16(ina3, inb3); step1b[-2] = _mm256_sub_epi16(ina2, inb2); step1b[-1] = _mm256_sub_epi16(ina1, inb1); step1b[-0] = _mm256_sub_epi16(ina0, inb0); step1a[0] = _mm256_slli_epi16(step1a[0], 2); step1a[1] = _mm256_slli_epi16(step1a[1], 2); step1a[2] = _mm256_slli_epi16(step1a[2], 2); step1a[3] = _mm256_slli_epi16(step1a[3], 2); step1b[-3] = _mm256_slli_epi16(step1b[-3], 2); step1b[-2] = _mm256_slli_epi16(step1b[-2], 2); step1b[-1] = _mm256_slli_epi16(step1b[-1], 2); step1b[-0] = _mm256_slli_epi16(step1b[-0], 2); } { const int16_t *ina = in + 12 * str1; const int16_t *inb = in + 19 * str1; __m256i *step1a = &step1[12]; __m256i *step1b = &step1[19]; const __m256i ina0 = _mm256_loadu_si256((const __m256i *)(ina)); const __m256i ina1 = _mm256_loadu_si256((const __m256i *)(ina + str1)); const __m256i ina2 = _mm256_loadu_si256((const __m256i *)(ina + str2)); const __m256i ina3 = _mm256_loadu_si256((const __m256i *)(ina + str3)); const __m256i inb3 = _mm256_loadu_si256((const __m256i *)(inb - str3)); const __m256i inb2 = _mm256_loadu_si256((const __m256i *)(inb - str2)); const __m256i inb1 = _mm256_loadu_si256((const __m256i *)(inb - str1)); const __m256i inb0 = _mm256_loadu_si256((const __m256i *)(inb)); step1a[0] = _mm256_add_epi16(ina0, inb0); step1a[1] = _mm256_add_epi16(ina1, inb1); step1a[2] = _mm256_add_epi16(ina2, inb2); step1a[3] = _mm256_add_epi16(ina3, inb3); step1b[-3] = _mm256_sub_epi16(ina3, inb3); step1b[-2] = _mm256_sub_epi16(ina2, inb2); step1b[-1] = _mm256_sub_epi16(ina1, inb1); step1b[-0] = _mm256_sub_epi16(ina0, inb0); step1a[0] = _mm256_slli_epi16(step1a[0], 2); step1a[1] = _mm256_slli_epi16(step1a[1], 2); step1a[2] = _mm256_slli_epi16(step1a[2], 2); step1a[3] = _mm256_slli_epi16(step1a[3], 2); step1b[-3] = _mm256_slli_epi16(step1b[-3], 2); step1b[-2] = _mm256_slli_epi16(step1b[-2], 2); step1b[-1] = _mm256_slli_epi16(step1b[-1], 2); step1b[-0] = _mm256_slli_epi16(step1b[-0], 2); } } else { int16_t *in = &intermediate[column_start]; // step1[i] = in[ 0 * 32] + in[(32 - 1) * 32]; // Note: using the same approach as above to have common offset is // counter-productive as all offsets can be calculated at compile // time. // Note: the next four blocks could be in a loop. That would help the // instruction cache but is actually slower. 
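// Equivalent loop for the four unrolled blocks that follow:
//   for (i = 0; i < 16; ++i) {
//     step1[i]      = in[i * 32] + in[(31 - i) * 32];
//     step1[31 - i] = in[i * 32] - in[(31 - i) * 32];
//   }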
{ __m256i in00 = _mm256_loadu_si256((const __m256i *)(in + 0 * 32)); __m256i in01 = _mm256_loadu_si256((const __m256i *)(in + 1 * 32)); __m256i in02 = _mm256_loadu_si256((const __m256i *)(in + 2 * 32)); __m256i in03 = _mm256_loadu_si256((const __m256i *)(in + 3 * 32)); __m256i in28 = _mm256_loadu_si256((const __m256i *)(in + 28 * 32)); __m256i in29 = _mm256_loadu_si256((const __m256i *)(in + 29 * 32)); __m256i in30 = _mm256_loadu_si256((const __m256i *)(in + 30 * 32)); __m256i in31 = _mm256_loadu_si256((const __m256i *)(in + 31 * 32)); step1[0] = _mm256_add_epi16(in00, in31); step1[1] = _mm256_add_epi16(in01, in30); step1[2] = _mm256_add_epi16(in02, in29); step1[3] = _mm256_add_epi16(in03, in28); step1[28] = _mm256_sub_epi16(in03, in28); step1[29] = _mm256_sub_epi16(in02, in29); step1[30] = _mm256_sub_epi16(in01, in30); step1[31] = _mm256_sub_epi16(in00, in31); } { __m256i in04 = _mm256_loadu_si256((const __m256i *)(in + 4 * 32)); __m256i in05 = _mm256_loadu_si256((const __m256i *)(in + 5 * 32)); __m256i in06 = _mm256_loadu_si256((const __m256i *)(in + 6 * 32)); __m256i in07 = _mm256_loadu_si256((const __m256i *)(in + 7 * 32)); __m256i in24 = _mm256_loadu_si256((const __m256i *)(in + 24 * 32)); __m256i in25 = _mm256_loadu_si256((const __m256i *)(in + 25 * 32)); __m256i in26 = _mm256_loadu_si256((const __m256i *)(in + 26 * 32)); __m256i in27 = _mm256_loadu_si256((const __m256i *)(in + 27 * 32)); step1[4] = _mm256_add_epi16(in04, in27); step1[5] = _mm256_add_epi16(in05, in26); step1[6] = _mm256_add_epi16(in06, in25); step1[7] = _mm256_add_epi16(in07, in24); step1[24] = _mm256_sub_epi16(in07, in24); step1[25] = _mm256_sub_epi16(in06, in25); step1[26] = _mm256_sub_epi16(in05, in26); step1[27] = _mm256_sub_epi16(in04, in27); } { __m256i in08 = _mm256_loadu_si256((const __m256i *)(in + 8 * 32)); __m256i in09 = _mm256_loadu_si256((const __m256i *)(in + 9 * 32)); __m256i in10 = _mm256_loadu_si256((const __m256i *)(in + 10 * 32)); __m256i in11 = _mm256_loadu_si256((const __m256i *)(in + 11 * 32)); __m256i in20 = _mm256_loadu_si256((const __m256i *)(in + 20 * 32)); __m256i in21 = _mm256_loadu_si256((const __m256i *)(in + 21 * 32)); __m256i in22 = _mm256_loadu_si256((const __m256i *)(in + 22 * 32)); __m256i in23 = _mm256_loadu_si256((const __m256i *)(in + 23 * 32)); step1[8] = _mm256_add_epi16(in08, in23); step1[9] = _mm256_add_epi16(in09, in22); step1[10] = _mm256_add_epi16(in10, in21); step1[11] = _mm256_add_epi16(in11, in20); step1[20] = _mm256_sub_epi16(in11, in20); step1[21] = _mm256_sub_epi16(in10, in21); step1[22] = _mm256_sub_epi16(in09, in22); step1[23] = _mm256_sub_epi16(in08, in23); } { __m256i in12 = _mm256_loadu_si256((const __m256i *)(in + 12 * 32)); __m256i in13 = _mm256_loadu_si256((const __m256i *)(in + 13 * 32)); __m256i in14 = _mm256_loadu_si256((const __m256i *)(in + 14 * 32)); __m256i in15 = _mm256_loadu_si256((const __m256i *)(in + 15 * 32)); __m256i in16 = _mm256_loadu_si256((const __m256i *)(in + 16 * 32)); __m256i in17 = _mm256_loadu_si256((const __m256i *)(in + 17 * 32)); __m256i in18 = _mm256_loadu_si256((const __m256i *)(in + 18 * 32)); __m256i in19 = _mm256_loadu_si256((const __m256i *)(in + 19 * 32)); step1[12] = _mm256_add_epi16(in12, in19); step1[13] = _mm256_add_epi16(in13, in18); step1[14] = _mm256_add_epi16(in14, in17); step1[15] = _mm256_add_epi16(in15, in16); step1[16] = _mm256_sub_epi16(in15, in16); step1[17] = _mm256_sub_epi16(in14, in17); step1[18] = _mm256_sub_epi16(in13, in18); step1[19] = _mm256_sub_epi16(in12, in19); } } // Stage 2 { step2[0] = 
_mm256_add_epi16(step1[0], step1[15]); step2[1] = _mm256_add_epi16(step1[1], step1[14]); step2[2] = _mm256_add_epi16(step1[2], step1[13]); step2[3] = _mm256_add_epi16(step1[3], step1[12]); step2[4] = _mm256_add_epi16(step1[4], step1[11]); step2[5] = _mm256_add_epi16(step1[5], step1[10]); step2[6] = _mm256_add_epi16(step1[6], step1[9]); step2[7] = _mm256_add_epi16(step1[7], step1[8]); step2[8] = _mm256_sub_epi16(step1[7], step1[8]); step2[9] = _mm256_sub_epi16(step1[6], step1[9]); step2[10] = _mm256_sub_epi16(step1[5], step1[10]); step2[11] = _mm256_sub_epi16(step1[4], step1[11]); step2[12] = _mm256_sub_epi16(step1[3], step1[12]); step2[13] = _mm256_sub_epi16(step1[2], step1[13]); step2[14] = _mm256_sub_epi16(step1[1], step1[14]); step2[15] = _mm256_sub_epi16(step1[0], step1[15]); } { const __m256i s2_20_0 = _mm256_unpacklo_epi16(step1[27], step1[20]); const __m256i s2_20_1 = _mm256_unpackhi_epi16(step1[27], step1[20]); const __m256i s2_21_0 = _mm256_unpacklo_epi16(step1[26], step1[21]); const __m256i s2_21_1 = _mm256_unpackhi_epi16(step1[26], step1[21]); const __m256i s2_22_0 = _mm256_unpacklo_epi16(step1[25], step1[22]); const __m256i s2_22_1 = _mm256_unpackhi_epi16(step1[25], step1[22]); const __m256i s2_23_0 = _mm256_unpacklo_epi16(step1[24], step1[23]); const __m256i s2_23_1 = _mm256_unpackhi_epi16(step1[24], step1[23]); const __m256i s2_20_2 = _mm256_madd_epi16(s2_20_0, k__cospi_p16_m16); const __m256i s2_20_3 = _mm256_madd_epi16(s2_20_1, k__cospi_p16_m16); const __m256i s2_21_2 = _mm256_madd_epi16(s2_21_0, k__cospi_p16_m16); const __m256i s2_21_3 = _mm256_madd_epi16(s2_21_1, k__cospi_p16_m16); const __m256i s2_22_2 = _mm256_madd_epi16(s2_22_0, k__cospi_p16_m16); const __m256i s2_22_3 = _mm256_madd_epi16(s2_22_1, k__cospi_p16_m16); const __m256i s2_23_2 = _mm256_madd_epi16(s2_23_0, k__cospi_p16_m16); const __m256i s2_23_3 = _mm256_madd_epi16(s2_23_1, k__cospi_p16_m16); const __m256i s2_24_2 = _mm256_madd_epi16(s2_23_0, k__cospi_p16_p16); const __m256i s2_24_3 = _mm256_madd_epi16(s2_23_1, k__cospi_p16_p16); const __m256i s2_25_2 = _mm256_madd_epi16(s2_22_0, k__cospi_p16_p16); const __m256i s2_25_3 = _mm256_madd_epi16(s2_22_1, k__cospi_p16_p16); const __m256i s2_26_2 = _mm256_madd_epi16(s2_21_0, k__cospi_p16_p16); const __m256i s2_26_3 = _mm256_madd_epi16(s2_21_1, k__cospi_p16_p16); const __m256i s2_27_2 = _mm256_madd_epi16(s2_20_0, k__cospi_p16_p16); const __m256i s2_27_3 = _mm256_madd_epi16(s2_20_1, k__cospi_p16_p16); // dct_const_round_shift const __m256i s2_20_4 = _mm256_add_epi32(s2_20_2, k__DCT_CONST_ROUNDING); const __m256i s2_20_5 = _mm256_add_epi32(s2_20_3, k__DCT_CONST_ROUNDING); const __m256i s2_21_4 = _mm256_add_epi32(s2_21_2, k__DCT_CONST_ROUNDING); const __m256i s2_21_5 = _mm256_add_epi32(s2_21_3, k__DCT_CONST_ROUNDING); const __m256i s2_22_4 = _mm256_add_epi32(s2_22_2, k__DCT_CONST_ROUNDING); const __m256i s2_22_5 = _mm256_add_epi32(s2_22_3, k__DCT_CONST_ROUNDING); const __m256i s2_23_4 = _mm256_add_epi32(s2_23_2, k__DCT_CONST_ROUNDING); const __m256i s2_23_5 = _mm256_add_epi32(s2_23_3, k__DCT_CONST_ROUNDING); const __m256i s2_24_4 = _mm256_add_epi32(s2_24_2, k__DCT_CONST_ROUNDING); const __m256i s2_24_5 = _mm256_add_epi32(s2_24_3, k__DCT_CONST_ROUNDING); const __m256i s2_25_4 = _mm256_add_epi32(s2_25_2, k__DCT_CONST_ROUNDING); const __m256i s2_25_5 = _mm256_add_epi32(s2_25_3, k__DCT_CONST_ROUNDING); const __m256i s2_26_4 = _mm256_add_epi32(s2_26_2, k__DCT_CONST_ROUNDING); const __m256i s2_26_5 = _mm256_add_epi32(s2_26_3, k__DCT_CONST_ROUNDING); const __m256i s2_27_4 = 
_mm256_add_epi32(s2_27_2, k__DCT_CONST_ROUNDING); const __m256i s2_27_5 = _mm256_add_epi32(s2_27_3, k__DCT_CONST_ROUNDING); const __m256i s2_20_6 = _mm256_srai_epi32(s2_20_4, DCT_CONST_BITS); const __m256i s2_20_7 = _mm256_srai_epi32(s2_20_5, DCT_CONST_BITS); const __m256i s2_21_6 = _mm256_srai_epi32(s2_21_4, DCT_CONST_BITS); const __m256i s2_21_7 = _mm256_srai_epi32(s2_21_5, DCT_CONST_BITS); const __m256i s2_22_6 = _mm256_srai_epi32(s2_22_4, DCT_CONST_BITS); const __m256i s2_22_7 = _mm256_srai_epi32(s2_22_5, DCT_CONST_BITS); const __m256i s2_23_6 = _mm256_srai_epi32(s2_23_4, DCT_CONST_BITS); const __m256i s2_23_7 = _mm256_srai_epi32(s2_23_5, DCT_CONST_BITS); const __m256i s2_24_6 = _mm256_srai_epi32(s2_24_4, DCT_CONST_BITS); const __m256i s2_24_7 = _mm256_srai_epi32(s2_24_5, DCT_CONST_BITS); const __m256i s2_25_6 = _mm256_srai_epi32(s2_25_4, DCT_CONST_BITS); const __m256i s2_25_7 = _mm256_srai_epi32(s2_25_5, DCT_CONST_BITS); const __m256i s2_26_6 = _mm256_srai_epi32(s2_26_4, DCT_CONST_BITS); const __m256i s2_26_7 = _mm256_srai_epi32(s2_26_5, DCT_CONST_BITS); const __m256i s2_27_6 = _mm256_srai_epi32(s2_27_4, DCT_CONST_BITS); const __m256i s2_27_7 = _mm256_srai_epi32(s2_27_5, DCT_CONST_BITS); // Combine step2[20] = _mm256_packs_epi32(s2_20_6, s2_20_7); step2[21] = _mm256_packs_epi32(s2_21_6, s2_21_7); step2[22] = _mm256_packs_epi32(s2_22_6, s2_22_7); step2[23] = _mm256_packs_epi32(s2_23_6, s2_23_7); step2[24] = _mm256_packs_epi32(s2_24_6, s2_24_7); step2[25] = _mm256_packs_epi32(s2_25_6, s2_25_7); step2[26] = _mm256_packs_epi32(s2_26_6, s2_26_7); step2[27] = _mm256_packs_epi32(s2_27_6, s2_27_7); } #if !FDCT32x32_HIGH_PRECISION // dump the magnitude by half, hence the intermediate values are within // the range of 16 bits. if (1 == pass) { __m256i s3_00_0 = _mm256_cmpgt_epi16(kZero, step2[0]); __m256i s3_01_0 = _mm256_cmpgt_epi16(kZero, step2[1]); __m256i s3_02_0 = _mm256_cmpgt_epi16(kZero, step2[2]); __m256i s3_03_0 = _mm256_cmpgt_epi16(kZero, step2[3]); __m256i s3_04_0 = _mm256_cmpgt_epi16(kZero, step2[4]); __m256i s3_05_0 = _mm256_cmpgt_epi16(kZero, step2[5]); __m256i s3_06_0 = _mm256_cmpgt_epi16(kZero, step2[6]); __m256i s3_07_0 = _mm256_cmpgt_epi16(kZero, step2[7]); __m256i s2_08_0 = _mm256_cmpgt_epi16(kZero, step2[8]); __m256i s2_09_0 = _mm256_cmpgt_epi16(kZero, step2[9]); __m256i s3_10_0 = _mm256_cmpgt_epi16(kZero, step2[10]); __m256i s3_11_0 = _mm256_cmpgt_epi16(kZero, step2[11]); __m256i s3_12_0 = _mm256_cmpgt_epi16(kZero, step2[12]); __m256i s3_13_0 = _mm256_cmpgt_epi16(kZero, step2[13]); __m256i s2_14_0 = _mm256_cmpgt_epi16(kZero, step2[14]); __m256i s2_15_0 = _mm256_cmpgt_epi16(kZero, step2[15]); __m256i s3_16_0 = _mm256_cmpgt_epi16(kZero, step1[16]); __m256i s3_17_0 = _mm256_cmpgt_epi16(kZero, step1[17]); __m256i s3_18_0 = _mm256_cmpgt_epi16(kZero, step1[18]); __m256i s3_19_0 = _mm256_cmpgt_epi16(kZero, step1[19]); __m256i s3_20_0 = _mm256_cmpgt_epi16(kZero, step2[20]); __m256i s3_21_0 = _mm256_cmpgt_epi16(kZero, step2[21]); __m256i s3_22_0 = _mm256_cmpgt_epi16(kZero, step2[22]); __m256i s3_23_0 = _mm256_cmpgt_epi16(kZero, step2[23]); __m256i s3_24_0 = _mm256_cmpgt_epi16(kZero, step2[24]); __m256i s3_25_0 = _mm256_cmpgt_epi16(kZero, step2[25]); __m256i s3_26_0 = _mm256_cmpgt_epi16(kZero, step2[26]); __m256i s3_27_0 = _mm256_cmpgt_epi16(kZero, step2[27]); __m256i s3_28_0 = _mm256_cmpgt_epi16(kZero, step1[28]); __m256i s3_29_0 = _mm256_cmpgt_epi16(kZero, step1[29]); __m256i s3_30_0 = _mm256_cmpgt_epi16(kZero, step1[30]); __m256i s3_31_0 = _mm256_cmpgt_epi16(kZero, step1[31]); 
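/*
 * The cmpgt masks above feed the rounding below: per 16-bit lane,
 * _mm256_cmpgt_epi16(kZero, x) is -1 exactly when x is negative, so
 * subtracting the mask, adding kOne and arithmetic-shifting right by 2
 * computes the same second-pass rounding as the scalar reference fdct32:
 *
 *   x = (x + 1 + (x < 0)) >> 2;
 *
 * which halves the magnitude twice and keeps the intermediate values
 * within 16 bits.
 */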
step2[0] = _mm256_sub_epi16(step2[0], s3_00_0); step2[1] = _mm256_sub_epi16(step2[1], s3_01_0); step2[2] = _mm256_sub_epi16(step2[2], s3_02_0); step2[3] = _mm256_sub_epi16(step2[3], s3_03_0); step2[4] = _mm256_sub_epi16(step2[4], s3_04_0); step2[5] = _mm256_sub_epi16(step2[5], s3_05_0); step2[6] = _mm256_sub_epi16(step2[6], s3_06_0); step2[7] = _mm256_sub_epi16(step2[7], s3_07_0); step2[8] = _mm256_sub_epi16(step2[8], s2_08_0); step2[9] = _mm256_sub_epi16(step2[9], s2_09_0); step2[10] = _mm256_sub_epi16(step2[10], s3_10_0); step2[11] = _mm256_sub_epi16(step2[11], s3_11_0); step2[12] = _mm256_sub_epi16(step2[12], s3_12_0); step2[13] = _mm256_sub_epi16(step2[13], s3_13_0); step2[14] = _mm256_sub_epi16(step2[14], s2_14_0); step2[15] = _mm256_sub_epi16(step2[15], s2_15_0); step1[16] = _mm256_sub_epi16(step1[16], s3_16_0); step1[17] = _mm256_sub_epi16(step1[17], s3_17_0); step1[18] = _mm256_sub_epi16(step1[18], s3_18_0); step1[19] = _mm256_sub_epi16(step1[19], s3_19_0); step2[20] = _mm256_sub_epi16(step2[20], s3_20_0); step2[21] = _mm256_sub_epi16(step2[21], s3_21_0); step2[22] = _mm256_sub_epi16(step2[22], s3_22_0); step2[23] = _mm256_sub_epi16(step2[23], s3_23_0); step2[24] = _mm256_sub_epi16(step2[24], s3_24_0); step2[25] = _mm256_sub_epi16(step2[25], s3_25_0); step2[26] = _mm256_sub_epi16(step2[26], s3_26_0); step2[27] = _mm256_sub_epi16(step2[27], s3_27_0); step1[28] = _mm256_sub_epi16(step1[28], s3_28_0); step1[29] = _mm256_sub_epi16(step1[29], s3_29_0); step1[30] = _mm256_sub_epi16(step1[30], s3_30_0); step1[31] = _mm256_sub_epi16(step1[31], s3_31_0); step2[0] = _mm256_add_epi16(step2[0], kOne); step2[1] = _mm256_add_epi16(step2[1], kOne); step2[2] = _mm256_add_epi16(step2[2], kOne); step2[3] = _mm256_add_epi16(step2[3], kOne); step2[4] = _mm256_add_epi16(step2[4], kOne); step2[5] = _mm256_add_epi16(step2[5], kOne); step2[6] = _mm256_add_epi16(step2[6], kOne); step2[7] = _mm256_add_epi16(step2[7], kOne); step2[8] = _mm256_add_epi16(step2[8], kOne); step2[9] = _mm256_add_epi16(step2[9], kOne); step2[10] = _mm256_add_epi16(step2[10], kOne); step2[11] = _mm256_add_epi16(step2[11], kOne); step2[12] = _mm256_add_epi16(step2[12], kOne); step2[13] = _mm256_add_epi16(step2[13], kOne); step2[14] = _mm256_add_epi16(step2[14], kOne); step2[15] = _mm256_add_epi16(step2[15], kOne); step1[16] = _mm256_add_epi16(step1[16], kOne); step1[17] = _mm256_add_epi16(step1[17], kOne); step1[18] = _mm256_add_epi16(step1[18], kOne); step1[19] = _mm256_add_epi16(step1[19], kOne); step2[20] = _mm256_add_epi16(step2[20], kOne); step2[21] = _mm256_add_epi16(step2[21], kOne); step2[22] = _mm256_add_epi16(step2[22], kOne); step2[23] = _mm256_add_epi16(step2[23], kOne); step2[24] = _mm256_add_epi16(step2[24], kOne); step2[25] = _mm256_add_epi16(step2[25], kOne); step2[26] = _mm256_add_epi16(step2[26], kOne); step2[27] = _mm256_add_epi16(step2[27], kOne); step1[28] = _mm256_add_epi16(step1[28], kOne); step1[29] = _mm256_add_epi16(step1[29], kOne); step1[30] = _mm256_add_epi16(step1[30], kOne); step1[31] = _mm256_add_epi16(step1[31], kOne); step2[0] = _mm256_srai_epi16(step2[0], 2); step2[1] = _mm256_srai_epi16(step2[1], 2); step2[2] = _mm256_srai_epi16(step2[2], 2); step2[3] = _mm256_srai_epi16(step2[3], 2); step2[4] = _mm256_srai_epi16(step2[4], 2); step2[5] = _mm256_srai_epi16(step2[5], 2); step2[6] = _mm256_srai_epi16(step2[6], 2); step2[7] = _mm256_srai_epi16(step2[7], 2); step2[8] = _mm256_srai_epi16(step2[8], 2); step2[9] = _mm256_srai_epi16(step2[9], 2); step2[10] = _mm256_srai_epi16(step2[10], 2); step2[11] = 
_mm256_srai_epi16(step2[11], 2); step2[12] = _mm256_srai_epi16(step2[12], 2); step2[13] = _mm256_srai_epi16(step2[13], 2); step2[14] = _mm256_srai_epi16(step2[14], 2); step2[15] = _mm256_srai_epi16(step2[15], 2); step1[16] = _mm256_srai_epi16(step1[16], 2); step1[17] = _mm256_srai_epi16(step1[17], 2); step1[18] = _mm256_srai_epi16(step1[18], 2); step1[19] = _mm256_srai_epi16(step1[19], 2); step2[20] = _mm256_srai_epi16(step2[20], 2); step2[21] = _mm256_srai_epi16(step2[21], 2); step2[22] = _mm256_srai_epi16(step2[22], 2); step2[23] = _mm256_srai_epi16(step2[23], 2); step2[24] = _mm256_srai_epi16(step2[24], 2); step2[25] = _mm256_srai_epi16(step2[25], 2); step2[26] = _mm256_srai_epi16(step2[26], 2); step2[27] = _mm256_srai_epi16(step2[27], 2); step1[28] = _mm256_srai_epi16(step1[28], 2); step1[29] = _mm256_srai_epi16(step1[29], 2); step1[30] = _mm256_srai_epi16(step1[30], 2); step1[31] = _mm256_srai_epi16(step1[31], 2); } #endif #if FDCT32x32_HIGH_PRECISION if (pass == 0) { #endif // Stage 3 { step3[0] = _mm256_add_epi16(step2[(8 - 1)], step2[0]); step3[1] = _mm256_add_epi16(step2[(8 - 2)], step2[1]); step3[2] = _mm256_add_epi16(step2[(8 - 3)], step2[2]); step3[3] = _mm256_add_epi16(step2[(8 - 4)], step2[3]); step3[4] = _mm256_sub_epi16(step2[(8 - 5)], step2[4]); step3[5] = _mm256_sub_epi16(step2[(8 - 6)], step2[5]); step3[6] = _mm256_sub_epi16(step2[(8 - 7)], step2[6]); step3[7] = _mm256_sub_epi16(step2[(8 - 8)], step2[7]); } { const __m256i s3_10_0 = _mm256_unpacklo_epi16(step2[13], step2[10]); const __m256i s3_10_1 = _mm256_unpackhi_epi16(step2[13], step2[10]); const __m256i s3_11_0 = _mm256_unpacklo_epi16(step2[12], step2[11]); const __m256i s3_11_1 = _mm256_unpackhi_epi16(step2[12], step2[11]); const __m256i s3_10_2 = _mm256_madd_epi16(s3_10_0, k__cospi_p16_m16); const __m256i s3_10_3 = _mm256_madd_epi16(s3_10_1, k__cospi_p16_m16); const __m256i s3_11_2 = _mm256_madd_epi16(s3_11_0, k__cospi_p16_m16); const __m256i s3_11_3 = _mm256_madd_epi16(s3_11_1, k__cospi_p16_m16); const __m256i s3_12_2 = _mm256_madd_epi16(s3_11_0, k__cospi_p16_p16); const __m256i s3_12_3 = _mm256_madd_epi16(s3_11_1, k__cospi_p16_p16); const __m256i s3_13_2 = _mm256_madd_epi16(s3_10_0, k__cospi_p16_p16); const __m256i s3_13_3 = _mm256_madd_epi16(s3_10_1, k__cospi_p16_p16); // dct_const_round_shift const __m256i s3_10_4 = _mm256_add_epi32(s3_10_2, k__DCT_CONST_ROUNDING); const __m256i s3_10_5 = _mm256_add_epi32(s3_10_3, k__DCT_CONST_ROUNDING); const __m256i s3_11_4 = _mm256_add_epi32(s3_11_2, k__DCT_CONST_ROUNDING); const __m256i s3_11_5 = _mm256_add_epi32(s3_11_3, k__DCT_CONST_ROUNDING); const __m256i s3_12_4 = _mm256_add_epi32(s3_12_2, k__DCT_CONST_ROUNDING); const __m256i s3_12_5 = _mm256_add_epi32(s3_12_3, k__DCT_CONST_ROUNDING); const __m256i s3_13_4 = _mm256_add_epi32(s3_13_2, k__DCT_CONST_ROUNDING); const __m256i s3_13_5 = _mm256_add_epi32(s3_13_3, k__DCT_CONST_ROUNDING); const __m256i s3_10_6 = _mm256_srai_epi32(s3_10_4, DCT_CONST_BITS); const __m256i s3_10_7 = _mm256_srai_epi32(s3_10_5, DCT_CONST_BITS); const __m256i s3_11_6 = _mm256_srai_epi32(s3_11_4, DCT_CONST_BITS); const __m256i s3_11_7 = _mm256_srai_epi32(s3_11_5, DCT_CONST_BITS); const __m256i s3_12_6 = _mm256_srai_epi32(s3_12_4, DCT_CONST_BITS); const __m256i s3_12_7 = _mm256_srai_epi32(s3_12_5, DCT_CONST_BITS); const __m256i s3_13_6 = _mm256_srai_epi32(s3_13_4, DCT_CONST_BITS); const __m256i s3_13_7 = _mm256_srai_epi32(s3_13_5, DCT_CONST_BITS); // Combine step3[10] = _mm256_packs_epi32(s3_10_6, s3_10_7); step3[11] = _mm256_packs_epi32(s3_11_6, 
s3_11_7); step3[12] = _mm256_packs_epi32(s3_12_6, s3_12_7); step3[13] = _mm256_packs_epi32(s3_13_6, s3_13_7); } { step3[16] = _mm256_add_epi16(step2[23], step1[16]); step3[17] = _mm256_add_epi16(step2[22], step1[17]); step3[18] = _mm256_add_epi16(step2[21], step1[18]); step3[19] = _mm256_add_epi16(step2[20], step1[19]); step3[20] = _mm256_sub_epi16(step1[19], step2[20]); step3[21] = _mm256_sub_epi16(step1[18], step2[21]); step3[22] = _mm256_sub_epi16(step1[17], step2[22]); step3[23] = _mm256_sub_epi16(step1[16], step2[23]); step3[24] = _mm256_sub_epi16(step1[31], step2[24]); step3[25] = _mm256_sub_epi16(step1[30], step2[25]); step3[26] = _mm256_sub_epi16(step1[29], step2[26]); step3[27] = _mm256_sub_epi16(step1[28], step2[27]); step3[28] = _mm256_add_epi16(step2[27], step1[28]); step3[29] = _mm256_add_epi16(step2[26], step1[29]); step3[30] = _mm256_add_epi16(step2[25], step1[30]); step3[31] = _mm256_add_epi16(step2[24], step1[31]); } // Stage 4 { step1[0] = _mm256_add_epi16(step3[3], step3[0]); step1[1] = _mm256_add_epi16(step3[2], step3[1]); step1[2] = _mm256_sub_epi16(step3[1], step3[2]); step1[3] = _mm256_sub_epi16(step3[0], step3[3]); step1[8] = _mm256_add_epi16(step3[11], step2[8]); step1[9] = _mm256_add_epi16(step3[10], step2[9]); step1[10] = _mm256_sub_epi16(step2[9], step3[10]); step1[11] = _mm256_sub_epi16(step2[8], step3[11]); step1[12] = _mm256_sub_epi16(step2[15], step3[12]); step1[13] = _mm256_sub_epi16(step2[14], step3[13]); step1[14] = _mm256_add_epi16(step3[13], step2[14]); step1[15] = _mm256_add_epi16(step3[12], step2[15]); } { const __m256i s1_05_0 = _mm256_unpacklo_epi16(step3[6], step3[5]); const __m256i s1_05_1 = _mm256_unpackhi_epi16(step3[6], step3[5]); const __m256i s1_05_2 = _mm256_madd_epi16(s1_05_0, k__cospi_p16_m16); const __m256i s1_05_3 = _mm256_madd_epi16(s1_05_1, k__cospi_p16_m16); const __m256i s1_06_2 = _mm256_madd_epi16(s1_05_0, k__cospi_p16_p16); const __m256i s1_06_3 = _mm256_madd_epi16(s1_05_1, k__cospi_p16_p16); // dct_const_round_shift const __m256i s1_05_4 = _mm256_add_epi32(s1_05_2, k__DCT_CONST_ROUNDING); const __m256i s1_05_5 = _mm256_add_epi32(s1_05_3, k__DCT_CONST_ROUNDING); const __m256i s1_06_4 = _mm256_add_epi32(s1_06_2, k__DCT_CONST_ROUNDING); const __m256i s1_06_5 = _mm256_add_epi32(s1_06_3, k__DCT_CONST_ROUNDING); const __m256i s1_05_6 = _mm256_srai_epi32(s1_05_4, DCT_CONST_BITS); const __m256i s1_05_7 = _mm256_srai_epi32(s1_05_5, DCT_CONST_BITS); const __m256i s1_06_6 = _mm256_srai_epi32(s1_06_4, DCT_CONST_BITS); const __m256i s1_06_7 = _mm256_srai_epi32(s1_06_5, DCT_CONST_BITS); // Combine step1[5] = _mm256_packs_epi32(s1_05_6, s1_05_7); step1[6] = _mm256_packs_epi32(s1_06_6, s1_06_7); } { const __m256i s1_18_0 = _mm256_unpacklo_epi16(step3[18], step3[29]); const __m256i s1_18_1 = _mm256_unpackhi_epi16(step3[18], step3[29]); const __m256i s1_19_0 = _mm256_unpacklo_epi16(step3[19], step3[28]); const __m256i s1_19_1 = _mm256_unpackhi_epi16(step3[19], step3[28]); const __m256i s1_20_0 = _mm256_unpacklo_epi16(step3[20], step3[27]); const __m256i s1_20_1 = _mm256_unpackhi_epi16(step3[20], step3[27]); const __m256i s1_21_0 = _mm256_unpacklo_epi16(step3[21], step3[26]); const __m256i s1_21_1 = _mm256_unpackhi_epi16(step3[21], step3[26]); const __m256i s1_18_2 = _mm256_madd_epi16(s1_18_0, k__cospi_m08_p24); const __m256i s1_18_3 = _mm256_madd_epi16(s1_18_1, k__cospi_m08_p24); const __m256i s1_19_2 = _mm256_madd_epi16(s1_19_0, k__cospi_m08_p24); const __m256i s1_19_3 = _mm256_madd_epi16(s1_19_1, k__cospi_m08_p24); const __m256i s1_20_2 = 
_mm256_madd_epi16(s1_20_0, k__cospi_m24_m08); const __m256i s1_20_3 = _mm256_madd_epi16(s1_20_1, k__cospi_m24_m08); const __m256i s1_21_2 = _mm256_madd_epi16(s1_21_0, k__cospi_m24_m08); const __m256i s1_21_3 = _mm256_madd_epi16(s1_21_1, k__cospi_m24_m08); const __m256i s1_26_2 = _mm256_madd_epi16(s1_21_0, k__cospi_m08_p24); const __m256i s1_26_3 = _mm256_madd_epi16(s1_21_1, k__cospi_m08_p24); const __m256i s1_27_2 = _mm256_madd_epi16(s1_20_0, k__cospi_m08_p24); const __m256i s1_27_3 = _mm256_madd_epi16(s1_20_1, k__cospi_m08_p24); const __m256i s1_28_2 = _mm256_madd_epi16(s1_19_0, k__cospi_p24_p08); const __m256i s1_28_3 = _mm256_madd_epi16(s1_19_1, k__cospi_p24_p08); const __m256i s1_29_2 = _mm256_madd_epi16(s1_18_0, k__cospi_p24_p08); const __m256i s1_29_3 = _mm256_madd_epi16(s1_18_1, k__cospi_p24_p08); // dct_const_round_shift const __m256i s1_18_4 = _mm256_add_epi32(s1_18_2, k__DCT_CONST_ROUNDING); const __m256i s1_18_5 = _mm256_add_epi32(s1_18_3, k__DCT_CONST_ROUNDING); const __m256i s1_19_4 = _mm256_add_epi32(s1_19_2, k__DCT_CONST_ROUNDING); const __m256i s1_19_5 = _mm256_add_epi32(s1_19_3, k__DCT_CONST_ROUNDING); const __m256i s1_20_4 = _mm256_add_epi32(s1_20_2, k__DCT_CONST_ROUNDING); const __m256i s1_20_5 = _mm256_add_epi32(s1_20_3, k__DCT_CONST_ROUNDING); const __m256i s1_21_4 = _mm256_add_epi32(s1_21_2, k__DCT_CONST_ROUNDING); const __m256i s1_21_5 = _mm256_add_epi32(s1_21_3, k__DCT_CONST_ROUNDING); const __m256i s1_26_4 = _mm256_add_epi32(s1_26_2, k__DCT_CONST_ROUNDING); const __m256i s1_26_5 = _mm256_add_epi32(s1_26_3, k__DCT_CONST_ROUNDING); const __m256i s1_27_4 = _mm256_add_epi32(s1_27_2, k__DCT_CONST_ROUNDING); const __m256i s1_27_5 = _mm256_add_epi32(s1_27_3, k__DCT_CONST_ROUNDING); const __m256i s1_28_4 = _mm256_add_epi32(s1_28_2, k__DCT_CONST_ROUNDING); const __m256i s1_28_5 = _mm256_add_epi32(s1_28_3, k__DCT_CONST_ROUNDING); const __m256i s1_29_4 = _mm256_add_epi32(s1_29_2, k__DCT_CONST_ROUNDING); const __m256i s1_29_5 = _mm256_add_epi32(s1_29_3, k__DCT_CONST_ROUNDING); const __m256i s1_18_6 = _mm256_srai_epi32(s1_18_4, DCT_CONST_BITS); const __m256i s1_18_7 = _mm256_srai_epi32(s1_18_5, DCT_CONST_BITS); const __m256i s1_19_6 = _mm256_srai_epi32(s1_19_4, DCT_CONST_BITS); const __m256i s1_19_7 = _mm256_srai_epi32(s1_19_5, DCT_CONST_BITS); const __m256i s1_20_6 = _mm256_srai_epi32(s1_20_4, DCT_CONST_BITS); const __m256i s1_20_7 = _mm256_srai_epi32(s1_20_5, DCT_CONST_BITS); const __m256i s1_21_6 = _mm256_srai_epi32(s1_21_4, DCT_CONST_BITS); const __m256i s1_21_7 = _mm256_srai_epi32(s1_21_5, DCT_CONST_BITS); const __m256i s1_26_6 = _mm256_srai_epi32(s1_26_4, DCT_CONST_BITS); const __m256i s1_26_7 = _mm256_srai_epi32(s1_26_5, DCT_CONST_BITS); const __m256i s1_27_6 = _mm256_srai_epi32(s1_27_4, DCT_CONST_BITS); const __m256i s1_27_7 = _mm256_srai_epi32(s1_27_5, DCT_CONST_BITS); const __m256i s1_28_6 = _mm256_srai_epi32(s1_28_4, DCT_CONST_BITS); const __m256i s1_28_7 = _mm256_srai_epi32(s1_28_5, DCT_CONST_BITS); const __m256i s1_29_6 = _mm256_srai_epi32(s1_29_4, DCT_CONST_BITS); const __m256i s1_29_7 = _mm256_srai_epi32(s1_29_5, DCT_CONST_BITS); // Combine step1[18] = _mm256_packs_epi32(s1_18_6, s1_18_7); step1[19] = _mm256_packs_epi32(s1_19_6, s1_19_7); step1[20] = _mm256_packs_epi32(s1_20_6, s1_20_7); step1[21] = _mm256_packs_epi32(s1_21_6, s1_21_7); step1[26] = _mm256_packs_epi32(s1_26_6, s1_26_7); step1[27] = _mm256_packs_epi32(s1_27_6, s1_27_7); step1[28] = _mm256_packs_epi32(s1_28_6, s1_28_7); step1[29] = _mm256_packs_epi32(s1_29_6, s1_29_7); } // Stage 5 { step2[4] = 
_mm256_add_epi16(step1[5], step3[4]); step2[5] = _mm256_sub_epi16(step3[4], step1[5]); step2[6] = _mm256_sub_epi16(step3[7], step1[6]); step2[7] = _mm256_add_epi16(step1[6], step3[7]); } { const __m256i out_00_0 = _mm256_unpacklo_epi16(step1[0], step1[1]); const __m256i out_00_1 = _mm256_unpackhi_epi16(step1[0], step1[1]); const __m256i out_08_0 = _mm256_unpacklo_epi16(step1[2], step1[3]); const __m256i out_08_1 = _mm256_unpackhi_epi16(step1[2], step1[3]); const __m256i out_00_2 = _mm256_madd_epi16(out_00_0, k__cospi_p16_p16); const __m256i out_00_3 = _mm256_madd_epi16(out_00_1, k__cospi_p16_p16); const __m256i out_16_2 = _mm256_madd_epi16(out_00_0, k__cospi_p16_m16); const __m256i out_16_3 = _mm256_madd_epi16(out_00_1, k__cospi_p16_m16); const __m256i out_08_2 = _mm256_madd_epi16(out_08_0, k__cospi_p24_p08); const __m256i out_08_3 = _mm256_madd_epi16(out_08_1, k__cospi_p24_p08); const __m256i out_24_2 = _mm256_madd_epi16(out_08_0, k__cospi_m08_p24); const __m256i out_24_3 = _mm256_madd_epi16(out_08_1, k__cospi_m08_p24); // dct_const_round_shift const __m256i out_00_4 = _mm256_add_epi32(out_00_2, k__DCT_CONST_ROUNDING); const __m256i out_00_5 = _mm256_add_epi32(out_00_3, k__DCT_CONST_ROUNDING); const __m256i out_16_4 = _mm256_add_epi32(out_16_2, k__DCT_CONST_ROUNDING); const __m256i out_16_5 = _mm256_add_epi32(out_16_3, k__DCT_CONST_ROUNDING); const __m256i out_08_4 = _mm256_add_epi32(out_08_2, k__DCT_CONST_ROUNDING); const __m256i out_08_5 = _mm256_add_epi32(out_08_3, k__DCT_CONST_ROUNDING); const __m256i out_24_4 = _mm256_add_epi32(out_24_2, k__DCT_CONST_ROUNDING); const __m256i out_24_5 = _mm256_add_epi32(out_24_3, k__DCT_CONST_ROUNDING); const __m256i out_00_6 = _mm256_srai_epi32(out_00_4, DCT_CONST_BITS); const __m256i out_00_7 = _mm256_srai_epi32(out_00_5, DCT_CONST_BITS); const __m256i out_16_6 = _mm256_srai_epi32(out_16_4, DCT_CONST_BITS); const __m256i out_16_7 = _mm256_srai_epi32(out_16_5, DCT_CONST_BITS); const __m256i out_08_6 = _mm256_srai_epi32(out_08_4, DCT_CONST_BITS); const __m256i out_08_7 = _mm256_srai_epi32(out_08_5, DCT_CONST_BITS); const __m256i out_24_6 = _mm256_srai_epi32(out_24_4, DCT_CONST_BITS); const __m256i out_24_7 = _mm256_srai_epi32(out_24_5, DCT_CONST_BITS); // Combine out[0] = _mm256_packs_epi32(out_00_6, out_00_7); out[16] = _mm256_packs_epi32(out_16_6, out_16_7); out[8] = _mm256_packs_epi32(out_08_6, out_08_7); out[24] = _mm256_packs_epi32(out_24_6, out_24_7); } { const __m256i s2_09_0 = _mm256_unpacklo_epi16(step1[9], step1[14]); const __m256i s2_09_1 = _mm256_unpackhi_epi16(step1[9], step1[14]); const __m256i s2_10_0 = _mm256_unpacklo_epi16(step1[10], step1[13]); const __m256i s2_10_1 = _mm256_unpackhi_epi16(step1[10], step1[13]); const __m256i s2_09_2 = _mm256_madd_epi16(s2_09_0, k__cospi_m08_p24); const __m256i s2_09_3 = _mm256_madd_epi16(s2_09_1, k__cospi_m08_p24); const __m256i s2_10_2 = _mm256_madd_epi16(s2_10_0, k__cospi_m24_m08); const __m256i s2_10_3 = _mm256_madd_epi16(s2_10_1, k__cospi_m24_m08); const __m256i s2_13_2 = _mm256_madd_epi16(s2_10_0, k__cospi_m08_p24); const __m256i s2_13_3 = _mm256_madd_epi16(s2_10_1, k__cospi_m08_p24); const __m256i s2_14_2 = _mm256_madd_epi16(s2_09_0, k__cospi_p24_p08); const __m256i s2_14_3 = _mm256_madd_epi16(s2_09_1, k__cospi_p24_p08); // dct_const_round_shift const __m256i s2_09_4 = _mm256_add_epi32(s2_09_2, k__DCT_CONST_ROUNDING); const __m256i s2_09_5 = _mm256_add_epi32(s2_09_3, k__DCT_CONST_ROUNDING); const __m256i s2_10_4 = _mm256_add_epi32(s2_10_2, k__DCT_CONST_ROUNDING); const __m256i s2_10_5 = 
_mm256_add_epi32(s2_10_3, k__DCT_CONST_ROUNDING); const __m256i s2_13_4 = _mm256_add_epi32(s2_13_2, k__DCT_CONST_ROUNDING); const __m256i s2_13_5 = _mm256_add_epi32(s2_13_3, k__DCT_CONST_ROUNDING); const __m256i s2_14_4 = _mm256_add_epi32(s2_14_2, k__DCT_CONST_ROUNDING); const __m256i s2_14_5 = _mm256_add_epi32(s2_14_3, k__DCT_CONST_ROUNDING); const __m256i s2_09_6 = _mm256_srai_epi32(s2_09_4, DCT_CONST_BITS); const __m256i s2_09_7 = _mm256_srai_epi32(s2_09_5, DCT_CONST_BITS); const __m256i s2_10_6 = _mm256_srai_epi32(s2_10_4, DCT_CONST_BITS); const __m256i s2_10_7 = _mm256_srai_epi32(s2_10_5, DCT_CONST_BITS); const __m256i s2_13_6 = _mm256_srai_epi32(s2_13_4, DCT_CONST_BITS); const __m256i s2_13_7 = _mm256_srai_epi32(s2_13_5, DCT_CONST_BITS); const __m256i s2_14_6 = _mm256_srai_epi32(s2_14_4, DCT_CONST_BITS); const __m256i s2_14_7 = _mm256_srai_epi32(s2_14_5, DCT_CONST_BITS); // Combine step2[9] = _mm256_packs_epi32(s2_09_6, s2_09_7); step2[10] = _mm256_packs_epi32(s2_10_6, s2_10_7); step2[13] = _mm256_packs_epi32(s2_13_6, s2_13_7); step2[14] = _mm256_packs_epi32(s2_14_6, s2_14_7); } { step2[16] = _mm256_add_epi16(step1[19], step3[16]); step2[17] = _mm256_add_epi16(step1[18], step3[17]); step2[18] = _mm256_sub_epi16(step3[17], step1[18]); step2[19] = _mm256_sub_epi16(step3[16], step1[19]); step2[20] = _mm256_sub_epi16(step3[23], step1[20]); step2[21] = _mm256_sub_epi16(step3[22], step1[21]); step2[22] = _mm256_add_epi16(step1[21], step3[22]); step2[23] = _mm256_add_epi16(step1[20], step3[23]); step2[24] = _mm256_add_epi16(step1[27], step3[24]); step2[25] = _mm256_add_epi16(step1[26], step3[25]); step2[26] = _mm256_sub_epi16(step3[25], step1[26]); step2[27] = _mm256_sub_epi16(step3[24], step1[27]); step2[28] = _mm256_sub_epi16(step3[31], step1[28]); step2[29] = _mm256_sub_epi16(step3[30], step1[29]); step2[30] = _mm256_add_epi16(step1[29], step3[30]); step2[31] = _mm256_add_epi16(step1[28], step3[31]); } // Stage 6 { const __m256i out_04_0 = _mm256_unpacklo_epi16(step2[4], step2[7]); const __m256i out_04_1 = _mm256_unpackhi_epi16(step2[4], step2[7]); const __m256i out_20_0 = _mm256_unpacklo_epi16(step2[5], step2[6]); const __m256i out_20_1 = _mm256_unpackhi_epi16(step2[5], step2[6]); const __m256i out_12_0 = _mm256_unpacklo_epi16(step2[5], step2[6]); const __m256i out_12_1 = _mm256_unpackhi_epi16(step2[5], step2[6]); const __m256i out_28_0 = _mm256_unpacklo_epi16(step2[4], step2[7]); const __m256i out_28_1 = _mm256_unpackhi_epi16(step2[4], step2[7]); const __m256i out_04_2 = _mm256_madd_epi16(out_04_0, k__cospi_p28_p04); const __m256i out_04_3 = _mm256_madd_epi16(out_04_1, k__cospi_p28_p04); const __m256i out_20_2 = _mm256_madd_epi16(out_20_0, k__cospi_p12_p20); const __m256i out_20_3 = _mm256_madd_epi16(out_20_1, k__cospi_p12_p20); const __m256i out_12_2 = _mm256_madd_epi16(out_12_0, k__cospi_m20_p12); const __m256i out_12_3 = _mm256_madd_epi16(out_12_1, k__cospi_m20_p12); const __m256i out_28_2 = _mm256_madd_epi16(out_28_0, k__cospi_m04_p28); const __m256i out_28_3 = _mm256_madd_epi16(out_28_1, k__cospi_m04_p28); // dct_const_round_shift const __m256i out_04_4 = _mm256_add_epi32(out_04_2, k__DCT_CONST_ROUNDING); const __m256i out_04_5 = _mm256_add_epi32(out_04_3, k__DCT_CONST_ROUNDING); const __m256i out_20_4 = _mm256_add_epi32(out_20_2, k__DCT_CONST_ROUNDING); const __m256i out_20_5 = _mm256_add_epi32(out_20_3, k__DCT_CONST_ROUNDING); const __m256i out_12_4 = _mm256_add_epi32(out_12_2, k__DCT_CONST_ROUNDING); const __m256i out_12_5 = _mm256_add_epi32(out_12_3, k__DCT_CONST_ROUNDING); 
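/*
 * Every add/srai pair tagged "dct_const_round_shift" in this function is
 * the same fixed-point rounding: the _mm256_madd_epi16 products are in
 * Q14 (DCT_CONST_BITS == 14), k__DCT_CONST_ROUNDING carries
 * 1 << (DCT_CONST_BITS - 1) in each 32-bit lane, and the arithmetic
 * shift drops the fraction. Illustrative scalar sketch:
 *
 *   static int32_t dct_const_round_shift(int32_t input) {
 *     return (input + (1 << (DCT_CONST_BITS - 1))) >> DCT_CONST_BITS;
 *   }
 */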
const __m256i out_28_4 = _mm256_add_epi32(out_28_2, k__DCT_CONST_ROUNDING); const __m256i out_28_5 = _mm256_add_epi32(out_28_3, k__DCT_CONST_ROUNDING); const __m256i out_04_6 = _mm256_srai_epi32(out_04_4, DCT_CONST_BITS); const __m256i out_04_7 = _mm256_srai_epi32(out_04_5, DCT_CONST_BITS); const __m256i out_20_6 = _mm256_srai_epi32(out_20_4, DCT_CONST_BITS); const __m256i out_20_7 = _mm256_srai_epi32(out_20_5, DCT_CONST_BITS); const __m256i out_12_6 = _mm256_srai_epi32(out_12_4, DCT_CONST_BITS); const __m256i out_12_7 = _mm256_srai_epi32(out_12_5, DCT_CONST_BITS); const __m256i out_28_6 = _mm256_srai_epi32(out_28_4, DCT_CONST_BITS); const __m256i out_28_7 = _mm256_srai_epi32(out_28_5, DCT_CONST_BITS); // Combine out[4] = _mm256_packs_epi32(out_04_6, out_04_7); out[20] = _mm256_packs_epi32(out_20_6, out_20_7); out[12] = _mm256_packs_epi32(out_12_6, out_12_7); out[28] = _mm256_packs_epi32(out_28_6, out_28_7); } { step3[8] = _mm256_add_epi16(step2[9], step1[8]); step3[9] = _mm256_sub_epi16(step1[8], step2[9]); step3[10] = _mm256_sub_epi16(step1[11], step2[10]); step3[11] = _mm256_add_epi16(step2[10], step1[11]); step3[12] = _mm256_add_epi16(step2[13], step1[12]); step3[13] = _mm256_sub_epi16(step1[12], step2[13]); step3[14] = _mm256_sub_epi16(step1[15], step2[14]); step3[15] = _mm256_add_epi16(step2[14], step1[15]); } { const __m256i s3_17_0 = _mm256_unpacklo_epi16(step2[17], step2[30]); const __m256i s3_17_1 = _mm256_unpackhi_epi16(step2[17], step2[30]); const __m256i s3_18_0 = _mm256_unpacklo_epi16(step2[18], step2[29]); const __m256i s3_18_1 = _mm256_unpackhi_epi16(step2[18], step2[29]); const __m256i s3_21_0 = _mm256_unpacklo_epi16(step2[21], step2[26]); const __m256i s3_21_1 = _mm256_unpackhi_epi16(step2[21], step2[26]); const __m256i s3_22_0 = _mm256_unpacklo_epi16(step2[22], step2[25]); const __m256i s3_22_1 = _mm256_unpackhi_epi16(step2[22], step2[25]); const __m256i s3_17_2 = _mm256_madd_epi16(s3_17_0, k__cospi_m04_p28); const __m256i s3_17_3 = _mm256_madd_epi16(s3_17_1, k__cospi_m04_p28); const __m256i s3_18_2 = _mm256_madd_epi16(s3_18_0, k__cospi_m28_m04); const __m256i s3_18_3 = _mm256_madd_epi16(s3_18_1, k__cospi_m28_m04); const __m256i s3_21_2 = _mm256_madd_epi16(s3_21_0, k__cospi_m20_p12); const __m256i s3_21_3 = _mm256_madd_epi16(s3_21_1, k__cospi_m20_p12); const __m256i s3_22_2 = _mm256_madd_epi16(s3_22_0, k__cospi_m12_m20); const __m256i s3_22_3 = _mm256_madd_epi16(s3_22_1, k__cospi_m12_m20); const __m256i s3_25_2 = _mm256_madd_epi16(s3_22_0, k__cospi_m20_p12); const __m256i s3_25_3 = _mm256_madd_epi16(s3_22_1, k__cospi_m20_p12); const __m256i s3_26_2 = _mm256_madd_epi16(s3_21_0, k__cospi_p12_p20); const __m256i s3_26_3 = _mm256_madd_epi16(s3_21_1, k__cospi_p12_p20); const __m256i s3_29_2 = _mm256_madd_epi16(s3_18_0, k__cospi_m04_p28); const __m256i s3_29_3 = _mm256_madd_epi16(s3_18_1, k__cospi_m04_p28); const __m256i s3_30_2 = _mm256_madd_epi16(s3_17_0, k__cospi_p28_p04); const __m256i s3_30_3 = _mm256_madd_epi16(s3_17_1, k__cospi_p28_p04); // dct_const_round_shift const __m256i s3_17_4 = _mm256_add_epi32(s3_17_2, k__DCT_CONST_ROUNDING); const __m256i s3_17_5 = _mm256_add_epi32(s3_17_3, k__DCT_CONST_ROUNDING); const __m256i s3_18_4 = _mm256_add_epi32(s3_18_2, k__DCT_CONST_ROUNDING); const __m256i s3_18_5 = _mm256_add_epi32(s3_18_3, k__DCT_CONST_ROUNDING); const __m256i s3_21_4 = _mm256_add_epi32(s3_21_2, k__DCT_CONST_ROUNDING); const __m256i s3_21_5 = _mm256_add_epi32(s3_21_3, k__DCT_CONST_ROUNDING); const __m256i s3_22_4 = _mm256_add_epi32(s3_22_2, k__DCT_CONST_ROUNDING); 
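/*
 * The k__cospi_* operands in this block are pair256_set_epi16 constants:
 * each 32-bit lane packs two 16-bit cosines, so a single
 * _mm256_madd_epi16 on unpacklo/unpackhi-interleaved inputs evaluates
 * one row of a 2x2 rotation. For example, for the s3_17/s3_30 pair:
 *
 *   step3[17] = -cospi_4_64 * step2[17] + cospi_28_64 * step2[30]
 *   step3[30] =  cospi_28_64 * step2[17] + cospi_4_64 * step2[30]
 *
 * each followed by dct_const_round_shift as above.
 */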
const __m256i s3_22_5 = _mm256_add_epi32(s3_22_3, k__DCT_CONST_ROUNDING); const __m256i s3_17_6 = _mm256_srai_epi32(s3_17_4, DCT_CONST_BITS); const __m256i s3_17_7 = _mm256_srai_epi32(s3_17_5, DCT_CONST_BITS); const __m256i s3_18_6 = _mm256_srai_epi32(s3_18_4, DCT_CONST_BITS); const __m256i s3_18_7 = _mm256_srai_epi32(s3_18_5, DCT_CONST_BITS); const __m256i s3_21_6 = _mm256_srai_epi32(s3_21_4, DCT_CONST_BITS); const __m256i s3_21_7 = _mm256_srai_epi32(s3_21_5, DCT_CONST_BITS); const __m256i s3_22_6 = _mm256_srai_epi32(s3_22_4, DCT_CONST_BITS); const __m256i s3_22_7 = _mm256_srai_epi32(s3_22_5, DCT_CONST_BITS); const __m256i s3_25_4 = _mm256_add_epi32(s3_25_2, k__DCT_CONST_ROUNDING); const __m256i s3_25_5 = _mm256_add_epi32(s3_25_3, k__DCT_CONST_ROUNDING); const __m256i s3_26_4 = _mm256_add_epi32(s3_26_2, k__DCT_CONST_ROUNDING); const __m256i s3_26_5 = _mm256_add_epi32(s3_26_3, k__DCT_CONST_ROUNDING); const __m256i s3_29_4 = _mm256_add_epi32(s3_29_2, k__DCT_CONST_ROUNDING); const __m256i s3_29_5 = _mm256_add_epi32(s3_29_3, k__DCT_CONST_ROUNDING); const __m256i s3_30_4 = _mm256_add_epi32(s3_30_2, k__DCT_CONST_ROUNDING); const __m256i s3_30_5 = _mm256_add_epi32(s3_30_3, k__DCT_CONST_ROUNDING); const __m256i s3_25_6 = _mm256_srai_epi32(s3_25_4, DCT_CONST_BITS); const __m256i s3_25_7 = _mm256_srai_epi32(s3_25_5, DCT_CONST_BITS); const __m256i s3_26_6 = _mm256_srai_epi32(s3_26_4, DCT_CONST_BITS); const __m256i s3_26_7 = _mm256_srai_epi32(s3_26_5, DCT_CONST_BITS); const __m256i s3_29_6 = _mm256_srai_epi32(s3_29_4, DCT_CONST_BITS); const __m256i s3_29_7 = _mm256_srai_epi32(s3_29_5, DCT_CONST_BITS); const __m256i s3_30_6 = _mm256_srai_epi32(s3_30_4, DCT_CONST_BITS); const __m256i s3_30_7 = _mm256_srai_epi32(s3_30_5, DCT_CONST_BITS); // Combine step3[17] = _mm256_packs_epi32(s3_17_6, s3_17_7); step3[18] = _mm256_packs_epi32(s3_18_6, s3_18_7); step3[21] = _mm256_packs_epi32(s3_21_6, s3_21_7); step3[22] = _mm256_packs_epi32(s3_22_6, s3_22_7); // Combine step3[25] = _mm256_packs_epi32(s3_25_6, s3_25_7); step3[26] = _mm256_packs_epi32(s3_26_6, s3_26_7); step3[29] = _mm256_packs_epi32(s3_29_6, s3_29_7); step3[30] = _mm256_packs_epi32(s3_30_6, s3_30_7); } // Stage 7 { const __m256i out_02_0 = _mm256_unpacklo_epi16(step3[8], step3[15]); const __m256i out_02_1 = _mm256_unpackhi_epi16(step3[8], step3[15]); const __m256i out_18_0 = _mm256_unpacklo_epi16(step3[9], step3[14]); const __m256i out_18_1 = _mm256_unpackhi_epi16(step3[9], step3[14]); const __m256i out_10_0 = _mm256_unpacklo_epi16(step3[10], step3[13]); const __m256i out_10_1 = _mm256_unpackhi_epi16(step3[10], step3[13]); const __m256i out_26_0 = _mm256_unpacklo_epi16(step3[11], step3[12]); const __m256i out_26_1 = _mm256_unpackhi_epi16(step3[11], step3[12]); const __m256i out_02_2 = _mm256_madd_epi16(out_02_0, k__cospi_p30_p02); const __m256i out_02_3 = _mm256_madd_epi16(out_02_1, k__cospi_p30_p02); const __m256i out_18_2 = _mm256_madd_epi16(out_18_0, k__cospi_p14_p18); const __m256i out_18_3 = _mm256_madd_epi16(out_18_1, k__cospi_p14_p18); const __m256i out_10_2 = _mm256_madd_epi16(out_10_0, k__cospi_p22_p10); const __m256i out_10_3 = _mm256_madd_epi16(out_10_1, k__cospi_p22_p10); const __m256i out_26_2 = _mm256_madd_epi16(out_26_0, k__cospi_p06_p26); const __m256i out_26_3 = _mm256_madd_epi16(out_26_1, k__cospi_p06_p26); const __m256i out_06_2 = _mm256_madd_epi16(out_26_0, k__cospi_m26_p06); const __m256i out_06_3 = _mm256_madd_epi16(out_26_1, k__cospi_m26_p06); const __m256i out_22_2 = _mm256_madd_epi16(out_10_0, k__cospi_m10_p22); const __m256i 
out_22_3 = _mm256_madd_epi16(out_10_1, k__cospi_m10_p22); const __m256i out_14_2 = _mm256_madd_epi16(out_18_0, k__cospi_m18_p14); const __m256i out_14_3 = _mm256_madd_epi16(out_18_1, k__cospi_m18_p14); const __m256i out_30_2 = _mm256_madd_epi16(out_02_0, k__cospi_m02_p30); const __m256i out_30_3 = _mm256_madd_epi16(out_02_1, k__cospi_m02_p30); // dct_const_round_shift const __m256i out_02_4 = _mm256_add_epi32(out_02_2, k__DCT_CONST_ROUNDING); const __m256i out_02_5 = _mm256_add_epi32(out_02_3, k__DCT_CONST_ROUNDING); const __m256i out_18_4 = _mm256_add_epi32(out_18_2, k__DCT_CONST_ROUNDING); const __m256i out_18_5 = _mm256_add_epi32(out_18_3, k__DCT_CONST_ROUNDING); const __m256i out_10_4 = _mm256_add_epi32(out_10_2, k__DCT_CONST_ROUNDING); const __m256i out_10_5 = _mm256_add_epi32(out_10_3, k__DCT_CONST_ROUNDING); const __m256i out_26_4 = _mm256_add_epi32(out_26_2, k__DCT_CONST_ROUNDING); const __m256i out_26_5 = _mm256_add_epi32(out_26_3, k__DCT_CONST_ROUNDING); const __m256i out_06_4 = _mm256_add_epi32(out_06_2, k__DCT_CONST_ROUNDING); const __m256i out_06_5 = _mm256_add_epi32(out_06_3, k__DCT_CONST_ROUNDING); const __m256i out_22_4 = _mm256_add_epi32(out_22_2, k__DCT_CONST_ROUNDING); const __m256i out_22_5 = _mm256_add_epi32(out_22_3, k__DCT_CONST_ROUNDING); const __m256i out_14_4 = _mm256_add_epi32(out_14_2, k__DCT_CONST_ROUNDING); const __m256i out_14_5 = _mm256_add_epi32(out_14_3, k__DCT_CONST_ROUNDING); const __m256i out_30_4 = _mm256_add_epi32(out_30_2, k__DCT_CONST_ROUNDING); const __m256i out_30_5 = _mm256_add_epi32(out_30_3, k__DCT_CONST_ROUNDING); const __m256i out_02_6 = _mm256_srai_epi32(out_02_4, DCT_CONST_BITS); const __m256i out_02_7 = _mm256_srai_epi32(out_02_5, DCT_CONST_BITS); const __m256i out_18_6 = _mm256_srai_epi32(out_18_4, DCT_CONST_BITS); const __m256i out_18_7 = _mm256_srai_epi32(out_18_5, DCT_CONST_BITS); const __m256i out_10_6 = _mm256_srai_epi32(out_10_4, DCT_CONST_BITS); const __m256i out_10_7 = _mm256_srai_epi32(out_10_5, DCT_CONST_BITS); const __m256i out_26_6 = _mm256_srai_epi32(out_26_4, DCT_CONST_BITS); const __m256i out_26_7 = _mm256_srai_epi32(out_26_5, DCT_CONST_BITS); const __m256i out_06_6 = _mm256_srai_epi32(out_06_4, DCT_CONST_BITS); const __m256i out_06_7 = _mm256_srai_epi32(out_06_5, DCT_CONST_BITS); const __m256i out_22_6 = _mm256_srai_epi32(out_22_4, DCT_CONST_BITS); const __m256i out_22_7 = _mm256_srai_epi32(out_22_5, DCT_CONST_BITS); const __m256i out_14_6 = _mm256_srai_epi32(out_14_4, DCT_CONST_BITS); const __m256i out_14_7 = _mm256_srai_epi32(out_14_5, DCT_CONST_BITS); const __m256i out_30_6 = _mm256_srai_epi32(out_30_4, DCT_CONST_BITS); const __m256i out_30_7 = _mm256_srai_epi32(out_30_5, DCT_CONST_BITS); // Combine out[2] = _mm256_packs_epi32(out_02_6, out_02_7); out[18] = _mm256_packs_epi32(out_18_6, out_18_7); out[10] = _mm256_packs_epi32(out_10_6, out_10_7); out[26] = _mm256_packs_epi32(out_26_6, out_26_7); out[6] = _mm256_packs_epi32(out_06_6, out_06_7); out[22] = _mm256_packs_epi32(out_22_6, out_22_7); out[14] = _mm256_packs_epi32(out_14_6, out_14_7); out[30] = _mm256_packs_epi32(out_30_6, out_30_7); } { step1[16] = _mm256_add_epi16(step3[17], step2[16]); step1[17] = _mm256_sub_epi16(step2[16], step3[17]); step1[18] = _mm256_sub_epi16(step2[19], step3[18]); step1[19] = _mm256_add_epi16(step3[18], step2[19]); step1[20] = _mm256_add_epi16(step3[21], step2[20]); step1[21] = _mm256_sub_epi16(step2[20], step3[21]); step1[22] = _mm256_sub_epi16(step2[23], step3[22]); step1[23] = _mm256_add_epi16(step3[22], step2[23]); step1[24] = 
_mm256_add_epi16(step3[25], step2[24]); step1[25] = _mm256_sub_epi16(step2[24], step3[25]); step1[26] = _mm256_sub_epi16(step2[27], step3[26]); step1[27] = _mm256_add_epi16(step3[26], step2[27]); step1[28] = _mm256_add_epi16(step3[29], step2[28]); step1[29] = _mm256_sub_epi16(step2[28], step3[29]); step1[30] = _mm256_sub_epi16(step2[31], step3[30]); step1[31] = _mm256_add_epi16(step3[30], step2[31]); } // Final stage --- outputs indices are bit-reversed. { const __m256i out_01_0 = _mm256_unpacklo_epi16(step1[16], step1[31]); const __m256i out_01_1 = _mm256_unpackhi_epi16(step1[16], step1[31]); const __m256i out_17_0 = _mm256_unpacklo_epi16(step1[17], step1[30]); const __m256i out_17_1 = _mm256_unpackhi_epi16(step1[17], step1[30]); const __m256i out_09_0 = _mm256_unpacklo_epi16(step1[18], step1[29]); const __m256i out_09_1 = _mm256_unpackhi_epi16(step1[18], step1[29]); const __m256i out_25_0 = _mm256_unpacklo_epi16(step1[19], step1[28]); const __m256i out_25_1 = _mm256_unpackhi_epi16(step1[19], step1[28]); const __m256i out_01_2 = _mm256_madd_epi16(out_01_0, k__cospi_p31_p01); const __m256i out_01_3 = _mm256_madd_epi16(out_01_1, k__cospi_p31_p01); const __m256i out_17_2 = _mm256_madd_epi16(out_17_0, k__cospi_p15_p17); const __m256i out_17_3 = _mm256_madd_epi16(out_17_1, k__cospi_p15_p17); const __m256i out_09_2 = _mm256_madd_epi16(out_09_0, k__cospi_p23_p09); const __m256i out_09_3 = _mm256_madd_epi16(out_09_1, k__cospi_p23_p09); const __m256i out_25_2 = _mm256_madd_epi16(out_25_0, k__cospi_p07_p25); const __m256i out_25_3 = _mm256_madd_epi16(out_25_1, k__cospi_p07_p25); const __m256i out_07_2 = _mm256_madd_epi16(out_25_0, k__cospi_m25_p07); const __m256i out_07_3 = _mm256_madd_epi16(out_25_1, k__cospi_m25_p07); const __m256i out_23_2 = _mm256_madd_epi16(out_09_0, k__cospi_m09_p23); const __m256i out_23_3 = _mm256_madd_epi16(out_09_1, k__cospi_m09_p23); const __m256i out_15_2 = _mm256_madd_epi16(out_17_0, k__cospi_m17_p15); const __m256i out_15_3 = _mm256_madd_epi16(out_17_1, k__cospi_m17_p15); const __m256i out_31_2 = _mm256_madd_epi16(out_01_0, k__cospi_m01_p31); const __m256i out_31_3 = _mm256_madd_epi16(out_01_1, k__cospi_m01_p31); // dct_const_round_shift const __m256i out_01_4 = _mm256_add_epi32(out_01_2, k__DCT_CONST_ROUNDING); const __m256i out_01_5 = _mm256_add_epi32(out_01_3, k__DCT_CONST_ROUNDING); const __m256i out_17_4 = _mm256_add_epi32(out_17_2, k__DCT_CONST_ROUNDING); const __m256i out_17_5 = _mm256_add_epi32(out_17_3, k__DCT_CONST_ROUNDING); const __m256i out_09_4 = _mm256_add_epi32(out_09_2, k__DCT_CONST_ROUNDING); const __m256i out_09_5 = _mm256_add_epi32(out_09_3, k__DCT_CONST_ROUNDING); const __m256i out_25_4 = _mm256_add_epi32(out_25_2, k__DCT_CONST_ROUNDING); const __m256i out_25_5 = _mm256_add_epi32(out_25_3, k__DCT_CONST_ROUNDING); const __m256i out_07_4 = _mm256_add_epi32(out_07_2, k__DCT_CONST_ROUNDING); const __m256i out_07_5 = _mm256_add_epi32(out_07_3, k__DCT_CONST_ROUNDING); const __m256i out_23_4 = _mm256_add_epi32(out_23_2, k__DCT_CONST_ROUNDING); const __m256i out_23_5 = _mm256_add_epi32(out_23_3, k__DCT_CONST_ROUNDING); const __m256i out_15_4 = _mm256_add_epi32(out_15_2, k__DCT_CONST_ROUNDING); const __m256i out_15_5 = _mm256_add_epi32(out_15_3, k__DCT_CONST_ROUNDING); const __m256i out_31_4 = _mm256_add_epi32(out_31_2, k__DCT_CONST_ROUNDING); const __m256i out_31_5 = _mm256_add_epi32(out_31_3, k__DCT_CONST_ROUNDING); const __m256i out_01_6 = _mm256_srai_epi32(out_01_4, DCT_CONST_BITS); const __m256i out_01_7 = _mm256_srai_epi32(out_01_5, DCT_CONST_BITS); 
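/*
 * After each dct_const_round_shift, _mm256_packs_epi32 narrows the two
 * rounded 32-bit halves back to 16 bits with signed saturation, and the
 * out[] indices written in the final stage (1, 17, 9, 25, 7, 23, 15, 31,
 * then 5, 21, ...) are the bit-reversed positions noted above.
 * Illustrative scalar sketch of the per-lane narrowing:
 *
 *   static int16_t pack_sat16(int32_t v) {
 *     if (v > INT16_MAX) return INT16_MAX;
 *     if (v < INT16_MIN) return INT16_MIN;
 *     return (int16_t)v;
 *   }
 */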
const __m256i out_17_6 = _mm256_srai_epi32(out_17_4, DCT_CONST_BITS); const __m256i out_17_7 = _mm256_srai_epi32(out_17_5, DCT_CONST_BITS); const __m256i out_09_6 = _mm256_srai_epi32(out_09_4, DCT_CONST_BITS); const __m256i out_09_7 = _mm256_srai_epi32(out_09_5, DCT_CONST_BITS); const __m256i out_25_6 = _mm256_srai_epi32(out_25_4, DCT_CONST_BITS); const __m256i out_25_7 = _mm256_srai_epi32(out_25_5, DCT_CONST_BITS); const __m256i out_07_6 = _mm256_srai_epi32(out_07_4, DCT_CONST_BITS); const __m256i out_07_7 = _mm256_srai_epi32(out_07_5, DCT_CONST_BITS); const __m256i out_23_6 = _mm256_srai_epi32(out_23_4, DCT_CONST_BITS); const __m256i out_23_7 = _mm256_srai_epi32(out_23_5, DCT_CONST_BITS); const __m256i out_15_6 = _mm256_srai_epi32(out_15_4, DCT_CONST_BITS); const __m256i out_15_7 = _mm256_srai_epi32(out_15_5, DCT_CONST_BITS); const __m256i out_31_6 = _mm256_srai_epi32(out_31_4, DCT_CONST_BITS); const __m256i out_31_7 = _mm256_srai_epi32(out_31_5, DCT_CONST_BITS); // Combine out[1] = _mm256_packs_epi32(out_01_6, out_01_7); out[17] = _mm256_packs_epi32(out_17_6, out_17_7); out[9] = _mm256_packs_epi32(out_09_6, out_09_7); out[25] = _mm256_packs_epi32(out_25_6, out_25_7); out[7] = _mm256_packs_epi32(out_07_6, out_07_7); out[23] = _mm256_packs_epi32(out_23_6, out_23_7); out[15] = _mm256_packs_epi32(out_15_6, out_15_7); out[31] = _mm256_packs_epi32(out_31_6, out_31_7); } { const __m256i out_05_0 = _mm256_unpacklo_epi16(step1[20], step1[27]); const __m256i out_05_1 = _mm256_unpackhi_epi16(step1[20], step1[27]); const __m256i out_21_0 = _mm256_unpacklo_epi16(step1[21], step1[26]); const __m256i out_21_1 = _mm256_unpackhi_epi16(step1[21], step1[26]); const __m256i out_13_0 = _mm256_unpacklo_epi16(step1[22], step1[25]); const __m256i out_13_1 = _mm256_unpackhi_epi16(step1[22], step1[25]); const __m256i out_29_0 = _mm256_unpacklo_epi16(step1[23], step1[24]); const __m256i out_29_1 = _mm256_unpackhi_epi16(step1[23], step1[24]); const __m256i out_05_2 = _mm256_madd_epi16(out_05_0, k__cospi_p27_p05); const __m256i out_05_3 = _mm256_madd_epi16(out_05_1, k__cospi_p27_p05); const __m256i out_21_2 = _mm256_madd_epi16(out_21_0, k__cospi_p11_p21); const __m256i out_21_3 = _mm256_madd_epi16(out_21_1, k__cospi_p11_p21); const __m256i out_13_2 = _mm256_madd_epi16(out_13_0, k__cospi_p19_p13); const __m256i out_13_3 = _mm256_madd_epi16(out_13_1, k__cospi_p19_p13); const __m256i out_29_2 = _mm256_madd_epi16(out_29_0, k__cospi_p03_p29); const __m256i out_29_3 = _mm256_madd_epi16(out_29_1, k__cospi_p03_p29); const __m256i out_03_2 = _mm256_madd_epi16(out_29_0, k__cospi_m29_p03); const __m256i out_03_3 = _mm256_madd_epi16(out_29_1, k__cospi_m29_p03); const __m256i out_19_2 = _mm256_madd_epi16(out_13_0, k__cospi_m13_p19); const __m256i out_19_3 = _mm256_madd_epi16(out_13_1, k__cospi_m13_p19); const __m256i out_11_2 = _mm256_madd_epi16(out_21_0, k__cospi_m21_p11); const __m256i out_11_3 = _mm256_madd_epi16(out_21_1, k__cospi_m21_p11); const __m256i out_27_2 = _mm256_madd_epi16(out_05_0, k__cospi_m05_p27); const __m256i out_27_3 = _mm256_madd_epi16(out_05_1, k__cospi_m05_p27); // dct_const_round_shift const __m256i out_05_4 = _mm256_add_epi32(out_05_2, k__DCT_CONST_ROUNDING); const __m256i out_05_5 = _mm256_add_epi32(out_05_3, k__DCT_CONST_ROUNDING); const __m256i out_21_4 = _mm256_add_epi32(out_21_2, k__DCT_CONST_ROUNDING); const __m256i out_21_5 = _mm256_add_epi32(out_21_3, k__DCT_CONST_ROUNDING); const __m256i out_13_4 = _mm256_add_epi32(out_13_2, k__DCT_CONST_ROUNDING); const __m256i out_13_5 = 
_mm256_add_epi32(out_13_3, k__DCT_CONST_ROUNDING); const __m256i out_29_4 = _mm256_add_epi32(out_29_2, k__DCT_CONST_ROUNDING); const __m256i out_29_5 = _mm256_add_epi32(out_29_3, k__DCT_CONST_ROUNDING); const __m256i out_03_4 = _mm256_add_epi32(out_03_2, k__DCT_CONST_ROUNDING); const __m256i out_03_5 = _mm256_add_epi32(out_03_3, k__DCT_CONST_ROUNDING); const __m256i out_19_4 = _mm256_add_epi32(out_19_2, k__DCT_CONST_ROUNDING); const __m256i out_19_5 = _mm256_add_epi32(out_19_3, k__DCT_CONST_ROUNDING); const __m256i out_11_4 = _mm256_add_epi32(out_11_2, k__DCT_CONST_ROUNDING); const __m256i out_11_5 = _mm256_add_epi32(out_11_3, k__DCT_CONST_ROUNDING); const __m256i out_27_4 = _mm256_add_epi32(out_27_2, k__DCT_CONST_ROUNDING); const __m256i out_27_5 = _mm256_add_epi32(out_27_3, k__DCT_CONST_ROUNDING); const __m256i out_05_6 = _mm256_srai_epi32(out_05_4, DCT_CONST_BITS); const __m256i out_05_7 = _mm256_srai_epi32(out_05_5, DCT_CONST_BITS); const __m256i out_21_6 = _mm256_srai_epi32(out_21_4, DCT_CONST_BITS); const __m256i out_21_7 = _mm256_srai_epi32(out_21_5, DCT_CONST_BITS); const __m256i out_13_6 = _mm256_srai_epi32(out_13_4, DCT_CONST_BITS); const __m256i out_13_7 = _mm256_srai_epi32(out_13_5, DCT_CONST_BITS); const __m256i out_29_6 = _mm256_srai_epi32(out_29_4, DCT_CONST_BITS); const __m256i out_29_7 = _mm256_srai_epi32(out_29_5, DCT_CONST_BITS); const __m256i out_03_6 = _mm256_srai_epi32(out_03_4, DCT_CONST_BITS); const __m256i out_03_7 = _mm256_srai_epi32(out_03_5, DCT_CONST_BITS); const __m256i out_19_6 = _mm256_srai_epi32(out_19_4, DCT_CONST_BITS); const __m256i out_19_7 = _mm256_srai_epi32(out_19_5, DCT_CONST_BITS); const __m256i out_11_6 = _mm256_srai_epi32(out_11_4, DCT_CONST_BITS); const __m256i out_11_7 = _mm256_srai_epi32(out_11_5, DCT_CONST_BITS); const __m256i out_27_6 = _mm256_srai_epi32(out_27_4, DCT_CONST_BITS); const __m256i out_27_7 = _mm256_srai_epi32(out_27_5, DCT_CONST_BITS); // Combine out[5] = _mm256_packs_epi32(out_05_6, out_05_7); out[21] = _mm256_packs_epi32(out_21_6, out_21_7); out[13] = _mm256_packs_epi32(out_13_6, out_13_7); out[29] = _mm256_packs_epi32(out_29_6, out_29_7); out[3] = _mm256_packs_epi32(out_03_6, out_03_7); out[19] = _mm256_packs_epi32(out_19_6, out_19_7); out[11] = _mm256_packs_epi32(out_11_6, out_11_7); out[27] = _mm256_packs_epi32(out_27_6, out_27_7); } #if FDCT32x32_HIGH_PRECISION } else { __m256i lstep1[64], lstep2[64], lstep3[64]; __m256i u[32], v[32], sign[16]; const __m256i K32One = _mm256_set_epi32(1, 1, 1, 1, 1, 1, 1, 1); const __m256i k__pOne_mOne = pair256_set_epi16(1, -1); // start using 32-bit operations // stage 3 { // expanding to 32-bit length while adding and subtracting lstep2[0] = _mm256_unpacklo_epi16(step2[0], step2[7]); lstep2[1] = _mm256_unpackhi_epi16(step2[0], step2[7]); lstep2[2] = _mm256_unpacklo_epi16(step2[1], step2[6]); lstep2[3] = _mm256_unpackhi_epi16(step2[1], step2[6]); lstep2[4] = _mm256_unpacklo_epi16(step2[2], step2[5]); lstep2[5] = _mm256_unpackhi_epi16(step2[2], step2[5]); lstep2[6] = _mm256_unpacklo_epi16(step2[3], step2[4]); lstep2[7] = _mm256_unpackhi_epi16(step2[3], step2[4]); lstep3[0] = _mm256_madd_epi16(lstep2[0], kOne); lstep3[1] = _mm256_madd_epi16(lstep2[1], kOne); lstep3[2] = _mm256_madd_epi16(lstep2[2], kOne); lstep3[3] = _mm256_madd_epi16(lstep2[3], kOne); lstep3[4] = _mm256_madd_epi16(lstep2[4], kOne); lstep3[5] = _mm256_madd_epi16(lstep2[5], kOne); lstep3[6] = _mm256_madd_epi16(lstep2[6], kOne); lstep3[7] = _mm256_madd_epi16(lstep2[7], kOne); lstep3[8] = _mm256_madd_epi16(lstep2[6], 
k__pOne_mOne); lstep3[9] = _mm256_madd_epi16(lstep2[7], k__pOne_mOne); lstep3[10] = _mm256_madd_epi16(lstep2[4], k__pOne_mOne); lstep3[11] = _mm256_madd_epi16(lstep2[5], k__pOne_mOne); lstep3[12] = _mm256_madd_epi16(lstep2[2], k__pOne_mOne); lstep3[13] = _mm256_madd_epi16(lstep2[3], k__pOne_mOne); lstep3[14] = _mm256_madd_epi16(lstep2[0], k__pOne_mOne); lstep3[15] = _mm256_madd_epi16(lstep2[1], k__pOne_mOne); } { const __m256i s3_10_0 = _mm256_unpacklo_epi16(step2[13], step2[10]); const __m256i s3_10_1 = _mm256_unpackhi_epi16(step2[13], step2[10]); const __m256i s3_11_0 = _mm256_unpacklo_epi16(step2[12], step2[11]); const __m256i s3_11_1 = _mm256_unpackhi_epi16(step2[12], step2[11]); const __m256i s3_10_2 = _mm256_madd_epi16(s3_10_0, k__cospi_p16_m16); const __m256i s3_10_3 = _mm256_madd_epi16(s3_10_1, k__cospi_p16_m16); const __m256i s3_11_2 = _mm256_madd_epi16(s3_11_0, k__cospi_p16_m16); const __m256i s3_11_3 = _mm256_madd_epi16(s3_11_1, k__cospi_p16_m16); const __m256i s3_12_2 = _mm256_madd_epi16(s3_11_0, k__cospi_p16_p16); const __m256i s3_12_3 = _mm256_madd_epi16(s3_11_1, k__cospi_p16_p16); const __m256i s3_13_2 = _mm256_madd_epi16(s3_10_0, k__cospi_p16_p16); const __m256i s3_13_3 = _mm256_madd_epi16(s3_10_1, k__cospi_p16_p16); // dct_const_round_shift const __m256i s3_10_4 = _mm256_add_epi32(s3_10_2, k__DCT_CONST_ROUNDING); const __m256i s3_10_5 = _mm256_add_epi32(s3_10_3, k__DCT_CONST_ROUNDING); const __m256i s3_11_4 = _mm256_add_epi32(s3_11_2, k__DCT_CONST_ROUNDING); const __m256i s3_11_5 = _mm256_add_epi32(s3_11_3, k__DCT_CONST_ROUNDING); const __m256i s3_12_4 = _mm256_add_epi32(s3_12_2, k__DCT_CONST_ROUNDING); const __m256i s3_12_5 = _mm256_add_epi32(s3_12_3, k__DCT_CONST_ROUNDING); const __m256i s3_13_4 = _mm256_add_epi32(s3_13_2, k__DCT_CONST_ROUNDING); const __m256i s3_13_5 = _mm256_add_epi32(s3_13_3, k__DCT_CONST_ROUNDING); lstep3[20] = _mm256_srai_epi32(s3_10_4, DCT_CONST_BITS); lstep3[21] = _mm256_srai_epi32(s3_10_5, DCT_CONST_BITS); lstep3[22] = _mm256_srai_epi32(s3_11_4, DCT_CONST_BITS); lstep3[23] = _mm256_srai_epi32(s3_11_5, DCT_CONST_BITS); lstep3[24] = _mm256_srai_epi32(s3_12_4, DCT_CONST_BITS); lstep3[25] = _mm256_srai_epi32(s3_12_5, DCT_CONST_BITS); lstep3[26] = _mm256_srai_epi32(s3_13_4, DCT_CONST_BITS); lstep3[27] = _mm256_srai_epi32(s3_13_5, DCT_CONST_BITS); } { lstep1[32] = _mm256_unpacklo_epi16(step1[16], step2[23]); lstep1[33] = _mm256_unpackhi_epi16(step1[16], step2[23]); lstep1[34] = _mm256_unpacklo_epi16(step1[17], step2[22]); lstep1[35] = _mm256_unpackhi_epi16(step1[17], step2[22]); lstep1[36] = _mm256_unpacklo_epi16(step1[18], step2[21]); lstep1[37] = _mm256_unpackhi_epi16(step1[18], step2[21]); lstep1[38] = _mm256_unpacklo_epi16(step1[19], step2[20]); lstep1[39] = _mm256_unpackhi_epi16(step1[19], step2[20]); lstep1[56] = _mm256_unpacklo_epi16(step1[28], step2[27]); lstep1[57] = _mm256_unpackhi_epi16(step1[28], step2[27]); lstep1[58] = _mm256_unpacklo_epi16(step1[29], step2[26]); lstep1[59] = _mm256_unpackhi_epi16(step1[29], step2[26]); lstep1[60] = _mm256_unpacklo_epi16(step1[30], step2[25]); lstep1[61] = _mm256_unpackhi_epi16(step1[30], step2[25]); lstep1[62] = _mm256_unpacklo_epi16(step1[31], step2[24]); lstep1[63] = _mm256_unpackhi_epi16(step1[31], step2[24]); lstep3[32] = _mm256_madd_epi16(lstep1[32], kOne); lstep3[33] = _mm256_madd_epi16(lstep1[33], kOne); lstep3[34] = _mm256_madd_epi16(lstep1[34], kOne); lstep3[35] = _mm256_madd_epi16(lstep1[35], kOne); lstep3[36] = _mm256_madd_epi16(lstep1[36], kOne); lstep3[37] = _mm256_madd_epi16(lstep1[37], 
kOne); lstep3[38] = _mm256_madd_epi16(lstep1[38], kOne); lstep3[39] = _mm256_madd_epi16(lstep1[39], kOne); lstep3[40] = _mm256_madd_epi16(lstep1[38], k__pOne_mOne); lstep3[41] = _mm256_madd_epi16(lstep1[39], k__pOne_mOne); lstep3[42] = _mm256_madd_epi16(lstep1[36], k__pOne_mOne); lstep3[43] = _mm256_madd_epi16(lstep1[37], k__pOne_mOne); lstep3[44] = _mm256_madd_epi16(lstep1[34], k__pOne_mOne); lstep3[45] = _mm256_madd_epi16(lstep1[35], k__pOne_mOne); lstep3[46] = _mm256_madd_epi16(lstep1[32], k__pOne_mOne); lstep3[47] = _mm256_madd_epi16(lstep1[33], k__pOne_mOne); lstep3[48] = _mm256_madd_epi16(lstep1[62], k__pOne_mOne); lstep3[49] = _mm256_madd_epi16(lstep1[63], k__pOne_mOne); lstep3[50] = _mm256_madd_epi16(lstep1[60], k__pOne_mOne); lstep3[51] = _mm256_madd_epi16(lstep1[61], k__pOne_mOne); lstep3[52] = _mm256_madd_epi16(lstep1[58], k__pOne_mOne); lstep3[53] = _mm256_madd_epi16(lstep1[59], k__pOne_mOne); lstep3[54] = _mm256_madd_epi16(lstep1[56], k__pOne_mOne); lstep3[55] = _mm256_madd_epi16(lstep1[57], k__pOne_mOne); lstep3[56] = _mm256_madd_epi16(lstep1[56], kOne); lstep3[57] = _mm256_madd_epi16(lstep1[57], kOne); lstep3[58] = _mm256_madd_epi16(lstep1[58], kOne); lstep3[59] = _mm256_madd_epi16(lstep1[59], kOne); lstep3[60] = _mm256_madd_epi16(lstep1[60], kOne); lstep3[61] = _mm256_madd_epi16(lstep1[61], kOne); lstep3[62] = _mm256_madd_epi16(lstep1[62], kOne); lstep3[63] = _mm256_madd_epi16(lstep1[63], kOne); } // stage 4 { // expanding to 32-bit length prior to addition operations sign[0] = _mm256_cmpgt_epi16(kZero, step2[8]); sign[1] = _mm256_cmpgt_epi16(kZero, step2[9]); sign[2] = _mm256_cmpgt_epi16(kZero, step2[14]); sign[3] = _mm256_cmpgt_epi16(kZero, step2[15]); lstep2[16] = _mm256_unpacklo_epi16(step2[8], sign[0]); lstep2[17] = _mm256_unpackhi_epi16(step2[8], sign[0]); lstep2[18] = _mm256_unpacklo_epi16(step2[9], sign[1]); lstep2[19] = _mm256_unpackhi_epi16(step2[9], sign[1]); lstep2[28] = _mm256_unpacklo_epi16(step2[14], sign[2]); lstep2[29] = _mm256_unpackhi_epi16(step2[14], sign[2]); lstep2[30] = _mm256_unpacklo_epi16(step2[15], sign[3]); lstep2[31] = _mm256_unpackhi_epi16(step2[15], sign[3]); lstep1[0] = _mm256_add_epi32(lstep3[6], lstep3[0]); lstep1[1] = _mm256_add_epi32(lstep3[7], lstep3[1]); lstep1[2] = _mm256_add_epi32(lstep3[4], lstep3[2]); lstep1[3] = _mm256_add_epi32(lstep3[5], lstep3[3]); lstep1[4] = _mm256_sub_epi32(lstep3[2], lstep3[4]); lstep1[5] = _mm256_sub_epi32(lstep3[3], lstep3[5]); lstep1[6] = _mm256_sub_epi32(lstep3[0], lstep3[6]); lstep1[7] = _mm256_sub_epi32(lstep3[1], lstep3[7]); lstep1[16] = _mm256_add_epi32(lstep3[22], lstep2[16]); lstep1[17] = _mm256_add_epi32(lstep3[23], lstep2[17]); lstep1[18] = _mm256_add_epi32(lstep3[20], lstep2[18]); lstep1[19] = _mm256_add_epi32(lstep3[21], lstep2[19]); lstep1[20] = _mm256_sub_epi32(lstep2[18], lstep3[20]); lstep1[21] = _mm256_sub_epi32(lstep2[19], lstep3[21]); lstep1[22] = _mm256_sub_epi32(lstep2[16], lstep3[22]); lstep1[23] = _mm256_sub_epi32(lstep2[17], lstep3[23]); lstep1[24] = _mm256_sub_epi32(lstep2[30], lstep3[24]); lstep1[25] = _mm256_sub_epi32(lstep2[31], lstep3[25]); lstep1[26] = _mm256_sub_epi32(lstep2[28], lstep3[26]); lstep1[27] = _mm256_sub_epi32(lstep2[29], lstep3[27]); lstep1[28] = _mm256_add_epi32(lstep3[26], lstep2[28]); lstep1[29] = _mm256_add_epi32(lstep3[27], lstep2[29]); lstep1[30] = _mm256_add_epi32(lstep3[24], lstep2[30]); lstep1[31] = _mm256_add_epi32(lstep3[25], lstep2[31]); } { // to be continued... 
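/*
 * In this high-precision branch the remaining stages run in 32 bits, and
 * the stage-3/4 blocks above fuse the widening with the butterfly: after
 * unpacklo/unpackhi interleaves a and b, _mm256_madd_epi16 with kOne
 * (lanes 1, 1) yields the 32-bit sums a + b, while k__pOne_mOne
 * (lanes 1, -1) yields the 32-bit differences a - b. Terms that are not
 * combined yet are instead sign-extended by unpacking against their
 * cmpgt sign mask. Illustrative scalar sketch:
 *
 *   int32_t widen_add(int16_t a, int16_t b) { return (int32_t)a + b; }
 *   int32_t widen_sub(int16_t a, int16_t b) { return (int32_t)a - b; }
 */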
// const __m256i k32_p16_p16 = pair256_set_epi32(cospi_16_64, cospi_16_64); const __m256i k32_p16_m16 = pair256_set_epi32(cospi_16_64, -cospi_16_64); u[0] = _mm256_unpacklo_epi32(lstep3[12], lstep3[10]); u[1] = _mm256_unpackhi_epi32(lstep3[12], lstep3[10]); u[2] = _mm256_unpacklo_epi32(lstep3[13], lstep3[11]); u[3] = _mm256_unpackhi_epi32(lstep3[13], lstep3[11]); // TODO(jingning): manually inline k_madd_epi32_avx2_ to further hide // instruction latency. v[0] = k_madd_epi32_avx2(u[0], k32_p16_m16); v[1] = k_madd_epi32_avx2(u[1], k32_p16_m16); v[2] = k_madd_epi32_avx2(u[2], k32_p16_m16); v[3] = k_madd_epi32_avx2(u[3], k32_p16_m16); v[4] = k_madd_epi32_avx2(u[0], k32_p16_p16); v[5] = k_madd_epi32_avx2(u[1], k32_p16_p16); v[6] = k_madd_epi32_avx2(u[2], k32_p16_p16); v[7] = k_madd_epi32_avx2(u[3], k32_p16_p16); u[0] = k_packs_epi64_avx2(v[0], v[1]); u[1] = k_packs_epi64_avx2(v[2], v[3]); u[2] = k_packs_epi64_avx2(v[4], v[5]); u[3] = k_packs_epi64_avx2(v[6], v[7]); v[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); lstep1[10] = _mm256_srai_epi32(v[0], DCT_CONST_BITS); lstep1[11] = _mm256_srai_epi32(v[1], DCT_CONST_BITS); lstep1[12] = _mm256_srai_epi32(v[2], DCT_CONST_BITS); lstep1[13] = _mm256_srai_epi32(v[3], DCT_CONST_BITS); } { const __m256i k32_m08_p24 = pair256_set_epi32(-cospi_8_64, cospi_24_64); const __m256i k32_m24_m08 = pair256_set_epi32(-cospi_24_64, -cospi_8_64); const __m256i k32_p24_p08 = pair256_set_epi32(cospi_24_64, cospi_8_64); u[0] = _mm256_unpacklo_epi32(lstep3[36], lstep3[58]); u[1] = _mm256_unpackhi_epi32(lstep3[36], lstep3[58]); u[2] = _mm256_unpacklo_epi32(lstep3[37], lstep3[59]); u[3] = _mm256_unpackhi_epi32(lstep3[37], lstep3[59]); u[4] = _mm256_unpacklo_epi32(lstep3[38], lstep3[56]); u[5] = _mm256_unpackhi_epi32(lstep3[38], lstep3[56]); u[6] = _mm256_unpacklo_epi32(lstep3[39], lstep3[57]); u[7] = _mm256_unpackhi_epi32(lstep3[39], lstep3[57]); u[8] = _mm256_unpacklo_epi32(lstep3[40], lstep3[54]); u[9] = _mm256_unpackhi_epi32(lstep3[40], lstep3[54]); u[10] = _mm256_unpacklo_epi32(lstep3[41], lstep3[55]); u[11] = _mm256_unpackhi_epi32(lstep3[41], lstep3[55]); u[12] = _mm256_unpacklo_epi32(lstep3[42], lstep3[52]); u[13] = _mm256_unpackhi_epi32(lstep3[42], lstep3[52]); u[14] = _mm256_unpacklo_epi32(lstep3[43], lstep3[53]); u[15] = _mm256_unpackhi_epi32(lstep3[43], lstep3[53]); v[0] = k_madd_epi32_avx2(u[0], k32_m08_p24); v[1] = k_madd_epi32_avx2(u[1], k32_m08_p24); v[2] = k_madd_epi32_avx2(u[2], k32_m08_p24); v[3] = k_madd_epi32_avx2(u[3], k32_m08_p24); v[4] = k_madd_epi32_avx2(u[4], k32_m08_p24); v[5] = k_madd_epi32_avx2(u[5], k32_m08_p24); v[6] = k_madd_epi32_avx2(u[6], k32_m08_p24); v[7] = k_madd_epi32_avx2(u[7], k32_m08_p24); v[8] = k_madd_epi32_avx2(u[8], k32_m24_m08); v[9] = k_madd_epi32_avx2(u[9], k32_m24_m08); v[10] = k_madd_epi32_avx2(u[10], k32_m24_m08); v[11] = k_madd_epi32_avx2(u[11], k32_m24_m08); v[12] = k_madd_epi32_avx2(u[12], k32_m24_m08); v[13] = k_madd_epi32_avx2(u[13], k32_m24_m08); v[14] = k_madd_epi32_avx2(u[14], k32_m24_m08); v[15] = k_madd_epi32_avx2(u[15], k32_m24_m08); v[16] = k_madd_epi32_avx2(u[12], k32_m08_p24); v[17] = k_madd_epi32_avx2(u[13], k32_m08_p24); v[18] = k_madd_epi32_avx2(u[14], k32_m08_p24); v[19] = k_madd_epi32_avx2(u[15], k32_m08_p24); v[20] = k_madd_epi32_avx2(u[8], k32_m08_p24); v[21] = k_madd_epi32_avx2(u[9], k32_m08_p24); v[22] = k_madd_epi32_avx2(u[10], 
k32_m08_p24); v[23] = k_madd_epi32_avx2(u[11], k32_m08_p24); v[24] = k_madd_epi32_avx2(u[4], k32_p24_p08); v[25] = k_madd_epi32_avx2(u[5], k32_p24_p08); v[26] = k_madd_epi32_avx2(u[6], k32_p24_p08); v[27] = k_madd_epi32_avx2(u[7], k32_p24_p08); v[28] = k_madd_epi32_avx2(u[0], k32_p24_p08); v[29] = k_madd_epi32_avx2(u[1], k32_p24_p08); v[30] = k_madd_epi32_avx2(u[2], k32_p24_p08); v[31] = k_madd_epi32_avx2(u[3], k32_p24_p08); u[0] = k_packs_epi64_avx2(v[0], v[1]); u[1] = k_packs_epi64_avx2(v[2], v[3]); u[2] = k_packs_epi64_avx2(v[4], v[5]); u[3] = k_packs_epi64_avx2(v[6], v[7]); u[4] = k_packs_epi64_avx2(v[8], v[9]); u[5] = k_packs_epi64_avx2(v[10], v[11]); u[6] = k_packs_epi64_avx2(v[12], v[13]); u[7] = k_packs_epi64_avx2(v[14], v[15]); u[8] = k_packs_epi64_avx2(v[16], v[17]); u[9] = k_packs_epi64_avx2(v[18], v[19]); u[10] = k_packs_epi64_avx2(v[20], v[21]); u[11] = k_packs_epi64_avx2(v[22], v[23]); u[12] = k_packs_epi64_avx2(v[24], v[25]); u[13] = k_packs_epi64_avx2(v[26], v[27]); u[14] = k_packs_epi64_avx2(v[28], v[29]); u[15] = k_packs_epi64_avx2(v[30], v[31]); v[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); v[4] = _mm256_add_epi32(u[4], k__DCT_CONST_ROUNDING); v[5] = _mm256_add_epi32(u[5], k__DCT_CONST_ROUNDING); v[6] = _mm256_add_epi32(u[6], k__DCT_CONST_ROUNDING); v[7] = _mm256_add_epi32(u[7], k__DCT_CONST_ROUNDING); v[8] = _mm256_add_epi32(u[8], k__DCT_CONST_ROUNDING); v[9] = _mm256_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm256_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm256_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm256_add_epi32(u[12], k__DCT_CONST_ROUNDING); v[13] = _mm256_add_epi32(u[13], k__DCT_CONST_ROUNDING); v[14] = _mm256_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm256_add_epi32(u[15], k__DCT_CONST_ROUNDING); lstep1[36] = _mm256_srai_epi32(v[0], DCT_CONST_BITS); lstep1[37] = _mm256_srai_epi32(v[1], DCT_CONST_BITS); lstep1[38] = _mm256_srai_epi32(v[2], DCT_CONST_BITS); lstep1[39] = _mm256_srai_epi32(v[3], DCT_CONST_BITS); lstep1[40] = _mm256_srai_epi32(v[4], DCT_CONST_BITS); lstep1[41] = _mm256_srai_epi32(v[5], DCT_CONST_BITS); lstep1[42] = _mm256_srai_epi32(v[6], DCT_CONST_BITS); lstep1[43] = _mm256_srai_epi32(v[7], DCT_CONST_BITS); lstep1[52] = _mm256_srai_epi32(v[8], DCT_CONST_BITS); lstep1[53] = _mm256_srai_epi32(v[9], DCT_CONST_BITS); lstep1[54] = _mm256_srai_epi32(v[10], DCT_CONST_BITS); lstep1[55] = _mm256_srai_epi32(v[11], DCT_CONST_BITS); lstep1[56] = _mm256_srai_epi32(v[12], DCT_CONST_BITS); lstep1[57] = _mm256_srai_epi32(v[13], DCT_CONST_BITS); lstep1[58] = _mm256_srai_epi32(v[14], DCT_CONST_BITS); lstep1[59] = _mm256_srai_epi32(v[15], DCT_CONST_BITS); } // stage 5 { lstep2[8] = _mm256_add_epi32(lstep1[10], lstep3[8]); lstep2[9] = _mm256_add_epi32(lstep1[11], lstep3[9]); lstep2[10] = _mm256_sub_epi32(lstep3[8], lstep1[10]); lstep2[11] = _mm256_sub_epi32(lstep3[9], lstep1[11]); lstep2[12] = _mm256_sub_epi32(lstep3[14], lstep1[12]); lstep2[13] = _mm256_sub_epi32(lstep3[15], lstep1[13]); lstep2[14] = _mm256_add_epi32(lstep1[12], lstep3[14]); lstep2[15] = _mm256_add_epi32(lstep1[13], lstep3[15]); } { const __m256i k32_p16_p16 = pair256_set_epi32(cospi_16_64, cospi_16_64); const __m256i k32_p16_m16 = pair256_set_epi32(cospi_16_64, -cospi_16_64); const __m256i k32_p24_p08 = pair256_set_epi32(cospi_24_64, cospi_8_64); const __m256i k32_m08_p24 = 
pair256_set_epi32(-cospi_8_64, cospi_24_64); u[0] = _mm256_unpacklo_epi32(lstep1[0], lstep1[2]); u[1] = _mm256_unpackhi_epi32(lstep1[0], lstep1[2]); u[2] = _mm256_unpacklo_epi32(lstep1[1], lstep1[3]); u[3] = _mm256_unpackhi_epi32(lstep1[1], lstep1[3]); u[4] = _mm256_unpacklo_epi32(lstep1[4], lstep1[6]); u[5] = _mm256_unpackhi_epi32(lstep1[4], lstep1[6]); u[6] = _mm256_unpacklo_epi32(lstep1[5], lstep1[7]); u[7] = _mm256_unpackhi_epi32(lstep1[5], lstep1[7]); // TODO(jingning): manually inline k_madd_epi32_avx2_ to further hide // instruction latency. v[0] = k_madd_epi32_avx2(u[0], k32_p16_p16); v[1] = k_madd_epi32_avx2(u[1], k32_p16_p16); v[2] = k_madd_epi32_avx2(u[2], k32_p16_p16); v[3] = k_madd_epi32_avx2(u[3], k32_p16_p16); v[4] = k_madd_epi32_avx2(u[0], k32_p16_m16); v[5] = k_madd_epi32_avx2(u[1], k32_p16_m16); v[6] = k_madd_epi32_avx2(u[2], k32_p16_m16); v[7] = k_madd_epi32_avx2(u[3], k32_p16_m16); v[8] = k_madd_epi32_avx2(u[4], k32_p24_p08); v[9] = k_madd_epi32_avx2(u[5], k32_p24_p08); v[10] = k_madd_epi32_avx2(u[6], k32_p24_p08); v[11] = k_madd_epi32_avx2(u[7], k32_p24_p08); v[12] = k_madd_epi32_avx2(u[4], k32_m08_p24); v[13] = k_madd_epi32_avx2(u[5], k32_m08_p24); v[14] = k_madd_epi32_avx2(u[6], k32_m08_p24); v[15] = k_madd_epi32_avx2(u[7], k32_m08_p24); u[0] = k_packs_epi64_avx2(v[0], v[1]); u[1] = k_packs_epi64_avx2(v[2], v[3]); u[2] = k_packs_epi64_avx2(v[4], v[5]); u[3] = k_packs_epi64_avx2(v[6], v[7]); u[4] = k_packs_epi64_avx2(v[8], v[9]); u[5] = k_packs_epi64_avx2(v[10], v[11]); u[6] = k_packs_epi64_avx2(v[12], v[13]); u[7] = k_packs_epi64_avx2(v[14], v[15]); v[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); v[4] = _mm256_add_epi32(u[4], k__DCT_CONST_ROUNDING); v[5] = _mm256_add_epi32(u[5], k__DCT_CONST_ROUNDING); v[6] = _mm256_add_epi32(u[6], k__DCT_CONST_ROUNDING); v[7] = _mm256_add_epi32(u[7], k__DCT_CONST_ROUNDING); u[0] = _mm256_srai_epi32(v[0], DCT_CONST_BITS); u[1] = _mm256_srai_epi32(v[1], DCT_CONST_BITS); u[2] = _mm256_srai_epi32(v[2], DCT_CONST_BITS); u[3] = _mm256_srai_epi32(v[3], DCT_CONST_BITS); u[4] = _mm256_srai_epi32(v[4], DCT_CONST_BITS); u[5] = _mm256_srai_epi32(v[5], DCT_CONST_BITS); u[6] = _mm256_srai_epi32(v[6], DCT_CONST_BITS); u[7] = _mm256_srai_epi32(v[7], DCT_CONST_BITS); sign[0] = _mm256_cmpgt_epi32(kZero, u[0]); sign[1] = _mm256_cmpgt_epi32(kZero, u[1]); sign[2] = _mm256_cmpgt_epi32(kZero, u[2]); sign[3] = _mm256_cmpgt_epi32(kZero, u[3]); sign[4] = _mm256_cmpgt_epi32(kZero, u[4]); sign[5] = _mm256_cmpgt_epi32(kZero, u[5]); sign[6] = _mm256_cmpgt_epi32(kZero, u[6]); sign[7] = _mm256_cmpgt_epi32(kZero, u[7]); u[0] = _mm256_sub_epi32(u[0], sign[0]); u[1] = _mm256_sub_epi32(u[1], sign[1]); u[2] = _mm256_sub_epi32(u[2], sign[2]); u[3] = _mm256_sub_epi32(u[3], sign[3]); u[4] = _mm256_sub_epi32(u[4], sign[4]); u[5] = _mm256_sub_epi32(u[5], sign[5]); u[6] = _mm256_sub_epi32(u[6], sign[6]); u[7] = _mm256_sub_epi32(u[7], sign[7]); u[0] = _mm256_add_epi32(u[0], K32One); u[1] = _mm256_add_epi32(u[1], K32One); u[2] = _mm256_add_epi32(u[2], K32One); u[3] = _mm256_add_epi32(u[3], K32One); u[4] = _mm256_add_epi32(u[4], K32One); u[5] = _mm256_add_epi32(u[5], K32One); u[6] = _mm256_add_epi32(u[6], K32One); u[7] = _mm256_add_epi32(u[7], K32One); u[0] = _mm256_srai_epi32(u[0], 2); u[1] = _mm256_srai_epi32(u[1], 2); u[2] = _mm256_srai_epi32(u[2], 2); u[3] = _mm256_srai_epi32(u[3], 2); u[4] = 
_mm256_srai_epi32(u[4], 2); u[5] = _mm256_srai_epi32(u[5], 2); u[6] = _mm256_srai_epi32(u[6], 2); u[7] = _mm256_srai_epi32(u[7], 2); // Combine out[0] = _mm256_packs_epi32(u[0], u[1]); out[16] = _mm256_packs_epi32(u[2], u[3]); out[8] = _mm256_packs_epi32(u[4], u[5]); out[24] = _mm256_packs_epi32(u[6], u[7]); } { const __m256i k32_m08_p24 = pair256_set_epi32(-cospi_8_64, cospi_24_64); const __m256i k32_m24_m08 = pair256_set_epi32(-cospi_24_64, -cospi_8_64); const __m256i k32_p24_p08 = pair256_set_epi32(cospi_24_64, cospi_8_64); u[0] = _mm256_unpacklo_epi32(lstep1[18], lstep1[28]); u[1] = _mm256_unpackhi_epi32(lstep1[18], lstep1[28]); u[2] = _mm256_unpacklo_epi32(lstep1[19], lstep1[29]); u[3] = _mm256_unpackhi_epi32(lstep1[19], lstep1[29]); u[4] = _mm256_unpacklo_epi32(lstep1[20], lstep1[26]); u[5] = _mm256_unpackhi_epi32(lstep1[20], lstep1[26]); u[6] = _mm256_unpacklo_epi32(lstep1[21], lstep1[27]); u[7] = _mm256_unpackhi_epi32(lstep1[21], lstep1[27]); v[0] = k_madd_epi32_avx2(u[0], k32_m08_p24); v[1] = k_madd_epi32_avx2(u[1], k32_m08_p24); v[2] = k_madd_epi32_avx2(u[2], k32_m08_p24); v[3] = k_madd_epi32_avx2(u[3], k32_m08_p24); v[4] = k_madd_epi32_avx2(u[4], k32_m24_m08); v[5] = k_madd_epi32_avx2(u[5], k32_m24_m08); v[6] = k_madd_epi32_avx2(u[6], k32_m24_m08); v[7] = k_madd_epi32_avx2(u[7], k32_m24_m08); v[8] = k_madd_epi32_avx2(u[4], k32_m08_p24); v[9] = k_madd_epi32_avx2(u[5], k32_m08_p24); v[10] = k_madd_epi32_avx2(u[6], k32_m08_p24); v[11] = k_madd_epi32_avx2(u[7], k32_m08_p24); v[12] = k_madd_epi32_avx2(u[0], k32_p24_p08); v[13] = k_madd_epi32_avx2(u[1], k32_p24_p08); v[14] = k_madd_epi32_avx2(u[2], k32_p24_p08); v[15] = k_madd_epi32_avx2(u[3], k32_p24_p08); u[0] = k_packs_epi64_avx2(v[0], v[1]); u[1] = k_packs_epi64_avx2(v[2], v[3]); u[2] = k_packs_epi64_avx2(v[4], v[5]); u[3] = k_packs_epi64_avx2(v[6], v[7]); u[4] = k_packs_epi64_avx2(v[8], v[9]); u[5] = k_packs_epi64_avx2(v[10], v[11]); u[6] = k_packs_epi64_avx2(v[12], v[13]); u[7] = k_packs_epi64_avx2(v[14], v[15]); u[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); u[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); u[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); u[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); u[4] = _mm256_add_epi32(u[4], k__DCT_CONST_ROUNDING); u[5] = _mm256_add_epi32(u[5], k__DCT_CONST_ROUNDING); u[6] = _mm256_add_epi32(u[6], k__DCT_CONST_ROUNDING); u[7] = _mm256_add_epi32(u[7], k__DCT_CONST_ROUNDING); lstep2[18] = _mm256_srai_epi32(u[0], DCT_CONST_BITS); lstep2[19] = _mm256_srai_epi32(u[1], DCT_CONST_BITS); lstep2[20] = _mm256_srai_epi32(u[2], DCT_CONST_BITS); lstep2[21] = _mm256_srai_epi32(u[3], DCT_CONST_BITS); lstep2[26] = _mm256_srai_epi32(u[4], DCT_CONST_BITS); lstep2[27] = _mm256_srai_epi32(u[5], DCT_CONST_BITS); lstep2[28] = _mm256_srai_epi32(u[6], DCT_CONST_BITS); lstep2[29] = _mm256_srai_epi32(u[7], DCT_CONST_BITS); } { lstep2[32] = _mm256_add_epi32(lstep1[38], lstep3[32]); lstep2[33] = _mm256_add_epi32(lstep1[39], lstep3[33]); lstep2[34] = _mm256_add_epi32(lstep1[36], lstep3[34]); lstep2[35] = _mm256_add_epi32(lstep1[37], lstep3[35]); lstep2[36] = _mm256_sub_epi32(lstep3[34], lstep1[36]); lstep2[37] = _mm256_sub_epi32(lstep3[35], lstep1[37]); lstep2[38] = _mm256_sub_epi32(lstep3[32], lstep1[38]); lstep2[39] = _mm256_sub_epi32(lstep3[33], lstep1[39]); lstep2[40] = _mm256_sub_epi32(lstep3[46], lstep1[40]); lstep2[41] = _mm256_sub_epi32(lstep3[47], lstep1[41]); lstep2[42] = _mm256_sub_epi32(lstep3[44], lstep1[42]); lstep2[43] = _mm256_sub_epi32(lstep3[45], lstep1[43]); lstep2[44] = 
_mm256_add_epi32(lstep1[42], lstep3[44]); lstep2[45] = _mm256_add_epi32(lstep1[43], lstep3[45]); lstep2[46] = _mm256_add_epi32(lstep1[40], lstep3[46]); lstep2[47] = _mm256_add_epi32(lstep1[41], lstep3[47]); lstep2[48] = _mm256_add_epi32(lstep1[54], lstep3[48]); lstep2[49] = _mm256_add_epi32(lstep1[55], lstep3[49]); lstep2[50] = _mm256_add_epi32(lstep1[52], lstep3[50]); lstep2[51] = _mm256_add_epi32(lstep1[53], lstep3[51]); lstep2[52] = _mm256_sub_epi32(lstep3[50], lstep1[52]); lstep2[53] = _mm256_sub_epi32(lstep3[51], lstep1[53]); lstep2[54] = _mm256_sub_epi32(lstep3[48], lstep1[54]); lstep2[55] = _mm256_sub_epi32(lstep3[49], lstep1[55]); lstep2[56] = _mm256_sub_epi32(lstep3[62], lstep1[56]); lstep2[57] = _mm256_sub_epi32(lstep3[63], lstep1[57]); lstep2[58] = _mm256_sub_epi32(lstep3[60], lstep1[58]); lstep2[59] = _mm256_sub_epi32(lstep3[61], lstep1[59]); lstep2[60] = _mm256_add_epi32(lstep1[58], lstep3[60]); lstep2[61] = _mm256_add_epi32(lstep1[59], lstep3[61]); lstep2[62] = _mm256_add_epi32(lstep1[56], lstep3[62]); lstep2[63] = _mm256_add_epi32(lstep1[57], lstep3[63]); } // stage 6 { const __m256i k32_p28_p04 = pair256_set_epi32(cospi_28_64, cospi_4_64); const __m256i k32_p12_p20 = pair256_set_epi32(cospi_12_64, cospi_20_64); const __m256i k32_m20_p12 = pair256_set_epi32(-cospi_20_64, cospi_12_64); const __m256i k32_m04_p28 = pair256_set_epi32(-cospi_4_64, cospi_28_64); u[0] = _mm256_unpacklo_epi32(lstep2[8], lstep2[14]); u[1] = _mm256_unpackhi_epi32(lstep2[8], lstep2[14]); u[2] = _mm256_unpacklo_epi32(lstep2[9], lstep2[15]); u[3] = _mm256_unpackhi_epi32(lstep2[9], lstep2[15]); u[4] = _mm256_unpacklo_epi32(lstep2[10], lstep2[12]); u[5] = _mm256_unpackhi_epi32(lstep2[10], lstep2[12]); u[6] = _mm256_unpacklo_epi32(lstep2[11], lstep2[13]); u[7] = _mm256_unpackhi_epi32(lstep2[11], lstep2[13]); u[8] = _mm256_unpacklo_epi32(lstep2[10], lstep2[12]); u[9] = _mm256_unpackhi_epi32(lstep2[10], lstep2[12]); u[10] = _mm256_unpacklo_epi32(lstep2[11], lstep2[13]); u[11] = _mm256_unpackhi_epi32(lstep2[11], lstep2[13]); u[12] = _mm256_unpacklo_epi32(lstep2[8], lstep2[14]); u[13] = _mm256_unpackhi_epi32(lstep2[8], lstep2[14]); u[14] = _mm256_unpacklo_epi32(lstep2[9], lstep2[15]); u[15] = _mm256_unpackhi_epi32(lstep2[9], lstep2[15]); v[0] = k_madd_epi32_avx2(u[0], k32_p28_p04); v[1] = k_madd_epi32_avx2(u[1], k32_p28_p04); v[2] = k_madd_epi32_avx2(u[2], k32_p28_p04); v[3] = k_madd_epi32_avx2(u[3], k32_p28_p04); v[4] = k_madd_epi32_avx2(u[4], k32_p12_p20); v[5] = k_madd_epi32_avx2(u[5], k32_p12_p20); v[6] = k_madd_epi32_avx2(u[6], k32_p12_p20); v[7] = k_madd_epi32_avx2(u[7], k32_p12_p20); v[8] = k_madd_epi32_avx2(u[8], k32_m20_p12); v[9] = k_madd_epi32_avx2(u[9], k32_m20_p12); v[10] = k_madd_epi32_avx2(u[10], k32_m20_p12); v[11] = k_madd_epi32_avx2(u[11], k32_m20_p12); v[12] = k_madd_epi32_avx2(u[12], k32_m04_p28); v[13] = k_madd_epi32_avx2(u[13], k32_m04_p28); v[14] = k_madd_epi32_avx2(u[14], k32_m04_p28); v[15] = k_madd_epi32_avx2(u[15], k32_m04_p28); u[0] = k_packs_epi64_avx2(v[0], v[1]); u[1] = k_packs_epi64_avx2(v[2], v[3]); u[2] = k_packs_epi64_avx2(v[4], v[5]); u[3] = k_packs_epi64_avx2(v[6], v[7]); u[4] = k_packs_epi64_avx2(v[8], v[9]); u[5] = k_packs_epi64_avx2(v[10], v[11]); u[6] = k_packs_epi64_avx2(v[12], v[13]); u[7] = k_packs_epi64_avx2(v[14], v[15]); v[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); v[4] = _mm256_add_epi32(u[4], 
k__DCT_CONST_ROUNDING); v[5] = _mm256_add_epi32(u[5], k__DCT_CONST_ROUNDING); v[6] = _mm256_add_epi32(u[6], k__DCT_CONST_ROUNDING); v[7] = _mm256_add_epi32(u[7], k__DCT_CONST_ROUNDING); u[0] = _mm256_srai_epi32(v[0], DCT_CONST_BITS); u[1] = _mm256_srai_epi32(v[1], DCT_CONST_BITS); u[2] = _mm256_srai_epi32(v[2], DCT_CONST_BITS); u[3] = _mm256_srai_epi32(v[3], DCT_CONST_BITS); u[4] = _mm256_srai_epi32(v[4], DCT_CONST_BITS); u[5] = _mm256_srai_epi32(v[5], DCT_CONST_BITS); u[6] = _mm256_srai_epi32(v[6], DCT_CONST_BITS); u[7] = _mm256_srai_epi32(v[7], DCT_CONST_BITS); sign[0] = _mm256_cmpgt_epi32(kZero, u[0]); sign[1] = _mm256_cmpgt_epi32(kZero, u[1]); sign[2] = _mm256_cmpgt_epi32(kZero, u[2]); sign[3] = _mm256_cmpgt_epi32(kZero, u[3]); sign[4] = _mm256_cmpgt_epi32(kZero, u[4]); sign[5] = _mm256_cmpgt_epi32(kZero, u[5]); sign[6] = _mm256_cmpgt_epi32(kZero, u[6]); sign[7] = _mm256_cmpgt_epi32(kZero, u[7]); u[0] = _mm256_sub_epi32(u[0], sign[0]); u[1] = _mm256_sub_epi32(u[1], sign[1]); u[2] = _mm256_sub_epi32(u[2], sign[2]); u[3] = _mm256_sub_epi32(u[3], sign[3]); u[4] = _mm256_sub_epi32(u[4], sign[4]); u[5] = _mm256_sub_epi32(u[5], sign[5]); u[6] = _mm256_sub_epi32(u[6], sign[6]); u[7] = _mm256_sub_epi32(u[7], sign[7]); u[0] = _mm256_add_epi32(u[0], K32One); u[1] = _mm256_add_epi32(u[1], K32One); u[2] = _mm256_add_epi32(u[2], K32One); u[3] = _mm256_add_epi32(u[3], K32One); u[4] = _mm256_add_epi32(u[4], K32One); u[5] = _mm256_add_epi32(u[5], K32One); u[6] = _mm256_add_epi32(u[6], K32One); u[7] = _mm256_add_epi32(u[7], K32One); u[0] = _mm256_srai_epi32(u[0], 2); u[1] = _mm256_srai_epi32(u[1], 2); u[2] = _mm256_srai_epi32(u[2], 2); u[3] = _mm256_srai_epi32(u[3], 2); u[4] = _mm256_srai_epi32(u[4], 2); u[5] = _mm256_srai_epi32(u[5], 2); u[6] = _mm256_srai_epi32(u[6], 2); u[7] = _mm256_srai_epi32(u[7], 2); out[4] = _mm256_packs_epi32(u[0], u[1]); out[20] = _mm256_packs_epi32(u[2], u[3]); out[12] = _mm256_packs_epi32(u[4], u[5]); out[28] = _mm256_packs_epi32(u[6], u[7]); } { lstep3[16] = _mm256_add_epi32(lstep2[18], lstep1[16]); lstep3[17] = _mm256_add_epi32(lstep2[19], lstep1[17]); lstep3[18] = _mm256_sub_epi32(lstep1[16], lstep2[18]); lstep3[19] = _mm256_sub_epi32(lstep1[17], lstep2[19]); lstep3[20] = _mm256_sub_epi32(lstep1[22], lstep2[20]); lstep3[21] = _mm256_sub_epi32(lstep1[23], lstep2[21]); lstep3[22] = _mm256_add_epi32(lstep2[20], lstep1[22]); lstep3[23] = _mm256_add_epi32(lstep2[21], lstep1[23]); lstep3[24] = _mm256_add_epi32(lstep2[26], lstep1[24]); lstep3[25] = _mm256_add_epi32(lstep2[27], lstep1[25]); lstep3[26] = _mm256_sub_epi32(lstep1[24], lstep2[26]); lstep3[27] = _mm256_sub_epi32(lstep1[25], lstep2[27]); lstep3[28] = _mm256_sub_epi32(lstep1[30], lstep2[28]); lstep3[29] = _mm256_sub_epi32(lstep1[31], lstep2[29]); lstep3[30] = _mm256_add_epi32(lstep2[28], lstep1[30]); lstep3[31] = _mm256_add_epi32(lstep2[29], lstep1[31]); } { const __m256i k32_m04_p28 = pair256_set_epi32(-cospi_4_64, cospi_28_64); const __m256i k32_m28_m04 = pair256_set_epi32(-cospi_28_64, -cospi_4_64); const __m256i k32_m20_p12 = pair256_set_epi32(-cospi_20_64, cospi_12_64); const __m256i k32_m12_m20 = pair256_set_epi32(-cospi_12_64, -cospi_20_64); const __m256i k32_p12_p20 = pair256_set_epi32(cospi_12_64, cospi_20_64); const __m256i k32_p28_p04 = pair256_set_epi32(cospi_28_64, cospi_4_64); u[0] = _mm256_unpacklo_epi32(lstep2[34], lstep2[60]); u[1] = _mm256_unpackhi_epi32(lstep2[34], lstep2[60]); u[2] = _mm256_unpacklo_epi32(lstep2[35], lstep2[61]); u[3] = _mm256_unpackhi_epi32(lstep2[35], lstep2[61]); u[4] = 
_mm256_unpacklo_epi32(lstep2[36], lstep2[58]); u[5] = _mm256_unpackhi_epi32(lstep2[36], lstep2[58]); u[6] = _mm256_unpacklo_epi32(lstep2[37], lstep2[59]); u[7] = _mm256_unpackhi_epi32(lstep2[37], lstep2[59]); u[8] = _mm256_unpacklo_epi32(lstep2[42], lstep2[52]); u[9] = _mm256_unpackhi_epi32(lstep2[42], lstep2[52]); u[10] = _mm256_unpacklo_epi32(lstep2[43], lstep2[53]); u[11] = _mm256_unpackhi_epi32(lstep2[43], lstep2[53]); u[12] = _mm256_unpacklo_epi32(lstep2[44], lstep2[50]); u[13] = _mm256_unpackhi_epi32(lstep2[44], lstep2[50]); u[14] = _mm256_unpacklo_epi32(lstep2[45], lstep2[51]); u[15] = _mm256_unpackhi_epi32(lstep2[45], lstep2[51]); v[0] = k_madd_epi32_avx2(u[0], k32_m04_p28); v[1] = k_madd_epi32_avx2(u[1], k32_m04_p28); v[2] = k_madd_epi32_avx2(u[2], k32_m04_p28); v[3] = k_madd_epi32_avx2(u[3], k32_m04_p28); v[4] = k_madd_epi32_avx2(u[4], k32_m28_m04); v[5] = k_madd_epi32_avx2(u[5], k32_m28_m04); v[6] = k_madd_epi32_avx2(u[6], k32_m28_m04); v[7] = k_madd_epi32_avx2(u[7], k32_m28_m04); v[8] = k_madd_epi32_avx2(u[8], k32_m20_p12); v[9] = k_madd_epi32_avx2(u[9], k32_m20_p12); v[10] = k_madd_epi32_avx2(u[10], k32_m20_p12); v[11] = k_madd_epi32_avx2(u[11], k32_m20_p12); v[12] = k_madd_epi32_avx2(u[12], k32_m12_m20); v[13] = k_madd_epi32_avx2(u[13], k32_m12_m20); v[14] = k_madd_epi32_avx2(u[14], k32_m12_m20); v[15] = k_madd_epi32_avx2(u[15], k32_m12_m20); v[16] = k_madd_epi32_avx2(u[12], k32_m20_p12); v[17] = k_madd_epi32_avx2(u[13], k32_m20_p12); v[18] = k_madd_epi32_avx2(u[14], k32_m20_p12); v[19] = k_madd_epi32_avx2(u[15], k32_m20_p12); v[20] = k_madd_epi32_avx2(u[8], k32_p12_p20); v[21] = k_madd_epi32_avx2(u[9], k32_p12_p20); v[22] = k_madd_epi32_avx2(u[10], k32_p12_p20); v[23] = k_madd_epi32_avx2(u[11], k32_p12_p20); v[24] = k_madd_epi32_avx2(u[4], k32_m04_p28); v[25] = k_madd_epi32_avx2(u[5], k32_m04_p28); v[26] = k_madd_epi32_avx2(u[6], k32_m04_p28); v[27] = k_madd_epi32_avx2(u[7], k32_m04_p28); v[28] = k_madd_epi32_avx2(u[0], k32_p28_p04); v[29] = k_madd_epi32_avx2(u[1], k32_p28_p04); v[30] = k_madd_epi32_avx2(u[2], k32_p28_p04); v[31] = k_madd_epi32_avx2(u[3], k32_p28_p04); u[0] = k_packs_epi64_avx2(v[0], v[1]); u[1] = k_packs_epi64_avx2(v[2], v[3]); u[2] = k_packs_epi64_avx2(v[4], v[5]); u[3] = k_packs_epi64_avx2(v[6], v[7]); u[4] = k_packs_epi64_avx2(v[8], v[9]); u[5] = k_packs_epi64_avx2(v[10], v[11]); u[6] = k_packs_epi64_avx2(v[12], v[13]); u[7] = k_packs_epi64_avx2(v[14], v[15]); u[8] = k_packs_epi64_avx2(v[16], v[17]); u[9] = k_packs_epi64_avx2(v[18], v[19]); u[10] = k_packs_epi64_avx2(v[20], v[21]); u[11] = k_packs_epi64_avx2(v[22], v[23]); u[12] = k_packs_epi64_avx2(v[24], v[25]); u[13] = k_packs_epi64_avx2(v[26], v[27]); u[14] = k_packs_epi64_avx2(v[28], v[29]); u[15] = k_packs_epi64_avx2(v[30], v[31]); v[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); v[4] = _mm256_add_epi32(u[4], k__DCT_CONST_ROUNDING); v[5] = _mm256_add_epi32(u[5], k__DCT_CONST_ROUNDING); v[6] = _mm256_add_epi32(u[6], k__DCT_CONST_ROUNDING); v[7] = _mm256_add_epi32(u[7], k__DCT_CONST_ROUNDING); v[8] = _mm256_add_epi32(u[8], k__DCT_CONST_ROUNDING); v[9] = _mm256_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm256_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm256_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm256_add_epi32(u[12], k__DCT_CONST_ROUNDING); v[13] = _mm256_add_epi32(u[13], k__DCT_CONST_ROUNDING); v[14] = 
_mm256_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm256_add_epi32(u[15], k__DCT_CONST_ROUNDING); lstep3[34] = _mm256_srai_epi32(v[0], DCT_CONST_BITS); lstep3[35] = _mm256_srai_epi32(v[1], DCT_CONST_BITS); lstep3[36] = _mm256_srai_epi32(v[2], DCT_CONST_BITS); lstep3[37] = _mm256_srai_epi32(v[3], DCT_CONST_BITS); lstep3[42] = _mm256_srai_epi32(v[4], DCT_CONST_BITS); lstep3[43] = _mm256_srai_epi32(v[5], DCT_CONST_BITS); lstep3[44] = _mm256_srai_epi32(v[6], DCT_CONST_BITS); lstep3[45] = _mm256_srai_epi32(v[7], DCT_CONST_BITS); lstep3[50] = _mm256_srai_epi32(v[8], DCT_CONST_BITS); lstep3[51] = _mm256_srai_epi32(v[9], DCT_CONST_BITS); lstep3[52] = _mm256_srai_epi32(v[10], DCT_CONST_BITS); lstep3[53] = _mm256_srai_epi32(v[11], DCT_CONST_BITS); lstep3[58] = _mm256_srai_epi32(v[12], DCT_CONST_BITS); lstep3[59] = _mm256_srai_epi32(v[13], DCT_CONST_BITS); lstep3[60] = _mm256_srai_epi32(v[14], DCT_CONST_BITS); lstep3[61] = _mm256_srai_epi32(v[15], DCT_CONST_BITS); } // stage 7 { const __m256i k32_p30_p02 = pair256_set_epi32(cospi_30_64, cospi_2_64); const __m256i k32_p14_p18 = pair256_set_epi32(cospi_14_64, cospi_18_64); const __m256i k32_p22_p10 = pair256_set_epi32(cospi_22_64, cospi_10_64); const __m256i k32_p06_p26 = pair256_set_epi32(cospi_6_64, cospi_26_64); const __m256i k32_m26_p06 = pair256_set_epi32(-cospi_26_64, cospi_6_64); const __m256i k32_m10_p22 = pair256_set_epi32(-cospi_10_64, cospi_22_64); const __m256i k32_m18_p14 = pair256_set_epi32(-cospi_18_64, cospi_14_64); const __m256i k32_m02_p30 = pair256_set_epi32(-cospi_2_64, cospi_30_64); u[0] = _mm256_unpacklo_epi32(lstep3[16], lstep3[30]); u[1] = _mm256_unpackhi_epi32(lstep3[16], lstep3[30]); u[2] = _mm256_unpacklo_epi32(lstep3[17], lstep3[31]); u[3] = _mm256_unpackhi_epi32(lstep3[17], lstep3[31]); u[4] = _mm256_unpacklo_epi32(lstep3[18], lstep3[28]); u[5] = _mm256_unpackhi_epi32(lstep3[18], lstep3[28]); u[6] = _mm256_unpacklo_epi32(lstep3[19], lstep3[29]); u[7] = _mm256_unpackhi_epi32(lstep3[19], lstep3[29]); u[8] = _mm256_unpacklo_epi32(lstep3[20], lstep3[26]); u[9] = _mm256_unpackhi_epi32(lstep3[20], lstep3[26]); u[10] = _mm256_unpacklo_epi32(lstep3[21], lstep3[27]); u[11] = _mm256_unpackhi_epi32(lstep3[21], lstep3[27]); u[12] = _mm256_unpacklo_epi32(lstep3[22], lstep3[24]); u[13] = _mm256_unpackhi_epi32(lstep3[22], lstep3[24]); u[14] = _mm256_unpacklo_epi32(lstep3[23], lstep3[25]); u[15] = _mm256_unpackhi_epi32(lstep3[23], lstep3[25]); v[0] = k_madd_epi32_avx2(u[0], k32_p30_p02); v[1] = k_madd_epi32_avx2(u[1], k32_p30_p02); v[2] = k_madd_epi32_avx2(u[2], k32_p30_p02); v[3] = k_madd_epi32_avx2(u[3], k32_p30_p02); v[4] = k_madd_epi32_avx2(u[4], k32_p14_p18); v[5] = k_madd_epi32_avx2(u[5], k32_p14_p18); v[6] = k_madd_epi32_avx2(u[6], k32_p14_p18); v[7] = k_madd_epi32_avx2(u[7], k32_p14_p18); v[8] = k_madd_epi32_avx2(u[8], k32_p22_p10); v[9] = k_madd_epi32_avx2(u[9], k32_p22_p10); v[10] = k_madd_epi32_avx2(u[10], k32_p22_p10); v[11] = k_madd_epi32_avx2(u[11], k32_p22_p10); v[12] = k_madd_epi32_avx2(u[12], k32_p06_p26); v[13] = k_madd_epi32_avx2(u[13], k32_p06_p26); v[14] = k_madd_epi32_avx2(u[14], k32_p06_p26); v[15] = k_madd_epi32_avx2(u[15], k32_p06_p26); v[16] = k_madd_epi32_avx2(u[12], k32_m26_p06); v[17] = k_madd_epi32_avx2(u[13], k32_m26_p06); v[18] = k_madd_epi32_avx2(u[14], k32_m26_p06); v[19] = k_madd_epi32_avx2(u[15], k32_m26_p06); v[20] = k_madd_epi32_avx2(u[8], k32_m10_p22); v[21] = k_madd_epi32_avx2(u[9], k32_m10_p22); v[22] = k_madd_epi32_avx2(u[10], k32_m10_p22); v[23] = k_madd_epi32_avx2(u[11], k32_m10_p22); v[24] 
= k_madd_epi32_avx2(u[4], k32_m18_p14); v[25] = k_madd_epi32_avx2(u[5], k32_m18_p14); v[26] = k_madd_epi32_avx2(u[6], k32_m18_p14); v[27] = k_madd_epi32_avx2(u[7], k32_m18_p14); v[28] = k_madd_epi32_avx2(u[0], k32_m02_p30); v[29] = k_madd_epi32_avx2(u[1], k32_m02_p30); v[30] = k_madd_epi32_avx2(u[2], k32_m02_p30); v[31] = k_madd_epi32_avx2(u[3], k32_m02_p30); u[0] = k_packs_epi64_avx2(v[0], v[1]); u[1] = k_packs_epi64_avx2(v[2], v[3]); u[2] = k_packs_epi64_avx2(v[4], v[5]); u[3] = k_packs_epi64_avx2(v[6], v[7]); u[4] = k_packs_epi64_avx2(v[8], v[9]); u[5] = k_packs_epi64_avx2(v[10], v[11]); u[6] = k_packs_epi64_avx2(v[12], v[13]); u[7] = k_packs_epi64_avx2(v[14], v[15]); u[8] = k_packs_epi64_avx2(v[16], v[17]); u[9] = k_packs_epi64_avx2(v[18], v[19]); u[10] = k_packs_epi64_avx2(v[20], v[21]); u[11] = k_packs_epi64_avx2(v[22], v[23]); u[12] = k_packs_epi64_avx2(v[24], v[25]); u[13] = k_packs_epi64_avx2(v[26], v[27]); u[14] = k_packs_epi64_avx2(v[28], v[29]); u[15] = k_packs_epi64_avx2(v[30], v[31]); v[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); v[4] = _mm256_add_epi32(u[4], k__DCT_CONST_ROUNDING); v[5] = _mm256_add_epi32(u[5], k__DCT_CONST_ROUNDING); v[6] = _mm256_add_epi32(u[6], k__DCT_CONST_ROUNDING); v[7] = _mm256_add_epi32(u[7], k__DCT_CONST_ROUNDING); v[8] = _mm256_add_epi32(u[8], k__DCT_CONST_ROUNDING); v[9] = _mm256_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm256_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm256_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm256_add_epi32(u[12], k__DCT_CONST_ROUNDING); v[13] = _mm256_add_epi32(u[13], k__DCT_CONST_ROUNDING); v[14] = _mm256_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm256_add_epi32(u[15], k__DCT_CONST_ROUNDING); u[0] = _mm256_srai_epi32(v[0], DCT_CONST_BITS); u[1] = _mm256_srai_epi32(v[1], DCT_CONST_BITS); u[2] = _mm256_srai_epi32(v[2], DCT_CONST_BITS); u[3] = _mm256_srai_epi32(v[3], DCT_CONST_BITS); u[4] = _mm256_srai_epi32(v[4], DCT_CONST_BITS); u[5] = _mm256_srai_epi32(v[5], DCT_CONST_BITS); u[6] = _mm256_srai_epi32(v[6], DCT_CONST_BITS); u[7] = _mm256_srai_epi32(v[7], DCT_CONST_BITS); u[8] = _mm256_srai_epi32(v[8], DCT_CONST_BITS); u[9] = _mm256_srai_epi32(v[9], DCT_CONST_BITS); u[10] = _mm256_srai_epi32(v[10], DCT_CONST_BITS); u[11] = _mm256_srai_epi32(v[11], DCT_CONST_BITS); u[12] = _mm256_srai_epi32(v[12], DCT_CONST_BITS); u[13] = _mm256_srai_epi32(v[13], DCT_CONST_BITS); u[14] = _mm256_srai_epi32(v[14], DCT_CONST_BITS); u[15] = _mm256_srai_epi32(v[15], DCT_CONST_BITS); v[0] = _mm256_cmpgt_epi32(kZero, u[0]); v[1] = _mm256_cmpgt_epi32(kZero, u[1]); v[2] = _mm256_cmpgt_epi32(kZero, u[2]); v[3] = _mm256_cmpgt_epi32(kZero, u[3]); v[4] = _mm256_cmpgt_epi32(kZero, u[4]); v[5] = _mm256_cmpgt_epi32(kZero, u[5]); v[6] = _mm256_cmpgt_epi32(kZero, u[6]); v[7] = _mm256_cmpgt_epi32(kZero, u[7]); v[8] = _mm256_cmpgt_epi32(kZero, u[8]); v[9] = _mm256_cmpgt_epi32(kZero, u[9]); v[10] = _mm256_cmpgt_epi32(kZero, u[10]); v[11] = _mm256_cmpgt_epi32(kZero, u[11]); v[12] = _mm256_cmpgt_epi32(kZero, u[12]); v[13] = _mm256_cmpgt_epi32(kZero, u[13]); v[14] = _mm256_cmpgt_epi32(kZero, u[14]); v[15] = _mm256_cmpgt_epi32(kZero, u[15]); u[0] = _mm256_sub_epi32(u[0], v[0]); u[1] = _mm256_sub_epi32(u[1], v[1]); u[2] = _mm256_sub_epi32(u[2], v[2]); u[3] = _mm256_sub_epi32(u[3], v[3]); u[4] = _mm256_sub_epi32(u[4], v[4]); u[5] = _mm256_sub_epi32(u[5], v[5]); u[6] = 
_mm256_sub_epi32(u[6], v[6]); u[7] = _mm256_sub_epi32(u[7], v[7]); u[8] = _mm256_sub_epi32(u[8], v[8]); u[9] = _mm256_sub_epi32(u[9], v[9]); u[10] = _mm256_sub_epi32(u[10], v[10]); u[11] = _mm256_sub_epi32(u[11], v[11]); u[12] = _mm256_sub_epi32(u[12], v[12]); u[13] = _mm256_sub_epi32(u[13], v[13]); u[14] = _mm256_sub_epi32(u[14], v[14]); u[15] = _mm256_sub_epi32(u[15], v[15]); v[0] = _mm256_add_epi32(u[0], K32One); v[1] = _mm256_add_epi32(u[1], K32One); v[2] = _mm256_add_epi32(u[2], K32One); v[3] = _mm256_add_epi32(u[3], K32One); v[4] = _mm256_add_epi32(u[4], K32One); v[5] = _mm256_add_epi32(u[5], K32One); v[6] = _mm256_add_epi32(u[6], K32One); v[7] = _mm256_add_epi32(u[7], K32One); v[8] = _mm256_add_epi32(u[8], K32One); v[9] = _mm256_add_epi32(u[9], K32One); v[10] = _mm256_add_epi32(u[10], K32One); v[11] = _mm256_add_epi32(u[11], K32One); v[12] = _mm256_add_epi32(u[12], K32One); v[13] = _mm256_add_epi32(u[13], K32One); v[14] = _mm256_add_epi32(u[14], K32One); v[15] = _mm256_add_epi32(u[15], K32One); u[0] = _mm256_srai_epi32(v[0], 2); u[1] = _mm256_srai_epi32(v[1], 2); u[2] = _mm256_srai_epi32(v[2], 2); u[3] = _mm256_srai_epi32(v[3], 2); u[4] = _mm256_srai_epi32(v[4], 2); u[5] = _mm256_srai_epi32(v[5], 2); u[6] = _mm256_srai_epi32(v[6], 2); u[7] = _mm256_srai_epi32(v[7], 2); u[8] = _mm256_srai_epi32(v[8], 2); u[9] = _mm256_srai_epi32(v[9], 2); u[10] = _mm256_srai_epi32(v[10], 2); u[11] = _mm256_srai_epi32(v[11], 2); u[12] = _mm256_srai_epi32(v[12], 2); u[13] = _mm256_srai_epi32(v[13], 2); u[14] = _mm256_srai_epi32(v[14], 2); u[15] = _mm256_srai_epi32(v[15], 2); out[2] = _mm256_packs_epi32(u[0], u[1]); out[18] = _mm256_packs_epi32(u[2], u[3]); out[10] = _mm256_packs_epi32(u[4], u[5]); out[26] = _mm256_packs_epi32(u[6], u[7]); out[6] = _mm256_packs_epi32(u[8], u[9]); out[22] = _mm256_packs_epi32(u[10], u[11]); out[14] = _mm256_packs_epi32(u[12], u[13]); out[30] = _mm256_packs_epi32(u[14], u[15]); } { lstep1[32] = _mm256_add_epi32(lstep3[34], lstep2[32]); lstep1[33] = _mm256_add_epi32(lstep3[35], lstep2[33]); lstep1[34] = _mm256_sub_epi32(lstep2[32], lstep3[34]); lstep1[35] = _mm256_sub_epi32(lstep2[33], lstep3[35]); lstep1[36] = _mm256_sub_epi32(lstep2[38], lstep3[36]); lstep1[37] = _mm256_sub_epi32(lstep2[39], lstep3[37]); lstep1[38] = _mm256_add_epi32(lstep3[36], lstep2[38]); lstep1[39] = _mm256_add_epi32(lstep3[37], lstep2[39]); lstep1[40] = _mm256_add_epi32(lstep3[42], lstep2[40]); lstep1[41] = _mm256_add_epi32(lstep3[43], lstep2[41]); lstep1[42] = _mm256_sub_epi32(lstep2[40], lstep3[42]); lstep1[43] = _mm256_sub_epi32(lstep2[41], lstep3[43]); lstep1[44] = _mm256_sub_epi32(lstep2[46], lstep3[44]); lstep1[45] = _mm256_sub_epi32(lstep2[47], lstep3[45]); lstep1[46] = _mm256_add_epi32(lstep3[44], lstep2[46]); lstep1[47] = _mm256_add_epi32(lstep3[45], lstep2[47]); lstep1[48] = _mm256_add_epi32(lstep3[50], lstep2[48]); lstep1[49] = _mm256_add_epi32(lstep3[51], lstep2[49]); lstep1[50] = _mm256_sub_epi32(lstep2[48], lstep3[50]); lstep1[51] = _mm256_sub_epi32(lstep2[49], lstep3[51]); lstep1[52] = _mm256_sub_epi32(lstep2[54], lstep3[52]); lstep1[53] = _mm256_sub_epi32(lstep2[55], lstep3[53]); lstep1[54] = _mm256_add_epi32(lstep3[52], lstep2[54]); lstep1[55] = _mm256_add_epi32(lstep3[53], lstep2[55]); lstep1[56] = _mm256_add_epi32(lstep3[58], lstep2[56]); lstep1[57] = _mm256_add_epi32(lstep3[59], lstep2[57]); lstep1[58] = _mm256_sub_epi32(lstep2[56], lstep3[58]); lstep1[59] = _mm256_sub_epi32(lstep2[57], lstep3[59]); lstep1[60] = _mm256_sub_epi32(lstep2[62], lstep3[60]); lstep1[61] = 
_mm256_sub_epi32(lstep2[63], lstep3[61]); lstep1[62] = _mm256_add_epi32(lstep3[60], lstep2[62]); lstep1[63] = _mm256_add_epi32(lstep3[61], lstep2[63]); } // stage 8 { const __m256i k32_p31_p01 = pair256_set_epi32(cospi_31_64, cospi_1_64); const __m256i k32_p15_p17 = pair256_set_epi32(cospi_15_64, cospi_17_64); const __m256i k32_p23_p09 = pair256_set_epi32(cospi_23_64, cospi_9_64); const __m256i k32_p07_p25 = pair256_set_epi32(cospi_7_64, cospi_25_64); const __m256i k32_m25_p07 = pair256_set_epi32(-cospi_25_64, cospi_7_64); const __m256i k32_m09_p23 = pair256_set_epi32(-cospi_9_64, cospi_23_64); const __m256i k32_m17_p15 = pair256_set_epi32(-cospi_17_64, cospi_15_64); const __m256i k32_m01_p31 = pair256_set_epi32(-cospi_1_64, cospi_31_64); u[0] = _mm256_unpacklo_epi32(lstep1[32], lstep1[62]); u[1] = _mm256_unpackhi_epi32(lstep1[32], lstep1[62]); u[2] = _mm256_unpacklo_epi32(lstep1[33], lstep1[63]); u[3] = _mm256_unpackhi_epi32(lstep1[33], lstep1[63]); u[4] = _mm256_unpacklo_epi32(lstep1[34], lstep1[60]); u[5] = _mm256_unpackhi_epi32(lstep1[34], lstep1[60]); u[6] = _mm256_unpacklo_epi32(lstep1[35], lstep1[61]); u[7] = _mm256_unpackhi_epi32(lstep1[35], lstep1[61]); u[8] = _mm256_unpacklo_epi32(lstep1[36], lstep1[58]); u[9] = _mm256_unpackhi_epi32(lstep1[36], lstep1[58]); u[10] = _mm256_unpacklo_epi32(lstep1[37], lstep1[59]); u[11] = _mm256_unpackhi_epi32(lstep1[37], lstep1[59]); u[12] = _mm256_unpacklo_epi32(lstep1[38], lstep1[56]); u[13] = _mm256_unpackhi_epi32(lstep1[38], lstep1[56]); u[14] = _mm256_unpacklo_epi32(lstep1[39], lstep1[57]); u[15] = _mm256_unpackhi_epi32(lstep1[39], lstep1[57]); v[0] = k_madd_epi32_avx2(u[0], k32_p31_p01); v[1] = k_madd_epi32_avx2(u[1], k32_p31_p01); v[2] = k_madd_epi32_avx2(u[2], k32_p31_p01); v[3] = k_madd_epi32_avx2(u[3], k32_p31_p01); v[4] = k_madd_epi32_avx2(u[4], k32_p15_p17); v[5] = k_madd_epi32_avx2(u[5], k32_p15_p17); v[6] = k_madd_epi32_avx2(u[6], k32_p15_p17); v[7] = k_madd_epi32_avx2(u[7], k32_p15_p17); v[8] = k_madd_epi32_avx2(u[8], k32_p23_p09); v[9] = k_madd_epi32_avx2(u[9], k32_p23_p09); v[10] = k_madd_epi32_avx2(u[10], k32_p23_p09); v[11] = k_madd_epi32_avx2(u[11], k32_p23_p09); v[12] = k_madd_epi32_avx2(u[12], k32_p07_p25); v[13] = k_madd_epi32_avx2(u[13], k32_p07_p25); v[14] = k_madd_epi32_avx2(u[14], k32_p07_p25); v[15] = k_madd_epi32_avx2(u[15], k32_p07_p25); v[16] = k_madd_epi32_avx2(u[12], k32_m25_p07); v[17] = k_madd_epi32_avx2(u[13], k32_m25_p07); v[18] = k_madd_epi32_avx2(u[14], k32_m25_p07); v[19] = k_madd_epi32_avx2(u[15], k32_m25_p07); v[20] = k_madd_epi32_avx2(u[8], k32_m09_p23); v[21] = k_madd_epi32_avx2(u[9], k32_m09_p23); v[22] = k_madd_epi32_avx2(u[10], k32_m09_p23); v[23] = k_madd_epi32_avx2(u[11], k32_m09_p23); v[24] = k_madd_epi32_avx2(u[4], k32_m17_p15); v[25] = k_madd_epi32_avx2(u[5], k32_m17_p15); v[26] = k_madd_epi32_avx2(u[6], k32_m17_p15); v[27] = k_madd_epi32_avx2(u[7], k32_m17_p15); v[28] = k_madd_epi32_avx2(u[0], k32_m01_p31); v[29] = k_madd_epi32_avx2(u[1], k32_m01_p31); v[30] = k_madd_epi32_avx2(u[2], k32_m01_p31); v[31] = k_madd_epi32_avx2(u[3], k32_m01_p31); u[0] = k_packs_epi64_avx2(v[0], v[1]); u[1] = k_packs_epi64_avx2(v[2], v[3]); u[2] = k_packs_epi64_avx2(v[4], v[5]); u[3] = k_packs_epi64_avx2(v[6], v[7]); u[4] = k_packs_epi64_avx2(v[8], v[9]); u[5] = k_packs_epi64_avx2(v[10], v[11]); u[6] = k_packs_epi64_avx2(v[12], v[13]); u[7] = k_packs_epi64_avx2(v[14], v[15]); u[8] = k_packs_epi64_avx2(v[16], v[17]); u[9] = k_packs_epi64_avx2(v[18], v[19]); u[10] = k_packs_epi64_avx2(v[20], v[21]); u[11] = 
k_packs_epi64_avx2(v[22], v[23]); u[12] = k_packs_epi64_avx2(v[24], v[25]); u[13] = k_packs_epi64_avx2(v[26], v[27]); u[14] = k_packs_epi64_avx2(v[28], v[29]); u[15] = k_packs_epi64_avx2(v[30], v[31]); v[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); v[4] = _mm256_add_epi32(u[4], k__DCT_CONST_ROUNDING); v[5] = _mm256_add_epi32(u[5], k__DCT_CONST_ROUNDING); v[6] = _mm256_add_epi32(u[6], k__DCT_CONST_ROUNDING); v[7] = _mm256_add_epi32(u[7], k__DCT_CONST_ROUNDING); v[8] = _mm256_add_epi32(u[8], k__DCT_CONST_ROUNDING); v[9] = _mm256_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm256_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm256_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm256_add_epi32(u[12], k__DCT_CONST_ROUNDING); v[13] = _mm256_add_epi32(u[13], k__DCT_CONST_ROUNDING); v[14] = _mm256_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm256_add_epi32(u[15], k__DCT_CONST_ROUNDING); u[0] = _mm256_srai_epi32(v[0], DCT_CONST_BITS); u[1] = _mm256_srai_epi32(v[1], DCT_CONST_BITS); u[2] = _mm256_srai_epi32(v[2], DCT_CONST_BITS); u[3] = _mm256_srai_epi32(v[3], DCT_CONST_BITS); u[4] = _mm256_srai_epi32(v[4], DCT_CONST_BITS); u[5] = _mm256_srai_epi32(v[5], DCT_CONST_BITS); u[6] = _mm256_srai_epi32(v[6], DCT_CONST_BITS); u[7] = _mm256_srai_epi32(v[7], DCT_CONST_BITS); u[8] = _mm256_srai_epi32(v[8], DCT_CONST_BITS); u[9] = _mm256_srai_epi32(v[9], DCT_CONST_BITS); u[10] = _mm256_srai_epi32(v[10], DCT_CONST_BITS); u[11] = _mm256_srai_epi32(v[11], DCT_CONST_BITS); u[12] = _mm256_srai_epi32(v[12], DCT_CONST_BITS); u[13] = _mm256_srai_epi32(v[13], DCT_CONST_BITS); u[14] = _mm256_srai_epi32(v[14], DCT_CONST_BITS); u[15] = _mm256_srai_epi32(v[15], DCT_CONST_BITS); v[0] = _mm256_cmpgt_epi32(kZero, u[0]); v[1] = _mm256_cmpgt_epi32(kZero, u[1]); v[2] = _mm256_cmpgt_epi32(kZero, u[2]); v[3] = _mm256_cmpgt_epi32(kZero, u[3]); v[4] = _mm256_cmpgt_epi32(kZero, u[4]); v[5] = _mm256_cmpgt_epi32(kZero, u[5]); v[6] = _mm256_cmpgt_epi32(kZero, u[6]); v[7] = _mm256_cmpgt_epi32(kZero, u[7]); v[8] = _mm256_cmpgt_epi32(kZero, u[8]); v[9] = _mm256_cmpgt_epi32(kZero, u[9]); v[10] = _mm256_cmpgt_epi32(kZero, u[10]); v[11] = _mm256_cmpgt_epi32(kZero, u[11]); v[12] = _mm256_cmpgt_epi32(kZero, u[12]); v[13] = _mm256_cmpgt_epi32(kZero, u[13]); v[14] = _mm256_cmpgt_epi32(kZero, u[14]); v[15] = _mm256_cmpgt_epi32(kZero, u[15]); u[0] = _mm256_sub_epi32(u[0], v[0]); u[1] = _mm256_sub_epi32(u[1], v[1]); u[2] = _mm256_sub_epi32(u[2], v[2]); u[3] = _mm256_sub_epi32(u[3], v[3]); u[4] = _mm256_sub_epi32(u[4], v[4]); u[5] = _mm256_sub_epi32(u[5], v[5]); u[6] = _mm256_sub_epi32(u[6], v[6]); u[7] = _mm256_sub_epi32(u[7], v[7]); u[8] = _mm256_sub_epi32(u[8], v[8]); u[9] = _mm256_sub_epi32(u[9], v[9]); u[10] = _mm256_sub_epi32(u[10], v[10]); u[11] = _mm256_sub_epi32(u[11], v[11]); u[12] = _mm256_sub_epi32(u[12], v[12]); u[13] = _mm256_sub_epi32(u[13], v[13]); u[14] = _mm256_sub_epi32(u[14], v[14]); u[15] = _mm256_sub_epi32(u[15], v[15]); v[0] = _mm256_add_epi32(u[0], K32One); v[1] = _mm256_add_epi32(u[1], K32One); v[2] = _mm256_add_epi32(u[2], K32One); v[3] = _mm256_add_epi32(u[3], K32One); v[4] = _mm256_add_epi32(u[4], K32One); v[5] = _mm256_add_epi32(u[5], K32One); v[6] = _mm256_add_epi32(u[6], K32One); v[7] = _mm256_add_epi32(u[7], K32One); v[8] = _mm256_add_epi32(u[8], K32One); v[9] = _mm256_add_epi32(u[9], K32One); v[10] = _mm256_add_epi32(u[10], K32One); 
v[11] = _mm256_add_epi32(u[11], K32One); v[12] = _mm256_add_epi32(u[12], K32One); v[13] = _mm256_add_epi32(u[13], K32One); v[14] = _mm256_add_epi32(u[14], K32One); v[15] = _mm256_add_epi32(u[15], K32One); u[0] = _mm256_srai_epi32(v[0], 2); u[1] = _mm256_srai_epi32(v[1], 2); u[2] = _mm256_srai_epi32(v[2], 2); u[3] = _mm256_srai_epi32(v[3], 2); u[4] = _mm256_srai_epi32(v[4], 2); u[5] = _mm256_srai_epi32(v[5], 2); u[6] = _mm256_srai_epi32(v[6], 2); u[7] = _mm256_srai_epi32(v[7], 2); u[8] = _mm256_srai_epi32(v[8], 2); u[9] = _mm256_srai_epi32(v[9], 2); u[10] = _mm256_srai_epi32(v[10], 2); u[11] = _mm256_srai_epi32(v[11], 2); u[12] = _mm256_srai_epi32(v[12], 2); u[13] = _mm256_srai_epi32(v[13], 2); u[14] = _mm256_srai_epi32(v[14], 2); u[15] = _mm256_srai_epi32(v[15], 2); out[1] = _mm256_packs_epi32(u[0], u[1]); out[17] = _mm256_packs_epi32(u[2], u[3]); out[9] = _mm256_packs_epi32(u[4], u[5]); out[25] = _mm256_packs_epi32(u[6], u[7]); out[7] = _mm256_packs_epi32(u[8], u[9]); out[23] = _mm256_packs_epi32(u[10], u[11]); out[15] = _mm256_packs_epi32(u[12], u[13]); out[31] = _mm256_packs_epi32(u[14], u[15]); } { const __m256i k32_p27_p05 = pair256_set_epi32(cospi_27_64, cospi_5_64); const __m256i k32_p11_p21 = pair256_set_epi32(cospi_11_64, cospi_21_64); const __m256i k32_p19_p13 = pair256_set_epi32(cospi_19_64, cospi_13_64); const __m256i k32_p03_p29 = pair256_set_epi32(cospi_3_64, cospi_29_64); const __m256i k32_m29_p03 = pair256_set_epi32(-cospi_29_64, cospi_3_64); const __m256i k32_m13_p19 = pair256_set_epi32(-cospi_13_64, cospi_19_64); const __m256i k32_m21_p11 = pair256_set_epi32(-cospi_21_64, cospi_11_64); const __m256i k32_m05_p27 = pair256_set_epi32(-cospi_5_64, cospi_27_64); u[0] = _mm256_unpacklo_epi32(lstep1[40], lstep1[54]); u[1] = _mm256_unpackhi_epi32(lstep1[40], lstep1[54]); u[2] = _mm256_unpacklo_epi32(lstep1[41], lstep1[55]); u[3] = _mm256_unpackhi_epi32(lstep1[41], lstep1[55]); u[4] = _mm256_unpacklo_epi32(lstep1[42], lstep1[52]); u[5] = _mm256_unpackhi_epi32(lstep1[42], lstep1[52]); u[6] = _mm256_unpacklo_epi32(lstep1[43], lstep1[53]); u[7] = _mm256_unpackhi_epi32(lstep1[43], lstep1[53]); u[8] = _mm256_unpacklo_epi32(lstep1[44], lstep1[50]); u[9] = _mm256_unpackhi_epi32(lstep1[44], lstep1[50]); u[10] = _mm256_unpacklo_epi32(lstep1[45], lstep1[51]); u[11] = _mm256_unpackhi_epi32(lstep1[45], lstep1[51]); u[12] = _mm256_unpacklo_epi32(lstep1[46], lstep1[48]); u[13] = _mm256_unpackhi_epi32(lstep1[46], lstep1[48]); u[14] = _mm256_unpacklo_epi32(lstep1[47], lstep1[49]); u[15] = _mm256_unpackhi_epi32(lstep1[47], lstep1[49]); v[0] = k_madd_epi32_avx2(u[0], k32_p27_p05); v[1] = k_madd_epi32_avx2(u[1], k32_p27_p05); v[2] = k_madd_epi32_avx2(u[2], k32_p27_p05); v[3] = k_madd_epi32_avx2(u[3], k32_p27_p05); v[4] = k_madd_epi32_avx2(u[4], k32_p11_p21); v[5] = k_madd_epi32_avx2(u[5], k32_p11_p21); v[6] = k_madd_epi32_avx2(u[6], k32_p11_p21); v[7] = k_madd_epi32_avx2(u[7], k32_p11_p21); v[8] = k_madd_epi32_avx2(u[8], k32_p19_p13); v[9] = k_madd_epi32_avx2(u[9], k32_p19_p13); v[10] = k_madd_epi32_avx2(u[10], k32_p19_p13); v[11] = k_madd_epi32_avx2(u[11], k32_p19_p13); v[12] = k_madd_epi32_avx2(u[12], k32_p03_p29); v[13] = k_madd_epi32_avx2(u[13], k32_p03_p29); v[14] = k_madd_epi32_avx2(u[14], k32_p03_p29); v[15] = k_madd_epi32_avx2(u[15], k32_p03_p29); v[16] = k_madd_epi32_avx2(u[12], k32_m29_p03); v[17] = k_madd_epi32_avx2(u[13], k32_m29_p03); v[18] = k_madd_epi32_avx2(u[14], k32_m29_p03); v[19] = k_madd_epi32_avx2(u[15], k32_m29_p03); v[20] = k_madd_epi32_avx2(u[8], k32_m13_p19); v[21] = 
k_madd_epi32_avx2(u[9], k32_m13_p19); v[22] = k_madd_epi32_avx2(u[10], k32_m13_p19); v[23] = k_madd_epi32_avx2(u[11], k32_m13_p19); v[24] = k_madd_epi32_avx2(u[4], k32_m21_p11); v[25] = k_madd_epi32_avx2(u[5], k32_m21_p11); v[26] = k_madd_epi32_avx2(u[6], k32_m21_p11); v[27] = k_madd_epi32_avx2(u[7], k32_m21_p11); v[28] = k_madd_epi32_avx2(u[0], k32_m05_p27); v[29] = k_madd_epi32_avx2(u[1], k32_m05_p27); v[30] = k_madd_epi32_avx2(u[2], k32_m05_p27); v[31] = k_madd_epi32_avx2(u[3], k32_m05_p27); u[0] = k_packs_epi64_avx2(v[0], v[1]); u[1] = k_packs_epi64_avx2(v[2], v[3]); u[2] = k_packs_epi64_avx2(v[4], v[5]); u[3] = k_packs_epi64_avx2(v[6], v[7]); u[4] = k_packs_epi64_avx2(v[8], v[9]); u[5] = k_packs_epi64_avx2(v[10], v[11]); u[6] = k_packs_epi64_avx2(v[12], v[13]); u[7] = k_packs_epi64_avx2(v[14], v[15]); u[8] = k_packs_epi64_avx2(v[16], v[17]); u[9] = k_packs_epi64_avx2(v[18], v[19]); u[10] = k_packs_epi64_avx2(v[20], v[21]); u[11] = k_packs_epi64_avx2(v[22], v[23]); u[12] = k_packs_epi64_avx2(v[24], v[25]); u[13] = k_packs_epi64_avx2(v[26], v[27]); u[14] = k_packs_epi64_avx2(v[28], v[29]); u[15] = k_packs_epi64_avx2(v[30], v[31]); v[0] = _mm256_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm256_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm256_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm256_add_epi32(u[3], k__DCT_CONST_ROUNDING); v[4] = _mm256_add_epi32(u[4], k__DCT_CONST_ROUNDING); v[5] = _mm256_add_epi32(u[5], k__DCT_CONST_ROUNDING); v[6] = _mm256_add_epi32(u[6], k__DCT_CONST_ROUNDING); v[7] = _mm256_add_epi32(u[7], k__DCT_CONST_ROUNDING); v[8] = _mm256_add_epi32(u[8], k__DCT_CONST_ROUNDING); v[9] = _mm256_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm256_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm256_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm256_add_epi32(u[12], k__DCT_CONST_ROUNDING); v[13] = _mm256_add_epi32(u[13], k__DCT_CONST_ROUNDING); v[14] = _mm256_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm256_add_epi32(u[15], k__DCT_CONST_ROUNDING); u[0] = _mm256_srai_epi32(v[0], DCT_CONST_BITS); u[1] = _mm256_srai_epi32(v[1], DCT_CONST_BITS); u[2] = _mm256_srai_epi32(v[2], DCT_CONST_BITS); u[3] = _mm256_srai_epi32(v[3], DCT_CONST_BITS); u[4] = _mm256_srai_epi32(v[4], DCT_CONST_BITS); u[5] = _mm256_srai_epi32(v[5], DCT_CONST_BITS); u[6] = _mm256_srai_epi32(v[6], DCT_CONST_BITS); u[7] = _mm256_srai_epi32(v[7], DCT_CONST_BITS); u[8] = _mm256_srai_epi32(v[8], DCT_CONST_BITS); u[9] = _mm256_srai_epi32(v[9], DCT_CONST_BITS); u[10] = _mm256_srai_epi32(v[10], DCT_CONST_BITS); u[11] = _mm256_srai_epi32(v[11], DCT_CONST_BITS); u[12] = _mm256_srai_epi32(v[12], DCT_CONST_BITS); u[13] = _mm256_srai_epi32(v[13], DCT_CONST_BITS); u[14] = _mm256_srai_epi32(v[14], DCT_CONST_BITS); u[15] = _mm256_srai_epi32(v[15], DCT_CONST_BITS); v[0] = _mm256_cmpgt_epi32(kZero, u[0]); v[1] = _mm256_cmpgt_epi32(kZero, u[1]); v[2] = _mm256_cmpgt_epi32(kZero, u[2]); v[3] = _mm256_cmpgt_epi32(kZero, u[3]); v[4] = _mm256_cmpgt_epi32(kZero, u[4]); v[5] = _mm256_cmpgt_epi32(kZero, u[5]); v[6] = _mm256_cmpgt_epi32(kZero, u[6]); v[7] = _mm256_cmpgt_epi32(kZero, u[7]); v[8] = _mm256_cmpgt_epi32(kZero, u[8]); v[9] = _mm256_cmpgt_epi32(kZero, u[9]); v[10] = _mm256_cmpgt_epi32(kZero, u[10]); v[11] = _mm256_cmpgt_epi32(kZero, u[11]); v[12] = _mm256_cmpgt_epi32(kZero, u[12]); v[13] = _mm256_cmpgt_epi32(kZero, u[13]); v[14] = _mm256_cmpgt_epi32(kZero, u[14]); v[15] = _mm256_cmpgt_epi32(kZero, u[15]); u[0] = _mm256_sub_epi32(u[0], v[0]); u[1] = _mm256_sub_epi32(u[1], v[1]); u[2] = 
_mm256_sub_epi32(u[2], v[2]); u[3] = _mm256_sub_epi32(u[3], v[3]); u[4] = _mm256_sub_epi32(u[4], v[4]); u[5] = _mm256_sub_epi32(u[5], v[5]); u[6] = _mm256_sub_epi32(u[6], v[6]); u[7] = _mm256_sub_epi32(u[7], v[7]); u[8] = _mm256_sub_epi32(u[8], v[8]); u[9] = _mm256_sub_epi32(u[9], v[9]); u[10] = _mm256_sub_epi32(u[10], v[10]); u[11] = _mm256_sub_epi32(u[11], v[11]); u[12] = _mm256_sub_epi32(u[12], v[12]); u[13] = _mm256_sub_epi32(u[13], v[13]); u[14] = _mm256_sub_epi32(u[14], v[14]); u[15] = _mm256_sub_epi32(u[15], v[15]); v[0] = _mm256_add_epi32(u[0], K32One); v[1] = _mm256_add_epi32(u[1], K32One); v[2] = _mm256_add_epi32(u[2], K32One); v[3] = _mm256_add_epi32(u[3], K32One); v[4] = _mm256_add_epi32(u[4], K32One); v[5] = _mm256_add_epi32(u[5], K32One); v[6] = _mm256_add_epi32(u[6], K32One); v[7] = _mm256_add_epi32(u[7], K32One); v[8] = _mm256_add_epi32(u[8], K32One); v[9] = _mm256_add_epi32(u[9], K32One); v[10] = _mm256_add_epi32(u[10], K32One); v[11] = _mm256_add_epi32(u[11], K32One); v[12] = _mm256_add_epi32(u[12], K32One); v[13] = _mm256_add_epi32(u[13], K32One); v[14] = _mm256_add_epi32(u[14], K32One); v[15] = _mm256_add_epi32(u[15], K32One); u[0] = _mm256_srai_epi32(v[0], 2); u[1] = _mm256_srai_epi32(v[1], 2); u[2] = _mm256_srai_epi32(v[2], 2); u[3] = _mm256_srai_epi32(v[3], 2); u[4] = _mm256_srai_epi32(v[4], 2); u[5] = _mm256_srai_epi32(v[5], 2); u[6] = _mm256_srai_epi32(v[6], 2); u[7] = _mm256_srai_epi32(v[7], 2); u[8] = _mm256_srai_epi32(v[8], 2); u[9] = _mm256_srai_epi32(v[9], 2); u[10] = _mm256_srai_epi32(v[10], 2); u[11] = _mm256_srai_epi32(v[11], 2); u[12] = _mm256_srai_epi32(v[12], 2); u[13] = _mm256_srai_epi32(v[13], 2); u[14] = _mm256_srai_epi32(v[14], 2); u[15] = _mm256_srai_epi32(v[15], 2); out[5] = _mm256_packs_epi32(u[0], u[1]); out[21] = _mm256_packs_epi32(u[2], u[3]); out[13] = _mm256_packs_epi32(u[4], u[5]); out[29] = _mm256_packs_epi32(u[6], u[7]); out[3] = _mm256_packs_epi32(u[8], u[9]); out[19] = _mm256_packs_epi32(u[10], u[11]); out[11] = _mm256_packs_epi32(u[12], u[13]); out[27] = _mm256_packs_epi32(u[14], u[15]); } } #endif // Transpose the results, do it as four 8x8 transposes. 
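    // Each __m256i in out[] carries one 16-wide row of the current slice:
    // elements 0-7 in the low 128-bit lane and elements 8-15 in the high
    // lane (see the index comments below). Three rounds of unpacklo/unpackhi
    // at 16-, 32- and 64-bit granularity therefore transpose an 8x8 tile
    // inside each lane, and the two lanes are written out as two separate
    // 8-column strips.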
    {
      int transpose_block;
      int16_t *output_currStep, *output_nextStep;
      if (0 == pass) {
        output_currStep = &intermediate[column_start * 32];
        output_nextStep = &intermediate[(column_start + 8) * 32];
      } else {
        output_currStep = &output_org[column_start * 32];
        output_nextStep = &output_org[(column_start + 8) * 32];
      }
      for (transpose_block = 0; transpose_block < 4; ++transpose_block) {
        __m256i *this_out = &out[8 * transpose_block];
        // 00 01 02 03 04 05 06 07 08 09 10 11 12 13 14 15
        // 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35
        // 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55
        // 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
        // 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
        // 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
        // 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
        // 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
        const __m256i tr0_0 = _mm256_unpacklo_epi16(this_out[0], this_out[1]);
        const __m256i tr0_1 = _mm256_unpacklo_epi16(this_out[2], this_out[3]);
        const __m256i tr0_2 = _mm256_unpackhi_epi16(this_out[0], this_out[1]);
        const __m256i tr0_3 = _mm256_unpackhi_epi16(this_out[2], this_out[3]);
        const __m256i tr0_4 = _mm256_unpacklo_epi16(this_out[4], this_out[5]);
        const __m256i tr0_5 = _mm256_unpacklo_epi16(this_out[6], this_out[7]);
        const __m256i tr0_6 = _mm256_unpackhi_epi16(this_out[4], this_out[5]);
        const __m256i tr0_7 = _mm256_unpackhi_epi16(this_out[6], this_out[7]);
        // 00 20 01 21 02 22 03 23 08 28 09 29 10 30 11 31
        // 40 60 41 61 42 62 43 63 48 68 49 69 50 70 51 71
        // 04 24 05 25 06 26 07 27 12 32 13 33 14 34 15 35
        // 44 64 45 65 46 66 47 67 52 72 53 73 54 74 55 75
        // 80 100 81 101 82 102 83 103 88 108 89 109 90 110 91 111
        // 120 140 121 141 122 142 123 143 128 148 129 149 130 150 131 151
        // 84 104 85 105 86 106 87 107 92 112 93 113 94 114 95 115
        // 124 144 125 145 126 146 127 147 132 152 133 153 134 154 135 155
        const __m256i tr1_0 = _mm256_unpacklo_epi32(tr0_0, tr0_1);
        const __m256i tr1_1 = _mm256_unpacklo_epi32(tr0_2, tr0_3);
        const __m256i tr1_2 = _mm256_unpackhi_epi32(tr0_0, tr0_1);
        const __m256i tr1_3 = _mm256_unpackhi_epi32(tr0_2, tr0_3);
        const __m256i tr1_4 = _mm256_unpacklo_epi32(tr0_4, tr0_5);
        const __m256i tr1_5 = _mm256_unpacklo_epi32(tr0_6, tr0_7);
        const __m256i tr1_6 = _mm256_unpackhi_epi32(tr0_4, tr0_5);
        const __m256i tr1_7 = _mm256_unpackhi_epi32(tr0_6, tr0_7);
        // 00 20 40 60 01 21 41 61 08 28 48 68 09 29 49 69
        // 04 24 44 64 05 25 45 65 12 32 52 72 13 33 53 73
        // 02 22 42 62 03 23 43 63 10 30 50 70 11 31 51 71
        // 06 26 46 66 07 27 47 67 14 34 54 74 15 35 55 75
        // 80 100 120 140 81 101 121 141 88 108 128 148 89 109 129 149
        // 84 104 124 144 85 105 125 145 92 112 132 152 93 113 133 153
        // 82 102 122 142 83 103 123 143 90 110 130 150 91 111 131 151
        // 86 106 126 146 87 107 127 147 94 114 134 154 95 115 135 155
        __m256i tr2_0 = _mm256_unpacklo_epi64(tr1_0, tr1_4);
        __m256i tr2_1 = _mm256_unpackhi_epi64(tr1_0, tr1_4);
        __m256i tr2_2 = _mm256_unpacklo_epi64(tr1_2, tr1_6);
        __m256i tr2_3 = _mm256_unpackhi_epi64(tr1_2, tr1_6);
        __m256i tr2_4 = _mm256_unpacklo_epi64(tr1_1, tr1_5);
        __m256i tr2_5 = _mm256_unpackhi_epi64(tr1_1, tr1_5);
        __m256i tr2_6 = _mm256_unpacklo_epi64(tr1_3, tr1_7);
        __m256i tr2_7 = _mm256_unpackhi_epi64(tr1_3, tr1_7);
        // 00 20 40 60 80 100 120 140 08 28 48 68 88 108 128 148
        // 01 21 41 61 81 101 121 141 09 29 49 69 89 109 129 149
        // 02 22 42 62 82 102 122 142 10 30 50 70 90 110 130 150
        // 03 23 43 63 83 103 123 143 11 31 51 71 91 111 131 151
        // 04 24 44 64 84 104 124 144 12 32 52 72 92 112 132 152
        // 05 25 45 65 85 105 125 145 13 33 53 73 93 113 133 153
        // 06 26 46 66 86 106 126 146 14 34 54 74 94 114 134 154
        // 07 27 47 67 87 107 127 147 15 35 55 75 95 115 135 155
        if (0 == pass) {
          // output[j] = (output[j] + 1 + (output[j] > 0)) >> 2;
          // TODO(cd): see quality impact of only doing
          //           output[j] = (output[j] + 1) >> 2;
          //           which would remove the code between here ...
          __m256i tr2_0_0 = _mm256_cmpgt_epi16(tr2_0, kZero);
          __m256i tr2_1_0 = _mm256_cmpgt_epi16(tr2_1, kZero);
          __m256i tr2_2_0 = _mm256_cmpgt_epi16(tr2_2, kZero);
          __m256i tr2_3_0 = _mm256_cmpgt_epi16(tr2_3, kZero);
          __m256i tr2_4_0 = _mm256_cmpgt_epi16(tr2_4, kZero);
          __m256i tr2_5_0 = _mm256_cmpgt_epi16(tr2_5, kZero);
          __m256i tr2_6_0 = _mm256_cmpgt_epi16(tr2_6, kZero);
          __m256i tr2_7_0 = _mm256_cmpgt_epi16(tr2_7, kZero);
          tr2_0 = _mm256_sub_epi16(tr2_0, tr2_0_0);
          tr2_1 = _mm256_sub_epi16(tr2_1, tr2_1_0);
          tr2_2 = _mm256_sub_epi16(tr2_2, tr2_2_0);
          tr2_3 = _mm256_sub_epi16(tr2_3, tr2_3_0);
          tr2_4 = _mm256_sub_epi16(tr2_4, tr2_4_0);
          tr2_5 = _mm256_sub_epi16(tr2_5, tr2_5_0);
          tr2_6 = _mm256_sub_epi16(tr2_6, tr2_6_0);
          tr2_7 = _mm256_sub_epi16(tr2_7, tr2_7_0);
          // ... and here.
          // PS: also change code in vp9/encoder/vp9_dct.c
          tr2_0 = _mm256_add_epi16(tr2_0, kOne);
          tr2_1 = _mm256_add_epi16(tr2_1, kOne);
          tr2_2 = _mm256_add_epi16(tr2_2, kOne);
          tr2_3 = _mm256_add_epi16(tr2_3, kOne);
          tr2_4 = _mm256_add_epi16(tr2_4, kOne);
          tr2_5 = _mm256_add_epi16(tr2_5, kOne);
          tr2_6 = _mm256_add_epi16(tr2_6, kOne);
          tr2_7 = _mm256_add_epi16(tr2_7, kOne);
          tr2_0 = _mm256_srai_epi16(tr2_0, 2);
          tr2_1 = _mm256_srai_epi16(tr2_1, 2);
          tr2_2 = _mm256_srai_epi16(tr2_2, 2);
          tr2_3 = _mm256_srai_epi16(tr2_3, 2);
          tr2_4 = _mm256_srai_epi16(tr2_4, 2);
          tr2_5 = _mm256_srai_epi16(tr2_5, 2);
          tr2_6 = _mm256_srai_epi16(tr2_6, 2);
          tr2_7 = _mm256_srai_epi16(tr2_7, 2);
        }
        // Note: even though all these stores are aligned, using the aligned
        // intrinsic makes the code slightly slower.
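        // Each tr2_x register now holds two transposed 8-sample rows: the
        // low 128-bit lane belongs to the 8-column strip at output_currStep
        // and the high lane to the strip 8 columns later at output_nextStep,
        // with consecutive rows 32 int16_t apart in the 32x32 output.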
_mm_storeu_si128((__m128i *)(output_currStep + 0 * 32), _mm256_castsi256_si128(tr2_0)); _mm_storeu_si128((__m128i *)(output_currStep + 1 * 32), _mm256_castsi256_si128(tr2_1)); _mm_storeu_si128((__m128i *)(output_currStep + 2 * 32), _mm256_castsi256_si128(tr2_2)); _mm_storeu_si128((__m128i *)(output_currStep + 3 * 32), _mm256_castsi256_si128(tr2_3)); _mm_storeu_si128((__m128i *)(output_currStep + 4 * 32), _mm256_castsi256_si128(tr2_4)); _mm_storeu_si128((__m128i *)(output_currStep + 5 * 32), _mm256_castsi256_si128(tr2_5)); _mm_storeu_si128((__m128i *)(output_currStep + 6 * 32), _mm256_castsi256_si128(tr2_6)); _mm_storeu_si128((__m128i *)(output_currStep + 7 * 32), _mm256_castsi256_si128(tr2_7)); _mm_storeu_si128((__m128i *)(output_nextStep + 0 * 32), _mm256_extractf128_si256(tr2_0, 1)); _mm_storeu_si128((__m128i *)(output_nextStep + 1 * 32), _mm256_extractf128_si256(tr2_1, 1)); _mm_storeu_si128((__m128i *)(output_nextStep + 2 * 32), _mm256_extractf128_si256(tr2_2, 1)); _mm_storeu_si128((__m128i *)(output_nextStep + 3 * 32), _mm256_extractf128_si256(tr2_3, 1)); _mm_storeu_si128((__m128i *)(output_nextStep + 4 * 32), _mm256_extractf128_si256(tr2_4, 1)); _mm_storeu_si128((__m128i *)(output_nextStep + 5 * 32), _mm256_extractf128_si256(tr2_5, 1)); _mm_storeu_si128((__m128i *)(output_nextStep + 6 * 32), _mm256_extractf128_si256(tr2_6, 1)); _mm_storeu_si128((__m128i *)(output_nextStep + 7 * 32), _mm256_extractf128_si256(tr2_7, 1)); // Process next 8x8 output_currStep += 8; output_nextStep += 8; } } } } } // NOLINT libvpx-1.8.2/vpx_dsp/x86/fwd_dct32x32_impl_sse2.h000066400000000000000000004775471357355204000214300ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <emmintrin.h> // SSE2 #include "vpx_dsp/fwd_txfm.h" #include "vpx_dsp/txfm_common.h" #include "vpx_dsp/x86/txfm_common_sse2.h" // TODO(jingning) The high bit-depth version needs re-work for performance. // The current SSE2 implementation also cross-references the static // functions in the C implementation file.
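// This header is included rather than compiled on its own: the including
// translation unit (vpx_dsp/x86/fwd_txfm_sse2.c in this tree) first
// defines FDCT32x32_2D (the name of the function emitted below),
// DCT_HIGH_BIT_DEPTH and FDCT32x32_HIGH_PRECISION, so one body yields the
// regular, "rd" and high-bit-depth variants. Throughout the file,
// pair_set_epi16(c0, c1) constants are fed to _mm_madd_epi16 on
// interleaved (a, b) lanes to form a * c0 + b * c1 in 32 bits, and each
// k__DCT_CONST_ROUNDING add followed by an _mm_srai_epi32 by
// DCT_CONST_BITS implements dct_const_round_shift(), i.e.
//   (x + (1 << (DCT_CONST_BITS - 1))) >> DCT_CONST_BITS.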
#if DCT_HIGH_BIT_DEPTH #define ADD_EPI16 _mm_adds_epi16 #define SUB_EPI16 _mm_subs_epi16 #if FDCT32x32_HIGH_PRECISION static void vpx_fdct32x32_rows_c(const int16_t *intermediate, tran_low_t *out) { int i, j; for (i = 0; i < 32; ++i) { tran_high_t temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = intermediate[j * 32 + i]; vpx_fdct32(temp_in, temp_out, 0); for (j = 0; j < 32; ++j) out[j + i * 32] = (tran_low_t)((temp_out[j] + 1 + (temp_out[j] < 0)) >> 2); } } #define HIGH_FDCT32x32_2D_C vpx_highbd_fdct32x32_c #define HIGH_FDCT32x32_2D_ROWS_C vpx_fdct32x32_rows_c #else static void vpx_fdct32x32_rd_rows_c(const int16_t *intermediate, tran_low_t *out) { int i, j; for (i = 0; i < 32; ++i) { tran_high_t temp_in[32], temp_out[32]; for (j = 0; j < 32; ++j) temp_in[j] = intermediate[j * 32 + i]; vpx_fdct32(temp_in, temp_out, 1); for (j = 0; j < 32; ++j) out[j + i * 32] = (tran_low_t)temp_out[j]; } } #define HIGH_FDCT32x32_2D_C vpx_highbd_fdct32x32_rd_c #define HIGH_FDCT32x32_2D_ROWS_C vpx_fdct32x32_rd_rows_c #endif // FDCT32x32_HIGH_PRECISION #else #define ADD_EPI16 _mm_add_epi16 #define SUB_EPI16 _mm_sub_epi16 #endif // DCT_HIGH_BIT_DEPTH void FDCT32x32_2D(const int16_t *input, tran_low_t *output_org, int stride) { // Calculate pre-multiplied strides const int str1 = stride; const int str2 = 2 * stride; const int str3 = 2 * stride + str1; // We need an intermediate buffer between passes. DECLARE_ALIGNED(16, int16_t, intermediate[32 * 32]); // Constants // When we use them, in one case, they are all the same. In all others // it's a pair of them that we need to repeat four times. This is done // by constructing the 32 bit constant corresponding to that pair. const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(+cospi_16_64, -cospi_16_64); const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i k__cospi_m24_m08 = pair_set_epi16(-cospi_24_64, -cospi_8_64); const __m128i k__cospi_p24_p08 = pair_set_epi16(+cospi_24_64, cospi_8_64); const __m128i k__cospi_p12_p20 = pair_set_epi16(+cospi_12_64, cospi_20_64); const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64); const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); const __m128i k__cospi_p28_p04 = pair_set_epi16(+cospi_28_64, cospi_4_64); const __m128i k__cospi_m28_m04 = pair_set_epi16(-cospi_28_64, -cospi_4_64); const __m128i k__cospi_m12_m20 = pair_set_epi16(-cospi_12_64, -cospi_20_64); const __m128i k__cospi_p30_p02 = pair_set_epi16(+cospi_30_64, cospi_2_64); const __m128i k__cospi_p14_p18 = pair_set_epi16(+cospi_14_64, cospi_18_64); const __m128i k__cospi_p22_p10 = pair_set_epi16(+cospi_22_64, cospi_10_64); const __m128i k__cospi_p06_p26 = pair_set_epi16(+cospi_6_64, cospi_26_64); const __m128i k__cospi_m26_p06 = pair_set_epi16(-cospi_26_64, cospi_6_64); const __m128i k__cospi_m10_p22 = pair_set_epi16(-cospi_10_64, cospi_22_64); const __m128i k__cospi_m18_p14 = pair_set_epi16(-cospi_18_64, cospi_14_64); const __m128i k__cospi_m02_p30 = pair_set_epi16(-cospi_2_64, cospi_30_64); const __m128i k__cospi_p31_p01 = pair_set_epi16(+cospi_31_64, cospi_1_64); const __m128i k__cospi_p15_p17 = pair_set_epi16(+cospi_15_64, cospi_17_64); const __m128i k__cospi_p23_p09 = pair_set_epi16(+cospi_23_64, cospi_9_64); const __m128i k__cospi_p07_p25 = pair_set_epi16(+cospi_7_64, cospi_25_64); const __m128i k__cospi_m25_p07 = pair_set_epi16(-cospi_25_64, cospi_7_64); const __m128i k__cospi_m09_p23 = pair_set_epi16(-cospi_9_64, 
cospi_23_64); const __m128i k__cospi_m17_p15 = pair_set_epi16(-cospi_17_64, cospi_15_64); const __m128i k__cospi_m01_p31 = pair_set_epi16(-cospi_1_64, cospi_31_64); const __m128i k__cospi_p27_p05 = pair_set_epi16(+cospi_27_64, cospi_5_64); const __m128i k__cospi_p11_p21 = pair_set_epi16(+cospi_11_64, cospi_21_64); const __m128i k__cospi_p19_p13 = pair_set_epi16(+cospi_19_64, cospi_13_64); const __m128i k__cospi_p03_p29 = pair_set_epi16(+cospi_3_64, cospi_29_64); const __m128i k__cospi_m29_p03 = pair_set_epi16(-cospi_29_64, cospi_3_64); const __m128i k__cospi_m13_p19 = pair_set_epi16(-cospi_13_64, cospi_19_64); const __m128i k__cospi_m21_p11 = pair_set_epi16(-cospi_21_64, cospi_11_64); const __m128i k__cospi_m05_p27 = pair_set_epi16(-cospi_5_64, cospi_27_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i kZero = _mm_set1_epi16(0); const __m128i kOne = _mm_set1_epi16(1); // Do the two transform/transpose passes int pass; #if DCT_HIGH_BIT_DEPTH int overflow; #endif for (pass = 0; pass < 2; ++pass) { // We process eight columns (transposed rows in second pass) at a time. int column_start; for (column_start = 0; column_start < 32; column_start += 8) { __m128i step1[32]; __m128i step2[32]; __m128i step3[32]; __m128i out[32]; // Stage 1 // Note: even though all the loads below are aligned, using the aligned // intrinsic makes the code slightly slower. if (0 == pass) { const int16_t *in = &input[column_start]; // step1[i] = (in[ 0 * stride] + in[(32 - 1) * stride]) << 2; // Note: the next four blocks could be in a loop. That would help the // instruction cache but is actually slower. { const int16_t *ina = in + 0 * str1; const int16_t *inb = in + 31 * str1; __m128i *step1a = &step1[0]; __m128i *step1b = &step1[31]; const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina)); const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1)); const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2)); const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3)); const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3)); const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2)); const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1)); const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb)); step1a[0] = _mm_add_epi16(ina0, inb0); step1a[1] = _mm_add_epi16(ina1, inb1); step1a[2] = _mm_add_epi16(ina2, inb2); step1a[3] = _mm_add_epi16(ina3, inb3); step1b[-3] = _mm_sub_epi16(ina3, inb3); step1b[-2] = _mm_sub_epi16(ina2, inb2); step1b[-1] = _mm_sub_epi16(ina1, inb1); step1b[-0] = _mm_sub_epi16(ina0, inb0); step1a[0] = _mm_slli_epi16(step1a[0], 2); step1a[1] = _mm_slli_epi16(step1a[1], 2); step1a[2] = _mm_slli_epi16(step1a[2], 2); step1a[3] = _mm_slli_epi16(step1a[3], 2); step1b[-3] = _mm_slli_epi16(step1b[-3], 2); step1b[-2] = _mm_slli_epi16(step1b[-2], 2); step1b[-1] = _mm_slli_epi16(step1b[-1], 2); step1b[-0] = _mm_slli_epi16(step1b[-0], 2); } { const int16_t *ina = in + 4 * str1; const int16_t *inb = in + 27 * str1; __m128i *step1a = &step1[4]; __m128i *step1b = &step1[27]; const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina)); const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1)); const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2)); const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3)); const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3)); const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2)); const __m128i inb1 =
_mm_loadu_si128((const __m128i *)(inb - str1)); const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb)); step1a[0] = _mm_add_epi16(ina0, inb0); step1a[1] = _mm_add_epi16(ina1, inb1); step1a[2] = _mm_add_epi16(ina2, inb2); step1a[3] = _mm_add_epi16(ina3, inb3); step1b[-3] = _mm_sub_epi16(ina3, inb3); step1b[-2] = _mm_sub_epi16(ina2, inb2); step1b[-1] = _mm_sub_epi16(ina1, inb1); step1b[-0] = _mm_sub_epi16(ina0, inb0); step1a[0] = _mm_slli_epi16(step1a[0], 2); step1a[1] = _mm_slli_epi16(step1a[1], 2); step1a[2] = _mm_slli_epi16(step1a[2], 2); step1a[3] = _mm_slli_epi16(step1a[3], 2); step1b[-3] = _mm_slli_epi16(step1b[-3], 2); step1b[-2] = _mm_slli_epi16(step1b[-2], 2); step1b[-1] = _mm_slli_epi16(step1b[-1], 2); step1b[-0] = _mm_slli_epi16(step1b[-0], 2); } { const int16_t *ina = in + 8 * str1; const int16_t *inb = in + 23 * str1; __m128i *step1a = &step1[8]; __m128i *step1b = &step1[23]; const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina)); const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1)); const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2)); const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3)); const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3)); const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2)); const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1)); const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb)); step1a[0] = _mm_add_epi16(ina0, inb0); step1a[1] = _mm_add_epi16(ina1, inb1); step1a[2] = _mm_add_epi16(ina2, inb2); step1a[3] = _mm_add_epi16(ina3, inb3); step1b[-3] = _mm_sub_epi16(ina3, inb3); step1b[-2] = _mm_sub_epi16(ina2, inb2); step1b[-1] = _mm_sub_epi16(ina1, inb1); step1b[-0] = _mm_sub_epi16(ina0, inb0); step1a[0] = _mm_slli_epi16(step1a[0], 2); step1a[1] = _mm_slli_epi16(step1a[1], 2); step1a[2] = _mm_slli_epi16(step1a[2], 2); step1a[3] = _mm_slli_epi16(step1a[3], 2); step1b[-3] = _mm_slli_epi16(step1b[-3], 2); step1b[-2] = _mm_slli_epi16(step1b[-2], 2); step1b[-1] = _mm_slli_epi16(step1b[-1], 2); step1b[-0] = _mm_slli_epi16(step1b[-0], 2); } { const int16_t *ina = in + 12 * str1; const int16_t *inb = in + 19 * str1; __m128i *step1a = &step1[12]; __m128i *step1b = &step1[19]; const __m128i ina0 = _mm_loadu_si128((const __m128i *)(ina)); const __m128i ina1 = _mm_loadu_si128((const __m128i *)(ina + str1)); const __m128i ina2 = _mm_loadu_si128((const __m128i *)(ina + str2)); const __m128i ina3 = _mm_loadu_si128((const __m128i *)(ina + str3)); const __m128i inb3 = _mm_loadu_si128((const __m128i *)(inb - str3)); const __m128i inb2 = _mm_loadu_si128((const __m128i *)(inb - str2)); const __m128i inb1 = _mm_loadu_si128((const __m128i *)(inb - str1)); const __m128i inb0 = _mm_loadu_si128((const __m128i *)(inb)); step1a[0] = _mm_add_epi16(ina0, inb0); step1a[1] = _mm_add_epi16(ina1, inb1); step1a[2] = _mm_add_epi16(ina2, inb2); step1a[3] = _mm_add_epi16(ina3, inb3); step1b[-3] = _mm_sub_epi16(ina3, inb3); step1b[-2] = _mm_sub_epi16(ina2, inb2); step1b[-1] = _mm_sub_epi16(ina1, inb1); step1b[-0] = _mm_sub_epi16(ina0, inb0); step1a[0] = _mm_slli_epi16(step1a[0], 2); step1a[1] = _mm_slli_epi16(step1a[1], 2); step1a[2] = _mm_slli_epi16(step1a[2], 2); step1a[3] = _mm_slli_epi16(step1a[3], 2); step1b[-3] = _mm_slli_epi16(step1b[-3], 2); step1b[-2] = _mm_slli_epi16(step1b[-2], 2); step1b[-1] = _mm_slli_epi16(step1b[-1], 2); step1b[-0] = _mm_slli_epi16(step1b[-0], 2); } } else { int16_t *in = &intermediate[column_start]; // step1[i] = in[ 0 * 32] + in[(32 - 1) * 32]; // Note: 
using the same approach as above to have common offset is // counter-productive as all offsets can be calculated at compile // time. // Note: the next four blocks could be in a loop. That would help the // instruction cache but is actually slower. { __m128i in00 = _mm_loadu_si128((const __m128i *)(in + 0 * 32)); __m128i in01 = _mm_loadu_si128((const __m128i *)(in + 1 * 32)); __m128i in02 = _mm_loadu_si128((const __m128i *)(in + 2 * 32)); __m128i in03 = _mm_loadu_si128((const __m128i *)(in + 3 * 32)); __m128i in28 = _mm_loadu_si128((const __m128i *)(in + 28 * 32)); __m128i in29 = _mm_loadu_si128((const __m128i *)(in + 29 * 32)); __m128i in30 = _mm_loadu_si128((const __m128i *)(in + 30 * 32)); __m128i in31 = _mm_loadu_si128((const __m128i *)(in + 31 * 32)); step1[0] = ADD_EPI16(in00, in31); step1[1] = ADD_EPI16(in01, in30); step1[2] = ADD_EPI16(in02, in29); step1[3] = ADD_EPI16(in03, in28); step1[28] = SUB_EPI16(in03, in28); step1[29] = SUB_EPI16(in02, in29); step1[30] = SUB_EPI16(in01, in30); step1[31] = SUB_EPI16(in00, in31); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&step1[0], &step1[1], &step1[2], &step1[3], &step1[28], &step1[29], &step1[30], &step1[31]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { __m128i in04 = _mm_loadu_si128((const __m128i *)(in + 4 * 32)); __m128i in05 = _mm_loadu_si128((const __m128i *)(in + 5 * 32)); __m128i in06 = _mm_loadu_si128((const __m128i *)(in + 6 * 32)); __m128i in07 = _mm_loadu_si128((const __m128i *)(in + 7 * 32)); __m128i in24 = _mm_loadu_si128((const __m128i *)(in + 24 * 32)); __m128i in25 = _mm_loadu_si128((const __m128i *)(in + 25 * 32)); __m128i in26 = _mm_loadu_si128((const __m128i *)(in + 26 * 32)); __m128i in27 = _mm_loadu_si128((const __m128i *)(in + 27 * 32)); step1[4] = ADD_EPI16(in04, in27); step1[5] = ADD_EPI16(in05, in26); step1[6] = ADD_EPI16(in06, in25); step1[7] = ADD_EPI16(in07, in24); step1[24] = SUB_EPI16(in07, in24); step1[25] = SUB_EPI16(in06, in25); step1[26] = SUB_EPI16(in05, in26); step1[27] = SUB_EPI16(in04, in27); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&step1[4], &step1[5], &step1[6], &step1[7], &step1[24], &step1[25], &step1[26], &step1[27]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { __m128i in08 = _mm_loadu_si128((const __m128i *)(in + 8 * 32)); __m128i in09 = _mm_loadu_si128((const __m128i *)(in + 9 * 32)); __m128i in10 = _mm_loadu_si128((const __m128i *)(in + 10 * 32)); __m128i in11 = _mm_loadu_si128((const __m128i *)(in + 11 * 32)); __m128i in20 = _mm_loadu_si128((const __m128i *)(in + 20 * 32)); __m128i in21 = _mm_loadu_si128((const __m128i *)(in + 21 * 32)); __m128i in22 = _mm_loadu_si128((const __m128i *)(in + 22 * 32)); __m128i in23 = _mm_loadu_si128((const __m128i *)(in + 23 * 32)); step1[8] = ADD_EPI16(in08, in23); step1[9] = ADD_EPI16(in09, in22); step1[10] = ADD_EPI16(in10, in21); step1[11] = ADD_EPI16(in11, in20); step1[20] = SUB_EPI16(in11, in20); step1[21] = SUB_EPI16(in10, in21); step1[22] = SUB_EPI16(in09, in22); step1[23] = SUB_EPI16(in08, in23); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&step1[8], &step1[9], &step1[10], &step1[11], &step1[20], &step1[21], &step1[22], &step1[23]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { __m128i in12 = _mm_loadu_si128((const __m128i *)(in + 12 * 32)); __m128i in13 = _mm_loadu_si128((const __m128i *)(in + 13 * 32)); 
__m128i in14 = _mm_loadu_si128((const __m128i *)(in + 14 * 32)); __m128i in15 = _mm_loadu_si128((const __m128i *)(in + 15 * 32)); __m128i in16 = _mm_loadu_si128((const __m128i *)(in + 16 * 32)); __m128i in17 = _mm_loadu_si128((const __m128i *)(in + 17 * 32)); __m128i in18 = _mm_loadu_si128((const __m128i *)(in + 18 * 32)); __m128i in19 = _mm_loadu_si128((const __m128i *)(in + 19 * 32)); step1[12] = ADD_EPI16(in12, in19); step1[13] = ADD_EPI16(in13, in18); step1[14] = ADD_EPI16(in14, in17); step1[15] = ADD_EPI16(in15, in16); step1[16] = SUB_EPI16(in15, in16); step1[17] = SUB_EPI16(in14, in17); step1[18] = SUB_EPI16(in13, in18); step1[19] = SUB_EPI16(in12, in19); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&step1[12], &step1[13], &step1[14], &step1[15], &step1[16], &step1[17], &step1[18], &step1[19]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } } // Stage 2 { step2[0] = ADD_EPI16(step1[0], step1[15]); step2[1] = ADD_EPI16(step1[1], step1[14]); step2[2] = ADD_EPI16(step1[2], step1[13]); step2[3] = ADD_EPI16(step1[3], step1[12]); step2[4] = ADD_EPI16(step1[4], step1[11]); step2[5] = ADD_EPI16(step1[5], step1[10]); step2[6] = ADD_EPI16(step1[6], step1[9]); step2[7] = ADD_EPI16(step1[7], step1[8]); step2[8] = SUB_EPI16(step1[7], step1[8]); step2[9] = SUB_EPI16(step1[6], step1[9]); step2[10] = SUB_EPI16(step1[5], step1[10]); step2[11] = SUB_EPI16(step1[4], step1[11]); step2[12] = SUB_EPI16(step1[3], step1[12]); step2[13] = SUB_EPI16(step1[2], step1[13]); step2[14] = SUB_EPI16(step1[1], step1[14]); step2[15] = SUB_EPI16(step1[0], step1[15]); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x16( &step2[0], &step2[1], &step2[2], &step2[3], &step2[4], &step2[5], &step2[6], &step2[7], &step2[8], &step2[9], &step2[10], &step2[11], &step2[12], &step2[13], &step2[14], &step2[15]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { const __m128i s2_20_0 = _mm_unpacklo_epi16(step1[27], step1[20]); const __m128i s2_20_1 = _mm_unpackhi_epi16(step1[27], step1[20]); const __m128i s2_21_0 = _mm_unpacklo_epi16(step1[26], step1[21]); const __m128i s2_21_1 = _mm_unpackhi_epi16(step1[26], step1[21]); const __m128i s2_22_0 = _mm_unpacklo_epi16(step1[25], step1[22]); const __m128i s2_22_1 = _mm_unpackhi_epi16(step1[25], step1[22]); const __m128i s2_23_0 = _mm_unpacklo_epi16(step1[24], step1[23]); const __m128i s2_23_1 = _mm_unpackhi_epi16(step1[24], step1[23]); const __m128i s2_20_2 = _mm_madd_epi16(s2_20_0, k__cospi_p16_m16); const __m128i s2_20_3 = _mm_madd_epi16(s2_20_1, k__cospi_p16_m16); const __m128i s2_21_2 = _mm_madd_epi16(s2_21_0, k__cospi_p16_m16); const __m128i s2_21_3 = _mm_madd_epi16(s2_21_1, k__cospi_p16_m16); const __m128i s2_22_2 = _mm_madd_epi16(s2_22_0, k__cospi_p16_m16); const __m128i s2_22_3 = _mm_madd_epi16(s2_22_1, k__cospi_p16_m16); const __m128i s2_23_2 = _mm_madd_epi16(s2_23_0, k__cospi_p16_m16); const __m128i s2_23_3 = _mm_madd_epi16(s2_23_1, k__cospi_p16_m16); const __m128i s2_24_2 = _mm_madd_epi16(s2_23_0, k__cospi_p16_p16); const __m128i s2_24_3 = _mm_madd_epi16(s2_23_1, k__cospi_p16_p16); const __m128i s2_25_2 = _mm_madd_epi16(s2_22_0, k__cospi_p16_p16); const __m128i s2_25_3 = _mm_madd_epi16(s2_22_1, k__cospi_p16_p16); const __m128i s2_26_2 = _mm_madd_epi16(s2_21_0, k__cospi_p16_p16); const __m128i s2_26_3 = _mm_madd_epi16(s2_21_1, k__cospi_p16_p16); const __m128i s2_27_2 = 
_mm_madd_epi16(s2_20_0, k__cospi_p16_p16); const __m128i s2_27_3 = _mm_madd_epi16(s2_20_1, k__cospi_p16_p16); // dct_const_round_shift const __m128i s2_20_4 = _mm_add_epi32(s2_20_2, k__DCT_CONST_ROUNDING); const __m128i s2_20_5 = _mm_add_epi32(s2_20_3, k__DCT_CONST_ROUNDING); const __m128i s2_21_4 = _mm_add_epi32(s2_21_2, k__DCT_CONST_ROUNDING); const __m128i s2_21_5 = _mm_add_epi32(s2_21_3, k__DCT_CONST_ROUNDING); const __m128i s2_22_4 = _mm_add_epi32(s2_22_2, k__DCT_CONST_ROUNDING); const __m128i s2_22_5 = _mm_add_epi32(s2_22_3, k__DCT_CONST_ROUNDING); const __m128i s2_23_4 = _mm_add_epi32(s2_23_2, k__DCT_CONST_ROUNDING); const __m128i s2_23_5 = _mm_add_epi32(s2_23_3, k__DCT_CONST_ROUNDING); const __m128i s2_24_4 = _mm_add_epi32(s2_24_2, k__DCT_CONST_ROUNDING); const __m128i s2_24_5 = _mm_add_epi32(s2_24_3, k__DCT_CONST_ROUNDING); const __m128i s2_25_4 = _mm_add_epi32(s2_25_2, k__DCT_CONST_ROUNDING); const __m128i s2_25_5 = _mm_add_epi32(s2_25_3, k__DCT_CONST_ROUNDING); const __m128i s2_26_4 = _mm_add_epi32(s2_26_2, k__DCT_CONST_ROUNDING); const __m128i s2_26_5 = _mm_add_epi32(s2_26_3, k__DCT_CONST_ROUNDING); const __m128i s2_27_4 = _mm_add_epi32(s2_27_2, k__DCT_CONST_ROUNDING); const __m128i s2_27_5 = _mm_add_epi32(s2_27_3, k__DCT_CONST_ROUNDING); const __m128i s2_20_6 = _mm_srai_epi32(s2_20_4, DCT_CONST_BITS); const __m128i s2_20_7 = _mm_srai_epi32(s2_20_5, DCT_CONST_BITS); const __m128i s2_21_6 = _mm_srai_epi32(s2_21_4, DCT_CONST_BITS); const __m128i s2_21_7 = _mm_srai_epi32(s2_21_5, DCT_CONST_BITS); const __m128i s2_22_6 = _mm_srai_epi32(s2_22_4, DCT_CONST_BITS); const __m128i s2_22_7 = _mm_srai_epi32(s2_22_5, DCT_CONST_BITS); const __m128i s2_23_6 = _mm_srai_epi32(s2_23_4, DCT_CONST_BITS); const __m128i s2_23_7 = _mm_srai_epi32(s2_23_5, DCT_CONST_BITS); const __m128i s2_24_6 = _mm_srai_epi32(s2_24_4, DCT_CONST_BITS); const __m128i s2_24_7 = _mm_srai_epi32(s2_24_5, DCT_CONST_BITS); const __m128i s2_25_6 = _mm_srai_epi32(s2_25_4, DCT_CONST_BITS); const __m128i s2_25_7 = _mm_srai_epi32(s2_25_5, DCT_CONST_BITS); const __m128i s2_26_6 = _mm_srai_epi32(s2_26_4, DCT_CONST_BITS); const __m128i s2_26_7 = _mm_srai_epi32(s2_26_5, DCT_CONST_BITS); const __m128i s2_27_6 = _mm_srai_epi32(s2_27_4, DCT_CONST_BITS); const __m128i s2_27_7 = _mm_srai_epi32(s2_27_5, DCT_CONST_BITS); // Combine step2[20] = _mm_packs_epi32(s2_20_6, s2_20_7); step2[21] = _mm_packs_epi32(s2_21_6, s2_21_7); step2[22] = _mm_packs_epi32(s2_22_6, s2_22_7); step2[23] = _mm_packs_epi32(s2_23_6, s2_23_7); step2[24] = _mm_packs_epi32(s2_24_6, s2_24_7); step2[25] = _mm_packs_epi32(s2_25_6, s2_25_7); step2[26] = _mm_packs_epi32(s2_26_6, s2_26_7); step2[27] = _mm_packs_epi32(s2_27_6, s2_27_7); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&step2[20], &step2[21], &step2[22], &step2[23], &step2[24], &step2[25], &step2[26], &step2[27]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } #if !FDCT32x32_HIGH_PRECISION // halve the magnitude so that the intermediate values stay within // the range of 16 bits.
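// Per 16-bit lane the block below computes v = (v + 1 + (v < 0)) >> 2:
// _mm_cmplt_epi16(v, kZero) is all-ones (-1) in negative lanes, so the
// SUB_EPI16 by that mask adds the (v < 0) term, the kOne add supplies the
// +1, and _mm_srai_epi16 performs the arithmetic shift by two.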
if (1 == pass) { __m128i s3_00_0 = _mm_cmplt_epi16(step2[0], kZero); __m128i s3_01_0 = _mm_cmplt_epi16(step2[1], kZero); __m128i s3_02_0 = _mm_cmplt_epi16(step2[2], kZero); __m128i s3_03_0 = _mm_cmplt_epi16(step2[3], kZero); __m128i s3_04_0 = _mm_cmplt_epi16(step2[4], kZero); __m128i s3_05_0 = _mm_cmplt_epi16(step2[5], kZero); __m128i s3_06_0 = _mm_cmplt_epi16(step2[6], kZero); __m128i s3_07_0 = _mm_cmplt_epi16(step2[7], kZero); __m128i s2_08_0 = _mm_cmplt_epi16(step2[8], kZero); __m128i s2_09_0 = _mm_cmplt_epi16(step2[9], kZero); __m128i s3_10_0 = _mm_cmplt_epi16(step2[10], kZero); __m128i s3_11_0 = _mm_cmplt_epi16(step2[11], kZero); __m128i s3_12_0 = _mm_cmplt_epi16(step2[12], kZero); __m128i s3_13_0 = _mm_cmplt_epi16(step2[13], kZero); __m128i s2_14_0 = _mm_cmplt_epi16(step2[14], kZero); __m128i s2_15_0 = _mm_cmplt_epi16(step2[15], kZero); __m128i s3_16_0 = _mm_cmplt_epi16(step1[16], kZero); __m128i s3_17_0 = _mm_cmplt_epi16(step1[17], kZero); __m128i s3_18_0 = _mm_cmplt_epi16(step1[18], kZero); __m128i s3_19_0 = _mm_cmplt_epi16(step1[19], kZero); __m128i s3_20_0 = _mm_cmplt_epi16(step2[20], kZero); __m128i s3_21_0 = _mm_cmplt_epi16(step2[21], kZero); __m128i s3_22_0 = _mm_cmplt_epi16(step2[22], kZero); __m128i s3_23_0 = _mm_cmplt_epi16(step2[23], kZero); __m128i s3_24_0 = _mm_cmplt_epi16(step2[24], kZero); __m128i s3_25_0 = _mm_cmplt_epi16(step2[25], kZero); __m128i s3_26_0 = _mm_cmplt_epi16(step2[26], kZero); __m128i s3_27_0 = _mm_cmplt_epi16(step2[27], kZero); __m128i s3_28_0 = _mm_cmplt_epi16(step1[28], kZero); __m128i s3_29_0 = _mm_cmplt_epi16(step1[29], kZero); __m128i s3_30_0 = _mm_cmplt_epi16(step1[30], kZero); __m128i s3_31_0 = _mm_cmplt_epi16(step1[31], kZero); step2[0] = SUB_EPI16(step2[0], s3_00_0); step2[1] = SUB_EPI16(step2[1], s3_01_0); step2[2] = SUB_EPI16(step2[2], s3_02_0); step2[3] = SUB_EPI16(step2[3], s3_03_0); step2[4] = SUB_EPI16(step2[4], s3_04_0); step2[5] = SUB_EPI16(step2[5], s3_05_0); step2[6] = SUB_EPI16(step2[6], s3_06_0); step2[7] = SUB_EPI16(step2[7], s3_07_0); step2[8] = SUB_EPI16(step2[8], s2_08_0); step2[9] = SUB_EPI16(step2[9], s2_09_0); step2[10] = SUB_EPI16(step2[10], s3_10_0); step2[11] = SUB_EPI16(step2[11], s3_11_0); step2[12] = SUB_EPI16(step2[12], s3_12_0); step2[13] = SUB_EPI16(step2[13], s3_13_0); step2[14] = SUB_EPI16(step2[14], s2_14_0); step2[15] = SUB_EPI16(step2[15], s2_15_0); step1[16] = SUB_EPI16(step1[16], s3_16_0); step1[17] = SUB_EPI16(step1[17], s3_17_0); step1[18] = SUB_EPI16(step1[18], s3_18_0); step1[19] = SUB_EPI16(step1[19], s3_19_0); step2[20] = SUB_EPI16(step2[20], s3_20_0); step2[21] = SUB_EPI16(step2[21], s3_21_0); step2[22] = SUB_EPI16(step2[22], s3_22_0); step2[23] = SUB_EPI16(step2[23], s3_23_0); step2[24] = SUB_EPI16(step2[24], s3_24_0); step2[25] = SUB_EPI16(step2[25], s3_25_0); step2[26] = SUB_EPI16(step2[26], s3_26_0); step2[27] = SUB_EPI16(step2[27], s3_27_0); step1[28] = SUB_EPI16(step1[28], s3_28_0); step1[29] = SUB_EPI16(step1[29], s3_29_0); step1[30] = SUB_EPI16(step1[30], s3_30_0); step1[31] = SUB_EPI16(step1[31], s3_31_0); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x32( &step2[0], &step2[1], &step2[2], &step2[3], &step2[4], &step2[5], &step2[6], &step2[7], &step2[8], &step2[9], &step2[10], &step2[11], &step2[12], &step2[13], &step2[14], &step2[15], &step1[16], &step1[17], &step1[18], &step1[19], &step2[20], &step2[21], &step2[22], &step2[23], &step2[24], &step2[25], &step2[26], &step2[27], &step1[28], &step1[29], &step1[30], &step1[31]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, 
output_org); return; } #endif // DCT_HIGH_BIT_DEPTH step2[0] = _mm_add_epi16(step2[0], kOne); step2[1] = _mm_add_epi16(step2[1], kOne); step2[2] = _mm_add_epi16(step2[2], kOne); step2[3] = _mm_add_epi16(step2[3], kOne); step2[4] = _mm_add_epi16(step2[4], kOne); step2[5] = _mm_add_epi16(step2[5], kOne); step2[6] = _mm_add_epi16(step2[6], kOne); step2[7] = _mm_add_epi16(step2[7], kOne); step2[8] = _mm_add_epi16(step2[8], kOne); step2[9] = _mm_add_epi16(step2[9], kOne); step2[10] = _mm_add_epi16(step2[10], kOne); step2[11] = _mm_add_epi16(step2[11], kOne); step2[12] = _mm_add_epi16(step2[12], kOne); step2[13] = _mm_add_epi16(step2[13], kOne); step2[14] = _mm_add_epi16(step2[14], kOne); step2[15] = _mm_add_epi16(step2[15], kOne); step1[16] = _mm_add_epi16(step1[16], kOne); step1[17] = _mm_add_epi16(step1[17], kOne); step1[18] = _mm_add_epi16(step1[18], kOne); step1[19] = _mm_add_epi16(step1[19], kOne); step2[20] = _mm_add_epi16(step2[20], kOne); step2[21] = _mm_add_epi16(step2[21], kOne); step2[22] = _mm_add_epi16(step2[22], kOne); step2[23] = _mm_add_epi16(step2[23], kOne); step2[24] = _mm_add_epi16(step2[24], kOne); step2[25] = _mm_add_epi16(step2[25], kOne); step2[26] = _mm_add_epi16(step2[26], kOne); step2[27] = _mm_add_epi16(step2[27], kOne); step1[28] = _mm_add_epi16(step1[28], kOne); step1[29] = _mm_add_epi16(step1[29], kOne); step1[30] = _mm_add_epi16(step1[30], kOne); step1[31] = _mm_add_epi16(step1[31], kOne); step2[0] = _mm_srai_epi16(step2[0], 2); step2[1] = _mm_srai_epi16(step2[1], 2); step2[2] = _mm_srai_epi16(step2[2], 2); step2[3] = _mm_srai_epi16(step2[3], 2); step2[4] = _mm_srai_epi16(step2[4], 2); step2[5] = _mm_srai_epi16(step2[5], 2); step2[6] = _mm_srai_epi16(step2[6], 2); step2[7] = _mm_srai_epi16(step2[7], 2); step2[8] = _mm_srai_epi16(step2[8], 2); step2[9] = _mm_srai_epi16(step2[9], 2); step2[10] = _mm_srai_epi16(step2[10], 2); step2[11] = _mm_srai_epi16(step2[11], 2); step2[12] = _mm_srai_epi16(step2[12], 2); step2[13] = _mm_srai_epi16(step2[13], 2); step2[14] = _mm_srai_epi16(step2[14], 2); step2[15] = _mm_srai_epi16(step2[15], 2); step1[16] = _mm_srai_epi16(step1[16], 2); step1[17] = _mm_srai_epi16(step1[17], 2); step1[18] = _mm_srai_epi16(step1[18], 2); step1[19] = _mm_srai_epi16(step1[19], 2); step2[20] = _mm_srai_epi16(step2[20], 2); step2[21] = _mm_srai_epi16(step2[21], 2); step2[22] = _mm_srai_epi16(step2[22], 2); step2[23] = _mm_srai_epi16(step2[23], 2); step2[24] = _mm_srai_epi16(step2[24], 2); step2[25] = _mm_srai_epi16(step2[25], 2); step2[26] = _mm_srai_epi16(step2[26], 2); step2[27] = _mm_srai_epi16(step2[27], 2); step1[28] = _mm_srai_epi16(step1[28], 2); step1[29] = _mm_srai_epi16(step1[29], 2); step1[30] = _mm_srai_epi16(step1[30], 2); step1[31] = _mm_srai_epi16(step1[31], 2); } #endif // !FDCT32x32_HIGH_PRECISION #if FDCT32x32_HIGH_PRECISION if (pass == 0) { #endif // Stage 3 { step3[0] = ADD_EPI16(step2[(8 - 1)], step2[0]); step3[1] = ADD_EPI16(step2[(8 - 2)], step2[1]); step3[2] = ADD_EPI16(step2[(8 - 3)], step2[2]); step3[3] = ADD_EPI16(step2[(8 - 4)], step2[3]); step3[4] = SUB_EPI16(step2[(8 - 5)], step2[4]); step3[5] = SUB_EPI16(step2[(8 - 6)], step2[5]); step3[6] = SUB_EPI16(step2[(8 - 7)], step2[6]); step3[7] = SUB_EPI16(step2[(8 - 8)], step2[7]); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&step3[0], &step3[1], &step3[2], &step3[3], &step3[4], &step3[5], &step3[6], &step3[7]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // 
DCT_HIGH_BIT_DEPTH } { const __m128i s3_10_0 = _mm_unpacklo_epi16(step2[13], step2[10]); const __m128i s3_10_1 = _mm_unpackhi_epi16(step2[13], step2[10]); const __m128i s3_11_0 = _mm_unpacklo_epi16(step2[12], step2[11]); const __m128i s3_11_1 = _mm_unpackhi_epi16(step2[12], step2[11]); const __m128i s3_10_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_m16); const __m128i s3_10_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_m16); const __m128i s3_11_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_m16); const __m128i s3_11_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_m16); const __m128i s3_12_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_p16); const __m128i s3_12_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_p16); const __m128i s3_13_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_p16); const __m128i s3_13_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_p16); // dct_const_round_shift const __m128i s3_10_4 = _mm_add_epi32(s3_10_2, k__DCT_CONST_ROUNDING); const __m128i s3_10_5 = _mm_add_epi32(s3_10_3, k__DCT_CONST_ROUNDING); const __m128i s3_11_4 = _mm_add_epi32(s3_11_2, k__DCT_CONST_ROUNDING); const __m128i s3_11_5 = _mm_add_epi32(s3_11_3, k__DCT_CONST_ROUNDING); const __m128i s3_12_4 = _mm_add_epi32(s3_12_2, k__DCT_CONST_ROUNDING); const __m128i s3_12_5 = _mm_add_epi32(s3_12_3, k__DCT_CONST_ROUNDING); const __m128i s3_13_4 = _mm_add_epi32(s3_13_2, k__DCT_CONST_ROUNDING); const __m128i s3_13_5 = _mm_add_epi32(s3_13_3, k__DCT_CONST_ROUNDING); const __m128i s3_10_6 = _mm_srai_epi32(s3_10_4, DCT_CONST_BITS); const __m128i s3_10_7 = _mm_srai_epi32(s3_10_5, DCT_CONST_BITS); const __m128i s3_11_6 = _mm_srai_epi32(s3_11_4, DCT_CONST_BITS); const __m128i s3_11_7 = _mm_srai_epi32(s3_11_5, DCT_CONST_BITS); const __m128i s3_12_6 = _mm_srai_epi32(s3_12_4, DCT_CONST_BITS); const __m128i s3_12_7 = _mm_srai_epi32(s3_12_5, DCT_CONST_BITS); const __m128i s3_13_6 = _mm_srai_epi32(s3_13_4, DCT_CONST_BITS); const __m128i s3_13_7 = _mm_srai_epi32(s3_13_5, DCT_CONST_BITS); // Combine step3[10] = _mm_packs_epi32(s3_10_6, s3_10_7); step3[11] = _mm_packs_epi32(s3_11_6, s3_11_7); step3[12] = _mm_packs_epi32(s3_12_6, s3_12_7); step3[13] = _mm_packs_epi32(s3_13_6, s3_13_7); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&step3[10], &step3[11], &step3[12], &step3[13]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { step3[16] = ADD_EPI16(step2[23], step1[16]); step3[17] = ADD_EPI16(step2[22], step1[17]); step3[18] = ADD_EPI16(step2[21], step1[18]); step3[19] = ADD_EPI16(step2[20], step1[19]); step3[20] = SUB_EPI16(step1[19], step2[20]); step3[21] = SUB_EPI16(step1[18], step2[21]); step3[22] = SUB_EPI16(step1[17], step2[22]); step3[23] = SUB_EPI16(step1[16], step2[23]); step3[24] = SUB_EPI16(step1[31], step2[24]); step3[25] = SUB_EPI16(step1[30], step2[25]); step3[26] = SUB_EPI16(step1[29], step2[26]); step3[27] = SUB_EPI16(step1[28], step2[27]); step3[28] = ADD_EPI16(step2[27], step1[28]); step3[29] = ADD_EPI16(step2[26], step1[29]); step3[30] = ADD_EPI16(step2[25], step1[30]); step3[31] = ADD_EPI16(step2[24], step1[31]); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x16( &step3[16], &step3[17], &step3[18], &step3[19], &step3[20], &step3[21], &step3[22], &step3[23], &step3[24], &step3[25], &step3[26], &step3[27], &step3[28], &step3[29], &step3[30], &step3[31]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // 
DCT_HIGH_BIT_DEPTH } // Stage 4 { step1[0] = ADD_EPI16(step3[3], step3[0]); step1[1] = ADD_EPI16(step3[2], step3[1]); step1[2] = SUB_EPI16(step3[1], step3[2]); step1[3] = SUB_EPI16(step3[0], step3[3]); step1[8] = ADD_EPI16(step3[11], step2[8]); step1[9] = ADD_EPI16(step3[10], step2[9]); step1[10] = SUB_EPI16(step2[9], step3[10]); step1[11] = SUB_EPI16(step2[8], step3[11]); step1[12] = SUB_EPI16(step2[15], step3[12]); step1[13] = SUB_EPI16(step2[14], step3[13]); step1[14] = ADD_EPI16(step3[13], step2[14]); step1[15] = ADD_EPI16(step3[12], step2[15]); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x16( &step1[0], &step1[1], &step1[2], &step1[3], &step1[4], &step1[5], &step1[6], &step1[7], &step1[8], &step1[9], &step1[10], &step1[11], &step1[12], &step1[13], &step1[14], &step1[15]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { const __m128i s1_05_0 = _mm_unpacklo_epi16(step3[6], step3[5]); const __m128i s1_05_1 = _mm_unpackhi_epi16(step3[6], step3[5]); const __m128i s1_05_2 = _mm_madd_epi16(s1_05_0, k__cospi_p16_m16); const __m128i s1_05_3 = _mm_madd_epi16(s1_05_1, k__cospi_p16_m16); const __m128i s1_06_2 = _mm_madd_epi16(s1_05_0, k__cospi_p16_p16); const __m128i s1_06_3 = _mm_madd_epi16(s1_05_1, k__cospi_p16_p16); // dct_const_round_shift const __m128i s1_05_4 = _mm_add_epi32(s1_05_2, k__DCT_CONST_ROUNDING); const __m128i s1_05_5 = _mm_add_epi32(s1_05_3, k__DCT_CONST_ROUNDING); const __m128i s1_06_4 = _mm_add_epi32(s1_06_2, k__DCT_CONST_ROUNDING); const __m128i s1_06_5 = _mm_add_epi32(s1_06_3, k__DCT_CONST_ROUNDING); const __m128i s1_05_6 = _mm_srai_epi32(s1_05_4, DCT_CONST_BITS); const __m128i s1_05_7 = _mm_srai_epi32(s1_05_5, DCT_CONST_BITS); const __m128i s1_06_6 = _mm_srai_epi32(s1_06_4, DCT_CONST_BITS); const __m128i s1_06_7 = _mm_srai_epi32(s1_06_5, DCT_CONST_BITS); // Combine step1[5] = _mm_packs_epi32(s1_05_6, s1_05_7); step1[6] = _mm_packs_epi32(s1_06_6, s1_06_7); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x2(&step1[5], &step1[6]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { const __m128i s1_18_0 = _mm_unpacklo_epi16(step3[18], step3[29]); const __m128i s1_18_1 = _mm_unpackhi_epi16(step3[18], step3[29]); const __m128i s1_19_0 = _mm_unpacklo_epi16(step3[19], step3[28]); const __m128i s1_19_1 = _mm_unpackhi_epi16(step3[19], step3[28]); const __m128i s1_20_0 = _mm_unpacklo_epi16(step3[20], step3[27]); const __m128i s1_20_1 = _mm_unpackhi_epi16(step3[20], step3[27]); const __m128i s1_21_0 = _mm_unpacklo_epi16(step3[21], step3[26]); const __m128i s1_21_1 = _mm_unpackhi_epi16(step3[21], step3[26]); const __m128i s1_18_2 = _mm_madd_epi16(s1_18_0, k__cospi_m08_p24); const __m128i s1_18_3 = _mm_madd_epi16(s1_18_1, k__cospi_m08_p24); const __m128i s1_19_2 = _mm_madd_epi16(s1_19_0, k__cospi_m08_p24); const __m128i s1_19_3 = _mm_madd_epi16(s1_19_1, k__cospi_m08_p24); const __m128i s1_20_2 = _mm_madd_epi16(s1_20_0, k__cospi_m24_m08); const __m128i s1_20_3 = _mm_madd_epi16(s1_20_1, k__cospi_m24_m08); const __m128i s1_21_2 = _mm_madd_epi16(s1_21_0, k__cospi_m24_m08); const __m128i s1_21_3 = _mm_madd_epi16(s1_21_1, k__cospi_m24_m08); const __m128i s1_26_2 = _mm_madd_epi16(s1_21_0, k__cospi_m08_p24); const __m128i s1_26_3 = _mm_madd_epi16(s1_21_1, k__cospi_m08_p24); const __m128i s1_27_2 = _mm_madd_epi16(s1_20_0, 
k__cospi_m08_p24); const __m128i s1_27_3 = _mm_madd_epi16(s1_20_1, k__cospi_m08_p24); const __m128i s1_28_2 = _mm_madd_epi16(s1_19_0, k__cospi_p24_p08); const __m128i s1_28_3 = _mm_madd_epi16(s1_19_1, k__cospi_p24_p08); const __m128i s1_29_2 = _mm_madd_epi16(s1_18_0, k__cospi_p24_p08); const __m128i s1_29_3 = _mm_madd_epi16(s1_18_1, k__cospi_p24_p08); // dct_const_round_shift const __m128i s1_18_4 = _mm_add_epi32(s1_18_2, k__DCT_CONST_ROUNDING); const __m128i s1_18_5 = _mm_add_epi32(s1_18_3, k__DCT_CONST_ROUNDING); const __m128i s1_19_4 = _mm_add_epi32(s1_19_2, k__DCT_CONST_ROUNDING); const __m128i s1_19_5 = _mm_add_epi32(s1_19_3, k__DCT_CONST_ROUNDING); const __m128i s1_20_4 = _mm_add_epi32(s1_20_2, k__DCT_CONST_ROUNDING); const __m128i s1_20_5 = _mm_add_epi32(s1_20_3, k__DCT_CONST_ROUNDING); const __m128i s1_21_4 = _mm_add_epi32(s1_21_2, k__DCT_CONST_ROUNDING); const __m128i s1_21_5 = _mm_add_epi32(s1_21_3, k__DCT_CONST_ROUNDING); const __m128i s1_26_4 = _mm_add_epi32(s1_26_2, k__DCT_CONST_ROUNDING); const __m128i s1_26_5 = _mm_add_epi32(s1_26_3, k__DCT_CONST_ROUNDING); const __m128i s1_27_4 = _mm_add_epi32(s1_27_2, k__DCT_CONST_ROUNDING); const __m128i s1_27_5 = _mm_add_epi32(s1_27_3, k__DCT_CONST_ROUNDING); const __m128i s1_28_4 = _mm_add_epi32(s1_28_2, k__DCT_CONST_ROUNDING); const __m128i s1_28_5 = _mm_add_epi32(s1_28_3, k__DCT_CONST_ROUNDING); const __m128i s1_29_4 = _mm_add_epi32(s1_29_2, k__DCT_CONST_ROUNDING); const __m128i s1_29_5 = _mm_add_epi32(s1_29_3, k__DCT_CONST_ROUNDING); const __m128i s1_18_6 = _mm_srai_epi32(s1_18_4, DCT_CONST_BITS); const __m128i s1_18_7 = _mm_srai_epi32(s1_18_5, DCT_CONST_BITS); const __m128i s1_19_6 = _mm_srai_epi32(s1_19_4, DCT_CONST_BITS); const __m128i s1_19_7 = _mm_srai_epi32(s1_19_5, DCT_CONST_BITS); const __m128i s1_20_6 = _mm_srai_epi32(s1_20_4, DCT_CONST_BITS); const __m128i s1_20_7 = _mm_srai_epi32(s1_20_5, DCT_CONST_BITS); const __m128i s1_21_6 = _mm_srai_epi32(s1_21_4, DCT_CONST_BITS); const __m128i s1_21_7 = _mm_srai_epi32(s1_21_5, DCT_CONST_BITS); const __m128i s1_26_6 = _mm_srai_epi32(s1_26_4, DCT_CONST_BITS); const __m128i s1_26_7 = _mm_srai_epi32(s1_26_5, DCT_CONST_BITS); const __m128i s1_27_6 = _mm_srai_epi32(s1_27_4, DCT_CONST_BITS); const __m128i s1_27_7 = _mm_srai_epi32(s1_27_5, DCT_CONST_BITS); const __m128i s1_28_6 = _mm_srai_epi32(s1_28_4, DCT_CONST_BITS); const __m128i s1_28_7 = _mm_srai_epi32(s1_28_5, DCT_CONST_BITS); const __m128i s1_29_6 = _mm_srai_epi32(s1_29_4, DCT_CONST_BITS); const __m128i s1_29_7 = _mm_srai_epi32(s1_29_5, DCT_CONST_BITS); // Combine step1[18] = _mm_packs_epi32(s1_18_6, s1_18_7); step1[19] = _mm_packs_epi32(s1_19_6, s1_19_7); step1[20] = _mm_packs_epi32(s1_20_6, s1_20_7); step1[21] = _mm_packs_epi32(s1_21_6, s1_21_7); step1[26] = _mm_packs_epi32(s1_26_6, s1_26_7); step1[27] = _mm_packs_epi32(s1_27_6, s1_27_7); step1[28] = _mm_packs_epi32(s1_28_6, s1_28_7); step1[29] = _mm_packs_epi32(s1_29_6, s1_29_7); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&step1[18], &step1[19], &step1[20], &step1[21], &step1[26], &step1[27], &step1[28], &step1[29]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } // Stage 5 { step2[4] = ADD_EPI16(step1[5], step3[4]); step2[5] = SUB_EPI16(step3[4], step1[5]); step2[6] = SUB_EPI16(step3[7], step1[6]); step2[7] = ADD_EPI16(step1[6], step3[7]); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&step2[4], &step2[5], &step2[6], 
&step2[7]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { const __m128i out_00_0 = _mm_unpacklo_epi16(step1[0], step1[1]); const __m128i out_00_1 = _mm_unpackhi_epi16(step1[0], step1[1]); const __m128i out_08_0 = _mm_unpacklo_epi16(step1[2], step1[3]); const __m128i out_08_1 = _mm_unpackhi_epi16(step1[2], step1[3]); const __m128i out_00_2 = _mm_madd_epi16(out_00_0, k__cospi_p16_p16); const __m128i out_00_3 = _mm_madd_epi16(out_00_1, k__cospi_p16_p16); const __m128i out_16_2 = _mm_madd_epi16(out_00_0, k__cospi_p16_m16); const __m128i out_16_3 = _mm_madd_epi16(out_00_1, k__cospi_p16_m16); const __m128i out_08_2 = _mm_madd_epi16(out_08_0, k__cospi_p24_p08); const __m128i out_08_3 = _mm_madd_epi16(out_08_1, k__cospi_p24_p08); const __m128i out_24_2 = _mm_madd_epi16(out_08_0, k__cospi_m08_p24); const __m128i out_24_3 = _mm_madd_epi16(out_08_1, k__cospi_m08_p24); // dct_const_round_shift const __m128i out_00_4 = _mm_add_epi32(out_00_2, k__DCT_CONST_ROUNDING); const __m128i out_00_5 = _mm_add_epi32(out_00_3, k__DCT_CONST_ROUNDING); const __m128i out_16_4 = _mm_add_epi32(out_16_2, k__DCT_CONST_ROUNDING); const __m128i out_16_5 = _mm_add_epi32(out_16_3, k__DCT_CONST_ROUNDING); const __m128i out_08_4 = _mm_add_epi32(out_08_2, k__DCT_CONST_ROUNDING); const __m128i out_08_5 = _mm_add_epi32(out_08_3, k__DCT_CONST_ROUNDING); const __m128i out_24_4 = _mm_add_epi32(out_24_2, k__DCT_CONST_ROUNDING); const __m128i out_24_5 = _mm_add_epi32(out_24_3, k__DCT_CONST_ROUNDING); const __m128i out_00_6 = _mm_srai_epi32(out_00_4, DCT_CONST_BITS); const __m128i out_00_7 = _mm_srai_epi32(out_00_5, DCT_CONST_BITS); const __m128i out_16_6 = _mm_srai_epi32(out_16_4, DCT_CONST_BITS); const __m128i out_16_7 = _mm_srai_epi32(out_16_5, DCT_CONST_BITS); const __m128i out_08_6 = _mm_srai_epi32(out_08_4, DCT_CONST_BITS); const __m128i out_08_7 = _mm_srai_epi32(out_08_5, DCT_CONST_BITS); const __m128i out_24_6 = _mm_srai_epi32(out_24_4, DCT_CONST_BITS); const __m128i out_24_7 = _mm_srai_epi32(out_24_5, DCT_CONST_BITS); // Combine out[0] = _mm_packs_epi32(out_00_6, out_00_7); out[16] = _mm_packs_epi32(out_16_6, out_16_7); out[8] = _mm_packs_epi32(out_08_6, out_08_7); out[24] = _mm_packs_epi32(out_24_6, out_24_7); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&out[0], &out[16], &out[8], &out[24]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { const __m128i s2_09_0 = _mm_unpacklo_epi16(step1[9], step1[14]); const __m128i s2_09_1 = _mm_unpackhi_epi16(step1[9], step1[14]); const __m128i s2_10_0 = _mm_unpacklo_epi16(step1[10], step1[13]); const __m128i s2_10_1 = _mm_unpackhi_epi16(step1[10], step1[13]); const __m128i s2_09_2 = _mm_madd_epi16(s2_09_0, k__cospi_m08_p24); const __m128i s2_09_3 = _mm_madd_epi16(s2_09_1, k__cospi_m08_p24); const __m128i s2_10_2 = _mm_madd_epi16(s2_10_0, k__cospi_m24_m08); const __m128i s2_10_3 = _mm_madd_epi16(s2_10_1, k__cospi_m24_m08); const __m128i s2_13_2 = _mm_madd_epi16(s2_10_0, k__cospi_m08_p24); const __m128i s2_13_3 = _mm_madd_epi16(s2_10_1, k__cospi_m08_p24); const __m128i s2_14_2 = _mm_madd_epi16(s2_09_0, k__cospi_p24_p08); const __m128i s2_14_3 = _mm_madd_epi16(s2_09_1, k__cospi_p24_p08); // dct_const_round_shift const __m128i s2_09_4 = _mm_add_epi32(s2_09_2, k__DCT_CONST_ROUNDING); const __m128i s2_09_5 = 
_mm_add_epi32(s2_09_3, k__DCT_CONST_ROUNDING); const __m128i s2_10_4 = _mm_add_epi32(s2_10_2, k__DCT_CONST_ROUNDING); const __m128i s2_10_5 = _mm_add_epi32(s2_10_3, k__DCT_CONST_ROUNDING); const __m128i s2_13_4 = _mm_add_epi32(s2_13_2, k__DCT_CONST_ROUNDING); const __m128i s2_13_5 = _mm_add_epi32(s2_13_3, k__DCT_CONST_ROUNDING); const __m128i s2_14_4 = _mm_add_epi32(s2_14_2, k__DCT_CONST_ROUNDING); const __m128i s2_14_5 = _mm_add_epi32(s2_14_3, k__DCT_CONST_ROUNDING); const __m128i s2_09_6 = _mm_srai_epi32(s2_09_4, DCT_CONST_BITS); const __m128i s2_09_7 = _mm_srai_epi32(s2_09_5, DCT_CONST_BITS); const __m128i s2_10_6 = _mm_srai_epi32(s2_10_4, DCT_CONST_BITS); const __m128i s2_10_7 = _mm_srai_epi32(s2_10_5, DCT_CONST_BITS); const __m128i s2_13_6 = _mm_srai_epi32(s2_13_4, DCT_CONST_BITS); const __m128i s2_13_7 = _mm_srai_epi32(s2_13_5, DCT_CONST_BITS); const __m128i s2_14_6 = _mm_srai_epi32(s2_14_4, DCT_CONST_BITS); const __m128i s2_14_7 = _mm_srai_epi32(s2_14_5, DCT_CONST_BITS); // Combine step2[9] = _mm_packs_epi32(s2_09_6, s2_09_7); step2[10] = _mm_packs_epi32(s2_10_6, s2_10_7); step2[13] = _mm_packs_epi32(s2_13_6, s2_13_7); step2[14] = _mm_packs_epi32(s2_14_6, s2_14_7); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&step2[9], &step2[10], &step2[13], &step2[14]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { step2[16] = ADD_EPI16(step1[19], step3[16]); step2[17] = ADD_EPI16(step1[18], step3[17]); step2[18] = SUB_EPI16(step3[17], step1[18]); step2[19] = SUB_EPI16(step3[16], step1[19]); step2[20] = SUB_EPI16(step3[23], step1[20]); step2[21] = SUB_EPI16(step3[22], step1[21]); step2[22] = ADD_EPI16(step1[21], step3[22]); step2[23] = ADD_EPI16(step1[20], step3[23]); step2[24] = ADD_EPI16(step1[27], step3[24]); step2[25] = ADD_EPI16(step1[26], step3[25]); step2[26] = SUB_EPI16(step3[25], step1[26]); step2[27] = SUB_EPI16(step3[24], step1[27]); step2[28] = SUB_EPI16(step3[31], step1[28]); step2[29] = SUB_EPI16(step3[30], step1[29]); step2[30] = ADD_EPI16(step1[29], step3[30]); step2[31] = ADD_EPI16(step1[28], step3[31]); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x16( &step2[16], &step2[17], &step2[18], &step2[19], &step2[20], &step2[21], &step2[22], &step2[23], &step2[24], &step2[25], &step2[26], &step2[27], &step2[28], &step2[29], &step2[30], &step2[31]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } // Stage 6 { const __m128i out_04_0 = _mm_unpacklo_epi16(step2[4], step2[7]); const __m128i out_04_1 = _mm_unpackhi_epi16(step2[4], step2[7]); const __m128i out_20_0 = _mm_unpacklo_epi16(step2[5], step2[6]); const __m128i out_20_1 = _mm_unpackhi_epi16(step2[5], step2[6]); const __m128i out_12_0 = _mm_unpacklo_epi16(step2[5], step2[6]); const __m128i out_12_1 = _mm_unpackhi_epi16(step2[5], step2[6]); const __m128i out_28_0 = _mm_unpacklo_epi16(step2[4], step2[7]); const __m128i out_28_1 = _mm_unpackhi_epi16(step2[4], step2[7]); const __m128i out_04_2 = _mm_madd_epi16(out_04_0, k__cospi_p28_p04); const __m128i out_04_3 = _mm_madd_epi16(out_04_1, k__cospi_p28_p04); const __m128i out_20_2 = _mm_madd_epi16(out_20_0, k__cospi_p12_p20); const __m128i out_20_3 = _mm_madd_epi16(out_20_1, k__cospi_p12_p20); const __m128i out_12_2 = _mm_madd_epi16(out_12_0, k__cospi_m20_p12); const __m128i out_12_3 = _mm_madd_epi16(out_12_1, 
k__cospi_m20_p12); const __m128i out_28_2 = _mm_madd_epi16(out_28_0, k__cospi_m04_p28); const __m128i out_28_3 = _mm_madd_epi16(out_28_1, k__cospi_m04_p28); // dct_const_round_shift const __m128i out_04_4 = _mm_add_epi32(out_04_2, k__DCT_CONST_ROUNDING); const __m128i out_04_5 = _mm_add_epi32(out_04_3, k__DCT_CONST_ROUNDING); const __m128i out_20_4 = _mm_add_epi32(out_20_2, k__DCT_CONST_ROUNDING); const __m128i out_20_5 = _mm_add_epi32(out_20_3, k__DCT_CONST_ROUNDING); const __m128i out_12_4 = _mm_add_epi32(out_12_2, k__DCT_CONST_ROUNDING); const __m128i out_12_5 = _mm_add_epi32(out_12_3, k__DCT_CONST_ROUNDING); const __m128i out_28_4 = _mm_add_epi32(out_28_2, k__DCT_CONST_ROUNDING); const __m128i out_28_5 = _mm_add_epi32(out_28_3, k__DCT_CONST_ROUNDING); const __m128i out_04_6 = _mm_srai_epi32(out_04_4, DCT_CONST_BITS); const __m128i out_04_7 = _mm_srai_epi32(out_04_5, DCT_CONST_BITS); const __m128i out_20_6 = _mm_srai_epi32(out_20_4, DCT_CONST_BITS); const __m128i out_20_7 = _mm_srai_epi32(out_20_5, DCT_CONST_BITS); const __m128i out_12_6 = _mm_srai_epi32(out_12_4, DCT_CONST_BITS); const __m128i out_12_7 = _mm_srai_epi32(out_12_5, DCT_CONST_BITS); const __m128i out_28_6 = _mm_srai_epi32(out_28_4, DCT_CONST_BITS); const __m128i out_28_7 = _mm_srai_epi32(out_28_5, DCT_CONST_BITS); // Combine out[4] = _mm_packs_epi32(out_04_6, out_04_7); out[20] = _mm_packs_epi32(out_20_6, out_20_7); out[12] = _mm_packs_epi32(out_12_6, out_12_7); out[28] = _mm_packs_epi32(out_28_6, out_28_7); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&out[4], &out[20], &out[12], &out[28]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { step3[8] = ADD_EPI16(step2[9], step1[8]); step3[9] = SUB_EPI16(step1[8], step2[9]); step3[10] = SUB_EPI16(step1[11], step2[10]); step3[11] = ADD_EPI16(step2[10], step1[11]); step3[12] = ADD_EPI16(step2[13], step1[12]); step3[13] = SUB_EPI16(step1[12], step2[13]); step3[14] = SUB_EPI16(step1[15], step2[14]); step3[15] = ADD_EPI16(step2[14], step1[15]); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&step3[8], &step3[9], &step3[10], &step3[11], &step3[12], &step3[13], &step3[14], &step3[15]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { const __m128i s3_17_0 = _mm_unpacklo_epi16(step2[17], step2[30]); const __m128i s3_17_1 = _mm_unpackhi_epi16(step2[17], step2[30]); const __m128i s3_18_0 = _mm_unpacklo_epi16(step2[18], step2[29]); const __m128i s3_18_1 = _mm_unpackhi_epi16(step2[18], step2[29]); const __m128i s3_21_0 = _mm_unpacklo_epi16(step2[21], step2[26]); const __m128i s3_21_1 = _mm_unpackhi_epi16(step2[21], step2[26]); const __m128i s3_22_0 = _mm_unpacklo_epi16(step2[22], step2[25]); const __m128i s3_22_1 = _mm_unpackhi_epi16(step2[22], step2[25]); const __m128i s3_17_2 = _mm_madd_epi16(s3_17_0, k__cospi_m04_p28); const __m128i s3_17_3 = _mm_madd_epi16(s3_17_1, k__cospi_m04_p28); const __m128i s3_18_2 = _mm_madd_epi16(s3_18_0, k__cospi_m28_m04); const __m128i s3_18_3 = _mm_madd_epi16(s3_18_1, k__cospi_m28_m04); const __m128i s3_21_2 = _mm_madd_epi16(s3_21_0, k__cospi_m20_p12); const __m128i s3_21_3 = _mm_madd_epi16(s3_21_1, k__cospi_m20_p12); const __m128i s3_22_2 = _mm_madd_epi16(s3_22_0, k__cospi_m12_m20); const __m128i s3_22_3 = _mm_madd_epi16(s3_22_1, k__cospi_m12_m20); const __m128i s3_25_2 = 
_mm_madd_epi16(s3_22_0, k__cospi_m20_p12); const __m128i s3_25_3 = _mm_madd_epi16(s3_22_1, k__cospi_m20_p12); const __m128i s3_26_2 = _mm_madd_epi16(s3_21_0, k__cospi_p12_p20); const __m128i s3_26_3 = _mm_madd_epi16(s3_21_1, k__cospi_p12_p20); const __m128i s3_29_2 = _mm_madd_epi16(s3_18_0, k__cospi_m04_p28); const __m128i s3_29_3 = _mm_madd_epi16(s3_18_1, k__cospi_m04_p28); const __m128i s3_30_2 = _mm_madd_epi16(s3_17_0, k__cospi_p28_p04); const __m128i s3_30_3 = _mm_madd_epi16(s3_17_1, k__cospi_p28_p04); // dct_const_round_shift const __m128i s3_17_4 = _mm_add_epi32(s3_17_2, k__DCT_CONST_ROUNDING); const __m128i s3_17_5 = _mm_add_epi32(s3_17_3, k__DCT_CONST_ROUNDING); const __m128i s3_18_4 = _mm_add_epi32(s3_18_2, k__DCT_CONST_ROUNDING); const __m128i s3_18_5 = _mm_add_epi32(s3_18_3, k__DCT_CONST_ROUNDING); const __m128i s3_21_4 = _mm_add_epi32(s3_21_2, k__DCT_CONST_ROUNDING); const __m128i s3_21_5 = _mm_add_epi32(s3_21_3, k__DCT_CONST_ROUNDING); const __m128i s3_22_4 = _mm_add_epi32(s3_22_2, k__DCT_CONST_ROUNDING); const __m128i s3_22_5 = _mm_add_epi32(s3_22_3, k__DCT_CONST_ROUNDING); const __m128i s3_17_6 = _mm_srai_epi32(s3_17_4, DCT_CONST_BITS); const __m128i s3_17_7 = _mm_srai_epi32(s3_17_5, DCT_CONST_BITS); const __m128i s3_18_6 = _mm_srai_epi32(s3_18_4, DCT_CONST_BITS); const __m128i s3_18_7 = _mm_srai_epi32(s3_18_5, DCT_CONST_BITS); const __m128i s3_21_6 = _mm_srai_epi32(s3_21_4, DCT_CONST_BITS); const __m128i s3_21_7 = _mm_srai_epi32(s3_21_5, DCT_CONST_BITS); const __m128i s3_22_6 = _mm_srai_epi32(s3_22_4, DCT_CONST_BITS); const __m128i s3_22_7 = _mm_srai_epi32(s3_22_5, DCT_CONST_BITS); const __m128i s3_25_4 = _mm_add_epi32(s3_25_2, k__DCT_CONST_ROUNDING); const __m128i s3_25_5 = _mm_add_epi32(s3_25_3, k__DCT_CONST_ROUNDING); const __m128i s3_26_4 = _mm_add_epi32(s3_26_2, k__DCT_CONST_ROUNDING); const __m128i s3_26_5 = _mm_add_epi32(s3_26_3, k__DCT_CONST_ROUNDING); const __m128i s3_29_4 = _mm_add_epi32(s3_29_2, k__DCT_CONST_ROUNDING); const __m128i s3_29_5 = _mm_add_epi32(s3_29_3, k__DCT_CONST_ROUNDING); const __m128i s3_30_4 = _mm_add_epi32(s3_30_2, k__DCT_CONST_ROUNDING); const __m128i s3_30_5 = _mm_add_epi32(s3_30_3, k__DCT_CONST_ROUNDING); const __m128i s3_25_6 = _mm_srai_epi32(s3_25_4, DCT_CONST_BITS); const __m128i s3_25_7 = _mm_srai_epi32(s3_25_5, DCT_CONST_BITS); const __m128i s3_26_6 = _mm_srai_epi32(s3_26_4, DCT_CONST_BITS); const __m128i s3_26_7 = _mm_srai_epi32(s3_26_5, DCT_CONST_BITS); const __m128i s3_29_6 = _mm_srai_epi32(s3_29_4, DCT_CONST_BITS); const __m128i s3_29_7 = _mm_srai_epi32(s3_29_5, DCT_CONST_BITS); const __m128i s3_30_6 = _mm_srai_epi32(s3_30_4, DCT_CONST_BITS); const __m128i s3_30_7 = _mm_srai_epi32(s3_30_5, DCT_CONST_BITS); // Combine step3[17] = _mm_packs_epi32(s3_17_6, s3_17_7); step3[18] = _mm_packs_epi32(s3_18_6, s3_18_7); step3[21] = _mm_packs_epi32(s3_21_6, s3_21_7); step3[22] = _mm_packs_epi32(s3_22_6, s3_22_7); // Combine step3[25] = _mm_packs_epi32(s3_25_6, s3_25_7); step3[26] = _mm_packs_epi32(s3_26_6, s3_26_7); step3[29] = _mm_packs_epi32(s3_29_6, s3_29_7); step3[30] = _mm_packs_epi32(s3_30_6, s3_30_7); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&step3[17], &step3[18], &step3[21], &step3[22], &step3[25], &step3[26], &step3[29], &step3[30]); if (overflow) { if (pass == 0) HIGH_FDCT32x32_2D_C(input, output_org, stride); else HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } // Stage 7 { const __m128i out_02_0 = _mm_unpacklo_epi16(step3[8], step3[15]); const __m128i out_02_1 = 
_mm_unpackhi_epi16(step3[8], step3[15]); const __m128i out_18_0 = _mm_unpacklo_epi16(step3[9], step3[14]); const __m128i out_18_1 = _mm_unpackhi_epi16(step3[9], step3[14]); const __m128i out_10_0 = _mm_unpacklo_epi16(step3[10], step3[13]); const __m128i out_10_1 = _mm_unpackhi_epi16(step3[10], step3[13]); const __m128i out_26_0 = _mm_unpacklo_epi16(step3[11], step3[12]); const __m128i out_26_1 = _mm_unpackhi_epi16(step3[11], step3[12]); const __m128i out_02_2 = _mm_madd_epi16(out_02_0, k__cospi_p30_p02); const __m128i out_02_3 = _mm_madd_epi16(out_02_1, k__cospi_p30_p02); const __m128i out_18_2 = _mm_madd_epi16(out_18_0, k__cospi_p14_p18); const __m128i out_18_3 = _mm_madd_epi16(out_18_1, k__cospi_p14_p18); const __m128i out_10_2 = _mm_madd_epi16(out_10_0, k__cospi_p22_p10); const __m128i out_10_3 = _mm_madd_epi16(out_10_1, k__cospi_p22_p10); const __m128i out_26_2 = _mm_madd_epi16(out_26_0, k__cospi_p06_p26); const __m128i out_26_3 = _mm_madd_epi16(out_26_1, k__cospi_p06_p26); const __m128i out_06_2 = _mm_madd_epi16(out_26_0, k__cospi_m26_p06); const __m128i out_06_3 = _mm_madd_epi16(out_26_1, k__cospi_m26_p06); const __m128i out_22_2 = _mm_madd_epi16(out_10_0, k__cospi_m10_p22); const __m128i out_22_3 = _mm_madd_epi16(out_10_1, k__cospi_m10_p22); const __m128i out_14_2 = _mm_madd_epi16(out_18_0, k__cospi_m18_p14); const __m128i out_14_3 = _mm_madd_epi16(out_18_1, k__cospi_m18_p14); const __m128i out_30_2 = _mm_madd_epi16(out_02_0, k__cospi_m02_p30); const __m128i out_30_3 = _mm_madd_epi16(out_02_1, k__cospi_m02_p30); // dct_const_round_shift const __m128i out_02_4 = _mm_add_epi32(out_02_2, k__DCT_CONST_ROUNDING); const __m128i out_02_5 = _mm_add_epi32(out_02_3, k__DCT_CONST_ROUNDING); const __m128i out_18_4 = _mm_add_epi32(out_18_2, k__DCT_CONST_ROUNDING); const __m128i out_18_5 = _mm_add_epi32(out_18_3, k__DCT_CONST_ROUNDING); const __m128i out_10_4 = _mm_add_epi32(out_10_2, k__DCT_CONST_ROUNDING); const __m128i out_10_5 = _mm_add_epi32(out_10_3, k__DCT_CONST_ROUNDING); const __m128i out_26_4 = _mm_add_epi32(out_26_2, k__DCT_CONST_ROUNDING); const __m128i out_26_5 = _mm_add_epi32(out_26_3, k__DCT_CONST_ROUNDING); const __m128i out_06_4 = _mm_add_epi32(out_06_2, k__DCT_CONST_ROUNDING); const __m128i out_06_5 = _mm_add_epi32(out_06_3, k__DCT_CONST_ROUNDING); const __m128i out_22_4 = _mm_add_epi32(out_22_2, k__DCT_CONST_ROUNDING); const __m128i out_22_5 = _mm_add_epi32(out_22_3, k__DCT_CONST_ROUNDING); const __m128i out_14_4 = _mm_add_epi32(out_14_2, k__DCT_CONST_ROUNDING); const __m128i out_14_5 = _mm_add_epi32(out_14_3, k__DCT_CONST_ROUNDING); const __m128i out_30_4 = _mm_add_epi32(out_30_2, k__DCT_CONST_ROUNDING); const __m128i out_30_5 = _mm_add_epi32(out_30_3, k__DCT_CONST_ROUNDING); const __m128i out_02_6 = _mm_srai_epi32(out_02_4, DCT_CONST_BITS); const __m128i out_02_7 = _mm_srai_epi32(out_02_5, DCT_CONST_BITS); const __m128i out_18_6 = _mm_srai_epi32(out_18_4, DCT_CONST_BITS); const __m128i out_18_7 = _mm_srai_epi32(out_18_5, DCT_CONST_BITS); const __m128i out_10_6 = _mm_srai_epi32(out_10_4, DCT_CONST_BITS); const __m128i out_10_7 = _mm_srai_epi32(out_10_5, DCT_CONST_BITS); const __m128i out_26_6 = _mm_srai_epi32(out_26_4, DCT_CONST_BITS); const __m128i out_26_7 = _mm_srai_epi32(out_26_5, DCT_CONST_BITS); const __m128i out_06_6 = _mm_srai_epi32(out_06_4, DCT_CONST_BITS); const __m128i out_06_7 = _mm_srai_epi32(out_06_5, DCT_CONST_BITS); const __m128i out_22_6 = _mm_srai_epi32(out_22_4, DCT_CONST_BITS); const __m128i out_22_7 = _mm_srai_epi32(out_22_5, DCT_CONST_BITS); const 
      // dct_const_round_shift
      const __m128i out_02_4 = _mm_add_epi32(out_02_2, k__DCT_CONST_ROUNDING);
      const __m128i out_02_5 = _mm_add_epi32(out_02_3, k__DCT_CONST_ROUNDING);
      const __m128i out_18_4 = _mm_add_epi32(out_18_2, k__DCT_CONST_ROUNDING);
      const __m128i out_18_5 = _mm_add_epi32(out_18_3, k__DCT_CONST_ROUNDING);
      const __m128i out_10_4 = _mm_add_epi32(out_10_2, k__DCT_CONST_ROUNDING);
      const __m128i out_10_5 = _mm_add_epi32(out_10_3, k__DCT_CONST_ROUNDING);
      const __m128i out_26_4 = _mm_add_epi32(out_26_2, k__DCT_CONST_ROUNDING);
      const __m128i out_26_5 = _mm_add_epi32(out_26_3, k__DCT_CONST_ROUNDING);
      const __m128i out_06_4 = _mm_add_epi32(out_06_2, k__DCT_CONST_ROUNDING);
      const __m128i out_06_5 = _mm_add_epi32(out_06_3, k__DCT_CONST_ROUNDING);
      const __m128i out_22_4 = _mm_add_epi32(out_22_2, k__DCT_CONST_ROUNDING);
      const __m128i out_22_5 = _mm_add_epi32(out_22_3, k__DCT_CONST_ROUNDING);
      const __m128i out_14_4 = _mm_add_epi32(out_14_2, k__DCT_CONST_ROUNDING);
      const __m128i out_14_5 = _mm_add_epi32(out_14_3, k__DCT_CONST_ROUNDING);
      const __m128i out_30_4 = _mm_add_epi32(out_30_2, k__DCT_CONST_ROUNDING);
      const __m128i out_30_5 = _mm_add_epi32(out_30_3, k__DCT_CONST_ROUNDING);
      const __m128i out_02_6 = _mm_srai_epi32(out_02_4, DCT_CONST_BITS);
      const __m128i out_02_7 = _mm_srai_epi32(out_02_5, DCT_CONST_BITS);
      const __m128i out_18_6 = _mm_srai_epi32(out_18_4, DCT_CONST_BITS);
      const __m128i out_18_7 = _mm_srai_epi32(out_18_5, DCT_CONST_BITS);
      const __m128i out_10_6 = _mm_srai_epi32(out_10_4, DCT_CONST_BITS);
      const __m128i out_10_7 = _mm_srai_epi32(out_10_5, DCT_CONST_BITS);
      const __m128i out_26_6 = _mm_srai_epi32(out_26_4, DCT_CONST_BITS);
      const __m128i out_26_7 = _mm_srai_epi32(out_26_5, DCT_CONST_BITS);
      const __m128i out_06_6 = _mm_srai_epi32(out_06_4, DCT_CONST_BITS);
      const __m128i out_06_7 = _mm_srai_epi32(out_06_5, DCT_CONST_BITS);
      const __m128i out_22_6 = _mm_srai_epi32(out_22_4, DCT_CONST_BITS);
      const __m128i out_22_7 = _mm_srai_epi32(out_22_5, DCT_CONST_BITS);
      const __m128i out_14_6 = _mm_srai_epi32(out_14_4, DCT_CONST_BITS);
      const __m128i out_14_7 = _mm_srai_epi32(out_14_5, DCT_CONST_BITS);
      const __m128i out_30_6 = _mm_srai_epi32(out_30_4, DCT_CONST_BITS);
      const __m128i out_30_7 = _mm_srai_epi32(out_30_5, DCT_CONST_BITS);
      // Combine
      out[2] = _mm_packs_epi32(out_02_6, out_02_7);
      out[18] = _mm_packs_epi32(out_18_6, out_18_7);
      out[10] = _mm_packs_epi32(out_10_6, out_10_7);
      out[26] = _mm_packs_epi32(out_26_6, out_26_7);
      out[6] = _mm_packs_epi32(out_06_6, out_06_7);
      out[22] = _mm_packs_epi32(out_22_6, out_22_7);
      out[14] = _mm_packs_epi32(out_14_6, out_14_7);
      out[30] = _mm_packs_epi32(out_30_6, out_30_7);
#if DCT_HIGH_BIT_DEPTH
      overflow = check_epi16_overflow_x8(&out[2], &out[18], &out[10], &out[26],
                                         &out[6], &out[22], &out[14], &out[30]);
      if (overflow) {
        if (pass == 0)
          HIGH_FDCT32x32_2D_C(input, output_org, stride);
        else
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
        return;
      }
#endif  // DCT_HIGH_BIT_DEPTH
    }
    {
      step1[16] = ADD_EPI16(step3[17], step2[16]);
      step1[17] = SUB_EPI16(step2[16], step3[17]);
      step1[18] = SUB_EPI16(step2[19], step3[18]);
      step1[19] = ADD_EPI16(step3[18], step2[19]);
      step1[20] = ADD_EPI16(step3[21], step2[20]);
      step1[21] = SUB_EPI16(step2[20], step3[21]);
      step1[22] = SUB_EPI16(step2[23], step3[22]);
      step1[23] = ADD_EPI16(step3[22], step2[23]);
      step1[24] = ADD_EPI16(step3[25], step2[24]);
      step1[25] = SUB_EPI16(step2[24], step3[25]);
      step1[26] = SUB_EPI16(step2[27], step3[26]);
      step1[27] = ADD_EPI16(step3[26], step2[27]);
      step1[28] = ADD_EPI16(step3[29], step2[28]);
      step1[29] = SUB_EPI16(step2[28], step3[29]);
      step1[30] = SUB_EPI16(step2[31], step3[30]);
      step1[31] = ADD_EPI16(step3[30], step2[31]);
#if DCT_HIGH_BIT_DEPTH
      overflow = check_epi16_overflow_x16(
          &step1[16], &step1[17], &step1[18], &step1[19], &step1[20],
          &step1[21], &step1[22], &step1[23], &step1[24], &step1[25],
          &step1[26], &step1[27], &step1[28], &step1[29], &step1[30],
          &step1[31]);
      if (overflow) {
        if (pass == 0)
          HIGH_FDCT32x32_2D_C(input, output_org, stride);
        else
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
        return;
      }
#endif  // DCT_HIGH_BIT_DEPTH
    }
    // Final stage --- output indices are bit-reversed.
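    // "Bit-reversed" here means the output slot is the 5-bit reversal of the
    // step1 index the pair was built from: e.g. the pair taken from
    // step1[18]/step1[29] below lands in out[9], and 9 = 01001b is the
    // reversal of 18 = 10010b (likewise step1[16] -> out[1], since
    // reversing 00001b gives 10000b = 16).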
    {
      const __m128i out_01_0 = _mm_unpacklo_epi16(step1[16], step1[31]);
      const __m128i out_01_1 = _mm_unpackhi_epi16(step1[16], step1[31]);
      const __m128i out_17_0 = _mm_unpacklo_epi16(step1[17], step1[30]);
      const __m128i out_17_1 = _mm_unpackhi_epi16(step1[17], step1[30]);
      const __m128i out_09_0 = _mm_unpacklo_epi16(step1[18], step1[29]);
      const __m128i out_09_1 = _mm_unpackhi_epi16(step1[18], step1[29]);
      const __m128i out_25_0 = _mm_unpacklo_epi16(step1[19], step1[28]);
      const __m128i out_25_1 = _mm_unpackhi_epi16(step1[19], step1[28]);
      const __m128i out_01_2 = _mm_madd_epi16(out_01_0, k__cospi_p31_p01);
      const __m128i out_01_3 = _mm_madd_epi16(out_01_1, k__cospi_p31_p01);
      const __m128i out_17_2 = _mm_madd_epi16(out_17_0, k__cospi_p15_p17);
      const __m128i out_17_3 = _mm_madd_epi16(out_17_1, k__cospi_p15_p17);
      const __m128i out_09_2 = _mm_madd_epi16(out_09_0, k__cospi_p23_p09);
      const __m128i out_09_3 = _mm_madd_epi16(out_09_1, k__cospi_p23_p09);
      const __m128i out_25_2 = _mm_madd_epi16(out_25_0, k__cospi_p07_p25);
      const __m128i out_25_3 = _mm_madd_epi16(out_25_1, k__cospi_p07_p25);
      const __m128i out_07_2 = _mm_madd_epi16(out_25_0, k__cospi_m25_p07);
      const __m128i out_07_3 = _mm_madd_epi16(out_25_1, k__cospi_m25_p07);
      const __m128i out_23_2 = _mm_madd_epi16(out_09_0, k__cospi_m09_p23);
      const __m128i out_23_3 = _mm_madd_epi16(out_09_1, k__cospi_m09_p23);
      const __m128i out_15_2 = _mm_madd_epi16(out_17_0, k__cospi_m17_p15);
      const __m128i out_15_3 = _mm_madd_epi16(out_17_1, k__cospi_m17_p15);
      const __m128i out_31_2 = _mm_madd_epi16(out_01_0, k__cospi_m01_p31);
      const __m128i out_31_3 = _mm_madd_epi16(out_01_1, k__cospi_m01_p31);
      // dct_const_round_shift
      const __m128i out_01_4 = _mm_add_epi32(out_01_2, k__DCT_CONST_ROUNDING);
      const __m128i out_01_5 = _mm_add_epi32(out_01_3, k__DCT_CONST_ROUNDING);
      const __m128i out_17_4 = _mm_add_epi32(out_17_2, k__DCT_CONST_ROUNDING);
      const __m128i out_17_5 = _mm_add_epi32(out_17_3, k__DCT_CONST_ROUNDING);
      const __m128i out_09_4 = _mm_add_epi32(out_09_2, k__DCT_CONST_ROUNDING);
      const __m128i out_09_5 = _mm_add_epi32(out_09_3, k__DCT_CONST_ROUNDING);
      const __m128i out_25_4 = _mm_add_epi32(out_25_2, k__DCT_CONST_ROUNDING);
      const __m128i out_25_5 = _mm_add_epi32(out_25_3, k__DCT_CONST_ROUNDING);
      const __m128i out_07_4 = _mm_add_epi32(out_07_2, k__DCT_CONST_ROUNDING);
      const __m128i out_07_5 = _mm_add_epi32(out_07_3, k__DCT_CONST_ROUNDING);
      const __m128i out_23_4 = _mm_add_epi32(out_23_2, k__DCT_CONST_ROUNDING);
      const __m128i out_23_5 = _mm_add_epi32(out_23_3, k__DCT_CONST_ROUNDING);
      const __m128i out_15_4 = _mm_add_epi32(out_15_2, k__DCT_CONST_ROUNDING);
      const __m128i out_15_5 = _mm_add_epi32(out_15_3, k__DCT_CONST_ROUNDING);
      const __m128i out_31_4 = _mm_add_epi32(out_31_2, k__DCT_CONST_ROUNDING);
      const __m128i out_31_5 = _mm_add_epi32(out_31_3, k__DCT_CONST_ROUNDING);
      const __m128i out_01_6 = _mm_srai_epi32(out_01_4, DCT_CONST_BITS);
      const __m128i out_01_7 = _mm_srai_epi32(out_01_5, DCT_CONST_BITS);
      const __m128i out_17_6 = _mm_srai_epi32(out_17_4, DCT_CONST_BITS);
      const __m128i out_17_7 = _mm_srai_epi32(out_17_5, DCT_CONST_BITS);
      const __m128i out_09_6 = _mm_srai_epi32(out_09_4, DCT_CONST_BITS);
      const __m128i out_09_7 = _mm_srai_epi32(out_09_5, DCT_CONST_BITS);
      const __m128i out_25_6 = _mm_srai_epi32(out_25_4, DCT_CONST_BITS);
      const __m128i out_25_7 = _mm_srai_epi32(out_25_5, DCT_CONST_BITS);
      const __m128i out_07_6 = _mm_srai_epi32(out_07_4, DCT_CONST_BITS);
      const __m128i out_07_7 = _mm_srai_epi32(out_07_5, DCT_CONST_BITS);
      const __m128i out_23_6 = _mm_srai_epi32(out_23_4, DCT_CONST_BITS);
      const __m128i out_23_7 = _mm_srai_epi32(out_23_5, DCT_CONST_BITS);
      const __m128i out_15_6 = _mm_srai_epi32(out_15_4, DCT_CONST_BITS);
      const __m128i out_15_7 = _mm_srai_epi32(out_15_5, DCT_CONST_BITS);
      const __m128i out_31_6 = _mm_srai_epi32(out_31_4, DCT_CONST_BITS);
      const __m128i out_31_7 = _mm_srai_epi32(out_31_5, DCT_CONST_BITS);
      // Combine
      out[1] = _mm_packs_epi32(out_01_6, out_01_7);
      out[17] = _mm_packs_epi32(out_17_6, out_17_7);
      out[9] = _mm_packs_epi32(out_09_6, out_09_7);
      out[25] = _mm_packs_epi32(out_25_6, out_25_7);
      out[7] = _mm_packs_epi32(out_07_6, out_07_7);
      out[23] = _mm_packs_epi32(out_23_6, out_23_7);
      out[15] = _mm_packs_epi32(out_15_6, out_15_7);
      out[31] = _mm_packs_epi32(out_31_6, out_31_7);
#if DCT_HIGH_BIT_DEPTH
      overflow = check_epi16_overflow_x8(&out[1], &out[17], &out[9], &out[25],
                                         &out[7], &out[23], &out[15], &out[31]);
      if (overflow) {
        if (pass == 0)
          HIGH_FDCT32x32_2D_C(input, output_org, stride);
        else
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
        return;
      }
#endif  // DCT_HIGH_BIT_DEPTH
    }
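    // On the DCT_HIGH_BIT_DEPTH build, every narrowing _mm_packs_epi32 is
    // followed by an overflow probe: if any 16-bit lane would have saturated,
    // the SSE2 path abandons the block and recomputes it with the C fallback
    // -- the full 2-D transform while still in pass 0, or only the row
    // transform of the already-computed intermediate once in pass 1.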
    {
      const __m128i out_05_0 = _mm_unpacklo_epi16(step1[20], step1[27]);
      const __m128i out_05_1 = _mm_unpackhi_epi16(step1[20], step1[27]);
      const __m128i out_21_0 = _mm_unpacklo_epi16(step1[21], step1[26]);
      const __m128i out_21_1 = _mm_unpackhi_epi16(step1[21], step1[26]);
      const __m128i out_13_0 = _mm_unpacklo_epi16(step1[22], step1[25]);
      const __m128i out_13_1 = _mm_unpackhi_epi16(step1[22], step1[25]);
      const __m128i out_29_0 = _mm_unpacklo_epi16(step1[23], step1[24]);
      const __m128i out_29_1 = _mm_unpackhi_epi16(step1[23], step1[24]);
      const __m128i out_05_2 = _mm_madd_epi16(out_05_0, k__cospi_p27_p05);
      const __m128i out_05_3 = _mm_madd_epi16(out_05_1, k__cospi_p27_p05);
      const __m128i out_21_2 = _mm_madd_epi16(out_21_0, k__cospi_p11_p21);
      const __m128i out_21_3 = _mm_madd_epi16(out_21_1, k__cospi_p11_p21);
      const __m128i out_13_2 = _mm_madd_epi16(out_13_0, k__cospi_p19_p13);
      const __m128i out_13_3 = _mm_madd_epi16(out_13_1, k__cospi_p19_p13);
      const __m128i out_29_2 = _mm_madd_epi16(out_29_0, k__cospi_p03_p29);
      const __m128i out_29_3 = _mm_madd_epi16(out_29_1, k__cospi_p03_p29);
      const __m128i out_03_2 = _mm_madd_epi16(out_29_0, k__cospi_m29_p03);
      const __m128i out_03_3 = _mm_madd_epi16(out_29_1, k__cospi_m29_p03);
      const __m128i out_19_2 = _mm_madd_epi16(out_13_0, k__cospi_m13_p19);
      const __m128i out_19_3 = _mm_madd_epi16(out_13_1, k__cospi_m13_p19);
      const __m128i out_11_2 = _mm_madd_epi16(out_21_0, k__cospi_m21_p11);
      const __m128i out_11_3 = _mm_madd_epi16(out_21_1, k__cospi_m21_p11);
      const __m128i out_27_2 = _mm_madd_epi16(out_05_0, k__cospi_m05_p27);
      const __m128i out_27_3 = _mm_madd_epi16(out_05_1, k__cospi_m05_p27);
      // dct_const_round_shift
      const __m128i out_05_4 = _mm_add_epi32(out_05_2, k__DCT_CONST_ROUNDING);
      const __m128i out_05_5 = _mm_add_epi32(out_05_3, k__DCT_CONST_ROUNDING);
      const __m128i out_21_4 = _mm_add_epi32(out_21_2, k__DCT_CONST_ROUNDING);
      const __m128i out_21_5 = _mm_add_epi32(out_21_3, k__DCT_CONST_ROUNDING);
      const __m128i out_13_4 = _mm_add_epi32(out_13_2, k__DCT_CONST_ROUNDING);
      const __m128i out_13_5 = _mm_add_epi32(out_13_3, k__DCT_CONST_ROUNDING);
      const __m128i out_29_4 = _mm_add_epi32(out_29_2, k__DCT_CONST_ROUNDING);
      const __m128i out_29_5 = _mm_add_epi32(out_29_3, k__DCT_CONST_ROUNDING);
      const __m128i out_03_4 = _mm_add_epi32(out_03_2, k__DCT_CONST_ROUNDING);
      const __m128i out_03_5 = _mm_add_epi32(out_03_3, k__DCT_CONST_ROUNDING);
      const __m128i out_19_4 = _mm_add_epi32(out_19_2, k__DCT_CONST_ROUNDING);
      const __m128i out_19_5 = _mm_add_epi32(out_19_3, k__DCT_CONST_ROUNDING);
      const __m128i out_11_4 = _mm_add_epi32(out_11_2, k__DCT_CONST_ROUNDING);
      const __m128i out_11_5 = _mm_add_epi32(out_11_3, k__DCT_CONST_ROUNDING);
      const __m128i out_27_4 = _mm_add_epi32(out_27_2, k__DCT_CONST_ROUNDING);
      const __m128i out_27_5 = _mm_add_epi32(out_27_3, k__DCT_CONST_ROUNDING);
      const __m128i out_05_6 = _mm_srai_epi32(out_05_4, DCT_CONST_BITS);
      const __m128i out_05_7 = _mm_srai_epi32(out_05_5, DCT_CONST_BITS);
      const __m128i out_21_6 = _mm_srai_epi32(out_21_4, DCT_CONST_BITS);
      const __m128i out_21_7 = _mm_srai_epi32(out_21_5, DCT_CONST_BITS);
      const __m128i out_13_6 = _mm_srai_epi32(out_13_4, DCT_CONST_BITS);
      const __m128i out_13_7 = _mm_srai_epi32(out_13_5, DCT_CONST_BITS);
      const __m128i out_29_6 = _mm_srai_epi32(out_29_4, DCT_CONST_BITS);
      const __m128i out_29_7 = _mm_srai_epi32(out_29_5, DCT_CONST_BITS);
      const __m128i out_03_6 = _mm_srai_epi32(out_03_4, DCT_CONST_BITS);
      const __m128i out_03_7 = _mm_srai_epi32(out_03_5, DCT_CONST_BITS);
      const __m128i out_19_6 = _mm_srai_epi32(out_19_4, DCT_CONST_BITS);
      const __m128i out_19_7 = _mm_srai_epi32(out_19_5, DCT_CONST_BITS);
      const __m128i out_11_6 = _mm_srai_epi32(out_11_4, DCT_CONST_BITS);
      const __m128i out_11_7 = _mm_srai_epi32(out_11_5, DCT_CONST_BITS);
      const __m128i out_27_6 = _mm_srai_epi32(out_27_4, DCT_CONST_BITS);
      const __m128i out_27_7 = _mm_srai_epi32(out_27_5, DCT_CONST_BITS);
      // Combine
      out[5] = _mm_packs_epi32(out_05_6, out_05_7);
      out[21] = _mm_packs_epi32(out_21_6, out_21_7);
      out[13] = _mm_packs_epi32(out_13_6, out_13_7);
      out[29] = _mm_packs_epi32(out_29_6, out_29_7);
      out[3] = _mm_packs_epi32(out_03_6, out_03_7);
      out[19] = _mm_packs_epi32(out_19_6, out_19_7);
      out[11] = _mm_packs_epi32(out_11_6, out_11_7);
      out[27] = _mm_packs_epi32(out_27_6, out_27_7);
#if DCT_HIGH_BIT_DEPTH
      overflow = check_epi16_overflow_x8(&out[5], &out[21], &out[13], &out[29],
                                         &out[3], &out[19], &out[11], &out[27]);
      if (overflow) {
        if (pass == 0)
          HIGH_FDCT32x32_2D_C(input, output_org, stride);
        else
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
        return;
      }
#endif  // DCT_HIGH_BIT_DEPTH
    }
#if FDCT32x32_HIGH_PRECISION
    } else {
      __m128i lstep1[64], lstep2[64], lstep3[64];
      __m128i u[32], v[32], sign[16];
      const __m128i K32One = _mm_set_epi32(1, 1, 1, 1);
      const __m128i k__pOne_mOne = pair_set_epi16(1, -1);
      // start using 32-bit operations
      // stage 3
      {
        // expanding to 32-bit length while adding and subtracting
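        // The widening trick used below: interleave two 16-bit vectors with
        // _mm_unpack{lo,hi}_epi16, then _mm_madd_epi16 against an all-ones
        // pair (kOne) for 32-bit sums, or against k__pOne_mOne for 32-bit
        // differences -- add, subtract, and sign-extend in one step. One
        // lane, in scalar form:
        //   int32_t sum  = (int32_t)a * 1 + (int32_t)b * 1;
        //   int32_t diff = (int32_t)a * 1 + (int32_t)b * -1;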
        lstep2[0] = _mm_unpacklo_epi16(step2[0], step2[7]);
        lstep2[1] = _mm_unpackhi_epi16(step2[0], step2[7]);
        lstep2[2] = _mm_unpacklo_epi16(step2[1], step2[6]);
        lstep2[3] = _mm_unpackhi_epi16(step2[1], step2[6]);
        lstep2[4] = _mm_unpacklo_epi16(step2[2], step2[5]);
        lstep2[5] = _mm_unpackhi_epi16(step2[2], step2[5]);
        lstep2[6] = _mm_unpacklo_epi16(step2[3], step2[4]);
        lstep2[7] = _mm_unpackhi_epi16(step2[3], step2[4]);
        lstep3[0] = _mm_madd_epi16(lstep2[0], kOne);
        lstep3[1] = _mm_madd_epi16(lstep2[1], kOne);
        lstep3[2] = _mm_madd_epi16(lstep2[2], kOne);
        lstep3[3] = _mm_madd_epi16(lstep2[3], kOne);
        lstep3[4] = _mm_madd_epi16(lstep2[4], kOne);
        lstep3[5] = _mm_madd_epi16(lstep2[5], kOne);
        lstep3[6] = _mm_madd_epi16(lstep2[6], kOne);
        lstep3[7] = _mm_madd_epi16(lstep2[7], kOne);
        lstep3[8] = _mm_madd_epi16(lstep2[6], k__pOne_mOne);
        lstep3[9] = _mm_madd_epi16(lstep2[7], k__pOne_mOne);
        lstep3[10] = _mm_madd_epi16(lstep2[4], k__pOne_mOne);
        lstep3[11] = _mm_madd_epi16(lstep2[5], k__pOne_mOne);
        lstep3[12] = _mm_madd_epi16(lstep2[2], k__pOne_mOne);
        lstep3[13] = _mm_madd_epi16(lstep2[3], k__pOne_mOne);
        lstep3[14] = _mm_madd_epi16(lstep2[0], k__pOne_mOne);
        lstep3[15] = _mm_madd_epi16(lstep2[1], k__pOne_mOne);
      }
      {
        const __m128i s3_10_0 = _mm_unpacklo_epi16(step2[13], step2[10]);
        const __m128i s3_10_1 = _mm_unpackhi_epi16(step2[13], step2[10]);
        const __m128i s3_11_0 = _mm_unpacklo_epi16(step2[12], step2[11]);
        const __m128i s3_11_1 = _mm_unpackhi_epi16(step2[12], step2[11]);
        const __m128i s3_10_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_m16);
        const __m128i s3_10_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_m16);
        const __m128i s3_11_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_m16);
        const __m128i s3_11_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_m16);
        const __m128i s3_12_2 = _mm_madd_epi16(s3_11_0, k__cospi_p16_p16);
        const __m128i s3_12_3 = _mm_madd_epi16(s3_11_1, k__cospi_p16_p16);
        const __m128i s3_13_2 = _mm_madd_epi16(s3_10_0, k__cospi_p16_p16);
        const __m128i s3_13_3 = _mm_madd_epi16(s3_10_1, k__cospi_p16_p16);
        // dct_const_round_shift
        const __m128i s3_10_4 = _mm_add_epi32(s3_10_2, k__DCT_CONST_ROUNDING);
        const __m128i s3_10_5 = _mm_add_epi32(s3_10_3, k__DCT_CONST_ROUNDING);
        const __m128i s3_11_4 = _mm_add_epi32(s3_11_2, k__DCT_CONST_ROUNDING);
        const __m128i s3_11_5 = _mm_add_epi32(s3_11_3, k__DCT_CONST_ROUNDING);
        const __m128i s3_12_4 = _mm_add_epi32(s3_12_2, k__DCT_CONST_ROUNDING);
        const __m128i s3_12_5 = _mm_add_epi32(s3_12_3, k__DCT_CONST_ROUNDING);
        const __m128i s3_13_4 = _mm_add_epi32(s3_13_2, k__DCT_CONST_ROUNDING);
        const __m128i s3_13_5 = _mm_add_epi32(s3_13_3, k__DCT_CONST_ROUNDING);
        lstep3[20] = _mm_srai_epi32(s3_10_4, DCT_CONST_BITS);
        lstep3[21] = _mm_srai_epi32(s3_10_5, DCT_CONST_BITS);
        lstep3[22] = _mm_srai_epi32(s3_11_4, DCT_CONST_BITS);
        lstep3[23] = _mm_srai_epi32(s3_11_5, DCT_CONST_BITS);
        lstep3[24] = _mm_srai_epi32(s3_12_4, DCT_CONST_BITS);
        lstep3[25] = _mm_srai_epi32(s3_12_5, DCT_CONST_BITS);
        lstep3[26] = _mm_srai_epi32(s3_13_4, DCT_CONST_BITS);
        lstep3[27] = _mm_srai_epi32(s3_13_5, DCT_CONST_BITS);
      }
      {
        lstep1[32] = _mm_unpacklo_epi16(step1[16], step2[23]);
        lstep1[33] = _mm_unpackhi_epi16(step1[16], step2[23]);
        lstep1[34] = _mm_unpacklo_epi16(step1[17], step2[22]);
        lstep1[35] = _mm_unpackhi_epi16(step1[17], step2[22]);
        lstep1[36] = _mm_unpacklo_epi16(step1[18], step2[21]);
        lstep1[37] = _mm_unpackhi_epi16(step1[18], step2[21]);
        lstep1[38] = _mm_unpacklo_epi16(step1[19], step2[20]);
        lstep1[39] = _mm_unpackhi_epi16(step1[19], step2[20]);
        lstep1[56] = _mm_unpacklo_epi16(step1[28], step2[27]);
        lstep1[57] = _mm_unpackhi_epi16(step1[28], step2[27]);
        lstep1[58] = _mm_unpacklo_epi16(step1[29], step2[26]);
        lstep1[59] = _mm_unpackhi_epi16(step1[29], step2[26]);
        lstep1[60] = _mm_unpacklo_epi16(step1[30], step2[25]);
        lstep1[61] = _mm_unpackhi_epi16(step1[30], step2[25]);
        lstep1[62] = _mm_unpacklo_epi16(step1[31], step2[24]);
        lstep1[63] = _mm_unpackhi_epi16(step1[31], step2[24]);
        lstep3[32] = _mm_madd_epi16(lstep1[32], kOne);
        lstep3[33] = _mm_madd_epi16(lstep1[33], kOne);
        lstep3[34] = _mm_madd_epi16(lstep1[34], kOne);
        lstep3[35] = _mm_madd_epi16(lstep1[35], kOne);
        lstep3[36] = _mm_madd_epi16(lstep1[36], kOne);
        lstep3[37] = _mm_madd_epi16(lstep1[37], kOne);
        lstep3[38] = _mm_madd_epi16(lstep1[38], kOne);
        lstep3[39] = _mm_madd_epi16(lstep1[39], kOne);
        lstep3[40] = _mm_madd_epi16(lstep1[38], k__pOne_mOne);
        lstep3[41] = _mm_madd_epi16(lstep1[39], k__pOne_mOne);
        lstep3[42] = _mm_madd_epi16(lstep1[36], k__pOne_mOne);
        lstep3[43] = _mm_madd_epi16(lstep1[37], k__pOne_mOne);
        lstep3[44] = _mm_madd_epi16(lstep1[34], k__pOne_mOne);
        lstep3[45] = _mm_madd_epi16(lstep1[35], k__pOne_mOne);
        lstep3[46] = _mm_madd_epi16(lstep1[32], k__pOne_mOne);
        lstep3[47] = _mm_madd_epi16(lstep1[33], k__pOne_mOne);
        lstep3[48] = _mm_madd_epi16(lstep1[62], k__pOne_mOne);
        lstep3[49] = _mm_madd_epi16(lstep1[63], k__pOne_mOne);
        lstep3[50] = _mm_madd_epi16(lstep1[60], k__pOne_mOne);
        lstep3[51] = _mm_madd_epi16(lstep1[61], k__pOne_mOne);
        lstep3[52] = _mm_madd_epi16(lstep1[58], k__pOne_mOne);
        lstep3[53] = _mm_madd_epi16(lstep1[59], k__pOne_mOne);
        lstep3[54] = _mm_madd_epi16(lstep1[56], k__pOne_mOne);
        lstep3[55] = _mm_madd_epi16(lstep1[57], k__pOne_mOne);
        lstep3[56] = _mm_madd_epi16(lstep1[56], kOne);
        lstep3[57] = _mm_madd_epi16(lstep1[57], kOne);
        lstep3[58] = _mm_madd_epi16(lstep1[58], kOne);
        lstep3[59] = _mm_madd_epi16(lstep1[59], kOne);
        lstep3[60] = _mm_madd_epi16(lstep1[60], kOne);
        lstep3[61] = _mm_madd_epi16(lstep1[61], kOne);
        lstep3[62] = _mm_madd_epi16(lstep1[62], kOne);
        lstep3[63] = _mm_madd_epi16(lstep1[63], kOne);
      }
      // stage 4
      {
        // expanding to 32-bit length prior to addition operations
        sign[0] = _mm_cmpgt_epi16(kZero, step2[8]);
        sign[1] = _mm_cmpgt_epi16(kZero, step2[9]);
        sign[2] = _mm_cmpgt_epi16(kZero, step2[14]);
        sign[3] = _mm_cmpgt_epi16(kZero, step2[15]);
        lstep2[16] = _mm_unpacklo_epi16(step2[8], sign[0]);
        lstep2[17] = _mm_unpackhi_epi16(step2[8], sign[0]);
        lstep2[18] = _mm_unpacklo_epi16(step2[9], sign[1]);
        lstep2[19] = _mm_unpackhi_epi16(step2[9], sign[1]);
        lstep2[28] = _mm_unpacklo_epi16(step2[14], sign[2]);
        lstep2[29] = _mm_unpackhi_epi16(step2[14], sign[2]);
        lstep2[30] = _mm_unpacklo_epi16(step2[15], sign[3]);
        lstep2[31] = _mm_unpackhi_epi16(step2[15], sign[3]);
        lstep1[0] = _mm_add_epi32(lstep3[6], lstep3[0]);
        lstep1[1] = _mm_add_epi32(lstep3[7], lstep3[1]);
        lstep1[2] = _mm_add_epi32(lstep3[4], lstep3[2]);
        lstep1[3] = _mm_add_epi32(lstep3[5], lstep3[3]);
        lstep1[4] = _mm_sub_epi32(lstep3[2], lstep3[4]);
        lstep1[5] = _mm_sub_epi32(lstep3[3], lstep3[5]);
        lstep1[6] = _mm_sub_epi32(lstep3[0], lstep3[6]);
        lstep1[7] = _mm_sub_epi32(lstep3[1], lstep3[7]);
        lstep1[16] = _mm_add_epi32(lstep3[22], lstep2[16]);
        lstep1[17] = _mm_add_epi32(lstep3[23], lstep2[17]);
        lstep1[18] = _mm_add_epi32(lstep3[20], lstep2[18]);
        lstep1[19] = _mm_add_epi32(lstep3[21], lstep2[19]);
        lstep1[20] = _mm_sub_epi32(lstep2[18], lstep3[20]);
        lstep1[21] = _mm_sub_epi32(lstep2[19], lstep3[21]);
        lstep1[22] = _mm_sub_epi32(lstep2[16], lstep3[22]);
        lstep1[23] = _mm_sub_epi32(lstep2[17], lstep3[23]);
        lstep1[24] = _mm_sub_epi32(lstep2[30], lstep3[24]);
        lstep1[25] = _mm_sub_epi32(lstep2[31], lstep3[25]);
        lstep1[26] = _mm_sub_epi32(lstep2[28], lstep3[26]);
        lstep1[27] = _mm_sub_epi32(lstep2[29], lstep3[27]);
        lstep1[28] = _mm_add_epi32(lstep3[26], lstep2[28]);
        lstep1[29] = _mm_add_epi32(lstep3[27], lstep2[29]);
        lstep1[30] = _mm_add_epi32(lstep3[24], lstep2[30]);
        lstep1[31] = _mm_add_epi32(lstep3[25], lstep2[31]);
      }
      {
        // to be continued...
        // const __m128i k32_p16_p16 =
        //     pair_set_epi32(cospi_16_64, cospi_16_64);
        const __m128i k32_p16_m16 = pair_set_epi32(cospi_16_64, -cospi_16_64);
        u[0] = _mm_unpacklo_epi32(lstep3[12], lstep3[10]);
        u[1] = _mm_unpackhi_epi32(lstep3[12], lstep3[10]);
        u[2] = _mm_unpacklo_epi32(lstep3[13], lstep3[11]);
        u[3] = _mm_unpackhi_epi32(lstep3[13], lstep3[11]);
        // TODO(jingning): manually inline k_madd_epi32_ to further hide
        // instruction latency.
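        // k_madd_epi32() below is the 32-bit analogue of _mm_madd_epi16:
        // SSE2 has no packed 32x32 multiply-add, so the helper (assumed here
        // to be the one shipped alongside this implementation header)
        // multiplies the interleaved data/constant lane pairs and accumulates
        // into two 64-bit lanes; k_packs_epi64() later narrows two such
        // vectors back to 32 bits. Conceptually, per 64-bit lane:
        //   int64_t t = (int64_t)a * c0 + (int64_t)b * c1;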
        v[0] = k_madd_epi32(u[0], k32_p16_m16);
        v[1] = k_madd_epi32(u[1], k32_p16_m16);
        v[2] = k_madd_epi32(u[2], k32_p16_m16);
        v[3] = k_madd_epi32(u[3], k32_p16_m16);
        v[4] = k_madd_epi32(u[0], k32_p16_p16);
        v[5] = k_madd_epi32(u[1], k32_p16_p16);
        v[6] = k_madd_epi32(u[2], k32_p16_p16);
        v[7] = k_madd_epi32(u[3], k32_p16_p16);
#if DCT_HIGH_BIT_DEPTH
        overflow = k_check_epi32_overflow_8(&v[0], &v[1], &v[2], &v[3], &v[4],
                                            &v[5], &v[6], &v[7], &kZero);
        if (overflow) {
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
          return;
        }
#endif  // DCT_HIGH_BIT_DEPTH
        u[0] = k_packs_epi64(v[0], v[1]);
        u[1] = k_packs_epi64(v[2], v[3]);
        u[2] = k_packs_epi64(v[4], v[5]);
        u[3] = k_packs_epi64(v[6], v[7]);
        v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
        v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
        v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
        v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
        lstep1[10] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
        lstep1[11] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
        lstep1[12] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
        lstep1[13] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
      }
      {
        const __m128i k32_m08_p24 = pair_set_epi32(-cospi_8_64, cospi_24_64);
        const __m128i k32_m24_m08 = pair_set_epi32(-cospi_24_64, -cospi_8_64);
        const __m128i k32_p24_p08 = pair_set_epi32(cospi_24_64, cospi_8_64);
        u[0] = _mm_unpacklo_epi32(lstep3[36], lstep3[58]);
        u[1] = _mm_unpackhi_epi32(lstep3[36], lstep3[58]);
        u[2] = _mm_unpacklo_epi32(lstep3[37], lstep3[59]);
        u[3] = _mm_unpackhi_epi32(lstep3[37], lstep3[59]);
        u[4] = _mm_unpacklo_epi32(lstep3[38], lstep3[56]);
        u[5] = _mm_unpackhi_epi32(lstep3[38], lstep3[56]);
        u[6] = _mm_unpacklo_epi32(lstep3[39], lstep3[57]);
        u[7] = _mm_unpackhi_epi32(lstep3[39], lstep3[57]);
        u[8] = _mm_unpacklo_epi32(lstep3[40], lstep3[54]);
        u[9] = _mm_unpackhi_epi32(lstep3[40], lstep3[54]);
        u[10] = _mm_unpacklo_epi32(lstep3[41], lstep3[55]);
        u[11] = _mm_unpackhi_epi32(lstep3[41], lstep3[55]);
        u[12] = _mm_unpacklo_epi32(lstep3[42], lstep3[52]);
        u[13] = _mm_unpackhi_epi32(lstep3[42], lstep3[52]);
        u[14] = _mm_unpacklo_epi32(lstep3[43], lstep3[53]);
        u[15] = _mm_unpackhi_epi32(lstep3[43], lstep3[53]);
        v[0] = k_madd_epi32(u[0], k32_m08_p24);
        v[1] = k_madd_epi32(u[1], k32_m08_p24);
        v[2] = k_madd_epi32(u[2], k32_m08_p24);
        v[3] = k_madd_epi32(u[3], k32_m08_p24);
        v[4] = k_madd_epi32(u[4], k32_m08_p24);
        v[5] = k_madd_epi32(u[5], k32_m08_p24);
        v[6] = k_madd_epi32(u[6], k32_m08_p24);
        v[7] = k_madd_epi32(u[7], k32_m08_p24);
        v[8] = k_madd_epi32(u[8], k32_m24_m08);
        v[9] = k_madd_epi32(u[9], k32_m24_m08);
        v[10] = k_madd_epi32(u[10], k32_m24_m08);
        v[11] = k_madd_epi32(u[11], k32_m24_m08);
        v[12] = k_madd_epi32(u[12], k32_m24_m08);
        v[13] = k_madd_epi32(u[13], k32_m24_m08);
        v[14] = k_madd_epi32(u[14], k32_m24_m08);
        v[15] = k_madd_epi32(u[15], k32_m24_m08);
        v[16] = k_madd_epi32(u[12], k32_m08_p24);
        v[17] = k_madd_epi32(u[13], k32_m08_p24);
        v[18] = k_madd_epi32(u[14], k32_m08_p24);
        v[19] = k_madd_epi32(u[15], k32_m08_p24);
        v[20] = k_madd_epi32(u[8], k32_m08_p24);
        v[21] = k_madd_epi32(u[9], k32_m08_p24);
        v[22] = k_madd_epi32(u[10], k32_m08_p24);
        v[23] = k_madd_epi32(u[11], k32_m08_p24);
        v[24] = k_madd_epi32(u[4], k32_p24_p08);
        v[25] = k_madd_epi32(u[5], k32_p24_p08);
        v[26] = k_madd_epi32(u[6], k32_p24_p08);
        v[27] = k_madd_epi32(u[7], k32_p24_p08);
        v[28] = k_madd_epi32(u[0], k32_p24_p08);
        v[29] = k_madd_epi32(u[1], k32_p24_p08);
        v[30] = k_madd_epi32(u[2], k32_p24_p08);
        v[31] = k_madd_epi32(u[3], k32_p24_p08);
#if DCT_HIGH_BIT_DEPTH
        overflow = k_check_epi32_overflow_32(
            &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
            &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
            &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
            &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
        if (overflow) {
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
          return;
        }
#endif  // DCT_HIGH_BIT_DEPTH
        u[0] = k_packs_epi64(v[0], v[1]);
        u[1] = k_packs_epi64(v[2], v[3]);
        u[2] = k_packs_epi64(v[4], v[5]);
        u[3] = k_packs_epi64(v[6], v[7]);
        u[4] = k_packs_epi64(v[8], v[9]);
        u[5] = k_packs_epi64(v[10], v[11]);
        u[6] = k_packs_epi64(v[12], v[13]);
        u[7] = k_packs_epi64(v[14], v[15]);
        u[8] = k_packs_epi64(v[16], v[17]);
        u[9] = k_packs_epi64(v[18], v[19]);
        u[10] = k_packs_epi64(v[20], v[21]);
        u[11] = k_packs_epi64(v[22], v[23]);
        u[12] = k_packs_epi64(v[24], v[25]);
        u[13] = k_packs_epi64(v[26], v[27]);
        u[14] = k_packs_epi64(v[28], v[29]);
        u[15] = k_packs_epi64(v[30], v[31]);
        v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
        v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
        v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
        v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
        v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
        v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
        v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
        v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
        v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
        v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
        v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
        v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
        v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
        v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
        v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
        v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
        lstep1[36] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
        lstep1[37] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
        lstep1[38] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
        lstep1[39] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
        lstep1[40] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
        lstep1[41] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
        lstep1[42] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
        lstep1[43] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
        lstep1[52] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
        lstep1[53] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
        lstep1[54] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
        lstep1[55] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
        lstep1[56] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
        lstep1[57] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
        lstep1[58] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
        lstep1[59] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
      }
      // stage 5
      {
        lstep2[8] = _mm_add_epi32(lstep1[10], lstep3[8]);
        lstep2[9] = _mm_add_epi32(lstep1[11], lstep3[9]);
        lstep2[10] = _mm_sub_epi32(lstep3[8], lstep1[10]);
        lstep2[11] = _mm_sub_epi32(lstep3[9], lstep1[11]);
        lstep2[12] = _mm_sub_epi32(lstep3[14], lstep1[12]);
        lstep2[13] = _mm_sub_epi32(lstep3[15], lstep1[13]);
        lstep2[14] = _mm_add_epi32(lstep1[12], lstep3[14]);
        lstep2[15] = _mm_add_epi32(lstep1[13], lstep3[15]);
      }
      {
        const __m128i k32_p16_p16 = pair_set_epi32(cospi_16_64, cospi_16_64);
        const __m128i k32_p16_m16 = pair_set_epi32(cospi_16_64, -cospi_16_64);
        const __m128i k32_p24_p08 = pair_set_epi32(cospi_24_64, cospi_8_64);
        const __m128i k32_m08_p24 = pair_set_epi32(-cospi_8_64, cospi_24_64);
        u[0] = _mm_unpacklo_epi32(lstep1[0], lstep1[2]);
        u[1] = _mm_unpackhi_epi32(lstep1[0], lstep1[2]);
        u[2] = _mm_unpacklo_epi32(lstep1[1], lstep1[3]);
        u[3] = _mm_unpackhi_epi32(lstep1[1], lstep1[3]);
        u[4] = _mm_unpacklo_epi32(lstep1[4], lstep1[6]);
        u[5] = _mm_unpackhi_epi32(lstep1[4], lstep1[6]);
        u[6] = _mm_unpacklo_epi32(lstep1[5], lstep1[7]);
        u[7] = _mm_unpackhi_epi32(lstep1[5], lstep1[7]);
        // TODO(jingning): manually inline k_madd_epi32_ to further hide
        // instruction latency.
        v[0] = k_madd_epi32(u[0], k32_p16_p16);
        v[1] = k_madd_epi32(u[1], k32_p16_p16);
        v[2] = k_madd_epi32(u[2], k32_p16_p16);
        v[3] = k_madd_epi32(u[3], k32_p16_p16);
        v[4] = k_madd_epi32(u[0], k32_p16_m16);
        v[5] = k_madd_epi32(u[1], k32_p16_m16);
        v[6] = k_madd_epi32(u[2], k32_p16_m16);
        v[7] = k_madd_epi32(u[3], k32_p16_m16);
        v[8] = k_madd_epi32(u[4], k32_p24_p08);
        v[9] = k_madd_epi32(u[5], k32_p24_p08);
        v[10] = k_madd_epi32(u[6], k32_p24_p08);
        v[11] = k_madd_epi32(u[7], k32_p24_p08);
        v[12] = k_madd_epi32(u[4], k32_m08_p24);
        v[13] = k_madd_epi32(u[5], k32_m08_p24);
        v[14] = k_madd_epi32(u[6], k32_m08_p24);
        v[15] = k_madd_epi32(u[7], k32_m08_p24);
#if DCT_HIGH_BIT_DEPTH
        overflow = k_check_epi32_overflow_16(
            &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
            &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &kZero);
        if (overflow) {
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
          return;
        }
#endif  // DCT_HIGH_BIT_DEPTH
        u[0] = k_packs_epi64(v[0], v[1]);
        u[1] = k_packs_epi64(v[2], v[3]);
        u[2] = k_packs_epi64(v[4], v[5]);
        u[3] = k_packs_epi64(v[6], v[7]);
        u[4] = k_packs_epi64(v[8], v[9]);
        u[5] = k_packs_epi64(v[10], v[11]);
        u[6] = k_packs_epi64(v[12], v[13]);
        u[7] = k_packs_epi64(v[14], v[15]);
        v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
        v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
        v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
        v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
        v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
        v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
        v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
        v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
        u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
        u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
        u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
        u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
        u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
        u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
        u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
        u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
        sign[0] = _mm_cmplt_epi32(u[0], kZero);
        sign[1] = _mm_cmplt_epi32(u[1], kZero);
        sign[2] = _mm_cmplt_epi32(u[2], kZero);
        sign[3] = _mm_cmplt_epi32(u[3], kZero);
        sign[4] = _mm_cmplt_epi32(u[4], kZero);
        sign[5] = _mm_cmplt_epi32(u[5], kZero);
        sign[6] = _mm_cmplt_epi32(u[6], kZero);
        sign[7] = _mm_cmplt_epi32(u[7], kZero);
        u[0] = _mm_sub_epi32(u[0], sign[0]);
        u[1] = _mm_sub_epi32(u[1], sign[1]);
        u[2] = _mm_sub_epi32(u[2], sign[2]);
        u[3] = _mm_sub_epi32(u[3], sign[3]);
        u[4] = _mm_sub_epi32(u[4], sign[4]);
        u[5] = _mm_sub_epi32(u[5], sign[5]);
        u[6] = _mm_sub_epi32(u[6], sign[6]);
        u[7] = _mm_sub_epi32(u[7], sign[7]);
        u[0] = _mm_add_epi32(u[0], K32One);
        u[1] = _mm_add_epi32(u[1], K32One);
        u[2] = _mm_add_epi32(u[2], K32One);
        u[3] = _mm_add_epi32(u[3], K32One);
        u[4] = _mm_add_epi32(u[4], K32One);
        u[5] = _mm_add_epi32(u[5], K32One);
        u[6] = _mm_add_epi32(u[6], K32One);
        u[7] = _mm_add_epi32(u[7], K32One);
        u[0] = _mm_srai_epi32(u[0], 2);
        u[1] = _mm_srai_epi32(u[1], 2);
        u[2] = _mm_srai_epi32(u[2], 2);
        u[3] = _mm_srai_epi32(u[3], 2);
        u[4] = _mm_srai_epi32(u[4], 2);
        u[5] = _mm_srai_epi32(u[5], 2);
        u[6] = _mm_srai_epi32(u[6], 2);
        u[7] = _mm_srai_epi32(u[7], 2);
        // Combine
        out[0] = _mm_packs_epi32(u[0], u[1]);
        out[16] = _mm_packs_epi32(u[2], u[3]);
        out[8] = _mm_packs_epi32(u[4], u[5]);
        out[24] = _mm_packs_epi32(u[6], u[7]);
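        // The cmplt/sub/add/srai run above is a signed round-to-nearest
        // divide by 4: subtracting the all-ones compare mask adds 1 for
        // negative lanes, then the +1 bias and >>2 finish the job. One lane,
        // in scalar form:
        //   out = (x + 1 + (x < 0)) >> 2;
        // which matches the rounding used by the C fdct32x32 path.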
#if DCT_HIGH_BIT_DEPTH
        overflow = check_epi16_overflow_x4(&out[0], &out[16], &out[8], &out[24]);
        if (overflow) {
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
          return;
        }
#endif  // DCT_HIGH_BIT_DEPTH
      }
      {
        const __m128i k32_m08_p24 = pair_set_epi32(-cospi_8_64, cospi_24_64);
        const __m128i k32_m24_m08 = pair_set_epi32(-cospi_24_64, -cospi_8_64);
        const __m128i k32_p24_p08 = pair_set_epi32(cospi_24_64, cospi_8_64);
        u[0] = _mm_unpacklo_epi32(lstep1[18], lstep1[28]);
        u[1] = _mm_unpackhi_epi32(lstep1[18], lstep1[28]);
        u[2] = _mm_unpacklo_epi32(lstep1[19], lstep1[29]);
        u[3] = _mm_unpackhi_epi32(lstep1[19], lstep1[29]);
        u[4] = _mm_unpacklo_epi32(lstep1[20], lstep1[26]);
        u[5] = _mm_unpackhi_epi32(lstep1[20], lstep1[26]);
        u[6] = _mm_unpacklo_epi32(lstep1[21], lstep1[27]);
        u[7] = _mm_unpackhi_epi32(lstep1[21], lstep1[27]);
        v[0] = k_madd_epi32(u[0], k32_m08_p24);
        v[1] = k_madd_epi32(u[1], k32_m08_p24);
        v[2] = k_madd_epi32(u[2], k32_m08_p24);
        v[3] = k_madd_epi32(u[3], k32_m08_p24);
        v[4] = k_madd_epi32(u[4], k32_m24_m08);
        v[5] = k_madd_epi32(u[5], k32_m24_m08);
        v[6] = k_madd_epi32(u[6], k32_m24_m08);
        v[7] = k_madd_epi32(u[7], k32_m24_m08);
        v[8] = k_madd_epi32(u[4], k32_m08_p24);
        v[9] = k_madd_epi32(u[5], k32_m08_p24);
        v[10] = k_madd_epi32(u[6], k32_m08_p24);
        v[11] = k_madd_epi32(u[7], k32_m08_p24);
        v[12] = k_madd_epi32(u[0], k32_p24_p08);
        v[13] = k_madd_epi32(u[1], k32_p24_p08);
        v[14] = k_madd_epi32(u[2], k32_p24_p08);
        v[15] = k_madd_epi32(u[3], k32_p24_p08);
#if DCT_HIGH_BIT_DEPTH
        overflow = k_check_epi32_overflow_16(
            &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
            &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &kZero);
        if (overflow) {
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
          return;
        }
#endif  // DCT_HIGH_BIT_DEPTH
        u[0] = k_packs_epi64(v[0], v[1]);
        u[1] = k_packs_epi64(v[2], v[3]);
        u[2] = k_packs_epi64(v[4], v[5]);
        u[3] = k_packs_epi64(v[6], v[7]);
        u[4] = k_packs_epi64(v[8], v[9]);
        u[5] = k_packs_epi64(v[10], v[11]);
        u[6] = k_packs_epi64(v[12], v[13]);
        u[7] = k_packs_epi64(v[14], v[15]);
        u[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
        u[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
        u[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
        u[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
        u[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
        u[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
        u[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
        u[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
        lstep2[18] = _mm_srai_epi32(u[0], DCT_CONST_BITS);
        lstep2[19] = _mm_srai_epi32(u[1], DCT_CONST_BITS);
        lstep2[20] = _mm_srai_epi32(u[2], DCT_CONST_BITS);
        lstep2[21] = _mm_srai_epi32(u[3], DCT_CONST_BITS);
        lstep2[26] = _mm_srai_epi32(u[4], DCT_CONST_BITS);
        lstep2[27] = _mm_srai_epi32(u[5], DCT_CONST_BITS);
        lstep2[28] = _mm_srai_epi32(u[6], DCT_CONST_BITS);
        lstep2[29] = _mm_srai_epi32(u[7], DCT_CONST_BITS);
      }
      {
        lstep2[32] = _mm_add_epi32(lstep1[38], lstep3[32]);
        lstep2[33] = _mm_add_epi32(lstep1[39], lstep3[33]);
        lstep2[34] = _mm_add_epi32(lstep1[36], lstep3[34]);
        lstep2[35] = _mm_add_epi32(lstep1[37], lstep3[35]);
        lstep2[36] = _mm_sub_epi32(lstep3[34], lstep1[36]);
        lstep2[37] = _mm_sub_epi32(lstep3[35], lstep1[37]);
        lstep2[38] = _mm_sub_epi32(lstep3[32], lstep1[38]);
        lstep2[39] = _mm_sub_epi32(lstep3[33], lstep1[39]);
        lstep2[40] = _mm_sub_epi32(lstep3[46], lstep1[40]);
        lstep2[41] = _mm_sub_epi32(lstep3[47], lstep1[41]);
        lstep2[42] = _mm_sub_epi32(lstep3[44], lstep1[42]);
        lstep2[43] = _mm_sub_epi32(lstep3[45], lstep1[43]);
        lstep2[44] = _mm_add_epi32(lstep1[42], lstep3[44]);
        lstep2[45] = _mm_add_epi32(lstep1[43], lstep3[45]);
        lstep2[46] = _mm_add_epi32(lstep1[40], lstep3[46]);
        lstep2[47] = _mm_add_epi32(lstep1[41], lstep3[47]);
        lstep2[48] = _mm_add_epi32(lstep1[54], lstep3[48]);
        lstep2[49] = _mm_add_epi32(lstep1[55], lstep3[49]);
        lstep2[50] = _mm_add_epi32(lstep1[52], lstep3[50]);
        lstep2[51] = _mm_add_epi32(lstep1[53], lstep3[51]);
        lstep2[52] = _mm_sub_epi32(lstep3[50], lstep1[52]);
        lstep2[53] = _mm_sub_epi32(lstep3[51], lstep1[53]);
        lstep2[54] = _mm_sub_epi32(lstep3[48], lstep1[54]);
        lstep2[55] = _mm_sub_epi32(lstep3[49], lstep1[55]);
        lstep2[56] = _mm_sub_epi32(lstep3[62], lstep1[56]);
        lstep2[57] = _mm_sub_epi32(lstep3[63], lstep1[57]);
        lstep2[58] = _mm_sub_epi32(lstep3[60], lstep1[58]);
        lstep2[59] = _mm_sub_epi32(lstep3[61], lstep1[59]);
        lstep2[60] = _mm_add_epi32(lstep1[58], lstep3[60]);
        lstep2[61] = _mm_add_epi32(lstep1[59], lstep3[61]);
        lstep2[62] = _mm_add_epi32(lstep1[56], lstep3[62]);
        lstep2[63] = _mm_add_epi32(lstep1[57], lstep3[63]);
      }
      // stage 6
      {
        const __m128i k32_p28_p04 = pair_set_epi32(cospi_28_64, cospi_4_64);
        const __m128i k32_p12_p20 = pair_set_epi32(cospi_12_64, cospi_20_64);
        const __m128i k32_m20_p12 = pair_set_epi32(-cospi_20_64, cospi_12_64);
        const __m128i k32_m04_p28 = pair_set_epi32(-cospi_4_64, cospi_28_64);
        u[0] = _mm_unpacklo_epi32(lstep2[8], lstep2[14]);
        u[1] = _mm_unpackhi_epi32(lstep2[8], lstep2[14]);
        u[2] = _mm_unpacklo_epi32(lstep2[9], lstep2[15]);
        u[3] = _mm_unpackhi_epi32(lstep2[9], lstep2[15]);
        u[4] = _mm_unpacklo_epi32(lstep2[10], lstep2[12]);
        u[5] = _mm_unpackhi_epi32(lstep2[10], lstep2[12]);
        u[6] = _mm_unpacklo_epi32(lstep2[11], lstep2[13]);
        u[7] = _mm_unpackhi_epi32(lstep2[11], lstep2[13]);
        u[8] = _mm_unpacklo_epi32(lstep2[10], lstep2[12]);
        u[9] = _mm_unpackhi_epi32(lstep2[10], lstep2[12]);
        u[10] = _mm_unpacklo_epi32(lstep2[11], lstep2[13]);
        u[11] = _mm_unpackhi_epi32(lstep2[11], lstep2[13]);
        u[12] = _mm_unpacklo_epi32(lstep2[8], lstep2[14]);
        u[13] = _mm_unpackhi_epi32(lstep2[8], lstep2[14]);
        u[14] = _mm_unpacklo_epi32(lstep2[9], lstep2[15]);
        u[15] = _mm_unpackhi_epi32(lstep2[9], lstep2[15]);
        v[0] = k_madd_epi32(u[0], k32_p28_p04);
        v[1] = k_madd_epi32(u[1], k32_p28_p04);
        v[2] = k_madd_epi32(u[2], k32_p28_p04);
        v[3] = k_madd_epi32(u[3], k32_p28_p04);
        v[4] = k_madd_epi32(u[4], k32_p12_p20);
        v[5] = k_madd_epi32(u[5], k32_p12_p20);
        v[6] = k_madd_epi32(u[6], k32_p12_p20);
        v[7] = k_madd_epi32(u[7], k32_p12_p20);
        v[8] = k_madd_epi32(u[8], k32_m20_p12);
        v[9] = k_madd_epi32(u[9], k32_m20_p12);
        v[10] = k_madd_epi32(u[10], k32_m20_p12);
        v[11] = k_madd_epi32(u[11], k32_m20_p12);
        v[12] = k_madd_epi32(u[12], k32_m04_p28);
        v[13] = k_madd_epi32(u[13], k32_m04_p28);
        v[14] = k_madd_epi32(u[14], k32_m04_p28);
        v[15] = k_madd_epi32(u[15], k32_m04_p28);
#if DCT_HIGH_BIT_DEPTH
        overflow = k_check_epi32_overflow_16(
            &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
            &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &kZero);
        if (overflow) {
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
          return;
        }
#endif  // DCT_HIGH_BIT_DEPTH
        u[0] = k_packs_epi64(v[0], v[1]);
        u[1] = k_packs_epi64(v[2], v[3]);
        u[2] = k_packs_epi64(v[4], v[5]);
        u[3] = k_packs_epi64(v[6], v[7]);
        u[4] = k_packs_epi64(v[8], v[9]);
        u[5] = k_packs_epi64(v[10], v[11]);
        u[6] = k_packs_epi64(v[12], v[13]);
        u[7] = k_packs_epi64(v[14], v[15]);
        v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
        v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
        v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
        v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
        v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
        v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
        v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
        v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
        u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
        u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
        u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
        u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
        u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
        u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
        u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
        u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
        sign[0] = _mm_cmplt_epi32(u[0], kZero);
        sign[1] = _mm_cmplt_epi32(u[1], kZero);
        sign[2] = _mm_cmplt_epi32(u[2], kZero);
        sign[3] = _mm_cmplt_epi32(u[3], kZero);
        sign[4] = _mm_cmplt_epi32(u[4], kZero);
        sign[5] = _mm_cmplt_epi32(u[5], kZero);
        sign[6] = _mm_cmplt_epi32(u[6], kZero);
        sign[7] = _mm_cmplt_epi32(u[7], kZero);
        u[0] = _mm_sub_epi32(u[0], sign[0]);
        u[1] = _mm_sub_epi32(u[1], sign[1]);
        u[2] = _mm_sub_epi32(u[2], sign[2]);
        u[3] = _mm_sub_epi32(u[3], sign[3]);
        u[4] = _mm_sub_epi32(u[4], sign[4]);
        u[5] = _mm_sub_epi32(u[5], sign[5]);
        u[6] = _mm_sub_epi32(u[6], sign[6]);
        u[7] = _mm_sub_epi32(u[7], sign[7]);
        u[0] = _mm_add_epi32(u[0], K32One);
        u[1] = _mm_add_epi32(u[1], K32One);
        u[2] = _mm_add_epi32(u[2], K32One);
        u[3] = _mm_add_epi32(u[3], K32One);
        u[4] = _mm_add_epi32(u[4], K32One);
        u[5] = _mm_add_epi32(u[5], K32One);
        u[6] = _mm_add_epi32(u[6], K32One);
        u[7] = _mm_add_epi32(u[7], K32One);
        u[0] = _mm_srai_epi32(u[0], 2);
        u[1] = _mm_srai_epi32(u[1], 2);
        u[2] = _mm_srai_epi32(u[2], 2);
        u[3] = _mm_srai_epi32(u[3], 2);
        u[4] = _mm_srai_epi32(u[4], 2);
        u[5] = _mm_srai_epi32(u[5], 2);
        u[6] = _mm_srai_epi32(u[6], 2);
        u[7] = _mm_srai_epi32(u[7], 2);
        out[4] = _mm_packs_epi32(u[0], u[1]);
        out[20] = _mm_packs_epi32(u[2], u[3]);
        out[12] = _mm_packs_epi32(u[4], u[5]);
        out[28] = _mm_packs_epi32(u[6], u[7]);
#if DCT_HIGH_BIT_DEPTH
        overflow = check_epi16_overflow_x4(&out[4], &out[20], &out[12], &out[28]);
        if (overflow) {
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
          return;
        }
#endif  // DCT_HIGH_BIT_DEPTH
      }
      {
        lstep3[16] = _mm_add_epi32(lstep2[18], lstep1[16]);
        lstep3[17] = _mm_add_epi32(lstep2[19], lstep1[17]);
        lstep3[18] = _mm_sub_epi32(lstep1[16], lstep2[18]);
        lstep3[19] = _mm_sub_epi32(lstep1[17], lstep2[19]);
        lstep3[20] = _mm_sub_epi32(lstep1[22], lstep2[20]);
        lstep3[21] = _mm_sub_epi32(lstep1[23], lstep2[21]);
        lstep3[22] = _mm_add_epi32(lstep2[20], lstep1[22]);
        lstep3[23] = _mm_add_epi32(lstep2[21], lstep1[23]);
        lstep3[24] = _mm_add_epi32(lstep2[26], lstep1[24]);
        lstep3[25] = _mm_add_epi32(lstep2[27], lstep1[25]);
        lstep3[26] = _mm_sub_epi32(lstep1[24], lstep2[26]);
        lstep3[27] = _mm_sub_epi32(lstep1[25], lstep2[27]);
        lstep3[28] = _mm_sub_epi32(lstep1[30], lstep2[28]);
        lstep3[29] = _mm_sub_epi32(lstep1[31], lstep2[29]);
        lstep3[30] = _mm_add_epi32(lstep2[28], lstep1[30]);
        lstep3[31] = _mm_add_epi32(lstep2[29], lstep1[31]);
      }
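      // Stage 6 finishes the even half (out[4]/out[20]/out[12]/out[28]
      // above); the next block rotates the 32-bit odd-half terms by the
      // +/-cospi_4/cospi_28 and +/-cospi_20/cospi_12 pairs, producing the
      // lstep3[34..61] values consumed by the lstep1[32..63] recombination
      // ahead of stage 8.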
      {
        const __m128i k32_m04_p28 = pair_set_epi32(-cospi_4_64, cospi_28_64);
        const __m128i k32_m28_m04 = pair_set_epi32(-cospi_28_64, -cospi_4_64);
        const __m128i k32_m20_p12 = pair_set_epi32(-cospi_20_64, cospi_12_64);
        const __m128i k32_m12_m20 = pair_set_epi32(-cospi_12_64, -cospi_20_64);
        const __m128i k32_p12_p20 = pair_set_epi32(cospi_12_64, cospi_20_64);
        const __m128i k32_p28_p04 = pair_set_epi32(cospi_28_64, cospi_4_64);
        u[0] = _mm_unpacklo_epi32(lstep2[34], lstep2[60]);
        u[1] = _mm_unpackhi_epi32(lstep2[34], lstep2[60]);
        u[2] = _mm_unpacklo_epi32(lstep2[35], lstep2[61]);
        u[3] = _mm_unpackhi_epi32(lstep2[35], lstep2[61]);
        u[4] = _mm_unpacklo_epi32(lstep2[36], lstep2[58]);
        u[5] = _mm_unpackhi_epi32(lstep2[36], lstep2[58]);
        u[6] = _mm_unpacklo_epi32(lstep2[37], lstep2[59]);
        u[7] = _mm_unpackhi_epi32(lstep2[37], lstep2[59]);
        u[8] = _mm_unpacklo_epi32(lstep2[42], lstep2[52]);
        u[9] = _mm_unpackhi_epi32(lstep2[42], lstep2[52]);
        u[10] = _mm_unpacklo_epi32(lstep2[43], lstep2[53]);
        u[11] = _mm_unpackhi_epi32(lstep2[43], lstep2[53]);
        u[12] = _mm_unpacklo_epi32(lstep2[44], lstep2[50]);
        u[13] = _mm_unpackhi_epi32(lstep2[44], lstep2[50]);
        u[14] = _mm_unpacklo_epi32(lstep2[45], lstep2[51]);
        u[15] = _mm_unpackhi_epi32(lstep2[45], lstep2[51]);
        v[0] = k_madd_epi32(u[0], k32_m04_p28);
        v[1] = k_madd_epi32(u[1], k32_m04_p28);
        v[2] = k_madd_epi32(u[2], k32_m04_p28);
        v[3] = k_madd_epi32(u[3], k32_m04_p28);
        v[4] = k_madd_epi32(u[4], k32_m28_m04);
        v[5] = k_madd_epi32(u[5], k32_m28_m04);
        v[6] = k_madd_epi32(u[6], k32_m28_m04);
        v[7] = k_madd_epi32(u[7], k32_m28_m04);
        v[8] = k_madd_epi32(u[8], k32_m20_p12);
        v[9] = k_madd_epi32(u[9], k32_m20_p12);
        v[10] = k_madd_epi32(u[10], k32_m20_p12);
        v[11] = k_madd_epi32(u[11], k32_m20_p12);
        v[12] = k_madd_epi32(u[12], k32_m12_m20);
        v[13] = k_madd_epi32(u[13], k32_m12_m20);
        v[14] = k_madd_epi32(u[14], k32_m12_m20);
        v[15] = k_madd_epi32(u[15], k32_m12_m20);
        v[16] = k_madd_epi32(u[12], k32_m20_p12);
        v[17] = k_madd_epi32(u[13], k32_m20_p12);
        v[18] = k_madd_epi32(u[14], k32_m20_p12);
        v[19] = k_madd_epi32(u[15], k32_m20_p12);
        v[20] = k_madd_epi32(u[8], k32_p12_p20);
        v[21] = k_madd_epi32(u[9], k32_p12_p20);
        v[22] = k_madd_epi32(u[10], k32_p12_p20);
        v[23] = k_madd_epi32(u[11], k32_p12_p20);
        v[24] = k_madd_epi32(u[4], k32_m04_p28);
        v[25] = k_madd_epi32(u[5], k32_m04_p28);
        v[26] = k_madd_epi32(u[6], k32_m04_p28);
        v[27] = k_madd_epi32(u[7], k32_m04_p28);
        v[28] = k_madd_epi32(u[0], k32_p28_p04);
        v[29] = k_madd_epi32(u[1], k32_p28_p04);
        v[30] = k_madd_epi32(u[2], k32_p28_p04);
        v[31] = k_madd_epi32(u[3], k32_p28_p04);
#if DCT_HIGH_BIT_DEPTH
        overflow = k_check_epi32_overflow_32(
            &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
            &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
            &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
            &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
        if (overflow) {
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
          return;
        }
#endif  // DCT_HIGH_BIT_DEPTH
        u[0] = k_packs_epi64(v[0], v[1]);
        u[1] = k_packs_epi64(v[2], v[3]);
        u[2] = k_packs_epi64(v[4], v[5]);
        u[3] = k_packs_epi64(v[6], v[7]);
        u[4] = k_packs_epi64(v[8], v[9]);
        u[5] = k_packs_epi64(v[10], v[11]);
        u[6] = k_packs_epi64(v[12], v[13]);
        u[7] = k_packs_epi64(v[14], v[15]);
        u[8] = k_packs_epi64(v[16], v[17]);
        u[9] = k_packs_epi64(v[18], v[19]);
        u[10] = k_packs_epi64(v[20], v[21]);
        u[11] = k_packs_epi64(v[22], v[23]);
        u[12] = k_packs_epi64(v[24], v[25]);
        u[13] = k_packs_epi64(v[26], v[27]);
        u[14] = k_packs_epi64(v[28], v[29]);
        u[15] = k_packs_epi64(v[30], v[31]);
        v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
        v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
        v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
        v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
        v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
        v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
        v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
        v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
        v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
        v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
        v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
        v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
        v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
        v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
        v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
        v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
        lstep3[34] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
        lstep3[35] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
        lstep3[36] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
        lstep3[37] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
        lstep3[42] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
        lstep3[43] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
        lstep3[44] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
        lstep3[45] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
        lstep3[50] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
        lstep3[51] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
        lstep3[52] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
        lstep3[53] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
        lstep3[58] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
        lstep3[59] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
        lstep3[60] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
        lstep3[61] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
      }
      // stage 7
      {
        const __m128i k32_p30_p02 = pair_set_epi32(cospi_30_64, cospi_2_64);
        const __m128i k32_p14_p18 = pair_set_epi32(cospi_14_64, cospi_18_64);
        const __m128i k32_p22_p10 = pair_set_epi32(cospi_22_64, cospi_10_64);
        const __m128i k32_p06_p26 = pair_set_epi32(cospi_6_64, cospi_26_64);
        const __m128i k32_m26_p06 = pair_set_epi32(-cospi_26_64, cospi_6_64);
        const __m128i k32_m10_p22 = pair_set_epi32(-cospi_10_64, cospi_22_64);
        const __m128i k32_m18_p14 = pair_set_epi32(-cospi_18_64, cospi_14_64);
        const __m128i k32_m02_p30 = pair_set_epi32(-cospi_2_64, cospi_30_64);
        u[0] = _mm_unpacklo_epi32(lstep3[16], lstep3[30]);
        u[1] = _mm_unpackhi_epi32(lstep3[16], lstep3[30]);
        u[2] = _mm_unpacklo_epi32(lstep3[17], lstep3[31]);
        u[3] = _mm_unpackhi_epi32(lstep3[17], lstep3[31]);
        u[4] = _mm_unpacklo_epi32(lstep3[18], lstep3[28]);
        u[5] = _mm_unpackhi_epi32(lstep3[18], lstep3[28]);
        u[6] = _mm_unpacklo_epi32(lstep3[19], lstep3[29]);
        u[7] = _mm_unpackhi_epi32(lstep3[19], lstep3[29]);
        u[8] = _mm_unpacklo_epi32(lstep3[20], lstep3[26]);
        u[9] = _mm_unpackhi_epi32(lstep3[20], lstep3[26]);
        u[10] = _mm_unpacklo_epi32(lstep3[21], lstep3[27]);
        u[11] = _mm_unpackhi_epi32(lstep3[21], lstep3[27]);
        u[12] = _mm_unpacklo_epi32(lstep3[22], lstep3[24]);
        u[13] = _mm_unpackhi_epi32(lstep3[22], lstep3[24]);
        u[14] = _mm_unpacklo_epi32(lstep3[23], lstep3[25]);
        u[15] = _mm_unpackhi_epi32(lstep3[23], lstep3[25]);
        v[0] = k_madd_epi32(u[0], k32_p30_p02);
        v[1] = k_madd_epi32(u[1], k32_p30_p02);
        v[2] = k_madd_epi32(u[2], k32_p30_p02);
        v[3] = k_madd_epi32(u[3], k32_p30_p02);
        v[4] = k_madd_epi32(u[4], k32_p14_p18);
        v[5] = k_madd_epi32(u[5], k32_p14_p18);
        v[6] = k_madd_epi32(u[6], k32_p14_p18);
        v[7] = k_madd_epi32(u[7], k32_p14_p18);
        v[8] = k_madd_epi32(u[8], k32_p22_p10);
        v[9] = k_madd_epi32(u[9], k32_p22_p10);
        v[10] = k_madd_epi32(u[10], k32_p22_p10);
        v[11] = k_madd_epi32(u[11], k32_p22_p10);
        v[12] = k_madd_epi32(u[12], k32_p06_p26);
        v[13] = k_madd_epi32(u[13], k32_p06_p26);
        v[14] = k_madd_epi32(u[14], k32_p06_p26);
        v[15] = k_madd_epi32(u[15], k32_p06_p26);
        v[16] = k_madd_epi32(u[12], k32_m26_p06);
        v[17] = k_madd_epi32(u[13], k32_m26_p06);
        v[18] = k_madd_epi32(u[14], k32_m26_p06);
        v[19] = k_madd_epi32(u[15], k32_m26_p06);
        v[20] = k_madd_epi32(u[8], k32_m10_p22);
        v[21] = k_madd_epi32(u[9], k32_m10_p22);
        v[22] = k_madd_epi32(u[10], k32_m10_p22);
        v[23] = k_madd_epi32(u[11], k32_m10_p22);
        v[24] = k_madd_epi32(u[4], k32_m18_p14);
        v[25] = k_madd_epi32(u[5], k32_m18_p14);
        v[26] = k_madd_epi32(u[6], k32_m18_p14);
        v[27] = k_madd_epi32(u[7], k32_m18_p14);
        v[28] = k_madd_epi32(u[0], k32_m02_p30);
        v[29] = k_madd_epi32(u[1], k32_m02_p30);
        v[30] = k_madd_epi32(u[2], k32_m02_p30);
        v[31] = k_madd_epi32(u[3], k32_m02_p30);
#if DCT_HIGH_BIT_DEPTH
        overflow = k_check_epi32_overflow_32(
            &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
            &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
            &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
            &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
        if (overflow) {
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
          return;
        }
#endif  // DCT_HIGH_BIT_DEPTH
        u[0] = k_packs_epi64(v[0], v[1]);
        u[1] = k_packs_epi64(v[2], v[3]);
        u[2] = k_packs_epi64(v[4], v[5]);
        u[3] = k_packs_epi64(v[6], v[7]);
        u[4] = k_packs_epi64(v[8], v[9]);
        u[5] = k_packs_epi64(v[10], v[11]);
        u[6] = k_packs_epi64(v[12], v[13]);
        u[7] = k_packs_epi64(v[14], v[15]);
        u[8] = k_packs_epi64(v[16], v[17]);
        u[9] = k_packs_epi64(v[18], v[19]);
        u[10] = k_packs_epi64(v[20], v[21]);
        u[11] = k_packs_epi64(v[22], v[23]);
        u[12] = k_packs_epi64(v[24], v[25]);
        u[13] = k_packs_epi64(v[26], v[27]);
        u[14] = k_packs_epi64(v[28], v[29]);
        u[15] = k_packs_epi64(v[30], v[31]);
        v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING);
        v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING);
        v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING);
        v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING);
        v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING);
        v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING);
        v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING);
        v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING);
        v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING);
        v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING);
        v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING);
        v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING);
        v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING);
        v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING);
        v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING);
        v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING);
        u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS);
        u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS);
        u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS);
        u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS);
        u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS);
        u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS);
        u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS);
        u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS);
        u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS);
        u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS);
        u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS);
        u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS);
        u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS);
        u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS);
        u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS);
        u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS);
        v[0] = _mm_cmplt_epi32(u[0], kZero);
        v[1] = _mm_cmplt_epi32(u[1], kZero);
        v[2] = _mm_cmplt_epi32(u[2], kZero);
        v[3] = _mm_cmplt_epi32(u[3], kZero);
        v[4] = _mm_cmplt_epi32(u[4], kZero);
        v[5] = _mm_cmplt_epi32(u[5], kZero);
        v[6] = _mm_cmplt_epi32(u[6], kZero);
        v[7] = _mm_cmplt_epi32(u[7], kZero);
        v[8] = _mm_cmplt_epi32(u[8], kZero);
        v[9] = _mm_cmplt_epi32(u[9], kZero);
        v[10] = _mm_cmplt_epi32(u[10], kZero);
        v[11] = _mm_cmplt_epi32(u[11], kZero);
        v[12] = _mm_cmplt_epi32(u[12], kZero);
        v[13] = _mm_cmplt_epi32(u[13], kZero);
        v[14] = _mm_cmplt_epi32(u[14], kZero);
        v[15] = _mm_cmplt_epi32(u[15], kZero);
        u[0] = _mm_sub_epi32(u[0], v[0]);
        u[1] = _mm_sub_epi32(u[1], v[1]);
        u[2] = _mm_sub_epi32(u[2], v[2]);
        u[3] = _mm_sub_epi32(u[3], v[3]);
        u[4] = _mm_sub_epi32(u[4], v[4]);
        u[5] = _mm_sub_epi32(u[5], v[5]);
        u[6] = _mm_sub_epi32(u[6], v[6]);
        u[7] = _mm_sub_epi32(u[7], v[7]);
        u[8] = _mm_sub_epi32(u[8], v[8]);
        u[9] = _mm_sub_epi32(u[9], v[9]);
        u[10] = _mm_sub_epi32(u[10], v[10]);
        u[11] = _mm_sub_epi32(u[11], v[11]);
        u[12] = _mm_sub_epi32(u[12], v[12]);
        u[13] = _mm_sub_epi32(u[13], v[13]);
        u[14] = _mm_sub_epi32(u[14], v[14]);
        u[15] = _mm_sub_epi32(u[15], v[15]);
        v[0] = _mm_add_epi32(u[0], K32One);
        v[1] = _mm_add_epi32(u[1], K32One);
        v[2] = _mm_add_epi32(u[2], K32One);
        v[3] = _mm_add_epi32(u[3], K32One);
        v[4] = _mm_add_epi32(u[4], K32One);
        v[5] = _mm_add_epi32(u[5], K32One);
        v[6] = _mm_add_epi32(u[6], K32One);
        v[7] = _mm_add_epi32(u[7], K32One);
        v[8] = _mm_add_epi32(u[8], K32One);
        v[9] = _mm_add_epi32(u[9], K32One);
        v[10] = _mm_add_epi32(u[10], K32One);
        v[11] = _mm_add_epi32(u[11], K32One);
        v[12] = _mm_add_epi32(u[12], K32One);
        v[13] = _mm_add_epi32(u[13], K32One);
        v[14] = _mm_add_epi32(u[14], K32One);
        v[15] = _mm_add_epi32(u[15], K32One);
        u[0] = _mm_srai_epi32(v[0], 2);
        u[1] = _mm_srai_epi32(v[1], 2);
        u[2] = _mm_srai_epi32(v[2], 2);
        u[3] = _mm_srai_epi32(v[3], 2);
        u[4] = _mm_srai_epi32(v[4], 2);
        u[5] = _mm_srai_epi32(v[5], 2);
        u[6] = _mm_srai_epi32(v[6], 2);
        u[7] = _mm_srai_epi32(v[7], 2);
        u[8] = _mm_srai_epi32(v[8], 2);
        u[9] = _mm_srai_epi32(v[9], 2);
        u[10] = _mm_srai_epi32(v[10], 2);
        u[11] = _mm_srai_epi32(v[11], 2);
        u[12] = _mm_srai_epi32(v[12], 2);
        u[13] = _mm_srai_epi32(v[13], 2);
        u[14] = _mm_srai_epi32(v[14], 2);
        u[15] = _mm_srai_epi32(v[15], 2);
        out[2] = _mm_packs_epi32(u[0], u[1]);
        out[18] = _mm_packs_epi32(u[2], u[3]);
        out[10] = _mm_packs_epi32(u[4], u[5]);
        out[26] = _mm_packs_epi32(u[6], u[7]);
        out[6] = _mm_packs_epi32(u[8], u[9]);
        out[22] = _mm_packs_epi32(u[10], u[11]);
        out[14] = _mm_packs_epi32(u[12], u[13]);
        out[30] = _mm_packs_epi32(u[14], u[15]);
#if DCT_HIGH_BIT_DEPTH
        overflow = check_epi16_overflow_x8(&out[2], &out[18], &out[10],
                                           &out[26], &out[6], &out[22],
                                           &out[14], &out[30]);
        if (overflow) {
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
          return;
        }
#endif  // DCT_HIGH_BIT_DEPTH
      }
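      // With these even outputs done, the recombination below builds
      // lstep1[32..63] from the stage-6/7 odd-half terms; stage 8 then
      // rotates those pairs by the odd cosine constants (cospi_1_64 through
      // cospi_31_64) to produce the remaining odd-indexed outputs.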
      {
        lstep1[32] = _mm_add_epi32(lstep3[34], lstep2[32]);
        lstep1[33] = _mm_add_epi32(lstep3[35], lstep2[33]);
        lstep1[34] = _mm_sub_epi32(lstep2[32], lstep3[34]);
        lstep1[35] = _mm_sub_epi32(lstep2[33], lstep3[35]);
        lstep1[36] = _mm_sub_epi32(lstep2[38], lstep3[36]);
        lstep1[37] = _mm_sub_epi32(lstep2[39], lstep3[37]);
        lstep1[38] = _mm_add_epi32(lstep3[36], lstep2[38]);
        lstep1[39] = _mm_add_epi32(lstep3[37], lstep2[39]);
        lstep1[40] = _mm_add_epi32(lstep3[42], lstep2[40]);
        lstep1[41] = _mm_add_epi32(lstep3[43], lstep2[41]);
        lstep1[42] = _mm_sub_epi32(lstep2[40], lstep3[42]);
        lstep1[43] = _mm_sub_epi32(lstep2[41], lstep3[43]);
        lstep1[44] = _mm_sub_epi32(lstep2[46], lstep3[44]);
        lstep1[45] = _mm_sub_epi32(lstep2[47], lstep3[45]);
        lstep1[46] = _mm_add_epi32(lstep3[44], lstep2[46]);
        lstep1[47] = _mm_add_epi32(lstep3[45], lstep2[47]);
        lstep1[48] = _mm_add_epi32(lstep3[50], lstep2[48]);
        lstep1[49] = _mm_add_epi32(lstep3[51], lstep2[49]);
        lstep1[50] = _mm_sub_epi32(lstep2[48], lstep3[50]);
        lstep1[51] = _mm_sub_epi32(lstep2[49], lstep3[51]);
        lstep1[52] = _mm_sub_epi32(lstep2[54], lstep3[52]);
        lstep1[53] = _mm_sub_epi32(lstep2[55], lstep3[53]);
        lstep1[54] = _mm_add_epi32(lstep3[52], lstep2[54]);
        lstep1[55] = _mm_add_epi32(lstep3[53], lstep2[55]);
        lstep1[56] = _mm_add_epi32(lstep3[58], lstep2[56]);
        lstep1[57] = _mm_add_epi32(lstep3[59], lstep2[57]);
        lstep1[58] = _mm_sub_epi32(lstep2[56], lstep3[58]);
        lstep1[59] = _mm_sub_epi32(lstep2[57], lstep3[59]);
        lstep1[60] = _mm_sub_epi32(lstep2[62], lstep3[60]);
        lstep1[61] = _mm_sub_epi32(lstep2[63], lstep3[61]);
        lstep1[62] = _mm_add_epi32(lstep3[60], lstep2[62]);
        lstep1[63] = _mm_add_epi32(lstep3[61], lstep2[63]);
      }
      // stage 8
      {
        const __m128i k32_p31_p01 = pair_set_epi32(cospi_31_64, cospi_1_64);
        const __m128i k32_p15_p17 = pair_set_epi32(cospi_15_64, cospi_17_64);
        const __m128i k32_p23_p09 = pair_set_epi32(cospi_23_64, cospi_9_64);
        const __m128i k32_p07_p25 = pair_set_epi32(cospi_7_64, cospi_25_64);
        const __m128i k32_m25_p07 = pair_set_epi32(-cospi_25_64, cospi_7_64);
        const __m128i k32_m09_p23 = pair_set_epi32(-cospi_9_64, cospi_23_64);
        const __m128i k32_m17_p15 = pair_set_epi32(-cospi_17_64, cospi_15_64);
        const __m128i k32_m01_p31 = pair_set_epi32(-cospi_1_64, cospi_31_64);
        u[0] = _mm_unpacklo_epi32(lstep1[32], lstep1[62]);
        u[1] = _mm_unpackhi_epi32(lstep1[32], lstep1[62]);
        u[2] = _mm_unpacklo_epi32(lstep1[33], lstep1[63]);
        u[3] = _mm_unpackhi_epi32(lstep1[33], lstep1[63]);
        u[4] = _mm_unpacklo_epi32(lstep1[34], lstep1[60]);
        u[5] = _mm_unpackhi_epi32(lstep1[34], lstep1[60]);
        u[6] = _mm_unpacklo_epi32(lstep1[35], lstep1[61]);
        u[7] = _mm_unpackhi_epi32(lstep1[35], lstep1[61]);
        u[8] = _mm_unpacklo_epi32(lstep1[36], lstep1[58]);
        u[9] = _mm_unpackhi_epi32(lstep1[36], lstep1[58]);
        u[10] = _mm_unpacklo_epi32(lstep1[37], lstep1[59]);
        u[11] = _mm_unpackhi_epi32(lstep1[37], lstep1[59]);
        u[12] = _mm_unpacklo_epi32(lstep1[38], lstep1[56]);
        u[13] = _mm_unpackhi_epi32(lstep1[38], lstep1[56]);
        u[14] = _mm_unpacklo_epi32(lstep1[39], lstep1[57]);
        u[15] = _mm_unpackhi_epi32(lstep1[39], lstep1[57]);
        v[0] = k_madd_epi32(u[0], k32_p31_p01);
        v[1] = k_madd_epi32(u[1], k32_p31_p01);
        v[2] = k_madd_epi32(u[2], k32_p31_p01);
        v[3] = k_madd_epi32(u[3], k32_p31_p01);
        v[4] = k_madd_epi32(u[4], k32_p15_p17);
        v[5] = k_madd_epi32(u[5], k32_p15_p17);
        v[6] = k_madd_epi32(u[6], k32_p15_p17);
        v[7] = k_madd_epi32(u[7], k32_p15_p17);
        v[8] = k_madd_epi32(u[8], k32_p23_p09);
        v[9] = k_madd_epi32(u[9], k32_p23_p09);
        v[10] = k_madd_epi32(u[10], k32_p23_p09);
        v[11] = k_madd_epi32(u[11], k32_p23_p09);
        v[12] = k_madd_epi32(u[12], k32_p07_p25);
        v[13] = k_madd_epi32(u[13], k32_p07_p25);
        v[14] = k_madd_epi32(u[14], k32_p07_p25);
        v[15] = k_madd_epi32(u[15], k32_p07_p25);
        v[16] = k_madd_epi32(u[12], k32_m25_p07);
        v[17] = k_madd_epi32(u[13], k32_m25_p07);
        v[18] = k_madd_epi32(u[14], k32_m25_p07);
        v[19] = k_madd_epi32(u[15], k32_m25_p07);
        v[20] = k_madd_epi32(u[8], k32_m09_p23);
        v[21] = k_madd_epi32(u[9], k32_m09_p23);
        v[22] = k_madd_epi32(u[10], k32_m09_p23);
        v[23] = k_madd_epi32(u[11], k32_m09_p23);
        v[24] = k_madd_epi32(u[4], k32_m17_p15);
        v[25] = k_madd_epi32(u[5], k32_m17_p15);
        v[26] = k_madd_epi32(u[6], k32_m17_p15);
        v[27] = k_madd_epi32(u[7], k32_m17_p15);
        v[28] = k_madd_epi32(u[0], k32_m01_p31);
        v[29] = k_madd_epi32(u[1], k32_m01_p31);
        v[30] = k_madd_epi32(u[2], k32_m01_p31);
        v[31] = k_madd_epi32(u[3], k32_m01_p31);
#if DCT_HIGH_BIT_DEPTH
        overflow = k_check_epi32_overflow_32(
            &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8],
            &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16],
            &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24],
            &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero);
        if (overflow) {
          HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org);
          return;
        }
#endif  // DCT_HIGH_BIT_DEPTH
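        // Final narrowing sequence for these odd outputs: pack the 64-bit
        // multiply-accumulate lanes back to 32 bits, apply the
        // dct_const_round_shift rounding, then the same signed round-by-4
        // before the 16-bit pack into out[1], out[17], ..., out[31].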
= k_packs_epi64(v[18], v[19]); u[10] = k_packs_epi64(v[20], v[21]); u[11] = k_packs_epi64(v[22], v[23]); u[12] = k_packs_epi64(v[24], v[25]); u[13] = k_packs_epi64(v[26], v[27]); u[14] = k_packs_epi64(v[28], v[29]); u[15] = k_packs_epi64(v[30], v[31]); v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS); u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); v[0] = _mm_cmplt_epi32(u[0], kZero); v[1] = _mm_cmplt_epi32(u[1], kZero); v[2] = _mm_cmplt_epi32(u[2], kZero); v[3] = _mm_cmplt_epi32(u[3], kZero); v[4] = _mm_cmplt_epi32(u[4], kZero); v[5] = _mm_cmplt_epi32(u[5], kZero); v[6] = _mm_cmplt_epi32(u[6], kZero); v[7] = _mm_cmplt_epi32(u[7], kZero); v[8] = _mm_cmplt_epi32(u[8], kZero); v[9] = _mm_cmplt_epi32(u[9], kZero); v[10] = _mm_cmplt_epi32(u[10], kZero); v[11] = _mm_cmplt_epi32(u[11], kZero); v[12] = _mm_cmplt_epi32(u[12], kZero); v[13] = _mm_cmplt_epi32(u[13], kZero); v[14] = _mm_cmplt_epi32(u[14], kZero); v[15] = _mm_cmplt_epi32(u[15], kZero); u[0] = _mm_sub_epi32(u[0], v[0]); u[1] = _mm_sub_epi32(u[1], v[1]); u[2] = _mm_sub_epi32(u[2], v[2]); u[3] = _mm_sub_epi32(u[3], v[3]); u[4] = _mm_sub_epi32(u[4], v[4]); u[5] = _mm_sub_epi32(u[5], v[5]); u[6] = _mm_sub_epi32(u[6], v[6]); u[7] = _mm_sub_epi32(u[7], v[7]); u[8] = _mm_sub_epi32(u[8], v[8]); u[9] = _mm_sub_epi32(u[9], v[9]); u[10] = _mm_sub_epi32(u[10], v[10]); u[11] = _mm_sub_epi32(u[11], v[11]); u[12] = _mm_sub_epi32(u[12], v[12]); u[13] = _mm_sub_epi32(u[13], v[13]); u[14] = _mm_sub_epi32(u[14], v[14]); u[15] = _mm_sub_epi32(u[15], v[15]); v[0] = _mm_add_epi32(u[0], K32One); v[1] = _mm_add_epi32(u[1], K32One); v[2] = _mm_add_epi32(u[2], K32One); v[3] = _mm_add_epi32(u[3], K32One); v[4] = _mm_add_epi32(u[4], K32One); v[5] = _mm_add_epi32(u[5], K32One); v[6] = _mm_add_epi32(u[6], K32One); v[7] = _mm_add_epi32(u[7], K32One); v[8] = _mm_add_epi32(u[8], K32One); v[9] = _mm_add_epi32(u[9], K32One); v[10] = _mm_add_epi32(u[10], K32One); v[11] = _mm_add_epi32(u[11], K32One); v[12] = _mm_add_epi32(u[12], K32One); v[13] = _mm_add_epi32(u[13], K32One); v[14] = _mm_add_epi32(u[14], K32One); v[15] = 
_mm_add_epi32(u[15], K32One); u[0] = _mm_srai_epi32(v[0], 2); u[1] = _mm_srai_epi32(v[1], 2); u[2] = _mm_srai_epi32(v[2], 2); u[3] = _mm_srai_epi32(v[3], 2); u[4] = _mm_srai_epi32(v[4], 2); u[5] = _mm_srai_epi32(v[5], 2); u[6] = _mm_srai_epi32(v[6], 2); u[7] = _mm_srai_epi32(v[7], 2); u[8] = _mm_srai_epi32(v[8], 2); u[9] = _mm_srai_epi32(v[9], 2); u[10] = _mm_srai_epi32(v[10], 2); u[11] = _mm_srai_epi32(v[11], 2); u[12] = _mm_srai_epi32(v[12], 2); u[13] = _mm_srai_epi32(v[13], 2); u[14] = _mm_srai_epi32(v[14], 2); u[15] = _mm_srai_epi32(v[15], 2); out[1] = _mm_packs_epi32(u[0], u[1]); out[17] = _mm_packs_epi32(u[2], u[3]); out[9] = _mm_packs_epi32(u[4], u[5]); out[25] = _mm_packs_epi32(u[6], u[7]); out[7] = _mm_packs_epi32(u[8], u[9]); out[23] = _mm_packs_epi32(u[10], u[11]); out[15] = _mm_packs_epi32(u[12], u[13]); out[31] = _mm_packs_epi32(u[14], u[15]); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&out[1], &out[17], &out[9], &out[25], &out[7], &out[23], &out[15], &out[31]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } { const __m128i k32_p27_p05 = pair_set_epi32(cospi_27_64, cospi_5_64); const __m128i k32_p11_p21 = pair_set_epi32(cospi_11_64, cospi_21_64); const __m128i k32_p19_p13 = pair_set_epi32(cospi_19_64, cospi_13_64); const __m128i k32_p03_p29 = pair_set_epi32(cospi_3_64, cospi_29_64); const __m128i k32_m29_p03 = pair_set_epi32(-cospi_29_64, cospi_3_64); const __m128i k32_m13_p19 = pair_set_epi32(-cospi_13_64, cospi_19_64); const __m128i k32_m21_p11 = pair_set_epi32(-cospi_21_64, cospi_11_64); const __m128i k32_m05_p27 = pair_set_epi32(-cospi_5_64, cospi_27_64); u[0] = _mm_unpacklo_epi32(lstep1[40], lstep1[54]); u[1] = _mm_unpackhi_epi32(lstep1[40], lstep1[54]); u[2] = _mm_unpacklo_epi32(lstep1[41], lstep1[55]); u[3] = _mm_unpackhi_epi32(lstep1[41], lstep1[55]); u[4] = _mm_unpacklo_epi32(lstep1[42], lstep1[52]); u[5] = _mm_unpackhi_epi32(lstep1[42], lstep1[52]); u[6] = _mm_unpacklo_epi32(lstep1[43], lstep1[53]); u[7] = _mm_unpackhi_epi32(lstep1[43], lstep1[53]); u[8] = _mm_unpacklo_epi32(lstep1[44], lstep1[50]); u[9] = _mm_unpackhi_epi32(lstep1[44], lstep1[50]); u[10] = _mm_unpacklo_epi32(lstep1[45], lstep1[51]); u[11] = _mm_unpackhi_epi32(lstep1[45], lstep1[51]); u[12] = _mm_unpacklo_epi32(lstep1[46], lstep1[48]); u[13] = _mm_unpackhi_epi32(lstep1[46], lstep1[48]); u[14] = _mm_unpacklo_epi32(lstep1[47], lstep1[49]); u[15] = _mm_unpackhi_epi32(lstep1[47], lstep1[49]); v[0] = k_madd_epi32(u[0], k32_p27_p05); v[1] = k_madd_epi32(u[1], k32_p27_p05); v[2] = k_madd_epi32(u[2], k32_p27_p05); v[3] = k_madd_epi32(u[3], k32_p27_p05); v[4] = k_madd_epi32(u[4], k32_p11_p21); v[5] = k_madd_epi32(u[5], k32_p11_p21); v[6] = k_madd_epi32(u[6], k32_p11_p21); v[7] = k_madd_epi32(u[7], k32_p11_p21); v[8] = k_madd_epi32(u[8], k32_p19_p13); v[9] = k_madd_epi32(u[9], k32_p19_p13); v[10] = k_madd_epi32(u[10], k32_p19_p13); v[11] = k_madd_epi32(u[11], k32_p19_p13); v[12] = k_madd_epi32(u[12], k32_p03_p29); v[13] = k_madd_epi32(u[13], k32_p03_p29); v[14] = k_madd_epi32(u[14], k32_p03_p29); v[15] = k_madd_epi32(u[15], k32_p03_p29); v[16] = k_madd_epi32(u[12], k32_m29_p03); v[17] = k_madd_epi32(u[13], k32_m29_p03); v[18] = k_madd_epi32(u[14], k32_m29_p03); v[19] = k_madd_epi32(u[15], k32_m29_p03); v[20] = k_madd_epi32(u[8], k32_m13_p19); v[21] = k_madd_epi32(u[9], k32_m13_p19); v[22] = k_madd_epi32(u[10], k32_m13_p19); v[23] = k_madd_epi32(u[11], k32_m13_p19); v[24] = k_madd_epi32(u[4], k32_m21_p11); v[25] = k_madd_epi32(u[5], 
k32_m21_p11); v[26] = k_madd_epi32(u[6], k32_m21_p11); v[27] = k_madd_epi32(u[7], k32_m21_p11); v[28] = k_madd_epi32(u[0], k32_m05_p27); v[29] = k_madd_epi32(u[1], k32_m05_p27); v[30] = k_madd_epi32(u[2], k32_m05_p27); v[31] = k_madd_epi32(u[3], k32_m05_p27); #if DCT_HIGH_BIT_DEPTH overflow = k_check_epi32_overflow_32( &v[0], &v[1], &v[2], &v[3], &v[4], &v[5], &v[6], &v[7], &v[8], &v[9], &v[10], &v[11], &v[12], &v[13], &v[14], &v[15], &v[16], &v[17], &v[18], &v[19], &v[20], &v[21], &v[22], &v[23], &v[24], &v[25], &v[26], &v[27], &v[28], &v[29], &v[30], &v[31], &kZero); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH u[0] = k_packs_epi64(v[0], v[1]); u[1] = k_packs_epi64(v[2], v[3]); u[2] = k_packs_epi64(v[4], v[5]); u[3] = k_packs_epi64(v[6], v[7]); u[4] = k_packs_epi64(v[8], v[9]); u[5] = k_packs_epi64(v[10], v[11]); u[6] = k_packs_epi64(v[12], v[13]); u[7] = k_packs_epi64(v[14], v[15]); u[8] = k_packs_epi64(v[16], v[17]); u[9] = k_packs_epi64(v[18], v[19]); u[10] = k_packs_epi64(v[20], v[21]); u[11] = k_packs_epi64(v[22], v[23]); u[12] = k_packs_epi64(v[24], v[25]); u[13] = k_packs_epi64(v[26], v[27]); u[14] = k_packs_epi64(v[28], v[29]); u[15] = k_packs_epi64(v[30], v[31]); v[0] = _mm_add_epi32(u[0], k__DCT_CONST_ROUNDING); v[1] = _mm_add_epi32(u[1], k__DCT_CONST_ROUNDING); v[2] = _mm_add_epi32(u[2], k__DCT_CONST_ROUNDING); v[3] = _mm_add_epi32(u[3], k__DCT_CONST_ROUNDING); v[4] = _mm_add_epi32(u[4], k__DCT_CONST_ROUNDING); v[5] = _mm_add_epi32(u[5], k__DCT_CONST_ROUNDING); v[6] = _mm_add_epi32(u[6], k__DCT_CONST_ROUNDING); v[7] = _mm_add_epi32(u[7], k__DCT_CONST_ROUNDING); v[8] = _mm_add_epi32(u[8], k__DCT_CONST_ROUNDING); v[9] = _mm_add_epi32(u[9], k__DCT_CONST_ROUNDING); v[10] = _mm_add_epi32(u[10], k__DCT_CONST_ROUNDING); v[11] = _mm_add_epi32(u[11], k__DCT_CONST_ROUNDING); v[12] = _mm_add_epi32(u[12], k__DCT_CONST_ROUNDING); v[13] = _mm_add_epi32(u[13], k__DCT_CONST_ROUNDING); v[14] = _mm_add_epi32(u[14], k__DCT_CONST_ROUNDING); v[15] = _mm_add_epi32(u[15], k__DCT_CONST_ROUNDING); u[0] = _mm_srai_epi32(v[0], DCT_CONST_BITS); u[1] = _mm_srai_epi32(v[1], DCT_CONST_BITS); u[2] = _mm_srai_epi32(v[2], DCT_CONST_BITS); u[3] = _mm_srai_epi32(v[3], DCT_CONST_BITS); u[4] = _mm_srai_epi32(v[4], DCT_CONST_BITS); u[5] = _mm_srai_epi32(v[5], DCT_CONST_BITS); u[6] = _mm_srai_epi32(v[6], DCT_CONST_BITS); u[7] = _mm_srai_epi32(v[7], DCT_CONST_BITS); u[8] = _mm_srai_epi32(v[8], DCT_CONST_BITS); u[9] = _mm_srai_epi32(v[9], DCT_CONST_BITS); u[10] = _mm_srai_epi32(v[10], DCT_CONST_BITS); u[11] = _mm_srai_epi32(v[11], DCT_CONST_BITS); u[12] = _mm_srai_epi32(v[12], DCT_CONST_BITS); u[13] = _mm_srai_epi32(v[13], DCT_CONST_BITS); u[14] = _mm_srai_epi32(v[14], DCT_CONST_BITS); u[15] = _mm_srai_epi32(v[15], DCT_CONST_BITS); v[0] = _mm_cmplt_epi32(u[0], kZero); v[1] = _mm_cmplt_epi32(u[1], kZero); v[2] = _mm_cmplt_epi32(u[2], kZero); v[3] = _mm_cmplt_epi32(u[3], kZero); v[4] = _mm_cmplt_epi32(u[4], kZero); v[5] = _mm_cmplt_epi32(u[5], kZero); v[6] = _mm_cmplt_epi32(u[6], kZero); v[7] = _mm_cmplt_epi32(u[7], kZero); v[8] = _mm_cmplt_epi32(u[8], kZero); v[9] = _mm_cmplt_epi32(u[9], kZero); v[10] = _mm_cmplt_epi32(u[10], kZero); v[11] = _mm_cmplt_epi32(u[11], kZero); v[12] = _mm_cmplt_epi32(u[12], kZero); v[13] = _mm_cmplt_epi32(u[13], kZero); v[14] = _mm_cmplt_epi32(u[14], kZero); v[15] = _mm_cmplt_epi32(u[15], kZero); u[0] = _mm_sub_epi32(u[0], v[0]); u[1] = _mm_sub_epi32(u[1], v[1]); u[2] = _mm_sub_epi32(u[2], v[2]); u[3] = _mm_sub_epi32(u[3], v[3]); 
u[4] = _mm_sub_epi32(u[4], v[4]); u[5] = _mm_sub_epi32(u[5], v[5]); u[6] = _mm_sub_epi32(u[6], v[6]); u[7] = _mm_sub_epi32(u[7], v[7]); u[8] = _mm_sub_epi32(u[8], v[8]); u[9] = _mm_sub_epi32(u[9], v[9]); u[10] = _mm_sub_epi32(u[10], v[10]); u[11] = _mm_sub_epi32(u[11], v[11]); u[12] = _mm_sub_epi32(u[12], v[12]); u[13] = _mm_sub_epi32(u[13], v[13]); u[14] = _mm_sub_epi32(u[14], v[14]); u[15] = _mm_sub_epi32(u[15], v[15]); v[0] = _mm_add_epi32(u[0], K32One); v[1] = _mm_add_epi32(u[1], K32One); v[2] = _mm_add_epi32(u[2], K32One); v[3] = _mm_add_epi32(u[3], K32One); v[4] = _mm_add_epi32(u[4], K32One); v[5] = _mm_add_epi32(u[5], K32One); v[6] = _mm_add_epi32(u[6], K32One); v[7] = _mm_add_epi32(u[7], K32One); v[8] = _mm_add_epi32(u[8], K32One); v[9] = _mm_add_epi32(u[9], K32One); v[10] = _mm_add_epi32(u[10], K32One); v[11] = _mm_add_epi32(u[11], K32One); v[12] = _mm_add_epi32(u[12], K32One); v[13] = _mm_add_epi32(u[13], K32One); v[14] = _mm_add_epi32(u[14], K32One); v[15] = _mm_add_epi32(u[15], K32One); u[0] = _mm_srai_epi32(v[0], 2); u[1] = _mm_srai_epi32(v[1], 2); u[2] = _mm_srai_epi32(v[2], 2); u[3] = _mm_srai_epi32(v[3], 2); u[4] = _mm_srai_epi32(v[4], 2); u[5] = _mm_srai_epi32(v[5], 2); u[6] = _mm_srai_epi32(v[6], 2); u[7] = _mm_srai_epi32(v[7], 2); u[8] = _mm_srai_epi32(v[8], 2); u[9] = _mm_srai_epi32(v[9], 2); u[10] = _mm_srai_epi32(v[10], 2); u[11] = _mm_srai_epi32(v[11], 2); u[12] = _mm_srai_epi32(v[12], 2); u[13] = _mm_srai_epi32(v[13], 2); u[14] = _mm_srai_epi32(v[14], 2); u[15] = _mm_srai_epi32(v[15], 2); out[5] = _mm_packs_epi32(u[0], u[1]); out[21] = _mm_packs_epi32(u[2], u[3]); out[13] = _mm_packs_epi32(u[4], u[5]); out[29] = _mm_packs_epi32(u[6], u[7]); out[3] = _mm_packs_epi32(u[8], u[9]); out[19] = _mm_packs_epi32(u[10], u[11]); out[11] = _mm_packs_epi32(u[12], u[13]); out[27] = _mm_packs_epi32(u[14], u[15]); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&out[5], &out[21], &out[13], &out[29], &out[3], &out[19], &out[11], &out[27]); if (overflow) { HIGH_FDCT32x32_2D_ROWS_C(intermediate, output_org); return; } #endif // DCT_HIGH_BIT_DEPTH } } #endif // FDCT32x32_HIGH_PRECISION // Transpose the results, do it as four 8x8 transposes. 
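  // Each 8x8 transpose below is the standard SSE2 three-stage interleave:
  // _mm_unpacklo/hi_epi16 merges pairs of rows (rows A and B become
  // [a0 b0 a1 b1 ...]), the epi32 unpacks then merge pairs of those
  // results, and the epi64 unpacks yield whole columns -- log2(8) = 3
  // rounds in total. The digit-pair comments below track which (row,
  // column) element each 16-bit lane holds at every stage.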
    {
      int transpose_block;
      int16_t *output0 = &intermediate[column_start * 32];
      tran_low_t *output1 = &output_org[column_start * 32];
      for (transpose_block = 0; transpose_block < 4; ++transpose_block) {
        __m128i *this_out = &out[8 * transpose_block];
        // 00 01 02 03 04 05 06 07
        // 10 11 12 13 14 15 16 17
        // 20 21 22 23 24 25 26 27
        // 30 31 32 33 34 35 36 37
        // 40 41 42 43 44 45 46 47
        // 50 51 52 53 54 55 56 57
        // 60 61 62 63 64 65 66 67
        // 70 71 72 73 74 75 76 77
        const __m128i tr0_0 = _mm_unpacklo_epi16(this_out[0], this_out[1]);
        const __m128i tr0_1 = _mm_unpacklo_epi16(this_out[2], this_out[3]);
        const __m128i tr0_2 = _mm_unpackhi_epi16(this_out[0], this_out[1]);
        const __m128i tr0_3 = _mm_unpackhi_epi16(this_out[2], this_out[3]);
        const __m128i tr0_4 = _mm_unpacklo_epi16(this_out[4], this_out[5]);
        const __m128i tr0_5 = _mm_unpacklo_epi16(this_out[6], this_out[7]);
        const __m128i tr0_6 = _mm_unpackhi_epi16(this_out[4], this_out[5]);
        const __m128i tr0_7 = _mm_unpackhi_epi16(this_out[6], this_out[7]);
        // 00 10 01 11 02 12 03 13
        // 20 30 21 31 22 32 23 33
        // 04 14 05 15 06 16 07 17
        // 24 34 25 35 26 36 27 37
        // 40 50 41 51 42 52 43 53
        // 60 70 61 71 62 72 63 73
        // 44 54 45 55 46 56 47 57
        // 64 74 65 75 66 76 67 77
        const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
        const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
        const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
        const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
        const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
        const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
        const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
        const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
        // 00 10 20 30 01 11 21 31
        // 40 50 60 70 41 51 61 71
        // 02 12 22 32 03 13 23 33
        // 42 52 62 72 43 53 63 73
        // 04 14 24 34 05 15 25 35
        // 44 54 64 74 45 55 65 75
        // 06 16 26 36 07 17 27 37
        // 46 56 66 76 47 57 67 77
        __m128i tr2_0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
        __m128i tr2_1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
        __m128i tr2_2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
        __m128i tr2_3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
        __m128i tr2_4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
        __m128i tr2_5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
        __m128i tr2_6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
        __m128i tr2_7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
        // 00 10 20 30 40 50 60 70
        // 01 11 21 31 41 51 61 71
        // 02 12 22 32 42 52 62 72
        // 03 13 23 33 43 53 63 73
        // 04 14 24 34 44 54 64 74
        // 05 15 25 35 45 55 65 75
        // 06 16 26 36 46 56 66 76
        // 07 17 27 37 47 57 67 77
        if (0 == pass) {
          // output[j] = (output[j] + 1 + (output[j] > 0)) >> 2;
          // TODO(cd): see quality impact of only doing
          //           output[j] = (output[j] + 1) >> 2;
          //           which would remove the code between here ...
          __m128i tr2_0_0 = _mm_cmpgt_epi16(tr2_0, kZero);
          __m128i tr2_1_0 = _mm_cmpgt_epi16(tr2_1, kZero);
          __m128i tr2_2_0 = _mm_cmpgt_epi16(tr2_2, kZero);
          __m128i tr2_3_0 = _mm_cmpgt_epi16(tr2_3, kZero);
          __m128i tr2_4_0 = _mm_cmpgt_epi16(tr2_4, kZero);
          __m128i tr2_5_0 = _mm_cmpgt_epi16(tr2_5, kZero);
          __m128i tr2_6_0 = _mm_cmpgt_epi16(tr2_6, kZero);
          __m128i tr2_7_0 = _mm_cmpgt_epi16(tr2_7, kZero);
          tr2_0 = _mm_sub_epi16(tr2_0, tr2_0_0);
          tr2_1 = _mm_sub_epi16(tr2_1, tr2_1_0);
          tr2_2 = _mm_sub_epi16(tr2_2, tr2_2_0);
          tr2_3 = _mm_sub_epi16(tr2_3, tr2_3_0);
          tr2_4 = _mm_sub_epi16(tr2_4, tr2_4_0);
          tr2_5 = _mm_sub_epi16(tr2_5, tr2_5_0);
          tr2_6 = _mm_sub_epi16(tr2_6, tr2_6_0);
          tr2_7 = _mm_sub_epi16(tr2_7, tr2_7_0);
          // ... and here.
          // PS: also change code in vp9/encoder/vp9_dct.c
          tr2_0 = _mm_add_epi16(tr2_0, kOne);
          tr2_1 = _mm_add_epi16(tr2_1, kOne);
          tr2_2 = _mm_add_epi16(tr2_2, kOne);
          tr2_3 = _mm_add_epi16(tr2_3, kOne);
          tr2_4 = _mm_add_epi16(tr2_4, kOne);
          tr2_5 = _mm_add_epi16(tr2_5, kOne);
          tr2_6 = _mm_add_epi16(tr2_6, kOne);
          tr2_7 = _mm_add_epi16(tr2_7, kOne);
          tr2_0 = _mm_srai_epi16(tr2_0, 2);
          tr2_1 = _mm_srai_epi16(tr2_1, 2);
          tr2_2 = _mm_srai_epi16(tr2_2, 2);
          tr2_3 = _mm_srai_epi16(tr2_3, 2);
          tr2_4 = _mm_srai_epi16(tr2_4, 2);
          tr2_5 = _mm_srai_epi16(tr2_5, 2);
          tr2_6 = _mm_srai_epi16(tr2_6, 2);
          tr2_7 = _mm_srai_epi16(tr2_7, 2);
        }
        // Note: even though all these stores are aligned, using the aligned
        // intrinsic makes the code slightly slower.
        if (pass == 0) {
          _mm_storeu_si128((__m128i *)(output0 + 0 * 32), tr2_0);
          _mm_storeu_si128((__m128i *)(output0 + 1 * 32), tr2_1);
          _mm_storeu_si128((__m128i *)(output0 + 2 * 32), tr2_2);
          _mm_storeu_si128((__m128i *)(output0 + 3 * 32), tr2_3);
          _mm_storeu_si128((__m128i *)(output0 + 4 * 32), tr2_4);
          _mm_storeu_si128((__m128i *)(output0 + 5 * 32), tr2_5);
          _mm_storeu_si128((__m128i *)(output0 + 6 * 32), tr2_6);
          _mm_storeu_si128((__m128i *)(output0 + 7 * 32), tr2_7);
          // Process next 8x8
          output0 += 8;
        } else {
          storeu_output(&tr2_0, (output1 + 0 * 32));
          storeu_output(&tr2_1, (output1 + 1 * 32));
          storeu_output(&tr2_2, (output1 + 2 * 32));
          storeu_output(&tr2_3, (output1 + 3 * 32));
          storeu_output(&tr2_4, (output1 + 4 * 32));
          storeu_output(&tr2_5, (output1 + 5 * 32));
          storeu_output(&tr2_6, (output1 + 6 * 32));
          storeu_output(&tr2_7, (output1 + 7 * 32));
          // Process next 8x8
          output1 += 8;
        }
      }
    }
  }
}
}  // NOLINT
#undef ADD_EPI16
#undef SUB_EPI16
#undef HIGH_FDCT32x32_2D_C
#undef HIGH_FDCT32x32_2D_ROWS_C
libvpx-1.8.2/vpx_dsp/x86/fwd_txfm_avx2.c000066400000000000000000000016251357355204000200700ustar00rootroot00000000000000
/*
 *  Copyright (c) 2012 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"

#if !CONFIG_VP9_HIGHBITDEPTH
#define FDCT32x32_2D_AVX2 vpx_fdct32x32_rd_avx2
#define FDCT32x32_HIGH_PRECISION 0
#include "vpx_dsp/x86/fwd_dct32x32_impl_avx2.h"
#undef FDCT32x32_2D_AVX2
#undef FDCT32x32_HIGH_PRECISION

#define FDCT32x32_2D_AVX2 vpx_fdct32x32_avx2
#define FDCT32x32_HIGH_PRECISION 1
#include "vpx_dsp/x86/fwd_dct32x32_impl_avx2.h"  // NOLINT
#undef FDCT32x32_2D_AVX2
#undef FDCT32x32_HIGH_PRECISION
#endif  // !CONFIG_VP9_HIGHBITDEPTH
libvpx-1.8.2/vpx_dsp/x86/fwd_txfm_impl_sse2.h000066400000000000000000001313321357355204000211110ustar00rootroot00000000000000
/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <emmintrin.h>  // SSE2

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/txfm_common.h"
#include "vpx_dsp/x86/fwd_txfm_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"
#include "vpx_ports/mem.h"

// TODO(jingning) The high bit-depth functions need rework for performance.
// After we properly fix the high bit-depth function implementations, this
// file's dependency should be substantially simplified.
#if DCT_HIGH_BIT_DEPTH
#define ADD_EPI16 _mm_adds_epi16
#define SUB_EPI16 _mm_subs_epi16
#else
#define ADD_EPI16 _mm_add_epi16
#define SUB_EPI16 _mm_sub_epi16
#endif

void FDCT4x4_2D(const int16_t *input, tran_low_t *output, int stride) {
  // This 2D transform implements 4 vertical 1D transforms followed
  // by 4 horizontal 1D transforms. The multiplies and adds are as given
  // by Chen, Smith and Fralick ('77). The commands for moving the data
  // around have been minimized by hand.
  // For the purposes of the comments, the 16 inputs are referred to as i0
  // through iF (in raster order), intermediate variables are a0, b0, c0
  // through f, and correspond to the in-place computations mapped to input
  // locations. The outputs, o0 through oF, are labeled according to the
  // output locations.

  // Constants
  // These are the coefficients used for the multiplies.
  // In the comments, pN means cos(N pi /64) and mN is -cos(N pi /64),
  // where cospi_N_64 = cos(N pi /64)
  const __m128i k__cospi_A =
      octa_set_epi16(cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64,
                     cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64);
  const __m128i k__cospi_B =
      octa_set_epi16(cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64,
                     cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64);
  const __m128i k__cospi_C =
      octa_set_epi16(cospi_8_64, cospi_24_64, cospi_8_64, cospi_24_64,
                     cospi_24_64, -cospi_8_64, cospi_24_64, -cospi_8_64);
  const __m128i k__cospi_D =
      octa_set_epi16(cospi_24_64, -cospi_8_64, cospi_24_64, -cospi_8_64,
                     cospi_8_64, cospi_24_64, cospi_8_64, cospi_24_64);
  const __m128i k__cospi_E =
      octa_set_epi16(cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64,
                     cospi_16_64, cospi_16_64, cospi_16_64, cospi_16_64);
  const __m128i k__cospi_F =
      octa_set_epi16(cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64,
                     cospi_16_64, -cospi_16_64, cospi_16_64, -cospi_16_64);
  const __m128i k__cospi_G =
      octa_set_epi16(cospi_8_64, cospi_24_64, cospi_8_64, cospi_24_64,
                     -cospi_8_64, -cospi_24_64, -cospi_8_64, -cospi_24_64);
  const __m128i k__cospi_H =
      octa_set_epi16(cospi_24_64, -cospi_8_64, cospi_24_64, -cospi_8_64,
                     -cospi_24_64, cospi_8_64, -cospi_24_64, cospi_8_64);
  const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING);
  // This second rounding constant saves doing some extra adds at the end
  const __m128i k__DCT_CONST_ROUNDING2 =
      _mm_set1_epi32(DCT_CONST_ROUNDING + (DCT_CONST_ROUNDING << 1));
  const int DCT_CONST_BITS2 = DCT_CONST_BITS + 2;
  const __m128i k__nonzero_bias_a = _mm_setr_epi16(0, 1, 1, 1, 1, 1, 1, 1);
  const __m128i k__nonzero_bias_b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0);
  __m128i in0, in1;
#if DCT_HIGH_BIT_DEPTH
  __m128i cmp0, cmp1;
  int test, overflow;
#endif

  // Load inputs.
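  // Each row of the 4x4 block is only four int16s, so _mm_loadl_epi64 pulls
  // one row into the low half of a register and _mm_unpacklo_epi64 packs two
  // rows per register. Rows are paired as (0, 3) and (1, 2), which puts the
  // mirrored rows in opposite register halves so the stage 1 add/subtract
  // can form row0 +/- row3 and row1 +/- row2 for every column at once.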
  in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
  in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
  in1 = _mm_unpacklo_epi64(
      in1, _mm_loadl_epi64((const __m128i *)(input + 2 * stride)));
  in0 = _mm_unpacklo_epi64(
      in0, _mm_loadl_epi64((const __m128i *)(input + 3 * stride)));
  // in0 = [i0 i1 i2 i3 iC iD iE iF]
  // in1 = [i4 i5 i6 i7 i8 i9 iA iB]
#if DCT_HIGH_BIT_DEPTH
  // Check inputs are small enough to use optimised code
  cmp0 = _mm_xor_si128(_mm_cmpgt_epi16(in0, _mm_set1_epi16(0x3ff)),
                       _mm_cmplt_epi16(in0, _mm_set1_epi16((int16_t)0xfc00)));
  cmp1 = _mm_xor_si128(_mm_cmpgt_epi16(in1, _mm_set1_epi16(0x3ff)),
                       _mm_cmplt_epi16(in1, _mm_set1_epi16((int16_t)0xfc00)));
  test = _mm_movemask_epi8(_mm_or_si128(cmp0, cmp1));
  if (test) {
    vpx_highbd_fdct4x4_c(input, output, stride);
    return;
  }
#endif  // DCT_HIGH_BIT_DEPTH

  // multiply by 16 to give some extra precision
  in0 = _mm_slli_epi16(in0, 4);
  in1 = _mm_slli_epi16(in1, 4);
  // if (i == 0 && input[0]) input[0] += 1;
  // add 1 to the upper left pixel if it is non-zero, which helps reduce
  // the round-trip error
  {
    // The mask will only contain whether the first value is zero, all
    // other comparisons will fail as something shifted by 4 (above << 4)
    // can never be equal to one. To increment in the non-zero case, we
    // add the mask and one for the first element:
    // - if zero, mask = -1, v = v - 1 + 1 = v
    // - if non-zero, mask = 0, v = v + 0 + 1 = v + 1
    __m128i mask = _mm_cmpeq_epi16(in0, k__nonzero_bias_a);
    in0 = _mm_add_epi16(in0, mask);
    in0 = _mm_add_epi16(in0, k__nonzero_bias_b);
  }
  // There are 4 total stages, alternating between an add/subtract stage
  // and a multiply-and-add stage.
  {
    // Stage 1: Add/subtract
    // in0 = [i0 i1 i2 i3 iC iD iE iF]
    // in1 = [i4 i5 i6 i7 i8 i9 iA iB]
    const __m128i r0 = _mm_unpacklo_epi16(in0, in1);
    const __m128i r1 = _mm_unpackhi_epi16(in0, in1);
    // r0 = [i0 i4 i1 i5 i2 i6 i3 i7]
    // r1 = [iC i8 iD i9 iE iA iF iB]
    const __m128i r2 = _mm_shuffle_epi32(r0, 0xB4);
    const __m128i r3 = _mm_shuffle_epi32(r1, 0xB4);
    // r2 = [i0 i4 i1 i5 i3 i7 i2 i6]
    // r3 = [iC i8 iD i9 iF iB iE iA]
    const __m128i t0 = _mm_add_epi16(r2, r3);
    const __m128i t1 = _mm_sub_epi16(r2, r3);
    // t0 = [a0 a4 a1 a5 a3 a7 a2 a6]
    // t1 = [aC a8 aD a9 aF aB aE aA]

    // Stage 2: multiply by constants (which gets us into 32 bits).
    // The constants needed here are:
    // k__cospi_A = [p16 p16 p16 p16 p16 m16 p16 m16]
    // k__cospi_B = [p16 m16 p16 m16 p16 p16 p16 p16]
    // k__cospi_C = [p08 p24 p08 p24 p24 m08 p24 m08]
    // k__cospi_D = [p24 m08 p24 m08 p08 p24 p08 p24]
    const __m128i u0 = _mm_madd_epi16(t0, k__cospi_A);
    const __m128i u2 = _mm_madd_epi16(t0, k__cospi_B);
    const __m128i u1 = _mm_madd_epi16(t1, k__cospi_C);
    const __m128i u3 = _mm_madd_epi16(t1, k__cospi_D);
    // Then add and right-shift to get back to 16-bit range
    const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING);
    const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING);
    const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING);
    const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING);
    const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS);
    const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS);
    const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS);
    const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS);
    // w0 = [b0 b1 b7 b6]
    // w1 = [b8 b9 bF bE]
    // w2 = [b4 b5 b3 b2]
    // w3 = [bC bD bB bA]
    const __m128i x0 = _mm_packs_epi32(w0, w1);
    const __m128i x1 = _mm_packs_epi32(w2, w3);
#if DCT_HIGH_BIT_DEPTH
    overflow = check_epi16_overflow_x2(&x0, &x1);
    if (overflow) {
      vpx_highbd_fdct4x4_c(input, output, stride);
      return;
    }
#endif  // DCT_HIGH_BIT_DEPTH
    // x0 = [b0 b1 b7 b6 b8 b9 bF bE]
    // x1 = [b4 b5 b3 b2 bC bD bB bA]
    in0 = _mm_shuffle_epi32(x0, 0xD8);
    in1 = _mm_shuffle_epi32(x1, 0x8D);
    // in0 = [b0 b1 b8 b9 b7 b6 bF bE]
    // in1 = [b3 b2 bB bA b4 b5 bC bD]
  }
  {
    // vertical DCTs finished. Now we do the horizontal DCTs.
    // Stage 3: Add/subtract
    const __m128i t0 = ADD_EPI16(in0, in1);
    const __m128i t1 = SUB_EPI16(in0, in1);
    // t0 = [c0 c1 c8 c9  c4  c5  cC  cD]
    // t1 = [c3 c2 cB cA -c7 -c6 -cF -cE]
#if DCT_HIGH_BIT_DEPTH
    overflow = check_epi16_overflow_x2(&t0, &t1);
    if (overflow) {
      vpx_highbd_fdct4x4_c(input, output, stride);
      return;
    }
#endif  // DCT_HIGH_BIT_DEPTH
    // Stage 4: multiply by constants (which gets us into 32 bits).
    {
      // The constants needed here are:
      // k__cospi_E = [p16 p16 p16 p16 p16 p16 p16 p16]
      // k__cospi_F = [p16 m16 p16 m16 p16 m16 p16 m16]
      // k__cospi_G = [p08 p24 p08 p24 m08 m24 m08 m24]
      // k__cospi_H = [p24 m08 p24 m08 m24 p08 m24 p08]
      const __m128i u0 = _mm_madd_epi16(t0, k__cospi_E);
      const __m128i u1 = _mm_madd_epi16(t0, k__cospi_F);
      const __m128i u2 = _mm_madd_epi16(t1, k__cospi_G);
      const __m128i u3 = _mm_madd_epi16(t1, k__cospi_H);
      // Then add and right-shift to get back to 16-bit range
      // but this combines the final right-shift as well to save operations
      // This unusual rounding operation is to maintain bit-accurate
      // compatibility with the C version of this function which has two
      // rounding steps in a row.
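      // Concretely: with R = DCT_CONST_ROUNDING = 1 << (DCT_CONST_BITS - 1),
      // the C version computes (((x + R) >> DCT_CONST_BITS) + 1) >> 2.
      // Write x + R = (a << DCT_CONST_BITS) + r with
      // 0 <= r < (1 << DCT_CONST_BITS). Adding a further
      // 2R = 1 << DCT_CONST_BITS gives ((a + 1) << DCT_CONST_BITS) + r, and
      // shifting by DCT_CONST_BITS2 = DCT_CONST_BITS + 2 yields
      // (a + 1) >> 2 exactly, since r lies entirely below the retained bits
      // and can never carry into them. So the single add of 3R
      // (k__DCT_CONST_ROUNDING2) plus one shift is bit-identical to the
      // two-step rounding.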
const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING2); const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING2); const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING2); const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING2); const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS2); const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS2); const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS2); const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS2); // w0 = [o0 o4 o8 oC] // w1 = [o2 o6 oA oE] // w2 = [o1 o5 o9 oD] // w3 = [o3 o7 oB oF] // remember the o's are numbered according to the correct output location const __m128i x0 = _mm_packs_epi32(w0, w1); const __m128i x1 = _mm_packs_epi32(w2, w3); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x2(&x0, &x1); if (overflow) { vpx_highbd_fdct4x4_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH { // x0 = [o0 o4 o8 oC o2 o6 oA oE] // x1 = [o1 o5 o9 oD o3 o7 oB oF] const __m128i y0 = _mm_unpacklo_epi16(x0, x1); const __m128i y1 = _mm_unpackhi_epi16(x0, x1); // y0 = [o0 o1 o4 o5 o8 o9 oC oD] // y1 = [o2 o3 o6 o7 oA oB oE oF] in0 = _mm_unpacklo_epi32(y0, y1); // in0 = [o0 o1 o2 o3 o4 o5 o6 o7] in1 = _mm_unpackhi_epi32(y0, y1); // in1 = [o8 o9 oA oB oC oD oE oF] } } } // Post-condition (v + 1) >> 2 is now incorporated into previous // add and right-shift commands. Only 2 store instructions needed // because we are using the fact that 1/3 are stored just after 0/2. storeu_output(&in0, output + 0 * 4); storeu_output(&in1, output + 2 * 4); } void FDCT8x8_2D(const int16_t *input, tran_low_t *output, int stride) { int pass; // Constants // When we use them, in one case, they are all the same. In all others // it's a pair of them that we need to repeat four times. This is done // by constructing the 32 bit constant corresponding to that pair. const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64); const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64); const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); #if DCT_HIGH_BIT_DEPTH int overflow; #endif // Load input __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride)); __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride)); __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride)); __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride)); __m128i in4 = _mm_load_si128((const __m128i *)(input + 4 * stride)); __m128i in5 = _mm_load_si128((const __m128i *)(input + 5 * stride)); __m128i in6 = _mm_load_si128((const __m128i *)(input + 6 * stride)); __m128i in7 = _mm_load_si128((const __m128i *)(input + 7 * stride)); // Pre-condition input (shift by two) in0 = _mm_slli_epi16(in0, 2); in1 = _mm_slli_epi16(in1, 2); in2 = _mm_slli_epi16(in2, 2); in3 = _mm_slli_epi16(in3, 2); in4 = _mm_slli_epi16(in4, 2); in5 = _mm_slli_epi16(in5, 2); in6 = _mm_slli_epi16(in6, 2); in7 = _mm_slli_epi16(in7, 2); // We do two passes, first the columns, then the rows. The results of the // first pass are transposed so that the same column code can be reused. 
The // results of the second pass are also transposed so that the rows (processed // as columns) are put back in row positions. for (pass = 0; pass < 2; pass++) { // To store results of each pass before the transpose. __m128i res0, res1, res2, res3, res4, res5, res6, res7; // Add/subtract const __m128i q0 = ADD_EPI16(in0, in7); const __m128i q1 = ADD_EPI16(in1, in6); const __m128i q2 = ADD_EPI16(in2, in5); const __m128i q3 = ADD_EPI16(in3, in4); const __m128i q4 = SUB_EPI16(in3, in4); const __m128i q5 = SUB_EPI16(in2, in5); const __m128i q6 = SUB_EPI16(in1, in6); const __m128i q7 = SUB_EPI16(in0, in7); #if DCT_HIGH_BIT_DEPTH if (pass == 1) { overflow = check_epi16_overflow_x8(&q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7); if (overflow) { vpx_highbd_fdct8x8_c(input, output, stride); return; } } #endif // DCT_HIGH_BIT_DEPTH // Work on first four results { // Add/subtract const __m128i r0 = ADD_EPI16(q0, q3); const __m128i r1 = ADD_EPI16(q1, q2); const __m128i r2 = SUB_EPI16(q1, q2); const __m128i r3 = SUB_EPI16(q0, q3); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&r0, &r1, &r2, &r3); if (overflow) { vpx_highbd_fdct8x8_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH // Interleave to do the multiply by constants which gets us into 32bits { const __m128i t0 = _mm_unpacklo_epi16(r0, r1); const __m128i t1 = _mm_unpackhi_epi16(r0, r1); const __m128i t2 = _mm_unpacklo_epi16(r2, r3); const __m128i t3 = _mm_unpackhi_epi16(r2, r3); const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p16_p16); const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p16_p16); const __m128i u2 = _mm_madd_epi16(t0, k__cospi_p16_m16); const __m128i u3 = _mm_madd_epi16(t1, k__cospi_p16_m16); const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p24_p08); const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p24_p08); const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m08_p24); const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m08_p24); // dct_const_round_shift const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING); const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING); const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING); const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS); const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS); const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS); const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS); const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS); const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS); const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS); const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS); // Combine res0 = _mm_packs_epi32(w0, w1); res4 = _mm_packs_epi32(w2, w3); res2 = _mm_packs_epi32(w4, w5); res6 = _mm_packs_epi32(w6, w7); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&res0, &res4, &res2, &res6); if (overflow) { vpx_highbd_fdct8x8_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH } } // Work on next four results { // Interleave to do the multiply by constants which gets us into 32bits const __m128i d0 = _mm_unpacklo_epi16(q6, q5); const __m128i d1 = _mm_unpackhi_epi16(q6, q5); const __m128i e0 = _mm_madd_epi16(d0, k__cospi_p16_m16); const __m128i e1 = _mm_madd_epi16(d1, k__cospi_p16_m16); const __m128i e2 = 
_mm_madd_epi16(d0, k__cospi_p16_p16); const __m128i e3 = _mm_madd_epi16(d1, k__cospi_p16_p16); // dct_const_round_shift const __m128i f0 = _mm_add_epi32(e0, k__DCT_CONST_ROUNDING); const __m128i f1 = _mm_add_epi32(e1, k__DCT_CONST_ROUNDING); const __m128i f2 = _mm_add_epi32(e2, k__DCT_CONST_ROUNDING); const __m128i f3 = _mm_add_epi32(e3, k__DCT_CONST_ROUNDING); const __m128i s0 = _mm_srai_epi32(f0, DCT_CONST_BITS); const __m128i s1 = _mm_srai_epi32(f1, DCT_CONST_BITS); const __m128i s2 = _mm_srai_epi32(f2, DCT_CONST_BITS); const __m128i s3 = _mm_srai_epi32(f3, DCT_CONST_BITS); // Combine const __m128i r0 = _mm_packs_epi32(s0, s1); const __m128i r1 = _mm_packs_epi32(s2, s3); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x2(&r0, &r1); if (overflow) { vpx_highbd_fdct8x8_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH { // Add/subtract const __m128i x0 = ADD_EPI16(q4, r0); const __m128i x1 = SUB_EPI16(q4, r0); const __m128i x2 = SUB_EPI16(q7, r1); const __m128i x3 = ADD_EPI16(q7, r1); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&x0, &x1, &x2, &x3); if (overflow) { vpx_highbd_fdct8x8_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH // Interleave to do the multiply by constants which gets us into 32bits { const __m128i t0 = _mm_unpacklo_epi16(x0, x3); const __m128i t1 = _mm_unpackhi_epi16(x0, x3); const __m128i t2 = _mm_unpacklo_epi16(x1, x2); const __m128i t3 = _mm_unpackhi_epi16(x1, x2); const __m128i u0 = _mm_madd_epi16(t0, k__cospi_p28_p04); const __m128i u1 = _mm_madd_epi16(t1, k__cospi_p28_p04); const __m128i u2 = _mm_madd_epi16(t0, k__cospi_m04_p28); const __m128i u3 = _mm_madd_epi16(t1, k__cospi_m04_p28); const __m128i u4 = _mm_madd_epi16(t2, k__cospi_p12_p20); const __m128i u5 = _mm_madd_epi16(t3, k__cospi_p12_p20); const __m128i u6 = _mm_madd_epi16(t2, k__cospi_m20_p12); const __m128i u7 = _mm_madd_epi16(t3, k__cospi_m20_p12); // dct_const_round_shift const __m128i v0 = _mm_add_epi32(u0, k__DCT_CONST_ROUNDING); const __m128i v1 = _mm_add_epi32(u1, k__DCT_CONST_ROUNDING); const __m128i v2 = _mm_add_epi32(u2, k__DCT_CONST_ROUNDING); const __m128i v3 = _mm_add_epi32(u3, k__DCT_CONST_ROUNDING); const __m128i v4 = _mm_add_epi32(u4, k__DCT_CONST_ROUNDING); const __m128i v5 = _mm_add_epi32(u5, k__DCT_CONST_ROUNDING); const __m128i v6 = _mm_add_epi32(u6, k__DCT_CONST_ROUNDING); const __m128i v7 = _mm_add_epi32(u7, k__DCT_CONST_ROUNDING); const __m128i w0 = _mm_srai_epi32(v0, DCT_CONST_BITS); const __m128i w1 = _mm_srai_epi32(v1, DCT_CONST_BITS); const __m128i w2 = _mm_srai_epi32(v2, DCT_CONST_BITS); const __m128i w3 = _mm_srai_epi32(v3, DCT_CONST_BITS); const __m128i w4 = _mm_srai_epi32(v4, DCT_CONST_BITS); const __m128i w5 = _mm_srai_epi32(v5, DCT_CONST_BITS); const __m128i w6 = _mm_srai_epi32(v6, DCT_CONST_BITS); const __m128i w7 = _mm_srai_epi32(v7, DCT_CONST_BITS); // Combine res1 = _mm_packs_epi32(w0, w1); res7 = _mm_packs_epi32(w2, w3); res5 = _mm_packs_epi32(w4, w5); res3 = _mm_packs_epi32(w6, w7); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&res1, &res7, &res5, &res3); if (overflow) { vpx_highbd_fdct8x8_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH } } } // Transpose the 8x8. 
    {
      // 00 01 02 03 04 05 06 07
      // 10 11 12 13 14 15 16 17
      // 20 21 22 23 24 25 26 27
      // 30 31 32 33 34 35 36 37
      // 40 41 42 43 44 45 46 47
      // 50 51 52 53 54 55 56 57
      // 60 61 62 63 64 65 66 67
      // 70 71 72 73 74 75 76 77
      const __m128i tr0_0 = _mm_unpacklo_epi16(res0, res1);
      const __m128i tr0_1 = _mm_unpacklo_epi16(res2, res3);
      const __m128i tr0_2 = _mm_unpackhi_epi16(res0, res1);
      const __m128i tr0_3 = _mm_unpackhi_epi16(res2, res3);
      const __m128i tr0_4 = _mm_unpacklo_epi16(res4, res5);
      const __m128i tr0_5 = _mm_unpacklo_epi16(res6, res7);
      const __m128i tr0_6 = _mm_unpackhi_epi16(res4, res5);
      const __m128i tr0_7 = _mm_unpackhi_epi16(res6, res7);
      // 00 10 01 11 02 12 03 13
      // 20 30 21 31 22 32 23 33
      // 04 14 05 15 06 16 07 17
      // 24 34 25 35 26 36 27 37
      // 40 50 41 51 42 52 43 53
      // 60 70 61 71 62 72 63 73
      // 44 54 45 55 46 56 47 57
      // 64 74 65 75 66 76 67 77
      const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1);
      const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3);
      const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1);
      const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3);
      const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5);
      const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7);
      const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5);
      const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7);
      // 00 10 20 30 01 11 21 31
      // 40 50 60 70 41 51 61 71
      // 02 12 22 32 03 13 23 33
      // 42 52 62 72 43 53 63 73
      // 04 14 24 34 05 15 25 35
      // 44 54 64 74 45 55 65 75
      // 06 16 26 36 07 17 27 37
      // 46 56 66 76 47 57 67 77
      in0 = _mm_unpacklo_epi64(tr1_0, tr1_4);
      in1 = _mm_unpackhi_epi64(tr1_0, tr1_4);
      in2 = _mm_unpacklo_epi64(tr1_2, tr1_6);
      in3 = _mm_unpackhi_epi64(tr1_2, tr1_6);
      in4 = _mm_unpacklo_epi64(tr1_1, tr1_5);
      in5 = _mm_unpackhi_epi64(tr1_1, tr1_5);
      in6 = _mm_unpacklo_epi64(tr1_3, tr1_7);
      in7 = _mm_unpackhi_epi64(tr1_3, tr1_7);
      // 00 10 20 30 40 50 60 70
      // 01 11 21 31 41 51 61 71
      // 02 12 22 32 42 52 62 72
      // 03 13 23 33 43 53 63 73
      // 04 14 24 34 44 54 64 74
      // 05 15 25 35 45 55 65 75
      // 06 16 26 36 46 56 66 76
      // 07 17 27 37 47 57 67 77
    }
  }
  // Post-condition output and store it
  {
    // Post-condition (division by two)
    // division of a 16-bit signed number by two using shifts:
    // n / 2 = (n - (n >> 15)) >> 1
    // (the arithmetic shift propagates the sign bit, so this rounds
    // toward zero like C integer division)
    const __m128i sign_in0 = _mm_srai_epi16(in0, 15);
    const __m128i sign_in1 = _mm_srai_epi16(in1, 15);
    const __m128i sign_in2 = _mm_srai_epi16(in2, 15);
    const __m128i sign_in3 = _mm_srai_epi16(in3, 15);
    const __m128i sign_in4 = _mm_srai_epi16(in4, 15);
    const __m128i sign_in5 = _mm_srai_epi16(in5, 15);
    const __m128i sign_in6 = _mm_srai_epi16(in6, 15);
    const __m128i sign_in7 = _mm_srai_epi16(in7, 15);
    in0 = _mm_sub_epi16(in0, sign_in0);
    in1 = _mm_sub_epi16(in1, sign_in1);
    in2 = _mm_sub_epi16(in2, sign_in2);
    in3 = _mm_sub_epi16(in3, sign_in3);
    in4 = _mm_sub_epi16(in4, sign_in4);
    in5 = _mm_sub_epi16(in5, sign_in5);
    in6 = _mm_sub_epi16(in6, sign_in6);
    in7 = _mm_sub_epi16(in7, sign_in7);
    in0 = _mm_srai_epi16(in0, 1);
    in1 = _mm_srai_epi16(in1, 1);
    in2 = _mm_srai_epi16(in2, 1);
    in3 = _mm_srai_epi16(in3, 1);
    in4 = _mm_srai_epi16(in4, 1);
    in5 = _mm_srai_epi16(in5, 1);
    in6 = _mm_srai_epi16(in6, 1);
    in7 = _mm_srai_epi16(in7, 1);
    // store results
    store_output(&in0, (output + 0 * 8));
    store_output(&in1, (output + 1 * 8));
    store_output(&in2, (output + 2 * 8));
    store_output(&in3, (output + 3 * 8));
    store_output(&in4, (output + 4 * 8));
    store_output(&in5, (output + 5 * 8));
    store_output(&in6, (output + 6 * 8));
    store_output(&in7, (output + 7 * 8));
  }
}

void FDCT16x16_2D(const int16_t *input, tran_low_t *output, int
stride) { // The 2D transform is done with two passes which are actually pretty // similar. In the first one, we transform the columns and transpose // the results. In the second one, we transform the rows. To achieve that, // as the first pass results are transposed, we transpose the columns (that // is the transposed rows) and transpose the results (so that it goes back // in normal/row positions). int pass; // We need an intermediate buffer between passes. DECLARE_ALIGNED(16, int16_t, intermediate[256]); const int16_t *in = input; int16_t *out0 = intermediate; tran_low_t *out1 = output; // Constants // When we use them, in one case, they are all the same. In all others // it's a pair of them that we need to repeat four times. This is done // by constructing the 32 bit constant corresponding to that pair. const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); const __m128i k__cospi_p08_m24 = pair_set_epi16(cospi_8_64, -cospi_24_64); const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i k__cospi_p28_p04 = pair_set_epi16(cospi_28_64, cospi_4_64); const __m128i k__cospi_m04_p28 = pair_set_epi16(-cospi_4_64, cospi_28_64); const __m128i k__cospi_p12_p20 = pair_set_epi16(cospi_12_64, cospi_20_64); const __m128i k__cospi_m20_p12 = pair_set_epi16(-cospi_20_64, cospi_12_64); const __m128i k__cospi_p30_p02 = pair_set_epi16(cospi_30_64, cospi_2_64); const __m128i k__cospi_p14_p18 = pair_set_epi16(cospi_14_64, cospi_18_64); const __m128i k__cospi_m02_p30 = pair_set_epi16(-cospi_2_64, cospi_30_64); const __m128i k__cospi_m18_p14 = pair_set_epi16(-cospi_18_64, cospi_14_64); const __m128i k__cospi_p22_p10 = pair_set_epi16(cospi_22_64, cospi_10_64); const __m128i k__cospi_p06_p26 = pair_set_epi16(cospi_6_64, cospi_26_64); const __m128i k__cospi_m10_p22 = pair_set_epi16(-cospi_10_64, cospi_22_64); const __m128i k__cospi_m26_p06 = pair_set_epi16(-cospi_26_64, cospi_6_64); const __m128i k__DCT_CONST_ROUNDING = _mm_set1_epi32(DCT_CONST_ROUNDING); const __m128i kOne = _mm_set1_epi16(1); // Do the two transform/transpose passes for (pass = 0; pass < 2; ++pass) { // We process eight columns (transposed rows in second pass) at a time. int column_start; #if DCT_HIGH_BIT_DEPTH int overflow; #endif for (column_start = 0; column_start < 16; column_start += 8) { __m128i in00, in01, in02, in03, in04, in05, in06, in07; __m128i in08, in09, in10, in11, in12, in13, in14, in15; __m128i input0, input1, input2, input3, input4, input5, input6, input7; __m128i step1_0, step1_1, step1_2, step1_3; __m128i step1_4, step1_5, step1_6, step1_7; __m128i step2_1, step2_2, step2_3, step2_4, step2_5, step2_6; __m128i step3_0, step3_1, step3_2, step3_3; __m128i step3_4, step3_5, step3_6, step3_7; __m128i res00, res01, res02, res03, res04, res05, res06, res07; __m128i res08, res09, res10, res11, res12, res13, res14, res15; // Load and pre-condition input. 
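      // Pass 0 scales the input up by 4 (x << 2) to give the fixed-point
      // column transform extra precision headroom; pass 1 reads the
      // intermediate buffer back and removes that scaling with the
      // (x + 1) >> 2 rounding seen below, before transforming the
      // (transposed) rows.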
if (0 == pass) { in00 = _mm_load_si128((const __m128i *)(in + 0 * stride)); in01 = _mm_load_si128((const __m128i *)(in + 1 * stride)); in02 = _mm_load_si128((const __m128i *)(in + 2 * stride)); in03 = _mm_load_si128((const __m128i *)(in + 3 * stride)); in04 = _mm_load_si128((const __m128i *)(in + 4 * stride)); in05 = _mm_load_si128((const __m128i *)(in + 5 * stride)); in06 = _mm_load_si128((const __m128i *)(in + 6 * stride)); in07 = _mm_load_si128((const __m128i *)(in + 7 * stride)); in08 = _mm_load_si128((const __m128i *)(in + 8 * stride)); in09 = _mm_load_si128((const __m128i *)(in + 9 * stride)); in10 = _mm_load_si128((const __m128i *)(in + 10 * stride)); in11 = _mm_load_si128((const __m128i *)(in + 11 * stride)); in12 = _mm_load_si128((const __m128i *)(in + 12 * stride)); in13 = _mm_load_si128((const __m128i *)(in + 13 * stride)); in14 = _mm_load_si128((const __m128i *)(in + 14 * stride)); in15 = _mm_load_si128((const __m128i *)(in + 15 * stride)); // x = x << 2 in00 = _mm_slli_epi16(in00, 2); in01 = _mm_slli_epi16(in01, 2); in02 = _mm_slli_epi16(in02, 2); in03 = _mm_slli_epi16(in03, 2); in04 = _mm_slli_epi16(in04, 2); in05 = _mm_slli_epi16(in05, 2); in06 = _mm_slli_epi16(in06, 2); in07 = _mm_slli_epi16(in07, 2); in08 = _mm_slli_epi16(in08, 2); in09 = _mm_slli_epi16(in09, 2); in10 = _mm_slli_epi16(in10, 2); in11 = _mm_slli_epi16(in11, 2); in12 = _mm_slli_epi16(in12, 2); in13 = _mm_slli_epi16(in13, 2); in14 = _mm_slli_epi16(in14, 2); in15 = _mm_slli_epi16(in15, 2); } else { in00 = _mm_load_si128((const __m128i *)(in + 0 * 16)); in01 = _mm_load_si128((const __m128i *)(in + 1 * 16)); in02 = _mm_load_si128((const __m128i *)(in + 2 * 16)); in03 = _mm_load_si128((const __m128i *)(in + 3 * 16)); in04 = _mm_load_si128((const __m128i *)(in + 4 * 16)); in05 = _mm_load_si128((const __m128i *)(in + 5 * 16)); in06 = _mm_load_si128((const __m128i *)(in + 6 * 16)); in07 = _mm_load_si128((const __m128i *)(in + 7 * 16)); in08 = _mm_load_si128((const __m128i *)(in + 8 * 16)); in09 = _mm_load_si128((const __m128i *)(in + 9 * 16)); in10 = _mm_load_si128((const __m128i *)(in + 10 * 16)); in11 = _mm_load_si128((const __m128i *)(in + 11 * 16)); in12 = _mm_load_si128((const __m128i *)(in + 12 * 16)); in13 = _mm_load_si128((const __m128i *)(in + 13 * 16)); in14 = _mm_load_si128((const __m128i *)(in + 14 * 16)); in15 = _mm_load_si128((const __m128i *)(in + 15 * 16)); // x = (x + 1) >> 2 in00 = _mm_add_epi16(in00, kOne); in01 = _mm_add_epi16(in01, kOne); in02 = _mm_add_epi16(in02, kOne); in03 = _mm_add_epi16(in03, kOne); in04 = _mm_add_epi16(in04, kOne); in05 = _mm_add_epi16(in05, kOne); in06 = _mm_add_epi16(in06, kOne); in07 = _mm_add_epi16(in07, kOne); in08 = _mm_add_epi16(in08, kOne); in09 = _mm_add_epi16(in09, kOne); in10 = _mm_add_epi16(in10, kOne); in11 = _mm_add_epi16(in11, kOne); in12 = _mm_add_epi16(in12, kOne); in13 = _mm_add_epi16(in13, kOne); in14 = _mm_add_epi16(in14, kOne); in15 = _mm_add_epi16(in15, kOne); in00 = _mm_srai_epi16(in00, 2); in01 = _mm_srai_epi16(in01, 2); in02 = _mm_srai_epi16(in02, 2); in03 = _mm_srai_epi16(in03, 2); in04 = _mm_srai_epi16(in04, 2); in05 = _mm_srai_epi16(in05, 2); in06 = _mm_srai_epi16(in06, 2); in07 = _mm_srai_epi16(in07, 2); in08 = _mm_srai_epi16(in08, 2); in09 = _mm_srai_epi16(in09, 2); in10 = _mm_srai_epi16(in10, 2); in11 = _mm_srai_epi16(in11, 2); in12 = _mm_srai_epi16(in12, 2); in13 = _mm_srai_epi16(in13, 2); in14 = _mm_srai_epi16(in14, 2); in15 = _mm_srai_epi16(in15, 2); } in += 8; // Calculate input for the first 8 results. 
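      // The 16-point transform is decomposed even/odd: the butterfly sums
      // of mirrored inputs (in00 + in15, in01 + in14, ...) feed an 8-point
      // transform that produces the even-index outputs, while the
      // differences of the same mirrored pairs (step1_* below) feed the
      // odd-index outputs.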
{ input0 = ADD_EPI16(in00, in15); input1 = ADD_EPI16(in01, in14); input2 = ADD_EPI16(in02, in13); input3 = ADD_EPI16(in03, in12); input4 = ADD_EPI16(in04, in11); input5 = ADD_EPI16(in05, in10); input6 = ADD_EPI16(in06, in09); input7 = ADD_EPI16(in07, in08); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&input0, &input1, &input2, &input3, &input4, &input5, &input6, &input7); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH } // Calculate input for the next 8 results. { step1_0 = SUB_EPI16(in07, in08); step1_1 = SUB_EPI16(in06, in09); step1_2 = SUB_EPI16(in05, in10); step1_3 = SUB_EPI16(in04, in11); step1_4 = SUB_EPI16(in03, in12); step1_5 = SUB_EPI16(in02, in13); step1_6 = SUB_EPI16(in01, in14); step1_7 = SUB_EPI16(in00, in15); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&step1_0, &step1_1, &step1_2, &step1_3, &step1_4, &step1_5, &step1_6, &step1_7); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH } // Work on the first eight values; fdct8(input, even_results); { // Add/subtract const __m128i q0 = ADD_EPI16(input0, input7); const __m128i q1 = ADD_EPI16(input1, input6); const __m128i q2 = ADD_EPI16(input2, input5); const __m128i q3 = ADD_EPI16(input3, input4); const __m128i q4 = SUB_EPI16(input3, input4); const __m128i q5 = SUB_EPI16(input2, input5); const __m128i q6 = SUB_EPI16(input1, input6); const __m128i q7 = SUB_EPI16(input0, input7); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&q0, &q1, &q2, &q3, &q4, &q5, &q6, &q7); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH // Work on first four results { // Add/subtract const __m128i r0 = ADD_EPI16(q0, q3); const __m128i r1 = ADD_EPI16(q1, q2); const __m128i r2 = SUB_EPI16(q1, q2); const __m128i r3 = SUB_EPI16(q0, q3); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&r0, &r1, &r2, &r3); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH // Interleave to do the multiply by constants which gets us // into 32 bits. { const __m128i t0 = _mm_unpacklo_epi16(r0, r1); const __m128i t1 = _mm_unpackhi_epi16(r0, r1); const __m128i t2 = _mm_unpacklo_epi16(r2, r3); const __m128i t3 = _mm_unpackhi_epi16(r2, r3); res00 = mult_round_shift(&t0, &t1, &k__cospi_p16_p16, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); res08 = mult_round_shift(&t0, &t1, &k__cospi_p16_m16, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); res04 = mult_round_shift(&t2, &t3, &k__cospi_p24_p08, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); res12 = mult_round_shift(&t2, &t3, &k__cospi_m08_p24, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&res00, &res08, &res04, &res12); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH } } // Work on next four results { // Interleave to do the multiply by constants which gets us // into 32 bits. 
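        // The interleave + _mm_madd_epi16 idiom evaluates a planar rotation
        // in one shot: with lanes [a0 b0 a1 b1 ...] and a constant register
        // [c0 c1 c0 c1 ...], _mm_madd_epi16 yields a * c0 + b * c1 in each
        // 32-bit lane. mult_round_shift (presumably, matching the explicit
        // madd/round/shift/pack sequence used in FDCT8x8_2D above) wraps
        // that multiply with dct_const_round_shift to return to 16 bits.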
const __m128i d0 = _mm_unpacklo_epi16(q6, q5); const __m128i d1 = _mm_unpackhi_epi16(q6, q5); const __m128i r0 = mult_round_shift(&d0, &d1, &k__cospi_p16_m16, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); const __m128i r1 = mult_round_shift(&d0, &d1, &k__cospi_p16_p16, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x2(&r0, &r1); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH { // Add/subtract const __m128i x0 = ADD_EPI16(q4, r0); const __m128i x1 = SUB_EPI16(q4, r0); const __m128i x2 = SUB_EPI16(q7, r1); const __m128i x3 = ADD_EPI16(q7, r1); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&x0, &x1, &x2, &x3); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH // Interleave to do the multiply by constants which gets us // into 32 bits. { const __m128i t0 = _mm_unpacklo_epi16(x0, x3); const __m128i t1 = _mm_unpackhi_epi16(x0, x3); const __m128i t2 = _mm_unpacklo_epi16(x1, x2); const __m128i t3 = _mm_unpackhi_epi16(x1, x2); res02 = mult_round_shift(&t0, &t1, &k__cospi_p28_p04, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); res14 = mult_round_shift(&t0, &t1, &k__cospi_m04_p28, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); res10 = mult_round_shift(&t2, &t3, &k__cospi_p12_p20, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); res06 = mult_round_shift(&t2, &t3, &k__cospi_m20_p12, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&res02, &res14, &res10, &res06); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH } } } } // Work on the next eight values; step1 -> odd_results { // step 2 { const __m128i t0 = _mm_unpacklo_epi16(step1_5, step1_2); const __m128i t1 = _mm_unpackhi_epi16(step1_5, step1_2); const __m128i t2 = _mm_unpacklo_epi16(step1_4, step1_3); const __m128i t3 = _mm_unpackhi_epi16(step1_4, step1_3); step2_2 = mult_round_shift(&t0, &t1, &k__cospi_p16_m16, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); step2_3 = mult_round_shift(&t2, &t3, &k__cospi_p16_m16, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); step2_5 = mult_round_shift(&t0, &t1, &k__cospi_p16_p16, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); step2_4 = mult_round_shift(&t2, &t3, &k__cospi_p16_p16, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&step2_2, &step2_3, &step2_5, &step2_4); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH } // step 3 { step3_0 = ADD_EPI16(step1_0, step2_3); step3_1 = ADD_EPI16(step1_1, step2_2); step3_2 = SUB_EPI16(step1_1, step2_2); step3_3 = SUB_EPI16(step1_0, step2_3); step3_4 = SUB_EPI16(step1_7, step2_4); step3_5 = SUB_EPI16(step1_6, step2_5); step3_6 = ADD_EPI16(step1_6, step2_5); step3_7 = ADD_EPI16(step1_7, step2_4); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&step3_0, &step3_1, &step3_2, &step3_3, &step3_4, &step3_5, &step3_6, &step3_7); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH } // step 4 { const __m128i t0 = _mm_unpacklo_epi16(step3_1, step3_6); const __m128i t1 = _mm_unpackhi_epi16(step3_1, step3_6); const __m128i t2 = _mm_unpacklo_epi16(step3_2, step3_5); const __m128i t3 = _mm_unpackhi_epi16(step3_2, step3_5); step2_1 = mult_round_shift(&t0, &t1, &k__cospi_m08_p24, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); step2_2 = mult_round_shift(&t2, &t3, &k__cospi_p24_p08, &k__DCT_CONST_ROUNDING, 
DCT_CONST_BITS); step2_6 = mult_round_shift(&t0, &t1, &k__cospi_p24_p08, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); step2_5 = mult_round_shift(&t2, &t3, &k__cospi_p08_m24, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&step2_1, &step2_2, &step2_6, &step2_5); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH } // step 5 { step1_0 = ADD_EPI16(step3_0, step2_1); step1_1 = SUB_EPI16(step3_0, step2_1); step1_2 = ADD_EPI16(step3_3, step2_2); step1_3 = SUB_EPI16(step3_3, step2_2); step1_4 = SUB_EPI16(step3_4, step2_5); step1_5 = ADD_EPI16(step3_4, step2_5); step1_6 = SUB_EPI16(step3_7, step2_6); step1_7 = ADD_EPI16(step3_7, step2_6); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x8(&step1_0, &step1_1, &step1_2, &step1_3, &step1_4, &step1_5, &step1_6, &step1_7); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH } // step 6 { const __m128i t0 = _mm_unpacklo_epi16(step1_0, step1_7); const __m128i t1 = _mm_unpackhi_epi16(step1_0, step1_7); const __m128i t2 = _mm_unpacklo_epi16(step1_1, step1_6); const __m128i t3 = _mm_unpackhi_epi16(step1_1, step1_6); res01 = mult_round_shift(&t0, &t1, &k__cospi_p30_p02, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); res09 = mult_round_shift(&t2, &t3, &k__cospi_p14_p18, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); res15 = mult_round_shift(&t0, &t1, &k__cospi_m02_p30, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); res07 = mult_round_shift(&t2, &t3, &k__cospi_m18_p14, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&res01, &res09, &res15, &res07); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH } { const __m128i t0 = _mm_unpacklo_epi16(step1_2, step1_5); const __m128i t1 = _mm_unpackhi_epi16(step1_2, step1_5); const __m128i t2 = _mm_unpacklo_epi16(step1_3, step1_4); const __m128i t3 = _mm_unpackhi_epi16(step1_3, step1_4); res05 = mult_round_shift(&t0, &t1, &k__cospi_p22_p10, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); res13 = mult_round_shift(&t2, &t3, &k__cospi_p06_p26, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); res11 = mult_round_shift(&t0, &t1, &k__cospi_m10_p22, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); res03 = mult_round_shift(&t2, &t3, &k__cospi_m26_p06, &k__DCT_CONST_ROUNDING, DCT_CONST_BITS); #if DCT_HIGH_BIT_DEPTH overflow = check_epi16_overflow_x4(&res05, &res13, &res11, &res03); if (overflow) { vpx_highbd_fdct16x16_c(input, output, stride); return; } #endif // DCT_HIGH_BIT_DEPTH } } // Transpose the results, do it as two 8x8 transposes. transpose_and_output8x8(&res00, &res01, &res02, &res03, &res04, &res05, &res06, &res07, pass, out0, out1); transpose_and_output8x8(&res08, &res09, &res10, &res11, &res12, &res13, &res14, &res15, pass, out0 + 8, out1 + 8); if (pass == 0) { out0 += 8 * 16; } else { out1 += 8 * 16; } } // Setup in/out for next pass. in = intermediate; } } #undef ADD_EPI16 #undef SUB_EPI16 libvpx-1.8.2/vpx_dsp/x86/fwd_txfm_sse2.c000066400000000000000000000212001357355204000200530ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <emmintrin.h>  // SSE2

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_dsp/x86/fwd_txfm_sse2.h"

void vpx_fdct4x4_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
  __m128i in0, in1;
  __m128i tmp;
  const __m128i zero = _mm_setzero_si128();
  in0 = _mm_loadl_epi64((const __m128i *)(input + 0 * stride));
  in1 = _mm_loadl_epi64((const __m128i *)(input + 1 * stride));
  in1 = _mm_unpacklo_epi64(
      in1, _mm_loadl_epi64((const __m128i *)(input + 2 * stride)));
  in0 = _mm_unpacklo_epi64(
      in0, _mm_loadl_epi64((const __m128i *)(input + 3 * stride)));

  tmp = _mm_add_epi16(in0, in1);
  in0 = _mm_unpacklo_epi16(zero, tmp);
  in1 = _mm_unpackhi_epi16(zero, tmp);
  in0 = _mm_srai_epi32(in0, 16);
  in1 = _mm_srai_epi32(in1, 16);

  tmp = _mm_add_epi32(in0, in1);
  in0 = _mm_unpacklo_epi32(tmp, zero);
  in1 = _mm_unpackhi_epi32(tmp, zero);

  tmp = _mm_add_epi32(in0, in1);
  in0 = _mm_srli_si128(tmp, 8);

  in1 = _mm_add_epi32(tmp, in0);
  in0 = _mm_slli_epi32(in1, 1);
  output[0] = (tran_low_t)_mm_cvtsi128_si32(in0);
}

void vpx_fdct8x8_1_sse2(const int16_t *input, tran_low_t *output, int stride) {
  __m128i in0 = _mm_load_si128((const __m128i *)(input + 0 * stride));
  __m128i in1 = _mm_load_si128((const __m128i *)(input + 1 * stride));
  __m128i in2 = _mm_load_si128((const __m128i *)(input + 2 * stride));
  __m128i in3 = _mm_load_si128((const __m128i *)(input + 3 * stride));
  __m128i u0, u1, sum;

  u0 = _mm_add_epi16(in0, in1);
  u1 = _mm_add_epi16(in2, in3);

  in0 = _mm_load_si128((const __m128i *)(input + 4 * stride));
  in1 = _mm_load_si128((const __m128i *)(input + 5 * stride));
  in2 = _mm_load_si128((const __m128i *)(input + 6 * stride));
  in3 = _mm_load_si128((const __m128i *)(input + 7 * stride));

  sum = _mm_add_epi16(u0, u1);
  in0 = _mm_add_epi16(in0, in1);
  in2 = _mm_add_epi16(in2, in3);
  sum = _mm_add_epi16(sum, in0);

  u0 = _mm_setzero_si128();
  sum = _mm_add_epi16(sum, in2);

  in0 = _mm_unpacklo_epi16(u0, sum);
  in1 = _mm_unpackhi_epi16(u0, sum);
  in0 = _mm_srai_epi32(in0, 16);
  in1 = _mm_srai_epi32(in1, 16);

  sum = _mm_add_epi32(in0, in1);
  in0 = _mm_unpacklo_epi32(sum, u0);
  in1 = _mm_unpackhi_epi32(sum, u0);

  sum = _mm_add_epi32(in0, in1);
  in0 = _mm_srli_si128(sum, 8);
  in1 = _mm_add_epi32(sum, in0);

  output[0] = (tran_low_t)_mm_cvtsi128_si32(in1);
}

void vpx_fdct16x16_1_sse2(const int16_t *input, tran_low_t *output,
                          int stride) {
  __m128i in0, in1, in2, in3;
  __m128i u0, u1;
  __m128i sum = _mm_setzero_si128();
  int i;

  for (i = 0; i < 2; ++i) {
    in0 = _mm_load_si128((const __m128i *)(input + 0 * stride + 0));
    in1 = _mm_load_si128((const __m128i *)(input + 0 * stride + 8));
    in2 = _mm_load_si128((const __m128i *)(input + 1 * stride + 0));
    in3 = _mm_load_si128((const __m128i *)(input + 1 * stride + 8));

    u0 = _mm_add_epi16(in0, in1);
    u1 = _mm_add_epi16(in2, in3);
    sum = _mm_add_epi16(sum, u0);

    in0 = _mm_load_si128((const __m128i *)(input + 2 * stride + 0));
    in1 = _mm_load_si128((const __m128i *)(input + 2 * stride + 8));
    in2 = _mm_load_si128((const __m128i *)(input + 3 * stride + 0));
    in3 = _mm_load_si128((const __m128i *)(input + 3 * stride + 8));

    sum = _mm_add_epi16(sum, u1);
    u0 = _mm_add_epi16(in0, in1);
    u1 = _mm_add_epi16(in2, in3);
    sum = _mm_add_epi16(sum, u0);

    in0 = _mm_load_si128((const __m128i *)(input + 4 * stride + 0));
    in1 = _mm_load_si128((const __m128i *)(input + 4 * stride + 8));
    in2 = _mm_load_si128((const __m128i *)(input + 5 * stride + 0));
    in3 = _mm_load_si128((const __m128i *)(input + 5 * stride + 8));

    sum = _mm_add_epi16(sum, u1);
    u0 = _mm_add_epi16(in0, in1);
    u1 = _mm_add_epi16(in2, in3);
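    // Note on headroom (an assumption based on this being the
    // non-high-bitdepth path, with 9-bit signed residual input, |x| <= 255):
    // each of the eight 16-bit lanes of `sum` accumulates only 32 of the 256
    // samples, so a lane peaks at 32 * 255 = 8160, well inside int16 range.
    // The unpack-with-zero plus _mm_srai_epi32(x, 16) sequence after the loop
    // is the usual SSE2 idiom for sign-extending the 16-bit lanes to 32 bits
    // before the final horizontal reduction.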
sum = _mm_add_epi16(sum, u0); in0 = _mm_load_si128((const __m128i *)(input + 6 * stride + 0)); in1 = _mm_load_si128((const __m128i *)(input + 6 * stride + 8)); in2 = _mm_load_si128((const __m128i *)(input + 7 * stride + 0)); in3 = _mm_load_si128((const __m128i *)(input + 7 * stride + 8)); sum = _mm_add_epi16(sum, u1); u0 = _mm_add_epi16(in0, in1); u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); sum = _mm_add_epi16(sum, u1); input += 8 * stride; } u0 = _mm_setzero_si128(); in0 = _mm_unpacklo_epi16(u0, sum); in1 = _mm_unpackhi_epi16(u0, sum); in0 = _mm_srai_epi32(in0, 16); in1 = _mm_srai_epi32(in1, 16); sum = _mm_add_epi32(in0, in1); in0 = _mm_unpacklo_epi32(sum, u0); in1 = _mm_unpackhi_epi32(sum, u0); sum = _mm_add_epi32(in0, in1); in0 = _mm_srli_si128(sum, 8); in1 = _mm_add_epi32(sum, in0); in1 = _mm_srai_epi32(in1, 1); output[0] = (tran_low_t)_mm_cvtsi128_si32(in1); } void vpx_fdct32x32_1_sse2(const int16_t *input, tran_low_t *output, int stride) { __m128i in0, in1, in2, in3; __m128i u0, u1; __m128i sum = _mm_setzero_si128(); int i; for (i = 0; i < 8; ++i) { in0 = _mm_load_si128((const __m128i *)(input + 0)); in1 = _mm_load_si128((const __m128i *)(input + 8)); in2 = _mm_load_si128((const __m128i *)(input + 16)); in3 = _mm_load_si128((const __m128i *)(input + 24)); input += stride; u0 = _mm_add_epi16(in0, in1); u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); in0 = _mm_load_si128((const __m128i *)(input + 0)); in1 = _mm_load_si128((const __m128i *)(input + 8)); in2 = _mm_load_si128((const __m128i *)(input + 16)); in3 = _mm_load_si128((const __m128i *)(input + 24)); input += stride; sum = _mm_add_epi16(sum, u1); u0 = _mm_add_epi16(in0, in1); u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); in0 = _mm_load_si128((const __m128i *)(input + 0)); in1 = _mm_load_si128((const __m128i *)(input + 8)); in2 = _mm_load_si128((const __m128i *)(input + 16)); in3 = _mm_load_si128((const __m128i *)(input + 24)); input += stride; sum = _mm_add_epi16(sum, u1); u0 = _mm_add_epi16(in0, in1); u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); in0 = _mm_load_si128((const __m128i *)(input + 0)); in1 = _mm_load_si128((const __m128i *)(input + 8)); in2 = _mm_load_si128((const __m128i *)(input + 16)); in3 = _mm_load_si128((const __m128i *)(input + 24)); input += stride; sum = _mm_add_epi16(sum, u1); u0 = _mm_add_epi16(in0, in1); u1 = _mm_add_epi16(in2, in3); sum = _mm_add_epi16(sum, u0); sum = _mm_add_epi16(sum, u1); } u0 = _mm_setzero_si128(); in0 = _mm_unpacklo_epi16(u0, sum); in1 = _mm_unpackhi_epi16(u0, sum); in0 = _mm_srai_epi32(in0, 16); in1 = _mm_srai_epi32(in1, 16); sum = _mm_add_epi32(in0, in1); in0 = _mm_unpacklo_epi32(sum, u0); in1 = _mm_unpackhi_epi32(sum, u0); sum = _mm_add_epi32(in0, in1); in0 = _mm_srli_si128(sum, 8); in1 = _mm_add_epi32(sum, in0); in1 = _mm_srai_epi32(in1, 3); output[0] = (tran_low_t)_mm_cvtsi128_si32(in1); } #define DCT_HIGH_BIT_DEPTH 0 #define FDCT4x4_2D vpx_fdct4x4_sse2 #define FDCT8x8_2D vpx_fdct8x8_sse2 #define FDCT16x16_2D vpx_fdct16x16_sse2 #include "vpx_dsp/x86/fwd_txfm_impl_sse2.h" #undef FDCT4x4_2D #undef FDCT8x8_2D #undef FDCT16x16_2D #define FDCT32x32_2D vpx_fdct32x32_rd_sse2 #define FDCT32x32_HIGH_PRECISION 0 #include "vpx_dsp/x86/fwd_dct32x32_impl_sse2.h" #undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION #define FDCT32x32_2D vpx_fdct32x32_sse2 #define FDCT32x32_HIGH_PRECISION 1 #include "vpx_dsp/x86/fwd_dct32x32_impl_sse2.h" // NOLINT #undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION #undef DCT_HIGH_BIT_DEPTH #if 
CONFIG_VP9_HIGHBITDEPTH #define DCT_HIGH_BIT_DEPTH 1 #define FDCT4x4_2D vpx_highbd_fdct4x4_sse2 #define FDCT8x8_2D vpx_highbd_fdct8x8_sse2 #define FDCT16x16_2D vpx_highbd_fdct16x16_sse2 #include "vpx_dsp/x86/fwd_txfm_impl_sse2.h" // NOLINT #undef FDCT4x4_2D #undef FDCT8x8_2D #undef FDCT16x16_2D #define FDCT32x32_2D vpx_highbd_fdct32x32_rd_sse2 #define FDCT32x32_HIGH_PRECISION 0 #include "vpx_dsp/x86/fwd_dct32x32_impl_sse2.h" // NOLINT #undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION #define FDCT32x32_2D vpx_highbd_fdct32x32_sse2 #define FDCT32x32_HIGH_PRECISION 1 #include "vpx_dsp/x86/fwd_dct32x32_impl_sse2.h" // NOLINT #undef FDCT32x32_2D #undef FDCT32x32_HIGH_PRECISION #undef DCT_HIGH_BIT_DEPTH #endif // CONFIG_VP9_HIGHBITDEPTH libvpx-1.8.2/vpx_dsp/x86/fwd_txfm_sse2.h000066400000000000000000000375511357355204000201000ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_FWD_TXFM_SSE2_H_ #define VPX_VPX_DSP_X86_FWD_TXFM_SSE2_H_ #ifdef __cplusplus extern "C" { #endif #define pair_set_epi32(a, b) \ _mm_set_epi32((int)(b), (int)(a), (int)(b), (int)(a)) static INLINE __m128i k_madd_epi32(__m128i a, __m128i b) { __m128i buf0, buf1; buf0 = _mm_mul_epu32(a, b); a = _mm_srli_epi64(a, 32); b = _mm_srli_epi64(b, 32); buf1 = _mm_mul_epu32(a, b); return _mm_add_epi64(buf0, buf1); } static INLINE __m128i k_packs_epi64(__m128i a, __m128i b) { __m128i buf0 = _mm_shuffle_epi32(a, _MM_SHUFFLE(0, 0, 2, 0)); __m128i buf1 = _mm_shuffle_epi32(b, _MM_SHUFFLE(0, 0, 2, 0)); return _mm_unpacklo_epi64(buf0, buf1); } static INLINE int check_epi16_overflow_x2(const __m128i *preg0, const __m128i *preg1) { const __m128i max_overflow = _mm_set1_epi16(0x7fff); const __m128i min_overflow = _mm_set1_epi16((short)0x8000); __m128i cmp0 = _mm_or_si128(_mm_cmpeq_epi16(*preg0, max_overflow), _mm_cmpeq_epi16(*preg0, min_overflow)); __m128i cmp1 = _mm_or_si128(_mm_cmpeq_epi16(*preg1, max_overflow), _mm_cmpeq_epi16(*preg1, min_overflow)); cmp0 = _mm_or_si128(cmp0, cmp1); return _mm_movemask_epi8(cmp0); } static INLINE int check_epi16_overflow_x4(const __m128i *preg0, const __m128i *preg1, const __m128i *preg2, const __m128i *preg3) { const __m128i max_overflow = _mm_set1_epi16(0x7fff); const __m128i min_overflow = _mm_set1_epi16((short)0x8000); __m128i cmp0 = _mm_or_si128(_mm_cmpeq_epi16(*preg0, max_overflow), _mm_cmpeq_epi16(*preg0, min_overflow)); __m128i cmp1 = _mm_or_si128(_mm_cmpeq_epi16(*preg1, max_overflow), _mm_cmpeq_epi16(*preg1, min_overflow)); __m128i cmp2 = _mm_or_si128(_mm_cmpeq_epi16(*preg2, max_overflow), _mm_cmpeq_epi16(*preg2, min_overflow)); __m128i cmp3 = _mm_or_si128(_mm_cmpeq_epi16(*preg3, max_overflow), _mm_cmpeq_epi16(*preg3, min_overflow)); cmp0 = _mm_or_si128(_mm_or_si128(cmp0, cmp1), _mm_or_si128(cmp2, cmp3)); return _mm_movemask_epi8(cmp0); } static INLINE int check_epi16_overflow_x8( const __m128i *preg0, const __m128i *preg1, const __m128i *preg2, const __m128i *preg3, const __m128i *preg4, const __m128i *preg5, const __m128i *preg6, const __m128i *preg7) { int res0, res1; res0 = check_epi16_overflow_x4(preg0, preg1, preg2, preg3); res1 = check_epi16_overflow_x4(preg4, preg5, preg6, preg7); return res0 + res1; 
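  // Note (a sketch of the surrounding convention, not from this header): in
  // the DCT_HIGH_BIT_DEPTH build the ADD_EPI16/SUB_EPI16 macros appear to map
  // to the saturating _mm_adds_epi16/_mm_subs_epi16 (see the matching #undefs
  // at the end of the impl file above), so an overflowed lane pins to exactly
  // INT16_MAX or INT16_MIN, the two values these helpers compare against.
  // Callers treat any nonzero return as overflow and fall back to C, e.g.:
  //   if (check_epi16_overflow_x2(&r0, &r1)) {
  //     vpx_highbd_fdct16x16_c(input, output, stride);
  //     return;
  //   }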
} static INLINE int check_epi16_overflow_x12( const __m128i *preg0, const __m128i *preg1, const __m128i *preg2, const __m128i *preg3, const __m128i *preg4, const __m128i *preg5, const __m128i *preg6, const __m128i *preg7, const __m128i *preg8, const __m128i *preg9, const __m128i *preg10, const __m128i *preg11) { int res0, res1; res0 = check_epi16_overflow_x4(preg0, preg1, preg2, preg3); res1 = check_epi16_overflow_x4(preg4, preg5, preg6, preg7); if (!res0) res0 = check_epi16_overflow_x4(preg8, preg9, preg10, preg11); return res0 + res1; } static INLINE int check_epi16_overflow_x16( const __m128i *preg0, const __m128i *preg1, const __m128i *preg2, const __m128i *preg3, const __m128i *preg4, const __m128i *preg5, const __m128i *preg6, const __m128i *preg7, const __m128i *preg8, const __m128i *preg9, const __m128i *preg10, const __m128i *preg11, const __m128i *preg12, const __m128i *preg13, const __m128i *preg14, const __m128i *preg15) { int res0, res1; res0 = check_epi16_overflow_x4(preg0, preg1, preg2, preg3); res1 = check_epi16_overflow_x4(preg4, preg5, preg6, preg7); if (!res0) { res0 = check_epi16_overflow_x4(preg8, preg9, preg10, preg11); if (!res1) res1 = check_epi16_overflow_x4(preg12, preg13, preg14, preg15); } return res0 + res1; } static INLINE int check_epi16_overflow_x32( const __m128i *preg0, const __m128i *preg1, const __m128i *preg2, const __m128i *preg3, const __m128i *preg4, const __m128i *preg5, const __m128i *preg6, const __m128i *preg7, const __m128i *preg8, const __m128i *preg9, const __m128i *preg10, const __m128i *preg11, const __m128i *preg12, const __m128i *preg13, const __m128i *preg14, const __m128i *preg15, const __m128i *preg16, const __m128i *preg17, const __m128i *preg18, const __m128i *preg19, const __m128i *preg20, const __m128i *preg21, const __m128i *preg22, const __m128i *preg23, const __m128i *preg24, const __m128i *preg25, const __m128i *preg26, const __m128i *preg27, const __m128i *preg28, const __m128i *preg29, const __m128i *preg30, const __m128i *preg31) { int res0, res1; res0 = check_epi16_overflow_x4(preg0, preg1, preg2, preg3); res1 = check_epi16_overflow_x4(preg4, preg5, preg6, preg7); if (!res0) { res0 = check_epi16_overflow_x4(preg8, preg9, preg10, preg11); if (!res1) { res1 = check_epi16_overflow_x4(preg12, preg13, preg14, preg15); if (!res0) { res0 = check_epi16_overflow_x4(preg16, preg17, preg18, preg19); if (!res1) { res1 = check_epi16_overflow_x4(preg20, preg21, preg22, preg23); if (!res0) { res0 = check_epi16_overflow_x4(preg24, preg25, preg26, preg27); if (!res1) res1 = check_epi16_overflow_x4(preg28, preg29, preg30, preg31); } } } } } return res0 + res1; } static INLINE int k_check_epi32_overflow_4(const __m128i *preg0, const __m128i *preg1, const __m128i *preg2, const __m128i *preg3, const __m128i *zero) { __m128i minus_one = _mm_set1_epi32(-1); // Check for overflows __m128i reg0_shifted = _mm_slli_epi64(*preg0, 1); __m128i reg1_shifted = _mm_slli_epi64(*preg1, 1); __m128i reg2_shifted = _mm_slli_epi64(*preg2, 1); __m128i reg3_shifted = _mm_slli_epi64(*preg3, 1); __m128i reg0_top_dwords = _mm_shuffle_epi32(reg0_shifted, _MM_SHUFFLE(0, 0, 3, 1)); __m128i reg1_top_dwords = _mm_shuffle_epi32(reg1_shifted, _MM_SHUFFLE(0, 0, 3, 1)); __m128i reg2_top_dwords = _mm_shuffle_epi32(reg2_shifted, _MM_SHUFFLE(0, 0, 3, 1)); __m128i reg3_top_dwords = _mm_shuffle_epi32(reg3_shifted, _MM_SHUFFLE(0, 0, 3, 1)); __m128i top_dwords_01 = _mm_unpacklo_epi64(reg0_top_dwords, reg1_top_dwords); __m128i top_dwords_23 = _mm_unpacklo_epi64(reg2_top_dwords, 
reg3_top_dwords); __m128i valid_positve_01 = _mm_cmpeq_epi32(top_dwords_01, *zero); __m128i valid_positve_23 = _mm_cmpeq_epi32(top_dwords_23, *zero); __m128i valid_negative_01 = _mm_cmpeq_epi32(top_dwords_01, minus_one); __m128i valid_negative_23 = _mm_cmpeq_epi32(top_dwords_23, minus_one); int overflow_01 = _mm_movemask_epi8(_mm_cmpeq_epi32(valid_positve_01, valid_negative_01)); int overflow_23 = _mm_movemask_epi8(_mm_cmpeq_epi32(valid_positve_23, valid_negative_23)); return (overflow_01 + overflow_23); } static INLINE int k_check_epi32_overflow_8( const __m128i *preg0, const __m128i *preg1, const __m128i *preg2, const __m128i *preg3, const __m128i *preg4, const __m128i *preg5, const __m128i *preg6, const __m128i *preg7, const __m128i *zero) { int overflow = k_check_epi32_overflow_4(preg0, preg1, preg2, preg3, zero); if (!overflow) { overflow = k_check_epi32_overflow_4(preg4, preg5, preg6, preg7, zero); } return overflow; } static INLINE int k_check_epi32_overflow_16( const __m128i *preg0, const __m128i *preg1, const __m128i *preg2, const __m128i *preg3, const __m128i *preg4, const __m128i *preg5, const __m128i *preg6, const __m128i *preg7, const __m128i *preg8, const __m128i *preg9, const __m128i *preg10, const __m128i *preg11, const __m128i *preg12, const __m128i *preg13, const __m128i *preg14, const __m128i *preg15, const __m128i *zero) { int overflow = k_check_epi32_overflow_4(preg0, preg1, preg2, preg3, zero); if (!overflow) { overflow = k_check_epi32_overflow_4(preg4, preg5, preg6, preg7, zero); if (!overflow) { overflow = k_check_epi32_overflow_4(preg8, preg9, preg10, preg11, zero); if (!overflow) { overflow = k_check_epi32_overflow_4(preg12, preg13, preg14, preg15, zero); } } } return overflow; } static INLINE int k_check_epi32_overflow_32( const __m128i *preg0, const __m128i *preg1, const __m128i *preg2, const __m128i *preg3, const __m128i *preg4, const __m128i *preg5, const __m128i *preg6, const __m128i *preg7, const __m128i *preg8, const __m128i *preg9, const __m128i *preg10, const __m128i *preg11, const __m128i *preg12, const __m128i *preg13, const __m128i *preg14, const __m128i *preg15, const __m128i *preg16, const __m128i *preg17, const __m128i *preg18, const __m128i *preg19, const __m128i *preg20, const __m128i *preg21, const __m128i *preg22, const __m128i *preg23, const __m128i *preg24, const __m128i *preg25, const __m128i *preg26, const __m128i *preg27, const __m128i *preg28, const __m128i *preg29, const __m128i *preg30, const __m128i *preg31, const __m128i *zero) { int overflow = k_check_epi32_overflow_4(preg0, preg1, preg2, preg3, zero); if (!overflow) { overflow = k_check_epi32_overflow_4(preg4, preg5, preg6, preg7, zero); if (!overflow) { overflow = k_check_epi32_overflow_4(preg8, preg9, preg10, preg11, zero); if (!overflow) { overflow = k_check_epi32_overflow_4(preg12, preg13, preg14, preg15, zero); if (!overflow) { overflow = k_check_epi32_overflow_4(preg16, preg17, preg18, preg19, zero); if (!overflow) { overflow = k_check_epi32_overflow_4(preg20, preg21, preg22, preg23, zero); if (!overflow) { overflow = k_check_epi32_overflow_4(preg24, preg25, preg26, preg27, zero); if (!overflow) { overflow = k_check_epi32_overflow_4(preg28, preg29, preg30, preg31, zero); } } } } } } } return overflow; } static INLINE void store_output(const __m128i *poutput, tran_low_t *dst_ptr) { #if CONFIG_VP9_HIGHBITDEPTH const __m128i zero = _mm_setzero_si128(); const __m128i sign_bits = _mm_cmplt_epi16(*poutput, zero); __m128i out0 = _mm_unpacklo_epi16(*poutput, sign_bits); __m128i out1 = 
_mm_unpackhi_epi16(*poutput, sign_bits); _mm_store_si128((__m128i *)(dst_ptr), out0); _mm_store_si128((__m128i *)(dst_ptr + 4), out1); #else _mm_store_si128((__m128i *)(dst_ptr), *poutput); #endif // CONFIG_VP9_HIGHBITDEPTH } static INLINE void storeu_output(const __m128i *poutput, tran_low_t *dst_ptr) { #if CONFIG_VP9_HIGHBITDEPTH const __m128i zero = _mm_setzero_si128(); const __m128i sign_bits = _mm_cmplt_epi16(*poutput, zero); __m128i out0 = _mm_unpacklo_epi16(*poutput, sign_bits); __m128i out1 = _mm_unpackhi_epi16(*poutput, sign_bits); _mm_storeu_si128((__m128i *)(dst_ptr), out0); _mm_storeu_si128((__m128i *)(dst_ptr + 4), out1); #else _mm_storeu_si128((__m128i *)(dst_ptr), *poutput); #endif // CONFIG_VP9_HIGHBITDEPTH } static INLINE __m128i mult_round_shift(const __m128i *pin0, const __m128i *pin1, const __m128i *pmultiplier, const __m128i *prounding, const int shift) { const __m128i u0 = _mm_madd_epi16(*pin0, *pmultiplier); const __m128i u1 = _mm_madd_epi16(*pin1, *pmultiplier); const __m128i v0 = _mm_add_epi32(u0, *prounding); const __m128i v1 = _mm_add_epi32(u1, *prounding); const __m128i w0 = _mm_srai_epi32(v0, shift); const __m128i w1 = _mm_srai_epi32(v1, shift); return _mm_packs_epi32(w0, w1); } static INLINE void transpose_and_output8x8( const __m128i *pin00, const __m128i *pin01, const __m128i *pin02, const __m128i *pin03, const __m128i *pin04, const __m128i *pin05, const __m128i *pin06, const __m128i *pin07, const int pass, int16_t *out0_ptr, tran_low_t *out1_ptr) { // 00 01 02 03 04 05 06 07 // 10 11 12 13 14 15 16 17 // 20 21 22 23 24 25 26 27 // 30 31 32 33 34 35 36 37 // 40 41 42 43 44 45 46 47 // 50 51 52 53 54 55 56 57 // 60 61 62 63 64 65 66 67 // 70 71 72 73 74 75 76 77 const __m128i tr0_0 = _mm_unpacklo_epi16(*pin00, *pin01); const __m128i tr0_1 = _mm_unpacklo_epi16(*pin02, *pin03); const __m128i tr0_2 = _mm_unpackhi_epi16(*pin00, *pin01); const __m128i tr0_3 = _mm_unpackhi_epi16(*pin02, *pin03); const __m128i tr0_4 = _mm_unpacklo_epi16(*pin04, *pin05); const __m128i tr0_5 = _mm_unpacklo_epi16(*pin06, *pin07); const __m128i tr0_6 = _mm_unpackhi_epi16(*pin04, *pin05); const __m128i tr0_7 = _mm_unpackhi_epi16(*pin06, *pin07); // 00 10 01 11 02 12 03 13 // 20 30 21 31 22 32 23 33 // 04 14 05 15 06 16 07 17 // 24 34 25 35 26 36 27 37 // 40 50 41 51 42 52 43 53 // 60 70 61 71 62 72 63 73 // 54 54 55 55 56 56 57 57 // 64 74 65 75 66 76 67 77 const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3); const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); const __m128i tr1_4 = _mm_unpacklo_epi32(tr0_4, tr0_5); const __m128i tr1_5 = _mm_unpacklo_epi32(tr0_6, tr0_7); const __m128i tr1_6 = _mm_unpackhi_epi32(tr0_4, tr0_5); const __m128i tr1_7 = _mm_unpackhi_epi32(tr0_6, tr0_7); // 00 10 20 30 01 11 21 31 // 40 50 60 70 41 51 61 71 // 02 12 22 32 03 13 23 33 // 42 52 62 72 43 53 63 73 // 04 14 24 34 05 15 21 36 // 44 54 64 74 45 55 61 76 // 06 16 26 36 07 17 27 37 // 46 56 66 76 47 57 67 77 const __m128i tr2_0 = _mm_unpacklo_epi64(tr1_0, tr1_4); const __m128i tr2_1 = _mm_unpackhi_epi64(tr1_0, tr1_4); const __m128i tr2_2 = _mm_unpacklo_epi64(tr1_2, tr1_6); const __m128i tr2_3 = _mm_unpackhi_epi64(tr1_2, tr1_6); const __m128i tr2_4 = _mm_unpacklo_epi64(tr1_1, tr1_5); const __m128i tr2_5 = _mm_unpackhi_epi64(tr1_1, tr1_5); const __m128i tr2_6 = _mm_unpacklo_epi64(tr1_3, tr1_7); const __m128i tr2_7 = _mm_unpackhi_epi64(tr1_3, tr1_7); // 00 10 20 30 40 50 60 70 // 01 11 21 
31 41 51 61 71 // 02 12 22 32 42 52 62 72 // 03 13 23 33 43 53 63 73 // 04 14 24 34 44 54 64 74 // 05 15 25 35 45 55 65 75 // 06 16 26 36 46 56 66 76 // 07 17 27 37 47 57 67 77 if (pass == 0) { _mm_storeu_si128((__m128i *)(out0_ptr + 0 * 16), tr2_0); _mm_storeu_si128((__m128i *)(out0_ptr + 1 * 16), tr2_1); _mm_storeu_si128((__m128i *)(out0_ptr + 2 * 16), tr2_2); _mm_storeu_si128((__m128i *)(out0_ptr + 3 * 16), tr2_3); _mm_storeu_si128((__m128i *)(out0_ptr + 4 * 16), tr2_4); _mm_storeu_si128((__m128i *)(out0_ptr + 5 * 16), tr2_5); _mm_storeu_si128((__m128i *)(out0_ptr + 6 * 16), tr2_6); _mm_storeu_si128((__m128i *)(out0_ptr + 7 * 16), tr2_7); } else { storeu_output(&tr2_0, (out1_ptr + 0 * 16)); storeu_output(&tr2_1, (out1_ptr + 1 * 16)); storeu_output(&tr2_2, (out1_ptr + 2 * 16)); storeu_output(&tr2_3, (out1_ptr + 3 * 16)); storeu_output(&tr2_4, (out1_ptr + 4 * 16)); storeu_output(&tr2_5, (out1_ptr + 5 * 16)); storeu_output(&tr2_6, (out1_ptr + 6 * 16)); storeu_output(&tr2_7, (out1_ptr + 7 * 16)); } } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_DSP_X86_FWD_TXFM_SSE2_H_ libvpx-1.8.2/vpx_dsp/x86/fwd_txfm_ssse3_x86_64.asm000066400000000000000000000160601357355204000216230ustar00rootroot00000000000000; ; Copyright (c) 2015 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "third_party/x86inc/x86inc.asm" SECTION_RODATA pw_11585x2: times 8 dw 23170 pd_8192: times 4 dd 8192 %macro TRANSFORM_COEFFS 2 pw_%1_%2: dw %1, %2, %1, %2, %1, %2, %1, %2 pw_%2_m%1: dw %2, -%1, %2, -%1, %2, -%1, %2, -%1 %endmacro TRANSFORM_COEFFS 11585, 11585 TRANSFORM_COEFFS 15137, 6270 TRANSFORM_COEFFS 16069, 3196 TRANSFORM_COEFFS 9102, 13623 SECTION .text %if VPX_ARCH_X86_64 INIT_XMM ssse3 cglobal fdct8x8, 3, 5, 13, input, output, stride mova m8, [GLOBAL(pd_8192)] mova m12, [GLOBAL(pw_11585x2)] lea r3, [2 * strideq] lea r4, [4 * strideq] mova m0, [inputq] mova m1, [inputq + r3] lea inputq, [inputq + r4] mova m2, [inputq] mova m3, [inputq + r3] lea inputq, [inputq + r4] mova m4, [inputq] mova m5, [inputq + r3] lea inputq, [inputq + r4] mova m6, [inputq] mova m7, [inputq + r3] ; left shift by 2 to increase forward transformation precision psllw m0, 2 psllw m1, 2 psllw m2, 2 psllw m3, 2 psllw m4, 2 psllw m5, 2 psllw m6, 2 psllw m7, 2 ; column transform ; stage 1 paddw m10, m0, m7 psubw m0, m7 paddw m9, m1, m6 psubw m1, m6 paddw m7, m2, m5 psubw m2, m5 paddw m6, m3, m4 psubw m3, m4 ; stage 2 paddw m5, m9, m7 psubw m9, m7 paddw m4, m10, m6 psubw m10, m6 paddw m7, m1, m2 psubw m1, m2 ; stage 3 paddw m6, m4, m5 psubw m4, m5 pmulhrsw m1, m12 pmulhrsw m7, m12 ; sin(pi / 8), cos(pi / 8) punpcklwd m2, m10, m9 punpckhwd m10, m9 pmaddwd m5, m2, [GLOBAL(pw_15137_6270)] pmaddwd m2, [GLOBAL(pw_6270_m15137)] pmaddwd m9, m10, [GLOBAL(pw_15137_6270)] pmaddwd m10, [GLOBAL(pw_6270_m15137)] paddd m5, m8 paddd m2, m8 paddd m9, m8 paddd m10, m8 psrad m5, 14 psrad m2, 14 psrad m9, 14 psrad m10, 14 packssdw m5, m9 packssdw m2, m10 pmulhrsw m6, m12 pmulhrsw m4, m12 paddw m9, m3, m1 psubw m3, m1 paddw m10, m0, m7 psubw m0, m7 ; stage 4 ; sin(pi / 16), cos(pi / 16) punpcklwd m1, m10, m9 punpckhwd m10, m9 pmaddwd m7, m1, [GLOBAL(pw_16069_3196)] pmaddwd m1, [GLOBAL(pw_3196_m16069)] pmaddwd m9, m10, 
[GLOBAL(pw_16069_3196)] pmaddwd m10, [GLOBAL(pw_3196_m16069)] paddd m7, m8 paddd m1, m8 paddd m9, m8 paddd m10, m8 psrad m7, 14 psrad m1, 14 psrad m9, 14 psrad m10, 14 packssdw m7, m9 packssdw m1, m10 ; sin(3 * pi / 16), cos(3 * pi / 16) punpcklwd m11, m0, m3 punpckhwd m0, m3 pmaddwd m9, m11, [GLOBAL(pw_9102_13623)] pmaddwd m11, [GLOBAL(pw_13623_m9102)] pmaddwd m3, m0, [GLOBAL(pw_9102_13623)] pmaddwd m0, [GLOBAL(pw_13623_m9102)] paddd m9, m8 paddd m11, m8 paddd m3, m8 paddd m0, m8 psrad m9, 14 psrad m11, 14 psrad m3, 14 psrad m0, 14 packssdw m9, m3 packssdw m11, m0 ; transpose ; stage 1 punpcklwd m0, m6, m7 punpcklwd m3, m5, m11 punpckhwd m6, m7 punpckhwd m5, m11 punpcklwd m7, m4, m9 punpcklwd m10, m2, m1 punpckhwd m4, m9 punpckhwd m2, m1 ; stage 2 punpckldq m9, m0, m3 punpckldq m1, m6, m5 punpckhdq m0, m3 punpckhdq m6, m5 punpckldq m3, m7, m10 punpckldq m5, m4, m2 punpckhdq m7, m10 punpckhdq m4, m2 ; stage 3 punpcklqdq m10, m9, m3 punpckhqdq m9, m3 punpcklqdq m2, m0, m7 punpckhqdq m0, m7 punpcklqdq m3, m1, m5 punpckhqdq m1, m5 punpcklqdq m7, m6, m4 punpckhqdq m6, m4 ; row transform ; stage 1 paddw m5, m10, m6 psubw m10, m6 paddw m4, m9, m7 psubw m9, m7 paddw m6, m2, m1 psubw m2, m1 paddw m7, m0, m3 psubw m0, m3 ;stage 2 paddw m1, m5, m7 psubw m5, m7 paddw m3, m4, m6 psubw m4, m6 paddw m7, m9, m2 psubw m9, m2 ; stage 3 punpcklwd m6, m1, m3 punpckhwd m1, m3 pmaddwd m2, m6, [GLOBAL(pw_11585_11585)] pmaddwd m6, [GLOBAL(pw_11585_m11585)] pmaddwd m3, m1, [GLOBAL(pw_11585_11585)] pmaddwd m1, [GLOBAL(pw_11585_m11585)] paddd m2, m8 paddd m6, m8 paddd m3, m8 paddd m1, m8 psrad m2, 14 psrad m6, 14 psrad m3, 14 psrad m1, 14 packssdw m2, m3 packssdw m6, m1 pmulhrsw m7, m12 pmulhrsw m9, m12 punpcklwd m3, m5, m4 punpckhwd m5, m4 pmaddwd m1, m3, [GLOBAL(pw_15137_6270)] pmaddwd m3, [GLOBAL(pw_6270_m15137)] pmaddwd m4, m5, [GLOBAL(pw_15137_6270)] pmaddwd m5, [GLOBAL(pw_6270_m15137)] paddd m1, m8 paddd m3, m8 paddd m4, m8 paddd m5, m8 psrad m1, 14 psrad m3, 14 psrad m4, 14 psrad m5, 14 packssdw m1, m4 packssdw m3, m5 paddw m4, m0, m9 psubw m0, m9 paddw m5, m10, m7 psubw m10, m7 ; stage 4 punpcklwd m9, m5, m4 punpckhwd m5, m4 pmaddwd m7, m9, [GLOBAL(pw_16069_3196)] pmaddwd m9, [GLOBAL(pw_3196_m16069)] pmaddwd m4, m5, [GLOBAL(pw_16069_3196)] pmaddwd m5, [GLOBAL(pw_3196_m16069)] paddd m7, m8 paddd m9, m8 paddd m4, m8 paddd m5, m8 psrad m7, 14 psrad m9, 14 psrad m4, 14 psrad m5, 14 packssdw m7, m4 packssdw m9, m5 punpcklwd m4, m10, m0 punpckhwd m10, m0 pmaddwd m5, m4, [GLOBAL(pw_9102_13623)] pmaddwd m4, [GLOBAL(pw_13623_m9102)] pmaddwd m0, m10, [GLOBAL(pw_9102_13623)] pmaddwd m10, [GLOBAL(pw_13623_m9102)] paddd m5, m8 paddd m4, m8 paddd m0, m8 paddd m10, m8 psrad m5, 14 psrad m4, 14 psrad m0, 14 psrad m10, 14 packssdw m5, m0 packssdw m4, m10 ; transpose ; stage 1 punpcklwd m0, m2, m7 punpcklwd m10, m1, m4 punpckhwd m2, m7 punpckhwd m1, m4 punpcklwd m7, m6, m5 punpcklwd m4, m3, m9 punpckhwd m6, m5 punpckhwd m3, m9 ; stage 2 punpckldq m5, m0, m10 punpckldq m9, m2, m1 punpckhdq m0, m10 punpckhdq m2, m1 punpckldq m10, m7, m4 punpckldq m1, m6, m3 punpckhdq m7, m4 punpckhdq m6, m3 ; stage 3 punpcklqdq m4, m5, m10 punpckhqdq m5, m10 punpcklqdq m3, m0, m7 punpckhqdq m0, m7 punpcklqdq m10, m9, m1 punpckhqdq m9, m1 punpcklqdq m7, m2, m6 punpckhqdq m2, m6 psraw m1, m4, 15 psraw m6, m5, 15 psraw m8, m3, 15 psraw m11, m0, 15 psubw m4, m1 psubw m5, m6 psubw m3, m8 psubw m0, m11 psraw m4, 1 psraw m5, 1 psraw m3, 1 psraw m0, 1 psraw m1, m10, 15 psraw m6, m9, 15 psraw m8, m7, 15 psraw m11, m2, 15 psubw m10, m1 psubw m9, m6 
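  ; Note on the psraw/psubw/psraw triple above and below: psraw reg, 15
  ; builds a mask that is 0 for non-negative lanes and -1 for negative ones,
  ; psubw then adds 1 back into the negative lanes, and the final
  ; psraw reg, 1 shifts right arithmetically. Together they compute
  ; (x + (x < 0)) >> 1, a truncating divide by two, matching the final
  ; `/= 2` scaling of the C reference fdct8x8.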
  psubw m7, m8
  psubw m2, m11
  psraw m10, 1
  psraw m9, 1
  psraw m7, 1
  psraw m2, 1

  mova [outputq + 0], m4
  mova [outputq + 16], m5
  mova [outputq + 32], m3
  mova [outputq + 48], m0
  mova [outputq + 64], m10
  mova [outputq + 80], m9
  mova [outputq + 96], m7
  mova [outputq + 112], m2
  RET
%endif
libvpx-1.8.2/vpx_dsp/x86/highbd_convolve_avx2.c000066400000000000000000001542541357355204000214170ustar00rootroot00000000000000/*
 * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <immintrin.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/x86/convolve.h"
#include "vpx_dsp/x86/convolve_avx2.h"

// -----------------------------------------------------------------------------
// Copy and average

void vpx_highbd_convolve_copy_avx2(const uint16_t *src, ptrdiff_t src_stride,
                                   uint16_t *dst, ptrdiff_t dst_stride,
                                   const InterpKernel *filter, int x0_q4,
                                   int x_step_q4, int y0_q4, int y_step_q4,
                                   int w, int h, int bd) {
  (void)filter;
  (void)x0_q4;
  (void)x_step_q4;
  (void)y0_q4;
  (void)y_step_q4;
  (void)bd;

  assert(w % 4 == 0);
  if (w > 32) {  // w = 64
    do {
      const __m256i p0 = _mm256_loadu_si256((const __m256i *)src);
      const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16));
      const __m256i p2 = _mm256_loadu_si256((const __m256i *)(src + 32));
      const __m256i p3 = _mm256_loadu_si256((const __m256i *)(src + 48));
      src += src_stride;
      _mm256_storeu_si256((__m256i *)dst, p0);
      _mm256_storeu_si256((__m256i *)(dst + 16), p1);
      _mm256_storeu_si256((__m256i *)(dst + 32), p2);
      _mm256_storeu_si256((__m256i *)(dst + 48), p3);
      dst += dst_stride;
      h--;
    } while (h > 0);
  } else if (w > 16) {  // w = 32
    do {
      const __m256i p0 = _mm256_loadu_si256((const __m256i *)src);
      const __m256i p1 = _mm256_loadu_si256((const __m256i *)(src + 16));
      src += src_stride;
      _mm256_storeu_si256((__m256i *)dst, p0);
      _mm256_storeu_si256((__m256i *)(dst + 16), p1);
      dst += dst_stride;
      h--;
    } while (h > 0);
  } else if (w > 8) {  // w = 16
    __m256i p0, p1;
    do {
      p0 = _mm256_loadu_si256((const __m256i *)src);
      src += src_stride;
      p1 = _mm256_loadu_si256((const __m256i *)src);
      src += src_stride;
      _mm256_storeu_si256((__m256i *)dst, p0);
      dst += dst_stride;
      _mm256_storeu_si256((__m256i *)dst, p1);
      dst += dst_stride;
      h -= 2;
    } while (h > 0);
  } else if (w > 4) {  // w = 8
    __m128i p0, p1;
    do {
      p0 = _mm_loadu_si128((const __m128i *)src);
      src += src_stride;
      p1 = _mm_loadu_si128((const __m128i *)src);
      src += src_stride;
      _mm_storeu_si128((__m128i *)dst, p0);
      dst += dst_stride;
      _mm_storeu_si128((__m128i *)dst, p1);
      dst += dst_stride;
      h -= 2;
    } while (h > 0);
  } else {  // w = 4
    __m128i p0, p1;
    do {
      p0 = _mm_loadl_epi64((const __m128i *)src);
      src += src_stride;
      p1 = _mm_loadl_epi64((const __m128i *)src);
      src += src_stride;
      _mm_storel_epi64((__m128i *)dst, p0);
      dst += dst_stride;
      _mm_storel_epi64((__m128i *)dst, p1);
      dst += dst_stride;
      h -= 2;
    } while (h > 0);
  }
}

void vpx_highbd_convolve_avg_avx2(const uint16_t *src, ptrdiff_t src_stride,
                                  uint16_t *dst, ptrdiff_t dst_stride,
                                  const InterpKernel *filter, int x0_q4,
                                  int x_step_q4, int y0_q4, int y_step_q4,
                                  int w, int h, int bd) {
  (void)filter;
  (void)x0_q4;
  (void)x_step_q4;
  (void)y0_q4;
  (void)y_step_q4;
  (void)bd;

  assert(w % 4 == 0);
  if (w > 32) {  // w = 64
    __m256i p0, p1, p2, p3, u0, u1, u2, u3;
    do {
      p0 =
_mm256_loadu_si256((const __m256i *)src); p1 = _mm256_loadu_si256((const __m256i *)(src + 16)); p2 = _mm256_loadu_si256((const __m256i *)(src + 32)); p3 = _mm256_loadu_si256((const __m256i *)(src + 48)); src += src_stride; u0 = _mm256_loadu_si256((const __m256i *)dst); u1 = _mm256_loadu_si256((const __m256i *)(dst + 16)); u2 = _mm256_loadu_si256((const __m256i *)(dst + 32)); u3 = _mm256_loadu_si256((const __m256i *)(dst + 48)); _mm256_storeu_si256((__m256i *)dst, _mm256_avg_epu16(p0, u0)); _mm256_storeu_si256((__m256i *)(dst + 16), _mm256_avg_epu16(p1, u1)); _mm256_storeu_si256((__m256i *)(dst + 32), _mm256_avg_epu16(p2, u2)); _mm256_storeu_si256((__m256i *)(dst + 48), _mm256_avg_epu16(p3, u3)); dst += dst_stride; h--; } while (h > 0); } else if (w > 16) { // w = 32 __m256i p0, p1, u0, u1; do { p0 = _mm256_loadu_si256((const __m256i *)src); p1 = _mm256_loadu_si256((const __m256i *)(src + 16)); src += src_stride; u0 = _mm256_loadu_si256((const __m256i *)dst); u1 = _mm256_loadu_si256((const __m256i *)(dst + 16)); _mm256_storeu_si256((__m256i *)dst, _mm256_avg_epu16(p0, u0)); _mm256_storeu_si256((__m256i *)(dst + 16), _mm256_avg_epu16(p1, u1)); dst += dst_stride; h--; } while (h > 0); } else if (w > 8) { // w = 16 __m256i p0, p1, u0, u1; do { p0 = _mm256_loadu_si256((const __m256i *)src); p1 = _mm256_loadu_si256((const __m256i *)(src + src_stride)); src += src_stride << 1; u0 = _mm256_loadu_si256((const __m256i *)dst); u1 = _mm256_loadu_si256((const __m256i *)(dst + dst_stride)); _mm256_storeu_si256((__m256i *)dst, _mm256_avg_epu16(p0, u0)); _mm256_storeu_si256((__m256i *)(dst + dst_stride), _mm256_avg_epu16(p1, u1)); dst += dst_stride << 1; h -= 2; } while (h > 0); } else if (w > 4) { // w = 8 __m128i p0, p1, u0, u1; do { p0 = _mm_loadu_si128((const __m128i *)src); p1 = _mm_loadu_si128((const __m128i *)(src + src_stride)); src += src_stride << 1; u0 = _mm_loadu_si128((const __m128i *)dst); u1 = _mm_loadu_si128((const __m128i *)(dst + dst_stride)); _mm_storeu_si128((__m128i *)dst, _mm_avg_epu16(p0, u0)); _mm_storeu_si128((__m128i *)(dst + dst_stride), _mm_avg_epu16(p1, u1)); dst += dst_stride << 1; h -= 2; } while (h > 0); } else { // w = 4 __m128i p0, p1, u0, u1; do { p0 = _mm_loadl_epi64((const __m128i *)src); p1 = _mm_loadl_epi64((const __m128i *)(src + src_stride)); src += src_stride << 1; u0 = _mm_loadl_epi64((const __m128i *)dst); u1 = _mm_loadl_epi64((const __m128i *)(dst + dst_stride)); _mm_storel_epi64((__m128i *)dst, _mm_avg_epu16(u0, p0)); _mm_storel_epi64((__m128i *)(dst + dst_stride), _mm_avg_epu16(u1, p1)); dst += dst_stride << 1; h -= 2; } while (h > 0); } } // ----------------------------------------------------------------------------- // Horizontal and vertical filtering static const uint8_t signal_pattern_0[32] = { 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9 }; static const uint8_t signal_pattern_1[32] = { 4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13 }; static const uint8_t signal_pattern_2[32] = { 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 12, 13, 14, 15, 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 12, 13, 14, 15 }; static const uint32_t signal_index[8] = { 2, 3, 4, 5, 2, 3, 4, 5 }; #define CONV8_ROUNDING_BITS (7) #define CONV8_ROUNDING_NUM (1 << (CONV8_ROUNDING_BITS - 1)) // ----------------------------------------------------------------------------- // Horizontal Filtering static INLINE void pack_pixels(const __m256i *s, __m256i *p /*p[4]*/) { const __m256i idx 
= _mm256_loadu_si256((const __m256i *)signal_index); const __m256i sf0 = _mm256_loadu_si256((const __m256i *)signal_pattern_0); const __m256i sf1 = _mm256_loadu_si256((const __m256i *)signal_pattern_1); const __m256i c = _mm256_permutevar8x32_epi32(*s, idx); p[0] = _mm256_shuffle_epi8(*s, sf0); // x0x6 p[1] = _mm256_shuffle_epi8(*s, sf1); // x1x7 p[2] = _mm256_shuffle_epi8(c, sf0); // x2x4 p[3] = _mm256_shuffle_epi8(c, sf1); // x3x5 } // Note: // Shared by 8x2 and 16x1 block static INLINE void pack_16_pixels(const __m256i *s0, const __m256i *s1, __m256i *x /*x[8]*/) { __m256i pp[8]; pack_pixels(s0, pp); pack_pixels(s1, &pp[4]); x[0] = _mm256_permute2x128_si256(pp[0], pp[4], 0x20); x[1] = _mm256_permute2x128_si256(pp[1], pp[5], 0x20); x[2] = _mm256_permute2x128_si256(pp[2], pp[6], 0x20); x[3] = _mm256_permute2x128_si256(pp[3], pp[7], 0x20); x[4] = x[2]; x[5] = x[3]; x[6] = _mm256_permute2x128_si256(pp[0], pp[4], 0x31); x[7] = _mm256_permute2x128_si256(pp[1], pp[5], 0x31); } static INLINE void pack_8x1_pixels(const uint16_t *src, __m256i *x) { __m256i pp[8]; __m256i s0; s0 = _mm256_loadu_si256((const __m256i *)src); pack_pixels(&s0, pp); x[0] = _mm256_permute2x128_si256(pp[0], pp[2], 0x30); x[1] = _mm256_permute2x128_si256(pp[1], pp[3], 0x30); x[2] = _mm256_permute2x128_si256(pp[2], pp[0], 0x30); x[3] = _mm256_permute2x128_si256(pp[3], pp[1], 0x30); } static INLINE void pack_8x2_pixels(const uint16_t *src, ptrdiff_t stride, __m256i *x) { __m256i s0, s1; s0 = _mm256_loadu_si256((const __m256i *)src); s1 = _mm256_loadu_si256((const __m256i *)(src + stride)); pack_16_pixels(&s0, &s1, x); } static INLINE void pack_16x1_pixels(const uint16_t *src, __m256i *x) { __m256i s0, s1; s0 = _mm256_loadu_si256((const __m256i *)src); s1 = _mm256_loadu_si256((const __m256i *)(src + 8)); pack_16_pixels(&s0, &s1, x); } // Note: // Shared by horizontal and vertical filtering static INLINE void pack_filters(const int16_t *filter, __m256i *f /*f[4]*/) { const __m128i h = _mm_loadu_si128((const __m128i *)filter); const __m256i hh = _mm256_insertf128_si256(_mm256_castsi128_si256(h), h, 1); const __m256i p0 = _mm256_set1_epi32(0x03020100); const __m256i p1 = _mm256_set1_epi32(0x07060504); const __m256i p2 = _mm256_set1_epi32(0x0b0a0908); const __m256i p3 = _mm256_set1_epi32(0x0f0e0d0c); f[0] = _mm256_shuffle_epi8(hh, p0); f[1] = _mm256_shuffle_epi8(hh, p1); f[2] = _mm256_shuffle_epi8(hh, p2); f[3] = _mm256_shuffle_epi8(hh, p3); } static INLINE void filter_8x1_pixels(const __m256i *sig /*sig[4]*/, const __m256i *fil /*fil[4]*/, __m256i *y) { __m256i a, a0, a1; a0 = _mm256_madd_epi16(fil[0], sig[0]); a1 = _mm256_madd_epi16(fil[3], sig[3]); a = _mm256_add_epi32(a0, a1); a0 = _mm256_madd_epi16(fil[1], sig[1]); a1 = _mm256_madd_epi16(fil[2], sig[2]); { const __m256i min = _mm256_min_epi32(a0, a1); a = _mm256_add_epi32(a, min); } { const __m256i max = _mm256_max_epi32(a0, a1); a = _mm256_add_epi32(a, max); } { const __m256i rounding = _mm256_set1_epi32(1 << (CONV8_ROUNDING_BITS - 1)); a = _mm256_add_epi32(a, rounding); *y = _mm256_srai_epi32(a, CONV8_ROUNDING_BITS); } } static INLINE void store_8x1_pixels(const __m256i *y, const __m256i *mask, uint16_t *dst) { const __m128i a0 = _mm256_castsi256_si128(*y); const __m128i a1 = _mm256_extractf128_si256(*y, 1); __m128i res = _mm_packus_epi32(a0, a1); res = _mm_min_epi16(res, _mm256_castsi256_si128(*mask)); _mm_storeu_si128((__m128i *)dst, res); } static INLINE void store_8x2_pixels(const __m256i *y0, const __m256i *y1, const __m256i *mask, uint16_t *dst, ptrdiff_t pitch) { 
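  // Note: this helper relies on the clamping pattern shared by all the
  // high-bitdepth stores in this file: _mm256_packus_epi32 narrows the 32-bit
  // sums to 16 bits with unsigned saturation (negative results clamp to
  // zero), and _mm256_min_epi16 against *mask, which callers load with
  // (1 << bd) - 1 (e.g. 1023 for 10-bit), enforces the bit-depth ceiling
  // before the two rows are written out.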
__m256i a = _mm256_packus_epi32(*y0, *y1); a = _mm256_min_epi16(a, *mask); _mm_storeu_si128((__m128i *)dst, _mm256_castsi256_si128(a)); _mm_storeu_si128((__m128i *)(dst + pitch), _mm256_extractf128_si256(a, 1)); } static INLINE void store_16x1_pixels(const __m256i *y0, const __m256i *y1, const __m256i *mask, uint16_t *dst) { __m256i a = _mm256_packus_epi32(*y0, *y1); a = _mm256_min_epi16(a, *mask); _mm256_storeu_si256((__m256i *)dst, a); } static void vpx_highbd_filter_block1d8_h8_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[8], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff[4]; pack_filters(filter, ff); src_ptr -= 3; do { pack_8x2_pixels(src_ptr, src_pitch, signal); filter_8x1_pixels(signal, ff, &res0); filter_8x1_pixels(&signal[4], ff, &res1); store_8x2_pixels(&res0, &res1, &max, dst_ptr, dst_pitch); height -= 2; src_ptr += src_pitch << 1; dst_ptr += dst_pitch << 1; } while (height > 1); if (height > 0) { pack_8x1_pixels(src_ptr, signal); filter_8x1_pixels(signal, ff, &res0); store_8x1_pixels(&res0, &max, dst_ptr); } } static void vpx_highbd_filter_block1d16_h8_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[8], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff[4]; pack_filters(filter, ff); src_ptr -= 3; do { pack_16x1_pixels(src_ptr, signal); filter_8x1_pixels(signal, ff, &res0); filter_8x1_pixels(&signal[4], ff, &res1); store_16x1_pixels(&res0, &res1, &max, dst_ptr); height -= 1; src_ptr += src_pitch; dst_ptr += dst_pitch; } while (height > 0); } // ----------------------------------------------------------------------------- // 2-tap horizontal filtering static INLINE void pack_2t_filter(const int16_t *filter, __m256i *f) { const __m128i h = _mm_loadu_si128((const __m128i *)filter); const __m256i hh = _mm256_insertf128_si256(_mm256_castsi128_si256(h), h, 1); const __m256i p = _mm256_set1_epi32(0x09080706); f[0] = _mm256_shuffle_epi8(hh, p); } // can be used by pack_8x2_2t_pixels() and pack_16x1_2t_pixels() // the difference is s0/s1 specifies first and second rows or, // first 16 samples and 8-sample shifted 16 samples static INLINE void pack_16_2t_pixels(const __m256i *s0, const __m256i *s1, __m256i *sig) { const __m256i idx = _mm256_loadu_si256((const __m256i *)signal_index); const __m256i sf2 = _mm256_loadu_si256((const __m256i *)signal_pattern_2); __m256i x0 = _mm256_shuffle_epi8(*s0, sf2); __m256i x1 = _mm256_shuffle_epi8(*s1, sf2); __m256i r0 = _mm256_permutevar8x32_epi32(*s0, idx); __m256i r1 = _mm256_permutevar8x32_epi32(*s1, idx); r0 = _mm256_shuffle_epi8(r0, sf2); r1 = _mm256_shuffle_epi8(r1, sf2); sig[0] = _mm256_permute2x128_si256(x0, x1, 0x20); sig[1] = _mm256_permute2x128_si256(r0, r1, 0x20); } static INLINE void pack_8x2_2t_pixels(const uint16_t *src, const ptrdiff_t pitch, __m256i *sig) { const __m256i r0 = _mm256_loadu_si256((const __m256i *)src); const __m256i r1 = _mm256_loadu_si256((const __m256i *)(src + pitch)); pack_16_2t_pixels(&r0, &r1, sig); } static INLINE void pack_16x1_2t_pixels(const uint16_t *src, __m256i *sig /*sig[2]*/) { const __m256i r0 = _mm256_loadu_si256((const __m256i *)src); const __m256i r1 = _mm256_loadu_si256((const __m256i *)(src + 8)); pack_16_2t_pixels(&r0, &r1, sig); } static INLINE void pack_8x1_2t_pixels(const uint16_t *src, __m256i *sig /*sig[2]*/) { const 
__m256i idx = _mm256_loadu_si256((const __m256i *)signal_index); const __m256i sf2 = _mm256_loadu_si256((const __m256i *)signal_pattern_2); __m256i r0 = _mm256_loadu_si256((const __m256i *)src); __m256i x0 = _mm256_shuffle_epi8(r0, sf2); r0 = _mm256_permutevar8x32_epi32(r0, idx); r0 = _mm256_shuffle_epi8(r0, sf2); sig[0] = _mm256_permute2x128_si256(x0, r0, 0x20); } // can be used by filter_8x2_2t_pixels() and filter_16x1_2t_pixels() static INLINE void filter_16_2t_pixels(const __m256i *sig, const __m256i *f, __m256i *y0, __m256i *y1) { const __m256i rounding = _mm256_set1_epi32(1 << (CONV8_ROUNDING_BITS - 1)); __m256i x0 = _mm256_madd_epi16(sig[0], *f); __m256i x1 = _mm256_madd_epi16(sig[1], *f); x0 = _mm256_add_epi32(x0, rounding); x1 = _mm256_add_epi32(x1, rounding); *y0 = _mm256_srai_epi32(x0, CONV8_ROUNDING_BITS); *y1 = _mm256_srai_epi32(x1, CONV8_ROUNDING_BITS); } static INLINE void filter_8x1_2t_pixels(const __m256i *sig, const __m256i *f, __m256i *y0) { const __m256i rounding = _mm256_set1_epi32(1 << (CONV8_ROUNDING_BITS - 1)); __m256i x0 = _mm256_madd_epi16(sig[0], *f); x0 = _mm256_add_epi32(x0, rounding); *y0 = _mm256_srai_epi32(x0, CONV8_ROUNDING_BITS); } static void vpx_highbd_filter_block1d8_h2_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[2], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff; pack_2t_filter(filter, &ff); src_ptr -= 3; do { pack_8x2_2t_pixels(src_ptr, src_pitch, signal); filter_16_2t_pixels(signal, &ff, &res0, &res1); store_8x2_pixels(&res0, &res1, &max, dst_ptr, dst_pitch); height -= 2; src_ptr += src_pitch << 1; dst_ptr += dst_pitch << 1; } while (height > 1); if (height > 0) { pack_8x1_2t_pixels(src_ptr, signal); filter_8x1_2t_pixels(signal, &ff, &res0); store_8x1_pixels(&res0, &max, dst_ptr); } } static void vpx_highbd_filter_block1d16_h2_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[2], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff; pack_2t_filter(filter, &ff); src_ptr -= 3; do { pack_16x1_2t_pixels(src_ptr, signal); filter_16_2t_pixels(signal, &ff, &res0, &res1); store_16x1_pixels(&res0, &res1, &max, dst_ptr); height -= 1; src_ptr += src_pitch; dst_ptr += dst_pitch; } while (height > 0); } // ----------------------------------------------------------------------------- // Vertical Filtering static void pack_8x9_init(const uint16_t *src, ptrdiff_t pitch, __m256i *sig) { __m256i s0 = _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)src)); __m256i s1 = _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)(src + pitch))); __m256i s2 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src + 2 * pitch))); __m256i s3 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src + 3 * pitch))); __m256i s4 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src + 4 * pitch))); __m256i s5 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src + 5 * pitch))); __m256i s6 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src + 6 * pitch))); s0 = _mm256_inserti128_si256(s0, _mm256_castsi256_si128(s1), 1); s1 = _mm256_inserti128_si256(s1, _mm256_castsi256_si128(s2), 1); s2 = _mm256_inserti128_si256(s2, _mm256_castsi256_si128(s3), 1); s3 = _mm256_inserti128_si256(s3, _mm256_castsi256_si128(s4), 1); s4 = _mm256_inserti128_si256(s4, 
_mm256_castsi256_si128(s5), 1); s5 = _mm256_inserti128_si256(s5, _mm256_castsi256_si128(s6), 1); sig[0] = _mm256_unpacklo_epi16(s0, s1); sig[4] = _mm256_unpackhi_epi16(s0, s1); sig[1] = _mm256_unpacklo_epi16(s2, s3); sig[5] = _mm256_unpackhi_epi16(s2, s3); sig[2] = _mm256_unpacklo_epi16(s4, s5); sig[6] = _mm256_unpackhi_epi16(s4, s5); sig[8] = s6; } static INLINE void pack_8x9_pixels(const uint16_t *src, ptrdiff_t pitch, __m256i *sig) { // base + 7th row __m256i s0 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src + 7 * pitch))); // base + 8th row __m256i s1 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src + 8 * pitch))); __m256i s2 = _mm256_inserti128_si256(sig[8], _mm256_castsi256_si128(s0), 1); __m256i s3 = _mm256_inserti128_si256(s0, _mm256_castsi256_si128(s1), 1); sig[3] = _mm256_unpacklo_epi16(s2, s3); sig[7] = _mm256_unpackhi_epi16(s2, s3); sig[8] = s1; } static INLINE void filter_8x9_pixels(const __m256i *sig, const __m256i *f, __m256i *y0, __m256i *y1) { filter_8x1_pixels(sig, f, y0); filter_8x1_pixels(&sig[4], f, y1); } static INLINE void update_pixels(__m256i *sig) { int i; for (i = 0; i < 3; ++i) { sig[i] = sig[i + 1]; sig[i + 4] = sig[i + 5]; } } static void vpx_highbd_filter_block1d8_v8_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[9], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff[4]; pack_filters(filter, ff); pack_8x9_init(src_ptr, src_pitch, signal); do { pack_8x9_pixels(src_ptr, src_pitch, signal); filter_8x9_pixels(signal, ff, &res0, &res1); store_8x2_pixels(&res0, &res1, &max, dst_ptr, dst_pitch); update_pixels(signal); src_ptr += src_pitch << 1; dst_ptr += dst_pitch << 1; height -= 2; } while (height > 0); } static void pack_16x9_init(const uint16_t *src, ptrdiff_t pitch, __m256i *sig) { __m256i u0, u1, u2, u3; // load 0-6 rows const __m256i s0 = _mm256_loadu_si256((const __m256i *)src); const __m256i s1 = _mm256_loadu_si256((const __m256i *)(src + pitch)); const __m256i s2 = _mm256_loadu_si256((const __m256i *)(src + 2 * pitch)); const __m256i s3 = _mm256_loadu_si256((const __m256i *)(src + 3 * pitch)); const __m256i s4 = _mm256_loadu_si256((const __m256i *)(src + 4 * pitch)); const __m256i s5 = _mm256_loadu_si256((const __m256i *)(src + 5 * pitch)); const __m256i s6 = _mm256_loadu_si256((const __m256i *)(src + 6 * pitch)); u0 = _mm256_permute2x128_si256(s0, s1, 0x20); // 0, 1 low u1 = _mm256_permute2x128_si256(s0, s1, 0x31); // 0, 1 high u2 = _mm256_permute2x128_si256(s1, s2, 0x20); // 1, 2 low u3 = _mm256_permute2x128_si256(s1, s2, 0x31); // 1, 2 high sig[0] = _mm256_unpacklo_epi16(u0, u2); sig[4] = _mm256_unpackhi_epi16(u0, u2); sig[8] = _mm256_unpacklo_epi16(u1, u3); sig[12] = _mm256_unpackhi_epi16(u1, u3); u0 = _mm256_permute2x128_si256(s2, s3, 0x20); u1 = _mm256_permute2x128_si256(s2, s3, 0x31); u2 = _mm256_permute2x128_si256(s3, s4, 0x20); u3 = _mm256_permute2x128_si256(s3, s4, 0x31); sig[1] = _mm256_unpacklo_epi16(u0, u2); sig[5] = _mm256_unpackhi_epi16(u0, u2); sig[9] = _mm256_unpacklo_epi16(u1, u3); sig[13] = _mm256_unpackhi_epi16(u1, u3); u0 = _mm256_permute2x128_si256(s4, s5, 0x20); u1 = _mm256_permute2x128_si256(s4, s5, 0x31); u2 = _mm256_permute2x128_si256(s5, s6, 0x20); u3 = _mm256_permute2x128_si256(s5, s6, 0x31); sig[2] = _mm256_unpacklo_epi16(u0, u2); sig[6] = _mm256_unpackhi_epi16(u0, u2); sig[10] = _mm256_unpacklo_epi16(u1, u3); sig[14] = _mm256_unpackhi_epi16(u1, u3); sig[16] = 
s6; } static void pack_16x9_pixels(const uint16_t *src, ptrdiff_t pitch, __m256i *sig) { // base + 7th row const __m256i s7 = _mm256_loadu_si256((const __m256i *)(src + 7 * pitch)); // base + 8th row const __m256i s8 = _mm256_loadu_si256((const __m256i *)(src + 8 * pitch)); __m256i u0, u1, u2, u3; u0 = _mm256_permute2x128_si256(sig[16], s7, 0x20); u1 = _mm256_permute2x128_si256(sig[16], s7, 0x31); u2 = _mm256_permute2x128_si256(s7, s8, 0x20); u3 = _mm256_permute2x128_si256(s7, s8, 0x31); sig[3] = _mm256_unpacklo_epi16(u0, u2); sig[7] = _mm256_unpackhi_epi16(u0, u2); sig[11] = _mm256_unpacklo_epi16(u1, u3); sig[15] = _mm256_unpackhi_epi16(u1, u3); sig[16] = s8; } static INLINE void filter_16x9_pixels(const __m256i *sig, const __m256i *f, __m256i *y0, __m256i *y1) { __m256i res[4]; int i; for (i = 0; i < 4; ++i) { filter_8x1_pixels(&sig[i << 2], f, &res[i]); } { const __m256i l0l1 = _mm256_packus_epi32(res[0], res[1]); const __m256i h0h1 = _mm256_packus_epi32(res[2], res[3]); *y0 = _mm256_permute2x128_si256(l0l1, h0h1, 0x20); *y1 = _mm256_permute2x128_si256(l0l1, h0h1, 0x31); } } static INLINE void store_16x2_pixels(const __m256i *y0, const __m256i *y1, const __m256i *mask, uint16_t *dst, ptrdiff_t pitch) { __m256i p = _mm256_min_epi16(*y0, *mask); _mm256_storeu_si256((__m256i *)dst, p); p = _mm256_min_epi16(*y1, *mask); _mm256_storeu_si256((__m256i *)(dst + pitch), p); } static void update_16x9_pixels(__m256i *sig) { update_pixels(&sig[0]); update_pixels(&sig[8]); } static void vpx_highbd_filter_block1d16_v8_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[17], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff[4]; pack_filters(filter, ff); pack_16x9_init(src_ptr, src_pitch, signal); do { pack_16x9_pixels(src_ptr, src_pitch, signal); filter_16x9_pixels(signal, ff, &res0, &res1); store_16x2_pixels(&res0, &res1, &max, dst_ptr, dst_pitch); update_16x9_pixels(signal); src_ptr += src_pitch << 1; dst_ptr += dst_pitch << 1; height -= 2; } while (height > 0); } // ----------------------------------------------------------------------------- // 2-tap vertical filtering static void pack_16x2_init(const uint16_t *src, __m256i *sig) { sig[2] = _mm256_loadu_si256((const __m256i *)src); } static INLINE void pack_16x2_2t_pixels(const uint16_t *src, ptrdiff_t pitch, __m256i *sig) { // load the next row const __m256i u = _mm256_loadu_si256((const __m256i *)(src + pitch)); sig[0] = _mm256_unpacklo_epi16(sig[2], u); sig[1] = _mm256_unpackhi_epi16(sig[2], u); sig[2] = u; } static INLINE void filter_16x2_2t_pixels(const __m256i *sig, const __m256i *f, __m256i *y0, __m256i *y1) { filter_16_2t_pixels(sig, f, y0, y1); } static void vpx_highbd_filter_block1d16_v2_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[3], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff; pack_2t_filter(filter, &ff); pack_16x2_init(src_ptr, signal); do { pack_16x2_2t_pixels(src_ptr, src_pitch, signal); filter_16x2_2t_pixels(signal, &ff, &res0, &res1); store_16x1_pixels(&res0, &res1, &max, dst_ptr); src_ptr += src_pitch; dst_ptr += dst_pitch; height -= 1; } while (height > 0); } static INLINE void pack_8x1_2t_filter(const int16_t *filter, __m128i *f) { const __m128i h = _mm_loadu_si128((const __m128i *)filter); const __m128i p = _mm_set1_epi32(0x09080706); f[0] = 
_mm_shuffle_epi8(h, p); } static void pack_8x2_init(const uint16_t *src, __m128i *sig) { sig[2] = _mm_loadu_si128((const __m128i *)src); } static INLINE void pack_8x2_2t_pixels_ver(const uint16_t *src, ptrdiff_t pitch, __m128i *sig) { // load the next row const __m128i u = _mm_loadu_si128((const __m128i *)(src + pitch)); sig[0] = _mm_unpacklo_epi16(sig[2], u); sig[1] = _mm_unpackhi_epi16(sig[2], u); sig[2] = u; } static INLINE void filter_8_2t_pixels(const __m128i *sig, const __m128i *f, __m128i *y0, __m128i *y1) { const __m128i rounding = _mm_set1_epi32(1 << (CONV8_ROUNDING_BITS - 1)); __m128i x0 = _mm_madd_epi16(sig[0], *f); __m128i x1 = _mm_madd_epi16(sig[1], *f); x0 = _mm_add_epi32(x0, rounding); x1 = _mm_add_epi32(x1, rounding); *y0 = _mm_srai_epi32(x0, CONV8_ROUNDING_BITS); *y1 = _mm_srai_epi32(x1, CONV8_ROUNDING_BITS); } static INLINE void store_8x1_2t_pixels_ver(const __m128i *y0, const __m128i *y1, const __m128i *mask, uint16_t *dst) { __m128i res = _mm_packus_epi32(*y0, *y1); res = _mm_min_epi16(res, *mask); _mm_storeu_si128((__m128i *)dst, res); } static void vpx_highbd_filter_block1d8_v2_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m128i signal[3], res0, res1; const __m128i max = _mm_set1_epi16((1 << bd) - 1); __m128i ff; pack_8x1_2t_filter(filter, &ff); pack_8x2_init(src_ptr, signal); do { pack_8x2_2t_pixels_ver(src_ptr, src_pitch, signal); filter_8_2t_pixels(signal, &ff, &res0, &res1); store_8x1_2t_pixels_ver(&res0, &res1, &max, dst_ptr); src_ptr += src_pitch; dst_ptr += dst_pitch; height -= 1; } while (height > 0); } // Calculation with averaging the input pixels static INLINE void store_8x1_avg_pixels(const __m256i *y0, const __m256i *mask, uint16_t *dst) { const __m128i a0 = _mm256_castsi256_si128(*y0); const __m128i a1 = _mm256_extractf128_si256(*y0, 1); __m128i res = _mm_packus_epi32(a0, a1); const __m128i pix = _mm_loadu_si128((const __m128i *)dst); res = _mm_min_epi16(res, _mm256_castsi256_si128(*mask)); res = _mm_avg_epu16(res, pix); _mm_storeu_si128((__m128i *)dst, res); } static INLINE void store_8x2_avg_pixels(const __m256i *y0, const __m256i *y1, const __m256i *mask, uint16_t *dst, ptrdiff_t pitch) { __m256i a = _mm256_packus_epi32(*y0, *y1); const __m128i pix0 = _mm_loadu_si128((const __m128i *)dst); const __m128i pix1 = _mm_loadu_si128((const __m128i *)(dst + pitch)); const __m256i pix = _mm256_insertf128_si256(_mm256_castsi128_si256(pix0), pix1, 1); a = _mm256_min_epi16(a, *mask); a = _mm256_avg_epu16(a, pix); _mm_storeu_si128((__m128i *)dst, _mm256_castsi256_si128(a)); _mm_storeu_si128((__m128i *)(dst + pitch), _mm256_extractf128_si256(a, 1)); } static INLINE void store_16x1_avg_pixels(const __m256i *y0, const __m256i *y1, const __m256i *mask, uint16_t *dst) { __m256i a = _mm256_packus_epi32(*y0, *y1); const __m256i pix = _mm256_loadu_si256((const __m256i *)dst); a = _mm256_min_epi16(a, *mask); a = _mm256_avg_epu16(a, pix); _mm256_storeu_si256((__m256i *)dst, a); } static INLINE void store_16x2_avg_pixels(const __m256i *y0, const __m256i *y1, const __m256i *mask, uint16_t *dst, ptrdiff_t pitch) { const __m256i pix0 = _mm256_loadu_si256((const __m256i *)dst); const __m256i pix1 = _mm256_loadu_si256((const __m256i *)(dst + pitch)); __m256i p = _mm256_min_epi16(*y0, *mask); p = _mm256_avg_epu16(p, pix0); _mm256_storeu_si256((__m256i *)dst, p); p = _mm256_min_epi16(*y1, *mask); p = _mm256_avg_epu16(p, pix1); _mm256_storeu_si256((__m256i *)(dst + pitch), p); } static 
INLINE void store_8x1_2t_avg_pixels_ver(const __m128i *y0, const __m128i *y1, const __m128i *mask, uint16_t *dst) { __m128i res = _mm_packus_epi32(*y0, *y1); const __m128i pix = _mm_loadu_si128((const __m128i *)dst); res = _mm_min_epi16(res, *mask); res = _mm_avg_epu16(res, pix); _mm_storeu_si128((__m128i *)dst, res); } static void vpx_highbd_filter_block1d8_h8_avg_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[8], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff[4]; pack_filters(filter, ff); src_ptr -= 3; do { pack_8x2_pixels(src_ptr, src_pitch, signal); filter_8x1_pixels(signal, ff, &res0); filter_8x1_pixels(&signal[4], ff, &res1); store_8x2_avg_pixels(&res0, &res1, &max, dst_ptr, dst_pitch); height -= 2; src_ptr += src_pitch << 1; dst_ptr += dst_pitch << 1; } while (height > 1); if (height > 0) { pack_8x1_pixels(src_ptr, signal); filter_8x1_pixels(signal, ff, &res0); store_8x1_avg_pixels(&res0, &max, dst_ptr); } } static void vpx_highbd_filter_block1d16_h8_avg_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[8], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff[4]; pack_filters(filter, ff); src_ptr -= 3; do { pack_16x1_pixels(src_ptr, signal); filter_8x1_pixels(signal, ff, &res0); filter_8x1_pixels(&signal[4], ff, &res1); store_16x1_avg_pixels(&res0, &res1, &max, dst_ptr); height -= 1; src_ptr += src_pitch; dst_ptr += dst_pitch; } while (height > 0); } static void vpx_highbd_filter_block1d4_h4_avx2( const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { // We extract the middle four elements of the kernel into two registers in // the form // ... k[3] k[2] k[3] k[2] // ... k[5] k[4] k[5] k[4] // Then we shuffle the source into // ... s[1] s[0] s[0] s[-1] // ... s[3] s[2] s[2] s[1] // Calling multiply and add gives us half of the sum. Calling add on the two // halves gives us the output. Since avx2 allows us to use 256-bit buffer, we // can do this two rows at a time. 
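  // Scalar sketch of what follows (illustrative only, assuming the middle
  // taps k[2]..k[5] of the 8-tap kernel): for each output position x,
  //   sum = k[2] * s[x - 1] + k[3] * s[x] + k[4] * s[x + 1] + k[5] * s[x + 2];
  //   dst[x] = clamp(ROUND_POWER_OF_TWO(sum, CONV8_ROUNDING_BITS), 0,
  //                  (1 << bd) - 1);
  // where CONV8_ROUNDING_BITS is 7 and CONV8_ROUNDING_NUM (64) supplies the
  // rounding term added before the shift.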
__m256i src_reg, src_reg_shift_0, src_reg_shift_2; __m256i res_reg; __m256i idx_shift_0 = _mm256_setr_epi8(0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9); __m256i idx_shift_2 = _mm256_setr_epi8(4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13); __m128i kernel_reg_128; // Kernel __m256i kernel_reg, kernel_reg_23, kernel_reg_45; // Segments of the kernel used const __m256i reg_round = _mm256_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding const __m256i reg_max = _mm256_set1_epi16((1 << bd) - 1); const ptrdiff_t unrolled_src_stride = src_stride << 1; const ptrdiff_t unrolled_dst_stride = dst_stride << 1; int h; // Start one pixel before as we need tap/2 - 1 = 1 sample from the past src_ptr -= 1; // Load Kernel kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); kernel_reg_23 = _mm256_shuffle_epi32(kernel_reg, 0x55); kernel_reg_45 = _mm256_shuffle_epi32(kernel_reg, 0xaa); for (h = height; h >= 2; h -= 2) { // Load the source src_reg = mm256_loadu2_si128(src_ptr, src_ptr + src_stride); src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); // Get the output res_reg = mm256_madd_add_epi32(&src_reg_shift_0, &src_reg_shift_2, &kernel_reg_23, &kernel_reg_45); // Round the result res_reg = mm256_round_epi32(&res_reg, ®_round, CONV8_ROUNDING_BITS); // Finally combine to get the final dst res_reg = _mm256_packus_epi32(res_reg, res_reg); res_reg = _mm256_min_epi16(res_reg, reg_max); mm256_storeu2_epi64((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), &res_reg); src_ptr += unrolled_src_stride; dst_ptr += unrolled_dst_stride; } // Repeat for the last row if needed if (h > 0) { // Load the source src_reg = mm256_loadu2_si128(src_ptr, src_ptr + 4); src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); // Get the output res_reg = mm256_madd_add_epi32(&src_reg_shift_0, &src_reg_shift_2, &kernel_reg_23, &kernel_reg_45); // Round the result res_reg = mm256_round_epi32(&res_reg, ®_round, CONV8_ROUNDING_BITS); // Finally combine to get the final dst res_reg = _mm256_packus_epi32(res_reg, res_reg); res_reg = _mm256_min_epi16(res_reg, reg_max); _mm_storel_epi64((__m128i *)dst_ptr, _mm256_castsi256_si128(res_reg)); } } static void vpx_highbd_filter_block1d8_h4_avx2( const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { // We will extract the middle four elements of the kernel into two registers // in the form // ... k[3] k[2] k[3] k[2] // ... k[5] k[4] k[5] k[4] // Then we shuffle the source into // ... s[1] s[0] s[0] s[-1] // ... s[3] s[2] s[2] s[1] // Calling multiply and add gives us half of the sum of the first half. // Calling add gives us first half of the output. Repat again to get the whole // output. Since avx2 allows us to use 256-bit buffer, we can do this two rows // at a time. 
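  //
  // Added note (not in the original source): the 8-wide row is produced as
  // two 4-wide halves.  The load at src_ptr covers output columns 0-3 (taps
  // read s[-1]..s[4]) and the reload at src_ptr + 4 covers columns 4-7 (taps
  // read s[3]..s[8]); _mm256_packus_epi32() then merges the two 32-bit
  // halves back into one row of eight 16-bit pixels.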
__m256i src_reg, src_reg_shift_0, src_reg_shift_2; __m256i res_reg, res_first, res_last; __m256i idx_shift_0 = _mm256_setr_epi8(0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9, 0, 1, 2, 3, 2, 3, 4, 5, 4, 5, 6, 7, 6, 7, 8, 9); __m256i idx_shift_2 = _mm256_setr_epi8(4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13, 4, 5, 6, 7, 6, 7, 8, 9, 8, 9, 10, 11, 10, 11, 12, 13); __m128i kernel_reg_128; // Kernel __m256i kernel_reg, kernel_reg_23, kernel_reg_45; // Segments of the kernel used const __m256i reg_round = _mm256_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding const __m256i reg_max = _mm256_set1_epi16((1 << bd) - 1); const ptrdiff_t unrolled_src_stride = src_stride << 1; const ptrdiff_t unrolled_dst_stride = dst_stride << 1; int h; // Start one pixel before as we need tap/2 - 1 = 1 sample from the past src_ptr -= 1; // Load Kernel kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); kernel_reg_23 = _mm256_shuffle_epi32(kernel_reg, 0x55); kernel_reg_45 = _mm256_shuffle_epi32(kernel_reg, 0xaa); for (h = height; h >= 2; h -= 2) { // Load the source src_reg = mm256_loadu2_si128(src_ptr, src_ptr + src_stride); src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); // Result for first half res_first = mm256_madd_add_epi32(&src_reg_shift_0, &src_reg_shift_2, &kernel_reg_23, &kernel_reg_45); // Do again to get the second half of dst // Load the source src_reg = mm256_loadu2_si128(src_ptr + 4, src_ptr + src_stride + 4); src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); // Result for second half res_last = mm256_madd_add_epi32(&src_reg_shift_0, &src_reg_shift_2, &kernel_reg_23, &kernel_reg_45); // Round each result res_first = mm256_round_epi32(&res_first, ®_round, CONV8_ROUNDING_BITS); res_last = mm256_round_epi32(&res_last, ®_round, CONV8_ROUNDING_BITS); // Finally combine to get the final dst res_reg = _mm256_packus_epi32(res_first, res_last); res_reg = _mm256_min_epi16(res_reg, reg_max); mm256_store2_si128((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), &res_reg); src_ptr += unrolled_src_stride; dst_ptr += unrolled_dst_stride; } // Repeat for the last row if needed if (h > 0) { src_reg = mm256_loadu2_si128(src_ptr, src_ptr + 4); src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); res_reg = mm256_madd_add_epi32(&src_reg_shift_0, &src_reg_shift_2, &kernel_reg_23, &kernel_reg_45); res_reg = mm256_round_epi32(&res_reg, ®_round, CONV8_ROUNDING_BITS); res_reg = _mm256_packus_epi32(res_reg, res_reg); res_reg = _mm256_min_epi16(res_reg, reg_max); mm256_storeu2_epi64((__m128i *)dst_ptr, (__m128i *)(dst_ptr + 4), &res_reg); } } static void vpx_highbd_filter_block1d16_h4_avx2( const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { vpx_highbd_filter_block1d8_h4_avx2(src_ptr, src_stride, dst_ptr, dst_stride, height, kernel, bd); vpx_highbd_filter_block1d8_h4_avx2(src_ptr + 8, src_stride, dst_ptr + 8, dst_stride, height, kernel, bd); } static void vpx_highbd_filter_block1d8_v8_avg_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[9], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff[4]; pack_filters(filter, ff); 
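  // Added note (not in the original source): the _avg_ variants blend the
  // filtered result with the pixels already present in dst_ptr.  Per output
  // pixel this is dst = (dst + clamp(filtered, 0, (1 << bd) - 1) + 1) >> 1,
  // since _mm_avg_epu16()/_mm256_avg_epu16() round upward; this is the
  // vpx_highbd_convolve8_avg_* (compound prediction) path.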
pack_8x9_init(src_ptr, src_pitch, signal); do { pack_8x9_pixels(src_ptr, src_pitch, signal); filter_8x9_pixels(signal, ff, &res0, &res1); store_8x2_avg_pixels(&res0, &res1, &max, dst_ptr, dst_pitch); update_pixels(signal); src_ptr += src_pitch << 1; dst_ptr += dst_pitch << 1; height -= 2; } while (height > 0); } static void vpx_highbd_filter_block1d16_v8_avg_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[17], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff[4]; pack_filters(filter, ff); pack_16x9_init(src_ptr, src_pitch, signal); do { pack_16x9_pixels(src_ptr, src_pitch, signal); filter_16x9_pixels(signal, ff, &res0, &res1); store_16x2_avg_pixels(&res0, &res1, &max, dst_ptr, dst_pitch); update_16x9_pixels(signal); src_ptr += src_pitch << 1; dst_ptr += dst_pitch << 1; height -= 2; } while (height > 0); } static void vpx_highbd_filter_block1d8_h2_avg_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[2], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff; pack_2t_filter(filter, &ff); src_ptr -= 3; do { pack_8x2_2t_pixels(src_ptr, src_pitch, signal); filter_16_2t_pixels(signal, &ff, &res0, &res1); store_8x2_avg_pixels(&res0, &res1, &max, dst_ptr, dst_pitch); height -= 2; src_ptr += src_pitch << 1; dst_ptr += dst_pitch << 1; } while (height > 1); if (height > 0) { pack_8x1_2t_pixels(src_ptr, signal); filter_8x1_2t_pixels(signal, &ff, &res0); store_8x1_avg_pixels(&res0, &max, dst_ptr); } } static void vpx_highbd_filter_block1d16_h2_avg_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[2], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff; pack_2t_filter(filter, &ff); src_ptr -= 3; do { pack_16x1_2t_pixels(src_ptr, signal); filter_16_2t_pixels(signal, &ff, &res0, &res1); store_16x1_avg_pixels(&res0, &res1, &max, dst_ptr); height -= 1; src_ptr += src_pitch; dst_ptr += dst_pitch; } while (height > 0); } static void vpx_highbd_filter_block1d16_v2_avg_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m256i signal[3], res0, res1; const __m256i max = _mm256_set1_epi16((1 << bd) - 1); __m256i ff; pack_2t_filter(filter, &ff); pack_16x2_init(src_ptr, signal); do { pack_16x2_2t_pixels(src_ptr, src_pitch, signal); filter_16x2_2t_pixels(signal, &ff, &res0, &res1); store_16x1_avg_pixels(&res0, &res1, &max, dst_ptr); src_ptr += src_pitch; dst_ptr += dst_pitch; height -= 1; } while (height > 0); } static void vpx_highbd_filter_block1d8_v2_avg_avx2( const uint16_t *src_ptr, ptrdiff_t src_pitch, uint16_t *dst_ptr, ptrdiff_t dst_pitch, uint32_t height, const int16_t *filter, int bd) { __m128i signal[3], res0, res1; const __m128i max = _mm_set1_epi16((1 << bd) - 1); __m128i ff; pack_8x1_2t_filter(filter, &ff); pack_8x2_init(src_ptr, signal); do { pack_8x2_2t_pixels_ver(src_ptr, src_pitch, signal); filter_8_2t_pixels(signal, &ff, &res0, &res1); store_8x1_2t_avg_pixels_ver(&res0, &res1, &max, dst_ptr); src_ptr += src_pitch; dst_ptr += dst_pitch; height -= 1; } while (height > 0); } static void vpx_highbd_filter_block1d4_v4_avx2( const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, ptrdiff_t dst_stride, uint32_t 
height, const int16_t *kernel, int bd) { // We will load two rows of pixels and rearrange them into the form // ... s[1,0] s[0,0] s[0,0] s[-1,0] // so that we can call multiply and add with the kernel partial output. Then // we can call add with another row to get the output. // Register for source s[-1:3, :] __m256i src_reg_1, src_reg_2, src_reg_3; // Interleaved rows of the source. lo is first half, hi second __m256i src_reg_m10, src_reg_01, src_reg_12, src_reg_23; __m256i src_reg_m1001, src_reg_1223; // Result after multiply and add __m256i res_reg; __m128i kernel_reg_128; // Kernel __m256i kernel_reg, kernel_reg_23, kernel_reg_45; // Segments of kernel used const __m256i reg_round = _mm256_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding const __m256i reg_max = _mm256_set1_epi16((1 << bd) - 1); const ptrdiff_t src_stride_unrolled = src_stride << 1; const ptrdiff_t dst_stride_unrolled = dst_stride << 1; int h; // Load Kernel kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); kernel_reg_23 = _mm256_shuffle_epi32(kernel_reg, 0x55); kernel_reg_45 = _mm256_shuffle_epi32(kernel_reg, 0xaa); // Row -1 to row 0 src_reg_m10 = mm256_loadu2_epi64((const __m128i *)src_ptr, (const __m128i *)(src_ptr + src_stride)); // Row 0 to row 1 src_reg_1 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2))); src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); // First three rows src_reg_m1001 = _mm256_unpacklo_epi16(src_reg_m10, src_reg_01); for (h = height; h > 1; h -= 2) { src_reg_2 = _mm256_castsi128_si256( _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 3))); src_reg_12 = _mm256_inserti128_si256(src_reg_1, _mm256_castsi256_si128(src_reg_2), 1); src_reg_3 = _mm256_castsi128_si256( _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 4))); src_reg_23 = _mm256_inserti128_si256(src_reg_2, _mm256_castsi256_si128(src_reg_3), 1); // Last three rows src_reg_1223 = _mm256_unpacklo_epi16(src_reg_12, src_reg_23); // Output res_reg = mm256_madd_add_epi32(&src_reg_m1001, &src_reg_1223, &kernel_reg_23, &kernel_reg_45); // Round the words res_reg = mm256_round_epi32(&res_reg, ®_round, CONV8_ROUNDING_BITS); // Combine to get the result res_reg = _mm256_packus_epi32(res_reg, res_reg); res_reg = _mm256_min_epi16(res_reg, reg_max); // Save the result mm256_storeu2_epi64((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), &res_reg); // Update the source by two rows src_ptr += src_stride_unrolled; dst_ptr += dst_stride_unrolled; src_reg_m1001 = src_reg_1223; src_reg_1 = src_reg_3; } } static void vpx_highbd_filter_block1d8_v4_avx2( const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { // We will load two rows of pixels and rearrange them into the form // ... s[1,0] s[0,0] s[0,0] s[-1,0] // so that we can call multiply and add with the kernel partial output. Then // we can call add with another row to get the output. // Register for source s[-1:3, :] __m256i src_reg_1, src_reg_2, src_reg_3; // Interleaved rows of the source. 
lo is first half, hi second __m256i src_reg_m10, src_reg_01, src_reg_12, src_reg_23; __m256i src_reg_m1001_lo, src_reg_m1001_hi, src_reg_1223_lo, src_reg_1223_hi; __m128i kernel_reg_128; // Kernel __m256i kernel_reg, kernel_reg_23, kernel_reg_45; // Segments of kernel // Result after multiply and add __m256i res_reg, res_reg_lo, res_reg_hi; const __m256i reg_round = _mm256_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding const __m256i reg_max = _mm256_set1_epi16((1 << bd) - 1); const ptrdiff_t src_stride_unrolled = src_stride << 1; const ptrdiff_t dst_stride_unrolled = dst_stride << 1; int h; // Load Kernel kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); kernel_reg_23 = _mm256_shuffle_epi32(kernel_reg, 0x55); kernel_reg_45 = _mm256_shuffle_epi32(kernel_reg, 0xaa); // Row -1 to row 0 src_reg_m10 = mm256_loadu2_si128((const __m128i *)src_ptr, (const __m128i *)(src_ptr + src_stride)); // Row 0 to row 1 src_reg_1 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2))); src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); // First three rows src_reg_m1001_lo = _mm256_unpacklo_epi16(src_reg_m10, src_reg_01); src_reg_m1001_hi = _mm256_unpackhi_epi16(src_reg_m10, src_reg_01); for (h = height; h > 1; h -= 2) { src_reg_2 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3))); src_reg_12 = _mm256_inserti128_si256(src_reg_1, _mm256_castsi256_si128(src_reg_2), 1); src_reg_3 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4))); src_reg_23 = _mm256_inserti128_si256(src_reg_2, _mm256_castsi256_si128(src_reg_3), 1); // Last three rows src_reg_1223_lo = _mm256_unpacklo_epi16(src_reg_12, src_reg_23); src_reg_1223_hi = _mm256_unpackhi_epi16(src_reg_12, src_reg_23); // Output from first half res_reg_lo = mm256_madd_add_epi32(&src_reg_m1001_lo, &src_reg_1223_lo, &kernel_reg_23, &kernel_reg_45); // Output from second half res_reg_hi = mm256_madd_add_epi32(&src_reg_m1001_hi, &src_reg_1223_hi, &kernel_reg_23, &kernel_reg_45); // Round the words res_reg_lo = mm256_round_epi32(&res_reg_lo, ®_round, CONV8_ROUNDING_BITS); res_reg_hi = mm256_round_epi32(&res_reg_hi, ®_round, CONV8_ROUNDING_BITS); // Combine to get the result res_reg = _mm256_packus_epi32(res_reg_lo, res_reg_hi); res_reg = _mm256_min_epi16(res_reg, reg_max); // Save the result mm256_store2_si128((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), &res_reg); // Update the source by two rows src_ptr += src_stride_unrolled; dst_ptr += dst_stride_unrolled; src_reg_m1001_lo = src_reg_1223_lo; src_reg_m1001_hi = src_reg_1223_hi; src_reg_1 = src_reg_3; } } static void vpx_highbd_filter_block1d16_v4_avx2( const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { vpx_highbd_filter_block1d8_v4_avx2(src_ptr, src_stride, dst_ptr, dst_stride, height, kernel, bd); vpx_highbd_filter_block1d8_v4_avx2(src_ptr + 8, src_stride, dst_ptr + 8, dst_stride, height, kernel, bd); } // From vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm. highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2; // From vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm. 
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2;
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2;
#define vpx_highbd_filter_block1d4_h8_avx2 vpx_highbd_filter_block1d4_h8_sse2
#define vpx_highbd_filter_block1d4_h2_avx2 vpx_highbd_filter_block1d4_h2_sse2
#define vpx_highbd_filter_block1d4_v8_avx2 vpx_highbd_filter_block1d4_v8_sse2
#define vpx_highbd_filter_block1d4_v2_avx2 vpx_highbd_filter_block1d4_v2_sse2

// Use the [vh]8 version because there is no [vh]4 implementation.
#define vpx_highbd_filter_block1d16_v4_avg_avx2 \
  vpx_highbd_filter_block1d16_v8_avg_avx2
#define vpx_highbd_filter_block1d16_h4_avg_avx2 \
  vpx_highbd_filter_block1d16_h8_avg_avx2
#define vpx_highbd_filter_block1d8_v4_avg_avx2 \
  vpx_highbd_filter_block1d8_v8_avg_avx2
#define vpx_highbd_filter_block1d8_h4_avg_avx2 \
  vpx_highbd_filter_block1d8_h8_avg_avx2
#define vpx_highbd_filter_block1d4_v4_avg_avx2 \
  vpx_highbd_filter_block1d4_v8_avg_avx2
#define vpx_highbd_filter_block1d4_h4_avg_avx2 \
  vpx_highbd_filter_block1d4_h8_avg_avx2

HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , avx2, 0);
HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v,
                 src - src_stride * (num_taps / 2 - 1), , avx2, 0);
HIGH_FUN_CONV_2D(, avx2, 0);

// From vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm.
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2;
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2;

// From vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm.
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2;
highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2;

#define vpx_highbd_filter_block1d4_h8_avg_avx2 \
  vpx_highbd_filter_block1d4_h8_avg_sse2
#define vpx_highbd_filter_block1d4_h2_avg_avx2 \
  vpx_highbd_filter_block1d4_h2_avg_sse2
#define vpx_highbd_filter_block1d4_v8_avg_avx2 \
  vpx_highbd_filter_block1d4_v8_avg_sse2
#define vpx_highbd_filter_block1d4_v2_avg_avx2 \
  vpx_highbd_filter_block1d4_v2_avg_sse2

HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, avx2, 1);
HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v,
                 src - src_stride * (num_taps / 2 - 1), avg_, avx2, 1);
HIGH_FUN_CONV_2D(avg_, avx2, 1);

#undef HIGHBD_FUNC

libvpx-1.8.2/vpx_dsp/x86/highbd_idct16x16_add_sse2.c

/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <emmintrin.h>  // SSE2

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/x86/highbd_inv_txfm_sse2.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"

static INLINE void highbd_idct16_4col_stage5(const __m128i *const in,
                                             __m128i *const out) {
  // stage 5
  out[0] = _mm_add_epi32(in[0], in[3]);
  out[1] = _mm_add_epi32(in[1], in[2]);
  out[2] = _mm_sub_epi32(in[1], in[2]);
  out[3] = _mm_sub_epi32(in[0], in[3]);
  highbd_butterfly_cospi16_sse2(in[6], in[5], &out[6], &out[5]);
  out[8] = _mm_add_epi32(in[8], in[11]);
  out[9] = _mm_add_epi32(in[9], in[10]);
  out[10] = _mm_sub_epi32(in[9], in[10]);
  out[11] = _mm_sub_epi32(in[8], in[11]);
  out[12] = _mm_sub_epi32(in[15], in[12]);
  out[13] = _mm_sub_epi32(in[14], in[13]);
  out[14] = _mm_add_epi32(in[14], in[13]);
  out[15] = _mm_add_epi32(in[15], in[12]);
}

static INLINE void highbd_idct16_4col_stage6(const __m128i *const in,
                                             __m128i *const out) {
  out[0] = _mm_add_epi32(in[0], in[7]);
  out[1] = _mm_add_epi32(in[1], in[6]);
  out[2] = _mm_add_epi32(in[2], in[5]);
  out[3] = _mm_add_epi32(in[3], in[4]);
  out[4] = _mm_sub_epi32(in[3], in[4]);
  out[5] = _mm_sub_epi32(in[2], in[5]);
  out[6] = _mm_sub_epi32(in[1], in[6]);
  out[7] = _mm_sub_epi32(in[0], in[7]);
  out[8] = in[8];
  out[9] = in[9];
  highbd_butterfly_cospi16_sse2(in[13], in[10], &out[13], &out[10]);
  highbd_butterfly_cospi16_sse2(in[12], in[11], &out[12], &out[11]);
  out[14] = in[14];
  out[15] = in[15];
}

static INLINE void highbd_idct16_4col(__m128i *const io /*io[16]*/) {
  __m128i step1[16], step2[16];

  // stage 2
  highbd_butterfly_sse2(io[1], io[15], cospi_30_64, cospi_2_64, &step2[8],
                        &step2[15]);
  highbd_butterfly_sse2(io[9], io[7], cospi_14_64, cospi_18_64, &step2[9],
                        &step2[14]);
  highbd_butterfly_sse2(io[5], io[11], cospi_22_64, cospi_10_64, &step2[10],
                        &step2[13]);
  highbd_butterfly_sse2(io[13], io[3], cospi_6_64, cospi_26_64, &step2[11],
                        &step2[12]);

  // stage 3
  highbd_butterfly_sse2(io[2], io[14], cospi_28_64, cospi_4_64, &step1[4],
                        &step1[7]);
  highbd_butterfly_sse2(io[10], io[6], cospi_12_64, cospi_20_64, &step1[5],
                        &step1[6]);
  step1[8] = _mm_add_epi32(step2[8], step2[9]);
  step1[9] = _mm_sub_epi32(step2[8], step2[9]);
  step1[10] = _mm_sub_epi32(step2[10], step2[11]);  // step1[10] = -step1[10]
  step1[11] = _mm_add_epi32(step2[10], step2[11]);
  step1[12] = _mm_add_epi32(step2[13], step2[12]);
  step1[13] = _mm_sub_epi32(step2[13], step2[12]);  // step1[13] = -step1[13]
  step1[14] = _mm_sub_epi32(step2[15], step2[14]);
  step1[15] = _mm_add_epi32(step2[15], step2[14]);

  // stage 4
  highbd_butterfly_cospi16_sse2(io[0], io[8], &step2[0], &step2[1]);
  highbd_butterfly_sse2(io[4], io[12], cospi_24_64, cospi_8_64, &step2[2],
                        &step2[3]);
  highbd_butterfly_sse2(step1[14], step1[9], cospi_24_64, cospi_8_64,
                        &step2[9], &step2[14]);
  highbd_butterfly_sse2(step1[10], step1[13], cospi_8_64, cospi_24_64,
                        &step2[13], &step2[10]);
  step2[5] = _mm_sub_epi32(step1[4], step1[5]);
  step1[4] = _mm_add_epi32(step1[4], step1[5]);
  step2[6] = _mm_sub_epi32(step1[7], step1[6]);
  step1[7] = _mm_add_epi32(step1[7], step1[6]);
  step2[8] = step1[8];
  step2[11] = step1[11];
  step2[12] = step1[12];
  step2[15] = step1[15];

  highbd_idct16_4col_stage5(step2, step1);
  highbd_idct16_4col_stage6(step1, step2);
  highbd_idct16_4col_stage7(step2, io);
}

static INLINE void highbd_idct16x16_38_4col(__m128i *const io /*io[16]*/) {
  __m128i step1[16], step2[16];
  __m128i temp1[2], sign[2];

  // stage 2
  highbd_partial_butterfly_sse2(io[1], cospi_30_64, cospi_2_64, &step2[8],
                                &step2[15]);
  highbd_partial_butterfly_neg_sse2(io[7],
cospi_14_64, cospi_18_64, &step2[9], &step2[14]); highbd_partial_butterfly_sse2(io[5], cospi_22_64, cospi_10_64, &step2[10], &step2[13]); highbd_partial_butterfly_neg_sse2(io[3], cospi_6_64, cospi_26_64, &step2[11], &step2[12]); // stage 3 highbd_partial_butterfly_sse2(io[2], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); highbd_partial_butterfly_neg_sse2(io[6], cospi_12_64, cospi_20_64, &step1[5], &step1[6]); step1[8] = _mm_add_epi32(step2[8], step2[9]); step1[9] = _mm_sub_epi32(step2[8], step2[9]); step1[10] = _mm_sub_epi32(step2[10], step2[11]); // step1[10] = -step1[10] step1[11] = _mm_add_epi32(step2[10], step2[11]); step1[12] = _mm_add_epi32(step2[13], step2[12]); step1[13] = _mm_sub_epi32(step2[13], step2[12]); // step1[13] = -step1[13] step1[14] = _mm_sub_epi32(step2[15], step2[14]); step1[15] = _mm_add_epi32(step2[15], step2[14]); // stage 4 abs_extend_64bit_sse2(io[0], temp1, sign); step2[0] = multiplication_round_shift_sse2(temp1, sign, cospi_16_64); step2[1] = step2[0]; highbd_partial_butterfly_sse2(io[4], cospi_24_64, cospi_8_64, &step2[2], &step2[3]); highbd_butterfly_sse2(step1[14], step1[9], cospi_24_64, cospi_8_64, &step2[9], &step2[14]); highbd_butterfly_sse2(step1[10], step1[13], cospi_8_64, cospi_24_64, &step2[13], &step2[10]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step1[4] = _mm_add_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); step1[7] = _mm_add_epi32(step1[7], step1[6]); step2[8] = step1[8]; step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; highbd_idct16_4col_stage5(step2, step1); highbd_idct16_4col_stage6(step1, step2); highbd_idct16_4col_stage7(step2, io); } static INLINE void highbd_idct16x16_10_4col(__m128i *const io /*io[16]*/) { __m128i step1[16], step2[16]; __m128i temp[2], sign[2]; // stage 2 highbd_partial_butterfly_sse2(io[1], cospi_30_64, cospi_2_64, &step2[8], &step2[15]); highbd_partial_butterfly_neg_sse2(io[3], cospi_6_64, cospi_26_64, &step2[11], &step2[12]); // stage 3 highbd_partial_butterfly_sse2(io[2], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); step1[8] = step2[8]; step1[9] = step2[8]; step1[10] = _mm_sub_epi32(_mm_setzero_si128(), step2[11]); // step1[10] = -step1[10] step1[11] = step2[11]; step1[12] = step2[12]; step1[13] = _mm_sub_epi32(_mm_setzero_si128(), step2[12]); // step1[13] = -step1[13] step1[14] = step2[15]; step1[15] = step2[15]; // stage 4 abs_extend_64bit_sse2(io[0], temp, sign); step2[0] = multiplication_round_shift_sse2(temp, sign, cospi_16_64); step2[1] = step2[0]; step2[2] = _mm_setzero_si128(); step2[3] = _mm_setzero_si128(); highbd_butterfly_sse2(step1[14], step1[9], cospi_24_64, cospi_8_64, &step2[9], &step2[14]); highbd_butterfly_sse2(step1[10], step1[13], cospi_8_64, cospi_24_64, &step2[13], &step2[10]); step2[5] = step1[4]; step2[6] = step1[7]; step2[8] = step1[8]; step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; highbd_idct16_4col_stage5(step2, step1); highbd_idct16_4col_stage6(step1, step2); highbd_idct16_4col_stage7(step2, io); } void vpx_highbd_idct16x16_256_add_sse2(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i; __m128i out[16], *in; if (bd == 8) { __m128i l[16], r[16]; in = l; for (i = 0; i < 2; i++) { highbd_load_pack_transpose_32bit_8x8(&input[0], 16, &in[0]); highbd_load_pack_transpose_32bit_8x8(&input[8], 16, &in[8]); idct16_8col(in, in); in = r; input += 128; } for (i = 0; i < 16; i += 8) { int j; transpose_16bit_8x8(l + i, out); transpose_16bit_8x8(r + i, out + 8); idct16_8col(out, out); for (j = 0; j < 16; 
           ++j) {
        highbd_write_buffer_8(dest + j * stride, out[j], bd);
      }
      dest += 8;
    }
  } else {
    __m128i all[4][16];

    for (i = 0; i < 4; i++) {
      in = all[i];
      highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]);
      highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]);
      highbd_idct16_4col(in);
      input += 4 * 16;
    }

    for (i = 0; i < 16; i += 4) {
      int j;
      transpose_32bit_4x4(all[0] + i, out + 0);
      transpose_32bit_4x4(all[1] + i, out + 4);
      transpose_32bit_4x4(all[2] + i, out + 8);
      transpose_32bit_4x4(all[3] + i, out + 12);
      highbd_idct16_4col(out);
      for (j = 0; j < 16; ++j) {
        highbd_write_buffer_4(dest + j * stride, out[j], bd);
      }
      dest += 4;
    }
  }
}

void vpx_highbd_idct16x16_38_add_sse2(const tran_low_t *input, uint16_t *dest,
                                      int stride, int bd) {
  int i;
  __m128i out[16];

  if (bd == 8) {
    __m128i in[16], temp[16];

    highbd_load_pack_transpose_32bit_8x8(input, 16, in);
    for (i = 8; i < 16; i++) {
      in[i] = _mm_setzero_si128();
    }
    idct16_8col(in, temp);

    for (i = 0; i < 16; i += 8) {
      int j;
      transpose_16bit_8x8(temp + i, in);
      idct16_8col(in, out);
      for (j = 0; j < 16; ++j) {
        highbd_write_buffer_8(dest + j * stride, out[j], bd);
      }
      dest += 8;
    }
  } else {
    __m128i all[2][16], *in;

    for (i = 0; i < 2; i++) {
      in = all[i];
      highbd_load_transpose_32bit_8x4(input, 16, in);
      highbd_idct16x16_38_4col(in);
      input += 4 * 16;
    }

    for (i = 0; i < 16; i += 4) {
      int j;
      transpose_32bit_4x4(all[0] + i, out + 0);
      transpose_32bit_4x4(all[1] + i, out + 4);
      highbd_idct16x16_38_4col(out);
      for (j = 0; j < 16; ++j) {
        highbd_write_buffer_4(dest + j * stride, out[j], bd);
      }
      dest += 4;
    }
  }
}

void vpx_highbd_idct16x16_10_add_sse2(const tran_low_t *input, uint16_t *dest,
                                      int stride, int bd) {
  int i;
  __m128i out[16];

  if (bd == 8) {
    __m128i in[16], l[16];

    in[0] = load_pack_8_32bit(input + 0 * 16);
    in[1] = load_pack_8_32bit(input + 1 * 16);
    in[2] = load_pack_8_32bit(input + 2 * 16);
    in[3] = load_pack_8_32bit(input + 3 * 16);

    idct16x16_10_pass1(in, l);

    for (i = 0; i < 16; i += 8) {
      int j;
      idct16x16_10_pass2(l + i, in);
      for (j = 0; j < 16; ++j) {
        highbd_write_buffer_8(dest + j * stride, in[j], bd);
      }
      dest += 8;
    }
  } else {
    __m128i all[2][16], *in;

    for (i = 0; i < 2; i++) {
      in = all[i];
      highbd_load_transpose_32bit_4x4(input, 16, in);
      highbd_idct16x16_10_4col(in);
      input += 4 * 16;
    }

    for (i = 0; i < 16; i += 4) {
      int j;
      transpose_32bit_4x4(&all[0][i], out);
      highbd_idct16x16_10_4col(out);
      for (j = 0; j < 16; ++j) {
        highbd_write_buffer_4(dest + j * stride, out[j], bd);
      }
      dest += 4;
    }
  }
}

void vpx_highbd_idct16x16_1_add_sse2(const tran_low_t *input, uint16_t *dest,
                                     int stride, int bd) {
  highbd_idct_1_add_kernel(input, dest, stride, bd, 16);
}

libvpx-1.8.2/vpx_dsp/x86/highbd_idct16x16_add_sse4.c

/*
 * Copyright (c) 2017 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include <smmintrin.h>  // SSE4.1

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/x86/highbd_inv_txfm_sse2.h"
#include "vpx_dsp/x86/highbd_inv_txfm_sse4.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"

static INLINE void highbd_idct16_4col_stage5(const __m128i *const in,
                                             __m128i *const out) {
  // stage 5
  out[0] = _mm_add_epi32(in[0], in[3]);
  out[1] = _mm_add_epi32(in[1], in[2]);
  out[2] = _mm_sub_epi32(in[1], in[2]);
  out[3] = _mm_sub_epi32(in[0], in[3]);
  highbd_butterfly_cospi16_sse4_1(in[6], in[5], &out[6], &out[5]);
  out[8] = _mm_add_epi32(in[8], in[11]);
  out[9] = _mm_add_epi32(in[9], in[10]);
  out[10] = _mm_sub_epi32(in[9], in[10]);
  out[11] = _mm_sub_epi32(in[8], in[11]);
  out[12] = _mm_sub_epi32(in[15], in[12]);
  out[13] = _mm_sub_epi32(in[14], in[13]);
  out[14] = _mm_add_epi32(in[14], in[13]);
  out[15] = _mm_add_epi32(in[15], in[12]);
}

static INLINE void highbd_idct16_4col_stage6(const __m128i *const in,
                                             __m128i *const out) {
  out[0] = _mm_add_epi32(in[0], in[7]);
  out[1] = _mm_add_epi32(in[1], in[6]);
  out[2] = _mm_add_epi32(in[2], in[5]);
  out[3] = _mm_add_epi32(in[3], in[4]);
  out[4] = _mm_sub_epi32(in[3], in[4]);
  out[5] = _mm_sub_epi32(in[2], in[5]);
  out[6] = _mm_sub_epi32(in[1], in[6]);
  out[7] = _mm_sub_epi32(in[0], in[7]);
  out[8] = in[8];
  out[9] = in[9];
  highbd_butterfly_cospi16_sse4_1(in[13], in[10], &out[13], &out[10]);
  highbd_butterfly_cospi16_sse4_1(in[12], in[11], &out[12], &out[11]);
  out[14] = in[14];
  out[15] = in[15];
}

void vpx_highbd_idct16_4col_sse4_1(__m128i *const io /*io[16]*/) {
  __m128i step1[16], step2[16];

  // stage 2
  highbd_butterfly_sse4_1(io[1], io[15], cospi_30_64, cospi_2_64, &step2[8],
                          &step2[15]);
  highbd_butterfly_sse4_1(io[9], io[7], cospi_14_64, cospi_18_64, &step2[9],
                          &step2[14]);
  highbd_butterfly_sse4_1(io[5], io[11], cospi_22_64, cospi_10_64, &step2[10],
                          &step2[13]);
  highbd_butterfly_sse4_1(io[13], io[3], cospi_6_64, cospi_26_64, &step2[11],
                          &step2[12]);

  // stage 3
  highbd_butterfly_sse4_1(io[2], io[14], cospi_28_64, cospi_4_64, &step1[4],
                          &step1[7]);
  highbd_butterfly_sse4_1(io[10], io[6], cospi_12_64, cospi_20_64, &step1[5],
                          &step1[6]);
  step1[8] = _mm_add_epi32(step2[8], step2[9]);
  step1[9] = _mm_sub_epi32(step2[8], step2[9]);
  step1[10] = _mm_sub_epi32(step2[11], step2[10]);
  step1[11] = _mm_add_epi32(step2[11], step2[10]);
  step1[12] = _mm_add_epi32(step2[12], step2[13]);
  step1[13] = _mm_sub_epi32(step2[12], step2[13]);
  step1[14] = _mm_sub_epi32(step2[15], step2[14]);
  step1[15] = _mm_add_epi32(step2[15], step2[14]);

  // stage 4
  highbd_butterfly_cospi16_sse4_1(io[0], io[8], &step2[0], &step2[1]);
  highbd_butterfly_sse4_1(io[4], io[12], cospi_24_64, cospi_8_64, &step2[2],
                          &step2[3]);
  highbd_butterfly_sse4_1(step1[14], step1[9], cospi_24_64, cospi_8_64,
                          &step2[9], &step2[14]);
  highbd_butterfly_sse4_1(step1[10], step1[13], -cospi_8_64, -cospi_24_64,
                          &step2[13], &step2[10]);
  step2[5] = _mm_sub_epi32(step1[4], step1[5]);
  step1[4] = _mm_add_epi32(step1[4], step1[5]);
  step2[6] = _mm_sub_epi32(step1[7], step1[6]);
  step1[7] = _mm_add_epi32(step1[7], step1[6]);
  step2[8] = step1[8];
  step2[11] = step1[11];
  step2[12] = step1[12];
  step2[15] = step1[15];

  highbd_idct16_4col_stage5(step2, step1);
  highbd_idct16_4col_stage6(step1, step2);
  highbd_idct16_4col_stage7(step2, io);
}

static INLINE void highbd_idct16x16_38_4col(__m128i *const io /*io[16]*/) {
  __m128i step1[16], step2[16];
  __m128i temp1[2];

  // stage 2
  highbd_partial_butterfly_sse4_1(io[1], cospi_30_64, cospi_2_64, &step2[8],
                                  &step2[15]);
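  // Added note (not in the original source): with most coefficients known to
  // be zero, the second butterfly input drops out, so each
  // highbd_partial_butterfly_sse4_1(in, c0, c1, out0, out1) reduces to
  //   *out0 = ROUND_POWER_OF_TWO(in * c0, DCT_CONST_BITS);
  //   *out1 = ROUND_POWER_OF_TWO(in * c1, DCT_CONST_BITS);
  // (DCT_CONST_BITS == 14), which is why only one source register is passed.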
highbd_partial_butterfly_sse4_1(io[7], -cospi_18_64, cospi_14_64, &step2[9], &step2[14]); highbd_partial_butterfly_sse4_1(io[5], cospi_22_64, cospi_10_64, &step2[10], &step2[13]); highbd_partial_butterfly_sse4_1(io[3], -cospi_26_64, cospi_6_64, &step2[11], &step2[12]); // stage 3 highbd_partial_butterfly_sse4_1(io[2], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); highbd_partial_butterfly_sse4_1(io[6], -cospi_20_64, cospi_12_64, &step1[5], &step1[6]); step1[8] = _mm_add_epi32(step2[8], step2[9]); step1[9] = _mm_sub_epi32(step2[8], step2[9]); step1[10] = _mm_sub_epi32(step2[11], step2[10]); step1[11] = _mm_add_epi32(step2[11], step2[10]); step1[12] = _mm_add_epi32(step2[12], step2[13]); step1[13] = _mm_sub_epi32(step2[12], step2[13]); step1[14] = _mm_sub_epi32(step2[15], step2[14]); step1[15] = _mm_add_epi32(step2[15], step2[14]); // stage 4 extend_64bit(io[0], temp1); step2[0] = multiplication_round_shift_sse4_1(temp1, cospi_16_64); step2[1] = step2[0]; highbd_partial_butterfly_sse4_1(io[4], cospi_24_64, cospi_8_64, &step2[2], &step2[3]); highbd_butterfly_sse4_1(step1[14], step1[9], cospi_24_64, cospi_8_64, &step2[9], &step2[14]); highbd_butterfly_sse4_1(step1[10], step1[13], -cospi_8_64, -cospi_24_64, &step2[13], &step2[10]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step1[4] = _mm_add_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); step1[7] = _mm_add_epi32(step1[7], step1[6]); step2[8] = step1[8]; step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; highbd_idct16_4col_stage5(step2, step1); highbd_idct16_4col_stage6(step1, step2); highbd_idct16_4col_stage7(step2, io); } static INLINE void highbd_idct16x16_10_4col(__m128i *const io /*io[16]*/) { __m128i step1[16], step2[16]; __m128i temp[2]; // stage 2 highbd_partial_butterfly_sse4_1(io[1], cospi_30_64, cospi_2_64, &step2[8], &step2[15]); highbd_partial_butterfly_sse4_1(io[3], -cospi_26_64, cospi_6_64, &step2[11], &step2[12]); // stage 3 highbd_partial_butterfly_sse4_1(io[2], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); step1[8] = step2[8]; step1[9] = step2[8]; step1[10] = step2[11]; step1[11] = step2[11]; step1[12] = step2[12]; step1[13] = step2[12]; step1[14] = step2[15]; step1[15] = step2[15]; // stage 4 extend_64bit(io[0], temp); step2[0] = multiplication_round_shift_sse4_1(temp, cospi_16_64); step2[1] = step2[0]; step2[2] = _mm_setzero_si128(); step2[3] = _mm_setzero_si128(); highbd_butterfly_sse4_1(step1[14], step1[9], cospi_24_64, cospi_8_64, &step2[9], &step2[14]); highbd_butterfly_sse4_1(step1[10], step1[13], -cospi_8_64, -cospi_24_64, &step2[13], &step2[10]); step2[5] = step1[4]; step2[6] = step1[7]; step2[8] = step1[8]; step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; highbd_idct16_4col_stage5(step2, step1); highbd_idct16_4col_stage6(step1, step2); highbd_idct16_4col_stage7(step2, io); } void vpx_highbd_idct16x16_256_add_sse4_1(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i; __m128i out[16], *in; if (bd == 8) { __m128i l[16], r[16]; in = l; for (i = 0; i < 2; i++) { highbd_load_pack_transpose_32bit_8x8(&input[0], 16, &in[0]); highbd_load_pack_transpose_32bit_8x8(&input[8], 16, &in[8]); idct16_8col(in, in); in = r; input += 128; } for (i = 0; i < 16; i += 8) { int j; transpose_16bit_8x8(l + i, out); transpose_16bit_8x8(r + i, out + 8); idct16_8col(out, out); for (j = 0; j < 16; ++j) { highbd_write_buffer_8(dest + j * stride, out[j], bd); } dest += 8; } } else { __m128i all[4][16]; for (i = 0; i < 4; i++) { in = all[i]; 
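      // Added note (not in the original source): for bd > 8 the intermediate
      // values no longer fit in 16 bits, so the transform stays in 32-bit
      // lanes.  A __m128i then holds only four columns, hence four passes
      // over 4x16 slices here instead of the two packed 16-bit 8x16 passes
      // of the bd == 8 branch above.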
      highbd_load_transpose_32bit_8x4(&input[0], 16, &in[0]);
      highbd_load_transpose_32bit_8x4(&input[8], 16, &in[8]);
      vpx_highbd_idct16_4col_sse4_1(in);
      input += 4 * 16;
    }

    for (i = 0; i < 16; i += 4) {
      int j;
      transpose_32bit_4x4(all[0] + i, out + 0);
      transpose_32bit_4x4(all[1] + i, out + 4);
      transpose_32bit_4x4(all[2] + i, out + 8);
      transpose_32bit_4x4(all[3] + i, out + 12);
      vpx_highbd_idct16_4col_sse4_1(out);
      for (j = 0; j < 16; ++j) {
        highbd_write_buffer_4(dest + j * stride, out[j], bd);
      }
      dest += 4;
    }
  }
}

void vpx_highbd_idct16x16_38_add_sse4_1(const tran_low_t *input,
                                        uint16_t *dest, int stride, int bd) {
  int i;
  __m128i out[16];

  if (bd == 8) {
    __m128i in[16], temp[16];

    highbd_load_pack_transpose_32bit_8x8(&input[0], 16, &in[0]);
    for (i = 8; i < 16; i++) {
      in[i] = _mm_setzero_si128();
    }
    idct16_8col(in, temp);

    for (i = 0; i < 16; i += 8) {
      int j;
      transpose_16bit_8x8(temp + i, in);
      idct16_8col(in, out);
      for (j = 0; j < 16; ++j) {
        highbd_write_buffer_8(dest + j * stride, out[j], bd);
      }
      dest += 8;
    }
  } else {
    __m128i all[2][16], *in;

    for (i = 0; i < 2; i++) {
      in = all[i];
      highbd_load_transpose_32bit_8x4(input, 16, in);
      highbd_idct16x16_38_4col(in);
      input += 4 * 16;
    }

    for (i = 0; i < 16; i += 4) {
      int j;
      transpose_32bit_4x4(all[0] + i, out + 0);
      transpose_32bit_4x4(all[1] + i, out + 4);
      highbd_idct16x16_38_4col(out);
      for (j = 0; j < 16; ++j) {
        highbd_write_buffer_4(dest + j * stride, out[j], bd);
      }
      dest += 4;
    }
  }
}

void vpx_highbd_idct16x16_10_add_sse4_1(const tran_low_t *input,
                                        uint16_t *dest, int stride, int bd) {
  int i;
  __m128i out[16];

  if (bd == 8) {
    __m128i in[16], l[16];

    in[0] = load_pack_8_32bit(input + 0 * 16);
    in[1] = load_pack_8_32bit(input + 1 * 16);
    in[2] = load_pack_8_32bit(input + 2 * 16);
    in[3] = load_pack_8_32bit(input + 3 * 16);

    idct16x16_10_pass1(in, l);

    for (i = 0; i < 16; i += 8) {
      int j;
      idct16x16_10_pass2(l + i, in);
      for (j = 0; j < 16; ++j) {
        highbd_write_buffer_8(dest + j * stride, in[j], bd);
      }
      dest += 8;
    }
  } else {
    __m128i all[2][16], *in;

    for (i = 0; i < 2; i++) {
      in = all[i];
      highbd_load_transpose_32bit_4x4(input, 16, in);
      highbd_idct16x16_10_4col(in);
      input += 4 * 16;
    }

    for (i = 0; i < 16; i += 4) {
      int j;
      transpose_32bit_4x4(&all[0][i], out);
      highbd_idct16x16_10_4col(out);
      for (j = 0; j < 16; ++j) {
        highbd_write_buffer_4(dest + j * stride, out[j], bd);
      }
      dest += 4;
    }
  }
}

libvpx-1.8.2/vpx_dsp/x86/highbd_idct32x32_add_sse2.c

/*
 * Copyright (c) 2015 The WebM project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
*/ #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/x86/highbd_inv_txfm_sse2.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" static INLINE void highbd_idct32_4x32_quarter_2_stage_4_to_6( __m128i *const step1 /*step1[16]*/, __m128i *const out /*out[16]*/) { __m128i step2[32]; // stage 4 step2[8] = step1[8]; step2[15] = step1[15]; highbd_butterfly_sse2(step1[14], step1[9], cospi_24_64, cospi_8_64, &step2[9], &step2[14]); highbd_butterfly_sse2(step1[10], step1[13], cospi_8_64, cospi_24_64, &step2[13], &step2[10]); step2[11] = step1[11]; step2[12] = step1[12]; // stage 5 step1[8] = _mm_add_epi32(step2[8], step2[11]); step1[9] = _mm_add_epi32(step2[9], step2[10]); step1[10] = _mm_sub_epi32(step2[9], step2[10]); step1[11] = _mm_sub_epi32(step2[8], step2[11]); step1[12] = _mm_sub_epi32(step2[15], step2[12]); step1[13] = _mm_sub_epi32(step2[14], step2[13]); step1[14] = _mm_add_epi32(step2[14], step2[13]); step1[15] = _mm_add_epi32(step2[15], step2[12]); // stage 6 out[8] = step1[8]; out[9] = step1[9]; highbd_butterfly_sse2(step1[13], step1[10], cospi_16_64, cospi_16_64, &out[10], &out[13]); highbd_butterfly_sse2(step1[12], step1[11], cospi_16_64, cospi_16_64, &out[11], &out[12]); out[14] = step1[14]; out[15] = step1[15]; } static INLINE void highbd_idct32_4x32_quarter_3_4_stage_4_to_7( __m128i *const step1 /*step1[32]*/, __m128i *const out /*out[32]*/) { __m128i step2[32]; // stage 4 step2[16] = _mm_add_epi32(step1[16], step1[19]); step2[17] = _mm_add_epi32(step1[17], step1[18]); step2[18] = _mm_sub_epi32(step1[17], step1[18]); step2[19] = _mm_sub_epi32(step1[16], step1[19]); step2[20] = _mm_sub_epi32(step1[20], step1[23]); // step2[20] = -step2[20] step2[21] = _mm_sub_epi32(step1[21], step1[22]); // step2[21] = -step2[21] step2[22] = _mm_add_epi32(step1[21], step1[22]); step2[23] = _mm_add_epi32(step1[20], step1[23]); step2[24] = _mm_add_epi32(step1[27], step1[24]); step2[25] = _mm_add_epi32(step1[26], step1[25]); step2[26] = _mm_sub_epi32(step1[26], step1[25]); // step2[26] = -step2[26] step2[27] = _mm_sub_epi32(step1[27], step1[24]); // step2[27] = -step2[27] step2[28] = _mm_sub_epi32(step1[31], step1[28]); step2[29] = _mm_sub_epi32(step1[30], step1[29]); step2[30] = _mm_add_epi32(step1[29], step1[30]); step2[31] = _mm_add_epi32(step1[28], step1[31]); // stage 5 step1[16] = step2[16]; step1[17] = step2[17]; highbd_butterfly_sse2(step2[29], step2[18], cospi_24_64, cospi_8_64, &step1[18], &step1[29]); highbd_butterfly_sse2(step2[28], step2[19], cospi_24_64, cospi_8_64, &step1[19], &step1[28]); highbd_butterfly_sse2(step2[20], step2[27], cospi_8_64, cospi_24_64, &step1[27], &step1[20]); highbd_butterfly_sse2(step2[21], step2[26], cospi_8_64, cospi_24_64, &step1[26], &step1[21]); step1[22] = step2[22]; step1[23] = step2[23]; step1[24] = step2[24]; step1[25] = step2[25]; step1[30] = step2[30]; step1[31] = step2[31]; // stage 6 step2[16] = _mm_add_epi32(step1[16], step1[23]); step2[17] = _mm_add_epi32(step1[17], step1[22]); step2[18] = _mm_add_epi32(step1[18], step1[21]); step2[19] = _mm_add_epi32(step1[19], step1[20]); step2[20] = _mm_sub_epi32(step1[19], step1[20]); step2[21] = _mm_sub_epi32(step1[18], step1[21]); step2[22] = _mm_sub_epi32(step1[17], step1[22]); step2[23] = _mm_sub_epi32(step1[16], step1[23]); step2[24] = _mm_sub_epi32(step1[31], step1[24]); step2[25] = _mm_sub_epi32(step1[30], step1[25]); step2[26] = _mm_sub_epi32(step1[29], step1[26]); step2[27] = _mm_sub_epi32(step1[28], step1[27]); step2[28] = 
_mm_add_epi32(step1[27], step1[28]); step2[29] = _mm_add_epi32(step1[26], step1[29]); step2[30] = _mm_add_epi32(step1[25], step1[30]); step2[31] = _mm_add_epi32(step1[24], step1[31]); // stage 7 out[16] = step2[16]; out[17] = step2[17]; out[18] = step2[18]; out[19] = step2[19]; highbd_butterfly_sse2(step2[27], step2[20], cospi_16_64, cospi_16_64, &out[20], &out[27]); highbd_butterfly_sse2(step2[26], step2[21], cospi_16_64, cospi_16_64, &out[21], &out[26]); highbd_butterfly_sse2(step2[25], step2[22], cospi_16_64, cospi_16_64, &out[22], &out[25]); highbd_butterfly_sse2(step2[24], step2[23], cospi_16_64, cospi_16_64, &out[23], &out[24]); out[28] = step2[28]; out[29] = step2[29]; out[30] = step2[30]; out[31] = step2[31]; } // Group the coefficient calculation into smaller functions to prevent stack // spillover in 32x32 idct optimizations: // quarter_1: 0-7 // quarter_2: 8-15 // quarter_3_4: 16-23, 24-31 // For each 4x32 block __m128i in[32], // Input with index, 0, 4, 8, 12, 16, 20, 24, 28 // output pixels: 0-7 in __m128i out[32] static INLINE void highbd_idct32_1024_4x32_quarter_1( const __m128i *const in /*in[32]*/, __m128i *const out /*out[8]*/) { __m128i step1[8], step2[8]; // stage 3 highbd_butterfly_sse2(in[4], in[28], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); highbd_butterfly_sse2(in[20], in[12], cospi_12_64, cospi_20_64, &step1[5], &step1[6]); // stage 4 highbd_butterfly_sse2(in[0], in[16], cospi_16_64, cospi_16_64, &step2[1], &step2[0]); highbd_butterfly_sse2(in[8], in[24], cospi_24_64, cospi_8_64, &step2[2], &step2[3]); step2[4] = _mm_add_epi32(step1[4], step1[5]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); step2[7] = _mm_add_epi32(step1[7], step1[6]); // stage 5 step1[0] = _mm_add_epi32(step2[0], step2[3]); step1[1] = _mm_add_epi32(step2[1], step2[2]); step1[2] = _mm_sub_epi32(step2[1], step2[2]); step1[3] = _mm_sub_epi32(step2[0], step2[3]); step1[4] = step2[4]; highbd_butterfly_sse2(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); step1[7] = step2[7]; // stage 6 out[0] = _mm_add_epi32(step1[0], step1[7]); out[1] = _mm_add_epi32(step1[1], step1[6]); out[2] = _mm_add_epi32(step1[2], step1[5]); out[3] = _mm_add_epi32(step1[3], step1[4]); out[4] = _mm_sub_epi32(step1[3], step1[4]); out[5] = _mm_sub_epi32(step1[2], step1[5]); out[6] = _mm_sub_epi32(step1[1], step1[6]); out[7] = _mm_sub_epi32(step1[0], step1[7]); } // For each 4x32 block __m128i in[32], // Input with index, 2, 6, 10, 14, 18, 22, 26, 30 // output pixels: 8-15 in __m128i out[32] static INLINE void highbd_idct32_1024_4x32_quarter_2( const __m128i *in /*in[32]*/, __m128i *out /*out[16]*/) { __m128i step1[32], step2[32]; // stage 2 highbd_butterfly_sse2(in[2], in[30], cospi_30_64, cospi_2_64, &step2[8], &step2[15]); highbd_butterfly_sse2(in[18], in[14], cospi_14_64, cospi_18_64, &step2[9], &step2[14]); highbd_butterfly_sse2(in[10], in[22], cospi_22_64, cospi_10_64, &step2[10], &step2[13]); highbd_butterfly_sse2(in[26], in[6], cospi_6_64, cospi_26_64, &step2[11], &step2[12]); // stage 3 step1[8] = _mm_add_epi32(step2[8], step2[9]); step1[9] = _mm_sub_epi32(step2[8], step2[9]); step1[14] = _mm_sub_epi32(step2[15], step2[14]); step1[15] = _mm_add_epi32(step2[15], step2[14]); step1[10] = _mm_sub_epi32(step2[10], step2[11]); // step1[10] = -step1[10] step1[11] = _mm_add_epi32(step2[10], step2[11]); step1[12] = _mm_add_epi32(step2[13], step2[12]); step1[13] = _mm_sub_epi32(step2[13], step2[12]); // step1[13] = -step1[13] 
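  // Added note (not in the original source): step1[10] and step1[13] are
  // deliberately left negated, as the trailing comments above indicate.  SSE2
  // has no signed 32-bit multiply (_mm_mul_epi32 is SSE4.1), so rather than
  // negating these lanes the next stage feeds them to highbd_butterfly_sse2()
  // with the cosine pair and the outputs swapped, absorbing the sign for
  // free; compare the SSE4.1 idct16 version above, which passes
  // -cospi_8_64/-cospi_24_64 directly.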
highbd_idct32_4x32_quarter_2_stage_4_to_6(step1, out); } static INLINE void highbd_idct32_1024_4x32_quarter_1_2( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[16]; highbd_idct32_1024_4x32_quarter_1(in, temp); highbd_idct32_1024_4x32_quarter_2(in, temp); // stage 7 highbd_add_sub_butterfly(temp, out, 16); } // For each 4x32 block __m128i in[32], // Input with odd index, // 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 // output pixels: 16-23, 24-31 in __m128i out[32] static INLINE void highbd_idct32_1024_4x32_quarter_3_4( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i step1[32], step2[32]; // stage 1 highbd_butterfly_sse2(in[1], in[31], cospi_31_64, cospi_1_64, &step1[16], &step1[31]); highbd_butterfly_sse2(in[17], in[15], cospi_15_64, cospi_17_64, &step1[17], &step1[30]); highbd_butterfly_sse2(in[9], in[23], cospi_23_64, cospi_9_64, &step1[18], &step1[29]); highbd_butterfly_sse2(in[25], in[7], cospi_7_64, cospi_25_64, &step1[19], &step1[28]); highbd_butterfly_sse2(in[5], in[27], cospi_27_64, cospi_5_64, &step1[20], &step1[27]); highbd_butterfly_sse2(in[21], in[11], cospi_11_64, cospi_21_64, &step1[21], &step1[26]); highbd_butterfly_sse2(in[13], in[19], cospi_19_64, cospi_13_64, &step1[22], &step1[25]); highbd_butterfly_sse2(in[29], in[3], cospi_3_64, cospi_29_64, &step1[23], &step1[24]); // stage 2 step2[16] = _mm_add_epi32(step1[16], step1[17]); step2[17] = _mm_sub_epi32(step1[16], step1[17]); step2[18] = _mm_sub_epi32(step1[18], step1[19]); // step2[18] = -step2[18] step2[19] = _mm_add_epi32(step1[18], step1[19]); step2[20] = _mm_add_epi32(step1[20], step1[21]); step2[21] = _mm_sub_epi32(step1[20], step1[21]); step2[22] = _mm_sub_epi32(step1[22], step1[23]); // step2[22] = -step2[22] step2[23] = _mm_add_epi32(step1[22], step1[23]); step2[24] = _mm_add_epi32(step1[25], step1[24]); step2[25] = _mm_sub_epi32(step1[25], step1[24]); // step2[25] = -step2[25] step2[26] = _mm_sub_epi32(step1[27], step1[26]); step2[27] = _mm_add_epi32(step1[27], step1[26]); step2[28] = _mm_add_epi32(step1[29], step1[28]); step2[29] = _mm_sub_epi32(step1[29], step1[28]); // step2[29] = -step2[29] step2[30] = _mm_sub_epi32(step1[31], step1[30]); step2[31] = _mm_add_epi32(step1[31], step1[30]); // stage 3 step1[16] = step2[16]; step1[31] = step2[31]; highbd_butterfly_sse2(step2[30], step2[17], cospi_28_64, cospi_4_64, &step1[17], &step1[30]); highbd_butterfly_sse2(step2[18], step2[29], cospi_4_64, cospi_28_64, &step1[29], &step1[18]); step1[19] = step2[19]; step1[20] = step2[20]; highbd_butterfly_sse2(step2[26], step2[21], cospi_12_64, cospi_20_64, &step1[21], &step1[26]); highbd_butterfly_sse2(step2[22], step2[25], cospi_20_64, cospi_12_64, &step1[25], &step1[22]); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; step1[28] = step2[28]; highbd_idct32_4x32_quarter_3_4_stage_4_to_7(step1, out); } static void highbd_idct32_1024_4x32(__m128i *const io /*io[32]*/) { __m128i temp[32]; highbd_idct32_1024_4x32_quarter_1_2(io, temp); highbd_idct32_1024_4x32_quarter_3_4(io, temp); // final stage highbd_add_sub_butterfly(temp, io, 32); } void vpx_highbd_idct32x32_1024_add_sse2(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; if (bd == 8) { __m128i col[4][32], io[32]; // rows for (i = 0; i < 4; i++) { highbd_load_pack_transpose_32bit_8x8(&input[0], 32, &io[0]); highbd_load_pack_transpose_32bit_8x8(&input[8], 32, &io[8]); highbd_load_pack_transpose_32bit_8x8(&input[16], 32, &io[16]); 
highbd_load_pack_transpose_32bit_8x8(&input[24], 32, &io[24]); idct32_1024_8x32(io, col[i]); input += 32 << 3; } // columns for (i = 0; i < 32; i += 8) { // Transpose 32x8 block to 8x32 block transpose_16bit_8x8(col[0] + i, io); transpose_16bit_8x8(col[1] + i, io + 8); transpose_16bit_8x8(col[2] + i, io + 16); transpose_16bit_8x8(col[3] + i, io + 24); idct32_1024_8x32(io, io); for (j = 0; j < 32; ++j) { highbd_write_buffer_8(dest + j * stride, io[j], bd); } dest += 8; } } else { __m128i all[8][32], out[32], *in; for (i = 0; i < 8; i++) { in = all[i]; highbd_load_transpose_32bit_8x4(&input[0], 32, &in[0]); highbd_load_transpose_32bit_8x4(&input[8], 32, &in[8]); highbd_load_transpose_32bit_8x4(&input[16], 32, &in[16]); highbd_load_transpose_32bit_8x4(&input[24], 32, &in[24]); highbd_idct32_1024_4x32(in); input += 4 * 32; } for (i = 0; i < 32; i += 4) { transpose_32bit_4x4(all[0] + i, out + 0); transpose_32bit_4x4(all[1] + i, out + 4); transpose_32bit_4x4(all[2] + i, out + 8); transpose_32bit_4x4(all[3] + i, out + 12); transpose_32bit_4x4(all[4] + i, out + 16); transpose_32bit_4x4(all[5] + i, out + 20); transpose_32bit_4x4(all[6] + i, out + 24); transpose_32bit_4x4(all[7] + i, out + 28); highbd_idct32_1024_4x32(out); for (j = 0; j < 32; ++j) { highbd_write_buffer_4(dest + j * stride, out[j], bd); } dest += 4; } } } // ----------------------------------------------------------------------------- // For each 4x32 block __m128i in[32], // Input with index, 0, 4, 8, 12 // output pixels: 0-7 in __m128i out[32] static INLINE void highbd_idct32_135_4x32_quarter_1( const __m128i *const in /*in[32]*/, __m128i *const out /*out[8]*/) { __m128i step1[8], step2[8]; // stage 3 highbd_partial_butterfly_sse2(in[4], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); highbd_partial_butterfly_neg_sse2(in[12], cospi_12_64, cospi_20_64, &step1[5], &step1[6]); // stage 4 highbd_partial_butterfly_sse2(in[0], cospi_16_64, cospi_16_64, &step2[1], &step2[0]); highbd_partial_butterfly_sse2(in[8], cospi_24_64, cospi_8_64, &step2[2], &step2[3]); step2[4] = _mm_add_epi32(step1[4], step1[5]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); step2[7] = _mm_add_epi32(step1[7], step1[6]); // stage 5 step1[0] = _mm_add_epi32(step2[0], step2[3]); step1[1] = _mm_add_epi32(step2[1], step2[2]); step1[2] = _mm_sub_epi32(step2[1], step2[2]); step1[3] = _mm_sub_epi32(step2[0], step2[3]); step1[4] = step2[4]; highbd_butterfly_sse2(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); step1[7] = step2[7]; // stage 6 out[0] = _mm_add_epi32(step1[0], step1[7]); out[1] = _mm_add_epi32(step1[1], step1[6]); out[2] = _mm_add_epi32(step1[2], step1[5]); out[3] = _mm_add_epi32(step1[3], step1[4]); out[4] = _mm_sub_epi32(step1[3], step1[4]); out[5] = _mm_sub_epi32(step1[2], step1[5]); out[6] = _mm_sub_epi32(step1[1], step1[6]); out[7] = _mm_sub_epi32(step1[0], step1[7]); } // For each 4x32 block __m128i in[32], // Input with index, 2, 6, 10, 14 // output pixels: 8-15 in __m128i out[32] static INLINE void highbd_idct32_135_4x32_quarter_2( const __m128i *in /*in[32]*/, __m128i *out /*out[16]*/) { __m128i step1[32], step2[32]; // stage 2 highbd_partial_butterfly_sse2(in[2], cospi_30_64, cospi_2_64, &step2[8], &step2[15]); highbd_partial_butterfly_neg_sse2(in[14], cospi_14_64, cospi_18_64, &step2[9], &step2[14]); highbd_partial_butterfly_sse2(in[10], cospi_22_64, cospi_10_64, &step2[10], &step2[13]); highbd_partial_butterfly_neg_sse2(in[6], cospi_6_64, cospi_26_64, &step2[11], &step2[12]); // 
stage 3 step1[8] = _mm_add_epi32(step2[8], step2[9]); step1[9] = _mm_sub_epi32(step2[8], step2[9]); step1[14] = _mm_sub_epi32(step2[15], step2[14]); step1[15] = _mm_add_epi32(step2[15], step2[14]); step1[10] = _mm_sub_epi32(step2[10], step2[11]); // step1[10] = -step1[10] step1[11] = _mm_add_epi32(step2[10], step2[11]); step1[12] = _mm_add_epi32(step2[13], step2[12]); step1[13] = _mm_sub_epi32(step2[13], step2[12]); // step1[13] = -step1[13] highbd_idct32_4x32_quarter_2_stage_4_to_6(step1, out); } static INLINE void highbd_idct32_135_4x32_quarter_1_2( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[16]; highbd_idct32_135_4x32_quarter_1(in, temp); highbd_idct32_135_4x32_quarter_2(in, temp); // stage 7 highbd_add_sub_butterfly(temp, out, 16); } // For each 4x32 block __m128i in[32], // Input with odd index, // 1, 3, 5, 7, 9, 11, 13, 15 // output pixels: 16-23, 24-31 in __m128i out[32] static INLINE void highbd_idct32_135_4x32_quarter_3_4( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i step1[32], step2[32]; // stage 1 highbd_partial_butterfly_sse2(in[1], cospi_31_64, cospi_1_64, &step1[16], &step1[31]); highbd_partial_butterfly_neg_sse2(in[15], cospi_15_64, cospi_17_64, &step1[17], &step1[30]); highbd_partial_butterfly_sse2(in[9], cospi_23_64, cospi_9_64, &step1[18], &step1[29]); highbd_partial_butterfly_neg_sse2(in[7], cospi_7_64, cospi_25_64, &step1[19], &step1[28]); highbd_partial_butterfly_sse2(in[5], cospi_27_64, cospi_5_64, &step1[20], &step1[27]); highbd_partial_butterfly_neg_sse2(in[11], cospi_11_64, cospi_21_64, &step1[21], &step1[26]); highbd_partial_butterfly_sse2(in[13], cospi_19_64, cospi_13_64, &step1[22], &step1[25]); highbd_partial_butterfly_neg_sse2(in[3], cospi_3_64, cospi_29_64, &step1[23], &step1[24]); // stage 2 step2[16] = _mm_add_epi32(step1[16], step1[17]); step2[17] = _mm_sub_epi32(step1[16], step1[17]); step2[18] = _mm_sub_epi32(step1[18], step1[19]); // step2[18] = -step2[18] step2[19] = _mm_add_epi32(step1[18], step1[19]); step2[20] = _mm_add_epi32(step1[20], step1[21]); step2[21] = _mm_sub_epi32(step1[20], step1[21]); step2[22] = _mm_sub_epi32(step1[22], step1[23]); // step2[22] = -step2[22] step2[23] = _mm_add_epi32(step1[22], step1[23]); step2[24] = _mm_add_epi32(step1[25], step1[24]); step2[25] = _mm_sub_epi32(step1[25], step1[24]); // step2[25] = -step2[25] step2[26] = _mm_sub_epi32(step1[27], step1[26]); step2[27] = _mm_add_epi32(step1[27], step1[26]); step2[28] = _mm_add_epi32(step1[29], step1[28]); step2[29] = _mm_sub_epi32(step1[29], step1[28]); // step2[29] = -step2[29] step2[30] = _mm_sub_epi32(step1[31], step1[30]); step2[31] = _mm_add_epi32(step1[31], step1[30]); // stage 3 step1[16] = step2[16]; step1[31] = step2[31]; highbd_butterfly_sse2(step2[30], step2[17], cospi_28_64, cospi_4_64, &step1[17], &step1[30]); highbd_butterfly_sse2(step2[18], step2[29], cospi_4_64, cospi_28_64, &step1[29], &step1[18]); step1[19] = step2[19]; step1[20] = step2[20]; highbd_butterfly_sse2(step2[26], step2[21], cospi_12_64, cospi_20_64, &step1[21], &step1[26]); highbd_butterfly_sse2(step2[22], step2[25], cospi_20_64, cospi_12_64, &step1[25], &step1[22]); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; step1[28] = step2[28]; highbd_idct32_4x32_quarter_3_4_stage_4_to_7(step1, out); } static void highbd_idct32_135_4x32(__m128i *const io /*io[32]*/) { __m128i temp[32]; highbd_idct32_135_4x32_quarter_1_2(io, temp); highbd_idct32_135_4x32_quarter_3_4(io, temp); // final stage 
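  // Added note (not in the original source): assuming
  // highbd_add_sub_butterfly(in, out, n) computes, for i = 0 .. n/2 - 1,
  //   out[i]         = _mm_add_epi32(in[i], in[n - 1 - i]);
  //   out[n - 1 - i] = _mm_sub_epi32(in[i], in[n - 1 - i]);
  // this folds the two 16-element halves together, mirroring the last stage
  // of the scalar idct32.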
highbd_add_sub_butterfly(temp, io, 32); } void vpx_highbd_idct32x32_135_add_sse2(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; if (bd == 8) { __m128i col[2][32], in[32], out[32]; for (i = 16; i < 32; i++) { in[i] = _mm_setzero_si128(); } // rows for (i = 0; i < 2; i++) { highbd_load_pack_transpose_32bit_8x8(&input[0], 32, &in[0]); highbd_load_pack_transpose_32bit_8x8(&input[8], 32, &in[8]); idct32_1024_8x32(in, col[i]); input += 32 << 3; } // columns for (i = 0; i < 32; i += 8) { transpose_16bit_8x8(col[0] + i, in); transpose_16bit_8x8(col[1] + i, in + 8); idct32_1024_8x32(in, out); for (j = 0; j < 32; ++j) { highbd_write_buffer_8(dest + j * stride, out[j], bd); } dest += 8; } } else { __m128i all[8][32], out[32], *in; for (i = 0; i < 4; i++) { in = all[i]; highbd_load_transpose_32bit_8x4(&input[0], 32, &in[0]); highbd_load_transpose_32bit_8x4(&input[8], 32, &in[8]); highbd_idct32_135_4x32(in); input += 4 * 32; } for (i = 0; i < 32; i += 4) { transpose_32bit_4x4(all[0] + i, out + 0); transpose_32bit_4x4(all[1] + i, out + 4); transpose_32bit_4x4(all[2] + i, out + 8); transpose_32bit_4x4(all[3] + i, out + 12); highbd_idct32_135_4x32(out); for (j = 0; j < 32; ++j) { highbd_write_buffer_4(dest + j * stride, out[j], bd); } dest += 4; } } } // ----------------------------------------------------------------------------- // For each 4x32 block __m128i in[32], // Input with index, 0, 4 // output pixels: 0-7 in __m128i out[32] static INLINE void highbd_idct32_34_4x32_quarter_1( const __m128i *const in /*in[32]*/, __m128i *const out /*out[8]*/) { __m128i step1[8], step2[8]; // stage 3 highbd_partial_butterfly_sse2(in[4], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); // stage 4 highbd_partial_butterfly_sse2(in[0], cospi_16_64, cospi_16_64, &step2[1], &step2[0]); step2[4] = step1[4]; step2[5] = step1[4]; step2[6] = step1[7]; step2[7] = step1[7]; // stage 5 step1[0] = step2[0]; step1[1] = step2[1]; step1[2] = step2[1]; step1[3] = step2[0]; step1[4] = step2[4]; highbd_butterfly_sse2(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); step1[7] = step2[7]; // stage 6 out[0] = _mm_add_epi32(step1[0], step1[7]); out[1] = _mm_add_epi32(step1[1], step1[6]); out[2] = _mm_add_epi32(step1[2], step1[5]); out[3] = _mm_add_epi32(step1[3], step1[4]); out[4] = _mm_sub_epi32(step1[3], step1[4]); out[5] = _mm_sub_epi32(step1[2], step1[5]); out[6] = _mm_sub_epi32(step1[1], step1[6]); out[7] = _mm_sub_epi32(step1[0], step1[7]); } // For each 4x32 block __m128i in[32], // Input with index, 2, 6 // output pixels: 8-15 in __m128i out[32] static INLINE void highbd_idct32_34_4x32_quarter_2(const __m128i *in /*in[32]*/, __m128i *out /*out[16]*/) { __m128i step1[32], step2[32]; // stage 2 highbd_partial_butterfly_sse2(in[2], cospi_30_64, cospi_2_64, &step2[8], &step2[15]); highbd_partial_butterfly_neg_sse2(in[6], cospi_6_64, cospi_26_64, &step2[11], &step2[12]); // stage 3 step1[8] = step2[8]; step1[9] = step2[8]; step1[14] = step2[15]; step1[15] = step2[15]; step1[10] = step2[11]; step1[11] = step2[11]; step1[12] = step2[12]; step1[13] = step2[12]; step1[10] = _mm_sub_epi32(_mm_setzero_si128(), step1[10]); // step1[10] = -step1[10] step1[13] = _mm_sub_epi32(_mm_setzero_si128(), step1[13]); // step1[13] = -step1[13] highbd_idct32_4x32_quarter_2_stage_4_to_6(step1, out); } static INLINE void highbd_idct32_34_4x32_quarter_1_2( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[16]; highbd_idct32_34_4x32_quarter_1(in, temp); 
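// Editorial note (added): in the quarter_1_2 helpers, quarter_1 fills
// temp[0..7] from the even inputs and quarter_2 fills temp[8..15]; the
// stage-7 add/sub butterfly that follows pairs temp[i] with temp[15 - i]
// into sums and differences, producing the first 16 outputs out[0..15].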
highbd_idct32_34_4x32_quarter_2(in, temp); // stage 7 highbd_add_sub_butterfly(temp, out, 16); } // For each 4x32 block __m128i in[32], // Input with odd index, // 1, 3, 5, 7 // output pixels: 16-23, 24-31 in __m128i out[32] static INLINE void highbd_idct32_34_4x32_quarter_3_4( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i step1[32], step2[32]; // stage 1 highbd_partial_butterfly_sse2(in[1], cospi_31_64, cospi_1_64, &step1[16], &step1[31]); highbd_partial_butterfly_neg_sse2(in[7], cospi_7_64, cospi_25_64, &step1[19], &step1[28]); highbd_partial_butterfly_sse2(in[5], cospi_27_64, cospi_5_64, &step1[20], &step1[27]); highbd_partial_butterfly_neg_sse2(in[3], cospi_3_64, cospi_29_64, &step1[23], &step1[24]); // stage 2 step2[16] = step1[16]; step2[17] = step1[16]; step2[18] = step1[19]; step2[19] = step1[19]; step2[20] = step1[20]; step2[21] = step1[20]; step2[22] = step1[23]; step2[23] = step1[23]; step2[24] = step1[24]; step2[25] = step1[24]; step2[26] = step1[27]; step2[27] = step1[27]; step2[28] = step1[28]; step2[29] = step1[28]; step2[30] = step1[31]; step2[31] = step1[31]; // stage 3 step2[18] = _mm_sub_epi32(_mm_setzero_si128(), step2[18]); // step2[18] = -step2[18] step2[22] = _mm_sub_epi32(_mm_setzero_si128(), step2[22]); // step2[22] = -step2[22] step2[25] = _mm_sub_epi32(_mm_setzero_si128(), step2[25]); // step2[25] = -step2[25] step2[29] = _mm_sub_epi32(_mm_setzero_si128(), step2[29]); // step2[29] = -step2[29] step1[16] = step2[16]; step1[31] = step2[31]; highbd_butterfly_sse2(step2[30], step2[17], cospi_28_64, cospi_4_64, &step1[17], &step1[30]); highbd_butterfly_sse2(step2[18], step2[29], cospi_4_64, cospi_28_64, &step1[29], &step1[18]); step1[19] = step2[19]; step1[20] = step2[20]; highbd_butterfly_sse2(step2[26], step2[21], cospi_12_64, cospi_20_64, &step1[21], &step1[26]); highbd_butterfly_sse2(step2[22], step2[25], cospi_20_64, cospi_12_64, &step1[25], &step1[22]); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; step1[28] = step2[28]; highbd_idct32_4x32_quarter_3_4_stage_4_to_7(step1, out); } static void highbd_idct32_34_4x32(__m128i *const io /*io[32]*/) { __m128i temp[32]; highbd_idct32_34_4x32_quarter_1_2(io, temp); highbd_idct32_34_4x32_quarter_3_4(io, temp); // final stage highbd_add_sub_butterfly(temp, io, 32); } void vpx_highbd_idct32x32_34_add_sse2(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; if (bd == 8) { __m128i col[32], in[32], out[32]; // rows highbd_load_pack_transpose_32bit_8x8(&input[0], 32, &in[0]); idct32_34_8x32_sse2(in, col); // columns for (i = 0; i < 32; i += 8) { transpose_16bit_8x8(col + i, in); idct32_34_8x32_sse2(in, out); for (j = 0; j < 32; ++j) { highbd_write_buffer_8(dest + j * stride, out[j], bd); } dest += 8; } } else { __m128i all[8][32], out[32], *in; for (i = 0; i < 4; i++) { in = all[i]; highbd_load_transpose_32bit_8x4(&input[0], 32, &in[0]); highbd_load_transpose_32bit_8x4(&input[8], 32, &in[8]); highbd_idct32_34_4x32(in); input += 4 * 32; } for (i = 0; i < 32; i += 4) { transpose_32bit_4x4(all[0] + i, out + 0); transpose_32bit_4x4(all[1] + i, out + 4); transpose_32bit_4x4(all[2] + i, out + 8); transpose_32bit_4x4(all[3] + i, out + 12); highbd_idct32_34_4x32(out); for (j = 0; j < 32; ++j) { highbd_write_buffer_4(dest + j * stride, out[j], bd); } dest += 4; } } } void vpx_highbd_idct32x32_1_add_sse2(const tran_low_t *input, uint16_t *dest, int stride, int bd) { highbd_idct_1_add_kernel(input, dest, stride, bd, 32); } 
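/*
 * Editorial sketch (added; not part of libvpx): a plain-C model of the two
 * rotation primitives that the SSE2 kernels above and the SSE4.1 kernels
 * below vectorize four 32-bit lanes at a time.  Each cospi_N_64 constant is
 * a cosine scaled by 2^14, so every product is rounded back down by
 * DCT_CONST_BITS = 14.  The sketch_* names are illustrative assumptions,
 * not libvpx API; the real intrinsic sequences differ between the SSE2
 * variants (which lack a signed 32-bit multiply and need the *_neg helpers)
 * and the SSE4.1 ones.  Dropping a known-zero operand is what lets the
 * reduced-coefficient kernels skip work while reusing the shared
 * stage-4-to-7 helpers.
 */
#include <stdint.h>

#define SKETCH_DCT_CONST_BITS 14

static int32_t sketch_round_shift(int64_t x) {
  /* Model of dct_const_round_shift(): add half, drop the 14 scale bits. */
  return (int32_t)((x + (1 << (SKETCH_DCT_CONST_BITS - 1))) >>
                   SKETCH_DCT_CONST_BITS);
}

/* Model of highbd_butterfly_*(in0, in1, c0, c1, &o0, &o1): rotate the pair
 * (in0, in1), i.e. o0 = in0*c0 - in1*c1 and o1 = in0*c1 + in1*c0. */
static void sketch_butterfly(int32_t in0, int32_t in1, int32_t c0, int32_t c1,
                             int32_t *o0, int32_t *o1) {
  *o0 = sketch_round_shift((int64_t)in0 * c0 - (int64_t)in1 * c1);
  *o1 = sketch_round_shift((int64_t)in0 * c1 + (int64_t)in1 * c0);
}

/* Model of highbd_partial_butterfly_*(): in the 135- and 34-coefficient
 * paths the second operand is a known-zero coefficient, so the rotation
 * collapses to two independent scalings of the surviving input. */
static void sketch_partial_butterfly(int32_t in, int32_t c0, int32_t c1,
                                     int32_t *o0, int32_t *o1) {
  *o0 = sketch_round_shift((int64_t)in * c0);
  *o1 = sketch_round_shift((int64_t)in * c1);
}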
libvpx-1.8.2/vpx_dsp/x86/highbd_idct32x32_add_sse4.c000066400000000000000000000663531357355204000220230ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include // SSE4.1 #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/x86/highbd_inv_txfm_sse2.h" #include "vpx_dsp/x86/highbd_inv_txfm_sse4.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/inv_txfm_ssse3.h" #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" static INLINE void highbd_idct32_4x32_quarter_2_stage_4_to_6( __m128i *const step1 /*step1[16]*/, __m128i *const out /*out[16]*/) { __m128i step2[32]; // stage 4 step2[8] = step1[8]; step2[15] = step1[15]; highbd_butterfly_sse4_1(step1[14], step1[9], cospi_24_64, cospi_8_64, &step2[9], &step2[14]); highbd_butterfly_sse4_1(step1[13], step1[10], -cospi_8_64, cospi_24_64, &step2[10], &step2[13]); step2[11] = step1[11]; step2[12] = step1[12]; // stage 5 step1[8] = _mm_add_epi32(step2[8], step2[11]); step1[9] = _mm_add_epi32(step2[9], step2[10]); step1[10] = _mm_sub_epi32(step2[9], step2[10]); step1[11] = _mm_sub_epi32(step2[8], step2[11]); step1[12] = _mm_sub_epi32(step2[15], step2[12]); step1[13] = _mm_sub_epi32(step2[14], step2[13]); step1[14] = _mm_add_epi32(step2[14], step2[13]); step1[15] = _mm_add_epi32(step2[15], step2[12]); // stage 6 out[8] = step1[8]; out[9] = step1[9]; highbd_butterfly_sse4_1(step1[13], step1[10], cospi_16_64, cospi_16_64, &out[10], &out[13]); highbd_butterfly_sse4_1(step1[12], step1[11], cospi_16_64, cospi_16_64, &out[11], &out[12]); out[14] = step1[14]; out[15] = step1[15]; } static INLINE void highbd_idct32_4x32_quarter_3_4_stage_4_to_7( __m128i *const step1 /*step1[32]*/, __m128i *const out /*out[32]*/) { __m128i step2[32]; // stage 4 step2[16] = _mm_add_epi32(step1[16], step1[19]); step2[17] = _mm_add_epi32(step1[17], step1[18]); step2[18] = _mm_sub_epi32(step1[17], step1[18]); step2[19] = _mm_sub_epi32(step1[16], step1[19]); step2[20] = _mm_sub_epi32(step1[23], step1[20]); step2[21] = _mm_sub_epi32(step1[22], step1[21]); step2[22] = _mm_add_epi32(step1[22], step1[21]); step2[23] = _mm_add_epi32(step1[23], step1[20]); step2[24] = _mm_add_epi32(step1[24], step1[27]); step2[25] = _mm_add_epi32(step1[25], step1[26]); step2[26] = _mm_sub_epi32(step1[25], step1[26]); step2[27] = _mm_sub_epi32(step1[24], step1[27]); step2[28] = _mm_sub_epi32(step1[31], step1[28]); step2[29] = _mm_sub_epi32(step1[30], step1[29]); step2[30] = _mm_add_epi32(step1[29], step1[30]); step2[31] = _mm_add_epi32(step1[28], step1[31]); // stage 5 step1[16] = step2[16]; step1[17] = step2[17]; highbd_butterfly_sse4_1(step2[29], step2[18], cospi_24_64, cospi_8_64, &step1[18], &step1[29]); highbd_butterfly_sse4_1(step2[28], step2[19], cospi_24_64, cospi_8_64, &step1[19], &step1[28]); highbd_butterfly_sse4_1(step2[27], step2[20], -cospi_8_64, cospi_24_64, &step1[20], &step1[27]); highbd_butterfly_sse4_1(step2[26], step2[21], -cospi_8_64, cospi_24_64, &step1[21], &step1[26]); step1[22] = step2[22]; step1[23] = step2[23]; step1[24] = step2[24]; step1[25] = step2[25]; step1[30] = step2[30]; step1[31] = step2[31]; // stage 6 step2[16] = _mm_add_epi32(step1[16], step1[23]); step2[17] = 
_mm_add_epi32(step1[17], step1[22]); step2[18] = _mm_add_epi32(step1[18], step1[21]); step2[19] = _mm_add_epi32(step1[19], step1[20]); step2[20] = _mm_sub_epi32(step1[19], step1[20]); step2[21] = _mm_sub_epi32(step1[18], step1[21]); step2[22] = _mm_sub_epi32(step1[17], step1[22]); step2[23] = _mm_sub_epi32(step1[16], step1[23]); step2[24] = _mm_sub_epi32(step1[31], step1[24]); step2[25] = _mm_sub_epi32(step1[30], step1[25]); step2[26] = _mm_sub_epi32(step1[29], step1[26]); step2[27] = _mm_sub_epi32(step1[28], step1[27]); step2[28] = _mm_add_epi32(step1[27], step1[28]); step2[29] = _mm_add_epi32(step1[26], step1[29]); step2[30] = _mm_add_epi32(step1[25], step1[30]); step2[31] = _mm_add_epi32(step1[24], step1[31]); // stage 7 out[16] = step2[16]; out[17] = step2[17]; out[18] = step2[18]; out[19] = step2[19]; highbd_butterfly_sse4_1(step2[27], step2[20], cospi_16_64, cospi_16_64, &out[20], &out[27]); highbd_butterfly_sse4_1(step2[26], step2[21], cospi_16_64, cospi_16_64, &out[21], &out[26]); highbd_butterfly_sse4_1(step2[25], step2[22], cospi_16_64, cospi_16_64, &out[22], &out[25]); highbd_butterfly_sse4_1(step2[24], step2[23], cospi_16_64, cospi_16_64, &out[23], &out[24]); out[28] = step2[28]; out[29] = step2[29]; out[30] = step2[30]; out[31] = step2[31]; } // Group the coefficient calculation into smaller functions to prevent stack // spillover in 32x32 idct optimizations: // quarter_1: 0-7 // quarter_2: 8-15 // quarter_3_4: 16-23, 24-31 // For each 4x32 block __m128i in[32], // Input with index, 0, 4, 8, 12, 16, 20, 24, 28 // output pixels: 0-7 in __m128i out[32] static INLINE void highbd_idct32_1024_4x32_quarter_1( const __m128i *const in /*in[32]*/, __m128i *const out /*out[8]*/) { __m128i step1[8], step2[8]; // stage 3 highbd_butterfly_sse4_1(in[4], in[28], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); highbd_butterfly_sse4_1(in[20], in[12], cospi_12_64, cospi_20_64, &step1[5], &step1[6]); // stage 4 highbd_butterfly_sse4_1(in[0], in[16], cospi_16_64, cospi_16_64, &step2[1], &step2[0]); highbd_butterfly_sse4_1(in[8], in[24], cospi_24_64, cospi_8_64, &step2[2], &step2[3]); step2[4] = _mm_add_epi32(step1[4], step1[5]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); step2[7] = _mm_add_epi32(step1[7], step1[6]); // stage 5 step1[0] = _mm_add_epi32(step2[0], step2[3]); step1[1] = _mm_add_epi32(step2[1], step2[2]); step1[2] = _mm_sub_epi32(step2[1], step2[2]); step1[3] = _mm_sub_epi32(step2[0], step2[3]); step1[4] = step2[4]; highbd_butterfly_sse4_1(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); step1[7] = step2[7]; // stage 6 out[0] = _mm_add_epi32(step1[0], step1[7]); out[1] = _mm_add_epi32(step1[1], step1[6]); out[2] = _mm_add_epi32(step1[2], step1[5]); out[3] = _mm_add_epi32(step1[3], step1[4]); out[4] = _mm_sub_epi32(step1[3], step1[4]); out[5] = _mm_sub_epi32(step1[2], step1[5]); out[6] = _mm_sub_epi32(step1[1], step1[6]); out[7] = _mm_sub_epi32(step1[0], step1[7]); } // For each 4x32 block __m128i in[32], // Input with index, 2, 6, 10, 14, 18, 22, 26, 30 // output pixels: 8-15 in __m128i out[32] static INLINE void highbd_idct32_1024_4x32_quarter_2( const __m128i *in /*in[32]*/, __m128i *out /*out[16]*/) { __m128i step1[32], step2[32]; // stage 2 highbd_butterfly_sse4_1(in[2], in[30], cospi_30_64, cospi_2_64, &step2[8], &step2[15]); highbd_butterfly_sse4_1(in[18], in[14], cospi_14_64, cospi_18_64, &step2[9], &step2[14]); highbd_butterfly_sse4_1(in[10], in[22], cospi_22_64, cospi_10_64, &step2[10], &step2[13]); 
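// Editorial note (added): each highbd_butterfly_sse4_1(in0, in1, c0, c1,
// &o0, &o1) call in this stage computes o0 = rnd(in0*c0 - in1*c1) and
// o1 = rnd(in0*c1 + in1*c0) across four 32-bit lanes, mirroring the scalar
// idct32 stage-2 expressions term for term (see the sketch earlier in this
// archive).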
highbd_butterfly_sse4_1(in[26], in[6], cospi_6_64, cospi_26_64, &step2[11], &step2[12]); // stage 3 step1[8] = _mm_add_epi32(step2[8], step2[9]); step1[9] = _mm_sub_epi32(step2[8], step2[9]); step1[14] = _mm_sub_epi32(step2[15], step2[14]); step1[15] = _mm_add_epi32(step2[15], step2[14]); step1[10] = _mm_sub_epi32(step2[11], step2[10]); step1[11] = _mm_add_epi32(step2[11], step2[10]); step1[12] = _mm_add_epi32(step2[12], step2[13]); step1[13] = _mm_sub_epi32(step2[12], step2[13]); highbd_idct32_4x32_quarter_2_stage_4_to_6(step1, out); } static INLINE void highbd_idct32_1024_4x32_quarter_1_2( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[16]; highbd_idct32_1024_4x32_quarter_1(in, temp); highbd_idct32_1024_4x32_quarter_2(in, temp); // stage 7 highbd_add_sub_butterfly(temp, out, 16); } // For each 4x32 block __m128i in[32], // Input with odd index, // 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 // output pixels: 16-23, 24-31 in __m128i out[32] static INLINE void highbd_idct32_1024_4x32_quarter_3_4( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i step1[32], step2[32]; // stage 1 highbd_butterfly_sse4_1(in[1], in[31], cospi_31_64, cospi_1_64, &step1[16], &step1[31]); highbd_butterfly_sse4_1(in[17], in[15], cospi_15_64, cospi_17_64, &step1[17], &step1[30]); highbd_butterfly_sse4_1(in[9], in[23], cospi_23_64, cospi_9_64, &step1[18], &step1[29]); highbd_butterfly_sse4_1(in[25], in[7], cospi_7_64, cospi_25_64, &step1[19], &step1[28]); highbd_butterfly_sse4_1(in[5], in[27], cospi_27_64, cospi_5_64, &step1[20], &step1[27]); highbd_butterfly_sse4_1(in[21], in[11], cospi_11_64, cospi_21_64, &step1[21], &step1[26]); highbd_butterfly_sse4_1(in[13], in[19], cospi_19_64, cospi_13_64, &step1[22], &step1[25]); highbd_butterfly_sse4_1(in[29], in[3], cospi_3_64, cospi_29_64, &step1[23], &step1[24]); // stage 2 step2[16] = _mm_add_epi32(step1[16], step1[17]); step2[17] = _mm_sub_epi32(step1[16], step1[17]); step2[18] = _mm_sub_epi32(step1[19], step1[18]); step2[19] = _mm_add_epi32(step1[19], step1[18]); step2[20] = _mm_add_epi32(step1[20], step1[21]); step2[21] = _mm_sub_epi32(step1[20], step1[21]); step2[22] = _mm_sub_epi32(step1[23], step1[22]); step2[23] = _mm_add_epi32(step1[23], step1[22]); step2[24] = _mm_add_epi32(step1[24], step1[25]); step2[25] = _mm_sub_epi32(step1[24], step1[25]); step2[26] = _mm_sub_epi32(step1[27], step1[26]); step2[27] = _mm_add_epi32(step1[27], step1[26]); step2[28] = _mm_add_epi32(step1[28], step1[29]); step2[29] = _mm_sub_epi32(step1[28], step1[29]); step2[30] = _mm_sub_epi32(step1[31], step1[30]); step2[31] = _mm_add_epi32(step1[31], step1[30]); // stage 3 step1[16] = step2[16]; step1[31] = step2[31]; highbd_butterfly_sse4_1(step2[30], step2[17], cospi_28_64, cospi_4_64, &step1[17], &step1[30]); highbd_butterfly_sse4_1(step2[29], step2[18], -cospi_4_64, cospi_28_64, &step1[18], &step1[29]); step1[19] = step2[19]; step1[20] = step2[20]; highbd_butterfly_sse4_1(step2[26], step2[21], cospi_12_64, cospi_20_64, &step1[21], &step1[26]); highbd_butterfly_sse4_1(step2[25], step2[22], -cospi_20_64, cospi_12_64, &step1[22], &step1[25]); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; step1[28] = step2[28]; highbd_idct32_4x32_quarter_3_4_stage_4_to_7(step1, out); } static void highbd_idct32_1024_4x32(__m128i *const io /*io[32]*/) { __m128i temp[32]; highbd_idct32_1024_4x32_quarter_1_2(io, temp); highbd_idct32_1024_4x32_quarter_3_4(io, temp); // final stage highbd_add_sub_butterfly(temp, 
io, 32); } void vpx_highbd_idct32x32_1024_add_sse4_1(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; if (bd == 8) { __m128i col[4][32], io[32]; // rows for (i = 0; i < 4; i++) { highbd_load_pack_transpose_32bit_8x8(&input[0], 32, &io[0]); highbd_load_pack_transpose_32bit_8x8(&input[8], 32, &io[8]); highbd_load_pack_transpose_32bit_8x8(&input[16], 32, &io[16]); highbd_load_pack_transpose_32bit_8x8(&input[24], 32, &io[24]); idct32_1024_8x32(io, col[i]); input += 32 << 3; } // columns for (i = 0; i < 32; i += 8) { // Transpose 32x8 block to 8x32 block transpose_16bit_8x8(col[0] + i, io); transpose_16bit_8x8(col[1] + i, io + 8); transpose_16bit_8x8(col[2] + i, io + 16); transpose_16bit_8x8(col[3] + i, io + 24); idct32_1024_8x32(io, io); for (j = 0; j < 32; ++j) { highbd_write_buffer_8(dest + j * stride, io[j], bd); } dest += 8; } } else { __m128i all[8][32], out[32], *in; for (i = 0; i < 8; i++) { in = all[i]; highbd_load_transpose_32bit_8x4(&input[0], 32, &in[0]); highbd_load_transpose_32bit_8x4(&input[8], 32, &in[8]); highbd_load_transpose_32bit_8x4(&input[16], 32, &in[16]); highbd_load_transpose_32bit_8x4(&input[24], 32, &in[24]); highbd_idct32_1024_4x32(in); input += 4 * 32; } for (i = 0; i < 32; i += 4) { transpose_32bit_4x4(all[0] + i, out + 0); transpose_32bit_4x4(all[1] + i, out + 4); transpose_32bit_4x4(all[2] + i, out + 8); transpose_32bit_4x4(all[3] + i, out + 12); transpose_32bit_4x4(all[4] + i, out + 16); transpose_32bit_4x4(all[5] + i, out + 20); transpose_32bit_4x4(all[6] + i, out + 24); transpose_32bit_4x4(all[7] + i, out + 28); highbd_idct32_1024_4x32(out); for (j = 0; j < 32; ++j) { highbd_write_buffer_4(dest + j * stride, out[j], bd); } dest += 4; } } } // ----------------------------------------------------------------------------- // For each 4x32 block __m128i in[32], // Input with index, 0, 4, 8, 12 // output pixels: 0-7 in __m128i out[32] static INLINE void highbd_idct32_135_4x32_quarter_1( const __m128i *const in /*in[32]*/, __m128i *const out /*out[8]*/) { __m128i step1[8], step2[8]; // stage 3 highbd_partial_butterfly_sse4_1(in[4], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); highbd_partial_butterfly_sse4_1(in[12], -cospi_20_64, cospi_12_64, &step1[5], &step1[6]); // stage 4 highbd_partial_butterfly_sse4_1(in[0], cospi_16_64, cospi_16_64, &step2[1], &step2[0]); highbd_partial_butterfly_sse4_1(in[8], cospi_24_64, cospi_8_64, &step2[2], &step2[3]); step2[4] = _mm_add_epi32(step1[4], step1[5]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); step2[7] = _mm_add_epi32(step1[7], step1[6]); // stage 5 step1[0] = _mm_add_epi32(step2[0], step2[3]); step1[1] = _mm_add_epi32(step2[1], step2[2]); step1[2] = _mm_sub_epi32(step2[1], step2[2]); step1[3] = _mm_sub_epi32(step2[0], step2[3]); step1[4] = step2[4]; highbd_butterfly_sse4_1(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); step1[7] = step2[7]; // stage 6 out[0] = _mm_add_epi32(step1[0], step1[7]); out[1] = _mm_add_epi32(step1[1], step1[6]); out[2] = _mm_add_epi32(step1[2], step1[5]); out[3] = _mm_add_epi32(step1[3], step1[4]); out[4] = _mm_sub_epi32(step1[3], step1[4]); out[5] = _mm_sub_epi32(step1[2], step1[5]); out[6] = _mm_sub_epi32(step1[1], step1[6]); out[7] = _mm_sub_epi32(step1[0], step1[7]); } // For each 4x32 block __m128i in[32], // Input with index, 2, 6, 10, 14 // output pixels: 8-15 in __m128i out[32] static INLINE void highbd_idct32_135_4x32_quarter_2( const __m128i *in /*in[32]*/, __m128i *out /*out[16]*/) { __m128i 
step1[32], step2[32]; // stage 2 highbd_partial_butterfly_sse4_1(in[2], cospi_30_64, cospi_2_64, &step2[8], &step2[15]); highbd_partial_butterfly_sse4_1(in[14], -cospi_18_64, cospi_14_64, &step2[9], &step2[14]); highbd_partial_butterfly_sse4_1(in[10], cospi_22_64, cospi_10_64, &step2[10], &step2[13]); highbd_partial_butterfly_sse4_1(in[6], -cospi_26_64, cospi_6_64, &step2[11], &step2[12]); // stage 3 step1[8] = _mm_add_epi32(step2[8], step2[9]); step1[9] = _mm_sub_epi32(step2[8], step2[9]); step1[14] = _mm_sub_epi32(step2[15], step2[14]); step1[15] = _mm_add_epi32(step2[15], step2[14]); step1[10] = _mm_sub_epi32(step2[11], step2[10]); step1[11] = _mm_add_epi32(step2[11], step2[10]); step1[12] = _mm_add_epi32(step2[12], step2[13]); step1[13] = _mm_sub_epi32(step2[12], step2[13]); highbd_idct32_4x32_quarter_2_stage_4_to_6(step1, out); } static INLINE void highbd_idct32_135_4x32_quarter_1_2( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[16]; highbd_idct32_135_4x32_quarter_1(in, temp); highbd_idct32_135_4x32_quarter_2(in, temp); // stage 7 highbd_add_sub_butterfly(temp, out, 16); } // For each 4x32 block __m128i in[32], // Input with odd index, // 1, 3, 5, 7, 9, 11, 13, 15 // output pixels: 16-23, 24-31 in __m128i out[32] static INLINE void highbd_idct32_135_4x32_quarter_3_4( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i step1[32], step2[32]; // stage 1 highbd_partial_butterfly_sse4_1(in[1], cospi_31_64, cospi_1_64, &step1[16], &step1[31]); highbd_partial_butterfly_sse4_1(in[15], -cospi_17_64, cospi_15_64, &step1[17], &step1[30]); highbd_partial_butterfly_sse4_1(in[9], cospi_23_64, cospi_9_64, &step1[18], &step1[29]); highbd_partial_butterfly_sse4_1(in[7], -cospi_25_64, cospi_7_64, &step1[19], &step1[28]); highbd_partial_butterfly_sse4_1(in[5], cospi_27_64, cospi_5_64, &step1[20], &step1[27]); highbd_partial_butterfly_sse4_1(in[11], -cospi_21_64, cospi_11_64, &step1[21], &step1[26]); highbd_partial_butterfly_sse4_1(in[13], cospi_19_64, cospi_13_64, &step1[22], &step1[25]); highbd_partial_butterfly_sse4_1(in[3], -cospi_29_64, cospi_3_64, &step1[23], &step1[24]); // stage 2 step2[16] = _mm_add_epi32(step1[16], step1[17]); step2[17] = _mm_sub_epi32(step1[16], step1[17]); step2[18] = _mm_sub_epi32(step1[19], step1[18]); step2[19] = _mm_add_epi32(step1[19], step1[18]); step2[20] = _mm_add_epi32(step1[20], step1[21]); step2[21] = _mm_sub_epi32(step1[20], step1[21]); step2[22] = _mm_sub_epi32(step1[23], step1[22]); step2[23] = _mm_add_epi32(step1[23], step1[22]); step2[24] = _mm_add_epi32(step1[24], step1[25]); step2[25] = _mm_sub_epi32(step1[24], step1[25]); step2[26] = _mm_sub_epi32(step1[27], step1[26]); step2[27] = _mm_add_epi32(step1[27], step1[26]); step2[28] = _mm_add_epi32(step1[28], step1[29]); step2[29] = _mm_sub_epi32(step1[28], step1[29]); step2[30] = _mm_sub_epi32(step1[31], step1[30]); step2[31] = _mm_add_epi32(step1[31], step1[30]); // stage 3 step1[16] = step2[16]; step1[31] = step2[31]; highbd_butterfly_sse4_1(step2[30], step2[17], cospi_28_64, cospi_4_64, &step1[17], &step1[30]); highbd_butterfly_sse4_1(step2[29], step2[18], -cospi_4_64, cospi_28_64, &step1[18], &step1[29]); step1[19] = step2[19]; step1[20] = step2[20]; highbd_butterfly_sse4_1(step2[26], step2[21], cospi_12_64, cospi_20_64, &step1[21], &step1[26]); highbd_butterfly_sse4_1(step2[25], step2[22], -cospi_20_64, cospi_12_64, &step1[22], &step1[25]); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; step1[28] = step2[28]; 
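// Editorial note (added): from stage 4 onward the odd half of the transform
// no longer depends on which input coefficients were zero, so the 1024-,
// 135- and 34-coefficient paths all converge on the shared
// highbd_idct32_4x32_quarter_3_4_stage_4_to_7() helper defined earlier in
// this file.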
highbd_idct32_4x32_quarter_3_4_stage_4_to_7(step1, out); } static void highbd_idct32_135_4x32(__m128i *const io /*io[32]*/) { __m128i temp[32]; highbd_idct32_135_4x32_quarter_1_2(io, temp); highbd_idct32_135_4x32_quarter_3_4(io, temp); // final stage highbd_add_sub_butterfly(temp, io, 32); } void vpx_highbd_idct32x32_135_add_sse4_1(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; if (bd == 8) { __m128i col[2][32], in[32], out[32]; // rows for (i = 0; i < 2; i++) { highbd_load_pack_transpose_32bit_8x8(&input[0], 32, &in[0]); highbd_load_pack_transpose_32bit_8x8(&input[8], 32, &in[8]); idct32_135_8x32_ssse3(in, col[i]); input += 32 << 3; } // columns for (i = 0; i < 32; i += 8) { transpose_16bit_8x8(col[0] + i, in); transpose_16bit_8x8(col[1] + i, in + 8); idct32_135_8x32_ssse3(in, out); for (j = 0; j < 32; ++j) { highbd_write_buffer_8(dest + j * stride, out[j], bd); } dest += 8; } } else { __m128i all[8][32], out[32], *in; for (i = 0; i < 4; i++) { in = all[i]; highbd_load_transpose_32bit_8x4(&input[0], 32, &in[0]); highbd_load_transpose_32bit_8x4(&input[8], 32, &in[8]); highbd_idct32_135_4x32(in); input += 4 * 32; } for (i = 0; i < 32; i += 4) { transpose_32bit_4x4(all[0] + i, out + 0); transpose_32bit_4x4(all[1] + i, out + 4); transpose_32bit_4x4(all[2] + i, out + 8); transpose_32bit_4x4(all[3] + i, out + 12); highbd_idct32_135_4x32(out); for (j = 0; j < 32; ++j) { highbd_write_buffer_4(dest + j * stride, out[j], bd); } dest += 4; } } } // ----------------------------------------------------------------------------- // For each 4x32 block __m128i in[32], // Input with index, 0, 4 // output pixels: 0-7 in __m128i out[32] static INLINE void highbd_idct32_34_4x32_quarter_1( const __m128i *const in /*in[32]*/, __m128i *const out /*out[8]*/) { __m128i step1[8], step2[8]; // stage 3 highbd_partial_butterfly_sse4_1(in[4], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); // stage 4 highbd_partial_butterfly_sse4_1(in[0], cospi_16_64, cospi_16_64, &step2[1], &step2[0]); step2[4] = step1[4]; step2[5] = step1[4]; step2[6] = step1[7]; step2[7] = step1[7]; // stage 5 step1[0] = step2[0]; step1[1] = step2[1]; step1[2] = step2[1]; step1[3] = step2[0]; step1[4] = step2[4]; highbd_butterfly_sse4_1(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); step1[7] = step2[7]; // stage 6 out[0] = _mm_add_epi32(step1[0], step1[7]); out[1] = _mm_add_epi32(step1[1], step1[6]); out[2] = _mm_add_epi32(step1[2], step1[5]); out[3] = _mm_add_epi32(step1[3], step1[4]); out[4] = _mm_sub_epi32(step1[3], step1[4]); out[5] = _mm_sub_epi32(step1[2], step1[5]); out[6] = _mm_sub_epi32(step1[1], step1[6]); out[7] = _mm_sub_epi32(step1[0], step1[7]); } // For each 4x32 block __m128i in[32], // Input with index, 2, 6 // output pixels: 8-15 in __m128i out[32] static INLINE void highbd_idct32_34_4x32_quarter_2(const __m128i *in /*in[32]*/, __m128i *out /*out[16]*/) { __m128i step1[32], step2[32]; // stage 2 highbd_partial_butterfly_sse4_1(in[2], cospi_30_64, cospi_2_64, &step2[8], &step2[15]); highbd_partial_butterfly_sse4_1(in[6], -cospi_26_64, cospi_6_64, &step2[11], &step2[12]); // stage 3 step1[8] = step2[8]; step1[9] = step2[8]; step1[14] = step2[15]; step1[15] = step2[15]; step1[10] = step2[11]; step1[11] = step2[11]; step1[12] = step2[12]; step1[13] = step2[12]; highbd_idct32_4x32_quarter_2_stage_4_to_6(step1, out); } static INLINE void highbd_idct32_34_4x32_quarter_1_2( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[16]; 
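// Editorial note (added): the _34 kernels assume only the lowest-frequency
// coefficients survive (per 4x32 pass: inputs 0 and 4 for quarter 1, 2 and
// 6 for quarter 2, and odd inputs 1, 3, 5, 7 for the second half), which is
// why stages 1-3 above degenerate into partial butterflies and plain
// register copies.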
highbd_idct32_34_4x32_quarter_1(in, temp); highbd_idct32_34_4x32_quarter_2(in, temp); // stage 7 highbd_add_sub_butterfly(temp, out, 16); } // For each 4x32 block __m128i in[32], // Input with odd index, // 1, 3, 5, 7 // output pixels: 16-23, 24-31 in __m128i out[32] static INLINE void highbd_idct32_34_4x32_quarter_3_4( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i step1[32], step2[32]; // stage 1 highbd_partial_butterfly_sse4_1(in[1], cospi_31_64, cospi_1_64, &step1[16], &step1[31]); highbd_partial_butterfly_sse4_1(in[7], -cospi_25_64, cospi_7_64, &step1[19], &step1[28]); highbd_partial_butterfly_sse4_1(in[5], cospi_27_64, cospi_5_64, &step1[20], &step1[27]); highbd_partial_butterfly_sse4_1(in[3], -cospi_29_64, cospi_3_64, &step1[23], &step1[24]); // stage 2 step2[16] = step1[16]; step2[17] = step1[16]; step2[18] = step1[19]; step2[19] = step1[19]; step2[20] = step1[20]; step2[21] = step1[20]; step2[22] = step1[23]; step2[23] = step1[23]; step2[24] = step1[24]; step2[25] = step1[24]; step2[26] = step1[27]; step2[27] = step1[27]; step2[28] = step1[28]; step2[29] = step1[28]; step2[30] = step1[31]; step2[31] = step1[31]; // stage 3 step1[16] = step2[16]; step1[31] = step2[31]; highbd_butterfly_sse4_1(step2[30], step2[17], cospi_28_64, cospi_4_64, &step1[17], &step1[30]); highbd_butterfly_sse4_1(step2[29], step2[18], -cospi_4_64, cospi_28_64, &step1[18], &step1[29]); step1[19] = step2[19]; step1[20] = step2[20]; highbd_butterfly_sse4_1(step2[26], step2[21], cospi_12_64, cospi_20_64, &step1[21], &step1[26]); highbd_butterfly_sse4_1(step2[25], step2[22], -cospi_20_64, cospi_12_64, &step1[22], &step1[25]); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; step1[28] = step2[28]; highbd_idct32_4x32_quarter_3_4_stage_4_to_7(step1, out); } static void highbd_idct32_34_4x32(__m128i *const io /*io[32]*/) { __m128i temp[32]; highbd_idct32_34_4x32_quarter_1_2(io, temp); highbd_idct32_34_4x32_quarter_3_4(io, temp); // final stage highbd_add_sub_butterfly(temp, io, 32); } void vpx_highbd_idct32x32_34_add_sse4_1(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int i, j; if (bd == 8) { __m128i col[32], in[32], out[32]; // rows highbd_load_pack_transpose_32bit_8x8(&input[0], 32, &in[0]); idct32_34_8x32_ssse3(in, col); // columns for (i = 0; i < 32; i += 8) { transpose_16bit_8x8(col + i, in); idct32_34_8x32_ssse3(in, out); for (j = 0; j < 32; ++j) { highbd_write_buffer_8(dest + j * stride, out[j], bd); } dest += 8; } } else { __m128i all[8][32], out[32], *in; for (i = 0; i < 4; i++) { in = all[i]; highbd_load_transpose_32bit_8x4(&input[0], 32, &in[0]); highbd_load_transpose_32bit_8x4(&input[8], 32, &in[8]); highbd_idct32_34_4x32(in); input += 4 * 32; } for (i = 0; i < 32; i += 4) { transpose_32bit_4x4(all[0] + i, out + 0); transpose_32bit_4x4(all[1] + i, out + 4); transpose_32bit_4x4(all[2] + i, out + 8); transpose_32bit_4x4(all[3] + i, out + 12); highbd_idct32_34_4x32(out); for (j = 0; j < 32; ++j) { highbd_write_buffer_4(dest + j * stride, out[j], bd); } dest += 4; } } } libvpx-1.8.2/vpx_dsp/x86/highbd_idct4x4_add_sse2.c000066400000000000000000000153171357355204000216510ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. 
All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include // SSE2 #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/x86/highbd_inv_txfm_sse2.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/transpose_sse2.h" static INLINE __m128i dct_const_round_shift_4_sse2(const __m128i in0, const __m128i in1) { const __m128i t0 = _mm_unpacklo_epi32(in0, in1); // 0, 1 const __m128i t1 = _mm_unpackhi_epi32(in0, in1); // 2, 3 const __m128i t2 = _mm_unpacklo_epi64(t0, t1); // 0, 1, 2, 3 return dct_const_round_shift_sse2(t2); } static INLINE void highbd_idct4_small_sse2(__m128i *const io) { const __m128i cospi_p16_p16 = _mm_setr_epi32(cospi_16_64, 0, cospi_16_64, 0); const __m128i cospi_p08_p08 = _mm_setr_epi32(cospi_8_64, 0, cospi_8_64, 0); const __m128i cospi_p24_p24 = _mm_setr_epi32(cospi_24_64, 0, cospi_24_64, 0); __m128i temp1[4], temp2[4], step[4]; transpose_32bit_4x4(io, io); // Note: There is no 32-bit signed multiply SIMD instruction in SSE2. // _mm_mul_epu32() is used which can only guarantee the lower 32-bit // (signed) result is meaningful, which is enough in this function. // stage 1 temp1[0] = _mm_add_epi32(io[0], io[2]); // input[0] + input[2] temp2[0] = _mm_sub_epi32(io[0], io[2]); // input[0] - input[2] temp1[1] = _mm_srli_si128(temp1[0], 4); // 1, 3 temp2[1] = _mm_srli_si128(temp2[0], 4); // 1, 3 temp1[0] = _mm_mul_epu32(temp1[0], cospi_p16_p16); // ([0] + [2])*cospi_16_64 temp1[1] = _mm_mul_epu32(temp1[1], cospi_p16_p16); // ([0] + [2])*cospi_16_64 temp2[0] = _mm_mul_epu32(temp2[0], cospi_p16_p16); // ([0] - [2])*cospi_16_64 temp2[1] = _mm_mul_epu32(temp2[1], cospi_p16_p16); // ([0] - [2])*cospi_16_64 step[0] = dct_const_round_shift_4_sse2(temp1[0], temp1[1]); step[1] = dct_const_round_shift_4_sse2(temp2[0], temp2[1]); temp1[3] = _mm_srli_si128(io[1], 4); temp2[3] = _mm_srli_si128(io[3], 4); temp1[0] = _mm_mul_epu32(io[1], cospi_p24_p24); // input[1] * cospi_24_64 temp1[1] = _mm_mul_epu32(temp1[3], cospi_p24_p24); // input[1] * cospi_24_64 temp2[0] = _mm_mul_epu32(io[1], cospi_p08_p08); // input[1] * cospi_8_64 temp2[1] = _mm_mul_epu32(temp1[3], cospi_p08_p08); // input[1] * cospi_8_64 temp1[2] = _mm_mul_epu32(io[3], cospi_p08_p08); // input[3] * cospi_8_64 temp1[3] = _mm_mul_epu32(temp2[3], cospi_p08_p08); // input[3] * cospi_8_64 temp2[2] = _mm_mul_epu32(io[3], cospi_p24_p24); // input[3] * cospi_24_64 temp2[3] = _mm_mul_epu32(temp2[3], cospi_p24_p24); // input[3] * cospi_24_64 temp1[0] = _mm_sub_epi64(temp1[0], temp1[2]); // [1]*cospi_24 - [3]*cospi_8 temp1[1] = _mm_sub_epi64(temp1[1], temp1[3]); // [1]*cospi_24 - [3]*cospi_8 temp2[0] = _mm_add_epi64(temp2[0], temp2[2]); // [1]*cospi_8 + [3]*cospi_24 temp2[1] = _mm_add_epi64(temp2[1], temp2[3]); // [1]*cospi_8 + [3]*cospi_24 step[2] = dct_const_round_shift_4_sse2(temp1[0], temp1[1]); step[3] = dct_const_round_shift_4_sse2(temp2[0], temp2[1]); // stage 2 io[0] = _mm_add_epi32(step[0], step[3]); // step[0] + step[3] io[1] = _mm_add_epi32(step[1], step[2]); // step[1] + step[2] io[2] = _mm_sub_epi32(step[1], step[2]); // step[1] - step[2] io[3] = _mm_sub_epi32(step[0], step[3]); // step[0] - step[3] } static INLINE void highbd_idct4_large_sse2(__m128i *const io) { __m128i step[4]; transpose_32bit_4x4(io, io); // stage 1 highbd_butterfly_cospi16_sse2(io[0], io[2], &step[0], &step[1]); highbd_butterfly_sse2(io[1], io[3], cospi_24_64, cospi_8_64, &step[2], &step[3]); // stage 2 io[0] = _mm_add_epi32(step[0], step[3]); // step[0] + step[3] io[1] = _mm_add_epi32(step[1], step[2]); 
// step[1] + step[2] io[2] = _mm_sub_epi32(step[1], step[2]); // step[1] - step[2] io[3] = _mm_sub_epi32(step[0], step[3]); // step[0] - step[3] } void vpx_highbd_idct4x4_16_add_sse2(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int16_t max = 0, min = 0; __m128i io[4], io_short[2]; io[0] = _mm_load_si128((const __m128i *)(input + 0)); io[1] = _mm_load_si128((const __m128i *)(input + 4)); io[2] = _mm_load_si128((const __m128i *)(input + 8)); io[3] = _mm_load_si128((const __m128i *)(input + 12)); io_short[0] = _mm_packs_epi32(io[0], io[1]); io_short[1] = _mm_packs_epi32(io[2], io[3]); if (bd != 8) { __m128i max_input, min_input; max_input = _mm_max_epi16(io_short[0], io_short[1]); min_input = _mm_min_epi16(io_short[0], io_short[1]); max_input = _mm_max_epi16(max_input, _mm_srli_si128(max_input, 8)); min_input = _mm_min_epi16(min_input, _mm_srli_si128(min_input, 8)); max_input = _mm_max_epi16(max_input, _mm_srli_si128(max_input, 4)); min_input = _mm_min_epi16(min_input, _mm_srli_si128(min_input, 4)); max_input = _mm_max_epi16(max_input, _mm_srli_si128(max_input, 2)); min_input = _mm_min_epi16(min_input, _mm_srli_si128(min_input, 2)); max = (int16_t)_mm_extract_epi16(max_input, 0); min = (int16_t)_mm_extract_epi16(min_input, 0); } if (bd == 8 || (max < 4096 && min >= -4096)) { idct4_sse2(io_short); idct4_sse2(io_short); io_short[0] = _mm_add_epi16(io_short[0], _mm_set1_epi16(8)); io_short[1] = _mm_add_epi16(io_short[1], _mm_set1_epi16(8)); io[0] = _mm_srai_epi16(io_short[0], 4); io[1] = _mm_srai_epi16(io_short[1], 4); } else { if (max < 32767 && min > -32768) { highbd_idct4_small_sse2(io); highbd_idct4_small_sse2(io); } else { highbd_idct4_large_sse2(io); highbd_idct4_large_sse2(io); } io[0] = wraplow_16bit_shift4(io[0], io[1], _mm_set1_epi32(8)); io[1] = wraplow_16bit_shift4(io[2], io[3], _mm_set1_epi32(8)); } recon_and_store_4x4(io, dest, stride, bd); } void vpx_highbd_idct4x4_1_add_sse2(const tran_low_t *input, uint16_t *dest, int stride, int bd) { int a1, i; tran_low_t out; __m128i dc, d; out = HIGHBD_WRAPLOW( dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); out = HIGHBD_WRAPLOW(dct_const_round_shift(out * (tran_high_t)cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, 4); dc = _mm_set1_epi16(a1); for (i = 0; i < 4; ++i) { d = _mm_loadl_epi64((const __m128i *)dest); d = add_clamp(d, dc, bd); _mm_storel_epi64((__m128i *)dest, d); dest += stride; } } libvpx-1.8.2/vpx_dsp/x86/highbd_idct4x4_add_sse4.c000066400000000000000000000032401357355204000216430ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include // SSE4.1 #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/x86/highbd_inv_txfm_sse2.h" #include "vpx_dsp/x86/highbd_inv_txfm_sse4.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/transpose_sse2.h" void vpx_highbd_idct4x4_16_add_sse4_1(const tran_low_t *input, uint16_t *dest, int stride, int bd) { __m128i io[4]; io[0] = _mm_load_si128((const __m128i *)(input + 0)); io[1] = _mm_load_si128((const __m128i *)(input + 4)); io[2] = _mm_load_si128((const __m128i *)(input + 8)); io[3] = _mm_load_si128((const __m128i *)(input + 12)); if (bd == 8) { __m128i io_short[2]; io_short[0] = _mm_packs_epi32(io[0], io[1]); io_short[1] = _mm_packs_epi32(io[2], io[3]); idct4_sse2(io_short); idct4_sse2(io_short); io_short[0] = _mm_add_epi16(io_short[0], _mm_set1_epi16(8)); io_short[1] = _mm_add_epi16(io_short[1], _mm_set1_epi16(8)); io[0] = _mm_srai_epi16(io_short[0], 4); io[1] = _mm_srai_epi16(io_short[1], 4); } else { highbd_idct4_sse4_1(io); highbd_idct4_sse4_1(io); io[0] = wraplow_16bit_shift4(io[0], io[1], _mm_set1_epi32(8)); io[1] = wraplow_16bit_shift4(io[2], io[3], _mm_set1_epi32(8)); } recon_and_store_4x4(io, dest, stride, bd); } libvpx-1.8.2/vpx_dsp/x86/highbd_idct8x8_add_sse2.c000066400000000000000000000163671357355204000216670ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include // SSE2 #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/x86/highbd_inv_txfm_sse2.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/transpose_sse2.h" static void highbd_idct8x8_half1d(__m128i *const io) { __m128i step1[8], step2[8]; transpose_32bit_4x4x2(io, io); // stage 1 step1[0] = io[0]; step1[2] = io[4]; step1[1] = io[2]; step1[3] = io[6]; highbd_butterfly_sse2(io[1], io[7], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); highbd_butterfly_sse2(io[5], io[3], cospi_12_64, cospi_20_64, &step1[5], &step1[6]); // stage 2 highbd_butterfly_cospi16_sse2(step1[0], step1[2], &step2[0], &step2[1]); highbd_butterfly_sse2(step1[1], step1[3], cospi_24_64, cospi_8_64, &step2[2], &step2[3]); step2[4] = _mm_add_epi32(step1[4], step1[5]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); step2[7] = _mm_add_epi32(step1[7], step1[6]); // stage 3 step1[0] = _mm_add_epi32(step2[0], step2[3]); step1[1] = _mm_add_epi32(step2[1], step2[2]); step1[2] = _mm_sub_epi32(step2[1], step2[2]); step1[3] = _mm_sub_epi32(step2[0], step2[3]); step1[4] = step2[4]; highbd_butterfly_cospi16_sse2(step2[6], step2[5], &step1[6], &step1[5]); step1[7] = step2[7]; // stage 4 highbd_idct8_stage4(step1, io); } static void highbd_idct8x8_12_half1d(__m128i *const io) { __m128i temp1[4], sign[2], step1[8], step2[8]; transpose_32bit_4x4(io, io); // stage 1 step1[0] = io[0]; step1[1] = io[2]; abs_extend_64bit_sse2(io[1], temp1, sign); step1[4] = multiplication_round_shift_sse2(temp1, sign, cospi_28_64); step1[7] = multiplication_round_shift_sse2(temp1, sign, cospi_4_64); abs_extend_64bit_sse2(io[3], temp1, sign); step1[5] = multiplication_neg_round_shift_sse2(temp1, sign, cospi_20_64); step1[6] = multiplication_round_shift_sse2(temp1, sign, cospi_12_64); // stage 2 abs_extend_64bit_sse2(step1[0], temp1, sign); step2[0] = 
multiplication_round_shift_sse2(temp1, sign, cospi_16_64); abs_extend_64bit_sse2(step1[1], temp1, sign); step2[2] = multiplication_round_shift_sse2(temp1, sign, cospi_24_64); step2[3] = multiplication_round_shift_sse2(temp1, sign, cospi_8_64); step2[4] = _mm_add_epi32(step1[4], step1[5]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); step2[7] = _mm_add_epi32(step1[7], step1[6]); // stage 3 step1[0] = _mm_add_epi32(step2[0], step2[3]); step1[1] = _mm_add_epi32(step2[0], step2[2]); step1[2] = _mm_sub_epi32(step2[0], step2[2]); step1[3] = _mm_sub_epi32(step2[0], step2[3]); step1[4] = step2[4]; highbd_butterfly_cospi16_sse2(step2[6], step2[5], &step1[6], &step1[5]); step1[7] = step2[7]; // stage 4 highbd_idct8_stage4(step1, io); } void vpx_highbd_idct8x8_64_add_sse2(const tran_low_t *input, uint16_t *dest, int stride, int bd) { __m128i io[16]; io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0)); io[4] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 4)); io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0)); io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4)); io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0)); io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4)); io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0)); io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4)); if (bd == 8) { __m128i io_short[8]; io_short[0] = _mm_packs_epi32(io[0], io[4]); io_short[1] = _mm_packs_epi32(io[1], io[5]); io_short[2] = _mm_packs_epi32(io[2], io[6]); io_short[3] = _mm_packs_epi32(io[3], io[7]); io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0)); io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4)); io[9] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 0)); io[13] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 4)); io[10] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 0)); io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4)); io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0)); io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4)); io_short[4] = _mm_packs_epi32(io[8], io[12]); io_short[5] = _mm_packs_epi32(io[9], io[13]); io_short[6] = _mm_packs_epi32(io[10], io[14]); io_short[7] = _mm_packs_epi32(io[11], io[15]); vpx_idct8_sse2(io_short); vpx_idct8_sse2(io_short); round_shift_8x8(io_short, io); } else { __m128i temp[4]; highbd_idct8x8_half1d(io); io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0)); io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4)); io[9] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 0)); io[13] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 4)); io[10] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 0)); io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4)); io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0)); io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4)); highbd_idct8x8_half1d(&io[8]); temp[0] = io[4]; temp[1] = io[5]; temp[2] = io[6]; temp[3] = io[7]; io[4] = io[8]; io[5] = io[9]; io[6] = io[10]; io[7] = io[11]; highbd_idct8x8_half1d(io); io[8] = temp[0]; io[9] = temp[1]; io[10] = temp[2]; io[11] = temp[3]; highbd_idct8x8_half1d(&io[8]); highbd_idct8x8_final_round(io); } recon_and_store_8x8(io, dest, stride, bd); } void vpx_highbd_idct8x8_12_add_sse2(const tran_low_t *input, uint16_t *dest, int stride, int bd) { const __m128i zero = _mm_setzero_si128(); __m128i io[16]; io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0)); io[1] = _mm_load_si128((const 
__m128i *)(input + 1 * 8 + 0)); io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0)); io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0)); if (bd == 8) { __m128i io_short[8]; io_short[0] = _mm_packs_epi32(io[0], zero); io_short[1] = _mm_packs_epi32(io[1], zero); io_short[2] = _mm_packs_epi32(io[2], zero); io_short[3] = _mm_packs_epi32(io[3], zero); idct8x8_12_add_kernel_sse2(io_short); round_shift_8x8(io_short, io); } else { __m128i temp[4]; highbd_idct8x8_12_half1d(io); temp[0] = io[4]; temp[1] = io[5]; temp[2] = io[6]; temp[3] = io[7]; highbd_idct8x8_12_half1d(io); io[8] = temp[0]; io[9] = temp[1]; io[10] = temp[2]; io[11] = temp[3]; highbd_idct8x8_12_half1d(&io[8]); highbd_idct8x8_final_round(io); } recon_and_store_8x8(io, dest, stride, bd); } void vpx_highbd_idct8x8_1_add_sse2(const tran_low_t *input, uint16_t *dest, int stride, int bd) { highbd_idct_1_add_kernel(input, dest, stride, bd, 8); } libvpx-1.8.2/vpx_dsp/x86/highbd_idct8x8_add_sse4.c000066400000000000000000000161661357355204000216660ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include // SSE4.1 #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/x86/highbd_inv_txfm_sse2.h" #include "vpx_dsp/x86/highbd_inv_txfm_sse4.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/inv_txfm_ssse3.h" #include "vpx_dsp/x86/transpose_sse2.h" void vpx_highbd_idct8x8_half1d_sse4_1(__m128i *const io) { __m128i step1[8], step2[8]; transpose_32bit_4x4x2(io, io); // stage 1 step1[0] = io[0]; step1[2] = io[4]; step1[1] = io[2]; step1[3] = io[6]; highbd_butterfly_sse4_1(io[1], io[7], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); highbd_butterfly_sse4_1(io[5], io[3], cospi_12_64, cospi_20_64, &step1[5], &step1[6]); // stage 2 highbd_butterfly_cospi16_sse4_1(step1[0], step1[2], &step2[0], &step2[1]); highbd_butterfly_sse4_1(step1[1], step1[3], cospi_24_64, cospi_8_64, &step2[2], &step2[3]); step2[4] = _mm_add_epi32(step1[4], step1[5]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); step2[7] = _mm_add_epi32(step1[7], step1[6]); // stage 3 step1[0] = _mm_add_epi32(step2[0], step2[3]); step1[1] = _mm_add_epi32(step2[1], step2[2]); step1[2] = _mm_sub_epi32(step2[1], step2[2]); step1[3] = _mm_sub_epi32(step2[0], step2[3]); step1[4] = step2[4]; highbd_butterfly_cospi16_sse4_1(step2[6], step2[5], &step1[6], &step1[5]); step1[7] = step2[7]; // stage 4 highbd_idct8_stage4(step1, io); } static void highbd_idct8x8_12_half1d(__m128i *const io) { __m128i temp1[2], step1[8], step2[8]; transpose_32bit_4x4(io, io); // stage 1 step1[0] = io[0]; step1[1] = io[2]; extend_64bit(io[1], temp1); step1[4] = multiplication_round_shift_sse4_1(temp1, cospi_28_64); step1[7] = multiplication_round_shift_sse4_1(temp1, cospi_4_64); extend_64bit(io[3], temp1); step1[5] = multiplication_round_shift_sse4_1(temp1, -cospi_20_64); step1[6] = multiplication_round_shift_sse4_1(temp1, cospi_12_64); // stage 2 extend_64bit(step1[0], temp1); step2[0] = multiplication_round_shift_sse4_1(temp1, cospi_16_64); extend_64bit(step1[1], temp1); step2[2] = multiplication_round_shift_sse4_1(temp1, cospi_24_64); step2[3] = multiplication_round_shift_sse4_1(temp1, 
cospi_8_64); step2[4] = _mm_add_epi32(step1[4], step1[5]); step2[5] = _mm_sub_epi32(step1[4], step1[5]); step2[6] = _mm_sub_epi32(step1[7], step1[6]); step2[7] = _mm_add_epi32(step1[7], step1[6]); // stage 3 step1[0] = _mm_add_epi32(step2[0], step2[3]); step1[1] = _mm_add_epi32(step2[0], step2[2]); step1[2] = _mm_sub_epi32(step2[0], step2[2]); step1[3] = _mm_sub_epi32(step2[0], step2[3]); step1[4] = step2[4]; highbd_butterfly_cospi16_sse4_1(step2[6], step2[5], &step1[6], &step1[5]); step1[7] = step2[7]; // stage 4 highbd_idct8_stage4(step1, io); } void vpx_highbd_idct8x8_64_add_sse4_1(const tran_low_t *input, uint16_t *dest, int stride, int bd) { __m128i io[16]; io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0)); io[4] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 4)); io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0)); io[5] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 4)); io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0)); io[6] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 4)); io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0)); io[7] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 4)); if (bd == 8) { __m128i io_short[8]; io_short[0] = _mm_packs_epi32(io[0], io[4]); io_short[1] = _mm_packs_epi32(io[1], io[5]); io_short[2] = _mm_packs_epi32(io[2], io[6]); io_short[3] = _mm_packs_epi32(io[3], io[7]); io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0)); io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4)); io[9] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 0)); io[13] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 4)); io[10] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 0)); io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4)); io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0)); io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4)); io_short[4] = _mm_packs_epi32(io[8], io[12]); io_short[5] = _mm_packs_epi32(io[9], io[13]); io_short[6] = _mm_packs_epi32(io[10], io[14]); io_short[7] = _mm_packs_epi32(io[11], io[15]); vpx_idct8_sse2(io_short); vpx_idct8_sse2(io_short); round_shift_8x8(io_short, io); } else { __m128i temp[4]; vpx_highbd_idct8x8_half1d_sse4_1(io); io[8] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 0)); io[12] = _mm_load_si128((const __m128i *)(input + 4 * 8 + 4)); io[9] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 0)); io[13] = _mm_load_si128((const __m128i *)(input + 5 * 8 + 4)); io[10] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 0)); io[14] = _mm_load_si128((const __m128i *)(input + 6 * 8 + 4)); io[11] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 0)); io[15] = _mm_load_si128((const __m128i *)(input + 7 * 8 + 4)); vpx_highbd_idct8x8_half1d_sse4_1(&io[8]); temp[0] = io[4]; temp[1] = io[5]; temp[2] = io[6]; temp[3] = io[7]; io[4] = io[8]; io[5] = io[9]; io[6] = io[10]; io[7] = io[11]; vpx_highbd_idct8x8_half1d_sse4_1(io); io[8] = temp[0]; io[9] = temp[1]; io[10] = temp[2]; io[11] = temp[3]; vpx_highbd_idct8x8_half1d_sse4_1(&io[8]); highbd_idct8x8_final_round(io); } recon_and_store_8x8(io, dest, stride, bd); } void vpx_highbd_idct8x8_12_add_sse4_1(const tran_low_t *input, uint16_t *dest, int stride, int bd) { const __m128i zero = _mm_setzero_si128(); __m128i io[16]; io[0] = _mm_load_si128((const __m128i *)(input + 0 * 8 + 0)); io[1] = _mm_load_si128((const __m128i *)(input + 1 * 8 + 0)); io[2] = _mm_load_si128((const __m128i *)(input + 2 * 8 + 0)); io[3] = _mm_load_si128((const __m128i *)(input + 3 * 8 + 0)); if (bd == 8) { __m128i 
io_short[8]; io_short[0] = _mm_packs_epi32(io[0], zero); io_short[1] = _mm_packs_epi32(io[1], zero); io_short[2] = _mm_packs_epi32(io[2], zero); io_short[3] = _mm_packs_epi32(io[3], zero); idct8x8_12_add_kernel_ssse3(io_short); round_shift_8x8(io_short, io); } else { __m128i temp[4]; highbd_idct8x8_12_half1d(io); temp[0] = io[4]; temp[1] = io[5]; temp[2] = io[6]; temp[3] = io[7]; highbd_idct8x8_12_half1d(io); io[8] = temp[0]; io[9] = temp[1]; io[10] = temp[2]; io[11] = temp[3]; highbd_idct8x8_12_half1d(&io[8]); highbd_idct8x8_final_round(io); } recon_and_store_8x8(io, dest, stride, bd); } libvpx-1.8.2/vpx_dsp/x86/highbd_intrapred_intrin_sse2.c000066400000000000000000000515051357355204000231300ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include // SSE2 #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" // ----------------------------------------------------------------------------- void vpx_highbd_h_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i left_u16 = _mm_loadl_epi64((const __m128i *)left); const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0); const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55); const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa); const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff); (void)above; (void)bd; _mm_storel_epi64((__m128i *)dst, row0); dst += stride; _mm_storel_epi64((__m128i *)dst, row1); dst += stride; _mm_storel_epi64((__m128i *)dst, row2); dst += stride; _mm_storel_epi64((__m128i *)dst, row3); } void vpx_highbd_h_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i left_u16 = _mm_load_si128((const __m128i *)left); const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0); const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55); const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa); const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff); const __m128i row4 = _mm_shufflehi_epi16(left_u16, 0x0); const __m128i row5 = _mm_shufflehi_epi16(left_u16, 0x55); const __m128i row6 = _mm_shufflehi_epi16(left_u16, 0xaa); const __m128i row7 = _mm_shufflehi_epi16(left_u16, 0xff); (void)above; (void)bd; _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row0, row0)); dst += stride; _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row1, row1)); dst += stride; _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row2, row2)); dst += stride; _mm_store_si128((__m128i *)dst, _mm_unpacklo_epi64(row3, row3)); dst += stride; _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row4, row4)); dst += stride; _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row5, row5)); dst += stride; _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row6, row6)); dst += stride; _mm_store_si128((__m128i *)dst, _mm_unpackhi_epi64(row7, row7)); } static INLINE void h_store_16_unpacklo(uint16_t **dst, const ptrdiff_t stride, const __m128i *row) { const __m128i val = _mm_unpacklo_epi64(*row, *row); _mm_store_si128((__m128i *)*dst, val); _mm_store_si128((__m128i *)(*dst + 8), val); *dst += stride; } static INLINE void 
h_store_16_unpackhi(uint16_t **dst, const ptrdiff_t stride, const __m128i *row) { const __m128i val = _mm_unpackhi_epi64(*row, *row); _mm_store_si128((__m128i *)(*dst), val); _mm_store_si128((__m128i *)(*dst + 8), val); *dst += stride; } void vpx_highbd_h_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { int i; (void)above; (void)bd; for (i = 0; i < 2; i++, left += 8) { const __m128i left_u16 = _mm_load_si128((const __m128i *)left); const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0); const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55); const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa); const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff); const __m128i row4 = _mm_shufflehi_epi16(left_u16, 0x0); const __m128i row5 = _mm_shufflehi_epi16(left_u16, 0x55); const __m128i row6 = _mm_shufflehi_epi16(left_u16, 0xaa); const __m128i row7 = _mm_shufflehi_epi16(left_u16, 0xff); h_store_16_unpacklo(&dst, stride, &row0); h_store_16_unpacklo(&dst, stride, &row1); h_store_16_unpacklo(&dst, stride, &row2); h_store_16_unpacklo(&dst, stride, &row3); h_store_16_unpackhi(&dst, stride, &row4); h_store_16_unpackhi(&dst, stride, &row5); h_store_16_unpackhi(&dst, stride, &row6); h_store_16_unpackhi(&dst, stride, &row7); } } static INLINE void h_store_32_unpacklo(uint16_t **dst, const ptrdiff_t stride, const __m128i *row) { const __m128i val = _mm_unpacklo_epi64(*row, *row); _mm_store_si128((__m128i *)(*dst), val); _mm_store_si128((__m128i *)(*dst + 8), val); _mm_store_si128((__m128i *)(*dst + 16), val); _mm_store_si128((__m128i *)(*dst + 24), val); *dst += stride; } static INLINE void h_store_32_unpackhi(uint16_t **dst, const ptrdiff_t stride, const __m128i *row) { const __m128i val = _mm_unpackhi_epi64(*row, *row); _mm_store_si128((__m128i *)(*dst), val); _mm_store_si128((__m128i *)(*dst + 8), val); _mm_store_si128((__m128i *)(*dst + 16), val); _mm_store_si128((__m128i *)(*dst + 24), val); *dst += stride; } void vpx_highbd_h_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { int i; (void)above; (void)bd; for (i = 0; i < 4; i++, left += 8) { const __m128i left_u16 = _mm_load_si128((const __m128i *)left); const __m128i row0 = _mm_shufflelo_epi16(left_u16, 0x0); const __m128i row1 = _mm_shufflelo_epi16(left_u16, 0x55); const __m128i row2 = _mm_shufflelo_epi16(left_u16, 0xaa); const __m128i row3 = _mm_shufflelo_epi16(left_u16, 0xff); const __m128i row4 = _mm_shufflehi_epi16(left_u16, 0x0); const __m128i row5 = _mm_shufflehi_epi16(left_u16, 0x55); const __m128i row6 = _mm_shufflehi_epi16(left_u16, 0xaa); const __m128i row7 = _mm_shufflehi_epi16(left_u16, 0xff); h_store_32_unpacklo(&dst, stride, &row0); h_store_32_unpacklo(&dst, stride, &row1); h_store_32_unpacklo(&dst, stride, &row2); h_store_32_unpacklo(&dst, stride, &row3); h_store_32_unpackhi(&dst, stride, &row4); h_store_32_unpackhi(&dst, stride, &row5); h_store_32_unpackhi(&dst, stride, &row6); h_store_32_unpackhi(&dst, stride, &row7); } } //------------------------------------------------------------------------------ // DC 4x4 static INLINE __m128i dc_sum_4(const uint16_t *ref) { const __m128i _dcba = _mm_loadl_epi64((const __m128i *)ref); const __m128i _xxdc = _mm_shufflelo_epi16(_dcba, 0xe); const __m128i a = _mm_add_epi16(_dcba, _xxdc); return _mm_add_epi16(a, _mm_shufflelo_epi16(a, 0x1)); } static INLINE void dc_store_4x4(uint16_t *dst, ptrdiff_t stride, const __m128i *dc) { const __m128i dc_dup = _mm_shufflelo_epi16(*dc, 
0x0); int i; for (i = 0; i < 4; ++i, dst += stride) { _mm_storel_epi64((__m128i *)dst, dc_dup); } } void vpx_highbd_dc_left_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i two = _mm_cvtsi32_si128(2); const __m128i sum = dc_sum_4(left); const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, two), 2); (void)above; (void)bd; dc_store_4x4(dst, stride, &dc); } void vpx_highbd_dc_top_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i two = _mm_cvtsi32_si128(2); const __m128i sum = dc_sum_4(above); const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, two), 2); (void)left; (void)bd; dc_store_4x4(dst, stride, &dc); } void vpx_highbd_dc_128_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1)); const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0); (void)above; (void)left; dc_store_4x4(dst, stride, &dc_dup); } //------------------------------------------------------------------------------ // DC 8x8 static INLINE __m128i dc_sum_8(const uint16_t *ref) { const __m128i ref_u16 = _mm_load_si128((const __m128i *)ref); const __m128i _dcba = _mm_add_epi16(ref_u16, _mm_srli_si128(ref_u16, 8)); const __m128i _xxdc = _mm_shufflelo_epi16(_dcba, 0xe); const __m128i a = _mm_add_epi16(_dcba, _xxdc); return _mm_add_epi16(a, _mm_shufflelo_epi16(a, 0x1)); } static INLINE void dc_store_8x8(uint16_t *dst, ptrdiff_t stride, const __m128i *dc) { const __m128i dc_dup_lo = _mm_shufflelo_epi16(*dc, 0); const __m128i dc_dup = _mm_unpacklo_epi64(dc_dup_lo, dc_dup_lo); int i; for (i = 0; i < 8; ++i, dst += stride) { _mm_store_si128((__m128i *)dst, dc_dup); } } void vpx_highbd_dc_left_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i four = _mm_cvtsi32_si128(4); const __m128i sum = dc_sum_8(left); const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, four), 3); (void)above; (void)bd; dc_store_8x8(dst, stride, &dc); } void vpx_highbd_dc_top_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i four = _mm_cvtsi32_si128(4); const __m128i sum = dc_sum_8(above); const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, four), 3); (void)left; (void)bd; dc_store_8x8(dst, stride, &dc); } void vpx_highbd_dc_128_predictor_8x8_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1)); const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0); (void)above; (void)left; dc_store_8x8(dst, stride, &dc_dup); } //------------------------------------------------------------------------------ // DC 16x16 static INLINE __m128i dc_sum_16(const uint16_t *ref) { const __m128i sum_lo = dc_sum_8(ref); const __m128i sum_hi = dc_sum_8(ref + 8); return _mm_add_epi16(sum_lo, sum_hi); } static INLINE void dc_store_16x16(uint16_t *dst, ptrdiff_t stride, const __m128i *dc) { const __m128i dc_dup_lo = _mm_shufflelo_epi16(*dc, 0); const __m128i dc_dup = _mm_unpacklo_epi64(dc_dup_lo, dc_dup_lo); int i; for (i = 0; i < 16; ++i, dst += stride) { _mm_store_si128((__m128i *)dst, dc_dup); _mm_store_si128((__m128i *)(dst + 8), dc_dup); } } void vpx_highbd_dc_left_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i eight = _mm_cvtsi32_si128(8); const 
__m128i sum = dc_sum_16(left); const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4); (void)above; (void)bd; dc_store_16x16(dst, stride, &dc); } void vpx_highbd_dc_top_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i eight = _mm_cvtsi32_si128(8); const __m128i sum = dc_sum_16(above); const __m128i dc = _mm_srli_epi16(_mm_add_epi16(sum, eight), 4); (void)left; (void)bd; dc_store_16x16(dst, stride, &dc); } void vpx_highbd_dc_128_predictor_16x16_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1)); const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0); (void)above; (void)left; dc_store_16x16(dst, stride, &dc_dup); } //------------------------------------------------------------------------------ // DC 32x32 static INLINE __m128i dc_sum_32(const uint16_t *ref) { const __m128i zero = _mm_setzero_si128(); const __m128i sum_a = dc_sum_16(ref); const __m128i sum_b = dc_sum_16(ref + 16); // 12 bit bd will outrange, so expand to 32 bit before adding final total return _mm_add_epi32(_mm_unpacklo_epi16(sum_a, zero), _mm_unpacklo_epi16(sum_b, zero)); } static INLINE void dc_store_32x32(uint16_t *dst, ptrdiff_t stride, const __m128i *dc) { const __m128i dc_dup_lo = _mm_shufflelo_epi16(*dc, 0); const __m128i dc_dup = _mm_unpacklo_epi64(dc_dup_lo, dc_dup_lo); int i; for (i = 0; i < 32; ++i, dst += stride) { _mm_store_si128((__m128i *)dst, dc_dup); _mm_store_si128((__m128i *)(dst + 8), dc_dup); _mm_store_si128((__m128i *)(dst + 16), dc_dup); _mm_store_si128((__m128i *)(dst + 24), dc_dup); } } void vpx_highbd_dc_left_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i sixteen = _mm_cvtsi32_si128(16); const __m128i sum = dc_sum_32(left); const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, sixteen), 5); (void)above; (void)bd; dc_store_32x32(dst, stride, &dc); } void vpx_highbd_dc_top_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i sixteen = _mm_cvtsi32_si128(16); const __m128i sum = dc_sum_32(above); const __m128i dc = _mm_srli_epi32(_mm_add_epi32(sum, sixteen), 5); (void)left; (void)bd; dc_store_32x32(dst, stride, &dc); } void vpx_highbd_dc_128_predictor_32x32_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i dc = _mm_cvtsi32_si128(1 << (bd - 1)); const __m128i dc_dup = _mm_shufflelo_epi16(dc, 0x0); (void)above; (void)left; dc_store_32x32(dst, stride, &dc_dup); } // ----------------------------------------------------------------------------- /* ; ------------------------------------------ ; input: x, y, z, result ; ; trick from pascal ; (x+2y+z+2)>>2 can be calculated as: ; result = avg(x,z) ; result -= xor(x,z) & 1 ; result = avg(result,y) ; ------------------------------------------ */ static INLINE __m128i avg3_epu16(const __m128i *x, const __m128i *y, const __m128i *z) { const __m128i one = _mm_set1_epi16(1); const __m128i a = _mm_avg_epu16(*x, *z); const __m128i b = _mm_subs_epu16(a, _mm_and_si128(_mm_xor_si128(*x, *z), one)); return _mm_avg_epu16(b, *y); } void vpx_highbd_d117_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int I = left[0]; const int J = left[1]; const int K = left[2]; const __m128i XXXXABCD = _mm_loadu_si128((const __m128i *)(above - 4)); const 
__m128i KXXXABCD = _mm_insert_epi16(XXXXABCD, K, 0); const __m128i KJXXABCD = _mm_insert_epi16(KXXXABCD, J, 1); const __m128i KJIXABCD = _mm_insert_epi16(KJXXABCD, I, 2); const __m128i JIXABCD0 = _mm_srli_si128(KJIXABCD, 2); const __m128i IXABCD00 = _mm_srli_si128(KJIXABCD, 4); const __m128i avg2 = _mm_avg_epu16(KJIXABCD, JIXABCD0); const __m128i avg3 = avg3_epu16(&KJIXABCD, &JIXABCD0, &IXABCD00); const __m128i row0 = _mm_srli_si128(avg2, 6); const __m128i row1 = _mm_srli_si128(avg3, 4); const __m128i row2 = _mm_srli_si128(avg2, 4); const __m128i row3 = _mm_srli_si128(avg3, 2); (void)bd; _mm_storel_epi64((__m128i *)dst, row0); dst += stride; _mm_storel_epi64((__m128i *)dst, row1); dst += stride; _mm_storel_epi64((__m128i *)dst, row2); dst += stride; _mm_storel_epi64((__m128i *)dst, row3); dst -= stride; dst[0] = _mm_extract_epi16(avg3, 1); dst[stride] = _mm_extract_epi16(avg3, 0); } void vpx_highbd_d135_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int I = left[0]; const int J = left[1]; const int K = left[2]; const int L = left[3]; const __m128i XXXXABCD = _mm_loadu_si128((const __m128i *)(above - 4)); const __m128i KXXXABCD = _mm_insert_epi16(XXXXABCD, K, 0); const __m128i KJXXABCD = _mm_insert_epi16(KXXXABCD, J, 1); const __m128i KJIXABCD = _mm_insert_epi16(KJXXABCD, I, 2); const __m128i JIXABCD0 = _mm_srli_si128(KJIXABCD, 2); const __m128i LKJIXABC = _mm_insert_epi16(_mm_slli_si128(KJIXABCD, 2), L, 0); const __m128i avg3 = avg3_epu16(&JIXABCD0, &KJIXABCD, &LKJIXABC); const __m128i row0 = _mm_srli_si128(avg3, 6); const __m128i row1 = _mm_srli_si128(avg3, 4); const __m128i row2 = _mm_srli_si128(avg3, 2); const __m128i row3 = avg3; (void)bd; _mm_storel_epi64((__m128i *)dst, row0); dst += stride; _mm_storel_epi64((__m128i *)dst, row1); dst += stride; _mm_storel_epi64((__m128i *)dst, row2); dst += stride; _mm_storel_epi64((__m128i *)dst, row3); } void vpx_highbd_d153_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const int I = left[0]; const int J = left[1]; const int K = left[2]; const int L = left[3]; const __m128i XXXXXABC = _mm_castps_si128( _mm_loadh_pi(_mm_setzero_ps(), (const __m64 *)(above - 1))); const __m128i LXXXXABC = _mm_insert_epi16(XXXXXABC, L, 0); const __m128i LKXXXABC = _mm_insert_epi16(LXXXXABC, K, 1); const __m128i LKJXXABC = _mm_insert_epi16(LKXXXABC, J, 2); const __m128i LKJIXABC = _mm_insert_epi16(LKJXXABC, I, 3); const __m128i KJIXABC0 = _mm_srli_si128(LKJIXABC, 2); const __m128i JIXABC00 = _mm_srli_si128(LKJIXABC, 4); const __m128i avg3 = avg3_epu16(&LKJIXABC, &KJIXABC0, &JIXABC00); const __m128i avg2 = _mm_avg_epu16(LKJIXABC, KJIXABC0); const __m128i row3 = _mm_unpacklo_epi16(avg2, avg3); const __m128i row2 = _mm_srli_si128(row3, 4); const __m128i row1 = _mm_srli_si128(row3, 8); const __m128i row0 = _mm_srli_si128(avg3, 4); (void)bd; _mm_storel_epi64((__m128i *)dst, row0); dst[0] = _mm_extract_epi16(avg2, 3); dst += stride; _mm_storel_epi64((__m128i *)dst, row1); dst += stride; _mm_storel_epi64((__m128i *)dst, row2); dst += stride; _mm_storel_epi64((__m128i *)dst, row3); } void vpx_highbd_d207_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i IJKL0000 = _mm_load_si128((const __m128i *)left); const __m128i LLLL0000 = _mm_shufflelo_epi16(IJKL0000, 0xff); const __m128i IJKLLLLL = _mm_unpacklo_epi64(IJKL0000, LLLL0000); const __m128i JKLLLLL0 = _mm_srli_si128(IJKLLLLL, 2); 
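/* Added worked example (illustrative, not in upstream libvpx) for the
 * avg3_epu16() trick used just below: for x = 1, y = 2, z = 4 the target is
 * (x + 2*y + z + 2) >> 2 = 11 >> 2 = 2. _mm_avg_epu16(x, z) = (1+4+1) >> 1 = 3
 * rounds up whenever x + z is odd, so subtracting (x ^ z) & 1 = 1 removes that
 * bias, and avg(3 - 1, y) = (2+2+1) >> 1 = 2 matches exactly. */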
const __m128i KLLLLL00 = _mm_srli_si128(IJKLLLLL, 4); const __m128i avg3 = avg3_epu16(&IJKLLLLL, &JKLLLLL0, &KLLLLL00); const __m128i avg2 = _mm_avg_epu16(IJKLLLLL, JKLLLLL0); const __m128i row0 = _mm_unpacklo_epi16(avg2, avg3); const __m128i row1 = _mm_srli_si128(row0, 4); const __m128i row2 = _mm_srli_si128(row0, 8); const __m128i row3 = LLLL0000; (void)above; (void)bd; _mm_storel_epi64((__m128i *)dst, row0); dst += stride; _mm_storel_epi64((__m128i *)dst, row1); dst += stride; _mm_storel_epi64((__m128i *)dst, row2); dst += stride; _mm_storel_epi64((__m128i *)dst, row3); } void vpx_highbd_d63_predictor_4x4_sse2(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i ABCDEFGH = _mm_loadu_si128((const __m128i *)above); const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 2); const __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 4); const __m128i avg3 = avg3_epu16(&ABCDEFGH, &BCDEFGH0, &CDEFGH00); const __m128i avg2 = _mm_avg_epu16(ABCDEFGH, BCDEFGH0); const __m128i row0 = avg2; const __m128i row1 = avg3; const __m128i row2 = _mm_srli_si128(avg2, 2); const __m128i row3 = _mm_srli_si128(avg3, 2); (void)left; (void)bd; _mm_storel_epi64((__m128i *)dst, row0); dst += stride; _mm_storel_epi64((__m128i *)dst, row1); dst += stride; _mm_storel_epi64((__m128i *)dst, row2); dst += stride; _mm_storel_epi64((__m128i *)dst, row3); } libvpx-1.8.2/vpx_dsp/x86/highbd_intrapred_intrin_ssse3.c000066400000000000000000001227461357355204000233220ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include <tmmintrin.h> #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" // ----------------------------------------------------------------------------- /* ; ------------------------------------------ ; input: x, y, z, result ; ; trick from pascal ; (x+2y+z+2)>>2 can be calculated as: ; result = avg(x,z) ; result -= xor(x,z) & 1 ; result = avg(result,y) ; ------------------------------------------ */ static INLINE __m128i avg3_epu16(const __m128i *x, const __m128i *y, const __m128i *z) { const __m128i one = _mm_set1_epi16(1); const __m128i a = _mm_avg_epu16(*x, *z); const __m128i b = _mm_subs_epu16(a, _mm_and_si128(_mm_xor_si128(*x, *z), one)); return _mm_avg_epu16(b, *y); } void vpx_highbd_d45_predictor_4x4_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i ABCDEFGH = _mm_loadu_si128((const __m128i *)above); const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 2); const __m128i CDEFGH00 = _mm_srli_si128(ABCDEFGH, 4); const __m128i avg3 = avg3_epu16(&ABCDEFGH, &BCDEFGH0, &CDEFGH00); (void)left; (void)bd; _mm_storel_epi64((__m128i *)dst, avg3); dst += stride; _mm_storel_epi64((__m128i *)dst, _mm_srli_si128(avg3, 2)); dst += stride; _mm_storel_epi64((__m128i *)dst, _mm_srli_si128(avg3, 4)); dst += stride; _mm_storel_epi64((__m128i *)dst, _mm_srli_si128(avg3, 6)); dst[3] = above[7]; // aka H } static INLINE void d45_store_8(uint16_t **dst, const ptrdiff_t stride, __m128i *row, const __m128i *ar) { *row = _mm_alignr_epi8(*ar, *row, 2); _mm_store_si128((__m128i *)*dst, *row); *dst += stride; } void vpx_highbd_d45_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i ABCDEFGH = _mm_load_si128((const __m128i *)above); const __m128i ABCDHHHH = _mm_shufflehi_epi16(ABCDEFGH, 0xff); const __m128i HHHHHHHH = _mm_unpackhi_epi64(ABCDHHHH, ABCDHHHH); const __m128i BCDEFGHH = _mm_alignr_epi8(HHHHHHHH, ABCDEFGH, 2); const __m128i CDEFGHHH = _mm_alignr_epi8(HHHHHHHH, ABCDEFGH, 4); __m128i avg3 = avg3_epu16(&ABCDEFGH, &BCDEFGHH, &CDEFGHHH); (void)left; (void)bd; _mm_store_si128((__m128i *)dst, avg3); dst += stride; d45_store_8(&dst, stride, &avg3, &HHHHHHHH); d45_store_8(&dst, stride, &avg3, &HHHHHHHH); d45_store_8(&dst, stride, &avg3, &HHHHHHHH); d45_store_8(&dst, stride, &avg3, &HHHHHHHH); d45_store_8(&dst, stride, &avg3, &HHHHHHHH); d45_store_8(&dst, stride, &avg3, &HHHHHHHH); d45_store_8(&dst, stride, &avg3, &HHHHHHHH); } static INLINE void d45_store_16(uint16_t **dst, const ptrdiff_t stride, __m128i *row_0, __m128i *row_1, const __m128i *ar) { *row_0 = _mm_alignr_epi8(*row_1, *row_0, 2); *row_1 = _mm_alignr_epi8(*ar, *row_1, 2); _mm_store_si128((__m128i *)*dst, *row_0); _mm_store_si128((__m128i *)(*dst + 8), *row_1); *dst += stride; } void vpx_highbd_d45_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i A0 = _mm_load_si128((const __m128i *)above); const __m128i A1 = _mm_load_si128((const __m128i *)(above + 8)); const __m128i AR0 = _mm_shufflehi_epi16(A1, 0xff); const __m128i AR = _mm_unpackhi_epi64(AR0, AR0); const __m128i B0 = _mm_alignr_epi8(A1, A0, 2); const __m128i B1 = _mm_alignr_epi8(AR, A1, 2); const __m128i C0 = _mm_alignr_epi8(A1, A0, 4); const __m128i C1 = _mm_alignr_epi8(AR, A1, 4); __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0); __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1); (void)left; (void)bd; _mm_store_si128((__m128i *)dst, avg3_0); _mm_store_si128((__m128i *)(dst + 8), avg3_1);
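/* Added note (illustrative, not in upstream libvpx): each of the 15 remaining
 * rows below reuses this avg3 window; d45_store_16() slides it left by one
 * pixel per row, feeding the replicated top-right pixel (AR) in from the
 * high end. */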
dst += stride; d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); d45_store_16(&dst, stride, &avg3_0, &avg3_1, &AR); } void vpx_highbd_d45_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i A0 = _mm_load_si128((const __m128i *)above); const __m128i A1 = _mm_load_si128((const __m128i *)(above + 8)); const __m128i A2 = _mm_load_si128((const __m128i *)(above + 16)); const __m128i A3 = _mm_load_si128((const __m128i *)(above + 24)); const __m128i AR0 = _mm_shufflehi_epi16(A3, 0xff); const __m128i AR = _mm_unpackhi_epi64(AR0, AR0); const __m128i B0 = _mm_alignr_epi8(A1, A0, 2); const __m128i B1 = _mm_alignr_epi8(A2, A1, 2); const __m128i B2 = _mm_alignr_epi8(A3, A2, 2); const __m128i B3 = _mm_alignr_epi8(AR, A3, 2); const __m128i C0 = _mm_alignr_epi8(A1, A0, 4); const __m128i C1 = _mm_alignr_epi8(A2, A1, 4); const __m128i C2 = _mm_alignr_epi8(A3, A2, 4); const __m128i C3 = _mm_alignr_epi8(AR, A3, 4); __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0); __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1); __m128i avg3_2 = avg3_epu16(&A2, &B2, &C2); __m128i avg3_3 = avg3_epu16(&A3, &B3, &C3); int i; (void)left; (void)bd; _mm_store_si128((__m128i *)dst, avg3_0); _mm_store_si128((__m128i *)(dst + 8), avg3_1); _mm_store_si128((__m128i *)(dst + 16), avg3_2); _mm_store_si128((__m128i *)(dst + 24), avg3_3); dst += stride; for (i = 1; i < 32; ++i) { avg3_0 = _mm_alignr_epi8(avg3_1, avg3_0, 2); avg3_1 = _mm_alignr_epi8(avg3_2, avg3_1, 2); avg3_2 = _mm_alignr_epi8(avg3_3, avg3_2, 2); avg3_3 = _mm_alignr_epi8(AR, avg3_3, 2); _mm_store_si128((__m128i *)dst, avg3_0); _mm_store_si128((__m128i *)(dst + 8), avg3_1); _mm_store_si128((__m128i *)(dst + 16), avg3_2); _mm_store_si128((__m128i *)(dst + 24), avg3_3); dst += stride; } } DECLARE_ALIGNED(16, static const uint8_t, rotate_right_epu16[16]) = { 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 0, 1 }; static INLINE __m128i rotr_epu16(__m128i *a, const __m128i *rotrw) { *a = _mm_shuffle_epi8(*a, *rotrw); return *a; } void vpx_highbd_d117_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i rotrw = _mm_load_si128((const __m128i *)rotate_right_epu16); const __m128i XABCDEFG = _mm_loadu_si128((const __m128i *)(above - 1)); const __m128i ABCDEFGH = _mm_load_si128((const __m128i *)above); const __m128i IJKLMNOP = _mm_load_si128((const __m128i *)left); const __m128i IXABCDEF = _mm_alignr_epi8(XABCDEFG, _mm_slli_si128(IJKLMNOP, 14), 14); const __m128i avg3 = avg3_epu16(&ABCDEFGH, &XABCDEFG, &IXABCDEF); const __m128i avg2 = _mm_avg_epu16(ABCDEFGH, XABCDEFG); const __m128i XIJKLMNO = _mm_alignr_epi8(IJKLMNOP, _mm_slli_si128(XABCDEFG, 14), 14); const __m128i JKLMNOP0 = _mm_srli_si128(IJKLMNOP, 2); __m128i avg3_left = avg3_epu16(&XIJKLMNO, &IJKLMNOP, &JKLMNOP0); __m128i rowa = avg2; __m128i rowb = 
avg3; int i; (void)bd; for (i = 0; i < 8; i += 2) { _mm_store_si128((__m128i *)dst, rowa); dst += stride; _mm_store_si128((__m128i *)dst, rowb); dst += stride; rowa = _mm_alignr_epi8(rowa, rotr_epu16(&avg3_left, &rotrw), 14); rowb = _mm_alignr_epi8(rowb, rotr_epu16(&avg3_left, &rotrw), 14); } } void vpx_highbd_d117_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i rotrw = _mm_load_si128((const __m128i *)rotate_right_epu16); const __m128i B0 = _mm_loadu_si128((const __m128i *)(above - 1)); const __m128i A0 = _mm_load_si128((const __m128i *)above); const __m128i B1 = _mm_loadu_si128((const __m128i *)(above + 7)); const __m128i A1 = _mm_load_si128((const __m128i *)(above + 8)); const __m128i avg2_0 = _mm_avg_epu16(A0, B0); const __m128i avg2_1 = _mm_avg_epu16(A1, B1); const __m128i L0 = _mm_load_si128((const __m128i *)left); const __m128i L1 = _mm_load_si128((const __m128i *)(left + 8)); const __m128i C0 = _mm_alignr_epi8(B0, _mm_slli_si128(L0, 14), 14); const __m128i C1 = _mm_alignr_epi8(B1, B0, 14); const __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0); const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1); const __m128i XL0 = _mm_alignr_epi8(L0, _mm_slli_si128(B0, 14), 14); const __m128i XL1 = _mm_alignr_epi8(L1, L0, 14); const __m128i L0_ = _mm_alignr_epi8(L1, L0, 2); const __m128i L1_ = _mm_srli_si128(L1, 2); __m128i rowa_0 = avg2_0; __m128i rowa_1 = avg2_1; __m128i rowb_0 = avg3_0; __m128i rowb_1 = avg3_1; __m128i avg3_left[2]; int i, j; (void)bd; avg3_left[0] = avg3_epu16(&XL0, &L0, &L0_); avg3_left[1] = avg3_epu16(&XL1, &L1, &L1_); for (i = 0; i < 2; ++i) { __m128i avg_left = avg3_left[i]; for (j = 0; j < 8; j += 2) { _mm_store_si128((__m128i *)dst, rowa_0); _mm_store_si128((__m128i *)(dst + 8), rowa_1); dst += stride; _mm_store_si128((__m128i *)dst, rowb_0); _mm_store_si128((__m128i *)(dst + 8), rowb_1); dst += stride; rowa_1 = _mm_alignr_epi8(rowa_1, rowa_0, 14); rowa_0 = _mm_alignr_epi8(rowa_0, rotr_epu16(&avg_left, &rotrw), 14); rowb_1 = _mm_alignr_epi8(rowb_1, rowb_0, 14); rowb_0 = _mm_alignr_epi8(rowb_0, rotr_epu16(&avg_left, &rotrw), 14); } } } void vpx_highbd_d117_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i rotrw = _mm_load_si128((const __m128i *)rotate_right_epu16); const __m128i A0 = _mm_load_si128((const __m128i *)above); const __m128i A1 = _mm_load_si128((const __m128i *)(above + 8)); const __m128i A2 = _mm_load_si128((const __m128i *)(above + 16)); const __m128i A3 = _mm_load_si128((const __m128i *)(above + 24)); const __m128i B0 = _mm_loadu_si128((const __m128i *)(above - 1)); const __m128i B1 = _mm_loadu_si128((const __m128i *)(above + 7)); const __m128i B2 = _mm_loadu_si128((const __m128i *)(above + 15)); const __m128i B3 = _mm_loadu_si128((const __m128i *)(above + 23)); const __m128i avg2_0 = _mm_avg_epu16(A0, B0); const __m128i avg2_1 = _mm_avg_epu16(A1, B1); const __m128i avg2_2 = _mm_avg_epu16(A2, B2); const __m128i avg2_3 = _mm_avg_epu16(A3, B3); const __m128i L0 = _mm_load_si128((const __m128i *)left); const __m128i L1 = _mm_load_si128((const __m128i *)(left + 8)); const __m128i L2 = _mm_load_si128((const __m128i *)(left + 16)); const __m128i L3 = _mm_load_si128((const __m128i *)(left + 24)); const __m128i C0 = _mm_alignr_epi8(B0, _mm_slli_si128(L0, 14), 14); const __m128i C1 = _mm_alignr_epi8(B1, B0, 14); const __m128i C2 = _mm_alignr_epi8(B2, B1, 14); const __m128i C3 = _mm_alignr_epi8(B3, B2, 14); const __m128i avg3_0 
= avg3_epu16(&A0, &B0, &C0); const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1); const __m128i avg3_2 = avg3_epu16(&A2, &B2, &C2); const __m128i avg3_3 = avg3_epu16(&A3, &B3, &C3); const __m128i XL0 = _mm_alignr_epi8(L0, _mm_slli_si128(B0, 14), 14); const __m128i XL1 = _mm_alignr_epi8(L1, L0, 14); const __m128i XL2 = _mm_alignr_epi8(L2, L1, 14); const __m128i XL3 = _mm_alignr_epi8(L3, L2, 14); const __m128i L0_ = _mm_alignr_epi8(L1, L0, 2); const __m128i L1_ = _mm_alignr_epi8(L2, L1, 2); const __m128i L2_ = _mm_alignr_epi8(L3, L2, 2); const __m128i L3_ = _mm_srli_si128(L3, 2); __m128i rowa_0 = avg2_0; __m128i rowa_1 = avg2_1; __m128i rowa_2 = avg2_2; __m128i rowa_3 = avg2_3; __m128i rowb_0 = avg3_0; __m128i rowb_1 = avg3_1; __m128i rowb_2 = avg3_2; __m128i rowb_3 = avg3_3; __m128i avg3_left[4]; int i, j; (void)bd; avg3_left[0] = avg3_epu16(&XL0, &L0, &L0_); avg3_left[1] = avg3_epu16(&XL1, &L1, &L1_); avg3_left[2] = avg3_epu16(&XL2, &L2, &L2_); avg3_left[3] = avg3_epu16(&XL3, &L3, &L3_); for (i = 0; i < 4; ++i) { __m128i avg_left = avg3_left[i]; for (j = 0; j < 8; j += 2) { _mm_store_si128((__m128i *)dst, rowa_0); _mm_store_si128((__m128i *)(dst + 8), rowa_1); _mm_store_si128((__m128i *)(dst + 16), rowa_2); _mm_store_si128((__m128i *)(dst + 24), rowa_3); dst += stride; _mm_store_si128((__m128i *)dst, rowb_0); _mm_store_si128((__m128i *)(dst + 8), rowb_1); _mm_store_si128((__m128i *)(dst + 16), rowb_2); _mm_store_si128((__m128i *)(dst + 24), rowb_3); dst += stride; rowa_3 = _mm_alignr_epi8(rowa_3, rowa_2, 14); rowa_2 = _mm_alignr_epi8(rowa_2, rowa_1, 14); rowa_1 = _mm_alignr_epi8(rowa_1, rowa_0, 14); rowa_0 = _mm_alignr_epi8(rowa_0, rotr_epu16(&avg_left, &rotrw), 14); rowb_3 = _mm_alignr_epi8(rowb_3, rowb_2, 14); rowb_2 = _mm_alignr_epi8(rowb_2, rowb_1, 14); rowb_1 = _mm_alignr_epi8(rowb_1, rowb_0, 14); rowb_0 = _mm_alignr_epi8(rowb_0, rotr_epu16(&avg_left, &rotrw), 14); } } } void vpx_highbd_d135_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i rotrw = _mm_load_si128((const __m128i *)rotate_right_epu16); const __m128i XABCDEFG = _mm_loadu_si128((const __m128i *)(above - 1)); const __m128i ABCDEFGH = _mm_load_si128((const __m128i *)above); const __m128i BCDEFGH0 = _mm_srli_si128(ABCDEFGH, 2); const __m128i IJKLMNOP = _mm_load_si128((const __m128i *)left); const __m128i XIJKLMNO = _mm_alignr_epi8(IJKLMNOP, _mm_slli_si128(XABCDEFG, 14), 14); const __m128i AXIJKLMN = _mm_alignr_epi8(XIJKLMNO, _mm_slli_si128(ABCDEFGH, 14), 14); const __m128i avg3 = avg3_epu16(&XABCDEFG, &ABCDEFGH, &BCDEFGH0); __m128i avg3_left = avg3_epu16(&IJKLMNOP, &XIJKLMNO, &AXIJKLMN); __m128i rowa = avg3; int i; (void)bd; for (i = 0; i < 8; ++i) { rowa = _mm_alignr_epi8(rowa, rotr_epu16(&avg3_left, &rotrw), 14); _mm_store_si128((__m128i *)dst, rowa); dst += stride; } } void vpx_highbd_d135_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i rotrw = _mm_load_si128((const __m128i *)rotate_right_epu16); const __m128i A0 = _mm_loadu_si128((const __m128i *)(above - 1)); const __m128i B0 = _mm_load_si128((const __m128i *)above); const __m128i A1 = _mm_loadu_si128((const __m128i *)(above + 7)); const __m128i B1 = _mm_load_si128((const __m128i *)(above + 8)); const __m128i L0 = _mm_load_si128((const __m128i *)left); const __m128i L1 = _mm_load_si128((const __m128i *)(left + 8)); const __m128i C0 = _mm_alignr_epi8(B1, B0, 2); const __m128i C1 = _mm_srli_si128(B1, 2); const __m128i avg3_0 = 
avg3_epu16(&A0, &B0, &C0); const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1); const __m128i XL0 = _mm_alignr_epi8(L0, _mm_slli_si128(A0, 14), 14); const __m128i XL1 = _mm_alignr_epi8(L1, L0, 14); const __m128i L0_ = _mm_alignr_epi8(XL0, _mm_slli_si128(B0, 14), 14); const __m128i L1_ = _mm_alignr_epi8(XL1, XL0, 14); __m128i rowa_0 = avg3_0; __m128i rowa_1 = avg3_1; __m128i avg3_left[2]; int i, j; (void)bd; avg3_left[0] = avg3_epu16(&L0, &XL0, &L0_); avg3_left[1] = avg3_epu16(&L1, &XL1, &L1_); for (i = 0; i < 2; ++i) { __m128i avg_left = avg3_left[i]; for (j = 0; j < 8; ++j) { rowa_1 = _mm_alignr_epi8(rowa_1, rowa_0, 14); rowa_0 = _mm_alignr_epi8(rowa_0, rotr_epu16(&avg_left, &rotrw), 14); _mm_store_si128((__m128i *)dst, rowa_0); _mm_store_si128((__m128i *)(dst + 8), rowa_1); dst += stride; } } } void vpx_highbd_d135_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i rotrw = _mm_load_si128((const __m128i *)rotate_right_epu16); const __m128i A0 = _mm_loadu_si128((const __m128i *)(above - 1)); const __m128i A1 = _mm_loadu_si128((const __m128i *)(above + 7)); const __m128i A2 = _mm_loadu_si128((const __m128i *)(above + 15)); const __m128i A3 = _mm_loadu_si128((const __m128i *)(above + 23)); const __m128i B0 = _mm_load_si128((const __m128i *)above); const __m128i B1 = _mm_load_si128((const __m128i *)(above + 8)); const __m128i B2 = _mm_load_si128((const __m128i *)(above + 16)); const __m128i B3 = _mm_load_si128((const __m128i *)(above + 24)); const __m128i L0 = _mm_load_si128((const __m128i *)left); const __m128i L1 = _mm_load_si128((const __m128i *)(left + 8)); const __m128i L2 = _mm_load_si128((const __m128i *)(left + 16)); const __m128i L3 = _mm_load_si128((const __m128i *)(left + 24)); const __m128i C0 = _mm_alignr_epi8(B1, B0, 2); const __m128i C1 = _mm_alignr_epi8(B2, B1, 2); const __m128i C2 = _mm_alignr_epi8(B3, B2, 2); const __m128i C3 = _mm_srli_si128(B3, 2); const __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0); const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1); const __m128i avg3_2 = avg3_epu16(&A2, &B2, &C2); const __m128i avg3_3 = avg3_epu16(&A3, &B3, &C3); const __m128i XL0 = _mm_alignr_epi8(L0, _mm_slli_si128(A0, 14), 14); const __m128i XL1 = _mm_alignr_epi8(L1, L0, 14); const __m128i XL2 = _mm_alignr_epi8(L2, L1, 14); const __m128i XL3 = _mm_alignr_epi8(L3, L2, 14); const __m128i L0_ = _mm_alignr_epi8(XL0, _mm_slli_si128(B0, 14), 14); const __m128i L1_ = _mm_alignr_epi8(XL1, XL0, 14); const __m128i L2_ = _mm_alignr_epi8(XL2, XL1, 14); const __m128i L3_ = _mm_alignr_epi8(XL3, XL2, 14); __m128i rowa_0 = avg3_0; __m128i rowa_1 = avg3_1; __m128i rowa_2 = avg3_2; __m128i rowa_3 = avg3_3; __m128i avg3_left[4]; int i, j; (void)bd; avg3_left[0] = avg3_epu16(&L0, &XL0, &L0_); avg3_left[1] = avg3_epu16(&L1, &XL1, &L1_); avg3_left[2] = avg3_epu16(&L2, &XL2, &L2_); avg3_left[3] = avg3_epu16(&L3, &XL3, &L3_); for (i = 0; i < 4; ++i) { __m128i avg_left = avg3_left[i]; for (j = 0; j < 8; ++j) { rowa_3 = _mm_alignr_epi8(rowa_3, rowa_2, 14); rowa_2 = _mm_alignr_epi8(rowa_2, rowa_1, 14); rowa_1 = _mm_alignr_epi8(rowa_1, rowa_0, 14); rowa_0 = _mm_alignr_epi8(rowa_0, rotr_epu16(&avg_left, &rotrw), 14); _mm_store_si128((__m128i *)dst, rowa_0); _mm_store_si128((__m128i *)(dst + 8), rowa_1); _mm_store_si128((__m128i *)(dst + 16), rowa_2); _mm_store_si128((__m128i *)(dst + 24), rowa_3); dst += stride; } } } void vpx_highbd_d153_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { 
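/* Added note (illustrative, not in upstream libvpx): the d153 rows are built
 * from interleaved 2-tap (avg2) and 3-tap (avg3) averages of the left column;
 * each output row pushes one avg2/avg3 pair into the above-row avg3 vector
 * from the low side, shifting the previous row's pixels over by two. */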
const __m128i XABCDEFG = _mm_loadu_si128((const __m128i *)(above - 1)); const __m128i ABCDEFG0 = _mm_srli_si128(XABCDEFG, 2); const __m128i BCDEFG00 = _mm_srli_si128(XABCDEFG, 4); const __m128i avg3 = avg3_epu16(&BCDEFG00, &ABCDEFG0, &XABCDEFG); const __m128i IJKLMNOP = _mm_load_si128((const __m128i *)left); const __m128i XIJKLMNO = _mm_alignr_epi8(IJKLMNOP, _mm_slli_si128(XABCDEFG, 14), 14); const __m128i AXIJKLMN = _mm_alignr_epi8(XIJKLMNO, _mm_slli_si128(XABCDEFG, 12), 14); const __m128i avg3_left = avg3_epu16(&IJKLMNOP, &XIJKLMNO, &AXIJKLMN); const __m128i avg2_left = _mm_avg_epu16(IJKLMNOP, XIJKLMNO); const __m128i avg2_avg3_lo = _mm_unpacklo_epi16(avg2_left, avg3_left); const __m128i avg2_avg3_hi = _mm_unpackhi_epi16(avg2_left, avg3_left); const __m128i row0 = _mm_alignr_epi8(avg3, _mm_slli_si128(avg2_avg3_lo, 12), 12); const __m128i row1 = _mm_alignr_epi8(row0, _mm_slli_si128(avg2_avg3_lo, 8), 12); const __m128i row2 = _mm_alignr_epi8(row1, _mm_slli_si128(avg2_avg3_lo, 4), 12); const __m128i row3 = _mm_alignr_epi8(row2, avg2_avg3_lo, 12); const __m128i row4 = _mm_alignr_epi8(row3, _mm_slli_si128(avg2_avg3_hi, 12), 12); const __m128i row5 = _mm_alignr_epi8(row4, _mm_slli_si128(avg2_avg3_hi, 8), 12); const __m128i row6 = _mm_alignr_epi8(row5, _mm_slli_si128(avg2_avg3_hi, 4), 12); const __m128i row7 = _mm_alignr_epi8(row6, avg2_avg3_hi, 12); (void)bd; _mm_store_si128((__m128i *)dst, row0); dst += stride; _mm_store_si128((__m128i *)dst, row1); dst += stride; _mm_store_si128((__m128i *)dst, row2); dst += stride; _mm_store_si128((__m128i *)dst, row3); dst += stride; _mm_store_si128((__m128i *)dst, row4); dst += stride; _mm_store_si128((__m128i *)dst, row5); dst += stride; _mm_store_si128((__m128i *)dst, row6); dst += stride; _mm_store_si128((__m128i *)dst, row7); } void vpx_highbd_d153_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i A0 = _mm_loadu_si128((const __m128i *)(above - 1)); const __m128i A1 = _mm_loadu_si128((const __m128i *)(above + 7)); const __m128i B0 = _mm_alignr_epi8(A1, A0, 2); const __m128i B1 = _mm_srli_si128(A1, 2); const __m128i C0 = _mm_alignr_epi8(A1, A0, 4); const __m128i C1 = _mm_srli_si128(A1, 4); const __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0); const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1); const __m128i L0 = _mm_load_si128((const __m128i *)left); const __m128i L1 = _mm_load_si128((const __m128i *)(left + 8)); const __m128i XL0 = _mm_alignr_epi8(L0, _mm_slli_si128(A0, 14), 14); const __m128i AXL0 = _mm_alignr_epi8(XL0, _mm_slli_si128(A0, 12), 14); const __m128i XL1 = _mm_alignr_epi8(L1, L0, 14); const __m128i AXL1 = _mm_alignr_epi8(L1, L0, 12); const __m128i avg3_left_0 = avg3_epu16(&L0, &XL0, &AXL0); const __m128i avg2_left_0 = _mm_avg_epu16(L0, XL0); const __m128i avg3_left_1 = avg3_epu16(&L1, &XL1, &AXL1); const __m128i avg2_left_1 = _mm_avg_epu16(L1, XL1); __m128i row_0 = avg3_0; __m128i row_1 = avg3_1; __m128i avg2_avg3_left[2][2]; int i, j; (void)bd; avg2_avg3_left[0][0] = _mm_unpacklo_epi16(avg2_left_0, avg3_left_0); avg2_avg3_left[0][1] = _mm_unpackhi_epi16(avg2_left_0, avg3_left_0); avg2_avg3_left[1][0] = _mm_unpacklo_epi16(avg2_left_1, avg3_left_1); avg2_avg3_left[1][1] = _mm_unpackhi_epi16(avg2_left_1, avg3_left_1); for (j = 0; j < 2; ++j) { for (i = 0; i < 2; ++i) { const __m128i avg2_avg3 = avg2_avg3_left[j][i]; row_1 = _mm_alignr_epi8(row_1, row_0, 12); row_0 = _mm_alignr_epi8(row_0, _mm_slli_si128(avg2_avg3, 12), 12); _mm_store_si128((__m128i *)dst, row_0); 
_mm_store_si128((__m128i *)(dst + 8), row_1); dst += stride; row_1 = _mm_alignr_epi8(row_1, row_0, 12); row_0 = _mm_alignr_epi8(row_0, _mm_slli_si128(avg2_avg3, 8), 12); _mm_store_si128((__m128i *)dst, row_0); _mm_store_si128((__m128i *)(dst + 8), row_1); dst += stride; row_1 = _mm_alignr_epi8(row_1, row_0, 12); row_0 = _mm_alignr_epi8(row_0, _mm_slli_si128(avg2_avg3, 4), 12); _mm_store_si128((__m128i *)dst, row_0); _mm_store_si128((__m128i *)(dst + 8), row_1); dst += stride; row_1 = _mm_alignr_epi8(row_1, row_0, 12); row_0 = _mm_alignr_epi8(row_0, avg2_avg3, 12); _mm_store_si128((__m128i *)dst, row_0); _mm_store_si128((__m128i *)(dst + 8), row_1); dst += stride; } } } void vpx_highbd_d153_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i A0 = _mm_loadu_si128((const __m128i *)(above - 1)); const __m128i A1 = _mm_loadu_si128((const __m128i *)(above + 7)); const __m128i A2 = _mm_loadu_si128((const __m128i *)(above + 15)); const __m128i A3 = _mm_loadu_si128((const __m128i *)(above + 23)); const __m128i B0 = _mm_alignr_epi8(A1, A0, 2); const __m128i B1 = _mm_alignr_epi8(A2, A1, 2); const __m128i B2 = _mm_alignr_epi8(A3, A2, 2); const __m128i B3 = _mm_srli_si128(A3, 2); const __m128i C0 = _mm_alignr_epi8(A1, A0, 4); const __m128i C1 = _mm_alignr_epi8(A2, A1, 4); const __m128i C2 = _mm_alignr_epi8(A3, A2, 4); const __m128i C3 = _mm_srli_si128(A3, 4); const __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0); const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1); const __m128i avg3_2 = avg3_epu16(&A2, &B2, &C2); const __m128i avg3_3 = avg3_epu16(&A3, &B3, &C3); const __m128i L0 = _mm_load_si128((const __m128i *)left); const __m128i L1 = _mm_load_si128((const __m128i *)(left + 8)); const __m128i L2 = _mm_load_si128((const __m128i *)(left + 16)); const __m128i L3 = _mm_load_si128((const __m128i *)(left + 24)); const __m128i XL0 = _mm_alignr_epi8(L0, _mm_slli_si128(A0, 14), 14); const __m128i XL1 = _mm_alignr_epi8(L1, L0, 14); const __m128i XL2 = _mm_alignr_epi8(L2, L1, 14); const __m128i XL3 = _mm_alignr_epi8(L3, L2, 14); const __m128i AXL0 = _mm_alignr_epi8(XL0, _mm_slli_si128(A0, 12), 14); const __m128i AXL1 = _mm_alignr_epi8(L1, L0, 12); const __m128i AXL2 = _mm_alignr_epi8(L2, L1, 12); const __m128i AXL3 = _mm_alignr_epi8(L3, L2, 12); const __m128i avg3_left_0 = avg3_epu16(&L0, &XL0, &AXL0); const __m128i avg3_left_1 = avg3_epu16(&L1, &XL1, &AXL1); const __m128i avg3_left_2 = avg3_epu16(&L2, &XL2, &AXL2); const __m128i avg3_left_3 = avg3_epu16(&L3, &XL3, &AXL3); const __m128i avg2_left_0 = _mm_avg_epu16(L0, XL0); const __m128i avg2_left_1 = _mm_avg_epu16(L1, XL1); const __m128i avg2_left_2 = _mm_avg_epu16(L2, XL2); const __m128i avg2_left_3 = _mm_avg_epu16(L3, XL3); __m128i row_0 = avg3_0; __m128i row_1 = avg3_1; __m128i row_2 = avg3_2; __m128i row_3 = avg3_3; __m128i avg2_avg3_left[4][2]; int i, j; (void)bd; avg2_avg3_left[0][0] = _mm_unpacklo_epi16(avg2_left_0, avg3_left_0); avg2_avg3_left[0][1] = _mm_unpackhi_epi16(avg2_left_0, avg3_left_0); avg2_avg3_left[1][0] = _mm_unpacklo_epi16(avg2_left_1, avg3_left_1); avg2_avg3_left[1][1] = _mm_unpackhi_epi16(avg2_left_1, avg3_left_1); avg2_avg3_left[2][0] = _mm_unpacklo_epi16(avg2_left_2, avg3_left_2); avg2_avg3_left[2][1] = _mm_unpackhi_epi16(avg2_left_2, avg3_left_2); avg2_avg3_left[3][0] = _mm_unpacklo_epi16(avg2_left_3, avg3_left_3); avg2_avg3_left[3][1] = _mm_unpackhi_epi16(avg2_left_3, avg3_left_3); for (j = 0; j < 4; ++j) { for (i = 0; i < 2; ++i) { const __m128i avg2_avg3 = 
avg2_avg3_left[j][i]; row_3 = _mm_alignr_epi8(row_3, row_2, 12); row_2 = _mm_alignr_epi8(row_2, row_1, 12); row_1 = _mm_alignr_epi8(row_1, row_0, 12); row_0 = _mm_alignr_epi8(row_0, _mm_slli_si128(avg2_avg3, 12), 12); _mm_store_si128((__m128i *)dst, row_0); _mm_store_si128((__m128i *)(dst + 8), row_1); _mm_store_si128((__m128i *)(dst + 16), row_2); _mm_store_si128((__m128i *)(dst + 24), row_3); dst += stride; row_3 = _mm_alignr_epi8(row_3, row_2, 12); row_2 = _mm_alignr_epi8(row_2, row_1, 12); row_1 = _mm_alignr_epi8(row_1, row_0, 12); row_0 = _mm_alignr_epi8(row_0, _mm_slli_si128(avg2_avg3, 8), 12); _mm_store_si128((__m128i *)dst, row_0); _mm_store_si128((__m128i *)(dst + 8), row_1); _mm_store_si128((__m128i *)(dst + 16), row_2); _mm_store_si128((__m128i *)(dst + 24), row_3); dst += stride; row_3 = _mm_alignr_epi8(row_3, row_2, 12); row_2 = _mm_alignr_epi8(row_2, row_1, 12); row_1 = _mm_alignr_epi8(row_1, row_0, 12); row_0 = _mm_alignr_epi8(row_0, _mm_slli_si128(avg2_avg3, 4), 12); _mm_store_si128((__m128i *)dst, row_0); _mm_store_si128((__m128i *)(dst + 8), row_1); _mm_store_si128((__m128i *)(dst + 16), row_2); _mm_store_si128((__m128i *)(dst + 24), row_3); dst += stride; row_3 = _mm_alignr_epi8(row_3, row_2, 12); row_2 = _mm_alignr_epi8(row_2, row_1, 12); row_1 = _mm_alignr_epi8(row_1, row_0, 12); row_0 = _mm_alignr_epi8(row_0, avg2_avg3, 12); _mm_store_si128((__m128i *)dst, row_0); _mm_store_si128((__m128i *)(dst + 8), row_1); _mm_store_si128((__m128i *)(dst + 16), row_2); _mm_store_si128((__m128i *)(dst + 24), row_3); dst += stride; } } } static INLINE void d207_store_4x8(uint16_t **dst, const ptrdiff_t stride, const __m128i *a, const __m128i *b) { _mm_store_si128((__m128i *)*dst, *a); *dst += stride; _mm_store_si128((__m128i *)*dst, _mm_alignr_epi8(*b, *a, 4)); *dst += stride; _mm_store_si128((__m128i *)*dst, _mm_alignr_epi8(*b, *a, 8)); *dst += stride; _mm_store_si128((__m128i *)*dst, _mm_alignr_epi8(*b, *a, 12)); *dst += stride; } void vpx_highbd_d207_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i ABCDEFGH = _mm_load_si128((const __m128i *)left); const __m128i ABCDHHHH = _mm_shufflehi_epi16(ABCDEFGH, 0xff); const __m128i HHHHHHHH = _mm_unpackhi_epi64(ABCDHHHH, ABCDHHHH); const __m128i BCDEFGHH = _mm_alignr_epi8(HHHHHHHH, ABCDEFGH, 2); const __m128i CDEFGHHH = _mm_alignr_epi8(HHHHHHHH, ABCDEFGH, 4); const __m128i avg3 = avg3_epu16(&ABCDEFGH, &BCDEFGHH, &CDEFGHHH); const __m128i avg2 = _mm_avg_epu16(ABCDEFGH, BCDEFGHH); const __m128i out_a = _mm_unpacklo_epi16(avg2, avg3); const __m128i out_b = _mm_unpackhi_epi16(avg2, avg3); (void)above; (void)bd; d207_store_4x8(&dst, stride, &out_a, &out_b); d207_store_4x8(&dst, stride, &out_b, &HHHHHHHH); } static INLINE void d207_store_4x16(uint16_t **dst, const ptrdiff_t stride, const __m128i *a, const __m128i *b, const __m128i *c) { _mm_store_si128((__m128i *)*dst, *a); _mm_store_si128((__m128i *)(*dst + 8), *b); *dst += stride; _mm_store_si128((__m128i *)*dst, _mm_alignr_epi8(*b, *a, 4)); _mm_store_si128((__m128i *)(*dst + 8), _mm_alignr_epi8(*c, *b, 4)); *dst += stride; _mm_store_si128((__m128i *)*dst, _mm_alignr_epi8(*b, *a, 8)); _mm_store_si128((__m128i *)(*dst + 8), _mm_alignr_epi8(*c, *b, 8)); *dst += stride; _mm_store_si128((__m128i *)*dst, _mm_alignr_epi8(*b, *a, 12)); _mm_store_si128((__m128i *)(*dst + 8), _mm_alignr_epi8(*c, *b, 12)); *dst += stride; } void vpx_highbd_d207_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const 
uint16_t *left, int bd) { const __m128i A0 = _mm_load_si128((const __m128i *)left); const __m128i A1 = _mm_load_si128((const __m128i *)(left + 8)); const __m128i LR0 = _mm_shufflehi_epi16(A1, 0xff); const __m128i LR = _mm_unpackhi_epi64(LR0, LR0); const __m128i B0 = _mm_alignr_epi8(A1, A0, 2); const __m128i B1 = _mm_alignr_epi8(LR, A1, 2); const __m128i C0 = _mm_alignr_epi8(A1, A0, 4); const __m128i C1 = _mm_alignr_epi8(LR, A1, 4); const __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0); const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1); const __m128i avg2_0 = _mm_avg_epu16(A0, B0); const __m128i avg2_1 = _mm_avg_epu16(A1, B1); const __m128i out_a = _mm_unpacklo_epi16(avg2_0, avg3_0); const __m128i out_b = _mm_unpackhi_epi16(avg2_0, avg3_0); const __m128i out_c = _mm_unpacklo_epi16(avg2_1, avg3_1); const __m128i out_d = _mm_unpackhi_epi16(avg2_1, avg3_1); (void)above; (void)bd; d207_store_4x16(&dst, stride, &out_a, &out_b, &out_c); d207_store_4x16(&dst, stride, &out_b, &out_c, &out_d); d207_store_4x16(&dst, stride, &out_c, &out_d, &LR); d207_store_4x16(&dst, stride, &out_d, &LR, &LR); } static INLINE void d207_store_4x32(uint16_t **dst, const ptrdiff_t stride, const __m128i *a, const __m128i *b, const __m128i *c, const __m128i *d, const __m128i *e) { _mm_store_si128((__m128i *)*dst, *a); _mm_store_si128((__m128i *)(*dst + 8), *b); _mm_store_si128((__m128i *)(*dst + 16), *c); _mm_store_si128((__m128i *)(*dst + 24), *d); *dst += stride; _mm_store_si128((__m128i *)*dst, _mm_alignr_epi8(*b, *a, 4)); _mm_store_si128((__m128i *)(*dst + 8), _mm_alignr_epi8(*c, *b, 4)); _mm_store_si128((__m128i *)(*dst + 16), _mm_alignr_epi8(*d, *c, 4)); _mm_store_si128((__m128i *)(*dst + 24), _mm_alignr_epi8(*e, *d, 4)); *dst += stride; _mm_store_si128((__m128i *)*dst, _mm_alignr_epi8(*b, *a, 8)); _mm_store_si128((__m128i *)(*dst + 8), _mm_alignr_epi8(*c, *b, 8)); _mm_store_si128((__m128i *)(*dst + 16), _mm_alignr_epi8(*d, *c, 8)); _mm_store_si128((__m128i *)(*dst + 24), _mm_alignr_epi8(*e, *d, 8)); *dst += stride; _mm_store_si128((__m128i *)*dst, _mm_alignr_epi8(*b, *a, 12)); _mm_store_si128((__m128i *)(*dst + 8), _mm_alignr_epi8(*c, *b, 12)); _mm_store_si128((__m128i *)(*dst + 16), _mm_alignr_epi8(*d, *c, 12)); _mm_store_si128((__m128i *)(*dst + 24), _mm_alignr_epi8(*e, *d, 12)); *dst += stride; } void vpx_highbd_d207_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i A0 = _mm_load_si128((const __m128i *)left); const __m128i A1 = _mm_load_si128((const __m128i *)(left + 8)); const __m128i A2 = _mm_load_si128((const __m128i *)(left + 16)); const __m128i A3 = _mm_load_si128((const __m128i *)(left + 24)); const __m128i LR0 = _mm_shufflehi_epi16(A3, 0xff); const __m128i LR = _mm_unpackhi_epi64(LR0, LR0); const __m128i B0 = _mm_alignr_epi8(A1, A0, 2); const __m128i B1 = _mm_alignr_epi8(A2, A1, 2); const __m128i B2 = _mm_alignr_epi8(A3, A2, 2); const __m128i B3 = _mm_alignr_epi8(LR, A3, 2); const __m128i C0 = _mm_alignr_epi8(A1, A0, 4); const __m128i C1 = _mm_alignr_epi8(A2, A1, 4); const __m128i C2 = _mm_alignr_epi8(A3, A2, 4); const __m128i C3 = _mm_alignr_epi8(LR, A3, 4); const __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0); const __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1); const __m128i avg3_2 = avg3_epu16(&A2, &B2, &C2); const __m128i avg3_3 = avg3_epu16(&A3, &B3, &C3); const __m128i avg2_0 = _mm_avg_epu16(A0, B0); const __m128i avg2_1 = _mm_avg_epu16(A1, B1); const __m128i avg2_2 = _mm_avg_epu16(A2, B2); const __m128i avg2_3 = _mm_avg_epu16(A3, B3); 
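/* Added note (illustrative, not in upstream libvpx): the four avg2/avg3 pairs
 * computed above are interleaved into the eight vectors out_a..out_h below;
 * each d207_store_4x32() call then writes four rows, advancing by one
 * avg2/avg3 pair (one left pixel) per row and padding the tail with the
 * replicated last left pixel (LR). */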
const __m128i out_a = _mm_unpacklo_epi16(avg2_0, avg3_0); const __m128i out_b = _mm_unpackhi_epi16(avg2_0, avg3_0); const __m128i out_c = _mm_unpacklo_epi16(avg2_1, avg3_1); const __m128i out_d = _mm_unpackhi_epi16(avg2_1, avg3_1); const __m128i out_e = _mm_unpacklo_epi16(avg2_2, avg3_2); const __m128i out_f = _mm_unpackhi_epi16(avg2_2, avg3_2); const __m128i out_g = _mm_unpacklo_epi16(avg2_3, avg3_3); const __m128i out_h = _mm_unpackhi_epi16(avg2_3, avg3_3); (void)above; (void)bd; d207_store_4x32(&dst, stride, &out_a, &out_b, &out_c, &out_d, &out_e); d207_store_4x32(&dst, stride, &out_b, &out_c, &out_d, &out_e, &out_f); d207_store_4x32(&dst, stride, &out_c, &out_d, &out_e, &out_f, &out_g); d207_store_4x32(&dst, stride, &out_d, &out_e, &out_f, &out_g, &out_h); d207_store_4x32(&dst, stride, &out_e, &out_f, &out_g, &out_h, &LR); d207_store_4x32(&dst, stride, &out_f, &out_g, &out_h, &LR, &LR); d207_store_4x32(&dst, stride, &out_g, &out_h, &LR, &LR, &LR); d207_store_4x32(&dst, stride, &out_h, &LR, &LR, &LR, &LR); } static INLINE void d63_store_4x8(uint16_t **dst, const ptrdiff_t stride, __m128i *a, __m128i *b, const __m128i *ar) { _mm_store_si128((__m128i *)*dst, *a); *dst += stride; _mm_store_si128((__m128i *)*dst, *b); *dst += stride; *a = _mm_alignr_epi8(*ar, *a, 2); *b = _mm_alignr_epi8(*ar, *b, 2); _mm_store_si128((__m128i *)*dst, *a); *dst += stride; _mm_store_si128((__m128i *)*dst, *b); *dst += stride; *a = _mm_alignr_epi8(*ar, *a, 2); *b = _mm_alignr_epi8(*ar, *b, 2); } void vpx_highbd_d63_predictor_8x8_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i ABCDEFGH = _mm_load_si128((const __m128i *)above); const __m128i ABCDHHHH = _mm_shufflehi_epi16(ABCDEFGH, 0xff); const __m128i HHHHHHHH = _mm_unpackhi_epi64(ABCDHHHH, ABCDHHHH); const __m128i BCDEFGHH = _mm_alignr_epi8(HHHHHHHH, ABCDEFGH, 2); const __m128i CDEFGHHH = _mm_alignr_epi8(HHHHHHHH, ABCDEFGH, 4); __m128i avg3 = avg3_epu16(&ABCDEFGH, &BCDEFGHH, &CDEFGHHH); __m128i avg2 = _mm_avg_epu16(ABCDEFGH, BCDEFGHH); (void)left; (void)bd; d63_store_4x8(&dst, stride, &avg2, &avg3, &HHHHHHHH); d63_store_4x8(&dst, stride, &avg2, &avg3, &HHHHHHHH); } void vpx_highbd_d63_predictor_16x16_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i A0 = _mm_load_si128((const __m128i *)above); const __m128i A1 = _mm_load_si128((const __m128i *)(above + 8)); const __m128i AR0 = _mm_shufflehi_epi16(A1, 0xff); const __m128i AR = _mm_unpackhi_epi64(AR0, AR0); const __m128i B0 = _mm_alignr_epi8(A1, A0, 2); const __m128i B1 = _mm_alignr_epi8(AR, A1, 2); const __m128i C0 = _mm_alignr_epi8(A1, A0, 4); const __m128i C1 = _mm_alignr_epi8(AR, A1, 4); __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0); __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1); __m128i avg2_0 = _mm_avg_epu16(A0, B0); __m128i avg2_1 = _mm_avg_epu16(A1, B1); int i; (void)left; (void)bd; for (i = 0; i < 14; i += 2) { _mm_store_si128((__m128i *)dst, avg2_0); _mm_store_si128((__m128i *)(dst + 8), avg2_1); dst += stride; _mm_store_si128((__m128i *)dst, avg3_0); _mm_store_si128((__m128i *)(dst + 8), avg3_1); dst += stride; avg2_0 = _mm_alignr_epi8(avg2_1, avg2_0, 2); avg2_1 = _mm_alignr_epi8(AR, avg2_1, 2); avg3_0 = _mm_alignr_epi8(avg3_1, avg3_0, 2); avg3_1 = _mm_alignr_epi8(AR, avg3_1, 2); } _mm_store_si128((__m128i *)dst, avg2_0); _mm_store_si128((__m128i *)(dst + 8), avg2_1); dst += stride; _mm_store_si128((__m128i *)dst, avg3_0); _mm_store_si128((__m128i *)(dst + 8), avg3_1); } void 
vpx_highbd_d63_predictor_32x32_ssse3(uint16_t *dst, ptrdiff_t stride, const uint16_t *above, const uint16_t *left, int bd) { const __m128i A0 = _mm_load_si128((const __m128i *)above); const __m128i A1 = _mm_load_si128((const __m128i *)(above + 8)); const __m128i A2 = _mm_load_si128((const __m128i *)(above + 16)); const __m128i A3 = _mm_load_si128((const __m128i *)(above + 24)); const __m128i AR0 = _mm_shufflehi_epi16(A3, 0xff); const __m128i AR = _mm_unpackhi_epi64(AR0, AR0); const __m128i B0 = _mm_alignr_epi8(A1, A0, 2); const __m128i B1 = _mm_alignr_epi8(A2, A1, 2); const __m128i B2 = _mm_alignr_epi8(A3, A2, 2); const __m128i B3 = _mm_alignr_epi8(AR, A3, 2); const __m128i C0 = _mm_alignr_epi8(A1, A0, 4); const __m128i C1 = _mm_alignr_epi8(A2, A1, 4); const __m128i C2 = _mm_alignr_epi8(A3, A2, 4); const __m128i C3 = _mm_alignr_epi8(AR, A3, 4); __m128i avg3_0 = avg3_epu16(&A0, &B0, &C0); __m128i avg3_1 = avg3_epu16(&A1, &B1, &C1); __m128i avg3_2 = avg3_epu16(&A2, &B2, &C2); __m128i avg3_3 = avg3_epu16(&A3, &B3, &C3); __m128i avg2_0 = _mm_avg_epu16(A0, B0); __m128i avg2_1 = _mm_avg_epu16(A1, B1); __m128i avg2_2 = _mm_avg_epu16(A2, B2); __m128i avg2_3 = _mm_avg_epu16(A3, B3); int i; (void)left; (void)bd; for (i = 0; i < 30; i += 2) { _mm_store_si128((__m128i *)dst, avg2_0); _mm_store_si128((__m128i *)(dst + 8), avg2_1); _mm_store_si128((__m128i *)(dst + 16), avg2_2); _mm_store_si128((__m128i *)(dst + 24), avg2_3); dst += stride; _mm_store_si128((__m128i *)dst, avg3_0); _mm_store_si128((__m128i *)(dst + 8), avg3_1); _mm_store_si128((__m128i *)(dst + 16), avg3_2); _mm_store_si128((__m128i *)(dst + 24), avg3_3); dst += stride; avg2_0 = _mm_alignr_epi8(avg2_1, avg2_0, 2); avg2_1 = _mm_alignr_epi8(avg2_2, avg2_1, 2); avg2_2 = _mm_alignr_epi8(avg2_3, avg2_2, 2); avg2_3 = _mm_alignr_epi8(AR, avg2_3, 2); avg3_0 = _mm_alignr_epi8(avg3_1, avg3_0, 2); avg3_1 = _mm_alignr_epi8(avg3_2, avg3_1, 2); avg3_2 = _mm_alignr_epi8(avg3_3, avg3_2, 2); avg3_3 = _mm_alignr_epi8(AR, avg3_3, 2); } _mm_store_si128((__m128i *)dst, avg2_0); _mm_store_si128((__m128i *)(dst + 8), avg2_1); _mm_store_si128((__m128i *)(dst + 16), avg2_2); _mm_store_si128((__m128i *)(dst + 24), avg2_3); dst += stride; _mm_store_si128((__m128i *)dst, avg3_0); _mm_store_si128((__m128i *)(dst + 8), avg3_1); _mm_store_si128((__m128i *)(dst + 16), avg3_2); _mm_store_si128((__m128i *)(dst + 24), avg3_3); } libvpx-1.8.2/vpx_dsp/x86/highbd_intrapred_sse2.asm000066400000000000000000000344511357355204000221040ustar00rootroot00000000000000; ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
; %include "third_party/x86inc/x86inc.asm" SECTION_RODATA pw_4: times 8 dw 4 pw_8: times 8 dw 8 pw_16: times 4 dd 16 pw_32: times 4 dd 32 SECTION .text INIT_XMM sse2 cglobal highbd_dc_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset GET_GOT goffsetq movq m0, [aboveq] movq m2, [leftq] paddw m0, m2 pshuflw m1, m0, 0xe paddw m0, m1 pshuflw m1, m0, 0x1 paddw m0, m1 paddw m0, [GLOBAL(pw_4)] psraw m0, 3 pshuflw m0, m0, 0x0 movq [dstq ], m0 movq [dstq+strideq*2], m0 lea dstq, [dstq+strideq*4] movq [dstq ], m0 movq [dstq+strideq*2], m0 RESTORE_GOT RET INIT_XMM sse2 cglobal highbd_dc_predictor_8x8, 4, 5, 4, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 mova m0, [aboveq] mova m2, [leftq] DEFINE_ARGS dst, stride, stride3, one mov oned, 0x00010001 lea stride3q, [strideq*3] movd m3, oned pshufd m3, m3, 0x0 paddw m0, m2 pmaddwd m0, m3 packssdw m0, m1 pmaddwd m0, m3 packssdw m0, m1 pmaddwd m0, m3 paddw m0, [GLOBAL(pw_8)] psrlw m0, 4 pshuflw m0, m0, 0x0 punpcklqdq m0, m0 mova [dstq ], m0 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*4 ], m0 mova [dstq+stride3q*2], m0 lea dstq, [dstq+strideq*8] mova [dstq ], m0 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*4 ], m0 mova [dstq+stride3q*2], m0 RESTORE_GOT RET INIT_XMM sse2 cglobal highbd_dc_predictor_16x16, 4, 5, 5, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 mova m0, [aboveq] mova m3, [aboveq+16] mova m2, [leftq] mova m4, [leftq+16] DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 4 paddw m0, m2 paddw m0, m3 paddw m0, m4 movhlps m2, m0 paddw m0, m2 punpcklwd m0, m1 movhlps m2, m0 paddd m0, m2 punpckldq m0, m1 movhlps m2, m0 paddd m0, m2 paddd m0, [GLOBAL(pw_16)] psrad m0, 5 pshuflw m0, m0, 0x0 punpcklqdq m0, m0 .loop: mova [dstq ], m0 mova [dstq +16], m0 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2 +16], m0 mova [dstq+strideq*4 ], m0 mova [dstq+strideq*4 +16], m0 mova [dstq+stride3q*2 ], m0 mova [dstq+stride3q*2+16], m0 lea dstq, [dstq+strideq*8] dec lines4d jnz .loop RESTORE_GOT REP_RET INIT_XMM sse2 cglobal highbd_dc_predictor_32x32, 4, 5, 7, dst, stride, above, left, goffset GET_GOT goffsetq mova m0, [aboveq] mova m2, [aboveq+16] mova m3, [aboveq+32] mova m4, [aboveq+48] paddw m0, m2 paddw m3, m4 mova m2, [leftq] mova m4, [leftq+16] mova m5, [leftq+32] mova m6, [leftq+48] paddw m2, m4 paddw m5, m6 paddw m0, m3 paddw m2, m5 pxor m1, m1 paddw m0, m2 DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 8 movhlps m2, m0 paddw m0, m2 punpcklwd m0, m1 movhlps m2, m0 paddd m0, m2 punpckldq m0, m1 movhlps m2, m0 paddd m0, m2 paddd m0, [GLOBAL(pw_32)] psrad m0, 6 pshuflw m0, m0, 0x0 punpcklqdq m0, m0 .loop: mova [dstq ], m0 mova [dstq +16 ], m0 mova [dstq +32 ], m0 mova [dstq +48 ], m0 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2+16 ], m0 mova [dstq+strideq*2+32 ], m0 mova [dstq+strideq*2+48 ], m0 mova [dstq+strideq*4 ], m0 mova [dstq+strideq*4+16 ], m0 mova [dstq+strideq*4+32 ], m0 mova [dstq+strideq*4+48 ], m0 mova [dstq+stride3q*2 ], m0 mova [dstq+stride3q*2 +16], m0 mova [dstq+stride3q*2 +32], m0 mova [dstq+stride3q*2 +48], m0 lea dstq, [dstq+strideq*8] dec lines4d jnz .loop RESTORE_GOT REP_RET INIT_XMM sse2 cglobal highbd_v_predictor_4x4, 3, 3, 1, dst, stride, above movq m0, [aboveq] movq [dstq ], m0 movq [dstq+strideq*2], m0 lea dstq, [dstq+strideq*4] movq [dstq ], m0 movq [dstq+strideq*2], m0 RET INIT_XMM sse2 cglobal highbd_v_predictor_8x8, 3, 3, 1, dst, stride, above mova m0, [aboveq] DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] mova 
[dstq ], m0 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*4 ], m0 mova [dstq+stride3q*2], m0 lea dstq, [dstq+strideq*8] mova [dstq ], m0 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*4 ], m0 mova [dstq+stride3q*2], m0 RET INIT_XMM sse2 cglobal highbd_v_predictor_16x16, 3, 4, 2, dst, stride, above mova m0, [aboveq] mova m1, [aboveq+16] DEFINE_ARGS dst, stride, stride3, nlines4 lea stride3q, [strideq*3] mov nlines4d, 4 .loop: mova [dstq ], m0 mova [dstq +16], m1 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2 +16], m1 mova [dstq+strideq*4 ], m0 mova [dstq+strideq*4 +16], m1 mova [dstq+stride3q*2 ], m0 mova [dstq+stride3q*2+16], m1 lea dstq, [dstq+strideq*8] dec nlines4d jnz .loop REP_RET INIT_XMM sse2 cglobal highbd_v_predictor_32x32, 3, 4, 4, dst, stride, above mova m0, [aboveq] mova m1, [aboveq+16] mova m2, [aboveq+32] mova m3, [aboveq+48] DEFINE_ARGS dst, stride, stride3, nlines4 lea stride3q, [strideq*3] mov nlines4d, 8 .loop: mova [dstq ], m0 mova [dstq +16], m1 mova [dstq +32], m2 mova [dstq +48], m3 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2 +16], m1 mova [dstq+strideq*2 +32], m2 mova [dstq+strideq*2 +48], m3 mova [dstq+strideq*4 ], m0 mova [dstq+strideq*4 +16], m1 mova [dstq+strideq*4 +32], m2 mova [dstq+strideq*4 +48], m3 mova [dstq+stride3q*2 ], m0 mova [dstq+stride3q*2 +16], m1 mova [dstq+stride3q*2 +32], m2 mova [dstq+stride3q*2 +48], m3 lea dstq, [dstq+strideq*8] dec nlines4d jnz .loop REP_RET INIT_XMM sse2 cglobal highbd_tm_predictor_4x4, 5, 5, 6, dst, stride, above, left, bd movd m1, [aboveq-2] movq m0, [aboveq] pshuflw m1, m1, 0x0 movlhps m0, m0 ; t1 t2 t3 t4 t1 t2 t3 t4 movlhps m1, m1 ; tl tl tl tl tl tl tl tl ; Get the values to compute the maximum value at this bit depth pcmpeqw m3, m3 movd m4, bdd psubw m0, m1 ; t1-tl t2-tl t3-tl t4-tl psllw m3, m4 pcmpeqw m2, m2 pxor m4, m4 ; min possible value pxor m3, m2 ; max possible value mova m1, [leftq] pshuflw m2, m1, 0x0 pshuflw m5, m1, 0x55 movlhps m2, m5 ; l1 l1 l1 l1 l2 l2 l2 l2 paddw m2, m0 ;Clamp to the bit-depth pminsw m2, m3 pmaxsw m2, m4 ;Store the values movq [dstq ], m2 movhpd [dstq+strideq*2], m2 lea dstq, [dstq+strideq*4] pshuflw m2, m1, 0xaa pshuflw m5, m1, 0xff movlhps m2, m5 paddw m2, m0 ;Clamp to the bit-depth pminsw m2, m3 pmaxsw m2, m4 ;Store the values movq [dstq ], m2 movhpd [dstq+strideq*2], m2 RET INIT_XMM sse2 cglobal highbd_tm_predictor_8x8, 5, 6, 5, dst, stride, above, left, bd, one movd m1, [aboveq-2] mova m0, [aboveq] pshuflw m1, m1, 0x0 ; Get the values to compute the maximum value at this bit depth mov oned, 1 pxor m3, m3 pxor m4, m4 pinsrw m3, oned, 0 pinsrw m4, bdd, 0 pshuflw m3, m3, 0x0 DEFINE_ARGS dst, stride, line, left punpcklqdq m3, m3 mov lineq, -4 mova m2, m3 punpcklqdq m1, m1 psllw m3, m4 add leftq, 16 psubw m3, m2 ; max possible value pxor m4, m4 ; min possible value psubw m0, m1 .loop: movd m1, [leftq+lineq*4] movd m2, [leftq+lineq*4+2] pshuflw m1, m1, 0x0 pshuflw m2, m2, 0x0 punpcklqdq m1, m1 punpcklqdq m2, m2 paddw m1, m0 paddw m2, m0 ;Clamp to the bit-depth pminsw m1, m3 pminsw m2, m3 pmaxsw m1, m4 pmaxsw m2, m4 ;Store the values mova [dstq ], m1 mova [dstq+strideq*2], m2 lea dstq, [dstq+strideq*4] inc lineq jnz .loop REP_RET INIT_XMM sse2 cglobal highbd_tm_predictor_16x16, 5, 5, 8, dst, stride, above, left, bd movd m2, [aboveq-2] mova m0, [aboveq] mova m1, [aboveq+16] pshuflw m2, m2, 0x0 ; Get the values to compute the maximum value at this bit depth pcmpeqw m3, m3 movd m4, bdd punpcklqdq m2, m2 psllw m3, m4 pcmpeqw m5, m5 pxor m4, m4 ; min possible value pxor m3, m5 ; max 
possible value DEFINE_ARGS dst, stride, line, left mov lineq, -8 psubw m0, m2 psubw m1, m2 .loop: movd m7, [leftq] pshuflw m5, m7, 0x0 pshuflw m2, m7, 0x55 punpcklqdq m5, m5 ; l1 l1 l1 l1 l1 l1 l1 l1 punpcklqdq m2, m2 ; l2 l2 l2 l2 l2 l2 l2 l2 paddw m6, m5, m0 ; t1-tl+l1 to t4-tl+l1 paddw m5, m1 ; t5-tl+l1 to t8-tl+l1 pminsw m6, m3 pminsw m5, m3 pmaxsw m6, m4 ; Clamp to the bit-depth pmaxsw m5, m4 mova [dstq ], m6 mova [dstq +16], m5 paddw m6, m2, m0 paddw m2, m1 pminsw m6, m3 pminsw m2, m3 pmaxsw m6, m4 pmaxsw m2, m4 mova [dstq+strideq*2 ], m6 mova [dstq+strideq*2+16], m2 lea dstq, [dstq+strideq*4] inc lineq lea leftq, [leftq+4] jnz .loop REP_RET INIT_XMM sse2 cglobal highbd_tm_predictor_32x32, 5, 5, 8, dst, stride, above, left, bd movd m0, [aboveq-2] mova m1, [aboveq] mova m2, [aboveq+16] mova m3, [aboveq+32] mova m4, [aboveq+48] pshuflw m0, m0, 0x0 ; Get the values to compute the maximum value at this bit depth pcmpeqw m5, m5 movd m6, bdd psllw m5, m6 pcmpeqw m7, m7 pxor m6, m6 ; min possible value pxor m5, m7 ; max possible value punpcklqdq m0, m0 DEFINE_ARGS dst, stride, line, left mov lineq, -16 psubw m1, m0 psubw m2, m0 psubw m3, m0 psubw m4, m0 .loop: movd m7, [leftq] pshuflw m7, m7, 0x0 punpcklqdq m7, m7 ; l1 l1 l1 l1 l1 l1 l1 l1 paddw m0, m7, m1 pminsw m0, m5 pmaxsw m0, m6 mova [dstq ], m0 paddw m0, m7, m2 pminsw m0, m5 pmaxsw m0, m6 mova [dstq +16], m0 paddw m0, m7, m3 pminsw m0, m5 pmaxsw m0, m6 mova [dstq +32], m0 paddw m0, m7, m4 pminsw m0, m5 pmaxsw m0, m6 mova [dstq +48], m0 movd m7, [leftq+2] pshuflw m7, m7, 0x0 punpcklqdq m7, m7 ; l2 l2 l2 l2 l2 l2 l2 l2 paddw m0, m7, m1 pminsw m0, m5 pmaxsw m0, m6 mova [dstq+strideq*2 ], m0 paddw m0, m7, m2 pminsw m0, m5 pmaxsw m0, m6 mova [dstq+strideq*2+16], m0 paddw m0, m7, m3 pminsw m0, m5 pmaxsw m0, m6 mova [dstq+strideq*2+32], m0 paddw m0, m7, m4 pminsw m0, m5 pmaxsw m0, m6 mova [dstq+strideq*2+48], m0 lea dstq, [dstq+strideq*4] lea leftq, [leftq+4] inc lineq jnz .loop REP_RET libvpx-1.8.2/vpx_dsp/x86/highbd_inv_txfm_sse2.h000066400000000000000000000371711357355204000214170ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE2_H_ #define VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE2_H_ #include // SSE2 #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/inv_txfm.h" #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" // Note: There is no 64-bit bit-level shifting SIMD instruction. All // coefficients are left shifted by 2, so that dct_const_round_shift() can be // done by right shifting 2 bytes. 
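
// Illustrative scalar model of the trick described above (not part of the
// original file; the helper name is hypothetical). With every coefficient
// pre-shifted left by 2, dct_const_round_shift(x), i.e.
// (x + DCT_CONST_ROUNDING) >> DCT_CONST_BITS with DCT_CONST_BITS == 14,
// becomes a 16-bit right shift, and 16 bits == 2 bytes is exactly what the
// byte-granular _mm_srli_si128() used below can provide:
//   (x + DCT_CONST_ROUNDING) >> 14 == (4 * x + (DCT_CONST_ROUNDING << 2)) >> 16
static INLINE tran_high_t scalar_round_shift_demo(const tran_high_t x) {
  const tran_high_t x4 = x * 4;  // coefficients arrive pre-scaled by 1 << 2
  return (x4 + (DCT_CONST_ROUNDING << 2)) >> (DCT_CONST_BITS + 2);
}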
static INLINE void extend_64bit(const __m128i in,
                                __m128i *const out /*out[2]*/) {
  out[0] = _mm_unpacklo_epi32(in, in);  // 0, 0, 1, 1
  out[1] = _mm_unpackhi_epi32(in, in);  // 2, 2, 3, 3
}

static INLINE __m128i wraplow_16bit_shift4(const __m128i in0, const __m128i in1,
                                           const __m128i rounding) {
  __m128i temp[2];
  temp[0] = _mm_add_epi32(in0, rounding);
  temp[1] = _mm_add_epi32(in1, rounding);
  temp[0] = _mm_srai_epi32(temp[0], 4);
  temp[1] = _mm_srai_epi32(temp[1], 4);
  return _mm_packs_epi32(temp[0], temp[1]);
}

static INLINE __m128i wraplow_16bit_shift5(const __m128i in0, const __m128i in1,
                                           const __m128i rounding) {
  __m128i temp[2];
  temp[0] = _mm_add_epi32(in0, rounding);
  temp[1] = _mm_add_epi32(in1, rounding);
  temp[0] = _mm_srai_epi32(temp[0], 5);
  temp[1] = _mm_srai_epi32(temp[1], 5);
  return _mm_packs_epi32(temp[0], temp[1]);
}

static INLINE __m128i dct_const_round_shift_64bit(const __m128i in) {
  const __m128i t =
      _mm_add_epi64(in, pair_set_epi32(DCT_CONST_ROUNDING << 2, 0));
  return _mm_srli_si128(t, 2);
}

static INLINE __m128i pack_4(const __m128i in0, const __m128i in1) {
  const __m128i t0 = _mm_unpacklo_epi32(in0, in1);  // 0, 2
  const __m128i t1 = _mm_unpackhi_epi32(in0, in1);  // 1, 3
  return _mm_unpacklo_epi32(t0, t1);                // 0, 1, 2, 3
}

static INLINE void abs_extend_64bit_sse2(const __m128i in,
                                         __m128i *const out /*out[2]*/,
                                         __m128i *const sign /*sign[2]*/) {
  sign[0] = _mm_srai_epi32(in, 31);
  out[0] = _mm_xor_si128(in, sign[0]);
  out[0] = _mm_sub_epi32(out[0], sign[0]);
  sign[1] = _mm_unpackhi_epi32(sign[0], sign[0]);  // 64-bit sign of 2, 3
  sign[0] = _mm_unpacklo_epi32(sign[0], sign[0]);  // 64-bit sign of 0, 1
  out[1] = _mm_unpackhi_epi32(out[0], out[0]);     // 2, 3
  out[0] = _mm_unpacklo_epi32(out[0], out[0]);     // 0, 1
}

// Note: cospi must be non negative.
static INLINE __m128i multiply_apply_sign_sse2(const __m128i in,
                                               const __m128i sign,
                                               const __m128i cospi) {
  __m128i out = _mm_mul_epu32(in, cospi);
  out = _mm_xor_si128(out, sign);
  return _mm_sub_epi64(out, sign);
}

// Note: c must be non negative.
static INLINE __m128i multiplication_round_shift_sse2(
    const __m128i *const in /*in[2]*/, const __m128i *const sign /*sign[2]*/,
    const int c) {
  const __m128i pair_c = pair_set_epi32(c << 2, 0);
  __m128i t0, t1;

  assert(c >= 0);
  t0 = multiply_apply_sign_sse2(in[0], sign[0], pair_c);
  t1 = multiply_apply_sign_sse2(in[1], sign[1], pair_c);
  t0 = dct_const_round_shift_64bit(t0);
  t1 = dct_const_round_shift_64bit(t1);

  return pack_4(t0, t1);
}

// Note: c must be non negative.
static INLINE __m128i multiplication_neg_round_shift_sse2(
    const __m128i *const in /*in[2]*/, const __m128i *const sign /*sign[2]*/,
    const int c) {
  const __m128i pair_c = pair_set_epi32(c << 2, 0);
  __m128i t0, t1;

  assert(c >= 0);
  t0 = multiply_apply_sign_sse2(in[0], sign[0], pair_c);
  t1 = multiply_apply_sign_sse2(in[1], sign[1], pair_c);
  t0 = _mm_sub_epi64(_mm_setzero_si128(), t0);
  t1 = _mm_sub_epi64(_mm_setzero_si128(), t1);
  t0 = dct_const_round_shift_64bit(t0);
  t1 = dct_const_round_shift_64bit(t1);

  return pack_4(t0, t1);
}

// Note: c0 and c1 must be non negative.
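// Illustrative scalar sketch (not part of the original file; the helper name
// is hypothetical): ignoring the SIMD plumbing, the rotation performed by
// highbd_butterfly_sse2() below is
static INLINE void scalar_butterfly_demo(const tran_high_t in0,
                                         const tran_high_t in1, const int c0,
                                         const int c1, tran_high_t *const out0,
                                         tran_high_t *const out1) {
  *out0 = dct_const_round_shift(in0 * c0 - in1 * c1);
  *out1 = dct_const_round_shift(in0 * c1 + in1 * c0);
}
// SSE2 lacks a signed 32x32->64 multiply (_mm_mul_epi32 is SSE4.1), so the
// version below multiplies absolute values with _mm_mul_epu32 and re-applies
// the sign afterwards, which is why c0 and c1 must be non negative: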
static INLINE void highbd_butterfly_sse2(const __m128i in0, const __m128i in1, const int c0, const int c1, __m128i *const out0, __m128i *const out1) { const __m128i pair_c0 = pair_set_epi32(c0 << 2, 0); const __m128i pair_c1 = pair_set_epi32(c1 << 2, 0); __m128i temp1[4], temp2[4], sign1[2], sign2[2]; assert(c0 >= 0); assert(c1 >= 0); abs_extend_64bit_sse2(in0, temp1, sign1); abs_extend_64bit_sse2(in1, temp2, sign2); temp1[2] = multiply_apply_sign_sse2(temp1[0], sign1[0], pair_c1); temp1[3] = multiply_apply_sign_sse2(temp1[1], sign1[1], pair_c1); temp1[0] = multiply_apply_sign_sse2(temp1[0], sign1[0], pair_c0); temp1[1] = multiply_apply_sign_sse2(temp1[1], sign1[1], pair_c0); temp2[2] = multiply_apply_sign_sse2(temp2[0], sign2[0], pair_c0); temp2[3] = multiply_apply_sign_sse2(temp2[1], sign2[1], pair_c0); temp2[0] = multiply_apply_sign_sse2(temp2[0], sign2[0], pair_c1); temp2[1] = multiply_apply_sign_sse2(temp2[1], sign2[1], pair_c1); temp1[0] = _mm_sub_epi64(temp1[0], temp2[0]); temp1[1] = _mm_sub_epi64(temp1[1], temp2[1]); temp2[0] = _mm_add_epi64(temp1[2], temp2[2]); temp2[1] = _mm_add_epi64(temp1[3], temp2[3]); temp1[0] = dct_const_round_shift_64bit(temp1[0]); temp1[1] = dct_const_round_shift_64bit(temp1[1]); temp2[0] = dct_const_round_shift_64bit(temp2[0]); temp2[1] = dct_const_round_shift_64bit(temp2[1]); *out0 = pack_4(temp1[0], temp1[1]); *out1 = pack_4(temp2[0], temp2[1]); } // Note: c0 and c1 must be non negative. static INLINE void highbd_partial_butterfly_sse2(const __m128i in, const int c0, const int c1, __m128i *const out0, __m128i *const out1) { __m128i temp[2], sign[2]; assert(c0 >= 0); assert(c1 >= 0); abs_extend_64bit_sse2(in, temp, sign); *out0 = multiplication_round_shift_sse2(temp, sign, c0); *out1 = multiplication_round_shift_sse2(temp, sign, c1); } // Note: c0 and c1 must be non negative. 
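// Illustrative scalar model of the abs/sign handling used above (not part of
// the original file; the helper name is hypothetical). With s = x >> 31
// (arithmetic shift, so s is 0 or -1), (x ^ s) - s yields |x|, and applying
// the same xor/subtract pair to the 64-bit product restores the sign:
static INLINE int64_t scalar_mul_apply_sign_demo(const int32_t x,
                                                 const uint32_t c) {
  const int32_t s = x >> 31;  // 0 if x >= 0, -1 otherwise
  const uint32_t abs_x = ((uint32_t)x ^ (uint32_t)s) - (uint32_t)s;
  const int64_t product = (int64_t)((uint64_t)abs_x * c);  // unsigned 32x32->64
  return (product ^ (int64_t)s) - (int64_t)s;  // re-apply the sign
}
// The non-negativity note above applies to the next helper as well: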
static INLINE void highbd_partial_butterfly_neg_sse2(const __m128i in, const int c0, const int c1, __m128i *const out0, __m128i *const out1) { __m128i temp[2], sign[2]; assert(c0 >= 0); assert(c1 >= 0); abs_extend_64bit_sse2(in, temp, sign); *out0 = multiplication_neg_round_shift_sse2(temp, sign, c1); *out1 = multiplication_round_shift_sse2(temp, sign, c0); } static INLINE void highbd_butterfly_cospi16_sse2(const __m128i in0, const __m128i in1, __m128i *const out0, __m128i *const out1) { __m128i temp1[2], temp2, sign[2]; temp2 = _mm_add_epi32(in0, in1); abs_extend_64bit_sse2(temp2, temp1, sign); *out0 = multiplication_round_shift_sse2(temp1, sign, cospi_16_64); temp2 = _mm_sub_epi32(in0, in1); abs_extend_64bit_sse2(temp2, temp1, sign); *out1 = multiplication_round_shift_sse2(temp1, sign, cospi_16_64); } // Only do addition and subtraction butterfly, size = 16, 32 static INLINE void highbd_add_sub_butterfly(const __m128i *in, __m128i *out, int size) { int i = 0; const int num = size >> 1; const int bound = size - 1; while (i < num) { out[i] = _mm_add_epi32(in[i], in[bound - i]); out[bound - i] = _mm_sub_epi32(in[i], in[bound - i]); i++; } } static INLINE void highbd_idct8_stage4(const __m128i *const in, __m128i *const out) { out[0] = _mm_add_epi32(in[0], in[7]); out[1] = _mm_add_epi32(in[1], in[6]); out[2] = _mm_add_epi32(in[2], in[5]); out[3] = _mm_add_epi32(in[3], in[4]); out[4] = _mm_sub_epi32(in[3], in[4]); out[5] = _mm_sub_epi32(in[2], in[5]); out[6] = _mm_sub_epi32(in[1], in[6]); out[7] = _mm_sub_epi32(in[0], in[7]); } static INLINE void highbd_idct8x8_final_round(__m128i *const io) { io[0] = wraplow_16bit_shift5(io[0], io[8], _mm_set1_epi32(16)); io[1] = wraplow_16bit_shift5(io[1], io[9], _mm_set1_epi32(16)); io[2] = wraplow_16bit_shift5(io[2], io[10], _mm_set1_epi32(16)); io[3] = wraplow_16bit_shift5(io[3], io[11], _mm_set1_epi32(16)); io[4] = wraplow_16bit_shift5(io[4], io[12], _mm_set1_epi32(16)); io[5] = wraplow_16bit_shift5(io[5], io[13], _mm_set1_epi32(16)); io[6] = wraplow_16bit_shift5(io[6], io[14], _mm_set1_epi32(16)); io[7] = wraplow_16bit_shift5(io[7], io[15], _mm_set1_epi32(16)); } static INLINE void highbd_idct16_4col_stage7(const __m128i *const in, __m128i *const out) { out[0] = _mm_add_epi32(in[0], in[15]); out[1] = _mm_add_epi32(in[1], in[14]); out[2] = _mm_add_epi32(in[2], in[13]); out[3] = _mm_add_epi32(in[3], in[12]); out[4] = _mm_add_epi32(in[4], in[11]); out[5] = _mm_add_epi32(in[5], in[10]); out[6] = _mm_add_epi32(in[6], in[9]); out[7] = _mm_add_epi32(in[7], in[8]); out[8] = _mm_sub_epi32(in[7], in[8]); out[9] = _mm_sub_epi32(in[6], in[9]); out[10] = _mm_sub_epi32(in[5], in[10]); out[11] = _mm_sub_epi32(in[4], in[11]); out[12] = _mm_sub_epi32(in[3], in[12]); out[13] = _mm_sub_epi32(in[2], in[13]); out[14] = _mm_sub_epi32(in[1], in[14]); out[15] = _mm_sub_epi32(in[0], in[15]); } static INLINE __m128i add_clamp(const __m128i in0, const __m128i in1, const int bd) { const __m128i zero = _mm_set1_epi16(0); // Faster than _mm_set1_epi16((1 << bd) - 1). 
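// (For example, bd == 10 builds max = (1 << 10) - 1 = 1023 as
// (one << bd) - one, staying entirely inside 16-bit lanes instead of
// broadcasting a runtime-computed scalar.)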
const __m128i one = _mm_set1_epi16(1); const __m128i max = _mm_sub_epi16(_mm_slli_epi16(one, bd), one); __m128i d; d = _mm_adds_epi16(in0, in1); d = _mm_max_epi16(d, zero); d = _mm_min_epi16(d, max); return d; } static INLINE void highbd_idct_1_add_kernel(const tran_low_t *input, uint16_t *dest, int stride, int bd, const int size) { int a1, i, j; tran_low_t out; __m128i dc, d; out = HIGHBD_WRAPLOW( dct_const_round_shift(input[0] * (tran_high_t)cospi_16_64), bd); out = HIGHBD_WRAPLOW(dct_const_round_shift(out * (tran_high_t)cospi_16_64), bd); a1 = ROUND_POWER_OF_TWO(out, (size == 8) ? 5 : 6); dc = _mm_set1_epi16(a1); for (i = 0; i < size; ++i) { for (j = 0; j < size; j += 8) { d = _mm_load_si128((const __m128i *)(&dest[j])); d = add_clamp(d, dc, bd); _mm_store_si128((__m128i *)(&dest[j]), d); } dest += stride; } } static INLINE void recon_and_store_4(const __m128i in, uint16_t *const dest, const int bd) { __m128i d; d = _mm_loadl_epi64((const __m128i *)dest); d = add_clamp(d, in, bd); _mm_storel_epi64((__m128i *)dest, d); } static INLINE void recon_and_store_4x2(const __m128i in, uint16_t *const dest, const int stride, const int bd) { __m128i d; d = _mm_loadl_epi64((const __m128i *)(dest + 0 * stride)); d = _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(d), (const __m64 *)(dest + 1 * stride))); d = add_clamp(d, in, bd); _mm_storel_epi64((__m128i *)(dest + 0 * stride), d); _mm_storeh_pi((__m64 *)(dest + 1 * stride), _mm_castsi128_ps(d)); } static INLINE void recon_and_store_4x4(const __m128i *const in, uint16_t *dest, const int stride, const int bd) { recon_and_store_4x2(in[0], dest, stride, bd); dest += 2 * stride; recon_and_store_4x2(in[1], dest, stride, bd); } static INLINE void recon_and_store_8(const __m128i in, uint16_t **const dest, const int stride, const int bd) { __m128i d; d = _mm_load_si128((const __m128i *)(*dest)); d = add_clamp(d, in, bd); _mm_store_si128((__m128i *)(*dest), d); *dest += stride; } static INLINE void recon_and_store_8x8(const __m128i *const in, uint16_t *dest, const int stride, const int bd) { recon_and_store_8(in[0], &dest, stride, bd); recon_and_store_8(in[1], &dest, stride, bd); recon_and_store_8(in[2], &dest, stride, bd); recon_and_store_8(in[3], &dest, stride, bd); recon_and_store_8(in[4], &dest, stride, bd); recon_and_store_8(in[5], &dest, stride, bd); recon_and_store_8(in[6], &dest, stride, bd); recon_and_store_8(in[7], &dest, stride, bd); } static INLINE __m128i load_pack_8_32bit(const tran_low_t *const input) { const __m128i t0 = _mm_load_si128((const __m128i *)(input + 0)); const __m128i t1 = _mm_load_si128((const __m128i *)(input + 4)); return _mm_packs_epi32(t0, t1); } static INLINE void highbd_load_pack_transpose_32bit_8x8(const tran_low_t *input, const int stride, __m128i *const in) { in[0] = load_pack_8_32bit(input + 0 * stride); in[1] = load_pack_8_32bit(input + 1 * stride); in[2] = load_pack_8_32bit(input + 2 * stride); in[3] = load_pack_8_32bit(input + 3 * stride); in[4] = load_pack_8_32bit(input + 4 * stride); in[5] = load_pack_8_32bit(input + 5 * stride); in[6] = load_pack_8_32bit(input + 6 * stride); in[7] = load_pack_8_32bit(input + 7 * stride); transpose_16bit_8x8(in, in); } static INLINE void highbd_load_transpose_32bit_8x4(const tran_low_t *input, const int stride, __m128i *in) { in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride + 0)); in[1] = _mm_load_si128((const __m128i *)(input + 0 * stride + 4)); in[2] = _mm_load_si128((const __m128i *)(input + 1 * stride + 0)); in[3] = _mm_load_si128((const __m128i *)(input + 1 * stride 
+ 4)); in[4] = _mm_load_si128((const __m128i *)(input + 2 * stride + 0)); in[5] = _mm_load_si128((const __m128i *)(input + 2 * stride + 4)); in[6] = _mm_load_si128((const __m128i *)(input + 3 * stride + 0)); in[7] = _mm_load_si128((const __m128i *)(input + 3 * stride + 4)); transpose_32bit_8x4(in, in); } static INLINE void highbd_load_transpose_32bit_4x4(const tran_low_t *input, const int stride, __m128i *in) { in[0] = _mm_load_si128((const __m128i *)(input + 0 * stride)); in[1] = _mm_load_si128((const __m128i *)(input + 1 * stride)); in[2] = _mm_load_si128((const __m128i *)(input + 2 * stride)); in[3] = _mm_load_si128((const __m128i *)(input + 3 * stride)); transpose_32bit_4x4(in, in); } static INLINE void highbd_write_buffer_8(uint16_t *dest, const __m128i in, const int bd) { const __m128i final_rounding = _mm_set1_epi16(1 << 5); __m128i out; out = _mm_adds_epi16(in, final_rounding); out = _mm_srai_epi16(out, 6); recon_and_store_8(out, &dest, 0, bd); } static INLINE void highbd_write_buffer_4(uint16_t *const dest, const __m128i in, const int bd) { const __m128i final_rounding = _mm_set1_epi32(1 << 5); __m128i out; out = _mm_add_epi32(in, final_rounding); out = _mm_srai_epi32(out, 6); out = _mm_packs_epi32(out, out); recon_and_store_4(out, dest, bd); } #endif // VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE2_H_ libvpx-1.8.2/vpx_dsp/x86/highbd_inv_txfm_sse4.h000066400000000000000000000104141357355204000214100ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#ifndef VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE4_H_
#define VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE4_H_

#include <smmintrin.h>  // SSE4.1

#include "./vpx_config.h"
#include "vpx_dsp/x86/highbd_inv_txfm_sse2.h"

static INLINE __m128i multiplication_round_shift_sse4_1(
    const __m128i *const in /*in[2]*/, const int c) {
  const __m128i pair_c = pair_set_epi32(c * 4, 0);
  __m128i t0, t1;

  t0 = _mm_mul_epi32(in[0], pair_c);
  t1 = _mm_mul_epi32(in[1], pair_c);
  t0 = dct_const_round_shift_64bit(t0);
  t1 = dct_const_round_shift_64bit(t1);

  return pack_4(t0, t1);
}

static INLINE void highbd_butterfly_sse4_1(const __m128i in0,
                                           const __m128i in1, const int c0,
                                           const int c1, __m128i *const out0,
                                           __m128i *const out1) {
  const __m128i pair_c0 = pair_set_epi32(4 * c0, 0);
  const __m128i pair_c1 = pair_set_epi32(4 * c1, 0);
  __m128i temp1[4], temp2[4];

  extend_64bit(in0, temp1);
  extend_64bit(in1, temp2);
  temp1[2] = _mm_mul_epi32(temp1[0], pair_c1);
  temp1[3] = _mm_mul_epi32(temp1[1], pair_c1);
  temp1[0] = _mm_mul_epi32(temp1[0], pair_c0);
  temp1[1] = _mm_mul_epi32(temp1[1], pair_c0);
  temp2[2] = _mm_mul_epi32(temp2[0], pair_c0);
  temp2[3] = _mm_mul_epi32(temp2[1], pair_c0);
  temp2[0] = _mm_mul_epi32(temp2[0], pair_c1);
  temp2[1] = _mm_mul_epi32(temp2[1], pair_c1);
  temp1[0] = _mm_sub_epi64(temp1[0], temp2[0]);
  temp1[1] = _mm_sub_epi64(temp1[1], temp2[1]);
  temp2[0] = _mm_add_epi64(temp1[2], temp2[2]);
  temp2[1] = _mm_add_epi64(temp1[3], temp2[3]);
  temp1[0] = dct_const_round_shift_64bit(temp1[0]);
  temp1[1] = dct_const_round_shift_64bit(temp1[1]);
  temp2[0] = dct_const_round_shift_64bit(temp2[0]);
  temp2[1] = dct_const_round_shift_64bit(temp2[1]);
  *out0 = pack_4(temp1[0], temp1[1]);
  *out1 = pack_4(temp2[0], temp2[1]);
}

static INLINE void highbd_butterfly_cospi16_sse4_1(const __m128i in0,
                                                   const __m128i in1,
                                                   __m128i *const out0,
                                                   __m128i *const out1) {
  __m128i temp1[2], temp2;

  temp2 = _mm_add_epi32(in0, in1);
  extend_64bit(temp2, temp1);
  *out0 = multiplication_round_shift_sse4_1(temp1, cospi_16_64);
  temp2 = _mm_sub_epi32(in0, in1);
  extend_64bit(temp2, temp1);
  *out1 = multiplication_round_shift_sse4_1(temp1, cospi_16_64);
}

static INLINE void highbd_partial_butterfly_sse4_1(const __m128i in,
                                                   const int c0, const int c1,
                                                   __m128i *const out0,
                                                   __m128i *const out1) {
  __m128i temp[2];

  extend_64bit(in, temp);
  *out0 = multiplication_round_shift_sse4_1(temp, c0);
  *out1 = multiplication_round_shift_sse4_1(temp, c1);
}

static INLINE void highbd_idct4_sse4_1(__m128i *const io) {
  __m128i temp[2], step[4];

  transpose_32bit_4x4(io, io);

  // stage 1
  temp[0] = _mm_add_epi32(io[0], io[2]);  // input[0] + input[2]
  extend_64bit(temp[0], temp);
  step[0] = multiplication_round_shift_sse4_1(temp, cospi_16_64);
  temp[0] = _mm_sub_epi32(io[0], io[2]);  // input[0] - input[2]
  extend_64bit(temp[0], temp);
  step[1] = multiplication_round_shift_sse4_1(temp, cospi_16_64);
  highbd_butterfly_sse4_1(io[1], io[3], cospi_24_64, cospi_8_64, &step[2],
                          &step[3]);

  // stage 2
  io[0] = _mm_add_epi32(step[0], step[3]);  // step[0] + step[3]
  io[1] = _mm_add_epi32(step[1], step[2]);  // step[1] + step[2]
  io[2] = _mm_sub_epi32(step[1], step[2]);  // step[1] - step[2]
  io[3] = _mm_sub_epi32(step[0], step[3]);  // step[0] - step[3]
}

void vpx_highbd_idct8x8_half1d_sse4_1(__m128i *const io);

void vpx_highbd_idct16_4col_sse4_1(__m128i *const io /*io[16]*/);

#endif  // VPX_VPX_DSP_X86_HIGHBD_INV_TXFM_SSE4_H_
libvpx-1.8.2/vpx_dsp/x86/highbd_loopfilter_sse2.c000066400000000000000000001276261357355204000217330ustar00rootroot00000000000000/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <emmintrin.h>  // SSE2

#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"

static INLINE __m128i signed_char_clamp_bd_sse2(__m128i value, int bd) {
  __m128i ubounded;
  __m128i lbounded;
  __m128i retval;
  const __m128i zero = _mm_set1_epi16(0);
  const __m128i one = _mm_set1_epi16(1);
  __m128i t80, max, min;

  if (bd == 8) {
    t80 = _mm_set1_epi16(0x80);
    max = _mm_subs_epi16(_mm_subs_epi16(_mm_slli_epi16(one, 8), one), t80);
  } else if (bd == 10) {
    t80 = _mm_set1_epi16(0x200);
    max = _mm_subs_epi16(_mm_subs_epi16(_mm_slli_epi16(one, 10), one), t80);
  } else {  // bd == 12
    t80 = _mm_set1_epi16(0x800);
    max = _mm_subs_epi16(_mm_subs_epi16(_mm_slli_epi16(one, 12), one), t80);
  }

  min = _mm_subs_epi16(zero, t80);

  ubounded = _mm_cmpgt_epi16(value, max);
  lbounded = _mm_cmplt_epi16(value, min);
  retval = _mm_andnot_si128(_mm_or_si128(ubounded, lbounded), value);
  ubounded = _mm_and_si128(ubounded, max);
  lbounded = _mm_and_si128(lbounded, min);
  retval = _mm_or_si128(retval, ubounded);
  retval = _mm_or_si128(retval, lbounded);
  return retval;
}

// TODO(debargha, peter): Break up large functions into smaller ones
// in this file.
void vpx_highbd_lpf_horizontal_16_sse2(uint16_t *s, int pitch,
                                       const uint8_t *blimit,
                                       const uint8_t *limit,
                                       const uint8_t *thresh, int bd) {
  const __m128i zero = _mm_set1_epi16(0);
  const __m128i one = _mm_set1_epi16(1);
  __m128i blimit_v, limit_v, thresh_v;
  __m128i q7, p7, q6, p6, q5, p5, q4, p4, q3, p3, q2, p2, q1, p1, q0, p0;
  __m128i mask, hev, flat, flat2, abs_p1p0, abs_q1q0;
  __m128i ps1, qs1, ps0, qs0;
  __m128i abs_p0q0, abs_p1q1, ffff, work;
  __m128i filt, work_a, filter1, filter2;
  __m128i flat2_q6, flat2_p6, flat2_q5, flat2_p5, flat2_q4, flat2_p4;
  __m128i flat2_q3, flat2_p3, flat2_q2, flat2_p2, flat2_q1, flat2_p1;
  __m128i flat2_q0, flat2_p0;
  __m128i flat_q2, flat_p2, flat_q1, flat_p1, flat_q0, flat_p0;
  __m128i pixelFilter_p, pixelFilter_q;
  __m128i pixetFilter_p2p1p0, pixetFilter_q2q1q0;
  __m128i sum_p7, sum_q7, sum_p3, sum_q3;
  __m128i t4, t3, t80, t1;
  __m128i eight, four;

  if (bd == 8) {
    blimit_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero);
    limit_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero);
    thresh_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero);
  } else if (bd == 10) {
    blimit_v = _mm_slli_epi16(
        _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero), 2);
    limit_v = _mm_slli_epi16(
        _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero), 2);
    thresh_v = _mm_slli_epi16(
        _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero), 2);
  } else {  // bd == 12
    blimit_v = _mm_slli_epi16(
        _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero), 4);
    limit_v = _mm_slli_epi16(
        _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero), 4);
    thresh_v = _mm_slli_epi16(
        _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero), 4);
  }

  q4 = _mm_load_si128((__m128i *)(s + 4 * pitch));
  p4 = _mm_load_si128((__m128i *)(s - 5 * pitch));
  q3 = _mm_load_si128((__m128i *)(s + 3 * pitch));
  p3 = _mm_load_si128((__m128i *)(s - 4 * pitch));
  q2 = _mm_load_si128((__m128i *)(s + 2 * pitch));
  p2 = _mm_load_si128((__m128i *)(s - 3 * pitch));
  q1 = _mm_load_si128((__m128i *)(s + 1 * pitch));
  p1 =
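// Tap layout around the edge at s: p_k is loaded from s - (k + 1) * pitch
// and q_k from s + k * pitch, so p0/q0 are the rows immediately above and
// below the filtered edge.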
_mm_load_si128((__m128i *)(s - 2 * pitch)); q0 = _mm_load_si128((__m128i *)(s + 0 * pitch)); p0 = _mm_load_si128((__m128i *)(s - 1 * pitch)); // highbd_filter_mask abs_p1p0 = _mm_or_si128(_mm_subs_epu16(p1, p0), _mm_subs_epu16(p0, p1)); abs_q1q0 = _mm_or_si128(_mm_subs_epu16(q1, q0), _mm_subs_epu16(q0, q1)); ffff = _mm_cmpeq_epi16(abs_p1p0, abs_p1p0); abs_p0q0 = _mm_or_si128(_mm_subs_epu16(p0, q0), _mm_subs_epu16(q0, p0)); abs_p1q1 = _mm_or_si128(_mm_subs_epu16(p1, q1), _mm_subs_epu16(q1, p1)); // highbd_hev_mask (in C code this is actually called from highbd_filter4) flat = _mm_max_epi16(abs_p1p0, abs_q1q0); hev = _mm_subs_epu16(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi16(hev, zero), ffff); abs_p0q0 = _mm_adds_epu16(abs_p0q0, abs_p0q0); // abs(p0 - q0) * 2 abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1); // abs(p1 - q1) / 2 mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff); mask = _mm_and_si128(mask, _mm_adds_epu16(limit_v, one)); work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p1, p0), _mm_subs_epu16(p0, p1)), _mm_or_si128(_mm_subs_epu16(q1, q0), _mm_subs_epu16(q0, q1))); mask = _mm_max_epi16(work, mask); work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p2, p1), _mm_subs_epu16(p1, p2)), _mm_or_si128(_mm_subs_epu16(q2, q1), _mm_subs_epu16(q1, q2))); mask = _mm_max_epi16(work, mask); work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p3, p2), _mm_subs_epu16(p2, p3)), _mm_or_si128(_mm_subs_epu16(q3, q2), _mm_subs_epu16(q2, q3))); mask = _mm_max_epi16(work, mask); mask = _mm_subs_epu16(mask, limit_v); mask = _mm_cmpeq_epi16(mask, zero); // return ~mask // lp filter // highbd_filter4 t4 = _mm_set1_epi16(4); t3 = _mm_set1_epi16(3); if (bd == 8) t80 = _mm_set1_epi16(0x80); else if (bd == 10) t80 = _mm_set1_epi16(0x200); else // bd == 12 t80 = _mm_set1_epi16(0x800); t1 = _mm_set1_epi16(0x1); ps1 = _mm_subs_epi16(p1, t80); qs1 = _mm_subs_epi16(q1, t80); ps0 = _mm_subs_epi16(p0, t80); qs0 = _mm_subs_epi16(q0, t80); filt = _mm_and_si128(signed_char_clamp_bd_sse2(_mm_subs_epi16(ps1, qs1), bd), hev); work_a = _mm_subs_epi16(qs0, ps0); filt = _mm_adds_epi16(filt, work_a); filt = _mm_adds_epi16(filt, work_a); filt = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, work_a), bd); filt = _mm_and_si128(filt, mask); filter1 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t4), bd); filter2 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t3), bd); // Filter1 >> 3 filter1 = _mm_srai_epi16(filter1, 0x3); filter2 = _mm_srai_epi16(filter2, 0x3); qs0 = _mm_adds_epi16( signed_char_clamp_bd_sse2(_mm_subs_epi16(qs0, filter1), bd), t80); ps0 = _mm_adds_epi16( signed_char_clamp_bd_sse2(_mm_adds_epi16(ps0, filter2), bd), t80); filt = _mm_adds_epi16(filter1, t1); filt = _mm_srai_epi16(filt, 1); filt = _mm_andnot_si128(hev, filt); qs1 = _mm_adds_epi16(signed_char_clamp_bd_sse2(_mm_subs_epi16(qs1, filt), bd), t80); ps1 = _mm_adds_epi16(signed_char_clamp_bd_sse2(_mm_adds_epi16(ps1, filt), bd), t80); // end highbd_filter4 // loopfilter done // highbd_flat_mask4 flat = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p2, p0), _mm_subs_epu16(p0, p2)), _mm_or_si128(_mm_subs_epu16(p3, p0), _mm_subs_epu16(p0, p3))); work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(q2, q0), _mm_subs_epu16(q0, q2)), _mm_or_si128(_mm_subs_epu16(q3, q0), _mm_subs_epu16(q0, q3))); flat = _mm_max_epi16(work, flat); work = _mm_max_epi16(abs_p1p0, abs_q1q0); flat = _mm_max_epi16(work, flat); if (bd == 8) flat = _mm_subs_epu16(flat, one); else if (bd == 10) flat = _mm_subs_epu16(flat, 
_mm_slli_epi16(one, 2)); else // bd == 12 flat = _mm_subs_epu16(flat, _mm_slli_epi16(one, 4)); flat = _mm_cmpeq_epi16(flat, zero); // end flat_mask4 // flat & mask = flat && mask (as used in filter8) // (because, in both vars, each block of 16 either all 1s or all 0s) flat = _mm_and_si128(flat, mask); p5 = _mm_load_si128((__m128i *)(s - 6 * pitch)); q5 = _mm_load_si128((__m128i *)(s + 5 * pitch)); p6 = _mm_load_si128((__m128i *)(s - 7 * pitch)); q6 = _mm_load_si128((__m128i *)(s + 6 * pitch)); p7 = _mm_load_si128((__m128i *)(s - 8 * pitch)); q7 = _mm_load_si128((__m128i *)(s + 7 * pitch)); // highbd_flat_mask5 (arguments passed in are p0, q0, p4-p7, q4-q7 // but referred to as p0-p4 & q0-q4 in fn) flat2 = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p4, p0), _mm_subs_epu16(p0, p4)), _mm_or_si128(_mm_subs_epu16(q4, q0), _mm_subs_epu16(q0, q4))); work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p5, p0), _mm_subs_epu16(p0, p5)), _mm_or_si128(_mm_subs_epu16(q5, q0), _mm_subs_epu16(q0, q5))); flat2 = _mm_max_epi16(work, flat2); work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p6, p0), _mm_subs_epu16(p0, p6)), _mm_or_si128(_mm_subs_epu16(q6, q0), _mm_subs_epu16(q0, q6))); flat2 = _mm_max_epi16(work, flat2); work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p7, p0), _mm_subs_epu16(p0, p7)), _mm_or_si128(_mm_subs_epu16(q7, q0), _mm_subs_epu16(q0, q7))); flat2 = _mm_max_epi16(work, flat2); if (bd == 8) flat2 = _mm_subs_epu16(flat2, one); else if (bd == 10) flat2 = _mm_subs_epu16(flat2, _mm_slli_epi16(one, 2)); else // bd == 12 flat2 = _mm_subs_epu16(flat2, _mm_slli_epi16(one, 4)); flat2 = _mm_cmpeq_epi16(flat2, zero); flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask // end highbd_flat_mask5 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // flat and wide flat calculations eight = _mm_set1_epi16(8); four = _mm_set1_epi16(4); pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6, p5), _mm_add_epi16(p4, p3)); pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6, q5), _mm_add_epi16(q4, q3)); pixetFilter_p2p1p0 = _mm_add_epi16(p0, _mm_add_epi16(p2, p1)); pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); pixetFilter_q2q1q0 = _mm_add_epi16(q0, _mm_add_epi16(q2, q1)); pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); pixelFilter_p = _mm_add_epi16(eight, _mm_add_epi16(pixelFilter_p, pixelFilter_q)); pixetFilter_p2p1p0 = _mm_add_epi16( four, _mm_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); flat2_p0 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, _mm_add_epi16(p7, p0)), 4); flat2_q0 = _mm_srli_epi16(_mm_add_epi16(pixelFilter_p, _mm_add_epi16(q7, q0)), 4); flat_p0 = _mm_srli_epi16( _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(p3, p0)), 3); flat_q0 = _mm_srli_epi16( _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(q3, q0)), 3); sum_p7 = _mm_add_epi16(p7, p7); sum_q7 = _mm_add_epi16(q7, q7); sum_p3 = _mm_add_epi16(p3, p3); sum_q3 = _mm_add_epi16(q3, q3); pixelFilter_q = _mm_sub_epi16(pixelFilter_p, p6); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q6); flat2_p1 = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p1)), 4); flat2_q1 = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q1)), 4); pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_p2p1p0, p2); pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q2); flat_p1 = _mm_srli_epi16( _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(sum_p3, p1)), 3); flat_q1 = _mm_srli_epi16( _mm_add_epi16(pixetFilter_q2q1q0, _mm_add_epi16(sum_q3, q1)), 3); sum_p7 = _mm_add_epi16(sum_p7, p7); sum_q7 = 
_mm_add_epi16(sum_q7, q7); sum_p3 = _mm_add_epi16(sum_p3, p3); sum_q3 = _mm_add_epi16(sum_q3, q3); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q5); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p5); flat2_p2 = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p2)), 4); flat2_q2 = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q2)), 4); pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q1); pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_q2q1q0, p1); flat_p2 = _mm_srli_epi16( _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(sum_p3, p2)), 3); flat_q2 = _mm_srli_epi16( _mm_add_epi16(pixetFilter_q2q1q0, _mm_add_epi16(sum_q3, q2)), 3); sum_p7 = _mm_add_epi16(sum_p7, p7); sum_q7 = _mm_add_epi16(sum_q7, q7); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q4); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p4); flat2_p3 = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p3)), 4); flat2_q3 = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q3)), 4); sum_p7 = _mm_add_epi16(sum_p7, p7); sum_q7 = _mm_add_epi16(sum_q7, q7); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q3); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p3); flat2_p4 = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p4)), 4); flat2_q4 = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q4)), 4); sum_p7 = _mm_add_epi16(sum_p7, p7); sum_q7 = _mm_add_epi16(sum_q7, q7); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q2); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p2); flat2_p5 = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p5)), 4); flat2_q5 = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q5)), 4); sum_p7 = _mm_add_epi16(sum_p7, p7); sum_q7 = _mm_add_epi16(sum_q7, q7); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q1); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p1); flat2_p6 = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p6)), 4); flat2_q6 = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q6)), 4); // wide flat // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // highbd_filter8 p2 = _mm_andnot_si128(flat, p2); // p2 remains unchanged if !(flat && mask) flat_p2 = _mm_and_si128(flat, flat_p2); // when (flat && mask) p2 = _mm_or_si128(p2, flat_p2); // full list of p2 values q2 = _mm_andnot_si128(flat, q2); flat_q2 = _mm_and_si128(flat, flat_q2); q2 = _mm_or_si128(q2, flat_q2); // full list of q2 values ps1 = _mm_andnot_si128(flat, ps1); // p1 takes the value assigned to in in filter4 if !(flat && mask) flat_p1 = _mm_and_si128(flat, flat_p1); // when (flat && mask) p1 = _mm_or_si128(ps1, flat_p1); // full list of p1 values qs1 = _mm_andnot_si128(flat, qs1); flat_q1 = _mm_and_si128(flat, flat_q1); q1 = _mm_or_si128(qs1, flat_q1); // full list of q1 values ps0 = _mm_andnot_si128(flat, ps0); // p0 takes the value assigned to in in filter4 if !(flat && mask) flat_p0 = _mm_and_si128(flat, flat_p0); // when (flat && mask) p0 = _mm_or_si128(ps0, flat_p0); // full list of p0 values qs0 = _mm_andnot_si128(flat, qs0); flat_q0 = _mm_and_si128(flat, flat_q0); q0 = _mm_or_si128(qs0, flat_q0); // full list of q0 values // end highbd_filter8 // highbd_filter16 p6 = _mm_andnot_si128(flat2, p6); // p6 remains unchanged if !(flat2 && flat && mask) flat2_p6 = _mm_and_si128(flat2, flat2_p6); // get values for when (flat2 && flat && mask) p6 = _mm_or_si128(p6, flat2_p6); // full list of p6 values q6 = _mm_andnot_si128(flat2, q6); // q6 remains unchanged if !(flat2 && flat && 
mask) flat2_q6 = _mm_and_si128(flat2, flat2_q6); // get values for when (flat2 && flat && mask) q6 = _mm_or_si128(q6, flat2_q6); // full list of q6 values _mm_store_si128((__m128i *)(s - 7 * pitch), p6); _mm_store_si128((__m128i *)(s + 6 * pitch), q6); p5 = _mm_andnot_si128(flat2, p5); // p5 remains unchanged if !(flat2 && flat && mask) flat2_p5 = _mm_and_si128(flat2, flat2_p5); // get values for when (flat2 && flat && mask) p5 = _mm_or_si128(p5, flat2_p5); // full list of p5 values q5 = _mm_andnot_si128(flat2, q5); // q5 remains unchanged if !(flat2 && flat && mask) flat2_q5 = _mm_and_si128(flat2, flat2_q5); // get values for when (flat2 && flat && mask) q5 = _mm_or_si128(q5, flat2_q5); // full list of q5 values _mm_store_si128((__m128i *)(s - 6 * pitch), p5); _mm_store_si128((__m128i *)(s + 5 * pitch), q5); p4 = _mm_andnot_si128(flat2, p4); // p4 remains unchanged if !(flat2 && flat && mask) flat2_p4 = _mm_and_si128(flat2, flat2_p4); // get values for when (flat2 && flat && mask) p4 = _mm_or_si128(p4, flat2_p4); // full list of p4 values q4 = _mm_andnot_si128(flat2, q4); // q4 remains unchanged if !(flat2 && flat && mask) flat2_q4 = _mm_and_si128(flat2, flat2_q4); // get values for when (flat2 && flat && mask) q4 = _mm_or_si128(q4, flat2_q4); // full list of q4 values _mm_store_si128((__m128i *)(s - 5 * pitch), p4); _mm_store_si128((__m128i *)(s + 4 * pitch), q4); p3 = _mm_andnot_si128(flat2, p3); // p3 takes value from highbd_filter8 if !(flat2 && flat && mask) flat2_p3 = _mm_and_si128(flat2, flat2_p3); // get values for when (flat2 && flat && mask) p3 = _mm_or_si128(p3, flat2_p3); // full list of p3 values q3 = _mm_andnot_si128(flat2, q3); // q3 takes value from highbd_filter8 if !(flat2 && flat && mask) flat2_q3 = _mm_and_si128(flat2, flat2_q3); // get values for when (flat2 && flat && mask) q3 = _mm_or_si128(q3, flat2_q3); // full list of q3 values _mm_store_si128((__m128i *)(s - 4 * pitch), p3); _mm_store_si128((__m128i *)(s + 3 * pitch), q3); p2 = _mm_andnot_si128(flat2, p2); // p2 takes value from highbd_filter8 if !(flat2 && flat && mask) flat2_p2 = _mm_and_si128(flat2, flat2_p2); // get values for when (flat2 && flat && mask) p2 = _mm_or_si128(p2, flat2_p2); // full list of p2 values q2 = _mm_andnot_si128(flat2, q2); // q2 takes value from highbd_filter8 if !(flat2 && flat && mask) flat2_q2 = _mm_and_si128(flat2, flat2_q2); // get values for when (flat2 && flat && mask) q2 = _mm_or_si128(q2, flat2_q2); // full list of q2 values _mm_store_si128((__m128i *)(s - 3 * pitch), p2); _mm_store_si128((__m128i *)(s + 2 * pitch), q2); p1 = _mm_andnot_si128(flat2, p1); // p1 takes value from highbd_filter8 if !(flat2 && flat && mask) flat2_p1 = _mm_and_si128(flat2, flat2_p1); // get values for when (flat2 && flat && mask) p1 = _mm_or_si128(p1, flat2_p1); // full list of p1 values q1 = _mm_andnot_si128(flat2, q1); // q1 takes value from highbd_filter8 if !(flat2 && flat && mask) flat2_q1 = _mm_and_si128(flat2, flat2_q1); // get values for when (flat2 && flat && mask) q1 = _mm_or_si128(q1, flat2_q1); // full list of q1 values _mm_store_si128((__m128i *)(s - 2 * pitch), p1); _mm_store_si128((__m128i *)(s + 1 * pitch), q1); p0 = _mm_andnot_si128(flat2, p0); // p0 takes value from highbd_filter8 if !(flat2 && flat && mask) flat2_p0 = _mm_and_si128(flat2, flat2_p0); // get values for when (flat2 && flat && mask) p0 = _mm_or_si128(p0, flat2_p0); // full list of p0 values q0 = _mm_andnot_si128(flat2, q0); // q0 takes value from highbd_filter8 if !(flat2 && flat && mask) flat2_q0 = 
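// Each andnot/and/or triple in this epilogue is a branchless select,
//   x = (x & ~flat2) | (flat2_x & flat2),  i.e.  x = flat2 ? flat2_x : x,
// which is valid because every 16-bit lane of flat2 (and of flat and mask
// before it) is either all ones or all zeros.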
_mm_and_si128(flat2, flat2_q0); // get values for when (flat2 && flat && mask) q0 = _mm_or_si128(q0, flat2_q0); // full list of q0 values _mm_store_si128((__m128i *)(s - 1 * pitch), p0); _mm_store_si128((__m128i *)(s - 0 * pitch), q0); } void vpx_highbd_lpf_horizontal_16_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { vpx_highbd_lpf_horizontal_16_sse2(s, pitch, blimit, limit, thresh, bd); vpx_highbd_lpf_horizontal_16_sse2(s + 8, pitch, blimit, limit, thresh, bd); } void vpx_highbd_lpf_horizontal_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { DECLARE_ALIGNED(16, uint16_t, flat_op2[16]); DECLARE_ALIGNED(16, uint16_t, flat_op1[16]); DECLARE_ALIGNED(16, uint16_t, flat_op0[16]); DECLARE_ALIGNED(16, uint16_t, flat_oq2[16]); DECLARE_ALIGNED(16, uint16_t, flat_oq1[16]); DECLARE_ALIGNED(16, uint16_t, flat_oq0[16]); const __m128i zero = _mm_set1_epi16(0); __m128i blimit_v, limit_v, thresh_v; __m128i mask, hev, flat; __m128i p3 = _mm_load_si128((__m128i *)(s - 4 * pitch)); __m128i q3 = _mm_load_si128((__m128i *)(s + 3 * pitch)); __m128i p2 = _mm_load_si128((__m128i *)(s - 3 * pitch)); __m128i q2 = _mm_load_si128((__m128i *)(s + 2 * pitch)); __m128i p1 = _mm_load_si128((__m128i *)(s - 2 * pitch)); __m128i q1 = _mm_load_si128((__m128i *)(s + 1 * pitch)); __m128i p0 = _mm_load_si128((__m128i *)(s - 1 * pitch)); __m128i q0 = _mm_load_si128((__m128i *)(s + 0 * pitch)); const __m128i one = _mm_set1_epi16(1); const __m128i ffff = _mm_cmpeq_epi16(one, one); __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work; const __m128i four = _mm_set1_epi16(4); __m128i workp_a, workp_b, workp_shft; const __m128i t4 = _mm_set1_epi16(4); const __m128i t3 = _mm_set1_epi16(3); __m128i t80; const __m128i t1 = _mm_set1_epi16(0x1); __m128i ps1, ps0, qs0, qs1; __m128i filt; __m128i work_a; __m128i filter1, filter2; if (bd == 8) { blimit_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero); limit_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero); thresh_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero); t80 = _mm_set1_epi16(0x80); } else if (bd == 10) { blimit_v = _mm_slli_epi16( _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero), 2); limit_v = _mm_slli_epi16( _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero), 2); thresh_v = _mm_slli_epi16( _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero), 2); t80 = _mm_set1_epi16(0x200); } else { // bd == 12 blimit_v = _mm_slli_epi16( _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero), 4); limit_v = _mm_slli_epi16( _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero), 4); thresh_v = _mm_slli_epi16( _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero), 4); t80 = _mm_set1_epi16(0x800); } ps1 = _mm_subs_epi16(p1, t80); ps0 = _mm_subs_epi16(p0, t80); qs0 = _mm_subs_epi16(q0, t80); qs1 = _mm_subs_epi16(q1, t80); // filter_mask and hev_mask abs_p1p0 = _mm_or_si128(_mm_subs_epu16(p1, p0), _mm_subs_epu16(p0, p1)); abs_q1q0 = _mm_or_si128(_mm_subs_epu16(q1, q0), _mm_subs_epu16(q0, q1)); abs_p0q0 = _mm_or_si128(_mm_subs_epu16(p0, q0), _mm_subs_epu16(q0, p0)); abs_p1q1 = _mm_or_si128(_mm_subs_epu16(p1, q1), _mm_subs_epu16(q1, p1)); flat = _mm_max_epi16(abs_p1p0, abs_q1q0); hev = _mm_subs_epu16(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi16(hev, zero), ffff); abs_p0q0 = _mm_adds_epu16(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1); 
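// Branchless threshold test: abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit.
// The saturating subtract below leaves zero exactly when the sum stays
// within blimit; the later limits are folded in with max/subs_epu16 so that
// the single final compare against zero selects the lanes to be filtered.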
mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; // So taking maximums continues to work: mask = _mm_and_si128(mask, _mm_adds_epu16(limit_v, one)); mask = _mm_max_epi16(abs_p1p0, mask); // mask |= (abs(p1 - p0) > limit) * -1; mask = _mm_max_epi16(abs_q1q0, mask); // mask |= (abs(q1 - q0) > limit) * -1; work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p2, p1), _mm_subs_epu16(p1, p2)), _mm_or_si128(_mm_subs_epu16(q2, q1), _mm_subs_epu16(q1, q2))); mask = _mm_max_epi16(work, mask); work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p3, p2), _mm_subs_epu16(p2, p3)), _mm_or_si128(_mm_subs_epu16(q3, q2), _mm_subs_epu16(q2, q3))); mask = _mm_max_epi16(work, mask); mask = _mm_subs_epu16(mask, limit_v); mask = _mm_cmpeq_epi16(mask, zero); // flat_mask4 flat = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p2, p0), _mm_subs_epu16(p0, p2)), _mm_or_si128(_mm_subs_epu16(q2, q0), _mm_subs_epu16(q0, q2))); work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p3, p0), _mm_subs_epu16(p0, p3)), _mm_or_si128(_mm_subs_epu16(q3, q0), _mm_subs_epu16(q0, q3))); flat = _mm_max_epi16(work, flat); flat = _mm_max_epi16(abs_p1p0, flat); flat = _mm_max_epi16(abs_q1q0, flat); if (bd == 8) flat = _mm_subs_epu16(flat, one); else if (bd == 10) flat = _mm_subs_epu16(flat, _mm_slli_epi16(one, 2)); else // bd == 12 flat = _mm_subs_epu16(flat, _mm_slli_epi16(one, 4)); flat = _mm_cmpeq_epi16(flat, zero); flat = _mm_and_si128(flat, mask); // flat & mask // Added before shift for rounding part of ROUND_POWER_OF_TWO workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1)); workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p3); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_store_si128((__m128i *)&flat_op2[0], workp_shft); workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_store_si128((__m128i *)&flat_op1[0], workp_shft); workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q2); workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_store_si128((__m128i *)&flat_op0[0], workp_shft); workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3); workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_store_si128((__m128i *)&flat_oq0[0], workp_shft); workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q3); workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_store_si128((__m128i *)&flat_oq1[0], workp_shft); workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q3); workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_store_si128((__m128i *)&flat_oq2[0], workp_shft); // lp filter filt = signed_char_clamp_bd_sse2(_mm_subs_epi16(ps1, qs1), bd); filt = _mm_and_si128(filt, hev); work_a = _mm_subs_epi16(qs0, ps0); filt = _mm_adds_epi16(filt, work_a); filt = _mm_adds_epi16(filt, work_a); filt = _mm_adds_epi16(filt, work_a); // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = signed_char_clamp_bd_sse2(filt, bd); filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi16(filt, t4); filter2 = _mm_adds_epi16(filt, t3); // Filter1 >> 3 filter1 = signed_char_clamp_bd_sse2(filter1, bd); 
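// (filter4 semantics: filter1 = clamp(filt + 4) >> 3 is subtracted from q0
// and filter2 = clamp(filt + 3) >> 3 is added to p0; the bit-depth-aware
// clamp happens before the arithmetic shift, matching the C reference.)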
filter1 = _mm_srai_epi16(filter1, 3); // Filter2 >> 3 filter2 = signed_char_clamp_bd_sse2(filter2, bd); filter2 = _mm_srai_epi16(filter2, 3); // filt >> 1 filt = _mm_adds_epi16(filter1, t1); filt = _mm_srai_epi16(filt, 1); // filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; filt = _mm_andnot_si128(hev, filt); work_a = signed_char_clamp_bd_sse2(_mm_subs_epi16(qs0, filter1), bd); work_a = _mm_adds_epi16(work_a, t80); q0 = _mm_load_si128((__m128i *)flat_oq0); work_a = _mm_andnot_si128(flat, work_a); q0 = _mm_and_si128(flat, q0); q0 = _mm_or_si128(work_a, q0); work_a = signed_char_clamp_bd_sse2(_mm_subs_epi16(qs1, filt), bd); work_a = _mm_adds_epi16(work_a, t80); q1 = _mm_load_si128((__m128i *)flat_oq1); work_a = _mm_andnot_si128(flat, work_a); q1 = _mm_and_si128(flat, q1); q1 = _mm_or_si128(work_a, q1); work_a = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); q2 = _mm_load_si128((__m128i *)flat_oq2); work_a = _mm_andnot_si128(flat, work_a); q2 = _mm_and_si128(flat, q2); q2 = _mm_or_si128(work_a, q2); work_a = signed_char_clamp_bd_sse2(_mm_adds_epi16(ps0, filter2), bd); work_a = _mm_adds_epi16(work_a, t80); p0 = _mm_load_si128((__m128i *)flat_op0); work_a = _mm_andnot_si128(flat, work_a); p0 = _mm_and_si128(flat, p0); p0 = _mm_or_si128(work_a, p0); work_a = signed_char_clamp_bd_sse2(_mm_adds_epi16(ps1, filt), bd); work_a = _mm_adds_epi16(work_a, t80); p1 = _mm_load_si128((__m128i *)flat_op1); work_a = _mm_andnot_si128(flat, work_a); p1 = _mm_and_si128(flat, p1); p1 = _mm_or_si128(work_a, p1); work_a = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); p2 = _mm_load_si128((__m128i *)flat_op2); work_a = _mm_andnot_si128(flat, work_a); p2 = _mm_and_si128(flat, p2); p2 = _mm_or_si128(work_a, p2); _mm_store_si128((__m128i *)(s - 3 * pitch), p2); _mm_store_si128((__m128i *)(s - 2 * pitch), p1); _mm_store_si128((__m128i *)(s - 1 * pitch), p0); _mm_store_si128((__m128i *)(s + 0 * pitch), q0); _mm_store_si128((__m128i *)(s + 1 * pitch), q1); _mm_store_si128((__m128i *)(s + 2 * pitch), q2); } void vpx_highbd_lpf_horizontal_8_dual_sse2( uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { vpx_highbd_lpf_horizontal_8_sse2(s, pitch, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_horizontal_8_sse2(s + 8, pitch, blimit1, limit1, thresh1, bd); } void vpx_highbd_lpf_horizontal_4_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { const __m128i zero = _mm_set1_epi16(0); __m128i blimit_v, limit_v, thresh_v; __m128i mask, hev, flat; __m128i p3 = _mm_loadu_si128((__m128i *)(s - 4 * pitch)); __m128i p2 = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); __m128i p1 = _mm_loadu_si128((__m128i *)(s - 2 * pitch)); __m128i p0 = _mm_loadu_si128((__m128i *)(s - 1 * pitch)); __m128i q0 = _mm_loadu_si128((__m128i *)(s - 0 * pitch)); __m128i q1 = _mm_loadu_si128((__m128i *)(s + 1 * pitch)); __m128i q2 = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); __m128i q3 = _mm_loadu_si128((__m128i *)(s + 3 * pitch)); const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu16(p1, p0), _mm_subs_epu16(p0, p1)); const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu16(q1, q0), _mm_subs_epu16(q0, q1)); const __m128i ffff = _mm_cmpeq_epi16(abs_p1p0, abs_p1p0); const __m128i one = _mm_set1_epi16(1); __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu16(p0, q0), _mm_subs_epu16(q0, p0)); __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu16(p1, q1), _mm_subs_epu16(q1, p1)); __m128i work; const __m128i t4 = 
_mm_set1_epi16(4); const __m128i t3 = _mm_set1_epi16(3); __m128i t80; __m128i tff80; __m128i tffe0; __m128i t1f; // equivalent to shifting 0x1f left by bitdepth - 8 // and setting new bits to 1 const __m128i t1 = _mm_set1_epi16(0x1); __m128i t7f; // equivalent to shifting 0x7f left by bitdepth - 8 // and setting new bits to 1 __m128i ps1, ps0, qs0, qs1; __m128i filt; __m128i work_a; __m128i filter1, filter2; if (bd == 8) { blimit_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero); limit_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero); thresh_v = _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero); t80 = _mm_set1_epi16(0x80); tff80 = _mm_set1_epi16((int16_t)0xff80); tffe0 = _mm_set1_epi16((int16_t)0xffe0); t1f = _mm_srli_epi16(_mm_set1_epi16(0x1fff), 8); t7f = _mm_srli_epi16(_mm_set1_epi16(0x7fff), 8); } else if (bd == 10) { blimit_v = _mm_slli_epi16( _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero), 2); limit_v = _mm_slli_epi16( _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero), 2); thresh_v = _mm_slli_epi16( _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero), 2); t80 = _mm_slli_epi16(_mm_set1_epi16(0x80), 2); tff80 = _mm_slli_epi16(_mm_set1_epi16((int16_t)0xff80), 2); tffe0 = _mm_slli_epi16(_mm_set1_epi16((int16_t)0xffe0), 2); t1f = _mm_srli_epi16(_mm_set1_epi16(0x1fff), 6); t7f = _mm_srli_epi16(_mm_set1_epi16(0x7fff), 6); } else { // bd == 12 blimit_v = _mm_slli_epi16( _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)blimit), zero), 4); limit_v = _mm_slli_epi16( _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)limit), zero), 4); thresh_v = _mm_slli_epi16( _mm_unpacklo_epi8(_mm_load_si128((const __m128i *)thresh), zero), 4); t80 = _mm_slli_epi16(_mm_set1_epi16(0x80), 4); tff80 = _mm_slli_epi16(_mm_set1_epi16((int16_t)0xff80), 4); tffe0 = _mm_slli_epi16(_mm_set1_epi16((int16_t)0xffe0), 4); t1f = _mm_srli_epi16(_mm_set1_epi16(0x1fff), 4); t7f = _mm_srli_epi16(_mm_set1_epi16(0x7fff), 4); } ps1 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s - 2 * pitch)), t80); ps0 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s - 1 * pitch)), t80); qs0 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s + 0 * pitch)), t80); qs1 = _mm_subs_epi16(_mm_loadu_si128((__m128i *)(s + 1 * pitch)), t80); // filter_mask and hev_mask flat = _mm_max_epi16(abs_p1p0, abs_q1q0); hev = _mm_subs_epu16(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi16(hev, zero), ffff); abs_p0q0 = _mm_adds_epu16(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(abs_p1q1, 1); mask = _mm_subs_epu16(_mm_adds_epu16(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi16(mask, zero), ffff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; // So taking maximums continues to work: mask = _mm_and_si128(mask, _mm_adds_epu16(limit_v, one)); mask = _mm_max_epi16(flat, mask); // mask |= (abs(p1 - p0) > limit) * -1; // mask |= (abs(q1 - q0) > limit) * -1; work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(p2, p1), _mm_subs_epu16(p1, p2)), _mm_or_si128(_mm_subs_epu16(p3, p2), _mm_subs_epu16(p2, p3))); mask = _mm_max_epi16(work, mask); work = _mm_max_epi16( _mm_or_si128(_mm_subs_epu16(q2, q1), _mm_subs_epu16(q1, q2)), _mm_or_si128(_mm_subs_epu16(q3, q2), _mm_subs_epu16(q2, q3))); mask = _mm_max_epi16(work, mask); mask = _mm_subs_epu16(mask, limit_v); mask = _mm_cmpeq_epi16(mask, zero); // filter4 filt = signed_char_clamp_bd_sse2(_mm_subs_epi16(ps1, qs1), bd); filt = _mm_and_si128(filt, hev); work_a = _mm_subs_epi16(qs0, ps0); filt = 
_mm_adds_epi16(filt, work_a); filt = _mm_adds_epi16(filt, work_a); filt = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, work_a), bd); // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t4), bd); filter2 = signed_char_clamp_bd_sse2(_mm_adds_epi16(filt, t3), bd); // Filter1 >> 3 work_a = _mm_cmpgt_epi16(zero, filter1); // get the values that are <0 filter1 = _mm_srli_epi16(filter1, 3); work_a = _mm_and_si128(work_a, tffe0); // sign bits for the values < 0 filter1 = _mm_and_si128(filter1, t1f); // clamp the range filter1 = _mm_or_si128(filter1, work_a); // reinsert the sign bits // Filter2 >> 3 work_a = _mm_cmpgt_epi16(zero, filter2); filter2 = _mm_srli_epi16(filter2, 3); work_a = _mm_and_si128(work_a, tffe0); filter2 = _mm_and_si128(filter2, t1f); filter2 = _mm_or_si128(filter2, work_a); // filt >> 1 filt = _mm_adds_epi16(filter1, t1); work_a = _mm_cmpgt_epi16(zero, filt); filt = _mm_srli_epi16(filt, 1); work_a = _mm_and_si128(work_a, tff80); filt = _mm_and_si128(filt, t7f); filt = _mm_or_si128(filt, work_a); filt = _mm_andnot_si128(hev, filt); q0 = _mm_adds_epi16( signed_char_clamp_bd_sse2(_mm_subs_epi16(qs0, filter1), bd), t80); q1 = _mm_adds_epi16(signed_char_clamp_bd_sse2(_mm_subs_epi16(qs1, filt), bd), t80); p0 = _mm_adds_epi16( signed_char_clamp_bd_sse2(_mm_adds_epi16(ps0, filter2), bd), t80); p1 = _mm_adds_epi16(signed_char_clamp_bd_sse2(_mm_adds_epi16(ps1, filt), bd), t80); _mm_storeu_si128((__m128i *)(s - 2 * pitch), p1); _mm_storeu_si128((__m128i *)(s - 1 * pitch), p0); _mm_storeu_si128((__m128i *)(s + 0 * pitch), q0); _mm_storeu_si128((__m128i *)(s + 1 * pitch), q1); } void vpx_highbd_lpf_horizontal_4_dual_sse2( uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { vpx_highbd_lpf_horizontal_4_sse2(s, pitch, blimit0, limit0, thresh0, bd); vpx_highbd_lpf_horizontal_4_sse2(s + 8, pitch, blimit1, limit1, thresh1, bd); } static INLINE void highbd_transpose(uint16_t *src[], int in_p, uint16_t *dst[], int out_p, int num_8x8_to_transpose) { int idx8x8 = 0; __m128i p0, p1, p2, p3, p4, p5, p6, p7, x0, x1, x2, x3, x4, x5, x6, x7; do { uint16_t *in = src[idx8x8]; uint16_t *out = dst[idx8x8]; p0 = _mm_loadu_si128((__m128i *)(in + 0 * in_p)); // 00 01 02 03 04 05 06 07 p1 = _mm_loadu_si128((__m128i *)(in + 1 * in_p)); // 10 11 12 13 14 15 16 17 p2 = _mm_loadu_si128((__m128i *)(in + 2 * in_p)); // 20 21 22 23 24 25 26 27 p3 = _mm_loadu_si128((__m128i *)(in + 3 * in_p)); // 30 31 32 33 34 35 36 37 p4 = _mm_loadu_si128((__m128i *)(in + 4 * in_p)); // 40 41 42 43 44 45 46 47 p5 = _mm_loadu_si128((__m128i *)(in + 5 * in_p)); // 50 51 52 53 54 55 56 57 p6 = _mm_loadu_si128((__m128i *)(in + 6 * in_p)); // 60 61 62 63 64 65 66 67 p7 = _mm_loadu_si128((__m128i *)(in + 7 * in_p)); // 70 71 72 73 74 75 76 77 // 00 10 01 11 02 12 03 13 x0 = _mm_unpacklo_epi16(p0, p1); // 20 30 21 31 22 32 23 33 x1 = _mm_unpacklo_epi16(p2, p3); // 40 50 41 51 42 52 43 53 x2 = _mm_unpacklo_epi16(p4, p5); // 60 70 61 71 62 72 63 73 x3 = _mm_unpacklo_epi16(p6, p7); // 00 10 20 30 01 11 21 31 x4 = _mm_unpacklo_epi32(x0, x1); // 40 50 60 70 41 51 61 71 x5 = _mm_unpacklo_epi32(x2, x3); // 00 10 20 30 40 50 60 70 x6 = _mm_unpacklo_epi64(x4, x5); // 01 11 21 31 41 51 61 71 x7 = _mm_unpackhi_epi64(x4, x5); _mm_storeu_si128((__m128i *)(out + 0 * out_p), x6); // 00 10 20 30 40 50 60 70 _mm_storeu_si128((__m128i *)(out + 1 * out_p), 
x7); // 01 11 21 31 41 51 61 71 // 02 12 22 32 03 13 23 33 x4 = _mm_unpackhi_epi32(x0, x1); // 42 52 62 72 43 53 63 73 x5 = _mm_unpackhi_epi32(x2, x3); // 02 12 22 32 42 52 62 72 x6 = _mm_unpacklo_epi64(x4, x5); // 03 13 23 33 43 53 63 73 x7 = _mm_unpackhi_epi64(x4, x5); _mm_storeu_si128((__m128i *)(out + 2 * out_p), x6); // 02 12 22 32 42 52 62 72 _mm_storeu_si128((__m128i *)(out + 3 * out_p), x7); // 03 13 23 33 43 53 63 73 // 04 14 05 15 06 16 07 17 x0 = _mm_unpackhi_epi16(p0, p1); // 24 34 25 35 26 36 27 37 x1 = _mm_unpackhi_epi16(p2, p3); // 44 54 45 55 46 56 47 57 x2 = _mm_unpackhi_epi16(p4, p5); // 64 74 65 75 66 76 67 77 x3 = _mm_unpackhi_epi16(p6, p7); // 04 14 24 34 05 15 25 35 x4 = _mm_unpacklo_epi32(x0, x1); // 44 54 64 74 45 55 65 75 x5 = _mm_unpacklo_epi32(x2, x3); // 04 14 24 34 44 54 64 74 x6 = _mm_unpacklo_epi64(x4, x5); // 05 15 25 35 45 55 65 75 x7 = _mm_unpackhi_epi64(x4, x5); _mm_storeu_si128((__m128i *)(out + 4 * out_p), x6); // 04 14 24 34 44 54 64 74 _mm_storeu_si128((__m128i *)(out + 5 * out_p), x7); // 05 15 25 35 45 55 65 75 // 06 16 26 36 07 17 27 37 x4 = _mm_unpackhi_epi32(x0, x1); // 46 56 66 76 47 57 67 77 x5 = _mm_unpackhi_epi32(x2, x3); // 06 16 26 36 46 56 66 76 x6 = _mm_unpacklo_epi64(x4, x5); // 07 17 27 37 47 57 67 77 x7 = _mm_unpackhi_epi64(x4, x5); _mm_storeu_si128((__m128i *)(out + 6 * out_p), x6); // 06 16 26 36 46 56 66 76 _mm_storeu_si128((__m128i *)(out + 7 * out_p), x7); // 07 17 27 37 47 57 67 77 } while (++idx8x8 < num_8x8_to_transpose); } static INLINE void highbd_transpose8x16(uint16_t *in0, uint16_t *in1, int in_p, uint16_t *out, int out_p) { uint16_t *src0[1]; uint16_t *src1[1]; uint16_t *dest0[1]; uint16_t *dest1[1]; src0[0] = in0; src1[0] = in1; dest0[0] = out; dest1[0] = out + 8; highbd_transpose(src0, in_p, dest0, out_p, 1); highbd_transpose(src1, in_p, dest1, out_p, 1); } void vpx_highbd_lpf_vertical_4_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]); uint16_t *src[1]; uint16_t *dst[1]; // Transpose 8x8 src[0] = s - 4; dst[0] = t_dst; highbd_transpose(src, pitch, dst, 8, 1); // Loop filtering vpx_highbd_lpf_horizontal_4_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd); src[0] = t_dst; dst[0] = s - 4; // Transpose back highbd_transpose(src, 8, dst, pitch, 1); } void vpx_highbd_lpf_vertical_4_dual_sse2( uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[16 * 8]); uint16_t *src[2]; uint16_t *dst[2]; // Transpose 8x16 highbd_transpose8x16(s - 4, s - 4 + pitch * 8, pitch, t_dst, 16); // Loop filtering vpx_highbd_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bd); src[0] = t_dst; src[1] = t_dst + 8; dst[0] = s - 4; dst[1] = s - 4 + pitch * 8; // Transpose back highbd_transpose(src, 16, dst, pitch, 2); } void vpx_highbd_lpf_vertical_8_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 8]); uint16_t *src[1]; uint16_t *dst[1]; // Transpose 8x8 src[0] = s - 4; dst[0] = t_dst; highbd_transpose(src, pitch, dst, 8, 1); // Loop filtering vpx_highbd_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh, bd); src[0] = t_dst; dst[0] = s - 4; // Transpose back highbd_transpose(src, 8, dst, pitch, 1); } void 
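// Like the 4- and 8-wide vertical filters above, the remaining vertical
// variants reuse the horizontal kernels: transpose the affected columns into
// a stack buffer, filter that buffer horizontally, then transpose it back.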
vpx_highbd_lpf_vertical_8_dual_sse2( uint16_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1, int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[16 * 8]); uint16_t *src[2]; uint16_t *dst[2]; // Transpose 8x16 highbd_transpose8x16(s - 4, s - 4 + pitch * 8, pitch, t_dst, 16); // Loop filtering vpx_highbd_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, blimit1, limit1, thresh1, bd); src[0] = t_dst; src[1] = t_dst + 8; dst[0] = s - 4; dst[1] = s - 4 + pitch * 8; // Transpose back highbd_transpose(src, 16, dst, pitch, 2); } void vpx_highbd_lpf_vertical_16_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[8 * 16]); uint16_t *src[2]; uint16_t *dst[2]; src[0] = s - 8; src[1] = s; dst[0] = t_dst; dst[1] = t_dst + 8 * 8; // Transpose 16x8 highbd_transpose(src, pitch, dst, 8, 2); // Loop filtering vpx_highbd_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh, bd); src[0] = t_dst; src[1] = t_dst + 8 * 8; dst[0] = s - 8; dst[1] = s; // Transpose back highbd_transpose(src, 8, dst, pitch, 2); } void vpx_highbd_lpf_vertical_16_dual_sse2(uint16_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh, int bd) { DECLARE_ALIGNED(16, uint16_t, t_dst[256]); // Transpose 16x16 highbd_transpose8x16(s - 8, s - 8 + 8 * pitch, pitch, t_dst, 16); highbd_transpose8x16(s, s + 8 * pitch, pitch, t_dst + 8 * 16, 16); // Loop filtering vpx_highbd_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh, bd); // Transpose back highbd_transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, pitch); highbd_transpose8x16(t_dst + 8, t_dst + 8 + 8 * 16, 16, s - 8 + 8 * pitch, pitch); } libvpx-1.8.2/vpx_dsp/x86/highbd_quantize_intrin_sse2.c000066400000000000000000000133211357355204000227720ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
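A note on the vertical wrappers above before the next file begins: none of them filter columns directly. Each one transposes an 8-wide patch into a contiguous scratch buffer, reuses the matching horizontal kernel on the transposed rows, and transposes the result back into place. A minimal scalar sketch of that pattern (hypothetical helper names; plain C instead of the SSE2 unpack ladders):

#include <stdint.h>

/* Scalar stand-in for highbd_transpose(): 8x8 block of 16-bit pixels. */
static void transpose8x8_u16(const uint16_t *in, int in_stride, uint16_t *out,
                             int out_stride) {
  int r, c;
  for (r = 0; r < 8; ++r)
    for (c = 0; c < 8; ++c) out[c * out_stride + r] = in[r * in_stride + c];
}

/* Vertical filtering by transposition; filter_rows is any horizontal
 * loop-filter kernel operating on an 8-pixel-wide buffer with stride 8. */
static void lpf_vertical_via_transpose(uint16_t *s, int pitch,
                                       void (*filter_rows)(uint16_t *, int)) {
  uint16_t t[8 * 8];
  transpose8x8_u16(s - 4, pitch, t, 8); /* columns become rows         */
  filter_rows(t + 4 * 8, 8);            /* reuse the horizontal kernel */
  transpose8x8_u16(t, 8, s - 4, pitch); /* write back in column order  */
}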
*/

#include <emmintrin.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_dsp_common.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

#if CONFIG_VP9_HIGHBITDEPTH
void vpx_highbd_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t count,
                                int skip_block, const int16_t *zbin_ptr,
                                const int16_t *round_ptr,
                                const int16_t *quant_ptr,
                                const int16_t *quant_shift_ptr,
                                tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr,
                                const int16_t *dequant_ptr, uint16_t *eob_ptr,
                                const int16_t *scan, const int16_t *iscan) {
  int i, j, non_zero_regs = (int)count / 4, eob_i = -1;
  __m128i zbins[2];
  __m128i nzbins[2];

  zbins[0] = _mm_set_epi32((int)zbin_ptr[1], (int)zbin_ptr[1], (int)zbin_ptr[1],
                           (int)zbin_ptr[0]);
  zbins[1] = _mm_set1_epi32((int)zbin_ptr[1]);

  nzbins[0] = _mm_setzero_si128();
  nzbins[1] = _mm_setzero_si128();
  nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]);
  nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]);

  (void)scan;
  (void)skip_block;
  assert(!skip_block);

  memset(qcoeff_ptr, 0, count * sizeof(*qcoeff_ptr));
  memset(dqcoeff_ptr, 0, count * sizeof(*dqcoeff_ptr));

  // Pre-scan pass
  for (i = ((int)count / 4) - 1; i >= 0; i--) {
    __m128i coeffs, cmp1, cmp2;
    int test;
    coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
    cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]);
    cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]);
    cmp1 = _mm_and_si128(cmp1, cmp2);
    test = _mm_movemask_epi8(cmp1);
    if (test == 0xffff)
      non_zero_regs--;
    else
      break;
  }

  // Quantization pass:
  for (i = 0; i < non_zero_regs; i++) {
    __m128i coeffs, coeffs_sign, tmp1, tmp2;
    int test;
    int abs_coeff[4];
    int coeff_sign[4];

    coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4));
    coeffs_sign = _mm_srai_epi32(coeffs, 31);
    coeffs = _mm_sub_epi32(_mm_xor_si128(coeffs, coeffs_sign), coeffs_sign);
    tmp1 = _mm_cmpgt_epi32(coeffs, zbins[i != 0]);
    tmp2 = _mm_cmpeq_epi32(coeffs, zbins[i != 0]);
    tmp1 = _mm_or_si128(tmp1, tmp2);
    test = _mm_movemask_epi8(tmp1);
    _mm_storeu_si128((__m128i *)abs_coeff, coeffs);
    _mm_storeu_si128((__m128i *)coeff_sign, coeffs_sign);

    for (j = 0; j < 4; j++) {
      if (test & (1 << (4 * j))) {
        int k = 4 * i + j;
        const int64_t tmp3 = abs_coeff[j] + round_ptr[k != 0];
        const int64_t tmp4 = ((tmp3 * quant_ptr[k != 0]) >> 16) + tmp3;
        const uint32_t abs_qcoeff =
            (uint32_t)((tmp4 * quant_shift_ptr[k != 0]) >> 16);
        qcoeff_ptr[k] = (int)(abs_qcoeff ^ coeff_sign[j]) - coeff_sign[j];
        dqcoeff_ptr[k] = qcoeff_ptr[k] * dequant_ptr[k != 0];
        if (abs_qcoeff) eob_i = iscan[k] > eob_i ?
iscan[k] : eob_i; } } } *eob_ptr = eob_i + 1; } void vpx_highbd_quantize_b_32x32_sse2( const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { __m128i zbins[2]; __m128i nzbins[2]; int idx = 0; int idx_arr[1024]; int i, eob = -1; const int zbin0_tmp = ROUND_POWER_OF_TWO(zbin_ptr[0], 1); const int zbin1_tmp = ROUND_POWER_OF_TWO(zbin_ptr[1], 1); (void)scan; (void)skip_block; assert(!skip_block); zbins[0] = _mm_set_epi32(zbin1_tmp, zbin1_tmp, zbin1_tmp, zbin0_tmp); zbins[1] = _mm_set1_epi32(zbin1_tmp); nzbins[0] = _mm_setzero_si128(); nzbins[1] = _mm_setzero_si128(); nzbins[0] = _mm_sub_epi32(nzbins[0], zbins[0]); nzbins[1] = _mm_sub_epi32(nzbins[1], zbins[1]); memset(qcoeff_ptr, 0, n_coeffs * sizeof(*qcoeff_ptr)); memset(dqcoeff_ptr, 0, n_coeffs * sizeof(*dqcoeff_ptr)); // Pre-scan pass for (i = 0; i < n_coeffs / 4; i++) { __m128i coeffs, cmp1, cmp2; int test; coeffs = _mm_load_si128((const __m128i *)(coeff_ptr + i * 4)); cmp1 = _mm_cmplt_epi32(coeffs, zbins[i != 0]); cmp2 = _mm_cmpgt_epi32(coeffs, nzbins[i != 0]); cmp1 = _mm_and_si128(cmp1, cmp2); test = _mm_movemask_epi8(cmp1); if (!(test & 0xf)) idx_arr[idx++] = i * 4; if (!(test & 0xf0)) idx_arr[idx++] = i * 4 + 1; if (!(test & 0xf00)) idx_arr[idx++] = i * 4 + 2; if (!(test & 0xf000)) idx_arr[idx++] = i * 4 + 3; } // Quantization pass: only process the coefficients selected in // pre-scan pass. Note: idx can be zero. for (i = 0; i < idx; i++) { const int rc = idx_arr[i]; const int coeff = coeff_ptr[rc]; const int coeff_sign = (coeff >> 31); const int abs_coeff = (coeff ^ coeff_sign) - coeff_sign; const int64_t tmp1 = abs_coeff + ROUND_POWER_OF_TWO(round_ptr[rc != 0], 1); const int64_t tmp2 = ((tmp1 * quant_ptr[rc != 0]) >> 16) + tmp1; const uint32_t abs_qcoeff = (uint32_t)((tmp2 * quant_shift_ptr[rc != 0]) >> 15); qcoeff_ptr[rc] = (int)(abs_qcoeff ^ coeff_sign) - coeff_sign; dqcoeff_ptr[rc] = qcoeff_ptr[rc] * dequant_ptr[rc != 0] / 2; if (abs_qcoeff) eob = iscan[idx_arr[i]] > eob ? iscan[idx_arr[i]] : eob; } *eob_ptr = eob + 1; } #endif libvpx-1.8.2/vpx_dsp/x86/highbd_sad4d_sse2.asm000066400000000000000000000216541357355204000211140ustar00rootroot00000000000000; ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
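Before the assembly files, one clarification on the 32x32 quantizer that just ended: its thresholds carry an extra bit of precision, so zbin and round are halved with ROUND_POWER_OF_TWO(x, 1), the final quantizer shift is 15 rather than 16, and the dequantized value is halved. A scalar sketch of the per-coefficient math (illustrative names; the zbin gate itself happens in the pre-scan pass):

#include <stdint.h>

#define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n))

/* One coefficient of the 32x32 path; rc != 0 selects the AC constants. */
static int32_t quantize_coeff_32x32(int32_t coeff, int rc,
                                    const int16_t *round, const int16_t *quant,
                                    const int16_t *quant_shift,
                                    const int16_t *dequant, int32_t *dqcoeff) {
  const int32_t sign = coeff >> 31; /* 0 or -1, as in the source above */
  const int32_t abs_coeff = (coeff ^ sign) - sign;
  const int64_t tmp1 = abs_coeff + ROUND_POWER_OF_TWO(round[rc != 0], 1);
  const int64_t tmp2 = ((tmp1 * quant[rc != 0]) >> 16) + tmp1;
  const uint32_t abs_q = (uint32_t)((tmp2 * quant_shift[rc != 0]) >> 15);
  const int32_t q = (int32_t)((int32_t)abs_q ^ sign) - sign;
  *dqcoeff = q * dequant[rc != 0] / 2; /* halved dequant step */
  return q;
}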
; %include "third_party/x86inc/x86inc.asm" SECTION .text ; HIGH_PROCESS_4x2x4 first, off_{first,second}_{src,ref}, advance_at_end %macro HIGH_PROCESS_4x2x4 5-6 0 movh m0, [srcq +%2*2] %if %1 == 1 movu m4, [ref1q+%3*2] movu m5, [ref2q+%3*2] movu m6, [ref3q+%3*2] movu m7, [ref4q+%3*2] movhps m0, [srcq +%4*2] movhps m4, [ref1q+%5*2] movhps m5, [ref2q+%5*2] movhps m6, [ref3q+%5*2] movhps m7, [ref4q+%5*2] mova m3, m0 mova m2, m0 psubusw m3, m4 psubusw m2, m5 psubusw m4, m0 psubusw m5, m0 por m4, m3 por m5, m2 pmaddwd m4, m1 pmaddwd m5, m1 mova m3, m0 mova m2, m0 psubusw m3, m6 psubusw m2, m7 psubusw m6, m0 psubusw m7, m0 por m6, m3 por m7, m2 pmaddwd m6, m1 pmaddwd m7, m1 %else movu m2, [ref1q+%3*2] movhps m0, [srcq +%4*2] movhps m2, [ref1q+%5*2] mova m3, m0 psubusw m3, m2 psubusw m2, m0 por m2, m3 pmaddwd m2, m1 paddd m4, m2 movu m2, [ref2q+%3*2] mova m3, m0 movhps m2, [ref2q+%5*2] psubusw m3, m2 psubusw m2, m0 por m2, m3 pmaddwd m2, m1 paddd m5, m2 movu m2, [ref3q+%3*2] mova m3, m0 movhps m2, [ref3q+%5*2] psubusw m3, m2 psubusw m2, m0 por m2, m3 pmaddwd m2, m1 paddd m6, m2 movu m2, [ref4q+%3*2] mova m3, m0 movhps m2, [ref4q+%5*2] psubusw m3, m2 psubusw m2, m0 por m2, m3 pmaddwd m2, m1 paddd m7, m2 %endif %if %6 == 1 lea srcq, [srcq +src_strideq*4] lea ref1q, [ref1q+ref_strideq*4] lea ref2q, [ref2q+ref_strideq*4] lea ref3q, [ref3q+ref_strideq*4] lea ref4q, [ref4q+ref_strideq*4] %endif %endmacro ; PROCESS_8x2x4 first, off_{first,second}_{src,ref}, advance_at_end %macro HIGH_PROCESS_8x2x4 5-6 0 ; 1st 8 px mova m0, [srcq +%2*2] %if %1 == 1 movu m4, [ref1q+%3*2] movu m5, [ref2q+%3*2] movu m6, [ref3q+%3*2] movu m7, [ref4q+%3*2] mova m3, m0 mova m2, m0 psubusw m3, m4 psubusw m2, m5 psubusw m4, m0 psubusw m5, m0 por m4, m3 por m5, m2 pmaddwd m4, m1 pmaddwd m5, m1 mova m3, m0 mova m2, m0 psubusw m3, m6 psubusw m2, m7 psubusw m6, m0 psubusw m7, m0 por m6, m3 por m7, m2 pmaddwd m6, m1 pmaddwd m7, m1 %else mova m3, m0 movu m2, [ref1q+%3*2] psubusw m3, m2 psubusw m2, m0 por m2, m3 mova m3, m0 pmaddwd m2, m1 paddd m4, m2 movu m2, [ref2q+%3*2] psubusw m3, m2 psubusw m2, m0 por m2, m3 mova m3, m0 pmaddwd m2, m1 paddd m5, m2 movu m2, [ref3q+%3*2] psubusw m3, m2 psubusw m2, m0 por m2, m3 mova m3, m0 pmaddwd m2, m1 paddd m6, m2 movu m2, [ref4q+%3*2] psubusw m3, m2 psubusw m2, m0 por m2, m3 pmaddwd m2, m1 paddd m7, m2 %endif ; 2nd 8 px mova m0, [srcq +(%4)*2] mova m3, m0 movu m2, [ref1q+(%5)*2] psubusw m3, m2 psubusw m2, m0 por m2, m3 mova m3, m0 pmaddwd m2, m1 paddd m4, m2 movu m2, [ref2q+(%5)*2] psubusw m3, m2 psubusw m2, m0 por m2, m3 mova m3, m0 pmaddwd m2, m1 paddd m5, m2 movu m2, [ref3q+(%5)*2] psubusw m3, m2 psubusw m2, m0 por m2, m3 mova m3, m0 pmaddwd m2, m1 paddd m6, m2 movu m2, [ref4q+(%5)*2] psubusw m3, m2 psubusw m2, m0 %if %6 == 1 lea srcq, [srcq +src_strideq*4] lea ref1q, [ref1q+ref_strideq*4] lea ref2q, [ref2q+ref_strideq*4] lea ref3q, [ref3q+ref_strideq*4] lea ref4q, [ref4q+ref_strideq*4] %endif por m2, m3 pmaddwd m2, m1 paddd m7, m2 %endmacro ; HIGH_PROCESS_16x2x4 first, off_{first,second}_{src,ref}, advance_at_end %macro HIGH_PROCESS_16x2x4 5-6 0 HIGH_PROCESS_8x2x4 %1, %2, %3, (%2 + 8), (%3 + 8) HIGH_PROCESS_8x2x4 0, %4, %5, (%4 + 8), (%5 + 8), %6 %endmacro ; HIGH_PROCESS_32x2x4 first, off_{first,second}_{src,ref}, advance_at_end %macro HIGH_PROCESS_32x2x4 5-6 0 HIGH_PROCESS_16x2x4 %1, %2, %3, (%2 + 16), (%3 + 16) HIGH_PROCESS_16x2x4 0, %4, %5, (%4 + 16), (%5 + 16), %6 %endmacro ; HIGH_PROCESS_64x2x4 first, off_{first,second}_{src,ref}, advance_at_end %macro HIGH_PROCESS_64x2x4 5-6 0 
HIGH_PROCESS_32x2x4 %1, %2, %3, (%2 + 32), (%3 + 32) HIGH_PROCESS_32x2x4 0, %4, %5, (%4 + 32), (%5 + 32), %6 %endmacro ; void vpx_highbd_sadNxNx4d_sse2(uint8_t *src, int src_stride, ; uint8_t *ref[4], int ref_stride, ; uint32_t res[4]); ; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16 or 8x8 %macro HIGH_SADNXN4D 2 %if UNIX64 cglobal highbd_sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \ res, ref2, ref3, ref4 %else cglobal highbd_sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ ref2, ref3, ref4 %endif ; set m1 push srcq mov srcd, 0x00010001 movd m1, srcd pshufd m1, m1, 0x0 pop srcq movsxdifnidn src_strideq, src_strided movsxdifnidn ref_strideq, ref_strided mov ref2q, [ref1q+gprsize*1] mov ref3q, [ref1q+gprsize*2] mov ref4q, [ref1q+gprsize*3] mov ref1q, [ref1q+gprsize*0] ; convert byte pointers to short pointers shl srcq, 1 shl ref2q, 1 shl ref3q, 1 shl ref4q, 1 shl ref1q, 1 HIGH_PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1 %rep (%2-4)/2 HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1 %endrep HIGH_PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0 ; N.B. HIGH_PROCESS outputs dwords (32 bits) ; so in high bit depth even the smallest width (4) needs 128bits i.e. XMM movhlps m0, m4 movhlps m1, m5 movhlps m2, m6 movhlps m3, m7 paddd m4, m0 paddd m5, m1 paddd m6, m2 paddd m7, m3 punpckldq m4, m5 punpckldq m6, m7 movhlps m0, m4 movhlps m1, m6 paddd m4, m0 paddd m6, m1 punpcklqdq m4, m6 movifnidn r4, r4mp movu [r4], m4 RET %endmacro INIT_XMM sse2 HIGH_SADNXN4D 64, 64 HIGH_SADNXN4D 64, 32 HIGH_SADNXN4D 32, 64 HIGH_SADNXN4D 32, 32 HIGH_SADNXN4D 32, 16 HIGH_SADNXN4D 16, 32 HIGH_SADNXN4D 16, 16 HIGH_SADNXN4D 16, 8 HIGH_SADNXN4D 8, 16 HIGH_SADNXN4D 8, 8 HIGH_SADNXN4D 8, 4 HIGH_SADNXN4D 4, 8 HIGH_SADNXN4D 4, 4 libvpx-1.8.2/vpx_dsp/x86/highbd_sad_sse2.asm000066400000000000000000000272121357355204000206600ustar00rootroot00000000000000; ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
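The HIGH_SADNXN4D entry points above score one source block against four candidate references in a single pass, keeping four running sums in m4..m7 (widened through pmaddwd against a vector of ones) and storing four 32-bit results. A scalar C reference with the same contract; pointers are shown after the byte-to-short conversion the assembly performs, i.e. already addressing uint16_t samples:

#include <stdint.h>

/* Reference model for vpx_highbd_sadNxNx4d: four SADs in one call. */
static void highbd_sad4d_ref(const uint16_t *src, int src_stride,
                             const uint16_t *const ref[4], int ref_stride,
                             int w, int h, uint32_t sad[4]) {
  int r, x, y;
  for (r = 0; r < 4; ++r) {
    uint32_t sum = 0;
    for (y = 0; y < h; ++y) {
      for (x = 0; x < w; ++x) {
        const int d = (int)src[y * src_stride + x] -
                      (int)ref[r][y * ref_stride + x];
        sum += (uint32_t)(d >= 0 ? d : -d);
      }
    }
    sad[r] = sum;
  }
}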
; %include "third_party/x86inc/x86inc.asm" SECTION .text %macro HIGH_SAD_FN 4 %if %4 == 0 %if %3 == 5 cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, n_rows %else ; %3 == 7 cglobal highbd_sad%1x%2, 4, %3, 7, src, src_stride, ref, ref_stride, \ src_stride3, ref_stride3, n_rows %endif ; %3 == 5/7 %else ; avg %if %3 == 5 cglobal highbd_sad%1x%2_avg, 5, 1 + %3, 7, src, src_stride, ref, ref_stride, \ second_pred, n_rows %else ; %3 == 7 cglobal highbd_sad%1x%2_avg, 5, VPX_ARCH_X86_64 + %3, 7, src, src_stride, \ ref, ref_stride, \ second_pred, \ src_stride3, ref_stride3 %if VPX_ARCH_X86_64 %define n_rowsd r7d %else ; x86-32 %define n_rowsd dword r0m %endif ; x86-32/64 %endif ; %3 == 5/7 %endif ; avg/sad movsxdifnidn src_strideq, src_strided movsxdifnidn ref_strideq, ref_strided %if %3 == 7 lea src_stride3q, [src_strideq*3] lea ref_stride3q, [ref_strideq*3] %endif ; %3 == 7 ; convert src, ref & second_pred to short ptrs (from byte ptrs) shl srcq, 1 shl refq, 1 %if %4 == 1 shl second_predq, 1 %endif %endmacro ; unsigned int vpx_highbd_sad64x{16,32,64}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro HIGH_SAD64XN 1-2 0 HIGH_SAD_FN 64, %1, 5, %2 mov n_rowsd, %1 pxor m0, m0 pxor m6, m6 .loop: ; first half of each row movu m1, [refq] movu m2, [refq+16] movu m3, [refq+32] movu m4, [refq+48] %if %2 == 1 pavgw m1, [second_predq+mmsize*0] pavgw m2, [second_predq+mmsize*1] pavgw m3, [second_predq+mmsize*2] pavgw m4, [second_predq+mmsize*3] lea second_predq, [second_predq+mmsize*4] %endif mova m5, [srcq] psubusw m5, m1 psubusw m1, [srcq] por m1, m5 mova m5, [srcq+16] psubusw m5, m2 psubusw m2, [srcq+16] por m2, m5 mova m5, [srcq+32] psubusw m5, m3 psubusw m3, [srcq+32] por m3, m5 mova m5, [srcq+48] psubusw m5, m4 psubusw m4, [srcq+48] por m4, m5 paddw m1, m2 paddw m3, m4 movhlps m2, m1 movhlps m4, m3 paddw m1, m2 paddw m3, m4 punpcklwd m1, m6 punpcklwd m3, m6 paddd m0, m1 paddd m0, m3 ; second half of each row movu m1, [refq+64] movu m2, [refq+80] movu m3, [refq+96] movu m4, [refq+112] %if %2 == 1 pavgw m1, [second_predq+mmsize*0] pavgw m2, [second_predq+mmsize*1] pavgw m3, [second_predq+mmsize*2] pavgw m4, [second_predq+mmsize*3] lea second_predq, [second_predq+mmsize*4] %endif mova m5, [srcq+64] psubusw m5, m1 psubusw m1, [srcq+64] por m1, m5 mova m5, [srcq+80] psubusw m5, m2 psubusw m2, [srcq+80] por m2, m5 mova m5, [srcq+96] psubusw m5, m3 psubusw m3, [srcq+96] por m3, m5 mova m5, [srcq+112] psubusw m5, m4 psubusw m4, [srcq+112] por m4, m5 paddw m1, m2 paddw m3, m4 movhlps m2, m1 movhlps m4, m3 paddw m1, m2 paddw m3, m4 punpcklwd m1, m6 punpcklwd m3, m6 lea refq, [refq+ref_strideq*2] paddd m0, m1 lea srcq, [srcq+src_strideq*2] paddd m0, m3 dec n_rowsd jg .loop movhlps m1, m0 paddd m0, m1 punpckldq m0, m6 movhlps m1, m0 paddd m0, m1 movd eax, m0 RET %endmacro INIT_XMM sse2 HIGH_SAD64XN 64 ; highbd_sad64x64_sse2 HIGH_SAD64XN 32 ; highbd_sad64x32_sse2 HIGH_SAD64XN 64, 1 ; highbd_sad64x64_avg_sse2 HIGH_SAD64XN 32, 1 ; highbd_sad64x32_avg_sse2 ; unsigned int vpx_highbd_sad32x{16,32,64}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro HIGH_SAD32XN 1-2 0 HIGH_SAD_FN 32, %1, 5, %2 mov n_rowsd, %1 pxor m0, m0 pxor m6, m6 .loop: movu m1, [refq] movu m2, [refq+16] movu m3, [refq+32] movu m4, [refq+48] %if %2 == 1 pavgw m1, [second_predq+mmsize*0] pavgw m2, [second_predq+mmsize*1] pavgw m3, [second_predq+mmsize*2] pavgw m4, [second_predq+mmsize*3] lea second_predq, [second_predq+mmsize*4] %endif mova m5, [srcq] psubusw m5, m1 psubusw m1, [srcq] 
por m1, m5 mova m5, [srcq+16] psubusw m5, m2 psubusw m2, [srcq+16] por m2, m5 mova m5, [srcq+32] psubusw m5, m3 psubusw m3, [srcq+32] por m3, m5 mova m5, [srcq+48] psubusw m5, m4 psubusw m4, [srcq+48] por m4, m5 paddw m1, m2 paddw m3, m4 movhlps m2, m1 movhlps m4, m3 paddw m1, m2 paddw m3, m4 punpcklwd m1, m6 punpcklwd m3, m6 lea refq, [refq+ref_strideq*2] paddd m0, m1 lea srcq, [srcq+src_strideq*2] paddd m0, m3 dec n_rowsd jg .loop movhlps m1, m0 paddd m0, m1 punpckldq m0, m6 movhlps m1, m0 paddd m0, m1 movd eax, m0 RET %endmacro INIT_XMM sse2 HIGH_SAD32XN 64 ; highbd_sad32x64_sse2 HIGH_SAD32XN 32 ; highbd_sad32x32_sse2 HIGH_SAD32XN 16 ; highbd_sad32x16_sse2 HIGH_SAD32XN 64, 1 ; highbd_sad32x64_avg_sse2 HIGH_SAD32XN 32, 1 ; highbd_sad32x32_avg_sse2 HIGH_SAD32XN 16, 1 ; highbd_sad32x16_avg_sse2 ; unsigned int vpx_highbd_sad16x{8,16,32}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro HIGH_SAD16XN 1-2 0 HIGH_SAD_FN 16, %1, 5, %2 mov n_rowsd, %1/2 pxor m0, m0 pxor m6, m6 .loop: movu m1, [refq] movu m2, [refq+16] movu m3, [refq+ref_strideq*2] movu m4, [refq+ref_strideq*2+16] %if %2 == 1 pavgw m1, [second_predq+mmsize*0] pavgw m2, [second_predq+16] pavgw m3, [second_predq+mmsize*2] pavgw m4, [second_predq+mmsize*2+16] lea second_predq, [second_predq+mmsize*4] %endif mova m5, [srcq] psubusw m5, m1 psubusw m1, [srcq] por m1, m5 mova m5, [srcq+16] psubusw m5, m2 psubusw m2, [srcq+16] por m2, m5 mova m5, [srcq+src_strideq*2] psubusw m5, m3 psubusw m3, [srcq+src_strideq*2] por m3, m5 mova m5, [srcq+src_strideq*2+16] psubusw m5, m4 psubusw m4, [srcq+src_strideq*2+16] por m4, m5 paddw m1, m2 paddw m3, m4 movhlps m2, m1 movhlps m4, m3 paddw m1, m2 paddw m3, m4 punpcklwd m1, m6 punpcklwd m3, m6 lea refq, [refq+ref_strideq*4] paddd m0, m1 lea srcq, [srcq+src_strideq*4] paddd m0, m3 dec n_rowsd jg .loop movhlps m1, m0 paddd m0, m1 punpckldq m0, m6 movhlps m1, m0 paddd m0, m1 movd eax, m0 RET %endmacro INIT_XMM sse2 HIGH_SAD16XN 32 ; highbd_sad16x32_sse2 HIGH_SAD16XN 16 ; highbd_sad16x16_sse2 HIGH_SAD16XN 8 ; highbd_sad16x8_sse2 HIGH_SAD16XN 32, 1 ; highbd_sad16x32_avg_sse2 HIGH_SAD16XN 16, 1 ; highbd_sad16x16_avg_sse2 HIGH_SAD16XN 8, 1 ; highbd_sad16x8_avg_sse2 ; unsigned int vpx_highbd_sad8x{4,8,16}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro HIGH_SAD8XN 1-2 0 HIGH_SAD_FN 8, %1, 7, %2 mov n_rowsd, %1/4 pxor m0, m0 pxor m6, m6 .loop: movu m1, [refq] movu m2, [refq+ref_strideq*2] movu m3, [refq+ref_strideq*4] movu m4, [refq+ref_stride3q*2] %if %2 == 1 pavgw m1, [second_predq+mmsize*0] pavgw m2, [second_predq+mmsize*1] pavgw m3, [second_predq+mmsize*2] pavgw m4, [second_predq+mmsize*3] lea second_predq, [second_predq+mmsize*4] %endif mova m5, [srcq] psubusw m5, m1 psubusw m1, [srcq] por m1, m5 mova m5, [srcq+src_strideq*2] psubusw m5, m2 psubusw m2, [srcq+src_strideq*2] por m2, m5 mova m5, [srcq+src_strideq*4] psubusw m5, m3 psubusw m3, [srcq+src_strideq*4] por m3, m5 mova m5, [srcq+src_stride3q*2] psubusw m5, m4 psubusw m4, [srcq+src_stride3q*2] por m4, m5 paddw m1, m2 paddw m3, m4 movhlps m2, m1 movhlps m4, m3 paddw m1, m2 paddw m3, m4 punpcklwd m1, m6 punpcklwd m3, m6 lea refq, [refq+ref_strideq*8] paddd m0, m1 lea srcq, [srcq+src_strideq*8] paddd m0, m3 dec n_rowsd jg .loop movhlps m1, m0 paddd m0, m1 punpckldq m0, m6 movhlps m1, m0 paddd m0, m1 movd eax, m0 RET %endmacro INIT_XMM sse2 HIGH_SAD8XN 16 ; highbd_sad8x16_sse2 HIGH_SAD8XN 8 ; highbd_sad8x8_sse2 HIGH_SAD8XN 4 ; highbd_sad8x4_sse2 HIGH_SAD8XN 16, 1 ; highbd_sad8x16_avg_sse2 HIGH_SAD8XN 
8, 1 ; highbd_sad8x8_avg_sse2 HIGH_SAD8XN 4, 1 ; highbd_sad8x4_avg_sse2 libvpx-1.8.2/vpx_dsp/x86/highbd_subpel_variance_impl_sse2.asm000066400000000000000000000762301357355204000243000ustar00rootroot00000000000000; ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "third_party/x86inc/x86inc.asm" SECTION_RODATA pw_8: times 8 dw 8 bilin_filter_m_sse2: times 8 dw 16 times 8 dw 0 times 8 dw 14 times 8 dw 2 times 8 dw 12 times 8 dw 4 times 8 dw 10 times 8 dw 6 times 16 dw 8 times 8 dw 6 times 8 dw 10 times 8 dw 4 times 8 dw 12 times 8 dw 2 times 8 dw 14 SECTION .text ; int vpx_sub_pixel_varianceNxh(const uint8_t *src, ptrdiff_t src_stride, ; int x_offset, int y_offset, ; const uint8_t *ref, ptrdiff_t ref_stride, ; int height, unsigned int *sse); ; ; This function returns the SE and stores SSE in the given pointer. %macro SUM_SSE 6 ; src1, ref1, src2, ref2, sum, sse psubw %3, %4 psubw %1, %2 mova %4, %3 ; make copies to manipulate to calc sum mova %2, %1 ; use originals for calc sse pmaddwd %3, %3 paddw %4, %2 pmaddwd %1, %1 movhlps %2, %4 paddd %6, %3 paddw %4, %2 pxor %2, %2 pcmpgtw %2, %4 ; mask for 0 > %4 (sum) punpcklwd %4, %2 ; sign-extend word to dword paddd %6, %1 paddd %5, %4 %endmacro %macro STORE_AND_RET 0 %if mmsize == 16 ; if H=64 and W=16, we have 8 words of each 2(1bit)x64(6bit)x9bit=16bit ; in m6, i.e. it _exactly_ fits in a signed word per word in the xmm reg. ; We have to sign-extend it before adding the words within the register ; and outputing to a dword. 
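The sign extension the comment above describes needs no SSE4.1 pmovsxwd: comparing the packed sums against zero with pcmpgtw yields an all-ones word wherever the sum is negative, and interleaving the value with that mask produces correct 32-bit results. The same idiom in intrinsics form (mirrors the pxor/pcmpgtw/punpcklwd triple in SUM_SSE):

#include <emmintrin.h>

/* Widen the low four signed 16-bit lanes to 32 bits using SSE2 only. */
static __m128i sign_extend_lo_epi16(__m128i v) {
  /* 0xFFFF in every lane where v < 0, else 0 -- exactly the sign bits. */
  const __m128i sign = _mm_cmpgt_epi16(_mm_setzero_si128(), v);
  return _mm_unpacklo_epi16(v, sign);
}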
movhlps m3, m7 movhlps m4, m6 paddd m7, m3 paddd m6, m4 pshufd m3, m7, 0x1 pshufd m4, m6, 0x1 paddd m7, m3 paddd m6, m4 mov r1, ssem ; r1 = unsigned int *sse movd [r1], m7 ; store sse movd eax, m6 ; store sum as return value %endif RET %endmacro %macro INC_SRC_BY_SRC_STRIDE 0 %if VPX_ARCH_X86=1 && CONFIG_PIC=1 add srcq, src_stridemp add srcq, src_stridemp %else lea srcq, [srcq + src_strideq*2] %endif %endmacro %macro SUBPEL_VARIANCE 1-2 0 ; W %define bilin_filter_m bilin_filter_m_sse2 %define filter_idx_shift 5 %if VPX_ARCH_X86_64 %if %2 == 1 ; avg cglobal highbd_sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \ x_offset, y_offset, \ ref, ref_stride, \ second_pred, second_stride, height, sse %define second_str second_strideq %else cglobal highbd_sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \ x_offset, y_offset, \ ref, ref_stride, height, sse %endif %define block_height heightd %define bilin_filter sseq %else %if CONFIG_PIC=1 %if %2 == 1 ; avg cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \ x_offset, y_offset, \ ref, ref_stride, \ second_pred, second_stride, height, sse %define block_height dword heightm %define second_str second_stridemp %else cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \ x_offset, y_offset, \ ref, ref_stride, height, sse %define block_height heightd %endif ; reuse argument stack space %define g_bilin_filterm x_offsetm %define g_pw_8m y_offsetm ; Store bilin_filter and pw_8 location in stack %if GET_GOT_DEFINED == 1 GET_GOT eax add esp, 4 ; restore esp %endif lea ecx, [GLOBAL(bilin_filter_m)] mov g_bilin_filterm, ecx lea ecx, [GLOBAL(pw_8)] mov g_pw_8m, ecx LOAD_IF_USED 0, 1 ; load eax, ecx back %else %if %2 == 1 ; avg cglobal highbd_sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \ x_offset, y_offset, \ ref, ref_stride, \ second_pred, second_stride, height, sse %define block_height dword heightm %define second_str second_stridemp %else cglobal highbd_sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \ x_offset, y_offset, \ ref, ref_stride, height, sse %define block_height heightd %endif %define bilin_filter bilin_filter_m %endif %endif ASSERT %1 <= 16 ; m6 overflows if w > 16 pxor m6, m6 ; sum pxor m7, m7 ; sse %if %1 < 16 sar block_height, 1 %endif %if %2 == 1 ; avg shl second_str, 1 %endif ; FIXME(rbultje) replace by jumptable? 
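What follows is the branch ladder the FIXME would replace with a jump table: each offset is classified three ways, giving nine specialized loops. Offset 0 means rows are used unfiltered, offset 8 is the exact half-pel position handled with pavgw, and anything else takes the general two-tap bilinear path. A compact C view of the classification (enumeration names are illustrative):

/* Offset classification used by the nine .x_*_y_* loops below. */
enum offset_kind { OFFSET_ZERO, OFFSET_HALF, OFFSET_BILINEAR };

static enum offset_kind classify_offset(int offset) {
  if (offset == 0) return OFFSET_ZERO; /* copy rows, no filtering   */
  if (offset == 8) return OFFSET_HALF; /* pavgw: (a + b + 1) >> 1   */
  return OFFSET_BILINEAR;              /* taps from bilin_filter_m  */
}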
test x_offsetd, x_offsetd jnz .x_nonzero ; x_offset == 0 test y_offsetd, y_offsetd jnz .x_zero_y_nonzero ; x_offset == 0 && y_offset == 0 .x_zero_y_zero_loop: %if %1 == 16 movu m0, [srcq] movu m2, [srcq + 16] mova m1, [refq] mova m3, [refq + 16] %if %2 == 1 ; avg pavgw m0, [second_predq] pavgw m2, [second_predq+16] %endif SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq + src_strideq*2] lea refq, [refq + ref_strideq*2] %if %2 == 1 ; avg add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m2, [srcq + src_strideq*2] mova m1, [refq] mova m3, [refq + ref_strideq*2] %if %2 == 1 ; avg pavgw m0, [second_predq] add second_predq, second_str pavgw m2, [second_predq] %endif SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq + src_strideq*4] lea refq, [refq + ref_strideq*4] %if %2 == 1 ; avg add second_predq, second_str %endif %endif dec block_height jg .x_zero_y_zero_loop STORE_AND_RET .x_zero_y_nonzero: cmp y_offsetd, 8 jne .x_zero_y_nonhalf ; x_offset == 0 && y_offset == 0.5 .x_zero_y_half_loop: %if %1 == 16 movu m0, [srcq] movu m1, [srcq+16] movu m4, [srcq+src_strideq*2] movu m5, [srcq+src_strideq*2+16] mova m2, [refq] mova m3, [refq+16] pavgw m0, m4 pavgw m1, m5 %if %2 == 1 ; avg pavgw m0, [second_predq] pavgw m1, [second_predq+16] %endif SUM_SSE m0, m2, m1, m3, m6, m7 lea srcq, [srcq + src_strideq*2] lea refq, [refq + ref_strideq*2] %if %2 == 1 ; avg add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m1, [srcq+src_strideq*2] movu m5, [srcq+src_strideq*4] mova m2, [refq] mova m3, [refq+ref_strideq*2] pavgw m0, m1 pavgw m1, m5 %if %2 == 1 ; avg pavgw m0, [second_predq] add second_predq, second_str pavgw m1, [second_predq] %endif SUM_SSE m0, m2, m1, m3, m6, m7 lea srcq, [srcq + src_strideq*4] lea refq, [refq + ref_strideq*4] %if %2 == 1 ; avg add second_predq, second_str %endif %endif dec block_height jg .x_zero_y_half_loop STORE_AND_RET .x_zero_y_nonhalf: ; x_offset == 0 && y_offset == bilin interpolation %if VPX_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl y_offsetd, filter_idx_shift %if VPX_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+y_offsetq] mova m9, [bilin_filter+y_offsetq+16] mova m10, [GLOBAL(pw_8)] %define filter_y_a m8 %define filter_y_b m9 %define filter_rnd m10 %else ; x86-32 or mmx %if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; x_offset == 0, reuse x_offset reg %define tempq x_offsetq add y_offsetq, g_bilin_filterm %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] mov tempq, g_pw_8m %define filter_rnd [tempq] %else add y_offsetq, bilin_filter %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] %define filter_rnd [GLOBAL(pw_8)] %endif %endif .x_zero_y_other_loop: %if %1 == 16 movu m0, [srcq] movu m1, [srcq + 16] movu m4, [srcq+src_strideq*2] movu m5, [srcq+src_strideq*2+16] mova m2, [refq] mova m3, [refq+16] ; FIXME(rbultje) instead of out=((num-x)*in1+x*in2+rnd)>>log2(num), we can ; also do out=in1+(((num-x)*(in2-in1)+rnd)>>log2(num)). Total number of ; instructions is the same (5), but it is 1 mul instead of 2, so might be ; slightly faster because of pmullw latency. It would also cut our rodata ; tables in half for this function, and save 1-2 registers on x86-64. 
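To unpack the FIXME above: with num = 16, the identity (16 - x) * a + x * b = 16 * a + x * (b - a) means the rounded output can also be written as out = a + ((x * (b - a) + 8) >> 4), one multiply instead of two, provided the shift floors, since x * (b - a) + 8 can go negative. A small C check of the equivalence (assumes the usual arithmetic >> on negative signed values):

#include <assert.h>

static int bilin_2mul(int a, int b, int x) {
  return ((16 - x) * a + x * b + 8) >> 4;
}
static int bilin_1mul(int a, int b, int x) {
  return a + ((x * (b - a) + 8) >> 4); /* needs a signed, flooring shift */
}

int main(void) {
  int a, b, x;
  for (a = 0; a < 1024; a += 7)
    for (b = 0; b < 1024; b += 5)
      for (x = 0; x < 16; ++x)
        assert(bilin_2mul(a, b, x) == bilin_1mul(a, b, x));
  return 0;
}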
pmullw m1, filter_y_a pmullw m5, filter_y_b paddw m1, filter_rnd pmullw m0, filter_y_a pmullw m4, filter_y_b paddw m0, filter_rnd paddw m1, m5 paddw m0, m4 psrlw m1, 4 psrlw m0, 4 %if %2 == 1 ; avg pavgw m0, [second_predq] pavgw m1, [second_predq+16] %endif SUM_SSE m0, m2, m1, m3, m6, m7 lea srcq, [srcq + src_strideq*2] lea refq, [refq + ref_strideq*2] %if %2 == 1 ; avg add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m1, [srcq+src_strideq*2] movu m5, [srcq+src_strideq*4] mova m4, m1 mova m2, [refq] mova m3, [refq+ref_strideq*2] pmullw m1, filter_y_a pmullw m5, filter_y_b paddw m1, filter_rnd pmullw m0, filter_y_a pmullw m4, filter_y_b paddw m0, filter_rnd paddw m1, m5 paddw m0, m4 psrlw m1, 4 psrlw m0, 4 %if %2 == 1 ; avg pavgw m0, [second_predq] add second_predq, second_str pavgw m1, [second_predq] %endif SUM_SSE m0, m2, m1, m3, m6, m7 lea srcq, [srcq + src_strideq*4] lea refq, [refq + ref_strideq*4] %if %2 == 1 ; avg add second_predq, second_str %endif %endif dec block_height jg .x_zero_y_other_loop %undef filter_y_a %undef filter_y_b %undef filter_rnd STORE_AND_RET .x_nonzero: cmp x_offsetd, 8 jne .x_nonhalf ; x_offset == 0.5 test y_offsetd, y_offsetd jnz .x_half_y_nonzero ; x_offset == 0.5 && y_offset == 0 .x_half_y_zero_loop: %if %1 == 16 movu m0, [srcq] movu m1, [srcq + 16] movu m4, [srcq + 2] movu m5, [srcq + 18] mova m2, [refq] mova m3, [refq + 16] pavgw m0, m4 pavgw m1, m5 %if %2 == 1 ; avg pavgw m0, [second_predq] pavgw m1, [second_predq+16] %endif SUM_SSE m0, m2, m1, m3, m6, m7 lea srcq, [srcq + src_strideq*2] lea refq, [refq + ref_strideq*2] %if %2 == 1 ; avg add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m1, [srcq + src_strideq*2] movu m4, [srcq + 2] movu m5, [srcq + src_strideq*2 + 2] mova m2, [refq] mova m3, [refq + ref_strideq*2] pavgw m0, m4 pavgw m1, m5 %if %2 == 1 ; avg pavgw m0, [second_predq] add second_predq, second_str pavgw m1, [second_predq] %endif SUM_SSE m0, m2, m1, m3, m6, m7 lea srcq, [srcq + src_strideq*4] lea refq, [refq + ref_strideq*4] %if %2 == 1 ; avg add second_predq, second_str %endif %endif dec block_height jg .x_half_y_zero_loop STORE_AND_RET .x_half_y_nonzero: cmp y_offsetd, 8 jne .x_half_y_nonhalf ; x_offset == 0.5 && y_offset == 0.5 %if %1 == 16 movu m0, [srcq] movu m1, [srcq+16] movu m2, [srcq+2] movu m3, [srcq+18] lea srcq, [srcq + src_strideq*2] pavgw m0, m2 pavgw m1, m3 .x_half_y_half_loop: movu m2, [srcq] movu m3, [srcq + 16] movu m4, [srcq + 2] movu m5, [srcq + 18] pavgw m2, m4 pavgw m3, m5 pavgw m0, m2 pavgw m1, m3 mova m4, [refq] mova m5, [refq + 16] %if %2 == 1 ; avg pavgw m0, [second_predq] pavgw m1, [second_predq+16] %endif SUM_SSE m0, m4, m1, m5, m6, m7 mova m0, m2 mova m1, m3 lea srcq, [srcq + src_strideq*2] lea refq, [refq + ref_strideq*2] %if %2 == 1 ; avg add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m2, [srcq+2] lea srcq, [srcq + src_strideq*2] pavgw m0, m2 .x_half_y_half_loop: movu m2, [srcq] movu m3, [srcq + src_strideq*2] movu m4, [srcq + 2] movu m5, [srcq + src_strideq*2 + 2] pavgw m2, m4 pavgw m3, m5 pavgw m0, m2 pavgw m2, m3 mova m4, [refq] mova m5, [refq + ref_strideq*2] %if %2 == 1 ; avg pavgw m0, [second_predq] add second_predq, second_str pavgw m2, [second_predq] %endif SUM_SSE m0, m4, m2, m5, m6, m7 mova m0, m3 lea srcq, [srcq + src_strideq*4] lea refq, [refq + ref_strideq*4] %if %2 == 1 ; avg add second_predq, second_str %endif %endif dec block_height jg .x_half_y_half_loop STORE_AND_RET .x_half_y_nonhalf: ; x_offset == 0.5 && y_offset == 
bilin interpolation %if VPX_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl y_offsetd, filter_idx_shift %if VPX_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+y_offsetq] mova m9, [bilin_filter+y_offsetq+16] mova m10, [GLOBAL(pw_8)] %define filter_y_a m8 %define filter_y_b m9 %define filter_rnd m10 %else ; x86_32 %if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; x_offset == 0.5. We can reuse x_offset reg %define tempq x_offsetq add y_offsetq, g_bilin_filterm %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] mov tempq, g_pw_8m %define filter_rnd [tempq] %else add y_offsetq, bilin_filter %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] %define filter_rnd [GLOBAL(pw_8)] %endif %endif %if %1 == 16 movu m0, [srcq] movu m1, [srcq+16] movu m2, [srcq+2] movu m3, [srcq+18] lea srcq, [srcq + src_strideq*2] pavgw m0, m2 pavgw m1, m3 .x_half_y_other_loop: movu m2, [srcq] movu m3, [srcq+16] movu m4, [srcq+2] movu m5, [srcq+18] pavgw m2, m4 pavgw m3, m5 mova m4, m2 mova m5, m3 pmullw m1, filter_y_a pmullw m3, filter_y_b paddw m1, filter_rnd paddw m1, m3 pmullw m0, filter_y_a pmullw m2, filter_y_b paddw m0, filter_rnd psrlw m1, 4 paddw m0, m2 mova m2, [refq] psrlw m0, 4 mova m3, [refq+16] %if %2 == 1 ; avg pavgw m0, [second_predq] pavgw m1, [second_predq+16] %endif SUM_SSE m0, m2, m1, m3, m6, m7 mova m0, m4 mova m1, m5 lea srcq, [srcq + src_strideq*2] lea refq, [refq + ref_strideq*2] %if %2 == 1 ; avg add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m2, [srcq+2] lea srcq, [srcq + src_strideq*2] pavgw m0, m2 .x_half_y_other_loop: movu m2, [srcq] movu m3, [srcq+src_strideq*2] movu m4, [srcq+2] movu m5, [srcq+src_strideq*2+2] pavgw m2, m4 pavgw m3, m5 mova m4, m2 mova m5, m3 pmullw m4, filter_y_a pmullw m3, filter_y_b paddw m4, filter_rnd paddw m4, m3 pmullw m0, filter_y_a pmullw m2, filter_y_b paddw m0, filter_rnd psrlw m4, 4 paddw m0, m2 mova m2, [refq] psrlw m0, 4 mova m3, [refq+ref_strideq*2] %if %2 == 1 ; avg pavgw m0, [second_predq] add second_predq, second_str pavgw m4, [second_predq] %endif SUM_SSE m0, m2, m4, m3, m6, m7 mova m0, m5 lea srcq, [srcq + src_strideq*4] lea refq, [refq + ref_strideq*4] %if %2 == 1 ; avg add second_predq, second_str %endif %endif dec block_height jg .x_half_y_other_loop %undef filter_y_a %undef filter_y_b %undef filter_rnd STORE_AND_RET .x_nonhalf: test y_offsetd, y_offsetd jnz .x_nonhalf_y_nonzero ; x_offset == bilin interpolation && y_offset == 0 %if VPX_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift %if VPX_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+x_offsetq] mova m9, [bilin_filter+x_offsetq+16] mova m10, [GLOBAL(pw_8)] %define filter_x_a m8 %define filter_x_b m9 %define filter_rnd m10 %else ; x86-32 %if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; y_offset == 0. We can reuse y_offset reg. 
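For the filter load just above (and its PIC variant below): the x offset, scaled by filter_idx_shift, selects one 32-byte row of the bilin_filter_m table, whose two halves are the broadcast taps filter_x_a and filter_x_b, and pw_8 supplies the rounding constant. Per output pixel, the x-only path then computes (a * src[i] + b * src[i + 1] + 8) >> 4. A scalar sketch of one filtered row:

#include <stdint.h>

/* One row of the x-only bilinear path; tap_a/tap_b mirror one row of
 * bilin_filter_m_sse2 (they always sum to 16), rounding by pw_8 = 8. */
static void bilin_row_u16(const uint16_t *src, uint16_t *dst, int w,
                          int tap_a, int tap_b) {
  int i;
  for (i = 0; i < w; ++i)
    dst[i] = (uint16_t)((tap_a * src[i] + tap_b * src[i + 1] + 8) >> 4);
}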
%define tempq y_offsetq add x_offsetq, g_bilin_filterm %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] mov tempq, g_pw_8m %define filter_rnd [tempq] %else add x_offsetq, bilin_filter %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] %define filter_rnd [GLOBAL(pw_8)] %endif %endif .x_other_y_zero_loop: %if %1 == 16 movu m0, [srcq] movu m1, [srcq+16] movu m2, [srcq+2] movu m3, [srcq+18] mova m4, [refq] mova m5, [refq+16] pmullw m1, filter_x_a pmullw m3, filter_x_b paddw m1, filter_rnd pmullw m0, filter_x_a pmullw m2, filter_x_b paddw m0, filter_rnd paddw m1, m3 paddw m0, m2 psrlw m1, 4 psrlw m0, 4 %if %2 == 1 ; avg pavgw m0, [second_predq] pavgw m1, [second_predq+16] %endif SUM_SSE m0, m4, m1, m5, m6, m7 lea srcq, [srcq+src_strideq*2] lea refq, [refq+ref_strideq*2] %if %2 == 1 ; avg add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m1, [srcq+src_strideq*2] movu m2, [srcq+2] movu m3, [srcq+src_strideq*2+2] mova m4, [refq] mova m5, [refq+ref_strideq*2] pmullw m1, filter_x_a pmullw m3, filter_x_b paddw m1, filter_rnd pmullw m0, filter_x_a pmullw m2, filter_x_b paddw m0, filter_rnd paddw m1, m3 paddw m0, m2 psrlw m1, 4 psrlw m0, 4 %if %2 == 1 ; avg pavgw m0, [second_predq] add second_predq, second_str pavgw m1, [second_predq] %endif SUM_SSE m0, m4, m1, m5, m6, m7 lea srcq, [srcq+src_strideq*4] lea refq, [refq+ref_strideq*4] %if %2 == 1 ; avg add second_predq, second_str %endif %endif dec block_height jg .x_other_y_zero_loop %undef filter_x_a %undef filter_x_b %undef filter_rnd STORE_AND_RET .x_nonhalf_y_nonzero: cmp y_offsetd, 8 jne .x_nonhalf_y_nonhalf ; x_offset == bilin interpolation && y_offset == 0.5 %if VPX_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift %if VPX_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+x_offsetq] mova m9, [bilin_filter+x_offsetq+16] mova m10, [GLOBAL(pw_8)] %define filter_x_a m8 %define filter_x_b m9 %define filter_rnd m10 %else ; x86-32 %if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; y_offset == 0.5. We can reuse y_offset reg. 
%define tempq y_offsetq add x_offsetq, g_bilin_filterm %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] mov tempq, g_pw_8m %define filter_rnd [tempq] %else add x_offsetq, bilin_filter %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] %define filter_rnd [GLOBAL(pw_8)] %endif %endif %if %1 == 16 movu m0, [srcq] movu m1, [srcq+16] movu m2, [srcq+2] movu m3, [srcq+18] pmullw m0, filter_x_a pmullw m2, filter_x_b paddw m0, filter_rnd pmullw m1, filter_x_a pmullw m3, filter_x_b paddw m1, filter_rnd paddw m0, m2 paddw m1, m3 psrlw m0, 4 psrlw m1, 4 lea srcq, [srcq+src_strideq*2] .x_other_y_half_loop: movu m2, [srcq] movu m3, [srcq+16] movu m4, [srcq+2] movu m5, [srcq+18] pmullw m2, filter_x_a pmullw m4, filter_x_b paddw m2, filter_rnd pmullw m3, filter_x_a pmullw m5, filter_x_b paddw m3, filter_rnd paddw m2, m4 paddw m3, m5 mova m4, [refq] mova m5, [refq+16] psrlw m2, 4 psrlw m3, 4 pavgw m0, m2 pavgw m1, m3 %if %2 == 1 ; avg pavgw m0, [second_predq] pavgw m1, [second_predq+16] %endif SUM_SSE m0, m4, m1, m5, m6, m7 mova m0, m2 mova m1, m3 lea srcq, [srcq+src_strideq*2] lea refq, [refq+ref_strideq*2] %if %2 == 1 ; avg add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m2, [srcq+2] pmullw m0, filter_x_a pmullw m2, filter_x_b paddw m0, filter_rnd paddw m0, m2 psrlw m0, 4 lea srcq, [srcq+src_strideq*2] .x_other_y_half_loop: movu m2, [srcq] movu m3, [srcq+src_strideq*2] movu m4, [srcq+2] movu m5, [srcq+src_strideq*2+2] pmullw m2, filter_x_a pmullw m4, filter_x_b paddw m2, filter_rnd pmullw m3, filter_x_a pmullw m5, filter_x_b paddw m3, filter_rnd paddw m2, m4 paddw m3, m5 mova m4, [refq] mova m5, [refq+ref_strideq*2] psrlw m2, 4 psrlw m3, 4 pavgw m0, m2 pavgw m2, m3 %if %2 == 1 ; avg pavgw m0, [second_predq] add second_predq, second_str pavgw m2, [second_predq] %endif SUM_SSE m0, m4, m2, m5, m6, m7 mova m0, m3 lea srcq, [srcq+src_strideq*4] lea refq, [refq+ref_strideq*4] %if %2 == 1 ; avg add second_predq, second_str %endif %endif dec block_height jg .x_other_y_half_loop %undef filter_x_a %undef filter_x_b %undef filter_rnd STORE_AND_RET .x_nonhalf_y_nonhalf: ; loading filter - this is same as in 8-bit depth %if VPX_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift ; filter_idx_shift = 5 shl y_offsetd, filter_idx_shift %if VPX_ARCH_X86_64 && mmsize == 16 mova m8, [bilin_filter+x_offsetq] mova m9, [bilin_filter+x_offsetq+16] mova m10, [bilin_filter+y_offsetq] mova m11, [bilin_filter+y_offsetq+16] mova m12, [GLOBAL(pw_8)] %define filter_x_a m8 %define filter_x_b m9 %define filter_y_a m10 %define filter_y_b m11 %define filter_rnd m12 %else ; x86-32 %if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; In this case, there is NO unused register. Used src_stride register. Later, ; src_stride has to be loaded from stack when it is needed. 
%define tempq src_strideq mov tempq, g_bilin_filterm add x_offsetq, tempq add y_offsetq, tempq %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] mov tempq, g_pw_8m %define filter_rnd [tempq] %else add x_offsetq, bilin_filter add y_offsetq, bilin_filter %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] %define filter_rnd [GLOBAL(pw_8)] %endif %endif ; end of load filter ; x_offset == bilin interpolation && y_offset == bilin interpolation %if %1 == 16 movu m0, [srcq] movu m2, [srcq+2] movu m1, [srcq+16] movu m3, [srcq+18] pmullw m0, filter_x_a pmullw m2, filter_x_b paddw m0, filter_rnd pmullw m1, filter_x_a pmullw m3, filter_x_b paddw m1, filter_rnd paddw m0, m2 paddw m1, m3 psrlw m0, 4 psrlw m1, 4 INC_SRC_BY_SRC_STRIDE .x_other_y_other_loop: movu m2, [srcq] movu m4, [srcq+2] movu m3, [srcq+16] movu m5, [srcq+18] pmullw m2, filter_x_a pmullw m4, filter_x_b paddw m2, filter_rnd pmullw m3, filter_x_a pmullw m5, filter_x_b paddw m3, filter_rnd paddw m2, m4 paddw m3, m5 psrlw m2, 4 psrlw m3, 4 mova m4, m2 mova m5, m3 pmullw m0, filter_y_a pmullw m2, filter_y_b paddw m0, filter_rnd pmullw m1, filter_y_a pmullw m3, filter_y_b paddw m0, m2 paddw m1, filter_rnd mova m2, [refq] paddw m1, m3 psrlw m0, 4 psrlw m1, 4 mova m3, [refq+16] %if %2 == 1 ; avg pavgw m0, [second_predq] pavgw m1, [second_predq+16] %endif SUM_SSE m0, m2, m1, m3, m6, m7 mova m0, m4 mova m1, m5 INC_SRC_BY_SRC_STRIDE lea refq, [refq + ref_strideq * 2] %if %2 == 1 ; avg add second_predq, second_str %endif %else ; %1 < 16 movu m0, [srcq] movu m2, [srcq+2] pmullw m0, filter_x_a pmullw m2, filter_x_b paddw m0, filter_rnd paddw m0, m2 psrlw m0, 4 INC_SRC_BY_SRC_STRIDE .x_other_y_other_loop: movu m2, [srcq] movu m4, [srcq+2] INC_SRC_BY_SRC_STRIDE movu m3, [srcq] movu m5, [srcq+2] pmullw m2, filter_x_a pmullw m4, filter_x_b paddw m2, filter_rnd pmullw m3, filter_x_a pmullw m5, filter_x_b paddw m3, filter_rnd paddw m2, m4 paddw m3, m5 psrlw m2, 4 psrlw m3, 4 mova m4, m2 mova m5, m3 pmullw m0, filter_y_a pmullw m2, filter_y_b paddw m0, filter_rnd pmullw m4, filter_y_a pmullw m3, filter_y_b paddw m0, m2 paddw m4, filter_rnd mova m2, [refq] paddw m4, m3 psrlw m0, 4 psrlw m4, 4 mova m3, [refq+ref_strideq*2] %if %2 == 1 ; avg pavgw m0, [second_predq] add second_predq, second_str pavgw m4, [second_predq] %endif SUM_SSE m0, m2, m4, m3, m6, m7 mova m0, m5 INC_SRC_BY_SRC_STRIDE lea refq, [refq + ref_strideq * 4] %if %2 == 1 ; avg add second_predq, second_str %endif %endif dec block_height jg .x_other_y_other_loop %undef filter_x_a %undef filter_x_b %undef filter_y_a %undef filter_y_b %undef filter_rnd STORE_AND_RET %endmacro INIT_XMM sse2 SUBPEL_VARIANCE 8 SUBPEL_VARIANCE 16 INIT_XMM sse2 SUBPEL_VARIANCE 8, 1 SUBPEL_VARIANCE 16, 1 libvpx-1.8.2/vpx_dsp/x86/highbd_variance_impl_sse2.asm000066400000000000000000000233111357355204000227160ustar00rootroot00000000000000; ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
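The two kernels this file defines below accumulate exactly two quantities per block: the signed sum of pixel differences and the sum of squared differences; the variance formula itself is applied later by the C wrappers. A scalar reference with the same contract:

#include <stdint.h>

/* Reference for vpx_highbd_calc{8x8,16x16}var_sse2: writes SSE and the
 * signed sum of (src - ref) for a size x size block of 16-bit pixels. */
static void highbd_calc_var_ref(const uint16_t *src, int src_stride,
                                const uint16_t *ref, int ref_stride, int size,
                                uint32_t *sse, int *sum) {
  int x, y;
  uint32_t sse_acc = 0;
  int sum_acc = 0;
  for (y = 0; y < size; ++y) {
    for (x = 0; x < size; ++x) {
      const int d =
          (int)src[y * src_stride + x] - (int)ref[y * ref_stride + x];
      sum_acc += d;
      sse_acc += (uint32_t)(d * d);
    }
  }
  *sse = sse_acc;
  *sum = sum_acc;
}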
; %include "vpx_ports/x86_abi_support.asm" SECTION .text ;unsigned int vpx_highbd_calc16x16var_sse2 ;( ; unsigned char * src_ptr, ; int src_stride, ; unsigned char * ref_ptr, ; int ref_stride, ; unsigned int * SSE, ; int * Sum ;) global sym(vpx_highbd_calc16x16var_sse2) PRIVATE sym(vpx_highbd_calc16x16var_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rbx push rsi push rdi ; end prolog mov rsi, arg(0) ;[src_ptr] mov rdi, arg(2) ;[ref_ptr] movsxd rax, DWORD PTR arg(1) ;[src_stride] movsxd rdx, DWORD PTR arg(3) ;[ref_stride] add rax, rax ; source stride in bytes add rdx, rdx ; recon stride in bytes ; Prefetch data prefetcht0 [rsi] prefetcht0 [rsi+16] prefetcht0 [rsi+rax] prefetcht0 [rsi+rax+16] lea rbx, [rsi+rax*2] prefetcht0 [rbx] prefetcht0 [rbx+16] prefetcht0 [rbx+rax] prefetcht0 [rbx+rax+16] prefetcht0 [rdi] prefetcht0 [rdi+16] prefetcht0 [rdi+rdx] prefetcht0 [rdi+rdx+16] lea rbx, [rdi+rdx*2] prefetcht0 [rbx] prefetcht0 [rbx+16] prefetcht0 [rbx+rdx] prefetcht0 [rbx+rdx+16] pxor xmm0, xmm0 ; clear xmm0 for unpack pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs pxor xmm6, xmm6 ; clear xmm6 for accumulating sse mov rcx, 16 .var16loop: movdqu xmm1, XMMWORD PTR [rsi] movdqu xmm2, XMMWORD PTR [rdi] lea rbx, [rsi+rax*2] prefetcht0 [rbx] prefetcht0 [rbx+16] prefetcht0 [rbx+rax] prefetcht0 [rbx+rax+16] lea rbx, [rdi+rdx*2] prefetcht0 [rbx] prefetcht0 [rbx+16] prefetcht0 [rbx+rdx] prefetcht0 [rbx+rdx+16] pxor xmm5, xmm5 psubw xmm1, xmm2 movdqu xmm3, XMMWORD PTR [rsi+16] paddw xmm5, xmm1 pmaddwd xmm1, xmm1 movdqu xmm2, XMMWORD PTR [rdi+16] paddd xmm6, xmm1 psubw xmm3, xmm2 movdqu xmm1, XMMWORD PTR [rsi+rax] paddw xmm5, xmm3 pmaddwd xmm3, xmm3 movdqu xmm2, XMMWORD PTR [rdi+rdx] paddd xmm6, xmm3 psubw xmm1, xmm2 movdqu xmm3, XMMWORD PTR [rsi+rax+16] paddw xmm5, xmm1 pmaddwd xmm1, xmm1 movdqu xmm2, XMMWORD PTR [rdi+rdx+16] paddd xmm6, xmm1 psubw xmm3, xmm2 paddw xmm5, xmm3 pmaddwd xmm3, xmm3 paddd xmm6, xmm3 movdqa xmm1, xmm5 movdqa xmm2, xmm5 pcmpgtw xmm1, xmm0 pcmpeqw xmm2, xmm0 por xmm1, xmm2 pcmpeqw xmm1, xmm0 movdqa xmm2, xmm5 punpcklwd xmm5, xmm1 punpckhwd xmm2, xmm1 paddd xmm7, xmm5 paddd xmm7, xmm2 lea rsi, [rsi + 2*rax] lea rdi, [rdi + 2*rdx] sub rcx, 2 jnz .var16loop movdqa xmm4, xmm6 punpckldq xmm6, xmm0 punpckhdq xmm4, xmm0 movdqa xmm5, xmm7 paddd xmm6, xmm4 punpckldq xmm7, xmm0 punpckhdq xmm5, xmm0 paddd xmm7, xmm5 movdqa xmm4, xmm6 movdqa xmm5, xmm7 psrldq xmm4, 8 psrldq xmm5, 8 paddd xmm6, xmm4 paddd xmm7, xmm5 mov rdi, arg(4) ; [SSE] mov rax, arg(5) ; [Sum] movd DWORD PTR [rdi], xmm6 movd DWORD PTR [rax], xmm7 ; begin epilog pop rdi pop rsi pop rbx RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;unsigned int vpx_highbd_calc8x8var_sse2 ;( ; unsigned char * src_ptr, ; int src_stride, ; unsigned char * ref_ptr, ; int ref_stride, ; unsigned int * SSE, ; int * Sum ;) global sym(vpx_highbd_calc8x8var_sse2) PRIVATE sym(vpx_highbd_calc8x8var_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rbx push rsi push rdi ; end prolog mov rsi, arg(0) ;[src_ptr] mov rdi, arg(2) ;[ref_ptr] movsxd rax, DWORD PTR arg(1) ;[src_stride] movsxd rdx, DWORD PTR arg(3) ;[ref_stride] add rax, rax ; source stride in bytes add rdx, rdx ; recon stride in bytes ; Prefetch data prefetcht0 [rsi] prefetcht0 [rsi+rax] lea rbx, [rsi+rax*2] prefetcht0 [rbx] prefetcht0 [rbx+rax] prefetcht0 [rdi] prefetcht0 [rdi+rdx] lea rbx, [rdi+rdx*2] prefetcht0 [rbx] prefetcht0 [rbx+rdx] pxor xmm0, xmm0 ; clear xmm0 for unpack pxor xmm7, xmm7 ; clear xmm7 for accumulating diffs pxor xmm6, xmm6 ; clear xmm6 
for accumulating sse mov rcx, 8 .var8loop: movdqu xmm1, XMMWORD PTR [rsi] movdqu xmm2, XMMWORD PTR [rdi] lea rbx, [rsi+rax*4] prefetcht0 [rbx] prefetcht0 [rbx+rax] lea rbx, [rbx+rax*2] prefetcht0 [rbx] prefetcht0 [rbx+rax] lea rbx, [rdi+rdx*4] prefetcht0 [rbx] prefetcht0 [rbx+rdx] lea rbx, [rbx+rdx*2] prefetcht0 [rbx] prefetcht0 [rbx+rdx] pxor xmm5, xmm5 psubw xmm1, xmm2 movdqu xmm3, XMMWORD PTR [rsi+rax] paddw xmm5, xmm1 pmaddwd xmm1, xmm1 movdqu xmm2, XMMWORD PTR [rdi+rdx] paddd xmm6, xmm1 lea rsi, [rsi + 2*rax] lea rdi, [rdi + 2*rdx] psubw xmm3, xmm2 movdqu xmm1, XMMWORD PTR [rsi] paddw xmm5, xmm3 pmaddwd xmm3, xmm3 movdqu xmm2, XMMWORD PTR [rdi] paddd xmm6, xmm3 psubw xmm1, xmm2 movdqu xmm3, XMMWORD PTR [rsi+rax] paddw xmm5, xmm1 pmaddwd xmm1, xmm1 movdqu xmm2, XMMWORD PTR [rdi+rdx] paddd xmm6, xmm1 psubw xmm3, xmm2 paddw xmm5, xmm3 pmaddwd xmm3, xmm3 paddd xmm6, xmm3 movdqa xmm1, xmm5 movdqa xmm2, xmm5 pcmpgtw xmm1, xmm0 pcmpeqw xmm2, xmm0 por xmm1, xmm2 pcmpeqw xmm1, xmm0 movdqa xmm2, xmm5 punpcklwd xmm5, xmm1 punpckhwd xmm2, xmm1 paddd xmm7, xmm5 paddd xmm7, xmm2 lea rsi, [rsi + 2*rax] lea rdi, [rdi + 2*rdx] sub rcx, 4 jnz .var8loop movdqa xmm4, xmm6 punpckldq xmm6, xmm0 punpckhdq xmm4, xmm0 movdqa xmm5, xmm7 paddd xmm6, xmm4 punpckldq xmm7, xmm0 punpckhdq xmm5, xmm0 paddd xmm7, xmm5 movdqa xmm4, xmm6 movdqa xmm5, xmm7 psrldq xmm4, 8 psrldq xmm5, 8 paddd xmm6, xmm4 paddd xmm7, xmm5 mov rdi, arg(4) ; [SSE] mov rax, arg(5) ; [Sum] movd DWORD PTR [rdi], xmm6 movd DWORD PTR [rax], xmm7 ; begin epilog pop rdi pop rsi pop rbx RESTORE_XMM UNSHADOW_ARGS pop rbp ret libvpx-1.8.2/vpx_dsp/x86/highbd_variance_sse2.c000066400000000000000000001016661357355204000213510ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
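One convention worth stating before the C wrappers that follow: for 10- and 12-bit input, the raw sum and SSE are renormalized back to 8-bit scale (the sum by 2 or 4 bits, the SSE by 4 or 8 bits, with rounding) before the variance formula is applied, which keeps the 32-bit outputs in range. A hedged sketch of that normalization step, factored out with illustrative names:

#include <stdint.h>

/* bd_shift is 2 for 10-bit input and 4 for 12-bit input, matching the
 * highbd_10/12_variance_sse2 helpers below. */
static void normalize_highbd(uint64_t sse_long, int64_t sum_long, int bd_shift,
                             uint32_t *sse, int *sum) {
  *sum = (int)((sum_long + (1LL << (bd_shift - 1))) >> bd_shift);
  *sse =
      (uint32_t)((sse_long + (1ULL << (2 * bd_shift - 1))) >> (2 * bd_shift));
}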
*/ #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" typedef uint32_t (*high_variance_fn_t)(const uint16_t *src, int src_stride, const uint16_t *ref, int ref_stride, uint32_t *sse, int *sum); uint32_t vpx_highbd_calc8x8var_sse2(const uint16_t *src, int src_stride, const uint16_t *ref, int ref_stride, uint32_t *sse, int *sum); uint32_t vpx_highbd_calc16x16var_sse2(const uint16_t *src, int src_stride, const uint16_t *ref, int ref_stride, uint32_t *sse, int *sum); static void highbd_8_variance_sse2(const uint16_t *src, int src_stride, const uint16_t *ref, int ref_stride, int w, int h, uint32_t *sse, int *sum, high_variance_fn_t var_fn, int block_size) { int i, j; *sse = 0; *sum = 0; for (i = 0; i < h; i += block_size) { for (j = 0; j < w; j += block_size) { unsigned int sse0; int sum0; var_fn(src + src_stride * i + j, src_stride, ref + ref_stride * i + j, ref_stride, &sse0, &sum0); *sse += sse0; *sum += sum0; } } } static void highbd_10_variance_sse2(const uint16_t *src, int src_stride, const uint16_t *ref, int ref_stride, int w, int h, uint32_t *sse, int *sum, high_variance_fn_t var_fn, int block_size) { int i, j; uint64_t sse_long = 0; int32_t sum_long = 0; for (i = 0; i < h; i += block_size) { for (j = 0; j < w; j += block_size) { unsigned int sse0; int sum0; var_fn(src + src_stride * i + j, src_stride, ref + ref_stride * i + j, ref_stride, &sse0, &sum0); sse_long += sse0; sum_long += sum0; } } *sum = ROUND_POWER_OF_TWO(sum_long, 2); *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 4); } static void highbd_12_variance_sse2(const uint16_t *src, int src_stride, const uint16_t *ref, int ref_stride, int w, int h, uint32_t *sse, int *sum, high_variance_fn_t var_fn, int block_size) { int i, j; uint64_t sse_long = 0; int32_t sum_long = 0; for (i = 0; i < h; i += block_size) { for (j = 0; j < w; j += block_size) { unsigned int sse0; int sum0; var_fn(src + src_stride * i + j, src_stride, ref + ref_stride * i + j, ref_stride, &sse0, &sum0); sse_long += sse0; sum_long += sum0; } } *sum = ROUND_POWER_OF_TWO(sum_long, 4); *sse = (uint32_t)ROUND_POWER_OF_TWO(sse_long, 8); } #define HIGH_GET_VAR(S) \ void vpx_highbd_8_get##S##x##S##var_sse2( \ const uint8_t *src8, int src_stride, const uint8_t *ref8, \ int ref_stride, uint32_t *sse, int *sum) { \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, sse, \ sum); \ } \ \ void vpx_highbd_10_get##S##x##S##var_sse2( \ const uint8_t *src8, int src_stride, const uint8_t *ref8, \ int ref_stride, uint32_t *sse, int *sum) { \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, sse, \ sum); \ *sum = ROUND_POWER_OF_TWO(*sum, 2); \ *sse = ROUND_POWER_OF_TWO(*sse, 4); \ } \ \ void vpx_highbd_12_get##S##x##S##var_sse2( \ const uint8_t *src8, int src_stride, const uint8_t *ref8, \ int ref_stride, uint32_t *sse, int *sum) { \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ vpx_highbd_calc##S##x##S##var_sse2(src, src_stride, ref, ref_stride, sse, \ sum); \ *sum = ROUND_POWER_OF_TWO(*sum, 4); \ *sse = ROUND_POWER_OF_TWO(*sse, 8); \ } HIGH_GET_VAR(16); HIGH_GET_VAR(8); #undef HIGH_GET_VAR #define VAR_FN(w, h, block_size, shift) \ uint32_t vpx_highbd_8_variance##w##x##h##_sse2( \ const uint8_t *src8, int src_stride, const uint8_t *ref8, \ int ref_stride, uint32_t *sse) { \ int sum; \ uint16_t *src 
= CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ highbd_8_variance_sse2( \ src, src_stride, ref, ref_stride, w, h, sse, &sum, \ vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ return *sse - (uint32_t)(((int64_t)sum * sum) >> (shift)); \ } \ \ uint32_t vpx_highbd_10_variance##w##x##h##_sse2( \ const uint8_t *src8, int src_stride, const uint8_t *ref8, \ int ref_stride, uint32_t *sse) { \ int sum; \ int64_t var; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ highbd_10_variance_sse2( \ src, src_stride, ref, ref_stride, w, h, sse, &sum, \ vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ var = (int64_t)(*sse) - (((int64_t)sum * sum) >> (shift)); \ return (var >= 0) ? (uint32_t)var : 0; \ } \ \ uint32_t vpx_highbd_12_variance##w##x##h##_sse2( \ const uint8_t *src8, int src_stride, const uint8_t *ref8, \ int ref_stride, uint32_t *sse) { \ int sum; \ int64_t var; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ highbd_12_variance_sse2( \ src, src_stride, ref, ref_stride, w, h, sse, &sum, \ vpx_highbd_calc##block_size##x##block_size##var_sse2, block_size); \ var = (int64_t)(*sse) - (((int64_t)sum * sum) >> (shift)); \ return (var >= 0) ? (uint32_t)var : 0; \ } VAR_FN(64, 64, 16, 12); VAR_FN(64, 32, 16, 11); VAR_FN(32, 64, 16, 11); VAR_FN(32, 32, 16, 10); VAR_FN(32, 16, 16, 9); VAR_FN(16, 32, 16, 9); VAR_FN(16, 16, 16, 8); VAR_FN(16, 8, 8, 7); VAR_FN(8, 16, 8, 7); VAR_FN(8, 8, 8, 6); #undef VAR_FN unsigned int vpx_highbd_8_mse16x16_sse2(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, unsigned int *sse) { int sum; uint16_t *src = CONVERT_TO_SHORTPTR(src8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); highbd_8_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, sse, &sum, vpx_highbd_calc16x16var_sse2, 16); return *sse; } unsigned int vpx_highbd_10_mse16x16_sse2(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, unsigned int *sse) { int sum; uint16_t *src = CONVERT_TO_SHORTPTR(src8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, sse, &sum, vpx_highbd_calc16x16var_sse2, 16); return *sse; } unsigned int vpx_highbd_12_mse16x16_sse2(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, unsigned int *sse) { int sum; uint16_t *src = CONVERT_TO_SHORTPTR(src8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 16, 16, sse, &sum, vpx_highbd_calc16x16var_sse2, 16); return *sse; } unsigned int vpx_highbd_8_mse8x8_sse2(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, unsigned int *sse) { int sum; uint16_t *src = CONVERT_TO_SHORTPTR(src8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); highbd_8_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, sse, &sum, vpx_highbd_calc8x8var_sse2, 8); return *sse; } unsigned int vpx_highbd_10_mse8x8_sse2(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, unsigned int *sse) { int sum; uint16_t *src = CONVERT_TO_SHORTPTR(src8); uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); highbd_10_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, sse, &sum, vpx_highbd_calc8x8var_sse2, 8); return *sse; } unsigned int vpx_highbd_12_mse8x8_sse2(const uint8_t *src8, int src_stride, const uint8_t *ref8, int ref_stride, unsigned int *sse) { int sum; uint16_t *src = CONVERT_TO_SHORTPTR(src8); uint16_t *ref = 
CONVERT_TO_SHORTPTR(ref8); highbd_12_variance_sse2(src, src_stride, ref, ref_stride, 8, 8, sse, &sum, vpx_highbd_calc8x8var_sse2, 8); return *sse; } // The 2 unused parameters are place holders for PIC enabled build. // These definitions are for functions defined in // highbd_subpel_variance_impl_sse2.asm #define DECL(w, opt) \ int vpx_highbd_sub_pixel_variance##w##xh_##opt( \ const uint16_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \ const uint16_t *ref, ptrdiff_t ref_stride, int height, \ unsigned int *sse, void *unused0, void *unused); #define DECLS(opt) \ DECL(8, opt); \ DECL(16, opt) DECLS(sse2); #undef DECLS #undef DECL #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ uint32_t vpx_highbd_8_sub_pixel_variance##w##x##h##_##opt( \ const uint8_t *src8, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr) { \ uint32_t sse; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src, src_stride, x_offset, y_offset, ref, ref_stride, h, &sse, NULL, \ NULL); \ if (w > wf) { \ unsigned int sse2; \ int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 16, src_stride, x_offset, y_offset, ref + 16, ref_stride, h, \ &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 32, src_stride, x_offset, y_offset, ref + 32, ref_stride, h, \ &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 48, src_stride, x_offset, y_offset, ref + 48, ref_stride, h, \ &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ } \ } \ *sse_ptr = sse; \ return sse - (uint32_t)((cast se * se) >> (wlog2 + hlog2)); \ } \ \ uint32_t vpx_highbd_10_sub_pixel_variance##w##x##h##_##opt( \ const uint8_t *src8, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr) { \ int64_t var; \ uint32_t sse; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ int se = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src, src_stride, x_offset, y_offset, ref, ref_stride, h, &sse, NULL, \ NULL); \ if (w > wf) { \ uint32_t sse2; \ int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 16, src_stride, x_offset, y_offset, ref + 16, ref_stride, h, \ &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 32, src_stride, x_offset, y_offset, ref + 32, ref_stride, h, \ &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 48, src_stride, x_offset, y_offset, ref + 48, ref_stride, h, \ &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ } \ } \ se = ROUND_POWER_OF_TWO(se, 2); \ sse = ROUND_POWER_OF_TWO(sse, 4); \ *sse_ptr = sse; \ var = (int64_t)(sse) - ((cast se * se) >> (wlog2 + hlog2)); \ return (var >= 0) ? (uint32_t)var : 0; \ } \ \ uint32_t vpx_highbd_12_sub_pixel_variance##w##x##h##_##opt( \ const uint8_t *src8, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr) { \ int start_row; \ uint32_t sse; \ int se = 0; \ int64_t var; \ uint64_t long_sse = 0; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ for (start_row = 0; start_row < h; start_row += 16) { \ uint32_t sse2; \ int height = h - start_row < 16 ? 
h - start_row : 16; \ int se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + (start_row * src_stride), src_stride, x_offset, y_offset, \ ref + (start_row * ref_stride), ref_stride, height, &sse2, NULL, \ NULL); \ se += se2; \ long_sse += sse2; \ if (w > wf) { \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 16 + (start_row * src_stride), src_stride, x_offset, \ y_offset, ref + 16 + (start_row * ref_stride), ref_stride, height, \ &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 32 + (start_row * src_stride), src_stride, x_offset, \ y_offset, ref + 32 + (start_row * ref_stride), ref_stride, \ height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ se2 = vpx_highbd_sub_pixel_variance##wf##xh_##opt( \ src + 48 + (start_row * src_stride), src_stride, x_offset, \ y_offset, ref + 48 + (start_row * ref_stride), ref_stride, \ height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ } \ } \ } \ se = ROUND_POWER_OF_TWO(se, 4); \ sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \ *sse_ptr = sse; \ var = (int64_t)(sse) - ((cast se * se) >> (wlog2 + hlog2)); \ return (var >= 0) ? (uint32_t)var : 0; \ } #define FNS(opt) \ FN(64, 64, 16, 6, 6, opt, (int64_t)); \ FN(64, 32, 16, 6, 5, opt, (int64_t)); \ FN(32, 64, 16, 5, 6, opt, (int64_t)); \ FN(32, 32, 16, 5, 5, opt, (int64_t)); \ FN(32, 16, 16, 5, 4, opt, (int64_t)); \ FN(16, 32, 16, 4, 5, opt, (int64_t)); \ FN(16, 16, 16, 4, 4, opt, (int64_t)); \ FN(16, 8, 16, 4, 3, opt, (int64_t)); \ FN(8, 16, 8, 3, 4, opt, (int64_t)); \ FN(8, 8, 8, 3, 3, opt, (int64_t)); \ FN(8, 4, 8, 3, 2, opt, (int64_t)); FNS(sse2); #undef FNS #undef FN // The 2 unused parameters are place holders for PIC enabled build. #define DECL(w, opt) \ int vpx_highbd_sub_pixel_avg_variance##w##xh_##opt( \ const uint16_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \ const uint16_t *ref, ptrdiff_t ref_stride, const uint16_t *second, \ ptrdiff_t second_stride, int height, unsigned int *sse, void *unused0, \ void *unused); #define DECLS(opt1) \ DECL(16, opt1) \ DECL(8, opt1) DECLS(sse2); #undef DECL #undef DECLS #define FN(w, h, wf, wlog2, hlog2, opt, cast) \ uint32_t vpx_highbd_8_sub_pixel_avg_variance##w##x##h##_##opt( \ const uint8_t *src8, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr, \ const uint8_t *sec8) { \ uint32_t sse; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \ int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src, src_stride, x_offset, y_offset, ref, ref_stride, sec, w, h, &sse, \ NULL, NULL); \ if (w > wf) { \ uint32_t sse2; \ int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 16, src_stride, x_offset, y_offset, ref + 16, ref_stride, \ sec + 16, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 32, src_stride, x_offset, y_offset, ref + 32, ref_stride, \ sec + 32, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 48, src_stride, x_offset, y_offset, ref + 48, ref_stride, \ sec + 48, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ } \ } \ *sse_ptr = sse; \ return sse - (uint32_t)((cast se * se) >> (wlog2 + hlog2)); \ } \ \ uint32_t vpx_highbd_10_sub_pixel_avg_variance##w##x##h##_##opt( \ const uint8_t *src8, int src_stride, 
int x_offset, int y_offset, \ const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr, \ const uint8_t *sec8) { \ int64_t var; \ uint32_t sse; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \ int se = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src, src_stride, x_offset, y_offset, ref, ref_stride, sec, w, h, &sse, \ NULL, NULL); \ if (w > wf) { \ uint32_t sse2; \ int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 16, src_stride, x_offset, y_offset, ref + 16, ref_stride, \ sec + 16, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 32, src_stride, x_offset, y_offset, ref + 32, ref_stride, \ sec + 32, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 48, src_stride, x_offset, y_offset, ref + 48, ref_stride, \ sec + 48, w, h, &sse2, NULL, NULL); \ se += se2; \ sse += sse2; \ } \ } \ se = ROUND_POWER_OF_TWO(se, 2); \ sse = ROUND_POWER_OF_TWO(sse, 4); \ *sse_ptr = sse; \ var = (int64_t)(sse) - ((cast se * se) >> (wlog2 + hlog2)); \ return (var >= 0) ? (uint32_t)var : 0; \ } \ \ uint32_t vpx_highbd_12_sub_pixel_avg_variance##w##x##h##_##opt( \ const uint8_t *src8, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref8, int ref_stride, uint32_t *sse_ptr, \ const uint8_t *sec8) { \ int start_row; \ int64_t var; \ uint32_t sse; \ int se = 0; \ uint64_t long_sse = 0; \ uint16_t *src = CONVERT_TO_SHORTPTR(src8); \ uint16_t *ref = CONVERT_TO_SHORTPTR(ref8); \ uint16_t *sec = CONVERT_TO_SHORTPTR(sec8); \ for (start_row = 0; start_row < h; start_row += 16) { \ uint32_t sse2; \ int height = h - start_row < 16 ? h - start_row : 16; \ int se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + (start_row * src_stride), src_stride, x_offset, y_offset, \ ref + (start_row * ref_stride), ref_stride, sec + (start_row * w), \ w, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ if (w > wf) { \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 16 + (start_row * src_stride), src_stride, x_offset, \ y_offset, ref + 16 + (start_row * ref_stride), ref_stride, \ sec + 16 + (start_row * w), w, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ if (w > wf * 2) { \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 32 + (start_row * src_stride), src_stride, x_offset, \ y_offset, ref + 32 + (start_row * ref_stride), ref_stride, \ sec + 32 + (start_row * w), w, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ se2 = vpx_highbd_sub_pixel_avg_variance##wf##xh_##opt( \ src + 48 + (start_row * src_stride), src_stride, x_offset, \ y_offset, ref + 48 + (start_row * ref_stride), ref_stride, \ sec + 48 + (start_row * w), w, height, &sse2, NULL, NULL); \ se += se2; \ long_sse += sse2; \ } \ } \ } \ se = ROUND_POWER_OF_TWO(se, 4); \ sse = (uint32_t)ROUND_POWER_OF_TWO(long_sse, 8); \ *sse_ptr = sse; \ var = (int64_t)(sse) - ((cast se * se) >> (wlog2 + hlog2)); \ return (var >= 0) ? 
(uint32_t)var : 0; \ } #define FNS(opt1) \ FN(64, 64, 16, 6, 6, opt1, (int64_t)); \ FN(64, 32, 16, 6, 5, opt1, (int64_t)); \ FN(32, 64, 16, 5, 6, opt1, (int64_t)); \ FN(32, 32, 16, 5, 5, opt1, (int64_t)); \ FN(32, 16, 16, 5, 4, opt1, (int64_t)); \ FN(16, 32, 16, 4, 5, opt1, (int64_t)); \ FN(16, 16, 16, 4, 4, opt1, (int64_t)); \ FN(16, 8, 16, 4, 3, opt1, (int64_t)); \ FN(8, 16, 8, 4, 3, opt1, (int64_t)); \ FN(8, 8, 8, 3, 3, opt1, (int64_t)); \ FN(8, 4, 8, 3, 2, opt1, (int64_t)); FNS(sse2); #undef FNS #undef FN libvpx-1.8.2/vpx_dsp/x86/intrapred_sse2.asm000066400000000000000000000633551357355204000206040ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "third_party/x86inc/x86inc.asm" SECTION_RODATA pb_1: times 16 db 1 pw_4: times 8 dw 4 pw_8: times 8 dw 8 pw_16: times 8 dw 16 pw_32: times 8 dw 32 dc_128: times 16 db 128 pw2_4: times 8 dw 2 pw2_8: times 8 dw 4 pw2_16: times 8 dw 8 pw2_32: times 8 dw 16 SECTION .text ; ------------------------------------------ ; input: x, y, z, result ; ; trick from pascal ; (x+2y+z+2)>>2 can be calculated as: ; result = avg(x,z) ; result -= xor(x,z) & 1 ; result = avg(result,y) ; ------------------------------------------ %macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4 pavgb %4, %1, %3 pxor %3, %1 pand %3, [GLOBAL(pb_1)] psubb %4, %3 pavgb %4, %2 %endmacro INIT_XMM sse2 cglobal d45_predictor_4x4, 3, 4, 4, dst, stride, above, goffset GET_GOT goffsetq movq m0, [aboveq] DEFINE_ARGS dst, stride, temp psrldq m1, m0, 1 psrldq m2, m0, 2 X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 ; store 4 lines movd [dstq ], m3 psrlq m3, 8 movd [dstq+strideq ], m3 lea dstq, [dstq+strideq*2] psrlq m3, 8 movd [dstq ], m3 psrlq m3, 8 movd [dstq+strideq ], m3 psrlq m0, 56 movd tempd, m0 mov [dstq+strideq+3], tempb RESTORE_GOT RET INIT_XMM sse2 cglobal d45_predictor_8x8, 3, 4, 4, dst, stride, above, goffset GET_GOT goffsetq movu m1, [aboveq] pslldq m0, m1, 1 psrldq m2, m1, 1 DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 punpckhbw m0, m0 ; 7 7 punpcklwd m0, m0 ; 7 7 7 7 punpckldq m0, m0 ; 7 7 7 7 7 7 7 7 punpcklqdq m3, m0 ; -1 0 1 2 3 4 5 6 7 7 7 7 7 7 7 7 ; store 4 lines psrldq m3, 1 movq [dstq ], m3 psrldq m3, 1 movq [dstq+strideq ], m3 psrldq m3, 1 movq [dstq+strideq*2], m3 psrldq m3, 1 movq [dstq+stride3q ], m3 lea dstq, [dstq+strideq*4] ; store next 4 lines psrldq m3, 1 movq [dstq ], m3 psrldq m3, 1 movq [dstq+strideq ], m3 psrldq m3, 1 movq [dstq+strideq*2], m3 psrldq m3, 1 movq [dstq+stride3q ], m3 RESTORE_GOT RET INIT_XMM sse2 cglobal d207_predictor_4x4, 4, 4, 5, dst, stride, unused, left, goffset GET_GOT goffsetq movd m0, [leftq] ; abcd [byte] punpcklbw m4, m0, m0 ; aabb ccdd punpcklwd m4, m4 ; aaaa bbbb cccc dddd psrldq m4, 12 ; dddd punpckldq m0, m4 ; abcd dddd psrldq m1, m0, 1 ; bcdd psrldq m2, m0, 2 ; cddd X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 ; a2bc b2cd c3d d pavgb m1, m0 ; ab, bc, cd, d [byte] punpcklbw m1, m3 ; ab, a2bc, bc, b2cd, cd, c3d, d, d movd [dstq ], m1 psrlq m1, 16 ; bc, b2cd, cd, c3d, d, d movd [dstq+strideq], m1 lea dstq, [dstq+strideq*2] psrlq m1, 16 ; cd, c3d, d, d movd [dstq ], m1 movd [dstq+strideq], m4 ; d, d, d, d 
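; A scalar sketch of the identity that X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 above
; relies on (illustrative C, not part of this file; avg3 is a hypothetical
; helper name):
;   static uint8_t avg3(uint8_t x, uint8_t y, uint8_t z) {
;     uint8_t r = (x + z + 1) >> 1;  // pavgb(x, z): rounds halves up
;     r -= (x ^ z) & 1;              // cancel the rounding: r == (x + z) >> 1
;     return (r + y + 1) >> 1;       // pavgb(r, y) == (x + 2 * y + z + 2) >> 2
;   }
; Keeping the 3-tap filter in packed bytes this way avoids any widening to
; 16-bit lanes.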
RESTORE_GOT RET INIT_XMM sse2 cglobal dc_predictor_4x4, 4, 5, 3, dst, stride, above, left, goffset GET_GOT goffsetq movd m2, [leftq] movd m0, [aboveq] pxor m1, m1 punpckldq m0, m2 psadbw m0, m1 paddw m0, [GLOBAL(pw_4)] psraw m0, 3 pshuflw m0, m0, 0x0 packuswb m0, m0 movd [dstq ], m0 movd [dstq+strideq], m0 lea dstq, [dstq+strideq*2] movd [dstq ], m0 movd [dstq+strideq], m0 RESTORE_GOT RET INIT_XMM sse2 cglobal dc_left_predictor_4x4, 2, 5, 2, dst, stride, above, left, goffset movifnidn leftq, leftmp GET_GOT goffsetq pxor m1, m1 movd m0, [leftq] psadbw m0, m1 paddw m0, [GLOBAL(pw2_4)] psraw m0, 2 pshuflw m0, m0, 0x0 packuswb m0, m0 movd [dstq ], m0 movd [dstq+strideq], m0 lea dstq, [dstq+strideq*2] movd [dstq ], m0 movd [dstq+strideq], m0 RESTORE_GOT RET INIT_XMM sse2 cglobal dc_top_predictor_4x4, 3, 5, 2, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 movd m0, [aboveq] psadbw m0, m1 paddw m0, [GLOBAL(pw2_4)] psraw m0, 2 pshuflw m0, m0, 0x0 packuswb m0, m0 movd [dstq ], m0 movd [dstq+strideq], m0 lea dstq, [dstq+strideq*2] movd [dstq ], m0 movd [dstq+strideq], m0 RESTORE_GOT RET INIT_XMM sse2 cglobal dc_predictor_8x8, 4, 5, 3, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 movq m0, [aboveq] movq m2, [leftq] DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] psadbw m0, m1 psadbw m2, m1 paddw m0, m2 paddw m0, [GLOBAL(pw_8)] psraw m0, 4 punpcklbw m0, m0 pshuflw m0, m0, 0x0 movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 movq [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 movq [dstq+stride3q ], m0 RESTORE_GOT RET INIT_XMM sse2 cglobal dc_top_predictor_8x8, 3, 5, 2, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 movq m0, [aboveq] DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] psadbw m0, m1 paddw m0, [GLOBAL(pw2_8)] psraw m0, 3 punpcklbw m0, m0 pshuflw m0, m0, 0x0 movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 movq [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 movq [dstq+stride3q ], m0 RESTORE_GOT RET INIT_XMM sse2 cglobal dc_left_predictor_8x8, 2, 5, 2, dst, stride, above, left, goffset movifnidn leftq, leftmp GET_GOT goffsetq pxor m1, m1 movq m0, [leftq] DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] psadbw m0, m1 paddw m0, [GLOBAL(pw2_8)] psraw m0, 3 punpcklbw m0, m0 pshuflw m0, m0, 0x0 movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 movq [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 movq [dstq+stride3q ], m0 RESTORE_GOT RET INIT_XMM sse2 cglobal dc_128_predictor_4x4, 2, 5, 1, dst, stride, above, left, goffset GET_GOT goffsetq DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] movd m0, [GLOBAL(dc_128)] movd [dstq ], m0 movd [dstq+strideq ], m0 movd [dstq+strideq*2], m0 movd [dstq+stride3q ], m0 RESTORE_GOT RET INIT_XMM sse2 cglobal dc_128_predictor_8x8, 2, 5, 1, dst, stride, above, left, goffset GET_GOT goffsetq DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] movq m0, [GLOBAL(dc_128)] movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 movq [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 movq [dstq+stride3q ], m0 RESTORE_GOT RET INIT_XMM sse2 cglobal dc_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 mova m0, 
[aboveq] mova m2, [leftq] DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 4 psadbw m0, m1 psadbw m2, m1 paddw m0, m2 movhlps m2, m0 paddw m0, m2 paddw m0, [GLOBAL(pw_16)] psraw m0, 5 pshuflw m0, m0, 0x0 punpcklqdq m0, m0 packuswb m0, m0 .loop: mova [dstq ], m0 mova [dstq+strideq ], m0 mova [dstq+strideq*2], m0 mova [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] dec lines4d jnz .loop RESTORE_GOT REP_RET INIT_XMM sse2 cglobal dc_top_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 mova m0, [aboveq] DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 4 psadbw m0, m1 movhlps m2, m0 paddw m0, m2 paddw m0, [GLOBAL(pw2_16)] psraw m0, 4 pshuflw m0, m0, 0x0 punpcklqdq m0, m0 packuswb m0, m0 .loop: mova [dstq ], m0 mova [dstq+strideq ], m0 mova [dstq+strideq*2], m0 mova [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] dec lines4d jnz .loop RESTORE_GOT REP_RET INIT_XMM sse2 cglobal dc_left_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 mova m0, [leftq] DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 4 psadbw m0, m1 movhlps m2, m0 paddw m0, m2 paddw m0, [GLOBAL(pw2_16)] psraw m0, 4 pshuflw m0, m0, 0x0 punpcklqdq m0, m0 packuswb m0, m0 .loop: mova [dstq ], m0 mova [dstq+strideq ], m0 mova [dstq+strideq*2], m0 mova [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] dec lines4d jnz .loop RESTORE_GOT REP_RET INIT_XMM sse2 cglobal dc_128_predictor_16x16, 4, 5, 3, dst, stride, above, left, goffset GET_GOT goffsetq DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 4 mova m0, [GLOBAL(dc_128)] .loop: mova [dstq ], m0 mova [dstq+strideq ], m0 mova [dstq+strideq*2], m0 mova [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] dec lines4d jnz .loop RESTORE_GOT RET INIT_XMM sse2 cglobal dc_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 mova m0, [aboveq] mova m2, [aboveq+16] mova m3, [leftq] mova m4, [leftq+16] DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 8 psadbw m0, m1 psadbw m2, m1 psadbw m3, m1 psadbw m4, m1 paddw m0, m2 paddw m0, m3 paddw m0, m4 movhlps m2, m0 paddw m0, m2 paddw m0, [GLOBAL(pw_32)] psraw m0, 6 pshuflw m0, m0, 0x0 punpcklqdq m0, m0 packuswb m0, m0 .loop: mova [dstq ], m0 mova [dstq +16], m0 mova [dstq+strideq ], m0 mova [dstq+strideq +16], m0 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2+16], m0 mova [dstq+stride3q ], m0 mova [dstq+stride3q +16], m0 lea dstq, [dstq+strideq*4] dec lines4d jnz .loop RESTORE_GOT REP_RET INIT_XMM sse2 cglobal dc_top_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 mova m0, [aboveq] mova m2, [aboveq+16] DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 8 psadbw m0, m1 psadbw m2, m1 paddw m0, m2 movhlps m2, m0 paddw m0, m2 paddw m0, [GLOBAL(pw2_32)] psraw m0, 5 pshuflw m0, m0, 0x0 punpcklqdq m0, m0 packuswb m0, m0 .loop: mova [dstq ], m0 mova [dstq +16], m0 mova [dstq+strideq ], m0 mova [dstq+strideq +16], m0 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2+16], m0 mova [dstq+stride3q ], m0 mova [dstq+stride3q +16], m0 lea dstq, [dstq+strideq*4] dec lines4d jnz .loop RESTORE_GOT REP_RET INIT_XMM sse2 cglobal dc_left_predictor_32x32, 4, 5, 5, dst, stride, above, left, goffset GET_GOT goffsetq pxor m1, m1 mova m0, [leftq] mova m2, [leftq+16] DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 8 psadbw 
m0, m1 psadbw m2, m1 paddw m0, m2 movhlps m2, m0 paddw m0, m2 paddw m0, [GLOBAL(pw2_32)] psraw m0, 5 pshuflw m0, m0, 0x0 punpcklqdq m0, m0 packuswb m0, m0 .loop: mova [dstq ], m0 mova [dstq +16], m0 mova [dstq+strideq ], m0 mova [dstq+strideq +16], m0 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2+16], m0 mova [dstq+stride3q ], m0 mova [dstq+stride3q +16], m0 lea dstq, [dstq+strideq*4] dec lines4d jnz .loop RESTORE_GOT REP_RET INIT_XMM sse2 cglobal dc_128_predictor_32x32, 4, 5, 3, dst, stride, above, left, goffset GET_GOT goffsetq DEFINE_ARGS dst, stride, stride3, lines4 lea stride3q, [strideq*3] mov lines4d, 8 mova m0, [GLOBAL(dc_128)] .loop: mova [dstq ], m0 mova [dstq +16], m0 mova [dstq+strideq ], m0 mova [dstq+strideq +16], m0 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2+16], m0 mova [dstq+stride3q ], m0 mova [dstq+stride3q +16], m0 lea dstq, [dstq+strideq*4] dec lines4d jnz .loop RESTORE_GOT RET INIT_XMM sse2 cglobal v_predictor_4x4, 3, 3, 1, dst, stride, above movd m0, [aboveq] movd [dstq ], m0 movd [dstq+strideq], m0 lea dstq, [dstq+strideq*2] movd [dstq ], m0 movd [dstq+strideq], m0 RET INIT_XMM sse2 cglobal v_predictor_8x8, 3, 3, 1, dst, stride, above movq m0, [aboveq] DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 movq [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] movq [dstq ], m0 movq [dstq+strideq ], m0 movq [dstq+strideq*2], m0 movq [dstq+stride3q ], m0 RET INIT_XMM sse2 cglobal v_predictor_16x16, 3, 4, 1, dst, stride, above mova m0, [aboveq] DEFINE_ARGS dst, stride, stride3, nlines4 lea stride3q, [strideq*3] mov nlines4d, 4 .loop: mova [dstq ], m0 mova [dstq+strideq ], m0 mova [dstq+strideq*2], m0 mova [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] dec nlines4d jnz .loop REP_RET INIT_XMM sse2 cglobal v_predictor_32x32, 3, 4, 2, dst, stride, above mova m0, [aboveq] mova m1, [aboveq+16] DEFINE_ARGS dst, stride, stride3, nlines4 lea stride3q, [strideq*3] mov nlines4d, 8 .loop: mova [dstq ], m0 mova [dstq +16], m1 mova [dstq+strideq ], m0 mova [dstq+strideq +16], m1 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2+16], m1 mova [dstq+stride3q ], m0 mova [dstq+stride3q +16], m1 lea dstq, [dstq+strideq*4] dec nlines4d jnz .loop REP_RET INIT_XMM sse2 cglobal h_predictor_4x4, 2, 4, 4, dst, stride, line, left movifnidn leftq, leftmp movd m0, [leftq] punpcklbw m0, m0 punpcklbw m0, m0 pshufd m1, m0, 0x1 movd [dstq ], m0 movd [dstq+strideq], m1 pshufd m2, m0, 0x2 lea dstq, [dstq+strideq*2] pshufd m3, m0, 0x3 movd [dstq ], m2 movd [dstq+strideq], m3 RET INIT_XMM sse2 cglobal h_predictor_8x8, 2, 5, 3, dst, stride, line, left movifnidn leftq, leftmp mov lineq, -2 DEFINE_ARGS dst, stride, line, left, stride3 lea stride3q, [strideq*3] movq m0, [leftq ] punpcklbw m0, m0 ; l1 l1 l2 l2 ... 
l8 l8 .loop: pshuflw m1, m0, 0x0 ; l1 l1 l1 l1 l1 l1 l1 l1 pshuflw m2, m0, 0x55 ; l2 l2 l2 l2 l2 l2 l2 l2 movq [dstq ], m1 movq [dstq+strideq], m2 pshuflw m1, m0, 0xaa pshuflw m2, m0, 0xff movq [dstq+strideq*2], m1 movq [dstq+stride3q ], m2 pshufd m0, m0, 0xe ; [63:0] l5 l5 l6 l6 l7 l7 l8 l8 inc lineq lea dstq, [dstq+strideq*4] jnz .loop REP_RET INIT_XMM sse2 cglobal h_predictor_16x16, 2, 5, 3, dst, stride, line, left movifnidn leftq, leftmp mov lineq, -4 DEFINE_ARGS dst, stride, line, left, stride3 lea stride3q, [strideq*3] .loop: movd m0, [leftq] punpcklbw m0, m0 punpcklbw m0, m0 ; l1 to l4 each repeated 4 times pshufd m1, m0, 0x0 ; l1 repeated 16 times pshufd m2, m0, 0x55 ; l2 repeated 16 times mova [dstq ], m1 mova [dstq+strideq ], m2 pshufd m1, m0, 0xaa pshufd m2, m0, 0xff mova [dstq+strideq*2], m1 mova [dstq+stride3q ], m2 inc lineq lea leftq, [leftq+4 ] lea dstq, [dstq+strideq*4] jnz .loop REP_RET INIT_XMM sse2 cglobal h_predictor_32x32, 2, 5, 3, dst, stride, line, left movifnidn leftq, leftmp mov lineq, -8 DEFINE_ARGS dst, stride, line, left, stride3 lea stride3q, [strideq*3] .loop: movd m0, [leftq] punpcklbw m0, m0 punpcklbw m0, m0 ; l1 to l4 each repeated 4 times pshufd m1, m0, 0x0 ; l1 repeated 16 times pshufd m2, m0, 0x55 ; l2 repeated 16 times mova [dstq ], m1 mova [dstq+16 ], m1 mova [dstq+strideq ], m2 mova [dstq+strideq+16 ], m2 pshufd m1, m0, 0xaa pshufd m2, m0, 0xff mova [dstq+strideq*2 ], m1 mova [dstq+strideq*2+16], m1 mova [dstq+stride3q ], m2 mova [dstq+stride3q+16 ], m2 inc lineq lea leftq, [leftq+4 ] lea dstq, [dstq+strideq*4] jnz .loop REP_RET INIT_XMM sse2 cglobal tm_predictor_4x4, 4, 4, 5, dst, stride, above, left pxor m1, m1 movq m0, [aboveq-1]; [63:0] tl t1 t2 t3 t4 x x x punpcklbw m0, m1 pshuflw m2, m0, 0x0 ; [63:0] tl tl tl tl [word] psrldq m0, 2 psubw m0, m2 ; [63:0] t1-tl t2-tl t3-tl t4-tl [word] movd m2, [leftq] punpcklbw m2, m1 pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word] pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word] paddw m4, m0 paddw m3, m0 packuswb m4, m4 packuswb m3, m3 movd [dstq ], m4 movd [dstq+strideq], m3 lea dstq, [dstq+strideq*2] pshuflw m4, m2, 0xaa pshuflw m3, m2, 0xff paddw m4, m0 paddw m3, m0 packuswb m4, m4 packuswb m3, m3 movd [dstq ], m4 movd [dstq+strideq], m3 RET INIT_XMM sse2 cglobal tm_predictor_8x8, 4, 4, 5, dst, stride, above, left pxor m1, m1 movd m2, [aboveq-1] movq m0, [aboveq] punpcklbw m2, m1 punpcklbw m0, m1 ; t1 t2 t3 t4 t5 t6 t7 t8 [word] pshuflw m2, m2, 0x0 ; [63:0] tl tl tl tl [word] DEFINE_ARGS dst, stride, line, left mov lineq, -4 punpcklqdq m2, m2 ; tl tl tl tl tl tl tl tl [word] psubw m0, m2 ; t1-tl t2-tl ... t8-tl [word] movq m2, [leftq] punpcklbw m2, m1 ; l1 l2 l3 l4 l5 l6 l7 l8 [word] .loop: pshuflw m4, m2, 0x0 ; [63:0] l1 l1 l1 l1 [word] pshuflw m3, m2, 0x55 ; [63:0] l2 l2 l2 l2 [word] punpcklqdq m4, m4 ; l1 l1 l1 l1 l1 l1 l1 l1 [word] punpcklqdq m3, m3 ; l2 l2 l2 l2 l2 l2 l2 l2 [word] paddw m4, m0 paddw m3, m0 packuswb m4, m3 movq [dstq ], m4 movhps [dstq+strideq], m4 lea dstq, [dstq+strideq*2] psrldq m2, 4 inc lineq jnz .loop REP_RET INIT_XMM sse2 cglobal tm_predictor_16x16, 4, 5, 8, dst, stride, above, left pxor m1, m1 mova m2, [aboveq-16]; mova m0, [aboveq] ; t1 t2 ... t16 [byte] punpckhbw m2, m1 ; [127:112] tl [word] punpckhbw m4, m0, m1 punpcklbw m0, m1 ; m0:m4 t1 t2 ... t16 [word] DEFINE_ARGS dst, stride, line, left, stride8 mov lineq, -8 pshufhw m2, m2, 0xff mova m3, [leftq] ; l1 l2 ... l16 [byte] punpckhqdq m2, m2 ; tl repeated 8 times [word] psubw m0, m2 psubw m4, m2 ; m0:m4 t1-tl t2-tl ... 
t16-tl [word] punpckhbw m5, m3, m1 punpcklbw m3, m1 ; m3:m5 l1 l2 ... l16 [word] lea stride8q, [strideq*8] .loop: pshuflw m6, m3, 0x0 pshuflw m7, m5, 0x0 punpcklqdq m6, m6 ; l1 repeated 8 times [word] punpcklqdq m7, m7 ; l8 repeated 8 times [word] paddw m1, m6, m0 paddw m6, m4 ; m1:m6 ti-tl+l1 [i=1,15] [word] psrldq m5, 2 packuswb m1, m6 mova [dstq ], m1 paddw m1, m7, m0 paddw m7, m4 ; m1:m7 ti-tl+l8 [i=1,15] [word] psrldq m3, 2 packuswb m1, m7 mova [dstq+stride8q], m1 inc lineq lea dstq, [dstq+strideq] jnz .loop REP_RET INIT_XMM sse2 cglobal tm_predictor_32x32, 4, 4, 8, dst, stride, above, left pxor m1, m1 movd m2, [aboveq-1] mova m0, [aboveq] mova m4, [aboveq+16] punpcklbw m2, m1 punpckhbw m3, m0, m1 punpckhbw m5, m4, m1 punpcklbw m0, m1 punpcklbw m4, m1 pshuflw m2, m2, 0x0 DEFINE_ARGS dst, stride, line, left mov lineq, -16 punpcklqdq m2, m2 add leftq, 32 psubw m0, m2 psubw m3, m2 psubw m4, m2 psubw m5, m2 .loop: movd m2, [leftq+lineq*2] pxor m1, m1 punpcklbw m2, m1 pshuflw m7, m2, 0x55 pshuflw m2, m2, 0x0 punpcklqdq m2, m2 punpcklqdq m7, m7 paddw m6, m2, m3 paddw m1, m2, m0 packuswb m1, m6 mova [dstq ], m1 paddw m6, m2, m5 paddw m1, m2, m4 packuswb m1, m6 mova [dstq+16 ], m1 paddw m6, m7, m3 paddw m1, m7, m0 packuswb m1, m6 mova [dstq+strideq ], m1 paddw m6, m7, m5 paddw m1, m7, m4 packuswb m1, m6 mova [dstq+strideq+16], m1 lea dstq, [dstq+strideq*2] inc lineq jnz .loop REP_RET libvpx-1.8.2/vpx_dsp/x86/intrapred_ssse3.asm000066400000000000000000000736151357355204000207700ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
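; Note on naming: each sh_b* shuffle mask defined below spells out, one hex
; digit per byte, the source index that pshufb writes into that byte position
; (sh_b123456789abcdeff, for instance, slides the register left one byte
; while repeating the last pixel, which psrldq cannot do since it shifts in
; zeros).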
; %include "third_party/x86inc/x86inc.asm" SECTION_RODATA pb_1: times 16 db 1 sh_b12345677: db 1, 2, 3, 4, 5, 6, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0 sh_b23456777: db 2, 3, 4, 5, 6, 7, 7, 7, 0, 0, 0, 0, 0, 0, 0, 0 sh_b0123456777777777: db 0, 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7 sh_b1234567777777777: db 1, 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 sh_b2345677777777777: db 2, 3, 4, 5, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7 sh_b123456789abcdeff: db 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15 sh_b23456789abcdefff: db 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 15, 15 sh_b32104567: db 3, 2, 1, 0, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0 sh_b8091a2b345: db 8, 0, 9, 1, 10, 2, 11, 3, 4, 5, 0, 0, 0, 0, 0, 0 sh_b76543210: db 7, 6, 5, 4, 3, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0 sh_b65432108: db 6, 5, 4, 3, 2, 1, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0 sh_b54321089: db 5, 4, 3, 2, 1, 0, 8, 9, 0, 0, 0, 0, 0, 0, 0, 0 sh_b89abcdef: db 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 0 sh_bfedcba9876543210: db 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 SECTION .text INIT_XMM ssse3 cglobal d45_predictor_16x16, 3, 6, 4, dst, stride, above, dst8, line, goffset GET_GOT goffsetq mova m0, [aboveq] DEFINE_ARGS dst, stride, stride3, dst8, line lea stride3q, [strideq*3] lea dst8q, [dstq+strideq*8] mova m1, [GLOBAL(sh_b123456789abcdeff)] pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] pavgb m3, m2, m0 pxor m2, m0 pshufb m0, m1 pand m2, [GLOBAL(pb_1)] psubb m3, m2 pavgb m0, m3 ; first 4 lines and first half of 3rd 4 lines mov lined, 2 .loop: mova [dstq ], m0 movhps [dst8q ], m0 pshufb m0, m1 mova [dstq +strideq ], m0 movhps [dst8q+strideq ], m0 pshufb m0, m1 mova [dstq +strideq*2 ], m0 movhps [dst8q+strideq*2 ], m0 pshufb m0, m1 mova [dstq +stride3q ], m0 movhps [dst8q+stride3q ], m0 pshufb m0, m1 lea dstq, [dstq +strideq*4] lea dst8q, [dst8q+strideq*4] dec lined jnz .loop ; bottom-right 8x8 block movhps [dstq +8], m0 movhps [dstq+strideq +8], m0 movhps [dstq+strideq*2+8], m0 movhps [dstq+stride3q +8], m0 lea dstq, [dstq+strideq*4] movhps [dstq +8], m0 movhps [dstq+strideq +8], m0 movhps [dstq+strideq*2+8], m0 movhps [dstq+stride3q +8], m0 RESTORE_GOT RET INIT_XMM ssse3 cglobal d45_predictor_32x32, 3, 6, 7, dst, stride, above, dst16, line, goffset GET_GOT goffsetq mova m0, [aboveq] mova m4, [aboveq+16] DEFINE_ARGS dst, stride, stride3, dst16, line lea stride3q, [strideq*3] lea dst16q, [dstq +strideq*8] lea dst16q, [dst16q+strideq*8] mova m1, [GLOBAL(sh_b123456789abcdeff)] pshufb m2, m4, [GLOBAL(sh_b23456789abcdefff)] pavgb m3, m2, m4 pxor m2, m4 palignr m5, m4, m0, 1 palignr m6, m4, m0, 2 pshufb m4, m1 pand m2, [GLOBAL(pb_1)] psubb m3, m2 pavgb m4, m3 pavgb m3, m0, m6 pxor m0, m6 pand m0, [GLOBAL(pb_1)] psubb m3, m0 pavgb m5, m3 ; write 4x4 lines (and the first half of the second 4x4 lines) mov lined, 4 .loop: mova [dstq ], m5 mova [dstq +16], m4 mova [dst16q ], m4 palignr m3, m4, m5, 1 pshufb m4, m1 mova [dstq +strideq ], m3 mova [dstq +strideq +16], m4 mova [dst16q+strideq ], m4 palignr m5, m4, m3, 1 pshufb m4, m1 mova [dstq +strideq*2 ], m5 mova [dstq +strideq*2+16], m4 mova [dst16q+strideq*2 ], m4 palignr m3, m4, m5, 1 pshufb m4, m1 mova [dstq +stride3q ], m3 mova [dstq +stride3q +16], m4 mova [dst16q+stride3q ], m4 palignr m5, m4, m3, 1 pshufb m4, m1 lea dstq, [dstq +strideq*4] lea dst16q, [dst16q+strideq*4] dec lined jnz .loop ; write second half of second 4x4 lines mova [dstq +16], m4 mova [dstq +strideq +16], m4 mova [dstq +strideq*2+16], m4 mova [dstq +stride3q +16], m4 lea dstq, [dstq +strideq*4] mova [dstq 
+16], m4 mova [dstq +strideq +16], m4 mova [dstq +strideq*2+16], m4 mova [dstq +stride3q +16], m4 lea dstq, [dstq +strideq*4] mova [dstq +16], m4 mova [dstq +strideq +16], m4 mova [dstq +strideq*2+16], m4 mova [dstq +stride3q +16], m4 lea dstq, [dstq +strideq*4] mova [dstq +16], m4 mova [dstq +strideq +16], m4 mova [dstq +strideq*2+16], m4 mova [dstq +stride3q +16], m4 RESTORE_GOT RET ; ------------------------------------------ ; input: x, y, z, result ; ; trick from pascal ; (x+2y+z+2)>>2 can be calculated as: ; result = avg(x,z) ; result -= xor(x,z) & 1 ; result = avg(result,y) ; ------------------------------------------ %macro X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 4 pavgb %4, %1, %3 pxor %3, %1 pand %3, [GLOBAL(pb_1)] psubb %4, %3 pavgb %4, %2 %endmacro INIT_XMM ssse3 cglobal d63_predictor_4x4, 3, 4, 5, dst, stride, above, goffset GET_GOT goffsetq movq m3, [aboveq] pshufb m1, m3, [GLOBAL(sh_b23456777)] pshufb m2, m3, [GLOBAL(sh_b12345677)] X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m2, m1, m4 pavgb m3, m2 ; store 4 lines movd [dstq ], m3 movd [dstq+strideq], m4 lea dstq, [dstq+strideq*2] psrldq m3, 1 psrldq m4, 1 movd [dstq ], m3 movd [dstq+strideq], m4 RESTORE_GOT RET INIT_XMM ssse3 cglobal d63_predictor_8x8, 3, 4, 5, dst, stride, above, goffset GET_GOT goffsetq movq m3, [aboveq] DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] pshufb m1, m3, [GLOBAL(sh_b2345677777777777)] pshufb m0, m3, [GLOBAL(sh_b0123456777777777)] pshufb m2, m3, [GLOBAL(sh_b1234567777777777)] pshufb m3, [GLOBAL(sh_b0123456777777777)] X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m2, m1, m4 pavgb m3, m2 ; store 4 lines movq [dstq ], m3 movq [dstq+strideq], m4 psrldq m3, 1 psrldq m4, 1 movq [dstq+strideq*2], m3 movq [dstq+stride3q ], m4 lea dstq, [dstq+strideq*4] psrldq m3, 1 psrldq m4, 1 ; store 4 lines movq [dstq ], m3 movq [dstq+strideq], m4 psrldq m3, 1 psrldq m4, 1 movq [dstq+strideq*2], m3 movq [dstq+stride3q ], m4 RESTORE_GOT RET INIT_XMM ssse3 cglobal d63_predictor_16x16, 3, 5, 5, dst, stride, above, line, goffset GET_GOT goffsetq mova m0, [aboveq] DEFINE_ARGS dst, stride, stride3, line lea stride3q, [strideq*3] mova m1, [GLOBAL(sh_b123456789abcdeff)] pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] pshufb m3, m0, m1 X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m3, m2, m4 pavgb m0, m3 mov lined, 4 .loop: mova [dstq ], m0 mova [dstq+strideq ], m4 pshufb m0, m1 pshufb m4, m1 mova [dstq+strideq*2], m0 mova [dstq+stride3q ], m4 pshufb m0, m1 pshufb m4, m1 lea dstq, [dstq+strideq*4] dec lined jnz .loop RESTORE_GOT REP_RET INIT_XMM ssse3 cglobal d63_predictor_32x32, 3, 5, 8, dst, stride, above, line, goffset GET_GOT goffsetq mova m0, [aboveq] mova m7, [aboveq+16] DEFINE_ARGS dst, stride, stride3, line mova m1, [GLOBAL(sh_b123456789abcdeff)] lea stride3q, [strideq*3] pshufb m2, m7, [GLOBAL(sh_b23456789abcdefff)] pshufb m3, m7, m1 X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m2, m4 palignr m6, m7, m0, 1 palignr m5, m7, m0, 2 pavgb m7, m3 X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m6, m5, m2 pavgb m0, m6 mov lined, 8 .loop: mova [dstq ], m0 mova [dstq +16], m7 mova [dstq+strideq ], m2 mova [dstq+strideq +16], m4 palignr m3, m7, m0, 1 palignr m5, m4, m2, 1 pshufb m7, m1 pshufb m4, m1 mova [dstq+strideq*2 ], m3 mova [dstq+strideq*2+16], m7 mova [dstq+stride3q ], m5 mova [dstq+stride3q +16], m4 palignr m0, m7, m3, 1 palignr m2, m4, m5, 1 pshufb m7, m1 pshufb m4, m1 lea dstq, [dstq+strideq*4] dec lined jnz .loop RESTORE_GOT REP_RET INIT_XMM ssse3 cglobal d153_predictor_4x4, 4, 5, 4, dst, stride, above, left, goffset GET_GOT goffsetq movd m0, [leftq] ; l1, l2, l3, l4 
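; The gather below folds both borders into one register: LEFT is reversed
; and joined to (tl, top row) by sh_b32104567, so a single 3-tap pass over
; that vector yields the column averages and the filtered top row (B4..B1,
; C1, D1 in the diagram further down) in one go.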
movd m1, [aboveq-1] ; tl, t1, t2, t3 punpckldq m0, m1 ; l1, l2, l3, l4, tl, t1, t2, t3 pshufb m0, [GLOBAL(sh_b32104567)]; l4, l3, l2, l1, tl, t1, t2, t3 psrldq m1, m0, 1 ; l3, l2, l1, tl, t1, t2, t3 psrldq m2, m0, 2 ; l2, l1, tl, t1, t2, t3 ; comments below are for a predictor like this ; A1 B1 C1 D1 ; A2 B2 A1 B1 ; A3 B3 A2 B2 ; A4 B4 A3 B3 X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 ; 3-tap avg B4 B3 B2 B1 C1 D1 pavgb m1, m0 ; 2-tap avg A4 A3 A2 A1 punpcklqdq m3, m1 ; B4 B3 B2 B1 C1 D1 x x A4 A3 A2 A1 .. DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] pshufb m3, [GLOBAL(sh_b8091a2b345)] ; A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 .. movd [dstq+stride3q ], m3 psrldq m3, 2 ; A3 B3 A2 B2 A1 B1 C1 D1 .. movd [dstq+strideq*2], m3 psrldq m3, 2 ; A2 B2 A1 B1 C1 D1 .. movd [dstq+strideq ], m3 psrldq m3, 2 ; A1 B1 C1 D1 .. movd [dstq ], m3 RESTORE_GOT RET INIT_XMM ssse3 cglobal d153_predictor_8x8, 4, 5, 8, dst, stride, above, left, goffset GET_GOT goffsetq movq m0, [leftq] ; [0- 7] l1-8 [byte] movhps m0, [aboveq-1] ; [8-15] tl, t1-7 [byte] pshufb m1, m0, [GLOBAL(sh_b76543210)] ; l8-1 [word] pshufb m2, m0, [GLOBAL(sh_b65432108)] ; l7-1,tl [word] pshufb m3, m0, [GLOBAL(sh_b54321089)] ; l6-1,tl,t1 [word] pshufb m0, [GLOBAL(sh_b89abcdef)] ; tl,t1-7 [word] psrldq m4, m0, 1 ; t1-7 [word] psrldq m5, m0, 2 ; t2-7 [word] ; comments below are for a predictor like this ; A1 B1 C1 D1 E1 F1 G1 H1 ; A2 B2 A1 B1 C1 D1 E1 F1 ; A3 B3 A2 B2 A1 B1 C1 D1 ; A4 B4 A3 B3 A2 B2 A1 B1 ; A5 B5 A4 B4 A3 B3 A2 B2 ; A6 B6 A5 B5 A4 B4 A3 B3 ; A7 B7 A6 B6 A5 B5 A4 B4 ; A8 B8 A7 B7 A6 B6 A5 B5 pavgb m6, m1, m2 ; 2-tap avg A8-A1 X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m4, m5, m7 ; 3-tap avg C-H1 X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m2, m3, m0 ; 3-tap avg B8-1 punpcklbw m6, m0 ; A-B8, A-B7 ... A-B2, A-B1 DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] movhps [dstq+stride3q], m6 ; A-B4, A-B3, A-B2, A-B1 palignr m0, m7, m6, 10 ; A-B3, A-B2, A-B1, C-H1 movq [dstq+strideq*2], m0 psrldq m0, 2 ; A-B2, A-B1, C-H1 movq [dstq+strideq ], m0 psrldq m0, 2 ; A-H1 movq [dstq ], m0 lea dstq, [dstq+strideq*4] movq [dstq+stride3q ], m6 ; A-B8, A-B7, A-B6, A-B5 psrldq m6, 2 ; A-B7, A-B6, A-B5, A-B4 movq [dstq+strideq*2], m6 psrldq m6, 2 ; A-B6, A-B5, A-B4, A-B3 movq [dstq+strideq ], m6 psrldq m6, 2 ; A-B5, A-B4, A-B3, A-B2 movq [dstq ], m6 RESTORE_GOT RET INIT_XMM ssse3 cglobal d153_predictor_16x16, 4, 5, 8, dst, stride, above, left, goffset GET_GOT goffsetq mova m0, [leftq] movu m7, [aboveq-1] ; comments below are for a predictor like this ; A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 M1 N1 O1 P1 ; A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 M1 N1 ; A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 K1 L1 ; A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 I1 J1 ; A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 G1 H1 ; A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 E1 F1 ; A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 C1 D1 ; A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 A1 B1 ; A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 A2 B2 ; Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 A3 B3 ; Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 A4 B4 ; Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 A5 B5 ; Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 A6 B6 ; Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 A7 B7 ; Af Bf Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 A8 B8 ; Ag Bg Af Bf Ae Be Ad Bd Ac Bc Ab Bb Aa Ba A9 B9 pshufb m6, m7, [GLOBAL(sh_bfedcba9876543210)] palignr m5, m0, m6, 15 palignr m3, m0, m6, 14 X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m5, m3, m4 ; 3-tap avg B3-Bg pshufb m1, m0, [GLOBAL(sh_b123456789abcdeff)] pavgb m5, m0 ; A1 - Ag 
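; Scalar view of what the interleave below assembles (a sketch, using the
; AVG2/AVG3 shorthand of the C reference for the 2- and 3-tap averages):
;   the first two columns of row r hold a fresh (A, B) pair built from
;   left[], and every later column just re-reads the previous row shifted
;   right by two pixels, i.e. dst[r][c] = dst[r - 1][c - 2] for c >= 2,
;   so only the A/B pairs and the filtered top row C1..P1 are computed.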
punpcklbw m0, m4, m5 ; A-B8 ... A-B1 punpckhbw m4, m5 ; A-B9 ... A-Bg pshufb m3, m7, [GLOBAL(sh_b123456789abcdeff)] pshufb m5, m7, [GLOBAL(sh_b23456789abcdefff)] X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m5, m1 ; 3-tap avg C1-P1 pshufb m6, m0, [GLOBAL(sh_bfedcba9876543210)] DEFINE_ARGS dst, stride, stride3 lea stride3q, [strideq*3] palignr m2, m1, m6, 14 mova [dstq ], m2 palignr m2, m1, m6, 12 mova [dstq+strideq ], m2 palignr m2, m1, m6, 10 mova [dstq+strideq*2], m2 palignr m2, m1, m6, 8 mova [dstq+stride3q ], m2 lea dstq, [dstq+strideq*4] palignr m2, m1, m6, 6 mova [dstq ], m2 palignr m2, m1, m6, 4 mova [dstq+strideq ], m2 palignr m2, m1, m6, 2 mova [dstq+strideq*2], m2 pshufb m4, [GLOBAL(sh_bfedcba9876543210)] mova [dstq+stride3q ], m6 lea dstq, [dstq+strideq*4] palignr m2, m6, m4, 14 mova [dstq ], m2 palignr m2, m6, m4, 12 mova [dstq+strideq ], m2 palignr m2, m6, m4, 10 mova [dstq+strideq*2], m2 palignr m2, m6, m4, 8 mova [dstq+stride3q ], m2 lea dstq, [dstq+strideq*4] palignr m2, m6, m4, 6 mova [dstq ], m2 palignr m2, m6, m4, 4 mova [dstq+strideq ], m2 palignr m2, m6, m4, 2 mova [dstq+strideq*2], m2 mova [dstq+stride3q ], m4 RESTORE_GOT RET INIT_XMM ssse3 cglobal d153_predictor_32x32, 4, 5, 8, dst, stride, above, left, goffset GET_GOT goffsetq mova m0, [leftq] movu m7, [aboveq-1] movu m1, [aboveq+15] pshufb m4, m1, [GLOBAL(sh_b123456789abcdeff)] pshufb m6, m1, [GLOBAL(sh_b23456789abcdefff)] X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m4, m6, m2 ; 3-tap avg above [high] palignr m3, m1, m7, 1 palignr m5, m1, m7, 2 X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m7, m3, m5, m1 ; 3-tap avg above [low] pshufb m7, [GLOBAL(sh_bfedcba9876543210)] palignr m5, m0, m7, 15 palignr m3, m0, m7, 14 X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m5, m3, m4 ; 3-tap avg B3-Bg pavgb m5, m0 ; A1 - Ag punpcklbw m6, m4, m5 ; A-B8 ... A-B1 punpckhbw m4, m5 ; A-B9 ... 
A-Bg pshufb m6, [GLOBAL(sh_bfedcba9876543210)] pshufb m4, [GLOBAL(sh_bfedcba9876543210)] DEFINE_ARGS dst, stride, stride3, left, line lea stride3q, [strideq*3] palignr m5, m2, m1, 14 palignr m7, m1, m6, 14 mova [dstq ], m7 mova [dstq+16 ], m5 palignr m5, m2, m1, 12 palignr m7, m1, m6, 12 mova [dstq+strideq ], m7 mova [dstq+strideq+16 ], m5 palignr m5, m2, m1, 10 palignr m7, m1, m6, 10 mova [dstq+strideq*2 ], m7 mova [dstq+strideq*2+16], m5 palignr m5, m2, m1, 8 palignr m7, m1, m6, 8 mova [dstq+stride3q ], m7 mova [dstq+stride3q+16 ], m5 lea dstq, [dstq+strideq*4] palignr m5, m2, m1, 6 palignr m7, m1, m6, 6 mova [dstq ], m7 mova [dstq+16 ], m5 palignr m5, m2, m1, 4 palignr m7, m1, m6, 4 mova [dstq+strideq ], m7 mova [dstq+strideq+16 ], m5 palignr m5, m2, m1, 2 palignr m7, m1, m6, 2 mova [dstq+strideq*2 ], m7 mova [dstq+strideq*2+16], m5 mova [dstq+stride3q ], m6 mova [dstq+stride3q+16 ], m1 lea dstq, [dstq+strideq*4] palignr m5, m1, m6, 14 palignr m3, m6, m4, 14 mova [dstq ], m3 mova [dstq+16 ], m5 palignr m5, m1, m6, 12 palignr m3, m6, m4, 12 mova [dstq+strideq ], m3 mova [dstq+strideq+16 ], m5 palignr m5, m1, m6, 10 palignr m3, m6, m4, 10 mova [dstq+strideq*2 ], m3 mova [dstq+strideq*2+16], m5 palignr m5, m1, m6, 8 palignr m3, m6, m4, 8 mova [dstq+stride3q ], m3 mova [dstq+stride3q+16 ], m5 lea dstq, [dstq+strideq*4] palignr m5, m1, m6, 6 palignr m3, m6, m4, 6 mova [dstq ], m3 mova [dstq+16 ], m5 palignr m5, m1, m6, 4 palignr m3, m6, m4, 4 mova [dstq+strideq ], m3 mova [dstq+strideq+16 ], m5 palignr m5, m1, m6, 2 palignr m3, m6, m4, 2 mova [dstq+strideq*2 ], m3 mova [dstq+strideq*2+16], m5 mova [dstq+stride3q ], m4 mova [dstq+stride3q+16 ], m6 lea dstq, [dstq+strideq*4] mova m7, [leftq] mova m3, [leftq+16] palignr m5, m3, m7, 15 palignr m0, m3, m7, 14 X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m3, m5, m0, m2 ; 3-tap avg Bh - pavgb m5, m3 ; Ah - punpcklbw m3, m2, m5 ; A-B8 ... A-B1 punpckhbw m2, m5 ; A-B9 ... 
A-Bg pshufb m3, [GLOBAL(sh_bfedcba9876543210)] pshufb m2, [GLOBAL(sh_bfedcba9876543210)] palignr m7, m6, m4, 14 palignr m0, m4, m3, 14 mova [dstq ], m0 mova [dstq+16 ], m7 palignr m7, m6, m4, 12 palignr m0, m4, m3, 12 mova [dstq+strideq ], m0 mova [dstq+strideq+16 ], m7 palignr m7, m6, m4, 10 palignr m0, m4, m3, 10 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2+16], m7 palignr m7, m6, m4, 8 palignr m0, m4, m3, 8 mova [dstq+stride3q ], m0 mova [dstq+stride3q+16 ], m7 lea dstq, [dstq+strideq*4] palignr m7, m6, m4, 6 palignr m0, m4, m3, 6 mova [dstq ], m0 mova [dstq+16 ], m7 palignr m7, m6, m4, 4 palignr m0, m4, m3, 4 mova [dstq+strideq ], m0 mova [dstq+strideq+16 ], m7 palignr m7, m6, m4, 2 palignr m0, m4, m3, 2 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2+16], m7 mova [dstq+stride3q ], m3 mova [dstq+stride3q+16 ], m4 lea dstq, [dstq+strideq*4] palignr m7, m4, m3, 14 palignr m0, m3, m2, 14 mova [dstq ], m0 mova [dstq+16 ], m7 palignr m7, m4, m3, 12 palignr m0, m3, m2, 12 mova [dstq+strideq ], m0 mova [dstq+strideq+16 ], m7 palignr m7, m4, m3, 10 palignr m0, m3, m2, 10 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2+16], m7 palignr m7, m4, m3, 8 palignr m0, m3, m2, 8 mova [dstq+stride3q ], m0 mova [dstq+stride3q+16 ], m7 lea dstq, [dstq+strideq*4] palignr m7, m4, m3, 6 palignr m0, m3, m2, 6 mova [dstq ], m0 mova [dstq+16 ], m7 palignr m7, m4, m3, 4 palignr m0, m3, m2, 4 mova [dstq+strideq ], m0 mova [dstq+strideq+16 ], m7 palignr m7, m4, m3, 2 palignr m0, m3, m2, 2 mova [dstq+strideq*2 ], m0 mova [dstq+strideq*2+16], m7 mova [dstq+stride3q ], m2 mova [dstq+stride3q+16 ], m3 RESTORE_GOT RET INIT_XMM ssse3 cglobal d207_predictor_8x8, 4, 5, 4, dst, stride, stride3, left, goffset GET_GOT goffsetq movq m3, [leftq] ; abcdefgh [byte] lea stride3q, [strideq*3] pshufb m1, m3, [GLOBAL(sh_b2345677777777777)] pshufb m0, m3, [GLOBAL(sh_b0123456777777777)] pshufb m2, m3, [GLOBAL(sh_b1234567777777777)] X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m2, m1, m3 pavgb m0, m2 punpcklbw m0, m3 ; interleaved output movq [dstq ], m0 psrldq m0, 2 movq [dstq+strideq ], m0 psrldq m0, 2 movq [dstq+strideq*2], m0 psrldq m0, 2 movq [dstq+stride3q ], m0 lea dstq, [dstq+strideq*4] pshufhw m0, m0, q0000 ; de, d2ef, ef, e2fg, fg, f2gh, gh, g3h, 8xh psrldq m0, 2 movq [dstq ], m0 psrldq m0, 2 movq [dstq+strideq ], m0 psrldq m0, 2 movq [dstq+strideq*2], m0 psrldq m0, 2 movq [dstq+stride3q ], m0 RESTORE_GOT RET INIT_XMM ssse3 cglobal d207_predictor_16x16, 4, 5, 5, dst, stride, stride3, left, goffset GET_GOT goffsetq lea stride3q, [strideq*3] mova m0, [leftq] ; abcdefghijklmnop [byte] pshufb m1, m0, [GLOBAL(sh_b123456789abcdeff)] ; bcdefghijklmnopp pshufb m2, m0, [GLOBAL(sh_b23456789abcdefff)] X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m0, m1, m2, m3 pavgb m1, m0 ; ab, bc, cd .. 
no, op, pp [byte] punpckhbw m4, m1, m3 ; interleaved input punpcklbw m1, m3 ; interleaved output mova [dstq ], m1 palignr m3, m4, m1, 2 mova [dstq+strideq ], m3 palignr m3, m4, m1, 4 mova [dstq+strideq*2], m3 palignr m3, m4, m1, 6 mova [dstq+stride3q ], m3 lea dstq, [dstq+strideq*4] palignr m3, m4, m1, 8 mova [dstq ], m3 palignr m3, m4, m1, 10 mova [dstq+strideq ], m3 palignr m3, m4, m1, 12 mova [dstq+strideq*2], m3 palignr m3, m4, m1, 14 mova [dstq+stride3q ], m3 DEFINE_ARGS dst, stride, stride3, line mov lined, 2 mova m0, [GLOBAL(sh_b23456789abcdefff)] .loop: lea dstq, [dstq+strideq*4] mova [dstq ], m4 pshufb m4, m0 mova [dstq+strideq ], m4 pshufb m4, m0 mova [dstq+strideq*2], m4 pshufb m4, m0 mova [dstq+stride3q ], m4 pshufb m4, m0 dec lined jnz .loop RESTORE_GOT REP_RET INIT_XMM ssse3 cglobal d207_predictor_32x32, 4, 5, 8, dst, stride, stride3, left, goffset GET_GOT goffsetq lea stride3q, [strideq*3] mova m1, [leftq] ; 0-15 [byte] mova m2, [leftq+16] ; 16-31 [byte] pshufb m0, m2, [GLOBAL(sh_b23456789abcdefff)] pshufb m4, m2, [GLOBAL(sh_b123456789abcdeff)] X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m2, m4, m0, m3 palignr m6, m2, m1, 1 palignr m5, m2, m1, 2 pavgb m2, m4 ; high 16px even lines X_PLUS_2Y_PLUS_Z_PLUS_2_RSH_2 m1, m6, m5, m0 pavgb m1, m6 ; low 16px even lines punpckhbw m6, m1, m0 ; interleaved output 2 punpcklbw m1, m0 ; interleaved output 1 punpckhbw m7, m2, m3 ; interleaved output 4 punpcklbw m2, m3 ; interleaved output 3 ; output 1st 8 lines (and half of 2nd 8 lines) DEFINE_ARGS dst, stride, stride3, dst8 lea dst8q, [dstq+strideq*8] mova [dstq ], m1 mova [dstq +16], m6 mova [dst8q ], m6 palignr m0, m6, m1, 2 palignr m4, m2, m6, 2 mova [dstq +strideq ], m0 mova [dstq +strideq +16], m4 mova [dst8q+strideq ], m4 palignr m0, m6, m1, 4 palignr m4, m2, m6, 4 mova [dstq +strideq*2 ], m0 mova [dstq +strideq*2+16], m4 mova [dst8q+strideq*2 ], m4 palignr m0, m6, m1, 6 palignr m4, m2, m6, 6 mova [dstq +stride3q ], m0 mova [dstq +stride3q +16], m4 mova [dst8q+stride3q ], m4 lea dstq, [dstq +strideq*4] lea dst8q, [dst8q+strideq*4] palignr m0, m6, m1, 8 palignr m4, m2, m6, 8 mova [dstq ], m0 mova [dstq +16], m4 mova [dst8q ], m4 palignr m0, m6, m1, 10 palignr m4, m2, m6, 10 mova [dstq +strideq ], m0 mova [dstq +strideq +16], m4 mova [dst8q+strideq ], m4 palignr m0, m6, m1, 12 palignr m4, m2, m6, 12 mova [dstq +strideq*2 ], m0 mova [dstq +strideq*2+16], m4 mova [dst8q+strideq*2 ], m4 palignr m0, m6, m1, 14 palignr m4, m2, m6, 14 mova [dstq +stride3q ], m0 mova [dstq +stride3q +16], m4 mova [dst8q+stride3q ], m4 lea dstq, [dstq+strideq*4] lea dst8q, [dst8q+strideq*4] ; output 2nd half of 2nd 8 lines and half of 3rd 8 lines mova [dstq +16], m2 mova [dst8q ], m2 palignr m4, m7, m2, 2 mova [dstq +strideq +16], m4 mova [dst8q+strideq ], m4 palignr m4, m7, m2, 4 mova [dstq +strideq*2+16], m4 mova [dst8q+strideq*2 ], m4 palignr m4, m7, m2, 6 mova [dstq +stride3q +16], m4 mova [dst8q+stride3q ], m4 lea dstq, [dstq+strideq*4] lea dst8q, [dst8q+strideq*4] palignr m4, m7, m2, 8 mova [dstq +16], m4 mova [dst8q ], m4 palignr m4, m7, m2, 10 mova [dstq +strideq +16], m4 mova [dst8q+strideq ], m4 palignr m4, m7, m2, 12 mova [dstq +strideq*2+16], m4 mova [dst8q+strideq*2 ], m4 palignr m4, m7, m2, 14 mova [dstq +stride3q +16], m4 mova [dst8q+stride3q ], m4 lea dstq, [dstq+strideq*4] lea dst8q, [dst8q+strideq*4] ; output 2nd half of 3rd 8 lines and half of 4th 8 lines mova m0, [GLOBAL(sh_b23456789abcdefff)] mova [dstq +16], m7 mova [dst8q ], m7 pshufb m7, m0 mova [dstq +strideq +16], m7 mova [dst8q+strideq ], m7 pshufb 
m7, m0 mova [dstq +strideq*2+16], m7 mova [dst8q+strideq*2 ], m7 pshufb m7, m0 mova [dstq +stride3q +16], m7 mova [dst8q+stride3q ], m7 pshufb m7, m0 lea dstq, [dstq+strideq*4] lea dst8q, [dst8q+strideq*4] mova [dstq +16], m7 mova [dst8q ], m7 pshufb m7, m0 mova [dstq +strideq +16], m7 mova [dst8q+strideq ], m7 pshufb m7, m0 mova [dstq +strideq*2+16], m7 mova [dst8q+strideq*2 ], m7 pshufb m7, m0 mova [dstq +stride3q +16], m7 mova [dst8q+stride3q ], m7 pshufb m7, m0 lea dstq, [dstq+strideq*4] ; output last half of 4th 8 lines mova [dstq +16], m7 mova [dstq +strideq +16], m7 mova [dstq +strideq*2+16], m7 mova [dstq +stride3q +16], m7 lea dstq, [dstq+strideq*4] mova [dstq +16], m7 mova [dstq +strideq +16], m7 mova [dstq +strideq*2+16], m7 mova [dstq +stride3q +16], m7 ; done! RESTORE_GOT RET libvpx-1.8.2/vpx_dsp/x86/inv_txfm_sse2.c000066400000000000000000001272541357355204000201070ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include // SSE2 #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" static INLINE void transpose_16bit_4(__m128i *res) { const __m128i tr0_0 = _mm_unpacklo_epi16(res[0], res[1]); const __m128i tr0_1 = _mm_unpackhi_epi16(res[0], res[1]); res[0] = _mm_unpacklo_epi16(tr0_0, tr0_1); res[1] = _mm_unpackhi_epi16(tr0_0, tr0_1); } void vpx_idct4x4_16_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { const __m128i eight = _mm_set1_epi16(8); __m128i in[2]; // Rows in[0] = load_input_data8(input); in[1] = load_input_data8(input + 8); idct4_sse2(in); // Columns idct4_sse2(in); // Final round and shift in[0] = _mm_add_epi16(in[0], eight); in[1] = _mm_add_epi16(in[1], eight); in[0] = _mm_srai_epi16(in[0], 4); in[1] = _mm_srai_epi16(in[1], 4); recon_and_store4x4_sse2(in, dest, stride); } void vpx_idct4x4_1_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { const __m128i zero = _mm_setzero_si128(); int a; __m128i dc_value, d[2]; a = (int)dct_const_round_shift((int16_t)input[0] * cospi_16_64); a = (int)dct_const_round_shift(a * cospi_16_64); a = ROUND_POWER_OF_TWO(a, 4); dc_value = _mm_set1_epi16(a); // Reconstruction and Store d[0] = _mm_cvtsi32_si128(*(const int *)(dest)); d[1] = _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)); d[0] = _mm_unpacklo_epi32(d[0], _mm_cvtsi32_si128(*(const int *)(dest + stride))); d[1] = _mm_unpacklo_epi32( _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)), d[1]); d[0] = _mm_unpacklo_epi8(d[0], zero); d[1] = _mm_unpacklo_epi8(d[1], zero); d[0] = _mm_add_epi16(d[0], dc_value); d[1] = _mm_add_epi16(d[1], dc_value); d[0] = _mm_packus_epi16(d[0], d[1]); *(int *)dest = _mm_cvtsi128_si32(d[0]); d[0] = _mm_srli_si128(d[0], 4); *(int *)(dest + stride) = _mm_cvtsi128_si32(d[0]); d[0] = _mm_srli_si128(d[0], 4); *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d[0]); d[0] = _mm_srli_si128(d[0], 4); *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d[0]); } void idct4_sse2(__m128i *const in) { const __m128i k__cospi_p16_p16 = pair_set_epi16(cospi_16_64, cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p24_m08 
= pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); __m128i u[2]; transpose_16bit_4(in); // stage 1 u[0] = _mm_unpacklo_epi16(in[0], in[1]); u[1] = _mm_unpackhi_epi16(in[0], in[1]); u[0] = idct_calc_wraplow_sse2(k__cospi_p16_p16, k__cospi_p16_m16, u[0]); u[1] = idct_calc_wraplow_sse2(k__cospi_p08_p24, k__cospi_p24_m08, u[1]); // stage 2 in[0] = _mm_add_epi16(u[0], u[1]); in[1] = _mm_sub_epi16(u[0], u[1]); in[1] = _mm_shuffle_epi32(in[1], 0x4E); } void iadst4_sse2(__m128i *const in) { const __m128i k__sinpi_1_3 = pair_set_epi16(sinpi_1_9, sinpi_3_9); const __m128i k__sinpi_4_2 = pair_set_epi16(sinpi_4_9, sinpi_2_9); const __m128i k__sinpi_2_3 = pair_set_epi16(sinpi_2_9, sinpi_3_9); const __m128i k__sinpi_1_4 = pair_set_epi16(sinpi_1_9, sinpi_4_9); const __m128i k__sinpi_12_n3 = pair_set_epi16(sinpi_1_9 + sinpi_2_9, -sinpi_3_9); __m128i u[4], v[5]; // 00 01 20 21 02 03 22 23 // 10 11 30 31 12 13 32 33 const __m128i tr0_0 = _mm_unpacklo_epi32(in[0], in[1]); const __m128i tr0_1 = _mm_unpackhi_epi32(in[0], in[1]); // 00 01 10 11 20 21 30 31 // 02 03 12 13 22 23 32 33 in[0] = _mm_unpacklo_epi32(tr0_0, tr0_1); in[1] = _mm_unpackhi_epi32(tr0_0, tr0_1); v[0] = _mm_madd_epi16(in[0], k__sinpi_1_3); // s_1 * x0 + s_3 * x1 v[1] = _mm_madd_epi16(in[1], k__sinpi_4_2); // s_4 * x2 + s_2 * x3 v[2] = _mm_madd_epi16(in[0], k__sinpi_2_3); // s_2 * x0 + s_3 * x1 v[3] = _mm_madd_epi16(in[1], k__sinpi_1_4); // s_1 * x2 + s_4 * x3 v[4] = _mm_madd_epi16(in[0], k__sinpi_12_n3); // (s_1 + s_2) * x0 - s_3 * x1 in[0] = _mm_sub_epi16(in[0], in[1]); // x0 - x2 in[1] = _mm_srli_epi32(in[1], 16); in[0] = _mm_add_epi16(in[0], in[1]); in[0] = _mm_slli_epi32(in[0], 16); // x0 - x2 + x3 u[0] = _mm_add_epi32(v[0], v[1]); u[1] = _mm_sub_epi32(v[2], v[3]); u[2] = _mm_madd_epi16(in[0], k__sinpi_1_3); u[3] = _mm_sub_epi32(v[1], v[3]); u[3] = _mm_add_epi32(u[3], v[4]); u[0] = dct_const_round_shift_sse2(u[0]); u[1] = dct_const_round_shift_sse2(u[1]); u[2] = dct_const_round_shift_sse2(u[2]); u[3] = dct_const_round_shift_sse2(u[3]); in[0] = _mm_packs_epi32(u[0], u[1]); in[1] = _mm_packs_epi32(u[2], u[3]); } static INLINE void load_buffer_8x8(const tran_low_t *const input, __m128i *const in) { in[0] = load_input_data8(input + 0 * 8); in[1] = load_input_data8(input + 1 * 8); in[2] = load_input_data8(input + 2 * 8); in[3] = load_input_data8(input + 3 * 8); in[4] = load_input_data8(input + 4 * 8); in[5] = load_input_data8(input + 5 * 8); in[6] = load_input_data8(input + 6 * 8); in[7] = load_input_data8(input + 7 * 8); } void vpx_idct8x8_64_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { __m128i in[8]; int i; // Load input data. 
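// (load_input_data8() narrows each tran_low_t to a 16-bit lane, so this
// kernel serves both coefficient layouts. The 2-D loop below works because
// vpx_idct8_sse2() transposes before its 1-D pass: iteration 0 covers the
// rows, iteration 1 the columns.)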
load_buffer_8x8(input, in); // 2-D for (i = 0; i < 2; i++) { vpx_idct8_sse2(in); } write_buffer_8x8(in, dest, stride); } void vpx_idct8x8_12_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { __m128i io[8]; io[0] = load_input_data4(input + 0 * 8); io[1] = load_input_data4(input + 1 * 8); io[2] = load_input_data4(input + 2 * 8); io[3] = load_input_data4(input + 3 * 8); idct8x8_12_add_kernel_sse2(io); write_buffer_8x8(io, dest, stride); } static INLINE void recon_and_store_8_dual(uint8_t *const dest, const __m128i in_x, const int stride) { const __m128i zero = _mm_setzero_si128(); __m128i d0, d1; d0 = _mm_loadl_epi64((__m128i *)(dest + 0 * stride)); d1 = _mm_loadl_epi64((__m128i *)(dest + 1 * stride)); d0 = _mm_unpacklo_epi8(d0, zero); d1 = _mm_unpacklo_epi8(d1, zero); d0 = _mm_add_epi16(in_x, d0); d1 = _mm_add_epi16(in_x, d1); d0 = _mm_packus_epi16(d0, d1); _mm_storel_epi64((__m128i *)(dest + 0 * stride), d0); _mm_storeh_pi((__m64 *)(dest + 1 * stride), _mm_castsi128_ps(d0)); } void vpx_idct8x8_1_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { __m128i dc_value; tran_high_t a1; tran_low_t out = WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 5); dc_value = _mm_set1_epi16((int16_t)a1); recon_and_store_8_dual(dest, dc_value, stride); dest += 2 * stride; recon_and_store_8_dual(dest, dc_value, stride); dest += 2 * stride; recon_and_store_8_dual(dest, dc_value, stride); dest += 2 * stride; recon_and_store_8_dual(dest, dc_value, stride); } void vpx_idct8_sse2(__m128i *const in) { // 8x8 Transpose is copied from vpx_fdct8x8_sse2() transpose_16bit_8x8(in, in); // 4-stage 1D idct8x8 idct8(in, in); } void iadst8_sse2(__m128i *const in) { const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); const __m128i k__cospi_p10_p22 = pair_set_epi16(cospi_10_64, cospi_22_64); const __m128i k__cospi_p22_m10 = pair_set_epi16(cospi_22_64, -cospi_10_64); const __m128i k__cospi_p18_p14 = pair_set_epi16(cospi_18_64, cospi_14_64); const __m128i k__cospi_p14_m18 = pair_set_epi16(cospi_14_64, -cospi_18_64); const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i kZero = _mm_set1_epi16(0); __m128i s[8], u[16], v[8], w[16]; // transpose transpose_16bit_8x8(in, in); // column transformation // stage 1 // interleave and multiply/add into 32-bit integer s[0] = _mm_unpacklo_epi16(in[7], in[0]); s[1] = _mm_unpackhi_epi16(in[7], in[0]); s[2] = _mm_unpacklo_epi16(in[5], in[2]); s[3] = _mm_unpackhi_epi16(in[5], in[2]); s[4] = _mm_unpacklo_epi16(in[3], in[4]); s[5] = _mm_unpackhi_epi16(in[3], in[4]); s[6] = _mm_unpacklo_epi16(in[1], in[6]); s[7] = _mm_unpackhi_epi16(in[1], in[6]); u[0] = _mm_madd_epi16(s[0], k__cospi_p02_p30); u[1] = _mm_madd_epi16(s[1], k__cospi_p02_p30); u[2] = _mm_madd_epi16(s[0], k__cospi_p30_m02); u[3] = _mm_madd_epi16(s[1], k__cospi_p30_m02); u[4] = _mm_madd_epi16(s[2], k__cospi_p10_p22); u[5] = _mm_madd_epi16(s[3], 
k__cospi_p10_p22); u[6] = _mm_madd_epi16(s[2], k__cospi_p22_m10); u[7] = _mm_madd_epi16(s[3], k__cospi_p22_m10); u[8] = _mm_madd_epi16(s[4], k__cospi_p18_p14); u[9] = _mm_madd_epi16(s[5], k__cospi_p18_p14); u[10] = _mm_madd_epi16(s[4], k__cospi_p14_m18); u[11] = _mm_madd_epi16(s[5], k__cospi_p14_m18); u[12] = _mm_madd_epi16(s[6], k__cospi_p26_p06); u[13] = _mm_madd_epi16(s[7], k__cospi_p26_p06); u[14] = _mm_madd_epi16(s[6], k__cospi_p06_m26); u[15] = _mm_madd_epi16(s[7], k__cospi_p06_m26); // addition w[0] = _mm_add_epi32(u[0], u[8]); w[1] = _mm_add_epi32(u[1], u[9]); w[2] = _mm_add_epi32(u[2], u[10]); w[3] = _mm_add_epi32(u[3], u[11]); w[4] = _mm_add_epi32(u[4], u[12]); w[5] = _mm_add_epi32(u[5], u[13]); w[6] = _mm_add_epi32(u[6], u[14]); w[7] = _mm_add_epi32(u[7], u[15]); w[8] = _mm_sub_epi32(u[0], u[8]); w[9] = _mm_sub_epi32(u[1], u[9]); w[10] = _mm_sub_epi32(u[2], u[10]); w[11] = _mm_sub_epi32(u[3], u[11]); w[12] = _mm_sub_epi32(u[4], u[12]); w[13] = _mm_sub_epi32(u[5], u[13]); w[14] = _mm_sub_epi32(u[6], u[14]); w[15] = _mm_sub_epi32(u[7], u[15]); // shift and rounding u[0] = dct_const_round_shift_sse2(w[0]); u[1] = dct_const_round_shift_sse2(w[1]); u[2] = dct_const_round_shift_sse2(w[2]); u[3] = dct_const_round_shift_sse2(w[3]); u[4] = dct_const_round_shift_sse2(w[4]); u[5] = dct_const_round_shift_sse2(w[5]); u[6] = dct_const_round_shift_sse2(w[6]); u[7] = dct_const_round_shift_sse2(w[7]); u[8] = dct_const_round_shift_sse2(w[8]); u[9] = dct_const_round_shift_sse2(w[9]); u[10] = dct_const_round_shift_sse2(w[10]); u[11] = dct_const_round_shift_sse2(w[11]); u[12] = dct_const_round_shift_sse2(w[12]); u[13] = dct_const_round_shift_sse2(w[13]); u[14] = dct_const_round_shift_sse2(w[14]); u[15] = dct_const_round_shift_sse2(w[15]); // back to 16-bit and pack 8 integers into __m128i in[0] = _mm_packs_epi32(u[0], u[1]); in[1] = _mm_packs_epi32(u[2], u[3]); in[2] = _mm_packs_epi32(u[4], u[5]); in[3] = _mm_packs_epi32(u[6], u[7]); in[4] = _mm_packs_epi32(u[8], u[9]); in[5] = _mm_packs_epi32(u[10], u[11]); in[6] = _mm_packs_epi32(u[12], u[13]); in[7] = _mm_packs_epi32(u[14], u[15]); // stage 2 s[0] = _mm_add_epi16(in[0], in[2]); s[1] = _mm_add_epi16(in[1], in[3]); s[2] = _mm_sub_epi16(in[0], in[2]); s[3] = _mm_sub_epi16(in[1], in[3]); u[0] = _mm_unpacklo_epi16(in[4], in[5]); u[1] = _mm_unpackhi_epi16(in[4], in[5]); u[2] = _mm_unpacklo_epi16(in[6], in[7]); u[3] = _mm_unpackhi_epi16(in[6], in[7]); v[0] = _mm_madd_epi16(u[0], k__cospi_p08_p24); v[1] = _mm_madd_epi16(u[1], k__cospi_p08_p24); v[2] = _mm_madd_epi16(u[0], k__cospi_p24_m08); v[3] = _mm_madd_epi16(u[1], k__cospi_p24_m08); v[4] = _mm_madd_epi16(u[2], k__cospi_m24_p08); v[5] = _mm_madd_epi16(u[3], k__cospi_m24_p08); v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24); v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24); w[0] = _mm_add_epi32(v[0], v[4]); w[1] = _mm_add_epi32(v[1], v[5]); w[2] = _mm_add_epi32(v[2], v[6]); w[3] = _mm_add_epi32(v[3], v[7]); w[4] = _mm_sub_epi32(v[0], v[4]); w[5] = _mm_sub_epi32(v[1], v[5]); w[6] = _mm_sub_epi32(v[2], v[6]); w[7] = _mm_sub_epi32(v[3], v[7]); u[0] = dct_const_round_shift_sse2(w[0]); u[1] = dct_const_round_shift_sse2(w[1]); u[2] = dct_const_round_shift_sse2(w[2]); u[3] = dct_const_round_shift_sse2(w[3]); u[4] = dct_const_round_shift_sse2(w[4]); u[5] = dct_const_round_shift_sse2(w[5]); u[6] = dct_const_round_shift_sse2(w[6]); u[7] = dct_const_round_shift_sse2(w[7]); // back to 16-bit intergers s[4] = _mm_packs_epi32(u[0], u[1]); s[5] = _mm_packs_epi32(u[2], u[3]); s[6] = _mm_packs_epi32(u[4], u[5]); s[7] = 
_mm_packs_epi32(u[6], u[7]); // stage 3 u[0] = _mm_unpacklo_epi16(s[2], s[3]); u[1] = _mm_unpackhi_epi16(s[2], s[3]); u[2] = _mm_unpacklo_epi16(s[6], s[7]); u[3] = _mm_unpackhi_epi16(s[6], s[7]); s[2] = idct_calc_wraplow_sse2(u[0], u[1], k__cospi_p16_p16); s[3] = idct_calc_wraplow_sse2(u[0], u[1], k__cospi_p16_m16); s[6] = idct_calc_wraplow_sse2(u[2], u[3], k__cospi_p16_p16); s[7] = idct_calc_wraplow_sse2(u[2], u[3], k__cospi_p16_m16); in[0] = s[0]; in[1] = _mm_sub_epi16(kZero, s[4]); in[2] = s[6]; in[3] = _mm_sub_epi16(kZero, s[2]); in[4] = s[3]; in[5] = _mm_sub_epi16(kZero, s[7]); in[6] = s[5]; in[7] = _mm_sub_epi16(kZero, s[1]); } static INLINE void idct16_load8x8(const tran_low_t *const input, __m128i *const in) { in[0] = load_input_data8(input + 0 * 16); in[1] = load_input_data8(input + 1 * 16); in[2] = load_input_data8(input + 2 * 16); in[3] = load_input_data8(input + 3 * 16); in[4] = load_input_data8(input + 4 * 16); in[5] = load_input_data8(input + 5 * 16); in[6] = load_input_data8(input + 6 * 16); in[7] = load_input_data8(input + 7 * 16); } void vpx_idct16x16_256_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { __m128i l[16], r[16], out[16], *in; int i; in = l; for (i = 0; i < 2; i++) { idct16_load8x8(input, in); transpose_16bit_8x8(in, in); idct16_load8x8(input + 8, in + 8); transpose_16bit_8x8(in + 8, in + 8); idct16_8col(in, in); in = r; input += 128; } for (i = 0; i < 16; i += 8) { int j; transpose_16bit_8x8(l + i, out); transpose_16bit_8x8(r + i, out + 8); idct16_8col(out, out); for (j = 0; j < 16; ++j) { write_buffer_8x1(dest + j * stride, out[j]); } dest += 8; } } void vpx_idct16x16_38_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { __m128i in[16], temp[16], out[16]; int i; idct16_load8x8(input, in); transpose_16bit_8x8(in, in); for (i = 8; i < 16; i++) { in[i] = _mm_setzero_si128(); } idct16_8col(in, temp); for (i = 0; i < 16; i += 8) { int j; transpose_16bit_8x8(temp + i, in); idct16_8col(in, out); for (j = 0; j < 16; ++j) { write_buffer_8x1(dest + j * stride, out[j]); } dest += 8; } } void vpx_idct16x16_10_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { __m128i in[16], l[16]; int i; // First 1-D inverse DCT // Load input data. 
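// Only the top-left 4x4 corner of the 16x16 coefficient block contributes on
// this small-eob path, so the four 4-sample loads below cover all input.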
in[0] = load_input_data4(input + 0 * 16); in[1] = load_input_data4(input + 1 * 16); in[2] = load_input_data4(input + 2 * 16); in[3] = load_input_data4(input + 3 * 16); idct16x16_10_pass1(in, l); // Second 1-D inverse transform, performed per 8x16 block for (i = 0; i < 16; i += 8) { int j; idct16x16_10_pass2(l + i, in); for (j = 0; j < 16; ++j) { write_buffer_8x1(dest + j * stride, in[j]); } dest += 8; } } static INLINE void recon_and_store_16(uint8_t *const dest, const __m128i in_x) { const __m128i zero = _mm_setzero_si128(); __m128i d0, d1; d0 = _mm_load_si128((__m128i *)(dest)); d1 = _mm_unpackhi_epi8(d0, zero); d0 = _mm_unpacklo_epi8(d0, zero); d0 = _mm_add_epi16(in_x, d0); d1 = _mm_add_epi16(in_x, d1); d0 = _mm_packus_epi16(d0, d1); _mm_store_si128((__m128i *)(dest), d0); } void vpx_idct16x16_1_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { __m128i dc_value; int i; tran_high_t a1; tran_low_t out = WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); out = WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 6); dc_value = _mm_set1_epi16((int16_t)a1); for (i = 0; i < 16; ++i) { recon_and_store_16(dest, dc_value); dest += stride; } } void vpx_iadst16_8col_sse2(__m128i *const in) { // perform 16x16 1-D ADST for 8 columns __m128i s[16], x[16], u[32], v[32]; const __m128i k__cospi_p01_p31 = pair_set_epi16(cospi_1_64, cospi_31_64); const __m128i k__cospi_p31_m01 = pair_set_epi16(cospi_31_64, -cospi_1_64); const __m128i k__cospi_p05_p27 = pair_set_epi16(cospi_5_64, cospi_27_64); const __m128i k__cospi_p27_m05 = pair_set_epi16(cospi_27_64, -cospi_5_64); const __m128i k__cospi_p09_p23 = pair_set_epi16(cospi_9_64, cospi_23_64); const __m128i k__cospi_p23_m09 = pair_set_epi16(cospi_23_64, -cospi_9_64); const __m128i k__cospi_p13_p19 = pair_set_epi16(cospi_13_64, cospi_19_64); const __m128i k__cospi_p19_m13 = pair_set_epi16(cospi_19_64, -cospi_13_64); const __m128i k__cospi_p17_p15 = pair_set_epi16(cospi_17_64, cospi_15_64); const __m128i k__cospi_p15_m17 = pair_set_epi16(cospi_15_64, -cospi_17_64); const __m128i k__cospi_p21_p11 = pair_set_epi16(cospi_21_64, cospi_11_64); const __m128i k__cospi_p11_m21 = pair_set_epi16(cospi_11_64, -cospi_21_64); const __m128i k__cospi_p25_p07 = pair_set_epi16(cospi_25_64, cospi_7_64); const __m128i k__cospi_p07_m25 = pair_set_epi16(cospi_7_64, -cospi_25_64); const __m128i k__cospi_p29_p03 = pair_set_epi16(cospi_29_64, cospi_3_64); const __m128i k__cospi_p03_m29 = pair_set_epi16(cospi_3_64, -cospi_29_64); const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); const __m128i k__cospi_p20_p12 = pair_set_epi16(cospi_20_64, cospi_12_64); const __m128i k__cospi_p12_m20 = pair_set_epi16(cospi_12_64, -cospi_20_64); const __m128i k__cospi_m28_p04 = pair_set_epi16(-cospi_28_64, cospi_4_64); const __m128i k__cospi_m12_p20 = pair_set_epi16(-cospi_12_64, cospi_20_64); const __m128i k__cospi_p08_p24 = pair_set_epi16(cospi_8_64, cospi_24_64); const __m128i k__cospi_p24_m08 = pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i k__cospi_m24_p08 = pair_set_epi16(-cospi_24_64, cospi_8_64); const __m128i k__cospi_m16_m16 = _mm_set1_epi16(-cospi_16_64); const __m128i k__cospi_p16_p16 = _mm_set1_epi16(cospi_16_64); const __m128i k__cospi_p16_m16 = pair_set_epi16(cospi_16_64, -cospi_16_64); const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); const __m128i kZero = _mm_set1_epi16(0); u[0] = 
_mm_unpacklo_epi16(in[15], in[0]); u[1] = _mm_unpackhi_epi16(in[15], in[0]); u[2] = _mm_unpacklo_epi16(in[13], in[2]); u[3] = _mm_unpackhi_epi16(in[13], in[2]); u[4] = _mm_unpacklo_epi16(in[11], in[4]); u[5] = _mm_unpackhi_epi16(in[11], in[4]); u[6] = _mm_unpacklo_epi16(in[9], in[6]); u[7] = _mm_unpackhi_epi16(in[9], in[6]); u[8] = _mm_unpacklo_epi16(in[7], in[8]); u[9] = _mm_unpackhi_epi16(in[7], in[8]); u[10] = _mm_unpacklo_epi16(in[5], in[10]); u[11] = _mm_unpackhi_epi16(in[5], in[10]); u[12] = _mm_unpacklo_epi16(in[3], in[12]); u[13] = _mm_unpackhi_epi16(in[3], in[12]); u[14] = _mm_unpacklo_epi16(in[1], in[14]); u[15] = _mm_unpackhi_epi16(in[1], in[14]); v[0] = _mm_madd_epi16(u[0], k__cospi_p01_p31); v[1] = _mm_madd_epi16(u[1], k__cospi_p01_p31); v[2] = _mm_madd_epi16(u[0], k__cospi_p31_m01); v[3] = _mm_madd_epi16(u[1], k__cospi_p31_m01); v[4] = _mm_madd_epi16(u[2], k__cospi_p05_p27); v[5] = _mm_madd_epi16(u[3], k__cospi_p05_p27); v[6] = _mm_madd_epi16(u[2], k__cospi_p27_m05); v[7] = _mm_madd_epi16(u[3], k__cospi_p27_m05); v[8] = _mm_madd_epi16(u[4], k__cospi_p09_p23); v[9] = _mm_madd_epi16(u[5], k__cospi_p09_p23); v[10] = _mm_madd_epi16(u[4], k__cospi_p23_m09); v[11] = _mm_madd_epi16(u[5], k__cospi_p23_m09); v[12] = _mm_madd_epi16(u[6], k__cospi_p13_p19); v[13] = _mm_madd_epi16(u[7], k__cospi_p13_p19); v[14] = _mm_madd_epi16(u[6], k__cospi_p19_m13); v[15] = _mm_madd_epi16(u[7], k__cospi_p19_m13); v[16] = _mm_madd_epi16(u[8], k__cospi_p17_p15); v[17] = _mm_madd_epi16(u[9], k__cospi_p17_p15); v[18] = _mm_madd_epi16(u[8], k__cospi_p15_m17); v[19] = _mm_madd_epi16(u[9], k__cospi_p15_m17); v[20] = _mm_madd_epi16(u[10], k__cospi_p21_p11); v[21] = _mm_madd_epi16(u[11], k__cospi_p21_p11); v[22] = _mm_madd_epi16(u[10], k__cospi_p11_m21); v[23] = _mm_madd_epi16(u[11], k__cospi_p11_m21); v[24] = _mm_madd_epi16(u[12], k__cospi_p25_p07); v[25] = _mm_madd_epi16(u[13], k__cospi_p25_p07); v[26] = _mm_madd_epi16(u[12], k__cospi_p07_m25); v[27] = _mm_madd_epi16(u[13], k__cospi_p07_m25); v[28] = _mm_madd_epi16(u[14], k__cospi_p29_p03); v[29] = _mm_madd_epi16(u[15], k__cospi_p29_p03); v[30] = _mm_madd_epi16(u[14], k__cospi_p03_m29); v[31] = _mm_madd_epi16(u[15], k__cospi_p03_m29); u[0] = _mm_add_epi32(v[0], v[16]); u[1] = _mm_add_epi32(v[1], v[17]); u[2] = _mm_add_epi32(v[2], v[18]); u[3] = _mm_add_epi32(v[3], v[19]); u[4] = _mm_add_epi32(v[4], v[20]); u[5] = _mm_add_epi32(v[5], v[21]); u[6] = _mm_add_epi32(v[6], v[22]); u[7] = _mm_add_epi32(v[7], v[23]); u[8] = _mm_add_epi32(v[8], v[24]); u[9] = _mm_add_epi32(v[9], v[25]); u[10] = _mm_add_epi32(v[10], v[26]); u[11] = _mm_add_epi32(v[11], v[27]); u[12] = _mm_add_epi32(v[12], v[28]); u[13] = _mm_add_epi32(v[13], v[29]); u[14] = _mm_add_epi32(v[14], v[30]); u[15] = _mm_add_epi32(v[15], v[31]); u[16] = _mm_sub_epi32(v[0], v[16]); u[17] = _mm_sub_epi32(v[1], v[17]); u[18] = _mm_sub_epi32(v[2], v[18]); u[19] = _mm_sub_epi32(v[3], v[19]); u[20] = _mm_sub_epi32(v[4], v[20]); u[21] = _mm_sub_epi32(v[5], v[21]); u[22] = _mm_sub_epi32(v[6], v[22]); u[23] = _mm_sub_epi32(v[7], v[23]); u[24] = _mm_sub_epi32(v[8], v[24]); u[25] = _mm_sub_epi32(v[9], v[25]); u[26] = _mm_sub_epi32(v[10], v[26]); u[27] = _mm_sub_epi32(v[11], v[27]); u[28] = _mm_sub_epi32(v[12], v[28]); u[29] = _mm_sub_epi32(v[13], v[29]); u[30] = _mm_sub_epi32(v[14], v[30]); u[31] = _mm_sub_epi32(v[15], v[31]); u[0] = dct_const_round_shift_sse2(u[0]); u[1] = dct_const_round_shift_sse2(u[1]); u[2] = dct_const_round_shift_sse2(u[2]); u[3] = dct_const_round_shift_sse2(u[3]); u[4] = 
dct_const_round_shift_sse2(u[4]); u[5] = dct_const_round_shift_sse2(u[5]); u[6] = dct_const_round_shift_sse2(u[6]); u[7] = dct_const_round_shift_sse2(u[7]); u[8] = dct_const_round_shift_sse2(u[8]); u[9] = dct_const_round_shift_sse2(u[9]); u[10] = dct_const_round_shift_sse2(u[10]); u[11] = dct_const_round_shift_sse2(u[11]); u[12] = dct_const_round_shift_sse2(u[12]); u[13] = dct_const_round_shift_sse2(u[13]); u[14] = dct_const_round_shift_sse2(u[14]); u[15] = dct_const_round_shift_sse2(u[15]); u[16] = dct_const_round_shift_sse2(u[16]); u[17] = dct_const_round_shift_sse2(u[17]); u[18] = dct_const_round_shift_sse2(u[18]); u[19] = dct_const_round_shift_sse2(u[19]); u[20] = dct_const_round_shift_sse2(u[20]); u[21] = dct_const_round_shift_sse2(u[21]); u[22] = dct_const_round_shift_sse2(u[22]); u[23] = dct_const_round_shift_sse2(u[23]); u[24] = dct_const_round_shift_sse2(u[24]); u[25] = dct_const_round_shift_sse2(u[25]); u[26] = dct_const_round_shift_sse2(u[26]); u[27] = dct_const_round_shift_sse2(u[27]); u[28] = dct_const_round_shift_sse2(u[28]); u[29] = dct_const_round_shift_sse2(u[29]); u[30] = dct_const_round_shift_sse2(u[30]); u[31] = dct_const_round_shift_sse2(u[31]); s[0] = _mm_packs_epi32(u[0], u[1]); s[1] = _mm_packs_epi32(u[2], u[3]); s[2] = _mm_packs_epi32(u[4], u[5]); s[3] = _mm_packs_epi32(u[6], u[7]); s[4] = _mm_packs_epi32(u[8], u[9]); s[5] = _mm_packs_epi32(u[10], u[11]); s[6] = _mm_packs_epi32(u[12], u[13]); s[7] = _mm_packs_epi32(u[14], u[15]); s[8] = _mm_packs_epi32(u[16], u[17]); s[9] = _mm_packs_epi32(u[18], u[19]); s[10] = _mm_packs_epi32(u[20], u[21]); s[11] = _mm_packs_epi32(u[22], u[23]); s[12] = _mm_packs_epi32(u[24], u[25]); s[13] = _mm_packs_epi32(u[26], u[27]); s[14] = _mm_packs_epi32(u[28], u[29]); s[15] = _mm_packs_epi32(u[30], u[31]); // stage 2 u[0] = _mm_unpacklo_epi16(s[8], s[9]); u[1] = _mm_unpackhi_epi16(s[8], s[9]); u[2] = _mm_unpacklo_epi16(s[10], s[11]); u[3] = _mm_unpackhi_epi16(s[10], s[11]); u[4] = _mm_unpacklo_epi16(s[12], s[13]); u[5] = _mm_unpackhi_epi16(s[12], s[13]); u[6] = _mm_unpacklo_epi16(s[14], s[15]); u[7] = _mm_unpackhi_epi16(s[14], s[15]); v[0] = _mm_madd_epi16(u[0], k__cospi_p04_p28); v[1] = _mm_madd_epi16(u[1], k__cospi_p04_p28); v[2] = _mm_madd_epi16(u[0], k__cospi_p28_m04); v[3] = _mm_madd_epi16(u[1], k__cospi_p28_m04); v[4] = _mm_madd_epi16(u[2], k__cospi_p20_p12); v[5] = _mm_madd_epi16(u[3], k__cospi_p20_p12); v[6] = _mm_madd_epi16(u[2], k__cospi_p12_m20); v[7] = _mm_madd_epi16(u[3], k__cospi_p12_m20); v[8] = _mm_madd_epi16(u[4], k__cospi_m28_p04); v[9] = _mm_madd_epi16(u[5], k__cospi_m28_p04); v[10] = _mm_madd_epi16(u[4], k__cospi_p04_p28); v[11] = _mm_madd_epi16(u[5], k__cospi_p04_p28); v[12] = _mm_madd_epi16(u[6], k__cospi_m12_p20); v[13] = _mm_madd_epi16(u[7], k__cospi_m12_p20); v[14] = _mm_madd_epi16(u[6], k__cospi_p20_p12); v[15] = _mm_madd_epi16(u[7], k__cospi_p20_p12); u[0] = _mm_add_epi32(v[0], v[8]); u[1] = _mm_add_epi32(v[1], v[9]); u[2] = _mm_add_epi32(v[2], v[10]); u[3] = _mm_add_epi32(v[3], v[11]); u[4] = _mm_add_epi32(v[4], v[12]); u[5] = _mm_add_epi32(v[5], v[13]); u[6] = _mm_add_epi32(v[6], v[14]); u[7] = _mm_add_epi32(v[7], v[15]); u[8] = _mm_sub_epi32(v[0], v[8]); u[9] = _mm_sub_epi32(v[1], v[9]); u[10] = _mm_sub_epi32(v[2], v[10]); u[11] = _mm_sub_epi32(v[3], v[11]); u[12] = _mm_sub_epi32(v[4], v[12]); u[13] = _mm_sub_epi32(v[5], v[13]); u[14] = _mm_sub_epi32(v[6], v[14]); u[15] = _mm_sub_epi32(v[7], v[15]); u[0] = dct_const_round_shift_sse2(u[0]); u[1] = dct_const_round_shift_sse2(u[1]); u[2] = 
dct_const_round_shift_sse2(u[2]); u[3] = dct_const_round_shift_sse2(u[3]); u[4] = dct_const_round_shift_sse2(u[4]); u[5] = dct_const_round_shift_sse2(u[5]); u[6] = dct_const_round_shift_sse2(u[6]); u[7] = dct_const_round_shift_sse2(u[7]); u[8] = dct_const_round_shift_sse2(u[8]); u[9] = dct_const_round_shift_sse2(u[9]); u[10] = dct_const_round_shift_sse2(u[10]); u[11] = dct_const_round_shift_sse2(u[11]); u[12] = dct_const_round_shift_sse2(u[12]); u[13] = dct_const_round_shift_sse2(u[13]); u[14] = dct_const_round_shift_sse2(u[14]); u[15] = dct_const_round_shift_sse2(u[15]); x[0] = _mm_add_epi16(s[0], s[4]); x[1] = _mm_add_epi16(s[1], s[5]); x[2] = _mm_add_epi16(s[2], s[6]); x[3] = _mm_add_epi16(s[3], s[7]); x[4] = _mm_sub_epi16(s[0], s[4]); x[5] = _mm_sub_epi16(s[1], s[5]); x[6] = _mm_sub_epi16(s[2], s[6]); x[7] = _mm_sub_epi16(s[3], s[7]); x[8] = _mm_packs_epi32(u[0], u[1]); x[9] = _mm_packs_epi32(u[2], u[3]); x[10] = _mm_packs_epi32(u[4], u[5]); x[11] = _mm_packs_epi32(u[6], u[7]); x[12] = _mm_packs_epi32(u[8], u[9]); x[13] = _mm_packs_epi32(u[10], u[11]); x[14] = _mm_packs_epi32(u[12], u[13]); x[15] = _mm_packs_epi32(u[14], u[15]); // stage 3 u[0] = _mm_unpacklo_epi16(x[4], x[5]); u[1] = _mm_unpackhi_epi16(x[4], x[5]); u[2] = _mm_unpacklo_epi16(x[6], x[7]); u[3] = _mm_unpackhi_epi16(x[6], x[7]); u[4] = _mm_unpacklo_epi16(x[12], x[13]); u[5] = _mm_unpackhi_epi16(x[12], x[13]); u[6] = _mm_unpacklo_epi16(x[14], x[15]); u[7] = _mm_unpackhi_epi16(x[14], x[15]); v[0] = _mm_madd_epi16(u[0], k__cospi_p08_p24); v[1] = _mm_madd_epi16(u[1], k__cospi_p08_p24); v[2] = _mm_madd_epi16(u[0], k__cospi_p24_m08); v[3] = _mm_madd_epi16(u[1], k__cospi_p24_m08); v[4] = _mm_madd_epi16(u[2], k__cospi_m24_p08); v[5] = _mm_madd_epi16(u[3], k__cospi_m24_p08); v[6] = _mm_madd_epi16(u[2], k__cospi_p08_p24); v[7] = _mm_madd_epi16(u[3], k__cospi_p08_p24); v[8] = _mm_madd_epi16(u[4], k__cospi_p08_p24); v[9] = _mm_madd_epi16(u[5], k__cospi_p08_p24); v[10] = _mm_madd_epi16(u[4], k__cospi_p24_m08); v[11] = _mm_madd_epi16(u[5], k__cospi_p24_m08); v[12] = _mm_madd_epi16(u[6], k__cospi_m24_p08); v[13] = _mm_madd_epi16(u[7], k__cospi_m24_p08); v[14] = _mm_madd_epi16(u[6], k__cospi_p08_p24); v[15] = _mm_madd_epi16(u[7], k__cospi_p08_p24); u[0] = _mm_add_epi32(v[0], v[4]); u[1] = _mm_add_epi32(v[1], v[5]); u[2] = _mm_add_epi32(v[2], v[6]); u[3] = _mm_add_epi32(v[3], v[7]); u[4] = _mm_sub_epi32(v[0], v[4]); u[5] = _mm_sub_epi32(v[1], v[5]); u[6] = _mm_sub_epi32(v[2], v[6]); u[7] = _mm_sub_epi32(v[3], v[7]); u[8] = _mm_add_epi32(v[8], v[12]); u[9] = _mm_add_epi32(v[9], v[13]); u[10] = _mm_add_epi32(v[10], v[14]); u[11] = _mm_add_epi32(v[11], v[15]); u[12] = _mm_sub_epi32(v[8], v[12]); u[13] = _mm_sub_epi32(v[9], v[13]); u[14] = _mm_sub_epi32(v[10], v[14]); u[15] = _mm_sub_epi32(v[11], v[15]); v[0] = dct_const_round_shift_sse2(u[0]); v[1] = dct_const_round_shift_sse2(u[1]); v[2] = dct_const_round_shift_sse2(u[2]); v[3] = dct_const_round_shift_sse2(u[3]); v[4] = dct_const_round_shift_sse2(u[4]); v[5] = dct_const_round_shift_sse2(u[5]); v[6] = dct_const_round_shift_sse2(u[6]); v[7] = dct_const_round_shift_sse2(u[7]); v[8] = dct_const_round_shift_sse2(u[8]); v[9] = dct_const_round_shift_sse2(u[9]); v[10] = dct_const_round_shift_sse2(u[10]); v[11] = dct_const_round_shift_sse2(u[11]); v[12] = dct_const_round_shift_sse2(u[12]); v[13] = dct_const_round_shift_sse2(u[13]); v[14] = dct_const_round_shift_sse2(u[14]); v[15] = dct_const_round_shift_sse2(u[15]); s[0] = _mm_add_epi16(x[0], x[2]); s[1] = _mm_add_epi16(x[1], x[3]); s[2] = 
_mm_sub_epi16(x[0], x[2]); s[3] = _mm_sub_epi16(x[1], x[3]); s[4] = _mm_packs_epi32(v[0], v[1]); s[5] = _mm_packs_epi32(v[2], v[3]); s[6] = _mm_packs_epi32(v[4], v[5]); s[7] = _mm_packs_epi32(v[6], v[7]); s[8] = _mm_add_epi16(x[8], x[10]); s[9] = _mm_add_epi16(x[9], x[11]); s[10] = _mm_sub_epi16(x[8], x[10]); s[11] = _mm_sub_epi16(x[9], x[11]); s[12] = _mm_packs_epi32(v[8], v[9]); s[13] = _mm_packs_epi32(v[10], v[11]); s[14] = _mm_packs_epi32(v[12], v[13]); s[15] = _mm_packs_epi32(v[14], v[15]); // stage 4 u[0] = _mm_unpacklo_epi16(s[2], s[3]); u[1] = _mm_unpackhi_epi16(s[2], s[3]); u[2] = _mm_unpacklo_epi16(s[6], s[7]); u[3] = _mm_unpackhi_epi16(s[6], s[7]); u[4] = _mm_unpacklo_epi16(s[10], s[11]); u[5] = _mm_unpackhi_epi16(s[10], s[11]); u[6] = _mm_unpacklo_epi16(s[14], s[15]); u[7] = _mm_unpackhi_epi16(s[14], s[15]); in[7] = idct_calc_wraplow_sse2(u[0], u[1], k__cospi_m16_m16); in[8] = idct_calc_wraplow_sse2(u[0], u[1], k__cospi_p16_m16); in[4] = idct_calc_wraplow_sse2(u[2], u[3], k__cospi_p16_p16); in[11] = idct_calc_wraplow_sse2(u[2], u[3], k__cospi_m16_p16); in[6] = idct_calc_wraplow_sse2(u[4], u[5], k__cospi_p16_p16); in[9] = idct_calc_wraplow_sse2(u[4], u[5], k__cospi_m16_p16); in[5] = idct_calc_wraplow_sse2(u[6], u[7], k__cospi_m16_m16); in[10] = idct_calc_wraplow_sse2(u[6], u[7], k__cospi_p16_m16); in[0] = s[0]; in[1] = _mm_sub_epi16(kZero, s[8]); in[2] = s[12]; in[3] = _mm_sub_epi16(kZero, s[4]); in[12] = s[5]; in[13] = _mm_sub_epi16(kZero, s[13]); in[14] = s[9]; in[15] = _mm_sub_epi16(kZero, s[1]); } void idct16_sse2(__m128i *const in0, __m128i *const in1) { transpose_16bit_16x16(in0, in1); idct16_8col(in0, in0); idct16_8col(in1, in1); } void iadst16_sse2(__m128i *const in0, __m128i *const in1) { transpose_16bit_16x16(in0, in1); vpx_iadst16_8col_sse2(in0); vpx_iadst16_8col_sse2(in1); } // Group the coefficient calculation into smaller functions to prevent stack // spillover in 32x32 idct optimizations: // quarter_1: 0-7 // quarter_2: 8-15 // quarter_3_4: 16-23, 24-31 // For each 8x32 block __m128i in[32], // Input with index, 0, 4 // output pixels: 0-7 in __m128i out[32] static INLINE void idct32_34_8x32_quarter_1(const __m128i *const in /*in[32]*/, __m128i *const out /*out[8]*/) { const __m128i zero = _mm_setzero_si128(); __m128i step1[8], step2[8]; // stage 3 butterfly(in[4], zero, cospi_28_64, cospi_4_64, &step1[4], &step1[7]); // stage 4 step2[0] = butterfly_cospi16(in[0]); step2[4] = step1[4]; step2[5] = step1[4]; step2[6] = step1[7]; step2[7] = step1[7]; // stage 5 step1[0] = step2[0]; step1[1] = step2[0]; step1[2] = step2[0]; step1[3] = step2[0]; step1[4] = step2[4]; butterfly(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); step1[7] = step2[7]; // stage 6 out[0] = _mm_add_epi16(step1[0], step1[7]); out[1] = _mm_add_epi16(step1[1], step1[6]); out[2] = _mm_add_epi16(step1[2], step1[5]); out[3] = _mm_add_epi16(step1[3], step1[4]); out[4] = _mm_sub_epi16(step1[3], step1[4]); out[5] = _mm_sub_epi16(step1[2], step1[5]); out[6] = _mm_sub_epi16(step1[1], step1[6]); out[7] = _mm_sub_epi16(step1[0], step1[7]); } // For each 8x32 block __m128i in[32], // Input with index, 2, 6 // output pixels: 8-15 in __m128i out[32] static INLINE void idct32_34_8x32_quarter_2(const __m128i *const in /*in[32]*/, __m128i *const out /*out[16]*/) { const __m128i zero = _mm_setzero_si128(); __m128i step1[16], step2[16]; // stage 2 butterfly(in[2], zero, cospi_30_64, cospi_2_64, &step2[8], &step2[15]); butterfly(zero, in[6], cospi_6_64, cospi_26_64, &step2[11], &step2[12]); // 
// stage 3 step1[8] = step2[8]; step1[9] = step2[8]; step1[14] = step2[15]; step1[15] = step2[15]; step1[10] = step2[11]; step1[11] = step2[11]; step1[12] = step2[12]; step1[13] = step2[12]; idct32_8x32_quarter_2_stage_4_to_6(step1, out); } static INLINE void idct32_34_8x32_quarter_1_2( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[16]; idct32_34_8x32_quarter_1(in, temp); idct32_34_8x32_quarter_2(in, temp); // stage 7 add_sub_butterfly(temp, out, 16); } // For each 8x32 block __m128i in[32], // Input with odd index, 1, 3, 5, 7 // output pixels: 16-23, 24-31 in __m128i out[32] static INLINE void idct32_34_8x32_quarter_3_4( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { const __m128i zero = _mm_setzero_si128(); __m128i step1[32]; // stage 1 butterfly(in[1], zero, cospi_31_64, cospi_1_64, &step1[16], &step1[31]); butterfly(zero, in[7], cospi_7_64, cospi_25_64, &step1[19], &step1[28]); butterfly(in[5], zero, cospi_27_64, cospi_5_64, &step1[20], &step1[27]); butterfly(zero, in[3], cospi_3_64, cospi_29_64, &step1[23], &step1[24]); // stage 3 butterfly(step1[31], step1[16], cospi_28_64, cospi_4_64, &step1[17], &step1[30]); butterfly(step1[28], step1[19], -cospi_4_64, cospi_28_64, &step1[18], &step1[29]); butterfly(step1[27], step1[20], cospi_12_64, cospi_20_64, &step1[21], &step1[26]); butterfly(step1[24], step1[23], -cospi_20_64, cospi_12_64, &step1[22], &step1[25]); idct32_8x32_quarter_3_4_stage_4_to_7(step1, out); } void idct32_34_8x32_sse2(const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[32]; idct32_34_8x32_quarter_1_2(in, temp); idct32_34_8x32_quarter_3_4(in, temp); // final stage add_sub_butterfly(temp, out, 32); } // Only upper-left 8x8 has non-zero coeff void vpx_idct32x32_34_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { __m128i io[32], col[32]; int i; // Load input data. Only need to load the top left 8x8 block.
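// Pass 1 (rows) stores its result in col[]; pass 2 below consumes col[] in
// 8-column slices, writing 8 reconstructed output columns per iteration.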
load_transpose_16bit_8x8(input, 32, io); idct32_34_8x32_sse2(io, col); for (i = 0; i < 32; i += 8) { int j; transpose_16bit_8x8(col + i, io); idct32_34_8x32_sse2(io, io); for (j = 0; j < 32; ++j) { write_buffer_8x1(dest + j * stride, io[j]); } dest += 8; } } // For each 8x32 block __m128i in[32], // Input with index, 0, 4, 8, 12, 16, 20, 24, 28 // output pixels: 0-7 in __m128i out[32] static INLINE void idct32_1024_8x32_quarter_1( const __m128i *const in /*in[32]*/, __m128i *const out /*out[8]*/) { __m128i step1[8], step2[8]; // stage 3 butterfly(in[4], in[28], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); butterfly(in[20], in[12], cospi_12_64, cospi_20_64, &step1[5], &step1[6]); // stage 4 butterfly(in[0], in[16], cospi_16_64, cospi_16_64, &step2[1], &step2[0]); butterfly(in[8], in[24], cospi_24_64, cospi_8_64, &step2[2], &step2[3]); step2[4] = _mm_add_epi16(step1[4], step1[5]); step2[5] = _mm_sub_epi16(step1[4], step1[5]); step2[6] = _mm_sub_epi16(step1[7], step1[6]); step2[7] = _mm_add_epi16(step1[7], step1[6]); // stage 5 step1[0] = _mm_add_epi16(step2[0], step2[3]); step1[1] = _mm_add_epi16(step2[1], step2[2]); step1[2] = _mm_sub_epi16(step2[1], step2[2]); step1[3] = _mm_sub_epi16(step2[0], step2[3]); step1[4] = step2[4]; butterfly(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); step1[7] = step2[7]; // stage 6 out[0] = _mm_add_epi16(step1[0], step1[7]); out[1] = _mm_add_epi16(step1[1], step1[6]); out[2] = _mm_add_epi16(step1[2], step1[5]); out[3] = _mm_add_epi16(step1[3], step1[4]); out[4] = _mm_sub_epi16(step1[3], step1[4]); out[5] = _mm_sub_epi16(step1[2], step1[5]); out[6] = _mm_sub_epi16(step1[1], step1[6]); out[7] = _mm_sub_epi16(step1[0], step1[7]); } // For each 8x32 block __m128i in[32], // Input with index, 2, 6, 10, 14, 18, 22, 26, 30 // output pixels: 8-15 in __m128i out[32] static INLINE void idct32_1024_8x32_quarter_2( const __m128i *const in /*in[32]*/, __m128i *const out /*out[16]*/) { __m128i step1[16], step2[16]; // stage 2 butterfly(in[2], in[30], cospi_30_64, cospi_2_64, &step2[8], &step2[15]); butterfly(in[18], in[14], cospi_14_64, cospi_18_64, &step2[9], &step2[14]); butterfly(in[10], in[22], cospi_22_64, cospi_10_64, &step2[10], &step2[13]); butterfly(in[26], in[6], cospi_6_64, cospi_26_64, &step2[11], &step2[12]); // stage 3 step1[8] = _mm_add_epi16(step2[8], step2[9]); step1[9] = _mm_sub_epi16(step2[8], step2[9]); step1[10] = _mm_sub_epi16(step2[11], step2[10]); step1[11] = _mm_add_epi16(step2[11], step2[10]); step1[12] = _mm_add_epi16(step2[12], step2[13]); step1[13] = _mm_sub_epi16(step2[12], step2[13]); step1[14] = _mm_sub_epi16(step2[15], step2[14]); step1[15] = _mm_add_epi16(step2[15], step2[14]); idct32_8x32_quarter_2_stage_4_to_6(step1, out); } static INLINE void idct32_1024_8x32_quarter_1_2( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[16]; idct32_1024_8x32_quarter_1(in, temp); idct32_1024_8x32_quarter_2(in, temp); // stage 7 add_sub_butterfly(temp, out, 16); } // For each 8x32 block __m128i in[32], // Input with odd index, // 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 // output pixels: 16-23, 24-31 in __m128i out[32] static INLINE void idct32_1024_8x32_quarter_3_4( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i step1[32], step2[32]; // stage 1 butterfly(in[1], in[31], cospi_31_64, cospi_1_64, &step1[16], &step1[31]); butterfly(in[17], in[15], cospi_15_64, cospi_17_64, &step1[17], &step1[30]); butterfly(in[9], in[23], cospi_23_64, cospi_9_64, 
&step1[18], &step1[29]); butterfly(in[25], in[7], cospi_7_64, cospi_25_64, &step1[19], &step1[28]); butterfly(in[5], in[27], cospi_27_64, cospi_5_64, &step1[20], &step1[27]); butterfly(in[21], in[11], cospi_11_64, cospi_21_64, &step1[21], &step1[26]); butterfly(in[13], in[19], cospi_19_64, cospi_13_64, &step1[22], &step1[25]); butterfly(in[29], in[3], cospi_3_64, cospi_29_64, &step1[23], &step1[24]); // stage 2 step2[16] = _mm_add_epi16(step1[16], step1[17]); step2[17] = _mm_sub_epi16(step1[16], step1[17]); step2[18] = _mm_sub_epi16(step1[19], step1[18]); step2[19] = _mm_add_epi16(step1[19], step1[18]); step2[20] = _mm_add_epi16(step1[20], step1[21]); step2[21] = _mm_sub_epi16(step1[20], step1[21]); step2[22] = _mm_sub_epi16(step1[23], step1[22]); step2[23] = _mm_add_epi16(step1[23], step1[22]); step2[24] = _mm_add_epi16(step1[24], step1[25]); step2[25] = _mm_sub_epi16(step1[24], step1[25]); step2[26] = _mm_sub_epi16(step1[27], step1[26]); step2[27] = _mm_add_epi16(step1[27], step1[26]); step2[28] = _mm_add_epi16(step1[28], step1[29]); step2[29] = _mm_sub_epi16(step1[28], step1[29]); step2[30] = _mm_sub_epi16(step1[31], step1[30]); step2[31] = _mm_add_epi16(step1[31], step1[30]); // stage 3 step1[16] = step2[16]; step1[31] = step2[31]; butterfly(step2[30], step2[17], cospi_28_64, cospi_4_64, &step1[17], &step1[30]); butterfly(step2[29], step2[18], -cospi_4_64, cospi_28_64, &step1[18], &step1[29]); step1[19] = step2[19]; step1[20] = step2[20]; butterfly(step2[26], step2[21], cospi_12_64, cospi_20_64, &step1[21], &step1[26]); butterfly(step2[25], step2[22], -cospi_20_64, cospi_12_64, &step1[22], &step1[25]); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; step1[28] = step2[28]; idct32_8x32_quarter_3_4_stage_4_to_7(step1, out); } void idct32_1024_8x32(const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[32]; idct32_1024_8x32_quarter_1_2(in, temp); idct32_1024_8x32_quarter_3_4(in, temp); // final stage add_sub_butterfly(temp, out, 32); } void vpx_idct32x32_1024_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { __m128i col[4][32], io[32]; int i; // rows for (i = 0; i < 4; i++) { load_transpose_16bit_8x8(&input[0], 32, &io[0]); load_transpose_16bit_8x8(&input[8], 32, &io[8]); load_transpose_16bit_8x8(&input[16], 32, &io[16]); load_transpose_16bit_8x8(&input[24], 32, &io[24]); idct32_1024_8x32(io, col[i]); input += 32 << 3; } // columns for (i = 0; i < 32; i += 8) { // Transpose 32x8 block to 8x32 block transpose_16bit_8x8(col[0] + i, io); transpose_16bit_8x8(col[1] + i, io + 8); transpose_16bit_8x8(col[2] + i, io + 16); transpose_16bit_8x8(col[3] + i, io + 24); idct32_1024_8x32(io, io); store_buffer_8x32(io, dest, stride); dest += 8; } } void vpx_idct32x32_135_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { __m128i col[2][32], in[32], out[32]; int i; for (i = 16; i < 32; i++) { in[i] = _mm_setzero_si128(); } // rows for (i = 0; i < 2; i++) { load_transpose_16bit_8x8(&input[0], 32, &in[0]); load_transpose_16bit_8x8(&input[8], 32, &in[8]); idct32_1024_8x32(in, col[i]); input += 32 << 3; } // columns for (i = 0; i < 32; i += 8) { transpose_16bit_8x8(col[0] + i, in); transpose_16bit_8x8(col[1] + i, in + 8); idct32_1024_8x32(in, out); store_buffer_8x32(out, dest, stride); dest += 8; } } void vpx_idct32x32_1_add_sse2(const tran_low_t *input, uint8_t *dest, int stride) { __m128i dc_value; int j; tran_high_t a1; tran_low_t out = WRAPLOW(dct_const_round_shift((int16_t)input[0] * cospi_16_64)); out = 
WRAPLOW(dct_const_round_shift(out * cospi_16_64)); a1 = ROUND_POWER_OF_TWO(out, 6); dc_value = _mm_set1_epi16((int16_t)a1); for (j = 0; j < 32; ++j) { recon_and_store_16(dest + j * stride + 0, dc_value); recon_and_store_16(dest + j * stride + 16, dc_value); } } libvpx-1.8.2/vpx_dsp/x86/inv_txfm_sse2.h000066400000000000000000000704061357355204000201100ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_INV_TXFM_SSE2_H_ #define VPX_VPX_DSP_X86_INV_TXFM_SSE2_H_ #include <emmintrin.h> // SSE2 #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/inv_txfm.h" #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" static INLINE void idct8x8_12_transpose_16bit_4x8(const __m128i *const in, __m128i *const out) { // Unpack 16 bit elements. Goes from: // in[0]: 30 31 32 33 00 01 02 03 // in[1]: 20 21 22 23 10 11 12 13 // in[2]: 40 41 42 43 70 71 72 73 // in[3]: 50 51 52 53 60 61 62 63 // to: // tr0_0: 00 10 01 11 02 12 03 13 // tr0_1: 20 30 21 31 22 32 23 33 // tr0_2: 40 50 41 51 42 52 43 53 // tr0_3: 60 70 61 71 62 72 63 73 const __m128i tr0_0 = _mm_unpackhi_epi16(in[0], in[1]); const __m128i tr0_1 = _mm_unpacklo_epi16(in[1], in[0]); const __m128i tr0_2 = _mm_unpacklo_epi16(in[2], in[3]); const __m128i tr0_3 = _mm_unpackhi_epi16(in[3], in[2]); // Unpack 32 bit elements resulting in: // tr1_0: 00 10 20 30 01 11 21 31 // tr1_1: 02 12 22 32 03 13 23 33 // tr1_2: 40 50 60 70 41 51 61 71 // tr1_3: 42 52 62 72 43 53 63 73 const __m128i tr1_0 = _mm_unpacklo_epi32(tr0_0, tr0_1); const __m128i tr1_1 = _mm_unpacklo_epi32(tr0_2, tr0_3); const __m128i tr1_2 = _mm_unpackhi_epi32(tr0_0, tr0_1); const __m128i tr1_3 = _mm_unpackhi_epi32(tr0_2, tr0_3); // Unpack 64 bit elements resulting in: // out[0]: 00 10 20 30 40 50 60 70 // out[1]: 01 11 21 31 41 51 61 71 // out[2]: 02 12 22 32 42 52 62 72 // out[3]: 03 13 23 33 43 53 63 73 out[0] = _mm_unpacklo_epi64(tr1_0, tr1_1); out[1] = _mm_unpackhi_epi64(tr1_0, tr1_1); out[2] = _mm_unpacklo_epi64(tr1_2, tr1_3); out[3] = _mm_unpackhi_epi64(tr1_2, tr1_3); } static INLINE __m128i dct_const_round_shift_sse2(const __m128i in) { const __m128i t = _mm_add_epi32(in, _mm_set1_epi32(DCT_CONST_ROUNDING)); return _mm_srai_epi32(t, DCT_CONST_BITS); } static INLINE __m128i idct_madd_round_shift_sse2(const __m128i in, const __m128i cospi) { const __m128i t = _mm_madd_epi16(in, cospi); return dct_const_round_shift_sse2(t); } // Calculate the dot product between in0/1 and x and wrap to short. static INLINE __m128i idct_calc_wraplow_sse2(const __m128i in0, const __m128i in1, const __m128i x) { const __m128i t0 = idct_madd_round_shift_sse2(in0, x); const __m128i t1 = idct_madd_round_shift_sse2(in1, x); return _mm_packs_epi32(t0, t1); } // Multiply elements by constants and add them together.
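// Per 16-bit lane, butterfly() computes
//   out0 = dct_const_round_shift(in0 * c0 - in1 * c1)
//   out1 = dct_const_round_shift(in0 * c1 + in1 * c0)
// in 32-bit precision via _mm_madd_epi16, then packs back to 16 bits.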
static INLINE void butterfly(const __m128i in0, const __m128i in1, const int c0, const int c1, __m128i *const out0, __m128i *const out1) { const __m128i cst0 = pair_set_epi16(c0, -c1); const __m128i cst1 = pair_set_epi16(c1, c0); const __m128i lo = _mm_unpacklo_epi16(in0, in1); const __m128i hi = _mm_unpackhi_epi16(in0, in1); *out0 = idct_calc_wraplow_sse2(lo, hi, cst0); *out1 = idct_calc_wraplow_sse2(lo, hi, cst1); } static INLINE __m128i butterfly_cospi16(const __m128i in) { const __m128i cst = pair_set_epi16(cospi_16_64, cospi_16_64); const __m128i lo = _mm_unpacklo_epi16(in, _mm_setzero_si128()); const __m128i hi = _mm_unpackhi_epi16(in, _mm_setzero_si128()); return idct_calc_wraplow_sse2(lo, hi, cst); } // Functions to allow 8 bit optimisations to be used when profile 0 is used with // highbitdepth enabled static INLINE __m128i load_input_data4(const tran_low_t *data) { #if CONFIG_VP9_HIGHBITDEPTH const __m128i zero = _mm_setzero_si128(); const __m128i in = _mm_load_si128((const __m128i *)data); return _mm_packs_epi32(in, zero); #else return _mm_loadl_epi64((const __m128i *)data); #endif } static INLINE __m128i load_input_data8(const tran_low_t *data) { #if CONFIG_VP9_HIGHBITDEPTH const __m128i in0 = _mm_load_si128((const __m128i *)data); const __m128i in1 = _mm_load_si128((const __m128i *)(data + 4)); return _mm_packs_epi32(in0, in1); #else return _mm_load_si128((const __m128i *)data); #endif } static INLINE void load_transpose_16bit_8x8(const tran_low_t *input, const int stride, __m128i *const in) { in[0] = load_input_data8(input + 0 * stride); in[1] = load_input_data8(input + 1 * stride); in[2] = load_input_data8(input + 2 * stride); in[3] = load_input_data8(input + 3 * stride); in[4] = load_input_data8(input + 4 * stride); in[5] = load_input_data8(input + 5 * stride); in[6] = load_input_data8(input + 6 * stride); in[7] = load_input_data8(input + 7 * stride); transpose_16bit_8x8(in, in); } static INLINE void recon_and_store(uint8_t *const dest, const __m128i in_x) { const __m128i zero = _mm_setzero_si128(); __m128i d0 = _mm_loadl_epi64((__m128i *)(dest)); d0 = _mm_unpacklo_epi8(d0, zero); d0 = _mm_add_epi16(in_x, d0); d0 = _mm_packus_epi16(d0, d0); _mm_storel_epi64((__m128i *)(dest), d0); } static INLINE void round_shift_8x8(const __m128i *const in, __m128i *const out) { const __m128i final_rounding = _mm_set1_epi16(1 << 4); out[0] = _mm_add_epi16(in[0], final_rounding); out[1] = _mm_add_epi16(in[1], final_rounding); out[2] = _mm_add_epi16(in[2], final_rounding); out[3] = _mm_add_epi16(in[3], final_rounding); out[4] = _mm_add_epi16(in[4], final_rounding); out[5] = _mm_add_epi16(in[5], final_rounding); out[6] = _mm_add_epi16(in[6], final_rounding); out[7] = _mm_add_epi16(in[7], final_rounding); out[0] = _mm_srai_epi16(out[0], 5); out[1] = _mm_srai_epi16(out[1], 5); out[2] = _mm_srai_epi16(out[2], 5); out[3] = _mm_srai_epi16(out[3], 5); out[4] = _mm_srai_epi16(out[4], 5); out[5] = _mm_srai_epi16(out[5], 5); out[6] = _mm_srai_epi16(out[6], 5); out[7] = _mm_srai_epi16(out[7], 5); } static INLINE void write_buffer_8x8(const __m128i *const in, uint8_t *const dest, const int stride) { __m128i t[8]; round_shift_8x8(in, t); recon_and_store(dest + 0 * stride, t[0]); recon_and_store(dest + 1 * stride, t[1]); recon_and_store(dest + 2 * stride, t[2]); recon_and_store(dest + 3 * stride, t[3]); recon_and_store(dest + 4 * stride, t[4]); recon_and_store(dest + 5 * stride, t[5]); recon_and_store(dest + 6 * stride, t[6]); recon_and_store(dest + 7 * stride, t[7]); } static INLINE void 
recon_and_store4x4_sse2(const __m128i *const in, uint8_t *const dest, const int stride) { const __m128i zero = _mm_setzero_si128(); __m128i d[2]; // Reconstruction and Store d[0] = _mm_cvtsi32_si128(*(const int *)(dest)); d[1] = _mm_cvtsi32_si128(*(const int *)(dest + stride * 3)); d[0] = _mm_unpacklo_epi32(d[0], _mm_cvtsi32_si128(*(const int *)(dest + stride))); d[1] = _mm_unpacklo_epi32( _mm_cvtsi32_si128(*(const int *)(dest + stride * 2)), d[1]); d[0] = _mm_unpacklo_epi8(d[0], zero); d[1] = _mm_unpacklo_epi8(d[1], zero); d[0] = _mm_add_epi16(d[0], in[0]); d[1] = _mm_add_epi16(d[1], in[1]); d[0] = _mm_packus_epi16(d[0], d[1]); *(int *)dest = _mm_cvtsi128_si32(d[0]); d[0] = _mm_srli_si128(d[0], 4); *(int *)(dest + stride) = _mm_cvtsi128_si32(d[0]); d[0] = _mm_srli_si128(d[0], 4); *(int *)(dest + stride * 2) = _mm_cvtsi128_si32(d[0]); d[0] = _mm_srli_si128(d[0], 4); *(int *)(dest + stride * 3) = _mm_cvtsi128_si32(d[0]); } static INLINE void store_buffer_8x32(__m128i *in, uint8_t *dst, int stride) { const __m128i final_rounding = _mm_set1_epi16(1 << 5); int j = 0; while (j < 32) { in[j] = _mm_adds_epi16(in[j], final_rounding); in[j + 1] = _mm_adds_epi16(in[j + 1], final_rounding); in[j] = _mm_srai_epi16(in[j], 6); in[j + 1] = _mm_srai_epi16(in[j + 1], 6); recon_and_store(dst, in[j]); dst += stride; recon_and_store(dst, in[j + 1]); dst += stride; j += 2; } } static INLINE void write_buffer_8x1(uint8_t *const dest, const __m128i in) { const __m128i final_rounding = _mm_set1_epi16(1 << 5); __m128i out; out = _mm_adds_epi16(in, final_rounding); out = _mm_srai_epi16(out, 6); recon_and_store(dest, out); } // Only do addition and subtraction butterfly, size = 16, 32 static INLINE void add_sub_butterfly(const __m128i *in, __m128i *out, int size) { int i = 0; const int num = size >> 1; const int bound = size - 1; while (i < num) { out[i] = _mm_add_epi16(in[i], in[bound - i]); out[bound - i] = _mm_sub_epi16(in[i], in[bound - i]); i++; } } static INLINE void idct8(const __m128i *const in /*in[8]*/, __m128i *const out /*out[8]*/) { __m128i step1[8], step2[8]; // stage 1 butterfly(in[1], in[7], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); butterfly(in[5], in[3], cospi_12_64, cospi_20_64, &step1[5], &step1[6]); // stage 2 butterfly(in[0], in[4], cospi_16_64, cospi_16_64, &step2[1], &step2[0]); butterfly(in[2], in[6], cospi_24_64, cospi_8_64, &step2[2], &step2[3]); step2[4] = _mm_add_epi16(step1[4], step1[5]); step2[5] = _mm_sub_epi16(step1[4], step1[5]); step2[6] = _mm_sub_epi16(step1[7], step1[6]); step2[7] = _mm_add_epi16(step1[7], step1[6]); // stage 3 step1[0] = _mm_add_epi16(step2[0], step2[3]); step1[1] = _mm_add_epi16(step2[1], step2[2]); step1[2] = _mm_sub_epi16(step2[1], step2[2]); step1[3] = _mm_sub_epi16(step2[0], step2[3]); butterfly(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); // stage 4 out[0] = _mm_add_epi16(step1[0], step2[7]); out[1] = _mm_add_epi16(step1[1], step1[6]); out[2] = _mm_add_epi16(step1[2], step1[5]); out[3] = _mm_add_epi16(step1[3], step2[4]); out[4] = _mm_sub_epi16(step1[3], step2[4]); out[5] = _mm_sub_epi16(step1[2], step1[5]); out[6] = _mm_sub_epi16(step1[1], step1[6]); out[7] = _mm_sub_epi16(step1[0], step2[7]); } static INLINE void idct8x8_12_add_kernel_sse2(__m128i *const io /*io[8]*/) { const __m128i zero = _mm_setzero_si128(); const __m128i cp_16_16 = pair_set_epi16(cospi_16_64, cospi_16_64); const __m128i cp_16_n16 = pair_set_epi16(cospi_16_64, -cospi_16_64); __m128i step1[8], step2[8], tmp[4]; transpose_16bit_4x4(io, io); // io[0]: 00 10 
// 20 30 01 11 21 31 // io[1]: 02 12 22 32 03 13 23 33 // stage 1 { const __m128i cp_28_n4 = pair_set_epi16(cospi_28_64, -cospi_4_64); const __m128i cp_4_28 = pair_set_epi16(cospi_4_64, cospi_28_64); const __m128i cp_n20_12 = pair_set_epi16(-cospi_20_64, cospi_12_64); const __m128i cp_12_20 = pair_set_epi16(cospi_12_64, cospi_20_64); const __m128i lo_1 = _mm_unpackhi_epi16(io[0], zero); const __m128i lo_3 = _mm_unpackhi_epi16(io[1], zero); step1[4] = idct_calc_wraplow_sse2(cp_28_n4, cp_4_28, lo_1); // step1 4&7 step1[5] = idct_calc_wraplow_sse2(cp_n20_12, cp_12_20, lo_3); // step1 5&6 } // stage 2 { const __m128i cp_24_n8 = pair_set_epi16(cospi_24_64, -cospi_8_64); const __m128i cp_8_24 = pair_set_epi16(cospi_8_64, cospi_24_64); const __m128i lo_0 = _mm_unpacklo_epi16(io[0], zero); const __m128i lo_2 = _mm_unpacklo_epi16(io[1], zero); const __m128i t = idct_madd_round_shift_sse2(cp_16_16, lo_0); step2[0] = _mm_packs_epi32(t, t); // step2 0&1 step2[2] = idct_calc_wraplow_sse2(cp_8_24, cp_24_n8, lo_2); // step2 3&2 step2[4] = _mm_add_epi16(step1[4], step1[5]); // step2 4&7 step2[5] = _mm_sub_epi16(step1[4], step1[5]); // step2 5&6 step2[6] = _mm_unpackhi_epi64(step2[5], zero); // step2 6 } // stage 3 { const __m128i lo_65 = _mm_unpacklo_epi16(step2[6], step2[5]); tmp[0] = _mm_add_epi16(step2[0], step2[2]); // step1 0&1 tmp[1] = _mm_sub_epi16(step2[0], step2[2]); // step1 3&2 step1[2] = _mm_unpackhi_epi64(tmp[1], tmp[0]); // step1 2&1 step1[3] = _mm_unpacklo_epi64(tmp[1], tmp[0]); // step1 3&0 step1[5] = idct_calc_wraplow_sse2(cp_16_n16, cp_16_16, lo_65); // step1 5&6 } // stage 4 tmp[0] = _mm_add_epi16(step1[3], step2[4]); // output 3&0 tmp[1] = _mm_add_epi16(step1[2], step1[5]); // output 2&1 tmp[2] = _mm_sub_epi16(step1[3], step2[4]); // output 4&7 tmp[3] = _mm_sub_epi16(step1[2], step1[5]); // output 5&6 idct8x8_12_transpose_16bit_4x8(tmp, io); io[4] = io[5] = io[6] = io[7] = zero; idct8(io, io); } static INLINE void idct16_8col(const __m128i *const in /*in[16]*/, __m128i *const out /*out[16]*/) { __m128i step1[16], step2[16]; // stage 2 butterfly(in[1], in[15], cospi_30_64, cospi_2_64, &step2[8], &step2[15]); butterfly(in[9], in[7], cospi_14_64, cospi_18_64, &step2[9], &step2[14]); butterfly(in[5], in[11], cospi_22_64, cospi_10_64, &step2[10], &step2[13]); butterfly(in[13], in[3], cospi_6_64, cospi_26_64, &step2[11], &step2[12]); // stage 3 butterfly(in[2], in[14], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); butterfly(in[10], in[6], cospi_12_64, cospi_20_64, &step1[5], &step1[6]); step1[8] = _mm_add_epi16(step2[8], step2[9]); step1[9] = _mm_sub_epi16(step2[8], step2[9]); step1[10] = _mm_sub_epi16(step2[11], step2[10]); step1[11] = _mm_add_epi16(step2[10], step2[11]); step1[12] = _mm_add_epi16(step2[12], step2[13]); step1[13] = _mm_sub_epi16(step2[12], step2[13]); step1[14] = _mm_sub_epi16(step2[15], step2[14]); step1[15] = _mm_add_epi16(step2[14], step2[15]); // stage 4 butterfly(in[0], in[8], cospi_16_64, cospi_16_64, &step2[1], &step2[0]); butterfly(in[4], in[12], cospi_24_64, cospi_8_64, &step2[2], &step2[3]); butterfly(step1[14], step1[9], cospi_24_64, cospi_8_64, &step2[9], &step2[14]); butterfly(step1[10], step1[13], -cospi_8_64, -cospi_24_64, &step2[13], &step2[10]); step2[5] = _mm_sub_epi16(step1[4], step1[5]); step1[4] = _mm_add_epi16(step1[4], step1[5]); step2[6] = _mm_sub_epi16(step1[7], step1[6]); step1[7] = _mm_add_epi16(step1[6], step1[7]); step2[8] = step1[8]; step2[11] = step1[11]; step2[12] = step1[12]; step2[15] = step1[15]; // stage 5 step1[0] =
_mm_add_epi16(step2[0], step2[3]); step1[1] = _mm_add_epi16(step2[1], step2[2]); step1[2] = _mm_sub_epi16(step2[1], step2[2]); step1[3] = _mm_sub_epi16(step2[0], step2[3]); butterfly(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); step1[8] = _mm_add_epi16(step2[8], step2[11]); step1[9] = _mm_add_epi16(step2[9], step2[10]); step1[10] = _mm_sub_epi16(step2[9], step2[10]); step1[11] = _mm_sub_epi16(step2[8], step2[11]); step1[12] = _mm_sub_epi16(step2[15], step2[12]); step1[13] = _mm_sub_epi16(step2[14], step2[13]); step1[14] = _mm_add_epi16(step2[14], step2[13]); step1[15] = _mm_add_epi16(step2[15], step2[12]); // stage 6 step2[0] = _mm_add_epi16(step1[0], step1[7]); step2[1] = _mm_add_epi16(step1[1], step1[6]); step2[2] = _mm_add_epi16(step1[2], step1[5]); step2[3] = _mm_add_epi16(step1[3], step1[4]); step2[4] = _mm_sub_epi16(step1[3], step1[4]); step2[5] = _mm_sub_epi16(step1[2], step1[5]); step2[6] = _mm_sub_epi16(step1[1], step1[6]); step2[7] = _mm_sub_epi16(step1[0], step1[7]); butterfly(step1[13], step1[10], cospi_16_64, cospi_16_64, &step2[10], &step2[13]); butterfly(step1[12], step1[11], cospi_16_64, cospi_16_64, &step2[11], &step2[12]); // stage 7 out[0] = _mm_add_epi16(step2[0], step1[15]); out[1] = _mm_add_epi16(step2[1], step1[14]); out[2] = _mm_add_epi16(step2[2], step2[13]); out[3] = _mm_add_epi16(step2[3], step2[12]); out[4] = _mm_add_epi16(step2[4], step2[11]); out[5] = _mm_add_epi16(step2[5], step2[10]); out[6] = _mm_add_epi16(step2[6], step1[9]); out[7] = _mm_add_epi16(step2[7], step1[8]); out[8] = _mm_sub_epi16(step2[7], step1[8]); out[9] = _mm_sub_epi16(step2[6], step1[9]); out[10] = _mm_sub_epi16(step2[5], step2[10]); out[11] = _mm_sub_epi16(step2[4], step2[11]); out[12] = _mm_sub_epi16(step2[3], step2[12]); out[13] = _mm_sub_epi16(step2[2], step2[13]); out[14] = _mm_sub_epi16(step2[1], step1[14]); out[15] = _mm_sub_epi16(step2[0], step1[15]); } static INLINE void idct16x16_10_pass1(const __m128i *const input /*input[4]*/, __m128i *const output /*output[16]*/) { const __m128i zero = _mm_setzero_si128(); const __m128i k__cospi_p16_p16 = pair_set_epi16(cospi_16_64, cospi_16_64); const __m128i k__cospi_m16_p16 = pair_set_epi16(-cospi_16_64, cospi_16_64); __m128i step1[16], step2[16]; transpose_16bit_4x4(input, output); // stage 2 { const __m128i k__cospi_p30_m02 = pair_set_epi16(cospi_30_64, -cospi_2_64); const __m128i k__cospi_p02_p30 = pair_set_epi16(cospi_2_64, cospi_30_64); const __m128i k__cospi_p06_m26 = pair_set_epi16(cospi_6_64, -cospi_26_64); const __m128i k__cospi_p26_p06 = pair_set_epi16(cospi_26_64, cospi_6_64); const __m128i lo_1_15 = _mm_unpackhi_epi16(output[0], zero); const __m128i lo_13_3 = _mm_unpackhi_epi16(zero, output[1]); step2[8] = idct_calc_wraplow_sse2(k__cospi_p30_m02, k__cospi_p02_p30, lo_1_15); // step2 8&15 step2[11] = idct_calc_wraplow_sse2(k__cospi_p06_m26, k__cospi_p26_p06, lo_13_3); // step2 11&12 } // stage 3 { const __m128i k__cospi_p28_m04 = pair_set_epi16(cospi_28_64, -cospi_4_64); const __m128i k__cospi_p04_p28 = pair_set_epi16(cospi_4_64, cospi_28_64); const __m128i lo_2_14 = _mm_unpacklo_epi16(output[1], zero); step1[4] = idct_calc_wraplow_sse2(k__cospi_p28_m04, k__cospi_p04_p28, lo_2_14); // step1 4&7 step1[13] = _mm_unpackhi_epi64(step2[11], zero); step1[14] = _mm_unpackhi_epi64(step2[8], zero); } // stage 4 { const __m128i k__cospi_m08_p24 = pair_set_epi16(-cospi_8_64, cospi_24_64); const __m128i k__cospi_p24_p08 = pair_set_epi16(cospi_24_64, cospi_8_64); const __m128i k__cospi_m24_m08 = 
pair_set_epi16(-cospi_24_64, -cospi_8_64); const __m128i lo_0_8 = _mm_unpacklo_epi16(output[0], zero); const __m128i lo_9_14 = _mm_unpacklo_epi16(step2[8], step1[14]); const __m128i lo_10_13 = _mm_unpacklo_epi16(step2[11], step1[13]); const __m128i t = idct_madd_round_shift_sse2(lo_0_8, k__cospi_p16_p16); step1[0] = _mm_packs_epi32(t, t); // step2 0&1 step2[9] = idct_calc_wraplow_sse2(k__cospi_m08_p24, k__cospi_p24_p08, lo_9_14); // step2 9&14 step2[10] = idct_calc_wraplow_sse2(k__cospi_m24_m08, k__cospi_m08_p24, lo_10_13); // step2 10&13 step2[6] = _mm_unpackhi_epi64(step1[4], zero); } // stage 5 { const __m128i lo_5_6 = _mm_unpacklo_epi16(step1[4], step2[6]); step1[6] = idct_calc_wraplow_sse2(k__cospi_p16_p16, k__cospi_m16_p16, lo_5_6); // step1 6&5 step1[8] = _mm_add_epi16(step2[8], step2[11]); step1[9] = _mm_add_epi16(step2[9], step2[10]); step1[10] = _mm_sub_epi16(step2[9], step2[10]); step1[11] = _mm_sub_epi16(step2[8], step2[11]); step1[12] = _mm_unpackhi_epi64(step1[11], zero); step1[13] = _mm_unpackhi_epi64(step1[10], zero); step1[14] = _mm_unpackhi_epi64(step1[9], zero); step1[15] = _mm_unpackhi_epi64(step1[8], zero); } // stage 6 { const __m128i lo_10_13 = _mm_unpacklo_epi16(step1[10], step1[13]); const __m128i lo_11_12 = _mm_unpacklo_epi16(step1[11], step1[12]); step2[10] = idct_calc_wraplow_sse2(k__cospi_m16_p16, k__cospi_p16_p16, lo_10_13); // step2 10&13 step2[11] = idct_calc_wraplow_sse2(k__cospi_m16_p16, k__cospi_p16_p16, lo_11_12); // step2 11&12 step2[13] = _mm_unpackhi_epi64(step2[10], zero); step2[12] = _mm_unpackhi_epi64(step2[11], zero); step2[3] = _mm_add_epi16(step1[0], step1[4]); step2[1] = _mm_add_epi16(step1[0], step1[6]); step2[6] = _mm_sub_epi16(step1[0], step1[6]); step2[4] = _mm_sub_epi16(step1[0], step1[4]); step2[0] = _mm_unpackhi_epi64(step2[3], zero); step2[2] = _mm_unpackhi_epi64(step2[1], zero); step2[5] = _mm_unpackhi_epi64(step2[6], zero); step2[7] = _mm_unpackhi_epi64(step2[4], zero); } // stage 7. Left 8x16 only. 
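// output[i] and output[15 - i] are the sum and difference of the same
// mirrored pair, completing the final butterfly of the first pass.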
output[0] = _mm_add_epi16(step2[0], step1[15]); output[1] = _mm_add_epi16(step2[1], step1[14]); output[2] = _mm_add_epi16(step2[2], step2[13]); output[3] = _mm_add_epi16(step2[3], step2[12]); output[4] = _mm_add_epi16(step2[4], step2[11]); output[5] = _mm_add_epi16(step2[5], step2[10]); output[6] = _mm_add_epi16(step2[6], step1[9]); output[7] = _mm_add_epi16(step2[7], step1[8]); output[8] = _mm_sub_epi16(step2[7], step1[8]); output[9] = _mm_sub_epi16(step2[6], step1[9]); output[10] = _mm_sub_epi16(step2[5], step2[10]); output[11] = _mm_sub_epi16(step2[4], step2[11]); output[12] = _mm_sub_epi16(step2[3], step2[12]); output[13] = _mm_sub_epi16(step2[2], step2[13]); output[14] = _mm_sub_epi16(step2[1], step1[14]); output[15] = _mm_sub_epi16(step2[0], step1[15]); } static INLINE void idct16x16_10_pass2(__m128i *const l /*l[8]*/, __m128i *const io /*io[16]*/) { const __m128i zero = _mm_setzero_si128(); __m128i step1[16], step2[16]; transpose_16bit_4x8(l, io); // stage 2 butterfly(io[1], zero, cospi_30_64, cospi_2_64, &step2[8], &step2[15]); butterfly(zero, io[3], cospi_6_64, cospi_26_64, &step2[11], &step2[12]); // stage 3 butterfly(io[2], zero, cospi_28_64, cospi_4_64, &step1[4], &step1[7]); // stage 4 step1[0] = butterfly_cospi16(io[0]); butterfly(step2[15], step2[8], cospi_24_64, cospi_8_64, &step2[9], &step2[14]); butterfly(step2[11], step2[12], -cospi_8_64, -cospi_24_64, &step2[13], &step2[10]); // stage 5 butterfly(step1[7], step1[4], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); step1[8] = _mm_add_epi16(step2[8], step2[11]); step1[9] = _mm_add_epi16(step2[9], step2[10]); step1[10] = _mm_sub_epi16(step2[9], step2[10]); step1[11] = _mm_sub_epi16(step2[8], step2[11]); step1[12] = _mm_sub_epi16(step2[15], step2[12]); step1[13] = _mm_sub_epi16(step2[14], step2[13]); step1[14] = _mm_add_epi16(step2[14], step2[13]); step1[15] = _mm_add_epi16(step2[15], step2[12]); // stage 6 step2[0] = _mm_add_epi16(step1[0], step1[7]); step2[1] = _mm_add_epi16(step1[0], step1[6]); step2[2] = _mm_add_epi16(step1[0], step1[5]); step2[3] = _mm_add_epi16(step1[0], step1[4]); step2[4] = _mm_sub_epi16(step1[0], step1[4]); step2[5] = _mm_sub_epi16(step1[0], step1[5]); step2[6] = _mm_sub_epi16(step1[0], step1[6]); step2[7] = _mm_sub_epi16(step1[0], step1[7]); butterfly(step1[13], step1[10], cospi_16_64, cospi_16_64, &step2[10], &step2[13]); butterfly(step1[12], step1[11], cospi_16_64, cospi_16_64, &step2[11], &step2[12]); // stage 7 io[0] = _mm_add_epi16(step2[0], step1[15]); io[1] = _mm_add_epi16(step2[1], step1[14]); io[2] = _mm_add_epi16(step2[2], step2[13]); io[3] = _mm_add_epi16(step2[3], step2[12]); io[4] = _mm_add_epi16(step2[4], step2[11]); io[5] = _mm_add_epi16(step2[5], step2[10]); io[6] = _mm_add_epi16(step2[6], step1[9]); io[7] = _mm_add_epi16(step2[7], step1[8]); io[8] = _mm_sub_epi16(step2[7], step1[8]); io[9] = _mm_sub_epi16(step2[6], step1[9]); io[10] = _mm_sub_epi16(step2[5], step2[10]); io[11] = _mm_sub_epi16(step2[4], step2[11]); io[12] = _mm_sub_epi16(step2[3], step2[12]); io[13] = _mm_sub_epi16(step2[2], step2[13]); io[14] = _mm_sub_epi16(step2[1], step1[14]); io[15] = _mm_sub_epi16(step2[0], step1[15]); } static INLINE void idct32_8x32_quarter_2_stage_4_to_6( __m128i *const step1 /*step1[16]*/, __m128i *const out /*out[16]*/) { __m128i step2[32]; // stage 4 step2[8] = step1[8]; step2[15] = step1[15]; butterfly(step1[14], step1[9], cospi_24_64, cospi_8_64, &step2[9], &step2[14]); butterfly(step1[13], step1[10], -cospi_8_64, cospi_24_64, &step2[10], &step2[13]); step2[11] = step1[11]; step2[12] 
= step1[12]; // stage 5 step1[8] = _mm_add_epi16(step2[8], step2[11]); step1[9] = _mm_add_epi16(step2[9], step2[10]); step1[10] = _mm_sub_epi16(step2[9], step2[10]); step1[11] = _mm_sub_epi16(step2[8], step2[11]); step1[12] = _mm_sub_epi16(step2[15], step2[12]); step1[13] = _mm_sub_epi16(step2[14], step2[13]); step1[14] = _mm_add_epi16(step2[14], step2[13]); step1[15] = _mm_add_epi16(step2[15], step2[12]); // stage 6 out[8] = step1[8]; out[9] = step1[9]; butterfly(step1[13], step1[10], cospi_16_64, cospi_16_64, &out[10], &out[13]); butterfly(step1[12], step1[11], cospi_16_64, cospi_16_64, &out[11], &out[12]); out[14] = step1[14]; out[15] = step1[15]; } static INLINE void idct32_8x32_quarter_3_4_stage_4_to_7( __m128i *const step1 /*step1[32]*/, __m128i *const out /*out[32]*/) { __m128i step2[32]; // stage 4 step2[16] = _mm_add_epi16(step1[16], step1[19]); step2[17] = _mm_add_epi16(step1[17], step1[18]); step2[18] = _mm_sub_epi16(step1[17], step1[18]); step2[19] = _mm_sub_epi16(step1[16], step1[19]); step2[20] = _mm_sub_epi16(step1[23], step1[20]); step2[21] = _mm_sub_epi16(step1[22], step1[21]); step2[22] = _mm_add_epi16(step1[22], step1[21]); step2[23] = _mm_add_epi16(step1[23], step1[20]); step2[24] = _mm_add_epi16(step1[24], step1[27]); step2[25] = _mm_add_epi16(step1[25], step1[26]); step2[26] = _mm_sub_epi16(step1[25], step1[26]); step2[27] = _mm_sub_epi16(step1[24], step1[27]); step2[28] = _mm_sub_epi16(step1[31], step1[28]); step2[29] = _mm_sub_epi16(step1[30], step1[29]); step2[30] = _mm_add_epi16(step1[29], step1[30]); step2[31] = _mm_add_epi16(step1[28], step1[31]); // stage 5 step1[16] = step2[16]; step1[17] = step2[17]; butterfly(step2[29], step2[18], cospi_24_64, cospi_8_64, &step1[18], &step1[29]); butterfly(step2[28], step2[19], cospi_24_64, cospi_8_64, &step1[19], &step1[28]); butterfly(step2[27], step2[20], -cospi_8_64, cospi_24_64, &step1[20], &step1[27]); butterfly(step2[26], step2[21], -cospi_8_64, cospi_24_64, &step1[21], &step1[26]); step1[22] = step2[22]; step1[23] = step2[23]; step1[24] = step2[24]; step1[25] = step2[25]; step1[30] = step2[30]; step1[31] = step2[31]; // stage 6 out[16] = _mm_add_epi16(step1[16], step1[23]); out[17] = _mm_add_epi16(step1[17], step1[22]); out[18] = _mm_add_epi16(step1[18], step1[21]); out[19] = _mm_add_epi16(step1[19], step1[20]); step2[20] = _mm_sub_epi16(step1[19], step1[20]); step2[21] = _mm_sub_epi16(step1[18], step1[21]); step2[22] = _mm_sub_epi16(step1[17], step1[22]); step2[23] = _mm_sub_epi16(step1[16], step1[23]); step2[24] = _mm_sub_epi16(step1[31], step1[24]); step2[25] = _mm_sub_epi16(step1[30], step1[25]); step2[26] = _mm_sub_epi16(step1[29], step1[26]); step2[27] = _mm_sub_epi16(step1[28], step1[27]); out[28] = _mm_add_epi16(step1[27], step1[28]); out[29] = _mm_add_epi16(step1[26], step1[29]); out[30] = _mm_add_epi16(step1[25], step1[30]); out[31] = _mm_add_epi16(step1[24], step1[31]); // stage 7 butterfly(step2[27], step2[20], cospi_16_64, cospi_16_64, &out[20], &out[27]); butterfly(step2[26], step2[21], cospi_16_64, cospi_16_64, &out[21], &out[26]); butterfly(step2[25], step2[22], cospi_16_64, cospi_16_64, &out[22], &out[25]); butterfly(step2[24], step2[23], cospi_16_64, cospi_16_64, &out[23], &out[24]); } void idct4_sse2(__m128i *const in); void vpx_idct8_sse2(__m128i *const in); void idct16_sse2(__m128i *const in0, __m128i *const in1); void iadst4_sse2(__m128i *const in); void iadst8_sse2(__m128i *const in); void vpx_iadst16_8col_sse2(__m128i *const in); void iadst16_sse2(__m128i *const in0, __m128i *const in1); void 
idct32_1024_8x32(const __m128i *const in, __m128i *const out); void idct32_34_8x32_sse2(const __m128i *const in, __m128i *const out); void idct32_34_8x32_ssse3(const __m128i *const in, __m128i *const out); #endif // VPX_VPX_DSP_X86_INV_TXFM_SSE2_H_ libvpx-1.8.2/vpx_dsp/x86/inv_txfm_ssse3.c000066400000000000000000000305441357355204000202660ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <tmmintrin.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/x86/inv_txfm_sse2.h" #include "vpx_dsp/x86/inv_txfm_ssse3.h" #include "vpx_dsp/x86/transpose_sse2.h" #include "vpx_dsp/x86/txfm_common_sse2.h" static INLINE void partial_butterfly_ssse3(const __m128i in, const int c0, const int c1, __m128i *const out0, __m128i *const out1) { const __m128i cst0 = _mm_set1_epi16(2 * c0); const __m128i cst1 = _mm_set1_epi16(2 * c1); *out0 = _mm_mulhrs_epi16(in, cst0); *out1 = _mm_mulhrs_epi16(in, cst1); } static INLINE __m128i partial_butterfly_cospi16_ssse3(const __m128i in) { const __m128i coef_pair = _mm_set1_epi16(2 * cospi_16_64); return _mm_mulhrs_epi16(in, coef_pair); } void vpx_idct8x8_12_add_ssse3(const tran_low_t *input, uint8_t *dest, int stride) { __m128i io[8]; io[0] = load_input_data4(input + 0 * 8); io[1] = load_input_data4(input + 1 * 8); io[2] = load_input_data4(input + 2 * 8); io[3] = load_input_data4(input + 3 * 8); idct8x8_12_add_kernel_ssse3(io); write_buffer_8x8(io, dest, stride); } // Group the coefficient calculation into smaller functions to prevent stack // spillover in 32x32 idct optimizations: // quarter_1: 0-7 // quarter_2: 8-15 // quarter_3_4: 16-23, 24-31 // For each 8x32 block __m128i in[32], // Input with index, 0, 4 // output pixels: 0-7 in __m128i out[32] static INLINE void idct32_34_8x32_quarter_1(const __m128i *const in /*in[32]*/, __m128i *const out /*out[8]*/) { __m128i step1[8], step2[8]; // stage 3 partial_butterfly_ssse3(in[4], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); // stage 4 step2[0] = partial_butterfly_cospi16_ssse3(in[0]); step2[4] = step1[4]; step2[5] = step1[4]; step2[6] = step1[7]; step2[7] = step1[7]; // stage 5 step1[0] = step2[0]; step1[1] = step2[0]; step1[2] = step2[0]; step1[3] = step2[0]; step1[4] = step2[4]; butterfly(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); step1[7] = step2[7]; // stage 6 out[0] = _mm_add_epi16(step1[0], step1[7]); out[1] = _mm_add_epi16(step1[1], step1[6]); out[2] = _mm_add_epi16(step1[2], step1[5]); out[3] = _mm_add_epi16(step1[3], step1[4]); out[4] = _mm_sub_epi16(step1[3], step1[4]); out[5] = _mm_sub_epi16(step1[2], step1[5]); out[6] = _mm_sub_epi16(step1[1], step1[6]); out[7] = _mm_sub_epi16(step1[0], step1[7]); } // For each 8x32 block __m128i in[32], // Input with index, 2, 6 // output pixels: 8-15 in __m128i out[32] static INLINE void idct32_34_8x32_quarter_2(const __m128i *const in /*in[32]*/, __m128i *const out /*out[16]*/) { __m128i step1[16], step2[16]; // stage 2 partial_butterfly_ssse3(in[2], cospi_30_64, cospi_2_64, &step2[8], &step2[15]); partial_butterfly_ssse3(in[6], -cospi_26_64, cospi_6_64, &step2[11], &step2[12]); // stage 3 step1[8] = step2[8]; step1[9] = step2[8]; step1[14] = step2[15]; step1[15] = step2[15]; step1[10] =
step2[11]; step1[11] = step2[11]; step1[12] = step2[12]; step1[13] = step2[12]; idct32_8x32_quarter_2_stage_4_to_6(step1, out); } static INLINE void idct32_34_8x32_quarter_1_2( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[16]; idct32_34_8x32_quarter_1(in, temp); idct32_34_8x32_quarter_2(in, temp); // stage 7 add_sub_butterfly(temp, out, 16); } // For each 8x32 block __m128i in[32], // Input with odd index, 1, 3, 5, 7 // output pixels: 16-23, 24-31 in __m128i out[32] static INLINE void idct32_34_8x32_quarter_3_4( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i step1[32]; // stage 1 partial_butterfly_ssse3(in[1], cospi_31_64, cospi_1_64, &step1[16], &step1[31]); partial_butterfly_ssse3(in[7], -cospi_25_64, cospi_7_64, &step1[19], &step1[28]); partial_butterfly_ssse3(in[5], cospi_27_64, cospi_5_64, &step1[20], &step1[27]); partial_butterfly_ssse3(in[3], -cospi_29_64, cospi_3_64, &step1[23], &step1[24]); // stage 3 butterfly(step1[31], step1[16], cospi_28_64, cospi_4_64, &step1[17], &step1[30]); butterfly(step1[28], step1[19], -cospi_4_64, cospi_28_64, &step1[18], &step1[29]); butterfly(step1[27], step1[20], cospi_12_64, cospi_20_64, &step1[21], &step1[26]); butterfly(step1[24], step1[23], -cospi_20_64, cospi_12_64, &step1[22], &step1[25]); idct32_8x32_quarter_3_4_stage_4_to_7(step1, out); } void idct32_34_8x32_ssse3(const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[32]; idct32_34_8x32_quarter_1_2(in, temp); idct32_34_8x32_quarter_3_4(in, temp); // final stage add_sub_butterfly(temp, out, 32); } // Only upper-left 8x8 has non-zero coeff void vpx_idct32x32_34_add_ssse3(const tran_low_t *input, uint8_t *dest, int stride) { __m128i io[32], col[32]; int i; // Load input data. Only need to load the top left 8x8 block. 
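  // With eob <= 34, the VP9 scan order leaves every nonzero coefficient in
  // the upper-left 8x8 corner of the 32x32 block, so rows and columns 8-31
  // are known to be zero and are never read.  A rough scalar sketch of the
  // gather performed below (names illustrative only):
  //   for (r = 0; r < 8; ++r)
  //     for (c = 0; c < 8; ++c) coeff8x8[r][c] = input[r * 32 + c];
  // load_transpose_16bit_8x8() adds a transpose on top of this gather so
  // that each io[] register ends up holding one 8-coefficient column.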
load_transpose_16bit_8x8(input, 32, io); idct32_34_8x32_ssse3(io, col); for (i = 0; i < 32; i += 8) { int j; transpose_16bit_8x8(col + i, io); idct32_34_8x32_ssse3(io, io); for (j = 0; j < 32; ++j) { write_buffer_8x1(dest + j * stride, io[j]); } dest += 8; } } // For each 8x32 block __m128i in[32], // Input with index, 0, 4, 8, 12 // output pixels: 0-7 in __m128i out[32] static INLINE void idct32_135_8x32_quarter_1(const __m128i *const in /*in[32]*/, __m128i *const out /*out[8]*/) { __m128i step1[8], step2[8]; // stage 3 partial_butterfly_ssse3(in[4], cospi_28_64, cospi_4_64, &step1[4], &step1[7]); partial_butterfly_ssse3(in[12], -cospi_20_64, cospi_12_64, &step1[5], &step1[6]); // stage 4 step2[0] = partial_butterfly_cospi16_ssse3(in[0]); partial_butterfly_ssse3(in[8], cospi_24_64, cospi_8_64, &step2[2], &step2[3]); step2[4] = _mm_add_epi16(step1[4], step1[5]); step2[5] = _mm_sub_epi16(step1[4], step1[5]); step2[6] = _mm_sub_epi16(step1[7], step1[6]); step2[7] = _mm_add_epi16(step1[7], step1[6]); // stage 5 step1[0] = _mm_add_epi16(step2[0], step2[3]); step1[1] = _mm_add_epi16(step2[0], step2[2]); step1[2] = _mm_sub_epi16(step2[0], step2[2]); step1[3] = _mm_sub_epi16(step2[0], step2[3]); step1[4] = step2[4]; butterfly(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]); step1[7] = step2[7]; // stage 6 out[0] = _mm_add_epi16(step1[0], step1[7]); out[1] = _mm_add_epi16(step1[1], step1[6]); out[2] = _mm_add_epi16(step1[2], step1[5]); out[3] = _mm_add_epi16(step1[3], step1[4]); out[4] = _mm_sub_epi16(step1[3], step1[4]); out[5] = _mm_sub_epi16(step1[2], step1[5]); out[6] = _mm_sub_epi16(step1[1], step1[6]); out[7] = _mm_sub_epi16(step1[0], step1[7]); } // For each 8x32 block __m128i in[32], // Input with index, 2, 6, 10, 14 // output pixels: 8-15 in __m128i out[32] static INLINE void idct32_135_8x32_quarter_2(const __m128i *const in /*in[32]*/, __m128i *const out /*out[16]*/) { __m128i step1[16], step2[16]; // stage 2 partial_butterfly_ssse3(in[2], cospi_30_64, cospi_2_64, &step2[8], &step2[15]); partial_butterfly_ssse3(in[14], -cospi_18_64, cospi_14_64, &step2[9], &step2[14]); partial_butterfly_ssse3(in[10], cospi_22_64, cospi_10_64, &step2[10], &step2[13]); partial_butterfly_ssse3(in[6], -cospi_26_64, cospi_6_64, &step2[11], &step2[12]); // stage 3 step1[8] = _mm_add_epi16(step2[8], step2[9]); step1[9] = _mm_sub_epi16(step2[8], step2[9]); step1[10] = _mm_sub_epi16(step2[11], step2[10]); step1[11] = _mm_add_epi16(step2[11], step2[10]); step1[12] = _mm_add_epi16(step2[12], step2[13]); step1[13] = _mm_sub_epi16(step2[12], step2[13]); step1[14] = _mm_sub_epi16(step2[15], step2[14]); step1[15] = _mm_add_epi16(step2[15], step2[14]); idct32_8x32_quarter_2_stage_4_to_6(step1, out); } static INLINE void idct32_135_8x32_quarter_1_2( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[16]; idct32_135_8x32_quarter_1(in, temp); idct32_135_8x32_quarter_2(in, temp); // stage 7 add_sub_butterfly(temp, out, 16); } // For each 8x32 block __m128i in[32], // Input with odd index, // 1, 3, 5, 7, 9, 11, 13, 15 // output pixels: 16-23, 24-31 in __m128i out[32] static INLINE void idct32_135_8x32_quarter_3_4( const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i step1[32], step2[32]; // stage 1 partial_butterfly_ssse3(in[1], cospi_31_64, cospi_1_64, &step1[16], &step1[31]); partial_butterfly_ssse3(in[15], -cospi_17_64, cospi_15_64, &step1[17], &step1[30]); partial_butterfly_ssse3(in[9], cospi_23_64, cospi_9_64, &step1[18], &step1[29]); 
partial_butterfly_ssse3(in[7], -cospi_25_64, cospi_7_64, &step1[19], &step1[28]); partial_butterfly_ssse3(in[5], cospi_27_64, cospi_5_64, &step1[20], &step1[27]); partial_butterfly_ssse3(in[11], -cospi_21_64, cospi_11_64, &step1[21], &step1[26]); partial_butterfly_ssse3(in[13], cospi_19_64, cospi_13_64, &step1[22], &step1[25]); partial_butterfly_ssse3(in[3], -cospi_29_64, cospi_3_64, &step1[23], &step1[24]); // stage 2 step2[16] = _mm_add_epi16(step1[16], step1[17]); step2[17] = _mm_sub_epi16(step1[16], step1[17]); step2[18] = _mm_sub_epi16(step1[19], step1[18]); step2[19] = _mm_add_epi16(step1[19], step1[18]); step2[20] = _mm_add_epi16(step1[20], step1[21]); step2[21] = _mm_sub_epi16(step1[20], step1[21]); step2[22] = _mm_sub_epi16(step1[23], step1[22]); step2[23] = _mm_add_epi16(step1[23], step1[22]); step2[24] = _mm_add_epi16(step1[24], step1[25]); step2[25] = _mm_sub_epi16(step1[24], step1[25]); step2[26] = _mm_sub_epi16(step1[27], step1[26]); step2[27] = _mm_add_epi16(step1[27], step1[26]); step2[28] = _mm_add_epi16(step1[28], step1[29]); step2[29] = _mm_sub_epi16(step1[28], step1[29]); step2[30] = _mm_sub_epi16(step1[31], step1[30]); step2[31] = _mm_add_epi16(step1[31], step1[30]); // stage 3 step1[16] = step2[16]; step1[31] = step2[31]; butterfly(step2[30], step2[17], cospi_28_64, cospi_4_64, &step1[17], &step1[30]); butterfly(step2[29], step2[18], -cospi_4_64, cospi_28_64, &step1[18], &step1[29]); step1[19] = step2[19]; step1[20] = step2[20]; butterfly(step2[26], step2[21], cospi_12_64, cospi_20_64, &step1[21], &step1[26]); butterfly(step2[25], step2[22], -cospi_20_64, cospi_12_64, &step1[22], &step1[25]); step1[23] = step2[23]; step1[24] = step2[24]; step1[27] = step2[27]; step1[28] = step2[28]; idct32_8x32_quarter_3_4_stage_4_to_7(step1, out); } void idct32_135_8x32_ssse3(const __m128i *const in /*in[32]*/, __m128i *const out /*out[32]*/) { __m128i temp[32]; idct32_135_8x32_quarter_1_2(in, temp); idct32_135_8x32_quarter_3_4(in, temp); // final stage add_sub_butterfly(temp, out, 32); } void vpx_idct32x32_135_add_ssse3(const tran_low_t *input, uint8_t *dest, int stride) { __m128i col[2][32], io[32]; int i; // rows for (i = 0; i < 2; i++) { load_transpose_16bit_8x8(&input[0], 32, &io[0]); load_transpose_16bit_8x8(&input[8], 32, &io[8]); idct32_135_8x32_ssse3(io, col[i]); input += 32 << 3; } // columns for (i = 0; i < 32; i += 8) { transpose_16bit_8x8(col[0] + i, io); transpose_16bit_8x8(col[1] + i, io + 8); idct32_135_8x32_ssse3(io, io); store_buffer_8x32(io, dest, stride); dest += 8; } } libvpx-1.8.2/vpx_dsp/x86/inv_txfm_ssse3.h000066400000000000000000000110341357355204000202640ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#ifndef VPX_VPX_DSP_X86_INV_TXFM_SSSE3_H_
#define VPX_VPX_DSP_X86_INV_TXFM_SSSE3_H_

#include <tmmintrin.h>

#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/x86/inv_txfm_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_dsp/x86/txfm_common_sse2.h"

static INLINE void idct8x8_12_add_kernel_ssse3(__m128i *const io /* io[8] */) {
  const __m128i cp_28d_4d = dual_set_epi16(2 * cospi_28_64, 2 * cospi_4_64);
  const __m128i cp_n20d_12d = dual_set_epi16(-2 * cospi_20_64, 2 * cospi_12_64);
  const __m128i cp_8d_24d = dual_set_epi16(2 * cospi_8_64, 2 * cospi_24_64);
  const __m128i cp_16_16 = _mm_set1_epi16(cospi_16_64);
  const __m128i cp_16_n16 = pair_set_epi16(cospi_16_64, -cospi_16_64);
  const __m128i cospi_16_64d = _mm_set1_epi16((int16_t)(2 * cospi_16_64));
  const __m128i cospi_28_64d = _mm_set1_epi16((int16_t)(2 * cospi_28_64));
  const __m128i cospi_4_64d = _mm_set1_epi16((int16_t)(2 * cospi_4_64));
  const __m128i cospi_n20_64d = _mm_set1_epi16((int16_t)(-2 * cospi_20_64));
  const __m128i cospi_12_64d = _mm_set1_epi16((int16_t)(2 * cospi_12_64));
  const __m128i cospi_24_64d = _mm_set1_epi16((int16_t)(2 * cospi_24_64));
  const __m128i cospi_8_64d = _mm_set1_epi16((int16_t)(2 * cospi_8_64));
  __m128i step1[8], step2[8], tmp[4];

  // pass 1
  transpose_16bit_4x4(io, io);
  // io[0]: 00 10 20 30  01 11 21 31
  // io[1]: 02 12 22 32  03 13 23 33

  // stage 1
  tmp[0] = _mm_unpacklo_epi64(io[0], io[0]);
  tmp[1] = _mm_unpackhi_epi64(io[0], io[0]);
  tmp[2] = _mm_unpacklo_epi64(io[1], io[1]);
  tmp[3] = _mm_unpackhi_epi64(io[1], io[1]);
  step1[4] = _mm_mulhrs_epi16(tmp[1], cp_28d_4d);    // step1 4&7
  step1[5] = _mm_mulhrs_epi16(tmp[3], cp_n20d_12d);  // step1 5&6

  // stage 2
  step2[0] = _mm_mulhrs_epi16(tmp[0], cospi_16_64d);  // step2 0&1
  step2[2] = _mm_mulhrs_epi16(tmp[2], cp_8d_24d);     // step2 3&2
  step2[4] = _mm_add_epi16(step1[4], step1[5]);       // step2 4&7
  step2[5] = _mm_sub_epi16(step1[4], step1[5]);       // step2 5&6
  step2[6] = _mm_unpackhi_epi64(step2[5], step2[5]);  // step2 6

  // stage 3
  tmp[0] = _mm_unpacklo_epi16(step2[6], step2[5]);
  step1[5] = idct_calc_wraplow_sse2(cp_16_n16, cp_16_16, tmp[0]);  // step1 5&6
  tmp[0] = _mm_add_epi16(step2[0], step2[2]);     // step1 0&1
  tmp[1] = _mm_sub_epi16(step2[0], step2[2]);     // step1 3&2
  step1[2] = _mm_unpackhi_epi64(tmp[1], tmp[0]);  // step1 2&1
  step1[3] = _mm_unpacklo_epi64(tmp[1], tmp[0]);  // step1 3&0

  // stage 4
  tmp[0] = _mm_add_epi16(step1[3], step2[4]);  // output 3&0
  tmp[1] = _mm_add_epi16(step1[2], step1[5]);  // output 2&1
  tmp[2] = _mm_sub_epi16(step1[3], step2[4]);  // output 4&7
  tmp[3] = _mm_sub_epi16(step1[2], step1[5]);  // output 5&6

  // pass 2
  idct8x8_12_transpose_16bit_4x8(tmp, io);

  // stage 1
  step1[4] = _mm_mulhrs_epi16(io[1], cospi_28_64d);
  step1[7] = _mm_mulhrs_epi16(io[1], cospi_4_64d);
  step1[5] = _mm_mulhrs_epi16(io[3], cospi_n20_64d);
  step1[6] = _mm_mulhrs_epi16(io[3], cospi_12_64d);

  // stage 2
  step2[0] = _mm_mulhrs_epi16(io[0], cospi_16_64d);  // step2[1] = step2[0]
  step2[2] = _mm_mulhrs_epi16(io[2], cospi_24_64d);
  step2[3] = _mm_mulhrs_epi16(io[2], cospi_8_64d);
  step2[4] = _mm_add_epi16(step1[4], step1[5]);
  step2[5] = _mm_sub_epi16(step1[4], step1[5]);
  step2[6] = _mm_sub_epi16(step1[7], step1[6]);
  step2[7] = _mm_add_epi16(step1[7], step1[6]);

  // stage 3
  step1[0] = _mm_add_epi16(step2[0], step2[3]);
  step1[1] = _mm_add_epi16(step2[0], step2[2]);
  step1[2] = _mm_sub_epi16(step2[0], step2[2]);
  step1[3] = _mm_sub_epi16(step2[0], step2[3]);
  butterfly(step2[6], step2[5], cospi_16_64, cospi_16_64, &step1[5], &step1[6]);

  // stage 4
  io[0] = _mm_add_epi16(step1[0], step2[7]);
  io[1] = _mm_add_epi16(step1[1],
step1[6]); io[2] = _mm_add_epi16(step1[2], step1[5]); io[3] = _mm_add_epi16(step1[3], step2[4]); io[4] = _mm_sub_epi16(step1[3], step2[4]); io[5] = _mm_sub_epi16(step1[2], step1[5]); io[6] = _mm_sub_epi16(step1[1], step1[6]); io[7] = _mm_sub_epi16(step1[0], step2[7]); } void idct32_135_8x32_ssse3(const __m128i *const in, __m128i *const out); #endif // VPX_VPX_DSP_X86_INV_TXFM_SSSE3_H_ libvpx-1.8.2/vpx_dsp/x86/inv_wht_sse2.asm000066400000000000000000000053451357355204000202650ustar00rootroot00000000000000; ; Copyright (c) 2015 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "third_party/x86inc/x86inc.asm" %include "vpx_dsp/x86/bitdepth_conversion_sse2.asm" SECTION .text %macro REORDER_INPUTS 0 ; a c d b to a b c d SWAP 1, 3, 2 %endmacro %macro TRANSFORM_COLS 0 ; input: ; m0 a ; m1 b ; m2 c ; m3 d paddw m0, m2 psubw m3, m1 ; wide subtract punpcklwd m4, m0 punpcklwd m5, m3 psrad m4, 16 psrad m5, 16 psubd m4, m5 psrad m4, 1 packssdw m4, m4 ; e psubw m5, m4, m1 ; b psubw m4, m2 ; c psubw m0, m5 paddw m3, m4 ; m0 a SWAP 1, 5 ; m1 b SWAP 2, 4 ; m2 c ; m3 d %endmacro %macro TRANSPOSE_4X4 0 punpcklwd m0, m2 punpcklwd m1, m3 mova m2, m0 punpcklwd m0, m1 punpckhwd m2, m1 pshufd m1, m0, 0x0e pshufd m3, m2, 0x0e %endmacro ; transpose a 4x4 int16 matrix in xmm0 and xmm1 to the bottom half of xmm0-xmm3 %macro TRANSPOSE_4X4_WIDE 0 mova m3, m0 punpcklwd m0, m1 punpckhwd m3, m1 mova m2, m0 punpcklwd m0, m3 punpckhwd m2, m3 pshufd m1, m0, 0x0e pshufd m3, m2, 0x0e %endmacro %macro ADD_STORE_4P_2X 5 ; src1, src2, tmp1, tmp2, zero movd m%3, [outputq] movd m%4, [outputq + strideq] punpcklbw m%3, m%5 punpcklbw m%4, m%5 paddw m%1, m%3 paddw m%2, m%4 packuswb m%1, m%5 packuswb m%2, m%5 movd [outputq], m%1 movd [outputq + strideq], m%2 %endmacro INIT_XMM sse2 cglobal iwht4x4_16_add, 3, 3, 7, input, output, stride LOAD_TRAN_LOW 0, inputq, 0 LOAD_TRAN_LOW 1, inputq, 8 psraw m0, 2 psraw m1, 2 TRANSPOSE_4X4_WIDE REORDER_INPUTS TRANSFORM_COLS TRANSPOSE_4X4 REORDER_INPUTS TRANSFORM_COLS pxor m4, m4 ADD_STORE_4P_2X 0, 1, 5, 6, 4 lea outputq, [outputq + 2 * strideq] ADD_STORE_4P_2X 2, 3, 5, 6, 4 RET libvpx-1.8.2/vpx_dsp/x86/loopfilter_avx2.c000066400000000000000000001071071357355204000204330ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <immintrin.h> /* AVX2 */

#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"

void vpx_lpf_horizontal_16_avx2(unsigned char *s, int pitch,
                                const unsigned char *blimit,
                                const unsigned char *limit,
                                const unsigned char *thresh) {
  __m128i mask, hev, flat, flat2;
  const __m128i zero = _mm_set1_epi16(0);
  const __m128i one = _mm_set1_epi8(1);
  __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1;
  __m128i abs_p1p0;
  const __m128i thresh_v =
      _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)thresh[0]));
  const __m128i limit_v =
      _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)limit[0]));
  const __m128i blimit_v =
      _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)blimit[0]));

  q4p4 = _mm_loadl_epi64((__m128i *)(s - 5 * pitch));
  q4p4 = _mm_castps_si128(
      _mm_loadh_pi(_mm_castsi128_ps(q4p4), (__m64 *)(s + 4 * pitch)));
  q3p3 = _mm_loadl_epi64((__m128i *)(s - 4 * pitch));
  q3p3 = _mm_castps_si128(
      _mm_loadh_pi(_mm_castsi128_ps(q3p3), (__m64 *)(s + 3 * pitch)));
  q2p2 = _mm_loadl_epi64((__m128i *)(s - 3 * pitch));
  q2p2 = _mm_castps_si128(
      _mm_loadh_pi(_mm_castsi128_ps(q2p2), (__m64 *)(s + 2 * pitch)));
  q1p1 = _mm_loadl_epi64((__m128i *)(s - 2 * pitch));
  q1p1 = _mm_castps_si128(
      _mm_loadh_pi(_mm_castsi128_ps(q1p1), (__m64 *)(s + 1 * pitch)));
  p1q1 = _mm_shuffle_epi32(q1p1, 78);
  q0p0 = _mm_loadl_epi64((__m128i *)(s - 1 * pitch));
  q0p0 = _mm_castps_si128(
      _mm_loadh_pi(_mm_castsi128_ps(q0p0), (__m64 *)(s - 0 * pitch)));
  p0q0 = _mm_shuffle_epi32(q0p0, 78);

  {
    __m128i abs_p1q1, abs_p0q0, abs_q1q0, fe, ff, work;
    abs_p1p0 =
        _mm_or_si128(_mm_subs_epu8(q1p1, q0p0), _mm_subs_epu8(q0p0, q1p1));
    abs_q1q0 = _mm_srli_si128(abs_p1p0, 8);
    fe = _mm_set1_epi8((int8_t)0xfe);
    ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0);
    abs_p0q0 =
        _mm_or_si128(_mm_subs_epu8(q0p0, p0q0), _mm_subs_epu8(p0q0, q0p0));
    abs_p1q1 =
        _mm_or_si128(_mm_subs_epu8(q1p1, p1q1), _mm_subs_epu8(p1q1, q1p1));
    flat = _mm_max_epu8(abs_p1p0, abs_q1q0);
    hev = _mm_subs_epu8(flat, thresh_v);
    hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff);

    abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0);
    abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1);
    mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit_v);
    mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff);
    // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1;
    mask = _mm_max_epu8(abs_p1p0, mask);
    // mask |= (abs(p1 - p0) > limit) * -1;
    // mask |= (abs(q1 - q0) > limit) * -1;

    work = _mm_max_epu8(
        _mm_or_si128(_mm_subs_epu8(q2p2, q1p1), _mm_subs_epu8(q1p1, q2p2)),
        _mm_or_si128(_mm_subs_epu8(q3p3, q2p2), _mm_subs_epu8(q2p2, q3p3)));
    mask = _mm_max_epu8(work, mask);
    mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8));
    mask = _mm_subs_epu8(mask, limit_v);
    mask = _mm_cmpeq_epi8(mask, zero);
  }

  // lp filter
  {
    const __m128i t4 = _mm_set1_epi8(4);
    const __m128i t3 = _mm_set1_epi8(3);
    const __m128i t80 = _mm_set1_epi8((int8_t)0x80);
    const __m128i t1 = _mm_set1_epi16(0x1);
    __m128i qs1ps1 = _mm_xor_si128(q1p1, t80);
    __m128i qs0ps0 = _mm_xor_si128(q0p0, t80);
    __m128i qs0 = _mm_xor_si128(p0q0, t80);
    __m128i qs1 = _mm_xor_si128(p1q1, t80);
    __m128i filt;
    __m128i work_a;
    __m128i filter1, filter2;
    __m128i flat2_q6p6, flat2_q5p5, flat2_q4p4, flat2_q3p3, flat2_q2p2;
    __m128i flat2_q1p1, flat2_q0p0, flat_q2p2, flat_q1p1, flat_q0p0;

    filt = _mm_and_si128(_mm_subs_epi8(qs1ps1, qs1), hev);
    work_a = _mm_subs_epi8(qs0, qs0ps0);
    filt = _mm_adds_epi8(filt, work_a);
    filt = _mm_adds_epi8(filt, work_a);
    filt = _mm_adds_epi8(filt, work_a);
    /* (vpx_filter + 3 * (qs0 - ps0)) & mask */
    filt = _mm_and_si128(filt, mask);

    filter1 = _mm_adds_epi8(filt, t4);
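    // filter1 = signed_char_clamp(filt + 4) >> 3 and, just below, filter2
    // uses + 3.  _mm_adds_epi8() supplies the clamp; the >> 3 is then done by
    // unpacking each byte into the high half of a 16-bit lane (unpacklo with
    // zero) and arithmetic-shifting by 0xB = 8 + 3, which keeps the sign bit
    // without a separate sign-extension step.  Rough worked example for one
    // lane, assuming filt = 23: filter1 = (23 + 4) >> 3 = 3 and
    // filter2 = (23 + 3) >> 3 = 3, so q0 is pulled down by 3 and p0 pulled
    // up by 3 toward each other.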
filter2 = _mm_adds_epi8(filt, t3); filter1 = _mm_unpacklo_epi8(zero, filter1); filter1 = _mm_srai_epi16(filter1, 0xB); filter2 = _mm_unpacklo_epi8(zero, filter2); filter2 = _mm_srai_epi16(filter2, 0xB); /* Filter1 >> 3 */ filt = _mm_packs_epi16(filter2, _mm_subs_epi16(zero, filter1)); qs0ps0 = _mm_xor_si128(_mm_adds_epi8(qs0ps0, filt), t80); /* filt >> 1 */ filt = _mm_adds_epi16(filter1, t1); filt = _mm_srai_epi16(filt, 1); filt = _mm_andnot_si128(_mm_srai_epi16(_mm_unpacklo_epi8(zero, hev), 0x8), filt); filt = _mm_packs_epi16(filt, _mm_subs_epi16(zero, filt)); qs1ps1 = _mm_xor_si128(_mm_adds_epi8(qs1ps1, filt), t80); // loopfilter done { __m128i work; flat = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(q2p2, q0p0), _mm_subs_epu8(q0p0, q2p2)), _mm_or_si128(_mm_subs_epu8(q3p3, q0p0), _mm_subs_epu8(q0p0, q3p3))); flat = _mm_max_epu8(abs_p1p0, flat); flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); flat = _mm_subs_epu8(flat, one); flat = _mm_cmpeq_epi8(flat, zero); flat = _mm_and_si128(flat, mask); q5p5 = _mm_loadl_epi64((__m128i *)(s - 6 * pitch)); q5p5 = _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(q5p5), (__m64 *)(s + 5 * pitch))); q6p6 = _mm_loadl_epi64((__m128i *)(s - 7 * pitch)); q6p6 = _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(q6p6), (__m64 *)(s + 6 * pitch))); flat2 = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(q4p4, q0p0), _mm_subs_epu8(q0p0, q4p4)), _mm_or_si128(_mm_subs_epu8(q5p5, q0p0), _mm_subs_epu8(q0p0, q5p5))); q7p7 = _mm_loadl_epi64((__m128i *)(s - 8 * pitch)); q7p7 = _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(q7p7), (__m64 *)(s + 7 * pitch))); work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(q6p6, q0p0), _mm_subs_epu8(q0p0, q6p6)), _mm_or_si128(_mm_subs_epu8(q7p7, q0p0), _mm_subs_epu8(q0p0, q7p7))); flat2 = _mm_max_epu8(work, flat2); flat2 = _mm_max_epu8(flat2, _mm_srli_si128(flat2, 8)); flat2 = _mm_subs_epu8(flat2, one); flat2 = _mm_cmpeq_epi8(flat2, zero); flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask } // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // flat and wide flat calculations { const __m128i eight = _mm_set1_epi16(8); const __m128i four = _mm_set1_epi16(4); __m128i p7_16, p6_16, p5_16, p4_16, p3_16, p2_16, p1_16, p0_16; __m128i q7_16, q6_16, q5_16, q4_16, q3_16, q2_16, q1_16, q0_16; __m128i pixelFilter_p, pixelFilter_q; __m128i pixetFilter_p2p1p0, pixetFilter_q2q1q0; __m128i sum_p7, sum_q7, sum_p3, sum_q3, res_p, res_q; p7_16 = _mm_unpacklo_epi8(q7p7, zero); p6_16 = _mm_unpacklo_epi8(q6p6, zero); p5_16 = _mm_unpacklo_epi8(q5p5, zero); p4_16 = _mm_unpacklo_epi8(q4p4, zero); p3_16 = _mm_unpacklo_epi8(q3p3, zero); p2_16 = _mm_unpacklo_epi8(q2p2, zero); p1_16 = _mm_unpacklo_epi8(q1p1, zero); p0_16 = _mm_unpacklo_epi8(q0p0, zero); q0_16 = _mm_unpackhi_epi8(q0p0, zero); q1_16 = _mm_unpackhi_epi8(q1p1, zero); q2_16 = _mm_unpackhi_epi8(q2p2, zero); q3_16 = _mm_unpackhi_epi8(q3p3, zero); q4_16 = _mm_unpackhi_epi8(q4p4, zero); q5_16 = _mm_unpackhi_epi8(q5p5, zero); q6_16 = _mm_unpackhi_epi8(q6p6, zero); q7_16 = _mm_unpackhi_epi8(q7p7, zero); pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6_16, p5_16), _mm_add_epi16(p4_16, p3_16)); pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6_16, q5_16), _mm_add_epi16(q4_16, q3_16)); pixetFilter_p2p1p0 = _mm_add_epi16(p0_16, _mm_add_epi16(p2_16, p1_16)); pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); pixetFilter_q2q1q0 = _mm_add_epi16(q0_16, _mm_add_epi16(q2_16, q1_16)); pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); pixelFilter_p = _mm_add_epi16(eight, 
_mm_add_epi16(pixelFilter_p, pixelFilter_q)); pixetFilter_p2p1p0 = _mm_add_epi16( four, _mm_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(p7_16, p0_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(q7_16, q0_16)), 4); flat2_q0p0 = _mm_packus_epi16(res_p, res_q); res_p = _mm_srli_epi16( _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(p3_16, p0_16)), 3); res_q = _mm_srli_epi16( _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(q3_16, q0_16)), 3); flat_q0p0 = _mm_packus_epi16(res_p, res_q); sum_p7 = _mm_add_epi16(p7_16, p7_16); sum_q7 = _mm_add_epi16(q7_16, q7_16); sum_p3 = _mm_add_epi16(p3_16, p3_16); sum_q3 = _mm_add_epi16(q3_16, q3_16); pixelFilter_q = _mm_sub_epi16(pixelFilter_p, p6_16); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q6_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p1_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q1_16)), 4); flat2_q1p1 = _mm_packus_epi16(res_p, res_q); pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_p2p1p0, p2_16); pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q2_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(sum_p3, p1_16)), 3); res_q = _mm_srli_epi16( _mm_add_epi16(pixetFilter_q2q1q0, _mm_add_epi16(sum_q3, q1_16)), 3); flat_q1p1 = _mm_packus_epi16(res_p, res_q); sum_p7 = _mm_add_epi16(sum_p7, p7_16); sum_q7 = _mm_add_epi16(sum_q7, q7_16); sum_p3 = _mm_add_epi16(sum_p3, p3_16); sum_q3 = _mm_add_epi16(sum_q3, q3_16); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q5_16); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p5_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p2_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q2_16)), 4); flat2_q2p2 = _mm_packus_epi16(res_p, res_q); pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q1_16); pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_q2q1q0, p1_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(sum_p3, p2_16)), 3); res_q = _mm_srli_epi16( _mm_add_epi16(pixetFilter_q2q1q0, _mm_add_epi16(sum_q3, q2_16)), 3); flat_q2p2 = _mm_packus_epi16(res_p, res_q); sum_p7 = _mm_add_epi16(sum_p7, p7_16); sum_q7 = _mm_add_epi16(sum_q7, q7_16); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q4_16); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p4_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p3_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q3_16)), 4); flat2_q3p3 = _mm_packus_epi16(res_p, res_q); sum_p7 = _mm_add_epi16(sum_p7, p7_16); sum_q7 = _mm_add_epi16(sum_q7, q7_16); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q3_16); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p3_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p4_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q4_16)), 4); flat2_q4p4 = _mm_packus_epi16(res_p, res_q); sum_p7 = _mm_add_epi16(sum_p7, p7_16); sum_q7 = _mm_add_epi16(sum_q7, q7_16); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q2_16); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p2_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p5_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q5_16)), 4); flat2_q5p5 = _mm_packus_epi16(res_p, res_q); sum_p7 = _mm_add_epi16(sum_p7, p7_16); sum_q7 = _mm_add_epi16(sum_q7, 
q7_16); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q1_16); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p1_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p6_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q6_16)), 4); flat2_q6p6 = _mm_packus_epi16(res_p, res_q); } // wide flat // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ flat = _mm_shuffle_epi32(flat, 68); flat2 = _mm_shuffle_epi32(flat2, 68); q2p2 = _mm_andnot_si128(flat, q2p2); flat_q2p2 = _mm_and_si128(flat, flat_q2p2); q2p2 = _mm_or_si128(q2p2, flat_q2p2); qs1ps1 = _mm_andnot_si128(flat, qs1ps1); flat_q1p1 = _mm_and_si128(flat, flat_q1p1); q1p1 = _mm_or_si128(qs1ps1, flat_q1p1); qs0ps0 = _mm_andnot_si128(flat, qs0ps0); flat_q0p0 = _mm_and_si128(flat, flat_q0p0); q0p0 = _mm_or_si128(qs0ps0, flat_q0p0); q6p6 = _mm_andnot_si128(flat2, q6p6); flat2_q6p6 = _mm_and_si128(flat2, flat2_q6p6); q6p6 = _mm_or_si128(q6p6, flat2_q6p6); _mm_storel_epi64((__m128i *)(s - 7 * pitch), q6p6); _mm_storeh_pi((__m64 *)(s + 6 * pitch), _mm_castsi128_ps(q6p6)); q5p5 = _mm_andnot_si128(flat2, q5p5); flat2_q5p5 = _mm_and_si128(flat2, flat2_q5p5); q5p5 = _mm_or_si128(q5p5, flat2_q5p5); _mm_storel_epi64((__m128i *)(s - 6 * pitch), q5p5); _mm_storeh_pi((__m64 *)(s + 5 * pitch), _mm_castsi128_ps(q5p5)); q4p4 = _mm_andnot_si128(flat2, q4p4); flat2_q4p4 = _mm_and_si128(flat2, flat2_q4p4); q4p4 = _mm_or_si128(q4p4, flat2_q4p4); _mm_storel_epi64((__m128i *)(s - 5 * pitch), q4p4); _mm_storeh_pi((__m64 *)(s + 4 * pitch), _mm_castsi128_ps(q4p4)); q3p3 = _mm_andnot_si128(flat2, q3p3); flat2_q3p3 = _mm_and_si128(flat2, flat2_q3p3); q3p3 = _mm_or_si128(q3p3, flat2_q3p3); _mm_storel_epi64((__m128i *)(s - 4 * pitch), q3p3); _mm_storeh_pi((__m64 *)(s + 3 * pitch), _mm_castsi128_ps(q3p3)); q2p2 = _mm_andnot_si128(flat2, q2p2); flat2_q2p2 = _mm_and_si128(flat2, flat2_q2p2); q2p2 = _mm_or_si128(q2p2, flat2_q2p2); _mm_storel_epi64((__m128i *)(s - 3 * pitch), q2p2); _mm_storeh_pi((__m64 *)(s + 2 * pitch), _mm_castsi128_ps(q2p2)); q1p1 = _mm_andnot_si128(flat2, q1p1); flat2_q1p1 = _mm_and_si128(flat2, flat2_q1p1); q1p1 = _mm_or_si128(q1p1, flat2_q1p1); _mm_storel_epi64((__m128i *)(s - 2 * pitch), q1p1); _mm_storeh_pi((__m64 *)(s + 1 * pitch), _mm_castsi128_ps(q1p1)); q0p0 = _mm_andnot_si128(flat2, q0p0); flat2_q0p0 = _mm_and_si128(flat2, flat2_q0p0); q0p0 = _mm_or_si128(q0p0, flat2_q0p0); _mm_storel_epi64((__m128i *)(s - 1 * pitch), q0p0); _mm_storeh_pi((__m64 *)(s - 0 * pitch), _mm_castsi128_ps(q0p0)); } } DECLARE_ALIGNED(32, static const uint8_t, filt_loopfilter_avx2[32]) = { 0, 128, 1, 128, 2, 128, 3, 128, 4, 128, 5, 128, 6, 128, 7, 128, 8, 128, 9, 128, 10, 128, 11, 128, 12, 128, 13, 128, 14, 128, 15, 128 }; void vpx_lpf_horizontal_16_dual_avx2(unsigned char *s, int pitch, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh) { __m128i mask, hev, flat, flat2; const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); __m128i p7, p6, p5; __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; __m128i q5, q6, q7; __m256i p256_7, q256_7, p256_6, q256_6, p256_5, q256_5, p256_4, q256_4, p256_3, q256_3, p256_2, q256_2, p256_1, q256_1, p256_0, q256_0; const __m128i thresh_v = _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)thresh[0])); const __m128i limit_v = _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)limit[0])); const __m128i blimit_v = _mm_broadcastb_epi8(_mm_cvtsi32_si128((int)blimit[0])); p256_4 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s - 5 * 
pitch))); p256_3 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s - 4 * pitch))); p256_2 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s - 3 * pitch))); p256_1 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s - 2 * pitch))); p256_0 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s - 1 * pitch))); q256_0 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s - 0 * pitch))); q256_1 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s + 1 * pitch))); q256_2 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s + 2 * pitch))); q256_3 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s + 3 * pitch))); q256_4 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s + 4 * pitch))); p4 = _mm256_castsi256_si128(p256_4); p3 = _mm256_castsi256_si128(p256_3); p2 = _mm256_castsi256_si128(p256_2); p1 = _mm256_castsi256_si128(p256_1); p0 = _mm256_castsi256_si128(p256_0); q0 = _mm256_castsi256_si128(q256_0); q1 = _mm256_castsi256_si128(q256_1); q2 = _mm256_castsi256_si128(q256_2); q3 = _mm256_castsi256_si128(q256_3); q4 = _mm256_castsi256_si128(q256_4); { const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), _mm_subs_epu8(p0, p1)); const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), _mm_subs_epu8(q0, q1)); const __m128i fe = _mm_set1_epi8((int8_t)0xfe); const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), _mm_subs_epu8(q0, p0)); __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), _mm_subs_epu8(q1, p1)); __m128i work; flat = _mm_max_epu8(abs_p1p0, abs_q1q0); hev = _mm_subs_epu8(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; mask = _mm_max_epu8(flat, mask); // mask |= (abs(p1 - p0) > limit) * -1; // mask |= (abs(q1 - q0) > limit) * -1; work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p2, p1), _mm_subs_epu8(p1, p2)), _mm_or_si128(_mm_subs_epu8(p3, p2), _mm_subs_epu8(p2, p3))); mask = _mm_max_epu8(work, mask); work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(q2, q1), _mm_subs_epu8(q1, q2)), _mm_or_si128(_mm_subs_epu8(q3, q2), _mm_subs_epu8(q2, q3))); mask = _mm_max_epu8(work, mask); mask = _mm_subs_epu8(mask, limit_v); mask = _mm_cmpeq_epi8(mask, zero); } // lp filter { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); const __m128i t80 = _mm_set1_epi8((int8_t)0x80); const __m128i te0 = _mm_set1_epi8((int8_t)0xe0); const __m128i t1f = _mm_set1_epi8(0x1f); const __m128i t1 = _mm_set1_epi8(0x1); const __m128i t7f = _mm_set1_epi8(0x7f); __m128i ps1 = _mm_xor_si128(p1, t80); __m128i ps0 = _mm_xor_si128(p0, t80); __m128i qs0 = _mm_xor_si128(q0, t80); __m128i qs1 = _mm_xor_si128(q1, t80); __m128i filt; __m128i work_a; __m128i filter1, filter2; __m128i flat2_p6, flat2_p5, flat2_p4, flat2_p3, flat2_p2, flat2_p1, flat2_p0, flat2_q0, flat2_q1, flat2_q2, flat2_q3, flat2_q4, flat2_q5, flat2_q6, flat_p2, flat_p1, flat_p0, flat_q0, flat_q1, flat_q2; filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); work_a = _mm_subs_epi8(qs0, ps0); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); /* (vpx_filter + 3 * (qs0 - ps0)) & mask */ filt = _mm_and_si128(filt, mask); 
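    // filt now holds the 4-tap filter value of the C reference,
    // signed_char_clamp(clamp(ps1 - qs1) & hev + 3 * (qs0 - ps0)) & mask,
    // evaluated in the 0x80-offset signed domain.  Rough example, assuming
    // p1 = p0 = 60 and q0 = q1 = 70 with hev off:
    // filt = 3 * (70 - 60) = 30 before the final mask.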
filter1 = _mm_adds_epi8(filt, t4); filter2 = _mm_adds_epi8(filt, t3); /* Filter1 >> 3 */ work_a = _mm_cmpgt_epi8(zero, filter1); filter1 = _mm_srli_epi16(filter1, 3); work_a = _mm_and_si128(work_a, te0); filter1 = _mm_and_si128(filter1, t1f); filter1 = _mm_or_si128(filter1, work_a); qs0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); /* Filter2 >> 3 */ work_a = _mm_cmpgt_epi8(zero, filter2); filter2 = _mm_srli_epi16(filter2, 3); work_a = _mm_and_si128(work_a, te0); filter2 = _mm_and_si128(filter2, t1f); filter2 = _mm_or_si128(filter2, work_a); ps0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); /* filt >> 1 */ filt = _mm_adds_epi8(filter1, t1); work_a = _mm_cmpgt_epi8(zero, filt); filt = _mm_srli_epi16(filt, 1); work_a = _mm_and_si128(work_a, t80); filt = _mm_and_si128(filt, t7f); filt = _mm_or_si128(filt, work_a); filt = _mm_andnot_si128(hev, filt); ps1 = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); qs1 = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); // loopfilter done { __m128i work; work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p2, p0), _mm_subs_epu8(p0, p2)), _mm_or_si128(_mm_subs_epu8(q2, q0), _mm_subs_epu8(q0, q2))); flat = _mm_max_epu8(work, flat); work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p3, p0), _mm_subs_epu8(p0, p3)), _mm_or_si128(_mm_subs_epu8(q3, q0), _mm_subs_epu8(q0, q3))); flat = _mm_max_epu8(work, flat); work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p4, p0), _mm_subs_epu8(p0, p4)), _mm_or_si128(_mm_subs_epu8(q4, q0), _mm_subs_epu8(q0, q4))); flat = _mm_subs_epu8(flat, one); flat = _mm_cmpeq_epi8(flat, zero); flat = _mm_and_si128(flat, mask); p256_5 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s - 6 * pitch))); q256_5 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s + 5 * pitch))); p5 = _mm256_castsi256_si128(p256_5); q5 = _mm256_castsi256_si128(q256_5); flat2 = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p5, p0), _mm_subs_epu8(p0, p5)), _mm_or_si128(_mm_subs_epu8(q5, q0), _mm_subs_epu8(q0, q5))); flat2 = _mm_max_epu8(work, flat2); p256_6 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s - 7 * pitch))); q256_6 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s + 6 * pitch))); p6 = _mm256_castsi256_si128(p256_6); q6 = _mm256_castsi256_si128(q256_6); work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p6, p0), _mm_subs_epu8(p0, p6)), _mm_or_si128(_mm_subs_epu8(q6, q0), _mm_subs_epu8(q0, q6))); flat2 = _mm_max_epu8(work, flat2); p256_7 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s - 8 * pitch))); q256_7 = _mm256_castpd_si256( _mm256_broadcast_pd((__m128d const *)(s + 7 * pitch))); p7 = _mm256_castsi256_si128(p256_7); q7 = _mm256_castsi256_si128(q256_7); work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p7, p0), _mm_subs_epu8(p0, p7)), _mm_or_si128(_mm_subs_epu8(q7, q0), _mm_subs_epu8(q0, q7))); flat2 = _mm_max_epu8(work, flat2); flat2 = _mm_subs_epu8(flat2, one); flat2 = _mm_cmpeq_epi8(flat2, zero); flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask } // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // flat and wide flat calculations { const __m256i eight = _mm256_set1_epi16(8); const __m256i four = _mm256_set1_epi16(4); __m256i pixelFilter_p, pixelFilter_q, pixetFilter_p2p1p0, pixetFilter_q2q1q0, sum_p7, sum_q7, sum_p3, sum_q3, res_p, res_q; const __m256i filter = _mm256_load_si256((__m256i const *)filt_loopfilter_avx2); p256_7 = _mm256_shuffle_epi8(p256_7, filter); p256_6 = _mm256_shuffle_epi8(p256_6, filter); p256_5 = _mm256_shuffle_epi8(p256_5, filter); p256_4 = 
_mm256_shuffle_epi8(p256_4, filter); p256_3 = _mm256_shuffle_epi8(p256_3, filter); p256_2 = _mm256_shuffle_epi8(p256_2, filter); p256_1 = _mm256_shuffle_epi8(p256_1, filter); p256_0 = _mm256_shuffle_epi8(p256_0, filter); q256_0 = _mm256_shuffle_epi8(q256_0, filter); q256_1 = _mm256_shuffle_epi8(q256_1, filter); q256_2 = _mm256_shuffle_epi8(q256_2, filter); q256_3 = _mm256_shuffle_epi8(q256_3, filter); q256_4 = _mm256_shuffle_epi8(q256_4, filter); q256_5 = _mm256_shuffle_epi8(q256_5, filter); q256_6 = _mm256_shuffle_epi8(q256_6, filter); q256_7 = _mm256_shuffle_epi8(q256_7, filter); pixelFilter_p = _mm256_add_epi16(_mm256_add_epi16(p256_6, p256_5), _mm256_add_epi16(p256_4, p256_3)); pixelFilter_q = _mm256_add_epi16(_mm256_add_epi16(q256_6, q256_5), _mm256_add_epi16(q256_4, q256_3)); pixetFilter_p2p1p0 = _mm256_add_epi16(p256_0, _mm256_add_epi16(p256_2, p256_1)); pixelFilter_p = _mm256_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); pixetFilter_q2q1q0 = _mm256_add_epi16(q256_0, _mm256_add_epi16(q256_2, q256_1)); pixelFilter_q = _mm256_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); pixelFilter_p = _mm256_add_epi16( eight, _mm256_add_epi16(pixelFilter_p, pixelFilter_q)); pixetFilter_p2p1p0 = _mm256_add_epi16( four, _mm256_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); res_p = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_p, _mm256_add_epi16(p256_7, p256_0)), 4); flat2_p0 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), 168)); res_q = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_p, _mm256_add_epi16(q256_7, q256_0)), 4); flat2_q0 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), 168)); res_p = _mm256_srli_epi16(_mm256_add_epi16(pixetFilter_p2p1p0, _mm256_add_epi16(p256_3, p256_0)), 3); flat_p0 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), 168)); res_q = _mm256_srli_epi16(_mm256_add_epi16(pixetFilter_p2p1p0, _mm256_add_epi16(q256_3, q256_0)), 3); flat_q0 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), 168)); sum_p7 = _mm256_add_epi16(p256_7, p256_7); sum_q7 = _mm256_add_epi16(q256_7, q256_7); sum_p3 = _mm256_add_epi16(p256_3, p256_3); sum_q3 = _mm256_add_epi16(q256_3, q256_3); pixelFilter_q = _mm256_sub_epi16(pixelFilter_p, p256_6); pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_6); res_p = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_p, _mm256_add_epi16(sum_p7, p256_1)), 4); flat2_p1 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), 168)); res_q = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_q, _mm256_add_epi16(sum_q7, q256_1)), 4); flat2_q1 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), 168)); pixetFilter_q2q1q0 = _mm256_sub_epi16(pixetFilter_p2p1p0, p256_2); pixetFilter_p2p1p0 = _mm256_sub_epi16(pixetFilter_p2p1p0, q256_2); res_p = _mm256_srli_epi16(_mm256_add_epi16(pixetFilter_p2p1p0, _mm256_add_epi16(sum_p3, p256_1)), 3); flat_p1 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), 168)); res_q = _mm256_srli_epi16(_mm256_add_epi16(pixetFilter_q2q1q0, _mm256_add_epi16(sum_q3, q256_1)), 3); flat_q1 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), 168)); sum_p7 = _mm256_add_epi16(sum_p7, p256_7); sum_q7 = _mm256_add_epi16(sum_q7, q256_7); sum_p3 = _mm256_add_epi16(sum_p3, p256_3); sum_q3 = _mm256_add_epi16(sum_q3, q256_3); pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_5); 
pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_5); res_p = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_p, _mm256_add_epi16(sum_p7, p256_2)), 4); flat2_p2 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), 168)); res_q = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_q, _mm256_add_epi16(sum_q7, q256_2)), 4); flat2_q2 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), 168)); pixetFilter_p2p1p0 = _mm256_sub_epi16(pixetFilter_p2p1p0, q256_1); pixetFilter_q2q1q0 = _mm256_sub_epi16(pixetFilter_q2q1q0, p256_1); res_p = _mm256_srli_epi16(_mm256_add_epi16(pixetFilter_p2p1p0, _mm256_add_epi16(sum_p3, p256_2)), 3); flat_p2 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), 168)); res_q = _mm256_srli_epi16(_mm256_add_epi16(pixetFilter_q2q1q0, _mm256_add_epi16(sum_q3, q256_2)), 3); flat_q2 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), 168)); sum_p7 = _mm256_add_epi16(sum_p7, p256_7); sum_q7 = _mm256_add_epi16(sum_q7, q256_7); pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_4); pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_4); res_p = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_p, _mm256_add_epi16(sum_p7, p256_3)), 4); flat2_p3 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), 168)); res_q = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_q, _mm256_add_epi16(sum_q7, q256_3)), 4); flat2_q3 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), 168)); sum_p7 = _mm256_add_epi16(sum_p7, p256_7); sum_q7 = _mm256_add_epi16(sum_q7, q256_7); pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_3); pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_3); res_p = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_p, _mm256_add_epi16(sum_p7, p256_4)), 4); flat2_p4 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), 168)); res_q = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_q, _mm256_add_epi16(sum_q7, q256_4)), 4); flat2_q4 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), 168)); sum_p7 = _mm256_add_epi16(sum_p7, p256_7); sum_q7 = _mm256_add_epi16(sum_q7, q256_7); pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_2); pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_2); res_p = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_p, _mm256_add_epi16(sum_p7, p256_5)), 4); flat2_p5 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), 168)); res_q = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_q, _mm256_add_epi16(sum_q7, q256_5)), 4); flat2_q5 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), 168)); sum_p7 = _mm256_add_epi16(sum_p7, p256_7); sum_q7 = _mm256_add_epi16(sum_q7, q256_7); pixelFilter_p = _mm256_sub_epi16(pixelFilter_p, q256_1); pixelFilter_q = _mm256_sub_epi16(pixelFilter_q, p256_1); res_p = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_p, _mm256_add_epi16(sum_p7, p256_6)), 4); flat2_p6 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_p, res_p), 168)); res_q = _mm256_srli_epi16( _mm256_add_epi16(pixelFilter_q, _mm256_add_epi16(sum_q7, q256_6)), 4); flat2_q6 = _mm256_castsi256_si128( _mm256_permute4x64_epi64(_mm256_packus_epi16(res_q, res_q), 168)); } // wide flat // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ p2 = _mm_andnot_si128(flat, p2); flat_p2 = _mm_and_si128(flat, flat_p2); p2 = 
_mm_or_si128(flat_p2, p2); p1 = _mm_andnot_si128(flat, ps1); flat_p1 = _mm_and_si128(flat, flat_p1); p1 = _mm_or_si128(flat_p1, p1); p0 = _mm_andnot_si128(flat, ps0); flat_p0 = _mm_and_si128(flat, flat_p0); p0 = _mm_or_si128(flat_p0, p0); q0 = _mm_andnot_si128(flat, qs0); flat_q0 = _mm_and_si128(flat, flat_q0); q0 = _mm_or_si128(flat_q0, q0); q1 = _mm_andnot_si128(flat, qs1); flat_q1 = _mm_and_si128(flat, flat_q1); q1 = _mm_or_si128(flat_q1, q1); q2 = _mm_andnot_si128(flat, q2); flat_q2 = _mm_and_si128(flat, flat_q2); q2 = _mm_or_si128(flat_q2, q2); p6 = _mm_andnot_si128(flat2, p6); flat2_p6 = _mm_and_si128(flat2, flat2_p6); p6 = _mm_or_si128(flat2_p6, p6); _mm_storeu_si128((__m128i *)(s - 7 * pitch), p6); p5 = _mm_andnot_si128(flat2, p5); flat2_p5 = _mm_and_si128(flat2, flat2_p5); p5 = _mm_or_si128(flat2_p5, p5); _mm_storeu_si128((__m128i *)(s - 6 * pitch), p5); p4 = _mm_andnot_si128(flat2, p4); flat2_p4 = _mm_and_si128(flat2, flat2_p4); p4 = _mm_or_si128(flat2_p4, p4); _mm_storeu_si128((__m128i *)(s - 5 * pitch), p4); p3 = _mm_andnot_si128(flat2, p3); flat2_p3 = _mm_and_si128(flat2, flat2_p3); p3 = _mm_or_si128(flat2_p3, p3); _mm_storeu_si128((__m128i *)(s - 4 * pitch), p3); p2 = _mm_andnot_si128(flat2, p2); flat2_p2 = _mm_and_si128(flat2, flat2_p2); p2 = _mm_or_si128(flat2_p2, p2); _mm_storeu_si128((__m128i *)(s - 3 * pitch), p2); p1 = _mm_andnot_si128(flat2, p1); flat2_p1 = _mm_and_si128(flat2, flat2_p1); p1 = _mm_or_si128(flat2_p1, p1); _mm_storeu_si128((__m128i *)(s - 2 * pitch), p1); p0 = _mm_andnot_si128(flat2, p0); flat2_p0 = _mm_and_si128(flat2, flat2_p0); p0 = _mm_or_si128(flat2_p0, p0); _mm_storeu_si128((__m128i *)(s - 1 * pitch), p0); q0 = _mm_andnot_si128(flat2, q0); flat2_q0 = _mm_and_si128(flat2, flat2_q0); q0 = _mm_or_si128(flat2_q0, q0); _mm_storeu_si128((__m128i *)(s - 0 * pitch), q0); q1 = _mm_andnot_si128(flat2, q1); flat2_q1 = _mm_and_si128(flat2, flat2_q1); q1 = _mm_or_si128(flat2_q1, q1); _mm_storeu_si128((__m128i *)(s + 1 * pitch), q1); q2 = _mm_andnot_si128(flat2, q2); flat2_q2 = _mm_and_si128(flat2, flat2_q2); q2 = _mm_or_si128(flat2_q2, q2); _mm_storeu_si128((__m128i *)(s + 2 * pitch), q2); q3 = _mm_andnot_si128(flat2, q3); flat2_q3 = _mm_and_si128(flat2, flat2_q3); q3 = _mm_or_si128(flat2_q3, q3); _mm_storeu_si128((__m128i *)(s + 3 * pitch), q3); q4 = _mm_andnot_si128(flat2, q4); flat2_q4 = _mm_and_si128(flat2, flat2_q4); q4 = _mm_or_si128(flat2_q4, q4); _mm_storeu_si128((__m128i *)(s + 4 * pitch), q4); q5 = _mm_andnot_si128(flat2, q5); flat2_q5 = _mm_and_si128(flat2, flat2_q5); q5 = _mm_or_si128(flat2_q5, q5); _mm_storeu_si128((__m128i *)(s + 5 * pitch), q5); q6 = _mm_andnot_si128(flat2, q6); flat2_q6 = _mm_and_si128(flat2, flat2_q6); q6 = _mm_or_si128(flat2_q6, q6); _mm_storeu_si128((__m128i *)(s + 6 * pitch), q6); } } libvpx-1.8.2/vpx_dsp/x86/loopfilter_sse2.c000066400000000000000000002272101357355204000204250ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */

#include <emmintrin.h>  // SSE2

#include "./vpx_dsp_rtcd.h"
#include "vpx_ports/mem.h"
#include "vpx_ports/emmintrin_compat.h"
#include "vpx_dsp/x86/mem_sse2.h"

static INLINE __m128i abs_diff(__m128i a, __m128i b) {
  return _mm_or_si128(_mm_subs_epu8(a, b), _mm_subs_epu8(b, a));
}

// filter_mask and hev_mask
#define FILTER_HEV_MASK \
  do { \
    /* abs(q1 - q0), abs(p1 - p0) */ \
    __m128i flat = abs_diff(q1p1, q0p0); \
    /* abs(p1 - q1), abs(p0 - q0) */ \
    const __m128i abs_p1q1p0q0 = abs_diff(p1p0, q1q0); \
    __m128i abs_p0q0, abs_p1q1, work; \
    \
    /* const uint8_t hev = hev_mask(thresh, *op1, *op0, *oq0, *oq1); */ \
    hev = \
        _mm_unpacklo_epi8(_mm_max_epu8(flat, _mm_srli_si128(flat, 8)), zero); \
    hev = _mm_cmpgt_epi16(hev, thresh_v); \
    hev = _mm_packs_epi16(hev, hev); \
    \
    /* const int8_t mask = filter_mask(*limit, *blimit, */ \
    /*                                 p3, p2, p1, p0, q0, q1, q2, q3); */ \
    abs_p0q0 = \
        _mm_adds_epu8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p0 - q0) * 2 */ \
    abs_p1q1 = \
        _mm_unpackhi_epi8(abs_p1q1p0q0, abs_p1q1p0q0); /* abs(p1 - q1) */ \
    abs_p1q1 = _mm_srli_epi16(abs_p1q1, 9); \
    abs_p1q1 = _mm_packs_epi16(abs_p1q1, abs_p1q1); /* abs(p1 - q1) / 2 */ \
    /* abs(p0 - q0) * 2 + abs(p1 - q1) / 2 */ \
    mask = _mm_adds_epu8(abs_p0q0, abs_p1q1); \
    /* abs(p3 - p2), abs(p2 - p1) */ \
    work = abs_diff(p3p2, p2p1); \
    flat = _mm_max_epu8(work, flat); \
    /* abs(q3 - q2), abs(q2 - q1) */ \
    work = abs_diff(q3q2, q2q1); \
    flat = _mm_max_epu8(work, flat); \
    flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); \
    mask = _mm_unpacklo_epi64(mask, flat); \
    mask = _mm_subs_epu8(mask, limit_v); \
    mask = _mm_cmpeq_epi8(mask, zero); \
    mask = _mm_and_si128(mask, _mm_srli_si128(mask, 8)); \
  } while (0)

#define FILTER4 \
  do { \
    const __m128i t3t4 = \
        _mm_set_epi8(3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4); \
    const __m128i t80 = _mm_set1_epi8((int8_t)0x80); \
    __m128i filter, filter2filter1, work; \
    \
    ps1ps0 = _mm_xor_si128(p1p0, t80); /* ^ 0x80 */ \
    qs1qs0 = _mm_xor_si128(q1q0, t80); \
    \
    /* int8_t filter = signed_char_clamp(ps1 - qs1) & hev; */ \
    work = _mm_subs_epi8(ps1ps0, qs1qs0); \
    filter = _mm_and_si128(_mm_srli_si128(work, 8), hev); \
    /* filter = signed_char_clamp(filter + 3 * (qs0 - ps0)) & mask; */ \
    filter = _mm_subs_epi8(filter, work); \
    filter = _mm_subs_epi8(filter, work); \
    filter = _mm_subs_epi8(filter, work); /* + 3 * (qs0 - ps0) */ \
    filter = _mm_and_si128(filter, mask); /* & mask */ \
    filter = _mm_unpacklo_epi64(filter, filter); \
    \
    /* filter1 = signed_char_clamp(filter + 4) >> 3; */ \
    /* filter2 = signed_char_clamp(filter + 3) >> 3; */ \
    filter2filter1 = _mm_adds_epi8(filter, t3t4); /* signed_char_clamp */ \
    filter = _mm_unpackhi_epi8(filter2filter1, filter2filter1); \
    filter2filter1 = _mm_unpacklo_epi8(filter2filter1, filter2filter1); \
    filter2filter1 = _mm_srai_epi16(filter2filter1, 11); /* >> 3 */ \
    filter = _mm_srai_epi16(filter, 11); /* >> 3 */ \
    filter2filter1 = _mm_packs_epi16(filter2filter1, filter); \
    \
    /* filter = ROUND_POWER_OF_TWO(filter1, 1) & ~hev; */ \
    filter = _mm_subs_epi8(filter2filter1, ff); /* + 1 */ \
    filter = _mm_unpacklo_epi8(filter, filter); \
    filter = _mm_srai_epi16(filter, 9); /* round */ \
    filter = _mm_packs_epi16(filter, filter); \
    filter = _mm_andnot_si128(hev, filter); \
    \
    hev = _mm_unpackhi_epi64(filter2filter1, filter); \
    filter2filter1 = _mm_unpacklo_epi64(filter2filter1, filter); \
    \
    /* signed_char_clamp(qs1 - filter), signed_char_clamp(qs0 - filter1) */ \
    qs1qs0 = _mm_subs_epi8(qs1qs0, filter2filter1); \
    /* signed_char_clamp(ps1 + filter), signed_char_clamp(ps0 + filter2) */ \
    ps1ps0 = \
_mm_adds_epi8(ps1ps0, hev); \ qs1qs0 = _mm_xor_si128(qs1qs0, t80); /* ^ 0x80 */ \ ps1ps0 = _mm_xor_si128(ps1ps0, t80); /* ^ 0x80 */ \ } while (0) void vpx_lpf_horizontal_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { const __m128i zero = _mm_set1_epi16(0); const __m128i limit_v = _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)blimit), _mm_loadl_epi64((const __m128i *)limit)); const __m128i thresh_v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)thresh), zero); const __m128i ff = _mm_cmpeq_epi8(zero, zero); __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; __m128i mask, hev; p3p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * pitch)), _mm_loadl_epi64((__m128i *)(s - 4 * pitch))); q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * pitch)), _mm_loadl_epi64((__m128i *)(s + 1 * pitch))); q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * pitch)), _mm_loadl_epi64((__m128i *)(s + 0 * pitch))); q3q2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s + 2 * pitch)), _mm_loadl_epi64((__m128i *)(s + 3 * pitch))); p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); p2p1 = _mm_unpacklo_epi64(q1p1, p3p2); q1q0 = _mm_unpackhi_epi64(q0p0, q1p1); q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2); FILTER_HEV_MASK; FILTER4; _mm_storeh_pi((__m64 *)(s - 2 * pitch), _mm_castsi128_ps(ps1ps0)); // *op1 _mm_storel_epi64((__m128i *)(s - 1 * pitch), ps1ps0); // *op0 _mm_storel_epi64((__m128i *)(s + 0 * pitch), qs1qs0); // *oq0 _mm_storeh_pi((__m64 *)(s + 1 * pitch), _mm_castsi128_ps(qs1qs0)); // *oq1 } void vpx_lpf_vertical_4_sse2(uint8_t *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { const __m128i zero = _mm_set1_epi16(0); const __m128i limit_v = _mm_unpacklo_epi64(_mm_loadl_epi64((const __m128i *)blimit), _mm_loadl_epi64((const __m128i *)limit)); const __m128i thresh_v = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)thresh), zero); const __m128i ff = _mm_cmpeq_epi8(zero, zero); __m128i x0, x1, x2, x3; __m128i q1p1, q0p0, p3p2, p2p1, p1p0, q3q2, q2q1, q1q0, ps1ps0, qs1qs0; __m128i mask, hev; // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 q1q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 0 * pitch - 4)), _mm_loadl_epi64((__m128i *)(s + 1 * pitch - 4))); // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 x1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 2 * pitch - 4)), _mm_loadl_epi64((__m128i *)(s + 3 * pitch - 4))); // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 x2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 4 * pitch - 4)), _mm_loadl_epi64((__m128i *)(s + 5 * pitch - 4))); // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 x3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(s + 6 * pitch - 4)), _mm_loadl_epi64((__m128i *)(s + 7 * pitch - 4))); // Transpose 8x8 // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 p1p0 = _mm_unpacklo_epi16(q1q0, x1); // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73 x0 = _mm_unpacklo_epi16(x2, x3); // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 p3p2 = _mm_unpacklo_epi32(p1p0, x0); // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 p1p0 = _mm_unpackhi_epi32(p1p0, x0); p3p2 = _mm_unpackhi_epi64(p3p2, _mm_slli_si128(p3p2, 8)); // swap lo and high p1p0 = _mm_unpackhi_epi64(p1p0, _mm_slli_si128(p1p0, 8)); // swap lo and high // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 q1q0 = _mm_unpackhi_epi16(q1q0, x1); // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77 x2 = _mm_unpackhi_epi16(x2, x3); // 06 16 26 
36 46 56 66 76 07 17 27 37 47 57 67 77 q3q2 = _mm_unpackhi_epi32(q1q0, x2); // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 q1q0 = _mm_unpacklo_epi32(q1q0, x2); q0p0 = _mm_unpacklo_epi64(p1p0, q1q0); q1p1 = _mm_unpackhi_epi64(p1p0, q1q0); p1p0 = _mm_unpacklo_epi64(q0p0, q1p1); p2p1 = _mm_unpacklo_epi64(q1p1, p3p2); q2q1 = _mm_unpacklo_epi64(_mm_srli_si128(q1p1, 8), q3q2); FILTER_HEV_MASK; FILTER4; // Transpose 8x4 to 4x8 // qs1qs0: 20 21 22 23 24 25 26 27 30 31 32 33 34 34 36 37 // ps1ps0: 10 11 12 13 14 15 16 17 00 01 02 03 04 05 06 07 // 00 01 02 03 04 05 06 07 10 11 12 13 14 15 16 17 ps1ps0 = _mm_unpackhi_epi64(ps1ps0, _mm_slli_si128(ps1ps0, 8)); // 10 30 11 31 12 32 13 33 14 34 15 35 16 36 17 37 x0 = _mm_unpackhi_epi8(ps1ps0, qs1qs0); // 00 20 01 21 02 22 03 23 04 24 05 25 06 26 07 27 ps1ps0 = _mm_unpacklo_epi8(ps1ps0, qs1qs0); // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 qs1qs0 = _mm_unpackhi_epi8(ps1ps0, x0); // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 ps1ps0 = _mm_unpacklo_epi8(ps1ps0, x0); storeu_uint32(s + 0 * pitch - 2, _mm_cvtsi128_si32(ps1ps0)); ps1ps0 = _mm_srli_si128(ps1ps0, 4); storeu_uint32(s + 1 * pitch - 2, _mm_cvtsi128_si32(ps1ps0)); ps1ps0 = _mm_srli_si128(ps1ps0, 4); storeu_uint32(s + 2 * pitch - 2, _mm_cvtsi128_si32(ps1ps0)); ps1ps0 = _mm_srli_si128(ps1ps0, 4); storeu_uint32(s + 3 * pitch - 2, _mm_cvtsi128_si32(ps1ps0)); storeu_uint32(s + 4 * pitch - 2, _mm_cvtsi128_si32(qs1qs0)); qs1qs0 = _mm_srli_si128(qs1qs0, 4); storeu_uint32(s + 5 * pitch - 2, _mm_cvtsi128_si32(qs1qs0)); qs1qs0 = _mm_srli_si128(qs1qs0, 4); storeu_uint32(s + 6 * pitch - 2, _mm_cvtsi128_si32(qs1qs0)); qs1qs0 = _mm_srli_si128(qs1qs0, 4); storeu_uint32(s + 7 * pitch - 2, _mm_cvtsi128_si32(qs1qs0)); } void vpx_lpf_horizontal_16_sse2(unsigned char *s, int pitch, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh) { const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); const __m128i blimit_v = _mm_load_si128((const __m128i *)blimit); const __m128i limit_v = _mm_load_si128((const __m128i *)limit); const __m128i thresh_v = _mm_load_si128((const __m128i *)thresh); __m128i mask, hev, flat, flat2; __m128i q7p7, q6p6, q5p5, q4p4, q3p3, q2p2, q1p1, q0p0, p0q0, p1q1; __m128i abs_p1p0; q4p4 = _mm_loadl_epi64((__m128i *)(s - 5 * pitch)); q4p4 = _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(q4p4), (__m64 *)(s + 4 * pitch))); q3p3 = _mm_loadl_epi64((__m128i *)(s - 4 * pitch)); q3p3 = _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(q3p3), (__m64 *)(s + 3 * pitch))); q2p2 = _mm_loadl_epi64((__m128i *)(s - 3 * pitch)); q2p2 = _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(q2p2), (__m64 *)(s + 2 * pitch))); q1p1 = _mm_loadl_epi64((__m128i *)(s - 2 * pitch)); q1p1 = _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(q1p1), (__m64 *)(s + 1 * pitch))); p1q1 = _mm_shuffle_epi32(q1p1, 78); q0p0 = _mm_loadl_epi64((__m128i *)(s - 1 * pitch)); q0p0 = _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(q0p0), (__m64 *)(s - 0 * pitch))); p0q0 = _mm_shuffle_epi32(q0p0, 78); { __m128i abs_p1q1, abs_p0q0, abs_q1q0, fe, ff, work; abs_p1p0 = abs_diff(q1p1, q0p0); abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); fe = _mm_set1_epi8((int8_t)0xfe); ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); abs_p0q0 = abs_diff(q0p0, p0q0); abs_p1q1 = abs_diff(q1p1, p1q1); flat = _mm_max_epu8(abs_p1p0, abs_q1q0); hev = _mm_subs_epu8(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, 
fe), 1); mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; mask = _mm_max_epu8(abs_p1p0, mask); // mask |= (abs(p1 - p0) > limit) * -1; // mask |= (abs(q1 - q0) > limit) * -1; work = _mm_max_epu8(abs_diff(q2p2, q1p1), abs_diff(q3p3, q2p2)); mask = _mm_max_epu8(work, mask); mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); mask = _mm_subs_epu8(mask, limit_v); mask = _mm_cmpeq_epi8(mask, zero); } // lp filter { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); const __m128i t80 = _mm_set1_epi8((int8_t)0x80); const __m128i t1 = _mm_set1_epi16(0x1); __m128i qs1ps1 = _mm_xor_si128(q1p1, t80); __m128i qs0ps0 = _mm_xor_si128(q0p0, t80); __m128i qs0 = _mm_xor_si128(p0q0, t80); __m128i qs1 = _mm_xor_si128(p1q1, t80); __m128i filt; __m128i work_a; __m128i filter1, filter2; __m128i flat2_q6p6, flat2_q5p5, flat2_q4p4, flat2_q3p3, flat2_q2p2; __m128i flat2_q1p1, flat2_q0p0, flat_q2p2, flat_q1p1, flat_q0p0; filt = _mm_and_si128(_mm_subs_epi8(qs1ps1, qs1), hev); work_a = _mm_subs_epi8(qs0, qs0ps0); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); filter2 = _mm_adds_epi8(filt, t3); filter1 = _mm_unpacklo_epi8(zero, filter1); filter1 = _mm_srai_epi16(filter1, 0xB); filter2 = _mm_unpacklo_epi8(zero, filter2); filter2 = _mm_srai_epi16(filter2, 0xB); // Filter1 >> 3 filt = _mm_packs_epi16(filter2, _mm_subs_epi16(zero, filter1)); qs0ps0 = _mm_xor_si128(_mm_adds_epi8(qs0ps0, filt), t80); // filt >> 1 filt = _mm_adds_epi16(filter1, t1); filt = _mm_srai_epi16(filt, 1); filt = _mm_andnot_si128(_mm_srai_epi16(_mm_unpacklo_epi8(zero, hev), 0x8), filt); filt = _mm_packs_epi16(filt, _mm_subs_epi16(zero, filt)); qs1ps1 = _mm_xor_si128(_mm_adds_epi8(qs1ps1, filt), t80); // loopfilter done { __m128i work; flat = _mm_max_epu8(abs_diff(q2p2, q0p0), abs_diff(q3p3, q0p0)); flat = _mm_max_epu8(abs_p1p0, flat); flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); flat = _mm_subs_epu8(flat, one); flat = _mm_cmpeq_epi8(flat, zero); flat = _mm_and_si128(flat, mask); q5p5 = _mm_loadl_epi64((__m128i *)(s - 6 * pitch)); q5p5 = _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(q5p5), (__m64 *)(s + 5 * pitch))); q6p6 = _mm_loadl_epi64((__m128i *)(s - 7 * pitch)); q6p6 = _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(q6p6), (__m64 *)(s + 6 * pitch))); flat2 = _mm_max_epu8(abs_diff(q4p4, q0p0), abs_diff(q5p5, q0p0)); q7p7 = _mm_loadl_epi64((__m128i *)(s - 8 * pitch)); q7p7 = _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(q7p7), (__m64 *)(s + 7 * pitch))); work = _mm_max_epu8(abs_diff(q6p6, q0p0), abs_diff(q7p7, q0p0)); flat2 = _mm_max_epu8(work, flat2); flat2 = _mm_max_epu8(flat2, _mm_srli_si128(flat2, 8)); flat2 = _mm_subs_epu8(flat2, one); flat2 = _mm_cmpeq_epi8(flat2, zero); flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask } // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // flat and wide flat calculations { const __m128i eight = _mm_set1_epi16(8); const __m128i four = _mm_set1_epi16(4); __m128i p7_16, p6_16, p5_16, p4_16, p3_16, p2_16, p1_16, p0_16; __m128i q7_16, q6_16, q5_16, q4_16, q3_16, q2_16, q1_16, q0_16; __m128i pixelFilter_p, pixelFilter_q; __m128i pixetFilter_p2p1p0, pixetFilter_q2q1q0; __m128i sum_p7, sum_q7, sum_p3, sum_q3, res_p, res_q; p7_16 = _mm_unpacklo_epi8(q7p7, zero); p6_16 = 
_mm_unpacklo_epi8(q6p6, zero); p5_16 = _mm_unpacklo_epi8(q5p5, zero); p4_16 = _mm_unpacklo_epi8(q4p4, zero); p3_16 = _mm_unpacklo_epi8(q3p3, zero); p2_16 = _mm_unpacklo_epi8(q2p2, zero); p1_16 = _mm_unpacklo_epi8(q1p1, zero); p0_16 = _mm_unpacklo_epi8(q0p0, zero); q0_16 = _mm_unpackhi_epi8(q0p0, zero); q1_16 = _mm_unpackhi_epi8(q1p1, zero); q2_16 = _mm_unpackhi_epi8(q2p2, zero); q3_16 = _mm_unpackhi_epi8(q3p3, zero); q4_16 = _mm_unpackhi_epi8(q4p4, zero); q5_16 = _mm_unpackhi_epi8(q5p5, zero); q6_16 = _mm_unpackhi_epi8(q6p6, zero); q7_16 = _mm_unpackhi_epi8(q7p7, zero); pixelFilter_p = _mm_add_epi16(_mm_add_epi16(p6_16, p5_16), _mm_add_epi16(p4_16, p3_16)); pixelFilter_q = _mm_add_epi16(_mm_add_epi16(q6_16, q5_16), _mm_add_epi16(q4_16, q3_16)); pixetFilter_p2p1p0 = _mm_add_epi16(p0_16, _mm_add_epi16(p2_16, p1_16)); pixelFilter_p = _mm_add_epi16(pixelFilter_p, pixetFilter_p2p1p0); pixetFilter_q2q1q0 = _mm_add_epi16(q0_16, _mm_add_epi16(q2_16, q1_16)); pixelFilter_q = _mm_add_epi16(pixelFilter_q, pixetFilter_q2q1q0); pixelFilter_p = _mm_add_epi16(eight, _mm_add_epi16(pixelFilter_p, pixelFilter_q)); pixetFilter_p2p1p0 = _mm_add_epi16( four, _mm_add_epi16(pixetFilter_p2p1p0, pixetFilter_q2q1q0)); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(p7_16, p0_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(q7_16, q0_16)), 4); flat2_q0p0 = _mm_packus_epi16(res_p, res_q); res_p = _mm_srli_epi16( _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(p3_16, p0_16)), 3); res_q = _mm_srli_epi16( _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(q3_16, q0_16)), 3); flat_q0p0 = _mm_packus_epi16(res_p, res_q); sum_p7 = _mm_add_epi16(p7_16, p7_16); sum_q7 = _mm_add_epi16(q7_16, q7_16); sum_p3 = _mm_add_epi16(p3_16, p3_16); sum_q3 = _mm_add_epi16(q3_16, q3_16); pixelFilter_q = _mm_sub_epi16(pixelFilter_p, p6_16); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q6_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p1_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q1_16)), 4); flat2_q1p1 = _mm_packus_epi16(res_p, res_q); pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_p2p1p0, p2_16); pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q2_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(sum_p3, p1_16)), 3); res_q = _mm_srli_epi16( _mm_add_epi16(pixetFilter_q2q1q0, _mm_add_epi16(sum_q3, q1_16)), 3); flat_q1p1 = _mm_packus_epi16(res_p, res_q); sum_p7 = _mm_add_epi16(sum_p7, p7_16); sum_q7 = _mm_add_epi16(sum_q7, q7_16); sum_p3 = _mm_add_epi16(sum_p3, p3_16); sum_q3 = _mm_add_epi16(sum_q3, q3_16); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q5_16); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p5_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p2_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q2_16)), 4); flat2_q2p2 = _mm_packus_epi16(res_p, res_q); pixetFilter_p2p1p0 = _mm_sub_epi16(pixetFilter_p2p1p0, q1_16); pixetFilter_q2q1q0 = _mm_sub_epi16(pixetFilter_q2q1q0, p1_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixetFilter_p2p1p0, _mm_add_epi16(sum_p3, p2_16)), 3); res_q = _mm_srli_epi16( _mm_add_epi16(pixetFilter_q2q1q0, _mm_add_epi16(sum_q3, q2_16)), 3); flat_q2p2 = _mm_packus_epi16(res_p, res_q); sum_p7 = _mm_add_epi16(sum_p7, p7_16); sum_q7 = _mm_add_epi16(sum_q7, q7_16); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q4_16); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p4_16); res_p = _mm_srli_epi16( 
_mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p3_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q3_16)), 4); flat2_q3p3 = _mm_packus_epi16(res_p, res_q); sum_p7 = _mm_add_epi16(sum_p7, p7_16); sum_q7 = _mm_add_epi16(sum_q7, q7_16); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q3_16); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p3_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p4_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q4_16)), 4); flat2_q4p4 = _mm_packus_epi16(res_p, res_q); sum_p7 = _mm_add_epi16(sum_p7, p7_16); sum_q7 = _mm_add_epi16(sum_q7, q7_16); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q2_16); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p2_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p5_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q5_16)), 4); flat2_q5p5 = _mm_packus_epi16(res_p, res_q); sum_p7 = _mm_add_epi16(sum_p7, p7_16); sum_q7 = _mm_add_epi16(sum_q7, q7_16); pixelFilter_p = _mm_sub_epi16(pixelFilter_p, q1_16); pixelFilter_q = _mm_sub_epi16(pixelFilter_q, p1_16); res_p = _mm_srli_epi16( _mm_add_epi16(pixelFilter_p, _mm_add_epi16(sum_p7, p6_16)), 4); res_q = _mm_srli_epi16( _mm_add_epi16(pixelFilter_q, _mm_add_epi16(sum_q7, q6_16)), 4); flat2_q6p6 = _mm_packus_epi16(res_p, res_q); } // wide flat // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ flat = _mm_shuffle_epi32(flat, 68); flat2 = _mm_shuffle_epi32(flat2, 68); q2p2 = _mm_andnot_si128(flat, q2p2); flat_q2p2 = _mm_and_si128(flat, flat_q2p2); q2p2 = _mm_or_si128(q2p2, flat_q2p2); qs1ps1 = _mm_andnot_si128(flat, qs1ps1); flat_q1p1 = _mm_and_si128(flat, flat_q1p1); q1p1 = _mm_or_si128(qs1ps1, flat_q1p1); qs0ps0 = _mm_andnot_si128(flat, qs0ps0); flat_q0p0 = _mm_and_si128(flat, flat_q0p0); q0p0 = _mm_or_si128(qs0ps0, flat_q0p0); q6p6 = _mm_andnot_si128(flat2, q6p6); flat2_q6p6 = _mm_and_si128(flat2, flat2_q6p6); q6p6 = _mm_or_si128(q6p6, flat2_q6p6); _mm_storel_epi64((__m128i *)(s - 7 * pitch), q6p6); _mm_storeh_pi((__m64 *)(s + 6 * pitch), _mm_castsi128_ps(q6p6)); q5p5 = _mm_andnot_si128(flat2, q5p5); flat2_q5p5 = _mm_and_si128(flat2, flat2_q5p5); q5p5 = _mm_or_si128(q5p5, flat2_q5p5); _mm_storel_epi64((__m128i *)(s - 6 * pitch), q5p5); _mm_storeh_pi((__m64 *)(s + 5 * pitch), _mm_castsi128_ps(q5p5)); q4p4 = _mm_andnot_si128(flat2, q4p4); flat2_q4p4 = _mm_and_si128(flat2, flat2_q4p4); q4p4 = _mm_or_si128(q4p4, flat2_q4p4); _mm_storel_epi64((__m128i *)(s - 5 * pitch), q4p4); _mm_storeh_pi((__m64 *)(s + 4 * pitch), _mm_castsi128_ps(q4p4)); q3p3 = _mm_andnot_si128(flat2, q3p3); flat2_q3p3 = _mm_and_si128(flat2, flat2_q3p3); q3p3 = _mm_or_si128(q3p3, flat2_q3p3); _mm_storel_epi64((__m128i *)(s - 4 * pitch), q3p3); _mm_storeh_pi((__m64 *)(s + 3 * pitch), _mm_castsi128_ps(q3p3)); q2p2 = _mm_andnot_si128(flat2, q2p2); flat2_q2p2 = _mm_and_si128(flat2, flat2_q2p2); q2p2 = _mm_or_si128(q2p2, flat2_q2p2); _mm_storel_epi64((__m128i *)(s - 3 * pitch), q2p2); _mm_storeh_pi((__m64 *)(s + 2 * pitch), _mm_castsi128_ps(q2p2)); q1p1 = _mm_andnot_si128(flat2, q1p1); flat2_q1p1 = _mm_and_si128(flat2, flat2_q1p1); q1p1 = _mm_or_si128(q1p1, flat2_q1p1); _mm_storel_epi64((__m128i *)(s - 2 * pitch), q1p1); _mm_storeh_pi((__m64 *)(s + 1 * pitch), _mm_castsi128_ps(q1p1)); q0p0 = _mm_andnot_si128(flat2, q0p0); flat2_q0p0 = _mm_and_si128(flat2, flat2_q0p0); q0p0 = _mm_or_si128(q0p0, flat2_q0p0); _mm_storel_epi64((__m128i *)(s - 1 * pitch), q0p0); 
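// The low 8 bytes of each qXpX register hold the p-side row and the high 8 bytes the q-side row, so each row pair is written back as a storel_epi64/storeh_pi pair; the q0 half of q0p0 follows below.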
_mm_storeh_pi((__m64 *)(s - 0 * pitch), _mm_castsi128_ps(q0p0)); } } static INLINE __m128i filter_add2_sub2(const __m128i *const total, const __m128i *const a1, const __m128i *const a2, const __m128i *const s1, const __m128i *const s2) { __m128i x = _mm_add_epi16(*a1, *total); x = _mm_add_epi16(_mm_sub_epi16(x, _mm_add_epi16(*s1, *s2)), *a2); return x; } static INLINE __m128i filter8_mask(const __m128i *const flat, const __m128i *const other_filt, const __m128i *const f8_lo, const __m128i *const f8_hi) { const __m128i f8 = _mm_packus_epi16(_mm_srli_epi16(*f8_lo, 3), _mm_srli_epi16(*f8_hi, 3)); const __m128i result = _mm_and_si128(*flat, f8); return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); } static INLINE __m128i filter16_mask(const __m128i *const flat, const __m128i *const other_filt, const __m128i *const f_lo, const __m128i *const f_hi) { const __m128i f = _mm_packus_epi16(_mm_srli_epi16(*f_lo, 4), _mm_srli_epi16(*f_hi, 4)); const __m128i result = _mm_and_si128(*flat, f); return _mm_or_si128(_mm_andnot_si128(*flat, *other_filt), result); } void vpx_lpf_horizontal_16_dual_sse2(unsigned char *s, int pitch, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh) { const __m128i zero = _mm_set1_epi16(0); const __m128i one = _mm_set1_epi8(1); const __m128i blimit_v = _mm_load_si128((const __m128i *)blimit); const __m128i limit_v = _mm_load_si128((const __m128i *)limit); const __m128i thresh_v = _mm_load_si128((const __m128i *)thresh); __m128i mask, hev, flat, flat2; __m128i p7, p6, p5; __m128i p4, p3, p2, p1, p0, q0, q1, q2, q3, q4; __m128i q5, q6, q7; __m128i op2, op1, op0, oq0, oq1, oq2; __m128i max_abs_p1p0q1q0; p7 = _mm_loadu_si128((__m128i *)(s - 8 * pitch)); p6 = _mm_loadu_si128((__m128i *)(s - 7 * pitch)); p5 = _mm_loadu_si128((__m128i *)(s - 6 * pitch)); p4 = _mm_loadu_si128((__m128i *)(s - 5 * pitch)); p3 = _mm_loadu_si128((__m128i *)(s - 4 * pitch)); p2 = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); p1 = _mm_loadu_si128((__m128i *)(s - 2 * pitch)); p0 = _mm_loadu_si128((__m128i *)(s - 1 * pitch)); q0 = _mm_loadu_si128((__m128i *)(s - 0 * pitch)); q1 = _mm_loadu_si128((__m128i *)(s + 1 * pitch)); q2 = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); q3 = _mm_loadu_si128((__m128i *)(s + 3 * pitch)); q4 = _mm_loadu_si128((__m128i *)(s + 4 * pitch)); q5 = _mm_loadu_si128((__m128i *)(s + 5 * pitch)); q6 = _mm_loadu_si128((__m128i *)(s + 6 * pitch)); q7 = _mm_loadu_si128((__m128i *)(s + 7 * pitch)); { const __m128i abs_p1p0 = abs_diff(p1, p0); const __m128i abs_q1q0 = abs_diff(q1, q0); const __m128i fe = _mm_set1_epi8((int8_t)0xfe); const __m128i ff = _mm_cmpeq_epi8(zero, zero); __m128i abs_p0q0 = abs_diff(p0, q0); __m128i abs_p1q1 = abs_diff(p1, q1); __m128i work; max_abs_p1p0q1q0 = _mm_max_epu8(abs_p1p0, abs_q1q0); abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; mask = _mm_max_epu8(max_abs_p1p0q1q0, mask); // mask |= (abs(p1 - p0) > limit) * -1; // mask |= (abs(q1 - q0) > limit) * -1; work = _mm_max_epu8(abs_diff(p2, p1), abs_diff(p3, p2)); mask = _mm_max_epu8(work, mask); work = _mm_max_epu8(abs_diff(q2, q1), abs_diff(q3, q2)); mask = _mm_max_epu8(work, mask); mask = _mm_subs_epu8(mask, limit_v); mask = _mm_cmpeq_epi8(mask, zero); } { __m128i work; work = _mm_max_epu8(abs_diff(p2, p0), abs_diff(q2, q0)); 
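// flat is set where p1..p3/q1..q3 all lie within 1 of p0/q0 and selects the 8-tap filter8 path; flat2 extends the same test to p4..p7/q4..q7 for the 16-tap wide path. Both are built as per-byte 0x00/0xff masks and are later combined as flat2 & flat & mask.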
flat = _mm_max_epu8(work, max_abs_p1p0q1q0); work = _mm_max_epu8(abs_diff(p3, p0), abs_diff(q3, q0)); flat = _mm_max_epu8(work, flat); work = _mm_max_epu8(abs_diff(p4, p0), abs_diff(q4, q0)); flat = _mm_subs_epu8(flat, one); flat = _mm_cmpeq_epi8(flat, zero); flat = _mm_and_si128(flat, mask); flat2 = _mm_max_epu8(abs_diff(p5, p0), abs_diff(q5, q0)); flat2 = _mm_max_epu8(work, flat2); work = _mm_max_epu8(abs_diff(p6, p0), abs_diff(q6, q0)); flat2 = _mm_max_epu8(work, flat2); work = _mm_max_epu8(abs_diff(p7, p0), abs_diff(q7, q0)); flat2 = _mm_max_epu8(work, flat2); flat2 = _mm_subs_epu8(flat2, one); flat2 = _mm_cmpeq_epi8(flat2, zero); flat2 = _mm_and_si128(flat2, flat); // flat2 & flat & mask } // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // filter4 { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); const __m128i t80 = _mm_set1_epi8((int8_t)0x80); const __m128i te0 = _mm_set1_epi8((int8_t)0xe0); const __m128i t1f = _mm_set1_epi8(0x1f); const __m128i t1 = _mm_set1_epi8(0x1); const __m128i t7f = _mm_set1_epi8(0x7f); const __m128i ff = _mm_cmpeq_epi8(t4, t4); __m128i filt; __m128i work_a; __m128i filter1, filter2; op1 = _mm_xor_si128(p1, t80); op0 = _mm_xor_si128(p0, t80); oq0 = _mm_xor_si128(q0, t80); oq1 = _mm_xor_si128(q1, t80); hev = _mm_subs_epu8(max_abs_p1p0q1q0, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); filt = _mm_and_si128(_mm_subs_epi8(op1, oq1), hev); work_a = _mm_subs_epi8(oq0, op0); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); filter2 = _mm_adds_epi8(filt, t3); // Filter1 >> 3 work_a = _mm_cmpgt_epi8(zero, filter1); filter1 = _mm_srli_epi16(filter1, 3); work_a = _mm_and_si128(work_a, te0); filter1 = _mm_and_si128(filter1, t1f); filter1 = _mm_or_si128(filter1, work_a); oq0 = _mm_xor_si128(_mm_subs_epi8(oq0, filter1), t80); // Filter2 >> 3 work_a = _mm_cmpgt_epi8(zero, filter2); filter2 = _mm_srli_epi16(filter2, 3); work_a = _mm_and_si128(work_a, te0); filter2 = _mm_and_si128(filter2, t1f); filter2 = _mm_or_si128(filter2, work_a); op0 = _mm_xor_si128(_mm_adds_epi8(op0, filter2), t80); // filt >> 1 filt = _mm_adds_epi8(filter1, t1); work_a = _mm_cmpgt_epi8(zero, filt); filt = _mm_srli_epi16(filt, 1); work_a = _mm_and_si128(work_a, t80); filt = _mm_and_si128(filt, t7f); filt = _mm_or_si128(filt, work_a); filt = _mm_andnot_si128(hev, filt); op1 = _mm_xor_si128(_mm_adds_epi8(op1, filt), t80); oq1 = _mm_xor_si128(_mm_subs_epi8(oq1, filt), t80); // loopfilter done // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // filter8 { const __m128i four = _mm_set1_epi16(4); const __m128i p3_lo = _mm_unpacklo_epi8(p3, zero); const __m128i p2_lo = _mm_unpacklo_epi8(p2, zero); const __m128i p1_lo = _mm_unpacklo_epi8(p1, zero); const __m128i p0_lo = _mm_unpacklo_epi8(p0, zero); const __m128i q0_lo = _mm_unpacklo_epi8(q0, zero); const __m128i q1_lo = _mm_unpacklo_epi8(q1, zero); const __m128i q2_lo = _mm_unpacklo_epi8(q2, zero); const __m128i q3_lo = _mm_unpacklo_epi8(q3, zero); const __m128i p3_hi = _mm_unpackhi_epi8(p3, zero); const __m128i p2_hi = _mm_unpackhi_epi8(p2, zero); const __m128i p1_hi = _mm_unpackhi_epi8(p1, zero); const __m128i p0_hi = _mm_unpackhi_epi8(p0, zero); const __m128i q0_hi = _mm_unpackhi_epi8(q0, zero); const __m128i q1_hi = _mm_unpackhi_epi8(q1, zero); const __m128i q2_hi = _mm_unpackhi_epi8(q2, zero); const __m128i q3_hi = _mm_unpackhi_epi8(q3, 
zero); __m128i f8_lo, f8_hi; f8_lo = _mm_add_epi16(_mm_add_epi16(p3_lo, four), _mm_add_epi16(p3_lo, p2_lo)); f8_lo = _mm_add_epi16(_mm_add_epi16(p3_lo, f8_lo), _mm_add_epi16(p2_lo, p1_lo)); f8_lo = _mm_add_epi16(_mm_add_epi16(p0_lo, q0_lo), f8_lo); f8_hi = _mm_add_epi16(_mm_add_epi16(p3_hi, four), _mm_add_epi16(p3_hi, p2_hi)); f8_hi = _mm_add_epi16(_mm_add_epi16(p3_hi, f8_hi), _mm_add_epi16(p2_hi, p1_hi)); f8_hi = _mm_add_epi16(_mm_add_epi16(p0_hi, q0_hi), f8_hi); op2 = filter8_mask(&flat, &p2, &f8_lo, &f8_hi); f8_lo = filter_add2_sub2(&f8_lo, &q1_lo, &p1_lo, &p2_lo, &p3_lo); f8_hi = filter_add2_sub2(&f8_hi, &q1_hi, &p1_hi, &p2_hi, &p3_hi); op1 = filter8_mask(&flat, &op1, &f8_lo, &f8_hi); f8_lo = filter_add2_sub2(&f8_lo, &q2_lo, &p0_lo, &p1_lo, &p3_lo); f8_hi = filter_add2_sub2(&f8_hi, &q2_hi, &p0_hi, &p1_hi, &p3_hi); op0 = filter8_mask(&flat, &op0, &f8_lo, &f8_hi); f8_lo = filter_add2_sub2(&f8_lo, &q3_lo, &q0_lo, &p0_lo, &p3_lo); f8_hi = filter_add2_sub2(&f8_hi, &q3_hi, &q0_hi, &p0_hi, &p3_hi); oq0 = filter8_mask(&flat, &oq0, &f8_lo, &f8_hi); f8_lo = filter_add2_sub2(&f8_lo, &q3_lo, &q1_lo, &q0_lo, &p2_lo); f8_hi = filter_add2_sub2(&f8_hi, &q3_hi, &q1_hi, &q0_hi, &p2_hi); oq1 = filter8_mask(&flat, &oq1, &f8_lo, &f8_hi); f8_lo = filter_add2_sub2(&f8_lo, &q3_lo, &q2_lo, &q1_lo, &p1_lo); f8_hi = filter_add2_sub2(&f8_hi, &q3_hi, &q2_hi, &q1_hi, &p1_hi); oq2 = filter8_mask(&flat, &q2, &f8_lo, &f8_hi); } // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ // wide flat calculations { const __m128i eight = _mm_set1_epi16(8); const __m128i p7_lo = _mm_unpacklo_epi8(p7, zero); const __m128i p6_lo = _mm_unpacklo_epi8(p6, zero); const __m128i p5_lo = _mm_unpacklo_epi8(p5, zero); const __m128i p4_lo = _mm_unpacklo_epi8(p4, zero); const __m128i p3_lo = _mm_unpacklo_epi8(p3, zero); const __m128i p2_lo = _mm_unpacklo_epi8(p2, zero); const __m128i p1_lo = _mm_unpacklo_epi8(p1, zero); const __m128i p0_lo = _mm_unpacklo_epi8(p0, zero); const __m128i q0_lo = _mm_unpacklo_epi8(q0, zero); const __m128i q1_lo = _mm_unpacklo_epi8(q1, zero); const __m128i q2_lo = _mm_unpacklo_epi8(q2, zero); const __m128i q3_lo = _mm_unpacklo_epi8(q3, zero); const __m128i q4_lo = _mm_unpacklo_epi8(q4, zero); const __m128i q5_lo = _mm_unpacklo_epi8(q5, zero); const __m128i q6_lo = _mm_unpacklo_epi8(q6, zero); const __m128i q7_lo = _mm_unpacklo_epi8(q7, zero); const __m128i p7_hi = _mm_unpackhi_epi8(p7, zero); const __m128i p6_hi = _mm_unpackhi_epi8(p6, zero); const __m128i p5_hi = _mm_unpackhi_epi8(p5, zero); const __m128i p4_hi = _mm_unpackhi_epi8(p4, zero); const __m128i p3_hi = _mm_unpackhi_epi8(p3, zero); const __m128i p2_hi = _mm_unpackhi_epi8(p2, zero); const __m128i p1_hi = _mm_unpackhi_epi8(p1, zero); const __m128i p0_hi = _mm_unpackhi_epi8(p0, zero); const __m128i q0_hi = _mm_unpackhi_epi8(q0, zero); const __m128i q1_hi = _mm_unpackhi_epi8(q1, zero); const __m128i q2_hi = _mm_unpackhi_epi8(q2, zero); const __m128i q3_hi = _mm_unpackhi_epi8(q3, zero); const __m128i q4_hi = _mm_unpackhi_epi8(q4, zero); const __m128i q5_hi = _mm_unpackhi_epi8(q5, zero); const __m128i q6_hi = _mm_unpackhi_epi8(q6, zero); const __m128i q7_hi = _mm_unpackhi_epi8(q7, zero); __m128i f_lo; __m128i f_hi; f_lo = _mm_sub_epi16(_mm_slli_epi16(p7_lo, 3), p7_lo); // p7 * 7 f_lo = _mm_add_epi16(_mm_slli_epi16(p6_lo, 1), _mm_add_epi16(p4_lo, f_lo)); f_lo = _mm_add_epi16(_mm_add_epi16(p3_lo, f_lo), _mm_add_epi16(p2_lo, p1_lo)); f_lo = _mm_add_epi16(_mm_add_epi16(p0_lo, q0_lo), f_lo); f_lo = _mm_add_epi16(_mm_add_epi16(p5_lo, eight), f_lo); f_hi = 
_mm_sub_epi16(_mm_slli_epi16(p7_hi, 3), p7_hi); // p7 * 7 f_hi = _mm_add_epi16(_mm_slli_epi16(p6_hi, 1), _mm_add_epi16(p4_hi, f_hi)); f_hi = _mm_add_epi16(_mm_add_epi16(p3_hi, f_hi), _mm_add_epi16(p2_hi, p1_hi)); f_hi = _mm_add_epi16(_mm_add_epi16(p0_hi, q0_hi), f_hi); f_hi = _mm_add_epi16(_mm_add_epi16(p5_hi, eight), f_hi); p6 = filter16_mask(&flat2, &p6, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s - 7 * pitch), p6); f_lo = filter_add2_sub2(&f_lo, &q1_lo, &p5_lo, &p6_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q1_hi, &p5_hi, &p6_hi, &p7_hi); p5 = filter16_mask(&flat2, &p5, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s - 6 * pitch), p5); f_lo = filter_add2_sub2(&f_lo, &q2_lo, &p4_lo, &p5_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q2_hi, &p4_hi, &p5_hi, &p7_hi); p4 = filter16_mask(&flat2, &p4, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s - 5 * pitch), p4); f_lo = filter_add2_sub2(&f_lo, &q3_lo, &p3_lo, &p4_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q3_hi, &p3_hi, &p4_hi, &p7_hi); p3 = filter16_mask(&flat2, &p3, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s - 4 * pitch), p3); f_lo = filter_add2_sub2(&f_lo, &q4_lo, &p2_lo, &p3_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q4_hi, &p2_hi, &p3_hi, &p7_hi); op2 = filter16_mask(&flat2, &op2, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s - 3 * pitch), op2); f_lo = filter_add2_sub2(&f_lo, &q5_lo, &p1_lo, &p2_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q5_hi, &p1_hi, &p2_hi, &p7_hi); op1 = filter16_mask(&flat2, &op1, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s - 2 * pitch), op1); f_lo = filter_add2_sub2(&f_lo, &q6_lo, &p0_lo, &p1_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q6_hi, &p0_hi, &p1_hi, &p7_hi); op0 = filter16_mask(&flat2, &op0, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s - 1 * pitch), op0); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q0_lo, &p0_lo, &p7_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q0_hi, &p0_hi, &p7_hi); oq0 = filter16_mask(&flat2, &oq0, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s - 0 * pitch), oq0); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q1_lo, &p6_lo, &q0_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q1_hi, &p6_hi, &q0_hi); oq1 = filter16_mask(&flat2, &oq1, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s + 1 * pitch), oq1); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q2_lo, &p5_lo, &q1_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q2_hi, &p5_hi, &q1_hi); oq2 = filter16_mask(&flat2, &oq2, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s + 2 * pitch), oq2); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q3_lo, &p4_lo, &q2_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q3_hi, &p4_hi, &q2_hi); q3 = filter16_mask(&flat2, &q3, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s + 3 * pitch), q3); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q4_lo, &p3_lo, &q3_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q4_hi, &p3_hi, &q3_hi); q4 = filter16_mask(&flat2, &q4, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s + 4 * pitch), q4); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q5_lo, &p2_lo, &q4_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q5_hi, &p2_hi, &q4_hi); q5 = filter16_mask(&flat2, &q5, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s + 5 * pitch), q5); f_lo = filter_add2_sub2(&f_lo, &q7_lo, &q6_lo, &p1_lo, &q5_lo); f_hi = filter_add2_sub2(&f_hi, &q7_hi, &q6_hi, &p1_hi, &q5_hi); q6 = filter16_mask(&flat2, &q6, &f_lo, &f_hi); _mm_storeu_si128((__m128i *)(s + 6 * pitch), q6); } // wide flat // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ } } void vpx_lpf_horizontal_8_sse2(unsigned char *s, int pitch, const unsigned char *blimit, const unsigned char 
*limit, const unsigned char *thresh) { DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]); DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); const __m128i zero = _mm_set1_epi16(0); const __m128i blimit_v = _mm_load_si128((const __m128i *)blimit); const __m128i limit_v = _mm_load_si128((const __m128i *)limit); const __m128i thresh_v = _mm_load_si128((const __m128i *)thresh); __m128i mask, hev, flat; __m128i p3, p2, p1, p0, q0, q1, q2, q3; __m128i q3p3, q2p2, q1p1, q0p0, p1q1, p0q0; q3p3 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 4 * pitch)), _mm_loadl_epi64((__m128i *)(s + 3 * pitch))); q2p2 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 3 * pitch)), _mm_loadl_epi64((__m128i *)(s + 2 * pitch))); q1p1 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 2 * pitch)), _mm_loadl_epi64((__m128i *)(s + 1 * pitch))); q0p0 = _mm_unpacklo_epi64(_mm_loadl_epi64((__m128i *)(s - 1 * pitch)), _mm_loadl_epi64((__m128i *)(s - 0 * pitch))); p1q1 = _mm_shuffle_epi32(q1p1, 78); p0q0 = _mm_shuffle_epi32(q0p0, 78); { // filter_mask and hev_mask const __m128i one = _mm_set1_epi8(1); const __m128i fe = _mm_set1_epi8((int8_t)0xfe); const __m128i ff = _mm_cmpeq_epi8(fe, fe); __m128i abs_p1q1, abs_p0q0, abs_q1q0, abs_p1p0, work; abs_p1p0 = abs_diff(q1p1, q0p0); abs_q1q0 = _mm_srli_si128(abs_p1p0, 8); abs_p0q0 = abs_diff(q0p0, p0q0); abs_p1q1 = abs_diff(q1p1, p1q1); flat = _mm_max_epu8(abs_p1p0, abs_q1q0); hev = _mm_subs_epu8(flat, thresh_v); hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit_v); mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; mask = _mm_max_epu8(abs_p1p0, mask); // mask |= (abs(p1 - p0) > limit) * -1; // mask |= (abs(q1 - q0) > limit) * -1; work = _mm_max_epu8(abs_diff(q2p2, q1p1), abs_diff(q3p3, q2p2)); mask = _mm_max_epu8(work, mask); mask = _mm_max_epu8(mask, _mm_srli_si128(mask, 8)); mask = _mm_subs_epu8(mask, limit_v); mask = _mm_cmpeq_epi8(mask, zero); // flat_mask4 flat = _mm_max_epu8(abs_diff(q2p2, q0p0), abs_diff(q3p3, q0p0)); flat = _mm_max_epu8(abs_p1p0, flat); flat = _mm_max_epu8(flat, _mm_srli_si128(flat, 8)); flat = _mm_subs_epu8(flat, one); flat = _mm_cmpeq_epi8(flat, zero); flat = _mm_and_si128(flat, mask); } { const __m128i four = _mm_set1_epi16(4); unsigned char *src = s; { __m128i workp_a, workp_b, workp_shft; p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * pitch)), zero); p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * pitch)), zero); p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * pitch)), zero); p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * pitch)), zero); q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * pitch)), zero); q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * pitch)), zero); q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * pitch)), zero); q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * pitch)), zero); workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1)); workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p3); workp_shft = 
_mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_storel_epi64((__m128i *)&flat_op2[0], _mm_packus_epi16(workp_shft, workp_shft)); workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_storel_epi64((__m128i *)&flat_op1[0], _mm_packus_epi16(workp_shft, workp_shft)); workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q2); workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_storel_epi64((__m128i *)&flat_op0[0], _mm_packus_epi16(workp_shft, workp_shft)); workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3); workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_storel_epi64((__m128i *)&flat_oq0[0], _mm_packus_epi16(workp_shft, workp_shft)); workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q3); workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_storel_epi64((__m128i *)&flat_oq1[0], _mm_packus_epi16(workp_shft, workp_shft)); workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q3); workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_storel_epi64((__m128i *)&flat_oq2[0], _mm_packus_epi16(workp_shft, workp_shft)); } } // lp filter { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); const __m128i t80 = _mm_set1_epi8((int8_t)0x80); const __m128i t1 = _mm_set1_epi8(0x1); const __m128i ps1 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 2 * pitch)), t80); const __m128i ps0 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s - 1 * pitch)), t80); const __m128i qs0 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s + 0 * pitch)), t80); const __m128i qs1 = _mm_xor_si128(_mm_loadl_epi64((__m128i *)(s + 1 * pitch)), t80); __m128i filt; __m128i work_a; __m128i filter1, filter2; filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); work_a = _mm_subs_epi8(qs0, ps0); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); filter2 = _mm_adds_epi8(filt, t3); // Filter1 >> 3 filter1 = _mm_unpacklo_epi8(zero, filter1); filter1 = _mm_srai_epi16(filter1, 11); filter1 = _mm_packs_epi16(filter1, filter1); // Filter2 >> 3 filter2 = _mm_unpacklo_epi8(zero, filter2); filter2 = _mm_srai_epi16(filter2, 11); filter2 = _mm_packs_epi16(filter2, zero); // filt >> 1 filt = _mm_adds_epi8(filter1, t1); filt = _mm_unpacklo_epi8(zero, filt); filt = _mm_srai_epi16(filt, 9); filt = _mm_packs_epi16(filt, zero); filt = _mm_andnot_si128(hev, filt); work_a = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); q0 = _mm_loadl_epi64((__m128i *)flat_oq0); work_a = _mm_andnot_si128(flat, work_a); q0 = _mm_and_si128(flat, q0); q0 = _mm_or_si128(work_a, q0); work_a = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); q1 = _mm_loadl_epi64((__m128i *)flat_oq1); work_a = _mm_andnot_si128(flat, work_a); q1 = _mm_and_si128(flat, q1); q1 = _mm_or_si128(work_a, q1); work_a = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); q2 = _mm_loadl_epi64((__m128i *)flat_oq2); work_a = _mm_andnot_si128(flat, work_a); q2 = _mm_and_si128(flat, q2); q2 = _mm_or_si128(work_a, q2); work_a = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); p0 = _mm_loadl_epi64((__m128i *)flat_op0); work_a = _mm_andnot_si128(flat, work_a); p0 = _mm_and_si128(flat, 
p0); p0 = _mm_or_si128(work_a, p0); work_a = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); p1 = _mm_loadl_epi64((__m128i *)flat_op1); work_a = _mm_andnot_si128(flat, work_a); p1 = _mm_and_si128(flat, p1); p1 = _mm_or_si128(work_a, p1); work_a = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); p2 = _mm_loadl_epi64((__m128i *)flat_op2); work_a = _mm_andnot_si128(flat, work_a); p2 = _mm_and_si128(flat, p2); p2 = _mm_or_si128(work_a, p2); _mm_storel_epi64((__m128i *)(s - 3 * pitch), p2); _mm_storel_epi64((__m128i *)(s - 2 * pitch), p1); _mm_storel_epi64((__m128i *)(s - 1 * pitch), p0); _mm_storel_epi64((__m128i *)(s + 0 * pitch), q0); _mm_storel_epi64((__m128i *)(s + 1 * pitch), q1); _mm_storel_epi64((__m128i *)(s + 2 * pitch), q2); } } void vpx_lpf_horizontal_8_dual_sse2( uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { DECLARE_ALIGNED(16, unsigned char, flat_op2[16]); DECLARE_ALIGNED(16, unsigned char, flat_op1[16]); DECLARE_ALIGNED(16, unsigned char, flat_op0[16]); DECLARE_ALIGNED(16, unsigned char, flat_oq2[16]); DECLARE_ALIGNED(16, unsigned char, flat_oq1[16]); DECLARE_ALIGNED(16, unsigned char, flat_oq0[16]); const __m128i zero = _mm_set1_epi16(0); const __m128i blimit = _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)blimit0), _mm_load_si128((const __m128i *)blimit1)); const __m128i limit = _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)limit0), _mm_load_si128((const __m128i *)limit1)); const __m128i thresh = _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)thresh0), _mm_load_si128((const __m128i *)thresh1)); __m128i mask, hev, flat; __m128i p3, p2, p1, p0, q0, q1, q2, q3; p3 = _mm_loadu_si128((__m128i *)(s - 4 * pitch)); p2 = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); p1 = _mm_loadu_si128((__m128i *)(s - 2 * pitch)); p0 = _mm_loadu_si128((__m128i *)(s - 1 * pitch)); q0 = _mm_loadu_si128((__m128i *)(s - 0 * pitch)); q1 = _mm_loadu_si128((__m128i *)(s + 1 * pitch)); q2 = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); q3 = _mm_loadu_si128((__m128i *)(s + 3 * pitch)); { const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), _mm_subs_epu8(p0, p1)); const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), _mm_subs_epu8(q0, q1)); const __m128i one = _mm_set1_epi8(1); const __m128i fe = _mm_set1_epi8((int8_t)0xfe); const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), _mm_subs_epu8(q0, p0)); __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), _mm_subs_epu8(q1, p1)); __m128i work; // filter_mask and hev_mask flat = _mm_max_epu8(abs_p1p0, abs_q1q0); hev = _mm_subs_epu8(flat, thresh); hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; mask = _mm_max_epu8(flat, mask); // mask |= (abs(p1 - p0) > limit) * -1; // mask |= (abs(q1 - q0) > limit) * -1; work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p2, p1), _mm_subs_epu8(p1, p2)), _mm_or_si128(_mm_subs_epu8(p3, p2), _mm_subs_epu8(p2, p3))); mask = _mm_max_epu8(work, mask); work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(q2, q1), _mm_subs_epu8(q1, q2)), _mm_or_si128(_mm_subs_epu8(q3, q2), _mm_subs_epu8(q2, q3))); mask = _mm_max_epu8(work, mask); mask = _mm_subs_epu8(mask, limit); 
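// SSE2 has no unsigned byte compare, so the <= tests use saturating subtraction: subs_epu8 clamps at zero, leaving mask == 0 exactly where every difference was <= limit, and the cmpeq below converts that into an all-ones/all-zeros per-pixel mask.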
mask = _mm_cmpeq_epi8(mask, zero); // flat_mask4 work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p2, p0), _mm_subs_epu8(p0, p2)), _mm_or_si128(_mm_subs_epu8(q2, q0), _mm_subs_epu8(q0, q2))); flat = _mm_max_epu8(work, flat); work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p3, p0), _mm_subs_epu8(p0, p3)), _mm_or_si128(_mm_subs_epu8(q3, q0), _mm_subs_epu8(q0, q3))); flat = _mm_max_epu8(work, flat); flat = _mm_subs_epu8(flat, one); flat = _mm_cmpeq_epi8(flat, zero); flat = _mm_and_si128(flat, mask); } { const __m128i four = _mm_set1_epi16(4); unsigned char *src = s; int i = 0; do { __m128i workp_a, workp_b, workp_shft; p3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 4 * pitch)), zero); p2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 3 * pitch)), zero); p1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 2 * pitch)), zero); p0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 1 * pitch)), zero); q0 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src - 0 * pitch)), zero); q1 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 1 * pitch)), zero); q2 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 2 * pitch)), zero); q3 = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(src + 3 * pitch)), zero); workp_a = _mm_add_epi16(_mm_add_epi16(p3, p3), _mm_add_epi16(p2, p1)); workp_a = _mm_add_epi16(_mm_add_epi16(workp_a, four), p0); workp_b = _mm_add_epi16(_mm_add_epi16(q0, p2), p3); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_storel_epi64((__m128i *)&flat_op2[i * 8], _mm_packus_epi16(workp_shft, workp_shft)); workp_b = _mm_add_epi16(_mm_add_epi16(q0, q1), p1); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_storel_epi64((__m128i *)&flat_op1[i * 8], _mm_packus_epi16(workp_shft, workp_shft)); workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q2); workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p1), p0); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_storel_epi64((__m128i *)&flat_op0[i * 8], _mm_packus_epi16(workp_shft, workp_shft)); workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p3), q3); workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, p0), q0); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_storel_epi64((__m128i *)&flat_oq0[i * 8], _mm_packus_epi16(workp_shft, workp_shft)); workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p2), q3); workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q0), q1); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_storel_epi64((__m128i *)&flat_oq1[i * 8], _mm_packus_epi16(workp_shft, workp_shft)); workp_a = _mm_add_epi16(_mm_sub_epi16(workp_a, p1), q3); workp_b = _mm_add_epi16(_mm_sub_epi16(workp_b, q1), q2); workp_shft = _mm_srli_epi16(_mm_add_epi16(workp_a, workp_b), 3); _mm_storel_epi64((__m128i *)&flat_oq2[i * 8], _mm_packus_epi16(workp_shft, workp_shft)); src += 8; } while (++i < 2); } // lp filter { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); const __m128i t80 = _mm_set1_epi8((int8_t)0x80); const __m128i te0 = _mm_set1_epi8((int8_t)0xe0); const __m128i t1f = _mm_set1_epi8(0x1f); const __m128i t1 = _mm_set1_epi8(0x1); const __m128i t7f = _mm_set1_epi8(0x7f); const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * pitch)), t80); const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * pitch)), t80); const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * pitch)), t80); const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * pitch)), t80); __m128i filt; __m128i 
work_a; __m128i filter1, filter2; filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); work_a = _mm_subs_epi8(qs0, ps0); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); filter2 = _mm_adds_epi8(filt, t3); // Filter1 >> 3 work_a = _mm_cmpgt_epi8(zero, filter1); filter1 = _mm_srli_epi16(filter1, 3); work_a = _mm_and_si128(work_a, te0); filter1 = _mm_and_si128(filter1, t1f); filter1 = _mm_or_si128(filter1, work_a); // Filter2 >> 3 work_a = _mm_cmpgt_epi8(zero, filter2); filter2 = _mm_srli_epi16(filter2, 3); work_a = _mm_and_si128(work_a, te0); filter2 = _mm_and_si128(filter2, t1f); filter2 = _mm_or_si128(filter2, work_a); // filt >> 1 filt = _mm_adds_epi8(filter1, t1); work_a = _mm_cmpgt_epi8(zero, filt); filt = _mm_srli_epi16(filt, 1); work_a = _mm_and_si128(work_a, t80); filt = _mm_and_si128(filt, t7f); filt = _mm_or_si128(filt, work_a); filt = _mm_andnot_si128(hev, filt); work_a = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); q0 = _mm_load_si128((__m128i *)flat_oq0); work_a = _mm_andnot_si128(flat, work_a); q0 = _mm_and_si128(flat, q0); q0 = _mm_or_si128(work_a, q0); work_a = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); q1 = _mm_load_si128((__m128i *)flat_oq1); work_a = _mm_andnot_si128(flat, work_a); q1 = _mm_and_si128(flat, q1); q1 = _mm_or_si128(work_a, q1); work_a = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); q2 = _mm_load_si128((__m128i *)flat_oq2); work_a = _mm_andnot_si128(flat, work_a); q2 = _mm_and_si128(flat, q2); q2 = _mm_or_si128(work_a, q2); work_a = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); p0 = _mm_load_si128((__m128i *)flat_op0); work_a = _mm_andnot_si128(flat, work_a); p0 = _mm_and_si128(flat, p0); p0 = _mm_or_si128(work_a, p0); work_a = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); p1 = _mm_load_si128((__m128i *)flat_op1); work_a = _mm_andnot_si128(flat, work_a); p1 = _mm_and_si128(flat, p1); p1 = _mm_or_si128(work_a, p1); work_a = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); p2 = _mm_load_si128((__m128i *)flat_op2); work_a = _mm_andnot_si128(flat, work_a); p2 = _mm_and_si128(flat, p2); p2 = _mm_or_si128(work_a, p2); _mm_storeu_si128((__m128i *)(s - 3 * pitch), p2); _mm_storeu_si128((__m128i *)(s - 2 * pitch), p1); _mm_storeu_si128((__m128i *)(s - 1 * pitch), p0); _mm_storeu_si128((__m128i *)(s + 0 * pitch), q0); _mm_storeu_si128((__m128i *)(s + 1 * pitch), q1); _mm_storeu_si128((__m128i *)(s + 2 * pitch), q2); } } void vpx_lpf_horizontal_4_dual_sse2(unsigned char *s, int pitch, const unsigned char *blimit0, const unsigned char *limit0, const unsigned char *thresh0, const unsigned char *blimit1, const unsigned char *limit1, const unsigned char *thresh1) { const __m128i blimit = _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)blimit0), _mm_load_si128((const __m128i *)blimit1)); const __m128i limit = _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)limit0), _mm_load_si128((const __m128i *)limit1)); const __m128i thresh = _mm_unpacklo_epi64(_mm_load_si128((const __m128i *)thresh0), _mm_load_si128((const __m128i *)thresh1)); const __m128i zero = _mm_set1_epi16(0); __m128i p3, p2, p1, p0, q0, q1, q2, q3; __m128i mask, hev, flat; p3 = _mm_loadu_si128((__m128i *)(s - 4 * pitch)); p2 = _mm_loadu_si128((__m128i *)(s - 3 * pitch)); p1 = _mm_loadu_si128((__m128i *)(s - 2 * pitch)); p0 = _mm_loadu_si128((__m128i *)(s - 1 * pitch)); q0 = _mm_loadu_si128((__m128i *)(s - 0 * pitch)); q1 = 
_mm_loadu_si128((__m128i *)(s + 1 * pitch)); q2 = _mm_loadu_si128((__m128i *)(s + 2 * pitch)); q3 = _mm_loadu_si128((__m128i *)(s + 3 * pitch)); // filter_mask and hev_mask { const __m128i abs_p1p0 = _mm_or_si128(_mm_subs_epu8(p1, p0), _mm_subs_epu8(p0, p1)); const __m128i abs_q1q0 = _mm_or_si128(_mm_subs_epu8(q1, q0), _mm_subs_epu8(q0, q1)); const __m128i fe = _mm_set1_epi8((int8_t)0xfe); const __m128i ff = _mm_cmpeq_epi8(abs_p1p0, abs_p1p0); __m128i abs_p0q0 = _mm_or_si128(_mm_subs_epu8(p0, q0), _mm_subs_epu8(q0, p0)); __m128i abs_p1q1 = _mm_or_si128(_mm_subs_epu8(p1, q1), _mm_subs_epu8(q1, p1)); __m128i work; flat = _mm_max_epu8(abs_p1p0, abs_q1q0); hev = _mm_subs_epu8(flat, thresh); hev = _mm_xor_si128(_mm_cmpeq_epi8(hev, zero), ff); abs_p0q0 = _mm_adds_epu8(abs_p0q0, abs_p0q0); abs_p1q1 = _mm_srli_epi16(_mm_and_si128(abs_p1q1, fe), 1); mask = _mm_subs_epu8(_mm_adds_epu8(abs_p0q0, abs_p1q1), blimit); mask = _mm_xor_si128(_mm_cmpeq_epi8(mask, zero), ff); // mask |= (abs(p0 - q0) * 2 + abs(p1 - q1) / 2 > blimit) * -1; mask = _mm_max_epu8(flat, mask); // mask |= (abs(p1 - p0) > limit) * -1; // mask |= (abs(q1 - q0) > limit) * -1; work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(p2, p1), _mm_subs_epu8(p1, p2)), _mm_or_si128(_mm_subs_epu8(p3, p2), _mm_subs_epu8(p2, p3))); mask = _mm_max_epu8(work, mask); work = _mm_max_epu8( _mm_or_si128(_mm_subs_epu8(q2, q1), _mm_subs_epu8(q1, q2)), _mm_or_si128(_mm_subs_epu8(q3, q2), _mm_subs_epu8(q2, q3))); mask = _mm_max_epu8(work, mask); mask = _mm_subs_epu8(mask, limit); mask = _mm_cmpeq_epi8(mask, zero); } // filter4 { const __m128i t4 = _mm_set1_epi8(4); const __m128i t3 = _mm_set1_epi8(3); const __m128i t80 = _mm_set1_epi8((int8_t)0x80); const __m128i te0 = _mm_set1_epi8((int8_t)0xe0); const __m128i t1f = _mm_set1_epi8(0x1f); const __m128i t1 = _mm_set1_epi8(0x1); const __m128i t7f = _mm_set1_epi8(0x7f); const __m128i ps1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 2 * pitch)), t80); const __m128i ps0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s - 1 * pitch)), t80); const __m128i qs0 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 0 * pitch)), t80); const __m128i qs1 = _mm_xor_si128(_mm_loadu_si128((__m128i *)(s + 1 * pitch)), t80); __m128i filt; __m128i work_a; __m128i filter1, filter2; filt = _mm_and_si128(_mm_subs_epi8(ps1, qs1), hev); work_a = _mm_subs_epi8(qs0, ps0); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); filt = _mm_adds_epi8(filt, work_a); // (vpx_filter + 3 * (qs0 - ps0)) & mask filt = _mm_and_si128(filt, mask); filter1 = _mm_adds_epi8(filt, t4); filter2 = _mm_adds_epi8(filt, t3); // Filter1 >> 3 work_a = _mm_cmpgt_epi8(zero, filter1); filter1 = _mm_srli_epi16(filter1, 3); work_a = _mm_and_si128(work_a, te0); filter1 = _mm_and_si128(filter1, t1f); filter1 = _mm_or_si128(filter1, work_a); // Filter2 >> 3 work_a = _mm_cmpgt_epi8(zero, filter2); filter2 = _mm_srli_epi16(filter2, 3); work_a = _mm_and_si128(work_a, te0); filter2 = _mm_and_si128(filter2, t1f); filter2 = _mm_or_si128(filter2, work_a); // filt >> 1 filt = _mm_adds_epi8(filter1, t1); work_a = _mm_cmpgt_epi8(zero, filt); filt = _mm_srli_epi16(filt, 1); work_a = _mm_and_si128(work_a, t80); filt = _mm_and_si128(filt, t7f); filt = _mm_or_si128(filt, work_a); filt = _mm_andnot_si128(hev, filt); q0 = _mm_xor_si128(_mm_subs_epi8(qs0, filter1), t80); q1 = _mm_xor_si128(_mm_subs_epi8(qs1, filt), t80); p0 = _mm_xor_si128(_mm_adds_epi8(ps0, filter2), t80); p1 = _mm_xor_si128(_mm_adds_epi8(ps1, filt), t80); _mm_storeu_si128((__m128i *)(s - 2 * pitch), p1); 
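// The 4-tap filter modifies only the two rows on each side of the edge: p1 was stored above; p0, q0 and q1 follow.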
_mm_storeu_si128((__m128i *)(s - 1 * pitch), p0); _mm_storeu_si128((__m128i *)(s + 0 * pitch), q0); _mm_storeu_si128((__m128i *)(s + 1 * pitch), q1); } } static INLINE void transpose8x16(unsigned char *in0, unsigned char *in1, int in_p, unsigned char *out, int out_p) { __m128i x0, x1, x2, x3, x4, x5, x6, x7; __m128i x8, x9, x10, x11, x12, x13, x14, x15; // 2-way interleave w/hoisting of unpacks x0 = _mm_loadl_epi64((__m128i *)in0); // 1 x1 = _mm_loadl_epi64((__m128i *)(in0 + in_p)); // 3 x0 = _mm_unpacklo_epi8(x0, x1); // 1 x2 = _mm_loadl_epi64((__m128i *)(in0 + 2 * in_p)); // 5 x3 = _mm_loadl_epi64((__m128i *)(in0 + 3 * in_p)); // 7 x1 = _mm_unpacklo_epi8(x2, x3); // 2 x4 = _mm_loadl_epi64((__m128i *)(in0 + 4 * in_p)); // 9 x5 = _mm_loadl_epi64((__m128i *)(in0 + 5 * in_p)); // 11 x2 = _mm_unpacklo_epi8(x4, x5); // 3 x6 = _mm_loadl_epi64((__m128i *)(in0 + 6 * in_p)); // 13 x7 = _mm_loadl_epi64((__m128i *)(in0 + 7 * in_p)); // 15 x3 = _mm_unpacklo_epi8(x6, x7); // 4 x4 = _mm_unpacklo_epi16(x0, x1); // 9 x8 = _mm_loadl_epi64((__m128i *)in1); // 2 x9 = _mm_loadl_epi64((__m128i *)(in1 + in_p)); // 4 x8 = _mm_unpacklo_epi8(x8, x9); // 5 x5 = _mm_unpacklo_epi16(x2, x3); // 10 x10 = _mm_loadl_epi64((__m128i *)(in1 + 2 * in_p)); // 6 x11 = _mm_loadl_epi64((__m128i *)(in1 + 3 * in_p)); // 8 x9 = _mm_unpacklo_epi8(x10, x11); // 6 x12 = _mm_loadl_epi64((__m128i *)(in1 + 4 * in_p)); // 10 x13 = _mm_loadl_epi64((__m128i *)(in1 + 5 * in_p)); // 12 x10 = _mm_unpacklo_epi8(x12, x13); // 7 x12 = _mm_unpacklo_epi16(x8, x9); // 11 x14 = _mm_loadl_epi64((__m128i *)(in1 + 6 * in_p)); // 14 x15 = _mm_loadl_epi64((__m128i *)(in1 + 7 * in_p)); // 16 x11 = _mm_unpacklo_epi8(x14, x15); // 8 x13 = _mm_unpacklo_epi16(x10, x11); // 12 x6 = _mm_unpacklo_epi32(x4, x5); // 13 x7 = _mm_unpackhi_epi32(x4, x5); // 14 x14 = _mm_unpacklo_epi32(x12, x13); // 15 x15 = _mm_unpackhi_epi32(x12, x13); // 16 // Store first 4-line result _mm_storeu_si128((__m128i *)out, _mm_unpacklo_epi64(x6, x14)); _mm_storeu_si128((__m128i *)(out + out_p), _mm_unpackhi_epi64(x6, x14)); _mm_storeu_si128((__m128i *)(out + 2 * out_p), _mm_unpacklo_epi64(x7, x15)); _mm_storeu_si128((__m128i *)(out + 3 * out_p), _mm_unpackhi_epi64(x7, x15)); x4 = _mm_unpackhi_epi16(x0, x1); x5 = _mm_unpackhi_epi16(x2, x3); x12 = _mm_unpackhi_epi16(x8, x9); x13 = _mm_unpackhi_epi16(x10, x11); x6 = _mm_unpacklo_epi32(x4, x5); x7 = _mm_unpackhi_epi32(x4, x5); x14 = _mm_unpacklo_epi32(x12, x13); x15 = _mm_unpackhi_epi32(x12, x13); // Store second 4-line result _mm_storeu_si128((__m128i *)(out + 4 * out_p), _mm_unpacklo_epi64(x6, x14)); _mm_storeu_si128((__m128i *)(out + 5 * out_p), _mm_unpackhi_epi64(x6, x14)); _mm_storeu_si128((__m128i *)(out + 6 * out_p), _mm_unpacklo_epi64(x7, x15)); _mm_storeu_si128((__m128i *)(out + 7 * out_p), _mm_unpackhi_epi64(x7, x15)); } static INLINE void transpose(unsigned char *src[], int in_p, unsigned char *dst[], int out_p, int num_8x8_to_transpose) { int idx8x8 = 0; __m128i x0, x1, x2, x3, x4, x5, x6, x7; do { unsigned char *in = src[idx8x8]; unsigned char *out = dst[idx8x8]; x0 = _mm_loadl_epi64((__m128i *)(in + 0 * in_p)); // 00 01 02 03 04 05 06 07 x1 = _mm_loadl_epi64((__m128i *)(in + 1 * in_p)); // 10 11 12 13 14 15 16 17 // 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 x0 = _mm_unpacklo_epi8(x0, x1); x2 = _mm_loadl_epi64((__m128i *)(in + 2 * in_p)); // 20 21 22 23 24 25 26 27 x3 = _mm_loadl_epi64((__m128i *)(in + 3 * in_p)); // 30 31 32 33 34 35 36 37 // 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 x1 = _mm_unpacklo_epi8(x2, x3); 
x4 = _mm_loadl_epi64((__m128i *)(in + 4 * in_p)); // 40 41 42 43 44 45 46 47 x5 = _mm_loadl_epi64((__m128i *)(in + 5 * in_p)); // 50 51 52 53 54 55 56 57 // 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 x2 = _mm_unpacklo_epi8(x4, x5); x6 = _mm_loadl_epi64((__m128i *)(in + 6 * in_p)); // 60 61 62 63 64 65 66 67 x7 = _mm_loadl_epi64((__m128i *)(in + 7 * in_p)); // 70 71 72 73 74 75 76 77 // 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 x3 = _mm_unpacklo_epi8(x6, x7); // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 x4 = _mm_unpacklo_epi16(x0, x1); // 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73 x5 = _mm_unpacklo_epi16(x2, x3); // 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 x6 = _mm_unpacklo_epi32(x4, x5); mm_storelu(out + 0 * out_p, x6); // 00 10 20 30 40 50 60 70 mm_storehu(out + 1 * out_p, x6); // 01 11 21 31 41 51 61 71 // 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 x7 = _mm_unpackhi_epi32(x4, x5); mm_storelu(out + 2 * out_p, x7); // 02 12 22 32 42 52 62 72 mm_storehu(out + 3 * out_p, x7); // 03 13 23 33 43 53 63 73 // 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 x4 = _mm_unpackhi_epi16(x0, x1); // 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77 x5 = _mm_unpackhi_epi16(x2, x3); // 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 x6 = _mm_unpacklo_epi32(x4, x5); mm_storelu(out + 4 * out_p, x6); // 04 14 24 34 44 54 64 74 mm_storehu(out + 5 * out_p, x6); // 05 15 25 35 45 55 65 75 // 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 x7 = _mm_unpackhi_epi32(x4, x5); mm_storelu(out + 6 * out_p, x7); // 06 16 26 36 46 56 66 76 mm_storehu(out + 7 * out_p, x7); // 07 17 27 37 47 57 67 77 } while (++idx8x8 < num_8x8_to_transpose); } void vpx_lpf_vertical_4_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { DECLARE_ALIGNED(16, unsigned char, t_dst[16 * 8]); unsigned char *src[2]; unsigned char *dst[2]; // Transpose 8x16 transpose8x16(s - 4, s - 4 + pitch * 8, pitch, t_dst, 16); // Loop filtering vpx_lpf_horizontal_4_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, blimit1, limit1, thresh1); src[0] = t_dst; src[1] = t_dst + 8; dst[0] = s - 4; dst[1] = s - 4 + pitch * 8; // Transpose back transpose(src, 16, dst, pitch, 2); } void vpx_lpf_vertical_8_sse2(unsigned char *s, int pitch, const unsigned char *blimit, const unsigned char *limit, const unsigned char *thresh) { DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 8]); unsigned char *src[1]; unsigned char *dst[1]; // Transpose 8x8 src[0] = s - 4; dst[0] = t_dst; transpose(src, pitch, dst, 8, 1); // Loop filtering vpx_lpf_horizontal_8_sse2(t_dst + 4 * 8, 8, blimit, limit, thresh); src[0] = t_dst; dst[0] = s - 4; // Transpose back transpose(src, 8, dst, pitch, 1); } void vpx_lpf_vertical_8_dual_sse2(uint8_t *s, int pitch, const uint8_t *blimit0, const uint8_t *limit0, const uint8_t *thresh0, const uint8_t *blimit1, const uint8_t *limit1, const uint8_t *thresh1) { DECLARE_ALIGNED(16, unsigned char, t_dst[16 * 8]); unsigned char *src[2]; unsigned char *dst[2]; // Transpose 8x16 transpose8x16(s - 4, s - 4 + pitch * 8, pitch, t_dst, 16); // Loop filtering vpx_lpf_horizontal_8_dual_sse2(t_dst + 4 * 16, 16, blimit0, limit0, thresh0, blimit1, limit1, thresh1); src[0] = t_dst; src[1] = t_dst + 8; dst[0] = s - 4; dst[1] = s - 4 + pitch * 8; // Transpose back transpose(src, 16, dst, pitch, 2); } void vpx_lpf_vertical_16_sse2(unsigned char *s, int pitch, const unsigned char *blimit, const unsigned char *limit, const 
unsigned char *thresh) { DECLARE_ALIGNED(8, unsigned char, t_dst[8 * 16]); unsigned char *src[2]; unsigned char *dst[2]; src[0] = s - 8; src[1] = s; dst[0] = t_dst; dst[1] = t_dst + 8 * 8; // Transpose 16x8 transpose(src, pitch, dst, 8, 2); // Loop filtering vpx_lpf_horizontal_16_sse2(t_dst + 8 * 8, 8, blimit, limit, thresh); src[0] = t_dst; src[1] = t_dst + 8 * 8; dst[0] = s - 8; dst[1] = s; // Transpose back transpose(src, 8, dst, pitch, 2); } void vpx_lpf_vertical_16_dual_sse2(unsigned char *s, int pitch, const uint8_t *blimit, const uint8_t *limit, const uint8_t *thresh) { DECLARE_ALIGNED(16, unsigned char, t_dst[256]); // Transpose 16x16 transpose8x16(s - 8, s - 8 + 8 * pitch, pitch, t_dst, 16); transpose8x16(s, s + 8 * pitch, pitch, t_dst + 8 * 16, 16); // Loop filtering vpx_lpf_horizontal_16_dual_sse2(t_dst + 8 * 16, 16, blimit, limit, thresh); // Transpose back transpose8x16(t_dst, t_dst + 8 * 16, 16, s - 8, pitch); transpose8x16(t_dst + 8, t_dst + 8 + 8 * 16, 16, s - 8 + 8 * pitch, pitch); } libvpx-1.8.2/vpx_dsp/x86/mem_sse2.h000066400000000000000000000136071357355204000170340ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_MEM_SSE2_H_ #define VPX_VPX_DSP_X86_MEM_SSE2_H_
#include <emmintrin.h>  // SSE2
#include <string.h>
#include "./vpx_config.h"
static INLINE void storeu_uint32(void *dst, uint32_t v) { memcpy(dst, &v, sizeof(v)); } static INLINE uint32_t loadu_uint32(const void *src) { uint32_t v; memcpy(&v, src, sizeof(v)); return v; } static INLINE __m128i load_unaligned_u32(const void *a) { uint32_t val; memcpy(&val, a, sizeof(val)); return _mm_cvtsi32_si128(val); } static INLINE void store_unaligned_u32(void *const a, const __m128i v) { const uint32_t val = _mm_cvtsi128_si32(v); memcpy(a, &val, sizeof(val)); } #define mm_storelu(dst, v) memcpy((dst), (const char *)&(v), 8) #define mm_storehu(dst, v) memcpy((dst), (const char *)&(v) + 8, 8) static INLINE __m128i loadh_epi64(const __m128i s, const void *const src) { return _mm_castps_si128( _mm_loadh_pi(_mm_castsi128_ps(s), (const __m64 *)src)); } static INLINE void load_8bit_4x4(const uint8_t *const s, const ptrdiff_t stride, __m128i *const d) { d[0] = _mm_cvtsi32_si128(*(const int *)(s + 0 * stride)); d[1] = _mm_cvtsi32_si128(*(const int *)(s + 1 * stride)); d[2] = _mm_cvtsi32_si128(*(const int *)(s + 2 * stride)); d[3] = _mm_cvtsi32_si128(*(const int *)(s + 3 * stride)); } static INLINE void load_8bit_4x8(const uint8_t *const s, const ptrdiff_t stride, __m128i *const d) { load_8bit_4x4(s + 0 * stride, stride, &d[0]); load_8bit_4x4(s + 4 * stride, stride, &d[4]); } static INLINE void load_8bit_8x4(const uint8_t *const s, const ptrdiff_t stride, __m128i *const d) { d[0] = _mm_loadl_epi64((const __m128i *)(s + 0 * stride)); d[1] = _mm_loadl_epi64((const __m128i *)(s + 1 * stride)); d[2] = _mm_loadl_epi64((const __m128i *)(s + 2 * stride)); d[3] = _mm_loadl_epi64((const __m128i *)(s + 3 * stride)); } static INLINE void load_8bit_8x8(const uint8_t *const s, const ptrdiff_t stride, __m128i *const d) { load_8bit_8x4(s + 0 * stride, stride, &d[0]); load_8bit_8x4(s + 4 * stride, stride, &d[4]); } static INLINE void load_8bit_16x8(const uint8_t *const s, const
ptrdiff_t stride, __m128i *const d) { d[0] = _mm_load_si128((const __m128i *)(s + 0 * stride)); d[1] = _mm_load_si128((const __m128i *)(s + 1 * stride)); d[2] = _mm_load_si128((const __m128i *)(s + 2 * stride)); d[3] = _mm_load_si128((const __m128i *)(s + 3 * stride)); d[4] = _mm_load_si128((const __m128i *)(s + 4 * stride)); d[5] = _mm_load_si128((const __m128i *)(s + 5 * stride)); d[6] = _mm_load_si128((const __m128i *)(s + 6 * stride)); d[7] = _mm_load_si128((const __m128i *)(s + 7 * stride)); } static INLINE void loadu_8bit_16x4(const uint8_t *const s, const ptrdiff_t stride, __m128i *const d) { d[0] = _mm_loadu_si128((const __m128i *)(s + 0 * stride)); d[1] = _mm_loadu_si128((const __m128i *)(s + 1 * stride)); d[2] = _mm_loadu_si128((const __m128i *)(s + 2 * stride)); d[3] = _mm_loadu_si128((const __m128i *)(s + 3 * stride)); } static INLINE void loadu_8bit_16x8(const uint8_t *const s, const ptrdiff_t stride, __m128i *const d) { loadu_8bit_16x4(s + 0 * stride, stride, &d[0]); loadu_8bit_16x4(s + 4 * stride, stride, &d[4]); } static INLINE void _mm_storeh_epi64(__m128i *const d, const __m128i s) { _mm_storeh_pi((__m64 *)d, _mm_castsi128_ps(s)); } static INLINE void store_8bit_4x4(const __m128i *const s, uint8_t *const d, const ptrdiff_t stride) { *(int *)(d + 0 * stride) = _mm_cvtsi128_si32(s[0]); *(int *)(d + 1 * stride) = _mm_cvtsi128_si32(s[1]); *(int *)(d + 2 * stride) = _mm_cvtsi128_si32(s[2]); *(int *)(d + 3 * stride) = _mm_cvtsi128_si32(s[3]); } static INLINE void store_8bit_4x4_sse2(const __m128i s, uint8_t *const d, const ptrdiff_t stride) { __m128i ss[4]; ss[0] = s; ss[1] = _mm_srli_si128(s, 4); ss[2] = _mm_srli_si128(s, 8); ss[3] = _mm_srli_si128(s, 12); store_8bit_4x4(ss, d, stride); } static INLINE void store_8bit_8x4_from_16x2(const __m128i *const s, uint8_t *const d, const ptrdiff_t stride) { _mm_storel_epi64((__m128i *)(d + 0 * stride), s[0]); _mm_storeh_epi64((__m128i *)(d + 1 * stride), s[0]); _mm_storel_epi64((__m128i *)(d + 2 * stride), s[1]); _mm_storeh_epi64((__m128i *)(d + 3 * stride), s[1]); } static INLINE void store_8bit_8x8(const __m128i *const s, uint8_t *const d, const ptrdiff_t stride) { _mm_storel_epi64((__m128i *)(d + 0 * stride), s[0]); _mm_storel_epi64((__m128i *)(d + 1 * stride), s[1]); _mm_storel_epi64((__m128i *)(d + 2 * stride), s[2]); _mm_storel_epi64((__m128i *)(d + 3 * stride), s[3]); _mm_storel_epi64((__m128i *)(d + 4 * stride), s[4]); _mm_storel_epi64((__m128i *)(d + 5 * stride), s[5]); _mm_storel_epi64((__m128i *)(d + 6 * stride), s[6]); _mm_storel_epi64((__m128i *)(d + 7 * stride), s[7]); } static INLINE void storeu_8bit_16x4(const __m128i *const s, uint8_t *const d, const ptrdiff_t stride) { _mm_storeu_si128((__m128i *)(d + 0 * stride), s[0]); _mm_storeu_si128((__m128i *)(d + 1 * stride), s[1]); _mm_storeu_si128((__m128i *)(d + 2 * stride), s[2]); _mm_storeu_si128((__m128i *)(d + 3 * stride), s[3]); } #endif // VPX_VPX_DSP_X86_MEM_SSE2_H_ libvpx-1.8.2/vpx_dsp/x86/post_proc_sse2.c000066400000000000000000000112371357355204000202560ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/x86/mem_sse2.h" extern const int16_t vpx_rv[]; void vpx_mbpost_proc_down_sse2(unsigned char *dst, int pitch, int rows, int cols, int flimit) { int col; const __m128i zero = _mm_setzero_si128(); const __m128i f = _mm_set1_epi32(flimit); DECLARE_ALIGNED(16, int16_t, above_context[8 * 8]); // 8 columns are processed at a time. // If rows is less than 8 the bottom border extension fails. assert(cols % 8 == 0); assert(rows >= 8); for (col = 0; col < cols; col += 8) { int row, i; __m128i s = _mm_loadl_epi64((__m128i *)dst); __m128i sum, sumsq_0, sumsq_1; __m128i tmp_0, tmp_1; __m128i below_context; s = _mm_unpacklo_epi8(s, zero); for (i = 0; i < 8; ++i) { _mm_store_si128((__m128i *)above_context + i, s); } // sum *= 9 sum = _mm_slli_epi16(s, 3); sum = _mm_add_epi16(s, sum); // sum^2 * 9 == (sum * 9) * sum tmp_0 = _mm_mullo_epi16(sum, s); tmp_1 = _mm_mulhi_epi16(sum, s); sumsq_0 = _mm_unpacklo_epi16(tmp_0, tmp_1); sumsq_1 = _mm_unpackhi_epi16(tmp_0, tmp_1); // Prime sum/sumsq for (i = 1; i <= 6; ++i) { __m128i a = _mm_loadl_epi64((__m128i *)(dst + i * pitch)); a = _mm_unpacklo_epi8(a, zero); sum = _mm_add_epi16(sum, a); a = _mm_mullo_epi16(a, a); sumsq_0 = _mm_add_epi32(sumsq_0, _mm_unpacklo_epi16(a, zero)); sumsq_1 = _mm_add_epi32(sumsq_1, _mm_unpackhi_epi16(a, zero)); } for (row = 0; row < rows + 8; row++) { const __m128i above = _mm_load_si128((__m128i *)above_context + (row & 7)); __m128i this_row = _mm_loadl_epi64((__m128i *)(dst + row * pitch)); __m128i above_sq, below_sq; __m128i mask_0, mask_1; __m128i multmp_0, multmp_1; __m128i rv; __m128i out; this_row = _mm_unpacklo_epi8(this_row, zero); if (row + 7 < rows) { // Instead of copying the end context we just stop loading when we get // to the last one. below_context = _mm_loadl_epi64((__m128i *)(dst + (row + 7) * pitch)); below_context = _mm_unpacklo_epi8(below_context, zero); } sum = _mm_sub_epi16(sum, above); sum = _mm_add_epi16(sum, below_context); // context^2 fits in 16 bits. Don't need to mulhi and combine. Just zero // extend. Unfortunately we can't do below_sq - above_sq in 16 bits // because x86 does not have unpack with sign extension. 
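      // In scalar terms the per-column window update below is roughly
      //   sum   += below - above;
      //   sumsq += below * below - above * above;
      // where 'above' is the pixel leaving the 15-row window and 'below'
      // the one entering it; the squares are zero-extended to 32 bits
      // first for the reason given above.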
above_sq = _mm_mullo_epi16(above, above); sumsq_0 = _mm_sub_epi32(sumsq_0, _mm_unpacklo_epi16(above_sq, zero)); sumsq_1 = _mm_sub_epi32(sumsq_1, _mm_unpackhi_epi16(above_sq, zero)); below_sq = _mm_mullo_epi16(below_context, below_context); sumsq_0 = _mm_add_epi32(sumsq_0, _mm_unpacklo_epi16(below_sq, zero)); sumsq_1 = _mm_add_epi32(sumsq_1, _mm_unpackhi_epi16(below_sq, zero)); // sumsq * 16 - sumsq == sumsq * 15 mask_0 = _mm_slli_epi32(sumsq_0, 4); mask_0 = _mm_sub_epi32(mask_0, sumsq_0); mask_1 = _mm_slli_epi32(sumsq_1, 4); mask_1 = _mm_sub_epi32(mask_1, sumsq_1); multmp_0 = _mm_mullo_epi16(sum, sum); multmp_1 = _mm_mulhi_epi16(sum, sum); mask_0 = _mm_sub_epi32(mask_0, _mm_unpacklo_epi16(multmp_0, multmp_1)); mask_1 = _mm_sub_epi32(mask_1, _mm_unpackhi_epi16(multmp_0, multmp_1)); // mask - f gives a negative value when mask < f mask_0 = _mm_sub_epi32(mask_0, f); mask_1 = _mm_sub_epi32(mask_1, f); // Shift the sign bit down to create a mask mask_0 = _mm_srai_epi32(mask_0, 31); mask_1 = _mm_srai_epi32(mask_1, 31); mask_0 = _mm_packs_epi32(mask_0, mask_1); rv = _mm_loadu_si128((__m128i const *)(vpx_rv + (row & 127))); mask_1 = _mm_add_epi16(rv, sum); mask_1 = _mm_add_epi16(mask_1, this_row); mask_1 = _mm_srai_epi16(mask_1, 4); mask_1 = _mm_and_si128(mask_0, mask_1); mask_0 = _mm_andnot_si128(mask_0, this_row); out = _mm_or_si128(mask_1, mask_0); _mm_storel_epi64((__m128i *)(dst + row * pitch), _mm_packus_epi16(out, zero)); _mm_store_si128((__m128i *)above_context + ((row + 8) & 7), this_row); } dst += 8; } } libvpx-1.8.2/vpx_dsp/x86/quantize_avx.c000066400000000000000000000237541357355204000200370ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #if defined(_MSC_VER) #include #endif #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/x86/bitdepth_conversion_sse2.h" #include "vpx_dsp/x86/quantize_sse2.h" #include "vpx_dsp/x86/quantize_ssse3.h" void vpx_quantize_b_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { const __m128i zero = _mm_setzero_si128(); const __m256i big_zero = _mm256_setzero_si256(); int index; __m128i zbin, round, quant, dequant, shift; __m128i coeff0, coeff1; __m128i qcoeff0, qcoeff1; __m128i cmp_mask0, cmp_mask1; __m128i all_zero; __m128i eob = zero, eob0; (void)scan; (void)skip_block; assert(!skip_block); *eob_ptr = 0; load_b_values(zbin_ptr, &zbin, round_ptr, &round, quant_ptr, &quant, dequant_ptr, &dequant, quant_shift_ptr, &shift); // Do DC and first 15 AC. 
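  // Lane 0 of zbin/round/quant/shift holds the DC constant and the other
  // lanes hold the AC constant, so this first group of 16 quantizes DC in
  // lane 0 and AC elsewhere; _mm_unpackhi_epi64 later broadcasts the AC
  // half of each vector for the remaining groups.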
coeff0 = load_tran_low(coeff_ptr); coeff1 = load_tran_low(coeff_ptr + 8); qcoeff0 = _mm_abs_epi16(coeff0); qcoeff1 = _mm_abs_epi16(coeff1); cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin); zbin = _mm_unpackhi_epi64(zbin, zbin); // Switch DC to AC cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin); all_zero = _mm_or_si128(cmp_mask0, cmp_mask1); if (_mm_test_all_zeros(all_zero, all_zero)) { _mm256_store_si256((__m256i *)(qcoeff_ptr), big_zero); _mm256_store_si256((__m256i *)(dqcoeff_ptr), big_zero); #if CONFIG_VP9_HIGHBITDEPTH _mm256_store_si256((__m256i *)(qcoeff_ptr + 8), big_zero); _mm256_store_si256((__m256i *)(dqcoeff_ptr + 8), big_zero); #endif // CONFIG_VP9_HIGHBITDEPTH if (n_coeffs == 16) return; round = _mm_unpackhi_epi64(round, round); quant = _mm_unpackhi_epi64(quant, quant); shift = _mm_unpackhi_epi64(shift, shift); dequant = _mm_unpackhi_epi64(dequant, dequant); } else { calculate_qcoeff(&qcoeff0, round, quant, shift); round = _mm_unpackhi_epi64(round, round); quant = _mm_unpackhi_epi64(quant, quant); shift = _mm_unpackhi_epi64(shift, shift); calculate_qcoeff(&qcoeff1, round, quant, shift); // Reinsert signs qcoeff0 = _mm_sign_epi16(qcoeff0, coeff0); qcoeff1 = _mm_sign_epi16(qcoeff1, coeff1); // Mask out zbin threshold coeffs qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); store_tran_low(qcoeff0, qcoeff_ptr); store_tran_low(qcoeff1, qcoeff_ptr + 8); calculate_dqcoeff_and_store(qcoeff0, dequant, dqcoeff_ptr); dequant = _mm_unpackhi_epi64(dequant, dequant); calculate_dqcoeff_and_store(qcoeff1, dequant, dqcoeff_ptr + 8); eob = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, 0, zero); } // AC only loop. for (index = 16; index < n_coeffs; index += 16) { coeff0 = load_tran_low(coeff_ptr + index); coeff1 = load_tran_low(coeff_ptr + index + 8); qcoeff0 = _mm_abs_epi16(coeff0); qcoeff1 = _mm_abs_epi16(coeff1); cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin); cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin); all_zero = _mm_or_si128(cmp_mask0, cmp_mask1); if (_mm_test_all_zeros(all_zero, all_zero)) { _mm256_store_si256((__m256i *)(qcoeff_ptr + index), big_zero); _mm256_store_si256((__m256i *)(dqcoeff_ptr + index), big_zero); #if CONFIG_VP9_HIGHBITDEPTH _mm256_store_si256((__m256i *)(qcoeff_ptr + index + 8), big_zero); _mm256_store_si256((__m256i *)(dqcoeff_ptr + index + 8), big_zero); #endif // CONFIG_VP9_HIGHBITDEPTH continue; } calculate_qcoeff(&qcoeff0, round, quant, shift); calculate_qcoeff(&qcoeff1, round, quant, shift); qcoeff0 = _mm_sign_epi16(qcoeff0, coeff0); qcoeff1 = _mm_sign_epi16(qcoeff1, coeff1); qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); store_tran_low(qcoeff0, qcoeff_ptr + index); store_tran_low(qcoeff1, qcoeff_ptr + index + 8); calculate_dqcoeff_and_store(qcoeff0, dequant, dqcoeff_ptr + index); calculate_dqcoeff_and_store(qcoeff1, dequant, dqcoeff_ptr + index + 8); eob0 = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, index, zero); eob = _mm_max_epi16(eob, eob0); } *eob_ptr = accumulate_eob(eob); } void vpx_quantize_b_32x32_avx(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { const __m128i zero = _mm_setzero_si128(); const __m128i one = _mm_set1_epi16(1); const __m256i big_zero = _mm256_setzero_si256(); int index; 
__m128i zbin, round, quant, dequant, shift; __m128i coeff0, coeff1; __m128i qcoeff0, qcoeff1; __m128i cmp_mask0, cmp_mask1; __m128i all_zero; __m128i eob = zero, eob0; (void)scan; (void)n_coeffs; (void)skip_block; assert(!skip_block); // Setup global values. // The 32x32 halves zbin and round. zbin = _mm_load_si128((const __m128i *)zbin_ptr); // Shift with rounding. zbin = _mm_add_epi16(zbin, one); zbin = _mm_srli_epi16(zbin, 1); // x86 has no "greater *or equal*" comparison. Subtract 1 from zbin so // it is a strict "greater" comparison. zbin = _mm_sub_epi16(zbin, one); round = _mm_load_si128((const __m128i *)round_ptr); round = _mm_add_epi16(round, one); round = _mm_srli_epi16(round, 1); quant = _mm_load_si128((const __m128i *)quant_ptr); dequant = _mm_load_si128((const __m128i *)dequant_ptr); shift = _mm_load_si128((const __m128i *)quant_shift_ptr); shift = _mm_slli_epi16(shift, 1); // Do DC and first 15 AC. coeff0 = load_tran_low(coeff_ptr); coeff1 = load_tran_low(coeff_ptr + 8); qcoeff0 = _mm_abs_epi16(coeff0); qcoeff1 = _mm_abs_epi16(coeff1); cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin); zbin = _mm_unpackhi_epi64(zbin, zbin); // Switch DC to AC. cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin); all_zero = _mm_or_si128(cmp_mask0, cmp_mask1); if (_mm_test_all_zeros(all_zero, all_zero)) { _mm256_store_si256((__m256i *)(qcoeff_ptr), big_zero); _mm256_store_si256((__m256i *)(dqcoeff_ptr), big_zero); #if CONFIG_VP9_HIGHBITDEPTH _mm256_store_si256((__m256i *)(qcoeff_ptr + 8), big_zero); _mm256_store_si256((__m256i *)(dqcoeff_ptr + 8), big_zero); #endif // CONFIG_VP9_HIGHBITDEPTH round = _mm_unpackhi_epi64(round, round); quant = _mm_unpackhi_epi64(quant, quant); shift = _mm_unpackhi_epi64(shift, shift); dequant = _mm_unpackhi_epi64(dequant, dequant); } else { calculate_qcoeff(&qcoeff0, round, quant, shift); round = _mm_unpackhi_epi64(round, round); quant = _mm_unpackhi_epi64(quant, quant); shift = _mm_unpackhi_epi64(shift, shift); calculate_qcoeff(&qcoeff1, round, quant, shift); // Reinsert signs. qcoeff0 = _mm_sign_epi16(qcoeff0, coeff0); qcoeff1 = _mm_sign_epi16(qcoeff1, coeff1); // Mask out zbin threshold coeffs. qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); store_tran_low(qcoeff0, qcoeff_ptr); store_tran_low(qcoeff1, qcoeff_ptr + 8); calculate_dqcoeff_and_store_32x32(qcoeff0, dequant, zero, dqcoeff_ptr); dequant = _mm_unpackhi_epi64(dequant, dequant); calculate_dqcoeff_and_store_32x32(qcoeff1, dequant, zero, dqcoeff_ptr + 8); eob = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, 0, zero); } // AC only loop. 
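  // n_coeffs is unused above because a 32x32 block always carries
  // 32 * 32 = 1024 coefficients; the loop below handles the remaining
  // 1008 in groups of 16, storing zeros directly whenever an entire
  // group fails the zbin test.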
for (index = 16; index < 32 * 32; index += 16) { coeff0 = load_tran_low(coeff_ptr + index); coeff1 = load_tran_low(coeff_ptr + index + 8); qcoeff0 = _mm_abs_epi16(coeff0); qcoeff1 = _mm_abs_epi16(coeff1); cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin); cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin); all_zero = _mm_or_si128(cmp_mask0, cmp_mask1); if (_mm_test_all_zeros(all_zero, all_zero)) { _mm256_store_si256((__m256i *)(qcoeff_ptr + index), big_zero); _mm256_store_si256((__m256i *)(dqcoeff_ptr + index), big_zero); #if CONFIG_VP9_HIGHBITDEPTH _mm256_store_si256((__m256i *)(qcoeff_ptr + index + 8), big_zero); _mm256_store_si256((__m256i *)(dqcoeff_ptr + index + 8), big_zero); #endif // CONFIG_VP9_HIGHBITDEPTH continue; } calculate_qcoeff(&qcoeff0, round, quant, shift); calculate_qcoeff(&qcoeff1, round, quant, shift); qcoeff0 = _mm_sign_epi16(qcoeff0, coeff0); qcoeff1 = _mm_sign_epi16(qcoeff1, coeff1); qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); store_tran_low(qcoeff0, qcoeff_ptr + index); store_tran_low(qcoeff1, qcoeff_ptr + index + 8); calculate_dqcoeff_and_store_32x32(qcoeff0, dequant, zero, dqcoeff_ptr + index); calculate_dqcoeff_and_store_32x32(qcoeff1, dequant, zero, dqcoeff_ptr + index + 8); eob0 = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, index, zero); eob = _mm_max_epi16(eob, eob0); } *eob_ptr = accumulate_eob(eob); } libvpx-1.8.2/vpx_dsp/x86/quantize_sse2.c000066400000000000000000000100121357355204000200740ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/x86/bitdepth_conversion_sse2.h" #include "vpx_dsp/x86/quantize_sse2.h" void vpx_quantize_b_sse2(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { const __m128i zero = _mm_setzero_si128(); int index = 16; __m128i zbin, round, quant, dequant, shift; __m128i coeff0, coeff1, coeff0_sign, coeff1_sign; __m128i qcoeff0, qcoeff1; __m128i cmp_mask0, cmp_mask1; __m128i eob, eob0; (void)scan; (void)skip_block; assert(!skip_block); // Setup global values. load_b_values(zbin_ptr, &zbin, round_ptr, &round, quant_ptr, &quant, dequant_ptr, &dequant, quant_shift_ptr, &shift); // Do DC and first 15 AC. coeff0 = load_tran_low(coeff_ptr); coeff1 = load_tran_low(coeff_ptr + 8); // Poor man's abs(). 
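  // SSE2 lacks pabsw/psignw, so an arithmetic-shift sign mask is used
  // instead: invert_sign_sse2() computes (x ^ sign) - sign, which equals
  // abs(x) when sign = x >> 15. A worked example is in quantize_sse2.h.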
coeff0_sign = _mm_srai_epi16(coeff0, 15); coeff1_sign = _mm_srai_epi16(coeff1, 15); qcoeff0 = invert_sign_sse2(coeff0, coeff0_sign); qcoeff1 = invert_sign_sse2(coeff1, coeff1_sign); cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin); zbin = _mm_unpackhi_epi64(zbin, zbin); // Switch DC to AC cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin); calculate_qcoeff(&qcoeff0, round, quant, shift); round = _mm_unpackhi_epi64(round, round); quant = _mm_unpackhi_epi64(quant, quant); shift = _mm_unpackhi_epi64(shift, shift); calculate_qcoeff(&qcoeff1, round, quant, shift); // Reinsert signs qcoeff0 = invert_sign_sse2(qcoeff0, coeff0_sign); qcoeff1 = invert_sign_sse2(qcoeff1, coeff1_sign); // Mask out zbin threshold coeffs qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); store_tran_low(qcoeff0, qcoeff_ptr); store_tran_low(qcoeff1, qcoeff_ptr + 8); calculate_dqcoeff_and_store(qcoeff0, dequant, dqcoeff_ptr); dequant = _mm_unpackhi_epi64(dequant, dequant); calculate_dqcoeff_and_store(qcoeff1, dequant, dqcoeff_ptr + 8); eob = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, 0, zero); // AC only loop. while (index < n_coeffs) { coeff0 = load_tran_low(coeff_ptr + index); coeff1 = load_tran_low(coeff_ptr + index + 8); coeff0_sign = _mm_srai_epi16(coeff0, 15); coeff1_sign = _mm_srai_epi16(coeff1, 15); qcoeff0 = invert_sign_sse2(coeff0, coeff0_sign); qcoeff1 = invert_sign_sse2(coeff1, coeff1_sign); cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin); cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin); calculate_qcoeff(&qcoeff0, round, quant, shift); calculate_qcoeff(&qcoeff1, round, quant, shift); qcoeff0 = invert_sign_sse2(qcoeff0, coeff0_sign); qcoeff1 = invert_sign_sse2(qcoeff1, coeff1_sign); qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); store_tran_low(qcoeff0, qcoeff_ptr + index); store_tran_low(qcoeff1, qcoeff_ptr + index + 8); calculate_dqcoeff_and_store(qcoeff0, dequant, dqcoeff_ptr + index); calculate_dqcoeff_and_store(qcoeff1, dequant, dqcoeff_ptr + index + 8); eob0 = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, index, zero); eob = _mm_max_epi16(eob, eob0); index += 16; } *eob_ptr = accumulate_eob(eob); } libvpx-1.8.2/vpx_dsp/x86/quantize_sse2.h000066400000000000000000000075211357355204000201140ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_QUANTIZE_SSE2_H_ #define VPX_VPX_DSP_X86_QUANTIZE_SSE2_H_ #include #include "./vpx_config.h" #include "vpx/vpx_integer.h" static INLINE void load_b_values(const int16_t *zbin_ptr, __m128i *zbin, const int16_t *round_ptr, __m128i *round, const int16_t *quant_ptr, __m128i *quant, const int16_t *dequant_ptr, __m128i *dequant, const int16_t *shift_ptr, __m128i *shift) { *zbin = _mm_load_si128((const __m128i *)zbin_ptr); *round = _mm_load_si128((const __m128i *)round_ptr); *quant = _mm_load_si128((const __m128i *)quant_ptr); *zbin = _mm_sub_epi16(*zbin, _mm_set1_epi16(1)); *dequant = _mm_load_si128((const __m128i *)dequant_ptr); *shift = _mm_load_si128((const __m128i *)shift_ptr); } // With ssse3 and later abs() and sign() are preferred. 
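// invert_sign_sse2(a, sign) computes (a ^ sign) - sign per 16-bit lane.
// With sign = a >> 15 this yields abs(a), and applying it again with the
// same sign mask restores the original value. E.g. for a = -5, sign = -1:
//   (-5 ^ -1) - (-1) = 4 + 1 = 5, and (5 ^ -1) - (-1) = -6 + 1 = -5.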
static INLINE __m128i invert_sign_sse2(__m128i a, __m128i sign) { a = _mm_xor_si128(a, sign); return _mm_sub_epi16(a, sign); } static INLINE void calculate_qcoeff(__m128i *coeff, const __m128i round, const __m128i quant, const __m128i shift) { __m128i tmp, qcoeff; qcoeff = _mm_adds_epi16(*coeff, round); tmp = _mm_mulhi_epi16(qcoeff, quant); qcoeff = _mm_add_epi16(tmp, qcoeff); *coeff = _mm_mulhi_epi16(qcoeff, shift); } static INLINE void calculate_dqcoeff_and_store(__m128i qcoeff, __m128i dequant, tran_low_t *dqcoeff) { #if CONFIG_VP9_HIGHBITDEPTH const __m128i low = _mm_mullo_epi16(qcoeff, dequant); const __m128i high = _mm_mulhi_epi16(qcoeff, dequant); const __m128i dqcoeff32_0 = _mm_unpacklo_epi16(low, high); const __m128i dqcoeff32_1 = _mm_unpackhi_epi16(low, high); _mm_store_si128((__m128i *)(dqcoeff), dqcoeff32_0); _mm_store_si128((__m128i *)(dqcoeff + 4), dqcoeff32_1); #else const __m128i dqcoeff16 = _mm_mullo_epi16(qcoeff, dequant); _mm_store_si128((__m128i *)(dqcoeff), dqcoeff16); #endif // CONFIG_VP9_HIGHBITDEPTH } // Scan 16 values for eob reference in scan. Use masks (-1) from comparing to // zbin to add 1 to the index in 'scan'. static INLINE __m128i scan_for_eob(__m128i *coeff0, __m128i *coeff1, const __m128i zbin_mask0, const __m128i zbin_mask1, const int16_t *scan, const int index, const __m128i zero) { const __m128i zero_coeff0 = _mm_cmpeq_epi16(*coeff0, zero); const __m128i zero_coeff1 = _mm_cmpeq_epi16(*coeff1, zero); __m128i scan0 = _mm_load_si128((const __m128i *)(scan + index)); __m128i scan1 = _mm_load_si128((const __m128i *)(scan + index + 8)); __m128i eob0, eob1; // Add one to convert from indices to counts scan0 = _mm_sub_epi16(scan0, zbin_mask0); scan1 = _mm_sub_epi16(scan1, zbin_mask1); eob0 = _mm_andnot_si128(zero_coeff0, scan0); eob1 = _mm_andnot_si128(zero_coeff1, scan1); return _mm_max_epi16(eob0, eob1); } static INLINE int16_t accumulate_eob(__m128i eob) { __m128i eob_shuffled; eob_shuffled = _mm_shuffle_epi32(eob, 0xe); eob = _mm_max_epi16(eob, eob_shuffled); eob_shuffled = _mm_shufflelo_epi16(eob, 0xe); eob = _mm_max_epi16(eob, eob_shuffled); eob_shuffled = _mm_shufflelo_epi16(eob, 0x1); eob = _mm_max_epi16(eob, eob_shuffled); return _mm_extract_epi16(eob, 1); } #endif // VPX_VPX_DSP_X86_QUANTIZE_SSE2_H_ libvpx-1.8.2/vpx_dsp/x86/quantize_ssse3.c000066400000000000000000000225061357355204000202730ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/x86/bitdepth_conversion_sse2.h" #include "vpx_dsp/x86/quantize_sse2.h" #include "vpx_dsp/x86/quantize_ssse3.h" void vpx_quantize_b_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { const __m128i zero = _mm_setzero_si128(); int index = 16; __m128i zbin, round, quant, dequant, shift; __m128i coeff0, coeff1; __m128i qcoeff0, qcoeff1; __m128i cmp_mask0, cmp_mask1; __m128i eob, eob0; (void)scan; (void)skip_block; assert(!skip_block); load_b_values(zbin_ptr, &zbin, round_ptr, &round, quant_ptr, &quant, dequant_ptr, &dequant, quant_shift_ptr, &shift); // Do DC and first 15 AC. coeff0 = load_tran_low(coeff_ptr); coeff1 = load_tran_low(coeff_ptr + 8); qcoeff0 = _mm_abs_epi16(coeff0); qcoeff1 = _mm_abs_epi16(coeff1); cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin); zbin = _mm_unpackhi_epi64(zbin, zbin); // Switch DC to AC cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin); calculate_qcoeff(&qcoeff0, round, quant, shift); round = _mm_unpackhi_epi64(round, round); quant = _mm_unpackhi_epi64(quant, quant); shift = _mm_unpackhi_epi64(shift, shift); calculate_qcoeff(&qcoeff1, round, quant, shift); // Reinsert signs qcoeff0 = _mm_sign_epi16(qcoeff0, coeff0); qcoeff1 = _mm_sign_epi16(qcoeff1, coeff1); // Mask out zbin threshold coeffs qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); store_tran_low(qcoeff0, qcoeff_ptr); store_tran_low(qcoeff1, qcoeff_ptr + 8); calculate_dqcoeff_and_store(qcoeff0, dequant, dqcoeff_ptr); dequant = _mm_unpackhi_epi64(dequant, dequant); calculate_dqcoeff_and_store(qcoeff1, dequant, dqcoeff_ptr + 8); eob = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, 0, zero); // AC only loop. 
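  // Unlike the AVX version, there is no early-out here when an entire
  // group fails the zbin test; every remaining group of 16 is quantized
  // and stored unconditionally.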
while (index < n_coeffs) { coeff0 = load_tran_low(coeff_ptr + index); coeff1 = load_tran_low(coeff_ptr + index + 8); qcoeff0 = _mm_abs_epi16(coeff0); qcoeff1 = _mm_abs_epi16(coeff1); cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin); cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin); calculate_qcoeff(&qcoeff0, round, quant, shift); calculate_qcoeff(&qcoeff1, round, quant, shift); qcoeff0 = _mm_sign_epi16(qcoeff0, coeff0); qcoeff1 = _mm_sign_epi16(qcoeff1, coeff1); qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); store_tran_low(qcoeff0, qcoeff_ptr + index); store_tran_low(qcoeff1, qcoeff_ptr + index + 8); calculate_dqcoeff_and_store(qcoeff0, dequant, dqcoeff_ptr + index); calculate_dqcoeff_and_store(qcoeff1, dequant, dqcoeff_ptr + index + 8); eob0 = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, index, zero); eob = _mm_max_epi16(eob, eob0); index += 16; } *eob_ptr = accumulate_eob(eob); } void vpx_quantize_b_32x32_ssse3(const tran_low_t *coeff_ptr, intptr_t n_coeffs, int skip_block, const int16_t *zbin_ptr, const int16_t *round_ptr, const int16_t *quant_ptr, const int16_t *quant_shift_ptr, tran_low_t *qcoeff_ptr, tran_low_t *dqcoeff_ptr, const int16_t *dequant_ptr, uint16_t *eob_ptr, const int16_t *scan, const int16_t *iscan) { const __m128i zero = _mm_setzero_si128(); const __m128i one = _mm_set1_epi16(1); int index; __m128i zbin, round, quant, dequant, shift; __m128i coeff0, coeff1; __m128i qcoeff0, qcoeff1; __m128i cmp_mask0, cmp_mask1; __m128i all_zero; __m128i eob = zero, eob0; (void)scan; (void)n_coeffs; (void)skip_block; assert(!skip_block); // Setup global values. // The 32x32 halves zbin and round. zbin = _mm_load_si128((const __m128i *)zbin_ptr); // Shift with rounding. zbin = _mm_add_epi16(zbin, one); zbin = _mm_srli_epi16(zbin, 1); // x86 has no "greater *or equal*" comparison. Subtract 1 from zbin so // it is a strict "greater" comparison. zbin = _mm_sub_epi16(zbin, one); round = _mm_load_si128((const __m128i *)round_ptr); round = _mm_add_epi16(round, one); round = _mm_srli_epi16(round, 1); quant = _mm_load_si128((const __m128i *)quant_ptr); dequant = _mm_load_si128((const __m128i *)dequant_ptr); shift = _mm_load_si128((const __m128i *)quant_shift_ptr); // I suspect this is not technically OK because quant_shift can be up // to 1 << 16 and shifting up again will outrange that, but the test is not // comprehensive enough to catch that and "it's been that way forever" shift = _mm_slli_epi16(shift, 1); // Do DC and first 15 AC. coeff0 = load_tran_low(coeff_ptr); coeff1 = load_tran_low(coeff_ptr + 8); qcoeff0 = _mm_abs_epi16(coeff0); qcoeff1 = _mm_abs_epi16(coeff1); cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin); zbin = _mm_unpackhi_epi64(zbin, zbin); // Switch DC to AC. 
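  // Only the lane-0 comparison above used the DC threshold; every group
  // from here on compares against the AC zbin. Both thresholds were
  // halved with rounding, zbin' = (zbin + 1) >> 1, which matches
  // ROUND_POWER_OF_TWO(zbin, 1) in the C reference, then decremented so
  // the strict _mm_cmpgt_epi16 behaves as >=.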
cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin); all_zero = _mm_or_si128(cmp_mask0, cmp_mask1); if (_mm_movemask_epi8(all_zero) == 0) { _mm_store_si128((__m128i *)(qcoeff_ptr), zero); _mm_store_si128((__m128i *)(qcoeff_ptr + 8), zero); _mm_store_si128((__m128i *)(dqcoeff_ptr), zero); _mm_store_si128((__m128i *)(dqcoeff_ptr + 8), zero); #if CONFIG_VP9_HIGHBITDEPTH _mm_store_si128((__m128i *)(qcoeff_ptr + 4), zero); _mm_store_si128((__m128i *)(qcoeff_ptr + 12), zero); _mm_store_si128((__m128i *)(dqcoeff_ptr + 4), zero); _mm_store_si128((__m128i *)(dqcoeff_ptr + 12), zero); #endif // CONFIG_HIGHBITDEPTH round = _mm_unpackhi_epi64(round, round); quant = _mm_unpackhi_epi64(quant, quant); shift = _mm_unpackhi_epi64(shift, shift); dequant = _mm_unpackhi_epi64(dequant, dequant); } else { calculate_qcoeff(&qcoeff0, round, quant, shift); round = _mm_unpackhi_epi64(round, round); quant = _mm_unpackhi_epi64(quant, quant); shift = _mm_unpackhi_epi64(shift, shift); calculate_qcoeff(&qcoeff1, round, quant, shift); // Reinsert signs. qcoeff0 = _mm_sign_epi16(qcoeff0, coeff0); qcoeff1 = _mm_sign_epi16(qcoeff1, coeff1); // Mask out zbin threshold coeffs. qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); store_tran_low(qcoeff0, qcoeff_ptr); store_tran_low(qcoeff1, qcoeff_ptr + 8); calculate_dqcoeff_and_store_32x32(qcoeff0, dequant, zero, dqcoeff_ptr); dequant = _mm_unpackhi_epi64(dequant, dequant); calculate_dqcoeff_and_store_32x32(qcoeff1, dequant, zero, dqcoeff_ptr + 8); eob = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, 0, zero); } // AC only loop. for (index = 16; index < 32 * 32; index += 16) { coeff0 = load_tran_low(coeff_ptr + index); coeff1 = load_tran_low(coeff_ptr + index + 8); qcoeff0 = _mm_abs_epi16(coeff0); qcoeff1 = _mm_abs_epi16(coeff1); cmp_mask0 = _mm_cmpgt_epi16(qcoeff0, zbin); cmp_mask1 = _mm_cmpgt_epi16(qcoeff1, zbin); all_zero = _mm_or_si128(cmp_mask0, cmp_mask1); if (_mm_movemask_epi8(all_zero) == 0) { _mm_store_si128((__m128i *)(qcoeff_ptr + index), zero); _mm_store_si128((__m128i *)(qcoeff_ptr + index + 8), zero); _mm_store_si128((__m128i *)(dqcoeff_ptr + index), zero); _mm_store_si128((__m128i *)(dqcoeff_ptr + index + 8), zero); #if CONFIG_VP9_HIGHBITDEPTH _mm_store_si128((__m128i *)(qcoeff_ptr + index + 4), zero); _mm_store_si128((__m128i *)(qcoeff_ptr + index + 12), zero); _mm_store_si128((__m128i *)(dqcoeff_ptr + index + 4), zero); _mm_store_si128((__m128i *)(dqcoeff_ptr + index + 12), zero); #endif // CONFIG_VP9_HIGHBITDEPTH continue; } calculate_qcoeff(&qcoeff0, round, quant, shift); calculate_qcoeff(&qcoeff1, round, quant, shift); qcoeff0 = _mm_sign_epi16(qcoeff0, coeff0); qcoeff1 = _mm_sign_epi16(qcoeff1, coeff1); qcoeff0 = _mm_and_si128(qcoeff0, cmp_mask0); qcoeff1 = _mm_and_si128(qcoeff1, cmp_mask1); store_tran_low(qcoeff0, qcoeff_ptr + index); store_tran_low(qcoeff1, qcoeff_ptr + index + 8); calculate_dqcoeff_and_store_32x32(qcoeff0, dequant, zero, dqcoeff_ptr + index); calculate_dqcoeff_and_store_32x32(qcoeff1, dequant, zero, dqcoeff_ptr + 8 + index); eob0 = scan_for_eob(&qcoeff0, &qcoeff1, cmp_mask0, cmp_mask1, iscan, index, zero); eob = _mm_max_epi16(eob, eob0); } *eob_ptr = accumulate_eob(eob); } libvpx-1.8.2/vpx_dsp/x86/quantize_ssse3.h000066400000000000000000000035521357355204000203000ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_QUANTIZE_SSSE3_H_ #define VPX_VPX_DSP_X86_QUANTIZE_SSSE3_H_ #include #include "./vpx_config.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/x86/quantize_sse2.h" static INLINE void calculate_dqcoeff_and_store_32x32(const __m128i qcoeff, const __m128i dequant, const __m128i zero, tran_low_t *dqcoeff) { // Un-sign to bias rounding like C. const __m128i coeff = _mm_abs_epi16(qcoeff); const __m128i sign_0 = _mm_unpacklo_epi16(zero, qcoeff); const __m128i sign_1 = _mm_unpackhi_epi16(zero, qcoeff); const __m128i low = _mm_mullo_epi16(coeff, dequant); const __m128i high = _mm_mulhi_epi16(coeff, dequant); __m128i dqcoeff32_0 = _mm_unpacklo_epi16(low, high); __m128i dqcoeff32_1 = _mm_unpackhi_epi16(low, high); // "Divide" by 2. dqcoeff32_0 = _mm_srli_epi32(dqcoeff32_0, 1); dqcoeff32_1 = _mm_srli_epi32(dqcoeff32_1, 1); dqcoeff32_0 = _mm_sign_epi32(dqcoeff32_0, sign_0); dqcoeff32_1 = _mm_sign_epi32(dqcoeff32_1, sign_1); #if CONFIG_VP9_HIGHBITDEPTH _mm_store_si128((__m128i *)(dqcoeff), dqcoeff32_0); _mm_store_si128((__m128i *)(dqcoeff + 4), dqcoeff32_1); #else _mm_store_si128((__m128i *)(dqcoeff), _mm_packs_epi32(dqcoeff32_0, dqcoeff32_1)); #endif // CONFIG_VP9_HIGHBITDEPTH } #endif // VPX_VPX_DSP_X86_QUANTIZE_SSSE3_H_ libvpx-1.8.2/vpx_dsp/x86/sad4d_avx2.c000066400000000000000000000151561357355204000172550ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include // AVX2 #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" static INLINE void calc_final_4(const __m256i *const sums /*[4]*/, uint32_t *sad_array) { const __m256i t0 = _mm256_hadd_epi32(sums[0], sums[1]); const __m256i t1 = _mm256_hadd_epi32(sums[2], sums[3]); const __m256i t2 = _mm256_hadd_epi32(t0, t1); const __m128i sum = _mm_add_epi32(_mm256_castsi256_si128(t2), _mm256_extractf128_si256(t2, 1)); _mm_storeu_si128((__m128i *)sad_array, sum); } void vpx_sad32x32x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]) { int i; const uint8_t *refs[4]; __m256i sums[4]; refs[0] = ref_array[0]; refs[1] = ref_array[1]; refs[2] = ref_array[2]; refs[3] = ref_array[3]; sums[0] = _mm256_setzero_si256(); sums[1] = _mm256_setzero_si256(); sums[2] = _mm256_setzero_si256(); sums[3] = _mm256_setzero_si256(); for (i = 0; i < 32; i++) { __m256i r[4]; // load src and all ref[] const __m256i s = _mm256_load_si256((const __m256i *)src_ptr); r[0] = _mm256_loadu_si256((const __m256i *)refs[0]); r[1] = _mm256_loadu_si256((const __m256i *)refs[1]); r[2] = _mm256_loadu_si256((const __m256i *)refs[2]); r[3] = _mm256_loadu_si256((const __m256i *)refs[3]); // sum of the absolute differences between every ref[] to src r[0] = _mm256_sad_epu8(r[0], s); r[1] = _mm256_sad_epu8(r[1], s); r[2] = _mm256_sad_epu8(r[2], s); r[3] = _mm256_sad_epu8(r[3], s); // sum every ref[] sums[0] = _mm256_add_epi32(sums[0], r[0]); sums[1] = _mm256_add_epi32(sums[1], r[1]); sums[2] = _mm256_add_epi32(sums[2], r[2]); sums[3] = _mm256_add_epi32(sums[3], r[3]); src_ptr += src_stride; refs[0] += ref_stride; refs[1] += ref_stride; refs[2] += ref_stride; refs[3] += ref_stride; } calc_final_4(sums, sad_array); } void vpx_sad32x32x8_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, uint32_t *sad_array) { int i; __m256i sums[8]; sums[0] = _mm256_setzero_si256(); sums[1] = _mm256_setzero_si256(); sums[2] = _mm256_setzero_si256(); sums[3] = _mm256_setzero_si256(); sums[4] = _mm256_setzero_si256(); sums[5] = _mm256_setzero_si256(); sums[6] = _mm256_setzero_si256(); sums[7] = _mm256_setzero_si256(); for (i = 0; i < 32; i++) { __m256i r[8]; // load src and all ref[] const __m256i s = _mm256_load_si256((const __m256i *)src_ptr); r[0] = _mm256_loadu_si256((const __m256i *)&ref_ptr[0]); r[1] = _mm256_loadu_si256((const __m256i *)&ref_ptr[1]); r[2] = _mm256_loadu_si256((const __m256i *)&ref_ptr[2]); r[3] = _mm256_loadu_si256((const __m256i *)&ref_ptr[3]); r[4] = _mm256_loadu_si256((const __m256i *)&ref_ptr[4]); r[5] = _mm256_loadu_si256((const __m256i *)&ref_ptr[5]); r[6] = _mm256_loadu_si256((const __m256i *)&ref_ptr[6]); r[7] = _mm256_loadu_si256((const __m256i *)&ref_ptr[7]); // sum of the absolute differences between every ref[] to src r[0] = _mm256_sad_epu8(r[0], s); r[1] = _mm256_sad_epu8(r[1], s); r[2] = _mm256_sad_epu8(r[2], s); r[3] = _mm256_sad_epu8(r[3], s); r[4] = _mm256_sad_epu8(r[4], s); r[5] = _mm256_sad_epu8(r[5], s); r[6] = _mm256_sad_epu8(r[6], s); r[7] = _mm256_sad_epu8(r[7], s); // sum every ref[] sums[0] = _mm256_add_epi32(sums[0], r[0]); sums[1] = _mm256_add_epi32(sums[1], r[1]); sums[2] = _mm256_add_epi32(sums[2], r[2]); sums[3] = _mm256_add_epi32(sums[3], r[3]); sums[4] = _mm256_add_epi32(sums[4], r[4]); sums[5] = _mm256_add_epi32(sums[5], r[5]); sums[6] = _mm256_add_epi32(sums[6], r[6]); sums[7] = _mm256_add_epi32(sums[7], r[7]); src_ptr += src_stride; ref_ptr += ref_stride; } calc_final_4(sums, sad_array); 
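  // calc_final_4() folds four 256-bit accumulators into four uint32_t
  // results, so the eight SADs are written out in two halves.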
calc_final_4(sums + 4, sad_array + 4); } void vpx_sad64x64x4d_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t sad_array[4]) { __m256i sums[4]; int i; const uint8_t *refs[4]; refs[0] = ref_array[0]; refs[1] = ref_array[1]; refs[2] = ref_array[2]; refs[3] = ref_array[3]; sums[0] = _mm256_setzero_si256(); sums[1] = _mm256_setzero_si256(); sums[2] = _mm256_setzero_si256(); sums[3] = _mm256_setzero_si256(); for (i = 0; i < 64; i++) { __m256i r_lo[4], r_hi[4]; // load 64 bytes from src and all ref[] const __m256i s_lo = _mm256_load_si256((const __m256i *)src_ptr); const __m256i s_hi = _mm256_load_si256((const __m256i *)(src_ptr + 32)); r_lo[0] = _mm256_loadu_si256((const __m256i *)refs[0]); r_hi[0] = _mm256_loadu_si256((const __m256i *)(refs[0] + 32)); r_lo[1] = _mm256_loadu_si256((const __m256i *)refs[1]); r_hi[1] = _mm256_loadu_si256((const __m256i *)(refs[1] + 32)); r_lo[2] = _mm256_loadu_si256((const __m256i *)refs[2]); r_hi[2] = _mm256_loadu_si256((const __m256i *)(refs[2] + 32)); r_lo[3] = _mm256_loadu_si256((const __m256i *)refs[3]); r_hi[3] = _mm256_loadu_si256((const __m256i *)(refs[3] + 32)); // sum of the absolute differences between every ref[] to src r_lo[0] = _mm256_sad_epu8(r_lo[0], s_lo); r_lo[1] = _mm256_sad_epu8(r_lo[1], s_lo); r_lo[2] = _mm256_sad_epu8(r_lo[2], s_lo); r_lo[3] = _mm256_sad_epu8(r_lo[3], s_lo); r_hi[0] = _mm256_sad_epu8(r_hi[0], s_hi); r_hi[1] = _mm256_sad_epu8(r_hi[1], s_hi); r_hi[2] = _mm256_sad_epu8(r_hi[2], s_hi); r_hi[3] = _mm256_sad_epu8(r_hi[3], s_hi); // sum every ref[] sums[0] = _mm256_add_epi32(sums[0], r_lo[0]); sums[1] = _mm256_add_epi32(sums[1], r_lo[1]); sums[2] = _mm256_add_epi32(sums[2], r_lo[2]); sums[3] = _mm256_add_epi32(sums[3], r_lo[3]); sums[0] = _mm256_add_epi32(sums[0], r_hi[0]); sums[1] = _mm256_add_epi32(sums[1], r_hi[1]); sums[2] = _mm256_add_epi32(sums[2], r_hi[2]); sums[3] = _mm256_add_epi32(sums[3], r_hi[3]); src_ptr += src_stride; refs[0] += ref_stride; refs[1] += ref_stride; refs[2] += ref_stride; refs[3] += ref_stride; } calc_final_4(sums, sad_array); } libvpx-1.8.2/vpx_dsp/x86/sad4d_avx512.c000066400000000000000000000061751357355204000174240ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include // AVX512 #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" void vpx_sad64x64x4d_avx512(const uint8_t *src_ptr, int src_stride, const uint8_t *const ref_array[4], int ref_stride, uint32_t res[4]) { __m512i src_reg, ref0_reg, ref1_reg, ref2_reg, ref3_reg; __m512i sum_ref0, sum_ref1, sum_ref2, sum_ref3; __m512i sum_mlow, sum_mhigh; int i; const uint8_t *ref0, *ref1, *ref2, *ref3; ref0 = ref_array[0]; ref1 = ref_array[1]; ref2 = ref_array[2]; ref3 = ref_array[3]; sum_ref0 = _mm512_set1_epi16(0); sum_ref1 = _mm512_set1_epi16(0); sum_ref2 = _mm512_set1_epi16(0); sum_ref3 = _mm512_set1_epi16(0); for (i = 0; i < 64; i++) { // load src and all ref[] src_reg = _mm512_loadu_si512((const __m512i *)src_ptr); ref0_reg = _mm512_loadu_si512((const __m512i *)ref0); ref1_reg = _mm512_loadu_si512((const __m512i *)ref1); ref2_reg = _mm512_loadu_si512((const __m512i *)ref2); ref3_reg = _mm512_loadu_si512((const __m512i *)ref3); // sum of the absolute differences between every ref[] to src ref0_reg = _mm512_sad_epu8(ref0_reg, src_reg); ref1_reg = _mm512_sad_epu8(ref1_reg, src_reg); ref2_reg = _mm512_sad_epu8(ref2_reg, src_reg); ref3_reg = _mm512_sad_epu8(ref3_reg, src_reg); // sum every ref[] sum_ref0 = _mm512_add_epi32(sum_ref0, ref0_reg); sum_ref1 = _mm512_add_epi32(sum_ref1, ref1_reg); sum_ref2 = _mm512_add_epi32(sum_ref2, ref2_reg); sum_ref3 = _mm512_add_epi32(sum_ref3, ref3_reg); src_ptr += src_stride; ref0 += ref_stride; ref1 += ref_stride; ref2 += ref_stride; ref3 += ref_stride; } { __m256i sum256; __m128i sum128; // in sum_ref[] the result is saved in the first 4 bytes // the other 4 bytes are zeroed. // sum_ref1 and sum_ref3 are shifted left by 4 bytes sum_ref1 = _mm512_bslli_epi128(sum_ref1, 4); sum_ref3 = _mm512_bslli_epi128(sum_ref3, 4); // merge sum_ref0 and sum_ref1 also sum_ref2 and sum_ref3 sum_ref0 = _mm512_or_si512(sum_ref0, sum_ref1); sum_ref2 = _mm512_or_si512(sum_ref2, sum_ref3); // merge every 64 bit from each sum_ref[] sum_mlow = _mm512_unpacklo_epi64(sum_ref0, sum_ref2); sum_mhigh = _mm512_unpackhi_epi64(sum_ref0, sum_ref2); // add the low 64 bit to the high 64 bit sum_mlow = _mm512_add_epi32(sum_mlow, sum_mhigh); // add the low 128 bit to the high 128 bit sum256 = _mm256_add_epi32(_mm512_castsi512_si256(sum_mlow), _mm512_extracti32x8_epi32(sum_mlow, 1)); sum128 = _mm_add_epi32(_mm256_castsi256_si128(sum256), _mm256_extractf128_si256(sum256, 1)); _mm_storeu_si128((__m128i *)(res), sum128); } } libvpx-1.8.2/vpx_dsp/x86/sad4d_sse2.asm000066400000000000000000000165471357355204000176140ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
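; The 4d kernels below measure one source block against four candidate
; reference blocks at a time: psadbw leaves a pair of 16-bit partial sums
; in the two 64-bit lanes of each accumulator, and the epilogue merges the
; four per-reference accumulators into the uint32_t res[4] output.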
; %include "third_party/x86inc/x86inc.asm" SECTION .text ; PROCESS_4x2x4 first, off_{first,second}_{src,ref}, advance_at_end %macro PROCESS_4x2x4 5-6 0 movd m0, [srcq +%2] %if %1 == 1 movd m6, [ref1q+%3] movd m4, [ref2q+%3] movd m7, [ref3q+%3] movd m5, [ref4q+%3] movd m1, [srcq +%4] movd m2, [ref1q+%5] punpckldq m0, m1 punpckldq m6, m2 movd m1, [ref2q+%5] movd m2, [ref3q+%5] movd m3, [ref4q+%5] punpckldq m4, m1 punpckldq m7, m2 punpckldq m5, m3 movlhps m0, m0 movlhps m6, m4 movlhps m7, m5 psadbw m6, m0 psadbw m7, m0 %else movd m1, [ref1q+%3] movd m5, [ref1q+%5] movd m2, [ref2q+%3] movd m4, [ref2q+%5] punpckldq m1, m5 punpckldq m2, m4 movd m3, [ref3q+%3] movd m5, [ref3q+%5] punpckldq m3, m5 movd m4, [ref4q+%3] movd m5, [ref4q+%5] punpckldq m4, m5 movd m5, [srcq +%4] punpckldq m0, m5 movlhps m0, m0 movlhps m1, m2 movlhps m3, m4 psadbw m1, m0 psadbw m3, m0 paddd m6, m1 paddd m7, m3 %endif %if %6 == 1 lea srcq, [srcq +src_strideq*2] lea ref1q, [ref1q+ref_strideq*2] lea ref2q, [ref2q+ref_strideq*2] lea ref3q, [ref3q+ref_strideq*2] lea ref4q, [ref4q+ref_strideq*2] %endif %endmacro ; PROCESS_8x2x4 first, off_{first,second}_{src,ref}, advance_at_end %macro PROCESS_8x2x4 5-6 0 movh m0, [srcq +%2] %if %1 == 1 movh m4, [ref1q+%3] movh m5, [ref2q+%3] movh m6, [ref3q+%3] movh m7, [ref4q+%3] movhps m0, [srcq +%4] movhps m4, [ref1q+%5] movhps m5, [ref2q+%5] movhps m6, [ref3q+%5] movhps m7, [ref4q+%5] psadbw m4, m0 psadbw m5, m0 psadbw m6, m0 psadbw m7, m0 %else movh m1, [ref1q+%3] movh m2, [ref2q+%3] movh m3, [ref3q+%3] movhps m0, [srcq +%4] movhps m1, [ref1q+%5] movhps m2, [ref2q+%5] movhps m3, [ref3q+%5] psadbw m1, m0 psadbw m2, m0 psadbw m3, m0 paddd m4, m1 movh m1, [ref4q+%3] movhps m1, [ref4q+%5] paddd m5, m2 paddd m6, m3 psadbw m1, m0 paddd m7, m1 %endif %if %6 == 1 lea srcq, [srcq +src_strideq*2] lea ref1q, [ref1q+ref_strideq*2] lea ref2q, [ref2q+ref_strideq*2] lea ref3q, [ref3q+ref_strideq*2] lea ref4q, [ref4q+ref_strideq*2] %endif %endmacro ; PROCESS_16x2x4 first, off_{first,second}_{src,ref}, advance_at_end %macro PROCESS_16x2x4 5-6 0 ; 1st 16 px mova m0, [srcq +%2] %if %1 == 1 movu m4, [ref1q+%3] movu m5, [ref2q+%3] movu m6, [ref3q+%3] movu m7, [ref4q+%3] psadbw m4, m0 psadbw m5, m0 psadbw m6, m0 psadbw m7, m0 %else movu m1, [ref1q+%3] movu m2, [ref2q+%3] movu m3, [ref3q+%3] psadbw m1, m0 psadbw m2, m0 psadbw m3, m0 paddd m4, m1 movu m1, [ref4q+%3] paddd m5, m2 paddd m6, m3 psadbw m1, m0 paddd m7, m1 %endif ; 2nd 16 px mova m0, [srcq +%4] movu m1, [ref1q+%5] movu m2, [ref2q+%5] movu m3, [ref3q+%5] psadbw m1, m0 psadbw m2, m0 psadbw m3, m0 paddd m4, m1 movu m1, [ref4q+%5] paddd m5, m2 paddd m6, m3 %if %6 == 1 lea srcq, [srcq +src_strideq*2] lea ref1q, [ref1q+ref_strideq*2] lea ref2q, [ref2q+ref_strideq*2] lea ref3q, [ref3q+ref_strideq*2] lea ref4q, [ref4q+ref_strideq*2] %endif psadbw m1, m0 paddd m7, m1 %endmacro ; PROCESS_32x2x4 first, off_{first,second}_{src,ref}, advance_at_end %macro PROCESS_32x2x4 5-6 0 PROCESS_16x2x4 %1, %2, %3, %2 + 16, %3 + 16 PROCESS_16x2x4 0, %4, %5, %4 + 16, %5 + 16, %6 %endmacro ; PROCESS_64x2x4 first, off_{first,second}_{src,ref}, advance_at_end %macro PROCESS_64x2x4 5-6 0 PROCESS_32x2x4 %1, %2, %3, %2 + 32, %3 + 32 PROCESS_32x2x4 0, %4, %5, %4 + 32, %5 + 32, %6 %endmacro ; void vpx_sadNxNx4d_sse2(uint8_t *src, int src_stride, ; uint8_t *ref[4], int ref_stride, ; uint32_t res[4]); ; where NxN = 64x64, 32x32, 16x16, 16x8, 8x16, 8x8, 8x4, 4x8 and 4x4 %macro SADNXN4D 2 %if UNIX64 cglobal sad%1x%2x4d, 5, 8, 8, src, src_stride, ref1, ref_stride, \ res, ref2, ref3, ref4 
%else cglobal sad%1x%2x4d, 4, 7, 8, src, src_stride, ref1, ref_stride, \ ref2, ref3, ref4 %endif movsxdifnidn src_strideq, src_strided movsxdifnidn ref_strideq, ref_strided mov ref2q, [ref1q+gprsize*1] mov ref3q, [ref1q+gprsize*2] mov ref4q, [ref1q+gprsize*3] mov ref1q, [ref1q+gprsize*0] PROCESS_%1x2x4 1, 0, 0, src_strideq, ref_strideq, 1 %rep (%2-4)/2 PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 1 %endrep PROCESS_%1x2x4 0, 0, 0, src_strideq, ref_strideq, 0 %if %1 > 4 pslldq m5, 4 pslldq m7, 4 por m4, m5 por m6, m7 mova m5, m4 mova m7, m6 punpcklqdq m4, m6 punpckhqdq m5, m7 movifnidn r4, r4mp paddd m4, m5 movu [r4], m4 RET %else movifnidn r4, r4mp pshufd m6, m6, 0x08 pshufd m7, m7, 0x08 movq [r4+0], m6 movq [r4+8], m7 RET %endif %endmacro INIT_XMM sse2 SADNXN4D 64, 64 SADNXN4D 64, 32 SADNXN4D 32, 64 SADNXN4D 32, 32 SADNXN4D 32, 16 SADNXN4D 16, 32 SADNXN4D 16, 16 SADNXN4D 16, 8 SADNXN4D 8, 16 SADNXN4D 8, 8 SADNXN4D 8, 4 SADNXN4D 4, 8 SADNXN4D 4, 4 libvpx-1.8.2/vpx_dsp/x86/sad_avx2.c000066400000000000000000000251701357355204000170220ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" #define FSAD64_H(h) \ unsigned int vpx_sad64x##h##_avx2(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride) { \ int i, res; \ __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \ __m256i sum_sad = _mm256_setzero_si256(); \ __m256i sum_sad_h; \ __m128i sum_sad128; \ for (i = 0; i < h; i++) { \ ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr); \ ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + 32)); \ sad1_reg = _mm256_sad_epu8( \ ref1_reg, _mm256_loadu_si256((__m256i const *)src_ptr)); \ sad2_reg = _mm256_sad_epu8( \ ref2_reg, _mm256_loadu_si256((__m256i const *)(src_ptr + 32))); \ sum_sad = \ _mm256_add_epi32(sum_sad, _mm256_add_epi32(sad1_reg, sad2_reg)); \ ref_ptr += ref_stride; \ src_ptr += src_stride; \ } \ sum_sad_h = _mm256_srli_si256(sum_sad, 8); \ sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \ sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \ sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \ res = _mm_cvtsi128_si32(sum_sad128); \ return res; \ } #define FSAD32_H(h) \ unsigned int vpx_sad32x##h##_avx2(const uint8_t *src_ptr, int src_stride, \ const uint8_t *ref_ptr, int ref_stride) { \ int i, res; \ __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \ __m256i sum_sad = _mm256_setzero_si256(); \ __m256i sum_sad_h; \ __m128i sum_sad128; \ int ref2_stride = ref_stride << 1; \ int src2_stride = src_stride << 1; \ int max = h >> 1; \ for (i = 0; i < max; i++) { \ ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr); \ ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + ref_stride)); \ sad1_reg = _mm256_sad_epu8( \ ref1_reg, _mm256_loadu_si256((__m256i const *)src_ptr)); \ sad2_reg = _mm256_sad_epu8( \ ref2_reg, \ _mm256_loadu_si256((__m256i const *)(src_ptr + src_stride))); \ sum_sad = \ _mm256_add_epi32(sum_sad, _mm256_add_epi32(sad1_reg, sad2_reg)); \ ref_ptr += ref2_stride; \ src_ptr += src2_stride; \ } \ sum_sad_h = _mm256_srli_si256(sum_sad, 8); \ sum_sad = _mm256_add_epi32(sum_sad, 
sum_sad_h); \ sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \ sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \ res = _mm_cvtsi128_si32(sum_sad128); \ return res; \ } #define FSAD64 \ FSAD64_H(64); \ FSAD64_H(32); #define FSAD32 \ FSAD32_H(64); \ FSAD32_H(32); \ FSAD32_H(16); FSAD64; FSAD32; #undef FSAD64 #undef FSAD32 #undef FSAD64_H #undef FSAD32_H #define FSADAVG64_H(h) \ unsigned int vpx_sad64x##h##_avg_avx2( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, const uint8_t *second_pred) { \ int i, res; \ __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \ __m256i sum_sad = _mm256_setzero_si256(); \ __m256i sum_sad_h; \ __m128i sum_sad128; \ for (i = 0; i < h; i++) { \ ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr); \ ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + 32)); \ ref1_reg = _mm256_avg_epu8( \ ref1_reg, _mm256_loadu_si256((__m256i const *)second_pred)); \ ref2_reg = _mm256_avg_epu8( \ ref2_reg, _mm256_loadu_si256((__m256i const *)(second_pred + 32))); \ sad1_reg = _mm256_sad_epu8( \ ref1_reg, _mm256_loadu_si256((__m256i const *)src_ptr)); \ sad2_reg = _mm256_sad_epu8( \ ref2_reg, _mm256_loadu_si256((__m256i const *)(src_ptr + 32))); \ sum_sad = \ _mm256_add_epi32(sum_sad, _mm256_add_epi32(sad1_reg, sad2_reg)); \ ref_ptr += ref_stride; \ src_ptr += src_stride; \ second_pred += 64; \ } \ sum_sad_h = _mm256_srli_si256(sum_sad, 8); \ sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \ sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \ sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \ res = _mm_cvtsi128_si32(sum_sad128); \ return res; \ } #define FSADAVG32_H(h) \ unsigned int vpx_sad32x##h##_avg_avx2( \ const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, \ int ref_stride, const uint8_t *second_pred) { \ int i, res; \ __m256i sad1_reg, sad2_reg, ref1_reg, ref2_reg; \ __m256i sum_sad = _mm256_setzero_si256(); \ __m256i sum_sad_h; \ __m128i sum_sad128; \ int ref2_stride = ref_stride << 1; \ int src2_stride = src_stride << 1; \ int max = h >> 1; \ for (i = 0; i < max; i++) { \ ref1_reg = _mm256_loadu_si256((__m256i const *)ref_ptr); \ ref2_reg = _mm256_loadu_si256((__m256i const *)(ref_ptr + ref_stride)); \ ref1_reg = _mm256_avg_epu8( \ ref1_reg, _mm256_loadu_si256((__m256i const *)second_pred)); \ ref2_reg = _mm256_avg_epu8( \ ref2_reg, _mm256_loadu_si256((__m256i const *)(second_pred + 32))); \ sad1_reg = _mm256_sad_epu8( \ ref1_reg, _mm256_loadu_si256((__m256i const *)src_ptr)); \ sad2_reg = _mm256_sad_epu8( \ ref2_reg, \ _mm256_loadu_si256((__m256i const *)(src_ptr + src_stride))); \ sum_sad = \ _mm256_add_epi32(sum_sad, _mm256_add_epi32(sad1_reg, sad2_reg)); \ ref_ptr += ref2_stride; \ src_ptr += src2_stride; \ second_pred += 64; \ } \ sum_sad_h = _mm256_srli_si256(sum_sad, 8); \ sum_sad = _mm256_add_epi32(sum_sad, sum_sad_h); \ sum_sad128 = _mm256_extracti128_si256(sum_sad, 1); \ sum_sad128 = _mm_add_epi32(_mm256_castsi256_si128(sum_sad), sum_sad128); \ res = _mm_cvtsi128_si32(sum_sad128); \ return res; \ } #define FSADAVG64 \ FSADAVG64_H(64); \ FSADAVG64_H(32); #define FSADAVG32 \ FSADAVG32_H(64); \ FSADAVG32_H(32); \ FSADAVG32_H(16); FSADAVG64; FSADAVG32; #undef FSADAVG64 #undef FSADAVG32 #undef FSADAVG64_H #undef FSADAVG32_H libvpx-1.8.2/vpx_dsp/x86/sad_sse2.asm000066400000000000000000000202661357355204000173550ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "third_party/x86inc/x86inc.asm" SECTION .text %macro SAD_FN 4 %if %4 == 0 %if %3 == 5 cglobal sad%1x%2, 4, %3, 5, src, src_stride, ref, ref_stride, n_rows %else ; %3 == 7 cglobal sad%1x%2, 4, %3, 6, src, src_stride, ref, ref_stride, \ src_stride3, ref_stride3, n_rows %endif ; %3 == 5/7 %else ; avg %if %3 == 5 cglobal sad%1x%2_avg, 5, 1 + %3, 5, src, src_stride, ref, ref_stride, \ second_pred, n_rows %else ; %3 == 7 cglobal sad%1x%2_avg, 5, VPX_ARCH_X86_64 + %3, 6, src, src_stride, \ ref, ref_stride, \ second_pred, \ src_stride3, ref_stride3 %if VPX_ARCH_X86_64 %define n_rowsd r7d %else ; x86-32 %define n_rowsd dword r0m %endif ; x86-32/64 %endif ; %3 == 5/7 %endif ; avg/sad movsxdifnidn src_strideq, src_strided movsxdifnidn ref_strideq, ref_strided %if %3 == 7 lea src_stride3q, [src_strideq*3] lea ref_stride3q, [ref_strideq*3] %endif ; %3 == 7 %endmacro ; unsigned int vpx_sad64x64_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro SAD64XN 1-2 0 SAD_FN 64, %1, 5, %2 mov n_rowsd, %1 pxor m0, m0 .loop: movu m1, [refq] movu m2, [refq+16] movu m3, [refq+32] movu m4, [refq+48] %if %2 == 1 pavgb m1, [second_predq+mmsize*0] pavgb m2, [second_predq+mmsize*1] pavgb m3, [second_predq+mmsize*2] pavgb m4, [second_predq+mmsize*3] lea second_predq, [second_predq+mmsize*4] %endif psadbw m1, [srcq] psadbw m2, [srcq+16] psadbw m3, [srcq+32] psadbw m4, [srcq+48] paddd m1, m2 paddd m3, m4 add refq, ref_strideq paddd m0, m1 add srcq, src_strideq paddd m0, m3 dec n_rowsd jg .loop movhlps m1, m0 paddd m0, m1 movd eax, m0 RET %endmacro INIT_XMM sse2 SAD64XN 64 ; sad64x64_sse2 SAD64XN 32 ; sad64x32_sse2 SAD64XN 64, 1 ; sad64x64_avg_sse2 SAD64XN 32, 1 ; sad64x32_avg_sse2 ; unsigned int vpx_sad32x32_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro SAD32XN 1-2 0 SAD_FN 32, %1, 5, %2 mov n_rowsd, %1/2 pxor m0, m0 .loop: movu m1, [refq] movu m2, [refq+16] movu m3, [refq+ref_strideq] movu m4, [refq+ref_strideq+16] %if %2 == 1 pavgb m1, [second_predq+mmsize*0] pavgb m2, [second_predq+mmsize*1] pavgb m3, [second_predq+mmsize*2] pavgb m4, [second_predq+mmsize*3] lea second_predq, [second_predq+mmsize*4] %endif psadbw m1, [srcq] psadbw m2, [srcq+16] psadbw m3, [srcq+src_strideq] psadbw m4, [srcq+src_strideq+16] paddd m1, m2 paddd m3, m4 lea refq, [refq+ref_strideq*2] paddd m0, m1 lea srcq, [srcq+src_strideq*2] paddd m0, m3 dec n_rowsd jg .loop movhlps m1, m0 paddd m0, m1 movd eax, m0 RET %endmacro INIT_XMM sse2 SAD32XN 64 ; sad32x64_sse2 SAD32XN 32 ; sad32x32_sse2 SAD32XN 16 ; sad32x16_sse2 SAD32XN 64, 1 ; sad32x64_avg_sse2 SAD32XN 32, 1 ; sad32x32_avg_sse2 SAD32XN 16, 1 ; sad32x16_avg_sse2 ; unsigned int vpx_sad16x{8,16}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro SAD16XN 1-2 0 SAD_FN 16, %1, 7, %2 mov n_rowsd, %1/4 pxor m0, m0 .loop: movu m1, [refq] movu m2, [refq+ref_strideq] movu m3, [refq+ref_strideq*2] movu m4, [refq+ref_stride3q] %if %2 == 1 pavgb m1, [second_predq+mmsize*0] pavgb m2, [second_predq+mmsize*1] pavgb m3, [second_predq+mmsize*2] pavgb m4, [second_predq+mmsize*3] lea second_predq, [second_predq+mmsize*4] %endif psadbw m1, [srcq] psadbw m2, [srcq+src_strideq] psadbw m3, [srcq+src_strideq*2] psadbw 
m4, [srcq+src_stride3q] paddd m1, m2 paddd m3, m4 lea refq, [refq+ref_strideq*4] paddd m0, m1 lea srcq, [srcq+src_strideq*4] paddd m0, m3 dec n_rowsd jg .loop movhlps m1, m0 paddd m0, m1 movd eax, m0 RET %endmacro INIT_XMM sse2 SAD16XN 32 ; sad16x32_sse2 SAD16XN 16 ; sad16x16_sse2 SAD16XN 8 ; sad16x8_sse2 SAD16XN 32, 1 ; sad16x32_avg_sse2 SAD16XN 16, 1 ; sad16x16_avg_sse2 SAD16XN 8, 1 ; sad16x8_avg_sse2 ; unsigned int vpx_sad8x{8,16}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro SAD8XN 1-2 0 SAD_FN 8, %1, 7, %2 mov n_rowsd, %1/4 pxor m0, m0 .loop: movh m1, [refq] movhps m1, [refq+ref_strideq] movh m2, [refq+ref_strideq*2] movhps m2, [refq+ref_stride3q] %if %2 == 1 pavgb m1, [second_predq+mmsize*0] pavgb m2, [second_predq+mmsize*1] lea second_predq, [second_predq+mmsize*2] %endif movh m3, [srcq] movhps m3, [srcq+src_strideq] movh m4, [srcq+src_strideq*2] movhps m4, [srcq+src_stride3q] psadbw m1, m3 psadbw m2, m4 lea refq, [refq+ref_strideq*4] paddd m0, m1 lea srcq, [srcq+src_strideq*4] paddd m0, m2 dec n_rowsd jg .loop movhlps m1, m0 paddd m0, m1 movd eax, m0 RET %endmacro INIT_XMM sse2 SAD8XN 16 ; sad8x16_sse2 SAD8XN 8 ; sad8x8_sse2 SAD8XN 4 ; sad8x4_sse2 SAD8XN 16, 1 ; sad8x16_avg_sse2 SAD8XN 8, 1 ; sad8x8_avg_sse2 SAD8XN 4, 1 ; sad8x4_avg_sse2 ; unsigned int vpx_sad4x{4, 8}_sse2(uint8_t *src, int src_stride, ; uint8_t *ref, int ref_stride); %macro SAD4XN 1-2 0 SAD_FN 4, %1, 7, %2 mov n_rowsd, %1/4 pxor m0, m0 .loop: movd m1, [refq] movd m2, [refq+ref_strideq] movd m3, [refq+ref_strideq*2] movd m4, [refq+ref_stride3q] punpckldq m1, m2 punpckldq m3, m4 movlhps m1, m3 %if %2 == 1 pavgb m1, [second_predq+mmsize*0] lea second_predq, [second_predq+mmsize*1] %endif movd m2, [srcq] movd m5, [srcq+src_strideq] movd m4, [srcq+src_strideq*2] movd m3, [srcq+src_stride3q] punpckldq m2, m5 punpckldq m4, m3 movlhps m2, m4 psadbw m1, m2 lea refq, [refq+ref_strideq*4] paddd m0, m1 lea srcq, [srcq+src_strideq*4] dec n_rowsd jg .loop movhlps m1, m0 paddd m0, m1 movd eax, m0 RET %endmacro INIT_XMM sse2 SAD4XN 8 ; sad4x8_sse SAD4XN 4 ; sad4x4_sse SAD4XN 8, 1 ; sad4x8_avg_sse SAD4XN 4, 1 ; sad4x4_avg_sse libvpx-1.8.2/vpx_dsp/x86/sad_sse3.asm000066400000000000000000000261711357355204000173570ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
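; The x3 variants return three SADs per call, evaluated at byte offsets
; +0, +1 and +2 of ref_ptr; the 16-wide path uses lddqu since those
; sliding-window loads are necessarily unaligned.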
; %include "vpx_ports/x86_abi_support.asm" %macro STACK_FRAME_CREATE_X3 0 %if ABI_IS_32BIT %define src_ptr rsi %define src_stride rax %define ref_ptr rdi %define ref_stride rdx %define end_ptr rcx %define ret_var rbx %define result_ptr arg(4) %define height dword ptr arg(4) push rbp mov rbp, rsp push rsi push rdi push rbx mov rsi, arg(0) ; src_ptr mov rdi, arg(2) ; ref_ptr movsxd rax, dword ptr arg(1) ; src_stride movsxd rdx, dword ptr arg(3) ; ref_stride %else %if LIBVPX_YASM_WIN64 SAVE_XMM 7, u %define src_ptr rcx %define src_stride rdx %define ref_ptr r8 %define ref_stride r9 %define end_ptr r10 %define ret_var r11 %define result_ptr [rsp+xmm_stack_space+8+4*8] %define height dword ptr [rsp+xmm_stack_space+8+4*8] %else %define src_ptr rdi %define src_stride rsi %define ref_ptr rdx %define ref_stride rcx %define end_ptr r9 %define ret_var r10 %define result_ptr r8 %define height r8 %endif %endif %endmacro %macro STACK_FRAME_DESTROY_X3 0 %define src_ptr %define src_stride %define ref_ptr %define ref_stride %define end_ptr %define ret_var %define result_ptr %define height %if ABI_IS_32BIT pop rbx pop rdi pop rsi pop rbp %else %if LIBVPX_YASM_WIN64 RESTORE_XMM %endif %endif ret %endmacro %macro PROCESS_16X2X3 5 %if %1==0 movdqa xmm0, XMMWORD PTR [%2] lddqu xmm5, XMMWORD PTR [%3] lddqu xmm6, XMMWORD PTR [%3+1] lddqu xmm7, XMMWORD PTR [%3+2] psadbw xmm5, xmm0 psadbw xmm6, xmm0 psadbw xmm7, xmm0 %else movdqa xmm0, XMMWORD PTR [%2] lddqu xmm1, XMMWORD PTR [%3] lddqu xmm2, XMMWORD PTR [%3+1] lddqu xmm3, XMMWORD PTR [%3+2] psadbw xmm1, xmm0 psadbw xmm2, xmm0 psadbw xmm3, xmm0 paddw xmm5, xmm1 paddw xmm6, xmm2 paddw xmm7, xmm3 %endif movdqa xmm0, XMMWORD PTR [%2+%4] lddqu xmm1, XMMWORD PTR [%3+%5] lddqu xmm2, XMMWORD PTR [%3+%5+1] lddqu xmm3, XMMWORD PTR [%3+%5+2] %if %1==0 || %1==1 lea %2, [%2+%4*2] lea %3, [%3+%5*2] %endif psadbw xmm1, xmm0 psadbw xmm2, xmm0 psadbw xmm3, xmm0 paddw xmm5, xmm1 paddw xmm6, xmm2 paddw xmm7, xmm3 %endmacro %macro PROCESS_8X2X3 5 %if %1==0 movq mm0, QWORD PTR [%2] movq mm5, QWORD PTR [%3] movq mm6, QWORD PTR [%3+1] movq mm7, QWORD PTR [%3+2] psadbw mm5, mm0 psadbw mm6, mm0 psadbw mm7, mm0 %else movq mm0, QWORD PTR [%2] movq mm1, QWORD PTR [%3] movq mm2, QWORD PTR [%3+1] movq mm3, QWORD PTR [%3+2] psadbw mm1, mm0 psadbw mm2, mm0 psadbw mm3, mm0 paddw mm5, mm1 paddw mm6, mm2 paddw mm7, mm3 %endif movq mm0, QWORD PTR [%2+%4] movq mm1, QWORD PTR [%3+%5] movq mm2, QWORD PTR [%3+%5+1] movq mm3, QWORD PTR [%3+%5+2] %if %1==0 || %1==1 lea %2, [%2+%4*2] lea %3, [%3+%5*2] %endif psadbw mm1, mm0 psadbw mm2, mm0 psadbw mm3, mm0 paddw mm5, mm1 paddw mm6, mm2 paddw mm7, mm3 %endmacro SECTION .text ;void int vpx_sad16x16x3_sse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) global sym(vpx_sad16x16x3_sse3) PRIVATE sym(vpx_sad16x16x3_sse3): STACK_FRAME_CREATE_X3 PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride mov rcx, result_ptr movq xmm0, xmm5 psrldq xmm5, 8 paddw xmm0, xmm5 movd [rcx], xmm0 ;- movq xmm0, xmm6 psrldq xmm6, 8 paddw xmm0, xmm6 movd [rcx+4], xmm0 ;- movq xmm0, xmm7 psrldq xmm7, 8 paddw xmm0, xmm7 
movd [rcx+8], xmm0 STACK_FRAME_DESTROY_X3 ;void int vpx_sad16x8x3_sse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) global sym(vpx_sad16x8x3_sse3) PRIVATE sym(vpx_sad16x8x3_sse3): STACK_FRAME_CREATE_X3 PROCESS_16X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_16X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_16X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride mov rcx, result_ptr movq xmm0, xmm5 psrldq xmm5, 8 paddw xmm0, xmm5 movd [rcx], xmm0 ;- movq xmm0, xmm6 psrldq xmm6, 8 paddw xmm0, xmm6 movd [rcx+4], xmm0 ;- movq xmm0, xmm7 psrldq xmm7, 8 paddw xmm0, xmm7 movd [rcx+8], xmm0 STACK_FRAME_DESTROY_X3 ;void int vpx_sad8x16x3_sse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) global sym(vpx_sad8x16x3_sse3) PRIVATE sym(vpx_sad8x16x3_sse3): STACK_FRAME_CREATE_X3 PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride mov rcx, result_ptr punpckldq mm5, mm6 movq [rcx], mm5 movd [rcx+8], mm7 STACK_FRAME_DESTROY_X3 ;void int vpx_sad8x8x3_sse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) global sym(vpx_sad8x8x3_sse3) PRIVATE sym(vpx_sad8x8x3_sse3): STACK_FRAME_CREATE_X3 PROCESS_8X2X3 0, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_8X2X3 1, src_ptr, ref_ptr, src_stride, ref_stride PROCESS_8X2X3 2, src_ptr, ref_ptr, src_stride, ref_stride mov rcx, result_ptr punpckldq mm5, mm6 movq [rcx], mm5 movd [rcx+8], mm7 STACK_FRAME_DESTROY_X3 ;void int vpx_sad4x4x3_sse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) global sym(vpx_sad4x4x3_sse3) PRIVATE sym(vpx_sad4x4x3_sse3): STACK_FRAME_CREATE_X3 movd mm0, DWORD PTR [src_ptr] movd mm1, DWORD PTR [ref_ptr] movd mm2, DWORD PTR [src_ptr+src_stride] movd mm3, DWORD PTR [ref_ptr+ref_stride] punpcklbw mm0, mm2 punpcklbw mm1, mm3 movd mm4, DWORD PTR [ref_ptr+1] movd mm5, DWORD PTR [ref_ptr+2] movd mm2, DWORD PTR [ref_ptr+ref_stride+1] movd mm3, DWORD PTR [ref_ptr+ref_stride+2] psadbw mm1, mm0 punpcklbw mm4, mm2 punpcklbw mm5, mm3 psadbw mm4, mm0 psadbw mm5, mm0 lea src_ptr, [src_ptr+src_stride*2] lea ref_ptr, [ref_ptr+ref_stride*2] movd mm0, DWORD PTR [src_ptr] movd mm2, DWORD PTR [ref_ptr] movd mm3, DWORD PTR [src_ptr+src_stride] movd mm6, DWORD PTR [ref_ptr+ref_stride] punpcklbw mm0, mm3 punpcklbw mm2, mm6 movd mm3, DWORD PTR [ref_ptr+1] movd mm7, DWORD PTR [ref_ptr+2] psadbw mm2, mm0 paddw mm1, mm2 movd mm2, DWORD PTR [ref_ptr+ref_stride+1] movd mm6, DWORD PTR [ref_ptr+ref_stride+2] punpcklbw mm3, mm2 punpcklbw mm7, mm6 psadbw mm3, mm0 psadbw mm7, mm0 paddw mm3, mm4 paddw mm7, mm5 mov rcx, result_ptr punpckldq mm1, mm3 movq [rcx], mm1 movd [rcx+8], mm7 STACK_FRAME_DESTROY_X3 libvpx-1.8.2/vpx_dsp/x86/sad_sse4.asm000066400000000000000000000231201357355204000173470ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" %macro PROCESS_16X2X8 1 %if %1 movdqa xmm0, XMMWORD PTR [rsi] movq xmm1, MMWORD PTR [rdi] movq xmm3, MMWORD PTR [rdi+8] movq xmm2, MMWORD PTR [rdi+16] punpcklqdq xmm1, xmm3 punpcklqdq xmm3, xmm2 movdqa xmm2, xmm1 mpsadbw xmm1, xmm0, 0x0 mpsadbw xmm2, xmm0, 0x5 psrldq xmm0, 8 movdqa xmm4, xmm3 mpsadbw xmm3, xmm0, 0x0 mpsadbw xmm4, xmm0, 0x5 paddw xmm1, xmm2 paddw xmm1, xmm3 paddw xmm1, xmm4 %else movdqa xmm0, XMMWORD PTR [rsi] movq xmm5, MMWORD PTR [rdi] movq xmm3, MMWORD PTR [rdi+8] movq xmm2, MMWORD PTR [rdi+16] punpcklqdq xmm5, xmm3 punpcklqdq xmm3, xmm2 movdqa xmm2, xmm5 mpsadbw xmm5, xmm0, 0x0 mpsadbw xmm2, xmm0, 0x5 psrldq xmm0, 8 movdqa xmm4, xmm3 mpsadbw xmm3, xmm0, 0x0 mpsadbw xmm4, xmm0, 0x5 paddw xmm5, xmm2 paddw xmm5, xmm3 paddw xmm5, xmm4 paddw xmm1, xmm5 %endif movdqa xmm0, XMMWORD PTR [rsi + rax] movq xmm5, MMWORD PTR [rdi+ rdx] movq xmm3, MMWORD PTR [rdi+ rdx+8] movq xmm2, MMWORD PTR [rdi+ rdx+16] punpcklqdq xmm5, xmm3 punpcklqdq xmm3, xmm2 lea rsi, [rsi+rax*2] lea rdi, [rdi+rdx*2] movdqa xmm2, xmm5 mpsadbw xmm5, xmm0, 0x0 mpsadbw xmm2, xmm0, 0x5 psrldq xmm0, 8 movdqa xmm4, xmm3 mpsadbw xmm3, xmm0, 0x0 mpsadbw xmm4, xmm0, 0x5 paddw xmm5, xmm2 paddw xmm5, xmm3 paddw xmm5, xmm4 paddw xmm1, xmm5 %endmacro %macro PROCESS_8X2X8 1 %if %1 movq xmm0, MMWORD PTR [rsi] movq xmm1, MMWORD PTR [rdi] movq xmm3, MMWORD PTR [rdi+8] punpcklqdq xmm1, xmm3 movdqa xmm2, xmm1 mpsadbw xmm1, xmm0, 0x0 mpsadbw xmm2, xmm0, 0x5 paddw xmm1, xmm2 %else movq xmm0, MMWORD PTR [rsi] movq xmm5, MMWORD PTR [rdi] movq xmm3, MMWORD PTR [rdi+8] punpcklqdq xmm5, xmm3 movdqa xmm2, xmm5 mpsadbw xmm5, xmm0, 0x0 mpsadbw xmm2, xmm0, 0x5 paddw xmm5, xmm2 paddw xmm1, xmm5 %endif movq xmm0, MMWORD PTR [rsi + rax] movq xmm5, MMWORD PTR [rdi+ rdx] movq xmm3, MMWORD PTR [rdi+ rdx+8] punpcklqdq xmm5, xmm3 lea rsi, [rsi+rax*2] lea rdi, [rdi+rdx*2] movdqa xmm2, xmm5 mpsadbw xmm5, xmm0, 0x0 mpsadbw xmm2, xmm0, 0x5 paddw xmm5, xmm2 paddw xmm1, xmm5 %endmacro %macro PROCESS_4X2X8 1 %if %1 movd xmm0, [rsi] movq xmm1, MMWORD PTR [rdi] movq xmm3, MMWORD PTR [rdi+8] punpcklqdq xmm1, xmm3 mpsadbw xmm1, xmm0, 0x0 %else movd xmm0, [rsi] movq xmm5, MMWORD PTR [rdi] movq xmm3, MMWORD PTR [rdi+8] punpcklqdq xmm5, xmm3 mpsadbw xmm5, xmm0, 0x0 paddw xmm1, xmm5 %endif movd xmm0, [rsi + rax] movq xmm5, MMWORD PTR [rdi+ rdx] movq xmm3, MMWORD PTR [rdi+ rdx+8] punpcklqdq xmm5, xmm3 lea rsi, [rsi+rax*2] lea rdi, [rdi+rdx*2] mpsadbw xmm5, xmm0, 0x0 paddw xmm1, xmm5 %endmacro %macro WRITE_AS_INTS 0 mov rdi, arg(4) ;Results pxor xmm0, xmm0 movdqa xmm2, xmm1 punpcklwd xmm1, xmm0 punpckhwd xmm2, xmm0 movdqa [rdi], xmm1 movdqa [rdi + 16], xmm2 %endmacro SECTION .text ;void vpx_sad16x16x8_sse4_1( ; const unsigned char *src_ptr, ; int src_stride, ; const unsigned char *ref_ptr, ; int ref_stride, ; unsigned short *sad_array); global sym(vpx_sad16x16x8_sse4_1) PRIVATE sym(vpx_sad16x16x8_sse4_1): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 push rsi push rdi ; end prolog mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;ref_ptr movsxd rax, dword ptr arg(1) ;src_stride movsxd rdx, dword ptr arg(3) ;ref_stride PROCESS_16X2X8 1 PROCESS_16X2X8 0 PROCESS_16X2X8 0 PROCESS_16X2X8 0 PROCESS_16X2X8 0 
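; Five of the eight two-row passes are done at this point; the three below
; complete all 16 rows. Per instruction, mpsadbw yields eight 16-bit SADs of
; one 4-byte source group against reference groups sliding byte by byte, and
; the 0x0/0x5 immediates plus the psrldq in the macro pair each source dword
; with its matching reference window, so every word of the running total in
; xmm1 covers a full 16-byte row at one of the offsets 0..7.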
PROCESS_16X2X8 0 PROCESS_16X2X8 0 PROCESS_16X2X8 0 WRITE_AS_INTS ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret ;void vpx_sad16x8x8_sse4_1( ; const unsigned char *src_ptr, ; int src_stride, ; const unsigned char *ref_ptr, ; int ref_stride, ; unsigned short *sad_array ;); global sym(vpx_sad16x8x8_sse4_1) PRIVATE sym(vpx_sad16x8x8_sse4_1): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 push rsi push rdi ; end prolog mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;ref_ptr movsxd rax, dword ptr arg(1) ;src_stride movsxd rdx, dword ptr arg(3) ;ref_stride PROCESS_16X2X8 1 PROCESS_16X2X8 0 PROCESS_16X2X8 0 PROCESS_16X2X8 0 WRITE_AS_INTS ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret ;void vpx_sad8x8x8_sse4_1( ; const unsigned char *src_ptr, ; int src_stride, ; const unsigned char *ref_ptr, ; int ref_stride, ; unsigned short *sad_array ;); global sym(vpx_sad8x8x8_sse4_1) PRIVATE sym(vpx_sad8x8x8_sse4_1): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 push rsi push rdi ; end prolog mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;ref_ptr movsxd rax, dword ptr arg(1) ;src_stride movsxd rdx, dword ptr arg(3) ;ref_stride PROCESS_8X2X8 1 PROCESS_8X2X8 0 PROCESS_8X2X8 0 PROCESS_8X2X8 0 WRITE_AS_INTS ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret ;void vpx_sad8x16x8_sse4_1( ; const unsigned char *src_ptr, ; int src_stride, ; const unsigned char *ref_ptr, ; int ref_stride, ; unsigned short *sad_array ;); global sym(vpx_sad8x16x8_sse4_1) PRIVATE sym(vpx_sad8x16x8_sse4_1): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 push rsi push rdi ; end prolog mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;ref_ptr movsxd rax, dword ptr arg(1) ;src_stride movsxd rdx, dword ptr arg(3) ;ref_stride PROCESS_8X2X8 1 PROCESS_8X2X8 0 PROCESS_8X2X8 0 PROCESS_8X2X8 0 PROCESS_8X2X8 0 PROCESS_8X2X8 0 PROCESS_8X2X8 0 PROCESS_8X2X8 0 WRITE_AS_INTS ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret ;void vpx_sad4x4x8_sse4_1( ; const unsigned char *src_ptr, ; int src_stride, ; const unsigned char *ref_ptr, ; int ref_stride, ; unsigned short *sad_array ;); global sym(vpx_sad4x4x8_sse4_1) PRIVATE sym(vpx_sad4x4x8_sse4_1): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 push rsi push rdi ; end prolog mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;ref_ptr movsxd rax, dword ptr arg(1) ;src_stride movsxd rdx, dword ptr arg(3) ;ref_stride PROCESS_4X2X8 1 PROCESS_4X2X8 0 WRITE_AS_INTS ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret libvpx-1.8.2/vpx_dsp/x86/sad_ssse3.asm000066400000000000000000000317401357355204000175400ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
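;
; Same three-offset SAD contract as sad_sse3.asm, but the 16-wide kernels
; here avoid unaligned loads: the reference pointer is rounded down to
; 16-byte alignment, the low four address bits select a specialized loop via
; a position-independent jump table, and palignr rebuilds the misaligned
; rows. For alignment k, "palignr xmm5, xmm4, k" over the aligned loads
; xmm4 = [rdi] and xmm5 = copy of [rdi+16] produces bytes xmm4[k..15]
; followed by bytes 0..k-1 of [rdi+16], i.e. exactly the 16 bytes at the
; original unaligned address.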
; %include "vpx_ports/x86_abi_support.asm" %macro PROCESS_16X2X3 1 %if %1 movdqa xmm0, XMMWORD PTR [rsi] lddqu xmm5, XMMWORD PTR [rdi] lddqu xmm6, XMMWORD PTR [rdi+1] lddqu xmm7, XMMWORD PTR [rdi+2] psadbw xmm5, xmm0 psadbw xmm6, xmm0 psadbw xmm7, xmm0 %else movdqa xmm0, XMMWORD PTR [rsi] lddqu xmm1, XMMWORD PTR [rdi] lddqu xmm2, XMMWORD PTR [rdi+1] lddqu xmm3, XMMWORD PTR [rdi+2] psadbw xmm1, xmm0 psadbw xmm2, xmm0 psadbw xmm3, xmm0 paddw xmm5, xmm1 paddw xmm6, xmm2 paddw xmm7, xmm3 %endif movdqa xmm0, XMMWORD PTR [rsi+rax] lddqu xmm1, XMMWORD PTR [rdi+rdx] lddqu xmm2, XMMWORD PTR [rdi+rdx+1] lddqu xmm3, XMMWORD PTR [rdi+rdx+2] lea rsi, [rsi+rax*2] lea rdi, [rdi+rdx*2] psadbw xmm1, xmm0 psadbw xmm2, xmm0 psadbw xmm3, xmm0 paddw xmm5, xmm1 paddw xmm6, xmm2 paddw xmm7, xmm3 %endmacro %macro PROCESS_16X2X3_OFFSET 2 %if %1 movdqa xmm0, XMMWORD PTR [rsi] movdqa xmm4, XMMWORD PTR [rdi] movdqa xmm7, XMMWORD PTR [rdi+16] movdqa xmm5, xmm7 palignr xmm5, xmm4, %2 movdqa xmm6, xmm7 palignr xmm6, xmm4, (%2+1) palignr xmm7, xmm4, (%2+2) psadbw xmm5, xmm0 psadbw xmm6, xmm0 psadbw xmm7, xmm0 %else movdqa xmm0, XMMWORD PTR [rsi] movdqa xmm4, XMMWORD PTR [rdi] movdqa xmm3, XMMWORD PTR [rdi+16] movdqa xmm1, xmm3 palignr xmm1, xmm4, %2 movdqa xmm2, xmm3 palignr xmm2, xmm4, (%2+1) palignr xmm3, xmm4, (%2+2) psadbw xmm1, xmm0 psadbw xmm2, xmm0 psadbw xmm3, xmm0 paddw xmm5, xmm1 paddw xmm6, xmm2 paddw xmm7, xmm3 %endif movdqa xmm0, XMMWORD PTR [rsi+rax] movdqa xmm4, XMMWORD PTR [rdi+rdx] movdqa xmm3, XMMWORD PTR [rdi+rdx+16] movdqa xmm1, xmm3 palignr xmm1, xmm4, %2 movdqa xmm2, xmm3 palignr xmm2, xmm4, (%2+1) palignr xmm3, xmm4, (%2+2) lea rsi, [rsi+rax*2] lea rdi, [rdi+rdx*2] psadbw xmm1, xmm0 psadbw xmm2, xmm0 psadbw xmm3, xmm0 paddw xmm5, xmm1 paddw xmm6, xmm2 paddw xmm7, xmm3 %endmacro %macro PROCESS_16X16X3_OFFSET 2 %2_aligned_by_%1: sub rdi, %1 PROCESS_16X2X3_OFFSET 1, %1 PROCESS_16X2X3_OFFSET 0, %1 PROCESS_16X2X3_OFFSET 0, %1 PROCESS_16X2X3_OFFSET 0, %1 PROCESS_16X2X3_OFFSET 0, %1 PROCESS_16X2X3_OFFSET 0, %1 PROCESS_16X2X3_OFFSET 0, %1 PROCESS_16X2X3_OFFSET 0, %1 jmp %2_store_off %endmacro %macro PROCESS_16X8X3_OFFSET 2 %2_aligned_by_%1: sub rdi, %1 PROCESS_16X2X3_OFFSET 1, %1 PROCESS_16X2X3_OFFSET 0, %1 PROCESS_16X2X3_OFFSET 0, %1 PROCESS_16X2X3_OFFSET 0, %1 jmp %2_store_off %endmacro SECTION .text ;void int vpx_sad16x16x3_ssse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) global sym(vpx_sad16x16x3_ssse3) PRIVATE sym(vpx_sad16x16x3_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 SAVE_XMM 7 push rsi push rdi push rcx ; end prolog mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;ref_ptr mov rdx, 0xf and rdx, rdi jmp .vpx_sad16x16x3_ssse3_skiptable .vpx_sad16x16x3_ssse3_jumptable: dd .vpx_sad16x16x3_ssse3_aligned_by_0 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_1 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_2 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_3 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_4 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_5 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_6 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_7 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_8 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_9 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_10 - .vpx_sad16x16x3_ssse3_do_jump dd 
.vpx_sad16x16x3_ssse3_aligned_by_11 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_12 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_13 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_14 - .vpx_sad16x16x3_ssse3_do_jump dd .vpx_sad16x16x3_ssse3_aligned_by_15 - .vpx_sad16x16x3_ssse3_do_jump .vpx_sad16x16x3_ssse3_skiptable: call .vpx_sad16x16x3_ssse3_do_jump .vpx_sad16x16x3_ssse3_do_jump: pop rcx ; get the address of do_jump mov rax, .vpx_sad16x16x3_ssse3_jumptable - .vpx_sad16x16x3_ssse3_do_jump add rax, rcx ; get the absolute address of vpx_sad16x16x3_ssse3_jumptable movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable add rcx, rax movsxd rax, dword ptr arg(1) ;src_stride movsxd rdx, dword ptr arg(3) ;ref_stride jmp rcx PROCESS_16X16X3_OFFSET 0, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 1, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 2, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 3, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 4, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 5, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 6, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 7, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 8, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 9, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 10, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 11, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 12, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 13, .vpx_sad16x16x3_ssse3 PROCESS_16X16X3_OFFSET 14, .vpx_sad16x16x3_ssse3 .vpx_sad16x16x3_ssse3_aligned_by_15: PROCESS_16X2X3 1 PROCESS_16X2X3 0 PROCESS_16X2X3 0 PROCESS_16X2X3 0 PROCESS_16X2X3 0 PROCESS_16X2X3 0 PROCESS_16X2X3 0 PROCESS_16X2X3 0 .vpx_sad16x16x3_ssse3_store_off: mov rdi, arg(4) ;Results movq xmm0, xmm5 psrldq xmm5, 8 paddw xmm0, xmm5 movd [rdi], xmm0 ;- movq xmm0, xmm6 psrldq xmm6, 8 paddw xmm0, xmm6 movd [rdi+4], xmm0 ;- movq xmm0, xmm7 psrldq xmm7, 8 paddw xmm0, xmm7 movd [rdi+8], xmm0 ; begin epilog pop rcx pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void int vpx_sad16x8x3_ssse3( ; unsigned char *src_ptr, ; int src_stride, ; unsigned char *ref_ptr, ; int ref_stride, ; int *results) global sym(vpx_sad16x8x3_ssse3) PRIVATE sym(vpx_sad16x8x3_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 5 SAVE_XMM 7 push rsi push rdi push rcx ; end prolog mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;ref_ptr mov rdx, 0xf and rdx, rdi jmp .vpx_sad16x8x3_ssse3_skiptable .vpx_sad16x8x3_ssse3_jumptable: dd .vpx_sad16x8x3_ssse3_aligned_by_0 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_1 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_2 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_3 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_4 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_5 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_6 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_7 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_8 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_9 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_10 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_11 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_12 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_13 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_14 - .vpx_sad16x8x3_ssse3_do_jump dd .vpx_sad16x8x3_ssse3_aligned_by_15 
- .vpx_sad16x8x3_ssse3_do_jump .vpx_sad16x8x3_ssse3_skiptable: call .vpx_sad16x8x3_ssse3_do_jump .vpx_sad16x8x3_ssse3_do_jump: pop rcx ; get the address of do_jump mov rax, .vpx_sad16x8x3_ssse3_jumptable - .vpx_sad16x8x3_ssse3_do_jump add rax, rcx ; get the absolute address of vpx_sad16x8x3_ssse3_jumptable movsxd rax, dword [rax + 4*rdx] ; get the 32 bit offset from the jumptable add rcx, rax movsxd rax, dword ptr arg(1) ;src_stride movsxd rdx, dword ptr arg(3) ;ref_stride jmp rcx PROCESS_16X8X3_OFFSET 0, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 1, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 2, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 3, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 4, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 5, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 6, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 7, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 8, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 9, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 10, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 11, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 12, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 13, .vpx_sad16x8x3_ssse3 PROCESS_16X8X3_OFFSET 14, .vpx_sad16x8x3_ssse3 .vpx_sad16x8x3_ssse3_aligned_by_15: PROCESS_16X2X3 1 PROCESS_16X2X3 0 PROCESS_16X2X3 0 PROCESS_16X2X3 0 .vpx_sad16x8x3_ssse3_store_off: mov rdi, arg(4) ;Results movq xmm0, xmm5 psrldq xmm5, 8 paddw xmm0, xmm5 movd [rdi], xmm0 ;- movq xmm0, xmm6 psrldq xmm6, 8 paddw xmm0, xmm6 movd [rdi+4], xmm0 ;- movq xmm0, xmm7 psrldq xmm7, 8 paddw xmm0, xmm7 movd [rdi+8], xmm0 ; begin epilog pop rcx pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret libvpx-1.8.2/vpx_dsp/x86/ssim_opt_x86_64.asm000066400000000000000000000141441357355204000205230ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" ; tabulate_ssim - sums sum_s,sum_r,sum_sq_s,sum_sq_r, sum_sxr %macro TABULATE_SSIM 0 paddusw xmm15, xmm3 ; sum_s paddusw xmm14, xmm4 ; sum_r movdqa xmm1, xmm3 pmaddwd xmm1, xmm1 paddd xmm13, xmm1 ; sum_sq_s movdqa xmm2, xmm4 pmaddwd xmm2, xmm2 paddd xmm12, xmm2 ; sum_sq_r pmaddwd xmm3, xmm4 paddd xmm11, xmm3 ; sum_sxr %endmacro ; Sum across the register %1 starting with q words %macro SUM_ACROSS_Q 1 movdqa xmm2,%1 punpckldq %1,xmm0 punpckhdq xmm2,xmm0 paddq %1,xmm2 movdqa xmm2,%1 punpcklqdq %1,xmm0 punpckhqdq xmm2,xmm0 paddq %1,xmm2 %endmacro ; Sum across the register %1 starting with q words %macro SUM_ACROSS_W 1 movdqa xmm1, %1 punpcklwd %1,xmm0 punpckhwd xmm1,xmm0 paddd %1, xmm1 SUM_ACROSS_Q %1 %endmacro SECTION .text ;void ssim_parms_sse2( ; unsigned char *s, ; int sp, ; unsigned char *r, ; int rp ; uint32_t *sum_s, ; uint32_t *sum_r, ; uint32_t *sum_sq_s, ; uint32_t *sum_sq_r, ; uint32_t *sum_sxr); ; ; TODO: Use parm passing through structure, probably don't need the pxors ; ( calling app will initialize to 0 ) could easily fit everything in sse2 ; without too much hastle, and can probably do better estimates with psadw ; or pavgb At this point this is just meant to be first pass for calculating ; all the parms needed for 16x16 ssim so we can play with dssim as distortion ; in mode selection code. 
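; A scalar model of what the two kernels below accumulate, mirroring the C
; fallback vpx_ssim_parms_16x16_c in vpx_dsp/ssim.c (sketch only; the 8x8
; variant just uses loop bounds of 8):
;
;   for (i = 0; i < 16; ++i, s += sp, r += rp) {
;     for (j = 0; j < 16; ++j) {
;       *sum_s += s[j];
;       *sum_r += r[j];
;       *sum_sq_s += s[j] * s[j];
;       *sum_sq_r += r[j] * r[j];
;       *sum_sxr += s[j] * r[j];
;     }
;   }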
global sym(vpx_ssim_parms_16x16_sse2) PRIVATE sym(vpx_ssim_parms_16x16_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 9 SAVE_XMM 15 push rsi push rdi ; end prolog mov rsi, arg(0) ;s mov rcx, arg(1) ;sp mov rdi, arg(2) ;r mov rax, arg(3) ;rp pxor xmm0, xmm0 pxor xmm15,xmm15 ;sum_s pxor xmm14,xmm14 ;sum_r pxor xmm13,xmm13 ;sum_sq_s pxor xmm12,xmm12 ;sum_sq_r pxor xmm11,xmm11 ;sum_sxr mov rdx, 16 ;row counter .NextRow: ;grab source and reference pixels movdqu xmm5, [rsi] movdqu xmm6, [rdi] movdqa xmm3, xmm5 movdqa xmm4, xmm6 punpckhbw xmm3, xmm0 ; high_s punpckhbw xmm4, xmm0 ; high_r TABULATE_SSIM movdqa xmm3, xmm5 movdqa xmm4, xmm6 punpcklbw xmm3, xmm0 ; low_s punpcklbw xmm4, xmm0 ; low_r TABULATE_SSIM add rsi, rcx ; next s row add rdi, rax ; next r row dec rdx ; counter jnz .NextRow SUM_ACROSS_W xmm15 SUM_ACROSS_W xmm14 SUM_ACROSS_Q xmm13 SUM_ACROSS_Q xmm12 SUM_ACROSS_Q xmm11 mov rdi,arg(4) movd [rdi], xmm15; mov rdi,arg(5) movd [rdi], xmm14; mov rdi,arg(6) movd [rdi], xmm13; mov rdi,arg(7) movd [rdi], xmm12; mov rdi,arg(8) movd [rdi], xmm11; ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void ssim_parms_sse2( ; unsigned char *s, ; int sp, ; unsigned char *r, ; int rp ; uint32_t *sum_s, ; uint32_t *sum_r, ; uint32_t *sum_sq_s, ; uint32_t *sum_sq_r, ; uint32_t *sum_sxr); ; ; TODO: Use parm passing through structure, probably don't need the pxors ; ( calling app will initialize to 0 ) could easily fit everything in sse2 ; without too much hastle, and can probably do better estimates with psadw ; or pavgb At this point this is just meant to be first pass for calculating ; all the parms needed for 16x16 ssim so we can play with dssim as distortion ; in mode selection code. global sym(vpx_ssim_parms_8x8_sse2) PRIVATE sym(vpx_ssim_parms_8x8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 9 SAVE_XMM 15 push rsi push rdi ; end prolog mov rsi, arg(0) ;s mov rcx, arg(1) ;sp mov rdi, arg(2) ;r mov rax, arg(3) ;rp pxor xmm0, xmm0 pxor xmm15,xmm15 ;sum_s pxor xmm14,xmm14 ;sum_r pxor xmm13,xmm13 ;sum_sq_s pxor xmm12,xmm12 ;sum_sq_r pxor xmm11,xmm11 ;sum_sxr mov rdx, 8 ;row counter .NextRow: ;grab source and reference pixels movq xmm3, [rsi] movq xmm4, [rdi] punpcklbw xmm3, xmm0 ; low_s punpcklbw xmm4, xmm0 ; low_r TABULATE_SSIM add rsi, rcx ; next s row add rdi, rax ; next r row dec rdx ; counter jnz .NextRow SUM_ACROSS_W xmm15 SUM_ACROSS_W xmm14 SUM_ACROSS_Q xmm13 SUM_ACROSS_Q xmm12 SUM_ACROSS_Q xmm11 mov rdi,arg(4) movd [rdi], xmm15; mov rdi,arg(5) movd [rdi], xmm14; mov rdi,arg(6) movd [rdi], xmm13; mov rdi,arg(7) movd [rdi], xmm12; mov rdi,arg(8) movd [rdi], xmm11; ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret libvpx-1.8.2/vpx_dsp/x86/subpel_variance_sse2.asm000066400000000000000000001250441357355204000217500ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
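;
; The kernels in this file evaluate variance at sub-pixel positions by
; filtering the source with a first-order (bilinear) filter before the
; sum/SSE accumulation. For subpel phase k = 0..7 the taps taken from the
; bilin_filter_m tables are (16 - 2*k, 2*k), with pw_8 as the rounding term:
;
;   out = (f0 * p[i] + f1 * p[i + 1] + 8) >> 4
;
; Phase 0 degenerates to a copy and phase 4 to a pavgb average, which is why
; those cases get dedicated loops below. Each kernel returns the sum of
; differences and stores the sum of squared differences through *sse; the C
; wrappers then derive variance as sse - se*se/(w*h).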
; %include "third_party/x86inc/x86inc.asm" SECTION_RODATA pw_8: times 8 dw 8 bilin_filter_m_sse2: times 8 dw 16 times 8 dw 0 times 8 dw 14 times 8 dw 2 times 8 dw 12 times 8 dw 4 times 8 dw 10 times 8 dw 6 times 16 dw 8 times 8 dw 6 times 8 dw 10 times 8 dw 4 times 8 dw 12 times 8 dw 2 times 8 dw 14 bilin_filter_m_ssse3: times 8 db 16, 0 times 8 db 14, 2 times 8 db 12, 4 times 8 db 10, 6 times 16 db 8 times 8 db 6, 10 times 8 db 4, 12 times 8 db 2, 14 SECTION .text ; int vpx_sub_pixel_varianceNxh(const uint8_t *src, ptrdiff_t src_stride, ; int x_offset, int y_offset, ; const uint8_t *ref, ptrdiff_t ref_stride, ; int height, unsigned int *sse); ; ; This function returns the SE and stores SSE in the given pointer. %macro SUM_SSE 6 ; src1, ref1, src2, ref2, sum, sse psubw %3, %4 psubw %1, %2 paddw %5, %3 pmaddwd %3, %3 paddw %5, %1 pmaddwd %1, %1 paddd %6, %3 paddd %6, %1 %endmacro %macro STORE_AND_RET 1 %if %1 > 4 ; if H=64 and W=16, we have 8 words of each 2(1bit)x64(6bit)x9bit=16bit ; in m6, i.e. it _exactly_ fits in a signed word per word in the xmm reg. ; We have to sign-extend it before adding the words within the register ; and outputing to a dword. pcmpgtw m5, m6 ; mask for 0 > x movhlps m3, m7 punpcklwd m4, m6, m5 punpckhwd m6, m5 ; sign-extend m6 word->dword paddd m7, m3 paddd m6, m4 pshufd m3, m7, 0x1 movhlps m4, m6 paddd m7, m3 paddd m6, m4 mov r1, ssem ; r1 = unsigned int *sse pshufd m4, m6, 0x1 movd [r1], m7 ; store sse paddd m6, m4 movd raxd, m6 ; store sum as return value %else ; 4xh pshuflw m4, m6, 0xe pshuflw m3, m7, 0xe paddw m6, m4 paddd m7, m3 pcmpgtw m5, m6 ; mask for 0 > x mov r1, ssem ; r1 = unsigned int *sse punpcklwd m6, m5 ; sign-extend m6 word->dword movd [r1], m7 ; store sse pshuflw m4, m6, 0xe paddd m6, m4 movd raxd, m6 ; store sum as return value %endif RET %endmacro %macro INC_SRC_BY_SRC_STRIDE 0 %if VPX_ARCH_X86=1 && CONFIG_PIC=1 add srcq, src_stridemp %else add srcq, src_strideq %endif %endmacro %macro SUBPEL_VARIANCE 1-2 0 ; W %if cpuflag(ssse3) %define bilin_filter_m bilin_filter_m_ssse3 %define filter_idx_shift 4 %else %define bilin_filter_m bilin_filter_m_sse2 %define filter_idx_shift 5 %endif ; FIXME(rbultje) only bilinear filters use >8 registers, and ssse3 only uses ; 11, not 13, if the registers are ordered correctly. 
May make a minor speed ; difference on Win64 %if VPX_ARCH_X86_64 %if %2 == 1 ; avg cglobal sub_pixel_avg_variance%1xh, 9, 10, 13, src, src_stride, \ x_offset, y_offset, ref, ref_stride, \ second_pred, second_stride, height, sse %define second_str second_strideq %else cglobal sub_pixel_variance%1xh, 7, 8, 13, src, src_stride, \ x_offset, y_offset, ref, ref_stride, \ height, sse %endif %define block_height heightd %define bilin_filter sseq %else %if CONFIG_PIC=1 %if %2 == 1 ; avg cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \ x_offset, y_offset, ref, ref_stride, \ second_pred, second_stride, height, sse %define block_height dword heightm %define second_str second_stridemp %else cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \ x_offset, y_offset, ref, ref_stride, \ height, sse %define block_height heightd %endif ; reuse argument stack space %define g_bilin_filterm x_offsetm %define g_pw_8m y_offsetm ;Store bilin_filter and pw_8 location in stack %if GET_GOT_DEFINED == 1 GET_GOT eax add esp, 4 ; restore esp %endif lea ecx, [GLOBAL(bilin_filter_m)] mov g_bilin_filterm, ecx lea ecx, [GLOBAL(pw_8)] mov g_pw_8m, ecx LOAD_IF_USED 0, 1 ; load eax, ecx back %else %if %2 == 1 ; avg cglobal sub_pixel_avg_variance%1xh, 7, 7, 13, src, src_stride, \ x_offset, y_offset, \ ref, ref_stride, second_pred, second_stride, \ height, sse %define block_height dword heightm %define second_str second_stridemp %else cglobal sub_pixel_variance%1xh, 7, 7, 13, src, src_stride, \ x_offset, y_offset, ref, ref_stride, \ height, sse %define block_height heightd %endif %define bilin_filter bilin_filter_m %endif %endif %if %1 == 4 %define movx movd %else %define movx movh %endif ASSERT %1 <= 16 ; m6 overflows if w > 16 pxor m6, m6 ; sum pxor m7, m7 ; sse ; FIXME(rbultje) if both filters are bilinear, we don't actually use m5; we ; could perhaps use it for something more productive then pxor m5, m5 ; dedicated zero register %if %1 < 16 sar block_height, 1 %if %2 == 1 ; avg shl second_str, 1 %endif %endif ; FIXME(rbultje) replace by jumptable? 
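; The branches below pick one of nine specialized loops, roughly:
;
;   if (x_offset == 0)      switch (y_offset) { 0: copy; 4: pavgb; other: filter }
;   else if (x_offset == 4) same three y cases over averaged rows
;   else                    same three y cases over bilinear-filtered rows
;
; so the common full-pel and half-pel phases never pay for pmullw/pmaddubsw
; filtering.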
test x_offsetd, x_offsetd jnz .x_nonzero ; x_offset == 0 test y_offsetd, y_offsetd jnz .x_zero_y_nonzero ; x_offset == 0 && y_offset == 0 .x_zero_y_zero_loop: %if %1 == 16 movu m0, [srcq] mova m1, [refq] %if %2 == 1 ; avg pavgb m0, [second_predq] punpckhbw m3, m1, m5 punpcklbw m1, m5 %endif punpckhbw m2, m0, m5 punpcklbw m0, m5 %if %2 == 0 ; !avg punpckhbw m3, m1, m5 punpcklbw m1, m5 %endif SUM_SSE m0, m1, m2, m3, m6, m7 add srcq, src_strideq add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] %if %2 == 1 ; avg %if %1 > 4 movhps m0, [srcq+src_strideq] %else ; 4xh movx m1, [srcq+src_strideq] punpckldq m0, m1 %endif %else ; !avg movx m2, [srcq+src_strideq] %endif movx m1, [refq] movx m3, [refq+ref_strideq] %if %2 == 1 ; avg %if %1 > 4 pavgb m0, [second_predq] %else movh m2, [second_predq] pavgb m0, m2 %endif punpcklbw m3, m5 punpcklbw m1, m5 %if %1 > 4 punpckhbw m2, m0, m5 punpcklbw m0, m5 %else ; 4xh punpcklbw m0, m5 movhlps m2, m0 %endif %else ; !avg punpcklbw m0, m5 punpcklbw m2, m5 punpcklbw m3, m5 punpcklbw m1, m5 %endif SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq+src_strideq*2] lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg add second_predq, second_str %endif dec block_height jg .x_zero_y_zero_loop STORE_AND_RET %1 .x_zero_y_nonzero: cmp y_offsetd, 4 jne .x_zero_y_nonhalf ; x_offset == 0 && y_offset == 0.5 .x_zero_y_half_loop: %if %1 == 16 movu m0, [srcq] movu m4, [srcq+src_strideq] mova m1, [refq] pavgb m0, m4 punpckhbw m3, m1, m5 %if %2 == 1 ; avg pavgb m0, [second_predq] %endif punpcklbw m1, m5 punpckhbw m2, m0, m5 punpcklbw m0, m5 SUM_SSE m0, m1, m2, m3, m6, m7 add srcq, src_strideq add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m2, [srcq+src_strideq] %if %2 == 1 ; avg %if %1 > 4 movhps m2, [srcq+src_strideq*2] %else ; 4xh movx m1, [srcq+src_strideq*2] punpckldq m2, m1 %endif movx m1, [refq] %if %1 > 4 movlhps m0, m2 %else ; 4xh punpckldq m0, m2 %endif movx m3, [refq+ref_strideq] pavgb m0, m2 punpcklbw m1, m5 %if %1 > 4 pavgb m0, [second_predq] punpcklbw m3, m5 punpckhbw m2, m0, m5 punpcklbw m0, m5 %else ; 4xh movh m4, [second_predq] pavgb m0, m4 punpcklbw m3, m5 punpcklbw m0, m5 movhlps m2, m0 %endif %else ; !avg movx m4, [srcq+src_strideq*2] movx m1, [refq] pavgb m0, m2 movx m3, [refq+ref_strideq] pavgb m2, m4 punpcklbw m0, m5 punpcklbw m2, m5 punpcklbw m3, m5 punpcklbw m1, m5 %endif SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq+src_strideq*2] lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg add second_predq, second_str %endif dec block_height jg .x_zero_y_half_loop STORE_AND_RET %1 .x_zero_y_nonhalf: ; x_offset == 0 && y_offset == bilin interpolation %if VPX_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl y_offsetd, filter_idx_shift %if VPX_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+y_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+y_offsetq+16] %endif mova m10, [GLOBAL(pw_8)] %define filter_y_a m8 %define filter_y_b m9 %define filter_rnd m10 %else ; x86-32 or mmx %if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; x_offset == 0, reuse x_offset reg %define tempq x_offsetq add y_offsetq, g_bilin_filterm %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] mov tempq, g_pw_8m %define filter_rnd [tempq] %else add y_offsetq, bilin_filter %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] %define filter_rnd [GLOBAL(pw_8)] %endif %endif .x_zero_y_other_loop: %if %1 == 16 movu m0, [srcq] movu m4, [srcq+src_strideq] mova m1, [refq] %if cpuflag(ssse3) punpckhbw m2, m0, 
m4 punpcklbw m0, m4 pmaddubsw m2, filter_y_a pmaddubsw m0, filter_y_a paddw m2, filter_rnd paddw m0, filter_rnd %else punpckhbw m2, m0, m5 punpckhbw m3, m4, m5 punpcklbw m0, m5 punpcklbw m4, m5 ; FIXME(rbultje) instead of out=((num-x)*in1+x*in2+rnd)>>log2(num), we can ; also do out=in1+(((num-x)*(in2-in1)+rnd)>>log2(num)). Total number of ; instructions is the same (5), but it is 1 mul instead of 2, so might be ; slightly faster because of pmullw latency. It would also cut our rodata ; tables in half for this function, and save 1-2 registers on x86-64. pmullw m2, filter_y_a pmullw m3, filter_y_b paddw m2, filter_rnd pmullw m0, filter_y_a pmullw m4, filter_y_b paddw m0, filter_rnd paddw m2, m3 paddw m0, m4 %endif psraw m2, 4 psraw m0, 4 %if %2 == 1 ; avg ; FIXME(rbultje) pipeline packuswb m0, m2 pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %endif punpckhbw m3, m1, m5 punpcklbw m1, m5 SUM_SSE m0, m1, m2, m3, m6, m7 add srcq, src_strideq add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m2, [srcq+src_strideq] movx m4, [srcq+src_strideq*2] movx m3, [refq+ref_strideq] %if cpuflag(ssse3) movx m1, [refq] punpcklbw m0, m2 punpcklbw m2, m4 pmaddubsw m0, filter_y_a pmaddubsw m2, filter_y_a punpcklbw m3, m5 paddw m2, filter_rnd paddw m0, filter_rnd %else punpcklbw m0, m5 punpcklbw m2, m5 punpcklbw m4, m5 pmullw m0, filter_y_a pmullw m1, m2, filter_y_b punpcklbw m3, m5 paddw m0, filter_rnd pmullw m2, filter_y_a pmullw m4, filter_y_b paddw m0, m1 paddw m2, filter_rnd movx m1, [refq] paddw m2, m4 %endif psraw m0, 4 psraw m2, 4 %if %2 == 1 ; avg ; FIXME(rbultje) pipeline %if %1 == 4 movlhps m0, m2 %endif packuswb m0, m2 %if %1 > 4 pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %else ; 4xh movh m2, [second_predq] pavgb m0, m2 punpcklbw m0, m5 movhlps m2, m0 %endif %endif punpcklbw m1, m5 SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq+src_strideq*2] lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg add second_predq, second_str %endif dec block_height jg .x_zero_y_other_loop %undef filter_y_a %undef filter_y_b %undef filter_rnd STORE_AND_RET %1 .x_nonzero: cmp x_offsetd, 4 jne .x_nonhalf ; x_offset == 0.5 test y_offsetd, y_offsetd jnz .x_half_y_nonzero ; x_offset == 0.5 && y_offset == 0 .x_half_y_zero_loop: %if %1 == 16 movu m0, [srcq] movu m4, [srcq+1] mova m1, [refq] pavgb m0, m4 punpckhbw m3, m1, m5 %if %2 == 1 ; avg pavgb m0, [second_predq] %endif punpcklbw m1, m5 punpckhbw m2, m0, m5 punpcklbw m0, m5 SUM_SSE m0, m1, m2, m3, m6, m7 add srcq, src_strideq add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m4, [srcq+1] %if %2 == 1 ; avg %if %1 > 4 movhps m0, [srcq+src_strideq] movhps m4, [srcq+src_strideq+1] %else ; 4xh movx m1, [srcq+src_strideq] punpckldq m0, m1 movx m2, [srcq+src_strideq+1] punpckldq m4, m2 %endif movx m1, [refq] movx m3, [refq+ref_strideq] pavgb m0, m4 punpcklbw m3, m5 %if %1 > 4 pavgb m0, [second_predq] punpcklbw m1, m5 punpckhbw m2, m0, m5 punpcklbw m0, m5 %else ; 4xh movh m2, [second_predq] pavgb m0, m2 punpcklbw m1, m5 punpcklbw m0, m5 movhlps m2, m0 %endif %else ; !avg movx m2, [srcq+src_strideq] movx m1, [refq] pavgb m0, m4 movx m4, [srcq+src_strideq+1] movx m3, [refq+ref_strideq] pavgb m2, m4 punpcklbw m0, m5 punpcklbw m2, m5 punpcklbw m3, m5 punpcklbw m1, m5 %endif SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq+src_strideq*2] lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg add second_predq, second_str %endif dec block_height jg .x_half_y_zero_loop STORE_AND_RET %1 .x_half_y_nonzero: cmp y_offsetd, 4 jne 
.x_half_y_nonhalf ; x_offset == 0.5 && y_offset == 0.5 %if %1 == 16 movu m0, [srcq] movu m3, [srcq+1] add srcq, src_strideq pavgb m0, m3 .x_half_y_half_loop: movu m4, [srcq] movu m3, [srcq+1] mova m1, [refq] pavgb m4, m3 punpckhbw m3, m1, m5 pavgb m0, m4 %if %2 == 1 ; avg punpcklbw m1, m5 pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %else punpckhbw m2, m0, m5 punpcklbw m0, m5 punpcklbw m1, m5 %endif SUM_SSE m0, m1, m2, m3, m6, m7 mova m0, m4 add srcq, src_strideq add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m3, [srcq+1] add srcq, src_strideq pavgb m0, m3 .x_half_y_half_loop: movx m2, [srcq] movx m3, [srcq+1] %if %2 == 1 ; avg %if %1 > 4 movhps m2, [srcq+src_strideq] movhps m3, [srcq+src_strideq+1] %else movx m1, [srcq+src_strideq] punpckldq m2, m1 movx m1, [srcq+src_strideq+1] punpckldq m3, m1 %endif pavgb m2, m3 %if %1 > 4 movlhps m0, m2 movhlps m4, m2 %else ; 4xh punpckldq m0, m2 pshuflw m4, m2, 0xe %endif movx m1, [refq] pavgb m0, m2 movx m3, [refq+ref_strideq] %if %1 > 4 pavgb m0, [second_predq] %else movh m2, [second_predq] pavgb m0, m2 %endif punpcklbw m3, m5 punpcklbw m1, m5 %if %1 > 4 punpckhbw m2, m0, m5 punpcklbw m0, m5 %else punpcklbw m0, m5 movhlps m2, m0 %endif %else ; !avg movx m4, [srcq+src_strideq] movx m1, [srcq+src_strideq+1] pavgb m2, m3 pavgb m4, m1 pavgb m0, m2 pavgb m2, m4 movx m1, [refq] movx m3, [refq+ref_strideq] punpcklbw m0, m5 punpcklbw m2, m5 punpcklbw m3, m5 punpcklbw m1, m5 %endif SUM_SSE m0, m1, m2, m3, m6, m7 mova m0, m4 lea srcq, [srcq+src_strideq*2] lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg add second_predq, second_str %endif dec block_height jg .x_half_y_half_loop STORE_AND_RET %1 .x_half_y_nonhalf: ; x_offset == 0.5 && y_offset == bilin interpolation %if VPX_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl y_offsetd, filter_idx_shift %if VPX_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+y_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+y_offsetq+16] %endif mova m10, [GLOBAL(pw_8)] %define filter_y_a m8 %define filter_y_b m9 %define filter_rnd m10 %else ;x86_32 %if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; x_offset == 0.5. 
We can reuse x_offset reg %define tempq x_offsetq add y_offsetq, g_bilin_filterm %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] mov tempq, g_pw_8m %define filter_rnd [tempq] %else add y_offsetq, bilin_filter %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] %define filter_rnd [GLOBAL(pw_8)] %endif %endif %if %1 == 16 movu m0, [srcq] movu m3, [srcq+1] add srcq, src_strideq pavgb m0, m3 .x_half_y_other_loop: movu m4, [srcq] movu m2, [srcq+1] mova m1, [refq] pavgb m4, m2 %if cpuflag(ssse3) punpckhbw m2, m0, m4 punpcklbw m0, m4 pmaddubsw m2, filter_y_a pmaddubsw m0, filter_y_a paddw m2, filter_rnd paddw m0, filter_rnd psraw m2, 4 %else punpckhbw m2, m0, m5 punpckhbw m3, m4, m5 pmullw m2, filter_y_a pmullw m3, filter_y_b paddw m2, filter_rnd punpcklbw m0, m5 paddw m2, m3 punpcklbw m3, m4, m5 pmullw m0, filter_y_a pmullw m3, filter_y_b paddw m0, filter_rnd psraw m2, 4 paddw m0, m3 %endif punpckhbw m3, m1, m5 psraw m0, 4 %if %2 == 1 ; avg ; FIXME(rbultje) pipeline packuswb m0, m2 pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %endif punpcklbw m1, m5 SUM_SSE m0, m1, m2, m3, m6, m7 mova m0, m4 add srcq, src_strideq add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m3, [srcq+1] add srcq, src_strideq pavgb m0, m3 %if notcpuflag(ssse3) punpcklbw m0, m5 %endif .x_half_y_other_loop: movx m2, [srcq] movx m1, [srcq+1] movx m4, [srcq+src_strideq] movx m3, [srcq+src_strideq+1] pavgb m2, m1 pavgb m4, m3 movx m3, [refq+ref_strideq] %if cpuflag(ssse3) movx m1, [refq] punpcklbw m0, m2 punpcklbw m2, m4 pmaddubsw m0, filter_y_a pmaddubsw m2, filter_y_a punpcklbw m3, m5 paddw m0, filter_rnd paddw m2, filter_rnd %else punpcklbw m2, m5 punpcklbw m4, m5 pmullw m0, filter_y_a pmullw m1, m2, filter_y_b punpcklbw m3, m5 paddw m0, filter_rnd pmullw m2, filter_y_a paddw m0, m1 pmullw m1, m4, filter_y_b paddw m2, filter_rnd paddw m2, m1 movx m1, [refq] %endif psraw m0, 4 psraw m2, 4 %if %2 == 1 ; avg ; FIXME(rbultje) pipeline %if %1 == 4 movlhps m0, m2 %endif packuswb m0, m2 %if %1 > 4 pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %else movh m2, [second_predq] pavgb m0, m2 punpcklbw m0, m5 movhlps m2, m0 %endif %endif punpcklbw m1, m5 SUM_SSE m0, m1, m2, m3, m6, m7 mova m0, m4 lea srcq, [srcq+src_strideq*2] lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg add second_predq, second_str %endif dec block_height jg .x_half_y_other_loop %undef filter_y_a %undef filter_y_b %undef filter_rnd STORE_AND_RET %1 .x_nonhalf: test y_offsetd, y_offsetd jnz .x_nonhalf_y_nonzero ; x_offset == bilin interpolation && y_offset == 0 %if VPX_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift %if VPX_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+x_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+x_offsetq+16] %endif mova m10, [GLOBAL(pw_8)] %define filter_x_a m8 %define filter_x_b m9 %define filter_rnd m10 %else ; x86-32 %if VPX_ARCH_X86=1 && CONFIG_PIC=1 ;y_offset == 0. We can reuse y_offset reg. 
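; (After the shl above, x_offsetq is already scaled to the row pitch of the
; filter table: 32 bytes per phase for the sse2 word taps, 16 for the packed
; ssse3 byte taps, so adding the table base turns it into a direct tap
; pointer.)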
%define tempq y_offsetq add x_offsetq, g_bilin_filterm %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] mov tempq, g_pw_8m %define filter_rnd [tempq] %else add x_offsetq, bilin_filter %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] %define filter_rnd [GLOBAL(pw_8)] %endif %endif .x_other_y_zero_loop: %if %1 == 16 movu m0, [srcq] movu m4, [srcq+1] mova m1, [refq] %if cpuflag(ssse3) punpckhbw m2, m0, m4 punpcklbw m0, m4 pmaddubsw m2, filter_x_a pmaddubsw m0, filter_x_a paddw m2, filter_rnd paddw m0, filter_rnd %else punpckhbw m2, m0, m5 punpckhbw m3, m4, m5 punpcklbw m0, m5 punpcklbw m4, m5 pmullw m2, filter_x_a pmullw m3, filter_x_b paddw m2, filter_rnd pmullw m0, filter_x_a pmullw m4, filter_x_b paddw m0, filter_rnd paddw m2, m3 paddw m0, m4 %endif psraw m2, 4 psraw m0, 4 %if %2 == 1 ; avg ; FIXME(rbultje) pipeline packuswb m0, m2 pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %endif punpckhbw m3, m1, m5 punpcklbw m1, m5 SUM_SSE m0, m1, m2, m3, m6, m7 add srcq, src_strideq add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m1, [srcq+1] movx m2, [srcq+src_strideq] movx m4, [srcq+src_strideq+1] movx m3, [refq+ref_strideq] %if cpuflag(ssse3) punpcklbw m0, m1 movx m1, [refq] punpcklbw m2, m4 pmaddubsw m0, filter_x_a pmaddubsw m2, filter_x_a punpcklbw m3, m5 paddw m0, filter_rnd paddw m2, filter_rnd %else punpcklbw m0, m5 punpcklbw m1, m5 punpcklbw m2, m5 punpcklbw m4, m5 pmullw m0, filter_x_a pmullw m1, filter_x_b punpcklbw m3, m5 paddw m0, filter_rnd pmullw m2, filter_x_a pmullw m4, filter_x_b paddw m0, m1 paddw m2, filter_rnd movx m1, [refq] paddw m2, m4 %endif psraw m0, 4 psraw m2, 4 %if %2 == 1 ; avg ; FIXME(rbultje) pipeline %if %1 == 4 movlhps m0, m2 %endif packuswb m0, m2 %if %1 > 4 pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %else movh m2, [second_predq] pavgb m0, m2 punpcklbw m0, m5 movhlps m2, m0 %endif %endif punpcklbw m1, m5 SUM_SSE m0, m1, m2, m3, m6, m7 lea srcq, [srcq+src_strideq*2] lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg add second_predq, second_str %endif dec block_height jg .x_other_y_zero_loop %undef filter_x_a %undef filter_x_b %undef filter_rnd STORE_AND_RET %1 .x_nonhalf_y_nonzero: cmp y_offsetd, 4 jne .x_nonhalf_y_nonhalf ; x_offset == bilin interpolation && y_offset == 0.5 %if VPX_ARCH_X86_64 lea bilin_filter, [GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift %if VPX_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+x_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+x_offsetq+16] %endif mova m10, [GLOBAL(pw_8)] %define filter_x_a m8 %define filter_x_b m9 %define filter_rnd m10 %else ; x86-32 %if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; y_offset == 0.5. We can reuse y_offset reg. 
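; (Same register reuse as the y_offset == 0 case above: the half-pel
; vertical step is handled with pavg, so the y register is free to serve as
; the scratch that forms the x tap pointer and the pw_8 rounding address.)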
%define tempq y_offsetq add x_offsetq, g_bilin_filterm %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] mov tempq, g_pw_8m %define filter_rnd [tempq] %else add x_offsetq, bilin_filter %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] %define filter_rnd [GLOBAL(pw_8)] %endif %endif %if %1 == 16 movu m0, [srcq] movu m1, [srcq+1] %if cpuflag(ssse3) punpckhbw m2, m0, m1 punpcklbw m0, m1 pmaddubsw m2, filter_x_a pmaddubsw m0, filter_x_a paddw m2, filter_rnd paddw m0, filter_rnd %else punpckhbw m2, m0, m5 punpckhbw m3, m1, m5 punpcklbw m0, m5 punpcklbw m1, m5 pmullw m0, filter_x_a pmullw m1, filter_x_b paddw m0, filter_rnd pmullw m2, filter_x_a pmullw m3, filter_x_b paddw m2, filter_rnd paddw m0, m1 paddw m2, m3 %endif psraw m0, 4 psraw m2, 4 add srcq, src_strideq packuswb m0, m2 .x_other_y_half_loop: movu m4, [srcq] movu m3, [srcq+1] %if cpuflag(ssse3) mova m1, [refq] punpckhbw m2, m4, m3 punpcklbw m4, m3 pmaddubsw m2, filter_x_a pmaddubsw m4, filter_x_a paddw m2, filter_rnd paddw m4, filter_rnd psraw m2, 4 psraw m4, 4 packuswb m4, m2 pavgb m0, m4 punpckhbw m3, m1, m5 punpcklbw m1, m5 %else punpckhbw m2, m4, m5 punpckhbw m1, m3, m5 punpcklbw m4, m5 punpcklbw m3, m5 pmullw m4, filter_x_a pmullw m3, filter_x_b paddw m4, filter_rnd pmullw m2, filter_x_a pmullw m1, filter_x_b paddw m2, filter_rnd paddw m4, m3 paddw m2, m1 mova m1, [refq] psraw m4, 4 psraw m2, 4 punpckhbw m3, m1, m5 ; FIXME(rbultje) the repeated pack/unpack here around m0/m2 is because we ; have a 1-register shortage to be able to store the backup of the bilin ; filtered second line as words as cache for the next line. Packing into ; a byte costs 1 pack and 2 unpacks, but saves a register. packuswb m4, m2 punpcklbw m1, m5 pavgb m0, m4 %endif %if %2 == 1 ; avg ; FIXME(rbultje) pipeline pavgb m0, [second_predq] %endif punpckhbw m2, m0, m5 punpcklbw m0, m5 SUM_SSE m0, m1, m2, m3, m6, m7 mova m0, m4 add srcq, src_strideq add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m1, [srcq+1] %if cpuflag(ssse3) punpcklbw m0, m1 pmaddubsw m0, filter_x_a paddw m0, filter_rnd %else punpcklbw m0, m5 punpcklbw m1, m5 pmullw m0, filter_x_a pmullw m1, filter_x_b paddw m0, filter_rnd paddw m0, m1 %endif add srcq, src_strideq psraw m0, 4 .x_other_y_half_loop: movx m2, [srcq] movx m1, [srcq+1] movx m4, [srcq+src_strideq] movx m3, [srcq+src_strideq+1] %if cpuflag(ssse3) punpcklbw m2, m1 punpcklbw m4, m3 pmaddubsw m2, filter_x_a pmaddubsw m4, filter_x_a movx m1, [refq] movx m3, [refq+ref_strideq] paddw m2, filter_rnd paddw m4, filter_rnd %else punpcklbw m2, m5 punpcklbw m1, m5 punpcklbw m4, m5 punpcklbw m3, m5 pmullw m2, filter_x_a pmullw m1, filter_x_b paddw m2, filter_rnd pmullw m4, filter_x_a pmullw m3, filter_x_b paddw m4, filter_rnd paddw m2, m1 movx m1, [refq] paddw m4, m3 movx m3, [refq+ref_strideq] %endif psraw m2, 4 psraw m4, 4 pavgw m0, m2 pavgw m2, m4 %if %2 == 1 ; avg ; FIXME(rbultje) pipeline - also consider going to bytes here %if %1 == 4 movlhps m0, m2 %endif packuswb m0, m2 %if %1 > 4 pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %else movh m2, [second_predq] pavgb m0, m2 punpcklbw m0, m5 movhlps m2, m0 %endif %endif punpcklbw m3, m5 punpcklbw m1, m5 SUM_SSE m0, m1, m2, m3, m6, m7 mova m0, m4 lea srcq, [srcq+src_strideq*2] lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg add second_predq, second_str %endif dec block_height jg .x_other_y_half_loop %undef filter_x_a %undef filter_x_b %undef filter_rnd STORE_AND_RET %1 .x_nonhalf_y_nonhalf: %if VPX_ARCH_X86_64 lea bilin_filter, 
[GLOBAL(bilin_filter_m)] %endif shl x_offsetd, filter_idx_shift shl y_offsetd, filter_idx_shift %if VPX_ARCH_X86_64 && %1 > 4 mova m8, [bilin_filter+x_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m9, [bilin_filter+x_offsetq+16] %endif mova m10, [bilin_filter+y_offsetq] %if notcpuflag(ssse3) ; FIXME(rbultje) don't scatter registers on x86-64 mova m11, [bilin_filter+y_offsetq+16] %endif mova m12, [GLOBAL(pw_8)] %define filter_x_a m8 %define filter_x_b m9 %define filter_y_a m10 %define filter_y_b m11 %define filter_rnd m12 %else ; x86-32 %if VPX_ARCH_X86=1 && CONFIG_PIC=1 ; In this case, there is NO unused register. Used src_stride register. Later, ; src_stride has to be loaded from stack when it is needed. %define tempq src_strideq mov tempq, g_bilin_filterm add x_offsetq, tempq add y_offsetq, tempq %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] mov tempq, g_pw_8m %define filter_rnd [tempq] %else add x_offsetq, bilin_filter add y_offsetq, bilin_filter %define filter_x_a [x_offsetq] %define filter_x_b [x_offsetq+16] %define filter_y_a [y_offsetq] %define filter_y_b [y_offsetq+16] %define filter_rnd [GLOBAL(pw_8)] %endif %endif ; x_offset == bilin interpolation && y_offset == bilin interpolation %if %1 == 16 movu m0, [srcq] movu m1, [srcq+1] %if cpuflag(ssse3) punpckhbw m2, m0, m1 punpcklbw m0, m1 pmaddubsw m2, filter_x_a pmaddubsw m0, filter_x_a paddw m2, filter_rnd paddw m0, filter_rnd %else punpckhbw m2, m0, m5 punpckhbw m3, m1, m5 punpcklbw m0, m5 punpcklbw m1, m5 pmullw m0, filter_x_a pmullw m1, filter_x_b paddw m0, filter_rnd pmullw m2, filter_x_a pmullw m3, filter_x_b paddw m2, filter_rnd paddw m0, m1 paddw m2, m3 %endif psraw m0, 4 psraw m2, 4 INC_SRC_BY_SRC_STRIDE packuswb m0, m2 .x_other_y_other_loop: %if cpuflag(ssse3) movu m4, [srcq] movu m3, [srcq+1] mova m1, [refq] punpckhbw m2, m4, m3 punpcklbw m4, m3 pmaddubsw m2, filter_x_a pmaddubsw m4, filter_x_a punpckhbw m3, m1, m5 paddw m2, filter_rnd paddw m4, filter_rnd psraw m2, 4 psraw m4, 4 packuswb m4, m2 punpckhbw m2, m0, m4 punpcklbw m0, m4 pmaddubsw m2, filter_y_a pmaddubsw m0, filter_y_a punpcklbw m1, m5 paddw m2, filter_rnd paddw m0, filter_rnd psraw m2, 4 psraw m0, 4 %else movu m3, [srcq] movu m4, [srcq+1] punpckhbw m1, m3, m5 punpckhbw m2, m4, m5 punpcklbw m3, m5 punpcklbw m4, m5 pmullw m3, filter_x_a pmullw m4, filter_x_b paddw m3, filter_rnd pmullw m1, filter_x_a pmullw m2, filter_x_b paddw m1, filter_rnd paddw m3, m4 paddw m1, m2 psraw m3, 4 psraw m1, 4 packuswb m4, m3, m1 punpckhbw m2, m0, m5 punpcklbw m0, m5 pmullw m2, filter_y_a pmullw m1, filter_y_b paddw m2, filter_rnd pmullw m0, filter_y_a pmullw m3, filter_y_b paddw m2, m1 mova m1, [refq] paddw m0, filter_rnd psraw m2, 4 paddw m0, m3 punpckhbw m3, m1, m5 psraw m0, 4 punpcklbw m1, m5 %endif %if %2 == 1 ; avg ; FIXME(rbultje) pipeline packuswb m0, m2 pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %endif SUM_SSE m0, m1, m2, m3, m6, m7 mova m0, m4 INC_SRC_BY_SRC_STRIDE add refq, ref_strideq %else ; %1 < 16 movx m0, [srcq] movx m1, [srcq+1] %if cpuflag(ssse3) punpcklbw m0, m1 pmaddubsw m0, filter_x_a paddw m0, filter_rnd %else punpcklbw m0, m5 punpcklbw m1, m5 pmullw m0, filter_x_a pmullw m1, filter_x_b paddw m0, filter_rnd paddw m0, m1 %endif psraw m0, 4 %if cpuflag(ssse3) packuswb m0, m0 %endif INC_SRC_BY_SRC_STRIDE .x_other_y_other_loop: movx m2, [srcq] movx m1, [srcq+1] INC_SRC_BY_SRC_STRIDE movx m4, [srcq] movx m3, [srcq+1] %if 
cpuflag(ssse3) punpcklbw m2, m1 punpcklbw m4, m3 pmaddubsw m2, filter_x_a pmaddubsw m4, filter_x_a movx m3, [refq+ref_strideq] movx m1, [refq] paddw m2, filter_rnd paddw m4, filter_rnd psraw m2, 4 psraw m4, 4 packuswb m2, m2 packuswb m4, m4 punpcklbw m0, m2 punpcklbw m2, m4 pmaddubsw m0, filter_y_a pmaddubsw m2, filter_y_a punpcklbw m3, m5 paddw m0, filter_rnd paddw m2, filter_rnd psraw m0, 4 psraw m2, 4 punpcklbw m1, m5 %else punpcklbw m2, m5 punpcklbw m1, m5 punpcklbw m4, m5 punpcklbw m3, m5 pmullw m2, filter_x_a pmullw m1, filter_x_b paddw m2, filter_rnd pmullw m4, filter_x_a pmullw m3, filter_x_b paddw m4, filter_rnd paddw m2, m1 paddw m4, m3 psraw m2, 4 psraw m4, 4 pmullw m0, filter_y_a pmullw m3, m2, filter_y_b paddw m0, filter_rnd pmullw m2, filter_y_a pmullw m1, m4, filter_y_b paddw m2, filter_rnd paddw m0, m3 movx m3, [refq+ref_strideq] paddw m2, m1 movx m1, [refq] psraw m0, 4 psraw m2, 4 punpcklbw m3, m5 punpcklbw m1, m5 %endif %if %2 == 1 ; avg ; FIXME(rbultje) pipeline %if %1 == 4 movlhps m0, m2 %endif packuswb m0, m2 %if %1 > 4 pavgb m0, [second_predq] punpckhbw m2, m0, m5 punpcklbw m0, m5 %else movh m2, [second_predq] pavgb m0, m2 punpcklbw m0, m5 movhlps m2, m0 %endif %endif SUM_SSE m0, m1, m2, m3, m6, m7 mova m0, m4 INC_SRC_BY_SRC_STRIDE lea refq, [refq+ref_strideq*2] %endif %if %2 == 1 ; avg add second_predq, second_str %endif dec block_height jg .x_other_y_other_loop %undef filter_x_a %undef filter_x_b %undef filter_y_a %undef filter_y_b %undef filter_rnd %undef movx STORE_AND_RET %1 %endmacro ; FIXME(rbultje) the non-bilinear versions (i.e. x=0,8&&y=0,8) are identical ; between the ssse3 and non-ssse3 version. It may make sense to merge their ; code in the sense that the ssse3 version would jump to the appropriate ; location in the sse/2 version, rather than duplicating that code in the ; binary. INIT_XMM sse2 SUBPEL_VARIANCE 4 SUBPEL_VARIANCE 8 SUBPEL_VARIANCE 16 INIT_XMM ssse3 SUBPEL_VARIANCE 4 SUBPEL_VARIANCE 8 SUBPEL_VARIANCE 16 INIT_XMM sse2 SUBPEL_VARIANCE 4, 1 SUBPEL_VARIANCE 8, 1 SUBPEL_VARIANCE 16, 1 INIT_XMM ssse3 SUBPEL_VARIANCE 4, 1 SUBPEL_VARIANCE 8, 1 SUBPEL_VARIANCE 16, 1 libvpx-1.8.2/vpx_dsp/x86/subtract_sse2.asm000066400000000000000000000072751357355204000204420ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
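;
; Computes the prediction residual diff = src - pred over a rows x cols
; block. Scalar model, matching the C fallback vpx_subtract_block_c in
; vpx_dsp/subtract.c:
;
;   for (r = 0; r < rows; ++r) {
;     for (c = 0; c < cols; ++c) diff[c] = src[c] - pred[c];
;     diff += diff_stride; src += src_stride; pred += pred_stride;
;   }
;
; The SSE2 body below dispatches on cols (4/8/16/32/64) so each width keeps
; its loads full-register and unrolls two rows per iteration where it can.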
; %include "third_party/x86inc/x86inc.asm" SECTION .text ; void vpx_subtract_block(int rows, int cols, ; int16_t *diff, ptrdiff_t diff_stride, ; const uint8_t *src, ptrdiff_t src_stride, ; const uint8_t *pred, ptrdiff_t pred_stride) INIT_XMM sse2 cglobal subtract_block, 7, 7, 8, \ rows, cols, diff, diff_stride, src, src_stride, \ pred, pred_stride %define pred_str colsq pxor m7, m7 ; dedicated zero register cmp colsd, 4 je .case_4 cmp colsd, 8 je .case_8 cmp colsd, 16 je .case_16 cmp colsd, 32 je .case_32 %macro loop16 6 mova m0, [srcq+%1] mova m4, [srcq+%2] mova m1, [predq+%3] mova m5, [predq+%4] punpckhbw m2, m0, m7 punpckhbw m3, m1, m7 punpcklbw m0, m7 punpcklbw m1, m7 psubw m2, m3 psubw m0, m1 punpckhbw m1, m4, m7 punpckhbw m3, m5, m7 punpcklbw m4, m7 punpcklbw m5, m7 psubw m1, m3 psubw m4, m5 mova [diffq+mmsize*0+%5], m0 mova [diffq+mmsize*1+%5], m2 mova [diffq+mmsize*0+%6], m4 mova [diffq+mmsize*1+%6], m1 %endmacro mov pred_str, pred_stridemp .loop_64: loop16 0*mmsize, 1*mmsize, 0*mmsize, 1*mmsize, 0*mmsize, 2*mmsize loop16 2*mmsize, 3*mmsize, 2*mmsize, 3*mmsize, 4*mmsize, 6*mmsize lea diffq, [diffq+diff_strideq*2] add predq, pred_str add srcq, src_strideq dec rowsd jg .loop_64 RET .case_32: mov pred_str, pred_stridemp .loop_32: loop16 0, mmsize, 0, mmsize, 0, 2*mmsize lea diffq, [diffq+diff_strideq*2] add predq, pred_str add srcq, src_strideq dec rowsd jg .loop_32 RET .case_16: mov pred_str, pred_stridemp .loop_16: loop16 0, src_strideq, 0, pred_str, 0, diff_strideq*2 lea diffq, [diffq+diff_strideq*4] lea predq, [predq+pred_str*2] lea srcq, [srcq+src_strideq*2] sub rowsd, 2 jg .loop_16 RET %macro loop_h 0 movh m0, [srcq] movh m2, [srcq+src_strideq] movh m1, [predq] movh m3, [predq+pred_str] punpcklbw m0, m7 punpcklbw m1, m7 punpcklbw m2, m7 punpcklbw m3, m7 psubw m0, m1 psubw m2, m3 mova [diffq], m0 mova [diffq+diff_strideq*2], m2 %endmacro .case_8: mov pred_str, pred_stridemp .loop_8: loop_h lea diffq, [diffq+diff_strideq*4] lea srcq, [srcq+src_strideq*2] lea predq, [predq+pred_str*2] sub rowsd, 2 jg .loop_8 RET INIT_MMX .case_4: mov pred_str, pred_stridemp .loop_4: loop_h lea diffq, [diffq+diff_strideq*4] lea srcq, [srcq+src_strideq*2] lea predq, [predq+pred_str*2] sub rowsd, 2 jg .loop_4 RET libvpx-1.8.2/vpx_dsp/x86/sum_squares_sse2.c000066400000000000000000000075161357355204000206220ustar00rootroot00000000000000/* * Copyright (c) 2016 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/x86/mem_sse2.h" uint64_t vpx_sum_squares_2d_i16_sse2(const int16_t *src, int stride, int size) { // Over 75% of all calls are with size == 4. 
if (size == 4) { __m128i s[2], sq[2], ss; s[0] = _mm_loadl_epi64((const __m128i *)(src + 0 * stride)); s[0] = loadh_epi64(s[0], src + 1 * stride); s[1] = _mm_loadl_epi64((const __m128i *)(src + 2 * stride)); s[1] = loadh_epi64(s[1], src + 3 * stride); sq[0] = _mm_madd_epi16(s[0], s[0]); sq[1] = _mm_madd_epi16(s[1], s[1]); sq[0] = _mm_add_epi32(sq[0], sq[1]); ss = _mm_add_epi32(sq[0], _mm_srli_si128(sq[0], 8)); ss = _mm_add_epi32(ss, _mm_srli_epi64(ss, 32)); return (uint64_t)_mm_cvtsi128_si32(ss); } else { // Generic case int r = size; const __m128i v_zext_mask_q = _mm_set_epi32(0, 0xffffffff, 0, 0xffffffff); __m128i v_acc_q = _mm_setzero_si128(); assert(size % 8 == 0); do { int c = 0; __m128i v_acc_d = _mm_setzero_si128(); do { const int16_t *const b = src + c; const __m128i v_val_0_w = _mm_load_si128((const __m128i *)(b + 0 * stride)); const __m128i v_val_1_w = _mm_load_si128((const __m128i *)(b + 1 * stride)); const __m128i v_val_2_w = _mm_load_si128((const __m128i *)(b + 2 * stride)); const __m128i v_val_3_w = _mm_load_si128((const __m128i *)(b + 3 * stride)); const __m128i v_val_4_w = _mm_load_si128((const __m128i *)(b + 4 * stride)); const __m128i v_val_5_w = _mm_load_si128((const __m128i *)(b + 5 * stride)); const __m128i v_val_6_w = _mm_load_si128((const __m128i *)(b + 6 * stride)); const __m128i v_val_7_w = _mm_load_si128((const __m128i *)(b + 7 * stride)); const __m128i v_sq_0_d = _mm_madd_epi16(v_val_0_w, v_val_0_w); const __m128i v_sq_1_d = _mm_madd_epi16(v_val_1_w, v_val_1_w); const __m128i v_sq_2_d = _mm_madd_epi16(v_val_2_w, v_val_2_w); const __m128i v_sq_3_d = _mm_madd_epi16(v_val_3_w, v_val_3_w); const __m128i v_sq_4_d = _mm_madd_epi16(v_val_4_w, v_val_4_w); const __m128i v_sq_5_d = _mm_madd_epi16(v_val_5_w, v_val_5_w); const __m128i v_sq_6_d = _mm_madd_epi16(v_val_6_w, v_val_6_w); const __m128i v_sq_7_d = _mm_madd_epi16(v_val_7_w, v_val_7_w); const __m128i v_sum_01_d = _mm_add_epi32(v_sq_0_d, v_sq_1_d); const __m128i v_sum_23_d = _mm_add_epi32(v_sq_2_d, v_sq_3_d); const __m128i v_sum_45_d = _mm_add_epi32(v_sq_4_d, v_sq_5_d); const __m128i v_sum_67_d = _mm_add_epi32(v_sq_6_d, v_sq_7_d); const __m128i v_sum_0123_d = _mm_add_epi32(v_sum_01_d, v_sum_23_d); const __m128i v_sum_4567_d = _mm_add_epi32(v_sum_45_d, v_sum_67_d); v_acc_d = _mm_add_epi32(v_acc_d, v_sum_0123_d); v_acc_d = _mm_add_epi32(v_acc_d, v_sum_4567_d); c += 8; } while (c < size); v_acc_q = _mm_add_epi64(v_acc_q, _mm_and_si128(v_acc_d, v_zext_mask_q)); v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_epi64(v_acc_d, 32)); src += 8 * stride; r -= 8; } while (r); v_acc_q = _mm_add_epi64(v_acc_q, _mm_srli_si128(v_acc_q, 8)); #if VPX_ARCH_X86_64 return (uint64_t)_mm_cvtsi128_si64(v_acc_q); #else { uint64_t tmp; _mm_storel_epi64((__m128i *)&tmp, v_acc_q); return tmp; } #endif } } libvpx-1.8.2/vpx_dsp/x86/transpose_sse2.h000066400000000000000000000312661357355204000202750ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_TRANSPOSE_SSE2_H_ #define VPX_VPX_DSP_X86_TRANSPOSE_SSE2_H_ #include // SSE2 #include "./vpx_config.h" static INLINE __m128i transpose_8bit_4x4(const __m128i *const in) { // Unpack 16 bit elements. 
Goes from: // in[0]: 00 01 02 03 // in[1]: 10 11 12 13 // in[2]: 20 21 22 23 // in[3]: 30 31 32 33 // to: // a0: 00 10 01 11 02 12 03 13 // a1: 20 30 21 31 22 32 23 33 const __m128i a0 = _mm_unpacklo_epi8(in[0], in[1]); const __m128i a1 = _mm_unpacklo_epi8(in[2], in[3]); // Unpack 32 bit elements resulting in: // 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 return _mm_unpacklo_epi16(a0, a1); } static INLINE void transpose_8bit_8x8(const __m128i *const in, __m128i *const out) { // Unpack 8 bit elements. Goes from: // in[0]: 00 01 02 03 04 05 06 07 // in[1]: 10 11 12 13 14 15 16 17 // in[2]: 20 21 22 23 24 25 26 27 // in[3]: 30 31 32 33 34 35 36 37 // in[4]: 40 41 42 43 44 45 46 47 // in[5]: 50 51 52 53 54 55 56 57 // in[6]: 60 61 62 63 64 65 66 67 // in[7]: 70 71 72 73 74 75 76 77 // to: // a0: 00 10 01 11 02 12 03 13 04 14 05 15 06 16 07 17 // a1: 20 30 21 31 22 32 23 33 24 34 25 35 26 36 27 37 // a2: 40 50 41 51 42 52 43 53 44 54 45 55 46 56 47 57 // a3: 60 70 61 71 62 72 63 73 64 74 65 75 66 76 67 77 const __m128i a0 = _mm_unpacklo_epi8(in[0], in[1]); const __m128i a1 = _mm_unpacklo_epi8(in[2], in[3]); const __m128i a2 = _mm_unpacklo_epi8(in[4], in[5]); const __m128i a3 = _mm_unpacklo_epi8(in[6], in[7]); // Unpack 16 bit elements resulting in: // b0: 00 10 20 30 01 11 21 31 02 12 22 32 03 13 23 33 // b1: 40 50 60 70 41 51 61 71 42 52 62 72 43 53 63 73 // b2: 04 14 24 34 05 15 25 35 06 16 26 36 07 17 27 37 // b3: 44 54 64 74 45 55 65 75 46 56 66 76 47 57 67 77 const __m128i b0 = _mm_unpacklo_epi16(a0, a1); const __m128i b1 = _mm_unpackhi_epi16(a0, a1); const __m128i b2 = _mm_unpacklo_epi16(a2, a3); const __m128i b3 = _mm_unpackhi_epi16(a2, a3); // Unpack 32 bit elements resulting in: // c0: 00 10 20 30 40 50 60 70 01 11 21 31 41 51 61 71 // c1: 02 12 22 32 42 52 62 72 03 13 23 33 43 53 63 73 // c2: 04 14 24 34 44 54 64 74 05 15 25 35 45 55 65 75 // c3: 06 16 26 36 46 56 66 76 07 17 27 37 47 57 67 77 const __m128i c0 = _mm_unpacklo_epi32(b0, b2); const __m128i c1 = _mm_unpackhi_epi32(b0, b2); const __m128i c2 = _mm_unpacklo_epi32(b1, b3); const __m128i c3 = _mm_unpackhi_epi32(b1, b3); // Unpack 64 bit elements resulting in: // out[0]: 00 10 20 30 40 50 60 70 // out[1]: 01 11 21 31 41 51 61 71 // out[2]: 02 12 22 32 42 52 62 72 // out[3]: 03 13 23 33 43 53 63 73 // out[4]: 04 14 24 34 44 54 64 74 // out[5]: 05 15 25 35 45 55 65 75 // out[6]: 06 16 26 36 46 56 66 76 // out[7]: 07 17 27 37 47 57 67 77 out[0] = _mm_unpacklo_epi64(c0, c0); out[1] = _mm_unpackhi_epi64(c0, c0); out[2] = _mm_unpacklo_epi64(c1, c1); out[3] = _mm_unpackhi_epi64(c1, c1); out[4] = _mm_unpacklo_epi64(c2, c2); out[5] = _mm_unpackhi_epi64(c2, c2); out[6] = _mm_unpacklo_epi64(c3, c3); out[7] = _mm_unpackhi_epi64(c3, c3); } static INLINE void transpose_16bit_4x4(const __m128i *const in, __m128i *const out) { // Unpack 16 bit elements. Goes from: // in[0]: 00 01 02 03 XX XX XX XX // in[1]: 10 11 12 13 XX XX XX XX // in[2]: 20 21 22 23 XX XX XX XX // in[3]: 30 31 32 33 XX XX XX XX // to: // a0: 00 10 01 11 02 12 03 13 // a1: 20 30 21 31 22 32 23 33 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]); // Unpack 32 bit elements resulting in: // out[0]: 00 10 20 30 01 11 21 31 // out[1]: 02 12 22 32 03 13 23 33 out[0] = _mm_unpacklo_epi32(a0, a1); out[1] = _mm_unpackhi_epi32(a0, a1); } static INLINE void transpose_16bit_4x8(const __m128i *const in, __m128i *const out) { // Unpack 16 bit elements. 
Goes from: // in[0]: 00 01 02 03 XX XX XX XX // in[1]: 10 11 12 13 XX XX XX XX // in[2]: 20 21 22 23 XX XX XX XX // in[3]: 30 31 32 33 XX XX XX XX // in[4]: 40 41 42 43 XX XX XX XX // in[5]: 50 51 52 53 XX XX XX XX // in[6]: 60 61 62 63 XX XX XX XX // in[7]: 70 71 72 73 XX XX XX XX // to: // a0: 00 10 01 11 02 12 03 13 // a1: 20 30 21 31 22 32 23 33 // a2: 40 50 41 51 42 52 43 53 // a3: 60 70 61 71 62 72 63 73 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]); const __m128i a2 = _mm_unpacklo_epi16(in[4], in[5]); const __m128i a3 = _mm_unpacklo_epi16(in[6], in[7]); // Unpack 32 bit elements resulting in: // b0: 00 10 20 30 01 11 21 31 // b1: 40 50 60 70 41 51 61 71 // b2: 02 12 22 32 03 13 23 33 // b3: 42 52 62 72 43 53 63 73 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); const __m128i b1 = _mm_unpacklo_epi32(a2, a3); const __m128i b2 = _mm_unpackhi_epi32(a0, a1); const __m128i b3 = _mm_unpackhi_epi32(a2, a3); // Unpack 64 bit elements resulting in: // out[0]: 00 10 20 30 40 50 60 70 // out[1]: 01 11 21 31 41 51 61 71 // out[2]: 02 12 22 32 42 52 62 72 // out[3]: 03 13 23 33 43 53 63 73 out[0] = _mm_unpacklo_epi64(b0, b1); out[1] = _mm_unpackhi_epi64(b0, b1); out[2] = _mm_unpacklo_epi64(b2, b3); out[3] = _mm_unpackhi_epi64(b2, b3); } static INLINE void transpose_16bit_8x8(const __m128i *const in, __m128i *const out) { // Unpack 16 bit elements. Goes from: // in[0]: 00 01 02 03 04 05 06 07 // in[1]: 10 11 12 13 14 15 16 17 // in[2]: 20 21 22 23 24 25 26 27 // in[3]: 30 31 32 33 34 35 36 37 // in[4]: 40 41 42 43 44 45 46 47 // in[5]: 50 51 52 53 54 55 56 57 // in[6]: 60 61 62 63 64 65 66 67 // in[7]: 70 71 72 73 74 75 76 77 // to: // a0: 00 10 01 11 02 12 03 13 // a1: 20 30 21 31 22 32 23 33 // a2: 40 50 41 51 42 52 43 53 // a3: 60 70 61 71 62 72 63 73 // a4: 04 14 05 15 06 16 07 17 // a5: 24 34 25 35 26 36 27 37 // a6: 44 54 45 55 46 56 47 57 // a7: 64 74 65 75 66 76 67 77 const __m128i a0 = _mm_unpacklo_epi16(in[0], in[1]); const __m128i a1 = _mm_unpacklo_epi16(in[2], in[3]); const __m128i a2 = _mm_unpacklo_epi16(in[4], in[5]); const __m128i a3 = _mm_unpacklo_epi16(in[6], in[7]); const __m128i a4 = _mm_unpackhi_epi16(in[0], in[1]); const __m128i a5 = _mm_unpackhi_epi16(in[2], in[3]); const __m128i a6 = _mm_unpackhi_epi16(in[4], in[5]); const __m128i a7 = _mm_unpackhi_epi16(in[6], in[7]); // Unpack 32 bit elements resulting in: // b0: 00 10 20 30 01 11 21 31 // b1: 40 50 60 70 41 51 61 71 // b2: 04 14 24 34 05 15 25 35 // b3: 44 54 64 74 45 55 65 75 // b4: 02 12 22 32 03 13 23 33 // b5: 42 52 62 72 43 53 63 73 // b6: 06 16 26 36 07 17 27 37 // b7: 46 56 66 76 47 57 67 77 const __m128i b0 = _mm_unpacklo_epi32(a0, a1); const __m128i b1 = _mm_unpacklo_epi32(a2, a3); const __m128i b2 = _mm_unpacklo_epi32(a4, a5); const __m128i b3 = _mm_unpacklo_epi32(a6, a7); const __m128i b4 = _mm_unpackhi_epi32(a0, a1); const __m128i b5 = _mm_unpackhi_epi32(a2, a3); const __m128i b6 = _mm_unpackhi_epi32(a4, a5); const __m128i b7 = _mm_unpackhi_epi32(a6, a7); // Unpack 64 bit elements resulting in: // out[0]: 00 10 20 30 40 50 60 70 // out[1]: 01 11 21 31 41 51 61 71 // out[2]: 02 12 22 32 42 52 62 72 // out[3]: 03 13 23 33 43 53 63 73 // out[4]: 04 14 24 34 44 54 64 74 // out[5]: 05 15 25 35 45 55 65 75 // out[6]: 06 16 26 36 46 56 66 76 // out[7]: 07 17 27 37 47 57 67 77 out[0] = _mm_unpacklo_epi64(b0, b1); out[1] = _mm_unpackhi_epi64(b0, b1); out[2] = _mm_unpacklo_epi64(b4, b5); out[3] = _mm_unpackhi_epi64(b4, b5); out[4] = _mm_unpacklo_epi64(b2, b3); 
out[5] = _mm_unpackhi_epi64(b2, b3); out[6] = _mm_unpacklo_epi64(b6, b7); out[7] = _mm_unpackhi_epi64(b6, b7); } // Transpose in-place static INLINE void transpose_16bit_16x16(__m128i *const left, __m128i *const right) { __m128i tbuf[8]; transpose_16bit_8x8(left, left); transpose_16bit_8x8(right, tbuf); transpose_16bit_8x8(left + 8, right); transpose_16bit_8x8(right + 8, right + 8); left[8] = tbuf[0]; left[9] = tbuf[1]; left[10] = tbuf[2]; left[11] = tbuf[3]; left[12] = tbuf[4]; left[13] = tbuf[5]; left[14] = tbuf[6]; left[15] = tbuf[7]; } static INLINE void transpose_32bit_4x4(const __m128i *const in, __m128i *const out) { // Unpack 32 bit elements. Goes from: // in[0]: 00 01 02 03 // in[1]: 10 11 12 13 // in[2]: 20 21 22 23 // in[3]: 30 31 32 33 // to: // a0: 00 10 01 11 // a1: 20 30 21 31 // a2: 02 12 03 13 // a3: 22 32 23 33 const __m128i a0 = _mm_unpacklo_epi32(in[0], in[1]); const __m128i a1 = _mm_unpacklo_epi32(in[2], in[3]); const __m128i a2 = _mm_unpackhi_epi32(in[0], in[1]); const __m128i a3 = _mm_unpackhi_epi32(in[2], in[3]); // Unpack 64 bit elements resulting in: // out[0]: 00 10 20 30 // out[1]: 01 11 21 31 // out[2]: 02 12 22 32 // out[3]: 03 13 23 33 out[0] = _mm_unpacklo_epi64(a0, a1); out[1] = _mm_unpackhi_epi64(a0, a1); out[2] = _mm_unpacklo_epi64(a2, a3); out[3] = _mm_unpackhi_epi64(a2, a3); } static INLINE void transpose_32bit_4x4x2(const __m128i *const in, __m128i *const out) { // Unpack 32 bit elements. Goes from: // in[0]: 00 01 02 03 // in[1]: 10 11 12 13 // in[2]: 20 21 22 23 // in[3]: 30 31 32 33 // in[4]: 04 05 06 07 // in[5]: 14 15 16 17 // in[6]: 24 25 26 27 // in[7]: 34 35 36 37 // to: // a0: 00 10 01 11 // a1: 20 30 21 31 // a2: 02 12 03 13 // a3: 22 32 23 33 // a4: 04 14 05 15 // a5: 24 34 25 35 // a6: 06 16 07 17 // a7: 26 36 27 37 const __m128i a0 = _mm_unpacklo_epi32(in[0], in[1]); const __m128i a1 = _mm_unpacklo_epi32(in[2], in[3]); const __m128i a2 = _mm_unpackhi_epi32(in[0], in[1]); const __m128i a3 = _mm_unpackhi_epi32(in[2], in[3]); const __m128i a4 = _mm_unpacklo_epi32(in[4], in[5]); const __m128i a5 = _mm_unpacklo_epi32(in[6], in[7]); const __m128i a6 = _mm_unpackhi_epi32(in[4], in[5]); const __m128i a7 = _mm_unpackhi_epi32(in[6], in[7]); // Unpack 64 bit elements resulting in: // out[0]: 00 10 20 30 // out[1]: 01 11 21 31 // out[2]: 02 12 22 32 // out[3]: 03 13 23 33 // out[4]: 04 14 24 34 // out[5]: 05 15 25 35 // out[6]: 06 16 26 36 // out[7]: 07 17 27 37 out[0] = _mm_unpacklo_epi64(a0, a1); out[1] = _mm_unpackhi_epi64(a0, a1); out[2] = _mm_unpacklo_epi64(a2, a3); out[3] = _mm_unpackhi_epi64(a2, a3); out[4] = _mm_unpacklo_epi64(a4, a5); out[5] = _mm_unpackhi_epi64(a4, a5); out[6] = _mm_unpacklo_epi64(a6, a7); out[7] = _mm_unpackhi_epi64(a6, a7); } static INLINE void transpose_32bit_8x4(const __m128i *const in, __m128i *const out) { // Unpack 32 bit elements. 
Goes from: // in[0]: 00 01 02 03 // in[1]: 04 05 06 07 // in[2]: 10 11 12 13 // in[3]: 14 15 16 17 // in[4]: 20 21 22 23 // in[5]: 24 25 26 27 // in[6]: 30 31 32 33 // in[7]: 34 35 36 37 // to: // a0: 00 10 01 11 // a1: 20 30 21 31 // a2: 02 12 03 13 // a3: 22 32 23 33 // a4: 04 14 05 15 // a5: 24 34 25 35 // a6: 06 16 07 17 // a7: 26 36 27 37 const __m128i a0 = _mm_unpacklo_epi32(in[0], in[2]); const __m128i a1 = _mm_unpacklo_epi32(in[4], in[6]); const __m128i a2 = _mm_unpackhi_epi32(in[0], in[2]); const __m128i a3 = _mm_unpackhi_epi32(in[4], in[6]); const __m128i a4 = _mm_unpacklo_epi32(in[1], in[3]); const __m128i a5 = _mm_unpacklo_epi32(in[5], in[7]); const __m128i a6 = _mm_unpackhi_epi32(in[1], in[3]); const __m128i a7 = _mm_unpackhi_epi32(in[5], in[7]); // Unpack 64 bit elements resulting in: // out[0]: 00 10 20 30 // out[1]: 01 11 21 31 // out[2]: 02 12 22 32 // out[3]: 03 13 23 33 // out[4]: 04 14 24 34 // out[5]: 05 15 25 35 // out[6]: 06 16 26 36 // out[7]: 07 17 27 37 out[0] = _mm_unpacklo_epi64(a0, a1); out[1] = _mm_unpackhi_epi64(a0, a1); out[2] = _mm_unpacklo_epi64(a2, a3); out[3] = _mm_unpackhi_epi64(a2, a3); out[4] = _mm_unpacklo_epi64(a4, a5); out[5] = _mm_unpackhi_epi64(a4, a5); out[6] = _mm_unpacklo_epi64(a6, a7); out[7] = _mm_unpackhi_epi64(a6, a7); } #endif // VPX_VPX_DSP_X86_TRANSPOSE_SSE2_H_ libvpx-1.8.2/vpx_dsp/x86/txfm_common_sse2.h000066400000000000000000000025131357355204000205760ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_DSP_X86_TXFM_COMMON_SSE2_H_ #define VPX_VPX_DSP_X86_TXFM_COMMON_SSE2_H_ #include #include "vpx/vpx_integer.h" #define pair_set_epi16(a, b) \ _mm_set_epi16((int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a), \ (int16_t)(b), (int16_t)(a), (int16_t)(b), (int16_t)(a)) #define pair_set_epi32(a, b) \ _mm_set_epi32((int)(b), (int)(a), (int)(b), (int)(a)) #define dual_set_epi16(a, b) \ _mm_set_epi16((int16_t)(b), (int16_t)(b), (int16_t)(b), (int16_t)(b), \ (int16_t)(a), (int16_t)(a), (int16_t)(a), (int16_t)(a)) #define octa_set_epi16(a, b, c, d, e, f, g, h) \ _mm_setr_epi16((int16_t)(a), (int16_t)(b), (int16_t)(c), (int16_t)(d), \ (int16_t)(e), (int16_t)(f), (int16_t)(g), (int16_t)(h)) #endif // VPX_VPX_DSP_X86_TXFM_COMMON_SSE2_H_ libvpx-1.8.2/vpx_dsp/x86/variance_avx2.c000066400000000000000000001067211357355204000200450ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include // AVX2 #include "./vpx_dsp_rtcd.h" /* clang-format off */ DECLARE_ALIGNED(32, static const uint8_t, bilinear_filters_avx2[512]) = { 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 16, 0, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 6, 10, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 4, 12, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, 2, 14, }; DECLARE_ALIGNED(32, static const int8_t, adjacent_sub_avx2[32]) = { 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1 }; /* clang-format on */ static INLINE void variance_kernel_avx2(const __m256i src, const __m256i ref, __m256i *const sse, __m256i *const sum) { const __m256i adj_sub = _mm256_load_si256((__m256i const *)adjacent_sub_avx2); // unpack into pairs of source and reference values const __m256i src_ref0 = _mm256_unpacklo_epi8(src, ref); const __m256i src_ref1 = _mm256_unpackhi_epi8(src, ref); // subtract adjacent elements using src*1 + ref*-1 const __m256i diff0 = _mm256_maddubs_epi16(src_ref0, adj_sub); const __m256i diff1 = _mm256_maddubs_epi16(src_ref1, adj_sub); const __m256i madd0 = _mm256_madd_epi16(diff0, diff0); const __m256i madd1 = _mm256_madd_epi16(diff1, diff1); // add to the running totals *sum = _mm256_add_epi16(*sum, _mm256_add_epi16(diff0, diff1)); *sse = _mm256_add_epi32(*sse, _mm256_add_epi32(madd0, madd1)); } static INLINE void variance_final_from_32bit_sum_avx2(__m256i vsse, __m128i vsum, unsigned int *const sse, int *const sum) { // extract the low lane and add it to the high lane const __m128i sse_reg_128 = _mm_add_epi32(_mm256_castsi256_si128(vsse), _mm256_extractf128_si256(vsse, 1)); // unpack sse and sum registers and add const __m128i sse_sum_lo = _mm_unpacklo_epi32(sse_reg_128, vsum); const __m128i sse_sum_hi = _mm_unpackhi_epi32(sse_reg_128, vsum); const __m128i sse_sum = _mm_add_epi32(sse_sum_lo, sse_sum_hi); // perform the final summation and extract the results const __m128i res = _mm_add_epi32(sse_sum, _mm_srli_si128(sse_sum, 8)); *((int *)sse) = _mm_cvtsi128_si32(res); *((int *)sum) = _mm_extract_epi32(res, 1); } static INLINE void variance_final_from_16bit_sum_avx2(__m256i vsse, __m256i vsum, unsigned int *const sse, int *const sum) { // extract the low lane and add it to the high lane const __m128i sum_reg_128 = _mm_add_epi16(_mm256_castsi256_si128(vsum), _mm256_extractf128_si256(vsum, 1)); const __m128i sum_reg_64 = _mm_add_epi16(sum_reg_128, _mm_srli_si128(sum_reg_128, 8)); const __m128i sum_int32 = _mm_cvtepi16_epi32(sum_reg_64); variance_final_from_32bit_sum_avx2(vsse, sum_int32, sse, sum); } static INLINE __m256i sum_to_32bit_avx2(const __m256i sum) { const __m256i sum_lo = _mm256_cvtepi16_epi32(_mm256_castsi256_si128(sum)); const __m256i sum_hi = _mm256_cvtepi16_epi32(_mm256_extractf128_si256(sum, 1)); return _mm256_add_epi32(sum_lo, sum_hi); } static INLINE void 
variance16_kernel_avx2( const uint8_t *const src, const int src_stride, const uint8_t *const ref, const int ref_stride, __m256i *const sse, __m256i *const sum) { const __m128i s0 = _mm_loadu_si128((__m128i const *)(src + 0 * src_stride)); const __m128i s1 = _mm_loadu_si128((__m128i const *)(src + 1 * src_stride)); const __m128i r0 = _mm_loadu_si128((__m128i const *)(ref + 0 * ref_stride)); const __m128i r1 = _mm_loadu_si128((__m128i const *)(ref + 1 * ref_stride)); const __m256i s = _mm256_inserti128_si256(_mm256_castsi128_si256(s0), s1, 1); const __m256i r = _mm256_inserti128_si256(_mm256_castsi128_si256(r0), r1, 1); variance_kernel_avx2(s, r, sse, sum); } static INLINE void variance32_kernel_avx2(const uint8_t *const src, const uint8_t *const ref, __m256i *const sse, __m256i *const sum) { const __m256i s = _mm256_loadu_si256((__m256i const *)(src)); const __m256i r = _mm256_loadu_si256((__m256i const *)(ref)); variance_kernel_avx2(s, r, sse, sum); } static INLINE void variance16_avx2(const uint8_t *src, const int src_stride, const uint8_t *ref, const int ref_stride, const int h, __m256i *const vsse, __m256i *const vsum) { int i; *vsum = _mm256_setzero_si256(); *vsse = _mm256_setzero_si256(); for (i = 0; i < h; i += 2) { variance16_kernel_avx2(src, src_stride, ref, ref_stride, vsse, vsum); src += 2 * src_stride; ref += 2 * ref_stride; } } static INLINE void variance32_avx2(const uint8_t *src, const int src_stride, const uint8_t *ref, const int ref_stride, const int h, __m256i *const vsse, __m256i *const vsum) { int i; *vsum = _mm256_setzero_si256(); *vsse = _mm256_setzero_si256(); for (i = 0; i < h; i++) { variance32_kernel_avx2(src, ref, vsse, vsum); src += src_stride; ref += ref_stride; } } static INLINE void variance64_avx2(const uint8_t *src, const int src_stride, const uint8_t *ref, const int ref_stride, const int h, __m256i *const vsse, __m256i *const vsum) { int i; *vsum = _mm256_setzero_si256(); for (i = 0; i < h; i++) { variance32_kernel_avx2(src + 0, ref + 0, vsse, vsum); variance32_kernel_avx2(src + 32, ref + 32, vsse, vsum); src += src_stride; ref += ref_stride; } } void vpx_get16x16var_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum) { __m256i vsse, vsum; variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); variance_final_from_16bit_sum_avx2(vsse, vsum, sse, sum); } #define FILTER_SRC(filter) \ /* filter the source */ \ exp_src_lo = _mm256_maddubs_epi16(exp_src_lo, filter); \ exp_src_hi = _mm256_maddubs_epi16(exp_src_hi, filter); \ \ /* add 8 to source */ \ exp_src_lo = _mm256_add_epi16(exp_src_lo, pw8); \ exp_src_hi = _mm256_add_epi16(exp_src_hi, pw8); \ \ /* divide source by 16 */ \ exp_src_lo = _mm256_srai_epi16(exp_src_lo, 4); \ exp_src_hi = _mm256_srai_epi16(exp_src_hi, 4); #define CALC_SUM_SSE_INSIDE_LOOP \ /* expand each byte to 2 bytes */ \ exp_dst_lo = _mm256_unpacklo_epi8(dst_reg, zero_reg); \ exp_dst_hi = _mm256_unpackhi_epi8(dst_reg, zero_reg); \ /* source - dest */ \ exp_src_lo = _mm256_sub_epi16(exp_src_lo, exp_dst_lo); \ exp_src_hi = _mm256_sub_epi16(exp_src_hi, exp_dst_hi); \ /* caculate sum */ \ *sum_reg = _mm256_add_epi16(*sum_reg, exp_src_lo); \ exp_src_lo = _mm256_madd_epi16(exp_src_lo, exp_src_lo); \ *sum_reg = _mm256_add_epi16(*sum_reg, exp_src_hi); \ exp_src_hi = _mm256_madd_epi16(exp_src_hi, exp_src_hi); \ /* calculate sse */ \ *sse_reg = _mm256_add_epi32(*sse_reg, exp_src_lo); \ *sse_reg = _mm256_add_epi32(*sse_reg, exp_src_hi); // final calculation to sum and sse 
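// CALC_SUM_AND_SSE sign-extends the packed 16-bit sums to 32 bits by
// pairing each word with a cmpgt-against-zero mask (all ones for
// negative values), folds the two 128-bit lanes of both accumulators,
// and finishes with horizontal 32-bit adds before extracting the
// scalar sse and sum results.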
#define CALC_SUM_AND_SSE \ res_cmp = _mm256_cmpgt_epi16(zero_reg, sum_reg); \ sse_reg_hi = _mm256_srli_si256(sse_reg, 8); \ sum_reg_lo = _mm256_unpacklo_epi16(sum_reg, res_cmp); \ sum_reg_hi = _mm256_unpackhi_epi16(sum_reg, res_cmp); \ sse_reg = _mm256_add_epi32(sse_reg, sse_reg_hi); \ sum_reg = _mm256_add_epi32(sum_reg_lo, sum_reg_hi); \ \ sse_reg_hi = _mm256_srli_si256(sse_reg, 4); \ sum_reg_hi = _mm256_srli_si256(sum_reg, 8); \ \ sse_reg = _mm256_add_epi32(sse_reg, sse_reg_hi); \ sum_reg = _mm256_add_epi32(sum_reg, sum_reg_hi); \ *((int *)sse) = _mm_cvtsi128_si32(_mm256_castsi256_si128(sse_reg)) + \ _mm_cvtsi128_si32(_mm256_extractf128_si256(sse_reg, 1)); \ sum_reg_hi = _mm256_srli_si256(sum_reg, 4); \ sum_reg = _mm256_add_epi32(sum_reg, sum_reg_hi); \ sum = _mm_cvtsi128_si32(_mm256_castsi256_si128(sum_reg)) + \ _mm_cvtsi128_si32(_mm256_extractf128_si256(sum_reg, 1)); static INLINE void spv32_x0_y0(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int do_sec, int height, __m256i *sum_reg, __m256i *sse_reg) { const __m256i zero_reg = _mm256_setzero_si256(); __m256i exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi; int i; for (i = 0; i < height; i++) { const __m256i dst_reg = _mm256_loadu_si256((__m256i const *)dst); const __m256i src_reg = _mm256_loadu_si256((__m256i const *)src); if (do_sec) { const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i avg_reg = _mm256_avg_epu8(src_reg, sec_reg); exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg); second_pred += second_stride; } else { exp_src_lo = _mm256_unpacklo_epi8(src_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(src_reg, zero_reg); } CALC_SUM_SSE_INSIDE_LOOP src += src_stride; dst += dst_stride; } } // (x == 0, y == 4) or (x == 4, y == 0). sstep determines the direction. 
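// Passing sstep == src_stride averages each pixel with the one below
// it (the y == 4 half-pel case); sstep == 1 averages with the pixel to
// its right (x == 4). The two wrappers below pick the direction.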
static INLINE void spv32_half_zero(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int do_sec, int height, __m256i *sum_reg, __m256i *sse_reg, int sstep) { const __m256i zero_reg = _mm256_setzero_si256(); __m256i exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi; int i; for (i = 0; i < height; i++) { const __m256i dst_reg = _mm256_loadu_si256((__m256i const *)dst); const __m256i src_0 = _mm256_loadu_si256((__m256i const *)src); const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + sstep)); const __m256i src_avg = _mm256_avg_epu8(src_0, src_1); if (do_sec) { const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i avg_reg = _mm256_avg_epu8(src_avg, sec_reg); exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg); second_pred += second_stride; } else { exp_src_lo = _mm256_unpacklo_epi8(src_avg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(src_avg, zero_reg); } CALC_SUM_SSE_INSIDE_LOOP src += src_stride; dst += dst_stride; } } static INLINE void spv32_x0_y4(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int do_sec, int height, __m256i *sum_reg, __m256i *sse_reg) { spv32_half_zero(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, sum_reg, sse_reg, src_stride); } static INLINE void spv32_x4_y0(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int do_sec, int height, __m256i *sum_reg, __m256i *sse_reg) { spv32_half_zero(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, sum_reg, sse_reg, 1); } static INLINE void spv32_x4_y4(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int do_sec, int height, __m256i *sum_reg, __m256i *sse_reg) { const __m256i zero_reg = _mm256_setzero_si256(); const __m256i src_a = _mm256_loadu_si256((__m256i const *)src); const __m256i src_b = _mm256_loadu_si256((__m256i const *)(src + 1)); __m256i prev_src_avg = _mm256_avg_epu8(src_a, src_b); __m256i exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi; int i; src += src_stride; for (i = 0; i < height; i++) { const __m256i dst_reg = _mm256_loadu_si256((__m256i const *)dst); const __m256i src_0 = _mm256_loadu_si256((__m256i const *)(src)); const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + 1)); const __m256i src_avg = _mm256_avg_epu8(src_0, src_1); const __m256i current_avg = _mm256_avg_epu8(prev_src_avg, src_avg); prev_src_avg = src_avg; if (do_sec) { const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i avg_reg = _mm256_avg_epu8(current_avg, sec_reg); exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg); second_pred += second_stride; } else { exp_src_lo = _mm256_unpacklo_epi8(current_avg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(current_avg, zero_reg); } // save current source average CALC_SUM_SSE_INSIDE_LOOP dst += dst_stride; src += src_stride; } } // (x == 0, y == bil) or (x == 4, y == bil). sstep determines the direction. 
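// offset << 5 below selects one of the eight 32-byte rows of
// bilinear_filters_avx2; each row repeats the (16 - 2 * offset,
// 2 * offset) tap pair across the register so _mm256_maddubs_epi16 can
// filter 16 pixel pairs per register.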
static INLINE void spv32_bilin_zero(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int do_sec, int height, __m256i *sum_reg, __m256i *sse_reg, int offset, int sstep) { const __m256i zero_reg = _mm256_setzero_si256(); const __m256i pw8 = _mm256_set1_epi16(8); const __m256i filter = _mm256_load_si256( (__m256i const *)(bilinear_filters_avx2 + (offset << 5))); __m256i exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi; int i; for (i = 0; i < height; i++) { const __m256i dst_reg = _mm256_loadu_si256((__m256i const *)dst); const __m256i src_0 = _mm256_loadu_si256((__m256i const *)src); const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + sstep)); exp_src_lo = _mm256_unpacklo_epi8(src_0, src_1); exp_src_hi = _mm256_unpackhi_epi8(src_0, src_1); FILTER_SRC(filter) if (do_sec) { const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i exp_src = _mm256_packus_epi16(exp_src_lo, exp_src_hi); const __m256i avg_reg = _mm256_avg_epu8(exp_src, sec_reg); second_pred += second_stride; exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg); } CALC_SUM_SSE_INSIDE_LOOP src += src_stride; dst += dst_stride; } } static INLINE void spv32_x0_yb(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int do_sec, int height, __m256i *sum_reg, __m256i *sse_reg, int y_offset) { spv32_bilin_zero(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, sum_reg, sse_reg, y_offset, src_stride); } static INLINE void spv32_xb_y0(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int do_sec, int height, __m256i *sum_reg, __m256i *sse_reg, int x_offset) { spv32_bilin_zero(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, sum_reg, sse_reg, x_offset, 1); } static INLINE void spv32_x4_yb(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int do_sec, int height, __m256i *sum_reg, __m256i *sse_reg, int y_offset) { const __m256i zero_reg = _mm256_setzero_si256(); const __m256i pw8 = _mm256_set1_epi16(8); const __m256i filter = _mm256_load_si256( (__m256i const *)(bilinear_filters_avx2 + (y_offset << 5))); const __m256i src_a = _mm256_loadu_si256((__m256i const *)src); const __m256i src_b = _mm256_loadu_si256((__m256i const *)(src + 1)); __m256i prev_src_avg = _mm256_avg_epu8(src_a, src_b); __m256i exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi; int i; src += src_stride; for (i = 0; i < height; i++) { const __m256i dst_reg = _mm256_loadu_si256((__m256i const *)dst); const __m256i src_0 = _mm256_loadu_si256((__m256i const *)src); const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + 1)); const __m256i src_avg = _mm256_avg_epu8(src_0, src_1); exp_src_lo = _mm256_unpacklo_epi8(prev_src_avg, src_avg); exp_src_hi = _mm256_unpackhi_epi8(prev_src_avg, src_avg); prev_src_avg = src_avg; FILTER_SRC(filter) if (do_sec) { const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i exp_src_avg = _mm256_packus_epi16(exp_src_lo, exp_src_hi); const __m256i avg_reg = _mm256_avg_epu8(exp_src_avg, sec_reg); exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg); second_pred += second_stride; } CALC_SUM_SSE_INSIDE_LOOP dst += dst_stride; src += src_stride; } } 
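// For reference, a scalar sketch of the first-order bilinear blend
// that the spv32_* kernels above and below vectorize. Illustrative
// only, not part of the library; bilinear_ref_px is a hypothetical
// helper name.
static INLINE uint8_t bilinear_ref_px(uint8_t a, uint8_t b, int offset) {
  // Tap weights match the bilinear_filters_avx2 rows: (16 - 2*offset,
  // 2*offset) for offset in 0..7, with 4-bit fixed-point rounding,
  // i.e. add 8 and shift right by 4 as FILTER_SRC does.
  return (uint8_t)((a * (16 - 2 * offset) + b * 2 * offset + 8) >> 4);
}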
static INLINE void spv32_xb_y4(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int do_sec, int height, __m256i *sum_reg, __m256i *sse_reg, int x_offset) { const __m256i zero_reg = _mm256_setzero_si256(); const __m256i pw8 = _mm256_set1_epi16(8); const __m256i filter = _mm256_load_si256( (__m256i const *)(bilinear_filters_avx2 + (x_offset << 5))); const __m256i src_a = _mm256_loadu_si256((__m256i const *)src); const __m256i src_b = _mm256_loadu_si256((__m256i const *)(src + 1)); __m256i exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi; __m256i src_reg, src_pack; int i; exp_src_lo = _mm256_unpacklo_epi8(src_a, src_b); exp_src_hi = _mm256_unpackhi_epi8(src_a, src_b); FILTER_SRC(filter) // convert each 16 bit to 8 bit to each low and high lane source src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi); src += src_stride; for (i = 0; i < height; i++) { const __m256i dst_reg = _mm256_loadu_si256((__m256i const *)dst); const __m256i src_0 = _mm256_loadu_si256((__m256i const *)src); const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + 1)); exp_src_lo = _mm256_unpacklo_epi8(src_0, src_1); exp_src_hi = _mm256_unpackhi_epi8(src_0, src_1); FILTER_SRC(filter) src_reg = _mm256_packus_epi16(exp_src_lo, exp_src_hi); // average between previous pack to the current src_pack = _mm256_avg_epu8(src_pack, src_reg); if (do_sec) { const __m256i sec_reg = _mm256_loadu_si256((__m256i const *)second_pred); const __m256i avg_pack = _mm256_avg_epu8(src_pack, sec_reg); exp_src_lo = _mm256_unpacklo_epi8(avg_pack, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_pack, zero_reg); second_pred += second_stride; } else { exp_src_lo = _mm256_unpacklo_epi8(src_pack, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(src_pack, zero_reg); } CALC_SUM_SSE_INSIDE_LOOP src_pack = src_reg; dst += dst_stride; src += src_stride; } } static INLINE void spv32_xb_yb(const uint8_t *src, int src_stride, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int do_sec, int height, __m256i *sum_reg, __m256i *sse_reg, int x_offset, int y_offset) { const __m256i zero_reg = _mm256_setzero_si256(); const __m256i pw8 = _mm256_set1_epi16(8); const __m256i xfilter = _mm256_load_si256( (__m256i const *)(bilinear_filters_avx2 + (x_offset << 5))); const __m256i yfilter = _mm256_load_si256( (__m256i const *)(bilinear_filters_avx2 + (y_offset << 5))); const __m256i src_a = _mm256_loadu_si256((__m256i const *)src); const __m256i src_b = _mm256_loadu_si256((__m256i const *)(src + 1)); __m256i exp_src_lo, exp_src_hi, exp_dst_lo, exp_dst_hi; __m256i prev_src_pack, src_pack; int i; exp_src_lo = _mm256_unpacklo_epi8(src_a, src_b); exp_src_hi = _mm256_unpackhi_epi8(src_a, src_b); FILTER_SRC(xfilter) // convert each 16 bit to 8 bit to each low and high lane source prev_src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi); src += src_stride; for (i = 0; i < height; i++) { const __m256i dst_reg = _mm256_loadu_si256((__m256i const *)dst); const __m256i src_0 = _mm256_loadu_si256((__m256i const *)src); const __m256i src_1 = _mm256_loadu_si256((__m256i const *)(src + 1)); exp_src_lo = _mm256_unpacklo_epi8(src_0, src_1); exp_src_hi = _mm256_unpackhi_epi8(src_0, src_1); FILTER_SRC(xfilter) src_pack = _mm256_packus_epi16(exp_src_lo, exp_src_hi); // merge previous pack to current pack source exp_src_lo = _mm256_unpacklo_epi8(prev_src_pack, src_pack); exp_src_hi = _mm256_unpackhi_epi8(prev_src_pack, src_pack); FILTER_SRC(yfilter) if (do_sec) { const __m256i sec_reg 
= _mm256_loadu_si256((__m256i const *)second_pred); const __m256i exp_src = _mm256_packus_epi16(exp_src_lo, exp_src_hi); const __m256i avg_reg = _mm256_avg_epu8(exp_src, sec_reg); exp_src_lo = _mm256_unpacklo_epi8(avg_reg, zero_reg); exp_src_hi = _mm256_unpackhi_epi8(avg_reg, zero_reg); second_pred += second_stride; } prev_src_pack = src_pack; CALC_SUM_SSE_INSIDE_LOOP dst += dst_stride; src += src_stride; } } static INLINE int sub_pix_var32xh(const uint8_t *src, int src_stride, int x_offset, int y_offset, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int do_sec, int height, unsigned int *sse) { const __m256i zero_reg = _mm256_setzero_si256(); __m256i sum_reg = _mm256_setzero_si256(); __m256i sse_reg = _mm256_setzero_si256(); __m256i sse_reg_hi, res_cmp, sum_reg_lo, sum_reg_hi; int sum; // x_offset = 0 and y_offset = 0 if (x_offset == 0) { if (y_offset == 0) { spv32_x0_y0(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, &sum_reg, &sse_reg); // x_offset = 0 and y_offset = 4 } else if (y_offset == 4) { spv32_x0_y4(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, &sum_reg, &sse_reg); // x_offset = 0 and y_offset = bilin interpolation } else { spv32_x0_yb(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, &sum_reg, &sse_reg, y_offset); } // x_offset = 4 and y_offset = 0 } else if (x_offset == 4) { if (y_offset == 0) { spv32_x4_y0(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, &sum_reg, &sse_reg); // x_offset = 4 and y_offset = 4 } else if (y_offset == 4) { spv32_x4_y4(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, &sum_reg, &sse_reg); // x_offset = 4 and y_offset = bilin interpolation } else { spv32_x4_yb(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, &sum_reg, &sse_reg, y_offset); } // x_offset = bilin interpolation and y_offset = 0 } else { if (y_offset == 0) { spv32_xb_y0(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, &sum_reg, &sse_reg, x_offset); // x_offset = bilin interpolation and y_offset = 4 } else if (y_offset == 4) { spv32_xb_y4(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, &sum_reg, &sse_reg, x_offset); // x_offset = bilin interpolation and y_offset = bilin interpolation } else { spv32_xb_yb(src, src_stride, dst, dst_stride, second_pred, second_stride, do_sec, height, &sum_reg, &sse_reg, x_offset, y_offset); } } CALC_SUM_AND_SSE return sum; } static unsigned int sub_pixel_variance32xh_avx2( const uint8_t *src, int src_stride, int x_offset, int y_offset, const uint8_t *dst, int dst_stride, int height, unsigned int *sse) { return sub_pix_var32xh(src, src_stride, x_offset, y_offset, dst, dst_stride, NULL, 0, 0, height, sse); } static unsigned int sub_pixel_avg_variance32xh_avx2( const uint8_t *src, int src_stride, int x_offset, int y_offset, const uint8_t *dst, int dst_stride, const uint8_t *second_pred, int second_stride, int height, unsigned int *sse) { return sub_pix_var32xh(src, src_stride, x_offset, y_offset, dst, dst_stride, second_pred, second_stride, 1, height, sse); } typedef void (*get_var_avx2)(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum); unsigned int vpx_variance16x8_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum; __m256i vsse, vsum; variance16_avx2(src_ptr, src_stride, 
ref_ptr, ref_stride, 8, &vsse, &vsum); variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 7); } unsigned int vpx_variance16x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum; __m256i vsse, vsum; variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 8); } unsigned int vpx_variance16x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum; __m256i vsse, vsum; variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum); variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 9); } unsigned int vpx_variance32x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum; __m256i vsse, vsum; variance32_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 9); } unsigned int vpx_variance32x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum; __m256i vsse, vsum; __m128i vsum_128; variance32_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum); vsum_128 = _mm_add_epi16(_mm256_castsi256_si128(vsum), _mm256_extractf128_si256(vsum, 1)); vsum_128 = _mm_add_epi32(_mm_cvtepi16_epi32(vsum_128), _mm_cvtepi16_epi32(_mm_srli_si128(vsum_128, 8))); variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 10); } unsigned int vpx_variance32x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum; __m256i vsse, vsum; __m128i vsum_128; variance32_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 64, &vsse, &vsum); vsum = sum_to_32bit_avx2(vsum); vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum), _mm256_extractf128_si256(vsum, 1)); variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 11); } unsigned int vpx_variance64x32_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m256i vsse = _mm256_setzero_si256(); __m256i vsum = _mm256_setzero_si256(); __m128i vsum_128; int sum; variance64_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum); vsum = sum_to_32bit_avx2(vsum); vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum), _mm256_extractf128_si256(vsum, 1)); variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 11); } unsigned int vpx_variance64x64_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m256i vsse = _mm256_setzero_si256(); __m256i vsum = _mm256_setzero_si256(); __m128i vsum_128; int sum; int i = 0; for (i = 0; i < 2; i++) { __m256i vsum16; variance64_avx2(src_ptr + 32 * i * src_stride, src_stride, ref_ptr + 32 * i * ref_stride, ref_stride, 32, &vsse, &vsum16); vsum = _mm256_add_epi32(vsum, sum_to_32bit_avx2(vsum16)); } vsum_128 = _mm_add_epi32(_mm256_castsi256_si128(vsum), _mm256_extractf128_si256(vsum, 1)); variance_final_from_32bit_sum_avx2(vsse, vsum_128, sse, &sum); return *sse - (unsigned int)(((int64_t)sum * sum) >> 12); 
} unsigned int vpx_mse16x8_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum; __m256i vsse, vsum; variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum); variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); return *sse; } unsigned int vpx_mse16x16_avx2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { int sum; __m256i vsse, vsum; variance16_avx2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); variance_final_from_16bit_sum_avx2(vsse, vsum, sse, &sum); return *sse; } unsigned int vpx_sub_pixel_variance64x64_avx2( const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { unsigned int sse1; const int se1 = sub_pixel_variance32xh_avx2( src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, 64, &sse1); unsigned int sse2; const int se2 = sub_pixel_variance32xh_avx2(src_ptr + 32, src_stride, x_offset, y_offset, ref_ptr + 32, ref_stride, 64, &sse2); const int se = se1 + se2; *sse = sse1 + sse2; return *sse - (uint32_t)(((int64_t)se * se) >> 12); } unsigned int vpx_sub_pixel_variance32x32_avx2( const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { const int se = sub_pixel_variance32xh_avx2( src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, 32, sse); return *sse - (uint32_t)(((int64_t)se * se) >> 10); } unsigned int vpx_sub_pixel_avg_variance64x64_avx2( const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred) { unsigned int sse1; const int se1 = sub_pixel_avg_variance32xh_avx2(src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, second_pred, 64, 64, &sse1); unsigned int sse2; const int se2 = sub_pixel_avg_variance32xh_avx2( src_ptr + 32, src_stride, x_offset, y_offset, ref_ptr + 32, ref_stride, second_pred + 32, 64, 64, &sse2); const int se = se1 + se2; *sse = sse1 + sse2; return *sse - (uint32_t)(((int64_t)se * se) >> 12); } unsigned int vpx_sub_pixel_avg_variance32x32_avx2( const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, const uint8_t *second_pred) { // Process 32 elements in parallel. const int se = sub_pixel_avg_variance32xh_avx2(src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, second_pred, 32, 32, sse); return *sse - (uint32_t)(((int64_t)se * se) >> 10); } libvpx-1.8.2/vpx_dsp/x86/variance_sse2.c000066400000000000000000000566121357355204000200440ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include // SSE2 #include "./vpx_config.h" #include "./vpx_dsp_rtcd.h" #include "vpx_ports/mem.h" #include "vpx_dsp/x86/mem_sse2.h" static INLINE unsigned int add32x4_sse2(__m128i val) { val = _mm_add_epi32(val, _mm_srli_si128(val, 8)); val = _mm_add_epi32(val, _mm_srli_si128(val, 4)); return _mm_cvtsi128_si32(val); } unsigned int vpx_get_mb_ss_sse2(const int16_t *src_ptr) { __m128i vsum = _mm_setzero_si128(); int i; for (i = 0; i < 32; ++i) { const __m128i v = _mm_loadu_si128((const __m128i *)src_ptr); vsum = _mm_add_epi32(vsum, _mm_madd_epi16(v, v)); src_ptr += 8; } return add32x4_sse2(vsum); } static INLINE __m128i load4x2_sse2(const uint8_t *const p, const int stride) { const __m128i p0 = _mm_cvtsi32_si128(loadu_uint32(p + 0 * stride)); const __m128i p1 = _mm_cvtsi32_si128(loadu_uint32(p + 1 * stride)); const __m128i p01 = _mm_unpacklo_epi32(p0, p1); return _mm_unpacklo_epi8(p01, _mm_setzero_si128()); } static INLINE void variance_kernel_sse2(const __m128i src_ptr, const __m128i ref_ptr, __m128i *const sse, __m128i *const sum) { const __m128i diff = _mm_sub_epi16(src_ptr, ref_ptr); *sse = _mm_add_epi32(*sse, _mm_madd_epi16(diff, diff)); *sum = _mm_add_epi16(*sum, diff); } // Can handle 128 pixels' diff sum (such as 8x16 or 16x8) // Slightly faster than variance_final_256_pel_sse2() static INLINE void variance_final_128_pel_sse2(__m128i vsse, __m128i vsum, unsigned int *const sse, int *const sum) { *sse = add32x4_sse2(vsse); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 2)); *sum = (int16_t)_mm_extract_epi16(vsum, 0); } // Can handle 256 pixels' diff sum (such as 16x16) static INLINE void variance_final_256_pel_sse2(__m128i vsse, __m128i vsum, unsigned int *const sse, int *const sum) { *sse = add32x4_sse2(vsse); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 4)); *sum = (int16_t)_mm_extract_epi16(vsum, 0); *sum += (int16_t)_mm_extract_epi16(vsum, 1); } // Can handle 512 pixels' diff sum (such as 16x32 or 32x16) static INLINE void variance_final_512_pel_sse2(__m128i vsse, __m128i vsum, unsigned int *const sse, int *const sum) { *sse = add32x4_sse2(vsse); vsum = _mm_add_epi16(vsum, _mm_srli_si128(vsum, 8)); vsum = _mm_unpacklo_epi16(vsum, vsum); vsum = _mm_srai_epi32(vsum, 16); *sum = add32x4_sse2(vsum); } static INLINE __m128i sum_to_32bit_sse2(const __m128i sum) { const __m128i sum_lo = _mm_srai_epi32(_mm_unpacklo_epi16(sum, sum), 16); const __m128i sum_hi = _mm_srai_epi32(_mm_unpackhi_epi16(sum, sum), 16); return _mm_add_epi32(sum_lo, sum_hi); } // Can handle 1024 pixels' diff sum (such as 32x32) static INLINE int sum_final_sse2(const __m128i sum) { const __m128i t = sum_to_32bit_sse2(sum); return add32x4_sse2(t); } static INLINE void variance4_sse2(const uint8_t *src_ptr, const int src_stride, const uint8_t *ref_ptr, const int ref_stride, const int h, __m128i *const sse, __m128i *const sum) { int i; assert(h <= 256); // May overflow for larger height. 
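  // Each 16-bit lane of *sum accumulates one pixel difference per
  // two-row iteration (h / 2 iterations), so its magnitude is bounded
  // by 255 * h / 2; h == 256 gives 32640, just inside int16_t range,
  // which is why larger heights would overflow.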
*sse = _mm_setzero_si128(); *sum = _mm_setzero_si128(); for (i = 0; i < h; i += 2) { const __m128i s = load4x2_sse2(src_ptr, src_stride); const __m128i r = load4x2_sse2(ref_ptr, ref_stride); variance_kernel_sse2(s, r, sse, sum); src_ptr += 2 * src_stride; ref_ptr += 2 * ref_stride; } } static INLINE void variance8_sse2(const uint8_t *src_ptr, const int src_stride, const uint8_t *ref_ptr, const int ref_stride, const int h, __m128i *const sse, __m128i *const sum) { const __m128i zero = _mm_setzero_si128(); int i; assert(h <= 128); // May overflow for larger height. *sse = _mm_setzero_si128(); *sum = _mm_setzero_si128(); for (i = 0; i < h; i++) { const __m128i s = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)src_ptr), zero); const __m128i r = _mm_unpacklo_epi8(_mm_loadl_epi64((const __m128i *)ref_ptr), zero); variance_kernel_sse2(s, r, sse, sum); src_ptr += src_stride; ref_ptr += ref_stride; } } static INLINE void variance16_kernel_sse2(const uint8_t *const src_ptr, const uint8_t *const ref_ptr, __m128i *const sse, __m128i *const sum) { const __m128i zero = _mm_setzero_si128(); const __m128i s = _mm_loadu_si128((const __m128i *)src_ptr); const __m128i r = _mm_loadu_si128((const __m128i *)ref_ptr); const __m128i src0 = _mm_unpacklo_epi8(s, zero); const __m128i ref0 = _mm_unpacklo_epi8(r, zero); const __m128i src1 = _mm_unpackhi_epi8(s, zero); const __m128i ref1 = _mm_unpackhi_epi8(r, zero); variance_kernel_sse2(src0, ref0, sse, sum); variance_kernel_sse2(src1, ref1, sse, sum); } static INLINE void variance16_sse2(const uint8_t *src_ptr, const int src_stride, const uint8_t *ref_ptr, const int ref_stride, const int h, __m128i *const sse, __m128i *const sum) { int i; assert(h <= 64); // May overflow for larger height. *sse = _mm_setzero_si128(); *sum = _mm_setzero_si128(); for (i = 0; i < h; ++i) { variance16_kernel_sse2(src_ptr, ref_ptr, sse, sum); src_ptr += src_stride; ref_ptr += ref_stride; } } static INLINE void variance32_sse2(const uint8_t *src_ptr, const int src_stride, const uint8_t *ref_ptr, const int ref_stride, const int h, __m128i *const sse, __m128i *const sum) { int i; assert(h <= 32); // May overflow for larger height. // Don't initialize sse here since it's an accumulation. *sum = _mm_setzero_si128(); for (i = 0; i < h; ++i) { variance16_kernel_sse2(src_ptr + 0, ref_ptr + 0, sse, sum); variance16_kernel_sse2(src_ptr + 16, ref_ptr + 16, sse, sum); src_ptr += src_stride; ref_ptr += ref_stride; } } static INLINE void variance64_sse2(const uint8_t *src_ptr, const int src_stride, const uint8_t *ref_ptr, const int ref_stride, const int h, __m128i *const sse, __m128i *const sum) { int i; assert(h <= 16); // May overflow for larger height. // Don't initialize sse here since it's an accumulation. 
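  // Callers covering more than 32 rows (e.g. vpx_variance32x64_sse2)
  // zero the sse accumulator once, call this helper per 32-row slab,
  // and widen the 16-bit sums to 32 bits between calls.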
*sum = _mm_setzero_si128(); for (i = 0; i < h; ++i) { variance16_kernel_sse2(src_ptr + 0, ref_ptr + 0, sse, sum); variance16_kernel_sse2(src_ptr + 16, ref_ptr + 16, sse, sum); variance16_kernel_sse2(src_ptr + 32, ref_ptr + 32, sse, sum); variance16_kernel_sse2(src_ptr + 48, ref_ptr + 48, sse, sum); src_ptr += src_stride; ref_ptr += ref_stride; } } void vpx_get8x8var_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum) { __m128i vsse, vsum; variance8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum); variance_final_128_pel_sse2(vsse, vsum, sse, sum); } void vpx_get16x16var_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, int *sum) { __m128i vsse, vsum; variance16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); variance_final_256_pel_sse2(vsse, vsum, sse, sum); } unsigned int vpx_variance4x4_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse, vsum; int sum; variance4_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 4, &vsse, &vsum); variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 4); } unsigned int vpx_variance4x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse, vsum; int sum; variance4_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum); variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 5); } unsigned int vpx_variance8x4_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse, vsum; int sum; variance8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 4, &vsse, &vsum); variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 5); } unsigned int vpx_variance8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse, vsum; int sum; variance8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum); variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 6); } unsigned int vpx_variance8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse, vsum; int sum; variance8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 7); } unsigned int vpx_variance16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse, vsum; int sum; variance16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 8, &vsse, &vsum); variance_final_128_pel_sse2(vsse, vsum, sse, &sum); return *sse - ((sum * sum) >> 7); } unsigned int vpx_variance16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse, vsum; int sum; variance16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); variance_final_256_pel_sse2(vsse, vsum, sse, &sum); return *sse - (uint32_t)(((int64_t)sum * sum) >> 8); } unsigned int vpx_variance16x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse, vsum; int sum; variance16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum); variance_final_512_pel_sse2(vsse, vsum, sse, &sum); return *sse - (unsigned 
int)(((int64_t)sum * sum) >> 9); } unsigned int vpx_variance32x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse = _mm_setzero_si128(); __m128i vsum; int sum; variance32_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 16, &vsse, &vsum); variance_final_512_pel_sse2(vsse, vsum, sse, &sum); return *sse - (unsigned int)(((int64_t)sum * sum) >> 9); } unsigned int vpx_variance32x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse = _mm_setzero_si128(); __m128i vsum; int sum; variance32_sse2(src_ptr, src_stride, ref_ptr, ref_stride, 32, &vsse, &vsum); *sse = add32x4_sse2(vsse); sum = sum_final_sse2(vsum); return *sse - (unsigned int)(((int64_t)sum * sum) >> 10); } unsigned int vpx_variance32x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse = _mm_setzero_si128(); __m128i vsum = _mm_setzero_si128(); int sum; int i = 0; for (i = 0; i < 2; i++) { __m128i vsum16; variance32_sse2(src_ptr + 32 * i * src_stride, src_stride, ref_ptr + 32 * i * ref_stride, ref_stride, 32, &vsse, &vsum16); vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16)); } *sse = add32x4_sse2(vsse); sum = add32x4_sse2(vsum); return *sse - (unsigned int)(((int64_t)sum * sum) >> 11); } unsigned int vpx_variance64x32_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse = _mm_setzero_si128(); __m128i vsum = _mm_setzero_si128(); int sum; int i = 0; for (i = 0; i < 2; i++) { __m128i vsum16; variance64_sse2(src_ptr + 16 * i * src_stride, src_stride, ref_ptr + 16 * i * ref_stride, ref_stride, 16, &vsse, &vsum16); vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16)); } *sse = add32x4_sse2(vsse); sum = add32x4_sse2(vsum); return *sse - (unsigned int)(((int64_t)sum * sum) >> 11); } unsigned int vpx_variance64x64_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { __m128i vsse = _mm_setzero_si128(); __m128i vsum = _mm_setzero_si128(); int sum; int i = 0; for (i = 0; i < 4; i++) { __m128i vsum16; variance64_sse2(src_ptr + 16 * i * src_stride, src_stride, ref_ptr + 16 * i * ref_stride, ref_stride, 16, &vsse, &vsum16); vsum = _mm_add_epi32(vsum, sum_to_32bit_sse2(vsum16)); } *sse = add32x4_sse2(vsse); sum = add32x4_sse2(vsum); return *sse - (unsigned int)(((int64_t)sum * sum) >> 12); } unsigned int vpx_mse8x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { vpx_variance8x8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, sse); return *sse; } unsigned int vpx_mse8x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { vpx_variance8x16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, sse); return *sse; } unsigned int vpx_mse16x8_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { vpx_variance16x8_sse2(src_ptr, src_stride, ref_ptr, ref_stride, sse); return *sse; } unsigned int vpx_mse16x16_sse2(const uint8_t *src_ptr, int src_stride, const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { vpx_variance16x16_sse2(src_ptr, src_stride, ref_ptr, ref_stride, sse); return *sse; } // The 2 unused parameters are place holders for PIC enabled build. 
// These definitions are for functions defined in subpel_variance.asm #define DECL(w, opt) \ int vpx_sub_pixel_variance##w##xh_##opt( \ const uint8_t *src_ptr, ptrdiff_t src_stride, int x_offset, \ int y_offset, const uint8_t *ref_ptr, ptrdiff_t ref_stride, int height, \ unsigned int *sse, void *unused0, void *unused) #define DECLS(opt1, opt2) \ DECL(4, opt1); \ DECL(8, opt1); \ DECL(16, opt1) DECLS(sse2, sse2); DECLS(ssse3, ssse3); #undef DECLS #undef DECL #define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \ unsigned int vpx_sub_pixel_variance##w##x##h##_##opt( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse) { \ unsigned int sse_tmp; \ int se = vpx_sub_pixel_variance##wf##xh_##opt( \ src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, h, \ &sse_tmp, NULL, NULL); \ if (w > wf) { \ unsigned int sse2; \ int se2 = vpx_sub_pixel_variance##wf##xh_##opt( \ src_ptr + 16, src_stride, x_offset, y_offset, ref_ptr + 16, \ ref_stride, h, &sse2, NULL, NULL); \ se += se2; \ sse_tmp += sse2; \ if (w > wf * 2) { \ se2 = vpx_sub_pixel_variance##wf##xh_##opt( \ src_ptr + 32, src_stride, x_offset, y_offset, ref_ptr + 32, \ ref_stride, h, &sse2, NULL, NULL); \ se += se2; \ sse_tmp += sse2; \ se2 = vpx_sub_pixel_variance##wf##xh_##opt( \ src_ptr + 48, src_stride, x_offset, y_offset, ref_ptr + 48, \ ref_stride, h, &sse2, NULL, NULL); \ se += se2; \ sse_tmp += sse2; \ } \ } \ *sse = sse_tmp; \ return sse_tmp - \ (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \ } #define FNS(opt1, opt2) \ FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t)); \ FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t)); \ FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t)); \ FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t)); \ FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t)); \ FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t)); \ FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \ FN(16, 8, 16, 4, 3, opt1, (int32_t), (int32_t)); \ FN(8, 16, 8, 3, 4, opt1, (int32_t), (int32_t)); \ FN(8, 8, 8, 3, 3, opt1, (int32_t), (int32_t)); \ FN(8, 4, 8, 3, 2, opt1, (int32_t), (int32_t)); \ FN(4, 8, 4, 2, 3, opt1, (int32_t), (int32_t)); \ FN(4, 4, 4, 2, 2, opt1, (int32_t), (int32_t)) FNS(sse2, sse2); FNS(ssse3, ssse3); #undef FNS #undef FN // The 2 unused parameters are place holders for PIC enabled build. 
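/* Editor's note (illustration, not upstream code): substituting the first
 * FNS entry above, FN(64, 64, 16, 6, 6, sse2, (int64_t), (int64_t)) expands
 * to roughly
 *
 *   unsigned int vpx_sub_pixel_variance64x64_sse2(...) {
 *     // Filter the four 16-wide columns (x offsets 0, 16, 32, 48) with
 *     // vpx_sub_pixel_variance16xh_sse2, accumulating se and sse_tmp,
 *     // then apply the same identity as the full-pel kernels:
 *     return sse_tmp - (unsigned int)(((int64_t)se * se) >> (6 + 6));
 *   }
 *
 * wf is the widest column the assembly helper handles (16 pixels), and
 * wlog2 + hlog2 equals log2(w * h), so the final subtraction is again
 * variance = sse - sum^2 / (w * h). */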
#define DECL(w, opt) \ int vpx_sub_pixel_avg_variance##w##xh_##opt( \ const uint8_t *src_ptr, ptrdiff_t src_stride, int x_offset, \ int y_offset, const uint8_t *ref_ptr, ptrdiff_t ref_stride, \ const uint8_t *second_pred, ptrdiff_t second_stride, int height, \ unsigned int *sse, void *unused0, void *unused) #define DECLS(opt1, opt2) \ DECL(4, opt1); \ DECL(8, opt1); \ DECL(16, opt1) DECLS(sse2, sse2); DECLS(ssse3, ssse3); #undef DECL #undef DECLS #define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast) \ unsigned int vpx_sub_pixel_avg_variance##w##x##h##_##opt( \ const uint8_t *src_ptr, int src_stride, int x_offset, int y_offset, \ const uint8_t *ref_ptr, int ref_stride, unsigned int *sse, \ const uint8_t *second_pred) { \ unsigned int sse_tmp; \ int se = vpx_sub_pixel_avg_variance##wf##xh_##opt( \ src_ptr, src_stride, x_offset, y_offset, ref_ptr, ref_stride, \ second_pred, w, h, &sse_tmp, NULL, NULL); \ if (w > wf) { \ unsigned int sse2; \ int se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \ src_ptr + 16, src_stride, x_offset, y_offset, ref_ptr + 16, \ ref_stride, second_pred + 16, w, h, &sse2, NULL, NULL); \ se += se2; \ sse_tmp += sse2; \ if (w > wf * 2) { \ se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \ src_ptr + 32, src_stride, x_offset, y_offset, ref_ptr + 32, \ ref_stride, second_pred + 32, w, h, &sse2, NULL, NULL); \ se += se2; \ sse_tmp += sse2; \ se2 = vpx_sub_pixel_avg_variance##wf##xh_##opt( \ src_ptr + 48, src_stride, x_offset, y_offset, ref_ptr + 48, \ ref_stride, second_pred + 48, w, h, &sse2, NULL, NULL); \ se += se2; \ sse_tmp += sse2; \ } \ } \ *sse = sse_tmp; \ return sse_tmp - \ (unsigned int)(cast_prod(cast se * se) >> (wlog2 + hlog2)); \ } #define FNS(opt1, opt2) \ FN(64, 64, 16, 6, 6, opt1, (int64_t), (int64_t)); \ FN(64, 32, 16, 6, 5, opt1, (int64_t), (int64_t)); \ FN(32, 64, 16, 5, 6, opt1, (int64_t), (int64_t)); \ FN(32, 32, 16, 5, 5, opt1, (int64_t), (int64_t)); \ FN(32, 16, 16, 5, 4, opt1, (int64_t), (int64_t)); \ FN(16, 32, 16, 4, 5, opt1, (int64_t), (int64_t)); \ FN(16, 16, 16, 4, 4, opt1, (uint32_t), (int64_t)); \ FN(16, 8, 16, 4, 3, opt1, (uint32_t), (int32_t)); \ FN(8, 16, 8, 3, 4, opt1, (uint32_t), (int32_t)); \ FN(8, 8, 8, 3, 3, opt1, (uint32_t), (int32_t)); \ FN(8, 4, 8, 3, 2, opt1, (uint32_t), (int32_t)); \ FN(4, 8, 4, 2, 3, opt1, (uint32_t), (int32_t)); \ FN(4, 4, 4, 2, 2, opt1, (uint32_t), (int32_t)) FNS(sse2, sse); FNS(ssse3, ssse3); #undef FNS #undef FN libvpx-1.8.2/vpx_dsp/x86/vpx_convolve_copy_sse2.asm000066400000000000000000000147711357355204000223740ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
; %include "third_party/x86inc/x86inc.asm" SECTION .text %macro convolve_fn 1-2 %ifidn %1, avg %define AUX_XMM_REGS 4 %else %define AUX_XMM_REGS 0 %endif %ifidn %2, highbd %define pavg pavgw cglobal %2_convolve_%1, 4, 8, 4+AUX_XMM_REGS, src, src_stride, \ dst, dst_stride, \ f, fxo, fxs, fyo, fys, w, h, bd %else %define pavg pavgb cglobal convolve_%1, 4, 8, 4+AUX_XMM_REGS, src, src_stride, \ dst, dst_stride, \ f, fxo, fxs, fyo, fys, w, h %endif mov r4d, dword wm %ifidn %2, highbd shl r4d, 1 shl src_strideq, 1 shl dst_strideq, 1 %else cmp r4d, 4 je .w4 %endif cmp r4d, 8 je .w8 cmp r4d, 16 je .w16 cmp r4d, 32 je .w32 %ifidn %2, highbd cmp r4d, 64 je .w64 mov r4d, dword hm .loop128: movu m0, [srcq] movu m1, [srcq+16] movu m2, [srcq+32] movu m3, [srcq+48] %ifidn %1, avg pavg m0, [dstq] pavg m1, [dstq+16] pavg m2, [dstq+32] pavg m3, [dstq+48] %endif mova [dstq ], m0 mova [dstq+16], m1 mova [dstq+32], m2 mova [dstq+48], m3 movu m0, [srcq+64] movu m1, [srcq+80] movu m2, [srcq+96] movu m3, [srcq+112] add srcq, src_strideq %ifidn %1, avg pavg m0, [dstq+64] pavg m1, [dstq+80] pavg m2, [dstq+96] pavg m3, [dstq+112] %endif mova [dstq+64], m0 mova [dstq+80], m1 mova [dstq+96], m2 mova [dstq+112], m3 add dstq, dst_strideq dec r4d jnz .loop128 RET %endif .w64: mov r4d, dword hm .loop64: movu m0, [srcq] movu m1, [srcq+16] movu m2, [srcq+32] movu m3, [srcq+48] add srcq, src_strideq %ifidn %1, avg pavg m0, [dstq] pavg m1, [dstq+16] pavg m2, [dstq+32] pavg m3, [dstq+48] %endif mova [dstq ], m0 mova [dstq+16], m1 mova [dstq+32], m2 mova [dstq+48], m3 add dstq, dst_strideq dec r4d jnz .loop64 RET .w32: mov r4d, dword hm .loop32: movu m0, [srcq] movu m1, [srcq+16] movu m2, [srcq+src_strideq] movu m3, [srcq+src_strideq+16] lea srcq, [srcq+src_strideq*2] %ifidn %1, avg pavg m0, [dstq] pavg m1, [dstq +16] pavg m2, [dstq+dst_strideq] pavg m3, [dstq+dst_strideq+16] %endif mova [dstq ], m0 mova [dstq +16], m1 mova [dstq+dst_strideq ], m2 mova [dstq+dst_strideq+16], m3 lea dstq, [dstq+dst_strideq*2] sub r4d, 2 jnz .loop32 RET .w16: mov r4d, dword hm lea r5q, [src_strideq*3] lea r6q, [dst_strideq*3] .loop16: movu m0, [srcq] movu m1, [srcq+src_strideq] movu m2, [srcq+src_strideq*2] movu m3, [srcq+r5q] lea srcq, [srcq+src_strideq*4] %ifidn %1, avg pavg m0, [dstq] pavg m1, [dstq+dst_strideq] pavg m2, [dstq+dst_strideq*2] pavg m3, [dstq+r6q] %endif mova [dstq ], m0 mova [dstq+dst_strideq ], m1 mova [dstq+dst_strideq*2], m2 mova [dstq+r6q ], m3 lea dstq, [dstq+dst_strideq*4] sub r4d, 4 jnz .loop16 RET .w8: mov r4d, dword hm lea r5q, [src_strideq*3] lea r6q, [dst_strideq*3] .loop8: movh m0, [srcq] movh m1, [srcq+src_strideq] movh m2, [srcq+src_strideq*2] movh m3, [srcq+r5q] lea srcq, [srcq+src_strideq*4] %ifidn %1, avg movh m4, [dstq] movh m5, [dstq+dst_strideq] movh m6, [dstq+dst_strideq*2] movh m7, [dstq+r6q] pavg m0, m4 pavg m1, m5 pavg m2, m6 pavg m3, m7 %endif movh [dstq ], m0 movh [dstq+dst_strideq ], m1 movh [dstq+dst_strideq*2], m2 movh [dstq+r6q ], m3 lea dstq, [dstq+dst_strideq*4] sub r4d, 4 jnz .loop8 RET %ifnidn %2, highbd .w4: mov r4d, dword hm lea r5q, [src_strideq*3] lea r6q, [dst_strideq*3] .loop4: movd m0, [srcq] movd m1, [srcq+src_strideq] movd m2, [srcq+src_strideq*2] movd m3, [srcq+r5q] lea srcq, [srcq+src_strideq*4] %ifidn %1, avg movd m4, [dstq] movd m5, [dstq+dst_strideq] movd m6, [dstq+dst_strideq*2] movd m7, [dstq+r6q] pavg m0, m4 pavg m1, m5 pavg m2, m6 pavg m3, m7 %endif movd [dstq ], m0 movd [dstq+dst_strideq ], m1 movd [dstq+dst_strideq*2], m2 movd [dstq+r6q ], m3 lea dstq, [dstq+dst_strideq*4] 
sub r4d, 4 jnz .loop4 RET %endif %endmacro INIT_XMM sse2 convolve_fn copy convolve_fn avg %if CONFIG_VP9_HIGHBITDEPTH convolve_fn copy, highbd convolve_fn avg, highbd %endif libvpx-1.8.2/vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm000066400000000000000000000607121357355204000227500ustar00rootroot00000000000000; ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" ;Note: tap3 and tap4 have to be applied and added after other taps to avoid ;overflow. %macro HIGH_GET_FILTERS_4 0 mov rdx, arg(5) ;filter ptr mov rcx, 0x00000040 movdqa xmm7, [rdx] ;load filters pshuflw xmm0, xmm7, 0b ;k0 pshuflw xmm1, xmm7, 01010101b ;k1 pshuflw xmm2, xmm7, 10101010b ;k2 pshuflw xmm3, xmm7, 11111111b ;k3 psrldq xmm7, 8 pshuflw xmm4, xmm7, 0b ;k4 pshuflw xmm5, xmm7, 01010101b ;k5 pshuflw xmm6, xmm7, 10101010b ;k6 pshuflw xmm7, xmm7, 11111111b ;k7 punpcklwd xmm0, xmm6 punpcklwd xmm2, xmm5 punpcklwd xmm3, xmm4 punpcklwd xmm1, xmm7 movdqa k0k6, xmm0 movdqa k2k5, xmm2 movdqa k3k4, xmm3 movdqa k1k7, xmm1 movq xmm6, rcx pshufd xmm6, xmm6, 0 movdqa krd, xmm6 ;Compute max and min values of a pixel mov rdx, 0x00010001 movsxd rcx, DWORD PTR arg(6) ;bd movq xmm0, rdx movq xmm1, rcx pshufd xmm0, xmm0, 0b movdqa xmm2, xmm0 psllw xmm0, xmm1 psubw xmm0, xmm2 pxor xmm1, xmm1 movdqa max, xmm0 ;max value (for clamping) movdqa min, xmm1 ;min value (for clamping) %endm %macro HIGH_APPLY_FILTER_4 1 punpcklwd xmm0, xmm6 ;two row in one register punpcklwd xmm1, xmm7 punpcklwd xmm2, xmm5 punpcklwd xmm3, xmm4 pmaddwd xmm0, k0k6 ;multiply the filter factors pmaddwd xmm1, k1k7 pmaddwd xmm2, k2k5 pmaddwd xmm3, k3k4 paddd xmm0, xmm1 ;sum paddd xmm0, xmm2 paddd xmm0, xmm3 paddd xmm0, krd ;rounding psrad xmm0, 7 ;shift packssdw xmm0, xmm0 ;pack to word ;clamp the values pminsw xmm0, max pmaxsw xmm0, min %if %1 movq xmm1, [rdi] pavgw xmm0, xmm1 %endif movq [rdi], xmm0 %endm %macro HIGH_GET_FILTERS 0 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr mov rcx, 0x00000040 movdqa xmm7, [rdx] ;load filters pshuflw xmm0, xmm7, 0b ;k0 pshuflw xmm1, xmm7, 01010101b ;k1 pshuflw xmm2, xmm7, 10101010b ;k2 pshuflw xmm3, xmm7, 11111111b ;k3 pshufhw xmm4, xmm7, 0b ;k4 pshufhw xmm5, xmm7, 01010101b ;k5 pshufhw xmm6, xmm7, 10101010b ;k6 pshufhw xmm7, xmm7, 11111111b ;k7 punpcklqdq xmm2, xmm2 punpcklqdq xmm3, xmm3 punpcklwd xmm0, xmm1 punpckhwd xmm6, xmm7 punpckhwd xmm2, xmm5 punpckhwd xmm3, xmm4 movdqa k0k1, xmm0 ;store filter factors on stack movdqa k6k7, xmm6 movdqa k2k5, xmm2 movdqa k3k4, xmm3 movq xmm6, rcx pshufd xmm6, xmm6, 0 movdqa krd, xmm6 ;rounding ;Compute max and min values of a pixel mov rdx, 0x00010001 movsxd rcx, DWORD PTR arg(6) ;bd movq xmm0, rdx movq xmm1, rcx pshufd xmm0, xmm0, 0b movdqa xmm2, xmm0 psllw xmm0, xmm1 psubw xmm0, xmm2 pxor xmm1, xmm1 movdqa max, xmm0 ;max value (for clamping) movdqa min, xmm1 ;min value (for clamping) %endm %macro LOAD_VERT_8 1 movdqu xmm0, [rsi + %1] ;0 movdqu xmm1, [rsi + rax + %1] ;1 movdqu xmm6, [rsi + rdx * 2 + %1] ;6 lea rsi, [rsi + rax] movdqu xmm7, [rsi + rdx * 2 + %1] ;7 movdqu xmm2, [rsi + rax + %1] ;2 movdqu xmm3, [rsi + rax * 2 + %1] ;3 movdqu xmm4, [rsi + rdx + %1] ;4 movdqu xmm5, [rsi + rax * 4 + 
%1] ;5 %endm %macro HIGH_APPLY_FILTER_8 2 movdqu temp, xmm4 movdqa xmm4, xmm0 punpcklwd xmm0, xmm1 punpckhwd xmm4, xmm1 movdqa xmm1, xmm6 punpcklwd xmm6, xmm7 punpckhwd xmm1, xmm7 movdqa xmm7, xmm2 punpcklwd xmm2, xmm5 punpckhwd xmm7, xmm5 movdqu xmm5, temp movdqu temp, xmm4 movdqa xmm4, xmm3 punpcklwd xmm3, xmm5 punpckhwd xmm4, xmm5 movdqu xmm5, temp pmaddwd xmm0, k0k1 pmaddwd xmm5, k0k1 pmaddwd xmm6, k6k7 pmaddwd xmm1, k6k7 pmaddwd xmm2, k2k5 pmaddwd xmm7, k2k5 pmaddwd xmm3, k3k4 pmaddwd xmm4, k3k4 paddd xmm0, xmm6 paddd xmm0, xmm2 paddd xmm0, xmm3 paddd xmm5, xmm1 paddd xmm5, xmm7 paddd xmm5, xmm4 paddd xmm0, krd ;rounding paddd xmm5, krd psrad xmm0, 7 ;shift psrad xmm5, 7 packssdw xmm0, xmm5 ;pack back to word ;clamp the values pminsw xmm0, max pmaxsw xmm0, min %if %1 movdqu xmm1, [rdi + %2] pavgw xmm0, xmm1 %endif movdqu [rdi + %2], xmm0 %endm SECTION .text ;void vpx_highbd_filter_block1d4_v8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pitch, ; unsigned char *output_ptr, ; unsigned int out_pitch, ; unsigned int output_height, ; short *filter ;) global sym(vpx_highbd_filter_block1d4_v8_sse2) PRIVATE sym(vpx_highbd_filter_block1d4_v8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rsi push rdi push rbx ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 7 %define k0k6 [rsp + 16 * 0] %define k2k5 [rsp + 16 * 1] %define k3k4 [rsp + 16 * 2] %define k1k7 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define max [rsp + 16 * 5] %define min [rsp + 16 * 6] HIGH_GET_FILTERS_4 mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rbx, DWORD PTR arg(3) ;out_pitch lea rax, [rax + rax] ;bytes per line lea rbx, [rbx + rbx] lea rdx, [rax + rax * 2] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movq xmm0, [rsi] ;load src: row 0 movq xmm1, [rsi + rax] ;1 movq xmm6, [rsi + rdx * 2] ;6 lea rsi, [rsi + rax] movq xmm7, [rsi + rdx * 2] ;7 movq xmm2, [rsi + rax] ;2 movq xmm3, [rsi + rax * 2] ;3 movq xmm4, [rsi + rdx] ;4 movq xmm5, [rsi + rax * 4] ;5 HIGH_APPLY_FILTER_4 0 lea rdi, [rdi + rbx] dec rcx jnz .loop add rsp, 16 * 7 pop rsp pop rbx ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vpx_highbd_filter_block1d8_v8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pitch, ; unsigned char *output_ptr, ; unsigned int out_pitch, ; unsigned int output_height, ; short *filter ;) global sym(vpx_highbd_filter_block1d8_v8_sse2) PRIVATE sym(vpx_highbd_filter_block1d8_v8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rsi push rdi push rbx ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 8 %define k0k1 [rsp + 16 * 0] %define k6k7 [rsp + 16 * 1] %define k2k5 [rsp + 16 * 2] %define k3k4 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define temp [rsp + 16 * 5] %define max [rsp + 16 * 6] %define min [rsp + 16 * 7] HIGH_GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rbx, DWORD PTR arg(3) ;out_pitch lea rax, [rax + rax] ;bytes per line lea rbx, [rbx + rbx] lea rdx, [rax + rax * 2] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: LOAD_VERT_8 0 HIGH_APPLY_FILTER_8 0, 0 lea rdi, [rdi + rbx] dec rcx jnz .loop add rsp, 16 * 8 pop rsp pop rbx ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vpx_highbd_filter_block1d16_v8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pitch, ; unsigned char *output_ptr, ; unsigned int out_pitch, ; unsigned int output_height, ; short *filter ;) global sym(vpx_highbd_filter_block1d16_v8_sse2) PRIVATE 
sym(vpx_highbd_filter_block1d16_v8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rsi push rdi push rbx ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 8 %define k0k1 [rsp + 16 * 0] %define k6k7 [rsp + 16 * 1] %define k2k5 [rsp + 16 * 2] %define k3k4 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define temp [rsp + 16 * 5] %define max [rsp + 16 * 6] %define min [rsp + 16 * 7] HIGH_GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rbx, DWORD PTR arg(3) ;out_pitch lea rax, [rax + rax] ;bytes per line lea rbx, [rbx + rbx] lea rdx, [rax + rax * 2] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: LOAD_VERT_8 0 HIGH_APPLY_FILTER_8 0, 0 sub rsi, rax LOAD_VERT_8 16 HIGH_APPLY_FILTER_8 0, 16 add rdi, rbx dec rcx jnz .loop add rsp, 16 * 8 pop rsp pop rbx ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_highbd_filter_block1d4_v8_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d4_v8_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rsi push rdi push rbx ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 7 %define k0k6 [rsp + 16 * 0] %define k2k5 [rsp + 16 * 1] %define k3k4 [rsp + 16 * 2] %define k1k7 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define max [rsp + 16 * 5] %define min [rsp + 16 * 6] HIGH_GET_FILTERS_4 mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rbx, DWORD PTR arg(3) ;out_pitch lea rax, [rax + rax] ;bytes per line lea rbx, [rbx + rbx] lea rdx, [rax + rax * 2] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movq xmm0, [rsi] ;load src: row 0 movq xmm1, [rsi + rax] ;1 movq xmm6, [rsi + rdx * 2] ;6 lea rsi, [rsi + rax] movq xmm7, [rsi + rdx * 2] ;7 movq xmm2, [rsi + rax] ;2 movq xmm3, [rsi + rax * 2] ;3 movq xmm4, [rsi + rdx] ;4 movq xmm5, [rsi + rax * 4] ;5 HIGH_APPLY_FILTER_4 1 lea rdi, [rdi + rbx] dec rcx jnz .loop add rsp, 16 * 7 pop rsp pop rbx ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_highbd_filter_block1d8_v8_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d8_v8_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rsi push rdi push rbx ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 8 %define k0k1 [rsp + 16 * 0] %define k6k7 [rsp + 16 * 1] %define k2k5 [rsp + 16 * 2] %define k3k4 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define temp [rsp + 16 * 5] %define max [rsp + 16 * 6] %define min [rsp + 16 * 7] HIGH_GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rbx, DWORD PTR arg(3) ;out_pitch lea rax, [rax + rax] ;bytes per line lea rbx, [rbx + rbx] lea rdx, [rax + rax * 2] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: LOAD_VERT_8 0 HIGH_APPLY_FILTER_8 1, 0 lea rdi, [rdi + rbx] dec rcx jnz .loop add rsp, 16 * 8 pop rsp pop rbx ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_highbd_filter_block1d16_v8_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d16_v8_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rsi push rdi push rbx ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 8 %define k0k1 [rsp + 16 * 0] %define k6k7 [rsp + 16 * 1] %define k2k5 [rsp + 16 * 2] %define k3k4 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define temp [rsp + 16 * 5] %define max [rsp + 16 * 6] %define min [rsp + 16 * 7] HIGH_GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rbx, DWORD PTR arg(3) ;out_pitch lea rax, [rax + rax] ;bytes per line lea rbx, [rbx + rbx] lea rdx, [rax + rax * 2] movsxd rcx, DWORD PTR arg(4) ;output_height 
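; Editor's note (illustration, not upstream code): a 16-wide high-bitdepth
; row is 32 bytes, so the loop below runs the 8-tap vertical filter twice
; per output row, once for each 8-sample half (byte offsets 0 and 16), and
; with the avg flag set it also pavgw-averages the result into [rdi].
; LOAD_VERT_8 advances rsi by one row as a side effect, so the 'sub rsi, rax'
; between the halves rewinds that advance before the second half reloads the
; same eight input rows, leaving rsi a net one row ahead per iteration.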
.loop: LOAD_VERT_8 0 HIGH_APPLY_FILTER_8 1, 0 sub rsi, rax LOAD_VERT_8 16 HIGH_APPLY_FILTER_8 1, 16 add rdi, rbx dec rcx jnz .loop add rsp, 16 * 8 pop rsp pop rbx ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vpx_highbd_filter_block1d4_h8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, ; unsigned char *output_ptr, ; unsigned int output_pitch, ; unsigned int output_height, ; short *filter ;) global sym(vpx_highbd_filter_block1d4_h8_sse2) PRIVATE sym(vpx_highbd_filter_block1d4_h8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 7 %define k0k6 [rsp + 16 * 0] %define k2k5 [rsp + 16 * 1] %define k3k4 [rsp + 16 * 2] %define k1k7 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define max [rsp + 16 * 5] %define min [rsp + 16 * 6] HIGH_GET_FILTERS_4 mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch lea rax, [rax + rax] ;bytes per line lea rdx, [rdx + rdx] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movdqu xmm0, [rsi - 6] ;load src movdqu xmm4, [rsi + 2] movdqa xmm1, xmm0 movdqa xmm6, xmm4 movdqa xmm7, xmm4 movdqa xmm2, xmm0 movdqa xmm3, xmm0 movdqa xmm5, xmm4 psrldq xmm1, 2 psrldq xmm6, 4 psrldq xmm7, 6 psrldq xmm2, 4 psrldq xmm3, 6 psrldq xmm5, 2 HIGH_APPLY_FILTER_4 0 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx jnz .loop add rsp, 16 * 7 pop rsp ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vpx_highbd_filter_block1d8_h8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, ; unsigned char *output_ptr, ; unsigned int output_pitch, ; unsigned int output_height, ; short *filter ;) global sym(vpx_highbd_filter_block1d8_h8_sse2) PRIVATE sym(vpx_highbd_filter_block1d8_h8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 8 %define k0k1 [rsp + 16 * 0] %define k6k7 [rsp + 16 * 1] %define k2k5 [rsp + 16 * 2] %define k3k4 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define temp [rsp + 16 * 5] %define max [rsp + 16 * 6] %define min [rsp + 16 * 7] HIGH_GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch lea rax, [rax + rax] ;bytes per line lea rdx, [rdx + rdx] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movdqu xmm0, [rsi - 6] ;load src movdqu xmm1, [rsi - 4] movdqu xmm2, [rsi - 2] movdqu xmm3, [rsi] movdqu xmm4, [rsi + 2] movdqu xmm5, [rsi + 4] movdqu xmm6, [rsi + 6] movdqu xmm7, [rsi + 8] HIGH_APPLY_FILTER_8 0, 0 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx jnz .loop add rsp, 16 * 8 pop rsp ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vpx_highbd_filter_block1d16_h8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, ; unsigned char *output_ptr, ; unsigned int output_pitch, ; unsigned int output_height, ; short *filter ;) global sym(vpx_highbd_filter_block1d16_h8_sse2) PRIVATE sym(vpx_highbd_filter_block1d16_h8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 8 %define k0k1 [rsp + 16 * 0] %define k6k7 [rsp + 16 * 1] %define k2k5 [rsp + 16 * 2] %define k3k4 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define temp [rsp + 16 * 5] %define max [rsp + 16 * 6] %define min [rsp + 16 * 7] HIGH_GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch lea rax, 
[rax + rax] ;bytes per line lea rdx, [rdx + rdx] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movdqu xmm0, [rsi - 6] ;load src movdqu xmm1, [rsi - 4] movdqu xmm2, [rsi - 2] movdqu xmm3, [rsi] movdqu xmm4, [rsi + 2] movdqu xmm5, [rsi + 4] movdqu xmm6, [rsi + 6] movdqu xmm7, [rsi + 8] HIGH_APPLY_FILTER_8 0, 0 movdqu xmm0, [rsi + 10] ;load src movdqu xmm1, [rsi + 12] movdqu xmm2, [rsi + 14] movdqu xmm3, [rsi + 16] movdqu xmm4, [rsi + 18] movdqu xmm5, [rsi + 20] movdqu xmm6, [rsi + 22] movdqu xmm7, [rsi + 24] HIGH_APPLY_FILTER_8 0, 16 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx jnz .loop add rsp, 16 * 8 pop rsp ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_highbd_filter_block1d4_h8_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d4_h8_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 7 %define k0k6 [rsp + 16 * 0] %define k2k5 [rsp + 16 * 1] %define k3k4 [rsp + 16 * 2] %define k1k7 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define max [rsp + 16 * 5] %define min [rsp + 16 * 6] HIGH_GET_FILTERS_4 mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch lea rax, [rax + rax] ;bytes per line lea rdx, [rdx + rdx] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movdqu xmm0, [rsi - 6] ;load src movdqu xmm4, [rsi + 2] movdqa xmm1, xmm0 movdqa xmm6, xmm4 movdqa xmm7, xmm4 movdqa xmm2, xmm0 movdqa xmm3, xmm0 movdqa xmm5, xmm4 psrldq xmm1, 2 psrldq xmm6, 4 psrldq xmm7, 6 psrldq xmm2, 4 psrldq xmm3, 6 psrldq xmm5, 2 HIGH_APPLY_FILTER_4 1 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx jnz .loop add rsp, 16 * 7 pop rsp ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_highbd_filter_block1d8_h8_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d8_h8_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 8 %define k0k1 [rsp + 16 * 0] %define k6k7 [rsp + 16 * 1] %define k2k5 [rsp + 16 * 2] %define k3k4 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define temp [rsp + 16 * 5] %define max [rsp + 16 * 6] %define min [rsp + 16 * 7] HIGH_GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch lea rax, [rax + rax] ;bytes per line lea rdx, [rdx + rdx] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movdqu xmm0, [rsi - 6] ;load src movdqu xmm1, [rsi - 4] movdqu xmm2, [rsi - 2] movdqu xmm3, [rsi] movdqu xmm4, [rsi + 2] movdqu xmm5, [rsi + 4] movdqu xmm6, [rsi + 6] movdqu xmm7, [rsi + 8] HIGH_APPLY_FILTER_8 1, 0 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx jnz .loop add rsp, 16 * 8 pop rsp ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_highbd_filter_block1d16_h8_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d16_h8_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 7 push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 8 %define k0k1 [rsp + 16 * 0] %define k6k7 [rsp + 16 * 1] %define k2k5 [rsp + 16 * 2] %define k3k4 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define temp [rsp + 16 * 5] %define max [rsp + 16 * 6] %define min [rsp + 16 * 7] HIGH_GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch lea rax, [rax + rax] ;bytes per line lea rdx, [rdx + rdx] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movdqu xmm0, [rsi - 6] ;load src movdqu xmm1, [rsi - 
4] movdqu xmm2, [rsi - 2] movdqu xmm3, [rsi] movdqu xmm4, [rsi + 2] movdqu xmm5, [rsi + 4] movdqu xmm6, [rsi + 6] movdqu xmm7, [rsi + 8] HIGH_APPLY_FILTER_8 1, 0 movdqu xmm0, [rsi + 10] ;load src movdqu xmm1, [rsi + 12] movdqu xmm2, [rsi + 14] movdqu xmm3, [rsi + 16] movdqu xmm4, [rsi + 18] movdqu xmm5, [rsi + 20] movdqu xmm6, [rsi + 22] movdqu xmm7, [rsi + 24] HIGH_APPLY_FILTER_8 1, 16 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx jnz .loop add rsp, 16 * 8 pop rsp ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret libvpx-1.8.2/vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm000066400000000000000000000271771357355204000242120ustar00rootroot00000000000000; ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" %macro HIGH_GET_PARAM_4 0 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr mov rcx, 0x00000040 movdqa xmm3, [rdx] ;load filters pshuflw xmm4, xmm3, 11111111b ;k3 psrldq xmm3, 8 pshuflw xmm3, xmm3, 0b ;k4 punpcklwd xmm4, xmm3 ;k3k4 movq xmm3, rcx ;rounding pshufd xmm3, xmm3, 0 mov rdx, 0x00010001 movsxd rcx, DWORD PTR arg(6) ;bd movq xmm5, rdx movq xmm2, rcx pshufd xmm5, xmm5, 0b movdqa xmm1, xmm5 psllw xmm5, xmm2 psubw xmm5, xmm1 ;max value (for clamping) pxor xmm2, xmm2 ;min value (for clamping) movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch movsxd rcx, DWORD PTR arg(4) ;output_height %endm %macro HIGH_APPLY_FILTER_4 1 punpcklwd xmm0, xmm1 ;two row in one register pmaddwd xmm0, xmm4 ;multiply the filter factors paddd xmm0, xmm3 ;rounding psrad xmm0, 7 ;shift packssdw xmm0, xmm0 ;pack to word ;clamp the values pminsw xmm0, xmm5 pmaxsw xmm0, xmm2 %if %1 movq xmm1, [rdi] pavgw xmm0, xmm1 %endif movq [rdi], xmm0 lea rsi, [rsi + 2*rax] lea rdi, [rdi + 2*rdx] dec rcx %endm %if VPX_ARCH_X86_64 %macro HIGH_GET_PARAM 0 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr mov rcx, 0x00000040 movdqa xmm6, [rdx] ;load filters pshuflw xmm7, xmm6, 11111111b ;k3 pshufhw xmm6, xmm6, 0b ;k4 psrldq xmm6, 8 punpcklwd xmm7, xmm6 ;k3k4k3k4k3k4k3k4 movq xmm4, rcx ;rounding pshufd xmm4, xmm4, 0 mov rdx, 0x00010001 movsxd rcx, DWORD PTR arg(6) ;bd movq xmm8, rdx movq xmm5, rcx pshufd xmm8, xmm8, 0b movdqa xmm1, xmm8 psllw xmm8, xmm5 psubw xmm8, xmm1 ;max value (for clamping) pxor xmm5, xmm5 ;min value (for clamping) movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch movsxd rcx, DWORD PTR arg(4) ;output_height %endm %macro HIGH_APPLY_FILTER_8 1 movdqa xmm6, xmm0 punpckhwd xmm6, xmm1 punpcklwd xmm0, xmm1 pmaddwd xmm6, xmm7 pmaddwd xmm0, xmm7 paddd xmm6, xmm4 ;rounding paddd xmm0, xmm4 ;rounding psrad xmm6, 7 ;shift psrad xmm0, 7 ;shift packssdw xmm0, xmm6 ;pack back to word ;clamp the values pminsw xmm0, xmm8 pmaxsw xmm0, xmm5 %if %1 movdqu xmm1, [rdi] pavgw xmm0, xmm1 %endif movdqu [rdi], xmm0 ;store the result lea rsi, [rsi + 2*rax] lea rdi, [rdi + 2*rdx] dec rcx %endm %macro HIGH_APPLY_FILTER_16 1 movdqa xmm9, xmm0 movdqa xmm6, xmm2 punpckhwd xmm9, xmm1 punpckhwd xmm6, xmm3 punpcklwd xmm0, xmm1 punpcklwd xmm2, xmm3 pmaddwd xmm9, xmm7 pmaddwd xmm6, xmm7 pmaddwd xmm0, xmm7 pmaddwd 
xmm2, xmm7 paddd xmm9, xmm4 ;rounding paddd xmm6, xmm4 paddd xmm0, xmm4 paddd xmm2, xmm4 psrad xmm9, 7 ;shift psrad xmm6, 7 psrad xmm0, 7 psrad xmm2, 7 packssdw xmm0, xmm9 ;pack back to word packssdw xmm2, xmm6 ;pack back to word ;clamp the values pminsw xmm0, xmm8 pmaxsw xmm0, xmm5 pminsw xmm2, xmm8 pmaxsw xmm2, xmm5 %if %1 movdqu xmm1, [rdi] movdqu xmm3, [rdi + 16] pavgw xmm0, xmm1 pavgw xmm2, xmm3 %endif movdqu [rdi], xmm0 ;store the result movdqu [rdi + 16], xmm2 ;store the result lea rsi, [rsi + 2*rax] lea rdi, [rdi + 2*rdx] dec rcx %endm %endif SECTION .text global sym(vpx_highbd_filter_block1d4_v2_sse2) PRIVATE sym(vpx_highbd_filter_block1d4_v2_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 push rsi push rdi ; end prolog HIGH_GET_PARAM_4 .loop: movq xmm0, [rsi] ;load src movq xmm1, [rsi + 2*rax] HIGH_APPLY_FILTER_4 0 jnz .loop ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret %if VPX_ARCH_X86_64 global sym(vpx_highbd_filter_block1d8_v2_sse2) PRIVATE sym(vpx_highbd_filter_block1d8_v2_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 8 push rsi push rdi ; end prolog HIGH_GET_PARAM .loop: movdqu xmm0, [rsi] ;0 movdqu xmm1, [rsi + 2*rax] ;1 HIGH_APPLY_FILTER_8 0 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_highbd_filter_block1d16_v2_sse2) PRIVATE sym(vpx_highbd_filter_block1d16_v2_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 9 push rsi push rdi ; end prolog HIGH_GET_PARAM .loop: movdqu xmm0, [rsi] ;0 movdqu xmm2, [rsi + 16] movdqu xmm1, [rsi + 2*rax] ;1 movdqu xmm3, [rsi + 2*rax + 16] HIGH_APPLY_FILTER_16 0 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret %endif global sym(vpx_highbd_filter_block1d4_v2_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d4_v2_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 push rsi push rdi ; end prolog HIGH_GET_PARAM_4 .loop: movq xmm0, [rsi] ;load src movq xmm1, [rsi + 2*rax] HIGH_APPLY_FILTER_4 1 jnz .loop ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret %if VPX_ARCH_X86_64 global sym(vpx_highbd_filter_block1d8_v2_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d8_v2_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 8 push rsi push rdi ; end prolog HIGH_GET_PARAM .loop: movdqu xmm0, [rsi] ;0 movdqu xmm1, [rsi + 2*rax] ;1 HIGH_APPLY_FILTER_8 1 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_highbd_filter_block1d16_v2_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d16_v2_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 9 push rsi push rdi ; end prolog HIGH_GET_PARAM .loop: movdqu xmm0, [rsi] ;0 movdqu xmm1, [rsi + 2*rax] ;1 movdqu xmm2, [rsi + 16] movdqu xmm3, [rsi + 2*rax + 16] HIGH_APPLY_FILTER_16 1 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret %endif global sym(vpx_highbd_filter_block1d4_h2_sse2) PRIVATE sym(vpx_highbd_filter_block1d4_h2_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 push rsi push rdi ; end prolog HIGH_GET_PARAM_4 .loop: movdqu xmm0, [rsi] ;load src movdqa xmm1, xmm0 psrldq xmm1, 2 HIGH_APPLY_FILTER_4 0 jnz .loop ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret %if VPX_ARCH_X86_64 global sym(vpx_highbd_filter_block1d8_h2_sse2) PRIVATE sym(vpx_highbd_filter_block1d8_h2_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 8 push rsi push rdi ; end prolog HIGH_GET_PARAM .loop: movdqu xmm0, [rsi] ;load src movdqu xmm1, [rsi + 2] HIGH_APPLY_FILTER_8 0 jnz .loop ; begin epilog pop rdi 
pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_highbd_filter_block1d16_h2_sse2) PRIVATE sym(vpx_highbd_filter_block1d16_h2_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 9 push rsi push rdi ; end prolog HIGH_GET_PARAM .loop: movdqu xmm0, [rsi] ;load src movdqu xmm1, [rsi + 2] movdqu xmm2, [rsi + 16] movdqu xmm3, [rsi + 18] HIGH_APPLY_FILTER_16 0 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret %endif global sym(vpx_highbd_filter_block1d4_h2_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d4_h2_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 push rsi push rdi ; end prolog HIGH_GET_PARAM_4 .loop: movdqu xmm0, [rsi] ;load src movdqa xmm1, xmm0 psrldq xmm1, 2 HIGH_APPLY_FILTER_4 1 jnz .loop ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret %if VPX_ARCH_X86_64 global sym(vpx_highbd_filter_block1d8_h2_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d8_h2_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 8 push rsi push rdi ; end prolog HIGH_GET_PARAM .loop: movdqu xmm0, [rsi] ;load src movdqu xmm1, [rsi + 2] HIGH_APPLY_FILTER_8 1 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_highbd_filter_block1d16_h2_avg_sse2) PRIVATE sym(vpx_highbd_filter_block1d16_h2_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 7 SAVE_XMM 9 push rsi push rdi ; end prolog HIGH_GET_PARAM .loop: movdqu xmm0, [rsi] ;load src movdqu xmm1, [rsi + 2] movdqu xmm2, [rsi + 16] movdqu xmm3, [rsi + 18] HIGH_APPLY_FILTER_16 1 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret %endif libvpx-1.8.2/vpx_dsp/x86/vpx_subpixel_4t_intrin_sse2.c000066400000000000000000001425501357355204000227730ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "./vpx_dsp_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_dsp/x86/convolve.h" #include "vpx_dsp/x86/convolve_sse2.h" #include "vpx_ports/mem.h" #define CONV8_ROUNDING_BITS (7) #define CONV8_ROUNDING_NUM (1 << (CONV8_ROUNDING_BITS - 1)) static void vpx_filter_block1d16_h4_sse2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { __m128i kernel_reg; // Kernel __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding int h; __m128i src_reg, src_reg_shift_1, src_reg_shift_2, src_reg_shift_3; __m128i dst_first, dst_second; __m128i even, odd; // Start one pixel before as we need tap/2 - 1 = 1 sample from the past src_ptr -= 1; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm_srai_epi16(kernel_reg, 1); kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); for (h = height; h > 0; --h) { // We will load multiple shifted versions of the row and shuffle them into // 16-bit words of the form // ... s[2] s[1] s[0] s[-1] // ... 
s[4] s[3] s[2] s[1] // Then we call multiply and add to get partial results // s[2]k[3]+s[1]k[2] s[0]k[3]+s[-1]k[2] // s[4]k[5]+s[3]k[4] s[2]k[5]+s[1]k[4] // The two results are then added together for the first half of even // output. // Repeat multiple times to get the whole output src_reg = _mm_loadu_si128((const __m128i *)src_ptr); src_reg_shift_1 = _mm_srli_si128(src_reg, 1); src_reg_shift_2 = _mm_srli_si128(src_reg, 2); src_reg_shift_3 = _mm_srli_si128(src_reg, 3); // Output 6 4 2 0 even = mm_madd_add_epi8_sse2(&src_reg, &src_reg_shift_2, &kernel_reg_23, &kernel_reg_45); // Output 7 5 3 1 odd = mm_madd_add_epi8_sse2(&src_reg_shift_1, &src_reg_shift_3, &kernel_reg_23, &kernel_reg_45); // Combine to get the first half of the dst dst_first = mm_zip_epi32_sse2(&even, &odd); // Do again to get the second half of dst src_reg = _mm_loadu_si128((const __m128i *)(src_ptr + 8)); src_reg_shift_1 = _mm_srli_si128(src_reg, 1); src_reg_shift_2 = _mm_srli_si128(src_reg, 2); src_reg_shift_3 = _mm_srli_si128(src_reg, 3); // Output 14 12 10 8 even = mm_madd_add_epi8_sse2(&src_reg, &src_reg_shift_2, &kernel_reg_23, &kernel_reg_45); // Output 15 13 11 9 odd = mm_madd_add_epi8_sse2(&src_reg_shift_1, &src_reg_shift_3, &kernel_reg_23, &kernel_reg_45); // Combine to get the second half of the dst dst_second = mm_zip_epi32_sse2(&even, &odd); // Round each result dst_first = mm_round_epi16_sse2(&dst_first, &reg_32, 6); dst_second = mm_round_epi16_sse2(&dst_second, &reg_32, 6); // Finally combine to get the final dst dst_first = _mm_packus_epi16(dst_first, dst_second); _mm_store_si128((__m128i *)dst_ptr, dst_first); src_ptr += src_stride; dst_ptr += dst_stride; } } /* The macro used to generate functions shifts the src_ptr up by 3 rows already. */ static void vpx_filter_block1d16_v4_sse2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { // Register for source s[-1:3, :] __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; // Interleaved rows of the source. lo is first half, hi second __m128i src_reg_m10_lo, src_reg_m10_hi, src_reg_01_lo, src_reg_01_hi; __m128i src_reg_12_lo, src_reg_12_hi, src_reg_23_lo, src_reg_23_hi; // Half of half of the interleaved rows __m128i src_reg_m10_lo_1, src_reg_m10_lo_2, src_reg_m10_hi_1, src_reg_m10_hi_2; __m128i src_reg_01_lo_1, src_reg_01_lo_2, src_reg_01_hi_1, src_reg_01_hi_2; __m128i src_reg_12_lo_1, src_reg_12_lo_2, src_reg_12_hi_1, src_reg_12_hi_2; __m128i src_reg_23_lo_1, src_reg_23_lo_2, src_reg_23_hi_1, src_reg_23_hi_2; __m128i kernel_reg; // Kernel __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used // Result after multiply and add __m128i res_reg_m10_lo, res_reg_01_lo, res_reg_12_lo, res_reg_23_lo; __m128i res_reg_m10_hi, res_reg_01_hi, res_reg_12_hi, res_reg_23_hi; __m128i res_reg_m1012, res_reg_0123; __m128i res_reg_m1012_lo, res_reg_0123_lo, res_reg_m1012_hi, res_reg_0123_hi; const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding // We will compute the result two rows at a time const ptrdiff_t src_stride_unrolled = src_stride << 1; const ptrdiff_t dst_stride_unrolled = dst_stride << 1; int h; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm_srai_epi16(kernel_reg, 1); kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); // We will load two rows of pixels as 8-bit words, rearrange them as 16-bit // words, // shuffle the data into the form // ...
s[0,1] s[-1,1] s[0,0] s[-1,0] // ... s[0,7] s[-1,7] s[0,6] s[-1,6] // ... s[0,9] s[-1,9] s[0,8] s[-1,8] // ... s[0,13] s[-1,13] s[0,12] s[-1,12] // so that we can call multiply and add with the kernel to get 32-bit words of // the form // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] // Finally, we can add multiple rows together to get the desired output. // First shuffle the data src_reg_m1 = _mm_loadu_si128((const __m128i *)src_ptr); src_reg_0 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride)); src_reg_m10_lo = _mm_unpacklo_epi8(src_reg_m1, src_reg_0); src_reg_m10_hi = _mm_unpackhi_epi8(src_reg_m1, src_reg_0); src_reg_m10_lo_1 = _mm_unpacklo_epi8(src_reg_m10_lo, _mm_setzero_si128()); src_reg_m10_lo_2 = _mm_unpackhi_epi8(src_reg_m10_lo, _mm_setzero_si128()); src_reg_m10_hi_1 = _mm_unpacklo_epi8(src_reg_m10_hi, _mm_setzero_si128()); src_reg_m10_hi_2 = _mm_unpackhi_epi8(src_reg_m10_hi, _mm_setzero_si128()); // More shuffling src_reg_1 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2)); src_reg_01_lo = _mm_unpacklo_epi8(src_reg_0, src_reg_1); src_reg_01_hi = _mm_unpackhi_epi8(src_reg_0, src_reg_1); src_reg_01_lo_1 = _mm_unpacklo_epi8(src_reg_01_lo, _mm_setzero_si128()); src_reg_01_lo_2 = _mm_unpackhi_epi8(src_reg_01_lo, _mm_setzero_si128()); src_reg_01_hi_1 = _mm_unpacklo_epi8(src_reg_01_hi, _mm_setzero_si128()); src_reg_01_hi_2 = _mm_unpackhi_epi8(src_reg_01_hi, _mm_setzero_si128()); for (h = height; h > 1; h -= 2) { src_reg_2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3)); src_reg_12_lo = _mm_unpacklo_epi8(src_reg_1, src_reg_2); src_reg_12_hi = _mm_unpackhi_epi8(src_reg_1, src_reg_2); src_reg_3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4)); src_reg_23_lo = _mm_unpacklo_epi8(src_reg_2, src_reg_3); src_reg_23_hi = _mm_unpackhi_epi8(src_reg_2, src_reg_3); // Partial output from first half res_reg_m10_lo = mm_madd_packs_epi16_sse2( &src_reg_m10_lo_1, &src_reg_m10_lo_2, &kernel_reg_23); res_reg_01_lo = mm_madd_packs_epi16_sse2(&src_reg_01_lo_1, &src_reg_01_lo_2, &kernel_reg_23); src_reg_12_lo_1 = _mm_unpacklo_epi8(src_reg_12_lo, _mm_setzero_si128()); src_reg_12_lo_2 = _mm_unpackhi_epi8(src_reg_12_lo, _mm_setzero_si128()); res_reg_12_lo = mm_madd_packs_epi16_sse2(&src_reg_12_lo_1, &src_reg_12_lo_2, &kernel_reg_45); src_reg_23_lo_1 = _mm_unpacklo_epi8(src_reg_23_lo, _mm_setzero_si128()); src_reg_23_lo_2 = _mm_unpackhi_epi8(src_reg_23_lo, _mm_setzero_si128()); res_reg_23_lo = mm_madd_packs_epi16_sse2(&src_reg_23_lo_1, &src_reg_23_lo_2, &kernel_reg_45); // Add to get first half of the results res_reg_m1012_lo = _mm_adds_epi16(res_reg_m10_lo, res_reg_12_lo); res_reg_0123_lo = _mm_adds_epi16(res_reg_01_lo, res_reg_23_lo); // Now repeat everything again for the second half // Partial output for second half res_reg_m10_hi = mm_madd_packs_epi16_sse2( &src_reg_m10_hi_1, &src_reg_m10_hi_2, &kernel_reg_23); res_reg_01_hi = mm_madd_packs_epi16_sse2(&src_reg_01_hi_1, &src_reg_01_hi_2, &kernel_reg_23); src_reg_12_hi_1 = _mm_unpacklo_epi8(src_reg_12_hi, _mm_setzero_si128()); src_reg_12_hi_2 = _mm_unpackhi_epi8(src_reg_12_hi, _mm_setzero_si128()); res_reg_12_hi = mm_madd_packs_epi16_sse2(&src_reg_12_hi_1, &src_reg_12_hi_2, &kernel_reg_45); src_reg_23_hi_1 = _mm_unpacklo_epi8(src_reg_23_hi, _mm_setzero_si128()); src_reg_23_hi_2 = _mm_unpackhi_epi8(src_reg_23_hi, _mm_setzero_si128()); res_reg_23_hi = mm_madd_packs_epi16_sse2(&src_reg_23_hi_1, &src_reg_23_hi_2, &kernel_reg_45); // Second half of the results res_reg_m1012_hi = _mm_adds_epi16(res_reg_m10_hi, 
res_reg_12_hi); res_reg_0123_hi = _mm_adds_epi16(res_reg_01_hi, res_reg_23_hi); // Round the words res_reg_m1012_lo = mm_round_epi16_sse2(&res_reg_m1012_lo, ®_32, 6); res_reg_0123_lo = mm_round_epi16_sse2(&res_reg_0123_lo, ®_32, 6); res_reg_m1012_hi = mm_round_epi16_sse2(&res_reg_m1012_hi, ®_32, 6); res_reg_0123_hi = mm_round_epi16_sse2(&res_reg_0123_hi, ®_32, 6); // Combine to get the result res_reg_m1012 = _mm_packus_epi16(res_reg_m1012_lo, res_reg_m1012_hi); res_reg_0123 = _mm_packus_epi16(res_reg_0123_lo, res_reg_0123_hi); _mm_store_si128((__m128i *)dst_ptr, res_reg_m1012); _mm_store_si128((__m128i *)(dst_ptr + dst_stride), res_reg_0123); // Update the source by two rows src_ptr += src_stride_unrolled; dst_ptr += dst_stride_unrolled; src_reg_m10_lo_1 = src_reg_12_lo_1; src_reg_m10_lo_2 = src_reg_12_lo_2; src_reg_m10_hi_1 = src_reg_12_hi_1; src_reg_m10_hi_2 = src_reg_12_hi_2; src_reg_01_lo_1 = src_reg_23_lo_1; src_reg_01_lo_2 = src_reg_23_lo_2; src_reg_01_hi_1 = src_reg_23_hi_1; src_reg_01_hi_2 = src_reg_23_hi_2; src_reg_1 = src_reg_3; } } static void vpx_filter_block1d8_h4_sse2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { __m128i kernel_reg; // Kernel __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding int h; __m128i src_reg, src_reg_shift_1, src_reg_shift_2, src_reg_shift_3; __m128i dst_first; __m128i even, odd; // Start one pixel before as we need tap/2 - 1 = 1 sample from the past src_ptr -= 1; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm_srai_epi16(kernel_reg, 1); kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); for (h = height; h > 0; --h) { // We will load multiple shifted versions of the row and shuffle them into // 16-bit words of the form // ... s[2] s[1] s[0] s[-1] // ... s[4] s[3] s[2] s[1] // Then we call multiply and add to get partial results // s[2]k[3]+s[1]k[2] s[0]k[3]s[-1]k[2] // s[4]k[5]+s[3]k[4] s[2]k[5]s[1]k[4] // The two results are then added together to get the even output src_reg = _mm_loadu_si128((const __m128i *)src_ptr); src_reg_shift_1 = _mm_srli_si128(src_reg, 1); src_reg_shift_2 = _mm_srli_si128(src_reg, 2); src_reg_shift_3 = _mm_srli_si128(src_reg, 3); // Output 6 4 2 0 even = mm_madd_add_epi8_sse2(&src_reg, &src_reg_shift_2, &kernel_reg_23, &kernel_reg_45); // Output 7 5 3 1 odd = mm_madd_add_epi8_sse2(&src_reg_shift_1, &src_reg_shift_3, &kernel_reg_23, &kernel_reg_45); // Combine to get the first half of the dst dst_first = mm_zip_epi32_sse2(&even, &odd); dst_first = mm_round_epi16_sse2(&dst_first, ®_32, 6); // Saturate and convert to 8-bit words dst_first = _mm_packus_epi16(dst_first, _mm_setzero_si128()); _mm_storel_epi64((__m128i *)dst_ptr, dst_first); src_ptr += src_stride; dst_ptr += dst_stride; } } static void vpx_filter_block1d8_v4_sse2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { // Register for source s[-1:3, :] __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; // Interleaved rows of the source. 
lo is first half, hi second __m128i src_reg_m10_lo, src_reg_01_lo; __m128i src_reg_12_lo, src_reg_23_lo; // Half of half of the interleaved rows __m128i src_reg_m10_lo_1, src_reg_m10_lo_2; __m128i src_reg_01_lo_1, src_reg_01_lo_2; __m128i src_reg_12_lo_1, src_reg_12_lo_2; __m128i src_reg_23_lo_1, src_reg_23_lo_2; __m128i kernel_reg; // Kernel __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used // Result after multiply and add __m128i res_reg_m10_lo, res_reg_01_lo, res_reg_12_lo, res_reg_23_lo; __m128i res_reg_m1012, res_reg_0123; __m128i res_reg_m1012_lo, res_reg_0123_lo; const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding // We will compute the result two rows at a time const ptrdiff_t src_stride_unrolled = src_stride << 1; const ptrdiff_t dst_stride_unrolled = dst_stride << 1; int h; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm_srai_epi16(kernel_reg, 1); kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); // We will load two rows of pixels as 8-bit words, rearrange them as 16-bit // words, // shuffle the data into the form // ... s[0,1] s[-1,1] s[0,0] s[-1,0] // ... s[0,7] s[-1,7] s[0,6] s[-1,6] // ... s[0,9] s[-1,9] s[0,8] s[-1,8] // ... s[0,13] s[-1,13] s[0,12] s[-1,12] // so that we can call multiply and add with the kernel to get 32-bit words of // the form // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] // Finally, we can add multiple rows together to get the desired output. // First shuffle the data src_reg_m1 = _mm_loadu_si128((const __m128i *)src_ptr); src_reg_0 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride)); src_reg_m10_lo = _mm_unpacklo_epi8(src_reg_m1, src_reg_0); src_reg_m10_lo_1 = _mm_unpacklo_epi8(src_reg_m10_lo, _mm_setzero_si128()); src_reg_m10_lo_2 = _mm_unpackhi_epi8(src_reg_m10_lo, _mm_setzero_si128()); // More shuffling src_reg_1 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2)); src_reg_01_lo = _mm_unpacklo_epi8(src_reg_0, src_reg_1); src_reg_01_lo_1 = _mm_unpacklo_epi8(src_reg_01_lo, _mm_setzero_si128()); src_reg_01_lo_2 = _mm_unpackhi_epi8(src_reg_01_lo, _mm_setzero_si128()); for (h = height; h > 1; h -= 2) { src_reg_2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3)); src_reg_12_lo = _mm_unpacklo_epi8(src_reg_1, src_reg_2); src_reg_3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4)); src_reg_23_lo = _mm_unpacklo_epi8(src_reg_2, src_reg_3); // Partial output res_reg_m10_lo = mm_madd_packs_epi16_sse2( &src_reg_m10_lo_1, &src_reg_m10_lo_2, &kernel_reg_23); res_reg_01_lo = mm_madd_packs_epi16_sse2(&src_reg_01_lo_1, &src_reg_01_lo_2, &kernel_reg_23); src_reg_12_lo_1 = _mm_unpacklo_epi8(src_reg_12_lo, _mm_setzero_si128()); src_reg_12_lo_2 = _mm_unpackhi_epi8(src_reg_12_lo, _mm_setzero_si128()); res_reg_12_lo = mm_madd_packs_epi16_sse2(&src_reg_12_lo_1, &src_reg_12_lo_2, &kernel_reg_45); src_reg_23_lo_1 = _mm_unpacklo_epi8(src_reg_23_lo, _mm_setzero_si128()); src_reg_23_lo_2 = _mm_unpackhi_epi8(src_reg_23_lo, _mm_setzero_si128()); res_reg_23_lo = mm_madd_packs_epi16_sse2(&src_reg_23_lo_1, &src_reg_23_lo_2, &kernel_reg_45); // Add to get results res_reg_m1012_lo = _mm_adds_epi16(res_reg_m10_lo, res_reg_12_lo); res_reg_0123_lo = _mm_adds_epi16(res_reg_01_lo, res_reg_23_lo); // Round the words res_reg_m1012_lo = mm_round_epi16_sse2(&res_reg_m1012_lo, ®_32, 6); res_reg_0123_lo = mm_round_epi16_sse2(&res_reg_0123_lo, ®_32, 6); // Convert to 8-bit words res_reg_m1012 = 
_mm_packus_epi16(res_reg_m1012_lo, _mm_setzero_si128()); res_reg_0123 = _mm_packus_epi16(res_reg_0123_lo, _mm_setzero_si128()); // Save only half of the register (8 words) _mm_storel_epi64((__m128i *)dst_ptr, res_reg_m1012); _mm_storel_epi64((__m128i *)(dst_ptr + dst_stride), res_reg_0123); // Update the source by two rows src_ptr += src_stride_unrolled; dst_ptr += dst_stride_unrolled; src_reg_m10_lo_1 = src_reg_12_lo_1; src_reg_m10_lo_2 = src_reg_12_lo_2; src_reg_01_lo_1 = src_reg_23_lo_1; src_reg_01_lo_2 = src_reg_23_lo_2; src_reg_1 = src_reg_3; } } static void vpx_filter_block1d4_h4_sse2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { __m128i kernel_reg; // Kernel __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding int h; __m128i src_reg, src_reg_shift_1, src_reg_shift_2, src_reg_shift_3; __m128i dst_first; __m128i tmp_0, tmp_1; // Start one pixel before as we need tap/2 - 1 = 1 sample from the past src_ptr -= 1; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm_srai_epi16(kernel_reg, 1); kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); for (h = height; h > 0; --h) { // We will load multiple shifted versions of the row and shuffle them into // 16-bit words of the form // ... s[1] s[0] s[0] s[-1] // ... s[3] s[2] s[2] s[1] // Then we call multiply and add to get partial results // s[1]k[3]+s[0]k[2] s[0]k[3]s[-1]k[2] // s[3]k[5]+s[2]k[4] s[2]k[5]s[1]k[4] // The two results are then added together to get the output src_reg = _mm_loadu_si128((const __m128i *)src_ptr); src_reg_shift_1 = _mm_srli_si128(src_reg, 1); src_reg_shift_2 = _mm_srli_si128(src_reg, 2); src_reg_shift_3 = _mm_srli_si128(src_reg, 3); // Convert to 16-bit words src_reg = _mm_unpacklo_epi8(src_reg, _mm_setzero_si128()); src_reg_shift_1 = _mm_unpacklo_epi8(src_reg_shift_1, _mm_setzero_si128()); src_reg_shift_2 = _mm_unpacklo_epi8(src_reg_shift_2, _mm_setzero_si128()); src_reg_shift_3 = _mm_unpacklo_epi8(src_reg_shift_3, _mm_setzero_si128()); // Shuffle into the right format tmp_0 = _mm_unpacklo_epi32(src_reg, src_reg_shift_1); tmp_1 = _mm_unpacklo_epi32(src_reg_shift_2, src_reg_shift_3); // Partial output tmp_0 = _mm_madd_epi16(tmp_0, kernel_reg_23); tmp_1 = _mm_madd_epi16(tmp_1, kernel_reg_45); // Output dst_first = _mm_add_epi32(tmp_0, tmp_1); dst_first = _mm_packs_epi32(dst_first, _mm_setzero_si128()); dst_first = mm_round_epi16_sse2(&dst_first, ®_32, 6); // Saturate and convert to 8-bit words dst_first = _mm_packus_epi16(dst_first, _mm_setzero_si128()); *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(dst_first); src_ptr += src_stride; dst_ptr += dst_stride; } } static void vpx_filter_block1d4_v4_sse2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { // Register for source s[-1:3, :] __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; // Interleaved rows of the source. 
lo is first half, hi second __m128i src_reg_m10_lo, src_reg_01_lo; __m128i src_reg_12_lo, src_reg_23_lo; // Half of half of the interleaved rows __m128i src_reg_m10_lo_1; __m128i src_reg_01_lo_1; __m128i src_reg_12_lo_1; __m128i src_reg_23_lo_1; __m128i kernel_reg; // Kernel __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used // Result after multiply and add __m128i res_reg_m10_lo, res_reg_01_lo, res_reg_12_lo, res_reg_23_lo; __m128i res_reg_m1012, res_reg_0123; __m128i res_reg_m1012_lo, res_reg_0123_lo; const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding const __m128i reg_zero = _mm_setzero_si128(); // We will compute the result two rows at a time const ptrdiff_t src_stride_unrolled = src_stride << 1; const ptrdiff_t dst_stride_unrolled = dst_stride << 1; int h; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm_srai_epi16(kernel_reg, 1); kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); // We will load two rows of pixels as 8-bit words, rearrange them as 16-bit // words, // shuffle the data into the form // ... s[0,1] s[-1,1] s[0,0] s[-1,0] // ... s[0,7] s[-1,7] s[0,6] s[-1,6] // ... s[0,9] s[-1,9] s[0,8] s[-1,8] // ... s[0,13] s[-1,13] s[0,12] s[-1,12] // so that we can call multiply and add with the kernel to get 32-bit words of // the form // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] // Finally, we can add multiple rows together to get the desired output. // First shuffle the data src_reg_m1 = _mm_loadu_si128((const __m128i *)src_ptr); src_reg_0 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride)); src_reg_m10_lo = _mm_unpacklo_epi8(src_reg_m1, src_reg_0); src_reg_m10_lo_1 = _mm_unpacklo_epi8(src_reg_m10_lo, _mm_setzero_si128()); // More shuffling src_reg_1 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2)); src_reg_01_lo = _mm_unpacklo_epi8(src_reg_0, src_reg_1); src_reg_01_lo_1 = _mm_unpacklo_epi8(src_reg_01_lo, _mm_setzero_si128()); for (h = height; h > 1; h -= 2) { src_reg_2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3)); src_reg_12_lo = _mm_unpacklo_epi8(src_reg_1, src_reg_2); src_reg_3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4)); src_reg_23_lo = _mm_unpacklo_epi8(src_reg_2, src_reg_3); // Partial output res_reg_m10_lo = mm_madd_packs_epi16_sse2(&src_reg_m10_lo_1, ®_zero, &kernel_reg_23); res_reg_01_lo = mm_madd_packs_epi16_sse2(&src_reg_01_lo_1, ®_zero, &kernel_reg_23); src_reg_12_lo_1 = _mm_unpacklo_epi8(src_reg_12_lo, _mm_setzero_si128()); res_reg_12_lo = mm_madd_packs_epi16_sse2(&src_reg_12_lo_1, ®_zero, &kernel_reg_45); src_reg_23_lo_1 = _mm_unpacklo_epi8(src_reg_23_lo, _mm_setzero_si128()); res_reg_23_lo = mm_madd_packs_epi16_sse2(&src_reg_23_lo_1, ®_zero, &kernel_reg_45); // Add to get results res_reg_m1012_lo = _mm_adds_epi16(res_reg_m10_lo, res_reg_12_lo); res_reg_0123_lo = _mm_adds_epi16(res_reg_01_lo, res_reg_23_lo); // Round the words res_reg_m1012_lo = mm_round_epi16_sse2(&res_reg_m1012_lo, ®_32, 6); res_reg_0123_lo = mm_round_epi16_sse2(&res_reg_0123_lo, ®_32, 6); // Convert to 8-bit words res_reg_m1012 = _mm_packus_epi16(res_reg_m1012_lo, reg_zero); res_reg_0123 = _mm_packus_epi16(res_reg_0123_lo, reg_zero); // Save only half of the register (8 words) *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(res_reg_m1012); *((uint32_t *)(dst_ptr + dst_stride)) = _mm_cvtsi128_si32(res_reg_0123); // Update the source by two rows src_ptr += src_stride_unrolled; dst_ptr += 
dst_stride_unrolled; src_reg_m10_lo_1 = src_reg_12_lo_1; src_reg_01_lo_1 = src_reg_23_lo_1; src_reg_1 = src_reg_3; } } #if CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64 static void vpx_highbd_filter_block1d4_h4_sse2( const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { // We will load multiple shifted versions of the row and shuffle them into // 16-bit words of the form // ... s[2] s[1] s[0] s[-1] // ... s[4] s[3] s[2] s[1] // Then we call multiply and add to get partial results // s[2]k[3]+s[1]k[2] s[0]k[3]+s[-1]k[2] // s[4]k[5]+s[3]k[4] s[2]k[5]+s[1]k[4] // The two results are then added together to get the even output __m128i src_reg, src_reg_shift_1, src_reg_shift_2, src_reg_shift_3; __m128i res_reg; __m128i even, odd; __m128i kernel_reg; // Kernel __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used const __m128i reg_round = _mm_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding const __m128i reg_max = _mm_set1_epi16((1 << bd) - 1); const __m128i reg_zero = _mm_setzero_si128(); int h; // Start one pixel before as we need tap/2 - 1 = 1 sample from the past src_ptr -= 1; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); for (h = height; h > 0; --h) { src_reg = _mm_loadu_si128((const __m128i *)src_ptr); src_reg_shift_1 = _mm_srli_si128(src_reg, 2); src_reg_shift_2 = _mm_srli_si128(src_reg, 4); src_reg_shift_3 = _mm_srli_si128(src_reg, 6); // Output 2 0 even = mm_madd_add_epi16_sse2(&src_reg, &src_reg_shift_2, &kernel_reg_23, &kernel_reg_45); // Output 3 1 odd = mm_madd_add_epi16_sse2(&src_reg_shift_1, &src_reg_shift_3, &kernel_reg_23, &kernel_reg_45); // Combine to get the first half of the dst res_reg = _mm_unpacklo_epi32(even, odd); res_reg = mm_round_epi32_sse2(&res_reg, &reg_round, CONV8_ROUNDING_BITS); res_reg = _mm_packs_epi32(res_reg, reg_zero); // Saturate the result and save res_reg = _mm_min_epi16(res_reg, reg_max); res_reg = _mm_max_epi16(res_reg, reg_zero); _mm_storel_epi64((__m128i *)dst_ptr, res_reg); src_ptr += src_stride; dst_ptr += dst_stride; } } static void vpx_highbd_filter_block1d4_v4_sse2( const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { // We will load two rows of pixels as 16-bit words, and shuffle them into the // form // ... s[0,1] s[-1,1] s[0,0] s[-1,0] // ... s[0,7] s[-1,7] s[0,6] s[-1,6] // ... s[0,9] s[-1,9] s[0,8] s[-1,8] // ... s[0,13] s[-1,13] s[0,12] s[-1,12] // so that we can call multiply and add with the kernel to get 32-bit words of // the form // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] // Finally, we can add multiple rows together to get the desired output. // Register for source s[-1:3, :] __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; // Interleaved rows of the source.
lo is first half, hi second __m128i src_reg_m10, src_reg_01; __m128i src_reg_12, src_reg_23; __m128i kernel_reg; // Kernel __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used // Result after multiply and add __m128i res_reg_m10, res_reg_01, res_reg_12, res_reg_23; __m128i res_reg_m1012, res_reg_0123; const __m128i reg_round = _mm_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding const __m128i reg_max = _mm_set1_epi16((1 << bd) - 1); const __m128i reg_zero = _mm_setzero_si128(); // We will compute the result two rows at a time const ptrdiff_t src_stride_unrolled = src_stride << 1; const ptrdiff_t dst_stride_unrolled = dst_stride << 1; int h; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); // First shuffle the data src_reg_m1 = _mm_loadl_epi64((const __m128i *)src_ptr); src_reg_0 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride)); src_reg_m10 = _mm_unpacklo_epi16(src_reg_m1, src_reg_0); // More shuffling src_reg_1 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 2)); src_reg_01 = _mm_unpacklo_epi16(src_reg_0, src_reg_1); for (h = height; h > 1; h -= 2) { src_reg_2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 3)); src_reg_12 = _mm_unpacklo_epi16(src_reg_1, src_reg_2); src_reg_3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 4)); src_reg_23 = _mm_unpacklo_epi16(src_reg_2, src_reg_3); // Partial output res_reg_m10 = _mm_madd_epi16(src_reg_m10, kernel_reg_23); res_reg_01 = _mm_madd_epi16(src_reg_01, kernel_reg_23); res_reg_12 = _mm_madd_epi16(src_reg_12, kernel_reg_45); res_reg_23 = _mm_madd_epi16(src_reg_23, kernel_reg_45); // Add to get results res_reg_m1012 = _mm_add_epi32(res_reg_m10, res_reg_12); res_reg_0123 = _mm_add_epi32(res_reg_01, res_reg_23); // Round the words res_reg_m1012 = mm_round_epi32_sse2(&res_reg_m1012, &reg_round, CONV8_ROUNDING_BITS); res_reg_0123 = mm_round_epi32_sse2(&res_reg_0123, &reg_round, CONV8_ROUNDING_BITS); res_reg_m1012 = _mm_packs_epi32(res_reg_m1012, reg_zero); res_reg_0123 = _mm_packs_epi32(res_reg_0123, reg_zero); // Saturate according to bit depth res_reg_m1012 = _mm_min_epi16(res_reg_m1012, reg_max); res_reg_0123 = _mm_min_epi16(res_reg_0123, reg_max); res_reg_m1012 = _mm_max_epi16(res_reg_m1012, reg_zero); res_reg_0123 = _mm_max_epi16(res_reg_0123, reg_zero); // Save only half of the register (4 words) _mm_storel_epi64((__m128i *)dst_ptr, res_reg_m1012); _mm_storel_epi64((__m128i *)(dst_ptr + dst_stride), res_reg_0123); // Update the source by two rows src_ptr += src_stride_unrolled; dst_ptr += dst_stride_unrolled; src_reg_m10 = src_reg_12; src_reg_01 = src_reg_23; src_reg_1 = src_reg_3; } } static void vpx_highbd_filter_block1d8_h4_sse2( const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { // We will load multiple shifted versions of the row and shuffle them into // 16-bit words of the form // ... s[2] s[1] s[0] s[-1] // ... s[4] s[3] s[2] s[1] // Then we call multiply and add to get partial results // s[2]k[3]+s[1]k[2] s[0]k[3]+s[-1]k[2] // s[4]k[5]+s[3]k[4] s[2]k[5]+s[1]k[4] // The two results are then added together for the first half of even // output.
// Repeat multiple times to get the whole output __m128i src_reg, src_reg_next, src_reg_shift_1, src_reg_shift_2, src_reg_shift_3; __m128i res_reg; __m128i even, odd; __m128i tmp_0, tmp_1; __m128i kernel_reg; // Kernel __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used const __m128i reg_round = _mm_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding const __m128i reg_max = _mm_set1_epi16((1 << bd) - 1); const __m128i reg_zero = _mm_setzero_si128(); int h; // Start one pixel before as we need tap/2 - 1 = 1 sample from the past src_ptr -= 1; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); for (h = height; h > 0; --h) { // We will put first half in the first half of the reg, and second half in // second half src_reg = _mm_loadu_si128((const __m128i *)src_ptr); src_reg_next = _mm_loadu_si128((const __m128i *)(src_ptr + 5)); // Output 6 4 2 0 tmp_0 = _mm_srli_si128(src_reg, 4); tmp_1 = _mm_srli_si128(src_reg_next, 2); src_reg_shift_2 = _mm_unpacklo_epi64(tmp_0, tmp_1); even = mm_madd_add_epi16_sse2(&src_reg, &src_reg_shift_2, &kernel_reg_23, &kernel_reg_45); // Output 7 5 3 1 tmp_0 = _mm_srli_si128(src_reg, 2); tmp_1 = src_reg_next; src_reg_shift_1 = _mm_unpacklo_epi64(tmp_0, tmp_1); tmp_0 = _mm_srli_si128(src_reg, 6); tmp_1 = _mm_srli_si128(src_reg_next, 4); src_reg_shift_3 = _mm_unpacklo_epi64(tmp_0, tmp_1); odd = mm_madd_add_epi16_sse2(&src_reg_shift_1, &src_reg_shift_3, &kernel_reg_23, &kernel_reg_45); // Combine to get the first half of the dst even = mm_round_epi32_sse2(&even, &reg_round, CONV8_ROUNDING_BITS); odd = mm_round_epi32_sse2(&odd, &reg_round, CONV8_ROUNDING_BITS); res_reg = mm_zip_epi32_sse2(&even, &odd); // Saturate the result and save res_reg = _mm_min_epi16(res_reg, reg_max); res_reg = _mm_max_epi16(res_reg, reg_zero); _mm_store_si128((__m128i *)dst_ptr, res_reg); src_ptr += src_stride; dst_ptr += dst_stride; } } static void vpx_highbd_filter_block1d8_v4_sse2( const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { // We will load two rows of pixels as 16-bit words, and shuffle them into the // form // ... s[0,1] s[-1,1] s[0,0] s[-1,0] // ... s[0,7] s[-1,7] s[0,6] s[-1,6] // ... s[0,9] s[-1,9] s[0,8] s[-1,8] // ... s[0,13] s[-1,13] s[0,12] s[-1,12] // so that we can call multiply and add with the kernel to get 32-bit words of // the form // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] // Finally, we can add multiple rows together to get the desired output. // Register for source s[-1:3, :] __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; // Interleaved rows of the source.
lo is first half, hi second __m128i src_reg_m10_lo, src_reg_01_lo, src_reg_m10_hi, src_reg_01_hi; __m128i src_reg_12_lo, src_reg_23_lo, src_reg_12_hi, src_reg_23_hi; // Result after multiply and add __m128i res_reg_m10_lo, res_reg_01_lo, res_reg_12_lo, res_reg_23_lo; __m128i res_reg_m10_hi, res_reg_01_hi, res_reg_12_hi, res_reg_23_hi; __m128i res_reg_m1012, res_reg_0123; __m128i res_reg_m1012_lo, res_reg_0123_lo; __m128i res_reg_m1012_hi, res_reg_0123_hi; __m128i kernel_reg; // Kernel __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used const __m128i reg_round = _mm_set1_epi32(CONV8_ROUNDING_NUM); // Used for rounding const __m128i reg_max = _mm_set1_epi16((1 << bd) - 1); const __m128i reg_zero = _mm_setzero_si128(); // We will compute the result two rows at a time const ptrdiff_t src_stride_unrolled = src_stride << 1; const ptrdiff_t dst_stride_unrolled = dst_stride << 1; int h; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg_23 = extract_quarter_2_epi16_sse2(&kernel_reg); kernel_reg_45 = extract_quarter_3_epi16_sse2(&kernel_reg); // First shuffle the data src_reg_m1 = _mm_loadu_si128((const __m128i *)src_ptr); src_reg_0 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride)); src_reg_m10_lo = _mm_unpacklo_epi16(src_reg_m1, src_reg_0); src_reg_m10_hi = _mm_unpackhi_epi16(src_reg_m1, src_reg_0); // More shuffling src_reg_1 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2)); src_reg_01_lo = _mm_unpacklo_epi16(src_reg_0, src_reg_1); src_reg_01_hi = _mm_unpackhi_epi16(src_reg_0, src_reg_1); for (h = height; h > 1; h -= 2) { src_reg_2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3)); src_reg_12_lo = _mm_unpacklo_epi16(src_reg_1, src_reg_2); src_reg_12_hi = _mm_unpackhi_epi16(src_reg_1, src_reg_2); src_reg_3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4)); src_reg_23_lo = _mm_unpacklo_epi16(src_reg_2, src_reg_3); src_reg_23_hi = _mm_unpackhi_epi16(src_reg_2, src_reg_3); // Partial output for first half res_reg_m10_lo = _mm_madd_epi16(src_reg_m10_lo, kernel_reg_23); res_reg_01_lo = _mm_madd_epi16(src_reg_01_lo, kernel_reg_23); res_reg_12_lo = _mm_madd_epi16(src_reg_12_lo, kernel_reg_45); res_reg_23_lo = _mm_madd_epi16(src_reg_23_lo, kernel_reg_45); // Add to get results res_reg_m1012_lo = _mm_add_epi32(res_reg_m10_lo, res_reg_12_lo); res_reg_0123_lo = _mm_add_epi32(res_reg_01_lo, res_reg_23_lo); // Round the words res_reg_m1012_lo = mm_round_epi32_sse2(&res_reg_m1012_lo, &reg_round, CONV8_ROUNDING_BITS); res_reg_0123_lo = mm_round_epi32_sse2(&res_reg_0123_lo, &reg_round, CONV8_ROUNDING_BITS); // Partial output for second half res_reg_m10_hi = _mm_madd_epi16(src_reg_m10_hi, kernel_reg_23); res_reg_01_hi = _mm_madd_epi16(src_reg_01_hi, kernel_reg_23); res_reg_12_hi = _mm_madd_epi16(src_reg_12_hi, kernel_reg_45); res_reg_23_hi = _mm_madd_epi16(src_reg_23_hi, kernel_reg_45); // Add to get results res_reg_m1012_hi = _mm_add_epi32(res_reg_m10_hi, res_reg_12_hi); res_reg_0123_hi = _mm_add_epi32(res_reg_01_hi, res_reg_23_hi); // Round the words res_reg_m1012_hi = mm_round_epi32_sse2(&res_reg_m1012_hi, &reg_round, CONV8_ROUNDING_BITS); res_reg_0123_hi = mm_round_epi32_sse2(&res_reg_0123_hi, &reg_round, CONV8_ROUNDING_BITS); // Combine the two halves res_reg_m1012 = _mm_packs_epi32(res_reg_m1012_lo, res_reg_m1012_hi); res_reg_0123 = _mm_packs_epi32(res_reg_0123_lo, res_reg_0123_hi); // Saturate according to bit depth res_reg_m1012 = _mm_min_epi16(res_reg_m1012, reg_max); res_reg_0123 = _mm_min_epi16(res_reg_0123, reg_max);
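// Clamp each filtered 16-bit value to the legal high-bitdepth pixel range
// [0, (1 << bd) - 1]: e.g. for bd = 10, reg_max holds 1023, so a filtered
// value of 1100 is reduced to 1023 by the min above and a negative sum is
// raised to 0 by the max below, matching the scalar
//   v = v < 0 ? 0 : (v > 1023 ? 1023 : v);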
res_reg_m1012 = _mm_max_epi16(res_reg_m1012, reg_zero); res_reg_0123 = _mm_max_epi16(res_reg_0123, reg_zero); // Save the result (8 words per row) _mm_store_si128((__m128i *)dst_ptr, res_reg_m1012); _mm_store_si128((__m128i *)(dst_ptr + dst_stride), res_reg_0123); // Update the source by two rows src_ptr += src_stride_unrolled; dst_ptr += dst_stride_unrolled; src_reg_m10_lo = src_reg_12_lo; src_reg_m10_hi = src_reg_12_hi; src_reg_01_lo = src_reg_23_lo; src_reg_01_hi = src_reg_23_hi; src_reg_1 = src_reg_3; } } static void vpx_highbd_filter_block1d16_h4_sse2( const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { vpx_highbd_filter_block1d8_h4_sse2(src_ptr, src_stride, dst_ptr, dst_stride, height, kernel, bd); vpx_highbd_filter_block1d8_h4_sse2(src_ptr + 8, src_stride, dst_ptr + 8, dst_stride, height, kernel, bd); } static void vpx_highbd_filter_block1d16_v4_sse2( const uint16_t *src_ptr, ptrdiff_t src_stride, uint16_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel, int bd) { vpx_highbd_filter_block1d8_v4_sse2(src_ptr, src_stride, dst_ptr, dst_stride, height, kernel, bd); vpx_highbd_filter_block1d8_v4_sse2(src_ptr + 8, src_stride, dst_ptr + 8, dst_stride, height, kernel, bd); } #endif // CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64 // From vpx_subpixel_8t_sse2.asm. filter8_1dfunction vpx_filter_block1d16_v8_sse2; filter8_1dfunction vpx_filter_block1d16_h8_sse2; filter8_1dfunction vpx_filter_block1d8_v8_sse2; filter8_1dfunction vpx_filter_block1d8_h8_sse2; filter8_1dfunction vpx_filter_block1d4_v8_sse2; filter8_1dfunction vpx_filter_block1d4_h8_sse2; filter8_1dfunction vpx_filter_block1d16_v8_avg_sse2; filter8_1dfunction vpx_filter_block1d16_h8_avg_sse2; filter8_1dfunction vpx_filter_block1d8_v8_avg_sse2; filter8_1dfunction vpx_filter_block1d8_h8_avg_sse2; filter8_1dfunction vpx_filter_block1d4_v8_avg_sse2; filter8_1dfunction vpx_filter_block1d4_h8_avg_sse2; // Use the [vh]8 version because there is no [vh]4 implementation. #define vpx_filter_block1d16_v4_avg_sse2 vpx_filter_block1d16_v8_avg_sse2 #define vpx_filter_block1d16_h4_avg_sse2 vpx_filter_block1d16_h8_avg_sse2 #define vpx_filter_block1d8_v4_avg_sse2 vpx_filter_block1d8_v8_avg_sse2 #define vpx_filter_block1d8_h4_avg_sse2 vpx_filter_block1d8_h8_avg_sse2 #define vpx_filter_block1d4_v4_avg_sse2 vpx_filter_block1d4_v8_avg_sse2 #define vpx_filter_block1d4_h4_avg_sse2 vpx_filter_block1d4_h8_avg_sse2 // From vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm.
filter8_1dfunction vpx_filter_block1d16_v2_sse2; filter8_1dfunction vpx_filter_block1d16_h2_sse2; filter8_1dfunction vpx_filter_block1d8_v2_sse2; filter8_1dfunction vpx_filter_block1d8_h2_sse2; filter8_1dfunction vpx_filter_block1d4_v2_sse2; filter8_1dfunction vpx_filter_block1d4_h2_sse2; filter8_1dfunction vpx_filter_block1d16_v2_avg_sse2; filter8_1dfunction vpx_filter_block1d16_h2_avg_sse2; filter8_1dfunction vpx_filter_block1d8_v2_avg_sse2; filter8_1dfunction vpx_filter_block1d8_h2_avg_sse2; filter8_1dfunction vpx_filter_block1d4_v2_avg_sse2; filter8_1dfunction vpx_filter_block1d4_h2_avg_sse2; // void vpx_convolve8_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h); // void vpx_convolve8_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h); // void vpx_convolve8_avg_horiz_sse2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, // int y_step_q4, int w, int h); // void vpx_convolve8_avg_vert_sse2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h); FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2, 0); FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - (num_taps / 2 - 1) * src_stride, , sse2, 0); FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2, 1); FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - (num_taps / 2 - 1) * src_stride, avg_, sse2, 1); // void vpx_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h); // void vpx_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h); FUN_CONV_2D(, sse2, 0); FUN_CONV_2D(avg_, sse2, 1); #if CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64 // From vpx_dsp/x86/vpx_high_subpixel_8t_sse2.asm. highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v8_avg_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h8_avg_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v8_avg_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h8_avg_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v8_avg_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h8_avg_sse2; // Use the [vh]8 version because there is no [vh]4 implementation. 
#define vpx_highbd_filter_block1d16_v4_avg_sse2 \ vpx_highbd_filter_block1d16_v8_avg_sse2 #define vpx_highbd_filter_block1d16_h4_avg_sse2 \ vpx_highbd_filter_block1d16_h8_avg_sse2 #define vpx_highbd_filter_block1d8_v4_avg_sse2 \ vpx_highbd_filter_block1d8_v8_avg_sse2 #define vpx_highbd_filter_block1d8_h4_avg_sse2 \ vpx_highbd_filter_block1d8_h8_avg_sse2 #define vpx_highbd_filter_block1d4_v4_avg_sse2 \ vpx_highbd_filter_block1d4_v8_avg_sse2 #define vpx_highbd_filter_block1d4_h4_avg_sse2 \ vpx_highbd_filter_block1d4_h8_avg_sse2 // From vpx_dsp/x86/vpx_high_subpixel_bilinear_sse2.asm. highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d16_v2_avg_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d16_h2_avg_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d8_v2_avg_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d8_h2_avg_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d4_v2_avg_sse2; highbd_filter8_1dfunction vpx_highbd_filter_block1d4_h2_avg_sse2; // void vpx_highbd_convolve8_horiz_sse2(const uint8_t *src, // ptrdiff_t src_stride, // uint8_t *dst, // ptrdiff_t dst_stride, // const int16_t *filter_x, // int x_step_q4, // const int16_t *filter_y, // int y_step_q4, // int w, int h, int bd); // void vpx_highbd_convolve8_vert_sse2(const uint8_t *src, // ptrdiff_t src_stride, // uint8_t *dst, // ptrdiff_t dst_stride, // const int16_t *filter_x, // int x_step_q4, // const int16_t *filter_y, // int y_step_q4, // int w, int h, int bd); // void vpx_highbd_convolve8_avg_horiz_sse2(const uint8_t *src, // ptrdiff_t src_stride, // uint8_t *dst, // ptrdiff_t dst_stride, // const int16_t *filter_x, // int x_step_q4, // const int16_t *filter_y, // int y_step_q4, // int w, int h, int bd); // void vpx_highbd_convolve8_avg_vert_sse2(const uint8_t *src, // ptrdiff_t src_stride, // uint8_t *dst, // ptrdiff_t dst_stride, // const int16_t *filter_x, // int x_step_q4, // const int16_t *filter_y, // int y_step_q4, // int w, int h, int bd); HIGH_FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , sse2, 0); HIGH_FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * (num_taps / 2 - 1), , sse2, 0); HIGH_FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, sse2, 1); HIGH_FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * (num_taps / 2 - 1), avg_, sse2, 1); // void vpx_highbd_convolve8_sse2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h, int bd); // void vpx_highbd_convolve8_avg_sse2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, // int y_step_q4, int w, int h, int bd); HIGH_FUN_CONV_2D(, sse2, 0); HIGH_FUN_CONV_2D(avg_, sse2, 1); #endif // CONFIG_VP9_HIGHBITDEPTH && VPX_ARCH_X86_64 libvpx-1.8.2/vpx_dsp/x86/vpx_subpixel_8t_intrin_avx2.c000066400000000000000000001175071357355204000230070ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
* * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <immintrin.h> #include <string.h> #include "./vpx_dsp_rtcd.h" #include "vpx_dsp/x86/convolve.h" #include "vpx_dsp/x86/convolve_avx2.h" #include "vpx_dsp/x86/convolve_sse2.h" #include "vpx_ports/mem.h" // filters for 16_h8 DECLARE_ALIGNED(32, static const uint8_t, filt1_global_avx2[32]) = { 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8 }; DECLARE_ALIGNED(32, static const uint8_t, filt2_global_avx2[32]) = { 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10 }; DECLARE_ALIGNED(32, static const uint8_t, filt3_global_avx2[32]) = { 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12 }; DECLARE_ALIGNED(32, static const uint8_t, filt4_global_avx2[32]) = { 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14 }; static INLINE void vpx_filter_block1d16_h8_x_avx2( const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line, uint8_t *output_ptr, ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter, const int avg) { __m128i outReg1, outReg2; __m256i outReg32b1, outReg32b2; unsigned int i; ptrdiff_t src_stride, dst_stride; __m256i f[4], filt[4], s[4]; shuffle_filter_avx2(filter, f); filt[0] = _mm256_load_si256((__m256i const *)filt1_global_avx2); filt[1] = _mm256_load_si256((__m256i const *)filt2_global_avx2); filt[2] = _mm256_load_si256((__m256i const *)filt3_global_avx2); filt[3] = _mm256_load_si256((__m256i const *)filt4_global_avx2); // multiply the size of the source and destination stride by two src_stride = src_pixels_per_line << 1; dst_stride = output_pitch << 1; for (i = output_height; i > 1; i -= 2) { __m256i srcReg; // load the 2 strides of source srcReg = _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)(src_ptr - 3))); srcReg = _mm256_inserti128_si256( srcReg, _mm_loadu_si128((const __m128i *)(src_ptr + src_pixels_per_line - 3)), 1); // filter the source buffer s[0] = _mm256_shuffle_epi8(srcReg, filt[0]); s[1] = _mm256_shuffle_epi8(srcReg, filt[1]); s[2] = _mm256_shuffle_epi8(srcReg, filt[2]); s[3] = _mm256_shuffle_epi8(srcReg, filt[3]); outReg32b1 = convolve8_16_avx2(s, f); // reading 2 strides of the next 16 bytes // (part of it was being read by earlier read) srcReg = _mm256_castsi128_si256(_mm_loadu_si128((const __m128i *)(src_ptr + 5))); srcReg = _mm256_inserti128_si256( srcReg, _mm_loadu_si128((const __m128i *)(src_ptr + src_pixels_per_line + 5)), 1); // filter the source buffer s[0] = _mm256_shuffle_epi8(srcReg, filt[0]); s[1] = _mm256_shuffle_epi8(srcReg, filt[1]); s[2] = _mm256_shuffle_epi8(srcReg, filt[2]); s[3] = _mm256_shuffle_epi8(srcReg, filt[3]); outReg32b2 = convolve8_16_avx2(s, f); // shrink to 8 bit each 16 bits, the low and high 64-bits of each lane // contain the first and second convolve result respectively outReg32b1 = _mm256_packus_epi16(outReg32b1, outReg32b2); src_ptr += src_stride; // average if necessary outReg1 = _mm256_castsi256_si128(outReg32b1); outReg2 = _mm256_extractf128_si256(outReg32b1, 1); if (avg) { outReg1 = _mm_avg_epu8(outReg1, _mm_load_si128((__m128i *)output_ptr)); outReg2 = _mm_avg_epu8( outReg2, _mm_load_si128((__m128i
*)(output_ptr + output_pitch))); } // save 16 bytes _mm_store_si128((__m128i *)output_ptr, outReg1); // save the next 16 bytes _mm_store_si128((__m128i *)(output_ptr + output_pitch), outReg2); output_ptr += dst_stride; } // if the number of strides is odd. // process only 16 bytes if (i > 0) { __m128i srcReg; // load the first 16 bytes of the last row srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3)); // filter the source buffer s[0] = _mm256_castsi128_si256( _mm_shuffle_epi8(srcReg, _mm256_castsi256_si128(filt[0]))); s[1] = _mm256_castsi128_si256( _mm_shuffle_epi8(srcReg, _mm256_castsi256_si128(filt[1]))); s[2] = _mm256_castsi128_si256( _mm_shuffle_epi8(srcReg, _mm256_castsi256_si128(filt[2]))); s[3] = _mm256_castsi128_si256( _mm_shuffle_epi8(srcReg, _mm256_castsi256_si128(filt[3]))); outReg1 = convolve8_8_avx2(s, f); // reading the next 16 bytes // (part of it was being read by earlier read) srcReg = _mm_loadu_si128((const __m128i *)(src_ptr + 5)); // filter the source buffer s[0] = _mm256_castsi128_si256( _mm_shuffle_epi8(srcReg, _mm256_castsi256_si128(filt[0]))); s[1] = _mm256_castsi128_si256( _mm_shuffle_epi8(srcReg, _mm256_castsi256_si128(filt[1]))); s[2] = _mm256_castsi128_si256( _mm_shuffle_epi8(srcReg, _mm256_castsi256_si128(filt[2]))); s[3] = _mm256_castsi128_si256( _mm_shuffle_epi8(srcReg, _mm256_castsi256_si128(filt[3]))); outReg2 = convolve8_8_avx2(s, f); // shrink to 8 bit each 16 bits, the low and high 64-bits of each lane // contain the first and second convolve result respectively outReg1 = _mm_packus_epi16(outReg1, outReg2); // average if necessary if (avg) { outReg1 = _mm_avg_epu8(outReg1, _mm_load_si128((__m128i *)output_ptr)); } // save 16 bytes _mm_store_si128((__m128i *)output_ptr, outReg1); } } static void vpx_filter_block1d16_h8_avx2( const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *output_ptr, ptrdiff_t dst_stride, uint32_t output_height, const int16_t *filter) { vpx_filter_block1d16_h8_x_avx2(src_ptr, src_stride, output_ptr, dst_stride, output_height, filter, 0); } static void vpx_filter_block1d16_h8_avg_avx2( const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *output_ptr, ptrdiff_t dst_stride, uint32_t output_height, const int16_t *filter) { vpx_filter_block1d16_h8_x_avx2(src_ptr, src_stride, output_ptr, dst_stride, output_height, filter, 1); } static INLINE void vpx_filter_block1d16_v8_x_avx2( const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr, ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter, const int avg) { __m128i outReg1, outReg2; __m256i srcRegHead1; unsigned int i; ptrdiff_t src_stride, dst_stride; __m256i f[4], s1[4], s2[4]; shuffle_filter_avx2(filter, f); // multiply the size of the source and destination stride by two src_stride = src_pitch << 1; dst_stride = out_pitch << 1; { __m128i s[6]; __m256i s32b[6]; // load 16 bytes 7 times in stride of src_pitch s[0] = _mm_loadu_si128((const __m128i *)(src_ptr + 0 * src_pitch)); s[1] = _mm_loadu_si128((const __m128i *)(src_ptr + 1 * src_pitch)); s[2] = _mm_loadu_si128((const __m128i *)(src_ptr + 2 * src_pitch)); s[3] = _mm_loadu_si128((const __m128i *)(src_ptr + 3 * src_pitch)); s[4] = _mm_loadu_si128((const __m128i *)(src_ptr + 4 * src_pitch)); s[5] = _mm_loadu_si128((const __m128i *)(src_ptr + 5 * src_pitch)); srcRegHead1 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src_ptr + 6 * src_pitch))); // have each pair of consecutive loads in the same 256-bit register s32b[0] = _mm256_inserti128_si256(_mm256_castsi128_si256(s[0]), s[1], 1); s32b[1] =
_mm256_inserti128_si256(_mm256_castsi128_si256(s[1]), s[2], 1); s32b[2] = _mm256_inserti128_si256(_mm256_castsi128_si256(s[2]), s[3], 1); s32b[3] = _mm256_inserti128_si256(_mm256_castsi128_si256(s[3]), s[4], 1); s32b[4] = _mm256_inserti128_si256(_mm256_castsi128_si256(s[4]), s[5], 1); s32b[5] = _mm256_inserti128_si256(_mm256_castsi128_si256(s[5]), _mm256_castsi256_si128(srcRegHead1), 1); // merge every two consecutive registers except the last one // the first lanes contain values for filtering odd rows (1,3,5...) and // the second lanes contain values for filtering even rows (2,4,6...) s1[0] = _mm256_unpacklo_epi8(s32b[0], s32b[1]); s2[0] = _mm256_unpackhi_epi8(s32b[0], s32b[1]); s1[1] = _mm256_unpacklo_epi8(s32b[2], s32b[3]); s2[1] = _mm256_unpackhi_epi8(s32b[2], s32b[3]); s1[2] = _mm256_unpacklo_epi8(s32b[4], s32b[5]); s2[2] = _mm256_unpackhi_epi8(s32b[4], s32b[5]); } for (i = output_height; i > 1; i -= 2) { __m256i srcRegHead2, srcRegHead3; // load the next 2 loads of 16 bytes and have every two // consecutive loads in the same 256 bit register srcRegHead2 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src_ptr + 7 * src_pitch))); srcRegHead1 = _mm256_inserti128_si256( srcRegHead1, _mm256_castsi256_si128(srcRegHead2), 1); srcRegHead3 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src_ptr + 8 * src_pitch))); srcRegHead2 = _mm256_inserti128_si256( srcRegHead2, _mm256_castsi256_si128(srcRegHead3), 1); // merge the two new consecutive registers // the first lane contains values for filtering odd rows (1,3,5...) and // the second lane contains values for filtering even rows (2,4,6...) s1[3] = _mm256_unpacklo_epi8(srcRegHead1, srcRegHead2); s2[3] = _mm256_unpackhi_epi8(srcRegHead1, srcRegHead2); s1[0] = convolve8_16_avx2(s1, f); s2[0] = convolve8_16_avx2(s2, f); // shrink to 8 bit each 16 bits, the low and high 64-bits of each lane // contain the first and second convolve result respectively s1[0] = _mm256_packus_epi16(s1[0], s2[0]); src_ptr += src_stride; // average if necessary outReg1 = _mm256_castsi256_si128(s1[0]); outReg2 = _mm256_extractf128_si256(s1[0], 1); if (avg) { outReg1 = _mm_avg_epu8(outReg1, _mm_load_si128((__m128i *)output_ptr)); outReg2 = _mm_avg_epu8( outReg2, _mm_load_si128((__m128i *)(output_ptr + out_pitch))); } // save 16 bytes _mm_store_si128((__m128i *)output_ptr, outReg1); // save the next 16 bytes _mm_store_si128((__m128i *)(output_ptr + out_pitch), outReg2); output_ptr += dst_stride; // shift down by two rows s1[0] = s1[1]; s2[0] = s2[1]; s1[1] = s1[2]; s2[1] = s2[2]; s1[2] = s1[3]; s2[2] = s2[3]; srcRegHead1 = srcRegHead3; } // if the number of strides is odd.
// process only 16 bytes if (i > 0) { // load the last 16 bytes const __m128i srcRegHead2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_pitch * 7)); // merge the last 2 results together s1[0] = _mm256_castsi128_si256( _mm_unpacklo_epi8(_mm256_castsi256_si128(srcRegHead1), srcRegHead2)); s2[0] = _mm256_castsi128_si256( _mm_unpackhi_epi8(_mm256_castsi256_si128(srcRegHead1), srcRegHead2)); outReg1 = convolve8_8_avx2(s1, f); outReg2 = convolve8_8_avx2(s2, f); // shrink to 8 bit each 16 bits, the low and high 64-bits of each lane // contain the first and second convolve result respectively outReg1 = _mm_packus_epi16(outReg1, outReg2); // average if necessary if (avg) { outReg1 = _mm_avg_epu8(outReg1, _mm_load_si128((__m128i *)output_ptr)); } // save 16 bytes _mm_store_si128((__m128i *)output_ptr, outReg1); } } static void vpx_filter_block1d16_v8_avx2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *filter) { vpx_filter_block1d16_v8_x_avx2(src_ptr, src_stride, dst_ptr, dst_stride, height, filter, 0); } static void vpx_filter_block1d16_v8_avg_avx2( const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *filter) { vpx_filter_block1d16_v8_x_avx2(src_ptr, src_stride, dst_ptr, dst_stride, height, filter, 1); } static void vpx_filter_block1d16_h4_avx2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { // We will cast the kernel from 16-bit words to 8-bit words, and then extract // the middle four elements of the kernel into two registers in the form // ... k[3] k[2] k[3] k[2] // ... k[5] k[4] k[5] k[4] // Then we shuffle the source into // ... s[1] s[0] s[0] s[-1] // ... s[3] s[2] s[2] s[1] // Calling multiply and add gives us half of the sum. Calling add gives us // first half of the output. Repeat again to get the second half of the // output. Finally we shuffle again to combine the two outputs. // Since avx2 allows us to use 256-bit buffer, we can do this two rows at a // time. 
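// Concretely, with 8-bit samples s[] and middle taps k[2..5], one
// _mm256_maddubs_epi16 of a row shuffled as ... s[1] s[0] s[0] s[-1]
// against the packed taps ... k[3] k[2] k[3] k[2] yields the 16-bit
// partial sums ... s[1]*k[3]+s[0]*k[2]  s[0]*k[3]+s[-1]*k[2]; adding the
// matching k[4]/k[5] partials then completes, for each output pixel i,
//   o[i] = s[i-1]*k[2] + s[i]*k[3] + s[i+1]*k[4] + s[i+2]*k[5].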
__m128i kernel_reg; // Kernel __m256i kernel_reg_256, kernel_reg_23, kernel_reg_45; // Segments of the kernel used const __m256i reg_32 = _mm256_set1_epi16(32); // Used for rounding const ptrdiff_t unrolled_src_stride = src_stride << 1; const ptrdiff_t unrolled_dst_stride = dst_stride << 1; int h; __m256i src_reg, src_reg_shift_0, src_reg_shift_2; __m256i dst_first, dst_second; __m256i tmp_0, tmp_1; __m256i idx_shift_0 = _mm256_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8); __m256i idx_shift_2 = _mm256_setr_epi8(2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10); // Start one pixel before as we need tap/2 - 1 = 1 sample from the past src_ptr -= 1; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm_srai_epi16(kernel_reg, 1); kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg); kernel_reg_256 = _mm256_broadcastsi128_si256(kernel_reg); kernel_reg_23 = _mm256_shuffle_epi8(kernel_reg_256, _mm256_set1_epi16(0x0302u)); kernel_reg_45 = _mm256_shuffle_epi8(kernel_reg_256, _mm256_set1_epi16(0x0504u)); for (h = height; h >= 2; h -= 2) { // Load the source src_reg = mm256_loadu2_si128(src_ptr, src_ptr + src_stride); src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); // Partial result for first half tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23); tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45); dst_first = _mm256_adds_epi16(tmp_0, tmp_1); // Do again to get the second half of dst // Load the source src_reg = mm256_loadu2_si128(src_ptr + 8, src_ptr + src_stride + 8); src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); // Partial result for second half tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23); tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45); dst_second = _mm256_adds_epi16(tmp_0, tmp_1); // Round each result dst_first = mm256_round_epi16(&dst_first, &reg_32, 6); dst_second = mm256_round_epi16(&dst_second, &reg_32, 6); // Finally combine to get the final dst dst_first = _mm256_packus_epi16(dst_first, dst_second); mm256_store2_si128((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), &dst_first); src_ptr += unrolled_src_stride; dst_ptr += unrolled_dst_stride; } // Repeat for the last row if needed if (h > 0) { src_reg = _mm256_loadu_si256((const __m256i *)src_ptr); // Reorder into 2 1 1 2 src_reg = _mm256_permute4x64_epi64(src_reg, 0x94); src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23); tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45); dst_first = _mm256_adds_epi16(tmp_0, tmp_1); dst_first = mm256_round_epi16(&dst_first, &reg_32, 6); dst_first = _mm256_packus_epi16(dst_first, dst_first); dst_first = _mm256_permute4x64_epi64(dst_first, 0x8); _mm_store_si128((__m128i *)dst_ptr, _mm256_castsi256_si128(dst_first)); } } static void vpx_filter_block1d16_v4_avx2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { // We will load two rows of pixels as 8-bit words, rearrange them into the // form // ... s[1,0] s[0,0] s[0,0] s[-1,0] // so that we can call multiply and add with the kernel partial output. Then // we can call add with another row to get the output.
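// Note on precision: the kernel is pre-scaled by _mm_srai_epi16(kernel, 1)
// below, which keeps the 16-bit accumulators within range, and the final
// mm256_round_epi16(&res, &reg_32, 6) computes (sum + 32) >> 6 on the
// half-scaled sum. Up to the truncation of odd taps this equals the
// full-precision (sum + 64) >> 7 rounding of the 7-bit filter taps.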
// Register for source s[-1:3, :] __m256i src_reg_1, src_reg_2, src_reg_3; // Interleaved rows of the source. lo is first half, hi second __m256i src_reg_m10, src_reg_01, src_reg_12, src_reg_23; __m256i src_reg_m1001_lo, src_reg_m1001_hi, src_reg_1223_lo, src_reg_1223_hi; __m128i kernel_reg; // Kernel __m256i kernel_reg_256, kernel_reg_23, kernel_reg_45; // Segments of the kernel used // Result after multiply and add __m256i res_reg_m1001_lo, res_reg_1223_lo, res_reg_m1001_hi, res_reg_1223_hi; __m256i res_reg, res_reg_lo, res_reg_hi; const __m256i reg_32 = _mm256_set1_epi16(32); // Used for rounding // We will compute the result two rows at a time const ptrdiff_t src_stride_unrolled = src_stride << 1; const ptrdiff_t dst_stride_unrolled = dst_stride << 1; int h; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm_srai_epi16(kernel_reg, 1); kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg); kernel_reg_256 = _mm256_broadcastsi128_si256(kernel_reg); kernel_reg_23 = _mm256_shuffle_epi8(kernel_reg_256, _mm256_set1_epi16(0x0302u)); kernel_reg_45 = _mm256_shuffle_epi8(kernel_reg_256, _mm256_set1_epi16(0x0504u)); // Row -1 to row 0 src_reg_m10 = mm256_loadu2_si128((const __m128i *)src_ptr, (const __m128i *)(src_ptr + src_stride)); // Row 0 to row 1 src_reg_1 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2))); src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); // First three rows src_reg_m1001_lo = _mm256_unpacklo_epi8(src_reg_m10, src_reg_01); src_reg_m1001_hi = _mm256_unpackhi_epi8(src_reg_m10, src_reg_01); for (h = height; h > 1; h -= 2) { src_reg_2 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3))); src_reg_12 = _mm256_inserti128_si256(src_reg_1, _mm256_castsi256_si128(src_reg_2), 1); src_reg_3 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4))); src_reg_23 = _mm256_inserti128_si256(src_reg_2, _mm256_castsi256_si128(src_reg_3), 1); // Last three rows src_reg_1223_lo = _mm256_unpacklo_epi8(src_reg_12, src_reg_23); src_reg_1223_hi = _mm256_unpackhi_epi8(src_reg_12, src_reg_23); // Output from first half res_reg_m1001_lo = _mm256_maddubs_epi16(src_reg_m1001_lo, kernel_reg_23); res_reg_1223_lo = _mm256_maddubs_epi16(src_reg_1223_lo, kernel_reg_45); res_reg_lo = _mm256_adds_epi16(res_reg_m1001_lo, res_reg_1223_lo); // Output from second half res_reg_m1001_hi = _mm256_maddubs_epi16(src_reg_m1001_hi, kernel_reg_23); res_reg_1223_hi = _mm256_maddubs_epi16(src_reg_1223_hi, kernel_reg_45); res_reg_hi = _mm256_adds_epi16(res_reg_m1001_hi, res_reg_1223_hi); // Round the words res_reg_lo = mm256_round_epi16(&res_reg_lo, &reg_32, 6); res_reg_hi = mm256_round_epi16(&res_reg_hi, &reg_32, 6); // Combine to get the result res_reg = _mm256_packus_epi16(res_reg_lo, res_reg_hi); // Save the result mm256_store2_si128((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), &res_reg); // Update the source by two rows src_ptr += src_stride_unrolled; dst_ptr += dst_stride_unrolled; src_reg_m1001_lo = src_reg_1223_lo; src_reg_m1001_hi = src_reg_1223_hi; src_reg_1 = src_reg_3; } } static void vpx_filter_block1d8_h4_avx2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { // We will cast the kernel from 16-bit words to 8-bit words, and then extract // the middle four elements of the kernel into two registers in the form // ... k[3] k[2] k[3] k[2] // ...
k[5] k[4] k[5] k[4] // Then we shuffle the source into // ... s[1] s[0] s[0] s[-1] // ... s[3] s[2] s[2] s[1] // Calling multiply and add gives us half of the sum. Calling add gives us // first half of the output. Repeat again to get the second half of the // output. Finally we shuffle again to combine the two outputs. // Since avx2 allows us to use 256-bit buffer, we can do this two rows at a // time. __m128i kernel_reg_128; // Kernel __m256i kernel_reg, kernel_reg_23, kernel_reg_45; // Segments of the kernel used const __m256i reg_32 = _mm256_set1_epi16(32); // Used for rounding const ptrdiff_t unrolled_src_stride = src_stride << 1; const ptrdiff_t unrolled_dst_stride = dst_stride << 1; int h; __m256i src_reg, src_reg_shift_0, src_reg_shift_2; __m256i dst_reg; __m256i tmp_0, tmp_1; __m256i idx_shift_0 = _mm256_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8); __m256i idx_shift_2 = _mm256_setr_epi8(2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10); // Start one pixel before as we need tap/2 - 1 = 1 sample from the past src_ptr -= 1; // Load Kernel kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); kernel_reg_128 = _mm_srai_epi16(kernel_reg_128, 1); kernel_reg_128 = _mm_packs_epi16(kernel_reg_128, kernel_reg_128); kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); kernel_reg_23 = _mm256_shuffle_epi8(kernel_reg, _mm256_set1_epi16(0x0302u)); kernel_reg_45 = _mm256_shuffle_epi8(kernel_reg, _mm256_set1_epi16(0x0504u)); for (h = height; h >= 2; h -= 2) { // Load the source src_reg = mm256_loadu2_si128(src_ptr, src_ptr + src_stride); src_reg_shift_0 = _mm256_shuffle_epi8(src_reg, idx_shift_0); src_reg_shift_2 = _mm256_shuffle_epi8(src_reg, idx_shift_2); // Get the output tmp_0 = _mm256_maddubs_epi16(src_reg_shift_0, kernel_reg_23); tmp_1 = _mm256_maddubs_epi16(src_reg_shift_2, kernel_reg_45); dst_reg = _mm256_adds_epi16(tmp_0, tmp_1); // Round the result dst_reg = mm256_round_epi16(&dst_reg, &reg_32, 6); // Finally combine to get the final dst dst_reg = _mm256_packus_epi16(dst_reg, dst_reg); mm256_storeu2_epi64((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), &dst_reg); src_ptr += unrolled_src_stride; dst_ptr += unrolled_dst_stride; } // Repeat for the last row if needed if (h > 0) { __m128i src_reg = _mm_loadu_si128((const __m128i *)src_ptr); __m128i dst_reg; const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding __m128i tmp_0, tmp_1; __m128i src_reg_shift_0 = _mm_shuffle_epi8(src_reg, _mm256_castsi256_si128(idx_shift_0)); __m128i src_reg_shift_2 = _mm_shuffle_epi8(src_reg, _mm256_castsi256_si128(idx_shift_2)); tmp_0 = _mm_maddubs_epi16(src_reg_shift_0, _mm256_castsi256_si128(kernel_reg_23)); tmp_1 = _mm_maddubs_epi16(src_reg_shift_2, _mm256_castsi256_si128(kernel_reg_45)); dst_reg = _mm_adds_epi16(tmp_0, tmp_1); dst_reg = mm_round_epi16_sse2(&dst_reg, &reg_32, 6); dst_reg = _mm_packus_epi16(dst_reg, _mm_setzero_si128()); _mm_storel_epi64((__m128i *)dst_ptr, dst_reg); } } static void vpx_filter_block1d8_v4_avx2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { // We will load two rows of pixels as 8-bit words, rearrange them into the // form // ... s[1,0] s[0,0] s[0,0] s[-1,0] // so that we can call multiply and add with the kernel partial output. Then // we can call add with another row to get the output.
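// Two output rows are produced per iteration by keeping a row pair in the
// two 128-bit lanes of one ymm register:
// _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21) below forms
// [row 0 | row 1] from [row -1 | row 0] and [row 1 | xx], so after
// _mm256_unpacklo_epi8 lane 0 holds the interleaved pair feeding one
// output row and lane 1 the pair feeding the next, letting a single
// maddubs/add/round sequence finish both rows at once.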
// Register for source s[-1:3, :] __m256i src_reg_1, src_reg_2, src_reg_3; // Interleaved rows of the source. lo is first half, hi second __m256i src_reg_m10, src_reg_01, src_reg_12, src_reg_23; __m256i src_reg_m1001, src_reg_1223; __m128i kernel_reg_128; // Kernel __m256i kernel_reg, kernel_reg_23, kernel_reg_45; // Segments of the kernel used // Result after multiply and add __m256i res_reg_m1001, res_reg_1223; __m256i res_reg; const __m256i reg_32 = _mm256_set1_epi16(32); // Used for rounding // We will compute the result two rows at a time const ptrdiff_t src_stride_unrolled = src_stride << 1; const ptrdiff_t dst_stride_unrolled = dst_stride << 1; int h; // Load Kernel kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); kernel_reg_128 = _mm_srai_epi16(kernel_reg_128, 1); kernel_reg_128 = _mm_packs_epi16(kernel_reg_128, kernel_reg_128); kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); kernel_reg_23 = _mm256_shuffle_epi8(kernel_reg, _mm256_set1_epi16(0x0302u)); kernel_reg_45 = _mm256_shuffle_epi8(kernel_reg, _mm256_set1_epi16(0x0504u)); // Row -1 to row 0 src_reg_m10 = mm256_loadu2_epi64((const __m128i *)src_ptr, (const __m128i *)(src_ptr + src_stride)); // Row 0 to row 1 src_reg_1 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2))); src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); // First three rows src_reg_m1001 = _mm256_unpacklo_epi8(src_reg_m10, src_reg_01); for (h = height; h > 1; h -= 2) { src_reg_2 = _mm256_castsi128_si256( _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 3))); src_reg_12 = _mm256_inserti128_si256(src_reg_1, _mm256_castsi256_si128(src_reg_2), 1); src_reg_3 = _mm256_castsi128_si256( _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 4))); src_reg_23 = _mm256_inserti128_si256(src_reg_2, _mm256_castsi256_si128(src_reg_3), 1); // Last three rows src_reg_1223 = _mm256_unpacklo_epi8(src_reg_12, src_reg_23); // Output res_reg_m1001 = _mm256_maddubs_epi16(src_reg_m1001, kernel_reg_23); res_reg_1223 = _mm256_maddubs_epi16(src_reg_1223, kernel_reg_45); res_reg = _mm256_adds_epi16(res_reg_m1001, res_reg_1223); // Round the words res_reg = mm256_round_epi16(&res_reg, &reg_32, 6); // Combine to get the result res_reg = _mm256_packus_epi16(res_reg, res_reg); // Save the result mm256_storeu2_epi64((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), &res_reg); // Update the source by two rows src_ptr += src_stride_unrolled; dst_ptr += dst_stride_unrolled; src_reg_m1001 = src_reg_1223; src_reg_1 = src_reg_3; } } static void vpx_filter_block1d4_h4_avx2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { // We will cast the kernel from 16-bit words to 8-bit words, and then extract // the middle four elements of the kernel into a single register in the form // k[5:2] k[5:2] k[5:2] k[5:2] // Then we shuffle the source into // s[5:2] s[4:1] s[3:0] s[2:-1] // Calling multiply and add gives us half of the sum next to each other. // Calling horizontal add then gives us the output. // Since avx2 has 256-bit register, we can do 2 rows at a time.
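// Worked example of the 4-wide scheme: the shuffle places the bytes for
// output pixel 0 as s[-1] s[0] s[1] s[2] and the packed kernel repeats
// k[2] k[3] k[4] k[5], so _mm256_maddubs_epi16 gives the adjacent pair
// sums s[-1]*k[2]+s[0]*k[3] and s[1]*k[4]+s[2]*k[5], and the following
// _mm256_hadds_epi16 folds them into
//   o[0] = s[-1]*k[2] + s[0]*k[3] + s[1]*k[4] + s[2]*k[5].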
__m128i kernel_reg_128; // Kernel __m256i kernel_reg; const __m256i reg_32 = _mm256_set1_epi16(32); // Used for rounding int h; const ptrdiff_t unrolled_src_stride = src_stride << 1; const ptrdiff_t unrolled_dst_stride = dst_stride << 1; __m256i src_reg, src_reg_shuf; __m256i dst; __m256i shuf_idx = _mm256_setr_epi8(0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6, 0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6); // Start one pixel before as we need tap/2 - 1 = 1 sample from the past src_ptr -= 1; // Load Kernel kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); kernel_reg_128 = _mm_srai_epi16(kernel_reg_128, 1); kernel_reg_128 = _mm_packs_epi16(kernel_reg_128, kernel_reg_128); kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); kernel_reg = _mm256_shuffle_epi8(kernel_reg, _mm256_set1_epi32(0x05040302u)); for (h = height; h > 1; h -= 2) { // Load the source src_reg = mm256_loadu2_epi64((const __m128i *)src_ptr, (const __m128i *)(src_ptr + src_stride)); src_reg_shuf = _mm256_shuffle_epi8(src_reg, shuf_idx); // Get the result dst = _mm256_maddubs_epi16(src_reg_shuf, kernel_reg); dst = _mm256_hadds_epi16(dst, _mm256_setzero_si256()); // Round result dst = mm256_round_epi16(&dst, &reg_32, 6); // Pack to 8-bits dst = _mm256_packus_epi16(dst, _mm256_setzero_si256()); // Save mm256_storeu2_epi32((__m128i *const)dst_ptr, (__m128i *const)(dst_ptr + dst_stride), &dst); src_ptr += unrolled_src_stride; dst_ptr += unrolled_dst_stride; } if (h > 0) { // Load the source const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding __m128i src_reg = _mm_loadl_epi64((const __m128i *)src_ptr); __m128i src_reg_shuf = _mm_shuffle_epi8(src_reg, _mm256_castsi256_si128(shuf_idx)); // Get the result __m128i dst = _mm_maddubs_epi16(src_reg_shuf, _mm256_castsi256_si128(kernel_reg)); dst = _mm_hadds_epi16(dst, _mm_setzero_si128()); // Round result dst = mm_round_epi16_sse2(&dst, &reg_32, 6); // Pack to 8-bits dst = _mm_packus_epi16(dst, _mm_setzero_si128()); *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(dst); } } static void vpx_filter_block1d4_v4_avx2(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { // We will load two rows of pixels as 8-bit words, rearrange them into the // form // ... s[3,0] s[2,0] s[1,0] s[0,0] s[2,0] s[1,0] s[0,0] s[-1,0] // so that we can call multiply and add with the kernel to get partial output. // Calling horizontal add then gives us the complete output // Register for source s[-1:3, :] __m256i src_reg_1, src_reg_2, src_reg_3; // Interleaved rows of the source.
lo is first half, hi second __m256i src_reg_m10, src_reg_01, src_reg_12, src_reg_23; __m256i src_reg_m1001, src_reg_1223, src_reg_m1012_1023; __m128i kernel_reg_128; // Kernel __m256i kernel_reg; // Result after multiply and add __m256i res_reg; const __m256i reg_32 = _mm256_set1_epi16(32); // Used for rounding // We will compute the result two rows at a time const ptrdiff_t src_stride_unrolled = src_stride << 1; const ptrdiff_t dst_stride_unrolled = dst_stride << 1; int h; // Load Kernel kernel_reg_128 = _mm_loadu_si128((const __m128i *)kernel); kernel_reg_128 = _mm_srai_epi16(kernel_reg_128, 1); kernel_reg_128 = _mm_packs_epi16(kernel_reg_128, kernel_reg_128); kernel_reg = _mm256_broadcastsi128_si256(kernel_reg_128); kernel_reg = _mm256_shuffle_epi8(kernel_reg, _mm256_set1_epi32(0x05040302u)); // Row -1 to row 0 src_reg_m10 = mm256_loadu2_si128((const __m128i *)src_ptr, (const __m128i *)(src_ptr + src_stride)); // Row 0 to row 1 src_reg_1 = _mm256_castsi128_si256( _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2))); src_reg_01 = _mm256_permute2x128_si256(src_reg_m10, src_reg_1, 0x21); // First three rows src_reg_m1001 = _mm256_unpacklo_epi8(src_reg_m10, src_reg_01); for (h = height; h > 1; h -= 2) { src_reg_2 = _mm256_castsi128_si256( _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 3))); src_reg_12 = _mm256_inserti128_si256(src_reg_1, _mm256_castsi256_si128(src_reg_2), 1); src_reg_3 = _mm256_castsi128_si256( _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 4))); src_reg_23 = _mm256_inserti128_si256(src_reg_2, _mm256_castsi256_si128(src_reg_3), 1); // Last three rows src_reg_1223 = _mm256_unpacklo_epi8(src_reg_12, src_reg_23); // Combine all the rows src_reg_m1012_1023 = _mm256_unpacklo_epi16(src_reg_m1001, src_reg_1223); // Output res_reg = _mm256_maddubs_epi16(src_reg_m1012_1023, kernel_reg); res_reg = _mm256_hadds_epi16(res_reg, _mm256_setzero_si256()); // Round the words res_reg = mm256_round_epi16(&res_reg, &reg_32, 6); // Combine to get the result res_reg = _mm256_packus_epi16(res_reg, res_reg); // Save the result mm256_storeu2_epi32((__m128i *)dst_ptr, (__m128i *)(dst_ptr + dst_stride), &res_reg); // Update the source by two rows src_ptr += src_stride_unrolled; dst_ptr += dst_stride_unrolled; src_reg_m1001 = src_reg_1223; src_reg_1 = src_reg_3; } } #if HAVE_AVX2 && HAVE_SSSE3 filter8_1dfunction vpx_filter_block1d4_v8_ssse3; #if VPX_ARCH_X86_64 filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3; filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3; filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3; #define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_intrin_ssse3 #define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_intrin_ssse3 #define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_intrin_ssse3 #else // VPX_ARCH_X86 filter8_1dfunction vpx_filter_block1d8_v8_ssse3; filter8_1dfunction vpx_filter_block1d8_h8_ssse3; filter8_1dfunction vpx_filter_block1d4_h8_ssse3; #define vpx_filter_block1d8_v8_avx2 vpx_filter_block1d8_v8_ssse3 #define vpx_filter_block1d8_h8_avx2 vpx_filter_block1d8_h8_ssse3 #define vpx_filter_block1d4_h8_avx2 vpx_filter_block1d4_h8_ssse3 #endif // VPX_ARCH_X86_64 filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3; filter8_1dfunction vpx_filter_block1d8_h8_avg_ssse3; filter8_1dfunction vpx_filter_block1d4_v8_avg_ssse3; filter8_1dfunction vpx_filter_block1d4_h8_avg_ssse3; #define vpx_filter_block1d8_v8_avg_avx2 vpx_filter_block1d8_v8_avg_ssse3 #define vpx_filter_block1d8_h8_avg_avx2 vpx_filter_block1d8_h8_avg_ssse3
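// The mappings above and below reuse the SSSE3 kernels for the variants
// this file does not implement natively; e.g. after the #define, a call to
// vpx_filter_block1d4_v8_avx2() resolves to vpx_filter_block1d4_v8_ssse3(),
// which is interchangeable because both match the filter8_1dfunction
// signature.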
#define vpx_filter_block1d4_v8_avg_avx2 vpx_filter_block1d4_v8_avg_ssse3 #define vpx_filter_block1d4_h8_avg_avx2 vpx_filter_block1d4_h8_avg_ssse3 filter8_1dfunction vpx_filter_block1d16_v2_ssse3; filter8_1dfunction vpx_filter_block1d16_h2_ssse3; filter8_1dfunction vpx_filter_block1d8_v2_ssse3; filter8_1dfunction vpx_filter_block1d8_h2_ssse3; filter8_1dfunction vpx_filter_block1d4_v2_ssse3; filter8_1dfunction vpx_filter_block1d4_h2_ssse3; #define vpx_filter_block1d4_v8_avx2 vpx_filter_block1d4_v8_ssse3 #define vpx_filter_block1d16_v2_avx2 vpx_filter_block1d16_v2_ssse3 #define vpx_filter_block1d16_h2_avx2 vpx_filter_block1d16_h2_ssse3 #define vpx_filter_block1d8_v2_avx2 vpx_filter_block1d8_v2_ssse3 #define vpx_filter_block1d8_h2_avx2 vpx_filter_block1d8_h2_ssse3 #define vpx_filter_block1d4_v2_avx2 vpx_filter_block1d4_v2_ssse3 #define vpx_filter_block1d4_h2_avx2 vpx_filter_block1d4_h2_ssse3 filter8_1dfunction vpx_filter_block1d16_v2_avg_ssse3; filter8_1dfunction vpx_filter_block1d16_h2_avg_ssse3; filter8_1dfunction vpx_filter_block1d8_v2_avg_ssse3; filter8_1dfunction vpx_filter_block1d8_h2_avg_ssse3; filter8_1dfunction vpx_filter_block1d4_v2_avg_ssse3; filter8_1dfunction vpx_filter_block1d4_h2_avg_ssse3; #define vpx_filter_block1d16_v2_avg_avx2 vpx_filter_block1d16_v2_avg_ssse3 #define vpx_filter_block1d16_h2_avg_avx2 vpx_filter_block1d16_h2_avg_ssse3 #define vpx_filter_block1d8_v2_avg_avx2 vpx_filter_block1d8_v2_avg_ssse3 #define vpx_filter_block1d8_h2_avg_avx2 vpx_filter_block1d8_h2_avg_ssse3 #define vpx_filter_block1d4_v2_avg_avx2 vpx_filter_block1d4_v2_avg_ssse3 #define vpx_filter_block1d4_h2_avg_avx2 vpx_filter_block1d4_h2_avg_ssse3 #define vpx_filter_block1d16_v4_avg_avx2 vpx_filter_block1d16_v8_avg_avx2 #define vpx_filter_block1d16_h4_avg_avx2 vpx_filter_block1d16_h8_avg_avx2 #define vpx_filter_block1d8_v4_avg_avx2 vpx_filter_block1d8_v8_avg_avx2 #define vpx_filter_block1d8_h4_avg_avx2 vpx_filter_block1d8_h8_avg_avx2 #define vpx_filter_block1d4_v4_avg_avx2 vpx_filter_block1d4_v8_avg_avx2 #define vpx_filter_block1d4_h4_avg_avx2 vpx_filter_block1d4_h8_avg_avx2 // void vpx_convolve8_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h); // void vpx_convolve8_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h); // void vpx_convolve8_avg_horiz_avx2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, // int y_step_q4, int w, int h); // void vpx_convolve8_avg_vert_avx2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, // int y_step_q4, int w, int h); FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , avx2, 0); FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * (num_taps / 2 - 1), , avx2, 0); FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src, avg_, avx2, 1); FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * (num_taps / 2 - 1), avg_, avx2, 1); // void vpx_convolve8_avx2(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h); // void 
// vpx_convolve8_avg_avx2(const uint8_t *src, ptrdiff_t src_stride,
//                        uint8_t *dst, ptrdiff_t dst_stride,
//                        const InterpKernel *filter, int x0_q4,
//                        int32_t x_step_q4, int y0_q4, int y_step_q4,
//                        int w, int h);
FUN_CONV_2D(, avx2, 0);
FUN_CONV_2D(avg_, avx2, 1);
#endif  // HAVE_AVX2 && HAVE_SSSE3
libvpx-1.8.2/vpx_dsp/x86/vpx_subpixel_8t_intrin_ssse3.c000066400000000000000000001240131357355204000231550ustar00rootroot00000000000000
/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS. All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <tmmintrin.h>  // SSSE3

#include <string.h>

#include "./vpx_config.h"
#include "./vpx_dsp_rtcd.h"
#include "vpx_dsp/vpx_filter.h"
#include "vpx_dsp/x86/convolve.h"
#include "vpx_dsp/x86/convolve_sse2.h"
#include "vpx_dsp/x86/convolve_ssse3.h"
#include "vpx_dsp/x86/mem_sse2.h"
#include "vpx_dsp/x86/transpose_sse2.h"
#include "vpx_mem/vpx_mem.h"
#include "vpx_ports/mem.h"

static INLINE __m128i shuffle_filter_convolve8_8_ssse3(
    const __m128i *const s, const int16_t *const filter) {
  __m128i f[4];
  shuffle_filter_ssse3(filter, f);
  return convolve8_8_ssse3(s, f);
}

// Used by the avx2 implementation.
#if VPX_ARCH_X86_64
// Use the intrinsics below
filter8_1dfunction vpx_filter_block1d4_h8_intrin_ssse3;
filter8_1dfunction vpx_filter_block1d8_h8_intrin_ssse3;
filter8_1dfunction vpx_filter_block1d8_v8_intrin_ssse3;
#define vpx_filter_block1d4_h8_ssse3 vpx_filter_block1d4_h8_intrin_ssse3
#define vpx_filter_block1d8_h8_ssse3 vpx_filter_block1d8_h8_intrin_ssse3
#define vpx_filter_block1d8_v8_ssse3 vpx_filter_block1d8_v8_intrin_ssse3
#else  // VPX_ARCH_X86
// Use the assembly in vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm.
filter8_1dfunction vpx_filter_block1d4_h8_ssse3;
filter8_1dfunction vpx_filter_block1d8_h8_ssse3;
filter8_1dfunction vpx_filter_block1d8_v8_ssse3;
#endif

#if VPX_ARCH_X86_64
void vpx_filter_block1d4_h8_intrin_ssse3(
    const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
    ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
  __m128i firstFilters, secondFilters, shuffle1, shuffle2;
  __m128i srcRegFilt1, srcRegFilt2;
  __m128i addFilterReg64, filtersReg, srcReg;
  unsigned int i;

  // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
  addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
  filtersReg = _mm_loadu_si128((const __m128i *)filter);
  // converting the 16 bit (short) to 8 bit (byte) and have the same data
  // in both lanes of 128 bit register.
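  //
  // For illustration (editorial sketch, not part of the upstream source):
  // with an 8-tap kernel {k0 .. k7} stored as 16-bit words, the pack below
  // yields the byte vector
  //   k0 k1 k2 k3 k4 k5 k6 k7 k0 k1 k2 k3 k4 k5 k6 k7
  // from which the shuffles that follow select the (k0,k1), (k2,k3), (k4,k5)
  // and (k6,k7) pairs consumed by pmaddubsw.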
  filtersReg = _mm_packs_epi16(filtersReg, filtersReg);

  // duplicate only the first 16 bits in the filter into the first lane
  firstFilters = _mm_shufflelo_epi16(filtersReg, 0);
  // duplicate only the third 16 bits in the filter into the first lane
  secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu);
  // duplicate only the second 16 bits in the filter into the second lane
  // firstFilters: k0 k1 k0 k1 k0 k1 k0 k1 k2 k3 k2 k3 k2 k3 k2 k3
  firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u);
  // duplicate only the fourth 16 bits in the filter into the second lane
  // secondFilters: k4 k5 k4 k5 k4 k5 k4 k5 k6 k7 k6 k7 k6 k7 k6 k7
  secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu);

  // loading the local filters
  shuffle1 = _mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 5, 5, 6);
  shuffle2 = _mm_setr_epi8(4, 5, 5, 6, 6, 7, 7, 8, 6, 7, 7, 8, 8, 9, 9, 10);

  for (i = 0; i < output_height; i++) {
    srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3));

    // filter the source buffer
    srcRegFilt1 = _mm_shuffle_epi8(srcReg, shuffle1);
    srcRegFilt2 = _mm_shuffle_epi8(srcReg, shuffle2);

    // multiply 2 adjacent elements with the filter and add the result
    srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
    srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);

    // sum the results together, saturating only on the final step
    // the specific order of the additions prevents outranges
    srcRegFilt1 = _mm_add_epi16(srcRegFilt1, srcRegFilt2);

    // extract the higher half of the register
    srcRegFilt2 = _mm_srli_si128(srcRegFilt1, 8);

    // add the rounding offset early to avoid another saturated add
    srcRegFilt1 = _mm_add_epi16(srcRegFilt1, addFilterReg64);
    srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);

    // shift by 7 bit each 16 bits
    srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);

    // shrink to 8 bit each 16 bits
    srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
    src_ptr += src_pitch;

    // save only 4 bytes
    *((int *)&output_ptr[0]) = _mm_cvtsi128_si32(srcRegFilt1);

    output_ptr += output_pitch;
  }
}

void vpx_filter_block1d8_h8_intrin_ssse3(
    const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
    ptrdiff_t output_pitch, uint32_t output_height, const int16_t *filter) {
  unsigned int i;
  __m128i f[4], filt[4], s[4];

  shuffle_filter_ssse3(filter, f);
  filt[0] = _mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8);
  filt[1] = _mm_setr_epi8(2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10);
  filt[2] = _mm_setr_epi8(4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12);
  filt[3] =
      _mm_setr_epi8(6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14);

  for (i = 0; i < output_height; i++) {
    const __m128i srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3));

    // filter the source buffer
    s[0] = _mm_shuffle_epi8(srcReg, filt[0]);
    s[1] = _mm_shuffle_epi8(srcReg, filt[1]);
    s[2] = _mm_shuffle_epi8(srcReg, filt[2]);
    s[3] = _mm_shuffle_epi8(srcReg, filt[3]);
    s[0] = convolve8_8_ssse3(s, f);

    // shrink to 8 bit each 16 bits
    s[0] = _mm_packus_epi16(s[0], s[0]);
    src_ptr += src_pitch;

    // save only 8 bytes
    _mm_storel_epi64((__m128i *)&output_ptr[0], s[0]);

    output_ptr += output_pitch;
  }
}

void vpx_filter_block1d8_v8_intrin_ssse3(
    const uint8_t *src_ptr, ptrdiff_t src_pitch, uint8_t *output_ptr,
    ptrdiff_t out_pitch, uint32_t output_height, const int16_t *filter) {
  unsigned int i;
  __m128i f[4], s[8], ss[4];

  shuffle_filter_ssse3(filter, f);

  // load the first 7 rows of 8 bytes
  s[0] = _mm_loadl_epi64((const __m128i *)(src_ptr + 0 * src_pitch));
  s[1] = _mm_loadl_epi64((const __m128i *)(src_ptr + 1 * src_pitch));
  s[2] = _mm_loadl_epi64((const __m128i *)(src_ptr + 2 * src_pitch));
  s[3] = _mm_loadl_epi64((const __m128i *)(src_ptr + 3 * src_pitch));
  s[4] = _mm_loadl_epi64((const __m128i *)(src_ptr + 4 * src_pitch));
  s[5] = _mm_loadl_epi64((const __m128i *)(src_ptr + 5 * src_pitch));
  s[6] = _mm_loadl_epi64((const __m128i *)(src_ptr + 6 * src_pitch));

  for (i = 0; i < output_height; i++) {
    // load the last 8 bytes
    s[7] = _mm_loadl_epi64((const __m128i *)(src_ptr + 7 * src_pitch));

    // merge the result together
    ss[0] = _mm_unpacklo_epi8(s[0], s[1]);
    ss[1] = _mm_unpacklo_epi8(s[2], s[3]);
    ss[2] = _mm_unpacklo_epi8(s[4], s[5]);
    ss[3] = _mm_unpacklo_epi8(s[6], s[7]);
    ss[0] = convolve8_8_ssse3(ss, f);

    // shrink to 8 bit each 16 bits
    ss[0] = _mm_packus_epi16(ss[0], ss[0]);
    src_ptr += src_pitch;

    // shift down a row
    s[0] = s[1];
    s[1] = s[2];
    s[2] = s[3];
    s[3] = s[4];
    s[4] = s[5];
    s[5] = s[6];
    s[6] = s[7];

    // save only 8 bytes convolve result
    _mm_storel_epi64((__m128i *)&output_ptr[0], ss[0]);

    output_ptr += out_pitch;
  }
}
#endif  // VPX_ARCH_X86_64

static void vpx_filter_block1d16_h4_ssse3(const uint8_t *src_ptr,
                                          ptrdiff_t src_stride,
                                          uint8_t *dst_ptr,
                                          ptrdiff_t dst_stride,
                                          uint32_t height,
                                          const int16_t *kernel) {
  // We will cast the kernel from 16-bit words to 8-bit words, and then extract
  // the middle four elements of the kernel into two registers in the form
  //   ... k[3] k[2] k[3] k[2]
  //   ... k[5] k[4] k[5] k[4]
  // Then we shuffle the source into
  //   ... s[1] s[0] s[0] s[-1]
  //   ... s[3] s[2] s[2] s[1]
  // Calling multiply and add gives us half of the sum. Calling add gives us
  // first half of the output. Repeat again to get the second half of the
  // output. Finally we shuffle again to combine the two outputs.

  __m128i kernel_reg;                    // Kernel
  __m128i kernel_reg_23, kernel_reg_45;  // Segments of the kernel used
  const __m128i reg_32 = _mm_set1_epi16(32);  // Used for rounding
  int h;

  __m128i src_reg, src_reg_shift_0, src_reg_shift_2;
  __m128i dst_first, dst_second;
  __m128i tmp_0, tmp_1;
  __m128i idx_shift_0 =
      _mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8);
  __m128i idx_shift_2 =
      _mm_setr_epi8(2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10);

  // Start one pixel before as we need tap/2 - 1 = 1 sample from the past
  src_ptr -= 1;

  // Load Kernel
  kernel_reg = _mm_loadu_si128((const __m128i *)kernel);
  kernel_reg = _mm_srai_epi16(kernel_reg, 1);
  kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg);
  kernel_reg_23 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0302u));
  kernel_reg_45 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0504u));

  for (h = height; h > 0; --h) {
    // Load the source
    src_reg = _mm_loadu_si128((const __m128i *)src_ptr);
    src_reg_shift_0 = _mm_shuffle_epi8(src_reg, idx_shift_0);
    src_reg_shift_2 = _mm_shuffle_epi8(src_reg, idx_shift_2);

    // Partial result for first half
    tmp_0 = _mm_maddubs_epi16(src_reg_shift_0, kernel_reg_23);
    tmp_1 = _mm_maddubs_epi16(src_reg_shift_2, kernel_reg_45);
    dst_first = _mm_adds_epi16(tmp_0, tmp_1);

    // Do again to get the second half of dst
    // Load the source
    src_reg = _mm_loadu_si128((const __m128i *)(src_ptr + 8));
    src_reg_shift_0 = _mm_shuffle_epi8(src_reg, idx_shift_0);
    src_reg_shift_2 = _mm_shuffle_epi8(src_reg, idx_shift_2);

    // Partial result for second half
    tmp_0 = _mm_maddubs_epi16(src_reg_shift_0, kernel_reg_23);
    tmp_1 = _mm_maddubs_epi16(src_reg_shift_2, kernel_reg_45);
    dst_second = _mm_adds_epi16(tmp_0, tmp_1);

    // Round each result
    dst_first = mm_round_epi16_sse2(&dst_first, &reg_32, 6);
    dst_second = mm_round_epi16_sse2(&dst_second, &reg_32, 6);
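    // Note (editorial sketch, not upstream code): because kernel_reg was
    // halved with _mm_srai_epi16(kernel_reg, 1) above, the usual
    // (sum + 64) >> 7 filter rounding becomes, per 16-bit lane,
    //   dst = (int16_t)((sum + 32) >> 6);
    // which is what mm_round_epi16_sse2() with reg_32 and a shift of 6
    // appears to compute here.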
// Finally combine to get the final dst dst_first = _mm_packus_epi16(dst_first, dst_second); _mm_store_si128((__m128i *)dst_ptr, dst_first); src_ptr += src_stride; dst_ptr += dst_stride; } } static void vpx_filter_block1d16_v4_ssse3(const uint8_t *src_ptr, ptrdiff_t src_stride, uint8_t *dst_ptr, ptrdiff_t dst_stride, uint32_t height, const int16_t *kernel) { // We will load two rows of pixels as 8-bit words, rearrange them into the // form // ... s[0,1] s[-1,1] s[0,0] s[-1,0] // ... s[0,9] s[-1,9] s[0,8] s[-1,8] // so that we can call multiply and add with the kernel to get 16-bit words of // the form // ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2] // Finally, we can add multiple rows together to get the desired output. // Register for source s[-1:3, :] __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3; // Interleaved rows of the source. lo is first half, hi second __m128i src_reg_m10_lo, src_reg_m10_hi, src_reg_01_lo, src_reg_01_hi; __m128i src_reg_12_lo, src_reg_12_hi, src_reg_23_lo, src_reg_23_hi; __m128i kernel_reg; // Kernel __m128i kernel_reg_23, kernel_reg_45; // Segments of the kernel used // Result after multiply and add __m128i res_reg_m10_lo, res_reg_01_lo, res_reg_12_lo, res_reg_23_lo; __m128i res_reg_m10_hi, res_reg_01_hi, res_reg_12_hi, res_reg_23_hi; __m128i res_reg_m1012, res_reg_0123; __m128i res_reg_m1012_lo, res_reg_0123_lo, res_reg_m1012_hi, res_reg_0123_hi; const __m128i reg_32 = _mm_set1_epi16(32); // Used for rounding // We will compute the result two rows at a time const ptrdiff_t src_stride_unrolled = src_stride << 1; const ptrdiff_t dst_stride_unrolled = dst_stride << 1; int h; // Load Kernel kernel_reg = _mm_loadu_si128((const __m128i *)kernel); kernel_reg = _mm_srai_epi16(kernel_reg, 1); kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg); kernel_reg_23 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0302u)); kernel_reg_45 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0504u)); // First shuffle the data src_reg_m1 = _mm_loadu_si128((const __m128i *)src_ptr); src_reg_0 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride)); src_reg_m10_lo = _mm_unpacklo_epi8(src_reg_m1, src_reg_0); src_reg_m10_hi = _mm_unpackhi_epi8(src_reg_m1, src_reg_0); // More shuffling src_reg_1 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 2)); src_reg_01_lo = _mm_unpacklo_epi8(src_reg_0, src_reg_1); src_reg_01_hi = _mm_unpackhi_epi8(src_reg_0, src_reg_1); for (h = height; h > 1; h -= 2) { src_reg_2 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 3)); src_reg_12_lo = _mm_unpacklo_epi8(src_reg_1, src_reg_2); src_reg_12_hi = _mm_unpackhi_epi8(src_reg_1, src_reg_2); src_reg_3 = _mm_loadu_si128((const __m128i *)(src_ptr + src_stride * 4)); src_reg_23_lo = _mm_unpacklo_epi8(src_reg_2, src_reg_3); src_reg_23_hi = _mm_unpackhi_epi8(src_reg_2, src_reg_3); // Partial output from first half res_reg_m10_lo = _mm_maddubs_epi16(src_reg_m10_lo, kernel_reg_23); res_reg_01_lo = _mm_maddubs_epi16(src_reg_01_lo, kernel_reg_23); res_reg_12_lo = _mm_maddubs_epi16(src_reg_12_lo, kernel_reg_45); res_reg_23_lo = _mm_maddubs_epi16(src_reg_23_lo, kernel_reg_45); // Add to get first half of the results res_reg_m1012_lo = _mm_adds_epi16(res_reg_m10_lo, res_reg_12_lo); res_reg_0123_lo = _mm_adds_epi16(res_reg_01_lo, res_reg_23_lo); // Partial output for second half res_reg_m10_hi = _mm_maddubs_epi16(src_reg_m10_hi, kernel_reg_23); res_reg_01_hi = _mm_maddubs_epi16(src_reg_01_hi, kernel_reg_23); res_reg_12_hi = _mm_maddubs_epi16(src_reg_12_hi, kernel_reg_45); 
    res_reg_23_hi = _mm_maddubs_epi16(src_reg_23_hi, kernel_reg_45);

    // Second half of the results
    res_reg_m1012_hi = _mm_adds_epi16(res_reg_m10_hi, res_reg_12_hi);
    res_reg_0123_hi = _mm_adds_epi16(res_reg_01_hi, res_reg_23_hi);

    // Round the words
    res_reg_m1012_lo = mm_round_epi16_sse2(&res_reg_m1012_lo, &reg_32, 6);
    res_reg_0123_lo = mm_round_epi16_sse2(&res_reg_0123_lo, &reg_32, 6);
    res_reg_m1012_hi = mm_round_epi16_sse2(&res_reg_m1012_hi, &reg_32, 6);
    res_reg_0123_hi = mm_round_epi16_sse2(&res_reg_0123_hi, &reg_32, 6);

    // Combine to get the result
    res_reg_m1012 = _mm_packus_epi16(res_reg_m1012_lo, res_reg_m1012_hi);
    res_reg_0123 = _mm_packus_epi16(res_reg_0123_lo, res_reg_0123_hi);

    _mm_store_si128((__m128i *)dst_ptr, res_reg_m1012);
    _mm_store_si128((__m128i *)(dst_ptr + dst_stride), res_reg_0123);

    // Update the source by two rows
    src_ptr += src_stride_unrolled;
    dst_ptr += dst_stride_unrolled;

    src_reg_m10_lo = src_reg_12_lo;
    src_reg_m10_hi = src_reg_12_hi;
    src_reg_01_lo = src_reg_23_lo;
    src_reg_01_hi = src_reg_23_hi;
    src_reg_1 = src_reg_3;
  }
}

static void vpx_filter_block1d8_h4_ssse3(const uint8_t *src_ptr,
                                         ptrdiff_t src_stride, uint8_t *dst_ptr,
                                         ptrdiff_t dst_stride, uint32_t height,
                                         const int16_t *kernel) {
  // We will cast the kernel from 16-bit words to 8-bit words, and then extract
  // the middle four elements of the kernel into two registers in the form
  //   ... k[3] k[2] k[3] k[2]
  //   ... k[5] k[4] k[5] k[4]
  // Then we shuffle the source into
  //   ... s[1] s[0] s[0] s[-1]
  //   ... s[3] s[2] s[2] s[1]
  // Calling multiply and add gives us half of the sum. Calling add gives us
  // first half of the output. Repeat again to get the second half of the
  // output. Finally we shuffle again to combine the two outputs.

  __m128i kernel_reg;                    // Kernel
  __m128i kernel_reg_23, kernel_reg_45;  // Segments of the kernel used
  const __m128i reg_32 = _mm_set1_epi16(32);  // Used for rounding
  int h;

  __m128i src_reg, src_reg_shift_0, src_reg_shift_2;
  __m128i dst_first;
  __m128i tmp_0, tmp_1;
  __m128i idx_shift_0 =
      _mm_setr_epi8(0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8);
  __m128i idx_shift_2 =
      _mm_setr_epi8(2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10);

  // Start one pixel before as we need tap/2 - 1 = 1 sample from the past
  src_ptr -= 1;

  // Load Kernel
  kernel_reg = _mm_loadu_si128((const __m128i *)kernel);
  kernel_reg = _mm_srai_epi16(kernel_reg, 1);
  kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg);
  kernel_reg_23 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0302u));
  kernel_reg_45 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0504u));

  for (h = height; h > 0; --h) {
    // Load the source
    src_reg = _mm_loadu_si128((const __m128i *)src_ptr);
    src_reg_shift_0 = _mm_shuffle_epi8(src_reg, idx_shift_0);
    src_reg_shift_2 = _mm_shuffle_epi8(src_reg, idx_shift_2);

    // Get the result
    tmp_0 = _mm_maddubs_epi16(src_reg_shift_0, kernel_reg_23);
    tmp_1 = _mm_maddubs_epi16(src_reg_shift_2, kernel_reg_45);
    dst_first = _mm_adds_epi16(tmp_0, tmp_1);

    // Round the result
    dst_first = mm_round_epi16_sse2(&dst_first, &reg_32, 6);

    // Pack to 8-bits
    dst_first = _mm_packus_epi16(dst_first, _mm_setzero_si128());
    _mm_storel_epi64((__m128i *)dst_ptr, dst_first);

    src_ptr += src_stride;
    dst_ptr += dst_stride;
  }
}

static void vpx_filter_block1d8_v4_ssse3(const uint8_t *src_ptr,
                                         ptrdiff_t src_stride, uint8_t *dst_ptr,
                                         ptrdiff_t dst_stride, uint32_t height,
                                         const int16_t *kernel) {
  // We will load two rows of pixels as 8-bit words, rearrange them into the
  // form
  //   ... s[0,1] s[-1,1] s[0,0] s[-1,0]
  // so that we can call multiply and add with the kernel to get 16-bit words
  // of the form
  //   ... s[0,1]k[3]+s[-1,1]k[2] s[0,0]k[3]+s[-1,0]k[2]
  // Finally, we can add multiple rows together to get the desired output.

  // Register for source s[-1:3, :]
  __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3;
  // Interleaved rows of the source. lo is first half, hi second
  __m128i src_reg_m10, src_reg_01;
  __m128i src_reg_12, src_reg_23;

  __m128i kernel_reg;                    // Kernel
  __m128i kernel_reg_23, kernel_reg_45;  // Segments of the kernel used

  // Result after multiply and add
  __m128i res_reg_m10, res_reg_01, res_reg_12, res_reg_23;
  __m128i res_reg_m1012, res_reg_0123;

  const __m128i reg_32 = _mm_set1_epi16(32);  // Used for rounding

  // We will compute the result two rows at a time
  const ptrdiff_t src_stride_unrolled = src_stride << 1;
  const ptrdiff_t dst_stride_unrolled = dst_stride << 1;
  int h;

  // Load Kernel
  kernel_reg = _mm_loadu_si128((const __m128i *)kernel);
  kernel_reg = _mm_srai_epi16(kernel_reg, 1);
  kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg);
  kernel_reg_23 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0302u));
  kernel_reg_45 = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi16(0x0504u));

  // First shuffle the data
  src_reg_m1 = _mm_loadl_epi64((const __m128i *)src_ptr);
  src_reg_0 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride));
  src_reg_m10 = _mm_unpacklo_epi8(src_reg_m1, src_reg_0);

  // More shuffling
  src_reg_1 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 2));
  src_reg_01 = _mm_unpacklo_epi8(src_reg_0, src_reg_1);

  for (h = height; h > 1; h -= 2) {
    src_reg_2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 3));
    src_reg_12 = _mm_unpacklo_epi8(src_reg_1, src_reg_2);

    src_reg_3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 4));
    src_reg_23 = _mm_unpacklo_epi8(src_reg_2, src_reg_3);

    // Partial output
    res_reg_m10 = _mm_maddubs_epi16(src_reg_m10, kernel_reg_23);
    res_reg_01 = _mm_maddubs_epi16(src_reg_01, kernel_reg_23);
    res_reg_12 = _mm_maddubs_epi16(src_reg_12, kernel_reg_45);
    res_reg_23 = _mm_maddubs_epi16(src_reg_23, kernel_reg_45);

    // Add to get entire output
    res_reg_m1012 = _mm_adds_epi16(res_reg_m10, res_reg_12);
    res_reg_0123 = _mm_adds_epi16(res_reg_01, res_reg_23);

    // Round the words
    res_reg_m1012 = mm_round_epi16_sse2(&res_reg_m1012, &reg_32, 6);
    res_reg_0123 = mm_round_epi16_sse2(&res_reg_0123, &reg_32, 6);

    // Pack from 16-bit to 8-bit
    res_reg_m1012 = _mm_packus_epi16(res_reg_m1012, _mm_setzero_si128());
    res_reg_0123 = _mm_packus_epi16(res_reg_0123, _mm_setzero_si128());

    _mm_storel_epi64((__m128i *)dst_ptr, res_reg_m1012);
    _mm_storel_epi64((__m128i *)(dst_ptr + dst_stride), res_reg_0123);

    // Update the source by two rows
    src_ptr += src_stride_unrolled;
    dst_ptr += dst_stride_unrolled;

    src_reg_m10 = src_reg_12;
    src_reg_01 = src_reg_23;
    src_reg_1 = src_reg_3;
  }
}

static void vpx_filter_block1d4_h4_ssse3(const uint8_t *src_ptr,
                                         ptrdiff_t src_stride, uint8_t *dst_ptr,
                                         ptrdiff_t dst_stride, uint32_t height,
                                         const int16_t *kernel) {
  // We will cast the kernel from 16-bit words to 8-bit words, and then extract
  // the middle four elements of the kernel into a single register in the form
  //   k[5:2] k[5:2] k[5:2] k[5:2]
  // Then we shuffle the source into
  //   s[5:2] s[4:1] s[3:0] s[2:-1]
  // Calling multiply and add gives us half of the sum next to each other.
  // Calling horizontal add then gives us the output.
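  //
  // Editorial scalar model of the loop below (illustrative only; clip_to_u8
  // is a hypothetical helper, not upstream code):
  //   for (x = 0; x < 4; ++x) {
  //     int sum = src[x - 1] * (kernel[2] >> 1) + src[x] * (kernel[3] >> 1) +
  //               src[x + 1] * (kernel[4] >> 1) +
  //               src[x + 2] * (kernel[5] >> 1);
  //     dst[x] = clip_to_u8((sum + 32) >> 6);
  //   }
  // The 4-tap paths appear to be selected only when the outer taps of the
  // 8-tap kernel are zero, so only k[2..5] contribute.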
  __m128i kernel_reg;                         // Kernel
  const __m128i reg_32 = _mm_set1_epi16(32);  // Used for rounding
  int h;

  __m128i src_reg, src_reg_shuf;
  __m128i dst_first;
  __m128i shuf_idx =
      _mm_setr_epi8(0, 1, 2, 3, 1, 2, 3, 4, 2, 3, 4, 5, 3, 4, 5, 6);

  // Start one pixel before as we need tap/2 - 1 = 1 sample from the past
  src_ptr -= 1;

  // Load Kernel
  kernel_reg = _mm_loadu_si128((const __m128i *)kernel);
  kernel_reg = _mm_srai_epi16(kernel_reg, 1);
  kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg);
  kernel_reg = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi32(0x05040302u));

  for (h = height; h > 0; --h) {
    // Load the source
    src_reg = _mm_loadu_si128((const __m128i *)src_ptr);
    src_reg_shuf = _mm_shuffle_epi8(src_reg, shuf_idx);

    // Get the result
    dst_first = _mm_maddubs_epi16(src_reg_shuf, kernel_reg);
    dst_first = _mm_hadds_epi16(dst_first, _mm_setzero_si128());

    // Round result
    dst_first = mm_round_epi16_sse2(&dst_first, &reg_32, 6);

    // Pack to 8-bits
    dst_first = _mm_packus_epi16(dst_first, _mm_setzero_si128());
    *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(dst_first);

    src_ptr += src_stride;
    dst_ptr += dst_stride;
  }
}

static void vpx_filter_block1d4_v4_ssse3(const uint8_t *src_ptr,
                                         ptrdiff_t src_stride, uint8_t *dst_ptr,
                                         ptrdiff_t dst_stride, uint32_t height,
                                         const int16_t *kernel) {
  // We will load two rows of pixels as 8-bit words, rearrange them into the
  // form
  //   ... s[2,0] s[1,0] s[0,0] s[-1,0]
  // so that we can call multiply and add with the kernel to get partial
  // output. Then we can call horizontal add to get the output.
  // Finally, we can add multiple rows together to get the desired output.
  // This is done two rows at a time

  // Register for source s[-1:3, :]
  __m128i src_reg_m1, src_reg_0, src_reg_1, src_reg_2, src_reg_3;
  // Interleaved rows of the source.
  __m128i src_reg_m10, src_reg_01;
  __m128i src_reg_12, src_reg_23;
  __m128i src_reg_m1001, src_reg_1223;
  __m128i src_reg_m1012_1023_lo, src_reg_m1012_1023_hi;

  __m128i kernel_reg;  // Kernel

  // Result after multiply and add
  __m128i reg_0, reg_1;

  const __m128i reg_32 = _mm_set1_epi16(32);  // Used for rounding

  // We will compute the result two rows at a time
  const ptrdiff_t src_stride_unrolled = src_stride << 1;
  const ptrdiff_t dst_stride_unrolled = dst_stride << 1;
  int h;

  // Load Kernel
  kernel_reg = _mm_loadu_si128((const __m128i *)kernel);
  kernel_reg = _mm_srai_epi16(kernel_reg, 1);
  kernel_reg = _mm_packs_epi16(kernel_reg, kernel_reg);
  kernel_reg = _mm_shuffle_epi8(kernel_reg, _mm_set1_epi32(0x05040302u));

  // First shuffle the data
  src_reg_m1 = _mm_loadl_epi64((const __m128i *)src_ptr);
  src_reg_0 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride));
  src_reg_m10 = _mm_unpacklo_epi32(src_reg_m1, src_reg_0);

  // More shuffling
  src_reg_1 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 2));
  src_reg_01 = _mm_unpacklo_epi32(src_reg_0, src_reg_1);

  // Put three rows next to each other
  src_reg_m1001 = _mm_unpacklo_epi8(src_reg_m10, src_reg_01);

  for (h = height; h > 1; h -= 2) {
    src_reg_2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 3));
    src_reg_12 = _mm_unpacklo_epi32(src_reg_1, src_reg_2);

    src_reg_3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_stride * 4));
    src_reg_23 = _mm_unpacklo_epi32(src_reg_2, src_reg_3);

    // Put three rows next to each other
    src_reg_1223 = _mm_unpacklo_epi8(src_reg_12, src_reg_23);

    // Put all four rows next to each other
    src_reg_m1012_1023_lo = _mm_unpacklo_epi16(src_reg_m1001, src_reg_1223);
    src_reg_m1012_1023_hi = _mm_unpackhi_epi16(src_reg_m1001, src_reg_1223);

    // Get the results
    reg_0 = _mm_maddubs_epi16(src_reg_m1012_1023_lo, kernel_reg);
    reg_1 = _mm_maddubs_epi16(src_reg_m1012_1023_hi, kernel_reg);
    reg_0 = _mm_hadds_epi16(reg_0, _mm_setzero_si128());
    reg_1 = _mm_hadds_epi16(reg_1, _mm_setzero_si128());

    // Round the words
    reg_0 = mm_round_epi16_sse2(&reg_0, &reg_32, 6);
    reg_1 = mm_round_epi16_sse2(&reg_1, &reg_32, 6);

    // Pack from 16-bit to 8-bit and put them in the right order
    reg_0 = _mm_packus_epi16(reg_0, reg_0);
    reg_1 = _mm_packus_epi16(reg_1, reg_1);

    // Save the result
    *((uint32_t *)(dst_ptr)) = _mm_cvtsi128_si32(reg_0);
    *((uint32_t *)(dst_ptr + dst_stride)) = _mm_cvtsi128_si32(reg_1);

    // Update the source by two rows
    src_ptr += src_stride_unrolled;
    dst_ptr += dst_stride_unrolled;

    src_reg_m1001 = src_reg_1223;
    src_reg_1 = src_reg_3;
  }
}

// From vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm
filter8_1dfunction vpx_filter_block1d16_v8_ssse3;
filter8_1dfunction vpx_filter_block1d16_h8_ssse3;
filter8_1dfunction vpx_filter_block1d4_v8_ssse3;
filter8_1dfunction vpx_filter_block1d16_v8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d16_h8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d8_v8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d8_h8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d4_v8_avg_ssse3;
filter8_1dfunction vpx_filter_block1d4_h8_avg_ssse3;
// Use the [vh]8 version because there is no [vh]4 implementation.
#define vpx_filter_block1d16_v4_avg_ssse3 vpx_filter_block1d16_v8_avg_ssse3
#define vpx_filter_block1d16_h4_avg_ssse3 vpx_filter_block1d16_h8_avg_ssse3
#define vpx_filter_block1d8_v4_avg_ssse3 vpx_filter_block1d8_v8_avg_ssse3
#define vpx_filter_block1d8_h4_avg_ssse3 vpx_filter_block1d8_h8_avg_ssse3
#define vpx_filter_block1d4_v4_avg_ssse3 vpx_filter_block1d4_v8_avg_ssse3
#define vpx_filter_block1d4_h4_avg_ssse3 vpx_filter_block1d4_h8_avg_ssse3

// From vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm
filter8_1dfunction vpx_filter_block1d16_v2_ssse3;
filter8_1dfunction vpx_filter_block1d16_h2_ssse3;
filter8_1dfunction vpx_filter_block1d8_v2_ssse3;
filter8_1dfunction vpx_filter_block1d8_h2_ssse3;
filter8_1dfunction vpx_filter_block1d4_v2_ssse3;
filter8_1dfunction vpx_filter_block1d4_h2_ssse3;
filter8_1dfunction vpx_filter_block1d16_v2_avg_ssse3;
filter8_1dfunction vpx_filter_block1d16_h2_avg_ssse3;
filter8_1dfunction vpx_filter_block1d8_v2_avg_ssse3;
filter8_1dfunction vpx_filter_block1d8_h2_avg_ssse3;
filter8_1dfunction vpx_filter_block1d4_v2_avg_ssse3;
filter8_1dfunction vpx_filter_block1d4_h2_avg_ssse3;

// void vpx_convolve8_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
//                                uint8_t *dst, ptrdiff_t dst_stride,
//                                const InterpKernel *filter, int x0_q4,
//                                int32_t x_step_q4, int y0_q4, int y_step_q4,
//                                int w, int h);
// void vpx_convolve8_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
//                               uint8_t *dst, ptrdiff_t dst_stride,
//                               const InterpKernel *filter, int x0_q4,
//                               int32_t x_step_q4, int y0_q4, int y_step_q4,
//                               int w, int h);
// void vpx_convolve8_avg_horiz_ssse3(const uint8_t *src, ptrdiff_t src_stride,
//                                    uint8_t *dst, ptrdiff_t dst_stride,
//                                    const InterpKernel *filter, int x0_q4,
//                                    int32_t x_step_q4, int y0_q4,
//                                    int y_step_q4, int w, int h);
// void vpx_convolve8_avg_vert_ssse3(const uint8_t *src, ptrdiff_t src_stride,
//                                   uint8_t *dst, ptrdiff_t dst_stride,
//                                   const InterpKernel *filter, int x0_q4,
//                                   int32_t x_step_q4, int y0_q4,
//                                   int y_step_q4, int w, int h);
FUN_CONV_1D(horiz, x0_q4, x_step_q4, h, src, , ssse3, 0);
FUN_CONV_1D(vert, y0_q4, y_step_q4, v, src - src_stride * (num_taps / 2 - 1), ,
            ssse3, 0);
FUN_CONV_1D(avg_horiz, x0_q4, x_step_q4, h, src,
avg_, ssse3, 1); FUN_CONV_1D(avg_vert, y0_q4, y_step_q4, v, src - src_stride * (num_taps / 2 - 1), avg_, ssse3, 1); static void filter_horiz_w8_ssse3(const uint8_t *const src, const ptrdiff_t src_stride, uint8_t *const dst, const int16_t *const x_filter) { __m128i s[8], ss[4], temp; load_8bit_8x8(src, src_stride, s); // 00 01 10 11 20 21 30 31 40 41 50 51 60 61 70 71 // 02 03 12 13 22 23 32 33 42 43 52 53 62 63 72 73 // 04 05 14 15 24 25 34 35 44 45 54 55 64 65 74 75 // 06 07 16 17 26 27 36 37 46 47 56 57 66 67 76 77 transpose_16bit_4x8(s, ss); temp = shuffle_filter_convolve8_8_ssse3(ss, x_filter); // shrink to 8 bit each 16 bits temp = _mm_packus_epi16(temp, temp); // save only 8 bytes convolve result _mm_storel_epi64((__m128i *)dst, temp); } static void transpose8x8_to_dst(const uint8_t *const src, const ptrdiff_t src_stride, uint8_t *const dst, const ptrdiff_t dst_stride) { __m128i s[8]; load_8bit_8x8(src, src_stride, s); transpose_8bit_8x8(s, s); store_8bit_8x8(s, dst, dst_stride); } static void scaledconvolve_horiz_w8(const uint8_t *src, const ptrdiff_t src_stride, uint8_t *dst, const ptrdiff_t dst_stride, const InterpKernel *const x_filters, const int x0_q4, const int x_step_q4, const int w, const int h) { DECLARE_ALIGNED(16, uint8_t, temp[8 * 8]); int x, y, z; src -= SUBPEL_TAPS / 2 - 1; // This function processes 8x8 areas. The intermediate height is not always // a multiple of 8, so force it to be a multiple of 8 here. y = h + (8 - (h & 0x7)); do { int x_q4 = x0_q4; for (x = 0; x < w; x += 8) { // process 8 src_x steps for (z = 0; z < 8; ++z) { const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK]; if (x_q4 & SUBPEL_MASK) { filter_horiz_w8_ssse3(src_x, src_stride, temp + (z * 8), x_filter); } else { int i; for (i = 0; i < 8; ++i) { temp[z * 8 + i] = src_x[i * src_stride + 3]; } } x_q4 += x_step_q4; } // transpose the 8x8 filters values back to dst transpose8x8_to_dst(temp, 8, dst + x, dst_stride); } src += src_stride * 8; dst += dst_stride * 8; } while (y -= 8); } static void filter_horiz_w4_ssse3(const uint8_t *const src, const ptrdiff_t src_stride, uint8_t *const dst, const int16_t *const filter) { __m128i s[4], ss[2]; __m128i temp; load_8bit_8x4(src, src_stride, s); transpose_16bit_4x4(s, ss); // 00 01 10 11 20 21 30 31 s[0] = ss[0]; // 02 03 12 13 22 23 32 33 s[1] = _mm_srli_si128(ss[0], 8); // 04 05 14 15 24 25 34 35 s[2] = ss[1]; // 06 07 16 17 26 27 36 37 s[3] = _mm_srli_si128(ss[1], 8); temp = shuffle_filter_convolve8_8_ssse3(s, filter); // shrink to 8 bit each 16 bits temp = _mm_packus_epi16(temp, temp); // save only 4 bytes *(int *)dst = _mm_cvtsi128_si32(temp); } static void transpose4x4_to_dst(const uint8_t *const src, const ptrdiff_t src_stride, uint8_t *const dst, const ptrdiff_t dst_stride) { __m128i s[4]; load_8bit_4x4(src, src_stride, s); s[0] = transpose_8bit_4x4(s); s[1] = _mm_srli_si128(s[0], 4); s[2] = _mm_srli_si128(s[0], 8); s[3] = _mm_srli_si128(s[0], 12); store_8bit_4x4(s, dst, dst_stride); } static void scaledconvolve_horiz_w4(const uint8_t *src, const ptrdiff_t src_stride, uint8_t *dst, const ptrdiff_t dst_stride, const InterpKernel *const x_filters, const int x0_q4, const int x_step_q4, const int w, const int h) { DECLARE_ALIGNED(16, uint8_t, temp[4 * 4]); int x, y, z; src -= SUBPEL_TAPS / 2 - 1; for (y = 0; y < h; y += 4) { int x_q4 = x0_q4; for (x = 0; x < w; x += 4) { // process 4 src_x steps for (z = 0; z < 4; ++z) { const uint8_t *const src_x = &src[x_q4 >> SUBPEL_BITS]; const int16_t 
*const x_filter = x_filters[x_q4 & SUBPEL_MASK]; if (x_q4 & SUBPEL_MASK) { filter_horiz_w4_ssse3(src_x, src_stride, temp + (z * 4), x_filter); } else { int i; for (i = 0; i < 4; ++i) { temp[z * 4 + i] = src_x[i * src_stride + 3]; } } x_q4 += x_step_q4; } // transpose the 4x4 filters values back to dst transpose4x4_to_dst(temp, 4, dst + x, dst_stride); } src += src_stride * 4; dst += dst_stride * 4; } } static __m128i filter_vert_kernel(const __m128i *const s, const int16_t *const filter) { __m128i ss[4]; __m128i temp; // 00 10 01 11 02 12 03 13 ss[0] = _mm_unpacklo_epi8(s[0], s[1]); // 20 30 21 31 22 32 23 33 ss[1] = _mm_unpacklo_epi8(s[2], s[3]); // 40 50 41 51 42 52 43 53 ss[2] = _mm_unpacklo_epi8(s[4], s[5]); // 60 70 61 71 62 72 63 73 ss[3] = _mm_unpacklo_epi8(s[6], s[7]); temp = shuffle_filter_convolve8_8_ssse3(ss, filter); // shrink to 8 bit each 16 bits return _mm_packus_epi16(temp, temp); } static void filter_vert_w4_ssse3(const uint8_t *const src, const ptrdiff_t src_stride, uint8_t *const dst, const int16_t *const filter) { __m128i s[8]; __m128i temp; load_8bit_4x8(src, src_stride, s); temp = filter_vert_kernel(s, filter); // save only 4 bytes *(int *)dst = _mm_cvtsi128_si32(temp); } static void scaledconvolve_vert_w4( const uint8_t *src, const ptrdiff_t src_stride, uint8_t *const dst, const ptrdiff_t dst_stride, const InterpKernel *const y_filters, const int y0_q4, const int y_step_q4, const int w, const int h) { int y; int y_q4 = y0_q4; src -= src_stride * (SUBPEL_TAPS / 2 - 1); for (y = 0; y < h; ++y) { const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; if (y_q4 & SUBPEL_MASK) { filter_vert_w4_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter); } else { memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w); } y_q4 += y_step_q4; } } static void filter_vert_w8_ssse3(const uint8_t *const src, const ptrdiff_t src_stride, uint8_t *const dst, const int16_t *const filter) { __m128i s[8], temp; load_8bit_8x8(src, src_stride, s); temp = filter_vert_kernel(s, filter); // save only 8 bytes convolve result _mm_storel_epi64((__m128i *)dst, temp); } static void scaledconvolve_vert_w8( const uint8_t *src, const ptrdiff_t src_stride, uint8_t *const dst, const ptrdiff_t dst_stride, const InterpKernel *const y_filters, const int y0_q4, const int y_step_q4, const int w, const int h) { int y; int y_q4 = y0_q4; src -= src_stride * (SUBPEL_TAPS / 2 - 1); for (y = 0; y < h; ++y) { const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; if (y_q4 & SUBPEL_MASK) { filter_vert_w8_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter); } else { memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w); } y_q4 += y_step_q4; } } static void filter_vert_w16_ssse3(const uint8_t *src, const ptrdiff_t src_stride, uint8_t *const dst, const int16_t *const filter, const int w) { int i; __m128i f[4]; shuffle_filter_ssse3(filter, f); for (i = 0; i < w; i += 16) { __m128i s[8], s_lo[4], s_hi[4], temp_lo, temp_hi; loadu_8bit_16x8(src, src_stride, s); // merge the result together s_lo[0] = _mm_unpacklo_epi8(s[0], s[1]); s_hi[0] = _mm_unpackhi_epi8(s[0], s[1]); s_lo[1] = _mm_unpacklo_epi8(s[2], s[3]); s_hi[1] = _mm_unpackhi_epi8(s[2], s[3]); s_lo[2] = _mm_unpacklo_epi8(s[4], s[5]); s_hi[2] = _mm_unpackhi_epi8(s[4], s[5]); s_lo[3] = _mm_unpacklo_epi8(s[6], s[7]); s_hi[3] = _mm_unpackhi_epi8(s[6], s[7]); temp_lo = convolve8_8_ssse3(s_lo, f); temp_hi = 
convolve8_8_ssse3(s_hi, f); // shrink to 8 bit each 16 bits, the first lane contain the first convolve // result and the second lane contain the second convolve result temp_hi = _mm_packus_epi16(temp_lo, temp_hi); src += 16; // save 16 bytes convolve result _mm_store_si128((__m128i *)&dst[i], temp_hi); } } static void scaledconvolve_vert_w16( const uint8_t *src, const ptrdiff_t src_stride, uint8_t *const dst, const ptrdiff_t dst_stride, const InterpKernel *const y_filters, const int y0_q4, const int y_step_q4, const int w, const int h) { int y; int y_q4 = y0_q4; src -= src_stride * (SUBPEL_TAPS / 2 - 1); for (y = 0; y < h; ++y) { const unsigned char *src_y = &src[(y_q4 >> SUBPEL_BITS) * src_stride]; const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK]; if (y_q4 & SUBPEL_MASK) { filter_vert_w16_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter, w); } else { memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w); } y_q4 += y_step_q4; } } void vpx_scaled_2d_ssse3(const uint8_t *src, ptrdiff_t src_stride, uint8_t *dst, ptrdiff_t dst_stride, const InterpKernel *filter, int x0_q4, int x_step_q4, int y0_q4, int y_step_q4, int w, int h) { // Note: Fixed size intermediate buffer, temp, places limits on parameters. // 2d filtering proceeds in 2 steps: // (1) Interpolate horizontally into an intermediate buffer, temp. // (2) Interpolate temp vertically to derive the sub-pixel result. // Deriving the maximum number of rows in the temp buffer (135): // --Smallest scaling factor is x1/2 ==> y_step_q4 = 32 (Normative). // --Largest block size is 64x64 pixels. // --64 rows in the downscaled frame span a distance of (64 - 1) * 32 in the // original frame (in 1/16th pixel units). // --Must round-up because block may be located at sub-pixel position. // --Require an additional SUBPEL_TAPS rows for the 8-tap filter tails. // --((64 - 1) * 32 + 15) >> 4 + 8 = 135. // --Require an additional 8 rows for the horiz_w8 transpose tail. // When calling in frame scaling function, the smallest scaling factor is x1/4 // ==> y_step_q4 = 64. Since w and h are at most 16, the temp buffer is still // big enough. 
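// Editorial worked example of the bound above: with h = 64, y_step_q4 = 32
// and y0_q4 = 15, the expression below evaluates to
//   ((63 * 32 + 15) >> 4) + 8 = 126 + 8 = 134 rows,
// safely within the 135-row worst case derived above (135 + 8 rows are
// allocated for the transpose tail).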
DECLARE_ALIGNED(16, uint8_t, temp[(135 + 8) * 64]); const int intermediate_height = (((h - 1) * y_step_q4 + y0_q4) >> SUBPEL_BITS) + SUBPEL_TAPS; assert(w <= 64); assert(h <= 64); assert(y_step_q4 <= 32 || (y_step_q4 <= 64 && h <= 32)); assert(x_step_q4 <= 64); if (w >= 8) { scaledconvolve_horiz_w8(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, filter, x0_q4, x_step_q4, w, intermediate_height); } else { scaledconvolve_horiz_w4(src - src_stride * (SUBPEL_TAPS / 2 - 1), src_stride, temp, 64, filter, x0_q4, x_step_q4, w, intermediate_height); } if (w >= 16) { scaledconvolve_vert_w16(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } else if (w == 8) { scaledconvolve_vert_w8(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } else { scaledconvolve_vert_w4(temp + 64 * (SUBPEL_TAPS / 2 - 1), 64, dst, dst_stride, filter, y0_q4, y_step_q4, w, h); } } // void vpx_convolve8_ssse3(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h); // void vpx_convolve8_avg_ssse3(const uint8_t *src, ptrdiff_t src_stride, // uint8_t *dst, ptrdiff_t dst_stride, // const InterpKernel *filter, int x0_q4, // int32_t x_step_q4, int y0_q4, int y_step_q4, // int w, int h); FUN_CONV_2D(, ssse3, 0); FUN_CONV_2D(avg_, ssse3, 1); libvpx-1.8.2/vpx_dsp/x86/vpx_subpixel_8t_sse2.asm000066400000000000000000000565101357355204000217520ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" ;Note: tap3 and tap4 have to be applied and added after other taps to avoid ;overflow. 
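; Editorial illustration of the overflow concern (approximate values, not
; upstream text): a middle tap close to 128 against a 255 sample gives a
; product near 128 * 255 = 32640, already close to the signed 16-bit limit
; of 32767. Accumulating the smaller outer taps first lets their negative
; products pull the running total down before the large tap3/tap4 products
; are added with saturating paddsw.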
%macro GET_FILTERS_4 0 mov rdx, arg(5) ;filter ptr mov rcx, 0x0400040 movdqa xmm7, [rdx] ;load filters pshuflw xmm0, xmm7, 0b ;k0 pshuflw xmm1, xmm7, 01010101b ;k1 pshuflw xmm2, xmm7, 10101010b ;k2 pshuflw xmm3, xmm7, 11111111b ;k3 psrldq xmm7, 8 pshuflw xmm4, xmm7, 0b ;k4 pshuflw xmm5, xmm7, 01010101b ;k5 pshuflw xmm6, xmm7, 10101010b ;k6 pshuflw xmm7, xmm7, 11111111b ;k7 punpcklqdq xmm0, xmm1 punpcklqdq xmm2, xmm3 punpcklqdq xmm5, xmm4 punpcklqdq xmm6, xmm7 movdqa k0k1, xmm0 movdqa k2k3, xmm2 movdqa k5k4, xmm5 movdqa k6k7, xmm6 movq xmm6, rcx pshufd xmm6, xmm6, 0 movdqa krd, xmm6 pxor xmm7, xmm7 movdqa zero, xmm7 %endm %macro APPLY_FILTER_4 1 punpckldq xmm0, xmm1 ;two row in one register punpckldq xmm6, xmm7 punpckldq xmm2, xmm3 punpckldq xmm5, xmm4 punpcklbw xmm0, zero ;unpack to word punpcklbw xmm6, zero punpcklbw xmm2, zero punpcklbw xmm5, zero pmullw xmm0, k0k1 ;multiply the filter factors pmullw xmm6, k6k7 pmullw xmm2, k2k3 pmullw xmm5, k5k4 paddsw xmm0, xmm6 ;sum movdqa xmm1, xmm0 psrldq xmm1, 8 paddsw xmm0, xmm1 paddsw xmm0, xmm2 psrldq xmm2, 8 paddsw xmm0, xmm5 psrldq xmm5, 8 paddsw xmm0, xmm2 paddsw xmm0, xmm5 paddsw xmm0, krd ;rounding psraw xmm0, 7 ;shift packuswb xmm0, xmm0 ;pack to byte %if %1 movd xmm1, [rdi] pavgb xmm0, xmm1 %endif movd [rdi], xmm0 %endm %macro GET_FILTERS 0 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr mov rcx, 0x0400040 movdqa xmm7, [rdx] ;load filters pshuflw xmm0, xmm7, 0b ;k0 pshuflw xmm1, xmm7, 01010101b ;k1 pshuflw xmm2, xmm7, 10101010b ;k2 pshuflw xmm3, xmm7, 11111111b ;k3 pshufhw xmm4, xmm7, 0b ;k4 pshufhw xmm5, xmm7, 01010101b ;k5 pshufhw xmm6, xmm7, 10101010b ;k6 pshufhw xmm7, xmm7, 11111111b ;k7 punpcklwd xmm0, xmm0 punpcklwd xmm1, xmm1 punpcklwd xmm2, xmm2 punpcklwd xmm3, xmm3 punpckhwd xmm4, xmm4 punpckhwd xmm5, xmm5 punpckhwd xmm6, xmm6 punpckhwd xmm7, xmm7 movdqa k0, xmm0 ;store filter factors on stack movdqa k1, xmm1 movdqa k2, xmm2 movdqa k3, xmm3 movdqa k4, xmm4 movdqa k5, xmm5 movdqa k6, xmm6 movdqa k7, xmm7 movq xmm6, rcx pshufd xmm6, xmm6, 0 movdqa krd, xmm6 ;rounding pxor xmm7, xmm7 movdqa zero, xmm7 %endm %macro LOAD_VERT_8 1 movq xmm0, [rsi + %1] ;0 movq xmm1, [rsi + rax + %1] ;1 movq xmm6, [rsi + rdx * 2 + %1] ;6 lea rsi, [rsi + rax] movq xmm7, [rsi + rdx * 2 + %1] ;7 movq xmm2, [rsi + rax + %1] ;2 movq xmm3, [rsi + rax * 2 + %1] ;3 movq xmm4, [rsi + rdx + %1] ;4 movq xmm5, [rsi + rax * 4 + %1] ;5 %endm %macro APPLY_FILTER_8 2 punpcklbw xmm0, zero punpcklbw xmm1, zero punpcklbw xmm6, zero punpcklbw xmm7, zero punpcklbw xmm2, zero punpcklbw xmm5, zero punpcklbw xmm3, zero punpcklbw xmm4, zero pmullw xmm0, k0 pmullw xmm1, k1 pmullw xmm6, k6 pmullw xmm7, k7 pmullw xmm2, k2 pmullw xmm5, k5 pmullw xmm3, k3 pmullw xmm4, k4 paddsw xmm0, xmm1 paddsw xmm0, xmm6 paddsw xmm0, xmm7 paddsw xmm0, xmm2 paddsw xmm0, xmm5 paddsw xmm0, xmm3 paddsw xmm0, xmm4 paddsw xmm0, krd ;rounding psraw xmm0, 7 ;shift packuswb xmm0, xmm0 ;pack back to byte %if %1 movq xmm1, [rdi + %2] pavgb xmm0, xmm1 %endif movq [rdi + %2], xmm0 %endm SECTION .text ;void vpx_filter_block1d4_v8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pitch, ; unsigned char *output_ptr, ; unsigned int out_pitch, ; unsigned int output_height, ; short *filter ;) global sym(vpx_filter_block1d4_v8_sse2) PRIVATE sym(vpx_filter_block1d4_v8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi push rbx ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 6 %define k0k1 [rsp + 16 * 0] %define k2k3 [rsp + 16 * 1] %define k5k4 [rsp + 16 
* 2] %define k6k7 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define zero [rsp + 16 * 5] GET_FILTERS_4 mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rbx, DWORD PTR arg(3) ;out_pitch lea rdx, [rax + rax * 2] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movd xmm0, [rsi] ;load src: row 0 movd xmm1, [rsi + rax] ;1 movd xmm6, [rsi + rdx * 2] ;6 lea rsi, [rsi + rax] movd xmm7, [rsi + rdx * 2] ;7 movd xmm2, [rsi + rax] ;2 movd xmm3, [rsi + rax * 2] ;3 movd xmm4, [rsi + rdx] ;4 movd xmm5, [rsi + rax * 4] ;5 APPLY_FILTER_4 0 lea rdi, [rdi + rbx] dec rcx jnz .loop add rsp, 16 * 6 pop rsp pop rbx ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vpx_filter_block1d8_v8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pitch, ; unsigned char *output_ptr, ; unsigned int out_pitch, ; unsigned int output_height, ; short *filter ;) global sym(vpx_filter_block1d8_v8_sse2) PRIVATE sym(vpx_filter_block1d8_v8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi push rbx ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 10 %define k0 [rsp + 16 * 0] %define k1 [rsp + 16 * 1] %define k2 [rsp + 16 * 2] %define k3 [rsp + 16 * 3] %define k4 [rsp + 16 * 4] %define k5 [rsp + 16 * 5] %define k6 [rsp + 16 * 6] %define k7 [rsp + 16 * 7] %define krd [rsp + 16 * 8] %define zero [rsp + 16 * 9] GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rbx, DWORD PTR arg(3) ;out_pitch lea rdx, [rax + rax * 2] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: LOAD_VERT_8 0 APPLY_FILTER_8 0, 0 lea rdi, [rdi + rbx] dec rcx jnz .loop add rsp, 16 * 10 pop rsp pop rbx ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vpx_filter_block1d16_v8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pitch, ; unsigned char *output_ptr, ; unsigned int out_pitch, ; unsigned int output_height, ; short *filter ;) global sym(vpx_filter_block1d16_v8_sse2) PRIVATE sym(vpx_filter_block1d16_v8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi push rbx ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 10 %define k0 [rsp + 16 * 0] %define k1 [rsp + 16 * 1] %define k2 [rsp + 16 * 2] %define k3 [rsp + 16 * 3] %define k4 [rsp + 16 * 4] %define k5 [rsp + 16 * 5] %define k6 [rsp + 16 * 6] %define k7 [rsp + 16 * 7] %define krd [rsp + 16 * 8] %define zero [rsp + 16 * 9] GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rbx, DWORD PTR arg(3) ;out_pitch lea rdx, [rax + rax * 2] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: LOAD_VERT_8 0 APPLY_FILTER_8 0, 0 sub rsi, rax LOAD_VERT_8 8 APPLY_FILTER_8 0, 8 add rdi, rbx dec rcx jnz .loop add rsp, 16 * 10 pop rsp pop rbx ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d4_v8_avg_sse2) PRIVATE sym(vpx_filter_block1d4_v8_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi push rbx ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 6 %define k0k1 [rsp + 16 * 0] %define k2k3 [rsp + 16 * 1] %define k5k4 [rsp + 16 * 2] %define k6k7 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define zero [rsp + 16 * 5] GET_FILTERS_4 mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rbx, DWORD PTR arg(3) ;out_pitch lea rdx, [rax + rax * 2] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movd xmm0, [rsi] ;load src: row 0 movd xmm1, [rsi + rax] ;1 movd xmm6, [rsi + rdx * 2] ;6 lea rsi, [rsi + rax] movd xmm7, [rsi 
+ rdx * 2] ;7 movd xmm2, [rsi + rax] ;2 movd xmm3, [rsi + rax * 2] ;3 movd xmm4, [rsi + rdx] ;4 movd xmm5, [rsi + rax * 4] ;5 APPLY_FILTER_4 1 lea rdi, [rdi + rbx] dec rcx jnz .loop add rsp, 16 * 6 pop rsp pop rbx ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d8_v8_avg_sse2) PRIVATE sym(vpx_filter_block1d8_v8_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi push rbx ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 10 %define k0 [rsp + 16 * 0] %define k1 [rsp + 16 * 1] %define k2 [rsp + 16 * 2] %define k3 [rsp + 16 * 3] %define k4 [rsp + 16 * 4] %define k5 [rsp + 16 * 5] %define k6 [rsp + 16 * 6] %define k7 [rsp + 16 * 7] %define krd [rsp + 16 * 8] %define zero [rsp + 16 * 9] GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rbx, DWORD PTR arg(3) ;out_pitch lea rdx, [rax + rax * 2] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: LOAD_VERT_8 0 APPLY_FILTER_8 1, 0 lea rdi, [rdi + rbx] dec rcx jnz .loop add rsp, 16 * 10 pop rsp pop rbx ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d16_v8_avg_sse2) PRIVATE sym(vpx_filter_block1d16_v8_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi push rbx ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 10 %define k0 [rsp + 16 * 0] %define k1 [rsp + 16 * 1] %define k2 [rsp + 16 * 2] %define k3 [rsp + 16 * 3] %define k4 [rsp + 16 * 4] %define k5 [rsp + 16 * 5] %define k6 [rsp + 16 * 6] %define k7 [rsp + 16 * 7] %define krd [rsp + 16 * 8] %define zero [rsp + 16 * 9] GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rbx, DWORD PTR arg(3) ;out_pitch lea rdx, [rax + rax * 2] movsxd rcx, DWORD PTR arg(4) ;output_height .loop: LOAD_VERT_8 0 APPLY_FILTER_8 1, 0 sub rsi, rax LOAD_VERT_8 8 APPLY_FILTER_8 1, 8 add rdi, rbx dec rcx jnz .loop add rsp, 16 * 10 pop rsp pop rbx ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vpx_filter_block1d4_h8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, ; unsigned char *output_ptr, ; unsigned int output_pitch, ; unsigned int output_height, ; short *filter ;) global sym(vpx_filter_block1d4_h8_sse2) PRIVATE sym(vpx_filter_block1d4_h8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 6 %define k0k1 [rsp + 16 * 0] %define k2k3 [rsp + 16 * 1] %define k5k4 [rsp + 16 * 2] %define k6k7 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define zero [rsp + 16 * 5] GET_FILTERS_4 mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movdqu xmm0, [rsi - 3] ;load src movdqa xmm1, xmm0 movdqa xmm6, xmm0 movdqa xmm7, xmm0 movdqa xmm2, xmm0 movdqa xmm3, xmm0 movdqa xmm5, xmm0 movdqa xmm4, xmm0 psrldq xmm1, 1 psrldq xmm6, 6 psrldq xmm7, 7 psrldq xmm2, 2 psrldq xmm3, 3 psrldq xmm5, 5 psrldq xmm4, 4 APPLY_FILTER_4 0 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx jnz .loop add rsp, 16 * 6 pop rsp ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vpx_filter_block1d8_h8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, ; unsigned char *output_ptr, ; unsigned int output_pitch, ; unsigned int output_height, ; short *filter ;) global sym(vpx_filter_block1d8_h8_sse2) PRIVATE sym(vpx_filter_block1d8_h8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 
push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 10 %define k0 [rsp + 16 * 0] %define k1 [rsp + 16 * 1] %define k2 [rsp + 16 * 2] %define k3 [rsp + 16 * 3] %define k4 [rsp + 16 * 4] %define k5 [rsp + 16 * 5] %define k6 [rsp + 16 * 6] %define k7 [rsp + 16 * 7] %define krd [rsp + 16 * 8] %define zero [rsp + 16 * 9] GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movdqu xmm0, [rsi - 3] ;load src movdqa xmm1, xmm0 movdqa xmm6, xmm0 movdqa xmm7, xmm0 movdqa xmm2, xmm0 movdqa xmm5, xmm0 movdqa xmm3, xmm0 movdqa xmm4, xmm0 psrldq xmm1, 1 psrldq xmm6, 6 psrldq xmm7, 7 psrldq xmm2, 2 psrldq xmm5, 5 psrldq xmm3, 3 psrldq xmm4, 4 APPLY_FILTER_8 0, 0 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx jnz .loop add rsp, 16 * 10 pop rsp ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret ;void vpx_filter_block1d16_h8_sse2 ;( ; unsigned char *src_ptr, ; unsigned int src_pixels_per_line, ; unsigned char *output_ptr, ; unsigned int output_pitch, ; unsigned int output_height, ; short *filter ;) global sym(vpx_filter_block1d16_h8_sse2) PRIVATE sym(vpx_filter_block1d16_h8_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 10 %define k0 [rsp + 16 * 0] %define k1 [rsp + 16 * 1] %define k2 [rsp + 16 * 2] %define k3 [rsp + 16 * 3] %define k4 [rsp + 16 * 4] %define k5 [rsp + 16 * 5] %define k6 [rsp + 16 * 6] %define k7 [rsp + 16 * 7] %define krd [rsp + 16 * 8] %define zero [rsp + 16 * 9] GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movdqu xmm0, [rsi - 3] ;load src movdqa xmm1, xmm0 movdqa xmm6, xmm0 movdqa xmm7, xmm0 movdqa xmm2, xmm0 movdqa xmm5, xmm0 movdqa xmm3, xmm0 movdqa xmm4, xmm0 psrldq xmm1, 1 psrldq xmm6, 6 psrldq xmm7, 7 psrldq xmm2, 2 psrldq xmm5, 5 psrldq xmm3, 3 psrldq xmm4, 4 APPLY_FILTER_8 0, 0 movdqu xmm0, [rsi + 5] ;load src movdqa xmm1, xmm0 movdqa xmm6, xmm0 movdqa xmm7, xmm0 movdqa xmm2, xmm0 movdqa xmm5, xmm0 movdqa xmm3, xmm0 movdqa xmm4, xmm0 psrldq xmm1, 1 psrldq xmm6, 6 psrldq xmm7, 7 psrldq xmm2, 2 psrldq xmm5, 5 psrldq xmm3, 3 psrldq xmm4, 4 APPLY_FILTER_8 0, 8 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx jnz .loop add rsp, 16 * 10 pop rsp ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d4_h8_avg_sse2) PRIVATE sym(vpx_filter_block1d4_h8_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 6 %define k0k1 [rsp + 16 * 0] %define k2k3 [rsp + 16 * 1] %define k5k4 [rsp + 16 * 2] %define k6k7 [rsp + 16 * 3] %define krd [rsp + 16 * 4] %define zero [rsp + 16 * 5] GET_FILTERS_4 mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movdqu xmm0, [rsi - 3] ;load src movdqa xmm1, xmm0 movdqa xmm6, xmm0 movdqa xmm7, xmm0 movdqa xmm2, xmm0 movdqa xmm3, xmm0 movdqa xmm5, xmm0 movdqa xmm4, xmm0 psrldq xmm1, 1 psrldq xmm6, 6 psrldq xmm7, 7 psrldq xmm2, 2 psrldq xmm3, 3 psrldq xmm5, 5 psrldq xmm4, 4 APPLY_FILTER_4 1 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx jnz .loop add rsp, 16 * 6 pop rsp ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d8_h8_avg_sse2) PRIVATE 
sym(vpx_filter_block1d8_h8_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 10 %define k0 [rsp + 16 * 0] %define k1 [rsp + 16 * 1] %define k2 [rsp + 16 * 2] %define k3 [rsp + 16 * 3] %define k4 [rsp + 16 * 4] %define k5 [rsp + 16 * 5] %define k6 [rsp + 16 * 6] %define k7 [rsp + 16 * 7] %define krd [rsp + 16 * 8] %define zero [rsp + 16 * 9] GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movdqu xmm0, [rsi - 3] ;load src movdqa xmm1, xmm0 movdqa xmm6, xmm0 movdqa xmm7, xmm0 movdqa xmm2, xmm0 movdqa xmm5, xmm0 movdqa xmm3, xmm0 movdqa xmm4, xmm0 psrldq xmm1, 1 psrldq xmm6, 6 psrldq xmm7, 7 psrldq xmm2, 2 psrldq xmm5, 5 psrldq xmm3, 3 psrldq xmm4, 4 APPLY_FILTER_8 1, 0 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx jnz .loop add rsp, 16 * 10 pop rsp ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d16_h8_avg_sse2) PRIVATE sym(vpx_filter_block1d16_h8_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog ALIGN_STACK 16, rax sub rsp, 16 * 10 %define k0 [rsp + 16 * 0] %define k1 [rsp + 16 * 1] %define k2 [rsp + 16 * 2] %define k3 [rsp + 16 * 3] %define k4 [rsp + 16 * 4] %define k5 [rsp + 16 * 5] %define k6 [rsp + 16 * 6] %define k7 [rsp + 16 * 7] %define krd [rsp + 16 * 8] %define zero [rsp + 16 * 9] GET_FILTERS movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch movsxd rcx, DWORD PTR arg(4) ;output_height .loop: movdqu xmm0, [rsi - 3] ;load src movdqa xmm1, xmm0 movdqa xmm6, xmm0 movdqa xmm7, xmm0 movdqa xmm2, xmm0 movdqa xmm5, xmm0 movdqa xmm3, xmm0 movdqa xmm4, xmm0 psrldq xmm1, 1 psrldq xmm6, 6 psrldq xmm7, 7 psrldq xmm2, 2 psrldq xmm5, 5 psrldq xmm3, 3 psrldq xmm4, 4 APPLY_FILTER_8 1, 0 movdqu xmm0, [rsi + 5] ;load src movdqa xmm1, xmm0 movdqa xmm6, xmm0 movdqa xmm7, xmm0 movdqa xmm2, xmm0 movdqa xmm5, xmm0 movdqa xmm3, xmm0 movdqa xmm4, xmm0 psrldq xmm1, 1 psrldq xmm6, 6 psrldq xmm7, 7 psrldq xmm2, 2 psrldq xmm5, 5 psrldq xmm3, 3 psrldq xmm4, 4 APPLY_FILTER_8 1, 8 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx jnz .loop add rsp, 16 * 10 pop rsp ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret libvpx-1.8.2/vpx_dsp/x86/vpx_subpixel_8t_ssse3.asm000066400000000000000000000700351357355204000221340ustar00rootroot00000000000000; ; Copyright (c) 2015 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "third_party/x86inc/x86inc.asm" SECTION_RODATA pw_64: times 8 dw 64 ; %define USE_PMULHRSW ; NOTE: pmulhrsw has a latency of 5 cycles. Tests showed a performance loss ; when using this instruction. ; ; The add order below (based on ffvp9) must be followed to prevent outranges. ; x = k0k1 + k4k5 ; y = k2k3 + k6k7 ; z = signed SAT(x + y) SECTION .text %define LOCAL_VARS_SIZE 16*6 %macro SETUP_LOCAL_VARS 0 ; TODO(slavarnway): using xmm registers for these on VPX_ARCH_X86_64 + ; pmaddubsw has a higher latency on some platforms, this might be eased by ; interleaving the instructions. 
%define k0k1 [rsp + 16*0] %define k2k3 [rsp + 16*1] %define k4k5 [rsp + 16*2] %define k6k7 [rsp + 16*3] packsswb m4, m4 ; TODO(slavarnway): multiple pshufb instructions had a higher latency on ; some platforms. pshuflw m0, m4, 0b ;k0_k1 pshuflw m1, m4, 01010101b ;k2_k3 pshuflw m2, m4, 10101010b ;k4_k5 pshuflw m3, m4, 11111111b ;k6_k7 punpcklqdq m0, m0 punpcklqdq m1, m1 punpcklqdq m2, m2 punpcklqdq m3, m3 mova k0k1, m0 mova k2k3, m1 mova k4k5, m2 mova k6k7, m3 %if VPX_ARCH_X86_64 %define krd m12 %define tmp0 [rsp + 16*4] %define tmp1 [rsp + 16*5] mova krd, [GLOBAL(pw_64)] %else %define krd [rsp + 16*4] %if CONFIG_PIC=0 mova m6, [GLOBAL(pw_64)] %else ; build constants without accessing global memory pcmpeqb m6, m6 ;all ones psrlw m6, 15 psllw m6, 6 ;aka pw_64 %endif mova krd, m6 %endif %endm ;------------------------------------------------------------------------------- %if VPX_ARCH_X86_64 %define LOCAL_VARS_SIZE_H4 0 %else %define LOCAL_VARS_SIZE_H4 16*4 %endif %macro SUBPIX_HFILTER4 1 cglobal filter_block1d4_%1, 6, 6, 11, LOCAL_VARS_SIZE_H4, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] packsswb m4, m4 %if VPX_ARCH_X86_64 %define k0k1k4k5 m8 %define k2k3k6k7 m9 %define krd m10 mova krd, [GLOBAL(pw_64)] pshuflw k0k1k4k5, m4, 0b ;k0_k1 pshufhw k0k1k4k5, k0k1k4k5, 10101010b ;k0_k1_k4_k5 pshuflw k2k3k6k7, m4, 01010101b ;k2_k3 pshufhw k2k3k6k7, k2k3k6k7, 11111111b ;k2_k3_k6_k7 %else %define k0k1k4k5 [rsp + 16*0] %define k2k3k6k7 [rsp + 16*1] %define krd [rsp + 16*2] pshuflw m6, m4, 0b ;k0_k1 pshufhw m6, m6, 10101010b ;k0_k1_k4_k5 pshuflw m7, m4, 01010101b ;k2_k3 pshufhw m7, m7, 11111111b ;k2_k3_k6_k7 %if CONFIG_PIC=0 mova m1, [GLOBAL(pw_64)] %else ; build constants without accessing global memory pcmpeqb m1, m1 ;all ones psrlw m1, 15 psllw m1, 6 ;aka pw_64 %endif mova k0k1k4k5, m6 mova k2k3k6k7, m7 mova krd, m1 %endif dec heightd .loop: ;Do two rows at once movu m4, [srcq - 3] movu m5, [srcq + sstrideq - 3] punpckhbw m1, m4, m4 punpcklbw m4, m4 punpckhbw m3, m5, m5 punpcklbw m5, m5 palignr m0, m1, m4, 1 pmaddubsw m0, k0k1k4k5 palignr m1, m4, 5 pmaddubsw m1, k2k3k6k7 palignr m2, m3, m5, 1 pmaddubsw m2, k0k1k4k5 palignr m3, m5, 5 pmaddubsw m3, k2k3k6k7 punpckhqdq m4, m0, m2 punpcklqdq m0, m2 punpckhqdq m5, m1, m3 punpcklqdq m1, m3 paddsw m0, m4 paddsw m1, m5 %ifidn %1, h8_avg movd m4, [dstq] movd m5, [dstq + dstrideq] %endif paddsw m0, m1 paddsw m0, krd psraw m0, 7 packuswb m0, m0 psrldq m1, m0, 4 %ifidn %1, h8_avg pavgb m0, m4 pavgb m1, m5 %endif movd [dstq], m0 movd [dstq + dstrideq], m1 lea srcq, [srcq + sstrideq ] prefetcht0 [srcq + 4 * sstrideq - 3] lea srcq, [srcq + sstrideq ] lea dstq, [dstq + 2 * dstrideq ] prefetcht0 [srcq + 2 * sstrideq - 3] sub heightd, 2 jg .loop ; Do last row if output_height is odd jne .done movu m4, [srcq - 3] punpckhbw m1, m4, m4 punpcklbw m4, m4 palignr m0, m1, m4, 1 palignr m1, m4, 5 pmaddubsw m0, k0k1k4k5 pmaddubsw m1, k2k3k6k7 psrldq m2, m0, 8 psrldq m3, m1, 8 paddsw m0, m2 paddsw m1, m3 paddsw m0, m1 paddsw m0, krd psraw m0, 7 packuswb m0, m0 %ifidn %1, h8_avg movd m4, [dstq] pavgb m0, m4 %endif movd [dstq], m0 .done: REP_RET %endm ;------------------------------------------------------------------------------- %macro SUBPIX_HFILTER8 1 cglobal filter_block1d8_%1, 6, 6, 14, LOCAL_VARS_SIZE, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] SETUP_LOCAL_VARS dec heightd .loop: ;Do two rows at once movu m0, [srcq - 3] movu m4, [srcq + sstrideq - 3] punpckhbw m1, m0, m0 punpcklbw m0, m0 palignr m5, m1, m0, 13 pmaddubsw m5, k6k7 
palignr m2, m1, m0, 5 palignr m3, m1, m0, 9 palignr m1, m0, 1 pmaddubsw m1, k0k1 punpckhbw m6, m4, m4 punpcklbw m4, m4 pmaddubsw m2, k2k3 pmaddubsw m3, k4k5 palignr m7, m6, m4, 13 palignr m0, m6, m4, 5 pmaddubsw m7, k6k7 paddsw m1, m3 paddsw m2, m5 paddsw m1, m2 %ifidn %1, h8_avg movh m2, [dstq] movhps m2, [dstq + dstrideq] %endif palignr m5, m6, m4, 9 palignr m6, m4, 1 pmaddubsw m0, k2k3 pmaddubsw m6, k0k1 paddsw m1, krd pmaddubsw m5, k4k5 psraw m1, 7 paddsw m0, m7 paddsw m6, m5 paddsw m6, m0 paddsw m6, krd psraw m6, 7 packuswb m1, m6 %ifidn %1, h8_avg pavgb m1, m2 %endif movh [dstq], m1 movhps [dstq + dstrideq], m1 lea srcq, [srcq + sstrideq ] prefetcht0 [srcq + 4 * sstrideq - 3] lea srcq, [srcq + sstrideq ] lea dstq, [dstq + 2 * dstrideq ] prefetcht0 [srcq + 2 * sstrideq - 3] sub heightd, 2 jg .loop ; Do last row if output_height is odd jne .done movu m0, [srcq - 3] punpckhbw m3, m0, m0 punpcklbw m0, m0 palignr m1, m3, m0, 1 palignr m2, m3, m0, 5 palignr m4, m3, m0, 13 palignr m3, m0, 9 pmaddubsw m1, k0k1 pmaddubsw m2, k2k3 pmaddubsw m3, k4k5 pmaddubsw m4, k6k7 paddsw m1, m3 paddsw m4, m2 paddsw m1, m4 paddsw m1, krd psraw m1, 7 packuswb m1, m1 %ifidn %1, h8_avg movh m0, [dstq] pavgb m1, m0 %endif movh [dstq], m1 .done: REP_RET %endm ;------------------------------------------------------------------------------- %macro SUBPIX_HFILTER16 1 cglobal filter_block1d16_%1, 6, 6, 14, LOCAL_VARS_SIZE, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] SETUP_LOCAL_VARS .loop: prefetcht0 [srcq + 2 * sstrideq -3] movu m0, [srcq - 3] movu m4, [srcq - 2] pmaddubsw m0, k0k1 pmaddubsw m4, k0k1 movu m1, [srcq - 1] movu m5, [srcq + 0] pmaddubsw m1, k2k3 pmaddubsw m5, k2k3 movu m2, [srcq + 1] movu m6, [srcq + 2] pmaddubsw m2, k4k5 pmaddubsw m6, k4k5 movu m3, [srcq + 3] movu m7, [srcq + 4] pmaddubsw m3, k6k7 pmaddubsw m7, k6k7 paddsw m0, m2 paddsw m1, m3 paddsw m0, m1 paddsw m4, m6 paddsw m5, m7 paddsw m4, m5 paddsw m0, krd paddsw m4, krd psraw m0, 7 psraw m4, 7 packuswb m0, m0 packuswb m4, m4 punpcklbw m0, m4 %ifidn %1, h8_avg pavgb m0, [dstq] %endif lea srcq, [srcq + sstrideq] mova [dstq], m0 lea dstq, [dstq + dstrideq] dec heightd jnz .loop REP_RET %endm INIT_XMM ssse3 SUBPIX_HFILTER16 h8 ; vpx_filter_block1d16_h8_ssse3 SUBPIX_HFILTER16 h8_avg ; vpx_filter_block1d16_h8_avg_ssse3 SUBPIX_HFILTER8 h8 ; vpx_filter_block1d8_h8_ssse3 SUBPIX_HFILTER8 h8_avg ; vpx_filter_block1d8_h8_avg_ssse3 SUBPIX_HFILTER4 h8 ; vpx_filter_block1d4_h8_ssse3 SUBPIX_HFILTER4 h8_avg ; vpx_filter_block1d4_h8_avg_ssse3 ;------------------------------------------------------------------------------- ; TODO(Linfeng): Detect cpu type and choose the code with better performance. 
%define X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON 1 %if VPX_ARCH_X86_64 && X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON %define NUM_GENERAL_REG_USED 9 %else %define NUM_GENERAL_REG_USED 6 %endif %macro SUBPIX_VFILTER 2 cglobal filter_block1d%2_%1, 6, NUM_GENERAL_REG_USED, 15, LOCAL_VARS_SIZE, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] SETUP_LOCAL_VARS %ifidn %2, 8 %define movx movh %else %define movx movd %endif dec heightd %if VPX_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON %if VPX_ARCH_X86_64 %define src1q r7 %define sstride6q r8 %define dst_stride dstrideq %else %define src1q filterq %define sstride6q dstrideq %define dst_stride dstridemp %endif mov src1q, srcq add src1q, sstrideq lea sstride6q, [sstrideq + sstrideq * 4] add sstride6q, sstrideq ;pitch * 6 .loop: ;Do two rows at once movx m0, [srcq ] ;A movx m1, [src1q ] ;B punpcklbw m0, m1 ;A B movx m2, [srcq + sstrideq * 2 ] ;C pmaddubsw m0, k0k1 mova m6, m2 movx m3, [src1q + sstrideq * 2] ;D punpcklbw m2, m3 ;C D pmaddubsw m2, k2k3 movx m4, [srcq + sstrideq * 4 ] ;E mova m7, m4 movx m5, [src1q + sstrideq * 4] ;F punpcklbw m4, m5 ;E F pmaddubsw m4, k4k5 punpcklbw m1, m6 ;A B next iter movx m6, [srcq + sstride6q ] ;G punpcklbw m5, m6 ;E F next iter punpcklbw m3, m7 ;C D next iter pmaddubsw m5, k4k5 movx m7, [src1q + sstride6q ] ;H punpcklbw m6, m7 ;G H pmaddubsw m6, k6k7 pmaddubsw m3, k2k3 pmaddubsw m1, k0k1 paddsw m0, m4 paddsw m2, m6 movx m6, [srcq + sstrideq * 8 ] ;H next iter punpcklbw m7, m6 pmaddubsw m7, k6k7 paddsw m0, m2 paddsw m0, krd psraw m0, 7 paddsw m1, m5 packuswb m0, m0 paddsw m3, m7 paddsw m1, m3 paddsw m1, krd psraw m1, 7 lea srcq, [srcq + sstrideq * 2 ] lea src1q, [src1q + sstrideq * 2] packuswb m1, m1 %ifidn %1, v8_avg movx m2, [dstq] pavgb m0, m2 %endif movx [dstq], m0 add dstq, dst_stride %ifidn %1, v8_avg movx m3, [dstq] pavgb m1, m3 %endif movx [dstq], m1 add dstq, dst_stride sub heightd, 2 jg .loop ; Do last row if output_height is odd jne .done movx m0, [srcq ] ;A movx m1, [srcq + sstrideq ] ;B movx m6, [srcq + sstride6q ] ;G punpcklbw m0, m1 ;A B movx m7, [src1q + sstride6q ] ;H pmaddubsw m0, k0k1 movx m2, [srcq + sstrideq * 2 ] ;C punpcklbw m6, m7 ;G H movx m3, [src1q + sstrideq * 2] ;D pmaddubsw m6, k6k7 movx m4, [srcq + sstrideq * 4 ] ;E punpcklbw m2, m3 ;C D movx m5, [src1q + sstrideq * 4] ;F punpcklbw m4, m5 ;E F pmaddubsw m2, k2k3 pmaddubsw m4, k4k5 paddsw m2, m6 paddsw m0, m4 paddsw m0, m2 paddsw m0, krd psraw m0, 7 packuswb m0, m0 %ifidn %1, v8_avg movx m1, [dstq] pavgb m0, m1 %endif movx [dstq], m0 %else ; VPX_ARCH_X86_64 movx m0, [srcq ] ;A movx m1, [srcq + sstrideq ] ;B lea srcq, [srcq + sstrideq * 2 ] movx m2, [srcq] ;C movx m3, [srcq + sstrideq] ;D lea srcq, [srcq + sstrideq * 2 ] movx m4, [srcq] ;E movx m5, [srcq + sstrideq] ;F lea srcq, [srcq + sstrideq * 2 ] movx m6, [srcq] ;G punpcklbw m0, m1 ;A B punpcklbw m1, m2 ;A B next iter punpcklbw m2, m3 ;C D punpcklbw m3, m4 ;C D next iter punpcklbw m4, m5 ;E F punpcklbw m5, m6 ;E F next iter .loop: ;Do two rows at once movx m7, [srcq + sstrideq] ;H lea srcq, [srcq + sstrideq * 2 ] movx m14, [srcq] ;H next iter punpcklbw m6, m7 ;G H punpcklbw m7, m14 ;G H next iter pmaddubsw m8, m0, k0k1 pmaddubsw m9, m1, k0k1 mova m0, m2 mova m1, m3 pmaddubsw m10, m2, k2k3 pmaddubsw m11, m3, k2k3 mova m2, m4 mova m3, m5 pmaddubsw m4, k4k5 pmaddubsw m5, k4k5 paddsw m8, m4 paddsw m9, m5 mova m4, m6 mova m5, m7 pmaddubsw m6, k6k7 pmaddubsw m7, k6k7 paddsw m10, m6 paddsw m11, m7 paddsw m8, m10 paddsw m9, m11 mova m6, m14 paddsw m8, krd paddsw m9, krd 
psraw m8, 7 psraw m9, 7 %ifidn %2, 4 packuswb m8, m8 packuswb m9, m9 %else packuswb m8, m9 %endif %ifidn %1, v8_avg movx m7, [dstq] %ifidn %2, 4 movx m10, [dstq + dstrideq] pavgb m9, m10 %else movhpd m7, [dstq + dstrideq] %endif pavgb m8, m7 %endif movx [dstq], m8 %ifidn %2, 4 movx [dstq + dstrideq], m9 %else movhpd [dstq + dstrideq], m8 %endif lea dstq, [dstq + dstrideq * 2 ] sub heightd, 2 jg .loop ; Do last row if output_height is odd jne .done movx m7, [srcq + sstrideq] ;H punpcklbw m6, m7 ;G H pmaddubsw m0, k0k1 pmaddubsw m2, k2k3 pmaddubsw m4, k4k5 pmaddubsw m6, k6k7 paddsw m0, m4 paddsw m2, m6 paddsw m0, m2 paddsw m0, krd psraw m0, 7 packuswb m0, m0 %ifidn %1, v8_avg movx m1, [dstq] pavgb m0, m1 %endif movx [dstq], m0 %endif ; VPX_ARCH_X86_64 .done: REP_RET %endm ;------------------------------------------------------------------------------- %macro SUBPIX_VFILTER16 1 cglobal filter_block1d16_%1, 6, NUM_GENERAL_REG_USED, 16, LOCAL_VARS_SIZE, \ src, sstride, dst, dstride, height, filter mova m4, [filterq] SETUP_LOCAL_VARS %if VPX_ARCH_X86 || X86_SUBPIX_VFILTER_PREFER_SLOW_CELERON %if VPX_ARCH_X86_64 %define src1q r7 %define sstride6q r8 %define dst_stride dstrideq %else %define src1q filterq %define sstride6q dstrideq %define dst_stride dstridemp %endif lea src1q, [srcq + sstrideq] lea sstride6q, [sstrideq + sstrideq * 4] add sstride6q, sstrideq ;pitch * 6 .loop: movh m0, [srcq ] ;A movh m1, [src1q ] ;B movh m2, [srcq + sstrideq * 2 ] ;C movh m3, [src1q + sstrideq * 2] ;D movh m4, [srcq + sstrideq * 4 ] ;E movh m5, [src1q + sstrideq * 4] ;F punpcklbw m0, m1 ;A B movh m6, [srcq + sstride6q] ;G punpcklbw m2, m3 ;C D movh m7, [src1q + sstride6q] ;H punpcklbw m4, m5 ;E F pmaddubsw m0, k0k1 movh m3, [srcq + 8] ;A pmaddubsw m2, k2k3 punpcklbw m6, m7 ;G H movh m5, [srcq + sstrideq + 8] ;B pmaddubsw m4, k4k5 punpcklbw m3, m5 ;A B movh m7, [srcq + sstrideq * 2 + 8] ;C pmaddubsw m6, k6k7 movh m5, [src1q + sstrideq * 2 + 8] ;D punpcklbw m7, m5 ;C D paddsw m2, m6 pmaddubsw m3, k0k1 movh m1, [srcq + sstrideq * 4 + 8] ;E paddsw m0, m4 pmaddubsw m7, k2k3 movh m6, [src1q + sstrideq * 4 + 8] ;F punpcklbw m1, m6 ;E F paddsw m0, m2 paddsw m0, krd movh m2, [srcq + sstride6q + 8] ;G pmaddubsw m1, k4k5 movh m5, [src1q + sstride6q + 8] ;H psraw m0, 7 punpcklbw m2, m5 ;G H pmaddubsw m2, k6k7 paddsw m7, m2 paddsw m3, m1 paddsw m3, m7 paddsw m3, krd psraw m3, 7 packuswb m0, m3 add srcq, sstrideq add src1q, sstrideq %ifidn %1, v8_avg pavgb m0, [dstq] %endif mova [dstq], m0 add dstq, dst_stride dec heightd jnz .loop REP_RET %else ; VPX_ARCH_X86_64 dec heightd movu m1, [srcq ] ;A movu m3, [srcq + sstrideq ] ;B lea srcq, [srcq + sstrideq * 2] punpcklbw m0, m1, m3 ;A B punpckhbw m1, m3 ;A B movu m5, [srcq] ;C punpcklbw m2, m3, m5 ;A B next iter punpckhbw m3, m5 ;A B next iter mova tmp0, m2 ;store to stack mova tmp1, m3 ;store to stack movu m7, [srcq + sstrideq] ;D lea srcq, [srcq + sstrideq * 2] punpcklbw m4, m5, m7 ;C D punpckhbw m5, m7 ;C D movu m9, [srcq] ;E punpcklbw m6, m7, m9 ;C D next iter punpckhbw m7, m9 ;C D next iter movu m11, [srcq + sstrideq] ;F lea srcq, [srcq + sstrideq * 2] punpcklbw m8, m9, m11 ;E F punpckhbw m9, m11 ;E F movu m2, [srcq] ;G punpcklbw m10, m11, m2 ;E F next iter punpckhbw m11, m2 ;E F next iter .loop: ;Do two rows at once pmaddubsw m13, m0, k0k1 mova m0, m4 pmaddubsw m14, m8, k4k5 pmaddubsw m15, m4, k2k3 mova m4, m8 paddsw m13, m14 movu m3, [srcq + sstrideq] ;H lea srcq, [srcq + sstrideq * 2] punpcklbw m14, m2, m3 ;G H mova m8, m14 pmaddubsw m14, k6k7 paddsw m15, m14 paddsw m13, 
m15 paddsw m13, krd psraw m13, 7 pmaddubsw m14, m1, k0k1 pmaddubsw m1, m9, k4k5 pmaddubsw m15, m5, k2k3 paddsw m14, m1 mova m1, m5 mova m5, m9 punpckhbw m2, m3 ;G H mova m9, m2 pmaddubsw m2, k6k7 paddsw m15, m2 paddsw m14, m15 paddsw m14, krd psraw m14, 7 packuswb m13, m14 %ifidn %1, v8_avg pavgb m13, [dstq] %endif mova [dstq], m13 ; next iter pmaddubsw m15, tmp0, k0k1 pmaddubsw m14, m10, k4k5 pmaddubsw m13, m6, k2k3 paddsw m15, m14 mova tmp0, m6 mova m6, m10 movu m2, [srcq] ;G next iter punpcklbw m14, m3, m2 ;G H next iter mova m10, m14 pmaddubsw m14, k6k7 paddsw m13, m14 paddsw m15, m13 paddsw m15, krd psraw m15, 7 pmaddubsw m14, tmp1, k0k1 mova tmp1, m7 pmaddubsw m13, m7, k2k3 mova m7, m11 pmaddubsw m11, k4k5 paddsw m14, m11 punpckhbw m3, m2 ;G H next iter mova m11, m3 pmaddubsw m3, k6k7 paddsw m13, m3 paddsw m14, m13 paddsw m14, krd psraw m14, 7 packuswb m15, m14 %ifidn %1, v8_avg pavgb m15, [dstq + dstrideq] %endif mova [dstq + dstrideq], m15 lea dstq, [dstq + dstrideq * 2] sub heightd, 2 jg .loop ; Do last row if output_height is odd jne .done movu m3, [srcq + sstrideq] ;H punpcklbw m6, m2, m3 ;G H punpckhbw m2, m3 ;G H pmaddubsw m0, k0k1 pmaddubsw m1, k0k1 pmaddubsw m4, k2k3 pmaddubsw m5, k2k3 pmaddubsw m8, k4k5 pmaddubsw m9, k4k5 pmaddubsw m6, k6k7 pmaddubsw m2, k6k7 paddsw m0, m8 paddsw m1, m9 paddsw m4, m6 paddsw m5, m2 paddsw m0, m4 paddsw m1, m5 paddsw m0, krd paddsw m1, krd psraw m0, 7 psraw m1, 7 packuswb m0, m1 %ifidn %1, v8_avg pavgb m0, [dstq] %endif mova [dstq], m0 .done: REP_RET %endif ; VPX_ARCH_X86_64 %endm INIT_XMM ssse3 SUBPIX_VFILTER16 v8 ; vpx_filter_block1d16_v8_ssse3 SUBPIX_VFILTER16 v8_avg ; vpx_filter_block1d16_v8_avg_ssse3 SUBPIX_VFILTER v8, 8 ; vpx_filter_block1d8_v8_ssse3 SUBPIX_VFILTER v8_avg, 8 ; vpx_filter_block1d8_v8_avg_ssse3 SUBPIX_VFILTER v8, 4 ; vpx_filter_block1d4_v8_ssse3 SUBPIX_VFILTER v8_avg, 4 ; vpx_filter_block1d4_v8_avg_ssse3 libvpx-1.8.2/vpx_dsp/x86/vpx_subpixel_bilinear_sse2.asm000066400000000000000000000236761357355204000232130ustar00rootroot00000000000000; ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
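;
; The routines below apply only the two middle taps (k3, k4) of the 8-tap
; kernel as a bilinear blend of neighbouring pixels (horizontal) or rows
; (vertical): out = (k3 * a + k4 * b + 64) >> 7.  The 0x0400040 constant
; broadcast in GET_PARAM/GET_PARAM_4 is the packed (64, 64) word pair used
; for the rounding add.  E.g. in the half-pel case k3 = k4 = 64, with
; a = 10 and b = 20: (640 + 1280 + 64) >> 7 = 15.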
; %include "vpx_ports/x86_abi_support.asm" %macro GET_PARAM_4 0 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr mov rcx, 0x0400040 movdqa xmm3, [rdx] ;load filters pshuflw xmm4, xmm3, 11111111b ;k3 psrldq xmm3, 8 pshuflw xmm3, xmm3, 0b ;k4 punpcklqdq xmm4, xmm3 ;k3k4 movq xmm3, rcx ;rounding pshufd xmm3, xmm3, 0 pxor xmm2, xmm2 movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch movsxd rcx, DWORD PTR arg(4) ;output_height %endm %macro APPLY_FILTER_4 1 punpckldq xmm0, xmm1 ;two row in one register punpcklbw xmm0, xmm2 ;unpack to word pmullw xmm0, xmm4 ;multiply the filter factors movdqa xmm1, xmm0 psrldq xmm1, 8 paddsw xmm0, xmm1 paddsw xmm0, xmm3 ;rounding psraw xmm0, 7 ;shift packuswb xmm0, xmm0 ;pack to byte %if %1 movd xmm1, [rdi] pavgb xmm0, xmm1 %endif movd [rdi], xmm0 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx %endm %macro GET_PARAM 0 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr mov rcx, 0x0400040 movdqa xmm7, [rdx] ;load filters pshuflw xmm6, xmm7, 11111111b ;k3 pshufhw xmm7, xmm7, 0b ;k4 punpcklwd xmm6, xmm6 punpckhwd xmm7, xmm7 movq xmm4, rcx ;rounding pshufd xmm4, xmm4, 0 pxor xmm5, xmm5 movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch movsxd rcx, DWORD PTR arg(4) ;output_height %endm %macro APPLY_FILTER_8 1 punpcklbw xmm0, xmm5 punpcklbw xmm1, xmm5 pmullw xmm0, xmm6 pmullw xmm1, xmm7 paddsw xmm0, xmm1 paddsw xmm0, xmm4 ;rounding psraw xmm0, 7 ;shift packuswb xmm0, xmm0 ;pack back to byte %if %1 movq xmm1, [rdi] pavgb xmm0, xmm1 %endif movq [rdi], xmm0 ;store the result lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx %endm %macro APPLY_FILTER_16 1 punpcklbw xmm0, xmm5 punpcklbw xmm1, xmm5 punpckhbw xmm2, xmm5 punpckhbw xmm3, xmm5 pmullw xmm0, xmm6 pmullw xmm1, xmm7 pmullw xmm2, xmm6 pmullw xmm3, xmm7 paddsw xmm0, xmm1 paddsw xmm2, xmm3 paddsw xmm0, xmm4 ;rounding paddsw xmm2, xmm4 psraw xmm0, 7 ;shift psraw xmm2, 7 packuswb xmm0, xmm2 ;pack back to byte %if %1 movdqu xmm1, [rdi] pavgb xmm0, xmm1 %endif movdqu [rdi], xmm0 ;store the result lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx %endm SECTION .text global sym(vpx_filter_block1d4_v2_sse2) PRIVATE sym(vpx_filter_block1d4_v2_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 push rsi push rdi ; end prolog GET_PARAM_4 .loop: movd xmm0, [rsi] ;load src movd xmm1, [rsi + rax] APPLY_FILTER_4 0 jnz .loop ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d8_v2_sse2) PRIVATE sym(vpx_filter_block1d8_v2_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movq xmm0, [rsi] ;0 movq xmm1, [rsi + rax] ;1 APPLY_FILTER_8 0 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d16_v2_sse2) PRIVATE sym(vpx_filter_block1d16_v2_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movdqu xmm0, [rsi] ;0 movdqu xmm1, [rsi + rax] ;1 movdqa xmm2, xmm0 movdqa xmm3, xmm1 APPLY_FILTER_16 0 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d4_v2_avg_sse2) PRIVATE sym(vpx_filter_block1d4_v2_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 push rsi push rdi ; end prolog GET_PARAM_4 .loop: movd xmm0, [rsi] ;load src movd xmm1, [rsi + rax] APPLY_FILTER_4 1 jnz .loop ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret global 
sym(vpx_filter_block1d8_v2_avg_sse2) PRIVATE sym(vpx_filter_block1d8_v2_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movq xmm0, [rsi] ;0 movq xmm1, [rsi + rax] ;1 APPLY_FILTER_8 1 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d16_v2_avg_sse2) PRIVATE sym(vpx_filter_block1d16_v2_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movdqu xmm0, [rsi] ;0 movdqu xmm1, [rsi + rax] ;1 movdqa xmm2, xmm0 movdqa xmm3, xmm1 APPLY_FILTER_16 1 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d4_h2_sse2) PRIVATE sym(vpx_filter_block1d4_h2_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 push rsi push rdi ; end prolog GET_PARAM_4 .loop: movdqu xmm0, [rsi] ;load src movdqa xmm1, xmm0 psrldq xmm1, 1 APPLY_FILTER_4 0 jnz .loop ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d8_h2_sse2) PRIVATE sym(vpx_filter_block1d8_h2_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movdqu xmm0, [rsi] ;load src movdqa xmm1, xmm0 psrldq xmm1, 1 APPLY_FILTER_8 0 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d16_h2_sse2) PRIVATE sym(vpx_filter_block1d16_h2_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movdqu xmm0, [rsi] ;load src movdqu xmm1, [rsi + 1] movdqa xmm2, xmm0 movdqa xmm3, xmm1 APPLY_FILTER_16 0 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d4_h2_avg_sse2) PRIVATE sym(vpx_filter_block1d4_h2_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 push rsi push rdi ; end prolog GET_PARAM_4 .loop: movdqu xmm0, [rsi] ;load src movdqa xmm1, xmm0 psrldq xmm1, 1 APPLY_FILTER_4 1 jnz .loop ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d8_h2_avg_sse2) PRIVATE sym(vpx_filter_block1d8_h2_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movdqu xmm0, [rsi] ;load src movdqa xmm1, xmm0 psrldq xmm1, 1 APPLY_FILTER_8 1 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d16_h2_avg_sse2) PRIVATE sym(vpx_filter_block1d16_h2_avg_sse2): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movdqu xmm0, [rsi] ;load src movdqu xmm1, [rsi + 1] movdqa xmm2, xmm0 movdqa xmm3, xmm1 APPLY_FILTER_16 1 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret libvpx-1.8.2/vpx_dsp/x86/vpx_subpixel_bilinear_ssse3.asm000066400000000000000000000222111357355204000233570ustar00rootroot00000000000000; ; Copyright (c) 2014 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. 
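;
; These SSSE3 variants fold the separate (+64, >> 7) rounding of the SSE2
; versions into a single pmulhrsw against the broadcast word 0x0100:
;   pmulhrsw(x, 256) = (2 * x * 256 + 0x8000) >> 16 = (x + 64) >> 7
; saving a paddsw/psraw pair per vector.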
; %include "vpx_ports/x86_abi_support.asm" %macro GET_PARAM_4 0 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr mov ecx, 0x01000100 movdqa xmm3, [rdx] ;load filters psrldq xmm3, 6 packsswb xmm3, xmm3 pshuflw xmm3, xmm3, 0b ;k3_k4 movd xmm2, ecx ;rounding_shift pshufd xmm2, xmm2, 0 movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch movsxd rcx, DWORD PTR arg(4) ;output_height %endm %macro APPLY_FILTER_4 1 punpcklbw xmm0, xmm1 pmaddubsw xmm0, xmm3 pmulhrsw xmm0, xmm2 ;rounding(+64)+shift(>>7) packuswb xmm0, xmm0 ;pack to byte %if %1 movd xmm1, [rdi] pavgb xmm0, xmm1 %endif movd [rdi], xmm0 lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx %endm %macro GET_PARAM 0 mov rdx, arg(5) ;filter ptr mov rsi, arg(0) ;src_ptr mov rdi, arg(2) ;output_ptr mov ecx, 0x01000100 movdqa xmm7, [rdx] ;load filters psrldq xmm7, 6 packsswb xmm7, xmm7 pshuflw xmm7, xmm7, 0b ;k3_k4 punpcklwd xmm7, xmm7 movd xmm6, ecx ;rounding_shift pshufd xmm6, xmm6, 0 movsxd rax, DWORD PTR arg(1) ;pixels_per_line movsxd rdx, DWORD PTR arg(3) ;out_pitch movsxd rcx, DWORD PTR arg(4) ;output_height %endm %macro APPLY_FILTER_8 1 punpcklbw xmm0, xmm1 pmaddubsw xmm0, xmm7 pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7) packuswb xmm0, xmm0 ;pack back to byte %if %1 movq xmm1, [rdi] pavgb xmm0, xmm1 %endif movq [rdi], xmm0 ;store the result lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx %endm %macro APPLY_FILTER_16 1 punpcklbw xmm0, xmm1 punpckhbw xmm2, xmm1 pmaddubsw xmm0, xmm7 pmaddubsw xmm2, xmm7 pmulhrsw xmm0, xmm6 ;rounding(+64)+shift(>>7) pmulhrsw xmm2, xmm6 packuswb xmm0, xmm2 ;pack back to byte %if %1 movdqu xmm1, [rdi] pavgb xmm0, xmm1 %endif movdqu [rdi], xmm0 ;store the result lea rsi, [rsi + rax] lea rdi, [rdi + rdx] dec rcx %endm SECTION .text global sym(vpx_filter_block1d4_v2_ssse3) PRIVATE sym(vpx_filter_block1d4_v2_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 push rsi push rdi ; end prolog GET_PARAM_4 .loop: movd xmm0, [rsi] ;load src movd xmm1, [rsi + rax] APPLY_FILTER_4 0 jnz .loop ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d8_v2_ssse3) PRIVATE sym(vpx_filter_block1d8_v2_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movq xmm0, [rsi] ;0 movq xmm1, [rsi + rax] ;1 APPLY_FILTER_8 0 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d16_v2_ssse3) PRIVATE sym(vpx_filter_block1d16_v2_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movdqu xmm0, [rsi] ;0 movdqu xmm1, [rsi + rax] ;1 movdqa xmm2, xmm0 APPLY_FILTER_16 0 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d4_v2_avg_ssse3) PRIVATE sym(vpx_filter_block1d4_v2_avg_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 push rsi push rdi ; end prolog GET_PARAM_4 .loop: movd xmm0, [rsi] ;load src movd xmm1, [rsi + rax] APPLY_FILTER_4 1 jnz .loop ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d8_v2_avg_ssse3) PRIVATE sym(vpx_filter_block1d8_v2_avg_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movq xmm0, [rsi] ;0 movq xmm1, [rsi + rax] ;1 APPLY_FILTER_8 1 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d16_v2_avg_ssse3) PRIVATE 
sym(vpx_filter_block1d16_v2_avg_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movdqu xmm0, [rsi] ;0 movdqu xmm1, [rsi + rax] ;1 movdqa xmm2, xmm0 APPLY_FILTER_16 1 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d4_h2_ssse3) PRIVATE sym(vpx_filter_block1d4_h2_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 push rsi push rdi ; end prolog GET_PARAM_4 .loop: movdqu xmm0, [rsi] ;load src movdqa xmm1, xmm0 psrldq xmm1, 1 APPLY_FILTER_4 0 jnz .loop ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d8_h2_ssse3) PRIVATE sym(vpx_filter_block1d8_h2_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movdqu xmm0, [rsi] ;load src movdqa xmm1, xmm0 psrldq xmm1, 1 APPLY_FILTER_8 0 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d16_h2_ssse3) PRIVATE sym(vpx_filter_block1d16_h2_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movdqu xmm0, [rsi] ;load src movdqu xmm1, [rsi + 1] movdqa xmm2, xmm0 APPLY_FILTER_16 0 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d4_h2_avg_ssse3) PRIVATE sym(vpx_filter_block1d4_h2_avg_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 push rsi push rdi ; end prolog GET_PARAM_4 .loop: movdqu xmm0, [rsi] ;load src movdqa xmm1, xmm0 psrldq xmm1, 1 APPLY_FILTER_4 1 jnz .loop ; begin epilog pop rdi pop rsi UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d8_h2_avg_ssse3) PRIVATE sym(vpx_filter_block1d8_h2_avg_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movdqu xmm0, [rsi] ;load src movdqa xmm1, xmm0 psrldq xmm1, 1 APPLY_FILTER_8 1 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret global sym(vpx_filter_block1d16_h2_avg_ssse3) PRIVATE sym(vpx_filter_block1d16_h2_avg_ssse3): push rbp mov rbp, rsp SHADOW_ARGS_TO_STACK 6 SAVE_XMM 7 push rsi push rdi ; end prolog GET_PARAM .loop: movdqu xmm0, [rsi] ;load src movdqu xmm1, [rsi + 1] movdqa xmm2, xmm0 APPLY_FILTER_16 1 jnz .loop ; begin epilog pop rdi pop rsi RESTORE_XMM UNSHADOW_ARGS pop rbp ret libvpx-1.8.2/vpx_mem/000077500000000000000000000000001357355204000145055ustar00rootroot00000000000000libvpx-1.8.2/vpx_mem/include/000077500000000000000000000000001357355204000161305ustar00rootroot00000000000000libvpx-1.8.2/vpx_mem/include/vpx_mem_intrnl.h000066400000000000000000000020201357355204000213340ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ #define VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ #include "./vpx_config.h" #define ADDRESS_STORAGE_SIZE sizeof(size_t) #ifndef DEFAULT_ALIGNMENT #if defined(VXWORKS) /*default addr alignment to use in calls to vpx_* functions other than * vpx_memalign*/ #define DEFAULT_ALIGNMENT 32 #else #define DEFAULT_ALIGNMENT (2 * sizeof(void *)) /* NOLINT */ #endif #endif /*returns an addr aligned to the byte boundary specified by align*/ #define align_addr(addr, align) \ (void *)(((size_t)(addr) + ((align)-1)) & ~(size_t)((align)-1)) #endif // VPX_VPX_MEM_INCLUDE_VPX_MEM_INTRNL_H_ libvpx-1.8.2/vpx_mem/vpx_mem.c000066400000000000000000000050621357355204000163270ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_mem.h" #include #include #include #include #include "include/vpx_mem_intrnl.h" #include "vpx/vpx_integer.h" #if !defined(VPX_MAX_ALLOCABLE_MEMORY) #if SIZE_MAX > (1ULL << 40) #define VPX_MAX_ALLOCABLE_MEMORY (1ULL << 40) #else // For 32-bit targets keep this below INT_MAX to avoid valgrind warnings. #define VPX_MAX_ALLOCABLE_MEMORY ((1ULL << 31) - (1 << 16)) #endif #endif // Returns 0 in case of overflow of nmemb * size. static int check_size_argument_overflow(uint64_t nmemb, uint64_t size) { const uint64_t total_size = nmemb * size; if (nmemb == 0) return 1; if (size > VPX_MAX_ALLOCABLE_MEMORY / nmemb) return 0; if (total_size != (size_t)total_size) return 0; return 1; } static size_t *get_malloc_address_location(void *const mem) { return ((size_t *)mem) - 1; } static uint64_t get_aligned_malloc_size(size_t size, size_t align) { return (uint64_t)size + align - 1 + ADDRESS_STORAGE_SIZE; } static void set_actual_malloc_address(void *const mem, const void *const malloc_addr) { size_t *const malloc_addr_location = get_malloc_address_location(mem); *malloc_addr_location = (size_t)malloc_addr; } static void *get_actual_malloc_address(void *const mem) { size_t *const malloc_addr_location = get_malloc_address_location(mem); return (void *)(*malloc_addr_location); } void *vpx_memalign(size_t align, size_t size) { void *x = NULL, *addr; const uint64_t aligned_size = get_aligned_malloc_size(size, align); if (!check_size_argument_overflow(1, aligned_size)) return NULL; addr = malloc((size_t)aligned_size); if (addr) { x = align_addr((unsigned char *)addr + ADDRESS_STORAGE_SIZE, align); set_actual_malloc_address(x, addr); } return x; } void *vpx_malloc(size_t size) { return vpx_memalign(DEFAULT_ALIGNMENT, size); } void *vpx_calloc(size_t num, size_t size) { void *x; if (!check_size_argument_overflow(num, size)) return NULL; x = vpx_malloc(num * size); if (x) memset(x, 0, num * size); return x; } void vpx_free(void *memblk) { if (memblk) { void *addr = get_actual_malloc_address(memblk); free(addr); } } libvpx-1.8.2/vpx_mem/vpx_mem.h000066400000000000000000000022411357355204000163300ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_MEM_VPX_MEM_H_ #define VPX_VPX_MEM_VPX_MEM_H_ #include "vpx_config.h" #if defined(__uClinux__) #include #endif #include #include #include "vpx/vpx_integer.h" #if defined(__cplusplus) extern "C" { #endif void *vpx_memalign(size_t align, size_t size); void *vpx_malloc(size_t size); void *vpx_calloc(size_t num, size_t size); void vpx_free(void *memblk); #if CONFIG_VP9_HIGHBITDEPTH static INLINE void *vpx_memset16(void *dest, int val, size_t length) { size_t i; uint16_t *dest16 = (uint16_t *)dest; for (i = 0; i < length; i++) *dest16++ = val; return dest; } #endif #include #ifdef VPX_MEM_PLTFRM #include VPX_MEM_PLTFRM #endif #if defined(__cplusplus) } #endif #endif // VPX_VPX_MEM_VPX_MEM_H_ libvpx-1.8.2/vpx_mem/vpx_mem.mk000066400000000000000000000001701357355204000165070ustar00rootroot00000000000000MEM_SRCS-yes += vpx_mem.mk MEM_SRCS-yes += vpx_mem.c MEM_SRCS-yes += vpx_mem.h MEM_SRCS-yes += include/vpx_mem_intrnl.h libvpx-1.8.2/vpx_ports/000077500000000000000000000000001357355204000150765ustar00rootroot00000000000000libvpx-1.8.2/vpx_ports/arm.h000066400000000000000000000020201357355204000160200ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_PORTS_ARM_H_ #define VPX_VPX_PORTS_ARM_H_ #include #include "vpx_config.h" #ifdef __cplusplus extern "C" { #endif /*ARMv5TE "Enhanced DSP" instructions.*/ #define HAS_EDSP 0x01 /*ARMv6 "Parallel" or "Media" instructions.*/ #define HAS_MEDIA 0x02 /*ARMv7 optional NEON instructions.*/ #define HAS_NEON 0x04 int arm_cpu_caps(void); // Earlier gcc compilers have issues with some neon intrinsics #if !defined(__clang__) && defined(__GNUC__) && __GNUC__ == 4 && \ __GNUC_MINOR__ <= 6 #define VPX_INCOMPATIBLE_GCC #endif #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_PORTS_ARM_H_ libvpx-1.8.2/vpx_ports/arm_cpudetect.c000066400000000000000000000075351357355204000200730ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include "./vpx_config.h" #include "vpx_ports/arm.h" #ifdef WINAPI_FAMILY #include #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) #define getenv(x) NULL #endif #endif static int arm_cpu_env_flags(int *flags) { char *env; env = getenv("VPX_SIMD_CAPS"); if (env && *env) { *flags = (int)strtol(env, NULL, 0); return 0; } *flags = 0; return -1; } static int arm_cpu_env_mask(void) { char *env; env = getenv("VPX_SIMD_CAPS_MASK"); return env && *env ? (int)strtol(env, NULL, 0) : ~0; } #if !CONFIG_RUNTIME_CPU_DETECT int arm_cpu_caps(void) { /* This function should actually be a no-op. 
There is no way to adjust any of * these because the RTCD tables do not exist: the functions are called * statically */ int flags; int mask; if (!arm_cpu_env_flags(&flags)) { return flags; } mask = arm_cpu_env_mask(); #if HAVE_NEON || HAVE_NEON_ASM flags |= HAS_NEON; #endif /* HAVE_NEON || HAVE_NEON_ASM */ return flags & mask; } #elif defined(_MSC_VER) /* end !CONFIG_RUNTIME_CPU_DETECT */ /*For GetExceptionCode() and EXCEPTION_ILLEGAL_INSTRUCTION.*/ #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #ifndef WIN32_EXTRA_LEAN #define WIN32_EXTRA_LEAN #endif #include int arm_cpu_caps(void) { int flags; int mask; if (!arm_cpu_env_flags(&flags)) { return flags; } mask = arm_cpu_env_mask(); /* MSVC has no inline __asm support for ARM, but it does let you __emit * instructions via their assembled hex code. * All of these instructions should be essentially nops. */ #if HAVE_NEON || HAVE_NEON_ASM if (mask & HAS_NEON) { __try { /*VORR q0,q0,q0*/ __emit(0xF2200150); flags |= HAS_NEON; } __except (GetExceptionCode() == EXCEPTION_ILLEGAL_INSTRUCTION) { /*Ignore exception.*/ } } #endif /* HAVE_NEON || HAVE_NEON_ASM */ return flags & mask; } #elif defined(__ANDROID__) /* end _MSC_VER */ #include int arm_cpu_caps(void) { int flags; int mask; uint64_t features; if (!arm_cpu_env_flags(&flags)) { return flags; } mask = arm_cpu_env_mask(); features = android_getCpuFeatures(); #if HAVE_NEON || HAVE_NEON_ASM if (features & ANDROID_CPU_ARM_FEATURE_NEON) flags |= HAS_NEON; #endif /* HAVE_NEON || HAVE_NEON_ASM */ return flags & mask; } #elif defined(__linux__) /* end __ANDROID__ */ #include int arm_cpu_caps(void) { FILE *fin; int flags; int mask; if (!arm_cpu_env_flags(&flags)) { return flags; } mask = arm_cpu_env_mask(); /* Reading /proc/self/auxv would be easier, but that doesn't work reliably * on Android. * This also means that detection will fail in Scratchbox. */ fin = fopen("/proc/cpuinfo", "r"); if (fin != NULL) { /* 512 should be enough for anybody (it's even enough for all the flags * that x86 has accumulated... so far). */ char buf[512]; while (fgets(buf, 511, fin) != NULL) { #if HAVE_NEON || HAVE_NEON_ASM if (memcmp(buf, "Features", 8) == 0) { char *p; p = strstr(buf, " neon"); if (p != NULL && (p[5] == ' ' || p[5] == '\n')) { flags |= HAS_NEON; } } #endif /* HAVE_NEON || HAVE_NEON_ASM */ } fclose(fin); } return flags & mask; } #else /* end __linux__ */ #error \ "--enable-runtime-cpu-detect selected, but no CPU detection method " \ "available for your platform. Reconfigure with --disable-runtime-cpu-detect." #endif libvpx-1.8.2/vpx_ports/asmdefs_mmi.h000066400000000000000000000052201357355204000175320ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_PORTS_ASMDEFS_MMI_H_ #define VPX_VPX_PORTS_ASMDEFS_MMI_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" #if HAVE_MMI #if HAVE_MIPS64 #define mips_reg int64_t #define MMI_ADDU(reg1, reg2, reg3) \ "daddu " #reg1 ", " #reg2 ", " #reg3 " \n\t" #define MMI_ADDIU(reg1, reg2, immediate) \ "daddiu " #reg1 ", " #reg2 ", " #immediate " \n\t" #define MMI_ADDI(reg1, reg2, immediate) \ "daddi " #reg1 ", " #reg2 ", " #immediate " \n\t" #define MMI_SUBU(reg1, reg2, reg3) \ "dsubu " #reg1 ", " #reg2 ", " #reg3 " \n\t" #define MMI_L(reg, addr, bias) \ "ld " #reg ", " #bias "(" #addr ") \n\t" #define MMI_SRL(reg1, reg2, shift) \ "dsrl " #reg1 ", " #reg2 ", " #shift " \n\t" #define MMI_SLL(reg1, reg2, shift) \ "dsll " #reg1 ", " #reg2 ", " #shift " \n\t" #define MMI_MTC1(reg, fp) \ "dmtc1 " #reg ", " #fp " \n\t" #define MMI_LI(reg, immediate) \ "dli " #reg ", " #immediate " \n\t" #else #define mips_reg int32_t #define MMI_ADDU(reg1, reg2, reg3) \ "addu " #reg1 ", " #reg2 ", " #reg3 " \n\t" #define MMI_ADDIU(reg1, reg2, immediate) \ "addiu " #reg1 ", " #reg2 ", " #immediate " \n\t" #define MMI_ADDI(reg1, reg2, immediate) \ "addi " #reg1 ", " #reg2 ", " #immediate " \n\t" #define MMI_SUBU(reg1, reg2, reg3) \ "subu " #reg1 ", " #reg2 ", " #reg3 " \n\t" #define MMI_L(reg, addr, bias) \ "lw " #reg ", " #bias "(" #addr ") \n\t" #define MMI_SRL(reg1, reg2, shift) \ "srl " #reg1 ", " #reg2 ", " #shift " \n\t" #define MMI_SLL(reg1, reg2, shift) \ "sll " #reg1 ", " #reg2 ", " #shift " \n\t" #define MMI_MTC1(reg, fp) \ "mtc1 " #reg ", " #fp " \n\t" #define MMI_LI(reg, immediate) \ "li " #reg ", " #immediate " \n\t" #endif /* HAVE_MIPS64 */ #endif /* HAVE_MMI */ #endif // VPX_VPX_PORTS_ASMDEFS_MMI_H_ libvpx-1.8.2/vpx_ports/bitops.h000066400000000000000000000033701357355204000165520ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_PORTS_BITOPS_H_ #define VPX_VPX_PORTS_BITOPS_H_ #include #include "vpx_ports/msvc.h" #ifdef _MSC_VER #if defined(_M_X64) || defined(_M_IX86) #include #define USE_MSC_INTRINSICS #endif #endif #ifdef __cplusplus extern "C" { #endif // These versions of get_msb() are only valid when n != 0 because all // of the optimized versions are undefined when n == 0: // https://gcc.gnu.org/onlinedocs/gcc/Other-Builtins.html // use GNU builtins where available. #if defined(__GNUC__) && \ ((__GNUC__ == 3 && __GNUC_MINOR__ >= 4) || __GNUC__ >= 4) static INLINE int get_msb(unsigned int n) { assert(n != 0); return 31 ^ __builtin_clz(n); } #elif defined(USE_MSC_INTRINSICS) #pragma intrinsic(_BitScanReverse) static INLINE int get_msb(unsigned int n) { unsigned long first_set_bit; assert(n != 0); _BitScanReverse(&first_set_bit, n); return first_set_bit; } #undef USE_MSC_INTRINSICS #else // Returns (int)floor(log2(n)). n must be > 0. 
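// The portable fallback below binary-searches the bit width in five steps,
// testing shifts of 16/8/4/2/1 and accumulating those that leave a nonzero
// value: e.g. get_msb(0x50) keeps the 4 and 2 shifts and returns 6,
// matching (int)floor(log2(80)).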
static INLINE int get_msb(unsigned int n) { int log = 0; unsigned int value = n; int i; assert(n != 0); for (i = 4; i >= 0; --i) { const int shift = (1 << i); const unsigned int x = value >> shift; if (x != 0) { value = x; log += shift; } } return log; } #endif #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_PORTS_BITOPS_H_ libvpx-1.8.2/vpx_ports/emmintrin_compat.h000066400000000000000000000032221357355204000206130ustar00rootroot00000000000000/* * Copyright (c) 2012 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_PORTS_EMMINTRIN_COMPAT_H_ #define VPX_VPX_PORTS_EMMINTRIN_COMPAT_H_ #if defined(__GNUC__) && __GNUC__ < 4 /* From emmintrin.h (gcc 4.5.3) */ /* Casts between various SP, DP, INT vector types. Note that these do no conversion of values, they just change the type. */ extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castpd_ps(__m128d __A) { return (__m128)__A; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castpd_si128(__m128d __A) { return (__m128i)__A; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castps_pd(__m128 __A) { return (__m128d)__A; } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castps_si128(__m128 __A) { return (__m128i)__A; } extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castsi128_ps(__m128i __A) { return (__m128)__A; } extern __inline __m128d __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_castsi128_pd(__m128i __A) { return (__m128d)__A; } #endif #endif // VPX_VPX_PORTS_EMMINTRIN_COMPAT_H_ libvpx-1.8.2/vpx_ports/emms_mmx.asm000066400000000000000000000010441357355204000174210ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" section .text global sym(vpx_clear_system_state) PRIVATE sym(vpx_clear_system_state): emms ret libvpx-1.8.2/vpx_ports/emms_mmx.c000066400000000000000000000010071357355204000170620ustar00rootroot00000000000000/* * Copyright (c) 2018 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include "vpx_ports/system_state.h" void vpx_clear_system_state() { _mm_empty(); } libvpx-1.8.2/vpx_ports/float_control_word.asm000066400000000000000000000014241357355204000215010ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. 
; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_ports/x86_abi_support.asm" section .text %if LIBVPX_YASM_WIN64 global sym(vpx_winx64_fldcw) PRIVATE sym(vpx_winx64_fldcw): sub rsp, 8 mov [rsp], rcx ; win x64 specific fldcw [rsp] add rsp, 8 ret global sym(vpx_winx64_fstcw) PRIVATE sym(vpx_winx64_fstcw): sub rsp, 8 fstcw [rsp] mov rax, [rsp] add rsp, 8 ret %endif libvpx-1.8.2/vpx_ports/mem.h000066400000000000000000000052361357355204000160330ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_PORTS_MEM_H_ #define VPX_VPX_PORTS_MEM_H_ #include "vpx_config.h" #include "vpx/vpx_integer.h" #if (defined(__GNUC__) && __GNUC__) || defined(__SUNPRO_C) #define DECLARE_ALIGNED(n, typ, val) typ val __attribute__((aligned(n))) #elif defined(_MSC_VER) #define DECLARE_ALIGNED(n, typ, val) __declspec(align(n)) typ val #else #warning No alignment directives known for this compiler. #define DECLARE_ALIGNED(n, typ, val) typ val #endif #if HAVE_NEON && defined(_MSC_VER) #define __builtin_prefetch(x) #endif /* Shift down with rounding */ #define ROUND_POWER_OF_TWO(value, n) (((value) + (1 << ((n)-1))) >> (n)) #define ROUND64_POWER_OF_TWO(value, n) (((value) + (1ULL << ((n)-1))) >> (n)) #define ALIGN_POWER_OF_TWO(value, n) \ (((value) + ((1 << (n)) - 1)) & ~((1 << (n)) - 1)) #define CONVERT_TO_SHORTPTR(x) ((uint16_t *)(((uintptr_t)(x)) << 1)) #define CAST_TO_SHORTPTR(x) ((uint16_t *)((uintptr_t)(x))) #if CONFIG_VP9_HIGHBITDEPTH #define CONVERT_TO_BYTEPTR(x) ((uint8_t *)(((uintptr_t)(x)) >> 1)) #define CAST_TO_BYTEPTR(x) ((uint8_t *)((uintptr_t)(x))) #endif // CONFIG_VP9_HIGHBITDEPTH #if !defined(__has_feature) #define __has_feature(x) 0 #endif // !defined(__has_feature) #if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) #define VPX_WITH_ASAN 1 #else #define VPX_WITH_ASAN 0 #endif // __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) #if !defined(__has_attribute) #define __has_attribute(x) 0 #endif // !defined(__has_attribute) #if __has_attribute(uninitialized) // Attribute "uninitialized" disables -ftrivial-auto-var-init=pattern for // the specified variable. // // -ftrivial-auto-var-init is security risk mitigation feature, so attribute // should not be used "just in case", but only to fix real performance // bottlenecks when other approaches do not work. In general the compiler is // quite effective at eliminating unneeded initializations introduced by the // flag, e.g. when they are followed by actual initialization by a program. // However if compiler optimization fails and code refactoring is hard, the // attribute can be used as a workaround. 
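// A hypothetical local declaration using the attribute (the name and size
// are illustrative only):
//   uint8_t scratch[64 * 64] VPX_UNINITIALIZED;  // every byte written later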
#define VPX_UNINITIALIZED __attribute__((uninitialized)) #else #define VPX_UNINITIALIZED #endif // __has_attribute(uninitialized) #endif // VPX_VPX_PORTS_MEM_H_ libvpx-1.8.2/vpx_ports/mem_ops.h000066400000000000000000000152011357355204000167050ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_PORTS_MEM_OPS_H_ #define VPX_VPX_PORTS_MEM_OPS_H_ /* \file * \brief Provides portable memory access primitives * * This function provides portable primitives for getting and setting of * signed and unsigned integers in 16, 24, and 32 bit sizes. The operations * can be performed on unaligned data regardless of hardware support for * unaligned accesses. * * The type used to pass the integral values may be changed by defining * MEM_VALUE_T with the appropriate type. The type given must be an integral * numeric type. * * The actual functions instantiated have the MEM_VALUE_T type name pasted * on to the symbol name. This allows the developer to instantiate these * operations for multiple types within the same translation unit. This is * of somewhat questionable utility, but the capability exists nonetheless. * Users not making use of this functionality should call the functions * without the type name appended, and the preprocessor will take care of * it. * * NOTE: This code is not supported on platforms where char > 1 octet ATM. */ #ifndef MAU_T /* Minimum Access Unit for this target */ #define MAU_T unsigned char #endif #ifndef MEM_VALUE_T #define MEM_VALUE_T int #endif #undef MEM_VALUE_T_SZ_BITS #define MEM_VALUE_T_SZ_BITS (sizeof(MEM_VALUE_T) << 3) #undef mem_ops_wrap_symbol #define mem_ops_wrap_symbol(fn) mem_ops_wrap_symbol2(fn, MEM_VALUE_T) #undef mem_ops_wrap_symbol2 #define mem_ops_wrap_symbol2(fn, typ) mem_ops_wrap_symbol3(fn, typ) #undef mem_ops_wrap_symbol3 #define mem_ops_wrap_symbol3(fn, typ) fn##_as_##typ /* * Include aligned access routines */ #define INCLUDED_BY_MEM_OPS_H #include "mem_ops_aligned.h" #undef INCLUDED_BY_MEM_OPS_H #undef mem_get_be16 #define mem_get_be16 mem_ops_wrap_symbol(mem_get_be16) static unsigned MEM_VALUE_T mem_get_be16(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; val = mem[0] << 8; val |= mem[1]; return val; } #undef mem_get_be24 #define mem_get_be24 mem_ops_wrap_symbol(mem_get_be24) static unsigned MEM_VALUE_T mem_get_be24(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; val = mem[0] << 16; val |= mem[1] << 8; val |= mem[2]; return val; } #undef mem_get_be32 #define mem_get_be32 mem_ops_wrap_symbol(mem_get_be32) static unsigned MEM_VALUE_T mem_get_be32(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; val = ((unsigned MEM_VALUE_T)mem[0]) << 24; val |= mem[1] << 16; val |= mem[2] << 8; val |= mem[3]; return val; } #undef mem_get_le16 #define mem_get_le16 mem_ops_wrap_symbol(mem_get_le16) static unsigned MEM_VALUE_T mem_get_le16(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; val = mem[1] << 8; val |= mem[0]; return val; } #undef mem_get_le24 #define mem_get_le24 mem_ops_wrap_symbol(mem_get_le24) static unsigned MEM_VALUE_T 
mem_get_le24(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; val = mem[2] << 16; val |= mem[1] << 8; val |= mem[0]; return val; } #undef mem_get_le32 #define mem_get_le32 mem_ops_wrap_symbol(mem_get_le32) static unsigned MEM_VALUE_T mem_get_le32(const void *vmem) { unsigned MEM_VALUE_T val; const MAU_T *mem = (const MAU_T *)vmem; val = ((unsigned MEM_VALUE_T)mem[3]) << 24; val |= mem[2] << 16; val |= mem[1] << 8; val |= mem[0]; return val; } #define mem_get_s_generic(end, sz) \ static VPX_INLINE signed MEM_VALUE_T mem_get_s##end##sz(const void *vmem) { \ const MAU_T *mem = (const MAU_T *)vmem; \ signed MEM_VALUE_T val = mem_get_##end##sz(mem); \ return (val << (MEM_VALUE_T_SZ_BITS - sz)) >> (MEM_VALUE_T_SZ_BITS - sz); \ } /* clang-format off */ #undef mem_get_sbe16 #define mem_get_sbe16 mem_ops_wrap_symbol(mem_get_sbe16) mem_get_s_generic(be, 16) #undef mem_get_sbe24 #define mem_get_sbe24 mem_ops_wrap_symbol(mem_get_sbe24) mem_get_s_generic(be, 24) #undef mem_get_sbe32 #define mem_get_sbe32 mem_ops_wrap_symbol(mem_get_sbe32) mem_get_s_generic(be, 32) #undef mem_get_sle16 #define mem_get_sle16 mem_ops_wrap_symbol(mem_get_sle16) mem_get_s_generic(le, 16) #undef mem_get_sle24 #define mem_get_sle24 mem_ops_wrap_symbol(mem_get_sle24) mem_get_s_generic(le, 24) #undef mem_get_sle32 #define mem_get_sle32 mem_ops_wrap_symbol(mem_get_sle32) mem_get_s_generic(le, 32) #undef mem_put_be16 #define mem_put_be16 mem_ops_wrap_symbol(mem_put_be16) static VPX_INLINE void mem_put_be16(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; mem[0] = (MAU_T)((val >> 8) & 0xff); mem[1] = (MAU_T)((val >> 0) & 0xff); } #undef mem_put_be24 #define mem_put_be24 mem_ops_wrap_symbol(mem_put_be24) static VPX_INLINE void mem_put_be24(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; mem[0] = (MAU_T)((val >> 16) & 0xff); mem[1] = (MAU_T)((val >> 8) & 0xff); mem[2] = (MAU_T)((val >> 0) & 0xff); } #undef mem_put_be32 #define mem_put_be32 mem_ops_wrap_symbol(mem_put_be32) static VPX_INLINE void mem_put_be32(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; mem[0] = (MAU_T)((val >> 24) & 0xff); mem[1] = (MAU_T)((val >> 16) & 0xff); mem[2] = (MAU_T)((val >> 8) & 0xff); mem[3] = (MAU_T)((val >> 0) & 0xff); } #undef mem_put_le16 #define mem_put_le16 mem_ops_wrap_symbol(mem_put_le16) static VPX_INLINE void mem_put_le16(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; mem[0] = (MAU_T)((val >> 0) & 0xff); mem[1] = (MAU_T)((val >> 8) & 0xff); } #undef mem_put_le24 #define mem_put_le24 mem_ops_wrap_symbol(mem_put_le24) static VPX_INLINE void mem_put_le24(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; mem[0] = (MAU_T)((val >> 0) & 0xff); mem[1] = (MAU_T)((val >> 8) & 0xff); mem[2] = (MAU_T)((val >> 16) & 0xff); } #undef mem_put_le32 #define mem_put_le32 mem_ops_wrap_symbol(mem_put_le32) static VPX_INLINE void mem_put_le32(void *vmem, MEM_VALUE_T val) { MAU_T *mem = (MAU_T *)vmem; mem[0] = (MAU_T)((val >> 0) & 0xff); mem[1] = (MAU_T)((val >> 8) & 0xff); mem[2] = (MAU_T)((val >> 16) & 0xff); mem[3] = (MAU_T)((val >> 24) & 0xff); } /* clang-format on */ #endif // VPX_VPX_PORTS_MEM_OPS_H_ libvpx-1.8.2/vpx_ports/mem_ops_aligned.h000066400000000000000000000160641357355204000204000ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. 
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_PORTS_MEM_OPS_ALIGNED_H_ #define VPX_VPX_PORTS_MEM_OPS_ALIGNED_H_ #include "vpx/vpx_integer.h" /* \file * \brief Provides portable memory access primitives for operating on aligned * data * * This file is split from mem_ops.h for easier maintenance. See mem_ops.h * for a more detailed description of these primitives. */ #ifndef INCLUDED_BY_MEM_OPS_H #error Include mem_ops.h, not mem_ops_aligned.h directly. #endif /* Architectures that provide instructions for doing this byte swapping * could redefine these macros. */ #define swap_endian_16(val, raw) \ do { \ val = (uint16_t)(((raw >> 8) & 0x00ff) | ((raw << 8) & 0xff00)); \ } while (0) #define swap_endian_32(val, raw) \ do { \ val = ((raw >> 24) & 0x000000ff) | ((raw >> 8) & 0x0000ff00) | \ ((raw << 8) & 0x00ff0000) | ((raw << 24) & 0xff000000); \ } while (0) #define swap_endian_16_se(val, raw) \ do { \ swap_endian_16(val, raw); \ val = ((val << 16) >> 16); \ } while (0) #define swap_endian_32_se(val, raw) swap_endian_32(val, raw) #define mem_get_ne_aligned_generic(end, sz) \ static VPX_INLINE unsigned MEM_VALUE_T mem_get_##end##sz##_aligned( \ const void *vmem) { \ const uint##sz##_t *mem = (const uint##sz##_t *)vmem; \ return *mem; \ } #define mem_get_sne_aligned_generic(end, sz) \ static VPX_INLINE signed MEM_VALUE_T mem_get_s##end##sz##_aligned( \ const void *vmem) { \ const int##sz##_t *mem = (const int##sz##_t *)vmem; \ return *mem; \ } #define mem_get_se_aligned_generic(end, sz) \ static VPX_INLINE unsigned MEM_VALUE_T mem_get_##end##sz##_aligned( \ const void *vmem) { \ const uint##sz##_t *mem = (const uint##sz##_t *)vmem; \ unsigned MEM_VALUE_T val, raw = *mem; \ swap_endian_##sz(val, raw); \ return val; \ } #define mem_get_sse_aligned_generic(end, sz) \ static VPX_INLINE signed MEM_VALUE_T mem_get_s##end##sz##_aligned( \ const void *vmem) { \ const int##sz##_t *mem = (const int##sz##_t *)vmem; \ unsigned MEM_VALUE_T val, raw = *mem; \ swap_endian_##sz##_se(val, raw); \ return val; \ } #define mem_put_ne_aligned_generic(end, sz) \ static VPX_INLINE void mem_put_##end##sz##_aligned(void *vmem, \ MEM_VALUE_T val) { \ uint##sz##_t *mem = (uint##sz##_t *)vmem; \ *mem = (uint##sz##_t)val; \ } #define mem_put_se_aligned_generic(end, sz) \ static VPX_INLINE void mem_put_##end##sz##_aligned(void *vmem, \ MEM_VALUE_T val) { \ uint##sz##_t *mem = (uint##sz##_t *)vmem, raw; \ swap_endian_##sz(raw, val); \ *mem = (uint##sz##_t)raw; \ } #include "vpx_config.h" #if CONFIG_BIG_ENDIAN #define mem_get_be_aligned_generic(sz) mem_get_ne_aligned_generic(be, sz) #define mem_get_sbe_aligned_generic(sz) mem_get_sne_aligned_generic(be, sz) #define mem_get_le_aligned_generic(sz) mem_get_se_aligned_generic(le, sz) #define mem_get_sle_aligned_generic(sz) mem_get_sse_aligned_generic(le, sz) #define mem_put_be_aligned_generic(sz) mem_put_ne_aligned_generic(be, sz) #define mem_put_le_aligned_generic(sz) mem_put_se_aligned_generic(le, sz) #else #define mem_get_be_aligned_generic(sz) mem_get_se_aligned_generic(be, sz) #define mem_get_sbe_aligned_generic(sz) mem_get_sse_aligned_generic(be, sz) #define mem_get_le_aligned_generic(sz) mem_get_ne_aligned_generic(le, sz) #define mem_get_sle_aligned_generic(sz) mem_get_sne_aligned_generic(le, sz) #define mem_put_be_aligned_generic(sz) mem_put_se_aligned_generic(be, sz) #define mem_put_le_aligned_generic(sz) 
mem_put_ne_aligned_generic(le, sz) #endif /* clang-format off */ #undef mem_get_be16_aligned #define mem_get_be16_aligned mem_ops_wrap_symbol(mem_get_be16_aligned) mem_get_be_aligned_generic(16) #undef mem_get_be32_aligned #define mem_get_be32_aligned mem_ops_wrap_symbol(mem_get_be32_aligned) mem_get_be_aligned_generic(32) #undef mem_get_le16_aligned #define mem_get_le16_aligned mem_ops_wrap_symbol(mem_get_le16_aligned) mem_get_le_aligned_generic(16) #undef mem_get_le32_aligned #define mem_get_le32_aligned mem_ops_wrap_symbol(mem_get_le32_aligned) mem_get_le_aligned_generic(32) #undef mem_get_sbe16_aligned #define mem_get_sbe16_aligned mem_ops_wrap_symbol(mem_get_sbe16_aligned) mem_get_sbe_aligned_generic(16) #undef mem_get_sbe32_aligned #define mem_get_sbe32_aligned mem_ops_wrap_symbol(mem_get_sbe32_aligned) mem_get_sbe_aligned_generic(32) #undef mem_get_sle16_aligned #define mem_get_sle16_aligned mem_ops_wrap_symbol(mem_get_sle16_aligned) mem_get_sle_aligned_generic(16) #undef mem_get_sle32_aligned #define mem_get_sle32_aligned mem_ops_wrap_symbol(mem_get_sle32_aligned) mem_get_sle_aligned_generic(32) #undef mem_put_be16_aligned #define mem_put_be16_aligned mem_ops_wrap_symbol(mem_put_be16_aligned) mem_put_be_aligned_generic(16) #undef mem_put_be32_aligned #define mem_put_be32_aligned mem_ops_wrap_symbol(mem_put_be32_aligned) mem_put_be_aligned_generic(32) #undef mem_put_le16_aligned #define mem_put_le16_aligned mem_ops_wrap_symbol(mem_put_le16_aligned) mem_put_le_aligned_generic(16) #undef mem_put_le32_aligned #define mem_put_le32_aligned mem_ops_wrap_symbol(mem_put_le32_aligned) mem_put_le_aligned_generic(32) #undef mem_get_ne_aligned_generic #undef mem_get_se_aligned_generic #undef mem_get_sne_aligned_generic #undef mem_get_sse_aligned_generic #undef mem_put_ne_aligned_generic #undef mem_put_se_aligned_generic #undef swap_endian_16 #undef swap_endian_32 #undef swap_endian_16_se #undef swap_endian_32_se /* clang-format on */ #endif // VPX_VPX_PORTS_MEM_OPS_ALIGNED_H_ libvpx-1.8.2/vpx_ports/msvc.h000066400000000000000000000015571357355204000162270ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_PORTS_MSVC_H_ #define VPX_VPX_PORTS_MSVC_H_ #ifdef _MSC_VER #include "./vpx_config.h" #if _MSC_VER < 1900 // VS2015 provides snprintf #define snprintf _snprintf #endif // _MSC_VER < 1900 #if _MSC_VER < 1800 // VS2013 provides round #include static INLINE double round(double x) { if (x < 0) return ceil(x - 0.5); else return floor(x + 0.5); } #endif // _MSC_VER < 1800 #endif // _MSC_VER #endif // VPX_VPX_PORTS_MSVC_H_ libvpx-1.8.2/vpx_ports/ppc.h000066400000000000000000000012521357355204000160310ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_PORTS_PPC_H_ #define VPX_VPX_PORTS_PPC_H_ #include <stdlib.h> #include "./vpx_config.h" #ifdef __cplusplus extern "C" { #endif #define HAS_VSX 0x01 int ppc_simd_caps(void); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_PORTS_PPC_H_ libvpx-1.8.2/vpx_ports/ppc_cpudetect.c000066400000000000000000000035041357355204000200660ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <stdint.h> #include <fcntl.h> #include <unistd.h> #include <asm/cputable.h> #include <linux/auxvec.h> #include "./vpx_config.h" #include "vpx_ports/ppc.h" #if CONFIG_RUNTIME_CPU_DETECT static int cpu_env_flags(int *flags) { char *env; env = getenv("VPX_SIMD_CAPS"); if (env && *env) { *flags = (int)strtol(env, NULL, 0); return 0; } *flags = 0; return -1; } static int cpu_env_mask(void) { char *env; env = getenv("VPX_SIMD_CAPS_MASK"); return env && *env ? (int)strtol(env, NULL, 0) : ~0; } int ppc_simd_caps(void) { int flags; int mask; int fd; ssize_t count; unsigned int i; uint64_t buf[64]; // If VPX_SIMD_CAPS is set then allow only those capabilities. if (!cpu_env_flags(&flags)) { return flags; } mask = cpu_env_mask(); fd = open("/proc/self/auxv", O_RDONLY); if (fd < 0) { return 0; } while ((count = read(fd, buf, sizeof(buf))) > 0) { for (i = 0; i < (count / sizeof(*buf)); i += 2) { if (buf[i] == AT_HWCAP) { #if HAVE_VSX if (buf[i + 1] & PPC_FEATURE_HAS_VSX) { flags |= HAS_VSX; } #endif // HAVE_VSX goto out_close; } else if (buf[i] == AT_NULL) { goto out_close; } } } out_close: close(fd); return flags & mask; } #else // If there is no RTCD the function pointers are not used and can not be // changed. int ppc_simd_caps(void) { return 0; } #endif // CONFIG_RUNTIME_CPU_DETECT libvpx-1.8.2/vpx_ports/system_state.h000066400000000000000000000014761357355204000200030ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_PORTS_SYSTEM_STATE_H_ #define VPX_VPX_PORTS_SYSTEM_STATE_H_ #include "./vpx_config.h" #ifdef __cplusplus extern "C" { #endif #if (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX extern void vpx_clear_system_state(void); #else #define vpx_clear_system_state() #endif // (VPX_ARCH_X86 || VPX_ARCH_X86_64) && HAVE_MMX #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_PORTS_SYSTEM_STATE_H_ libvpx-1.8.2/vpx_ports/vpx_once.h000066400000000000000000000075251357355204000171010ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #ifndef VPX_VPX_PORTS_VPX_ONCE_H_ #define VPX_VPX_PORTS_VPX_ONCE_H_ #include "vpx_config.h" /* Implement a function wrapper to guarantee initialization * thread-safety for library singletons. * * NOTE: These functions use static locks, and can only be * used with one common argument per compilation unit. So * * file1.c: * vpx_once(foo); * ... * vpx_once(foo); * * file2.c: * vpx_once(bar); * * will ensure foo() and bar() are each called only once, but in * * file1.c: * vpx_once(foo); * vpx_once(bar); * * bar() will never be called because the lock is used up * by the call to foo(). */ #if CONFIG_MULTITHREAD && defined(_WIN32) #include <windows.h> #include <stdlib.h> /* Declare a per-compilation-unit state variable to track the progress * of calling func() only once. This must be at global scope because * local initializers are not thread-safe in MSVC prior to Visual * Studio 2015. * * As a static, once_state will be zero-initialized at program start. */ static LONG once_state; static void once(void (*func)(void)) { /* Try to advance once_state from its initial value of 0 to 1. * Only one thread can succeed in doing so. */ if (InterlockedCompareExchange(&once_state, 1, 0) == 0) { /* We're the winning thread, having set once_state to 1. * Call our function. */ func(); /* Now advance once_state to 2, unblocking any other threads. */ InterlockedIncrement(&once_state); return; } /* We weren't the winning thread, but we want to block on * the state variable so we don't return before func() * has finished executing elsewhere. * * Try to advance once_state from 2 to 2, which is only possible * after the winning thread advances it from 1 to 2. */ while (InterlockedCompareExchange(&once_state, 2, 2) != 2) { /* State isn't yet 2. Try again. * * We are used for singleton initialization functions, * which should complete quickly. Contention will likewise * be rare, so it's worthwhile to use a simple but cpu-intensive * busy-wait instead of successive backoff, * waiting on a kernel object, or another heavier-weight scheme. * * We can at least yield our timeslice. */ Sleep(0); } /* We've seen once_state advance to 2, so we know func() * has been called. And we've left once_state as we found it, * so other threads will have the same experience. * * It's safe to return now. */ return; } #elif CONFIG_MULTITHREAD && defined(__OS2__) #define INCL_DOS #include <os2.h> static void once(void (*func)(void)) { static int done; /* If the initialization is complete, return early. */ if (done) return; /* Causes all other threads in the process to block themselves * and give up their time slice. */ DosEnterCritSec(); if (!done) { func(); done = 1; } /* Restores normal thread dispatching for the current process. */ DosExitCritSec(); } #elif CONFIG_MULTITHREAD && HAVE_PTHREAD_H #include <pthread.h> static void once(void (*func)(void)) { static pthread_once_t lock = PTHREAD_ONCE_INIT; pthread_once(&lock, func); } #else /* No-op version that performs no synchronization. *_rtcd() is idempotent, * so as long as your platform provides atomic loads/stores of pointers * no synchronization is strictly necessary. */ static void once(void (*func)(void)) { static int done; if (!done) { func(); done = 1; } } #endif #endif // VPX_VPX_PORTS_VPX_ONCE_H_ libvpx-1.8.2/vpx_ports/vpx_ports.mk000066400000000000000000000023161357355204000174750ustar00rootroot00000000000000## ## Copyright (c) 2012 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree.
An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## PORTS_SRCS-yes += vpx_ports.mk PORTS_SRCS-yes += bitops.h PORTS_SRCS-yes += mem.h PORTS_SRCS-yes += msvc.h PORTS_SRCS-yes += system_state.h PORTS_SRCS-yes += vpx_timer.h ifeq ($(VPX_ARCH_X86),yes) PORTS_SRCS-$(HAVE_MMX) += emms_mmx.c endif ifeq ($(VPX_ARCH_X86_64),yes) # Visual Studio x64 does not support the _mm_empty() intrinsic. PORTS_SRCS-$(HAVE_MMX) += emms_mmx.asm endif ifeq ($(VPX_ARCH_X86_64),yes) PORTS_SRCS-$(CONFIG_MSVS) += float_control_word.asm endif ifeq ($(VPX_ARCH_X86)$(VPX_ARCH_X86_64),yes) PORTS_SRCS-yes += x86.h PORTS_SRCS-yes += x86_abi_support.asm endif PORTS_SRCS-$(VPX_ARCH_ARM) += arm_cpudetect.c PORTS_SRCS-$(VPX_ARCH_ARM) += arm.h PORTS_SRCS-$(VPX_ARCH_PPC) += ppc_cpudetect.c PORTS_SRCS-$(VPX_ARCH_PPC) += ppc.h ifeq ($(VPX_ARCH_MIPS), yes) PORTS_SRCS-yes += asmdefs_mmi.h endif libvpx-1.8.2/vpx_ports/vpx_timer.h000066400000000000000000000051631357355204000172710ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_PORTS_VPX_TIMER_H_ #define VPX_VPX_PORTS_VPX_TIMER_H_ #include "./vpx_config.h" #include "vpx/vpx_integer.h" #if CONFIG_OS_SUPPORT #if defined(_WIN32) /* * Win32 specific includes */ #undef NOMINMAX #define NOMINMAX #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include <windows.h> #else /* * POSIX specific includes */ #include <sys/time.h> /* timersub is not provided by msys at this time.
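For example, with a = {3 s, 200000 us} and b = {1 s, 800000 us}, the fallback below borrows one second from tv_sec and yields {1 s, 400000 us}.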
*/ #ifndef timersub #define timersub(a, b, result) \ do { \ (result)->tv_sec = (a)->tv_sec - (b)->tv_sec; \ (result)->tv_usec = (a)->tv_usec - (b)->tv_usec; \ if ((result)->tv_usec < 0) { \ --(result)->tv_sec; \ (result)->tv_usec += 1000000; \ } \ } while (0) #endif #endif struct vpx_usec_timer { #if defined(_WIN32) LARGE_INTEGER begin, end; #else struct timeval begin, end; #endif }; static INLINE void vpx_usec_timer_start(struct vpx_usec_timer *t) { #if defined(_WIN32) QueryPerformanceCounter(&t->begin); #else gettimeofday(&t->begin, NULL); #endif } static INLINE void vpx_usec_timer_mark(struct vpx_usec_timer *t) { #if defined(_WIN32) QueryPerformanceCounter(&t->end); #else gettimeofday(&t->end, NULL); #endif } static INLINE int64_t vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { #if defined(_WIN32) LARGE_INTEGER freq, diff; diff.QuadPart = t->end.QuadPart - t->begin.QuadPart; QueryPerformanceFrequency(&freq); return diff.QuadPart * 1000000 / freq.QuadPart; #else struct timeval diff; timersub(&t->end, &t->begin, &diff); return (int64_t)diff.tv_sec * 1000000 + diff.tv_usec; #endif } #else /* CONFIG_OS_SUPPORT = 0*/ /* Empty timer functions if CONFIG_OS_SUPPORT = 0 */ #ifndef timersub #define timersub(a, b, result) #endif struct vpx_usec_timer { void *dummy; }; static INLINE void vpx_usec_timer_start(struct vpx_usec_timer *t) {} static INLINE void vpx_usec_timer_mark(struct vpx_usec_timer *t) {} static INLINE int vpx_usec_timer_elapsed(struct vpx_usec_timer *t) { return 0; } #endif /* CONFIG_OS_SUPPORT */ #endif // VPX_VPX_PORTS_VPX_TIMER_H_ libvpx-1.8.2/vpx_ports/x86.h000066400000000000000000000273371357355204000157100ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #ifndef VPX_VPX_PORTS_X86_H_ #define VPX_VPX_PORTS_X86_H_ #include <stdlib.h> #if defined(_MSC_VER) #include <intrin.h> /* For __cpuidex, __rdtsc */ #endif #include "vpx_config.h" #include "vpx/vpx_integer.h" #ifdef __cplusplus extern "C" { #endif typedef enum { VPX_CPU_UNKNOWN = -1, VPX_CPU_AMD, VPX_CPU_AMD_OLD, VPX_CPU_CENTAUR, VPX_CPU_CYRIX, VPX_CPU_INTEL, VPX_CPU_NEXGEN, VPX_CPU_NSC, VPX_CPU_RISE, VPX_CPU_SIS, VPX_CPU_TRANSMETA, VPX_CPU_TRANSMETA_OLD, VPX_CPU_UMC, VPX_CPU_VIA, VPX_CPU_LAST } vpx_cpu_t; #if defined(__GNUC__) && __GNUC__ || defined(__ANDROID__) #if VPX_ARCH_X86_64 #define cpuid(func, func2, ax, bx, cx, dx) \ __asm__ __volatile__("cpuid \n\t" \ : "=a"(ax), "=b"(bx), "=c"(cx), "=d"(dx) \ : "a"(func), "c"(func2)); #else #define cpuid(func, func2, ax, bx, cx, dx) \ __asm__ __volatile__( \ "mov %%ebx, %%edi \n\t" \ "cpuid \n\t" \ "xchg %%edi, %%ebx \n\t" \ : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \ : "a"(func), "c"(func2)); #endif #elif defined(__SUNPRO_C) || \ defined(__SUNPRO_CC) /* end __GNUC__ or __ANDROID__*/ #if VPX_ARCH_X86_64 #define cpuid(func, func2, ax, bx, cx, dx) \ asm volatile( \ "xchg %rsi, %rbx \n\t" \ "cpuid \n\t" \ "movl %ebx, %edi \n\t" \ "xchg %rsi, %rbx \n\t" \ : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \ : "a"(func), "c"(func2)); #else #define cpuid(func, func2, ax, bx, cx, dx) \ asm volatile( \ "pushl %ebx \n\t" \ "cpuid \n\t" \ "movl %ebx, %edi \n\t" \ "popl %ebx \n\t" \ : "=a"(ax), "=D"(bx), "=c"(cx), "=d"(dx) \ : "a"(func), "c"(func2)); #endif #else /* end __SUNPRO__ */ #if VPX_ARCH_X86_64 #if defined(_MSC_VER) && _MSC_VER > 1500 #define cpuid(func, func2, a, b, c, d) \ do { \ int regs[4]; \ __cpuidex(regs, func, func2); \ a = regs[0]; \ b = regs[1]; \ c = regs[2]; \ d = regs[3]; \ } while (0) #else #define cpuid(func, func2, a, b, c, d) \ do { \ int regs[4]; \ __cpuid(regs, func); \ a = regs[0]; \ b = regs[1]; \ c = regs[2]; \ d = regs[3]; \ } while (0) #endif #else #define cpuid(func, func2, a, b, c, d) \ __asm mov eax, func __asm mov ecx, func2 __asm cpuid __asm mov a, \ eax __asm mov b, ebx __asm mov c, ecx __asm mov d, edx #endif #endif /* end others */ // NaCl has no support for xgetbv or the raw opcode. #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__)) static INLINE uint64_t xgetbv(void) { const uint32_t ecx = 0; uint32_t eax, edx; // Use the raw opcode for xgetbv for compatibility with older toolchains. __asm__ volatile(".byte 0x0f, 0x01, 0xd0\n" : "=a"(eax), "=d"(edx) : "c"(ecx)); return ((uint64_t)edx << 32) | eax; } #elif (defined(_M_X64) || defined(_M_IX86)) && defined(_MSC_FULL_VER) && \ _MSC_FULL_VER >= 160040219 // >= VS2010 SP1 #include <immintrin.h> #define xgetbv() _xgetbv(0) #elif defined(_MSC_VER) && defined(_M_IX86) static INLINE uint64_t xgetbv(void) { uint32_t eax_, edx_; __asm { xor ecx, ecx // ecx = 0 // Use the raw opcode for xgetbv for compatibility with older toolchains. __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0 mov eax_, eax mov edx_, edx } return ((uint64_t)edx_ << 32) | eax_; } #else #define xgetbv() 0U // no AVX for older x64 or unrecognized toolchains.
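// xgetbv(0) reads XCR0: bit 1 is SSE (XMM) state, bit 2 is AVX (YMM) state, and bits 5-7 cover the AVX-512 opmask and ZMM state. x86_simd_caps() below requires (xgetbv() & 0x6) == 0x6 before reporting AVX/AVX2 and (xgetbv() & 0xe6) == 0xe6 before reporting AVX-512.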
#endif #if defined(_MSC_VER) && _MSC_VER >= 1700 #undef NOMINMAX #define NOMINMAX #ifndef WIN32_LEAN_AND_MEAN #define WIN32_LEAN_AND_MEAN #endif #include <windows.h> #if WINAPI_FAMILY_PARTITION(WINAPI_FAMILY_APP) #define getenv(x) NULL #endif #endif #define HAS_MMX 0x001 #define HAS_SSE 0x002 #define HAS_SSE2 0x004 #define HAS_SSE3 0x008 #define HAS_SSSE3 0x010 #define HAS_SSE4_1 0x020 #define HAS_AVX 0x040 #define HAS_AVX2 0x080 #define HAS_AVX512 0x100 #ifndef BIT #define BIT(n) (1u << (n)) #endif static INLINE int x86_simd_caps(void) { unsigned int flags = 0; unsigned int mask = ~0; unsigned int max_cpuid_val, reg_eax, reg_ebx, reg_ecx, reg_edx; char *env; (void)reg_ebx; /* See if the CPU capabilities are being overridden by the environment */ env = getenv("VPX_SIMD_CAPS"); if (env && *env) return (int)strtol(env, NULL, 0); env = getenv("VPX_SIMD_CAPS_MASK"); if (env && *env) mask = (unsigned int)strtoul(env, NULL, 0); /* Ensure that the CPUID instruction supports extended features */ cpuid(0, 0, max_cpuid_val, reg_ebx, reg_ecx, reg_edx); if (max_cpuid_val < 1) return 0; /* Get the standard feature flags */ cpuid(1, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); if (reg_edx & BIT(23)) flags |= HAS_MMX; if (reg_edx & BIT(25)) flags |= HAS_SSE; /* aka xmm */ if (reg_edx & BIT(26)) flags |= HAS_SSE2; /* aka wmt */ if (reg_ecx & BIT(0)) flags |= HAS_SSE3; if (reg_ecx & BIT(9)) flags |= HAS_SSSE3; if (reg_ecx & BIT(19)) flags |= HAS_SSE4_1; // bits 27 (OSXSAVE) & 28 (256-bit AVX) if ((reg_ecx & (BIT(27) | BIT(28))) == (BIT(27) | BIT(28))) { // Check for OS-support of YMM state. Necessary for AVX and AVX2. if ((xgetbv() & 0x6) == 0x6) { flags |= HAS_AVX; if (max_cpuid_val >= 7) { /* Get the leaf 7 feature flags. Needed to check for AVX2 support */ cpuid(7, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); if (reg_ebx & BIT(5)) flags |= HAS_AVX2; // bits 16 (AVX-512F) & 17 (AVX-512DQ) & 28 (AVX-512CD) & // 30 (AVX-512BW) & 31 (AVX-512VL) if ((reg_ebx & (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) == (BIT(16) | BIT(17) | BIT(28) | BIT(30) | BIT(31))) { // Check for OS-support of ZMM and YMM state. Necessary for AVX-512. if ((xgetbv() & 0xe6) == 0xe6) flags |= HAS_AVX512; } } } } return flags & mask; } // Fine-Grain Measurement Functions // // If you are timing a small region of code, access the timestamp counter // (TSC) via: // // unsigned int start = x86_tsc_start(); // ... // unsigned int end = x86_tsc_end(); // unsigned int diff = end - start; // // The start/end functions introduce a few more instructions than using // x86_readtsc directly, but prevent the CPU's out-of-order execution from // affecting the measurement (by having earlier/later instructions be evaluated // in the time interval). See the white paper, "How to Benchmark Code // Execution Times on Intel® IA-32 and IA-64 Instruction Set Architectures" by // Gabriele Paoloni for more information. // // If you are timing a large function (CPU time > a couple of seconds), use // x86_readtsc64 to read the timestamp counter in a 64-bit integer. The // out-of-order leakage that can occur is minimal compared to total runtime.
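// A minimal sketch of the pattern above (do_work() is a hypothetical stand-in // for the region under test): // // unsigned int start = x86_tsc_start(); // do_work(); // unsigned int cycles = x86_tsc_end() - start; // // The unsigned subtraction stays correct even if the 32-bit counter wraps once // between the two reads.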
static INLINE unsigned int x86_readtsc(void) { #if defined(__GNUC__) && __GNUC__ unsigned int tsc; __asm__ __volatile__("rdtsc\n\t" : "=a"(tsc) :); return tsc; #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) unsigned int tsc; asm volatile("rdtsc\n\t" : "=a"(tsc) :); return tsc; #else #if VPX_ARCH_X86_64 return (unsigned int)__rdtsc(); #else __asm rdtsc; #endif #endif } // 64-bit CPU cycle counter static INLINE uint64_t x86_readtsc64(void) { #if defined(__GNUC__) && __GNUC__ uint32_t hi, lo; __asm__ __volatile__("rdtsc" : "=a"(lo), "=d"(hi)); return ((uint64_t)hi << 32) | lo; #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) uint_t hi, lo; asm volatile("rdtsc\n\t" : "=a"(lo), "=d"(hi)); return ((uint64_t)hi << 32) | lo; #else #if VPX_ARCH_X86_64 return (uint64_t)__rdtsc(); #else __asm rdtsc; #endif #endif } // 32-bit CPU cycle counter with a partial fence against out-of-order execution. static INLINE unsigned int x86_readtscp(void) { #if defined(__GNUC__) && __GNUC__ unsigned int tscp; __asm__ __volatile__("rdtscp\n\t" : "=a"(tscp) :); return tscp; #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) unsigned int tscp; asm volatile("rdtscp\n\t" : "=a"(tscp) :); return tscp; #elif defined(_MSC_VER) unsigned int ui; return (unsigned int)__rdtscp(&ui); #else #if VPX_ARCH_X86_64 return (unsigned int)__rdtscp(); #else __asm rdtscp; #endif #endif } static INLINE unsigned int x86_tsc_start(void) { unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); return x86_readtsc(); } static INLINE unsigned int x86_tsc_end(void) { uint32_t v = x86_readtscp(); unsigned int reg_eax, reg_ebx, reg_ecx, reg_edx; cpuid(0, 0, reg_eax, reg_ebx, reg_ecx, reg_edx); return v; } #if defined(__GNUC__) && __GNUC__ #define x86_pause_hint() __asm__ __volatile__("pause \n\t") #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) #define x86_pause_hint() asm volatile("pause \n\t") #else #if VPX_ARCH_X86_64 #define x86_pause_hint() _mm_pause(); #else #define x86_pause_hint() __asm pause #endif #endif #if defined(__GNUC__) && __GNUC__ static void x87_set_control_word(unsigned short mode) { __asm__ __volatile__("fldcw %0" : : "m"(*&mode)); } static unsigned short x87_get_control_word(void) { unsigned short mode; __asm__ __volatile__("fstcw %0\n\t" : "=m"(*&mode) :); return mode; } #elif defined(__SUNPRO_C) || defined(__SUNPRO_CC) static void x87_set_control_word(unsigned short mode) { asm volatile("fldcw %0" : : "m"(*&mode)); } static unsigned short x87_get_control_word(void) { unsigned short mode; asm volatile("fstcw %0\n\t" : "=m"(*&mode) :); return mode; } #elif VPX_ARCH_X86_64 /* No fldcw intrinsics on Windows x64, punt to external asm */ extern void vpx_winx64_fldcw(unsigned short mode); extern unsigned short vpx_winx64_fstcw(void); #define x87_set_control_word vpx_winx64_fldcw #define x87_get_control_word vpx_winx64_fstcw #else static void x87_set_control_word(unsigned short mode) { __asm { fldcw mode } } static unsigned short x87_get_control_word(void) { unsigned short mode; __asm { fstcw mode } return mode; } #endif static INLINE unsigned int x87_set_double_precision(void) { unsigned int mode = x87_get_control_word(); // Intel 64 and IA-32 Architectures Developer's Manual: Vol. 1 // https://www.intel.com/content/dam/www/public/us/en/documents/manuals/64-ia-32-architectures-software-developer-vol-1-manual.pdf // 8.1.5.2 Precision Control Field // Bits 8 and 9 (0x300) of the x87 FPU Control Word ("Precision Control") // determine the number of bits used in floating point calculations. 
To match // later SSE instructions restrict x87 operations to Double Precision (0x200). // Precision PC Field // Single Precision (24-Bits) 00B // Reserved 01B // Double Precision (53-Bits) 10B // Extended Precision (64-Bits) 11B x87_set_control_word((mode & ~0x300) | 0x200); return mode; } #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_PORTS_X86_H_ libvpx-1.8.2/vpx_ports/x86_abi_support.asm000066400000000000000000000222111357355204000206320ustar00rootroot00000000000000; ; Copyright (c) 2010 The WebM project authors. All Rights Reserved. ; ; Use of this source code is governed by a BSD-style license ; that can be found in the LICENSE file in the root of the source ; tree. An additional intellectual property rights grant can be found ; in the file PATENTS. All contributing project authors may ; be found in the AUTHORS file in the root of the source tree. ; %include "vpx_config.asm" ; 32/64 bit compatibility macros ; ; In general, we make the source use 64 bit syntax, then twiddle with it using ; the preprocessor to get the 32 bit syntax on 32 bit platforms. ; %ifidn __OUTPUT_FORMAT__,elf32 %define ABI_IS_32BIT 1 %elifidn __OUTPUT_FORMAT__,macho32 %define ABI_IS_32BIT 1 %elifidn __OUTPUT_FORMAT__,win32 %define ABI_IS_32BIT 1 %elifidn __OUTPUT_FORMAT__,aout %define ABI_IS_32BIT 1 %else %define ABI_IS_32BIT 0 %endif %if ABI_IS_32BIT %define rax eax %define rbx ebx %define rcx ecx %define rdx edx %define rsi esi %define rdi edi %define rsp esp %define rbp ebp %define movsxd mov %macro movq 2 %ifidn %1,eax movd %1,%2 %elifidn %2,eax movd %1,%2 %elifidn %1,ebx movd %1,%2 %elifidn %2,ebx movd %1,%2 %elifidn %1,ecx movd %1,%2 %elifidn %2,ecx movd %1,%2 %elifidn %1,edx movd %1,%2 %elifidn %2,edx movd %1,%2 %elifidn %1,esi movd %1,%2 %elifidn %2,esi movd %1,%2 %elifidn %1,edi movd %1,%2 %elifidn %2,edi movd %1,%2 %elifidn %1,esp movd %1,%2 %elifidn %2,esp movd %1,%2 %elifidn %1,ebp movd %1,%2 %elifidn %2,ebp movd %1,%2 %else movq %1,%2 %endif %endmacro %endif ; LIBVPX_YASM_WIN64 ; Set LIBVPX_YASM_WIN64 if output is Windows 64bit so the code will work if x64 ; or win64 is defined on the Yasm command line. %ifidn __OUTPUT_FORMAT__,win64 %define LIBVPX_YASM_WIN64 1 %elifidn __OUTPUT_FORMAT__,x64 %define LIBVPX_YASM_WIN64 1 %else %define LIBVPX_YASM_WIN64 0 %endif ; sym() ; Return the proper symbol name for the target ABI. ; ; Certain ABIs, notably MS COFF and Darwin MACH-O, require that symbols ; with C linkage be prefixed with an underscore. ; %ifidn __OUTPUT_FORMAT__,elf32 %define sym(x) x %elifidn __OUTPUT_FORMAT__,elf64 %define sym(x) x %elifidn __OUTPUT_FORMAT__,elfx32 %define sym(x) x %elif LIBVPX_YASM_WIN64 %define sym(x) x %else %define sym(x) _ %+ x %endif ; PRIVATE ; Macro for the attribute to hide a global symbol for the target ABI. ; This is only active if CHROMIUM is defined. ; ; Chromium doesn't like exported global symbols due to symbol clashing with ; plugins among other things. 
; ; Requires Chromium's patched copy of yasm: ; http://src.chromium.org/viewvc/chrome?view=rev&revision=73761 ; http://www.tortall.net/projects/yasm/ticket/236 ; %ifdef CHROMIUM %ifidn __OUTPUT_FORMAT__,elf32 %define PRIVATE :hidden %elifidn __OUTPUT_FORMAT__,elf64 %define PRIVATE :hidden %elifidn __OUTPUT_FORMAT__,elfx32 %define PRIVATE :hidden %elif LIBVPX_YASM_WIN64 %define PRIVATE %else %define PRIVATE :private_extern %endif %else %define PRIVATE %endif ; arg() ; Return the address specification of the given argument ; %if ABI_IS_32BIT %define arg(x) [ebp+8+4*x] %else ; 64 bit ABI passes arguments in registers. This is a workaround to get up ; and running quickly. Relies on SHADOW_ARGS_TO_STACK %if LIBVPX_YASM_WIN64 %define arg(x) [rbp+16+8*x] %else %define arg(x) [rbp-8-8*x] %endif %endif ; REG_SZ_BYTES, REG_SZ_BITS ; Size of a register %if ABI_IS_32BIT %define REG_SZ_BYTES 4 %define REG_SZ_BITS 32 %else %define REG_SZ_BYTES 8 %define REG_SZ_BITS 64 %endif ; ALIGN_STACK ; This macro aligns the stack to the given alignment (in bytes). The stack ; is left such that the previous value of the stack pointer is the first ; argument on the stack (ie, the inverse of this macro is 'pop rsp.') ; This macro uses one temporary register, which is not preserved, and thus ; must be specified as an argument. %macro ALIGN_STACK 2 mov %2, rsp and rsp, -%1 lea rsp, [rsp - (%1 - REG_SZ_BYTES)] push %2 %endmacro ; ; The Microsoft assembler tries to impose a certain amount of type safety in ; its register usage. YASM doesn't recognize these directives, so we just ; %define them away to maintain as much compatibility as possible with the ; original inline assembler we're porting from. ; %idefine PTR %idefine XMMWORD %idefine MMWORD ; PIC macros ; %if ABI_IS_32BIT %if CONFIG_PIC=1 %ifidn __OUTPUT_FORMAT__,elf32 %define WRT_PLT wrt ..plt %macro GET_GOT 1 extern _GLOBAL_OFFSET_TABLE_ push %1 call %%get_got %%sub_offset: jmp %%exitGG %%get_got: mov %1, [esp] add %1, _GLOBAL_OFFSET_TABLE_ + $$ - %%sub_offset wrt ..gotpc ret %%exitGG: %undef GLOBAL %define GLOBAL(x) x + %1 wrt ..gotoff %undef RESTORE_GOT %define RESTORE_GOT pop %1 %endmacro %elifidn __OUTPUT_FORMAT__,macho32 %macro GET_GOT 1 push %1 call %%get_got %%get_got: pop %1 %undef GLOBAL %define GLOBAL(x) x + %1 - %%get_got %undef RESTORE_GOT %define RESTORE_GOT pop %1 %endmacro %endif %endif %ifdef CHROMIUM %ifidn __OUTPUT_FORMAT__,macho32 %define HIDDEN_DATA(x) x:private_extern %else %define HIDDEN_DATA(x) x %endif %else %define HIDDEN_DATA(x) x %endif %else %macro GET_GOT 1 %endmacro %define GLOBAL(x) rel x %ifidn __OUTPUT_FORMAT__,elf64 %define WRT_PLT wrt ..plt %define HIDDEN_DATA(x) x:data hidden %elifidn __OUTPUT_FORMAT__,elfx32 %define WRT_PLT wrt ..plt %define HIDDEN_DATA(x) x:data hidden %elifidn __OUTPUT_FORMAT__,macho64 %ifdef CHROMIUM %define HIDDEN_DATA(x) x:private_extern %else %define HIDDEN_DATA(x) x %endif %else %define HIDDEN_DATA(x) x %endif %endif %ifnmacro GET_GOT %macro GET_GOT 1 %endmacro %define GLOBAL(x) x %endif %ifndef RESTORE_GOT %define RESTORE_GOT %endif %ifndef WRT_PLT %define WRT_PLT %endif %if ABI_IS_32BIT %macro SHADOW_ARGS_TO_STACK 1 %endm %define UNSHADOW_ARGS %else %if LIBVPX_YASM_WIN64 %macro SHADOW_ARGS_TO_STACK 1 ; argc %if %1 > 0 mov arg(0),rcx %endif %if %1 > 1 mov arg(1),rdx %endif %if %1 > 2 mov arg(2),r8 %endif %if %1 > 3 mov arg(3),r9 %endif %endm %else %macro SHADOW_ARGS_TO_STACK 1 ; argc %if %1 > 0 push rdi %endif %if %1 > 1 push rsi %endif %if %1 > 2 push rdx %endif %if %1 > 3 push rcx %endif %if %1 > 4 
push r8 %endif %if %1 > 5 push r9 %endif %if %1 > 6 %assign i %1-6 %assign off 16 %rep i mov rax,[rbp+off] push rax %assign off off+8 %endrep %endif %endm %endif %define UNSHADOW_ARGS mov rsp, rbp %endif ; Win64 ABI requires that XMM6:XMM15 are callee saved ; SAVE_XMM n, [u] ; store registers 6-n on the stack ; if u is specified, use unaligned movs. ; Win64 ABI requires 16 byte stack alignment, but then pushes an 8 byte return ; value. Typically we follow this up with 'push rbp' - re-aligning the stack - ; but in some cases this is not done and unaligned movs must be used. %if LIBVPX_YASM_WIN64 %macro SAVE_XMM 1-2 a %if %1 < 6 %error Only xmm registers 6-15 must be preserved %else %assign last_xmm %1 %define movxmm movdq %+ %2 %assign xmm_stack_space ((last_xmm - 5) * 16) sub rsp, xmm_stack_space %assign i 6 %rep (last_xmm - 5) movxmm [rsp + ((i - 6) * 16)], xmm %+ i %assign i i+1 %endrep %endif %endmacro %macro RESTORE_XMM 0 %ifndef last_xmm %error RESTORE_XMM must be paired with SAVE_XMM n %else %assign i last_xmm %rep (last_xmm - 5) movxmm xmm %+ i, [rsp +((i - 6) * 16)] %assign i i-1 %endrep add rsp, xmm_stack_space ; there are a couple functions which return from multiple places. ; otherwise, we could uncomment these: ; %undef last_xmm ; %undef xmm_stack_space ; %undef movxmm %endif %endmacro %else %macro SAVE_XMM 1-2 %endmacro %macro RESTORE_XMM 0 %endmacro %endif ; Name of the rodata section ; ; .rodata seems to be an elf-ism, as it doesn't work on OSX. ; %ifidn __OUTPUT_FORMAT__,macho64 %define SECTION_RODATA section .text %elifidn __OUTPUT_FORMAT__,macho32 %macro SECTION_RODATA 0 section .text %endmacro %elifidn __OUTPUT_FORMAT__,aout %define SECTION_RODATA section .data %else %define SECTION_RODATA section .rodata %endif ; Tell GNU ld that we don't require an executable stack. %ifidn __OUTPUT_FORMAT__,elf32 section .note.GNU-stack noalloc noexec nowrite progbits section .text %elifidn __OUTPUT_FORMAT__,elf64 section .note.GNU-stack noalloc noexec nowrite progbits section .text %elifidn __OUTPUT_FORMAT__,elfx32 section .note.GNU-stack noalloc noexec nowrite progbits section .text %endif ; On Android platforms use lrand48 when building postproc routines. Prior to ; Android L, rand() was not available. %if CONFIG_POSTPROC=1 || CONFIG_VP9_POSTPROC=1 %ifdef __ANDROID__ extern sym(lrand48) %define LIBVPX_RAND lrand48 %else extern sym(rand) %define LIBVPX_RAND rand %endif %endif ; CONFIG_POSTPROC || CONFIG_VP9_POSTPROC libvpx-1.8.2/vpx_scale/000077500000000000000000000000001357355204000150165ustar00rootroot00000000000000libvpx-1.8.2/vpx_scale/generic/000077500000000000000000000000001357355204000164325ustar00rootroot00000000000000libvpx-1.8.2/vpx_scale/generic/gen_scalers.c000066400000000000000000000162061357355204000210700ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/ #include "./vpx_scale_rtcd.h" #include "vpx_scale/vpx_scale.h" #include "vpx_mem/vpx_mem.h" /**************************************************************************** * Imports ****************************************************************************/ /**************************************************************************** * * * INPUTS : const unsigned char *source : Pointer to source data. * unsigned int source_width : Stride of source. * unsigned char *dest : Pointer to destination data. * unsigned int dest_width : Stride of dest (UNUSED). * * OUTPUTS : None. * * RETURNS : void * * FUNCTION : Copies horizontal line of pixels from source to * destination scaling up by 4 to 5. * * SPECIAL NOTES : None. * ****************************************************************************/ void vp8_horizontal_line_5_4_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width) { unsigned i; unsigned int a, b, c, d, e; unsigned char *des = dest; const unsigned char *src = source; (void)dest_width; for (i = 0; i < source_width; i += 5) { a = src[0]; b = src[1]; c = src[2]; d = src[3]; e = src[4]; des[0] = (unsigned char)a; des[1] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); des[2] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); des[3] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); src += 5; des += 4; } } void vp8_vertical_band_5_4_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { unsigned int i; unsigned int a, b, c, d, e; unsigned char *des = dest; unsigned char *src = source; for (i = 0; i < dest_width; i++) { a = src[0 * src_pitch]; b = src[1 * src_pitch]; c = src[2 * src_pitch]; d = src[3 * src_pitch]; e = src[4 * src_pitch]; des[0 * dest_pitch] = (unsigned char)a; des[1 * dest_pitch] = (unsigned char)((b * 192 + c * 64 + 128) >> 8); des[2 * dest_pitch] = (unsigned char)((c * 128 + d * 128 + 128) >> 8); des[3 * dest_pitch] = (unsigned char)((d * 64 + e * 192 + 128) >> 8); src++; des++; } } /*7*************************************************************************** * * ROUTINE : vp8_horizontal_line_3_5_scale_c * * INPUTS : const unsigned char *source : Pointer to source data. * unsigned int source_width : Stride of source. * unsigned char *dest : Pointer to destination data. * unsigned int dest_width : Stride of dest (UNUSED). * * OUTPUTS : None. * * RETURNS : void * * FUNCTION : Copies horizontal line of pixels from source to * destination scaling up by 3 to 5. * * SPECIAL NOTES : None. 
* * ****************************************************************************/ void vp8_horizontal_line_5_3_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width) { unsigned int i; unsigned int a, b, c, d, e; unsigned char *des = dest; const unsigned char *src = source; (void)dest_width; for (i = 0; i < source_width; i += 5) { a = src[0]; b = src[1]; c = src[2]; d = src[3]; e = src[4]; des[0] = (unsigned char)a; des[1] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); des[2] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); src += 5; des += 3; } } void vp8_vertical_band_5_3_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { unsigned int i; unsigned int a, b, c, d, e; unsigned char *des = dest; unsigned char *src = source; for (i = 0; i < dest_width; i++) { a = src[0 * src_pitch]; b = src[1 * src_pitch]; c = src[2 * src_pitch]; d = src[3 * src_pitch]; e = src[4 * src_pitch]; des[0 * dest_pitch] = (unsigned char)a; des[1 * dest_pitch] = (unsigned char)((b * 85 + c * 171 + 128) >> 8); des[2 * dest_pitch] = (unsigned char)((d * 171 + e * 85 + 128) >> 8); src++; des++; } } /**************************************************************************** * * ROUTINE : vp8_horizontal_line_2_1_scale_c * * INPUTS : const unsigned char *source : Pointer to source data. * unsigned int source_width : Stride of source. * unsigned char *dest : Pointer to destination data. * unsigned int dest_width : Stride of dest (UNUSED). * * OUTPUTS : None. * * RETURNS : void * * FUNCTION : Copies horizontal line of pixels from source to * destination scaling down from 2 to 1. * * SPECIAL NOTES : None. * ****************************************************************************/ void vp8_horizontal_line_2_1_scale_c(const unsigned char *source, unsigned int source_width, unsigned char *dest, unsigned int dest_width) { unsigned int i; unsigned int a; unsigned char *des = dest; const unsigned char *src = source; (void)dest_width; for (i = 0; i < source_width; i += 2) { a = src[0]; des[0] = (unsigned char)(a); src += 2; des += 1; } } void vp8_vertical_band_2_1_scale_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { (void)dest_pitch; (void)src_pitch; memcpy(dest, source, dest_width); } void vp8_vertical_band_2_1_scale_i_c(unsigned char *source, unsigned int src_pitch, unsigned char *dest, unsigned int dest_pitch, unsigned int dest_width) { int i; int temp; int width = dest_width; (void)dest_pitch; for (i = 0; i < width; i++) { temp = 8; temp += source[i - (int)src_pitch] * 3; temp += source[i] * 10; temp += source[i + src_pitch] * 3; temp >>= 4; dest[i] = (unsigned char)(temp); } } libvpx-1.8.2/vpx_scale/generic/vpx_scale.c000066400000000000000000000466071357355204000205730ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ /**************************************************************************** * * Module Title : scale.c * * Description : Image scaling functions.
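* * Only the 4/5, 3/5 and 1/2 ratios have dedicated fast paths; anything else goes through the generic 2-tap scale1d_c path in Scale2D below.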
* ***************************************************************************/ /**************************************************************************** * Header Files ****************************************************************************/ #include "./vpx_scale_rtcd.h" #include "vpx_mem/vpx_mem.h" #include "vpx_scale/vpx_scale.h" #include "vpx_scale/yv12config.h" typedef struct { int expanded_frame_width; int expanded_frame_height; int HScale; int HRatio; int VScale; int VRatio; YV12_BUFFER_CONFIG *src_yuv_config; YV12_BUFFER_CONFIG *dst_yuv_config; } SCALE_VARS; /**************************************************************************** * * ROUTINE : scale1d_2t1_i * * INPUTS : const unsigned char *source : Pointer to data to be scaled. * int source_step : Number of pixels to step on in * source. * unsigned int source_scale : Scale for source (UNUSED). * unsigned int source_length : Length of source (UNUSED). * unsigned char *dest : Pointer to output data array. * int dest_step : Number of pixels to step on in * destination. * unsigned int dest_scale : Scale for destination * (UNUSED). * unsigned int dest_length : Length of destination. * * OUTPUTS : None. * * RETURNS : void * * FUNCTION : Performs 2-to-1 interpolated scaling. * * SPECIAL NOTES : None. * ****************************************************************************/ static void scale1d_2t1_i(const unsigned char *source, int source_step, unsigned int source_scale, unsigned int source_length, unsigned char *dest, int dest_step, unsigned int dest_scale, unsigned int dest_length) { unsigned int i, j; unsigned int temp; int source_pitch = source_step; (void)source_length; (void)source_scale; (void)dest_scale; source_step *= 2; dest[0] = source[0]; for (i = dest_step, j = source_step; i < dest_length * dest_step; i += dest_step, j += source_step) { temp = 8; temp += 3 * source[j - source_pitch]; temp += 10 * source[j]; temp += 3 * source[j + source_pitch]; temp >>= 4; dest[i] = (char)(temp); } } /**************************************************************************** * * ROUTINE : scale1d_2t1_ps * * INPUTS : const unsigned char *source : Pointer to data to be scaled. * int source_step : Number of pixels to step on in * source. * unsigned int source_scale : Scale for source (UNUSED). * unsigned int source_length : Length of source (UNUSED). * unsigned char *dest : Pointer to output data array. * int dest_step : Number of pixels to step on in * destination. * unsigned int dest_scale : Scale for destination * (UNUSED). * unsigned int dest_length : Length of destination. * * OUTPUTS : None. * * RETURNS : void * * FUNCTION : Performs 2-to-1 point subsampled scaling. * * SPECIAL NOTES : None. * ****************************************************************************/ static void scale1d_2t1_ps(const unsigned char *source, int source_step, unsigned int source_scale, unsigned int source_length, unsigned char *dest, int dest_step, unsigned int dest_scale, unsigned int dest_length) { unsigned int i, j; (void)source_length; (void)source_scale; (void)dest_scale; source_step *= 2; j = 0; for (i = 0; i < dest_length * dest_step; i += dest_step, j += source_step) dest[i] = source[j]; } /**************************************************************************** * * ROUTINE : scale1d_c * * INPUTS : const unsigned char *source : Pointer to data to be scaled. * int source_step : Number of pixels to step on in * source. * unsigned int source_scale : Scale for source. * unsigned int source_length : Length of source (UNUSED). 
* unsigned char *dest : Pointer to output data array. * int dest_step : Number of pixels to step on in * destination. * unsigned int dest_scale : Scale for destination. * unsigned int dest_length : Length of destination. * * OUTPUTS : None. * * RETURNS : void * * FUNCTION : Performs linear interpolation in one dimension. * * SPECIAL NOTES : None. * ****************************************************************************/ static void scale1d_c(const unsigned char *source, int source_step, unsigned int source_scale, unsigned int source_length, unsigned char *dest, int dest_step, unsigned int dest_scale, unsigned int dest_length) { unsigned int i; unsigned int round_value = dest_scale / 2; unsigned int left_modifier = dest_scale; unsigned int right_modifier = 0; unsigned char left_pixel = *source; unsigned char right_pixel = *(source + source_step); (void)source_length; /* These asserts are needed if there are boundary issues... */ /*assert ( dest_scale > source_scale );*/ /*assert ( (source_length-1) * dest_scale >= (dest_length-1) * source_scale * );*/ for (i = 0; i < dest_length * dest_step; i += dest_step) { dest[i] = (char)((left_modifier * left_pixel + right_modifier * right_pixel + round_value) / dest_scale); right_modifier += source_scale; while (right_modifier > dest_scale) { right_modifier -= dest_scale; source += source_step; left_pixel = *source; right_pixel = *(source + source_step); } left_modifier = dest_scale - right_modifier; } } /**************************************************************************** * * ROUTINE : Scale2D * * INPUTS : const unsigned char *source : Pointer to data to be * scaled. * int source_pitch : Stride of source image. * unsigned int source_width : Width of input image. * unsigned int source_height : Height of input image. * unsigned char *dest : Pointer to output data * array. * int dest_pitch : Stride of destination * image. * unsigned int dest_width : Width of destination image. * unsigned int dest_height : Height of destination * image. * unsigned char *temp_area : Pointer to temp work area. * unsigned char temp_area_height : Height of temp work area. * unsigned int hscale : Horizontal scale factor * numerator. * unsigned int hratio : Horizontal scale factor * denominator. * unsigned int vscale : Vertical scale factor * numerator. * unsigned int vratio : Vertical scale factor * denominator. * unsigned int interlaced : Interlace flag. * * OUTPUTS : None. * * RETURNS : void * * FUNCTION : Performs 2-tap linear interpolation in two dimensions. * * SPECIAL NOTES : Expansion is performed one band at a time to help with * caching. 
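* * The output size follows dest = source * hratio / hscale per axis; e.g. hscale = 5, hratio = 4 maps a 320-pixel row to 256.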
* ****************************************************************************/ static void Scale2D( /*const*/ unsigned char *source, int source_pitch, unsigned int source_width, unsigned int source_height, unsigned char *dest, int dest_pitch, unsigned int dest_width, unsigned int dest_height, unsigned char *temp_area, unsigned char temp_area_height, unsigned int hscale, unsigned int hratio, unsigned int vscale, unsigned int vratio, unsigned int interlaced) { /*unsigned*/ int i, j, k; int bands; int dest_band_height; int source_band_height; typedef void (*Scale1D)(const unsigned char *source, int source_step, unsigned int source_scale, unsigned int source_length, unsigned char *dest, int dest_step, unsigned int dest_scale, unsigned int dest_length); Scale1D Scale1Dv = scale1d_c; Scale1D Scale1Dh = scale1d_c; void (*horiz_line_scale)(const unsigned char *, unsigned int, unsigned char *, unsigned int) = NULL; void (*vert_band_scale)(unsigned char *, unsigned int, unsigned char *, unsigned int, unsigned int) = NULL; int ratio_scalable = 1; int interpolation = 0; unsigned char *source_base; unsigned char *line_src; source_base = (unsigned char *)source; if (source_pitch < 0) { int offset; offset = (source_height - 1); offset *= source_pitch; source_base += offset; } /* find out the ratio for each direction */ switch (hratio * 10 / hscale) { case 8: /* 4-5 Scale in Width direction */ horiz_line_scale = vp8_horizontal_line_5_4_scale; break; case 6: /* 3-5 Scale in Width direction */ horiz_line_scale = vp8_horizontal_line_5_3_scale; break; case 5: /* 1-2 Scale in Width direction */ horiz_line_scale = vp8_horizontal_line_2_1_scale; break; default: /* The ratio is not acceptable now */ /* throw("The ratio is not acceptable for now!"); */ ratio_scalable = 0; break; } switch (vratio * 10 / vscale) { case 8: /* 4-5 Scale in vertical direction */ vert_band_scale = vp8_vertical_band_5_4_scale; source_band_height = 5; dest_band_height = 4; break; case 6: /* 3-5 Scale in vertical direction */ vert_band_scale = vp8_vertical_band_5_3_scale; source_band_height = 5; dest_band_height = 3; break; case 5: /* 1-2 Scale in vertical direction */ if (interlaced) { /* if the content is interlaced, point sampling is used */ vert_band_scale = vp8_vertical_band_2_1_scale; } else { interpolation = 1; /* if the content is progressive, interpolate */ vert_band_scale = vp8_vertical_band_2_1_scale_i; } source_band_height = 2; dest_band_height = 1; break; default: /* The ratio is not acceptable now */ /* throw("The ratio is not acceptable for now!"); */ ratio_scalable = 0; break; } if (ratio_scalable) { if (source_height == dest_height) { /* for each band of the image */ for (k = 0; k < (int)dest_height; k++) { horiz_line_scale(source, source_width, dest, dest_width); source += source_pitch; dest += dest_pitch; } return; } if (interpolation) { if (source < source_base) source = source_base; horiz_line_scale(source, source_width, temp_area, dest_width); } for (k = 0; k < (int)(dest_height + dest_band_height - 1) / dest_band_height; k++) { /* scale one band horizontally */ for (i = 0; i < source_band_height; i++) { /* Trap case where we could read off the base of the source buffer */ line_src = (unsigned char *)source + i * source_pitch; if (line_src < source_base) line_src = source_base; horiz_line_scale(line_src, source_width, temp_area + (i + 1) * dest_pitch, dest_width); } /* Vertical scaling is in place */ vert_band_scale(temp_area + dest_pitch, dest_pitch, dest, dest_pitch, dest_width); if (interpolation) memcpy(temp_area,
temp_area + source_band_height * dest_pitch, dest_width); /* Next band... */ source += (unsigned long)source_band_height * source_pitch; dest += (unsigned long)dest_band_height * dest_pitch; } return; } if (hscale == 2 && hratio == 1) Scale1Dh = scale1d_2t1_ps; if (vscale == 2 && vratio == 1) { if (interlaced) Scale1Dv = scale1d_2t1_ps; else Scale1Dv = scale1d_2t1_i; } if (source_height == dest_height) { /* for each band of the image */ for (k = 0; k < (int)dest_height; k++) { Scale1Dh(source, 1, hscale, source_width + 1, dest, 1, hratio, dest_width); source += source_pitch; dest += dest_pitch; } return; } if (dest_height > source_height) { dest_band_height = temp_area_height - 1; source_band_height = dest_band_height * source_height / dest_height; } else { source_band_height = temp_area_height - 1; dest_band_height = source_band_height * vratio / vscale; } /* first row needs to be done so that we can stay one row ahead for vertical * zoom */ Scale1Dh(source, 1, hscale, source_width + 1, temp_area, 1, hratio, dest_width); /* for each band of the image */ bands = (dest_height + dest_band_height - 1) / dest_band_height; for (k = 0; k < bands; k++) { /* scale one band horizontally */ for (i = 1; i < source_band_height + 1; i++) { if (k * source_band_height + i < (int)source_height) { Scale1Dh(source + i * source_pitch, 1, hscale, source_width + 1, temp_area + i * dest_pitch, 1, hratio, dest_width); } else { /* Duplicate the last row */ /* copy temp_area row 0 over from last row in the past */ memcpy(temp_area + i * dest_pitch, temp_area + (i - 1) * dest_pitch, dest_pitch); } } /* scale one band vertically */ for (j = 0; j < (int)dest_width; j++) { Scale1Dv(&temp_area[j], dest_pitch, vscale, source_band_height + 1, &dest[j], dest_pitch, vratio, dest_band_height); } /* copy temp_area row 0 over from last row in the past */ memcpy(temp_area, temp_area + source_band_height * dest_pitch, dest_pitch); /* move to the next band */ source += source_band_height * source_pitch; dest += dest_band_height * dest_pitch; } } /**************************************************************************** * * ROUTINE : vpx_scale_frame * * INPUTS : YV12_BUFFER_CONFIG *src : Pointer to frame to be * scaled. * YV12_BUFFER_CONFIG *dst : Pointer to buffer to hold * scaled frame. * unsigned char *temp_area : Pointer to temp work area. * unsigned char temp_area_height : Height of temp work area. * unsigned int hscale : Horizontal scale factor * numerator. * unsigned int hratio : Horizontal scale factor * denominator. * unsigned int vscale : Vertical scale factor * numerator. * unsigned int vratio : Vertical scale factor * denominator. * unsigned int interlaced : Interlace flag. * * OUTPUTS : None. * * RETURNS : void * * FUNCTION : Performs 2-tap linear interpolation in two dimensions. * * SPECIAL NOTES : Expansion is performed one band at a time to help with * caching. * ****************************************************************************/ void vpx_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, unsigned char *temp_area, unsigned char temp_height, unsigned int hscale, unsigned int hratio, unsigned int vscale, unsigned int vratio, unsigned int interlaced) { int i; int dw = (hscale - 1 + src->y_width * hratio) / hscale; int dh = (vscale - 1 + src->y_height * vratio) / vscale; /* call our internal scaling routines!! 
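(dw and dh round up: e.g. a 353-wide luma plane at hratio/hscale = 4/5 gives (4 + 353 * 4) / 5 = 283)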
*/ Scale2D((unsigned char *)src->y_buffer, src->y_stride, src->y_width, src->y_height, (unsigned char *)dst->y_buffer, dst->y_stride, dw, dh, temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); if (dw < (int)dst->y_width) for (i = 0; i < dh; i++) memset(dst->y_buffer + i * dst->y_stride + dw - 1, dst->y_buffer[i * dst->y_stride + dw - 2], dst->y_width - dw + 1); if (dh < (int)dst->y_height) for (i = dh - 1; i < (int)dst->y_height; i++) memcpy(dst->y_buffer + i * dst->y_stride, dst->y_buffer + (dh - 2) * dst->y_stride, dst->y_width + 1); Scale2D((unsigned char *)src->u_buffer, src->uv_stride, src->uv_width, src->uv_height, (unsigned char *)dst->u_buffer, dst->uv_stride, dw / 2, dh / 2, temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); if (dw / 2 < (int)dst->uv_width) for (i = 0; i < dst->uv_height; i++) memset(dst->u_buffer + i * dst->uv_stride + dw / 2 - 1, dst->u_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); if (dh / 2 < (int)dst->uv_height) for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) memcpy(dst->u_buffer + i * dst->uv_stride, dst->u_buffer + (dh / 2 - 2) * dst->uv_stride, dst->uv_width); Scale2D((unsigned char *)src->v_buffer, src->uv_stride, src->uv_width, src->uv_height, (unsigned char *)dst->v_buffer, dst->uv_stride, dw / 2, dh / 2, temp_area, temp_height, hscale, hratio, vscale, vratio, interlaced); if (dw / 2 < (int)dst->uv_width) for (i = 0; i < dst->uv_height; i++) memset(dst->v_buffer + i * dst->uv_stride + dw / 2 - 1, dst->v_buffer[i * dst->uv_stride + dw / 2 - 2], dst->uv_width - dw / 2 + 1); if (dh / 2 < (int)dst->uv_height) for (i = dh / 2 - 1; i < (int)dst->y_height / 2; i++) memcpy(dst->v_buffer + i * dst->uv_stride, dst->v_buffer + (dh / 2 - 2) * dst->uv_stride, dst->uv_width); } libvpx-1.8.2/vpx_scale/generic/yv12config.c000066400000000000000000000247521357355204000205770ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include <assert.h> #include <stdint.h> #include "vpx_scale/yv12config.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #if defined(VPX_MAX_ALLOCABLE_MEMORY) #include "vp9/common/vp9_onyxc_int.h" #endif // VPX_MAX_ALLOCABLE_MEMORY /**************************************************************************** * Exports ****************************************************************************/ /**************************************************************************** * ****************************************************************************/ #define yv12_align_addr(addr, align) \ (void *)(((size_t)(addr) + ((align)-1)) & (size_t) - (align)) int vp8_yv12_de_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf) { if (ybf) { // If libvpx is using frame buffer callbacks then buffer_alloc_sz must // not be set. if (ybf->buffer_alloc_sz > 0) { vpx_free(ybf->buffer_alloc); } /* buffer_alloc isn't accessed by most functions. Rather y_buffer, u_buffer and v_buffer point to buffer_alloc and are used.
Clear out all of this so that a freed pointer isn't inadvertently used */ memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); } else { return -1; } return 0; } int vp8_yv12_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) { if (ybf) { int aligned_width = (width + 15) & ~15; int aligned_height = (height + 15) & ~15; int y_stride = ((aligned_width + 2 * border) + 31) & ~31; int yplane_size = (aligned_height + 2 * border) * y_stride; int uv_width = aligned_width >> 1; int uv_height = aligned_height >> 1; /** There is currently a bunch of code which assumes * uv_stride == y_stride/2, so enforce this here. */ int uv_stride = y_stride >> 1; int uvplane_size = (uv_height + border) * uv_stride; const size_t frame_size = yplane_size + 2 * uvplane_size; if (!ybf->buffer_alloc) { ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, frame_size); #if defined(__has_feature) #if __has_feature(memory_sanitizer) // This memset is needed for fixing the issue of using uninitialized // value in msan test. It will cause a perf loss, so only do this for // msan test. memset(ybf->buffer_alloc, 0, frame_size); #endif #endif ybf->buffer_alloc_sz = frame_size; } if (!ybf->buffer_alloc || ybf->buffer_alloc_sz < frame_size) return -1; /* Only support allocating buffers that have a border that's a multiple * of 32. The border restriction is required to get 16-byte alignment of * the start of the chroma rows without introducing an arbitrary gap * between planes, which would break the semantics of things like * vpx_img_set_rect(). */ if (border & 0x1f) return -3; ybf->y_crop_width = width; ybf->y_crop_height = height; ybf->y_width = aligned_width; ybf->y_height = aligned_height; ybf->y_stride = y_stride; ybf->uv_crop_width = (width + 1) / 2; ybf->uv_crop_height = (height + 1) / 2; ybf->uv_width = uv_width; ybf->uv_height = uv_height; ybf->uv_stride = uv_stride; ybf->alpha_width = 0; ybf->alpha_height = 0; ybf->alpha_stride = 0; ybf->border = border; ybf->frame_size = frame_size; ybf->y_buffer = ybf->buffer_alloc + (border * y_stride) + border; ybf->u_buffer = ybf->buffer_alloc + yplane_size + (border / 2 * uv_stride) + border / 2; ybf->v_buffer = ybf->buffer_alloc + yplane_size + uvplane_size + (border / 2 * uv_stride) + border / 2; ybf->alpha_buffer = NULL; ybf->corrupted = 0; /* assume not corrupted by errors */ return 0; } return -2; } int vp8_yv12_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int border) { if (ybf) { vp8_yv12_de_alloc_frame_buffer(ybf); return vp8_yv12_realloc_frame_buffer(ybf, width, height, border); } return -2; } #if CONFIG_VP9 // TODO(jkoleszar): Maybe replace this with struct vpx_image int vpx_free_frame_buffer(YV12_BUFFER_CONFIG *ybf) { if (ybf) { if (ybf->buffer_alloc_sz > 0) { vpx_free(ybf->buffer_alloc); } /* buffer_alloc isn't accessed by most functions. Rather y_buffer, u_buffer and v_buffer point to buffer_alloc and are used. Clear out all of this so that a freed pointer isn't inadvertently used */ memset(ybf, 0, sizeof(YV12_BUFFER_CONFIG)); } else { return -1; } return 0; } int vpx_realloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int ss_x, int ss_y, #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth, #endif int border, int byte_alignment, vpx_codec_frame_buffer_t *fb, vpx_get_frame_buffer_cb_fn_t cb, void *cb_priv) { #if CONFIG_SIZE_LIMIT if (width > DECODE_WIDTH_LIMIT || height > DECODE_HEIGHT_LIMIT) return -1; #endif /* Only support allocating buffers that have a border that's a multiple * of 32.
The border restriction is required to get 16-byte alignment of * the start of the chroma rows without introducing an arbitrary gap * between planes, which would break the semantics of things like * vpx_img_set_rect(). */ if (border & 0x1f) return -3; if (ybf) { const int vp9_byte_align = (byte_alignment == 0) ? 1 : byte_alignment; const int aligned_width = (width + 7) & ~7; const int aligned_height = (height + 7) & ~7; const int y_stride = ((aligned_width + 2 * border) + 31) & ~31; const uint64_t yplane_size = (aligned_height + 2 * border) * (uint64_t)y_stride + byte_alignment; const int uv_width = aligned_width >> ss_x; const int uv_height = aligned_height >> ss_y; const int uv_stride = y_stride >> ss_x; const int uv_border_w = border >> ss_x; const int uv_border_h = border >> ss_y; const uint64_t uvplane_size = (uv_height + 2 * uv_border_h) * (uint64_t)uv_stride + byte_alignment; #if CONFIG_VP9_HIGHBITDEPTH const uint64_t frame_size = (1 + use_highbitdepth) * (yplane_size + 2 * uvplane_size); #else const uint64_t frame_size = yplane_size + 2 * uvplane_size; #endif // CONFIG_VP9_HIGHBITDEPTH uint8_t *buf = NULL; #if defined(VPX_MAX_ALLOCABLE_MEMORY) // The decoder may allocate REF_FRAMES frame buffers in the frame buffer // pool. Bound the total amount of allocated memory as if these REF_FRAMES // frame buffers were allocated in a single allocation. if (frame_size > VPX_MAX_ALLOCABLE_MEMORY / REF_FRAMES) return -1; #endif // VPX_MAX_ALLOCABLE_MEMORY // frame_size is stored in buffer_alloc_sz, which is a size_t. If it won't // fit, fail early. if (frame_size > SIZE_MAX) { return -1; } if (cb != NULL) { const int align_addr_extra_size = 31; const uint64_t external_frame_size = frame_size + align_addr_extra_size; assert(fb != NULL); if (external_frame_size != (size_t)external_frame_size) return -1; // Allocation to hold larger frame, or first allocation. if (cb(cb_priv, (size_t)external_frame_size, fb) < 0) return -1; if (fb->data == NULL || fb->size < external_frame_size) return -1; ybf->buffer_alloc = (uint8_t *)yv12_align_addr(fb->data, 32); #if defined(__has_feature) #if __has_feature(memory_sanitizer) // This memset is needed for fixing the issue of using uninitialized // value in msan test. It will cause a perf loss, so only do this for // msan test. memset(ybf->buffer_alloc, 0, (size_t)frame_size); #endif #endif } else if (frame_size > ybf->buffer_alloc_sz) { // Allocation to hold larger frame, or first allocation. vpx_free(ybf->buffer_alloc); ybf->buffer_alloc = NULL; ybf->buffer_alloc_sz = 0; ybf->buffer_alloc = (uint8_t *)vpx_memalign(32, (size_t)frame_size); if (!ybf->buffer_alloc) return -1; ybf->buffer_alloc_sz = (size_t)frame_size; // This memset is needed for fixing valgrind error from C loop filter // due to access uninitialized memory in frame border. It could be // removed if border is totally removed. 
memset(ybf->buffer_alloc, 0, ybf->buffer_alloc_sz); } ybf->y_crop_width = width; ybf->y_crop_height = height; ybf->y_width = aligned_width; ybf->y_height = aligned_height; ybf->y_stride = y_stride; ybf->uv_crop_width = (width + ss_x) >> ss_x; ybf->uv_crop_height = (height + ss_y) >> ss_y; ybf->uv_width = uv_width; ybf->uv_height = uv_height; ybf->uv_stride = uv_stride; ybf->border = border; ybf->frame_size = (size_t)frame_size; ybf->subsampling_x = ss_x; ybf->subsampling_y = ss_y; buf = ybf->buffer_alloc; #if CONFIG_VP9_HIGHBITDEPTH if (use_highbitdepth) { // Store uint16 addresses when using 16bit framebuffers buf = CONVERT_TO_BYTEPTR(ybf->buffer_alloc); ybf->flags = YV12_FLAG_HIGHBITDEPTH; } else { ybf->flags = 0; } #endif // CONFIG_VP9_HIGHBITDEPTH ybf->y_buffer = (uint8_t *)yv12_align_addr( buf + (border * y_stride) + border, vp9_byte_align); ybf->u_buffer = (uint8_t *)yv12_align_addr( buf + yplane_size + (uv_border_h * uv_stride) + uv_border_w, vp9_byte_align); ybf->v_buffer = (uint8_t *)yv12_align_addr(buf + yplane_size + uvplane_size + (uv_border_h * uv_stride) + uv_border_w, vp9_byte_align); ybf->corrupted = 0; /* assume not corrupted by errors */ return 0; } return -2; } int vpx_alloc_frame_buffer(YV12_BUFFER_CONFIG *ybf, int width, int height, int ss_x, int ss_y, #if CONFIG_VP9_HIGHBITDEPTH int use_highbitdepth, #endif int border, int byte_alignment) { if (ybf) { vpx_free_frame_buffer(ybf); return vpx_realloc_frame_buffer(ybf, width, height, ss_x, ss_y, #if CONFIG_VP9_HIGHBITDEPTH use_highbitdepth, #endif border, byte_alignment, NULL, NULL, NULL); } return -2; } #endif libvpx-1.8.2/vpx_scale/generic/yv12extend.c000066400000000000000000000247351357355204000206220ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_config.h" #include "./vpx_scale_rtcd.h" #include "vpx/vpx_integer.h" #include "vpx_mem/vpx_mem.h" #include "vpx_ports/mem.h" #include "vpx_scale/yv12config.h" #if CONFIG_VP9_HIGHBITDEPTH #include "vp9/common/vp9_common.h" #endif static void extend_plane(uint8_t *const src, int src_stride, int width, int height, int extend_top, int extend_left, int extend_bottom, int extend_right) { int i; const int linesize = extend_left + extend_right + width; /* copy the left and right most columns out */ uint8_t *src_ptr1 = src; uint8_t *src_ptr2 = src + width - 1; uint8_t *dst_ptr1 = src - extend_left; uint8_t *dst_ptr2 = src + width; for (i = 0; i < height; ++i) { memset(dst_ptr1, src_ptr1[0], extend_left); memset(dst_ptr2, src_ptr2[0], extend_right); src_ptr1 += src_stride; src_ptr2 += src_stride; dst_ptr1 += src_stride; dst_ptr2 += src_stride; } /* Now copy the top and bottom lines into each line of the respective * borders */ src_ptr1 = src - extend_left; src_ptr2 = src + src_stride * (height - 1) - extend_left; dst_ptr1 = src + src_stride * -extend_top - extend_left; dst_ptr2 = src + src_stride * height - extend_left; for (i = 0; i < extend_top; ++i) { memcpy(dst_ptr1, src_ptr1, linesize); dst_ptr1 += src_stride; } for (i = 0; i < extend_bottom; ++i) { memcpy(dst_ptr2, src_ptr2, linesize); dst_ptr2 += src_stride; } } #if CONFIG_VP9_HIGHBITDEPTH static void extend_plane_high(uint8_t *const src8, int src_stride, int width, int height, int extend_top, int extend_left, int extend_bottom, int extend_right) { int i; const int linesize = extend_left + extend_right + width; uint16_t *src = CONVERT_TO_SHORTPTR(src8); /* copy the left and right most columns out */ uint16_t *src_ptr1 = src; uint16_t *src_ptr2 = src + width - 1; uint16_t *dst_ptr1 = src - extend_left; uint16_t *dst_ptr2 = src + width; for (i = 0; i < height; ++i) { vpx_memset16(dst_ptr1, src_ptr1[0], extend_left); vpx_memset16(dst_ptr2, src_ptr2[0], extend_right); src_ptr1 += src_stride; src_ptr2 += src_stride; dst_ptr1 += src_stride; dst_ptr2 += src_stride; } /* Now copy the top and bottom lines into each line of the respective * borders */ src_ptr1 = src - extend_left; src_ptr2 = src + src_stride * (height - 1) - extend_left; dst_ptr1 = src + src_stride * -extend_top - extend_left; dst_ptr2 = src + src_stride * height - extend_left; for (i = 0; i < extend_top; ++i) { memcpy(dst_ptr1, src_ptr1, linesize * sizeof(uint16_t)); dst_ptr1 += src_stride; } for (i = 0; i < extend_bottom; ++i) { memcpy(dst_ptr2, src_ptr2, linesize * sizeof(uint16_t)); dst_ptr2 += src_stride; } } #endif void vp8_yv12_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { const int uv_border = ybf->border / 2; assert(ybf->border % 2 == 0); assert(ybf->y_height - ybf->y_crop_height < 16); assert(ybf->y_width - ybf->y_crop_width < 16); assert(ybf->y_height - ybf->y_crop_height >= 0); assert(ybf->y_width - ybf->y_crop_width >= 0); extend_plane(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width, ybf->y_crop_height, ybf->border, ybf->border, ybf->border + ybf->y_height - ybf->y_crop_height, ybf->border + ybf->y_width - ybf->y_crop_width); extend_plane(ybf->u_buffer, ybf->uv_stride, ybf->uv_crop_width, ybf->uv_crop_height, uv_border, uv_border, uv_border + ybf->uv_height - ybf->uv_crop_height, uv_border + ybf->uv_width - ybf->uv_crop_width); extend_plane(ybf->v_buffer, ybf->uv_stride, ybf->uv_crop_width, ybf->uv_crop_height, uv_border, uv_border, uv_border + ybf->uv_height - ybf->uv_crop_height, uv_border + ybf->uv_width - ybf->uv_crop_width); } #if 
CONFIG_VP9 static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) { const int c_w = ybf->uv_crop_width; const int c_h = ybf->uv_crop_height; const int ss_x = ybf->uv_width < ybf->y_width; const int ss_y = ybf->uv_height < ybf->y_height; const int c_et = ext_size >> ss_y; const int c_el = ext_size >> ss_x; const int c_eb = c_et + ybf->uv_height - ybf->uv_crop_height; const int c_er = c_el + ybf->uv_width - ybf->uv_crop_width; assert(ybf->y_height - ybf->y_crop_height < 16); assert(ybf->y_width - ybf->y_crop_width < 16); assert(ybf->y_height - ybf->y_crop_height >= 0); assert(ybf->y_width - ybf->y_crop_width >= 0); #if CONFIG_VP9_HIGHBITDEPTH if (ybf->flags & YV12_FLAG_HIGHBITDEPTH) { extend_plane_high(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width, ybf->y_crop_height, ext_size, ext_size, ext_size + ybf->y_height - ybf->y_crop_height, ext_size + ybf->y_width - ybf->y_crop_width); extend_plane_high(ybf->u_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb, c_er); extend_plane_high(ybf->v_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb, c_er); return; } #endif extend_plane(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width, ybf->y_crop_height, ext_size, ext_size, ext_size + ybf->y_height - ybf->y_crop_height, ext_size + ybf->y_width - ybf->y_crop_width); extend_plane(ybf->u_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb, c_er); extend_plane(ybf->v_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb, c_er); } void vpx_extend_frame_borders_c(YV12_BUFFER_CONFIG *ybf) { extend_frame(ybf, ybf->border); } void vpx_extend_frame_inner_borders_c(YV12_BUFFER_CONFIG *ybf) { const int inner_bw = (ybf->border > VP9INNERBORDERINPIXELS) ? VP9INNERBORDERINPIXELS : ybf->border; extend_frame(ybf, inner_bw); } #if CONFIG_VP9_HIGHBITDEPTH static void memcpy_short_addr(uint8_t *dst8, const uint8_t *src8, int num) { uint16_t *dst = CONVERT_TO_SHORTPTR(dst8); uint16_t *src = CONVERT_TO_SHORTPTR(src8); memcpy(dst, src, num * sizeof(uint16_t)); } #endif // CONFIG_VP9_HIGHBITDEPTH #endif // CONFIG_VP9 // Copies the source image into the destination image and updates the // destination's UMV borders. // Note: The frames are assumed to be identical in size. void vp8_yv12_copy_frame_c(const YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) { int row; const uint8_t *src = src_ybc->y_buffer; uint8_t *dst = dst_ybc->y_buffer; #if 0 /* These assertions are valid in the codec, but the libvpx-tester uses * this code slightly differently. */ assert(src_ybc->y_width == dst_ybc->y_width); assert(src_ybc->y_height == dst_ybc->y_height); #endif for (row = 0; row < src_ybc->y_height; ++row) { memcpy(dst, src, src_ybc->y_width); src += src_ybc->y_stride; dst += dst_ybc->y_stride; } src = src_ybc->u_buffer; dst = dst_ybc->u_buffer; for (row = 0; row < src_ybc->uv_height; ++row) { memcpy(dst, src, src_ybc->uv_width); src += src_ybc->uv_stride; dst += dst_ybc->uv_stride; } src = src_ybc->v_buffer; dst = dst_ybc->v_buffer; for (row = 0; row < src_ybc->uv_height; ++row) { memcpy(dst, src, src_ybc->uv_width); src += src_ybc->uv_stride; dst += dst_ybc->uv_stride; } vp8_yv12_extend_frame_borders_c(dst_ybc); } #if CONFIG_VP9 void vpx_yv12_copy_frame_c(const YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) { int row; const uint8_t *src = src_ybc->y_buffer; uint8_t *dst = dst_ybc->y_buffer; #if 0 /* These assertions are valid in the codec, but the libvpx-tester uses * this code slightly differently. 
*/ assert(src_ybc->y_width == dst_ybc->y_width); assert(src_ybc->y_height == dst_ybc->y_height); #endif #if CONFIG_VP9_HIGHBITDEPTH if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) { assert(dst_ybc->flags & YV12_FLAG_HIGHBITDEPTH); for (row = 0; row < src_ybc->y_height; ++row) { memcpy_short_addr(dst, src, src_ybc->y_width); src += src_ybc->y_stride; dst += dst_ybc->y_stride; } src = src_ybc->u_buffer; dst = dst_ybc->u_buffer; for (row = 0; row < src_ybc->uv_height; ++row) { memcpy_short_addr(dst, src, src_ybc->uv_width); src += src_ybc->uv_stride; dst += dst_ybc->uv_stride; } src = src_ybc->v_buffer; dst = dst_ybc->v_buffer; for (row = 0; row < src_ybc->uv_height; ++row) { memcpy_short_addr(dst, src, src_ybc->uv_width); src += src_ybc->uv_stride; dst += dst_ybc->uv_stride; } vpx_extend_frame_borders_c(dst_ybc); return; } else { assert(!(dst_ybc->flags & YV12_FLAG_HIGHBITDEPTH)); } #endif for (row = 0; row < src_ybc->y_height; ++row) { memcpy(dst, src, src_ybc->y_width); src += src_ybc->y_stride; dst += dst_ybc->y_stride; } src = src_ybc->u_buffer; dst = dst_ybc->u_buffer; for (row = 0; row < src_ybc->uv_height; ++row) { memcpy(dst, src, src_ybc->uv_width); src += src_ybc->uv_stride; dst += dst_ybc->uv_stride; } src = src_ybc->v_buffer; dst = dst_ybc->v_buffer; for (row = 0; row < src_ybc->uv_height; ++row) { memcpy(dst, src, src_ybc->uv_width); src += src_ybc->uv_stride; dst += dst_ybc->uv_stride; } vpx_extend_frame_borders_c(dst_ybc); } #endif // CONFIG_VP9 void vpx_yv12_copy_y_c(const YV12_BUFFER_CONFIG *src_ybc, YV12_BUFFER_CONFIG *dst_ybc) { int row; const uint8_t *src = src_ybc->y_buffer; uint8_t *dst = dst_ybc->y_buffer; #if CONFIG_VP9_HIGHBITDEPTH if (src_ybc->flags & YV12_FLAG_HIGHBITDEPTH) { const uint16_t *src16 = CONVERT_TO_SHORTPTR(src); uint16_t *dst16 = CONVERT_TO_SHORTPTR(dst); for (row = 0; row < src_ybc->y_height; ++row) { memcpy(dst16, src16, src_ybc->y_width * sizeof(uint16_t)); src16 += src_ybc->y_stride; dst16 += dst_ybc->y_stride; } return; } #endif for (row = 0; row < src_ybc->y_height; ++row) { memcpy(dst, src, src_ybc->y_width); src += src_ybc->y_stride; dst += dst_ybc->y_stride; } } libvpx-1.8.2/vpx_scale/mips/000077500000000000000000000000001357355204000157665ustar00rootroot00000000000000libvpx-1.8.2/vpx_scale/mips/dspr2/000077500000000000000000000000001357355204000170205ustar00rootroot00000000000000libvpx-1.8.2/vpx_scale/mips/dspr2/yv12extend_dspr2.c000066400000000000000000000107451357355204000223160ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
*/ #include #include "./vpx_config.h" #include "vpx_scale/yv12config.h" #include "vpx_mem/vpx_mem.h" #include "vpx_scale/vpx_scale.h" #if HAVE_DSPR2 static void extend_plane(uint8_t *const src, int src_stride, int width, int height, int extend_top, int extend_left, int extend_bottom, int extend_right) { int i, j; uint8_t *left_src, *right_src; uint8_t *left_dst_start, *right_dst_start; uint8_t *left_dst, *right_dst; uint8_t *top_src, *bot_src; uint8_t *top_dst, *bot_dst; uint32_t left_pix; uint32_t right_pix; uint32_t linesize; /* copy the left and right most columns out */ left_src = src; right_src = src + width - 1; left_dst_start = src - extend_left; right_dst_start = src + width; for (i = height; i--;) { left_dst = left_dst_start; right_dst = right_dst_start; __asm__ __volatile__( "lb %[left_pix], 0(%[left_src]) \n\t" "lb %[right_pix], 0(%[right_src]) \n\t" "replv.qb %[left_pix], %[left_pix] \n\t" "replv.qb %[right_pix], %[right_pix] \n\t" : [left_pix] "=&r"(left_pix), [right_pix] "=&r"(right_pix) : [left_src] "r"(left_src), [right_src] "r"(right_src)); for (j = extend_left / 4; j--;) { __asm__ __volatile__( "sw %[left_pix], 0(%[left_dst]) \n\t" "sw %[right_pix], 0(%[right_dst]) \n\t" : : [left_dst] "r"(left_dst), [left_pix] "r"(left_pix), [right_dst] "r"(right_dst), [right_pix] "r"(right_pix)); left_dst += 4; right_dst += 4; } for (j = extend_left % 4; j--;) { __asm__ __volatile__( "sb %[left_pix], 0(%[left_dst]) \n\t" "sb %[right_pix], 0(%[right_dst]) \n\t" : : [left_dst] "r"(left_dst), [left_pix] "r"(left_pix), [right_dst] "r"(right_dst), [right_pix] "r"(right_pix)); left_dst += 1; right_dst += 1; } left_src += src_stride; right_src += src_stride; left_dst_start += src_stride; right_dst_start += src_stride; } /* Now copy the top and bottom lines into each line of the respective * borders */ top_src = src - extend_left; bot_src = src + src_stride * (height - 1) - extend_left; top_dst = src + src_stride * (-extend_top) - extend_left; bot_dst = src + src_stride * (height)-extend_left; linesize = extend_left + extend_right + width; for (i = 0; i < extend_top; i++) { memcpy(top_dst, top_src, linesize); top_dst += src_stride; } for (i = 0; i < extend_bottom; i++) { memcpy(bot_dst, bot_src, linesize); bot_dst += src_stride; } } static void extend_frame(YV12_BUFFER_CONFIG *const ybf, int ext_size) { const int c_w = ybf->uv_crop_width; const int c_h = ybf->uv_crop_height; const int ss_x = ybf->uv_width < ybf->y_width; const int ss_y = ybf->uv_height < ybf->y_height; const int c_et = ext_size >> ss_y; const int c_el = ext_size >> ss_x; const int c_eb = c_et + ybf->uv_height - ybf->uv_crop_height; const int c_er = c_el + ybf->uv_width - ybf->uv_crop_width; assert(ybf->y_height - ybf->y_crop_height < 16); assert(ybf->y_width - ybf->y_crop_width < 16); assert(ybf->y_height - ybf->y_crop_height >= 0); assert(ybf->y_width - ybf->y_crop_width >= 0); extend_plane(ybf->y_buffer, ybf->y_stride, ybf->y_crop_width, ybf->y_crop_height, ext_size, ext_size, ext_size + ybf->y_height - ybf->y_crop_height, ext_size + ybf->y_width - ybf->y_crop_width); extend_plane(ybf->u_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb, c_er); extend_plane(ybf->v_buffer, ybf->uv_stride, c_w, c_h, c_et, c_el, c_eb, c_er); } void vpx_extend_frame_borders_dspr2(YV12_BUFFER_CONFIG *ybf) { extend_frame(ybf, ybf->border); } void vpx_extend_frame_inner_borders_dspr2(YV12_BUFFER_CONFIG *ybf) { const int inner_bw = (ybf->border > VP9INNERBORDERINPIXELS) ? 
VP9INNERBORDERINPIXELS : ybf->border; extend_frame(ybf, inner_bw); } #endif libvpx-1.8.2/vpx_scale/vpx_scale.h000066400000000000000000000016161357355204000171570ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_SCALE_VPX_SCALE_H_ #define VPX_VPX_SCALE_VPX_SCALE_H_ #include "vpx_scale/yv12config.h" extern void vpx_scale_frame(YV12_BUFFER_CONFIG *src, YV12_BUFFER_CONFIG *dst, unsigned char *temp_area, unsigned char temp_height, unsigned int hscale, unsigned int hratio, unsigned int vscale, unsigned int vratio, unsigned int interlaced); #endif // VPX_VPX_SCALE_VPX_SCALE_H_ libvpx-1.8.2/vpx_scale/vpx_scale.mk000066400000000000000000000011121357355204000173260ustar00rootroot00000000000000SCALE_SRCS-yes += vpx_scale.mk SCALE_SRCS-yes += yv12config.h SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += vpx_scale.h SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/vpx_scale.c SCALE_SRCS-yes += generic/yv12config.c SCALE_SRCS-yes += generic/yv12extend.c SCALE_SRCS-$(CONFIG_SPATIAL_RESAMPLING) += generic/gen_scalers.c SCALE_SRCS-yes += vpx_scale_rtcd.c SCALE_SRCS-yes += vpx_scale_rtcd.pl #mips(dspr2) SCALE_SRCS-$(HAVE_DSPR2) += mips/dspr2/yv12extend_dspr2.c SCALE_SRCS-no += $(SCALE_SRCS_REMOVE-yes) $(eval $(call rtcd_h_template,vpx_scale_rtcd,vpx_scale/vpx_scale_rtcd.pl)) libvpx-1.8.2/vpx_scale/vpx_scale_rtcd.c000066400000000000000000000010701357355204000201600ustar00rootroot00000000000000/* * Copyright (c) 2011 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "./vpx_config.h" #define RTCD_C #include "./vpx_scale_rtcd.h" #include "vpx_ports/vpx_once.h" void vpx_scale_rtcd() { once(setup_rtcd_internal); } libvpx-1.8.2/vpx_scale/vpx_scale_rtcd.pl000066400000000000000000000047651357355204000203670ustar00rootroot00000000000000## ## Copyright (c) 2017 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. 
## sub vpx_scale_forward_decls() { print < #include "./vpx_config.h" #include "vpx/vpx_integer.h" #if defined(__GNUC__) #define LOCAL_GCC_VERSION ((__GNUC__ << 8) | __GNUC_MINOR__) #define LOCAL_GCC_PREREQ(maj, min) (LOCAL_GCC_VERSION >= (((maj) << 8) | (min))) #else #define LOCAL_GCC_VERSION 0 #define LOCAL_GCC_PREREQ(maj, min) 0 #endif // handle clang compatibility #ifndef __has_builtin #define __has_builtin(x) 0 #endif // some endian fix (e.g.: mips-gcc doesn't define __BIG_ENDIAN__) #if !defined(WORDS_BIGENDIAN) && \ (defined(__BIG_ENDIAN__) || defined(_M_PPC) || \ (defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))) #define WORDS_BIGENDIAN #endif #if defined(WORDS_BIGENDIAN) #define HToLE32 BSwap32 #define HToLE16 BSwap16 #define HToBE64(x) (x) #define HToBE32(x) (x) #else #define HToLE32(x) (x) #define HToLE16(x) (x) #define HToBE64(X) BSwap64(X) #define HToBE32(X) BSwap32(X) #endif #if LOCAL_GCC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) #define HAVE_BUILTIN_BSWAP16 #endif #if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap32) #define HAVE_BUILTIN_BSWAP32 #endif #if LOCAL_GCC_PREREQ(4, 3) || __has_builtin(__builtin_bswap64) #define HAVE_BUILTIN_BSWAP64 #endif #if HAVE_MIPS32 && defined(__mips__) && !defined(__mips64) && \ defined(__mips_isa_rev) && (__mips_isa_rev >= 2) && (__mips_isa_rev < 6) #define VPX_USE_MIPS32_R2 #endif static INLINE uint16_t BSwap16(uint16_t x) { #if defined(HAVE_BUILTIN_BSWAP16) return __builtin_bswap16(x); #elif defined(_MSC_VER) return _byteswap_ushort(x); #else // gcc will recognize a 'rorw $8, ...' here: return (x >> 8) | ((x & 0xff) << 8); #endif // HAVE_BUILTIN_BSWAP16 } static INLINE uint32_t BSwap32(uint32_t x) { #if defined(VPX_USE_MIPS32_R2) uint32_t ret; __asm__ volatile( "wsbh %[ret], %[x] \n\t" "rotr %[ret], %[ret], 16 \n\t" : [ret] "=r"(ret) : [x] "r"(x)); return ret; #elif defined(HAVE_BUILTIN_BSWAP32) return __builtin_bswap32(x); #elif defined(__i386__) || defined(__x86_64__) uint32_t swapped_bytes; __asm__ volatile("bswap %0" : "=r"(swapped_bytes) : "0"(x)); return swapped_bytes; #elif defined(_MSC_VER) return (uint32_t)_byteswap_ulong(x); #else return (x >> 24) | ((x >> 8) & 0xff00) | ((x << 8) & 0xff0000) | (x << 24); #endif // HAVE_BUILTIN_BSWAP32 } static INLINE uint64_t BSwap64(uint64_t x) { #if defined(HAVE_BUILTIN_BSWAP64) return __builtin_bswap64(x); #elif defined(__x86_64__) uint64_t swapped_bytes; __asm__ volatile("bswapq %0" : "=r"(swapped_bytes) : "0"(x)); return swapped_bytes; #elif defined(_MSC_VER) return (uint64_t)_byteswap_uint64(x); #else // generic code for swapping 64-bit values (suggested by bdb@) x = ((x & 0xffffffff00000000ull) >> 32) | ((x & 0x00000000ffffffffull) << 32); x = ((x & 0xffff0000ffff0000ull) >> 16) | ((x & 0x0000ffff0000ffffull) << 16); x = ((x & 0xff00ff00ff00ff00ull) >> 8) | ((x & 0x00ff00ff00ff00ffull) << 8); return x; #endif // HAVE_BUILTIN_BSWAP64 } #endif // VPX_VPX_UTIL_ENDIAN_INL_H_ libvpx-1.8.2/vpx_util/vpx_atomics.h000066400000000000000000000101271357355204000174120ustar00rootroot00000000000000/* * Copyright (c) 2017 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
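 * A quick sanity check for the byte-swap helpers in endian_inl.h above
 * (illustrative values, not code from this tree): BSwap16(0x0102) yields
 * 0x0201, BSwap32(0x01020304) yields 0x04030201, and BSwap64 reverses all
 * eight bytes. On a little-endian host HToLE32() is the identity, while on
 * a big-endian host it maps to BSwap32().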
*/ #ifndef VPX_VPX_UTIL_VPX_ATOMICS_H_ #define VPX_VPX_UTIL_VPX_ATOMICS_H_ #include "./vpx_config.h" #ifdef __cplusplus extern "C" { #endif // __cplusplus #if CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD // Look for built-in atomic support. We cannot use or // since neither is guaranteed to exist on both C and C++ platforms, and we need // to back the atomic type with the same type (g++ needs to be able to use // gcc-built code). g++ 6 doesn't support _Atomic as a keyword and can't use the // stdatomic.h header. Even if both and existed it's not // guaranteed that atomic_int is the same type as std::atomic_int. // See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=60932#c13. #if !defined(__has_builtin) #define __has_builtin(x) 0 // Compatibility with non-clang compilers. #endif // !defined(__has_builtin) #if (__has_builtin(__atomic_load_n)) || \ (defined(__GNUC__) && \ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))) // For GCC >= 4.7 and Clang versions that support __atomic builtins, use those. #define VPX_USE_ATOMIC_BUILTINS #else // Use platform-specific asm barriers. #if defined(_MSC_VER) // TODO(pbos): This assumes that newer versions of MSVC are building with the // default /volatile:ms (or older, where this is always true. Consider adding // support for using instead of stdatomic.h when building C++11 under // MSVC. It's unclear what to do for plain C under /volatile:iso (inline asm?), // there're no explicit Interlocked* functions for only storing or loading // (presumably because volatile has historically implied that on MSVC). // // For earlier versions of MSVC or the default /volatile:ms volatile int are // acquire/release and require no barrier. #define vpx_atomic_memory_barrier() \ do { \ } while (0) #else #if VPX_ARCH_X86 || VPX_ARCH_X86_64 // Use a compiler barrier on x86, no runtime penalty. #define vpx_atomic_memory_barrier() __asm__ __volatile__("" ::: "memory") #elif VPX_ARCH_ARM #define vpx_atomic_memory_barrier() __asm__ __volatile__("dmb ish" ::: "memory") #elif VPX_ARCH_MIPS #define vpx_atomic_memory_barrier() __asm__ __volatile__("sync" ::: "memory") #else #error Unsupported architecture! #endif // VPX_ARCH_X86 || VPX_ARCH_X86_64 #endif // defined(_MSC_VER) #endif // atomic builtin availability check // These are wrapped in a struct so that they are not easily accessed directly // on any platform (to discourage programmer errors by setting values directly). // This primitive MUST be initialized using vpx_atomic_init or VPX_ATOMIC_INIT // (NOT memset) and accessed through vpx_atomic_ functions. typedef struct vpx_atomic_int { volatile int value; } vpx_atomic_int; #define VPX_ATOMIC_INIT(num) \ { num } // Initialization of an atomic int, not thread safe. 
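// Hedged usage sketch (not part of this header): a single-producer /
// single-consumer handoff built on the primitives declared here. The
// 'payload' variable and the thread roles are invented for illustration.
//
//   static int payload;                           // published data
//   static vpx_atomic_int ready = VPX_ATOMIC_INIT(0);
//
//   // producer thread:
//   payload = 42;                                 // plain store first ...
//   vpx_atomic_store_release(&ready, 1);          // ... then publish
//
//   // consumer thread:
//   while (!vpx_atomic_load_acquire(&ready)) {    // acquire pairs with the
//   }                                             // release store above
//   assert(payload == 42);                        // write is now visible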
static INLINE void vpx_atomic_init(vpx_atomic_int *atomic, int value) { atomic->value = value; } static INLINE void vpx_atomic_store_release(vpx_atomic_int *atomic, int value) { #if defined(VPX_USE_ATOMIC_BUILTINS) __atomic_store_n(&atomic->value, value, __ATOMIC_RELEASE); #else vpx_atomic_memory_barrier(); atomic->value = value; #endif // defined(VPX_USE_ATOMIC_BUILTINS) } static INLINE int vpx_atomic_load_acquire(const vpx_atomic_int *atomic) { #if defined(VPX_USE_ATOMIC_BUILTINS) return __atomic_load_n(&atomic->value, __ATOMIC_ACQUIRE); #else int v = atomic->value; vpx_atomic_memory_barrier(); return v; #endif // defined(VPX_USE_ATOMIC_BUILTINS) } #undef VPX_USE_ATOMIC_BUILTINS #undef vpx_atomic_memory_barrier #endif /* CONFIG_OS_SUPPORT && CONFIG_MULTITHREAD */ #ifdef __cplusplus } // extern "C" #endif // __cplusplus #endif // VPX_VPX_UTIL_VPX_ATOMICS_H_ libvpx-1.8.2/vpx_util/vpx_debug_util.c000066400000000000000000000214501357355204000200720ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include #include #include #include "vpx_util/vpx_debug_util.h" #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG static int frame_idx_w = 0; static int frame_idx_r = 0; void bitstream_queue_set_frame_write(int frame_idx) { frame_idx_w = frame_idx; } int bitstream_queue_get_frame_write(void) { return frame_idx_w; } void bitstream_queue_set_frame_read(int frame_idx) { frame_idx_r = frame_idx; } int bitstream_queue_get_frame_read(void) { return frame_idx_r; } #endif #if CONFIG_BITSTREAM_DEBUG #define QUEUE_MAX_SIZE 2000000 static int result_queue[QUEUE_MAX_SIZE]; static int prob_queue[QUEUE_MAX_SIZE]; static int queue_r = 0; static int queue_w = 0; static int queue_prev_w = -1; static int skip_r = 0; static int skip_w = 0; void bitstream_queue_set_skip_write(int skip) { skip_w = skip; } void bitstream_queue_set_skip_read(int skip) { skip_r = skip; } void bitstream_queue_record_write(void) { queue_prev_w = queue_w; } void bitstream_queue_reset_write(void) { queue_w = queue_prev_w; } int bitstream_queue_get_write(void) { return queue_w; } int bitstream_queue_get_read(void) { return queue_r; } void bitstream_queue_pop(int *result, int *prob) { if (!skip_r) { if (queue_w == queue_r) { printf("buffer underflow queue_w %d queue_r %d\n", queue_w, queue_r); assert(0); } *result = result_queue[queue_r]; *prob = prob_queue[queue_r]; queue_r = (queue_r + 1) % QUEUE_MAX_SIZE; } } void bitstream_queue_push(int result, const int prob) { if (!skip_w) { result_queue[queue_w] = result; prob_queue[queue_w] = prob; queue_w = (queue_w + 1) % QUEUE_MAX_SIZE; if (queue_w == queue_r) { printf("buffer overflow queue_w %d queue_r %d\n", queue_w, queue_r); assert(0); } } } #endif // CONFIG_BITSTREAM_DEBUG #if CONFIG_MISMATCH_DEBUG static int frame_buf_idx_r = 0; static int frame_buf_idx_w = 0; #define MAX_FRAME_BUF_NUM 20 #define MAX_FRAME_STRIDE 1920 #define MAX_FRAME_HEIGHT 1080 static uint16_t frame_pre[MAX_FRAME_BUF_NUM][3] [MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT]; // prediction only static uint16_t frame_tx[MAX_FRAME_BUF_NUM][3] [MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT]; // prediction + txfm static int frame_stride = MAX_FRAME_STRIDE; static int frame_height 
= MAX_FRAME_HEIGHT; static int frame_size = MAX_FRAME_STRIDE * MAX_FRAME_HEIGHT; void mismatch_move_frame_idx_w(void) { frame_buf_idx_w = (frame_buf_idx_w + 1) % MAX_FRAME_BUF_NUM; if (frame_buf_idx_w == frame_buf_idx_r) { printf("frame_buf overflow\n"); assert(0); } } void mismatch_reset_frame(int num_planes) { int plane; for (plane = 0; plane < num_planes; ++plane) { memset(frame_pre[frame_buf_idx_w][plane], 0, sizeof(frame_pre[frame_buf_idx_w][plane][0]) * frame_size); memset(frame_tx[frame_buf_idx_w][plane], 0, sizeof(frame_tx[frame_buf_idx_w][plane][0]) * frame_size); } } void mismatch_move_frame_idx_r(void) { if (frame_buf_idx_w == frame_buf_idx_r) { printf("frame_buf underflow\n"); assert(0); } frame_buf_idx_r = (frame_buf_idx_r + 1) % MAX_FRAME_BUF_NUM; } void mismatch_record_block_pre(const uint8_t *src, int src_stride, int plane, int pixel_c, int pixel_r, int blk_w, int blk_h, int highbd) { const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL; int r, c; if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) { printf("frame_buf undersized\n"); assert(0); } for (r = 0; r < blk_h; ++r) { for (c = 0; c < blk_w; ++c) { frame_pre[frame_buf_idx_w][plane] [(r + pixel_r) * frame_stride + c + pixel_c] = src16 ? src16[r * src_stride + c] : src[r * src_stride + c]; } } #if 0 { int ref_frame_idx = 3; int ref_plane = 1; int ref_pixel_c = 162; int ref_pixel_r = 16; if (frame_idx_w == ref_frame_idx && plane == ref_plane && ref_pixel_c >= pixel_c && ref_pixel_c < pixel_c + blk_w && ref_pixel_r >= pixel_r && ref_pixel_r < pixel_r + blk_h) { printf( "\nrecord_block_pre frame_idx %d plane %d pixel_c %d pixel_r %d blk_w" " %d blk_h %d\n", frame_idx_w, plane, pixel_c, pixel_r, blk_w, blk_h); } } #endif } void mismatch_record_block_tx(const uint8_t *src, int src_stride, int plane, int pixel_c, int pixel_r, int blk_w, int blk_h, int highbd) { const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL; int r, c; if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) { printf("frame_buf undersized\n"); assert(0); } for (r = 0; r < blk_h; ++r) { for (c = 0; c < blk_w; ++c) { frame_tx[frame_buf_idx_w][plane] [(r + pixel_r) * frame_stride + c + pixel_c] = src16 ? src16[r * src_stride + c] : src[r * src_stride + c]; } } #if 0 { int ref_frame_idx = 3; int ref_plane = 1; int ref_pixel_c = 162; int ref_pixel_r = 16; if (frame_idx_w == ref_frame_idx && plane == ref_plane && ref_pixel_c >= pixel_c && ref_pixel_c < pixel_c + blk_w && ref_pixel_r >= pixel_r && ref_pixel_r < pixel_r + blk_h) { printf( "\nrecord_block_tx frame_idx %d plane %d pixel_c %d pixel_r %d blk_w " "%d blk_h %d\n", frame_idx_w, plane, pixel_c, pixel_r, blk_w, blk_h); } } #endif } void mismatch_check_block_pre(const uint8_t *src, int src_stride, int plane, int pixel_c, int pixel_r, int blk_w, int blk_h, int highbd) { const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL; int mismatch = 0; int r, c; if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) { printf("frame_buf undersized\n"); assert(0); } for (r = 0; r < blk_h; ++r) { for (c = 0; c < blk_w; ++c) { if (frame_pre[frame_buf_idx_r][plane] [(r + pixel_r) * frame_stride + c + pixel_c] != (uint16_t)(src16 ? 
src16[r * src_stride + c] : src[r * src_stride + c])) { mismatch = 1; } } } if (mismatch) { int rr, cc; printf( "\ncheck_block_pre failed frame_idx %d plane %d " "pixel_c %d pixel_r " "%d blk_w %d blk_h %d\n", frame_idx_r, plane, pixel_c, pixel_r, blk_w, blk_h); printf("enc\n"); for (rr = 0; rr < blk_h; ++rr) { for (cc = 0; cc < blk_w; ++cc) { printf("%d ", frame_pre[frame_buf_idx_r][plane] [(rr + pixel_r) * frame_stride + cc + pixel_c]); } printf("\n"); } printf("dec\n"); for (rr = 0; rr < blk_h; ++rr) { for (cc = 0; cc < blk_w; ++cc) { printf("%d ", src16 ? src16[rr * src_stride + cc] : src[rr * src_stride + cc]); } printf("\n"); } assert(0); } } void mismatch_check_block_tx(const uint8_t *src, int src_stride, int plane, int pixel_c, int pixel_r, int blk_w, int blk_h, int highbd) { const uint16_t *src16 = highbd ? CONVERT_TO_SHORTPTR(src) : NULL; int mismatch = 0; int r, c; if (pixel_c + blk_w >= frame_stride || pixel_r + blk_h >= frame_height) { printf("frame_buf undersized\n"); assert(0); } for (r = 0; r < blk_h; ++r) { for (c = 0; c < blk_w; ++c) { if (frame_tx[frame_buf_idx_r][plane] [(r + pixel_r) * frame_stride + c + pixel_c] != (uint16_t)(src16 ? src16[r * src_stride + c] : src[r * src_stride + c])) { mismatch = 1; } } } if (mismatch) { int rr, cc; printf( "\ncheck_block_tx failed frame_idx %d plane %d pixel_c " "%d pixel_r " "%d blk_w %d blk_h %d\n", frame_idx_r, plane, pixel_c, pixel_r, blk_w, blk_h); printf("enc\n"); for (rr = 0; rr < blk_h; ++rr) { for (cc = 0; cc < blk_w; ++cc) { printf("%d ", frame_tx[frame_buf_idx_r][plane] [(rr + pixel_r) * frame_stride + cc + pixel_c]); } printf("\n"); } printf("dec\n"); for (rr = 0; rr < blk_h; ++rr) { for (cc = 0; cc < blk_w; ++cc) { printf("%d ", src16 ? src16[rr * src_stride + cc] : src[rr * src_stride + cc]); } printf("\n"); } assert(0); } } #endif // CONFIG_MISMATCH_DEBUG libvpx-1.8.2/vpx_util/vpx_debug_util.h000066400000000000000000000054721357355204000201050ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPX_UTIL_VPX_DEBUG_UTIL_H_ #define VPX_VPX_UTIL_VPX_DEBUG_UTIL_H_ #include "./vpx_config.h" #include "vpx_dsp/prob.h" #ifdef __cplusplus extern "C" { #endif #if CONFIG_BITSTREAM_DEBUG || CONFIG_MISMATCH_DEBUG void bitstream_queue_set_frame_write(int frame_idx); int bitstream_queue_get_frame_write(void); void bitstream_queue_set_frame_read(int frame_idx); int bitstream_queue_get_frame_read(void); #endif #if CONFIG_BITSTREAM_DEBUG /* This is a debug tool used to detect bitstream error. On encoder side, it * pushes each bit and probability into a queue before the bit is written into * the Arithmetic coder. On decoder side, whenever a bit is read out from the * Arithmetic coder, it pops out the reference bit and probability from the * queue as well. If the two results do not match, this debug tool will report * an error. This tool can be used to pin down the bitstream error precisely. * By combining gdb's backtrace method, we can detect which module causes the * bitstream error. 
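 * As an illustrative pairing (function names from the declarations below):
 * the encoder calls bitstream_queue_push(bit, prob) just before handing a
 * bit to the arithmetic coder, and the decoder calls
 * bitstream_queue_pop(&ref_bit, &ref_prob) right after reading one, then
 * compares the two results to localize the first point of divergence.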
*/ int bitstream_queue_get_write(void); int bitstream_queue_get_read(void); void bitstream_queue_record_write(void); void bitstream_queue_reset_write(void); void bitstream_queue_pop(int *result, int *prob); void bitstream_queue_push(int result, const int prob); void bitstream_queue_set_skip_write(int skip); void bitstream_queue_set_skip_read(int skip); #endif // CONFIG_BITSTREAM_DEBUG #if CONFIG_MISMATCH_DEBUG void mismatch_move_frame_idx_w(void); void mismatch_move_frame_idx_r(void); void mismatch_reset_frame(int num_planes); void mismatch_record_block_pre(const uint8_t *src, int src_stride, int plane, int pixel_c, int pixel_r, int blk_w, int blk_h, int highbd); void mismatch_record_block_tx(const uint8_t *src, int src_stride, int plane, int pixel_c, int pixel_r, int blk_w, int blk_h, int highbd); void mismatch_check_block_pre(const uint8_t *src, int src_stride, int plane, int pixel_c, int pixel_r, int blk_w, int blk_h, int highbd); void mismatch_check_block_tx(const uint8_t *src, int src_stride, int plane, int pixel_c, int pixel_r, int blk_w, int blk_h, int highbd); #endif // CONFIG_MISMATCH_DEBUG #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_UTIL_VPX_DEBUG_UTIL_H_ libvpx-1.8.2/vpx_util/vpx_thread.c000066400000000000000000000124761357355204000172260ustar00rootroot00000000000000// Copyright 2013 Google Inc. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the COPYING file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. All contributing project authors may // be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Multi-threaded worker // // Original source: // https://chromium.googlesource.com/webm/libwebp #include #include // for memset() #include "./vpx_thread.h" #include "vpx_mem/vpx_mem.h" #if CONFIG_MULTITHREAD struct VPxWorkerImpl { pthread_mutex_t mutex_; pthread_cond_t condition_; pthread_t thread_; }; //------------------------------------------------------------------------------ static void execute(VPxWorker *const worker); // Forward declaration. static THREADFN thread_loop(void *ptr) { VPxWorker *const worker = (VPxWorker *)ptr; int done = 0; while (!done) { pthread_mutex_lock(&worker->impl_->mutex_); while (worker->status_ == OK) { // wait in idling mode pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); } if (worker->status_ == WORK) { execute(worker); worker->status_ = OK; } else if (worker->status_ == NOT_OK) { // finish the worker done = 1; } // signal to the main thread that we're done (for sync()) pthread_cond_signal(&worker->impl_->condition_); pthread_mutex_unlock(&worker->impl_->mutex_); } return THREAD_RETURN(NULL); // Thread is finished } // main thread state control static void change_state(VPxWorker *const worker, VPxWorkerStatus new_status) { // No-op when attempting to change state on a thread that didn't come up. // Checking status_ without acquiring the lock first would result in a data // race. 
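// Status transitions in the multithreaded build (added commentary,
// summarizing the code in this file): reset() takes NOT_OK -> OK once the
// thread is spawned; launch() requests OK -> WORK via change_state();
// thread_loop() returns the worker to OK after running the hook; end()
// drives any state to NOT_OK and joins the thread.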
if (worker->impl_ == NULL) return; pthread_mutex_lock(&worker->impl_->mutex_); if (worker->status_ >= OK) { // wait for the worker to finish while (worker->status_ != OK) { pthread_cond_wait(&worker->impl_->condition_, &worker->impl_->mutex_); } // assign new status and release the working thread if needed if (new_status != OK) { worker->status_ = new_status; pthread_cond_signal(&worker->impl_->condition_); } } pthread_mutex_unlock(&worker->impl_->mutex_); } #endif // CONFIG_MULTITHREAD //------------------------------------------------------------------------------ static void init(VPxWorker *const worker) { memset(worker, 0, sizeof(*worker)); worker->status_ = NOT_OK; } static int sync(VPxWorker *const worker) { #if CONFIG_MULTITHREAD change_state(worker, OK); #endif assert(worker->status_ <= OK); return !worker->had_error; } static int reset(VPxWorker *const worker) { int ok = 1; worker->had_error = 0; if (worker->status_ < OK) { #if CONFIG_MULTITHREAD worker->impl_ = (VPxWorkerImpl *)vpx_calloc(1, sizeof(*worker->impl_)); if (worker->impl_ == NULL) { return 0; } if (pthread_mutex_init(&worker->impl_->mutex_, NULL)) { goto Error; } if (pthread_cond_init(&worker->impl_->condition_, NULL)) { pthread_mutex_destroy(&worker->impl_->mutex_); goto Error; } pthread_mutex_lock(&worker->impl_->mutex_); ok = !pthread_create(&worker->impl_->thread_, NULL, thread_loop, worker); if (ok) worker->status_ = OK; pthread_mutex_unlock(&worker->impl_->mutex_); if (!ok) { pthread_mutex_destroy(&worker->impl_->mutex_); pthread_cond_destroy(&worker->impl_->condition_); Error: vpx_free(worker->impl_); worker->impl_ = NULL; return 0; } #else worker->status_ = OK; #endif } else if (worker->status_ > OK) { ok = sync(worker); } assert(!ok || (worker->status_ == OK)); return ok; } static void execute(VPxWorker *const worker) { if (worker->hook != NULL) { worker->had_error |= !worker->hook(worker->data1, worker->data2); } } static void launch(VPxWorker *const worker) { #if CONFIG_MULTITHREAD change_state(worker, WORK); #else execute(worker); #endif } static void end(VPxWorker *const worker) { #if CONFIG_MULTITHREAD if (worker->impl_ != NULL) { change_state(worker, NOT_OK); pthread_join(worker->impl_->thread_, NULL); pthread_mutex_destroy(&worker->impl_->mutex_); pthread_cond_destroy(&worker->impl_->condition_); vpx_free(worker->impl_); worker->impl_ = NULL; } #else worker->status_ = NOT_OK; assert(worker->impl_ == NULL); #endif assert(worker->status_ == NOT_OK); } //------------------------------------------------------------------------------ static VPxWorkerInterface g_worker_interface = { init, reset, sync, launch, execute, end }; int vpx_set_worker_interface(const VPxWorkerInterface *const winterface) { if (winterface == NULL || winterface->init == NULL || winterface->reset == NULL || winterface->sync == NULL || winterface->launch == NULL || winterface->execute == NULL || winterface->end == NULL) { return 0; } g_worker_interface = *winterface; return 1; } const VPxWorkerInterface *vpx_get_worker_interface(void) { return &g_worker_interface; } //------------------------------------------------------------------------------ libvpx-1.8.2/vpx_util/vpx_thread.h000066400000000000000000000333151357355204000172260ustar00rootroot00000000000000// Copyright 2013 Google Inc. All Rights Reserved. // // Use of this source code is governed by a BSD-style license // that can be found in the COPYING file in the root of the source // tree. An additional intellectual property rights grant can be found // in the file PATENTS. 
All contributing project authors may // be found in the AUTHORS file in the root of the source tree. // ----------------------------------------------------------------------------- // // Multi-threaded worker // // Original source: // https://chromium.googlesource.com/webm/libwebp #ifndef VPX_VPX_UTIL_VPX_THREAD_H_ #define VPX_VPX_UTIL_VPX_THREAD_H_ #include "./vpx_config.h" #ifdef __cplusplus extern "C" { #endif // Set maximum decode threads to be 8 due to the limit of frame buffers // and not enough semaphores in the emulation layer on windows. #define MAX_DECODE_THREADS 8 #if CONFIG_MULTITHREAD #if defined(_WIN32) && !HAVE_PTHREAD_H #include // NOLINT #include // NOLINT #include // NOLINT typedef HANDLE pthread_t; typedef CRITICAL_SECTION pthread_mutex_t; #if _WIN32_WINNT >= 0x0600 // Windows Vista / Server 2008 or greater #define USE_WINDOWS_CONDITION_VARIABLE typedef CONDITION_VARIABLE pthread_cond_t; #else typedef struct { HANDLE waiting_sem_; HANDLE received_sem_; HANDLE signal_event_; } pthread_cond_t; #endif // _WIN32_WINNT >= 0x600 #ifndef WINAPI_FAMILY_PARTITION #define WINAPI_PARTITION_DESKTOP 1 #define WINAPI_FAMILY_PARTITION(x) x #endif #if !WINAPI_FAMILY_PARTITION(WINAPI_PARTITION_DESKTOP) #define USE_CREATE_THREAD #endif //------------------------------------------------------------------------------ // simplistic pthread emulation layer // _beginthreadex requires __stdcall #if defined(__GNUC__) && \ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2)) #define THREADFN __attribute__((force_align_arg_pointer)) unsigned int __stdcall #else #define THREADFN unsigned int __stdcall #endif #define THREAD_RETURN(val) (unsigned int)((DWORD_PTR)val) #if _WIN32_WINNT >= 0x0501 // Windows XP or greater #define WaitForSingleObject(obj, timeout) \ WaitForSingleObjectEx(obj, timeout, FALSE /*bAlertable*/) #endif static INLINE int pthread_create(pthread_t *const thread, const void *attr, unsigned int(__stdcall *start)(void *), void *arg) { (void)attr; #ifdef USE_CREATE_THREAD *thread = CreateThread(NULL, /* lpThreadAttributes */ 0, /* dwStackSize */ start, arg, 0, /* dwStackSize */ NULL); /* lpThreadId */ #else *thread = (pthread_t)_beginthreadex(NULL, /* void *security */ 0, /* unsigned stack_size */ start, arg, 0, /* unsigned initflag */ NULL); /* unsigned *thrdaddr */ #endif if (*thread == NULL) return 1; SetThreadPriority(*thread, THREAD_PRIORITY_ABOVE_NORMAL); return 0; } static INLINE int pthread_join(pthread_t thread, void **value_ptr) { (void)value_ptr; return (WaitForSingleObject(thread, INFINITE) != WAIT_OBJECT_0 || CloseHandle(thread) == 0); } // Mutex static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex, void *mutexattr) { (void)mutexattr; #if _WIN32_WINNT >= 0x0600 // Windows Vista / Server 2008 or greater InitializeCriticalSectionEx(mutex, 0 /*dwSpinCount*/, 0 /*Flags*/); #else InitializeCriticalSection(mutex); #endif return 0; } static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) { return TryEnterCriticalSection(mutex) ? 
0 : EBUSY; } static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) { EnterCriticalSection(mutex); return 0; } static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) { LeaveCriticalSection(mutex); return 0; } static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) { DeleteCriticalSection(mutex); return 0; } // Condition static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) { int ok = 1; #ifdef USE_WINDOWS_CONDITION_VARIABLE (void)condition; #else ok &= (CloseHandle(condition->waiting_sem_) != 0); ok &= (CloseHandle(condition->received_sem_) != 0); ok &= (CloseHandle(condition->signal_event_) != 0); #endif return !ok; } static INLINE int pthread_cond_init(pthread_cond_t *const condition, void *cond_attr) { (void)cond_attr; #ifdef USE_WINDOWS_CONDITION_VARIABLE InitializeConditionVariable(condition); #else condition->waiting_sem_ = CreateSemaphore(NULL, 0, MAX_DECODE_THREADS, NULL); condition->received_sem_ = CreateSemaphore(NULL, 0, MAX_DECODE_THREADS, NULL); condition->signal_event_ = CreateEvent(NULL, FALSE, FALSE, NULL); if (condition->waiting_sem_ == NULL || condition->received_sem_ == NULL || condition->signal_event_ == NULL) { pthread_cond_destroy(condition); return 1; } #endif return 0; } static INLINE int pthread_cond_broadcast(pthread_cond_t *const condition) { int ok = 1; #ifdef USE_WINDOWS_CONDITION_VARIABLE WakeAllConditionVariable(condition); #else while (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) { // a thread is waiting in pthread_cond_wait: allow it to be notified ok &= SetEvent(condition->signal_event_); // wait until the event is consumed so the signaler cannot consume // the event via its own pthread_cond_wait. ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) != WAIT_OBJECT_0); } #endif return !ok; } static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { int ok = 1; #ifdef USE_WINDOWS_CONDITION_VARIABLE WakeConditionVariable(condition); #else if (WaitForSingleObject(condition->waiting_sem_, 0) == WAIT_OBJECT_0) { // a thread is waiting in pthread_cond_wait: allow it to be notified ok = SetEvent(condition->signal_event_); // wait until the event is consumed so the signaler cannot consume // the event via its own pthread_cond_wait. ok &= (WaitForSingleObject(condition->received_sem_, INFINITE) != WAIT_OBJECT_0); } #endif return !ok; } static INLINE int pthread_cond_wait(pthread_cond_t *const condition, pthread_mutex_t *const mutex) { int ok; #ifdef USE_WINDOWS_CONDITION_VARIABLE ok = SleepConditionVariableCS(condition, mutex, INFINITE); #else // note that there is a consumer available so the signal isn't dropped in // pthread_cond_signal if (!ReleaseSemaphore(condition->waiting_sem_, 1, NULL)) return 1; // now unlock the mutex so pthread_cond_signal may be issued pthread_mutex_unlock(mutex); ok = (WaitForSingleObject(condition->signal_event_, INFINITE) == WAIT_OBJECT_0); ok &= ReleaseSemaphore(condition->received_sem_, 1, NULL); pthread_mutex_lock(mutex); #endif return !ok; } #elif defined(__OS2__) #define INCL_DOS #include // NOLINT #include // NOLINT #include // NOLINT #include // NOLINT #if defined(__STRICT_ANSI__) // _beginthread() is not declared on __STRICT_ANSI__ mode. Declare here. 
int _beginthread(void (*)(void *), void *, unsigned, void *); #endif #define pthread_t TID #define pthread_mutex_t HMTX typedef struct { HEV event_sem_; HEV ack_sem_; volatile unsigned wait_count_; } pthread_cond_t; //------------------------------------------------------------------------------ // simplistic pthread emulation layer #define THREADFN void * #define THREAD_RETURN(val) (val) typedef struct { void *(*start_)(void *); void *arg_; } thread_arg; static void thread_start(void *arg) { thread_arg targ = *(thread_arg *)arg; free(arg); targ.start_(targ.arg_); } static INLINE int pthread_create(pthread_t *const thread, const void *attr, void *(*start)(void *), void *arg) { int tid; thread_arg *targ = (thread_arg *)malloc(sizeof(*targ)); if (targ == NULL) return 1; (void)attr; targ->start_ = start; targ->arg_ = arg; tid = (pthread_t)_beginthread(thread_start, NULL, 1024 * 1024, targ); if (tid == -1) { free(targ); return 1; } *thread = tid; return 0; } static INLINE int pthread_join(pthread_t thread, void **value_ptr) { (void)value_ptr; return DosWaitThread(&thread, DCWW_WAIT) != 0; } // Mutex static INLINE int pthread_mutex_init(pthread_mutex_t *const mutex, void *mutexattr) { (void)mutexattr; return DosCreateMutexSem(NULL, mutex, 0, FALSE) != 0; } static INLINE int pthread_mutex_trylock(pthread_mutex_t *const mutex) { return DosRequestMutexSem(*mutex, SEM_IMMEDIATE_RETURN) == 0 ? 0 : EBUSY; } static INLINE int pthread_mutex_lock(pthread_mutex_t *const mutex) { return DosRequestMutexSem(*mutex, SEM_INDEFINITE_WAIT) != 0; } static INLINE int pthread_mutex_unlock(pthread_mutex_t *const mutex) { return DosReleaseMutexSem(*mutex) != 0; } static INLINE int pthread_mutex_destroy(pthread_mutex_t *const mutex) { return DosCloseMutexSem(*mutex) != 0; } // Condition static INLINE int pthread_cond_destroy(pthread_cond_t *const condition) { int ok = 1; ok &= DosCloseEventSem(condition->event_sem_) == 0; ok &= DosCloseEventSem(condition->ack_sem_) == 0; return !ok; } static INLINE int pthread_cond_init(pthread_cond_t *const condition, void *cond_attr) { int ok = 1; (void)cond_attr; ok &= DosCreateEventSem(NULL, &condition->event_sem_, DCE_POSTONE, FALSE) == 0; ok &= DosCreateEventSem(NULL, &condition->ack_sem_, DCE_POSTONE, FALSE) == 0; if (!ok) { pthread_cond_destroy(condition); return 1; } condition->wait_count_ = 0; return 0; } static INLINE int pthread_cond_signal(pthread_cond_t *const condition) { int ok = 1; if (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0)) { ok &= DosPostEventSem(condition->event_sem_) == 0; ok &= DosWaitEventSem(condition->ack_sem_, SEM_INDEFINITE_WAIT) == 0; } return !ok; } static INLINE int pthread_cond_broadcast(pthread_cond_t *const condition) { int ok = 1; while (!__atomic_cmpxchg32(&condition->wait_count_, 0, 0)) ok &= pthread_cond_signal(condition) == 0; return !ok; } static INLINE int pthread_cond_wait(pthread_cond_t *const condition, pthread_mutex_t *const mutex) { int ok = 1; __atomic_increment(&condition->wait_count_); ok &= pthread_mutex_unlock(mutex) == 0; ok &= DosWaitEventSem(condition->event_sem_, SEM_INDEFINITE_WAIT) == 0; __atomic_decrement(&condition->wait_count_); ok &= DosPostEventSem(condition->ack_sem_) == 0; pthread_mutex_lock(mutex); return !ok; } #else // _WIN32 #include // NOLINT #define THREADFN void * #define THREAD_RETURN(val) val #endif #endif // CONFIG_MULTITHREAD // State of the worker thread object typedef enum { NOT_OK = 0, // object is unusable OK, // ready to work WORK // busy finishing the current task } VPxWorkerStatus; // 
Function to be called by the worker thread. Takes two opaque pointers as // arguments (data1 and data2), and should return false in case of error. typedef int (*VPxWorkerHook)(void *, void *); // Platform-dependent implementation details for the worker. typedef struct VPxWorkerImpl VPxWorkerImpl; // Synchronization object used to launch job in the worker thread typedef struct { VPxWorkerImpl *impl_; VPxWorkerStatus status_; VPxWorkerHook hook; // hook to call void *data1; // first argument passed to 'hook' void *data2; // second argument passed to 'hook' int had_error; // return value of the last call to 'hook' } VPxWorker; // The interface for all thread-worker related functions. All these functions // must be implemented. typedef struct { // Must be called first, before any other method. void (*init)(VPxWorker *const worker); // Must be called to initialize the object and spawn the thread. Re-entrant. // Will potentially launch the thread. Returns false in case of error. int (*reset)(VPxWorker *const worker); // Makes sure the previous work is finished. Returns true if worker->had_error // was not set and no error condition was triggered by the working thread. int (*sync)(VPxWorker *const worker); // Triggers the thread to call hook() with data1 and data2 arguments. These // hook/data1/data2 values can be changed at any time before calling this // function, but not be changed afterward until the next call to Sync(). void (*launch)(VPxWorker *const worker); // This function is similar to launch() except that it calls the // hook directly instead of using a thread. Convenient to bypass the thread // mechanism while still using the VPxWorker structs. sync() must // still be called afterward (for error reporting). void (*execute)(VPxWorker *const worker); // Kill the thread and terminate the object. To use the object again, one // must call reset() again. void (*end)(VPxWorker *const worker); } VPxWorkerInterface; // Install a new set of threading functions, overriding the defaults. This // should be done before any workers are started, i.e., before any encoding or // decoding takes place. The contents of the interface struct are copied, it // is safe to free the corresponding memory after this call. This function is // not thread-safe. Return false in case of invalid pointer or methods. int vpx_set_worker_interface(const VPxWorkerInterface *const winterface); // Retrieve the currently set thread worker interface. const VPxWorkerInterface *vpx_get_worker_interface(void); //------------------------------------------------------------------------------ #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPX_UTIL_VPX_THREAD_H_ libvpx-1.8.2/vpx_util/vpx_timestamp.h000066400000000000000000000022311357355204000177530ustar00rootroot00000000000000/* * Copyright (c) 2019 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
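// Hedged usage sketch for the VPxWorker interface declared in vpx_thread.h
// above (illustrative only; do_rows and ctx are invented names):
//
//   const VPxWorkerInterface *const iface = vpx_get_worker_interface();
//   VPxWorker worker;
//   iface->init(&worker);
//   worker.hook = do_rows;      // int do_rows(void *data1, void *data2);
//   worker.data1 = ctx;
//   worker.data2 = NULL;
//   if (!iface->reset(&worker)) return;  // thread failed to start
//   iface->launch(&worker);     // hook(data1, data2) runs asynchronously
//   /* ... other work on the calling thread ... */
//   if (!iface->sync(&worker)) { /* hook reported an error */ }
//   iface->end(&worker);        // join; call reset() to use it again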
*/ #ifndef VPX_VPX_UTIL_VPX_TIMESTAMP_H_ #define VPX_VPX_UTIL_VPX_TIMESTAMP_H_ #ifdef __cplusplus extern "C" { #endif // __cplusplus // Rational Number with an int64 numerator typedef struct vpx_rational64 { int64_t num; // fraction numerator int den; // fraction denominator } vpx_rational64_t; // alias for struct vpx_rational64_t static INLINE int gcd(int64_t a, int b) { int r; // remainder while (b > 0) { r = (int)(a % b); a = b; b = r; } return (int)a; } static INLINE void reduce_ratio(vpx_rational64_t *ratio) { const int denom = gcd(ratio->num, ratio->den); ratio->num /= denom; ratio->den /= denom; } #ifdef __cplusplus } // extern "C" #endif // __cplusplus #endif // VPX_VPX_UTIL_VPX_TIMESTAMP_H_ libvpx-1.8.2/vpx_util/vpx_util.mk000066400000000000000000000015151357355204000171110ustar00rootroot00000000000000## ## Copyright (c) 2015 The WebM project authors. All Rights Reserved. ## ## Use of this source code is governed by a BSD-style license ## that can be found in the LICENSE file in the root of the source ## tree. An additional intellectual property rights grant can be found ## in the file PATENTS. All contributing project authors may ## be found in the AUTHORS file in the root of the source tree. ## UTIL_SRCS-yes += vpx_atomics.h UTIL_SRCS-yes += vpx_util.mk UTIL_SRCS-yes += vpx_thread.c UTIL_SRCS-yes += vpx_thread.h UTIL_SRCS-yes += endian_inl.h UTIL_SRCS-yes += vpx_write_yuv_frame.h UTIL_SRCS-yes += vpx_write_yuv_frame.c UTIL_SRCS-yes += vpx_timestamp.h UTIL_SRCS-$(or $(CONFIG_BITSTREAM_DEBUG),$(CONFIG_MISMATCH_DEBUG)) += vpx_debug_util.h UTIL_SRCS-$(or $(CONFIG_BITSTREAM_DEBUG),$(CONFIG_MISMATCH_DEBUG)) += vpx_debug_util.c libvpx-1.8.2/vpx_util/vpx_write_yuv_frame.c000066400000000000000000000022161357355204000211550ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #include "vpx_dsp/skin_detection.h" #include "vpx_util/vpx_write_yuv_frame.h" void vpx_write_yuv_frame(FILE *yuv_file, YV12_BUFFER_CONFIG *s) { #if defined(OUTPUT_YUV_SRC) || defined(OUTPUT_YUV_DENOISED) || \ defined(OUTPUT_YUV_SKINMAP) || defined(OUTPUT_YUV_SVC_SRC) unsigned char *src = s->y_buffer; int h = s->y_crop_height; do { fwrite(src, s->y_width, 1, yuv_file); src += s->y_stride; } while (--h); src = s->u_buffer; h = s->uv_crop_height; do { fwrite(src, s->uv_width, 1, yuv_file); src += s->uv_stride; } while (--h); src = s->v_buffer; h = s->uv_crop_height; do { fwrite(src, s->uv_width, 1, yuv_file); src += s->uv_stride; } while (--h); #else (void)yuv_file; (void)s; #endif } libvpx-1.8.2/vpx_util/vpx_write_yuv_frame.h000066400000000000000000000013601357355204000211610ustar00rootroot00000000000000/* * Copyright (c) 2015 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
#ifndef VPX_VPX_UTIL_VPX_WRITE_YUV_FRAME_H_
#define VPX_VPX_UTIL_VPX_WRITE_YUV_FRAME_H_

#include <stdio.h>

#include "vpx_scale/yv12config.h"

#ifdef __cplusplus
extern "C" {
#endif

void vpx_write_yuv_frame(FILE *yuv_file, YV12_BUFFER_CONFIG *s);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VPX_UTIL_VPX_WRITE_YUV_FRAME_H_
libvpx-1.8.2/vpxdec.c000066400000000000000000001110571357355204000144740ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdarg.h>
#include <limits.h>

#include "./vpx_config.h"

#if CONFIG_LIBYUV
#include "third_party/libyuv/include/libyuv/scale.h"
#endif

#include "./args.h"
#include "./ivfdec.h"

#include "vpx/vpx_decoder.h"
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/vpx_timer.h"

#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
#include "vpx/vp8dx.h"
#endif

#include "./md5_utils.h"

#include "./tools_common.h"
#if CONFIG_WEBM_IO
#include "./webmdec.h"
#endif
#include "./y4menc.h"

static const char *exec_name;

struct VpxDecInputContext {
  struct VpxInputContext *vpx_input_ctx;
  struct WebmInputContext *webm_ctx;
};

static const arg_def_t help =
    ARG_DEF(NULL, "help", 0, "Show usage options and exit");
static const arg_def_t looparg =
    ARG_DEF(NULL, "loops", 1, "Number of times to decode the file");
static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1, "Codec to use");
static const arg_def_t use_yv12 =
    ARG_DEF(NULL, "yv12", 0, "Output raw YV12 frames");
static const arg_def_t use_i420 =
    ARG_DEF(NULL, "i420", 0, "Output raw I420 frames");
static const arg_def_t flipuvarg =
    ARG_DEF(NULL, "flipuv", 0, "Flip the chroma planes in the output");
static const arg_def_t rawvideo =
    ARG_DEF(NULL, "rawvideo", 0, "Output raw YUV frames");
static const arg_def_t noblitarg =
    ARG_DEF(NULL, "noblit", 0, "Don't process the decoded frames");
static const arg_def_t progressarg =
    ARG_DEF(NULL, "progress", 0, "Show progress after each frame decodes");
static const arg_def_t limitarg =
    ARG_DEF(NULL, "limit", 1, "Stop decoding after n frames");
static const arg_def_t skiparg =
    ARG_DEF(NULL, "skip", 1, "Skip the first n input frames");
static const arg_def_t postprocarg =
    ARG_DEF(NULL, "postproc", 0, "Postprocess decoded frames");
static const arg_def_t summaryarg =
    ARG_DEF(NULL, "summary", 0, "Show timing summary");
static const arg_def_t outputfile =
    ARG_DEF("o", "output", 1, "Output file name pattern (see below)");
static const arg_def_t threadsarg =
    ARG_DEF("t", "threads", 1, "Max threads to use");
static const arg_def_t frameparallelarg =
    ARG_DEF(NULL, "frame-parallel", 0, "Frame parallel decode (ignored)");
static const arg_def_t verbosearg =
    ARG_DEF("v", "verbose", 0, "Show version string");
static const arg_def_t error_concealment =
    ARG_DEF(NULL, "error-concealment", 0, "Enable decoder error-concealment");
static const arg_def_t scalearg =
    ARG_DEF("S", "scale", 0, "Scale output frames uniformly");
static const arg_def_t continuearg =
    ARG_DEF("k", "keep-going", 0, "(debug) Continue decoding after error");
static const arg_def_t fb_arg =
    ARG_DEF(NULL, "frame-buffers", 1, "Number of frame buffers to use");
static const arg_def_t md5arg =
    ARG_DEF(NULL, "md5", 0, "Compute the MD5 sum of the
decoded frame"); #if CONFIG_VP9_HIGHBITDEPTH static const arg_def_t outbitdeptharg = ARG_DEF(NULL, "output-bit-depth", 1, "Output bit-depth for decoded frames"); #endif static const arg_def_t svcdecodingarg = ARG_DEF( NULL, "svc-decode-layer", 1, "Decode SVC stream up to given spatial layer"); static const arg_def_t framestatsarg = ARG_DEF(NULL, "framestats", 1, "Output per-frame stats (.csv format)"); static const arg_def_t rowmtarg = ARG_DEF(NULL, "row-mt", 1, "Enable multi-threading to run row-wise in VP9"); static const arg_def_t lpfoptarg = ARG_DEF(NULL, "lpf-opt", 1, "Do loopfilter without waiting for all threads to sync."); static const arg_def_t *all_args[] = { &help, &codecarg, &use_yv12, &use_i420, &flipuvarg, &rawvideo, &noblitarg, &progressarg, &limitarg, &skiparg, &postprocarg, &summaryarg, &outputfile, &threadsarg, &frameparallelarg, &verbosearg, &scalearg, &fb_arg, &md5arg, &error_concealment, &continuearg, #if CONFIG_VP9_HIGHBITDEPTH &outbitdeptharg, #endif &svcdecodingarg, &framestatsarg, &rowmtarg, &lpfoptarg, NULL }; #if CONFIG_VP8_DECODER static const arg_def_t addnoise_level = ARG_DEF(NULL, "noise-level", 1, "Enable VP8 postproc add noise"); static const arg_def_t deblock = ARG_DEF(NULL, "deblock", 0, "Enable VP8 deblocking"); static const arg_def_t demacroblock_level = ARG_DEF( NULL, "demacroblock-level", 1, "Enable VP8 demacroblocking, w/ level"); static const arg_def_t mfqe = ARG_DEF(NULL, "mfqe", 0, "Enable multiframe quality enhancement"); static const arg_def_t *vp8_pp_args[] = { &addnoise_level, &deblock, &demacroblock_level, &mfqe, NULL }; #endif #if CONFIG_LIBYUV static INLINE int libyuv_scale(vpx_image_t *src, vpx_image_t *dst, FilterModeEnum mode) { #if CONFIG_VP9_HIGHBITDEPTH if (src->fmt == VPX_IMG_FMT_I42016) { assert(dst->fmt == VPX_IMG_FMT_I42016); return I420Scale_16( (uint16_t *)src->planes[VPX_PLANE_Y], src->stride[VPX_PLANE_Y] / 2, (uint16_t *)src->planes[VPX_PLANE_U], src->stride[VPX_PLANE_U] / 2, (uint16_t *)src->planes[VPX_PLANE_V], src->stride[VPX_PLANE_V] / 2, src->d_w, src->d_h, (uint16_t *)dst->planes[VPX_PLANE_Y], dst->stride[VPX_PLANE_Y] / 2, (uint16_t *)dst->planes[VPX_PLANE_U], dst->stride[VPX_PLANE_U] / 2, (uint16_t *)dst->planes[VPX_PLANE_V], dst->stride[VPX_PLANE_V] / 2, dst->d_w, dst->d_h, mode); } #endif assert(src->fmt == VPX_IMG_FMT_I420); assert(dst->fmt == VPX_IMG_FMT_I420); return I420Scale(src->planes[VPX_PLANE_Y], src->stride[VPX_PLANE_Y], src->planes[VPX_PLANE_U], src->stride[VPX_PLANE_U], src->planes[VPX_PLANE_V], src->stride[VPX_PLANE_V], src->d_w, src->d_h, dst->planes[VPX_PLANE_Y], dst->stride[VPX_PLANE_Y], dst->planes[VPX_PLANE_U], dst->stride[VPX_PLANE_U], dst->planes[VPX_PLANE_V], dst->stride[VPX_PLANE_V], dst->d_w, dst->d_h, mode); } #endif static void show_help(FILE *fout, int shorthelp) { int i; fprintf(fout, "Usage: %s filename\n\n", exec_name); if (shorthelp) { fprintf(fout, "Use --help to see the full list of options.\n"); return; } fprintf(fout, "Options:\n"); arg_show_usage(fout, all_args); #if CONFIG_VP8_DECODER fprintf(fout, "\nVP8 Postprocessing Options:\n"); arg_show_usage(fout, vp8_pp_args); #endif fprintf(fout, "\nOutput File Patterns:\n\n" " The -o argument specifies the name of the file(s) to " "write to. If the\n argument does not include any escape " "characters, the output will be\n written to a single file. 
" "Otherwise, the filename will be calculated by\n expanding " "the following escape characters:\n"); fprintf(fout, "\n\t%%w - Frame width" "\n\t%%h - Frame height" "\n\t%% - Frame number, zero padded to places (1..9)" "\n\n Pattern arguments are only supported in conjunction " "with the --yv12 and\n --i420 options. If the -o option is " "not specified, the output will be\n directed to stdout.\n"); fprintf(fout, "\nIncluded decoders:\n\n"); for (i = 0; i < get_vpx_decoder_count(); ++i) { const VpxInterface *const decoder = get_vpx_decoder_by_index(i); fprintf(fout, " %-6s - %s\n", decoder->name, vpx_codec_iface_name(decoder->codec_interface())); } } void usage_exit(void) { show_help(stderr, 1); exit(EXIT_FAILURE); } static int raw_read_frame(FILE *infile, uint8_t **buffer, size_t *bytes_read, size_t *buffer_size) { char raw_hdr[RAW_FRAME_HDR_SZ]; size_t frame_size = 0; if (fread(raw_hdr, RAW_FRAME_HDR_SZ, 1, infile) != 1) { if (!feof(infile)) warn("Failed to read RAW frame size\n"); } else { const size_t kCorruptFrameThreshold = 256 * 1024 * 1024; const size_t kFrameTooSmallThreshold = 256 * 1024; frame_size = mem_get_le32(raw_hdr); if (frame_size > kCorruptFrameThreshold) { warn("Read invalid frame size (%u)\n", (unsigned int)frame_size); frame_size = 0; } if (frame_size < kFrameTooSmallThreshold) { warn("Warning: Read invalid frame size (%u) - not a raw file?\n", (unsigned int)frame_size); } if (frame_size > *buffer_size) { uint8_t *new_buf = realloc(*buffer, 2 * frame_size); if (new_buf) { *buffer = new_buf; *buffer_size = 2 * frame_size; } else { warn("Failed to allocate compressed data buffer\n"); frame_size = 0; } } } if (!feof(infile)) { if (fread(*buffer, 1, frame_size, infile) != frame_size) { warn("Failed to read full frame\n"); return 1; } *bytes_read = frame_size; return 0; } return 1; } static int dec_read_frame(struct VpxDecInputContext *input, uint8_t **buf, size_t *bytes_in_buffer, size_t *buffer_size) { switch (input->vpx_input_ctx->file_type) { #if CONFIG_WEBM_IO case FILE_TYPE_WEBM: return webm_read_frame(input->webm_ctx, buf, bytes_in_buffer); #endif case FILE_TYPE_RAW: return raw_read_frame(input->vpx_input_ctx->file, buf, bytes_in_buffer, buffer_size); case FILE_TYPE_IVF: return ivf_read_frame(input->vpx_input_ctx->file, buf, bytes_in_buffer, buffer_size); default: return 1; } } static void update_image_md5(const vpx_image_t *img, const int planes[3], MD5Context *md5) { int i, y; for (i = 0; i < 3; ++i) { const int plane = planes[i]; const unsigned char *buf = img->planes[plane]; const int stride = img->stride[plane]; const int w = vpx_img_plane_width(img, plane) * ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 2 : 1); const int h = vpx_img_plane_height(img, plane); for (y = 0; y < h; ++y) { MD5Update(md5, buf, w); buf += stride; } } } static void write_image_file(const vpx_image_t *img, const int planes[3], FILE *file) { int i, y; #if CONFIG_VP9_HIGHBITDEPTH const int bytes_per_sample = ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) ? 
2 : 1); #else const int bytes_per_sample = 1; #endif for (i = 0; i < 3; ++i) { const int plane = planes[i]; const unsigned char *buf = img->planes[plane]; const int stride = img->stride[plane]; const int w = vpx_img_plane_width(img, plane); const int h = vpx_img_plane_height(img, plane); for (y = 0; y < h; ++y) { fwrite(buf, bytes_per_sample, w, file); buf += stride; } } } static int file_is_raw(struct VpxInputContext *input) { uint8_t buf[32]; int is_raw = 0; vpx_codec_stream_info_t si; si.sz = sizeof(si); if (fread(buf, 1, 32, input->file) == 32) { int i; if (mem_get_le32(buf) < 256 * 1024 * 1024) { for (i = 0; i < get_vpx_decoder_count(); ++i) { const VpxInterface *const decoder = get_vpx_decoder_by_index(i); if (!vpx_codec_peek_stream_info(decoder->codec_interface(), buf + 4, 32 - 4, &si)) { is_raw = 1; input->fourcc = decoder->fourcc; input->width = si.w; input->height = si.h; input->framerate.numerator = 30; input->framerate.denominator = 1; break; } } } } rewind(input->file); return is_raw; } static void show_progress(int frame_in, int frame_out, uint64_t dx_time) { fprintf(stderr, "%d decoded frames/%d showed frames in %" PRId64 " us (%.2f fps)\r", frame_in, frame_out, dx_time, (double)frame_out * 1000000.0 / (double)dx_time); } struct ExternalFrameBuffer { uint8_t *data; size_t size; int in_use; }; struct ExternalFrameBufferList { int num_external_frame_buffers; struct ExternalFrameBuffer *ext_fb; }; // Callback used by libvpx to request an external frame buffer. |cb_priv| // Application private data passed into the set function. |min_size| is the // minimum size in bytes needed to decode the next frame. |fb| pointer to the // frame buffer. static int get_vp9_frame_buffer(void *cb_priv, size_t min_size, vpx_codec_frame_buffer_t *fb) { int i; struct ExternalFrameBufferList *const ext_fb_list = (struct ExternalFrameBufferList *)cb_priv; if (ext_fb_list == NULL) return -1; // Find a free frame buffer. for (i = 0; i < ext_fb_list->num_external_frame_buffers; ++i) { if (!ext_fb_list->ext_fb[i].in_use) break; } if (i == ext_fb_list->num_external_frame_buffers) return -1; if (ext_fb_list->ext_fb[i].size < min_size) { free(ext_fb_list->ext_fb[i].data); ext_fb_list->ext_fb[i].data = (uint8_t *)calloc(min_size, sizeof(uint8_t)); if (!ext_fb_list->ext_fb[i].data) return -1; ext_fb_list->ext_fb[i].size = min_size; } fb->data = ext_fb_list->ext_fb[i].data; fb->size = ext_fb_list->ext_fb[i].size; ext_fb_list->ext_fb[i].in_use = 1; // Set the frame buffer's private data to point at the external frame buffer. fb->priv = &ext_fb_list->ext_fb[i]; return 0; } // Callback used by libvpx when there are no references to the frame buffer. // |cb_priv| user private data passed into the set function. |fb| pointer // to the frame buffer. 
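// ---------------------------------------------------------------------------
// Illustrative sketch (not part of vpxdec.c): minimal wiring of the external
// frame-buffer callbacks defined in this file. The pool size of 16 and the
// helper name setup_external_frame_buffers() are hypothetical; the entry
// point, vpx_codec_set_frame_buffer_functions(), is the real libvpx API and
// is only honored by the VP9 decoder.
static int release_vp9_frame_buffer(void *cb_priv,
                                    vpx_codec_frame_buffer_t *fb);

static vpx_codec_err_t setup_external_frame_buffers(
    vpx_codec_ctx_t *decoder, struct ExternalFrameBufferList *list) {
  list->num_external_frame_buffers = 16;  // hypothetical pool size
  list->ext_fb = (struct ExternalFrameBuffer *)calloc(
      list->num_external_frame_buffers, sizeof(*list->ext_fb));
  if (!list->ext_fb) return VPX_CODEC_MEM_ERROR;
  // libvpx will invoke get_vp9_frame_buffer()/release_vp9_frame_buffer()
  // with |list| as cb_priv whenever it needs or frees a reference buffer.
  return vpx_codec_set_frame_buffer_functions(
      decoder, get_vp9_frame_buffer, release_vp9_frame_buffer, list);
}
// ---------------------------------------------------------------------------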
static int release_vp9_frame_buffer(void *cb_priv, vpx_codec_frame_buffer_t *fb) { struct ExternalFrameBuffer *const ext_fb = (struct ExternalFrameBuffer *)fb->priv; (void)cb_priv; ext_fb->in_use = 0; return 0; } static void generate_filename(const char *pattern, char *out, size_t q_len, unsigned int d_w, unsigned int d_h, unsigned int frame_in) { const char *p = pattern; char *q = out; do { char *next_pat = strchr(p, '%'); if (p == next_pat) { size_t pat_len; /* parse the pattern */ q[q_len - 1] = '\0'; switch (p[1]) { case 'w': snprintf(q, q_len - 1, "%d", d_w); break; case 'h': snprintf(q, q_len - 1, "%d", d_h); break; case '1': snprintf(q, q_len - 1, "%d", frame_in); break; case '2': snprintf(q, q_len - 1, "%02d", frame_in); break; case '3': snprintf(q, q_len - 1, "%03d", frame_in); break; case '4': snprintf(q, q_len - 1, "%04d", frame_in); break; case '5': snprintf(q, q_len - 1, "%05d", frame_in); break; case '6': snprintf(q, q_len - 1, "%06d", frame_in); break; case '7': snprintf(q, q_len - 1, "%07d", frame_in); break; case '8': snprintf(q, q_len - 1, "%08d", frame_in); break; case '9': snprintf(q, q_len - 1, "%09d", frame_in); break; default: die("Unrecognized pattern %%%c\n", p[1]); break; } pat_len = strlen(q); if (pat_len >= q_len - 1) die("Output filename too long.\n"); q += pat_len; p += 2; q_len -= pat_len; } else { size_t copy_len; /* copy the next segment */ if (!next_pat) copy_len = strlen(p); else copy_len = next_pat - p; if (copy_len >= q_len - 1) die("Output filename too long.\n"); memcpy(q, p, copy_len); q[copy_len] = '\0'; q += copy_len; p += copy_len; q_len -= copy_len; } } while (*p); } static int is_single_file(const char *outfile_pattern) { const char *p = outfile_pattern; do { p = strchr(p, '%'); if (p && p[1] >= '1' && p[1] <= '9') return 0; // pattern contains sequence number, so it's not unique if (p) p++; } while (p); return 1; } static void print_md5(unsigned char digest[16], const char *filename) { int i; for (i = 0; i < 16; ++i) printf("%02x", digest[i]); printf(" %s\n", filename); } static FILE *open_outfile(const char *name) { if (strcmp("-", name) == 0) { set_binary_mode(stdout); return stdout; } else { FILE *file = fopen(name, "wb"); if (!file) fatal("Failed to open output file '%s'", name); return file; } } #if CONFIG_VP9_HIGHBITDEPTH static int img_shifted_realloc_required(const vpx_image_t *img, const vpx_image_t *shifted, vpx_img_fmt_t required_fmt) { return img->d_w != shifted->d_w || img->d_h != shifted->d_h || required_fmt != shifted->fmt; } #endif static int main_loop(int argc, const char **argv_) { vpx_codec_ctx_t decoder; char *fn = NULL; int i; int ret = EXIT_FAILURE; uint8_t *buf = NULL; size_t bytes_in_buffer = 0, buffer_size = 0; FILE *infile; int frame_in = 0, frame_out = 0, flipuv = 0, noblit = 0; int do_md5 = 0, progress = 0; int stop_after = 0, postproc = 0, summary = 0, quiet = 1; int arg_skip = 0; int ec_enabled = 0; int keep_going = 0; int enable_row_mt = 0; int enable_lpf_opt = 0; const VpxInterface *interface = NULL; const VpxInterface *fourcc_interface = NULL; uint64_t dx_time = 0; struct arg arg; char **argv, **argi, **argj; int single_file; int use_y4m = 1; int opt_yv12 = 0; int opt_i420 = 0; vpx_codec_dec_cfg_t cfg = { 0, 0, 0 }; #if CONFIG_VP9_HIGHBITDEPTH unsigned int output_bit_depth = 0; #endif int svc_decoding = 0; int svc_spatial_layer = 0; #if CONFIG_VP8_DECODER vp8_postproc_cfg_t vp8_pp_cfg = { 0, 0, 0 }; #endif int frames_corrupted = 0; int dec_flags = 0; int do_scale = 0; vpx_image_t *scaled_img = NULL; #if 
CONFIG_VP9_HIGHBITDEPTH vpx_image_t *img_shifted = NULL; #endif int frame_avail, got_data, flush_decoder = 0; int num_external_frame_buffers = 0; struct ExternalFrameBufferList ext_fb_list = { 0, NULL }; const char *outfile_pattern = NULL; char outfile_name[PATH_MAX] = { 0 }; FILE *outfile = NULL; FILE *framestats_file = NULL; MD5Context md5_ctx; unsigned char md5_digest[16]; struct VpxDecInputContext input = { NULL, NULL }; struct VpxInputContext vpx_input_ctx; #if CONFIG_WEBM_IO struct WebmInputContext webm_ctx; memset(&(webm_ctx), 0, sizeof(webm_ctx)); input.webm_ctx = &webm_ctx; #endif input.vpx_input_ctx = &vpx_input_ctx; /* Parse command line */ exec_name = argv_[0]; argv = argv_dup(argc - 1, argv_ + 1); for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { memset(&arg, 0, sizeof(arg)); arg.argv_step = 1; if (arg_match(&arg, &help, argi)) { show_help(stdout, 0); exit(EXIT_SUCCESS); } else if (arg_match(&arg, &codecarg, argi)) { interface = get_vpx_decoder_by_name(arg.val); if (!interface) die("Error: Unrecognized argument (%s) to --codec\n", arg.val); } else if (arg_match(&arg, &looparg, argi)) { // no-op } else if (arg_match(&arg, &outputfile, argi)) outfile_pattern = arg.val; else if (arg_match(&arg, &use_yv12, argi)) { use_y4m = 0; flipuv = 1; opt_yv12 = 1; } else if (arg_match(&arg, &use_i420, argi)) { use_y4m = 0; flipuv = 0; opt_i420 = 1; } else if (arg_match(&arg, &rawvideo, argi)) { use_y4m = 0; } else if (arg_match(&arg, &flipuvarg, argi)) flipuv = 1; else if (arg_match(&arg, &noblitarg, argi)) noblit = 1; else if (arg_match(&arg, &progressarg, argi)) progress = 1; else if (arg_match(&arg, &limitarg, argi)) stop_after = arg_parse_uint(&arg); else if (arg_match(&arg, &skiparg, argi)) arg_skip = arg_parse_uint(&arg); else if (arg_match(&arg, &postprocarg, argi)) postproc = 1; else if (arg_match(&arg, &md5arg, argi)) do_md5 = 1; else if (arg_match(&arg, &summaryarg, argi)) summary = 1; else if (arg_match(&arg, &threadsarg, argi)) cfg.threads = arg_parse_uint(&arg); #if CONFIG_VP9_DECODER else if (arg_match(&arg, &frameparallelarg, argi)) { /* ignored for compatibility */ } #endif else if (arg_match(&arg, &verbosearg, argi)) quiet = 0; else if (arg_match(&arg, &scalearg, argi)) do_scale = 1; else if (arg_match(&arg, &fb_arg, argi)) num_external_frame_buffers = arg_parse_uint(&arg); else if (arg_match(&arg, &continuearg, argi)) keep_going = 1; #if CONFIG_VP9_HIGHBITDEPTH else if (arg_match(&arg, &outbitdeptharg, argi)) { output_bit_depth = arg_parse_uint(&arg); } #endif else if (arg_match(&arg, &svcdecodingarg, argi)) { svc_decoding = 1; svc_spatial_layer = arg_parse_uint(&arg); } else if (arg_match(&arg, &framestatsarg, argi)) { framestats_file = fopen(arg.val, "w"); if (!framestats_file) { die("Error: Could not open --framestats file (%s) for writing.\n", arg.val); } } else if (arg_match(&arg, &rowmtarg, argi)) { enable_row_mt = arg_parse_uint(&arg); } else if (arg_match(&arg, &lpfoptarg, argi)) { enable_lpf_opt = arg_parse_uint(&arg); } #if CONFIG_VP8_DECODER else if (arg_match(&arg, &addnoise_level, argi)) { postproc = 1; vp8_pp_cfg.post_proc_flag |= VP8_ADDNOISE; vp8_pp_cfg.noise_level = arg_parse_uint(&arg); } else if (arg_match(&arg, &demacroblock_level, argi)) { postproc = 1; vp8_pp_cfg.post_proc_flag |= VP8_DEMACROBLOCK; vp8_pp_cfg.deblocking_level = arg_parse_uint(&arg); } else if (arg_match(&arg, &deblock, argi)) { postproc = 1; vp8_pp_cfg.post_proc_flag |= VP8_DEBLOCK; } else if (arg_match(&arg, &mfqe, argi)) { postproc = 1; vp8_pp_cfg.post_proc_flag |= 
VP8_MFQE; } else if (arg_match(&arg, &error_concealment, argi)) { ec_enabled = 1; } #endif // CONFIG_VP8_DECODER else argj++; } /* Check for unrecognized options */ for (argi = argv; *argi; argi++) if (argi[0][0] == '-' && strlen(argi[0]) > 1) die("Error: Unrecognized option %s\n", *argi); /* Handle non-option arguments */ fn = argv[0]; if (!fn) { free(argv); fprintf(stderr, "No input file specified!\n"); usage_exit(); } /* Open file */ infile = strcmp(fn, "-") ? fopen(fn, "rb") : set_binary_mode(stdin); if (!infile) { fatal("Failed to open input file '%s'", strcmp(fn, "-") ? fn : "stdin"); } #if CONFIG_OS_SUPPORT /* Make sure we don't dump to the terminal, unless forced to with -o - */ if (!outfile_pattern && isatty(fileno(stdout)) && !do_md5 && !noblit) { fprintf(stderr, "Not dumping raw video to your terminal. Use '-o -' to " "override.\n"); return EXIT_FAILURE; } #endif input.vpx_input_ctx->file = infile; if (file_is_ivf(input.vpx_input_ctx)) input.vpx_input_ctx->file_type = FILE_TYPE_IVF; #if CONFIG_WEBM_IO else if (file_is_webm(input.webm_ctx, input.vpx_input_ctx)) input.vpx_input_ctx->file_type = FILE_TYPE_WEBM; #endif else if (file_is_raw(input.vpx_input_ctx)) input.vpx_input_ctx->file_type = FILE_TYPE_RAW; else { fprintf(stderr, "Unrecognized input file type.\n"); #if !CONFIG_WEBM_IO fprintf(stderr, "vpxdec was built without WebM container support.\n"); #endif free(argv); return EXIT_FAILURE; } outfile_pattern = outfile_pattern ? outfile_pattern : "-"; single_file = is_single_file(outfile_pattern); if (!noblit && single_file) { generate_filename(outfile_pattern, outfile_name, PATH_MAX, vpx_input_ctx.width, vpx_input_ctx.height, 0); if (do_md5) MD5Init(&md5_ctx); else outfile = open_outfile(outfile_name); } if (use_y4m && !noblit) { if (!single_file) { fprintf(stderr, "YUV4MPEG2 not supported with output patterns," " try --i420 or --yv12 or --rawvideo.\n"); return EXIT_FAILURE; } #if CONFIG_WEBM_IO if (vpx_input_ctx.file_type == FILE_TYPE_WEBM) { if (webm_guess_framerate(input.webm_ctx, input.vpx_input_ctx)) { fprintf(stderr, "Failed to guess framerate -- error parsing " "webm file?\n"); return EXIT_FAILURE; } } #endif } fourcc_interface = get_vpx_decoder_by_fourcc(vpx_input_ctx.fourcc); if (interface && fourcc_interface && interface != fourcc_interface) warn("Header indicates codec: %s\n", fourcc_interface->name); else interface = fourcc_interface; if (!interface) interface = get_vpx_decoder_by_index(0); dec_flags = (postproc ? VPX_CODEC_USE_POSTPROC : 0) | (ec_enabled ? 
VPX_CODEC_USE_ERROR_CONCEALMENT : 0); if (vpx_codec_dec_init(&decoder, interface->codec_interface(), &cfg, dec_flags)) { fprintf(stderr, "Failed to initialize decoder: %s\n", vpx_codec_error(&decoder)); goto fail2; } if (svc_decoding) { if (vpx_codec_control(&decoder, VP9_DECODE_SVC_SPATIAL_LAYER, svc_spatial_layer)) { fprintf(stderr, "Failed to set spatial layer for svc decode: %s\n", vpx_codec_error(&decoder)); goto fail; } } if (interface->fourcc == VP9_FOURCC && vpx_codec_control(&decoder, VP9D_SET_ROW_MT, enable_row_mt)) { fprintf(stderr, "Failed to set decoder in row multi-thread mode: %s\n", vpx_codec_error(&decoder)); goto fail; } if (interface->fourcc == VP9_FOURCC && vpx_codec_control(&decoder, VP9D_SET_LOOP_FILTER_OPT, enable_lpf_opt)) { fprintf(stderr, "Failed to set decoder in optimized loopfilter mode: %s\n", vpx_codec_error(&decoder)); goto fail; } if (!quiet) fprintf(stderr, "%s\n", decoder.name); #if CONFIG_VP8_DECODER if (vp8_pp_cfg.post_proc_flag && vpx_codec_control(&decoder, VP8_SET_POSTPROC, &vp8_pp_cfg)) { fprintf(stderr, "Failed to configure postproc: %s\n", vpx_codec_error(&decoder)); goto fail; } #endif if (arg_skip) fprintf(stderr, "Skipping first %d frames.\n", arg_skip); while (arg_skip) { if (dec_read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) break; arg_skip--; } if (num_external_frame_buffers > 0) { ext_fb_list.num_external_frame_buffers = num_external_frame_buffers; ext_fb_list.ext_fb = (struct ExternalFrameBuffer *)calloc( num_external_frame_buffers, sizeof(*ext_fb_list.ext_fb)); if (vpx_codec_set_frame_buffer_functions(&decoder, get_vp9_frame_buffer, release_vp9_frame_buffer, &ext_fb_list)) { fprintf(stderr, "Failed to configure external frame buffers: %s\n", vpx_codec_error(&decoder)); goto fail; } } frame_avail = 1; got_data = 0; if (framestats_file) fprintf(framestats_file, "bytes,qp\n"); /* Decode file */ while (frame_avail || got_data) { vpx_codec_iter_t iter = NULL; vpx_image_t *img; struct vpx_usec_timer timer; int corrupted = 0; frame_avail = 0; if (!stop_after || frame_in < stop_after) { if (!dec_read_frame(&input, &buf, &bytes_in_buffer, &buffer_size)) { frame_avail = 1; frame_in++; vpx_usec_timer_start(&timer); if (vpx_codec_decode(&decoder, buf, (unsigned int)bytes_in_buffer, NULL, 0)) { const char *detail = vpx_codec_error_detail(&decoder); warn("Failed to decode frame %d: %s", frame_in, vpx_codec_error(&decoder)); if (detail) warn("Additional information: %s", detail); corrupted = 1; if (!keep_going) goto fail; } if (framestats_file) { int qp; if (vpx_codec_control(&decoder, VPXD_GET_LAST_QUANTIZER, &qp)) { warn("Failed VPXD_GET_LAST_QUANTIZER: %s", vpx_codec_error(&decoder)); if (!keep_going) goto fail; } fprintf(framestats_file, "%d,%d\n", (int)bytes_in_buffer, qp); } vpx_usec_timer_mark(&timer); dx_time += vpx_usec_timer_elapsed(&timer); } else { flush_decoder = 1; } } else { flush_decoder = 1; } vpx_usec_timer_start(&timer); if (flush_decoder) { // Flush the decoder in frame parallel decode. 
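// (Illustrative aside, not in the original source: passing NULL buffer data
// to vpx_codec_decode() signals end-of-stream, releasing any frames the
// decoder still holds for reordering or frame-parallel output; those frames
// are then drained with vpx_codec_get_frame() below. A standalone flush
// loop, for illustration only, would look like:
//   vpx_codec_decode(&decoder, NULL, 0, NULL, 0);
//   while ((img = vpx_codec_get_frame(&decoder, &iter)) != NULL) { /* use */ }
// The enclosing while loop here performs the same drain, one frame per pass.)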
if (vpx_codec_decode(&decoder, NULL, 0, NULL, 0)) { warn("Failed to flush decoder: %s", vpx_codec_error(&decoder)); corrupted = 1; if (!keep_going) goto fail; } } got_data = 0; if ((img = vpx_codec_get_frame(&decoder, &iter))) { ++frame_out; got_data = 1; } vpx_usec_timer_mark(&timer); dx_time += (unsigned int)vpx_usec_timer_elapsed(&timer); if (!corrupted && vpx_codec_control(&decoder, VP8D_GET_FRAME_CORRUPTED, &corrupted)) { warn("Failed VP8_GET_FRAME_CORRUPTED: %s", vpx_codec_error(&decoder)); if (!keep_going) goto fail; } frames_corrupted += corrupted; if (progress) show_progress(frame_in, frame_out, dx_time); if (!noblit && img) { const int PLANES_YUV[] = { VPX_PLANE_Y, VPX_PLANE_U, VPX_PLANE_V }; const int PLANES_YVU[] = { VPX_PLANE_Y, VPX_PLANE_V, VPX_PLANE_U }; const int *planes = flipuv ? PLANES_YVU : PLANES_YUV; if (do_scale) { if (frame_out == 1) { // If the output frames are to be scaled to a fixed display size then // use the width and height specified in the container. If either of // these is set to 0, use the display size set in the first frame // header. If that is unavailable, use the raw decoded size of the // first decoded frame. int render_width = vpx_input_ctx.width; int render_height = vpx_input_ctx.height; if (!render_width || !render_height) { int render_size[2]; if (vpx_codec_control(&decoder, VP9D_GET_DISPLAY_SIZE, render_size)) { // As last resort use size of first frame as display size. render_width = img->d_w; render_height = img->d_h; } else { render_width = render_size[0]; render_height = render_size[1]; } } scaled_img = vpx_img_alloc(NULL, img->fmt, render_width, render_height, 16); scaled_img->bit_depth = img->bit_depth; } if (img->d_w != scaled_img->d_w || img->d_h != scaled_img->d_h) { #if CONFIG_LIBYUV libyuv_scale(img, scaled_img, kFilterBox); img = scaled_img; #else fprintf(stderr, "Failed to scale output frame: %s.\n" "Scaling is disabled in this configuration. " "To enable scaling, configure with --enable-libyuv\n", vpx_codec_error(&decoder)); goto fail; #endif } } #if CONFIG_VP9_HIGHBITDEPTH // Default to codec bit depth if output bit depth not set if (!output_bit_depth && single_file && !do_md5) { output_bit_depth = img->bit_depth; } // Shift up or down if necessary if (output_bit_depth != 0 && output_bit_depth != img->bit_depth) { const vpx_img_fmt_t shifted_fmt = output_bit_depth == 8 ? 
img->fmt ^ (img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) : img->fmt | VPX_IMG_FMT_HIGHBITDEPTH; if (img_shifted && img_shifted_realloc_required(img, img_shifted, shifted_fmt)) { vpx_img_free(img_shifted); img_shifted = NULL; } if (!img_shifted) { img_shifted = vpx_img_alloc(NULL, shifted_fmt, img->d_w, img->d_h, 16); img_shifted->bit_depth = output_bit_depth; } if (output_bit_depth > img->bit_depth) { vpx_img_upshift(img_shifted, img, output_bit_depth - img->bit_depth); } else { vpx_img_downshift(img_shifted, img, img->bit_depth - output_bit_depth); } img = img_shifted; } #endif if (single_file) { if (use_y4m) { char buf[Y4M_BUFFER_SIZE] = { 0 }; size_t len = 0; if (img->fmt == VPX_IMG_FMT_I440 || img->fmt == VPX_IMG_FMT_I44016) { fprintf(stderr, "Cannot produce y4m output for 440 sampling.\n"); goto fail; } if (frame_out == 1) { // Y4M file header len = y4m_write_file_header( buf, sizeof(buf), vpx_input_ctx.width, vpx_input_ctx.height, &vpx_input_ctx.framerate, img->fmt, img->bit_depth); if (do_md5) { MD5Update(&md5_ctx, (md5byte *)buf, (unsigned int)len); } else { fputs(buf, outfile); } } // Y4M frame header len = y4m_write_frame_header(buf, sizeof(buf)); if (do_md5) { MD5Update(&md5_ctx, (md5byte *)buf, (unsigned int)len); } else { fputs(buf, outfile); } } else { if (frame_out == 1) { // Check if --yv12 or --i420 options are consistent with the // bit-stream decoded if (opt_i420) { if (img->fmt != VPX_IMG_FMT_I420 && img->fmt != VPX_IMG_FMT_I42016) { fprintf(stderr, "Cannot produce i420 output for bit-stream.\n"); goto fail; } } if (opt_yv12) { if ((img->fmt != VPX_IMG_FMT_I420 && img->fmt != VPX_IMG_FMT_YV12) || img->bit_depth != 8) { fprintf(stderr, "Cannot produce yv12 output for bit-stream.\n"); goto fail; } } } } if (do_md5) { update_image_md5(img, planes, &md5_ctx); } else { if (!corrupted) write_image_file(img, planes, outfile); } } else { generate_filename(outfile_pattern, outfile_name, PATH_MAX, img->d_w, img->d_h, frame_in); if (do_md5) { MD5Init(&md5_ctx); update_image_md5(img, planes, &md5_ctx); MD5Final(md5_digest, &md5_ctx); print_md5(md5_digest, outfile_name); } else { outfile = open_outfile(outfile_name); write_image_file(img, planes, outfile); fclose(outfile); } } } } if (summary || progress) { show_progress(frame_in, frame_out, dx_time); fprintf(stderr, "\n"); } if (frames_corrupted) { fprintf(stderr, "WARNING: %d frames corrupted.\n", frames_corrupted); } else { ret = EXIT_SUCCESS; } fail: if (vpx_codec_destroy(&decoder)) { fprintf(stderr, "Failed to destroy decoder: %s\n", vpx_codec_error(&decoder)); } fail2: if (!noblit && single_file) { if (do_md5) { MD5Final(md5_digest, &md5_ctx); print_md5(md5_digest, outfile_name); } else { fclose(outfile); } } #if CONFIG_WEBM_IO if (input.vpx_input_ctx->file_type == FILE_TYPE_WEBM) webm_free(input.webm_ctx); #endif if (input.vpx_input_ctx->file_type != FILE_TYPE_WEBM) free(buf); if (scaled_img) vpx_img_free(scaled_img); #if CONFIG_VP9_HIGHBITDEPTH if (img_shifted) vpx_img_free(img_shifted); #endif for (i = 0; i < ext_fb_list.num_external_frame_buffers; ++i) { free(ext_fb_list.ext_fb[i].data); } free(ext_fb_list.ext_fb); fclose(infile); if (framestats_file) fclose(framestats_file); free(argv); return ret; } int main(int argc, const char **argv_) { unsigned int loops = 1, i; char **argv, **argi, **argj; struct arg arg; int error = 0; argv = argv_dup(argc - 1, argv_ + 1); for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { memset(&arg, 0, sizeof(arg)); arg.argv_step = 1; if (arg_match(&arg, &looparg, argi)) { loops = 
arg_parse_uint(&arg);
      break;
    }
  }
  free(argv);

  for (i = 0; !error && i < loops; i++) error = main_loop(argc, argv_);
  return error;
}
libvpx-1.8.2/vpxenc.c000066400000000000000000002217411357355204000145100ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpxenc.h"
#include "./vpx_config.h"

#include <assert.h>
#include <limits.h>
#include <math.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#if CONFIG_LIBYUV
#include "third_party/libyuv/include/libyuv/scale.h"
#endif

#include "vpx/vpx_encoder.h"
#if CONFIG_DECODERS
#include "vpx/vpx_decoder.h"
#endif

#include "./args.h"
#include "./ivfenc.h"
#include "./tools_common.h"

#if CONFIG_VP8_ENCODER || CONFIG_VP9_ENCODER
#include "vpx/vp8cx.h"
#endif
#if CONFIG_VP8_DECODER || CONFIG_VP9_DECODER
#include "vpx/vp8dx.h"
#endif

#include "vpx/vpx_integer.h"
#include "vpx_ports/mem_ops.h"
#include "vpx_ports/vpx_timer.h"
#include "./rate_hist.h"
#include "./vpxstats.h"
#include "./warnings.h"
#if CONFIG_WEBM_IO
#include "./webmenc.h"
#endif
#include "./y4minput.h"

// Wrapper that keeps the fwrite() call sites below intact while allowing the
// symbol to be redirected through the macro that follows.
static size_t wrap_fwrite(const void *ptr, size_t size, size_t nmemb,
                          FILE *stream) {
  return fwrite(ptr, size, nmemb, stream);
}
#define fwrite wrap_fwrite

static const char *exec_name;

static void warn_or_exit_on_errorv(vpx_codec_ctx_t *ctx, int fatal,
                                   const char *s, va_list ap) {
  if (ctx->err) {
    const char *detail = vpx_codec_error_detail(ctx);

    vfprintf(stderr, s, ap);
    fprintf(stderr, ": %s\n", vpx_codec_error(ctx));

    if (detail) fprintf(stderr, " %s\n", detail);

    if (fatal) exit(EXIT_FAILURE);
  }
}

static void ctx_exit_on_error(vpx_codec_ctx_t *ctx, const char *s, ...) {
  va_list ap;

  va_start(ap, s);
  warn_or_exit_on_errorv(ctx, 1, s, ap);
  va_end(ap);
}

static void warn_or_exit_on_error(vpx_codec_ctx_t *ctx, int fatal,
                                  const char *s, ...)
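// (Illustrative aside, not in the original source: this vararg wrapper is the
// switchable warn-or-exit form of ctx_exit_on_error() above; it prints the
// message, vpx_codec_error(), and any error detail, and calls
// exit(EXIT_FAILURE) only when |fatal| is non-zero. A typical call site,
// shown here purely as an example, would be:
//   warn_or_exit_on_error(&stream->decoder, fatal_on_mismatch,
//                         "Stream %d: decode failed", stream->index);
// )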
{ va_list ap; va_start(ap, s); warn_or_exit_on_errorv(ctx, fatal, s, ap); va_end(ap); } static const arg_def_t help = ARG_DEF(NULL, "help", 0, "Show usage options and exit"); static const arg_def_t debugmode = ARG_DEF("D", "debug", 0, "Debug mode (makes output deterministic)"); static const arg_def_t outputfile = ARG_DEF("o", "output", 1, "Output filename"); static const arg_def_t use_yv12 = ARG_DEF(NULL, "yv12", 0, "Input file is YV12 "); static const arg_def_t use_i420 = ARG_DEF(NULL, "i420", 0, "Input file is I420 (default)"); static const arg_def_t use_i422 = ARG_DEF(NULL, "i422", 0, "Input file is I422"); static const arg_def_t use_i444 = ARG_DEF(NULL, "i444", 0, "Input file is I444"); static const arg_def_t use_i440 = ARG_DEF(NULL, "i440", 0, "Input file is I440"); static const arg_def_t codecarg = ARG_DEF(NULL, "codec", 1, "Codec to use"); static const arg_def_t passes = ARG_DEF("p", "passes", 1, "Number of passes (1/2)"); static const arg_def_t pass_arg = ARG_DEF(NULL, "pass", 1, "Pass to execute (1/2)"); static const arg_def_t fpf_name = ARG_DEF(NULL, "fpf", 1, "First pass statistics file name"); #if CONFIG_FP_MB_STATS static const arg_def_t fpmbf_name = ARG_DEF(NULL, "fpmbf", 1, "First pass block statistics file name"); #endif static const arg_def_t limit = ARG_DEF(NULL, "limit", 1, "Stop encoding after n input frames"); static const arg_def_t skip = ARG_DEF(NULL, "skip", 1, "Skip the first n input frames"); static const arg_def_t deadline = ARG_DEF("d", "deadline", 1, "Deadline per frame (usec)"); static const arg_def_t best_dl = ARG_DEF(NULL, "best", 0, "Use Best Quality Deadline"); static const arg_def_t good_dl = ARG_DEF(NULL, "good", 0, "Use Good Quality Deadline"); static const arg_def_t rt_dl = ARG_DEF(NULL, "rt", 0, "Use Realtime Quality Deadline"); static const arg_def_t quietarg = ARG_DEF("q", "quiet", 0, "Do not print encode progress"); static const arg_def_t verbosearg = ARG_DEF("v", "verbose", 0, "Show encoder parameters"); static const arg_def_t psnrarg = ARG_DEF(NULL, "psnr", 0, "Show PSNR in status line"); static const struct arg_enum_list test_decode_enum[] = { { "off", TEST_DECODE_OFF }, { "fatal", TEST_DECODE_FATAL }, { "warn", TEST_DECODE_WARN }, { NULL, 0 } }; static const arg_def_t recontest = ARG_DEF_ENUM( NULL, "test-decode", 1, "Test encode/decode mismatch", test_decode_enum); static const arg_def_t framerate = ARG_DEF(NULL, "fps", 1, "Stream frame rate (rate/scale)"); static const arg_def_t use_webm = ARG_DEF(NULL, "webm", 0, "Output WebM (default when WebM IO is enabled)"); static const arg_def_t use_ivf = ARG_DEF(NULL, "ivf", 0, "Output IVF"); static const arg_def_t out_part = ARG_DEF("P", "output-partitions", 0, "Makes encoder output partitions. 
Requires IVF output!"); static const arg_def_t q_hist_n = ARG_DEF(NULL, "q-hist", 1, "Show quantizer histogram (n-buckets)"); static const arg_def_t rate_hist_n = ARG_DEF(NULL, "rate-hist", 1, "Show rate histogram (n-buckets)"); static const arg_def_t disable_warnings = ARG_DEF(NULL, "disable-warnings", 0, "Disable warnings about potentially incorrect encode settings."); static const arg_def_t disable_warning_prompt = ARG_DEF("y", "disable-warning-prompt", 0, "Display warnings, but do not prompt user to continue."); #if CONFIG_VP9_HIGHBITDEPTH static const arg_def_t test16bitinternalarg = ARG_DEF( NULL, "test-16bit-internal", 0, "Force use of 16 bit internal buffer"); #endif static const arg_def_t *main_args[] = { &help, &debugmode, &outputfile, &codecarg, &passes, &pass_arg, &fpf_name, &limit, &skip, &deadline, &best_dl, &good_dl, &rt_dl, &quietarg, &verbosearg, &psnrarg, &use_webm, &use_ivf, &out_part, &q_hist_n, &rate_hist_n, &disable_warnings, &disable_warning_prompt, &recontest, NULL }; static const arg_def_t usage = ARG_DEF("u", "usage", 1, "Usage profile number to use"); static const arg_def_t threads = ARG_DEF("t", "threads", 1, "Max number of threads to use"); static const arg_def_t profile = ARG_DEF(NULL, "profile", 1, "Bitstream profile number to use"); static const arg_def_t width = ARG_DEF("w", "width", 1, "Frame width"); static const arg_def_t height = ARG_DEF("h", "height", 1, "Frame height"); #if CONFIG_WEBM_IO static const struct arg_enum_list stereo_mode_enum[] = { { "mono", STEREO_FORMAT_MONO }, { "left-right", STEREO_FORMAT_LEFT_RIGHT }, { "bottom-top", STEREO_FORMAT_BOTTOM_TOP }, { "top-bottom", STEREO_FORMAT_TOP_BOTTOM }, { "right-left", STEREO_FORMAT_RIGHT_LEFT }, { NULL, 0 } }; static const arg_def_t stereo_mode = ARG_DEF_ENUM( NULL, "stereo-mode", 1, "Stereo 3D video format", stereo_mode_enum); #endif static const arg_def_t timebase = ARG_DEF( NULL, "timebase", 1, "Output timestamp precision (fractional seconds)"); static const arg_def_t error_resilient = ARG_DEF(NULL, "error-resilient", 1, "Enable error resiliency features"); static const arg_def_t lag_in_frames = ARG_DEF(NULL, "lag-in-frames", 1, "Max number of frames to lag"); static const arg_def_t *global_args[] = { &use_yv12, &use_i420, &use_i422, &use_i444, &use_i440, &usage, &threads, &profile, &width, &height, #if CONFIG_WEBM_IO &stereo_mode, #endif &timebase, &framerate, &error_resilient, #if CONFIG_VP9_HIGHBITDEPTH &test16bitinternalarg, #endif &lag_in_frames, NULL }; static const arg_def_t dropframe_thresh = ARG_DEF(NULL, "drop-frame", 1, "Temporal resampling threshold (buf %)"); static const arg_def_t resize_allowed = ARG_DEF(NULL, "resize-allowed", 1, "Spatial resampling enabled (bool)"); static const arg_def_t resize_width = ARG_DEF(NULL, "resize-width", 1, "Width of encoded frame"); static const arg_def_t resize_height = ARG_DEF(NULL, "resize-height", 1, "Height of encoded frame"); static const arg_def_t resize_up_thresh = ARG_DEF(NULL, "resize-up", 1, "Upscale threshold (buf %)"); static const arg_def_t resize_down_thresh = ARG_DEF(NULL, "resize-down", 1, "Downscale threshold (buf %)"); static const struct arg_enum_list end_usage_enum[] = { { "vbr", VPX_VBR }, { "cbr", VPX_CBR }, { "cq", VPX_CQ }, { "q", VPX_Q }, { NULL, 0 } }; static const arg_def_t end_usage = ARG_DEF_ENUM(NULL, "end-usage", 1, "Rate control mode", end_usage_enum); static const arg_def_t target_bitrate = ARG_DEF(NULL, "target-bitrate", 1, "Bitrate (kbps)"); static const arg_def_t min_quantizer = ARG_DEF(NULL, "min-q", 1, "Minimum 
(best) quantizer"); static const arg_def_t max_quantizer = ARG_DEF(NULL, "max-q", 1, "Maximum (worst) quantizer"); static const arg_def_t undershoot_pct = ARG_DEF(NULL, "undershoot-pct", 1, "Datarate undershoot (min) target (%)"); static const arg_def_t overshoot_pct = ARG_DEF(NULL, "overshoot-pct", 1, "Datarate overshoot (max) target (%)"); static const arg_def_t buf_sz = ARG_DEF(NULL, "buf-sz", 1, "Client buffer size (ms)"); static const arg_def_t buf_initial_sz = ARG_DEF(NULL, "buf-initial-sz", 1, "Client initial buffer size (ms)"); static const arg_def_t buf_optimal_sz = ARG_DEF(NULL, "buf-optimal-sz", 1, "Client optimal buffer size (ms)"); static const arg_def_t *rc_args[] = { &dropframe_thresh, &resize_allowed, &resize_width, &resize_height, &resize_up_thresh, &resize_down_thresh, &end_usage, &target_bitrate, &min_quantizer, &max_quantizer, &undershoot_pct, &overshoot_pct, &buf_sz, &buf_initial_sz, &buf_optimal_sz, NULL }; static const arg_def_t bias_pct = ARG_DEF(NULL, "bias-pct", 1, "CBR/VBR bias (0=CBR, 100=VBR)"); static const arg_def_t minsection_pct = ARG_DEF(NULL, "minsection-pct", 1, "GOP min bitrate (% of target)"); static const arg_def_t maxsection_pct = ARG_DEF(NULL, "maxsection-pct", 1, "GOP max bitrate (% of target)"); static const arg_def_t corpus_complexity = ARG_DEF(NULL, "corpus-complexity", 1, "corpus vbr complexity midpoint"); static const arg_def_t *rc_twopass_args[] = { &bias_pct, &minsection_pct, &maxsection_pct, &corpus_complexity, NULL }; static const arg_def_t kf_min_dist = ARG_DEF(NULL, "kf-min-dist", 1, "Minimum keyframe interval (frames)"); static const arg_def_t kf_max_dist = ARG_DEF(NULL, "kf-max-dist", 1, "Maximum keyframe interval (frames)"); static const arg_def_t kf_disabled = ARG_DEF(NULL, "disable-kf", 0, "Disable keyframe placement"); static const arg_def_t *kf_args[] = { &kf_min_dist, &kf_max_dist, &kf_disabled, NULL }; static const arg_def_t noise_sens = ARG_DEF(NULL, "noise-sensitivity", 1, "Noise sensitivity (frames to blur)"); static const arg_def_t sharpness = ARG_DEF(NULL, "sharpness", 1, "Increase sharpness at the expense of lower PSNR. (0..7)"); static const arg_def_t static_thresh = ARG_DEF(NULL, "static-thresh", 1, "Motion detection threshold"); static const arg_def_t arnr_maxframes = ARG_DEF(NULL, "arnr-maxframes", 1, "AltRef max frames (0..15)"); static const arg_def_t arnr_strength = ARG_DEF(NULL, "arnr-strength", 1, "AltRef filter strength (0..6)"); static const arg_def_t arnr_type = ARG_DEF(NULL, "arnr-type", 1, "AltRef filter type (1..3)"); static const struct arg_enum_list tuning_enum[] = { { "psnr", VP8_TUNE_PSNR }, { "ssim", VP8_TUNE_SSIM }, { NULL, 0 } }; static const arg_def_t tune_ssim = ARG_DEF_ENUM(NULL, "tune", 1, "Material to favor", tuning_enum); static const arg_def_t cq_level = ARG_DEF(NULL, "cq-level", 1, "Constant/Constrained Quality level"); static const arg_def_t max_intra_rate_pct = ARG_DEF(NULL, "max-intra-rate", 1, "Max I-frame bitrate (pct)"); static const arg_def_t gf_cbr_boost_pct = ARG_DEF( NULL, "gf-cbr-boost", 1, "Boost for Golden Frame in CBR mode (pct)"); #if CONFIG_VP8_ENCODER static const arg_def_t cpu_used_vp8 = ARG_DEF(NULL, "cpu-used", 1, "CPU Used (-16..16)"); static const arg_def_t auto_altref_vp8 = ARG_DEF( NULL, "auto-alt-ref", 1, "Enable automatic alt reference frames. 
(0..1)"); static const arg_def_t token_parts = ARG_DEF(NULL, "token-parts", 1, "Number of token partitions to use, log2"); static const arg_def_t screen_content_mode = ARG_DEF(NULL, "screen-content-mode", 1, "Screen content mode"); static const arg_def_t *vp8_args[] = { &cpu_used_vp8, &auto_altref_vp8, &noise_sens, &sharpness, &static_thresh, &token_parts, &arnr_maxframes, &arnr_strength, &arnr_type, &tune_ssim, &cq_level, &max_intra_rate_pct, &gf_cbr_boost_pct, &screen_content_mode, NULL }; static const int vp8_arg_ctrl_map[] = { VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF, VP8E_SET_NOISE_SENSITIVITY, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD, VP8E_SET_TOKEN_PARTITIONS, VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE, VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, VP8E_SET_GF_CBR_BOOST_PCT, VP8E_SET_SCREEN_CONTENT_MODE, 0 }; #endif #if CONFIG_VP9_ENCODER static const arg_def_t cpu_used_vp9 = ARG_DEF(NULL, "cpu-used", 1, "CPU Used (-9..9)"); static const arg_def_t auto_altref_vp9 = ARG_DEF( NULL, "auto-alt-ref", 1, "Enable automatic alt reference frames, 2+ enables multi-layer. (0..6)"); static const arg_def_t tile_cols = ARG_DEF(NULL, "tile-columns", 1, "Number of tile columns to use, log2"); static const arg_def_t tile_rows = ARG_DEF(NULL, "tile-rows", 1, "Number of tile rows to use, log2 (set to 0 while threads > 1)"); static const arg_def_t enable_tpl_model = ARG_DEF(NULL, "enable-tpl", 1, "Enable temporal dependency model"); static const arg_def_t lossless = ARG_DEF(NULL, "lossless", 1, "Lossless mode (0: false (default), 1: true)"); static const arg_def_t frame_parallel_decoding = ARG_DEF( NULL, "frame-parallel", 1, "Enable frame parallel decodability features"); static const arg_def_t aq_mode = ARG_DEF( NULL, "aq-mode", 1, "Adaptive quantization mode (0: off (default), 1: variance 2: complexity, " "3: cyclic refresh, 4: equator360)"); static const arg_def_t alt_ref_aq = ARG_DEF(NULL, "alt-ref-aq", 1, "Special adaptive quantization for " "the alternate reference frames."); static const arg_def_t frame_periodic_boost = ARG_DEF(NULL, "frame-boost", 1, "Enable frame periodic boost (0: off (default), 1: on)"); static const arg_def_t max_inter_rate_pct = ARG_DEF(NULL, "max-inter-rate", 1, "Max P-frame bitrate (pct)"); static const arg_def_t min_gf_interval = ARG_DEF( NULL, "min-gf-interval", 1, "min gf/arf frame interval (default 0, indicating in-built behavior)"); static const arg_def_t max_gf_interval = ARG_DEF( NULL, "max-gf-interval", 1, "max gf/arf frame interval (default 0, indicating in-built behavior)"); static const struct arg_enum_list color_space_enum[] = { { "unknown", VPX_CS_UNKNOWN }, { "bt601", VPX_CS_BT_601 }, { "bt709", VPX_CS_BT_709 }, { "smpte170", VPX_CS_SMPTE_170 }, { "smpte240", VPX_CS_SMPTE_240 }, { "bt2020", VPX_CS_BT_2020 }, { "reserved", VPX_CS_RESERVED }, { "sRGB", VPX_CS_SRGB }, { NULL, 0 } }; static const arg_def_t input_color_space = ARG_DEF_ENUM(NULL, "color-space", 1, "The color space of input content:", color_space_enum); #if CONFIG_VP9_HIGHBITDEPTH static const struct arg_enum_list bitdepth_enum[] = { { "8", VPX_BITS_8 }, { "10", VPX_BITS_10 }, { "12", VPX_BITS_12 }, { NULL, 0 } }; static const arg_def_t bitdeptharg = ARG_DEF_ENUM( "b", "bit-depth", 1, "Bit depth for codec (8 for version <=1, 10 or 12 for version 2)", bitdepth_enum); static const arg_def_t inbitdeptharg = ARG_DEF(NULL, "input-bit-depth", 1, "Bit depth of input"); #endif static const struct arg_enum_list tune_content_enum[] = { { "default", 
VP9E_CONTENT_DEFAULT }, { "screen", VP9E_CONTENT_SCREEN }, { "film", VP9E_CONTENT_FILM }, { NULL, 0 } }; static const arg_def_t tune_content = ARG_DEF_ENUM( NULL, "tune-content", 1, "Tune content type", tune_content_enum); static const arg_def_t target_level = ARG_DEF( NULL, "target-level", 1, "Target level\n" " 255: off (default)\n" " 0: only keep level stats\n" " 1: adaptively set alt-ref " "distance and column tile limit based on picture size, and keep" " level stats\n" " 10: level 1.0 11: level 1.1 " "... 62: level 6.2"); static const arg_def_t row_mt = ARG_DEF(NULL, "row-mt", 1, "Enable row based non-deterministic multi-threading in VP9"); #endif #if CONFIG_VP9_ENCODER static const arg_def_t *vp9_args[] = { &cpu_used_vp9, &auto_altref_vp9, &sharpness, &static_thresh, &tile_cols, &tile_rows, &enable_tpl_model, &arnr_maxframes, &arnr_strength, &arnr_type, &tune_ssim, &cq_level, &max_intra_rate_pct, &max_inter_rate_pct, &gf_cbr_boost_pct, &lossless, &frame_parallel_decoding, &aq_mode, &alt_ref_aq, &frame_periodic_boost, &noise_sens, &tune_content, &input_color_space, &min_gf_interval, &max_gf_interval, &target_level, &row_mt, #if CONFIG_VP9_HIGHBITDEPTH &bitdeptharg, &inbitdeptharg, #endif // CONFIG_VP9_HIGHBITDEPTH NULL }; static const int vp9_arg_ctrl_map[] = { VP8E_SET_CPUUSED, VP8E_SET_ENABLEAUTOALTREF, VP8E_SET_SHARPNESS, VP8E_SET_STATIC_THRESHOLD, VP9E_SET_TILE_COLUMNS, VP9E_SET_TILE_ROWS, VP9E_SET_TPL, VP8E_SET_ARNR_MAXFRAMES, VP8E_SET_ARNR_STRENGTH, VP8E_SET_ARNR_TYPE, VP8E_SET_TUNING, VP8E_SET_CQ_LEVEL, VP8E_SET_MAX_INTRA_BITRATE_PCT, VP9E_SET_MAX_INTER_BITRATE_PCT, VP9E_SET_GF_CBR_BOOST_PCT, VP9E_SET_LOSSLESS, VP9E_SET_FRAME_PARALLEL_DECODING, VP9E_SET_AQ_MODE, VP9E_SET_ALT_REF_AQ, VP9E_SET_FRAME_PERIODIC_BOOST, VP9E_SET_NOISE_SENSITIVITY, VP9E_SET_TUNE_CONTENT, VP9E_SET_COLOR_SPACE, VP9E_SET_MIN_GF_INTERVAL, VP9E_SET_MAX_GF_INTERVAL, VP9E_SET_TARGET_LEVEL, VP9E_SET_ROW_MT, 0 }; #endif static const arg_def_t *no_args[] = { NULL }; static void show_help(FILE *fout, int shorthelp) { int i; const int num_encoder = get_vpx_encoder_count(); fprintf(fout, "Usage: %s -o dst_filename src_filename \n", exec_name); if (shorthelp) { fprintf(fout, "Use --help to see the full list of options.\n"); return; } fprintf(fout, "\nOptions:\n"); arg_show_usage(fout, main_args); fprintf(fout, "\nEncoder Global Options:\n"); arg_show_usage(fout, global_args); fprintf(fout, "\nRate Control Options:\n"); arg_show_usage(fout, rc_args); fprintf(fout, "\nTwopass Rate Control Options:\n"); arg_show_usage(fout, rc_twopass_args); fprintf(fout, "\nKeyframe Placement Options:\n"); arg_show_usage(fout, kf_args); #if CONFIG_VP8_ENCODER fprintf(fout, "\nVP8 Specific Options:\n"); arg_show_usage(fout, vp8_args); #endif #if CONFIG_VP9_ENCODER fprintf(fout, "\nVP9 Specific Options:\n"); arg_show_usage(fout, vp9_args); #endif fprintf(fout, "\nStream timebase (--timebase):\n" " The desired precision of timestamps in the output, expressed\n" " in fractional seconds. Default is 1/1000.\n"); fprintf(fout, "\nIncluded encoders:\n\n"); for (i = 0; i < num_encoder; ++i) { const VpxInterface *const encoder = get_vpx_encoder_by_index(i); const char *defstr = (i == (num_encoder - 1)) ? 
"(default)" : ""; fprintf(fout, " %-6s - %s %s\n", encoder->name, vpx_codec_iface_name(encoder->codec_interface()), defstr); } fprintf(fout, "\n "); fprintf(fout, "Use --codec to switch to a non-default encoder.\n\n"); } void usage_exit(void) { show_help(stderr, 1); exit(EXIT_FAILURE); } #define NELEMENTS(x) (sizeof(x) / sizeof(x[0])) #if CONFIG_VP9_ENCODER #define ARG_CTRL_CNT_MAX NELEMENTS(vp9_arg_ctrl_map) #else #define ARG_CTRL_CNT_MAX NELEMENTS(vp8_arg_ctrl_map) #endif #if !CONFIG_WEBM_IO typedef int stereo_format_t; struct WebmOutputContext { int debug; }; #endif /* Per-stream configuration */ struct stream_config { struct vpx_codec_enc_cfg cfg; const char *out_fn; const char *stats_fn; #if CONFIG_FP_MB_STATS const char *fpmb_stats_fn; #endif stereo_format_t stereo_fmt; int arg_ctrls[ARG_CTRL_CNT_MAX][2]; int arg_ctrl_cnt; int write_webm; #if CONFIG_VP9_HIGHBITDEPTH // whether to use 16bit internal buffers int use_16bit_internal; #endif }; struct stream_state { int index; struct stream_state *next; struct stream_config config; FILE *file; struct rate_hist *rate_hist; struct WebmOutputContext webm_ctx; uint64_t psnr_sse_total; uint64_t psnr_samples_total; double psnr_totals[4]; int psnr_count; int counts[64]; vpx_codec_ctx_t encoder; unsigned int frames_out; uint64_t cx_time; size_t nbytes; stats_io_t stats; #if CONFIG_FP_MB_STATS stats_io_t fpmb_stats; #endif struct vpx_image *img; vpx_codec_ctx_t decoder; int mismatch_seen; }; static void validate_positive_rational(const char *msg, struct vpx_rational *rat) { if (rat->den < 0) { rat->num *= -1; rat->den *= -1; } if (rat->num < 0) die("Error: %s must be positive\n", msg); if (!rat->den) die("Error: %s has zero denominator\n", msg); } static void parse_global_config(struct VpxEncoderConfig *global, char **argv) { char **argi, **argj; struct arg arg; const int num_encoder = get_vpx_encoder_count(); if (num_encoder < 1) die("Error: no valid encoder available\n"); /* Initialize default parameters */ memset(global, 0, sizeof(*global)); global->codec = get_vpx_encoder_by_index(num_encoder - 1); global->passes = 0; global->color_type = I420; /* Assign default deadline to good quality */ global->deadline = VPX_DL_GOOD_QUALITY; for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { arg.argv_step = 1; if (arg_match(&arg, &help, argi)) { show_help(stdout, 0); exit(EXIT_SUCCESS); } else if (arg_match(&arg, &codecarg, argi)) { global->codec = get_vpx_encoder_by_name(arg.val); if (!global->codec) die("Error: Unrecognized argument (%s) to --codec\n", arg.val); } else if (arg_match(&arg, &passes, argi)) { global->passes = arg_parse_uint(&arg); if (global->passes < 1 || global->passes > 2) die("Error: Invalid number of passes (%d)\n", global->passes); } else if (arg_match(&arg, &pass_arg, argi)) { global->pass = arg_parse_uint(&arg); if (global->pass < 1 || global->pass > 2) die("Error: Invalid pass selected (%d)\n", global->pass); } else if (arg_match(&arg, &usage, argi)) global->usage = arg_parse_uint(&arg); else if (arg_match(&arg, &deadline, argi)) global->deadline = arg_parse_uint(&arg); else if (arg_match(&arg, &best_dl, argi)) global->deadline = VPX_DL_BEST_QUALITY; else if (arg_match(&arg, &good_dl, argi)) global->deadline = VPX_DL_GOOD_QUALITY; else if (arg_match(&arg, &rt_dl, argi)) global->deadline = VPX_DL_REALTIME; else if (arg_match(&arg, &use_yv12, argi)) global->color_type = YV12; else if (arg_match(&arg, &use_i420, argi)) global->color_type = I420; else if (arg_match(&arg, &use_i422, argi)) global->color_type = I422; else 
if (arg_match(&arg, &use_i444, argi)) global->color_type = I444; else if (arg_match(&arg, &use_i440, argi)) global->color_type = I440; else if (arg_match(&arg, &quietarg, argi)) global->quiet = 1; else if (arg_match(&arg, &verbosearg, argi)) global->verbose = 1; else if (arg_match(&arg, &limit, argi)) global->limit = arg_parse_uint(&arg); else if (arg_match(&arg, &skip, argi)) global->skip_frames = arg_parse_uint(&arg); else if (arg_match(&arg, &psnrarg, argi)) global->show_psnr = 1; else if (arg_match(&arg, &recontest, argi)) global->test_decode = arg_parse_enum_or_int(&arg); else if (arg_match(&arg, &framerate, argi)) { global->framerate = arg_parse_rational(&arg); validate_positive_rational(arg.name, &global->framerate); global->have_framerate = 1; } else if (arg_match(&arg, &out_part, argi)) global->out_part = 1; else if (arg_match(&arg, &debugmode, argi)) global->debug = 1; else if (arg_match(&arg, &q_hist_n, argi)) global->show_q_hist_buckets = arg_parse_uint(&arg); else if (arg_match(&arg, &rate_hist_n, argi)) global->show_rate_hist_buckets = arg_parse_uint(&arg); else if (arg_match(&arg, &disable_warnings, argi)) global->disable_warnings = 1; else if (arg_match(&arg, &disable_warning_prompt, argi)) global->disable_warning_prompt = 1; else argj++; } if (global->pass) { /* DWIM: Assume the user meant passes=2 if pass=2 is specified */ if (global->pass > global->passes) { warn("Assuming --pass=%d implies --passes=%d\n", global->pass, global->pass); global->passes = global->pass; } } /* Validate global config */ if (global->passes == 0) { #if CONFIG_VP9_ENCODER // Make default VP9 passes = 2 until there is a better quality 1-pass // encoder if (global->codec != NULL && global->codec->name != NULL) global->passes = (strcmp(global->codec->name, "vp9") == 0 && global->deadline != VPX_DL_REALTIME) ? 2 : 1; #else global->passes = 1; #endif } if (global->deadline == VPX_DL_REALTIME && global->passes > 1) { warn("Enforcing one-pass encoding in realtime mode\n"); global->passes = 1; } } static struct stream_state *new_stream(struct VpxEncoderConfig *global, struct stream_state *prev) { struct stream_state *stream; stream = calloc(1, sizeof(*stream)); if (stream == NULL) { fatal("Failed to allocate new stream."); } if (prev) { memcpy(stream, prev, sizeof(*stream)); stream->index++; prev->next = stream; } else { vpx_codec_err_t res; /* Populate encoder configuration */ res = vpx_codec_enc_config_default(global->codec->codec_interface(), &stream->config.cfg, global->usage); if (res) fatal("Failed to get config: %s\n", vpx_codec_err_to_string(res)); /* Change the default timebase to a high enough value so that the * encoder will always create strictly increasing timestamps. */ stream->config.cfg.g_timebase.den = 1000; /* Never use the library's default resolution, require it be parsed * from the file or set on the command line. 
*/ stream->config.cfg.g_w = 0; stream->config.cfg.g_h = 0; /* Initialize remaining stream parameters */ stream->config.write_webm = 1; #if CONFIG_WEBM_IO stream->config.stereo_fmt = STEREO_FORMAT_MONO; stream->webm_ctx.last_pts_ns = -1; stream->webm_ctx.writer = NULL; stream->webm_ctx.segment = NULL; #endif /* Allows removal of the application version from the EBML tags */ stream->webm_ctx.debug = global->debug; /* Default lag_in_frames is 0 in realtime mode CBR mode*/ if (global->deadline == VPX_DL_REALTIME && stream->config.cfg.rc_end_usage == 1) stream->config.cfg.g_lag_in_frames = 0; } /* Output files must be specified for each stream */ stream->config.out_fn = NULL; stream->next = NULL; return stream; } static int parse_stream_params(struct VpxEncoderConfig *global, struct stream_state *stream, char **argv) { char **argi, **argj; struct arg arg; static const arg_def_t **ctrl_args = no_args; static const int *ctrl_args_map = NULL; struct stream_config *config = &stream->config; int eos_mark_found = 0; #if CONFIG_VP9_HIGHBITDEPTH int test_16bit_internal = 0; #endif // Handle codec specific options if (0) { #if CONFIG_VP8_ENCODER } else if (strcmp(global->codec->name, "vp8") == 0) { ctrl_args = vp8_args; ctrl_args_map = vp8_arg_ctrl_map; #endif #if CONFIG_VP9_ENCODER } else if (strcmp(global->codec->name, "vp9") == 0) { ctrl_args = vp9_args; ctrl_args_map = vp9_arg_ctrl_map; #endif } for (argi = argj = argv; (*argj = *argi); argi += arg.argv_step) { arg.argv_step = 1; /* Once we've found an end-of-stream marker (--) we want to continue * shifting arguments but not consuming them. */ if (eos_mark_found) { argj++; continue; } else if (!strcmp(*argj, "--")) { eos_mark_found = 1; continue; } if (arg_match(&arg, &outputfile, argi)) { config->out_fn = arg.val; } else if (arg_match(&arg, &fpf_name, argi)) { config->stats_fn = arg.val; #if CONFIG_FP_MB_STATS } else if (arg_match(&arg, &fpmbf_name, argi)) { config->fpmb_stats_fn = arg.val; #endif } else if (arg_match(&arg, &use_webm, argi)) { #if CONFIG_WEBM_IO config->write_webm = 1; #else die("Error: --webm specified but webm is disabled."); #endif } else if (arg_match(&arg, &use_ivf, argi)) { config->write_webm = 0; } else if (arg_match(&arg, &threads, argi)) { config->cfg.g_threads = arg_parse_uint(&arg); } else if (arg_match(&arg, &profile, argi)) { config->cfg.g_profile = arg_parse_uint(&arg); } else if (arg_match(&arg, &width, argi)) { config->cfg.g_w = arg_parse_uint(&arg); } else if (arg_match(&arg, &height, argi)) { config->cfg.g_h = arg_parse_uint(&arg); #if CONFIG_VP9_HIGHBITDEPTH } else if (arg_match(&arg, &bitdeptharg, argi)) { config->cfg.g_bit_depth = arg_parse_enum_or_int(&arg); } else if (arg_match(&arg, &inbitdeptharg, argi)) { config->cfg.g_input_bit_depth = arg_parse_uint(&arg); #endif #if CONFIG_WEBM_IO } else if (arg_match(&arg, &stereo_mode, argi)) { config->stereo_fmt = arg_parse_enum_or_int(&arg); #endif } else if (arg_match(&arg, &timebase, argi)) { config->cfg.g_timebase = arg_parse_rational(&arg); validate_positive_rational(arg.name, &config->cfg.g_timebase); } else if (arg_match(&arg, &error_resilient, argi)) { config->cfg.g_error_resilient = arg_parse_uint(&arg); } else if (arg_match(&arg, &end_usage, argi)) { config->cfg.rc_end_usage = arg_parse_enum_or_int(&arg); } else if (arg_match(&arg, &lag_in_frames, argi)) { config->cfg.g_lag_in_frames = arg_parse_uint(&arg); if (global->deadline == VPX_DL_REALTIME && config->cfg.rc_end_usage == VPX_CBR && config->cfg.g_lag_in_frames != 0) { warn("non-zero %s option ignored 
in realtime CBR mode.\n", arg.name);
        config->cfg.g_lag_in_frames = 0;
      }
    } else if (arg_match(&arg, &dropframe_thresh, argi)) {
      config->cfg.rc_dropframe_thresh = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &resize_allowed, argi)) {
      config->cfg.rc_resize_allowed = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &resize_width, argi)) {
      config->cfg.rc_scaled_width = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &resize_height, argi)) {
      config->cfg.rc_scaled_height = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &resize_up_thresh, argi)) {
      config->cfg.rc_resize_up_thresh = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &resize_down_thresh, argi)) {
      config->cfg.rc_resize_down_thresh = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &target_bitrate, argi)) {
      config->cfg.rc_target_bitrate = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &min_quantizer, argi)) {
      config->cfg.rc_min_quantizer = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &max_quantizer, argi)) {
      config->cfg.rc_max_quantizer = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &undershoot_pct, argi)) {
      config->cfg.rc_undershoot_pct = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &overshoot_pct, argi)) {
      config->cfg.rc_overshoot_pct = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &buf_sz, argi)) {
      config->cfg.rc_buf_sz = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &buf_initial_sz, argi)) {
      config->cfg.rc_buf_initial_sz = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &buf_optimal_sz, argi)) {
      config->cfg.rc_buf_optimal_sz = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &bias_pct, argi)) {
      config->cfg.rc_2pass_vbr_bias_pct = arg_parse_uint(&arg);
      if (global->passes < 2)
        warn("option %s ignored in one-pass mode.\n", arg.name);
    } else if (arg_match(&arg, &minsection_pct, argi)) {
      config->cfg.rc_2pass_vbr_minsection_pct = arg_parse_uint(&arg);
      if (global->passes < 2)
        warn("option %s ignored in one-pass mode.\n", arg.name);
    } else if (arg_match(&arg, &maxsection_pct, argi)) {
      config->cfg.rc_2pass_vbr_maxsection_pct = arg_parse_uint(&arg);
      if (global->passes < 2)
        warn("option %s ignored in one-pass mode.\n", arg.name);
    } else if (arg_match(&arg, &corpus_complexity, argi)) {
      config->cfg.rc_2pass_vbr_corpus_complexity = arg_parse_uint(&arg);
      if (global->passes < 2)
        warn("option %s ignored in one-pass mode.\n", arg.name);
    } else if (arg_match(&arg, &kf_min_dist, argi)) {
      config->cfg.kf_min_dist = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &kf_max_dist, argi)) {
      config->cfg.kf_max_dist = arg_parse_uint(&arg);
    } else if (arg_match(&arg, &kf_disabled, argi)) {
      config->cfg.kf_mode = VPX_KF_DISABLED;
#if CONFIG_VP9_HIGHBITDEPTH
    } else if (arg_match(&arg, &test16bitinternalarg, argi)) {
      if (strcmp(global->codec->name, "vp9") == 0) {
        test_16bit_internal = 1;
      }
#endif
    } else {
      int i, match = 0;
      for (i = 0; ctrl_args[i]; i++) {
        if (arg_match(&arg, ctrl_args[i], argi)) {
          int j;
          match = 1;

          /* Point either to the next free element or the first
           * instance of this control.
*/ for (j = 0; j < config->arg_ctrl_cnt; j++) if (ctrl_args_map != NULL && config->arg_ctrls[j][0] == ctrl_args_map[i]) break; /* Update/insert */ assert(j < (int)ARG_CTRL_CNT_MAX); if (ctrl_args_map != NULL && j < (int)ARG_CTRL_CNT_MAX) { config->arg_ctrls[j][0] = ctrl_args_map[i]; config->arg_ctrls[j][1] = arg_parse_enum_or_int(&arg); if (j == config->arg_ctrl_cnt) config->arg_ctrl_cnt++; } } } if (!match) argj++; } } #if CONFIG_VP9_HIGHBITDEPTH if (strcmp(global->codec->name, "vp9") == 0) { config->use_16bit_internal = test_16bit_internal | (config->cfg.g_profile > 1); } #endif return eos_mark_found; } #define FOREACH_STREAM(func) \ do { \ struct stream_state *stream; \ for (stream = streams; stream; stream = stream->next) { \ func; \ } \ } while (0) static void validate_stream_config(const struct stream_state *stream, const struct VpxEncoderConfig *global) { const struct stream_state *streami; (void)global; if (!stream->config.cfg.g_w || !stream->config.cfg.g_h) fatal( "Stream %d: Specify stream dimensions with --width (-w) " " and --height (-h)", stream->index); // Check that the codec bit depth is greater than the input bit depth. if (stream->config.cfg.g_input_bit_depth > (unsigned int)stream->config.cfg.g_bit_depth) { fatal("Stream %d: codec bit depth (%d) less than input bit depth (%d)", stream->index, (int)stream->config.cfg.g_bit_depth, stream->config.cfg.g_input_bit_depth); } for (streami = stream; streami; streami = streami->next) { /* All streams require output files */ if (!streami->config.out_fn) fatal("Stream %d: Output file is required (specify with -o)", streami->index); /* Check for two streams outputting to the same file */ if (streami != stream) { const char *a = stream->config.out_fn; const char *b = streami->config.out_fn; if (!strcmp(a, b) && strcmp(a, "/dev/null") && strcmp(a, ":nul")) fatal("Stream %d: duplicate output file (from stream %d)", streami->index, stream->index); } /* Check for two streams sharing a stats file. */ if (streami != stream) { const char *a = stream->config.stats_fn; const char *b = streami->config.stats_fn; if (a && b && !strcmp(a, b)) fatal("Stream %d: duplicate stats file (from stream %d)", streami->index, stream->index); } #if CONFIG_FP_MB_STATS /* Check for two streams sharing a mb stats file. 
*/ if (streami != stream) { const char *a = stream->config.fpmb_stats_fn; const char *b = streami->config.fpmb_stats_fn; if (a && b && !strcmp(a, b)) fatal("Stream %d: duplicate mb stats file (from stream %d)", streami->index, stream->index); } #endif } } static void set_stream_dimensions(struct stream_state *stream, unsigned int w, unsigned int h) { if (!stream->config.cfg.g_w) { if (!stream->config.cfg.g_h) stream->config.cfg.g_w = w; else stream->config.cfg.g_w = w * stream->config.cfg.g_h / h; } if (!stream->config.cfg.g_h) { stream->config.cfg.g_h = h * stream->config.cfg.g_w / w; } } static const char *file_type_to_string(enum VideoFileType t) { switch (t) { case FILE_TYPE_RAW: return "RAW"; case FILE_TYPE_Y4M: return "Y4M"; default: return "Other"; } } static const char *image_format_to_string(vpx_img_fmt_t f) { switch (f) { case VPX_IMG_FMT_I420: return "I420"; case VPX_IMG_FMT_I422: return "I422"; case VPX_IMG_FMT_I444: return "I444"; case VPX_IMG_FMT_I440: return "I440"; case VPX_IMG_FMT_YV12: return "YV12"; case VPX_IMG_FMT_I42016: return "I42016"; case VPX_IMG_FMT_I42216: return "I42216"; case VPX_IMG_FMT_I44416: return "I44416"; case VPX_IMG_FMT_I44016: return "I44016"; default: return "Other"; } } static void show_stream_config(struct stream_state *stream, struct VpxEncoderConfig *global, struct VpxInputContext *input) { #define SHOW(field) \ fprintf(stderr, " %-28s = %d\n", #field, stream->config.cfg.field) if (stream->index == 0) { fprintf(stderr, "Codec: %s\n", vpx_codec_iface_name(global->codec->codec_interface())); fprintf(stderr, "Source file: %s File Type: %s Format: %s\n", input->filename, file_type_to_string(input->file_type), image_format_to_string(input->fmt)); } if (stream->next || stream->index) fprintf(stderr, "\nStream Index: %d\n", stream->index); fprintf(stderr, "Destination file: %s\n", stream->config.out_fn); fprintf(stderr, "Encoder parameters:\n"); SHOW(g_usage); SHOW(g_threads); SHOW(g_profile); SHOW(g_w); SHOW(g_h); SHOW(g_bit_depth); SHOW(g_input_bit_depth); SHOW(g_timebase.num); SHOW(g_timebase.den); SHOW(g_error_resilient); SHOW(g_pass); SHOW(g_lag_in_frames); SHOW(rc_dropframe_thresh); SHOW(rc_resize_allowed); SHOW(rc_scaled_width); SHOW(rc_scaled_height); SHOW(rc_resize_up_thresh); SHOW(rc_resize_down_thresh); SHOW(rc_end_usage); SHOW(rc_target_bitrate); SHOW(rc_min_quantizer); SHOW(rc_max_quantizer); SHOW(rc_undershoot_pct); SHOW(rc_overshoot_pct); SHOW(rc_buf_sz); SHOW(rc_buf_initial_sz); SHOW(rc_buf_optimal_sz); SHOW(rc_2pass_vbr_bias_pct); SHOW(rc_2pass_vbr_minsection_pct); SHOW(rc_2pass_vbr_maxsection_pct); SHOW(rc_2pass_vbr_corpus_complexity); SHOW(kf_mode); SHOW(kf_min_dist); SHOW(kf_max_dist); } static void open_output_file(struct stream_state *stream, struct VpxEncoderConfig *global, const struct VpxRational *pixel_aspect_ratio) { const char *fn = stream->config.out_fn; const struct vpx_codec_enc_cfg *const cfg = &stream->config.cfg; if (cfg->g_pass == VPX_RC_FIRST_PASS) return; stream->file = strcmp(fn, "-") ? 
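/* Example (illustrative, not part of the original file): when only one of
 * --width/--height is given, set_stream_dimensions() above derives the other
 * from the source aspect ratio with integer math, e.g. a 1920x1080 input and
 * g_h == 540 give g_w = 1920 * 540 / 1080 == 960. */
#if 0
static void example_derive_width(void) {
  const unsigned int src_w = 1920, src_h = 1080; /* input dimensions */
  unsigned int g_w = 0, g_h = 540;               /* user set only --height */
  if (!g_w) g_w = src_w * g_h / src_h;           /* -> 960 */
  (void)g_w;
}
#endif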
fopen(fn, "wb") : set_binary_mode(stdout); if (!stream->file) fatal("Failed to open output file"); if (stream->config.write_webm && fseek(stream->file, 0, SEEK_CUR)) fatal("WebM output to pipes not supported."); #if CONFIG_WEBM_IO if (stream->config.write_webm) { stream->webm_ctx.stream = stream->file; write_webm_file_header(&stream->webm_ctx, cfg, stream->config.stereo_fmt, global->codec->fourcc, pixel_aspect_ratio); } #else (void)pixel_aspect_ratio; #endif if (!stream->config.write_webm) { ivf_write_file_header(stream->file, cfg, global->codec->fourcc, 0); } } static void close_output_file(struct stream_state *stream, unsigned int fourcc) { const struct vpx_codec_enc_cfg *const cfg = &stream->config.cfg; if (cfg->g_pass == VPX_RC_FIRST_PASS) return; #if CONFIG_WEBM_IO if (stream->config.write_webm) { write_webm_file_footer(&stream->webm_ctx); } #endif if (!stream->config.write_webm) { if (!fseek(stream->file, 0, SEEK_SET)) ivf_write_file_header(stream->file, &stream->config.cfg, fourcc, stream->frames_out); } fclose(stream->file); } static void setup_pass(struct stream_state *stream, struct VpxEncoderConfig *global, int pass) { if (stream->config.stats_fn) { if (!stats_open_file(&stream->stats, stream->config.stats_fn, pass)) fatal("Failed to open statistics store"); } else { if (!stats_open_mem(&stream->stats, pass)) fatal("Failed to open statistics store"); } #if CONFIG_FP_MB_STATS if (stream->config.fpmb_stats_fn) { if (!stats_open_file(&stream->fpmb_stats, stream->config.fpmb_stats_fn, pass)) fatal("Failed to open mb statistics store"); } else { if (!stats_open_mem(&stream->fpmb_stats, pass)) fatal("Failed to open mb statistics store"); } #endif stream->config.cfg.g_pass = global->passes == 2 ? pass ? VPX_RC_LAST_PASS : VPX_RC_FIRST_PASS : VPX_RC_ONE_PASS; if (pass) { stream->config.cfg.rc_twopass_stats_in = stats_get(&stream->stats); #if CONFIG_FP_MB_STATS stream->config.cfg.rc_firstpass_mb_stats_in = stats_get(&stream->fpmb_stats); #endif } stream->cx_time = 0; stream->nbytes = 0; stream->frames_out = 0; } static void initialize_encoder(struct stream_state *stream, struct VpxEncoderConfig *global) { int i; int flags = 0; flags |= global->show_psnr ? VPX_CODEC_USE_PSNR : 0; flags |= global->out_part ? VPX_CODEC_USE_OUTPUT_PARTITION : 0; #if CONFIG_VP9_HIGHBITDEPTH flags |= stream->config.use_16bit_internal ? VPX_CODEC_USE_HIGHBITDEPTH : 0; #endif /* Construct Encoder Context */ vpx_codec_enc_init(&stream->encoder, global->codec->codec_interface(), &stream->config.cfg, flags); ctx_exit_on_error(&stream->encoder, "Failed to initialize encoder"); /* Note that we bypass the vpx_codec_control wrapper macro because * we're being clever to store the control IDs in an array. 
Real * applications will want to make use of the enumerations directly */ for (i = 0; i < stream->config.arg_ctrl_cnt; i++) { int ctrl = stream->config.arg_ctrls[i][0]; int value = stream->config.arg_ctrls[i][1]; if (vpx_codec_control_(&stream->encoder, ctrl, value)) fprintf(stderr, "Error: Tried to set control %d = %d\n", ctrl, value); ctx_exit_on_error(&stream->encoder, "Failed to control codec"); } #if CONFIG_DECODERS if (global->test_decode != TEST_DECODE_OFF) { const VpxInterface *decoder = get_vpx_decoder_by_name(global->codec->name); vpx_codec_dec_init(&stream->decoder, decoder->codec_interface(), NULL, 0); } #endif } static void encode_frame(struct stream_state *stream, struct VpxEncoderConfig *global, struct vpx_image *img, unsigned int frames_in) { vpx_codec_pts_t frame_start, next_frame_start; struct vpx_codec_enc_cfg *cfg = &stream->config.cfg; struct vpx_usec_timer timer; frame_start = (cfg->g_timebase.den * (int64_t)(frames_in - 1) * global->framerate.den) / cfg->g_timebase.num / global->framerate.num; next_frame_start = (cfg->g_timebase.den * (int64_t)(frames_in)*global->framerate.den) / cfg->g_timebase.num / global->framerate.num; /* Scale if necessary */ #if CONFIG_VP9_HIGHBITDEPTH if (img) { if ((img->fmt & VPX_IMG_FMT_HIGHBITDEPTH) && (img->d_w != cfg->g_w || img->d_h != cfg->g_h)) { if (img->fmt != VPX_IMG_FMT_I42016) { fprintf(stderr, "%s can only scale 4:2:0 inputs\n", exec_name); exit(EXIT_FAILURE); } #if CONFIG_LIBYUV if (!stream->img) { stream->img = vpx_img_alloc(NULL, VPX_IMG_FMT_I42016, cfg->g_w, cfg->g_h, 16); } I420Scale_16( (uint16_t *)img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y] / 2, (uint16_t *)img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U] / 2, (uint16_t *)img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V] / 2, img->d_w, img->d_h, (uint16_t *)stream->img->planes[VPX_PLANE_Y], stream->img->stride[VPX_PLANE_Y] / 2, (uint16_t *)stream->img->planes[VPX_PLANE_U], stream->img->stride[VPX_PLANE_U] / 2, (uint16_t *)stream->img->planes[VPX_PLANE_V], stream->img->stride[VPX_PLANE_V] / 2, stream->img->d_w, stream->img->d_h, kFilterBox); img = stream->img; #else stream->encoder.err = 1; ctx_exit_on_error(&stream->encoder, "Stream %d: Failed to encode frame.\n" "Scaling disabled in this configuration. \n" "To enable, configure with --enable-libyuv\n", stream->index); #endif } } #endif if (img && (img->d_w != cfg->g_w || img->d_h != cfg->g_h)) { if (img->fmt != VPX_IMG_FMT_I420 && img->fmt != VPX_IMG_FMT_YV12) { fprintf(stderr, "%s can only scale 4:2:0 8bpp inputs\n", exec_name); exit(EXIT_FAILURE); } #if CONFIG_LIBYUV if (!stream->img) stream->img = vpx_img_alloc(NULL, VPX_IMG_FMT_I420, cfg->g_w, cfg->g_h, 16); I420Scale( img->planes[VPX_PLANE_Y], img->stride[VPX_PLANE_Y], img->planes[VPX_PLANE_U], img->stride[VPX_PLANE_U], img->planes[VPX_PLANE_V], img->stride[VPX_PLANE_V], img->d_w, img->d_h, stream->img->planes[VPX_PLANE_Y], stream->img->stride[VPX_PLANE_Y], stream->img->planes[VPX_PLANE_U], stream->img->stride[VPX_PLANE_U], stream->img->planes[VPX_PLANE_V], stream->img->stride[VPX_PLANE_V], stream->img->d_w, stream->img->d_h, kFilterBox); img = stream->img; #else stream->encoder.err = 1; ctx_exit_on_error(&stream->encoder, "Stream %d: Failed to encode frame.\n" "Scaling disabled in this configuration. 
\n" "To enable, configure with --enable-libyuv\n", stream->index); #endif } vpx_usec_timer_start(&timer); vpx_codec_encode(&stream->encoder, img, frame_start, (unsigned long)(next_frame_start - frame_start), 0, global->deadline); vpx_usec_timer_mark(&timer); stream->cx_time += vpx_usec_timer_elapsed(&timer); ctx_exit_on_error(&stream->encoder, "Stream %d: Failed to encode frame", stream->index); } static void update_quantizer_histogram(struct stream_state *stream) { if (stream->config.cfg.g_pass != VPX_RC_FIRST_PASS) { int q; vpx_codec_control(&stream->encoder, VP8E_GET_LAST_QUANTIZER_64, &q); ctx_exit_on_error(&stream->encoder, "Failed to read quantizer"); stream->counts[q]++; } } static void get_cx_data(struct stream_state *stream, struct VpxEncoderConfig *global, int *got_data) { const vpx_codec_cx_pkt_t *pkt; const struct vpx_codec_enc_cfg *cfg = &stream->config.cfg; vpx_codec_iter_t iter = NULL; *got_data = 0; while ((pkt = vpx_codec_get_cx_data(&stream->encoder, &iter))) { static size_t fsize = 0; static FileOffset ivf_header_pos = 0; switch (pkt->kind) { case VPX_CODEC_CX_FRAME_PKT: if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) { stream->frames_out++; } if (!global->quiet) fprintf(stderr, " %6luF", (unsigned long)pkt->data.frame.sz); update_rate_histogram(stream->rate_hist, cfg, pkt); #if CONFIG_WEBM_IO if (stream->config.write_webm) { write_webm_block(&stream->webm_ctx, cfg, pkt); } #endif if (!stream->config.write_webm) { if (pkt->data.frame.partition_id <= 0) { ivf_header_pos = ftello(stream->file); fsize = pkt->data.frame.sz; ivf_write_frame_header(stream->file, pkt->data.frame.pts, fsize); } else { fsize += pkt->data.frame.sz; if (!(pkt->data.frame.flags & VPX_FRAME_IS_FRAGMENT)) { const FileOffset currpos = ftello(stream->file); fseeko(stream->file, ivf_header_pos, SEEK_SET); ivf_write_frame_size(stream->file, fsize); fseeko(stream->file, currpos, SEEK_SET); } } (void)fwrite(pkt->data.frame.buf, 1, pkt->data.frame.sz, stream->file); } stream->nbytes += pkt->data.raw.sz; *got_data = 1; #if CONFIG_DECODERS if (global->test_decode != TEST_DECODE_OFF && !stream->mismatch_seen) { vpx_codec_decode(&stream->decoder, pkt->data.frame.buf, (unsigned int)pkt->data.frame.sz, NULL, 0); if (stream->decoder.err) { warn_or_exit_on_error(&stream->decoder, global->test_decode == TEST_DECODE_FATAL, "Failed to decode frame %d in stream %d", stream->frames_out + 1, stream->index); stream->mismatch_seen = stream->frames_out + 1; } } #endif break; case VPX_CODEC_STATS_PKT: stream->frames_out++; stats_write(&stream->stats, pkt->data.twopass_stats.buf, pkt->data.twopass_stats.sz); stream->nbytes += pkt->data.raw.sz; break; #if CONFIG_FP_MB_STATS case VPX_CODEC_FPMB_STATS_PKT: stats_write(&stream->fpmb_stats, pkt->data.firstpass_mb_stats.buf, pkt->data.firstpass_mb_stats.sz); stream->nbytes += pkt->data.raw.sz; break; #endif case VPX_CODEC_PSNR_PKT: if (global->show_psnr) { int i; stream->psnr_sse_total += pkt->data.psnr.sse[0]; stream->psnr_samples_total += pkt->data.psnr.samples[0]; for (i = 0; i < 4; i++) { if (!global->quiet) fprintf(stderr, "%.3f ", pkt->data.psnr.psnr[i]); stream->psnr_totals[i] += pkt->data.psnr.psnr[i]; } stream->psnr_count++; } break; default: break; } } } static void show_psnr(struct stream_state *stream, double peak) { int i; double ovpsnr; if (!stream->psnr_count) return; fprintf(stderr, "Stream %d PSNR (Overall/Avg/Y/U/V)", stream->index); ovpsnr = sse_to_psnr((double)stream->psnr_samples_total, peak, (double)stream->psnr_sse_total); fprintf(stderr, " %.3f", 
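/* Sketch (not from the original tree): sse_to_psnr() in tools_common.c
 * evaluates the conventional PSNR,
 *
 *   psnr = 10 * log10(samples * peak^2 / sse)
 *
 * capped at a fixed maximum when sse == 0 (bit-exact reconstruction). For
 * example, with 8-bit peak = 255, samples = 1000 and sse = 2500 this gives
 * 10 * log10(255 * 255 * 1000 / 2500) = ~44.15 dB. */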
ovpsnr); for (i = 0; i < 4; i++) { fprintf(stderr, " %.3f", stream->psnr_totals[i] / stream->psnr_count); } fprintf(stderr, "\n"); } static float usec_to_fps(uint64_t usec, unsigned int frames) { return (float)(usec > 0 ? frames * 1000000.0 / (float)usec : 0); } static void test_decode(struct stream_state *stream, enum TestDecodeFatality fatal, const VpxInterface *codec) { vpx_image_t enc_img, dec_img; if (stream->mismatch_seen) return; /* Get the internal reference frame */ if (strcmp(codec->name, "vp8") == 0) { struct vpx_ref_frame ref_enc, ref_dec; int width, height; width = (stream->config.cfg.g_w + 15) & ~15; height = (stream->config.cfg.g_h + 15) & ~15; vpx_img_alloc(&ref_enc.img, VPX_IMG_FMT_I420, width, height, 1); enc_img = ref_enc.img; vpx_img_alloc(&ref_dec.img, VPX_IMG_FMT_I420, width, height, 1); dec_img = ref_dec.img; ref_enc.frame_type = VP8_LAST_FRAME; ref_dec.frame_type = VP8_LAST_FRAME; vpx_codec_control(&stream->encoder, VP8_COPY_REFERENCE, &ref_enc); vpx_codec_control(&stream->decoder, VP8_COPY_REFERENCE, &ref_dec); } else { struct vp9_ref_frame ref_enc, ref_dec; ref_enc.idx = 0; ref_dec.idx = 0; vpx_codec_control(&stream->encoder, VP9_GET_REFERENCE, &ref_enc); enc_img = ref_enc.img; vpx_codec_control(&stream->decoder, VP9_GET_REFERENCE, &ref_dec); dec_img = ref_dec.img; #if CONFIG_VP9_HIGHBITDEPTH if ((enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) != (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH)) { if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { vpx_img_alloc(&enc_img, enc_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH, enc_img.d_w, enc_img.d_h, 16); vpx_img_truncate_16_to_8(&enc_img, &ref_enc.img); } if (dec_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { vpx_img_alloc(&dec_img, dec_img.fmt - VPX_IMG_FMT_HIGHBITDEPTH, dec_img.d_w, dec_img.d_h, 16); vpx_img_truncate_16_to_8(&dec_img, &ref_dec.img); } } #endif } ctx_exit_on_error(&stream->encoder, "Failed to get encoder reference frame"); ctx_exit_on_error(&stream->decoder, "Failed to get decoder reference frame"); if (!compare_img(&enc_img, &dec_img)) { int y[4], u[4], v[4]; #if CONFIG_VP9_HIGHBITDEPTH if (enc_img.fmt & VPX_IMG_FMT_HIGHBITDEPTH) { find_mismatch_high(&enc_img, &dec_img, y, u, v); } else { find_mismatch(&enc_img, &dec_img, y, u, v); } #else find_mismatch(&enc_img, &dec_img, y, u, v); #endif stream->decoder.err = 1; warn_or_exit_on_error(&stream->decoder, fatal == TEST_DECODE_FATAL, "Stream %d: Encode/decode mismatch on frame %d at" " Y[%d, %d] {%d/%d}," " U[%d, %d] {%d/%d}," " V[%d, %d] {%d/%d}", stream->index, stream->frames_out, y[0], y[1], y[2], y[3], u[0], u[1], u[2], u[3], v[0], v[1], v[2], v[3]); stream->mismatch_seen = stream->frames_out; } vpx_img_free(&enc_img); vpx_img_free(&dec_img); } static void print_time(const char *label, int64_t etl) { int64_t hours; int64_t mins; int64_t secs; if (etl >= 0) { hours = etl / 3600; etl -= hours * 3600; mins = etl / 60; etl -= mins * 60; secs = etl; fprintf(stderr, "[%3s %2" PRId64 ":%02" PRId64 ":%02" PRId64 "] ", label, hours, mins, secs); } else { fprintf(stderr, "[%3s unknown] ", label); } } int main(int argc, const char **argv_) { int pass; vpx_image_t raw; #if CONFIG_VP9_HIGHBITDEPTH vpx_image_t raw_shift; int allocated_raw_shift = 0; int use_16bit_internal = 0; int input_shift = 0; #endif int frame_avail, got_data; struct VpxInputContext input; struct VpxEncoderConfig global; struct stream_state *streams = NULL; char **argv, **argi; uint64_t cx_time = 0; int stream_cnt = 0; int res = 0; memset(&input, 0, sizeof(input)); exec_name = argv_[0]; /* Setup default input stream settings */ 
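/* Example (illustrative, not part of the original file): encode_frame()
 * above maps a 0-based frame index n to a timestamp in timebase units as
 *   pts(n) = timebase.den * n * framerate.den /
 *            (timebase.num * framerate.num)
 * e.g. a 1/1000 timebase at 30/1 fps puts frame 90 at 1000 * 90 / 30 == 3000
 * ticks, i.e. 3.0 seconds. */
#if 0
static vpx_codec_pts_t example_frame_pts(const vpx_rational_t *timebase,
                                         const vpx_rational_t *fps, int n) {
  return (vpx_codec_pts_t)((int64_t)timebase->den * n * fps->den /
                           timebase->num / fps->num);
}
#endif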
input.framerate.numerator = 30; input.framerate.denominator = 1; input.only_i420 = 1; input.bit_depth = 0; /* First parse the global configuration values, because we want to apply * other parameters on top of the default configuration provided by the * codec. */ argv = argv_dup(argc - 1, argv_ + 1); parse_global_config(&global, argv); if (argc < 3) usage_exit(); switch (global.color_type) { case I420: input.fmt = VPX_IMG_FMT_I420; break; case I422: input.fmt = VPX_IMG_FMT_I422; break; case I444: input.fmt = VPX_IMG_FMT_I444; break; case I440: input.fmt = VPX_IMG_FMT_I440; break; case YV12: input.fmt = VPX_IMG_FMT_YV12; break; } { /* Now parse each stream's parameters. Using a local scope here * due to the use of 'stream' as loop variable in FOREACH_STREAM * loops */ struct stream_state *stream = NULL; do { stream = new_stream(&global, stream); stream_cnt++; if (!streams) streams = stream; } while (parse_stream_params(&global, stream, argv)); } /* Check for unrecognized options */ for (argi = argv; *argi; argi++) if (argi[0][0] == '-' && argi[0][1]) die("Error: Unrecognized option %s\n", *argi); FOREACH_STREAM(check_encoder_config(global.disable_warning_prompt, &global, &stream->config.cfg);); /* Handle non-option arguments */ input.filename = argv[0]; if (!input.filename) { fprintf(stderr, "No input file specified!\n"); usage_exit(); } /* Decide if other chroma subsamplings than 4:2:0 are supported */ if (global.codec->fourcc == VP9_FOURCC) input.only_i420 = 0; for (pass = global.pass ? global.pass - 1 : 0; pass < global.passes; pass++) { int frames_in = 0, seen_frames = 0; int64_t estimated_time_left = -1; int64_t average_rate = -1; int64_t lagged_count = 0; open_input_file(&input); /* If the input file doesn't specify its w/h (raw files), try to get * the data from the first stream's configuration. */ if (!input.width || !input.height) { FOREACH_STREAM({ if (stream->config.cfg.g_w && stream->config.cfg.g_h) { input.width = stream->config.cfg.g_w; input.height = stream->config.cfg.g_h; break; } }); } /* Update stream configurations from the input file's parameters */ if (!input.width || !input.height) fatal( "Specify stream dimensions with --width (-w) " " and --height (-h)"); /* If input file does not specify bit-depth but input-bit-depth parameter * exists, assume that to be the input bit-depth. However, if the * input-bit-depth paramter does not exist, assume the input bit-depth * to be the same as the codec bit-depth. */ if (!input.bit_depth) { FOREACH_STREAM({ if (stream->config.cfg.g_input_bit_depth) input.bit_depth = stream->config.cfg.g_input_bit_depth; else input.bit_depth = stream->config.cfg.g_input_bit_depth = (int)stream->config.cfg.g_bit_depth; }); if (input.bit_depth > 8) input.fmt |= VPX_IMG_FMT_HIGHBITDEPTH; } else { FOREACH_STREAM( { stream->config.cfg.g_input_bit_depth = input.bit_depth; }); } FOREACH_STREAM(set_stream_dimensions(stream, input.width, input.height)); FOREACH_STREAM(validate_stream_config(stream, &global)); /* Ensure that --passes and --pass are consistent. If --pass is set and * --passes=2, ensure --fpf was set. */ if (global.pass && global.passes == 2) FOREACH_STREAM({ if (!stream->config.stats_fn) die("Stream %d: Must specify --fpf when --pass=%d" " and --passes=2\n", stream->index, global.pass); }); #if !CONFIG_WEBM_IO FOREACH_STREAM({ if (stream->config.write_webm) { stream->config.write_webm = 0; warn( "vpxenc was compiled without WebM container support." 
"Producing IVF output"); } }); #endif /* Use the frame rate from the file only if none was specified * on the command-line. */ if (!global.have_framerate) { global.framerate.num = input.framerate.numerator; global.framerate.den = input.framerate.denominator; FOREACH_STREAM(stream->config.cfg.g_timebase.den = global.framerate.num; stream->config.cfg.g_timebase.num = global.framerate.den); } /* Show configuration */ if (global.verbose && pass == 0) FOREACH_STREAM(show_stream_config(stream, &global, &input)); if (pass == (global.pass ? global.pass - 1 : 0)) { if (input.file_type == FILE_TYPE_Y4M) /*The Y4M reader does its own allocation. Just initialize this here to avoid problems if we never read any frames.*/ memset(&raw, 0, sizeof(raw)); else vpx_img_alloc(&raw, input.fmt, input.width, input.height, 32); FOREACH_STREAM(stream->rate_hist = init_rate_histogram( &stream->config.cfg, &global.framerate)); } FOREACH_STREAM(setup_pass(stream, &global, pass)); FOREACH_STREAM( open_output_file(stream, &global, &input.pixel_aspect_ratio)); FOREACH_STREAM(initialize_encoder(stream, &global)); #if CONFIG_VP9_HIGHBITDEPTH if (strcmp(global.codec->name, "vp9") == 0) { // Check to see if at least one stream uses 16 bit internal. // Currently assume that the bit_depths for all streams using // highbitdepth are the same. FOREACH_STREAM({ if (stream->config.use_16bit_internal) { use_16bit_internal = 1; } if (stream->config.cfg.g_profile == 0) { input_shift = 0; } else { input_shift = (int)stream->config.cfg.g_bit_depth - stream->config.cfg.g_input_bit_depth; } }); } #endif frame_avail = 1; got_data = 0; while (frame_avail || got_data) { struct vpx_usec_timer timer; if (!global.limit || frames_in < global.limit) { frame_avail = read_frame(&input, &raw); if (frame_avail) frames_in++; seen_frames = frames_in > global.skip_frames ? frames_in - global.skip_frames : 0; if (!global.quiet) { float fps = usec_to_fps(cx_time, seen_frames); fprintf(stderr, "\rPass %d/%d ", pass + 1, global.passes); if (stream_cnt == 1) fprintf(stderr, "frame %4d/%-4d %7" PRId64 "B ", frames_in, streams->frames_out, (int64_t)streams->nbytes); else fprintf(stderr, "frame %4d ", frames_in); fprintf(stderr, "%7" PRId64 " %s %.2f %s ", cx_time > 9999999 ? cx_time / 1000 : cx_time, cx_time > 9999999 ? "ms" : "us", fps >= 1.0 ? fps : fps * 60, fps >= 1.0 ? "fps" : "fpm"); print_time("ETA", estimated_time_left); } } else frame_avail = 0; if (frames_in > global.skip_frames) { #if CONFIG_VP9_HIGHBITDEPTH vpx_image_t *frame_to_encode; if (input_shift || (use_16bit_internal && input.bit_depth == 8)) { assert(use_16bit_internal); // Input bit depth and stream bit depth do not match, so up // shift frame to stream bit depth if (!allocated_raw_shift) { vpx_img_alloc(&raw_shift, raw.fmt | VPX_IMG_FMT_HIGHBITDEPTH, input.width, input.height, 32); allocated_raw_shift = 1; } vpx_img_upshift(&raw_shift, &raw, input_shift); frame_to_encode = &raw_shift; } else { frame_to_encode = &raw; } vpx_usec_timer_start(&timer); if (use_16bit_internal) { assert(frame_to_encode->fmt & VPX_IMG_FMT_HIGHBITDEPTH); FOREACH_STREAM({ if (stream->config.use_16bit_internal) encode_frame(stream, &global, frame_avail ? frame_to_encode : NULL, frames_in); else assert(0); }); } else { assert((frame_to_encode->fmt & VPX_IMG_FMT_HIGHBITDEPTH) == 0); FOREACH_STREAM(encode_frame(stream, &global, frame_avail ? frame_to_encode : NULL, frames_in)); } #else vpx_usec_timer_start(&timer); FOREACH_STREAM(encode_frame(stream, &global, frame_avail ? 
&raw : NULL, frames_in)); #endif vpx_usec_timer_mark(&timer); cx_time += vpx_usec_timer_elapsed(&timer); FOREACH_STREAM(update_quantizer_histogram(stream)); got_data = 0; FOREACH_STREAM(get_cx_data(stream, &global, &got_data)); if (!got_data && input.length && streams != NULL && !streams->frames_out) { lagged_count = global.limit ? seen_frames : ftello(input.file); } else if (input.length) { int64_t remaining; int64_t rate; if (global.limit) { const int64_t frame_in_lagged = (seen_frames - lagged_count) * 1000; rate = cx_time ? frame_in_lagged * (int64_t)1000000 / cx_time : 0; remaining = 1000 * (global.limit - global.skip_frames - seen_frames + lagged_count); } else { const int64_t input_pos = ftello(input.file); const int64_t input_pos_lagged = input_pos - lagged_count; const int64_t limit = input.length; rate = cx_time ? input_pos_lagged * (int64_t)1000000 / cx_time : 0; remaining = limit - input_pos + lagged_count; } average_rate = (average_rate <= 0) ? rate : (average_rate * 7 + rate) / 8; estimated_time_left = average_rate ? remaining / average_rate : -1; } if (got_data && global.test_decode != TEST_DECODE_OFF) FOREACH_STREAM(test_decode(stream, global.test_decode, global.codec)); } fflush(stdout); if (!global.quiet) fprintf(stderr, "\033[K"); } if (stream_cnt > 1) fprintf(stderr, "\n"); if (!global.quiet) { FOREACH_STREAM(fprintf( stderr, "\rPass %d/%d frame %4d/%-4d %7" PRId64 "B %7" PRId64 "b/f %7" PRId64 "b/s %7" PRId64 " %s (%.2f fps)\033[K\n", pass + 1, global.passes, frames_in, stream->frames_out, (int64_t)stream->nbytes, seen_frames ? (int64_t)(stream->nbytes * 8 / seen_frames) : 0, seen_frames ? (int64_t)stream->nbytes * 8 * (int64_t)global.framerate.num / global.framerate.den / seen_frames : 0, stream->cx_time > 9999999 ? stream->cx_time / 1000 : stream->cx_time, stream->cx_time > 9999999 ? "ms" : "us", usec_to_fps(stream->cx_time, seen_frames))); } if (global.show_psnr) { if (global.codec->fourcc == VP9_FOURCC) { FOREACH_STREAM( show_psnr(stream, (1 << stream->config.cfg.g_input_bit_depth) - 1)); } else { FOREACH_STREAM(show_psnr(stream, 255.0)); } } FOREACH_STREAM(vpx_codec_destroy(&stream->encoder)); if (global.test_decode != TEST_DECODE_OFF) { FOREACH_STREAM(vpx_codec_destroy(&stream->decoder)); } close_input_file(&input); if (global.test_decode == TEST_DECODE_FATAL) { FOREACH_STREAM(res |= stream->mismatch_seen); } FOREACH_STREAM(close_output_file(stream, global.codec->fourcc)); FOREACH_STREAM(stats_close(&stream->stats, global.passes - 1)); #if CONFIG_FP_MB_STATS FOREACH_STREAM(stats_close(&stream->fpmb_stats, global.passes - 1)); #endif if (global.pass) break; } if (global.show_q_hist_buckets) FOREACH_STREAM( show_q_histogram(stream->counts, global.show_q_hist_buckets)); if (global.show_rate_hist_buckets) FOREACH_STREAM(show_rate_histogram(stream->rate_hist, &stream->config.cfg, global.show_rate_hist_buckets)); FOREACH_STREAM(destroy_rate_histogram(stream->rate_hist)); #if CONFIG_INTERNAL_STATS /* TODO(jkoleszar): This doesn't belong in this executable. Do it for now, * to match some existing utilities. */ if (!(global.pass == 1 && global.passes == 2)) FOREACH_STREAM({ FILE *f = fopen("opsnr.stt", "a"); if (stream->mismatch_seen) { fprintf(f, "First mismatch occurred in frame %d\n", stream->mismatch_seen); } else { fprintf(f, "No mismatch detected in recon buffers\n"); } fclose(f); }); #endif #if CONFIG_VP9_HIGHBITDEPTH if (allocated_raw_shift) vpx_img_free(&raw_shift); #endif vpx_img_free(&raw); free(argv); free(streams); return res ? 
EXIT_FAILURE : EXIT_SUCCESS; } libvpx-1.8.2/vpxenc.h000066400000000000000000000026641357355204000145160ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_VPXENC_H_ #define VPX_VPXENC_H_ #include "vpx/vpx_encoder.h" #ifdef __cplusplus extern "C" { #endif enum TestDecodeFatality { TEST_DECODE_OFF, TEST_DECODE_FATAL, TEST_DECODE_WARN, }; typedef enum { I420, // 4:2:0 8+ bit-depth I422, // 4:2:2 8+ bit-depth I444, // 4:4:4 8+ bit-depth I440, // 4:4:0 8+ bit-depth YV12, // 4:2:0 with uv flipped, only 8-bit depth } ColorInputType; struct VpxInterface; /* Configuration elements common to all streams. */ struct VpxEncoderConfig { const struct VpxInterface *codec; int passes; int pass; int usage; int deadline; ColorInputType color_type; int quiet; int verbose; int limit; int skip_frames; int show_psnr; enum TestDecodeFatality test_decode; int have_framerate; struct vpx_rational framerate; int out_part; int debug; int show_q_hist_buckets; int show_rate_hist_buckets; int disable_warnings; int disable_warning_prompt; int experimental_bitstream; }; #ifdef __cplusplus } // extern "C" #endif #endif // VPX_VPXENC_H_ libvpx-1.8.2/vpxstats.c000066400000000000000000000052401357355204000150730ustar00rootroot00000000000000/* * Copyright (c) 2013 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./vpxstats.h"

#include <math.h>
#include <stdlib.h>
#include <string.h>

#include "./tools_common.h"

int stats_open_file(stats_io_t *stats, const char *fpf, int pass) {
  int res;
  stats->pass = pass;

  if (pass == 0) {
    stats->file = fopen(fpf, "wb");
    stats->buf.sz = 0;
    stats->buf.buf = NULL;
    res = (stats->file != NULL);
  } else {
    size_t nbytes;

    stats->file = fopen(fpf, "rb");

    if (stats->file == NULL) fatal("First-pass stats file does not exist!");

    if (fseek(stats->file, 0, SEEK_END))
      fatal("First-pass stats file must be seekable!");

    stats->buf.sz = stats->buf_alloc_sz = ftell(stats->file);
    rewind(stats->file);

    stats->buf.buf = malloc(stats->buf_alloc_sz);

    if (!stats->buf.buf)
      fatal("Failed to allocate first-pass stats buffer (%lu bytes)",
            (unsigned long)stats->buf_alloc_sz);

    nbytes = fread(stats->buf.buf, 1, stats->buf.sz, stats->file);
    res = (nbytes == stats->buf.sz);
  }

  return res;
}

int stats_open_mem(stats_io_t *stats, int pass) {
  int res;
  stats->pass = pass;

  if (!pass) {
    stats->buf.sz = 0;
    stats->buf_alloc_sz = 64 * 1024;
    stats->buf.buf = malloc(stats->buf_alloc_sz);
  }

  stats->buf_ptr = stats->buf.buf;
  res = (stats->buf.buf != NULL);
  return res;
}

void stats_close(stats_io_t *stats, int last_pass) {
  if (stats->file) {
    if (stats->pass == last_pass) {
      free(stats->buf.buf);
    }
    fclose(stats->file);
    stats->file = NULL;
  } else {
    if (stats->pass == last_pass) free(stats->buf.buf);
  }
}

void stats_write(stats_io_t *stats, const void *pkt, size_t len) {
  if (stats->file) {
    (void)fwrite(pkt, 1, len, stats->file);
  } else {
    if (stats->buf.sz + len > stats->buf_alloc_sz) {
      size_t new_sz = stats->buf_alloc_sz + 64 * 1024;
      char *new_ptr = realloc(stats->buf.buf, new_sz);

      if (new_ptr) {
        stats->buf_ptr = new_ptr + (stats->buf_ptr - (char *)stats->buf.buf);
        stats->buf.buf = new_ptr;
        stats->buf_alloc_sz = new_sz;
      } else {
        fatal("Failed to realloc firstpass stats buffer.");
      }
    }
    memcpy(stats->buf_ptr, pkt, len);
    stats->buf.sz += len;
    stats->buf_ptr += len;
  }
}

vpx_fixed_buf_t stats_get(stats_io_t *stats) { return stats->buf; }
libvpx-1.8.2/vpxstats.h000066400000000000000000000021421357355204000150760ustar00rootroot00000000000000/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#ifndef VPX_VPXSTATS_H_
#define VPX_VPXSTATS_H_

#include <stdio.h>

#include "vpx/vpx_encoder.h"

#ifdef __cplusplus
extern "C" {
#endif

/* This structure is used to abstract the different ways of handling
 * first pass statistics
 */
typedef struct {
  vpx_fixed_buf_t buf;
  int pass;
  FILE *file;
  char *buf_ptr;
  size_t buf_alloc_sz;
} stats_io_t;

int stats_open_file(stats_io_t *stats, const char *fpf, int pass);
int stats_open_mem(stats_io_t *stats, int pass);
void stats_close(stats_io_t *stats, int last_pass);
void stats_write(stats_io_t *stats, const void *pkt, size_t len);
vpx_fixed_buf_t stats_get(stats_io_t *stats);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_VPXSTATS_H_
libvpx-1.8.2/warnings.c000066400000000000000000000066311357355204000150340ustar00rootroot00000000000000/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./warnings.h"

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "vpx/vpx_encoder.h"

#include "./tools_common.h"
#include "./vpxenc.h"

static const char quantizer_warning_string[] =
    "Bad quantizer values. Quantizer values should not be equal, and should "
    "differ by at least 8.";

static const char lag_in_frames_with_realtime[] =
    "Lag in frames is ignored when deadline is set to realtime for cbr mode.";

struct WarningListNode {
  const char *warning_string;
  struct WarningListNode *next_warning;
};

struct WarningList {
  struct WarningListNode *warning_node;
};

static void add_warning(const char *warning_string,
                        struct WarningList *warning_list) {
  struct WarningListNode **node = &warning_list->warning_node;
  struct WarningListNode *new_node = malloc(sizeof(*new_node));
  if (new_node == NULL) {
    fatal("Unable to allocate warning node.");
  }

  new_node->warning_string = warning_string;
  new_node->next_warning = NULL;

  while (*node != NULL) node = &(*node)->next_warning;

  *node = new_node;
}

static void free_warning_list(struct WarningList *warning_list) {
  while (warning_list->warning_node != NULL) {
    struct WarningListNode *const node = warning_list->warning_node;
    warning_list->warning_node = node->next_warning;
    free(node);
  }
}

static int continue_prompt(int num_warnings) {
  int c;
  fprintf(stderr,
          "%d encoder configuration warning(s). Continue? (y to continue) ",
          num_warnings);
  c = getchar();
  return c == 'y';
}

static void check_quantizer(int min_q, int max_q,
                            struct WarningList *warning_list) {
  const int lossless = min_q == 0 && max_q == 0;
  if (!lossless && (min_q == max_q || abs(max_q - min_q) < 8))
    add_warning(quantizer_warning_string, warning_list);
}

static void check_lag_in_frames_realtime_deadline(
    int lag_in_frames, int deadline, int rc_end_usage,
    struct WarningList *warning_list) {
  if (deadline == VPX_DL_REALTIME && lag_in_frames != 0 && rc_end_usage == 1)
    add_warning(lag_in_frames_with_realtime, warning_list);
}

void check_encoder_config(int disable_prompt,
                          const struct VpxEncoderConfig *global_config,
                          const struct vpx_codec_enc_cfg *stream_config) {
  int num_warnings = 0;
  struct WarningListNode *warning = NULL;
  struct WarningList warning_list = { 0 };

  check_quantizer(stream_config->rc_min_quantizer,
                  stream_config->rc_max_quantizer, &warning_list);
  check_lag_in_frames_realtime_deadline(
      stream_config->g_lag_in_frames, global_config->deadline,
      stream_config->rc_end_usage, &warning_list);

  /* Count and print warnings. */
  for (warning = warning_list.warning_node; warning != NULL;
       warning = warning->next_warning, ++num_warnings) {
    warn(warning->warning_string);
  }

  free_warning_list(&warning_list);

  if (num_warnings) {
    if (!disable_prompt && !continue_prompt(num_warnings)) exit(EXIT_FAILURE);
  }
}
libvpx-1.8.2/warnings.h000066400000000000000000000017541357355204000150420ustar00rootroot00000000000000/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#ifndef VPX_WARNINGS_H_
#define VPX_WARNINGS_H_

#ifdef __cplusplus
extern "C" {
#endif

struct vpx_codec_enc_cfg;
struct VpxEncoderConfig;

/*
 * Checks config for improperly used settings. Warns user upon encountering
 * settings that will lead to poor output quality. Prompts user to continue
 * when warnings are issued.
 */
void check_encoder_config(int disable_prompt,
                          const struct VpxEncoderConfig *global_config,
                          const struct vpx_codec_enc_cfg *stream_config);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_WARNINGS_H_
libvpx-1.8.2/webmdec.cc000066400000000000000000000156451357355204000147600ustar00rootroot00000000000000/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "./webmdec.h"

#include <cstdio>
#include <cstring>

#include "third_party/libwebm/mkvparser/mkvparser.h"
#include "third_party/libwebm/mkvparser/mkvreader.h"

namespace {

void reset(struct WebmInputContext *const webm_ctx) {
  if (webm_ctx->reader != NULL) {
    mkvparser::MkvReader *const reader =
        reinterpret_cast<mkvparser::MkvReader *>(webm_ctx->reader);
    delete reader;
  }
  if (webm_ctx->segment != NULL) {
    mkvparser::Segment *const segment =
        reinterpret_cast<mkvparser::Segment *>(webm_ctx->segment);
    delete segment;
  }
  if (webm_ctx->buffer != NULL) {
    delete[] webm_ctx->buffer;
  }
  webm_ctx->reader = NULL;
  webm_ctx->segment = NULL;
  webm_ctx->buffer = NULL;
  webm_ctx->cluster = NULL;
  webm_ctx->block_entry = NULL;
  webm_ctx->block = NULL;
  webm_ctx->block_frame_index = 0;
  webm_ctx->video_track_index = 0;
  webm_ctx->timestamp_ns = 0;
  webm_ctx->is_key_frame = false;
}

void get_first_cluster(struct WebmInputContext *const webm_ctx) {
  mkvparser::Segment *const segment =
      reinterpret_cast<mkvparser::Segment *>(webm_ctx->segment);
  const mkvparser::Cluster *const cluster = segment->GetFirst();
  webm_ctx->cluster = cluster;
}

void rewind_and_reset(struct WebmInputContext *const webm_ctx,
                      struct VpxInputContext *const vpx_ctx) {
  rewind(vpx_ctx->file);
  reset(webm_ctx);
}

}  // namespace

int file_is_webm(struct WebmInputContext *webm_ctx,
                 struct VpxInputContext *vpx_ctx) {
  mkvparser::MkvReader *const reader = new mkvparser::MkvReader(vpx_ctx->file);
  webm_ctx->reader = reader;
  webm_ctx->reached_eos = 0;

  mkvparser::EBMLHeader header;
  long long pos = 0;
  if (header.Parse(reader, pos) < 0) {
    rewind_and_reset(webm_ctx, vpx_ctx);
    return 0;
  }

  mkvparser::Segment *segment;
  if (mkvparser::Segment::CreateInstance(reader, pos, segment)) {
    rewind_and_reset(webm_ctx, vpx_ctx);
    return 0;
  }
  webm_ctx->segment = segment;
  if (segment->Load() < 0) {
    rewind_and_reset(webm_ctx, vpx_ctx);
    return 0;
  }

  const mkvparser::Tracks *const tracks = segment->GetTracks();
  const mkvparser::VideoTrack *video_track = NULL;
  for (unsigned long i = 0; i < tracks->GetTracksCount(); ++i) {
    const mkvparser::Track *const track = tracks->GetTrackByIndex(i);
    if (track->GetType() == mkvparser::Track::kVideo) {
      video_track = static_cast<const mkvparser::VideoTrack *>(track);
      webm_ctx->video_track_index = static_cast<int>(track->GetNumber());
      break;
    }
  }

  if (video_track == NULL || video_track->GetCodecId() == NULL) {
    rewind_and_reset(webm_ctx, vpx_ctx);
    return 0;
  }

  if (!strncmp(video_track->GetCodecId(), "V_VP8", 5)) {
    vpx_ctx->fourcc = VP8_FOURCC;
  } else if (!strncmp(video_track->GetCodecId(), "V_VP9", 5)) {
    vpx_ctx->fourcc = VP9_FOURCC;
  } else {
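// Example (sketch, not part of the original file): end-to-end use of this
// reader API, following the contract documented in webmdec.h (buffer starts
// as NULL/0, webm_read_frame() returns 0 on success and 1 at end of stream).
#if 0
static void example_drain_webm(struct WebmInputContext *webm,
                               struct VpxInputContext *input) {
  uint8_t *buf = NULL;  // webm_read_frame() allocates and owns this buffer.
  size_t buf_sz = 0;
  if (!file_is_webm(webm, input)) return;  // not a WebM file
  while (webm_read_frame(webm, &buf, &buf_sz) == 0) {
    // buf/buf_sz now hold one compressed frame; hand it to vpx_codec_decode().
  }
  webm_free(webm);  // releases the frame buffer and parser state
}
#endif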
    rewind_and_reset(webm_ctx, vpx_ctx);
    return 0;
  }

  vpx_ctx->framerate.denominator = 0;
  vpx_ctx->framerate.numerator = 0;
  vpx_ctx->width = static_cast<uint32_t>(video_track->GetWidth());
  vpx_ctx->height = static_cast<uint32_t>(video_track->GetHeight());

  get_first_cluster(webm_ctx);

  return 1;
}

int webm_read_frame(struct WebmInputContext *webm_ctx, uint8_t **buffer,
                    size_t *buffer_size) {
  // This check is needed for frame parallel decoding, in which case this
  // function could be called even after it has reached end of input stream.
  if (webm_ctx->reached_eos) {
    return 1;
  }
  mkvparser::Segment *const segment =
      reinterpret_cast<mkvparser::Segment *>(webm_ctx->segment);
  const mkvparser::Cluster *cluster =
      reinterpret_cast<const mkvparser::Cluster *>(webm_ctx->cluster);
  const mkvparser::Block *block =
      reinterpret_cast<const mkvparser::Block *>(webm_ctx->block);
  const mkvparser::BlockEntry *block_entry =
      reinterpret_cast<const mkvparser::BlockEntry *>(webm_ctx->block_entry);
  bool block_entry_eos = false;
  do {
    long status = 0;
    bool get_new_block = false;
    if (block_entry == NULL && !block_entry_eos) {
      status = cluster->GetFirst(block_entry);
      get_new_block = true;
    } else if (block_entry_eos || block_entry->EOS()) {
      cluster = segment->GetNext(cluster);
      if (cluster == NULL || cluster->EOS()) {
        *buffer_size = 0;
        webm_ctx->reached_eos = 1;
        return 1;
      }
      status = cluster->GetFirst(block_entry);
      block_entry_eos = false;
      get_new_block = true;
    } else if (block == NULL ||
               webm_ctx->block_frame_index == block->GetFrameCount() ||
               block->GetTrackNumber() != webm_ctx->video_track_index) {
      status = cluster->GetNext(block_entry, block_entry);
      if (block_entry == NULL || block_entry->EOS()) {
        block_entry_eos = true;
        continue;
      }
      get_new_block = true;
    }
    if (status || block_entry == NULL) {
      return -1;
    }
    if (get_new_block) {
      block = block_entry->GetBlock();
      if (block == NULL) return -1;
      webm_ctx->block_frame_index = 0;
    }
  } while (block_entry_eos ||
           block->GetTrackNumber() != webm_ctx->video_track_index);

  webm_ctx->cluster = cluster;
  webm_ctx->block_entry = block_entry;
  webm_ctx->block = block;

  const mkvparser::Block::Frame &frame =
      block->GetFrame(webm_ctx->block_frame_index);
  ++webm_ctx->block_frame_index;
  if (frame.len > static_cast<long>(*buffer_size)) {
    delete[] * buffer;
    *buffer = new uint8_t[frame.len];
    if (*buffer == NULL) {
      return -1;
    }
    webm_ctx->buffer = *buffer;
  }
  *buffer_size = frame.len;
  webm_ctx->timestamp_ns = block->GetTime(cluster);
  webm_ctx->is_key_frame = block->IsKey();

  mkvparser::MkvReader *const reader =
      reinterpret_cast<mkvparser::MkvReader *>(webm_ctx->reader);
  return frame.Read(reader, *buffer) ? -1 : 0;
}

int webm_guess_framerate(struct WebmInputContext *webm_ctx,
                         struct VpxInputContext *vpx_ctx) {
  uint32_t i = 0;
  uint8_t *buffer = NULL;
  size_t buffer_size = 0;
  while (webm_ctx->timestamp_ns < 1000000000 && i < 50) {
    if (webm_read_frame(webm_ctx, &buffer, &buffer_size)) {
      break;
    }
    ++i;
  }
  vpx_ctx->framerate.numerator = (i - 1) * 1000000;
  vpx_ctx->framerate.denominator =
      static_cast<int>(webm_ctx->timestamp_ns / 1000);
  delete[] buffer;

  get_first_cluster(webm_ctx);
  webm_ctx->block = NULL;
  webm_ctx->block_entry = NULL;
  webm_ctx->block_frame_index = 0;
  webm_ctx->timestamp_ns = 0;
  webm_ctx->reached_eos = 0;

  return 0;
}

void webm_free(struct WebmInputContext *webm_ctx) { reset(webm_ctx); }
libvpx-1.8.2/webmdec.h000066400000000000000000000042701357355204000146140ustar00rootroot00000000000000/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree.
An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. */ #ifndef VPX_WEBMDEC_H_ #define VPX_WEBMDEC_H_ #include "./tools_common.h" #ifdef __cplusplus extern "C" { #endif struct VpxInputContext; struct WebmInputContext { void *reader; void *segment; uint8_t *buffer; const void *cluster; const void *block_entry; const void *block; int block_frame_index; int video_track_index; uint64_t timestamp_ns; int is_key_frame; int reached_eos; }; // Checks if the input is a WebM file. If so, initializes WebMInputContext so // that webm_read_frame can be called to retrieve a video frame. // Returns 1 on success and 0 on failure or input is not WebM file. // TODO(vigneshv): Refactor this function into two smaller functions specific // to their task. int file_is_webm(struct WebmInputContext *webm_ctx, struct VpxInputContext *vpx_ctx); // Reads a WebM Video Frame. Memory for the buffer is created, owned and managed // by this function. For the first call, |buffer| should be NULL and // |*buffer_size| should be 0. Once all the frames are read and used, // webm_free() should be called, otherwise there will be a leak. // Parameters: // webm_ctx - WebmInputContext object // buffer - pointer where the frame data will be filled. // buffer_size - pointer to buffer size. // Return values: // 0 - Success // 1 - End of Stream // -1 - Error int webm_read_frame(struct WebmInputContext *webm_ctx, uint8_t **buffer, size_t *buffer_size); // Guesses the frame rate of the input file based on the container timestamps. int webm_guess_framerate(struct WebmInputContext *webm_ctx, struct VpxInputContext *vpx_ctx); // Resets the WebMInputContext. void webm_free(struct WebmInputContext *webm_ctx); #ifdef __cplusplus } // extern "C" #endif #endif // VPX_WEBMDEC_H_ libvpx-1.8.2/webmenc.cc000066400000000000000000000071501357355204000147640ustar00rootroot00000000000000/* * Copyright (c) 2014 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. 
 */
#include "./webmenc.h"

#include <string>

#include "third_party/libwebm/mkvmuxer/mkvmuxer.h"
#include "third_party/libwebm/mkvmuxer/mkvmuxerutil.h"
#include "third_party/libwebm/mkvmuxer/mkvwriter.h"

namespace {
const uint64_t kDebugTrackUid = 0xDEADBEEF;
const int kVideoTrackNumber = 1;
}  // namespace

void write_webm_file_header(struct WebmOutputContext *webm_ctx,
                            const vpx_codec_enc_cfg_t *cfg,
                            stereo_format_t stereo_fmt, unsigned int fourcc,
                            const struct VpxRational *par) {
  mkvmuxer::MkvWriter *const writer = new mkvmuxer::MkvWriter(webm_ctx->stream);
  mkvmuxer::Segment *const segment = new mkvmuxer::Segment();
  segment->Init(writer);
  segment->set_mode(mkvmuxer::Segment::kFile);
  segment->OutputCues(true);

  mkvmuxer::SegmentInfo *const info = segment->GetSegmentInfo();
  const uint64_t kTimecodeScale = 1000000;
  info->set_timecode_scale(kTimecodeScale);
  std::string version = "vpxenc";
  if (!webm_ctx->debug) {
    version.append(std::string(" ") + vpx_codec_version_str());
  }
  info->set_writing_app(version.c_str());

  const uint64_t video_track_id =
      segment->AddVideoTrack(static_cast<int>(cfg->g_w),
                             static_cast<int>(cfg->g_h), kVideoTrackNumber);
  mkvmuxer::VideoTrack *const video_track = static_cast<mkvmuxer::VideoTrack *>(
      segment->GetTrackByNumber(video_track_id));
  video_track->SetStereoMode(stereo_fmt);
  const char *codec_id;
  switch (fourcc) {
    case VP8_FOURCC: codec_id = "V_VP8"; break;
    case VP9_FOURCC:
    default: codec_id = "V_VP9"; break;
  }
  video_track->set_codec_id(codec_id);
  if (par->numerator > 1 || par->denominator > 1) {
    // TODO(fgalligan): Add support of DisplayUnit, Display Aspect Ratio type
    // to WebM format.
    const uint64_t display_width = static_cast<uint64_t>(
        ((cfg->g_w * par->numerator * 1.0) / par->denominator) + .5);
    video_track->set_display_width(display_width);
    video_track->set_display_height(cfg->g_h);
  }
  if (webm_ctx->debug) {
    video_track->set_uid(kDebugTrackUid);
  }

  webm_ctx->writer = writer;
  webm_ctx->segment = segment;
}

void write_webm_block(struct WebmOutputContext *webm_ctx,
                      const vpx_codec_enc_cfg_t *cfg,
                      const vpx_codec_cx_pkt_t *pkt) {
  mkvmuxer::Segment *const segment =
      reinterpret_cast<mkvmuxer::Segment *>(webm_ctx->segment);
  int64_t pts_ns = pkt->data.frame.pts * 1000000000ll * cfg->g_timebase.num /
                   cfg->g_timebase.den;
  if (pts_ns <= webm_ctx->last_pts_ns) pts_ns = webm_ctx->last_pts_ns + 1000000;
  webm_ctx->last_pts_ns = pts_ns;

  segment->AddFrame(static_cast<uint8_t *>(pkt->data.frame.buf),
                    pkt->data.frame.sz, kVideoTrackNumber, pts_ns,
                    pkt->data.frame.flags & VPX_FRAME_IS_KEY);
}

void write_webm_file_footer(struct WebmOutputContext *webm_ctx) {
  mkvmuxer::MkvWriter *const writer =
      reinterpret_cast<mkvmuxer::MkvWriter *>(webm_ctx->writer);
  mkvmuxer::Segment *const segment =
      reinterpret_cast<mkvmuxer::Segment *>(webm_ctx->segment);
  segment->Finalize();
  delete segment;
  delete writer;
  webm_ctx->writer = NULL;
  webm_ctx->segment = NULL;
}
libvpx-1.8.2/webmenc.h000066400000000000000000000027521357355204000146310ustar00rootroot00000000000000/*
 *  Copyright (c) 2013 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#ifndef VPX_WEBMENC_H_
#define VPX_WEBMENC_H_

#include <stdio.h>
#include <stdlib.h>

#include "tools_common.h"
#include "vpx/vpx_encoder.h"

#ifdef __cplusplus
extern "C" {
#endif

struct WebmOutputContext {
  int debug;
  FILE *stream;
  int64_t last_pts_ns;
  void *writer;
  void *segment;
};

/* Stereo 3D packed frame format */
typedef enum stereo_format {
  STEREO_FORMAT_MONO = 0,
  STEREO_FORMAT_LEFT_RIGHT = 1,
  STEREO_FORMAT_BOTTOM_TOP = 2,
  STEREO_FORMAT_TOP_BOTTOM = 3,
  STEREO_FORMAT_RIGHT_LEFT = 11
} stereo_format_t;

void write_webm_file_header(struct WebmOutputContext *webm_ctx,
                            const vpx_codec_enc_cfg_t *cfg,
                            stereo_format_t stereo_fmt, unsigned int fourcc,
                            const struct VpxRational *par);

void write_webm_block(struct WebmOutputContext *webm_ctx,
                      const vpx_codec_enc_cfg_t *cfg,
                      const vpx_codec_cx_pkt_t *pkt);

void write_webm_file_footer(struct WebmOutputContext *webm_ctx);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_WEBMENC_H_
libvpx-1.8.2/y4menc.c000066400000000000000000000046341357355204000144030ustar00rootroot00000000000000/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include <assert.h>

#include "./y4menc.h"

int y4m_write_file_header(char *buf, size_t len, int width, int height,
                          const struct VpxRational *framerate,
                          vpx_img_fmt_t fmt, unsigned int bit_depth) {
  const char *color;
  switch (bit_depth) {
    case 8:
      color = fmt == VPX_IMG_FMT_I444
                  ? "C444\n"
                  : fmt == VPX_IMG_FMT_I422 ? "C422\n" : "C420jpeg\n";
      break;
    case 9:
      color = fmt == VPX_IMG_FMT_I44416
                  ? "C444p9 XYSCSS=444P9\n"
                  : fmt == VPX_IMG_FMT_I42216 ? "C422p9 XYSCSS=422P9\n"
                                              : "C420p9 XYSCSS=420P9\n";
      break;
    case 10:
      color = fmt == VPX_IMG_FMT_I44416
                  ? "C444p10 XYSCSS=444P10\n"
                  : fmt == VPX_IMG_FMT_I42216 ? "C422p10 XYSCSS=422P10\n"
                                              : "C420p10 XYSCSS=420P10\n";
      break;
    case 12:
      color = fmt == VPX_IMG_FMT_I44416
                  ? "C444p12 XYSCSS=444P12\n"
                  : fmt == VPX_IMG_FMT_I42216 ? "C422p12 XYSCSS=422P12\n"
                                              : "C420p12 XYSCSS=420P12\n";
      break;
    case 14:
      color = fmt == VPX_IMG_FMT_I44416
                  ? "C444p14 XYSCSS=444P14\n"
                  : fmt == VPX_IMG_FMT_I42216 ? "C422p14 XYSCSS=422P14\n"
                                              : "C420p14 XYSCSS=420P14\n";
      break;
    case 16:
      color = fmt == VPX_IMG_FMT_I44416
                  ? "C444p16 XYSCSS=444P16\n"
                  : fmt == VPX_IMG_FMT_I42216 ? "C422p16 XYSCSS=422P16\n"
                                              : "C420p16 XYSCSS=420P16\n";
      break;
    default:
      color = NULL;
      assert(0);
  }
  return snprintf(buf, len, "YUV4MPEG2 W%u H%u F%u:%u I%c %s", width, height,
                  framerate->numerator, framerate->denominator, 'p', color);
}

int y4m_write_frame_header(char *buf, size_t len) {
  return snprintf(buf, len, "FRAME\n");
}
libvpx-1.8.2/y4menc.h000066400000000000000000000016171357355204000144070ustar00rootroot00000000000000/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
libvpx-1.8.2/y4menc.h000066400000000000000000000016171357355204000144070ustar00rootroot00000000000000/*
 *  Copyright (c) 2014 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
#ifndef VPX_Y4MENC_H_
#define VPX_Y4MENC_H_

#include "./tools_common.h"
#include "vpx/vpx_decoder.h"

#ifdef __cplusplus
extern "C" {
#endif

#define Y4M_BUFFER_SIZE 128

int y4m_write_file_header(char *buf, size_t len, int width, int height,
                          const struct VpxRational *framerate,
                          vpx_img_fmt_t fmt, unsigned int bit_depth);
int y4m_write_frame_header(char *buf, size_t len);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_Y4MENC_H_
libvpx-1.8.2/y4minput.c000066400000000000000000001150711357355204000147740ustar00rootroot00000000000000/*
 *  Copyright (c) 2010 The WebM project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 *
 *  Based on code from the OggTheora software codec source code,
 *  Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors.
 */
#include <errno.h>
#include <stdio.h>
#include <string.h>

#include "vpx/vpx_integer.h"
#include "y4minput.h"

// Reads 'size' bytes from 'file' into 'buf' with some fault tolerance.
// Returns true on success.
static int file_read(void *buf, size_t size, FILE *file) {
  const int kMaxRetries = 5;
  int retry_count = 0;
  int file_error;
  size_t len = 0;
  do {
    const size_t n = fread((uint8_t *)buf + len, 1, size - len, file);
    len += n;
    file_error = ferror(file);
    if (file_error) {
      if (errno == EINTR || errno == EAGAIN) {
        clearerr(file);
        continue;
      } else {
        fprintf(stderr, "Error reading file: %u of %u bytes read, %d: %s\n",
                (uint32_t)len, (uint32_t)size, errno, strerror(errno));
        return 0;
      }
    }
  } while (!feof(file) && len < size && ++retry_count < kMaxRetries);

  if (!feof(file) && len != size) {
    fprintf(stderr,
            "Error reading file: %u of %u bytes read,"
            " error: %d, retries: %d, %d: %s\n",
            (uint32_t)len, (uint32_t)size, file_error, retry_count, errno,
            strerror(errno));
  }
  return len == size;
}

static int y4m_parse_tags(y4m_input *_y4m, char *_tags) {
  int got_w;
  int got_h;
  int got_fps;
  int got_interlace;
  int got_par;
  int got_chroma;
  char *p;
  char *q;
  got_w = got_h = got_fps = got_interlace = got_par = got_chroma = 0;
  for (p = _tags;; p = q) {
    /*Skip any leading spaces.*/
    while (*p == ' ') p++;
    /*If that's all we have, stop.*/
    if (p[0] == '\0') break;
    /*Find the end of this tag.*/
    for (q = p + 1; *q != '\0' && *q != ' '; q++) {
    }
    /*Process the tag.*/
    switch (p[0]) {
      case 'W': {
        if (sscanf(p + 1, "%d", &_y4m->pic_w) != 1) return -1;
        got_w = 1;
        break;
      }
      case 'H': {
        if (sscanf(p + 1, "%d", &_y4m->pic_h) != 1) return -1;
        got_h = 1;
        break;
      }
      case 'F': {
        if (sscanf(p + 1, "%d:%d", &_y4m->fps_n, &_y4m->fps_d) != 2) {
          return -1;
        }
        got_fps = 1;
        break;
      }
      case 'I': {
        _y4m->interlace = p[1];
        got_interlace = 1;
        break;
      }
      case 'A': {
        if (sscanf(p + 1, "%d:%d", &_y4m->par_n, &_y4m->par_d) != 2) {
          return -1;
        }
        got_par = 1;
        break;
      }
      case 'C': {
        if (q - p > 16) return -1;
        memcpy(_y4m->chroma_type, p + 1, q - p - 1);
        _y4m->chroma_type[q - p - 1] = '\0';
        got_chroma = 1;
        break;
      }
        /*Ignore unknown tags.*/
    }
  }
  if (!got_w || !got_h || !got_fps) return -1;
  if (!got_interlace) _y4m->interlace = '?';
  if (!got_par) _y4m->par_n = _y4m->par_d = 0;
  /*Chroma-type is not specified in older files, e.g., those generated by
     mplayer.*/
  if (!got_chroma) strcpy(_y4m->chroma_type, "420");
  return 0;
}
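/* Illustrative sketch, not from the libvpx tree: for a stream that begins
     "YUV4MPEG2 W352 H288 F30000:1001 Ip A1:1 C420jpeg\n"
   y4m_parse_tags() above yields pic_w=352, pic_h=288, fps_n=30000,
   fps_d=1001, interlace='p', par_n=par_d=1 and chroma_type="420jpeg".
   A hedged sketch of the same idea for a caller that only needs the
   geometry (`example_parse_y4m_geometry` is a hypothetical helper; unlike
   y4m_parse_tags() it assumes W and H come first, which the format does not
   actually require): */
#include <stdio.h>

static int example_parse_y4m_geometry(const char *header, int *w, int *h) {
  /* %*9s would skip a malformed magic too; real code should verify the
     "YUV4MPEG2" prefix and walk the tags in any order, as above. */
  return sscanf(header, "YUV4MPEG2 W%d H%d", w, h) == 2;
}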
window (for down-sampling and shifts):
   sinc(\pi*t)*sinc(\pi*t/3), |t|<3  (sinc(t)==sin(t)/t)
   0,                         |t|>=3
  The 4-tap Mitchell window (for up-sampling):
   7|t|^3-12|t|^2+16/3,             |t|<1
   -(7/3)|t|^3+12|t|^2-20|t|+32/3,  |t|<2
   0,                               |t|>=2
  The number of taps is intentionally kept small to reduce computational
   overhead and limit ringing.
  The taps from these filters are scaled so that their sum is 1, and the
   result is scaled by 128 and rounded to integers to create a filter whose
   intermediate values fit inside 16 bits.
  Coefficients are rounded in such a way as to ensure their sum is still 128,
   which is usually equivalent to normal rounding.
  Conversions which require both horizontal and vertical filtering could have
   these steps pipelined, for less memory consumption and better cache
   performance, but we do them separately for simplicity.*/
#define OC_MINI(_a, _b) ((_a) > (_b) ? (_b) : (_a))
#define OC_MAXI(_a, _b) ((_a) < (_b) ? (_b) : (_a))
#define OC_CLAMPI(_a, _b, _c) (OC_MAXI(_a, OC_MINI(_b, _c)))

/*420jpeg chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |   BR  |       |   BR  |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  420mpeg2 chroma samples are sited like:
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  BR      |       BR      |
  |       |       |       |
  Y-------Y-------Y-------Y-------
  |       |       |       |
  |       |       |       |
  |       |       |       |

  We use a resampling filter to shift the site locations one quarter pixel
   (at the chroma plane's resolution) to the right.
  The 4:2:2 modes look exactly the same, except there are twice as many
   chroma lines, and they are vertically co-sited with the luma samples in
   both the mpeg2 and jpeg cases (thus requiring no vertical resampling).*/
static void y4m_42xmpeg2_42xjpeg_helper(unsigned char *_dst,
                                        const unsigned char *_src, int _c_w,
                                        int _c_h) {
  int y;
  int x;
  for (y = 0; y < _c_h; y++) {
    /*Filter: [4 -17 114 35 -9 1]/128, derived from a 6-tap Lanczos
       window.*/
    for (x = 0; x < OC_MINI(_c_w, 2); x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(
          0,
          (4 * _src[0] - 17 * _src[OC_MAXI(x - 1, 0)] + 114 * _src[x] +
           35 * _src[OC_MINI(x + 1, _c_w - 1)] -
           9 * _src[OC_MINI(x + 2, _c_w - 1)] +
           _src[OC_MINI(x + 3, _c_w - 1)] + 64) >> 7,
          255);
    }
    for (; x < _c_w - 3; x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(
          0,
          (4 * _src[x - 2] - 17 * _src[x - 1] + 114 * _src[x] +
           35 * _src[x + 1] - 9 * _src[x + 2] + _src[x + 3] + 64) >> 7,
          255);
    }
    for (; x < _c_w; x++) {
      _dst[x] = (unsigned char)OC_CLAMPI(
          0,
          (4 * _src[x - 2] - 17 * _src[x - 1] + 114 * _src[x] +
           35 * _src[OC_MINI(x + 1, _c_w - 1)] -
           9 * _src[OC_MINI(x + 2, _c_w - 1)] + _src[_c_w - 1] + 64) >> 7,
          255);
    }
    _dst += _c_w;
    _src += _c_w;
  }
}

/*Handles both 422 and 420mpeg2 to 422jpeg and 420jpeg, respectively.*/
static void y4m_convert_42xmpeg2_42xjpeg(y4m_input *_y4m, unsigned char *_dst,
                                         unsigned char *_aux) {
  int c_w;
  int c_h;
  int c_sz;
  int pli;
  /*Skip past the luma data.*/
  _dst += _y4m->pic_w * _y4m->pic_h;
  /*Compute the size of each chroma plane.*/
  c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h;
  c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v;
  c_sz = c_w * c_h;
  for (pli = 1; pli < 3; pli++) {
    y4m_42xmpeg2_42xjpeg_helper(_dst, _aux, c_w, c_h);
    _dst += c_sz;
    _aux += c_sz;
  }
}

/*This format is only used for interlaced content, but is included for
   completeness.
420jpeg chroma samples are sited like: Y-------Y-------Y-------Y------- | | | | | BR | | BR | | | | | Y-------Y-------Y-------Y------- | | | | | | | | | | | | Y-------Y-------Y-------Y------- | | | | | BR | | BR | | | | | Y-------Y-------Y-------Y------- | | | | | | | | | | | | 420paldv chroma samples are sited like: YR------Y-------YR------Y------- | | | | | | | | | | | | YB------Y-------YB------Y------- | | | | | | | | | | | | YR------Y-------YR------Y------- | | | | | | | | | | | | YB------Y-------YB------Y------- | | | | | | | | | | | | We use a resampling filter to shift the site locations one quarter pixel (at the chroma plane's resolution) to the right. Then we use another filter to move the C_r location down one quarter pixel, and the C_b location up one quarter pixel.*/ static void y4m_convert_42xpaldv_42xjpeg(y4m_input *_y4m, unsigned char *_dst, unsigned char *_aux) { unsigned char *tmp; int c_w; int c_h; int c_sz; int pli; int y; int x; /*Skip past the luma data.*/ _dst += _y4m->pic_w * _y4m->pic_h; /*Compute the size of each chroma plane.*/ c_w = (_y4m->pic_w + 1) / 2; c_h = (_y4m->pic_h + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h; c_sz = c_w * c_h; tmp = _aux + 2 * c_sz; for (pli = 1; pli < 3; pli++) { /*First do the horizontal re-sampling. This is the same as the mpeg2 case, except that after the horizontal case, we need to apply a second vertical filter.*/ y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h); _aux += c_sz; switch (pli) { case 1: { /*Slide C_b up a quarter-pel. This is the same filter used above, but in the other order.*/ for (x = 0; x < c_w; x++) { for (y = 0; y < OC_MINI(c_h, 3); y++) { _dst[y * c_w] = (unsigned char)OC_CLAMPI( 0, (tmp[0] - 9 * tmp[OC_MAXI(y - 2, 0) * c_w] + 35 * tmp[OC_MAXI(y - 1, 0) * c_w] + 114 * tmp[y * c_w] - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] + 4 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + 64) >> 7, 255); } for (; y < c_h - 2; y++) { _dst[y * c_w] = (unsigned char)OC_CLAMPI( 0, (tmp[(y - 3) * c_w] - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] - 17 * tmp[(y + 1) * c_w] + 4 * tmp[(y + 2) * c_w] + 64) >> 7, 255); } for (; y < c_h; y++) { _dst[y * c_w] = (unsigned char)OC_CLAMPI( 0, (tmp[(y - 3) * c_w] - 9 * tmp[(y - 2) * c_w] + 35 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] - 17 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] + 4 * tmp[(c_h - 1) * c_w] + 64) >> 7, 255); } _dst++; tmp++; } _dst += c_sz - c_w; tmp -= c_w; break; } case 2: { /*Slide C_r down a quarter-pel. This is the same as the horizontal filter.*/ for (x = 0; x < c_w; x++) { for (y = 0; y < OC_MINI(c_h, 2); y++) { _dst[y * c_w] = (unsigned char)OC_CLAMPI( 0, (4 * tmp[0] - 17 * tmp[OC_MAXI(y - 1, 0) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + tmp[OC_MINI(y + 3, c_h - 1) * c_w] + 64) >> 7, 255); } for (; y < c_h - 3; y++) { _dst[y * c_w] = (unsigned char)OC_CLAMPI( 0, (4 * tmp[(y - 2) * c_w] - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[(y + 1) * c_w] - 9 * tmp[(y + 2) * c_w] + tmp[(y + 3) * c_w] + 64) >> 7, 255); } for (; y < c_h; y++) { _dst[y * c_w] = (unsigned char)OC_CLAMPI( 0, (4 * tmp[(y - 2) * c_w] - 17 * tmp[(y - 1) * c_w] + 114 * tmp[y * c_w] + 35 * tmp[OC_MINI(y + 1, c_h - 1) * c_w] - 9 * tmp[OC_MINI(y + 2, c_h - 1) * c_w] + tmp[(c_h - 1) * c_w] + 64) >> 7, 255); } _dst++; tmp++; } break; } } /*For actual interlaced material, this would have to be done separately on each field, and the shift amounts would be different. 
C_r moves down 1/8, C_b up 3/8 in the top field, and C_r moves down 3/8, C_b up 1/8 in the bottom field. The corresponding filters would be: Down 1/8 (reverse order for up): [3 -11 125 15 -4 0]/128 Down 3/8 (reverse order for up): [4 -19 98 56 -13 2]/128*/ } } /*Perform vertical filtering to reduce a single plane from 4:2:2 to 4:2:0. This is used as a helper by several converation routines.*/ static void y4m_422jpeg_420jpeg_helper(unsigned char *_dst, const unsigned char *_src, int _c_w, int _c_h) { int y; int x; /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/ for (x = 0; x < _c_w; x++) { for (y = 0; y < OC_MINI(_c_h, 2); y += 2) { _dst[(y >> 1) * _c_w] = OC_CLAMPI(0, (64 * _src[0] + 78 * _src[OC_MINI(1, _c_h - 1) * _c_w] - 17 * _src[OC_MINI(2, _c_h - 1) * _c_w] + 3 * _src[OC_MINI(3, _c_h - 1) * _c_w] + 64) >> 7, 255); } for (; y < _c_h - 3; y += 2) { _dst[(y >> 1) * _c_w] = OC_CLAMPI(0, (3 * (_src[(y - 2) * _c_w] + _src[(y + 3) * _c_w]) - 17 * (_src[(y - 1) * _c_w] + _src[(y + 2) * _c_w]) + 78 * (_src[y * _c_w] + _src[(y + 1) * _c_w]) + 64) >> 7, 255); } for (; y < _c_h; y += 2) { _dst[(y >> 1) * _c_w] = OC_CLAMPI( 0, (3 * (_src[(y - 2) * _c_w] + _src[(_c_h - 1) * _c_w]) - 17 * (_src[(y - 1) * _c_w] + _src[OC_MINI(y + 2, _c_h - 1) * _c_w]) + 78 * (_src[y * _c_w] + _src[OC_MINI(y + 1, _c_h - 1) * _c_w]) + 64) >> 7, 255); } _src++; _dst++; } } /*420jpeg chroma samples are sited like: Y-------Y-------Y-------Y------- | | | | | BR | | BR | | | | | Y-------Y-------Y-------Y------- | | | | | | | | | | | | Y-------Y-------Y-------Y------- | | | | | BR | | BR | | | | | Y-------Y-------Y-------Y------- | | | | | | | | | | | | 422jpeg chroma samples are sited like: Y---BR--Y-------Y---BR--Y------- | | | | | | | | | | | | Y---BR--Y-------Y---BR--Y------- | | | | | | | | | | | | Y---BR--Y-------Y---BR--Y------- | | | | | | | | | | | | Y---BR--Y-------Y---BR--Y------- | | | | | | | | | | | | We use a resampling filter to decimate the chroma planes by two in the vertical direction.*/ static void y4m_convert_422jpeg_420jpeg(y4m_input *_y4m, unsigned char *_dst, unsigned char *_aux) { int c_w; int c_h; int c_sz; int dst_c_w; int dst_c_h; int dst_c_sz; int pli; /*Skip past the luma data.*/ _dst += _y4m->pic_w * _y4m->pic_h; /*Compute the size of each chroma plane.*/ c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h; c_h = _y4m->pic_h; dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h; dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v; c_sz = c_w * c_h; dst_c_sz = dst_c_w * dst_c_h; for (pli = 1; pli < 3; pli++) { y4m_422jpeg_420jpeg_helper(_dst, _aux, c_w, c_h); _aux += c_sz; _dst += dst_c_sz; } } /*420jpeg chroma samples are sited like: Y-------Y-------Y-------Y------- | | | | | BR | | BR | | | | | Y-------Y-------Y-------Y------- | | | | | | | | | | | | Y-------Y-------Y-------Y------- | | | | | BR | | BR | | | | | Y-------Y-------Y-------Y------- | | | | | | | | | | | | 422 chroma samples are sited like: YBR-----Y-------YBR-----Y------- | | | | | | | | | | | | YBR-----Y-------YBR-----Y------- | | | | | | | | | | | | YBR-----Y-------YBR-----Y------- | | | | | | | | | | | | YBR-----Y-------YBR-----Y------- | | | | | | | | | | | | We use a resampling filter to shift the original site locations one quarter pixel (at the original chroma resolution) to the right. 
Then we use a second resampling filter to decimate the chroma planes by two in the vertical direction.*/ static void y4m_convert_422_420jpeg(y4m_input *_y4m, unsigned char *_dst, unsigned char *_aux) { unsigned char *tmp; int c_w; int c_h; int c_sz; int dst_c_h; int dst_c_sz; int pli; /*Skip past the luma data.*/ _dst += _y4m->pic_w * _y4m->pic_h; /*Compute the size of each chroma plane.*/ c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h; c_h = _y4m->pic_h; dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v; c_sz = c_w * c_h; dst_c_sz = c_w * dst_c_h; tmp = _aux + 2 * c_sz; for (pli = 1; pli < 3; pli++) { /*In reality, the horizontal and vertical steps could be pipelined, for less memory consumption and better cache performance, but we do them separately for simplicity.*/ /*First do horizontal filtering (convert to 422jpeg)*/ y4m_42xmpeg2_42xjpeg_helper(tmp, _aux, c_w, c_h); /*Now do the vertical filtering.*/ y4m_422jpeg_420jpeg_helper(_dst, tmp, c_w, c_h); _aux += c_sz; _dst += dst_c_sz; } } /*420jpeg chroma samples are sited like: Y-------Y-------Y-------Y------- | | | | | BR | | BR | | | | | Y-------Y-------Y-------Y------- | | | | | | | | | | | | Y-------Y-------Y-------Y------- | | | | | BR | | BR | | | | | Y-------Y-------Y-------Y------- | | | | | | | | | | | | 411 chroma samples are sited like: YBR-----Y-------Y-------Y------- | | | | | | | | | | | | YBR-----Y-------Y-------Y------- | | | | | | | | | | | | YBR-----Y-------Y-------Y------- | | | | | | | | | | | | YBR-----Y-------Y-------Y------- | | | | | | | | | | | | We use a filter to resample at site locations one eighth pixel (at the source chroma plane's horizontal resolution) and five eighths of a pixel to the right. Then we use another filter to decimate the planes by 2 in the vertical direction.*/ static void y4m_convert_411_420jpeg(y4m_input *_y4m, unsigned char *_dst, unsigned char *_aux) { unsigned char *tmp; int c_w; int c_h; int c_sz; int dst_c_w; int dst_c_h; int dst_c_sz; int tmp_sz; int pli; int y; int x; /*Skip past the luma data.*/ _dst += _y4m->pic_w * _y4m->pic_h; /*Compute the size of each chroma plane.*/ c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h; c_h = _y4m->pic_h; dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h; dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v; c_sz = c_w * c_h; dst_c_sz = dst_c_w * dst_c_h; tmp_sz = dst_c_w * c_h; tmp = _aux + 2 * c_sz; for (pli = 1; pli < 3; pli++) { /*In reality, the horizontal and vertical steps could be pipelined, for less memory consumption and better cache performance, but we do them separately for simplicity.*/ /*First do horizontal filtering (convert to 422jpeg)*/ for (y = 0; y < c_h; y++) { /*Filters: [1 110 18 -1]/128 and [-3 50 86 -5]/128, both derived from a 4-tap Mitchell window.*/ for (x = 0; x < OC_MINI(c_w, 1); x++) { tmp[x << 1] = (unsigned char)OC_CLAMPI( 0, (111 * _aux[0] + 18 * _aux[OC_MINI(1, c_w - 1)] - _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255); tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI( 0, (47 * _aux[0] + 86 * _aux[OC_MINI(1, c_w - 1)] - 5 * _aux[OC_MINI(2, c_w - 1)] + 64) >> 7, 255); } for (; x < c_w - 2; x++) { tmp[x << 1] = (unsigned char)OC_CLAMPI(0, (_aux[x - 1] + 110 * _aux[x] + 18 * _aux[x + 1] - _aux[x + 2] + 64) >> 7, 255); tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI( 0, (-3 * _aux[x - 1] + 50 * _aux[x] + 86 * _aux[x + 1] - 5 * _aux[x + 2] + 64) >> 7, 255); } for (; x < c_w; x++) { tmp[x << 1] = (unsigned char)OC_CLAMPI( 0, (_aux[x - 1] + 110 * 
_aux[x] + 18 * _aux[OC_MINI(x + 1, c_w - 1)] - _aux[c_w - 1] + 64) >> 7, 255); if ((x << 1 | 1) < dst_c_w) { tmp[x << 1 | 1] = (unsigned char)OC_CLAMPI( 0, (-3 * _aux[x - 1] + 50 * _aux[x] + 86 * _aux[OC_MINI(x + 1, c_w - 1)] - 5 * _aux[c_w - 1] + 64) >> 7, 255); } } tmp += dst_c_w; _aux += c_w; } tmp -= tmp_sz; /*Now do the vertical filtering.*/ y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h); _dst += dst_c_sz; } } /*Convert 444 to 420jpeg.*/ static void y4m_convert_444_420jpeg(y4m_input *_y4m, unsigned char *_dst, unsigned char *_aux) { unsigned char *tmp; int c_w; int c_h; int c_sz; int dst_c_w; int dst_c_h; int dst_c_sz; int tmp_sz; int pli; int y; int x; /*Skip past the luma data.*/ _dst += _y4m->pic_w * _y4m->pic_h; /*Compute the size of each chroma plane.*/ c_w = (_y4m->pic_w + _y4m->src_c_dec_h - 1) / _y4m->src_c_dec_h; c_h = _y4m->pic_h; dst_c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h; dst_c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v; c_sz = c_w * c_h; dst_c_sz = dst_c_w * dst_c_h; tmp_sz = dst_c_w * c_h; tmp = _aux + 2 * c_sz; for (pli = 1; pli < 3; pli++) { /*Filter: [3 -17 78 78 -17 3]/128, derived from a 6-tap Lanczos window.*/ for (y = 0; y < c_h; y++) { for (x = 0; x < OC_MINI(c_w, 2); x += 2) { tmp[x >> 1] = OC_CLAMPI(0, (64 * _aux[0] + 78 * _aux[OC_MINI(1, c_w - 1)] - 17 * _aux[OC_MINI(2, c_w - 1)] + 3 * _aux[OC_MINI(3, c_w - 1)] + 64) >> 7, 255); } for (; x < c_w - 3; x += 2) { tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[x + 3]) - 17 * (_aux[x - 1] + _aux[x + 2]) + 78 * (_aux[x] + _aux[x + 1]) + 64) >> 7, 255); } for (; x < c_w; x += 2) { tmp[x >> 1] = OC_CLAMPI(0, (3 * (_aux[x - 2] + _aux[c_w - 1]) - 17 * (_aux[x - 1] + _aux[OC_MINI(x + 2, c_w - 1)]) + 78 * (_aux[x] + _aux[OC_MINI(x + 1, c_w - 1)]) + 64) >> 7, 255); } tmp += dst_c_w; _aux += c_w; } tmp -= tmp_sz; /*Now do the vertical filtering.*/ y4m_422jpeg_420jpeg_helper(_dst, tmp, dst_c_w, c_h); _dst += dst_c_sz; } } /*The image is padded with empty chroma components at 4:2:0.*/ static void y4m_convert_mono_420jpeg(y4m_input *_y4m, unsigned char *_dst, unsigned char *_aux) { int c_sz; (void)_aux; _dst += _y4m->pic_w * _y4m->pic_h; c_sz = ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) * ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v); memset(_dst, 128, c_sz * 2); } /*No conversion function needed.*/ static void y4m_convert_null(y4m_input *_y4m, unsigned char *_dst, unsigned char *_aux) { (void)_y4m; (void)_dst; (void)_aux; } int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip, int only_420) { char buffer[80] = { 0 }; int ret; int i; /*Read until newline, or 80 cols, whichever happens first.*/ for (i = 0; i < 79; i++) { if (_nskip > 0) { buffer[i] = *_skip++; _nskip--; } else { if (!file_read(buffer + i, 1, _fin)) return -1; } if (buffer[i] == '\n') break; } /*We skipped too much header data.*/ if (_nskip > 0) return -1; if (i == 79) { fprintf(stderr, "Error parsing header; not a YUV2MPEG2 file?\n"); return -1; } buffer[i] = '\0'; if (memcmp(buffer, "YUV4MPEG", 8)) { fprintf(stderr, "Incomplete magic for YUV4MPEG file.\n"); return -1; } if (buffer[8] != '2') { fprintf(stderr, "Incorrect YUV input file version; YUV4MPEG2 required.\n"); } ret = y4m_parse_tags(_y4m, buffer + 5); if (ret < 0) { fprintf(stderr, "Error parsing YUV4MPEG2 header.\n"); return ret; } if (_y4m->interlace == '?') { fprintf(stderr, "Warning: Input video interlacing format unknown; " "assuming progressive scan.\n"); } else if (_y4m->interlace != 'p') { 
fprintf(stderr, "Input video is interlaced; " "Only progressive scan handled.\n"); return -1; } _y4m->vpx_fmt = VPX_IMG_FMT_I420; _y4m->bps = 12; _y4m->bit_depth = 8; if (strcmp(_y4m->chroma_type, "420") == 0 || strcmp(_y4m->chroma_type, "420jpeg") == 0) { _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2; _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h + 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2); /* Natively supported: no conversion required. */ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0; _y4m->convert = y4m_convert_null; } else if (strcmp(_y4m->chroma_type, "420p10") == 0) { _y4m->src_c_dec_h = 2; _y4m->dst_c_dec_h = 2; _y4m->src_c_dec_v = 2; _y4m->dst_c_dec_v = 2; _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h + 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2)); /* Natively supported: no conversion required. */ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0; _y4m->convert = y4m_convert_null; _y4m->bit_depth = 10; _y4m->bps = 15; _y4m->vpx_fmt = VPX_IMG_FMT_I42016; if (only_420) { fprintf(stderr, "Unsupported conversion from 420p10 to 420jpeg\n"); return -1; } } else if (strcmp(_y4m->chroma_type, "420p12") == 0) { _y4m->src_c_dec_h = 2; _y4m->dst_c_dec_h = 2; _y4m->src_c_dec_v = 2; _y4m->dst_c_dec_v = 2; _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h + 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2)); /* Natively supported: no conversion required. */ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0; _y4m->convert = y4m_convert_null; _y4m->bit_depth = 12; _y4m->bps = 18; _y4m->vpx_fmt = VPX_IMG_FMT_I42016; if (only_420) { fprintf(stderr, "Unsupported conversion from 420p12 to 420jpeg\n"); return -1; } } else if (strcmp(_y4m->chroma_type, "420mpeg2") == 0) { _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2; _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h; /*Chroma filter required: read into the aux buf first.*/ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2); _y4m->convert = y4m_convert_42xmpeg2_42xjpeg; } else if (strcmp(_y4m->chroma_type, "420paldv") == 0) { _y4m->src_c_dec_h = _y4m->dst_c_dec_h = _y4m->src_c_dec_v = _y4m->dst_c_dec_v = 2; _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h; /*Chroma filter required: read into the aux buf first. We need to make two filter passes, so we need some extra space in the aux buffer.*/ _y4m->aux_buf_sz = 3 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2); _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * ((_y4m->pic_h + 1) / 2); _y4m->convert = y4m_convert_42xpaldv_42xjpeg; } else if (strcmp(_y4m->chroma_type, "422jpeg") == 0) { _y4m->src_c_dec_h = _y4m->dst_c_dec_h = 2; _y4m->src_c_dec_v = 1; _y4m->dst_c_dec_v = 2; _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h; /*Chroma filter required: read into the aux buf first.*/ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h; _y4m->convert = y4m_convert_422jpeg_420jpeg; } else if (strcmp(_y4m->chroma_type, "422") == 0) { _y4m->src_c_dec_h = 2; _y4m->src_c_dec_v = 1; if (only_420) { _y4m->dst_c_dec_h = 2; _y4m->dst_c_dec_v = 2; _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h; /*Chroma filter required: read into the aux buf first. 
We need to make two filter passes, so we need some extra space in the aux buffer.*/ _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h; _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h; _y4m->convert = y4m_convert_422_420jpeg; } else { _y4m->vpx_fmt = VPX_IMG_FMT_I422; _y4m->bps = 16; _y4m->dst_c_dec_h = _y4m->src_c_dec_h; _y4m->dst_c_dec_v = _y4m->src_c_dec_v; _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h + 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h; /*Natively supported: no conversion required.*/ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0; _y4m->convert = y4m_convert_null; } } else if (strcmp(_y4m->chroma_type, "422p10") == 0) { _y4m->src_c_dec_h = 2; _y4m->src_c_dec_v = 1; _y4m->vpx_fmt = VPX_IMG_FMT_I42216; _y4m->bps = 20; _y4m->bit_depth = 10; _y4m->dst_c_dec_h = _y4m->src_c_dec_h; _y4m->dst_c_dec_v = _y4m->src_c_dec_v; _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h + 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h); _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0; _y4m->convert = y4m_convert_null; if (only_420) { fprintf(stderr, "Unsupported conversion from 422p10 to 420jpeg\n"); return -1; } } else if (strcmp(_y4m->chroma_type, "422p12") == 0) { _y4m->src_c_dec_h = 2; _y4m->src_c_dec_v = 1; _y4m->vpx_fmt = VPX_IMG_FMT_I42216; _y4m->bps = 24; _y4m->bit_depth = 12; _y4m->dst_c_dec_h = _y4m->src_c_dec_h; _y4m->dst_c_dec_v = _y4m->src_c_dec_v; _y4m->dst_buf_read_sz = 2 * (_y4m->pic_w * _y4m->pic_h + 2 * ((_y4m->pic_w + 1) / 2) * _y4m->pic_h); _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0; _y4m->convert = y4m_convert_null; if (only_420) { fprintf(stderr, "Unsupported conversion from 422p12 to 420jpeg\n"); return -1; } } else if (strcmp(_y4m->chroma_type, "411") == 0) { _y4m->src_c_dec_h = 4; _y4m->dst_c_dec_h = 2; _y4m->src_c_dec_v = 1; _y4m->dst_c_dec_v = 2; _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h; /*Chroma filter required: read into the aux buf first. We need to make two filter passes, so we need some extra space in the aux buffer.*/ _y4m->aux_buf_read_sz = 2 * ((_y4m->pic_w + 3) / 4) * _y4m->pic_h; _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h; _y4m->convert = y4m_convert_411_420jpeg; fprintf(stderr, "Unsupported conversion from yuv 411\n"); return -1; } else if (strcmp(_y4m->chroma_type, "444") == 0) { _y4m->src_c_dec_h = 1; _y4m->src_c_dec_v = 1; if (only_420) { _y4m->dst_c_dec_h = 2; _y4m->dst_c_dec_v = 2; _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h; /*Chroma filter required: read into the aux buf first. 
We need to make two filter passes, so we need some extra space in the aux buffer.*/ _y4m->aux_buf_read_sz = 2 * _y4m->pic_w * _y4m->pic_h; _y4m->aux_buf_sz = _y4m->aux_buf_read_sz + ((_y4m->pic_w + 1) / 2) * _y4m->pic_h; _y4m->convert = y4m_convert_444_420jpeg; } else { _y4m->vpx_fmt = VPX_IMG_FMT_I444; _y4m->bps = 24; _y4m->dst_c_dec_h = _y4m->src_c_dec_h; _y4m->dst_c_dec_v = _y4m->src_c_dec_v; _y4m->dst_buf_read_sz = 3 * _y4m->pic_w * _y4m->pic_h; /*Natively supported: no conversion required.*/ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0; _y4m->convert = y4m_convert_null; } } else if (strcmp(_y4m->chroma_type, "444p10") == 0) { _y4m->src_c_dec_h = 1; _y4m->src_c_dec_v = 1; _y4m->vpx_fmt = VPX_IMG_FMT_I44416; _y4m->bps = 30; _y4m->bit_depth = 10; _y4m->dst_c_dec_h = _y4m->src_c_dec_h; _y4m->dst_c_dec_v = _y4m->src_c_dec_v; _y4m->dst_buf_read_sz = 2 * 3 * _y4m->pic_w * _y4m->pic_h; _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0; _y4m->convert = y4m_convert_null; if (only_420) { fprintf(stderr, "Unsupported conversion from 444p10 to 420jpeg\n"); return -1; } } else if (strcmp(_y4m->chroma_type, "444p12") == 0) { _y4m->src_c_dec_h = 1; _y4m->src_c_dec_v = 1; _y4m->vpx_fmt = VPX_IMG_FMT_I44416; _y4m->bps = 36; _y4m->bit_depth = 12; _y4m->dst_c_dec_h = _y4m->src_c_dec_h; _y4m->dst_c_dec_v = _y4m->src_c_dec_v; _y4m->dst_buf_read_sz = 2 * 3 * _y4m->pic_w * _y4m->pic_h; _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0; _y4m->convert = y4m_convert_null; if (only_420) { fprintf(stderr, "Unsupported conversion from 444p12 to 420jpeg\n"); return -1; } } else if (strcmp(_y4m->chroma_type, "mono") == 0) { _y4m->src_c_dec_h = _y4m->src_c_dec_v = 0; _y4m->dst_c_dec_h = _y4m->dst_c_dec_v = 2; _y4m->dst_buf_read_sz = _y4m->pic_w * _y4m->pic_h; /*No extra space required, but we need to clear the chroma planes.*/ _y4m->aux_buf_sz = _y4m->aux_buf_read_sz = 0; _y4m->convert = y4m_convert_mono_420jpeg; } else { fprintf(stderr, "Unknown chroma sampling type: %s\n", _y4m->chroma_type); return -1; } /*The size of the final frame buffers is always computed from the destination chroma decimation type.*/ _y4m->dst_buf_sz = _y4m->pic_w * _y4m->pic_h + 2 * ((_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h) * ((_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v); if (_y4m->bit_depth == 8) _y4m->dst_buf = (unsigned char *)malloc(_y4m->dst_buf_sz); else _y4m->dst_buf = (unsigned char *)malloc(2 * _y4m->dst_buf_sz); if (_y4m->aux_buf_sz > 0) _y4m->aux_buf = (unsigned char *)malloc(_y4m->aux_buf_sz); return 0; } void y4m_input_close(y4m_input *_y4m) { free(_y4m->dst_buf); free(_y4m->aux_buf); } int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *_img) { char frame[6]; int pic_sz; int c_w; int c_h; int c_sz; int bytes_per_sample = _y4m->bit_depth > 8 ? 
2 : 1; /*Read and skip the frame header.*/ if (!file_read(frame, 6, _fin)) return 0; if (memcmp(frame, "FRAME", 5)) { fprintf(stderr, "Loss of framing in Y4M input data\n"); return -1; } if (frame[5] != '\n') { char c; int j; for (j = 0; j < 79 && file_read(&c, 1, _fin) && c != '\n'; j++) { } if (j == 79) { fprintf(stderr, "Error parsing Y4M frame header\n"); return -1; } } /*Read the frame data that needs no conversion.*/ if (!file_read(_y4m->dst_buf, _y4m->dst_buf_read_sz, _fin)) { fprintf(stderr, "Error reading Y4M frame data.\n"); return -1; } /*Read the frame data that does need conversion.*/ if (!file_read(_y4m->aux_buf, _y4m->aux_buf_read_sz, _fin)) { fprintf(stderr, "Error reading Y4M frame data.\n"); return -1; } /*Now convert the just read frame.*/ (*_y4m->convert)(_y4m, _y4m->dst_buf, _y4m->aux_buf); /*Fill in the frame buffer pointers. We don't use vpx_img_wrap() because it forces padding for odd picture sizes, which would require a separate fread call for every row.*/ memset(_img, 0, sizeof(*_img)); /*Y4M has the planes in Y'CbCr order, which libvpx calls Y, U, and V.*/ _img->fmt = _y4m->vpx_fmt; _img->w = _img->d_w = _y4m->pic_w; _img->h = _img->d_h = _y4m->pic_h; _img->x_chroma_shift = _y4m->dst_c_dec_h >> 1; _img->y_chroma_shift = _y4m->dst_c_dec_v >> 1; _img->bps = _y4m->bps; /*Set up the buffer pointers.*/ pic_sz = _y4m->pic_w * _y4m->pic_h * bytes_per_sample; c_w = (_y4m->pic_w + _y4m->dst_c_dec_h - 1) / _y4m->dst_c_dec_h; c_w *= bytes_per_sample; c_h = (_y4m->pic_h + _y4m->dst_c_dec_v - 1) / _y4m->dst_c_dec_v; c_sz = c_w * c_h; _img->stride[VPX_PLANE_Y] = _img->stride[VPX_PLANE_ALPHA] = _y4m->pic_w * bytes_per_sample; _img->stride[VPX_PLANE_U] = _img->stride[VPX_PLANE_V] = c_w; _img->planes[VPX_PLANE_Y] = _y4m->dst_buf; _img->planes[VPX_PLANE_U] = _y4m->dst_buf + pic_sz; _img->planes[VPX_PLANE_V] = _y4m->dst_buf + pic_sz + c_sz; _img->planes[VPX_PLANE_ALPHA] = _y4m->dst_buf + pic_sz + 2 * c_sz; return 1; } libvpx-1.8.2/y4minput.h000066400000000000000000000035041357355204000147760ustar00rootroot00000000000000/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree. * * Based on code from the OggTheora software codec source code, * Copyright (C) 2002-2010 The Xiph.Org Foundation and contributors. 
 */
#ifndef VPX_Y4MINPUT_H_
#define VPX_Y4MINPUT_H_

#include <stdio.h>
#include "vpx/vpx_image.h"

#ifdef __cplusplus
extern "C" {
#endif

typedef struct y4m_input y4m_input;

/*The function used to perform chroma conversion.*/
typedef void (*y4m_convert_func)(y4m_input *_y4m, unsigned char *_dst,
                                 unsigned char *_src);

struct y4m_input {
  int pic_w;
  int pic_h;
  int fps_n;
  int fps_d;
  int par_n;
  int par_d;
  char interlace;
  int src_c_dec_h;
  int src_c_dec_v;
  int dst_c_dec_h;
  int dst_c_dec_v;
  char chroma_type[16];
  /*The size of each converted frame buffer.*/
  size_t dst_buf_sz;
  /*The amount to read directly into the converted frame buffer.*/
  size_t dst_buf_read_sz;
  /*The size of the auxiliary buffer.*/
  size_t aux_buf_sz;
  /*The amount to read into the auxiliary buffer.*/
  size_t aux_buf_read_sz;
  y4m_convert_func convert;
  unsigned char *dst_buf;
  unsigned char *aux_buf;
  enum vpx_img_fmt vpx_fmt;
  int bps;
  unsigned int bit_depth;
};

int y4m_input_open(y4m_input *_y4m, FILE *_fin, char *_skip, int _nskip,
                   int only_420);
void y4m_input_close(y4m_input *_y4m);
int y4m_input_fetch_frame(y4m_input *_y4m, FILE *_fin, vpx_image_t *img);

#ifdef __cplusplus
}  // extern "C"
#endif

#endif  // VPX_Y4MINPUT_H_
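/* Illustrative sketch, not from the libvpx tree: typical use of the
   y4m_input reader declared above. y4m_input_open() parses the stream
   header (any bytes already consumed while probing the file type can be
   passed back through _skip/_nskip), and y4m_input_fetch_frame() returns 1
   per decoded frame, 0 at end of stream, and -1 on error. A minimal sketch
   (`example_count_y4m_frames` is a hypothetical helper name): */
#include <stdio.h>

#include "./y4minput.h"
#include "vpx/vpx_image.h"

static int example_count_y4m_frames(const char *path) {
  FILE *fin = fopen(path, "rb");
  y4m_input y4m;
  vpx_image_t img;
  int frames = 0;
  if (!fin) return -1;
  /* Nothing was pre-read here, so _skip is NULL and _nskip is 0;
     only_420 = 0 keeps 4:2:2/4:4:4 input in its native format. */
  if (y4m_input_open(&y4m, fin, NULL, 0, 0) < 0) {
    fclose(fin);
    return -1;
  }
  while (y4m_input_fetch_frame(&y4m, fin, &img) > 0) ++frames;
  y4m_input_close(&y4m);
  fclose(fin);
  return frames;
}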